From 464956f75e9e26bcbbcbef435213e8f5fa854d07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Sat, 6 Feb 2021 16:13:46 +0100
Subject: [PATCH 0001/3292] HID: intel-ish-hid: Drop if block with an always
 false condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A remove callback is only ever called for a bound device. So there is no
need to check for device or driver being NULL.

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp/bus.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index bba29cd36d29..ccd54f244503 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -257,17 +257,13 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv)
 static int ishtp_cl_device_remove(struct device *dev)
 {
 	struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
-	struct ishtp_cl_driver *driver;
-
-	if (!device || !dev->driver)
-		return 0;
+	struct ishtp_cl_driver *driver = to_ishtp_cl_driver(dev->driver);
 
 	if (device->event_cb) {
 		device->event_cb = NULL;
 		cancel_work_sync(&device->event_work);
 	}
 
-	driver = to_ishtp_cl_driver(dev->driver);
 	if (!driver->remove) {
 		dev->driver = NULL;
 

From 7c746603b5c58939ec823cff5dca3894cc3afb3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Sat, 6 Feb 2021 16:13:47 +0100
Subject: [PATCH 0002/3292] HID: intel-ish-hid: Simplify logic in
 ishtp_cl_device_remove()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is only a single change in behavior: Now dev->driver isn't modified.
Assigning to this variable is in the domain of the driver core only. (And
it's done in __device_release_driver shortly after bus->remove() (i.e
ishtp_cl_device_remove() here) returns.)

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp/bus.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index ccd54f244503..7f36ce6187a1 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -258,19 +258,17 @@ static int ishtp_cl_device_remove(struct device *dev)
 {
 	struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
 	struct ishtp_cl_driver *driver = to_ishtp_cl_driver(dev->driver);
+	int ret = 0;
 
 	if (device->event_cb) {
 		device->event_cb = NULL;
 		cancel_work_sync(&device->event_work);
 	}
 
-	if (!driver->remove) {
-		dev->driver = NULL;
+	if (driver->remove)
+		ret = driver->remove(device);
 
-		return 0;
-	}
-
-	return driver->remove(device);
+	return ret;
 }
 
 /**

From e71da1fd0e84bc5c87a78b405e40713840eecc80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Sat, 6 Feb 2021 16:13:48 +0100
Subject: [PATCH 0003/3292] HID: intel-ish-hid: Make remove callback return
 void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver core ignores the return value of struct bus_type::remove()
because there is only little that can be done. To simplify the quest to
make this function return void, let struct ishtp_cl_driver::remove() return
void, too. All users already unconditionally return 0, this commit makes
it obvious that returning an error value is a bad idea.

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c  | 4 +---
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 4 +---
 drivers/hid/intel-ish-hid/ishtp/bus.c        | 5 ++---
 drivers/platform/chrome/cros_ec_ishtp.c      | 4 +---
 include/linux/intel-ish-client-if.h          | 2 +-
 5 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index 6cf59fd26ad7..edb0bd084c27 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -1015,7 +1015,7 @@ static int loader_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
  *
  * Return: 0
  */
-static int loader_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
+static void loader_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
 {
 	struct ishtp_cl_data *client_data;
 	struct ishtp_cl	*loader_ishtp_cl = ishtp_get_drvdata(cl_device);
@@ -1032,8 +1032,6 @@ static int loader_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
 	cancel_work_sync(&client_data->work_ishtp_reset);
 	loader_deinit(loader_ishtp_cl);
 	ishtp_put_device(cl_device);
-
-	return 0;
 }
 
 /**
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 6ba944b40fdb..0f1b5283bab4 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -838,7 +838,7 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
  *
  * Return: 0
  */
-static int hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
+static void hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
 {
 	struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
 	struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
@@ -856,8 +856,6 @@ static int hid_ishtp_cl_remove(struct ishtp_cl_device *cl_device)
 	hid_ishtp_cl = NULL;
 
 	client_data->num_hid_devices = 0;
-
-	return 0;
 }
 
 /**
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index 7f36ce6187a1..ffc9ce5c86ee 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -258,7 +258,6 @@ static int ishtp_cl_device_remove(struct device *dev)
 {
 	struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
 	struct ishtp_cl_driver *driver = to_ishtp_cl_driver(dev->driver);
-	int ret = 0;
 
 	if (device->event_cb) {
 		device->event_cb = NULL;
@@ -266,9 +265,9 @@ static int ishtp_cl_device_remove(struct device *dev)
 	}
 
 	if (driver->remove)
-		ret = driver->remove(device);
+		driver->remove(device);
 
-	return ret;
+	return 0;
 }
 
 /**
diff --git a/drivers/platform/chrome/cros_ec_ishtp.c b/drivers/platform/chrome/cros_ec_ishtp.c
index f00107017318..9d1e7e03628e 100644
--- a/drivers/platform/chrome/cros_ec_ishtp.c
+++ b/drivers/platform/chrome/cros_ec_ishtp.c
@@ -703,7 +703,7 @@ end_ishtp_cl_alloc_error:
  *
  * Return: 0
  */
-static int cros_ec_ishtp_remove(struct ishtp_cl_device *cl_device)
+static void cros_ec_ishtp_remove(struct ishtp_cl_device *cl_device)
 {
 	struct ishtp_cl	*cros_ish_cl = ishtp_get_drvdata(cl_device);
 	struct ishtp_cl_data *client_data = ishtp_get_client_data(cros_ish_cl);
@@ -712,8 +712,6 @@ static int cros_ec_ishtp_remove(struct ishtp_cl_device *cl_device)
 	cancel_work_sync(&client_data->work_ec_evt);
 	cros_ish_deinit(cros_ish_cl);
 	ishtp_put_device(cl_device);
-
-	return 0;
 }
 
 /**
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index 0d6b4bc191c5..94669e21dc8b 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -36,7 +36,7 @@ struct ishtp_cl_driver {
 	const char *name;
 	const guid_t *guid;
 	int (*probe)(struct ishtp_cl_device *dev);
-	int (*remove)(struct ishtp_cl_device *dev);
+	void (*remove)(struct ishtp_cl_device *dev);
 	int (*reset)(struct ishtp_cl_device *dev);
 	const struct dev_pm_ops *pm;
 };

From 94cad2ddb298699882f98099e7346b7bcb5454e1 Mon Sep 17 00:00:00 2001
From: Ye Xiang <xiang.ye@intel.com>
Date: Tue, 16 Mar 2021 13:23:34 -0700
Subject: [PATCH 0004/3292] HID: intel_ish-hid: HBM: Use connected standby
 state bit during suspend/resume

The individual sensor drivers implemented in the ISH firmware needs
capability to take special actions when there is a change in the system
standby state. The ISH core firmware passes this notification to
individual sensor drivers in response to the OS request via connected
standby bit in the SYSTEM_STATE_STATUS command.

This change sets CONNECTED_STANDBY_STATE_BIT bit to 1 during suspend
callback and clears during resume callback.

Signed-off-by: Ye Xiang <xiang.ye@intel.com>
[srinivas.pandruvada@linux.intel.com: changelog rewrite]
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp/hbm.c | 6 +++---
 drivers/hid/intel-ish-hid/ishtp/hbm.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/hbm.c b/drivers/hid/intel-ish-hid/ishtp/hbm.c
index 30a91d068306..dbfae60f2621 100644
--- a/drivers/hid/intel-ish-hid/ishtp/hbm.c
+++ b/drivers/hid/intel-ish-hid/ishtp/hbm.c
@@ -914,7 +914,7 @@ static inline void fix_cl_hdr(struct ishtp_msg_hdr *hdr, size_t length,
 /*** Suspend and resume notification ***/
 
 static uint32_t current_state;
-static uint32_t supported_states = 0 | SUSPEND_STATE_BIT;
+static uint32_t supported_states = SUSPEND_STATE_BIT | CONNECTED_STANDBY_STATE_BIT;
 
 /**
  * ishtp_send_suspend() - Send suspend message to FW
@@ -933,7 +933,7 @@ void ishtp_send_suspend(struct ishtp_device *dev)
 	memset(&state_status_msg, 0, len);
 	state_status_msg.hdr.cmd = SYSTEM_STATE_STATUS;
 	state_status_msg.supported_states = supported_states;
-	current_state |= SUSPEND_STATE_BIT;
+	current_state |= (SUSPEND_STATE_BIT | CONNECTED_STANDBY_STATE_BIT);
 	dev->print_log(dev, "%s() sends SUSPEND notification\n", __func__);
 	state_status_msg.states_status = current_state;
 
@@ -959,7 +959,7 @@ void ishtp_send_resume(struct ishtp_device *dev)
 	memset(&state_status_msg, 0, len);
 	state_status_msg.hdr.cmd = SYSTEM_STATE_STATUS;
 	state_status_msg.supported_states = supported_states;
-	current_state &= ~SUSPEND_STATE_BIT;
+	current_state &= ~(CONNECTED_STANDBY_STATE_BIT | SUSPEND_STATE_BIT);
 	dev->print_log(dev, "%s() sends RESUME notification\n", __func__);
 	state_status_msg.states_status = current_state;
 
diff --git a/drivers/hid/intel-ish-hid/ishtp/hbm.h b/drivers/hid/intel-ish-hid/ishtp/hbm.h
index 7c445b203f2a..08f3f3ceb18c 100644
--- a/drivers/hid/intel-ish-hid/ishtp/hbm.h
+++ b/drivers/hid/intel-ish-hid/ishtp/hbm.h
@@ -235,6 +235,7 @@ struct dma_xfer_hbm {
 #define SYSTEM_STATE_QUERY_SUBSCRIBERS		0x3
 #define SYSTEM_STATE_STATE_CHANGE_REQ		0x4
 /*indicates suspend and resume states*/
+#define CONNECTED_STANDBY_STATE_BIT		(1<<0)
 #define SUSPEND_STATE_BIT			(1<<1)
 
 struct ish_system_states_header {

From 4ce3ba52340165c33d1a43a407a138028e210b21 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:34 +0000
Subject: [PATCH 0005/3292] HID: intel-ish-hid: Remove unused variable 'err'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp/client.c: In function ‘ishtp_cl_disconnect’:
 drivers/hid/intel-ish-hid/ishtp/client.c:266:6: warning: variable ‘err’ set but not used [-Wunused-but-set-variable]

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp/client.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c
index 1cc157126fce..c81a1f8a9268 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client.c
@@ -263,7 +263,6 @@ EXPORT_SYMBOL(ishtp_cl_unlink);
 int ishtp_cl_disconnect(struct ishtp_cl *cl)
 {
 	struct ishtp_device *dev;
-	int err;
 
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
@@ -283,7 +282,7 @@ int ishtp_cl_disconnect(struct ishtp_cl *cl)
 		return -ENODEV;
 	}
 
-	err = wait_event_interruptible_timeout(cl->wait_ctrl_res,
+	wait_event_interruptible_timeout(cl->wait_ctrl_res,
 			(dev->dev_state != ISHTP_DEV_ENABLED ||
 			cl->state == ISHTP_CL_DISCONNECTED),
 			ishtp_secs_to_jiffies(ISHTP_CL_CONNECT_TIMEOUT));

From a2e7aa05d2ad41a3cfb60323f36a87ed7760bd8b Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:35 +0000
Subject: [PATCH 0006/3292] HID: ishtp-hid-client: Move variable to where it's
 actually used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 In file included from drivers/hid/intel-ish-hid/ishtp-hid.c:11:
     drivers/hid/intel-ish-hid/ishtp-hid.h:24:21: warning: ‘hid_ishtp_guid’ defined but not used [-Wunused-const-variable=]

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Daniel Drubin <daniel.drubin@intel.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 5 +++++
 drivers/hid/intel-ish-hid/ishtp-hid.h        | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 0f1b5283bab4..24599280105d 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -11,6 +11,11 @@
 #include <linux/sched.h>
 #include "ishtp-hid.h"
 
+/* ISH Transport protocol (ISHTP in short) GUID */
+static const guid_t hid_ishtp_guid =
+	GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
+		  0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26);
+
 /* Rx ring buffer pool size */
 #define HID_CL_RX_RING_SIZE	32
 #define HID_CL_TX_RING_SIZE	16
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h
index 5ffd0da3cf1f..e2423f7d2b54 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid.h
+++ b/drivers/hid/intel-ish-hid/ishtp-hid.h
@@ -20,11 +20,6 @@ extern void (*hid_print_trace)(void *unused, const char *format, ...);
 #define hid_ishtp_trace(client, ...) \
 		(hid_print_trace)(NULL, __VA_ARGS__)
 
-/* ISH Transport protocol (ISHTP in short) GUID */
-static const guid_t hid_ishtp_guid =
-	GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
-		  0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26);
-
 /* ISH HID message structure */
 struct hostif_msg_hdr {
 	uint8_t	command; /* Bit 7: is_response */

From 3977e00eb33bcb62cffdf9475d047b347cf79e06 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:36 +0000
Subject: [PATCH 0007/3292] HID: intel-ish-hid: pci-ish: Remove unused variable
 'ret'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ipc/pci-ish.c: In function ‘ish_resume_handler’:
 drivers/hid/intel-ish-hid/ipc/pci-ish.c:264:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Zhang Lixu <lixu.zhang@intel.com>
Cc: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: Daniel Drubin <daniel.drubin@intel.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ipc/pci-ish.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 06081cf9b85a..61efc30ed9af 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -261,7 +261,6 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work)
 	struct pci_dev *pdev = to_pci_dev(ish_resume_device);
 	struct ishtp_device *dev = pci_get_drvdata(pdev);
 	uint32_t fwsts = dev->ops->get_fw_status(dev);
-	int ret;
 
 	if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag
 			&& IPC_IS_ISH_ILUP(fwsts)) {
@@ -273,7 +272,7 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work)
 
 		/* Waiting to get resume response */
 		if (dev->resume_flag)
-			ret = wait_event_interruptible_timeout(dev->resume_wait,
+			wait_event_interruptible_timeout(dev->resume_wait,
 				!dev->resume_flag,
 				msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS));
 

From d5831bee4e1a9eca21570de12baf2043b3df4b41 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:37 +0000
Subject: [PATCH 0008/3292] HID: intel-ish: Supply some missing param
 descriptions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp/bus.c:173: warning: Function parameter or member 'fw_client' not described in 'ishtp_get_fw_client_id'
 drivers/hid/intel-ish-hid/ishtp/bus.c:845: warning: Function parameter or member 'device' not described in 'ishtp_device'
 drivers/hid/intel-ish-hid/ishtp/bus.c:858: warning: Function parameter or member 'device' not described in 'ishtp_get_pci_device'
 drivers/hid/intel-ish-hid/ishtp/bus.c:871: warning: Function parameter or member 'cl_device' not described in 'ishtp_trace_callback'
 drivers/hid/intel-ish-hid/ishtp/bus.c:884: warning: Function parameter or member 'dev' not described in 'ish_hw_reset'

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: "Uwe Kleine-König" <uwe@kleine-koenig.org>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp/bus.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index ffc9ce5c86ee..c1c7d5356208 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -164,6 +164,7 @@ EXPORT_SYMBOL(ishtp_fw_cl_get_client);
 
 /**
  * ishtp_get_fw_client_id() - Get fw client id
+ * @fw_client:	firmware client used to fetch the ID
  *
  * This interface is used to reset HW get FW client id.
  *
@@ -835,6 +836,7 @@ int ishtp_use_dma_transfer(void)
 
 /**
  * ishtp_device() - Return device pointer
+ * @device: ISH-TP client device instance
  *
  * This interface is used to return device pointer from ishtp_cl_device
  * instance.
@@ -851,6 +853,7 @@ EXPORT_SYMBOL(ishtp_device);
  * ishtp_get_pci_device() - Return PCI device dev pointer
  * This interface is used to return PCI device pointer
  * from ishtp_cl_device instance.
+ * @device: ISH-TP client device instance
  *
  * Return: device *.
  */
@@ -862,6 +865,7 @@ EXPORT_SYMBOL(ishtp_get_pci_device);
 
 /**
  * ishtp_trace_callback() - Return trace callback
+ * @cl_device: ISH-TP client device instance
  *
  * This interface is used to return trace callback function pointer.
  *
@@ -875,6 +879,7 @@ EXPORT_SYMBOL(ishtp_trace_callback);
 
 /**
  * ish_hw_reset() - Call HW reset IPC callback
+ * @dev:	ISHTP device instance
  *
  * This interface is used to reset HW in case of error.
  *

From 15484948a3504c4f9f4b4db9b4f819a4b6a06aa9 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:38 +0000
Subject: [PATCH 0009/3292] HID: intel-ish: Fix a naming disparity and a
 formatting error

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp/hbm.c:409: warning: expecting prototype for ishtp_client_disconnect_request(). Prototype was for ishtp_hbm_fw_disconnect_req() instead
 drivers/hid/intel-ish-hid/ishtp/hbm.c:433: warning: wrong kernel-doc identifier on line:

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp/hbm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/hbm.c b/drivers/hid/intel-ish-hid/ishtp/hbm.c
index dbfae60f2621..9c031a06e4c4 100644
--- a/drivers/hid/intel-ish-hid/ishtp/hbm.c
+++ b/drivers/hid/intel-ish-hid/ishtp/hbm.c
@@ -398,7 +398,7 @@ static void ishtp_hbm_cl_connect_res(struct ishtp_device *dev,
 }
 
 /**
- * ishtp_client_disconnect_request() - Receive disconnect request
+ * ishtp_hbm_fw_disconnect_req() - Receive disconnect request
  * @dev: ISHTP device instance
  * @disconnect_req: disconnect request structure
  *
@@ -430,7 +430,7 @@ static void ishtp_hbm_fw_disconnect_req(struct ishtp_device *dev,
 }
 
 /**
- * ishtp_hbm_dma_xfer_ack(() - Receive transfer ACK
+ * ishtp_hbm_dma_xfer_ack() - Receive transfer ACK
  * @dev: ISHTP device instance
  * @dma_xfer: HBM transfer message
  *

From 73c26336b11add63b6e5e8403806ab5693da8a39 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:40 +0000
Subject: [PATCH 0010/3292] HID: intel-ish-hid: Fix a little doc-rot

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp/client.c:121: warning: Function parameter or member 'cl_device' not described in 'ishtp_cl_allocate'
 drivers/hid/intel-ish-hid/ishtp/client.c:121: warning: Excess function parameter 'dev' description in 'ishtp_cl_allocate'

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c
index c81a1f8a9268..585a5c4066cb 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client.c
@@ -111,7 +111,7 @@ static void ishtp_cl_init(struct ishtp_cl *cl, struct ishtp_device *dev)
 
 /**
  * ishtp_cl_allocate() - allocates client structure and sets it up.
- * @dev: ishtp device
+ * @cl_device: ishtp client device
  *
  * Allocate memory for new client device and call to initialize each field.
  *

From 99c6f96570336179c3372061df86e0278b1b3a30 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:43 +0000
Subject: [PATCH 0011/3292] HID: intel-ish-hid: Fix potential copy/paste error

Fixes the following W=1 kernel build warning(s):

 In file included from drivers/hid/intel-ish-hid/ishtp-hid.c:11:
 drivers/hid/intel-ish-hid/ishtp-hid.c:263: warning: expecting prototype for ishtp_hid_probe(). Prototype was for ishtp_hid_remove() instead

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp-hid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c
index 393bed0abee9..14c271d7d8a9 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid.c
@@ -254,7 +254,7 @@ err_hid_data:
 }
 
 /**
- * ishtp_hid_probe() - Remove registered hid device
+ * ishtp_hid_remove() - Remove registered hid device
  * @client_data:	client data pointer
  *
  * This function is used to destroy allocatd HID device.

From 5f87e027913009bfcdd368b8ab9e10c1a8c8b22f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:45 +0000
Subject: [PATCH 0012/3292] HID: intel-ish-hid: ipc: Correct fw_reset_work_fn()
 function name in header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ipc/ipc.c:553: warning: expecting prototype for ish_fw_reset_work_fn(). Prototype was for fw_reset_work_fn() instead

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Zhang Lixu <lixu.zhang@intel.com>
Cc: "Krzysztof Wilczyński" <kw@linux.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ipc/ipc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index 47bbeb8b492b..9037816e2bf7 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -544,7 +544,7 @@ static int ish_fw_reset_handler(struct ishtp_device *dev)
 #define TIMEOUT_FOR_HW_RDY_MS			300
 
 /**
- * ish_fw_reset_work_fn() - FW reset worker function
+ * fw_reset_work_fn() - FW reset worker function
  * @unused: not used
  *
  * Call ish_fw_reset_handler to complete FW reset

From fb42b1da32437ee3c33d3d631f5dbe1a5af9b731 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:46 +0000
Subject: [PATCH 0013/3292] HID: ishtp-hid-client: Fix incorrect function name
 report_bad_packet()

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp-hid-client.c:36: warning: expecting prototype for report_bad_packets(). Prototype was for report_bad_packet() instead

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Daniel Drubin <daniel.drubin@intel.com>
Cc: linux-input@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 24599280105d..042a7091802d 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -23,7 +23,7 @@ static const guid_t hid_ishtp_guid =
 #define cl_data_to_dev(client_data) ishtp_device(client_data->cl_device)
 
 /**
- * report_bad_packets() - Report bad packets
+ * report_bad_packet() - Report bad packets
  * @hid_ishtp_cl:	Client instance to get stats
  * @recv_buf:		Raw received host interface message
  * @cur_pos:		Current position index in payload

From 509405cd7ed2562d366fdf97fe00c549e33ad94d Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:57 +0000
Subject: [PATCH 0014/3292] HID: intel-ish-hid: ishtp-fw-loader: Fix a bunch of
 formatting issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

And demote non-conformant header

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:46: warning: Enum value 'LOADER_CMD_XFER_QUERY' not described in enum 'ish_loader_commands'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:46: warning: Enum value 'LOADER_CMD_XFER_FRAGMENT' not described in enum 'ish_loader_commands'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:46: warning: Enum value 'LOADER_CMD_START' not described in enum 'ish_loader_commands'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:108: warning: Function parameter or member 'reserved' not described in 'loader_msg_hdr'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:196: warning: Function parameter or member 'data' not described in 'response_info'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:196: warning: Function parameter or member 'max_size' not described in 'response_info'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:196: warning: Function parameter or member 'size' not described in 'response_info'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:196: warning: Function parameter or member 'error' not described in 'response_info'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:196: warning: Function parameter or member 'received' not described in 'response_info'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:196: warning: Function parameter or member 'wait_queue' not described in 'response_info'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:233: warning: Function parameter or member 'loader_ishtp_cl' not described in 'ishtp_cl_data'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:233: warning: Function parameter or member 'cl_device' not described in 'ishtp_cl_data'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:233: warning: Function parameter or member 'response' not described in 'ishtp_cl_data'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:233: warning: Function parameter or member 'flag_retry' not described in 'ishtp_cl_data'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:233: warning: Function parameter or member 'retry_count' not described in 'ishtp_cl_data'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:252: warning: Function parameter or member 'client_data' not described in 'get_firmware_variant'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:252: warning: Function parameter or member 'filename' not described in 'get_firmware_variant'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:281: warning: Function parameter or member 'out_msg' not described in 'loader_cl_send'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:281: warning: Function parameter or member 'out_size' not described in 'loader_cl_send'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:281: warning: Function parameter or member 'in_msg' not described in 'loader_cl_send'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:281: warning: Function parameter or member 'in_size' not described in 'loader_cl_send'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:281: warning: expecting prototype for Send message from host to firmware(). Prototype was for loader_cl_send() instead
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:445: warning: Function parameter or member 'cl_device' not described in 'loader_cl_event_cb'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:445: warning: Excess function parameter 'device' description in 'loader_cl_event_cb'
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:551: warning: expecting prototype for Loads ISH firmware using ishtp interface(). Prototype was for ish_fw_xfer_ishtp() instead
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:745: warning: expecting prototype for Start executing ISH main firmware(). Prototype was for ish_fw_start() instead
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c:767: warning: expecting prototype for Loads ISH firmware from host(). Prototype was for load_fw_from_host() instead

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Rushikesh S Kadam <rushikesh.s.kadam@intel.com>
Cc: linux-input@vger.kernel.org
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linaro-mm-sig@lists.linaro.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 45 +++++++++++----------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index edb0bd084c27..d20d74a890e9 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -31,13 +31,13 @@
 
 /**
  * enum ish_loader_commands -	ISH loader host commands.
- * LOADER_CMD_XFER_QUERY	Query the Shim firmware loader for
+ * @LOADER_CMD_XFER_QUERY:	Query the Shim firmware loader for
  *				capabilities
- * LOADER_CMD_XFER_FRAGMENT	Transfer one firmware image fragment at a
+ * @LOADER_CMD_XFER_FRAGMENT:	Transfer one firmware image fragment at a
  *				time. The command may be executed
  *				multiple times until the entire firmware
  *				image is downloaded to SRAM.
- * LOADER_CMD_START		Start executing the main firmware.
+ * @LOADER_CMD_START:		Start executing the main firmware.
  */
 enum ish_loader_commands {
 	LOADER_CMD_XFER_QUERY = 0,
@@ -95,6 +95,7 @@ static int dma_buf_size_limit = 4 * PAGE_SIZE;
 /**
  * struct loader_msg_hdr - Header for ISH Loader commands.
  * @command:		LOADER_CMD* commands. Bit 7 is the response.
+ * @reserved:		Reserved space
  * @status:		Command response status. Non 0, is error
  *			condition.
  *
@@ -173,16 +174,16 @@ struct loader_start {
  * struct response_info - Encapsulate firmware response related
  *			information for passing between function
  *			loader_cl_send() and process_recv() callback.
- * @data		Copy the data received from firmware here.
- * @max_size		Max size allocated for the @data buffer. If the
+ * @data:		Copy the data received from firmware here.
+ * @max_size:		Max size allocated for the @data buffer. If the
  *			received data exceeds this value, we log an
  *			error.
- * @size		Actual size of data received from firmware.
- * @error		Returns 0 for success, negative error code for a
+ * @size:		Actual size of data received from firmware.
+ * @error:		Returns 0 for success, negative error code for a
  *			failure in function process_recv().
- * @received		Set to true on receiving a valid firmware
+ * @received:		Set to true on receiving a valid firmware
  *			response to host command
- * @wait_queue		Wait queue for Host firmware loading where the
+ * @wait_queue:		Wait queue for Host firmware loading where the
  *			client sends message to ISH firmware and waits
  *			for response
  */
@@ -195,13 +196,13 @@ struct response_info {
 	wait_queue_head_t wait_queue;
 };
 
-/**
+/*
  * struct ishtp_cl_data - Encapsulate per ISH-TP Client Data.
  * @work_ishtp_reset:	Work queue for reset handling.
  * @work_fw_load:	Work queue for host firmware loading.
- * @flag_retry		Flag for indicating host firmware loading should
+ * @flag_retry:		Flag for indicating host firmware loading should
  *			be retried.
- * @retry_count		Count the number of retries.
+ * @retry_count:	Count the number of retries.
  *
  * This structure is used to store data per client.
  */
@@ -240,8 +241,8 @@ struct ishtp_cl_data {
 /**
  * get_firmware_variant() - Gets the filename of firmware image to be
  *			loaded based on platform variant.
- * @client_data		Client data instance.
- * @filename		Returns firmware filename.
+ * @client_data:	Client data instance.
+ * @filename:		Returns firmware filename.
  *
  * Queries the firmware-name device property string.
  *
@@ -266,11 +267,11 @@ static int get_firmware_variant(struct ishtp_cl_data *client_data,
 /**
  * loader_cl_send()	Send message from host to firmware
  * @client_data:	Client data instance
- * @out_msg		Message buffer to be sent to firmware
- * @out_size		Size of out going message
- * @in_msg		Message buffer where the incoming data copied.
+ * @out_msg:		Message buffer to be sent to firmware
+ * @out_size:		Size of out going message
+ * @in_msg:		Message buffer where the incoming data copied.
  *			This buffer is allocated by calling
- * @in_size		Max size of incoming message
+ * @in_size:		Max size of incoming message
  *
  * Return: Number of bytes copied in the in_msg on success, negative
  * error code on failure.
@@ -435,7 +436,7 @@ end:
 
 /**
  * loader_cl_event_cb() - bus driver callback for incoming message
- * @device:		Pointer to the ishtp client device for which this
+ * @cl_device:		Pointer to the ishtp client device for which this
  *			message is targeted
  *
  * Remove the packet from the list and process the message by calling
@@ -536,7 +537,7 @@ static int ish_query_loader_prop(struct ishtp_cl_data *client_data,
 }
 
 /**
- * ish_fw_xfer_ishtp()	Loads ISH firmware using ishtp interface
+ * ish_fw_xfer_ishtp() - Loads ISH firmware using ishtp interface
  * @client_data:	Client data instance
  * @fw:			Pointer to firmware data struct in host memory
  *
@@ -733,7 +734,7 @@ end_err_dma_buf_release:
 }
 
 /**
- * ish_fw_start()	Start executing ISH main firmware
+ * ish_fw_start() -	Start executing ISH main firmware
  * @client_data:	client data instance
  *
  * This function sends message to Shim firmware loader to start
@@ -756,7 +757,7 @@ static int ish_fw_start(struct ishtp_cl_data *client_data)
 }
 
 /**
- * load_fw_from_host()	Loads ISH firmware from host
+ * load_fw_from_host() - Loads ISH firmware from host
  * @client_data:	Client data instance
  *
  * This function loads the ISH firmware to ISH SRAM and starts execution

From c57179c73562e31d39139ac245b8a2d337e1823b Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 26 Mar 2021 14:34:58 +0000
Subject: [PATCH 0015/3292] HID: ishtp-hid-client: Fix
 'suggest-attribute=format' compiler warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/hid/intel-ish-hid/ishtp/bus.c: In function ‘ishtp_trace_callback’:
 drivers/hid/intel-ish-hid/ishtp/bus.c:876:29: warning: return type might be a candidate for a format attribute [-Wsuggest-attribute=format]
 876 | return cl_device->ishtp_dev->print_log;
 | ~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Daniel Drubin <daniel.drubin@intel.com>
Cc: linux-input@vger.kernel.org
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 4 ++--
 drivers/hid/intel-ish-hid/ishtp-hid.h        | 4 ++--
 drivers/hid/intel-ish-hid/ishtp/bus.c        | 4 ++--
 drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h  | 4 ++--
 include/linux/intel-ish-client-if.h          | 8 +++++++-
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 042a7091802d..6b1fa971b33e 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -784,7 +784,7 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work)
 	}
 }
 
-void (*hid_print_trace)(void *unused, const char *format, ...);
+ishtp_print_log ishtp_hid_print_trace;
 
 /**
  * hid_ishtp_cl_probe() - ISHTP client driver probe
@@ -823,7 +823,7 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
 
 	INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler);
 
-	hid_print_trace = ishtp_trace_callback(cl_device);
+	ishtp_hid_print_trace = ishtp_trace_callback(cl_device);
 
 	rv = hid_ishtp_cl_init(hid_ishtp_cl, 0);
 	if (rv) {
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h
index e2423f7d2b54..f88443a7d935 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid.h
+++ b/drivers/hid/intel-ish-hid/ishtp-hid.h
@@ -16,9 +16,9 @@
 #define	IS_RESPONSE	0x80
 
 /* Used to dump to Linux trace buffer, if enabled */
-extern void (*hid_print_trace)(void *unused, const char *format, ...);
+extern ishtp_print_log ishtp_hid_print_trace;
 #define hid_ishtp_trace(client, ...) \
-		(hid_print_trace)(NULL, __VA_ARGS__)
+	(ishtp_hid_print_trace)(NULL, __VA_ARGS__)
 
 /* ISH HID message structure */
 struct hostif_msg_hdr {
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index c1c7d5356208..f0802b047ed8 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -869,9 +869,9 @@ EXPORT_SYMBOL(ishtp_get_pci_device);
  *
  * This interface is used to return trace callback function pointer.
  *
- * Return: void *.
+ * Return: *ishtp_print_log()
  */
-void *ishtp_trace_callback(struct ishtp_cl_device *cl_device)
+ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device)
 {
 	return cl_device->ishtp_dev->print_log;
 }
diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
index 1cc6364aa957..f579b16e6d7a 100644
--- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
+++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
@@ -10,6 +10,7 @@
 
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/intel-ish-client-if.h>
 #include "bus.h"
 #include "hbm.h"
 
@@ -202,8 +203,7 @@ struct ishtp_device {
 	uint64_t ishtp_host_dma_rx_buf_phys;
 
 	/* Dump to trace buffers if enabled*/
-	__printf(2, 3) void (*print_log)(struct ishtp_device *dev,
-					 const char *format, ...);
+	ishtp_print_log print_log;
 
 	/* Debug stats */
 	unsigned int	ipc_rx_cnt;
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index 94669e21dc8b..25e2b4e80502 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -8,11 +8,17 @@
 #ifndef _INTEL_ISH_CLIENT_IF_H_
 #define _INTEL_ISH_CLIENT_IF_H_
 
+#include <linux/device.h>
+#include <linux/uuid.h>
+
 struct ishtp_cl_device;
 struct ishtp_device;
 struct ishtp_cl;
 struct ishtp_fw_client;
 
+typedef __printf(2, 3) void (*ishtp_print_log)(struct ishtp_device *dev,
+					       const char *format, ...);
+
 /* Client state */
 enum cl_state {
 	ISHTP_CL_INITIALIZING = 0,
@@ -76,7 +82,7 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device,
 /* Get the device * from ishtp device instance */
 struct device *ishtp_device(struct ishtp_cl_device *cl_device);
 /* Trace interface for clients */
-void *ishtp_trace_callback(struct ishtp_cl_device *cl_device);
+ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device);
 /* Get device pointer of PCI device for DMA acces */
 struct device *ishtp_get_pci_device(struct ishtp_cl_device *cl_device);
 

From b515d2637276a3810d6595e10ab02c13bfd0b63a Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 16 Apr 2021 11:27:59 +0200
Subject: [PATCH 0016/3292] xfrm: xfrm_state_mtu should return at least 1280
 for ipv6

Jianwen reported that IPv6 Interoperability tests are failing in an
IPsec case where one of the links between the IPsec peers has an MTU
of 1280. The peer generates a packet larger than this MTU, the router
replies with a "Packet too big" message indicating an MTU of 1280.
When the peer tries to send another large packet, xfrm_state_mtu
returns 1280 - ipsec_overhead, which causes ip6_setup_cork to fail
with EINVAL.

We can fix this by forcing xfrm_state_mtu to return IPV6_MIN_MTU when
IPv6 is used. After going through IPsec, the packet will then be
fragmented to obey the actual network's PMTU, just before leaving the
host.

Currently, TFC padding is capped to PMTU - overhead to avoid
fragementation: after padding and encapsulation, we still fit within
the PMTU. That behavior is preserved in this patch.

Fixes: 91657eafb64b ("xfrm: take net hdr len into account for esp payload size calculation")
Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h    |  1 +
 net/ipv4/esp4.c       |  2 +-
 net/ipv6/esp6.c       |  2 +-
 net/xfrm/xfrm_state.c | 14 ++++++++++++--
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c58a6d4eb610..6232a5f048bd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1546,6 +1546,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
 int xfrm_init_replay(struct xfrm_state *x);
+u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu);
 u32 xfrm_state_mtu(struct xfrm_state *x, int mtu);
 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
 int xfrm_init_state(struct xfrm_state *x);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4b834bbf95e0..ed9857b2875d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -673,7 +673,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
 		u32 padto;
 
-		padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
+		padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
 		if (skb->len < padto)
 			esp.tfclen = padto - skb->len;
 	}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 727d791ed5e6..9d1327b36bd3 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -708,7 +708,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
 		u32 padto;
 
-		padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
+		padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
 		if (skb->len < padto)
 			esp.tfclen = padto - skb->len;
 	}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4496f7efa220..c25586156c6a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2518,7 +2518,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
-u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
 	const struct xfrm_type *type = READ_ONCE(x->type);
 	struct crypto_aead *aead;
@@ -2549,7 +2549,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
 		 net_adj) & ~(blksize - 1)) + net_adj - 2;
 }
-EXPORT_SYMBOL_GPL(xfrm_state_mtu);
+EXPORT_SYMBOL_GPL(__xfrm_state_mtu);
+
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
+{
+	mtu = __xfrm_state_mtu(x, mtu);
+
+	if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
+		return IPV6_MIN_MTU;
+
+	return mtu;
+}
 
 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 {

From f9c82a4ea89c384d49ce03768ba88d049ed3f1f0 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:08 +0200
Subject: [PATCH 0017/3292] Increase size of ucounts to atomic_long_t

RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK use unsigned long to store their
counters. As a preparation for moving rlimits based on ucounts, we need
to increase the size of the variable to long.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/257aa5fb1a7d81cf0f4c34f39ada2320c4284771.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  4 ++--
 kernel/ucount.c                | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f6c5f784be5a..c242c10906c5 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -88,7 +88,7 @@ struct user_namespace {
 	struct ctl_table_header *sysctls;
 #endif
 	struct ucounts		*ucounts;
-	int ucount_max[UCOUNT_COUNTS];
+	long ucount_max[UCOUNT_COUNTS];
 } __randomize_layout;
 
 struct ucounts {
@@ -96,7 +96,7 @@ struct ucounts {
 	struct user_namespace *ns;
 	kuid_t uid;
 	int count;
-	atomic_t ucount[UCOUNT_COUNTS];
+	atomic_long_t ucount[UCOUNT_COUNTS];
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 11b1596e2542..04c561751af1 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -175,14 +175,14 @@ static void put_ucounts(struct ucounts *ucounts)
 	kfree(ucounts);
 }
 
-static inline bool atomic_inc_below(atomic_t *v, int u)
+static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
 {
-	int c, old;
-	c = atomic_read(v);
+	long c, old;
+	c = atomic_long_read(v);
 	for (;;) {
 		if (unlikely(c >= u))
 			return false;
-		old = atomic_cmpxchg(v, c, c+1);
+		old = atomic_long_cmpxchg(v, c, c+1);
 		if (likely(old == c))
 			return true;
 		c = old;
@@ -196,17 +196,17 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
 	struct user_namespace *tns;
 	ucounts = get_ucounts(ns, uid);
 	for (iter = ucounts; iter; iter = tns->ucounts) {
-		int max;
+		long max;
 		tns = iter->ns;
 		max = READ_ONCE(tns->ucount_max[type]);
-		if (!atomic_inc_below(&iter->ucount[type], max))
+		if (!atomic_long_inc_below(&iter->ucount[type], max))
 			goto fail;
 	}
 	return ucounts;
 fail:
 	bad = iter;
 	for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
-		atomic_dec(&iter->ucount[type]);
+		atomic_long_dec(&iter->ucount[type]);
 
 	put_ucounts(ucounts);
 	return NULL;
@@ -216,7 +216,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 {
 	struct ucounts *iter;
 	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
-		int dec = atomic_dec_if_positive(&iter->ucount[type]);
+		long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
 		WARN_ON_ONCE(dec < 0);
 	}
 	put_ucounts(ucounts);

From 905ae01c4ae2ae3df05bb141801b1db4b7d83c61 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:09 +0200
Subject: [PATCH 0018/3292] Add a reference to ucounts for each cred

For RLIMIT_NPROC and some other rlimits the user_struct that holds the
global limit is kept alive for the lifetime of a process by keeping it
in struct cred. Adding a pointer to ucounts in the struct cred will
allow to track RLIMIT_NPROC not only for user in the system, but for
user in the user_namespace.

Updating ucounts may require memory allocation which may fail. So, we
cannot change cred.ucounts in the commit_creds() because this function
cannot fail and it should always return 0. For this reason, we modify
cred.ucounts before calling the commit_creds().

Changelog

v6:
* Fix null-ptr-deref in is_ucounts_overlimit() detected by trinity. This
  error was caused by the fact that cred_alloc_blank() left the ucounts
  pointer empty.

Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/b37aaef28d8b9b0d757e07ba6dd27281bbe39259.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/exec.c                      |  4 ++++
 include/linux/cred.h           |  2 ++
 include/linux/user_namespace.h |  4 ++++
 kernel/cred.c                  | 40 ++++++++++++++++++++++++++++++++++
 kernel/fork.c                  |  6 +++++
 kernel/sys.c                   | 12 ++++++++++
 kernel/ucount.c                | 40 +++++++++++++++++++++++++++++++---
 kernel/user_namespace.c        |  3 +++
 8 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 18594f11c31f..d7c4187ca023 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1360,6 +1360,10 @@ int begin_new_exec(struct linux_binprm * bprm)
 	WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
 	flush_signal_handlers(me, 0);
 
+	retval = set_cred_ucounts(bprm->cred);
+	if (retval < 0)
+		goto out_unlock;
+
 	/*
 	 * install the new credentials for this executable
 	 */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4c6350503697..66436e655032 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -144,6 +144,7 @@ struct cred {
 #endif
 	struct user_struct *user;	/* real user ID subscription */
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
+	struct ucounts *ucounts;
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	/* RCU deletion */
 	union {
@@ -170,6 +171,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
 extern int set_create_files_as(struct cred *, struct inode *);
 extern int cred_fscmp(const struct cred *, const struct cred *);
 extern void __init cred_init(void);
+extern int set_cred_ucounts(struct cred *);
 
 /*
  * check for validity of credentials
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index c242c10906c5..7919b80d57ed 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -100,11 +100,15 @@ struct ucounts {
 };
 
 extern struct user_namespace init_user_ns;
+extern struct ucounts init_ucounts;
 
 bool setup_userns_sysctls(struct user_namespace *ns);
 void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
+struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
+struct ucounts *get_ucounts(struct ucounts *ucounts);
+void put_ucounts(struct ucounts *ucounts);
 
 #ifdef CONFIG_USER_NS
 
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c651..58a8a9e24347 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -60,6 +60,7 @@ struct cred init_cred = {
 	.user			= INIT_USER,
 	.user_ns		= &init_user_ns,
 	.group_info		= &init_groups,
+	.ucounts		= &init_ucounts,
 };
 
 static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -119,6 +120,8 @@ static void put_cred_rcu(struct rcu_head *rcu)
 	if (cred->group_info)
 		put_group_info(cred->group_info);
 	free_uid(cred->user);
+	if (cred->ucounts)
+		put_ucounts(cred->ucounts);
 	put_user_ns(cred->user_ns);
 	kmem_cache_free(cred_jar, cred);
 }
@@ -222,6 +225,7 @@ struct cred *cred_alloc_blank(void)
 #ifdef CONFIG_DEBUG_CREDENTIALS
 	new->magic = CRED_MAGIC;
 #endif
+	new->ucounts = get_ucounts(&init_ucounts);
 
 	if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
@@ -284,6 +288,11 @@ struct cred *prepare_creds(void)
 
 	if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
+
+	new->ucounts = get_ucounts(new->ucounts);
+	if (!new->ucounts)
+		goto error;
+
 	validate_creds(new);
 	return new;
 
@@ -363,6 +372,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		ret = create_user_ns(new);
 		if (ret < 0)
 			goto error_put;
+		if (set_cred_ucounts(new) < 0)
+			goto error_put;
 	}
 
 #ifdef CONFIG_KEYS
@@ -653,6 +664,31 @@ int cred_fscmp(const struct cred *a, const struct cred *b)
 }
 EXPORT_SYMBOL(cred_fscmp);
 
+int set_cred_ucounts(struct cred *new)
+{
+	struct task_struct *task = current;
+	const struct cred *old = task->real_cred;
+	struct ucounts *old_ucounts = new->ucounts;
+
+	if (new->user == old->user && new->user_ns == old->user_ns)
+		return 0;
+
+	/*
+	 * This optimization is needed because alloc_ucounts() uses locks
+	 * for table lookups.
+	 */
+	if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
+		return 0;
+
+	if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid)))
+		return -EAGAIN;
+
+	if (old_ucounts)
+		put_ucounts(old_ucounts);
+
+	return 0;
+}
+
 /*
  * initialise the credentials stuff
  */
@@ -719,6 +755,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
 
+	new->ucounts = get_ucounts(new->ucounts);
+	if (!new->ucounts)
+		goto error;
+
 	put_cred(old);
 	validate_creds(new);
 	return new;
diff --git a/kernel/fork.c b/kernel/fork.c
index 426cd0c51f9e..321a5e31d817 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2995,6 +2995,12 @@ int ksys_unshare(unsigned long unshare_flags)
 	if (err)
 		goto bad_unshare_cleanup_cred;
 
+	if (new_cred) {
+		err = set_cred_ucounts(new_cred);
+		if (err)
+			goto bad_unshare_cleanup_cred;
+	}
+
 	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 2e2e3f378d97..cabfc5b86175 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -552,6 +552,10 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 	if (retval < 0)
 		goto error;
 
+	retval = set_cred_ucounts(new);
+	if (retval < 0)
+		goto error;
+
 	return commit_creds(new);
 
 error:
@@ -610,6 +614,10 @@ long __sys_setuid(uid_t uid)
 	if (retval < 0)
 		goto error;
 
+	retval = set_cred_ucounts(new);
+	if (retval < 0)
+		goto error;
+
 	return commit_creds(new);
 
 error:
@@ -685,6 +693,10 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	if (retval < 0)
 		goto error;
 
+	retval = set_cred_ucounts(new);
+	if (retval < 0)
+		goto error;
+
 	return commit_creds(new);
 
 error:
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 04c561751af1..50cc1dfb7d28 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -8,6 +8,12 @@
 #include <linux/kmemleak.h>
 #include <linux/user_namespace.h>
 
+struct ucounts init_ucounts = {
+	.ns    = &init_user_ns,
+	.uid   = GLOBAL_ROOT_UID,
+	.count = 1,
+};
+
 #define UCOUNTS_HASHTABLE_BITS 10
 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
 static DEFINE_SPINLOCK(ucounts_lock);
@@ -125,7 +131,15 @@ static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struc
 	return NULL;
 }
 
-static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
+static void hlist_add_ucounts(struct ucounts *ucounts)
+{
+	struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
+	spin_lock_irq(&ucounts_lock);
+	hlist_add_head(&ucounts->node, hashent);
+	spin_unlock_irq(&ucounts_lock);
+}
+
+struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
 	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
 	struct ucounts *ucounts, *new;
@@ -160,7 +174,26 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
 	return ucounts;
 }
 
-static void put_ucounts(struct ucounts *ucounts)
+struct ucounts *get_ucounts(struct ucounts *ucounts)
+{
+	unsigned long flags;
+
+	if (!ucounts)
+		return NULL;
+
+	spin_lock_irqsave(&ucounts_lock, flags);
+	if (ucounts->count == INT_MAX) {
+		WARN_ONCE(1, "ucounts: counter has reached its maximum value");
+		ucounts = NULL;
+	} else {
+		ucounts->count += 1;
+	}
+	spin_unlock_irqrestore(&ucounts_lock, flags);
+
+	return ucounts;
+}
+
+void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
@@ -194,7 +227,7 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
 {
 	struct ucounts *ucounts, *iter, *bad;
 	struct user_namespace *tns;
-	ucounts = get_ucounts(ns, uid);
+	ucounts = alloc_ucounts(ns, uid);
 	for (iter = ucounts; iter; iter = tns->ucounts) {
 		long max;
 		tns = iter->ns;
@@ -237,6 +270,7 @@ static __init int user_namespace_sysctl_init(void)
 	BUG_ON(!user_header);
 	BUG_ON(!setup_userns_sysctls(&init_user_ns));
 #endif
+	hlist_add_ucounts(&init_ucounts);
 	return 0;
 }
 subsys_initcall(user_namespace_sysctl_init);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9a4b980d695b..f1b7b4b8ffa2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1340,6 +1340,9 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns)
 	put_user_ns(cred->user_ns);
 	set_cred_user_ns(cred, get_user_ns(user_ns));
 
+	if (set_cred_ucounts(cred) < 0)
+		return -EINVAL;
+
 	return 0;
 }
 

From b6c336528926ef73b0f70260f2636de2c3b94c14 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:10 +0200
Subject: [PATCH 0019/3292] Use atomic_t for ucounts reference counting

The current implementation of the ucounts reference counter requires the
use of spin_lock. We're going to use get_ucounts() in more performance
critical areas like a handling of RLIMIT_SIGPENDING.

Now we need to use spin_lock only if we want to change the hashtable.

v10:
* Always try to put ucounts in case we cannot increase ucounts->count.
  This will allow to cover the case when all consumers will return
  ucounts at once.

v9:
* Use a negative value to check that the ucounts->count is close to
  overflow.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/94d1dbecab060a6b116b0a2d1accd8ca1bbb4f5f.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  4 +--
 kernel/ucount.c                | 53 ++++++++++++----------------------
 2 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7919b80d57ed..80b5bf12feae 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -95,7 +95,7 @@ struct ucounts {
 	struct hlist_node node;
 	struct user_namespace *ns;
 	kuid_t uid;
-	int count;
+	atomic_t count;
 	atomic_long_t ucount[UCOUNT_COUNTS];
 };
 
@@ -107,7 +107,7 @@ void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
-struct ucounts *get_ucounts(struct ucounts *ucounts);
+struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
 void put_ucounts(struct ucounts *ucounts);
 
 #ifdef CONFIG_USER_NS
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 50cc1dfb7d28..365865f368ec 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -11,7 +11,7 @@
 struct ucounts init_ucounts = {
 	.ns    = &init_user_ns,
 	.uid   = GLOBAL_ROOT_UID,
-	.count = 1,
+	.count = ATOMIC_INIT(1),
 };
 
 #define UCOUNTS_HASHTABLE_BITS 10
@@ -139,6 +139,15 @@ static void hlist_add_ucounts(struct ucounts *ucounts)
 	spin_unlock_irq(&ucounts_lock);
 }
 
+struct ucounts *get_ucounts(struct ucounts *ucounts)
+{
+	if (ucounts && atomic_add_negative(1, &ucounts->count)) {
+		put_ucounts(ucounts);
+		ucounts = NULL;
+	}
+	return ucounts;
+}
+
 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
 	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
@@ -155,7 +164,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 
 		new->ns = ns;
 		new->uid = uid;
-		new->count = 0;
+		atomic_set(&new->count, 1);
 
 		spin_lock_irq(&ucounts_lock);
 		ucounts = find_ucounts(ns, uid, hashent);
@@ -163,33 +172,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 			kfree(new);
 		} else {
 			hlist_add_head(&new->node, hashent);
-			ucounts = new;
+			spin_unlock_irq(&ucounts_lock);
+			return new;
 		}
 	}
-	if (ucounts->count == INT_MAX)
-		ucounts = NULL;
-	else
-		ucounts->count += 1;
 	spin_unlock_irq(&ucounts_lock);
-	return ucounts;
-}
-
-struct ucounts *get_ucounts(struct ucounts *ucounts)
-{
-	unsigned long flags;
-
-	if (!ucounts)
-		return NULL;
-
-	spin_lock_irqsave(&ucounts_lock, flags);
-	if (ucounts->count == INT_MAX) {
-		WARN_ONCE(1, "ucounts: counter has reached its maximum value");
-		ucounts = NULL;
-	} else {
-		ucounts->count += 1;
-	}
-	spin_unlock_irqrestore(&ucounts_lock, flags);
-
+	ucounts = get_ucounts(ucounts);
 	return ucounts;
 }
 
@@ -197,15 +185,12 @@ void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ucounts_lock, flags);
-	ucounts->count -= 1;
-	if (!ucounts->count)
+	if (atomic_dec_and_test(&ucounts->count)) {
+		spin_lock_irqsave(&ucounts_lock, flags);
 		hlist_del_init(&ucounts->node);
-	else
-		ucounts = NULL;
-	spin_unlock_irqrestore(&ucounts_lock, flags);
-
-	kfree(ucounts);
+		spin_unlock_irqrestore(&ucounts_lock, flags);
+		kfree(ucounts);
+	}
 }
 
 static inline bool atomic_long_inc_below(atomic_long_t *v, int u)

From 21d1c5e386bc751f1953b371d72cd5b7d9c9e270 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:11 +0200
Subject: [PATCH 0020/3292] Reimplement RLIMIT_NPROC on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

To illustrate the impact of rlimits, let's say there is a program that
does not fork. Some service-A wants to run this program as user X in
multiple containers. Since the program never fork the service wants to
set RLIMIT_NPROC=1.

service-A
 \- program (uid=1000, container1, rlimit_nproc=1)
 \- program (uid=1000, container2, rlimit_nproc=1)

The service-A sets RLIMIT_NPROC=1 and runs the program in container1.
When the service-A tries to run a program with RLIMIT_NPROC=1 in
container2 it fails since user X already has one running process.

We cannot use existing inc_ucounts / dec_ucounts because they do not
allow us to exceed the maximum for the counter. Some rlimits can be
overlimited by root or if the user has the appropriate capability.

Changelog

v11:
* Change inc_rlimit_ucounts() which now returns top value of ucounts.
* Drop inc_rlimit_ucounts_and_test() because the return code of
  inc_rlimit_ucounts() can be checked.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/c5286a8aa16d2d698c222f7532f3d735c82bc6bc.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/exec.c                      |  2 +-
 include/linux/cred.h           |  2 ++
 include/linux/sched/user.h     |  1 -
 include/linux/user_namespace.h | 12 ++++++++++
 kernel/cred.c                  | 10 ++++----
 kernel/exit.c                  |  2 +-
 kernel/fork.c                  |  9 +++----
 kernel/sys.c                   |  2 +-
 kernel/ucount.c                | 44 ++++++++++++++++++++++++++++++++++
 kernel/user.c                  |  1 -
 kernel/user_namespace.c        |  3 ++-
 11 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index d7c4187ca023..f2bcdbeb3afb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1878,7 +1878,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	 * whether NPROC limit is still exceeded.
 	 */
 	if ((current->flags & PF_NPROC_EXCEEDED) &&
-	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+	    is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
 		retval = -EAGAIN;
 		goto out_ret;
 	}
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 66436e655032..5ca1e8a1d035 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -372,6 +372,7 @@ static inline void put_cred(const struct cred *_cred)
 
 #define task_uid(task)		(task_cred_xxx((task), uid))
 #define task_euid(task)		(task_cred_xxx((task), euid))
+#define task_ucounts(task)	(task_cred_xxx((task), ucounts))
 
 #define current_cred_xxx(xxx)			\
 ({						\
@@ -388,6 +389,7 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid() 	(current_cred_xxx(fsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
+#define current_ucounts()	(current_cred_xxx(ucounts))
 
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index a8ec3b6093fc..d33d867ad6c1 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 80b5bf12feae..4a97acc35990 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,9 +50,12 @@ enum ucount_type {
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
 #endif
+	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_COUNTS,
 };
 
+#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -110,6 +113,15 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
 struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
 void put_ucounts(struct ucounts *ucounts);
 
+static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type)
+{
+	return atomic_long_read(&ucounts->ucount[type]);
+}
+
+long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 58a8a9e24347..dcfa30b337c5 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -360,7 +360,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		kdebug("share_creds(%p{%d,%d})",
 		       p->cred, atomic_read(&p->cred->usage),
 		       read_cred_subscribers(p->cred));
-		atomic_inc(&p->cred->user->processes);
+		inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 		return 0;
 	}
 
@@ -395,8 +395,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	}
 #endif
 
-	atomic_inc(&new->user->processes);
 	p->cred = p->real_cred = get_cred(new);
+	inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 	alter_cred_subscribers(new, 2);
 	validate_creds(new);
 	return 0;
@@ -496,12 +496,12 @@ int commit_creds(struct cred *new)
 	 * in set_user().
 	 */
 	alter_cred_subscribers(new, 2);
-	if (new->user != old->user)
-		atomic_inc(&new->user->processes);
+	if (new->user != old->user || new->user_ns != old->user_ns)
+		inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user)
-		atomic_dec(&old->user->processes);
+		dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 	alter_cred_subscribers(old, -2);
 
 	/* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 04029e35e69a..61c0fe902b50 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -188,7 +188,7 @@ repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
-	atomic_dec(&__task_cred(p)->user->processes);
+	dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 	rcu_read_unlock();
 
 	cgroup_release(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 321a5e31d817..ed7dfb07178d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -819,9 +819,11 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
 
-	for (i = 0; i < UCOUNT_COUNTS; i++)
+	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++)
 		init_user_ns.ucount_max[i] = max_threads/2;
 
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
+
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
 			  NULL, free_vm_stack_cache);
@@ -1978,8 +1980,7 @@ static __latent_entropy struct task_struct *copy_process(
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
 	retval = -EAGAIN;
-	if (atomic_read(&p->real_cred->user->processes) >=
-			task_rlimit(p, RLIMIT_NPROC)) {
+	if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
 		if (p->real_cred->user != INIT_USER &&
 		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
 			goto bad_fork_free;
@@ -2382,7 +2383,7 @@ bad_fork_cleanup_threadgroup_lock:
 #endif
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
-	atomic_dec(&p->cred->user->processes);
+	dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 	exit_creds(p);
 bad_fork_free:
 	p->state = TASK_DEAD;
diff --git a/kernel/sys.c b/kernel/sys.c
index cabfc5b86175..00266a65a000 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -473,7 +473,7 @@ static int set_user(struct cred *new)
 	 * for programs doing set*uid()+execve() by harmlessly deferring the
 	 * failure to the execve() stage.
 	 */
-	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
+	if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
 			new_user != INIT_USER)
 		current->flags |= PF_NPROC_EXCEEDED;
 	else
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 365865f368ec..6caa56f7dec8 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -240,6 +241,48 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 	put_ucounts(ucounts);
 }
 
+long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
+{
+	struct ucounts *iter;
+	long ret = 0;
+
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		long max = READ_ONCE(iter->ns->ucount_max[type]);
+		long new = atomic_long_add_return(v, &iter->ucount[type]);
+		if (new < 0 || new > max)
+			ret = LONG_MAX;
+		else if (iter == ucounts)
+			ret = new;
+	}
+	return ret;
+}
+
+bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
+{
+	struct ucounts *iter;
+	long new;
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		long dec = atomic_long_add_return(-v, &iter->ucount[type]);
+		WARN_ON_ONCE(dec < 0);
+		if (iter == ucounts)
+			new = dec;
+	}
+	return (new == 0);
+}
+
+bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
+{
+	struct ucounts *iter;
+	if (get_ucounts_value(ucounts, type) > max)
+		return true;
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		max = READ_ONCE(iter->ns->ucount_max[type]);
+		if (get_ucounts_value(iter, type) > max)
+			return true;
+	}
+	return false;
+}
+
 static __init int user_namespace_sysctl_init(void)
 {
 #ifdef CONFIG_SYSCTL
@@ -256,6 +299,7 @@ static __init int user_namespace_sysctl_init(void)
 	BUG_ON(!setup_userns_sysctls(&init_user_ns));
 #endif
 	hlist_add_ucounts(&init_ucounts);
+	inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
 	return 0;
 }
 subsys_initcall(user_namespace_sysctl_init);
diff --git a/kernel/user.c b/kernel/user.c
index a2478cddf536..7f5ff498207a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index f1b7b4b8ffa2..e6577c835072 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -119,9 +119,10 @@ int create_user_ns(struct cred *new)
 	ns->owner = owner;
 	ns->group = group;
 	INIT_WORK(&ns->work, free_user_ns);
-	for (i = 0; i < UCOUNT_COUNTS; i++) {
+	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
 		ns->ucount_max[i] = INT_MAX;
 	}
+	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */

From 6e52a9f0532f912af37bab4caf18b57d1b9845f4 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:12 +0200
Subject: [PATCH 0021/3292] Reimplement RLIMIT_MSGQUEUE on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/2531f42f7884bbfee56a978040b3e0d25cdf6cde.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched/user.h     |  4 ----
 include/linux/user_namespace.h |  1 +
 ipc/mqueue.c                   | 42 ++++++++++++++++++----------------
 kernel/fork.c                  |  1 +
 kernel/ucount.c                |  1 +
 kernel/user_namespace.c        |  1 +
 6 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index d33d867ad6c1..8a34446681aa 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -18,10 +18,6 @@ struct user_struct {
 #endif
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
-#endif
-#ifdef CONFIG_POSIX_MQUEUE
-	/* protected by mq_lock	*/
-	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 #endif
 	unsigned long locked_shm; /* How many pages of mlocked shm ? */
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4a97acc35990..5eeb86b00e68 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -51,6 +51,7 @@ enum ucount_type {
 	UCOUNT_INOTIFY_WATCHES,
 #endif
 	UCOUNT_RLIMIT_NPROC,
+	UCOUNT_RLIMIT_MSGQUEUE,
 	UCOUNT_COUNTS,
 };
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 8031464ed4ae..461fcf8c873d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -144,7 +144,7 @@ struct mqueue_inode_info {
 	struct pid *notify_owner;
 	u32 notify_self_exec_id;
 	struct user_namespace *notify_user_ns;
-	struct user_struct *user;	/* user who created, for accounting */
+	struct ucounts *ucounts;	/* user who created, for accounting */
 	struct sock *notify_sock;
 	struct sk_buff *notify_cookie;
 
@@ -292,7 +292,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		struct ipc_namespace *ipc_ns, umode_t mode,
 		struct mq_attr *attr)
 {
-	struct user_struct *u = current_user();
 	struct inode *inode;
 	int ret = -ENOMEM;
 
@@ -321,7 +320,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		info->notify_owner = NULL;
 		info->notify_user_ns = NULL;
 		info->qsize = 0;
-		info->user = NULL;	/* set when all is ok */
+		info->ucounts = NULL;	/* set when all is ok */
 		info->msg_tree = RB_ROOT;
 		info->msg_tree_rightmost = NULL;
 		info->node_cache = NULL;
@@ -371,19 +370,23 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		if (mq_bytes + mq_treesize < mq_bytes)
 			goto out_inode;
 		mq_bytes += mq_treesize;
-		spin_lock(&mq_lock);
-		if (u->mq_bytes + mq_bytes < u->mq_bytes ||
-		    u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
-			spin_unlock(&mq_lock);
-			/* mqueue_evict_inode() releases info->messages */
-			ret = -EMFILE;
-			goto out_inode;
-		}
-		u->mq_bytes += mq_bytes;
-		spin_unlock(&mq_lock);
+		info->ucounts = get_ucounts(current_ucounts());
+		if (info->ucounts) {
+			long msgqueue;
 
-		/* all is ok */
-		info->user = get_uid(u);
+			spin_lock(&mq_lock);
+			msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
+			if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
+				dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
+				spin_unlock(&mq_lock);
+				put_ucounts(info->ucounts);
+				info->ucounts = NULL;
+				/* mqueue_evict_inode() releases info->messages */
+				ret = -EMFILE;
+				goto out_inode;
+			}
+			spin_unlock(&mq_lock);
+		}
 	} else if (S_ISDIR(mode)) {
 		inc_nlink(inode);
 		/* Some things misbehave if size == 0 on a directory */
@@ -497,7 +500,6 @@ static void mqueue_free_inode(struct inode *inode)
 static void mqueue_evict_inode(struct inode *inode)
 {
 	struct mqueue_inode_info *info;
-	struct user_struct *user;
 	struct ipc_namespace *ipc_ns;
 	struct msg_msg *msg, *nmsg;
 	LIST_HEAD(tmp_msg);
@@ -520,8 +522,7 @@ static void mqueue_evict_inode(struct inode *inode)
 		free_msg(msg);
 	}
 
-	user = info->user;
-	if (user) {
+	if (info->ucounts) {
 		unsigned long mq_bytes, mq_treesize;
 
 		/* Total amount of bytes accounted for the mqueue */
@@ -533,7 +534,7 @@ static void mqueue_evict_inode(struct inode *inode)
 					  info->attr.mq_msgsize);
 
 		spin_lock(&mq_lock);
-		user->mq_bytes -= mq_bytes;
+		dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
 		/*
 		 * get_ns_from_inode() ensures that the
 		 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
@@ -543,7 +544,8 @@ static void mqueue_evict_inode(struct inode *inode)
 		if (ipc_ns)
 			ipc_ns->mq_queues_count--;
 		spin_unlock(&mq_lock);
-		free_uid(user);
+		put_ucounts(info->ucounts);
+		info->ucounts = NULL;
 	}
 	if (ipc_ns)
 		put_ipc_ns(ipc_ns);
diff --git a/kernel/fork.c b/kernel/fork.c
index ed7dfb07178d..a9c5097dfc86 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -823,6 +823,7 @@ void __init fork_init(void)
 		init_user_ns.ucount_max[i] = max_threads/2;
 
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 6caa56f7dec8..6e6f936a5963 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ },
 	{ }
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e6577c835072..7eccc4f84549 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -123,6 +123,7 @@ int create_user_ns(struct cred *new)
 		ns->ucount_max[i] = INT_MAX;
 	}
 	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
+	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */

From d64696905554e919321e31afc210606653b8f6a4 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:13 +0200
Subject: [PATCH 0022/3292] Reimplement RLIMIT_SIGPENDING on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Changelog

v11:
* Revert most of changes to fix performance issues.

v10:
* Fix memory leak on get_ucounts failure.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/df9d7764dddd50f28616b7840de74ec0f81711a8.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/array.c                |  2 +-
 include/linux/sched/user.h     |  1 -
 include/linux/signal_types.h   |  4 +++-
 include/linux/user_namespace.h |  1 +
 kernel/fork.c                  |  1 +
 kernel/signal.c                | 25 +++++++++++++------------
 kernel/ucount.c                |  1 +
 kernel/user.c                  |  1 -
 kernel/user_namespace.c        |  1 +
 9 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index bb87e4d89cd8..74b0ea4b7e38 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = get_nr_threads(p);
 		rcu_read_lock();  /* FIXME: is this correct? */
-		qsize = atomic_read(&__task_cred(p)->user->sigpending);
+		qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
 		rcu_read_unlock();
 		qlim = task_rlimit(p, RLIMIT_SIGPENDING);
 		unlock_task_sighand(p, &flags);
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 8a34446681aa..8ba9cec4fb99 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
 #endif
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 68e06c75c5b2..34cb28b8f16c 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -13,6 +13,8 @@ typedef struct kernel_siginfo {
 	__SIGINFO;
 } kernel_siginfo_t;
 
+struct ucounts;
+
 /*
  * Real Time signals may be queued.
  */
@@ -21,7 +23,7 @@ struct sigqueue {
 	struct list_head list;
 	int flags;
 	kernel_siginfo_t info;
-	struct user_struct *user;
+	struct ucounts *ucounts;
 };
 
 /* flags values. */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 5eeb86b00e68..58f417986472 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -52,6 +52,7 @@ enum ucount_type {
 #endif
 	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_RLIMIT_MSGQUEUE,
+	UCOUNT_RLIMIT_SIGPENDING,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index a9c5097dfc86..03119926b27d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -824,6 +824,7 @@ void __init fork_init(void)
 
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/signal.c b/kernel/signal.c
index f2718350bf4b..9a6dab712123 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -413,8 +413,8 @@ static struct sigqueue *
 __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
 {
 	struct sigqueue *q = NULL;
-	struct user_struct *user;
-	int sigpending;
+	struct ucounts *ucounts = NULL;
+	long sigpending;
 
 	/*
 	 * Protect access to @t credentials. This can go away when all
@@ -425,27 +425,26 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
 	 * changes from/to zero.
 	 */
 	rcu_read_lock();
-	user = __task_cred(t)->user;
-	sigpending = atomic_inc_return(&user->sigpending);
+	ucounts = task_ucounts(t);
+	sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 	if (sigpending == 1)
-		get_uid(user);
+		ucounts = get_ucounts(ucounts);
 	rcu_read_unlock();
 
-	if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
+	if (override_rlimit || (sigpending < LONG_MAX && sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
 	} else {
 		print_dropped_signal(sig);
 	}
 
 	if (unlikely(q == NULL)) {
-		if (atomic_dec_and_test(&user->sigpending))
-			free_uid(user);
+		if (ucounts && dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
+			put_ucounts(ucounts);
 	} else {
 		INIT_LIST_HEAD(&q->list);
 		q->flags = 0;
-		q->user = user;
+		q->ucounts = ucounts;
 	}
-
 	return q;
 }
 
@@ -453,8 +452,10 @@ static void __sigqueue_free(struct sigqueue *q)
 {
 	if (q->flags & SIGQUEUE_PREALLOC)
 		return;
-	if (atomic_dec_and_test(&q->user->sigpending))
-		free_uid(q->user);
+	if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
+		put_ucounts(q->ucounts);
+		q->ucounts = NULL;
+	}
 	kmem_cache_free(sigqueue_cachep, q);
 }
 
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 6e6f936a5963..8ce62da6a62c 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ },
 	{ },
 	{ }
diff --git a/kernel/user.c b/kernel/user.c
index 7f5ff498207a..6737327f83be 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
 	.ratelimit	= RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 7eccc4f84549..822eacee4588 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -124,6 +124,7 @@ int create_user_ns(struct cred *new)
 	}
 	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
 	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
+	ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */

From d7c9e99aee48e6bc0b427f3e3c658a6aba15001e Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:14 +0200
Subject: [PATCH 0023/3292] Reimplement RLIMIT_MEMLOCK on top of ucounts

The rlimit counter is tied to uid in the user_namespace. This allows
rlimit values to be specified in userns even if they are already
globally exceeded by the user. However, the value of the previous
user_namespaces cannot be exceeded.

Changelog

v11:
* Fix issue found by lkp robot.

v8:
* Fix issues found by lkp-tests project.

v7:
* Keep only ucounts for RLIMIT_MEMLOCK checks instead of struct cred.

v6:
* Fix bug in hugetlb_file_setup() detected by trinity.

Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/970d50c70c71bfd4496e0e8d2a0a32feebebb350.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hugetlbfs/inode.c           | 16 ++++++++--------
 include/linux/hugetlb.h        |  4 ++--
 include/linux/mm.h             |  4 ++--
 include/linux/sched/user.h     |  1 -
 include/linux/shmem_fs.h       |  2 +-
 include/linux/user_namespace.h |  1 +
 ipc/shm.c                      | 26 +++++++++++++-------------
 kernel/fork.c                  |  1 +
 kernel/ucount.c                |  1 +
 kernel/user.c                  |  1 -
 kernel/user_namespace.c        |  1 +
 mm/memfd.c                     |  4 ++--
 mm/mlock.c                     | 22 ++++++++++++++--------
 mm/mmap.c                      |  4 ++--
 mm/shmem.c                     | 10 +++++-----
 15 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 701c82c36138..be519fc9559a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1443,7 +1443,7 @@ static int get_hstate_idx(int page_size_log)
  * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
  */
 struct file *hugetlb_file_setup(const char *name, size_t size,
-				vm_flags_t acctflag, struct user_struct **user,
+				vm_flags_t acctflag, struct ucounts **ucounts,
 				int creat_flags, int page_size_log)
 {
 	struct inode *inode;
@@ -1455,20 +1455,20 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	if (hstate_idx < 0)
 		return ERR_PTR(-ENODEV);
 
-	*user = NULL;
+	*ucounts = NULL;
 	mnt = hugetlbfs_vfsmount[hstate_idx];
 	if (!mnt)
 		return ERR_PTR(-ENOENT);
 
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
-		*user = current_user();
-		if (user_shm_lock(size, *user)) {
+		*ucounts = current_ucounts();
+		if (user_shm_lock(size, *ucounts)) {
 			task_lock(current);
 			pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
 				current->comm, current->pid);
 			task_unlock(current);
 		} else {
-			*user = NULL;
+			*ucounts = NULL;
 			return ERR_PTR(-EPERM);
 		}
 	}
@@ -1495,9 +1495,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 
 	iput(inode);
 out:
-	if (*user) {
-		user_shm_unlock(size, *user);
-		*user = NULL;
+	if (*ucounts) {
+		user_shm_unlock(size, *ucounts);
+		*ucounts = NULL;
 	}
 	return file;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cccd1aab69dd..96d63dbdec65 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -434,7 +434,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
 extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
-				struct user_struct **user, int creat_flags,
+				struct ucounts **ucounts, int creat_flags,
 				int page_size_log);
 
 static inline bool is_file_hugepages(struct file *file)
@@ -454,7 +454,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
 #define is_file_hugepages(file)			false
 static inline struct file *
 hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
-		struct user_struct **user, int creat_flags,
+		struct ucounts **ucounts, int creat_flags,
 		int page_size_log)
 {
 	return ERR_PTR(-ENOSYS);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8ba434287387..3b4e24738ce4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1670,8 +1670,8 @@ extern bool can_do_mlock(void);
 #else
 static inline bool can_do_mlock(void) { return false; }
 #endif
-extern int user_shm_lock(size_t, struct user_struct *);
-extern void user_shm_unlock(size_t, struct user_struct *);
+extern int user_shm_lock(size_t, struct ucounts *);
+extern void user_shm_unlock(size_t, struct ucounts *);
 
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 8ba9cec4fb99..82bd2532da6b 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -18,7 +18,6 @@ struct user_struct {
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
 #endif
-	unsigned long locked_shm; /* How many pages of mlocked shm ? */
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index d82b6f396588..aa77dcd1646f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -65,7 +65,7 @@ extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
-extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
 #ifdef CONFIG_SHMEM
 extern const struct address_space_operations shmem_aops;
 static inline bool shmem_mapping(struct address_space *mapping)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 58f417986472..2a3177b9b8bf 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -53,6 +53,7 @@ enum ucount_type {
 	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_RLIMIT_MSGQUEUE,
 	UCOUNT_RLIMIT_SIGPENDING,
+	UCOUNT_RLIMIT_MEMLOCK,
 	UCOUNT_COUNTS,
 };
 
diff --git a/ipc/shm.c b/ipc/shm.c
index febd88daba8c..003234fbbd17 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -60,7 +60,7 @@ struct shmid_kernel /* private to the kernel */
 	time64_t		shm_ctim;
 	struct pid		*shm_cprid;
 	struct pid		*shm_lprid;
-	struct user_struct	*mlock_user;
+	struct ucounts		*mlock_ucounts;
 
 	/* The task created the shm object.  NULL if the task is dead. */
 	struct task_struct	*shm_creator;
@@ -286,10 +286,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	shm_rmid(ns, shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
-		shmem_lock(shm_file, 0, shp->mlock_user);
-	else if (shp->mlock_user)
+		shmem_lock(shm_file, 0, shp->mlock_ucounts);
+	else if (shp->mlock_ucounts)
 		user_shm_unlock(i_size_read(file_inode(shm_file)),
-				shp->mlock_user);
+				shp->mlock_ucounts);
 	fput(shm_file);
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
@@ -625,7 +625,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 
 	shp->shm_perm.key = key;
 	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
-	shp->mlock_user = NULL;
+	shp->mlock_ucounts = NULL;
 
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(&shp->shm_perm);
@@ -650,7 +650,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if (shmflg & SHM_NORESERVE)
 			acctflag = VM_NORESERVE;
 		file = hugetlb_file_setup(name, hugesize, acctflag,
-				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
+				  &shp->mlock_ucounts, HUGETLB_SHMFS_INODE,
 				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 	} else {
 		/*
@@ -698,8 +698,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 no_id:
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
-	if (is_file_hugepages(file) && shp->mlock_user)
-		user_shm_unlock(size, shp->mlock_user);
+	if (is_file_hugepages(file) && shp->mlock_ucounts)
+		user_shm_unlock(size, shp->mlock_ucounts);
 	fput(file);
 	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 	return error;
@@ -1105,12 +1105,12 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
 		goto out_unlock0;
 
 	if (cmd == SHM_LOCK) {
-		struct user_struct *user = current_user();
+		struct ucounts *ucounts = current_ucounts();
 
-		err = shmem_lock(shm_file, 1, user);
+		err = shmem_lock(shm_file, 1, ucounts);
 		if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
 			shp->shm_perm.mode |= SHM_LOCKED;
-			shp->mlock_user = user;
+			shp->mlock_ucounts = ucounts;
 		}
 		goto out_unlock0;
 	}
@@ -1118,9 +1118,9 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
 	/* SHM_UNLOCK */
 	if (!(shp->shm_perm.mode & SHM_LOCKED))
 		goto out_unlock0;
-	shmem_lock(shm_file, 0, shp->mlock_user);
+	shmem_lock(shm_file, 0, shp->mlock_ucounts);
 	shp->shm_perm.mode &= ~SHM_LOCKED;
-	shp->mlock_user = NULL;
+	shp->mlock_ucounts = NULL;
 	get_file(shm_file);
 	ipc_unlock_object(&shp->shm_perm);
 	rcu_read_unlock();
diff --git a/kernel/fork.c b/kernel/fork.c
index 03119926b27d..610fd4de60d7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -825,6 +825,7 @@ void __init fork_init(void)
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK);
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 8ce62da6a62c..d316bac3e520 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -83,6 +83,7 @@ static struct ctl_table user_table[] = {
 	{ },
 	{ },
 	{ },
+	{ },
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
diff --git a/kernel/user.c b/kernel/user.c
index 6737327f83be..c82399c1618a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
 	.ratelimit	= RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 822eacee4588..892da1360862 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -125,6 +125,7 @@ int create_user_ns(struct cred *new)
 	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
 	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
 	ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
+	ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
diff --git a/mm/memfd.c b/mm/memfd.c
index 2647c898990c..081dd33e6a61 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -297,9 +297,9 @@ SYSCALL_DEFINE2(memfd_create,
 	}
 
 	if (flags & MFD_HUGETLB) {
-		struct user_struct *user = NULL;
+		struct ucounts *ucounts = NULL;
 
-		file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
+		file = hugetlb_file_setup(name, 0, VM_NORESERVE, &ucounts,
 					HUGETLB_ANONHUGE_INODE,
 					(flags >> MFD_HUGE_SHIFT) &
 					MFD_HUGE_MASK);
diff --git a/mm/mlock.c b/mm/mlock.c
index f8f8cc32d03d..dd411aabf695 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -817,9 +817,10 @@ SYSCALL_DEFINE0(munlockall)
  */
 static DEFINE_SPINLOCK(shmlock_user_lock);
 
-int user_shm_lock(size_t size, struct user_struct *user)
+int user_shm_lock(size_t size, struct ucounts *ucounts)
 {
 	unsigned long lock_limit, locked;
+	long memlock;
 	int allowed = 0;
 
 	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -828,21 +829,26 @@ int user_shm_lock(size_t size, struct user_struct *user)
 		allowed = 1;
 	lock_limit >>= PAGE_SHIFT;
 	spin_lock(&shmlock_user_lock);
-	if (!allowed &&
-	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
+	memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+
+	if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
 		goto out;
-	get_uid(user);
-	user->locked_shm += locked;
+	}
+	if (!get_ucounts(ucounts)) {
+		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+		goto out;
+	}
 	allowed = 1;
 out:
 	spin_unlock(&shmlock_user_lock);
 	return allowed;
 }
 
-void user_shm_unlock(size_t size, struct user_struct *user)
+void user_shm_unlock(size_t size, struct ucounts *ucounts)
 {
 	spin_lock(&shmlock_user_lock);
-	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
 	spin_unlock(&shmlock_user_lock);
-	free_uid(user);
+	put_ucounts(ucounts);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f287599a7a3..99f97d200aa4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1605,7 +1605,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			goto out_fput;
 		}
 	} else if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
+		struct ucounts *ucounts = NULL;
 		struct hstate *hs;
 
 		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
@@ -1621,7 +1621,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 		 */
 		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
 				VM_NORESERVE,
-				&user, HUGETLB_ANONHUGE_INODE,
+				&ucounts, HUGETLB_ANONHUGE_INODE,
 				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
 		if (IS_ERR(file))
 			return PTR_ERR(file);
diff --git a/mm/shmem.c b/mm/shmem.c
index b2db4ed0fbc7..7ee6d27222e9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2227,7 +2227,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 }
 #endif
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
 {
 	struct inode *inode = file_inode(file);
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -2239,13 +2239,13 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	 * no serialization needed when called from shm_destroy().
 	 */
 	if (lock && !(info->flags & VM_LOCKED)) {
-		if (!user_shm_lock(inode->i_size, user))
+		if (!user_shm_lock(inode->i_size, ucounts))
 			goto out_nomem;
 		info->flags |= VM_LOCKED;
 		mapping_set_unevictable(file->f_mapping);
 	}
-	if (!lock && (info->flags & VM_LOCKED) && user) {
-		user_shm_unlock(inode->i_size, user);
+	if (!lock && (info->flags & VM_LOCKED) && ucounts) {
+		user_shm_unlock(inode->i_size, ucounts);
 		info->flags &= ~VM_LOCKED;
 		mapping_clear_unevictable(file->f_mapping);
 	}
@@ -4093,7 +4093,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
 	return 0;
 }
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
 {
 	return 0;
 }

From e4aebf06695c32d49f1007f9d252f97b5b2998a7 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:15 +0200
Subject: [PATCH 0024/3292] kselftests: Add test to check for rlimit changes in
 different user namespaces

The testcase runs few instances of the program with RLIMIT_NPROC=1 from
user uid=60000, in different user namespaces.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/28cafdcdd4abd8494b34a27f1970b666b30de8bf.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/rlimits/.gitignore    |   2 +
 tools/testing/selftests/rlimits/Makefile      |   6 +
 tools/testing/selftests/rlimits/config        |   1 +
 .../selftests/rlimits/rlimits-per-userns.c    | 161 ++++++++++++++++++
 5 files changed, 171 insertions(+)
 create mode 100644 tools/testing/selftests/rlimits/.gitignore
 create mode 100644 tools/testing/selftests/rlimits/Makefile
 create mode 100644 tools/testing/selftests/rlimits/config
 create mode 100644 tools/testing/selftests/rlimits/rlimits-per-userns.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6c575cf34a71..a4ea1481bd9a 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -48,6 +48,7 @@ TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += openat2
+TARGETS += rlimits
 TARGETS += rseq
 TARGETS += rtc
 TARGETS += seccomp
diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore
new file mode 100644
index 000000000000..091021f255b3
--- /dev/null
+++ b/tools/testing/selftests/rlimits/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rlimits-per-userns
diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile
new file mode 100644
index 000000000000..03aadb406212
--- /dev/null
+++ b/tools/testing/selftests/rlimits/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g
+TEST_GEN_PROGS := rlimits-per-userns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/rlimits/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
new file mode 100644
index 000000000000..26dc949e93ea
--- /dev/null
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+
+#define NR_CHILDS 2
+
+static char *service_prog;
+static uid_t user   = 60000;
+static uid_t group  = 60000;
+
+static void setrlimit_nproc(rlim_t n)
+{
+	pid_t pid = getpid();
+	struct rlimit limit = {
+		.rlim_cur = n,
+		.rlim_max = n
+	};
+
+	warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
+
+	if (setrlimit(RLIMIT_NPROC, &limit) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+}
+
+static pid_t fork_child(void)
+{
+	pid_t pid = fork();
+
+	if (pid < 0)
+		err(EXIT_FAILURE, "fork");
+
+	if (pid > 0)
+		return pid;
+
+	pid = getpid();
+
+	warnx("(pid=%d): New process starting ...", pid);
+
+	if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+		err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid);
+
+	signal(SIGUSR1, SIG_DFL);
+
+	warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
+
+	if (setgid(group) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group);
+	if (setuid(user) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user);
+
+	warnx("(pid=%d): Service running ...", pid);
+
+	warnx("(pid=%d): Unshare user namespace", pid);
+	if (unshare(CLONE_NEWUSER) < 0)
+		err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+
+	char *const argv[] = { "service", NULL };
+	char *const envp[] = { "I_AM_SERVICE=1", NULL };
+
+	warnx("(pid=%d): Executing real service ...", pid);
+
+	execve(service_prog, argv, envp);
+	err(EXIT_FAILURE, "(pid=%d): execve", pid);
+}
+
+int main(int argc, char **argv)
+{
+	size_t i;
+	pid_t child[NR_CHILDS];
+	int wstatus[NR_CHILDS];
+	int childs = NR_CHILDS;
+	pid_t pid;
+
+	if (getenv("I_AM_SERVICE")) {
+		pause();
+		exit(EXIT_SUCCESS);
+	}
+
+	service_prog = argv[0];
+	pid = getpid();
+
+	warnx("(pid=%d) Starting testcase", pid);
+
+	/*
+	 * This rlimit is not a problem for root because it can be exceeded.
+	 */
+	setrlimit_nproc(1);
+
+	for (i = 0; i < NR_CHILDS; i++) {
+		child[i] = fork_child();
+		wstatus[i] = 0;
+		usleep(250000);
+	}
+
+	while (1) {
+		for (i = 0; i < NR_CHILDS; i++) {
+			if (child[i] <= 0)
+				continue;
+
+			errno = 0;
+			pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG);
+
+			if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i])))
+				continue;
+
+			if (ret < 0 && errno != ECHILD)
+				warn("(pid=%d): waitpid(%d)", pid, child[i]);
+
+			child[i] *= -1;
+			childs -= 1;
+		}
+
+		if (!childs)
+			break;
+
+		usleep(250000);
+
+		for (i = 0; i < NR_CHILDS; i++) {
+			if (child[i] <= 0)
+				continue;
+			kill(child[i], SIGUSR1);
+		}
+	}
+
+	for (i = 0; i < NR_CHILDS; i++) {
+		if (WIFEXITED(wstatus[i]))
+			warnx("(pid=%d): pid %d exited, status=%d",
+				pid, -child[i], WEXITSTATUS(wstatus[i]));
+		else if (WIFSIGNALED(wstatus[i]))
+			warnx("(pid=%d): pid %d killed by signal %d",
+				pid, -child[i], WTERMSIG(wstatus[i]));
+
+		if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1)
+			continue;
+
+		warnx("(pid=%d): Test failed", pid);
+		exit(EXIT_FAILURE);
+	}
+
+	warnx("(pid=%d): Test passed", pid);
+	exit(EXIT_SUCCESS);
+}

From c1ada3dc7219b02b3467aa906c2f5f8b098578d1 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:16 +0200
Subject: [PATCH 0025/3292] ucounts: Set ucount_max to the largest positive
 value the type can hold

The ns->ucount_max[] is signed long which is less than the rlimit size.
We have to protect ucount_max[] from overflow and only use the largest
value that we can hold.

On 32bit using "long" instead of "unsigned long" to hold the counts has
the downside that RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK are limited to 2GiB
instead of 4GiB. I don't think anyone cares but it should be mentioned
in case someone does.

The RLIMIT_NPROC and RLIMIT_SIGPENDING used atomic_t so their maximum
hasn't changed.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/1825a5dfa18bc5a570e79feb05e2bd07fd57e7e3.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 6 ++++++
 kernel/fork.c                  | 8 ++++----
 kernel/user_namespace.c        | 8 ++++----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 2a3177b9b8bf..61794ae32fa8 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -125,6 +125,12 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 
+static inline void set_rlimit_ucount_max(struct user_namespace *ns,
+		enum ucount_type type, unsigned long max)
+{
+	ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX;
+}
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/fork.c b/kernel/fork.c
index 610fd4de60d7..c41820481b2e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -822,10 +822,10 @@ void __init fork_init(void)
 	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++)
 		init_user_ns.ucount_max[i] = max_threads/2;
 
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK);
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, task_rlimit(&init_task, RLIMIT_NPROC));
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, task_rlimit(&init_task, RLIMIT_MSGQUEUE));
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, task_rlimit(&init_task, RLIMIT_SIGPENDING));
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, task_rlimit(&init_task, RLIMIT_MEMLOCK));
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 892da1360862..d4a545bbab7f 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -122,10 +122,10 @@ int create_user_ns(struct cred *new)
 	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
 		ns->ucount_max[i] = INT_MAX;
 	}
-	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
-	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
-	ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
-	ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK);
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC));
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */

From f928ef685db5d9b82c1c1e24e229c167426c5a1f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Apr 2021 13:00:26 -0500
Subject: [PATCH 0026/3292] ucounts: Silence warning in dec_rlimit_ucounts

Dan Carpenter <dan.carpenter@oracle.com> wrote:
>
> url:    https://github.com/0day-ci/linux/commits/legion-kernel-org/Count-rlimits-in-each-user-namespace/20210427-162857
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
> config: arc-randconfig-m031-20210426 (attached as .config)
> compiler: arceb-elf-gcc (GCC) 9.3.0
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
>
> smatch warnings:
> kernel/ucount.c:270 dec_rlimit_ucounts() error: uninitialized symbol 'new'.
>
> vim +/new +270 kernel/ucount.c
>
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  260  bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  261  {
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  262   struct ucounts *iter;
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  263   long new;
>                                                 ^^^^^^^^
>
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  264   for (iter = ucounts; iter; iter = iter->ns->ucounts) {
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  265    long dec = atomic_long_add_return(-v, &iter->ucount[type]);
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  266    WARN_ON_ONCE(dec < 0);
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  267    if (iter == ucounts)
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  268     new = dec;
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  269   }
> 176ec2b092cc22 Alexey Gladkov 2021-04-22 @270   return (new == 0);
>                                                         ^^^^^^^^
> I don't know if this is a bug or not, but I can definitely tell why the
> static checker complains about it.
>
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  271  }

In the only two cases that care about the return value of
dec_rlimit_ucounts the code first tests to see that ucounts is not
NULL.  In those cases it is guaranteed at least one iteration of the
loop will execute guaranteeing the variable new will be initialized.

Initialize new to -1 so that the return value is well defined even
when the loop does not execute and the static checker is silenced.

Link: https://lkml.kernel.org/r/m1tunny77w.fsf@fess.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/ucount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index d316bac3e520..df84a2a63926 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -263,7 +263,7 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 {
 	struct ucounts *iter;
-	long new;
+	long new = -1; /* Silence compiler warning */
 	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 		long dec = atomic_long_add_return(-v, &iter->ucount[type]);
 		WARN_ON_ONCE(dec < 0);

From f2145f8dc566c4f3b5a8deb58dcd12bed4e20194 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 19 Mar 2021 17:27:16 -0700
Subject: [PATCH 0027/3292] HID: do not use down_interruptible() when unbinding
 devices

Action of unbinding driver from a device is not cancellable and should not
fail, and driver core does not pay attention to the result of "remove"
method, therefore using down_interruptible() in hid_device_remove() does
not make sense.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 0ae9f6df59d1..ff695ec520f8 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2303,12 +2303,8 @@ static int hid_device_remove(struct device *dev)
 {
 	struct hid_device *hdev = to_hid_device(dev);
 	struct hid_driver *hdrv;
-	int ret = 0;
 
-	if (down_interruptible(&hdev->driver_input_lock)) {
-		ret = -EINTR;
-		goto end;
-	}
+	down(&hdev->driver_input_lock);
 	hdev->io_started = false;
 
 	hdrv = hdev->driver;
@@ -2323,8 +2319,8 @@ static int hid_device_remove(struct device *dev)
 
 	if (!hdev->io_started)
 		up(&hdev->driver_input_lock);
-end:
-	return ret;
+
+	return 0;
 }
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute *a,

From 7383354ad59573b42085df3b089ba925a7d1a15a Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 4 Apr 2021 20:56:09 +0200
Subject: [PATCH 0028/3292] HID: logitech-dj/hidpp: Add info/warn/err messages
 about 27 MHz keyboard encryption

By default 27 MHz Logitech keyboards send the keypresses to the receiver
in plain text. Logitech's Windows "Keyboard and Mouse settings" tool allows
configuring encryption for the keyboard wireless link.

Now there also is a Linux tool for setting this up:
https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup

Once a keyboard is paired in encrypted mode, it will no longer work with
any receiver other then the receiver it was paired with.
If a user accidentally tries to pair another keyboard with the receiver
with which a keyboard has been paired in encrypted mode, then the receiver
looses the encryption key and the originally paired keyboard will no longer
work with either that receiver, or another receiver until the keyboard's
encryption key is rewritten by another encrypted mode pairing using the
tool. Or until it is reset by following a special reset procedure.

This commit adds 3 new log messages related to 27 MHz keyboard link
encryption to the Logitech dj / hidpp drivers:

1. An info level message when keyboard encryption is being used
2. A warning message when keyboard encryption is not used
3. An error message when the encryption key has been lost and the
   keyboard will not work because of this

Messages 2. and 3. contain a link to the userspace tool to setup the
encryption. The linked page also contains instructions to clear the key
(without needing the tool) to help a user recover from the keyboard not
working in case 3.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-logitech-dj.c    |  8 ++++++++
 drivers/hid/hid-logitech-hidpp.c | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index fa835d565982..18d37b3765f3 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -100,6 +100,7 @@
 #define HIDPP_DEVICE_TYPE_MASK			GENMASK(3, 0)
 #define HIDPP_LINK_STATUS_MASK			BIT(6)
 #define HIDPP_MANUFACTURER_MASK			BIT(7)
+#define HIDPP_27MHZ_SECURE_MASK			BIT(7)
 
 #define HIDPP_DEVICE_TYPE_KEYBOARD		1
 #define HIDPP_DEVICE_TYPE_MOUSE			2
@@ -984,6 +985,13 @@ static void logi_hidpp_dev_conn_notif_27mhz(struct hid_device *hdev,
 		workitem->reports_supported |= STD_MOUSE | HIDPP;
 		break;
 	case 3: /* Index 3 is always the keyboard */
+		if (hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] & HIDPP_27MHZ_SECURE_MASK) {
+			hid_info(hdev, "Keyboard connection is encrypted\n");
+		} else {
+			hid_warn(hdev, "Keyboard events are send over the air in plain-text / unencrypted\n");
+			hid_warn(hdev, "See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n");
+		}
+		fallthrough;
 	case 4: /* Index 4 is used for an optional separate numpad */
 		workitem->device_type = HIDPP_DEVICE_TYPE_KEYBOARD;
 		workitem->reports_supported |= STD_KEYBOARD | MULTIMEDIA |
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index d598094dadd0..69670ca7e1e1 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -56,6 +56,8 @@ MODULE_PARM_DESC(disable_tap_to_click,
 #define HIDPP_SUB_ID_CONSUMER_VENDOR_KEYS	0x03
 #define HIDPP_SUB_ID_ROLLER			0x05
 #define HIDPP_SUB_ID_MOUSE_EXTRA_BTNS		0x06
+#define HIDPP_SUB_ID_USER_IFACE_EVENT		0x08
+#define HIDPP_USER_IFACE_EVENT_ENCRYPTION_KEY_LOST	BIT(5)
 
 #define HIDPP_QUIRK_CLASS_WTP			BIT(0)
 #define HIDPP_QUIRK_CLASS_M560			BIT(1)
@@ -3528,6 +3530,16 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data,
 		return 1;
 	}
 
+	if (hidpp->hid_dev->group == HID_GROUP_LOGITECH_27MHZ_DEVICE &&
+	    data[0] == REPORT_ID_HIDPP_SHORT &&
+	    data[2] == HIDPP_SUB_ID_USER_IFACE_EVENT &&
+	    (data[3] & HIDPP_USER_IFACE_EVENT_ENCRYPTION_KEY_LOST)) {
+		dev_err_ratelimited(&hidpp->hid_dev->dev,
+			"Error the keyboard's wireless encryption key has been lost, your keyboard will not work unless you re-configure encryption.\n");
+		dev_err_ratelimited(&hidpp->hid_dev->dev,
+			"See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n");
+	}
+
 	if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) {
 		ret = hidpp20_battery_event_1000(hidpp, data, size);
 		if (ret != 0)

From 8dcaa046bf96353c878ddadda506e2708cb4b8c3 Mon Sep 17 00:00:00 2001
From: Ikjoon Jang <ikjn@chromium.org>
Date: Thu, 15 Apr 2021 11:29:58 +0800
Subject: [PATCH 0029/3292] HID: google: Add of_match table to Whiskers switch
 device.

Add a device tree match table for "cros-cbas" switch device.

Signed-off-by: Ikjoon Jang <ikjn@chromium.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-google-hammer.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index e60c31dd05ff..8123b871a3eb 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -17,6 +17,7 @@
 #include <linux/hid.h>
 #include <linux/leds.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_data/cros_ec_commands.h>
 #include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
@@ -272,12 +273,21 @@ static const struct acpi_device_id cbas_ec_acpi_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, cbas_ec_acpi_ids);
 
+#ifdef CONFIG_OF
+static const struct of_device_id cbas_ec_of_match[] = {
+	{ .compatible = "google,cros-cbas" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, cbas_ec_of_match);
+#endif
+
 static struct platform_driver cbas_ec_driver = {
 	.probe = cbas_ec_probe,
 	.remove = cbas_ec_remove,
 	.driver = {
 		.name = "cbas_ec",
 		.acpi_match_table = ACPI_PTR(cbas_ec_acpi_ids),
+		.of_match_table = of_match_ptr(cbas_ec_of_match),
 		.pm = &cbas_ec_pm_ops,
 	},
 };

From 042d05b2e30e8a2bb378ecb668a85790dce39355 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Apr 2021 15:13:18 +0200
Subject: [PATCH 0030/3292] HID: lg-g15: Remove unused size argument from
 lg_*_event() functions

The report-size is already checked in lg_g15_raw_event() before calling
the lg_*_event() functions and these functions don't use the passed
in size at all, drop the unused parameter.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg-g15.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/hid/hid-lg-g15.c b/drivers/hid/hid-lg-g15.c
index bfbba0d41933..b887af72957c 100644
--- a/drivers/hid/hid-lg-g15.c
+++ b/drivers/hid/hid-lg-g15.c
@@ -464,7 +464,7 @@ static int lg_g15_get_initial_led_brightness(struct lg_g15_data *g15)
 /******** Input functions ********/
 
 /* On the G15 Mark I Logitech has been quite creative with which bit is what */
-static int lg_g15_event(struct lg_g15_data *g15, u8 *data, int size)
+static int lg_g15_event(struct lg_g15_data *g15, u8 *data)
 {
 	int i, val;
 
@@ -510,7 +510,7 @@ static int lg_g15_event(struct lg_g15_data *g15, u8 *data, int size)
 	return 0;
 }
 
-static int lg_g15_v2_event(struct lg_g15_data *g15, u8 *data, int size)
+static int lg_g15_v2_event(struct lg_g15_data *g15, u8 *data)
 {
 	int i, val;
 
@@ -542,7 +542,7 @@ static int lg_g15_v2_event(struct lg_g15_data *g15, u8 *data, int size)
 	return 0;
 }
 
-static int lg_g510_event(struct lg_g15_data *g15, u8 *data, int size)
+static int lg_g510_event(struct lg_g15_data *g15, u8 *data)
 {
 	bool game_mode_enabled;
 	int i, val;
@@ -586,7 +586,7 @@ static int lg_g510_event(struct lg_g15_data *g15, u8 *data, int size)
 	return 0;
 }
 
-static int lg_g510_leds_event(struct lg_g15_data *g15, u8 *data, int size)
+static int lg_g510_leds_event(struct lg_g15_data *g15, u8 *data)
 {
 	bool backlight_disabled;
 
@@ -613,18 +613,18 @@ static int lg_g15_raw_event(struct hid_device *hdev, struct hid_report *report,
 	switch (g15->model) {
 	case LG_G15:
 		if (data[0] == 0x02 && size == 9)
-			return lg_g15_event(g15, data, size);
+			return lg_g15_event(g15, data);
 		break;
 	case LG_G15_V2:
 		if (data[0] == 0x02 && size == 5)
-			return lg_g15_v2_event(g15, data, size);
+			return lg_g15_v2_event(g15, data);
 		break;
 	case LG_G510:
 	case LG_G510_USB_AUDIO:
 		if (data[0] == 0x03 && size == 5)
-			return lg_g510_event(g15, data, size);
+			return lg_g510_event(g15, data);
 		if (data[0] == 0x04 && size == 2)
-			return lg_g510_leds_event(g15, data, size);
+			return lg_g510_leds_event(g15, data);
 		break;
 	}
 

From ba3e054e7a70d447aa95883dab87a4eae641d6bc Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Apr 2021 15:13:19 +0200
Subject: [PATCH 0031/3292] HID: lg-g15: Add a lg_g15_handle_lcd_menu_keys()
 helper function

Factor out the handling of the G15 LCD menu keys out of
lg_g15_event() into a new lg_g15_handle_lcd_menu_keys() helper function.

This is a preparation patch for adding support for the LCD menu
keys on the Logitech Z-10 speakers (with LCD) which use the same
funky HID report format.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg-g15.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/hid/hid-lg-g15.c b/drivers/hid/hid-lg-g15.c
index b887af72957c..75a27b48d9c9 100644
--- a/drivers/hid/hid-lg-g15.c
+++ b/drivers/hid/hid-lg-g15.c
@@ -464,6 +464,19 @@ static int lg_g15_get_initial_led_brightness(struct lg_g15_data *g15)
 /******** Input functions ********/
 
 /* On the G15 Mark I Logitech has been quite creative with which bit is what */
+static void lg_g15_handle_lcd_menu_keys(struct lg_g15_data *g15, u8 *data)
+{
+	int i, val;
+
+	/* Most left (round/display) button below the LCD */
+	input_report_key(g15->input, KEY_KBD_LCD_MENU1, data[8] & 0x80);
+	/* 4 other buttons below the LCD */
+	for (i = 0; i < 4; i++) {
+		val = data[i + 2] & 0x80;
+		input_report_key(g15->input, KEY_KBD_LCD_MENU2 + i, val);
+	}
+}
+
 static int lg_g15_event(struct lg_g15_data *g15, u8 *data)
 {
 	int i, val;
@@ -494,13 +507,7 @@ static int lg_g15_event(struct lg_g15_data *g15, u8 *data)
 	/* MR */
 	input_report_key(g15->input, KEY_MACRO_RECORD_START, data[7] & 0x40);
 
-	/* Most left (round) button below the LCD */
-	input_report_key(g15->input, KEY_KBD_LCD_MENU1, data[8] & 0x80);
-	/* 4 other buttons below the LCD */
-	for (i = 0; i < 4; i++) {
-		val = data[i + 2] & 0x80;
-		input_report_key(g15->input, KEY_KBD_LCD_MENU2 + i, val);
-	}
+	lg_g15_handle_lcd_menu_keys(g15, data);
 
 	/* Backlight cycle button pressed? */
 	if (data[1] & 0x80)

From 614d34f8b3dba62ff0d13d0d45e3220c2960b17d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Apr 2021 15:13:20 +0200
Subject: [PATCH 0032/3292] HID: lg-g15: Add a lg_g15_init_input_dev() helper
 function

Factor the input-device setup + KEY_KBD_LCD_MENU capability setting out
of lg_g15_probe() into a new lg_g15_init_input_dev() helper function.

This is a preparation patch for adding support for the LCD menu
keys + LCD brightness control on the Logitech Z-10 speakers (with LCD)
which use the same protocol as the G15 keyboards.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg-g15.c | 42 ++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/hid/hid-lg-g15.c b/drivers/hid/hid-lg-g15.c
index 75a27b48d9c9..eff8c51a8dce 100644
--- a/drivers/hid/hid-lg-g15.c
+++ b/drivers/hid/hid-lg-g15.c
@@ -709,6 +709,28 @@ static int lg_g15_register_led(struct lg_g15_data *g15, int i)
 	return devm_led_classdev_register(&g15->hdev->dev, &g15->leds[i].cdev);
 }
 
+/* Common input device init code shared between keyboards and Z-10 speaker handling */
+static void lg_g15_init_input_dev(struct hid_device *hdev, struct input_dev *input,
+				  const char *name)
+{
+	int i;
+
+	input->name = name;
+	input->phys = hdev->phys;
+	input->uniq = hdev->uniq;
+	input->id.bustype = hdev->bus;
+	input->id.vendor  = hdev->vendor;
+	input->id.product = hdev->product;
+	input->id.version = hdev->version;
+	input->dev.parent = &hdev->dev;
+	input->open = lg_g15_input_open;
+	input->close = lg_g15_input_close;
+
+	/* Keys below the LCD, intended for controlling a menu on the LCD */
+	for (i = 0; i < 5; i++)
+		input_set_capability(input, EV_KEY, KEY_KBD_LCD_MENU1 + i);
+}
+
 static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	u8 gkeys_settings_output_report = 0;
@@ -751,6 +773,8 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	g15->hdev = hdev;
 	g15->model = id->driver_data;
+	g15->input = input;
+	input_set_drvdata(input, hdev);
 	hid_set_drvdata(hdev, (void *)g15);
 
 	switch (g15->model) {
@@ -822,16 +846,7 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		goto error_hw_stop;
 
 	/* Setup and register input device */
-	input->name = "Logitech Gaming Keyboard Gaming Keys";
-	input->phys = hdev->phys;
-	input->uniq = hdev->uniq;
-	input->id.bustype = hdev->bus;
-	input->id.vendor  = hdev->vendor;
-	input->id.product = hdev->product;
-	input->id.version = hdev->version;
-	input->dev.parent = &hdev->dev;
-	input->open = lg_g15_input_open;
-	input->close = lg_g15_input_close;
+	lg_g15_init_input_dev(hdev, input, "Logitech Gaming Keyboard Gaming Keys");
 
 	/* G-keys */
 	for (i = 0; i < gkeys; i++)
@@ -842,10 +857,6 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		input_set_capability(input, EV_KEY, KEY_MACRO_PRESET1 + i);
 	input_set_capability(input, EV_KEY, KEY_MACRO_RECORD_START);
 
-	/* Keys below the LCD, intended for controlling a menu on the LCD */
-	for (i = 0; i < 5; i++)
-		input_set_capability(input, EV_KEY, KEY_KBD_LCD_MENU1 + i);
-
 	/*
 	 * On the G510 only report headphone and mic mute keys when *not* using
 	 * the builtin USB audio device. When the builtin audio is used these
@@ -857,9 +868,6 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		input_set_capability(input, EV_KEY, KEY_F20);
 	}
 
-	g15->input = input;
-	input_set_drvdata(input, hdev);
-
 	ret = input_register_device(input);
 	if (ret)
 		goto error_hw_stop;

From 1e5c22983f13fc1ac05fc21772893b2602ddeb66 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Apr 2021 15:13:21 +0200
Subject: [PATCH 0033/3292] HID: lg-g15: Make the LED-name used by
 lg_g15_register_led() a parameter

Make the LED-name used by lg_g15_register_led() a parameter.

This is a preparation patch for adding support for the LCD menu
keys + LCD brightness control on the Logitech Z-10 speakers (with LCD)
which use the same protocol as the G15 keyboards.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg-g15.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/hid/hid-lg-g15.c b/drivers/hid/hid-lg-g15.c
index eff8c51a8dce..b76988c19f84 100644
--- a/drivers/hid/hid-lg-g15.c
+++ b/drivers/hid/hid-lg-g15.c
@@ -652,19 +652,10 @@ static void lg_g15_input_close(struct input_dev *dev)
 	hid_hw_close(hdev);
 }
 
-static int lg_g15_register_led(struct lg_g15_data *g15, int i)
+static int lg_g15_register_led(struct lg_g15_data *g15, int i, const char *name)
 {
-	static const char * const led_names[] = {
-		"g15::kbd_backlight",
-		"g15::lcd_backlight",
-		"g15::macro_preset1",
-		"g15::macro_preset2",
-		"g15::macro_preset3",
-		"g15::macro_record",
-	};
-
 	g15->leds[i].led = i;
-	g15->leds[i].cdev.name = led_names[i];
+	g15->leds[i].cdev.name = name;
 
 	switch (g15->model) {
 	case LG_G15:
@@ -733,6 +724,14 @@ static void lg_g15_init_input_dev(struct hid_device *hdev, struct input_dev *inp
 
 static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
+	static const char * const led_names[] = {
+		"g15::kbd_backlight",
+		"g15::lcd_backlight",
+		"g15::macro_preset1",
+		"g15::macro_preset2",
+		"g15::macro_preset3",
+		"g15::macro_record",
+	};
 	u8 gkeys_settings_output_report = 0;
 	u8 gkeys_settings_feature_report = 0;
 	struct hid_report_enum *rep_enum;
@@ -874,7 +873,7 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	/* Register LED devices */
 	for (i = 0; i < LG_G15_LED_MAX; i++) {
-		ret = lg_g15_register_led(g15, i);
+		ret = lg_g15_register_led(g15, i, led_names[i]);
 		if (ret)
 			goto error_hw_stop;
 	}

From cbe5b6b6a77ad262d9f9a56962c9b1ac2f91c0f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Apr 2021 15:13:22 +0200
Subject: [PATCH 0034/3292] HID: lg-g15: Add support for the Logitech Z-10
 speakers

Add support to hid-lg-g15 for the Logitech Z-10 speakers. This adds
support for the LCD menu keys found on these speakers, as well as
support for controlling the LCD's brightness through a LED classdev.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h    |  1 +
 drivers/hid/hid-lg-g15.c | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 84b8da3e7d09..b0b0a55de26d 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -763,6 +763,7 @@
 #define I2C_DEVICE_ID_LG_7010		0x7010
 
 #define USB_VENDOR_ID_LOGITECH		0x046d
+#define USB_DEVICE_ID_LOGITECH_Z_10_SPK	0x0a07
 #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e
 #define USB_DEVICE_ID_LOGITECH_T651	0xb00c
 #define USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD	0xb309
diff --git a/drivers/hid/hid-lg-g15.c b/drivers/hid/hid-lg-g15.c
index b76988c19f84..701ddb98e7d2 100644
--- a/drivers/hid/hid-lg-g15.c
+++ b/drivers/hid/hid-lg-g15.c
@@ -28,6 +28,7 @@ enum lg_g15_model {
 	LG_G15_V2,
 	LG_G510,
 	LG_G510_USB_AUDIO,
+	LG_Z10,
 };
 
 enum lg_g15_led_type {
@@ -457,6 +458,13 @@ static int lg_g15_get_initial_led_brightness(struct lg_g15_data *g15)
 			return ret;
 
 		return lg_g510_update_mkey_led_brightness(g15);
+	case LG_Z10:
+		/*
+		 * Getting the LCD backlight brightness is not supported.
+		 * Reading Feature(2) fails with -EPIPE and this crashes
+		 * the LCD and touch keys part of the speakers.
+		 */
+		return 0;
 	}
 	return -EINVAL; /* Never reached */
 }
@@ -626,6 +634,12 @@ static int lg_g15_raw_event(struct hid_device *hdev, struct hid_report *report,
 		if (data[0] == 0x02 && size == 5)
 			return lg_g15_v2_event(g15, data);
 		break;
+	case LG_Z10:
+		if (data[0] == 0x02 && size == 9) {
+			lg_g15_handle_lcd_menu_keys(g15, data);
+			input_sync(g15->input);
+		}
+		break;
 	case LG_G510:
 	case LG_G510_USB_AUDIO:
 		if (data[0] == 0x03 && size == 5)
@@ -660,8 +674,10 @@ static int lg_g15_register_led(struct lg_g15_data *g15, int i, const char *name)
 	switch (g15->model) {
 	case LG_G15:
 	case LG_G15_V2:
-		g15->leds[i].cdev.brightness_set_blocking = lg_g15_led_set;
 		g15->leds[i].cdev.brightness_get = lg_g15_led_get;
+		fallthrough;
+	case LG_Z10:
+		g15->leds[i].cdev.brightness_set_blocking = lg_g15_led_set;
 		if (i < LG_G15_BRIGHTNESS_MAX) {
 			g15->leds[i].cdev.flags = LED_BRIGHT_HW_CHANGED;
 			g15->leds[i].cdev.max_brightness = 2;
@@ -802,6 +818,9 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		gkeys_settings_feature_report = 0x01;
 		gkeys = 18;
 		break;
+	case LG_Z10:
+		connect_mask = HID_CONNECT_HIDRAW;
+		break;
 	}
 
 	ret = hid_hw_start(hdev, connect_mask);
@@ -844,6 +863,19 @@ static int lg_g15_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	if (ret)
 		goto error_hw_stop;
 
+	if (g15->model == LG_Z10) {
+		lg_g15_init_input_dev(hdev, g15->input, "Logitech Z-10 LCD Menu Keys");
+		ret = input_register_device(g15->input);
+		if (ret)
+			goto error_hw_stop;
+
+		ret = lg_g15_register_led(g15, 1, "z-10::lcd_backlight");
+		if (ret)
+			goto error_hw_stop;
+
+		return 0; /* All done */
+	}
+
 	/* Setup and register input device */
 	lg_g15_init_input_dev(hdev, input, "Logitech Gaming Keyboard Gaming Keys");
 
@@ -904,6 +936,10 @@ static const struct hid_device_id lg_g15_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
 			 USB_DEVICE_ID_LOGITECH_G510_USB_AUDIO),
 		.driver_data = LG_G510_USB_AUDIO },
+	/* Z-10 speakers */
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+			 USB_DEVICE_ID_LOGITECH_Z_10_SPK),
+		.driver_data = LG_Z10 },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, lg_g15_devices);

From b413c59ead320a21eb72ab7c2e0f0a81911f67ab Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Apr 2021 15:13:23 +0200
Subject: [PATCH 0035/3292] HID: lg-g15 + ite: Add MODULE_AUTHOR

I noticed that the 2 HID drivers which I've written and maintain were
missing a MODULE_AUTHOR tag, add this so that people can easily figure
out who to email with questions.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ite.c    | 1 +
 drivers/hid/hid-lg-g15.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index 14fc068affad..430fa4f52ed3 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -135,4 +135,5 @@ static struct hid_driver ite_driver = {
 };
 module_hid_driver(ite_driver);
 
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/hid/hid-lg-g15.c b/drivers/hid/hid-lg-g15.c
index 701ddb98e7d2..b2a08233f8d5 100644
--- a/drivers/hid/hid-lg-g15.c
+++ b/drivers/hid/hid-lg-g15.c
@@ -952,4 +952,5 @@ static struct hid_driver lg_g15_driver = {
 };
 module_hid_driver(lg_g15_driver);
 
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
 MODULE_LICENSE("GPL");

From eb134536cf6fb2e50b5ced653f7c34d306b2d73f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 8 Mar 2021 18:42:13 +0100
Subject: [PATCH 0036/3292] HID: input: replace outdated HID numbers+comments
 with macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These were untouched since 2.3.99-pre3, and the explanatory comment for
HID_DG_TIPPRESSURE is TipPressure in other places

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 18f5e28d475c..c62c6a9b2132 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1313,12 +1313,12 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 		return;
 	}
 
-	if (usage->hid == (HID_UP_DIGITIZER | 0x003c)) { /* Invert */
+	if (usage->hid == HID_DG_INVERT) {
 		*quirks = value ? (*quirks | HID_QUIRK_INVERT) : (*quirks & ~HID_QUIRK_INVERT);
 		return;
 	}
 
-	if (usage->hid == (HID_UP_DIGITIZER | 0x0032)) { /* InRange */
+	if (usage->hid == HID_DG_INRANGE) {
 		if (value) {
 			input_event(input, usage->type, (*quirks & HID_QUIRK_INVERT) ? BTN_TOOL_RUBBER : usage->code, 1);
 			return;
@@ -1328,7 +1328,7 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 		return;
 	}
 
-	if (usage->hid == (HID_UP_DIGITIZER | 0x0030) && (*quirks & HID_QUIRK_NOTOUCH)) { /* Pressure */
+	if (usage->hid == HID_DG_TIPPRESSURE && (*quirks & HID_QUIRK_NOTOUCH)) {
 		int a = field->logical_minimum;
 		int b = field->logical_maximum;
 		input_event(input, EV_KEY, BTN_TOUCH, value > a + ((b - a) >> 3));

From 89f5f8fb5bf4305a5425f70abf3c0d93643c93dc Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 6 May 2021 07:49:34 +0200
Subject: [PATCH 0037/3292] EDAC/thunderx: Remove irrelevant variable from
 error messages

'ret' is irrelevant (it is 0) for both dev_err() calls, so just remove
it from the error message.

 [ bp: Massage commit message. ]

Fixes: 41003396f932 ("EDAC, thunderx: Add Cavium ThunderX EDAC driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/0c046ef5cfb367a3f707ef4270e21a2bcbf44952.1620280098.git.christophe.jaillet@wanadoo.fr
---
 drivers/edac/thunderx_edac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 0eb5eb97fd74..f13674081cb6 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -1368,7 +1368,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
 					      name, 1, "CCPI", 1,
 					      0, NULL, 0, idx);
 	if (!edac_dev) {
-		dev_err(&pdev->dev, "Cannot allocate EDAC device: %d\n", ret);
+		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
 		return -ENOMEM;
 	}
 	ocx = edac_dev->pvt_info;
@@ -1380,7 +1380,7 @@ static int thunderx_ocx_probe(struct pci_dev *pdev,
 
 	ocx->regs = pcim_iomap_table(pdev)[0];
 	if (!ocx->regs) {
-		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+		dev_err(&pdev->dev, "Cannot map PCI resources\n");
 		ret = -ENODEV;
 		goto err_free;
 	}

From 3cd8015040d7537a6b88e26f36768a90d9247829 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 10 May 2021 12:56:12 +0300
Subject: [PATCH 0038/3292] device property: Retrieve fwnode from of_node via
 accessor

OF provides a specific accessor to retrieve fwnode handle.
Use it instead of direct dereferencing.

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 1421e9548857..dd98759d688b 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -21,7 +21,7 @@
 struct fwnode_handle *dev_fwnode(struct device *dev)
 {
 	return IS_ENABLED(CONFIG_OF) && dev->of_node ?
-		&dev->of_node->fwnode : dev->fwnode;
+		of_fwnode_handle(dev->of_node) : dev->fwnode;
 }
 EXPORT_SYMBOL_GPL(dev_fwnode);
 
@@ -763,7 +763,7 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev,
 	struct fwnode_handle *fwnode = NULL, *next;
 
 	if (dev->of_node)
-		fwnode = &dev->of_node->fwnode;
+		fwnode = of_fwnode_handle(dev->of_node);
 	else if (adev)
 		fwnode = acpi_fwnode_handle(adev);
 

From 2e08fb550a2d1017052685a27691911a633c1771 Mon Sep 17 00:00:00 2001
From: Xiong Zhenwu <xiong.zhenwu@zte.com.cn>
Date: Thu, 18 Mar 2021 04:04:31 -0700
Subject: [PATCH 0039/3292] security/smack/: fix misspellings using codespell
 tool

A typo is found out by codespell tool in 383th line of smackfs.c:

$ codespell ./security/smack/
./smackfs.c:383: numer  ==> number

Fix a typo found by codespell.

Signed-off-by: Xiong Zhenwu <xiong.zhenwu@zte.com.cn>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smackfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 22ded2c26089..66f3c539d504 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -380,7 +380,7 @@ static int smk_parse_rule(const char *data, struct smack_parsed_rule *rule,
  * @data: string to be parsed, null terminated
  * @rule: Will be filled with Smack parsed rule
  * @import: if non-zero, import labels
- * @tokens: numer of substrings expected in data
+ * @tokens: number of substrings expected in data
  *
  * Returns number of processed bytes on success, -ERRNO on failure.
  */

From 49ec114a6e62d8d320037ce71c1aaf9650b3cafd Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Mon, 12 Apr 2021 22:45:50 +0900
Subject: [PATCH 0040/3292] smackfs: restrict bytes count in smk_set_cipso()

Oops, I failed to update subject line.

From 07571157c91b98ce1a4aa70967531e64b78e8346 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 12 Apr 2021 22:25:06 +0900
Subject: [PATCH] smackfs: restrict bytes count in smk_set_cipso()

Commit 7ef4c19d245f3dc2 ("smackfs: restrict bytes count in smackfs write
functions") missed that count > SMK_CIPSOMAX check applies to only
format == SMK_FIXED24_FMT case.

Reported-by: syzbot <syzbot+77c53db50c9fff774e8e@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smackfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 66f3c539d504..3a75d2a8f517 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -855,6 +855,8 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
 	if (format == SMK_FIXED24_FMT &&
 	    (count < SMK_CIPSOMIN || count > SMK_CIPSOMAX))
 		return -EINVAL;
+	if (count > PAGE_SIZE)
+		return -EINVAL;
 
 	data = memdup_user_nul(buf, count);
 	if (IS_ERR(data))

From d2792e91de2bedf829828b091720f0f7920719ed Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Sat, 8 May 2021 10:27:07 +0800
Subject: [PATCH 0041/3292] net: openvswitch: Remove unnecessary skb_nfct()

There is no need add 'if (skb_nfct(skb))' assignment, the
nf_conntrack_put() would check it.

Signed-off-by: Yejune Deng <yejunedeng@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index cadb6a29b285..1b5eae57bc90 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -967,8 +967,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 
 		/* Associate skb with specified zone. */
 		if (tmpl) {
-			if (skb_nfct(skb))
-				nf_conntrack_put(skb_nfct(skb));
+			nf_conntrack_put(skb_nfct(skb));
 			nf_conntrack_get(&tmpl->ct_general);
 			nf_ct_set(skb, tmpl, IP_CT_NEW);
 		}
@@ -1329,11 +1328,9 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 
 int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	if (skb_nfct(skb)) {
-		nf_conntrack_put(skb_nfct(skb));
-		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-		ovs_ct_fill_key(skb, key, false);
-	}
+	nf_conntrack_put(skb_nfct(skb));
+	nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+	ovs_ct_fill_key(skb, key, false);
 
 	return 0;
 }

From 48de7c0c1c9225b478c602184be8e00b92b35c61 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Sat, 8 May 2021 18:03:05 +0800
Subject: [PATCH 0042/3292] neighbour: Remove redundant initialization of
 'bucket'

Integer variable 'bucket' is being initialized however
this value is never read as 'bucket' is assigned zero
in for statement. Remove the redundant assignment.

Cleans up clang warning:

net/core/neighbour.c:3144:6: warning: Value stored to 'bucket' during
its initialization is never read [clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 98f20efbfadf..2b2f333bcdfe 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3141,7 +3141,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 	struct neigh_table *tbl = state->tbl;
 	struct pneigh_entry *pn = NULL;
-	int bucket = state->bucket;
+	int bucket;
 
 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {

From b76078df15935b54c353eb0461c95a6eaf73c7ca Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sat, 8 May 2021 14:36:33 +0200
Subject: [PATCH 0043/3292] net: qca_spi: Avoid reading signature three times
 in a row

There is no need to read the signature three times. So bail out
in case the second check failed.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index ab9b02574a15..3e2a54c2fc83 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -506,6 +506,7 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event)
 		if (signature != QCASPI_GOOD_SIGNATURE) {
 			qca->sync = QCASPI_SYNC_UNKNOWN;
 			netdev_dbg(qca->net_dev, "sync: got CPU on, but signature was invalid, restart\n");
+			return;
 		} else {
 			/* ensure that the WRBUF is empty */
 			qcaspi_read_register(qca, SPI_REG_WRBUF_SPC_AVA,

From 6e03f3ff29c1b479cd10cab0d1c4530bafad601c Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sat, 8 May 2021 14:36:34 +0200
Subject: [PATCH 0044/3292] net: qca_spi: Avoid re-sync for single signature
 error

Setting a new network key would cause a reset of the QCA7000. Usually
the driver only notice the SPI interrupt and a single signature error.
So avoid the whole re-sync process (possible packet loss, transmit queue
stop and no carrier for at least 1 second) in this case.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 3e2a54c2fc83..0937ceb08296 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -524,8 +524,11 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event)
 
 	switch (qca->sync) {
 	case QCASPI_SYNC_READY:
-		/* Read signature, if not valid go to unknown state. */
+		/* Check signature twice, if not valid go to unknown state. */
 		qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature);
+		if (signature != QCASPI_GOOD_SIGNATURE)
+			qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature);
+
 		if (signature != QCASPI_GOOD_SIGNATURE) {
 			qca->sync = QCASPI_SYNC_UNKNOWN;
 			netdev_dbg(qca->net_dev, "sync: bad signature, restart\n");

From a53935674563f60d8e22ca88ab607dac89fb353d Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sat, 8 May 2021 14:36:35 +0200
Subject: [PATCH 0045/3292] net: qca_spi: Introduce stat about bad signature

In order to identify significant signature issues add a new stat counter,
which increases on bad signature values that causes a sync loss.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_debug.c | 1 +
 drivers/net/ethernet/qualcomm/qca_spi.c   | 4 ++++
 drivers/net/ethernet/qualcomm/qca_spi.h   | 1 +
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
index 702aa217a27a..d59fff2fbcc6 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -62,6 +62,7 @@ static const char qcaspi_gstrings_stats[][ETH_GSTRING_LEN] = {
 	"SPI errors",
 	"Write verify errors",
 	"Buffer available errors",
+	"Bad signature",
 };
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 0937ceb08296..79fe3ec4e581 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -504,6 +504,9 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event)
 		qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature);
 		qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature);
 		if (signature != QCASPI_GOOD_SIGNATURE) {
+			if (qca->sync == QCASPI_SYNC_READY)
+				qca->stats.bad_signature++;
+
 			qca->sync = QCASPI_SYNC_UNKNOWN;
 			netdev_dbg(qca->net_dev, "sync: got CPU on, but signature was invalid, restart\n");
 			return;
@@ -531,6 +534,7 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event)
 
 		if (signature != QCASPI_GOOD_SIGNATURE) {
 			qca->sync = QCASPI_SYNC_UNKNOWN;
+			qca->stats.bad_signature++;
 			netdev_dbg(qca->net_dev, "sync: bad signature, restart\n");
 			/* don't reset right away */
 			return;
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h
index d13a67e20d65..3067356106f0 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -75,6 +75,7 @@ struct qcaspi_stats {
 	u64 spi_err;
 	u64 write_verify_failed;
 	u64 buf_avail_err;
+	u64 bad_signature;
 };
 
 struct qcaspi {

From a100243d95a60d74ae9bb9df1f5f2192e9aed6a7 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Sat, 8 May 2021 11:00:33 -0700
Subject: [PATCH 0046/3292] rtnetlink: avoid RCU read lock when holding RTNL

When we call af_ops->set_link_af() we hold a RCU read lock
as we retrieve af_ops from the RCU protected list, but this
is unnecessary because we already hold RTNL lock, which is
the writer lock for protecting rtnl_af_ops, so it is safer
than RCU read lock. Similar for af_ops->validate_link_af().

This was not a problem until we begin to take mutex lock
down the path of ->set_link_af() in __ipv6_dev_mc_dec()
recently. We can just drop the RCU read lock there and
assert RTNL lock.

Reported-and-tested-by: syzbot+7d941e89dd48bcf42573@syzkaller.appspotmail.com
Fixes: 63ed8de4be81 ("mld: add mc_lock for protecting per-interface mld data")
Tested-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 26 +++++++-------------------
 net/ipv4/devinet.c   |  4 ++--
 2 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 714d5fa38546..04b4f0f2a3d2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -543,7 +543,9 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
 {
 	const struct rtnl_af_ops *ops;
 
-	list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
+	ASSERT_RTNL();
+
+	list_for_each_entry(ops, &rtnl_af_ops, list) {
 		if (ops->family == family)
 			return ops;
 	}
@@ -2274,27 +2276,18 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
 			const struct rtnl_af_ops *af_ops;
 
-			rcu_read_lock();
 			af_ops = rtnl_af_lookup(nla_type(af));
-			if (!af_ops) {
-				rcu_read_unlock();
+			if (!af_ops)
 				return -EAFNOSUPPORT;
-			}
 
-			if (!af_ops->set_link_af) {
-				rcu_read_unlock();
+			if (!af_ops->set_link_af)
 				return -EOPNOTSUPP;
-			}
 
 			if (af_ops->validate_link_af) {
 				err = af_ops->validate_link_af(dev, af);
-				if (err < 0) {
-					rcu_read_unlock();
+				if (err < 0)
 					return err;
-				}
 			}
-
-			rcu_read_unlock();
 		}
 	}
 
@@ -2868,17 +2861,12 @@ static int do_setlink(const struct sk_buff *skb,
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
 			const struct rtnl_af_ops *af_ops;
 
-			rcu_read_lock();
-
 			BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
 
 			err = af_ops->set_link_af(dev, af, extack);
-			if (err < 0) {
-				rcu_read_unlock();
+			if (err < 0)
 				goto errout;
-			}
 
-			rcu_read_unlock();
 			status |= DO_SETLINK_NOTIFY;
 		}
 	}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2e35f68da40a..50deeff48c8b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1955,7 +1955,7 @@ static int inet_validate_link_af(const struct net_device *dev,
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int err, rem;
 
-	if (dev && !__in_dev_get_rcu(dev))
+	if (dev && !__in_dev_get_rtnl(dev))
 		return -EAFNOSUPPORT;
 
 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
@@ -1981,7 +1981,7 @@ static int inet_validate_link_af(const struct net_device *dev,
 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
 			    struct netlink_ext_ack *extack)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int rem;
 

From 71f0891c84dfdc448736082ab0a00acd29853896 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 10 May 2021 12:58:05 +0300
Subject: [PATCH 0047/3292] net: mvpp2: Put fwnode in error case during
 ->probe()

In each iteration fwnode_for_each_available_child_node() bumps a reference
counting of a loop variable followed by dropping in on a next iteration,

Since in error case the loop is broken, we have to drop a reference count
by ourselves. Do it for port_fwnode in error case during ->probe().

Fixes: 248122212f68 ("net: mvpp2: use device_*/fwnode_* APIs instead of of_*")
Cc: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index ec706d614cac..b48c08829a31 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7552,6 +7552,8 @@ static int mvpp2_probe(struct platform_device *pdev)
 	return 0;
 
 err_port_probe:
+	fwnode_handle_put(port_fwnode);
+
 	i = 0;
 	fwnode_for_each_available_child_node(fwnode, port_fwnode) {
 		if (priv->port_list[i])

From 692b82c57f71bc755c879ea4eae7fc0eb92cc00b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 12:58:06 +0300
Subject: [PATCH 0048/3292] net: mvpp2: Use device_get_match_data() helper

Use the device_get_match_data() helper instead of open coding.

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index b48c08829a31..6bfad75c4087 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7311,7 +7311,6 @@ static int mvpp2_get_sram(struct platform_device *pdev,
 
 static int mvpp2_probe(struct platform_device *pdev)
 {
-	const struct acpi_device_id *acpi_id;
 	struct fwnode_handle *fwnode = pdev->dev.fwnode;
 	struct fwnode_handle *port_fwnode;
 	struct mvpp2 *priv;
@@ -7324,16 +7323,7 @@ static int mvpp2_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
-	if (has_acpi_companion(&pdev->dev)) {
-		acpi_id = acpi_match_device(pdev->dev.driver->acpi_match_table,
-					    &pdev->dev);
-		if (!acpi_id)
-			return -EINVAL;
-		priv->hw_version = (unsigned long)acpi_id->driver_data;
-	} else {
-		priv->hw_version =
-			(unsigned long)of_device_get_match_data(&pdev->dev);
-	}
+	priv->hw_version = (unsigned long)device_get_match_data(&pdev->dev);
 
 	/* multi queue mode isn't supported on PPV2.1, fallback to single
 	 * mode

From cf3399b731d36bc780803ff63e4d480a1efa33ac Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 12:58:07 +0300
Subject: [PATCH 0049/3292] net: mvpp2: Use devm_clk_get_optional()

Replace open coded variants of devm_clk_get_optional().

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 34 ++++++++-----------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 6bfad75c4087..b6b7ba891e71 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7435,28 +7435,27 @@ static int mvpp2_probe(struct platform_device *pdev)
 			if (err < 0)
 				goto err_gop_clk;
 
-			priv->mg_core_clk = devm_clk_get(&pdev->dev, "mg_core_clk");
+			priv->mg_core_clk = devm_clk_get_optional(&pdev->dev, "mg_core_clk");
 			if (IS_ERR(priv->mg_core_clk)) {
-				priv->mg_core_clk = NULL;
-			} else {
-				err = clk_prepare_enable(priv->mg_core_clk);
-				if (err < 0)
-					goto err_mg_clk;
+				err = PTR_ERR(priv->mg_core_clk);
+				goto err_mg_clk;
 			}
+
+			err = clk_prepare_enable(priv->mg_core_clk);
+			if (err < 0)
+				goto err_mg_clk;
 		}
 
-		priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk");
+		priv->axi_clk = devm_clk_get_optional(&pdev->dev, "axi_clk");
 		if (IS_ERR(priv->axi_clk)) {
 			err = PTR_ERR(priv->axi_clk);
-			if (err == -EPROBE_DEFER)
-				goto err_mg_core_clk;
-			priv->axi_clk = NULL;
-		} else {
-			err = clk_prepare_enable(priv->axi_clk);
-			if (err < 0)
-				goto err_mg_core_clk;
+			goto err_mg_core_clk;
 		}
 
+		err = clk_prepare_enable(priv->axi_clk);
+		if (err < 0)
+			goto err_mg_core_clk;
+
 		/* Get system's tclk rate */
 		priv->tclk = clk_get_rate(priv->pp_clk);
 	} else if (device_property_read_u32(&pdev->dev, "clock-frequency",
@@ -7552,13 +7551,10 @@ err_port_probe:
 	}
 err_axi_clk:
 	clk_disable_unprepare(priv->axi_clk);
-
 err_mg_core_clk:
-	if (priv->hw_version >= MVPP22)
-		clk_disable_unprepare(priv->mg_core_clk);
+	clk_disable_unprepare(priv->mg_core_clk);
 err_mg_clk:
-	if (priv->hw_version >= MVPP22)
-		clk_disable_unprepare(priv->mg_clk);
+	clk_disable_unprepare(priv->mg_clk);
 err_gop_clk:
 	clk_disable_unprepare(priv->gop_clk);
 err_pp_clk:

From 584525554fd61040bbb28cd1280d07884ed148e8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 10 May 2021 12:58:08 +0300
Subject: [PATCH 0050/3292] net: mvpp2: Unshadow error code of
 device_property_read_u32()

device_property_read_u32() may return different error codes.
Propagate it to the caller.

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index b6b7ba891e71..9f464e44ae1b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7458,10 +7458,12 @@ static int mvpp2_probe(struct platform_device *pdev)
 
 		/* Get system's tclk rate */
 		priv->tclk = clk_get_rate(priv->pp_clk);
-	} else if (device_property_read_u32(&pdev->dev, "clock-frequency",
-					    &priv->tclk)) {
-		dev_err(&pdev->dev, "missing clock-frequency value\n");
-		return -EINVAL;
+	} else {
+		err = device_property_read_u32(&pdev->dev, "clock-frequency", &priv->tclk);
+		if (err) {
+			dev_err(&pdev->dev, "missing clock-frequency value\n");
+			return err;
+		}
 	}
 
 	if (priv->hw_version >= MVPP22) {

From aed6864035b1e51ba16323b54aa3e4861c1a0a3e Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 10 May 2021 22:10:02 +0800
Subject: [PATCH 0051/3292] net: stmmac: platform: Delete a redundant condition
 branch

The statement of the last "if (xxx)" branch is the same as the "else"
branch. Delete it to simplify code.

No functional change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 1e17a23d9118..97a1fedcc9ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -230,8 +230,6 @@ static int stmmac_mtl_setup(struct platform_device *pdev,
 		plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WFQ;
 	else if (of_property_read_bool(tx_node, "snps,tx-sched-dwrr"))
 		plat->tx_sched_algorithm = MTL_TX_ALGORITHM_DWRR;
-	else if (of_property_read_bool(tx_node, "snps,tx-sched-sp"))
-		plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
 	else
 		plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
 

From 532062b099563cba7a93bd9bd50554948661f013 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 17:49:09 +0300
Subject: [PATCH 0052/3292] atm: Replace custom isprint() with generic analogue

Custom isprint() definition may collide with one form ctype.h.
In order to avoid this, replace it with a functional analogue
which is isascii() && isprint() in this case.

First appearance of the code is in the commit 636b38438001
("Import 2.3.43").

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c | 11 +++++++----
 drivers/atm/iphase.h |  1 -
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 933e3ff2ee8d..e3f5d073caa6 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -47,6 +47,7 @@
 #include <linux/errno.h>  
 #include <linux/atm.h>  
 #include <linux/atmdev.h>  
+#include <linux/ctype.h>
 #include <linux/sonet.h>  
 #include <linux/skbuff.h>  
 #include <linux/time.h>  
@@ -996,10 +997,12 @@ static void xdump( u_char*  cp, int  length, char*  prefix )
         }
         pBuf += sprintf( pBuf, "  " );
         for(col = 0;count + col < length && col < 16; col++){
-            if (isprint((int)cp[count + col]))
-                pBuf += sprintf( pBuf, "%c", cp[count + col] );
-            else
-                pBuf += sprintf( pBuf, "." );
+		u_char c = cp[count + col];
+
+		if (isascii(c) && isprint(c))
+			pBuf += sprintf(pBuf, "%c", c);
+		else
+			pBuf += sprintf(pBuf, ".");
                 }
         printk("%s\n", prntBuf);
         count += col;
diff --git a/drivers/atm/iphase.h b/drivers/atm/iphase.h
index 2beacf2fc1ec..2f5f8875cbd1 100644
--- a/drivers/atm/iphase.h
+++ b/drivers/atm/iphase.h
@@ -124,7 +124,6 @@
 #define IF_RXPKT(A)
 #endif /* CONFIG_ATM_IA_DEBUG */ 
 
-#define isprint(a) ((a >=' ')&&(a <= '~'))  
 #define ATM_DESC(skb) (skb->protocol)
 #define IA_SKB_STATE(skb) (skb->protocol)
 #define IA_DLED   1

From 9d9d415f0048e4f7a6109595e2d1657850569c6c Mon Sep 17 00:00:00 2001
From: "Radu Pirea (NXP OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Mon, 10 May 2021 18:34:32 +0300
Subject: [PATCH 0053/3292] ptp: ptp_clock: make scaled_ppm_to_ppb static
 inline

Make scaled_ppm_to_ppb static inline to be able to build drivers that
use this function even with PTP_1588_CLOCK disabled.

Signed-off-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c          | 21 --------------------
 include/linux/ptp_clock_kernel.h | 34 ++++++++++++++++++++++++--------
 2 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 03a246e60fd9..a780435331c8 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -63,27 +63,6 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
 	spin_unlock_irqrestore(&queue->lock, flags);
 }
 
-s32 scaled_ppm_to_ppb(long ppm)
-{
-	/*
-	 * The 'freq' field in the 'struct timex' is in parts per
-	 * million, but with a 16 bit binary fractional field.
-	 *
-	 * We want to calculate
-	 *
-	 *    ppb = scaled_ppm * 1000 / 2^16
-	 *
-	 * which simplifies to
-	 *
-	 *    ppb = scaled_ppm * 125 / 2^13
-	 */
-	s64 ppb = 1 + ppm;
-	ppb *= 125;
-	ppb >>= 13;
-	return (s32) ppb;
-}
-EXPORT_SYMBOL(scaled_ppm_to_ppb);
-
 /* posix clock implementation */
 
 static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp)
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 0d47fd33b228..a311bddd9e85 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -186,6 +186,32 @@ struct ptp_clock_event {
 	};
 };
 
+/**
+ * scaled_ppm_to_ppb() - convert scaled ppm to ppb
+ *
+ * @ppm:    Parts per million, but with a 16 bit binary fractional field
+ */
+static inline s32 scaled_ppm_to_ppb(long ppm)
+{
+	/*
+	 * The 'freq' field in the 'struct timex' is in parts per
+	 * million, but with a 16 bit binary fractional field.
+	 *
+	 * We want to calculate
+	 *
+	 *    ppb = scaled_ppm * 1000 / 2^16
+	 *
+	 * which simplifies to
+	 *
+	 *    ppb = scaled_ppm * 125 / 2^13
+	 */
+	s64 ppb = 1 + ppm;
+
+	ppb *= 125;
+	ppb >>= 13;
+	return (s32)ppb;
+}
+
 #if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
 
 /**
@@ -229,14 +255,6 @@ extern void ptp_clock_event(struct ptp_clock *ptp,
 
 extern int ptp_clock_index(struct ptp_clock *ptp);
 
-/**
- * scaled_ppm_to_ppb() - convert scaled ppm to ppb
- *
- * @ppm:    Parts per million, but with a 16 bit binary fractional field
- */
-
-extern s32 scaled_ppm_to_ppb(long ppm);
-
 /**
  * ptp_find_pin() - obtain the pin index of a given auxiliary function
  *

From 514def5dd33984e986e0a3afd2a369d8b4688d3f Mon Sep 17 00:00:00 2001
From: "Radu Pirea (NXP OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Mon, 10 May 2021 18:34:33 +0300
Subject: [PATCH 0054/3292] phy: nxp-c45-tja11xx: add timestamping support

Add mii_timestamper interface and register a ptp clock.
The package timestamping can work with or without interrupts.
RX timestamps are received in the reserved field of the PTP package.
TX timestamps are read via MDIO from a set of registers.

Signed-off-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 531 +++++++++++++++++++++++++++++-
 1 file changed, 530 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 26b9c0d7cb9d..512e4cb5d2c2 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -13,6 +13,9 @@
 #include <linux/phy.h>
 #include <linux/processor.h>
 #include <linux/property.h>
+#include <linux/ptp_classify.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
 
 #define PHY_ID_TJA_1103			0x001BB010
 
@@ -57,6 +60,9 @@
 #define VEND1_PORT_CONTROL		0x8040
 #define PORT_CONTROL_EN			BIT(14)
 
+#define VEND1_PORT_ABILITIES		0x8046
+#define PTP_ABILITY			BIT(3)
+
 #define VEND1_PORT_INFRA_CONTROL	0xAC00
 #define PORT_INFRA_CONTROL_EN		BIT(14)
 
@@ -91,13 +97,106 @@
 #define VEND1_TX_IPG_LENGTH		0xAFD1
 #define COUNTER_EN			BIT(15)
 
+#define VEND1_LTC_LOAD_CTRL		0x1105
+#define READ_LTC			BIT(2)
+#define LOAD_LTC			BIT(0)
+
+#define VEND1_LTC_WR_NSEC_0		0x1106
+#define VEND1_LTC_WR_NSEC_1		0x1107
+#define VEND1_LTC_WR_SEC_0		0x1108
+#define VEND1_LTC_WR_SEC_1		0x1109
+
+#define VEND1_LTC_RD_NSEC_0		0x110A
+#define VEND1_LTC_RD_NSEC_1		0x110B
+#define VEND1_LTC_RD_SEC_0		0x110C
+#define VEND1_LTC_RD_SEC_1		0x110D
+
+#define VEND1_RATE_ADJ_SUBNS_0		0x110F
+#define VEND1_RATE_ADJ_SUBNS_1		0x1110
+#define CLK_RATE_ADJ_LD			BIT(15)
+#define CLK_RATE_ADJ_DIR		BIT(14)
+
+#define VEND1_HW_LTC_LOCK_CTRL		0x1115
+#define HW_LTC_LOCK_EN			BIT(0)
+
+#define VEND1_PTP_IRQ_EN		0x1131
+#define VEND1_PTP_IRQ_STATUS		0x1132
+#define PTP_IRQ_EGR_TS			BIT(0)
+
+#define VEND1_RX_TS_INSRT_CTRL		0x114D
+#define RX_TS_INSRT_MODE2		0x02
+
+#define VEND1_EGR_RING_DATA_0		0x114E
+#define VEND1_EGR_RING_DATA_1_SEQ_ID	0x114F
+#define VEND1_EGR_RING_DATA_2_NSEC_15_0	0x1150
+#define VEND1_EGR_RING_DATA_3		0x1151
+#define VEND1_EGR_RING_CTRL		0x1154
+
+#define RING_DATA_0_DOMAIN_NUMBER	GENMASK(7, 0)
+#define RING_DATA_0_MSG_TYPE		GENMASK(11, 8)
+#define RING_DATA_0_SEC_4_2		GENMASK(14, 2)
+#define RING_DATA_0_TS_VALID		BIT(15)
+
+#define RING_DATA_3_NSEC_29_16		GENMASK(13, 0)
+#define RING_DATA_3_SEC_1_0		GENMASK(15, 14)
+#define RING_DATA_5_SEC_16_5		GENMASK(15, 4)
+#define RING_DONE			BIT(0)
+
+#define TS_SEC_MASK			GENMASK(1, 0)
+
+#define VEND1_PORT_FUNC_ENABLES		0x8048
+#define PTP_ENABLE			BIT(3)
+
+#define VEND1_PORT_PTP_CONTROL		0x9000
+#define PORT_PTP_CONTROL_BYPASS		BIT(11)
+
+#define VEND1_PTP_CLK_PERIOD		0x1104
+#define PTP_CLK_PERIOD_100BT1		15ULL
+
+#define VEND1_EVENT_MSG_FILT		0x1148
+#define EVENT_MSG_FILT_ALL		0x0F
+#define EVENT_MSG_FILT_NONE		0x00
+
+#define VEND1_TX_PIPE_DLY_NS		0x1149
+#define VEND1_TX_PIPEDLY_SUBNS		0x114A
+#define VEND1_RX_PIPE_DLY_NS		0x114B
+#define VEND1_RX_PIPEDLY_SUBNS		0x114C
+
 #define RGMII_PERIOD_PS			8000U
 #define PS_PER_DEGREE			div_u64(RGMII_PERIOD_PS, 360)
 #define MIN_ID_PS			1644U
 #define MAX_ID_PS			2260U
 #define DEFAULT_ID_PS			2000U
 
+#define PPM_TO_SUBNS_INC(ppb)	div_u64(GENMASK(31, 0) * (ppb) * \
+					PTP_CLK_PERIOD_100BT1, NSEC_PER_SEC)
+
+#define NXP_C45_SKB_CB(skb)	((struct nxp_c45_skb_cb *)(skb)->cb)
+
+struct nxp_c45_skb_cb {
+	struct ptp_header *header;
+	unsigned int type;
+};
+
+struct nxp_c45_hwts {
+	u32	nsec;
+	u32	sec;
+	u8	domain_number;
+	u16	sequence_id;
+	u8	msg_type;
+};
+
 struct nxp_c45_phy {
+	struct phy_device *phydev;
+	struct mii_timestamper mii_ts;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info caps;
+	struct sk_buff_head tx_queue;
+	struct sk_buff_head rx_queue;
+	/* used to access the PTP registers atomic */
+	struct mutex ptp_lock;
+	int hwts_tx;
+	int hwts_rx;
 	u32 tx_delay;
 	u32 rx_delay;
 };
@@ -110,6 +209,382 @@ struct nxp_c45_phy_stats {
 	u16		mask;
 };
 
+static bool nxp_c45_poll_txts(struct phy_device *phydev)
+{
+	return phydev->irq <= 0;
+}
+
+static int _nxp_c45_ptp_gettimex64(struct ptp_clock_info *ptp,
+				   struct timespec64 *ts,
+				   struct ptp_system_timestamp *sts)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_LTC_LOAD_CTRL,
+		      READ_LTC);
+	ts->tv_nsec = phy_read_mmd(priv->phydev, MDIO_MMD_VEND1,
+				   VEND1_LTC_RD_NSEC_0);
+	ts->tv_nsec |= phy_read_mmd(priv->phydev, MDIO_MMD_VEND1,
+				    VEND1_LTC_RD_NSEC_1) << 16;
+	ts->tv_sec = phy_read_mmd(priv->phydev, MDIO_MMD_VEND1,
+				  VEND1_LTC_RD_SEC_0);
+	ts->tv_sec |= phy_read_mmd(priv->phydev, MDIO_MMD_VEND1,
+				   VEND1_LTC_RD_SEC_1) << 16;
+
+	return 0;
+}
+
+static int nxp_c45_ptp_gettimex64(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+
+	mutex_lock(&priv->ptp_lock);
+	_nxp_c45_ptp_gettimex64(ptp, ts, sts);
+	mutex_unlock(&priv->ptp_lock);
+
+	return 0;
+}
+
+static int _nxp_c45_ptp_settime64(struct ptp_clock_info *ptp,
+				  const struct timespec64 *ts)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_LTC_WR_NSEC_0,
+		      ts->tv_nsec);
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_LTC_WR_NSEC_1,
+		      ts->tv_nsec >> 16);
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_LTC_WR_SEC_0,
+		      ts->tv_sec);
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_LTC_WR_SEC_1,
+		      ts->tv_sec >> 16);
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_LTC_LOAD_CTRL,
+		      LOAD_LTC);
+
+	return 0;
+}
+
+static int nxp_c45_ptp_settime64(struct ptp_clock_info *ptp,
+				 const struct timespec64 *ts)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+
+	mutex_lock(&priv->ptp_lock);
+	_nxp_c45_ptp_settime64(ptp, ts);
+	mutex_unlock(&priv->ptp_lock);
+
+	return 0;
+}
+
+static int nxp_c45_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+	s32 ppb = scaled_ppm_to_ppb(scaled_ppm);
+	u64 subns_inc_val;
+	bool inc;
+
+	mutex_lock(&priv->ptp_lock);
+	inc = ppb >= 0;
+	ppb = abs(ppb);
+
+	subns_inc_val = PPM_TO_SUBNS_INC(ppb);
+
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_RATE_ADJ_SUBNS_0,
+		      subns_inc_val);
+	subns_inc_val >>= 16;
+	subns_inc_val |= CLK_RATE_ADJ_LD;
+	if (inc)
+		subns_inc_val |= CLK_RATE_ADJ_DIR;
+
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_RATE_ADJ_SUBNS_1,
+		      subns_inc_val);
+	mutex_unlock(&priv->ptp_lock);
+
+	return 0;
+}
+
+static int nxp_c45_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+	struct timespec64 now, then;
+
+	mutex_lock(&priv->ptp_lock);
+	then = ns_to_timespec64(delta);
+	_nxp_c45_ptp_gettimex64(ptp, &now, NULL);
+	now = timespec64_add(now, then);
+	_nxp_c45_ptp_settime64(ptp, &now);
+	mutex_unlock(&priv->ptp_lock);
+
+	return 0;
+}
+
+static void nxp_c45_reconstruct_ts(struct timespec64 *ts,
+				   struct nxp_c45_hwts *hwts)
+{
+	ts->tv_nsec = hwts->nsec;
+	if ((ts->tv_sec & TS_SEC_MASK) < (hwts->sec & TS_SEC_MASK))
+		ts->tv_sec -= BIT(2);
+	ts->tv_sec &= ~TS_SEC_MASK;
+	ts->tv_sec |= hwts->sec & TS_SEC_MASK;
+}
+
+static bool nxp_c45_match_ts(struct ptp_header *header,
+			     struct nxp_c45_hwts *hwts,
+			     unsigned int type)
+{
+	return ntohs(header->sequence_id) == hwts->sequence_id &&
+	       ptp_get_msgtype(header, type) == hwts->msg_type &&
+	       header->domain_number  == hwts->domain_number;
+}
+
+static bool nxp_c45_get_hwtxts(struct nxp_c45_phy *priv,
+			       struct nxp_c45_hwts *hwts)
+{
+	bool valid;
+	u16 reg;
+
+	mutex_lock(&priv->ptp_lock);
+	phy_write_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_EGR_RING_CTRL,
+		      RING_DONE);
+	reg = phy_read_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_EGR_RING_DATA_0);
+	valid = !!(reg & RING_DATA_0_TS_VALID);
+	if (!valid)
+		goto nxp_c45_get_hwtxts_out;
+
+	hwts->domain_number = reg;
+	hwts->msg_type = (reg & RING_DATA_0_MSG_TYPE) >> 8;
+	hwts->sec = (reg & RING_DATA_0_SEC_4_2) >> 10;
+	hwts->sequence_id = phy_read_mmd(priv->phydev, MDIO_MMD_VEND1,
+					 VEND1_EGR_RING_DATA_1_SEQ_ID);
+	hwts->nsec = phy_read_mmd(priv->phydev, MDIO_MMD_VEND1,
+				  VEND1_EGR_RING_DATA_2_NSEC_15_0);
+	reg = phy_read_mmd(priv->phydev, MDIO_MMD_VEND1, VEND1_EGR_RING_DATA_3);
+	hwts->nsec |= (reg & RING_DATA_3_NSEC_29_16) << 16;
+	hwts->sec |= (reg & RING_DATA_3_SEC_1_0) >> 14;
+
+nxp_c45_get_hwtxts_out:
+	mutex_unlock(&priv->ptp_lock);
+	return valid;
+}
+
+static void nxp_c45_process_txts(struct nxp_c45_phy *priv,
+				 struct nxp_c45_hwts *txts)
+{
+	struct sk_buff *skb, *tmp, *skb_match = NULL;
+	struct skb_shared_hwtstamps shhwtstamps;
+	struct timespec64 ts;
+	unsigned long flags;
+	bool ts_match;
+	s64 ts_ns;
+
+	spin_lock_irqsave(&priv->tx_queue.lock, flags);
+	skb_queue_walk_safe(&priv->tx_queue, skb, tmp) {
+		ts_match = nxp_c45_match_ts(NXP_C45_SKB_CB(skb)->header, txts,
+					    NXP_C45_SKB_CB(skb)->type);
+		if (!ts_match)
+			continue;
+		skb_match = skb;
+		__skb_unlink(skb, &priv->tx_queue);
+		break;
+	}
+	spin_unlock_irqrestore(&priv->tx_queue.lock, flags);
+
+	if (skb_match) {
+		nxp_c45_ptp_gettimex64(&priv->caps, &ts, NULL);
+		nxp_c45_reconstruct_ts(&ts, txts);
+		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+		ts_ns = timespec64_to_ns(&ts);
+		shhwtstamps.hwtstamp = ns_to_ktime(ts_ns);
+		skb_complete_tx_timestamp(skb_match, &shhwtstamps);
+	} else {
+		phydev_warn(priv->phydev,
+			    "the tx timestamp doesn't match with any skb\n");
+	}
+}
+
+static long nxp_c45_do_aux_work(struct ptp_clock_info *ptp)
+{
+	struct nxp_c45_phy *priv = container_of(ptp, struct nxp_c45_phy, caps);
+	bool poll_txts = nxp_c45_poll_txts(priv->phydev);
+	struct skb_shared_hwtstamps *shhwtstamps_rx;
+	struct nxp_c45_hwts hwts;
+	bool reschedule = false;
+	struct timespec64 ts;
+	struct sk_buff *skb;
+	bool txts_valid;
+	u32 ts_raw;
+
+	while (!skb_queue_empty_lockless(&priv->tx_queue) && poll_txts) {
+		txts_valid = nxp_c45_get_hwtxts(priv, &hwts);
+		if (unlikely(!txts_valid)) {
+			/* Still more skbs in the queue */
+			reschedule = true;
+			break;
+		}
+
+		nxp_c45_process_txts(priv, &hwts);
+	}
+
+	nxp_c45_ptp_gettimex64(&priv->caps, &ts, NULL);
+	while ((skb = skb_dequeue(&priv->rx_queue)) != NULL) {
+		ts_raw = __be32_to_cpu(NXP_C45_SKB_CB(skb)->header->reserved2);
+		hwts.sec = ts_raw >> 30;
+		hwts.nsec = ts_raw & GENMASK(29, 0);
+		nxp_c45_reconstruct_ts(&ts, &hwts);
+		shhwtstamps_rx = skb_hwtstamps(skb);
+		shhwtstamps_rx->hwtstamp = ns_to_ktime(timespec64_to_ns(&ts));
+		NXP_C45_SKB_CB(skb)->header->reserved2 = 0;
+		netif_rx_ni(skb);
+	}
+
+	return reschedule ? 1 : -1;
+}
+
+static int nxp_c45_init_ptp_clock(struct nxp_c45_phy *priv)
+{
+	priv->caps = (struct ptp_clock_info) {
+		.owner		= THIS_MODULE,
+		.name		= "NXP C45 PHC",
+		.max_adj	= 16666666,
+		.adjfine	= nxp_c45_ptp_adjfine,
+		.adjtime	= nxp_c45_ptp_adjtime,
+		.gettimex64	= nxp_c45_ptp_gettimex64,
+		.settime64	= nxp_c45_ptp_settime64,
+		.do_aux_work	= nxp_c45_do_aux_work,
+	};
+
+	priv->ptp_clock = ptp_clock_register(&priv->caps,
+					     &priv->phydev->mdio.dev);
+
+	if (IS_ERR(priv->ptp_clock))
+		return PTR_ERR(priv->ptp_clock);
+
+	if (!priv->ptp_clock)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void nxp_c45_txtstamp(struct mii_timestamper *mii_ts,
+			     struct sk_buff *skb, int type)
+{
+	struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy,
+						mii_ts);
+
+	switch (priv->hwts_tx) {
+	case HWTSTAMP_TX_ON:
+		NXP_C45_SKB_CB(skb)->type = type;
+		NXP_C45_SKB_CB(skb)->header = ptp_parse_header(skb, type);
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		skb_queue_tail(&priv->tx_queue, skb);
+		if (nxp_c45_poll_txts(priv->phydev))
+			ptp_schedule_worker(priv->ptp_clock, 0);
+		break;
+	case HWTSTAMP_TX_OFF:
+	default:
+		kfree_skb(skb);
+		break;
+	}
+}
+
+static bool nxp_c45_rxtstamp(struct mii_timestamper *mii_ts,
+			     struct sk_buff *skb, int type)
+{
+	struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy,
+						mii_ts);
+	struct ptp_header *header = ptp_parse_header(skb, type);
+
+	if (!header)
+		return false;
+
+	if (!priv->hwts_rx)
+		return false;
+
+	NXP_C45_SKB_CB(skb)->header = header;
+	skb_queue_tail(&priv->rx_queue, skb);
+	ptp_schedule_worker(priv->ptp_clock, 0);
+
+	return true;
+}
+
+static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts,
+			    struct ifreq *ifreq)
+{
+	struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy,
+						mii_ts);
+	struct phy_device *phydev = priv->phydev;
+	struct hwtstamp_config cfg;
+
+	if (copy_from_user(&cfg, ifreq->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ON)
+		return -ERANGE;
+
+	priv->hwts_tx = cfg.tx_type;
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		priv->hwts_rx = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		priv->hwts_rx = 1;
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (priv->hwts_rx || priv->hwts_tx) {
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_EVENT_MSG_FILT,
+			      EVENT_MSG_FILT_ALL);
+		phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1,
+				   VEND1_PORT_PTP_CONTROL,
+				   PORT_PTP_CONTROL_BYPASS);
+	} else {
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_EVENT_MSG_FILT,
+			      EVENT_MSG_FILT_NONE);
+		phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_PTP_CONTROL,
+				 PORT_PTP_CONTROL_BYPASS);
+	}
+
+	if (nxp_c45_poll_txts(priv->phydev))
+		goto nxp_c45_no_ptp_irq;
+
+	if (priv->hwts_tx)
+		phy_set_bits_mmd(phydev, MDIO_MMD_VEND1,
+				 VEND1_PTP_IRQ_EN, PTP_IRQ_EGR_TS);
+	else
+		phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1,
+				   VEND1_PTP_IRQ_EN, PTP_IRQ_EGR_TS);
+
+nxp_c45_no_ptp_irq:
+	return copy_to_user(ifreq->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int nxp_c45_ts_info(struct mii_timestamper *mii_ts,
+			   struct ethtool_ts_info *ts_info)
+{
+	struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy,
+						mii_ts);
+
+	ts_info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+			SOF_TIMESTAMPING_RX_HARDWARE |
+			SOF_TIMESTAMPING_RAW_HARDWARE;
+	ts_info->phc_index = ptp_clock_index(priv->ptp_clock);
+	ts_info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
+	ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			(1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+			(1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+			(1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT);
+
+	return 0;
+}
+
 static const struct nxp_c45_phy_stats nxp_c45_hw_stats[] = {
 	{ "phy_symbol_error_cnt", MDIO_MMD_VEND1,
 		VEND1_SYMBOL_ERROR_COUNTER, 0, GENMASK(15, 0) },
@@ -205,7 +680,9 @@ static int nxp_c45_config_intr(struct phy_device *phydev)
 
 static irqreturn_t nxp_c45_handle_interrupt(struct phy_device *phydev)
 {
+	struct nxp_c45_phy *priv = phydev->priv;
 	irqreturn_t ret = IRQ_NONE;
+	struct nxp_c45_hwts hwts;
 	int irq;
 
 	irq = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_IRQ_STATUS);
@@ -216,6 +693,18 @@ static irqreturn_t nxp_c45_handle_interrupt(struct phy_device *phydev)
 		ret = IRQ_HANDLED;
 	}
 
+	/* There is no need for ACK.
+	 * The irq signal will be asserted until the EGR TS FIFO will be
+	 * emptied.
+	 */
+	irq = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_PTP_IRQ_STATUS);
+	if (irq & PTP_IRQ_EGR_TS) {
+		while (nxp_c45_get_hwtxts(priv, &hwts))
+			nxp_c45_process_txts(priv, &hwts);
+
+		ret = IRQ_HANDLED;
+	}
+
 	return ret;
 }
 
@@ -566,20 +1055,60 @@ static int nxp_c45_config_init(struct phy_device *phydev)
 
 	phydev->autoneg = AUTONEG_DISABLE;
 
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_PTP_CLK_PERIOD,
+		      PTP_CLK_PERIOD_100BT1);
+	phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_HW_LTC_LOCK_CTRL,
+			   HW_LTC_LOCK_EN);
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_RX_TS_INSRT_CTRL,
+		      RX_TS_INSRT_MODE2);
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_FUNC_ENABLES,
+			 PTP_ENABLE);
+
 	return nxp_c45_start_op(phydev);
 }
 
 static int nxp_c45_probe(struct phy_device *phydev)
 {
 	struct nxp_c45_phy *priv;
+	int ptp_ability;
+	int ret = 0;
 
 	priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
+	skb_queue_head_init(&priv->tx_queue);
+	skb_queue_head_init(&priv->rx_queue);
+
+	priv->phydev = phydev;
+
 	phydev->priv = priv;
 
-	return 0;
+	mutex_init(&priv->ptp_lock);
+
+	ptp_ability = phy_read_mmd(phydev, MDIO_MMD_VEND1,
+				   VEND1_PORT_ABILITIES);
+	ptp_ability = !!(ptp_ability & PTP_ABILITY);
+	if (!ptp_ability) {
+		phydev_info(phydev, "the phy does not support PTP");
+		goto no_ptp_support;
+	}
+
+	if (IS_ENABLED(CONFIG_PTP_1588_CLOCK) &&
+	    IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING)) {
+		priv->mii_ts.rxtstamp = nxp_c45_rxtstamp;
+		priv->mii_ts.txtstamp = nxp_c45_txtstamp;
+		priv->mii_ts.hwtstamp = nxp_c45_hwtstamp;
+		priv->mii_ts.ts_info = nxp_c45_ts_info;
+		phydev->mii_ts = &priv->mii_ts;
+		ret = nxp_c45_init_ptp_clock(priv);
+	} else {
+		phydev_dbg(phydev, "PTP support not enabled even if the phy supports it");
+	}
+
+no_ptp_support:
+
+	return ret;
 }
 
 static struct phy_driver nxp_c45_driver[] = {

From 9e3617a7b84512bf96c04f9cf82d1a7257d33794 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 19:39:27 +0300
Subject: [PATCH 0055/3292] net: pch_gbe: Propagate error from
 devm_gpio_request_one()

If GPIO controller is not available yet we need to defer
the probe of GBE until provider will become available.

While here, drop GPIOF_EXPORT because it's deprecated and
may not be available.

Fixes: f1a26fdf5944 ("pch_gbe: Add MinnowBoard support")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Flavio Suligoi <f.suligoi@asem.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 334af49e5add..3dc29b282a88 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2532,9 +2532,13 @@ static int pch_gbe_probe(struct pci_dev *pdev,
 	adapter->pdev = pdev;
 	adapter->hw.back = adapter;
 	adapter->hw.reg = pcim_iomap_table(pdev)[PCH_GBE_PCI_BAR];
+
 	adapter->pdata = (struct pch_gbe_privdata *)pci_id->driver_data;
-	if (adapter->pdata && adapter->pdata->platform_init)
-		adapter->pdata->platform_init(pdev);
+	if (adapter->pdata && adapter->pdata->platform_init) {
+		ret = adapter->pdata->platform_init(pdev);
+		if (ret)
+			goto err_free_netdev;
+	}
 
 	adapter->ptp_pdev =
 		pci_get_domain_bus_and_slot(pci_domain_nr(adapter->pdev->bus),
@@ -2629,7 +2633,7 @@ err_free_netdev:
  */
 static int pch_gbe_minnow_platform_init(struct pci_dev *pdev)
 {
-	unsigned long flags = GPIOF_DIR_OUT | GPIOF_INIT_HIGH | GPIOF_EXPORT;
+	unsigned long flags = GPIOF_OUT_INIT_HIGH;
 	unsigned gpio = MINNOW_PHY_RESET_GPIO;
 	int ret;
 

From aca6a8746c36c160517f3216da81196248844d58 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 19:39:28 +0300
Subject: [PATCH 0056/3292] net: pch_gbe: Convert to use GPIO descriptors

This switches the PCH GBE driver to use GPIO descriptors.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Flavio Suligoi <f.suligoi@asem.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c  | 44 ++++++++++++++-----
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 3dc29b282a88..8adc8cfaca03 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -8,6 +8,9 @@
 
 #include "pch_gbe.h"
 #include "pch_gbe_phy.h"
+
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
 #include <linux/module.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_classify.h>
@@ -97,8 +100,6 @@ const char pch_driver_version[] = DRV_VERSION;
 #define PTP_L4_MULTICAST_SA "01:00:5e:00:01:81"
 #define PTP_L2_MULTICAST_SA "01:1b:19:00:00:00"
 
-#define MINNOW_PHY_RESET_GPIO		13
-
 static int pch_gbe_mdio_read(struct net_device *netdev, int addr, int reg);
 static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg,
 			       int data);
@@ -2628,26 +2629,45 @@ err_free_netdev:
 	return ret;
 }
 
+static void pch_gbe_gpio_remove_table(void *table)
+{
+	gpiod_remove_lookup_table(table);
+}
+
+static int pch_gbe_gpio_add_table(struct device *dev, void *table)
+{
+	gpiod_add_lookup_table(table);
+	return devm_add_action_or_reset(dev, pch_gbe_gpio_remove_table, table);
+}
+
+static struct gpiod_lookup_table pch_gbe_minnow_gpio_table = {
+	.dev_id		= "0000:02:00.1",
+	.table		= {
+		GPIO_LOOKUP("sch_gpio.33158", 13, NULL, GPIO_ACTIVE_LOW),
+		{}
+	},
+};
+
 /* The AR803X PHY on the MinnowBoard requires a physical pin to be toggled to
  * ensure it is awake for probe and init. Request the line and reset the PHY.
  */
 static int pch_gbe_minnow_platform_init(struct pci_dev *pdev)
 {
-	unsigned long flags = GPIOF_OUT_INIT_HIGH;
-	unsigned gpio = MINNOW_PHY_RESET_GPIO;
+	struct gpio_desc *gpiod;
 	int ret;
 
-	ret = devm_gpio_request_one(&pdev->dev, gpio, flags,
-				    "minnow_phy_reset");
-	if (ret) {
-		dev_err(&pdev->dev,
-			"ERR: Can't request PHY reset GPIO line '%d'\n", gpio);
+	ret = pch_gbe_gpio_add_table(&pdev->dev, &pch_gbe_minnow_gpio_table);
+	if (ret)
 		return ret;
-	}
 
-	gpio_set_value(gpio, 0);
+	gpiod = devm_gpiod_get(&pdev->dev, NULL, GPIOD_OUT_HIGH);
+	if (IS_ERR(gpiod))
+		return dev_err_probe(&pdev->dev, PTR_ERR(gpiod),
+				     "Can't request PHY reset GPIO line\n");
+
+	gpiod_set_value(gpiod, 1);
 	usleep_range(1250, 1500);
-	gpio_set_value(gpio, 1);
+	gpiod_set_value(gpiod, 0);
 	usleep_range(1250, 1500);
 
 	return ret;

From 6fcfb267cb4936d8427ceb431bb7e170a13c4922 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 19:39:29 +0300
Subject: [PATCH 0057/3292] net: pch_gbe: use readx_poll_timeout_atomic()
 variant

Use readx_poll_timeout_atomic() instead of open coded variants.

While at it, add __iomem attribute to the parameter of pch_gbe_wait_clr_bit().
This in particular will fix a lot of warnings detected by Sparse, e.g.

.../pch_gbe_main.c:308:26: warning: incorrect type in argument 1 (different address spaces)
.../pch_gbe_main.c:308:26:    expected void const [noderef] __iomem *
.../pch_gbe_main.c:308:26:    got void *reg

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Flavio Suligoi <f.suligoi@asem.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c  | 27 ++++++-------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 8adc8cfaca03..7b224745bf3e 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -11,6 +11,7 @@
 
 #include <linux/gpio/consumer.h>
 #include <linux/gpio/machine.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_classify.h>
@@ -299,15 +300,12 @@ static s32 pch_gbe_mac_read_mac_addr(struct pch_gbe_hw *hw)
  * @reg:	Pointer of register
  * @bit:	Busy bit
  */
-static void pch_gbe_wait_clr_bit(void *reg, u32 bit)
+static void pch_gbe_wait_clr_bit(void __iomem *reg, u32 bit)
 {
 	u32 tmp;
 
 	/* wait busy */
-	tmp = 1000;
-	while ((ioread32(reg) & bit) && --tmp)
-		cpu_relax();
-	if (!tmp)
+	if (readx_poll_timeout_atomic(ioread32, reg, tmp, !(tmp & bit), 0, 10))
 		pr_err("Error: busy bit is not cleared\n");
 }
 
@@ -491,18 +489,13 @@ u16 pch_gbe_mac_ctrl_miim(struct pch_gbe_hw *hw, u32 addr, u32 dir, u32 reg,
 			u16 data)
 {
 	struct pch_gbe_adapter *adapter = pch_gbe_hw_to_adapter(hw);
-	u32 data_out = 0;
-	unsigned int i;
 	unsigned long flags;
+	u32 data_out;
 
 	spin_lock_irqsave(&hw->miim_lock, flags);
 
-	for (i = 100; i; --i) {
-		if ((ioread32(&hw->reg->MIIM) & PCH_GBE_MIIM_OPER_READY))
-			break;
-		udelay(20);
-	}
-	if (i == 0) {
+	if (readx_poll_timeout_atomic(ioread32, &hw->reg->MIIM, data_out,
+				      data_out & PCH_GBE_MIIM_OPER_READY, 20, 2000)) {
 		netdev_err(adapter->netdev, "pch-gbe.miim won't go Ready\n");
 		spin_unlock_irqrestore(&hw->miim_lock, flags);
 		return 0;	/* No way to indicate timeout error */
@@ -510,12 +503,8 @@ u16 pch_gbe_mac_ctrl_miim(struct pch_gbe_hw *hw, u32 addr, u32 dir, u32 reg,
 	iowrite32(((reg << PCH_GBE_MIIM_REG_ADDR_SHIFT) |
 		  (addr << PCH_GBE_MIIM_PHY_ADDR_SHIFT) |
 		  dir | data), &hw->reg->MIIM);
-	for (i = 0; i < 100; i++) {
-		udelay(20);
-		data_out = ioread32(&hw->reg->MIIM);
-		if ((data_out & PCH_GBE_MIIM_OPER_READY))
-			break;
-	}
+	readx_poll_timeout_atomic(ioread32, &hw->reg->MIIM, data_out,
+				  data_out & PCH_GBE_MIIM_OPER_READY, 20, 2000);
 	spin_unlock_irqrestore(&hw->miim_lock, flags);
 
 	netdev_dbg(adapter->netdev, "PHY %s: reg=%d, data=0x%04X\n",

From 443ef39b499cc9c6635f83238101f1bb923e9326 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 19:39:30 +0300
Subject: [PATCH 0058/3292] net: pch_gbe: Use proper accessors to BE data in
 pch_ptp_match()

Sparse is not happy about handling of strict types in pch_ptp_match():

  .../pch_gbe_main.c:158:33: warning: incorrect type in argument 2 (different base types)
  .../pch_gbe_main.c:158:33:    expected unsigned short [usertype] uid_hi
  .../pch_gbe_main.c:158:33:    got restricted __be16 [usertype]
  .../pch_gbe_main.c:158:45: warning: incorrect type in argument 3 (different base types)
  .../pch_gbe_main.c:158:45:    expected unsigned int [usertype] uid_lo
  .../pch_gbe_main.c:158:45:    got restricted __be32 [usertype]
  .../pch_gbe_main.c:158:56: warning: incorrect type in argument 4 (different base types)
  .../pch_gbe_main.c:158:56:    expected unsigned short [usertype] seqid
  .../pch_gbe_main.c:158:56:    got restricted __be16 [usertype]

Fix that by switching to use proper accessors to BE data.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Flavio Suligoi <f.suligoi@asem.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c  | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 7b224745bf3e..5e8acf76410d 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -110,7 +110,7 @@ static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
 {
 	u8 *data = skb->data;
 	unsigned int offset;
-	u16 *hi, *id;
+	u16 hi, id;
 	u32 lo;
 
 	if (ptp_classify_raw(skb) == PTP_CLASS_NONE)
@@ -121,14 +121,11 @@ static int pch_ptp_match(struct sk_buff *skb, u16 uid_hi, u32 uid_lo, u16 seqid)
 	if (skb->len < offset + OFF_PTP_SEQUENCE_ID + sizeof(seqid))
 		return 0;
 
-	hi = (u16 *)(data + offset + OFF_PTP_SOURCE_UUID);
-	id = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+	hi = get_unaligned_be16(data + offset + OFF_PTP_SOURCE_UUID + 0);
+	lo = get_unaligned_be32(data + offset + OFF_PTP_SOURCE_UUID + 2);
+	id = get_unaligned_be16(data + offset + OFF_PTP_SEQUENCE_ID);
 
-	memcpy(&lo, &hi[1], sizeof(lo));
-
-	return (uid_hi == *hi &&
-		uid_lo == lo &&
-		seqid  == *id);
+	return (uid_hi == hi && uid_lo == lo && seqid == id);
 }
 
 static void
@@ -138,7 +135,6 @@ pch_rx_timestamp(struct pch_gbe_adapter *adapter, struct sk_buff *skb)
 	struct pci_dev *pdev;
 	u64 ns;
 	u32 hi, lo, val;
-	u16 uid, seq;
 
 	if (!adapter->hwts_rx_en)
 		return;
@@ -154,10 +150,7 @@ pch_rx_timestamp(struct pch_gbe_adapter *adapter, struct sk_buff *skb)
 	lo = pch_src_uuid_lo_read(pdev);
 	hi = pch_src_uuid_hi_read(pdev);
 
-	uid = hi & 0xffff;
-	seq = (hi >> 16) & 0xffff;
-
-	if (!pch_ptp_match(skb, htons(uid), htonl(lo), htons(seq)))
+	if (!pch_ptp_match(skb, hi, lo, hi >> 16))
 		goto out;
 
 	ns = pch_rx_snap_read(pdev);

From 40b161bb16c4456434b5f8f7fb74e3e3d6257c9a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 19:39:31 +0300
Subject: [PATCH 0059/3292] net: pch_gbe: remove unneeded MODULE_VERSION() call

Remove MODULE_VERSION(), as it doesn't seem to serve any practical purpose.
For in-tree drivers, the kernel version matters. The code received lots of
changes, but module version remained constant, since the driver landed in
mainline. So, this version doesn't seem have any practical meaning anymore.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Flavio Suligoi <f.suligoi@asem.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h         | 2 --
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c | 2 ++
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c    | 4 ----
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index a6823c4d355d..108f312bc542 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -596,8 +596,6 @@ struct pch_gbe_adapter {
 
 #define pch_gbe_hw_to_adapter(hw)	container_of(hw, struct pch_gbe_adapter, hw)
 
-extern const char pch_driver_version[];
-
 /* pch_gbe_main.c */
 int pch_gbe_up(struct pch_gbe_adapter *adapter);
 void pch_gbe_down(struct pch_gbe_adapter *adapter);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index a58f14aca10c..660b07cb5b92 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -8,6 +8,8 @@
 #include "pch_gbe.h"
 #include "pch_gbe_phy.h"
 
+static const char pch_driver_version[] = "1.01";
+
 /*
  * pch_gbe_stats - Stats item information
  */
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 5e8acf76410d..e351f3d1608f 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -18,9 +18,6 @@
 #include <linux/ptp_pch.h>
 #include <linux/gpio.h>
 
-#define DRV_VERSION     "1.01"
-const char pch_driver_version[] = DRV_VERSION;
-
 #define PCH_GBE_MAR_ENTRIES		16
 #define PCH_GBE_SHORT_PKT		64
 #define DSC_INIT16			0xC000
@@ -2728,7 +2725,6 @@ module_pci_driver(pch_gbe_driver);
 MODULE_DESCRIPTION("EG20T PCH Gigabit ethernet Driver");
 MODULE_AUTHOR("LAPIS SEMICONDUCTOR, <tshimizu818@gmail.com>");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, pch_gbe_pcidev_id);
 
 /* pch_gbe_main.c */

From 376d68929d5bb5edc57be5fb38daaff3f3375f64 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 10 May 2021 18:52:31 +0200
Subject: [PATCH 0060/3292] mvpp2: remove unused parameter

mvpp2_run_xdp() has an unused parameter rxq, remove it.

Fixes: 07dd0a7aae7f ("mvpp2: add basic XDP support")
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 9f464e44ae1b..b2259bf1d299 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3784,9 +3784,9 @@ mvpp2_xdp_xmit(struct net_device *dev, int num_frame,
 }
 
 static int
-mvpp2_run_xdp(struct mvpp2_port *port, struct mvpp2_rx_queue *rxq,
-	      struct bpf_prog *prog, struct xdp_buff *xdp,
-	      struct page_pool *pp, struct mvpp2_pcpu_stats *stats)
+mvpp2_run_xdp(struct mvpp2_port *port, struct bpf_prog *prog,
+	      struct xdp_buff *xdp, struct page_pool *pp,
+	      struct mvpp2_pcpu_stats *stats)
 {
 	unsigned int len, sync, err;
 	struct page *page;
@@ -3925,7 +3925,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 					 MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM,
 					 rx_bytes, false);
 
-			ret = mvpp2_run_xdp(port, rxq, xdp_prog, &xdp, pp, &ps);
+			ret = mvpp2_run_xdp(port, xdp_prog, &xdp, pp, &ps);
 
 			if (ret) {
 				xdp_ret |= ret;

From 4c598e5e679c31106914b63b5e3877994dfbba19 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 10 May 2021 18:52:32 +0200
Subject: [PATCH 0061/3292] mvpp2: suppress warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove some unreachable code, so to suppress this warning:

drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c: In function ‘mvpp2_prs_tcam_first_free’:
drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c:397:10: warning: comparison is always false due to limited range of data type [-Wtype-limits]
  397 |  if (end >= MVPP2_PRS_TCAM_SRAM_SIZE)
      |          ^~

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index 7cc7d72d761e..93575800ca92 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -394,9 +394,6 @@ static int mvpp2_prs_tcam_first_free(struct mvpp2 *priv, unsigned char start,
 	if (start > end)
 		swap(start, end);
 
-	if (end >= MVPP2_PRS_TCAM_SRAM_SIZE)
-		end = MVPP2_PRS_TCAM_SRAM_SIZE - 1;
-
 	for (tid = start; tid <= end; tid++) {
 		if (!priv->prs_shadow[tid].valid)
 			return tid;

From 254c8b96c4af02a09004d605c7f2dfad7f1cb5ca Mon Sep 17 00:00:00 2001
From: Roni Nevalainen <catmaster.kissa@gmail.com>
Date: Wed, 5 May 2021 19:19:58 +0300
Subject: [PATCH 0062/3292] audit: add blank line after variable declarations

Fix the following checkpatch warning in auditsc.c:

WARNING: Missing a blank line after declarations

Signed-off-by: Roni Nevalainen <kitten@kittenz.dev>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/auditsc.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 175ef6f3ea4e..0a9a1569f1ea 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -160,6 +160,7 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
 static int audit_match_perm(struct audit_context *ctx, int mask)
 {
 	unsigned n;
+
 	if (unlikely(!ctx))
 		return 0;
 	n = ctx->major;
@@ -239,6 +240,7 @@ static int put_tree_ref(struct audit_context *ctx, struct audit_chunk *chunk)
 {
 	struct audit_tree_refs *p = ctx->trees;
 	int left = ctx->tree_count;
+
 	if (likely(left)) {
 		p->c[--left] = chunk;
 		ctx->tree_count = left;
@@ -259,6 +261,7 @@ static int put_tree_ref(struct audit_context *ctx, struct audit_chunk *chunk)
 static int grow_tree_refs(struct audit_context *ctx)
 {
 	struct audit_tree_refs *p = ctx->trees;
+
 	ctx->trees = kzalloc(sizeof(struct audit_tree_refs), GFP_KERNEL);
 	if (!ctx->trees) {
 		ctx->trees = p;
@@ -277,6 +280,7 @@ static void unroll_tree_refs(struct audit_context *ctx,
 {
 	struct audit_tree_refs *q;
 	int n;
+
 	if (!p) {
 		/* we started with empty chain */
 		p = ctx->first_trees;
@@ -303,6 +307,7 @@ static void unroll_tree_refs(struct audit_context *ctx,
 static void free_tree_refs(struct audit_context *ctx)
 {
 	struct audit_tree_refs *p, *q;
+
 	for (p = ctx->first_trees; p; p = q) {
 		q = p->next;
 		kfree(p);
@@ -313,6 +318,7 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
 {
 	struct audit_tree_refs *p;
 	int n;
+
 	if (!tree)
 		return 0;
 	/* full ones */
@@ -1225,6 +1231,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 	switch (context->type) {
 	case AUDIT_SOCKETCALL: {
 		int nargs = context->socketcall.nargs;
+
 		audit_log_format(ab, "nargs=%d", nargs);
 		for (i = 0; i < nargs; i++)
 			audit_log_format(ab, " a%d=%lx", i,
@@ -1240,6 +1247,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 		if (osid) {
 			char *ctx = NULL;
 			u32 len;
+
 			if (security_secid_to_secctx(osid, &ctx, &len)) {
 				audit_log_format(ab, " osid=%u", osid);
 				*call_panic = 1;
@@ -1289,6 +1297,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 		break;
 	case AUDIT_MQ_GETSETATTR: {
 		struct mq_attr *attr = &context->mq_getsetattr.mqstat;
+
 		audit_log_format(ab,
 			"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
 			"mq_curmsgs=%ld ",
@@ -1325,6 +1334,7 @@ static void show_special(struct audit_context *context, int *call_panic)
 static inline int audit_proctitle_rtrim(char *proctitle, int len)
 {
 	char *end = proctitle + len - 1;
+
 	while (end > proctitle && !isprint(*end))
 		end--;
 
@@ -1513,6 +1523,7 @@ static void audit_log_exit(void)
 
 		case AUDIT_BPRM_FCAPS: {
 			struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
+
 			audit_log_format(ab, "fver=%x", axs->fcap_ver);
 			audit_log_cap(ab, "fp", &axs->fcap.permitted);
 			audit_log_cap(ab, "fi", &axs->fcap.inheritable);
@@ -1765,6 +1776,7 @@ static inline void handle_one(const struct inode *inode)
 	struct audit_tree_refs *p;
 	struct audit_chunk *chunk;
 	int count;
+
 	if (likely(!inode->i_fsnotify_marks))
 		return;
 	context = audit_context();
@@ -1806,8 +1818,10 @@ retry:
 	seq = read_seqbegin(&rename_lock);
 	for(;;) {
 		struct inode *inode = d_backing_inode(d);
+
 		if (inode && unlikely(inode->i_fsnotify_marks)) {
 			struct audit_chunk *chunk;
+
 			chunk = audit_tree_lookup(inode);
 			if (chunk) {
 				if (unlikely(!put_tree_ref(context, chunk))) {
@@ -2285,6 +2299,7 @@ void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
 	struct audit_context *context = audit_context();
+
 	context->mq_getsetattr.mqdes = mqdes;
 	context->mq_getsetattr.mqstat = *mqstat;
 	context->type = AUDIT_MQ_GETSETATTR;
@@ -2298,6 +2313,7 @@ void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
 	struct audit_context *context = audit_context();
+
 	context->ipc.uid = ipcp->uid;
 	context->ipc.gid = ipcp->gid;
 	context->ipc.mode = ipcp->mode;
@@ -2362,6 +2378,7 @@ int __audit_socketcall(int nargs, unsigned long *args)
 void __audit_fd_pair(int fd1, int fd2)
 {
 	struct audit_context *context = audit_context();
+
 	context->fds[0] = fd1;
 	context->fds[1] = fd2;
 }
@@ -2379,6 +2396,7 @@ int __audit_sockaddr(int len, void *a)
 
 	if (!context->sockaddr) {
 		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
+
 		if (!p)
 			return -ENOMEM;
 		context->sockaddr = p;
@@ -2510,6 +2528,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 void __audit_log_capset(const struct cred *new, const struct cred *old)
 {
 	struct audit_context *context = audit_context();
+
 	context->capset.pid = task_tgid_nr(current);
 	context->capset.cap.effective   = new->cap_effective;
 	context->capset.cap.inheritable = new->cap_effective;
@@ -2521,6 +2540,7 @@ void __audit_log_capset(const struct cred *new, const struct cred *old)
 void __audit_mmap_fd(int fd, int flags)
 {
 	struct audit_context *context = audit_context();
+
 	context->mmap.fd = fd;
 	context->mmap.flags = flags;
 	context->type = AUDIT_MMAP;
@@ -2686,6 +2706,7 @@ void audit_seccomp_actions_logged(const char *names, const char *old_names,
 struct list_head *audit_killed_trees(void)
 {
 	struct audit_context *ctx = audit_context();
+
 	if (likely(!ctx || !ctx->in_syscall))
 		return NULL;
 	return &ctx->killed_trees;

From c69431aab67a912836e5831f03d99a819c14c9c3 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 29 Apr 2021 14:46:54 +0100
Subject: [PATCH 0063/3292] bpf: verifier: Improve function state reallocation

Resizing and copying stack and reference tracking state currently
does a lot of kfree / kmalloc when the size of the tracked set changes.
The logic in copy_*_state and realloc_*_state is also hard to follow.

Refactor this into two core functions. copy_array copies from a source
into a destination. It avoids reallocation by taking the allocated
size of the destination into account via ksize(). The function is
essentially krealloc_array, with the difference that the contents of
dst are not preserved. realloc_array changes the size of an array and
zeroes newly allocated items. Contrary to krealloc both functions don't
free the destination if the size is zero. Instead we rely on free_func_state
to clean up.

realloc_stack_state is renamed to grow_stack_state to better convey
that it never shrinks the stack state.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210429134656.122225-2-lmb@cloudflare.com
---
 kernel/bpf/verifier.c | 197 ++++++++++++++++++++++--------------------
 1 file changed, 101 insertions(+), 96 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 757476c91c98..a31afd04fa5d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -737,81 +737,104 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 	verbose(env, "\n");
 }
 
-#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
-static int copy_##NAME##_state(struct bpf_func_state *dst,		\
-			       const struct bpf_func_state *src)	\
-{									\
-	if (!src->FIELD)						\
-		return 0;						\
-	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
-		/* internal bug, make state invalid to reject the program */ \
-		memset(dst, 0, sizeof(*dst));				\
-		return -EFAULT;						\
-	}								\
-	memcpy(dst->FIELD, src->FIELD,					\
-	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
-	return 0;							\
-}
-/* copy_reference_state() */
-COPY_STATE_FN(reference, acquired_refs, refs, 1)
-/* copy_stack_state() */
-COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
-#undef COPY_STATE_FN
-
-#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
-static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
-				  bool copy_old)			\
-{									\
-	u32 old_size = state->COUNT;					\
-	struct bpf_##NAME##_state *new_##FIELD;				\
-	int slot = size / SIZE;						\
-									\
-	if (size <= old_size || !size) {				\
-		if (copy_old)						\
-			return 0;					\
-		state->COUNT = slot * SIZE;				\
-		if (!size && old_size) {				\
-			kfree(state->FIELD);				\
-			state->FIELD = NULL;				\
-		}							\
-		return 0;						\
-	}								\
-	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
-				    GFP_KERNEL);			\
-	if (!new_##FIELD)						\
-		return -ENOMEM;						\
-	if (copy_old) {							\
-		if (state->FIELD)					\
-			memcpy(new_##FIELD, state->FIELD,		\
-			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
-		memset(new_##FIELD + old_size / SIZE, 0,		\
-		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
-	}								\
-	state->COUNT = slot * SIZE;					\
-	kfree(state->FIELD);						\
-	state->FIELD = new_##FIELD;					\
-	return 0;							\
-}
-/* realloc_reference_state() */
-REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
-/* realloc_stack_state() */
-REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
-#undef REALLOC_STATE_FN
-
-/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
- * make it consume minimal amount of memory. check_stack_write() access from
- * the program calls into realloc_func_state() to grow the stack size.
- * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
- * which realloc_stack_state() copies over. It points to previous
- * bpf_verifier_state which is never reallocated.
+/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
+ * small to hold src. This is different from krealloc since we don't want to preserve
+ * the contents of dst.
+ *
+ * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
+ * not be allocated.
  */
-static int realloc_func_state(struct bpf_func_state *state, int stack_size,
-			      int refs_size, bool copy_old)
+static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
 {
-	int err = realloc_reference_state(state, refs_size, copy_old);
-	if (err)
-		return err;
-	return realloc_stack_state(state, stack_size, copy_old);
+	size_t bytes;
+
+	if (ZERO_OR_NULL_PTR(src))
+		goto out;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
+		return NULL;
+
+	if (ksize(dst) < bytes) {
+		kfree(dst);
+		dst = kmalloc_track_caller(bytes, flags);
+		if (!dst)
+			return NULL;
+	}
+
+	memcpy(dst, src, bytes);
+out:
+	return dst ? dst : ZERO_SIZE_PTR;
+}
+
+/* resize an array from old_n items to new_n items. the array is reallocated if it's too
+ * small to hold new_n items. new items are zeroed out if the array grows.
+ *
+ * Contrary to krealloc_array, does not free arr if new_n is zero.
+ */
+static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
+{
+	if (!new_n || old_n == new_n)
+		goto out;
+
+	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
+	if (!arr)
+		return NULL;
+
+	if (new_n > old_n)
+		memset(arr + old_n * size, 0, (new_n - old_n) * size);
+
+out:
+	return arr ? arr : ZERO_SIZE_PTR;
+}
+
+static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
+{
+	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
+			       sizeof(struct bpf_reference_state), GFP_KERNEL);
+	if (!dst->refs)
+		return -ENOMEM;
+
+	dst->acquired_refs = src->acquired_refs;
+	return 0;
+}
+
+static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
+{
+	size_t n = src->allocated_stack / BPF_REG_SIZE;
+
+	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
+				GFP_KERNEL);
+	if (!dst->stack)
+		return -ENOMEM;
+
+	dst->allocated_stack = src->allocated_stack;
+	return 0;
+}
+
+static int resize_reference_state(struct bpf_func_state *state, size_t n)
+{
+	state->refs = realloc_array(state->refs, state->acquired_refs, n,
+				    sizeof(struct bpf_reference_state));
+	if (!state->refs)
+		return -ENOMEM;
+
+	state->acquired_refs = n;
+	return 0;
+}
+
+static int grow_stack_state(struct bpf_func_state *state, int size)
+{
+	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
+
+	if (old_n >= n)
+		return 0;
+
+	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
+	if (!state->stack)
+		return -ENOMEM;
+
+	state->allocated_stack = size;
+	return 0;
 }
 
 /* Acquire a pointer id from the env and update the state->refs to include
@@ -825,7 +848,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 	int new_ofs = state->acquired_refs;
 	int id, err;
 
-	err = realloc_reference_state(state, state->acquired_refs + 1, true);
+	err = resize_reference_state(state, state->acquired_refs + 1);
 	if (err)
 		return err;
 	id = ++env->id_gen;
@@ -854,18 +877,6 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 	return -EINVAL;
 }
 
-static int transfer_reference_state(struct bpf_func_state *dst,
-				    struct bpf_func_state *src)
-{
-	int err = realloc_reference_state(dst, src->acquired_refs, false);
-	if (err)
-		return err;
-	err = copy_reference_state(dst, src);
-	if (err)
-		return err;
-	return 0;
-}
-
 static void free_func_state(struct bpf_func_state *state)
 {
 	if (!state)
@@ -904,10 +915,6 @@ static int copy_func_state(struct bpf_func_state *dst,
 {
 	int err;
 
-	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
-				 false);
-	if (err)
-		return err;
 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 	err = copy_reference_state(dst, src);
 	if (err)
@@ -2590,8 +2597,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
 	struct bpf_reg_state *reg = NULL;
 
-	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
-				 state->acquired_refs, true);
+	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
 	if (err)
 		return err;
 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -2753,8 +2759,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 	if (value_reg && register_is_null(value_reg))
 		writing_zero = true;
 
-	err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
-				 state->acquired_refs, true);
+	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
 	if (err)
 		return err;
 
@@ -5629,7 +5634,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			subprog /* subprog number within this prog */);
 
 	/* Transfer references to the callee */
-	err = transfer_reference_state(callee, caller);
+	err = copy_reference_state(callee, caller);
 	if (err)
 		return err;
 
@@ -5780,7 +5785,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	}
 
 	/* Transfer references to the caller */
-	err = transfer_reference_state(caller, callee);
+	err = copy_reference_state(caller, callee);
 	if (err)
 		return err;
 

From 06ab6a505583f9adbf5e1f05d86e7bdd7b02248e Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 29 Apr 2021 14:46:55 +0100
Subject: [PATCH 0064/3292] bpf: verifier: Use copy_array for jmp_history

Eliminate a couple needless kfree / kmalloc cycles by using
copy_array for jmp_history.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210429134656.122225-3-lmb@cloudflare.com
---
 kernel/bpf/verifier.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a31afd04fa5d..380c8ad49b7f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -926,16 +926,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 			       const struct bpf_verifier_state *src)
 {
 	struct bpf_func_state *dst;
-	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
 	int i, err;
 
-	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
-		kfree(dst_state->jmp_history);
-		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
-		if (!dst_state->jmp_history)
-			return -ENOMEM;
-	}
-	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
+	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
+					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
+					    GFP_USER);
+	if (!dst_state->jmp_history)
+		return -ENOMEM;
 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
 
 	/* if dst has more stack frames then src frame, free them */

From c9e73e3d2b1eb1ea7ff068e05007eec3bd8ef1c9 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 29 Apr 2021 14:46:56 +0100
Subject: [PATCH 0065/3292] bpf: verifier: Allocate idmap scratch in verifier
 env

func_states_equal makes a very short lived allocation for idmap,
probably because it's too large to fit on the stack. However the
function is called quite often, leading to a lot of alloc / free
churn. Replace the temporary allocation with dedicated scratch
space in struct bpf_verifier_env.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Link: https://lore.kernel.org/bpf/20210429134656.122225-4-lmb@cloudflare.com
---
 include/linux/bpf_verifier.h |  8 +++++++
 kernel/bpf/verifier.c        | 46 ++++++++++++------------------------
 2 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 06841517ab1e..d4632aa3ca50 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -215,6 +215,13 @@ struct bpf_idx_pair {
 	u32 idx;
 };
 
+struct bpf_id_pair {
+	u32 old;
+	u32 cur;
+};
+
+/* Maximum number of register states that can exist at once */
+#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
 #define MAX_CALL_FRAMES 8
 struct bpf_verifier_state {
 	/* call stack tracking */
@@ -418,6 +425,7 @@ struct bpf_verifier_env {
 	const struct bpf_line_info *prev_linfo;
 	struct bpf_verifier_log log;
 	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
+	struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
 	struct {
 		int *insn_state;
 		int *insn_stack;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 380c8ad49b7f..bdfdb54676ea 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9748,13 +9748,6 @@ static bool range_within(struct bpf_reg_state *old,
 	       old->s32_max_value >= cur->s32_max_value;
 }
 
-/* Maximum number of register states that can exist at once */
-#define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
-struct idpair {
-	u32 old;
-	u32 cur;
-};
-
 /* If in the old state two registers had the same id, then they need to have
  * the same id in the new state as well.  But that id could be different from
  * the old state, so we need to track the mapping from old to new ids.
@@ -9765,11 +9758,11 @@ struct idpair {
  * So we look through our idmap to see if this old id has been seen before.  If
  * so, we require the new id to match; otherwise, we add the id pair to the map.
  */
-static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
+static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
 {
 	unsigned int i;
 
-	for (i = 0; i < ID_MAP_SIZE; i++) {
+	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
 		if (!idmap[i].old) {
 			/* Reached an empty slot; haven't seen this id before */
 			idmap[i].old = old_id;
@@ -9882,7 +9875,7 @@ next:
 
 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
-		    struct idpair *idmap)
+		    struct bpf_id_pair *idmap)
 {
 	bool equal;
 
@@ -10000,7 +9993,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 
 static bool stacksafe(struct bpf_func_state *old,
 		      struct bpf_func_state *cur,
-		      struct idpair *idmap)
+		      struct bpf_id_pair *idmap)
 {
 	int i, spi;
 
@@ -10097,32 +10090,23 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
  * whereas register type in current state is meaningful, it means that
  * the current state will reach 'bpf_exit' instruction safely
  */
-static bool func_states_equal(struct bpf_func_state *old,
+static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
 			      struct bpf_func_state *cur)
 {
-	struct idpair *idmap;
-	bool ret = false;
 	int i;
 
-	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
-	/* If we failed to allocate the idmap, just say it's not safe */
-	if (!idmap)
+	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
+			return false;
+
+	if (!stacksafe(old, cur, env->idmap_scratch))
 		return false;
 
-	for (i = 0; i < MAX_BPF_REG; i++) {
-		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
-			goto out_free;
-	}
-
-	if (!stacksafe(old, cur, idmap))
-		goto out_free;
-
 	if (!refsafe(old, cur))
-		goto out_free;
-	ret = true;
-out_free:
-	kfree(idmap);
-	return ret;
+		return false;
+
+	return true;
 }
 
 static bool states_equal(struct bpf_verifier_env *env,
@@ -10149,7 +10133,7 @@ static bool states_equal(struct bpf_verifier_env *env,
 	for (i = 0; i <= old->curframe; i++) {
 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
 			return false;
-		if (!func_states_equal(old->frame[i], cur->frame[i]))
+		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
 			return false;
 	}
 	return true;

From fba472bb38e2d2b9a676364ade4be4e5df665a82 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Tue, 30 Mar 2021 15:16:45 +0200
Subject: [PATCH 0066/3292] selinux: simplify duplicate_policydb_cond_list() by
 using kmemdup()

We can do the allocation + copying of expr.nodes in one go using
kmemdup().

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/conditional.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 1ef74c085f2b..f6dfa9c821d6 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -628,7 +628,8 @@ static int cond_dup_av_list(struct cond_av_list *new,
 static int duplicate_policydb_cond_list(struct policydb *newp,
 					struct policydb *origp)
 {
-	int rc, i, j;
+	int rc;
+	u32 i;
 
 	rc = avtab_alloc_dup(&newp->te_cond_avtab, &origp->te_cond_avtab);
 	if (rc)
@@ -648,12 +649,12 @@ static int duplicate_policydb_cond_list(struct policydb *newp,
 		newp->cond_list_len++;
 
 		newn->cur_state = orign->cur_state;
-		newn->expr.nodes = kcalloc(orign->expr.len,
-					sizeof(*newn->expr.nodes), GFP_KERNEL);
+		newn->expr.nodes = kmemdup(orign->expr.nodes,
+				orign->expr.len * sizeof(*orign->expr.nodes),
+				GFP_KERNEL);
 		if (!newn->expr.nodes)
 			goto error;
-		for (j = 0; j < orign->expr.len; j++)
-			newn->expr.nodes[j] = orign->expr.nodes[j];
+
 		newn->expr.len = orign->expr.len;
 
 		rc = cond_dup_av_list(&newn->true_list, &orign->true_list,

From e1cce3a3cb28ba39d50166a78da91c8b5aae4af1 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Tue, 30 Mar 2021 15:16:46 +0200
Subject: [PATCH 0067/3292] selinux: constify some avtab function arguments

This makes the code a bit easier to reason about.

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/avtab.c       | 28 +++++++++++++++-------------
 security/selinux/ss/avtab.h       | 16 +++++++++-------
 security/selinux/ss/conditional.c |  3 ++-
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index 75df32906055..c97695ae508f 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -29,7 +29,7 @@ static struct kmem_cache *avtab_xperms_cachep __ro_after_init;
 /* Based on MurmurHash3, written by Austin Appleby and placed in the
  * public domain.
  */
-static inline int avtab_hash(struct avtab_key *keyp, u32 mask)
+static inline int avtab_hash(const struct avtab_key *keyp, u32 mask)
 {
 	static const u32 c1 = 0xcc9e2d51;
 	static const u32 c2 = 0x1b873593;
@@ -68,7 +68,7 @@ static inline int avtab_hash(struct avtab_key *keyp, u32 mask)
 static struct avtab_node*
 avtab_insert_node(struct avtab *h, int hvalue,
 		  struct avtab_node *prev, struct avtab_node *cur,
-		  struct avtab_key *key, struct avtab_datum *datum)
+		  const struct avtab_key *key, const struct avtab_datum *datum)
 {
 	struct avtab_node *newnode;
 	struct avtab_extended_perms *xperms;
@@ -103,7 +103,8 @@ avtab_insert_node(struct avtab *h, int hvalue,
 	return newnode;
 }
 
-static int avtab_insert(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum)
+static int avtab_insert(struct avtab *h, const struct avtab_key *key,
+			const struct avtab_datum *datum)
 {
 	int hvalue;
 	struct avtab_node *prev, *cur, *newnode;
@@ -147,8 +148,9 @@ static int avtab_insert(struct avtab *h, struct avtab_key *key, struct avtab_dat
  * key/specified mask into the table, as needed by the conditional avtab.
  * It also returns a pointer to the node inserted.
  */
-struct avtab_node *
-avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum)
+struct avtab_node *avtab_insert_nonunique(struct avtab *h,
+					  const struct avtab_key *key,
+					  const struct avtab_datum *datum)
 {
 	int hvalue;
 	struct avtab_node *prev, *cur;
@@ -178,7 +180,7 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu
 	return avtab_insert_node(h, hvalue, prev, cur, key, datum);
 }
 
-struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key)
+struct avtab_datum *avtab_search(struct avtab *h, const struct avtab_key *key)
 {
 	int hvalue;
 	struct avtab_node *cur;
@@ -213,8 +215,8 @@ struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key)
 /* This search function returns a node pointer, and can be used in
  * conjunction with avtab_search_next_node()
  */
-struct avtab_node*
-avtab_search_node(struct avtab *h, struct avtab_key *key)
+struct avtab_node *avtab_search_node(struct avtab *h,
+				     const struct avtab_key *key)
 {
 	int hvalue;
 	struct avtab_node *cur;
@@ -396,8 +398,8 @@ static uint16_t spec_order[] = {
 };
 
 int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
-		    int (*insertf)(struct avtab *a, struct avtab_key *k,
-				   struct avtab_datum *d, void *p),
+		    int (*insertf)(struct avtab *a, const struct avtab_key *k,
+				   const struct avtab_datum *d, void *p),
 		    void *p)
 {
 	__le16 buf16[4];
@@ -557,8 +559,8 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
 	return insertf(a, &key, &datum, p);
 }
 
-static int avtab_insertf(struct avtab *a, struct avtab_key *k,
-			 struct avtab_datum *d, void *p)
+static int avtab_insertf(struct avtab *a, const struct avtab_key *k,
+			 const struct avtab_datum *d, void *p)
 {
 	return avtab_insert(a, k, d);
 }
@@ -607,7 +609,7 @@ bad:
 	goto out;
 }
 
-int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp)
+int avtab_write_item(struct policydb *p, const struct avtab_node *cur, void *fp)
 {
 	__le16 buf16[4];
 	__le32 buf32[ARRAY_SIZE(cur->datum.u.xperms->perms.p)];
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h
index f2eeb36265d1..d3ebea8d146f 100644
--- a/security/selinux/ss/avtab.h
+++ b/security/selinux/ss/avtab.h
@@ -90,24 +90,26 @@ struct avtab {
 void avtab_init(struct avtab *h);
 int avtab_alloc(struct avtab *, u32);
 int avtab_alloc_dup(struct avtab *new, const struct avtab *orig);
-struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *k);
+struct avtab_datum *avtab_search(struct avtab *h, const struct avtab_key *k);
 void avtab_destroy(struct avtab *h);
 void avtab_hash_eval(struct avtab *h, char *tag);
 
 struct policydb;
 int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
-		    int (*insert)(struct avtab *a, struct avtab_key *k,
-				  struct avtab_datum *d, void *p),
+		    int (*insert)(struct avtab *a, const struct avtab_key *k,
+				  const struct avtab_datum *d, void *p),
 		    void *p);
 
 int avtab_read(struct avtab *a, void *fp, struct policydb *pol);
-int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp);
+int avtab_write_item(struct policydb *p, const struct avtab_node *cur, void *fp);
 int avtab_write(struct policydb *p, struct avtab *a, void *fp);
 
-struct avtab_node *avtab_insert_nonunique(struct avtab *h, struct avtab_key *key,
-					  struct avtab_datum *datum);
+struct avtab_node *avtab_insert_nonunique(struct avtab *h,
+					  const struct avtab_key *key,
+					  const struct avtab_datum *datum);
 
-struct avtab_node *avtab_search_node(struct avtab *h, struct avtab_key *key);
+struct avtab_node *avtab_search_node(struct avtab *h,
+				     const struct avtab_key *key);
 
 struct avtab_node *avtab_search_node_next(struct avtab_node *node, int specified);
 
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index f6dfa9c821d6..2ec6e5cd25d9 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -254,7 +254,8 @@ struct cond_insertf_data {
 	struct cond_av_list *other;
 };
 
-static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum *d, void *ptr)
+static int cond_insertf(struct avtab *a, const struct avtab_key *k,
+			const struct avtab_datum *d, void *ptr)
 {
 	struct cond_insertf_data *data = ptr;
 	struct policydb *p = data->p;

From 8a922805fb0950187ff037801e337aec010a6ccb Mon Sep 17 00:00:00 2001
From: Zhongjun Tan <tanzhongjun@yulong.com>
Date: Fri, 9 Apr 2021 13:48:41 +0800
Subject: [PATCH 0068/3292] selinux: delete selinux_xfrm_policy_lookup()
 useless argument

seliunx_xfrm_policy_lookup() is hooks of security_xfrm_policy_lookup().
The dir argument is uselss in security_xfrm_policy_lookup(). So
remove the dir argument from selinux_xfrm_policy_lookup() and
security_xfrm_policy_lookup().

Signed-off-by: Zhongjun Tan <tanzhongjun@yulong.com>
[PM: reformat the subject line]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h   | 3 +--
 include/linux/security.h        | 4 ++--
 net/xfrm/xfrm_policy.c          | 6 ++----
 security/security.c             | 4 ++--
 security/selinux/include/xfrm.h | 2 +-
 security/selinux/xfrm.c         | 2 +-
 6 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 04c01794de83..2adeea44c0d5 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -358,8 +358,7 @@ LSM_HOOK(int, 0, xfrm_state_alloc_acquire, struct xfrm_state *x,
 	 struct xfrm_sec_ctx *polsec, u32 secid)
 LSM_HOOK(void, LSM_RET_VOID, xfrm_state_free_security, struct xfrm_state *x)
 LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x)
-LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid,
-	 u8 dir)
+LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid)
 LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x,
 	 struct xfrm_policy *xp, const struct flowi_common *flic)
 LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid,
diff --git a/include/linux/security.h b/include/linux/security.h
index 06f7c50ce77f..24eda04221e9 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1681,7 +1681,7 @@ int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
 				      struct xfrm_sec_ctx *polsec, u32 secid);
 int security_xfrm_state_delete(struct xfrm_state *x);
 void security_xfrm_state_free(struct xfrm_state *x);
-int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
+int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid);
 int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
 				       struct xfrm_policy *xp,
 				       const struct flowi_common *flic);
@@ -1732,7 +1732,7 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x)
 	return 0;
 }
 
-static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid)
 {
 	return 0;
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ce500f847b99..e70cf1d2c0e0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1902,8 +1902,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 
 	match = xfrm_selector_match(sel, fl, family);
 	if (match)
-		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
-						  dir);
+		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid);
 	return ret;
 }
 
@@ -2181,8 +2180,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 				goto out;
 			}
 			err = security_xfrm_policy_lookup(pol->security,
-						      fl->flowi_secid,
-						      dir);
+						      fl->flowi_secid);
 			if (!err) {
 				if (!xfrm_pol_hold_rcu(pol))
 					goto again;
diff --git a/security/security.c b/security/security.c
index b38155b2de83..0c1c9796e3e4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2466,9 +2466,9 @@ void security_xfrm_state_free(struct xfrm_state *x)
 	call_void_hook(xfrm_state_free_security, x);
 }
 
-int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid)
 {
-	return call_int_hook(xfrm_policy_lookup, 0, ctx, fl_secid, dir);
+	return call_int_hook(xfrm_policy_lookup, 0, ctx, fl_secid);
 }
 
 int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 0a6f34a7a971..74159400eeee 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -23,7 +23,7 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
 				     struct xfrm_sec_ctx *polsec, u32 secid);
 void selinux_xfrm_state_free(struct xfrm_state *x);
 int selinux_xfrm_state_delete(struct xfrm_state *x);
-int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
+int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid);
 int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
 				      struct xfrm_policy *xp,
 				      const struct flowi_common *flic);
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 634f3db24da6..be83e5ce4469 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -150,7 +150,7 @@ static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx)
  * LSM hook implementation that authorizes that a flow can use a xfrm policy
  * rule.
  */
-int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid)
 {
 	int rc;
 

From 7cffc377e1633b84a880b21eebf10562eaa47a23 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Sun, 25 Apr 2021 14:05:37 +0530
Subject: [PATCH 0069/3292] selinux: Corrected comment to match kernel-doc
 comment

Minor documentation update.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/avc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index ad451cf9375e..c90f45587a0c 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -819,7 +819,7 @@ out:
 }
 
 /**
- * avc_update_node Update an AVC entry
+ * avc_update_node - Update an AVC entry
  * @event : Updating event
  * @perms : Permission mask bits
  * @ssid,@tsid,@tclass : identifier of an AVC entry

From fd781f459b60522f9fbfa1e125f122cf86d4a45b Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 28 Apr 2021 18:00:15 +0800
Subject: [PATCH 0070/3292] selinux: Remove redundant assignment to rc

Variable rc is set to '-EINVAL' but this value is never read as
it is overwritten or not used later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

security/selinux/ss/services.c:2103:3: warning: Value stored to 'rc' is
never read [clang-analyzer-deadcode.DeadStores].

security/selinux/ss/services.c:2079:2: warning: Value stored to 'rc' is
never read [clang-analyzer-deadcode.DeadStores].

security/selinux/ss/services.c:2071:2: warning: Value stored to 'rc' is
never read [clang-analyzer-deadcode.DeadStores].

security/selinux/ss/services.c:2062:2: warning: Value stored to 'rc' is
never read [clang-analyzer-deadcode.DeadStores].

security/selinux/ss/policydb.c:2592:3: warning: Value stored to 'rc' is
never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/policydb.c | 1 -
 security/selinux/ss/services.c | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 9fccf417006b..defc5ef35c66 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -2589,7 +2589,6 @@ int policydb_read(struct policydb *p, void *fp)
 		if (rc)
 			goto bad;
 
-		rc = -EINVAL;
 		rtk->role = le32_to_cpu(buf[0]);
 		rtk->type = le32_to_cpu(buf[1]);
 		rtd->new_role = le32_to_cpu(buf[2]);
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 0a5ce001609b..3658488efdcb 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2060,7 +2060,6 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
 	context_init(newc);
 
 	/* Convert the user. */
-	rc = -EINVAL;
 	usrdatum = symtab_search(&args->newp->p_users,
 				 sym_name(args->oldp,
 					  SYM_USERS, oldc->user - 1));
@@ -2069,7 +2068,6 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
 	newc->user = usrdatum->value;
 
 	/* Convert the role. */
-	rc = -EINVAL;
 	role = symtab_search(&args->newp->p_roles,
 			     sym_name(args->oldp, SYM_ROLES, oldc->role - 1));
 	if (!role)
@@ -2077,7 +2075,6 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
 	newc->role = role->value;
 
 	/* Convert the type. */
-	rc = -EINVAL;
 	typdatum = symtab_search(&args->newp->p_types,
 				 sym_name(args->oldp,
 					  SYM_TYPES, oldc->type - 1));
@@ -2101,7 +2098,6 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
 		oc = args->newp->ocontexts[OCON_ISID];
 		while (oc && oc->sid[0] != SECINITSID_UNLABELED)
 			oc = oc->next;
-		rc = -EINVAL;
 		if (!oc) {
 			pr_err("SELinux:  unable to look up"
 				" the initial SIDs list\n");

From 37f05601eabc29f82c03b461a22d8fafacd736d2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 6 May 2021 22:41:13 -0700
Subject: [PATCH 0071/3292] bpftool: Strip const/volatile/restrict modifiers
 from .bss and .data vars

Similarly to .rodata, strip any const/volatile/restrict modifiers when
generating BPF skeleton. They are not helpful and actually just get in the way.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210507054119.270888-2-andrii@kernel.org
---
 tools/bpf/bpftool/gen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 31ade77f5ef8..440a2fcb6441 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -106,8 +106,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
 
 	if (strcmp(sec_name, ".data") == 0) {
 		sec_ident = "data";
+		strip_mods = true;
 	} else if (strcmp(sec_name, ".bss") == 0) {
 		sec_ident = "bss";
+		strip_mods = true;
 	} else if (strcmp(sec_name, ".rodata") == 0) {
 		sec_ident = "rodata";
 		strip_mods = true;

From fdbf5ddeb855a80831af2e5bb9db9218926e6789 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 6 May 2021 22:41:14 -0700
Subject: [PATCH 0072/3292] libbpf: Add per-file linker opts

For better future extensibility add per-file linker options. Currently
the set of available options is empty. This changes bpf_linker__add_file()
API, but it's not a breaking change as bpf_linker APIs hasn't been released
yet.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210507054119.270888-3-andrii@kernel.org
---
 tools/bpf/bpftool/gen.c |  2 +-
 tools/lib/bpf/libbpf.h  | 10 +++++++++-
 tools/lib/bpf/linker.c  | 16 ++++++++++++----
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 440a2fcb6441..06fee4a2910a 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -638,7 +638,7 @@ static int do_object(int argc, char **argv)
 	while (argc) {
 		file = GET_ARG();
 
-		err = bpf_linker__add_file(linker, file);
+		err = bpf_linker__add_file(linker, file, NULL);
 		if (err) {
 			p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
 			goto out;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index bec4e6a6e31d..3f3a24763459 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -768,10 +768,18 @@ struct bpf_linker_opts {
 };
 #define bpf_linker_opts__last_field sz
 
+struct bpf_linker_file_opts {
+	/* size of this struct, for forward/backward compatiblity */
+	size_t sz;
+};
+#define bpf_linker_file_opts__last_field sz
+
 struct bpf_linker;
 
 LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
-LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker,
+				    const char *filename,
+				    const struct bpf_linker_file_opts *opts);
 LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
 LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
 
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 9de084b1c699..3b1fbc27be37 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -158,7 +158,9 @@ struct bpf_linker {
 
 static int init_output_elf(struct bpf_linker *linker, const char *file);
 
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+				const struct bpf_linker_file_opts *opts,
+				struct src_obj *obj);
 static int linker_sanity_check_elf(struct src_obj *obj);
 static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
 static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
@@ -435,15 +437,19 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 	return 0;
 }
 
-int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
+			 const struct bpf_linker_file_opts *opts)
 {
 	struct src_obj obj = {};
 	int err = 0;
 
+	if (!OPTS_VALID(opts, bpf_linker_file_opts))
+		return -EINVAL;
+
 	if (!linker->elf)
 		return -EINVAL;
 
-	err = err ?: linker_load_obj_file(linker, filename, &obj);
+	err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
 	err = err ?: linker_append_sec_data(linker, &obj);
 	err = err ?: linker_append_elf_syms(linker, &obj);
 	err = err ?: linker_append_elf_relos(linker, &obj);
@@ -529,7 +535,9 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
 	return sec;
 }
 
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj)
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
+				const struct bpf_linker_file_opts *opts,
+				struct src_obj *obj)
 {
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 	const int host_endianness = ELFDATA2LSB;

From 256eab48e70c0eaf5b1b9af83c0588491986c7de Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 6 May 2021 22:41:15 -0700
Subject: [PATCH 0073/3292] selftests/bpf: Stop using static variables for
 passing data to/from user-space

In preparation of skipping emitting static variables in BPF skeletons, switch
all current selftests uses of static variables to pass data between BPF and
user-space to use global variables.

All non-read-only `static volatile` variables become just plain global
variables by dropping `static volatile` part.

Read-only `static volatile const` variables, though, still require `volatile`
modifier, otherwise compiler will ignore whatever values are set from
user-space.

Few static linker tests are using name-conflicting static variables to
validate that static linker still properly handles static variables and
doesn't trip up on name conflicts.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210507054119.270888-4-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/send_signal.c      | 2 +-
 tools/testing/selftests/bpf/prog_tests/skeleton.c         | 6 ++----
 tools/testing/selftests/bpf/prog_tests/static_linked.c    | 5 -----
 tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c   | 4 ++--
 tools/testing/selftests/bpf/progs/kfree_skb.c             | 4 ++--
 tools/testing/selftests/bpf/progs/tailcall3.c             | 2 +-
 tools/testing/selftests/bpf/progs/tailcall4.c             | 2 +-
 tools/testing/selftests/bpf/progs/tailcall5.c             | 2 +-
 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c     | 2 +-
 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c     | 2 +-
 tools/testing/selftests/bpf/progs/test_check_mtu.c        | 4 ++--
 tools/testing/selftests/bpf/progs/test_cls_redirect.c     | 4 ++--
 tools/testing/selftests/bpf/progs/test_global_func_args.c | 2 +-
 tools/testing/selftests/bpf/progs/test_rdonly_maps.c      | 6 +++---
 tools/testing/selftests/bpf/progs/test_skeleton.c         | 4 ++--
 tools/testing/selftests/bpf/progs/test_snprintf_single.c  | 2 +-
 tools/testing/selftests/bpf/progs/test_sockmap_listen.c   | 4 ++--
 tools/testing/selftests/bpf/progs/test_static_linked1.c   | 8 ++++----
 tools/testing/selftests/bpf/progs/test_static_linked2.c   | 8 ++++----
 19 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 7043e6ded0e6..a1eade51d440 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include "test_send_signal_kern.skel.h"
 
-static volatile int sigusr1_received = 0;
+int sigusr1_received = 0;
 
 static void sigusr1_handler(int signum)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fe87b77af459..f6f130c99b8c 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -82,10 +82,8 @@ void test_skeleton(void)
 	CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
 	CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
 	CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
-	CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
-	      bss->handler_out5.a, 5);
-	CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
-	      bss->handler_out5.b, 6);
+	CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5);
+	CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6);
 	CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
 
 	CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
index 46556976dccc..ab6acbaf9d8c 100644
--- a/tools/testing/selftests/bpf/prog_tests/static_linked.c
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -14,12 +14,7 @@ void test_static_linked(void)
 		return;
 
 	skel->rodata->rovar1 = 1;
-	skel->bss->static_var1 = 2;
-	skel->bss->static_var11 = 3;
-
 	skel->rodata->rovar2 = 4;
-	skel->bss->static_var2 = 5;
-	skel->bss->static_var22 = 6;
 
 	err = test_static_linked__load(skel);
 	if (!ASSERT_OK(err, "skel_load"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index ee49493dc125..400fdf8d6233 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0;
 __u32 map1_accessed = 0, map2_accessed = 0;
 __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
 
-static volatile const __u32 print_len;
-static volatile const __u32 ret1;
+volatile const __u32 print_len;
+volatile const __u32 ret1;
 
 SEC("iter/bpf_map")
 int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index a46a264ce24e..55e283050cab 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
 	return 0;
 }
 
-static volatile struct {
+struct {
 	bool fentry_test_ok;
 	bool fexit_test_ok;
-} result;
+} result = {};
 
 SEC("fentry/eth_type_trans")
 int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index 739dc2a51e74..910858fe078a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -10,7 +10,7 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
 
-static volatile int count;
+int count = 0;
 
 SEC("classifier/0")
 int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index f82075b47d7d..bd4be135c39d 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -10,7 +10,7 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
 
-static volatile int selector;
+int selector = 0;
 
 #define TAIL_FUNC(x)				\
 	SEC("classifier/" #x)			\
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index ce5450744fd4..adf30a33064e 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -10,7 +10,7 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
 
-static volatile int selector;
+int selector = 0;
 
 #define TAIL_FUNC(x)				\
 	SEC("classifier/" #x)			\
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index 7b1c04183824..3cc4c12817b5 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -20,7 +20,7 @@ int subprog_tail(struct __sk_buff *skb)
 	return 1;
 }
 
-static volatile int count;
+int count = 0;
 
 SEC("classifier/0")
 int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index 9a1b166b7fbe..77df6d4db895 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -9,7 +9,7 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
 
-static volatile int count;
+int count = 0;
 
 __noinline
 int subprog_tail_2(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index c4a9bae96e75..71184af57749 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -11,8 +11,8 @@
 char _license[] SEC("license") = "GPL";
 
 /* Userspace will update with MTU it can see on device */
-static volatile const int GLOBAL_USER_MTU;
-static volatile const __u32 GLOBAL_USER_IFINDEX;
+volatile const int GLOBAL_USER_MTU;
+volatile const __u32 GLOBAL_USER_IFINDEX;
 
 /* BPF-prog will update these with MTU values it can see */
 __u32 global_bpf_mtu_xdp = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 3c1e042962e6..e2a5acc4785c 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -39,8 +39,8 @@ char _license[] SEC("license") = "Dual BSD/GPL";
 /**
  * Destination port and IP used for UDP encapsulation.
  */
-static volatile const __be16 ENCAPSULATION_PORT;
-static volatile const __be32 ENCAPSULATION_IP;
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
 
 typedef struct {
 	uint64_t processed_packets_total;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c
index cae309538a9e..e712bf77daae 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c
@@ -8,7 +8,7 @@ struct S {
 	int v;
 };
 
-static volatile struct S global_variable;
+struct S global_variable = {};
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
index ecbeea2df259..fc8e8a34a3db 100644
--- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -5,7 +5,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-static volatile const struct {
+const struct {
 	unsigned a[4];
 	/*
 	 * if the struct's size is multiple of 16, compiler will put it into
@@ -15,11 +15,11 @@ static volatile const struct {
 	char _y;
 } rdonly_values = { .a = {2, 3, 4, 5} };
 
-static volatile struct {
+struct {
 	unsigned did_run;
 	unsigned iters;
 	unsigned sum;
-} res;
+} res = {};
 
 SEC("raw_tracepoint/sys_enter:skip_loop")
 int skip_loop(struct pt_regs *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 374ccef704e1..441fa1c552c8 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -38,11 +38,11 @@ extern int LINUX_KERNEL_VERSION __kconfig;
 bool bpf_syscall = 0;
 int kern_ver = 0;
 
+struct s out5 = {};
+
 SEC("raw_tp/sys_enter")
 int handler(const void *ctx)
 {
-	static volatile struct s out5;
-
 	out1 = in1;
 	out2 = in2;
 	out3 = in3;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
index 402adaf344f9..3095837334d3 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf_single.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -5,7 +5,7 @@
 #include <bpf/bpf_helpers.h>
 
 /* The format string is filled from the userspace such that loading fails */
-static const char fmt[10];
+const char fmt[10];
 
 SEC("raw_tp/sys_enter")
 int handler(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a39eba9f5201..a1cc58b10c7c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,8 +28,8 @@ struct {
 	__type(value, unsigned int);
 } verdict_map SEC(".maps");
 
-static volatile bool test_sockmap; /* toggled by user-space */
-static volatile bool test_ingress; /* toggled by user-space */
+bool test_sockmap = false; /* toggled by user-space */
+bool test_ingress = false; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
 int prog_stream_parser(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
index ea1a6c4c7172..cae304045d9c 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked1.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -4,9 +4,9 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-/* 8-byte aligned .bss */
-static volatile long static_var1;
-static volatile int static_var11;
+/* 8-byte aligned .data */
+static volatile long static_var1 = 2;
+static volatile int static_var2 = 3;
 int var1 = 0;
 /* 4-byte aligned .rodata */
 const volatile int rovar1;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
 SEC("raw_tp/sys_enter")
 int handler1(const void *ctx)
 {
-	var1 = subprog(rovar1) + static_var1 + static_var11;
+	var1 = subprog(rovar1) + static_var1 + static_var2;
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
index 54d8d1ab577c..c54c4e865ed8 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked2.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -4,9 +4,9 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-/* 4-byte aligned .bss */
-static volatile int static_var2;
-static volatile int static_var22;
+/* 4-byte aligned .data */
+static volatile int static_var1 = 5;
+static volatile int static_var2 = 6;
 int var2 = 0;
 /* 8-byte aligned .rodata */
 const volatile long rovar2;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
 SEC("raw_tp/sys_enter")
 int handler2(const void *ctx)
 {
-	var2 = subprog(rovar2) + static_var2 + static_var22;
+	var2 = subprog(rovar2) + static_var1 + static_var2;
 
 	return 0;
 }

From 31332ccb756274c185cfd458b68b29a9371dceac Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 6 May 2021 22:41:16 -0700
Subject: [PATCH 0074/3292] bpftool: Stop emitting static variables in BPF
 skeleton

As discussed in [0], stop emitting static variables in BPF skeletons to avoid
issues with name-conflicting static variables across multiple
statically-linked BPF object files.

Users using static variables to pass data between BPF programs and user-space
should do a trivial one-time switch according to the following simple rules:
  - read-only `static volatile const` variables should be converted to
    `volatile const`;
  - read/write `static volatile` variables should just drop `static volatile`
    modifiers to become global variables/symbols. To better handle older Clang
    versions, such newly converted global variables should be explicitly
    initialized with a specific value or `= 0`/`= {}`, whichever is
    appropriate.

  [0] https://lore.kernel.org/bpf/CAEf4BzZo7_r-hsNvJt3w3kyrmmBJj7ghGY8+k4nvKF0KLjma=w@mail.gmail.com/T/#m664d4b0d6b31ac8b2669360e0fc2d6962e9f5ec1

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210507054119.270888-5-andrii@kernel.org
---
 tools/bpf/bpftool/gen.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 06fee4a2910a..27dceaf66ecb 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -131,6 +131,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
 		int need_off = sec_var->offset, align_off, align;
 		__u32 var_type_id = var->type;
 
+		/* static variables are not exposed through BPF skeleton */
+		if (btf_var(var)->linkage == BTF_VAR_STATIC)
+			continue;
+
 		if (off > need_off) {
 			p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
 			      sec_name, i, need_off, off);

From 247b8634e6446dbc8024685f803290501cba226f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 6 May 2021 22:41:17 -0700
Subject: [PATCH 0075/3292] libbpf: Fix ELF symbol visibility update logic

Fix silly bug in updating ELF symbol's visibility.

Fixes: a46349227cd8 ("libbpf: Add linker extern resolution support for functions and global variables")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210507054119.270888-6-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 3b1fbc27be37..b594a88620ce 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1788,7 +1788,7 @@ static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
 	/* libelf doesn't provide setters for ST_VISIBILITY,
 	 * but it is stored in the lower 2 bits of st_other
 	 */
-	sym->st_other &= 0x03;
+	sym->st_other &= ~0x03;
 	sym->st_other |= sym_vis;
 }
 

From e5670fa0293b05e8e24dae7d18481aba281cb85d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 6 May 2021 22:41:18 -0700
Subject: [PATCH 0076/3292] libbpf: Treat STV_INTERNAL same as STV_HIDDEN for
 functions

Do the same global -> static BTF update for global functions with STV_INTERNAL
visibility to turn on static BPF verification mode.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210507054119.270888-7-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e2a3cf437814..b8cf93fa1b4d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -700,13 +700,14 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 		if (err)
 			return err;
 
-		/* if function is a global/weak symbol, but has hidden
-		 * visibility (STV_HIDDEN), mark its BTF FUNC as static to
-		 * enable more permissive BPF verification mode with more
-		 * outside context available to BPF verifier
+		/* if function is a global/weak symbol, but has restricted
+		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
+		 * as static to enable more permissive BPF verification mode
+		 * with more outside context available to BPF verifier
 		 */
 		if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
-		    && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+		    && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN
+			|| GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL))
 			prog->mark_btf_static = true;
 
 		nr_progs++;

From 1c72e6ab66b9598cac741ed397438a52065a8f1f Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 11 May 2021 14:53:36 +0800
Subject: [PATCH 0077/3292] atm: iphase: fix possible use-after-free in
 ia_module_exit()

This module's remove path calls del_timer(). However, that function
does not wait until the timer handler finishes. This means that the
timer handler may still be running after the driver's remove function
has finished, which would result in a use-after-free.

Fix by calling del_timer_sync(), which makes sure the timer handler
has finished, and unable to re-schedule itself.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index e3f5d073caa6..bc8e8d9f176b 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -3282,7 +3282,7 @@ static void __exit ia_module_exit(void)
 {
 	pci_unregister_driver(&ia_driver);
 
-        del_timer(&ia_timer);
+	del_timer_sync(&ia_timer);
 }
 
 module_init(ia_module_init);

From 009fc857c5f6fda81f2f7dd851b2d54193a8e733 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 11 May 2021 14:58:53 +0800
Subject: [PATCH 0078/3292] mISDN: fix possible use-after-free in HFC_cleanup()

This module's remove path calls del_timer(). However, that function
does not wait until the timer handler finishes. This means that the
timer handler may still be running after the driver's remove function
has finished, which would result in a use-after-free.

Fix by calling del_timer_sync(), which makes sure the timer handler
has finished, and unable to re-schedule itself.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcpci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index 56bd2e9db6ed..e501cb03f211 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -2342,7 +2342,7 @@ static void __exit
 HFC_cleanup(void)
 {
 	if (timer_pending(&hfc_tl))
-		del_timer(&hfc_tl);
+		del_timer_sync(&hfc_tl);
 
 	pci_unregister_driver(&hfc_driver);
 }

From bf30396cdf8132a199af5f8f0e60367876f455df Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 11 May 2021 16:42:22 +0200
Subject: [PATCH 0079/3292] net: wwan: Add unknown port type

Some devices may have ports with unknown type/protocol which need to
be tagged (though not supported by WWAN core). This will be the case
for cdc-wdm based drivers.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/wwan.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index aa05a253dcf9..7216c114d758 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -15,6 +15,7 @@
  * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control
  * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface
  * @WWAN_PORT_FIREHOSE: XML based command protocol
+ * @WWAN_PORT_UNKNOWN: Unknown port type
  * @WWAN_PORT_MAX: Number of supported port types
  */
 enum wwan_port_type {
@@ -23,7 +24,8 @@ enum wwan_port_type {
 	WWAN_PORT_QMI,
 	WWAN_PORT_QCDM,
 	WWAN_PORT_FIREHOSE,
-	WWAN_PORT_MAX,
+	WWAN_PORT_UNKNOWN,
+	WWAN_PORT_MAX = WWAN_PORT_UNKNOWN,
 };
 
 struct wwan_port;

From cac6fb015f719104e60b1c68c15ca5b734f57b9c Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 11 May 2021 16:42:23 +0200
Subject: [PATCH 0080/3292] usb: class: cdc-wdm: WWAN framework integration

The WWAN framework provides a unified way to handle WWAN/modems and its
control port(s). It has initially been introduced to support MHI/PCI
modems, offering the same control protocols as the USB variants such as
MBIM, QMI, AT... The WWAN framework exposes these control protocols as
character devices, similarly to cdc-wdm, but in a bus agnostic fashion.

This change adds registration of the USB modem cdc-wdm control endpoints
to the WWAN framework as standard control ports (wwanXpY...).

Exposing cdc-wdm through WWAN framework normally maintains backward
compatibility, e.g:
    $ qmicli --device-open-qmi -d /dev/wwan0p1QMI --dms-get-ids
instead of
    $ qmicli --device-open-qmi -d /dev/cdc-wdm0 --dms-get-ids

However, some tools may rely on cdc-wdm driver/device name for device
detection. It is then safer to keep the 'legacy' cdc-wdm character
device to prevent any breakage. This is handled in this change by
API mutual exclusion, only one access method can be used at a time,
either cdc-wdm chardev or WWAN API.

Note that unknown channel types (other than MBIM, AT or MBIM) are not
registered to the WWAN framework.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c       |   1 +
 drivers/net/usb/huawei_cdc_ncm.c |   1 +
 drivers/net/usb/qmi_wwan.c       |   3 +-
 drivers/usb/class/cdc-wdm.c      | 180 ++++++++++++++++++++++++++++++-
 include/linux/usb/cdc-wdm.h      |   3 +-
 5 files changed, 182 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 5db66272fc82..42fb75057c15 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -168,6 +168,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
 		subdriver = usb_cdc_wdm_register(ctx->control,
 						 &dev->status->desc,
 						 le16_to_cpu(ctx->mbim_desc->wMaxControlMessage),
+						 WWAN_PORT_MBIM,
 						 cdc_mbim_wdm_manage_power);
 	if (IS_ERR(subdriver)) {
 		ret = PTR_ERR(subdriver);
diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index a87f0dabcdb7..849b77330bf2 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c
@@ -96,6 +96,7 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev,
 		subdriver = usb_cdc_wdm_register(ctx->control,
 						 &usbnet_dev->status->desc,
 						 1024, /* wMaxCommand */
+						 WWAN_PORT_AT,
 						 huawei_cdc_ncm_wdm_manage_power);
 	if (IS_ERR(subdriver)) {
 		ret = PTR_ERR(subdriver);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 6700f1970b24..db157f21a322 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -710,7 +710,8 @@ static int qmi_wwan_register_subdriver(struct usbnet *dev)
 
 	/* register subdriver */
 	subdriver = usb_cdc_wdm_register(info->control, &dev->status->desc,
-					 4096, &qmi_wwan_cdc_wdm_manage_power);
+					 4096, WWAN_PORT_QMI,
+					 &qmi_wwan_cdc_wdm_manage_power);
 	if (IS_ERR(subdriver)) {
 		dev_err(&info->control->dev, "subdriver registration failed\n");
 		rv = PTR_ERR(subdriver);
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b73..457b00c6e984 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -21,8 +21,10 @@
 #include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/poll.h>
+#include <linux/skbuff.h>
 #include <linux/usb.h>
 #include <linux/usb/cdc.h>
+#include <linux/wwan.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 #include <linux/usb/cdc-wdm.h>
@@ -55,6 +57,7 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
 #define WDM_SUSPENDING		8
 #define WDM_RESETTING		9
 #define WDM_OVERFLOW		10
+#define WDM_WWAN_IN_USE		11
 
 #define WDM_MAX			16
 
@@ -106,6 +109,9 @@ struct wdm_device {
 
 	struct list_head	device_list;
 	int			(*manage_power)(struct usb_interface *, int);
+
+	enum wwan_port_type	wwanp_type;
+	struct wwan_port	*wwanp;
 };
 
 static struct usb_driver wdm_driver;
@@ -157,6 +163,8 @@ static void wdm_out_callback(struct urb *urb)
 	wake_up_all(&desc->wait);
 }
 
+static void wdm_wwan_rx(struct wdm_device *desc, int length);
+
 static void wdm_in_callback(struct urb *urb)
 {
 	unsigned long flags;
@@ -192,6 +200,11 @@ static void wdm_in_callback(struct urb *urb)
 		}
 	}
 
+	if (test_bit(WDM_WWAN_IN_USE, &desc->flags)) {
+		wdm_wwan_rx(desc, length);
+		goto out;
+	}
+
 	/*
 	 * only set a new error if there is no previous error.
 	 * Errors are only cleared during read/open
@@ -226,6 +239,7 @@ skip_error:
 		set_bit(WDM_READ, &desc->flags);
 		wake_up(&desc->wait);
 	}
+out:
 	spin_unlock_irqrestore(&desc->iuspin, flags);
 }
 
@@ -697,6 +711,11 @@ static int wdm_open(struct inode *inode, struct file *file)
 		goto out;
 	file->private_data = desc;
 
+	if (test_bit(WDM_WWAN_IN_USE, &desc->flags)) {
+		rv = -EBUSY;
+		goto out;
+	}
+
 	rv = usb_autopm_get_interface(desc->intf);
 	if (rv < 0) {
 		dev_err(&desc->intf->dev, "Error autopm - %d\n", rv);
@@ -792,6 +811,151 @@ static struct usb_class_driver wdm_class = {
 	.minor_base =	WDM_MINOR_BASE,
 };
 
+/* --- WWAN framework integration --- */
+#ifdef CONFIG_WWAN
+static int wdm_wwan_port_start(struct wwan_port *port)
+{
+	struct wdm_device *desc = wwan_port_get_drvdata(port);
+
+	/* The interface is both exposed via the WWAN framework and as a
+	 * legacy usbmisc chardev. If chardev is already open, just fail
+	 * to prevent concurrent usage. Otherwise, switch to WWAN mode.
+	 */
+	mutex_lock(&wdm_mutex);
+	if (desc->count) {
+		mutex_unlock(&wdm_mutex);
+		return -EBUSY;
+	}
+	set_bit(WDM_WWAN_IN_USE, &desc->flags);
+	mutex_unlock(&wdm_mutex);
+
+	desc->manage_power(desc->intf, 1);
+
+	/* tx is allowed */
+	wwan_port_txon(port);
+
+	/* Start getting events */
+	return usb_submit_urb(desc->validity, GFP_KERNEL);
+}
+
+static void wdm_wwan_port_stop(struct wwan_port *port)
+{
+	struct wdm_device *desc = wwan_port_get_drvdata(port);
+
+	/* Stop all transfers and disable WWAN mode */
+	kill_urbs(desc);
+	desc->manage_power(desc->intf, 0);
+	clear_bit(WDM_READ, &desc->flags);
+	clear_bit(WDM_WWAN_IN_USE, &desc->flags);
+}
+
+static void wdm_wwan_port_tx_complete(struct urb *urb)
+{
+	struct sk_buff *skb = urb->context;
+	struct wdm_device *desc = skb_shinfo(skb)->destructor_arg;
+
+	usb_autopm_put_interface(desc->intf);
+	wwan_port_txon(desc->wwanp);
+	kfree_skb(skb);
+}
+
+static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb)
+{
+	struct wdm_device *desc = wwan_port_get_drvdata(port);
+	struct usb_interface *intf = desc->intf;
+	struct usb_ctrlrequest *req = desc->orq;
+	int rv;
+
+	rv = usb_autopm_get_interface(intf);
+	if (rv)
+		return rv;
+
+	usb_fill_control_urb(
+		desc->command,
+		interface_to_usbdev(intf),
+		usb_sndctrlpipe(interface_to_usbdev(intf), 0),
+		(unsigned char *)req,
+		skb->data,
+		skb->len,
+		wdm_wwan_port_tx_complete,
+		skb
+	);
+
+	req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE);
+	req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND;
+	req->wValue = 0;
+	req->wIndex = desc->inum;
+	req->wLength = cpu_to_le16(skb->len);
+
+	skb_shinfo(skb)->destructor_arg = desc;
+
+	rv = usb_submit_urb(desc->command, GFP_KERNEL);
+	if (rv)
+		usb_autopm_put_interface(intf);
+	else /* One transfer at a time, stop TX until URB completion */
+		wwan_port_txoff(port);
+
+	return rv;
+}
+
+static struct wwan_port_ops wdm_wwan_port_ops = {
+	.start = wdm_wwan_port_start,
+	.stop = wdm_wwan_port_stop,
+	.tx = wdm_wwan_port_tx,
+};
+
+static void wdm_wwan_init(struct wdm_device *desc)
+{
+	struct usb_interface *intf = desc->intf;
+	struct wwan_port *port;
+
+	/* Only register to WWAN core if protocol/type is known */
+	if (desc->wwanp_type == WWAN_PORT_UNKNOWN) {
+		dev_info(&intf->dev, "Unknown control protocol\n");
+		return;
+	}
+
+	port = wwan_create_port(&intf->dev, desc->wwanp_type, &wdm_wwan_port_ops, desc);
+	if (IS_ERR(port)) {
+		dev_err(&intf->dev, "%s: Unable to create WWAN port\n",
+			dev_name(intf->usb_dev));
+		return;
+	}
+
+	desc->wwanp = port;
+}
+
+static void wdm_wwan_deinit(struct wdm_device *desc)
+{
+	if (!desc->wwanp)
+		return;
+
+	wwan_remove_port(desc->wwanp);
+	desc->wwanp = NULL;
+}
+
+static void wdm_wwan_rx(struct wdm_device *desc, int length)
+{
+	struct wwan_port *port = desc->wwanp;
+	struct sk_buff *skb;
+
+	/* Forward data to WWAN port */
+	skb = alloc_skb(length, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	memcpy(skb_put(skb, length), desc->inbuf, length);
+	wwan_port_rx(port, skb);
+
+	/* inbuf has been copied, it is safe to check for outstanding data */
+	schedule_work(&desc->service_outs_intr);
+}
+#else /* CONFIG_WWAN */
+static void wdm_wwan_init(struct wdm_device *desc) {}
+static void wdm_wwan_deinit(struct wdm_device *desc) {}
+static void wdm_wwan_rx(struct wdm_device *desc, int length) {}
+#endif /* CONFIG_WWAN */
+
 /* --- error handling --- */
 static void wdm_rxwork(struct work_struct *work)
 {
@@ -836,7 +1000,8 @@ static void service_interrupt_work(struct work_struct *work)
 /* --- hotplug --- */
 
 static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep,
-		u16 bufsize, int (*manage_power)(struct usb_interface *, int))
+		      u16 bufsize, enum wwan_port_type type,
+		      int (*manage_power)(struct usb_interface *, int))
 {
 	int rv = -ENOMEM;
 	struct wdm_device *desc;
@@ -853,6 +1018,7 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor
 	/* this will be expanded and needed in hardware endianness */
 	desc->inum = cpu_to_le16((u16)intf->cur_altsetting->desc.bInterfaceNumber);
 	desc->intf = intf;
+	desc->wwanp_type = type;
 	INIT_WORK(&desc->rxwork, wdm_rxwork);
 	INIT_WORK(&desc->service_outs_intr, service_interrupt_work);
 
@@ -933,6 +1099,9 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor
 		goto err;
 	else
 		dev_info(&intf->dev, "%s: USB WDM device\n", dev_name(intf->usb_dev));
+
+	wdm_wwan_init(desc);
+
 out:
 	return rv;
 err:
@@ -977,7 +1146,7 @@ static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id)
 		goto err;
 	ep = &iface->endpoint[0].desc;
 
-	rv = wdm_create(intf, ep, maxcom, &wdm_manage_power);
+	rv = wdm_create(intf, ep, maxcom, WWAN_PORT_UNKNOWN, &wdm_manage_power);
 
 err:
 	return rv;
@@ -988,6 +1157,7 @@ err:
  * @intf: usb interface the subdriver will associate with
  * @ep: interrupt endpoint to monitor for notifications
  * @bufsize: maximum message size to support for read/write
+ * @type: Type/protocol of the transported data (MBIM, QMI...)
  * @manage_power: call-back invoked during open and release to
  *                manage the device's power
  * Create WDM usb class character device and associate it with intf
@@ -1005,12 +1175,12 @@ err:
  */
 struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf,
 					struct usb_endpoint_descriptor *ep,
-					int bufsize,
+					int bufsize, enum wwan_port_type type,
 					int (*manage_power)(struct usb_interface *, int))
 {
 	int rv;
 
-	rv = wdm_create(intf, ep, bufsize, manage_power);
+	rv = wdm_create(intf, ep, bufsize, type, manage_power);
 	if (rv < 0)
 		goto err;
 
@@ -1029,6 +1199,8 @@ static void wdm_disconnect(struct usb_interface *intf)
 	desc = wdm_find_device(intf);
 	mutex_lock(&wdm_mutex);
 
+	wdm_wwan_deinit(desc);
+
 	/* the spinlock makes sure no new urbs are generated in the callbacks */
 	spin_lock_irqsave(&desc->iuspin, flags);
 	set_bit(WDM_DISCONNECTING, &desc->flags);
diff --git a/include/linux/usb/cdc-wdm.h b/include/linux/usb/cdc-wdm.h
index 9b895f93d8de..9f5a51f79ba5 100644
--- a/include/linux/usb/cdc-wdm.h
+++ b/include/linux/usb/cdc-wdm.h
@@ -12,11 +12,12 @@
 #ifndef __LINUX_USB_CDC_WDM_H
 #define __LINUX_USB_CDC_WDM_H
 
+#include <linux/wwan.h>
 #include <uapi/linux/usb/cdc-wdm.h>
 
 extern struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf,
 					struct usb_endpoint_descriptor *ep,
-					int bufsize,
+					int bufsize, enum wwan_port_type type,
 					int (*manage_power)(struct usb_interface *, int));
 
 #endif /* __LINUX_USB_CDC_WDM_H */

From faa5f5da809b690542e1108ba66886574ac57d2c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 11 May 2021 13:54:49 -0700
Subject: [PATCH 0081/3292] net/sched: taprio: Drop unnecessary NULL check
 after container_of

The rcu_head pointer passed to taprio_free_sched_cb is never NULL.
That means that the result of container_of() operations on it is also
never NULL, even though rcu_head is the first element of the structure
embedding it. On top of that, it is misleading to perform a NULL check
on the result of container_of() because the position of the contained
element could change, which would make the check invalid. Remove the
unnecessary NULL check.

This change was made automatically with the following Coccinelle script.

@@
type t;
identifier v;
statement s;
@@

<+...
(
  t v = container_of(...);
|
  v = container_of(...);
)
  ...
  when != v
- if (\( !v \| v == NULL \) ) s
...+>

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 5c91df52b8c2..71e8a7a84841 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -114,9 +114,6 @@ static void taprio_free_sched_cb(struct rcu_head *head)
 	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
 	struct sched_entry *entry, *n;
 
-	if (!sched)
-		return;
-
 	list_for_each_entry_safe(entry, n, &sched->entries, list) {
 		list_del(&entry->list);
 		kfree(entry);

From 7d966119f7db5b0bbba61ec3376dad48567aecae Mon Sep 17 00:00:00 2001
From: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Date: Mon, 10 May 2021 21:28:19 +0530
Subject: [PATCH 0082/3292] ACPI: DPTF: Add battery participant for Intel SoCs

Add ACPI Device ID for DPTF battery participant for the Intel
Jasper Lake (INT3532) and Tiger Lake (INTC1050) SoC based platforms.

Signed-off-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/dptf/int340x_thermal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c
index d14025a85ce8..da5d5f0be2f2 100644
--- a/drivers/acpi/dptf/int340x_thermal.c
+++ b/drivers/acpi/dptf/int340x_thermal.c
@@ -24,6 +24,7 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = {
 	{"INT3409"},
 	{"INT340A"},
 	{"INT340B"},
+	{"INT3532"},
 	{"INTC1040"},
 	{"INTC1041"},
 	{"INTC1043"},
@@ -33,6 +34,7 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = {
 	{"INTC1047"},
 	{"INTC1048"},
 	{"INTC1049"},
+	{"INTC1050"},
 	{"INTC1060"},
 	{"INTC1061"},
 	{""},

From 0303ce17347a02863c4ddef9777a42ff0315acb6 Mon Sep 17 00:00:00 2001
From: Hailong Liu <liu.hailong6@zte.com.cn>
Date: Tue, 11 May 2021 22:04:29 +0800
Subject: [PATCH 0083/3292] samples, bpf: Suppress compiler warning

While cross compiling on ARM32 , the casting from pointer to __u64 will
cause warnings:

samples/bpf/task_fd_query_user.c: In function 'main':
samples/bpf/task_fd_query_user.c:399:23: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
399 | uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
| ^
samples/bpf/task_fd_query_user.c:399:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
399 | uprobe_file_offset = (__u64)main - (__u64)&__executable_start;

Workaround this by using "unsigned long" to adapt to different ARCHs.

Signed-off-by: Hailong Liu <liu.hailong6@zte.com.cn>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210511140429.89426-1-liuhailongg6@163.com
---
 samples/bpf/task_fd_query_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index a78025b0026b..c9a0ca8351fd 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -396,7 +396,7 @@ int main(int argc, char **argv)
 	 * on different systems with different compilers. The right way is
 	 * to parse the ELF file. We took a shortcut here.
 	 */
-	uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
+	uprobe_file_offset = (unsigned long)main - (unsigned long)&__executable_start;
 	CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
 					     uprobe_file_offset, 0x0, false,
 					     BPF_FD_TYPE_UPROBE,

From 3f2870989f10ddb276063097d8e44e543277694a Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Mon, 10 May 2021 20:51:59 +0800
Subject: [PATCH 0084/3292] bpf, arm64: Replace STACK_ALIGN() with round_up()
 to align stack size

Use the common function round_up() directly to show the align size
explicitly, the function STACK_ALIGN() is needless, remove it. Other
JITs also just rely on round_up().

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1620651119-5663-1-git-send-email-yangtiezhu@loongson.cn
---
 arch/arm64/net/bpf_jit_comp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f7b194878a99..978502629f4e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -178,9 +178,6 @@ static bool is_addsub_imm(u32 imm)
 	return !(imm & ~0xfff) || !(imm & ~0xfff000);
 }
 
-/* Stack must be multiples of 16B */
-#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
-
 /* Tail call offset to jump into */
 #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
 #define PROLOGUE_OFFSET 8
@@ -255,7 +252,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 			emit(A64_BTI_J, ctx);
 	}
 
-	ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
+	/* Stack must be multiples of 16B */
+	ctx->stack_size = round_up(prog->aux->stack_depth, 16);
 
 	/* Set up function call stack */
 	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

From 34e7434ba4e97f4b85c1423a59b2922ba7dff2ea Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 12 May 2021 15:00:24 +0800
Subject: [PATCH 0085/3292] atm: nicstar: Fix possible use-after-free in
 nicstar_cleanup()

This module's remove path calls del_timer(). However, that function
does not wait until the timer handler finishes. This means that the
timer handler may still be running after the driver's remove function
has finished, which would result in a use-after-free.

Fix by calling del_timer_sync(), which makes sure the timer handler
has finished, and unable to re-schedule itself.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/nicstar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 5c7e4df159b9..b015c3e14336 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -299,7 +299,7 @@ static void __exit nicstar_cleanup(void)
 {
 	XPRINTK("nicstar: nicstar_cleanup() called.\n");
 
-	del_timer(&ns_timer);
+	del_timer_sync(&ns_timer);
 
 	pci_unregister_driver(&nicstar_driver);
 

From d8654f4f9300e5e7cf8d5e7885978541cf61326b Mon Sep 17 00:00:00 2001
From: Jim Ma <majinjing3@gmail.com>
Date: Wed, 12 May 2021 17:00:11 +0800
Subject: [PATCH 0086/3292] tls splice: remove inappropriate flags checking for
 MSG_PEEK

In function tls_sw_splice_read, before call tls_sw_advance_skb
it checks likely(!(flags & MSG_PEEK)), while MSG_PEEK is used
for recvmsg, splice supports SPLICE_F_NONBLOCK, SPLICE_F_MOVE,
SPLICE_F_MORE, should remove this checking.

Signed-off-by: Jim Ma <majinjing3@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1dcb34dfd56b..7b59ec9a24c5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2018,8 +2018,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	if (copied < 0)
 		goto splice_read_end;
 
-	if (likely(!(flags & MSG_PEEK)))
-		tls_sw_advance_skb(sk, skb, copied);
+	tls_sw_advance_skb(sk, skb, copied);
 
 splice_read_end:
 	release_sock(sk);

From 2253042d86f57d90a621ac2513a7a7a13afcf809 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Thu, 13 May 2021 14:26:36 +0200
Subject: [PATCH 0087/3292] ipmi/watchdog: Stop watchdog timer when the current
 action is 'none'

When an IPMI watchdog timer is being stopped in ipmi_close() or
ipmi_ioctl(WDIOS_DISABLECARD), the current watchdog action is updated to
WDOG_TIMEOUT_NONE and _ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB) is called
to install this action. The latter function ends up invoking
__ipmi_set_timeout() which makes the actual 'Set Watchdog Timer' IPMI
request.

For IPMI 1.0, this operation results in fully stopping the watchdog timer.
For IPMI >= 1.5, function __ipmi_set_timeout() always specifies the "don't
stop" flag in the prepared 'Set Watchdog Timer' IPMI request. This causes
that the watchdog timer has its action correctly updated to 'none' but the
timer continues to run. A problem is that IPMI firmware can then still log
an expiration event when the configured timeout is reached, which is
unexpected because the watchdog timer was requested to be stopped.

The patch fixes this problem by not setting the "don't stop" flag in
__ipmi_set_timeout() when the current action is WDOG_TIMEOUT_NONE which
results in stopping the watchdog timer. This makes the behaviour for
IPMI >= 1.5 consistent with IPMI 1.0. It also matches the logic in
__ipmi_heartbeat() which does not allow to reset the watchdog if the
current action is WDOG_TIMEOUT_NONE as that would start the timer.

Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Message-Id: <10a41bdc-9c99-089c-8d89-fa98ce5ea080@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_watchdog.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 32c334e34d55..e4ff3b50de7f 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -371,16 +371,18 @@ static int __ipmi_set_timeout(struct ipmi_smi_msg  *smi_msg,
 	data[0] = 0;
 	WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
 
-	if ((ipmi_version_major > 1)
-	    || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) {
-		/* This is an IPMI 1.5-only feature. */
-		data[0] |= WDOG_DONT_STOP_ON_SET;
-	} else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
-		/*
-		 * In ipmi 1.0, setting the timer stops the watchdog, we
-		 * need to start it back up again.
-		 */
-		hbnow = 1;
+	if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
+		if ((ipmi_version_major > 1) ||
+		    ((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) {
+			/* This is an IPMI 1.5-only feature. */
+			data[0] |= WDOG_DONT_STOP_ON_SET;
+		} else {
+			/*
+			 * In ipmi 1.0, setting the timer stops the watchdog, we
+			 * need to start it back up again.
+			 */
+			hbnow = 1;
+		}
 	}
 
 	data[1] = 0;

From 13511704f8d7591faf19fdb84f0902dff0535ccb Mon Sep 17 00:00:00 2001
From: Yannick Vignon <yannick.vignon@nxp.com>
Date: Tue, 11 May 2021 19:18:29 +0200
Subject: [PATCH 0088/3292] net: taprio offload: enforce qdisc to netdev queue
 mapping

Even though the taprio qdisc is designed for multiqueue devices, all the
queues still point to the same top-level taprio qdisc. This works and is
probably required for software taprio, but at least with offload taprio,
it has an undesirable side effect: because the whole qdisc is run when a
packet has to be sent, it allows packets in a best-effort class to be
processed in the context of a task sending higher priority traffic. If
there are packets left in the qdisc after that first run, the NET_TX
softirq is raised and gets executed immediately in the same process
context. As with any other softirq, it runs up to 10 times and for up to
2ms, during which the calling process is waiting for the sendmsg call (or
similar) to return. In my use case, that calling process is a real-time
task scheduled to send a packet every 2ms, so the long sendmsg calls are
leading to missed timeslots.

By attaching each netdev queue to its own qdisc, as it is done with
the "classic" mq qdisc, each traffic class can be processed independently
without touching the other classes. A high-priority process can then send
packets without getting stuck in the sendmsg call anymore.

Signed-off-by: Yannick Vignon <yannick.vignon@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 85 ++++++++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 71e8a7a84841..66fe2b82af9a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -435,6 +435,11 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	struct Qdisc *child;
 	int queue;
 
+	if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) {
+		WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n");
+		return qdisc_drop(skb, sch, to_free);
+	}
+
 	queue = skb_get_queue_mapping(skb);
 
 	child = q->qdiscs[queue];
@@ -526,23 +531,7 @@ static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
 
 static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
 {
-	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
-	struct sk_buff *skb;
-	int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct Qdisc *child = q->qdiscs[i];
-
-		if (unlikely(!child))
-			continue;
-
-		skb = child->ops->peek(child);
-		if (!skb)
-			continue;
-
-		return skb;
-	}
+	WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n");
 
 	return NULL;
 }
@@ -651,27 +640,7 @@ done:
 
 static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
 {
-	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
-	struct sk_buff *skb;
-	int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		struct Qdisc *child = q->qdiscs[i];
-
-		if (unlikely(!child))
-			continue;
-
-		skb = child->ops->dequeue(child);
-		if (unlikely(!skb))
-			continue;
-
-		qdisc_bstats_update(sch, skb);
-		qdisc_qstats_backlog_dec(sch, skb);
-		sch->q.qlen--;
-
-		return skb;
-	}
+	WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n");
 
 	return NULL;
 }
@@ -1756,6 +1725,37 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 	return taprio_change(sch, opt, extack);
 }
 
+static void taprio_attach(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned int ntx;
+
+	/* Attach underlying qdisc */
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		struct Qdisc *qdisc = q->qdiscs[ntx];
+		struct Qdisc *old;
+
+		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+			old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+			if (ntx < dev->real_num_tx_queues)
+				qdisc_hash_add(qdisc, false);
+		} else {
+			old = dev_graft_qdisc(qdisc->dev_queue, sch);
+			qdisc_refcount_inc(sch);
+		}
+		if (old)
+			qdisc_put(old);
+	}
+
+	/* access to the child qdiscs is not needed in offload mode */
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		kfree(q->qdiscs);
+		q->qdiscs = NULL;
+	}
+}
+
 static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
 					     unsigned long cl)
 {
@@ -1782,8 +1782,12 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
 	if (dev->flags & IFF_UP)
 		dev_deactivate(dev);
 
-	*old = q->qdiscs[cl - 1];
-	q->qdiscs[cl - 1] = new;
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		*old = dev_graft_qdisc(dev_queue, new);
+	} else {
+		*old = q->qdiscs[cl - 1];
+		q->qdiscs[cl - 1] = new;
+	}
 
 	if (new)
 		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2017,6 +2021,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
 	.change		= taprio_change,
 	.destroy	= taprio_destroy,
 	.reset		= taprio_reset,
+	.attach		= taprio_attach,
 	.peek		= taprio_peek,
 	.dequeue	= taprio_dequeue,
 	.enqueue	= taprio_enqueue,

From 4a5fe57e775188be96359a1934501be45fe5f705 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 12 May 2021 12:19:50 +0200
Subject: [PATCH 0089/3292] alx: use fine-grained locking instead of RTNL

In the alx driver, all locking depended on the RTNL, but
that causes issues with ipconfig ("ip=..." command line)
because that waits for the netdev to have a carrier while
holding the RTNL, but the alx workers etc. require RTNL,
so the carrier won't be set until the RTNL is dropped and
can be acquired by alx workers. This causes long delays
at boot, as reported by Nikolai Zhubr.

Really the only sensible thing to do here is to not use
the RTNL for everything, but instead have fine-grained
locking for just the driver. Do that, it's not that hard.

Reported-by: Nikolai Zhubr <zhubr.2@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/alx.h     |  2 +
 drivers/net/ethernet/atheros/alx/ethtool.c | 17 ++++-
 drivers/net/ethernet/atheros/alx/main.c    | 86 +++++++++++++++-------
 3 files changed, 76 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 9d0e74f6b089..693006c5a498 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -137,6 +137,8 @@ struct alx_priv {
 
 	/* protects hw.stats */
 	spinlock_t stats_lock;
+
+	struct mutex mtx;
 };
 
 extern const struct ethtool_ops alx_ethtool_ops;
diff --git a/drivers/net/ethernet/atheros/alx/ethtool.c b/drivers/net/ethernet/atheros/alx/ethtool.c
index 2f4eabf652e8..f3627157a38a 100644
--- a/drivers/net/ethernet/atheros/alx/ethtool.c
+++ b/drivers/net/ethernet/atheros/alx/ethtool.c
@@ -163,8 +163,10 @@ static int alx_get_link_ksettings(struct net_device *netdev,
 		}
 	}
 
+	mutex_lock(&alx->mtx);
 	cmd->base.speed = hw->link_speed;
 	cmd->base.duplex = hw->duplex;
+	mutex_unlock(&alx->mtx);
 
 	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
 						supported);
@@ -181,8 +183,7 @@ static int alx_set_link_ksettings(struct net_device *netdev,
 	struct alx_hw *hw = &alx->hw;
 	u32 adv_cfg;
 	u32 advertising;
-
-	ASSERT_RTNL();
+	int ret;
 
 	ethtool_convert_link_mode_to_legacy_u32(&advertising,
 						cmd->link_modes.advertising);
@@ -200,7 +201,12 @@ static int alx_set_link_ksettings(struct net_device *netdev,
 	}
 
 	hw->adv_cfg = adv_cfg;
-	return alx_setup_speed_duplex(hw, adv_cfg, hw->flowctrl);
+
+	mutex_lock(&alx->mtx);
+	ret = alx_setup_speed_duplex(hw, adv_cfg, hw->flowctrl);
+	mutex_unlock(&alx->mtx);
+
+	return ret;
 }
 
 static void alx_get_pauseparam(struct net_device *netdev,
@@ -209,10 +215,12 @@ static void alx_get_pauseparam(struct net_device *netdev,
 	struct alx_priv *alx = netdev_priv(netdev);
 	struct alx_hw *hw = &alx->hw;
 
+	mutex_lock(&alx->mtx);
 	pause->autoneg = !!(hw->flowctrl & ALX_FC_ANEG &&
 			    hw->adv_cfg & ADVERTISED_Autoneg);
 	pause->tx_pause = !!(hw->flowctrl & ALX_FC_TX);
 	pause->rx_pause = !!(hw->flowctrl & ALX_FC_RX);
+	mutex_unlock(&alx->mtx);
 }
 
 
@@ -232,7 +240,7 @@ static int alx_set_pauseparam(struct net_device *netdev,
 	if (pause->autoneg)
 		fc |= ALX_FC_ANEG;
 
-	ASSERT_RTNL();
+	mutex_lock(&alx->mtx);
 
 	/* restart auto-neg for auto-mode */
 	if (hw->adv_cfg & ADVERTISED_Autoneg) {
@@ -254,6 +262,7 @@ static int alx_set_pauseparam(struct net_device *netdev,
 		alx_cfg_mac_flowcontrol(hw, fc);
 
 	hw->flowctrl = fc;
+	mutex_unlock(&alx->mtx);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index b3d74332ed33..e8884879a50f 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (c) 2013, 2021 Johannes Berg <johannes@sipsolutions.net>
  *
  *  This file is free software: you may copy, redistribute and/or modify it
  *  under the terms of the GNU General Public License as published by the
@@ -1091,8 +1091,9 @@ static int alx_init_sw(struct alx_priv *alx)
 		      ALX_MAC_CTRL_RXFC_EN |
 		      ALX_MAC_CTRL_TXFC_EN |
 		      7 << ALX_MAC_CTRL_PRMBLEN_SHIFT;
+	mutex_init(&alx->mtx);
 
-	return err;
+	return 0;
 }
 
 
@@ -1122,6 +1123,8 @@ static void alx_halt(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
 
+	lockdep_assert_held(&alx->mtx);
+
 	alx_netif_stop(alx);
 	hw->link_speed = SPEED_UNKNOWN;
 	hw->duplex = DUPLEX_UNKNOWN;
@@ -1147,6 +1150,8 @@ static void alx_configure(struct alx_priv *alx)
 
 static void alx_activate(struct alx_priv *alx)
 {
+	lockdep_assert_held(&alx->mtx);
+
 	/* hardware setting lost, restore it */
 	alx_reinit_rings(alx);
 	alx_configure(alx);
@@ -1161,7 +1166,7 @@ static void alx_activate(struct alx_priv *alx)
 
 static void alx_reinit(struct alx_priv *alx)
 {
-	ASSERT_RTNL();
+	lockdep_assert_held(&alx->mtx);
 
 	alx_halt(alx);
 	alx_activate(alx);
@@ -1249,6 +1254,8 @@ out_disable_adv_intr:
 
 static void __alx_stop(struct alx_priv *alx)
 {
+	lockdep_assert_held(&alx->mtx);
+
 	alx_free_irq(alx);
 
 	cancel_work_sync(&alx->link_check_wk);
@@ -1284,6 +1291,8 @@ static void alx_check_link(struct alx_priv *alx)
 	int old_speed;
 	int err;
 
+	lockdep_assert_held(&alx->mtx);
+
 	/* clear PHY internal interrupt status, otherwise the main
 	 * interrupt status will be asserted forever
 	 */
@@ -1338,12 +1347,24 @@ reset:
 
 static int alx_open(struct net_device *netdev)
 {
-	return __alx_open(netdev_priv(netdev), false);
+	struct alx_priv *alx = netdev_priv(netdev);
+	int ret;
+
+	mutex_lock(&alx->mtx);
+	ret = __alx_open(alx, false);
+	mutex_unlock(&alx->mtx);
+
+	return ret;
 }
 
 static int alx_stop(struct net_device *netdev)
 {
-	__alx_stop(netdev_priv(netdev));
+	struct alx_priv *alx = netdev_priv(netdev);
+
+	mutex_lock(&alx->mtx);
+	__alx_stop(alx);
+	mutex_unlock(&alx->mtx);
+
 	return 0;
 }
 
@@ -1353,18 +1374,18 @@ static void alx_link_check(struct work_struct *work)
 
 	alx = container_of(work, struct alx_priv, link_check_wk);
 
-	rtnl_lock();
+	mutex_lock(&alx->mtx);
 	alx_check_link(alx);
-	rtnl_unlock();
+	mutex_unlock(&alx->mtx);
 }
 
 static void alx_reset(struct work_struct *work)
 {
 	struct alx_priv *alx = container_of(work, struct alx_priv, reset_wk);
 
-	rtnl_lock();
+	mutex_lock(&alx->mtx);
 	alx_reinit(alx);
-	rtnl_unlock();
+	mutex_unlock(&alx->mtx);
 }
 
 static int alx_tpd_req(struct sk_buff *skb)
@@ -1771,6 +1792,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_unmap;
 	}
 
+	mutex_lock(&alx->mtx);
+
 	alx_reset_pcie(hw);
 
 	phy_configured = alx_phy_configured(hw);
@@ -1781,7 +1804,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	err = alx_reset_mac(hw);
 	if (err) {
 		dev_err(&pdev->dev, "MAC Reset failed, error = %d\n", err);
-		goto out_unmap;
+		goto out_unlock;
 	}
 
 	/* setup link to put it in a known good starting state */
@@ -1791,7 +1814,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			dev_err(&pdev->dev,
 				"failed to configure PHY speed/duplex (err=%d)\n",
 				err);
-			goto out_unmap;
+			goto out_unlock;
 		}
 	}
 
@@ -1824,9 +1847,11 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!alx_get_phy_info(hw)) {
 		dev_err(&pdev->dev, "failed to identify PHY\n");
 		err = -EIO;
-		goto out_unmap;
+		goto out_unlock;
 	}
 
+	mutex_unlock(&alx->mtx);
+
 	INIT_WORK(&alx->link_check_wk, alx_link_check);
 	INIT_WORK(&alx->reset_wk, alx_reset);
 	netif_carrier_off(netdev);
@@ -1834,7 +1859,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "register netdevice failed\n");
-		goto out_unmap;
+		goto out_unlock;
 	}
 
 	netdev_info(netdev,
@@ -1843,6 +1868,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	return 0;
 
+out_unlock:
+	mutex_unlock(&alx->mtx);
 out_unmap:
 	iounmap(hw->hw_addr);
 out_free_netdev:
@@ -1869,6 +1896,8 @@ static void alx_remove(struct pci_dev *pdev)
 	pci_disable_pcie_error_reporting(pdev);
 	pci_disable_device(pdev);
 
+	mutex_destroy(&alx->mtx);
+
 	free_netdev(alx->dev);
 }
 
@@ -1880,7 +1909,11 @@ static int alx_suspend(struct device *dev)
 	if (!netif_running(alx->dev))
 		return 0;
 	netif_device_detach(alx->dev);
+
+	mutex_lock(&alx->mtx);
 	__alx_stop(alx);
+	mutex_unlock(&alx->mtx);
+
 	return 0;
 }
 
@@ -1890,20 +1923,23 @@ static int alx_resume(struct device *dev)
 	struct alx_hw *hw = &alx->hw;
 	int err;
 
+	mutex_lock(&alx->mtx);
 	alx_reset_phy(hw);
 
-	if (!netif_running(alx->dev))
-		return 0;
+	if (!netif_running(alx->dev)) {
+		err = 0;
+		goto unlock;
+	}
 
-	rtnl_lock();
 	err = __alx_open(alx, true);
-	rtnl_unlock();
 	if (err)
-		return err;
+		goto unlock;
 
 	netif_device_attach(alx->dev);
 
-	return 0;
+unlock:
+	mutex_unlock(&alx->mtx);
+	return err;
 }
 
 static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume);
@@ -1922,7 +1958,7 @@ static pci_ers_result_t alx_pci_error_detected(struct pci_dev *pdev,
 
 	dev_info(&pdev->dev, "pci error detected\n");
 
-	rtnl_lock();
+	mutex_lock(&alx->mtx);
 
 	if (netif_running(netdev)) {
 		netif_device_detach(netdev);
@@ -1934,7 +1970,7 @@ static pci_ers_result_t alx_pci_error_detected(struct pci_dev *pdev,
 	else
 		pci_disable_device(pdev);
 
-	rtnl_unlock();
+	mutex_unlock(&alx->mtx);
 
 	return rc;
 }
@@ -1947,7 +1983,7 @@ static pci_ers_result_t alx_pci_error_slot_reset(struct pci_dev *pdev)
 
 	dev_info(&pdev->dev, "pci error slot reset\n");
 
-	rtnl_lock();
+	mutex_lock(&alx->mtx);
 
 	if (pci_enable_device(pdev)) {
 		dev_err(&pdev->dev, "Failed to re-enable PCI device after reset\n");
@@ -1960,7 +1996,7 @@ static pci_ers_result_t alx_pci_error_slot_reset(struct pci_dev *pdev)
 	if (!alx_reset_mac(hw))
 		rc = PCI_ERS_RESULT_RECOVERED;
 out:
-	rtnl_unlock();
+	mutex_unlock(&alx->mtx);
 
 	return rc;
 }
@@ -1972,14 +2008,14 @@ static void alx_pci_error_resume(struct pci_dev *pdev)
 
 	dev_info(&pdev->dev, "pci error resume\n");
 
-	rtnl_lock();
+	mutex_lock(&alx->mtx);
 
 	if (netif_running(netdev)) {
 		alx_activate(alx);
 		netif_device_attach(netdev);
 	}
 
-	rtnl_unlock();
+	mutex_unlock(&alx->mtx);
 }
 
 static const struct pci_error_handlers alx_err_handlers = {

From 8380c81d5c4fced6f4397795a5ae65758272bbfd Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 12 May 2021 23:43:24 +0200
Subject: [PATCH 0090/3292] net: Treat __napi_schedule_irqoff() as
 __napi_schedule() on PREEMPT_RT

__napi_schedule_irqoff() is an optimized version of __napi_schedule()
which can be used where it is known that interrupts are disabled,
e.g. in interrupt-handlers, spin_lock_irq() sections or hrtimer
callbacks.

On PREEMPT_RT enabled kernels this assumptions is not true. Force-
threaded interrupt handlers and spinlocks are not disabling interrupts
and the NAPI hrtimer callback is forced into softirq context which runs
with interrupts enabled as well.

Chasing all usage sites of __napi_schedule_irqoff() is a whack-a-mole
game so make __napi_schedule_irqoff() invoke __napi_schedule() for
PREEMPT_RT kernels.

The callers of ____napi_schedule() in the networking core have been
audited and are correct on PREEMPT_RT kernels as well.

Reported-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 222b1d322c96..febb23708184 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6501,11 +6501,18 @@ EXPORT_SYMBOL(napi_schedule_prep);
  * __napi_schedule_irqoff - schedule for receive
  * @n: entry to schedule
  *
- * Variant of __napi_schedule() assuming hard irqs are masked
+ * Variant of __napi_schedule() assuming hard irqs are masked.
+ *
+ * On PREEMPT_RT enabled kernels this maps to __napi_schedule()
+ * because the interrupt disabled assumption might not be true
+ * due to force-threaded interrupts and spinlock substitution.
  */
 void __napi_schedule_irqoff(struct napi_struct *n)
 {
-	____napi_schedule(this_cpu_ptr(&softnet_data), n);
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		____napi_schedule(this_cpu_ptr(&softnet_data), n);
+	else
+		__napi_schedule(n);
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
 

From ce6f709775bdf9bc8dd852a8758e10a98f31f280 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:43 +0200
Subject: [PATCH 0091/3292] net: bridge: mcast: rename multicast router lists
 and timers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants, rename the affected variable to the IPv4
version first to avoid some renames in later commits.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c   |  4 ++--
 net/bridge/br_mdb.c       |  6 ++---
 net/bridge/br_multicast.c | 48 +++++++++++++++++++--------------------
 net/bridge/br_private.h   | 10 ++++----
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6e9b049ae521..eb9847ad40cf 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -276,7 +276,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 	bool allow_mode_include = true;
 	struct hlist_node *rp;
 
-	rp = rcu_dereference(hlist_first_rcu(&br->router_list));
+	rp = rcu_dereference(hlist_first_rcu(&br->ip4_mc_router_list));
 	if (mdst) {
 		p = rcu_dereference(mdst->ports);
 		if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
@@ -290,7 +290,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		struct net_bridge_port *port, *lport, *rport;
 
 		lport = p ? p->key.port : NULL;
-		rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
+		rport = hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
 
 		if ((unsigned long)lport > (unsigned long)rport) {
 			port = lport;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 95fa4af0e8dd..d61def8c4647 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -23,14 +23,14 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 	struct net_bridge_port *p;
 	struct nlattr *nest, *port_nest;
 
-	if (!br->multicast_router || hlist_empty(&br->router_list))
+	if (!br->multicast_router || hlist_empty(&br->ip4_mc_router_list))
 		return 0;
 
 	nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
 	if (nest == NULL)
 		return -EMSGSIZE;
 
-	hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
+	hlist_for_each_entry_rcu(p, &br->ip4_mc_router_list, ip4_rlist) {
 		if (!p)
 			continue;
 		port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
@@ -38,7 +38,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			goto fail;
 		if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
 		    nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
-				br_timer_value(&p->multicast_router_timer)) ||
+				br_timer_value(&p->ip4_mc_router_timer)) ||
 		    nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
 			       p->multicast_router)) {
 			nla_nest_cancel(skb, port_nest);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 226bb05c3b42..6fe93a30b575 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1357,13 +1357,13 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 static void br_multicast_router_expired(struct timer_list *t)
 {
 	struct net_bridge_port *port =
-			from_timer(port, t, multicast_router_timer);
+			from_timer(port, t, ip4_mc_router_timer);
 	struct net_bridge *br = port->br;
 
 	spin_lock(&br->multicast_lock);
 	if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
 	    port->multicast_router == MDB_RTR_TYPE_PERM ||
-	    timer_pending(&port->multicast_router_timer))
+	    timer_pending(&port->ip4_mc_router_timer))
 		goto out;
 
 	__del_port_router(port);
@@ -1386,12 +1386,12 @@ static void br_mc_router_state_change(struct net_bridge *p,
 
 static void br_multicast_local_router_expired(struct timer_list *t)
 {
-	struct net_bridge *br = from_timer(br, t, multicast_router_timer);
+	struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer);
 
 	spin_lock(&br->multicast_lock);
 	if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
 	    br->multicast_router == MDB_RTR_TYPE_PERM ||
-	    timer_pending(&br->multicast_router_timer))
+	    timer_pending(&br->ip4_mc_router_timer))
 		goto out;
 
 	br_mc_router_state_change(br, false);
@@ -1613,7 +1613,7 @@ int br_multicast_add_port(struct net_bridge_port *port)
 	port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 	port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
 
-	timer_setup(&port->multicast_router_timer,
+	timer_setup(&port->ip4_mc_router_timer,
 		    br_multicast_router_expired, 0);
 	timer_setup(&port->ip4_own_query.timer,
 		    br_ip4_multicast_port_query_expired, 0);
@@ -1649,7 +1649,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
 	hlist_move_list(&br->mcast_gc_list, &deleted_head);
 	spin_unlock_bh(&br->multicast_lock);
 	br_multicast_gc(&deleted_head);
-	del_timer_sync(&port->multicast_router_timer);
+	del_timer_sync(&port->ip4_mc_router_timer);
 	free_percpu(port->mcast_stats);
 }
 
@@ -1674,7 +1674,7 @@ static void __br_multicast_enable_port(struct net_bridge_port *port)
 	br_multicast_enable(&port->ip6_own_query);
 #endif
 	if (port->multicast_router == MDB_RTR_TYPE_PERM &&
-	    hlist_unhashed(&port->rlist))
+	    hlist_unhashed(&port->ip4_rlist))
 		br_multicast_add_router(br, port);
 }
 
@@ -1700,7 +1700,7 @@ void br_multicast_disable_port(struct net_bridge_port *port)
 
 	__del_port_router(port);
 
-	del_timer(&port->multicast_router_timer);
+	del_timer(&port->ip4_mc_router_timer);
 	del_timer(&port->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
 	del_timer(&port->ip6_own_query.timer);
@@ -2666,19 +2666,19 @@ static void br_multicast_add_router(struct net_bridge *br,
 	struct net_bridge_port *p;
 	struct hlist_node *slot = NULL;
 
-	if (!hlist_unhashed(&port->rlist))
+	if (!hlist_unhashed(&port->ip4_rlist))
 		return;
 
-	hlist_for_each_entry(p, &br->router_list, rlist) {
+	hlist_for_each_entry(p, &br->ip4_mc_router_list, ip4_rlist) {
 		if ((unsigned long) port >= (unsigned long) p)
 			break;
-		slot = &p->rlist;
+		slot = &p->ip4_rlist;
 	}
 
 	if (slot)
-		hlist_add_behind_rcu(&port->rlist, slot);
+		hlist_add_behind_rcu(&port->ip4_rlist, slot);
 	else
-		hlist_add_head_rcu(&port->rlist, &br->router_list);
+		hlist_add_head_rcu(&port->ip4_rlist, &br->ip4_mc_router_list);
 	br_rtr_notify(br->dev, port, RTM_NEWMDB);
 	br_port_mc_router_state_change(port, true);
 }
@@ -2690,9 +2690,9 @@ static void br_multicast_mark_router(struct net_bridge *br,
 
 	if (!port) {
 		if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
-			if (!timer_pending(&br->multicast_router_timer))
+			if (!timer_pending(&br->ip4_mc_router_timer))
 				br_mc_router_state_change(br, true);
-			mod_timer(&br->multicast_router_timer,
+			mod_timer(&br->ip4_mc_router_timer,
 				  now + br->multicast_querier_interval);
 		}
 		return;
@@ -2704,7 +2704,7 @@ static void br_multicast_mark_router(struct net_bridge *br,
 
 	br_multicast_add_router(br, port);
 
-	mod_timer(&port->multicast_router_timer,
+	mod_timer(&port->ip4_mc_router_timer,
 		  now + br->multicast_querier_interval);
 }
 
@@ -3316,7 +3316,7 @@ void br_multicast_init(struct net_bridge *br)
 	br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
 
 	spin_lock_init(&br->multicast_lock);
-	timer_setup(&br->multicast_router_timer,
+	timer_setup(&br->ip4_mc_router_timer,
 		    br_multicast_local_router_expired, 0);
 	timer_setup(&br->ip4_other_query.timer,
 		    br_ip4_multicast_querier_expired, 0);
@@ -3416,7 +3416,7 @@ void br_multicast_open(struct net_bridge *br)
 
 void br_multicast_stop(struct net_bridge *br)
 {
-	del_timer_sync(&br->multicast_router_timer);
+	del_timer_sync(&br->ip4_mc_router_timer);
 	del_timer_sync(&br->ip4_other_query.timer);
 	del_timer_sync(&br->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -3453,7 +3453,7 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 	case MDB_RTR_TYPE_DISABLED:
 	case MDB_RTR_TYPE_PERM:
 		br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
-		del_timer(&br->multicast_router_timer);
+		del_timer(&br->ip4_mc_router_timer);
 		br->multicast_router = val;
 		err = 0;
 		break;
@@ -3472,9 +3472,9 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 
 static void __del_port_router(struct net_bridge_port *p)
 {
-	if (hlist_unhashed(&p->rlist))
+	if (hlist_unhashed(&p->ip4_rlist))
 		return;
-	hlist_del_init_rcu(&p->rlist);
+	hlist_del_init_rcu(&p->ip4_rlist);
 	br_rtr_notify(p->br->dev, p, RTM_DELMDB);
 	br_port_mc_router_state_change(p, false);
 
@@ -3493,7 +3493,7 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 	if (p->multicast_router == val) {
 		/* Refresh the temp router port timer */
 		if (p->multicast_router == MDB_RTR_TYPE_TEMP)
-			mod_timer(&p->multicast_router_timer,
+			mod_timer(&p->ip4_mc_router_timer,
 				  now + br->multicast_querier_interval);
 		err = 0;
 		goto unlock;
@@ -3502,7 +3502,7 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 	case MDB_RTR_TYPE_DISABLED:
 		p->multicast_router = MDB_RTR_TYPE_DISABLED;
 		__del_port_router(p);
-		del_timer(&p->multicast_router_timer);
+		del_timer(&p->ip4_mc_router_timer);
 		break;
 	case MDB_RTR_TYPE_TEMP_QUERY:
 		p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
@@ -3510,7 +3510,7 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 		break;
 	case MDB_RTR_TYPE_PERM:
 		p->multicast_router = MDB_RTR_TYPE_PERM;
-		del_timer(&p->multicast_router_timer);
+		del_timer(&p->ip4_mc_router_timer);
 		br_multicast_add_router(br, p);
 		break;
 	case MDB_RTR_TYPE_TEMP:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7ce8a77cc6b6..26e91d253687 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -307,6 +307,8 @@ struct net_bridge_port {
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	struct bridge_mcast_own_query	ip4_own_query;
+	struct timer_list		ip4_mc_router_timer;
+	struct hlist_node		ip4_rlist;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct bridge_mcast_own_query	ip6_own_query;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
@@ -314,9 +316,7 @@ struct net_bridge_port {
 	u32				multicast_eht_hosts_cnt;
 	unsigned char			multicast_router;
 	struct bridge_mcast_stats	__percpu *mcast_stats;
-	struct timer_list		multicast_router_timer;
 	struct hlist_head		mglist;
-	struct hlist_node		rlist;
 #endif
 
 #ifdef CONFIG_SYSFS
@@ -449,9 +449,9 @@ struct net_bridge {
 
 	struct hlist_head		mcast_gc_list;
 	struct hlist_head		mdb_list;
-	struct hlist_head		router_list;
 
-	struct timer_list		multicast_router_timer;
+	struct hlist_head		ip4_mc_router_list;
+	struct timer_list		ip4_mc_router_timer;
 	struct bridge_mcast_other_query	ip4_other_query;
 	struct bridge_mcast_own_query	ip4_own_query;
 	struct bridge_mcast_querier	ip4_querier;
@@ -868,7 +868,7 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
 {
 	return br->multicast_router == 2 ||
 	       (br->multicast_router == 1 &&
-		timer_pending(&br->multicast_router_timer));
+		timer_pending(&br->ip4_mc_router_timer));
 }
 
 static inline bool

From 44ebb081dc6934e43d3c7444f183d6426adeca21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:44 +0200
Subject: [PATCH 0092/3292] net: bridge: mcast: add wrappers for router node
 retrieval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants and to avoid IPv6 #ifdef clutter later add
two wrapper functions for router node retrieval in the payload
forwarding code.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c |  5 +++--
 net/bridge/br_private.h | 10 ++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index eb9847ad40cf..07856362538f 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -276,7 +276,8 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 	bool allow_mode_include = true;
 	struct hlist_node *rp;
 
-	rp = rcu_dereference(hlist_first_rcu(&br->ip4_mc_router_list));
+	rp = br_multicast_get_first_rport_node(br, skb);
+
 	if (mdst) {
 		p = rcu_dereference(mdst->ports);
 		if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
@@ -290,7 +291,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		struct net_bridge_port *port, *lport, *rport;
 
 		lport = p ? p->key.port : NULL;
-		rport = hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
+		rport = br_multicast_rport_from_node_skb(rp, skb);
 
 		if ((unsigned long)lport > (unsigned long)rport) {
 			port = lport;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 26e91d253687..d970ef78bf98 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -864,6 +864,16 @@ static inline bool br_group_is_l2(const struct br_ip *group)
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
+static inline struct hlist_node *
+br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) {
+	return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list));
+}
+
+static inline struct net_bridge_port *
+br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) {
+	return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
+}
+
 static inline bool br_multicast_is_router(struct net_bridge *br)
 {
 	return br->multicast_router == 2 ||

From ff391c5d9871894c620f1e6ae2b18d7db572e49d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:45 +0200
Subject: [PATCH 0093/3292] net: bridge: mcast: prepare mdb netlink for mcast
 router split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants and to avoid IPv6 #ifdef clutter later add
some inline functions for the protocol specific parts in the mdb router
netlink code. Also the we need iterate over the port instead of router
list to be able put one router port entry with both the IPv4 and IPv6
multicast router info later.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index d61def8c4647..482edb9aadc7 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -16,29 +16,58 @@
 
 #include "br_private.h"
 
+static bool br_rports_have_mc_router(struct net_bridge *br)
+{
+	return !hlist_empty(&br->ip4_mc_router_list);
+}
+
+static bool
+br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
+{
+	*timer = br_timer_value(&port->ip4_mc_router_timer);
+	return !hlist_unhashed(&port->ip4_rlist);
+}
+
+static bool
+br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
+{
+	*timer = 0;
+	return false;
+}
+
 static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			       struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *p;
+	bool have_ip4_mc_rtr, have_ip6_mc_rtr;
+	unsigned long ip4_timer, ip6_timer;
 	struct nlattr *nest, *port_nest;
+	struct net_bridge_port *p;
 
-	if (!br->multicast_router || hlist_empty(&br->ip4_mc_router_list))
+	if (!br->multicast_router)
+		return 0;
+
+	if (!br_rports_have_mc_router(br))
 		return 0;
 
 	nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
 	if (nest == NULL)
 		return -EMSGSIZE;
 
-	hlist_for_each_entry_rcu(p, &br->ip4_mc_router_list, ip4_rlist) {
-		if (!p)
+	list_for_each_entry_rcu(p, &br->port_list, list) {
+		have_ip4_mc_rtr = br_ip4_rports_get_timer(p, &ip4_timer);
+		have_ip6_mc_rtr = br_ip6_rports_get_timer(p, &ip6_timer);
+
+		if (!have_ip4_mc_rtr && !have_ip6_mc_rtr)
 			continue;
+
 		port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
 		if (!port_nest)
 			goto fail;
+
 		if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
 		    nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
-				br_timer_value(&p->ip4_mc_router_timer)) ||
+				max(ip4_timer, ip6_timer)) ||
 		    nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
 			       p->multicast_router)) {
 			nla_nest_cancel(skb, port_nest);

From b19232effd09c2cb5e11b1b74547406a3c9adc5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:46 +0200
Subject: [PATCH 0094/3292] net: bridge: mcast: prepare query reception for
 mcast router split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants and as the br_multicast_mark_router() will
be split for that remove the select querier wrapper and instead add
ip4 and ip6 variants for br_multicast_query_received().

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 53 ++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6fe93a30b575..7edbbc9941ea 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2615,22 +2615,6 @@ update:
 }
 #endif
 
-static bool br_multicast_select_querier(struct net_bridge *br,
-					struct net_bridge_port *port,
-					struct br_ip *saddr)
-{
-	switch (saddr->proto) {
-	case htons(ETH_P_IP):
-		return br_ip4_multicast_select_querier(br, port, saddr->src.ip4);
-#if IS_ENABLED(CONFIG_IPV6)
-	case htons(ETH_P_IPV6):
-		return br_ip6_multicast_select_querier(br, port, &saddr->src.ip6);
-#endif
-	}
-
-	return false;
-}
-
 static void
 br_multicast_update_query_timer(struct net_bridge *br,
 				struct bridge_mcast_other_query *query,
@@ -2708,19 +2692,36 @@ static void br_multicast_mark_router(struct net_bridge *br,
 		  now + br->multicast_querier_interval);
 }
 
-static void br_multicast_query_received(struct net_bridge *br,
-					struct net_bridge_port *port,
-					struct bridge_mcast_other_query *query,
-					struct br_ip *saddr,
-					unsigned long max_delay)
+static void
+br_ip4_multicast_query_received(struct net_bridge *br,
+				struct net_bridge_port *port,
+				struct bridge_mcast_other_query *query,
+				struct br_ip *saddr,
+				unsigned long max_delay)
 {
-	if (!br_multicast_select_querier(br, port, saddr))
+	if (!br_ip4_multicast_select_querier(br, port, saddr->src.ip4))
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
 	br_multicast_mark_router(br, port);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void
+br_ip6_multicast_query_received(struct net_bridge *br,
+				struct net_bridge_port *port,
+				struct bridge_mcast_other_query *query,
+				struct br_ip *saddr,
+				unsigned long max_delay)
+{
+	if (!br_ip6_multicast_select_querier(br, port, &saddr->src.ip6))
+		return;
+
+	br_multicast_update_query_timer(br, query, max_delay);
+	br_multicast_mark_router(br, port);
+}
+#endif
+
 static void br_ip4_multicast_query(struct net_bridge *br,
 				   struct net_bridge_port *port,
 				   struct sk_buff *skb,
@@ -2768,8 +2769,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
 		saddr.proto = htons(ETH_P_IP);
 		saddr.src.ip4 = iph->saddr;
 
-		br_multicast_query_received(br, port, &br->ip4_other_query,
-					    &saddr, max_delay);
+		br_ip4_multicast_query_received(br, port, &br->ip4_other_query,
+						&saddr, max_delay);
 		goto out;
 	}
 
@@ -2856,8 +2857,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 		saddr.proto = htons(ETH_P_IPV6);
 		saddr.src.ip6 = ipv6_hdr(skb)->saddr;
 
-		br_multicast_query_received(br, port, &br->ip6_other_query,
-					    &saddr, max_delay);
+		br_ip6_multicast_query_received(br, port, &br->ip6_other_query,
+						&saddr, max_delay);
 		goto out;
 	} else if (!group) {
 		goto out;

From 1a3065a26807b4cdd65d3b696ddb18385610f7da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:47 +0200
Subject: [PATCH 0095/3292] net: bridge: mcast: prepare is-router function for
 mcast router split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants make br_multicast_is_router() protocol
family aware.

Note that for now br_ip6_multicast_is_router() uses the currently still
common ip4_mc_router_timer for now. It will be renamed to
ip6_mc_router_timer later when the split is performed.

While at it also renames the "1" and "2" constants in
br_multicast_is_router() to the MDB_RTR_TYPE_TEMP_QUERY and
MDB_RTR_TYPE_PERM enums.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c     |  2 +-
 net/bridge/br_multicast.c |  5 +++--
 net/bridge/br_private.h   | 37 +++++++++++++++++++++++++++++++++----
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8875e953ac53..1f506309efa8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -132,7 +132,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
 		    br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
 			if ((mdst && mdst->host_joined) ||
-			    br_multicast_is_router(br)) {
+			    br_multicast_is_router(br, skb)) {
 				local_rcv = true;
 				br->dev->stats.multicast++;
 			}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 7edbbc9941ea..048b5b9e9c89 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1391,7 +1391,8 @@ static void br_multicast_local_router_expired(struct timer_list *t)
 	spin_lock(&br->multicast_lock);
 	if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
 	    br->multicast_router == MDB_RTR_TYPE_PERM ||
-	    timer_pending(&br->ip4_mc_router_timer))
+	    br_ip4_multicast_is_router(br) ||
+	    br_ip6_multicast_is_router(br))
 		goto out;
 
 	br_mc_router_state_change(br, false);
@@ -3622,7 +3623,7 @@ bool br_multicast_router(const struct net_device *dev)
 	bool is_router;
 
 	spin_lock_bh(&br->multicast_lock);
-	is_router = br_multicast_is_router(br);
+	is_router = br_multicast_is_router(br, NULL);
 	spin_unlock_bh(&br->multicast_lock);
 	return is_router;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d970ef78bf98..f9a381fcff09 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -874,11 +874,40 @@ br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) {
 	return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
 }
 
-static inline bool br_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip4_multicast_is_router(struct net_bridge *br)
 {
-	return br->multicast_router == 2 ||
-	       (br->multicast_router == 1 &&
-		timer_pending(&br->ip4_mc_router_timer));
+	return timer_pending(&br->ip4_mc_router_timer);
+}
+
+static inline bool br_ip6_multicast_is_router(struct net_bridge *br)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	return timer_pending(&br->ip4_mc_router_timer);
+#else
+	return false;
+#endif
+}
+
+static inline bool
+br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
+{
+	switch (br->multicast_router) {
+	case MDB_RTR_TYPE_PERM:
+		return true;
+	case MDB_RTR_TYPE_TEMP_QUERY:
+		if (skb) {
+			if (skb->protocol == htons(ETH_P_IP))
+				return br_ip4_multicast_is_router(br);
+			else if (skb->protocol == htons(ETH_P_IPV6))
+				return br_ip6_multicast_is_router(br);
+		} else {
+			return br_ip4_multicast_is_router(br) ||
+			       br_ip6_multicast_is_router(br);
+		}
+		fallthrough;
+	default:
+		return false;
+	}
 }
 
 static inline bool

From ee5fb2223ee581676fe7e4e5a87481c419569454 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:48 +0200
Subject: [PATCH 0096/3292] net: bridge: mcast: prepare expiry functions for
 mcast router split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants move the protocol specific timer access to
an ip4 wrapper function.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 048b5b9e9c89..781599155d8a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1354,16 +1354,16 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 }
 #endif
 
-static void br_multicast_router_expired(struct timer_list *t)
+static void br_multicast_router_expired(struct net_bridge_port *port,
+					struct timer_list *t,
+					struct hlist_node *rlist)
 {
-	struct net_bridge_port *port =
-			from_timer(port, t, ip4_mc_router_timer);
 	struct net_bridge *br = port->br;
 
 	spin_lock(&br->multicast_lock);
 	if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
 	    port->multicast_router == MDB_RTR_TYPE_PERM ||
-	    timer_pending(&port->ip4_mc_router_timer))
+	    timer_pending(t))
 		goto out;
 
 	__del_port_router(port);
@@ -1371,6 +1371,13 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
+static void br_ip4_multicast_router_expired(struct timer_list *t)
+{
+	struct net_bridge_port *port = from_timer(port, t, ip4_mc_router_timer);
+
+	br_multicast_router_expired(port, t, &port->ip4_rlist);
+}
+
 static void br_mc_router_state_change(struct net_bridge *p,
 				      bool is_mc_router)
 {
@@ -1384,10 +1391,9 @@ static void br_mc_router_state_change(struct net_bridge *p,
 	switchdev_port_attr_set(p->dev, &attr, NULL);
 }
 
-static void br_multicast_local_router_expired(struct timer_list *t)
+static void br_multicast_local_router_expired(struct net_bridge *br,
+					      struct timer_list *timer)
 {
-	struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer);
-
 	spin_lock(&br->multicast_lock);
 	if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
 	    br->multicast_router == MDB_RTR_TYPE_PERM ||
@@ -1400,6 +1406,13 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
+static void br_ip4_multicast_local_router_expired(struct timer_list *t)
+{
+	struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer);
+
+	br_multicast_local_router_expired(br, t);
+}
+
 static void br_multicast_querier_expired(struct net_bridge *br,
 					 struct bridge_mcast_own_query *query)
 {
@@ -1615,7 +1628,7 @@ int br_multicast_add_port(struct net_bridge_port *port)
 	port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
 
 	timer_setup(&port->ip4_mc_router_timer,
-		    br_multicast_router_expired, 0);
+		    br_ip4_multicast_router_expired, 0);
 	timer_setup(&port->ip4_own_query.timer,
 		    br_ip4_multicast_port_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -3319,7 +3332,7 @@ void br_multicast_init(struct net_bridge *br)
 
 	spin_lock_init(&br->multicast_lock);
 	timer_setup(&br->ip4_mc_router_timer,
-		    br_multicast_local_router_expired, 0);
+		    br_ip4_multicast_local_router_expired, 0);
 	timer_setup(&br->ip4_other_query.timer,
 		    br_ip4_multicast_querier_expired, 0);
 	timer_setup(&br->ip4_own_query.timer,

From d9b8c4d8d937f58e618aa1e756162e80b385c701 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:49 +0200
Subject: [PATCH 0097/3292] net: bridge: mcast: prepare add-router function for
 mcast router split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants move the protocol specific router list
and timer access to ip4 wrapper functions.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 117 +++++++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 32 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 781599155d8a..dc9546415520 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -51,8 +51,8 @@ static const struct rhashtable_params br_sg_port_rht_params = {
 
 static void br_multicast_start_querier(struct net_bridge *br,
 				       struct bridge_mcast_own_query *query);
-static void br_multicast_add_router(struct net_bridge *br,
-				    struct net_bridge_port *port);
+static void br_ip4_multicast_add_router(struct net_bridge *br,
+					struct net_bridge_port *port);
 static void br_ip4_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
 					 __be32 group,
@@ -1687,9 +1687,8 @@ static void __br_multicast_enable_port(struct net_bridge_port *port)
 #if IS_ENABLED(CONFIG_IPV6)
 	br_multicast_enable(&port->ip6_own_query);
 #endif
-	if (port->multicast_router == MDB_RTR_TYPE_PERM &&
-	    hlist_unhashed(&port->ip4_rlist))
-		br_multicast_add_router(br, port);
+	if (port->multicast_router == MDB_RTR_TYPE_PERM)
+		br_ip4_multicast_add_router(br, port);
 }
 
 void br_multicast_enable_port(struct net_bridge_port *port)
@@ -2653,45 +2652,86 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p,
 	switchdev_port_attr_set(p->dev, &attr, NULL);
 }
 
-/*
- * Add port to router_list
+static struct net_bridge_port *
+br_multicast_rport_from_node(struct net_bridge *br,
+			     struct hlist_head *mc_router_list,
+			     struct hlist_node *rlist)
+{
+	return hlist_entry(rlist, struct net_bridge_port, ip4_rlist);
+}
+
+static struct hlist_node *
+br_multicast_get_rport_slot(struct net_bridge *br,
+			    struct net_bridge_port *port,
+			    struct hlist_head *mc_router_list)
+
+{
+	struct hlist_node *slot = NULL;
+	struct net_bridge_port *p;
+	struct hlist_node *rlist;
+
+	hlist_for_each(rlist, mc_router_list) {
+		p = br_multicast_rport_from_node(br, mc_router_list, rlist);
+
+		if ((unsigned long)port >= (unsigned long)p)
+			break;
+
+		slot = rlist;
+	}
+
+	return slot;
+}
+
+/* Add port to router_list
  *  list is maintained ordered by pointer value
  *  and locked by br->multicast_lock and RCU
  */
 static void br_multicast_add_router(struct net_bridge *br,
-				    struct net_bridge_port *port)
+				    struct net_bridge_port *port,
+				    struct hlist_node *rlist,
+				    struct hlist_head *mc_router_list)
 {
-	struct net_bridge_port *p;
-	struct hlist_node *slot = NULL;
+	struct hlist_node *slot;
 
-	if (!hlist_unhashed(&port->ip4_rlist))
+	if (!hlist_unhashed(rlist))
 		return;
 
-	hlist_for_each_entry(p, &br->ip4_mc_router_list, ip4_rlist) {
-		if ((unsigned long) port >= (unsigned long) p)
-			break;
-		slot = &p->ip4_rlist;
-	}
+	slot = br_multicast_get_rport_slot(br, port, mc_router_list);
 
 	if (slot)
-		hlist_add_behind_rcu(&port->ip4_rlist, slot);
+		hlist_add_behind_rcu(rlist, slot);
 	else
-		hlist_add_head_rcu(&port->ip4_rlist, &br->ip4_mc_router_list);
+		hlist_add_head_rcu(rlist, mc_router_list);
+
 	br_rtr_notify(br->dev, port, RTM_NEWMDB);
 	br_port_mc_router_state_change(port, true);
 }
 
+/* Add port to router_list
+ *  list is maintained ordered by pointer value
+ *  and locked by br->multicast_lock and RCU
+ */
+static void br_ip4_multicast_add_router(struct net_bridge *br,
+					struct net_bridge_port *port)
+{
+	br_multicast_add_router(br, port, &port->ip4_rlist,
+				&br->ip4_mc_router_list);
+}
+
 static void br_multicast_mark_router(struct net_bridge *br,
-				     struct net_bridge_port *port)
+				     struct net_bridge_port *port,
+				     struct timer_list *timer,
+				     struct hlist_node *rlist,
+				     struct hlist_head *mc_router_list)
 {
 	unsigned long now = jiffies;
 
 	if (!port) {
 		if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
-			if (!timer_pending(&br->ip4_mc_router_timer))
+			if (!br_ip4_multicast_is_router(br) &&
+			    !br_ip6_multicast_is_router(br))
 				br_mc_router_state_change(br, true);
-			mod_timer(&br->ip4_mc_router_timer,
-				  now + br->multicast_querier_interval);
+			mod_timer(timer, now + br->multicast_querier_interval);
 		}
 		return;
 	}
@@ -2700,10 +2740,23 @@ static void br_multicast_mark_router(struct net_bridge *br,
 	    port->multicast_router == MDB_RTR_TYPE_PERM)
 		return;
 
-	br_multicast_add_router(br, port);
+	br_multicast_add_router(br, port, rlist, mc_router_list);
+	mod_timer(timer, now + br->multicast_querier_interval);
+}
 
-	mod_timer(&port->ip4_mc_router_timer,
-		  now + br->multicast_querier_interval);
+static void br_ip4_multicast_mark_router(struct net_bridge *br,
+					 struct net_bridge_port *port)
+{
+	struct timer_list *timer = &br->ip4_mc_router_timer;
+	struct hlist_node *rlist = NULL;
+
+	if (port) {
+		timer = &port->ip4_mc_router_timer;
+		rlist = &port->ip4_rlist;
+	}
+
+	br_multicast_mark_router(br, port, timer, rlist,
+				 &br->ip4_mc_router_list);
 }
 
 static void
@@ -2717,7 +2770,7 @@ br_ip4_multicast_query_received(struct net_bridge *br,
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
-	br_multicast_mark_router(br, port);
+	br_ip4_multicast_mark_router(br, port);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -2732,7 +2785,7 @@ br_ip6_multicast_query_received(struct net_bridge *br,
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
-	br_multicast_mark_router(br, port);
+	br_ip4_multicast_mark_router(br, port);
 }
 #endif
 
@@ -3102,7 +3155,7 @@ static void br_multicast_pim(struct net_bridge *br,
 	    pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
 		return;
 
-	br_multicast_mark_router(br, port);
+	br_ip4_multicast_mark_router(br, port);
 }
 
 static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
@@ -3113,7 +3166,7 @@ static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
 	    igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
 		return -ENOMSG;
 
-	br_multicast_mark_router(br, port);
+	br_ip4_multicast_mark_router(br, port);
 
 	return 0;
 }
@@ -3181,7 +3234,7 @@ static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
 	if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
 		return;
 
-	br_multicast_mark_router(br, port);
+	br_ip4_multicast_mark_router(br, port);
 }
 
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
@@ -3526,11 +3579,11 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 	case MDB_RTR_TYPE_PERM:
 		p->multicast_router = MDB_RTR_TYPE_PERM;
 		del_timer(&p->ip4_mc_router_timer);
-		br_multicast_add_router(br, p);
+		br_ip4_multicast_add_router(br, p);
 		break;
 	case MDB_RTR_TYPE_TEMP:
 		p->multicast_router = MDB_RTR_TYPE_TEMP;
-		br_multicast_mark_router(br, p);
+		br_ip4_multicast_mark_router(br, p);
 		break;
 	default:
 		goto unlock;

From ed2d35971a8066aa24ce65dd66c113c0506bb206 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:50 +0200
Subject: [PATCH 0098/3292] net: bridge: mcast: split router port del+notify
 for mcast router split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for the upcoming split of multicast router state into
their IPv4 and IPv6 variants split router port deletion and notification
into two functions. When we disable a port for instance later we want to
only send one notification to switchdev and netlink for compatibility
and want to avoid sending one for IPv4 and one for IPv6. For that the
split is needed.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 40 ++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dc9546415520..30144f9d19d7 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -60,7 +60,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 					 const unsigned char *src);
 static void br_multicast_port_group_rexmit(struct timer_list *t);
 
-static void __del_port_router(struct net_bridge_port *p);
+static void
+br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted);
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
@@ -1354,11 +1355,26 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 }
 #endif
 
+static bool br_multicast_rport_del(struct hlist_node *rlist)
+{
+	if (hlist_unhashed(rlist))
+		return false;
+
+	hlist_del_init_rcu(rlist);
+	return true;
+}
+
+static bool br_ip4_multicast_rport_del(struct net_bridge_port *p)
+{
+	return br_multicast_rport_del(&p->ip4_rlist);
+}
+
 static void br_multicast_router_expired(struct net_bridge_port *port,
 					struct timer_list *t,
 					struct hlist_node *rlist)
 {
 	struct net_bridge *br = port->br;
+	bool del;
 
 	spin_lock(&br->multicast_lock);
 	if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
@@ -1366,7 +1382,8 @@ static void br_multicast_router_expired(struct net_bridge_port *port,
 	    timer_pending(t))
 		goto out;
 
-	__del_port_router(port);
+	del = br_multicast_rport_del(rlist);
+	br_multicast_rport_del_notify(port, del);
 out:
 	spin_unlock(&br->multicast_lock);
 }
@@ -1705,19 +1722,20 @@ void br_multicast_disable_port(struct net_bridge_port *port)
 	struct net_bridge *br = port->br;
 	struct net_bridge_port_group *pg;
 	struct hlist_node *n;
+	bool del = false;
 
 	spin_lock(&br->multicast_lock);
 	hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
 		if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
 			br_multicast_find_del_pg(br, pg);
 
-	__del_port_router(port);
-
+	del |= br_ip4_multicast_rport_del(port);
 	del_timer(&port->ip4_mc_router_timer);
 	del_timer(&port->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
 	del_timer(&port->ip6_own_query.timer);
 #endif
+	br_multicast_rport_del_notify(port, del);
 	spin_unlock(&br->multicast_lock);
 }
 
@@ -3538,11 +3556,12 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 	return err;
 }
 
-static void __del_port_router(struct net_bridge_port *p)
+static void
+br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
 {
-	if (hlist_unhashed(&p->ip4_rlist))
+	if (!deleted)
 		return;
-	hlist_del_init_rcu(&p->ip4_rlist);
+
 	br_rtr_notify(p->br->dev, p, RTM_DELMDB);
 	br_port_mc_router_state_change(p, false);
 
@@ -3556,6 +3575,7 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 	struct net_bridge *br = p->br;
 	unsigned long now = jiffies;
 	int err = -EINVAL;
+	bool del = false;
 
 	spin_lock(&br->multicast_lock);
 	if (p->multicast_router == val) {
@@ -3569,12 +3589,14 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 	switch (val) {
 	case MDB_RTR_TYPE_DISABLED:
 		p->multicast_router = MDB_RTR_TYPE_DISABLED;
-		__del_port_router(p);
+		del |= br_ip4_multicast_rport_del(p);
 		del_timer(&p->ip4_mc_router_timer);
+		br_multicast_rport_del_notify(p, del);
 		break;
 	case MDB_RTR_TYPE_TEMP_QUERY:
 		p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
-		__del_port_router(p);
+		del |= br_ip4_multicast_rport_del(p);
+		br_multicast_rport_del_notify(p, del);
 		break;
 	case MDB_RTR_TYPE_PERM:
 		p->multicast_router = MDB_RTR_TYPE_PERM;

From a3c02e769efe66dce5e2c716862b60c8d44d191e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:51 +0200
Subject: [PATCH 0099/3292] net: bridge: mcast: split multicast router state
 for IPv4 and IPv6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A multicast router for IPv4 does not imply that the same host also is a
multicast router for IPv6 and vice versa.

To reduce multicast traffic when a host is only a multicast router for
one of these two protocol families, keep router state for IPv4 and IPv6
separately. Similar to how querier state is kept separately.

For backwards compatibility for netlink and switchdev notifications
these two will still only notify if a port switched from either no
IPv4/IPv6 multicast router to any IPv4/IPv6 multicast router or the
other way round. However a full netlink MDB router dump will now also
include a multicast router timeout for both IPv4 and IPv6.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c       |  10 +++
 net/bridge/br_multicast.c | 134 ++++++++++++++++++++++++++++++++++++--
 net/bridge/br_private.h   |  14 +++-
 3 files changed, 151 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 482edb9aadc7..10c416c7bf47 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -18,7 +18,12 @@
 
 static bool br_rports_have_mc_router(struct net_bridge *br)
 {
+#if IS_ENABLED(CONFIG_IPV6)
+	return !hlist_empty(&br->ip4_mc_router_list) ||
+	       !hlist_empty(&br->ip6_mc_router_list);
+#else
 	return !hlist_empty(&br->ip4_mc_router_list);
+#endif
 }
 
 static bool
@@ -31,8 +36,13 @@ br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
 static bool
 br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
 {
+#if IS_ENABLED(CONFIG_IPV6)
+	*timer = br_timer_value(&port->ip6_mc_router_timer);
+	return !hlist_unhashed(&port->ip6_rlist);
+#else
 	*timer = 0;
 	return false;
+#endif
 }
 
 static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 30144f9d19d7..f234c48036c8 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -63,6 +63,8 @@ static void br_multicast_port_group_rexmit(struct timer_list *t);
 static void
 br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted);
 #if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_add_router(struct net_bridge *br,
+					struct net_bridge_port *port);
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
 					 const struct in6_addr *group,
@@ -1369,6 +1371,15 @@ static bool br_ip4_multicast_rport_del(struct net_bridge_port *p)
 	return br_multicast_rport_del(&p->ip4_rlist);
 }
 
+static bool br_ip6_multicast_rport_del(struct net_bridge_port *p)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	return br_multicast_rport_del(&p->ip6_rlist);
+#else
+	return false;
+#endif
+}
+
 static void br_multicast_router_expired(struct net_bridge_port *port,
 					struct timer_list *t,
 					struct hlist_node *rlist)
@@ -1395,6 +1406,15 @@ static void br_ip4_multicast_router_expired(struct timer_list *t)
 	br_multicast_router_expired(port, t, &port->ip4_rlist);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_router_expired(struct timer_list *t)
+{
+	struct net_bridge_port *port = from_timer(port, t, ip6_mc_router_timer);
+
+	br_multicast_router_expired(port, t, &port->ip6_rlist);
+}
+#endif
+
 static void br_mc_router_state_change(struct net_bridge *p,
 				      bool is_mc_router)
 {
@@ -1430,6 +1450,15 @@ static void br_ip4_multicast_local_router_expired(struct timer_list *t)
 	br_multicast_local_router_expired(br, t);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_local_router_expired(struct timer_list *t)
+{
+	struct net_bridge *br = from_timer(br, t, ip6_mc_router_timer);
+
+	br_multicast_local_router_expired(br, t);
+}
+#endif
+
 static void br_multicast_querier_expired(struct net_bridge *br,
 					 struct bridge_mcast_own_query *query)
 {
@@ -1649,6 +1678,8 @@ int br_multicast_add_port(struct net_bridge_port *port)
 	timer_setup(&port->ip4_own_query.timer,
 		    br_ip4_multicast_port_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
+	timer_setup(&port->ip6_mc_router_timer,
+		    br_ip6_multicast_router_expired, 0);
 	timer_setup(&port->ip6_own_query.timer,
 		    br_ip6_multicast_port_query_expired, 0);
 #endif
@@ -1681,6 +1712,9 @@ void br_multicast_del_port(struct net_bridge_port *port)
 	spin_unlock_bh(&br->multicast_lock);
 	br_multicast_gc(&deleted_head);
 	del_timer_sync(&port->ip4_mc_router_timer);
+#if IS_ENABLED(CONFIG_IPV6)
+	del_timer_sync(&port->ip6_mc_router_timer);
+#endif
 	free_percpu(port->mcast_stats);
 }
 
@@ -1704,8 +1738,10 @@ static void __br_multicast_enable_port(struct net_bridge_port *port)
 #if IS_ENABLED(CONFIG_IPV6)
 	br_multicast_enable(&port->ip6_own_query);
 #endif
-	if (port->multicast_router == MDB_RTR_TYPE_PERM)
+	if (port->multicast_router == MDB_RTR_TYPE_PERM) {
 		br_ip4_multicast_add_router(br, port);
+		br_ip6_multicast_add_router(br, port);
+	}
 }
 
 void br_multicast_enable_port(struct net_bridge_port *port)
@@ -1732,7 +1768,9 @@ void br_multicast_disable_port(struct net_bridge_port *port)
 	del |= br_ip4_multicast_rport_del(port);
 	del_timer(&port->ip4_mc_router_timer);
 	del_timer(&port->ip4_own_query.timer);
+	del |= br_ip6_multicast_rport_del(port);
 #if IS_ENABLED(CONFIG_IPV6)
+	del_timer(&port->ip6_mc_router_timer);
 	del_timer(&port->ip6_own_query.timer);
 #endif
 	br_multicast_rport_del_notify(port, del);
@@ -2675,6 +2713,10 @@ br_multicast_rport_from_node(struct net_bridge *br,
 			     struct hlist_head *mc_router_list,
 			     struct hlist_node *rlist)
 {
+#if IS_ENABLED(CONFIG_IPV6)
+	if (mc_router_list == &br->ip6_mc_router_list)
+		return hlist_entry(rlist, struct net_bridge_port, ip6_rlist);
+#endif
 	return hlist_entry(rlist, struct net_bridge_port, ip4_rlist);
 }
 
@@ -2700,6 +2742,19 @@ br_multicast_get_rport_slot(struct net_bridge *br,
 	return slot;
 }
 
+static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
+					   struct hlist_node *rnode)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (rnode != &port->ip6_rlist)
+		return hlist_unhashed(&port->ip6_rlist);
+	else
+		return hlist_unhashed(&port->ip4_rlist);
+#else
+	return true;
+#endif
+}
+
 /* Add port to router_list
  *  list is maintained ordered by pointer value
  *  and locked by br->multicast_lock and RCU
@@ -2721,8 +2776,14 @@ static void br_multicast_add_router(struct net_bridge *br,
 	else
 		hlist_add_head_rcu(rlist, mc_router_list);
 
-	br_rtr_notify(br->dev, port, RTM_NEWMDB);
-	br_port_mc_router_state_change(port, true);
+	/* For backwards compatibility for now, only notify if we
+	 * switched from no IPv4/IPv6 multicast router to a new
+	 * IPv4 or IPv6 multicast router.
+	 */
+	if (br_multicast_no_router_otherpf(port, rlist)) {
+		br_rtr_notify(br->dev, port, RTM_NEWMDB);
+		br_port_mc_router_state_change(port, true);
+	}
 }
 
 /* Add port to router_list
@@ -2736,6 +2797,19 @@ static void br_ip4_multicast_add_router(struct net_bridge *br,
 				&br->ip4_mc_router_list);
 }
 
+/* Add port to router_list
+ *  list is maintained ordered by pointer value
+ *  and locked by br->multicast_lock and RCU
+ */
+static void br_ip6_multicast_add_router(struct net_bridge *br,
+					struct net_bridge_port *port)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	br_multicast_add_router(br, port, &port->ip6_rlist,
+				&br->ip6_mc_router_list);
+#endif
+}
+
 static void br_multicast_mark_router(struct net_bridge *br,
 				     struct net_bridge_port *port,
 				     struct timer_list *timer,
@@ -2777,6 +2851,23 @@ static void br_ip4_multicast_mark_router(struct net_bridge *br,
 				 &br->ip4_mc_router_list);
 }
 
+static void br_ip6_multicast_mark_router(struct net_bridge *br,
+					 struct net_bridge_port *port)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct timer_list *timer = &br->ip6_mc_router_timer;
+	struct hlist_node *rlist = NULL;
+
+	if (port) {
+		timer = &port->ip6_mc_router_timer;
+		rlist = &port->ip6_rlist;
+	}
+
+	br_multicast_mark_router(br, port, timer, rlist,
+				 &br->ip6_mc_router_list);
+#endif
+}
+
 static void
 br_ip4_multicast_query_received(struct net_bridge *br,
 				struct net_bridge_port *port,
@@ -2803,7 +2894,7 @@ br_ip6_multicast_query_received(struct net_bridge *br,
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
-	br_ip4_multicast_mark_router(br, port);
+	br_ip6_multicast_mark_router(br, port);
 }
 #endif
 
@@ -3252,7 +3343,7 @@ static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
 	if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
 		return;
 
-	br_ip4_multicast_mark_router(br, port);
+	br_ip6_multicast_mark_router(br, port);
 }
 
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
@@ -3409,6 +3500,8 @@ void br_multicast_init(struct net_bridge *br)
 	timer_setup(&br->ip4_own_query.timer,
 		    br_ip4_multicast_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
+	timer_setup(&br->ip6_mc_router_timer,
+		    br_ip6_multicast_local_router_expired, 0);
 	timer_setup(&br->ip6_other_query.timer,
 		    br_ip6_multicast_querier_expired, 0);
 	timer_setup(&br->ip6_own_query.timer,
@@ -3506,6 +3599,7 @@ void br_multicast_stop(struct net_bridge *br)
 	del_timer_sync(&br->ip4_other_query.timer);
 	del_timer_sync(&br->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
+	del_timer_sync(&br->ip6_mc_router_timer);
 	del_timer_sync(&br->ip6_other_query.timer);
 	del_timer_sync(&br->ip6_own_query.timer);
 #endif
@@ -3540,6 +3634,9 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 	case MDB_RTR_TYPE_PERM:
 		br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
 		del_timer(&br->ip4_mc_router_timer);
+#if IS_ENABLED(CONFIG_IPV6)
+		del_timer(&br->ip6_mc_router_timer);
+#endif
 		br->multicast_router = val;
 		err = 0;
 		break;
@@ -3562,6 +3659,16 @@ br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
 	if (!deleted)
 		return;
 
+	/* For backwards compatibility for now, only notify if there is
+	 * no multicast router anymore for both IPv4 and IPv6.
+	 */
+	if (!hlist_unhashed(&p->ip4_rlist))
+		return;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (!hlist_unhashed(&p->ip6_rlist))
+		return;
+#endif
+
 	br_rtr_notify(p->br->dev, p, RTM_DELMDB);
 	br_port_mc_router_state_change(p, false);
 
@@ -3580,9 +3687,14 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 	spin_lock(&br->multicast_lock);
 	if (p->multicast_router == val) {
 		/* Refresh the temp router port timer */
-		if (p->multicast_router == MDB_RTR_TYPE_TEMP)
+		if (p->multicast_router == MDB_RTR_TYPE_TEMP) {
 			mod_timer(&p->ip4_mc_router_timer,
 				  now + br->multicast_querier_interval);
+#if IS_ENABLED(CONFIG_IPV6)
+			mod_timer(&p->ip6_mc_router_timer,
+				  now + br->multicast_querier_interval);
+#endif
+		}
 		err = 0;
 		goto unlock;
 	}
@@ -3591,21 +3703,31 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
 		p->multicast_router = MDB_RTR_TYPE_DISABLED;
 		del |= br_ip4_multicast_rport_del(p);
 		del_timer(&p->ip4_mc_router_timer);
+		del |= br_ip6_multicast_rport_del(p);
+#if IS_ENABLED(CONFIG_IPV6)
+		del_timer(&p->ip6_mc_router_timer);
+#endif
 		br_multicast_rport_del_notify(p, del);
 		break;
 	case MDB_RTR_TYPE_TEMP_QUERY:
 		p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 		del |= br_ip4_multicast_rport_del(p);
+		del |= br_ip6_multicast_rport_del(p);
 		br_multicast_rport_del_notify(p, del);
 		break;
 	case MDB_RTR_TYPE_PERM:
 		p->multicast_router = MDB_RTR_TYPE_PERM;
 		del_timer(&p->ip4_mc_router_timer);
 		br_ip4_multicast_add_router(br, p);
+#if IS_ENABLED(CONFIG_IPV6)
+		del_timer(&p->ip6_mc_router_timer);
+#endif
+		br_ip6_multicast_add_router(br, p);
 		break;
 	case MDB_RTR_TYPE_TEMP:
 		p->multicast_router = MDB_RTR_TYPE_TEMP;
 		br_ip4_multicast_mark_router(br, p);
+		br_ip6_multicast_mark_router(br, p);
 		break;
 	default:
 		goto unlock;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f9a381fcff09..03197ab4af76 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -311,6 +311,8 @@ struct net_bridge_port {
 	struct hlist_node		ip4_rlist;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct bridge_mcast_own_query	ip6_own_query;
+	struct timer_list		ip6_mc_router_timer;
+	struct hlist_node		ip6_rlist;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 	u32				multicast_eht_hosts_limit;
 	u32				multicast_eht_hosts_cnt;
@@ -457,6 +459,8 @@ struct net_bridge {
 	struct bridge_mcast_querier	ip4_querier;
 	struct bridge_mcast_stats	__percpu *mcast_stats;
 #if IS_ENABLED(CONFIG_IPV6)
+	struct hlist_head		ip6_mc_router_list;
+	struct timer_list		ip6_mc_router_timer;
 	struct bridge_mcast_other_query	ip6_other_query;
 	struct bridge_mcast_own_query	ip6_own_query;
 	struct bridge_mcast_querier	ip6_querier;
@@ -866,11 +870,19 @@ static inline bool br_group_is_l2(const struct br_ip *group)
 
 static inline struct hlist_node *
 br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) {
+#if IS_ENABLED(CONFIG_IPV6)
+	if (skb->protocol == htons(ETH_P_IPV6))
+		return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list));
+#endif
 	return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list));
 }
 
 static inline struct net_bridge_port *
 br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) {
+#if IS_ENABLED(CONFIG_IPV6)
+	if (skb->protocol == htons(ETH_P_IPV6))
+		return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist);
+#endif
 	return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
 }
 
@@ -882,7 +894,7 @@ static inline bool br_ip4_multicast_is_router(struct net_bridge *br)
 static inline bool br_ip6_multicast_is_router(struct net_bridge *br)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-	return timer_pending(&br->ip4_mc_router_timer);
+	return timer_pending(&br->ip6_mc_router_timer);
 #else
 	return false;
 #endif

From b7fb0916544de44ce099d9f3b6129c86b484de25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:52 +0200
Subject: [PATCH 0100/3292] net: bridge: mcast: add ip4+ip6 mcast router timers
 to mdb netlink
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that we have split the multicast router state into two, one for IPv4
and one for IPv6, also add individual timers to the mdb netlink router
port dump. Leaving the old timer attribute for backwards compatibility.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 2 ++
 net/bridge/br_mdb.c            | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 13d59c51ef5b..6b56a7549531 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -627,6 +627,8 @@ enum {
 	MDBA_ROUTER_PATTR_UNSPEC,
 	MDBA_ROUTER_PATTR_TIMER,
 	MDBA_ROUTER_PATTR_TYPE,
+	MDBA_ROUTER_PATTR_INET_TIMER,
+	MDBA_ROUTER_PATTR_INET6_TIMER,
 	__MDBA_ROUTER_PATTR_MAX
 };
 #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 10c416c7bf47..3f839a8cc9fb 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -79,7 +79,13 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 		    nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
 				max(ip4_timer, ip6_timer)) ||
 		    nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
-			       p->multicast_router)) {
+			       p->multicast_router) ||
+		    (have_ip4_mc_rtr &&
+		     nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER,
+				 ip4_timer)) ||
+		    (have_ip6_mc_rtr &&
+		     nla_put_u32(skb, MDBA_ROUTER_PATTR_INET6_TIMER,
+				 ip6_timer))) {
 			nla_nest_cancel(skb, port_nest);
 			goto fail;
 		}

From 3b85f9ba3480c1bcbebb2bb490822bec0e7a1201 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Thu, 13 May 2021 15:20:53 +0200
Subject: [PATCH 0101/3292] net: bridge: mcast: export multicast router
 presence adjacent to a port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To properly support routable multicast addresses in batman-adv in a
group-aware way, a batman-adv node needs to know if it serves multicast
routers.

This adds a function to the bridge to export this so that batman-adv
can then make full use of the Multicast Router Discovery capability of
the bridge.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |  8 ++++++
 net/bridge/br_multicast.c | 55 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 2cc35038a8ca..12e9a32dbca0 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -67,6 +67,7 @@ int br_multicast_list_adjacent(struct net_device *dev,
 			       struct list_head *br_ip_list);
 bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
 bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
+bool br_multicast_has_router_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
 bool br_multicast_router(const struct net_device *dev);
 int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
@@ -87,6 +88,13 @@ static inline bool br_multicast_has_querier_adjacent(struct net_device *dev,
 {
 	return false;
 }
+
+static inline bool br_multicast_has_router_adjacent(struct net_device *dev,
+						    int proto)
+{
+	return true;
+}
+
 static inline bool br_multicast_enabled(const struct net_device *dev)
 {
 	return false;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f234c48036c8..0703725527b3 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4054,6 +4054,61 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
 
+/**
+ * br_multicast_has_router_adjacent - Checks for a router behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a multicast router
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a multicast router is behind one of the other ports of this
+ * bridge. Otherwise returns false.
+ */
+bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
+{
+	struct net_bridge_port *port, *p;
+	bool ret = false;
+
+	rcu_read_lock();
+	port = br_port_get_check_rcu(dev);
+	if (!port)
+		goto unlock;
+
+	switch (proto) {
+	case ETH_P_IP:
+		hlist_for_each_entry_rcu(p, &port->br->ip4_mc_router_list,
+					 ip4_rlist) {
+			if (p == port)
+				continue;
+
+			ret = true;
+			goto unlock;
+		}
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case ETH_P_IPV6:
+		hlist_for_each_entry_rcu(p, &port->br->ip6_mc_router_list,
+					 ip6_rlist) {
+			if (p == port)
+				continue;
+
+			ret = true;
+			goto unlock;
+		}
+		break;
+#endif
+	default:
+		/* when compiled without IPv6 support, be conservative and
+		 * always assume presence of an IPv6 multicast router
+		 */
+		ret = true;
+	}
+
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_router_adjacent);
+
 static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
 			       const struct sk_buff *skb, u8 type, u8 dir)
 {

From 9afcb59597301c8e8dca8ba855f5ec74f918a479 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 13 May 2021 14:26:50 +0800
Subject: [PATCH 0102/3292] net: hinic: remove unnecessary blank line

There are two blank lines are unnecessary, this patch removes them.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 1 -
 drivers/net/ethernet/huawei/hinic/hinic_main.c    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index dc024ef521c0..162d3c330dec 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -1663,7 +1663,6 @@ static void hinic_diag_test(struct net_device *netdev,
 	err = hinic_port_link_state(nic_dev, &link_state);
 	if (!err && link_state == HINIC_LINK_STATE_UP)
 		netif_carrier_on(netdev);
-
 }
 
 static int hinic_set_phys_id(struct net_device *netdev,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 9a9b09401d01..1da5997f034c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -172,7 +172,6 @@ static int create_txqs(struct hinic_dev *nic_dev)
 				  "Failed to add SQ%d debug\n", i);
 			goto err_add_sq_dbg;
 		}
-
 	}
 
 	return 0;

From 3402ab54a8e37a48f72b8b3dad543bb839ec4d2d Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 13 May 2021 14:26:51 +0800
Subject: [PATCH 0103/3292] net: hinic: add blank line after function
 declaration

There should be a blank line after function declaration, so add two
missed blank lines.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c | 1 +
 drivers/net/ethernet/huawei/hinic/hinic_rx.c    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index 5dc3743f8091..7f0f1aa3cedd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -89,6 +89,7 @@ static inline int WQE_PAGE_NUM(struct hinic_wq *wq, u16 idx)
 	return (((idx) >> ((wq)->wqebbs_per_page_shift))
 		& ((wq)->num_q_pages - 1));
 }
+
 /**
  * queue_alloc_page - allocate page for Queue
  * @hwif: HW interface for allocating DMA
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index cce08647b9b2..fed3b6bc0d76 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -118,6 +118,7 @@ static void rx_csum(struct hinic_rxq *rxq, u32 status,
 		skb->ip_summed = CHECKSUM_NONE;
 	}
 }
+
 /**
  * rx_alloc_skb - allocate skb and map it to dma address
  * @rxq: rx queue

From c8ad5df6151e457ad995fdee6440a45af09b70f6 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 13 May 2021 14:26:52 +0800
Subject: [PATCH 0104/3292] net: hinic: remove unnecessary parentheses

There are some unnecessary parentheses, this patch deletes them.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c |  2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 10 +++++-----
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c  |  6 +++---
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c   |  4 ++--
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c |  4 ++--
 drivers/net/ethernet/huawei/hinic/hinic_port.c    | 10 +++++-----
 drivers/net/ethernet/huawei/hinic/hinic_tx.c      |  2 +-
 7 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 5a6bbee819cd..577cb2cffff2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -223,7 +223,7 @@ static void cmdq_prepare_wqe_ctrl(struct hinic_cmdq_wqe *wqe, int wrapped,
 	saved_data = CMDQ_WQE_HEADER(wqe)->saved_data;
 	saved_data = HINIC_SAVED_DATA_CLEAR(saved_data, ARM);
 
-	if ((cmd == CMDQ_SET_ARM_CMD) && (mod == HINIC_MOD_COMM))
+	if (cmd == CMDQ_SET_ARM_CMD && mod == HINIC_MOD_COMM)
 		CMDQ_WQE_HEADER(wqe)->saved_data |=
 						HINIC_SAVED_DATA_SET(1, ARM);
 	else
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 0c74f6674634..19a91c0223a7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -257,7 +257,7 @@ static int init_fw_ctxt(struct hinic_hwdev *hwdev)
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_FWCTXT_INIT,
 				 &fw_ctxt, sizeof(fw_ctxt),
 				 &fw_ctxt, &out_size);
-	if (err || (out_size != sizeof(fw_ctxt)) || fw_ctxt.status) {
+	if (err || out_size != sizeof(fw_ctxt) || fw_ctxt.status) {
 		dev_err(&pdev->dev, "Failed to init FW ctxt, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, fw_ctxt.status, out_size);
 		return -EIO;
@@ -424,7 +424,7 @@ static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn)
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_GLOBAL_QPN,
 				 &cmd_base_qpn, sizeof(cmd_base_qpn),
 				 &cmd_base_qpn, &out_size);
-	if (err || (out_size != sizeof(cmd_base_qpn)) || cmd_base_qpn.status) {
+	if (err || out_size != sizeof(cmd_base_qpn) || cmd_base_qpn.status) {
 		dev_err(&pdev->dev, "Failed to get base qpn, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, cmd_base_qpn.status, out_size);
 		return -EIO;
@@ -605,8 +605,8 @@ static void nic_mgmt_msg_handler(void *handle, u8 cmd, void *buf_in,
 	hwif = hwdev->hwif;
 	pdev = hwif->pdev;
 
-	if ((cmd < HINIC_MGMT_MSG_CMD_BASE) ||
-	    (cmd >= HINIC_MGMT_MSG_CMD_MAX)) {
+	if (cmd < HINIC_MGMT_MSG_CMD_BASE ||
+	    cmd >= HINIC_MGMT_MSG_CMD_MAX) {
 		dev_err(&pdev->dev, "unknown L2NIC event, cmd = %d\n", cmd);
 		return;
 	}
@@ -619,7 +619,7 @@ static void nic_mgmt_msg_handler(void *handle, u8 cmd, void *buf_in,
 			   HINIC_CB_ENABLED,
 			   HINIC_CB_ENABLED | HINIC_CB_RUNNING);
 
-	if ((cb_state == HINIC_CB_ENABLED) && (nic_cb->handler))
+	if (cb_state == HINIC_CB_ENABLED && nic_cb->handler)
 		nic_cb->handler(nic_cb->handle, buf_in,
 				in_size, buf_out, out_size);
 	else
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index 19942fef99d9..d3fc05a07fdb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -254,8 +254,8 @@ static void aeq_irq_handler(struct hinic_eq *eq)
 					    HINIC_EQE_ENABLED,
 					    HINIC_EQE_ENABLED |
 					    HINIC_EQE_RUNNING);
-			if ((eqe_state == HINIC_EQE_ENABLED) &&
-			    (hwe_cb->hwe_handler))
+			if (eqe_state == HINIC_EQE_ENABLED &&
+			    hwe_cb->hwe_handler)
 				hwe_cb->hwe_handler(hwe_cb->handle,
 						    aeqe_curr->data, size);
 			else
@@ -299,7 +299,7 @@ static void ceq_event_handler(struct hinic_ceqs *ceqs, u32 ceqe)
 			    HINIC_EQE_ENABLED,
 			    HINIC_EQE_ENABLED | HINIC_EQE_RUNNING);
 
-	if ((eqe_state == HINIC_EQE_ENABLED) && (ceq_cb->handler))
+	if (eqe_state == HINIC_EQE_ENABLED && ceq_cb->handler)
 		ceq_cb->handler(ceq_cb->handle, CEQE_DATA(ceqe));
 	else
 		dev_err(&pdev->dev, "Unhandled CEQ Event %d\n", event);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index 4ef4008e65bd..a6e43d686293 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -137,7 +137,7 @@ static int write_sq_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn,
 	err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
 				     IO_CMD_MODIFY_QUEUE_CTXT, &cmdq_buf,
 				     &out_param);
-	if ((err) || (out_param != 0)) {
+	if (err || out_param != 0) {
 		dev_err(&pdev->dev, "Failed to set SQ ctxts\n");
 		err = -EFAULT;
 	}
@@ -181,7 +181,7 @@ static int write_rq_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn,
 	err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
 				     IO_CMD_MODIFY_QUEUE_CTXT, &cmdq_buf,
 				     &out_param);
-	if ((err) || (out_param != 0)) {
+	if (err || out_param != 0) {
 		dev_err(&pdev->dev, "Failed to set RQ ctxts\n");
 		err = -EFAULT;
 	}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 817173f1fbb7..ebc77771f5da 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -294,7 +294,7 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt,
 		goto unlock_sync_msg;
 	}
 
-	if ((buf_out) && (recv_msg->msg_len <= MAX_PF_MGMT_BUF_SIZE)) {
+	if (buf_out && recv_msg->msg_len <= MAX_PF_MGMT_BUF_SIZE) {
 		memcpy(buf_out, recv_msg->msg, recv_msg->msg_len);
 		*out_size = recv_msg->msg_len;
 	}
@@ -411,7 +411,7 @@ static void recv_mgmt_msg_work_handler(struct work_struct *work)
 			   HINIC_MGMT_CB_ENABLED,
 			   HINIC_MGMT_CB_ENABLED | HINIC_MGMT_CB_RUNNING);
 
-	if ((cb_state == HINIC_MGMT_CB_ENABLED) && (mgmt_cb->cb))
+	if (cb_state == HINIC_MGMT_CB_ENABLED && mgmt_cb->cb)
 		mgmt_cb->cb(mgmt_cb->handle, mgmt_work->cmd,
 			    mgmt_work->msg, mgmt_work->msg_len,
 			    buf_out, &out_size);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index eb97f2d6b1ad..28ae6f1201a8 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -128,7 +128,7 @@ int hinic_port_get_mac(struct hinic_dev *nic_dev, u8 *addr)
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_MAC,
 				 &port_mac_cmd, sizeof(port_mac_cmd),
 				 &port_mac_cmd, &out_size);
-	if (err || (out_size != sizeof(port_mac_cmd)) || port_mac_cmd.status) {
+	if (err || out_size != sizeof(port_mac_cmd) || port_mac_cmd.status) {
 		dev_err(&pdev->dev, "Failed to get mac, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, port_mac_cmd.status, out_size);
 		return -EFAULT;
@@ -263,7 +263,7 @@ int hinic_port_link_state(struct hinic_dev *nic_dev,
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_LINK_STATE,
 				 &link_cmd, sizeof(link_cmd),
 				 &link_cmd, &out_size);
-	if (err || (out_size != sizeof(link_cmd)) || link_cmd.status) {
+	if (err || out_size != sizeof(link_cmd) || link_cmd.status) {
 		dev_err(&pdev->dev, "Failed to get link state, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, link_cmd.status, out_size);
 		return -EINVAL;
@@ -297,7 +297,7 @@ int hinic_port_set_state(struct hinic_dev *nic_dev, enum hinic_port_state state)
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_PORT_STATE,
 				 &port_state, sizeof(port_state),
 				 &port_state, &out_size);
-	if (err || (out_size != sizeof(port_state)) || port_state.status) {
+	if (err || out_size != sizeof(port_state) || port_state.status) {
 		dev_err(&pdev->dev, "Failed to set port state, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, port_state.status, out_size);
 		return -EFAULT;
@@ -329,7 +329,7 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev,
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_FUNC_STATE,
 				 &func_state, sizeof(func_state),
 				 &func_state, &out_size);
-	if (err || (out_size != sizeof(func_state)) || func_state.status) {
+	if (err || out_size != sizeof(func_state) || func_state.status) {
 		dev_err(&pdev->dev, "Failed to set port func state, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, func_state.status, out_size);
 		return -EFAULT;
@@ -359,7 +359,7 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_CAP,
 				 port_cap, sizeof(*port_cap),
 				 port_cap, &out_size);
-	if (err || (out_size != sizeof(*port_cap)) || port_cap->status) {
+	if (err || out_size != sizeof(*port_cap) || port_cap->status) {
 		dev_err(&pdev->dev,
 			"Failed to get port capabilities, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, port_cap->status, out_size);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 710c4ff7bc0e..7bd414aed6ff 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -717,7 +717,7 @@ static int free_tx_poll(struct napi_struct *napi, int budget)
 
 		/* Reading a WQEBB to get real WQE size and consumer index. */
 		sq_wqe = hinic_sq_read_wqebb(sq, &skb, &wqe_size, &sw_ci);
-		if ((!sq_wqe) ||
+		if (!sq_wqe ||
 		    (((hw_ci - sw_ci) & wq->mask) * wq->wqebb_size < wqe_size))
 			break;
 

From 5db8c86e8904e2dc9c27a61fc9b77131c8751d67 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 13 May 2021 14:26:53 +0800
Subject: [PATCH 0105/3292] net: hinic: fix misspelled "acessing"

The word "acessing" is misspelled, so fix it.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index cab38ff0713c..55b327eebe64 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -395,7 +395,7 @@ static void __print_selftest_reg(struct hinic_hwif *hwif)
 /**
  * hinic_init_hwif - initialize the hw interface
  * @hwif: the HW interface of a pci function device
- * @pdev: the pci device for acessing PCI resources
+ * @pdev: the pci device for accessing PCI resources
  *
  * Return 0 - Success, negative - Failure
  **/

From f19d4997fd1fb01bed127e1056ce3a5de922d9ee Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 13 May 2021 14:43:22 +0300
Subject: [PATCH 0106/3292] atl1c: show correct link speed on Mikrotik 10/25G
 NIC

The new Mikrotik 10/25G NIC maintains compatibility with existing atl1c
driver. However it does have new features.

This defines some new register offsets, code for identifying the new type
of NIC and correct speed detection for the NIC.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c.h      | 1 +
 drivers/net/ethernet/atheros/atl1c/atl1c_hw.c   | 9 +++++++++
 drivers/net/ethernet/atheros/atl1c/atl1c_hw.h   | 7 +++++++
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 4 ++++
 4 files changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 28ae5c16831e..3fda7eb3bd69 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -289,6 +289,7 @@ enum atl1c_nic_type {
 	athr_l2c_b2,
 	athr_l1d,
 	athr_l1d_2,
+	athr_mt,
 };
 
 enum atl1c_trans_queue {
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
index 140358dcf61e..ddb9442416cd 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
@@ -648,6 +648,15 @@ int atl1c_get_speed_and_duplex(struct atl1c_hw *hw, u16 *speed, u16 *duplex)
 	int err;
 	u16 phy_data;
 
+	if (hw->nic_type == athr_mt) {
+		u32 spd;
+
+		AT_READ_REG(hw, REG_MT_SPEED, &spd);
+		*speed = spd;
+		*duplex = FULL_DUPLEX;
+		return 0;
+	}
+
 	/* Read   PHY Specific Status Register (17) */
 	err = atl1c_read_phy_reg(hw, MII_GIGA_PSSR, &phy_data);
 	if (err)
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
index ce1a123dce2c..73cbc049a63e 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
@@ -764,6 +764,13 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 #define REG_DEBUG_DATA0 		0x1900
 #define REG_DEBUG_DATA1 		0x1904
 
+#define REG_MT_MAGIC			0x1F00
+#define REG_MT_MODE			0x1F04
+#define REG_MT_SPEED			0x1F08
+#define REG_MT_VERSION			0x1F0C
+
+#define MT_MAGIC			0xaabb1234
+
 #define L1D_MPW_PHYID1			0xD01C  /* V7 */
 #define L1D_MPW_PHYID2			0xD01D  /* V1-V6 */
 #define L1D_MPW_PHYID3			0xD01E  /* V8 */
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index c6263cf8d3c0..28c30d5288e4 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -644,6 +644,7 @@ static int atl1c_alloc_queues(struct atl1c_adapter *adapter)
 
 static void atl1c_set_mac_type(struct atl1c_hw *hw)
 {
+	u32 magic;
 	switch (hw->device_id) {
 	case PCI_DEVICE_ID_ATTANSIC_L2C:
 		hw->nic_type = athr_l2c;
@@ -662,6 +663,9 @@ static void atl1c_set_mac_type(struct atl1c_hw *hw)
 		break;
 	case PCI_DEVICE_ID_ATHEROS_L1D_2_0:
 		hw->nic_type = athr_l1d_2;
+		AT_READ_REG(hw, REG_MT_MAGIC, &magic);
+		if (magic == MT_MAGIC)
+			hw->nic_type = athr_mt;
 		break;
 	default:
 		break;

From d7ab6419bdee50dbc4a53e69c290a1ef05dae7f9 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 13 May 2021 14:43:23 +0300
Subject: [PATCH 0107/3292] atl1c: improve performance by avoiding unnecessary
 pcie writes on xmit

The kernel has xmit_more facility that hints the networking driver xmit
path about whether more packets are coming soon. This information can be
used to avoid unnecessary expensive PCIe transaction per tx packet.

Max TX pps on Mikrotik 10/25G NIC in a Threadripper 3960X system
improved from 1150Kpps to 1700Kpps.

Testing L2 forwarding on AR8151 hardware did not reveal a measurable
increase in latency.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 28c30d5288e4..08a0f49e03ce 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2211,8 +2211,8 @@ err_dma:
 	return -1;
 }
 
-static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb,
-			   struct atl1c_tpd_desc *tpd, enum atl1c_trans_queue type)
+static void atl1c_tx_queue(struct atl1c_adapter *adapter,
+			   enum atl1c_trans_queue type)
 {
 	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
 	u16 reg;
@@ -2238,6 +2238,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 
 	if (atl1c_tpd_avail(adapter, type) < tpd_req) {
 		/* no enough descriptor, just stop queue */
+		atl1c_tx_queue(adapter, type);
 		netif_stop_queue(netdev);
 		return NETDEV_TX_BUSY;
 	}
@@ -2246,6 +2247,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 
 	/* do TSO and check sum */
 	if (atl1c_tso_csum(adapter, skb, &tpd, type) != 0) {
+		atl1c_tx_queue(adapter, type);
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
@@ -2270,8 +2272,10 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 		atl1c_tx_rollback(adapter, tpd, type);
 		dev_kfree_skb_any(skb);
 	} else {
-		netdev_sent_queue(adapter->netdev, skb->len);
-		atl1c_tx_queue(adapter, skb, tpd, type);
+		bool more = netdev_xmit_more();
+
+		if (__netdev_sent_queue(adapter->netdev, skb->len, more))
+			atl1c_tx_queue(adapter, type);
 	}
 
 	return NETDEV_TX_OK;

From 545fa3fb1e84855820a5ed25053d6a5afbd30900 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 13 May 2021 14:43:24 +0300
Subject: [PATCH 0108/3292] atl1c: adjust max mtu according to Mikrotik 10/25G
 NIC ability

The new Mikrotik 10/25G NIC supports jumbo frames. Jumbo frames are
supported for TSO as well.

This enables the support for mtu up to 9500 bytes.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 08a0f49e03ce..dbafd8118a86 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -478,6 +478,9 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 static netdev_features_t atl1c_fix_features(struct net_device *netdev,
 	netdev_features_t features)
 {
+	struct atl1c_adapter *adapter = netdev_priv(netdev);
+	struct atl1c_hw *hw = &adapter->hw;
+
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
 	 * enable/disable make sure tx flag is always in same state as rx.
@@ -487,8 +490,10 @@ static netdev_features_t atl1c_fix_features(struct net_device *netdev,
 	else
 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
 
-	if (netdev->mtu > MAX_TSO_FRAME_SIZE)
-		features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
+	if (hw->nic_type != athr_mt) {
+		if (netdev->mtu > MAX_TSO_FRAME_SIZE)
+			features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
+	}
 
 	return features;
 }
@@ -515,9 +520,12 @@ static void atl1c_set_max_mtu(struct net_device *netdev)
 	case athr_l1d:
 	case athr_l1d_2:
 		netdev->max_mtu = MAX_JUMBO_FRAME_SIZE -
-				  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+			(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
 		break;
-	/* The 10/100 devices don't support jumbo packets, max_mtu 1500 */
+	case athr_mt:
+		netdev->max_mtu = 9500;
+		break;
+		/* The 10/100 devices don't support jumbo packets, max_mtu 1500 */
 	default:
 		netdev->max_mtu = ETH_DATA_LEN;
 		break;

From b0390009502b0c635f9ddc26a16025268f6c6211 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 13 May 2021 14:43:25 +0300
Subject: [PATCH 0109/3292] atl1c: enable rx csum offload on Mikrotik 10/25G
 NIC

Mikrotik 10/25G NIC supports hw checksum verification on rx for
IP/IPv6 + TCP/UDP packets. HW checksum offload helps reduce host
cpu load.

This enables the csum offload specifically for Mikrotik 10/25G NIC
as other HW supported by the driver is known to have problems with it.

TCP iperf3 to Threadripper 3960X with NIC improved 16.5 -> 20.0 Gbps
with mtu=1500.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c.h      | 2 ++
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 3fda7eb3bd69..9d70cb7544f1 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -241,6 +241,8 @@ struct atl1c_tpd_ext_desc {
 #define RRS_PACKET_PROT_IS_IPV6_ONLY(word) \
 	((((word) >> RRS_PROT_ID_SHIFT) & RRS_PROT_ID_MASK) == 6)
 
+#define RRS_MT_PROT_ID_TCPUDP	BIT(19)
+
 struct atl1c_recv_ret_status {
 	__le32  word0;
 	__le32	rss_hash;
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index dbafd8118a86..9693da5028cf 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1671,6 +1671,11 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
 		  struct sk_buff *skb, struct atl1c_recv_ret_status *prrs)
 {
+	if (adapter->hw.nic_type == athr_mt) {
+		if (prrs->word3 & RRS_MT_PROT_ID_TCPUDP)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		return;
+	}
 	/*
 	 * The pid field in RRS in not correct sometimes, so we
 	 * cannot figure out if the packet is fragmented or not,

From ea0fbd05d7bd3298290d3579a837311ee5ceaf18 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 13 May 2021 14:43:26 +0300
Subject: [PATCH 0110/3292] atl1c: improve link detection reliability on
 Mikrotik 10/25G NIC

Mikrotik 10/25G NIC emulates the MDIO accesses, but the emulation is
not 100% reliable - the MDIO ops occasionally can timeout.

This adds a reliable way of detecting link on Mikrotik 10/25G NIC.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_hw.c | 26 ++++++++++++++-----
 drivers/net/ethernet/atheros/atl1c/atl1c_hw.h |  1 +
 .../net/ethernet/atheros/atl1c/atl1c_main.c   | 18 +++++--------
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
index ddb9442416cd..7dff20350865 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
@@ -636,6 +636,23 @@ int atl1c_phy_init(struct atl1c_hw *hw)
 	return 0;
 }
 
+bool atl1c_get_link_status(struct atl1c_hw *hw)
+{
+	u16 phy_data;
+
+	if (hw->nic_type == athr_mt) {
+		u32 spd;
+
+		AT_READ_REG(hw, REG_MT_SPEED, &spd);
+		return !!spd;
+	}
+
+	/* MII_BMSR must be read twice */
+	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+	return !!(phy_data & BMSR_LSTATUS);
+}
+
 /*
  * Detects the current speed and duplex settings of the hardware.
  *
@@ -695,15 +712,12 @@ int atl1c_phy_to_ps_link(struct atl1c_hw *hw)
 	int ret = 0;
 	u16 autoneg_advertised = ADVERTISED_10baseT_Half;
 	u16 save_autoneg_advertised;
-	u16 phy_data;
 	u16 mii_lpa_data;
 	u16 speed = SPEED_0;
 	u16 duplex = FULL_DUPLEX;
 	int i;
 
-	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
-	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
-	if (phy_data & BMSR_LSTATUS) {
+	if (atl1c_get_link_status(hw)) {
 		atl1c_read_phy_reg(hw, MII_LPA, &mii_lpa_data);
 		if (mii_lpa_data & LPA_10FULL)
 			autoneg_advertised = ADVERTISED_10baseT_Full;
@@ -726,9 +740,7 @@ int atl1c_phy_to_ps_link(struct atl1c_hw *hw)
 		if (mii_lpa_data) {
 			for (i = 0; i < AT_SUSPEND_LINK_TIMEOUT; i++) {
 				mdelay(100);
-				atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
-				atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
-				if (phy_data & BMSR_LSTATUS) {
+				if (atl1c_get_link_status(hw)) {
 					if (atl1c_get_speed_and_duplex(hw, &speed,
 									&duplex) != 0)
 						dev_dbg(&pdev->dev,
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
index 73cbc049a63e..c263b326cec5 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
@@ -26,6 +26,7 @@ void atl1c_phy_disable(struct atl1c_hw *hw);
 void atl1c_hw_set_mac_addr(struct atl1c_hw *hw, u8 *mac_addr);
 int atl1c_phy_reset(struct atl1c_hw *hw);
 int atl1c_read_mac_addr(struct atl1c_hw *hw);
+bool atl1c_get_link_status(struct atl1c_hw *hw);
 int atl1c_get_speed_and_duplex(struct atl1c_hw *hw, u16 *speed, u16 *duplex);
 u32 atl1c_hash_mc_addr(struct atl1c_hw *hw, u8 *mc_addr);
 void atl1c_hash_set(struct atl1c_hw *hw, u32 hash_value);
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 9693da5028cf..740127a6a21d 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -232,15 +232,14 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter)
 	struct pci_dev    *pdev   = adapter->pdev;
 	int err;
 	unsigned long flags;
-	u16 speed, duplex, phy_data;
+	u16 speed, duplex;
+	bool link;
 
 	spin_lock_irqsave(&adapter->mdio_lock, flags);
-	/* MII_BMSR must read twise */
-	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
-	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+	link = atl1c_get_link_status(hw);
 	spin_unlock_irqrestore(&adapter->mdio_lock, flags);
 
-	if ((phy_data & BMSR_LSTATUS) == 0) {
+	if (!link) {
 		/* link down */
 		netif_carrier_off(netdev);
 		hw->hibernate = true;
@@ -284,16 +283,13 @@ static void atl1c_link_chg_event(struct atl1c_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev    *pdev   = adapter->pdev;
-	u16 phy_data;
-	u16 link_up;
+	bool link;
 
 	spin_lock(&adapter->mdio_lock);
-	atl1c_read_phy_reg(&adapter->hw, MII_BMSR, &phy_data);
-	atl1c_read_phy_reg(&adapter->hw, MII_BMSR, &phy_data);
+	link = atl1c_get_link_status(&adapter->hw);
 	spin_unlock(&adapter->mdio_lock);
-	link_up = phy_data & BMSR_LSTATUS;
 	/* notify upper layer link down ASAP */
-	if (!link_up) {
+	if (!link) {
 		if (netif_carrier_ok(netdev)) {
 			/* old link state: Up */
 			netif_carrier_off(netdev);

From 6c66c147b9a4004f880cbd04bf01b40cf6f98018 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Thu, 13 May 2021 19:48:07 +0800
Subject: [PATCH 0111/3292] virtio-net: fix for unable to handle page fault for
 address

In merge mode, when xdp is enabled, if the headroom of buf is smaller
than virtnet_get_headroom(), xdp_linearize_page() will be called but the
variable of "headroom" is still 0, which leads to wrong logic after
entering page_to_skb().

[   16.600944] BUG: unable to handle page fault for address: ffffecbfff7b43c8[   16.602175] #PF: supervisor read access in kernel mode
[   16.603350] #PF: error_code(0x0000) - not-present page
[   16.604200] PGD 0 P4D 0
[   16.604686] Oops: 0000 [#1] SMP PTI
[   16.605306] CPU: 4 PID: 715 Comm: sh Tainted: G    B             5.12.0+ #312
[   16.606429] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/04
[   16.608217] RIP: 0010:unmap_page_range+0x947/0xde0
[   16.609014] Code: 00 00 08 00 48 83 f8 01 45 19 e4 41 f7 d4 41 83 e4 03 e9 a4 fd ff ff e8 b7 63 ed ff 4c 89 e0 48 c1 e0 065
[   16.611863] RSP: 0018:ffffc90002503c58 EFLAGS: 00010286
[   16.612720] RAX: ffffecbfff7b43c0 RBX: 00007f19f7203000 RCX: ffffffff812ff359
[   16.613853] RDX: ffff888107778000 RSI: 0000000000000000 RDI: 0000000000000005
[   16.614976] RBP: ffffea000425e000 R08: 0000000000000000 R09: 3030303030303030
[   16.616124] R10: ffffffff82ed7d94 R11: 6637303030302052 R12: 7c00000afffded0f
[   16.617276] R13: 0000000000000001 R14: ffff888119ee7010 R15: 00007f19f7202000
[   16.618423] FS:  0000000000000000(0000) GS:ffff88842fd00000(0000) knlGS:0000000000000000
[   16.619738] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   16.620670] CR2: ffffecbfff7b43c8 CR3: 0000000103220005 CR4: 0000000000370ee0
[   16.621792] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   16.622920] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   16.624047] Call Trace:
[   16.624525]  ? release_pages+0x24d/0x730
[   16.625209]  unmap_single_vma+0xa9/0x130
[   16.625885]  unmap_vmas+0x76/0xf0
[   16.626480]  exit_mmap+0xa0/0x210
[   16.627129]  mmput+0x67/0x180
[   16.627673]  do_exit+0x3d1/0xf10
[   16.628259]  ? do_user_addr_fault+0x231/0x840
[   16.629000]  do_group_exit+0x53/0xd0
[   16.629631]  __x64_sys_exit_group+0x1d/0x20
[   16.630354]  do_syscall_64+0x3c/0x80
[   16.630988]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   16.631828] RIP: 0033:0x7f1a043d0191
[   16.632464] Code: Unable to access opcode bytes at RIP 0x7f1a043d0167.
[   16.633502] RSP: 002b:00007ffe3d993308 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
[   16.634737] RAX: ffffffffffffffda RBX: 00007f1a044c9490 RCX: 00007f1a043d0191
[   16.635857] RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000
[   16.636986] RBP: 0000000000000000 R08: ffffffffffffff88 R09: 0000000000000001
[   16.638120] R10: 0000000000000008 R11: 0000000000000246 R12: 00007f1a044c9490
[   16.639245] R13: 0000000000000001 R14: 00007f1a044c9968 R15: 0000000000000000
[   16.640408] Modules linked in:
[   16.640958] CR2: ffffecbfff7b43c8
[   16.641557] ---[ end trace bc4891c6ce46354c ]---
[   16.642335] RIP: 0010:unmap_page_range+0x947/0xde0
[   16.643135] Code: 00 00 08 00 48 83 f8 01 45 19 e4 41 f7 d4 41 83 e4 03 e9 a4 fd ff ff e8 b7 63 ed ff 4c 89 e0 48 c1 e0 065
[   16.645983] RSP: 0018:ffffc90002503c58 EFLAGS: 00010286
[   16.646845] RAX: ffffecbfff7b43c0 RBX: 00007f19f7203000 RCX: ffffffff812ff359
[   16.647970] RDX: ffff888107778000 RSI: 0000000000000000 RDI: 0000000000000005
[   16.649091] RBP: ffffea000425e000 R08: 0000000000000000 R09: 3030303030303030
[   16.650250] R10: ffffffff82ed7d94 R11: 6637303030302052 R12: 7c00000afffded0f
[   16.651394] R13: 0000000000000001 R14: ffff888119ee7010 R15: 00007f19f7202000
[   16.652529] FS:  0000000000000000(0000) GS:ffff88842fd00000(0000) knlGS:0000000000000000
[   16.653887] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   16.654841] CR2: ffffecbfff7b43c8 CR3: 0000000103220005 CR4: 0000000000370ee0
[   16.655992] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   16.657150] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   16.658290] Kernel panic - not syncing: Fatal exception
[   16.659613] Kernel Offset: disabled
[   16.660234] ---[ end Kernel panic - not syncing: Fatal exception ]---

Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9b6a4a875c55..3e46c12dde08 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct page *page, unsigned int offset,
 				   unsigned int len, unsigned int truesize,
 				   bool hdr_valid, unsigned int metasize,
-				   unsigned int headroom)
+				   bool whole_page)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -398,12 +398,12 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	/* If headroom is not 0, there is an offset between the beginning of the
+	/* If whole_page, there is an offset between the beginning of the
 	 * data and the allocated space, otherwise the data and the allocated
 	 * space are aligned.
 	 */
-	if (headroom) {
-		/* Buffers with headroom use PAGE_SIZE as alloc size,
+	if (whole_page) {
+		/* Buffers with whole_page use PAGE_SIZE as alloc size,
 		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
 		 */
 		truesize = PAGE_SIZE;
@@ -958,7 +958,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page, offset,
 						       len, PAGE_SIZE, false,
-						       metasize, headroom);
+						       metasize, true);
 				return head_skb;
 			}
 			break;
@@ -1016,7 +1016,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-			       metasize, headroom);
+			       metasize, !!headroom);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))

From 7bf64460e3b2af4e6e46d932b2fbd933d662d19f Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Thu, 13 May 2021 19:48:08 +0800
Subject: [PATCH 0112/3292] virtio-net: get build_skb() buf by data ptr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the case of merge, the page passed into page_to_skb() may be a head
page, not the page where the current data is located. So when trying to
get the buf where the data is located, you should directly use the
pointer(p) to get the address corresponding to the page.

At the same time, the offset of the data in the page should also be
obtained using offset_in_page().

This patch solves this problem. But if you don’t use this patch, the
original code can also run, because if the page is not the page of the
current data, the calculated tailroom will be less than 0, and will not
enter the logic of build_skb() . The significance of this patch is to
modify this logical problem, allowing more situations to use
build_skb().

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3e46c12dde08..073fec4c0df1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -407,8 +407,12 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
 		 */
 		truesize = PAGE_SIZE;
-		tailroom = truesize - len - offset;
-		buf = page_address(page);
+
+		/* page maybe head page, so we should get the buf by p, not the
+		 * page
+		 */
+		tailroom = truesize - len - offset_in_page(p);
+		buf = (char *)((unsigned long)p & PAGE_MASK);
 	} else {
 		tailroom = truesize - len;
 		buf = p;

From 5efe2575316f97aa6d004fc0534e48ea85acdff3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 13 May 2021 12:49:10 +0100
Subject: [PATCH 0113/3292] net: qed: remove redundant initialization of
 variable rc

The variable rc is being initialized with a value that is never read,
it is being updated later on.  The assignment is redundant and can be
removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4eae4ee3538f..448567a1f520 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -453,7 +453,7 @@ static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn,
 	struct iscsi_conn_update_ramrod_params *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	int rc = -EINVAL;
+	int rc;
 	u32 dval;
 
 	/* Get SPQ entry */

From 0f3ee280331e28b81560715356d47351a6016bce Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 13 May 2021 09:58:40 -0700
Subject: [PATCH 0114/3292] net: caif: Drop unnecessary NULL check after
 container_of

The first parameter passed to chnl_recv_cb() can never be NULL since all
callers dereferenced it. Consequently, container_of() on it is also never
NULL, even though the reference into the structure points to the first
element of the structure. The NULL check is therefore unnecessary.
On top of that, it is misleading to perform a NULL check on the result of
container_of() because the position of the contained element could change,
which would make the test invalid. Remove the unnecessary NULL check.

This change was made automatically with the following Coccinelle script.

@@
type t;
identifier v;
statement s;
@@

<+...
(
  t v = container_of(...);
|
  v = container_of(...);
)
  ...
  when != v
- if (\( !v \| v == NULL \) ) s
...+>

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index fadc7c8a3107..37b67194c0df 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -76,8 +76,6 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 	u8 buf;
 
 	priv = container_of(layr, struct chnl_net, chnl);
-	if (!priv)
-		return -EINVAL;
 
 	skb = (struct sk_buff *) cfpkt_tonative(pkt);
 

From fe0bdaec8dea9912b95296d758422d95aa57fac0 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 13 May 2021 16:50:49 -0500
Subject: [PATCH 0115/3292] bpf: Use struct_size() in kzalloc()

Make use of the struct_size() helper instead of an open-coded version,
in order to avoid any potential type mistakes or integer overflows
that, in the worst scenario, could lead to heap overflows.

This code was detected with the help of Coccinelle and, audited and
fixed manually.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/bpf_sk_storage.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cc3712ad8716..f564f82e91d9 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -524,8 +524,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
 			nr_maps++;
 	}
 
-	diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps,
-		       GFP_KERNEL);
+	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
 	if (!diag)
 		return ERR_PTR(-ENOMEM);
 

From ea89c862f01e02ec459932c7c3113fa37aedd09a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 13 May 2021 17:49:14 -0500
Subject: [PATCH 0116/3292] net: mana: Use struct_size() in kzalloc()

Make use of the struct_size() helper instead of an open-coded version,
in order to avoid any potential type mistakes or integer overflows
that, in the worst scenario, could lead to heap overflows.

This code was detected with the help of Coccinelle and, audited and
fixed manually.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 04d067243457..46aee2c49f1b 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1387,8 +1387,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	gc = gd->gdma_context;
 
-	rxq = kzalloc(sizeof(*rxq) +
-		      RX_BUFFERS_PER_QUEUE * sizeof(struct mana_recv_buf_oob),
+	rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
 		      GFP_KERNEL);
 	if (!rxq)
 		return NULL;

From 9e9b451593b161403aedf15eef8ced62dba65f40 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 13 May 2021 16:36:42 -0700
Subject: [PATCH 0117/3292] selftests/bpf: Validate skeleton gen handles
 skipped fields

Adjust static_linked selftests to test a mix of global and static variables
and their handling of bpftool's skeleton generation code.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210513233643.194711-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/static_linked.c  | 4 ++--
 tools/testing/selftests/bpf/progs/linked_maps1.c        | 2 +-
 tools/testing/selftests/bpf/progs/test_static_linked1.c | 2 +-
 tools/testing/selftests/bpf/progs/test_static_linked2.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
index ab6acbaf9d8c..5c4e3014e063 100644
--- a/tools/testing/selftests/bpf/prog_tests/static_linked.c
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -27,8 +27,8 @@ void test_static_linked(void)
 	/* trigger */
 	usleep(1);
 
-	ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
-	ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+	ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1");
+	ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2");
 
 cleanup:
 	test_static_linked__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
index 52291515cc72..00bf1ca95986 100644
--- a/tools/testing/selftests/bpf/progs/linked_maps1.c
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -75,7 +75,7 @@ int BPF_PROG(handler_exit1)
 	val = bpf_map_lookup_elem(&map_weak, &key);
 	if (val)
 		output_weak1 = *val;
-	
+
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
index cae304045d9c..4f0b612e1661 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked1.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -7,7 +7,7 @@
 /* 8-byte aligned .data */
 static volatile long static_var1 = 2;
 static volatile int static_var2 = 3;
-int var1 = 0;
+int var1 = -1;
 /* 4-byte aligned .rodata */
 const volatile int rovar1;
 
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
index c54c4e865ed8..766ebd502a60 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked2.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -7,7 +7,7 @@
 /* 4-byte aligned .data */
 static volatile int static_var1 = 5;
 static volatile int static_var2 = 6;
-int var2 = 0;
+int var2 = -1;
 /* 8-byte aligned .rodata */
 const volatile long rovar2;
 

From c1cccec9c63637c4c5ee0aa2da2850d983c19e88 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 13 May 2021 16:36:43 -0700
Subject: [PATCH 0118/3292] libbpf: Reject static maps

Static maps never really worked with libbpf, because all such maps were always
silently resolved to the very first map. Detect static maps (both legacy and
BTF-defined) and report user-friendly error.

Tested locally by switching few maps (legacy and BTF-defined) in selftests to
static ones and verifying that now libbpf rejects them loudly.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210513233643.194711-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b8cf93fa1b4d..182bd3d3f728 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1795,7 +1795,6 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 	if (!symbols)
 		return -EINVAL;
 
-
 	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
 	data = elf_sec_data(obj, scn);
 	if (!scn || !data) {
@@ -1855,6 +1854,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
+		if (GELF_ST_TYPE(sym.st_info) == STT_SECTION
+		    || GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
+			return -ENOTSUP;
+		}
+
 		map->libbpf_type = LIBBPF_MAP_UNSPEC;
 		map->sec_idx = sym.st_shndx;
 		map->sec_offset = sym.st_value;
@@ -2262,6 +2267,16 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
 		pr_debug("map '%s': found inner map definition.\n", map->name);
 }
 
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+	switch (linkage) {
+	case BTF_VAR_STATIC: return "static";
+	case BTF_VAR_GLOBAL_ALLOCATED: return "global";
+	case BTF_VAR_GLOBAL_EXTERN: return "extern";
+	default: return "unknown";
+	}
+}
+
 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 					 const struct btf_type *sec,
 					 int var_idx, int sec_idx,
@@ -2294,10 +2309,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			map_name, btf_kind_str(var));
 		return -EINVAL;
 	}
-	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
-	    var_extra->linkage != BTF_VAR_STATIC) {
-		pr_warn("map '%s': unsupported var linkage %u.\n",
-			map_name, var_extra->linkage);
+	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
+		pr_warn("map '%s': unsupported map linkage %s.\n",
+			map_name, btf_var_linkage_str(var_extra->linkage));
 		return -EOPNOTSUPP;
 	}
 

From 0a37f32ba5272b2d4ec8c8d0f6b212b81b578f7e Mon Sep 17 00:00:00 2001
From: Bixuan Cui <cuibixuan@huawei.com>
Date: Wed, 12 May 2021 11:37:27 +0800
Subject: [PATCH 0119/3292] EDAC/ti: Add missing MODULE_DEVICE_TABLE

The module misses MODULE_DEVICE_TABLE() for of_device_id tables and thus
never autoloads on ID matches.

Add the missing declaration.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tero Kristo <kristo@kernel.org>
Link: https://lkml.kernel.org/r/20210512033727.26701-1-cuibixuan@huawei.com
---
 drivers/edac/ti_edac.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index e7eae20f83d1..169f96e51c29 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -197,6 +197,7 @@ static const struct of_device_id ti_edac_of_match[] = {
 	{ .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 },
 	{},
 };
+MODULE_DEVICE_TABLE(of, ti_edac_of_match);
 
 static int _emif_get_id(struct device_node *node)
 {

From 335a2a1fcefc948927e8c15636d9dc5d983b8f50 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Sun, 25 Apr 2021 18:14:32 +0800
Subject: [PATCH 0120/3292] esp: drop unneeded assignment in esp4_gro_receive()

Making '!=' operation with 0 directly after calling
the function xfrm_parse_spi() is more efficient,
assignment to err is redundant.

Eliminate the following clang_analyzer warning:
net/ipv4/esp4_offload.c:41:7: warning: Although the value stored to
'err' is used in the enclosing expression, the value is never actually
read from 'err'

No functional change, only more efficient.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4_offload.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 33687cf58286..be019a1fe3af 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,12 +33,11 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	struct xfrm_state *x;
 	__be32 seq;
 	__be32 spi;
-	int err;
 
 	if (!pskb_pull(skb, offset))
 		return NULL;
 
-	if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
+	if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0)
 		goto out;
 
 	xo = xfrm_offload(skb);

From fe9f1d8779cb47046e76ea209b6eece7ec56d1b4 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Sun, 25 Apr 2021 21:47:12 +0200
Subject: [PATCH 0121/3292] xfrm: add state hashtable keyed by seq

When creating new states with seq set in xfrm_usersa_info, we walk
through all the states already installed in that netns to find a
matching ACQUIRE state (__xfrm_find_acq_byseq, called from
xfrm_state_add). This causes severe slowdowns on systems with a large
number of states.

This patch introduces a hashtable using x->km.seq as key, so that the
corresponding state can be found in a reasonable time.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h |  1 +
 include/net/xfrm.h       |  1 +
 net/xfrm/xfrm_hash.h     |  7 +++++
 net/xfrm/xfrm_state.c    | 65 ++++++++++++++++++++++++++++++++--------
 4 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index e816b6a3ef2b..e946366e8ba5 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -42,6 +42,7 @@ struct netns_xfrm {
 	struct hlist_head	__rcu *state_bydst;
 	struct hlist_head	__rcu *state_bysrc;
 	struct hlist_head	__rcu *state_byspi;
+	struct hlist_head	__rcu *state_byseq;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c58a6d4eb610..6e11db6fa0ab 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -154,6 +154,7 @@ struct xfrm_state {
 	};
 	struct hlist_node	bysrc;
 	struct hlist_node	byspi;
+	struct hlist_node	byseq;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index ce66323102f9..d12bb906c9c9 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -131,6 +131,13 @@ __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto,
 	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
 }
 
+static inline unsigned int
+__xfrm_seq_hash(u32 seq, unsigned int hmask)
+{
+	unsigned int h = seq;
+	return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
+}
+
 static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
 {
 	return (index ^ (index >> 8)) & hmask;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4496f7efa220..8f6058e56f7f 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -78,10 +78,16 @@ xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
 	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
 }
 
+static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
+{
+	return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
+}
+
 static void xfrm_hash_transfer(struct hlist_head *list,
 			       struct hlist_head *ndsttable,
 			       struct hlist_head *nsrctable,
 			       struct hlist_head *nspitable,
+			       struct hlist_head *nseqtable,
 			       unsigned int nhashmask)
 {
 	struct hlist_node *tmp;
@@ -106,6 +112,11 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 					    nhashmask);
 			hlist_add_head_rcu(&x->byspi, nspitable + h);
 		}
+
+		if (x->km.seq) {
+			h = __xfrm_seq_hash(x->km.seq, nhashmask);
+			hlist_add_head_rcu(&x->byseq, nseqtable + h);
+		}
 	}
 }
 
@@ -117,7 +128,7 @@ static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
 static void xfrm_hash_resize(struct work_struct *work)
 {
 	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
-	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
+	struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
 	unsigned long nsize, osize;
 	unsigned int nhashmask, ohashmask;
 	int i;
@@ -137,6 +148,13 @@ static void xfrm_hash_resize(struct work_struct *work)
 		xfrm_hash_free(nsrc, nsize);
 		return;
 	}
+	nseq = xfrm_hash_alloc(nsize);
+	if (!nseq) {
+		xfrm_hash_free(ndst, nsize);
+		xfrm_hash_free(nsrc, nsize);
+		xfrm_hash_free(nspi, nsize);
+		return;
+	}
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 	write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
@@ -144,15 +162,17 @@ static void xfrm_hash_resize(struct work_struct *work)
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 	for (i = net->xfrm.state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
+		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
 
 	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
 	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
+	oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
 	ohashmask = net->xfrm.state_hmask;
 
 	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
 	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
 	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
+	rcu_assign_pointer(net->xfrm.state_byseq, nseq);
 	net->xfrm.state_hmask = nhashmask;
 
 	write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
@@ -165,6 +185,7 @@ static void xfrm_hash_resize(struct work_struct *work)
 	xfrm_hash_free(odst, osize);
 	xfrm_hash_free(osrc, osize);
 	xfrm_hash_free(ospi, osize);
+	xfrm_hash_free(oseq, osize);
 }
 
 static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
@@ -621,6 +642,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
+		INIT_HLIST_NODE(&x->byseq);
 		hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
 		x->mtimer.function = xfrm_timer_handler;
 		timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
@@ -664,6 +686,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		list_del(&x->km.all);
 		hlist_del_rcu(&x->bydst);
 		hlist_del_rcu(&x->bysrc);
+		if (x->km.seq)
+			hlist_del_rcu(&x->byseq);
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1148,6 +1172,10 @@ found:
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 			}
+			if (x->km.seq) {
+				h = xfrm_seq_hash(net, x->km.seq);
+				hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 			hrtimer_start(&x->mtimer,
 				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
@@ -1263,6 +1291,12 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
+	if (x->km.seq) {
+		h = xfrm_seq_hash(net, x->km.seq);
+
+		hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+	}
+
 	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
 	if (x->replay_maxage)
 		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1932,20 +1966,18 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 
 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
 {
-	int i;
+	unsigned int h = xfrm_seq_hash(net, seq);
+	struct xfrm_state *x;
 
-	for (i = 0; i <= net->xfrm.state_hmask; i++) {
-		struct xfrm_state *x;
-
-		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
-			if (x->km.seq == seq &&
-			    (mark & x->mark.m) == x->mark.v &&
-			    x->km.state == XFRM_STATE_ACQ) {
-				xfrm_state_hold(x);
-				return x;
-			}
+	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
+		if (x->km.seq == seq &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->km.state == XFRM_STATE_ACQ) {
+			xfrm_state_hold(x);
+			return x;
 		}
 	}
+
 	return NULL;
 }
 
@@ -2660,6 +2692,9 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_byspi)
 		goto out_byspi;
+	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
+	if (!net->xfrm.state_byseq)
+		goto out_byseq;
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	net->xfrm.state_num = 0;
@@ -2669,6 +2704,8 @@ int __net_init xfrm_state_init(struct net *net)
 			       &net->xfrm.xfrm_state_lock);
 	return 0;
 
+out_byseq:
+	xfrm_hash_free(net->xfrm.state_byspi, sz);
 out_byspi:
 	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 out_bysrc:
@@ -2688,6 +2725,8 @@ void xfrm_state_fini(struct net *net)
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
 	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
+	WARN_ON(!hlist_empty(net->xfrm.state_byseq));
+	xfrm_hash_free(net->xfrm.state_byseq, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
 	xfrm_hash_free(net->xfrm.state_byspi, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));

From bbc6f2cca74e548914a7705f5c39549c28ab8815 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Fri, 14 May 2021 10:32:33 +0300
Subject: [PATCH 0122/3292] net: bridge: fix br_multicast_is_router stub when
 igmp is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

br_multicast_is_router takes two arguments when bridge IGMP is enabled
and just one when it's disabled, fix the stub to take two as well.

Fixes: 1a3065a26807 ("net: bridge: mcast: prepare is-router function for mcast router split")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Acked-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 03197ab4af76..ec661130c2d0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1068,7 +1068,8 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 {
 }
 
-static inline bool br_multicast_is_router(struct net_bridge *br)
+static inline bool br_multicast_is_router(struct net_bridge *br,
+					  struct sk_buff *skb)
 {
 	return false;
 }

From 03b30cc38dd3c3521dafb1cb2ac4ecd8470bbf0a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 29 Apr 2021 18:21:21 +0800
Subject: [PATCH 0123/3292] hv_balloon: Remove redundant assignment to
 region_start

Variable region_start is set to pg_start but this value is never
read as it is overwritten later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

drivers/hv/hv_balloon.c:1013:3: warning: Value stored to 'region_start'
is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619691681-86256-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/hv_balloon.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 58af84e30144..7f11ea07d698 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1010,7 +1010,6 @@ static void hot_add_req(struct work_struct *dummy)
 		 * that need to be hot-added while ensuring the alignment
 		 * and size requirements of Linux as it relates to hot-add.
 		 */
-		region_start = pg_start;
 		region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
 		if (pfn_cnt % HA_CHUNK)
 			region_size += HA_CHUNK;

From adae1e931acd8b430d31141a283ea06d4b705417 Mon Sep 17 00:00:00 2001
From: Andres Beltran <lkmlabelt@gmail.com>
Date: Thu, 8 Apr 2021 18:14:39 +0200
Subject: [PATCH 0124/3292] Drivers: hv: vmbus: Copy packets sent by Hyper-V
 out of the ring buffer

Pointers to ring-buffer packets sent by Hyper-V are used within the
guest VM. Hyper-V can send packets with erroneous values or modify
packet fields after they are processed by the guest. To defend
against these scenarios, return a copy of the incoming VMBus packet
after validating its length and offset fields in hv_pkt_iter_first().
In this way, the packet can no longer be modified by the host.

Signed-off-by: Andres Beltran <lkmlabelt@gmail.com>
Co-developed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20210408161439.341988-1-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/channel.c              |  9 ++--
 drivers/hv/hv_fcopy.c             |  1 +
 drivers/hv/hv_kvp.c               |  1 +
 drivers/hv/hyperv_vmbus.h         |  2 +-
 drivers/hv/ring_buffer.c          | 82 ++++++++++++++++++++++++++-----
 drivers/net/hyperv/hyperv_net.h   |  7 +++
 drivers/net/hyperv/netvsc.c       |  2 +
 drivers/net/hyperv/rndis_filter.c |  2 +
 drivers/scsi/storvsc_drv.c        | 10 ++++
 include/linux/hyperv.h            | 48 +++++++++++++++---
 net/vmw_vsock/hyperv_transport.c  |  4 +-
 11 files changed, 143 insertions(+), 25 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index c2635e913a92..bfbca4eeb773 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -662,12 +662,15 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
 	newchannel->onchannel_callback = onchannelcallback;
 	newchannel->channel_callback_context = context;
 
-	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
+	if (!newchannel->max_pkt_size)
+		newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE;
+
+	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0);
 	if (err)
 		goto error_clean_ring;
 
-	err = hv_ringbuffer_init(&newchannel->inbound,
-				 &page[send_pages], recv_pages);
+	err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
+				 recv_pages, newchannel->max_pkt_size);
 	if (err)
 		goto error_clean_ring;
 
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 59ce85e00a02..660036da7449 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -349,6 +349,7 @@ int hv_fcopy_init(struct hv_util_service *srv)
 {
 	recv_buffer = srv->recv_buffer;
 	fcopy_transaction.recv_channel = srv->channel;
+	fcopy_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
 
 	/*
 	 * When this driver loads, the user level daemon that
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index b49962d312ce..c698592b83e4 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -757,6 +757,7 @@ hv_kvp_init(struct hv_util_service *srv)
 {
 	recv_buffer = srv->recv_buffer;
 	kvp_transaction.recv_channel = srv->channel;
+	kvp_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 4;
 
 	/*
 	 * When this driver loads, the user level daemon that
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 9416e09ebd58..42f3d9d123a1 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -174,7 +174,7 @@ extern int hv_synic_cleanup(unsigned int cpu);
 void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
 
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		       struct page *pages, u32 pagecnt);
+		       struct page *pages, u32 pagecnt, u32 max_pkt_size);
 
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
 
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 374f8afbf8a5..e621f8d9b436 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -181,7 +181,7 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
 
 /* Initialize the ring buffer. */
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		       struct page *pages, u32 page_cnt)
+		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
 {
 	int i;
 	struct page **pages_wraparound;
@@ -223,6 +223,14 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 		sizeof(struct hv_ring_buffer);
 	ring_info->priv_read_index = 0;
 
+	/* Initialize buffer that holds copies of incoming packets */
+	if (max_pkt_size) {
+		ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL);
+		if (!ring_info->pkt_buffer)
+			return -ENOMEM;
+		ring_info->pkt_buffer_size = max_pkt_size;
+	}
+
 	spin_lock_init(&ring_info->ring_lock);
 
 	return 0;
@@ -235,6 +243,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 	vunmap(ring_info->ring_buffer);
 	ring_info->ring_buffer = NULL;
 	mutex_unlock(&ring_info->ring_buffer_mutex);
+
+	kfree(ring_info->pkt_buffer);
+	ring_info->pkt_buffer_size = 0;
 }
 
 /* Write to the ring buffer. */
@@ -375,7 +386,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
 	memcpy(buffer, (const char *)desc + offset, packetlen);
 
 	/* Advance ring index to next packet descriptor */
-	__hv_pkt_iter_next(channel, desc);
+	__hv_pkt_iter_next(channel, desc, true);
 
 	/* Notify host of update */
 	hv_pkt_iter_close(channel);
@@ -401,6 +412,22 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 		return (rbi->ring_datasize - priv_read_loc) + write_loc;
 }
 
+/*
+ * Get first vmbus packet without copying it out of the ring buffer
+ */
+struct vmpacket_descriptor *hv_pkt_iter_first_raw(struct vmbus_channel *channel)
+{
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	hv_debug_delay_test(channel, MESSAGE_DELAY);
+
+	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+		return NULL;
+
+	return (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_first_raw);
+
 /*
  * Get first vmbus packet from ring buffer after read_index
  *
@@ -409,17 +436,49 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
 {
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
-	struct vmpacket_descriptor *desc;
+	struct vmpacket_descriptor *desc, *desc_copy;
+	u32 bytes_avail, pkt_len, pkt_offset;
 
-	hv_debug_delay_test(channel, MESSAGE_DELAY);
-	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+	desc = hv_pkt_iter_first_raw(channel);
+	if (!desc)
 		return NULL;
 
-	desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
-	if (desc)
-		prefetch((char *)desc + (desc->len8 << 3));
+	bytes_avail = min(rbi->pkt_buffer_size, hv_pkt_iter_avail(rbi));
 
-	return desc;
+	/*
+	 * Ensure the compiler does not use references to incoming Hyper-V values (which
+	 * could change at any moment) when reading local variables later in the code
+	 */
+	pkt_len = READ_ONCE(desc->len8) << 3;
+	pkt_offset = READ_ONCE(desc->offset8) << 3;
+
+	/*
+	 * If pkt_len is invalid, set it to the smaller of hv_pkt_iter_avail() and
+	 * rbi->pkt_buffer_size
+	 */
+	if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail)
+		pkt_len = bytes_avail;
+
+	/*
+	 * If pkt_offset is invalid, arbitrarily set it to
+	 * the size of vmpacket_descriptor
+	 */
+	if (pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len)
+		pkt_offset = sizeof(struct vmpacket_descriptor);
+
+	/* Copy the Hyper-V packet out of the ring buffer */
+	desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer;
+	memcpy(desc_copy, desc, pkt_len);
+
+	/*
+	 * Hyper-V could still change len8 and offset8 after the earlier read.
+	 * Ensure that desc_copy has legal values for len8 and offset8 that
+	 * are consistent with the copy we just made
+	 */
+	desc_copy->len8 = pkt_len >> 3;
+	desc_copy->offset8 = pkt_offset >> 3;
+
+	return desc_copy;
 }
 EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
 
@@ -431,7 +490,8 @@ EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
  */
 struct vmpacket_descriptor *
 __hv_pkt_iter_next(struct vmbus_channel *channel,
-		   const struct vmpacket_descriptor *desc)
+		   const struct vmpacket_descriptor *desc,
+		   bool copy)
 {
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 	u32 packetlen = desc->len8 << 3;
@@ -444,7 +504,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
 		rbi->priv_read_index -= dsize;
 
 	/* more data? */
-	return hv_pkt_iter_first(channel);
+	return copy ? hv_pkt_iter_first(channel) : hv_pkt_iter_first_raw(channel);
 }
 EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
 
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 442c520ab8f3..b11aa68b44ec 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -895,9 +895,16 @@ static inline u32 netvsc_rqstor_size(unsigned long ringbytes)
 		ringbytes / NETVSC_MIN_IN_MSG_SIZE;
 }
 
+/* XFER PAGE packets can specify a maximum of 375 ranges for NDIS >= 6.0
+ * and a maximum of 64 ranges for NDIS < 6.0 with no RSC; with RSC, this
+ * limit is raised to 562 (= NVSP_RSC_MAX).
+ */
+#define NETVSC_MAX_XFER_PAGE_RANGES NVSP_RSC_MAX
 #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
 		(offsetof(struct vmtransfer_page_packet_header, ranges) + \
 		(rng_cnt) * sizeof(struct vmtransfer_page_range))
+#define NETVSC_MAX_PKT_SIZE (NETVSC_XFER_HEADER_SIZE(NETVSC_MAX_XFER_PAGE_RANGES) + \
+		sizeof(struct nvsp_message) + (sizeof(u32) * VRSS_SEND_TAB_SIZE))
 
 struct multi_send_data {
 	struct sk_buff *skb; /* skb containing the pkt */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9d07c9ce4be2..067077138e52 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1650,6 +1650,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
 
 	/* Open the channel */
 	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+
 	ret = vmbus_open(device->channel, netvsc_ring_bytes,
 			 netvsc_ring_bytes,  NULL, 0,
 			 netvsc_channel_cb, net_device->chan_table);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index c0e89e107d57..d7ff9ddcbae2 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1260,6 +1260,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	nvchan->channel = new_sc;
 
 	new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+	new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+
 	ret = vmbus_open(new_sc, netvsc_ring_bytes,
 			 netvsc_ring_bytes, NULL, 0,
 			 netvsc_channel_cb, nvchan);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index e6718a74e5da..07149fa72b68 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -406,6 +406,14 @@ static void storvsc_on_channel_callback(void *context);
 #define STORVSC_IDE_MAX_TARGETS				1
 #define STORVSC_IDE_MAX_CHANNELS			1
 
+/*
+ * Upper bound on the size of a storvsc packet. vmscsi_size_delta is not
+ * included in the calculation because it is set after STORVSC_MAX_PKT_SIZE
+ * is used in storvsc_connect_to_vsp
+ */
+#define STORVSC_MAX_PKT_SIZE (sizeof(struct vmpacket_descriptor) +\
+			      sizeof(struct vstor_packet))
+
 struct storvsc_cmd_request {
 	struct scsi_cmnd *cmd;
 
@@ -701,6 +709,7 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
 		return;
 
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
+	new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 
 	/*
 	 * The size of vmbus_requestor is an upper bound on the number of requests
@@ -1294,6 +1303,7 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
 
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
+	device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 	/*
 	 * The size of vmbus_requestor is an upper bound on the number of requests
 	 * that can be in-progress at any one time across all channels.
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d1e59dbef1dd..3932446f215f 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -181,6 +181,10 @@ struct hv_ring_buffer_info {
 	 * being freed while the ring buffer is being accessed.
 	 */
 	struct mutex ring_buffer_mutex;
+
+	/* Buffer that holds a copy of an incoming host packet */
+	void *pkt_buffer;
+	u32 pkt_buffer_size;
 };
 
 
@@ -799,6 +803,8 @@ struct vmbus_device {
 	bool allowed_in_isolated;
 };
 
+#define VMBUS_DEFAULT_MAX_PKT_SIZE 4096
+
 struct vmbus_channel {
 	struct list_head listentry;
 
@@ -1021,6 +1027,9 @@ struct vmbus_channel {
 	/* request/transaction ids for VMBus */
 	struct vmbus_requestor requestor;
 	u32 rqstor_size;
+
+	/* The max size of a packet on this channel */
+	u32 max_pkt_size;
 };
 
 u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
@@ -1662,15 +1671,44 @@ static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc)
 }
 
 
+struct vmpacket_descriptor *
+hv_pkt_iter_first_raw(struct vmbus_channel *channel);
+
 struct vmpacket_descriptor *
 hv_pkt_iter_first(struct vmbus_channel *channel);
 
 struct vmpacket_descriptor *
 __hv_pkt_iter_next(struct vmbus_channel *channel,
-		   const struct vmpacket_descriptor *pkt);
+		   const struct vmpacket_descriptor *pkt,
+		   bool copy);
 
 void hv_pkt_iter_close(struct vmbus_channel *channel);
 
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next_pkt(struct vmbus_channel *channel,
+		     const struct vmpacket_descriptor *pkt,
+		     bool copy)
+{
+	struct vmpacket_descriptor *nxt;
+
+	nxt = __hv_pkt_iter_next(channel, pkt, copy);
+	if (!nxt)
+		hv_pkt_iter_close(channel);
+
+	return nxt;
+}
+
+/*
+ * Get next packet descriptor without copying it out of the ring buffer
+ * If at end of list, return NULL and update host.
+ */
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next_raw(struct vmbus_channel *channel,
+		     const struct vmpacket_descriptor *pkt)
+{
+	return hv_pkt_iter_next_pkt(channel, pkt, false);
+}
+
 /*
  * Get next packet descriptor from iterator
  * If at end of list, return NULL and update host.
@@ -1679,13 +1717,7 @@ static inline struct vmpacket_descriptor *
 hv_pkt_iter_next(struct vmbus_channel *channel,
 		 const struct vmpacket_descriptor *pkt)
 {
-	struct vmpacket_descriptor *nxt;
-
-	nxt = __hv_pkt_iter_next(channel, pkt);
-	if (!nxt)
-		hv_pkt_iter_close(channel);
-
-	return nxt;
+	return hv_pkt_iter_next_pkt(channel, pkt, true);
 }
 
 #define foreach_vmbus_pkt(pkt, channel) \
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index cc3bae2659e7..19189cf30a72 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -596,7 +596,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 		return -EOPNOTSUPP;
 
 	if (need_refill) {
-		hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+		hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
 		ret = hvs_update_recv_data(hvs);
 		if (ret)
 			return ret;
@@ -610,7 +610,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 
 	hvs->recv_data_len -= to_read;
 	if (hvs->recv_data_len == 0) {
-		hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+		hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, hvs->recv_desc);
 		if (hvs->recv_desc) {
 			ret = hvs_update_recv_data(hvs);
 			if (ret)

From 30515832e987597eae354f6ffcdb3374bdfde16d Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Fri, 14 May 2021 03:53:48 +0200
Subject: [PATCH 0125/3292] net: bridge: fix build when IPv6 is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The br_ip6_multicast_add_router() prototype is defined only when
CONFIG_IPV6 is enabled, but the function is always referenced, so there
is this build error with CONFIG_IPV6 not defined:

net/bridge/br_multicast.c: In function ‘__br_multicast_enable_port’:
net/bridge/br_multicast.c:1743:3: error: implicit declaration of function ‘br_ip6_multicast_add_router’; did you mean ‘br_ip4_multicast_add_router’? [-Werror=implicit-function-declaration]
 1743 |   br_ip6_multicast_add_router(br, port);
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~
      |   br_ip4_multicast_add_router
net/bridge/br_multicast.c: At top level:
net/bridge/br_multicast.c:2804:13: warning: conflicting types for ‘br_ip6_multicast_add_router’
 2804 | static void br_ip6_multicast_add_router(struct net_bridge *br,
      |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~
net/bridge/br_multicast.c:2804:13: error: static declaration of ‘br_ip6_multicast_add_router’ follows non-static declaration
net/bridge/br_multicast.c:1743:3: note: previous implicit declaration of ‘br_ip6_multicast_add_router’ was here
 1743 |   br_ip6_multicast_add_router(br, port);
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~

Fix this build error by moving the definition out of the #ifdef.

Fixes: a3c02e769efe ("net: bridge: mcast: split multicast router state for IPv4 and IPv6")
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 0703725527b3..53c3a9d80d9c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -62,9 +62,9 @@ static void br_multicast_port_group_rexmit(struct timer_list *t);
 
 static void
 br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted);
-#if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_add_router(struct net_bridge *br,
 					struct net_bridge_port *port);
+#if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
 					 const struct in6_addr *group,

From bf5fd8cae3c8f0d1e6f71a076e0ce2bd17645d0b Mon Sep 17 00:00:00 2001
From: "Andrea Parri (Microsoft)" <parri.andrea@gmail.com>
Date: Mon, 10 May 2021 23:08:41 +0200
Subject: [PATCH 0126/3292] scsi: storvsc: Use blk_mq_unique_tag() to generate
 requestIDs

Use blk_mq_unique_tag() to generate requestIDs for StorVSC, avoiding
all issues with allocating enough entries in the VMbus requestor.

Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20210510210841.370472-1-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/channel.c              | 14 ++---
 drivers/hv/ring_buffer.c          | 13 +++--
 drivers/net/hyperv/netvsc.c       |  8 ++-
 drivers/net/hyperv/rndis_filter.c |  2 +
 drivers/scsi/storvsc_drv.c        | 94 +++++++++++++++++++++----------
 include/linux/hyperv.h            | 13 ++++-
 6 files changed, 95 insertions(+), 49 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index bfbca4eeb773..f3761c73b074 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -1189,15 +1189,14 @@ EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
  * vmbus_next_request_id - Returns a new request id. It is also
  * the index at which the guest memory address is stored.
  * Uses a spin lock to avoid race conditions.
- * @rqstor: Pointer to the requestor struct
+ * @channel: Pointer to the VMbus channel struct
  * @rqst_add: Guest memory address to be stored in the array
  */
-u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr)
+u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
 {
+	struct vmbus_requestor *rqstor = &channel->requestor;
 	unsigned long flags;
 	u64 current_id;
-	const struct vmbus_channel *channel =
-		container_of(rqstor, const struct vmbus_channel, requestor);
 
 	/* Check rqstor has been initialized */
 	if (!channel->rqstor_size)
@@ -1231,16 +1230,15 @@ EXPORT_SYMBOL_GPL(vmbus_next_request_id);
 /*
  * vmbus_request_addr - Returns the memory address stored at @trans_id
  * in @rqstor. Uses a spin lock to avoid race conditions.
- * @rqstor: Pointer to the requestor struct
+ * @channel: Pointer to the VMbus channel struct
  * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's
  * next request id.
  */
-u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id)
+u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id)
 {
+	struct vmbus_requestor *rqstor = &channel->requestor;
 	unsigned long flags;
 	u64 req_addr;
-	const struct vmbus_channel *channel =
-		container_of(rqstor, const struct vmbus_channel, requestor);
 
 	/* Check rqstor has been initialized */
 	if (!channel->rqstor_size)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index e621f8d9b436..2aee356840a2 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -312,10 +312,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	 */
 
 	if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
-		rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
-		if (rqst_id == VMBUS_RQST_ERROR) {
-			spin_unlock_irqrestore(&outring_info->ring_lock, flags);
-			return -EAGAIN;
+		if (channel->next_request_id_callback != NULL) {
+			rqst_id = channel->next_request_id_callback(channel, requestid);
+			if (rqst_id == VMBUS_RQST_ERROR) {
+				spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+				return -EAGAIN;
+			}
 		}
 	}
 	desc = hv_get_ring_buffer(outring_info) + old_write;
@@ -343,7 +345,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	if (channel->rescind) {
 		if (rqst_id != VMBUS_NO_RQSTOR) {
 			/* Reclaim request ID to avoid leak of IDs */
-			vmbus_request_addr(&channel->requestor, rqst_id);
+			if (channel->request_addr_callback != NULL)
+				channel->request_addr_callback(channel, rqst_id);
 		}
 		return -ENODEV;
 	}
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 067077138e52..7bd935412853 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -757,7 +757,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 	int queue_sends;
 	u64 cmd_rqst;
 
-	cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
+	cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
 	if (cmd_rqst == VMBUS_RQST_ERROR) {
 		netdev_err(ndev, "Incorrect transaction id\n");
 		return;
@@ -817,8 +817,8 @@ static void netvsc_send_completion(struct net_device *ndev,
 
 	/* First check if this is a VMBUS completion without data payload */
 	if (!msglen) {
-		cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
-					      (u64)desc->trans_id);
+		cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
+								   (u64)desc->trans_id);
 		if (cmd_rqst == VMBUS_RQST_ERROR) {
 			netdev_err(ndev, "Invalid transaction id\n");
 			return;
@@ -1649,6 +1649,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
 		       netvsc_poll, NAPI_POLL_WEIGHT);
 
 	/* Open the channel */
+	device->channel->next_request_id_callback = vmbus_next_request_id;
+	device->channel->request_addr_callback = vmbus_request_addr;
 	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
 	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index d7ff9ddcbae2..983bf362466a 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1259,6 +1259,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	/* Set the channel before opening.*/
 	nvchan->channel = new_sc;
 
+	new_sc->next_request_id_callback = vmbus_next_request_id;
+	new_sc->request_addr_callback = vmbus_request_addr;
 	new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
 	new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
 
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 07149fa72b68..403753929320 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -696,6 +696,23 @@ old_is_alloced:
 	spin_unlock_irqrestore(&stor_device->lock, flags);
 }
 
+static u64 storvsc_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
+{
+	struct storvsc_cmd_request *request =
+		(struct storvsc_cmd_request *)(unsigned long)rqst_addr;
+
+	if (rqst_addr == VMBUS_RQST_INIT)
+		return VMBUS_RQST_INIT;
+	if (rqst_addr == VMBUS_RQST_RESET)
+		return VMBUS_RQST_RESET;
+
+	/*
+	 * Cannot return an ID of 0, which is reserved for an unsolicited
+	 * message from Hyper-V.
+	 */
+	return (u64)blk_mq_unique_tag(request->cmd->request) + 1;
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
 	struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -711,11 +728,7 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 	new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 
-	/*
-	 * The size of vmbus_requestor is an upper bound on the number of requests
-	 * that can be in-progress at any one time across all channels.
-	 */
-	new_sc->rqstor_size = scsi_driver.can_queue;
+	new_sc->next_request_id_callback = storvsc_next_request_id;
 
 	ret = vmbus_open(new_sc,
 			 storvsc_ringbuffer_size,
@@ -782,7 +795,7 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
 	ret = vmbus_sendpacket(device->channel, vstor_packet,
 			       (sizeof(struct vstor_packet) -
 			       stor_device->vmscsi_size_delta),
-			       (unsigned long)request,
+			       VMBUS_RQST_INIT,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
@@ -851,7 +864,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device,
 	ret = vmbus_sendpacket(device->channel, vstor_packet,
 			       (sizeof(struct vstor_packet) -
 			       stor_device->vmscsi_size_delta),
-			       (unsigned long)request,
+			       VMBUS_RQST_INIT,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0)
@@ -1253,6 +1266,7 @@ static void storvsc_on_channel_callback(void *context)
 	const struct vmpacket_descriptor *desc;
 	struct hv_device *device;
 	struct storvsc_device *stor_device;
+	struct Scsi_Host *shost;
 
 	if (channel->primary_channel != NULL)
 		device = channel->primary_channel->device_obj;
@@ -1263,20 +1277,12 @@ static void storvsc_on_channel_callback(void *context)
 	if (!stor_device)
 		return;
 
+	shost = stor_device->host;
+
 	foreach_vmbus_pkt(desc, channel) {
-		void *packet = hv_pkt_data(desc);
-		struct storvsc_cmd_request *request;
-		u64 cmd_rqst;
-
-		cmd_rqst = vmbus_request_addr(&channel->requestor,
-					      desc->trans_id);
-		if (cmd_rqst == VMBUS_RQST_ERROR) {
-			dev_err(&device->device,
-				"Incorrect transaction id\n");
-			continue;
-		}
-
-		request = (struct storvsc_cmd_request *)(unsigned long)cmd_rqst;
+		struct vstor_packet *packet = hv_pkt_data(desc);
+		struct storvsc_cmd_request *request = NULL;
+		u64 rqst_id = desc->trans_id;
 
 		if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) -
 				stor_device->vmscsi_size_delta) {
@@ -1284,14 +1290,44 @@ static void storvsc_on_channel_callback(void *context)
 			continue;
 		}
 
-		if (request == &stor_device->init_request ||
-		    request == &stor_device->reset_request) {
-			memcpy(&request->vstor_packet, packet,
-			       (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
-			complete(&request->wait_event);
+		if (rqst_id == VMBUS_RQST_INIT) {
+			request = &stor_device->init_request;
+		} else if (rqst_id == VMBUS_RQST_RESET) {
+			request = &stor_device->reset_request;
 		} else {
+			/* Hyper-V can send an unsolicited message with ID of 0 */
+			if (rqst_id == 0) {
+				/*
+				 * storvsc_on_receive() looks at the vstor_packet in the message
+				 * from the ring buffer.  If the operation in the vstor_packet is
+				 * COMPLETE_IO, then we call storvsc_on_io_completion(), and
+				 * dereference the guest memory address.  Make sure we don't call
+				 * storvsc_on_io_completion() with a guest memory address that is
+				 * zero if Hyper-V were to construct and send such a bogus packet.
+				 */
+				if (packet->operation == VSTOR_OPERATION_COMPLETE_IO) {
+					dev_err(&device->device, "Invalid packet with ID of 0\n");
+					continue;
+				}
+			} else {
+				struct scsi_cmnd *scmnd;
+
+				/* Transaction 'rqst_id' corresponds to tag 'rqst_id - 1' */
+				scmnd = scsi_host_find_tag(shost, rqst_id - 1);
+				if (scmnd == NULL) {
+					dev_err(&device->device, "Incorrect transaction ID\n");
+					continue;
+				}
+				request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
+			}
+
 			storvsc_on_receive(stor_device, packet, request);
+			continue;
 		}
+
+		memcpy(&request->vstor_packet, packet,
+		       (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
+		complete(&request->wait_event);
 	}
 }
 
@@ -1304,11 +1340,7 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
 	device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE;
-	/*
-	 * The size of vmbus_requestor is an upper bound on the number of requests
-	 * that can be in-progress at any one time across all channels.
-	 */
-	device->channel->rqstor_size = scsi_driver.can_queue;
+	device->channel->next_request_id_callback = storvsc_next_request_id;
 
 	ret = vmbus_open(device->channel,
 			 ring_size,
@@ -1634,7 +1666,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
 	ret = vmbus_sendpacket(device->channel, vstor_packet,
 			       (sizeof(struct vstor_packet) -
 				stor_device->vmscsi_size_delta),
-			       (unsigned long)&stor_device->reset_request,
+			       VMBUS_RQST_RESET,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3932446f215f..2e859d2f9609 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -794,7 +794,11 @@ struct vmbus_requestor {
 
 #define VMBUS_NO_RQSTOR U64_MAX
 #define VMBUS_RQST_ERROR (U64_MAX - 1)
+/* NetVSC-specific */
 #define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2)
+/* StorVSC-specific */
+#define VMBUS_RQST_INIT (U64_MAX - 2)
+#define VMBUS_RQST_RESET (U64_MAX - 3)
 
 struct vmbus_device {
 	u16  dev_type;
@@ -1024,6 +1028,11 @@ struct vmbus_channel {
 	u32 fuzz_testing_interrupt_delay;
 	u32 fuzz_testing_message_delay;
 
+	/* callback to generate a request ID from a request address */
+	u64 (*next_request_id_callback)(struct vmbus_channel *channel, u64 rqst_addr);
+	/* callback to retrieve a request address from a request ID */
+	u64 (*request_addr_callback)(struct vmbus_channel *channel, u64 rqst_id);
+
 	/* request/transaction ids for VMBus */
 	struct vmbus_requestor requestor;
 	u32 rqstor_size;
@@ -1032,8 +1041,8 @@ struct vmbus_channel {
 	u32 max_pkt_size;
 };
 
-u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
-u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id);
+u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr);
+u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id);
 
 static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 {

From 869cbeef18e5c4370157e733b947d44f37441ea9 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Wed, 12 May 2021 16:32:10 +0200
Subject: [PATCH 0127/3292] lsm_audit,selinux: pass IB device name by reference

While trying to address a Coverity warning that the dev_name string
might end up unterminated when strcpy'ing it in
selinux_ib_endport_manage_subnet(), I realized that it is possible (and
simpler) to just pass the dev_name pointer directly, rather than copying
the string to a buffer.

The ibendport variable goes out of scope at the end of the function
anyway, so the lifetime of the dev_name pointer will never be shorter
than that of ibendport, thus we can safely just pass the dev_name
pointer and be done with it.

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_audit.h | 8 ++++----
 security/selinux/hooks.c  | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index cd23355d2271..17d02eda9538 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -48,13 +48,13 @@ struct lsm_ioctlop_audit {
 };
 
 struct lsm_ibpkey_audit {
-	u64	subnet_prefix;
-	u16	pkey;
+	u64 subnet_prefix;
+	u16 pkey;
 };
 
 struct lsm_ibendport_audit {
-	char	dev_name[IB_DEVICE_NAME_MAX];
-	u8	port;
+	const char *dev_name;
+	u8 port;
 };
 
 /* Auxiliary data to use in generating the audit record. */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index eaea837d89d1..fc6a3ab7e179 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6850,7 +6850,7 @@ static int selinux_ib_endport_manage_subnet(void *ib_sec, const char *dev_name,
 		return err;
 
 	ad.type = LSM_AUDIT_DATA_IBENDPORT;
-	strncpy(ibendport.dev_name, dev_name, sizeof(ibendport.dev_name));
+	ibendport.dev_name = dev_name;
 	ibendport.port = port_num;
 	ad.u.ibendport = &ibendport;
 	return avc_has_perm(&selinux_state,

From fa44821a4ddda0101f64908abc56dc25b905475d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 13 May 2021 23:29:12 +0200
Subject: [PATCH 0128/3292] sfc: don't use netif_info et al before net_device
 is registered

Using netif_info() before the net_device is registered results in ugly
messages like the following:
sfc 0000:01:00.1 (unnamed net_device) (uninitialized): Solarflare NIC detected
Therefore use pci_info() et al until net_device is registered.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index c746ca7235f1..4fd9903ffe98 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -722,8 +722,7 @@ static int efx_register_netdev(struct efx_nic *efx)
 	efx->state = STATE_READY;
 	smp_mb(); /* ensure we change state before checking reset_pending */
 	if (efx->reset_pending) {
-		netif_err(efx, probe, efx->net_dev,
-			  "aborting probe due to scheduled reset\n");
+		pci_err(efx->pci_dev, "aborting probe due to scheduled reset\n");
 		rc = -EIO;
 		goto fail_locked;
 	}
@@ -990,8 +989,7 @@ static int efx_pci_probe_main(struct efx_nic *efx)
 	rc = efx->type->init(efx);
 	up_write(&efx->filter_sem);
 	if (rc) {
-		netif_err(efx, probe, efx->net_dev,
-			  "failed to initialise NIC\n");
+		pci_err(efx->pci_dev, "failed to initialise NIC\n");
 		goto fail3;
 	}
 
@@ -1038,8 +1036,8 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
 	if (efx->type->sriov_init) {
 		rc = efx->type->sriov_init(efx);
 		if (rc)
-			netif_err(efx, probe, efx->net_dev,
-				  "SR-IOV can't be enabled rc %d\n", rc);
+			pci_err(efx->pci_dev, "SR-IOV can't be enabled rc %d\n",
+				rc);
 	}
 
 	/* Determine netdevice features */
@@ -1106,8 +1104,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
 	if (rc)
 		goto fail1;
 
-	netif_info(efx, probe, efx->net_dev,
-		   "Solarflare NIC detected\n");
+	pci_info(pci_dev, "Solarflare NIC detected\n");
 
 	if (!efx->type->is_vf)
 		efx_probe_vpd_strings(efx);

From fc25f9f631acad4f4d0089fc400f2943c989522c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 13 May 2021 16:04:18 -0700
Subject: [PATCH 0129/3292] net: thunderx: Drop unnecessary NULL check after
 container_of

The result of container_of() operations is never NULL unless the embedded
element is the first element of the structure. This is not the case here.
The NULL check is therefore unnecessary and misleading. Remove it.

This change was made automatically with the following Coccinelle script.

@@
type t;
identifier v;
statement s;
@@

<+...
(
  t v = container_of(...);
|
  v = container_of(...);
)
  ...
  when != v
- if (\( !v \| v == NULL \) ) s
...+>

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 0c783aadf393..c36fed9c3d73 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -594,9 +594,6 @@ static void bgx_lmac_handler(struct net_device *netdev)
 	struct phy_device *phydev;
 	int link_changed = 0;
 
-	if (!lmac)
-		return;
-
 	phydev = lmac->phydev;
 
 	if (!phydev->link && lmac->last_link)

From 796640778c26f3d99fde173bb7b1d726b5f0d816 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 14 May 2021 11:25:09 +0800
Subject: [PATCH 0130/3292] net: hns3: support RXD advanced layout

Currently, the driver gets packet type by parsing the
L3_ID/L4_ID/OL3_ID/OL4_ID from RX descriptor, it's
time-consuming.

Now some new devices support RXD advanced layout, which combines
previous OL3_ID/OL4_ID to 8bit ptype field, so the driver gets
packet type by looking up only one table, and L3_ID/L4_ID become
reserved fields.

Considering compatibility, the firmware will report capability of
RXD advanced layout, the driver will identify and enable it by
default. This patch provides basic function: identify and enable
the RXD advanced layout, and refactor out hns3_rx_checksum() by
using ptype table to handle RX checksum if supported.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   4 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   3 +
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 358 ++++++++++++++++--
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  12 +
 .../hisilicon/hns3/hns3pf/hclge_cmd.c         |   2 +
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         |   1 +
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  17 +
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   1 +
 .../hisilicon/hns3/hns3vf/hclgevf_cmd.c       |   2 +
 .../hisilicon/hns3/hns3vf/hclgevf_cmd.h       |   1 +
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      |  17 +
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |   1 +
 12 files changed, 393 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 1d2189047781..1d265c34700f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -91,6 +91,7 @@ enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_STASH_B,
 	HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
 	HNAE3_DEV_SUPPORT_PAUSE_B,
+	HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
 };
 
 #define hnae3_dev_fd_supported(hdev) \
@@ -141,6 +142,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev) \
 	test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (ae_dev)->caps)
 
+#define hnae3_ae_dev_rxd_adv_layout_supported(ae_dev) \
+	test_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, (ae_dev)->caps)
+
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
 #define ring_ptr_move_bw(ring, p) \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 9d702bd0c7c1..e58a2c1f7c1c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -367,6 +367,9 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h)
 		 "yes" : "no");
 	dev_info(&h->pdev->dev, "support imp-controlled PHY: %s\n",
 		 test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps) ? "yes" : "no");
+	dev_info(&h->pdev->dev, "support rxd advanced layout: %s\n",
+		 test_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, caps) ?
+		 "yes" : "no");
 }
 
 static void hns3_dbg_dev_specs(struct hnae3_handle *h)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 783fdaf8f8d6..712a6db8bf79 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -91,6 +91,278 @@ static const struct pci_device_id hns3_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, hns3_pci_tbl);
 
+#define HNS3_RX_PTYPE_ENTRY(ptype, l, s, t) \
+	{	ptype, \
+		l, \
+		CHECKSUM_##s, \
+		HNS3_L3_TYPE_##t, \
+		1 }
+
+#define HNS3_RX_PTYPE_UNUSED_ENTRY(ptype) \
+		{ ptype, 0, CHECKSUM_NONE, HNS3_L3_TYPE_PARSE_FAIL, 0 }
+
+static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = {
+	HNS3_RX_PTYPE_UNUSED_ENTRY(0),
+	HNS3_RX_PTYPE_ENTRY(1, 0, COMPLETE, ARP),
+	HNS3_RX_PTYPE_ENTRY(2, 0, COMPLETE, RARP),
+	HNS3_RX_PTYPE_ENTRY(3, 0, COMPLETE, LLDP),
+	HNS3_RX_PTYPE_ENTRY(4, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(5, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(6, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(7, 0, COMPLETE, CNM),
+	HNS3_RX_PTYPE_ENTRY(8, 0, NONE, PARSE_FAIL),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(9),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(10),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(11),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(12),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(13),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(14),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(15),
+	HNS3_RX_PTYPE_ENTRY(16, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(17, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(18, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(19, 0, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(20, 0, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(21, 0, NONE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(22, 0, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(23, 0, NONE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(24, 0, NONE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(25, 0, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(26),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(27),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(28),
+	HNS3_RX_PTYPE_ENTRY(29, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(30, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(31, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(32, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(33, 1, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(34, 1, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(35, 1, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(36, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(37, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(38),
+	HNS3_RX_PTYPE_ENTRY(39, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(40, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(41, 1, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(42, 1, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(43, 1, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(44, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(45, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(46),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(47),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(48),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(49),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(50),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(51),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(52),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(53),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(54),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(55),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(56),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(57),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(58),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(59),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(60),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(61),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(62),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(63),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(64),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(65),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(66),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(67),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(68),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(69),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(70),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(71),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(72),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(73),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(74),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(75),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(76),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(77),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(78),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(79),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(80),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(81),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(82),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(83),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(84),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(85),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(86),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(87),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(88),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(89),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(90),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(91),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(92),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(93),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(94),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(95),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(96),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(97),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(98),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(99),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(100),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(101),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(102),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(103),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(104),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(105),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(106),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(107),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(108),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(109),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(110),
+	HNS3_RX_PTYPE_ENTRY(111, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(112, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(113, 0, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(114, 0, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(115, 0, NONE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(116, 0, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(117, 0, NONE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(118, 0, NONE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(119, 0, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(120),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(121),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(122),
+	HNS3_RX_PTYPE_ENTRY(123, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(124, 0, COMPLETE, PARSE_FAIL),
+	HNS3_RX_PTYPE_ENTRY(125, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(126, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(127, 1, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(128, 1, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(129, 1, UNNECESSARY, IPV4),
+	HNS3_RX_PTYPE_ENTRY(130, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_ENTRY(131, 0, COMPLETE, IPV4),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(132),
+	HNS3_RX_PTYPE_ENTRY(133, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(134, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(135, 1, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(136, 1, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(137, 1, UNNECESSARY, IPV6),
+	HNS3_RX_PTYPE_ENTRY(138, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_ENTRY(139, 0, COMPLETE, IPV6),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(140),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(141),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(142),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(143),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(144),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(145),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(146),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(147),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(148),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(149),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(150),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(151),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(152),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(153),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(154),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(155),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(156),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(157),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(158),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(159),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(160),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(161),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(162),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(163),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(164),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(165),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(166),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(167),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(168),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(169),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(170),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(171),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(172),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(173),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(174),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(175),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(176),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(177),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(178),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(179),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(180),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(181),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(182),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(183),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(184),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(185),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(186),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(187),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(188),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(189),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(190),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(191),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(192),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(193),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(194),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(195),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(196),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(197),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(198),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(199),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(200),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(201),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(202),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(203),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(204),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(205),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(206),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(207),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(208),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(209),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(210),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(211),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(212),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(213),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(214),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(215),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(216),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(217),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(218),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(219),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(220),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(221),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(222),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(223),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(224),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(225),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(226),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(227),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(228),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(229),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(230),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(231),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(232),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(233),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(234),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(235),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(236),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(237),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(238),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(239),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(240),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(241),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(242),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(243),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(244),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(245),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(246),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(247),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(248),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(249),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(250),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(251),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(252),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(253),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(254),
+	HNS3_RX_PTYPE_UNUSED_ENTRY(255),
+};
+
+#define HNS3_INVALID_PTYPE \
+		ARRAY_SIZE(hns3_rx_ptype_tbl)
+
 static irqreturn_t hns3_irq_handle(int irq, void *vector)
 {
 	struct hns3_enet_tqp_vector *tqp_vector = vector;
@@ -2996,35 +3268,15 @@ static void hns3_checksum_complete(struct hns3_enet_ring *ring,
 	skb->csum = csum_unfold((__force __sum16)(lo | hi << 8));
 }
 
-static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
-			     u32 l234info, u32 bd_base_info, u32 ol_info)
+static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info,
+				u32 ol_info, u32 ptype)
 {
-	struct net_device *netdev = ring_to_netdev(ring);
 	int l3_type, l4_type;
 	int ol4_type;
 
-	skb->ip_summed = CHECKSUM_NONE;
-
-	skb_checksum_none_assert(skb);
-
-	if (!(netdev->features & NETIF_F_RXCSUM))
-		return;
-
-	if (l234info & BIT(HNS3_RXD_L2_CSUM_B)) {
-		hns3_checksum_complete(ring, skb, l234info);
-		return;
-	}
-
-	/* check if hardware has done checksum */
-	if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
-		return;
-
-	if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
-				 BIT(HNS3_RXD_OL3E_B) |
-				 BIT(HNS3_RXD_OL4E_B)))) {
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.l3l4_csum_err++;
-		u64_stats_update_end(&ring->syncp);
+	if (ptype != HNS3_INVALID_PTYPE) {
+		skb->csum_level = hns3_rx_ptype_tbl[ptype].csum_level;
+		skb->ip_summed = hns3_rx_ptype_tbl[ptype].ip_summed;
 
 		return;
 	}
@@ -3054,6 +3306,46 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
 	}
 }
 
+static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+			     u32 l234info, u32 bd_base_info, u32 ol_info)
+{
+	struct net_device *netdev = ring_to_netdev(ring);
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	u32 ptype = HNS3_INVALID_PTYPE;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_checksum_none_assert(skb);
+
+	if (!(netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state))
+		ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M,
+					HNS3_RXD_PTYPE_S);
+
+	if (l234info & BIT(HNS3_RXD_L2_CSUM_B)) {
+		hns3_checksum_complete(ring, skb, l234info);
+		return;
+	}
+
+	/* check if hardware has done checksum */
+	if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
+		return;
+
+	if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
+				 BIT(HNS3_RXD_OL3E_B) |
+				 BIT(HNS3_RXD_OL4E_B)))) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.l3l4_csum_err++;
+		u64_stats_update_end(&ring->syncp);
+
+		return;
+	}
+
+	hns3_rx_handle_csum(skb, l234info, ol_info, ptype);
+}
+
 static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb)
 {
 	if (skb_has_frag_list(skb))
@@ -3237,6 +3529,8 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
 				     struct sk_buff *skb, u32 l234info,
 				     u32 bd_base_info, u32 ol_info)
 {
+	struct net_device *netdev = ring_to_netdev(ring);
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	u32 l3_type;
 
 	skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info,
@@ -3252,7 +3546,16 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
 						  HNS3_RXD_GRO_COUNT_M,
 						  HNS3_RXD_GRO_COUNT_S);
 
-	l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S);
+	if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) {
+		u32 ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M,
+					    HNS3_RXD_PTYPE_S);
+
+		l3_type = hns3_rx_ptype_tbl[ptype].l3_type;
+	} else {
+		l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M,
+					  HNS3_RXD_L3ID_S);
+	}
+
 	if (l3_type == HNS3_L3_TYPE_IPV4)
 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 	else if (l3_type == HNS3_L3_TYPE_IPV6)
@@ -4350,6 +4653,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
 		set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state);
 
+	if (hnae3_ae_dev_rxd_adv_layout_supported(ae_dev))
+		set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
+
 	set_bit(HNS3_NIC_STATE_INITED, &priv->state);
 
 	if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index daa04aeb0942..843642b9884b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -19,6 +19,7 @@ enum hns3_nic_state {
 	HNS3_NIC_STATE_SERVICE_SCHED,
 	HNS3_NIC_STATE2_RESET_REQUESTED,
 	HNS3_NIC_STATE_HW_TX_CSUM_ENABLE,
+	HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE,
 	HNS3_NIC_STATE_MAX
 };
 
@@ -114,6 +115,9 @@ enum hns3_nic_state {
 #define HNS3_RXD_FBLI_S				14
 #define HNS3_RXD_FBLI_M				(0x3 << HNS3_RXD_FBLI_S)
 
+#define HNS3_RXD_PTYPE_S			4
+#define HNS3_RXD_PTYPE_M			GENMASK(11, 4)
+
 #define HNS3_RXD_BDTYPE_S			0
 #define HNS3_RXD_BDTYPE_M			(0xf << HNS3_RXD_BDTYPE_S)
 #define HNS3_RXD_VLD_B				4
@@ -366,6 +370,14 @@ enum hns3_pkt_ol4type {
 	HNS3_OL4_TYPE_UNKNOWN
 };
 
+struct hns3_rx_ptype {
+	u32 ptype:8;
+	u32 csum_level:2;
+	u32 ip_summed:2;
+	u32 l3_type:4;
+	u32 valid:1;
+};
+
 struct ring_stats {
 	u64 sw_err_cnt;
 	u64 seg_pkt_cnt;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 76a482456f1f..0df9ca311e6c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -386,6 +386,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
 		set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
+		set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
 }
 
 static __le32 hclge_build_api_caps(void)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index c6fc22e29581..c6cd273363d5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -391,6 +391,7 @@ enum HCLGE_CAP_BITS {
 	HCLGE_CAP_UDP_TUNNEL_CSUM_B,
 	HCLGE_CAP_FEC_B = 13,
 	HCLGE_CAP_PAUSE_B = 14,
+	HCLGE_CAP_RXD_ADV_LAYOUT_B = 15,
 };
 
 enum HCLGE_API_CAP_BITS {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6304aed49f22..55b0453d3948 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11167,6 +11167,18 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev)
 	}
 }
 
+static void hclge_init_rxd_adv_layout(struct hclge_dev *hdev)
+{
+	if (hnae3_ae_dev_rxd_adv_layout_supported(hdev->ae_dev))
+		hclge_write_dev(&hdev->hw, HCLGE_RXD_ADV_LAYOUT_EN_REG, 1);
+}
+
+static void hclge_uninit_rxd_adv_layout(struct hclge_dev *hdev)
+{
+	if (hnae3_ae_dev_rxd_adv_layout_supported(hdev->ae_dev))
+		hclge_write_dev(&hdev->hw, HCLGE_RXD_ADV_LAYOUT_EN_REG, 0);
+}
+
 static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct pci_dev *pdev = ae_dev->pdev;
@@ -11339,6 +11351,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
 	}
 
+	hclge_init_rxd_adv_layout(hdev);
+
 	/* Enable MISC vector(vector0) */
 	hclge_enable_vector(&hdev->misc_vector, true);
 
@@ -11720,6 +11734,8 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 	if (ret)
 		return ret;
 
+	hclge_init_rxd_adv_layout(hdev);
+
 	dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n",
 		 HCLGE_DRIVER_NAME);
 
@@ -11735,6 +11751,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_clear_vf_vlan(hdev);
 	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
+	hclge_uninit_rxd_adv_layout(hdev);
 	hclge_uninit_mac_table(hdev);
 	hclge_del_all_fd_entries(hdev);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index ff1d47308c2d..10f5c11e6b66 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -53,6 +53,7 @@
 /* bar registers for common func */
 #define HCLGE_VECTOR0_OTER_EN_REG	0x20600
 #define HCLGE_GRO_EN_REG		0x28000
+#define HCLGE_RXD_ADV_LAYOUT_EN_REG	0x28008
 
 /* bar registers for rcb */
 #define HCLGE_RING_RX_ADDR_L_REG	0x80000
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index d8c5c5810b99..bd19a2d89f6c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -359,6 +359,8 @@ static void hclgevf_parse_capability(struct hclgevf_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_TUNNEL_CSUM_B))
 		set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGEVF_CAP_RXD_ADV_LAYOUT_B))
+		set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
 }
 
 static __le32 hclgevf_build_api_caps(void)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index c6dc11b32aa7..202feb70dba5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -159,6 +159,7 @@ enum HCLGEVF_CAP_BITS {
 	HCLGEVF_CAP_HW_PAD_B,
 	HCLGEVF_CAP_STASH_B,
 	HCLGEVF_CAP_UDP_TUNNEL_CSUM_B,
+	HCLGEVF_CAP_RXD_ADV_LAYOUT_B = 15,
 };
 
 enum HCLGEVF_API_CAP_BITS {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 0db51ef15ef6..7bef6b24e610 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -3242,6 +3242,18 @@ static int hclgevf_clear_vport_list(struct hclgevf_dev *hdev)
 	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
 }
 
+static void hclgevf_init_rxd_adv_layout(struct hclgevf_dev *hdev)
+{
+	if (hnae3_ae_dev_rxd_adv_layout_supported(hdev->ae_dev))
+		hclgevf_write_dev(&hdev->hw, HCLGEVF_RXD_ADV_LAYOUT_EN_REG, 1);
+}
+
+static void hclgevf_uninit_rxd_adv_layout(struct hclgevf_dev *hdev)
+{
+	if (hnae3_ae_dev_rxd_adv_layout_supported(hdev->ae_dev))
+		hclgevf_write_dev(&hdev->hw, HCLGEVF_RXD_ADV_LAYOUT_EN_REG, 0);
+}
+
 static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
@@ -3279,6 +3291,8 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
 
 	set_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state);
 
+	hclgevf_init_rxd_adv_layout(hdev);
+
 	dev_info(&hdev->pdev->dev, "Reset done\n");
 
 	return 0;
@@ -3379,6 +3393,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 		goto err_config;
 	}
 
+	hclgevf_init_rxd_adv_layout(hdev);
+
 	hdev->last_reset_time = jiffies;
 	dev_info(&hdev->pdev->dev, "finished initializing %s driver\n",
 		 HCLGEVF_DRIVER_NAME);
@@ -3405,6 +3421,7 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 	struct hclge_vf_to_pf_msg send_msg;
 
 	hclgevf_state_uninit(hdev);
+	hclgevf_uninit_rxd_adv_layout(hdev);
 
 	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_VF_UNINIT, 0);
 	hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 265c9b0b4728..b146d04526de 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -47,6 +47,7 @@
 
 /* bar registers for common func */
 #define HCLGEVF_GRO_EN_REG			0x28000
+#define HCLGEVF_RXD_ADV_LAYOUT_EN_REG		0x28008
 
 /* bar registers for rcb */
 #define HCLGEVF_RING_RX_ADDR_L_REG		0x80000

From 1ddc028ac84988b6b1c9ceb9d15acbf321735ca3 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 14 May 2021 11:25:10 +0800
Subject: [PATCH 0131/3292] net: hns3: refactor out RX completion checksum

Only when RXD advanced layout is enabled, in some cases
(e.g. ip fragments), the checksum of entire packet will be
calculated and filled in the least significant 16 bits of
the unused addr field.

So refactor out the handling of RX completion checksum: adjust
the location of the checksum in RX descriptor, and use ptype table
to identify whether this kind of checksum is calculated.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 10 ------
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 32 ++++++++++---------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   | 12 +++----
 3 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index e58a2c1f7c1c..e405fef81d15 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -260,16 +260,6 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
 	dev_info(dev, "(RX)addr: %pad\n", &addr);
 	dev_info(dev, "(RX)l234_info: %u\n", l234info);
 
-	if (l234info & BIT(HNS3_RXD_L2_CSUM_B)) {
-		u32 lo, hi;
-
-		lo = hnae3_get_field(l234info, HNS3_RXD_L2_CSUM_L_M,
-				     HNS3_RXD_L2_CSUM_L_S);
-		hi = hnae3_get_field(l234info, HNS3_RXD_L2_CSUM_H_M,
-				     HNS3_RXD_L2_CSUM_H_S);
-		dev_info(dev, "(RX)csum: %u\n", lo | hi << 8);
-	}
-
 	dev_info(dev, "(RX)pkt_len: %u\n", le16_to_cpu(rx_desc->rx.pkt_len));
 	dev_info(dev, "(RX)size: %u\n", le16_to_cpu(rx_desc->rx.size));
 	dev_info(dev, "(RX)rss_hash: %u\n", le32_to_cpu(rx_desc->rx.rss_hash));
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 712a6db8bf79..5826d865eeed 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3252,20 +3252,20 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
 	return 0;
 }
 
-static void hns3_checksum_complete(struct hns3_enet_ring *ring,
-				   struct sk_buff *skb, u32 l234info)
+static bool hns3_checksum_complete(struct hns3_enet_ring *ring,
+				   struct sk_buff *skb, u32 ptype, u16 csum)
 {
-	u32 lo, hi;
+	if (ptype == HNS3_INVALID_PTYPE ||
+	    hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE)
+		return false;
 
 	u64_stats_update_begin(&ring->syncp);
 	ring->stats.csum_complete++;
 	u64_stats_update_end(&ring->syncp);
 	skb->ip_summed = CHECKSUM_COMPLETE;
-	lo = hnae3_get_field(l234info, HNS3_RXD_L2_CSUM_L_M,
-			     HNS3_RXD_L2_CSUM_L_S);
-	hi = hnae3_get_field(l234info, HNS3_RXD_L2_CSUM_H_M,
-			     HNS3_RXD_L2_CSUM_H_S);
-	skb->csum = csum_unfold((__force __sum16)(lo | hi << 8));
+	skb->csum = csum_unfold((__force __sum16)csum);
+
+	return true;
 }
 
 static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info,
@@ -3307,7 +3307,8 @@ static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info,
 }
 
 static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
-			     u32 l234info, u32 bd_base_info, u32 ol_info)
+			     u32 l234info, u32 bd_base_info, u32 ol_info,
+			     u16 csum)
 {
 	struct net_device *netdev = ring_to_netdev(ring);
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -3324,10 +3325,8 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
 		ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M,
 					HNS3_RXD_PTYPE_S);
 
-	if (l234info & BIT(HNS3_RXD_L2_CSUM_B)) {
-		hns3_checksum_complete(ring, skb, l234info);
+	if (hns3_checksum_complete(ring, skb, ptype, csum))
 		return;
-	}
 
 	/* check if hardware has done checksum */
 	if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
@@ -3527,7 +3526,7 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
 
 static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
 				     struct sk_buff *skb, u32 l234info,
-				     u32 bd_base_info, u32 ol_info)
+				     u32 bd_base_info, u32 ol_info, u16 csum)
 {
 	struct net_device *netdev = ring_to_netdev(ring);
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -3538,7 +3537,8 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring,
 						    HNS3_RXD_GRO_SIZE_S);
 	/* if there is no HW GRO, do not set gro params */
 	if (!skb_shinfo(skb)->gso_size) {
-		hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info);
+		hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info,
+				 csum);
 		return 0;
 	}
 
@@ -3588,6 +3588,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 	struct hns3_desc *desc;
 	unsigned int len;
 	int pre_ntc, ret;
+	u16 csum;
 
 	/* bdinfo handled below is only valid on the last BD of the
 	 * current packet, and ring->next_to_clean indicates the first
@@ -3599,6 +3600,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 	bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
 	l234info = le32_to_cpu(desc->rx.l234_info);
 	ol_info = le32_to_cpu(desc->rx.ol_info);
+	csum = le16_to_cpu(desc->csum);
 
 	/* Based on hw strategy, the tag offloaded will be stored at
 	 * ot_vlan_tag in two layer tag case, and stored at vlan_tag
@@ -3631,7 +3633,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 
 	/* This is needed in order to enable forwarding support */
 	ret = hns3_set_gro_and_checksum(ring, skb, l234info,
-					bd_base_info, ol_info);
+					bd_base_info, ol_info, csum);
 	if (unlikely(ret)) {
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.rx_err_cnt++;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 843642b9884b..c9aebda26f6a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -83,12 +83,6 @@ enum hns3_nic_state {
 #define HNS3_RXD_STRP_TAGP_S			13
 #define HNS3_RXD_STRP_TAGP_M			(0x3 << HNS3_RXD_STRP_TAGP_S)
 
-#define HNS3_RXD_L2_CSUM_B			15
-#define HNS3_RXD_L2_CSUM_L_S			4
-#define HNS3_RXD_L2_CSUM_L_M			(0xff << HNS3_RXD_L2_CSUM_L_S)
-#define HNS3_RXD_L2_CSUM_H_S			24
-#define HNS3_RXD_L2_CSUM_H_M			(0xff << HNS3_RXD_L2_CSUM_H_S)
-
 #define HNS3_RXD_L2E_B				16
 #define HNS3_RXD_L3E_B				17
 #define HNS3_RXD_L4E_B				18
@@ -242,7 +236,10 @@ enum hns3_pkt_tun_type {
 
 /* hardware spec ring buffer format */
 struct __packed hns3_desc {
-	__le64 addr;
+	union {
+		__le64 addr;
+		__le16 csum;
+	};
 	union {
 		struct {
 			__le16 vlan_tag;
@@ -409,6 +406,7 @@ struct ring_stats {
 			u64 rx_multicast;
 			u64 non_reuse_pg;
 		};
+		__le16 csum;
 	};
 };
 

From 5e69ea7ee2a69f68c4172afcb0cbe29e7162fb6e Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Fri, 14 May 2021 11:25:11 +0800
Subject: [PATCH 0132/3292] net: hns3: refactor the debugfs process

Currently, each debugfs command needs to create a file to get
the information. To better support more debugfs commands, the
debugfs process is reconstructed, including the process of
creating dentries and files, and obtaining information.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  13 +-
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 193 +++++++++++++++---
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |  29 +++
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   |   7 +-
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |   2 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     |  32 ++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |   5 +
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   2 +-
 8 files changed, 236 insertions(+), 47 deletions(-)
 create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 1d265c34700f..eee9639802a3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -250,6 +250,13 @@ enum hnae3_port_base_vlan_state {
 	HNAE3_PORT_BASE_VLAN_NOCHANGE,
 };
 
+enum hnae3_dbg_cmd {
+	HNAE3_DBG_CMD_TM_NODES,
+	HNAE3_DBG_CMD_TM_PRI,
+	HNAE3_DBG_CMD_TM_QSET,
+	HNAE3_DBG_CMD_UNKNOWN,
+};
+
 struct hnae3_vector_info {
 	u8 __iomem *io_addr;
 	int vector;
@@ -627,7 +634,7 @@ struct hnae3_ae_ops {
 	int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id,
 			      u16 flow_id, struct flow_keys *fkeys);
 	int (*dbg_run_cmd)(struct hnae3_handle *handle, const char *cmd_buf);
-	int (*dbg_read_cmd)(struct hnae3_handle *handle, const char *cmd_buf,
+	int (*dbg_read_cmd)(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
 			    char *buf, int len);
 	pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev);
 	bool (*get_hw_reset_stat)(struct hnae3_handle *handle);
@@ -790,10 +797,6 @@ struct hnae3_handle {
 #define hnae3_get_bit(origin, shift) \
 	hnae3_get_field(origin, 0x1 << (shift), shift)
 
-#define HNAE3_DBG_TM_NODES		"tm_nodes"
-#define HNAE3_DBG_TM_PRI		"tm_priority"
-#define HNAE3_DBG_TM_QSET		"tm_qset"
-
 int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
 void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index e405fef81d15..62a05955fda2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -5,13 +5,48 @@
 #include <linux/device.h>
 
 #include "hnae3.h"
+#include "hns3_debugfs.h"
 #include "hns3_enet.h"
 
-#define HNS3_DBG_READ_LEN 65536
-#define HNS3_DBG_WRITE_LEN 1024
-
 static struct dentry *hns3_dbgfs_root;
 
+static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
+	{
+		.name = "tm"
+	},
+	/* keep common at the bottom and add new directory above */
+	{
+		.name = "common"
+	},
+};
+
+static int hns3_dbg_common_file_init(struct hnae3_handle *handle,
+				     unsigned int cmd);
+
+static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
+	{
+		.name = "tm_nodes",
+		.cmd = HNAE3_DBG_CMD_TM_NODES,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "tm_priority",
+		.cmd = HNAE3_DBG_CMD_TM_PRI,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "tm_qset",
+		.cmd = HNAE3_DBG_CMD_TM_QSET,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+};
+
 static int hns3_dbg_queue_info(struct hnae3_handle *h,
 			       const char *cmd_buf)
 {
@@ -493,37 +528,90 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer,
 	return count;
 }
 
+static int hns3_dbg_get_cmd_index(struct hnae3_handle *handle,
+				  const unsigned char *name, u32 *index)
+{
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
+		if (!strncmp(name, hns3_dbg_cmd[i].name,
+			     strlen(hns3_dbg_cmd[i].name))) {
+			*index = i;
+			return 0;
+		}
+	}
+
+	dev_err(&handle->pdev->dev, "unknown command(%s)\n", name);
+	return -EINVAL;
+}
+
+static int hns3_dbg_read_cmd(struct hnae3_handle *handle,
+			     enum hnae3_dbg_cmd cmd, char *buf, int len)
+{
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+	if (!ops->dbg_read_cmd)
+		return -EOPNOTSUPP;
+
+	return ops->dbg_read_cmd(handle, cmd, buf, len);
+}
+
 static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
 			     size_t count, loff_t *ppos)
 {
 	struct hnae3_handle *handle = filp->private_data;
-	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
 	struct hns3_nic_priv *priv = handle->priv;
-	char *cmd_buf, *read_buf;
 	ssize_t size = 0;
-	int ret = 0;
+	char **save_buf;
+	char *read_buf;
+	u32 index;
+	int ret;
 
-	read_buf = kzalloc(HNS3_DBG_READ_LEN, GFP_KERNEL);
-	if (!read_buf)
-		return -ENOMEM;
+	ret = hns3_dbg_get_cmd_index(handle, filp->f_path.dentry->d_iname,
+				     &index);
+	if (ret)
+		return ret;
 
-	cmd_buf = filp->f_path.dentry->d_iname;
+	save_buf = &hns3_dbg_cmd[index].buf;
 
-	if (ops->dbg_read_cmd)
-		ret = ops->dbg_read_cmd(handle, cmd_buf, read_buf,
-					HNS3_DBG_READ_LEN);
-
-	if (ret) {
-		dev_info(priv->dev, "unknown command\n");
+	if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+	    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) {
+		ret = -EBUSY;
 		goto out;
 	}
 
+	if (*save_buf) {
+		read_buf = *save_buf;
+	} else {
+		read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL);
+		if (!read_buf)
+			return -ENOMEM;
+
+		/* save the buffer addr until the last read operation */
+		*save_buf = read_buf;
+	}
+
+	/* get data ready for the first time to read */
+	if (!*ppos) {
+		ret = hns3_dbg_read_cmd(handle, hns3_dbg_cmd[index].cmd,
+					read_buf, hns3_dbg_cmd[index].buf_len);
+		if (ret)
+			goto out;
+	}
+
 	size = simple_read_from_buffer(buffer, count, ppos, read_buf,
 				       strlen(read_buf));
+	if (size > 0)
+		return size;
 
 out:
-	kfree(read_buf);
-	return size;
+	/* free the buffer for the last read operation */
+	if (*save_buf) {
+		kvfree(*save_buf);
+		*save_buf = NULL;
+	}
+
+	return ret;
 }
 
 static const struct file_operations hns3_dbg_cmd_fops = {
@@ -539,29 +627,76 @@ static const struct file_operations hns3_dbg_fops = {
 	.read  = hns3_dbg_read,
 };
 
-void hns3_dbg_init(struct hnae3_handle *handle)
+static int
+hns3_dbg_common_file_init(struct hnae3_handle *handle, u32 cmd)
+{
+	struct dentry *entry_dir;
+
+	entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry;
+	debugfs_create_file(hns3_dbg_cmd[cmd].name, 0400, entry_dir,
+			    handle, &hns3_dbg_fops);
+
+	return 0;
+}
+
+int hns3_dbg_init(struct hnae3_handle *handle)
 {
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
 	const char *name = pci_name(handle->pdev);
-	struct dentry *entry_dir;
+	int ret;
+	u32 i;
 
-	handle->hnae3_dbgfs = debugfs_create_dir(name, hns3_dbgfs_root);
+	hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry =
+				debugfs_create_dir(name, hns3_dbgfs_root);
+	handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry;
 
 	debugfs_create_file("cmd", 0600, handle->hnae3_dbgfs, handle,
 			    &hns3_dbg_cmd_fops);
 
-	entry_dir = debugfs_create_dir("tm", handle->hnae3_dbgfs);
-	if (ae_dev->dev_version > HNAE3_DEVICE_VERSION_V2)
-		debugfs_create_file(HNAE3_DBG_TM_NODES, 0600, entry_dir, handle,
-				    &hns3_dbg_fops);
-	debugfs_create_file(HNAE3_DBG_TM_PRI, 0600, entry_dir, handle,
-			    &hns3_dbg_fops);
-	debugfs_create_file(HNAE3_DBG_TM_QSET, 0600, entry_dir, handle,
-			    &hns3_dbg_fops);
+	for (i = 0; i < HNS3_DBG_DENTRY_COMMON; i++)
+		hns3_dbg_dentry[i].dentry =
+			debugfs_create_dir(hns3_dbg_dentry[i].name,
+					   handle->hnae3_dbgfs);
+
+	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
+		if (hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES &&
+		    ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2)
+			continue;
+
+		if (!hns3_dbg_cmd[i].init) {
+			dev_err(&handle->pdev->dev,
+				"cmd %s lack of init func\n",
+				hns3_dbg_cmd[i].name);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = hns3_dbg_cmd[i].init(handle, i);
+		if (ret) {
+			dev_err(&handle->pdev->dev, "failed to init cmd %s\n",
+				hns3_dbg_cmd[i].name);
+			goto out;
+		}
+	}
+
+	return 0;
+
+out:
+	debugfs_remove_recursive(handle->hnae3_dbgfs);
+	handle->hnae3_dbgfs = NULL;
+	return ret;
 }
 
 void hns3_dbg_uninit(struct hnae3_handle *handle)
 {
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
+		if (hns3_dbg_cmd[i].buf) {
+			kvfree(hns3_dbg_cmd[i].buf);
+			hns3_dbg_cmd[i].buf = NULL;
+		}
+
 	debugfs_remove_recursive(handle->hnae3_dbgfs);
 	handle->hnae3_dbgfs = NULL;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
new file mode 100644
index 000000000000..1648f686114e
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#ifndef __HNS3_DEBUGFS_H
+#define __HNS3_DEBUGFS_H
+
+#define HNS3_DBG_READ_LEN	65536
+#define HNS3_DBG_WRITE_LEN	1024
+
+enum hns3_dbg_dentry_type {
+	HNS3_DBG_DENTRY_TM,
+	HNS3_DBG_DENTRY_COMMON,
+};
+
+struct hns3_dbg_dentry_info {
+	const char *name;
+	struct dentry *dentry;
+};
+
+struct hns3_dbg_cmd_info {
+	const char *name;
+	enum hnae3_dbg_cmd cmd;
+	enum hns3_dbg_dentry_type dentry;
+	u32 buf_len;
+	char *buf;
+	int (*init)(struct hnae3_handle *handle, unsigned int cmd);
+};
+
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 5826d865eeed..02ce7a3e62cc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4648,7 +4648,12 @@ static int hns3_client_init(struct hnae3_handle *handle)
 
 	hns3_dcbnl_setup(handle);
 
-	hns3_dbg_init(handle);
+	ret = hns3_dbg_init(handle);
+	if (ret) {
+		dev_err(priv->dev, "failed to init debugfs, ret = %d\n",
+			ret);
+		goto out_client_start;
+	}
 
 	netdev->max_mtu = HNS3_MAX_MTU(ae_dev->dev_specs.max_frm_size);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index c9aebda26f6a..5c72f41cf57e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -650,7 +650,7 @@ void hns3_dcbnl_setup(struct hnae3_handle *handle);
 static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {}
 #endif
 
-void hns3_dbg_init(struct hnae3_handle *handle);
+int hns3_dbg_init(struct hnae3_handle *handle);
 void hns3_dbg_uninit(struct hnae3_handle *handle);
 void hns3_dbg_register_debugfs(const char *debugfs_dir_name);
 void hns3_dbg_unregister_debugfs(void);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 85d306459e36..7f1abdff25b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1800,21 +1800,33 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	return 0;
 }
 
-int hclge_dbg_read_cmd(struct hnae3_handle *handle, const char *cmd_buf,
+static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
+	{
+		.cmd = HNAE3_DBG_CMD_TM_NODES,
+		.dbg_dump = hclge_dbg_dump_tm_nodes,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_TM_PRI,
+		.dbg_dump = hclge_dbg_dump_tm_pri,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_TM_QSET,
+		.dbg_dump = hclge_dbg_dump_tm_qset,
+	},
+};
+
+int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
 		       char *buf, int len)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	u32 i;
 
-	if (strncmp(cmd_buf, HNAE3_DBG_TM_NODES,
-		    strlen(HNAE3_DBG_TM_NODES)) == 0)
-		return hclge_dbg_dump_tm_nodes(hdev, buf, len);
-	else if (strncmp(cmd_buf, HNAE3_DBG_TM_PRI,
-			 strlen(HNAE3_DBG_TM_PRI)) == 0)
-		return hclge_dbg_dump_tm_pri(hdev, buf, len);
-	else if (strncmp(cmd_buf, HNAE3_DBG_TM_QSET,
-			 strlen(HNAE3_DBG_TM_QSET)) == 0)
-		return hclge_dbg_dump_tm_qset(hdev, buf, len);
+	for (i = 0; i < ARRAY_SIZE(hclge_dbg_cmd_func); i++) {
+		if (cmd == hclge_dbg_cmd_func[i].cmd)
+			return hclge_dbg_cmd_func[i].dbg_dump(hdev, buf, len);
+	}
 
+	dev_err(&hdev->pdev->dev, "invalid command(%d)\n", cmd);
 	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index ca2ab6cf84d9..0c144532e6ad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -83,6 +83,11 @@ struct hclge_dbg_reg_type_info {
 	struct hclge_dbg_reg_common_msg reg_msg;
 };
 
+struct hclge_dbg_func {
+	enum hnae3_dbg_cmd cmd;
+	int (*dbg_dump)(struct hclge_dev *hdev, char *buf, int len);
+};
+
 static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
 	{false, "Reserved"},
 	{true,	"BP_CPU_STATE"},
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 10f5c11e6b66..9e17c02de403 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1061,7 +1061,7 @@ int hclge_vport_start(struct hclge_vport *vport);
 void hclge_vport_stop(struct hclge_vport *vport);
 int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu);
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf);
-int hclge_dbg_read_cmd(struct hnae3_handle *handle, const char *cmd_buf,
+int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
 		       char *buf, int len);
 u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id);
 int hclge_notify_client(struct hclge_dev *hdev,

From c929bc2ac36efa9344e6c8b8f55f6b8eeebb4393 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Fri, 14 May 2021 11:25:12 +0800
Subject: [PATCH 0133/3292] net: hns3: refactor dev capability and dev spec of
 debugfs

Currently, the debugfs command for dev capability and dev spec
are implemented by "echo xxxx > cmd", and record the information
in dmesg. It's unnecessary and heavy. To improve it, create a
single file "dev_info" for them, and query them by command
"cat dev_info", return the result to userspace, rather than
record in dmesg.

The display style is below:
$cat dev_info
dev capability:
support FD: yes
support GRO: yes
support FEC: yes
support UDP GSO: no
support PTP: no
support INT QL: no
support HW TX csum: no
support UDP tunnel csum: no
support TX push: no
support imp-controlled PHY: no
support rxd advanced layout: no

dev spec:
MAC entry num: 0
MNG entry num: 0
MAX non tso bd num: 8
RSS ind tbl size: 512
RSS key size: 40
RSS size: 1
Allocated RSS size: 0
Task queue pairs numbers: 1
RX buffer length: 2048
Desc num per TX queue: 1024
Desc num per RX queue: 1024
Total number of enabled TCs: 1
MAX INT QL: 0
MAX INT GL: 8160
MAX TM RATE: 100000
MAX QSET number: 1024

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 173 ++++++++++++------
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |  10 +
 3 files changed, 128 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index eee9639802a3..a5cf92710ad4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -254,6 +254,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_NODES,
 	HNAE3_DBG_CMD_TM_PRI,
 	HNAE3_DBG_CMD_TM_QSET,
+	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 62a05955fda2..e6c3175bca4d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -45,6 +45,50 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "dev_info",
+		.cmd = HNAE3_DBG_CMD_DEV_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+};
+
+static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
+	{
+		.name = "support FD",
+		.cap_bit = HNAE3_DEV_SUPPORT_FD_B,
+	}, {
+		.name = "support GRO",
+		.cap_bit = HNAE3_DEV_SUPPORT_GRO_B,
+	}, {
+		.name = "support FEC",
+		.cap_bit = HNAE3_DEV_SUPPORT_FEC_B,
+	}, {
+		.name = "support UDP GSO",
+		.cap_bit = HNAE3_DEV_SUPPORT_UDP_GSO_B,
+	}, {
+		.name = "support PTP",
+		.cap_bit = HNAE3_DEV_SUPPORT_PTP_B,
+	}, {
+		.name = "support INT QL",
+		.cap_bit = HNAE3_DEV_SUPPORT_INT_QL_B,
+	}, {
+		.name = "support HW TX csum",
+		.cap_bit = HNAE3_DEV_SUPPORT_HW_TX_CSUM_B,
+	}, {
+		.name = "support UDP tunnel csum",
+		.cap_bit = HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
+	}, {
+		.name = "support TX push",
+		.cap_bit = HNAE3_DEV_SUPPORT_TX_PUSH_B,
+	}, {
+		.name = "support imp-controlled PHY",
+		.cap_bit = HNAE3_DEV_SUPPORT_PHY_IMP_B,
+	}, {
+		.name = "support rxd advanced layout",
+		.cap_bit = HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
+	},
 };
 
 static int hns3_dbg_queue_info(struct hnae3_handle *h,
@@ -320,8 +364,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "queue info <number>\n");
 	dev_info(&h->pdev->dev, "queue map\n");
 	dev_info(&h->pdev->dev, "bd info <q_num> <bd index>\n");
-	dev_info(&h->pdev->dev, "dev capability\n");
-	dev_info(&h->pdev->dev, "dev spec\n");
 
 	if (!hns3_is_phys_func(h->pdev))
 		return;
@@ -363,68 +405,78 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "%s", printf_buf);
 }
 
-static void hns3_dbg_dev_caps(struct hnae3_handle *h)
+static void
+hns3_dbg_dev_caps(struct hnae3_handle *h, char *buf, int len, int *pos)
 {
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
-	unsigned long *caps;
+	static const char * const str[] = {"no", "yes"};
+	unsigned long *caps = ae_dev->caps;
+	u32 i, state;
 
-	caps = ae_dev->caps;
+	*pos += scnprintf(buf + *pos, len - *pos, "dev capability:\n");
 
-	dev_info(&h->pdev->dev, "support FD: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_FD_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support GRO: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_GRO_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support FEC: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_FEC_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support UDP GSO: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support PTP: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_PTP_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support INT QL: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_INT_QL_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support HW TX csum: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support UDP tunnel csum: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, caps) ?
-		 "yes" : "no");
-	dev_info(&h->pdev->dev, "support PAUSE: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps) ?
-		 "yes" : "no");
-	dev_info(&h->pdev->dev, "support imp-controlled PHY: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps) ? "yes" : "no");
-	dev_info(&h->pdev->dev, "support rxd advanced layout: %s\n",
-		 test_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, caps) ?
-		 "yes" : "no");
+	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cap); i++) {
+		state = test_bit(hns3_dbg_cap[i].cap_bit, caps);
+		*pos += scnprintf(buf + *pos, len - *pos, "%s: %s\n",
+				  hns3_dbg_cap[i].name, str[state]);
+	}
+
+	*pos += scnprintf(buf + *pos, len - *pos, "\n");
 }
 
-static void hns3_dbg_dev_specs(struct hnae3_handle *h)
+static void
+hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
 {
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
 	struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs;
 	struct hnae3_knic_private_info *kinfo = &h->kinfo;
-	struct hns3_nic_priv *priv  = h->priv;
 
-	dev_info(priv->dev, "MAC entry num: %u\n", dev_specs->mac_entry_num);
-	dev_info(priv->dev, "MNG entry num: %u\n", dev_specs->mng_entry_num);
-	dev_info(priv->dev, "MAX non tso bd num: %u\n",
-		 dev_specs->max_non_tso_bd_num);
-	dev_info(priv->dev, "RSS ind tbl size: %u\n",
-		 dev_specs->rss_ind_tbl_size);
-	dev_info(priv->dev, "RSS key size: %u\n", dev_specs->rss_key_size);
-	dev_info(priv->dev, "RSS size: %u\n", kinfo->rss_size);
-	dev_info(priv->dev, "Allocated RSS size: %u\n", kinfo->req_rss_size);
-	dev_info(priv->dev, "Task queue pairs numbers: %u\n", kinfo->num_tqps);
+	*pos += scnprintf(buf + *pos, len - *pos, "dev_spec:\n");
+	*pos += scnprintf(buf + *pos, len - *pos, "MAC entry num: %u\n",
+			  dev_specs->mac_entry_num);
+	*pos += scnprintf(buf + *pos, len - *pos, "MNG entry num: %u\n",
+			  dev_specs->mng_entry_num);
+	*pos += scnprintf(buf + *pos, len - *pos, "MAX non tso bd num: %u\n",
+			  dev_specs->max_non_tso_bd_num);
+	*pos += scnprintf(buf + *pos, len - *pos, "RSS ind tbl size: %u\n",
+			  dev_specs->rss_ind_tbl_size);
+	*pos += scnprintf(buf + *pos, len - *pos, "RSS key size: %u\n",
+			  dev_specs->rss_key_size);
+	*pos += scnprintf(buf + *pos, len - *pos, "RSS size: %u\n",
+			  kinfo->rss_size);
+	*pos += scnprintf(buf + *pos, len - *pos, "Allocated RSS size: %u\n",
+			  kinfo->req_rss_size);
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "Task queue pairs numbers: %u\n",
+			  kinfo->num_tqps);
+	*pos += scnprintf(buf + *pos, len - *pos, "RX buffer length: %u\n",
+			  kinfo->rx_buf_len);
+	*pos += scnprintf(buf + *pos, len - *pos, "Desc num per TX queue: %u\n",
+			  kinfo->num_tx_desc);
+	*pos += scnprintf(buf + *pos, len - *pos, "Desc num per RX queue: %u\n",
+			  kinfo->num_rx_desc);
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "Total number of enabled TCs: %u\n",
+			  kinfo->tc_info.num_tc);
+	*pos += scnprintf(buf + *pos, len - *pos, "MAX INT QL: %u\n",
+			  dev_specs->int_ql_max);
+	*pos += scnprintf(buf + *pos, len - *pos, "MAX INT GL: %u\n",
+			  dev_specs->max_int_gl);
+	*pos += scnprintf(buf + *pos, len - *pos, "MAX TM RATE: %u\n",
+			  dev_specs->max_tm_rate);
+	*pos += scnprintf(buf + *pos, len - *pos, "MAX QSET number: %u\n",
+			  dev_specs->max_qset_num);
+}
 
-	dev_info(priv->dev, "RX buffer length: %u\n", kinfo->rx_buf_len);
-	dev_info(priv->dev, "Desc num per TX queue: %u\n", kinfo->num_tx_desc);
-	dev_info(priv->dev, "Desc num per RX queue: %u\n", kinfo->num_rx_desc);
-	dev_info(priv->dev, "Total number of enabled TCs: %u\n",
-		 kinfo->tc_info.num_tc);
-	dev_info(priv->dev, "MAX INT QL: %u\n", dev_specs->int_ql_max);
-	dev_info(priv->dev, "MAX INT GL: %u\n", dev_specs->max_int_gl);
-	dev_info(priv->dev, "MAX frame size: %u\n", dev_specs->max_frm_size);
-	dev_info(priv->dev, "MAX TM RATE: %uMbps\n", dev_specs->max_tm_rate);
-	dev_info(priv->dev, "MAX QSET number: %u\n", dev_specs->max_qset_num);
+static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
+{
+	int pos = 0;
+
+	hns3_dbg_dev_caps(h, buf, len, &pos);
+
+	hns3_dbg_dev_specs(h, buf, len, &pos);
+
+	return 0;
 }
 
 static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer,
@@ -468,10 +520,6 @@ static int hns3_dbg_check_cmd(struct hnae3_handle *handle, char *cmd_buf)
 		ret = hns3_dbg_queue_map(handle);
 	else if (strncmp(cmd_buf, "bd info", 7) == 0)
 		ret = hns3_dbg_bd_info(handle, cmd_buf);
-	else if (strncmp(cmd_buf, "dev capability", 14) == 0)
-		hns3_dbg_dev_caps(handle);
-	else if (strncmp(cmd_buf, "dev spec", 8) == 0)
-		hns3_dbg_dev_specs(handle);
 	else if (handle->ae_algo->ops->dbg_run_cmd)
 		ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
 	else
@@ -545,10 +593,23 @@ static int hns3_dbg_get_cmd_index(struct hnae3_handle *handle,
 	return -EINVAL;
 }
 
+static const struct hns3_dbg_func hns3_dbg_cmd_func[] = {
+	{
+		.cmd = HNAE3_DBG_CMD_DEV_INFO,
+		.dbg_dump = hns3_dbg_dev_info,
+	},
+};
+
 static int hns3_dbg_read_cmd(struct hnae3_handle *handle,
 			     enum hnae3_dbg_cmd cmd, char *buf, int len)
 {
 	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd_func); i++) {
+		if (cmd == hns3_dbg_cmd_func[i].cmd)
+			return hns3_dbg_cmd_func[i].dbg_dump(handle, buf, len);
+	}
 
 	if (!ops->dbg_read_cmd)
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 1648f686114e..d16ec876bc71 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -26,4 +26,14 @@ struct hns3_dbg_cmd_info {
 	int (*init)(struct hnae3_handle *handle, unsigned int cmd);
 };
 
+struct hns3_dbg_func {
+	enum hnae3_dbg_cmd cmd;
+	int (*dbg_dump)(struct hnae3_handle *handle, char *buf, int len);
+};
+
+struct hns3_dbg_cap_info {
+	const char *name;
+	enum HNAE3_DEV_CAP_BITS cap_bit;
+};
+
 #endif

From 77e9184869c9fb00a482357ea8eef3bd7ae3d45a Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 14 May 2021 11:25:13 +0800
Subject: [PATCH 0134/3292] net: hns3: refactor dump bd info of debugfs

Currently, the debugfs command for bd info is implemented
by "echo xxxx > cmd", and record the information in dmesg.
It's unnecessary and heavy.

To improve it, add two debugfs directories "tx_bd_info" and
"rx_bd_info", and create a file for each queue under these
two directories, and query the bd info of specific queue by
"cat tx_bd_info/tx_bd_queue*" or "cat rx_bd_info/rx_bd_queue*",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat rx_bd_info/rx_bd_queue0
Queue 0 rx bd info:
BD_IDX   L234_INFO  PKT_LEN   SIZE...
0        0x0             60     60...
1        0x0           1512   1512...

$ cat tx_bd_info/tx_bd_queue0
Queue 0 tx bd info:
BD_IDX     ADDRESS  VLAN_TAG  SIZE...
0          0x0          0        0...
1          0x0          0        0...

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   2 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 348 ++++++++++++------
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |  19 +
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   |   2 +-
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |   1 +
 5 files changed, 267 insertions(+), 105 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a5cf92710ad4..6ec504a70bed 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -255,6 +255,8 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_PRI,
 	HNAE3_DBG_CMD_TM_QSET,
 	HNAE3_DBG_CMD_DEV_INFO,
+	HNAE3_DBG_CMD_TX_BD,
+	HNAE3_DBG_CMD_RX_BD,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index e6c3175bca4d..fb3c2d4fbcc5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -14,12 +14,19 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
 	{
 		.name = "tm"
 	},
+	{
+		.name = "tx_bd_info"
+	},
+	{
+		.name = "rx_bd_info"
+	},
 	/* keep common at the bottom and add new directory above */
 	{
 		.name = "common"
 	},
 };
 
+static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, unsigned int cmd);
 static int hns3_dbg_common_file_init(struct hnae3_handle *handle,
 				     unsigned int cmd);
 
@@ -52,6 +59,20 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "tx_bd_queue",
+		.cmd = HNAE3_DBG_CMD_TX_BD,
+		.dentry = HNS3_DBG_DENTRY_TX_BD,
+		.buf_len = HNS3_DBG_READ_LEN_4MB,
+		.init = hns3_dbg_bd_file_init,
+	},
+	{
+		.name = "rx_bd_queue",
+		.cmd = HNAE3_DBG_CMD_RX_BD,
+		.dentry = HNS3_DBG_DENTRY_RX_BD,
+		.buf_len = HNS3_DBG_READ_LEN_4MB,
+		.init = hns3_dbg_bd_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -91,6 +112,27 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
 	},
 };
 
+static void hns3_dbg_fill_content(char *content, u16 len,
+				  const struct hns3_dbg_item *items,
+				  const char **result, u16 size)
+{
+	char *pos = content;
+	u16 i;
+
+	memset(content, ' ', len);
+	for (i = 0; i < size; i++) {
+		if (result)
+			strncpy(pos, result[i], strlen(result[i]));
+		else
+			strncpy(pos, items[i].name, strlen(items[i].name));
+
+		pos += strlen(items[i].name) + items[i].interval;
+	}
+
+	*pos++ = '\n';
+	*pos++ = '\0';
+}
+
 static int hns3_dbg_queue_info(struct hnae3_handle *h,
 			       const char *cmd_buf)
 {
@@ -248,108 +290,159 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h)
 	return 0;
 }
 
-static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf)
+static const struct hns3_dbg_item rx_bd_info_items[] = {
+	{ "BD_IDX", 3 },
+	{ "L234_INFO", 2 },
+	{ "PKT_LEN", 3 },
+	{ "SIZE", 4 },
+	{ "RSS_HASH", 4 },
+	{ "FD_ID", 2 },
+	{ "VLAN_TAG", 2 },
+	{ "O_DM_VLAN_ID_FB", 2 },
+	{ "OT_VLAN_TAG", 2 },
+	{ "BD_BASE_INFO", 2 },
+	{ "PTYPE", 2 },
+	{ "HW_CSUM", 2 },
+};
+
+static void hns3_dump_rx_bd_info(struct hns3_nic_priv *priv,
+				 struct hns3_desc *desc, char **result, int idx)
 {
-	struct hns3_nic_priv *priv = h->priv;
-	struct hns3_desc *rx_desc, *tx_desc;
-	struct device *dev = &h->pdev->dev;
-	struct hns3_enet_ring *ring;
-	u32 tx_index, rx_index;
-	u32 q_num, value;
-	dma_addr_t addr;
-	u16 mss_hw_csum;
-	u32 l234info;
-	int cnt;
+	unsigned int j = 0;
 
-	cnt = sscanf(&cmd_buf[8], "%u %u", &q_num, &tx_index);
-	if (cnt == 2) {
-		rx_index = tx_index;
-	} else if (cnt != 1) {
-		dev_err(dev, "bd info: bad command string, cnt=%d\n", cnt);
-		return -EINVAL;
-	}
+	sprintf(result[j++], "%5d", idx);
+	sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.l234_info));
+	sprintf(result[j++], "%7u", le16_to_cpu(desc->rx.pkt_len));
+	sprintf(result[j++], "%4u", le16_to_cpu(desc->rx.size));
+	sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.rss_hash));
+	sprintf(result[j++], "%5u", le16_to_cpu(desc->rx.fd_id));
+	sprintf(result[j++], "%8u", le16_to_cpu(desc->rx.vlan_tag));
+	sprintf(result[j++], "%15u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb));
+	sprintf(result[j++], "%11u", le16_to_cpu(desc->rx.ot_vlan_tag));
+	sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.bd_base_info));
+	if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) {
+		u32 ol_info = le32_to_cpu(desc->rx.ol_info);
 
-	if (q_num >= h->kinfo.num_tqps) {
-		dev_err(dev, "Queue number(%u) is out of range(0-%u)\n", q_num,
-			h->kinfo.num_tqps - 1);
-		return -EINVAL;
-	}
-
-	ring = &priv->ring[q_num];
-	value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
-	tx_index = (cnt == 1) ? value : tx_index;
-
-	if (tx_index >= ring->desc_num) {
-		dev_err(dev, "bd index(%u) is out of range(0-%u)\n", tx_index,
-			ring->desc_num - 1);
-		return -EINVAL;
-	}
-
-	tx_desc = &ring->desc[tx_index];
-	addr = le64_to_cpu(tx_desc->addr);
-	mss_hw_csum = le16_to_cpu(tx_desc->tx.mss_hw_csum);
-	dev_info(dev, "TX Queue Num: %u, BD Index: %u\n", q_num, tx_index);
-	dev_info(dev, "(TX)addr: %pad\n", &addr);
-	dev_info(dev, "(TX)vlan_tag: %u\n", le16_to_cpu(tx_desc->tx.vlan_tag));
-	dev_info(dev, "(TX)send_size: %u\n",
-		 le16_to_cpu(tx_desc->tx.send_size));
-
-	if (mss_hw_csum & BIT(HNS3_TXD_HW_CS_B)) {
-		u32 offset = le32_to_cpu(tx_desc->tx.ol_type_vlan_len_msec);
-		u32 start = le32_to_cpu(tx_desc->tx.type_cs_vlan_tso_len);
-
-		dev_info(dev, "(TX)csum start: %u\n",
-			 hnae3_get_field(start,
-					 HNS3_TXD_CSUM_START_M,
-					 HNS3_TXD_CSUM_START_S));
-		dev_info(dev, "(TX)csum offset: %u\n",
-			 hnae3_get_field(offset,
-					 HNS3_TXD_CSUM_OFFSET_M,
-					 HNS3_TXD_CSUM_OFFSET_S));
+		sprintf(result[j++], "%5lu", hnae3_get_field(ol_info,
+							     HNS3_RXD_PTYPE_M,
+							     HNS3_RXD_PTYPE_S));
+		sprintf(result[j++], "%7u", le16_to_cpu(desc->csum));
 	} else {
-		dev_info(dev, "(TX)vlan_tso: %u\n",
-			 tx_desc->tx.type_cs_vlan_tso);
-		dev_info(dev, "(TX)l2_len: %u\n", tx_desc->tx.l2_len);
-		dev_info(dev, "(TX)l3_len: %u\n", tx_desc->tx.l3_len);
-		dev_info(dev, "(TX)l4_len: %u\n", tx_desc->tx.l4_len);
-		dev_info(dev, "(TX)vlan_msec: %u\n",
-			 tx_desc->tx.ol_type_vlan_msec);
-		dev_info(dev, "(TX)ol2_len: %u\n", tx_desc->tx.ol2_len);
-		dev_info(dev, "(TX)ol3_len: %u\n", tx_desc->tx.ol3_len);
-		dev_info(dev, "(TX)ol4_len: %u\n", tx_desc->tx.ol4_len);
+		sprintf(result[j++], "NA");
+		sprintf(result[j++], "NA");
+	}
+}
+
+static int hns3_dbg_rx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
+{
+	char data_str[ARRAY_SIZE(rx_bd_info_items)][HNS3_DBG_DATA_STR_LEN];
+	struct hns3_nic_priv *priv = d->handle->priv;
+	char *result[ARRAY_SIZE(rx_bd_info_items)];
+	char content[HNS3_DBG_INFO_LEN];
+	struct hns3_enet_ring *ring;
+	struct hns3_desc *desc;
+	unsigned int i;
+	int pos = 0;
+
+	if (d->qid >= d->handle->kinfo.num_tqps) {
+		dev_err(&d->handle->pdev->dev,
+			"queue%u is not in use\n", d->qid);
+		return -EINVAL;
 	}
 
-	dev_info(dev, "(TX)vlan_tag: %u\n",
-		 le16_to_cpu(tx_desc->tx.outer_vlan_tag));
-	dev_info(dev, "(TX)tv: %u\n", le16_to_cpu(tx_desc->tx.tv));
-	dev_info(dev, "(TX)paylen_ol4cs: %u\n",
-		 le32_to_cpu(tx_desc->tx.paylen_ol4cs));
-	dev_info(dev, "(TX)vld_ra_ri: %u\n",
-		 le16_to_cpu(tx_desc->tx.bdtp_fe_sc_vld_ra_ri));
-	dev_info(dev, "(TX)mss_hw_csum: %u\n", mss_hw_csum);
+	for (i = 0; i < ARRAY_SIZE(rx_bd_info_items); i++)
+		result[i] = &data_str[i][0];
 
-	ring = &priv->ring[q_num + h->kinfo.num_tqps];
-	value = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG);
-	rx_index = (cnt == 1) ? value : tx_index;
-	rx_desc = &ring->desc[rx_index];
+	pos += scnprintf(buf + pos, len - pos,
+			  "Queue %u rx bd info:\n", d->qid);
+	hns3_dbg_fill_content(content, sizeof(content), rx_bd_info_items,
+			      NULL, ARRAY_SIZE(rx_bd_info_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
 
-	addr = le64_to_cpu(rx_desc->addr);
-	l234info = le32_to_cpu(rx_desc->rx.l234_info);
-	dev_info(dev, "RX Queue Num: %u, BD Index: %u\n", q_num, rx_index);
-	dev_info(dev, "(RX)addr: %pad\n", &addr);
-	dev_info(dev, "(RX)l234_info: %u\n", l234info);
+	ring = &priv->ring[d->qid + d->handle->kinfo.num_tqps];
+	for (i = 0; i < ring->desc_num; i++) {
+		desc = &ring->desc[i];
 
-	dev_info(dev, "(RX)pkt_len: %u\n", le16_to_cpu(rx_desc->rx.pkt_len));
-	dev_info(dev, "(RX)size: %u\n", le16_to_cpu(rx_desc->rx.size));
-	dev_info(dev, "(RX)rss_hash: %u\n", le32_to_cpu(rx_desc->rx.rss_hash));
-	dev_info(dev, "(RX)fd_id: %u\n", le16_to_cpu(rx_desc->rx.fd_id));
-	dev_info(dev, "(RX)vlan_tag: %u\n", le16_to_cpu(rx_desc->rx.vlan_tag));
-	dev_info(dev, "(RX)o_dm_vlan_id_fb: %u\n",
-		 le16_to_cpu(rx_desc->rx.o_dm_vlan_id_fb));
-	dev_info(dev, "(RX)ot_vlan_tag: %u\n",
-		 le16_to_cpu(rx_desc->rx.ot_vlan_tag));
-	dev_info(dev, "(RX)bd_base_info: %u\n",
-		 le32_to_cpu(rx_desc->rx.bd_base_info));
+		hns3_dump_rx_bd_info(priv, desc, result, i);
+		hns3_dbg_fill_content(content, sizeof(content),
+				      rx_bd_info_items, (const char **)result,
+				      ARRAY_SIZE(rx_bd_info_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
+	}
+
+	return 0;
+}
+
+static const struct hns3_dbg_item tx_bd_info_items[] = {
+	{ "BD_IDX", 5 },
+	{ "ADDRESS", 2 },
+	{ "VLAN_TAG", 2 },
+	{ "SIZE", 2 },
+	{ "T_CS_VLAN_TSO", 2 },
+	{ "OT_VLAN_TAG", 3 },
+	{ "TV", 2 },
+	{ "OLT_VLAN_LEN", 2},
+	{ "PAYLEN_OL4CS", 2},
+	{ "BD_FE_SC_VLD", 2},
+	{ "MSS_HW_CSUM", 0},
+};
+
+static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv,
+				 struct hns3_desc *desc, char **result, int idx)
+{
+	unsigned int j = 0;
+
+	sprintf(result[j++], "%6d", idx);
+	sprintf(result[j++], "%#llx", le64_to_cpu(desc->addr));
+	sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.vlan_tag));
+	sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.send_size));
+	sprintf(result[j++], "%#x",
+		le32_to_cpu(desc->tx.type_cs_vlan_tso_len));
+	sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.outer_vlan_tag));
+	sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.tv));
+	sprintf(result[j++], "%10u",
+		le32_to_cpu(desc->tx.ol_type_vlan_len_msec));
+	sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.paylen_ol4cs));
+	sprintf(result[j++], "%#x", le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri));
+	sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.mss_hw_csum));
+}
+
+static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
+{
+	char data_str[ARRAY_SIZE(tx_bd_info_items)][HNS3_DBG_DATA_STR_LEN];
+	struct hns3_nic_priv *priv = d->handle->priv;
+	char *result[ARRAY_SIZE(tx_bd_info_items)];
+	char content[HNS3_DBG_INFO_LEN];
+	struct hns3_enet_ring *ring;
+	struct hns3_desc *desc;
+	unsigned int i;
+	int pos = 0;
+
+	if (d->qid >= d->handle->kinfo.num_tqps) {
+		dev_err(&d->handle->pdev->dev,
+			"queue%u is not in use\n", d->qid);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tx_bd_info_items); i++)
+		result[i] = &data_str[i][0];
+
+	pos += scnprintf(buf + pos, len - pos,
+			  "Queue %u tx bd info:\n", d->qid);
+	hns3_dbg_fill_content(content, sizeof(content), tx_bd_info_items,
+			      NULL, ARRAY_SIZE(tx_bd_info_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
+
+	ring = &priv->ring[d->qid];
+	for (i = 0; i < ring->desc_num; i++) {
+		desc = &ring->desc[i];
+
+		hns3_dump_tx_bd_info(priv, desc, result, i);
+		hns3_dbg_fill_content(content, sizeof(content),
+				      tx_bd_info_items, (const char **)result,
+				      ARRAY_SIZE(tx_bd_info_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
+	}
 
 	return 0;
 }
@@ -363,7 +456,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "available commands\n");
 	dev_info(&h->pdev->dev, "queue info <number>\n");
 	dev_info(&h->pdev->dev, "queue map\n");
-	dev_info(&h->pdev->dev, "bd info <q_num> <bd index>\n");
 
 	if (!hns3_is_phys_func(h->pdev))
 		return;
@@ -518,8 +610,6 @@ static int hns3_dbg_check_cmd(struct hnae3_handle *handle, char *cmd_buf)
 		ret = hns3_dbg_queue_info(handle, cmd_buf);
 	else if (strncmp(cmd_buf, "queue map", 9) == 0)
 		ret = hns3_dbg_queue_map(handle);
-	else if (strncmp(cmd_buf, "bd info", 7) == 0)
-		ret = hns3_dbg_bd_info(handle, cmd_buf);
 	else if (handle->ae_algo->ops->dbg_run_cmd)
 		ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
 	else
@@ -598,29 +688,46 @@ static const struct hns3_dbg_func hns3_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
 		.dbg_dump = hns3_dbg_dev_info,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_TX_BD,
+		.dbg_dump_bd = hns3_dbg_tx_bd_info,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_RX_BD,
+		.dbg_dump_bd = hns3_dbg_rx_bd_info,
+	},
 };
 
-static int hns3_dbg_read_cmd(struct hnae3_handle *handle,
+static int hns3_dbg_read_cmd(struct hns3_dbg_data *dbg_data,
 			     enum hnae3_dbg_cmd cmd, char *buf, int len)
 {
-	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	const struct hnae3_ae_ops *ops = dbg_data->handle->ae_algo->ops;
+	const struct hns3_dbg_func *cmd_func;
 	u32 i;
 
 	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd_func); i++) {
-		if (cmd == hns3_dbg_cmd_func[i].cmd)
-			return hns3_dbg_cmd_func[i].dbg_dump(handle, buf, len);
+		if (cmd == hns3_dbg_cmd_func[i].cmd) {
+			cmd_func = &hns3_dbg_cmd_func[i];
+			if (cmd_func->dbg_dump)
+				return cmd_func->dbg_dump(dbg_data->handle, buf,
+							  len);
+			else
+				return cmd_func->dbg_dump_bd(dbg_data, buf,
+							     len);
+		}
 	}
 
 	if (!ops->dbg_read_cmd)
 		return -EOPNOTSUPP;
 
-	return ops->dbg_read_cmd(handle, cmd, buf, len);
+	return ops->dbg_read_cmd(dbg_data->handle, cmd, buf, len);
 }
 
 static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
 			     size_t count, loff_t *ppos)
 {
-	struct hnae3_handle *handle = filp->private_data;
+	struct hns3_dbg_data *dbg_data = filp->private_data;
+	struct hnae3_handle *handle = dbg_data->handle;
 	struct hns3_nic_priv *priv = handle->priv;
 	ssize_t size = 0;
 	char **save_buf;
@@ -654,7 +761,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
 
 	/* get data ready for the first time to read */
 	if (!*ppos) {
-		ret = hns3_dbg_read_cmd(handle, hns3_dbg_cmd[index].cmd,
+		ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd,
 					read_buf, hns3_dbg_cmd[index].buf_len);
 		if (ret)
 			goto out;
@@ -688,14 +795,47 @@ static const struct file_operations hns3_dbg_fops = {
 	.read  = hns3_dbg_read,
 };
 
+static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd)
+{
+	struct dentry *entry_dir;
+	struct hns3_dbg_data *data;
+	u16 max_queue_num;
+	unsigned int i;
+
+	entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry;
+	max_queue_num = hns3_get_max_available_channels(handle);
+	data = devm_kzalloc(&handle->pdev->dev, max_queue_num * sizeof(*data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	for (i = 0; i < max_queue_num; i++) {
+		char name[HNS3_DBG_FILE_NAME_LEN];
+
+		data[i].handle = handle;
+		data[i].qid = i;
+		sprintf(name, "%s%u", hns3_dbg_cmd[cmd].name, i);
+		debugfs_create_file(name, 0400, entry_dir, &data[i],
+				    &hns3_dbg_fops);
+	}
+
+	return 0;
+}
+
 static int
 hns3_dbg_common_file_init(struct hnae3_handle *handle, u32 cmd)
 {
+	struct hns3_dbg_data *data;
 	struct dentry *entry_dir;
 
+	data = devm_kzalloc(&handle->pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->handle = handle;
 	entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry;
 	debugfs_create_file(hns3_dbg_cmd[cmd].name, 0400, entry_dir,
-			    handle, &hns3_dbg_fops);
+			    data, &hns3_dbg_fops);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index d16ec876bc71..06868b618010 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -5,10 +5,28 @@
 #define __HNS3_DEBUGFS_H
 
 #define HNS3_DBG_READ_LEN	65536
+#define HNS3_DBG_READ_LEN_4MB	0x400000
 #define HNS3_DBG_WRITE_LEN	1024
 
+#define HNS3_DBG_DATA_STR_LEN	32
+#define HNS3_DBG_INFO_LEN	256
+#define HNS3_DBG_ITEM_NAME_LEN	32
+#define HNS3_DBG_FILE_NAME_LEN	16
+
+struct hns3_dbg_item {
+	char name[HNS3_DBG_ITEM_NAME_LEN];
+	u16 interval; /* blank numbers after the item */
+};
+
+struct hns3_dbg_data {
+	struct hnae3_handle *handle;
+	u16 qid;
+};
+
 enum hns3_dbg_dentry_type {
 	HNS3_DBG_DENTRY_TM,
+	HNS3_DBG_DENTRY_TX_BD,
+	HNS3_DBG_DENTRY_RX_BD,
 	HNS3_DBG_DENTRY_COMMON,
 };
 
@@ -29,6 +47,7 @@ struct hns3_dbg_cmd_info {
 struct hns3_dbg_func {
 	enum hnae3_dbg_cmd cmd;
 	int (*dbg_dump)(struct hnae3_handle *handle, char *buf, int len);
+	int (*dbg_dump_bd)(struct hns3_dbg_data *data, char *buf, int len);
 };
 
 struct hns3_dbg_cap_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 02ce7a3e62cc..de0e2d215879 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -634,7 +634,7 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
 	return 0;
 }
 
-static u16 hns3_get_max_available_channels(struct hnae3_handle *h)
+u16 hns3_get_max_available_channels(struct hnae3_handle *h)
 {
 	u16 alloc_tqps, max_rss_size, rss_size;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 5c72f41cf57e..79ff2fa61d47 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -655,4 +655,5 @@ void hns3_dbg_uninit(struct hnae3_handle *handle);
 void hns3_dbg_register_debugfs(const char *debugfs_dir_name);
 void hns3_dbg_unregister_debugfs(void);
 void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size);
+u16 hns3_get_max_available_channels(struct hnae3_handle *h);
 #endif

From 1556ea9120ffcf4faf7ac6b62a6e28216f260a23 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 14 May 2021 11:25:14 +0800
Subject: [PATCH 0135/3292] net: hns3: refactor dump mac list of debugfs

Currently, the debugfs command for mac list info is implemented
by "echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create two files "uc" and
"mc" under directory "mac_list" for it, and query mac list info
by "cat mac_list/uc" and "mac_list/mc", return the result to
userspace, rather than record in dmesg.

The display style is below:
$ cat mac_list/uc
UC MAC_LIST:
FUNC_ID  MAC_ADDR            STATE
pf       00:18:2d:00:00:71   ACTIVE

$ cat mac_list/mc
MC MAC_LIST:
FUNC_ID  MAC_ADDR            STATE
pf       01:80:c2:00:00:21   ACTIVE

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   2 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  19 ++-
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |   1 +
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 127 +++++++++++++-----
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |   9 ++
 5 files changed, 119 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 6ec504a70bed..ce3910f891ae 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -257,6 +257,8 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
+	HNAE3_DBG_CMD_MAC_UC,
+	HNAE3_DBG_CMD_MAC_MC,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index fb3c2d4fbcc5..5e0278604c12 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -20,6 +20,9 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
 	{
 		.name = "rx_bd_info"
 	},
+	{
+		.name = "mac_list"
+	},
 	/* keep common at the bottom and add new directory above */
 	{
 		.name = "common"
@@ -73,6 +76,20 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN_4MB,
 		.init = hns3_dbg_bd_file_init,
 	},
+	{
+		.name = "uc",
+		.cmd = HNAE3_DBG_CMD_MAC_UC,
+		.dentry = HNS3_DBG_DENTRY_MAC,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "mc",
+		.cmd = HNAE3_DBG_CMD_MAC_MC,
+		.dentry = HNS3_DBG_DENTRY_MAC,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -474,8 +491,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump loopback\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
-	dev_info(&h->pdev->dev, "dump uc mac list <func id>\n");
-	dev_info(&h->pdev->dev, "dump mc mac list <func id>\n");
 	dev_info(&h->pdev->dev, "dump intr\n");
 
 	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 06868b618010..3d2ee3642459 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -27,6 +27,7 @@ enum hns3_dbg_dentry_type {
 	HNS3_DBG_DENTRY_TM,
 	HNS3_DBG_DENTRY_TX_BD,
 	HNS3_DBG_DENTRY_RX_BD,
+	HNS3_DBG_DENTRY_MAC,
 	HNS3_DBG_DENTRY_COMMON,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 7f1abdff25b0..ea0d43fa9149 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -8,6 +8,10 @@
 #include "hclge_tm.h"
 #include "hnae3.h"
 
+static const char * const hclge_mac_state_str[] = {
+	"TO_ADD", "TO_DEL", "ACTIVE"
+};
+
 static const struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = {
 	{ .reg_type = "bios common",
 	  .dfx_msg = &hclge_dbg_bios_common_reg[0],
@@ -71,6 +75,35 @@ static const struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = {
 		       .cmd = HCLGE_OPC_DFX_TQP_REG } },
 };
 
+static void hclge_dbg_fill_content(char *content, u16 len,
+				   const struct hclge_dbg_item *items,
+				   const char **result, u16 size)
+{
+	char *pos = content;
+	u16 i;
+
+	memset(content, ' ', len);
+	for (i = 0; i < size; i++) {
+		if (result)
+			strncpy(pos, result[i], strlen(result[i]));
+		else
+			strncpy(pos, items[i].name, strlen(items[i].name));
+		pos += strlen(items[i].name) + items[i].interval;
+	}
+	*pos++ = '\n';
+	*pos++ = '\0';
+}
+
+static char *hclge_dbg_get_func_id_str(char *buf, u8 id)
+{
+	if (id)
+		sprintf(buf, "vf%u", id - 1);
+	else
+		sprintf(buf, "pf");
+
+	return buf;
+}
+
 static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset)
 {
 	struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT];
@@ -1693,45 +1726,65 @@ static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev,
 	hclge_dbg_dump_qs_shaper_single(hdev, qsid);
 }
 
-static int hclge_dbg_dump_mac_list(struct hclge_dev *hdev, const char *cmd_buf,
-				   bool is_unicast)
+static const struct hclge_dbg_item mac_list_items[] = {
+	{ "FUNC_ID", 2 },
+	{ "MAC_ADDR", 12 },
+	{ "STATE", 2 },
+};
+
+static void hclge_dbg_dump_mac_list(struct hclge_dev *hdev, char *buf, int len,
+				    bool is_unicast)
 {
+	char data_str[ARRAY_SIZE(mac_list_items)][HCLGE_DBG_DATA_STR_LEN];
+	char content[HCLGE_DBG_INFO_LEN], str_id[HCLGE_DBG_ID_LEN];
+	char *result[ARRAY_SIZE(mac_list_items)];
 	struct hclge_mac_node *mac_node, *tmp;
 	struct hclge_vport *vport;
 	struct list_head *list;
 	u32 func_id;
-	int ret;
+	int pos = 0;
+	int i;
 
-	ret = kstrtouint(cmd_buf, 0, &func_id);
-	if (ret < 0) {
-		dev_err(&hdev->pdev->dev,
-			"dump mac list: bad command string, ret = %d\n", ret);
-		return -EINVAL;
+	for (i = 0; i < ARRAY_SIZE(mac_list_items); i++)
+		result[i] = &data_str[i][0];
+
+	pos += scnprintf(buf + pos, len - pos, "%s MAC_LIST:\n",
+			 is_unicast ? "UC" : "MC");
+	hclge_dbg_fill_content(content, sizeof(content), mac_list_items,
+			       NULL, ARRAY_SIZE(mac_list_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
+
+	for (func_id = 0; func_id < hdev->num_alloc_vport; func_id++) {
+		vport = &hdev->vport[func_id];
+		list = is_unicast ? &vport->uc_mac_list : &vport->mc_mac_list;
+		spin_lock_bh(&vport->mac_list_lock);
+		list_for_each_entry_safe(mac_node, tmp, list, node) {
+			i = 0;
+			result[i++] = hclge_dbg_get_func_id_str(str_id,
+								func_id);
+			sprintf(result[i++], "%pM", mac_node->mac_addr);
+			sprintf(result[i++], "%5s",
+				hclge_mac_state_str[mac_node->state]);
+			hclge_dbg_fill_content(content, sizeof(content),
+					       mac_list_items,
+					       (const char **)result,
+					       ARRAY_SIZE(mac_list_items));
+			pos += scnprintf(buf + pos, len - pos, "%s", content);
+		}
+		spin_unlock_bh(&vport->mac_list_lock);
 	}
+}
 
-	if (func_id >= hdev->num_alloc_vport) {
-		dev_err(&hdev->pdev->dev,
-			"function id(%u) is out of range(0-%u)\n", func_id,
-			hdev->num_alloc_vport - 1);
-		return -EINVAL;
-	}
+static int hclge_dbg_dump_mac_uc(struct hclge_dev *hdev, char *buf, int len)
+{
+	hclge_dbg_dump_mac_list(hdev, buf, len, true);
 
-	vport = &hdev->vport[func_id];
+	return 0;
+}
 
-	list = is_unicast ? &vport->uc_mac_list : &vport->mc_mac_list;
-
-	dev_info(&hdev->pdev->dev, "vport %u %s mac list:\n",
-		 func_id, is_unicast ? "uc" : "mc");
-	dev_info(&hdev->pdev->dev, "mac address              state\n");
-
-	spin_lock_bh(&vport->mac_list_lock);
-
-	list_for_each_entry_safe(mac_node, tmp, list, node) {
-		dev_info(&hdev->pdev->dev, "%pM         %d\n",
-			 mac_node->mac_addr, mac_node->state);
-	}
-
-	spin_unlock_bh(&vport->mac_list_lock);
+static int hclge_dbg_dump_mac_mc(struct hclge_dev *hdev, char *buf, int len)
+{
+	hclge_dbg_dump_mac_list(hdev, buf, len, false);
 
 	return 0;
 }
@@ -1781,14 +1834,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
 		hclge_dbg_dump_qs_shaper(hdev,
 					 &cmd_buf[sizeof("dump qs shaper")]);
-	} else if (strncmp(cmd_buf, "dump uc mac list", 16) == 0) {
-		hclge_dbg_dump_mac_list(hdev,
-					&cmd_buf[sizeof("dump uc mac list")],
-					true);
-	} else if (strncmp(cmd_buf, "dump mc mac list", 16) == 0) {
-		hclge_dbg_dump_mac_list(hdev,
-					&cmd_buf[sizeof("dump mc mac list")],
-					false);
 	} else if (strncmp(cmd_buf, DUMP_INTERRUPT,
 		   strlen(DUMP_INTERRUPT)) == 0) {
 		hclge_dbg_dump_interrupt(hdev);
@@ -1813,6 +1858,14 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_TM_QSET,
 		.dbg_dump = hclge_dbg_dump_tm_qset,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_MAC_UC,
+		.dbg_dump = hclge_dbg_dump_mac_uc,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_MAC_MC,
+		.dbg_dump = hclge_dbg_dump_mac_mc,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 0c144532e6ad..c5c18afb8b88 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -728,4 +728,13 @@ static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
 	{true, "RCB_CFG_TX_RING_EBDNUM"},
 };
 
+#define HCLGE_DBG_INFO_LEN			256
+#define HCLGE_DBG_ID_LEN			16
+#define HCLGE_DBG_ITEM_NAME_LEN			32
+#define HCLGE_DBG_DATA_STR_LEN			32
+struct hclge_dbg_item {
+	char name[HCLGE_DBG_ITEM_NAME_LEN];
+	u16 interval; /* blank numbers after the item */
+};
+
 #endif

From 8ddfd9c46ef49ed75abc745c1f90532a2abb8f01 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Fri, 14 May 2021 11:25:15 +0800
Subject: [PATCH 0136/3292] net: hns3: refactor dump mng tbl of debugfs

Currently, the debugfs command for mng tbl is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"mng_tbl" for it, and query it by command "cat mng_tbl",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat mng_tbl
entry  mac_addr          mask  ether  mask  vlan  mask  i_map ...
00     00:00:00:00:00:00 0     88cc   0     0000  1     0f    ...

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 78 +++++++++----------
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |  1 -
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ce3910f891ae..a2033cb19b3d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -259,6 +259,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_RX_BD,
 	HNAE3_DBG_CMD_MAC_UC,
 	HNAE3_DBG_CMD_MAC_MC,
+	HNAE3_DBG_CMD_MNG_TBL,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 5e0278604c12..4af997d6bd59 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -90,6 +90,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "mng_tbl",
+		.cmd = HNAE3_DBG_CMD_MNG_TBL,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -484,7 +491,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
-	dev_info(&h->pdev->dev, "dump mng tbl\n");
 	dev_info(&h->pdev->dev, "dump reset info\n");
 	dev_info(&h->pdev->dev, "dump m7 info\n");
 	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index ea0d43fa9149..613730f36148 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1212,24 +1212,19 @@ err_qos_cmd_send:
 		"dump qos buf cfg fail(0x%x), ret = %d\n", cmd, ret);
 }
 
-static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
+static int hclge_dbg_dump_mng_table(struct hclge_dev *hdev, char *buf, int len)
 {
 	struct hclge_mac_ethertype_idx_rd_cmd *req0;
-	char printf_buf[HCLGE_DBG_BUF_LEN];
 	struct hclge_desc desc;
 	u32 msg_egress_port;
+	int pos = 0;
 	int ret, i;
 
-	dev_info(&hdev->pdev->dev, "mng tab:\n");
-	memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
-	strncat(printf_buf,
-		"entry|mac_addr         |mask|ether|mask|vlan|mask",
-		HCLGE_DBG_BUF_LEN - 1);
-	strncat(printf_buf + strlen(printf_buf),
-		"|i_map|i_dir|e_type|pf_id|vf_id|q_id|drop\n",
-		HCLGE_DBG_BUF_LEN - strlen(printf_buf) - 1);
-
-	dev_info(&hdev->pdev->dev, "%s", printf_buf);
+	pos += scnprintf(buf + pos, len - pos,
+			 "entry  mac_addr          mask  ether  ");
+	pos += scnprintf(buf + pos, len - pos,
+			 "mask  vlan  mask  i_map  i_dir  e_type  ");
+	pos += scnprintf(buf + pos, len - pos, "pf_id  vf_id  q_id  drop\n");
 
 	for (i = 0; i < HCLGE_DBG_MNG_TBL_MAX; i++) {
 		hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_ETHERTYPE_IDX_RD,
@@ -1240,43 +1235,40 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"call hclge_cmd_send fail, ret = %d\n", ret);
-			return;
+				"failed to dump manage table, ret = %d\n", ret);
+			return ret;
 		}
 
 		if (!req0->resp_code)
 			continue;
 
-		memset(printf_buf, 0, HCLGE_DBG_BUF_LEN);
-		snprintf(printf_buf, HCLGE_DBG_BUF_LEN,
-			 "%02u   |%02x:%02x:%02x:%02x:%02x:%02x|",
-			 le16_to_cpu(req0->index),
-			 req0->mac_addr[0], req0->mac_addr[1],
-			 req0->mac_addr[2], req0->mac_addr[3],
-			 req0->mac_addr[4], req0->mac_addr[5]);
+		pos += scnprintf(buf + pos, len - pos, "%02u     %pM ",
+				 le16_to_cpu(req0->index), req0->mac_addr);
 
-		snprintf(printf_buf + strlen(printf_buf),
-			 HCLGE_DBG_BUF_LEN - strlen(printf_buf),
-			 "%x   |%04x |%x   |%04x|%x   |%02x   |%02x   |",
-			 !!(req0->flags & HCLGE_DBG_MNG_MAC_MASK_B),
-			 le16_to_cpu(req0->ethter_type),
-			 !!(req0->flags & HCLGE_DBG_MNG_ETHER_MASK_B),
-			 le16_to_cpu(req0->vlan_tag) & HCLGE_DBG_MNG_VLAN_TAG,
-			 !!(req0->flags & HCLGE_DBG_MNG_VLAN_MASK_B),
-			 req0->i_port_bitmap, req0->i_port_direction);
+		pos += scnprintf(buf + pos, len - pos,
+				 "%x     %04x   %x     %04x  ",
+				 !!(req0->flags & HCLGE_DBG_MNG_MAC_MASK_B),
+				 le16_to_cpu(req0->ethter_type),
+				 !!(req0->flags & HCLGE_DBG_MNG_ETHER_MASK_B),
+				 le16_to_cpu(req0->vlan_tag) &
+				 HCLGE_DBG_MNG_VLAN_TAG);
+
+		pos += scnprintf(buf + pos, len - pos,
+				 "%x     %02x     %02x     ",
+				 !!(req0->flags & HCLGE_DBG_MNG_VLAN_MASK_B),
+				 req0->i_port_bitmap, req0->i_port_direction);
 
 		msg_egress_port = le16_to_cpu(req0->egress_port);
-		snprintf(printf_buf + strlen(printf_buf),
-			 HCLGE_DBG_BUF_LEN - strlen(printf_buf),
-			 "%x     |%x    |%02x   |%04x|%x\n",
-			 !!(msg_egress_port & HCLGE_DBG_MNG_E_TYPE_B),
-			 msg_egress_port & HCLGE_DBG_MNG_PF_ID,
-			 (msg_egress_port >> 3) & HCLGE_DBG_MNG_VF_ID,
-			 le16_to_cpu(req0->egress_queue),
-			 !!(msg_egress_port & HCLGE_DBG_MNG_DROP_B));
-
-		dev_info(&hdev->pdev->dev, "%s", printf_buf);
+		pos += scnprintf(buf + pos, len - pos,
+				 "%x       %x      %02x     %04x  %x\n",
+				 !!(msg_egress_port & HCLGE_DBG_MNG_E_TYPE_B),
+				 msg_egress_port & HCLGE_DBG_MNG_PF_ID,
+				 (msg_egress_port >> 3) & HCLGE_DBG_MNG_VF_ID,
+				 le16_to_cpu(req0->egress_queue),
+				 !!(msg_egress_port & HCLGE_DBG_MNG_DROP_B));
 	}
+
+	return 0;
 }
 
 static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
@@ -1813,8 +1805,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_qos_pri_map(hdev);
 	} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
 		hclge_dbg_dump_qos_buf_cfg(hdev);
-	} else if (strncmp(cmd_buf, "dump mng tbl", 12) == 0) {
-		hclge_dbg_dump_mng_table(hdev);
 	} else if (strncmp(cmd_buf, DUMP_REG, strlen(DUMP_REG)) == 0) {
 		hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
 	} else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
@@ -1866,6 +1856,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_MAC_MC,
 		.dbg_dump = hclge_dbg_dump_mac_mc,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_MNG_TBL,
+		.dbg_dump = hclge_dbg_dump_mng_table,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index c5c18afb8b88..bf6a0ff66047 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -7,7 +7,6 @@
 #include <linux/etherdevice.h>
 #include "hclge_cmd.h"
 
-#define HCLGE_DBG_BUF_LEN	   256
 #define HCLGE_DBG_MNG_TBL_MAX	   64
 
 #define HCLGE_DBG_MNG_VLAN_MASK_B  BIT(0)

From d658ff34dd7ff9ccf13c59a7c464b55ca122977d Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Fri, 14 May 2021 11:25:16 +0800
Subject: [PATCH 0137/3292] net: hns3: refactor dump loopback of debugfs

Currently, the debugfs command for loopback is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"loopback" for it, and query it by command "cat loopback",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat loopback
mac id: 0
app loopback: off
serdes serial loopback: off
serdes parallel loopback: off

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 44 +++++++++++--------
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a2033cb19b3d..0a78ce221336 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -260,6 +260,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_MAC_UC,
 	HNAE3_DBG_CMD_MAC_MC,
 	HNAE3_DBG_CMD_MNG_TBL,
+	HNAE3_DBG_CMD_LOOPBACK,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 4af997d6bd59..d2e3965c66d4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -97,6 +97,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "loopback",
+		.cmd = HNAE3_DBG_CMD_LOOPBACK,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -495,7 +502,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump m7 info\n");
 	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
-	dev_info(&h->pdev->dev, "dump loopback\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
 	dev_info(&h->pdev->dev, "dump intr\n");
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 613730f36148..7c02973ae218 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -8,6 +8,7 @@
 #include "hclge_tm.h"
 #include "hnae3.h"
 
+static const char * const state_str[] = { "off", "on" };
 static const char * const hclge_mac_state_str[] = {
 	"TO_ADD", "TO_DEL", "ACTIVE"
 };
@@ -1566,32 +1567,34 @@ static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev,
 	}
 }
 
-static void hclge_dbg_dump_loopback(struct hclge_dev *hdev)
+static int hclge_dbg_dump_loopback(struct hclge_dev *hdev, char *buf, int len)
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	struct hclge_config_mac_mode_cmd *req_app;
 	struct hclge_common_lb_cmd *req_common;
 	struct hclge_desc desc;
 	u8 loopback_en;
+	int pos = 0;
 	int ret;
 
 	req_app = (struct hclge_config_mac_mode_cmd *)desc.data;
 	req_common = (struct hclge_common_lb_cmd *)desc.data;
 
-	dev_info(&hdev->pdev->dev, "mac id: %u\n", hdev->hw.mac.mac_id);
+	pos += scnprintf(buf + pos, len - pos, "mac id: %u\n",
+			 hdev->hw.mac.mac_id);
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"failed to dump app loopback status, ret = %d\n", ret);
-		return;
+		return ret;
 	}
 
 	loopback_en = hnae3_get_bit(le32_to_cpu(req_app->txrx_pad_fcs_loop_en),
 				    HCLGE_MAC_APP_LP_B);
-	dev_info(&hdev->pdev->dev, "app loopback: %s\n",
-		 loopback_en ? "on" : "off");
+	pos += scnprintf(buf + pos, len - pos, "app loopback: %s\n",
+			 state_str[loopback_en]);
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1599,27 +1602,30 @@ static void hclge_dbg_dump_loopback(struct hclge_dev *hdev)
 		dev_err(&hdev->pdev->dev,
 			"failed to dump common loopback status, ret = %d\n",
 			ret);
-		return;
+		return ret;
 	}
 
 	loopback_en = req_common->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
-	dev_info(&hdev->pdev->dev, "serdes serial loopback: %s\n",
-		 loopback_en ? "on" : "off");
+	pos += scnprintf(buf + pos, len - pos, "serdes serial loopback: %s\n",
+			 state_str[loopback_en]);
 
 	loopback_en = req_common->enable &
-			HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
-	dev_info(&hdev->pdev->dev, "serdes parallel loopback: %s\n",
-		 loopback_en ? "on" : "off");
+			HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B ? 1 : 0;
+	pos += scnprintf(buf + pos, len - pos, "serdes parallel loopback: %s\n",
+			 state_str[loopback_en]);
 
 	if (phydev) {
-		dev_info(&hdev->pdev->dev, "phy loopback: %s\n",
-			 phydev->loopback_enabled ? "on" : "off");
+		loopback_en = phydev->loopback_enabled;
+		pos += scnprintf(buf + pos, len - pos, "phy loopback: %s\n",
+				 state_str[loopback_en]);
 	} else if (hnae3_dev_phy_imp_supported(hdev)) {
 		loopback_en = req_common->enable &
 			      HCLGE_CMD_GE_PHY_INNER_LOOP_B;
-		dev_info(&hdev->pdev->dev, "phy loopback: %s\n",
-			 loopback_en ? "on" : "off");
+		pos += scnprintf(buf + pos, len - pos, "phy loopback: %s\n",
+				 state_str[loopback_en]);
 	}
+
+	return 0;
 }
 
 /* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt
@@ -1785,7 +1791,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
 #define DUMP_REG	"dump reg"
 #define DUMP_TM_MAP	"dump tm map"
-#define DUMP_LOOPBACK	"dump loopback"
 #define DUMP_INTERRUPT	"dump intr"
 
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -1818,9 +1823,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 					  &cmd_buf[sizeof("dump ncl_config")]);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
 		hclge_dbg_dump_mac_tnl_status(hdev);
-	} else if (strncmp(cmd_buf, DUMP_LOOPBACK,
-		   strlen(DUMP_LOOPBACK)) == 0) {
-		hclge_dbg_dump_loopback(hdev);
 	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
 		hclge_dbg_dump_qs_shaper(hdev,
 					 &cmd_buf[sizeof("dump qs shaper")]);
@@ -1860,6 +1862,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_MNG_TBL,
 		.dbg_dump = hclge_dbg_dump_mng_table,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_LOOPBACK,
+		.dbg_dump = hclge_dbg_dump_loopback,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From 9149ca0f115acf44f8473c0a1e464e716c4ca83b Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Fri, 14 May 2021 11:25:17 +0800
Subject: [PATCH 0138/3292] net: hns3: refactor dump intr of debugfs

Currently, the debugfs command for intr is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"interrupt_info" for it, and query it by command "cat interrupt_info",
return the result to userspace, rather than record in dmesg.

The display style is below:
$cat interrupt_info
num_nic_msi: 65
num_roce_msi: 65
num_msi_used: 2
num_msi_left: 128

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +++++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 26 ++++++++++++-------
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0a78ce221336..aea6ddd2b588 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -261,6 +261,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_MAC_MC,
 	HNAE3_DBG_CMD_MNG_TBL,
 	HNAE3_DBG_CMD_LOOPBACK,
+	HNAE3_DBG_CMD_INTERRUPT_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index d2e3965c66d4..0eb5eda10ee0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -104,6 +104,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "interrupt_info",
+		.cmd = HNAE3_DBG_CMD_INTERRUPT_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -503,7 +510,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
-	dev_info(&h->pdev->dev, "dump intr\n");
 
 	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
 	strncat(printf_buf, "dump reg [[bios common] [ssu <port_id>]",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 7c02973ae218..c3d84a4bd734 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1430,12 +1430,20 @@ static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev)
 		 hdev->serv_processed_cnt);
 }
 
-static void hclge_dbg_dump_interrupt(struct hclge_dev *hdev)
+static int hclge_dbg_dump_interrupt(struct hclge_dev *hdev, char *buf, int len)
 {
-	dev_info(&hdev->pdev->dev, "num_nic_msi: %u\n", hdev->num_nic_msi);
-	dev_info(&hdev->pdev->dev, "num_roce_msi: %u\n", hdev->num_roce_msi);
-	dev_info(&hdev->pdev->dev, "num_msi_used: %u\n", hdev->num_msi_used);
-	dev_info(&hdev->pdev->dev, "num_msi_left: %u\n", hdev->num_msi_left);
+	int pos = 0;
+
+	pos += scnprintf(buf + pos, len - pos, "num_nic_msi: %u\n",
+			 hdev->num_nic_msi);
+	pos += scnprintf(buf + pos, len - pos, "num_roce_msi: %u\n",
+			 hdev->num_roce_msi);
+	pos += scnprintf(buf + pos, len - pos, "num_msi_used: %u\n",
+			 hdev->num_msi_used);
+	pos += scnprintf(buf + pos, len - pos, "num_msi_left: %u\n",
+			 hdev->num_msi_left);
+
+	return 0;
 }
 
 static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
@@ -1791,7 +1799,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
 #define DUMP_REG	"dump reg"
 #define DUMP_TM_MAP	"dump tm map"
-#define DUMP_INTERRUPT	"dump intr"
 
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
@@ -1826,9 +1833,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
 		hclge_dbg_dump_qs_shaper(hdev,
 					 &cmd_buf[sizeof("dump qs shaper")]);
-	} else if (strncmp(cmd_buf, DUMP_INTERRUPT,
-		   strlen(DUMP_INTERRUPT)) == 0) {
-		hclge_dbg_dump_interrupt(hdev);
 	} else {
 		dev_info(&hdev->pdev->dev, "unknown command\n");
 		return -EINVAL;
@@ -1866,6 +1870,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_LOOPBACK,
 		.dbg_dump = hclge_dbg_dump_loopback,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_INTERRUPT_INFO,
+		.dbg_dump = hclge_dbg_dump_interrupt,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From 1a7ff8280b16fe8a085e24aca9008724700d6878 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Fri, 14 May 2021 11:25:18 +0800
Subject: [PATCH 0139/3292] net: hns3: refactor dump reset info of debugfs

Currently, the debugfs command for reset info is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"reset_info" for it, and query it by command "cat reset_info",
return the result to userspace, rather than record in dmesg.

The display style is below:
$cat reset_info
PF reset count: 0
FLR reset count: 0
GLOBAL reset count: 0
IMP reset count: 0
reset done count: 0
HW reset done count: 0
reset count: 0
reset fail count: 0
vector0 interrupt enable status: 0x1
reset interrupt source: 0x0
reset interrupt status: 0x0
RAS interrupt status:0x0
hardware reset status: 0x0
handshake status: 0x80
function reset status: 0x0

Change to the "hclge_show_rst_info" in the "hclge_reset_err_handle",
when the reset fails, display reset info immediately.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 76 +++++++++++--------
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 17 ++++-
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  4 +-
 5 files changed, 71 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index aea6ddd2b588..455174cf2e5c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -262,6 +262,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_MNG_TBL,
 	HNAE3_DBG_CMD_LOOPBACK,
 	HNAE3_DBG_CMD_INTERRUPT_INFO,
+	HNAE3_DBG_CMD_RESET_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 0eb5eda10ee0..49c87c8ac37f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -111,6 +111,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "reset_info",
+		.cmd = HNAE3_DBG_CMD_RESET_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -505,7 +512,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
-	dev_info(&h->pdev->dev, "dump reset info\n");
 	dev_info(&h->pdev->dev, "dump m7 info\n");
 	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index c3d84a4bd734..736746b6ff75 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -4,6 +4,7 @@
 #include <linux/device.h>
 
 #include "hclge_debugfs.h"
+#include "hclge_err.h"
 #include "hclge_main.h"
 #include "hclge_tm.h"
 #include "hnae3.h"
@@ -1389,37 +1390,46 @@ static void hclge_dbg_fd_tcam(struct hclge_dev *hdev)
 	kfree(rule_locs);
 }
 
-void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
+int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len)
 {
-	dev_info(&hdev->pdev->dev, "PF reset count: %u\n",
-		 hdev->rst_stats.pf_rst_cnt);
-	dev_info(&hdev->pdev->dev, "FLR reset count: %u\n",
-		 hdev->rst_stats.flr_rst_cnt);
-	dev_info(&hdev->pdev->dev, "GLOBAL reset count: %u\n",
-		 hdev->rst_stats.global_rst_cnt);
-	dev_info(&hdev->pdev->dev, "IMP reset count: %u\n",
-		 hdev->rst_stats.imp_rst_cnt);
-	dev_info(&hdev->pdev->dev, "reset done count: %u\n",
-		 hdev->rst_stats.reset_done_cnt);
-	dev_info(&hdev->pdev->dev, "HW reset done count: %u\n",
-		 hdev->rst_stats.hw_reset_done_cnt);
-	dev_info(&hdev->pdev->dev, "reset count: %u\n",
-		 hdev->rst_stats.reset_cnt);
-	dev_info(&hdev->pdev->dev, "reset fail count: %u\n",
-		 hdev->rst_stats.reset_fail_cnt);
-	dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
-		 hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_REG_BASE));
-	dev_info(&hdev->pdev->dev, "reset interrupt source: 0x%x\n",
-		 hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG));
-	dev_info(&hdev->pdev->dev, "reset interrupt status: 0x%x\n",
-		 hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS));
-	dev_info(&hdev->pdev->dev, "hardware reset status: 0x%x\n",
-		 hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG));
-	dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
-		 hclge_read_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG));
-	dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
-		 hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING));
-	dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
+	int pos = 0;
+
+	pos += scnprintf(buf + pos, len - pos, "PF reset count: %u\n",
+			 hdev->rst_stats.pf_rst_cnt);
+	pos += scnprintf(buf + pos, len - pos, "FLR reset count: %u\n",
+			 hdev->rst_stats.flr_rst_cnt);
+	pos += scnprintf(buf + pos, len - pos, "GLOBAL reset count: %u\n",
+			 hdev->rst_stats.global_rst_cnt);
+	pos += scnprintf(buf + pos, len - pos, "IMP reset count: %u\n",
+			 hdev->rst_stats.imp_rst_cnt);
+	pos += scnprintf(buf + pos, len - pos, "reset done count: %u\n",
+			 hdev->rst_stats.reset_done_cnt);
+	pos += scnprintf(buf + pos, len - pos, "HW reset done count: %u\n",
+			 hdev->rst_stats.hw_reset_done_cnt);
+	pos += scnprintf(buf + pos, len - pos, "reset count: %u\n",
+			 hdev->rst_stats.reset_cnt);
+	pos += scnprintf(buf + pos, len - pos, "reset fail count: %u\n",
+			 hdev->rst_stats.reset_fail_cnt);
+	pos += scnprintf(buf + pos, len - pos,
+			 "vector0 interrupt enable status: 0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_REG_BASE));
+	pos += scnprintf(buf + pos, len - pos, "reset interrupt source: 0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG));
+	pos += scnprintf(buf + pos, len - pos, "reset interrupt status: 0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS));
+	pos += scnprintf(buf + pos, len - pos, "RAS interrupt status: 0x%x\n",
+			 hclge_read_dev(&hdev->hw,
+					HCLGE_RAS_PF_OTHER_INT_STS_REG));
+	pos += scnprintf(buf + pos, len - pos, "hardware reset status: 0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG));
+	pos += scnprintf(buf + pos, len - pos, "handshake status: 0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG));
+	pos += scnprintf(buf + pos, len - pos, "function reset status: 0x%x\n",
+			 hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING));
+	pos += scnprintf(buf + pos, len - pos, "hdev state: 0x%lx\n",
+			 hdev->state);
+
+	return 0;
 }
 
 static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev)
@@ -1819,8 +1829,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_qos_buf_cfg(hdev);
 	} else if (strncmp(cmd_buf, DUMP_REG, strlen(DUMP_REG)) == 0) {
 		hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
-	} else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
-		hclge_dbg_dump_rst_info(hdev);
 	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
 	} else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) {
@@ -1874,6 +1882,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_INTERRUPT_INFO,
 		.dbg_dump = hclge_dbg_dump_interrupt,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_RESET_INFO,
+		.dbg_dump = hclge_dbg_dump_rst_info,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 55b0453d3948..d4d3f0b247af 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3936,6 +3936,21 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
 	return ret;
 }
 
+static void hclge_show_rst_info(struct hclge_dev *hdev)
+{
+	char *buf;
+
+	buf = kzalloc(HCLGE_DBG_RESET_INFO_LEN, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	hclge_dbg_dump_rst_info(hdev, buf, HCLGE_DBG_RESET_INFO_LEN);
+
+	dev_info(&hdev->pdev->dev, "dump reset info:\n%s", buf);
+
+	kfree(buf);
+}
+
 static bool hclge_reset_err_handle(struct hclge_dev *hdev)
 {
 #define MAX_RESET_FAIL_CNT 5
@@ -3966,7 +3981,7 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
 
 	dev_err(&hdev->pdev->dev, "Reset fail!\n");
 
-	hclge_dbg_dump_rst_info(hdev);
+	hclge_show_rst_info(hdev);
 
 	set_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9e17c02de403..8bf451ef0b05 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -148,6 +148,8 @@
 
 #define HCLGE_MAX_QSET_NUM		1024
 
+#define HCLGE_DBG_RESET_INFO_LEN	1024
+
 enum HLCGE_PORT_TYPE {
 	HOST_PORT,
 	NETWORK_PORT
@@ -1089,6 +1091,6 @@ int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev,
 void hclge_report_hw_error(struct hclge_dev *hdev,
 			   enum hnae3_hw_error_type type);
 void hclge_inform_vf_promisc_info(struct hclge_vport *vport);
-void hclge_dbg_dump_rst_info(struct hclge_dev *hdev);
+int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len);
 int hclge_push_vf_link_status(struct hclge_vport *vport);
 #endif

From 0b198b0d80ea091f2a917536a097adefb2eaa52f Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Fri, 14 May 2021 11:25:19 +0800
Subject: [PATCH 0140/3292] net: hns3: refactor dump m7 info of debugfs

Currently, the debugfs command for m7 info is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"imp_info" for it, and query it by command "cat imp_info",
return the result to userspace, rather than record in dmesg.

The display style is below:
$cat imp_info
offset | data
0x0000 | 0x00000000  0x00000000
0x0008 | 0x00000000  0x00000000
0x0010 | 0x00000000  0x00000001
0x0018 | 0x00000000  0x00000000
0x0020 | 0x00000000  0x00000000
0x0028 | 0x00000000  0x00000000
0x0030 | 0x00000000  0x00000000

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +-
 .../hisilicon/hns3/hns3pf/hclge_cmd.c         |  2 +-
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         | 10 +--
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 81 ++++++++++++-------
 5 files changed, 64 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 455174cf2e5c..7064fae8a1b7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -263,6 +263,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_LOOPBACK,
 	HNAE3_DBG_CMD_INTERRUPT_INFO,
 	HNAE3_DBG_CMD_RESET_INFO,
+	HNAE3_DBG_CMD_IMP_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 49c87c8ac37f..d91c0051b78d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -118,6 +118,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "imp_info",
+		.cmd = HNAE3_DBG_CMD_IMP_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -512,7 +519,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
-	dev_info(&h->pdev->dev, "dump m7 info\n");
 	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 0df9ca311e6c..6aed30cc22f2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -471,7 +471,7 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev)
 	struct hclge_desc desc;
 	u32 compat = 0;
 
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_COMPAT_CFG, false);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_IMP_COMPAT_CFG, false);
 
 	req = (struct hclge_firmware_compat_cmd *)desc.data;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index c6cd273363d5..12558aa0fe0a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -267,10 +267,10 @@ enum hclge_opcode_type {
 	/* NCL config command */
 	HCLGE_OPC_QUERY_NCL_CONFIG	= 0x7011,
 
-	/* M7 stats command */
-	HCLGE_OPC_M7_STATS_BD		= 0x7012,
-	HCLGE_OPC_M7_STATS_INFO		= 0x7013,
-	HCLGE_OPC_M7_COMPAT_CFG		= 0x701A,
+	/* IMP stats command */
+	HCLGE_OPC_IMP_STATS_BD		= 0x7012,
+	HCLGE_OPC_IMP_STATS_INFO		= 0x7013,
+	HCLGE_OPC_IMP_COMPAT_CFG		= 0x701A,
 
 	/* SFP command */
 	HCLGE_OPC_GET_SFP_EEPROM	= 0x7100,
@@ -1101,7 +1101,7 @@ struct hclge_fd_user_def_cfg_cmd {
 	u8 rsv[12];
 };
 
-struct hclge_get_m7_bd_cmd {
+struct hclge_get_imp_bd_cmd {
 	__le32 bd_num;
 	u8 rsv[20];
 };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 736746b6ff75..d0634ba3ba1c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1456,56 +1456,73 @@ static int hclge_dbg_dump_interrupt(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
-static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
+static void hclge_dbg_imp_info_data_print(struct hclge_desc *desc_src,
+					  char *buf, int len, u32 bd_num)
 {
-	struct hclge_desc *desc_src, *desc_tmp;
-	struct hclge_get_m7_bd_cmd *req;
+#define HCLGE_DBG_IMP_INFO_PRINT_OFFSET 0x2
+
+	struct hclge_desc *desc_index = desc_src;
+	u32 offset = 0;
+	int pos = 0;
+	u32 i, j;
+
+	pos += scnprintf(buf + pos, len - pos, "offset | data\n");
+
+	for (i = 0; i < bd_num; i++) {
+		j = 0;
+		while (j < HCLGE_DESC_DATA_LEN - 1) {
+			pos += scnprintf(buf + pos, len - pos, "0x%04x | ",
+					 offset);
+			pos += scnprintf(buf + pos, len - pos, "0x%08x  ",
+					 le32_to_cpu(desc_index->data[j++]));
+			pos += scnprintf(buf + pos, len - pos, "0x%08x\n",
+					 le32_to_cpu(desc_index->data[j++]));
+			offset += sizeof(u32) * HCLGE_DBG_IMP_INFO_PRINT_OFFSET;
+		}
+		desc_index++;
+	}
+}
+
+static int
+hclge_dbg_get_imp_stats_info(struct hclge_dev *hdev, char *buf, int len)
+{
+	struct hclge_get_imp_bd_cmd *req;
+	struct hclge_desc *desc_src;
 	struct hclge_desc desc;
-	u32 bd_num, buf_len;
-	int ret, i;
+	u32 bd_num;
+	int ret;
 
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_STATS_BD, true);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_IMP_STATS_BD, true);
 
-	req = (struct hclge_get_m7_bd_cmd *)desc.data;
+	req = (struct hclge_get_imp_bd_cmd *)desc.data;
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"get firmware statistics bd number failed, ret = %d\n",
+			"failed to get imp statistics bd number, ret = %d\n",
 			ret);
-		return;
+		return ret;
 	}
 
 	bd_num = le32_to_cpu(req->bd_num);
 
-	buf_len	 = sizeof(struct hclge_desc) * bd_num;
-	desc_src = kzalloc(buf_len, GFP_KERNEL);
+	desc_src = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
 	if (!desc_src)
-		return;
+		return -ENOMEM;
 
-	desc_tmp = desc_src;
-	ret  = hclge_dbg_cmd_send(hdev, desc_tmp, 0, bd_num,
-				  HCLGE_OPC_M7_STATS_INFO);
+	ret  = hclge_dbg_cmd_send(hdev, desc_src, 0, bd_num,
+				  HCLGE_OPC_IMP_STATS_INFO);
 	if (ret) {
 		kfree(desc_src);
 		dev_err(&hdev->pdev->dev,
-			"get firmware statistics failed, ret = %d\n", ret);
-		return;
+			"failed to get imp statistics, ret = %d\n", ret);
+		return ret;
 	}
 
-	for (i = 0; i < bd_num; i++) {
-		dev_info(&hdev->pdev->dev, "0x%08x  0x%08x  0x%08x\n",
-			 le32_to_cpu(desc_tmp->data[0]),
-			 le32_to_cpu(desc_tmp->data[1]),
-			 le32_to_cpu(desc_tmp->data[2]));
-		dev_info(&hdev->pdev->dev, "0x%08x  0x%08x  0x%08x\n",
-			 le32_to_cpu(desc_tmp->data[3]),
-			 le32_to_cpu(desc_tmp->data[4]),
-			 le32_to_cpu(desc_tmp->data[5]));
-
-		desc_tmp++;
-	}
+	hclge_dbg_imp_info_data_print(desc_src, buf, len, bd_num);
 
 	kfree(desc_src);
+
+	return 0;
 }
 
 #define HCLGE_CMD_NCL_CONFIG_BD_NUM	5
@@ -1831,8 +1848,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
 	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
-	} else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) {
-		hclge_dbg_get_m7_stats_info(hdev);
 	} else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
 		hclge_dbg_dump_ncl_config(hdev,
 					  &cmd_buf[sizeof("dump ncl_config")]);
@@ -1886,6 +1901,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_RESET_INFO,
 		.dbg_dump = hclge_dbg_dump_rst_info,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_IMP_INFO,
+		.dbg_dump = hclge_dbg_get_imp_stats_info,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From e76e6886646b88b258f9deba92d49080f26028ae Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Fri, 14 May 2021 11:25:20 +0800
Subject: [PATCH 0141/3292] net: hns3: refactor dump ncl config of debugfs

Currently, the debugfs command for ncl config is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"ncl_config" for it, and query it by command "cat ncl_config",
return the result to userspace, rather than record in dmesg.

The display style is below:
$cat ncl_config
offset | data
0x0000 | 0x00000028
0x0004 | 0x00000400
0x0008 | 0x08040201
0x000c | 0x00000000
0x0010 | 0x00040004
0x0014 | 0x00040004
0x0018 | 0x00000000
0x001c | 0x00000000
0x0020 | 0x00040004

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +-
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |  1 +
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 78 ++++++++-----------
 4 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 7064fae8a1b7..d1cdb7494b9e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -264,6 +264,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_INTERRUPT_INFO,
 	HNAE3_DBG_CMD_RESET_INFO,
 	HNAE3_DBG_CMD_IMP_INFO,
+	HNAE3_DBG_CMD_NCL_CONFIG,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index d91c0051b78d..ba4ee8ca7e71 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -125,6 +125,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "ncl_config",
+		.cmd = HNAE3_DBG_CMD_NCL_CONFIG,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN_128KB,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -519,7 +526,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
-	dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 3d2ee3642459..a7af9277ae69 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -5,6 +5,7 @@
 #define __HNS3_DEBUGFS_H
 
 #define HNS3_DBG_READ_LEN	65536
+#define HNS3_DBG_READ_LEN_128KB	0x20000
 #define HNS3_DBG_READ_LEN_4MB	0x400000
 #define HNS3_DBG_WRITE_LEN	1024
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index d0634ba3ba1c..8a92ab448a19 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1526,80 +1526,63 @@ hclge_dbg_get_imp_stats_info(struct hclge_dev *hdev, char *buf, int len)
 }
 
 #define HCLGE_CMD_NCL_CONFIG_BD_NUM	5
+#define HCLGE_MAX_NCL_CONFIG_LENGTH	16384
 
-static void hclge_ncl_config_data_print(struct hclge_dev *hdev,
-					struct hclge_desc *desc, int *offset,
-					int *length)
+static void hclge_ncl_config_data_print(struct hclge_desc *desc, int *index,
+					char *buf, int *len, int *pos)
 {
 #define HCLGE_CMD_DATA_NUM		6
 
-	int i;
-	int j;
+	int offset = HCLGE_MAX_NCL_CONFIG_LENGTH - *index;
+	int i, j;
 
 	for (i = 0; i < HCLGE_CMD_NCL_CONFIG_BD_NUM; i++) {
 		for (j = 0; j < HCLGE_CMD_DATA_NUM; j++) {
 			if (i == 0 && j == 0)
 				continue;
 
-			dev_info(&hdev->pdev->dev, "0x%04x | 0x%08x\n",
-				 *offset,
-				 le32_to_cpu(desc[i].data[j]));
-			*offset += sizeof(u32);
-			*length -= sizeof(u32);
-			if (*length <= 0)
+			*pos += scnprintf(buf + *pos, *len - *pos,
+					  "0x%04x | 0x%08x\n", offset,
+					  le32_to_cpu(desc[i].data[j]));
+
+			offset += sizeof(u32);
+			*index -= sizeof(u32);
+
+			if (*index <= 0)
 				return;
 		}
 	}
 }
 
-/* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file
- * @hdev: pointer to struct hclge_dev
- * @cmd_buf: string that contains offset and length
- */
-static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev,
-				      const char *cmd_buf)
+static int
+hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, char *buf, int len)
 {
-#define HCLGE_MAX_NCL_CONFIG_OFFSET	4096
 #define HCLGE_NCL_CONFIG_LENGTH_IN_EACH_CMD	(20 + 24 * 4)
-#define HCLGE_NCL_CONFIG_PARAM_NUM	2
 
 	struct hclge_desc desc[HCLGE_CMD_NCL_CONFIG_BD_NUM];
 	int bd_num = HCLGE_CMD_NCL_CONFIG_BD_NUM;
-	int offset;
-	int length;
-	int data0;
+	int index = HCLGE_MAX_NCL_CONFIG_LENGTH;
+	int pos = 0;
+	u32 data0;
 	int ret;
 
-	ret = sscanf(cmd_buf, "%x %x", &offset, &length);
-	if (ret != HCLGE_NCL_CONFIG_PARAM_NUM) {
-		dev_err(&hdev->pdev->dev,
-			"Too few parameters, num = %d.\n", ret);
-		return;
-	}
+	pos += scnprintf(buf + pos, len - pos, "offset | data\n");
 
-	if (offset < 0 || offset >= HCLGE_MAX_NCL_CONFIG_OFFSET ||
-	    length <= 0 || length > HCLGE_MAX_NCL_CONFIG_OFFSET - offset) {
-		dev_err(&hdev->pdev->dev,
-			"Invalid input, offset = %d, length = %d.\n",
-			offset, length);
-		return;
-	}
-
-	dev_info(&hdev->pdev->dev, "offset |    data\n");
-
-	while (length > 0) {
-		data0 = offset;
-		if (length >= HCLGE_NCL_CONFIG_LENGTH_IN_EACH_CMD)
+	while (index > 0) {
+		data0 = HCLGE_MAX_NCL_CONFIG_LENGTH - index;
+		if (index >= HCLGE_NCL_CONFIG_LENGTH_IN_EACH_CMD)
 			data0 |= HCLGE_NCL_CONFIG_LENGTH_IN_EACH_CMD << 16;
 		else
-			data0 |= length << 16;
+			data0 |= (u32)index << 16;
 		ret = hclge_dbg_cmd_send(hdev, desc, data0, bd_num,
 					 HCLGE_OPC_QUERY_NCL_CONFIG);
 		if (ret)
-			return;
+			return ret;
 
-		hclge_ncl_config_data_print(hdev, desc, &offset, &length);
+		hclge_ncl_config_data_print(desc, &index, buf, &len, &pos);
 	}
+
+	return 0;
 }
 
 static int hclge_dbg_dump_loopback(struct hclge_dev *hdev, char *buf, int len)
@@ -1848,9 +1831,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
 	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
-	} else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
-		hclge_dbg_dump_ncl_config(hdev,
-					  &cmd_buf[sizeof("dump ncl_config")]);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
 		hclge_dbg_dump_mac_tnl_status(hdev);
 	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
@@ -1905,6 +1885,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_IMP_INFO,
 		.dbg_dump = hclge_dbg_get_imp_stats_info,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_NCL_CONFIG,
+		.dbg_dump = hclge_dbg_dump_ncl_config,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From 699e803e9a4d1c8676c77bfa7b085a1f73083e64 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 14 May 2021 15:31:39 +0800
Subject: [PATCH 0142/3292] net: hns: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...
This patch fix the comments style issue.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index f4cf569a2599..de6f051f5b0b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -914,8 +914,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb)
 		}
 	} else if (is_acpi_node(mac_cb->fw_port)) {
 		ret = hns_mac_register_phy(mac_cb);
-		/*
-		 * Mac can work well if there is phy or not.If the port don't
+		/* Mac can work well if there is phy or not.If the port don't
 		 * connect with phy, the return value will be ignored. Only
 		 * when there is phy but can't find mdio bus, the return value
 		 * will be handled.

From 510fe8e70b0b659d8a7f3fda3fc61390e9cff7f7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 14 May 2021 15:31:40 +0800
Subject: [PATCH 0143/3292] net: hns: fix some code style issue about space

Spaces at the start of a line will cause checkpatch warning.
This patch replaces the spaces by tab at the start of a line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns/hns_dsaf_misc.c    | 70 +++++++++----------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 325e81d30cfd..4f7684ab9409 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -56,31 +56,31 @@ static u32 dsaf_read_sub(struct dsaf_device *dsaf_dev, u32 reg)
 }
 
 static void hns_dsaf_acpi_ledctrl_by_port(struct hns_mac_cb *mac_cb, u8 op_type,
-                                      u32 link, u32 port, u32 act)
+					  u32 link, u32 port, u32 act)
 {
-       union acpi_object *obj;
-       union acpi_object obj_args[3], argv4;
+	union acpi_object *obj;
+	union acpi_object obj_args[3], argv4;
 
-       obj_args[0].integer.type = ACPI_TYPE_INTEGER;
-       obj_args[0].integer.value = link;
-       obj_args[1].integer.type = ACPI_TYPE_INTEGER;
-       obj_args[1].integer.value = port;
-       obj_args[2].integer.type = ACPI_TYPE_INTEGER;
-       obj_args[2].integer.value = act;
+	obj_args[0].integer.type = ACPI_TYPE_INTEGER;
+	obj_args[0].integer.value = link;
+	obj_args[1].integer.type = ACPI_TYPE_INTEGER;
+	obj_args[1].integer.value = port;
+	obj_args[2].integer.type = ACPI_TYPE_INTEGER;
+	obj_args[2].integer.value = act;
 
-       argv4.type = ACPI_TYPE_PACKAGE;
-       argv4.package.count = 3;
-       argv4.package.elements = obj_args;
+	argv4.type = ACPI_TYPE_PACKAGE;
+	argv4.package.count = 3;
+	argv4.package.elements = obj_args;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-                               &hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
-       if (!obj) {
-               dev_warn(mac_cb->dev, "ledctrl fail, link:%d port:%d act:%d!\n",
-                        link, port, act);
-               return;
-       }
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
+				&hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
+	if (!obj) {
+		dev_warn(mac_cb->dev, "ledctrl fail, link:%d port:%d act:%d!\n",
+			 link, port, act);
+		return;
+	}
 
-       ACPI_FREE(obj);
+	ACPI_FREE(obj);
 }
 
 static void hns_dsaf_acpi_locate_ledctrl_by_port(struct hns_mac_cb *mac_cb,
@@ -151,15 +151,15 @@ static void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status,
 }
 
 static void hns_cpld_set_led_acpi(struct hns_mac_cb *mac_cb, int link_status,
-                            u16 speed, int data)
+				  u16 speed, int data)
 {
-       if (!mac_cb) {
-               pr_err("cpld_led_set mac_cb is null!\n");
-               return;
-       }
+	if (!mac_cb) {
+		pr_err("cpld_led_set mac_cb is null!\n");
+		return;
+	}
 
-       hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC,
-               link_status, mac_cb->mac_id, data);
+	hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC,
+				      link_status, mac_cb->mac_id, data);
 }
 
 static void cpld_led_reset(struct hns_mac_cb *mac_cb)
@@ -174,16 +174,16 @@ static void cpld_led_reset(struct hns_mac_cb *mac_cb)
 
 static void cpld_led_reset_acpi(struct hns_mac_cb *mac_cb)
 {
-       if (!mac_cb) {
-               pr_err("cpld_led_reset mac_cb is null!\n");
-               return;
-       }
+	if (!mac_cb) {
+		pr_err("cpld_led_reset mac_cb is null!\n");
+		return;
+	}
 
-       if (mac_cb->media_type != HNAE_MEDIA_TYPE_FIBER)
-                return;
+	if (mac_cb->media_type != HNAE_MEDIA_TYPE_FIBER)
+		return;
 
-       hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC,
-               0, mac_cb->mac_id, 0);
+	hns_dsaf_acpi_ledctrl_by_port(mac_cb, HNS_OP_LED_SET_FUNC,
+				      0, mac_cb->mac_id, 0);
 }
 
 static int cpld_set_led_id(struct hns_mac_cb *mac_cb,

From 5caab55a29792beade82a05e9e7089611f95bbbf Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 14 May 2021 15:31:41 +0800
Subject: [PATCH 0144/3292] net: hns: space required before the open brace '{'

Add the space required before the open brace '{'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 4f7684ab9409..1eaac89d60b7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -521,7 +521,7 @@ static phy_interface_t hns_mac_get_phy_if(struct hns_mac_cb *mac_cb)
 			reg = HNS_MAC_HILINK4_REG;
 		else
 			reg = HNS_MAC_HILINK3_REG;
-	} else{
+	} else {
 		if (!HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev) && mac_id <= 3)
 			reg = HNS_MAC_HILINK4V2_REG;
 		else

From cb067269760155b569c03ff6a2dcfd09ad824b94 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 14 May 2021 15:31:42 +0800
Subject: [PATCH 0145/3292] net: hns: remove redundant return int void function

Void function return statements are not generally useful,
so remove the redundant return.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index c615fbf9094e..75e4ec569da8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -462,8 +462,6 @@ static void hns_ae_adjust_link(struct hnae_handle *handle, int speed,
 	default:
 		break;
 	}
-
-	return;
 }
 
 static void hns_ae_get_ring_bdnum_limit(struct hnae_queue *queue,

From 2d1c5f29d27a338b6ca98f58caf191482105c418 Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Fri, 14 May 2021 16:24:05 +0800
Subject: [PATCH 0146/3292] alx: fix missing unlock on error in
 alx_set_pauseparam()

Add the missing unlock before return from function alx_set_pauseparam()
in the error handling case.

Fixes: 4a5fe57e7751 ("alx: use fine-grained locking instead of RTNL")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/ethtool.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/alx/ethtool.c b/drivers/net/ethernet/atheros/alx/ethtool.c
index f3627157a38a..b716adacd815 100644
--- a/drivers/net/ethernet/atheros/alx/ethtool.c
+++ b/drivers/net/ethernet/atheros/alx/ethtool.c
@@ -253,8 +253,10 @@ static int alx_set_pauseparam(struct net_device *netdev,
 
 	if (reconfig_phy) {
 		err = alx_setup_speed_duplex(hw, hw->adv_cfg, fc);
-		if (err)
+		if (err) {
+			mutex_unlock(&alx->mtx);
 			return err;
+		}
 	}
 
 	/* flow control on mac */

From 2cc8c910f51594dde79764a52b2974ddc8f70509 Mon Sep 17 00:00:00 2001
From: Tobias Schramm <t.schramm@manjaro.org>
Date: Fri, 14 May 2021 13:38:11 +0200
Subject: [PATCH 0147/3292] dt-bindings: net: rockchip-dwmac: add rk3308 gmac
 compatible

The Rockchip RK3308 has a gmac that is not fully compatible with any of the
other Rockchip gmacs.
This patch adds a compatible string for it.

Signed-off-by: Tobias Schramm <t.schramm@manjaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/rockchip-dwmac.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 5acddb6171bf..34a660ad6b30 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -19,6 +19,7 @@ select:
           - rockchip,rk3128-gmac
           - rockchip,rk3228-gmac
           - rockchip,rk3288-gmac
+          - rockchip,rk3308-gmac
           - rockchip,rk3328-gmac
           - rockchip,rk3366-gmac
           - rockchip,rk3368-gmac
@@ -38,6 +39,7 @@ properties:
           - rockchip,rk3128-gmac
           - rockchip,rk3228-gmac
           - rockchip,rk3288-gmac
+          - rockchip,rk3308-gmac
           - rockchip,rk3328-gmac
           - rockchip,rk3366-gmac
           - rockchip,rk3368-gmac

From b4ac94565c142da9f050e2c2ffd85f7288d556d2 Mon Sep 17 00:00:00 2001
From: Tobias Schramm <t.schramm@manjaro.org>
Date: Fri, 14 May 2021 13:38:12 +0200
Subject: [PATCH 0148/3292] net: stmmac: dwmac-rk: add support for rk3308 gmac

The Rockchip RK3308 SoC has a gmac with only the RMII interface signals
exposed.
This patch adds support for it.

Signed-off-by: Tobias Schramm <t.schramm@manjaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-rk.c    | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 8d28a536e1bb..584db4ce6e39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -482,6 +482,54 @@ static const struct rk_gmac_ops rk3288_ops = {
 	.set_rmii_speed = rk3288_set_rmii_speed,
 };
 
+#define RK3308_GRF_MAC_CON0		0x04a0
+
+/* RK3308_GRF_MAC_CON0 */
+#define RK3308_GMAC_PHY_INTF_SEL_RMII	(GRF_CLR_BIT(2) | GRF_CLR_BIT(3) | \
+					GRF_BIT(4))
+#define RK3308_GMAC_FLOW_CTRL		GRF_BIT(3)
+#define RK3308_GMAC_FLOW_CTRL_CLR	GRF_CLR_BIT(3)
+#define RK3308_GMAC_SPEED_10M		GRF_CLR_BIT(0)
+#define RK3308_GMAC_SPEED_100M		GRF_BIT(0)
+
+static void rk3308_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "Missing rockchip,grf property\n");
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, RK3308_GRF_MAC_CON0,
+		     RK3308_GMAC_PHY_INTF_SEL_RMII);
+}
+
+static void rk3308_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "Missing rockchip,grf property\n");
+		return;
+	}
+
+	if (speed == 10) {
+		regmap_write(bsp_priv->grf, RK3308_GRF_MAC_CON0,
+			     RK3308_GMAC_SPEED_10M);
+	} else if (speed == 100) {
+		regmap_write(bsp_priv->grf, RK3308_GRF_MAC_CON0,
+			     RK3308_GMAC_SPEED_100M);
+	} else {
+		dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+	}
+}
+
+static const struct rk_gmac_ops rk3308_ops = {
+	.set_to_rmii = rk3308_set_to_rmii,
+	.set_rmii_speed = rk3308_set_rmii_speed,
+};
+
 #define RK3328_GRF_MAC_CON0	0x0900
 #define RK3328_GRF_MAC_CON1	0x0904
 #define RK3328_GRF_MAC_CON2	0x0908
@@ -1477,6 +1525,7 @@ static const struct of_device_id rk_gmac_dwmac_match[] = {
 	{ .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops },
 	{ .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
 	{ .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
+	{ .compatible = "rockchip,rk3308-gmac", .data = &rk3308_ops },
 	{ .compatible = "rockchip,rk3328-gmac", .data = &rk3328_ops },
 	{ .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops },
 	{ .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops },

From 8d1a81f21a9e0c5967bbec1918a777369dc07856 Mon Sep 17 00:00:00 2001
From: Tobias Schramm <t.schramm@manjaro.org>
Date: Fri, 14 May 2021 13:38:13 +0200
Subject: [PATCH 0149/3292] arm64: dts: rockchip: add gmac to rk3308 dts

The RK3308 SoC has a gmac with only the RMII interface exposed. This
commit adds it to the RK3308 dtsi.

Signed-off-by: Tobias Schramm <t.schramm@manjaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/rockchip/rk3308.dtsi | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index 0c5fa9801e6f..b815ce73e5c6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -637,6 +637,28 @@
 		status = "disabled";
 	};
 
+	gmac: ethernet@ff4e0000 {
+		compatible = "rockchip,rk3308-gmac";
+		reg = <0x0 0xff4e0000 0x0 0x10000>;
+		interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "macirq";
+		clocks = <&cru SCLK_MAC>, <&cru SCLK_MAC_RX_TX>,
+			 <&cru SCLK_MAC_RX_TX>, <&cru SCLK_MAC_REF>,
+			 <&cru SCLK_MAC>, <&cru ACLK_MAC>,
+			 <&cru PCLK_MAC>, <&cru SCLK_MAC_RMII>;
+		clock-names = "stmmaceth", "mac_clk_rx",
+			      "mac_clk_tx", "clk_mac_ref",
+			      "clk_mac_refout", "aclk_mac",
+			      "pclk_mac", "clk_mac_speed";
+		phy-mode = "rmii";
+		pinctrl-names = "default";
+		pinctrl-0 = <&rmii_pins &mac_refclk_12ma>;
+		resets = <&cru SRST_MAC_A>;
+		reset-names = "stmmaceth";
+		rockchip,grf = <&grf>;
+		status = "disabled";
+	};
+
 	cru: clock-controller@ff500000 {
 		compatible = "rockchip,rk3308-cru";
 		reg = <0x0 0xff500000 0x0 0x1000>;

From d172268f93cfbead85102c2171c83c9ad558831b Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Fri, 14 May 2021 20:39:52 +0200
Subject: [PATCH 0150/3292] stmmac: use XDP helpers

Make use of the xdp_{init,prepare}_buff() helpers instead of
an open-coded version.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 345b4c6d1fd4..bf9fe25fed69 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5167,12 +5167,9 @@ read_again:
 			dma_sync_single_for_cpu(priv->device, buf->addr,
 						buf1_len, dma_dir);
 
-			xdp.data = page_address(buf->page) + buf->page_offset;
-			xdp.data_end = xdp.data + buf1_len;
-			xdp.data_hard_start = page_address(buf->page);
-			xdp_set_data_meta_invalid(&xdp);
-			xdp.frame_sz = buf_sz;
-			xdp.rxq = &rx_q->xdp_rxq;
+			xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
+			xdp_prepare_buff(&xdp, page_address(buf->page),
+					 buf->page_offset, buf1_len, false);
 
 			pre_len = xdp.data_end - xdp.data_hard_start -
 				  buf->page_offset;

From 082294f294f6de4b50be0af354adb83e45816db4 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Fri, 14 May 2021 20:39:53 +0200
Subject: [PATCH 0151/3292] igc: use XDP helpers

Make use of the xdp_{init,prepare}_buff() helpers instead of
an open-coded version.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 069471b7ffb0..92c0701e2a36 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2151,12 +2151,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 		}
 
 		if (!skb) {
-			xdp.data = pktbuf + pkt_offset;
-			xdp.data_end = xdp.data + size;
-			xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring);
-			xdp_set_data_meta_invalid(&xdp);
-			xdp.frame_sz = truesize;
-			xdp.rxq = &rx_ring->xdp_rxq;
+			xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
+			xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
+					 igc_rx_offset(rx_ring) + pkt_offset, size, false);
 
 			skb = igc_xdp_run_prog(adapter, &xdp);
 		}

From 224bf7db5518d804932c0c78b1206ebb21f43d6a Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Fri, 14 May 2021 20:39:54 +0200
Subject: [PATCH 0152/3292] vhost_net: use XDP helpers

Make use of the xdp_{init,prepare}_buff() helpers instead of
an open-coded version.

Also, the field xdp->rxq was never set, so pass NULL to xdp_init_buff()
to clear it.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/net.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index df82b124170e..6414bd5741b8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -744,11 +744,9 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
 	if (copied != len)
 		return -EFAULT;
 
-	xdp->data_hard_start = buf;
-	xdp->data = buf + pad;
-	xdp->data_end = xdp->data + len;
+	xdp_init_buff(xdp, buflen, NULL);
+	xdp_prepare_buff(xdp, buf, pad, len, true);
 	hdr->buflen = buflen;
-	xdp->frame_sz = buflen;
 
 	--net->refcnt_bias;
 	alloc_frag->offset += buflen;

From 709c0314239992162cba26a860f04319a15860c4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 14 May 2021 13:04:25 -0700
Subject: [PATCH 0153/3292] tcp: add tracepoint for checksum errors

Add a tracepoint for capturing TCP segments with
a bad checksum. This makes it easy to identify
sources of bad frames in the fleet (e.g. machines
with faulty NICs).

It should also help tools like IOvisor's tcpdrop.py
which are used today to get detailed information
about such packets.

We don't have a socket in many cases so we must
open code the address extraction based just on
the skb.

v2: add missing export for ipv6=m

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/tcp.h | 76 ++++++++++++++++++++++++++++++++++++++
 net/core/net-traces.c      |  1 +
 net/ipv4/tcp_input.c       |  1 +
 net/ipv4/tcp_ipv4.c        |  3 ++
 net/ipv6/tcp_ipv6.c        |  2 +
 5 files changed, 83 insertions(+)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index ba94857eea11..521059d8dc0a 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -295,6 +295,82 @@ TRACE_EVENT(tcp_probe,
 		  __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
 );
 
+#define TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb)			\
+	do {								\
+		const struct tcphdr *th = (const struct tcphdr *)skb->data; \
+		struct sockaddr_in *v4 = (void *)__entry->saddr;	\
+									\
+		v4->sin_family = AF_INET;				\
+		v4->sin_port = th->source;				\
+		v4->sin_addr.s_addr = ip_hdr(skb)->saddr;		\
+		v4 = (void *)__entry->daddr;				\
+		v4->sin_family = AF_INET;				\
+		v4->sin_port = th->dest;				\
+		v4->sin_addr.s_addr = ip_hdr(skb)->daddr;		\
+	} while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+#define TP_STORE_ADDR_PORTS_SKB(__entry, skb)				\
+	do {								\
+		const struct iphdr *iph = ip_hdr(skb);			\
+									\
+		if (iph->version == 6) {				\
+			const struct tcphdr *th = (const struct tcphdr *)skb->data; \
+			struct sockaddr_in6 *v6 = (void *)__entry->saddr; \
+									\
+			v6->sin6_family = AF_INET6;			\
+			v6->sin6_port = th->source;			\
+			v6->sin6_addr = ipv6_hdr(skb)->saddr;		\
+			v6 = (void *)__entry->daddr;			\
+			v6->sin6_family = AF_INET6;			\
+			v6->sin6_port = th->dest;			\
+			v6->sin6_addr = ipv6_hdr(skb)->daddr;		\
+		} else							\
+			TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb);	\
+	} while (0)
+
+#else
+
+#define TP_STORE_ADDR_PORTS_SKB(__entry, skb)		\
+	TP_STORE_ADDR_PORTS_SKB_V4(__entry, skb)
+
+#endif
+
+/*
+ * tcp event with only skb
+ */
+DECLARE_EVENT_CLASS(tcp_event_skb,
+
+	TP_PROTO(const struct sk_buff *skb),
+
+	TP_ARGS(skb),
+
+	TP_STRUCT__entry(
+		__field(const void *, skbaddr)
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+		TP_STORE_ADDR_PORTS_SKB(__entry, skb);
+	),
+
+	TP_printk("src=%pISpc dest=%pISpc", __entry->saddr, __entry->daddr)
+);
+
+DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
+
+	TP_PROTO(const struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+
 #endif /* _TRACE_TCP_H */
 
 /* This part must be outside protection */
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 283ddb2dbc7d..c40cd8dd75c7 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -60,3 +60,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4cf4dd532d1c..cd52ce0a2a85 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5885,6 +5885,7 @@ step5:
 	return;
 
 csum_error:
+	trace_tcp_bad_csum(skb);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 312184cead57..4f5b68a90be9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1731,6 +1731,7 @@ discard:
 	return 0;
 
 csum_err:
+	trace_tcp_bad_csum(skb);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
@@ -1801,6 +1802,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 
 	if (unlikely(tcp_checksum_complete(skb))) {
 		bh_unlock_sock(sk);
+		trace_tcp_bad_csum(skb);
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 		return true;
@@ -2098,6 +2100,7 @@ no_tcp_socket:
 
 	if (tcp_checksum_complete(skb)) {
 csum_error:
+		trace_tcp_bad_csum(skb);
 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
 bad_packet:
 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f47c0b6e3de..4435fa342e7a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1538,6 +1538,7 @@ discard:
 	kfree_skb(skb);
 	return 0;
 csum_err:
+	trace_tcp_bad_csum(skb);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
@@ -1754,6 +1755,7 @@ no_tcp_socket:
 
 	if (tcp_checksum_complete(skb)) {
 csum_error:
+		trace_tcp_bad_csum(skb);
 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
 bad_packet:
 		__TCP_INC_STATS(net, TCP_MIB_INERRS);

From 5d9e068402dcf7354cc8ee66c2152845306d2ccb Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:51 +0200
Subject: [PATCH 0154/3292] net: dsa: qca8k: change simple print to dev variant

Change pr_err and pr_warn to dev variant.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index cdaf9f85a2cb..0b295da6c356 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -701,7 +701,7 @@ qca8k_setup(struct dsa_switch *ds)
 
 	/* Make sure that port 0 is the cpu port */
 	if (!dsa_is_cpu_port(ds, 0)) {
-		pr_err("port 0 is not the CPU port\n");
+		dev_err(priv->dev, "port 0 is not the CPU port");
 		return -EINVAL;
 	}
 
@@ -711,7 +711,7 @@ qca8k_setup(struct dsa_switch *ds)
 	priv->regmap = devm_regmap_init(ds->dev, NULL, priv,
 					&qca8k_regmap_config);
 	if (IS_ERR(priv->regmap))
-		pr_warn("regmap initialization failed");
+		dev_warn(priv->dev, "regmap initialization failed");
 
 	ret = qca8k_setup_mdio_bus(priv);
 	if (ret)

From 2ad255f2faaffb3af786031fba2e7955454b558a Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:52 +0200
Subject: [PATCH 0155/3292] net: dsa: qca8k: use iopoll macro for
 qca8k_busy_wait

Use iopoll macro instead of while loop.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 23 +++++++++++------------
 drivers/net/dsa/qca8k.h |  2 ++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 0b295da6c356..25fa7084e820 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -262,21 +262,20 @@ static struct regmap_config qca8k_regmap_config = {
 static int
 qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
 {
-	unsigned long timeout;
+	u32 val;
+	int ret;
 
-	timeout = jiffies + msecs_to_jiffies(20);
+	ret = read_poll_timeout(qca8k_read, val, !(val & mask),
+				0, QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
+				priv, reg);
 
-	/* loop until the busy flag has cleared */
-	do {
-		u32 val = qca8k_read(priv, reg);
-		int busy = val & mask;
+	/* Check if qca8k_read has failed for a different reason
+	 * before returning -ETIMEDOUT
+	 */
+	if (ret < 0 && val < 0)
+		return val;
 
-		if (!busy)
-			break;
-		cond_resched();
-	} while (!time_after_eq(jiffies, timeout));
-
-	return time_after_eq(jiffies, timeout);
+	return ret;
 }
 
 static void
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 7ca4b93e0bb5..86c585b7ec4a 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -18,6 +18,8 @@
 #define PHY_ID_QCA8337					0x004dd036
 #define QCA8K_ID_QCA8337				0x13
 
+#define QCA8K_BUSY_WAIT_TIMEOUT				20
+
 #define QCA8K_NUM_FDB_RECORDS				2048
 
 #define QCA8K_CPU_PORT					0

From 504bf65931824eda83494e5b5d75686e27ace03e Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:53 +0200
Subject: [PATCH 0156/3292] net: dsa: qca8k: improve qca8k read/write/rmw bus
 access

Put bus in local variable to improve faster access to the mdio bus.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 25fa7084e820..3c882d325fdf 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -142,17 +142,18 @@ qca8k_set_page(struct mii_bus *bus, u16 page)
 static u32
 qca8k_read(struct qca8k_priv *priv, u32 reg)
 {
+	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
 	u32 val;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
-	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	qca8k_set_page(priv->bus, page);
-	val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+	qca8k_set_page(bus, page);
+	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
 
-	mutex_unlock(&priv->bus->mdio_lock);
+	mutex_unlock(&bus->mdio_lock);
 
 	return val;
 }
@@ -160,35 +161,37 @@ qca8k_read(struct qca8k_priv *priv, u32 reg)
 static void
 qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val)
 {
+	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
-	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	qca8k_set_page(priv->bus, page);
-	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+	qca8k_set_page(bus, page);
+	qca8k_mii_write32(bus, 0x10 | r2, r1, val);
 
-	mutex_unlock(&priv->bus->mdio_lock);
+	mutex_unlock(&bus->mdio_lock);
 }
 
 static u32
 qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val)
 {
+	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
 	u32 ret;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
-	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	qca8k_set_page(priv->bus, page);
-	ret = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+	qca8k_set_page(bus, page);
+	ret = qca8k_mii_read32(bus, 0x10 | r2, r1);
 	ret &= ~mask;
 	ret |= val;
-	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, ret);
+	qca8k_mii_write32(bus, 0x10 | r2, r1, ret);
 
-	mutex_unlock(&priv->bus->mdio_lock);
+	mutex_unlock(&bus->mdio_lock);
 
 	return ret;
 }

From ba5707ec58cfb6853dff41c2aae72deb6a03d389 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:54 +0200
Subject: [PATCH 0157/3292] net: dsa: qca8k: handle qca8k_set_page errors

With a remote possibility, the set_page function can fail. Since this is
a critical part of the write/read qca8k regs, propagate the error and
terminate any read/write operation.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 3c882d325fdf..c9830286fd6d 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -127,16 +127,23 @@ qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
 				    "failed to write qca8k 32bit register\n");
 }
 
-static void
+static int
 qca8k_set_page(struct mii_bus *bus, u16 page)
 {
-	if (page == qca8k_current_page)
-		return;
+	int ret;
 
-	if (bus->write(bus, 0x18, 0, page) < 0)
+	if (page == qca8k_current_page)
+		return 0;
+
+	ret = bus->write(bus, 0x18, 0, page);
+	if (ret < 0) {
 		dev_err_ratelimited(&bus->dev,
 				    "failed to set qca8k page\n");
+		return ret;
+	}
+
 	qca8k_current_page = page;
+	return 0;
 }
 
 static u32
@@ -150,11 +157,14 @@ qca8k_read(struct qca8k_priv *priv, u32 reg)
 
 	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	qca8k_set_page(bus, page);
+	val = qca8k_set_page(bus, page);
+	if (val < 0)
+		goto exit;
+
 	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
 
+exit:
 	mutex_unlock(&bus->mdio_lock);
-
 	return val;
 }
 
@@ -163,14 +173,19 @@ qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val)
 {
 	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
+	int ret;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
 	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	qca8k_set_page(bus, page);
+	ret = qca8k_set_page(bus, page);
+	if (ret < 0)
+		goto exit;
+
 	qca8k_mii_write32(bus, 0x10 | r2, r1, val);
 
+exit:
 	mutex_unlock(&bus->mdio_lock);
 }
 
@@ -185,12 +200,16 @@ qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val)
 
 	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	qca8k_set_page(bus, page);
+	ret = qca8k_set_page(bus, page);
+	if (ret < 0)
+		goto exit;
+
 	ret = qca8k_mii_read32(bus, 0x10 | r2, r1);
 	ret &= ~mask;
 	ret |= val;
 	qca8k_mii_write32(bus, 0x10 | r2, r1, ret);
 
+exit:
 	mutex_unlock(&bus->mdio_lock);
 
 	return ret;

From 028f5f8ef44fcf87a456772cbb9f0d90a0a22884 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:55 +0200
Subject: [PATCH 0158/3292] net: dsa: qca8k: handle error with qca8k_read
 operation

qca8k_read can fail. Rework any user to handle error values and
correctly return.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 73 ++++++++++++++++++++++++++++++++---------
 1 file changed, 58 insertions(+), 15 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index c9830286fd6d..5eb4d13fe0ba 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -231,8 +231,13 @@ static int
 qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
+	int ret;
 
-	*val = qca8k_read(priv, reg);
+	ret = qca8k_read(priv, reg);
+	if (ret < 0)
+		return ret;
+
+	*val = ret;
 
 	return 0;
 }
@@ -300,15 +305,20 @@ qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
 	return ret;
 }
 
-static void
+static int
 qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
 {
-	u32 reg[4];
+	u32 reg[4], val;
 	int i;
 
 	/* load the ARL table into an array */
-	for (i = 0; i < 4; i++)
-		reg[i] = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4));
+	for (i = 0; i < 4; i++) {
+		val = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4));
+		if (val < 0)
+			return val;
+
+		reg[i] = val;
+	}
 
 	/* vid - 83:72 */
 	fdb->vid = (reg[2] >> QCA8K_ATU_VID_S) & QCA8K_ATU_VID_M;
@@ -323,6 +333,8 @@ qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
 	fdb->mac[3] = (reg[0] >> QCA8K_ATU_ADDR3_S) & 0xff;
 	fdb->mac[4] = (reg[0] >> QCA8K_ATU_ADDR4_S) & 0xff;
 	fdb->mac[5] = reg[0] & 0xff;
+
+	return 0;
 }
 
 static void
@@ -374,6 +386,8 @@ qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
 	/* Check for table full violation when adding an entry */
 	if (cmd == QCA8K_FDB_LOAD) {
 		reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC);
+		if (reg < 0)
+			return reg;
 		if (reg & QCA8K_ATU_FUNC_FULL)
 			return -1;
 	}
@@ -388,10 +402,10 @@ qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port)
 
 	qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging);
 	ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port);
-	if (ret >= 0)
-		qca8k_fdb_read(priv, fdb);
+	if (ret < 0)
+		return ret;
 
-	return ret;
+	return qca8k_fdb_read(priv, fdb);
 }
 
 static int
@@ -449,6 +463,8 @@ qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid)
 	/* Check for table full violation when adding an entry */
 	if (cmd == QCA8K_VLAN_LOAD) {
 		reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC1);
+		if (reg < 0)
+			return reg;
 		if (reg & QCA8K_VTU_FUNC1_FULL)
 			return -ENOMEM;
 	}
@@ -475,6 +491,8 @@ qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool untagged)
 		goto out;
 
 	reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
+	if (reg < 0)
+		return reg;
 	reg |= QCA8K_VTU_FUNC0_VALID | QCA8K_VTU_FUNC0_IVL_EN;
 	reg &= ~(QCA8K_VTU_FUNC0_EG_MODE_MASK << QCA8K_VTU_FUNC0_EG_MODE_S(port));
 	if (untagged)
@@ -506,6 +524,8 @@ qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid)
 		goto out;
 
 	reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
+	if (reg < 0)
+		return reg;
 	reg &= ~(3 << QCA8K_VTU_FUNC0_EG_MODE_S(port));
 	reg |= QCA8K_VTU_FUNC0_EG_MODE_NOT <<
 			QCA8K_VTU_FUNC0_EG_MODE_S(port);
@@ -621,8 +641,11 @@ qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 			    QCA8K_MDIO_MASTER_BUSY))
 		return -ETIMEDOUT;
 
-	val = (qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL) &
-		QCA8K_MDIO_MASTER_DATA_MASK);
+	val = qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL);
+	if (val < 0)
+		return val;
+
+	val &= QCA8K_MDIO_MASTER_DATA_MASK;
 
 	return val;
 }
@@ -978,6 +1001,8 @@ qca8k_phylink_mac_link_state(struct dsa_switch *ds, int port,
 	u32 reg;
 
 	reg = qca8k_read(priv, QCA8K_REG_PORT_STATUS(port));
+	if (reg < 0)
+		return reg;
 
 	state->link = !!(reg & QCA8K_PORT_STATUS_LINK_UP);
 	state->an_complete = state->link;
@@ -1078,18 +1103,26 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	const struct qca8k_mib_desc *mib;
-	u32 reg, i;
+	u32 reg, i, val;
 	u64 hi;
 
 	for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) {
 		mib = &ar8327_mib[i];
 		reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset;
 
-		data[i] = qca8k_read(priv, reg);
+		val = qca8k_read(priv, reg);
+		if (val < 0)
+			continue;
+
 		if (mib->size == 2) {
 			hi = qca8k_read(priv, reg + 4);
-			data[i] |= hi << 32;
+			if (hi < 0)
+				continue;
 		}
+
+		data[i] = val;
+		if (mib->size == 2)
+			data[i] |= hi << 32;
 	}
 }
 
@@ -1107,18 +1140,25 @@ qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
+	int ret = 0;
 	u32 reg;
 
 	mutex_lock(&priv->reg_mutex);
 	reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
+	if (reg < 0) {
+		ret = reg;
+		goto exit;
+	}
+
 	if (eee->eee_enabled)
 		reg |= lpi_en;
 	else
 		reg &= ~lpi_en;
 	qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
-	mutex_unlock(&priv->reg_mutex);
 
-	return 0;
+exit:
+	mutex_unlock(&priv->reg_mutex);
+	return ret;
 }
 
 static int
@@ -1443,6 +1483,9 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
 
 	/* read the switches ID register */
 	id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
+	if (id < 0)
+		return id;
+
 	id >>= QCA8K_MASK_CTRL_ID_S;
 	id &= QCA8K_MASK_CTRL_ID_M;
 	if (id != QCA8K_ID_QCA8337)

From d7805757c75c76e9518fc1023a29f0c4eed5b581 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:56 +0200
Subject: [PATCH 0159/3292] net: dsa: qca8k: handle error with qca8k_write
 operation

qca8k_write can fail. Rework any user to handle error values and
correctly return.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 102 ++++++++++++++++++++++++++--------------
 1 file changed, 67 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 5eb4d13fe0ba..2fdd7c2e74d5 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -168,7 +168,7 @@ exit:
 	return val;
 }
 
-static void
+static int
 qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val)
 {
 	struct mii_bus *bus = priv->bus;
@@ -187,6 +187,7 @@ qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val)
 
 exit:
 	mutex_unlock(&bus->mdio_lock);
+	return ret;
 }
 
 static u32
@@ -247,9 +248,7 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
 
-	qca8k_write(priv, reg, val);
-
-	return 0;
+	return qca8k_write(priv, reg, val);
 }
 
 static const struct regmap_range qca8k_readable_ranges[] = {
@@ -367,6 +366,7 @@ static int
 qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
 {
 	u32 reg;
+	int ret;
 
 	/* Set the command and FDB index */
 	reg = QCA8K_ATU_FUNC_BUSY;
@@ -377,7 +377,9 @@ qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
 	}
 
 	/* Write the function register triggering the table access */
-	qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg);
+	ret = qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg);
+	if (ret)
+		return ret;
 
 	/* wait for completion */
 	if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY))
@@ -447,6 +449,7 @@ static int
 qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid)
 {
 	u32 reg;
+	int ret;
 
 	/* Set the command and VLAN index */
 	reg = QCA8K_VTU_FUNC1_BUSY;
@@ -454,7 +457,9 @@ qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid)
 	reg |= vid << QCA8K_VTU_FUNC1_VID_S;
 
 	/* Write the function register triggering the table access */
-	qca8k_write(priv, QCA8K_REG_VTU_FUNC1, reg);
+	ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC1, reg);
+	if (ret)
+		return ret;
 
 	/* wait for completion */
 	if (qca8k_busy_wait(priv, QCA8K_REG_VTU_FUNC1, QCA8K_VTU_FUNC1_BUSY))
@@ -502,7 +507,9 @@ qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool untagged)
 		reg |= QCA8K_VTU_FUNC0_EG_MODE_TAG <<
 				QCA8K_VTU_FUNC0_EG_MODE_S(port);
 
-	qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
+	ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
+	if (ret)
+		return ret;
 	ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid);
 
 out:
@@ -545,7 +552,9 @@ qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid)
 	if (del) {
 		ret = qca8k_vlan_access(priv, QCA8K_VLAN_PURGE, vid);
 	} else {
-		qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
+		ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
+		if (ret)
+			return ret;
 		ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid);
 	}
 
@@ -555,15 +564,20 @@ out:
 	return ret;
 }
 
-static void
+static int
 qca8k_mib_init(struct qca8k_priv *priv)
 {
+	int ret;
+
 	mutex_lock(&priv->reg_mutex);
 	qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
 	qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY);
 	qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP);
-	qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB);
+
+	ret = qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB);
+
 	mutex_unlock(&priv->reg_mutex);
+	return ret;
 }
 
 static void
@@ -600,6 +614,7 @@ static int
 qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 {
 	u32 phy, val;
+	int ret;
 
 	if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
 		return -EINVAL;
@@ -613,7 +628,9 @@ qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 	      QCA8K_MDIO_MASTER_REG_ADDR(regnum) |
 	      QCA8K_MDIO_MASTER_DATA(data);
 
-	qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+	ret = qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+	if (ret)
+		return ret;
 
 	return qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
 		QCA8K_MDIO_MASTER_BUSY);
@@ -623,6 +640,7 @@ static int
 qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 {
 	u32 phy, val;
+	int ret;
 
 	if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
 		return -EINVAL;
@@ -635,7 +653,9 @@ qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 	      QCA8K_MDIO_MASTER_READ | QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
 	      QCA8K_MDIO_MASTER_REG_ADDR(regnum);
 
-	qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+	ret = qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+	if (ret)
+		return ret;
 
 	if (qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
 			    QCA8K_MDIO_MASTER_BUSY))
@@ -766,12 +786,18 @@ qca8k_setup(struct dsa_switch *ds)
 		      QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
 
 	/* Enable MIB counters */
-	qca8k_mib_init(priv);
+	ret = qca8k_mib_init(priv);
+	if (ret)
+		dev_warn(priv->dev, "mib init failed");
 
 	/* Enable QCA header mode on the cpu port */
-	qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT),
-		    QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S |
-		    QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S);
+	ret = qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT),
+			  QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S |
+			  QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S);
+	if (ret) {
+		dev_err(priv->dev, "failed enabling QCA header mode");
+		return ret;
+	}
 
 	/* Disable forwarding by default on all ports */
 	for (i = 0; i < QCA8K_NUM_PORTS; i++)
@@ -783,11 +809,13 @@ qca8k_setup(struct dsa_switch *ds)
 		qca8k_port_set_status(priv, i, 0);
 
 	/* Forward all unknown frames to CPU port for Linux processing */
-	qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1,
-		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S |
-		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S |
-		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S |
-		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S);
+	ret = qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1,
+			  BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S |
+			  BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S |
+			  BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S |
+			  BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S);
+	if (ret)
+		return ret;
 
 	/* Setup connection between CPU port & user ports */
 	for (i = 0; i < QCA8K_NUM_PORTS; i++) {
@@ -815,16 +843,20 @@ qca8k_setup(struct dsa_switch *ds)
 			qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i),
 				  0xfff << shift,
 				  QCA8K_PORT_VID_DEF << shift);
-			qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i),
-				    QCA8K_PORT_VLAN_CVID(QCA8K_PORT_VID_DEF) |
-				    QCA8K_PORT_VLAN_SVID(QCA8K_PORT_VID_DEF));
+			ret = qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i),
+					  QCA8K_PORT_VLAN_CVID(QCA8K_PORT_VID_DEF) |
+					  QCA8K_PORT_VLAN_SVID(QCA8K_PORT_VID_DEF));
+			if (ret)
+				return ret;
 		}
 	}
 
 	/* Setup our port MTUs to match power on defaults */
 	for (i = 0; i < QCA8K_NUM_PORTS; i++)
 		priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN;
-	qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, ETH_FRAME_LEN + ETH_FCS_LEN);
+	ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, ETH_FRAME_LEN + ETH_FCS_LEN);
+	if (ret)
+		dev_warn(priv->dev, "failed setting MTU settings");
 
 	/* Flush the FDB table */
 	qca8k_fdb_flush(priv);
@@ -1140,8 +1172,8 @@ qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
-	int ret = 0;
 	u32 reg;
+	int ret;
 
 	mutex_lock(&priv->reg_mutex);
 	reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
@@ -1154,7 +1186,7 @@ qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 		reg |= lpi_en;
 	else
 		reg &= ~lpi_en;
-	qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
+	ret = qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
 
 exit:
 	mutex_unlock(&priv->reg_mutex);
@@ -1284,9 +1316,7 @@ qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 			mtu = priv->port_mtu[i];
 
 	/* Include L2 header / FCS length */
-	qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, mtu + ETH_HLEN + ETH_FCS_LEN);
-
-	return 0;
+	return qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, mtu + ETH_HLEN + ETH_FCS_LEN);
 }
 
 static int
@@ -1381,7 +1411,7 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port,
 	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
 	struct qca8k_priv *priv = ds->priv;
-	int ret = 0;
+	int ret;
 
 	ret = qca8k_vlan_add(priv, port, vlan->vid, untagged);
 	if (ret) {
@@ -1394,9 +1424,11 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port,
 
 		qca8k_rmw(priv, QCA8K_EGRESS_VLAN(port),
 			  0xfff << shift, vlan->vid << shift);
-		qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(port),
-			    QCA8K_PORT_VLAN_CVID(vlan->vid) |
-			    QCA8K_PORT_VLAN_SVID(vlan->vid));
+		ret = qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(port),
+				  QCA8K_PORT_VLAN_CVID(vlan->vid) |
+				  QCA8K_PORT_VLAN_SVID(vlan->vid));
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -1407,7 +1439,7 @@ qca8k_port_vlan_del(struct dsa_switch *ds, int port,
 		    const struct switchdev_obj_port_vlan *vlan)
 {
 	struct qca8k_priv *priv = ds->priv;
-	int ret = 0;
+	int ret;
 
 	ret = qca8k_vlan_del(priv, port, vlan->vid);
 	if (ret)

From aaf421425cbdec4eb6fd75a29e65c2867b0b7bbd Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:57 +0200
Subject: [PATCH 0160/3292] net: dsa: qca8k: handle error with qca8k_rmw
 operation

qca8k_rmw can fail. Rework any user to handle error values and
correctly return. Change qca8k_rmw to return the error code or 0 instead
of the reg value. The reg returned by qca8k_rmw wasn't used anywhere,
so this doesn't cause any functional change.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 135 +++++++++++++++++++++++++---------------
 1 file changed, 84 insertions(+), 51 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 2fdd7c2e74d5..409f6592048a 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -190,12 +190,13 @@ exit:
 	return ret;
 }
 
-static u32
-qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val)
+static int
+qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val)
 {
 	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
-	u32 ret;
+	u32 val;
+	int ret;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
@@ -205,10 +206,15 @@ qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val)
 	if (ret < 0)
 		goto exit;
 
-	ret = qca8k_mii_read32(bus, 0x10 | r2, r1);
-	ret &= ~mask;
-	ret |= val;
-	qca8k_mii_write32(bus, 0x10 | r2, r1, ret);
+	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
+	if (val < 0) {
+		ret = val;
+		goto exit;
+	}
+
+	val &= ~mask;
+	val |= write_val;
+	qca8k_mii_write32(bus, 0x10 | r2, r1, val);
 
 exit:
 	mutex_unlock(&bus->mdio_lock);
@@ -216,16 +222,16 @@ exit:
 	return ret;
 }
 
-static void
+static int
 qca8k_reg_set(struct qca8k_priv *priv, u32 reg, u32 val)
 {
-	qca8k_rmw(priv, reg, 0, val);
+	return qca8k_rmw(priv, reg, 0, val);
 }
 
-static void
+static int
 qca8k_reg_clear(struct qca8k_priv *priv, u32 reg, u32 val)
 {
-	qca8k_rmw(priv, reg, val, 0);
+	return qca8k_rmw(priv, reg, val, 0);
 }
 
 static int
@@ -570,12 +576,19 @@ qca8k_mib_init(struct qca8k_priv *priv)
 	int ret;
 
 	mutex_lock(&priv->reg_mutex);
-	qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
+	ret = qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
+	if (ret)
+		goto exit;
+
 	qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY);
-	qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP);
+
+	ret = qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP);
+	if (ret)
+		goto exit;
 
 	ret = qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB);
 
+exit:
 	mutex_unlock(&priv->reg_mutex);
 	return ret;
 }
@@ -747,9 +760,8 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
 		 * a dt-overlay and driver reload changed the configuration
 		 */
 
-		qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
-				QCA8K_MDIO_MASTER_EN);
-		return 0;
+		return qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
+				       QCA8K_MDIO_MASTER_EN);
 	}
 
 	priv->ops.phy_read = qca8k_phy_read;
@@ -782,8 +794,12 @@ qca8k_setup(struct dsa_switch *ds)
 		return ret;
 
 	/* Enable CPU Port */
-	qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
-		      QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
+	ret = qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
+			    QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
+	if (ret) {
+		dev_err(priv->dev, "failed enabling CPU port");
+		return ret;
+	}
 
 	/* Enable MIB counters */
 	ret = qca8k_mib_init(priv);
@@ -800,9 +816,12 @@ qca8k_setup(struct dsa_switch *ds)
 	}
 
 	/* Disable forwarding by default on all ports */
-	for (i = 0; i < QCA8K_NUM_PORTS; i++)
-		qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
-			  QCA8K_PORT_LOOKUP_MEMBER, 0);
+	for (i = 0; i < QCA8K_NUM_PORTS; i++) {
+		ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+				QCA8K_PORT_LOOKUP_MEMBER, 0);
+		if (ret)
+			return ret;
+	}
 
 	/* Disable MAC by default on all ports */
 	for (i = 1; i < QCA8K_NUM_PORTS; i++)
@@ -821,28 +840,37 @@ qca8k_setup(struct dsa_switch *ds)
 	for (i = 0; i < QCA8K_NUM_PORTS; i++) {
 		/* CPU port gets connected to all user ports of the switch */
 		if (dsa_is_cpu_port(ds, i)) {
-			qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT),
-				  QCA8K_PORT_LOOKUP_MEMBER, dsa_user_ports(ds));
+			ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT),
+					QCA8K_PORT_LOOKUP_MEMBER, dsa_user_ports(ds));
+			if (ret)
+				return ret;
 		}
 
 		/* Individual user ports get connected to CPU port only */
 		if (dsa_is_user_port(ds, i)) {
 			int shift = 16 * (i % 2);
 
-			qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
-				  QCA8K_PORT_LOOKUP_MEMBER,
-				  BIT(QCA8K_CPU_PORT));
+			ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+					QCA8K_PORT_LOOKUP_MEMBER,
+					BIT(QCA8K_CPU_PORT));
+			if (ret)
+				return ret;
 
 			/* Enable ARP Auto-learning by default */
-			qca8k_reg_set(priv, QCA8K_PORT_LOOKUP_CTRL(i),
-				      QCA8K_PORT_LOOKUP_LEARN);
+			ret = qca8k_reg_set(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+					    QCA8K_PORT_LOOKUP_LEARN);
+			if (ret)
+				return ret;
 
 			/* For port based vlans to work we need to set the
 			 * default egress vid
 			 */
-			qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i),
-				  0xfff << shift,
-				  QCA8K_PORT_VID_DEF << shift);
+			ret = qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i),
+					0xfff << shift,
+					QCA8K_PORT_VID_DEF << shift);
+			if (ret)
+				return ret;
+
 			ret = qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i),
 					  QCA8K_PORT_VLAN_CVID(QCA8K_PORT_VID_DEF) |
 					  QCA8K_PORT_VLAN_SVID(QCA8K_PORT_VID_DEF));
@@ -1234,7 +1262,7 @@ qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	int port_mask = BIT(QCA8K_CPU_PORT);
-	int i;
+	int i, ret;
 
 	for (i = 1; i < QCA8K_NUM_PORTS; i++) {
 		if (dsa_to_port(ds, i)->bridge_dev != br)
@@ -1242,17 +1270,20 @@ qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br)
 		/* Add this port to the portvlan mask of the other ports
 		 * in the bridge
 		 */
-		qca8k_reg_set(priv,
-			      QCA8K_PORT_LOOKUP_CTRL(i),
-			      BIT(port));
+		ret = qca8k_reg_set(priv,
+				    QCA8K_PORT_LOOKUP_CTRL(i),
+				    BIT(port));
+		if (ret)
+			return ret;
 		if (i != port)
 			port_mask |= BIT(i);
 	}
-	/* Add all other ports to this ports portvlan mask */
-	qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
-		  QCA8K_PORT_LOOKUP_MEMBER, port_mask);
 
-	return 0;
+	/* Add all other ports to this ports portvlan mask */
+	ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+			QCA8K_PORT_LOOKUP_MEMBER, port_mask);
+
+	return ret;
 }
 
 static void
@@ -1389,18 +1420,19 @@ qca8k_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
 			  struct netlink_ext_ack *extack)
 {
 	struct qca8k_priv *priv = ds->priv;
+	int ret;
 
 	if (vlan_filtering) {
-		qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
-			  QCA8K_PORT_LOOKUP_VLAN_MODE,
-			  QCA8K_PORT_LOOKUP_VLAN_MODE_SECURE);
+		ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+				QCA8K_PORT_LOOKUP_VLAN_MODE,
+				QCA8K_PORT_LOOKUP_VLAN_MODE_SECURE);
 	} else {
-		qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
-			  QCA8K_PORT_LOOKUP_VLAN_MODE,
-			  QCA8K_PORT_LOOKUP_VLAN_MODE_NONE);
+		ret = qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+				QCA8K_PORT_LOOKUP_VLAN_MODE,
+				QCA8K_PORT_LOOKUP_VLAN_MODE_NONE);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int
@@ -1422,16 +1454,17 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port,
 	if (pvid) {
 		int shift = 16 * (port % 2);
 
-		qca8k_rmw(priv, QCA8K_EGRESS_VLAN(port),
-			  0xfff << shift, vlan->vid << shift);
+		ret = qca8k_rmw(priv, QCA8K_EGRESS_VLAN(port),
+				0xfff << shift, vlan->vid << shift);
+		if (ret)
+			return ret;
+
 		ret = qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(port),
 				  QCA8K_PORT_VLAN_CVID(vlan->vid) |
 				  QCA8K_PORT_VLAN_SVID(vlan->vid));
-		if (ret)
-			return ret;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int

From b7c818d194927bdc60ed15db55bb8654496a36b7 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:58 +0200
Subject: [PATCH 0161/3292] net: dsa: qca8k: handle error from qca8k_busy_wait

Propagate errors from qca8k_busy_wait instead of hardcoding return
value.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 409f6592048a..d4e3f81576ec 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -388,8 +388,9 @@ qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
 		return ret;
 
 	/* wait for completion */
-	if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY))
-		return -1;
+	ret = qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY);
+	if (ret)
+		return ret;
 
 	/* Check for table full violation when adding an entry */
 	if (cmd == QCA8K_FDB_LOAD) {
@@ -468,8 +469,9 @@ qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid)
 		return ret;
 
 	/* wait for completion */
-	if (qca8k_busy_wait(priv, QCA8K_REG_VTU_FUNC1, QCA8K_VTU_FUNC1_BUSY))
-		return -ETIMEDOUT;
+	ret = qca8k_busy_wait(priv, QCA8K_REG_VTU_FUNC1, QCA8K_VTU_FUNC1_BUSY);
+	if (ret)
+		return ret;
 
 	/* Check for table full violation when adding an entry */
 	if (cmd == QCA8K_VLAN_LOAD) {
@@ -580,7 +582,9 @@ qca8k_mib_init(struct qca8k_priv *priv)
 	if (ret)
 		goto exit;
 
-	qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY);
+	ret = qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY);
+	if (ret)
+		goto exit;
 
 	ret = qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP);
 	if (ret)
@@ -670,9 +674,10 @@ qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 	if (ret)
 		return ret;
 
-	if (qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
-			    QCA8K_MDIO_MASTER_BUSY))
-		return -ETIMEDOUT;
+	ret = qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+			      QCA8K_MDIO_MASTER_BUSY);
+	if (ret)
+		return ret;
 
 	val = qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL);
 	if (val < 0)

From 6e82a457e06252b59102486767539cc9c2aba60b Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 22:59:59 +0200
Subject: [PATCH 0162/3292] net: dsa: qca8k: add support for qca8327 switch

qca8327 switch is a low tier version of the more recent qca8337.
It does share the same regs used by the qca8k driver and can be
supported with minimal change.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 23 ++++++++++++++++++++---
 drivers/net/dsa/qca8k.h |  6 ++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index d4e3f81576ec..693bd9fd532b 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1524,6 +1524,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
 static int
 qca8k_sw_probe(struct mdio_device *mdiodev)
 {
+	const struct qca8k_match_data *data;
 	struct qca8k_priv *priv;
 	u32 id;
 
@@ -1551,6 +1552,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
 		gpiod_set_value_cansleep(priv->reset_gpio, 0);
 	}
 
+	/* get the switches ID from the compatible */
+	data = of_device_get_match_data(&mdiodev->dev);
+	if (!data)
+		return -ENODEV;
+
 	/* read the switches ID register */
 	id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
 	if (id < 0)
@@ -1558,8 +1564,10 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
 
 	id >>= QCA8K_MASK_CTRL_ID_S;
 	id &= QCA8K_MASK_CTRL_ID_M;
-	if (id != QCA8K_ID_QCA8337)
+	if (id != data->id) {
+		dev_err(&mdiodev->dev, "Switch id detected %x but expected %x", id, data->id);
 		return -ENODEV;
+	}
 
 	priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
 	if (!priv->ds)
@@ -1624,9 +1632,18 @@ static int qca8k_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(qca8k_pm_ops,
 			 qca8k_suspend, qca8k_resume);
 
+static const struct qca8k_match_data qca832x = {
+	.id = QCA8K_ID_QCA8327,
+};
+
+static const struct qca8k_match_data qca833x = {
+	.id = QCA8K_ID_QCA8337,
+};
+
 static const struct of_device_id qca8k_of_match[] = {
-	{ .compatible = "qca,qca8334" },
-	{ .compatible = "qca,qca8337" },
+	{ .compatible = "qca,qca8327", .data = &qca832x },
+	{ .compatible = "qca,qca8334", .data = &qca833x },
+	{ .compatible = "qca,qca8337", .data = &qca833x },
 	{ /* sentinel */ },
 };
 
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 86c585b7ec4a..87a8b10459c6 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -15,6 +15,8 @@
 #define QCA8K_NUM_PORTS					7
 #define QCA8K_MAX_MTU					9000
 
+#define PHY_ID_QCA8327					0x004dd034
+#define QCA8K_ID_QCA8327				0x12
 #define PHY_ID_QCA8337					0x004dd036
 #define QCA8K_ID_QCA8337				0x13
 
@@ -213,6 +215,10 @@ struct ar8xxx_port_status {
 	int enabled;
 };
 
+struct qca8k_match_data {
+	u8 id;
+};
+
 struct qca8k_priv {
 	struct regmap *regmap;
 	struct mii_bus *bus;

From 227a9ffc1bc77037339530607fe129af3824620e Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:00 +0200
Subject: [PATCH 0163/3292] devicetree: net: dsa: qca8k: Document new
 compatible qca8327

Add support for qca8327 in the compatible list.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/qca8k.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
index ccbc6d89325d..1daf68e7ae19 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -3,6 +3,7 @@
 Required properties:
 
 - compatible: should be one of:
+    "qca,qca8327"
     "qca,qca8334"
     "qca,qca8337"
 

From 83a3ceb39b2495171aabe9446271b94c678354f3 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:01 +0200
Subject: [PATCH 0164/3292] net: dsa: qca8k: add priority tweak to qca8337
 switch

The port 5 of the qca8337 have some problem in flood condition. The
original legacy driver had some specific buffer and priority settings
for the different port suggested by the QCA switch team. Add this
missing settings to improve switch stability under load condition.
The packet priority tweak is only needed for the qca8337 switch and
other qca8k switch are not affected.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 47 +++++++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/qca8k.h | 25 ++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 693bd9fd532b..65f27d136aef 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -779,6 +779,7 @@ qca8k_setup(struct dsa_switch *ds)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	int ret, i;
+	u32 mask;
 
 	/* Make sure that port 0 is the cpu port */
 	if (!dsa_is_cpu_port(ds, 0)) {
@@ -884,6 +885,51 @@ qca8k_setup(struct dsa_switch *ds)
 		}
 	}
 
+	/* The port 5 of the qca8337 have some problem in flood condition. The
+	 * original legacy driver had some specific buffer and priority settings
+	 * for the different port suggested by the QCA switch team. Add this
+	 * missing settings to improve switch stability under load condition.
+	 * This problem is limited to qca8337 and other qca8k switch are not affected.
+	 */
+	if (priv->switch_id == QCA8K_ID_QCA8337) {
+		for (i = 0; i < QCA8K_NUM_PORTS; i++) {
+			switch (i) {
+			/* The 2 CPU port and port 5 requires some different
+			 * priority than any other ports.
+			 */
+			case 0:
+			case 5:
+			case 6:
+				mask = QCA8K_PORT_HOL_CTRL0_EG_PRI0(0x3) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI1(0x4) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI2(0x4) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI3(0x4) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI4(0x6) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI5(0x8) |
+					QCA8K_PORT_HOL_CTRL0_EG_PORT(0x1e);
+				break;
+			default:
+				mask = QCA8K_PORT_HOL_CTRL0_EG_PRI0(0x3) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI1(0x4) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI2(0x6) |
+					QCA8K_PORT_HOL_CTRL0_EG_PRI3(0x8) |
+					QCA8K_PORT_HOL_CTRL0_EG_PORT(0x19);
+			}
+			qca8k_write(priv, QCA8K_REG_PORT_HOL_CTRL0(i), mask);
+
+			mask = QCA8K_PORT_HOL_CTRL1_ING(0x6) |
+			QCA8K_PORT_HOL_CTRL1_EG_PRI_BUF_EN |
+			QCA8K_PORT_HOL_CTRL1_EG_PORT_BUF_EN |
+			QCA8K_PORT_HOL_CTRL1_WRED_EN;
+			qca8k_rmw(priv, QCA8K_REG_PORT_HOL_CTRL1(i),
+				  QCA8K_PORT_HOL_CTRL1_ING_BUF |
+				  QCA8K_PORT_HOL_CTRL1_EG_PRI_BUF_EN |
+				  QCA8K_PORT_HOL_CTRL1_EG_PORT_BUF_EN |
+				  QCA8K_PORT_HOL_CTRL1_WRED_EN,
+				  mask);
+		}
+	}
+
 	/* Setup our port MTUs to match power on defaults */
 	for (i = 0; i < QCA8K_NUM_PORTS; i++)
 		priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN;
@@ -1569,6 +1615,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
 		return -ENODEV;
 	}
 
+	priv->switch_id = id;
 	priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
 	if (!priv->ds)
 		return -ENOMEM;
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 87a8b10459c6..42d90836dffa 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -168,6 +168,30 @@
 #define   QCA8K_PORT_LOOKUP_STATE			GENMASK(18, 16)
 #define   QCA8K_PORT_LOOKUP_LEARN			BIT(20)
 
+#define QCA8K_REG_PORT_HOL_CTRL0(_i)			(0x970 + (_i) * 0x8)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI0_BUF		GENMASK(3, 0)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI0(x)		((x) << 0)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI1_BUF		GENMASK(7, 4)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI1(x)		((x) << 4)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI2_BUF		GENMASK(11, 8)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI2(x)		((x) << 8)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI3_BUF		GENMASK(15, 12)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI3(x)		((x) << 12)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI4_BUF		GENMASK(19, 16)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI4(x)		((x) << 16)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI5_BUF		GENMASK(23, 20)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PRI5(x)		((x) << 20)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PORT_BUF		GENMASK(29, 24)
+#define   QCA8K_PORT_HOL_CTRL0_EG_PORT(x)		((x) << 24)
+
+#define QCA8K_REG_PORT_HOL_CTRL1(_i)			(0x974 + (_i) * 0x8)
+#define   QCA8K_PORT_HOL_CTRL1_ING_BUF			GENMASK(3, 0)
+#define   QCA8K_PORT_HOL_CTRL1_ING(x)			((x) << 0)
+#define   QCA8K_PORT_HOL_CTRL1_EG_PRI_BUF_EN		BIT(6)
+#define   QCA8K_PORT_HOL_CTRL1_EG_PORT_BUF_EN		BIT(7)
+#define   QCA8K_PORT_HOL_CTRL1_WRED_EN			BIT(8)
+#define   QCA8K_PORT_HOL_CTRL1_EG_MIRROR_EN		BIT(16)
+
 /* Pkt edit registers */
 #define QCA8K_EGRESS_VLAN(x)				(0x0c70 + (4 * (x / 2)))
 
@@ -220,6 +244,7 @@ struct qca8k_match_data {
 };
 
 struct qca8k_priv {
+	u8 switch_id;
 	struct regmap *regmap;
 	struct mii_bus *bus;
 	struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];

From 5bf9ff3b9fb5ecb67a1a3517b26db3a00f2a2f11 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:02 +0200
Subject: [PATCH 0165/3292] net: dsa: qca8k: limit port5 delay to qca8337

Limit port5 rx delay to qca8337. This is taken from the legacy QSDK code
that limits the rx delay on port5 to only this particular switch version,
on other switch only the tx and rx delay for port0 are needed.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 65f27d136aef..b598930190e1 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1003,8 +1003,10 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 			    QCA8K_PORT_PAD_RGMII_EN |
 			    QCA8K_PORT_PAD_RGMII_TX_DELAY(QCA8K_MAX_DELAY) |
 			    QCA8K_PORT_PAD_RGMII_RX_DELAY(QCA8K_MAX_DELAY));
-		qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL,
-			    QCA8K_PORT_PAD_RGMII_RX_DELAY_EN);
+		/* QCA8337 requires to set rgmii rx delay */
+		if (priv->switch_id == QCA8K_ID_QCA8337)
+			qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL,
+				    QCA8K_PORT_PAD_RGMII_RX_DELAY_EN);
 		break;
 	case PHY_INTERFACE_MODE_SGMII:
 	case PHY_INTERFACE_MODE_1000BASEX:

From 0fc57e4b5e39461fc0a54aae0afe4241363a7267 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:03 +0200
Subject: [PATCH 0166/3292] net: dsa: qca8k: add GLOBAL_FC settings needed for
 qca8327

Switch qca8327 needs special settings for the GLOBAL_FC_THRES regs.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 10 ++++++++++
 drivers/net/dsa/qca8k.h |  6 ++++++
 2 files changed, 16 insertions(+)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index b598930190e1..10e3e1ca7e95 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -930,6 +930,16 @@ qca8k_setup(struct dsa_switch *ds)
 		}
 	}
 
+	/* Special GLOBAL_FC_THRESH value are needed for ar8327 switch */
+	if (priv->switch_id == QCA8K_ID_QCA8327) {
+		mask = QCA8K_GLOBAL_FC_GOL_XON_THRES(288) |
+		       QCA8K_GLOBAL_FC_GOL_XOFF_THRES(496);
+		qca8k_rmw(priv, QCA8K_REG_GLOBAL_FC_THRESH,
+			  QCA8K_GLOBAL_FC_GOL_XON_THRES_S |
+			  QCA8K_GLOBAL_FC_GOL_XOFF_THRES_S,
+			  mask);
+	}
+
 	/* Setup our port MTUs to match power on defaults */
 	for (i = 0; i < QCA8K_NUM_PORTS; i++)
 		priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN;
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 42d90836dffa..eceeacfe2c5d 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -168,6 +168,12 @@
 #define   QCA8K_PORT_LOOKUP_STATE			GENMASK(18, 16)
 #define   QCA8K_PORT_LOOKUP_LEARN			BIT(20)
 
+#define QCA8K_REG_GLOBAL_FC_THRESH			0x800
+#define   QCA8K_GLOBAL_FC_GOL_XON_THRES(x)		((x) << 16)
+#define   QCA8K_GLOBAL_FC_GOL_XON_THRES_S		GENMASK(24, 16)
+#define   QCA8K_GLOBAL_FC_GOL_XOFF_THRES(x)		((x) << 0)
+#define   QCA8K_GLOBAL_FC_GOL_XOFF_THRES_S		GENMASK(8, 0)
+
 #define QCA8K_REG_PORT_HOL_CTRL0(_i)			(0x970 + (_i) * 0x8)
 #define   QCA8K_PORT_HOL_CTRL0_EG_PRI0_BUF		GENMASK(3, 0)
 #define   QCA8K_PORT_HOL_CTRL0_EG_PRI0(x)		((x) << 0)

From 95ffeaf18b3bb90eeef52cbf7d79ccc9d0345ff5 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:04 +0200
Subject: [PATCH 0167/3292] net: dsa: qca8k: add support for switch rev

qca8k internal phy driver require some special debug value to be set
based on the switch revision. Rework the switch id read function to
also read the chip revision.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 53 ++++++++++++++++++++++++++---------------
 drivers/net/dsa/qca8k.h |  7 ++++--
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 10e3e1ca7e95..35ff4cf08786 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1579,12 +1579,40 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
 	.phylink_mac_link_up	= qca8k_phylink_mac_link_up,
 };
 
+static int qca8k_read_switch_id(struct qca8k_priv *priv)
+{
+	const struct qca8k_match_data *data;
+	u32 val;
+	u8 id;
+
+	/* get the switches ID from the compatible */
+	data = of_device_get_match_data(priv->dev);
+	if (!data)
+		return -ENODEV;
+
+	val = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
+	if (val < 0)
+		return -ENODEV;
+
+	id = QCA8K_MASK_CTRL_DEVICE_ID(val & QCA8K_MASK_CTRL_DEVICE_ID_MASK);
+	if (id != data->id) {
+		dev_err(priv->dev, "Switch id detected %x but expected %x", id, data->id);
+		return -ENODEV;
+	}
+
+	priv->switch_id = id;
+
+	/* Save revision to communicate to the internal PHY driver */
+	priv->switch_revision = (val & QCA8K_MASK_CTRL_REV_ID_MASK);
+
+	return 0;
+}
+
 static int
 qca8k_sw_probe(struct mdio_device *mdiodev)
 {
-	const struct qca8k_match_data *data;
 	struct qca8k_priv *priv;
-	u32 id;
+	int ret;
 
 	/* allocate the private data struct so that we can probe the switches
 	 * ID register
@@ -1610,24 +1638,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev)
 		gpiod_set_value_cansleep(priv->reset_gpio, 0);
 	}
 
-	/* get the switches ID from the compatible */
-	data = of_device_get_match_data(&mdiodev->dev);
-	if (!data)
-		return -ENODEV;
+	/* Check the detected switch id */
+	ret = qca8k_read_switch_id(priv);
+	if (ret)
+		return ret;
 
-	/* read the switches ID register */
-	id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
-	if (id < 0)
-		return id;
-
-	id >>= QCA8K_MASK_CTRL_ID_S;
-	id &= QCA8K_MASK_CTRL_ID_M;
-	if (id != data->id) {
-		dev_err(&mdiodev->dev, "Switch id detected %x but expected %x", id, data->id);
-		return -ENODEV;
-	}
-
-	priv->switch_id = id;
 	priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
 	if (!priv->ds)
 		return -ENOMEM;
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index eceeacfe2c5d..338277978ec0 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -30,8 +30,10 @@
 
 /* Global control registers */
 #define QCA8K_REG_MASK_CTRL				0x000
-#define   QCA8K_MASK_CTRL_ID_M				0xff
-#define   QCA8K_MASK_CTRL_ID_S				8
+#define   QCA8K_MASK_CTRL_REV_ID_MASK			GENMASK(7, 0)
+#define   QCA8K_MASK_CTRL_REV_ID(x)			((x) >> 0)
+#define   QCA8K_MASK_CTRL_DEVICE_ID_MASK		GENMASK(15, 8)
+#define   QCA8K_MASK_CTRL_DEVICE_ID(x)			((x) >> 8)
 #define QCA8K_REG_PORT0_PAD_CTRL			0x004
 #define QCA8K_REG_PORT5_PAD_CTRL			0x008
 #define QCA8K_REG_PORT6_PAD_CTRL			0x00c
@@ -251,6 +253,7 @@ struct qca8k_match_data {
 
 struct qca8k_priv {
 	u8 switch_id;
+	u8 switch_revision;
 	struct regmap *regmap;
 	struct mii_bus *bus;
 	struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];

From 1ee0591a1093c2448642c33433483e9260275f7b Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:05 +0200
Subject: [PATCH 0168/3292] net: dsa: qca8k: add ethernet-ports fallback to
 setup_mdio_bus

Dsa now also supports ethernet-ports. Add this new binding as a fallback
if the ports node can't be found.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 35ff4cf08786..cc9ab35f8b17 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -718,6 +718,9 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
 	int err;
 
 	ports = of_get_child_by_name(priv->dev->of_node, "ports");
+	if (!ports)
+		ports = of_get_child_by_name(priv->dev->of_node, "ethernet-ports");
+
 	if (!ports)
 		return -EINVAL;
 

From e4b9977cee1583da38a6e9118078bb728aaccf7b Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:06 +0200
Subject: [PATCH 0169/3292] net: dsa: qca8k: make rgmii delay configurable

The legacy qsdk code used a different delay instead of the max value.
Qsdk use 1 ns for rx and 2 ns for tx. Make these values configurable
using the standard rx/tx-internal-delay-ps ethernet binding and apply
qsdk values by default. The connected gmac doesn't add any delay so no
additional delay is added to tx/rx.
On this switch the delay is actually in ns so value should be in the
1000 order. Any value converted from ps to ns by dividing it by 1000
as the switch max value for delay is 3ns.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 82 ++++++++++++++++++++++++++++++++++++++++-
 drivers/net/dsa/qca8k.h | 11 +++---
 2 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index cc9ab35f8b17..dedbc6565516 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -777,6 +777,68 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
 	return 0;
 }
 
+static int
+qca8k_setup_of_rgmii_delay(struct qca8k_priv *priv)
+{
+	struct device_node *port_dn;
+	phy_interface_t mode;
+	struct dsa_port *dp;
+	u32 val;
+
+	/* CPU port is already checked */
+	dp = dsa_to_port(priv->ds, 0);
+
+	port_dn = dp->dn;
+
+	/* Check if port 0 is set to the correct type */
+	of_get_phy_mode(port_dn, &mode);
+	if (mode != PHY_INTERFACE_MODE_RGMII_ID &&
+	    mode != PHY_INTERFACE_MODE_RGMII_RXID &&
+	    mode != PHY_INTERFACE_MODE_RGMII_TXID) {
+		return 0;
+	}
+
+	switch (mode) {
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+		if (of_property_read_u32(port_dn, "rx-internal-delay-ps", &val))
+			val = 2;
+		else
+			/* Switch regs accept value in ns, convert ps to ns */
+			val = val / 1000;
+
+		if (val > QCA8K_MAX_DELAY) {
+			dev_err(priv->dev, "rgmii rx delay is limited to a max value of 3ns, setting to the max value");
+			val = 3;
+		}
+
+		priv->rgmii_rx_delay = val;
+		/* Stop here if we need to check only for rx delay */
+		if (mode != PHY_INTERFACE_MODE_RGMII_ID)
+			break;
+
+		fallthrough;
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		if (of_property_read_u32(port_dn, "tx-internal-delay-ps", &val))
+			val = 1;
+		else
+			/* Switch regs accept value in ns, convert ps to ns */
+			val = val / 1000;
+
+		if (val > QCA8K_MAX_DELAY) {
+			dev_err(priv->dev, "rgmii tx delay is limited to a max value of 3ns, setting to the max value");
+			val = 3;
+		}
+
+		priv->rgmii_tx_delay = val;
+		break;
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
 static int
 qca8k_setup(struct dsa_switch *ds)
 {
@@ -802,6 +864,10 @@ qca8k_setup(struct dsa_switch *ds)
 	if (ret)
 		return ret;
 
+	ret = qca8k_setup_of_rgmii_delay(priv);
+	if (ret)
+		return ret;
+
 	/* Enable CPU Port */
 	ret = qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
 			    QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
@@ -970,6 +1036,8 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 	case 0: /* 1st CPU port */
 		if (state->interface != PHY_INTERFACE_MODE_RGMII &&
 		    state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
 		    state->interface != PHY_INTERFACE_MODE_SGMII)
 			return;
 
@@ -985,6 +1053,8 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 	case 6: /* 2nd CPU port / external PHY */
 		if (state->interface != PHY_INTERFACE_MODE_RGMII &&
 		    state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
 		    state->interface != PHY_INTERFACE_MODE_SGMII &&
 		    state->interface != PHY_INTERFACE_MODE_1000BASEX)
 			return;
@@ -1008,14 +1078,18 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 		qca8k_write(priv, reg, QCA8K_PORT_PAD_RGMII_EN);
 		break;
 	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
 		/* RGMII_ID needs internal delay. This is enabled through
 		 * PORT5_PAD_CTRL for all ports, rather than individual port
 		 * registers
 		 */
 		qca8k_write(priv, reg,
 			    QCA8K_PORT_PAD_RGMII_EN |
-			    QCA8K_PORT_PAD_RGMII_TX_DELAY(QCA8K_MAX_DELAY) |
-			    QCA8K_PORT_PAD_RGMII_RX_DELAY(QCA8K_MAX_DELAY));
+			    QCA8K_PORT_PAD_RGMII_TX_DELAY(priv->rgmii_tx_delay) |
+			    QCA8K_PORT_PAD_RGMII_RX_DELAY(priv->rgmii_rx_delay) |
+			    QCA8K_PORT_PAD_RGMII_TX_DELAY_EN |
+			    QCA8K_PORT_PAD_RGMII_RX_DELAY_EN);
 		/* QCA8337 requires to set rgmii rx delay */
 		if (priv->switch_id == QCA8K_ID_QCA8337)
 			qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL,
@@ -1073,6 +1147,8 @@ qca8k_phylink_validate(struct dsa_switch *ds, int port,
 		if (state->interface != PHY_INTERFACE_MODE_NA &&
 		    state->interface != PHY_INTERFACE_MODE_RGMII &&
 		    state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
 		    state->interface != PHY_INTERFACE_MODE_SGMII)
 			goto unsupported;
 		break;
@@ -1090,6 +1166,8 @@ qca8k_phylink_validate(struct dsa_switch *ds, int port,
 		if (state->interface != PHY_INTERFACE_MODE_NA &&
 		    state->interface != PHY_INTERFACE_MODE_RGMII &&
 		    state->interface != PHY_INTERFACE_MODE_RGMII_ID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_TXID &&
+		    state->interface != PHY_INTERFACE_MODE_RGMII_RXID &&
 		    state->interface != PHY_INTERFACE_MODE_SGMII &&
 		    state->interface != PHY_INTERFACE_MODE_1000BASEX)
 			goto unsupported;
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 338277978ec0..a878486d9bcd 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -38,12 +38,11 @@
 #define QCA8K_REG_PORT5_PAD_CTRL			0x008
 #define QCA8K_REG_PORT6_PAD_CTRL			0x00c
 #define   QCA8K_PORT_PAD_RGMII_EN			BIT(26)
-#define   QCA8K_PORT_PAD_RGMII_TX_DELAY(x)		\
-						((0x8 + (x & 0x3)) << 22)
-#define   QCA8K_PORT_PAD_RGMII_RX_DELAY(x)		\
-						((0x10 + (x & 0x3)) << 20)
-#define   QCA8K_MAX_DELAY				3
+#define   QCA8K_PORT_PAD_RGMII_TX_DELAY(x)		((x) << 22)
+#define   QCA8K_PORT_PAD_RGMII_RX_DELAY(x)		((x) << 20)
+#define	  QCA8K_PORT_PAD_RGMII_TX_DELAY_EN		BIT(25)
 #define   QCA8K_PORT_PAD_RGMII_RX_DELAY_EN		BIT(24)
+#define   QCA8K_MAX_DELAY				3
 #define   QCA8K_PORT_PAD_SGMII_EN			BIT(7)
 #define QCA8K_REG_PWS					0x010
 #define   QCA8K_PWS_SERDES_AEN_DIS			BIT(7)
@@ -254,6 +253,8 @@ struct qca8k_match_data {
 struct qca8k_priv {
 	u8 switch_id;
 	u8 switch_revision;
+	u8 rgmii_tx_delay;
+	u8 rgmii_rx_delay;
 	struct regmap *regmap;
 	struct mii_bus *bus;
 	struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];

From 63c33bbfeb6842a956a0eb12901e28eb335bdb18 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:07 +0200
Subject: [PATCH 0170/3292] net: dsa: qca8k: clear MASTER_EN after phy
 read/write

Clear MDIO_MASTER_EN bit from MDIO_MASTER_CTRL after read/write
operation. The MDIO_MASTER_EN bit is not reset after read/write
operation and the next operation can be wrongly interpreted by the
switch as a mdio operation. This cause a production of wrong/garbage
data from the switch and underfined bheavior. (random port drop,
unplugged port flagged with link up, wrong port speed)
Also on driver remove the MASTER_CTRL can be left set and cause the
malfunction of any next driver using the mdio device.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index dedbc6565516..a2b4d5097868 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -649,8 +649,14 @@ qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 	if (ret)
 		return ret;
 
-	return qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
-		QCA8K_MDIO_MASTER_BUSY);
+	ret = qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+			      QCA8K_MDIO_MASTER_BUSY);
+
+	/* even if the busy_wait timeouts try to clear the MASTER_EN */
+	qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
+			QCA8K_MDIO_MASTER_EN);
+
+	return ret;
 }
 
 static int
@@ -685,6 +691,10 @@ qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 
 	val &= QCA8K_MDIO_MASTER_DATA_MASK;
 
+	/* even if the busy_wait timeouts try to clear the MASTER_EN */
+	qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
+			QCA8K_MDIO_MASTER_EN);
+
 	return val;
 }
 

From 60df02b6ea4581d72eb7a3ab7204504a54059b72 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:08 +0200
Subject: [PATCH 0171/3292] net: dsa: qca8k: dsa: qca8k: protect MASTER
 busy_wait with mdio mutex

MDIO_MASTER operation have a dedicated busy wait that is not protected
by the mdio mutex. This can cause situation where the MASTER operation
is done and a normal operation is executed between the MASTER read/write
and the MASTER busy_wait. Rework the qca8k_mdio_read/write function to
address this issue by binding the lock for the whole MASTER operation
and not only the mdio read/write common operation.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 70 ++++++++++++++++++++++++++++++++---------
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index a2b4d5097868..1f8bfe0a78f4 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -627,9 +627,32 @@ qca8k_port_to_phy(int port)
 	return port - 1;
 }
 
+static int
+qca8k_mdio_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
+{
+	u16 r1, r2, page;
+	u32 val;
+	int ret;
+
+	qca8k_split_addr(reg, &r1, &r2, &page);
+
+	ret = read_poll_timeout(qca8k_mii_read32, val, !(val & mask), 0,
+				QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
+				priv->bus, 0x10 | r2, r1);
+
+	/* Check if qca8k_read has failed for a different reason
+	 * before returnting -ETIMEDOUT
+	 */
+	if (ret < 0 && val < 0)
+		return val;
+
+	return ret;
+}
+
 static int
 qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 {
+	u16 r1, r2, page;
 	u32 phy, val;
 	int ret;
 
@@ -645,12 +668,21 @@ qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 	      QCA8K_MDIO_MASTER_REG_ADDR(regnum) |
 	      QCA8K_MDIO_MASTER_DATA(data);
 
-	ret = qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
-	if (ret)
-		return ret;
+	qca8k_split_addr(QCA8K_MDIO_MASTER_CTRL, &r1, &r2, &page);
 
-	ret = qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
-			      QCA8K_MDIO_MASTER_BUSY);
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+	ret = qca8k_set_page(priv->bus, page);
+	if (ret)
+		goto exit;
+
+	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+
+	ret = qca8k_mdio_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+				   QCA8K_MDIO_MASTER_BUSY);
+
+exit:
+	mutex_unlock(&priv->bus->mdio_lock);
 
 	/* even if the busy_wait timeouts try to clear the MASTER_EN */
 	qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
@@ -662,6 +694,7 @@ qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 static int
 qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 {
+	u16 r1, r2, page;
 	u32 phy, val;
 	int ret;
 
@@ -676,21 +709,30 @@ qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 	      QCA8K_MDIO_MASTER_READ | QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
 	      QCA8K_MDIO_MASTER_REG_ADDR(regnum);
 
-	ret = qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val);
+	qca8k_split_addr(QCA8K_MDIO_MASTER_CTRL, &r1, &r2, &page);
+
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+	ret = qca8k_set_page(priv->bus, page);
 	if (ret)
-		return ret;
+		goto exit;
 
-	ret = qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
-			      QCA8K_MDIO_MASTER_BUSY);
+	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+
+	ret = qca8k_mdio_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+				   QCA8K_MDIO_MASTER_BUSY);
 	if (ret)
-		return ret;
-
-	val = qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL);
-	if (val < 0)
-		return val;
+		goto exit;
 
+	val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
 	val &= QCA8K_MDIO_MASTER_DATA_MASK;
 
+exit:
+	mutex_unlock(&priv->bus->mdio_lock);
+
+	if (val >= 0)
+		val &= QCA8K_MDIO_MASTER_DATA_MASK;
+
 	/* even if the busy_wait timeouts try to clear the MASTER_EN */
 	qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
 			QCA8K_MDIO_MASTER_EN);

From 617960d72e93de0f3fa52407e2d39e8c43e73b0a Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:09 +0200
Subject: [PATCH 0172/3292] net: dsa: qca8k: enlarge mdio delay and timeout

The witch require some extra delay after setting page or the next
read/write can use still use the old page. Add a delay after the
set_page function to address this as it's done in QSDK legacy driver.
Some timeouts were notice with VLAN and phy function, enlarge the
mdio busy wait timeout to fix these problems.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 1 +
 drivers/net/dsa/qca8k.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 1f8bfe0a78f4..df4cf6d75074 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -143,6 +143,7 @@ qca8k_set_page(struct mii_bus *bus, u16 page)
 	}
 
 	qca8k_current_page = page;
+	usleep_range(1000, 2000);
 	return 0;
 }
 
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index a878486d9bcd..d365f85ab34f 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -20,7 +20,7 @@
 #define PHY_ID_QCA8337					0x004dd036
 #define QCA8K_ID_QCA8337				0x13
 
-#define QCA8K_BUSY_WAIT_TIMEOUT				20
+#define QCA8K_BUSY_WAIT_TIMEOUT				2000
 
 #define QCA8K_NUM_FDB_RECORDS				2048
 

From 759bafb8a3226326ca357613bc90acf738f80c32 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:10 +0200
Subject: [PATCH 0173/3292] net: dsa: qca8k: add support for internal phy and
 internal mdio

Add support to setup_mdio_bus for internal phy declaration. Introduce a
flag to use the legacy port phy mapping by default and use the direct
mapping if a mdio node is detected in the switch node. Register a
dedicated mdio internal mdio bus to address the different mapping
between port and phy if the mdio node is detected.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 112 +++++++++++++++++++++++++++++-----------
 drivers/net/dsa/qca8k.h |   1 +
 2 files changed, 83 insertions(+), 30 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index df4cf6d75074..ba288181fd1a 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <net/dsa.h>
 #include <linux/of_net.h>
+#include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/if_bridge.h>
 #include <linux/mdio.h>
@@ -629,7 +630,7 @@ qca8k_port_to_phy(int port)
 }
 
 static int
-qca8k_mdio_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
+qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask)
 {
 	u16 r1, r2, page;
 	u32 val;
@@ -639,7 +640,7 @@ qca8k_mdio_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
 
 	ret = read_poll_timeout(qca8k_mii_read32, val, !(val & mask), 0,
 				QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
-				priv->bus, 0x10 | r2, r1);
+				bus, 0x10 | r2, r1);
 
 	/* Check if qca8k_read has failed for a different reason
 	 * before returnting -ETIMEDOUT
@@ -651,19 +652,16 @@ qca8k_mdio_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
 }
 
 static int
-qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
+qca8k_mdio_write(struct mii_bus *salve_bus, int phy, int regnum, u16 data)
 {
+	struct qca8k_priv *priv = salve_bus->priv;
 	u16 r1, r2, page;
-	u32 phy, val;
+	u32 val;
 	int ret;
 
 	if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
 		return -EINVAL;
 
-	/* callee is responsible for not passing bad ports,
-	 * but we still would like to make spills impossible.
-	 */
-	phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
 	val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN |
 	      QCA8K_MDIO_MASTER_WRITE | QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
 	      QCA8K_MDIO_MASTER_REG_ADDR(regnum) |
@@ -679,33 +677,29 @@ qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data)
 
 	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
 
-	ret = qca8k_mdio_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+	ret = qca8k_mdio_busy_wait(priv->bus, QCA8K_MDIO_MASTER_CTRL,
 				   QCA8K_MDIO_MASTER_BUSY);
 
 exit:
-	mutex_unlock(&priv->bus->mdio_lock);
-
 	/* even if the busy_wait timeouts try to clear the MASTER_EN */
-	qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
-			QCA8K_MDIO_MASTER_EN);
+	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, 0);
+
+	mutex_unlock(&priv->bus->mdio_lock);
 
 	return ret;
 }
 
 static int
-qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
+qca8k_mdio_read(struct mii_bus *salve_bus, int phy, int regnum)
 {
+	struct qca8k_priv *priv = salve_bus->priv;
 	u16 r1, r2, page;
-	u32 phy, val;
+	u32 val;
 	int ret;
 
 	if (regnum >= QCA8K_MDIO_MASTER_MAX_REG)
 		return -EINVAL;
 
-	/* callee is responsible for not passing bad ports,
-	 * but we still would like to make spills impossible.
-	 */
-	phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
 	val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN |
 	      QCA8K_MDIO_MASTER_READ | QCA8K_MDIO_MASTER_PHY_ADDR(phy) |
 	      QCA8K_MDIO_MASTER_REG_ADDR(regnum);
@@ -720,24 +714,22 @@ qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum)
 
 	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
 
-	ret = qca8k_mdio_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL,
+	ret = qca8k_mdio_busy_wait(priv->bus, QCA8K_MDIO_MASTER_CTRL,
 				   QCA8K_MDIO_MASTER_BUSY);
 	if (ret)
 		goto exit;
 
 	val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
-	val &= QCA8K_MDIO_MASTER_DATA_MASK;
 
 exit:
+	/* even if the busy_wait timeouts try to clear the MASTER_EN */
+	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, 0);
+
 	mutex_unlock(&priv->bus->mdio_lock);
 
 	if (val >= 0)
 		val &= QCA8K_MDIO_MASTER_DATA_MASK;
 
-	/* even if the busy_wait timeouts try to clear the MASTER_EN */
-	qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL,
-			QCA8K_MDIO_MASTER_EN);
-
 	return val;
 }
 
@@ -746,7 +738,14 @@ qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data)
 {
 	struct qca8k_priv *priv = ds->priv;
 
-	return qca8k_mdio_write(priv, port, regnum, data);
+	/* Check if the legacy mapping should be used and the
+	 * port is not correctly mapped to the right PHY in the
+	 * devicetree
+	 */
+	if (priv->legacy_phy_port_mapping)
+		port = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
+
+	return qca8k_mdio_write(priv->bus, port, regnum, data);
 }
 
 static int
@@ -755,7 +754,14 @@ qca8k_phy_read(struct dsa_switch *ds, int port, int regnum)
 	struct qca8k_priv *priv = ds->priv;
 	int ret;
 
-	ret = qca8k_mdio_read(priv, port, regnum);
+	/* Check if the legacy mapping should be used and the
+	 * port is not correctly mapped to the right PHY in the
+	 * devicetree
+	 */
+	if (priv->legacy_phy_port_mapping)
+		port = qca8k_port_to_phy(port) % PHY_MAX_ADDR;
+
+	ret = qca8k_mdio_read(priv->bus, port, regnum);
 
 	if (ret < 0)
 		return 0xffff;
@@ -763,11 +769,38 @@ qca8k_phy_read(struct dsa_switch *ds, int port, int regnum)
 	return ret;
 }
 
+static int
+qca8k_mdio_register(struct qca8k_priv *priv, struct device_node *mdio)
+{
+	struct dsa_switch *ds = priv->ds;
+	struct mii_bus *bus;
+
+	bus = devm_mdiobus_alloc(ds->dev);
+
+	if (!bus)
+		return -ENOMEM;
+
+	bus->priv = (void *)priv;
+	bus->name = "qca8k slave mii";
+	bus->read = qca8k_mdio_read;
+	bus->write = qca8k_mdio_write;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "qca8k-%d",
+		 ds->index);
+
+	bus->parent = ds->dev;
+	bus->phy_mask = ~ds->phys_mii_mask;
+
+	ds->slave_mii_bus = bus;
+
+	return devm_of_mdiobus_register(priv->dev, bus, mdio);
+}
+
 static int
 qca8k_setup_mdio_bus(struct qca8k_priv *priv)
 {
 	u32 internal_mdio_mask = 0, external_mdio_mask = 0, reg;
-	struct device_node *ports, *port;
+	struct device_node *ports, *port, *mdio;
+	phy_interface_t mode;
 	int err;
 
 	ports = of_get_child_by_name(priv->dev->of_node, "ports");
@@ -788,7 +821,10 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
 		if (!dsa_is_user_port(priv->ds, reg))
 			continue;
 
-		if (of_property_read_bool(port, "phy-handle"))
+		of_get_phy_mode(port, &mode);
+
+		if (of_property_read_bool(port, "phy-handle") &&
+		    mode != PHY_INTERFACE_MODE_INTERNAL)
 			external_mdio_mask |= BIT(reg);
 		else
 			internal_mdio_mask |= BIT(reg);
@@ -825,8 +861,23 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
 				       QCA8K_MDIO_MASTER_EN);
 	}
 
+	/* Check if the devicetree declare the port:phy mapping */
+	mdio = of_get_child_by_name(priv->dev->of_node, "mdio");
+	if (of_device_is_available(mdio)) {
+		err = qca8k_mdio_register(priv, mdio);
+		if (err)
+			of_node_put(mdio);
+
+		return err;
+	}
+
+	/* If a mapping can't be found the legacy mapping is used,
+	 * using the qca8k_port_to_phy function
+	 */
+	priv->legacy_phy_port_mapping = true;
 	priv->ops.phy_read = qca8k_phy_read;
 	priv->ops.phy_write = qca8k_phy_write;
+
 	return 0;
 }
 
@@ -1212,7 +1263,8 @@ qca8k_phylink_validate(struct dsa_switch *ds, int port,
 	case 5:
 		/* Internal PHY */
 		if (state->interface != PHY_INTERFACE_MODE_NA &&
-		    state->interface != PHY_INTERFACE_MODE_GMII)
+		    state->interface != PHY_INTERFACE_MODE_GMII &&
+		    state->interface != PHY_INTERFACE_MODE_INTERNAL)
 			goto unsupported;
 		break;
 	case 6: /* 2nd CPU port / external PHY */
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index d365f85ab34f..ed3b05ad6745 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -255,6 +255,7 @@ struct qca8k_priv {
 	u8 switch_revision;
 	u8 rgmii_tx_delay;
 	u8 rgmii_rx_delay;
+	bool legacy_phy_port_mapping;
 	struct regmap *regmap;
 	struct mii_bus *bus;
 	struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];

From 0c994a28e7518f098c84a3049cb2915780db873a Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:11 +0200
Subject: [PATCH 0174/3292] devicetree: bindings: dsa: qca8k: Document internal
 mdio definition

Document new way of declare mapping of internal PHY to port.
The new implementation directly declare the PHY connected to the port
by adding a node in the switch node. The driver detect this and register
an internal mdiobus using the mapping defined in the mdio node.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/dsa/qca8k.txt     | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
index 1daf68e7ae19..8c73f67c43ca 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -21,6 +21,10 @@ described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external
 mdio-bus each subnode describing a port needs to have a valid phandle
 referencing the internal PHY it is connected to. This is because there's no
 N:N mapping of port and PHY id.
+To declare the internal mdio-bus configuration, declare a mdio node in the
+switch node and declare the phandle for the port referencing the internal
+PHY is connected to. In this config a internal mdio-bus is registered and
+the mdio MASTER is used as communication.
 
 Don't use mixed external and internal mdio-bus configurations, as this is
 not supported by the hardware.
@@ -150,26 +154,61 @@ for the internal master mdio-bus configuration:
 				port@1 {
 					reg = <1>;
 					label = "lan1";
+					phy-mode = "internal";
+					phy-handle = <&phy_port1>;
 				};
 
 				port@2 {
 					reg = <2>;
 					label = "lan2";
+					phy-mode = "internal";
+					phy-handle = <&phy_port2>;
 				};
 
 				port@3 {
 					reg = <3>;
 					label = "lan3";
+					phy-mode = "internal";
+					phy-handle = <&phy_port3>;
 				};
 
 				port@4 {
 					reg = <4>;
 					label = "lan4";
+					phy-mode = "internal";
+					phy-handle = <&phy_port4>;
 				};
 
 				port@5 {
 					reg = <5>;
 					label = "wan";
+					phy-mode = "internal";
+					phy-handle = <&phy_port5>;
+				};
+			};
+
+			mdio {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				phy_port1: phy@0 {
+					reg = <0>;
+				};
+
+				phy_port2: phy@1 {
+					reg = <1>;
+				};
+
+				phy_port3: phy@2 {
+					reg = <2>;
+				};
+
+				phy_port4: phy@3 {
+					reg = <3>;
+				};
+
+				phy_port5: phy@4 {
+					reg = <4>;
 				};
 			};
 		};

From b7ebac354d54f1657bb89b7a7ca149db50203e6a Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:12 +0200
Subject: [PATCH 0175/3292] net: dsa: qca8k: improve internal mdio read/write
 bus access

Improve the internal mdio read/write bus access by caching the value
without accessing it for every read/write.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index ba288181fd1a..ccb3d89cf58c 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -655,6 +655,7 @@ static int
 qca8k_mdio_write(struct mii_bus *salve_bus, int phy, int regnum, u16 data)
 {
 	struct qca8k_priv *priv = salve_bus->priv;
+	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
 	u32 val;
 	int ret;
@@ -669,22 +670,22 @@ qca8k_mdio_write(struct mii_bus *salve_bus, int phy, int regnum, u16 data)
 
 	qca8k_split_addr(QCA8K_MDIO_MASTER_CTRL, &r1, &r2, &page);
 
-	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	ret = qca8k_set_page(priv->bus, page);
+	ret = qca8k_set_page(bus, page);
 	if (ret)
 		goto exit;
 
-	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+	qca8k_mii_write32(bus, 0x10 | r2, r1, val);
 
-	ret = qca8k_mdio_busy_wait(priv->bus, QCA8K_MDIO_MASTER_CTRL,
+	ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
 				   QCA8K_MDIO_MASTER_BUSY);
 
 exit:
 	/* even if the busy_wait timeouts try to clear the MASTER_EN */
-	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, 0);
+	qca8k_mii_write32(bus, 0x10 | r2, r1, 0);
 
-	mutex_unlock(&priv->bus->mdio_lock);
+	mutex_unlock(&bus->mdio_lock);
 
 	return ret;
 }
@@ -693,6 +694,7 @@ static int
 qca8k_mdio_read(struct mii_bus *salve_bus, int phy, int regnum)
 {
 	struct qca8k_priv *priv = salve_bus->priv;
+	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
 	u32 val;
 	int ret;
@@ -706,26 +708,26 @@ qca8k_mdio_read(struct mii_bus *salve_bus, int phy, int regnum)
 
 	qca8k_split_addr(QCA8K_MDIO_MASTER_CTRL, &r1, &r2, &page);
 
-	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	ret = qca8k_set_page(priv->bus, page);
+	ret = qca8k_set_page(bus, page);
 	if (ret)
 		goto exit;
 
-	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+	qca8k_mii_write32(bus, 0x10 | r2, r1, val);
 
-	ret = qca8k_mdio_busy_wait(priv->bus, QCA8K_MDIO_MASTER_CTRL,
+	ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL,
 				   QCA8K_MDIO_MASTER_BUSY);
 	if (ret)
 		goto exit;
 
-	val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
 
 exit:
 	/* even if the busy_wait timeouts try to clear the MASTER_EN */
-	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, 0);
+	qca8k_mii_write32(bus, 0x10 | r2, r1, 0);
 
-	mutex_unlock(&priv->bus->mdio_lock);
+	mutex_unlock(&bus->mdio_lock);
 
 	if (val >= 0)
 		val &= QCA8K_MDIO_MASTER_DATA_MASK;

From a46aec02bc06ac2c33f326339e4ef88c735dc30d Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:13 +0200
Subject: [PATCH 0176/3292] net: dsa: qca8k: pass switch_revision info to phy
 dev_flags

Define get_phy_flags to pass switch_Revision needed to tweak the
internal PHY with debug values based on the revision.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index ccb3d89cf58c..4753228f02b3 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1732,6 +1732,22 @@ qca8k_port_vlan_del(struct dsa_switch *ds, int port,
 	return ret;
 }
 
+static u32 qca8k_get_phy_flags(struct dsa_switch *ds, int port)
+{
+	struct qca8k_priv *priv = ds->priv;
+
+	/* Communicate to the phy internal driver the switch revision.
+	 * Based on the switch revision different values needs to be
+	 * set to the dbg and mmd reg on the phy.
+	 * The first 2 bit are used to communicate the switch revision
+	 * to the phy driver.
+	 */
+	if (port > 0 && port < 6)
+		return priv->switch_revision;
+
+	return 0;
+}
+
 static enum dsa_tag_protocol
 qca8k_get_tag_protocol(struct dsa_switch *ds, int port,
 		       enum dsa_tag_protocol mp)
@@ -1765,6 +1781,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
 	.phylink_mac_config	= qca8k_phylink_mac_config,
 	.phylink_mac_link_down	= qca8k_phylink_mac_link_down,
 	.phylink_mac_link_up	= qca8k_phylink_mac_link_up,
+	.get_phy_flags		= qca8k_get_phy_flags,
 };
 
 static int qca8k_read_switch_id(struct qca8k_priv *priv)

From d0e13fd5626c3346dfb43831f8fb42b14764dac3 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:14 +0200
Subject: [PATCH 0177/3292] net: phy: at803x: clean whitespace errors

Clean any whitespace errors and fix not aligned define.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 32af52dd5aed..d2378a73de6f 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -83,8 +83,8 @@
 #define AT803X_MODE_CFG_MASK			0x0F
 #define AT803X_MODE_CFG_SGMII			0x01
 
-#define AT803X_PSSR			0x11	/*PHY-Specific Status Register*/
-#define AT803X_PSSR_MR_AN_COMPLETE	0x0200
+#define AT803X_PSSR				0x11	/*PHY-Specific Status Register*/
+#define AT803X_PSSR_MR_AN_COMPLETE		0x0200
 
 #define AT803X_DEBUG_REG_0			0x00
 #define AT803X_DEBUG_RX_CLK_DLY_EN		BIT(15)
@@ -128,24 +128,28 @@
 #define AT803X_CLK_OUT_STRENGTH_HALF		1
 #define AT803X_CLK_OUT_STRENGTH_QUARTER		2
 
-#define AT803X_DEFAULT_DOWNSHIFT 5
-#define AT803X_MIN_DOWNSHIFT 2
-#define AT803X_MAX_DOWNSHIFT 9
+#define AT803X_DEFAULT_DOWNSHIFT		5
+#define AT803X_MIN_DOWNSHIFT			2
+#define AT803X_MAX_DOWNSHIFT			9
 
 #define AT803X_MMD3_SMARTEEE_CTL1		0x805b
 #define AT803X_MMD3_SMARTEEE_CTL2		0x805c
 #define AT803X_MMD3_SMARTEEE_CTL3		0x805d
 #define AT803X_MMD3_SMARTEEE_CTL3_LPI_EN	BIT(8)
 
-#define ATH9331_PHY_ID 0x004dd041
-#define ATH8030_PHY_ID 0x004dd076
-#define ATH8031_PHY_ID 0x004dd074
-#define ATH8032_PHY_ID 0x004dd023
-#define ATH8035_PHY_ID 0x004dd072
+#define ATH9331_PHY_ID				0x004dd041
+#define ATH8030_PHY_ID				0x004dd076
+#define ATH8031_PHY_ID				0x004dd074
+#define ATH8032_PHY_ID				0x004dd023
+#define ATH8035_PHY_ID				0x004dd072
 #define AT8030_PHY_ID_MASK			0xffffffef
 
-#define AT803X_PAGE_FIBER		0
-#define AT803X_PAGE_COPPER		1
+#define AT803X_PAGE_FIBER			0
+#define AT803X_PAGE_COPPER			1
+
+/* don't turn off internal PLL */
+#define AT803X_KEEP_PLL_ENABLED			BIT(0)
+#define AT803X_DISABLE_SMARTEEE			BIT(1)
 
 MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
 MODULE_AUTHOR("Matus Ujhelyi");
@@ -153,8 +157,6 @@ MODULE_LICENSE("GPL");
 
 struct at803x_priv {
 	int flags;
-#define AT803X_KEEP_PLL_ENABLED	BIT(0)	/* don't turn off internal PLL */
-#define AT803X_DISABLE_SMARTEEE	BIT(1)
 	u16 clk_25m_reg;
 	u16 clk_25m_mask;
 	u8 smarteee_lpi_tw_1g;

From 272833b9b3b3969be7a91839121d86662c8c4253 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:00:15 +0200
Subject: [PATCH 0178/3292] net: phy: add support for qca8k switch internal PHY
 in at803x

Since the at803x share the same regs, it's assumed they are based on the
same implementation. Make it part of the at803x PHY driver to skip
having redudant code.
Add initial support for qca8k internal PHYs. The internal PHYs requires
special mmd and debug values to be set based on the switch revision
passwd using the dev_flags. Supports output of idle, receive and eee_wake
errors stats.
Some debug values sets can't be translated as the documentation lacks any
reference about them.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig  |   5 +-
 drivers/net/phy/at803x.c | 132 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 288bf405ebdb..25511f39b01f 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -247,10 +247,11 @@ config NXP_TJA11XX_PHY
 	  Currently supports the NXP TJA1100 and TJA1101 PHY.
 
 config AT803X_PHY
-	tristate "Qualcomm Atheros AR803X PHYs"
+	tristate "Qualcomm Atheros AR803X PHYs and QCA833x PHYs"
 	depends on REGULATOR
 	help
-	  Currently supports the AR8030, AR8031, AR8033 and AR8035 model
+	  Currently supports the AR8030, AR8031, AR8033, AR8035 and internal
+	  QCA8337(Internal qca8k PHY) model
 
 config QSEMI_PHY
 	tristate "Quality Semiconductor PHYs"
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index d2378a73de6f..6697c9368b40 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -92,10 +92,16 @@
 #define AT803X_DEBUG_REG_5			0x05
 #define AT803X_DEBUG_TX_CLK_DLY_EN		BIT(8)
 
+#define AT803X_DEBUG_REG_3C			0x3C
+
+#define AT803X_DEBUG_REG_3D			0x3D
+
 #define AT803X_DEBUG_REG_1F			0x1F
 #define AT803X_DEBUG_PLL_ON			BIT(2)
 #define AT803X_DEBUG_RGMII_1V8			BIT(3)
 
+#define MDIO_AZ_DEBUG				0x800D
+
 /* AT803x supports either the XTAL input pad, an internal PLL or the
  * DSP as clock reference for the clock output pad. The XTAL reference
  * is only used for 25 MHz output, all other frequencies need the PLL.
@@ -144,6 +150,12 @@
 #define ATH8035_PHY_ID				0x004dd072
 #define AT8030_PHY_ID_MASK			0xffffffef
 
+#define QCA8327_PHY_ID				0x004dd034
+#define QCA8337_PHY_ID				0x004dd036
+#define QCA8K_PHY_ID_MASK			0xffffffff
+
+#define QCA8K_DEVFLAGS_REVISION_MASK		GENMASK(2, 0)
+
 #define AT803X_PAGE_FIBER			0
 #define AT803X_PAGE_COPPER			1
 
@@ -155,6 +167,24 @@ MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
 MODULE_AUTHOR("Matus Ujhelyi");
 MODULE_LICENSE("GPL");
 
+enum stat_access_type {
+	PHY,
+	MMD
+};
+
+struct at803x_hw_stat {
+	const char *string;
+	u8 reg;
+	u32 mask;
+	enum stat_access_type access_type;
+};
+
+static struct at803x_hw_stat at803x_hw_stats[] = {
+	{ "phy_idle_errors", 0xa, GENMASK(7, 0), PHY},
+	{ "phy_receive_errors", 0x15, GENMASK(15, 0), PHY},
+	{ "eee_wake_errors", 0x16, GENMASK(15, 0), MMD},
+};
+
 struct at803x_priv {
 	int flags;
 	u16 clk_25m_reg;
@@ -164,6 +194,7 @@ struct at803x_priv {
 	struct regulator_dev *vddio_rdev;
 	struct regulator_dev *vddh_rdev;
 	struct regulator *vddio;
+	u64 stats[ARRAY_SIZE(at803x_hw_stats)];
 };
 
 struct at803x_context {
@@ -175,6 +206,17 @@ struct at803x_context {
 	u16 led_control;
 };
 
+static int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data)
+{
+	int ret;
+
+	ret = phy_write(phydev, AT803X_DEBUG_ADDR, reg);
+	if (ret < 0)
+		return ret;
+
+	return phy_write(phydev, AT803X_DEBUG_DATA, data);
+}
+
 static int at803x_debug_reg_read(struct phy_device *phydev, u16 reg)
 {
 	int ret;
@@ -337,6 +379,53 @@ static void at803x_get_wol(struct phy_device *phydev,
 		wol->wolopts |= WAKE_MAGIC;
 }
 
+static int at803x_get_sset_count(struct phy_device *phydev)
+{
+	return ARRAY_SIZE(at803x_hw_stats);
+}
+
+static void at803x_get_strings(struct phy_device *phydev, u8 *data)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(at803x_hw_stats); i++) {
+		strscpy(data + i * ETH_GSTRING_LEN,
+			at803x_hw_stats[i].string, ETH_GSTRING_LEN);
+	}
+}
+
+static u64 at803x_get_stat(struct phy_device *phydev, int i)
+{
+	struct at803x_hw_stat stat = at803x_hw_stats[i];
+	struct at803x_priv *priv = phydev->priv;
+	int val;
+	u64 ret;
+
+	if (stat.access_type == MMD)
+		val = phy_read_mmd(phydev, MDIO_MMD_PCS, stat.reg);
+	else
+		val = phy_read(phydev, stat.reg);
+
+	if (val < 0) {
+		ret = U64_MAX;
+	} else {
+		val = val & stat.mask;
+		priv->stats[i] += val;
+		ret = priv->stats[i];
+	}
+
+	return ret;
+}
+
+static void at803x_get_stats(struct phy_device *phydev,
+			     struct ethtool_stats *stats, u64 *data)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(at803x_hw_stats); i++)
+		data[i] = at803x_get_stat(phydev, i);
+}
+
 static int at803x_suspend(struct phy_device *phydev)
 {
 	int value;
@@ -1172,6 +1261,34 @@ static int at803x_cable_test_start(struct phy_device *phydev)
 	return 0;
 }
 
+static int qca83xx_config_init(struct phy_device *phydev)
+{
+	u8 switch_revision;
+
+	switch_revision = phydev->dev_flags & QCA8K_DEVFLAGS_REVISION_MASK;
+
+	switch (switch_revision) {
+	case 1:
+		/* For 100M waveform */
+		at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_0, 0x02ea);
+		/* Turn on Gigabit clock */
+		at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3D, 0x68a0);
+		break;
+
+	case 2:
+		phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0);
+		fallthrough;
+	case 4:
+		phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_AZ_DEBUG, 0x803f);
+		at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3D, 0x6860);
+		at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_5, 0x2c46);
+		at803x_debug_reg_write(phydev, AT803X_DEBUG_REG_3C, 0x6000);
+		break;
+	}
+
+	return 0;
+}
+
 static struct phy_driver at803x_driver[] = {
 {
 	/* Qualcomm Atheros AR8035 */
@@ -1268,7 +1385,20 @@ static struct phy_driver at803x_driver[] = {
 	.read_status		= at803x_read_status,
 	.soft_reset		= genphy_soft_reset,
 	.config_aneg		= at803x_config_aneg,
-} };
+}, {
+	/* QCA8337 */
+	.phy_id = QCA8337_PHY_ID,
+	.phy_id_mask = QCA8K_PHY_ID_MASK,
+	.name = "QCA PHY 8337",
+	/* PHY_GBIT_FEATURES */
+	.probe = at803x_probe,
+	.flags = PHY_IS_INTERNAL,
+	.config_init = qca83xx_config_init,
+	.soft_reset = genphy_soft_reset,
+	.get_sset_count = at803x_get_sset_count,
+	.get_strings = at803x_get_strings,
+	.get_stats = at803x_get_stats,
+}, };
 
 module_phy_driver(at803x_driver);
 

From 948640698199c06c8bfbdcc4d602922ad7a6f77d Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:03:49 +0200
Subject: [PATCH 0179/3292] net: mdio: ipq8064: clean whitespaces in define

Fix mixed whitespace and tab for define spacing.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-ipq8064.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index 8fe8f0119fc1..f776a843a63b 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -15,25 +15,26 @@
 #include <linux/regmap.h>
 
 /* MII address register definitions */
-#define MII_ADDR_REG_ADDR                       0x10
-#define MII_BUSY                                BIT(0)
-#define MII_WRITE                               BIT(1)
-#define MII_CLKRANGE_60_100M                    (0 << 2)
-#define MII_CLKRANGE_100_150M                   (1 << 2)
-#define MII_CLKRANGE_20_35M                     (2 << 2)
-#define MII_CLKRANGE_35_60M                     (3 << 2)
-#define MII_CLKRANGE_150_250M                   (4 << 2)
-#define MII_CLKRANGE_250_300M                   (5 << 2)
+#define MII_ADDR_REG_ADDR			0x10
+#define MII_BUSY				BIT(0)
+#define MII_WRITE				BIT(1)
+#define MII_CLKRANGE(x)				((x) << 2)
+#define MII_CLKRANGE_60_100M			MII_CLKRANGE(0)
+#define MII_CLKRANGE_100_150M			MII_CLKRANGE(1)
+#define MII_CLKRANGE_20_35M			MII_CLKRANGE(2)
+#define MII_CLKRANGE_35_60M			MII_CLKRANGE(3)
+#define MII_CLKRANGE_150_250M			MII_CLKRANGE(4)
+#define MII_CLKRANGE_250_300M			MII_CLKRANGE(5)
 #define MII_CLKRANGE_MASK			GENMASK(4, 2)
 #define MII_REG_SHIFT				6
 #define MII_REG_MASK				GENMASK(10, 6)
 #define MII_ADDR_SHIFT				11
 #define MII_ADDR_MASK				GENMASK(15, 11)
 
-#define MII_DATA_REG_ADDR                       0x14
+#define MII_DATA_REG_ADDR			0x14
 
-#define MII_MDIO_DELAY_USEC                     (1000)
-#define MII_MDIO_RETRY_MSEC                     (10)
+#define MII_MDIO_DELAY_USEC			(1000)
+#define MII_MDIO_RETRY_MSEC			(10)
 
 struct ipq8064_mdio {
 	struct regmap *base; /* NSS_GMAC0_BASE */

From b097bea10215315e8ee17f88b4c1bbb521b1878c Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:03:50 +0200
Subject: [PATCH 0180/3292] net: mdio: ipq8064: add regmap config to disable
 REGCACHE

mdio drivers should not use REGCHACHE. Also disable locking since it's
handled by the mdio users and regmap is always accessed atomically.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-ipq8064.c | 34 +++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index f776a843a63b..14b3c310af73 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -7,10 +7,9 @@
 
 #include <linux/delay.h>
 #include <linux/kernel.h>
-#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of_mdio.h>
-#include <linux/phy.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
@@ -97,14 +96,34 @@ ipq8064_mdio_write(struct mii_bus *bus, int phy_addr, int reg_offset, u16 data)
 	return ipq8064_mdio_wait_busy(priv);
 }
 
+static const struct regmap_config ipq8064_mdio_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.can_multi_write = false,
+	/* the mdio lock is used by any user of this mdio driver */
+	.disable_locking = true,
+
+	.cache_type = REGCACHE_NONE,
+};
+
 static int
 ipq8064_mdio_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct ipq8064_mdio *priv;
+	struct resource res;
 	struct mii_bus *bus;
+	void __iomem *base;
 	int ret;
 
+	if (of_address_to_resource(np, 0, &res))
+		return -ENOMEM;
+
+	base = ioremap(res.start, resource_size(&res));
+	if (!base)
+		return -ENOMEM;
+
 	bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*priv));
 	if (!bus)
 		return -ENOMEM;
@@ -116,15 +135,10 @@ ipq8064_mdio_probe(struct platform_device *pdev)
 	bus->parent = &pdev->dev;
 
 	priv = bus->priv;
-	priv->base = device_node_to_regmap(np);
-	if (IS_ERR(priv->base)) {
-		if (priv->base == ERR_PTR(-EPROBE_DEFER))
-			return -EPROBE_DEFER;
-
-		dev_err(&pdev->dev, "error getting device regmap, error=%pe\n",
-			priv->base);
+	priv->base = devm_regmap_init_mmio(&pdev->dev, base,
+					   &ipq8064_mdio_regmap_config);
+	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
-	}
 
 	ret = of_mdiobus_register(bus, np);
 	if (ret)

From 77091933e453a258bbe9ff2aeb1c8d6fc1db7ef9 Mon Sep 17 00:00:00 2001
From: Ansuel Smith <ansuelsmth@gmail.com>
Date: Fri, 14 May 2021 23:03:51 +0200
Subject: [PATCH 0181/3292] net: mdio: ipq8064: enlarge sleep after read/write
 operation

With the use of the qca8k dsa driver, some problem arised related to
port status detection. With a load on a specific port (for example a
simple speed test), the driver starts to behave in a strange way and
garbage data is produced. To address this, enlarge the sleep delay and
address a bug for the reg offset 31 that require additional delay for
this specific reg.

Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-ipq8064.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index 14b3c310af73..bd1aea2d5a26 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -65,7 +65,7 @@ ipq8064_mdio_read(struct mii_bus *bus, int phy_addr, int reg_offset)
 		   ((reg_offset << MII_REG_SHIFT) & MII_REG_MASK);
 
 	regmap_write(priv->base, MII_ADDR_REG_ADDR, miiaddr);
-	usleep_range(8, 10);
+	usleep_range(10, 13);
 
 	err = ipq8064_mdio_wait_busy(priv);
 	if (err)
@@ -91,7 +91,14 @@ ipq8064_mdio_write(struct mii_bus *bus, int phy_addr, int reg_offset, u16 data)
 		   ((reg_offset << MII_REG_SHIFT) & MII_REG_MASK);
 
 	regmap_write(priv->base, MII_ADDR_REG_ADDR, miiaddr);
-	usleep_range(8, 10);
+
+	/* For the specific reg 31 extra time is needed or the next
+	 * read will produce garbage data.
+	 */
+	if (reg_offset == 31)
+		usleep_range(30, 43);
+	else
+		usleep_range(10, 13);
 
 	return ipq8064_mdio_wait_busy(priv);
 }

From 513f485ca5163c6cba869602d076a8e2f04d1ca1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 14 May 2021 12:55:34 -0700
Subject: [PATCH 0182/3292] libbpf: Reject static entry-point BPF programs

Detect use of static entry-point BPF programs (those with SEC() markings) and
emit error message. This is similar to
c1cccec9c636 ("libbpf: Reject static maps") but for BPF programs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210514195534.1440970-1-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 182bd3d3f728..e58f51b24574 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -677,6 +677,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
+		if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
+			return -ENOTSUP;
+		}
+
 		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
 			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
 

From 5b2d3326cdf83fde3590f45c93507c1c852327e8 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Fri, 14 May 2021 12:32:26 +0300
Subject: [PATCH 0183/3292] gpio: crystalcove: remove platform_set_drvdata() +
 cleanup probe

The platform_set_drvdata() call is only useful if we need to retrieve back
the private information.
Since the driver doesn't do that, it's not useful to have it.

If this is removed, we can also just do a direct return on
devm_gpiochip_add_data(). We don't need to print that this call failed as
there are other ways to log/see this during probe.

Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-crystalcove.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 2ba225720086..5a909f3c79e8 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -339,8 +339,6 @@ static int crystalcove_gpio_probe(struct platform_device *pdev)
 	if (!cg)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, cg);
-
 	mutex_init(&cg->buslock);
 	cg->chip.label = KBUILD_MODNAME;
 	cg->chip.direction_input = crystalcove_gpio_dir_in;
@@ -372,13 +370,7 @@ static int crystalcove_gpio_probe(struct platform_device *pdev)
 		return retval;
 	}
 
-	retval = devm_gpiochip_add_data(&pdev->dev, &cg->chip, cg);
-	if (retval) {
-		dev_warn(&pdev->dev, "add gpio chip error: %d\n", retval);
-		return retval;
-	}
-
-	return 0;
+	return devm_gpiochip_add_data(&pdev->dev, &cg->chip, cg);
 }
 
 static struct platform_driver crystalcove_gpio_driver = {

From 9a959cab22194d633b3a1d9d1943b0df3475122c Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Mon, 17 May 2021 09:11:48 +0200
Subject: [PATCH 0184/3292] batman-adv: Start new development cycle

This version will contain all the (major or even only minor) changes for
Linux 5.14.

The version number isn't a semantic version number with major and minor
information. It is just encoding the year of the expected publishing as
Linux -rc1 and the number of published versions this year (starting at 0).

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 8f0102b71656..b8f819a53a86 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2021.1"
+#define BATADV_SOURCE_VERSION "2021.2"
 #endif
 
 /* B.A.T.M.A.N. parameters */

From d295345abb3e91e5a16f3293eb12b111e352bd2b Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 10 May 2021 15:05:42 +0200
Subject: [PATCH 0185/3292] batman-adv: Always send iface index+name in genlmsg

The batman-adv netlink messages often contain the interface index and
interface name in the same message. This makes it easy for the receiver to
operate on the incoming data when it either needs to print something or
needs to operate on the interface index.

But one of the attributes was missing for:

* neighbor table dumps
* originator table dumps
* gateway list dumps
* query of hardif information
* query of vid information

The userspace therefore had to implement special workarounds using
SIOCGIFNAME or SIOCGIFINDEX depending on what was actually provided.
Providing both information simplifies the userspace code massively without
adding a lot of extra overhead in the kernel portion.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_iv_ogm.c |  6 ++++++
 net/batman-adv/bat_v.c      | 10 ++++++++++
 net/batman-adv/netlink.c    |  8 ++++++++
 3 files changed, 24 insertions(+)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 789f257be24f..680def809838 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1849,6 +1849,8 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
 		    orig_node->orig) ||
 	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
 		    neigh_node->addr) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   neigh_node->if_incoming->net_dev->name) ||
 	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			neigh_node->if_incoming->net_dev->ifindex) ||
 	    nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
@@ -2078,6 +2080,8 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
 
 	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
 		    hardif_neigh->addr) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   hardif_neigh->if_incoming->net_dev->name) ||
 	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			hardif_neigh->if_incoming->net_dev->ifindex) ||
 	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
@@ -2459,6 +2463,8 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
 		    router->addr) ||
 	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
 			   router->if_incoming->net_dev->name) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			router->if_incoming->net_dev->ifindex) ||
 	    nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
 			gw_node->bandwidth_down) ||
 	    nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP,
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e1ca2b8c3152..b98aea958e3d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -146,6 +146,8 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
 
 	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
 		    hardif_neigh->addr) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   hardif_neigh->if_incoming->net_dev->name) ||
 	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			hardif_neigh->if_incoming->net_dev->ifindex) ||
 	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
@@ -298,6 +300,8 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
 	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
 	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
 		    neigh_node->addr) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   neigh_node->if_incoming->net_dev->name) ||
 	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			neigh_node->if_incoming->net_dev->ifindex) ||
 	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
@@ -739,6 +743,12 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
 		goto out;
 	}
 
+	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			router->if_incoming->net_dev->ifindex)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
 	if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
 			gw_node->bandwidth_down)) {
 		genlmsg_cancel(msg, hdr);
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index f317d206b411..b6cc746e01a6 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -814,6 +814,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg,
 			bat_priv->soft_iface->ifindex))
 		goto nla_put_failure;
 
+	if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME,
+			   bat_priv->soft_iface->name))
+		goto nla_put_failure;
+
 	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			net_dev->ifindex) ||
 	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
@@ -1045,6 +1049,10 @@ static int batadv_netlink_vlan_fill(struct sk_buff *msg,
 			bat_priv->soft_iface->ifindex))
 		goto nla_put_failure;
 
+	if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME,
+			   bat_priv->soft_iface->name))
+		goto nla_put_failure;
+
 	if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
 		goto nla_put_failure;
 

From 3f69339068f93e206e581e6ab9927502f8722ac7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Mon, 17 May 2021 00:33:07 +0200
Subject: [PATCH 0186/3292] batman-adv: bcast: queue per interface, if needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we schedule a broadcast packet like:

3x: [ [(re-)queue] --> for(hard-if): maybe-transmit ]

The intention of queueing a broadcast packet multiple times is to
increase robustness for wireless interfaces. However on interfaces
which we only broadcast on once the queueing induces an unnecessary
penalty. This patch restructures the queueing to be performed on a per
interface basis:

for(hard-if):
- transmit
- if wireless: [queue] --> transmit --> [requeue] --> transmit

Next to the performance benefits on non-wireless interfaces this
should also make it easier to apply alternative strategies for
transmissions on wireless interfaces in the future (for instance sending
via unicast transmissions on wireless interfaces, without queueing in
batman-adv, if appropriate).

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/main.h           |   1 -
 net/batman-adv/routing.c        |   9 +-
 net/batman-adv/send.c           | 376 +++++++++++++++++++++-----------
 net/batman-adv/send.h           |  12 +-
 net/batman-adv/soft-interface.c |  12 +-
 5 files changed, 271 insertions(+), 139 deletions(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index b8f819a53a86..014235fd4681 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -88,7 +88,6 @@
 /* number of packets to send for broadcasts on different interface types */
 #define BATADV_NUM_BCASTS_DEFAULT 1
 #define BATADV_NUM_BCASTS_WIRELESS 3
-#define BATADV_NUM_BCASTS_MAX 3
 
 /* length of the single packet used by the TP meter */
 #define BATADV_TP_PACKET_LEN ETH_DATA_LEN
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 40f5cffde6a3..bb9e93e3d98c 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1182,9 +1182,9 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
 	struct batadv_bcast_packet *bcast_packet;
 	struct ethhdr *ethhdr;
 	int hdr_size = sizeof(*bcast_packet);
-	int ret = NET_RX_DROP;
 	s32 seq_diff;
 	u32 seqno;
+	int ret;
 
 	/* drop packet if it has not necessary minimum size */
 	if (unlikely(!pskb_may_pull(skb, hdr_size)))
@@ -1210,7 +1210,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
 	if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
 		goto free_skb;
 
-	if (bcast_packet->ttl < 2)
+	if (bcast_packet->ttl-- < 2)
 		goto free_skb;
 
 	orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig);
@@ -1249,7 +1249,9 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
 	batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
 
 	/* rebroadcast packet */
-	batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false);
+	ret = batadv_forw_bcast_packet(bat_priv, skb, 0, false);
+	if (ret == NETDEV_TX_BUSY)
+		goto free_skb;
 
 	/* don't hand the broadcast up if it is from an originator
 	 * from the same backbone.
@@ -1275,6 +1277,7 @@ spin_unlock:
 	spin_unlock_bh(&orig_node->bcast_seqno_lock);
 free_skb:
 	kfree_skb(skb);
+	ret = NET_RX_DROP;
 out:
 	if (orig_node)
 		batadv_orig_node_put(orig_node);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 157abe92d827..07b0ba265472 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -737,57 +737,48 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
 }
 
 /**
- * batadv_add_bcast_packet_to_list() - queue broadcast packet for multiple sends
+ * batadv_forw_bcast_packet_to_list() - queue broadcast packet for transmissions
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: broadcast packet to add
  * @delay: number of jiffies to wait before sending
  * @own_packet: true if it is a self-generated broadcast packet
+ * @if_in: the interface where the packet was received on
+ * @if_out: the outgoing interface to queue on
  *
- * add a broadcast packet to the queue and setup timers. broadcast packets
+ * Adds a broadcast packet to the queue and sets up timers. Broadcast packets
  * are sent multiple times to increase probability for being received.
  *
- * The skb is not consumed, so the caller should make sure that the
- * skb is freed.
- *
  * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
  */
-int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
-				    const struct sk_buff *skb,
-				    unsigned long delay,
-				    bool own_packet)
+static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
+					    struct sk_buff *skb,
+					    unsigned long delay,
+					    bool own_packet,
+					    struct batadv_hard_iface *if_in,
+					    struct batadv_hard_iface *if_out)
 {
-	struct batadv_hard_iface *primary_if;
 	struct batadv_forw_packet *forw_packet;
-	struct batadv_bcast_packet *bcast_packet;
+	unsigned long send_time = jiffies;
 	struct sk_buff *newskb;
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto err;
-
 	newskb = skb_copy(skb, GFP_ATOMIC);
-	if (!newskb) {
-		batadv_hardif_put(primary_if);
+	if (!newskb)
 		goto err;
-	}
 
-	forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
+	forw_packet = batadv_forw_packet_alloc(if_in, if_out,
 					       &bat_priv->bcast_queue_left,
 					       bat_priv, newskb);
-	batadv_hardif_put(primary_if);
 	if (!forw_packet)
 		goto err_packet_free;
 
-	/* as we have a copy now, it is safe to decrease the TTL */
-	bcast_packet = (struct batadv_bcast_packet *)newskb->data;
-	bcast_packet->ttl--;
-
 	forw_packet->own = own_packet;
 
 	INIT_DELAYED_WORK(&forw_packet->delayed_work,
 			  batadv_send_outstanding_bcast_packet);
 
-	batadv_forw_packet_bcast_queue(bat_priv, forw_packet, jiffies + delay);
+	send_time += delay ? delay : msecs_to_jiffies(5);
+
+	batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time);
 	return NETDEV_TX_OK;
 
 err_packet_free:
@@ -796,10 +787,220 @@ err:
 	return NETDEV_TX_BUSY;
 }
 
+/**
+ * batadv_forw_bcast_packet_if() - forward and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ * @if_in: the interface where the packet was received on
+ * @if_out: the outgoing interface to forward to
+ *
+ * Transmits a broadcast packet on the specified interface either immediately
+ * or if a delay is given after that. Furthermore, queues additional
+ * retransmissions if this interface is a wireless one.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
+ */
+static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
+				       struct sk_buff *skb,
+				       unsigned long delay,
+				       bool own_packet,
+				       struct batadv_hard_iface *if_in,
+				       struct batadv_hard_iface *if_out)
+{
+	unsigned int num_bcasts = if_out->num_bcasts;
+	struct sk_buff *newskb;
+	int ret = NETDEV_TX_OK;
+
+	if (!delay) {
+		newskb = skb_copy(skb, GFP_ATOMIC);
+		if (!newskb)
+			return NETDEV_TX_BUSY;
+
+		batadv_send_broadcast_skb(newskb, if_out);
+		num_bcasts--;
+	}
+
+	/* delayed broadcast or rebroadcasts? */
+	if (num_bcasts >= 1) {
+		BATADV_SKB_CB(skb)->num_bcasts = num_bcasts;
+
+		ret = batadv_forw_bcast_packet_to_list(bat_priv, skb, delay,
+						       own_packet, if_in,
+						       if_out);
+	}
+
+	return ret;
+}
+
+/**
+ * batadv_send_no_broadcast() - check whether (re)broadcast is necessary
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to check
+ * @own_packet: true if it is a self-generated broadcast packet
+ * @if_out: the outgoing interface checked and considered for (re)broadcast
+ *
+ * Return: False if a packet needs to be (re)broadcasted on the given interface,
+ * true otherwise.
+ */
+static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv,
+				     struct sk_buff *skb, bool own_packet,
+				     struct batadv_hard_iface *if_out)
+{
+	struct batadv_hardif_neigh_node *neigh_node = NULL;
+	struct batadv_bcast_packet *bcast_packet;
+	u8 *orig_neigh;
+	u8 *neigh_addr;
+	char *type;
+	int ret;
+
+	if (!own_packet) {
+		neigh_addr = eth_hdr(skb)->h_source;
+		neigh_node = batadv_hardif_neigh_get(if_out,
+						     neigh_addr);
+	}
+
+	bcast_packet = (struct batadv_bcast_packet *)skb->data;
+	orig_neigh = neigh_node ? neigh_node->orig : NULL;
+
+	ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig,
+					 orig_neigh);
+
+	if (neigh_node)
+		batadv_hardif_neigh_put(neigh_node);
+
+	/* ok, may broadcast */
+	if (!ret)
+		return false;
+
+	/* no broadcast */
+	switch (ret) {
+	case BATADV_HARDIF_BCAST_NORECIPIENT:
+		type = "no neighbor";
+		break;
+	case BATADV_HARDIF_BCAST_DUPFWD:
+		type = "single neighbor is source";
+		break;
+	case BATADV_HARDIF_BCAST_DUPORIG:
+		type = "single neighbor is originator";
+		break;
+	default:
+		type = "unknown";
+	}
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "BCAST packet from orig %pM on %s suppressed: %s\n",
+		   bcast_packet->orig,
+		   if_out->net_dev->name, type);
+
+	return true;
+}
+
+/**
+ * __batadv_forw_bcast_packet() - forward and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ *
+ * Transmits a broadcast packet either immediately or if a delay is given
+ * after that. Furthermore, queues additional retransmissions on wireless
+ * interfaces.
+ *
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the given skb might not be
+ * modifiable anymore.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
+ */
+static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
+				      struct sk_buff *skb,
+				      unsigned long delay,
+				      bool own_packet)
+{
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_hard_iface *primary_if;
+	int ret = NETDEV_TX_OK;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if)
+		return NETDEV_TX_BUSY;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->soft_iface != bat_priv->soft_iface)
+			continue;
+
+		if (!kref_get_unless_zero(&hard_iface->refcount))
+			continue;
+
+		if (batadv_send_no_broadcast(bat_priv, skb, own_packet,
+					     hard_iface)) {
+			batadv_hardif_put(hard_iface);
+			continue;
+		}
+
+		ret = batadv_forw_bcast_packet_if(bat_priv, skb, delay,
+						  own_packet, primary_if,
+						  hard_iface);
+		batadv_hardif_put(hard_iface);
+
+		if (ret == NETDEV_TX_BUSY)
+			break;
+	}
+	rcu_read_unlock();
+
+	batadv_hardif_put(primary_if);
+	return ret;
+}
+
+/**
+ * batadv_forw_bcast_packet() - forward and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ *
+ * Transmits a broadcast packet either immediately or if a delay is given
+ * after that. Furthermore, queues additional retransmissions on wireless
+ * interfaces.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
+ */
+int batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
+			     struct sk_buff *skb,
+			     unsigned long delay,
+			     bool own_packet)
+{
+	return __batadv_forw_bcast_packet(bat_priv, skb, delay, own_packet);
+}
+
+/**
+ * batadv_send_bcast_packet() - send and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ *
+ * Transmits a broadcast packet either immediately or if a delay is given
+ * after that. Furthermore, queues additional retransmissions on wireless
+ * interfaces.
+ *
+ * Consumes the provided skb.
+ */
+void batadv_send_bcast_packet(struct batadv_priv *bat_priv,
+			      struct sk_buff *skb,
+			      unsigned long delay,
+			      bool own_packet)
+{
+	__batadv_forw_bcast_packet(bat_priv, skb, delay, own_packet);
+	consume_skb(skb);
+}
+
 /**
  * batadv_forw_packet_bcasts_left() - check if a retransmission is necessary
  * @forw_packet: the forwarding packet to check
- * @hard_iface: the interface to check on
  *
  * Checks whether a given packet has any (re)transmissions left on the provided
  * interface.
@@ -811,28 +1012,20 @@ err:
  * Return: True if (re)transmissions are left, false otherwise.
  */
 static bool
-batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
-			       struct batadv_hard_iface *hard_iface)
+batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet)
 {
-	unsigned int max;
-
-	if (hard_iface)
-		max = hard_iface->num_bcasts;
-	else
-		max = BATADV_NUM_BCASTS_MAX;
-
-	return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max;
+	return BATADV_SKB_CB(forw_packet->skb)->num_bcasts;
 }
 
 /**
- * batadv_forw_packet_bcasts_inc() - increment retransmission counter of a
+ * batadv_forw_packet_bcasts_dec() - decrement retransmission counter of a
  *  packet
- * @forw_packet: the packet to increase the counter for
+ * @forw_packet: the packet to decrease the counter for
  */
 static void
-batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
+batadv_forw_packet_bcasts_dec(struct batadv_forw_packet *forw_packet)
 {
-	BATADV_SKB_CB(forw_packet->skb)->num_bcasts++;
+	BATADV_SKB_CB(forw_packet->skb)->num_bcasts--;
 }
 
 /**
@@ -843,30 +1036,30 @@ batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
  */
 bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet)
 {
-	return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0;
+	unsigned char num_bcasts = BATADV_SKB_CB(forw_packet->skb)->num_bcasts;
+
+	return num_bcasts != forw_packet->if_outgoing->num_bcasts;
 }
 
+/**
+ * batadv_send_outstanding_bcast_packet() - transmit a queued broadcast packet
+ * @work: work queue item
+ *
+ * Transmits a queued broadcast packet and if necessary reschedules it.
+ */
 static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 {
-	struct batadv_hard_iface *hard_iface;
-	struct batadv_hardif_neigh_node *neigh_node;
-	struct delayed_work *delayed_work;
-	struct batadv_forw_packet *forw_packet;
-	struct batadv_bcast_packet *bcast_packet;
-	struct sk_buff *skb1;
-	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
 	unsigned long send_time = jiffies + msecs_to_jiffies(5);
+	struct batadv_forw_packet *forw_packet;
+	struct delayed_work *delayed_work;
+	struct batadv_priv *bat_priv;
+	struct sk_buff *skb1;
 	bool dropped = false;
-	u8 *neigh_addr;
-	u8 *orig_neigh;
-	int ret = 0;
 
 	delayed_work = to_delayed_work(work);
 	forw_packet = container_of(delayed_work, struct batadv_forw_packet,
 				   delayed_work);
-	soft_iface = forw_packet->if_incoming->soft_iface;
-	bat_priv = netdev_priv(soft_iface);
+	bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
 
 	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
 		dropped = true;
@@ -878,76 +1071,15 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 		goto out;
 	}
 
-	bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data;
+	/* send a copy of the saved skb */
+	skb1 = skb_copy(forw_packet->skb, GFP_ATOMIC);
+	if (!skb1)
+		goto out;
 
-	/* rebroadcast packet */
-	rcu_read_lock();
-	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-		if (hard_iface->soft_iface != soft_iface)
-			continue;
+	batadv_send_broadcast_skb(skb1, forw_packet->if_outgoing);
+	batadv_forw_packet_bcasts_dec(forw_packet);
 
-		if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface))
-			continue;
-
-		if (forw_packet->own) {
-			neigh_node = NULL;
-		} else {
-			neigh_addr = eth_hdr(forw_packet->skb)->h_source;
-			neigh_node = batadv_hardif_neigh_get(hard_iface,
-							     neigh_addr);
-		}
-
-		orig_neigh = neigh_node ? neigh_node->orig : NULL;
-
-		ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig,
-						 orig_neigh);
-
-		if (ret) {
-			char *type;
-
-			switch (ret) {
-			case BATADV_HARDIF_BCAST_NORECIPIENT:
-				type = "no neighbor";
-				break;
-			case BATADV_HARDIF_BCAST_DUPFWD:
-				type = "single neighbor is source";
-				break;
-			case BATADV_HARDIF_BCAST_DUPORIG:
-				type = "single neighbor is originator";
-				break;
-			default:
-				type = "unknown";
-			}
-
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n",
-				   bcast_packet->orig,
-				   hard_iface->net_dev->name, type);
-
-			if (neigh_node)
-				batadv_hardif_neigh_put(neigh_node);
-
-			continue;
-		}
-
-		if (neigh_node)
-			batadv_hardif_neigh_put(neigh_node);
-
-		if (!kref_get_unless_zero(&hard_iface->refcount))
-			continue;
-
-		/* send a copy of the saved skb */
-		skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
-		if (skb1)
-			batadv_send_broadcast_skb(skb1, hard_iface);
-
-		batadv_hardif_put(hard_iface);
-	}
-	rcu_read_unlock();
-
-	batadv_forw_packet_bcasts_inc(forw_packet);
-
-	/* if we still have some more bcasts to send */
-	if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) {
+	if (batadv_forw_packet_bcasts_left(forw_packet)) {
 		batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
 					       send_time);
 		return;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 2b0daf8b2bc4..08af251b765c 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -39,10 +39,14 @@ int batadv_send_broadcast_skb(struct sk_buff *skb,
 			      struct batadv_hard_iface *hard_iface);
 int batadv_send_unicast_skb(struct sk_buff *skb,
 			    struct batadv_neigh_node *neigh_node);
-int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
-				    const struct sk_buff *skb,
-				    unsigned long delay,
-				    bool own_packet);
+int batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
+			     struct sk_buff *skb,
+			     unsigned long delay,
+			     bool own_packet);
+void batadv_send_bcast_packet(struct batadv_priv *bat_priv,
+			      struct sk_buff *skb,
+			      unsigned long delay,
+			      bool own_packet);
 void
 batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 				 const struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 6b8181bc3122..a21884c0d47f 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -191,7 +191,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
 	struct vlan_ethhdr *vhdr;
 	unsigned int header_len = 0;
 	int data_len = skb->len, ret;
-	unsigned long brd_delay = 1;
+	unsigned long brd_delay = 0;
 	bool do_bcast = false, client_added;
 	unsigned short vid;
 	u32 seqno;
@@ -330,7 +330,7 @@ send:
 
 		bcast_packet = (struct batadv_bcast_packet *)skb->data;
 		bcast_packet->version = BATADV_COMPAT_VERSION;
-		bcast_packet->ttl = BATADV_TTL;
+		bcast_packet->ttl = BATADV_TTL - 1;
 
 		/* batman packet type: broadcast */
 		bcast_packet->packet_type = BATADV_BCAST;
@@ -346,13 +346,7 @@ send:
 		seqno = atomic_inc_return(&bat_priv->bcast_seqno);
 		bcast_packet->seqno = htonl(seqno);
 
-		batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true);
-
-		/* a copy is stored in the bcast list, therefore removing
-		 * the original skb.
-		 */
-		consume_skb(skb);
-
+		batadv_send_bcast_packet(bat_priv, skb, brd_delay, true);
 	/* unicast packet */
 	} else {
 		/* DHCP packets going to a server will use the GW feature */

From 4cbf055002c53c364d1b3275792e4487af76dd2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Mon, 17 May 2021 00:33:08 +0200
Subject: [PATCH 0187/3292] batman-adv: bcast: avoid skb-copy for (re)queued
 broadcasts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broadcast packets send via batadv_send_outstanding_bcast_packet() were
originally copied in batadv_forw_bcast_packet_to_list() before being
queued. And after that only the ethernet header will be pushed through
batadv_send_broadcast_skb()->batadv_send_skb_packet() which works safely
on skb clones as it uses batadv_skb_head_push()->skb_cow_head().

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/send.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 07b0ba265472..0b9dd29d3b6a 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1072,7 +1072,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 	}
 
 	/* send a copy of the saved skb */
-	skb1 = skb_copy(forw_packet->skb, GFP_ATOMIC);
+	skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
 	if (!skb1)
 		goto out;
 

From 20bc8c1e972f29afcac85e524e430c11a6df5f58 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:55 +0300
Subject: [PATCH 0188/3292] lib/vsprintf: Allow to override ISO 8601 date and
 time separator

ISO 8601 defines 'T' as a separator between date and time. Though,
some ABIs use time and date with ' ' (space) separator instead.

Add a flavour to the %pt specifier to override default separator.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-1-andriy.shevchenko@linux.intel.com
---
 Documentation/core-api/printk-formats.rst |  7 ++++++-
 lib/test_printf.c                         |  5 +++++
 lib/vsprintf.c                            | 22 +++++++++++++++++-----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 9be6de402cb9..7b6bfd8a78d3 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -514,9 +514,10 @@ Time and date
 ::
 
 	%pt[RT]			YYYY-mm-ddTHH:MM:SS
+	%pt[RT]s		YYYY-mm-dd HH:MM:SS
 	%pt[RT]d		YYYY-mm-dd
 	%pt[RT]t		HH:MM:SS
-	%pt[RT][dt][r]
+	%pt[RT][dt][r][s]
 
 For printing date and time as represented by::
 
@@ -528,6 +529,10 @@ in human readable format.
 By default year will be incremented by 1900 and month by 1.
 Use %pt[RT]r (raw) to suppress this behaviour.
 
+The %pt[RT]s (space) will override ISO 8601 separator by using ' ' (space)
+instead of 'T' (Capital T) between date and time. It won't have any effect
+when date or time is omitted.
+
 Passed by reference.
 
 struct clk
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 27b964ec723d..69b04b531492 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -528,6 +528,11 @@ time_and_date(void)
 	test("0119-00-04T15:32:23", "%ptTr", &t);
 	test("15:32:23|2019-01-04", "%ptTt|%ptTd", &t, &t);
 	test("15:32:23|0119-00-04", "%ptTtr|%ptTdr", &t, &t);
+
+	test("2019-01-04 15:32:23", "%ptTs", &t);
+	test("0119-00-04 15:32:23", "%ptTsr", &t);
+	test("15:32:23|2019-01-04", "%ptTts|%ptTds", &t, &t);
+	test("15:32:23|0119-00-04", "%ptTtrs|%ptTdrs", &t, &t);
 }
 
 static void __init
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6c56c62fd9a5..8d5142ae742e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1798,7 +1798,8 @@ char *rtc_str(char *buf, char *end, const struct rtc_time *tm,
 	      struct printf_spec spec, const char *fmt)
 {
 	bool have_t = true, have_d = true;
-	bool raw = false;
+	bool raw = false, iso8601_separator = true;
+	bool found = true;
 	int count = 2;
 
 	if (check_pointer(&buf, end, tm, spec))
@@ -1815,14 +1816,25 @@ char *rtc_str(char *buf, char *end, const struct rtc_time *tm,
 		break;
 	}
 
-	raw = fmt[count] == 'r';
+	do {
+		switch (fmt[count++]) {
+		case 'r':
+			raw = true;
+			break;
+		case 's':
+			iso8601_separator = false;
+			break;
+		default:
+			found = false;
+			break;
+		}
+	} while (found);
 
 	if (have_d)
 		buf = date_str(buf, end, tm, raw);
 	if (have_d && have_t) {
-		/* Respect ISO 8601 */
 		if (buf < end)
-			*buf = 'T';
+			*buf = iso8601_separator ? 'T' : ' ';
 		buf++;
 	}
 	if (have_t)
@@ -2261,7 +2273,7 @@ early_param("no_hash_pointers", no_hash_pointers_enable);
  * - 'd[234]' For a dentry name (optionally 2-4 last components)
  * - 'D[234]' Same as 'd' but for a struct file
  * - 'g' For block_device name (gendisk + partition number)
- * - 't[RT][dt][r]' For time and date as represented by:
+ * - 't[RT][dt][r][s]' For time and date as represented by:
  *      R    struct rtc_time
  *      T    time64_t
  * - 'C' For a clock, it prints the name (Common Clock Framework) or address

From 126ac4d67d97fdeef52b6249702266eb94a05d9e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:56 +0300
Subject: [PATCH 0189/3292] kdb: Switch to use %ptTs

Use %ptTs instead of open-coded variant to print contents
of time64_t type in human readable form.

Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: kgdb-bugreport@lists.sourceforge.net
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-2-andriy.shevchenko@linux.intel.com
---
 kernel/debug/kdb/kdb_main.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1baa96a2ecb8..622410c45da1 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2488,7 +2488,6 @@ static void kdb_sysinfo(struct sysinfo *val)
 static int kdb_summary(int argc, const char **argv)
 {
 	time64_t now;
-	struct tm tm;
 	struct sysinfo val;
 
 	if (argc)
@@ -2502,13 +2501,7 @@ static int kdb_summary(int argc, const char **argv)
 	kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
 
 	now = __ktime_get_real_seconds();
-	time64_to_tm(now, 0, &tm);
-	kdb_printf("date       %04ld-%02d-%02d %02d:%02d:%02d "
-		   "tz_minuteswest %d\n",
-		1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
-		tm.tm_hour, tm.tm_min, tm.tm_sec,
-		sys_tz.tz_minuteswest);
-
+	kdb_printf("date       %ptTs tz_minuteswest %d\n", &now, sys_tz.tz_minuteswest);
 	kdb_sysinfo(&val);
 	kdb_printf("uptime     ");
 	if (val.uptime > (24*60*60)) {

From 776797f1bd1caef34c4ca6dd362fa6376b880e10 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:57 +0300
Subject: [PATCH 0190/3292] nilfs2: Switch to use %ptTs

Use %ptTs instead of open coded variant to print contents
of time64_t type in human readable form.

Use sysfs_emit() at the same time in the changed functions.

Cc: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: linux-nilfs@vger.kernel.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-3-andriy.shevchenko@linux.intel.com
---
 fs/nilfs2/sysfs.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 303d71430bdd..4e10423f0448 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -19,19 +19,6 @@
 /* /sys/fs/<nilfs>/ */
 static struct kset *nilfs_kset;
 
-#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
-		struct tm res; \
-		int count = 0; \
-		time64_to_tm(time_t_val, 0, &res); \
-		res.tm_year += 1900; \
-		res.tm_mon += 1; \
-		count = scnprintf(buf, PAGE_SIZE, \
-				    "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
-				    res.tm_year, res.tm_mon, res.tm_mday, \
-				    res.tm_hour, res.tm_min, res.tm_sec);\
-		count; \
-})
-
 #define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
 static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
 					struct attribute *attr, char *buf) \
@@ -576,7 +563,7 @@ nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
 	ctime = nilfs->ns_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return NILFS_SHOW_TIME(ctime, buf);
+	return sysfs_emit(buf, "%ptTs\n", &ctime);
 }
 
 static ssize_t
@@ -604,7 +591,7 @@ nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
 	nongc_ctime = nilfs->ns_nongc_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return NILFS_SHOW_TIME(nongc_ctime, buf);
+	return sysfs_emit(buf, "%ptTs\n", &nongc_ctime);
 }
 
 static ssize_t
@@ -724,7 +711,7 @@ nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
 	sbwtime = nilfs->ns_sbwtime;
 	up_read(&nilfs->ns_sem);
 
-	return NILFS_SHOW_TIME(sbwtime, buf);
+	return sysfs_emit(buf, "%ptTs\n", &sbwtime);
 }
 
 static ssize_t

From 2f9e0f8c7e173e312e1d98b50fd8dc890245831a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:58 +0300
Subject: [PATCH 0191/3292] usb: host: xhci-tegra: Switch to use %ptTs

Use %ptTs instead of open coded variant to print contents
of time64_t type in human readable form.

Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Jonathan Hunter <jonathanh@nvidia.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-4-andriy.shevchenko@linux.intel.com
---
 drivers/usb/host/xhci-tegra.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 50bb91b6a4b8..c7387677a26a 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -917,7 +917,6 @@ static int tegra_xusb_load_firmware(struct tegra_xusb *tegra)
 	struct xhci_op_regs __iomem *op;
 	unsigned long timeout;
 	time64_t timestamp;
-	struct tm time;
 	u64 address;
 	u32 value;
 	int err;
@@ -1014,11 +1013,8 @@ static int tegra_xusb_load_firmware(struct tegra_xusb *tegra)
 	}
 
 	timestamp = le32_to_cpu(header->fwimg_created_time);
-	time64_to_tm(timestamp, 0, &time);
 
-	dev_info(dev, "Firmware timestamp: %ld-%02d-%02d %02d:%02d:%02d UTC\n",
-		 time.tm_year + 1900, time.tm_mon + 1, time.tm_mday,
-		 time.tm_hour, time.tm_min, time.tm_sec);
+	dev_info(dev, "Firmware timestamp: %ptTs UTC\n", &timestamp);
 
 	return 0;
 }

From a149127be52fa7eaf5b3681a0317a2bbb772d5a9 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 17 May 2021 15:15:45 +0300
Subject: [PATCH 0192/3292] reiserfs: add check for invalid 1st journal block

syzbot reported divide error in reiserfs.
The problem was in incorrect journal 1st block.

Syzbot's reproducer manualy generated wrong superblock
with incorrect 1st block. In journal_init() wasn't
any checks about this particular case.

For example, if 1st journal block is before superblock
1st block, it can cause zeroing important superblock members
in do_journal_end().

Link: https://lore.kernel.org/r/20210517121545.29645-1-paskripkin@gmail.com
Reported-by: syzbot+0ba9909df31c6a36974d@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/journal.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9edc8e2b154e..0834b101c316 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2758,6 +2758,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
 		goto free_and_return;
 	}
 
+	/*
+	 * Sanity check to see if journal first block is correct.
+	 * If journal first block is invalid it can cause
+	 * zeroing important superblock members.
+	 */
+	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
+	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) < SB_JOURNAL_1st_RESERVED_BLOCK(sb)) {
+		reiserfs_warning(sb, "journal-1393",
+				 "journal 1st super block is invalid: 1st reserved block %d, but actual 1st block is %d",
+				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
+				 SB_ONDISK_JOURNAL_1st_BLOCK(sb));
+		goto free_and_return;
+	}
+
 	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
 		reiserfs_warning(sb, "sh-462",
 				 "unable to initialize journal device");

From f59a905b962c34642e862b5edec35c0eda72d70d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 5 May 2021 09:20:32 -0400
Subject: [PATCH 0193/3292] ACPI: PM: s2idle: Add missing LPS0 functions for
 AMD

These are supposedly not required for AMD platforms,
but at least some HP laptops seem to require it to
properly turn off the keyboard backlight.

Based on a patch from Marcin Bachry <hegel666@gmail.com>.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1230
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 2b69536cdccb..2d7ddb8a8cb6 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -42,6 +42,8 @@ static const struct acpi_device_id lps0_device_ids[] = {
 
 /* AMD */
 #define ACPI_LPS0_DSM_UUID_AMD      "e3f32452-febc-43ce-9039-932122d37721"
+#define ACPI_LPS0_ENTRY_AMD         2
+#define ACPI_LPS0_EXIT_AMD          3
 #define ACPI_LPS0_SCREEN_OFF_AMD    4
 #define ACPI_LPS0_SCREEN_ON_AMD     5
 
@@ -408,6 +410,7 @@ int acpi_s2idle_prepare_late(void)
 
 	if (acpi_s2idle_vendor_amd()) {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD);
 	} else {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
@@ -422,6 +425,7 @@ void acpi_s2idle_restore_early(void)
 		return;
 
 	if (acpi_s2idle_vendor_amd()) {
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD);
 	} else {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);

From 6d27975851b134be8d2a170437210c9719e524aa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 10 May 2021 19:53:18 +0200
Subject: [PATCH 0194/3292] ACPI: scan: Rearrange dep_unmet initialization

The dep_unmet field in struct acpi_device is used to store the
number of unresolved _DEP dependencies (that is, operation region
dependencies for which there are no drivers present) for the ACPI
device object represented by it.

That field is initialized to 1 for all ACPI device objects in
acpi_add_single_object(), via acpi_init_device_object(), so as to
avoid evaluating _STA prematurely for battery device objects in
acpi_scan_init_status(), and it is "fixed up" in acpi_bus_check_add()
after the acpi_add_single_object() called by it has returned.

This is not particularly straightforward and causes dep_unmet to
remain 1 for device objects without dependencies created by invoking
acpi_add_single_object() directly, outside acpi_bus_check_add().

For this reason, rearrange acpi_add_single_object() to initialize
dep_unmet completely before calling acpi_scan_init_status(), which
requires passing one extra bool argument to it, and update all of
its callers accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/scan.c | 60 +++++++++++++++++++++------------------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 453eff8ec8c3..a2df7bcf4d07 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1671,8 +1671,20 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	device_initialize(&device->dev);
 	dev_set_uevent_suppress(&device->dev, true);
 	acpi_init_coherency(device);
-	/* Assume there are unmet deps to start with. */
-	device->dep_unmet = 1;
+}
+
+static void acpi_scan_dep_init(struct acpi_device *adev)
+{
+	struct acpi_dep_data *dep;
+
+	mutex_lock(&acpi_dep_list_lock);
+
+	list_for_each_entry(dep, &acpi_dep_list, node) {
+		if (dep->consumer == adev->handle)
+			adev->dep_unmet++;
+	}
+
+	mutex_unlock(&acpi_dep_list_lock);
 }
 
 void acpi_device_add_finalize(struct acpi_device *device)
@@ -1688,7 +1700,7 @@ static void acpi_scan_init_status(struct acpi_device *adev)
 }
 
 static int acpi_add_single_object(struct acpi_device **child,
-				  acpi_handle handle, int type)
+				  acpi_handle handle, int type, bool dep_init)
 {
 	struct acpi_device *device;
 	int result;
@@ -1703,8 +1715,12 @@ static int acpi_add_single_object(struct acpi_device **child,
 	 * acpi_bus_get_status() and use its quirk handling.  Note that
 	 * this must be done before the get power-/wakeup_dev-flags calls.
 	 */
-	if (type == ACPI_BUS_TYPE_DEVICE || type == ACPI_BUS_TYPE_PROCESSOR)
+	if (type == ACPI_BUS_TYPE_DEVICE || type == ACPI_BUS_TYPE_PROCESSOR) {
+		if (dep_init)
+			acpi_scan_dep_init(device);
+
 		acpi_scan_init_status(device);
+	}
 
 	acpi_bus_get_power_flags(device);
 	acpi_bus_get_wakeup_device_flags(device);
@@ -1886,22 +1902,6 @@ static u32 acpi_scan_check_dep(acpi_handle handle, bool check_dep)
 	return count;
 }
 
-static void acpi_scan_dep_init(struct acpi_device *adev)
-{
-	struct acpi_dep_data *dep;
-
-	adev->dep_unmet = 0;
-
-	mutex_lock(&acpi_dep_list_lock);
-
-	list_for_each_entry(dep, &acpi_dep_list, node) {
-		if (dep->consumer == adev->handle)
-			adev->dep_unmet++;
-	}
-
-	mutex_unlock(&acpi_dep_list_lock);
-}
-
 static bool acpi_bus_scan_second_pass;
 
 static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep,
@@ -1949,19 +1949,15 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep,
 		return AE_OK;
 	}
 
-	acpi_add_single_object(&device, handle, type);
-	if (!device)
-		return AE_CTRL_DEPTH;
-
-	acpi_scan_init_hotplug(device);
 	/*
 	 * If check_dep is true at this point, the device has no dependencies,
 	 * or the creation of the device object would have been postponed above.
 	 */
-	if (check_dep)
-		device->dep_unmet = 0;
-	else
-		acpi_scan_dep_init(device);
+	acpi_add_single_object(&device, handle, type, !check_dep);
+	if (!device)
+		return AE_CTRL_DEPTH;
+
+	acpi_scan_init_hotplug(device);
 
 out:
 	if (!*adev_p)
@@ -2223,7 +2219,7 @@ int acpi_bus_register_early_device(int type)
 	struct acpi_device *device = NULL;
 	int result;
 
-	result = acpi_add_single_object(&device, NULL, type);
+	result = acpi_add_single_object(&device, NULL, type, false);
 	if (result)
 		return result;
 
@@ -2243,7 +2239,7 @@ static int acpi_bus_scan_fixed(void)
 		struct acpi_device *device = NULL;
 
 		result = acpi_add_single_object(&device, NULL,
-						ACPI_BUS_TYPE_POWER_BUTTON);
+						ACPI_BUS_TYPE_POWER_BUTTON, false);
 		if (result)
 			return result;
 
@@ -2259,7 +2255,7 @@ static int acpi_bus_scan_fixed(void)
 		struct acpi_device *device = NULL;
 
 		result = acpi_add_single_object(&device, NULL,
-						ACPI_BUS_TYPE_SLEEP_BUTTON);
+						ACPI_BUS_TYPE_SLEEP_BUTTON, false);
 		if (result)
 			return result;
 

From 0df316b8ec04d849ec7908bc90b61a2dce46f3a9 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Tue, 11 May 2021 15:55:28 +0300
Subject: [PATCH 0195/3292] ACPI: IORT: Handle device properties with software
 node API

The older device property API is going to be removed.
Replacing the device_add_properties() call with software
node API equivalent device_create_managed_software_node().

Fixes: 434b73e61cc6 ("iommu/arm-smmu-v3: Use device properties for pasid-num-bits")
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/arm64/iort.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3912a1f6058e..e34937e11186 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -976,7 +976,7 @@ static void iort_named_component_init(struct device *dev,
 				      FIELD_GET(ACPI_IORT_NC_PASID_BITS,
 						nc->node_flags));
 
-	if (device_add_properties(dev, props))
+	if (device_create_managed_software_node(dev, props, NULL))
 		dev_warn(dev, "Could not add device properties\n");
 }
 

From daadabfbd36d57a158623fa81585a4e9aa954c53 Mon Sep 17 00:00:00 2001
From: Anupama K Patil <anupamakpatil123@gmail.com>
Date: Thu, 29 Apr 2021 01:09:01 +0530
Subject: [PATCH 0196/3292] drivers: pnp: isapnp: proc.c: Remove unnecessary
 local variables

In the PNP code, there are two redundant local variables that can be
dropped.

This also fixes a coding style issue reported by checkpatch about an
assignment made under an if () statement.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Anupama K Patil <anupamakpatil123@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/isapnp/proc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 785a796430fa..1ae458c02656 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -57,21 +57,20 @@ static const struct proc_ops isapnp_proc_bus_proc_ops = {
 static int isapnp_proc_attach_device(struct pnp_dev *dev)
 {
 	struct pnp_card *bus = dev->card;
-	struct proc_dir_entry *de, *e;
 	char name[16];
 
-	if (!(de = bus->procdir)) {
+	if (!bus->procdir) {
 		sprintf(name, "%02x", bus->number);
-		de = bus->procdir = proc_mkdir(name, isapnp_proc_bus_dir);
-		if (!de)
+		bus->procdir = proc_mkdir(name, isapnp_proc_bus_dir);
+		if (!bus->procdir)
 			return -ENOMEM;
 	}
 	sprintf(name, "%02x", dev->number);
-	e = dev->procent = proc_create_data(name, S_IFREG | S_IRUGO, de,
+	dev->procent = proc_create_data(name, S_IFREG | S_IRUGO, bus->procdir,
 					    &isapnp_proc_bus_proc_ops, dev);
-	if (!e)
+	if (!dev->procent)
 		return -ENOMEM;
-	proc_set_size(e, 256);
+	proc_set_size(dev->procent, 256);
 	return 0;
 }
 

From b15fc7c2c88e7a97fa347446301c37272de20ed5 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 12 May 2021 22:36:12 +0200
Subject: [PATCH 0197/3292] PNP: Remove pnp_alloc()

The kernel will complain anyway if it runs out of memory, so it is
not necessary to print an extra error message when that happens and
kzalloc() can be called directly instead of pnp_alloc() which then
becomes redundant and can be dropped.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/base.h      |  1 -
 drivers/pnp/card.c      |  2 +-
 drivers/pnp/core.c      | 12 ------------
 drivers/pnp/interface.c |  4 ++--
 4 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h
index cdcfa39cf167..e74a0f6a3157 100644
--- a/drivers/pnp/base.h
+++ b/drivers/pnp/base.h
@@ -6,7 +6,6 @@
 
 extern struct mutex pnp_lock;
 extern const struct attribute_group *pnp_dev_groups[];
-void *pnp_alloc(long size);
 
 int pnp_register_protocol(struct pnp_protocol *protocol);
 void pnp_unregister_protocol(struct pnp_protocol *protocol);
diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c
index c2464ee08e4a..2430c14f472d 100644
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -80,7 +80,7 @@ static int card_probe(struct pnp_card *card, struct pnp_card_driver *drv)
 	if (!id)
 		return 0;
 
-	clink = pnp_alloc(sizeof(*clink));
+	clink = kzalloc(sizeof(*clink), GFP_KERNEL);
 	if (!clink)
 		return 0;
 	clink->card = card;
diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
index a50ab002e9e4..ccdfbf3978c0 100644
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -31,18 +31,6 @@ DEFINE_MUTEX(pnp_lock);
 int pnp_platform_devices;
 EXPORT_SYMBOL(pnp_platform_devices);
 
-void *pnp_alloc(long size)
-{
-	void *result;
-
-	result = kzalloc(size, GFP_KERNEL);
-	if (!result) {
-		printk(KERN_ERR "pnp: Out of Memory\n");
-		return NULL;
-	}
-	return result;
-}
-
 static void pnp_remove_protocol(struct pnp_protocol *protocol)
 {
 	mutex_lock(&pnp_lock);
diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c
index 602c46893e83..44efcdb87e6f 100644
--- a/drivers/pnp/interface.c
+++ b/drivers/pnp/interface.c
@@ -214,7 +214,7 @@ static ssize_t options_show(struct device *dmdev, struct device_attribute *attr,
 	int ret, dep = 0, set = 0;
 	char *indent;
 
-	buffer = pnp_alloc(sizeof(pnp_info_buffer_t));
+	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
 
@@ -257,7 +257,7 @@ static ssize_t resources_show(struct device *dmdev,
 	if (!dev)
 		return -EINVAL;
 
-	buffer = pnp_alloc(sizeof(pnp_info_buffer_t));
+	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
 

From 5bb5ceac1275cbbe757d9eecbd4b8c8a4f403c32 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 12 May 2021 22:44:41 +0200
Subject: [PATCH 0198/3292] PNP: Switch over to dev_dbg()

Debug output in dmesg log may confuse users, so restrict debug output
to cases where DEBUG is defined or dynamic debug output is enabled
for the respective code piece.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/core.c     | 5 ++---
 drivers/pnp/resource.c | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c
index ccdfbf3978c0..4df5aa6a309c 100644
--- a/drivers/pnp/core.c
+++ b/drivers/pnp/core.c
@@ -215,9 +215,8 @@ int pnp_add_device(struct pnp_dev *dev)
 	for (id = dev->id; id; id = id->next)
 		len += scnprintf(buf + len, sizeof(buf) - len, " %s", id->id);
 
-	dev_printk(KERN_DEBUG, &dev->dev, "%s device, IDs%s (%s)\n",
-		   dev->protocol->name, buf,
-		   dev->active ? "active" : "disabled");
+	dev_dbg(&dev->dev, "%s device, IDs%s (%s)\n", dev->protocol->name, buf,
+		dev->active ? "active" : "disabled");
 	return 0;
 }
 
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index 70d4ba95735a..2fa0f7d55259 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -540,7 +540,7 @@ struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq,
 	res->start = irq;
 	res->end = irq;
 
-	dev_printk(KERN_DEBUG, &dev->dev, "%pR\n", res);
+	dev_dbg(&dev->dev, "%pR\n", res);
 	return pnp_res;
 }
 

From 65ea8f2c6e230bdf71fed0137cf9e9d1b307db32 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 12 May 2021 17:15:14 -0500
Subject: [PATCH 0199/3292] ACPI: processor idle: Fix up C-state latency if not
 ordered

Generally, the C-state latency is provided by the _CST method or
FADT, but some OEM platforms using AMD Picasso, Renoir, Van Gogh,
and Cezanne set the C2 latency greater than C3's which causes the
C2 state to be skipped.

That will block the core entering PC6, which prevents S0ix working
properly on Linux systems.

In other operating systems, the latency values are not validated and
this does not cause problems by skipping states.

To avoid this issue on Linux, detect when latencies are not an
arithmetic progression and sort them.

Link: https://gitlab.freedesktop.org/agd5f/linux/-/commit/026d186e4592c1ee9c1cb44295912d0294508725
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1230#note_712174
Suggested-by: Prike Liang <Prike.Liang@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_idle.c | 40 +++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 45a019619e4a..095c8aca141e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -16,6 +16,7 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/sched.h>       /* need_resched() */
+#include <linux/sort.h>
 #include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu.h>
@@ -384,10 +385,37 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	return;
 }
 
+static int acpi_cst_latency_cmp(const void *a, const void *b)
+{
+	const struct acpi_processor_cx *x = a, *y = b;
+
+	if (!(x->valid && y->valid))
+		return 0;
+	if (x->latency > y->latency)
+		return 1;
+	if (x->latency < y->latency)
+		return -1;
+	return 0;
+}
+static void acpi_cst_latency_swap(void *a, void *b, int n)
+{
+	struct acpi_processor_cx *x = a, *y = b;
+	u32 tmp;
+
+	if (!(x->valid && y->valid))
+		return;
+	tmp = x->latency;
+	x->latency = y->latency;
+	y->latency = tmp;
+}
+
 static int acpi_processor_power_verify(struct acpi_processor *pr)
 {
 	unsigned int i;
 	unsigned int working = 0;
+	unsigned int last_latency = 0;
+	unsigned int last_type = 0;
+	bool buggy_latency = false;
 
 	pr->power.timer_broadcast_on_state = INT_MAX;
 
@@ -411,12 +439,24 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 		}
 		if (!cx->valid)
 			continue;
+		if (cx->type >= last_type && cx->latency < last_latency)
+			buggy_latency = true;
+		last_latency = cx->latency;
+		last_type = cx->type;
 
 		lapic_timer_check_state(i, pr, cx);
 		tsc_check_state(cx->type);
 		working++;
 	}
 
+	if (buggy_latency) {
+		pr_notice("FW issue: working around C-state latencies out of order\n");
+		sort(&pr->power.states[1], max_cstate,
+		     sizeof(struct acpi_processor_cx),
+		     acpi_cst_latency_cmp,
+		     acpi_cst_latency_swap);
+	}
+
 	lapic_timer_propagate_broadcast(pr);
 
 	return (working);

From 14fad24d0520c65ecfc2eebe8e4cf25ca02f19cf Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 13 May 2021 15:33:41 +0200
Subject: [PATCH 0200/3292] x86/acpi: Switch to pr_xxx log functions

Switching to pr_debug et al has two benefits:
- We don't have to add PREFIX to each log statement
- Debug output is suppressed except DEBUG is defined or dynamic
  debugging is enabled for the respective code piece.

In addition ensure that longer messages aren't split to multiple lines
in source code, checkpatch complains otherwise.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 118 ++++++++++++++----------------------
 1 file changed, 47 insertions(+), 71 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e90310cbe73a..e55e0c1fad8c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -5,6 +5,7 @@
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
  */
+#define pr_fmt(fmt) "ACPI: " fmt
 
 #include <linux/init.h>
 #include <linux/acpi.h>
@@ -42,8 +43,6 @@ EXPORT_SYMBOL(acpi_disabled);
 # include <asm/proto.h>
 #endif				/* X86 */
 
-#define PREFIX			"ACPI: "
-
 int acpi_noirq;				/* skip ACPI IRQ initialization */
 static int acpi_nobgrt;			/* skip ACPI BGRT */
 int acpi_pci_disabled;		/* skip ACPI PCI scan and IRQ initialization */
@@ -130,15 +129,14 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 
 	madt = (struct acpi_table_madt *)table;
 	if (!madt) {
-		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+		pr_warn("Unable to map MADT\n");
 		return -ENODEV;
 	}
 
 	if (madt->address) {
 		acpi_lapic_addr = (u64) madt->address;
 
-		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
-		       madt->address);
+		pr_debug("Local APIC address 0x%08x\n", madt->address);
 	}
 
 	default_acpi_madt_oem_check(madt->header.oem_id,
@@ -161,7 +159,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
 	int cpu;
 
 	if (id >= MAX_LOCAL_APIC) {
-		printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+		pr_info("skipped apicid that is too big\n");
 		return -EINVAL;
 	}
 
@@ -213,13 +211,13 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
 	 */
 	if (!apic->apic_id_valid(apic_id)) {
 		if (enabled)
-			pr_warn(PREFIX "x2apic entry ignored\n");
+			pr_warn("x2apic entry ignored\n");
 		return 0;
 	}
 
 	acpi_register_lapic(apic_id, processor->uid, enabled);
 #else
-	printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+	pr_warn("x2apic entry ignored\n");
 #endif
 
 	return 0;
@@ -306,7 +304,7 @@ acpi_parse_x2apic_nmi(union acpi_subtable_headers *header,
 	acpi_table_print_madt_entry(&header->common);
 
 	if (x2apic_nmi->lint != 1)
-		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+		pr_warn("NMI not connected to LINT 1!\n");
 
 	return 0;
 }
@@ -324,7 +322,7 @@ acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long e
 	acpi_table_print_madt_entry(&header->common);
 
 	if (lapic_nmi->lint != 1)
-		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+		pr_warn("NMI not connected to LINT 1!\n");
 
 	return 0;
 }
@@ -514,14 +512,14 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
 
 	if (intsrc->source_irq == 0) {
 		if (acpi_skip_timer_override) {
-			printk(PREFIX "BIOS IRQ0 override ignored.\n");
+			pr_warn("BIOS IRQ0 override ignored.\n");
 			return 0;
 		}
 
 		if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
 			&& (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
 			intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
-			printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
+			pr_warn("BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
 		}
 	}
 
@@ -597,7 +595,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
 	if (old == new)
 		return;
 
-	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+	pr_warn("setting ELCR to %04x (from %04x)\n", new, old);
 	outb(new, 0x4d0);
 	outb(new >> 8, 0x4d1);
 }
@@ -754,7 +752,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
 
 	cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED);
 	if (cpu < 0) {
-		pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
+		pr_info("Unable to map lapic to logical cpu number\n");
 		return cpu;
 	}
 
@@ -870,8 +868,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 	struct acpi_table_hpet *hpet_tbl = (struct acpi_table_hpet *)table;
 
 	if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) {
-		printk(KERN_WARNING PREFIX "HPET timers must be located in "
-		       "memory.\n");
+		pr_warn("HPET timers must be located in memory.\n");
 		return -1;
 	}
 
@@ -883,9 +880,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 	 * want to allocate a resource there.
 	 */
 	if (!hpet_address) {
-		printk(KERN_WARNING PREFIX
-		       "HPET id: %#x base: %#lx is invalid\n",
-		       hpet_tbl->id, hpet_address);
+		pr_warn("HPET id: %#x base: %#lx is invalid\n", hpet_tbl->id, hpet_address);
 		return 0;
 	}
 #ifdef CONFIG_X86_64
@@ -896,21 +891,17 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 	 */
 	if (hpet_address == 0xfed0000000000000UL) {
 		if (!hpet_force_user) {
-			printk(KERN_WARNING PREFIX "HPET id: %#x "
-			       "base: 0xfed0000000000000 is bogus\n "
-			       "try hpet=force on the kernel command line to "
-			       "fix it up to 0xfed00000.\n", hpet_tbl->id);
+			pr_warn("HPET id: %#x base: 0xfed0000000000000 is bogus, try hpet=force on the kernel command line to fix it up to 0xfed00000.\n",
+				hpet_tbl->id);
 			hpet_address = 0;
 			return 0;
 		}
-		printk(KERN_WARNING PREFIX
-		       "HPET id: %#x base: 0xfed0000000000000 fixed up "
-		       "to 0xfed00000.\n", hpet_tbl->id);
+		pr_warn("HPET id: %#x base: 0xfed0000000000000 fixed up to 0xfed00000.\n",
+			hpet_tbl->id);
 		hpet_address >>= 32;
 	}
 #endif
-	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
-	       hpet_tbl->id, hpet_address);
+	pr_info("HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address);
 
 	/*
 	 * Allocate and initialize the HPET firmware resource for adding into
@@ -955,24 +946,24 @@ late_initcall(hpet_insert_resource);
 static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
 	if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) {
-		pr_debug("ACPI: no legacy devices present\n");
+		pr_debug("no legacy devices present\n");
 		x86_platform.legacy.devices.pnpbios = 0;
 	}
 
 	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
 	    !(acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) &&
 	    x86_platform.legacy.i8042 != X86_LEGACY_I8042_PLATFORM_ABSENT) {
-		pr_debug("ACPI: i8042 controller is absent\n");
+		pr_debug("i8042 controller is absent\n");
 		x86_platform.legacy.i8042 = X86_LEGACY_I8042_FIRMWARE_ABSENT;
 	}
 
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
-		pr_debug("ACPI: not registering RTC platform device\n");
+		pr_debug("not registering RTC platform device\n");
 		x86_platform.legacy.rtc = 0;
 	}
 
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_VGA) {
-		pr_debug("ACPI: probing for VGA not safe\n");
+		pr_debug("probing for VGA not safe\n");
 		x86_platform.legacy.no_vga = 1;
 	}
 
@@ -997,8 +988,7 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 		pmtmr_ioport = acpi_gbl_FADT.pm_timer_block;
 	}
 	if (pmtmr_ioport)
-		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
-		       pmtmr_ioport);
+		pr_info("PM-Timer IO Port: %#x\n", pmtmr_ioport);
 #endif
 	return 0;
 }
@@ -1024,8 +1014,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
 				      acpi_parse_lapic_addr_ovr, 0);
 	if (count < 0) {
-		printk(KERN_ERR PREFIX
-		       "Error parsing LAPIC address override entry\n");
+		pr_err("Error parsing LAPIC address override entry\n");
 		return count;
 	}
 
@@ -1057,8 +1046,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 				sizeof(struct acpi_table_madt),
 				madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
 		if (ret < 0) {
-			printk(KERN_ERR PREFIX
-					"Error parsing LAPIC/X2APIC entries\n");
+			pr_err("Error parsing LAPIC/X2APIC entries\n");
 			return ret;
 		}
 
@@ -1066,11 +1054,11 @@ static int __init acpi_parse_madt_lapic_entries(void)
 		x2count = madt_proc[1].count;
 	}
 	if (!count && !x2count) {
-		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+		pr_err("No LAPIC entries present\n");
 		/* TBD: Cleanup to allow fallback to MPS */
 		return -ENODEV;
 	} else if (count < 0 || x2count < 0) {
-		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+		pr_err("Error parsing LAPIC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
 		return count;
 	}
@@ -1080,7 +1068,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
 				      acpi_parse_lapic_nmi, 0);
 	if (count < 0 || x2count < 0) {
-		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+		pr_err("Error parsing LAPIC NMI entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
 		return count;
 	}
@@ -1139,7 +1127,7 @@ static void __init mp_config_acpi_legacy_irqs(void)
 		}
 
 		if (idx != mp_irq_entries) {
-			printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+			pr_debug("ACPI: IRQ%d used by override.\n", i);
 			continue;	/* IRQ already used */
 		}
 
@@ -1179,26 +1167,24 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	 * if "noapic" boot option, don't look for IO-APICs
 	 */
 	if (skip_ioapic_setup) {
-		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
-		       "due to 'noapic' option.\n");
+		pr_info("Skipping IOAPIC probe due to 'noapic' option.\n");
 		return -ENODEV;
 	}
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
 				      MAX_IO_APICS);
 	if (!count) {
-		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+		pr_err("No IOAPIC entries present\n");
 		return -ENODEV;
 	} else if (count < 0) {
-		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+		pr_err("Error parsing IOAPIC entry\n");
 		return count;
 	}
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
 				      acpi_parse_int_src_ovr, nr_irqs);
 	if (count < 0) {
-		printk(KERN_ERR PREFIX
-		       "Error parsing interrupt source overrides entry\n");
+		pr_err("Error parsing interrupt source overrides entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
 		return count;
 	}
@@ -1218,7 +1204,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE,
 				      acpi_parse_nmi_src, nr_irqs);
 	if (count < 0) {
-		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+		pr_err("Error parsing NMI SRC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
 		return count;
 	}
@@ -1251,8 +1237,7 @@ static void __init early_acpi_process_madt(void)
 			/*
 			 * Dell Precision Workstation 410, 610 come here.
 			 */
-			printk(KERN_ERR PREFIX
-			       "Invalid BIOS MADT, disabling ACPI\n");
+			pr_err("Invalid BIOS MADT, disabling ACPI\n");
 			disable_acpi();
 		}
 	}
@@ -1289,8 +1274,7 @@ static void __init acpi_process_madt(void)
 			/*
 			 * Dell Precision Workstation 410, 610 come here.
 			 */
-			printk(KERN_ERR PREFIX
-			       "Invalid BIOS MADT, disabling ACPI\n");
+			pr_err("Invalid BIOS MADT, disabling ACPI\n");
 			disable_acpi();
 		}
 	} else {
@@ -1300,8 +1284,7 @@ static void __init acpi_process_madt(void)
  		 * Boot with "acpi=off" to use MPS on such a system.
  		 */
 		if (smp_found_config) {
-			printk(KERN_WARNING PREFIX
-				"No APIC-table, disabling MPS\n");
+			pr_warn("No APIC-table, disabling MPS\n");
 			smp_found_config = 0;
 		}
 	}
@@ -1311,11 +1294,9 @@ static void __init acpi_process_madt(void)
 	 * processors, where MPS only supports physical.
 	 */
 	if (acpi_lapic && acpi_ioapic)
-		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
-		       "information\n");
+		pr_info("Using ACPI (MADT) for SMP configuration information\n");
 	else if (acpi_lapic)
-		printk(KERN_INFO "Using ACPI for processor (LAPIC) "
-		       "configuration information\n");
+		pr_info("Using ACPI for processor (LAPIC) configuration information\n");
 #endif
 	return;
 }
@@ -1323,8 +1304,7 @@ static void __init acpi_process_madt(void)
 static int __init disable_acpi_irq(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
-		printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
-		       d->ident);
+		pr_notice("%s detected: force use of acpi=noirq\n", d->ident);
 		acpi_noirq_set();
 	}
 	return 0;
@@ -1333,8 +1313,7 @@ static int __init disable_acpi_irq(const struct dmi_system_id *d)
 static int __init disable_acpi_pci(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
-		printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
-		       d->ident);
+		pr_notice("%s detected: force use of pci=noacpi\n", d->ident);
 		acpi_disable_pci();
 	}
 	return 0;
@@ -1343,11 +1322,10 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d)
 static int __init dmi_disable_acpi(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
-		printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
+		pr_notice("%s detected: acpi off\n", d->ident);
 		disable_acpi();
 	} else {
-		printk(KERN_NOTICE
-		       "Warning: DMI blacklist says broken, but acpi forced\n");
+		pr_notice("Warning: DMI blacklist says broken, but acpi forced\n");
 	}
 	return 0;
 }
@@ -1574,9 +1552,9 @@ int __init early_acpi_boot_init(void)
 	 */
 	if (acpi_blacklisted()) {
 		if (acpi_force) {
-			printk(KERN_WARNING PREFIX "acpi=force override\n");
+			pr_warn("acpi=force override\n");
 		} else {
-			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+			pr_warn("Disabling ACPI support\n");
 			disable_acpi();
 			return 1;
 		}
@@ -1692,9 +1670,7 @@ int __init acpi_mps_check(void)
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE)
 /* mptable code is not built-in*/
 	if (acpi_disabled || acpi_noirq) {
-		printk(KERN_WARNING "MPS support code is not built-in.\n"
-		       "Using acpi=off or acpi=noirq or pci=noacpi "
-		       "may have problem\n");
+		pr_warn("MPS support code is not built-in, using acpi=off or acpi=noirq or pci=noacpi may have problem\n");
 		return 1;
 	}
 #endif

From 8bbb77b7c7a226803270dac3fc8dd564fd2f5756 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Wed, 12 May 2021 16:04:49 +0530
Subject: [PATCH 0201/3292] libbpf: Add various netlink helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change introduces a few helpers to wrap open coded attribute
preparation in netlink.c. It also adds a libbpf_netlink_send_recv() that
is useful to wrap send + recv handling in a generic way. Subsequent patch
will also use this function for sending and receiving a netlink response.
The libbpf_nl_get_link() helper has been removed instead, moving socket
creation into the newly named libbpf_netlink_send_recv().

Every nested attribute's closure must happen using the helper
nlattr_end_nested(), which sets its length properly. NLA_F_NESTED is
enforced using nlattr_begin_nested() helper. Other simple attributes
can be added directly.

The maxsz parameter corresponds to the size of the request structure
which is being filled in, so for instance with req being:

  struct {
	struct nlmsghdr nh;
	struct tcmsg t;
	char buf[4096];
  } req;

Then, maxsz should be sizeof(req).

This change also converts the open coded attribute preparation with these
helpers. Note that the only failure the internal call to nlattr_add()
could result in the nested helper would be -EMSGSIZE, hence that is what
we return to our caller.

The libbpf_netlink_send_recv() call takes care of opening the socket,
sending the netlink message, receiving the response, potentially invoking
callbacks, and return errors if any, and then finally close the socket.
This allows users to avoid identical socket setup code in different places.
The only user of libbpf_nl_get_link() has been converted to make use of it.
__bpf_set_link_xdp_fd_replace() has also been refactored to use it.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
[ Daniel: major patch cleanup ]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210512103451.989420-2-memxor@gmail.com
---
 tools/lib/bpf/netlink.c | 169 ++++++++++++++++++----------------------
 tools/lib/bpf/nlattr.h  |  48 ++++++++++++
 2 files changed, 124 insertions(+), 93 deletions(-)

diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index d2cb28e9ef52..8bbdc6c38f06 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -73,9 +73,14 @@ cleanup:
 	return ret;
 }
 
-static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
-			    __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
-			    void *cookie)
+static void libbpf_netlink_close(int sock)
+{
+	close(sock);
+}
+
+static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+			       __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+			       void *cookie)
 {
 	bool multipart = true;
 	struct nlmsgerr *err;
@@ -131,74 +136,74 @@ done:
 	return ret;
 }
 
-static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
-					 __u32 flags)
+static int libbpf_netlink_send_recv(struct nlmsghdr *nh,
+				    __dump_nlmsg_t parse_msg,
+				    libbpf_dump_nlmsg_t parse_attr,
+				    void *cookie)
 {
-	int sock, seq = 0, ret;
-	struct nlattr *nla, *nla_xdp;
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifinfo;
-		char             attrbuf[64];
-	} req;
 	__u32 nl_pid = 0;
+	int sock, ret;
 
 	sock = libbpf_netlink_open(&nl_pid);
 	if (sock < 0)
 		return sock;
 
-	memset(&req, 0, sizeof(req));
-	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-	req.nh.nlmsg_type = RTM_SETLINK;
-	req.nh.nlmsg_pid = 0;
-	req.nh.nlmsg_seq = ++seq;
-	req.ifinfo.ifi_family = AF_UNSPEC;
-	req.ifinfo.ifi_index = ifindex;
+	nh->nlmsg_pid = 0;
+	nh->nlmsg_seq = time(NULL);
 
-	/* started nested attribute for XDP */
-	nla = (struct nlattr *)(((char *)&req)
-				+ NLMSG_ALIGN(req.nh.nlmsg_len));
-	nla->nla_type = NLA_F_NESTED | IFLA_XDP;
-	nla->nla_len = NLA_HDRLEN;
-
-	/* add XDP fd */
-	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-	nla_xdp->nla_type = IFLA_XDP_FD;
-	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
-	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
-	nla->nla_len += nla_xdp->nla_len;
-
-	/* if user passed in any flags, add those too */
-	if (flags) {
-		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-		nla_xdp->nla_type = IFLA_XDP_FLAGS;
-		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
-		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
-		nla->nla_len += nla_xdp->nla_len;
-	}
-
-	if (flags & XDP_FLAGS_REPLACE) {
-		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
-		nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
-		nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
-		memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
-		nla->nla_len += nla_xdp->nla_len;
-	}
-
-	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
-	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+	if (send(sock, nh, nh->nlmsg_len, 0) < 0) {
 		ret = -errno;
-		goto cleanup;
+		goto out;
 	}
-	ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
 
-cleanup:
-	close(sock);
+	ret = libbpf_netlink_recv(sock, nl_pid, nh->nlmsg_seq,
+				  parse_msg, parse_attr, cookie);
+out:
+	libbpf_netlink_close(sock);
 	return ret;
 }
 
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+					 __u32 flags)
+{
+	struct nlattr *nla;
+	int ret;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags    = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type     = RTM_SETLINK;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index  = ifindex;
+
+	nla = nlattr_begin_nested(&req.nh, sizeof(req), IFLA_XDP);
+	if (!nla)
+		return -EMSGSIZE;
+	ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FD, &fd, sizeof(fd));
+	if (ret < 0)
+		return ret;
+	if (flags) {
+		ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FLAGS, &flags,
+				 sizeof(flags));
+		if (ret < 0)
+			return ret;
+	}
+	if (flags & XDP_FLAGS_REPLACE) {
+		ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_EXPECTED_FD,
+				 &old_fd, sizeof(old_fd));
+		if (ret < 0)
+			return ret;
+	}
+	nlattr_end_nested(&req.nh, nla);
+
+	return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+}
+
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
 			     const struct bpf_xdp_set_link_opts *opts)
 {
@@ -212,9 +217,7 @@ int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
 		flags |= XDP_FLAGS_REPLACE;
 	}
 
-	return __bpf_set_link_xdp_fd_replace(ifindex, fd,
-					     old_fd,
-					     flags);
+	return __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
 }
 
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
@@ -231,6 +234,7 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh,
 
 	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
 	attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+
 	if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
 		return -LIBBPF_ERRNO__NLPARSE;
 
@@ -282,16 +286,21 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
 	return 0;
 }
 
-static int libbpf_nl_get_link(int sock, unsigned int nl_pid,
-			      libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-
 int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 			  size_t info_size, __u32 flags)
 {
 	struct xdp_id_md xdp_id = {};
-	int sock, ret;
-	__u32 nl_pid = 0;
 	__u32 mask;
+	int ret;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifm;
+	} req = {
+		.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nh.nlmsg_type  = RTM_GETLINK,
+		.nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.ifm.ifi_family = AF_PACKET,
+	};
 
 	if (flags & ~XDP_FLAGS_MASK || !info_size)
 		return -EINVAL;
@@ -302,14 +311,11 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 	if (flags && flags & mask)
 		return -EINVAL;
 
-	sock = libbpf_netlink_open(&nl_pid);
-	if (sock < 0)
-		return sock;
-
 	xdp_id.ifindex = ifindex;
 	xdp_id.flags = flags;
 
-	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+	ret = libbpf_netlink_send_recv(&req.nh, __dump_link_nlmsg,
+				       get_xdp_info, &xdp_id);
 	if (!ret) {
 		size_t sz = min(info_size, sizeof(xdp_id.info));
 
@@ -317,7 +323,6 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 		memset((void *) info + sz, 0, info_size - sz);
 	}
 
-	close(sock);
 	return ret;
 }
 
@@ -348,25 +353,3 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
 
 	return ret;
 }
-
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
-		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
-{
-	struct {
-		struct nlmsghdr nlh;
-		struct ifinfomsg ifm;
-	} req = {
-		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
-		.nlh.nlmsg_type = RTM_GETLINK,
-		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
-		.ifm.ifi_family = AF_PACKET,
-	};
-	int seq = time(NULL);
-
-	req.nlh.nlmsg_seq = seq;
-	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
-		return -errno;
-
-	return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
-				dump_link_nlmsg, cookie);
-}
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 6cc3ac91690f..3c780ab6d022 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -10,7 +10,10 @@
 #define __LIBBPF_NLATTR_H
 
 #include <stdint.h>
+#include <string.h>
+#include <errno.h>
 #include <linux/netlink.h>
+
 /* avoid multiple definition of netlink features */
 #define __LINUX_NETLINK_H
 
@@ -103,4 +106,49 @@ int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
 
 int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
 
+static inline struct nlattr *nla_data(struct nlattr *nla)
+{
+	return (struct nlattr *)((char *)nla + NLA_HDRLEN);
+}
+
+static inline struct nlattr *nh_tail(struct nlmsghdr *nh)
+{
+	return (struct nlattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
+}
+
+static inline int nlattr_add(struct nlmsghdr *nh, size_t maxsz, int type,
+			     const void *data, int len)
+{
+	struct nlattr *nla;
+
+	if (NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > maxsz)
+		return -EMSGSIZE;
+	if (!!data != !!len)
+		return -EINVAL;
+
+	nla = nh_tail(nh);
+	nla->nla_type = type;
+	nla->nla_len = NLA_HDRLEN + len;
+	if (data)
+		memcpy(nla_data(nla), data, len);
+	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(nla->nla_len);
+	return 0;
+}
+
+static inline struct nlattr *nlattr_begin_nested(struct nlmsghdr *nh,
+						 size_t maxsz, int type)
+{
+	struct nlattr *tail;
+
+	tail = nh_tail(nh);
+	if (nlattr_add(nh, maxsz, type | NLA_F_NESTED, NULL, 0))
+		return NULL;
+	return tail;
+}
+
+static inline void nlattr_end_nested(struct nlmsghdr *nh, struct nlattr *tail)
+{
+	tail->nla_len = (char *)nh_tail(nh) - (char *)tail;
+}
+
 #endif /* __LIBBPF_NLATTR_H */

From 715c5ce454a6a9b94a1a4a3360de6a87eaf0d833 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 13 May 2021 01:41:22 +0200
Subject: [PATCH 0202/3292] libbpf: Add low level TC-BPF management API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds functions that wrap the netlink API used for adding, manipulating,
and removing traffic control filters.

The API summary:

A bpf_tc_hook represents a location where a TC-BPF filter can be attached.
This means that creating a hook leads to creation of the backing qdisc,
while destruction either removes all filters attached to a hook, or destroys
qdisc if requested explicitly (as discussed below).

The TC-BPF API functions operate on this bpf_tc_hook to attach, replace,
query, and detach tc filters. All functions return 0 on success, and a
negative error code on failure.

bpf_tc_hook_create - Create a hook
Parameters:
	@hook - Cannot be NULL, ifindex > 0, attach_point must be set to
		proper enum constant. Note that parent must be unset when
		attach_point is one of BPF_TC_INGRESS or BPF_TC_EGRESS. Note
		that as an exception BPF_TC_INGRESS|BPF_TC_EGRESS is also a
		valid value for attach_point.

		Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM.

bpf_tc_hook_destroy - Destroy a hook
Parameters:
	@hook - Cannot be NULL. The behaviour depends on value of
		attach_point. If BPF_TC_INGRESS, all filters attached to
		the ingress hook will be detached. If BPF_TC_EGRESS, all
		filters attached to the egress hook will be detached. If
		BPF_TC_INGRESS|BPF_TC_EGRESS, the clsact qdisc will be
		deleted, also detaching all filters. As before, parent must
		be unset for these attach_points, and set for BPF_TC_CUSTOM.

		It is advised that if the qdisc is operated on by many programs,
		then the program at least check that there are no other existing
		filters before deleting the clsact qdisc. An example is shown
		below:

		DECLARE_LIBBPF_OPTS(bpf_tc_hook, .ifindex = if_nametoindex("lo"),
				    .attach_point = BPF_TC_INGRESS);
		/* set opts as NULL, as we're not really interested in
		 * getting any info for a particular filter, but just
	 	 * detecting its presence.
		 */
		r = bpf_tc_query(&hook, NULL);
		if (r == -ENOENT) {
			/* no filters */
			hook.attach_point = BPF_TC_INGRESS|BPF_TC_EGREESS;
			return bpf_tc_hook_destroy(&hook);
		} else {
			/* failed or r == 0, the latter means filters do exist */
			return r;
		}

		Note that there is a small race between checking for no
		filters and deleting the qdisc. This is currently unavoidable.

		Returns -EOPNOTSUPP when hook has attach_point as BPF_TC_CUSTOM.

bpf_tc_attach - Attach a filter to a hook
Parameters:
	@hook - Cannot be NULL. Represents the hook the filter will be
		attached to. Requirements for ifindex and attach_point are
		same as described in bpf_tc_hook_create, but BPF_TC_CUSTOM
		is also supported.  In that case, parent must be set to the
		handle where the filter will be attached (using BPF_TC_PARENT).
		E.g. to set parent to 1:16 like in tc command line, the
		equivalent would be BPF_TC_PARENT(1, 16).

	@opts - Cannot be NULL. The following opts are optional:
		* handle   - The handle of the filter
		* priority - The priority of the filter
			     Must be >= 0 and <= UINT16_MAX
		Note that when left unset, they will be auto-allocated by
		the kernel. The following opts must be set:
		* prog_fd - The fd of the loaded SCHED_CLS prog
		The following opts must be unset:
		* prog_id - The ID of the BPF prog
		The following opts are optional:
		* flags - Currently only BPF_TC_F_REPLACE is allowed. It
			  allows replacing an existing filter instead of
			  failing with -EEXIST.
		The following opts will be filled by bpf_tc_attach on a
		successful attach operation if they are unset:
		* handle   - The handle of the attached filter
		* priority - The priority of the attached filter
		* prog_id  - The ID of the attached SCHED_CLS prog
		This way, the user can know what the auto allocated values
		for optional opts like handle and priority are for the newly
		attached filter, if they were unset.

		Note that some other attributes are set to fixed default
		values listed below (this holds for all bpf_tc_* APIs):
		protocol as ETH_P_ALL, direct action mode, chain index of 0,
		and class ID of 0 (this can be set by writing to the
		skb->tc_classid field from the BPF program).

bpf_tc_detach
Parameters:
	@hook - Cannot be NULL. Represents the hook the filter will be
		detached from. Requirements are same as described above
		in bpf_tc_attach.

	@opts - Cannot be NULL. The following opts must be set:
		* handle, priority
		The following opts must be unset:
		* prog_fd, prog_id, flags

bpf_tc_query
Parameters:
	@hook - Cannot be NULL. Represents the hook where the filter lookup will
		be performed. Requirements are same as described above in
		bpf_tc_attach().

	@opts - Cannot be NULL. The following opts must be set:
		* handle, priority
		The following opts must be unset:
		* prog_fd, prog_id, flags
		The following fields will be filled by bpf_tc_query upon a
		successful lookup:
		* prog_id

Some usage examples (using BPF skeleton infrastructure):

BPF program (test_tc_bpf.c):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("classifier")
	int cls(struct __sk_buff *skb)
	{
		return 0;
	}

Userspace loader:

	struct test_tc_bpf *skel = NULL;
	int fd, r;

	skel = test_tc_bpf__open_and_load();
	if (!skel)
		return -ENOMEM;

	fd = bpf_program__fd(skel->progs.cls);

	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex =
			    if_nametoindex("lo"), .attach_point =
			    BPF_TC_INGRESS);
	/* Create clsact qdisc */
	r = bpf_tc_hook_create(&hook);
	if (r < 0)
		goto end;

	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .prog_fd = fd);
	r = bpf_tc_attach(&hook, &opts);
	if (r < 0)
		goto end;
	/* Print the auto allocated handle and priority */
	printf("Handle=%u", opts.handle);
	printf("Priority=%u", opts.priority);

	opts.prog_fd = opts.prog_id = 0;
	bpf_tc_detach(&hook, &opts);
end:
	test_tc_bpf__destroy(skel);

This is equivalent to doing the following using tc command line:
  # tc qdisc add dev lo clsact
  # tc filter add dev lo ingress bpf obj foo.o sec classifier da
  # tc filter del dev lo ingress handle <h> prio <p> bpf
... where the handle and priority can be found using:
  # tc filter show dev lo ingress

Another example replacing a filter (extending prior example):

	/* We can also choose both (or one), let's try replacing an
	 * existing filter.
	 */
	DECLARE_LIBBPF_OPTS(bpf_tc_opts, replace_opts, .handle =
			    opts.handle, .priority = opts.priority,
			    .prog_fd = fd);
	r = bpf_tc_attach(&hook, &replace_opts);
	if (r == -EEXIST) {
		/* Expected, now use BPF_TC_F_REPLACE to replace it */
		replace_opts.flags = BPF_TC_F_REPLACE;
		return bpf_tc_attach(&hook, &replace_opts);
	} else if (r < 0) {
		return r;
	}
	/* There must be no existing filter with these
	 * attributes, so cleanup and return an error.
	 */
	replace_opts.prog_fd = replace_opts.prog_id = 0;
	bpf_tc_detach(&hook, &replace_opts);
	return -1;

To obtain info of a particular filter:

	/* Find info for filter with handle 1 and priority 50 */
	DECLARE_LIBBPF_OPTS(bpf_tc_opts, info_opts, .handle = 1,
			    .priority = 50);
	r = bpf_tc_query(&hook, &info_opts);
	if (r == -ENOENT)
		printf("Filter not found");
	else if (r < 0)
		return r;
	printf("Prog ID: %u", info_opts.prog_id);
	return 0;

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net> # libbpf API design
[ Daniel: also did major patch cleanup ]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210512103451.989420-3-memxor@gmail.com
---
 tools/lib/bpf/libbpf.h   |  44 ++++
 tools/lib/bpf/libbpf.map |   5 +
 tools/lib/bpf/netlink.c  | 421 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 469 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3f3a24763459..cc51dc4465f2 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -498,6 +498,7 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
 			     struct bpf_object **pobj, int *prog_fd);
 
+/* XDP related API */
 struct xdp_link_info {
 	__u32 prog_id;
 	__u32 drv_prog_id;
@@ -520,6 +521,49 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 				     size_t info_size, __u32 flags);
 
+/* TC related API */
+enum bpf_tc_attach_point {
+	BPF_TC_INGRESS = 1 << 0,
+	BPF_TC_EGRESS  = 1 << 1,
+	BPF_TC_CUSTOM  = 1 << 2,
+};
+
+#define BPF_TC_PARENT(a, b) 	\
+	((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU))
+
+enum bpf_tc_flags {
+	BPF_TC_F_REPLACE = 1 << 0,
+};
+
+struct bpf_tc_hook {
+	size_t sz;
+	int ifindex;
+	enum bpf_tc_attach_point attach_point;
+	__u32 parent;
+	size_t :0;
+};
+#define bpf_tc_hook__last_field parent
+
+struct bpf_tc_opts {
+	size_t sz;
+	int prog_fd;
+	__u32 flags;
+	__u32 prog_id;
+	__u32 handle;
+	__u32 priority;
+	size_t :0;
+};
+#define bpf_tc_opts__last_field priority
+
+LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook);
+LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook,
+			     struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook,
+			     const struct bpf_tc_opts *opts);
+LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
+			    struct bpf_tc_opts *opts);
+
 /* Ring buffer APIs */
 struct ring_buffer;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b9b29baf1df8..6c96729050dc 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -361,4 +361,9 @@ LIBBPF_0.4.0 {
 		bpf_linker__new;
 		bpf_map__inner_map;
 		bpf_object__set_kversion;
+		bpf_tc_attach;
+		bpf_tc_detach;
+		bpf_tc_hook_create;
+		bpf_tc_hook_destroy;
+		bpf_tc_query;
 } LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 8bbdc6c38f06..47444588e0d2 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -4,7 +4,10 @@
 #include <stdlib.h>
 #include <memory.h>
 #include <unistd.h>
+#include <arpa/inet.h>
 #include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
 #include <linux/rtnetlink.h>
 #include <sys/socket.h>
 #include <errno.h>
@@ -78,6 +81,12 @@ static void libbpf_netlink_close(int sock)
 	close(sock);
 }
 
+enum {
+	NL_CONT,
+	NL_NEXT,
+	NL_DONE,
+};
+
 static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
 			       __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
 			       void *cookie)
@@ -89,6 +98,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
 	int len, ret;
 
 	while (multipart) {
+start:
 		multipart = false;
 		len = recv(sock, buf, sizeof(buf), 0);
 		if (len < 0) {
@@ -126,8 +136,16 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
 			}
 			if (_fn) {
 				ret = _fn(nh, fn, cookie);
-				if (ret)
+				switch (ret) {
+				case NL_CONT:
+					break;
+				case NL_NEXT:
+					goto start;
+				case NL_DONE:
+					return 0;
+				default:
 					return ret;
+				}
 			}
 		}
 	}
@@ -353,3 +371,404 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
 
 	return ret;
 }
+
+typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t,
+			      size_t maxsz);
+
+static int clsact_config(struct nlmsghdr *nh, struct tcmsg *t, size_t maxsz)
+{
+	t->tcm_parent = TC_H_CLSACT;
+	t->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+
+	return nlattr_add(nh, maxsz, TCA_KIND, "clsact", sizeof("clsact"));
+}
+
+static int attach_point_to_config(struct bpf_tc_hook *hook,
+				  qdisc_config_t *config)
+{
+	switch (OPTS_GET(hook, attach_point, 0)) {
+	case BPF_TC_INGRESS:
+	case BPF_TC_EGRESS:
+	case BPF_TC_INGRESS | BPF_TC_EGRESS:
+		if (OPTS_GET(hook, parent, 0))
+			return -EINVAL;
+		*config = &clsact_config;
+		return 0;
+	case BPF_TC_CUSTOM:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point,
+			     __u32 *parent)
+{
+	switch (attach_point) {
+	case BPF_TC_INGRESS:
+	case BPF_TC_EGRESS:
+		if (*parent)
+			return -EINVAL;
+		*parent = TC_H_MAKE(TC_H_CLSACT,
+				    attach_point == BPF_TC_INGRESS ?
+				    TC_H_MIN_INGRESS : TC_H_MIN_EGRESS);
+		break;
+	case BPF_TC_CUSTOM:
+		if (!*parent)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
+{
+	qdisc_config_t config;
+	int ret;
+	struct {
+		struct nlmsghdr nh;
+		struct tcmsg tc;
+		char buf[256];
+	} req;
+
+	ret = attach_point_to_config(hook, &config);
+	if (ret < 0)
+		return ret;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
+	req.nh.nlmsg_type  = cmd;
+	req.tc.tcm_family  = AF_UNSPEC;
+	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
+
+	ret = config(&req.nh, &req.tc, sizeof(req));
+	if (ret < 0)
+		return ret;
+
+	return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+}
+
+static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
+{
+	return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE);
+}
+
+static int tc_qdisc_delete(struct bpf_tc_hook *hook)
+{
+	return tc_qdisc_modify(hook, RTM_DELQDISC, 0);
+}
+
+int bpf_tc_hook_create(struct bpf_tc_hook *hook)
+{
+	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+	    OPTS_GET(hook, ifindex, 0) <= 0)
+		return -EINVAL;
+
+	return tc_qdisc_create_excl(hook);
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+			   const struct bpf_tc_opts *opts,
+			   const bool flush);
+
+int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
+{
+	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
+	    OPTS_GET(hook, ifindex, 0) <= 0)
+		return -EINVAL;
+
+	switch (OPTS_GET(hook, attach_point, 0)) {
+	case BPF_TC_INGRESS:
+	case BPF_TC_EGRESS:
+		return __bpf_tc_detach(hook, NULL, true);
+	case BPF_TC_INGRESS | BPF_TC_EGRESS:
+		return tc_qdisc_delete(hook);
+	case BPF_TC_CUSTOM:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+}
+
+struct bpf_cb_ctx {
+	struct bpf_tc_opts *opts;
+	bool processed;
+};
+
+static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb,
+			 bool unicast)
+{
+	struct nlattr *tbb[TCA_BPF_MAX + 1];
+	struct bpf_cb_ctx *info = cookie;
+
+	if (!info || !info->opts)
+		return -EINVAL;
+	if (unicast && info->processed)
+		return -EINVAL;
+	if (!tb[TCA_OPTIONS])
+		return NL_CONT;
+
+	libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL);
+	if (!tbb[TCA_BPF_ID])
+		return -EINVAL;
+
+	OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID]));
+	OPTS_SET(info->opts, handle, tc->tcm_handle);
+	OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16);
+
+	info->processed = true;
+	return unicast ? NL_NEXT : NL_DONE;
+}
+
+static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
+		       void *cookie)
+{
+	struct tcmsg *tc = NLMSG_DATA(nh);
+	struct nlattr *tb[TCA_MAX + 1];
+
+	libbpf_nla_parse(tb, TCA_MAX,
+			 (struct nlattr *)((char *)tc + NLMSG_ALIGN(sizeof(*tc))),
+			 NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
+	if (!tb[TCA_KIND])
+		return NL_CONT;
+	return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	char name[256];
+	int len, ret;
+
+	ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (ret < 0)
+		return ret;
+
+	ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd));
+	if (ret < 0)
+		return ret;
+	len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
+	if (len < 0)
+		return -errno;
+	if (len >= sizeof(name))
+		return -ENAMETOOLONG;
+	return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1);
+}
+
+int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+	__u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
+	int ret, ifindex, attach_point, prog_fd;
+	struct bpf_cb_ctx info = {};
+	struct nlattr *nla;
+	struct {
+		struct nlmsghdr nh;
+		struct tcmsg tc;
+		char buf[256];
+	} req;
+
+	if (!hook || !opts ||
+	    !OPTS_VALID(hook, bpf_tc_hook) ||
+	    !OPTS_VALID(opts, bpf_tc_opts))
+		return -EINVAL;
+
+	ifindex      = OPTS_GET(hook, ifindex, 0);
+	parent       = OPTS_GET(hook, parent, 0);
+	attach_point = OPTS_GET(hook, attach_point, 0);
+
+	handle       = OPTS_GET(opts, handle, 0);
+	priority     = OPTS_GET(opts, priority, 0);
+	prog_fd      = OPTS_GET(opts, prog_fd, 0);
+	prog_id      = OPTS_GET(opts, prog_id, 0);
+	flags        = OPTS_GET(opts, flags, 0);
+
+	if (ifindex <= 0 || !prog_fd || prog_id)
+		return -EINVAL;
+	if (priority > UINT16_MAX)
+		return -EINVAL;
+	if (flags & ~BPF_TC_F_REPLACE)
+		return -EINVAL;
+
+	flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
+	protocol = ETH_P_ALL;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE |
+			     NLM_F_ECHO | flags;
+	req.nh.nlmsg_type  = RTM_NEWTFILTER;
+	req.tc.tcm_family  = AF_UNSPEC;
+	req.tc.tcm_ifindex = ifindex;
+	req.tc.tcm_handle  = handle;
+	req.tc.tcm_info    = TC_H_MAKE(priority << 16, htons(protocol));
+
+	ret = tc_get_tcm_parent(attach_point, &parent);
+	if (ret < 0)
+		return ret;
+	req.tc.tcm_parent = parent;
+
+	ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
+	if (ret < 0)
+		return ret;
+	nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS);
+	if (!nla)
+		return -EMSGSIZE;
+	ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd);
+	if (ret < 0)
+		return ret;
+	bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
+	ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags,
+			 sizeof(bpf_flags));
+	if (ret < 0)
+		return ret;
+	nlattr_end_nested(&req.nh, nla);
+
+	info.opts = opts;
+
+	ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
+	if (ret < 0)
+		return ret;
+	if (!info.processed)
+		return -ENOENT;
+	return ret;
+}
+
+static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
+			   const struct bpf_tc_opts *opts,
+			   const bool flush)
+{
+	__u32 protocol = 0, handle, priority, parent, prog_id, flags;
+	int ret, ifindex, attach_point, prog_fd;
+	struct {
+		struct nlmsghdr nh;
+		struct tcmsg tc;
+		char buf[256];
+	} req;
+
+	if (!hook ||
+	    !OPTS_VALID(hook, bpf_tc_hook) ||
+	    !OPTS_VALID(opts, bpf_tc_opts))
+		return -EINVAL;
+
+	ifindex      = OPTS_GET(hook, ifindex, 0);
+	parent       = OPTS_GET(hook, parent, 0);
+	attach_point = OPTS_GET(hook, attach_point, 0);
+
+	handle       = OPTS_GET(opts, handle, 0);
+	priority     = OPTS_GET(opts, priority, 0);
+	prog_fd      = OPTS_GET(opts, prog_fd, 0);
+	prog_id      = OPTS_GET(opts, prog_id, 0);
+	flags        = OPTS_GET(opts, flags, 0);
+
+	if (ifindex <= 0 || flags || prog_fd || prog_id)
+		return -EINVAL;
+	if (priority > UINT16_MAX)
+		return -EINVAL;
+	if (flags & ~BPF_TC_F_REPLACE)
+		return -EINVAL;
+	if (!flush) {
+		if (!handle || !priority)
+			return -EINVAL;
+		protocol = ETH_P_ALL;
+	} else {
+		if (handle || priority)
+			return -EINVAL;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type  = RTM_DELTFILTER;
+	req.tc.tcm_family  = AF_UNSPEC;
+	req.tc.tcm_ifindex = ifindex;
+	if (!flush) {
+		req.tc.tcm_handle = handle;
+		req.tc.tcm_info   = TC_H_MAKE(priority << 16, htons(protocol));
+	}
+
+	ret = tc_get_tcm_parent(attach_point, &parent);
+	if (ret < 0)
+		return ret;
+	req.tc.tcm_parent = parent;
+
+	if (!flush) {
+		ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND,
+				 "bpf", sizeof("bpf"));
+		if (ret < 0)
+			return ret;
+	}
+
+	return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+}
+
+int bpf_tc_detach(const struct bpf_tc_hook *hook,
+		  const struct bpf_tc_opts *opts)
+{
+	return !opts ? -EINVAL : __bpf_tc_detach(hook, opts, false);
+}
+
+int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
+{
+	__u32 protocol, handle, priority, parent, prog_id, flags;
+	int ret, ifindex, attach_point, prog_fd;
+	struct bpf_cb_ctx info = {};
+	struct {
+		struct nlmsghdr nh;
+		struct tcmsg tc;
+		char buf[256];
+	} req;
+
+	if (!hook || !opts ||
+	    !OPTS_VALID(hook, bpf_tc_hook) ||
+	    !OPTS_VALID(opts, bpf_tc_opts))
+		return -EINVAL;
+
+	ifindex      = OPTS_GET(hook, ifindex, 0);
+	parent       = OPTS_GET(hook, parent, 0);
+	attach_point = OPTS_GET(hook, attach_point, 0);
+
+	handle       = OPTS_GET(opts, handle, 0);
+	priority     = OPTS_GET(opts, priority, 0);
+	prog_fd      = OPTS_GET(opts, prog_fd, 0);
+	prog_id      = OPTS_GET(opts, prog_id, 0);
+	flags        = OPTS_GET(opts, flags, 0);
+
+	if (ifindex <= 0 || flags || prog_fd || prog_id ||
+	    !handle || !priority)
+		return -EINVAL;
+	if (priority > UINT16_MAX)
+		return -EINVAL;
+
+	protocol = ETH_P_ALL;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct tcmsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST;
+	req.nh.nlmsg_type  = RTM_GETTFILTER;
+	req.tc.tcm_family  = AF_UNSPEC;
+	req.tc.tcm_ifindex = ifindex;
+	req.tc.tcm_handle  = handle;
+	req.tc.tcm_info    = TC_H_MAKE(priority << 16, htons(protocol));
+
+	ret = tc_get_tcm_parent(attach_point, &parent);
+	if (ret < 0)
+		return ret;
+	req.tc.tcm_parent = parent;
+
+	ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
+	if (ret < 0)
+		return ret;
+
+	info.opts = opts;
+
+	ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
+	if (ret < 0)
+		return ret;
+	if (!info.processed)
+		return -ENOENT;
+	return ret;
+}

From f18ba26da88a89db9b50cb4ff47fadb159f2810b Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Wed, 12 May 2021 16:04:51 +0530
Subject: [PATCH 0203/3292] libbpf: Add selftests for TC-BPF management API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds some basic tests for the low level bpf_tc_* API.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210512103451.989420-4-memxor@gmail.com
---
 .../testing/selftests/bpf/prog_tests/tc_bpf.c | 395 ++++++++++++++++++
 .../testing/selftests/bpf/progs/test_tc_bpf.c |  12 +
 2 files changed, 407 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_bpf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_tc_bpf.c

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
new file mode 100644
index 000000000000..4a505a5adf4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_bpf.skel.h"
+
+#define LO_IFINDEX 1
+
+#define TEST_DECLARE_OPTS(__fd)                                                                   \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1);                                     \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1);                                   \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd);                                 \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1);                     \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd);                   \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd);                 \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd);   \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42);     \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1,                     \
+			    .flags = BPF_TC_F_REPLACE);                                            \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd,   \
+			    .prog_id = 42);                                                        \
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1);
+
+static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	int ret;
+
+	ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd"))
+		return ret;
+
+	ret = bpf_tc_attach(hook, &opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach"))
+		return ret;
+
+	if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+	    !ASSERT_EQ(opts.priority, 1, "priority set") ||
+	    !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+		goto end;
+
+	opts.prog_id = 0;
+	opts.flags = BPF_TC_F_REPLACE;
+	ret = bpf_tc_attach(hook, &opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach replace mode"))
+		goto end;
+
+	opts.flags = opts.prog_fd = opts.prog_id = 0;
+	ret = bpf_tc_query(hook, &opts);
+	if (!ASSERT_OK(ret, "bpf_tc_query"))
+		goto end;
+
+	if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+	    !ASSERT_EQ(opts.priority, 1, "priority set") ||
+	    !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+		goto end;
+
+end:
+	opts.flags = opts.prog_fd = opts.prog_id = 0;
+	ret = bpf_tc_detach(hook, &opts);
+	ASSERT_OK(ret, "bpf_tc_detach");
+	return ret;
+}
+
+static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd);
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1);
+	int ret;
+
+	ret = bpf_tc_hook_create(NULL);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL"))
+		return -EINVAL;
+
+	/* hook ifindex = 0 */
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0"))
+		return -EINVAL;
+	attach_opts.prog_id = 0;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0"))
+		return -EINVAL;
+
+	/* hook ifindex < 0 */
+	inv_hook.ifindex = -1;
+
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0"))
+		return -EINVAL;
+	attach_opts.prog_id = 0;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0"))
+		return -EINVAL;
+
+	inv_hook.ifindex = LO_IFINDEX;
+
+	/* hook.attach_point invalid */
+	inv_hook.attach_point = 0xabcd;
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point"))
+		return -EINVAL;
+
+	inv_hook.attach_point = BPF_TC_INGRESS;
+
+	/* hook.attach_point valid, but parent invalid */
+	inv_hook.parent = TC_H_MAKE(1UL << 16, 10);
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+		return -EINVAL;
+
+	inv_hook.attach_point = BPF_TC_CUSTOM;
+	inv_hook.parent = 0;
+	/* These return EOPNOTSUPP instead of EINVAL as parent is checked after
+	 * attach_point of the hook.
+	 */
+	ret = bpf_tc_hook_create(&inv_hook);
+	if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_hook_destroy(&inv_hook);
+	if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_attach(&inv_hook, &attach_opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_detach(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+		return -EINVAL;
+
+	ret = bpf_tc_query(&inv_hook, &opts);
+	if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+		return -EINVAL;
+
+	inv_hook.attach_point = BPF_TC_INGRESS;
+
+	/* detach */
+	{
+		TEST_DECLARE_OPTS(fd);
+
+		ret = bpf_tc_detach(NULL, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_hpr);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_hpf);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_hpi);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_p);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_h);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_detach(hook, &opts_prio_max);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX"))
+			return -EINVAL;
+	}
+
+	/* query */
+	{
+		TEST_DECLARE_OPTS(fd);
+
+		ret = bpf_tc_query(NULL, &opts);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_hpr);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_hpf);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_hpi);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_p);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_h);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_query(hook, &opts_prio_max);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX"))
+			return -EINVAL;
+
+		/* when chain is not present, kernel returns -EINVAL */
+		ret = bpf_tc_query(hook, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set"))
+			return -EINVAL;
+	}
+
+	/* attach */
+	{
+		TEST_DECLARE_OPTS(fd);
+
+		ret = bpf_tc_attach(NULL, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL"))
+			return -EINVAL;
+
+		opts_hp.flags = 42;
+		ret = bpf_tc_attach(hook, &opts_hp);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, NULL);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, &opts_hpi);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, &opts_pf);
+		if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset"))
+			return -EINVAL;
+		opts_pf.prog_fd = opts_pf.prog_id = 0;
+		ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach");
+
+		ret = bpf_tc_attach(hook, &opts_hf);
+		if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset"))
+			return -EINVAL;
+		opts_hf.prog_fd = opts_hf.prog_id = 0;
+		ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach");
+
+		ret = bpf_tc_attach(hook, &opts_prio_max);
+		if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX"))
+			return -EINVAL;
+
+		ret = bpf_tc_attach(hook, &opts_f);
+		if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset"))
+			return -EINVAL;
+		opts_f.prog_fd = opts_f.prog_id = 0;
+		ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach");
+	}
+
+	return 0;
+}
+
+void test_tc_bpf(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+			    .attach_point = BPF_TC_INGRESS);
+	struct test_tc_bpf *skel = NULL;
+	bool hook_created = false;
+	int cls_fd, ret;
+
+	skel = test_tc_bpf__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+		return;
+
+	cls_fd = bpf_program__fd(skel->progs.cls);
+
+	ret = bpf_tc_hook_create(&hook);
+	if (ret == 0)
+		hook_created = true;
+
+	ret = ret == -EEXIST ? 0 : ret;
+	if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+		goto end;
+
+	hook.attach_point = BPF_TC_CUSTOM;
+	hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+	ret = bpf_tc_hook_create(&hook);
+	if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point"))
+		goto end;
+
+	ret = test_tc_bpf_basic(&hook, cls_fd);
+	if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+		goto end;
+
+	ret = bpf_tc_hook_destroy(&hook);
+	if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point"))
+		goto end;
+
+	hook.attach_point = BPF_TC_INGRESS;
+	hook.parent = 0;
+	bpf_tc_hook_destroy(&hook);
+
+	ret = test_tc_bpf_basic(&hook, cls_fd);
+	if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+		goto end;
+
+	bpf_tc_hook_destroy(&hook);
+
+	hook.attach_point = BPF_TC_EGRESS;
+	ret = test_tc_bpf_basic(&hook, cls_fd);
+	if (!ASSERT_OK(ret, "test_tc_internal egress"))
+		goto end;
+
+	bpf_tc_hook_destroy(&hook);
+
+	ret = test_tc_bpf_api(&hook, cls_fd);
+	if (!ASSERT_OK(ret, "test_tc_bpf_api"))
+		goto end;
+
+	bpf_tc_hook_destroy(&hook);
+
+end:
+	if (hook_created) {
+		hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+		bpf_tc_hook_destroy(&hook);
+	}
+	test_tc_bpf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
new file mode 100644
index 000000000000..18a3a7ed924a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* Dummy prog to test TC-BPF API */
+
+SEC("classifier")
+int cls(struct __sk_buff *skb)
+{
+	return 0;
+}

From c2b1063e8feb2115537addce10f36c0c82d11d9b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 Apr 2021 08:23:25 +0200
Subject: [PATCH 0204/3292] genirq: Add a IRQF_NO_DEBUG flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The whole call to note_interrupt() can be avoided or return early when
interrupts would be marked accordingly. For IPI handlers which always
return HANDLED the whole procedure is pretty pointless to begin with.

Add a IRQF_NO_DEBUG flag and mark the interrupt accordingly if supplied
when the interrupt is requested.

When noirqdebug is set on the kernel commandline, then the interrupt is
marked unconditionally so that there is only one condition in the hotpath
to evaluate.

 [ clg: Add changelog ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/7a8ad02f-63a8-c1aa-fdd1-39d973593d02@kaod.org
---
 include/linux/interrupt.h |  3 +++
 include/linux/irq.h       |  2 ++
 kernel/irq/chip.c         |  2 +-
 kernel/irq/handle.c       |  2 +-
 kernel/irq/manage.c       |  5 +++++
 kernel/irq/settings.h     | 12 ++++++++++++
 6 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 4777850a6dc7..a52109c3f3a4 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -64,6 +64,8 @@
  * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it.
  *                Users will enable it explicitly by enable_irq() or enable_nmi()
  *                later.
+ * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers,
+ *		   depends on IRQF_PERCPU.
  */
 #define IRQF_SHARED		0x00000080
 #define IRQF_PROBE_SHARED	0x00000100
@@ -78,6 +80,7 @@
 #define IRQF_EARLY_RESUME	0x00020000
 #define IRQF_COND_SUSPEND	0x00040000
 #define IRQF_NO_AUTOEN		0x00080000
+#define IRQF_NO_DEBUG		0x00100000
 
 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 31b347c9f8dd..8e9a9ae471a6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -72,6 +72,7 @@ enum irqchip_irq_state;
  *				  mechanism and from core side polling.
  * IRQ_DISABLE_UNLAZY		- Disable lazy irq disable
  * IRQ_HIDDEN			- Don't show up in /proc/interrupts
+ * IRQ_NO_DEBUG			- Exclude from note_interrupt() debugging
  */
 enum {
 	IRQ_TYPE_NONE		= 0x00000000,
@@ -99,6 +100,7 @@ enum {
 	IRQ_IS_POLLED		= (1 << 18),
 	IRQ_DISABLE_UNLAZY	= (1 << 19),
 	IRQ_HIDDEN		= (1 << 20),
+	IRQ_NO_DEBUG		= (1 << 21),
 };
 
 #define IRQF_MODIFY_MASK	\
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 8cc8e5713287..7f04c7d8296e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -481,7 +481,7 @@ void handle_nested_irq(unsigned int irq)
 	for_each_action_of_desc(desc, action)
 		action_ret |= action->thread_fn(action->irq, action->dev_id);
 
-	if (!noirqdebug)
+	if (!irq_settings_no_debug(desc))
 		note_interrupt(desc, action_ret);
 
 	raw_spin_lock_irq(&desc->lock);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 762a928e18f9..221d80c31e94 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -197,7 +197,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
 	add_interrupt_randomness(desc->irq_data.irq, flags);
 
-	if (!noirqdebug)
+	if (!irq_settings_no_debug(desc))
 		note_interrupt(desc, retval);
 	return retval;
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4c14356543d9..7bdd09e7d5f0 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1686,8 +1686,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (new->flags & IRQF_PERCPU) {
 			irqd_set(&desc->irq_data, IRQD_PER_CPU);
 			irq_settings_set_per_cpu(desc);
+			if (new->flags & IRQF_NO_DEBUG)
+				irq_settings_set_no_debug(desc);
 		}
 
+		if (noirqdebug)
+			irq_settings_set_no_debug(desc);
+
 		if (new->flags & IRQF_ONESHOT)
 			desc->istate |= IRQS_ONESHOT;
 
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 403378b9947b..7b7efb1a114b 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -18,6 +18,7 @@ enum {
 	_IRQ_IS_POLLED		= IRQ_IS_POLLED,
 	_IRQ_DISABLE_UNLAZY	= IRQ_DISABLE_UNLAZY,
 	_IRQ_HIDDEN		= IRQ_HIDDEN,
+	_IRQ_NO_DEBUG		= IRQ_NO_DEBUG,
 	_IRQF_MODIFY_MASK	= IRQF_MODIFY_MASK,
 };
 
@@ -33,6 +34,7 @@ enum {
 #define IRQ_IS_POLLED		GOT_YOU_MORON
 #define IRQ_DISABLE_UNLAZY	GOT_YOU_MORON
 #define IRQ_HIDDEN		GOT_YOU_MORON
+#define IRQ_NO_DEBUG		GOT_YOU_MORON
 #undef IRQF_MODIFY_MASK
 #define IRQF_MODIFY_MASK	GOT_YOU_MORON
 
@@ -174,3 +176,13 @@ static inline bool irq_settings_is_hidden(struct irq_desc *desc)
 {
 	return desc->status_use_accessors & _IRQ_HIDDEN;
 }
+
+static inline void irq_settings_set_no_debug(struct irq_desc *desc)
+{
+	desc->status_use_accessors |= _IRQ_NO_DEBUG;
+}
+
+static inline bool irq_settings_no_debug(struct irq_desc *desc)
+{
+	return desc->status_use_accessors & _IRQ_NO_DEBUG;
+}

From 94c821c74bf5fe0c25e09df5334a16f98608db90 Mon Sep 17 00:00:00 2001
From: Seth David Schoen <schoen@loyalty.org>
Date: Wed, 12 May 2021 21:37:49 -0700
Subject: [PATCH 0205/3292] ip: Treat IPv4 segment's lowest address as unicast

Treat only the highest, not the lowest, IPv4 address within a local
subnet as a broadcast address.

Signed-off-by: Seth David Schoen <schoen@loyalty.org>
Suggested-by: John Gilmore <gnu@toad.com>
Acked-by: Dave Taht <dave.taht@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 84bb707bd88d..bfb345c88271 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1122,10 +1122,8 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 				  prefix, ifa->ifa_prefixlen, prim,
 				  ifa->ifa_rt_priority);
 
-		/* Add network specific broadcasts, when it takes a sense */
+		/* Add the network broadcast address, when it makes sense */
 		if (ifa->ifa_prefixlen < 31) {
-			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
-				  prim, 0);
 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
 				  32, prim, 0);
 		}

From 6101ca0384e3778cf6ad4f938fbc094a0386ec01 Mon Sep 17 00:00:00 2001
From: Seth David Schoen <schoen@loyalty.org>
Date: Wed, 12 May 2021 21:38:25 -0700
Subject: [PATCH 0206/3292] selftests: Lowest IPv4 address in a subnet is valid

Expect the lowest IPv4 address in a subnet to be assignable
and addressable as a unicast (non-broadcast) address on a
local network segment.

Signed-off-by: Seth David Schoen <schoen@loyalty.org>
Suggested-by: John Gilmore <gnu@toad.com>
Acked-by: Dave Taht <dave.taht@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/unicast_extensions.sh | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index dbf0421986df..66354cdd5ce4 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -189,6 +189,15 @@ segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255
 route_test 240.5.6.7 240.5.6.1  255.1.2.1    255.1.2.3      24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
 route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
 #
+# Test support for lowest address ending in .0
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)"
+#
+# Test support for lowest address not ending in .0
+segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)"
+#
+# Routing using lowest address as a gateway/endpoint
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address"
+#
 # ==============================================
 # ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
 # ==============================================
@@ -202,14 +211,6 @@ segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forb
 # Currently Linux does not allow this, so this should fail too
 segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
 #
-# Test support for lowest address
-# Currently Linux does not allow this, so this should fail too
-segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)"
-#
-# Routing using lowest address as a gateway/endpoint
-# Currently Linux does not allow this, so this should fail too
-route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)"
-#
 # Test support for unicast use of class D
 # Currently Linux does not allow this, so this should fail too
 segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"

From 7cb7541a8cc070e9b2ee66cb0b72b1ceb1feef7d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 15 May 2021 13:53:21 +0200
Subject: [PATCH 0207/3292] r8169: use KBUILD_MODNAME instead of own module
 name definition

Remove own module name definition and use KBUILD_MODNAME instead.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 2c89cde7da1e..1663e0486496 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -34,8 +34,6 @@
 #include "r8169.h"
 #include "r8169_firmware.h"
 
-#define MODULENAME "r8169"
-
 #define FIRMWARE_8168D_1	"rtl_nic/rtl8168d-1.fw"
 #define FIRMWARE_8168D_2	"rtl_nic/rtl8168d-2.fw"
 #define FIRMWARE_8168E_1	"rtl_nic/rtl8168e-1.fw"
@@ -1454,7 +1452,7 @@ static void rtl8169_get_drvinfo(struct net_device *dev,
 	struct rtl8169_private *tp = netdev_priv(dev);
 	struct rtl_fw *rtl_fw = tp->rtl_fw;
 
-	strlcpy(info->driver, MODULENAME, sizeof(info->driver));
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
 	strlcpy(info->bus_info, pci_name(tp->pci_dev), sizeof(info->bus_info));
 	BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version));
 	if (rtl_fw)
@@ -5305,7 +5303,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -ENODEV;
 	}
 
-	rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
+	rc = pcim_iomap_regions(pdev, BIT(region), KBUILD_MODNAME);
 	if (rc < 0) {
 		dev_err(&pdev->dev, "cannot remap MMIO, aborting\n");
 		return rc;
@@ -5440,7 +5438,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 }
 
 static struct pci_driver rtl8169_pci_driver = {
-	.name		= MODULENAME,
+	.name		= KBUILD_MODNAME,
 	.id_table	= rtl8169_pci_tbl,
 	.probe		= rtl_init_one,
 	.remove		= rtl_remove_one,

From 1d7f7ecadc5ac94aaca15c4dcfc910848d66103f Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:12 +0800
Subject: [PATCH 0208/3292] net: arc: Demote non-compliant kernel-doc headers

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/arc/emac_rockchip.c:18: warning: expecting prototype for emac(). Prototype was for DRV_NAME() instead

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/arc/emac_rockchip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 48ecdf15eddc..1c9ca3bcb871 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
  * emac-rockchip.c - Rockchip EMAC specific glue layer
  *
  * Copyright (C) 2014 Romain Perier <romain.perier@gmail.com>

From 8965c1c535b1514b1b5a90b7de49334ba9e3851d Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:13 +0800
Subject: [PATCH 0209/3292] net: atheros: atl1c: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/atheros/atl1c/atl1c_main.c:442: warning: expecting prototype for atl1c_set_mac(). Prototype was for atl1c_set_mac_addr() instead
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c:969: warning: expecting prototype for atl1c_setup_mem_resources(). Prototype was for atl1c_setup_ring_resources() instead
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c:1375: warning: expecting prototype for atl1c_configure(). Prototype was for atl1c_configure_mac() instead

Cc: Chris Snook <chris.snook@gmail.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 740127a6a21d..77da1c54c49f 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -432,7 +432,7 @@ static void atl1c_restore_vlan(struct atl1c_adapter *adapter)
 }
 
 /**
- * atl1c_set_mac - Change the Ethernet Address of the NIC
+ * atl1c_set_mac_addr - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
  * @p: pointer to an address structure
  *
@@ -960,7 +960,7 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
 }
 
 /**
- * atl1c_setup_mem_resources - allocate Tx / RX descriptor resources
+ * atl1c_setup_ring_resources - allocate Tx / RX descriptor resources
  * @adapter: board private structure
  *
  * Return 0 on success, negative on failure
@@ -1366,7 +1366,7 @@ static void atl1c_set_aspm(struct atl1c_hw *hw, u16 link_speed)
 }
 
 /**
- * atl1c_configure - Configure Transmit&Receive Unit after Reset
+ * atl1c_configure_mac - Configure Transmit&Receive Unit after Reset
  * @adapter: board private structure
  *
  * Configure the Tx /Rx unit of the MAC after a reset.

From b43e1554a7cf785afdcb91df259bbe8dfe0e729d Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:14 +0800
Subject: [PATCH 0210/3292] net: atheros: atl1e: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/atheros/atl1e/atl1e_main.c:367: warning: expecting prototype for atl1e_set_mac(). Prototype was for atl1e_set_mac_addr() instead
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c:796: warning: expecting prototype for atl1e_setup_mem_resources(). Prototype was for atl1e_setup_ring_resources() instead

Cc: Chris Snook <chris.snook@gmail.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index ff9f96de74b8..2eb0a2ab69f6 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -357,7 +357,7 @@ static void atl1e_restore_vlan(struct atl1e_adapter *adapter)
 }
 
 /**
- * atl1e_set_mac - Change the Ethernet Address of the NIC
+ * atl1e_set_mac_addr - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
  * @p: pointer to an address structure
  *
@@ -787,7 +787,7 @@ static void atl1e_free_ring_resources(struct atl1e_adapter *adapter)
 }
 
 /**
- * atl1e_setup_mem_resources - allocate Tx / RX descriptor resources
+ * atl1e_setup_ring_resources - allocate Tx / RX descriptor resources
  * @adapter: board private structure
  *
  * Return 0 on success, negative on failure

From c706c75aaee20e5ee05a05f6504a5711e473002e Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:15 +0800
Subject: [PATCH 0211/3292] net: atheros: atl1x: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/atheros/atlx/atl1.c:1020: warning: expecting prototype for atl1_setup_mem_resources(). Prototype was for atl1_setup_ring_resources() instead

Cc: Chris Snook <chris.snook@gmail.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atlx/atl1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index eaf96d002fa5..c67201a13cf5 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1011,7 +1011,7 @@ static int atl1_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 }
 
 /**
- * atl1_setup_mem_resources - allocate Tx / RX descriptor resources
+ * atl1_setup_ring_resources - allocate Tx / RX descriptor resources
  * @adapter: board private structure
  *
  * Return 0 on success, negative on failure

From 76d85049173ba0138ab61b29c0f1ee35bf7da5fa Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:16 +0800
Subject: [PATCH 0212/3292] net: broadcom: bnx2x: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c:13595: warning: expecting prototype for bnx2x_get_num_none_def_sbs(). Prototype was for bnx2x_get_num_non_def_sbs() instead
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c:4165: warning: expecting prototype for atomic_add_ifless(). Prototype was for __atomic_add_ifless() instead
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c:4193: warning: expecting prototype for atomic_dec_ifmoe(). Prototype was for __atomic_dec_ifmoe() instead

Cc: Ariel Elior <aelior@marvell.com>
Cc: Sudarsana Kalluru <skalluru@marvell.com>
Cc: GR-everest-linux-l2@marvell.com
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 281b1c2e04a7..2acbc73dcd18 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13586,7 +13586,7 @@ static int bnx2x_set_qm_cid_count(struct bnx2x *bp)
 }
 
 /**
- * bnx2x_get_num_none_def_sbs - return the number of none default SBs
+ * bnx2x_get_num_non_def_sbs - return the number of none default SBs
  * @pdev: pci device
  * @cnic_cnt: count
  *
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 6cd1523ad9e5..542c69822649 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -4152,7 +4152,7 @@ void bnx2x_init_mcast_obj(struct bnx2x *bp,
 /*************************** Credit handling **********************************/
 
 /**
- * atomic_add_ifless - add if the result is less than a given value.
+ * __atomic_add_ifless - add if the result is less than a given value.
  *
  * @v:	pointer of type atomic_t
  * @a:	the amount to add to v...
@@ -4180,7 +4180,7 @@ static inline bool __atomic_add_ifless(atomic_t *v, int a, int u)
 }
 
 /**
- * atomic_dec_ifmoe - dec if the result is more or equal than a given value.
+ * __atomic_dec_ifmoe - dec if the result is more or equal than a given value.
  *
  * @v:	pointer of type atomic_t
  * @a:	the amount to dec from v...

From 5a02bf4fefd5e6e3588d650ccef79a768621d07b Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:17 +0800
Subject: [PATCH 0213/3292] net: brocade: bna: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/brocade/bna/bfa_cee.c:91: warning: expecting prototype for bfa_cee_get_attr_isr(). Prototype was for bfa_cee_get_stats_isr() instead

Cc: Rasesh Mody <rmody@marvell.com>
Cc: Sudarsana Kalluru <skalluru@marvell.com>
Cc: GR-Linux-NIC-Dev@marvell.com
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/brocade/bna/bfa_cee.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/brocade/bna/bfa_cee.c b/drivers/net/ethernet/brocade/bna/bfa_cee.c
index 06f221c44802..eeb05e31713f 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_cee.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_cee.c
@@ -82,7 +82,7 @@ bfa_cee_get_attr_isr(struct bfa_cee *cee, enum bfa_status status)
 }
 
 /**
- * bfa_cee_get_attr_isr - CEE ISR for get-stats responses from f/w
+ * bfa_cee_get_stats_isr - CEE ISR for get-stats responses from f/w
  *
  * @cee: Pointer to the CEE module
  * @status: Return status from the f/w

From c1167cee462d5a2d446a51189fdd3b0534cf5add Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:18 +0800
Subject: [PATCH 0214/3292] net: cadence: Demote non-compliant kernel-doc
 headers

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/cadence/macb_pci.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/ethernet/cadence/macb_ptp.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_pci.c | 2 +-
 drivers/net/ethernet/cadence/macb_ptp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index 353393dea639..8b7b59908a1a 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
  * DOC: Cadence GEM PCI wrapper.
  *
  * Copyright (C) 2016 Cadence Design Systems - https://www.cadence.com
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index 283918aeb741..5c368a9cbbbc 100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
  * 1588 PTP support for Cadence GEM device.
  *
  * Copyright (C) 2017 Cadence Design Systems - https://www.cadence.com

From 2e45d961a6a851897a4db15a4f6b2d9feb123c65 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:19 +0800
Subject: [PATCH 0215/3292] net: calxeda: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/calxeda/xgmac.c:720: warning: expecting prototype for init_xgmac_dma_desc_rings(). Prototype was for xgmac_dma_desc_rings_init() instead
 drivers/net/ethernet/calxeda/xgmac.c:867: warning: expecting prototype for xgmac_tx(). Prototype was for xgmac_tx_complete() instead
 drivers/net/ethernet/calxeda/xgmac.c:1049: warning: expecting prototype for xgmac_release(). Prototype was for xgmac_stop() instead
 drivers/net/ethernet/calxeda/xgmac.c:1822: warning: expecting prototype for xgmac_dvr_remove(). Prototype was for xgmac_remove() instead

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/calxeda/xgmac.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index bbb453c6a5f7..b6a066404f4b 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -711,7 +711,7 @@ static void xgmac_rx_refill(struct xgmac_priv *priv)
 }
 
 /**
- * init_xgmac_dma_desc_rings - init the RX/TX descriptor rings
+ * xgmac_dma_desc_rings_init - init the RX/TX descriptor rings
  * @dev: net device structure
  * Description:  this function initializes the DMA RX/TX descriptors
  * and allocates the socket buffers.
@@ -859,7 +859,7 @@ static void xgmac_free_dma_desc_rings(struct xgmac_priv *priv)
 }
 
 /**
- * xgmac_tx:
+ * xgmac_tx_complete:
  * @priv: private driver structure
  * Description: it reclaims resources after transmission completes.
  */
@@ -1040,7 +1040,7 @@ static int xgmac_open(struct net_device *dev)
 }
 
 /**
- *  xgmac_release - close entry point of the driver
+ *  xgmac_stop - close entry point of the driver
  *  @dev : device pointer.
  *  Description:
  *  This is the stop entry point of the driver.
@@ -1812,7 +1812,7 @@ err_alloc:
 }
 
 /**
- * xgmac_dvr_remove
+ * xgmac_remove
  * @pdev: platform device pointer
  * Description: this function resets the TX/RX processes, disables the MAC RX/TX
  * changes the link status, releases the DMA descriptor rings,

From aeed744a49ba64a53095d6020e5533b9477fc7f4 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:20 +0800
Subject: [PATCH 0216/3292] net: chelsio: cxgb3: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/chelsio/cxgb3/sge.c:677: warning: expecting prototype for free_qset(). Prototype was for t3_free_qset() instead
 drivers/net/ethernet/chelsio/cxgb3/sge.c:1266: warning: expecting prototype for eth_xmit(). Prototype was for t3_eth_xmit() instead

Cc: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/sge.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 1cc3c51eff71..cec7308e2d5b 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -665,7 +665,7 @@ static void t3_reset_qset(struct sge_qset *q)
 
 
 /**
- *	free_qset - free the resources of an SGE queue set
+ *	t3_free_qset - free the resources of an SGE queue set
  *	@adapter: the adapter owning the queue set
  *	@q: the queue set
  *
@@ -1256,7 +1256,7 @@ static inline void t3_stop_tx_queue(struct netdev_queue *txq,
 }
 
 /**
- *	eth_xmit - add a packet to the Ethernet Tx queue
+ *	t3_eth_xmit - add a packet to the Ethernet Tx queue
  *	@skb: the packet
  *	@dev: the egress net device
  *

From e0333b1bb174e38db03943fb3138866bbec979bc Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:21 +0800
Subject: [PATCH 0217/3292] net: chelsio: cxgb4: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/chelsio/cxgb3/sge.c:677: warning: expecting prototype for free_qset(). Prototype was for t3_free_qset() instead
 drivers/net/ethernet/chelsio/cxgb3/sge.c:1266: warning: expecting prototype for eth_xmit(). Prototype was for t3_eth_xmit() instead

Cc: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 2 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
index 70dbee89118e..5bf117d2179f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c
@@ -446,7 +446,7 @@ void cxgb4_ptp_init(struct adapter *adapter)
 }
 
 /**
- * cxgb4_ptp_remove - disable PTP device and stop the overflow check
+ * cxgb4_ptp_stop - disable PTP device and stop the overflow check
  * @adapter: board private structure
  *
  * Stop the PTP support.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 9428ef1f04a8..ae3ad99fbd06 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -6983,7 +6983,7 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox)
 }
 
 /**
- *	t4_init_cmd - ask FW to initialize the device
+ *	t4_early_init - ask FW to initialize the device
  *	@adap: the adapter
  *	@mbox: mailbox to use for the FW command
  *
@@ -10224,7 +10224,7 @@ out:
 }
 
 /**
- *	t4_set_vf_mac - Set MAC address for the specified VF
+ *	t4_set_vf_mac_acl - Set MAC address for the specified VF
  *	@adapter: The adapter
  *	@vf: one of the VFs instantiated by the specified PF
  *	@naddr: the number of MAC addresses

From 1eb00ff517f40c8c170b32532b26f48da575fdb7 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:22 +0800
Subject: [PATCH 0218/3292] net: chelsio: cxgb4vf: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/chelsio/cxgb4vf/sge.c:966: warning: expecting prototype for check_ring_tx_db(). Prototype was for ring_tx_db() instead

Cc: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 95657da0aa4b..7bc80eeb2c21 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -954,7 +954,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq,
 }
 
 /**
- *	check_ring_tx_db - check and potentially ring a TX queue's doorbell
+ *	ring_tx_db - check and potentially ring a TX queue's doorbell
  *	@adapter: the adapter
  *	@tq: the TX queue
  *	@n: number of new descriptors to give to HW

From d6174870c0f1f1049bd4ae1971527fd5901fedf9 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:23 +0800
Subject: [PATCH 0219/3292] net: huawei: hinic: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c:604: warning: expecting prototype for cmdq_arm_ceq_handler(). Prototype was for cmdq_sync_cmd_handler() instead
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c:59: warning: expecting prototype for get_capability(). Prototype was for parse_capability() instead
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c:101: warning: expecting prototype for get_cap_from_fw(). Prototype was for get_capability() instead
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c:355: warning: expecting prototype for clear_io_resource(). Prototype was for clear_io_resources() instead
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c:1100: warning: expecting prototype for hinic_hwdev_get_sq(). Prototype was for hinic_hwdev_get_rq() instead
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c:341: warning: expecting prototype for dma_attr_table_init(). Prototype was for dma_attr_init() instead
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c:904: warning: expecting prototype for hinic_put_wqe(). Prototype was for hinic_rq_put_wqe() instead
 drivers/net/ethernet/huawei/hinic/hinic_main.c:241: warning: expecting prototype for create_txqs(). Prototype was for create_rxqs() instead
 drivers/net/ethernet/huawei/hinic/hinic_main.c:295: warning: expecting prototype for free_txqs(). Prototype was for free_rxqs() instead
 drivers/net/ethernet/huawei/hinic/hinic_tx.c:667: warning: expecting prototype for free_all_rx_skbs(). Prototype was for free_all_tx_skbs() instead

Cc: Bin Luo <luobin9@huawei.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 8 ++++----
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c   | 2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c   | 2 +-
 drivers/net/ethernet/huawei/hinic/hinic_main.c    | 4 ++--
 drivers/net/ethernet/huawei/hinic/hinic_tx.c      | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 577cb2cffff2..307a6d4af993 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -594,7 +594,7 @@ static void cmdq_update_errcode(struct hinic_cmdq *cmdq, u16 prod_idx,
 }
 
 /**
- * cmdq_arm_ceq_handler - cmdq completion event handler for sync command
+ * cmdq_sync_cmd_handler - cmdq completion event handler for sync command
  * @cmdq: the cmdq of the command
  * @cons_idx: the consumer index to update the error code for
  * @errcode: the error code
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 19a91c0223a7..428108eb10d2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -48,7 +48,7 @@ enum io_status {
 };
 
 /**
- * get_capability - convert device capabilities to NIC capabilities
+ * parse_capability - convert device capabilities to NIC capabilities
  * @hwdev: the HW device to set and convert device capabilities for
  * @dev_cap: device capabilities from FW
  *
@@ -92,7 +92,7 @@ static int parse_capability(struct hinic_hwdev *hwdev,
 }
 
 /**
- * get_cap_from_fw - get device capabilities from FW
+ * get_capability - get device capabilities from FW
  * @pfhwdev: the PF HW device to get capabilities for
  *
  * Return 0 - Success, negative - Failure
@@ -346,7 +346,7 @@ static int wait_for_db_state(struct hinic_hwdev *hwdev)
 }
 
 /**
- * clear_io_resource - set the IO resources as not active in the NIC
+ * clear_io_resources - set the IO resources as not active in the NIC
  * @hwdev: the NIC HW device
  *
  * Return 0 - Success, negative - Failure
@@ -1090,7 +1090,7 @@ struct hinic_sq *hinic_hwdev_get_sq(struct hinic_hwdev *hwdev, int i)
 }
 
 /**
- * hinic_hwdev_get_sq - get RQ
+ * hinic_hwdev_get_rq - get RQ
  * @hwdev: the NIC HW device
  * @i: the position of the RQ
  *
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index 55b327eebe64..0428faa68e80 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -334,7 +334,7 @@ static void set_dma_attr(struct hinic_hwif *hwif, u32 entry_idx,
 }
 
 /**
- * dma_attr_table_init - initialize the default dma attributes
+ * dma_attr_init - initialize the default dma attributes
  * @hwif: the HW interface of a pci function device
  **/
 static void dma_attr_init(struct hinic_hwif *hwif)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index dcba4d009bad..336248aa2e48 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -894,7 +894,7 @@ struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
 }
 
 /**
- * hinic_put_wqe - release the ci for new wqes
+ * hinic_rq_put_wqe - release the ci for new wqes
  * @rq: recv queue
  * @cons_idx: consumer index of the wqe
  * @wqe_size: the size of the wqe
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 1da5997f034c..405ee4d2d2b1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -232,7 +232,7 @@ static void free_txqs(struct hinic_dev *nic_dev)
 }
 
 /**
- * create_txqs - Create the Logical Rx Queues of specific NIC device
+ * create_rxqs - Create the Logical Rx Queues of specific NIC device
  * @nic_dev: the specific NIC device
  *
  * Return 0 - Success, negative - Failure
@@ -288,7 +288,7 @@ err_init_rxq:
 }
 
 /**
- * free_txqs - Free the Logical Rx Queues of specific NIC device
+ * free_rxqs - Free the Logical Rx Queues of specific NIC device
  * @nic_dev: the specific NIC device
  **/
 static void free_rxqs(struct hinic_dev *nic_dev)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 7bd414aed6ff..c5bdb0d374ef 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -660,7 +660,7 @@ static void tx_free_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
 }
 
 /**
- * free_all_rx_skbs - free all skbs in tx queue
+ * free_all_tx_skbs - free all skbs in tx queue
  * @txq: tx queue
  **/
 static void free_all_tx_skbs(struct hinic_txq *txq)

From 229fd41f64474e3ca739f4128983f989f928bb42 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:24 +0800
Subject: [PATCH 0220/3292] net: micrel: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/micrel/ksz884x.c:2163: warning: expecting prototype for sw_get_board_storm(). Prototype was for sw_get_broad_storm() instead
 drivers/net/ethernet/micrel/ksz884x.c:2985: warning: expecting prototype for port_w_phy(). Prototype was for hw_w_phy() instead
 drivers/net/ethernet/micrel/ksz884x.c:4792: warning: expecting prototype for transmit_done(). Prototype was for tx_done() instead

Cc: Tristram Ha <Tristram.Ha@micrel.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ksz884x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 9ed264ed7070..3532bfe936f6 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -2153,7 +2153,7 @@ static void sw_cfg_broad_storm(struct ksz_hw *hw, u8 percent)
 }
 
 /**
- * sw_get_board_storm - get broadcast storm threshold
+ * sw_get_broad_storm - get broadcast storm threshold
  * @hw: 	The hardware instance.
  * @percent:	Buffer to store the broadcast storm threshold percentage.
  *
@@ -2973,7 +2973,7 @@ static void hw_r_phy(struct ksz_hw *hw, int port, u16 reg, u16 *val)
 }
 
 /**
- * port_w_phy - write data to PHY register
+ * hw_w_phy - write data to PHY register
  * @hw: 	The hardware instance.
  * @port:	Port to write.
  * @reg:	PHY register to write.
@@ -4782,7 +4782,7 @@ static void transmit_cleanup(struct dev_info *hw_priv, int normal)
 }
 
 /**
- * transmit_done - transmit done processing
+ * tx_done - transmit done processing
  * @hw_priv:	Network device.
  *
  * This routine is called when the transmit interrupt is triggered, indicating

From 331a3219d3b6c23988289eb29cf292d3006cd424 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:25 +0800
Subject: [PATCH 0221/3292] net: microchip: Demote non-compliant kernel-doc
 headers

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/microchip/encx24j600.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Jon Ringle <jringle@gridpoint.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 3658c4ae3c37..ee921a99e439 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
  * Microchip ENCX24J600 ethernet driver
  *
  * Copyright (C) 2015 Gridpoint

From dc432f5acb8692c3e62bd4d9ab513187a56483ff Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:26 +0800
Subject: [PATCH 0222/3292] net: neterion: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/neterion/s2io.c:2759: warning: expecting prototype for s2io_poll(). Prototype was for s2io_poll_msix() instead
 drivers/net/ethernet/neterion/s2io.c:5304: warning: expecting prototype for s2io_ethtol_get_link_ksettings(). Prototype was for s2io_ethtool_get_link_ksettings() instead

Cc: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/neterion/s2io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 9cfcd5500462..27a65ab3d501 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -2743,7 +2743,7 @@ static int s2io_chk_rx_buffers(struct s2io_nic *nic, struct ring_info *ring)
 }
 
 /**
- * s2io_poll - Rx interrupt handler for NAPI support
+ * s2io_poll_msix - Rx interrupt handler for NAPI support
  * @napi : pointer to the napi structure.
  * @budget : The number of packets that were budgeted to be processed
  * during  one pass through the 'Poll" function.
@@ -5288,7 +5288,7 @@ s2io_ethtool_set_link_ksettings(struct net_device *dev,
 }
 
 /**
- * s2io_ethtol_get_link_ksettings - Return link specific information.
+ * s2io_ethtool_get_link_ksettings - Return link specific information.
  * @dev: pointer to netdev
  * @cmd : pointer to the structure with parameters given by ethtool
  * to return link information.

From 146c91e2bc9a11e6091dce31caacf004dd9c7443 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:27 +0800
Subject: [PATCH 0223/3292] net: neterion: vxge: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/neterion/vxge/vxge-config.c:4895: warning: expecting prototype for vxge_hw_vpath_rx_doorbell_post(). Prototype was for vxge_hw_vpath_rx_doorbell_init() instead
 drivers/net/ethernet/neterion/vxge/vxge-main.c:1814: warning: expecting prototype for vxge_poll(). Prototype was for vxge_poll_msix() instead
 drivers/net/ethernet/neterion/vxge/vxge-main.c:4761: warning: expecting prototype for vxge_rem_nic(). Prototype was for vxge_remove() instead

Cc: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/neterion/vxge/vxge-config.c | 2 +-
 drivers/net/ethernet/neterion/vxge/vxge-main.c   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c
index 5162b938a1ac..38a273c4d593 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c
@@ -4884,7 +4884,7 @@ vpath_open_exit1:
 }
 
 /**
- * vxge_hw_vpath_rx_doorbell_post - Close the handle got from previous vpath
+ * vxge_hw_vpath_rx_doorbell_init - Close the handle got from previous vpath
  * (vpath) open
  * @vp: Handle got from previous vpath open
  *
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 87892bd992b1..b113c158d6e3 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -1799,7 +1799,7 @@ static void vxge_reset(struct work_struct *work)
 }
 
 /**
- * vxge_poll - Receive handler when Receive Polling is used.
+ * vxge_poll_msix - Receive handler when Receive Polling is used.
  * @napi: pointer to the napi structure.
  * @budget: Number of packets budgeted to be processed in this iteration.
  *
@@ -4752,7 +4752,7 @@ _exit0:
 }
 
 /**
- * vxge_rem_nic - Free the PCI device
+ * vxge_remove - Free the PCI device
  * @pdev: structure containing the PCI related information of the device.
  * Description: This function is called by the Pci subsystem to release a
  * PCI device and free up all resource held up by the device.

From a507b16445240fc655721debd6beeac43f020bcb Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:28 +0800
Subject: [PATCH 0224/3292] net: netronome: nfp: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/netronome/nfp/ccm_mbox.c:52: warning: expecting prototype for struct nfp_ccm_mbox_skb_cb. Prototype was for struct nfp_ccm_mbox_cmsg_cb instead
 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c:35: warning: expecting prototype for struct nfp_tun_pre_run_rule. Prototype was for struct nfp_tun_pre_tun_rule instead
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c:38: warning: expecting prototype for NFFW_INFO_VERSION history(). Prototype was for NFFW_INFO_VERSION_CURRENT() instead

Cc: Simon Horman <simon.horman@netronome.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/ccm_mbox.c           | 2 +-
 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 2 +-
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
index f0783aa9e66e..4247bca09807 100644
--- a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
+++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c
@@ -36,7 +36,7 @@ enum nfp_net_mbox_cmsg_state {
 };
 
 /**
- * struct nfp_ccm_mbox_skb_cb - CCM mailbox specific info
+ * struct nfp_ccm_mbox_cmsg_cb - CCM mailbox specific info
  * @state:	processing state (/stage) of the message
  * @err:	error encountered during processing if any
  * @max_len:	max(request_len, reply_len)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index d19c02e99114..ab70179728f6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -21,7 +21,7 @@
 #define NFP_TUN_PRE_TUN_IPV6_BIT	BIT(7)
 
 /**
- * struct nfp_tun_pre_run_rule - rule matched before decap
+ * struct nfp_tun_pre_tun_rule - rule matched before decap
  * @flags:		options for the rule offset
  * @port_idx:		index of destination MAC address for the rule
  * @vlan_tci:		VLAN info associated with MAC
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
index d4e02542e2e9..e2e5fd003ad6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
@@ -24,7 +24,7 @@
 
 #define NFFW_FWID_ALL   255
 
-/**
+/*
  * NFFW_INFO_VERSION history:
  * 0: This was never actually used (before versioning), but it refers to
  *    the previous struct which had FWINFO_CNT = MEINFO_CNT = 120 that later

From 9f2e6fb6341349f1ec4565a055669bf4ac14e321 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:29 +0800
Subject: [PATCH 0225/3292] net: calxeda: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c:761: warning: expecting prototype for qlcnic_83xx_idc_cold_state(). Prototype was for qlcnic_83xx_idc_cold_state_handler() instead
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c:192: warning: expecting prototype for qlcnic_83xx_vnic_opmode(). Prototype was for qlcnic_83xx_config_vnic_opmode() instead

Cc: Shahed Shaikh <shshaikh@marvell.com>
Cc: Manish Chopra <manishc@marvell.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index d2c190732d3e..0a2f34fc8b24 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -746,7 +746,7 @@ static int qlcnic_83xx_idc_unknown_state(struct qlcnic_adapter *adapter)
 }
 
 /**
- * qlcnic_83xx_idc_cold_state
+ * qlcnic_83xx_idc_cold_state_handler
  *
  * @adapter: adapter structure
  *
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c
index c4297aea7d15..711609503ba6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c
@@ -180,7 +180,7 @@ static int qlcnic_83xx_init_non_privileged_vnic(struct qlcnic_adapter *adapter)
 }
 
 /**
- * qlcnic_83xx_vnic_opmode
+ * qlcnic_83xx_config_vnic_opmode
  *
  * @adapter: adapter structure
  * Identify virtual NIC operational modes.

From 61633d71a71c4bcc27f43c9020fcb15137de6dd5 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:30 +0800
Subject: [PATCH 0226/3292] net: samsung: sxgbe: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c:797: warning: expecting prototype for sxgbe_tx_clean(). Prototype was for sxgbe_tx_all_clean() instead
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c:1026: warning: expecting prototype for sxgbe_init_tx_coalesce(). Prototype was for sxgbe_tx_init_coalesce() instead

Cc: Byungho An <bh74.an@samsung.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 971f1e54b652..090bcd2fb758 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -789,7 +789,7 @@ static void sxgbe_tx_queue_clean(struct sxgbe_tx_queue *tqueue)
 }
 
 /**
- * sxgbe_tx_clean:
+ * sxgbe_tx_all_clean:
  * @priv: driver private structure
  * Description: it reclaims resources after transmission completes.
  */
@@ -1015,7 +1015,7 @@ static void sxgbe_tx_timer(struct timer_list *t)
 }
 
 /**
- * sxgbe_init_tx_coalesce: init tx mitigation options.
+ * sxgbe_tx_init_coalesce: init tx mitigation options.
  * @priv: driver private structure
  * Description:
  * This inits the transmit coalesce parameters: i.e. timer rate,

From 40d9fca8b3fd7c4617c2af064d7ec8124cd79c75 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:31 +0800
Subject: [PATCH 0227/3292] net: socionext: Demote non-compliant kernel-doc
 headers

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/socionext/sni_ave.c:28: warning: expecting prototype for sni_ave.c(). Prototype was for AVE_IDR() instead

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/socionext/sni_ave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index fcbb4bb31408..5eb6bb4f7b6c 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * sni_ave.c - Socionext UniPhier AVE ethernet driver
  * Copyright 2014 Panasonic Corporation
  * Copyright 2015-2017 Socionext Inc.

From 85ead77dc3d58adcca7b74afa02c1b2083e4c2ac Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:32 +0800
Subject: [PATCH 0228/3292] net: ti: Fix wrong struct name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/ti/cpsw_ale.c:88: warning: expecting prototype for struct ale_dev_id. Prototype was for struct cpsw_ale_dev_id instead

Cc: Cyril Chemparathy <cyril@ti.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw_ale.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index d828f856237a..0c75e0576ee1 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -70,7 +70,7 @@ enum {
 };
 
 /**
- * struct ale_dev_id - The ALE version/SoC specific configuration
+ * struct cpsw_ale_dev_id - The ALE version/SoC specific configuration
  * @dev_id: ALE version/SoC id
  * @features: features supported by ALE
  * @tbl_entries: number of ALE entries

From 03055a25213b910d8c74808e728c737638392dff Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:33 +0800
Subject: [PATCH 0229/3292] net: via: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/via/via-velocity.c:1908: warning: expecting prototype for tx_srv(). Prototype was for velocity_tx_srv() instead
 drivers/net/ethernet/via/via-velocity.c:2466: warning: expecting prototype for velocity_get_status(). Prototype was for velocity_get_stats() instead
 drivers/net/ethernet/via/via-velocity.c:3734: warning: expecting prototype for velocity_cleanup(). Prototype was for velocity_cleanup_module() instead

Cc: Francois Romieu <romieu@fr.zoreil.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-velocity.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index fecc4d7b00b0..88426b5e410b 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -1897,7 +1897,7 @@ static void velocity_error(struct velocity_info *vptr, int status)
 }
 
 /**
- *	tx_srv		-	transmit interrupt service
+ *	velocity_tx_srv		-	transmit interrupt service
  *	@vptr: Velocity
  *
  *	Scan the queues looking for transmitted packets that
@@ -2453,7 +2453,7 @@ static int velocity_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 /**
- *	velocity_get_status	-	statistics callback
+ *	velocity_get_stats	-	statistics callback
  *	@dev: network device
  *
  *	Callback from the network layer to allow driver statistics
@@ -3723,7 +3723,7 @@ static int __init velocity_init_module(void)
 }
 
 /**
- *	velocity_cleanup	-	module unload
+ *	velocity_cleanup_module		-	module unload
  *
  *	When the velocity hardware is unloaded this function is called.
  *	It will clean up the notifiers and the unregister the PCI

From 1f2d109e8363ef6c6df990fb29afcf44a6e3c5a5 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:34 +0800
Subject: [PATCH 0230/3292] net: phy: Demote non-compliant kernel-doc headers

Fixes the following W=1 kernel build warning(s):

 drivers/net/phy/adin.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/phy/rockchip.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/adin.c     | 2 +-
 drivers/net/phy/rockchip.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/adin.c b/drivers/net/phy/adin.c
index 55a0b91816e2..5ce6da62cc8e 100644
--- a/drivers/net/phy/adin.c
+++ b/drivers/net/phy/adin.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0+
-/**
+/*
  *  Driver for Analog Devices Industrial Ethernet PHYs
  *
  * Copyright 2019 Analog Devices Inc.
diff --git a/drivers/net/phy/rockchip.c b/drivers/net/phy/rockchip.c
index 52f1f65320fe..bb13e75183ee 100644
--- a/drivers/net/phy/rockchip.c
+++ b/drivers/net/phy/rockchip.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0+
-/**
+/*
  * drivers/net/phy/rockchip.c
  *
  * Driver for ROCKCHIP Ethernet PHYs

From 5a9594cf1d143a713a148aaa62b64e6a6de97fb3 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 12:45:35 +0800
Subject: [PATCH 0231/3292] net: hisilicon: hns: Fix wrong function name in
 comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c:121: warning: expecting prototype for hns_mac_is_adjust_link(). Prototype was for hns_mac_need_adjust_link() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c:386: warning: expecting prototype for hns_mac_queue_config_bc_en(). Prototype was for hns_mac_port_config_bc_en() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c:607: warning: expecting prototype for hns_mac_set_autoneg(). Prototype was for hns_mac_set_pauseparam() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:236: warning: expecting prototype for hns_ppe_qid_cfg(). Prototype was for hns_dsaf_ppe_qid_cfg() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:623: warning: expecting prototype for dsaf_tbl_tcam_mcast_cfg(). Prototype was for hns_dsaf_tbl_tcam_mcast_cfg() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:1220: warning: expecting prototype for hns_dsaf_tbl_tcam_init(). Prototype was for hns_dsaf_comm_init() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:2121: warning: expecting prototype for dsaf_pfc_unit_cnt(). Prototype was for hns_dsaf_pfc_unit_cnt() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:2153: warning: expecting prototype for dsaf_port_work_rate_cfg(). Prototype was for hns_dsaf_port_work_rate_cfg() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:2745: warning: expecting prototype for hns_dsaf_get_sset_count(). Prototype was for hns_dsaf_get_regs_count() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:2957: warning: expecting prototype for dsaf_probe(). Prototype was for hns_dsaf_probe() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c:3011: warning: expecting prototype for dsaf_remove(). Prototype was for hns_dsaf_remove() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c:366: warning: expecting prototype for hns_dsaf_srst_chns(). Prototype was for hns_dsaf_srst_chns_acpi() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c:509: warning: expecting prototype for hns_mac_get_sds_mode(). Prototype was for hns_mac_get_phy_if() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c:303: warning: expecting prototype for ppe_init_hw(). Prototype was for hns_ppe_init_hw() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c:350: warning: expecting prototype for ppe_uninit_hw(). Prototype was for hns_ppe_uninit_hw() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c:391: warning: expecting prototype for hns_ppe_reset(). Prototype was for hns_ppe_reset_common() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c:464: warning: expecting prototype for ppe_get_strings(). Prototype was for hns_ppe_get_strings() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c:920: warning: expecting prototype for rcb_get_sset_count(). Prototype was for hns_rcb_get_ring_regs_count() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c:112: warning: expecting prototype for hns_xgmac_tx_lf_rf_insert(). Prototype was for hns_xgmac_lf_rf_insert() instead
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c:122: warning: expecting prototype for hns_xgmac__lf_rf_control_init(). Prototype was for hns_xgmac_lf_rf_control_init() instead
 drivers/net/ethernet/hisilicon/hns/hns_enet.c:777: warning: expecting prototype for hns_nic_adp_coalesce(). Prototype was for hns_nic_adpt_coalesce() instead
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:202: warning: expecting prototype for hns_nic_set_link_settings(). Prototype was for hns_nic_set_link_ksettings() instead
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:837: warning: expecting prototype for get_ethtool_stats(). Prototype was for hns_get_ethtool_stats() instead
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:894: warning: expecting prototype for get_strings(). Prototype was for hns_get_strings() instead

Cc: Yisen Zhuang <yisen.zhuang@huawei.com>
Cc: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c    |  6 +++---
 .../net/ethernet/hisilicon/hns/hns_dsaf_main.c   | 16 ++++++++--------
 .../net/ethernet/hisilicon/hns/hns_dsaf_misc.c   |  4 ++--
 .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.c    |  8 ++++----
 .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.c    |  2 +-
 .../net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c  |  4 ++--
 drivers/net/ethernet/hisilicon/hns/hns_enet.c    |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c |  6 +++---
 8 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index de6f051f5b0b..f41379de2186 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -111,7 +111,7 @@ int hns_mac_get_port_info(struct hns_mac_cb *mac_cb,
 }
 
 /**
- *hns_mac_is_adjust_link - check is need change mac speed and duplex register
+ *hns_mac_need_adjust_link - check is need change mac speed and duplex register
  *@mac_cb: mac device
  *@speed: phy device speed
  *@duplex:phy device duplex
@@ -374,7 +374,7 @@ static void hns_mac_param_get(struct mac_params *param,
 }
 
 /**
- * hns_mac_queue_config_bc_en - set broadcast rx&tx enable
+ * hns_mac_port_config_bc_en - set broadcast rx&tx enable
  * @mac_cb: mac device
  * @port_num: queue number
  * @vlan_id: vlan id`
@@ -597,7 +597,7 @@ int hns_mac_set_autoneg(struct hns_mac_cb *mac_cb, u8 enable)
 }
 
 /**
- * hns_mac_set_autoneg - set rx & tx pause parameter
+ * hns_mac_set_pauseparam - set rx & tx pause parameter
  * @mac_cb: mac control block
  * @rx_en: rx enable or not
  * @tx_en: tx enable or not
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index c2a60612f503..fcaf5132b865 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -227,7 +227,7 @@ hns_dsaf_reg_cnt_clr_ce(struct dsaf_device *dsaf_dev, u32 reg_cnt_clr_ce)
 }
 
 /**
- * hns_ppe_qid_cfg - config ppe qid
+ * hns_dsaf_ppe_qid_cfg - config ppe qid
  * @dsaf_dev: dsa fabric id
  * @qid_cfg: value array
  */
@@ -613,7 +613,7 @@ static void hns_dsaf_tbl_tcam_data_cfg(
 }
 
 /**
- * dsaf_tbl_tcam_mcast_cfg - tbl
+ * hns_dsaf_tbl_tcam_mcast_cfg - tbl
  * @dsaf_dev: dsa fabric id
  * @mcast: addr
  */
@@ -1213,7 +1213,7 @@ void hns_dsaf_get_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id,
 }
 
 /**
- * hns_dsaf_tbl_tcam_init - INT
+ * hns_dsaf_comm_init - INT
  * @dsaf_dev: dsa fabric id
  */
 static void hns_dsaf_comm_init(struct dsaf_device *dsaf_dev)
@@ -2111,7 +2111,7 @@ static void hns_dsaf_free_dev(struct dsaf_device *dsaf_dev)
 }
 
 /**
- * dsaf_pfc_unit_cnt - set pfc unit count
+ * hns_dsaf_pfc_unit_cnt - set pfc unit count
  * @dsaf_dev: dsa fabric id
  * @mac_id: id in use
  * @rate:  value array
@@ -2142,7 +2142,7 @@ static void hns_dsaf_pfc_unit_cnt(struct dsaf_device *dsaf_dev, int  mac_id,
 }
 
 /**
- * dsaf_port_work_rate_cfg - fifo
+ * hns_dsaf_port_work_rate_cfg - fifo
  * @dsaf_dev: dsa fabric id
  * @mac_id: mac contrl block
  * @rate_mode: value array
@@ -2738,7 +2738,7 @@ void hns_dsaf_get_strings(int stringset, u8 *data, int port,
 }
 
 /**
- *hns_dsaf_get_sset_count - get dsaf regs count
+ *hns_dsaf_get_regs_count - get dsaf regs count
  *return dsaf regs count
  */
 int hns_dsaf_get_regs_count(void)
@@ -2949,7 +2949,7 @@ int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port)
 }
 
 /**
- * dsaf_probe - probo dsaf dev
+ * hns_dsaf_probe - probo dsaf dev
  * @pdev: dasf platform device
  * return 0 - success , negative --fail
  */
@@ -3004,7 +3004,7 @@ free_dev:
 }
 
 /**
- * dsaf_remove - remove dsaf dev
+ * hns_dsaf_remove - remove dsaf dev
  * @pdev: dasf platform device
  */
 static int hns_dsaf_remove(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 1eaac89d60b7..23d9cbf262c3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -351,7 +351,7 @@ hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset)
 }
 
 /**
- * hns_dsaf_srst_chns - reset dsaf channels
+ * hns_dsaf_srst_chns_acpi - reset dsaf channels
  * @dsaf_dev: dsaf device struct pointer
  * @msk: xbar channels mask value:
  * @dereset: false - request reset , true - drop reset
@@ -501,7 +501,7 @@ static void hns_ppe_com_srst(struct dsaf_device *dsaf_dev, bool dereset)
 }
 
 /**
- * hns_mac_get_sds_mode - get phy ifterface form serdes mode
+ * hns_mac_get_phy_if - get phy ifterface form serdes mode
  * @mac_cb: mac control block
  * retuen phy interface
  */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index ff03cafccb66..a7eb87da4e70 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -296,7 +296,7 @@ int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb)
 }
 
 /**
- * ppe_init_hw - init ppe
+ * hns_ppe_init_hw - init ppe
  * @ppe_cb: ppe device
  */
 static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
@@ -343,7 +343,7 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
 }
 
 /**
- * ppe_uninit_hw - uninit ppe
+ * hns_ppe_uninit_hw - uninit ppe
  * @ppe_cb: ppe device
  */
 static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb)
@@ -382,7 +382,7 @@ void hns_ppe_uninit(struct dsaf_device *dsaf_dev)
 }
 
 /**
- * hns_ppe_reset - reinit ppe/rcb hw
+ * hns_ppe_reset_common - reinit ppe/rcb hw
  * @dsaf_dev: dasf device
  * @ppe_common_index: the index
  * return void
@@ -455,7 +455,7 @@ int hns_ppe_get_regs_count(void)
 }
 
 /**
- * ppe_get_strings - get ppe srting
+ * hns_ppe_get_strings - get ppe srting
  * @ppe_cb: ppe device
  * @stringset: string set type
  * @data: output string
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 5d5dc6942232..e2ff3ca198d1 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -913,7 +913,7 @@ int hns_rcb_get_common_regs_count(void)
 }
 
 /**
- *rcb_get_sset_count - rcb ring regs count
+ *hns_rcb_get_ring_regs_count - rcb ring regs count
  *return regs count
  */
 int hns_rcb_get_ring_regs_count(void)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index be52acd448f9..401fef5f1d07 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -104,7 +104,7 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value)
 }
 
 /**
- * hns_xgmac_tx_lf_rf_insert - insert lf rf control about xgmac
+ * hns_xgmac_lf_rf_insert - insert lf rf control about xgmac
  * @mac_drv: mac driver
  * @mode: inserf rf or lf
  */
@@ -115,7 +115,7 @@ static void hns_xgmac_lf_rf_insert(struct mac_driver *mac_drv, u32 mode)
 }
 
 /**
- * hns_xgmac__lf_rf_control_init - initial the lf rf control register
+ * hns_xgmac_lf_rf_control_init - initial the lf rf control register
  * @mac_drv: mac driver
  */
 static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 5e349c0bdecc..ad534f9e41ab 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -770,7 +770,7 @@ static u32 smooth_alg(u32 new_param, u32 old_param)
 }
 
 /**
- * hns_nic_adp_coalesce - self adapte coalesce according to rx rate
+ * hns_nic_adpt_coalesce - self adapte coalesce according to rx rate
  * @ring_data: pointer to hns_nic_ring_data
  **/
 static void hns_nic_adpt_coalesce(struct hns_nic_ring_data *ring_data)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index da48c05435ea..7e62dcff2426 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -192,7 +192,7 @@ static int hns_nic_get_link_ksettings(struct net_device *net_dev,
 }
 
 /**
- *hns_nic_set_link_settings - implement ethtool set link ksettings
+ *hns_nic_set_link_ksettings - implement ethtool set link ksettings
  *@net_dev: net_device
  *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
@@ -827,7 +827,7 @@ hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch)
 }
 
 /**
- * get_ethtool_stats - get detail statistics.
+ * hns_get_ethtool_stats - get detail statistics.
  * @netdev: net device
  * @stats: statistics info.
  * @data: statistics data.
@@ -885,7 +885,7 @@ static void hns_get_ethtool_stats(struct net_device *netdev,
 }
 
 /**
- * get_strings: Return a set of strings that describe the requested objects
+ * hns_get_strings: Return a set of strings that describe the requested objects
  * @netdev: net device
  * @stringset: string set ID.
  * @data: objects data.

From 5d01071e64b6ddca3f4c86154407307e482959b4 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Mon, 17 May 2021 20:03:51 +0300
Subject: [PATCH 0232/3292] selftests: mlxsw: Make the unsplit array global in
 port_scale test

Currently, the array of the ports that were split in the port_scale test
is local, so the port_cleanup() unsplits an empty array.

Make the array global so the cleanup will be preformed properly.

Suggested-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/port_scale.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index 65f43a7ce9c9..1e9a4aff76a2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -7,6 +7,8 @@
 
 PORT_NUM_NETIFS=0
 
+declare -a unsplit
+
 port_setup_prepare()
 {
 	:
@@ -20,12 +22,12 @@ port_cleanup()
 		devlink port unsplit $port
 		check_err $? "Did not unsplit $netdev"
 	done
+	unsplit=()
 }
 
 split_all_ports()
 {
 	local should_fail=$1; shift
-	local -a unsplit
 
 	# Loop over the splittable netdevs and create tuples of netdev along
 	# with its width. For example:

From 16355c0b101e783d57c2e155ef543cb9111205a4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 20:03:52 +0300
Subject: [PATCH 0233/3292] selftests: mlxsw: Make sampling test more robust

The test sometimes fails with an error message such as:

TEST: tc sample (w/ flower) rate (egress)                           [FAIL]
	Expected 100 packets, got 70 packets, which is -30% off. Required accuracy is +-25%

Make the test more robust by generating more packets, therefore
increasing the number of expected samples. Decrease the transmission
delay in order not to needlessly prolong the test.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/drivers/net/mlxsw/tc_sample.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 093bed088ad0..373d5f2a846e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -234,15 +234,15 @@ __tc_sample_rate_test()
 
 	psample_capture_start
 
-	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+	ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
 		-B $dip -t udp dp=52768,sp=42768 -q
 
 	psample_capture_stop
 
 	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
-	pct=$((100 * (pkts - 100) / 100))
+	pct=$((100 * (pkts - 10000) / 10000))
 	(( -25 <= pct && pct <= 25))
-	check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+	check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
 
 	log_test "tc sample rate ($desc)"
 
@@ -587,15 +587,15 @@ __tc_sample_acl_rate_test()
 
 	psample_capture_start
 
-	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+	ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
 		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
 
 	psample_capture_stop
 
 	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
-	pct=$((100 * (pkts - 100) / 100))
+	pct=$((100 * (pkts - 10000) / 10000))
 	(( -25 <= pct && pct <= 25))
-	check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+	check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
 
 	# Setup a filter that should not match any packet and make sure packets
 	# are not sampled.

From 9a1cac062d3ee884d66dd4fc61ada224b587d40c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 17 May 2021 20:03:53 +0300
Subject: [PATCH 0234/3292] selftests: mlxsw: qos_headroom: Convert to iproute2
 dcb

There is a dedicated tool for configuration of DCB in iproute2 now. Use it
in the selftest instead of mlnx_qos.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/qos_headroom.sh         | 69 ++++++++++---------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index 27de3d9ed08e..f4493ef9cca1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -29,37 +29,38 @@ cleanup()
 
 get_prio_pg()
 {
-	__mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
-		grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+	# Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+	# of buffer that priority X is mapped to.
+	dcb -j buffer show dev $swp |
+		jq -r '[.prio_buffer | .[] | tostring + " "] | add'
 }
 
 get_prio_pfc()
 {
-	__mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
-		grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+	# Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+	# whether priority X has PFC enabled (the value is 1) or disabled (0).
+	dcb -j pfc show dev $swp |
+		jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
 }
 
 get_prio_tc()
 {
-	__mlnx_qos -i $swp | sed -n '/^tc/,$p' |
-		awk '/^tc/ { TC = $2 }
-		     /priority:/ { PRIO[$2]=TC }
-		     END {
-			for (i in PRIO)
-			    printf("%d ", PRIO[i])
-		     }'
+	# Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number
+	# of TC that priority X is mapped to.
+	dcb -j ets show dev $swp |
+		jq -r '[.prio_tc | .[] | tostring + " "] | add'
 }
 
 get_buf_size()
 {
 	local idx=$1; shift
 
-	__mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+	dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
 }
 
 get_tot_size()
 {
-	__mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+	dcb -j buffer show dev $swp | jq '.total_size'
 }
 
 check_prio_pg()
@@ -121,18 +122,18 @@ test_dcb_ets()
 {
 	RET=0
 
-	__mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+	dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
 
 	check_prio_pg "0 2 4 6 1 3 5 7 "
 	check_prio_tc "0 2 4 6 1 3 5 7 "
 	check_prio_pfc "0 0 0 0 0 0 0 0 "
 
-	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+	dcb ets set dev $swp prio-tc all:0
 
 	check_prio_pg "0 0 0 0 0 0 0 0 "
 	check_prio_tc "0 0 0 0 0 0 0 0 "
 
-	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+	dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
 	check_fail $? "prio2buffer accepted in DCB mode"
 
 	log_test "Configuring headroom through ETS"
@@ -174,7 +175,7 @@ test_pfc()
 {
 	RET=0
 
-	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+	dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
 
 	local buf0size=$(get_buf_size 0)
 	local buf1size=$(get_buf_size 1)
@@ -193,7 +194,7 @@ test_pfc()
 
 	RET=0
 
-	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+	dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
 
 	check_prio_pg "0 0 0 0 0 1 2 3 "
 	check_prio_pfc "0 0 0 0 0 1 1 1 "
@@ -210,7 +211,7 @@ test_pfc()
 
 	RET=0
 
-	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+	dcb pfc set dev $swp delay 1000
 
 	check_buf_size 0 "== $buf0size"
 	check_buf_size 1 "> $buf1size"
@@ -221,8 +222,8 @@ test_pfc()
 
 	RET=0
 
-	__mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
-	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+	dcb pfc set dev $swp prio-pfc all:off delay 0
+	dcb ets set dev $swp prio-tc all:0
 
 	check_prio_pg "0 0 0 0 0 0 0 0 "
 	check_prio_tc "0 0 0 0 0 0 0 0 "
@@ -242,13 +243,13 @@ test_tc_priomap()
 {
 	RET=0
 
-	__mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+	dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
 	check_prio_pg "0 1 2 3 4 5 6 7 "
 
 	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
 	check_prio_pg "0 0 0 0 0 0 0 0 "
 
-	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+	dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
 	check_prio_pg "1 3 5 7 0 2 4 6 "
 
 	tc qdisc delete dev $swp root
@@ -256,9 +257,9 @@ test_tc_priomap()
 
 	# Clean up.
 	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
-	__mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp prio-buffer all:0
 	tc qdisc delete dev $swp root
-	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+	dcb ets set dev $swp prio-tc all:0
 
 	log_test "TC: priomap"
 }
@@ -270,12 +271,12 @@ test_tc_sizes()
 
 	RET=0
 
-	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+	dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
 	check_fail $? "buffer_size should fail before qdisc is added"
 
 	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
 
-	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0 0:$size
 	check_err $? "buffer_size should pass after qdisc is added"
 	check_buf_size 0 "== $size" "set size: "
 
@@ -283,26 +284,26 @@ test_tc_sizes()
 	check_buf_size 0 "== $size" "set MTU: "
 	mtu_restore $swp
 
-	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0
 
 	# After replacing the qdisc for the same kind, buffer_size still has to
 	# work.
 	tc qdisc replace dev $swp root handle 1: bfifo limit 1M
 
-	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0 0:$size
 	check_buf_size 0 "== $size" "post replace, set size: "
 
-	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0
 
 	# Likewise after replacing for a different kind.
 	tc qdisc replace dev $swp root handle 2: prio bands 8
 
-	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0 0:$size
 	check_buf_size 0 "== $size" "post replace different kind, set size: "
 
 	tc qdisc delete dev $swp root
 
-	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+	dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
 	check_fail $? "buffer_size should fail after qdisc is deleted"
 
 	log_test "TC: buffer size"
@@ -363,10 +364,10 @@ test_tc_int_buf()
 	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
 	test_int_buf "TC: "
 
-	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0 0:$size
 	test_int_buf "TC+buffsize: "
 
-	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+	dcb buffer set dev $swp buffer-size all:0
 	tc qdisc delete dev $swp root
 }
 

From b0bab2298ec9b3a837f8ef4a0cae4b42a4d03365 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 17 May 2021 20:03:54 +0300
Subject: [PATCH 0235/3292] selftests: mlxsw: qos_pfc: Convert to iproute2 dcb

There is a dedicated tool for configuration of DCB in iproute2 now. Use it
in the selftest instead of mlnx_qos.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/qos_pfc.sh    | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5c7700212f75..5d5622fc2758 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -171,7 +171,7 @@ switch_create()
 	# assignment.
 	tc qdisc replace dev $swp1 root handle 1: \
 	   ets bands 8 strict 8 priomap 7 6
-	__mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
 
 	# $swp2
 	# -----
@@ -209,8 +209,8 @@ switch_create()
 	# the lossless prio into a buffer of its own. Don't bother with buffer
 	# sizes though, there is not going to be any pressure in the "backward"
 	# direction.
-	__mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
-	__mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+	dcb buffer set dev $swp3 prio-buffer all:0 1:1
+	dcb pfc set dev $swp3 prio-pfc all:off 1:on
 
 	# $swp4
 	# -----
@@ -226,11 +226,11 @@ switch_create()
 	# Configure qdisc so that we can hand-tune headroom.
 	tc qdisc replace dev $swp4 root handle 1: \
 	   ets bands 8 strict 8 priomap 7 6
-	__mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
-	__mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+	dcb buffer set dev $swp4 prio-buffer all:0 1:1
+	dcb pfc set dev $swp4 prio-pfc all:off 1:on
 	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
 	# is (-2*MTU) about 80K of delay provision.
-	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+	dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB
 
 	# bridges
 	# -------
@@ -273,9 +273,9 @@ switch_destroy()
 	# $swp4
 	# -----
 
-	__mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
-	__mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
-	__mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+	dcb buffer set dev $swp4 buffer-size all:0
+	dcb pfc set dev $swp4 prio-pfc all:off
+	dcb buffer set dev $swp4 prio-buffer all:0
 	tc qdisc del dev $swp4 root
 
 	devlink_tc_bind_pool_th_restore $swp4 1 ingress
@@ -288,8 +288,8 @@ switch_destroy()
 	# $swp3
 	# -----
 
-	__mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
-	__mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+	dcb pfc set dev $swp3 prio-pfc all:off
+	dcb buffer set dev $swp3 prio-buffer all:0
 	tc qdisc del dev $swp3 root
 
 	devlink_tc_bind_pool_th_restore $swp3 1 egress
@@ -315,7 +315,7 @@ switch_destroy()
 	# $swp1
 	# -----
 
-	__mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+	dcb buffer set dev $swp1 prio-buffer all:0
 	tc qdisc del dev $swp1 root
 
 	devlink_tc_bind_pool_th_restore $swp1 1 ingress

From b4d786941b585ee0075decd531660852bea81479 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 17 May 2021 20:03:55 +0300
Subject: [PATCH 0236/3292] selftests: mlxsw: qos_lib: Drop __mlnx_qos

Now that the two users of this helper have been converted to iproute2 dcb,
it is not necessary anymore. Drop it.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/drivers/net/mlxsw/qos_lib.sh | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index 0bf76f13c030..faa51012cdac 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,17 +82,3 @@ bail_on_lldpad()
 		fi
 	fi
 }
-
-__mlnx_qos()
-{
-	local err
-
-	mlnx_qos "$@" 2>/dev/null
-	err=$?
-
-	if ((err)); then
-		echo "Error ($err) in mlnx_qos $@" >/dev/stderr
-	fi
-
-	return $err
-}

From ece5df874d3a80fcade92ca3b3877bd78dbb6116 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Mon, 17 May 2021 20:03:56 +0300
Subject: [PATCH 0237/3292] mlxsw: spectrum_buffers: Switch function arguments

In the call path:

mlxsw_sp_hdroom_bufs_reset_sizes()
    mlxsw_sp_hdroom_int_buf_size_get()
        ->int_buf_size_get()

The 'speed' and 'mtu' arguments were mistakenly switched twice. The two
bugs thus canceled each other.

Clean this up by switching the arguments in both call sites, so that
they are passed in the right order.

Found during manual code inspection.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 37ff29a1686e..9de160e740b2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -364,7 +364,7 @@ static u16 mlxsw_sp_hdroom_buf_delay_get(const struct mlxsw_sp *mlxsw_sp,
 
 static u32 mlxsw_sp_hdroom_int_buf_size_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed)
 {
-	u32 buffsize = mlxsw_sp->sb_ops->int_buf_size_get(speed, mtu);
+	u32 buffsize = mlxsw_sp->sb_ops->int_buf_size_get(mtu, speed);
 
 	return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1;
 }
@@ -388,8 +388,8 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port,
 	int i;
 
 	/* Internal buffer. */
-	reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_speed,
-							 mlxsw_sp_port->max_mtu);
+	reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_mtu,
+							 mlxsw_sp_port->max_speed);
 	reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells);
 	hdroom->int_buf.reserve_cells = reserve_cells;
 

From 837ec05cfea08284c575e8e834777b107da5ff9d Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Mon, 17 May 2021 20:03:57 +0300
Subject: [PATCH 0238/3292] mlxsw: Verify the accessed index doesn't exceed the
 array length

There are few cases in which an array index queried from a fw register,
is accessed without any validation that it doesn't exceed the array
length.

Add a proper length validation, so accessing memory past the end of an
array will be forbidden.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/minimal.c            | 4 ++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c           | 5 +++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c       | 3 +++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c    | 3 +++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 4 ++++
 5 files changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index b34c44723f8b..68102726c6a7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -234,6 +234,7 @@ static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u8 local_port)
 static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u8 local_port,
 				   u8 *last_module)
 {
+	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core);
 	u8 module, width;
 	int err;
 
@@ -249,6 +250,9 @@ static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u8 local_port,
 	if (module == *last_module)
 		return 0;
 	*last_module = module;
+
+	if (WARN_ON_ONCE(module >= max_ports))
+		return -EINVAL;
 	mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports;
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bca0354482cb..88699e678544 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2125,9 +2125,14 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	enum mlxsw_reg_pude_oper_status status;
+	unsigned int max_ports;
 	u8 local_port;
 
+	max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
 	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+
+	if (WARN_ON_ONCE(local_port >= max_ports))
+		return;
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port)
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index d6e9ecb14681..bfef65d1587c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -568,10 +568,13 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress,
 				 u8 domain_number, u16 sequence_id,
 				 u64 timestamp)
 {
+	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct mlxsw_sp1_ptp_key key;
 	u8 types;
 
+	if (WARN_ON_ONCE(local_port >= max_ports))
+		return;
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port)
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 41259c0004d1..99015dca86c9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2282,6 +2282,7 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
 						   char *rauhtd_pl,
 						   int ent_index)
 {
+	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
 	struct net_device *dev;
 	struct neighbour *n;
 	__be32 dipn;
@@ -2290,6 +2291,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
 
 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
 
+	if (WARN_ON_ONCE(rif >= max_rifs))
+		return;
 	if (!mlxsw_sp->router->rifs[rif]) {
 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index eeccd586e781..0cfba2986841 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2520,6 +2520,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
 					    char *sfn_pl, int rec_index,
 					    bool adding)
 {
+	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 	struct mlxsw_sp_bridge_device *bridge_device;
 	struct mlxsw_sp_bridge_port *bridge_port;
@@ -2532,6 +2533,9 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
 	int err;
 
 	mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port);
+
+	if (WARN_ON_ONCE(local_port >= max_ports))
+		return;
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n");

From 8c2b58e65d0186af5f73c7b78e46b798166d7f68 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 20:03:58 +0300
Subject: [PATCH 0239/3292] mlxsw: core: Avoid unnecessary EMAD buffer copy

mlxsw_emad_transmit() takes care of sending EMAD transactions to the
device. Since these transactions can time out, the driver performs up to
5 retransmissions, each time copying the skb with the original request.

The data of the skb does not change throughout the process, so there is
no need to copy it each time. Instead, only the skb itself can be
copied. Therefore, use skb_clone() instead of skb_copy().

This reduces the latency of the function by about 16%.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 7e9a7cb31720..ad93e01b2cda 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -630,7 +630,7 @@ static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
 	struct sk_buff *skb;
 	int err;
 
-	skb = skb_copy(trans->tx_skb, GFP_KERNEL);
+	skb = skb_clone(trans->tx_skb, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 

From 51746a353b44f9e2635ebbe278c46b2d9303c9d9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 20:03:59 +0300
Subject: [PATCH 0240/3292] mlxsw: spectrum_router: Avoid missing error code
 warning

Explicitly set the error code to zero before the goto statement to avoid
the following smatch warning:

drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c:3598 mlxsw_sp_nexthop_group_refresh() warn: missing error code 'err'

The warning is a false positive, but the change both suppresses the
warning and makes it clear to future readers that this is not an error
path.

The original report and discussion can be found here [1].

[1] https://lore.kernel.org/lkml/202105141823.Td2h3Mbi-lkp@intel.com/

Cc: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 99015dca86c9..ec2af77a126d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3844,8 +3844,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 	bool offload_change = false;
 	u32 adj_index;
 	bool old_adj_index_valid;
-	int i, err2, err = 0;
 	u32 old_adj_index;
+	int i, err2, err;
 
 	if (!nhgi->gateway)
 		return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
@@ -3875,11 +3875,13 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		return 0;
 	}
 	mlxsw_sp_nexthop_group_normalize(nhgi);
-	if (!nhgi->sum_norm_weight)
+	if (!nhgi->sum_norm_weight) {
 		/* No neigh of this group is connected so we just set
 		 * the trap and let everthing flow through kernel.
 		 */
+		err = 0;
 		goto set_trap;
+	}
 
 	ecmp_size = nhgi->sum_norm_weight;
 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);

From 9b43fbb8ce243603444780c0bbb962a047a35b7c Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Mon, 17 May 2021 20:04:00 +0300
Subject: [PATCH 0241/3292] mlxsw: Remove Mellanox SwitchIB ASIC support

Initial support for the Mellanox SwitchIB and SwitchIB-2 ASICs was added
in October 2016, but since then development of this driver stopped.
Therefore, the driver does not support any offloads and simply registers
devlink ports for its front panel ports, rendering it irrelevant for
deployment.

Given the driver is not used by any users and that there is no intention
of investing in its development, remove it from the kernel.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig   |  11 -
 drivers/net/ethernet/mellanox/mlxsw/Makefile  |   2 -
 drivers/net/ethernet/mellanox/mlxsw/pci.h     |   2 -
 .../net/ethernet/mellanox/mlxsw/switchib.c    | 595 ------------------
 4 files changed, 610 deletions(-)
 delete mode 100644 drivers/net/ethernet/mellanox/mlxsw/switchib.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index a619d90559f7..6509b5fab936 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -49,17 +49,6 @@ config MLXSW_I2C
 	  To compile this driver as a module, choose M here: the
 	  module will be called mlxsw_i2c.
 
-config MLXSW_SWITCHIB
-	tristate "Mellanox Technologies SwitchIB and SwitchIB-2 support"
-	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
-	default m
-	help
-	  This driver supports Mellanox Technologies SwitchIB and SwitchIB-2
-	  Infiniband Switch ASICs.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called mlxsw_switchib.
-
 config MLXSW_SWITCHX2
 	tristate "Mellanox Technologies SwitchX-2 support"
 	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index f545fd2c5896..b68e5ba323cc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -8,8 +8,6 @@ obj-$(CONFIG_MLXSW_PCI)		+= mlxsw_pci.o
 mlxsw_pci-objs			:= pci.o
 obj-$(CONFIG_MLXSW_I2C)		+= mlxsw_i2c.o
 mlxsw_i2c-objs			:= i2c.o
-obj-$(CONFIG_MLXSW_SWITCHIB)	+= mlxsw_switchib.o
-mlxsw_switchib-objs		:= switchib.o
 obj-$(CONFIG_MLXSW_SWITCHX2)	+= mlxsw_switchx2.o
 mlxsw_switchx2-objs		:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)	+= mlxsw_spectrum.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index 5b1323645a5d..b0702947d895 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -10,8 +10,6 @@
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM		0xcb84
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM2	0xcf6c
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM3	0xcf70
-#define PCI_DEVICE_ID_MELLANOX_SWITCHIB		0xcb20
-#define PCI_DEVICE_ID_MELLANOX_SWITCHIB2	0xcf08
 
 #if IS_ENABLED(CONFIG_MLXSW_PCI)
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
deleted file mode 100644
index 1e561132eb1e..000000000000
--- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c
+++ /dev/null
@@ -1,595 +0,0 @@
-// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
-/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/skbuff.h>
-#include <linux/if_vlan.h>
-#include <net/switchdev.h>
-
-#include "pci.h"
-#include "core.h"
-#include "reg.h"
-#include "port.h"
-#include "trap.h"
-#include "txheader.h"
-#include "ib.h"
-
-static const char mlxsw_sib_driver_name[] = "mlxsw_switchib";
-static const char mlxsw_sib2_driver_name[] = "mlxsw_switchib2";
-
-struct mlxsw_sib_port;
-
-struct mlxsw_sib {
-	struct mlxsw_sib_port **ports;
-	struct mlxsw_core *core;
-	const struct mlxsw_bus_info *bus_info;
-	u8 hw_id[ETH_ALEN];
-};
-
-struct mlxsw_sib_port {
-	struct mlxsw_sib *mlxsw_sib;
-	u8 local_port;
-	struct {
-		u8 module;
-	} mapping;
-};
-
-/* tx_v1_hdr_version
- * Tx header version.
- * Must be set to 1.
- */
-MLXSW_ITEM32(tx_v1, hdr, version, 0x00, 28, 4);
-
-/* tx_v1_hdr_ctl
- * Packet control type.
- * 0 - Ethernet control (e.g. EMADs, LACP)
- * 1 - Ethernet data
- */
-MLXSW_ITEM32(tx_v1, hdr, ctl, 0x00, 26, 2);
-
-/* tx_v1_hdr_proto
- * Packet protocol type. Must be set to 1 (Ethernet).
- */
-MLXSW_ITEM32(tx_v1, hdr, proto, 0x00, 21, 3);
-
-/* tx_v1_hdr_swid
- * Switch partition ID. Must be set to 0.
- */
-MLXSW_ITEM32(tx_v1, hdr, swid, 0x00, 12, 3);
-
-/* tx_v1_hdr_control_tclass
- * Indicates if the packet should use the control TClass and not one
- * of the data TClasses.
- */
-MLXSW_ITEM32(tx_v1, hdr, control_tclass, 0x00, 6, 1);
-
-/* tx_v1_hdr_port_mid
- * Destination local port for unicast packets.
- * Destination multicast ID for multicast packets.
- *
- * Control packets are directed to a specific egress port, while data
- * packets are transmitted through the CPU port (0) into the switch partition,
- * where forwarding rules are applied.
- */
-MLXSW_ITEM32(tx_v1, hdr, port_mid, 0x04, 16, 16);
-
-/* tx_v1_hdr_type
- * 0 - Data packets
- * 6 - Control packets
- */
-MLXSW_ITEM32(tx_v1, hdr, type, 0x0C, 0, 4);
-
-static void
-mlxsw_sib_tx_v1_hdr_construct(struct sk_buff *skb,
-			      const struct mlxsw_tx_info *tx_info)
-{
-	char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
-
-	memset(txhdr, 0, MLXSW_TXHDR_LEN);
-
-	mlxsw_tx_v1_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
-	mlxsw_tx_v1_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
-	mlxsw_tx_v1_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
-	mlxsw_tx_v1_hdr_swid_set(txhdr, 0);
-	mlxsw_tx_v1_hdr_control_tclass_set(txhdr, 1);
-	mlxsw_tx_v1_hdr_port_mid_set(txhdr, tx_info->local_port);
-	mlxsw_tx_v1_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
-}
-
-static int mlxsw_sib_hw_id_get(struct mlxsw_sib *mlxsw_sib)
-{
-	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
-	int err;
-
-	err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(spad), spad_pl);
-	if (err)
-		return err;
-	mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sib->hw_id);
-	return 0;
-}
-
-static int
-mlxsw_sib_port_admin_status_set(struct mlxsw_sib_port *mlxsw_sib_port,
-				bool is_up)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
-	char paos_pl[MLXSW_REG_PAOS_LEN];
-
-	mlxsw_reg_paos_pack(paos_pl, mlxsw_sib_port->local_port,
-			    is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
-			    MLXSW_PORT_ADMIN_STATUS_DOWN);
-	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(paos), paos_pl);
-}
-
-static int mlxsw_sib_port_mtu_set(struct mlxsw_sib_port *mlxsw_sib_port,
-				  u16 mtu)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
-	char pmtu_pl[MLXSW_REG_PMTU_LEN];
-	int max_mtu;
-	int err;
-
-	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
-	if (err)
-		return err;
-	max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
-
-	if (mtu > max_mtu)
-		return -EINVAL;
-
-	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, mtu);
-	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
-}
-
-static int mlxsw_sib_port_set(struct mlxsw_sib_port *mlxsw_sib_port, u8 port)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
-	char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
-	int err;
-
-	mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sib_port->local_port);
-	mlxsw_reg_plib_ib_port_set(plib_pl, port);
-	err = mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(plib), plib_pl);
-	return err;
-}
-
-static int mlxsw_sib_port_swid_set(struct mlxsw_sib_port *mlxsw_sib_port,
-				   u8 swid)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
-	char pspa_pl[MLXSW_REG_PSPA_LEN];
-
-	mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sib_port->local_port);
-	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pspa), pspa_pl);
-}
-
-static int mlxsw_sib_port_module_info_get(struct mlxsw_sib *mlxsw_sib,
-					  u8 local_port, u8 *p_module,
-					  u8 *p_width)
-{
-	char pmlp_pl[MLXSW_REG_PMLP_LEN];
-	int err;
-
-	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
-	err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmlp), pmlp_pl);
-	if (err)
-		return err;
-	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
-	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-	return 0;
-}
-
-static int mlxsw_sib_port_speed_set(struct mlxsw_sib_port *mlxsw_sib_port,
-				    u16 speed, u16 width)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
-	char ptys_pl[MLXSW_REG_PTYS_LEN];
-
-	mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sib_port->local_port, speed,
-			       width);
-	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(ptys), ptys_pl);
-}
-
-static bool mlxsw_sib_port_created(struct mlxsw_sib *mlxsw_sib, u8 local_port)
-{
-	return mlxsw_sib->ports[local_port] != NULL;
-}
-
-static int __mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
-				   u8 module, u8 width)
-{
-	struct mlxsw_sib_port *mlxsw_sib_port;
-	int err;
-
-	mlxsw_sib_port = kzalloc(sizeof(*mlxsw_sib_port), GFP_KERNEL);
-	if (!mlxsw_sib_port)
-		return -ENOMEM;
-	mlxsw_sib_port->mlxsw_sib = mlxsw_sib;
-	mlxsw_sib_port->local_port = local_port;
-	mlxsw_sib_port->mapping.module = module;
-
-	err = mlxsw_sib_port_swid_set(mlxsw_sib_port, 0);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set SWID\n",
-			mlxsw_sib_port->local_port);
-		goto err_port_swid_set;
-	}
-
-	/* Expose the IB port number as it's front panel name */
-	err = mlxsw_sib_port_set(mlxsw_sib_port, module + 1);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set IB port\n",
-			mlxsw_sib_port->local_port);
-		goto err_port_ib_set;
-	}
-
-	/* Supports all speeds from SDR to FDR (bitmask) and support bus width
-	 * of 1x, 2x and 4x (3 bits bitmask)
-	 */
-	err = mlxsw_sib_port_speed_set(mlxsw_sib_port,
-				       MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
-				       BIT(3) - 1);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set speed\n",
-			mlxsw_sib_port->local_port);
-		goto err_port_speed_set;
-	}
-
-	/* Change to the maximum MTU the device supports, the SMA will take
-	 * care of the active MTU
-	 */
-	err = mlxsw_sib_port_mtu_set(mlxsw_sib_port, MLXSW_IB_DEFAULT_MTU);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set MTU\n",
-			mlxsw_sib_port->local_port);
-		goto err_port_mtu_set;
-	}
-
-	err = mlxsw_sib_port_admin_status_set(mlxsw_sib_port, true);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
-			mlxsw_sib_port->local_port);
-		goto err_port_admin_set;
-	}
-
-	mlxsw_core_port_ib_set(mlxsw_sib->core, mlxsw_sib_port->local_port,
-			       mlxsw_sib_port);
-	mlxsw_sib->ports[local_port] = mlxsw_sib_port;
-	return 0;
-
-err_port_admin_set:
-err_port_mtu_set:
-err_port_speed_set:
-err_port_ib_set:
-	mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
-err_port_swid_set:
-	kfree(mlxsw_sib_port);
-	return err;
-}
-
-static int mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
-				 u8 module, u8 width)
-{
-	int err;
-
-	err = mlxsw_core_port_init(mlxsw_sib->core, local_port,
-				   module + 1, false, 0, false, 0,
-				   mlxsw_sib->hw_id, sizeof(mlxsw_sib->hw_id));
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n",
-			local_port);
-		return err;
-	}
-	err = __mlxsw_sib_port_create(mlxsw_sib, local_port, module, width);
-	if (err)
-		goto err_port_create;
-
-	return 0;
-
-err_port_create:
-	mlxsw_core_port_fini(mlxsw_sib->core, local_port);
-	return err;
-}
-
-static void __mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
-{
-	struct mlxsw_sib_port *mlxsw_sib_port = mlxsw_sib->ports[local_port];
-
-	mlxsw_core_port_clear(mlxsw_sib->core, local_port, mlxsw_sib);
-	mlxsw_sib->ports[local_port] = NULL;
-	mlxsw_sib_port_admin_status_set(mlxsw_sib_port, false);
-	mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
-	kfree(mlxsw_sib_port);
-}
-
-static void mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
-{
-	__mlxsw_sib_port_remove(mlxsw_sib, local_port);
-	mlxsw_core_port_fini(mlxsw_sib->core, local_port);
-}
-
-static void mlxsw_sib_ports_remove(struct mlxsw_sib *mlxsw_sib)
-{
-	int i;
-
-	for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++)
-		if (mlxsw_sib_port_created(mlxsw_sib, i))
-			mlxsw_sib_port_remove(mlxsw_sib, i);
-	kfree(mlxsw_sib->ports);
-}
-
-static int mlxsw_sib_ports_create(struct mlxsw_sib *mlxsw_sib)
-{
-	size_t alloc_size;
-	u8 module, width;
-	int i;
-	int err;
-
-	alloc_size = sizeof(struct mlxsw_sib_port *) * MLXSW_PORT_MAX_IB_PORTS;
-	mlxsw_sib->ports = kzalloc(alloc_size, GFP_KERNEL);
-	if (!mlxsw_sib->ports)
-		return -ENOMEM;
-
-	for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++) {
-		err = mlxsw_sib_port_module_info_get(mlxsw_sib, i, &module,
-						     &width);
-		if (err)
-			goto err_port_module_info_get;
-		if (!width)
-			continue;
-		err = mlxsw_sib_port_create(mlxsw_sib, i, module, width);
-		if (err)
-			goto err_port_create;
-	}
-	return 0;
-
-err_port_create:
-err_port_module_info_get:
-	for (i--; i >= 1; i--)
-		if (mlxsw_sib_port_created(mlxsw_sib, i))
-			mlxsw_sib_port_remove(mlxsw_sib, i);
-	kfree(mlxsw_sib->ports);
-	return err;
-}
-
-static void
-mlxsw_sib_pude_ib_event_func(struct mlxsw_sib_port *mlxsw_sib_port,
-			     enum mlxsw_reg_pude_oper_status status)
-{
-	if (status == MLXSW_PORT_OPER_STATUS_UP)
-		pr_info("ib link for port %d - up\n",
-			mlxsw_sib_port->mapping.module + 1);
-	else
-		pr_info("ib link for port %d - down\n",
-			mlxsw_sib_port->mapping.module + 1);
-}
-
-static void mlxsw_sib_pude_event_func(const struct mlxsw_reg_info *reg,
-				      char *pude_pl, void *priv)
-{
-	struct mlxsw_sib *mlxsw_sib = priv;
-	struct mlxsw_sib_port *mlxsw_sib_port;
-	enum mlxsw_reg_pude_oper_status status;
-	u8 local_port;
-
-	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
-	mlxsw_sib_port = mlxsw_sib->ports[local_port];
-	if (!mlxsw_sib_port) {
-		dev_warn(mlxsw_sib->bus_info->dev, "Port %d: Link event received for non-existent port\n",
-			 local_port);
-		return;
-	}
-
-	status = mlxsw_reg_pude_oper_status_get(pude_pl);
-	mlxsw_sib_pude_ib_event_func(mlxsw_sib_port, status);
-}
-
-static const struct mlxsw_listener mlxsw_sib_listener[] = {
-	MLXSW_EVENTL(mlxsw_sib_pude_event_func, PUDE, EMAD),
-};
-
-static int mlxsw_sib_taps_init(struct mlxsw_sib *mlxsw_sib)
-{
-	int i;
-	int err;
-
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
-		err = mlxsw_core_trap_register(mlxsw_sib->core,
-					       &mlxsw_sib_listener[i],
-					       mlxsw_sib);
-		if (err)
-			goto err_rx_listener_register;
-	}
-
-	return 0;
-
-err_rx_listener_register:
-	for (i--; i >= 0; i--) {
-		mlxsw_core_trap_unregister(mlxsw_sib->core,
-					   &mlxsw_sib_listener[i],
-					   mlxsw_sib);
-	}
-
-	return err;
-}
-
-static void mlxsw_sib_traps_fini(struct mlxsw_sib *mlxsw_sib)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
-		mlxsw_core_trap_unregister(mlxsw_sib->core,
-					   &mlxsw_sib_listener[i], mlxsw_sib);
-	}
-}
-
-static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
-{
-	char htgt_pl[MLXSW_REG_HTGT_LEN];
-
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-			    MLXSW_REG_HTGT_INVALID_POLICER,
-			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-			    MLXSW_REG_HTGT_DEFAULT_TC);
-	mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
-	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
-					MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD);
-	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-}
-
-static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
-			  const struct mlxsw_bus_info *mlxsw_bus_info,
-			  struct netlink_ext_ack *extack)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
-	int err;
-
-	mlxsw_sib->core = mlxsw_core;
-	mlxsw_sib->bus_info = mlxsw_bus_info;
-
-	err = mlxsw_sib_hw_id_get(mlxsw_sib);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Failed to get switch HW ID\n");
-		return err;
-	}
-
-	err = mlxsw_sib_ports_create(mlxsw_sib);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Failed to create ports\n");
-		return err;
-	}
-
-	err = mlxsw_sib_taps_init(mlxsw_sib);
-	if (err) {
-		dev_err(mlxsw_sib->bus_info->dev, "Failed to set traps\n");
-		goto err_traps_init_err;
-	}
-
-	return 0;
-
-err_traps_init_err:
-	mlxsw_sib_ports_remove(mlxsw_sib);
-	return err;
-}
-
-static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core)
-{
-	struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
-
-	mlxsw_sib_traps_fini(mlxsw_sib);
-	mlxsw_sib_ports_remove(mlxsw_sib);
-}
-
-static const struct mlxsw_config_profile mlxsw_sib_config_profile = {
-	.used_max_system_port		= 1,
-	.max_system_port		= 48000,
-	.used_max_ib_mc			= 1,
-	.max_ib_mc			= 27,
-	.used_max_pkey			= 1,
-	.max_pkey			= 32,
-	.swid_config			= {
-		{
-			.used_type	= 1,
-			.type		= MLXSW_PORT_SWID_TYPE_IB,
-		}
-	},
-};
-
-static struct mlxsw_driver mlxsw_sib_driver = {
-	.kind			= mlxsw_sib_driver_name,
-	.priv_size		= sizeof(struct mlxsw_sib),
-	.init			= mlxsw_sib_init,
-	.fini			= mlxsw_sib_fini,
-	.basic_trap_groups_set	= mlxsw_sib_basic_trap_groups_set,
-	.txhdr_construct	= mlxsw_sib_tx_v1_hdr_construct,
-	.txhdr_len		= MLXSW_TXHDR_LEN,
-	.profile		= &mlxsw_sib_config_profile,
-};
-
-static struct mlxsw_driver mlxsw_sib2_driver = {
-	.kind			= mlxsw_sib2_driver_name,
-	.priv_size		= sizeof(struct mlxsw_sib),
-	.init			= mlxsw_sib_init,
-	.fini			= mlxsw_sib_fini,
-	.basic_trap_groups_set	= mlxsw_sib_basic_trap_groups_set,
-	.txhdr_construct	= mlxsw_sib_tx_v1_hdr_construct,
-	.txhdr_len		= MLXSW_TXHDR_LEN,
-	.profile		= &mlxsw_sib_config_profile,
-};
-
-static const struct pci_device_id mlxsw_sib_pci_id_table[] = {
-	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB), 0},
-	{0, },
-};
-
-static struct pci_driver mlxsw_sib_pci_driver = {
-	.name = mlxsw_sib_driver_name,
-	.id_table = mlxsw_sib_pci_id_table,
-};
-
-static const struct pci_device_id mlxsw_sib2_pci_id_table[] = {
-	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB2), 0},
-	{0, },
-};
-
-static struct pci_driver mlxsw_sib2_pci_driver = {
-	.name = mlxsw_sib2_driver_name,
-	.id_table = mlxsw_sib2_pci_id_table,
-};
-
-static int __init mlxsw_sib_module_init(void)
-{
-	int err;
-
-	err = mlxsw_core_driver_register(&mlxsw_sib_driver);
-	if (err)
-		return err;
-
-	err = mlxsw_core_driver_register(&mlxsw_sib2_driver);
-	if (err)
-		goto err_sib2_driver_register;
-
-	err = mlxsw_pci_driver_register(&mlxsw_sib_pci_driver);
-	if (err)
-		goto err_sib_pci_driver_register;
-
-	err = mlxsw_pci_driver_register(&mlxsw_sib2_pci_driver);
-	if (err)
-		goto err_sib2_pci_driver_register;
-
-	return 0;
-
-err_sib2_pci_driver_register:
-	mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
-err_sib_pci_driver_register:
-	mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
-err_sib2_driver_register:
-	mlxsw_core_driver_unregister(&mlxsw_sib_driver);
-	return err;
-}
-
-static void __exit mlxsw_sib_module_exit(void)
-{
-	mlxsw_pci_driver_unregister(&mlxsw_sib2_pci_driver);
-	mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
-	mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
-	mlxsw_core_driver_unregister(&mlxsw_sib_driver);
-}
-
-module_init(mlxsw_sib_module_init);
-module_exit(mlxsw_sib_module_exit);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Elad Raz <eladr@@mellanox.com>");
-MODULE_DESCRIPTION("Mellanox SwitchIB and SwitchIB-2 driver");
-MODULE_ALIAS("mlxsw_switchib2");
-MODULE_DEVICE_TABLE(pci, mlxsw_sib_pci_id_table);
-MODULE_DEVICE_TABLE(pci, mlxsw_sib2_pci_id_table);

From b0d80c013b04a13323f117764c77ef211af28aaf Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Mon, 17 May 2021 20:04:01 +0300
Subject: [PATCH 0242/3292] mlxsw: Remove Mellanox SwitchX-2 ASIC support

Initial support for the Mellanox SwitchX-2 ASIC was added in July 2015.
Since then all development efforts shifted towards the Mellanox Spectrum
ASICs and development of this driver stopped beside trivial fixes and
refactoring. Therefore, the driver does not support any switch offloads
and simply traps all traffic to the CPU, rendering it irrelevant for
deployment.

In addition, support for this ASIC was dropped by Mellanox a few years
ago.

Given the driver is not used by any users and that there is no
intention of investing in its development, remove it from the kernel.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig   |   11 -
 drivers/net/ethernet/mellanox/mlxsw/Makefile  |    2 -
 drivers/net/ethernet/mellanox/mlxsw/ib.h      |    9 -
 drivers/net/ethernet/mellanox/mlxsw/pci.c     |    5 -
 drivers/net/ethernet/mellanox/mlxsw/pci.h     |    1 -
 .../net/ethernet/mellanox/mlxsw/switchx2.c    | 1691 -----------------
 6 files changed, 1719 deletions(-)
 delete mode 100644 drivers/net/ethernet/mellanox/mlxsw/ib.h
 delete mode 100644 drivers/net/ethernet/mellanox/mlxsw/switchx2.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 6509b5fab936..12871c8dc7c1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -49,17 +49,6 @@ config MLXSW_I2C
 	  To compile this driver as a module, choose M here: the
 	  module will be called mlxsw_i2c.
 
-config MLXSW_SWITCHX2
-	tristate "Mellanox Technologies SwitchX-2 support"
-	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
-	default m
-	help
-	  This driver supports Mellanox Technologies SwitchX-2 Ethernet
-	  Switch ASICs.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called mlxsw_switchx2.
-
 config MLXSW_SPECTRUM
 	tristate "Mellanox Technologies Spectrum family support"
 	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index b68e5ba323cc..196adeb33495 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -8,8 +8,6 @@ obj-$(CONFIG_MLXSW_PCI)		+= mlxsw_pci.o
 mlxsw_pci-objs			:= pci.o
 obj-$(CONFIG_MLXSW_I2C)		+= mlxsw_i2c.o
 mlxsw_i2c-objs			:= i2c.o
-obj-$(CONFIG_MLXSW_SWITCHX2)	+= mlxsw_switchx2.o
-mlxsw_switchx2-objs		:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)	+= mlxsw_spectrum.o
 mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_switchdev.o spectrum_router.o \
diff --git a/drivers/net/ethernet/mellanox/mlxsw/ib.h b/drivers/net/ethernet/mellanox/mlxsw/ib.h
deleted file mode 100644
index 2d0cb0f5eb85..000000000000
--- a/drivers/net/ethernet/mellanox/mlxsw/ib.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
-/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
-
-#ifndef _MLXSW_IB_H
-#define _MLXSW_IB_H
-
-#define MLXSW_IB_DEFAULT_MTU 4096
-
-#endif /* _MLXSW_IB_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 8e8456811384..13b0259f7ea6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1426,11 +1426,6 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
 	unsigned long end;
 	u32 val;
 
-	if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
-		msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
-		return 0;
-	}
-
 	/* We must wait for the HW to become responsive. */
 	msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index b0702947d895..9899c1a2ea8f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -6,7 +6,6 @@
 
 #include <linux/pci.h>
 
-#define PCI_DEVICE_ID_MELLANOX_SWITCHX2		0xc738
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM		0xcb84
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM2	0xcf6c
 #define PCI_DEVICE_ID_MELLANOX_SPECTRUM3	0xcf70
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
deleted file mode 100644
index 131b2a53d261..000000000000
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ /dev/null
@@ -1,1691 +0,0 @@
-// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
-/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/netdevice.h>
-#include <linux/ethtool.h>
-#include <linux/etherdevice.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/skbuff.h>
-#include <linux/if_vlan.h>
-
-#include "pci.h"
-#include "core.h"
-#include "reg.h"
-#include "port.h"
-#include "trap.h"
-#include "txheader.h"
-#include "ib.h"
-
-static const char mlxsw_sx_driver_name[] = "mlxsw_switchx2";
-static const char mlxsw_sx_driver_version[] = "1.0";
-
-struct mlxsw_sx_port;
-
-struct mlxsw_sx {
-	struct mlxsw_sx_port **ports;
-	struct mlxsw_core *core;
-	const struct mlxsw_bus_info *bus_info;
-	u8 hw_id[ETH_ALEN];
-};
-
-struct mlxsw_sx_port_pcpu_stats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			tx_packets;
-	u64			tx_bytes;
-	struct u64_stats_sync	syncp;
-	u32			tx_dropped;
-};
-
-struct mlxsw_sx_port {
-	struct net_device *dev;
-	struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats;
-	struct mlxsw_sx *mlxsw_sx;
-	u8 local_port;
-	struct {
-		u8 module;
-	} mapping;
-};
-
-/* tx_hdr_version
- * Tx header version.
- * Must be set to 0.
- */
-MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4);
-
-/* tx_hdr_ctl
- * Packet control type.
- * 0 - Ethernet control (e.g. EMADs, LACP)
- * 1 - Ethernet data
- */
-MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2);
-
-/* tx_hdr_proto
- * Packet protocol type. Must be set to 1 (Ethernet).
- */
-MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3);
-
-/* tx_hdr_etclass
- * Egress TClass to be used on the egress device on the egress port.
- * The MSB is specified in the 'ctclass3' field.
- * Range is 0-15, where 15 is the highest priority.
- */
-MLXSW_ITEM32(tx, hdr, etclass, 0x00, 18, 3);
-
-/* tx_hdr_swid
- * Switch partition ID.
- */
-MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3);
-
-/* tx_hdr_port_mid
- * Destination local port for unicast packets.
- * Destination multicast ID for multicast packets.
- *
- * Control packets are directed to a specific egress port, while data
- * packets are transmitted through the CPU port (0) into the switch partition,
- * where forwarding rules are applied.
- */
-MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16);
-
-/* tx_hdr_ctclass3
- * See field 'etclass'.
- */
-MLXSW_ITEM32(tx, hdr, ctclass3, 0x04, 14, 1);
-
-/* tx_hdr_rdq
- * RDQ for control packets sent to remote CPU.
- * Must be set to 0x1F for EMADs, otherwise 0.
- */
-MLXSW_ITEM32(tx, hdr, rdq, 0x04, 9, 5);
-
-/* tx_hdr_cpu_sig
- * Signature control for packets going to CPU. Must be set to 0.
- */
-MLXSW_ITEM32(tx, hdr, cpu_sig, 0x04, 0, 9);
-
-/* tx_hdr_sig
- * Stacking protocl signature. Must be set to 0xE0E0.
- */
-MLXSW_ITEM32(tx, hdr, sig, 0x0C, 16, 16);
-
-/* tx_hdr_stclass
- * Stacking TClass.
- */
-MLXSW_ITEM32(tx, hdr, stclass, 0x0C, 13, 3);
-
-/* tx_hdr_emad
- * EMAD bit. Must be set for EMADs.
- */
-MLXSW_ITEM32(tx, hdr, emad, 0x0C, 5, 1);
-
-/* tx_hdr_type
- * 0 - Data packets
- * 6 - Control packets
- */
-MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
-
-static void mlxsw_sx_txhdr_construct(struct sk_buff *skb,
-				     const struct mlxsw_tx_info *tx_info)
-{
-	char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
-	bool is_emad = tx_info->is_emad;
-
-	memset(txhdr, 0, MLXSW_TXHDR_LEN);
-
-	/* We currently set default values for the egress tclass (QoS). */
-	mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_0);
-	mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
-	mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
-	mlxsw_tx_hdr_etclass_set(txhdr, is_emad ? MLXSW_TXHDR_ETCLASS_6 :
-						  MLXSW_TXHDR_ETCLASS_5);
-	mlxsw_tx_hdr_swid_set(txhdr, 0);
-	mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port);
-	mlxsw_tx_hdr_ctclass3_set(txhdr, MLXSW_TXHDR_CTCLASS3);
-	mlxsw_tx_hdr_rdq_set(txhdr, is_emad ? MLXSW_TXHDR_RDQ_EMAD :
-					      MLXSW_TXHDR_RDQ_OTHER);
-	mlxsw_tx_hdr_cpu_sig_set(txhdr, MLXSW_TXHDR_CPU_SIG);
-	mlxsw_tx_hdr_sig_set(txhdr, MLXSW_TXHDR_SIG);
-	mlxsw_tx_hdr_stclass_set(txhdr, MLXSW_TXHDR_STCLASS_NONE);
-	mlxsw_tx_hdr_emad_set(txhdr, is_emad ? MLXSW_TXHDR_EMAD :
-					       MLXSW_TXHDR_NOT_EMAD);
-	mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
-}
-
-static int mlxsw_sx_port_admin_status_set(struct mlxsw_sx_port *mlxsw_sx_port,
-					  bool is_up)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char paos_pl[MLXSW_REG_PAOS_LEN];
-
-	mlxsw_reg_paos_pack(paos_pl, mlxsw_sx_port->local_port,
-			    is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
-			    MLXSW_PORT_ADMIN_STATUS_DOWN);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(paos), paos_pl);
-}
-
-static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port,
-					 bool *p_is_up)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char paos_pl[MLXSW_REG_PAOS_LEN];
-	u8 oper_status;
-	int err;
-
-	mlxsw_reg_paos_pack(paos_pl, mlxsw_sx_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(paos), paos_pl);
-	if (err)
-		return err;
-	oper_status = mlxsw_reg_paos_oper_status_get(paos_pl);
-	*p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP;
-	return 0;
-}
-
-static int __mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				   u16 mtu)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char pmtu_pl[MLXSW_REG_PMTU_LEN];
-	int max_mtu;
-	int err;
-
-	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
-	if (err)
-		return err;
-	max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
-
-	if (mtu > max_mtu)
-		return -EINVAL;
-
-	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, mtu);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
-}
-
-static int mlxsw_sx_port_mtu_eth_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				     u16 mtu)
-{
-	mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
-	return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
-}
-
-static int mlxsw_sx_port_mtu_ib_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				    u16 mtu)
-{
-	return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
-}
-
-static int mlxsw_sx_port_ib_port_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				     u8 ib_port)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
-	int err;
-
-	mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sx_port->local_port);
-	mlxsw_reg_plib_ib_port_set(plib_pl, ib_port);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(plib), plib_pl);
-	return err;
-}
-
-static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char pspa_pl[MLXSW_REG_PSPA_LEN];
-
-	mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sx_port->local_port);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pspa), pspa_pl);
-}
-
-static int
-mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char sspr_pl[MLXSW_REG_SSPR_LEN];
-
-	mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sx_port->local_port);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl);
-}
-
-static int mlxsw_sx_port_module_info_get(struct mlxsw_sx *mlxsw_sx,
-					 u8 local_port, u8 *p_module,
-					 u8 *p_width)
-{
-	char pmlp_pl[MLXSW_REG_PMLP_LEN];
-	int err;
-
-	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmlp), pmlp_pl);
-	if (err)
-		return err;
-	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
-	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
-	return 0;
-}
-
-static int mlxsw_sx_port_open(struct net_device *dev)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	int err;
-
-	err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
-	if (err)
-		return err;
-	netif_start_queue(dev);
-	return 0;
-}
-
-static int mlxsw_sx_port_stop(struct net_device *dev)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-
-	netif_stop_queue(dev);
-	return mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
-}
-
-static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
-				      struct net_device *dev)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	struct mlxsw_sx_port_pcpu_stats *pcpu_stats;
-	const struct mlxsw_tx_info tx_info = {
-		.local_port = mlxsw_sx_port->local_port,
-		.is_emad = false,
-	};
-	u64 len;
-	int err;
-
-	if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) {
-		this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
-	memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
-
-	if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
-		return NETDEV_TX_BUSY;
-
-	mlxsw_sx_txhdr_construct(skb, &tx_info);
-	/* TX header is consumed by HW on the way so we shouldn't count its
-	 * bytes as being sent.
-	 */
-	len = skb->len - MLXSW_TXHDR_LEN;
-	/* Due to a race we might fail here because of a full queue. In that
-	 * unlikely case we simply drop the packet.
-	 */
-	err = mlxsw_core_skb_transmit(mlxsw_sx->core, skb, &tx_info);
-
-	if (!err) {
-		pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats);
-		u64_stats_update_begin(&pcpu_stats->syncp);
-		pcpu_stats->tx_packets++;
-		pcpu_stats->tx_bytes += len;
-		u64_stats_update_end(&pcpu_stats->syncp);
-	} else {
-		this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
-		dev_kfree_skb_any(skb);
-	}
-	return NETDEV_TX_OK;
-}
-
-static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	int err;
-
-	err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, mtu);
-	if (err)
-		return err;
-	dev->mtu = mtu;
-	return 0;
-}
-
-static void
-mlxsw_sx_port_get_stats64(struct net_device *dev,
-			  struct rtnl_link_stats64 *stats)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx_port_pcpu_stats *p;
-	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
-	u32 tx_dropped = 0;
-	unsigned int start;
-	int i;
-
-	for_each_possible_cpu(i) {
-		p = per_cpu_ptr(mlxsw_sx_port->pcpu_stats, i);
-		do {
-			start = u64_stats_fetch_begin_irq(&p->syncp);
-			rx_packets	= p->rx_packets;
-			rx_bytes	= p->rx_bytes;
-			tx_packets	= p->tx_packets;
-			tx_bytes	= p->tx_bytes;
-		} while (u64_stats_fetch_retry_irq(&p->syncp, start));
-
-		stats->rx_packets	+= rx_packets;
-		stats->rx_bytes		+= rx_bytes;
-		stats->tx_packets	+= tx_packets;
-		stats->tx_bytes		+= tx_bytes;
-		/* tx_dropped is u32, updated without syncp protection. */
-		tx_dropped	+= p->tx_dropped;
-	}
-	stats->tx_dropped	= tx_dropped;
-}
-
-static struct devlink_port *
-mlxsw_sx_port_get_devlink_port(struct net_device *dev)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-
-	return mlxsw_core_port_devlink_port_get(mlxsw_sx->core,
-						mlxsw_sx_port->local_port);
-}
-
-static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
-	.ndo_open		= mlxsw_sx_port_open,
-	.ndo_stop		= mlxsw_sx_port_stop,
-	.ndo_start_xmit		= mlxsw_sx_port_xmit,
-	.ndo_change_mtu		= mlxsw_sx_port_change_mtu,
-	.ndo_get_stats64	= mlxsw_sx_port_get_stats64,
-	.ndo_get_devlink_port	= mlxsw_sx_port_get_devlink_port,
-};
-
-static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
-				      struct ethtool_drvinfo *drvinfo)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-
-	strlcpy(drvinfo->driver, mlxsw_sx_driver_name, sizeof(drvinfo->driver));
-	strlcpy(drvinfo->version, mlxsw_sx_driver_version,
-		sizeof(drvinfo->version));
-	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-		 "%d.%d.%d",
-		 mlxsw_sx->bus_info->fw_rev.major,
-		 mlxsw_sx->bus_info->fw_rev.minor,
-		 mlxsw_sx->bus_info->fw_rev.subminor);
-	strlcpy(drvinfo->bus_info, mlxsw_sx->bus_info->device_name,
-		sizeof(drvinfo->bus_info));
-}
-
-struct mlxsw_sx_port_hw_stats {
-	char str[ETH_GSTRING_LEN];
-	u64 (*getter)(const char *payload);
-};
-
-static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = {
-	{
-		.str = "a_frames_transmitted_ok",
-		.getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get,
-	},
-	{
-		.str = "a_frames_received_ok",
-		.getter = mlxsw_reg_ppcnt_a_frames_received_ok_get,
-	},
-	{
-		.str = "a_frame_check_sequence_errors",
-		.getter = mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get,
-	},
-	{
-		.str = "a_alignment_errors",
-		.getter = mlxsw_reg_ppcnt_a_alignment_errors_get,
-	},
-	{
-		.str = "a_octets_transmitted_ok",
-		.getter = mlxsw_reg_ppcnt_a_octets_transmitted_ok_get,
-	},
-	{
-		.str = "a_octets_received_ok",
-		.getter = mlxsw_reg_ppcnt_a_octets_received_ok_get,
-	},
-	{
-		.str = "a_multicast_frames_xmitted_ok",
-		.getter = mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get,
-	},
-	{
-		.str = "a_broadcast_frames_xmitted_ok",
-		.getter = mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get,
-	},
-	{
-		.str = "a_multicast_frames_received_ok",
-		.getter = mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get,
-	},
-	{
-		.str = "a_broadcast_frames_received_ok",
-		.getter = mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get,
-	},
-	{
-		.str = "a_in_range_length_errors",
-		.getter = mlxsw_reg_ppcnt_a_in_range_length_errors_get,
-	},
-	{
-		.str = "a_out_of_range_length_field",
-		.getter = mlxsw_reg_ppcnt_a_out_of_range_length_field_get,
-	},
-	{
-		.str = "a_frame_too_long_errors",
-		.getter = mlxsw_reg_ppcnt_a_frame_too_long_errors_get,
-	},
-	{
-		.str = "a_symbol_error_during_carrier",
-		.getter = mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get,
-	},
-	{
-		.str = "a_mac_control_frames_transmitted",
-		.getter = mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get,
-	},
-	{
-		.str = "a_mac_control_frames_received",
-		.getter = mlxsw_reg_ppcnt_a_mac_control_frames_received_get,
-	},
-	{
-		.str = "a_unsupported_opcodes_received",
-		.getter = mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get,
-	},
-	{
-		.str = "a_pause_mac_ctrl_frames_received",
-		.getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_received_get,
-	},
-	{
-		.str = "a_pause_mac_ctrl_frames_xmitted",
-		.getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_transmitted_get,
-	},
-};
-
-#define MLXSW_SX_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sx_port_hw_stats)
-
-static void mlxsw_sx_port_get_strings(struct net_device *dev,
-				      u32 stringset, u8 *data)
-{
-	u8 *p = data;
-	int i;
-
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) {
-			memcpy(p, mlxsw_sx_port_hw_stats[i].str,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		break;
-	}
-}
-
-static void mlxsw_sx_port_get_stats(struct net_device *dev,
-				    struct ethtool_stats *stats, u64 *data)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
-	int i;
-	int err;
-
-	mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port,
-			     MLXSW_REG_PPCNT_IEEE_8023_CNT, 0);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl);
-	for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++)
-		data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0;
-}
-
-static int mlxsw_sx_port_get_sset_count(struct net_device *dev, int sset)
-{
-	switch (sset) {
-	case ETH_SS_STATS:
-		return MLXSW_SX_PORT_HW_STATS_LEN;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-struct mlxsw_sx_port_link_mode {
-	u32 mask;
-	u32 supported;
-	u32 advertised;
-	u32 speed;
-};
-
-static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_SGMII |
-				  MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
-		.supported	= SUPPORTED_1000baseKX_Full,
-		.advertised	= ADVERTISED_1000baseKX_Full,
-		.speed		= 1000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
-		.supported	= SUPPORTED_10000baseKX4_Full,
-		.advertised	= ADVERTISED_10000baseKX4_Full,
-		.speed		= 10000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
-				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
-				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
-				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR,
-		.supported	= SUPPORTED_10000baseKR_Full,
-		.advertised	= ADVERTISED_10000baseKR_Full,
-		.speed		= 10000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
-		.supported	= SUPPORTED_40000baseCR4_Full,
-		.advertised	= ADVERTISED_40000baseCR4_Full,
-		.speed		= 40000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4,
-		.supported	= SUPPORTED_40000baseKR4_Full,
-		.advertised	= ADVERTISED_40000baseKR4_Full,
-		.speed		= 40000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4,
-		.supported	= SUPPORTED_40000baseSR4_Full,
-		.advertised	= ADVERTISED_40000baseSR4_Full,
-		.speed		= 40000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4,
-		.supported	= SUPPORTED_40000baseLR4_Full,
-		.advertised	= ADVERTISED_40000baseLR4_Full,
-		.speed		= 40000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR |
-				  MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR |
-				  MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
-		.speed		= 25000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 |
-				  MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
-		.speed		= 50000,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
-		.speed		= 100000,
-	},
-};
-
-#define MLXSW_SX_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sx_port_link_mode)
-#define MLXSW_SX_PORT_BASE_SPEED 10000 /* Mb/s */
-
-static u32 mlxsw_sx_from_ptys_supported_port(u32 ptys_eth_proto)
-{
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
-			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_SGMII))
-		return SUPPORTED_FIBRE;
-
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
-			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX))
-		return SUPPORTED_Backplane;
-	return 0;
-}
-
-static u32 mlxsw_sx_from_ptys_supported_link(u32 ptys_eth_proto)
-{
-	u32 modes = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
-		if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask)
-			modes |= mlxsw_sx_port_link_mode[i].supported;
-	}
-	return modes;
-}
-
-static u32 mlxsw_sx_from_ptys_advert_link(u32 ptys_eth_proto)
-{
-	u32 modes = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
-		if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask)
-			modes |= mlxsw_sx_port_link_mode[i].advertised;
-	}
-	return modes;
-}
-
-static void mlxsw_sx_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
-					    struct ethtool_link_ksettings *cmd)
-{
-	u32 speed = SPEED_UNKNOWN;
-	u8 duplex = DUPLEX_UNKNOWN;
-	int i;
-
-	if (!carrier_ok)
-		goto out;
-
-	for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
-		if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask) {
-			speed = mlxsw_sx_port_link_mode[i].speed;
-			duplex = DUPLEX_FULL;
-			break;
-		}
-	}
-out:
-	cmd->base.speed = speed;
-	cmd->base.duplex = duplex;
-}
-
-static u8 mlxsw_sx_port_connector_port(u32 ptys_eth_proto)
-{
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_SGMII))
-		return PORT_FIBRE;
-
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4))
-		return PORT_DA;
-
-	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
-			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
-			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4))
-		return PORT_NONE;
-
-	return PORT_OTHER;
-}
-
-static int
-mlxsw_sx_port_get_link_ksettings(struct net_device *dev,
-				 struct ethtool_link_ksettings *cmd)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char ptys_pl[MLXSW_REG_PTYS_LEN];
-	u32 eth_proto_cap;
-	u32 eth_proto_admin;
-	u32 eth_proto_oper;
-	u32 supported, advertising, lp_advertising;
-	int err;
-
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
-	if (err) {
-		netdev_err(dev, "Failed to get proto");
-		return err;
-	}
-	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap,
-				  &eth_proto_admin, &eth_proto_oper);
-
-	supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) |
-			 mlxsw_sx_from_ptys_supported_link(eth_proto_cap) |
-			 SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-	advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_admin);
-	mlxsw_sx_from_ptys_speed_duplex(netif_carrier_ok(dev),
-					eth_proto_oper, cmd);
-
-	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
-	cmd->base.port = mlxsw_sx_port_connector_port(eth_proto_oper);
-	lp_advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_oper);
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
-						lp_advertising);
-
-	return 0;
-}
-
-static u32 mlxsw_sx_to_ptys_advert_link(u32 advertising)
-{
-	u32 ptys_proto = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
-		if (advertising & mlxsw_sx_port_link_mode[i].advertised)
-			ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
-	}
-	return ptys_proto;
-}
-
-static u32 mlxsw_sx_to_ptys_speed(u32 speed)
-{
-	u32 ptys_proto = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
-		if (speed == mlxsw_sx_port_link_mode[i].speed)
-			ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
-	}
-	return ptys_proto;
-}
-
-static u32 mlxsw_sx_to_ptys_upper_speed(u32 upper_speed)
-{
-	u32 ptys_proto = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
-		if (mlxsw_sx_port_link_mode[i].speed <= upper_speed)
-			ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
-	}
-	return ptys_proto;
-}
-
-static int
-mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
-				 const struct ethtool_link_ksettings *cmd)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char ptys_pl[MLXSW_REG_PTYS_LEN];
-	u32 speed;
-	u32 eth_proto_new;
-	u32 eth_proto_cap;
-	u32 eth_proto_admin;
-	u32 advertising;
-	bool is_up;
-	int err;
-
-	speed = cmd->base.speed;
-
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						cmd->link_modes.advertising);
-
-	eth_proto_new = cmd->base.autoneg == AUTONEG_ENABLE ?
-		mlxsw_sx_to_ptys_advert_link(advertising) :
-		mlxsw_sx_to_ptys_speed(speed);
-
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
-	if (err) {
-		netdev_err(dev, "Failed to get proto");
-		return err;
-	}
-	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
-				  NULL);
-
-	eth_proto_new = eth_proto_new & eth_proto_cap;
-	if (!eth_proto_new) {
-		netdev_err(dev, "Not supported proto admin requested");
-		return -EINVAL;
-	}
-	if (eth_proto_new == eth_proto_admin)
-		return 0;
-
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
-				eth_proto_new, true);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
-	if (err) {
-		netdev_err(dev, "Failed to set proto admin");
-		return err;
-	}
-
-	err = mlxsw_sx_port_oper_status_get(mlxsw_sx_port, &is_up);
-	if (err) {
-		netdev_err(dev, "Failed to get oper status");
-		return err;
-	}
-	if (!is_up)
-		return 0;
-
-	err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
-	if (err) {
-		netdev_err(dev, "Failed to set admin status");
-		return err;
-	}
-
-	err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
-	if (err) {
-		netdev_err(dev, "Failed to set admin status");
-		return err;
-	}
-
-	return 0;
-}
-
-static const struct ethtool_ops mlxsw_sx_port_ethtool_ops = {
-	.get_drvinfo		= mlxsw_sx_port_get_drvinfo,
-	.get_link		= ethtool_op_get_link,
-	.get_strings		= mlxsw_sx_port_get_strings,
-	.get_ethtool_stats	= mlxsw_sx_port_get_stats,
-	.get_sset_count		= mlxsw_sx_port_get_sset_count,
-	.get_link_ksettings	= mlxsw_sx_port_get_link_ksettings,
-	.set_link_ksettings	= mlxsw_sx_port_set_link_ksettings,
-};
-
-static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx)
-{
-	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
-	int err;
-
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(spad), spad_pl);
-	if (err)
-		return err;
-	mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sx->hw_id);
-	return 0;
-}
-
-static int mlxsw_sx_port_dev_addr_get(struct mlxsw_sx_port *mlxsw_sx_port)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	struct net_device *dev = mlxsw_sx_port->dev;
-	char ppad_pl[MLXSW_REG_PPAD_LEN];
-	int err;
-
-	mlxsw_reg_ppad_pack(ppad_pl, false, 0);
-	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppad), ppad_pl);
-	if (err)
-		return err;
-	mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, dev->dev_addr);
-	/* The last byte value in base mac address is guaranteed
-	 * to be such it does not overflow when adding local_port
-	 * value.
-	 */
-	dev->dev_addr[ETH_ALEN - 1] += mlxsw_sx_port->local_port;
-	return 0;
-}
-
-static int mlxsw_sx_port_stp_state_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				       u16 vid, enum mlxsw_reg_spms_state state)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char *spms_pl;
-	int err;
-
-	spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
-	if (!spms_pl)
-		return -ENOMEM;
-	mlxsw_reg_spms_pack(spms_pl, mlxsw_sx_port->local_port);
-	mlxsw_reg_spms_vid_pack(spms_pl, vid, state);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spms), spms_pl);
-	kfree(spms_pl);
-	return err;
-}
-
-static int mlxsw_sx_port_ib_speed_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				      u16 speed, u16 width)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char ptys_pl[MLXSW_REG_PTYS_LEN];
-
-	mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sx_port->local_port, speed,
-			       width);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
-}
-
-static int
-mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	u32 upper_speed = MLXSW_SX_PORT_BASE_SPEED * width;
-	char ptys_pl[MLXSW_REG_PTYS_LEN];
-	u32 eth_proto_admin;
-
-	eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
-				eth_proto_admin, true);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
-}
-
-static int
-mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				    enum mlxsw_reg_spmlr_learn_mode mode)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-	char spmlr_pl[MLXSW_REG_SPMLR_LEN];
-
-	mlxsw_reg_spmlr_pack(spmlr_pl, mlxsw_sx_port->local_port, mode);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spmlr), spmlr_pl);
-}
-
-static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
-				      u8 module, u8 width)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port;
-	struct net_device *dev;
-	int err;
-
-	dev = alloc_etherdev(sizeof(struct mlxsw_sx_port));
-	if (!dev)
-		return -ENOMEM;
-	SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
-	dev_net_set(dev, mlxsw_core_net(mlxsw_sx->core));
-	mlxsw_sx_port = netdev_priv(dev);
-	mlxsw_sx_port->dev = dev;
-	mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
-	mlxsw_sx_port->local_port = local_port;
-	mlxsw_sx_port->mapping.module = module;
-
-	mlxsw_sx_port->pcpu_stats =
-		netdev_alloc_pcpu_stats(struct mlxsw_sx_port_pcpu_stats);
-	if (!mlxsw_sx_port->pcpu_stats) {
-		err = -ENOMEM;
-		goto err_alloc_stats;
-	}
-
-	dev->netdev_ops = &mlxsw_sx_port_netdev_ops;
-	dev->ethtool_ops = &mlxsw_sx_port_ethtool_ops;
-
-	err = mlxsw_sx_port_dev_addr_get(mlxsw_sx_port);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Unable to get port mac address\n",
-			mlxsw_sx_port->local_port);
-		goto err_dev_addr_get;
-	}
-
-	netif_carrier_off(dev);
-
-	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG |
-			 NETIF_F_VLAN_CHALLENGED;
-
-	dev->min_mtu = 0;
-	dev->max_mtu = ETH_MAX_MTU;
-
-	/* Each packet needs to have a Tx header (metadata) on top all other
-	 * headers.
-	 */
-	dev->needed_headroom = MLXSW_TXHDR_LEN;
-
-	err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_system_port_mapping_set;
-	}
-
-	err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 0);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_swid_set;
-	}
-
-	err = mlxsw_sx_port_speed_by_width_set(mlxsw_sx_port, width);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_speed_set;
-	}
-
-	err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, ETH_DATA_LEN);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_mtu_set;
-	}
-
-	err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
-	if (err)
-		goto err_port_admin_status_set;
-
-	err = mlxsw_sx_port_stp_state_set(mlxsw_sx_port,
-					  MLXSW_PORT_DEFAULT_VID,
-					  MLXSW_REG_SPMS_STATE_FORWARDING);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set STP state\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_stp_state_set;
-	}
-
-	err = mlxsw_sx_port_mac_learning_mode_set(mlxsw_sx_port,
-						  MLXSW_REG_SPMLR_LEARN_MODE_DISABLE);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MAC learning mode\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_mac_learning_mode_set;
-	}
-
-	err = register_netdev(dev);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to register netdev\n",
-			mlxsw_sx_port->local_port);
-		goto err_register_netdev;
-	}
-
-	mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
-				mlxsw_sx_port, dev);
-	mlxsw_sx->ports[local_port] = mlxsw_sx_port;
-	return 0;
-
-err_register_netdev:
-err_port_mac_learning_mode_set:
-err_port_stp_state_set:
-err_port_admin_status_set:
-err_port_mtu_set:
-err_port_speed_set:
-	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
-err_port_swid_set:
-err_port_system_port_mapping_set:
-err_dev_addr_get:
-	free_percpu(mlxsw_sx_port->pcpu_stats);
-err_alloc_stats:
-	free_netdev(dev);
-	return err;
-}
-
-static int mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
-				    u8 module, u8 width)
-{
-	int err;
-
-	err = mlxsw_core_port_init(mlxsw_sx->core, local_port,
-				   module + 1, false, 0, false, 0,
-				   mlxsw_sx->hw_id, sizeof(mlxsw_sx->hw_id));
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
-			local_port);
-		return err;
-	}
-	err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module, width);
-	if (err)
-		goto err_port_create;
-
-	return 0;
-
-err_port_create:
-	mlxsw_core_port_fini(mlxsw_sx->core, local_port);
-	return err;
-}
-
-static void __mlxsw_sx_port_eth_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
-
-	mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
-	unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */
-	mlxsw_sx->ports[local_port] = NULL;
-	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
-	free_percpu(mlxsw_sx_port->pcpu_stats);
-	free_netdev(mlxsw_sx_port->dev);
-}
-
-static bool mlxsw_sx_port_created(struct mlxsw_sx *mlxsw_sx, u8 local_port)
-{
-	return mlxsw_sx->ports[local_port] != NULL;
-}
-
-static int __mlxsw_sx_port_ib_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
-				     u8 module, u8 width)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port;
-	int err;
-
-	mlxsw_sx_port = kzalloc(sizeof(*mlxsw_sx_port), GFP_KERNEL);
-	if (!mlxsw_sx_port)
-		return -ENOMEM;
-	mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
-	mlxsw_sx_port->local_port = local_port;
-	mlxsw_sx_port->mapping.module = module;
-
-	err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_system_port_mapping_set;
-	}
-
-	/* Adding port to Infiniband swid (1) */
-	err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 1);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_swid_set;
-	}
-
-	/* Expose the IB port number as it's front panel name */
-	err = mlxsw_sx_port_ib_port_set(mlxsw_sx_port, module + 1);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set IB port\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_ib_set;
-	}
-
-	/* Supports all speeds from SDR to FDR (bitmask) and support bus width
-	 * of 1x, 2x and 4x (3 bits bitmask)
-	 */
-	err = mlxsw_sx_port_ib_speed_set(mlxsw_sx_port,
-					 MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
-					 BIT(3) - 1);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_speed_set;
-	}
-
-	/* Change to the maximum MTU the device supports, the SMA will take
-	 * care of the active MTU
-	 */
-	err = mlxsw_sx_port_mtu_ib_set(mlxsw_sx_port, MLXSW_IB_DEFAULT_MTU);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_mtu_set;
-	}
-
-	err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_admin_set;
-	}
-
-	mlxsw_core_port_ib_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
-			       mlxsw_sx_port);
-	mlxsw_sx->ports[local_port] = mlxsw_sx_port;
-	return 0;
-
-err_port_admin_set:
-err_port_mtu_set:
-err_port_speed_set:
-err_port_ib_set:
-	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
-err_port_swid_set:
-err_port_system_port_mapping_set:
-	kfree(mlxsw_sx_port);
-	return err;
-}
-
-static void __mlxsw_sx_port_ib_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
-
-	mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
-	mlxsw_sx->ports[local_port] = NULL;
-	mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
-	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
-	kfree(mlxsw_sx_port);
-}
-
-static void __mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
-{
-	enum devlink_port_type port_type =
-		mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
-
-	if (port_type == DEVLINK_PORT_TYPE_ETH)
-		__mlxsw_sx_port_eth_remove(mlxsw_sx, local_port);
-	else if (port_type == DEVLINK_PORT_TYPE_IB)
-		__mlxsw_sx_port_ib_remove(mlxsw_sx, local_port);
-}
-
-static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
-{
-	__mlxsw_sx_port_remove(mlxsw_sx, local_port);
-	mlxsw_core_port_fini(mlxsw_sx->core, local_port);
-}
-
-static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx)
-{
-	int i;
-
-	for (i = 1; i < mlxsw_core_max_ports(mlxsw_sx->core); i++)
-		if (mlxsw_sx_port_created(mlxsw_sx, i))
-			mlxsw_sx_port_remove(mlxsw_sx, i);
-	kfree(mlxsw_sx->ports);
-	mlxsw_sx->ports = NULL;
-}
-
-static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
-{
-	unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sx->core);
-	size_t alloc_size;
-	u8 module, width;
-	int i;
-	int err;
-
-	alloc_size = sizeof(struct mlxsw_sx_port *) * max_ports;
-	mlxsw_sx->ports = kzalloc(alloc_size, GFP_KERNEL);
-	if (!mlxsw_sx->ports)
-		return -ENOMEM;
-
-	for (i = 1; i < max_ports; i++) {
-		err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module,
-						    &width);
-		if (err)
-			goto err_port_module_info_get;
-		if (!width)
-			continue;
-		err = mlxsw_sx_port_eth_create(mlxsw_sx, i, module, width);
-		if (err)
-			goto err_port_create;
-	}
-	return 0;
-
-err_port_create:
-err_port_module_info_get:
-	for (i--; i >= 1; i--)
-		if (mlxsw_sx_port_created(mlxsw_sx, i))
-			mlxsw_sx_port_remove(mlxsw_sx, i);
-	kfree(mlxsw_sx->ports);
-	mlxsw_sx->ports = NULL;
-	return err;
-}
-
-static void mlxsw_sx_pude_eth_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
-					 enum mlxsw_reg_pude_oper_status status)
-{
-	if (status == MLXSW_PORT_OPER_STATUS_UP) {
-		netdev_info(mlxsw_sx_port->dev, "link up\n");
-		netif_carrier_on(mlxsw_sx_port->dev);
-	} else {
-		netdev_info(mlxsw_sx_port->dev, "link down\n");
-		netif_carrier_off(mlxsw_sx_port->dev);
-	}
-}
-
-static void mlxsw_sx_pude_ib_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
-					enum mlxsw_reg_pude_oper_status status)
-{
-	if (status == MLXSW_PORT_OPER_STATUS_UP)
-		pr_info("ib link for port %d - up\n",
-			mlxsw_sx_port->mapping.module + 1);
-	else
-		pr_info("ib link for port %d - down\n",
-			mlxsw_sx_port->mapping.module + 1);
-}
-
-static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg,
-				     char *pude_pl, void *priv)
-{
-	struct mlxsw_sx *mlxsw_sx = priv;
-	struct mlxsw_sx_port *mlxsw_sx_port;
-	enum mlxsw_reg_pude_oper_status status;
-	enum devlink_port_type port_type;
-	u8 local_port;
-
-	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
-	mlxsw_sx_port = mlxsw_sx->ports[local_port];
-	if (!mlxsw_sx_port) {
-		dev_warn(mlxsw_sx->bus_info->dev, "Port %d: Link event received for non-existent port\n",
-			 local_port);
-		return;
-	}
-
-	status = mlxsw_reg_pude_oper_status_get(pude_pl);
-	port_type = mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
-	if (port_type == DEVLINK_PORT_TYPE_ETH)
-		mlxsw_sx_pude_eth_event_func(mlxsw_sx_port, status);
-	else if (port_type == DEVLINK_PORT_TYPE_IB)
-		mlxsw_sx_pude_ib_event_func(mlxsw_sx_port, status);
-}
-
-static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port,
-				      void *priv)
-{
-	struct mlxsw_sx *mlxsw_sx = priv;
-	struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
-	struct mlxsw_sx_port_pcpu_stats *pcpu_stats;
-
-	if (unlikely(!mlxsw_sx_port)) {
-		dev_warn_ratelimited(mlxsw_sx->bus_info->dev, "Port %d: skb received for non-existent port\n",
-				     local_port);
-		return;
-	}
-
-	skb->dev = mlxsw_sx_port->dev;
-
-	pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats);
-	u64_stats_update_begin(&pcpu_stats->syncp);
-	pcpu_stats->rx_packets++;
-	pcpu_stats->rx_bytes += skb->len;
-	u64_stats_update_end(&pcpu_stats->syncp);
-
-	skb->protocol = eth_type_trans(skb, skb->dev);
-	netif_receive_skb(skb);
-}
-
-static int mlxsw_sx_port_type_set(struct mlxsw_core *mlxsw_core, u8 local_port,
-				  enum devlink_port_type new_type)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
-	u8 module, width;
-	int err;
-
-	if (!mlxsw_sx->ports || !mlxsw_sx->ports[local_port]) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port number \"%d\" does not exist\n",
-			local_port);
-		return -EINVAL;
-	}
-
-	if (new_type == DEVLINK_PORT_TYPE_AUTO)
-		return -EOPNOTSUPP;
-
-	__mlxsw_sx_port_remove(mlxsw_sx, local_port);
-	err = mlxsw_sx_port_module_info_get(mlxsw_sx, local_port, &module,
-					    &width);
-	if (err)
-		goto err_port_module_info_get;
-
-	if (new_type == DEVLINK_PORT_TYPE_ETH)
-		err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module,
-						 width);
-	else if (new_type == DEVLINK_PORT_TYPE_IB)
-		err = __mlxsw_sx_port_ib_create(mlxsw_sx, local_port, module,
-						width);
-
-err_port_module_info_get:
-	return err;
-}
-
-enum {
-	MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX = 1,
-	MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL = 2,
-};
-
-#define MLXSW_SX_RXL(_trap_id) \
-	MLXSW_RXL(mlxsw_sx_rx_listener_func, _trap_id, TRAP_TO_CPU,	\
-		  false, SX2_RX, FORWARD)
-
-static const struct mlxsw_listener mlxsw_sx_listener[] = {
-	MLXSW_EVENTL(mlxsw_sx_pude_event_func, PUDE, EMAD),
-	MLXSW_SX_RXL(FDB_MC),
-	MLXSW_SX_RXL(STP),
-	MLXSW_SX_RXL(LACP),
-	MLXSW_SX_RXL(EAPOL),
-	MLXSW_SX_RXL(LLDP),
-	MLXSW_SX_RXL(MMRP),
-	MLXSW_SX_RXL(MVRP),
-	MLXSW_SX_RXL(RPVST),
-	MLXSW_SX_RXL(DHCP),
-	MLXSW_SX_RXL(IGMP_QUERY),
-	MLXSW_SX_RXL(IGMP_V1_REPORT),
-	MLXSW_SX_RXL(IGMP_V2_REPORT),
-	MLXSW_SX_RXL(IGMP_V2_LEAVE),
-	MLXSW_SX_RXL(IGMP_V3_REPORT),
-};
-
-static int mlxsw_sx_traps_init(struct mlxsw_sx *mlxsw_sx)
-{
-	char htgt_pl[MLXSW_REG_HTGT_LEN];
-	int i;
-	int err;
-
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
-			    MLXSW_REG_HTGT_INVALID_POLICER,
-			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-			    MLXSW_REG_HTGT_DEFAULT_TC);
-	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
-					  MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX);
-
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
-			    MLXSW_REG_HTGT_INVALID_POLICER,
-			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-			    MLXSW_REG_HTGT_DEFAULT_TC);
-	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
-					MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL);
-
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
-	if (err)
-		return err;
-
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
-		err = mlxsw_core_trap_register(mlxsw_sx->core,
-					       &mlxsw_sx_listener[i],
-					       mlxsw_sx);
-		if (err)
-			goto err_listener_register;
-
-	}
-	return 0;
-
-err_listener_register:
-	for (i--; i >= 0; i--) {
-		mlxsw_core_trap_unregister(mlxsw_sx->core,
-					   &mlxsw_sx_listener[i],
-					   mlxsw_sx);
-	}
-	return err;
-}
-
-static void mlxsw_sx_traps_fini(struct mlxsw_sx *mlxsw_sx)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
-		mlxsw_core_trap_unregister(mlxsw_sx->core,
-					   &mlxsw_sx_listener[i],
-					   mlxsw_sx);
-	}
-}
-
-static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx)
-{
-	char sfgc_pl[MLXSW_REG_SFGC_LEN];
-	char sgcr_pl[MLXSW_REG_SGCR_LEN];
-	char *sftr_pl;
-	int err;
-
-	/* Configure a flooding table, which includes only CPU port. */
-	sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
-	if (!sftr_pl)
-		return -ENOMEM;
-	mlxsw_reg_sftr_pack(sftr_pl, 0, 0, MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, 0,
-			    MLXSW_PORT_CPU_PORT, true);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sftr), sftr_pl);
-	kfree(sftr_pl);
-	if (err)
-		return err;
-
-	/* Flood different packet types using the flooding table. */
-	mlxsw_reg_sfgc_pack(sfgc_pl,
-			    MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST,
-			    MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
-			    MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
-			    0);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_sfgc_pack(sfgc_pl,
-			    MLXSW_REG_SFGC_TYPE_BROADCAST,
-			    MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
-			    MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
-			    0);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_sfgc_pack(sfgc_pl,
-			    MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP,
-			    MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
-			    MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
-			    0);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_sfgc_pack(sfgc_pl,
-			    MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6,
-			    MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
-			    MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
-			    0);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_sfgc_pack(sfgc_pl,
-			    MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4,
-			    MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
-			    MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
-			    0);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_sgcr_pack(sgcr_pl, true);
-	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl);
-}
-
-static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
-{
-	char htgt_pl[MLXSW_REG_HTGT_LEN];
-
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-			    MLXSW_REG_HTGT_INVALID_POLICER,
-			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
-			    MLXSW_REG_HTGT_DEFAULT_TC);
-	mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
-	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
-					MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD);
-	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-}
-
-static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
-			 const struct mlxsw_bus_info *mlxsw_bus_info,
-			 struct netlink_ext_ack *extack)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
-	int err;
-
-	mlxsw_sx->core = mlxsw_core;
-	mlxsw_sx->bus_info = mlxsw_bus_info;
-
-	err = mlxsw_sx_hw_id_get(mlxsw_sx);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Failed to get switch HW ID\n");
-		return err;
-	}
-
-	err = mlxsw_sx_ports_create(mlxsw_sx);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Failed to create ports\n");
-		return err;
-	}
-
-	err = mlxsw_sx_traps_init(mlxsw_sx);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps\n");
-		goto err_listener_register;
-	}
-
-	err = mlxsw_sx_flood_init(mlxsw_sx);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Failed to initialize flood tables\n");
-		goto err_flood_init;
-	}
-
-	return 0;
-
-err_flood_init:
-	mlxsw_sx_traps_fini(mlxsw_sx);
-err_listener_register:
-	mlxsw_sx_ports_remove(mlxsw_sx);
-	return err;
-}
-
-static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core)
-{
-	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
-
-	mlxsw_sx_traps_fini(mlxsw_sx);
-	mlxsw_sx_ports_remove(mlxsw_sx);
-}
-
-static const struct mlxsw_config_profile mlxsw_sx_config_profile = {
-	.used_max_vepa_channels		= 1,
-	.max_vepa_channels		= 0,
-	.used_max_mid			= 1,
-	.max_mid			= 7000,
-	.used_max_pgt			= 1,
-	.max_pgt			= 0,
-	.used_max_system_port		= 1,
-	.max_system_port		= 48000,
-	.used_max_vlan_groups		= 1,
-	.max_vlan_groups		= 127,
-	.used_max_regions		= 1,
-	.max_regions			= 400,
-	.used_flood_tables		= 1,
-	.max_flood_tables		= 2,
-	.max_vid_flood_tables		= 1,
-	.used_flood_mode		= 1,
-	.flood_mode			= 3,
-	.used_max_ib_mc			= 1,
-	.max_ib_mc			= 6,
-	.used_max_pkey			= 1,
-	.max_pkey			= 0,
-	.swid_config			= {
-		{
-			.used_type	= 1,
-			.type		= MLXSW_PORT_SWID_TYPE_ETH,
-		},
-		{
-			.used_type	= 1,
-			.type		= MLXSW_PORT_SWID_TYPE_IB,
-		}
-	},
-};
-
-static struct mlxsw_driver mlxsw_sx_driver = {
-	.kind			= mlxsw_sx_driver_name,
-	.priv_size		= sizeof(struct mlxsw_sx),
-	.init			= mlxsw_sx_init,
-	.fini			= mlxsw_sx_fini,
-	.basic_trap_groups_set	= mlxsw_sx_basic_trap_groups_set,
-	.txhdr_construct	= mlxsw_sx_txhdr_construct,
-	.txhdr_len		= MLXSW_TXHDR_LEN,
-	.profile		= &mlxsw_sx_config_profile,
-	.port_type_set		= mlxsw_sx_port_type_set,
-};
-
-static const struct pci_device_id mlxsw_sx_pci_id_table[] = {
-	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0},
-	{0, },
-};
-
-static struct pci_driver mlxsw_sx_pci_driver = {
-	.name = mlxsw_sx_driver_name,
-	.id_table = mlxsw_sx_pci_id_table,
-};
-
-static int __init mlxsw_sx_module_init(void)
-{
-	int err;
-
-	err = mlxsw_core_driver_register(&mlxsw_sx_driver);
-	if (err)
-		return err;
-
-	err = mlxsw_pci_driver_register(&mlxsw_sx_pci_driver);
-	if (err)
-		goto err_pci_driver_register;
-
-	return 0;
-
-err_pci_driver_register:
-	mlxsw_core_driver_unregister(&mlxsw_sx_driver);
-	return err;
-}
-
-static void __exit mlxsw_sx_module_exit(void)
-{
-	mlxsw_pci_driver_unregister(&mlxsw_sx_pci_driver);
-	mlxsw_core_driver_unregister(&mlxsw_sx_driver);
-}
-
-module_init(mlxsw_sx_module_init);
-module_exit(mlxsw_sx_module_exit);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
-MODULE_DESCRIPTION("Mellanox SwitchX-2 driver");
-MODULE_DEVICE_TABLE(pci, mlxsw_sx_pci_id_table);

From 5796254e467bf1cff002df65fbb53ecef6a0e060 Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Mon, 17 May 2021 20:22:05 +0800
Subject: [PATCH 0243/3292] net: Remove the member netns_ok

Every protocol has the 'netns_ok' member and it is euqal to 1. The
'if (!prot->netns_ok)' always false in inet_add_protocol().

Signed-off-by: Yejune Deng <yejunedeng@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/protocol.h    | 1 -
 net/dccp/ipv4.c           | 1 -
 net/ipv4/af_inet.c        | 4 ----
 net/ipv4/gre_demux.c      | 1 -
 net/ipv4/ipmr.c           | 1 -
 net/ipv4/protocol.c       | 6 ------
 net/ipv4/tunnel4.c        | 3 ---
 net/ipv4/udplite.c        | 1 -
 net/ipv4/xfrm4_protocol.c | 3 ---
 net/l2tp/l2tp_ip.c        | 1 -
 net/sctp/protocol.c       | 1 -
 11 files changed, 23 deletions(-)

diff --git a/include/net/protocol.h b/include/net/protocol.h
index 2b778e1d2d8f..f51c06ae365f 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -43,7 +43,6 @@ struct net_protocol {
 	int			(*err_handler)(struct sk_buff *skb, u32 info);
 
 	unsigned int		no_policy:1,
-				netns_ok:1,
 				/* does the protocol do more stringent
 				 * icmp tag validation than simple
 				 * socket lookup?
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ffc601a3b329..f81c1df761d3 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -977,7 +977,6 @@ static const struct net_protocol dccp_v4_protocol = {
 	.handler	= dccp_v4_rcv,
 	.err_handler	= dccp_v4_err,
 	.no_policy	= 1,
-	.netns_ok	= 1,
 	.icmp_strict_tag_validation = 1,
 };
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f17870ee558b..d9bccad65e2b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1720,7 +1720,6 @@ EXPORT_SYMBOL_GPL(snmp_fold_field64);
 #ifdef CONFIG_IP_MULTICAST
 static const struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
-	.netns_ok =	1,
 };
 #endif
 
@@ -1733,7 +1732,6 @@ static struct net_protocol tcp_protocol = {
 	.handler	=	tcp_v4_rcv,
 	.err_handler	=	tcp_v4_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 	.icmp_strict_tag_validation = 1,
 };
 
@@ -1746,14 +1744,12 @@ static struct net_protocol udp_protocol = {
 	.handler =	udp_rcv,
 	.err_handler =	udp_err,
 	.no_policy =	1,
-	.netns_ok =	1,
 };
 
 static const struct net_protocol icmp_protocol = {
 	.handler =	icmp_rcv,
 	.err_handler =	icmp_err,
 	.no_policy =	1,
-	.netns_ok =	1,
 };
 
 static __net_init int ipv4_mib_init_net(struct net *net)
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5d1e6fe9d838..cbb2b4bb0dfa 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -195,7 +195,6 @@ static int gre_err(struct sk_buff *skb, u32 info)
 static const struct net_protocol net_gre_protocol = {
 	.handler     = gre_rcv,
 	.err_handler = gre_err,
-	.netns_ok    = 1,
 };
 
 static int __init gre_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 939792a38814..12b564b1ecb4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3007,7 +3007,6 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
 #ifdef CONFIG_IP_PIMSM_V2
 static const struct net_protocol pim_protocol = {
 	.handler	=	pim_rcv,
-	.netns_ok	=	1,
 };
 #endif
 
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 9a8c0892622b..6913979948d7 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -31,12 +31,6 @@ EXPORT_SYMBOL(inet_offloads);
 
 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	if (!prot->netns_ok) {
-		pr_err("Protocol %u is not namespace aware, cannot register.\n",
-			protocol);
-		return -EINVAL;
-	}
-
 	return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
 			NULL, prot) ? 0 : -1;
 }
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index e44aaf41a138..5048c47c79b2 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -218,7 +218,6 @@ static const struct net_protocol tunnel4_protocol = {
 	.handler	=	tunnel4_rcv,
 	.err_handler	=	tunnel4_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -226,7 +225,6 @@ static const struct net_protocol tunnel64_protocol = {
 	.handler	=	tunnel64_rcv,
 	.err_handler	=	tunnel64_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 };
 #endif
 
@@ -235,7 +233,6 @@ static const struct net_protocol tunnelmpls4_protocol = {
 	.handler	=	tunnelmpls4_rcv,
 	.err_handler	=	tunnelmpls4_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 };
 #endif
 
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index bd8773b49e72..cd1cd68adeec 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,6 @@ static const struct net_protocol udplite_protocol = {
 	.handler	= udplite_rcv,
 	.err_handler	= udplite_err,
 	.no_policy	= 1,
-	.netns_ok	= 1,
 };
 
 struct proto 	udplite_prot = {
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index ea595c8549c7..2fe5860c21d6 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -181,21 +181,18 @@ static const struct net_protocol esp4_protocol = {
 	.handler	=	xfrm4_esp_rcv,
 	.err_handler	=	xfrm4_esp_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 };
 
 static const struct net_protocol ah4_protocol = {
 	.handler	=	xfrm4_ah_rcv,
 	.err_handler	=	xfrm4_ah_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 };
 
 static const struct net_protocol ipcomp4_protocol = {
 	.handler	=	xfrm4_ipcomp_rcv,
 	.err_handler	=	xfrm4_ipcomp_err,
 	.no_policy	=	1,
-	.netns_ok	=	1,
 };
 
 static const struct xfrm_input_afinfo xfrm4_input_afinfo = {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 97ae1255fcb6..536c30d4dd7d 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -635,7 +635,6 @@ static struct inet_protosw l2tp_ip_protosw = {
 
 static struct net_protocol l2tp_ip_protocol __read_mostly = {
 	.handler	= l2tp_ip_recv,
-	.netns_ok	= 1,
 };
 
 static int __init l2tp_ip_init(void)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6f2bbfeec3a4..baa4e770e4ba 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1171,7 +1171,6 @@ static const struct net_protocol sctp_protocol = {
 	.handler     = sctp4_rcv,
 	.err_handler = sctp_v4_err,
 	.no_policy   = 1,
-	.netns_ok    = 1,
 	.icmp_strict_tag_validation = 1,
 };
 

From d6b0625163a8948341d12cac420402a31093b5ed Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Mon, 17 May 2021 12:40:34 -0300
Subject: [PATCH 0244/3292] net: stmmac: Don't set has_gmac if has_gmac4 is set

Some Rockchip platforms have a GMAC4 core, and therefore
'plat_stmmacenet_data.has_gmac' shouldn't be set if
'plat_stmmacenet_data.has_gmac4' is set.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 584db4ce6e39..56034f21fcef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1448,7 +1448,11 @@ static int rk_gmac_probe(struct platform_device *pdev)
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
-	plat_dat->has_gmac = true;
+	/* If the stmmac is not already selected as gmac4,
+	 * then make sure we fallback to gmac.
+	 */
+	if (!plat_dat->has_gmac4)
+		plat_dat->has_gmac = true;
 	plat_dat->fix_mac_speed = rk_fix_speed;
 
 	plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);

From 37c80d15ff4bf7526caf5de8b8cab17ac8769d4c Mon Sep 17 00:00:00 2001
From: David Wu <david.wu@rock-chips.com>
Date: Mon, 17 May 2021 12:40:35 -0300
Subject: [PATCH 0245/3292] net: stmmac: dwmac-rk: Check platform-specific ops

Add a check for non-null struct rk_gmac_ops for the
configured PHY interface mode, failing if unsupported.

Signed-off-by: David Wu <david.wu@rock-chips.com>
[Ezequiel: Refactor so it fails if unsupported]
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-rk.c    | 31 +++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 56034f21fcef..791c13d47a35 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1342,11 +1342,36 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
 	return bsp_priv;
 }
 
+static int rk_gmac_check_ops(struct rk_priv_data *bsp_priv)
+{
+	switch (bsp_priv->phy_iface) {
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		if (!bsp_priv->ops->set_to_rgmii)
+			return -EINVAL;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		if (!bsp_priv->ops->set_to_rmii)
+			return -EINVAL;
+		break;
+	default:
+		dev_err(&bsp_priv->pdev->dev,
+			"unsupported interface %d", bsp_priv->phy_iface);
+	}
+	return 0;
+}
+
 static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
 {
 	int ret;
 	struct device *dev = &bsp_priv->pdev->dev;
 
+	ret = rk_gmac_check_ops(bsp_priv);
+	if (ret)
+		return ret;
+
 	ret = gmac_clk_enable(bsp_priv, true);
 	if (ret)
 		return ret;
@@ -1417,10 +1442,12 @@ static void rk_fix_speed(void *priv, unsigned int speed)
 	case PHY_INTERFACE_MODE_RGMII_ID:
 	case PHY_INTERFACE_MODE_RGMII_RXID:
 	case PHY_INTERFACE_MODE_RGMII_TXID:
-		bsp_priv->ops->set_rgmii_speed(bsp_priv, speed);
+		if (bsp_priv->ops->set_rgmii_speed)
+			bsp_priv->ops->set_rgmii_speed(bsp_priv, speed);
 		break;
 	case PHY_INTERFACE_MODE_RMII:
-		bsp_priv->ops->set_rmii_speed(bsp_priv, speed);
+		if (bsp_priv->ops->set_rmii_speed)
+			bsp_priv->ops->set_rmii_speed(bsp_priv, speed);
 		break;
 	default:
 		dev_err(dev, "unsupported interface %d", bsp_priv->phy_iface);

From f9da1c9d7fb5e26272a060089c19823f748aab73 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Mon, 17 May 2021 12:40:36 -0300
Subject: [PATCH 0246/3292] dt-bindings: net: rockchip-dwmac: add rk3568
 compatible string

Add compatible string for RK3568 gmac, and constrain it to
be compatible with Synopsys dwmac 4.20a.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/rockchip-dwmac.yaml          | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 34a660ad6b30..083623c8d718 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -24,6 +24,7 @@ select:
           - rockchip,rk3366-gmac
           - rockchip,rk3368-gmac
           - rockchip,rk3399-gmac
+          - rockchip,rk3568-gmac
           - rockchip,rv1108-gmac
   required:
     - compatible
@@ -33,18 +34,23 @@ allOf:
 
 properties:
   compatible:
-    items:
-      - enum:
-          - rockchip,px30-gmac
-          - rockchip,rk3128-gmac
-          - rockchip,rk3228-gmac
-          - rockchip,rk3288-gmac
-          - rockchip,rk3308-gmac
-          - rockchip,rk3328-gmac
-          - rockchip,rk3366-gmac
-          - rockchip,rk3368-gmac
-          - rockchip,rk3399-gmac
-          - rockchip,rv1108-gmac
+    oneOf:
+      - items:
+          - enum:
+              - rockchip,px30-gmac
+              - rockchip,rk3128-gmac
+              - rockchip,rk3228-gmac
+              - rockchip,rk3288-gmac
+              - rockchip,rk3308-gmac
+              - rockchip,rk3328-gmac
+              - rockchip,rk3366-gmac
+              - rockchip,rk3368-gmac
+              - rockchip,rk3399-gmac
+              - rockchip,rv1108-gmac
+      - items:
+          - enum:
+              - rockchip,rk3568-gmac
+          - const: snps,dwmac-4.20a
 
   clocks:
     minItems: 5

From 3bb3d6b1c1957e88bfc5e77a4557f7e6ba761fe3 Mon Sep 17 00:00:00 2001
From: David Wu <david.wu@rock-chips.com>
Date: Mon, 17 May 2021 12:40:37 -0300
Subject: [PATCH 0247/3292] net: stmmac: Add RK3566/RK3568 SoC support

Add constants and callback functions for the dwmac present
on RK3566/RK3568 SoCs.

RK3568 has two MACs, and RK3566 just one, but it's otherwise
the same IP core.

Signed-off-by: David Wu <david.wu@rock-chips.com>
[Ezequiel: Separate rk3566-gmac support]
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-rk.c    | 121 ++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 791c13d47a35..280ac0129572 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -33,11 +33,13 @@ struct rk_gmac_ops {
 	void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed);
 	void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed);
 	void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv);
+	u32 regs[];
 };
 
 struct rk_priv_data {
 	struct platform_device *pdev;
 	phy_interface_t phy_iface;
+	int id;
 	struct regulator *regulator;
 	bool suspended;
 	const struct rk_gmac_ops *ops;
@@ -996,6 +998,107 @@ static const struct rk_gmac_ops rk3399_ops = {
 	.set_rmii_speed = rk3399_set_rmii_speed,
 };
 
+#define RK3568_GRF_GMAC0_CON0		0x0380
+#define RK3568_GRF_GMAC0_CON1		0x0384
+#define RK3568_GRF_GMAC1_CON0		0x0388
+#define RK3568_GRF_GMAC1_CON1		0x038c
+
+/* RK3568_GRF_GMAC0_CON1 && RK3568_GRF_GMAC1_CON1 */
+#define RK3568_GMAC_PHY_INTF_SEL_RGMII	\
+		(GRF_BIT(4) | GRF_CLR_BIT(5) | GRF_CLR_BIT(6))
+#define RK3568_GMAC_PHY_INTF_SEL_RMII	\
+		(GRF_CLR_BIT(4) | GRF_CLR_BIT(5) | GRF_BIT(6))
+#define RK3568_GMAC_FLOW_CTRL			GRF_BIT(3)
+#define RK3568_GMAC_FLOW_CTRL_CLR		GRF_CLR_BIT(3)
+#define RK3568_GMAC_RXCLK_DLY_ENABLE		GRF_BIT(1)
+#define RK3568_GMAC_RXCLK_DLY_DISABLE		GRF_CLR_BIT(1)
+#define RK3568_GMAC_TXCLK_DLY_ENABLE		GRF_BIT(0)
+#define RK3568_GMAC_TXCLK_DLY_DISABLE		GRF_CLR_BIT(0)
+
+/* RK3568_GRF_GMAC0_CON0 && RK3568_GRF_GMAC1_CON0 */
+#define RK3568_GMAC_CLK_RX_DL_CFG(val)	HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3568_GMAC_CLK_TX_DL_CFG(val)	HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3568_set_to_rgmii(struct rk_priv_data *bsp_priv,
+				int tx_delay, int rx_delay)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+	u32 con0, con1;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "Missing rockchip,grf property\n");
+		return;
+	}
+
+	con0 = (bsp_priv->id == 1) ? RK3568_GRF_GMAC1_CON0 :
+				     RK3568_GRF_GMAC0_CON0;
+	con1 = (bsp_priv->id == 1) ? RK3568_GRF_GMAC1_CON1 :
+				     RK3568_GRF_GMAC0_CON1;
+
+	regmap_write(bsp_priv->grf, con0,
+		     RK3568_GMAC_CLK_RX_DL_CFG(rx_delay) |
+		     RK3568_GMAC_CLK_TX_DL_CFG(tx_delay));
+
+	regmap_write(bsp_priv->grf, con1,
+		     RK3568_GMAC_PHY_INTF_SEL_RGMII |
+		     RK3568_GMAC_RXCLK_DLY_ENABLE |
+		     RK3568_GMAC_TXCLK_DLY_ENABLE);
+}
+
+static void rk3568_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+	u32 con1;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	con1 = (bsp_priv->id == 1) ? RK3568_GRF_GMAC1_CON1 :
+				     RK3568_GRF_GMAC0_CON1;
+	regmap_write(bsp_priv->grf, con1, RK3568_GMAC_PHY_INTF_SEL_RMII);
+}
+
+static void rk3568_set_gmac_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+	unsigned long rate;
+	int ret;
+
+	switch (speed) {
+	case 10:
+		rate = 2500000;
+		break;
+	case 100:
+		rate = 25000000;
+		break;
+	case 1000:
+		rate = 125000000;
+		break;
+	default:
+		dev_err(dev, "unknown speed value for GMAC speed=%d", speed);
+		return;
+	}
+
+	ret = clk_set_rate(bsp_priv->clk_mac_speed, rate);
+	if (ret)
+		dev_err(dev, "%s: set clk_mac_speed rate %ld failed %d\n",
+			__func__, rate, ret);
+}
+
+static const struct rk_gmac_ops rk3568_ops = {
+	.set_to_rgmii = rk3568_set_to_rgmii,
+	.set_to_rmii = rk3568_set_to_rmii,
+	.set_rgmii_speed = rk3568_set_gmac_speed,
+	.set_rmii_speed = rk3568_set_gmac_speed,
+	.regs = {
+		0xfe2a0000, /* gmac0 */
+		0xfe010000, /* gmac1 */
+		0x0, /* sentinel */
+	},
+};
+
 #define RV1108_GRF_GMAC_CON0		0X0900
 
 /* RV1108_GRF_GMAC_CON0 */
@@ -1264,6 +1367,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
 {
 	struct rk_priv_data *bsp_priv;
 	struct device *dev = &pdev->dev;
+	struct resource *res;
 	int ret;
 	const char *strings = NULL;
 	int value;
@@ -1275,6 +1379,22 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
 	of_get_phy_mode(dev->of_node, &bsp_priv->phy_iface);
 	bsp_priv->ops = ops;
 
+	/* Some SoCs have multiple MAC controllers, which need
+	 * to be distinguished.
+	 */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res) {
+		int i = 0;
+
+		while (ops->regs[i]) {
+			if (ops->regs[i] == res->start) {
+				bsp_priv->id = i;
+				break;
+			}
+			i++;
+		}
+	}
+
 	bsp_priv->regulator = devm_regulator_get_optional(dev, "phy");
 	if (IS_ERR(bsp_priv->regulator)) {
 		if (PTR_ERR(bsp_priv->regulator) == -EPROBE_DEFER) {
@@ -1561,6 +1681,7 @@ static const struct of_device_id rk_gmac_dwmac_match[] = {
 	{ .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops },
 	{ .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops },
 	{ .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops },
+	{ .compatible = "rockchip,rk3568-gmac", .data = &rk3568_ops },
 	{ .compatible = "rockchip,rv1108-gmac", .data = &rv1108_ops },
 	{ }
 };

From b3e22e10fdda8e7be3830289a4a63ae8b88d450c Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Mon, 17 May 2021 11:53:34 +0200
Subject: [PATCH 0248/3292] net: wwan: Add WWAN port type attribute

The port type is by default part of the WWAN port device name.
However device name can not be considered as a 'stable' API and
may be subject to change in the future. This change adds a proper
device attribute that can be used to determine the WWAN protocol/
type.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index cff04e532c1e..92a8a6ffc567 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -169,6 +169,30 @@ static void wwan_remove_dev(struct wwan_device *wwandev)
 
 /* ------- WWAN port management ------- */
 
+/* Keep aligned with wwan_port_type enum */
+static const char * const wwan_port_type_str[] = {
+	"AT",
+	"MBIM",
+	"QMI",
+	"QCDM",
+	"FIREHOSE"
+};
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct wwan_port *port = to_wwan_port(dev);
+
+	return sprintf(buf, "%s\n", wwan_port_type_str[port->type]);
+}
+static DEVICE_ATTR_RO(type);
+
+static struct attribute *wwan_port_attrs[] = {
+	&dev_attr_type.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(wwan_port);
+
 static void wwan_port_destroy(struct device *dev)
 {
 	struct wwan_port *port = to_wwan_port(dev);
@@ -182,6 +206,7 @@ static void wwan_port_destroy(struct device *dev)
 static const struct device_type wwan_port_dev_type = {
 	.name = "wwan_port",
 	.release = wwan_port_destroy,
+	.groups = wwan_port_groups,
 };
 
 static int wwan_port_minor_match(struct device *dev, const void *minor)
@@ -201,15 +226,6 @@ static struct wwan_port *wwan_port_get_by_minor(unsigned int minor)
 	return to_wwan_port(dev);
 }
 
-/* Keep aligned with wwan_port_type enum */
-static const char * const wwan_port_type_str[] = {
-	"AT",
-	"MBIM",
-	"QMI",
-	"QCDM",
-	"FIREHOSE"
-};
-
 struct wwan_port *wwan_create_port(struct device *parent,
 				   enum wwan_port_type type,
 				   const struct wwan_port_ops *ops,

From 9d8a29aed03539a9012bff1232bacf062b5459cf Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 May 2021 11:57:56 +0300
Subject: [PATCH 0249/3292] alx: fix a double unlock in alx_probe()

We're not holding the lock at this point so "goto unlock;" should be
"goto unmap;"

Fixes: 4a5fe57e7751 ("alx: use fine-grained locking instead of RTNL")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index e8884879a50f..45e380f3b065 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1859,7 +1859,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "register netdevice failed\n");
-		goto out_unlock;
+		goto out_unmap;
 	}
 
 	netdev_info(netdev,

From 7617af3d1a5e0938eb1fd2742f19bcea772c7f8d Mon Sep 17 00:00:00 2001
From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Date: Mon, 17 May 2021 17:43:31 +0800
Subject: [PATCH 0250/3292] net: pcs: Introducing support for DWC xpcs Energy
 Efficient Ethernet

Add DWC xpcs EEE support callbacks.The callback function is used to
set EEE registers on xpcs.

xpcs transparent mode is enabled to allow PHY to detect MAC EEE status.

Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c   | 51 ++++++++++++++++++++++++++++++++++++
 include/linux/pcs/pcs-xpcs.h |  2 ++
 2 files changed, 53 insertions(+)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 944ba105cac1..aa985a5aae8d 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -63,6 +63,9 @@
 #define DW_VR_MII_DIG_CTRL1		0x8000
 #define DW_VR_MII_AN_CTRL		0x8001
 #define DW_VR_MII_AN_INTR_STS		0x8002
+/* EEE Mode Control Register */
+#define DW_VR_MII_EEE_MCTRL0		0x8006
+#define DW_VR_MII_EEE_MCTRL1		0x800b
 
 /* VR_MII_DIG_CTRL1 */
 #define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW		BIT(9)
@@ -86,6 +89,20 @@
 #define DW_VR_MII_C37_ANSGM_SP_1000		0x2
 #define DW_VR_MII_C37_ANSGM_SP_LNKSTS		BIT(4)
 
+/* VR MII EEE Control 0 defines */
+#define DW_VR_MII_EEE_LTX_EN		BIT(0)  /* LPI Tx Enable */
+#define DW_VR_MII_EEE_LRX_EN		BIT(1)  /* LPI Rx Enable */
+#define DW_VR_MII_EEE_TX_QUIET_EN		BIT(2)  /* Tx Quiet Enable */
+#define DW_VR_MII_EEE_RX_QUIET_EN		BIT(3)  /* Rx Quiet Enable */
+#define DW_VR_MII_EEE_TX_EN_CTRL		BIT(4)  /* Tx Control Enable */
+#define DW_VR_MII_EEE_RX_EN_CTRL		BIT(7)  /* Rx Control Enable */
+
+#define DW_VR_MII_EEE_MULT_FACT_100NS_SHIFT	8
+#define DW_VR_MII_EEE_MULT_FACT_100NS		GENMASK(11, 8)
+
+/* VR MII EEE Control 1 defines */
+#define DW_VR_MII_EEE_TRN_LPI		BIT(0)	/* Transparent Mode Enable */
+
 static const int xpcs_usxgmii_features[] = {
 	ETHTOOL_LINK_MODE_Pause_BIT,
 	ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -650,6 +667,39 @@ static int xpcs_validate(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
+			   int enable)
+{
+	int ret;
+
+	if (enable) {
+	/* Enable EEE */
+		ret = DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN |
+		      DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN |
+		      DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL |
+		      mult_fact_100ns << DW_VR_MII_EEE_MULT_FACT_100NS_SHIFT;
+	} else {
+		ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0);
+		if (ret < 0)
+			return ret;
+		ret &= ~(DW_VR_MII_EEE_LTX_EN | DW_VR_MII_EEE_LRX_EN |
+		       DW_VR_MII_EEE_TX_QUIET_EN | DW_VR_MII_EEE_RX_QUIET_EN |
+		       DW_VR_MII_EEE_TX_EN_CTRL | DW_VR_MII_EEE_RX_EN_CTRL |
+		       DW_VR_MII_EEE_MULT_FACT_100NS);
+	}
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL0, ret);
+	if (ret < 0)
+		return ret;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1);
+	if (ret < 0)
+		return ret;
+
+	ret |= DW_VR_MII_EEE_TRN_LPI;
+	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1, ret);
+}
+
 static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
 {
 	int ret;
@@ -908,6 +958,7 @@ static struct mdio_xpcs_ops xpcs_ops = {
 	.get_state = xpcs_get_state,
 	.link_up = xpcs_link_up,
 	.probe = xpcs_probe,
+	.config_eee = xpcs_config_eee,
 };
 
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 2cb5188a7ef1..5938ced805f4 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -32,6 +32,8 @@ struct mdio_xpcs_ops {
 	int (*link_up)(struct mdio_xpcs_args *xpcs, int speed,
 		       phy_interface_t interface);
 	int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
+	int (*config_eee)(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
+			  int enable);
 };
 
 #if IS_ENABLED(CONFIG_PCS_XPCS)

From e80fe71b3ffe1ec31c4a9be60170f897bbdf1b92 Mon Sep 17 00:00:00 2001
From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Date: Mon, 17 May 2021 17:43:32 +0800
Subject: [PATCH 0251/3292] net: stmmac: Add callbacks for DWC xpcs Energy
 Efficient Ethernet

Link xpcs callback functions for MAC to configure the xpcs EEE feature.

The clk_eee frequency is used to calculate the MULT_FACT_100NS. This is
to adjust the clock tic closer to 100ns.

Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c    | 11 +++++++++++
 drivers/net/ethernet/stmicro/stmmac/hwif.h           |  2 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c |  6 ++++++
 include/linux/stmmac.h                               |  1 +
 4 files changed, 20 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 80728a4c0e3f..e36a8cc59ad0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -429,6 +429,17 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->force_sf_dma_mode = 0;
 	plat->tso_en = 1;
 
+	/* Multiplying factor to the clk_eee_i clock time
+	 * period to make it closer to 100 ns. This value
+	 * should be programmed such that the clk_eee_time_period *
+	 * (MULT_FACT_100NS + 1) should be within 80 ns to 120 ns
+	 * clk_eee frequency is 19.2Mhz
+	 * clk_eee_time_period is 52ns
+	 * 52ns * (1 + 1) = 104ns
+	 * MULT_FACT_100NS = 1
+	 */
+	plat->mult_fact_100ns = 1;
+
 	plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
 
 	for (i = 0; i < plat->rx_queues_to_use; i++) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 6d5e0f2b03ce..75a8b90c202a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -623,6 +623,8 @@ struct stmmac_mmc_ops {
 	stmmac_do_callback(__priv, xpcs, link_up, __args)
 #define stmmac_xpcs_probe(__priv, __args...) \
 	stmmac_do_callback(__priv, xpcs, probe, __args)
+#define stmmac_xpcs_config_eee(__priv, __args...) \
+	stmmac_do_callback(__priv, xpcs, config_eee, __args)
 
 struct stmmac_regs_off {
 	u32 ptp_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 61b11639ee0c..1f6d749fd9a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -720,6 +720,12 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 		netdev_warn(priv->dev,
 			    "Setting EEE tx-lpi is not supported\n");
 
+	ret = stmmac_xpcs_config_eee(priv, &priv->hw->xpcs_args,
+				     priv->plat->mult_fact_100ns,
+				     edata->eee_enabled);
+	if (ret)
+		return ret;
+
 	if (!edata->eee_enabled)
 		stmmac_disable_eee_mode(priv);
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 0db36360ef21..e14a12df381b 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -223,6 +223,7 @@ struct plat_stmmacenet_data {
 	struct clk *clk_ptp_ref;
 	unsigned int clk_ptp_rate;
 	unsigned int clk_ref_rate;
+	unsigned int mult_fact_100ns;
 	s32 ptp_max_adj;
 	struct reset_control *stmmac_rst;
 	struct stmmac_axi *axi;

From 25c55b38d85b54e49f2c9a3c7d483d1a24173b94 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 17 May 2021 18:15:25 +0800
Subject: [PATCH 0252/3292] net/packet: Remove redundant assignment to ret

Variable ret is set to '0' or '-EBUSY', but this value is never read
as it is not used later on, hence it is a redundant assignment and
can be removed.

Clean up the following clang-analyzer warning:

net/packet/af_packet.c:3936:4: warning: Value stored to 'ret' is never
read [clang-analyzer-deadcode.DeadStores].

net/packet/af_packet.c:3933:4: warning: Value stored to 'ret' is never
read [clang-analyzer-deadcode.DeadStores].

No functional change.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ba96db1880ea..597d798ac0a5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3923,12 +3923,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
 			return -EFAULT;
 
 		lock_sock(sk);
-		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
-			ret = -EBUSY;
-		} else {
+		if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
 			po->tp_tx_has_off = !!val;
-			ret = 0;
-		}
+
 		release_sock(sk);
 		return 0;
 	}

From 06b38e233ce4745571106cba4f39fc8c5eda9c29 Mon Sep 17 00:00:00 2001
From: Juerg Haefliger <juerg.haefliger@canonical.com>
Date: Mon, 17 May 2021 11:58:33 +0200
Subject: [PATCH 0253/3292] drivers/net: Remove leading spaces in Kconfig

Remove leading spaces before tabs in Kconfig file(s) by running the
following command:

  $ find drivers/net -name 'Kconfig*' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/Kconfig | 10 +++++-----
 drivers/net/wan/Kconfig |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index fbbe78643631..179308782888 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -169,7 +169,7 @@ config USB_NET_AX8817X
 	  This option adds support for ASIX AX88xxx based USB 2.0
 	  10/100 Ethernet adapters.
 
- 	  This driver should work with at least the following devices:
+	  This driver should work with at least the following devices:
 	    * Aten UC210T
 	    * ASIX AX88172
 	    * Billionton Systems, USB2AR
@@ -220,13 +220,13 @@ config USB_NET_CDCETHER
 	  CDC Ethernet is an implementation option for DOCSIS cable modems
 	  that support USB connectivity, used for non-Microsoft USB hosts.
 	  The Linux-USB CDC Ethernet Gadget driver is an open implementation.
- 	  This driver should work with at least the following devices:
+	  This driver should work with at least the following devices:
 
 	    * Dell Wireless 5530 HSPA
- 	    * Ericsson PipeRider (all variants)
+	    * Ericsson PipeRider (all variants)
 	    * Ericsson Mobile Broadband Module (all variants)
- 	    * Motorola (DM100 and SB4100)
- 	    * Broadcom Cable Modem (reference design)
+	    * Motorola (DM100 and SB4100)
+	    * Broadcom Cable Modem (reference design)
 	    * Toshiba (PCX1100U and F3507g/F3607gw)
 	    * ...
 
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 83c9481995dd..473df2505c8e 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -49,7 +49,7 @@ config COSA
 	  network device.
 
 	  You will need user-space utilities COSA or SRP boards for downloading
- 	  the firmware to the cards and to set them up. Look at the
+	  the firmware to the cards and to set them up. Look at the
 	  <http://www.fi.muni.cz/~kas/cosa/> for more information. You can also
 	  read the comment at the top of the <file:drivers/net/wan/cosa.c> for
 	  details about the cards and the driver itself.
@@ -108,7 +108,7 @@ config HDLC
 	  Generic HDLC driver currently supports raw HDLC, Cisco HDLC, Frame
 	  Relay, synchronous Point-to-Point Protocol (PPP) and X.25.
 
- 	  To compile this driver as a module, choose M here: the
+	  To compile this driver as a module, choose M here: the
 	  module will be called hdlc.
 
 	  If unsure, say N.

From c6a8625fa4c6b0a97860d053271660ccedc3d1b3 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 14 May 2021 15:01:16 +0800
Subject: [PATCH 0254/3292] hv_utils: Fix passing zero to 'PTR_ERR' warning

Sparse warn this:

drivers/hv/hv_util.c:753 hv_timesync_init() warn:
 passing zero to 'PTR_ERR'

Use PTR_ERR_OR_ZERO instead of PTR_ERR to fix this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210514070116.16800-1-yuehaibing@huawei.com
[ wei: change %ld to %d ]
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/hv_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index e4aefeb330da..136576cba26f 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -750,8 +750,8 @@ static int hv_timesync_init(struct hv_util_service *srv)
 	 */
 	hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
 	if (IS_ERR_OR_NULL(hv_ptp_clock)) {
-		pr_err("cannot register PTP clock: %ld\n",
-		       PTR_ERR(hv_ptp_clock));
+		pr_err("cannot register PTP clock: %d\n",
+		       PTR_ERR_OR_ZERO(hv_ptp_clock));
 		hv_ptp_clock = NULL;
 	}
 

From 2e2f16d5cdb33e5f6fc53b7ad66c9f456d5f2950 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Apr 2021 15:54:53 +0200
Subject: [PATCH 0255/3292] EDAC/aspeed: Use proper format string for printing
 resource

On ARMv7, resource_size_t can be 64-bit, which breaks printing
it as %x:

  drivers/edac/aspeed_edac.c: In function 'init_csrows':
  drivers/edac/aspeed_edac.c:257:28: error: format '%x' expects argument of \
    type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long \
    long unsigned int'} [-Werror=format=]
  257 |         dev_dbg(mci->pdev, "dt: /memory node resources: first page \
    r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",

Use the special %pR format string to pretty-print the entire resource
instead.

Fixes: edfc2d73ca45 ("EDAC/aspeed: Add support for AST2400 and AST2600")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Link: https://lkml.kernel.org/r/20210421135500.3518661-1-arnd@kernel.org
---
 drivers/edac/aspeed_edac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index a46da56d6d54..6bd5f8815919 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -254,8 +254,8 @@ static int init_csrows(struct mem_ctl_info *mci)
 		return rc;
 	}
 
-	dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
-		r.start, resource_size(&r), PAGE_SHIFT);
+	dev_dbg(mci->pdev, "dt: /memory node resources: first page %pR, PAGE_SHIFT macro=0x%x\n",
+		&r, PAGE_SHIFT);
 
 	csrow->first_page = r.start >> PAGE_SHIFT;
 	nr_pages = resource_size(&r) >> PAGE_SHIFT;

From 119220d81258c1e79db9aa7b52ef09b945aaf46f Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Tue, 18 May 2021 16:56:10 +0800
Subject: [PATCH 0256/3292] bpf, arm64: Remove redundant switch case about
 BPF_DIV and BPF_MOD

After commit 96a71005bdcb ("bpf, arm64: remove obsolete exception handling
from div/mod"), there is no need to check twice about BPF_DIV and BPF_MOD,
remove the redundant switch case.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1621328170-17583-1-git-send-email-yangtiezhu@loongson.cn
---
 arch/arm64/net/bpf_jit_comp.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 978502629f4e..be873a7da62b 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -485,17 +485,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		break;
 	case BPF_ALU | BPF_DIV | BPF_X:
 	case BPF_ALU64 | BPF_DIV | BPF_X:
+		emit(A64_UDIV(is64, dst, dst, src), ctx);
+		break;
 	case BPF_ALU | BPF_MOD | BPF_X:
 	case BPF_ALU64 | BPF_MOD | BPF_X:
-		switch (BPF_OP(code)) {
-		case BPF_DIV:
-			emit(A64_UDIV(is64, dst, dst, src), ctx);
-			break;
-		case BPF_MOD:
-			emit(A64_UDIV(is64, tmp, dst, src), ctx);
-			emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
-			break;
-		}
+		emit(A64_UDIV(is64, tmp, dst, src), ctx);
+		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
 		break;
 	case BPF_ALU | BPF_LSH | BPF_X:
 	case BPF_ALU64 | BPF_LSH | BPF_X:

From c49661aa6f7097047b7e86ad37b1cf308a7a8d4f Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Sun, 16 May 2021 19:23:48 -0700
Subject: [PATCH 0257/3292] skmsg: Remove unused parameters of
 sk_msg_wait_data()

'err' and 'flags' are not used, we can just get rid of them.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <song@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210517022348.50555-1-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 3 +--
 net/core/skmsg.c      | 3 +--
 net/ipv4/tcp_bpf.c    | 9 ++-------
 net/ipv4/udp_bpf.c    | 8 ++------
 4 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index aba0f0f429be..fcaa9a7996c8 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -126,8 +126,7 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      struct sk_msg *msg, u32 bytes);
 int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 			     struct sk_msg *msg, u32 bytes);
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
-		     long timeo, int *err);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo);
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		   int len, int flags);
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 43ce17a6a585..f0b9decdf279 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,8 +399,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
-		     long timeo, int *err)
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	int ret = 0;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ad9d17923fc5..a80de92ea3b6 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -184,11 +184,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 msg_bytes_ready:
 	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
 	if (!copied) {
-		int data, err = 0;
 		long timeo;
+		int data;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		data = sk_msg_wait_data(sk, psock, timeo);
 		if (data) {
 			if (!sk_psock_queue_empty(psock))
 				goto msg_bytes_ready;
@@ -196,14 +196,9 @@ msg_bytes_ready:
 			sk_psock_put(sk, psock);
 			return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 		}
-		if (err) {
-			ret = err;
-			goto out;
-		}
 		copied = -EAGAIN;
 	}
 	ret = copied;
-out:
 	release_sock(sk);
 	sk_psock_put(sk, psock);
 	return ret;
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 954c4591a6fd..b07e4b6dda25 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -43,21 +43,17 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 msg_bytes_ready:
 	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
 	if (!copied) {
-		int data, err = 0;
 		long timeo;
+		int data;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		data = sk_msg_wait_data(sk, psock, timeo);
 		if (data) {
 			if (!sk_psock_queue_empty(psock))
 				goto msg_bytes_ready;
 			ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 			goto out;
 		}
-		if (err) {
-			ret = err;
-			goto out;
-		}
 		copied = -EAGAIN;
 	}
 	ret = copied;

From 0169d8f33ab7a58675a94c18122dba58d8f6a1b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 25 Mar 2021 18:42:50 -0600
Subject: [PATCH 0258/3292] Revert "Smack: Handle io_uring kernel thread
 privileges"

This reverts commit 942cb357ae7d9249088e3687ee6a00ed2745a0c7.

The io_uring PF_IO_WORKER threads no longer have PF_KTHREAD set, so no
need to special case them for credential checks.

Cc: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smack_access.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 7eabb448acab..efe2406a3960 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -688,10 +688,9 @@ bool smack_privileged_cred(int cap, const struct cred *cred)
 bool smack_privileged(int cap)
 {
 	/*
-	 * Kernel threads may not have credentials we can use.
-	 * The io_uring kernel threads do have reliable credentials.
+	 * All kernel tasks are privileged
 	 */
-	if ((current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD)
+	if (unlikely(current->flags & PF_KTHREAD))
 		return true;
 
 	return smack_privileged_cred(cap, current_cred());

From fa7b83bf3b156c767f3e4a25bbf3817b08f3ff8e Mon Sep 17 00:00:00 2001
From: Dongseok Yi <dseok.yi@samsung.com>
Date: Wed, 12 May 2021 16:27:33 +0900
Subject: [PATCH 0259/3292] bpf: Check for BPF_F_ADJ_ROOM_FIXED_GSO when
 bpf_skb_change_proto

In the forwarding path GRO -> BPF 6 to 4 -> GSO for TCP traffic, the
coalesced packet payload can be > MSS, but < MSS + 20.

bpf_skb_proto_6_to_4() will upgrade the MSS and it can be > the payload
length. After then tcp_gso_segment checks for the payload length if it
is <= MSS. The condition is causing the packet to be dropped.

tcp_gso_segment():
        [...]
        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;
        [...]

Allow to upgrade/downgrade MSS only when BPF_F_ADJ_ROOM_FIXED_GSO is
not set.

Signed-off-by: Dongseok Yi <dseok.yi@samsung.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/1620804453-57566-1-git-send-email-dseok.yi@samsung.com
---
 net/core/filter.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..582ac196fd94 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3235,7 +3235,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
 	return ret;
 }
 
-static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
 	u32 off = skb_mac_header_len(skb);
@@ -3264,7 +3264,9 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 		}
 
 		/* Due to IPv6 header, MSS needs to be downgraded. */
-		skb_decrease_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_decrease_gso_size(shinfo, len_diff);
+
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3276,7 +3278,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	return 0;
 }
 
-static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
 	u32 off = skb_mac_header_len(skb);
@@ -3305,7 +3307,9 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 		}
 
 		/* Due to IPv4 header, MSS can be upgraded. */
-		skb_increase_gso_size(shinfo, len_diff);
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+			skb_increase_gso_size(shinfo, len_diff);
+
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3317,17 +3321,17 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	return 0;
 }
 
-static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)
 {
 	__be16 from_proto = skb->protocol;
 
 	if (from_proto == htons(ETH_P_IP) &&
 	      to_proto == htons(ETH_P_IPV6))
-		return bpf_skb_proto_4_to_6(skb);
+		return bpf_skb_proto_4_to_6(skb, flags);
 
 	if (from_proto == htons(ETH_P_IPV6) &&
 	      to_proto == htons(ETH_P_IP))
-		return bpf_skb_proto_6_to_4(skb);
+		return bpf_skb_proto_6_to_4(skb, flags);
 
 	return -ENOTSUPP;
 }
@@ -3337,7 +3341,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 {
 	int ret;
 
-	if (unlikely(flags))
+	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))
 		return -EINVAL;
 
 	/* General idea is that this helper does the basic groundwork
@@ -3357,7 +3361,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 	 * that. For offloads, we mark packet as dodgy, so that headers
 	 * need to be verified first.
 	 */
-	ret = bpf_skb_proto_xlat(skb, proto);
+	ret = bpf_skb_proto_xlat(skb, proto, flags);
 	bpf_compute_data_pointers(skb);
 	return ret;
 }

From 2e68ea92684181412b73979baf1af7d04619c52c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:17 +0300
Subject: [PATCH 0260/3292] ipv4: Calculate multipath hash inside switch
 statement

A subsequent patch will add another multipath hash policy where the
multipath hash is calculated directly by the policy specific code and
not outside of the switch statement.

Prepare for this change by moving the multipath hash calculation inside
the switch statement.

No functional changes intended.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6787c55f6ab..9d61e969446e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1912,7 +1912,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 {
 	u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
 	struct flow_keys hash_keys;
-	u32 mhash;
+	u32 mhash = 0;
 
 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
 	case 0:
@@ -1924,6 +1924,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			hash_keys.addrs.v4addrs.src = fl4->saddr;
 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
 		}
+		mhash = flow_hash_from_keys(&hash_keys);
 		break;
 	case 1:
 		/* skb is currently provided only when forwarding */
@@ -1957,6 +1958,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			hash_keys.ports.dst = fl4->fl4_dport;
 			hash_keys.basic.ip_proto = fl4->flowi4_proto;
 		}
+		mhash = flow_hash_from_keys(&hash_keys);
 		break;
 	case 2:
 		memset(&hash_keys, 0, sizeof(hash_keys));
@@ -1987,9 +1989,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			hash_keys.addrs.v4addrs.src = fl4->saddr;
 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
 		}
+		mhash = flow_hash_from_keys(&hash_keys);
 		break;
 	}
-	mhash = flow_hash_from_keys(&hash_keys);
 
 	if (multipath_hash)
 		mhash = jhash_2words(mhash, multipath_hash, 0);

From ce5c9c20d364f156c885efed8c71fca2945db00f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:18 +0300
Subject: [PATCH 0261/3292] ipv4: Add a sysctl to control multipath hash fields

A subsequent patch will add a new multipath hash policy where the packet
fields used for multipath hash calculation are determined by user space.
This patch adds a sysctl that allows user space to set these fields.

The packet fields are represented using a bitmask and are common between
IPv4 and IPv6 to allow user space to use the same numbering across both
protocols. For example, to hash based on standard 5-tuple:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x0037
 net.ipv4.fib_multipath_hash_fields = 0x0037

The kernel rejects unknown fields, for example:

 # sysctl -w net.ipv4.fib_multipath_hash_fields=0x1000
 sysctl: setting key "net.ipv4.fib_multipath_hash_fields": Invalid argument

More fields can be added in the future, if needed.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst | 27 ++++++++++++++++
 include/net/ip_fib.h                   | 43 ++++++++++++++++++++++++++
 include/net/netns/ipv4.h               |  1 +
 net/ipv4/fib_frontend.c                |  6 ++++
 net/ipv4/sysctl_net_ipv4.c             | 12 +++++++
 5 files changed, 89 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index c2ecc9894fd0..47494798d03b 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -100,6 +100,33 @@ fib_multipath_hash_policy - INTEGER
 	- 1 - Layer 4
 	- 2 - Layer 3 or inner Layer 3 if present
 
+fib_multipath_hash_fields - UNSIGNED INTEGER
+	When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
+	fields used for multipath hash calculation are determined by this
+	sysctl.
+
+	This value is a bitmask which enables various fields for multipath hash
+	calculation.
+
+	Possible fields are:
+
+	====== ============================
+	0x0001 Source IP address
+	0x0002 Destination IP address
+	0x0004 IP protocol
+	0x0008 Unused (Flow Label)
+	0x0010 Source port
+	0x0020 Destination port
+	0x0040 Inner source IP address
+	0x0080 Inner destination IP address
+	0x0100 Inner IP protocol
+	0x0200 Inner Flow Label
+	0x0400 Inner source port
+	0x0800 Inner destination port
+	====== ============================
+
+	Default: 0x0007 (source IP, destination IP and IP protocol)
+
 fib_sync_mem - UNSIGNED INTEGER
 	Amount of dirty memory from fib entries that can be backlogged before
 	synchronize_rcu is forced.
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a914f33f3ed5..3ab2563b1a23 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -466,6 +466,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
 void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
 
+/* Fields used for sysctl_fib_multipath_hash_fields.
+ * Common to IPv4 and IPv6.
+ *
+ * Add new fields at the end. This is user API.
+ */
+#define FIB_MULTIPATH_HASH_FIELD_SRC_IP			BIT(0)
+#define FIB_MULTIPATH_HASH_FIELD_DST_IP			BIT(1)
+#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO		BIT(2)
+#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL		BIT(3)
+#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT		BIT(4)
+#define FIB_MULTIPATH_HASH_FIELD_DST_PORT		BIT(5)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP		BIT(6)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP		BIT(7)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO		BIT(8)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL	BIT(9)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT		BIT(10)
+#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT		BIT(11)
+
+#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK		\
+	(FIB_MULTIPATH_HASH_FIELD_SRC_IP |		\
+	 FIB_MULTIPATH_HASH_FIELD_DST_IP |		\
+	 FIB_MULTIPATH_HASH_FIELD_IP_PROTO |		\
+	 FIB_MULTIPATH_HASH_FIELD_FLOWLABEL |		\
+	 FIB_MULTIPATH_HASH_FIELD_SRC_PORT |		\
+	 FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+
+#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK		\
+	(FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP |	\
+	 FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP |	\
+	 FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO |	\
+	 FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL |	\
+	 FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT |	\
+	 FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+
+#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK		\
+	(FIB_MULTIPATH_HASH_FIELD_OUTER_MASK |		\
+	 FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
+
+#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK		\
+	(FIB_MULTIPATH_HASH_FIELD_SRC_IP |		\
+	 FIB_MULTIPATH_HASH_FIELD_DST_IP |		\
+	 FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index f6af8d96d3c6..746c80cd4257 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -210,6 +210,7 @@ struct netns_ipv4 {
 #endif
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
+	u32 sysctl_fib_multipath_hash_fields;
 	u8 sysctl_fib_multipath_use_neigh;
 	u8 sysctl_fib_multipath_hash_policy;
 #endif
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index bfb345c88271..af8814a11378 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1514,6 +1514,12 @@ static int __net_init ip_fib_net_init(struct net *net)
 	if (err)
 		return err;
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	/* Default to 3-tuple */
+	net->ipv4.sysctl_fib_multipath_hash_fields =
+		FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
+#endif
+
 	/* Avoid false sharing : Use at least a full cache line */
 	size = max_t(size_t, size, L1_CACHE_BYTES);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a62934b9f15a..45bab3733621 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_fib.h>
 #include <net/route.h>
 #include <net/tcp.h>
 #include <net/udp.h>
@@ -48,6 +49,8 @@ static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
+static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
+	FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
 
 /* obsolete */
 static int sysctl_tcp_low_latency __read_mostly;
@@ -1052,6 +1055,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
+	{
+		.procname	= "fib_multipath_hash_fields",
+		.data		= &init_net.ipv4.sysctl_fib_multipath_hash_fields,
+		.maxlen		= sizeof(u32),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= &fib_multipath_hash_fields_all_mask,
+	},
 #endif
 	{
 		.procname	= "ip_unprivileged_port_start",

From 4253b4986f98da4bfcb6a24d3fc6ff19f28e8420 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:19 +0300
Subject: [PATCH 0262/3292] ipv4: Add custom multipath hash policy

Add a new multipath hash policy where the packet fields used for hash
calculation are determined by user space via the
fib_multipath_hash_fields sysctl that was introduced in the previous
patch.

The current set of available packet fields includes both outer and inner
fields, which requires two invocations of the flow dissector. Avoid
unnecessary dissection of the outer or inner flows by skipping
dissection if none of the outer or inner fields are required.

In accordance with the existing policies, when an skb is not available,
packet fields are extracted from the provided flow key. In which case,
only outer fields are considered.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst |   2 +
 net/ipv4/route.c                       | 121 +++++++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c             |   3 +-
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 47494798d03b..afdcdc0691d6 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -99,6 +99,8 @@ fib_multipath_hash_policy - INTEGER
 	- 0 - Layer 3
 	- 1 - Layer 4
 	- 2 - Layer 3 or inner Layer 3 if present
+	- 3 - Custom multipath hash. Fields used for multipath hash calculation
+	  are determined by fib_multipath_hash_fields sysctl
 
 fib_multipath_hash_fields - UNSIGNED INTEGER
 	When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9d61e969446e..a4c477475f4c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1906,6 +1906,121 @@ out:
 	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
 }
 
+static u32 fib_multipath_custom_hash_outer(const struct net *net,
+					   const struct sk_buff *skb,
+					   bool *p_has_inner)
+{
+	u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	struct flow_keys keys, hash_keys;
+
+	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+		hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+		hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+		hash_keys.basic.ip_proto = keys.basic.ip_proto;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+		hash_keys.ports.src = keys.ports.src;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+		hash_keys.ports.dst = keys.ports.dst;
+
+	*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
+	return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 fib_multipath_custom_hash_inner(const struct net *net,
+					   const struct sk_buff *skb,
+					   bool has_inner)
+{
+	u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	struct flow_keys keys, hash_keys;
+
+	/* We assume the packet carries an encapsulation, but if none was
+	 * encountered during dissection of the outer flow, then there is no
+	 * point in calling the flow dissector again.
+	 */
+	if (!has_inner)
+		return 0;
+
+	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+	if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
+		return 0;
+
+	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+	} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+			hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+			hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
+			hash_keys.tags.flow_label = keys.tags.flow_label;
+	}
+
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+		hash_keys.basic.ip_proto = keys.basic.ip_proto;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
+		hash_keys.ports.src = keys.ports.src;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+		hash_keys.ports.dst = keys.ports.dst;
+
+	return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 fib_multipath_custom_hash_skb(const struct net *net,
+					 const struct sk_buff *skb)
+{
+	u32 mhash, mhash_inner;
+	bool has_inner = true;
+
+	mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner);
+	mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner);
+
+	return jhash_2words(mhash, mhash_inner, 0);
+}
+
+static u32 fib_multipath_custom_hash_fl4(const struct net *net,
+					 const struct flowi4 *fl4)
+{
+	u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+	struct flow_keys hash_keys;
+
+	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+		hash_keys.addrs.v4addrs.src = fl4->saddr;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+		hash_keys.addrs.v4addrs.dst = fl4->daddr;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+		hash_keys.basic.ip_proto = fl4->flowi4_proto;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+		hash_keys.ports.src = fl4->fl4_sport;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+		hash_keys.ports.dst = fl4->fl4_dport;
+
+	return flow_hash_from_keys(&hash_keys);
+}
+
 /* if skb is set it will be used and fl4 can be NULL */
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
@@ -1991,6 +2106,12 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		}
 		mhash = flow_hash_from_keys(&hash_keys);
 		break;
+	case 3:
+		if (skb)
+			mhash = fib_multipath_custom_hash_skb(net, skb);
+		else
+			mhash = fib_multipath_custom_hash_fl4(net, fl4);
+		break;
 	}
 
 	if (multipath_hash)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 45bab3733621..ffb38ea06841 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -30,6 +30,7 @@
 #include <net/netevent.h>
 
 static int two = 2;
+static int three __maybe_unused = 3;
 static int four = 4;
 static int thousand = 1000;
 static int tcp_retr1_max = 255;
@@ -1053,7 +1054,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_fib_multipath_hash_policy,
 		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
+		.extra2		= &three,
 	},
 	{
 		.procname	= "fib_multipath_hash_fields",

From 67db5ca73b1f98584ae9b6ed35c1c670677c9001 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:20 +0300
Subject: [PATCH 0263/3292] ipv6: Use a more suitable label name

The 'out_timer' label was added in commit 63152fc0de4d ("[NETNS][IPV6]
ip6_fib - gc timer per namespace") when the timer was allocated on the
heap.

Commit 417f28bb3407 ("netns: dont alloc ipv6 fib timer list") removed
the allocation, but kept the label name.

Rename it to a more suitable name.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 679699e953f1..33d2d6a4e28c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2362,7 +2362,7 @@ static int __net_init fib6_net_init(struct net *net)
 
 	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
 	if (!net->ipv6.rt6_stats)
-		goto out_timer;
+		goto out_notifier;
 
 	/* Avoid false sharing : Use at least a full cache line */
 	size = max_t(size_t, size, L1_CACHE_BYTES);
@@ -2407,7 +2407,7 @@ out_fib_table_hash:
 	kfree(net->ipv6.fib_table_hash);
 out_rt6_stats:
 	kfree(net->ipv6.rt6_stats);
-out_timer:
+out_notifier:
 	fib6_notifier_exit(net);
 	return -ENOMEM;
 }

From b95b6e072a92042320fad99de658008cc0beb3b0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:21 +0300
Subject: [PATCH 0264/3292] ipv6: Calculate multipath hash inside switch
 statement

A subsequent patch will add another multipath hash policy where the
multipath hash is calculated directly by the policy specific code and
not outside of the switch statement.

Prepare for this change by moving the multipath hash calculation inside
the switch statement.

No functional changes intended.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a22822bdbf39..9935e18146e5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2331,7 +2331,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
 	struct flow_keys hash_keys;
-	u32 mhash;
+	u32 mhash = 0;
 
 	switch (ip6_multipath_hash_policy(net)) {
 	case 0:
@@ -2345,6 +2345,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
 		}
+		mhash = flow_hash_from_keys(&hash_keys);
 		break;
 	case 1:
 		if (skb) {
@@ -2376,6 +2377,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 			hash_keys.ports.dst = fl6->fl6_dport;
 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
 		}
+		mhash = flow_hash_from_keys(&hash_keys);
 		break;
 	case 2:
 		memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2412,9 +2414,9 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
 		}
+		mhash = flow_hash_from_keys(&hash_keys);
 		break;
 	}
-	mhash = flow_hash_from_keys(&hash_keys);
 
 	return mhash >> 1;
 }

From ed13923f980ef84dde0b9010b9e09052dc31a909 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:22 +0300
Subject: [PATCH 0265/3292] ipv6: Add a sysctl to control multipath hash fields

A subsequent patch will add a new multipath hash policy where the packet
fields used for multipath hash calculation are determined by user space.
This patch adds a sysctl that allows user space to set these fields.

The packet fields are represented using a bitmask and are common between
IPv4 and IPv6 to allow user space to use the same numbering across both
protocols. For example, to hash based on standard 5-tuple:

 # sysctl -w net.ipv6.fib_multipath_hash_fields=0x0037
 net.ipv6.fib_multipath_hash_fields = 0x0037

To avoid introducing holes in 'struct netns_sysctl_ipv6', move the
'bindv6only' field after the multipath hash fields.

The kernel rejects unknown fields, for example:

 # sysctl -w net.ipv6.fib_multipath_hash_fields=0x1000
 sysctl: setting key "net.ipv6.fib_multipath_hash_fields": Invalid argument

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst | 27 ++++++++++++++++++++++++++
 include/net/ipv6.h                     |  8 ++++++++
 include/net/netns/ipv6.h               |  3 ++-
 net/ipv6/ip6_fib.c                     |  5 +++++
 net/ipv6/sysctl_net_ipv6.c             | 12 ++++++++++++
 5 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index afdcdc0691d6..4246cc4ae35b 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1773,6 +1773,33 @@ fib_multipath_hash_policy - INTEGER
 	- 1 - Layer 4 (standard 5-tuple)
 	- 2 - Layer 3 or inner Layer 3 if present
 
+fib_multipath_hash_fields - UNSIGNED INTEGER
+	When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
+	fields used for multipath hash calculation are determined by this
+	sysctl.
+
+	This value is a bitmask which enables various fields for multipath hash
+	calculation.
+
+	Possible fields are:
+
+	====== ============================
+	0x0001 Source IP address
+	0x0002 Destination IP address
+	0x0004 IP protocol
+	0x0008 Flow Label
+	0x0010 Source port
+	0x0020 Destination port
+	0x0040 Inner source IP address
+	0x0080 Inner destination IP address
+	0x0100 Inner IP protocol
+	0x0200 Inner Flow Label
+	0x0400 Inner source port
+	0x0800 Inner destination port
+	====== ============================
+
+	Default: 0x0007 (source IP, destination IP and IP protocol)
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 448bf2b34759..f2d0ecc257bb 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -926,11 +926,19 @@ static inline int ip6_multipath_hash_policy(const struct net *net)
 {
 	return net->ipv6.sysctl.multipath_hash_policy;
 }
+static inline u32 ip6_multipath_hash_fields(const struct net *net)
+{
+	return net->ipv6.sysctl.multipath_hash_fields;
+}
 #else
 static inline int ip6_multipath_hash_policy(const struct net *net)
 {
 	return 0;
 }
+static inline u32 ip6_multipath_hash_fields(const struct net *net)
+{
+	return 0;
+}
 #endif
 
 /*
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 6153c8067009..bde0b7adb4a3 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,8 +28,9 @@ struct netns_sysctl_ipv6 {
 	int ip6_rt_gc_elasticity;
 	int ip6_rt_mtu_expires;
 	int ip6_rt_min_advmss;
-	u8 bindv6only;
+	u32 multipath_hash_fields;
 	u8 multipath_hash_policy;
+	u8 bindv6only;
 	u8 flowlabel_consistency;
 	u8 auto_flowlabels;
 	int icmpv6_time;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 33d2d6a4e28c..2d650dc24349 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -32,6 +32,7 @@
 #include <net/lwtunnel.h>
 #include <net/fib_notifier.h>
 
+#include <net/ip_fib.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 
@@ -2355,6 +2356,10 @@ static int __net_init fib6_net_init(struct net *net)
 	if (err)
 		return err;
 
+	/* Default to 3-tuple */
+	net->ipv6.sysctl.multipath_hash_fields =
+		FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
+
 	spin_lock_init(&net->ipv6.fib6_gc_lock);
 	rwlock_init(&net->ipv6.fib6_walker_lock);
 	INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 27102c3d6e1d..ce23c8f7ceb3 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,7 @@
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
 #include <net/netevent.h>
+#include <net/ip_fib.h>
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
@@ -24,6 +25,8 @@
 static int two = 2;
 static int flowlabel_reflect_max = 0x7;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static u32 rt6_multipath_hash_fields_all_mask =
+	FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
 
 static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
 					  void *buffer, size_t *lenp, loff_t *ppos)
@@ -151,6 +154,15 @@ static struct ctl_table ipv6_table_template[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
+	{
+		.procname	= "fib_multipath_hash_fields",
+		.data		= &init_net.ipv6.sysctl.multipath_hash_fields,
+		.maxlen		= sizeof(u32),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= &rt6_multipath_hash_fields_all_mask,
+	},
 	{
 		.procname	= "seg6_flowlabel",
 		.data		= &init_net.ipv6.sysctl.seg6_flowlabel,

From 73c2c5cbb15a8a82d5bea52594b0beb038963bcc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:23 +0300
Subject: [PATCH 0266/3292] ipv6: Add custom multipath hash policy

Add a new multipath hash policy where the packet fields used for hash
calculation are determined by user space via the
fib_multipath_hash_fields sysctl that was introduced in the previous
patch.

The current set of available packet fields includes both outer and inner
fields, which requires two invocations of the flow dissector. Avoid
unnecessary dissection of the outer or inner flows by skipping
dissection if none of the outer or inner fields are required.

In accordance with the existing policies, when an skb is not available,
packet fields are extracted from the provided flow key. In which case,
only outer fields are considered.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst |   2 +
 net/ipv6/route.c                       | 125 +++++++++++++++++++++++++
 net/ipv6/sysctl_net_ipv6.c             |   3 +-
 3 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 4246cc4ae35b..a5c250044500 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1772,6 +1772,8 @@ fib_multipath_hash_policy - INTEGER
 	- 0 - Layer 3 (source and destination addresses plus flow label)
 	- 1 - Layer 4 (standard 5-tuple)
 	- 2 - Layer 3 or inner Layer 3 if present
+	- 3 - Custom multipath hash. Fields used for multipath hash calculation
+	  are determined by fib_multipath_hash_fields sysctl
 
 fib_multipath_hash_fields - UNSIGNED INTEGER
 	When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9935e18146e5..c46889381ae4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2326,6 +2326,125 @@ out:
 	}
 }
 
+static u32 rt6_multipath_custom_hash_outer(const struct net *net,
+					   const struct sk_buff *skb,
+					   bool *p_has_inner)
+{
+	u32 hash_fields = ip6_multipath_hash_fields(net);
+	struct flow_keys keys, hash_keys;
+
+	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+		hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+		hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+		hash_keys.basic.ip_proto = keys.basic.ip_proto;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+		hash_keys.tags.flow_label = keys.tags.flow_label;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+		hash_keys.ports.src = keys.ports.src;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+		hash_keys.ports.dst = keys.ports.dst;
+
+	*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
+	return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 rt6_multipath_custom_hash_inner(const struct net *net,
+					   const struct sk_buff *skb,
+					   bool has_inner)
+{
+	u32 hash_fields = ip6_multipath_hash_fields(net);
+	struct flow_keys keys, hash_keys;
+
+	/* We assume the packet carries an encapsulation, but if none was
+	 * encountered during dissection of the outer flow, then there is no
+	 * point in calling the flow dissector again.
+	 */
+	if (!has_inner)
+		return 0;
+
+	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+	if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
+		return 0;
+
+	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+	} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+			hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+			hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
+			hash_keys.tags.flow_label = keys.tags.flow_label;
+	}
+
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+		hash_keys.basic.ip_proto = keys.basic.ip_proto;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
+		hash_keys.ports.src = keys.ports.src;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+		hash_keys.ports.dst = keys.ports.dst;
+
+	return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 rt6_multipath_custom_hash_skb(const struct net *net,
+					 const struct sk_buff *skb)
+{
+	u32 mhash, mhash_inner;
+	bool has_inner = true;
+
+	mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
+	mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
+
+	return jhash_2words(mhash, mhash_inner, 0);
+}
+
+static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
+					 const struct flowi6 *fl6)
+{
+	u32 hash_fields = ip6_multipath_hash_fields(net);
+	struct flow_keys hash_keys;
+
+	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+		return 0;
+
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+		hash_keys.addrs.v6addrs.src = fl6->saddr;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+		hash_keys.addrs.v6addrs.dst = fl6->daddr;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+		hash_keys.basic.ip_proto = fl6->flowi6_proto;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+		hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+		hash_keys.ports.src = fl6->fl6_sport;
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+		hash_keys.ports.dst = fl6->fl6_dport;
+
+	return flow_hash_from_keys(&hash_keys);
+}
+
 /* if skb is set it will be used and fl6 can be NULL */
 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
@@ -2416,6 +2535,12 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 		}
 		mhash = flow_hash_from_keys(&hash_keys);
 		break;
+	case 3:
+		if (skb)
+			mhash = rt6_multipath_custom_hash_skb(net, skb);
+		else
+			mhash = rt6_multipath_custom_hash_fl6(net, fl6);
+		break;
 	}
 
 	return mhash >> 1;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index ce23c8f7ceb3..160bea5db973 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -23,6 +23,7 @@
 #endif
 
 static int two = 2;
+static int three = 3;
 static int flowlabel_reflect_max = 0x7;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 static u32 rt6_multipath_hash_fields_all_mask =
@@ -152,7 +153,7 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler   = proc_rt6_multipath_hash_policy,
 		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
+		.extra2		= &three,
 	},
 	{
 		.procname	= "fib_multipath_hash_fields",

From 511e8db54036c775b84c349167cea2c4cfd24e24 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:24 +0300
Subject: [PATCH 0267/3292] selftests: forwarding: Add test for custom
 multipath hash

Test that when the hash policy is set to custom, traffic is distributed
only according to the outer fields set in the fib_multipath_hash_fields
sysctl.

Each time set a different field and make sure traffic is only
distributed when the field is changed in the packet stream.

The test only verifies the behavior with non-encapsulated IPv4 and IPv6
packets. Subsequent patches will add tests for IPv4/IPv6 overlays on top
of IPv4/IPv6 underlay networks.

Example output:

 # ./custom_multipath_hash.sh
 TEST: ping                                                          [ OK ]
 TEST: ping6                                                         [ OK ]
 INFO: Running IPv4 custom multipath hash tests
 TEST: Multipath hash field: Source IP (balanced)                    [ OK ]
 INFO: Packets sent on path1 / path2: 6353 / 6254
 TEST: Multipath hash field: Source IP (unbalanced)                  [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 12600
 TEST: Multipath hash field: Destination IP (balanced)               [ OK ]
 INFO: Packets sent on path1 / path2: 6102 / 6502
 TEST: Multipath hash field: Destination IP (unbalanced)             [ OK ]
 INFO: Packets sent on path1 / path2: 1 / 12601
 TEST: Multipath hash field: Source port (balanced)                  [ OK ]
 INFO: Packets sent on path1 / path2: 16428 / 16345
 TEST: Multipath hash field: Source port (unbalanced)                [ OK ]
 INFO: Packets sent on path1 / path2: 32770 / 2
 TEST: Multipath hash field: Destination port (balanced)             [ OK ]
 INFO: Packets sent on path1 / path2: 16428 / 16345
 TEST: Multipath hash field: Destination port (unbalanced)           [ OK ]
 INFO: Packets sent on path1 / path2: 32770 / 2
 INFO: Running IPv6 custom multipath hash tests
 TEST: Multipath hash field: Source IP (balanced)                    [ OK ]
 INFO: Packets sent on path1 / path2: 6704 / 5903
 TEST: Multipath hash field: Source IP (unbalanced)                  [ OK ]
 INFO: Packets sent on path1 / path2: 12600 / 0
 TEST: Multipath hash field: Destination IP (balanced)               [ OK ]
 INFO: Packets sent on path1 / path2: 5551 / 7052
 TEST: Multipath hash field: Destination IP (unbalanced)             [ OK ]
 INFO: Packets sent on path1 / path2: 12603 / 0
 TEST: Multipath hash field: Flowlabel (balanced)                    [ OK ]
 INFO: Packets sent on path1 / path2: 8378 / 8080
 TEST: Multipath hash field: Flowlabel (unbalanced)                  [ OK ]
 INFO: Packets sent on path1 / path2: 2 / 12603
 TEST: Multipath hash field: Source port (balanced)                  [ OK ]
 INFO: Packets sent on path1 / path2: 16385 / 16388
 TEST: Multipath hash field: Source port (unbalanced)                [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 32774
 TEST: Multipath hash field: Destination port (balanced)             [ OK ]
 INFO: Packets sent on path1 / path2: 16386 / 16390
 TEST: Multipath hash field: Destination port (unbalanced)           [ OK ]
 INFO: Packets sent on path1 / path2: 32771 / 2

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/custom_multipath_hash.sh   | 364 ++++++++++++++++++
 1 file changed, 364 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/custom_multipath_hash.sh

diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
new file mode 100755
index 000000000000..a15d21dc035a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution between two paths when using custom hash policy.
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |   198.51.100.{2-253}/24 |      |
+# |   2001:db8:1::{2-fd}/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------------+
+# | SW1                     |                         |
+# |                    $rp1 +                         |
+# |         198.51.100.1/24                           |
+# |        2001:db8:1::1/64                           |
+# |                                                   |
+# |                                                   |
+# |            $rp11 +             + $rp12            |
+# |     192.0.2.1/28 |             | 192.0.2.17/28    |
+# | 2001:db8:2::1/64 |             | 2001:db8:3::1/64 |
+# +------------------|-------------|------------------+
+#                    |             |
+# +------------------|-------------|------------------+
+# | SW2              |             |                  |
+# |                  |             |                  |
+# |            $rp21 +             + $rp22            |
+# |     192.0.2.2/28                 192.0.2.18/28    |
+# | 2001:db8:2::2/64                 2001:db8:3::2/64 |
+# |                                                   |
+# |                                                   |
+# |                    $rp2 +                         |
+# |          203.0.113.1/24 |                         |
+# |        2001:db8:4::1/64 |                         |
+# +-------------------------|-------------------------+
+#                           |
+# +-------------------------|------+
+# | H2                      |      |
+# |                     $h2 +      |
+# |    203.0.113.{2-253}/24        |
+# |   2001:db8:4::{2-fd}/64        |
+# +--------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	custom_hash
+"
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+	ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+	ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 default
+	ip route del vrf v$h1 default
+	simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64
+	__simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64
+	__simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64
+
+	ip route add vrf v$rp1 203.0.113.0/24 \
+		nexthop via 192.0.2.2 dev $rp11 \
+		nexthop via 192.0.2.18 dev $rp12
+
+	ip -6 route add vrf v$rp1 2001:db8:4::/64 \
+		nexthop via 2001:db8:2::2 dev $rp11 \
+		nexthop via 2001:db8:3::2 dev $rp12
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$rp1 2001:db8:4::/64
+
+	ip route del vrf v$rp1 203.0.113.0/24
+
+	__simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64
+	__simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64
+	simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64
+	__simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64
+	__simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64
+
+	ip route add vrf v$rp2 198.51.100.0/24 \
+		nexthop via 192.0.2.1 dev $rp21 \
+		nexthop via 192.0.2.17 dev $rp22
+
+	ip -6 route add vrf v$rp2 2001:db8:1::/64 \
+		nexthop via 2001:db8:2::1 dev $rp21 \
+		nexthop via 2001:db8:3::1 dev $rp22
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$rp2 2001:db8:1::/64
+
+	ip route del vrf v$rp2 198.51.100.0/24
+
+	__simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64
+	__simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64
+	simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64
+	ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+	ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 default
+	ip route del vrf v$h2 default
+	simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	rp1=${NETIFS[p2]}
+
+	rp11=${NETIFS[p3]}
+	rp21=${NETIFS[p4]}
+
+	rp12=${NETIFS[p5]}
+	rp22=${NETIFS[p6]}
+
+	rp2=${NETIFS[p7]}
+
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:4::2
+}
+
+send_src_ipv4()
+{
+	$MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+		-d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+		-d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+	$MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec v$h1 \
+			$PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+}
+
+send_src_udp6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+		-d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+		-d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+	local field="$1"; shift
+	local balanced="$1"; shift
+	local send_flows="$@"
+
+	RET=0
+
+	local t0_rp11=$(link_stats_tx_packets_get $rp11)
+	local t0_rp12=$(link_stats_tx_packets_get $rp12)
+
+	$send_flows
+
+	local t1_rp11=$(link_stats_tx_packets_get $rp11)
+	local t1_rp12=$(link_stats_tx_packets_get $rp12)
+
+	local d_rp11=$((t1_rp11 - t0_rp11))
+	local d_rp12=$((t1_rp12 - t0_rp12))
+
+	local diff=$((d_rp12 - d_rp11))
+	local sum=$((d_rp11 + d_rp12))
+
+	local pct=$(echo "$diff / $sum * 100" | bc -l)
+	local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+	[[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+	   ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+	check_err $? "Expected traffic to be $balanced, but it is not"
+
+	log_test "Multipath hash field: $field ($balanced)"
+	log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12"
+}
+
+custom_hash_v4()
+{
+	log_info "Running IPv4 custom multipath hash tests"
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001
+	custom_hash_test "Source IP" "balanced" send_src_ipv4
+	custom_hash_test "Source IP" "unbalanced" send_dst_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002
+	custom_hash_test "Destination IP" "balanced" send_dst_ipv4
+	custom_hash_test "Destination IP" "unbalanced" send_src_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010
+	custom_hash_test "Source port" "balanced" send_src_udp4
+	custom_hash_test "Source port" "unbalanced" send_dst_udp4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020
+	custom_hash_test "Destination port" "balanced" send_dst_udp4
+	custom_hash_test "Destination port" "unbalanced" send_src_udp4
+
+	sysctl_restore net.ipv4.neigh.default.gc_thresh3
+	sysctl_restore net.ipv4.neigh.default.gc_thresh2
+	sysctl_restore net.ipv4.neigh.default.gc_thresh1
+
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+custom_hash_v6()
+{
+	log_info "Running IPv6 custom multipath hash tests"
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001
+	custom_hash_test "Source IP" "balanced" send_src_ipv6
+	custom_hash_test "Source IP" "unbalanced" send_dst_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002
+	custom_hash_test "Destination IP" "balanced" send_dst_ipv6
+	custom_hash_test "Destination IP" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008
+	custom_hash_test "Flowlabel" "balanced" send_flowlabel
+	custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010
+	custom_hash_test "Source port" "balanced" send_src_udp6
+	custom_hash_test "Source port" "unbalanced" send_dst_udp6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020
+	custom_hash_test "Destination port" "balanced" send_dst_udp6
+	custom_hash_test "Destination port" "unbalanced" send_src_udp6
+
+	sysctl_restore net.ipv6.neigh.default.gc_thresh3
+	sysctl_restore net.ipv6.neigh.default.gc_thresh2
+	sysctl_restore net.ipv6.neigh.default.gc_thresh1
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+custom_hash()
+{
+	# Test that when the hash policy is set to custom, traffic is
+	# distributed only according to the fields set in the
+	# fib_multipath_hash_fields sysctl.
+	#
+	# Each time set a different field and make sure traffic is only
+	# distributed when the field is changed in the packet stream.
+	custom_hash_v4
+	custom_hash_v6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS

From 185b0c190bb6d30292783f20b4d85e8dbe8a2687 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:25 +0300
Subject: [PATCH 0268/3292] selftests: forwarding: Add test for custom
 multipath hash with IPv4 GRE

Test that when the hash policy is set to custom, traffic is distributed
only according to the inner fields set in the fib_multipath_hash_fields
sysctl.

Each time set a different field and make sure traffic is only
distributed when the field is changed in the packet stream.

The test only verifies the behavior of IPv4/IPv6 overlays on top of an
IPv4 underlay network. A subsequent patch will do the same with an IPv6
underlay network.

Example output:

 # ./gre_custom_multipath_hash.sh
 TEST: ping                                                          [ OK ]
 TEST: ping6                                                         [ OK ]
 INFO: Running IPv4 overlay custom multipath hash tests
 TEST: Multipath hash field: Inner source IP (balanced)              [ OK ]
 INFO: Packets sent on path1 / path2: 6601 / 6001
 TEST: Multipath hash field: Inner source IP (unbalanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 12600
 TEST: Multipath hash field: Inner destination IP (balanced)         [ OK ]
 INFO: Packets sent on path1 / path2: 6802 / 5802
 TEST: Multipath hash field: Inner destination IP (unbalanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 12601 / 1
 TEST: Multipath hash field: Inner source port (balanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 16430 / 16344
 TEST: Multipath hash field: Inner source port (unbalanced)          [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 32772
 TEST: Multipath hash field: Inner destination port (balanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 16430 / 16343
 TEST: Multipath hash field: Inner destination port (unbalanced)     [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 32772
 INFO: Running IPv6 overlay custom multipath hash tests
 TEST: Multipath hash field: Inner source IP (balanced)              [ OK ]
 INFO: Packets sent on path1 / path2: 6702 / 5900
 TEST: Multipath hash field: Inner source IP (unbalanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 12601
 TEST: Multipath hash field: Inner destination IP (balanced)         [ OK ]
 INFO: Packets sent on path1 / path2: 5751 / 6851
 TEST: Multipath hash field: Inner destination IP (unbalanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 12602 / 1
 TEST: Multipath hash field: Inner flowlabel (balanced)              [ OK ]
 INFO: Packets sent on path1 / path2: 8364 / 8065
 TEST: Multipath hash field: Inner flowlabel (unbalanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 12601 / 0
 TEST: Multipath hash field: Inner source port (balanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 16425 / 16349
 TEST: Multipath hash field: Inner source port (unbalanced)          [ OK ]
 INFO: Packets sent on path1 / path2: 1 / 32770
 TEST: Multipath hash field: Inner destination port (balanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 16425 / 16349
 TEST: Multipath hash field: Inner destination port (unbalanced)     [ OK ]
 INFO: Packets sent on path1 / path2: 2 / 32770

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../forwarding/gre_custom_multipath_hash.sh   | 456 ++++++++++++++++++
 1 file changed, 456 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh

diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..a73f52efcb6c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -0,0 +1,456 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv4 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |   198.51.100.{2-253}/24 |      |
+# |   2001:db8:1::{2-fd}/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|------------------+
+# | SW1                     |                  |
+# |                    $ol1 +                  |
+# |         198.51.100.1/24                    |
+# |        2001:db8:1::1/64                    |
+# |                                            |
+# |   + g1 (gre)                               |
+# |     loc=192.0.2.1                          |
+# |     rem=192.0.2.2 --.                      |
+# |     tos=inherit     |                      |
+# |                     v                      |
+# |                     + $ul1                 |
+# |                     | 192.0.2.17/28        |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# | SW2                 |                      |
+# |               $ul21 +                      |
+# |       192.0.2.18/28 |                      |
+# |                     |                      |
+# !   __________________+___                   |
+# |  /                      \                  |
+# |  |                      |                  |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan) |
+# |  | 192.0.2.33/28        | 192.0.2.49/28    |
+# |  |                      |                  |
+# +--|----------------------|------------------+
+#    |                      |
+# +--|----------------------|------------------+
+# |  |                      |                  |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan) |
+# |  | 192.0.2.34/28        | 192.0.2.50/28    |
+# |  |                      |                  |
+# |  \__________________+___/                  |
+# |                     |                      |
+# |                     |                      |
+# |               $ul31 +                      |
+# |       192.0.2.65/28 |                  SW3 |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# |                     + $ul4                 |
+# |                     ^ 192.0.2.66/28        |
+# |                     |                      |
+# |   + g2 (gre)        |                      |
+# |     loc=192.0.2.2   |                      |
+# |     rem=192.0.2.1 --'                      |
+# |     tos=inherit                            |
+# |                                            |
+# |                    $ol4 +                  |
+# |          203.0.113.1/24 |                  |
+# |        2001:db8:2::1/64 |              SW4 |
+# +-------------------------|------------------+
+#                           |
+# +-------------------------|------+
+# |                         |      |
+# |                     $h2 +      |
+# |    203.0.113.{2-253}/24        |
+# |   2001:db8:2::{2-fd}/64     H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+	ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+	ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 default
+	ip route del vrf v$h1 default
+	simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 192.0.2.17/28
+
+	tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 192.0.2.1/32
+	ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18
+
+	ip route add vrf v$ol1 203.0.113.0/24 dev g1
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 203.0.113.0/24
+
+	ip route del vrf v$ol1 192.0.2.2/32
+	__simple_if_fini g1 192.0.2.1/32
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 192.0.2.17/28
+	simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 192.0.2.18/28
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 192.0.2.33/28
+	vlan_create $ul22 222 v$ul21 192.0.2.49/28
+
+	ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17
+	ip route add vrf v$ul21 192.0.2.2/32 \
+	   nexthop via 192.0.2.34 \
+	   nexthop via 192.0.2.50
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ul21 192.0.2.2/32
+	ip route del vrf v$ul21 192.0.2.1/32
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 192.0.2.18/28
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 192.0.2.65/28
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 192.0.2.34/28
+	vlan_create $ul32 222 v$ul31 192.0.2.50/28
+
+	ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66
+	ip route add vrf v$ul31 192.0.2.1/32 \
+	   nexthop via 192.0.2.33 \
+	   nexthop via 192.0.2.49
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip route del vrf v$ul31 192.0.2.1/32
+	ip route del vrf v$ul31 192.0.2.2/32
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 192.0.2.65/28
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 192.0.2.66/28
+
+	tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 192.0.2.2/32
+	ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65
+
+	ip route add vrf v$ol4 198.51.100.0/24 dev g2
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+	ip route del vrf v$ol4 198.51.100.0/24
+
+	ip route del vrf v$ol4 192.0.2.1/32
+	__simple_if_fini g2 192.0.2.2/32
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 192.0.2.66/28
+	simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+	ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+	ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 default
+	ip route del vrf v$h2 default
+	simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+	$MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+		-d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+		-d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+	$MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec v$h1 \
+			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+}
+
+send_src_udp6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+	local field="$1"; shift
+	local balanced="$1"; shift
+	local send_flows="$@"
+
+	RET=0
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	$send_flows
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+
+	local diff=$((d222 - d111))
+	local sum=$((d111 + d222))
+
+	local pct=$(echo "$diff / $sum * 100" | bc -l)
+	local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+	[[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+	   ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+	check_err $? "Expected traffic to be $balanced, but it is not"
+
+	log_test "Multipath hash field: $field ($balanced)"
+	log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+	log_info "Running IPv4 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp4
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+	sysctl_restore net.ipv4.neigh.default.gc_thresh3
+	sysctl_restore net.ipv4.neigh.default.gc_thresh2
+	sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+	log_info "Running IPv6 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200
+	custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+	custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp6
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+	sysctl_restore net.ipv6.neigh.default.gc_thresh3
+	sysctl_restore net.ipv6.neigh.default.gc_thresh2
+	sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+	# Test that when the hash policy is set to custom, traffic is
+	# distributed only according to the fields set in the
+	# fib_multipath_hash_fields sysctl.
+	#
+	# Each time set a different field and make sure traffic is only
+	# distributed when the field is changed in the packet stream.
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+	custom_hash_v4
+	custom_hash_v6
+
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS

From b7715acba4d3d6e41ce8accd808b6c7c4febec6c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@OSS.NVIDIA.COM>
Date: Mon, 17 May 2021 21:15:26 +0300
Subject: [PATCH 0269/3292] selftests: forwarding: Add test for custom
 multipath hash with IPv6 GRE

Test that when the hash policy is set to custom, traffic is distributed
only according to the inner fields set in the fib_multipath_hash_fields
sysctl.

Each time set a different field and make sure traffic is only
distributed when the field is changed in the packet stream.

The test only verifies the behavior of IPv4/IPv6 overlays on top of an
IPv6 underlay network. The previous patch verified the same with an IPv4
underlay network.

Example output:

 # ./ip6gre_custom_multipath_hash.sh
 TEST: ping                                                          [ OK ]
 TEST: ping6                                                         [ OK ]
 INFO: Running IPv4 overlay custom multipath hash tests
 TEST: Multipath hash field: Inner source IP (balanced)              [ OK ]
 INFO: Packets sent on path1 / path2: 6602 / 6002
 TEST: Multipath hash field: Inner source IP (unbalanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 1 / 12601
 TEST: Multipath hash field: Inner destination IP (balanced)         [ OK ]
 INFO: Packets sent on path1 / path2: 6802 / 5801
 TEST: Multipath hash field: Inner destination IP (unbalanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 12602 / 3
 TEST: Multipath hash field: Inner source port (balanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 16431 / 16344
 TEST: Multipath hash field: Inner source port (unbalanced)          [ OK ]
 INFO: Packets sent on path1 / path2: 0 / 32773
 TEST: Multipath hash field: Inner destination port (balanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 16431 / 16344
 TEST: Multipath hash field: Inner destination port (unbalanced)     [ OK ]
 INFO: Packets sent on path1 / path2: 2 / 32772
 INFO: Running IPv6 overlay custom multipath hash tests
 TEST: Multipath hash field: Inner source IP (balanced)              [ OK ]
 INFO: Packets sent on path1 / path2: 6704 / 5902
 TEST: Multipath hash field: Inner source IP (unbalanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 1 / 12600
 TEST: Multipath hash field: Inner destination IP (balanced)         [ OK ]
 INFO: Packets sent on path1 / path2: 5751 / 6852
 TEST: Multipath hash field: Inner destination IP (unbalanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 12602 / 0
 TEST: Multipath hash field: Inner flowlabel (balanced)              [ OK ]
 INFO: Packets sent on path1 / path2: 8272 / 8181
 TEST: Multipath hash field: Inner flowlabel (unbalanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 3 / 12602
 TEST: Multipath hash field: Inner source port (balanced)            [ OK ]
 INFO: Packets sent on path1 / path2: 16424 / 16351
 TEST: Multipath hash field: Inner source port (unbalanced)          [ OK ]
 INFO: Packets sent on path1 / path2: 3 / 32774
 TEST: Multipath hash field: Inner destination port (balanced)       [ OK ]
 INFO: Packets sent on path1 / path2: 16425 / 16350
 TEST: Multipath hash field: Inner destination port (unbalanced)     [ OK ]
 INFO: Packets sent on path1 / path2: 2 / 32773

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ip6gre_custom_multipath_hash.sh           | 458 ++++++++++++++++++
 1 file changed, 458 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh

diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..8fea2c2e0b25
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -0,0 +1,458 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv6 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |   198.51.100.{2-253}/24 |      |
+# |   2001:db8:1::{2-fd}/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------+
+# | SW1                     |                   |
+# |                    $ol1 +                   |
+# |         198.51.100.1/24                     |
+# |        2001:db8:1::1/64                     |
+# |                                             |
+# |+ g1 (ip6gre)                                |
+# |  loc=2001:db8:3::1                          |
+# |  rem=2001:db8:3::2 -.                       |
+# |     tos=inherit     |                       |
+# |                     v                       |
+# |                     + $ul1                  |
+# |                     | 2001:db8:10::1/64     |
+# +---------------------|-----------------------+
+#                       |
+# +---------------------|-----------------------+
+# | SW2                 |                       |
+# |               $ul21 +                       |
+# |   2001:db8:10::2/64 |                       |
+# |                     |                       |
+# !   __________________+___                    |
+# |  /                      \                   |
+# |  |                      |                   |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan)  |
+# |  | 2001:db8:11::1/64    | 2001:db8:12::1/64 |
+# |  |                      |                   |
+# +--|----------------------|-------------------+
+#    |                      |
+# +--|----------------------|-------------------+
+# |  |                      |                   |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan)  |
+# |  | 2001:db8:11::2/64    | 2001:db8:12::2/64 |
+# |  |                      |                   |
+# |  \__________________+___/                   |
+# |                     |                       |
+# |                     |                       |
+# |               $ul31 +                       |
+# |   2001:db8:13::1/64 |                   SW3 |
+# +---------------------|-----------------------+
+#                       |
+# +---------------------|-----------------------+
+# |                     + $ul4                  |
+# |                     ^ 2001:db8:13::2/64     |
+# |                     |                       |
+# |+ g2 (ip6gre)        |                       |
+# |  loc=2001:db8:3::2  |                       |
+# |  rem=2001:db8:3::1 -'                       |
+# |  tos=inherit                                |
+# |                                             |
+# |                    $ol4 +                   |
+# |          203.0.113.1/24 |                   |
+# |        2001:db8:2::1/64 |               SW4 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|------+
+# |                         |      |
+# |                     $h2 +      |
+# |    203.0.113.{2-253}/24        |
+# |   2001:db8:2::{2-fd}/64     H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+	ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+	ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 default
+	ip route del vrf v$h1 default
+	simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 2001:db8:10::1/64
+
+	tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		dev v$ol1
+	__simple_if_init g1 v$ol1 2001:db8:3::1/128
+	ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2
+
+	ip route add vrf v$ol1 203.0.113.0/24 dev g1
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 203.0.113.0/24
+
+	ip route del vrf v$ol1 2001:db8:3::2/128
+	__simple_if_fini g1 2001:db8:3::1/128
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 2001:db8:10::1/64
+	simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 2001:db8:10::2/64
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 2001:db8:11::1/64
+	vlan_create $ul22 222 v$ul21 2001:db8:12::1/64
+
+	ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1
+	ip -6 route add vrf v$ul21 2001:db8:3::2/128 \
+	   nexthop via 2001:db8:11::2 \
+	   nexthop via 2001:db8:12::2
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$ul21 2001:db8:3::2/128
+	ip -6 route del vrf v$ul21 2001:db8:3::1/128
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 2001:db8:10::2/64
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 2001:db8:13::1/64
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 2001:db8:11::2/64
+	vlan_create $ul32 222 v$ul31 2001:db8:12::2/64
+
+	ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2
+	ip -6 route add vrf v$ul31 2001:db8:3::1/128 \
+	   nexthop via 2001:db8:11::1 \
+	   nexthop via 2001:db8:12::1
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip -6 route del vrf v$ul31 2001:db8:3::1/128
+	ip -6 route del vrf v$ul31 2001:db8:3::2/128
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 2001:db8:13::1/64
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 2001:db8:13::2/64
+
+	tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+		dev v$ol4
+	__simple_if_init g2 v$ol4 2001:db8:3::2/128
+	ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1
+
+	ip route add vrf v$ol4 198.51.100.0/24 dev g2
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+	ip route del vrf v$ol4 198.51.100.0/24
+
+	ip -6 route del vrf v$ol4 2001:db8:3::1/128
+	__simple_if_fini g2 2001:db8:3::2/128
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 2001:db8:13::2/64
+	simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+	ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+	ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 default
+	ip route del vrf v$h2 default
+	simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+	$MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+		-d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+	$MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+		-d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+	$MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+		-d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec v$h1 \
+			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+}
+
+send_src_udp6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+	$MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+	local field="$1"; shift
+	local balanced="$1"; shift
+	local send_flows="$@"
+
+	RET=0
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	$send_flows
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+
+	local diff=$((d222 - d111))
+	local sum=$((d111 + d222))
+
+	local pct=$(echo "$diff / $sum * 100" | bc -l)
+	local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+	[[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+	   ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+	check_err $? "Expected traffic to be $balanced, but it is not"
+
+	log_test "Multipath hash field: $field ($balanced)"
+	log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+	log_info "Running IPv4 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp4
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+	sysctl_restore net.ipv4.neigh.default.gc_thresh3
+	sysctl_restore net.ipv4.neigh.default.gc_thresh2
+	sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+	log_info "Running IPv6 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200
+	custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+	custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp6
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+	sysctl_restore net.ipv6.neigh.default.gc_thresh3
+	sysctl_restore net.ipv6.neigh.default.gc_thresh2
+	sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+	# Test that when the hash policy is set to custom, traffic is
+	# distributed only according to the fields set in the
+	# fib_multipath_hash_fields sysctl.
+	#
+	# Each time set a different field and make sure traffic is only
+	# distributed when the field is changed in the packet stream.
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+	custom_hash_v4
+	custom_hash_v6
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS

From 4ac9e23cf2ccdad99dbc57d7d1bf264d53d23057 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Tue, 18 May 2021 17:11:41 +0800
Subject: [PATCH 0270/3292] cipso: correct comments of
 cipso_v4_cache_invalidate()

Since cipso_v4_cache_invalidate() has no return value, so drop
related descriptions in its comments.

Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index bfaf327e9d12..d6e3a92841e3 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -187,8 +187,7 @@ static int __init cipso_v4_cache_init(void)
  * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
  *
  * Description:
- * Invalidates and frees any entries in the CIPSO cache.  Returns zero on
- * success and negative values on failure.
+ * Invalidates and frees any entries in the CIPSO cache.
  *
  */
 void cipso_v4_cache_invalidate(void)

From 0d56e5c191b197e1d30a0a4c92628836dafced0f Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 18 May 2021 11:24:13 +0000
Subject: [PATCH 0271/3292] net: dsa: qca8k: fix missing unlock on error in
 qca8k_vlan_(add|del)

Add the missing unlock before return from function qca8k_vlan_add()
and qca8k_vlan_del() in the error handling case.

Fixes: 028f5f8ef44f ("net: dsa: qca8k: handle error with qca8k_read operation")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 4753228f02b3..1f1b7c4dda13 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -506,8 +506,10 @@ qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool untagged)
 		goto out;
 
 	reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
-	if (reg < 0)
-		return reg;
+	if (reg < 0) {
+		ret = reg;
+		goto out;
+	}
 	reg |= QCA8K_VTU_FUNC0_VALID | QCA8K_VTU_FUNC0_IVL_EN;
 	reg &= ~(QCA8K_VTU_FUNC0_EG_MODE_MASK << QCA8K_VTU_FUNC0_EG_MODE_S(port));
 	if (untagged)
@@ -519,7 +521,7 @@ qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool untagged)
 
 	ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
 	if (ret)
-		return ret;
+		goto out;
 	ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid);
 
 out:
@@ -541,8 +543,10 @@ qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid)
 		goto out;
 
 	reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
-	if (reg < 0)
-		return reg;
+	if (reg < 0) {
+		ret = reg;
+		goto out;
+	}
 	reg &= ~(3 << QCA8K_VTU_FUNC0_EG_MODE_S(port));
 	reg |= QCA8K_VTU_FUNC0_EG_MODE_NOT <<
 			QCA8K_VTU_FUNC0_EG_MODE_S(port);
@@ -564,7 +568,7 @@ qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid)
 	} else {
 		ret = qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
 		if (ret)
-			return ret;
+			goto out;
 		ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid);
 	}
 

From 78524c01edb24022098b4f3b49d6f74e1b3f4aa6 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 18 May 2021 20:29:50 +0800
Subject: [PATCH 0272/3292] net: wan: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index c354a5143e99..215f2fbfc3ae 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -28,7 +28,6 @@
 
 #include "hd64570.h"
 
-
 static const char* version = "Moxa C101 driver version: 1.15";
 static const char* devname = "C101";
 
@@ -51,7 +50,6 @@ static const char* devname = "C101";
 
 static char *hw;		/* pointer to hw=xxx command line string */
 
-
 typedef struct card_s {
 	struct net_device *dev;
 	spinlock_t lock;	/* TX lock */
@@ -79,7 +77,6 @@ typedef card_t port_t;
 static card_t *first_card;
 static card_t **new_card = &first_card;
 
-
 #define sca_in(reg, card)	   readb((card)->win0base + C101_SCA + (reg))
 #define sca_out(value, reg, card)  writeb(value, (card)->win0base + C101_SCA + (reg))
 #define sca_inw(reg, card)	   readw((card)->win0base + C101_SCA + (reg))
@@ -99,7 +96,6 @@ static card_t **new_card = &first_card;
 #define get_port(card, port)	   (card)
 static void sca_msci_intr(port_t *port);
 
-
 static inline u8 sca_get_page(card_t *card)
 {
 	return card->page;
@@ -111,10 +107,8 @@ static inline void openwin(card_t *card, u8 page)
 	writeb(page, card->win0base + C101_PAGE);
 }
 
-
 #include "hd64570.c"
 
-
 static inline void set_carrier(port_t *port)
 {
 	if (!(sca_in(MSCI1_OFFSET + ST3, port) & ST3_DCD))
@@ -123,7 +117,6 @@ static inline void set_carrier(port_t *port)
 		netif_carrier_off(port_to_dev(port));
 }
 
-
 static void sca_msci_intr(port_t *port)
 {
 	u8 stat = sca_in(MSCI0_OFFSET + ST1, port); /* read MSCI ST1 status */
@@ -145,7 +138,6 @@ static void sca_msci_intr(port_t *port)
 		set_carrier(port);
 }
 
-
 static void c101_set_iface(port_t *port)
 {
 	u8 rxs = port->rxs & CLK_BRG_MASK;
@@ -179,7 +171,6 @@ static void c101_set_iface(port_t *port)
 	sca_set_port(port);
 }
 
-
 static int c101_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -206,7 +197,6 @@ static int c101_open(struct net_device *dev)
 	return 0;
 }
 
-
 static int c101_close(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -218,7 +208,6 @@ static int c101_close(struct net_device *dev)
 	return 0;
 }
 
-
 static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	const size_t size = sizeof(sync_serial_settings);
@@ -276,8 +265,6 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	}
 }
 
-
-
 static void c101_destroy_card(card_t *card)
 {
 	readb(card->win0base + C101_PAGE); /* Resets SCA? */
@@ -392,8 +379,6 @@ static int __init c101_run(unsigned long irq, unsigned long winbase)
 	return 0;
 }
 
-
-
 static int __init c101_init(void)
 {
 	if (hw == NULL) {
@@ -425,7 +410,6 @@ static int __init c101_init(void)
 	return first_card ? 0 : -EINVAL;
 }
 
-
 static void __exit c101_cleanup(void)
 {
 	card_t *card = first_card;
@@ -438,7 +422,6 @@ static void __exit c101_cleanup(void)
 	}
 }
 
-
 module_init(c101_init);
 module_exit(c101_cleanup);
 

From 23c235412411dd6f4d6069a882cb8e09e0d47f7d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 18 May 2021 20:29:51 +0800
Subject: [PATCH 0273/3292] net: wan: add some required spaces

Add space required before the open parenthesis '(',
and add spaces required around that '<', '>' and '!='.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index 215f2fbfc3ae..5a04f18db32a 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -70,7 +70,7 @@ typedef struct card_s {
 	u8 page;
 
 	struct card_s *next_card;
-}card_t;
+} card_t;
 
 typedef card_t port_t;
 
@@ -85,7 +85,7 @@ static card_t **new_card = &first_card;
 #define sca_outw(value, reg, card) do { \
 	writeb(value & 0xFF, (card)->win0base + C101_SCA + (reg)); \
 	writeb((value >> 8 ) & 0xFF, (card)->win0base + C101_SCA + (reg + 1));\
-} while(0)
+} while (0)
 
 #define port_to_card(port)	   (port)
 #define log_node(port)		   (0)
@@ -143,7 +143,7 @@ static void c101_set_iface(port_t *port)
 	u8 rxs = port->rxs & CLK_BRG_MASK;
 	u8 txs = port->txs & CLK_BRG_MASK;
 
-	switch(port->settings.clock_type) {
+	switch (port->settings.clock_type) {
 	case CLOCK_INT:
 		rxs |= CLK_BRG_RX; /* TX clock */
 		txs |= CLK_RXCLK_TX; /* BRG output */
@@ -229,7 +229,7 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (cmd != SIOCWANDEV)
 		return hdlc_ioctl(dev, ifr, cmd);
 
-	switch(ifr->ifr_settings.type) {
+	switch (ifr->ifr_settings.type) {
 	case IF_GET_IFACE:
 		ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
 		if (ifr->ifr_settings.size < size) {
@@ -241,7 +241,7 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return 0;
 
 	case IF_IFACE_SYNC_SERIAL:
-		if(!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
 		if (copy_from_user(&new_line, line, size))
@@ -296,12 +296,12 @@ static int __init c101_run(unsigned long irq, unsigned long winbase)
 	card_t *card;
 	int result;
 
-	if (irq<3 || irq>15 || irq == 6) /* FIXME */ {
+	if (irq < 3 || irq > 15 || irq == 6) /* FIXME */ {
 		pr_err("invalid IRQ value\n");
 		return -ENODEV;
 	}
 
-	if (winbase < 0xC0000 || winbase > 0xDFFFF || (winbase & 0x3FFF) !=0) {
+	if (winbase < 0xC0000 || winbase > 0xDFFFF || (winbase & 0x3FFF) != 0) {
 		pr_err("invalid RAM value\n");
 		return -ENODEV;
 	}
@@ -404,7 +404,7 @@ static int __init c101_init(void)
 
 		if (*hw == '\x0')
 			return first_card ? 0 : -EINVAL;
-	}while(*hw++ == ':');
+	} while (*hw++ == ':');
 
 	pr_err("invalid hardware parameters\n");
 	return first_card ? 0 : -EINVAL;

From a4e5914776113192f88b230e9e3503c4bf296567 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 18 May 2021 20:29:52 +0800
Subject: [PATCH 0274/3292] net: wan: remove redundant braces {}

Braces {} are not necessary for single statement blocks,
this patch removes redundant braces {}.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/fsl_ucc_hdlc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 7eac6a3e1cde..39f05fabbfa4 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -1171,9 +1171,8 @@ static int ucc_hdlc_probe(struct platform_device *pdev)
 	ut_info->uf_info.irq = irq_of_parse_and_map(np, 0);
 
 	uhdlc_priv = kzalloc(sizeof(*uhdlc_priv), GFP_KERNEL);
-	if (!uhdlc_priv) {
+	if (!uhdlc_priv)
 		return -ENOMEM;
-	}
 
 	dev_set_drvdata(&pdev->dev, uhdlc_priv);
 	uhdlc_priv->dev = &pdev->dev;

From da8e6fddbae36e2cedd7802949d9c92e9fbf13a0 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 18 May 2021 20:29:53 +0800
Subject: [PATCH 0275/3292] net: wan: remove redundant space

Space prohibited before that close parenthesis ')',
so removes the redundant space.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index 5a04f18db32a..e2ca559e6553 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -84,7 +84,7 @@ static card_t **new_card = &first_card;
 /* EDA address register must be set in EDAL, EDAH order - 8 bit ISA bus */
 #define sca_outw(value, reg, card) do { \
 	writeb(value & 0xFF, (card)->win0base + C101_SCA + (reg)); \
-	writeb((value >> 8 ) & 0xFF, (card)->win0base + C101_SCA + (reg + 1));\
+	writeb((value >> 8) & 0xFF, (card)->win0base + C101_SCA + (reg + 1));\
 } while (0)
 
 #define port_to_card(port)	   (port)

From a3f3e831dc2a5bf1b34c31be38b74bc7e38a2de7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 18 May 2021 20:29:54 +0800
Subject: [PATCH 0276/3292] net: wan: fix variable definition style

Fix the checkpatch error: "foo* bar" should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index e2ca559e6553..7e431e5b6e85 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -28,8 +28,8 @@
 
 #include "hd64570.h"
 
-static const char* version = "Moxa C101 driver version: 1.15";
-static const char* devname = "C101";
+static const char *version = "Moxa C101 driver version: 1.15";
+static const char *devname = "C101";
 
 #undef DEBUG_PKT
 #define DEBUG_RINGS

From 44e261c715b6ca0ebe79d7b43afdb2382e1ca92b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 18 May 2021 21:01:35 +0800
Subject: [PATCH 0277/3292] cxgb4: clip_tbl: use list_del_init instead of
 list_del/INIT_LIST_HEAD

Using list_del_init() instead of list_del() + INIT_LIST_HEAD()
to simpify the code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index 12fcf84d67ad..163efab27e9b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -106,8 +106,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
 	if (!list_empty(&ctbl->ce_free_head)) {
 		ce = list_first_entry(&ctbl->ce_free_head,
 				      struct clip_entry, list);
-		list_del(&ce->list);
-		INIT_LIST_HEAD(&ce->list);
+		list_del_init(&ce->list);
 		spin_lock_init(&ce->lock);
 		refcount_set(&ce->refcnt, 0);
 		atomic_dec(&ctbl->nfree);
@@ -179,8 +178,7 @@ found:
 	write_lock_bh(&ctbl->lock);
 	spin_lock_bh(&ce->lock);
 	if (refcount_dec_and_test(&ce->refcnt)) {
-		list_del(&ce->list);
-		INIT_LIST_HEAD(&ce->list);
+		list_del_init(&ce->list);
 		list_add_tail(&ce->list, &ctbl->ce_free_head);
 		atomic_inc(&ctbl->nfree);
 		if (v6)

From e2bd6bad9c1e976674de7d714a5c1567281a0843 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 18 May 2021 21:03:58 +0800
Subject: [PATCH 0278/3292] net: dcb: Remove unnecessary INIT_LIST_HEAD()

The list_head dcb_app_list is initialized statically.
It is unnecessary to initialize by INIT_LIST_HEAD().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 653e3bc9c87b..51f80a2f8194 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -2075,8 +2075,6 @@ EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
 
 static int __init dcbnl_init(void)
 {
-	INIT_LIST_HEAD(&dcb_app_list);
-
 	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
 

From 86544c3de6a2185409c5a3d02f674ea223a14217 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Tue, 18 May 2021 20:49:24 +0300
Subject: [PATCH 0279/3292] net: mdio: provide shim implementation of
 devm_of_mdiobus_register

Similar to the way in which of_mdiobus_register() has a fallback to the
non-DT based mdiobus_register() when CONFIG_OF is not set, we can create
a shim for the device-managed devm_of_mdiobus_register() which calls
devm_mdiobus_register() and discards the struct device_node *.

In particular, this solves a build issue with the qca8k DSA driver which
uses devm_of_mdiobus_register and can be compiled without CONFIG_OF.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_mdio.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 2b05e7f7c238..da633d34ab86 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -72,6 +72,13 @@ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *
 	return mdiobus_register(mdio);
 }
 
+static inline int devm_of_mdiobus_register(struct device *dev,
+					   struct mii_bus *mdio,
+					   struct device_node *np)
+{
+	return devm_mdiobus_register(dev, mdio);
+}
+
 static inline struct mdio_device *of_mdio_find_device(struct device_node *np)
 {
 	return NULL;

From 79a7f8bdb159d9914b58740f3d31d602a6e4aca8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:03 -0700
Subject: [PATCH 0280/3292] bpf: Introduce bpf_sys_bpf() helper and program
 type.

Add placeholders for bpf_sys_bpf() helper and new program type.
Make sure to check that expected_attach_type is zero for future extensibility.
Allow tracing helper functions to be used in this program type, since they will
only execute from user context via bpf_prog_test_run.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-2-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h            | 10 +++++++
 include/linux/bpf_types.h      |  2 ++
 include/uapi/linux/bpf.h       |  8 +++++
 kernel/bpf/syscall.c           | 53 ++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c          |  8 +++++
 net/bpf/test_run.c             | 43 +++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  8 +++++
 7 files changed, 132 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 02b02cb29ce2..04a2bf41ae72 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1826,6 +1826,9 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
 
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
 void bpf_map_offload_map_free(struct bpf_map *map);
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr);
 #else
 static inline int bpf_prog_offload_init(struct bpf_prog *prog,
 					union bpf_attr *attr)
@@ -1851,6 +1854,13 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 static inline void bpf_map_offload_map_free(struct bpf_map *map)
 {
 }
+
+static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+					    const union bpf_attr *kattr,
+					    union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index f883f01a5061..a9db1eae6796 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -77,6 +77,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 	       void *, void *)
 #endif /* CONFIG_BPF_LSM */
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
+	      void *, void *)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ec6d85a81744..c92648f38144 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -937,6 +937,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
 };
 
 enum bpf_attach_type {
@@ -4735,6 +4736,12 @@ union bpf_attr {
  *		be zero-terminated except when **str_size** is 0.
  *
  *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * 	Description
+ * 		Execute bpf syscall with given arguments.
+ * 	Return
+ * 		A syscall result.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4903,6 +4910,7 @@ union bpf_attr {
 	FN(check_mtu),			\
 	FN(for_each_map_elem),		\
 	FN(snprintf),			\
+	FN(sys_bpf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 941ca06d9dfa..b1e7352919cb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2014,6 +2014,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		if (expected_attach_type == BPF_SK_LOOKUP)
 			return 0;
 		return -EINVAL;
+	case BPF_PROG_TYPE_SYSCALL:
 	case BPF_PROG_TYPE_EXT:
 		if (expected_attach_type)
 			return -EINVAL;
@@ -4508,3 +4509,55 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 
 	return err;
 }
+
+static bool syscall_prog_is_valid_access(int off, int size,
+					 enum bpf_access_type type,
+					 const struct bpf_prog *prog,
+					 struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= U16_MAX)
+		return false;
+	if (off % size != 0)
+		return false;
+	return true;
+}
+
+BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
+{
+	return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_sys_bpf_proto = {
+	.func		= bpf_sys_bpf,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+const struct bpf_func_proto * __weak
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
+syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_sys_bpf:
+		return &bpf_sys_bpf_proto;
+	default:
+		return tracing_prog_func_proto(func_id, prog);
+	}
+}
+
+const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
+	.get_func_proto  = syscall_prog_func_proto,
+	.is_valid_access = syscall_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops bpf_syscall_prog_ops = {
+	.test_run = bpf_prog_test_run_syscall,
+};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bdfdb54676ea..37407d8fbca4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13196,6 +13196,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	int ret;
 	u64 key;
 
+	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
+		if (prog->aux->sleepable)
+			/* attach_btf_id checked to be zero already */
+			return 0;
+		verbose(env, "Syscall programs can only be sleepable\n");
+		return -EINVAL;
+	}
+
 	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
 	    prog->type != BPF_PROG_TYPE_LSM) {
 		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a5d72c48fb66..a6972d7ddf80 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -918,3 +918,46 @@ out:
 	kfree(user_ctx);
 	return ret;
 }
+
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr)
+{
+	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+	__u32 ctx_size_in = kattr->test.ctx_size_in;
+	void *ctx = NULL;
+	u32 retval;
+	int err = 0;
+
+	/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
+	if (kattr->test.data_in || kattr->test.data_out ||
+	    kattr->test.ctx_out || kattr->test.duration ||
+	    kattr->test.repeat || kattr->test.flags)
+		return -EINVAL;
+
+	if (ctx_size_in < prog->aux->max_ctx_offset ||
+	    ctx_size_in > U16_MAX)
+		return -EINVAL;
+
+	if (ctx_size_in) {
+		ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!ctx)
+			return -ENOMEM;
+		if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+	}
+	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+
+	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
+		err = -EFAULT;
+		goto out;
+	}
+	if (ctx_size_in)
+		if (copy_to_user(ctx_in, ctx, ctx_size_in))
+			err = -EFAULT;
+out:
+	kfree(ctx);
+	return err;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ec6d85a81744..c92648f38144 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -937,6 +937,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
 };
 
 enum bpf_attach_type {
@@ -4735,6 +4736,12 @@ union bpf_attr {
  *		be zero-terminated except when **str_size** is 0.
  *
  *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * 	Description
+ * 		Execute bpf syscall with given arguments.
+ * 	Return
+ * 		A syscall result.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4903,6 +4910,7 @@ union bpf_attr {
 	FN(check_mtu),			\
 	FN(for_each_map_elem),		\
 	FN(snprintf),			\
+	FN(sys_bpf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper

From cdf7fb0a9f3d36b279590ac41e61c6b655db0d4a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:04 -0700
Subject: [PATCH 0281/3292] bpf: Introduce bpfptr_t user/kernel pointer.

Similar to sockptr_t introduce bpfptr_t with few additions:
make_bpfptr() creates new user/kernel pointer in the same address space as
existing user/kernel pointer.
bpfptr_add() advances the user/kernel pointer.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-3-alexei.starovoitov@gmail.com
---
 include/linux/bpfptr.h | 75 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 include/linux/bpfptr.h

diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
new file mode 100644
index 000000000000..5cdeab497cb3
--- /dev/null
+++ b/include/linux/bpfptr.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* A pointer that can point to either kernel or userspace memory. */
+#ifndef _LINUX_BPFPTR_H
+#define _LINUX_BPFPTR_H
+
+#include <linux/sockptr.h>
+
+typedef sockptr_t bpfptr_t;
+
+static inline bool bpfptr_is_kernel(bpfptr_t bpfptr)
+{
+	return bpfptr.is_kernel;
+}
+
+static inline bpfptr_t KERNEL_BPFPTR(void *p)
+{
+	return (bpfptr_t) { .kernel = p, .is_kernel = true };
+}
+
+static inline bpfptr_t USER_BPFPTR(void __user *p)
+{
+	return (bpfptr_t) { .user = p };
+}
+
+static inline bpfptr_t make_bpfptr(u64 addr, bool is_kernel)
+{
+	if (is_kernel)
+		return KERNEL_BPFPTR((void*) (uintptr_t) addr);
+	else
+		return USER_BPFPTR(u64_to_user_ptr(addr));
+}
+
+static inline bool bpfptr_is_null(bpfptr_t bpfptr)
+{
+	if (bpfptr_is_kernel(bpfptr))
+		return !bpfptr.kernel;
+	return !bpfptr.user;
+}
+
+static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val)
+{
+	if (bpfptr_is_kernel(*bpfptr))
+		bpfptr->kernel += val;
+	else
+		bpfptr->user += val;
+}
+
+static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src,
+					  size_t offset, size_t size)
+{
+	return copy_from_sockptr_offset(dst, (sockptr_t) src, offset, size);
+}
+
+static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size)
+{
+	return copy_from_bpfptr_offset(dst, src, 0, size);
+}
+
+static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
+					const void *src, size_t size)
+{
+	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
+}
+
+static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
+{
+	return memdup_sockptr((sockptr_t) src, len);
+}
+
+static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
+{
+	return strncpy_from_sockptr(dst, (sockptr_t) src, count);
+}
+
+#endif /* _LINUX_BPFPTR_H */

From af2ac3e13e45752af03c8a933f9b6e18841b128b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:05 -0700
Subject: [PATCH 0282/3292] bpf: Prepare bpf syscall to be used from kernel and
 user space.

With the help from bpfptr_t prepare relevant bpf syscall commands
to be used from kernel and user space.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-4-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h   |   8 +--
 kernel/bpf/bpf_iter.c |  13 ++---
 kernel/bpf/syscall.c  | 113 +++++++++++++++++++++++++++---------------
 kernel/bpf/verifier.c |  34 +++++++------
 net/bpf/test_run.c    |   2 +-
 5 files changed, 104 insertions(+), 66 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 04a2bf41ae72..7fd53380c981 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -22,6 +22,7 @@
 #include <linux/sched/mm.h>
 #include <linux/slab.h>
 #include <linux/percpu-refcount.h>
+#include <linux/bpfptr.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -1428,7 +1429,7 @@ struct bpf_iter__bpf_map_elem {
 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
-int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
@@ -1459,7 +1460,7 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 
 int bpf_get_file_flag(int flags);
-int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
+int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
 			     size_t actual_size);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
@@ -1479,8 +1480,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 }
 
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
-	      union bpf_attr __user *uattr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 931870f9cf56..2d4fbdbb194e 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -473,15 +473,16 @@ bool bpf_link_is_iter(struct bpf_link *link)
 	return link->ops == &bpf_iter_link_lops;
 }
 
-int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
+			 struct bpf_prog *prog)
 {
-	union bpf_iter_link_info __user *ulinfo;
 	struct bpf_link_primer link_primer;
 	struct bpf_iter_target_info *tinfo;
 	union bpf_iter_link_info linfo;
 	struct bpf_iter_link *link;
 	u32 prog_btf_id, linfo_len;
 	bool existed = false;
+	bpfptr_t ulinfo;
 	int err;
 
 	if (attr->link_create.target_fd || attr->link_create.flags)
@@ -489,18 +490,18 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 
 	memset(&linfo, 0, sizeof(union bpf_iter_link_info));
 
-	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
+	ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel);
 	linfo_len = attr->link_create.iter_info_len;
-	if (!ulinfo ^ !linfo_len)
+	if (bpfptr_is_null(ulinfo) ^ !linfo_len)
 		return -EINVAL;
 
-	if (ulinfo) {
+	if (!bpfptr_is_null(ulinfo)) {
 		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
 					       linfo_len);
 		if (err)
 			return err;
 		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
-		if (copy_from_user(&linfo, ulinfo, linfo_len))
+		if (copy_from_bpfptr(&linfo, ulinfo, linfo_len))
 			return -EFAULT;
 	}
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b1e7352919cb..28387fe149ba 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -72,11 +72,10 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
  * copy_from_user() call. However, this is not a concern since this function is
  * meant to be a future-proofing of bits.
  */
-int bpf_check_uarg_tail_zero(void __user *uaddr,
+int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
 			     size_t expected_size,
 			     size_t actual_size)
 {
-	unsigned char __user *addr = uaddr + expected_size;
 	int res;
 
 	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
@@ -85,7 +84,12 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
 	if (actual_size <= expected_size)
 		return 0;
 
-	res = check_zeroed_user(addr, actual_size - expected_size);
+	if (uaddr.is_kernel)
+		res = memchr_inv(uaddr.kernel + expected_size, 0,
+				 actual_size - expected_size) == NULL;
+	else
+		res = check_zeroed_user(uaddr.user + expected_size,
+					actual_size - expected_size);
 	if (res < 0)
 		return res;
 	return res ? 0 : -E2BIG;
@@ -1004,6 +1008,17 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 	return NULL;
 }
 
+static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
+{
+	if (key_size)
+		return memdup_bpfptr(ukey, key_size);
+
+	if (!bpfptr_is_null(ukey))
+		return ERR_PTR(-EINVAL);
+
+	return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
@@ -1074,10 +1089,10 @@ err_put:
 
 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 
-static int map_update_elem(union bpf_attr *attr)
+static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 {
-	void __user *ukey = u64_to_user_ptr(attr->key);
-	void __user *uvalue = u64_to_user_ptr(attr->value);
+	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
+	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
 	int ufd = attr->map_fd;
 	struct bpf_map *map;
 	void *key, *value;
@@ -1103,7 +1118,7 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	key = __bpf_copy_key(ukey, map->key_size);
+	key = ___bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
@@ -1123,7 +1138,7 @@ static int map_update_elem(union bpf_attr *attr)
 		goto free_key;
 
 	err = -EFAULT;
-	if (copy_from_user(value, uvalue, value_size) != 0)
+	if (copy_from_bpfptr(value, uvalue, value_size) != 0)
 		goto free_value;
 
 	err = bpf_map_update_value(map, f, key, value, attr->flags);
@@ -2076,7 +2091,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
 
-static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
+static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
 {
 	enum bpf_prog_type type = attr->prog_type;
 	struct bpf_prog *prog, *dst_prog = NULL;
@@ -2101,8 +2116,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 		return -EPERM;
 
 	/* copy eBPF program license from user space */
-	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
-			      sizeof(license) - 1) < 0)
+	if (strncpy_from_bpfptr(license,
+				make_bpfptr(attr->license, uattr.is_kernel),
+				sizeof(license) - 1) < 0)
 		return -EFAULT;
 	license[sizeof(license) - 1] = 0;
 
@@ -2186,8 +2202,9 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	prog->len = attr->insn_cnt;
 
 	err = -EFAULT;
-	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
-			   bpf_prog_insn_size(prog)) != 0)
+	if (copy_from_bpfptr(prog->insns,
+			     make_bpfptr(attr->insns, uattr.is_kernel),
+			     bpf_prog_insn_size(prog)) != 0)
 		goto free_prog_sec;
 
 	prog->orig_prog = NULL;
@@ -3423,7 +3440,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 	u32 ulen;
 	int err;
 
-	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
 	if (err)
 		return err;
 	info_len = min_t(u32, sizeof(info), info_len);
@@ -3702,7 +3719,7 @@ static int bpf_map_get_info_by_fd(struct file *file,
 	u32 info_len = attr->info.info_len;
 	int err;
 
-	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
 	if (err)
 		return err;
 	info_len = min_t(u32, sizeof(info), info_len);
@@ -3745,7 +3762,7 @@ static int bpf_btf_get_info_by_fd(struct file *file,
 	u32 info_len = attr->info.info_len;
 	int err;
 
-	err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
+	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
 	if (err)
 		return err;
 
@@ -3762,7 +3779,7 @@ static int bpf_link_get_info_by_fd(struct file *file,
 	u32 info_len = attr->info.info_len;
 	int err;
 
-	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
 	if (err)
 		return err;
 	info_len = min_t(u32, sizeof(info), info_len);
@@ -4023,13 +4040,14 @@ err_put:
 	return err;
 }
 
-static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
+				   struct bpf_prog *prog)
 {
 	if (attr->link_create.attach_type != prog->expected_attach_type)
 		return -EINVAL;
 
 	if (prog->expected_attach_type == BPF_TRACE_ITER)
-		return bpf_iter_link_attach(attr, prog);
+		return bpf_iter_link_attach(attr, uattr, prog);
 	else if (prog->type == BPF_PROG_TYPE_EXT)
 		return bpf_tracing_prog_attach(prog,
 					       attr->link_create.target_fd,
@@ -4038,7 +4056,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
 }
 
 #define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
-static int link_create(union bpf_attr *attr)
+static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 {
 	enum bpf_prog_type ptype;
 	struct bpf_prog *prog;
@@ -4057,7 +4075,7 @@ static int link_create(union bpf_attr *attr)
 		goto out;
 
 	if (prog->type == BPF_PROG_TYPE_EXT) {
-		ret = tracing_bpf_link_attach(attr, prog);
+		ret = tracing_bpf_link_attach(attr, uattr, prog);
 		goto out;
 	}
 
@@ -4078,7 +4096,7 @@ static int link_create(union bpf_attr *attr)
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_TRACING:
-		ret = tracing_bpf_link_attach(attr, prog);
+		ret = tracing_bpf_link_attach(attr, uattr, prog);
 		break;
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_SK_LOOKUP:
@@ -4366,7 +4384,7 @@ out_prog_put:
 	return ret;
 }
 
-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 {
 	union bpf_attr attr;
 	int err;
@@ -4381,7 +4399,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 
 	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
 	memset(&attr, 0, sizeof(attr));
-	if (copy_from_user(&attr, uattr, size) != 0)
+	if (copy_from_bpfptr(&attr, uattr, size) != 0)
 		return -EFAULT;
 
 	err = security_bpf(cmd, &attr, size);
@@ -4396,7 +4414,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = map_lookup_elem(&attr);
 		break;
 	case BPF_MAP_UPDATE_ELEM:
-		err = map_update_elem(&attr);
+		err = map_update_elem(&attr, uattr);
 		break;
 	case BPF_MAP_DELETE_ELEM:
 		err = map_delete_elem(&attr);
@@ -4423,21 +4441,21 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_prog_detach(&attr);
 		break;
 	case BPF_PROG_QUERY:
-		err = bpf_prog_query(&attr, uattr);
+		err = bpf_prog_query(&attr, uattr.user);
 		break;
 	case BPF_PROG_TEST_RUN:
-		err = bpf_prog_test_run(&attr, uattr);
+		err = bpf_prog_test_run(&attr, uattr.user);
 		break;
 	case BPF_PROG_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr,
+		err = bpf_obj_get_next_id(&attr, uattr.user,
 					  &prog_idr, &prog_idr_lock);
 		break;
 	case BPF_MAP_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr,
+		err = bpf_obj_get_next_id(&attr, uattr.user,
 					  &map_idr, &map_idr_lock);
 		break;
 	case BPF_BTF_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr,
+		err = bpf_obj_get_next_id(&attr, uattr.user,
 					  &btf_idr, &btf_idr_lock);
 		break;
 	case BPF_PROG_GET_FD_BY_ID:
@@ -4447,7 +4465,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_map_get_fd_by_id(&attr);
 		break;
 	case BPF_OBJ_GET_INFO_BY_FD:
-		err = bpf_obj_get_info_by_fd(&attr, uattr);
+		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
 		break;
 	case BPF_RAW_TRACEPOINT_OPEN:
 		err = bpf_raw_tracepoint_open(&attr);
@@ -4459,26 +4477,26 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_btf_get_fd_by_id(&attr);
 		break;
 	case BPF_TASK_FD_QUERY:
-		err = bpf_task_fd_query(&attr, uattr);
+		err = bpf_task_fd_query(&attr, uattr.user);
 		break;
 	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
 		err = map_lookup_and_delete_elem(&attr);
 		break;
 	case BPF_MAP_LOOKUP_BATCH:
-		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
+		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
 		break;
 	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
-		err = bpf_map_do_batch(&attr, uattr,
+		err = bpf_map_do_batch(&attr, uattr.user,
 				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
 		break;
 	case BPF_MAP_UPDATE_BATCH:
-		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
+		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
 		break;
 	case BPF_MAP_DELETE_BATCH:
-		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
+		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
 		break;
 	case BPF_LINK_CREATE:
-		err = link_create(&attr);
+		err = link_create(&attr, uattr);
 		break;
 	case BPF_LINK_UPDATE:
 		err = link_update(&attr);
@@ -4487,7 +4505,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_link_get_fd_by_id(&attr);
 		break;
 	case BPF_LINK_GET_NEXT_ID:
-		err = bpf_obj_get_next_id(&attr, uattr,
+		err = bpf_obj_get_next_id(&attr, uattr.user,
 					  &link_idr, &link_idr_lock);
 		break;
 	case BPF_ENABLE_STATS:
@@ -4510,6 +4528,11 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	return err;
 }
 
+SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+{
+	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
+}
+
 static bool syscall_prog_is_valid_access(int off, int size,
 					 enum bpf_access_type type,
 					 const struct bpf_prog *prog,
@@ -4524,7 +4547,19 @@ static bool syscall_prog_is_valid_access(int off, int size,
 
 BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
 {
-	return -EINVAL;
+	switch (cmd) {
+	case BPF_MAP_CREATE:
+	case BPF_MAP_UPDATE_ELEM:
+	case BPF_MAP_FREEZE:
+	case BPF_PROG_LOAD:
+		break;
+	/* case BPF_PROG_TEST_RUN:
+	 * is not part of this list to prevent recursive test_run
+	 */
+	default:
+		return -EINVAL;
+	}
+	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
 }
 
 const struct bpf_func_proto bpf_sys_bpf_proto = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 37407d8fbca4..e63c7d60e00d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9436,7 +9436,7 @@ static int check_abnormal_return(struct bpf_verifier_env *env)
 
 static int check_btf_func(struct bpf_verifier_env *env,
 			  const union bpf_attr *attr,
-			  union bpf_attr __user *uattr)
+			  bpfptr_t uattr)
 {
 	const struct btf_type *type, *func_proto, *ret_type;
 	u32 i, nfuncs, urec_size, min_size;
@@ -9445,7 +9445,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
 	struct bpf_func_info_aux *info_aux = NULL;
 	struct bpf_prog *prog;
 	const struct btf *btf;
-	void __user *urecord;
+	bpfptr_t urecord;
 	u32 prev_offset = 0;
 	bool scalar_return;
 	int ret = -ENOMEM;
@@ -9473,7 +9473,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
 	prog = env->prog;
 	btf = prog->aux->btf;
 
-	urecord = u64_to_user_ptr(attr->func_info);
+	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
 	min_size = min_t(u32, krec_size, urec_size);
 
 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
@@ -9491,13 +9491,15 @@ static int check_btf_func(struct bpf_verifier_env *env,
 				/* set the size kernel expects so loader can zero
 				 * out the rest of the record.
 				 */
-				if (put_user(min_size, &uattr->func_info_rec_size))
+				if (copy_to_bpfptr_offset(uattr,
+							  offsetof(union bpf_attr, func_info_rec_size),
+							  &min_size, sizeof(min_size)))
 					ret = -EFAULT;
 			}
 			goto err_free;
 		}
 
-		if (copy_from_user(&krecord[i], urecord, min_size)) {
+		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
 			ret = -EFAULT;
 			goto err_free;
 		}
@@ -9549,7 +9551,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
 		}
 
 		prev_offset = krecord[i].insn_off;
-		urecord += urec_size;
+		bpfptr_add(&urecord, urec_size);
 	}
 
 	prog->aux->func_info = krecord;
@@ -9581,14 +9583,14 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
 
 static int check_btf_line(struct bpf_verifier_env *env,
 			  const union bpf_attr *attr,
-			  union bpf_attr __user *uattr)
+			  bpfptr_t uattr)
 {
 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
 	struct bpf_subprog_info *sub;
 	struct bpf_line_info *linfo;
 	struct bpf_prog *prog;
 	const struct btf *btf;
-	void __user *ulinfo;
+	bpfptr_t ulinfo;
 	int err;
 
 	nr_linfo = attr->line_info_cnt;
@@ -9614,7 +9616,7 @@ static int check_btf_line(struct bpf_verifier_env *env,
 
 	s = 0;
 	sub = env->subprog_info;
-	ulinfo = u64_to_user_ptr(attr->line_info);
+	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
 	expected_size = sizeof(struct bpf_line_info);
 	ncopy = min_t(u32, expected_size, rec_size);
 	for (i = 0; i < nr_linfo; i++) {
@@ -9622,14 +9624,15 @@ static int check_btf_line(struct bpf_verifier_env *env,
 		if (err) {
 			if (err == -E2BIG) {
 				verbose(env, "nonzero tailing record in line_info");
-				if (put_user(expected_size,
-					     &uattr->line_info_rec_size))
+				if (copy_to_bpfptr_offset(uattr,
+							  offsetof(union bpf_attr, line_info_rec_size),
+							  &expected_size, sizeof(expected_size)))
 					err = -EFAULT;
 			}
 			goto err_free;
 		}
 
-		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
+		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
 			err = -EFAULT;
 			goto err_free;
 		}
@@ -9681,7 +9684,7 @@ static int check_btf_line(struct bpf_verifier_env *env,
 		}
 
 		prev_offset = linfo[i].insn_off;
-		ulinfo += rec_size;
+		bpfptr_add(&ulinfo, rec_size);
 	}
 
 	if (s != env->subprog_cnt) {
@@ -9703,7 +9706,7 @@ err_free:
 
 static int check_btf_info(struct bpf_verifier_env *env,
 			  const union bpf_attr *attr,
-			  union bpf_attr __user *uattr)
+			  bpfptr_t uattr)
 {
 	struct btf *btf;
 	int err;
@@ -13275,8 +13278,7 @@ struct btf *bpf_get_btf_vmlinux(void)
 	return btf_vmlinux;
 }
 
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
-	      union bpf_attr __user *uattr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
 {
 	u64 start_time = ktime_get_ns();
 	struct bpf_verifier_env *env;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a6972d7ddf80..aa47af349ba8 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -409,7 +409,7 @@ static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
 		return ERR_PTR(-ENOMEM);
 
 	if (data_in) {
-		err = bpf_check_uarg_tail_zero(data_in, max_size, size);
+		err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size);
 		if (err) {
 			kfree(data);
 			return ERR_PTR(err);

From 5452fc9a17fc26816a683ab04cf1c29131ca27e4 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:06 -0700
Subject: [PATCH 0283/3292] libbpf: Support for syscall program type

Trivial support for syscall program type.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-5-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/libbpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e58f51b24574..05315048dd10 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8904,6 +8904,8 @@ static const struct bpf_sec_def section_defs[] = {
 		.expected_attach_type = BPF_TRACE_ITER,
 		.is_attach_btf = true,
 		.attach_fn = attach_iter),
+	SEC_DEF("syscall", SYSCALL,
+		.is_sleepable = true),
 	BPF_EAPROG_SEC("xdp_devmap/",		BPF_PROG_TYPE_XDP,
 						BPF_XDP_DEVMAP),
 	BPF_EAPROG_SEC("xdp_cpumap/",		BPF_PROG_TYPE_XDP,

From 00899e7e8d9d30142ccff4cebd80effca58396b7 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:07 -0700
Subject: [PATCH 0284/3292] selftests/bpf: Test for syscall program type

bpf_prog_type_syscall is a program that creates a bpf map,
updates it, and loads another bpf program using bpf_sys_bpf() helper.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-6-alexei.starovoitov@gmail.com
---
 .../selftests/bpf/prog_tests/syscall.c        | 52 ++++++++++++++
 tools/testing/selftests/bpf/progs/syscall.c   | 71 +++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/syscall.c
 create mode 100644 tools/testing/selftests/bpf/progs/syscall.c

diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
new file mode 100644
index 000000000000..1badd37148a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "syscall.skel.h"
+
+struct args {
+	__u64 log_buf;
+	__u32 log_size;
+	int max_entries;
+	int map_fd;
+	int prog_fd;
+};
+
+void test_syscall(void)
+{
+	static char verifier_log[8192];
+	struct args ctx = {
+		.max_entries = 1024,
+		.log_buf = (uintptr_t) verifier_log,
+		.log_size = sizeof(verifier_log),
+	};
+	struct bpf_prog_test_run_attr tattr = {
+		.ctx_in = &ctx,
+		.ctx_size_in = sizeof(ctx),
+	};
+	struct syscall *skel = NULL;
+	__u64 key = 12, value = 0;
+	int err;
+
+	skel = syscall__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
+		goto cleanup;
+
+	tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+	err = bpf_prog_test_run_xattr(&tattr);
+	ASSERT_EQ(err, 0, "err");
+	ASSERT_EQ(tattr.retval, 1, "retval");
+	ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
+	ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd");
+	ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1),
+		  "verifier_log");
+
+	err = bpf_map_lookup_elem(ctx.map_fd, &key, &value);
+	ASSERT_EQ(err, 0, "map_lookup");
+	ASSERT_EQ(value, 34, "map lookup value");
+cleanup:
+	syscall__destroy(skel);
+	if (ctx.prog_fd > 0)
+		close(ctx.prog_fd);
+	if (ctx.map_fd > 0)
+		close(ctx.map_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
new file mode 100644
index 000000000000..865b5269ecbb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <../../../tools/include/linux/filter.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct args {
+	__u64 log_buf;
+	__u32 log_size;
+	int max_entries;
+	int map_fd;
+	int prog_fd;
+};
+
+SEC("syscall")
+int bpf_prog(struct args *ctx)
+{
+	static char license[] = "GPL";
+	static struct bpf_insn insns[] = {
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+		BPF_LD_MAP_FD(BPF_REG_1, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	static union bpf_attr map_create_attr = {
+		.map_type = BPF_MAP_TYPE_HASH,
+		.key_size = 8,
+		.value_size = 8,
+	};
+	static union bpf_attr map_update_attr = { .map_fd = 1, };
+	static __u64 key = 12;
+	static __u64 value = 34;
+	static union bpf_attr prog_load_attr = {
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.insn_cnt = sizeof(insns) / sizeof(insns[0]),
+	};
+	int ret;
+
+	map_create_attr.max_entries = ctx->max_entries;
+	prog_load_attr.license = (long) license;
+	prog_load_attr.insns = (long) insns;
+	prog_load_attr.log_buf = ctx->log_buf;
+	prog_load_attr.log_size = ctx->log_size;
+	prog_load_attr.log_level = 1;
+
+	ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr));
+	if (ret <= 0)
+		return ret;
+	ctx->map_fd = ret;
+	insns[3].imm = ret;
+
+	map_update_attr.map_fd = ret;
+	map_update_attr.key = (long) &key;
+	map_update_attr.value = (long) &value;
+	ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
+	if (ret < 0)
+		return ret;
+
+	ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr));
+	if (ret <= 0)
+		return ret;
+	ctx->prog_fd = ret;
+	return 1;
+}

From c571bd752e91602f092823b2f1ee685a74d2726c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:08 -0700
Subject: [PATCH 0285/3292] bpf: Make btf_load command to be bpfptr_t
 compatible.

Similar to prog_load make btf_load command to be availble to
bpf_prog_type_syscall program.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-7-alexei.starovoitov@gmail.com
---
 include/linux/btf.h  | 2 +-
 kernel/bpf/btf.c     | 8 ++++----
 kernel/bpf/syscall.c | 7 ++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 3bac66e0183a..94a0c976c90f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -21,7 +21,7 @@ extern const struct file_operations btf_fops;
 
 void btf_get(struct btf *btf);
 void btf_put(struct btf *btf);
-int btf_new_fd(const union bpf_attr *attr);
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr);
 struct btf *btf_get_by_fd(int fd);
 int btf_get_info_by_fd(const struct btf *btf,
 		       const union bpf_attr *attr,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0600ed325fa0..fbf6c06a9d62 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4257,7 +4257,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
 	return 0;
 }
 
-static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
+static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
 			     u32 log_level, char __user *log_ubuf, u32 log_size)
 {
 	struct btf_verifier_env *env = NULL;
@@ -4306,7 +4306,7 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
 	btf->data = data;
 	btf->data_size = btf_data_size;
 
-	if (copy_from_user(data, btf_data, btf_data_size)) {
+	if (copy_from_bpfptr(data, btf_data, btf_data_size)) {
 		err = -EFAULT;
 		goto errout;
 	}
@@ -5780,12 +5780,12 @@ static int __btf_new_fd(struct btf *btf)
 	return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
 }
 
-int btf_new_fd(const union bpf_attr *attr)
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr)
 {
 	struct btf *btf;
 	int ret;
 
-	btf = btf_parse(u64_to_user_ptr(attr->btf),
+	btf = btf_parse(make_bpfptr(attr->btf, uattr.is_kernel),
 			attr->btf_size, attr->btf_log_level,
 			u64_to_user_ptr(attr->btf_log_buf),
 			attr->btf_log_size);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 28387fe149ba..415865c49dd4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3842,7 +3842,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 
 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level
 
-static int bpf_btf_load(const union bpf_attr *attr)
+static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
 {
 	if (CHECK_ATTR(BPF_BTF_LOAD))
 		return -EINVAL;
@@ -3850,7 +3850,7 @@ static int bpf_btf_load(const union bpf_attr *attr)
 	if (!bpf_capable())
 		return -EPERM;
 
-	return btf_new_fd(attr);
+	return btf_new_fd(attr, uattr);
 }
 
 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
@@ -4471,7 +4471,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 		err = bpf_raw_tracepoint_open(&attr);
 		break;
 	case BPF_BTF_LOAD:
-		err = bpf_btf_load(&attr);
+		err = bpf_btf_load(&attr, uattr);
 		break;
 	case BPF_BTF_GET_FD_BY_ID:
 		err = bpf_btf_get_fd_by_id(&attr);
@@ -4552,6 +4552,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
 	case BPF_MAP_UPDATE_ELEM:
 	case BPF_MAP_FREEZE:
 	case BPF_PROG_LOAD:
+	case BPF_BTF_LOAD:
 		break;
 	/* case BPF_PROG_TEST_RUN:
 	 * is not part of this list to prevent recursive test_run

From 2341d6bb133d44caeba9fbdc851f8624739a84a2 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:09 -0700
Subject: [PATCH 0286/3292] selftests/bpf: Test for btf_load command.

Improve selftest to check that btf_load is working from bpf program.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-8-alexei.starovoitov@gmail.com
---
 .../selftests/bpf/prog_tests/syscall.c        |  3 ++
 tools/testing/selftests/bpf/progs/syscall.c   | 50 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
index 1badd37148a1..81e997a69f7a 100644
--- a/tools/testing/selftests/bpf/prog_tests/syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -9,6 +9,7 @@ struct args {
 	int max_entries;
 	int map_fd;
 	int prog_fd;
+	int btf_fd;
 };
 
 void test_syscall(void)
@@ -49,4 +50,6 @@ cleanup:
 		close(ctx.prog_fd);
 	if (ctx.map_fd > 0)
 		close(ctx.map_fd);
+	if (ctx.btf_fd > 0)
+		close(ctx.btf_fd);
 }
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
index 865b5269ecbb..e550f728962d 100644
--- a/tools/testing/selftests/bpf/progs/syscall.c
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -5,6 +5,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include <../../../tools/include/linux/filter.h>
+#include <linux/btf.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -14,8 +15,48 @@ struct args {
 	int max_entries;
 	int map_fd;
 	int prog_fd;
+	int btf_fd;
 };
 
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+	BTF_INT_ENC(encoding, bits_offset, bits)
+
+static int btf_load(void)
+{
+	struct btf_blob {
+		struct btf_header btf_hdr;
+		__u32 types[8];
+		__u32 str;
+	} raw_btf = {
+		.btf_hdr = {
+			.magic = BTF_MAGIC,
+			.version = BTF_VERSION,
+			.hdr_len = sizeof(struct btf_header),
+			.type_len = sizeof(__u32) * 8,
+			.str_off = sizeof(__u32) * 8,
+			.str_len = sizeof(__u32),
+		},
+		.types = {
+			/* long */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),  /* [1] */
+			/* unsigned long */
+			BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),  /* [2] */
+		},
+	};
+	static union bpf_attr btf_load_attr = {
+		.btf_size = sizeof(raw_btf),
+	};
+
+	btf_load_attr.btf = (long)&raw_btf;
+	return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr));
+}
+
 SEC("syscall")
 int bpf_prog(struct args *ctx)
 {
@@ -33,6 +74,8 @@ int bpf_prog(struct args *ctx)
 		.map_type = BPF_MAP_TYPE_HASH,
 		.key_size = 8,
 		.value_size = 8,
+		.btf_key_type_id = 1,
+		.btf_value_type_id = 2,
 	};
 	static union bpf_attr map_update_attr = { .map_fd = 1, };
 	static __u64 key = 12;
@@ -43,7 +86,14 @@ int bpf_prog(struct args *ctx)
 	};
 	int ret;
 
+	ret = btf_load();
+	if (ret <= 0)
+		return ret;
+
+	ctx->btf_fd = ret;
 	map_create_attr.max_entries = ctx->max_entries;
+	map_create_attr.btf_fd = ret;
+
 	prog_load_attr.license = (long) license;
 	prog_load_attr.insns = (long) insns;
 	prog_load_attr.log_buf = ctx->log_buf;

From 387544bfa291a22383d60b40f887360e2b931ec6 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:10 -0700
Subject: [PATCH 0287/3292] bpf: Introduce fd_idx

Typical program loading sequence involves creating bpf maps and applying
map FDs into bpf instructions in various places in the bpf program.
This job is done by libbpf that is using compiler generated ELF relocations
to patch certain instruction after maps are created and BTFs are loaded.
The goal of fd_idx is to allow bpf instructions to stay immutable
after compilation. At load time the libbpf would still create maps as usual,
but it wouldn't need to patch instructions. It would store map_fds into
__u32 fd_array[] and would pass that pointer to sys_bpf(BPF_PROG_LOAD).

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-9-alexei.starovoitov@gmail.com
---
 include/linux/bpf_verifier.h   |  1 +
 include/uapi/linux/bpf.h       | 16 ++++++++----
 kernel/bpf/syscall.c           |  2 +-
 kernel/bpf/verifier.c          | 47 ++++++++++++++++++++++++++--------
 tools/include/uapi/linux/bpf.h | 16 ++++++++----
 5 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d4632aa3ca50..e774ecc1cd1f 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -450,6 +450,7 @@ struct bpf_verifier_env {
 	u32 peak_states;
 	/* longest register parentage chain walked for liveness marking */
 	u32 longest_mark_read_walk;
+	bpfptr_t fd_array;
 };
 
 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c92648f38144..de58a714ed36 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1098,8 +1098,8 @@ enum bpf_link_type {
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
- * insn[0].imm:      map fd
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_[FD|IDX]
+ * insn[0].imm:      map fd or fd_idx
  * insn[1].imm:      0
  * insn[0].off:      0
  * insn[1].off:      0
@@ -1107,15 +1107,19 @@ enum bpf_link_type {
  * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
-/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd
+#define BPF_PSEUDO_MAP_IDX	5
+
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_[IDX_]VALUE
+ * insn[0].imm:      map fd or fd_idx
  * insn[1].imm:      offset into value
  * insn[0].off:      0
  * insn[1].off:      0
  * ldimm64 rewrite:  address of map[0]+offset
  * verifier type:    PTR_TO_MAP_VALUE
  */
-#define BPF_PSEUDO_MAP_VALUE	2
+#define BPF_PSEUDO_MAP_VALUE		2
+#define BPF_PSEUDO_MAP_IDX_VALUE	6
+
 /* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
  * insn[0].imm:      kernel btd id of VAR
  * insn[1].imm:      0
@@ -1315,6 +1319,8 @@ union bpf_attr {
 			/* or valid module BTF object fd or 0 to attach to vmlinux */
 			__u32		attach_btf_obj_fd;
 		};
+		__u32		:32;		/* pad */
+		__aligned_u64	fd_array;	/* array of FDs */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 415865c49dd4..da7dc2406470 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2089,7 +2089,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
+#define	BPF_PROG_LOAD_LAST_FIELD fd_array
 
 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e63c7d60e00d..9189eecb26dd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8915,12 +8915,14 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	mark_reg_known_zero(env, regs, insn->dst_reg);
 	dst_reg->map_ptr = map;
 
-	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
+	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
 		dst_reg->type = PTR_TO_MAP_VALUE;
 		dst_reg->off = aux->map_off;
 		if (map_value_has_spin_lock(map))
 			dst_reg->id = ++env->id_gen;
-	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
+		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
 		dst_reg->type = CONST_PTR_TO_MAP;
 	} else {
 		verbose(env, "bpf verifier is misconfigured\n");
@@ -11173,6 +11175,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 			struct bpf_map *map;
 			struct fd f;
 			u64 addr;
+			u32 fd;
 
 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@ -11202,16 +11205,38 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 			/* In final convert_pseudo_ld_imm64() step, this is
 			 * converted into regular 64-bit imm load insn.
 			 */
-			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
-			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
-			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
-			     insn[1].imm != 0)) {
-				verbose(env,
-					"unrecognized bpf_ld_imm64 insn\n");
+			switch (insn[0].src_reg) {
+			case BPF_PSEUDO_MAP_VALUE:
+			case BPF_PSEUDO_MAP_IDX_VALUE:
+				break;
+			case BPF_PSEUDO_MAP_FD:
+			case BPF_PSEUDO_MAP_IDX:
+				if (insn[1].imm == 0)
+					break;
+				fallthrough;
+			default:
+				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
 				return -EINVAL;
 			}
 
-			f = fdget(insn[0].imm);
+			switch (insn[0].src_reg) {
+			case BPF_PSEUDO_MAP_IDX_VALUE:
+			case BPF_PSEUDO_MAP_IDX:
+				if (bpfptr_is_null(env->fd_array)) {
+					verbose(env, "fd_idx without fd_array is invalid\n");
+					return -EPROTO;
+				}
+				if (copy_from_bpfptr_offset(&fd, env->fd_array,
+							    insn[0].imm * sizeof(fd),
+							    sizeof(fd)))
+					return -EFAULT;
+				break;
+			default:
+				fd = insn[0].imm;
+				break;
+			}
+
+			f = fdget(fd);
 			map = __bpf_map_get(f);
 			if (IS_ERR(map)) {
 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
@@ -11226,7 +11251,8 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 			}
 
 			aux = &env->insn_aux_data[i];
-			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
+			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
 				addr = (unsigned long)map;
 			} else {
 				u32 off = insn[1].imm;
@@ -13308,6 +13334,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
+	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
 	is_priv = bpf_capable();
 
 	bpf_get_btf_vmlinux();
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c92648f38144..de58a714ed36 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1098,8 +1098,8 @@ enum bpf_link_type {
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * the following extensions:
  *
- * insn[0].src_reg:  BPF_PSEUDO_MAP_FD
- * insn[0].imm:      map fd
+ * insn[0].src_reg:  BPF_PSEUDO_MAP_[FD|IDX]
+ * insn[0].imm:      map fd or fd_idx
  * insn[1].imm:      0
  * insn[0].off:      0
  * insn[1].off:      0
@@ -1107,15 +1107,19 @@ enum bpf_link_type {
  * verifier type:    CONST_PTR_TO_MAP
  */
 #define BPF_PSEUDO_MAP_FD	1
-/* insn[0].src_reg:  BPF_PSEUDO_MAP_VALUE
- * insn[0].imm:      map fd
+#define BPF_PSEUDO_MAP_IDX	5
+
+/* insn[0].src_reg:  BPF_PSEUDO_MAP_[IDX_]VALUE
+ * insn[0].imm:      map fd or fd_idx
  * insn[1].imm:      offset into value
  * insn[0].off:      0
  * insn[1].off:      0
  * ldimm64 rewrite:  address of map[0]+offset
  * verifier type:    PTR_TO_MAP_VALUE
  */
-#define BPF_PSEUDO_MAP_VALUE	2
+#define BPF_PSEUDO_MAP_VALUE		2
+#define BPF_PSEUDO_MAP_IDX_VALUE	6
+
 /* insn[0].src_reg:  BPF_PSEUDO_BTF_ID
  * insn[0].imm:      kernel btd id of VAR
  * insn[1].imm:      0
@@ -1315,6 +1319,8 @@ union bpf_attr {
 			/* or valid module BTF object fd or 0 to attach to vmlinux */
 			__u32		attach_btf_obj_fd;
 		};
+		__u32		:32;		/* pad */
+		__aligned_u64	fd_array;	/* array of FDs */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */

From 3d78417b60fba249cc555468cb72d96f5cde2964 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:11 -0700
Subject: [PATCH 0288/3292] bpf: Add bpf_btf_find_by_name_kind() helper.

Add new helper:
long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
Description
	Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
Return
	Returns btf_id and btf_obj_fd in lower and upper 32 bits.

It will be used by loader program to find btf_id to attach the program to
and to find btf_ids of ksyms.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-10-alexei.starovoitov@gmail.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  7 ++++
 kernel/bpf/btf.c               | 62 ++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  2 ++
 tools/include/uapi/linux/bpf.h |  7 ++++
 5 files changed, 79 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7fd53380c981..9dc44ba97584 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1974,6 +1974,7 @@ extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
 extern const struct bpf_func_proto bpf_task_storage_get_proto;
 extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
+extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index de58a714ed36..3cc07351c1cf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4748,6 +4748,12 @@ union bpf_attr {
  * 		Execute bpf syscall with given arguments.
  * 	Return
  * 		A syscall result.
+ *
+ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
+ * 	Description
+ * 		Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
+ * 	Return
+ * 		Returns btf_id and btf_obj_fd in lower and upper 32 bits.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4917,6 +4923,7 @@ union bpf_attr {
 	FN(for_each_map_elem),		\
 	FN(snprintf),			\
 	FN(sys_bpf),			\
+	FN(btf_find_by_name_kind),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index fbf6c06a9d62..85716327c375 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6085,3 +6085,65 @@ struct module *btf_try_get_module(const struct btf *btf)
 
 	return res;
 }
+
+BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
+{
+	struct btf *btf;
+	long ret;
+
+	if (flags)
+		return -EINVAL;
+
+	if (name_sz <= 1 || name[name_sz - 1])
+		return -EINVAL;
+
+	btf = bpf_get_btf_vmlinux();
+	if (IS_ERR(btf))
+		return PTR_ERR(btf);
+
+	ret = btf_find_by_name_kind(btf, name, kind);
+	/* ret is never zero, since btf_find_by_name_kind returns
+	 * positive btf_id or negative error.
+	 */
+	if (ret < 0) {
+		struct btf *mod_btf;
+		int id;
+
+		/* If name is not found in vmlinux's BTF then search in module's BTFs */
+		spin_lock_bh(&btf_idr_lock);
+		idr_for_each_entry(&btf_idr, mod_btf, id) {
+			if (!btf_is_module(mod_btf))
+				continue;
+			/* linear search could be slow hence unlock/lock
+			 * the IDR to avoiding holding it for too long
+			 */
+			btf_get(mod_btf);
+			spin_unlock_bh(&btf_idr_lock);
+			ret = btf_find_by_name_kind(mod_btf, name, kind);
+			if (ret > 0) {
+				int btf_obj_fd;
+
+				btf_obj_fd = __btf_new_fd(mod_btf);
+				if (btf_obj_fd < 0) {
+					btf_put(mod_btf);
+					return btf_obj_fd;
+				}
+				return ret | (((u64)btf_obj_fd) << 32);
+			}
+			spin_lock_bh(&btf_idr_lock);
+			btf_put(mod_btf);
+		}
+		spin_unlock_bh(&btf_idr_lock);
+	}
+	return ret;
+}
+
+const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
+	.func		= bpf_btf_find_by_name_kind,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index da7dc2406470..f93ff2ebf96d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4584,6 +4584,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	switch (func_id) {
 	case BPF_FUNC_sys_bpf:
 		return &bpf_sys_bpf_proto;
+	case BPF_FUNC_btf_find_by_name_kind:
+		return &bpf_btf_find_by_name_kind_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index de58a714ed36..3cc07351c1cf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4748,6 +4748,12 @@ union bpf_attr {
  * 		Execute bpf syscall with given arguments.
  * 	Return
  * 		A syscall result.
+ *
+ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
+ * 	Description
+ * 		Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
+ * 	Return
+ * 		Returns btf_id and btf_obj_fd in lower and upper 32 bits.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4917,6 +4923,7 @@ union bpf_attr {
 	FN(for_each_map_elem),		\
 	FN(snprintf),			\
 	FN(sys_bpf),			\
+	FN(btf_find_by_name_kind),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper

From 3abea089246f76c1517b054ddb5946f3f1dbd2c0 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:12 -0700
Subject: [PATCH 0289/3292] bpf: Add bpf_sys_close() helper.

Add bpf_sys_close() helper to be used by the syscall/loader program to close
intermediate FDs and other cleanup.
Note this helper must never be allowed inside fdget/fdput bracketing.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-11-alexei.starovoitov@gmail.com
---
 include/uapi/linux/bpf.h       |  7 +++++++
 kernel/bpf/syscall.c           | 19 +++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  7 +++++++
 3 files changed, 33 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3cc07351c1cf..4cd9a0181f27 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4754,6 +4754,12 @@ union bpf_attr {
  * 		Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
  * 	Return
  * 		Returns btf_id and btf_obj_fd in lower and upper 32 bits.
+ *
+ * long bpf_sys_close(u32 fd)
+ * 	Description
+ * 		Execute close syscall for given FD.
+ * 	Return
+ * 		A syscall result.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4924,6 +4930,7 @@ union bpf_attr {
 	FN(snprintf),			\
 	FN(sys_bpf),			\
 	FN(btf_find_by_name_kind),	\
+	FN(sys_close),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f93ff2ebf96d..0f1ce2171f1e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4578,6 +4578,23 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	return bpf_base_func_proto(func_id);
 }
 
+BPF_CALL_1(bpf_sys_close, u32, fd)
+{
+	/* When bpf program calls this helper there should not be
+	 * an fdget() without matching completed fdput().
+	 * This helper is allowed in the following callchain only:
+	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
+	 */
+	return close_fd(fd);
+}
+
+const struct bpf_func_proto bpf_sys_close_proto = {
+	.func		= bpf_sys_close,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4586,6 +4603,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sys_bpf_proto;
 	case BPF_FUNC_btf_find_by_name_kind:
 		return &bpf_btf_find_by_name_kind_proto;
+	case BPF_FUNC_sys_close:
+		return &bpf_sys_close_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3cc07351c1cf..4cd9a0181f27 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4754,6 +4754,12 @@ union bpf_attr {
  * 		Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
  * 	Return
  * 		Returns btf_id and btf_obj_fd in lower and upper 32 bits.
+ *
+ * long bpf_sys_close(u32 fd)
+ * 	Description
+ * 		Execute close syscall for given FD.
+ * 	Return
+ * 		A syscall result.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4924,6 +4930,7 @@ union bpf_attr {
 	FN(snprintf),			\
 	FN(sys_bpf),			\
 	FN(btf_find_by_name_kind),	\
+	FN(sys_close),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper

From b12688267280b223256c8cf912486577d3adce25 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:13 -0700
Subject: [PATCH 0290/3292] libbpf: Change the order of data and text
 relocations.

In order to be able to generate loader program in the later
patches change the order of data and text relocations.
Also improve the test to include data relos.

If the kernel supports "FD array" the map_fd relocations can be processed
before text relos since generated loader program won't need to manually
patch ld_imm64 insns with map_fd.
But ksym and kfunc relocations can only be processed after all calls
are relocated, since loader program will consist of a sequence
of calls to bpf_btf_find_by_name_kind() followed by patching of btf_id
and btf_obj_fd into corresponding ld_imm64 insns. The locations of those
ld_imm64 insns are specified in relocations.
Hence process all data relocations (maps, ksym, kfunc) together after call relos.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-12-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/libbpf.c                        | 86 ++++++++++++++++---
 .../selftests/bpf/progs/test_subprogs.c       | 13 +++
 2 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 05315048dd10..669400a0b676 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6419,11 +6419,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 			insn[0].imm = ext->ksym.kernel_btf_id;
 			break;
 		case RELO_SUBPROG_ADDR:
-			insn[0].src_reg = BPF_PSEUDO_FUNC;
-			/* will be handled as a follow up pass */
+			if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
+				pr_warn("prog '%s': relo #%d: bad insn\n",
+					prog->name, i);
+				return -EINVAL;
+			}
+			/* handled already */
 			break;
 		case RELO_CALL:
-			/* will be handled as a follow up pass */
+			/* handled already */
 			break;
 		default:
 			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
@@ -6592,6 +6596,30 @@ static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, si
 		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
 }
 
+static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+	int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
+	struct reloc_desc *relos;
+	int i;
+
+	if (main_prog == subprog)
+		return 0;
+	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
+	if (!relos)
+		return -ENOMEM;
+	memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
+	       sizeof(*relos) * subprog->nr_reloc);
+
+	for (i = main_prog->nr_reloc; i < new_cnt; i++)
+		relos[i].insn_idx += subprog->sub_insn_off;
+	/* After insn_idx adjustment the 'relos' array is still sorted
+	 * by insn_idx and doesn't break bsearch.
+	 */
+	main_prog->reloc_desc = relos;
+	main_prog->nr_reloc = new_cnt;
+	return 0;
+}
+
 static int
 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 		       struct bpf_program *prog)
@@ -6612,6 +6640,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 			continue;
 
 		relo = find_prog_insn_relo(prog, insn_idx);
+		if (relo && relo->type == RELO_EXTERN_FUNC)
+			/* kfunc relocations will be handled later
+			 * in bpf_object__relocate_data()
+			 */
+			continue;
 		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
 			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
 				prog->name, insn_idx, relo->type);
@@ -6686,6 +6719,10 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 			pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
 				 main_prog->name, subprog->insns_cnt, subprog->name);
 
+			/* The subprog insns are now appended. Append its relos too. */
+			err = append_subprog_relos(main_prog, subprog);
+			if (err)
+				return err;
 			err = bpf_object__reloc_code(obj, main_prog, subprog);
 			if (err)
 				return err;
@@ -6819,7 +6856,7 @@ static int
 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 {
 	struct bpf_program *prog;
-	size_t i;
+	size_t i, j;
 	int err;
 
 	if (obj->btf_ext) {
@@ -6830,23 +6867,32 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 			return err;
 		}
 	}
-	/* relocate data references first for all programs and sub-programs,
-	 * as they don't change relative to code locations, so subsequent
-	 * subprogram processing won't need to re-calculate any of them
+
+	/* Before relocating calls pre-process relocations and mark
+	 * few ld_imm64 instructions that points to subprogs.
+	 * Otherwise bpf_object__reloc_code() later would have to consider
+	 * all ld_imm64 insns as relocation candidates. That would
+	 * reduce relocation speed, since amount of find_prog_insn_relo()
+	 * would increase and most of them will fail to find a relo.
 	 */
 	for (i = 0; i < obj->nr_programs; i++) {
 		prog = &obj->programs[i];
-		err = bpf_object__relocate_data(obj, prog);
-		if (err) {
-			pr_warn("prog '%s': failed to relocate data references: %d\n",
-				prog->name, err);
-			return err;
+		for (j = 0; j < prog->nr_reloc; j++) {
+			struct reloc_desc *relo = &prog->reloc_desc[j];
+			struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+
+			/* mark the insn, so it's recognized by insn_is_pseudo_func() */
+			if (relo->type == RELO_SUBPROG_ADDR)
+				insn[0].src_reg = BPF_PSEUDO_FUNC;
 		}
 	}
-	/* now relocate subprogram calls and append used subprograms to main
+
+	/* relocate subprogram calls and append used subprograms to main
 	 * programs; each copy of subprogram code needs to be relocated
 	 * differently for each main program, because its code location might
-	 * have changed
+	 * have changed.
+	 * Append subprog relos to main programs to allow data relos to be
+	 * processed after text is completely relocated.
 	 */
 	for (i = 0; i < obj->nr_programs; i++) {
 		prog = &obj->programs[i];
@@ -6863,6 +6909,18 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 			return err;
 		}
 	}
+	/* Process data relos for main programs */
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		if (prog_is_subprog(obj, prog))
+			continue;
+		err = bpf_object__relocate_data(obj, prog);
+		if (err) {
+			pr_warn("prog '%s': failed to relocate data references: %d\n",
+				prog->name, err);
+			return err;
+		}
+	}
 	/* free up relocation descriptors */
 	for (i = 0; i < obj->nr_programs; i++) {
 		prog = &obj->programs[i];
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index d3c5673c0218..b7c37ca09544 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -4,8 +4,18 @@
 
 const char LICENSE[] SEC("license") = "GPL";
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array SEC(".maps");
+
 __noinline int sub1(int x)
 {
+	int key = 0;
+
+	bpf_map_lookup_elem(&array, &key);
 	return x + 1;
 }
 
@@ -23,6 +33,9 @@ static __noinline int sub3(int z)
 
 static __noinline int sub4(int w)
 {
+	int key = 0;
+
+	bpf_map_lookup_elem(&array, &key);
 	return w + sub3(5) + sub1(6);
 }
 

From 9ca1f56ababea5f5c714074845ee1c9e4dd75956 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:14 -0700
Subject: [PATCH 0291/3292] libbpf: Add bpf_object pointer to
 kernel_supports().

Add a pointer to 'struct bpf_object' to kernel_supports() helper.
It will be used in the next patch.
No functional changes.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-13-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/libbpf.c | 44 +++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 669400a0b676..29e26fabe2ee 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -178,7 +178,7 @@ enum kern_feature_id {
 	__FEAT_CNT,
 };
 
-static bool kernel_supports(enum kern_feature_id feat_id);
+static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
 
 enum reloc_type {
 	RELO_LD64,
@@ -2463,20 +2463,20 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
 
 static bool btf_needs_sanitization(struct bpf_object *obj)
 {
-	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
-	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
-	bool has_float = kernel_supports(FEAT_BTF_FLOAT);
-	bool has_func = kernel_supports(FEAT_BTF_FUNC);
+	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 
 	return !has_func || !has_datasec || !has_func_global || !has_float;
 }
 
 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 {
-	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
-	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
-	bool has_float = kernel_supports(FEAT_BTF_FLOAT);
-	bool has_func = kernel_supports(FEAT_BTF_FUNC);
+	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
+	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
+	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
+	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 	struct btf_type *t;
 	int i, j, vlen;
 
@@ -2682,7 +2682,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 	if (!obj->btf)
 		return 0;
 
-	if (!kernel_supports(FEAT_BTF)) {
+	if (!kernel_supports(obj, FEAT_BTF)) {
 		if (kernel_needs_btf(obj)) {
 			err = -EOPNOTSUPP;
 			goto report;
@@ -4310,7 +4310,7 @@ static struct kern_feature_desc {
 	},
 };
 
-static bool kernel_supports(enum kern_feature_id feat_id)
+static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 {
 	struct kern_feature_desc *feat = &feature_probes[feat_id];
 	int ret;
@@ -4429,7 +4429,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 
 	memset(&create_attr, 0, sizeof(create_attr));
 
-	if (kernel_supports(FEAT_PROG_NAME))
+	if (kernel_supports(obj, FEAT_PROG_NAME))
 		create_attr.name = map->name;
 	create_attr.map_ifindex = map->map_ifindex;
 	create_attr.map_type = def->type;
@@ -4994,7 +4994,7 @@ static int load_module_btfs(struct bpf_object *obj)
 	obj->btf_modules_loaded = true;
 
 	/* kernel too old to support module BTFs */
-	if (!kernel_supports(FEAT_MODULE_BTF))
+	if (!kernel_supports(obj, FEAT_MODULE_BTF))
 		return 0;
 
 	while (true) {
@@ -6518,7 +6518,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj,
 	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
 	 * supprot func/line info
 	 */
-	if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
+	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
 		return 0;
 
 	/* only attempt func info relocation if main program's func_info
@@ -7126,12 +7126,12 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program
 		switch (func_id) {
 		case BPF_FUNC_probe_read_kernel:
 		case BPF_FUNC_probe_read_user:
-			if (!kernel_supports(FEAT_PROBE_READ_KERN))
+			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 				insn->imm = BPF_FUNC_probe_read;
 			break;
 		case BPF_FUNC_probe_read_kernel_str:
 		case BPF_FUNC_probe_read_user_str:
-			if (!kernel_supports(FEAT_PROBE_READ_KERN))
+			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 				insn->imm = BPF_FUNC_probe_read_str;
 			break;
 		default:
@@ -7166,12 +7166,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 
 	load_attr.prog_type = prog->type;
 	/* old kernels might not support specifying expected_attach_type */
-	if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
+	if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
 	    prog->sec_def->is_exp_attach_type_optional)
 		load_attr.expected_attach_type = 0;
 	else
 		load_attr.expected_attach_type = prog->expected_attach_type;
-	if (kernel_supports(FEAT_PROG_NAME))
+	if (kernel_supports(prog->obj, FEAT_PROG_NAME))
 		load_attr.name = prog->name;
 	load_attr.insns = insns;
 	load_attr.insn_cnt = insns_cnt;
@@ -7187,7 +7187,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 
 	/* specify func_info/line_info only if kernel supports them */
 	btf_fd = bpf_object__btf_fd(prog->obj);
-	if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
+	if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) {
 		load_attr.prog_btf_fd = btf_fd;
 		load_attr.func_info = prog->func_info;
 		load_attr.func_info_rec_size = prog->func_info_rec_size;
@@ -7217,7 +7217,7 @@ retry_load:
 			pr_debug("verifier log:\n%s", log_buf);
 
 		if (prog->obj->rodata_map_idx >= 0 &&
-		    kernel_supports(FEAT_PROG_BIND_MAP)) {
+		    kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) {
 			struct bpf_map *rodata_map =
 				&prog->obj->maps[prog->obj->rodata_map_idx];
 
@@ -7575,11 +7575,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
 	bpf_object__for_each_map(m, obj) {
 		if (!bpf_map__is_internal(m))
 			continue;
-		if (!kernel_supports(FEAT_GLOBAL_DATA)) {
+		if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
 			pr_warn("kernel doesn't support global data\n");
 			return -ENOTSUP;
 		}
-		if (!kernel_supports(FEAT_ARRAY_MMAP))
+		if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
 			m->def.map_flags ^= BPF_F_MMAPABLE;
 	}
 

From e2fa0156a434c140998aa16ecad329e4bc19f263 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:15 -0700
Subject: [PATCH 0292/3292] libbpf: Preliminary support for fd_idx

Prep libbpf to use FD_IDX kernel feature when generating loader program.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-14-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/libbpf.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 29e26fabe2ee..f398ca83167e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -412,6 +412,8 @@ struct module_btf {
 	int fd;
 };
 
+struct bpf_gen;
+
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
@@ -432,6 +434,8 @@ struct bpf_object {
 	bool loaded;
 	bool has_subcalls;
 
+	struct bpf_gen *gen_loader;
+
 	/*
 	 * Information when doing elf related work. Only valid if fd
 	 * is valid.
@@ -6388,19 +6392,34 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 
 		switch (relo->type) {
 		case RELO_LD64:
-			insn[0].src_reg = BPF_PSEUDO_MAP_FD;
-			insn[0].imm = obj->maps[relo->map_idx].fd;
+			if (obj->gen_loader) {
+				insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
+				insn[0].imm = relo->map_idx;
+			} else {
+				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+				insn[0].imm = obj->maps[relo->map_idx].fd;
+			}
 			break;
 		case RELO_DATA:
-			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 			insn[1].imm = insn[0].imm + relo->sym_off;
-			insn[0].imm = obj->maps[relo->map_idx].fd;
+			if (obj->gen_loader) {
+				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+				insn[0].imm = relo->map_idx;
+			} else {
+				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+				insn[0].imm = obj->maps[relo->map_idx].fd;
+			}
 			break;
 		case RELO_EXTERN_VAR:
 			ext = &obj->externs[relo->sym_off];
 			if (ext->type == EXT_KCFG) {
-				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+				if (obj->gen_loader) {
+					insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
+					insn[0].imm = obj->kconfig_map_idx;
+				} else {
+					insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+					insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+				}
 				insn[1].imm = ext->kcfg.data_off;
 			} else /* EXT_KSYM */ {
 				if (ext->ksym.type_id) { /* typed ksyms */

From 67234743736a6ac31e3e74f6ec5e6d7bb3073676 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:16 -0700
Subject: [PATCH 0293/3292] libbpf: Generate loader program out of BPF ELF
 file.

The BPF program loading process performed by libbpf is quite complex
and consists of the following steps:
"open" phase:
- parse elf file and remember relocations, sections
- collect externs and ksyms including their btf_ids in prog's BTF
- patch BTF datasec (since llvm couldn't do it)
- init maps (old style map_def, BTF based, global data map, kconfig map)
- collect relocations against progs and maps
"load" phase:
- probe kernel features
- load vmlinux BTF
- resolve externs (kconfig and ksym)
- load program BTF
- init struct_ops
- create maps
- apply CO-RE relocations
- patch ld_imm64 insns with src_reg=PSEUDO_MAP, PSEUDO_MAP_VALUE, PSEUDO_BTF_ID
- reposition subprograms and adjust call insns
- sanitize and load progs

During this process libbpf does sys_bpf() calls to load BTF, create maps,
populate maps and finally load programs.
Instead of actually doing the syscalls generate a trace of what libbpf
would have done and represent it as the "loader program".
The "loader program" consists of single map with:
- union bpf_attr(s)
- BTF bytes
- map value bytes
- insns bytes
and single bpf program that passes bpf_attr(s) and data into bpf_sys_bpf() helper.
Executing such "loader program" via bpf_prog_test_run() command will
replay the sequence of syscalls that libbpf would have done which will result
the same maps created and programs loaded as specified in the elf file.
The "loader program" removes libelf and majority of libbpf dependency from
program loading process.

kconfig, typeless ksym, struct_ops and CO-RE are not supported yet.

The order of relocate_data and relocate_calls had to change, so that
bpf_gen__prog_load() can see all relocations for a given program with
correct insn_idx-es.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-15-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/Build              |   2 +-
 tools/lib/bpf/bpf_gen_internal.h |  40 ++
 tools/lib/bpf/gen_loader.c       | 689 +++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.c           | 226 ++++++++--
 tools/lib/bpf/libbpf.h           |  12 +
 tools/lib/bpf/libbpf.map         |   1 +
 tools/lib/bpf/libbpf_internal.h  |   2 +
 tools/lib/bpf/skel_internal.h    | 123 ++++++
 8 files changed, 1060 insertions(+), 35 deletions(-)
 create mode 100644 tools/lib/bpf/bpf_gen_internal.h
 create mode 100644 tools/lib/bpf/gen_loader.c
 create mode 100644 tools/lib/bpf/skel_internal.h

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 9b057cc7650a..430f6874fa41 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-	    btf_dump.o ringbuf.o strset.o linker.o
+	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
new file mode 100644
index 000000000000..f42a55efd559
--- /dev/null
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __BPF_GEN_INTERNAL_H
+#define __BPF_GEN_INTERNAL_H
+
+struct ksym_relo_desc {
+	const char *name;
+	int kind;
+	int insn_idx;
+};
+
+struct bpf_gen {
+	struct gen_loader_opts *opts;
+	void *data_start;
+	void *data_cur;
+	void *insn_start;
+	void *insn_cur;
+	__u32 nr_progs;
+	__u32 nr_maps;
+	int log_level;
+	int error;
+	struct ksym_relo_desc *relos;
+	int relo_cnt;
+	char attach_target[128];
+	int attach_kind;
+};
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level);
+int bpf_gen__finish(struct bpf_gen *gen);
+void bpf_gen__free(struct bpf_gen *gen);
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
+void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx);
+struct bpf_prog_load_params;
+void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx);
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx);
+
+#endif
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
new file mode 100644
index 000000000000..0fc54b1ca311
--- /dev/null
+++ b/tools/lib/bpf/gen_loader.c
@@ -0,0 +1,689 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+#include "bpf_gen_internal.h"
+#include "skel_internal.h"
+
+#define MAX_USED_MAPS 64
+#define MAX_USED_PROGS 32
+
+/* The following structure describes the stack layout of the loader program.
+ * In addition R6 contains the pointer to context.
+ * R7 contains the result of the last sys_bpf command (typically error or FD).
+ * R9 contains the result of the last sys_close command.
+ *
+ * Naming convention:
+ * ctx - bpf program context
+ * stack - bpf program stack
+ * blob - bpf_attr-s, strings, insns, map data.
+ *        All the bytes that loader prog will use for read/write.
+ */
+struct loader_stack {
+	__u32 btf_fd;
+	__u32 map_fd[MAX_USED_MAPS];
+	__u32 prog_fd[MAX_USED_PROGS];
+	__u32 inner_map_fd;
+};
+
+#define stack_off(field) \
+	(__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field))
+
+#define attr_field(attr, field) (attr + offsetof(union bpf_attr, field))
+
+static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
+{
+	size_t off = gen->insn_cur - gen->insn_start;
+	void *insn_start;
+
+	if (gen->error)
+		return gen->error;
+	if (size > INT32_MAX || off + size > INT32_MAX) {
+		gen->error = -ERANGE;
+		return -ERANGE;
+	}
+	insn_start = realloc(gen->insn_start, off + size);
+	if (!insn_start) {
+		gen->error = -ENOMEM;
+		free(gen->insn_start);
+		gen->insn_start = NULL;
+		return -ENOMEM;
+	}
+	gen->insn_start = insn_start;
+	gen->insn_cur = insn_start + off;
+	return 0;
+}
+
+static int realloc_data_buf(struct bpf_gen *gen, __u32 size)
+{
+	size_t off = gen->data_cur - gen->data_start;
+	void *data_start;
+
+	if (gen->error)
+		return gen->error;
+	if (size > INT32_MAX || off + size > INT32_MAX) {
+		gen->error = -ERANGE;
+		return -ERANGE;
+	}
+	data_start = realloc(gen->data_start, off + size);
+	if (!data_start) {
+		gen->error = -ENOMEM;
+		free(gen->data_start);
+		gen->data_start = NULL;
+		return -ENOMEM;
+	}
+	gen->data_start = data_start;
+	gen->data_cur = data_start + off;
+	return 0;
+}
+
+static void emit(struct bpf_gen *gen, struct bpf_insn insn)
+{
+	if (realloc_insn_buf(gen, sizeof(insn)))
+		return;
+	memcpy(gen->insn_cur, &insn, sizeof(insn));
+	gen->insn_cur += sizeof(insn);
+}
+
+static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2)
+{
+	emit(gen, insn1);
+	emit(gen, insn2);
+}
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level)
+{
+	gen->log_level = log_level;
+	emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
+}
+
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
+{
+	void *prev;
+
+	if (realloc_data_buf(gen, size))
+		return 0;
+	prev = gen->data_cur;
+	memcpy(gen->data_cur, data, size);
+	gen->data_cur += size;
+	return prev - gen->data_start;
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+	switch (sz) {
+	case 8: return BPF_DW;
+	case 4: return BPF_W;
+	case 2: return BPF_H;
+	case 1: return BPF_B;
+	default: return -1;
+	}
+}
+
+/* *(u64 *)(blob + off) = (u64)(void *)(blob + data) */
+static void emit_rel_store(struct bpf_gen *gen, int off, int data)
+{
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, data));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, off));
+	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */
+static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off)
+{
+	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10));
+	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, off));
+	emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
+				   bool check_non_zero)
+{
+	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off));
+	if (check_non_zero)
+		/* If value in ctx is zero don't update the blob.
+		 * For example: when ctx->map.max_entries == 0, keep default max_entries from bpf.c
+		 */
+		emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, off));
+	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off)
+{
+	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, off));
+	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
+}
+
+static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off)
+{
+	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
+	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
+}
+
+static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size)
+{
+	emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, attr));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf));
+	/* remember the result in R7 */
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+}
+
+static void emit_check_err(struct bpf_gen *gen)
+{
+	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 2));
+	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+	/* TODO: close intermediate FDs in case of error */
+	emit(gen, BPF_EXIT_INSN());
+}
+
+/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */
+static void emit_debug(struct bpf_gen *gen, int reg1, int reg2,
+		       const char *fmt, va_list args)
+{
+	char buf[1024];
+	int addr, len, ret;
+
+	if (!gen->log_level)
+		return;
+	ret = vsnprintf(buf, sizeof(buf), fmt, args);
+	if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0)
+		/* The special case to accommodate common debug_ret():
+		 * to avoid specifying BPF_REG_7 and adding " r=%%d" to
+		 * prints explicitly.
+		 */
+		strcat(buf, " r=%d");
+	len = strlen(buf) + 1;
+	addr = add_data(gen, buf, len);
+
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, addr));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+	if (reg1 >= 0)
+		emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1));
+	if (reg2 >= 0)
+		emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk));
+}
+
+static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	emit_debug(gen, reg1, reg2, fmt, args);
+	va_end(args);
+}
+
+static void debug_ret(struct bpf_gen *gen, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	emit_debug(gen, BPF_REG_7, -1, fmt, args);
+	va_end(args);
+}
+
+static void __emit_sys_close(struct bpf_gen *gen)
+{
+	emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0,
+			      /* 2 is the number of the following insns
+			       * * 6 is additional insns in debug_regs
+			       */
+			      2 + (gen->log_level ? 6 : 0)));
+	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+	debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d");
+}
+
+static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off)
+{
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off));
+	__emit_sys_close(gen);
+}
+
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
+{
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, blob_off));
+	emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0));
+	__emit_sys_close(gen);
+}
+
+int bpf_gen__finish(struct bpf_gen *gen)
+{
+	int i;
+
+	emit_sys_close_stack(gen, stack_off(btf_fd));
+	for (i = 0; i < gen->nr_progs; i++)
+		move_stack2ctx(gen,
+			       sizeof(struct bpf_loader_ctx) +
+			       sizeof(struct bpf_map_desc) * gen->nr_maps +
+			       sizeof(struct bpf_prog_desc) * i +
+			       offsetof(struct bpf_prog_desc, prog_fd), 4,
+			       stack_off(prog_fd[i]));
+	for (i = 0; i < gen->nr_maps; i++)
+		move_stack2ctx(gen,
+			       sizeof(struct bpf_loader_ctx) +
+			       sizeof(struct bpf_map_desc) * i +
+			       offsetof(struct bpf_map_desc, map_fd), 4,
+			       stack_off(map_fd[i]));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
+	emit(gen, BPF_EXIT_INSN());
+	pr_debug("gen: finish %d\n", gen->error);
+	if (!gen->error) {
+		struct gen_loader_opts *opts = gen->opts;
+
+		opts->insns = gen->insn_start;
+		opts->insns_sz = gen->insn_cur - gen->insn_start;
+		opts->data = gen->data_start;
+		opts->data_sz = gen->data_cur - gen->data_start;
+	}
+	return gen->error;
+}
+
+void bpf_gen__free(struct bpf_gen *gen)
+{
+	if (!gen)
+		return;
+	free(gen->data_start);
+	free(gen->insn_start);
+	free(gen);
+}
+
+void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
+		       __u32 btf_raw_size)
+{
+	int attr_size = offsetofend(union bpf_attr, btf_log_level);
+	int btf_data, btf_load_attr;
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: load_btf: size %d\n", btf_raw_size);
+	btf_data = add_data(gen, btf_raw_data, btf_raw_size);
+
+	attr.btf_size = btf_raw_size;
+	btf_load_attr = add_data(gen, &attr, attr_size);
+
+	/* populate union bpf_attr with user provided log details */
+	move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4,
+		      offsetof(struct bpf_loader_ctx, log_level), false);
+	move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_size), 4,
+		      offsetof(struct bpf_loader_ctx, log_size), false);
+	move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_buf), 8,
+		      offsetof(struct bpf_loader_ctx, log_buf), false);
+	/* populate union bpf_attr with a pointer to the BTF data */
+	emit_rel_store(gen, attr_field(btf_load_attr, btf), btf_data);
+	/* emit BTF_LOAD command */
+	emit_sys_bpf(gen, BPF_BTF_LOAD, btf_load_attr, attr_size);
+	debug_ret(gen, "btf_load size %d", btf_raw_size);
+	emit_check_err(gen);
+	/* remember btf_fd in the stack, if successful */
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd)));
+}
+
+void bpf_gen__map_create(struct bpf_gen *gen,
+			 struct bpf_create_map_attr *map_attr, int map_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id);
+	bool close_inner_map_fd = false;
+	int map_create_attr;
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_size);
+	attr.map_type = map_attr->map_type;
+	attr.key_size = map_attr->key_size;
+	attr.value_size = map_attr->value_size;
+	attr.map_flags = map_attr->map_flags;
+	memcpy(attr.map_name, map_attr->name,
+	       min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.numa_node = map_attr->numa_node;
+	attr.map_ifindex = map_attr->map_ifindex;
+	attr.max_entries = map_attr->max_entries;
+	switch (attr.map_type) {
+	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+	case BPF_MAP_TYPE_CGROUP_ARRAY:
+	case BPF_MAP_TYPE_STACK_TRACE:
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+	case BPF_MAP_TYPE_DEVMAP:
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+	case BPF_MAP_TYPE_CPUMAP:
+	case BPF_MAP_TYPE_XSKMAP:
+	case BPF_MAP_TYPE_SOCKMAP:
+	case BPF_MAP_TYPE_SOCKHASH:
+	case BPF_MAP_TYPE_QUEUE:
+	case BPF_MAP_TYPE_STACK:
+	case BPF_MAP_TYPE_RINGBUF:
+		break;
+	default:
+		attr.btf_key_type_id = map_attr->btf_key_type_id;
+		attr.btf_value_type_id = map_attr->btf_value_type_id;
+	}
+
+	pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n",
+		 attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id);
+
+	map_create_attr = add_data(gen, &attr, attr_size);
+	if (attr.btf_value_type_id)
+		/* populate union bpf_attr with btf_fd saved in the stack earlier */
+		move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4,
+				stack_off(btf_fd));
+	switch (attr.map_type) {
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+		move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4,
+				stack_off(inner_map_fd));
+		close_inner_map_fd = true;
+		break;
+	default:
+		break;
+	}
+	/* conditionally update max_entries */
+	if (map_idx >= 0)
+		move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4,
+			      sizeof(struct bpf_loader_ctx) +
+			      sizeof(struct bpf_map_desc) * map_idx +
+			      offsetof(struct bpf_map_desc, max_entries),
+			      true /* check that max_entries != 0 */);
+	/* emit MAP_CREATE command */
+	emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size);
+	debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d",
+		  attr.map_name, map_idx, map_attr->map_type, attr.value_size,
+		  attr.btf_value_type_id);
+	emit_check_err(gen);
+	/* remember map_fd in the stack, if successful */
+	if (map_idx < 0) {
+		/* This bpf_gen__map_create() function is called with map_idx >= 0
+		 * for all maps that libbpf loading logic tracks.
+		 * It's called with -1 to create an inner map.
+		 */
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+				      stack_off(inner_map_fd)));
+	} else if (map_idx != gen->nr_maps) {
+		gen->error = -EDOM; /* internal bug */
+		return;
+	} else {
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+				      stack_off(map_fd[map_idx])));
+		gen->nr_maps++;
+	}
+	if (close_inner_map_fd)
+		emit_sys_close_stack(gen, stack_off(inner_map_fd));
+}
+
+void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
+				   enum bpf_attach_type type)
+{
+	const char *prefix;
+	int kind, ret;
+
+	btf_get_kernel_prefix_kind(type, &prefix, &kind);
+	gen->attach_kind = kind;
+	ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s",
+		       prefix, attach_name);
+	if (ret == sizeof(gen->attach_target))
+		gen->error = -ENOSPC;
+}
+
+static void emit_find_attach_target(struct bpf_gen *gen)
+{
+	int name, len = strlen(gen->attach_target) + 1;
+
+	pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind);
+	name = add_data(gen, gen->attach_target, len);
+
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, name));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	debug_ret(gen, "find_by_name_kind(%s,%d)",
+		  gen->attach_target, gen->attach_kind);
+	emit_check_err(gen);
+	/* if successful, btf_id is in lower 32-bit of R7 and
+	 * btf_obj_fd is in upper 32-bit
+	 */
+}
+
+void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind,
+			    int insn_idx)
+{
+	struct ksym_relo_desc *relo;
+
+	relo = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*relo));
+	if (!relo) {
+		gen->error = -ENOMEM;
+		return;
+	}
+	gen->relos = relo;
+	relo += gen->relo_cnt;
+	relo->name = name;
+	relo->kind = kind;
+	relo->insn_idx = insn_idx;
+	gen->relo_cnt++;
+}
+
+static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)
+{
+	int name, insn, len = strlen(relo->name) + 1;
+
+	pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx);
+	name = add_data(gen, relo->name, len);
+
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, name));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, len));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind));
+	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind);
+	emit_check_err(gen);
+	/* store btf_id into insn[insn_idx].imm */
+	insn = insns + sizeof(struct bpf_insn) * relo->insn_idx +
+		offsetof(struct bpf_insn, imm);
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, insn));
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0));
+	if (relo->kind == BTF_KIND_VAR) {
+		/* store btf_obj_fd into insn[insn_idx + 1].imm */
+		emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+				      sizeof(struct bpf_insn)));
+	}
+}
+
+static void emit_relos(struct bpf_gen *gen, int insns)
+{
+	int i;
+
+	for (i = 0; i < gen->relo_cnt; i++)
+		emit_relo(gen, gen->relos + i, insns);
+}
+
+static void cleanup_relos(struct bpf_gen *gen, int insns)
+{
+	int i, insn;
+
+	for (i = 0; i < gen->relo_cnt; i++) {
+		if (gen->relos[i].kind != BTF_KIND_VAR)
+			continue;
+		/* close fd recorded in insn[insn_idx + 1].imm */
+		insn = insns +
+			sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) +
+			offsetof(struct bpf_insn, imm);
+		emit_sys_close_blob(gen, insn);
+	}
+	if (gen->relo_cnt) {
+		free(gen->relos);
+		gen->relo_cnt = 0;
+		gen->relos = NULL;
+	}
+}
+
+void bpf_gen__prog_load(struct bpf_gen *gen,
+			struct bpf_prog_load_params *load_attr, int prog_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, fd_array);
+	int prog_load_attr, license, insns, func_info, line_info;
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: prog_load: type %d insns_cnt %zd\n",
+		 load_attr->prog_type, load_attr->insn_cnt);
+	/* add license string to blob of bytes */
+	license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1);
+	/* add insns to blob of bytes */
+	insns = add_data(gen, load_attr->insns,
+			 load_attr->insn_cnt * sizeof(struct bpf_insn));
+
+	attr.prog_type = load_attr->prog_type;
+	attr.expected_attach_type = load_attr->expected_attach_type;
+	attr.attach_btf_id = load_attr->attach_btf_id;
+	attr.prog_ifindex = load_attr->prog_ifindex;
+	attr.kern_version = 0;
+	attr.insn_cnt = (__u32)load_attr->insn_cnt;
+	attr.prog_flags = load_attr->prog_flags;
+
+	attr.func_info_rec_size = load_attr->func_info_rec_size;
+	attr.func_info_cnt = load_attr->func_info_cnt;
+	func_info = add_data(gen, load_attr->func_info,
+			     attr.func_info_cnt * attr.func_info_rec_size);
+
+	attr.line_info_rec_size = load_attr->line_info_rec_size;
+	attr.line_info_cnt = load_attr->line_info_cnt;
+	line_info = add_data(gen, load_attr->line_info,
+			     attr.line_info_cnt * attr.line_info_rec_size);
+
+	memcpy(attr.prog_name, load_attr->name,
+	       min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	prog_load_attr = add_data(gen, &attr, attr_size);
+
+	/* populate union bpf_attr with a pointer to license */
+	emit_rel_store(gen, attr_field(prog_load_attr, license), license);
+
+	/* populate union bpf_attr with a pointer to instructions */
+	emit_rel_store(gen, attr_field(prog_load_attr, insns), insns);
+
+	/* populate union bpf_attr with a pointer to func_info */
+	emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info);
+
+	/* populate union bpf_attr with a pointer to line_info */
+	emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info);
+
+	/* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */
+	emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array),
+			  stack_off(map_fd[0]));
+
+	/* populate union bpf_attr with user provided log details */
+	move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4,
+		      offsetof(struct bpf_loader_ctx, log_level), false);
+	move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4,
+		      offsetof(struct bpf_loader_ctx, log_size), false);
+	move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8,
+		      offsetof(struct bpf_loader_ctx, log_buf), false);
+	/* populate union bpf_attr with btf_fd saved in the stack earlier */
+	move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4,
+			stack_off(btf_fd));
+	if (gen->attach_kind) {
+		emit_find_attach_target(gen);
+		/* populate union bpf_attr with btf_id and btf_obj_fd found by helper */
+		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
+						 0, 0, 0, prog_load_attr));
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+				      offsetof(union bpf_attr, attach_btf_id)));
+		emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
+		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
+				      offsetof(union bpf_attr, attach_btf_obj_fd)));
+	}
+	emit_relos(gen, insns);
+	/* emit PROG_LOAD command */
+	emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);
+	debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);
+	/* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
+	cleanup_relos(gen, insns);
+	if (gen->attach_kind)
+		emit_sys_close_blob(gen,
+				    attr_field(prog_load_attr, attach_btf_obj_fd));
+	emit_check_err(gen);
+	/* remember prog_fd in the stack, if successful */
+	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
+			      stack_off(prog_fd[gen->nr_progs])));
+	gen->nr_progs++;
+}
+
+void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
+			      __u32 value_size)
+{
+	int attr_size = offsetofend(union bpf_attr, flags);
+	int map_update_attr, value, key;
+	union bpf_attr attr;
+	int zero = 0;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: map_update_elem: idx %d\n", map_idx);
+
+	value = add_data(gen, pvalue, value_size);
+	key = add_data(gen, &zero, sizeof(zero));
+
+	/* if (map_desc[map_idx].initial_value)
+	 *    copy_from_user(value, initial_value, value_size);
+	 */
+	emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,
+			      sizeof(struct bpf_loader_ctx) +
+			      sizeof(struct bpf_map_desc) * map_idx +
+			      offsetof(struct bpf_map_desc, initial_value)));
+	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4));
+	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
+					 0, 0, 0, value));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user));
+
+	map_update_attr = add_data(gen, &attr, attr_size);
+	move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4,
+			stack_off(map_fd[map_idx]));
+	emit_rel_store(gen, attr_field(map_update_attr, key), key);
+	emit_rel_store(gen, attr_field(map_update_attr, value), value);
+	/* emit MAP_UPDATE_ELEM command */
+	emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size);
+	debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size);
+	emit_check_err(gen);
+}
+
+void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
+{
+	int attr_size = offsetofend(union bpf_attr, map_fd);
+	int map_freeze_attr;
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_size);
+	pr_debug("gen: map_freeze: idx %d\n", map_idx);
+	map_freeze_attr = add_data(gen, &attr, attr_size);
+	move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4,
+			stack_off(map_fd[map_idx]));
+	/* emit MAP_FREEZE command */
+	emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size);
+	debug_ret(gen, "map_freeze");
+	emit_check_err(gen);
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f398ca83167e..a8a6bb30a45b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -54,6 +54,7 @@
 #include "str_error.h"
 #include "libbpf_internal.h"
 #include "hashmap.h"
+#include "bpf_gen_internal.h"
 
 #ifndef BPF_FS_MAGIC
 #define BPF_FS_MAGIC		0xcafe4a11
@@ -412,8 +413,6 @@ struct module_btf {
 	int fd;
 };
 
-struct bpf_gen;
-
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
@@ -2661,7 +2660,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
 	int err;
 
 	/* btf_vmlinux could be loaded earlier */
-	if (obj->btf_vmlinux)
+	if (obj->btf_vmlinux || obj->gen_loader)
 		return 0;
 
 	if (!force && !obj_needs_vmlinux_btf(obj))
@@ -2743,7 +2742,20 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 		bpf_object__sanitize_btf(obj, kern_btf);
 	}
 
-	err = btf__load(kern_btf);
+	if (obj->gen_loader) {
+		__u32 raw_size = 0;
+		const void *raw_data = btf__get_raw_data(kern_btf, &raw_size);
+
+		if (!raw_data)
+			return -ENOMEM;
+		bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
+		/* Pretend to have valid FD to pass various fd >= 0 checks.
+		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+		 */
+		btf__set_fd(kern_btf, 0);
+	} else {
+		err = btf__load(kern_btf);
+	}
 	if (sanitize) {
 		if (!err) {
 			/* move fd to libbpf's BTF */
@@ -4319,6 +4331,12 @@ static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id f
 	struct kern_feature_desc *feat = &feature_probes[feat_id];
 	int ret;
 
+	if (obj->gen_loader)
+		/* To generate loader program assume the latest kernel
+		 * to avoid doing extra prog_load, map_create syscalls.
+		 */
+		return true;
+
 	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
 		ret = feat->probe();
 		if (ret > 0) {
@@ -4401,6 +4419,13 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 	char *cp, errmsg[STRERR_BUFSIZE];
 	int err, zero = 0;
 
+	if (obj->gen_loader) {
+		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
+					 map->mmaped, map->def.value_size);
+		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
+			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
+		return 0;
+	}
 	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
 	if (err) {
 		err = -errno;
@@ -4426,7 +4451,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 
 static void bpf_map__destroy(struct bpf_map *map);
 
-static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
 {
 	struct bpf_create_map_attr create_attr;
 	struct bpf_map_def *def = &map->def;
@@ -4474,7 +4499,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 		if (map->inner_map) {
 			int err;
 
-			err = bpf_object__create_map(obj, map->inner_map);
+			err = bpf_object__create_map(obj, map->inner_map, true);
 			if (err) {
 				pr_warn("map '%s': failed to create inner map: %d\n",
 					map->name, err);
@@ -4486,7 +4511,15 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 			create_attr.inner_map_fd = map->inner_map_fd;
 	}
 
-	map->fd = bpf_create_map_xattr(&create_attr);
+	if (obj->gen_loader) {
+		bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
+		/* Pretend to have valid FD to pass various fd >= 0 checks.
+		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
+		 */
+		map->fd = 0;
+	} else {
+		map->fd = bpf_create_map_xattr(&create_attr);
+	}
 	if (map->fd < 0 && (create_attr.btf_key_type_id ||
 			    create_attr.btf_value_type_id)) {
 		char *cp, errmsg[STRERR_BUFSIZE];
@@ -4507,6 +4540,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 		return -errno;
 
 	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+		if (obj->gen_loader)
+			map->inner_map->fd = -1;
 		bpf_map__destroy(map->inner_map);
 		zfree(&map->inner_map);
 	}
@@ -4514,11 +4549,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 	return 0;
 }
 
-static int init_map_slots(struct bpf_map *map)
+static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
 {
 	const struct bpf_map *targ_map;
 	unsigned int i;
-	int fd, err;
+	int fd, err = 0;
 
 	for (i = 0; i < map->init_slots_sz; i++) {
 		if (!map->init_slots[i])
@@ -4526,7 +4561,13 @@ static int init_map_slots(struct bpf_map *map)
 
 		targ_map = map->init_slots[i];
 		fd = bpf_map__fd(targ_map);
-		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+		if (obj->gen_loader) {
+			pr_warn("// TODO map_update_elem: idx %ld key %d value==map_idx %ld\n",
+				map - obj->maps, i, targ_map - obj->maps);
+			return -ENOTSUP;
+		} else {
+			err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+		}
 		if (err) {
 			err = -errno;
 			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
@@ -4568,7 +4609,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
 				 map->name, map->fd);
 		} else {
-			err = bpf_object__create_map(obj, map);
+			err = bpf_object__create_map(obj, map, false);
 			if (err)
 				goto err_out;
 
@@ -4584,7 +4625,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 			}
 
 			if (map->init_slots_sz) {
-				err = init_map_slots(map);
+				err = init_map_slots(obj, map);
 				if (err < 0) {
 					zclose(map->fd);
 					goto err_out;
@@ -4994,6 +5035,9 @@ static int load_module_btfs(struct bpf_object *obj)
 	if (obj->btf_modules_loaded)
 		return 0;
 
+	if (obj->gen_loader)
+		return 0;
+
 	/* don't do this again, even if we find no module BTFs */
 	obj->btf_modules_loaded = true;
 
@@ -6141,6 +6185,12 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	if (str_is_empty(spec_str))
 		return -EINVAL;
 
+	if (prog->obj->gen_loader) {
+		pr_warn("// TODO core_relo: prog %ld insn[%d] %s %s kind %d\n",
+			prog - prog->obj->programs, relo->insn_off / 8,
+			local_name, spec_str, relo->kind);
+		return -ENOTSUP;
+	}
 	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
 	if (err) {
 		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
@@ -6871,6 +6921,20 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 	return 0;
 }
 
+static void
+bpf_object__free_relocs(struct bpf_object *obj)
+{
+	struct bpf_program *prog;
+	int i;
+
+	/* free up relocation descriptors */
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		zfree(&prog->reloc_desc);
+		prog->nr_reloc = 0;
+	}
+}
+
 static int
 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 {
@@ -6940,12 +7004,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 			return err;
 		}
 	}
-	/* free up relocation descriptors */
-	for (i = 0; i < obj->nr_programs; i++) {
-		prog = &obj->programs[i];
-		zfree(&prog->reloc_desc);
-		prog->nr_reloc = 0;
-	}
+	if (!obj->gen_loader)
+		bpf_object__free_relocs(obj);
 	return 0;
 }
 
@@ -7134,6 +7194,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program
 	enum bpf_func_id func_id;
 	int i;
 
+	if (obj->gen_loader)
+		return 0;
+
 	for (i = 0; i < prog->insns_cnt; i++, insn++) {
 		if (!insn_is_helper_call(insn, &func_id))
 			continue;
@@ -7218,6 +7281,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.log_level = prog->log_level;
 	load_attr.prog_flags = prog->prog_flags;
 
+	if (prog->obj->gen_loader) {
+		bpf_gen__prog_load(prog->obj->gen_loader, &load_attr,
+				   prog - prog->obj->programs);
+		*pfd = -1;
+		return 0;
+	}
 retry_load:
 	if (log_buf_size) {
 		log_buf = malloc(log_buf_size);
@@ -7295,6 +7364,38 @@ out:
 	return ret;
 }
 
+static int bpf_program__record_externs(struct bpf_program *prog)
+{
+	struct bpf_object *obj = prog->obj;
+	int i;
+
+	for (i = 0; i < prog->nr_reloc; i++) {
+		struct reloc_desc *relo = &prog->reloc_desc[i];
+		struct extern_desc *ext = &obj->externs[relo->sym_off];
+
+		switch (relo->type) {
+		case RELO_EXTERN_VAR:
+			if (ext->type != EXT_KSYM)
+				continue;
+			if (!ext->ksym.type_id) {
+				pr_warn("typeless ksym %s is not supported yet\n",
+					ext->name);
+				return -ENOTSUP;
+			}
+			bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR,
+					       relo->insn_idx);
+			break;
+		case RELO_EXTERN_FUNC:
+			bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC,
+					       relo->insn_idx);
+			break;
+		default:
+			continue;
+		}
+	}
+	return 0;
+}
+
 static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
 
 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
@@ -7340,6 +7441,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 			pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
 				prog->name, prog->instances.nr);
 		}
+		if (prog->obj->gen_loader)
+			bpf_program__record_externs(prog);
 		err = load_program(prog, prog->insns, prog->insns_cnt,
 				   license, kern_ver, &fd);
 		if (!err)
@@ -7416,6 +7519,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 		if (err)
 			return err;
 	}
+	if (obj->gen_loader)
+		bpf_object__free_relocs(obj);
 	return 0;
 }
 
@@ -7796,6 +7901,12 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
 		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
 			continue;
 
+		if (obj->gen_loader) {
+			ext->is_set = true;
+			ext->ksym.kernel_btf_obj_fd = 0;
+			ext->ksym.kernel_btf_id = 0;
+			continue;
+		}
 		t = btf__type_by_id(obj->btf, ext->btf_id);
 		if (btf_is_var(t))
 			err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
@@ -7910,6 +8021,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 		return -EINVAL;
 	}
 
+	if (obj->gen_loader)
+		bpf_gen__init(obj->gen_loader, attr->log_level);
+
 	err = bpf_object__probe_loading(obj);
 	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -7920,6 +8034,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
 	err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
+	if (obj->gen_loader) {
+		/* reset FDs */
+		btf__set_fd(obj->btf, -1);
+		for (i = 0; i < obj->nr_maps; i++)
+			obj->maps[i].fd = -1;
+		if (!err)
+			err = bpf_gen__finish(obj->gen_loader);
+	}
+
 	/* clean up module BTFs */
 	for (i = 0; i < obj->btf_module_cnt; i++) {
 		close(obj->btf_modules[i].fd);
@@ -8545,6 +8668,7 @@ void bpf_object__close(struct bpf_object *obj)
 	if (obj->clear_priv)
 		obj->clear_priv(obj, obj->priv);
 
+	bpf_gen__free(obj->gen_loader);
 	bpf_object__elf_finish(obj);
 	bpf_object__unload(obj);
 	btf__free(obj->btf);
@@ -8635,6 +8759,22 @@ void *bpf_object__priv(const struct bpf_object *obj)
 	return obj ? obj->priv : ERR_PTR(-EINVAL);
 }
 
+int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
+{
+	struct bpf_gen *gen;
+
+	if (!opts)
+		return -EFAULT;
+	if (!OPTS_VALID(opts, gen_loader_opts))
+		return -EINVAL;
+	gen = calloc(sizeof(*gen), 1);
+	if (!gen)
+		return -ENOMEM;
+	gen->opts = opts;
+	obj->gen_loader = gen;
+	return 0;
+}
+
 static struct bpf_program *
 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 		    bool forward)
@@ -9272,6 +9412,28 @@ invalid_prog:
 #define BTF_ITER_PREFIX "bpf_iter_"
 #define BTF_MAX_NAME_SIZE 128
 
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+				const char **prefix, int *kind)
+{
+	switch (attach_type) {
+	case BPF_TRACE_RAW_TP:
+		*prefix = BTF_TRACE_PREFIX;
+		*kind = BTF_KIND_TYPEDEF;
+		break;
+	case BPF_LSM_MAC:
+		*prefix = BTF_LSM_PREFIX;
+		*kind = BTF_KIND_FUNC;
+		break;
+	case BPF_TRACE_ITER:
+		*prefix = BTF_ITER_PREFIX;
+		*kind = BTF_KIND_FUNC;
+		break;
+	default:
+		*prefix = "";
+		*kind = BTF_KIND_FUNC;
+	}
+}
+
 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 				   const char *name, __u32 kind)
 {
@@ -9292,21 +9454,11 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 static inline int find_attach_btf_id(struct btf *btf, const char *name,
 				     enum bpf_attach_type attach_type)
 {
-	int err;
+	const char *prefix;
+	int kind;
 
-	if (attach_type == BPF_TRACE_RAW_TP)
-		err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
-					      BTF_KIND_TYPEDEF);
-	else if (attach_type == BPF_LSM_MAC)
-		err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
-					      BTF_KIND_FUNC);
-	else if (attach_type == BPF_TRACE_ITER)
-		err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
-					      BTF_KIND_FUNC);
-	else
-		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
-
-	return err;
+	btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
+	return find_btf_by_prefix_kind(btf, prefix, name, kind);
 }
 
 int libbpf_find_vmlinux_btf_id(const char *name,
@@ -9405,7 +9557,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
 	__u32 attach_prog_fd = prog->attach_prog_fd;
 	const char *name = prog->sec_name, *attach_name;
 	const struct bpf_sec_def *sec = NULL;
-	int i, err;
+	int i, err = 0;
 
 	if (!name)
 		return -EINVAL;
@@ -9440,7 +9592,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
 	}
 
 	/* kernel/module BTF ID */
-	err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+	if (prog->obj->gen_loader) {
+		bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
+		*btf_obj_fd = 0;
+		*btf_type_id = 1;
+	} else {
+		err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+	}
 	if (err) {
 		pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
 		return err;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index cc51dc4465f2..46b31c1f4849 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -800,6 +800,18 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
 LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
 LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
 
+struct gen_loader_opts {
+	size_t sz; /* size of this struct, for forward/backward compatiblity */
+	const char *data;
+	const char *insns;
+	__u32 data_sz;
+	__u32 insns_sz;
+};
+
+#define gen_loader_opts__last_field insns_sz
+LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
+				      struct gen_loader_opts *opts);
+
 enum libbpf_tristate {
 	TRI_NO = 0,
 	TRI_YES = 1,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 6c96729050dc..bc59516a8ccf 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -360,6 +360,7 @@ LIBBPF_0.4.0 {
 		bpf_linker__free;
 		bpf_linker__new;
 		bpf_map__inner_map;
+		bpf_object__gen_loader;
 		bpf_object__set_kversion;
 		bpf_tc_attach;
 		bpf_tc_detach;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index ee426226928f..55d9b4dca64f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -258,6 +258,8 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 				__u32 *off);
 struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
+void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+				const char **prefix, int *kind);
 
 struct btf_ext_info {
 	/*
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
new file mode 100644
index 000000000000..12a126b452c1
--- /dev/null
+++ b/tools/lib/bpf/skel_internal.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Facebook */
+#ifndef __SKEL_INTERNAL_H
+#define __SKEL_INTERNAL_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+
+/* This file is a base header for auto-generated *.lskel.h files.
+ * Its contents will change and may become part of auto-generation in the future.
+ *
+ * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent
+ * and will change from one version of libbpf to another and features
+ * requested during loader program generation.
+ */
+struct bpf_map_desc {
+	union {
+		/* input for the loader prog */
+		struct {
+			__aligned_u64 initial_value;
+			__u32 max_entries;
+		};
+		/* output of the loader prog */
+		struct {
+			int map_fd;
+		};
+	};
+};
+struct bpf_prog_desc {
+	int prog_fd;
+};
+
+struct bpf_loader_ctx {
+	size_t sz;
+	__u32 log_level;
+	__u32 log_size;
+	__u64 log_buf;
+};
+
+struct bpf_load_and_run_opts {
+	struct bpf_loader_ctx *ctx;
+	const void *data;
+	const void *insns;
+	__u32 data_sz;
+	__u32 insns_sz;
+	const char *errstr;
+};
+
+static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+			  unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+static inline int skel_closenz(int fd)
+{
+	if (fd > 0)
+		return close(fd);
+	return -EINVAL;
+}
+
+static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
+{
+	int map_fd = -1, prog_fd = -1, key = 0, err;
+	union bpf_attr attr;
+
+	map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4,
+				     opts->data_sz, 1, 0);
+	if (map_fd < 0) {
+		opts->errstr = "failed to create loader map";
+		err = -errno;
+		goto out;
+	}
+
+	err = bpf_map_update_elem(map_fd, &key, opts->data, 0);
+	if (err < 0) {
+		opts->errstr = "failed to update loader map";
+		err = -errno;
+		goto out;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SYSCALL;
+	attr.insns = (long) opts->insns;
+	attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
+	attr.license = (long) "Dual BSD/GPL";
+	memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
+	attr.fd_array = (long) &map_fd;
+	attr.log_level = opts->ctx->log_level;
+	attr.log_size = opts->ctx->log_size;
+	attr.log_buf = opts->ctx->log_buf;
+	attr.prog_flags = BPF_F_SLEEPABLE;
+	prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+	if (prog_fd < 0) {
+		opts->errstr = "failed to load loader prog";
+		err = -errno;
+		goto out;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.test.prog_fd = prog_fd;
+	attr.test.ctx_in = (long) opts->ctx;
+	attr.test.ctx_size_in = opts->ctx->sz;
+	err = skel_sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	if (err < 0 || (int)attr.test.retval < 0) {
+		opts->errstr = "failed to execute loader prog";
+		if (err < 0)
+			err = -errno;
+		else
+			err = (int)attr.test.retval;
+		goto out;
+	}
+	err = 0;
+out:
+	if (map_fd >= 0)
+		close(map_fd);
+	if (prog_fd >= 0)
+		close(prog_fd);
+	return err;
+}
+
+#endif

From 30f51aedabda92b74927979b2b3b50169e285f6b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:17 -0700
Subject: [PATCH 0294/3292] libbpf: Cleanup temp FDs when intermediate sys_bpf
 fails.

Fix loader program to close temporary FDs when intermediate
sys_bpf command fails.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-16-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/bpf_gen_internal.h |  1 +
 tools/lib/bpf/gen_loader.c       | 48 +++++++++++++++++++++++++++++---
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index f42a55efd559..615400391e57 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -15,6 +15,7 @@ struct bpf_gen {
 	void *data_cur;
 	void *insn_start;
 	void *insn_cur;
+	ssize_t cleanup_label;
 	__u32 nr_progs;
 	__u32 nr_maps;
 	int log_level;
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 0fc54b1ca311..8df718a6b142 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -101,8 +101,36 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
 
 void bpf_gen__init(struct bpf_gen *gen, int log_level)
 {
+	size_t stack_sz = sizeof(struct loader_stack);
+	int i;
+
 	gen->log_level = log_level;
+	/* save ctx pointer into R6 */
 	emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
+
+	/* bzero stack */
+	emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_10));
+	emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz));
+	emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
+	emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
+
+	/* jump over cleanup code */
+	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0,
+			      /* size of cleanup code below */
+			      (stack_sz / 4) * 3 + 2));
+
+	/* remember the label where all error branches will jump to */
+	gen->cleanup_label = gen->insn_cur - gen->insn_start;
+	/* emit cleanup code: close all temp FDs */
+	for (i = 0; i < stack_sz; i += 4) {
+		emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
+		emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
+		emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
+	}
+	/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
+	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+	emit(gen, BPF_EXIT_INSN());
 }
 
 static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
@@ -187,12 +215,24 @@ static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size)
 	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
 }
 
+static bool is_simm16(__s64 value)
+{
+	return value == (__s64)(__s16)value;
+}
+
 static void emit_check_err(struct bpf_gen *gen)
 {
-	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 2));
-	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
-	/* TODO: close intermediate FDs in case of error */
-	emit(gen, BPF_EXIT_INSN());
+	__s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+
+	/* R7 contains result of last sys_bpf command.
+	 * if (R7 < 0) goto cleanup;
+	 */
+	if (is_simm16(off)) {
+		emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off));
+	} else {
+		gen->error = -ERANGE;
+		emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+	}
 }
 
 /* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */

From 7723256bf2443d6bd7db3e583953d14107955233 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:18 -0700
Subject: [PATCH 0295/3292] libbpf: Introduce bpf_map__initial_value().

Introduce bpf_map__initial_value() to read initial contents
of mmaped data/rodata/bss maps.
Note that bpf_map__set_initial_value() doesn't allow modifying
kconfig map while bpf_map__initial_value() allows reading
its values.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-17-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/libbpf.c   | 8 ++++++++
 tools/lib/bpf/libbpf.h   | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 3 files changed, 10 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a8a6bb30a45b..dc4d5fe6d9d2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -9755,6 +9755,14 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 	return 0;
 }
 
+const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
+{
+	if (!map->mmaped)
+		return NULL;
+	*psize = map->def.value_size;
+	return map->mmaped;
+}
+
 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
 {
 	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 46b31c1f4849..d98523558f39 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -471,6 +471,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
 LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
 					  const void *data, size_t size);
+LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index bc59516a8ccf..2abef6f17c06 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,6 +359,7 @@ LIBBPF_0.4.0 {
 		bpf_linker__finalize;
 		bpf_linker__free;
 		bpf_linker__new;
+		bpf_map__initial_value;
 		bpf_map__inner_map;
 		bpf_object__gen_loader;
 		bpf_object__set_kversion;

From d510296d331accd4afaa13498220c93ae690628a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:19 -0700
Subject: [PATCH 0296/3292] bpftool: Use syscall/loader program in "prog load"
 and "gen skeleton" command.

Add -L flag to bpftool to use libbpf gen_trace facility and syscall/loader program
for skeleton generation and program loading.

"bpftool gen skeleton -L" command will generate a "light skeleton" or "loader skeleton"
that is similar to existing skeleton, but has one major difference:
$ bpftool gen skeleton lsm.o > lsm.skel.h
$ bpftool gen skeleton -L lsm.o > lsm.lskel.h
$ diff lsm.skel.h lsm.lskel.h
@@ -5,34 +4,34 @@
 #define __LSM_SKEL_H__

 #include <stdlib.h>
-#include <bpf/libbpf.h>
+#include <bpf/bpf.h>

The light skeleton does not use majority of libbpf infrastructure.
It doesn't need libelf. It doesn't parse .o file.
It only needs few sys_bpf wrappers. All of them are in bpf/bpf.h file.
In future libbpf/bpf.c can be inlined into bpf.h, so not even libbpf.a would be
needed to work with light skeleton.

"bpftool prog load -L file.o" command is introduced for debugging of syscall/loader
program generation. Just like the same command without -L it will try to load
the programs from file.o into the kernel. It won't even try to pin them.

"bpftool prog load -L -d file.o" command will provide additional debug messages
on how syscall/loader program was generated.
Also the execution of syscall/loader program will use bpf_trace_printk() for
each step of loading BTF, creating maps, and loading programs.
The user can do "cat /.../trace_pipe" for further debug.

An example of fexit_sleep.lskel.h generated from progs/fexit_sleep.c:
struct fexit_sleep {
	struct bpf_loader_ctx ctx;
	struct {
		struct bpf_map_desc bss;
	} maps;
	struct {
		struct bpf_prog_desc nanosleep_fentry;
		struct bpf_prog_desc nanosleep_fexit;
	} progs;
	struct {
		int nanosleep_fentry_fd;
		int nanosleep_fexit_fd;
	} links;
	struct fexit_sleep__bss {
		int pid;
		int fentry_cnt;
		int fexit_cnt;
	} *bss;
};

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-18-alexei.starovoitov@gmail.com
---
 tools/bpf/bpftool/Makefile        |   2 +-
 tools/bpf/bpftool/gen.c           | 386 ++++++++++++++++++++++++++++--
 tools/bpf/bpftool/main.c          |   7 +-
 tools/bpf/bpftool/main.h          |   1 +
 tools/bpf/bpftool/prog.c          | 107 ++++++++-
 tools/bpf/bpftool/xlated_dumper.c |   3 +
 6 files changed, 482 insertions(+), 24 deletions(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index b3073ae84018..d16d289ade7a 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -136,7 +136,7 @@ endif
 
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o) $(OUTPUT)disasm.o
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 27dceaf66ecb..13b0aa789178 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -18,6 +18,7 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <bpf/btf.h>
+#include <bpf/bpf_gen_internal.h>
 
 #include "json_writer.h"
 #include "main.h"
@@ -274,6 +275,327 @@ static void codegen(const char *template, ...)
 	free(s);
 }
 
+static void print_hex(const char *data, int data_sz)
+{
+	int i, len;
+
+	for (i = 0, len = 0; i < data_sz; i++) {
+		int w = data[i] ? 4 : 2;
+
+		len += w;
+		if (len > 78) {
+			printf("\\\n");
+			len = w;
+		}
+		if (!data[i])
+			printf("\\0");
+		else
+			printf("\\x%02x", (unsigned char)data[i]);
+	}
+}
+
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+	long page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t map_sz;
+
+	map_sz = (size_t)roundup(bpf_map__value_size(map), 8) * bpf_map__max_entries(map);
+	map_sz = roundup(map_sz, page_sz);
+	return map_sz;
+}
+
+static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		const char *tp_name;
+
+		codegen("\
+			\n\
+			\n\
+			static inline int					    \n\
+			%1$s__%2$s__attach(struct %1$s *skel)			    \n\
+			{							    \n\
+				int prog_fd = skel->progs.%2$s.prog_fd;		    \n\
+			", obj_name, bpf_program__name(prog));
+
+		switch (bpf_program__get_type(prog)) {
+		case BPF_PROG_TYPE_RAW_TRACEPOINT:
+			tp_name = strchr(bpf_program__section_name(prog), '/') + 1;
+			printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name);
+			break;
+		case BPF_PROG_TYPE_TRACING:
+			printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n");
+			break;
+		default:
+			printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n");
+			break;
+		}
+		codegen("\
+			\n\
+										    \n\
+				if (fd > 0)					    \n\
+					skel->links.%1$s_fd = fd;		    \n\
+				return fd;					    \n\
+			}							    \n\
+			", bpf_program__name(prog));
+	}
+
+	codegen("\
+		\n\
+									    \n\
+		static inline int					    \n\
+		%1$s__attach(struct %1$s *skel)				    \n\
+		{							    \n\
+			int ret = 0;					    \n\
+									    \n\
+		", obj_name);
+
+	bpf_object__for_each_program(prog, obj) {
+		codegen("\
+			\n\
+				ret = ret < 0 ? ret : %1$s__%2$s__attach(skel);   \n\
+			", obj_name, bpf_program__name(prog));
+	}
+
+	codegen("\
+		\n\
+			return ret < 0 ? ret : 0;			    \n\
+		}							    \n\
+									    \n\
+		static inline void					    \n\
+		%1$s__detach(struct %1$s *skel)				    \n\
+		{							    \n\
+		", obj_name);
+
+	bpf_object__for_each_program(prog, obj) {
+		codegen("\
+			\n\
+				skel_closenz(skel->links.%1$s_fd);	    \n\
+			", bpf_program__name(prog));
+	}
+
+	codegen("\
+		\n\
+		}							    \n\
+		");
+}
+
+static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
+{
+	struct bpf_program *prog;
+	struct bpf_map *map;
+
+	codegen("\
+		\n\
+		static void						    \n\
+		%1$s__destroy(struct %1$s *skel)			    \n\
+		{							    \n\
+			if (!skel)					    \n\
+				return;					    \n\
+			%1$s__detach(skel);				    \n\
+		",
+		obj_name);
+
+	bpf_object__for_each_program(prog, obj) {
+		codegen("\
+			\n\
+				skel_closenz(skel->progs.%1$s.prog_fd);	    \n\
+			", bpf_program__name(prog));
+	}
+
+	bpf_object__for_each_map(map, obj) {
+		const char * ident;
+
+		ident = get_map_ident(map);
+		if (!ident)
+			continue;
+		if (bpf_map__is_internal(map) &&
+		    (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+			printf("\tmunmap(skel->%1$s, %2$zd);\n",
+			       ident, bpf_map_mmap_sz(map));
+		codegen("\
+			\n\
+				skel_closenz(skel->maps.%1$s.map_fd);	    \n\
+			", ident);
+	}
+	codegen("\
+		\n\
+			free(skel);					    \n\
+		}							    \n\
+		",
+		obj_name);
+}
+
+static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
+{
+	struct bpf_object_load_attr load_attr = {};
+	DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+	struct bpf_map *map;
+	int err = 0;
+
+	err = bpf_object__gen_loader(obj, &opts);
+	if (err)
+		return err;
+
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
+	if (err) {
+		p_err("failed to load object file");
+		goto out;
+	}
+	/* If there was no error during load then gen_loader_opts
+	 * are populated with the loader program.
+	 */
+
+	/* finish generating 'struct skel' */
+	codegen("\
+		\n\
+		};							    \n\
+		", obj_name);
+
+
+	codegen_attach_detach(obj, obj_name);
+
+	codegen_destroy(obj, obj_name);
+
+	codegen("\
+		\n\
+		static inline struct %1$s *				    \n\
+		%1$s__open(void)					    \n\
+		{							    \n\
+			struct %1$s *skel;				    \n\
+									    \n\
+			skel = calloc(sizeof(*skel), 1);		    \n\
+			if (!skel)					    \n\
+				goto cleanup;				    \n\
+			skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\
+		",
+		obj_name, opts.data_sz);
+	bpf_object__for_each_map(map, obj) {
+		const char *ident;
+		const void *mmap_data = NULL;
+		size_t mmap_size = 0;
+
+		ident = get_map_ident(map);
+		if (!ident)
+			continue;
+
+		if (!bpf_map__is_internal(map) ||
+		    !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+			continue;
+
+		codegen("\
+			\n\
+				skel->%1$s =					 \n\
+					mmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n\
+					     MAP_SHARED | MAP_ANONYMOUS, -1, 0); \n\
+				if (skel->%1$s == (void *) -1)			 \n\
+					goto cleanup;				 \n\
+				memcpy(skel->%1$s, (void *)\"\\			 \n\
+			", ident, bpf_map_mmap_sz(map));
+		mmap_data = bpf_map__initial_value(map, &mmap_size);
+		print_hex(mmap_data, mmap_size);
+		printf("\", %2$zd);\n"
+		       "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n",
+		       ident, mmap_size);
+	}
+	codegen("\
+		\n\
+			return skel;					    \n\
+		cleanup:						    \n\
+			%1$s__destroy(skel);				    \n\
+			return NULL;					    \n\
+		}							    \n\
+									    \n\
+		static inline int					    \n\
+		%1$s__load(struct %1$s *skel)				    \n\
+		{							    \n\
+			struct bpf_load_and_run_opts opts = {};		    \n\
+			int err;					    \n\
+									    \n\
+			opts.ctx = (struct bpf_loader_ctx *)skel;	    \n\
+			opts.data_sz = %2$d;				    \n\
+			opts.data = (void *)\"\\			    \n\
+		",
+		obj_name, opts.data_sz);
+	print_hex(opts.data, opts.data_sz);
+	codegen("\
+		\n\
+		\";							    \n\
+		");
+
+	codegen("\
+		\n\
+			opts.insns_sz = %d;				    \n\
+			opts.insns = (void *)\"\\			    \n\
+		",
+		opts.insns_sz);
+	print_hex(opts.insns, opts.insns_sz);
+	codegen("\
+		\n\
+		\";							    \n\
+			err = bpf_load_and_run(&opts);			    \n\
+			if (err < 0)					    \n\
+				return err;				    \n\
+		", obj_name);
+	bpf_object__for_each_map(map, obj) {
+		const char *ident, *mmap_flags;
+
+		ident = get_map_ident(map);
+		if (!ident)
+			continue;
+
+		if (!bpf_map__is_internal(map) ||
+		    !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+			continue;
+		if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+			mmap_flags = "PROT_READ";
+		else
+			mmap_flags = "PROT_READ | PROT_WRITE";
+
+		printf("\tskel->%1$s =\n"
+		       "\t\tmmap(skel->%1$s, %2$zd, %3$s, MAP_SHARED | MAP_FIXED,\n"
+		       "\t\t\tskel->maps.%1$s.map_fd, 0);\n",
+		       ident, bpf_map_mmap_sz(map), mmap_flags);
+	}
+	codegen("\
+		\n\
+			return 0;					    \n\
+		}							    \n\
+									    \n\
+		static inline struct %1$s *				    \n\
+		%1$s__open_and_load(void)				    \n\
+		{							    \n\
+			struct %1$s *skel;				    \n\
+									    \n\
+			skel = %1$s__open();				    \n\
+			if (!skel)					    \n\
+				return NULL;				    \n\
+			if (%1$s__load(skel)) {				    \n\
+				%1$s__destroy(skel);			    \n\
+				return NULL;				    \n\
+			}						    \n\
+			return skel;					    \n\
+		}							    \n\
+		", obj_name);
+
+	codegen("\
+		\n\
+									    \n\
+		#endif /* %s */						    \n\
+		",
+		header_guard);
+	err = 0;
+out:
+	return err;
+}
+
 static int do_skeleton(int argc, char **argv)
 {
 	char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
@@ -283,7 +605,7 @@ static int do_skeleton(int argc, char **argv)
 	struct bpf_object *obj = NULL;
 	const char *file, *ident;
 	struct bpf_program *prog;
-	int fd, len, err = -1;
+	int fd, err = -1;
 	struct bpf_map *map;
 	struct btf *btf;
 	struct stat st;
@@ -365,7 +687,25 @@ static int do_skeleton(int argc, char **argv)
 	}
 
 	get_header_guard(header_guard, obj_name);
-	codegen("\
+	if (use_loader) {
+		codegen("\
+		\n\
+		/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */   \n\
+		/* THIS FILE IS AUTOGENERATED! */			    \n\
+		#ifndef %2$s						    \n\
+		#define %2$s						    \n\
+									    \n\
+		#include <stdlib.h>					    \n\
+		#include <bpf/bpf.h>					    \n\
+		#include <bpf/skel_internal.h>				    \n\
+									    \n\
+		struct %1$s {						    \n\
+			struct bpf_loader_ctx ctx;			    \n\
+		",
+		obj_name, header_guard
+		);
+	} else {
+		codegen("\
 		\n\
 		/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */   \n\
 									    \n\
@@ -381,7 +721,8 @@ static int do_skeleton(int argc, char **argv)
 			struct bpf_object *obj;				    \n\
 		",
 		obj_name, header_guard
-	);
+		);
+	}
 
 	if (map_cnt) {
 		printf("\tstruct {\n");
@@ -389,7 +730,10 @@ static int do_skeleton(int argc, char **argv)
 			ident = get_map_ident(map);
 			if (!ident)
 				continue;
-			printf("\t\tstruct bpf_map *%s;\n", ident);
+			if (use_loader)
+				printf("\t\tstruct bpf_map_desc %s;\n", ident);
+			else
+				printf("\t\tstruct bpf_map *%s;\n", ident);
 		}
 		printf("\t} maps;\n");
 	}
@@ -397,14 +741,22 @@ static int do_skeleton(int argc, char **argv)
 	if (prog_cnt) {
 		printf("\tstruct {\n");
 		bpf_object__for_each_program(prog, obj) {
-			printf("\t\tstruct bpf_program *%s;\n",
-			       bpf_program__name(prog));
+			if (use_loader)
+				printf("\t\tstruct bpf_prog_desc %s;\n",
+				       bpf_program__name(prog));
+			else
+				printf("\t\tstruct bpf_program *%s;\n",
+				       bpf_program__name(prog));
 		}
 		printf("\t} progs;\n");
 		printf("\tstruct {\n");
 		bpf_object__for_each_program(prog, obj) {
-			printf("\t\tstruct bpf_link *%s;\n",
-			       bpf_program__name(prog));
+			if (use_loader)
+				printf("\t\tint %s_fd;\n",
+				       bpf_program__name(prog));
+			else
+				printf("\t\tstruct bpf_link *%s;\n",
+				       bpf_program__name(prog));
 		}
 		printf("\t} links;\n");
 	}
@@ -415,6 +767,10 @@ static int do_skeleton(int argc, char **argv)
 		if (err)
 			goto out;
 	}
+	if (use_loader) {
+		err = gen_trace(obj, obj_name, header_guard);
+		goto out;
+	}
 
 	codegen("\
 		\n\
@@ -584,19 +940,7 @@ static int do_skeleton(int argc, char **argv)
 		file_sz);
 
 	/* embed contents of BPF object file */
-	for (i = 0, len = 0; i < file_sz; i++) {
-		int w = obj_data[i] ? 4 : 2;
-
-		len += w;
-		if (len > 78) {
-			printf("\\\n");
-			len = w;
-		}
-		if (!obj_data[i])
-			printf("\\0");
-		else
-			printf("\\x%02x", (unsigned char)obj_data[i]);
-	}
+	print_hex(obj_data, file_sz);
 
 	codegen("\
 		\n\
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d9afb730136a..7f2817d97079 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -29,6 +29,7 @@ bool show_pinned;
 bool block_mount;
 bool verifier_logs;
 bool relaxed_maps;
+bool use_loader;
 struct btf *base_btf;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
@@ -392,6 +393,7 @@ int main(int argc, char **argv)
 		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ "nomount",	no_argument,	NULL,	'n' },
 		{ "debug",	no_argument,	NULL,	'd' },
+		{ "use-loader",	no_argument,	NULL,	'L' },
 		{ "base-btf",	required_argument, NULL, 'B' },
 		{ 0 }
 	};
@@ -409,7 +411,7 @@ int main(int argc, char **argv)
 	hash_init(link_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
+	while ((opt = getopt_long(argc, argv, "VhpjfLmndB:",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -452,6 +454,9 @@ int main(int argc, char **argv)
 				return -1;
 			}
 			break;
+		case 'L':
+			use_loader = true;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 76e91641262b..c1cf29798b99 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -90,6 +90,7 @@ extern bool show_pids;
 extern bool block_mount;
 extern bool verifier_logs;
 extern bool relaxed_maps;
+extern bool use_loader;
 extern struct btf *base_btf;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3f067d2d7584..d018bc7a3673 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -16,6 +16,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
+#include <dirent.h>
 
 #include <linux/err.h>
 #include <linux/perf_event.h>
@@ -24,6 +25,8 @@
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
 #include <bpf/libbpf.h>
+#include <bpf/bpf_gen_internal.h>
+#include <bpf/skel_internal.h>
 
 #include "cfg.h"
 #include "main.h"
@@ -1499,7 +1502,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 	set_max_rlimit();
 
 	obj = bpf_object__open_file(file, &open_opts);
-	if (IS_ERR_OR_NULL(obj)) {
+	if (libbpf_get_error(obj)) {
 		p_err("failed to open object file");
 		goto err_free_reuse_maps;
 	}
@@ -1645,8 +1648,110 @@ err_free_reuse_maps:
 	return -1;
 }
 
+static int count_open_fds(void)
+{
+	DIR *dp = opendir("/proc/self/fd");
+	struct dirent *de;
+	int cnt = -3;
+
+	if (!dp)
+		return -1;
+
+	while ((de = readdir(dp)))
+		cnt++;
+
+	closedir(dp);
+	return cnt;
+}
+
+static int try_loader(struct gen_loader_opts *gen)
+{
+	struct bpf_load_and_run_opts opts = {};
+	struct bpf_loader_ctx *ctx;
+	int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
+					     sizeof(struct bpf_prog_desc));
+	int log_buf_sz = (1u << 24) - 1;
+	int err, fds_before, fd_delta;
+	char *log_buf;
+
+	ctx = alloca(ctx_sz);
+	memset(ctx, 0, ctx_sz);
+	ctx->sz = ctx_sz;
+	ctx->log_level = 1;
+	ctx->log_size = log_buf_sz;
+	log_buf = malloc(log_buf_sz);
+	if (!log_buf)
+		return -ENOMEM;
+	ctx->log_buf = (long) log_buf;
+	opts.ctx = ctx;
+	opts.data = gen->data;
+	opts.data_sz = gen->data_sz;
+	opts.insns = gen->insns;
+	opts.insns_sz = gen->insns_sz;
+	fds_before = count_open_fds();
+	err = bpf_load_and_run(&opts);
+	fd_delta = count_open_fds() - fds_before;
+	if (err < 0) {
+		fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf);
+		if (fd_delta)
+			fprintf(stderr, "loader prog leaked %d FDs\n",
+				fd_delta);
+	}
+	free(log_buf);
+	return err;
+}
+
+static int do_loader(int argc, char **argv)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+	DECLARE_LIBBPF_OPTS(gen_loader_opts, gen);
+	struct bpf_object_load_attr load_attr = {};
+	struct bpf_object *obj;
+	const char *file;
+	int err = 0;
+
+	if (!REQ_ARGS(1))
+		return -1;
+	file = GET_ARG();
+
+	obj = bpf_object__open_file(file, &open_opts);
+	if (libbpf_get_error(obj)) {
+		p_err("failed to open object file");
+		goto err_close_obj;
+	}
+
+	err = bpf_object__gen_loader(obj, &gen);
+	if (err)
+		goto err_close_obj;
+
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
+	if (err) {
+		p_err("failed to load object file");
+		goto err_close_obj;
+	}
+
+	if (verifier_logs) {
+		struct dump_data dd = {};
+
+		kernel_syms_load(&dd);
+		dump_xlated_plain(&dd, (void *)gen.insns, gen.insns_sz, false, false);
+		kernel_syms_destroy(&dd);
+	}
+	err = try_loader(&gen);
+err_close_obj:
+	bpf_object__close(obj);
+	return err;
+}
+
 static int do_load(int argc, char **argv)
 {
+	if (use_loader)
+		return do_loader(argc, argv);
 	return load_with_options(argc, argv, true);
 }
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 6fc3e6f7f40c..f1f32e21d5cd 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
 	else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+	else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "map[idx:%u]+%u", insn->imm, (insn + 1)->imm);
 	else if (insn->src_reg == BPF_PSEUDO_FUNC)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "subprog[%+d]", insn->imm);

From 4d1b62986125b6de596c98310543652a7892e097 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:20 -0700
Subject: [PATCH 0297/3292] selftests/bpf: Convert few tests to light skeleton.

Convert few tests that don't use CO-RE to light skeleton.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-19-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/.gitignore           |  1 +
 tools/testing/selftests/bpf/Makefile             | 16 +++++++++++++++-
 .../selftests/bpf/prog_tests/fentry_fexit.c      |  6 +++---
 .../selftests/bpf/prog_tests/fentry_test.c       | 10 +++++-----
 .../selftests/bpf/prog_tests/fexit_sleep.c       |  6 +++---
 .../selftests/bpf/prog_tests/fexit_test.c        | 10 +++++-----
 .../selftests/bpf/prog_tests/kfunc_call.c        |  6 +++---
 .../selftests/bpf/prog_tests/ksyms_module.c      |  2 +-
 tools/testing/selftests/bpf/prog_tests/ringbuf.c |  8 +++-----
 tools/testing/selftests/bpf/progs/test_ringbuf.c |  4 ++--
 10 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4866f6a21901..a030aa4a8a9e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -30,6 +30,7 @@ test_sysctl
 xdping
 test_cpp
 *.skel.h
+*.lskel.h
 /no_alu32
 /bpf_gcc
 /tools
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 511259c2c6c5..fdc7785ff82d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -312,6 +312,10 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h
 
+LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
+	test_ksyms_module.c test_ringbuf.c
+SKEL_BLACKLIST += $$(LSKELS)
+
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
 linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
 linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
@@ -339,6 +343,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
 					       $$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS))
 TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
 TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
 
@@ -380,6 +385,14 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
 	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
 
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
 $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
 	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
@@ -409,6 +422,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:			\
 		      $(TRUNNER_EXTRA_HDRS)				\
 		      $(TRUNNER_BPF_OBJS)				\
 		      $(TRUNNER_BPF_SKELS)				\
+		      $(TRUNNER_BPF_LSKELS)				\
 		      $(TRUNNER_BPF_SKELS_LINKED)			\
 		      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
@@ -516,6 +530,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
-	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+	$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko)
 
 .PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 109d0345a2be..91154c2ba256 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
-#include "fentry_test.skel.h"
-#include "fexit_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fexit_test.lskel.h"
 
 void test_fentry_fexit(void)
 {
@@ -26,7 +26,7 @@ void test_fentry_fexit(void)
 	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
 		goto close_prog;
 
-	prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+	prog_fd = fexit_skel->progs.test1.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	CHECK(err || retval, "ipv6",
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 7cb111b11995..174c89e7456e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,13 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
-#include "fentry_test.skel.h"
+#include "fentry_test.lskel.h"
 
 static int fentry_test(struct fentry_test *fentry_skel)
 {
 	int err, prog_fd, i;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 	__u64 *result;
 
 	err = fentry_test__attach(fentry_skel);
@@ -15,11 +15,11 @@ static int fentry_test(struct fentry_test *fentry_skel)
 		return err;
 
 	/* Check that already linked program can't be attached again. */
-	link = bpf_program__attach(fentry_skel->progs.test1);
-	if (!ASSERT_ERR_PTR(link, "fentry_attach_link"))
+	link_fd = fentry_test__test1__attach(fentry_skel);
+	if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
 		return -1;
 
-	prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+	prog_fd = fentry_skel->progs.test1.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index ccc7e8a34ab6..4e7f4b42ea29 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -6,7 +6,7 @@
 #include <time.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
-#include "fexit_sleep.skel.h"
+#include "fexit_sleep.lskel.h"
 
 static int do_sleep(void *skel)
 {
@@ -58,8 +58,8 @@ void test_fexit_sleep(void)
 	 * waiting for percpu_ref_kill to confirm). The other one
 	 * will be freed quickly.
 	 */
-	close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
-	close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+	close(fexit_skel->progs.nanosleep_fentry.prog_fd);
+	close(fexit_skel->progs.nanosleep_fexit.prog_fd);
 	fexit_sleep__detach(fexit_skel);
 
 	/* kill the thread to unwind sys_nanosleep stack through the trampoline */
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 6792e41f7f69..af3dba726701 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,13 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
-#include "fexit_test.skel.h"
+#include "fexit_test.lskel.h"
 
 static int fexit_test(struct fexit_test *fexit_skel)
 {
 	int err, prog_fd, i;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 	__u64 *result;
 
 	err = fexit_test__attach(fexit_skel);
@@ -15,11 +15,11 @@ static int fexit_test(struct fexit_test *fexit_skel)
 		return err;
 
 	/* Check that already linked program can't be attached again. */
-	link = bpf_program__attach(fexit_skel->progs.test1);
-	if (!ASSERT_ERR_PTR(link, "fexit_attach_link"))
+	link_fd = fexit_test__test1__attach(fexit_skel);
+	if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
 		return -1;
 
-	prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+	prog_fd = fexit_skel->progs.test1.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 7fc0951ee75f..30a7b9b837bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <test_progs.h>
 #include <network_helpers.h>
-#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
 #include "kfunc_call_test_subprog.skel.h"
 
 static void test_main(void)
@@ -14,13 +14,13 @@ static void test_main(void)
 	if (!ASSERT_OK_PTR(skel, "skel"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+	prog_fd = skel->progs.kfunc_call_test1.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				NULL, NULL, (__u32 *)&retval, NULL);
 	ASSERT_OK(err, "bpf_prog_test_run(test1)");
 	ASSERT_EQ(retval, 12, "test1-retval");
 
-	prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+	prog_fd = skel->progs.kfunc_call_test2.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				NULL, NULL, (__u32 *)&retval, NULL);
 	ASSERT_OK(err, "bpf_prog_test_run(test2)");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 4c232b456479..2cd5cded543f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -4,7 +4,7 @@
 #include <test_progs.h>
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
-#include "test_ksyms_module.skel.h"
+#include "test_ksyms_module.lskel.h"
 
 static int duration;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index de78617f6550..80c11ac0ffb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,7 +12,7 @@
 #include <sys/sysinfo.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
-#include "test_ringbuf.skel.h"
+#include "test_ringbuf.lskel.h"
 
 #define EDONE 7777
 
@@ -93,9 +93,7 @@ void test_ringbuf(void)
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
-	err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
+	skel->maps.ringbuf.max_entries = page_size;
 
 	err = test_ringbuf__load(skel);
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
@@ -104,7 +102,7 @@ void test_ringbuf(void)
 	/* only trigger BPF program for current process */
 	skel->bss->pid = getpid();
 
-	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+	ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd,
 				   process_sample, NULL, NULL);
 	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 6b3f288b7c63..eaa7d9dba0be 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -35,7 +35,7 @@ long prod_pos = 0;
 /* inner state */
 long seq = 0;
 
-SEC("tp/syscalls/sys_enter_getpgid")
+SEC("fentry/__x64_sys_getpgid")
 int test_ringbuf(void *ctx)
 {
 	int cur_pid = bpf_get_current_pid_tgid() >> 32;
@@ -48,7 +48,7 @@ int test_ringbuf(void *ctx)
 	sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
 	if (!sample) {
 		__sync_fetch_and_add(&dropped, 1);
-		return 1;
+		return 0;
 	}
 
 	sample->pid = pid;

From 0a9306629983d0be384d4f2557c8c7e2ed086164 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:21 -0700
Subject: [PATCH 0298/3292] selftests/bpf: Convert atomics test to light
 skeleton.

Convert prog_tests/atomics.c to lskel.h

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-20-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/Makefile          |  2 +-
 .../selftests/bpf/prog_tests/atomics.c        | 72 +++++++++----------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fdc7785ff82d..b29862339222 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -313,7 +313,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
-	test_ksyms_module.c test_ringbuf.c
+	test_ksyms_module.c test_ringbuf.c atomics.c
 SKEL_BLACKLIST += $$(LSKELS)
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 21efe7bbf10d..ba0e1efe5a45 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -2,19 +2,19 @@
 
 #include <test_progs.h>
 
-#include "atomics.skel.h"
+#include "atomics.lskel.h"
 
 static void test_add(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.add);
-	if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__add__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(add)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.add);
+	prog_fd = skel->progs.add.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run add",
@@ -33,20 +33,20 @@ static void test_add(struct atomics *skel)
 	ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
 
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 static void test_sub(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.sub);
-	if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__sub__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.sub);
+	prog_fd = skel->progs.sub.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run sub",
@@ -66,20 +66,20 @@ static void test_sub(struct atomics *skel)
 	ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
 
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 static void test_and(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.and);
-	if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__and__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(and)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.and);
+	prog_fd = skel->progs.and.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run and",
@@ -94,20 +94,20 @@ static void test_and(struct atomics *skel)
 
 	ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 static void test_or(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.or);
-	if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__or__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(or)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.or);
+	prog_fd = skel->progs.or.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run or",
@@ -123,20 +123,20 @@ static void test_or(struct atomics *skel)
 
 	ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 static void test_xor(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.xor);
-	if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__xor__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.xor);
+	prog_fd = skel->progs.xor.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run xor",
@@ -151,20 +151,20 @@ static void test_xor(struct atomics *skel)
 
 	ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 static void test_cmpxchg(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.cmpxchg);
-	if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__cmpxchg__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+	prog_fd = skel->progs.cmpxchg.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run add",
@@ -180,20 +180,20 @@ static void test_cmpxchg(struct atomics *skel)
 	ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
 
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 static void test_xchg(struct atomics *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
-	struct bpf_link *link;
+	int link_fd;
 
-	link = bpf_program__attach(skel->progs.xchg);
-	if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link)))
+	link_fd = atomics__xchg__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs.xchg);
+	prog_fd = skel->progs.xchg.prog_fd;
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
 	if (CHECK(err || retval, "test_run add",
@@ -207,7 +207,7 @@ static void test_xchg(struct atomics *skel)
 	ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
 
 cleanup:
-	bpf_link__destroy(link);
+	close(link_fd);
 }
 
 void test_atomics(void)

From eb0f1e0c7f103e262308d3f12a27a8331af3011a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:22 -0700
Subject: [PATCH 0299/3292] selftests/bpf: Convert test printk to use rodata.

Convert test trace_printk to more aggressively validate and use rodata.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-21-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/trace_printk.c | 3 +++
 tools/testing/selftests/bpf/progs/trace_printk.c      | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 39b0decb1bb2..2c641bdf21ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -21,6 +21,9 @@ void test_trace_printk(void)
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
 		return;
 
+	ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string");
+	skel->rodata->fmt[0] = 't';
+
 	err = trace_printk__load(skel);
 	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 8ca7f399b670..119582aa105a 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -10,11 +10,11 @@ char _license[] SEC("license") = "GPL";
 int trace_printk_ret = 0;
 int trace_printk_ran = 0;
 
-SEC("tp/raw_syscalls/sys_enter")
+const char fmt[] = "Testing,testing %d\n";
+
+SEC("fentry/__x64_sys_nanosleep")
 int sys_enter(void *ctx)
 {
-	static const char fmt[] = "testing,testing %d\n";
-
 	trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
 					    ++trace_printk_ran);
 	return 0;

From 1a532eb28df3e45ae2d403d66c0e98295688ae10 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 13 May 2021 17:36:23 -0700
Subject: [PATCH 0300/3292] selftests/bpf: Convert test trace_printk to lskel.

Convert test trace_printk to light skeleton to check
rodata support in lskel.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-22-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/Makefile                  | 2 +-
 tools/testing/selftests/bpf/prog_tests/trace_printk.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b29862339222..525e4b3fb514 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -313,7 +313,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
-	test_ksyms_module.c test_ringbuf.c atomics.c
+	test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c
 SKEL_BLACKLIST += $$(LSKELS)
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 2c641bdf21ca..d39bc00feb45 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -3,7 +3,7 @@
 
 #include <test_progs.h>
 
-#include "trace_printk.skel.h"
+#include "trace_printk.lskel.h"
 
 #define TRACEBUF	"/sys/kernel/debug/tracing/trace_pipe"
 #define SEARCHMSG	"testing,testing"

From 11b3dda5e8b6cde957a6410233f30d6c48582998 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:03 +0100
Subject: [PATCH 0301/3292] lib: vsprintf: scanf: Negative number must have
 field width > 1

If a signed number field starts with a '-' the field width must be > 1,
or unlimited, to allow at least one digit after the '-'.

This patch adds a check for this. If a signed field starts with '-'
and field_width == 1 the scanf will quit.

It is ok for a signed number field to have a field width of 1 if it
starts with a digit. In that case the single digit can be converted.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-1-rf@opensource.cirrus.com
---
 lib/vsprintf.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6c56c62fd9a5..af307588ad8b 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -3526,8 +3526,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		str = skip_spaces(str);
 
 		digit = *str;
-		if (is_sign && digit == '-')
+		if (is_sign && digit == '-') {
+			if (field_width == 1)
+				break;
+
 			digit = *(str + 1);
+		}
 
 		if (!digit
 		    || (base == 16 && !isxdigit(digit))

From 900fdc4573766dd43b847b4f54bd4a1ee2bc7360 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:04 +0100
Subject: [PATCH 0302/3292] lib: vsprintf: Fix handling of number field widths
 in vsscanf

The existing code attempted to handle numbers by doing a strto[u]l(),
ignoring the field width, and then repeatedly dividing to extract the
field out of the full converted value. If the string contains a run of
valid digits longer than will fit in a long or long long, this would
overflow and no amount of dividing can recover the correct value.

This patch fixes vsscanf() to obey number field widths when parsing
the number.

A new _parse_integer_limit() is added that takes a limit for the number
of characters to parse. The number field conversion in vsscanf is changed
to use this new function.

If a number starts with a radix prefix, the field width  must be long
enough for at last one digit after the prefix. If not, it will be handled
like this:

 sscanf("0x4", "%1i", &i): i=0, scanning continues with the 'x'
 sscanf("0x4", "%2i", &i): i=0, scanning continues with the '4'

This is consistent with the observed behaviour of userland sscanf.

Note that this patch does NOT fix the problem of a single field value
overflowing the target type. So for example:

  sscanf("123456789abcdef", "%x", &i);

Will not produce the correct result because the value obviously overflows
INT_MAX. But sscanf will report a successful conversion.

Note that where a very large number is used to mean "unlimited", the value
INT_MAX is used for consistency with the behaviour of vsnprintf().

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-2-rf@opensource.cirrus.com
---
 lib/kstrtox.c  | 13 ++++++--
 lib/kstrtox.h  |  2 ++
 lib/vsprintf.c | 82 +++++++++++++++++++++++++++++---------------------
 3 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index a118b0b1e9b2..0b5fe8b41173 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -39,20 +39,22 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
 
 /*
  * Convert non-negative integer string representation in explicitly given radix
- * to an integer.
+ * to an integer. A maximum of max_chars characters will be converted.
+ *
  * Return number of characters consumed maybe or-ed with overflow bit.
  * If overflow occurs, result integer (incorrect) is still returned.
  *
  * Don't you dare use this function.
  */
-unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
+				  size_t max_chars)
 {
 	unsigned long long res;
 	unsigned int rv;
 
 	res = 0;
 	rv = 0;
-	while (1) {
+	while (max_chars--) {
 		unsigned int c = *s;
 		unsigned int lc = c | 0x20; /* don't tolower() this line */
 		unsigned int val;
@@ -82,6 +84,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 	return rv;
 }
 
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+	return _parse_integer_limit(s, base, p, INT_MAX);
+}
+
 static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
 	unsigned long long _res;
diff --git a/lib/kstrtox.h b/lib/kstrtox.h
index 3b4637bcd254..158c400ca865 100644
--- a/lib/kstrtox.h
+++ b/lib/kstrtox.h
@@ -4,6 +4,8 @@
 
 #define KSTRTOX_OVERFLOW	(1U << 31)
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *res,
+				  size_t max_chars);
 unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
 
 #endif
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index af307588ad8b..3290eca46e47 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -53,6 +53,31 @@
 #include <linux/string_helpers.h>
 #include "kstrtox.h"
 
+static unsigned long long simple_strntoull(const char *startp, size_t max_chars,
+					   char **endp, unsigned int base)
+{
+	const char *cp;
+	unsigned long long result = 0ULL;
+	size_t prefix_chars;
+	unsigned int rv;
+
+	cp = _parse_integer_fixup_radix(startp, &base);
+	prefix_chars = cp - startp;
+	if (prefix_chars < max_chars) {
+		rv = _parse_integer_limit(cp, base, &result, max_chars - prefix_chars);
+		/* FIXME */
+		cp += (rv & ~KSTRTOX_OVERFLOW);
+	} else {
+		/* Field too short for prefix + digit, skip over without converting */
+		cp = startp + max_chars;
+	}
+
+	if (endp)
+		*endp = (char *)cp;
+
+	return result;
+}
+
 /**
  * simple_strtoull - convert a string to an unsigned long long
  * @cp: The start of the string
@@ -63,18 +88,7 @@
  */
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
 {
-	unsigned long long result;
-	unsigned int rv;
-
-	cp = _parse_integer_fixup_radix(cp, &base);
-	rv = _parse_integer(cp, base, &result);
-	/* FIXME */
-	cp += (rv & ~KSTRTOX_OVERFLOW);
-
-	if (endp)
-		*endp = (char *)cp;
-
-	return result;
+	return simple_strntoull(cp, INT_MAX, endp, base);
 }
 EXPORT_SYMBOL(simple_strtoull);
 
@@ -109,6 +123,21 @@ long simple_strtol(const char *cp, char **endp, unsigned int base)
 }
 EXPORT_SYMBOL(simple_strtol);
 
+static long long simple_strntoll(const char *cp, size_t max_chars, char **endp,
+				 unsigned int base)
+{
+	/*
+	 * simple_strntoull() safely handles receiving max_chars==0 in the
+	 * case cp[0] == '-' && max_chars == 1.
+	 * If max_chars == 0 we can drop through and pass it to simple_strntoull()
+	 * and the content of *cp is irrelevant.
+	 */
+	if (*cp == '-' && max_chars > 0)
+		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
+
+	return simple_strntoull(cp, max_chars, endp, base);
+}
+
 /**
  * simple_strtoll - convert a string to a signed long long
  * @cp: The start of the string
@@ -119,10 +148,7 @@ EXPORT_SYMBOL(simple_strtol);
  */
 long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 {
-	if (*cp == '-')
-		return -simple_strtoull(cp + 1, endp, base);
-
-	return simple_strtoull(cp, endp, base);
+	return simple_strntoll(cp, INT_MAX, endp, base);
 }
 EXPORT_SYMBOL(simple_strtoll);
 
@@ -3541,25 +3567,13 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 			break;
 
 		if (is_sign)
-			val.s = qualifier != 'L' ?
-				simple_strtol(str, &next, base) :
-				simple_strtoll(str, &next, base);
+			val.s = simple_strntoll(str,
+						field_width >= 0 ? field_width : INT_MAX,
+						&next, base);
 		else
-			val.u = qualifier != 'L' ?
-				simple_strtoul(str, &next, base) :
-				simple_strtoull(str, &next, base);
-
-		if (field_width > 0 && next - str > field_width) {
-			if (base == 0)
-				_parse_integer_fixup_radix(str, &base);
-			while (next - str > field_width) {
-				if (is_sign)
-					val.s = div_s64(val.s, base);
-				else
-					val.u = div_u64(val.u, base);
-				--next;
-			}
-		}
+			val.u = simple_strntoull(str,
+						 field_width >= 0 ? field_width : INT_MAX,
+						 &next, base);
 
 		switch (qualifier) {
 		case 'H':	/* that's 'hh' in format */

From 50f530e176eac808e64416732e54c0686ce2c39b Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:05 +0100
Subject: [PATCH 0303/3292] lib: test_scanf: Add tests for sscanf number
 conversion

Adds test_sscanf to test various number conversion cases, as
number conversion was previously broken.

This also tests the simple_strtoxxx() functions exported from
vsprintf.c.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-3-rf@opensource.cirrus.com
---
 MAINTAINERS       |   1 +
 lib/Kconfig.debug |   3 +
 lib/Makefile      |   1 +
 lib/test_scanf.c  | 751 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 756 insertions(+)
 create mode 100644 lib/test_scanf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 04e7de8c95be..eb05132f8ff1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19395,6 +19395,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
 F:	Documentation/core-api/printk-formats.rst
 F:	lib/test_printf.c
+F:	lib/test_scanf.c
 F:	lib/vsprintf.c
 
 VT1211 HARDWARE MONITOR DRIVER
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2c7f46b366f1..dc437a171c0f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2163,6 +2163,9 @@ config TEST_KSTRTOX
 config TEST_PRINTF
 	tristate "Test printf() family of functions at runtime"
 
+config TEST_SCANF
+	tristate "Test scanf() family of functions at runtime"
+
 config TEST_BITMAP
 	tristate "Test bitmap_*() family of functions at runtime"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index e11cfc18b6c0..5f7f246241b1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
+obj-$(CONFIG_TEST_SCANF) += test_scanf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
new file mode 100644
index 000000000000..8d577aec6c28
--- /dev/null
+++ b/lib/test_scanf.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test cases for sscanf facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "../tools/testing/selftests/kselftest_module.h"
+
+#define BUF_SIZE 1024
+
+KSTM_MODULE_GLOBALS();
+static char *test_buffer __initdata;
+static char *fmt_buffer __initdata;
+static struct rnd_state rnd_state __initdata;
+
+typedef int (*check_fn)(const void *check_data, const char *string,
+			const char *fmt, int n_args, va_list ap);
+
+static void __scanf(4, 6) __init
+_test(check_fn fn, const void *check_data, const char *string, const char *fmt,
+	int n_args, ...)
+{
+	va_list ap, ap_copy;
+	int ret;
+
+	total_tests++;
+
+	va_start(ap, n_args);
+	va_copy(ap_copy, ap);
+	ret = vsscanf(string, fmt, ap_copy);
+	va_end(ap_copy);
+
+	if (ret != n_args) {
+		pr_warn("vsscanf(\"%s\", \"%s\", ...) returned %d expected %d\n",
+			string, fmt, ret, n_args);
+		goto fail;
+	}
+
+	ret = (*fn)(check_data, string, fmt, n_args, ap);
+	if (ret)
+		goto fail;
+
+	va_end(ap);
+
+	return;
+
+fail:
+	failed_tests++;
+	va_end(ap);
+}
+
+#define _check_numbers_template(arg_fmt, expect, str, fmt, n_args, ap)		\
+do {										\
+	pr_debug("\"%s\", \"%s\" ->\n", str, fmt);				\
+	for (; n_args > 0; n_args--, expect++) {				\
+		typeof(*expect) got = *va_arg(ap, typeof(expect));		\
+		pr_debug("\t" arg_fmt "\n", got);				\
+		if (got != *expect) {						\
+			pr_warn("vsscanf(\"%s\", \"%s\", ...) expected " arg_fmt " got " arg_fmt "\n", \
+				str, fmt, *expect, got);			\
+			return 1;						\
+		}								\
+	}									\
+	return 0;								\
+} while (0)
+
+static int __init check_ull(const void *check_data, const char *string,
+			    const char *fmt, int n_args, va_list ap)
+{
+	const unsigned long long *pval = check_data;
+
+	_check_numbers_template("%llu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_ll(const void *check_data, const char *string,
+			   const char *fmt, int n_args, va_list ap)
+{
+	const long long *pval = check_data;
+
+	_check_numbers_template("%lld", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_ulong(const void *check_data, const char *string,
+			   const char *fmt, int n_args, va_list ap)
+{
+	const unsigned long *pval = check_data;
+
+	_check_numbers_template("%lu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_long(const void *check_data, const char *string,
+			  const char *fmt, int n_args, va_list ap)
+{
+	const long *pval = check_data;
+
+	_check_numbers_template("%ld", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_uint(const void *check_data, const char *string,
+			     const char *fmt, int n_args, va_list ap)
+{
+	const unsigned int *pval = check_data;
+
+	_check_numbers_template("%u", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_int(const void *check_data, const char *string,
+			    const char *fmt, int n_args, va_list ap)
+{
+	const int *pval = check_data;
+
+	_check_numbers_template("%d", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_ushort(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const unsigned short *pval = check_data;
+
+	_check_numbers_template("%hu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_short(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const short *pval = check_data;
+
+	_check_numbers_template("%hd", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_uchar(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const unsigned char *pval = check_data;
+
+	_check_numbers_template("%hhu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_char(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const signed char *pval = check_data;
+
+	_check_numbers_template("%hhd", pval, string, fmt, n_args, ap);
+}
+
+/* Selection of interesting numbers to test, copied from test-kstrtox.c */
+static const unsigned long long numbers[] __initconst = {
+	0x0ULL,
+	0x1ULL,
+	0x7fULL,
+	0x80ULL,
+	0x81ULL,
+	0xffULL,
+	0x100ULL,
+	0x101ULL,
+	0x7fffULL,
+	0x8000ULL,
+	0x8001ULL,
+	0xffffULL,
+	0x10000ULL,
+	0x10001ULL,
+	0x7fffffffULL,
+	0x80000000ULL,
+	0x80000001ULL,
+	0xffffffffULL,
+	0x100000000ULL,
+	0x100000001ULL,
+	0x7fffffffffffffffULL,
+	0x8000000000000000ULL,
+	0x8000000000000001ULL,
+	0xfffffffffffffffeULL,
+	0xffffffffffffffffULL,
+};
+
+#define value_representable_in_type(T, val)					 \
+(is_signed_type(T)								 \
+	? ((long long)(val) >= type_min(T)) && ((long long)(val) <= type_max(T)) \
+	: ((unsigned long long)(val) >= type_min(T)) &&				 \
+	  ((unsigned long long)(val) <= type_max(T)))
+
+#define test_one_number(T, gen_fmt, scan_fmt, val, fn)			\
+do {									\
+	const T expect_val = (T)(val);					\
+	T result = ~expect_val; /* should be overwritten */		\
+									\
+	snprintf(test_buffer, BUF_SIZE, gen_fmt, expect_val);		\
+	_test(fn, &expect_val, test_buffer, "%" scan_fmt, 1, &result);	\
+} while (0)
+
+#define simple_numbers_loop(T, gen_fmt, scan_fmt, fn)			\
+do {									\
+	int i;								\
+									\
+	for (i = 0; i < ARRAY_SIZE(numbers); i++) {			\
+		if (!value_representable_in_type(T, numbers[i]))	\
+			continue;					\
+									\
+		test_one_number(T, gen_fmt, scan_fmt, numbers[i], fn);	\
+									\
+		if (is_signed_type(T))					\
+			test_one_number(T, gen_fmt, scan_fmt,		\
+					-numbers[i], fn);		\
+	}								\
+} while (0)
+
+static void __init numbers_simple(void)
+{
+	simple_numbers_loop(unsigned long long,	"%llu",	  "llu", check_ull);
+	simple_numbers_loop(long long,		"%lld",	  "lld", check_ll);
+	simple_numbers_loop(long long,		"%lld",	  "lli", check_ll);
+	simple_numbers_loop(unsigned long long,	"%llx",	  "llx", check_ull);
+	simple_numbers_loop(long long,		"%llx",	  "llx", check_ll);
+	simple_numbers_loop(long long,		"0x%llx", "lli", check_ll);
+	simple_numbers_loop(unsigned long long, "0x%llx", "llx", check_ull);
+	simple_numbers_loop(long long,		"0x%llx", "llx", check_ll);
+
+	simple_numbers_loop(unsigned long,	"%lu",	  "lu", check_ulong);
+	simple_numbers_loop(long,		"%ld",	  "ld", check_long);
+	simple_numbers_loop(long,		"%ld",	  "li", check_long);
+	simple_numbers_loop(unsigned long,	"%lx",	  "lx", check_ulong);
+	simple_numbers_loop(long,		"%lx",	  "lx", check_long);
+	simple_numbers_loop(long,		"0x%lx",  "li", check_long);
+	simple_numbers_loop(unsigned long,	"0x%lx",  "lx", check_ulong);
+	simple_numbers_loop(long,		"0x%lx",  "lx", check_long);
+
+	simple_numbers_loop(unsigned int,	"%u",	  "u", check_uint);
+	simple_numbers_loop(int,		"%d",	  "d", check_int);
+	simple_numbers_loop(int,		"%d",	  "i", check_int);
+	simple_numbers_loop(unsigned int,	"%x",	  "x", check_uint);
+	simple_numbers_loop(int,		"%x",	  "x", check_int);
+	simple_numbers_loop(int,		"0x%x",   "i", check_int);
+	simple_numbers_loop(unsigned int,	"0x%x",   "x", check_uint);
+	simple_numbers_loop(int,		"0x%x",   "x", check_int);
+
+	simple_numbers_loop(unsigned short,	"%hu",	  "hu", check_ushort);
+	simple_numbers_loop(short,		"%hd",	  "hd", check_short);
+	simple_numbers_loop(short,		"%hd",	  "hi", check_short);
+	simple_numbers_loop(unsigned short,	"%hx",	  "hx", check_ushort);
+	simple_numbers_loop(short,		"%hx",	  "hx", check_short);
+	simple_numbers_loop(short,		"0x%hx",  "hi", check_short);
+	simple_numbers_loop(unsigned short,	"0x%hx",  "hx", check_ushort);
+	simple_numbers_loop(short,		"0x%hx",  "hx", check_short);
+
+	simple_numbers_loop(unsigned char,	"%hhu",	  "hhu", check_uchar);
+	simple_numbers_loop(signed char,	"%hhd",	  "hhd", check_char);
+	simple_numbers_loop(signed char,	"%hhd",	  "hhi", check_char);
+	simple_numbers_loop(unsigned char,	"%hhx",	  "hhx", check_uchar);
+	simple_numbers_loop(signed char,	"%hhx",	  "hhx", check_char);
+	simple_numbers_loop(signed char,	"0x%hhx", "hhi", check_char);
+	simple_numbers_loop(unsigned char,	"0x%hhx", "hhx", check_uchar);
+	simple_numbers_loop(signed char,	"0x%hhx", "hhx", check_char);
+}
+
+/*
+ * This gives a better variety of number "lengths" in a small sample than
+ * the raw prandom*() functions (Not mathematically rigorous!!).
+ * Variabilty of length and value is more important than perfect randomness.
+ */
+static u32 __init next_test_random(u32 max_bits)
+{
+	u32 n_bits = hweight32(prandom_u32_state(&rnd_state)) % (max_bits + 1);
+
+	return prandom_u32_state(&rnd_state) & (UINT_MAX >> (32 - n_bits));
+}
+
+static unsigned long long __init next_test_random_ull(void)
+{
+	u32 rand1 = prandom_u32_state(&rnd_state);
+	u32 n_bits = (hweight32(rand1) * 3) % 64;
+	u64 val = (u64)prandom_u32_state(&rnd_state) * rand1;
+
+	return val & (ULLONG_MAX >> (64 - n_bits));
+}
+
+#define random_for_type(T)				\
+	((T)(sizeof(T) <= sizeof(u32)			\
+		? next_test_random(BITS_PER_TYPE(T))	\
+		: next_test_random_ull()))
+
+/*
+ * Define a pattern of negative and positive numbers to ensure we get
+ * some of both within the small number of samples in a test string.
+ */
+#define NEGATIVES_PATTERN 0x3246	/* 00110010 01000110 */
+
+#define fill_random_array(arr)							\
+do {										\
+	unsigned int neg_pattern = NEGATIVES_PATTERN;				\
+	int i;									\
+										\
+	for (i = 0; i < ARRAY_SIZE(arr); i++, neg_pattern >>= 1) {		\
+		(arr)[i] = random_for_type(typeof((arr)[0]));			\
+		if (is_signed_type(typeof((arr)[0])) && (neg_pattern & 1))	\
+			(arr)[i] = -(arr)[i];					\
+	}									\
+} while (0)
+
+/*
+ * Convenience wrapper around snprintf() to append at buf_pos in buf,
+ * updating buf_pos and returning the number of characters appended.
+ * On error buf_pos is not changed and return value is 0.
+ */
+static int __init __printf(4, 5)
+append_fmt(char *buf, int *buf_pos, int buf_len, const char *val_fmt, ...)
+{
+	va_list ap;
+	int field_len;
+
+	va_start(ap, val_fmt);
+	field_len = vsnprintf(buf + *buf_pos, buf_len - *buf_pos, val_fmt, ap);
+	va_end(ap);
+
+	if (field_len < 0)
+		field_len = 0;
+
+	*buf_pos += field_len;
+
+	return field_len;
+}
+
+/*
+ * Convenience function to append the field delimiter string
+ * to both the value string and format string buffers.
+ */
+static void __init append_delim(char *str_buf, int *str_buf_pos, int str_buf_len,
+				char *fmt_buf, int *fmt_buf_pos, int fmt_buf_len,
+				const char *delim_str)
+{
+	append_fmt(str_buf, str_buf_pos, str_buf_len, delim_str);
+	append_fmt(fmt_buf, fmt_buf_pos, fmt_buf_len, delim_str);
+}
+
+#define test_array_8(fn, check_data, string, fmt, arr)				\
+do {										\
+	BUILD_BUG_ON(ARRAY_SIZE(arr) != 8);					\
+	_test(fn, check_data, string, fmt, 8,					\
+		&(arr)[0], &(arr)[1], &(arr)[2], &(arr)[3],			\
+		&(arr)[4], &(arr)[5], &(arr)[6], &(arr)[7]);			\
+} while (0)
+
+#define numbers_list_8(T, gen_fmt, field_sep, scan_fmt, fn)			\
+do {										\
+	int i, pos = 0, fmt_pos = 0;						\
+	T expect[8], result[8];							\
+										\
+	fill_random_array(expect);						\
+										\
+	for (i = 0; i < ARRAY_SIZE(expect); i++) {				\
+		if (i != 0)							\
+			append_delim(test_buffer, &pos, BUF_SIZE,		\
+				     fmt_buffer, &fmt_pos, BUF_SIZE,		\
+				     field_sep);				\
+										\
+		append_fmt(test_buffer, &pos, BUF_SIZE, gen_fmt, expect[i]);	\
+		append_fmt(fmt_buffer, &fmt_pos, BUF_SIZE, "%%%s", scan_fmt);	\
+	}									\
+										\
+	test_array_8(fn, expect, test_buffer, fmt_buffer, result);		\
+} while (0)
+
+#define numbers_list_fix_width(T, gen_fmt, field_sep, width, scan_fmt, fn)	\
+do {										\
+	char full_fmt[16];							\
+										\
+	snprintf(full_fmt, sizeof(full_fmt), "%u%s", width, scan_fmt);		\
+	numbers_list_8(T, gen_fmt, field_sep, full_fmt, fn);			\
+} while (0)
+
+#define numbers_list_val_width(T, gen_fmt, field_sep, scan_fmt, fn)		\
+do {										\
+	int i, val_len, pos = 0, fmt_pos = 0;					\
+	T expect[8], result[8];							\
+										\
+	fill_random_array(expect);						\
+										\
+	for (i = 0; i < ARRAY_SIZE(expect); i++) {				\
+		if (i != 0)							\
+			append_delim(test_buffer, &pos, BUF_SIZE,		\
+				     fmt_buffer, &fmt_pos, BUF_SIZE, field_sep);\
+										\
+		val_len = append_fmt(test_buffer, &pos, BUF_SIZE, gen_fmt,	\
+				     expect[i]);				\
+		append_fmt(fmt_buffer, &fmt_pos, BUF_SIZE,			\
+			   "%%%u%s", val_len, scan_fmt);			\
+	}									\
+										\
+	test_array_8(fn, expect, test_buffer, fmt_buffer, result);		\
+} while (0)
+
+static void __init numbers_list(const char *delim)
+{
+	numbers_list_8(unsigned long long, "%llu",   delim, "llu", check_ull);
+	numbers_list_8(long long,	   "%lld",   delim, "lld", check_ll);
+	numbers_list_8(long long,	   "%lld",   delim, "lli", check_ll);
+	numbers_list_8(unsigned long long, "%llx",   delim, "llx", check_ull);
+	numbers_list_8(unsigned long long, "0x%llx", delim, "llx", check_ull);
+	numbers_list_8(long long,	   "0x%llx", delim, "lli", check_ll);
+
+	numbers_list_8(unsigned long,	   "%lu",    delim, "lu", check_ulong);
+	numbers_list_8(long,		   "%ld",    delim, "ld", check_long);
+	numbers_list_8(long,		   "%ld",    delim, "li", check_long);
+	numbers_list_8(unsigned long,	   "%lx",    delim, "lx", check_ulong);
+	numbers_list_8(unsigned long,	   "0x%lx",  delim, "lx", check_ulong);
+	numbers_list_8(long,		   "0x%lx",  delim, "li", check_long);
+
+	numbers_list_8(unsigned int,	   "%u",     delim, "u", check_uint);
+	numbers_list_8(int,		   "%d",     delim, "d", check_int);
+	numbers_list_8(int,		   "%d",     delim, "i", check_int);
+	numbers_list_8(unsigned int,	   "%x",     delim, "x", check_uint);
+	numbers_list_8(unsigned int,	   "0x%x",   delim, "x", check_uint);
+	numbers_list_8(int,		   "0x%x",   delim, "i", check_int);
+
+	numbers_list_8(unsigned short,	   "%hu",    delim, "hu", check_ushort);
+	numbers_list_8(short,		   "%hd",    delim, "hd", check_short);
+	numbers_list_8(short,		   "%hd",    delim, "hi", check_short);
+	numbers_list_8(unsigned short,	   "%hx",    delim, "hx", check_ushort);
+	numbers_list_8(unsigned short,	   "0x%hx",  delim, "hx", check_ushort);
+	numbers_list_8(short,		   "0x%hx",  delim, "hi", check_short);
+
+	numbers_list_8(unsigned char,	   "%hhu",   delim, "hhu", check_uchar);
+	numbers_list_8(signed char,	   "%hhd",   delim, "hhd", check_char);
+	numbers_list_8(signed char,	   "%hhd",   delim, "hhi", check_char);
+	numbers_list_8(unsigned char,	   "%hhx",   delim, "hhx", check_uchar);
+	numbers_list_8(unsigned char,	   "0x%hhx", delim, "hhx", check_uchar);
+	numbers_list_8(signed char,	   "0x%hhx", delim, "hhi", check_char);
+}
+
+/*
+ * List of numbers separated by delim. Each field width specifier is the
+ * maximum possible digits for the given type and base.
+ */
+static void __init numbers_list_field_width_typemax(const char *delim)
+{
+	numbers_list_fix_width(unsigned long long, "%llu",   delim, 20, "llu", check_ull);
+	numbers_list_fix_width(long long,	   "%lld",   delim, 20, "lld", check_ll);
+	numbers_list_fix_width(long long,	   "%lld",   delim, 20, "lli", check_ll);
+	numbers_list_fix_width(unsigned long long, "%llx",   delim, 16, "llx", check_ull);
+	numbers_list_fix_width(unsigned long long, "0x%llx", delim, 18, "llx", check_ull);
+	numbers_list_fix_width(long long,	   "0x%llx", delim, 18, "lli", check_ll);
+
+#if BITS_PER_LONG == 64
+	numbers_list_fix_width(unsigned long,	"%lu",	     delim, 20, "lu", check_ulong);
+	numbers_list_fix_width(long,		"%ld",	     delim, 20, "ld", check_long);
+	numbers_list_fix_width(long,		"%ld",	     delim, 20, "li", check_long);
+	numbers_list_fix_width(unsigned long,	"%lx",	     delim, 16, "lx", check_ulong);
+	numbers_list_fix_width(unsigned long,	"0x%lx",     delim, 18, "lx", check_ulong);
+	numbers_list_fix_width(long,		"0x%lx",     delim, 18, "li", check_long);
+#else
+	numbers_list_fix_width(unsigned long,	"%lu",	     delim, 10, "lu", check_ulong);
+	numbers_list_fix_width(long,		"%ld",	     delim, 11, "ld", check_long);
+	numbers_list_fix_width(long,		"%ld",	     delim, 11, "li", check_long);
+	numbers_list_fix_width(unsigned long,	"%lx",	     delim, 8,  "lx", check_ulong);
+	numbers_list_fix_width(unsigned long,	"0x%lx",     delim, 10, "lx", check_ulong);
+	numbers_list_fix_width(long,		"0x%lx",     delim, 10, "li", check_long);
+#endif
+
+	numbers_list_fix_width(unsigned int,	"%u",	     delim, 10, "u", check_uint);
+	numbers_list_fix_width(int,		"%d",	     delim, 11, "d", check_int);
+	numbers_list_fix_width(int,		"%d",	     delim, 11, "i", check_int);
+	numbers_list_fix_width(unsigned int,	"%x",	     delim, 8,  "x", check_uint);
+	numbers_list_fix_width(unsigned int,	"0x%x",	     delim, 10, "x", check_uint);
+	numbers_list_fix_width(int,		"0x%x",	     delim, 10, "i", check_int);
+
+	numbers_list_fix_width(unsigned short,	"%hu",	     delim, 5, "hu", check_ushort);
+	numbers_list_fix_width(short,		"%hd",	     delim, 6, "hd", check_short);
+	numbers_list_fix_width(short,		"%hd",	     delim, 6, "hi", check_short);
+	numbers_list_fix_width(unsigned short,	"%hx",	     delim, 4, "hx", check_ushort);
+	numbers_list_fix_width(unsigned short,	"0x%hx",     delim, 6, "hx", check_ushort);
+	numbers_list_fix_width(short,		"0x%hx",     delim, 6, "hi", check_short);
+
+	numbers_list_fix_width(unsigned char,	"%hhu",	     delim, 3, "hhu", check_uchar);
+	numbers_list_fix_width(signed char,	"%hhd",	     delim, 4, "hhd", check_char);
+	numbers_list_fix_width(signed char,	"%hhd",	     delim, 4, "hhi", check_char);
+	numbers_list_fix_width(unsigned char,	"%hhx",	     delim, 2, "hhx", check_uchar);
+	numbers_list_fix_width(unsigned char,	"0x%hhx",    delim, 4, "hhx", check_uchar);
+	numbers_list_fix_width(signed char,	"0x%hhx",    delim, 4, "hhi", check_char);
+}
+
+/*
+ * List of numbers separated by delim. Each field width specifier is the
+ * exact length of the corresponding value digits in the string being scanned.
+ */
+static void __init numbers_list_field_width_val_width(const char *delim)
+{
+	numbers_list_val_width(unsigned long long, "%llu",   delim, "llu", check_ull);
+	numbers_list_val_width(long long,	   "%lld",   delim, "lld", check_ll);
+	numbers_list_val_width(long long,	   "%lld",   delim, "lli", check_ll);
+	numbers_list_val_width(unsigned long long, "%llx",   delim, "llx", check_ull);
+	numbers_list_val_width(unsigned long long, "0x%llx", delim, "llx", check_ull);
+	numbers_list_val_width(long long,	   "0x%llx", delim, "lli", check_ll);
+
+	numbers_list_val_width(unsigned long,	"%lu",	     delim, "lu", check_ulong);
+	numbers_list_val_width(long,		"%ld",	     delim, "ld", check_long);
+	numbers_list_val_width(long,		"%ld",	     delim, "li", check_long);
+	numbers_list_val_width(unsigned long,	"%lx",	     delim, "lx", check_ulong);
+	numbers_list_val_width(unsigned long,	"0x%lx",     delim, "lx", check_ulong);
+	numbers_list_val_width(long,		"0x%lx",     delim, "li", check_long);
+
+	numbers_list_val_width(unsigned int,	"%u",	     delim, "u", check_uint);
+	numbers_list_val_width(int,		"%d",	     delim, "d", check_int);
+	numbers_list_val_width(int,		"%d",	     delim, "i", check_int);
+	numbers_list_val_width(unsigned int,	"%x",	     delim, "x", check_uint);
+	numbers_list_val_width(unsigned int,	"0x%x",	     delim, "x", check_uint);
+	numbers_list_val_width(int,		"0x%x",	     delim, "i", check_int);
+
+	numbers_list_val_width(unsigned short,	"%hu",	     delim, "hu", check_ushort);
+	numbers_list_val_width(short,		"%hd",	     delim, "hd", check_short);
+	numbers_list_val_width(short,		"%hd",	     delim, "hi", check_short);
+	numbers_list_val_width(unsigned short,	"%hx",	     delim, "hx", check_ushort);
+	numbers_list_val_width(unsigned short,	"0x%hx",     delim, "hx", check_ushort);
+	numbers_list_val_width(short,		"0x%hx",     delim, "hi", check_short);
+
+	numbers_list_val_width(unsigned char,	"%hhu",	     delim, "hhu", check_uchar);
+	numbers_list_val_width(signed char,	"%hhd",	     delim, "hhd", check_char);
+	numbers_list_val_width(signed char,	"%hhd",	     delim, "hhi", check_char);
+	numbers_list_val_width(unsigned char,	"%hhx",	     delim, "hhx", check_uchar);
+	numbers_list_val_width(unsigned char,	"0x%hhx",    delim, "hhx", check_uchar);
+	numbers_list_val_width(signed char,	"0x%hhx",    delim, "hhi", check_char);
+}
+
+/*
+ * Slice a continuous string of digits without field delimiters, containing
+ * numbers of varying length, using the field width to extract each group
+ * of digits. For example the hex values c0,3,bf01,303 would have a
+ * string representation of "c03bf01303" and extracted with "%2x%1x%4x%3x".
+ */
+static void __init numbers_slice(void)
+{
+	numbers_list_field_width_val_width("");
+}
+
+#define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn)	\
+do {										\
+	const T expect[2] = { expect0, expect1 };				\
+	T result[2] = {~expect[0], ~expect[1]};					\
+										\
+	_test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]);	\
+} while (0)
+
+/*
+ * Number prefix is >= field width.
+ * Expected behaviour is derived from testing userland sscanf.
+ */
+static void __init numbers_prefix_overflow(void)
+{
+	/*
+	 * Negative decimal with a field of width 1, should quit scanning
+	 * and return 0.
+	 */
+	test_number_prefix(long long,	"-1 1", "%1lld %lld",	0, 0, 0, check_ll);
+	test_number_prefix(long,	"-1 1", "%1ld %ld",	0, 0, 0, check_long);
+	test_number_prefix(int,		"-1 1", "%1d %d",	0, 0, 0, check_int);
+	test_number_prefix(short,	"-1 1", "%1hd %hd",	0, 0, 0, check_short);
+	test_number_prefix(signed char,	"-1 1", "%1hhd %hhd",	0, 0, 0, check_char);
+
+	test_number_prefix(long long,	"-1 1", "%1lli %lli",	0, 0, 0, check_ll);
+	test_number_prefix(long,	"-1 1", "%1li %li",	0, 0, 0, check_long);
+	test_number_prefix(int,		"-1 1", "%1i %i",	0, 0, 0, check_int);
+	test_number_prefix(short,	"-1 1", "%1hi %hi",	0, 0, 0, check_short);
+	test_number_prefix(signed char,	"-1 1", "%1hhi %hhi",	0, 0, 0, check_char);
+
+	/*
+	 * 0x prefix in a field of width 1: 0 is a valid digit so should
+	 * convert. Next field scan starts at the 'x' which isn't a digit so
+	 * scan quits with one field converted.
+	 */
+	test_number_prefix(unsigned long long,	"0xA7", "%1llx%llx", 0, 0, 1, check_ull);
+	test_number_prefix(unsigned long,	"0xA7", "%1lx%lx",   0, 0, 1, check_ulong);
+	test_number_prefix(unsigned int,	"0xA7", "%1x%x",     0, 0, 1, check_uint);
+	test_number_prefix(unsigned short,	"0xA7", "%1hx%hx",   0, 0, 1, check_ushort);
+	test_number_prefix(unsigned char,	"0xA7", "%1hhx%hhx", 0, 0, 1, check_uchar);
+	test_number_prefix(long long,		"0xA7", "%1lli%llx", 0, 0, 1, check_ll);
+	test_number_prefix(long,		"0xA7", "%1li%lx",   0, 0, 1, check_long);
+	test_number_prefix(int,			"0xA7", "%1i%x",     0, 0, 1, check_int);
+	test_number_prefix(short,		"0xA7", "%1hi%hx",   0, 0, 1, check_short);
+	test_number_prefix(char,		"0xA7", "%1hhi%hhx", 0, 0, 1, check_char);
+
+	/*
+	 * 0x prefix in a field of width 2 using %x conversion: first field
+	 * converts to 0. Next field scan starts at the character after "0x".
+	 * Both fields will convert.
+	 */
+	test_number_prefix(unsigned long long,	"0xA7", "%2llx%llx", 0, 0xa7, 2, check_ull);
+	test_number_prefix(unsigned long,	"0xA7", "%2lx%lx",   0, 0xa7, 2, check_ulong);
+	test_number_prefix(unsigned int,	"0xA7", "%2x%x",     0, 0xa7, 2, check_uint);
+	test_number_prefix(unsigned short,	"0xA7", "%2hx%hx",   0, 0xa7, 2, check_ushort);
+	test_number_prefix(unsigned char,	"0xA7", "%2hhx%hhx", 0, 0xa7, 2, check_uchar);
+
+	/*
+	 * 0x prefix in a field of width 2 using %i conversion: first field
+	 * converts to 0. Next field scan starts at the character after "0x",
+	 * which will convert if can be intepreted as decimal but will fail
+	 * if it contains any hex digits (since no 0x prefix).
+	 */
+	test_number_prefix(long long,	"0x67", "%2lli%lli", 0, 67, 2, check_ll);
+	test_number_prefix(long,	"0x67", "%2li%li",   0, 67, 2, check_long);
+	test_number_prefix(int,		"0x67", "%2i%i",     0, 67, 2, check_int);
+	test_number_prefix(short,	"0x67", "%2hi%hi",   0, 67, 2, check_short);
+	test_number_prefix(char,	"0x67", "%2hhi%hhi", 0, 67, 2, check_char);
+
+	test_number_prefix(long long,	"0xA7", "%2lli%lli", 0, 0,  1, check_ll);
+	test_number_prefix(long,	"0xA7", "%2li%li",   0, 0,  1, check_long);
+	test_number_prefix(int,		"0xA7", "%2i%i",     0, 0,  1, check_int);
+	test_number_prefix(short,	"0xA7", "%2hi%hi",   0, 0,  1, check_short);
+	test_number_prefix(char,	"0xA7", "%2hhi%hhi", 0, 0,  1, check_char);
+}
+
+#define _test_simple_strtoxx(T, fn, gen_fmt, expect, base)			\
+do {										\
+	T got;									\
+	char *endp;								\
+	int len;								\
+	bool fail = false;							\
+										\
+	total_tests++;								\
+	len = snprintf(test_buffer, BUF_SIZE, gen_fmt, expect);			\
+	got = (fn)(test_buffer, &endp, base);					\
+	pr_debug(#fn "(\"%s\", %d) -> " gen_fmt "\n", test_buffer, base, got);	\
+	if (got != (expect)) {							\
+		fail = true;							\
+		pr_warn(#fn "(\"%s\", %d): got " gen_fmt " expected " gen_fmt "\n", \
+			test_buffer, base, got, expect);			\
+	} else if (endp != test_buffer + len) {					\
+		fail = true;							\
+		pr_warn(#fn "(\"%s\", %d) startp=0x%px got endp=0x%px expected 0x%px\n", \
+			test_buffer, base, test_buffer,				\
+			test_buffer + len, endp);				\
+	}									\
+										\
+	if (fail)								\
+		failed_tests++;							\
+} while (0)
+
+#define test_simple_strtoxx(T, fn, gen_fmt, base)				\
+do {										\
+	int i;									\
+										\
+	for (i = 0; i < ARRAY_SIZE(numbers); i++) {				\
+		_test_simple_strtoxx(T, fn, gen_fmt, (T)numbers[i], base);	\
+										\
+		if (is_signed_type(T))						\
+			_test_simple_strtoxx(T, fn, gen_fmt,			\
+					      -(T)numbers[i], base);		\
+	}									\
+} while (0)
+
+static void __init test_simple_strtoull(void)
+{
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "%llu",   10);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "%llu",   0);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "%llx",   16);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "0x%llx", 16);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "0x%llx", 0);
+}
+
+static void __init test_simple_strtoll(void)
+{
+	test_simple_strtoxx(long long, simple_strtoll, "%lld",	 10);
+	test_simple_strtoxx(long long, simple_strtoll, "%lld",	 0);
+	test_simple_strtoxx(long long, simple_strtoll, "%llx",	 16);
+	test_simple_strtoxx(long long, simple_strtoll, "0x%llx", 16);
+	test_simple_strtoxx(long long, simple_strtoll, "0x%llx", 0);
+}
+
+static void __init test_simple_strtoul(void)
+{
+	test_simple_strtoxx(unsigned long, simple_strtoul, "%lu",   10);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "%lu",   0);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "%lx",   16);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "0x%lx", 16);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "0x%lx", 0);
+}
+
+static void __init test_simple_strtol(void)
+{
+	test_simple_strtoxx(long, simple_strtol, "%ld",   10);
+	test_simple_strtoxx(long, simple_strtol, "%ld",   0);
+	test_simple_strtoxx(long, simple_strtol, "%lx",   16);
+	test_simple_strtoxx(long, simple_strtol, "0x%lx", 16);
+	test_simple_strtoxx(long, simple_strtol, "0x%lx", 0);
+}
+
+/* Selection of common delimiters/separators between numbers in a string. */
+static const char * const number_delimiters[] __initconst = {
+	" ", ":", ",", "-", "/",
+};
+
+static void __init test_numbers(void)
+{
+	int i;
+
+	/* String containing only one number. */
+	numbers_simple();
+
+	/* String with multiple numbers separated by delimiter. */
+	for (i = 0; i < ARRAY_SIZE(number_delimiters); i++) {
+		numbers_list(number_delimiters[i]);
+
+		/* Field width may be longer than actual field digits. */
+		numbers_list_field_width_typemax(number_delimiters[i]);
+
+		/* Each field width exactly length of actual field digits. */
+		numbers_list_field_width_val_width(number_delimiters[i]);
+	}
+
+	/* Slice continuous sequence of digits using field widths. */
+	numbers_slice();
+
+	numbers_prefix_overflow();
+}
+
+static void __init selftest(void)
+{
+	test_buffer = kmalloc(BUF_SIZE, GFP_KERNEL);
+	if (!test_buffer)
+		return;
+
+	fmt_buffer = kmalloc(BUF_SIZE, GFP_KERNEL);
+	if (!fmt_buffer) {
+		kfree(test_buffer);
+		return;
+	}
+
+	prandom_seed_state(&rnd_state, 3141592653589793238ULL);
+
+	test_numbers();
+
+	test_simple_strtoull();
+	test_simple_strtoll();
+	test_simple_strtoul();
+	test_simple_strtol();
+
+	kfree(fmt_buffer);
+	kfree(test_buffer);
+}
+
+KSTM_MODULE_LOADERS(test_scanf);
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");

From ef04d4ff4b19628c78abddc768acce097d35d086 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:06 +0100
Subject: [PATCH 0304/3292] selftests: lib: Add wrapper script for test_scanf

Adds a wrapper shell script for the test_scanf module.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-4-rf@opensource.cirrus.com
---
 tools/testing/selftests/lib/Makefile | 2 +-
 tools/testing/selftests/lib/config   | 1 +
 tools/testing/selftests/lib/scanf.sh | 4 ++++
 3 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/lib/scanf.sh

diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..ee71fc99d5b5 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..645839b50b0a 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,4 +1,5 @@
 CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_PRIME_NUMBERS=m
 CONFIG_TEST_STRSCPY=m
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
new file mode 100755
index 000000000000..b59b8ba561c3
--- /dev/null
+++ b/tools/testing/selftests/lib/scanf.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Tests the scanf infrastructure using test_scanf kernel module.
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf

From 5d67f349590ddc94b6d4e25f19085728db9de697 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 18 May 2021 18:40:32 -0700
Subject: [PATCH 0305/3292] bpf: Add cmd alias BPF_PROG_RUN

Add BPF_PROG_RUN command as an alias to BPF_RPOG_TEST_RUN to better
indicate the full range of use cases done by the command.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210519014032.20908-1-alexei.starovoitov@gmail.com
---
 include/uapi/linux/bpf.h       | 1 +
 tools/include/uapi/linux/bpf.h | 1 +
 tools/lib/bpf/skel_internal.h  | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4cd9a0181f27..418b9b813d65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -837,6 +837,7 @@ enum bpf_cmd {
 	BPF_PROG_ATTACH,
 	BPF_PROG_DETACH,
 	BPF_PROG_TEST_RUN,
+	BPF_PROG_RUN = BPF_PROG_TEST_RUN,
 	BPF_PROG_GET_NEXT_ID,
 	BPF_MAP_GET_NEXT_ID,
 	BPF_PROG_GET_FD_BY_ID,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4cd9a0181f27..418b9b813d65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -837,6 +837,7 @@ enum bpf_cmd {
 	BPF_PROG_ATTACH,
 	BPF_PROG_DETACH,
 	BPF_PROG_TEST_RUN,
+	BPF_PROG_RUN = BPF_PROG_TEST_RUN,
 	BPF_PROG_GET_NEXT_ID,
 	BPF_MAP_GET_NEXT_ID,
 	BPF_PROG_GET_FD_BY_ID,
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 12a126b452c1..b22b50c1b173 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -102,7 +102,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	attr.test.prog_fd = prog_fd;
 	attr.test.ctx_in = (long) opts->ctx;
 	attr.test.ctx_size_in = opts->ctx->sz;
-	err = skel_sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
 	if (err < 0 || (int)attr.test.retval < 0) {
 		opts->errstr = "failed to execute loader prog";
 		if (err < 0)

From 3a2daa7248647c0e5e165140553f9af5006e93a2 Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Wed, 19 May 2021 14:41:16 +0800
Subject: [PATCH 0306/3292] bpf: Make some symbols static

The sparse tool complains as follows:

kernel/bpf/syscall.c:4567:29: warning:
 symbol 'bpf_sys_bpf_proto' was not declared. Should it be static?
kernel/bpf/syscall.c:4592:29: warning:
 symbol 'bpf_sys_close_proto' was not declared. Should it be static?

This symbol is not used outside of syscall.c, so marks it static.

Signed-off-by: Pu Lehui <pulehui@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210519064116.240536-1-pulehui@huawei.com
---
 kernel/bpf/syscall.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0f1ce2171f1e..1d1cd80a6e67 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4563,7 +4563,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
 	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
 }
 
-const struct bpf_func_proto bpf_sys_bpf_proto = {
+static const struct bpf_func_proto bpf_sys_bpf_proto = {
 	.func		= bpf_sys_bpf,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
@@ -4588,7 +4588,7 @@ BPF_CALL_1(bpf_sys_close, u32, fd)
 	return close_fd(fd);
 }
 
-const struct bpf_func_proto bpf_sys_close_proto = {
+static const struct bpf_func_proto bpf_sys_close_proto = {
 	.func		= bpf_sys_close,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,

From 1756055de28412b6820b1221b1ab0092f6e780cb Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 19 May 2021 10:28:49 +0800
Subject: [PATCH 0307/3292] ibmveth: fix kobj_to_dev.cocci warnings

Use kobj_to_dev() instead of container_of()

Generated by: scripts/coccinelle/api/kobj_to_dev.cocci

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmveth.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 7fea9ae60f13..bc67a7ee872b 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1799,8 +1799,7 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr,
 	struct ibmveth_buff_pool *pool = container_of(kobj,
 						      struct ibmveth_buff_pool,
 						      kobj);
-	struct net_device *netdev = dev_get_drvdata(
-	    container_of(kobj->parent, struct device, kobj));
+	struct net_device *netdev = dev_get_drvdata(kobj_to_dev(kobj->parent));
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
 	long value = simple_strtol(buf, NULL, 10);
 	long rc;

From bc6d076daa8c66c79bdceda963fa66273103a276 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 19 May 2021 10:38:50 +0800
Subject: [PATCH 0308/3292] tun: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 84f832806313..2ced021f4faf 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2559,15 +2559,15 @@ static int tun_flags(struct tun_struct *tun)
 	return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
 }
 
-static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr,
+static ssize_t tun_flags_show(struct device *dev, struct device_attribute *attr,
 			      char *buf)
 {
 	struct tun_struct *tun = netdev_priv(to_net_dev(dev));
 	return sprintf(buf, "0x%x\n", tun_flags(tun));
 }
 
-static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr,
-			      char *buf)
+static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
 {
 	struct tun_struct *tun = netdev_priv(to_net_dev(dev));
 	return uid_valid(tun->owner)?
@@ -2576,8 +2576,8 @@ static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr,
 		sprintf(buf, "-1\n");
 }
 
-static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr,
-			      char *buf)
+static ssize_t group_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
 {
 	struct tun_struct *tun = netdev_priv(to_net_dev(dev));
 	return gid_valid(tun->group) ?
@@ -2586,9 +2586,9 @@ static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr,
 		sprintf(buf, "-1\n");
 }
 
-static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL);
-static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL);
-static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
+static DEVICE_ATTR_RO(tun_flags);
+static DEVICE_ATTR_RO(owner);
+static DEVICE_ATTR_RO(group);
 
 static struct attribute *tun_dev_attrs[] = {
 	&dev_attr_tun_flags.attr,

From 959dc069aed8bad75e27eb193f55070b51a5afbc Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:34 +0800
Subject: [PATCH 0309/3292] net: 3com: remove leading spaces before tabs

There are a few leading space before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Steffen Klassert <klassert@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/3com/3c59x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 741c67e546d4..7d7d3ffe25c3 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1464,7 +1464,7 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
 	if (pdev) {
 		vp->pm_state_valid = 1;
 		pci_save_state(pdev);
- 		acpi_set_WOL(dev);
+		acpi_set_WOL(dev);
 	}
 	retval = register_netdev(dev);
 	if (retval == 0)

From 40b1f92676f2af2bb808db62569156ec15e00c4a Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:35 +0800
Subject: [PATCH 0310/3292] net: alteon: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Jes Sorensen <jes@trained-monkey.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/alteon/acenic.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 1a7e4df9b3e9..9dc12b13061f 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -1883,16 +1883,16 @@ static u32 ace_handle_event(struct net_device *dev, u32 evtcsm, u32 evtprd)
 				}
 			}
 
- 			if (ACE_IS_TIGON_I(ap)) {
- 				struct cmd cmd;
- 				cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
- 				cmd.code = 0;
- 				cmd.idx = 0;
- 				ace_issue_cmd(ap->regs, &cmd);
- 			} else {
- 				writel(0, &((ap->regs)->RxJumboPrd));
- 				wmb();
- 			}
+			if (ACE_IS_TIGON_I(ap)) {
+				struct cmd cmd;
+				cmd.evt = C_SET_RX_JUMBO_PRD_IDX;
+				cmd.code = 0;
+				cmd.idx = 0;
+				ace_issue_cmd(ap->regs, &cmd);
+			} else {
+				writel(0, &((ap->regs)->RxJumboPrd));
+				wmb();
+			}
 
 			ap->jumbo = 0;
 			ap->rx_jumbo_skbprd = 0;
@@ -2489,9 +2489,9 @@ restart:
 		}
 	}
 
- 	wmb();
- 	ap->tx_prd = idx;
- 	ace_set_txprd(regs, ap, idx);
+	wmb();
+	ap->tx_prd = idx;
+	ace_set_txprd(regs, ap, idx);
 
 	if (flagsize & BD_FLG_COAL_NOW) {
 		netif_stop_queue(dev);

From 106b4cb597666832b063df9d5e8d2bb102206f8b Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:36 +0800
Subject: [PATCH 0311/3292] net: amd: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/amd8111e.c   |  4 ++--
 drivers/net/ethernet/amd/amd8111e.h   |  6 +++---
 drivers/net/ethernet/amd/atarilance.c |  2 +-
 drivers/net/ethernet/amd/declance.c   |  2 +-
 drivers/net/ethernet/amd/lance.c      |  4 ++--
 drivers/net/ethernet/amd/ni65.c       | 12 ++++++------
 drivers/net/ethernet/amd/nmclan_cs.c  | 12 ++++++------
 drivers/net/ethernet/amd/sun3lance.c  | 12 ++++++------
 8 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 4a1220cc6f10..9cac5aa75a73 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -19,14 +19,14 @@ Module Name:
 
 Abstract:
 
- 	 AMD8111 based 10/100 Ethernet Controller Driver.
+	 AMD8111 based 10/100 Ethernet Controller Driver.
 
 Environment:
 
 	Kernel Mode
 
 Revision History:
- 	3.0.0
+	3.0.0
 	   Initial Revision.
 	3.0.1
 	 1. Dynamic interrupt coalescing.
diff --git a/drivers/net/ethernet/amd/amd8111e.h b/drivers/net/ethernet/amd/amd8111e.h
index 493f154eccf4..37da79da5f5e 100644
--- a/drivers/net/ethernet/amd/amd8111e.h
+++ b/drivers/net/ethernet/amd/amd8111e.h
@@ -10,14 +10,14 @@ Module Name:
 
 Abstract:
 
- 	 AMD8111 based 10/100 Ethernet Controller driver definitions.
+	 AMD8111 based 10/100 Ethernet Controller driver definitions.
 
 Environment:
 
 	Kernel Mode
 
 Revision History:
- 	3.0.0
+	3.0.0
 	   Initial Revision.
 	3.0.1
 */
@@ -692,7 +692,7 @@ enum coal_type{
 };
 
 enum coal_mode{
-       	RX_INTR_COAL,
+	RX_INTR_COAL,
 	TX_INTR_COAL,
 	DISABLE_COAL,
 	ENABLE_COAL,
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index c1eab916438f..36f54d13a2eb 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -706,7 +706,7 @@ static void lance_init_ring( struct net_device *dev )
 		CHECK_OFFSET(offset);
 		MEM->tx_head[i].base = offset;
 		MEM->tx_head[i].flag = TMD1_OWN_HOST;
- 		MEM->tx_head[i].base_hi = 0;
+		MEM->tx_head[i].base_hi = 0;
 		MEM->tx_head[i].length = 0;
 		MEM->tx_head[i].misc = 0;
 		offset += PKT_BUF_SZ;
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 7282ce55ffb8..493b0cefcc2a 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -937,7 +937,7 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev_kfree_skb(skb);
 
- 	return NETDEV_TX_OK;
+	return NETDEV_TX_OK;
 }
 
 static void lance_load_multicast(struct net_device *dev)
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index aff44241988c..2178e6b89dbd 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -780,7 +780,7 @@ lance_open(struct net_device *dev)
 		outw(0x0002, ioaddr+LANCE_ADDR);
 		/* Only touch autoselect bit. */
 		outw(inw(ioaddr+LANCE_BUS_IF) | 0x0002, ioaddr+LANCE_BUS_IF);
- 	}
+	}
 
 	if (lance_debug > 1)
 		printk("%s: lance_open() irq %d dma %d tx/rx rings %#x/%#x init %#x.\n",
@@ -812,7 +812,7 @@ lance_open(struct net_device *dev)
 	 * We used to clear the InitDone bit, 0x0100, here but Mark Stockton
 	 * reports that doing so triggers a bug in the '974.
 	 */
- 	outw(0x0042, ioaddr+LANCE_DATA);
+	outw(0x0042, ioaddr+LANCE_DATA);
 
 	if (lance_debug > 2)
 		printk("%s: LANCE open after %d ticks, init block %#x csr0 %4.4x.\n",
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index c38edf6f03a3..5c1cfb0c4a42 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -193,7 +193,7 @@ static struct card {
 		.vendor_id   = ni_vendor,
 		.cardname    = "ni6510",
 		.config	     = 0x1,
-       	},
+	},
 	{
 		.id0	     = NI65_EB_ID0,
 		.id1	     = NI65_EB_ID1,
@@ -204,7 +204,7 @@ static struct card {
 		.vendor_id   = ni_vendor,
 		.cardname    = "ni6510 EtherBlaster",
 		.config	     = 0x2,
-       	},
+	},
 	{
 		.id0	     = NE2100_ID0,
 		.id1	     = NE2100_ID1,
@@ -1232,15 +1232,15 @@ MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
 
 int __init init_module(void)
 {
- 	dev_ni65 = ni65_probe(-1);
+	dev_ni65 = ni65_probe(-1);
 	return PTR_ERR_OR_ZERO(dev_ni65);
 }
 
 void __exit cleanup_module(void)
 {
- 	unregister_netdev(dev_ni65);
- 	cleanup_card(dev_ni65);
- 	free_netdev(dev_ni65);
+	unregister_netdev(dev_ni65);
+	cleanup_card(dev_ni65);
+	free_netdev(dev_ni65);
 }
 #endif /* MODULE */
 
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 11c0b13edd30..4019cab87505 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -541,7 +541,7 @@ static int mace_init(mace_private *lp, unsigned int ioaddr, char *enet_addr)
     if(++ct > 500)
     {
 	pr_err("reset failed, card removed?\n");
-    	return -1;
+	return -1;
     }
     udelay(1);
   }
@@ -585,11 +585,11 @@ static int mace_init(mace_private *lp, unsigned int ioaddr, char *enet_addr)
   ct = 0;
   while (mace_read(lp, ioaddr, MACE_IAC) & MACE_IAC_ADDRCHG)
   {
-  	if(++ ct > 500)
-  	{
+	if(++ ct > 500)
+	{
 		pr_err("ADDRCHG timeout, card removed?\n");
-  		return -1;
-  	}
+		return -1;
+	}
   }
   /* Set PADR register */
   for (i = 0; i < ETH_ALEN; i++)
@@ -655,7 +655,7 @@ static int nmclan_config(struct pcmcia_device *link)
   }
 
   if(mace_init(lp, ioaddr, dev->dev_addr) == -1)
-  	goto failed;
+	goto failed;
 
   /* The if_port symbol can be set when the module is loaded */
   if (if_port <= 2)
diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c
index 00ae1081254d..f8d7a9387a56 100644
--- a/drivers/net/ethernet/amd/sun3lance.c
+++ b/drivers/net/ethernet/amd/sun3lance.c
@@ -150,7 +150,7 @@ struct lance_memory {
 struct lance_private {
 	volatile unsigned short	*iobase;
 	struct lance_memory	*mem;
-     	int new_rx, new_tx;	/* The next free ring entry */
+	int new_rx, new_tx;	/* The next free ring entry */
 	int old_tx, old_rx;     /* ring entry to be processed */
 /* These two must be longs for set_bit() */
 	long	    tx_full;
@@ -465,7 +465,7 @@ static void lance_init_ring( struct net_device *dev )
 	for( i = 0; i < TX_RING_SIZE; i++ ) {
 		MEM->tx_head[i].base = dvma_vtob(MEM->tx_data[i]);
 		MEM->tx_head[i].flag = 0;
- 		MEM->tx_head[i].base_hi =
+		MEM->tx_head[i].base_hi =
 			(dvma_vtob(MEM->tx_data[i])) >>16;
 		MEM->tx_head[i].length = 0;
 		MEM->tx_head[i].misc = 0;
@@ -581,8 +581,8 @@ lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	AREG = CSR0;
-  	DPRINTK( 2, ( "%s: lance_start_xmit() called, csr0 %4.4x.\n",
-  				  dev->name, DREG ));
+	DPRINTK( 2, ( "%s: lance_start_xmit() called, csr0 %4.4x.\n",
+				  dev->name, DREG ));
 
 #ifdef CONFIG_SUN3X
 	/* this weirdness doesn't appear on sun3... */
@@ -636,8 +636,8 @@ lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Trigger an immediate send poll. */
 	REGA(CSR0) = CSR0_INEA | CSR0_TDMD | CSR0_STRT;
 	AREG = CSR0;
-  	DPRINTK( 2, ( "%s: lance_start_xmit() exiting, csr0 %4.4x.\n",
-  				  dev->name, DREG ));
+	DPRINTK( 2, ( "%s: lance_start_xmit() exiting, csr0 %4.4x.\n",
+				  dev->name, DREG ));
 	dev_kfree_skb(skb);
 
 	lp->lock = 0;

From a22cf81d634c83718c036728a0d119d538947b73 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:37 +0800
Subject: [PATCH 0312/3292] net: apple: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apple/bmac.c | 30 +++++++++++++++---------------
 drivers/net/ethernet/apple/mace.c |  8 ++++----
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 1e4e402f07d7..a989d2df59ad 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -477,26 +477,26 @@ static int bmac_suspend(struct macio_dev *mdev, pm_message_t state)
 		config = bmread(dev, RXCFG);
 		bmwrite(dev, RXCFG, (config & ~RxMACEnable));
 		config = bmread(dev, TXCFG);
-       		bmwrite(dev, TXCFG, (config & ~TxMACEnable));
+		bmwrite(dev, TXCFG, (config & ~TxMACEnable));
 		bmwrite(dev, INTDISABLE, DisableAll); /* disable all intrs */
-       		/* disable rx and tx dma */
+		/* disable rx and tx dma */
 		rd->control = cpu_to_le32(DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE));	/* clear run bit */
 		td->control = cpu_to_le32(DBDMA_CLEAR(RUN|PAUSE|FLUSH|WAKE));	/* clear run bit */
-       		/* free some skb's */
-       		for (i=0; i<N_RX_RING; i++) {
-       			if (bp->rx_bufs[i] != NULL) {
-       				dev_kfree_skb(bp->rx_bufs[i]);
-       				bp->rx_bufs[i] = NULL;
-       			}
-       		}
-       		for (i = 0; i<N_TX_RING; i++) {
+		/* free some skb's */
+		for (i=0; i<N_RX_RING; i++) {
+			if (bp->rx_bufs[i] != NULL) {
+				dev_kfree_skb(bp->rx_bufs[i]);
+				bp->rx_bufs[i] = NULL;
+			}
+		}
+		for (i = 0; i<N_TX_RING; i++) {
 			if (bp->tx_bufs[i] != NULL) {
 		       		dev_kfree_skb(bp->tx_bufs[i]);
 	       			bp->tx_bufs[i] = NULL;
 		       	}
 		}
 	}
-       	pmac_call_feature(PMAC_FTR_BMAC_ENABLE, macio_get_of_node(bp->mdev), 0, 0);
+	pmac_call_feature(PMAC_FTR_BMAC_ENABLE, macio_get_of_node(bp->mdev), 0, 0);
 	return 0;
 }
 
@@ -510,9 +510,9 @@ static int bmac_resume(struct macio_dev *mdev)
 		bmac_reset_and_enable(dev);
 
 	enable_irq(dev->irq);
-       	enable_irq(bp->tx_dma_intr);
-       	enable_irq(bp->rx_dma_intr);
-       	netif_device_attach(dev);
+	enable_irq(bp->tx_dma_intr);
+	enable_irq(bp->rx_dma_intr);
+	netif_device_attach(dev);
 
 	return 0;
 }
@@ -1599,7 +1599,7 @@ static int bmac_remove(struct macio_dev *mdev)
 
 	unregister_netdev(dev);
 
-       	free_irq(dev->irq, dev);
+	free_irq(dev->irq, dev);
 	free_irq(bp->tx_dma_intr, dev);
 	free_irq(bp->rx_dma_intr, dev);
 
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 9e5006e59215..4b80e3a52a19 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -364,9 +364,9 @@ static void mace_reset(struct net_device *dev)
 	out_8(&mb->iac, 0);
 
     if (mp->port_aaui)
-    	out_8(&mb->plscc, PORTSEL_AUI + ENPLSIO);
+	out_8(&mb->plscc, PORTSEL_AUI + ENPLSIO);
     else
-    	out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO);
+	out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO);
 }
 
 static void __mace_set_address(struct net_device *dev, void *addr)
@@ -378,9 +378,9 @@ static void __mace_set_address(struct net_device *dev, void *addr)
 
     /* load up the hardware address */
     if (mp->chipid == BROKEN_ADDRCHG_REV)
-    	out_8(&mb->iac, PHYADDR);
+	out_8(&mb->iac, PHYADDR);
     else {
-    	out_8(&mb->iac, ADDRCHG | PHYADDR);
+	out_8(&mb->iac, ADDRCHG | PHYADDR);
 	while ((in_8(&mb->iac) & ADDRCHG) != 0)
 	    ;
     }

From 90e4403a6d374d37fce5e86f38a5e77359b62822 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:38 +0800
Subject: [PATCH 0313/3292] net: broadcom: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/b44.c  | 20 ++++++++++----------
 drivers/net/ethernet/broadcom/bnx2.c |  6 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index b455b60a5434..ad2655efe423 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1556,8 +1556,8 @@ static void b44_setup_pseudo_magicp(struct b44 *bp)
 	plen0 = b44_magic_pattern(bp->dev->dev_addr, pwol_pattern, pwol_mask,
 				  B44_ETHIPV4UDP_HLEN);
 
-   	bwfilter_table(bp, pwol_pattern, B44_PATTERN_SIZE, B44_PATTERN_BASE);
-   	bwfilter_table(bp, pwol_mask, B44_PMASK_SIZE, B44_PMASK_BASE);
+	bwfilter_table(bp, pwol_pattern, B44_PATTERN_SIZE, B44_PATTERN_BASE);
+	bwfilter_table(bp, pwol_mask, B44_PMASK_SIZE, B44_PMASK_BASE);
 
 	/* Raw ethernet II magic packet pattern - pattern 1 */
 	memset(pwol_pattern, 0, B44_PATTERN_SIZE);
@@ -1565,9 +1565,9 @@ static void b44_setup_pseudo_magicp(struct b44 *bp)
 	plen1 = b44_magic_pattern(bp->dev->dev_addr, pwol_pattern, pwol_mask,
 				  ETH_HLEN);
 
-   	bwfilter_table(bp, pwol_pattern, B44_PATTERN_SIZE,
+	bwfilter_table(bp, pwol_pattern, B44_PATTERN_SIZE,
 		       B44_PATTERN_BASE + B44_PATTERN_SIZE);
-  	bwfilter_table(bp, pwol_mask, B44_PMASK_SIZE,
+	bwfilter_table(bp, pwol_mask, B44_PMASK_SIZE,
 		       B44_PMASK_BASE + B44_PMASK_SIZE);
 
 	/* Ipv6 magic packet pattern - pattern 2 */
@@ -1576,9 +1576,9 @@ static void b44_setup_pseudo_magicp(struct b44 *bp)
 	plen2 = b44_magic_pattern(bp->dev->dev_addr, pwol_pattern, pwol_mask,
 				  B44_ETHIPV6UDP_HLEN);
 
-   	bwfilter_table(bp, pwol_pattern, B44_PATTERN_SIZE,
+	bwfilter_table(bp, pwol_pattern, B44_PATTERN_SIZE,
 		       B44_PATTERN_BASE + B44_PATTERN_SIZE + B44_PATTERN_SIZE);
-  	bwfilter_table(bp, pwol_mask, B44_PMASK_SIZE,
+	bwfilter_table(bp, pwol_mask, B44_PMASK_SIZE,
 		       B44_PMASK_BASE + B44_PMASK_SIZE + B44_PMASK_SIZE);
 
 	kfree(pwol_pattern);
@@ -1631,9 +1631,9 @@ static void b44_setup_wol(struct b44 *bp)
 		val = br32(bp, B44_DEVCTRL);
 		bw32(bp, B44_DEVCTRL, val | DEVCTRL_MPM | DEVCTRL_PFE);
 
- 	} else {
- 		b44_setup_pseudo_magicp(bp);
- 	}
+	} else {
+		b44_setup_pseudo_magicp(bp);
+	}
 	b44_setup_wol_pci(bp);
 }
 
@@ -1757,7 +1757,7 @@ static void __b44_set_rx_mode(struct net_device *dev)
 			__b44_cam_write(bp, zero, i);
 
 		bw32(bp, B44_RXCONFIG, val);
-        	val = br32(bp, B44_CAM_CTRL);
+		val = br32(bp, B44_CAM_CTRL);
 	        bw32(bp, B44_CAM_CTRL, val | CAM_CTRL_ENABLE);
 	}
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index c0986096c701..2c5f36befdfe 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -572,7 +572,7 @@ bnx2_write_phy(struct bnx2 *bp, u32 reg, u32 val)
 	}
 
 	if (val1 & BNX2_EMAC_MDIO_COMM_START_BUSY)
-        	ret = -EBUSY;
+		ret = -EBUSY;
 	else
 		ret = 0;
 
@@ -3599,7 +3599,7 @@ bnx2_set_rx_mode(struct net_device *dev)
 		for (i = 0; i < NUM_MC_HASH_REGISTERS; i++) {
 			BNX2_WR(bp, BNX2_EMAC_MULTICAST_HASH0 + (i * 4),
 				0xffffffff);
-        	}
+		}
 		sort_mode |= BNX2_RPM_SORT_USER0_MC_EN;
 	}
 	else {
@@ -4674,7 +4674,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
 
 				if (addr == page_end-4) {
 					cmd_flags = BNX2_NVM_COMMAND_LAST;
-                		}
+				}
 				rc = bnx2_nvram_write_dword(bp, addr,
 					&flash_buffer[i], cmd_flags);
 

From 21b128fde6e092de0749df157304d5896a237f10 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:39 +0800
Subject: [PATCH 0314/3292] net: chelsio: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/sge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index cec7308e2d5b..11d3b6218ed7 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -3371,7 +3371,7 @@ void t3_sge_prep(struct adapter *adap, struct sge_params *p)
 		q->coalesce_usecs = 5;
 		q->rspq_size = 1024;
 		q->fl_size = 1024;
- 		q->jumbo_size = 512;
+		q->jumbo_size = 512;
 		q->txq_size[TXQ_ETH] = 1024;
 		q->txq_size[TXQ_OFLD] = 1024;
 		q->txq_size[TXQ_CTRL] = 256;

From cf82f9b165e464bb81b66d0b4fcca70970785564 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:40 +0800
Subject: [PATCH 0315/3292] net: dec: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/de2104x.c     |  4 ++--
 drivers/net/ethernet/dec/tulip/de4x5.c       |  6 +++---
 drivers/net/ethernet/dec/tulip/dmfe.c        | 18 +++++++++---------
 drivers/net/ethernet/dec/tulip/pnic2.c       |  4 ++--
 drivers/net/ethernet/dec/tulip/uli526x.c     | 10 +++++-----
 drivers/net/ethernet/dec/tulip/winbond-840.c |  4 ++--
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index b018195f0243..117c26fa5909 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -832,8 +832,8 @@ static struct net_device_stats *de_get_stats(struct net_device *dev)
 
 	/* The chip only need report frame silently dropped. */
 	spin_lock_irq(&de->lock);
- 	if (netif_running(dev) && netif_device_present(dev))
- 		__de_get_stats(de);
+	if (netif_running(dev) && netif_device_present(dev))
+		__de_get_stats(de);
 	spin_unlock_irq(&de->lock);
 
 	return &dev->stats;
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 683e328b5461..b125d7faefdf 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -396,7 +396,7 @@
 			   <earl@exis.net>.
 			  Updated the PCI interface to conform with the latest
 			   version. I hope nothing is broken...
-          		  Add TX done interrupt modification from suggestion
+			  Add TX done interrupt modification from suggestion
 			   by <Austin.Donnelly@cl.cam.ac.uk>.
 			  Fix is_anc_capable() bug reported by
 			   <Austin.Donnelly@cl.cam.ac.uk>.
@@ -1499,7 +1499,7 @@ de4x5_queue_pkt(struct sk_buff *skb, struct net_device *dev)
 	    spin_lock_irqsave(&lp->lock, flags);
 	    netif_stop_queue(dev);
 	    load_packet(dev, skb->data, TD_IC | TD_LS | TD_FS | skb->len, skb);
- 	    lp->stats.tx_bytes += skb->len;
+	    lp->stats.tx_bytes += skb->len;
 	    outl(POLL_DEMAND, DE4X5_TPD);/* Start the TX */
 
 	    lp->tx_new = (lp->tx_new + 1) % lp->txRingSize;
@@ -1651,7 +1651,7 @@ de4x5_rx(struct net_device *dev)
 
 		    /* Update stats */
 		    lp->stats.rx_packets++;
- 		    lp->stats.rx_bytes += pkt_len;
+		    lp->stats.rx_bytes += pkt_len;
 		}
 	    }
 
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 87a27fe2992d..c763b692e164 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -518,7 +518,7 @@ static void dmfe_remove_one(struct pci_dev *pdev)
 
 	DMFE_DBUG(0, "dmfe_remove_one()", 0);
 
- 	if (dev) {
+	if (dev) {
 
 		unregister_netdev(dev);
 		pci_iounmap(db->pdev, db->ioaddr);
@@ -567,10 +567,10 @@ static int dmfe_open(struct net_device *dev)
 	/* CR6 operation mode decision */
 	if ( !chkmode || (db->chip_id == PCI_DM9132_ID) ||
 		(db->chip_revision >= 0x30) ) {
-    		db->cr6_data |= DMFE_TXTH_256;
+		db->cr6_data |= DMFE_TXTH_256;
 		db->cr0_data = CR0_DEFAULT;
 		db->dm910x_chk_mode=4;		/* Enter the normal mode */
- 	} else {
+	} else {
 		db->cr6_data |= CR6_SFT;	/* Store & Forward mode */
 		db->cr0_data = 0;
 		db->dm910x_chk_mode = 1;	/* Enter the check mode */
@@ -903,7 +903,7 @@ static void dmfe_free_tx_pkt(struct net_device *dev, struct dmfe_board_info *db)
 			}
 		}
 
-    		txptr = txptr->next_tx_desc;
+		txptr = txptr->next_tx_desc;
 	}/* End of while */
 
 	/* Update TX remove pointer to next */
@@ -1121,7 +1121,7 @@ static void dmfe_timer(struct timer_list *t)
 	void __iomem *ioaddr = db->ioaddr;
 	u32 tmp_cr8;
 	unsigned char tmp_cr12;
- 	unsigned long flags;
+	unsigned long flags;
 
 	int link_ok, link_ok_phy;
 
@@ -1217,7 +1217,7 @@ static void dmfe_timer(struct timer_list *t)
 	if (link_ok_phy != link_ok) {
 		DMFE_DBUG (0, "PHY and chip report different link status", 0);
 		link_ok = link_ok | link_ok_phy;
- 	}
+	}
 
 	if ( !link_ok && netif_carrier_ok(dev)) {
 		/* Link Failed */
@@ -1699,14 +1699,14 @@ static void dmfe_set_phyxcer(struct dmfe_board_info *db)
 		if (db->chip_id == PCI_DM9009_ID) phy_reg &= 0x61;
 	}
 
-  	/* Write new capability to Phyxcer Reg4 */
+	/* Write new capability to Phyxcer Reg4 */
 	if ( !(phy_reg & 0x01e0)) {
 		phy_reg|=db->PHY_reg4;
 		db->media_mode|=DMFE_AUTO;
 	}
 	dmfe_phy_write(db->ioaddr, db->phy_addr, 4, phy_reg, db->chip_id);
 
- 	/* Restart Auto-Negotiation */
+	/* Restart Auto-Negotiation */
 	if ( db->chip_type && (db->chip_id == PCI_DM9102_ID) )
 		dmfe_phy_write(db->ioaddr, db->phy_addr, 0, 0x1800, db->chip_id);
 	if ( !db->chip_type )
@@ -1754,7 +1754,7 @@ static void dmfe_process_mode(struct dmfe_board_info *db)
 			}
 			dmfe_phy_write(db->ioaddr,
 				       db->phy_addr, 0, phy_reg, db->chip_id);
-       			if ( db->chip_type && (db->chip_id == PCI_DM9102_ID) )
+			if ( db->chip_type && (db->chip_id == PCI_DM9102_ID) )
 				mdelay(20);
 			dmfe_phy_write(db->ioaddr,
 				       db->phy_addr, 0, phy_reg, db->chip_id);
diff --git a/drivers/net/ethernet/dec/tulip/pnic2.c b/drivers/net/ethernet/dec/tulip/pnic2.c
index 412adaa7fdf8..72a09156b48b 100644
--- a/drivers/net/ethernet/dec/tulip/pnic2.c
+++ b/drivers/net/ethernet/dec/tulip/pnic2.c
@@ -351,7 +351,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5)
 			del_timer_sync(&tp->timer);
 			pnic2_start_nway(dev);
 			tp->timer.expires = RUN_AT(3*HZ);
-       			add_timer(&tp->timer);
+			add_timer(&tp->timer);
                 }
 
                 return;
@@ -375,7 +375,7 @@ void pnic2_lnk_change(struct net_device *dev, int csr5)
 			del_timer_sync(&tp->timer);
 			pnic2_start_nway(dev);
 			tp->timer.expires = RUN_AT(3*HZ);
-       			add_timer(&tp->timer);
+			add_timer(&tp->timer);
                 }
 
                 return;
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index 13e73ed15ef0..d67ef7d02d6b 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -780,7 +780,7 @@ static void uli526x_free_tx_pkt(struct net_device *dev,
 			}
 		}
 
-    		txptr = txptr->next_tx_desc;
+		txptr = txptr->next_tx_desc;
 	}/* End of while */
 
 	/* Update TX remove pointer to next */
@@ -1015,7 +1015,7 @@ static void uli526x_timer(struct timer_list *t)
 	struct net_device *dev = pci_get_drvdata(db->pdev);
 	struct uli_phy_ops *phy = &db->phy;
 	void __iomem *ioaddr = db->ioaddr;
- 	unsigned long flags;
+	unsigned long flags;
 	u8 tmp_cr12 = 0;
 	u32 tmp_cr8;
 
@@ -1535,14 +1535,14 @@ static void uli526x_set_phyxcer(struct uli526x_board_info *db)
 
 	}
 
-  	/* Write new capability to Phyxcer Reg4 */
+	/* Write new capability to Phyxcer Reg4 */
 	if ( !(phy_reg & 0x01e0)) {
 		phy_reg|=db->PHY_reg4;
 		db->media_mode|=ULI526X_AUTO;
 	}
 	phy->write(db, db->phy_addr, 4, phy_reg);
 
- 	/* Restart Auto-Negotiation */
+	/* Restart Auto-Negotiation */
 	phy->write(db, db->phy_addr, 0, 0x1200);
 	udelay(50);
 }
@@ -1550,7 +1550,7 @@ static void uli526x_set_phyxcer(struct uli526x_board_info *db)
 
 /*
  *	Process op-mode
- 	AUTO mode : PHY controller in Auto-negotiation Mode
+	AUTO mode : PHY controller in Auto-negotiation Mode
  *	Force mode: PHY controller in force mode with HUB
  *			N-way force capability with SWITCH
  */
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 514df170ec5d..f6ff1f76eacb 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -36,7 +36,7 @@
 		power management.
 		support for big endian descriptors
 			Copyright (C) 2001 Manfred Spraul
-  	* ethtool support (jgarzik)
+	* ethtool support (jgarzik)
 	* Replace some MII-related magic numbers with constants (jgarzik)
 
 	TODO:
@@ -1479,7 +1479,7 @@ static int netdev_close(struct net_device *dev)
 			   np->cur_rx, np->dirty_rx);
 	}
 
- 	/* Stop the chip's Tx and Rx processes. */
+	/* Stop the chip's Tx and Rx processes. */
 	spin_lock_irq(&np->lock);
 	netif_device_detach(dev);
 	update_csr6(dev, 0);

From e6f0f977407f1eba63db1e8f143e7667be61a1b8 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:41 +0800
Subject: [PATCH 0316/3292] net: dlink: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'
Cc: "Alexander A. Klimov" <grandmaster@al2klimov.de>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dlink/sundance.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index ce61f79f3b7c..ee0ca712dd1c 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -1847,20 +1847,20 @@ static int netdev_close(struct net_device *dev)
 	/* Stop the chip's Tx and Rx processes. */
 	iowrite16(TxDisable | RxDisable | StatsDisable, ioaddr + MACCtrl1);
 
-    	for (i = 2000; i > 0; i--) {
- 		if ((ioread32(ioaddr + DMACtrl) & 0xc000) == 0)
+	for (i = 2000; i > 0; i--) {
+		if ((ioread32(ioaddr + DMACtrl) & 0xc000) == 0)
 			break;
 		mdelay(1);
-    	}
+	}
 
-    	iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset,
+	iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset,
 			ioaddr + ASIC_HI_WORD(ASICCtrl));
 
-    	for (i = 2000; i > 0; i--) {
+	for (i = 2000; i > 0; i--) {
 		if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0)
 			break;
 		mdelay(1);
-    	}
+	}
 
 #ifdef __i386__
 	if (netif_msg_hw(np)) {

From c11c900143e44f73628dcaa439561e8f15e2ee20 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:42 +0800
Subject: [PATCH 0317/3292] net: ibm: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'
Cc: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Acked-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/emac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
index aa9f651288d5..09d3ac374b2d 100644
--- a/drivers/net/ethernet/ibm/emac/emac.h
+++ b/drivers/net/ethernet/ibm/emac/emac.h
@@ -77,7 +77,7 @@ struct emac_regs {
 		struct {
 			u32 rsvd1;
 			u32 revid;
- 			u32 rsvd2[2];
+			u32 rsvd2[2];
 			u32 iaht1;	/* Reset, R     */
 			u32 iaht2;	/* Reset, R     */
 			u32 iaht3;	/* Reset, R     */

From 717dc24dc5d642b9c805b8f59fd16e21ce721b9a Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:43 +0800
Subject: [PATCH 0318/3292] net: marvell: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/skge.h |  2 +-
 drivers/net/ethernet/marvell/sky2.c | 30 ++++++++++++++---------------
 drivers/net/ethernet/marvell/sky2.h |  8 ++++----
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/marvell/skge.h b/drivers/net/ethernet/marvell/skge.h
index 6928abcec0a3..f72217348eb4 100644
--- a/drivers/net/ethernet/marvell/skge.h
+++ b/drivers/net/ethernet/marvell/skge.h
@@ -263,7 +263,7 @@ enum {
 	CHIP_ID_YUKON_LP   = 0xb2, /* Chip ID for YUKON-LP */
 	CHIP_ID_YUKON_XL   = 0xb3, /* Chip ID for YUKON-2 XL */
 	CHIP_ID_YUKON_EC   = 0xb6, /* Chip ID for YUKON-2 EC */
- 	CHIP_ID_YUKON_FE   = 0xb7, /* Chip ID for YUKON-2 FE */
+	CHIP_ID_YUKON_FE   = 0xb7, /* Chip ID for YUKON-2 FE */
 
 	CHIP_REV_YU_LITE_A1  = 3,	/* Chip Rev. for YUKON-Lite A1,A2 */
 	CHIP_REV_YU_LITE_A3  = 7,	/* Chip Rev. for YUKON-Lite A3 */
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 222c32367b2c..324c280cc22c 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -471,7 +471,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port)
 			adv |= fiber_fc_adv[sky2->flow_mode];
 	} else {
 		reg |= GM_GPCR_AU_FCT_DIS;
- 		reg |= gm_fc_disable[sky2->flow_mode];
+		reg |= gm_fc_disable[sky2->flow_mode];
 
 		/* Forward pause packets to GMAC? */
 		if (sky2->flow_mode & FC_RX)
@@ -1656,16 +1656,16 @@ static void sky2_hw_up(struct sky2_port *sky2)
 	tx_init(sky2);
 
 	/*
- 	 * On dual port PCI-X card, there is an problem where status
+	 * On dual port PCI-X card, there is an problem where status
 	 * can be received out of order due to split transactions
 	 */
 	if (otherdev && netif_running(otherdev) &&
- 	    (cap = pci_find_capability(hw->pdev, PCI_CAP_ID_PCIX))) {
- 		u16 cmd;
+	    (cap = pci_find_capability(hw->pdev, PCI_CAP_ID_PCIX))) {
+		u16 cmd;
 
 		cmd = sky2_pci_read16(hw, cap + PCI_X_CMD);
- 		cmd &= ~PCI_X_CMD_MAX_SPLIT;
- 		sky2_pci_write16(hw, cap + PCI_X_CMD, cmd);
+		cmd &= ~PCI_X_CMD_MAX_SPLIT;
+		sky2_pci_write16(hw, cap + PCI_X_CMD, cmd);
 	}
 
 	sky2_mac_init(hw, port);
@@ -1836,8 +1836,8 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb,
 	u16 mss;
 	u8 ctrl;
 
- 	if (unlikely(tx_avail(sky2) < tx_le_req(skb)))
-  		return NETDEV_TX_BUSY;
+	if (unlikely(tx_avail(sky2) < tx_le_req(skb)))
+		return NETDEV_TX_BUSY;
 
 	len = skb_headlen(skb);
 	mapping = dma_map_single(&hw->pdev->dev, skb->data, len,
@@ -1866,9 +1866,9 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb,
 		if (!(hw->flags & SKY2_HW_NEW_LE))
 			mss += ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb);
 
-  		if (mss != sky2->tx_last_mss) {
+		if (mss != sky2->tx_last_mss) {
 			le = get_tx_le(sky2, &slot);
-  			le->addr = cpu_to_le32(mss);
+			le->addr = cpu_to_le32(mss);
 
 			if (hw->flags & SKY2_HW_NEW_LE)
 				le->opcode = OP_MSS | HW_OWNER;
@@ -1895,8 +1895,8 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb,
 	/* Handle TCP checksum offload */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		/* On Yukon EX (some versions) encoding change. */
- 		if (hw->flags & SKY2_HW_AUTO_TX_SUM)
- 			ctrl |= CALSUM;	/* auto checksum */
+		if (hw->flags & SKY2_HW_AUTO_TX_SUM)
+			ctrl |= CALSUM;	/* auto checksum */
 		else {
 			const unsigned offset = skb_transport_offset(skb);
 			u32 tcpsum;
@@ -2557,7 +2557,7 @@ nobuf:
 static struct sk_buff *sky2_receive(struct net_device *dev,
 				    u16 length, u32 status)
 {
- 	struct sky2_port *sky2 = netdev_priv(dev);
+	struct sky2_port *sky2 = netdev_priv(dev);
 	struct rx_ring_info *re = sky2->rx_ring + sky2->rx_next;
 	struct sk_buff *skb = NULL;
 	u16 count = (status & GMR_FS_LEN) >> 16;
@@ -5063,11 +5063,11 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!disable_msi && pci_enable_msi(pdev) == 0) {
 		err = sky2_test_msi(hw);
 		if (err) {
- 			pci_disable_msi(pdev);
+			pci_disable_msi(pdev);
 			if (err != -EOPNOTSUPP)
 				goto err_out_free_netdev;
 		}
- 	}
+	}
 
 	netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT);
 
diff --git a/drivers/net/ethernet/marvell/sky2.h b/drivers/net/ethernet/marvell/sky2.h
index b2dddd8a246c..ddec1627f1a7 100644
--- a/drivers/net/ethernet/marvell/sky2.h
+++ b/drivers/net/ethernet/marvell/sky2.h
@@ -538,8 +538,8 @@ enum {
 	CHIP_ID_YUKON_EC_U = 0xb4, /* YUKON-2 EC Ultra */
 	CHIP_ID_YUKON_EX   = 0xb5, /* YUKON-2 Extreme */
 	CHIP_ID_YUKON_EC   = 0xb6, /* YUKON-2 EC */
- 	CHIP_ID_YUKON_FE   = 0xb7, /* YUKON-2 FE */
- 	CHIP_ID_YUKON_FE_P = 0xb8, /* YUKON-2 FE+ */
+	CHIP_ID_YUKON_FE   = 0xb7, /* YUKON-2 FE */
+	CHIP_ID_YUKON_FE_P = 0xb8, /* YUKON-2 FE+ */
 	CHIP_ID_YUKON_SUPR = 0xb9, /* YUKON-2 Supreme */
 	CHIP_ID_YUKON_UL_2 = 0xba, /* YUKON-2 Ultra 2 */
 	CHIP_ID_YUKON_OPT  = 0xbc, /* YUKON-2 Optima */
@@ -2262,8 +2262,8 @@ struct sky2_port {
 #define SKY2_FLAG_AUTO_SPEED		0x0002
 #define SKY2_FLAG_AUTO_PAUSE		0x0004
 
- 	enum flow_control    flow_mode;
- 	enum flow_control    flow_status;
+	enum flow_control    flow_mode;
+	enum flow_control    flow_status;
 
 #ifdef CONFIG_SKY2_DEBUG
 	struct dentry	     *debugfs;

From 4a0949778c4e918686cf369ffc86fa8f8e159c56 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:44 +0800
Subject: [PATCH 0319/3292] net: natsemi: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/natsemi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index b81e1487945c..51b4b25d15ad 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -969,7 +969,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;
 
  err_create_file:
- 	unregister_netdev(dev);
+	unregister_netdev(dev);
 
  err_register_netdev:
 	iounmap(ioaddr);
@@ -3103,14 +3103,14 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	case SIOCSMIIREG:		/* Write MII PHY register. */
 		if (dev->if_port == PORT_TP) {
 			if ((data->phy_id & 0x1f) == np->phy_addr_external) {
- 				if ((data->reg_num & 0x1f) == MII_ADVERTISE)
+				if ((data->reg_num & 0x1f) == MII_ADVERTISE)
 					np->advertising = data->val_in;
 				mdio_write(dev, data->reg_num & 0x1f,
 							data->val_in);
 			}
 		} else {
 			if ((data->phy_id & 0x1f) == np->phy_addr_external) {
- 				if ((data->reg_num & 0x1f) == MII_ADVERTISE)
+				if ((data->reg_num & 0x1f) == MII_ADVERTISE)
 					np->advertising = data->val_in;
 			}
 			move_int_phy(dev, data->phy_id & 0x1f);

From f95a73a8a8a886a7be356ac2934b76aba2d761d3 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:45 +0800
Subject: [PATCH 0320/3292] net: realtek: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139cp.c  | 6 +++---
 drivers/net/ethernet/realtek/8139too.c | 6 +++---
 drivers/net/ethernet/realtek/atp.c     | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 4e44313b7651..9677e257e9a1 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -6,7 +6,7 @@
 	Copyright (C) 2000, 2001 David S. Miller (davem@redhat.com) [sungem.c]
 	Copyright 2001 Manfred Spraul				    [natsemi.c]
 	Copyright 1999-2001 by Donald Becker.			    [natsemi.c]
-       	Written 1997-2001 by Donald Becker.			    [8139too.c]
+	Written 1997-2001 by Donald Becker.			    [8139too.c]
 	Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>. [acenic.c]
 
 	This software may be used and distributed according to the terms of
@@ -947,8 +947,8 @@ static struct net_device_stats *cp_get_stats(struct net_device *dev)
 
 	/* The chip only need report frame silently dropped. */
 	spin_lock_irqsave(&cp->lock, flags);
- 	if (netif_running(dev) && netif_device_present(dev))
- 		__cp_get_stats(cp);
+	if (netif_running(dev) && netif_device_present(dev))
+		__cp_get_stats(cp);
 	spin_unlock_irqrestore(&cp->lock, flags);
 
 	return &dev->stats;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 1e5a453dea14..f0608f050050 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -11,7 +11,7 @@
 
 	-----<snip>-----
 
-        	Written 1997-2001 by Donald Becker.
+		Written 1997-2001 by Donald Becker.
 		This software may be used and distributed according to the
 		terms of the GNU General Public License (GPL), incorporated
 		herein by reference.  Drivers based on or derived from this
@@ -548,8 +548,8 @@ static const struct {
 
 	{ "RTL-8100",
 	  HW_REVID(1, 1, 1, 1, 0, 1, 0),
- 	  HasLWake,
- 	},
+	  HasLWake,
+	},
 
 	{ "RTL-8100B/8139D",
 	  HW_REVID(1, 1, 1, 0, 1, 0, 1),
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 9e3b35c97e63..b6c849b258a0 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -497,8 +497,8 @@ static void write_packet(long ioaddr, int length, unsigned char *packet, int pad
 {
     if (length & 1)
     {
-    	length++;
-    	pad_len++;
+	length++;
+	pad_len++;
     }
 
     outb(EOC+MAR, ioaddr + PAR_DATA);

From bf53445d81e3fda4b0d361e8e0c037c91890f6c8 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:46 +0800
Subject: [PATCH 0321/3292] net: seeq: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/seeq/ether3.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 65c98837ec45..16a4cbae9326 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -617,7 +617,7 @@ if (next_ptr < RX_START || next_ptr >= RX_END) {
  break;
 }
 		/*
- 		 * ignore our own packets...
+		 * ignore our own packets...
 	 	 */
 		if (!(*(unsigned long *)&dev->dev_addr[0] ^ *(unsigned long *)&addrs[2+6]) &&
 		    !(*(unsigned short *)&dev->dev_addr[4] ^ *(unsigned short *)&addrs[2+10])) {
@@ -672,7 +672,7 @@ done:
 	 */
 	if (!(ether3_inw(REG_STATUS) & STAT_RXON)) {
 		dev->stats.rx_dropped++;
-    		ether3_outw(next_ptr, REG_RECVPTR);
+		ether3_outw(next_ptr, REG_RECVPTR);
 		ether3_outw(priv(dev)->regs.command | CMD_RXON, REG_COMMAND);
 	}
 
@@ -690,11 +690,11 @@ static void ether3_tx(struct net_device *dev)
 	do {
 	    	unsigned long status;
 
-    		/*
+		/*
 	    	 * Read the packet header
-    		 */
+		 */
 	    	ether3_setbuffer(dev, buffer_read, tx_tail * 0x600);
-    		status = ether3_readlong(dev);
+		status = ether3_readlong(dev);
 
 		/*
 		 * Check to see if this packet has been transmitted

From a294ddfccb45531ff8cd992bd8f00e3ab16410d2 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:47 +0800
Subject: [PATCH 0322/3292] net: sis: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sis/sis900.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 620c26f71be8..ca9c00b7f588 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -678,12 +678,12 @@ static int sis900_mii_probe(struct net_device *net_dev)
 	/* Reset phy if default phy is internal sis900 */
         if ((sis_priv->mii->phy_id0 == 0x001D) &&
 	    ((sis_priv->mii->phy_id1&0xFFF0) == 0x8000))
-        	status = sis900_reset_phy(net_dev, sis_priv->cur_phy);
+		status = sis900_reset_phy(net_dev, sis_priv->cur_phy);
 
         /* workaround for ICS1893 PHY */
         if ((sis_priv->mii->phy_id0 == 0x0015) &&
             ((sis_priv->mii->phy_id1&0xFFF0) == 0xF440))
-            	mdio_write(net_dev, sis_priv->cur_phy, 0x0018, 0xD200);
+		mdio_write(net_dev, sis_priv->cur_phy, 0x0018, 0xD200);
 
 	if(status & MII_STAT_LINK){
 		while (poll_bit) {
@@ -727,7 +727,7 @@ static int sis900_mii_probe(struct net_device *net_dev)
 static u16 sis900_default_phy(struct net_device * net_dev)
 {
 	struct sis900_private *sis_priv = netdev_priv(net_dev);
- 	struct mii_phy *phy = NULL, *phy_home = NULL,
+	struct mii_phy *phy = NULL, *phy_home = NULL,
 		*default_phy = NULL, *phy_lan = NULL;
 	u16 status;
 
@@ -1339,18 +1339,18 @@ static void sis900_timer(struct timer_list *t)
 	} else {
 	/* Link ON -> OFF */
                 if (!(status & MII_STAT_LINK)){
-                	netif_carrier_off(net_dev);
+			netif_carrier_off(net_dev);
 			if(netif_msg_link(sis_priv))
-                		printk(KERN_INFO "%s: Media Link Off\n", net_dev->name);
+				printk(KERN_INFO "%s: Media Link Off\n", net_dev->name);
 
-                	/* Change mode issue */
-                	if ((mii_phy->phy_id0 == 0x001D) &&
-			    ((mii_phy->phy_id1 & 0xFFF0) == 0x8000))
-               			sis900_reset_phy(net_dev,  sis_priv->cur_phy);
+			/* Change mode issue */
+			if ((mii_phy->phy_id0 == 0x001D) &&
+				((mii_phy->phy_id1 & 0xFFF0) == 0x8000))
+					sis900_reset_phy(net_dev,  sis_priv->cur_phy);
 
 			sis630_set_eq(net_dev, sis_priv->chipset_rev);
 
-                	goto LookForLink;
+			goto LookForLink;
                 }
 	}
 
@@ -2331,7 +2331,7 @@ static int sis900_set_config(struct net_device *dev, struct ifmap *map)
 		case IF_PORT_10BASE2: /* 10Base2 */
 		case IF_PORT_AUI: /* AUI */
 		case IF_PORT_100BASEFX: /* 100BaseFx */
-                	/* These Modes are not supported (are they?)*/
+			/* These Modes are not supported (are they?)*/
 			return -EOPNOTSUPP;
 
 		default:

From 996d7ab8badf153b59db5a85da0d65623eb58a2d Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:48 +0800
Subject: [PATCH 0323/3292] net: smsc: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smc9194.c | 42 ++++++++++++++---------------
 drivers/net/ethernet/smsc/smc91x.c  | 14 +++++-----
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c
index 4b2330deed47..bf7c8c8b1350 100644
--- a/drivers/net/ethernet/smsc/smc9194.c
+++ b/drivers/net/ethernet/smsc/smc9194.c
@@ -182,8 +182,8 @@ struct smc_local {
 	struct sk_buff * saved_skb;
 
 	/*
- 	 . This keeps track of how many packets that I have
- 	 . sent out.  When an TX_EMPTY interrupt comes, I know
+	 . This keeps track of how many packets that I have
+	 . sent out.  When an TX_EMPTY interrupt comes, I know
 	 . that all of these have been sent.
 	*/
 	int	packets_waiting;
@@ -343,7 +343,7 @@ static void smc_reset( int ioaddr )
 
 	/* Note:  It doesn't seem that waiting for the MMU busy is needed here,
 	   but this is a place where future chipsets _COULD_ break.  Be wary
- 	   of issuing another MMU command right after this */
+	   of issuing another MMU command right after this */
 
 	outb( 0, ioaddr + INT_MASK );
 }
@@ -521,9 +521,9 @@ static netdev_tx_t smc_wait_to_send_packet(struct sk_buff *skb,
 	SMC_SELECT_BANK( 2 );
 	outw( MC_ALLOC | numPages, ioaddr + MMU_CMD );
 	/*
- 	. Performance Hack
+	. Performance Hack
 	.
- 	. wait a short amount of time.. if I can send a packet now, I send
+	. wait a short amount of time.. if I can send a packet now, I send
 	. it now.  Otherwise, I enable an interrupt and wait for one to be
 	. available.
 	.
@@ -540,17 +540,17 @@ static netdev_tx_t smc_wait_to_send_packet(struct sk_buff *skb,
 		if ( status & IM_ALLOC_INT ) {
 			/* acknowledge the interrupt */
 			outb( IM_ALLOC_INT, ioaddr + INTERRUPT );
-  			break;
+			break;
 		}
-   	} while ( -- time_out );
+	} while ( -- time_out );
 
-   	if ( !time_out ) {
+	if ( !time_out ) {
 		/* oh well, wait until the chip finds memory later */
 		SMC_ENABLE_INT( IM_ALLOC_INT );
 		PRINTK2((CARDNAME": memory allocation deferred.\n"));
 		/* it's deferred, but I'll handle it later */
 		return NETDEV_TX_OK;
-   	}
+	}
 	/* or YES! I can send the packet now.. */
 	smc_hardware_send_packet(dev);
 	netif_wake_queue(dev);
@@ -616,7 +616,7 @@ static void smc_hardware_send_packet( struct net_device * dev )
 #endif
 
 	/* send the packet length ( +6 for status, length and ctl byte )
- 	   and the status word ( set to zeros ) */
+	   and the status word ( set to zeros ) */
 #ifdef USE_32_BIT
 	outl(  (length +6 ) << 16 , ioaddr + DATA_1 );
 #else
@@ -629,8 +629,8 @@ static void smc_hardware_send_packet( struct net_device * dev )
 	/* send the actual data
 	 . I _think_ it's faster to send the longs first, and then
 	 . mop up by sending the last word.  It depends heavily
- 	 . on alignment, at least on the 486.  Maybe it would be
- 	 . a good idea to check which is optimal?  But that could take
+	 . on alignment, at least on the 486.  Maybe it would be
+	 . a good idea to check which is optimal?  But that could take
 	 . almost as much time as is saved?
 	*/
 #ifdef USE_32_BIT
@@ -757,7 +757,7 @@ static int __init smc_findirq(int ioaddr)
 	outb( IM_ALLOC_INT, ioaddr + INT_MASK );
 
 	/*
- 	 . Allocate 512 bytes of memory.  Note that the chip was just
+	 . Allocate 512 bytes of memory.  Note that the chip was just
 	 . reset so all the memory is available
 	*/
 	outw( MC_ALLOC | 1, ioaddr + MMU_CMD );
@@ -871,7 +871,7 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 		goto err_out;
 	}
 	/* The above MIGHT indicate a device, but I need to write to further
- 	 	test this.  */
+		test this.  */
 	outw( 0x0, ioaddr + BANK_SELECT );
 	bank = inw( ioaddr + BANK_SELECT );
 	if ( (bank & 0xFF00 ) != 0x3300 ) {
@@ -879,7 +879,7 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 		goto err_out;
 	}
 	/* well, we've already written once, so hopefully another time won't
- 	   hurt.  This time, I need to switch the bank register to bank 1,
+	   hurt.  This time, I need to switch the bank register to bank 1,
 	   so I can access the base address register */
 	SMC_SELECT_BANK(1);
 	base_address_register = inw( ioaddr + BASE );
@@ -917,7 +917,7 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 	dev->base_addr = ioaddr;
 
 	/*
- 	 . Get the MAC address ( bank 1, regs 4 - 9 )
+	 . Get the MAC address ( bank 1, regs 4 - 9 )
 	*/
 	SMC_SELECT_BANK( 1 );
 	for ( i = 0; i < 6; i += 2 ) {
@@ -938,8 +938,8 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 
 	/*
 	 Now, I want to find out more about the chip.  This is sort of
- 	 redundant, but it's cleaner to have it in both, rather than having
- 	 one VERY long probe procedure.
+	 redundant, but it's cleaner to have it in both, rather than having
+	 one VERY long probe procedure.
 	*/
 	SMC_SELECT_BANK(3);
 	revision_register  = inw( ioaddr + REVISION );
@@ -967,7 +967,7 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 	/*
 	 . If dev->irq is 0, then the device has to be banged on to see
 	 . what the IRQ is.
- 	 .
+	 .
 	 . This banging doesn't always detect the IRQ, for unknown reasons.
 	 . a workaround is to reset the chip and try again.
 	 .
@@ -978,7 +978,7 @@ static int __init smc_probe(struct net_device *dev, int ioaddr)
 	 .
 	 . Specifying an IRQ is done with the assumption that the user knows
 	 . what (s)he is doing.  No checking is done!!!!
- 	 .
+	 .
 	*/
 	if ( dev->irq < 2 ) {
 		int	trials;
@@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev)
 	}
 
 	/*
-  		According to Becker, I have to set the hardware address
+		According to Becker, I have to set the hardware address
 		at this point, because the (l)user can set it with an
 		ioctl.  Easily done...
 	*/
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index cbde83f620a0..813ea941b91a 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -671,19 +671,19 @@ smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		status = SMC_GET_INT(lp);
 		if (status & IM_ALLOC_INT) {
 			SMC_ACK_INT(lp, IM_ALLOC_INT);
-  			break;
+			break;
 		}
-   	} while (--poll_count);
+	} while (--poll_count);
 
 	smc_special_unlock(&lp->lock, flags);
 
 	lp->pending_tx_skb = skb;
-   	if (!poll_count) {
+	if (!poll_count) {
 		/* oh well, wait until the chip finds memory later */
 		netif_stop_queue(dev);
 		DBG(2, dev, "TX memory allocation deferred.\n");
 		SMC_ENABLE_INT(lp, IM_ALLOC_INT);
-   	} else {
+	} else {
 		/*
 		 * Allocation succeeded: push packet to the chip's own memory
 		 * immediately.
@@ -1790,7 +1790,7 @@ static int smc_findirq(struct smc_local *lp)
 	SMC_SET_INT_MASK(lp, IM_ALLOC_INT);
 
 	/*
- 	 * Allocate 512 bytes of memory.  Note that the chip was just
+	 * Allocate 512 bytes of memory.  Note that the chip was just
 	 * reset so all the memory is available
 	 */
 	SMC_SET_MMU_CMD(lp, MC_ALLOC | 1);
@@ -1998,8 +1998,8 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr,
 
 	/* Grab the IRQ */
 	retval = request_irq(dev->irq, smc_interrupt, irq_flags, dev->name, dev);
-      	if (retval)
-      		goto err_out;
+	if (retval)
+		goto err_out;
 
 #ifdef CONFIG_ARCH_PXA
 #  ifdef SMC_USE_PXA_DMA

From b54f440cb87154a78b19f8b624db1985b57b0dd7 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:49 +0800
Subject: [PATCH 0324/3292] net: sun: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Vaibhav Gupta <vaibhavgupta40@gmail.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/cassini.c |  2 +-
 drivers/net/ethernet/sun/sungem.c  | 20 ++++++++++----------
 drivers/net/ethernet/sun/sunhme.c  |  6 +++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 54f45d8c79a7..981685c88308 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -486,7 +486,7 @@ page_err:
 /* initialize spare pool of rx buffers, but allocate during the open */
 static void cas_spare_init(struct cas *cp)
 {
-  	spin_lock(&cp->rx_inuse_lock);
+	spin_lock(&cp->rx_inuse_lock);
 	INIT_LIST_HEAD(&cp->rx_inuse_list);
 	spin_unlock(&cp->rx_inuse_lock);
 
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 9790656cf970..cfb9e21b18b7 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1258,8 +1258,8 @@ static void gem_begin_auto_negotiation(struct gem *gp,
 			&advertising, ep->link_modes.advertising);
 
 	if (gp->phy_type != phy_mii_mdio0 &&
-     	    gp->phy_type != phy_mii_mdio1)
-     	    	goto non_mii;
+	    gp->phy_type != phy_mii_mdio1)
+		goto non_mii;
 
 	/* Setup advertise */
 	if (found_mii_phy(gp))
@@ -1410,7 +1410,7 @@ static int gem_set_link_modes(struct gem *gp)
 
 	if (gp->phy_type == phy_serialink ||
 	    gp->phy_type == phy_serdes) {
- 		u32 pcs_lpa = readl(gp->regs + PCS_MIILP);
+		u32 pcs_lpa = readl(gp->regs + PCS_MIILP);
 
 		if (pcs_lpa & (PCS_MIIADV_SP | PCS_MIIADV_AP))
 			pause = 1;
@@ -1892,7 +1892,7 @@ static void gem_init_mac(struct gem *gp)
 
 static void gem_init_pause_thresholds(struct gem *gp)
 {
-       	u32 cfg;
+	u32 cfg;
 
 	/* Calculate pause thresholds.  Setting the OFF threshold to the
 	 * full RX fifo size effectively disables PAUSE generation which
@@ -1914,15 +1914,15 @@ static void gem_init_pause_thresholds(struct gem *gp)
 	/* Configure the chip "burst" DMA mode & enable some
 	 * HW bug fixes on Apple version
 	 */
-       	cfg  = 0;
-       	if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE)
+	cfg  = 0;
+	if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE)
 		cfg |= GREG_CFG_RONPAULBIT | GREG_CFG_ENBUG2FIX;
 #if !defined(CONFIG_SPARC64) && !defined(CONFIG_ALPHA)
-       	cfg |= GREG_CFG_IBURST;
+	cfg |= GREG_CFG_IBURST;
 #endif
-       	cfg |= ((31 << 1) & GREG_CFG_TXDMALIM);
-       	cfg |= ((31 << 6) & GREG_CFG_RXDMALIM);
-       	writel(cfg, gp->regs + GREG_CFG);
+	cfg |= ((31 << 1) & GREG_CFG_TXDMALIM);
+	cfg |= ((31 << 6) & GREG_CFG_RXDMALIM);
+	writel(cfg, gp->regs + GREG_CFG);
 
 	/* If Infinite Burst didn't stick, then use different
 	 * thresholds (and Apple bug fixes don't exist)
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 54b53dbdb33c..a2c1a404c52d 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -2286,8 +2286,8 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb,
 					 struct net_device *dev)
 {
 	struct happy_meal *hp = netdev_priv(dev);
- 	int entry;
- 	u32 tx_flags;
+	int entry;
+	u32 tx_flags;
 
 	tx_flags = TXFLAG_OWN;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -2301,7 +2301,7 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb,
 
 	spin_lock_irq(&hp->happy_lock);
 
- 	if (TX_BUFFS_AVAIL(hp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+	if (TX_BUFFS_AVAIL(hp) <= (skb_shinfo(skb)->nr_frags + 1)) {
 		netif_stop_queue(dev);
 		spin_unlock_irq(&hp->happy_lock);
 		printk(KERN_ERR "%s: BUG! Tx Ring full when queue awake!\n",

From d1e4916fa703d2bd8d1d75979ea61d3b3d22f125 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:50 +0800
Subject: [PATCH 0325/3292] net: fealnx: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/fealnx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 0908771aa9ac..0f141c14d72d 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -144,7 +144,7 @@ struct chip_info {
 };
 
 static const struct chip_info skel_netdrv_tbl[] = {
- 	{ "100/10M Ethernet PCI Adapter",	HAS_MII_XCVR },
+	{ "100/10M Ethernet PCI Adapter",	HAS_MII_XCVR },
 	{ "100/10M Ethernet PCI Adapter",	HAS_CHIP_XCVR },
 	{ "1000/100/10M Ethernet PCI Adapter",	HAS_MII_XCVR },
 };

From 223f02acce1abeda94d41dec3e622a56a29843ac Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:51 +0800
Subject: [PATCH 0326/3292] net: xircom: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index 2049d76a0e68..4f6db6f5c272 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -1232,7 +1232,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev)
     if (pktlen < ETH_ZLEN)
     {
         if (skb_padto(skb, ETH_ZLEN))
-        	return NETDEV_TX_OK;
+		return NETDEV_TX_OK;
 	pktlen = ETH_ZLEN;
     }
 

From 59909c1ab71d92f8bec0c69ece2552aaf44bedce Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:52 +0800
Subject: [PATCH 0327/3292] net: 8390: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Armin Wolf <W_Armin@gmx.de>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/axnet_cs.c  | 14 +++++++-------
 drivers/net/ethernet/8390/pcnet_cs.c  |  2 +-
 drivers/net/ethernet/8390/smc-ultra.c |  6 +++---
 drivers/net/ethernet/8390/stnic.c     |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 2488bfdb9133..8c321dfc7b3b 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -767,7 +767,7 @@ module_pcmcia_driver(axnet_cs_driver);
   Paul Gortmaker	: tweak ANK's above multicast changes a bit.
   Paul Gortmaker	: update packet statistics for v2.1.x
   Alan Cox		: support arbitrary stupid port mappings on the
-  			  68K Macintosh. Support >16bit I/O spaces
+			  68K Macintosh. Support >16bit I/O spaces
   Paul Gortmaker	: add kmod support for auto-loading of the 8390
 			  module by all drivers that require it.
   Alan Cox		: Spinlocking work, added 'BUG_83C690'
@@ -1091,7 +1091,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
 	long e8390_base;
 	int interrupts, nr_serviced = 0, i;
 	struct ei_device *ei_local;
-    	int handled = 0;
+	int handled = 0;
 	unsigned long flags;
 
 	e8390_base = dev->base_addr;
@@ -1587,12 +1587,12 @@ static void do_set_multicast_list(struct net_device *dev)
 	}
 	outb_p(E8390_NODMA + E8390_PAGE0, e8390_base + E8390_CMD);
 
-  	if(dev->flags&IFF_PROMISC)
-  		outb_p(E8390_RXCONFIG | 0x58, e8390_base + EN0_RXCR);
+	if(dev->flags&IFF_PROMISC)
+		outb_p(E8390_RXCONFIG | 0x58, e8390_base + EN0_RXCR);
 	else if (dev->flags & IFF_ALLMULTI || !netdev_mc_empty(dev))
-  		outb_p(E8390_RXCONFIG | 0x48, e8390_base + EN0_RXCR);
-  	else
-  		outb_p(E8390_RXCONFIG | 0x40, e8390_base + EN0_RXCR);
+		outb_p(E8390_RXCONFIG | 0x48, e8390_base + EN0_RXCR);
+	else
+		outb_p(E8390_RXCONFIG | 0x40, e8390_base + EN0_RXCR);
 
 	outb_p(E8390_NODMA+E8390_PAGE0+E8390_START, e8390_base+E8390_CMD);
 }
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 9d3b1e0e425c..cac036706382 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -1527,7 +1527,7 @@ static const struct pcmcia_device_id pcnet_ids[] = {
 	PCMCIA_DEVICE_PROD_ID12("ACCTON", "EN2216-PCMCIA-ETHERNET", 0xdfc6b5b2, 0x5542bfff),
 	PCMCIA_DEVICE_PROD_ID12("Allied Telesis, K.K.", "CentreCOM LA100-PCM-T V2 100/10M LAN PC Card", 0xbb7fbdd7, 0xcd91cc68),
 	PCMCIA_DEVICE_PROD_ID12("Allied Telesis K.K.", "LA100-PCM V2", 0x36634a66, 0xc6d05997),
-  	PCMCIA_DEVICE_PROD_ID12("Allied Telesis, K.K.", "CentreCOM LA-PCM_V2", 0xbb7fBdd7, 0x28e299f8),
+	PCMCIA_DEVICE_PROD_ID12("Allied Telesis, K.K.", "CentreCOM LA-PCM_V2", 0xbb7fBdd7, 0x28e299f8),
 	PCMCIA_DEVICE_PROD_ID12("Allied Telesis K.K.", "LA-PCM V3", 0x36634a66, 0x62241d96),
 	PCMCIA_DEVICE_PROD_ID12("AmbiCom", "AMB8010", 0x5070a7f9, 0x82f96e96),
 	PCMCIA_DEVICE_PROD_ID12("AmbiCom", "AMB8610", 0x5070a7f9, 0x86741224),
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 3fe3b4dfa7c5..1d8ed7357b7f 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -347,11 +347,11 @@ static int __init ultra_probe_isapnp(struct net_device *dev)
                                             idev))) {
                         /* Avoid already found cards from previous calls */
                         if (pnp_device_attach(idev) < 0)
-                        	continue;
+				continue;
                         if (pnp_activate_dev(idev) < 0) {
                               __again:
-                        	pnp_device_detach(idev);
-                        	continue;
+				pnp_device_detach(idev);
+				continue;
                         }
 			/* if no io and irq, search for next */
 			if (!pnp_port_valid(idev, 0) || !pnp_irq_valid(idev, 0))
diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c
index 1f0670cd3ea3..fbbd7f22c142 100644
--- a/drivers/net/ethernet/8390/stnic.c
+++ b/drivers/net/ethernet/8390/stnic.c
@@ -114,7 +114,7 @@ static int __init stnic_probe(void)
   /* New style probing API */
   dev = alloc_ei_netdev();
   if (!dev)
-  	return -ENOMEM;
+	return -ENOMEM;
 
 #ifdef CONFIG_SH_STANDARD_BIOS
   sh_bios_get_node_addr (stnic_eadr);

From 2174fbd719148f2b88d85c6a4f6195df42978d5f Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:30:53 +0800
Subject: [PATCH 0328/3292] net: fujitsu: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index a7b7a4aace79..7e451e61eefd 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -812,9 +812,9 @@ static netdev_tx_t fjn_start_xmit(struct sk_buff *skb,
     
     if (length < ETH_ZLEN)
     {
-    	if (skb_padto(skb, ETH_ZLEN))
-    		return NETDEV_TX_OK;
-    	length = ETH_ZLEN;
+	if (skb_padto(skb, ETH_ZLEN))
+		return NETDEV_TX_OK;
+	length = ETH_ZLEN;
     }
 
     netif_stop_queue(dev);

From 4b63b27fc59ab9fd4057e2c6efe8cfadbe3d1448 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Wed, 19 May 2021 14:14:41 +0800
Subject: [PATCH 0329/3292] net: e1000: remove repeated word "slot" for
 e1000_main.c

There are double "slot" in comment, so remove the redundant one.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 042de276e632..c2a109126c27 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -5245,7 +5245,7 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 	if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
 		pci_disable_device(pdev);
 
-	/* Request a slot slot reset. */
+	/* Request a slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
 }
 

From e77471f1de0ddba226088ec0cea1c5b1bca0a1de Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Wed, 19 May 2021 14:14:42 +0800
Subject: [PATCH 0330/3292] net: e1000: remove repeated words for e1000_hw.c

There are double "in" and "to" in comments, so remove the redundant one.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000/e1000_hw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 19cf36360933..1042e79a1397 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -2522,7 +2522,7 @@ s32 e1000_check_for_link(struct e1000_hw *hw)
 				 * turn it on. For compatibility with a TBI link
 				 * partner, we will store bad packets. Some
 				 * frames have an additional byte on the end and
-				 * will look like CRC errors to to the hardware.
+				 * will look like CRC errors to the hardware.
 				 */
 				if (!hw->tbi_compatibility_on) {
 					hw->tbi_compatibility_on = true;
@@ -2723,7 +2723,7 @@ static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, u32 data, u16 count)
  * e1000_shift_in_mdi_bits - Shifts data bits in from the PHY
  * @hw: Struct containing variables accessed by shared code
  *
- * Bits are shifted in in MSB to LSB order.
+ * Bits are shifted in MSB to LSB order.
  */
 static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw)
 {

From 59398afda1761ad849b437e514af54ce9b74acc6 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Wed, 19 May 2021 14:14:43 +0800
Subject: [PATCH 0331/3292] net: e1000e: remove repeated word "the" for
 ich8lan.c

There are double "the" in comment, so remove the redundant one.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 590ad110d383..cf7b3887da1d 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -4639,7 +4639,7 @@ static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw)
  *  @hw: pointer to the HW structure
  *
  *  ICH8 use the PCI Express bus, but does not contain a PCI Express Capability
- *  register, so the the bus width is hard coded.
+ *  register, so the bus width is hard coded.
  **/
 static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw)
 {

From 800b74a57363d2239a550972558a6e97af9e5903 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Wed, 19 May 2021 14:14:44 +0800
Subject: [PATCH 0332/3292] net: e1000e: remove repeated word "slot" for
 netdev.c

There are double "slot" in comment, so remove the redundant one.

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 88e9035b75cf..5435606149b0 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7118,7 +7118,7 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 
 	pci_disable_device(pdev);
 
-	/* Request a slot slot reset. */
+	/* Request a slot reset. */
 	return PCI_ERS_RESULT_NEED_RESET;
 }
 

From 0d27895bcbb4fc04ec2ff37f012d41784e014453 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Wed, 19 May 2021 14:14:45 +0800
Subject: [PATCH 0333/3292] net: e1000e: fix misspell word "retreived"

There is a misspell word "retreived" in comment, so fix it to "retrieved".

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000e/phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 1db35b2c7750..0f0efee5fc8e 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -2978,7 +2978,7 @@ static u32 e1000_get_phy_addr_for_hv_page(u32 page)
  *  @data: pointer to the data to be read or written
  *  @read: determines if operation is read or write
  *
- *  Reads the PHY register at offset and stores the retreived information
+ *  Reads the PHY register at offset and stores the retrieved information
  *  in data.  Assumes semaphore already acquired.  Note that the procedure
  *  to access these regs uses the address port and data port to read/write.
  *  These accesses done with PHY address 2 and without using pages.

From f7e0318a314f9271b0f0cdd4bfdc691976976d8c Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Wed, 19 May 2021 15:34:38 +0800
Subject: [PATCH 0334/3292] netlabel: remove unused parameter in
 netlbl_netlink_auditinfo()

loginuid/sessionid/secid have been read from 'current' instead of struct
netlink_skb_parms, the parameter 'skb' seems no longer needed.

Fixes: c53fa1ed92cd ("netlink: kill loginuid/sessionid/sid members from struct netlink_skb_parms")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_calipso.c   |  4 ++--
 net/netlabel/netlabel_cipso_v4.c  |  4 ++--
 net/netlabel/netlabel_mgmt.c      |  8 ++++----
 net/netlabel/netlabel_unlabeled.c | 10 +++++-----
 net/netlabel/netlabel_user.h      |  4 +---
 5 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index f28c8947c730..91a19c3ea1a3 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -105,7 +105,7 @@ static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info)
 	    !info->attrs[NLBL_CALIPSO_A_MTYPE])
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 	switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) {
 	case CALIPSO_MAP_PASS:
 		ret_val = netlbl_calipso_add_pass(info, &audit_info);
@@ -287,7 +287,7 @@ static int netlbl_calipso_remove(struct sk_buff *skb, struct genl_info *info)
 	if (!info->attrs[NLBL_CALIPSO_A_DOI])
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 	cb_arg.doi = nla_get_u32(info->attrs[NLBL_CALIPSO_A_DOI]);
 	cb_arg.audit_info = &audit_info;
 	ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 4f50a64315cf..baf235721c43 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -410,7 +410,7 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 	    !info->attrs[NLBL_CIPSOV4_A_MTYPE])
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 	switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) {
 	case CIPSO_V4_MAP_TRANS:
 		ret_val = netlbl_cipsov4_add_std(info, &audit_info);
@@ -709,7 +709,7 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 	if (!info->attrs[NLBL_CIPSOV4_A_DOI])
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 	cb_arg.doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
 	cb_arg.audit_info = &audit_info;
 	ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index ca52f5085989..e664ab990941 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -434,7 +434,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 	     (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	return netlbl_mgmt_add_common(info, &audit_info);
 }
@@ -457,7 +457,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
 	if (!info->attrs[NLBL_MGMT_A_DOMAIN])
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
 	return netlbl_domhsh_remove(domain, AF_UNSPEC, &audit_info);
@@ -557,7 +557,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 	     (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	return netlbl_mgmt_add_common(info, &audit_info);
 }
@@ -576,7 +576,7 @@ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
 {
 	struct netlbl_audit audit_info;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	return netlbl_domhsh_remove_default(AF_UNSPEC, &audit_info);
 }
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 3e6ac9b790b1..2483df0bbd7c 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -814,7 +814,7 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
 		value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
 		if (value == 1 || value == 0) {
-			netlbl_netlink_auditinfo(skb, &audit_info);
+			netlbl_netlink_auditinfo(&audit_info);
 			netlbl_unlabel_acceptflg_set(value, &audit_info);
 			return 0;
 		}
@@ -897,7 +897,7 @@ static int netlbl_unlabel_staticadd(struct sk_buff *skb,
 	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
 	if (ret_val != 0)
@@ -947,7 +947,7 @@ static int netlbl_unlabel_staticadddef(struct sk_buff *skb,
 	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
 	if (ret_val != 0)
@@ -994,7 +994,7 @@ static int netlbl_unlabel_staticremove(struct sk_buff *skb,
 	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
 	if (ret_val != 0)
@@ -1034,7 +1034,7 @@ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb,
 	       !info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
 		return -EINVAL;
 
-	netlbl_netlink_auditinfo(skb, &audit_info);
+	netlbl_netlink_auditinfo(&audit_info);
 
 	ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
 	if (ret_val != 0)
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index b9ba8112b3c5..6190cbf94bf0 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -28,11 +28,9 @@
 
 /**
  * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
- * @skb: the packet
  * @audit_info: NetLabel audit information
  */
-static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
-					    struct netlbl_audit *audit_info)
+static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info)
 {
 	security_task_getsecid_subj(current, &audit_info->secid);
 	audit_info->loginuid = audit_get_loginuid(current);

From 9cc52f5a533a321136b9e447042ad9f8224f738c Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan@gerhold.net>
Date: Wed, 19 May 2021 11:16:12 +0200
Subject: [PATCH 0335/3292] dt-bindings: net: nfc: s3fwrn5: Add optional clock

On some systems, S3FWRN5 depends on having an external clock enabled
to function correctly. Allow declaring that clock in the device tree.

Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml         | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
index 477066e2b821..081742c2b726 100644
--- a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
@@ -27,6 +27,9 @@ properties:
   reg:
     maxItems: 1
 
+  clocks:
+    maxItems: 1
+
   wake-gpios:
     maxItems: 1
     description:
@@ -80,6 +83,8 @@ examples:
 
             en-gpios = <&gpf1 4 GPIO_ACTIVE_HIGH>;
             wake-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>;
+
+            clocks = <&rpmcc 20>;
         };
     };
   # UART example on Raspberry Pi

From 340f42f7ff0b87a92e69b50706a6c872da756c89 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan@gerhold.net>
Date: Wed, 19 May 2021 11:16:13 +0200
Subject: [PATCH 0336/3292] nfc: s3fwrn5: i2c: Enable optional clock from
 device tree

S3FWRN5 depends on a clock input ("XI" pin) to function properly.
Depending on the hardware configuration this could be an always-on
oscillator or some external clock that must be explicitly enabled.

So far we assumed that the clock is always-on.
Make the driver request an (optional) clock from the device tree
and make sure the clock is running before starting S3FWRN5.

Note: S3FWRN5 asserts "GPIO2" whenever it needs the clock input to
function correctly. On some hardware configurations, GPIO2 is
connected directly to an input pin of the external clock provider
(e.g. the main PMIC of the SoC). In that case, it can automatically
AND the clock enable bit and clock request from S3FWRN5 so that
the clock is actually only enabled when needed.

It is also conceivable that on some other hardware configuration
S3FWRN5's GPIO2 might be connected as a regular GPIO input
of the SoC. In that case, follow-up patches could extend the
driver to request the GPIO, set up an interrupt and only enable
the clock when requested by S3FWRN5.

Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/s3fwrn5/i2c.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c
index 897394167522..38b8d6cab593 100644
--- a/drivers/nfc/s3fwrn5/i2c.c
+++ b/drivers/nfc/s3fwrn5/i2c.c
@@ -6,6 +6,7 @@
  * Robert Baldyga <r.baldyga@samsung.com>
  */
 
+#include <linux/clk.h>
 #include <linux/i2c.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
@@ -22,6 +23,7 @@
 struct s3fwrn5_i2c_phy {
 	struct phy_common common;
 	struct i2c_client *i2c_dev;
+	struct clk *clk;
 
 	unsigned int irq_skip:1;
 };
@@ -207,17 +209,40 @@ static int s3fwrn5_i2c_probe(struct i2c_client *client,
 	if (ret < 0)
 		return ret;
 
+	phy->clk = devm_clk_get_optional(&client->dev, NULL);
+	if (IS_ERR(phy->clk))
+		return dev_err_probe(&client->dev, PTR_ERR(phy->clk),
+				     "failed to get clock\n");
+
+	/*
+	 * S3FWRN5 depends on a clock input ("XI" pin) to function properly.
+	 * Depending on the hardware configuration this could be an always-on
+	 * oscillator or some external clock that must be explicitly enabled.
+	 * Make sure the clock is running before starting S3FWRN5.
+	 */
+	ret = clk_prepare_enable(phy->clk);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed to enable clock: %d\n", ret);
+		return ret;
+	}
+
 	ret = s3fwrn5_probe(&phy->common.ndev, phy, &phy->i2c_dev->dev,
 			    &i2c_phy_ops);
 	if (ret < 0)
-		return ret;
+		goto disable_clk;
 
 	ret = devm_request_threaded_irq(&client->dev, phy->i2c_dev->irq, NULL,
 		s3fwrn5_i2c_irq_thread_fn, IRQF_ONESHOT,
 		S3FWRN5_I2C_DRIVER_NAME, phy);
 	if (ret)
-		s3fwrn5_remove(phy->common.ndev);
+		goto s3fwrn5_remove;
 
+	return 0;
+
+s3fwrn5_remove:
+	s3fwrn5_remove(phy->common.ndev);
+disable_clk:
+	clk_disable_unprepare(phy->clk);
 	return ret;
 }
 
@@ -226,6 +251,7 @@ static int s3fwrn5_i2c_remove(struct i2c_client *client)
 	struct s3fwrn5_i2c_phy *phy = i2c_get_clientdata(client);
 
 	s3fwrn5_remove(phy->common.ndev);
+	clk_disable_unprepare(phy->clk);
 
 	return 0;
 }

From eb0e4d59b6edbe678ecfc5d5b77608b634057f08 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:18 +0300
Subject: [PATCH 0337/3292] net: Add notifications when multipath hash field
 change

In-kernel notifications are already sent when the multipath hash policy
itself changes, but not when the multipath hash fields change.

Add these notifications, so that interested listeners (e.g., switch ASIC
drivers) could perform the necessary configuration.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 18 +++++++++++++++++-
 net/ipv6/sysctl_net_ipv6.c | 18 +++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ffb38ea06841..4fa77f182dcb 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -465,6 +465,22 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 
 	return ret;
 }
+
+static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
+					  void *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	struct net *net;
+	int ret;
+
+	net = container_of(table->data, struct net,
+			   ipv4.sysctl_fib_multipath_hash_fields);
+	ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
+
+	return ret;
+}
 #endif
 
 static struct ctl_table ipv4_table[] = {
@@ -1061,7 +1077,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_fib_multipath_hash_fields,
 		.maxlen		= sizeof(u32),
 		.mode		= 0644,
-		.proc_handler	= proc_douintvec_minmax,
+		.proc_handler	= proc_fib_multipath_hash_fields,
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &fib_multipath_hash_fields_all_mask,
 	},
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 160bea5db973..d7cf26f730d7 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -44,6 +44,22 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
 	return ret;
 }
 
+static int
+proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer,
+			       size_t *lenp, loff_t *ppos)
+{
+	struct net *net;
+	int ret;
+
+	net = container_of(table->data, struct net,
+			   ipv6.sysctl.multipath_hash_fields);
+	ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+	return ret;
+}
+
 static struct ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "bindv6only",
@@ -160,7 +176,7 @@ static struct ctl_table ipv6_table_template[] = {
 		.data		= &init_net.ipv6.sysctl.multipath_hash_fields,
 		.maxlen		= sizeof(u32),
 		.mode		= 0644,
-		.proc_handler	= proc_douintvec_minmax,
+		.proc_handler	= proc_rt6_multipath_hash_fields,
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &rt6_multipath_hash_fields_all_mask,
 	},

From 7725c1c8f73260de2ef0d01ca23b64260fc66ffd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:19 +0300
Subject: [PATCH 0338/3292] mlxsw: spectrum_router: Replace if statement with a
 switch statement

The code was written when only two multipath hash policies were present,
so the if statement was sufficient. The next patch and future patches
are going to add support for more policies, so move to a switch
statement.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 67 ++++++++++---------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index ec2af77a126d..1762a790dd34 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -9612,41 +9612,48 @@ static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
 {
 	struct net *net = mlxsw_sp_net(mlxsw_sp);
-	bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
 
-	mlxsw_sp_mp_hash_header_set(recr2_pl,
-				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
-	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
-	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
-	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
-	if (only_l3)
-		return;
-	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
-	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
-	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
-	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+	case 0:
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
+		mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
+		mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
+		break;
+	case 1:
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
+		mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
+		mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+		break;
+	}
 }
 
 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
 {
-	bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
-
-	mlxsw_sp_mp_hash_header_set(recr2_pl,
-				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
-	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
-	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
-	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
-	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
-	if (only_l3) {
-		mlxsw_sp_mp_hash_field_set(recr2_pl,
-					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
-	} else {
-		mlxsw_sp_mp_hash_header_set(recr2_pl,
-					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
-		mlxsw_sp_mp_hash_field_set(recr2_pl,
-					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
-		mlxsw_sp_mp_hash_field_set(recr2_pl,
-					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
+	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
+	case 0:
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
+		mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
+		mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+		break;
+	case 1:
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
+		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+		mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
+		mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
+		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+		break;
 	}
 }
 

From 9d23d3eb6f4134f19947e6319b79ce1e440aba98 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:20 +0300
Subject: [PATCH 0339/3292] mlxsw: spectrum_router: Move multipath hash
 configuration to a bitmap

Currently, the multipath hash configuration is written directly to the
register payload. While this is OK for the two currently supported
policies, it is going to be hard to follow when more policies and more
packet fields are added.

Instead, set the required headers and fields in a bitmap and then dump
it to the register payload.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h     | 46 +--------
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 98 ++++++++++++-------
 2 files changed, 64 insertions(+), 80 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 900b4bf5bb5b..4039c9d21824 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -8305,6 +8305,8 @@ enum {
 	MLXSW_REG_RECR2_TCP_UDP_EN_IPV4		= 7,
 	/* Enable TCP/UDP header fields if packet is IPv6 */
 	MLXSW_REG_RECR2_TCP_UDP_EN_IPV6		= 8,
+
+	__MLXSW_REG_RECR2_HEADER_CNT,
 };
 
 /* reg_recr2_outer_header_enables
@@ -8339,6 +8341,8 @@ enum {
 	MLXSW_REG_RECR2_TCP_UDP_SPORT			= 74,
 	/* TCP/UDP Destination Port */
 	MLXSW_REG_RECR2_TCP_UDP_DPORT			= 75,
+
+	__MLXSW_REG_RECR2_FIELD_CNT,
 };
 
 /* reg_recr2_outer_header_fields_enable
@@ -8347,48 +8351,6 @@ enum {
  */
 MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
 
-static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload)
-{
-	int i;
-
-	for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++)
-		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
-							       true);
-}
-
-static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload)
-{
-	int i;
-
-	for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++)
-		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
-							       true);
-}
-
-static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload)
-{
-	int i = MLXSW_REG_RECR2_IPV6_SIP0_7;
-
-	mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true);
-
-	i = MLXSW_REG_RECR2_IPV6_SIP8;
-	for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++)
-		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
-							       true);
-}
-
-static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload)
-{
-	int i = MLXSW_REG_RECR2_IPV6_DIP0_7;
-
-	mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true);
-
-	i = MLXSW_REG_RECR2_IPV6_DIP8;
-	for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++)
-		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
-							       true);
-}
-
 static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed)
 {
 	MLXSW_REG_ZERO(recr2, payload);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1762a790dd34..3f896c5e50c7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -9599,73 +9599,95 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
-{
-	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
-}
+struct mlxsw_sp_mp_hash_config {
+	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
+	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
+};
 
-static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
-{
-	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
-}
+#define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
+	bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)
 
-static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
+#define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
+	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)
+
+#define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
+	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
+
+static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_mp_hash_config *config)
 {
 	struct net *net = mlxsw_sp_net(mlxsw_sp);
+	unsigned long *headers = config->headers;
+	unsigned long *fields = config->fields;
 
 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
 	case 0:
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
-		mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
-		mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
 		break;
 	case 1:
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
-		mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
-		mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
 		break;
 	}
 }
 
-static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
+static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_mp_hash_config *config)
 {
+	unsigned long *headers = config->headers;
+	unsigned long *fields = config->fields;
+
 	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
 	case 0:
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
-		mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
-		mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
 		break;
 	case 1:
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
-		mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
-		mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
-		mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
-		mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
 		break;
 	}
 }
 
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_sp_mp_hash_config config = {};
 	char recr2_pl[MLXSW_REG_RECR2_LEN];
+	unsigned long bit;
 	u32 seed;
 
 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
 	mlxsw_reg_recr2_pack(recr2_pl, seed);
-	mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
-	mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
+	mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
+	mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
+
+	for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
+		mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
+	for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
+		mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
 }

From 28bc824807a5cb95edb46807c210dfff37a3a0b3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:21 +0300
Subject: [PATCH 0340/3292] mlxsw: reg: Add inner packet fields to RECRv2
 register

The RECRv2 register is used for setting up the router's ECMP hash
configuration. Extend it with inner packet fields to allow the ECMP hash
to be calculated based on inner flow information.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 42 +++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 4039c9d21824..f9419cc53480 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -8351,6 +8351,48 @@ enum {
  */
 MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
 
+/* reg_recr2_inner_header_enables
+ * Bit mask where each bit enables a specific inner layer to be included in the
+ * hash calculation. Same values as reg_recr2_outer_header_enables.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_enables, 0x2C, 0x04, 1);
+
+enum {
+	/* Inner IPv4 Source IP */
+	MLXSW_REG_RECR2_INNER_IPV4_SIP0			= 3,
+	MLXSW_REG_RECR2_INNER_IPV4_SIP3			= 6,
+	/* Inner IPv4 Destination IP */
+	MLXSW_REG_RECR2_INNER_IPV4_DIP0			= 7,
+	MLXSW_REG_RECR2_INNER_IPV4_DIP3			= 10,
+	/* Inner IP Protocol */
+	MLXSW_REG_RECR2_INNER_IPV4_PROTOCOL		= 11,
+	/* Inner IPv6 Source IP */
+	MLXSW_REG_RECR2_INNER_IPV6_SIP0_7		= 12,
+	MLXSW_REG_RECR2_INNER_IPV6_SIP8			= 20,
+	MLXSW_REG_RECR2_INNER_IPV6_SIP15		= 27,
+	/* Inner IPv6 Destination IP */
+	MLXSW_REG_RECR2_INNER_IPV6_DIP0_7		= 28,
+	MLXSW_REG_RECR2_INNER_IPV6_DIP8			= 36,
+	MLXSW_REG_RECR2_INNER_IPV6_DIP15		= 43,
+	/* Inner IPv6 Next Header */
+	MLXSW_REG_RECR2_INNER_IPV6_NEXT_HEADER		= 44,
+	/* Inner IPv6 Flow Label */
+	MLXSW_REG_RECR2_INNER_IPV6_FLOW_LABEL		= 45,
+	/* Inner TCP/UDP Source Port */
+	MLXSW_REG_RECR2_INNER_TCP_UDP_SPORT		= 46,
+	/* Inner TCP/UDP Destination Port */
+	MLXSW_REG_RECR2_INNER_TCP_UDP_DPORT		= 47,
+
+	__MLXSW_REG_RECR2_INNER_FIELD_CNT,
+};
+
+/* reg_recr2_inner_header_fields_enable
+ * Inner packet fields to enable for ECMP hash subject to inner_header_enables.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_fields_enable, 0x30, 0x08, 1);
+
 static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed)
 {
 	MLXSW_REG_ZERO(recr2, payload);

From b7b8f435ea3b33ba7067f992c5b85a62f24d19ed Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:22 +0300
Subject: [PATCH 0341/3292] mlxsw: spectrum_outer: Factor out helper for common
 outer fields

Outer IPv4 and IPv6 addresses are used by multiple multipath hash
policies. Factor out helpers that set these fields to increase code
sharing between different policies.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 48 +++++++++++--------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 3f896c5e50c7..605515137636 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -9613,6 +9613,17 @@ struct mlxsw_sp_mp_hash_config {
 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
 
+static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
+{
+	unsigned long *headers = config->headers;
+	unsigned long *fields = config->fields;
+
+	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
+	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
+}
+
 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_mp_hash_config *config)
 {
@@ -9622,17 +9633,11 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 
 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
 	case 0:
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
+		mlxsw_sp_mp4_hash_outer_addr(config);
 		break;
 	case 1:
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
+		mlxsw_sp_mp4_hash_outer_addr(config);
 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
@@ -9640,6 +9645,19 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
+static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
+{
+	unsigned long *headers = config->headers;
+	unsigned long *fields = config->fields;
+
+	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
+	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
+	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
+	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
+}
+
 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_mp_hash_config *config)
 {
@@ -9648,23 +9666,13 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
 
 	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
 	case 0:
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
-		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
-		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
+		mlxsw_sp_mp6_hash_outer_addr(config);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
 		break;
 	case 1:
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
-		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
+		mlxsw_sp_mp6_hash_outer_addr(config);
 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
-		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
-		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
-		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);

From 01848e05f8bbff2d799073b307fe2eb42bee764b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:23 +0300
Subject: [PATCH 0342/3292] mlxsw: spectrum_router: Add support for inner layer
 3 multipath hash policy

When this policy is set, the kernel uses the inner layer 3 fields for
multipath hash computation and falls back to the outer fields if no
encapsulation was encountered. This behavior is most likely influenced
by the behavior of the flow dissector, which is used for the packet
dissection.

The Spectrum ASIC, however, cannot fallback to outer fields if inner
fields are not available. This should not result in a discrepancy from
the software data path because if several flows have matching inner
fields, they will tend to have matching outer fields as well.

Therefore, implement this policy by enabling both outer and inner layer
3 fields for the multipath hash computation.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 605515137636..bacac94398dd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -9602,6 +9602,8 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
 struct mlxsw_sp_mp_hash_config {
 	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
 	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
+	DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
+	DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
 };
 
 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
@@ -9613,6 +9615,27 @@ struct mlxsw_sp_mp_hash_config {
 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
 
+static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
+{
+	unsigned long *inner_headers = config->inner_headers;
+	unsigned long *inner_fields = config->inner_fields;
+
+	/* IPv4 inner */
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
+	/* IPv6 inner */
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
+	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
+	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
+	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
+	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
+	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
+}
+
 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
 {
 	unsigned long *headers = config->headers;
@@ -9642,6 +9665,12 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
 		break;
+	case 2:
+		/* Outer */
+		mlxsw_sp_mp4_hash_outer_addr(config);
+		/* Inner */
+		mlxsw_sp_mp_hash_inner_l3(config);
+		break;
 	}
 }
 
@@ -9677,6 +9706,14 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
 		break;
+	case 2:
+		/* Outer */
+		mlxsw_sp_mp6_hash_outer_addr(config);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
+		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
+		/* Inner */
+		mlxsw_sp_mp_hash_inner_l3(config);
+		break;
 	}
 }
 
@@ -9696,6 +9733,10 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 		mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
 	for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
 		mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
+	for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
+		mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
+	for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
+		mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
 }

From daeabf89eb892cf827608177ecae7ca9389c195a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 19 May 2021 15:08:24 +0300
Subject: [PATCH 0343/3292] mlxsw: spectrum_router: Add support for custom
 multipath hash policy

When this policy is set, only enable the packet fields that were enabled
by user space for multipath hash computation.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index bacac94398dd..6decc5a43f98 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -9647,12 +9647,53 @@ static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
 }
 
+static void
+mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
+			      u32 hash_fields)
+{
+	unsigned long *inner_headers = config->inner_headers;
+	unsigned long *inner_fields = config->inner_fields;
+
+	/* IPv4 Inner */
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
+	/* IPv6 inner */
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
+	}
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
+		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
+	}
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
+	/* L4 inner */
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
+	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
+	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
+}
+
 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_mp_hash_config *config)
 {
 	struct net *net = mlxsw_sp_net(mlxsw_sp);
 	unsigned long *headers = config->headers;
 	unsigned long *fields = config->fields;
+	u32 hash_fields;
 
 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
 	case 0:
@@ -9671,6 +9712,25 @@ static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
 		/* Inner */
 		mlxsw_sp_mp_hash_inner_l3(config);
 		break;
+	case 3:
+		hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+		/* Outer */
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
+		/* Inner */
+		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
+		break;
 	}
 }
 
@@ -9690,6 +9750,7 @@ static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_mp_hash_config *config)
 {
+	u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
 	unsigned long *headers = config->headers;
 	unsigned long *fields = config->fields;
 
@@ -9714,6 +9775,30 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
 		/* Inner */
 		mlxsw_sp_mp_hash_inner_l3(config);
 		break;
+	case 3:
+		/* Outer */
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
+		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
+			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
+		}
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
+			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
+		}
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
+		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
+		/* Inner */
+		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
+		break;
 	}
 }
 

From 05ff8435e50569a0a6b95e5ceaea43696e8827ab Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 19 May 2021 15:17:21 +0200
Subject: [PATCH 0344/3292] net/sched: cls_api: increase max_reclassify_loop

modern userspace applications, like OVN, can configure the TC datapath to
"recirculate" packets several times. If more than 4 "recirculation" rules
are configured, packets can be dropped by __tcf_classify().
Changing the maximum number of reclassifications (from 4 to 16) should be
sufficient to prevent drops in most use cases, and guard against loops at
the same time.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 40fbea626dfd..75e3a288a7c8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1531,7 +1531,7 @@ static inline int __tcf_classify(struct sk_buff *skb,
 				 u32 *last_executed_chain)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	const int max_reclassify_loop = 4;
+	const int max_reclassify_loop = 16;
 	const struct tcf_proto *first_tp;
 	int limit = 0;
 

From 20e76d3d044d936998617f8acd7e77bebd9ca703 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 19 May 2021 14:16:27 +0000
Subject: [PATCH 0345/3292] net: ethernet: ixp4xx: Fix return value check in
 ixp4xx_eth_probe()

In case of error, the function mdiobus_get_phy() returns NULL
pointer not ERR_PTR(). The IS_ERR() test in the return value
check should be replaced with NULL test.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xscale/ixp4xx_eth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index cb89323855d8..1ecceeb9700d 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1531,8 +1531,8 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 		phydev = of_phy_get_and_connect(ndev, np, ixp4xx_adjust_link);
 	} else {
 		phydev = mdiobus_get_phy(mdio_bus, plat->phy);
-		if (IS_ERR(phydev)) {
-			err = PTR_ERR(phydev);
+		if (!phydev) {
+			err = -ENODEV;
 			dev_err(dev, "could not connect phydev (%d)\n", err);
 			goto err_free_mem;
 		}

From a49e72b3bda73d36664a084e47da9727a31b8095 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 19 May 2021 15:58:52 +0000
Subject: [PATCH 0346/3292] net: qrtr: ns: Fix error return code in
 qrtr_ns_init()

Fix to return a negative error code -ENOMEM from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: c6e08d6251f3 ("net: qrtr: Allocate workqueue before kernel_bind")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/ns.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 8d00dfe8139e..1990d496fcfc 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -775,8 +775,10 @@ int qrtr_ns_init(void)
 	}
 
 	qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1);
-	if (!qrtr_ns.workqueue)
+	if (!qrtr_ns.workqueue) {
+		ret = -ENOMEM;
 		goto err_sock;
+	}
 
 	qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready;
 

From e40d2cca01893c1941f5959b14bb0cd0d4f4d099 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Wed, 19 May 2021 11:31:59 +0800
Subject: [PATCH 0347/3292] net: phy: add MediaTek Gigabit Ethernet PHY driver

Add support for MediaTek Gigabit Ethernet PHYs found in MT7530 and
MT7531 switches.
The initialization procedure is from the vendor driver, but due to lack
of documentation, the function of some register values remains unknown.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig       |   5 ++
 drivers/net/phy/Makefile      |   1 +
 drivers/net/phy/mediatek-ge.c | 112 ++++++++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+)
 create mode 100644 drivers/net/phy/mediatek-ge.c

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 25511f39b01f..1534e408505b 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -207,6 +207,11 @@ config MARVELL_88X2222_PHY
 	  Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet
 	  Transceiver.
 
+config MEDIATEK_GE_PHY
+	tristate "MediaTek Gigabit Ethernet PHYs"
+	help
+	  Supports the MediaTek Gigabit Ethernet PHYs.
+
 config MICREL_PHY
 	tristate "Micrel PHYs"
 	help
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index bcda7ed2455d..24328d7cf931 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_LXT_PHY)		+= lxt.o
 obj-$(CONFIG_MARVELL_10G_PHY)	+= marvell10g.o
 obj-$(CONFIG_MARVELL_PHY)	+= marvell.o
 obj-$(CONFIG_MARVELL_88X2222_PHY)	+= marvell-88x2222.o
+obj-$(CONFIG_MEDIATEK_GE_PHY)	+= mediatek-ge.o
 obj-$(CONFIG_MESON_GXL_PHY)	+= meson-gxl.o
 obj-$(CONFIG_MICREL_KS8995MA)	+= spi_ks8995.o
 obj-$(CONFIG_MICREL_PHY)	+= micrel.o
diff --git a/drivers/net/phy/mediatek-ge.c b/drivers/net/phy/mediatek-ge.c
new file mode 100644
index 000000000000..11ff335d6228
--- /dev/null
+++ b/drivers/net/phy/mediatek-ge.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/bitfield.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+
+#define MTK_EXT_PAGE_ACCESS		0x1f
+#define MTK_PHY_PAGE_STANDARD		0x0000
+#define MTK_PHY_PAGE_EXTENDED		0x0001
+#define MTK_PHY_PAGE_EXTENDED_2		0x0002
+#define MTK_PHY_PAGE_EXTENDED_3		0x0003
+#define MTK_PHY_PAGE_EXTENDED_2A30	0x2a30
+#define MTK_PHY_PAGE_EXTENDED_52B5	0x52b5
+
+static int mtk_gephy_read_page(struct phy_device *phydev)
+{
+	return __phy_read(phydev, MTK_EXT_PAGE_ACCESS);
+}
+
+static int mtk_gephy_write_page(struct phy_device *phydev, int page)
+{
+	return __phy_write(phydev, MTK_EXT_PAGE_ACCESS, page);
+}
+
+static void mtk_gephy_config_init(struct phy_device *phydev)
+{
+	/* Disable EEE */
+	phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0);
+
+	/* Enable HW auto downshift */
+	phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED, 0x14, 0, BIT(4));
+
+	/* Increase SlvDPSready time */
+	phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5);
+	__phy_write(phydev, 0x10, 0xafae);
+	__phy_write(phydev, 0x12, 0x2f);
+	__phy_write(phydev, 0x10, 0x8fae);
+	phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0);
+
+	/* Adjust 100_mse_threshold */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x123, 0xffff);
+
+	/* Disable mcc */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, 0xa6, 0x300);
+}
+
+static int mt7530_phy_config_init(struct phy_device *phydev)
+{
+	mtk_gephy_config_init(phydev);
+
+	/* Increase post_update_timer */
+	phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3, 0x11, 0x4b);
+
+	return 0;
+}
+
+static int mt7531_phy_config_init(struct phy_device *phydev)
+{
+	if (phydev->interface != PHY_INTERFACE_MODE_INTERNAL)
+		return -EINVAL;
+
+	mtk_gephy_config_init(phydev);
+
+	/* PHY link down power saving enable */
+	phy_set_bits(phydev, 0x17, BIT(4));
+	phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, 0xc6, 0x300);
+
+	/* Set TX Pair delay selection */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x13, 0x404);
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x14, 0x404);
+
+	return 0;
+}
+
+static struct phy_driver mtk_gephy_driver[] = {
+	{
+		PHY_ID_MATCH_EXACT(0x03a29412),
+		.name		= "MediaTek MT7530 PHY",
+		.config_init	= mt7530_phy_config_init,
+		/* Interrupts are handled by the switch, not the PHY
+		 * itself.
+		 */
+		.config_intr	= genphy_no_config_intr,
+		.handle_interrupt = genphy_handle_interrupt_no_ack,
+		.read_page	= mtk_gephy_read_page,
+		.write_page	= mtk_gephy_write_page,
+	},
+	{
+		PHY_ID_MATCH_EXACT(0x03a29441),
+		.name		= "MediaTek MT7531 PHY",
+		.config_init	= mt7531_phy_config_init,
+		/* Interrupts are handled by the switch, not the PHY
+		 * itself.
+		 */
+		.config_intr	= genphy_no_config_intr,
+		.handle_interrupt = genphy_handle_interrupt_no_ack,
+		.read_page	= mtk_gephy_read_page,
+		.write_page	= mtk_gephy_write_page,
+	},
+};
+
+module_phy_driver(mtk_gephy_driver);
+
+static struct mdio_device_id __maybe_unused mtk_gephy_tbl[] = {
+	{ PHY_ID_MATCH_VENDOR(0x03a29400) },
+	{ }
+};
+
+MODULE_DESCRIPTION("MediaTek Gigabit Ethernet PHY driver");
+MODULE_AUTHOR("DENG, Qingfang <dqfext@gmail.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_DEVICE_TABLE(mdio, mtk_gephy_tbl);

From ba751e28d44255744a30190faad0ca09b455c44d Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Wed, 19 May 2021 11:32:00 +0800
Subject: [PATCH 0348/3292] net: dsa: mt7530: add interrupt support

Add support for MT7530 interrupt controller to handle internal PHYs.
In order to assign an IRQ number to each PHY, the registration of MDIO bus
is also done in this driver.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 264 +++++++++++++++++++++++++++++++++++----
 drivers/net/dsa/mt7530.h |  20 ++-
 2 files changed, 256 insertions(+), 28 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 96f7c9eede35..db838343fb05 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -10,6 +10,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/of_platform.h>
@@ -596,18 +597,14 @@ mt7530_mib_reset(struct dsa_switch *ds)
 	mt7530_write(priv, MT7530_MIB_CCR, CCR_MIB_ACTIVATE);
 }
 
-static int mt7530_phy_read(struct dsa_switch *ds, int port, int regnum)
+static int mt7530_phy_read(struct mt7530_priv *priv, int port, int regnum)
 {
-	struct mt7530_priv *priv = ds->priv;
-
 	return mdiobus_read_nested(priv->bus, port, regnum);
 }
 
-static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum,
+static int mt7530_phy_write(struct mt7530_priv *priv, int port, int regnum,
 			    u16 val)
 {
-	struct mt7530_priv *priv = ds->priv;
-
 	return mdiobus_write_nested(priv->bus, port, regnum, val);
 }
 
@@ -785,9 +782,8 @@ out:
 }
 
 static int
-mt7531_ind_phy_read(struct dsa_switch *ds, int port, int regnum)
+mt7531_ind_phy_read(struct mt7530_priv *priv, int port, int regnum)
 {
-	struct mt7530_priv *priv = ds->priv;
 	int devad;
 	int ret;
 
@@ -803,10 +799,9 @@ mt7531_ind_phy_read(struct dsa_switch *ds, int port, int regnum)
 }
 
 static int
-mt7531_ind_phy_write(struct dsa_switch *ds, int port, int regnum,
+mt7531_ind_phy_write(struct mt7530_priv *priv, int port, int regnum,
 		     u16 data)
 {
-	struct mt7530_priv *priv = ds->priv;
 	int devad;
 	int ret;
 
@@ -822,6 +817,22 @@ mt7531_ind_phy_write(struct dsa_switch *ds, int port, int regnum,
 	return ret;
 }
 
+static int
+mt753x_phy_read(struct mii_bus *bus, int port, int regnum)
+{
+	struct mt7530_priv *priv = bus->priv;
+
+	return priv->info->phy_read(priv, port, regnum);
+}
+
+static int
+mt753x_phy_write(struct mii_bus *bus, int port, int regnum, u16 val)
+{
+	struct mt7530_priv *priv = bus->priv;
+
+	return priv->info->phy_write(priv, port, regnum, val);
+}
+
 static void
 mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset,
 		   uint8_t *data)
@@ -1828,6 +1839,210 @@ mt7530_setup_gpio(struct mt7530_priv *priv)
 }
 #endif /* CONFIG_GPIOLIB */
 
+static irqreturn_t
+mt7530_irq_thread_fn(int irq, void *dev_id)
+{
+	struct mt7530_priv *priv = dev_id;
+	bool handled = false;
+	u32 val;
+	int p;
+
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	val = mt7530_mii_read(priv, MT7530_SYS_INT_STS);
+	mt7530_mii_write(priv, MT7530_SYS_INT_STS, val);
+	mutex_unlock(&priv->bus->mdio_lock);
+
+	for (p = 0; p < MT7530_NUM_PHYS; p++) {
+		if (BIT(p) & val) {
+			unsigned int irq;
+
+			irq = irq_find_mapping(priv->irq_domain, p);
+			handle_nested_irq(irq);
+			handled = true;
+		}
+	}
+
+	return IRQ_RETVAL(handled);
+}
+
+static void
+mt7530_irq_mask(struct irq_data *d)
+{
+	struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
+
+	priv->irq_enable &= ~BIT(d->hwirq);
+}
+
+static void
+mt7530_irq_unmask(struct irq_data *d)
+{
+	struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
+
+	priv->irq_enable |= BIT(d->hwirq);
+}
+
+static void
+mt7530_irq_bus_lock(struct irq_data *d)
+{
+	struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
+
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+}
+
+static void
+mt7530_irq_bus_sync_unlock(struct irq_data *d)
+{
+	struct mt7530_priv *priv = irq_data_get_irq_chip_data(d);
+
+	mt7530_mii_write(priv, MT7530_SYS_INT_EN, priv->irq_enable);
+	mutex_unlock(&priv->bus->mdio_lock);
+}
+
+static struct irq_chip mt7530_irq_chip = {
+	.name = KBUILD_MODNAME,
+	.irq_mask = mt7530_irq_mask,
+	.irq_unmask = mt7530_irq_unmask,
+	.irq_bus_lock = mt7530_irq_bus_lock,
+	.irq_bus_sync_unlock = mt7530_irq_bus_sync_unlock,
+};
+
+static int
+mt7530_irq_map(struct irq_domain *domain, unsigned int irq,
+	       irq_hw_number_t hwirq)
+{
+	irq_set_chip_data(irq, domain->host_data);
+	irq_set_chip_and_handler(irq, &mt7530_irq_chip, handle_simple_irq);
+	irq_set_nested_thread(irq, true);
+	irq_set_noprobe(irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mt7530_irq_domain_ops = {
+	.map = mt7530_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static void
+mt7530_setup_mdio_irq(struct mt7530_priv *priv)
+{
+	struct dsa_switch *ds = priv->ds;
+	int p;
+
+	for (p = 0; p < MT7530_NUM_PHYS; p++) {
+		if (BIT(p) & ds->phys_mii_mask) {
+			unsigned int irq;
+
+			irq = irq_create_mapping(priv->irq_domain, p);
+			ds->slave_mii_bus->irq[p] = irq;
+		}
+	}
+}
+
+static int
+mt7530_setup_irq(struct mt7530_priv *priv)
+{
+	struct device *dev = priv->dev;
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	if (!of_property_read_bool(np, "interrupt-controller")) {
+		dev_info(dev, "no interrupt support\n");
+		return 0;
+	}
+
+	priv->irq = of_irq_get(np, 0);
+	if (priv->irq <= 0) {
+		dev_err(dev, "failed to get parent IRQ: %d\n", priv->irq);
+		return priv->irq ? : -EINVAL;
+	}
+
+	priv->irq_domain = irq_domain_add_linear(np, MT7530_NUM_PHYS,
+						 &mt7530_irq_domain_ops, priv);
+	if (!priv->irq_domain) {
+		dev_err(dev, "failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* This register must be set for MT7530 to properly fire interrupts */
+	if (priv->id != ID_MT7531)
+		mt7530_set(priv, MT7530_TOP_SIG_CTRL, TOP_SIG_CTRL_NORMAL);
+
+	ret = request_threaded_irq(priv->irq, NULL, mt7530_irq_thread_fn,
+				   IRQF_ONESHOT, KBUILD_MODNAME, priv);
+	if (ret) {
+		irq_domain_remove(priv->irq_domain);
+		dev_err(dev, "failed to request IRQ: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+mt7530_free_mdio_irq(struct mt7530_priv *priv)
+{
+	int p;
+
+	for (p = 0; p < MT7530_NUM_PHYS; p++) {
+		if (BIT(p) & priv->ds->phys_mii_mask) {
+			unsigned int irq;
+
+			irq = irq_find_mapping(priv->irq_domain, p);
+			irq_dispose_mapping(irq);
+		}
+	}
+}
+
+static void
+mt7530_free_irq_common(struct mt7530_priv *priv)
+{
+	free_irq(priv->irq, priv);
+	irq_domain_remove(priv->irq_domain);
+}
+
+static void
+mt7530_free_irq(struct mt7530_priv *priv)
+{
+	mt7530_free_mdio_irq(priv);
+	mt7530_free_irq_common(priv);
+}
+
+static int
+mt7530_setup_mdio(struct mt7530_priv *priv)
+{
+	struct dsa_switch *ds = priv->ds;
+	struct device *dev = priv->dev;
+	struct mii_bus *bus;
+	static int idx;
+	int ret;
+
+	bus = devm_mdiobus_alloc(dev);
+	if (!bus)
+		return -ENOMEM;
+
+	ds->slave_mii_bus = bus;
+	bus->priv = priv;
+	bus->name = KBUILD_MODNAME "-mii";
+	snprintf(bus->id, MII_BUS_ID_SIZE, KBUILD_MODNAME "-%d", idx++);
+	bus->read = mt753x_phy_read;
+	bus->write = mt753x_phy_write;
+	bus->parent = dev;
+	bus->phy_mask = ~ds->phys_mii_mask;
+
+	if (priv->irq)
+		mt7530_setup_mdio_irq(priv);
+
+	ret = mdiobus_register(bus);
+	if (ret) {
+		dev_err(dev, "failed to register MDIO bus: %d\n", ret);
+		if (priv->irq)
+			mt7530_free_mdio_irq(priv);
+	}
+
+	return ret;
+}
+
 static int
 mt7530_setup(struct dsa_switch *ds)
 {
@@ -2791,24 +3006,20 @@ static int
 mt753x_setup(struct dsa_switch *ds)
 {
 	struct mt7530_priv *priv = ds->priv;
+	int ret = priv->info->sw_setup(ds);
 
-	return priv->info->sw_setup(ds);
-}
+	if (ret)
+		return ret;
 
-static int
-mt753x_phy_read(struct dsa_switch *ds, int port, int regnum)
-{
-	struct mt7530_priv *priv = ds->priv;
+	ret = mt7530_setup_irq(priv);
+	if (ret)
+		return ret;
 
-	return priv->info->phy_read(ds, port, regnum);
-}
+	ret = mt7530_setup_mdio(priv);
+	if (ret && priv->irq)
+		mt7530_free_irq_common(priv);
 
-static int
-mt753x_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
-{
-	struct mt7530_priv *priv = ds->priv;
-
-	return priv->info->phy_write(ds, port, regnum, val);
+	return ret;
 }
 
 static int mt753x_get_mac_eee(struct dsa_switch *ds, int port,
@@ -2845,8 +3056,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
 	.get_tag_protocol	= mtk_get_tag_protocol,
 	.setup			= mt753x_setup,
 	.get_strings		= mt7530_get_strings,
-	.phy_read		= mt753x_phy_read,
-	.phy_write		= mt753x_phy_write,
 	.get_ethtool_stats	= mt7530_get_ethtool_stats,
 	.get_sset_count		= mt7530_get_sset_count,
 	.set_ageing_time	= mt7530_set_ageing_time,
@@ -3029,6 +3238,9 @@ mt7530_remove(struct mdio_device *mdiodev)
 		dev_err(priv->dev, "Failed to disable io pwr: %d\n",
 			ret);
 
+	if (priv->irq)
+		mt7530_free_irq(priv);
+
 	dsa_unregister_switch(priv->ds);
 	mutex_destroy(&priv->reg_mutex);
 }
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 0204da486f3a..334d610a503d 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -7,6 +7,7 @@
 #define __MT7530_H
 
 #define MT7530_NUM_PORTS		7
+#define MT7530_NUM_PHYS			5
 #define MT7530_CPU_PORT			6
 #define MT7530_NUM_FDB_RECORDS		2048
 #define MT7530_ALL_MEMBERS		0xff
@@ -393,6 +394,12 @@ enum mt7531_sgmii_force_duplex {
 #define  SYS_CTRL_SW_RST		BIT(1)
 #define  SYS_CTRL_REG_RST		BIT(0)
 
+/* Register for system interrupt */
+#define MT7530_SYS_INT_EN		0x7008
+
+/* Register for system interrupt status */
+#define MT7530_SYS_INT_STS		0x700c
+
 /* Register for PHY Indirect Access Control */
 #define MT7531_PHY_IAC			0x701C
 #define  MT7531_PHY_ACS_ST		BIT(31)
@@ -714,6 +721,8 @@ static const char *p5_intf_modes(unsigned int p5_interface)
 	}
 }
 
+struct mt7530_priv;
+
 /* struct mt753x_info -	This is the main data structure for holding the specific
  *			part for each supported device
  * @sw_setup:		Holding the handler to a device initialization
@@ -738,8 +747,8 @@ struct mt753x_info {
 	enum mt753x_id id;
 
 	int (*sw_setup)(struct dsa_switch *ds);
-	int (*phy_read)(struct dsa_switch *ds, int port, int regnum);
-	int (*phy_write)(struct dsa_switch *ds, int port, int regnum, u16 val);
+	int (*phy_read)(struct mt7530_priv *priv, int port, int regnum);
+	int (*phy_write)(struct mt7530_priv *priv, int port, int regnum, u16 val);
 	int (*pad_setup)(struct dsa_switch *ds, phy_interface_t interface);
 	int (*cpu_port_config)(struct dsa_switch *ds, int port);
 	bool (*phy_mode_supported)(struct dsa_switch *ds, int port,
@@ -773,6 +782,10 @@ struct mt753x_info {
  *			registers
  * @p6_interface	Holding the current port 6 interface
  * @p5_intf_sel:	Holding the current port 5 interface select
+ *
+ * @irq:		IRQ number of the switch
+ * @irq_domain:		IRQ domain of the switch irq_chip
+ * @irq_enable:		IRQ enable bits, synced to SYS_INT_EN
  */
 struct mt7530_priv {
 	struct device		*dev;
@@ -794,6 +807,9 @@ struct mt7530_priv {
 	struct mt7530_port	ports[MT7530_NUM_PORTS];
 	/* protect among processes for registers access*/
 	struct mutex reg_mutex;
+	int irq;
+	struct irq_domain *irq_domain;
+	u32 irq_enable;
 };
 
 struct mt7530_hw_vlan_entry {

From 4006f986c091cda1a66067f77b6f5704a9618562 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Wed, 19 May 2021 11:32:01 +0800
Subject: [PATCH 0349/3292] dt-bindings: net: dsa: add MT7530 interrupt
 controller binding

Add device tree binding to support MT7530 interrupt controller.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/mt7530.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt
index de04626a8e9d..18247ebfc487 100644
--- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt
+++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt
@@ -81,6 +81,12 @@ Optional properties:
 - gpio-controller: Boolean; if defined, MT7530's LED controller will run on
 	GPIO mode.
 - #gpio-cells: Must be 2 if gpio-controller is defined.
+- interrupt-controller: Boolean; Enables the internal interrupt controller.
+
+If interrupt-controller is defined, the following properties are required.
+
+- #interrupt-cells: Must be 1.
+- interrupts: Parent interrupt for the interrupt controller.
 
 See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
 required, optional properties and how the integrated switch subnodes must

From f494f0935ffb62c1d5463e59dfcd7d89a46c7807 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Wed, 19 May 2021 11:32:02 +0800
Subject: [PATCH 0350/3292] staging: mt7621-dts: enable MT7530 interrupt
 controller

Enable MT7530 interrupt controller in the MT7621 SoC.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/mt7621-dts/mt7621.dtsi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi
index f0c9ae757bcd..093a7f8091b5 100644
--- a/drivers/staging/mt7621-dts/mt7621.dtsi
+++ b/drivers/staging/mt7621-dts/mt7621.dtsi
@@ -437,6 +437,10 @@
 				mediatek,mcm;
 				resets = <&rstctrl 2>;
 				reset-names = "mcm";
+				interrupt-controller;
+				#interrupt-cells = <1>;
+				interrupt-parent = <&gic>;
+				interrupts = <GIC_SHARED 23 IRQ_TYPE_LEVEL_HIGH>;
 
 				ports {
 					#address-cells = <1>;

From 18b380ed61f892ed06838d1f1a5124d966292ed3 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 14 May 2021 14:48:43 +0800
Subject: [PATCH 0351/3292] PM / devfreq: Add missing error code in
 devfreq_add_device()

Set err code in the error path before jumping to the end of the function.

Fixes: 4dc3bab8687f ("PM / devfreq: Add support delayed timer for polling mode")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/devfreq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index fe08c46642f7..28f3e0ba6cdd 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -823,6 +823,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	if (devfreq->profile->timer < 0
 		|| devfreq->profile->timer >= DEVFREQ_TIMER_NUM) {
 		mutex_unlock(&devfreq->lock);
+		err = -EINVAL;
 		goto err_dev;
 	}
 

From ac9fd3c8034011cc10a4c161b70a5837d95203f6 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 18 May 2021 12:49:10 +0800
Subject: [PATCH 0352/3292] opp: use list_del_init instead of
 list_del/INIT_LIST_HEAD

Using list_del_init() instead of list_del() + INIT_LIST_HEAD()
to simpify the code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index c582a9ca397b..aa75a1caf08a 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -433,8 +433,7 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
 
 		/* All required opp-tables found, remove from lazy list */
 		if (!lazy) {
-			list_del(&opp_table->lazy);
-			INIT_LIST_HEAD(&opp_table->lazy);
+			list_del_init(&opp_table->lazy);
 
 			list_for_each_entry(opp, &opp_table->opp_list, node)
 				_required_opps_available(opp, opp_table->required_opp_count);

From fa236c2b2d4436d9f19ee4e5d5924e90ffd7bb43 Mon Sep 17 00:00:00 2001
From: Arturo Giusti <koredump@protonmail.com>
Date: Tue, 18 May 2021 12:34:57 +0200
Subject: [PATCH 0353/3292] udf: Fix NULL pointer dereference in udf_symlink
 function

In function udf_symlink, epos.bh is assigned with the value returned
by udf_tgetblk. The function udf_tgetblk is defined in udf/misc.c
and returns the value of sb_getblk function that could be NULL.
Then, epos.bh is used without any check, causing a possible
NULL pointer dereference when sb_getblk fails.

This fix adds a check to validate the value of epos.bh.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=213083
Signed-off-by: Arturo Giusti <koredump@protonmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/namei.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 3ae9f1e91984..7c7c9bbbfa57 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -934,6 +934,10 @@ static int udf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 				iinfo->i_location.partitionReferenceNum,
 				0);
 		epos.bh = udf_tgetblk(sb, block);
+		if (unlikely(!epos.bh)) {
+			err = -ENOMEM;
+			goto out_no_entry;
+		}
 		lock_buffer(epos.bh);
 		memset(epos.bh->b_data, 0x00, bsize);
 		set_buffer_uptodate(epos.bh);

From f3019092eca09f3d093dbc306bf118daff15bfd2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 May 2021 12:46:05 +0300
Subject: [PATCH 0354/3292] gpio: wcove: Use IRQ hardware number getter instead
 of direct access

IRQ framework provides special type and getter to transform Linux IRQ
to the hardware pin. Use that type and getter function instead of
direct access. No functional changes intended.

While at it, remove unneeded check in wcove_update_irq_ctrl() since
it is guaranteed that function will be called with valid parameter.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-wcove.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index a19eeef6cf1e..923a37a3d6d3 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -129,7 +129,7 @@ static inline int to_ireg(int gpio, enum ctrl_register type, unsigned int *mask)
 	return reg;
 }
 
-static void wcove_update_irq_mask(struct wcove_gpio *wg, int gpio)
+static void wcove_update_irq_mask(struct wcove_gpio *wg, irq_hw_number_t gpio)
 {
 	unsigned int mask, reg = to_ireg(gpio, IRQ_MASK, &mask);
 
@@ -139,13 +139,10 @@ static void wcove_update_irq_mask(struct wcove_gpio *wg, int gpio)
 		regmap_clear_bits(wg->regmap, reg, mask);
 }
 
-static void wcove_update_irq_ctrl(struct wcove_gpio *wg, int gpio)
+static void wcove_update_irq_ctrl(struct wcove_gpio *wg, irq_hw_number_t gpio)
 {
 	int reg = to_reg(gpio, CTRL_IN);
 
-	if (reg < 0)
-		return;
-
 	regmap_update_bits(wg->regmap, reg, CTLI_INTCNT_BE, wg->intcnt);
 }
 
@@ -248,8 +245,9 @@ static int wcove_irq_type(struct irq_data *data, unsigned int type)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
 	struct wcove_gpio *wg = gpiochip_get_data(chip);
+	irq_hw_number_t gpio = irqd_to_hwirq(data);
 
-	if (data->hwirq >= WCOVE_GPIO_NUM)
+	if (gpio >= WCOVE_GPIO_NUM)
 		return 0;
 
 	switch (type) {
@@ -286,7 +284,7 @@ static void wcove_bus_sync_unlock(struct irq_data *data)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
 	struct wcove_gpio *wg = gpiochip_get_data(chip);
-	int gpio = data->hwirq;
+	irq_hw_number_t gpio = irqd_to_hwirq(data);
 
 	if (wg->update & UPDATE_IRQ_TYPE)
 		wcove_update_irq_ctrl(wg, gpio);
@@ -301,8 +299,9 @@ static void wcove_irq_unmask(struct irq_data *data)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
 	struct wcove_gpio *wg = gpiochip_get_data(chip);
+	irq_hw_number_t gpio = irqd_to_hwirq(data);
 
-	if (data->hwirq >= WCOVE_GPIO_NUM)
+	if (gpio >= WCOVE_GPIO_NUM)
 		return;
 
 	wg->set_irq_mask = false;
@@ -313,8 +312,9 @@ static void wcove_irq_mask(struct irq_data *data)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
 	struct wcove_gpio *wg = gpiochip_get_data(chip);
+	irq_hw_number_t gpio = irqd_to_hwirq(data);
 
-	if (data->hwirq >= WCOVE_GPIO_NUM)
+	if (gpio >= WCOVE_GPIO_NUM)
 		return;
 
 	wg->set_irq_mask = true;

From 5d9936648285b8ccb6b61257dd2ddd76f9cd719a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 May 2021 13:00:01 +0300
Subject: [PATCH 0355/3292] gpio: wcove: Unify style of to_reg() with to_ireg()

Use ternary and rename parameter to 'type' in to_reg() to be in
the same style as to_ireg().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-wcove.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index 923a37a3d6d3..5ce315f64dc5 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -99,19 +99,14 @@ struct wcove_gpio {
 	bool set_irq_mask;
 };
 
-static inline int to_reg(int gpio, enum ctrl_register reg_type)
+static inline int to_reg(int gpio, enum ctrl_register type)
 {
-	unsigned int reg;
+	unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE;
 
 	if (gpio >= WCOVE_GPIO_NUM)
 		return -EOPNOTSUPP;
 
-	if (reg_type == CTRL_IN)
-		reg = GPIO_IN_CTRL_BASE + gpio;
-	else
-		reg = GPIO_OUT_CTRL_BASE + gpio;
-
-	return reg;
+	return reg + gpio;
 }
 
 static inline int to_ireg(int gpio, enum ctrl_register type, unsigned int *mask)

From 2b71b66ac0415db5e4b9e67b11e3af7b485bb421 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 May 2021 15:17:15 +0300
Subject: [PATCH 0356/3292] gpio: wcove: Split error handling for CTRL and IRQ
 registers

The wcove_gpio_dbg_show() uses one message to all possible error reads.
Split it to two for CTRL and IRQ registers.

While at it, switch to use dev_err() instead of pr_err().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-wcove.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index 5ce315f64dc5..16a0fae1e32e 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -364,8 +364,7 @@ static irqreturn_t wcove_gpio_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static void wcove_gpio_dbg_show(struct seq_file *s,
-				      struct gpio_chip *chip)
+static void wcove_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 {
 	unsigned int ctlo, ctli, irq_mask, irq_status;
 	struct wcove_gpio *wg = gpiochip_get_data(chip);
@@ -374,10 +373,15 @@ static void wcove_gpio_dbg_show(struct seq_file *s,
 	for (gpio = 0; gpio < WCOVE_GPIO_NUM; gpio++) {
 		ret += regmap_read(wg->regmap, to_reg(gpio, CTRL_OUT), &ctlo);
 		ret += regmap_read(wg->regmap, to_reg(gpio, CTRL_IN), &ctli);
+		if (ret) {
+			dev_err(wg->dev, "Failed to read registers: CTRL out/in\n");
+			break;
+		}
+
 		ret += regmap_read(wg->regmap, to_ireg(gpio, IRQ_MASK, &mask), &irq_mask);
 		ret += regmap_read(wg->regmap, to_ireg(gpio, IRQ_STATUS, &mask), &irq_status);
 		if (ret) {
-			pr_err("Failed to read registers: ctrl out/in or irq status/mask\n");
+			dev_err(wg->dev, "Failed to read registers: IRQ status/mask\n");
 			break;
 		}
 

From 2e958a8a510d956ec8528f0bd20e309b5bb5156c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:09 +0900
Subject: [PATCH 0357/3292] x86/entry/x32: Rename __x32_compat_sys_* to
 __x64_compat_sys_*

The SYSCALL macros are mapped to symbols as follows:

  __SYSCALL_COMMON(nr, sym)  -->  __x64_<sym>
  __SYSCALL_X32(nr, sym)     -->  __x32_<sym>

Originally, the syscalls in the x32 special range (512-547) were all
compat.

This assumption is now broken after the following commits:

  55db9c0e8534 ("net: remove compat_sys_{get,set}sockopt")
  5f764d624a89 ("fs: remove the compat readv/writev syscalls")
  598b3cec831f ("fs: remove compat_sys_vmsplice")
  c3973b401ef2 ("mm: remove compat_process_vm_{readv,writev}")

Those commits redefined __x32_sys_* to __x64_sys_* because there is no stub
like __x32_sys_*.

Defining them as follows is more sensible and cleaner.

  __SYSCALL_COMMON(nr, sym)  -->  __x64_<sym>
  __SYSCALL_X32(nr, sym)     -->  __x64_<sym>

This works because both x86_64 and x32 use the same ABI (RDI, RSI, RDX,
R10, R8, R9)

The ugly #define __x32_sys_* will go away.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-2-masahiroy@kernel.org
---
 arch/x86/entry/syscall_x32.c           | 16 ++--------------
 arch/x86/include/asm/syscall_wrapper.h | 10 +++++-----
 2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index f2fe0a33bcfd..3fea8fb9cd6a 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,27 +8,15 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-/*
- * Reuse the 64-bit entry points for the x32 versions that occupy different
- * slots in the syscall table.
- */
-#define __x32_sys_readv		__x64_sys_readv
-#define __x32_sys_writev	__x64_sys_writev
-#define __x32_sys_getsockopt	__x64_sys_getsockopt
-#define __x32_sys_setsockopt	__x64_sys_setsockopt
-#define __x32_sys_vmsplice	__x64_sys_vmsplice
-#define __x32_sys_process_vm_readv	__x64_sys_process_vm_readv
-#define __x32_sys_process_vm_writev	__x64_sys_process_vm_writev
-
 #define __SYSCALL_64(nr, sym)
 
-#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
+#define __SYSCALL_X32(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_X32
 #undef __SYSCALL_COMMON
 
-#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
+#define __SYSCALL_X32(nr, sym) [nr] = __x64_##sym,
 #define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 80c08c7d5e72..6a2827d0681f 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * __x64_sys_*()         - 64-bit native syscall
  * __ia32_sys_*()        - 32-bit native syscall or common compat syscall
  * __ia32_compat_sys_*() - 32-bit compat syscall
- * __x32_compat_sys_*()  - 64-bit X32 compat syscall
+ * __x64_compat_sys_*()  - 64-bit X32 compat syscall
  *
  * The registers are decoded according to the ABI:
  * 64-bit: RDI, RSI, RDX, R10, R8, R9
@@ -166,17 +166,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * with x86_64 obviously do not need such care.
  */
 #define __X32_COMPAT_SYS_STUB0(name)					\
-	__SYS_STUB0(x32, compat_sys_##name)
+	__SYS_STUB0(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)				\
-	__SYS_STUBx(x32, compat_sys##name,				\
+	__SYS_STUBx(x64, compat_sys##name,				\
 		    SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
 
 #define __X32_COMPAT_COND_SYSCALL(name)					\
-	__COND_SYSCALL(x32, compat_sys_##name)
+	__COND_SYSCALL(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_NI(name)					\
-	__SYS_NI(x32, compat_sys_##name)
+	__SYS_NI(x64, compat_sys_##name)
 #else /* CONFIG_X86_X32 */
 #define __X32_COMPAT_SYS_STUB0(name)
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)

From 6218d0f6b8dece1f2e82f0a47a0e6b8ecb631ef6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:10 +0900
Subject: [PATCH 0358/3292] x86/syscalls: Switch to generic syscalltbl.sh

Many architectures duplicate similar shell scripts.

Convert x86 and UML to use scripts/syscalltbl.sh. The generic script
generates seperate headers for x86/64 and x86/x32 syscalls, while the x86
specific script coalesced them into one. Adjust the code accordingly.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-3-masahiroy@kernel.org
---
 arch/x86/entry/syscall_32.c           | 12 +++++--
 arch/x86/entry/syscall_64.c           |  9 ++----
 arch/x86/entry/syscall_x32.c          | 15 +++------
 arch/x86/entry/syscalls/Makefile      | 10 ++++--
 arch/x86/entry/syscalls/syscalltbl.sh | 46 ---------------------------
 arch/x86/include/asm/Kbuild           |  1 +
 arch/x86/um/sys_call_table_32.c       |  8 +++--
 arch/x86/um/sys_call_table_64.c       |  9 ++----
 8 files changed, 34 insertions(+), 76 deletions(-)
 delete mode 100644 arch/x86/entry/syscalls/syscalltbl.sh

diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 86eb0d89d46f..70bf46e73b1c 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -8,12 +8,18 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_I386(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
+#ifdef CONFIG_IA32_EMULATION
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, compat)
+#else
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
+#endif
+
+#define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
 
 #include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
+#undef __SYSCALL
 
-#define __SYSCALL_I386(nr, sym) [nr] = __ia32_##sym,
+#define __SYSCALL(nr, sym) [nr] = __ia32_##sym,
 
 __visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
 	/*
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 1594ec72bcbb..82670bb10931 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,14 +8,11 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_X32(nr, sym)
-#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
-
-#define __SYSCALL_64(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
-#undef __SYSCALL_64
+#undef __SYSCALL
 
-#define __SYSCALL_64(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	/*
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 3fea8fb9cd6a..6d2ef887d7b6 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,16 +8,11 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym)
+#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#include <asm/syscalls_x32.h>
+#undef __SYSCALL
 
-#define __SYSCALL_X32(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#include <asm/syscalls_64.h>
-#undef __SYSCALL_X32
-#undef __SYSCALL_COMMON
-
-#define __SYSCALL_X32(nr, sym) [nr] = __x64_##sym,
-#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
 	/*
@@ -25,5 +20,5 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
 	 * when the & below is removed.
 	 */
 	[0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
-#include <asm/syscalls_64.h>
+#include <asm/syscalls_x32.h>
 };
diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index d8c4f6c9eadc..c4bd8dd82bb1 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -10,7 +10,7 @@ syscall32 := $(src)/syscall_32.tbl
 syscall64 := $(src)/syscall_64.tbl
 
 syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
 
 quiet_cmd_syshdr = SYSHDR  $@
       cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
@@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR  $@
 		   '$(syshdr_pfx_$(basetarget))' \
 		   '$(syshdr_offset_$(basetarget))'
 quiet_cmd_systbl = SYSTBL  $@
-      cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
+      cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
 
 quiet_cmd_hypercalls = HYPERCALLS $@
       cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs))
@@ -46,10 +46,15 @@ syshdr_pfx_unistd_64_x32 := x32_
 $(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
+$(out)/syscalls_32.h: abis := i386
 $(out)/syscalls_32.h: $(syscall32) $(systbl) FORCE
 	$(call if_changed,systbl)
+$(out)/syscalls_64.h: abis := common,64
 $(out)/syscalls_64.h: $(syscall64) $(systbl) FORCE
 	$(call if_changed,systbl)
+$(out)/syscalls_x32.h: abis := common,x32
+$(out)/syscalls_x32.h: $(syscall64) $(systbl) FORCE
+	$(call if_changed,systbl)
 
 $(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh FORCE
 	$(call if_changed,hypercalls)
@@ -60,6 +65,7 @@ uapisyshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
 syshdr-y			+= syscalls_32.h
 syshdr-$(CONFIG_X86_64)		+= unistd_32_ia32.h unistd_64_x32.h
 syshdr-$(CONFIG_X86_64)		+= syscalls_64.h
+syshdr-$(CONFIG_X86_X32)	+= syscalls_x32.h
 syshdr-$(CONFIG_XEN)		+= xen-hypercalls.h
 
 uapisyshdr-y	:= $(addprefix $(uapi)/, $(uapisyshdr-y))
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
deleted file mode 100644
index 929bde120d6b..000000000000
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-
-syscall_macro() {
-    local abi="$1"
-    local nr="$2"
-    local entry="$3"
-
-    echo "__SYSCALL_${abi}($nr, $entry)"
-}
-
-emit() {
-    local abi="$1"
-    local nr="$2"
-    local entry="$3"
-    local compat="$4"
-
-    if [ "$abi" != "I386" -a -n "$compat" ]; then
-	echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
-	exit 1
-    fi
-
-    if [ -z "$compat" ]; then
-	if [ -n "$entry" ]; then
-	    syscall_macro "$abi" "$nr" "$entry"
-	fi
-    else
-	echo "#ifdef CONFIG_X86_32"
-	if [ -n "$entry" ]; then
-	    syscall_macro "$abi" "$nr" "$entry"
-	fi
-	echo "#else"
-	syscall_macro "$abi" "$nr" "$compat"
-	echo "#endif"
-    fi
-}
-
-grep '^[0-9]' "$in" | sort -n | (
-    while read nr abi name entry compat; do
-	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
-	emit "$abi" "$nr" "$entry" "$compat"
-    done
-) > "$out"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index b19ec8282d50..1e51650b79d7 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -3,6 +3,7 @@
 
 generated-y += syscalls_32.h
 generated-y += syscalls_64.h
+generated-y += syscalls_x32.h
 generated-y += unistd_32_ia32.h
 generated-y += unistd_64_x32.h
 generated-y += xen-hypercalls.h
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 2ed81e581755..e83619c365dc 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -26,11 +26,13 @@
 
 #define old_mmap sys_old_mmap
 
-#define __SYSCALL_I386(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
+
+#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_32.h>
 
-#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym) [ nr ] = sym,
+#undef __SYSCALL
+#define __SYSCALL(nr, sym) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 2e8544dafbb0..6fb75af7cf54 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -36,14 +36,11 @@
 #define stub_execveat sys_execveat
 #define stub_rt_sigreturn sys_rt_sigreturn
 
-#define __SYSCALL_X32(nr, sym)
-#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
-
-#define __SYSCALL_64(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 
-#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym) [ nr ] = sym,
+#undef __SYSCALL
+#define __SYSCALL(nr, sym) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 

From 44fe4895f47cbe9f4692e1d3cdc2ef8352f4d88e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:11 +0900
Subject: [PATCH 0359/3292] x86/syscalls: Stop filling syscall arrays with
 *_sys_ni_syscall

This is a follow-up cleanup after switching to the generic syscalltbl.sh.

The old x86 specific script skipped non-existing syscalls. So, the
generated syscalls_64.h, for example, had a big hole in the syscall numbers
335-423 range. That is why there exists [0 ... __NR_*_syscall_max] =
&__*_sys_ni_cyscall.

The new script, scripts/syscalltbl.sh automatically fills holes
with __SYSCALL(<nr>, sys_ni_syscall), hence such ugly code can
go away. The designated initializers, '[nr] =' are also unneeded.

Also, there is no need to give __NR_*_syscall_max+1 because the array
size is implied by the number of syscalls in the generated headers.
Hence, there is no need to include <asm/unistd.h>, either.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-4-masahiroy@kernel.org
---
 arch/x86/entry/syscall_32.c     | 10 ++--------
 arch/x86/entry/syscall_64.c     | 10 ++--------
 arch/x86/entry/syscall_x32.c    | 10 ++--------
 arch/x86/um/sys_call_table_32.c |  6 ------
 arch/x86/um/sys_call_table_64.c |  6 ------
 5 files changed, 6 insertions(+), 36 deletions(-)

diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 70bf46e73b1c..8cfc9bc73e7f 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -5,7 +5,6 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <linux/syscalls.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #ifdef CONFIG_IA32_EMULATION
@@ -19,13 +18,8 @@
 #include <asm/syscalls_32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) [nr] = __ia32_##sym,
+#define __SYSCALL(nr, sym) __ia32_##sym,
 
-__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_ia32_syscall_max] = &__ia32_sys_ni_syscall,
+__visible const sys_call_ptr_t ia32_sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 82670bb10931..be120eec1fc9 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -5,20 +5,14 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <linux/syscalls.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) __x64_##sym,
 
-asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
+asmlinkage const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
 };
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 6d2ef887d7b6..bdd0e03a1265 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -5,20 +5,14 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <linux/syscalls.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) __x64_##sym,
 
-asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
+asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
 #include <asm/syscalls_x32.h>
 };
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index e83619c365dc..f8323104e353 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -7,7 +7,6 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __NO_STUBS
@@ -37,11 +36,6 @@
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 6fb75af7cf54..5ed665dc785f 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -7,7 +7,6 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __NO_STUBS
@@ -45,11 +44,6 @@
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
 

From f63815eb1d909a4121806e60928108ff040bf291 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:12 +0900
Subject: [PATCH 0360/3292] x86/unistd: Define X32_NR_syscalls only for 64-bit
 kernel

X32_NR_syscalls is needed only when building a 64bit kernel.

Move it to proper #ifdef guard.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-5-masahiroy@kernel.org
---
 arch/x86/include/asm/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index c1c3d31b15c0..1bc6020bc58d 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -26,11 +26,11 @@
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+#  define X32_NR_syscalls (__NR_x32_syscall_max + 1)
 
 # endif
 
 # define NR_syscalls (__NR_syscall_max + 1)
-# define X32_NR_syscalls (__NR_x32_syscall_max + 1)
 # define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
 
 # define __ARCH_WANT_NEW_STAT

From 49f731f1972e6e44d8a5c3982a72902b3944bc34 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:13 +0900
Subject: [PATCH 0361/3292] x86/syscalls: Use __NR_syscalls instead of
 __NR_syscall_max

__NR_syscall_max is only used by x86 and UML. In contrast, __NR_syscalls is
widely used by all the architectures.

Convert __NR_syscall_max to __NR_syscalls and adjust the usage sites.

This prepares x86 to switch to the generic syscallhdr.sh script.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-6-masahiroy@kernel.org
---
 arch/um/kernel/skas/syscall.c         | 2 +-
 arch/x86/entry/syscalls/syscallhdr.sh | 2 +-
 arch/x86/include/asm/unistd.h         | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 3d91f89fd852..9ee19e566da3 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -41,7 +41,7 @@ void handle_syscall(struct uml_pt_regs *r)
 		goto out;
 
 	syscall = UPT_SYSCALL_NR(r);
-	if (syscall >= 0 && syscall <= __NR_syscall_max)
+	if (syscall >= 0 && syscall < __NR_syscalls)
 		PT_REGS_SET_SYSCALL_RETURN(regs,
 				EXECUTE_SYSCALL(syscall, regs));
 
diff --git a/arch/x86/entry/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
index cc1e63857427..75e66af06773 100644
--- a/arch/x86/entry/syscalls/syscallhdr.sh
+++ b/arch/x86/entry/syscalls/syscallhdr.sh
@@ -28,7 +28,7 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
 
     echo ""
     echo "#ifdef __KERNEL__"
-    echo "#define __NR_${prefix}syscall_max $max"
+    echo "#define __NR_${prefix}syscalls $(($max + 1))"
     echo "#endif"
     echo ""
     echo "#endif /* ${fileguard} */"
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 1bc6020bc58d..80e9d5206a71 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -13,7 +13,7 @@
 #  define __ARCH_WANT_SYS_OLD_MMAP
 #  define __ARCH_WANT_SYS_OLD_SELECT
 
-#  define __NR_ia32_syscall_max __NR_syscall_max
+#  define IA32_NR_syscalls (__NR_syscalls)
 
 # else
 
@@ -26,12 +26,12 @@
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
-#  define X32_NR_syscalls (__NR_x32_syscall_max + 1)
+#  define X32_NR_syscalls (__NR_x32_syscalls)
+#  define IA32_NR_syscalls (__NR_ia32_syscalls)
 
 # endif
 
-# define NR_syscalls (__NR_syscall_max + 1)
-# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
+# define NR_syscalls (__NR_syscalls)
 
 # define __ARCH_WANT_NEW_STAT
 # define __ARCH_WANT_OLD_READDIR

From 3cba325b358f86357b5ce50eb9e6633183927eee Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:14 +0900
Subject: [PATCH 0362/3292] x86/syscalls: Switch to generic syscallhdr.sh

Many architectures duplicate similar shell scripts.

Converts x86 to use scripts/syscallhdr.sh.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-7-masahiroy@kernel.org
---
 arch/x86/entry/syscalls/Makefile      | 26 ++++++++++----------
 arch/x86/entry/syscalls/syscallhdr.sh | 35 ---------------------------
 2 files changed, 13 insertions(+), 48 deletions(-)
 delete mode 100644 arch/x86/entry/syscalls/syscallhdr.sh

diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index c4bd8dd82bb1..8eb014bca8c9 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -9,40 +9,40 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
 syscall32 := $(src)/syscall_32.tbl
 syscall64 := $(src)/syscall_64.tbl
 
-syshdr := $(srctree)/$(src)/syscallhdr.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
 systbl := $(srctree)/scripts/syscalltbl.sh
 
 quiet_cmd_syshdr = SYSHDR  $@
-      cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
-		   '$(syshdr_abi_$(basetarget))' \
-		   '$(syshdr_pfx_$(basetarget))' \
-		   '$(syshdr_offset_$(basetarget))'
+      cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) --emit-nr \
+		$(if $(offset),--offset $(offset)) \
+		$(if $(prefix),--prefix $(prefix)) \
+		$< $@
 quiet_cmd_systbl = SYSTBL  $@
       cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
 
 quiet_cmd_hypercalls = HYPERCALLS $@
       cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs))
 
-syshdr_abi_unistd_32 := i386
+$(uapi)/unistd_32.h: abis := i386
 $(uapi)/unistd_32.h: $(syscall32) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_32_ia32 := i386
-syshdr_pfx_unistd_32_ia32 := ia32_
+$(out)/unistd_32_ia32.h: abis := i386
+$(out)/unistd_32_ia32.h: prefix := ia32_
 $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_x32 := common,x32
-syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
+$(uapi)/unistd_x32.h: abis := common,x32
+$(uapi)/unistd_x32.h: offset := __X32_SYSCALL_BIT
 $(uapi)/unistd_x32.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_64 := common,64
+$(uapi)/unistd_64.h: abis := common,64
 $(uapi)/unistd_64.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_64_x32 := x32
-syshdr_pfx_unistd_64_x32 := x32_
+$(out)/unistd_64_x32.h: abis := x32
+$(out)/unistd_64_x32.h: prefix := x32_
 $(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
diff --git a/arch/x86/entry/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
deleted file mode 100644
index 75e66af06773..000000000000
--- a/arch/x86/entry/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_ASM_X86_`basename "$out" | sed \
-    -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-    -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-    echo "#ifndef ${fileguard}"
-    echo "#define ${fileguard} 1"
-    echo ""
-
-    max=0
-    while read nr abi name entry ; do
-	if [ -z "$offset" ]; then
-	    echo "#define __NR_${prefix}${name} $nr"
-	else
-	    echo "#define __NR_${prefix}${name} ($offset + $nr)"
-        fi
-
-	max=$nr
-    done
-
-    echo ""
-    echo "#ifdef __KERNEL__"
-    echo "#define __NR_${prefix}syscalls $(($max + 1))"
-    echo "#endif"
-    echo ""
-    echo "#endif /* ${fileguard} */"
-) > "$out"

From 5a6f0dbe621a5c20dc912ac474debf9f11129e03 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 4 May 2021 20:57:42 +0200
Subject: [PATCH 0363/3292] Input: goodix - platform/x86: touchscreen_dmi -
 Move upside down quirks to touchscreen_dmi.c

Move the DMI quirks for upside-down mounted Goodix touchscreens from
drivers/input/touchscreen/goodix.c to
drivers/platform/x86/touchscreen_dmi.c,
where all the other x86 touchscreen quirks live.

Note the touchscreen_dmi.c code attaches standard touchscreen
device-properties to an i2c-client device based on a combination of a
DMI match + a device-name match. I've verified that the: Teclast X98 Pro,
WinBook TW100 and WinBook TW700 uses an ACPI devicename of "GDIX1001:00"
based on acpidumps and/or dmesg output available on the web.

This patch was tested on a Teclast X89 tablet.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210504185746.175461-2-hdegoede@redhat.com
---
 drivers/input/touchscreen/goodix.c     | 52 ------------------------
 drivers/platform/x86/touchscreen_dmi.c | 56 ++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index c682b028f0a2..4f53d3c57e69 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -178,51 +178,6 @@ static const unsigned long goodix_irq_flags[] = {
 	IRQ_TYPE_LEVEL_HIGH,
 };
 
-/*
- * Those tablets have their coordinates origin at the bottom right
- * of the tablet, as if rotated 180 degrees
- */
-static const struct dmi_system_id rotated_screen[] = {
-#if defined(CONFIG_DMI) && defined(CONFIG_X86)
-	{
-		.ident = "Teclast X89",
-		.matches = {
-			/* tPAD is too generic, also match on bios date */
-			DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
-			DMI_MATCH(DMI_BOARD_NAME, "tPAD"),
-			DMI_MATCH(DMI_BIOS_DATE, "12/19/2014"),
-		},
-	},
-	{
-		.ident = "Teclast X98 Pro",
-		.matches = {
-			/*
-			 * Only match BIOS date, because the manufacturers
-			 * BIOS does not report the board name at all
-			 * (sometimes)...
-			 */
-			DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
-			DMI_MATCH(DMI_BIOS_DATE, "10/28/2015"),
-		},
-	},
-	{
-		.ident = "WinBook TW100",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "TW100")
-		}
-	},
-	{
-		.ident = "WinBook TW700",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "TW700")
-		},
-	},
-#endif
-	{}
-};
-
 static const struct dmi_system_id nine_bytes_report[] = {
 #if defined(CONFIG_DMI) && defined(CONFIG_X86)
 	{
@@ -1123,13 +1078,6 @@ static int goodix_configure_dev(struct goodix_ts_data *ts)
 				  ABS_MT_POSITION_Y, ts->prop.max_y);
 	}
 
-	if (dmi_check_system(rotated_screen)) {
-		ts->prop.invert_x = true;
-		ts->prop.invert_y = true;
-		dev_dbg(&ts->client->dev,
-			"Applying '180 degrees rotated screen' quirk\n");
-	}
-
 	if (dmi_check_system(nine_bytes_report)) {
 		ts->contact_size = 9;
 
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 90fe4f8f3c2c..a53e176d94b9 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -273,6 +273,23 @@ static const struct ts_dmi_data estar_beauty_hd_data = {
 	.properties	= estar_beauty_hd_props,
 };
 
+/* Generic props + data for upside-down mounted GDIX1001 touchscreens */
+static const struct property_entry gdix1001_upside_down_props[] = {
+	PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"),
+	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+	{ }
+};
+
+static const struct ts_dmi_data gdix1001_00_upside_down_data = {
+	.acpi_name	= "GDIX1001:00",
+	.properties	= gdix1001_upside_down_props,
+};
+
+static const struct ts_dmi_data gdix1001_01_upside_down_data = {
+	.acpi_name	= "GDIX1001:01",
+	.properties	= gdix1001_upside_down_props,
+};
+
 static const struct property_entry gp_electronic_t701_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 960),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 640),
@@ -1287,6 +1304,16 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "X3 Plus"),
 		},
 	},
+	{
+		/* Teclast X89 (Windows version / BIOS) */
+		.driver_data = (void *)&gdix1001_01_upside_down_data,
+		.matches = {
+			/* tPAD is too generic, also match on bios date */
+			DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
+			DMI_MATCH(DMI_BOARD_NAME, "tPAD"),
+			DMI_MATCH(DMI_BIOS_DATE, "12/19/2014"),
+		},
+	},
 	{
 		/* Teclast X98 Plus II */
 		.driver_data = (void *)&teclast_x98plus2_data,
@@ -1295,6 +1322,19 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "X98 Plus II"),
 		},
 	},
+	{
+		/* Teclast X98 Pro */
+		.driver_data = (void *)&gdix1001_00_upside_down_data,
+		.matches = {
+			/*
+			 * Only match BIOS date, because the manufacturers
+			 * BIOS does not report the board name at all
+			 * (sometimes)...
+			 */
+			DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
+			DMI_MATCH(DMI_BIOS_DATE, "10/28/2015"),
+		},
+	},
 	{
 		/* Trekstor Primebook C11 */
 		.driver_data = (void *)&trekstor_primebook_c11_data,
@@ -1370,6 +1410,22 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "VINGA Twizzle J116"),
 		},
 	},
+	{
+		/* "WinBook TW100" */
+		.driver_data = (void *)&gdix1001_00_upside_down_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TW100")
+		}
+	},
+	{
+		/* WinBook TW700 */
+		.driver_data = (void *)&gdix1001_00_upside_down_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TW700")
+		},
+	},
 	{
 		/* Yours Y8W81, same case and touchscreen as Chuwi Vi8 */
 		.driver_data = (void *)&chuwi_vi8_data,

From a22e3803f2a4d947ff0083a9448a169269ea0f62 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 4 May 2021 20:57:44 +0200
Subject: [PATCH 0364/3292] platform/x86: touchscreen_dmi: Add an extra entry
 for the upside down Goodix touchscreen on Teclast X89 tablets

Teclast X89 tablets come in 2 versions, with Windows pre-installed and with
Android pre-installed. These 2 versions have different DMI strings.

Add a match for the DMI strings used by the Android version BIOS.

Note the Android version BIOS has a bug in the DSDT where no IRQ is
provided, so for the touchscreen to work a DSDT override fixing this
is necessary as well.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210504185746.175461-4-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index a53e176d94b9..08ec61660b3c 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -1304,6 +1304,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "X3 Plus"),
 		},
 	},
+	{
+		/* Teclast X89 (Android version / BIOS) */
+		.driver_data = (void *)&gdix1001_00_upside_down_data,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "WISKY"),
+			DMI_MATCH(DMI_BOARD_NAME, "3G062i"),
+		},
+	},
 	{
 		/* Teclast X89 (Windows version / BIOS) */
 		.driver_data = (void *)&gdix1001_01_upside_down_data,

From fcd8cf0e3e48f4c66af82c8e799c37cb0cccffe0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 4 May 2021 20:57:45 +0200
Subject: [PATCH 0365/3292] platform/x86: touchscreen_dmi: Add info for the
 Goodix GT912 panel of TM800A550L tablets

The Bay Trail Glavey TM800A550L tablet, which ships with Android installed
from the factory, uses a GT912 touchscreen controller which needs to have
its firmware uploaded by the OS to work (this is a first for a x86 based
device with a Goodix touchscreen controller).

Add a touchscreen_dmi entry for this which specifies the filenames
to use for the firmware and config files needed for this.

Note this matches on a GDIX1001 ACPI HID, while the original DSDT uses
a HID of GODX0911. For the touchscreen to work on these devices a DSDT
override is necessary to fix a missing IRQ and broken GPIO settings in
the ACPI-resources for the touchscreen. This override also changes the
HID to the standard GDIX1001 id typically used for Goodix touchscreens.
The DSDT override is available here:
https://fedorapeople.org/~jwrdegoede/glavey-tm800a550l-dsdt-override/

Reviewed-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210504185746.175461-5-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 08ec61660b3c..d2bbec310726 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -290,6 +290,18 @@ static const struct ts_dmi_data gdix1001_01_upside_down_data = {
 	.properties	= gdix1001_upside_down_props,
 };
 
+static const struct property_entry glavey_tm800a550l_props[] = {
+	PROPERTY_ENTRY_STRING("firmware-name", "gt912-glavey-tm800a550l.fw"),
+	PROPERTY_ENTRY_STRING("goodix,config-name", "gt912-glavey-tm800a550l.cfg"),
+	PROPERTY_ENTRY_U32("goodix,main-clk", 54),
+	{ }
+};
+
+static const struct ts_dmi_data glavey_tm800a550l_data = {
+	.acpi_name	= "GDIX1001:00",
+	.properties	= glavey_tm800a550l_props,
+};
+
 static const struct property_entry gp_electronic_t701_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 960),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 640),
@@ -1020,6 +1032,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "eSTAR BEAUTY HD Intel Quad core"),
 		},
 	},
+	{	/* Glavey TM800A550L */
+		.driver_data = (void *)&glavey_tm800a550l_data,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
+			/* Above strings are too generic, also match on BIOS version */
+			DMI_MATCH(DMI_BIOS_VERSION, "ZY-8-BI-PX4S70VTR400-X423B-005-D"),
+		},
+	},
 	{
 		/* GP-electronic T701 */
 		.driver_data = (void *)&gp_electronic_t701_data,

From 15c82d98a0f783bd4b2715ea910f7bb526367f54 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:12:58 -0700
Subject: [PATCH 0366/3292] selftests/x86/syscall: Update and extend
 syscall_numbering_64

Update the syscall_numbering_64 selftest to reflect that a system call is
to be extended from 32 bits. Add a mix of tests for valid and invalid
system calls in 64-bit and x32 space.

Use an explicit system call instruction, because the glibc syscall()
wrapper might intercept instructions, extend the system call number
independently, or anything similar.

Use long long instead of long to make it possible to compile this test
on x32 as well as 64 bits.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-2-hpa@zytor.com
---
 .../testing/selftests/x86/syscall_numbering.c | 286 ++++++++++++++----
 1 file changed, 228 insertions(+), 58 deletions(-)

diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c..7dd86bcbee25 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
  * Copyright (c) 2018 Andrew Lutomirski
  */
 
@@ -11,79 +13,247 @@
 #include <stdbool.h>
 #include <errno.h>
 #include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
 
-static int nerrs;
+/* Common system call numbers */
+#define SYS_READ	  0
+#define SYS_WRITE	  1
+#define SYS_GETPID	 39
+/* x64-only system call numbers */
+#define X64_IOCTL	 16
+#define X64_READV	 19
+#define X64_WRITEV	 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL	514
+#define X32_READV	515
+#define X32_WRITEV	516
 
-#define X32_BIT 0x40000000UL
+#define X32_BIT 0x40000000
 
-static void check_enosys(unsigned long nr, bool *ok)
+static unsigned int nerr = 0;	/* Cumulative error count */
+static int nullfd = -1;		/* File descriptor for /dev/null */
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static inline long long probe_syscall(int msb, int lsb)
 {
-	/* If this fails, a segfault is reasonably likely. */
-	fflush(stdout);
+	register long long arg1 asm("rdi") = nullfd;
+	register long long arg2 asm("rsi") = 0;
+	register long long arg3 asm("rdx") = 0;
+	register long long arg4 asm("r10") = 0;
+	register long long arg5 asm("r8")  = 0;
+	register long long arg6 asm("r9")  = 0;
+	long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+	long long ret;
 
-	long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
-	if (ret == 0) {
-		printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
-		*ok = false;
-	} else if (errno != ENOSYS) {
-		printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
-		*ok = false;
-	}
+	asm volatile("syscall"
+		     : "=a" (ret)
+		     : "a" (nr), "r" (arg1), "r" (arg2), "r" (arg3),
+		       "r" (arg4), "r" (arg5), "r" (arg6)
+		     : "rcx", "r11", "memory", "cc");
+
+	return ret;
 }
 
-static void test_x32_without_x32_bit(void)
+static const char *syscall_str(int msb, int start, int end)
 {
-	bool ok = true;
+	static char buf[64];
+	const char * const type = (start & X32_BIT) ? "x32" : "x64";
+	int lsb = start;
 
 	/*
-	 * Syscalls 512-547 are "x32" syscalls.  They are intended to be
-	 * called with the x32 (0x40000000) bit set.  Calling them without
-	 * the x32 bit set is nonsense and should not work.
+	 * Improve readability by stripping the x32 bit, but round
+	 * toward zero so we don't display -1 as -1073741825.
 	 */
-	printf("[RUN]\tChecking syscalls 512-547\n");
-	for (int i = 512; i <= 547; i++)
-		check_enosys(i, &ok);
-
-	/*
-	 * Check that a handful of 64-bit-only syscalls are rejected if the x32
-	 * bit is set.
-	 */
-	printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
-	check_enosys(16 | X32_BIT, &ok);	/* ioctl */
-	check_enosys(19 | X32_BIT, &ok);	/* readv */
-	check_enosys(20 | X32_BIT, &ok);	/* writev */
-
-	/*
-	 * Check some syscalls with high bits set.
-	 */
-	printf("[RUN]\tChecking numbers above 2^32-1\n");
-	check_enosys((1UL << 32), &ok);
-	check_enosys(X32_BIT | (1UL << 32), &ok);
-
-	if (!ok)
-		nerrs++;
+	if (lsb < 0)
+		lsb |= X32_BIT;
 	else
-		printf("[OK]\tThey all returned -ENOSYS\n");
+		lsb &= ~X32_BIT;
+
+	if (start == end)
+		snprintf(buf, sizeof buf, "%s syscall %d:%d",
+			 type, msb, lsb);
+	else
+		snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+			 type, msb, lsb, lsb + (end-start));
+
+	return buf;
 }
 
-int main()
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+			       const char *expect_str)
 {
-	/*
-	 * Anyone diagnosing a failure will want to know whether the kernel
-	 * supports x32.  Tell them.
-	 */
-	printf("\tChecking for x32...");
-	fflush(stdout);
-	if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
-		printf(" supported\n");
-	} else if (errno == ENOSYS) {
-		printf(" not supported\n");
-	} else {
-		printf(" confused\n");
+	unsigned int err = 0;
+
+	for (int nr = start; nr <= end; nr++) {
+		long long ret = probe_syscall(msb, nr);
+
+		if (ret != expect) {
+			printf("[FAIL]\t      %s returned %lld, but it should have returned %s\n",
+			       syscall_str(msb, nr, nr),
+			       ret, expect_str);
+			err++;
+		}
 	}
 
-	test_x32_without_x32_bit();
+	if (err) {
+		nerr += err;
+		if (start != end)
+			printf("[FAIL]\t      %s had %u failure%s\n",
+			       syscall_str(msb, start, end),
+			       err, (err == 1) ? "s" : "");
+	} else {
+		printf("[OK]\t      %s returned %s as expected\n",
+		       syscall_str(msb, start, end), expect_str);
+	}
 
-	return nerrs ? 1 : 0;
+	return err;
+}
+
+#define check_for(msb,start,end,expect) \
+	_check_for(msb,start,end,expect,#expect)
+
+static bool check_zero(int msb, int nr)
+{
+	return check_for(msb, nr, nr, 0);
+}
+
+static bool check_enosys(int msb, int nr)
+{
+	return check_for(msb, nr, nr, -ENOSYS);
+}
+
+/*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them. This can also be used to conditionalize
+ * tests based on existence or nonexistence of x32.
+ */
+static bool test_x32(void)
+{
+	long long ret;
+	long long mypid = getpid();
+
+	printf("[RUN]\tChecking for x32 by calling x32 getpid()\n");
+	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+	if (ret == mypid) {
+		printf("[INFO]\t   x32 is supported\n");
+		return true;
+	} else if (ret == -ENOSYS) {
+		printf("[INFO]\t   x32 is not supported\n");
+		return false;
+	} else {
+		printf("[FAIL]\t   x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
+		nerr++;
+		return true;	/* Proceed as if... */
+	}
+}
+
+static void test_syscalls_common(int msb)
+{
+	printf("[RUN]\t   Checking some common syscalls as 64 bit\n");
+	check_zero(msb, SYS_READ);
+	check_zero(msb, SYS_WRITE);
+
+	printf("[RUN]\t   Checking some 64-bit only syscalls as 64 bit\n");
+	check_zero(msb, X64_READV);
+	check_zero(msb, X64_WRITEV);
+
+	printf("[RUN]\t   Checking out of range system calls\n");
+	check_for(msb, -64, -1, -ENOSYS);
+	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+
+static void test_syscalls_with_x32(int msb)
+{
+	/*
+	 * Syscalls 512-547 are "x32" syscalls.  They are
+	 * intended to be called with the x32 (0x40000000) bit
+	 * set.  Calling them without the x32 bit set is
+	 * nonsense and should not work.
+	 */
+	printf("[RUN]\t   Checking x32 syscalls as 64 bit\n");
+	check_for(msb, 512, 547, -ENOSYS);
+
+	printf("[RUN]\t   Checking some common syscalls as x32\n");
+	check_zero(msb, SYS_READ   | X32_BIT);
+	check_zero(msb, SYS_WRITE  | X32_BIT);
+
+	printf("[RUN]\t   Checking some x32 syscalls as x32\n");
+	check_zero(msb, X32_READV  | X32_BIT);
+	check_zero(msb, X32_WRITEV | X32_BIT);
+
+	printf("[RUN]\t   Checking some 64-bit syscalls as x32\n");
+	check_enosys(msb, X64_IOCTL  | X32_BIT);
+	check_enosys(msb, X64_READV  | X32_BIT);
+	check_enosys(msb, X64_WRITEV | X32_BIT);
+}
+
+static void test_syscalls_without_x32(int msb)
+{
+	printf("[RUN]\t  Checking for absence of x32 system calls\n");
+	check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+	static const int msbs[] = {
+		0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+		INT_MIN, INT_MIN+1
+	};
+	bool with_x32 = test_x32();
+
+	/*
+	 * The MSB is supposed to be ignored, so we loop over a few
+	 * to test that out.
+	 */
+	for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+		int msb = msbs[i];
+		printf("[RUN]\tChecking system calls with msb = %d (0x%x)\n",
+		       msb, msb);
+
+		test_syscalls_common(msb);
+		if (with_x32)
+			test_syscalls_with_x32(msb);
+		else
+			test_syscalls_without_x32(msb);
+	}
+}
+
+int main(void)
+{
+	/*
+	 * It is quite likely to get a segfault on a failure, so make
+	 * sure the message gets out by setting stdout to nonbuffered.
+	 */
+	setvbuf(stdout, NULL, _IONBF, 0);
+
+	/*
+	 * Harmless file descriptor to work on...
+	 */
+	nullfd = open("/dev/null", O_RDWR);
+	if (nullfd < 0) {
+		printf("[FAIL]\tUnable to open /dev/null: %s\n",
+		       strerror(errno));
+		printf("[SKIP]\tCannot execute test\n");
+		return 71;	/* EX_OSERR */
+	}
+
+	test_syscall_numbering();
+	if (!nerr) {
+		printf("[OK]\tAll system calls succeeded or failed as expected\n");
+		return 0;
+	} else {
+		printf("[FAIL]\tA total of %u system call%s had incorrect behavior\n",
+		       nerr, nerr != 1 ? "s" : "");
+		return 1;
+	}
 }

From c5c39488dcb5f818bb07f856a349262d667ef147 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:12:59 -0700
Subject: [PATCH 0367/3292] selftests/x86/syscall: Simplify message reporting
 in syscall_numbering

Reduce some boiler plate in printing and indenting messages.
This makes it easier to produce clean status output.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-3-hpa@zytor.com
---
 .../testing/selftests/x86/syscall_numbering.c | 103 ++++++++++++------
 1 file changed, 72 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 7dd86bcbee25..434fe0efafa0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <sysexits.h>
 
 /* Common system call numbers */
 #define SYS_READ	  0
@@ -34,6 +35,33 @@
 
 static unsigned int nerr = 0;	/* Cumulative error count */
 static int nullfd = -1;		/* File descriptor for /dev/null */
+static int indent = 0;
+
+static inline unsigned int offset(void)
+{
+	return 8 + indent * 4;
+}
+
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+				  ## __VA_ARGS__)
+
+#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
+#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)
+
+#define fail(fmt, ...)					\
+	do {						\
+		msg(FAIL, fmt, ## __VA_ARGS__);		\
+		nerr++;					\
+	} while (0)
+
+#define crit(fmt, ...)					\
+	do {						\
+		indent = 0;				\
+		msg(FAIL, fmt, ## __VA_ARGS__);		\
+		msg(SKIP, "Unable to run test\n");	\
+		exit(EX_OSERR);
+	} while (0)
 
 /*
  * Directly invokes the given syscall with nullfd as the first argument
@@ -91,28 +119,37 @@ static unsigned int _check_for(int msb, int start, int end, long long expect,
 {
 	unsigned int err = 0;
 
+	indent++;
+	if (start != end)
+		indent++;
+
 	for (int nr = start; nr <= end; nr++) {
 		long long ret = probe_syscall(msb, nr);
 
 		if (ret != expect) {
-			printf("[FAIL]\t      %s returned %lld, but it should have returned %s\n",
+			fail("%s returned %lld, but it should have returned %s\n",
 			       syscall_str(msb, nr, nr),
 			       ret, expect_str);
 			err++;
 		}
 	}
 
+	if (start != end)
+		indent--;
+
 	if (err) {
 		nerr += err;
 		if (start != end)
-			printf("[FAIL]\t      %s had %u failure%s\n",
+			fail("%s had %u failure%s\n",
 			       syscall_str(msb, start, end),
-			       err, (err == 1) ? "s" : "");
+			       err, err == 1 ? "s" : "");
 	} else {
-		printf("[OK]\t      %s returned %s as expected\n",
-		       syscall_str(msb, start, end), expect_str);
+		ok("%s returned %s as expected\n",
+		   syscall_str(msb, start, end), expect_str);
 	}
 
+	indent--;
+
 	return err;
 }
 
@@ -137,35 +174,38 @@ static bool check_enosys(int msb, int nr)
 static bool test_x32(void)
 {
 	long long ret;
-	long long mypid = getpid();
+	pid_t mypid = getpid();
+	bool with_x32;
 
-	printf("[RUN]\tChecking for x32 by calling x32 getpid()\n");
+	run("Checking for x32 by calling x32 getpid()\n");
 	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
 
+	indent++;
 	if (ret == mypid) {
-		printf("[INFO]\t   x32 is supported\n");
-		return true;
+		info("x32 is supported\n");
+		with_x32 = true;
 	} else if (ret == -ENOSYS) {
-		printf("[INFO]\t   x32 is not supported\n");
-		return false;
+		info("x32 is not supported\n");
+		with_x32 = false;
 	} else {
-		printf("[FAIL]\t   x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
-		nerr++;
-		return true;	/* Proceed as if... */
+		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
+		with_x32 = false;
 	}
+	indent--;
+	return with_x32;
 }
 
 static void test_syscalls_common(int msb)
 {
-	printf("[RUN]\t   Checking some common syscalls as 64 bit\n");
+	run("Checking some common syscalls as 64 bit\n");
 	check_zero(msb, SYS_READ);
 	check_zero(msb, SYS_WRITE);
 
-	printf("[RUN]\t   Checking some 64-bit only syscalls as 64 bit\n");
+	run("Checking some 64-bit only syscalls as 64 bit\n");
 	check_zero(msb, X64_READV);
 	check_zero(msb, X64_WRITEV);
 
-	printf("[RUN]\t   Checking out of range system calls\n");
+	run("Checking out of range system calls\n");
 	check_for(msb, -64, -1, -ENOSYS);
 	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
 	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
@@ -180,18 +220,18 @@ static void test_syscalls_with_x32(int msb)
 	 * set.  Calling them without the x32 bit set is
 	 * nonsense and should not work.
 	 */
-	printf("[RUN]\t   Checking x32 syscalls as 64 bit\n");
+	run("Checking x32 syscalls as 64 bit\n");
 	check_for(msb, 512, 547, -ENOSYS);
 
-	printf("[RUN]\t   Checking some common syscalls as x32\n");
+	run("Checking some common syscalls as x32\n");
 	check_zero(msb, SYS_READ   | X32_BIT);
 	check_zero(msb, SYS_WRITE  | X32_BIT);
 
-	printf("[RUN]\t   Checking some x32 syscalls as x32\n");
+	run("Checking some x32 syscalls as x32\n");
 	check_zero(msb, X32_READV  | X32_BIT);
 	check_zero(msb, X32_WRITEV | X32_BIT);
 
-	printf("[RUN]\t   Checking some 64-bit syscalls as x32\n");
+	run("Checking some 64-bit syscalls as x32\n");
 	check_enosys(msb, X64_IOCTL  | X32_BIT);
 	check_enosys(msb, X64_READV  | X32_BIT);
 	check_enosys(msb, X64_WRITEV | X32_BIT);
@@ -199,7 +239,7 @@ static void test_syscalls_with_x32(int msb)
 
 static void test_syscalls_without_x32(int msb)
 {
-	printf("[RUN]\t  Checking for absence of x32 system calls\n");
+	run("Checking for absence of x32 system calls\n");
 	check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
 }
 
@@ -217,14 +257,18 @@ static void test_syscall_numbering(void)
 	 */
 	for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
 		int msb = msbs[i];
-		printf("[RUN]\tChecking system calls with msb = %d (0x%x)\n",
-		       msb, msb);
+		run("Checking system calls with msb = %d (0x%x)\n",
+		    msb, msb);
+
+		indent++;
 
 		test_syscalls_common(msb);
 		if (with_x32)
 			test_syscalls_with_x32(msb);
 		else
 			test_syscalls_without_x32(msb);
+
+		indent--;
 	}
 }
 
@@ -241,19 +285,16 @@ int main(void)
 	 */
 	nullfd = open("/dev/null", O_RDWR);
 	if (nullfd < 0) {
-		printf("[FAIL]\tUnable to open /dev/null: %s\n",
-		       strerror(errno));
-		printf("[SKIP]\tCannot execute test\n");
-		return 71;	/* EX_OSERR */
+		crit("Unable to open /dev/null: %s\n", strerror(errno));
 	}
 
 	test_syscall_numbering();
 	if (!nerr) {
-		printf("[OK]\tAll system calls succeeded or failed as expected\n");
+		ok("All system calls succeeded or failed as expected\n");
 		return 0;
 	} else {
-		printf("[FAIL]\tA total of %u system call%s had incorrect behavior\n",
-		       nerr, nerr != 1 ? "s" : "");
+		fail("A total of %u system call%s had incorrect behavior\n",
+		     nerr, nerr != 1 ? "s" : "");
 		return 1;
 	}
 }

From 795e2a023b8080b95442811f26f0762184116caa Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:00 -0700
Subject: [PATCH 0368/3292] selftests/x86/syscall: Add tests under ptrace to
 syscall_numbering_64

Add tests running under ptrace for syscall_numbering_64. ptrace stopping on
syscall entry and possibly modifying the syscall number (regs.orig_rax) or
the default return value (regs.rax) can have different results than the
normal system call path.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-4-hpa@zytor.com
---
 .../testing/selftests/x86/syscall_numbering.c | 232 ++++++++++++++++--
 1 file changed, 207 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 434fe0efafa0..991591718bb0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -16,8 +16,16 @@
 #include <string.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <signal.h>
 #include <sysexits.h>
 
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include <linux/ptrace.h>
+
 /* Common system call numbers */
 #define SYS_READ	  0
 #define SYS_WRITE	  1
@@ -33,13 +41,45 @@
 
 #define X32_BIT 0x40000000
 
-static unsigned int nerr = 0;	/* Cumulative error count */
 static int nullfd = -1;		/* File descriptor for /dev/null */
-static int indent = 0;
+static bool with_x32;		/* x32 supported on this kernel? */
+
+enum ptrace_pass {
+	PTP_NOTHING,
+	PTP_GETREGS,
+	PTP_WRITEBACK,
+	PTP_FUZZRET,
+	PTP_FUZZHIGH,
+	PTP_INTNUM,
+	PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
+{
+	[PTP_NOTHING]	= "just stop, no data read",
+	[PTP_GETREGS]	= "only getregs",
+	[PTP_WRITEBACK]	= "getregs, unmodified setregs",
+	[PTP_FUZZRET]	= "modifying the default return",
+	[PTP_FUZZHIGH]	= "clobbering the top 32 bits",
+	[PTP_INTNUM]	= "sign-extending the syscall number",
+};
+
+/*
+ * Shared memory block between tracer and test
+ */
+struct shared {
+	unsigned int nerr;	/* Total error count */
+	unsigned int indent;	/* Message indentation level */
+	enum ptrace_pass ptrace_pass;
+	bool probing_syscall;	/* In probe_syscall() */
+};
+static volatile struct shared *sh;
 
 static inline unsigned int offset(void)
 {
-	return 8 + indent * 4;
+	unsigned int level = sh ? sh->indent : 0;
+
+	return 8 + level * 4;
 }
 
 #define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
@@ -52,16 +92,19 @@ static inline unsigned int offset(void)
 #define fail(fmt, ...)					\
 	do {						\
 		msg(FAIL, fmt, ## __VA_ARGS__);		\
-		nerr++;					\
-	} while (0)
+		sh->nerr++;				\
+       } while (0)
 
 #define crit(fmt, ...)					\
 	do {						\
-		indent = 0;				\
+		sh->indent = 0;				\
 		msg(FAIL, fmt, ## __VA_ARGS__);		\
 		msg(SKIP, "Unable to run test\n");	\
-		exit(EX_OSERR);
-	} while (0)
+		exit(EX_OSERR);				\
+       } while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE	-9999
 
 /*
  * Directly invokes the given syscall with nullfd as the first argument
@@ -69,7 +112,7 @@ static inline unsigned int offset(void)
  * end up intercepting some system calls for some reason, or modify
  * the system call number itself.
  */
-static inline long long probe_syscall(int msb, int lsb)
+static long long probe_syscall(int msb, int lsb)
 {
 	register long long arg1 asm("rdi") = nullfd;
 	register long long arg2 asm("rsi") = 0;
@@ -80,11 +123,21 @@ static inline long long probe_syscall(int msb, int lsb)
 	long long nr = ((long long)msb << 32) | (unsigned int)lsb;
 	long long ret;
 
+	/*
+	 * We pass in an extra copy of the extended system call number
+	 * in %rbx, so we can examine it from the ptrace handler without
+	 * worrying about it being possibly modified. This is to test
+	 * the validity of struct user regs.orig_rax a.k.a.
+	 * struct pt_regs.orig_ax.
+	 */
+	sh->probing_syscall = true;
 	asm volatile("syscall"
 		     : "=a" (ret)
-		     : "a" (nr), "r" (arg1), "r" (arg2), "r" (arg3),
+		     : "a" (nr), "b" (nr),
+		       "r" (arg1), "r" (arg2), "r" (arg3),
 		       "r" (arg4), "r" (arg5), "r" (arg6)
 		     : "rcx", "r11", "memory", "cc");
+	sh->probing_syscall = false;
 
 	return ret;
 }
@@ -119,9 +172,9 @@ static unsigned int _check_for(int msb, int start, int end, long long expect,
 {
 	unsigned int err = 0;
 
-	indent++;
+	sh->indent++;
 	if (start != end)
-		indent++;
+		sh->indent++;
 
 	for (int nr = start; nr <= end; nr++) {
 		long long ret = probe_syscall(msb, nr);
@@ -135,20 +188,19 @@ static unsigned int _check_for(int msb, int start, int end, long long expect,
 	}
 
 	if (start != end)
-		indent--;
+		sh->indent--;
 
 	if (err) {
-		nerr += err;
 		if (start != end)
 			fail("%s had %u failure%s\n",
-			       syscall_str(msb, start, end),
-			       err, err == 1 ? "s" : "");
+			     syscall_str(msb, start, end),
+			     err, err == 1 ? "s" : "");
 	} else {
 		ok("%s returned %s as expected\n",
 		   syscall_str(msb, start, end), expect_str);
 	}
 
-	indent--;
+	sh->indent--;
 
 	return err;
 }
@@ -175,12 +227,11 @@ static bool test_x32(void)
 {
 	long long ret;
 	pid_t mypid = getpid();
-	bool with_x32;
 
 	run("Checking for x32 by calling x32 getpid()\n");
 	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
 
-	indent++;
+	sh->indent++;
 	if (ret == mypid) {
 		info("x32 is supported\n");
 		with_x32 = true;
@@ -188,15 +239,17 @@ static bool test_x32(void)
 		info("x32 is not supported\n");
 		with_x32 = false;
 	} else {
-		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
+		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
 		with_x32 = false;
 	}
-	indent--;
+	sh->indent--;
 	return with_x32;
 }
 
 static void test_syscalls_common(int msb)
 {
+	enum ptrace_pass pass = sh->ptrace_pass;
+
 	run("Checking some common syscalls as 64 bit\n");
 	check_zero(msb, SYS_READ);
 	check_zero(msb, SYS_WRITE);
@@ -206,7 +259,11 @@ static void test_syscalls_common(int msb)
 	check_zero(msb, X64_WRITEV);
 
 	run("Checking out of range system calls\n");
-	check_for(msb, -64, -1, -ENOSYS);
+	check_for(msb, -64, -2, -ENOSYS);
+	if (pass >= PTP_FUZZRET)
+		check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+	else
+		check_for(msb, -1, -1, -ENOSYS);
 	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
 	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
 	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
@@ -249,7 +306,8 @@ static void test_syscall_numbering(void)
 		0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
 		INT_MIN, INT_MIN+1
 	};
-	bool with_x32 = test_x32();
+
+	sh->indent++;
 
 	/*
 	 * The MSB is supposed to be ignored, so we loop over a few
@@ -260,7 +318,7 @@ static void test_syscall_numbering(void)
 		run("Checking system calls with msb = %d (0x%x)\n",
 		    msb, msb);
 
-		indent++;
+		sh->indent++;
 
 		test_syscalls_common(msb);
 		if (with_x32)
@@ -268,12 +326,119 @@ static void test_syscall_numbering(void)
 		else
 			test_syscalls_without_x32(msb);
 
-		indent--;
+		sh->indent--;
+	}
+
+	sh->indent--;
+}
+
+static void syscall_numbering_tracee(void)
+{
+	enum ptrace_pass pass;
+
+	if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+		crit("Failed to request tracing\n");
+		return;
+	}
+	raise(SIGSTOP);
+
+	for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
+	     sh->ptrace_pass = ++pass) {
+		run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+		test_syscall_numbering();
+	}
+}
+
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+	struct user_regs_struct regs;
+
+	sh->probing_syscall = false; /* Do this on entry only */
+
+	/* For these, don't even getregs */
+	if (pass == PTP_NOTHING || pass == PTP_DONE)
+		return;
+
+	ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
+
+	if (regs.orig_rax != regs.rbx) {
+		fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+		     (unsigned long long)regs.orig_rax,
+		     (unsigned long long)regs.rbx);
+	}
+
+	switch (pass) {
+	case PTP_GETREGS:
+		/* Just read, no writeback */
+		return;
+	case PTP_WRITEBACK:
+		/* Write back the same register state verbatim */
+		break;
+	case PTP_FUZZRET:
+		regs.rax = MODIFIED_BY_PTRACE;
+		break;
+	case PTP_FUZZHIGH:
+		regs.rax = MODIFIED_BY_PTRACE;
+		regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+		break;
+	case PTP_INTNUM:
+		regs.rax = MODIFIED_BY_PTRACE;
+		regs.orig_rax = (int)regs.orig_rax;
+		break;
+	default:
+		crit("invalid ptrace_pass\n");
+		break;
+	}
+
+	ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
+}
+
+static void syscall_numbering_tracer(pid_t testpid)
+{
+	int wstatus;
+
+	do {
+		pid_t wpid = waitpid(testpid, &wstatus, 0);
+		if (wpid < 0 && errno != EINTR)
+			break;
+		if (wpid != testpid)
+			continue;
+		if (!WIFSTOPPED(wstatus))
+			break;	/* Thread exited? */
+
+		if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+			mess_with_syscall(testpid, sh->ptrace_pass);
+	} while (sh->ptrace_pass != PTP_DONE &&
+		 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+	ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+	/* Wait for the child process to terminate */
+	while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
+		/* wait some more */;
+}
+
+static void test_traced_syscall_numbering(void)
+{
+	pid_t testpid;
+
+	/* Launch the test thread; this thread continues as the tracer thread */
+	testpid = fork();
+
+	if (testpid < 0) {
+		crit("Unable to launch tracer process\n");
+	} else if (testpid == 0) {
+		syscall_numbering_tracee();
+		_exit(0);
+	} else {
+		syscall_numbering_tracer(testpid);
 	}
 }
 
 int main(void)
 {
+	unsigned int nerr;
+
 	/*
 	 * It is quite likely to get a segfault on a failure, so make
 	 * sure the message gets out by setting stdout to nonbuffered.
@@ -288,7 +453,24 @@ int main(void)
 		crit("Unable to open /dev/null: %s\n", strerror(errno));
 	}
 
+	/*
+	 * Set up a block of shared memory...
+	 */
+	sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+		  MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+	if (sh == MAP_FAILED) {
+		crit("Unable to allocated shared memory block: %s\n",
+		     strerror(errno));
+	}
+
+	with_x32 = test_x32();
+
+	run("Running tests without ptrace...\n");
 	test_syscall_numbering();
+
+	test_traced_syscall_numbering();
+
+	nerr = sh->nerr;
 	if (!nerr) {
 		ok("All system calls succeeded or failed as expected\n");
 		return 0;

From 0595494891723a1dcca5eaa8eeca8ab54ad953b9 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:01 -0700
Subject: [PATCH 0369/3292] x86/entry/64: Sign-extend system calls on entry to
 int

Right now, *some* code will treat e.g. 0x0000000100000001 as a system
call and some will not. Some of the code, notably in ptrace, will
treat 0x000000018000000 as a system call and some will not. Finally,
right now, e.g. 335 for x86-64 will force the exit code to be set to
-ENOSYS even if poked by ptrace, but 548 will not, because there is an
observable difference between an out of range system call and a system
call number that falls outside the range of the table.

This is visible to the user: for example, the syscall_numbering_64
test fails if run under strace, because as strace uses ptrace, it ends
up clobbering the upper half of the 64-bit system call number.

The architecture independent code all assumes that a system call is "int"
that the value -1 specifically and not just any negative value is used for
a non-system call. This is the case on x86 as well when arch-independent
code is involved. The arch-independent API is defined/documented (but not
*implemented*!) in <asm-generic/syscall.h>.

This is an ABI change, but is in fact a revert to the original x86-64
ABI. The original assembly entry code would zero-extend the system call
number;

Use sign extend to be explicit that this is treated as a signed number
(although in practice it makes no difference, of course) and to avoid
people getting the idea of "optimizing" it, as has happened on at least
two(!) separate occasions.

Do not store the extended value into regs->orig_ax, however: on x86-64, the
ABI is that the callee is responsible for extending parameters, so only
examining the lower 32 bits is fully consistent with any "int" argument to
any system call, e.g. regs->di for write(2). The full value of %rax on
entry to the kernel is thus still available.

[ tglx: Add a comment to the ASM code ]

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-5-hpa@zytor.com
---
 arch/x86/entry/entry_64.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1d9db15fdc69..a5f02d03c585 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -108,7 +108,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 
 	/* IRQs are off. */
 	movq	%rsp, %rdi
-	movq	%rax, %rsi
+	/* Sign extend the lower 32bit as syscall numbers are treated as int */
+	movslq	%eax, %rsi
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 	/*

From b337b4965e3a3e567f11828a9e3fe3fb3faefa47 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:02 -0700
Subject: [PATCH 0370/3292] x86/entry: Treat out of range and gap system calls
 the same

The current 64-bit system call entry code treats out-of-range system
calls differently than system calls that map to a hole in the system
call table.

This is visible to the user if system calls are intercepted via ptrace or
seccomp and the return value (regs->ax) is modified: in the former case,
the return value is preserved, and in the latter case, sys_ni_syscall() is
called and the return value is forced to -ENOSYS.

The API spec in <asm-generic/syscalls.h> is very clear that only
(int)-1 is the non-system-call sentinel value, so make the system call
behavior consistent by calling sys_ni_syscall() for all invalid system
call numbers except for -1.

Although currently sys_ni_syscall() simply returns -ENOSYS, calling it
explicitly is friendly for tracing and future possible extensions, and
as this is an error path there is no reason to optimize it.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-6-hpa@zytor.com
---
 arch/x86/entry/common.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 00da0f5420de..f51bc17262db 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -52,6 +52,8 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr)
 					X32_NR_syscalls);
 		regs->ax = x32_sys_call_table[nr](regs);
 #endif
+	} else if (unlikely((int)nr != -1)) {
+		regs->ax = __x64_sys_ni_syscall(regs);
 	}
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
@@ -76,6 +78,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
 	if (likely(nr < IA32_NR_syscalls)) {
 		nr = array_index_nospec(nr, IA32_NR_syscalls);
 		regs->ax = ia32_sys_call_table[nr](regs);
+	} else if (unlikely((int)nr != -1)) {
+		regs->ax = __ia32_sys_ni_syscall(regs);
 	}
 }
 

From 0c20f2d29fff7ecd3b2802536d0089ed908304a5 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:47 -0700
Subject: [PATCH 0371/3292] igc: Move igc_xdp_is_enabled()

Move the helper igc_xdp_is_enabled() to igc_xdp.h so it can be reused in
igc_xdp.c by upcoming patches that will introduce AF_XDP zero-copy
support to the driver.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 5 -----
 drivers/net/ethernet/intel/igc/igc_xdp.h  | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 92c0701e2a36..edfe9d492071 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -515,11 +515,6 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 	return err;
 }
 
-static bool igc_xdp_is_enabled(struct igc_adapter *adapter)
-{
-	return !!adapter->xdp_prog;
-}
-
 /**
  * igc_configure_rx_ring - Configure a receive ring after Reset
  * @adapter: board private structure
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h
index cfecb515b718..412aa369e6ba 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.h
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.h
@@ -10,4 +10,9 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 int igc_xdp_register_rxq_info(struct igc_ring *ring);
 void igc_xdp_unregister_rxq_info(struct igc_ring *ring);
 
+static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter)
+{
+	return !!adapter->xdp_prog;
+}
+
 #endif /* _IGC_XDP_H_ */

From 73a6e3721261524567eb5e319d5dc8e37b5f18dc Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:48 -0700
Subject: [PATCH 0372/3292] igc: Refactor __igc_xdp_run_prog()

Refactor __igc_xdp_run_prog() helper from igc_xdp_run_prog(),
preparing the code for AF_XDP zero-copy support which is added
by upcoming patches.

The existing igc_xdp_run_prog() caters to regular XDP rx path
which has to verify if bpf_prog is not NULL. Zero-copy
path assumes that bpf_prog is not NULL and hence this check is
not required. Therefore it makes sense to refactor the common
code into a helper function, to avoid code duplication.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 56 +++++++++++------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index edfe9d492071..1961cc667c3b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2020,12 +2020,38 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
 	return res;
 }
 
+/* This function assumes rcu_read_lock() is held by the caller. */
+static int __igc_xdp_run_prog(struct igc_adapter *adapter,
+			      struct bpf_prog *prog,
+			      struct xdp_buff *xdp)
+{
+	u32 act = bpf_prog_run_xdp(prog, xdp);
+
+	switch (act) {
+	case XDP_PASS:
+		return IGC_XDP_PASS;
+	case XDP_TX:
+		return igc_xdp_xmit_back(adapter, xdp) < 0 ?
+			IGC_XDP_CONSUMED : IGC_XDP_TX;
+	case XDP_REDIRECT:
+		return xdp_do_redirect(adapter->netdev, xdp, prog) < 0 ?
+			IGC_XDP_CONSUMED : IGC_XDP_REDIRECT;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		return IGC_XDP_CONSUMED;
+	}
+}
+
 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 					struct xdp_buff *xdp)
 {
 	struct bpf_prog *prog;
 	int res;
-	u32 act;
 
 	rcu_read_lock();
 
@@ -2035,33 +2061,7 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 		goto unlock;
 	}
 
-	act = bpf_prog_run_xdp(prog, xdp);
-	switch (act) {
-	case XDP_PASS:
-		res = IGC_XDP_PASS;
-		break;
-	case XDP_TX:
-		if (igc_xdp_xmit_back(adapter, xdp) < 0)
-			res = IGC_XDP_CONSUMED;
-		else
-			res = IGC_XDP_TX;
-		break;
-	case XDP_REDIRECT:
-		if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
-			res = IGC_XDP_CONSUMED;
-		else
-			res = IGC_XDP_REDIRECT;
-		break;
-	default:
-		bpf_warn_invalid_xdp_action(act);
-		fallthrough;
-	case XDP_ABORTED:
-		trace_xdp_exception(adapter->netdev, prog, act);
-		fallthrough;
-	case XDP_DROP:
-		res = IGC_XDP_CONSUMED;
-		break;
-	}
+	res = __igc_xdp_run_prog(adapter, prog, xdp);
 
 unlock:
 	rcu_read_unlock();

From f485164867d3b960e811d94fc83e12d5a687ef05 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:49 -0700
Subject: [PATCH 0373/3292] igc: Refactor igc_clean_rx_ring()

Refactor igc_clean_rx_ring() helper, preparing the code for AF_XDP
zero-copy support which is added by upcoming patches.

The refactor consists of encapsulating page-shared specific code into
its own helper, leaving common code that will be shared by both
page-shared and xsk pool in igc_clean_rx_ring().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 1961cc667c3b..5024f8dc98f9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -346,11 +346,7 @@ static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
 	return err;
 }
 
-/**
- * igc_clean_rx_ring - Free Rx Buffers per Queue
- * @rx_ring: ring to free buffers from
- */
-static void igc_clean_rx_ring(struct igc_ring *rx_ring)
+static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
 {
 	u16 i = rx_ring->next_to_clean;
 
@@ -383,12 +379,21 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring)
 		if (i == rx_ring->count)
 			i = 0;
 	}
+}
 
-	clear_ring_uses_large_buffer(rx_ring);
+/**
+ * igc_clean_rx_ring - Free Rx Buffers per Queue
+ * @ring: ring to free buffers from
+ */
+static void igc_clean_rx_ring(struct igc_ring *ring)
+{
+	igc_clean_rx_ring_page_shared(ring);
 
-	rx_ring->next_to_alloc = 0;
-	rx_ring->next_to_clean = 0;
-	rx_ring->next_to_use = 0;
+	clear_ring_uses_large_buffer(ring);
+
+	ring->next_to_alloc = 0;
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
 }
 
 /**

From 4609ffb9f6157880e76c038f8df4fbf4e148a41a Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:50 -0700
Subject: [PATCH 0374/3292] igc: Refactor XDP rxq info registration

Refactor XDP rxq info registration code, preparing the driver for AF_XDP
zero-copy support which is added by upcoming patches.

Currently, xdp_rxq and memory model are both registered during RX
resource setup time by igc_xdp_register_rxq_info() helper. With AF_XDP,
we want to register the memory model later on while configuring the ring
because we will know which memory model type to register
(MEM_TYPE_PAGE_SHARED or MEM_TYPE_XSK_BUFF_POOL).

The helpers igc_xdp_register_rxq_info() and igc_xdp_unregister_rxq_
info() are not useful anymore so they are removed.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 16 ++++++++++----
 drivers/net/ethernet/intel/igc/igc_xdp.c  | 27 -----------------------
 drivers/net/ethernet/intel/igc/igc_xdp.h  |  3 ---
 3 files changed, 12 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 5024f8dc98f9..90d8e4355f43 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -419,7 +419,7 @@ void igc_free_rx_resources(struct igc_ring *rx_ring)
 {
 	igc_clean_rx_ring(rx_ring);
 
-	igc_xdp_unregister_rxq_info(rx_ring);
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
@@ -458,11 +458,16 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
 {
 	struct net_device *ndev = rx_ring->netdev;
 	struct device *dev = rx_ring->dev;
+	u8 index = rx_ring->queue_index;
 	int size, desc_len, res;
 
-	res = igc_xdp_register_rxq_info(rx_ring);
-	if (res < 0)
+	res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
+			       rx_ring->q_vector->napi.napi_id);
+	if (res < 0) {
+		netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
+			   index);
 		return res;
+	}
 
 	size = sizeof(struct igc_rx_buffer) * rx_ring->count;
 	rx_ring->rx_buffer_info = vzalloc(size);
@@ -488,7 +493,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
 	return 0;
 
 err:
-	igc_xdp_unregister_rxq_info(rx_ring);
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 	netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
@@ -536,6 +541,9 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
 	u32 srrctl = 0, rxdctl = 0;
 	u64 rdba = ring->dma;
 
+	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+					   MEM_TYPE_PAGE_SHARED, NULL));
+
 	if (igc_xdp_is_enabled(adapter))
 		set_ring_uses_large_buffer(ring);
 
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index 11133c4619bb..27c886a254f1 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -31,30 +31,3 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 
 	return 0;
 }
-
-int igc_xdp_register_rxq_info(struct igc_ring *ring)
-{
-	struct net_device *dev = ring->netdev;
-	int err;
-
-	err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index, 0);
-	if (err) {
-		netdev_err(dev, "Failed to register xdp rxq info\n");
-		return err;
-	}
-
-	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
-					 NULL);
-	if (err) {
-		netdev_err(dev, "Failed to register xdp rxq mem model\n");
-		xdp_rxq_info_unreg(&ring->xdp_rxq);
-		return err;
-	}
-
-	return 0;
-}
-
-void igc_xdp_unregister_rxq_info(struct igc_ring *ring)
-{
-	xdp_rxq_info_unreg(&ring->xdp_rxq);
-}
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h
index 412aa369e6ba..cdaa2c39b03a 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.h
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.h
@@ -7,9 +7,6 @@
 int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 		     struct netlink_ext_ack *extack);
 
-int igc_xdp_register_rxq_info(struct igc_ring *ring);
-void igc_xdp_unregister_rxq_info(struct igc_ring *ring);
-
 static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter)
 {
 	return !!adapter->xdp_prog;

From a27e6e73e5501fd0cb84467d71ddeac9a5855e0b Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:51 -0700
Subject: [PATCH 0375/3292] igc: Introduce TX/RX stats helpers

In preparation for AF_XDP zero-copy support, encapsulate the code that
updates the driver RX stats in its own local helper so it can be reused
in the zero-copy path. Likewise, encapsulate TX stats code as well.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 43 ++++++++++++++++-------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 90d8e4355f43..5a262bab9116 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2111,6 +2111,20 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
 		xdp_do_flush();
 }
 
+static void igc_update_rx_stats(struct igc_q_vector *q_vector,
+				unsigned int packets, unsigned int bytes)
+{
+	struct igc_ring *ring = q_vector->rx.ring;
+
+	u64_stats_update_begin(&ring->rx_syncp);
+	ring->rx_stats.packets += packets;
+	ring->rx_stats.bytes += bytes;
+	u64_stats_update_end(&ring->rx_syncp);
+
+	q_vector->rx.total_packets += packets;
+	q_vector->rx.total_bytes += bytes;
+}
+
 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 {
 	unsigned int total_bytes = 0, total_packets = 0;
@@ -2231,12 +2245,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 	/* place incomplete frames back on ring for completion */
 	rx_ring->skb = skb;
 
-	u64_stats_update_begin(&rx_ring->rx_syncp);
-	rx_ring->rx_stats.packets += total_packets;
-	rx_ring->rx_stats.bytes += total_bytes;
-	u64_stats_update_end(&rx_ring->rx_syncp);
-	q_vector->rx.total_packets += total_packets;
-	q_vector->rx.total_bytes += total_bytes;
+	igc_update_rx_stats(q_vector, total_packets, total_bytes);
 
 	if (cleaned_count)
 		igc_alloc_rx_buffers(rx_ring, cleaned_count);
@@ -2244,6 +2253,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 	return total_packets;
 }
 
+static void igc_update_tx_stats(struct igc_q_vector *q_vector,
+				unsigned int packets, unsigned int bytes)
+{
+	struct igc_ring *ring = q_vector->tx.ring;
+
+	u64_stats_update_begin(&ring->tx_syncp);
+	ring->tx_stats.bytes += bytes;
+	ring->tx_stats.packets += packets;
+	u64_stats_update_end(&ring->tx_syncp);
+
+	q_vector->tx.total_bytes += bytes;
+	q_vector->tx.total_packets += packets;
+}
+
 /**
  * igc_clean_tx_irq - Reclaim resources after transmit completes
  * @q_vector: pointer to q_vector containing needed info
@@ -2346,12 +2369,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	u64_stats_update_begin(&tx_ring->tx_syncp);
-	tx_ring->tx_stats.bytes += total_bytes;
-	tx_ring->tx_stats.packets += total_packets;
-	u64_stats_update_end(&tx_ring->tx_syncp);
-	q_vector->tx.total_bytes += total_bytes;
-	q_vector->tx.total_packets += total_packets;
+
+	igc_update_tx_stats(q_vector, total_packets, total_bytes);
 
 	if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
 		struct igc_hw *hw = &adapter->hw;

From 6123429516c7fc6a7ee2f0a9dbef8c0c16ffb7cc Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:52 -0700
Subject: [PATCH 0376/3292] igc: Introduce igc_unmap_tx_buffer() helper

In preparation for AF_XDP zero-copy support, encapsulate the code that
unmaps Tx buffers into its own local helper so we can reuse it, avoiding
code duplication.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 49 +++++++----------------
 1 file changed, 15 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 5a262bab9116..ebb57d82e4dd 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -171,6 +171,14 @@ static void igc_get_hw_control(struct igc_adapter *adapter)
 	     ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
 }
 
+static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
+{
+	dma_unmap_single(dev, dma_unmap_addr(buf, dma),
+			 dma_unmap_len(buf, len), DMA_TO_DEVICE);
+
+	dma_unmap_len_set(buf, len, 0);
+}
+
 /**
  * igc_clean_tx_ring - Free Tx Buffers
  * @tx_ring: ring to be cleaned
@@ -188,11 +196,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 		else
 			dev_kfree_skb_any(tx_buffer->skb);
 
-		/* unmap skb header data */
-		dma_unmap_single(tx_ring->dev,
-				 dma_unmap_addr(tx_buffer, dma),
-				 dma_unmap_len(tx_buffer, len),
-				 DMA_TO_DEVICE);
+		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 
 		/* check for eop_desc to determine the end of the packet */
 		eop_desc = tx_buffer->next_to_watch;
@@ -211,10 +215,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 
 			/* unmap any remaining paged data */
 			if (dma_unmap_len(tx_buffer, len))
-				dma_unmap_page(tx_ring->dev,
-					       dma_unmap_addr(tx_buffer, dma),
-					       dma_unmap_len(tx_buffer, len),
-					       DMA_TO_DEVICE);
+				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 		}
 
 		/* move us one more past the eop_desc for start of next pkt */
@@ -1219,11 +1220,7 @@ dma_error:
 	/* clear dma mappings for failed tx_buffer_info map */
 	while (tx_buffer != first) {
 		if (dma_unmap_len(tx_buffer, len))
-			dma_unmap_page(tx_ring->dev,
-				       dma_unmap_addr(tx_buffer, dma),
-				       dma_unmap_len(tx_buffer, len),
-				       DMA_TO_DEVICE);
-		dma_unmap_len_set(tx_buffer, len, 0);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 
 		if (i-- == 0)
 			i += tx_ring->count;
@@ -1231,11 +1228,7 @@ dma_error:
 	}
 
 	if (dma_unmap_len(tx_buffer, len))
-		dma_unmap_single(tx_ring->dev,
-				 dma_unmap_addr(tx_buffer, dma),
-				 dma_unmap_len(tx_buffer, len),
-				 DMA_TO_DEVICE);
-	dma_unmap_len_set(tx_buffer, len, 0);
+		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 
 	dev_kfree_skb_any(tx_buffer->skb);
 	tx_buffer->skb = NULL;
@@ -2317,14 +2310,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 		else
 			napi_consume_skb(tx_buffer->skb, napi_budget);
 
-		/* unmap skb header data */
-		dma_unmap_single(tx_ring->dev,
-				 dma_unmap_addr(tx_buffer, dma),
-				 dma_unmap_len(tx_buffer, len),
-				 DMA_TO_DEVICE);
-
-		/* clear tx_buffer data */
-		dma_unmap_len_set(tx_buffer, len, 0);
+		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 
 		/* clear last DMA location and unmap remaining buffers */
 		while (tx_desc != eop_desc) {
@@ -2338,13 +2324,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 			}
 
 			/* unmap any remaining paged data */
-			if (dma_unmap_len(tx_buffer, len)) {
-				dma_unmap_page(tx_ring->dev,
-					       dma_unmap_addr(tx_buffer, dma),
-					       dma_unmap_len(tx_buffer, len),
-					       DMA_TO_DEVICE);
-				dma_unmap_len_set(tx_buffer, len, 0);
-			}
+			if (dma_unmap_len(tx_buffer, len))
+				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 		}
 
 		/* move us one more past the eop_desc for start of next pkt */

From 859b4dfa4115d11aa1fda7d0628a93a9d61a7c46 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:53 -0700
Subject: [PATCH 0377/3292] igc: Replace IGC_TX_FLAGS_XDP flag by an enum

Up to this point, Tx buffers are associated with either a skb or a xdpf,
and the IGC_TX_FLAGS_XDP flag was enough to distinguish between these
two case. However, with upcoming patches that will add AF_XDP zero-copy
support, a third case will be introduced so this flag-based approach
won't fit well.

In preparation to land AF_XDP zero-copy support, replace the
IGC_TX_FLAGS_XDP flag by an enum which will be extended once zero-copy
support is introduced to the driver.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |  8 ++++++--
 drivers/net/ethernet/intel/igc/igc_main.c | 25 ++++++++++++++++++-----
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 25871351730b..4815c520652b 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -390,8 +390,6 @@ enum igc_tx_flags {
 	/* olinfo flags */
 	IGC_TX_FLAGS_IPV4	= 0x10,
 	IGC_TX_FLAGS_CSUM	= 0x20,
-
-	IGC_TX_FLAGS_XDP	= 0x100,
 };
 
 enum igc_boards {
@@ -408,12 +406,18 @@ enum igc_boards {
 #define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD)
 #define DESC_NEEDED	(MAX_SKB_FRAGS + 4)
 
+enum igc_tx_buffer_type {
+	IGC_TX_BUFFER_TYPE_SKB,
+	IGC_TX_BUFFER_TYPE_XDP,
+};
+
 /* wrapper around a pointer to a socket buffer,
  * so a DMA handle can be stored along with the buffer
  */
 struct igc_tx_buffer {
 	union igc_adv_tx_desc *next_to_watch;
 	unsigned long time_stamp;
+	enum igc_tx_buffer_type type;
 	union {
 		struct sk_buff *skb;
 		struct xdp_frame *xdpf;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ebb57d82e4dd..4e1327a5a61e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -191,10 +191,17 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 	while (i != tx_ring->next_to_use) {
 		union igc_adv_tx_desc *eop_desc, *tx_desc;
 
-		if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+		switch (tx_buffer->type) {
+		case IGC_TX_BUFFER_TYPE_XDP:
 			xdp_return_frame(tx_buffer->xdpf);
-		else
+			break;
+		case IGC_TX_BUFFER_TYPE_SKB:
 			dev_kfree_skb_any(tx_buffer->skb);
+			break;
+		default:
+			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
+			break;
+		}
 
 		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 
@@ -1360,6 +1367,7 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 
 	/* record the location of the first descriptor for this packet */
 	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->type = IGC_TX_BUFFER_TYPE_SKB;
 	first->skb = skb;
 	first->bytecount = skb->len;
 	first->gso_segs = 1;
@@ -1943,8 +1951,8 @@ static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
 		return -ENOMEM;
 	}
 
+	buffer->type = IGC_TX_BUFFER_TYPE_XDP;
 	buffer->xdpf = xdpf;
-	buffer->tx_flags = IGC_TX_FLAGS_XDP;
 	buffer->protocol = 0;
 	buffer->bytecount = xdpf->len;
 	buffer->gso_segs = 1;
@@ -2305,10 +2313,17 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
 
-		if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+		switch (tx_buffer->type) {
+		case IGC_TX_BUFFER_TYPE_XDP:
 			xdp_return_frame(tx_buffer->xdpf);
-		else
+			break;
+		case IGC_TX_BUFFER_TYPE_SKB:
 			napi_consume_skb(tx_buffer->skb, napi_budget);
+			break;
+		default:
+			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
+			break;
+		}
 
 		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 

From fc9df2a0b520d7d439ecf464794d53e91be74b93 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:54 -0700
Subject: [PATCH 0378/3292] igc: Enable RX via AF_XDP zero-copy

Add support for receiving packets via AF_XDP zero-copy mechanism.

Add a new flag to 'enum igc_ring_flags_t' to indicate the ring has
AF_XDP zero-copy enabled so proper ring setup is carried out during ring
configuration in igc_configure_rx_ring().

RX buffers can now be allocated via the shared pages mechanism (default
behavior of the driver) or via xsk pool (when AF_XDP zero-copy is
enabled) so a union is added to the 'struct igc_rx_buffer' to cover both
cases.

When AF_XDP zero-copy is enabled, rx buffers are allocated from the xsk
pool using the new helper igc_alloc_rx_buffers_zc() which is the
counterpart of igc_alloc_rx_buffers().

Likewise other Intel drivers that support AF_XDP zero-copy, in igc we
have a dedicated path for cleaning up rx irqs when zero-copy is enabled.
This avoids adding too many checks within igc_clean_rx_irq(), resulting
in a more readable and efficient code since this function is called from
the hot-path of the driver.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |  22 +-
 drivers/net/ethernet/intel/igc/igc_base.h |   1 +
 drivers/net/ethernet/intel/igc/igc_main.c | 342 +++++++++++++++++++++-
 drivers/net/ethernet/intel/igc/igc_xdp.c  | 100 +++++++
 drivers/net/ethernet/intel/igc/igc_xdp.h  |   2 +
 5 files changed, 449 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 4815c520652b..cd6f4c94c4dd 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -118,6 +118,7 @@ struct igc_ring {
 	};
 
 	struct xdp_rxq_info xdp_rxq;
+	struct xsk_buff_pool *xsk_pool;
 } ____cacheline_internodealigned_in_smp;
 
 /* Board specific private data structure */
@@ -255,6 +256,9 @@ bool igc_has_link(struct igc_adapter *adapter);
 void igc_reset(struct igc_adapter *adapter);
 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
 void igc_update_stats(struct igc_adapter *adapter);
+void igc_disable_rx_ring(struct igc_ring *ring);
+void igc_enable_rx_ring(struct igc_ring *ring);
+int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 
 /* igc_dump declarations */
 void igc_rings_dump(struct igc_adapter *adapter);
@@ -432,14 +436,19 @@ struct igc_tx_buffer {
 };
 
 struct igc_rx_buffer {
-	dma_addr_t dma;
-	struct page *page;
+	union {
+		struct {
+			dma_addr_t dma;
+			struct page *page;
 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 page_offset;
+			__u32 page_offset;
 #else
-	__u16 page_offset;
+			__u16 page_offset;
 #endif
-	__u16 pagecnt_bias;
+			__u16 pagecnt_bias;
+		};
+		struct xdp_buff *xdp;
+	};
 };
 
 struct igc_q_vector {
@@ -525,7 +534,8 @@ enum igc_ring_flags_t {
 	IGC_RING_FLAG_RX_SCTP_CSUM,
 	IGC_RING_FLAG_RX_LB_VLAN_BSWAP,
 	IGC_RING_FLAG_TX_CTX_IDX,
-	IGC_RING_FLAG_TX_DETECT_HANG
+	IGC_RING_FLAG_TX_DETECT_HANG,
+	IGC_RING_FLAG_AF_XDP_ZC,
 };
 
 #define ring_uses_large_buffer(ring) \
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index ea627ce52525..2ca028c1919f 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -81,6 +81,7 @@ union igc_adv_rx_desc {
 
 /* Additional Receive Descriptor Control definitions */
 #define IGC_RXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Rx Queue */
+#define IGC_RXDCTL_SWFLUSH		0x04000000 /* Receive Software Flush */
 
 /* SRRCTL bit definitions */
 #define IGC_SRRCTL_BSIZEPKT_SHIFT		10 /* Shift _right_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 4e1327a5a61e..3ffc20fae4c6 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -11,7 +11,7 @@
 #include <linux/pm_runtime.h>
 #include <net/pkt_sched.h>
 #include <linux/bpf_trace.h>
-
+#include <net/xdp_sock_drv.h>
 #include <net/ipv6.h>
 
 #include "igc.h"
@@ -389,13 +389,31 @@ static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
 	}
 }
 
+static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
+{
+	struct igc_rx_buffer *bi;
+	u16 i;
+
+	for (i = 0; i < ring->count; i++) {
+		bi = &ring->rx_buffer_info[i];
+		if (!bi->xdp)
+			continue;
+
+		xsk_buff_free(bi->xdp);
+		bi->xdp = NULL;
+	}
+}
+
 /**
  * igc_clean_rx_ring - Free Rx Buffers per Queue
  * @ring: ring to free buffers from
  */
 static void igc_clean_rx_ring(struct igc_ring *ring)
 {
-	igc_clean_rx_ring_page_shared(ring);
+	if (ring->xsk_pool)
+		igc_clean_rx_ring_xsk_pool(ring);
+	else
+		igc_clean_rx_ring_page_shared(ring);
 
 	clear_ring_uses_large_buffer(ring);
 
@@ -533,6 +551,16 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 	return err;
 }
 
+static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
+					      struct igc_ring *ring)
+{
+	if (!igc_xdp_is_enabled(adapter) ||
+	    !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
+		return NULL;
+
+	return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
+}
+
 /**
  * igc_configure_rx_ring - Configure a receive ring after Reset
  * @adapter: board private structure
@@ -548,9 +576,20 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
 	int reg_idx = ring->reg_idx;
 	u32 srrctl = 0, rxdctl = 0;
 	u64 rdba = ring->dma;
+	u32 buf_size;
 
-	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-					   MEM_TYPE_PAGE_SHARED, NULL));
+	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
+	if (ring->xsk_pool) {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_XSK_BUFF_POOL,
+						   NULL));
+		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+	} else {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_PAGE_SHARED,
+						   NULL));
+	}
 
 	if (igc_xdp_is_enabled(adapter))
 		set_ring_uses_large_buffer(ring);
@@ -574,12 +613,15 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
 	ring->next_to_clean = 0;
 	ring->next_to_use = 0;
 
-	/* set descriptor configuration */
-	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
-	if (ring_uses_large_buffer(ring))
-		srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+	if (ring->xsk_pool)
+		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+	else if (ring_uses_large_buffer(ring))
+		buf_size = IGC_RXBUFFER_3072;
 	else
-		srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
+		buf_size = IGC_RXBUFFER_2048;
+
+	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
+	srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
 	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
 	wr32(IGC_SRRCTL(reg_idx), srrctl);
@@ -1939,6 +1981,63 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
 	}
 }
 
+static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
+{
+	union igc_adv_rx_desc *desc;
+	u16 i = ring->next_to_use;
+	struct igc_rx_buffer *bi;
+	dma_addr_t dma;
+	bool ok = true;
+
+	if (!count)
+		return ok;
+
+	desc = IGC_RX_DESC(ring, i);
+	bi = &ring->rx_buffer_info[i];
+	i -= ring->count;
+
+	do {
+		bi->xdp = xsk_buff_alloc(ring->xsk_pool);
+		if (!bi->xdp) {
+			ok = false;
+			break;
+		}
+
+		dma = xsk_buff_xdp_get_dma(bi->xdp);
+		desc->read.pkt_addr = cpu_to_le64(dma);
+
+		desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			desc = IGC_RX_DESC(ring, 0);
+			bi = ring->rx_buffer_info;
+			i -= ring->count;
+		}
+
+		/* Clear the length for the next_to_use descriptor. */
+		desc->wb.upper.length = 0;
+
+		count--;
+	} while (count);
+
+	i += ring->count;
+
+	if (ring->next_to_use != i) {
+		ring->next_to_use = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, ring->tail);
+	}
+
+	return ok;
+}
+
 static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
 				  struct xdp_frame *xdpf,
 				  struct igc_ring *ring)
@@ -2254,6 +2353,148 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 	return total_packets;
 }
 
+static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
+					    struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int datasize = xdp->data_end - xdp->data;
+	unsigned int totalsize = metasize + datasize;
+	struct sk_buff *skb;
+
+	skb = __napi_alloc_skb(&ring->q_vector->napi,
+			       xdp->data_end - xdp->data_hard_start,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+	memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	return skb;
+}
+
+static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
+				union igc_adv_rx_desc *desc,
+				struct xdp_buff *xdp,
+				ktime_t timestamp)
+{
+	struct igc_ring *ring = q_vector->rx.ring;
+	struct sk_buff *skb;
+
+	skb = igc_construct_skb_zc(ring, xdp);
+	if (!skb) {
+		ring->rx_stats.alloc_failed++;
+		return;
+	}
+
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+	if (igc_cleanup_headers(ring, desc, skb))
+		return;
+
+	igc_process_skb_fields(ring, desc, skb);
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_ring *ring = q_vector->rx.ring;
+	u16 cleaned_count = igc_desc_unused(ring);
+	int total_bytes = 0, total_packets = 0;
+	u16 ntc = ring->next_to_clean;
+	struct bpf_prog *prog;
+	bool failure = false;
+	int xdp_status = 0;
+
+	rcu_read_lock();
+
+	prog = READ_ONCE(adapter->xdp_prog);
+
+	while (likely(total_packets < budget)) {
+		union igc_adv_rx_desc *desc;
+		struct igc_rx_buffer *bi;
+		ktime_t timestamp = 0;
+		unsigned int size;
+		int res;
+
+		desc = IGC_RX_DESC(ring, ntc);
+		size = le16_to_cpu(desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		bi = &ring->rx_buffer_info[ntc];
+
+		if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
+			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
+							bi->xdp->data);
+
+			bi->xdp->data += IGC_TS_HDR_LEN;
+
+			/* HW timestamp has been copied into local variable. Metadata
+			 * length when XDP program is called should be 0.
+			 */
+			bi->xdp->data_meta += IGC_TS_HDR_LEN;
+			size -= IGC_TS_HDR_LEN;
+		}
+
+		bi->xdp->data_end = bi->xdp->data + size;
+		xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
+
+		res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
+		switch (res) {
+		case IGC_XDP_PASS:
+			igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
+			fallthrough;
+		case IGC_XDP_CONSUMED:
+			xsk_buff_free(bi->xdp);
+			break;
+		case IGC_XDP_TX:
+		case IGC_XDP_REDIRECT:
+			xdp_status |= res;
+			break;
+		}
+
+		bi->xdp = NULL;
+		total_bytes += size;
+		total_packets++;
+		cleaned_count++;
+		ntc++;
+		if (ntc == ring->count)
+			ntc = 0;
+	}
+
+	ring->next_to_clean = ntc;
+	rcu_read_unlock();
+
+	if (cleaned_count >= IGC_RX_BUFFER_WRITE)
+		failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
+
+	if (xdp_status)
+		igc_finalize_xdp(adapter, xdp_status);
+
+	igc_update_rx_stats(q_vector, total_packets, total_bytes);
+
+	if (xsk_uses_need_wakeup(ring->xsk_pool)) {
+		if (failure || ring->next_to_clean == ring->next_to_use)
+			xsk_set_rx_need_wakeup(ring->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(ring->xsk_pool);
+		return total_packets;
+	}
+
+	return failure ? budget : total_packets;
+}
+
 static void igc_update_tx_stats(struct igc_q_vector *q_vector,
 				unsigned int packets, unsigned int bytes)
 {
@@ -2946,7 +3187,10 @@ static void igc_configure(struct igc_adapter *adapter)
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		struct igc_ring *ring = adapter->rx_ring[i];
 
-		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+		if (ring->xsk_pool)
+			igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
+		else
+			igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
 	}
 }
 
@@ -3561,14 +3805,17 @@ static int igc_poll(struct napi_struct *napi, int budget)
 	struct igc_q_vector *q_vector = container_of(napi,
 						     struct igc_q_vector,
 						     napi);
+	struct igc_ring *rx_ring = q_vector->rx.ring;
 	bool clean_complete = true;
 	int work_done = 0;
 
 	if (q_vector->tx.ring)
 		clean_complete = igc_clean_tx_irq(q_vector, budget);
 
-	if (q_vector->rx.ring) {
-		int cleaned = igc_clean_rx_irq(q_vector, budget);
+	if (rx_ring) {
+		int cleaned = rx_ring->xsk_pool ?
+			      igc_clean_rx_irq_zc(q_vector, budget) :
+			      igc_clean_rx_irq(q_vector, budget);
 
 		work_done += cleaned;
 		if (cleaned >= budget)
@@ -5206,6 +5453,9 @@ static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	switch (bpf->command) {
 	case XDP_SETUP_PROG:
 		return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
+	case XDP_SETUP_XSK_POOL:
+		return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
+					  bpf->xsk.queue_id);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -5251,6 +5501,43 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
 	return num_frames - drops;
 }
 
+static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
+					struct igc_q_vector *q_vector)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 eics = 0;
+
+	eics |= q_vector->eims_value;
+	wr32(IGC_EICS, eics);
+}
+
+int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+	struct igc_q_vector *q_vector;
+	struct igc_ring *ring;
+
+	if (test_bit(__IGC_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!igc_xdp_is_enabled(adapter))
+		return -ENXIO;
+
+	if (queue_id >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	ring = adapter->rx_ring[queue_id];
+
+	if (!ring->xsk_pool)
+		return -ENXIO;
+
+	q_vector = adapter->q_vector[queue_id];
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		igc_trigger_rxtxq_interrupt(adapter, q_vector);
+
+	return 0;
+}
+
 static const struct net_device_ops igc_netdev_ops = {
 	.ndo_open		= igc_open,
 	.ndo_stop		= igc_close,
@@ -5266,6 +5553,7 @@ static const struct net_device_ops igc_netdev_ops = {
 	.ndo_setup_tc		= igc_setup_tc,
 	.ndo_bpf		= igc_bpf,
 	.ndo_xdp_xmit		= igc_xdp_xmit,
+	.ndo_xsk_wakeup		= igc_xsk_wakeup,
 };
 
 /* PCIe configuration access */
@@ -6018,6 +6306,36 @@ struct net_device *igc_get_hw_dev(struct igc_hw *hw)
 	return adapter->netdev;
 }
 
+static void igc_disable_rx_ring_hw(struct igc_ring *ring)
+{
+	struct igc_hw *hw = &ring->q_vector->adapter->hw;
+	u8 idx = ring->reg_idx;
+	u32 rxdctl;
+
+	rxdctl = rd32(IGC_RXDCTL(idx));
+	rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
+	rxdctl |= IGC_RXDCTL_SWFLUSH;
+	wr32(IGC_RXDCTL(idx), rxdctl);
+}
+
+void igc_disable_rx_ring(struct igc_ring *ring)
+{
+	igc_disable_rx_ring_hw(ring);
+	igc_clean_rx_ring(ring);
+}
+
+void igc_enable_rx_ring(struct igc_ring *ring)
+{
+	struct igc_adapter *adapter = ring->q_vector->adapter;
+
+	igc_configure_rx_ring(adapter, ring);
+
+	if (ring->xsk_pool)
+		igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
+	else
+		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+}
+
 /**
  * igc_init_module - Driver Registration Routine
  *
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index 27c886a254f1..c65d690b75bf 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020, Intel Corporation. */
 
+#include <net/xdp_sock_drv.h>
+
 #include "igc.h"
 #include "igc_xdp.h"
 
@@ -31,3 +33,101 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 
 	return 0;
 }
+
+static int igc_xdp_enable_pool(struct igc_adapter *adapter,
+			       struct xsk_buff_pool *pool, u16 queue_id)
+{
+	struct net_device *ndev = adapter->netdev;
+	struct device *dev = &adapter->pdev->dev;
+	struct igc_ring *rx_ring;
+	struct napi_struct *napi;
+	bool needs_reset;
+	u32 frame_size;
+	int err;
+
+	if (queue_id >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	frame_size = xsk_pool_get_rx_frame_size(pool);
+	if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2) {
+		/* When XDP is enabled, the driver doesn't support frames that
+		 * span over multiple buffers. To avoid that, we check if xsk
+		 * frame size is big enough to fit the max ethernet frame size
+		 * + vlan double tagging.
+		 */
+		return -EOPNOTSUPP;
+	}
+
+	err = xsk_pool_dma_map(pool, dev, IGC_RX_DMA_ATTR);
+	if (err) {
+		netdev_err(ndev, "Failed to map xsk pool\n");
+		return err;
+	}
+
+	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
+
+	rx_ring = adapter->rx_ring[queue_id];
+	napi = &rx_ring->q_vector->napi;
+
+	if (needs_reset) {
+		igc_disable_rx_ring(rx_ring);
+		napi_disable(napi);
+	}
+
+	set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
+
+	if (needs_reset) {
+		napi_enable(napi);
+		igc_enable_rx_ring(rx_ring);
+
+		err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX);
+		if (err) {
+			xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
+{
+	struct xsk_buff_pool *pool;
+	struct igc_ring *rx_ring;
+	struct napi_struct *napi;
+	bool needs_reset;
+
+	if (queue_id >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, queue_id);
+	if (!pool)
+		return -EINVAL;
+
+	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
+
+	rx_ring = adapter->rx_ring[queue_id];
+	napi = &rx_ring->q_vector->napi;
+
+	if (needs_reset) {
+		igc_disable_rx_ring(rx_ring);
+		napi_disable(napi);
+	}
+
+	xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
+	clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
+
+	if (needs_reset) {
+		napi_enable(napi);
+		igc_enable_rx_ring(rx_ring);
+	}
+
+	return 0;
+}
+
+int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool,
+		       u16 queue_id)
+{
+	return pool ? igc_xdp_enable_pool(adapter, pool, queue_id) :
+		      igc_xdp_disable_pool(adapter, queue_id);
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h
index cdaa2c39b03a..a74e5487d199 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.h
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.h
@@ -6,6 +6,8 @@
 
 int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 		     struct netlink_ext_ack *extack);
+int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool,
+		       u16 queue_id);
 
 static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter)
 {

From 9acf59a752d4c686739117d3b3129e60af1ba5c1 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 22 Apr 2021 23:25:55 -0700
Subject: [PATCH 0379/3292] igc: Enable TX via AF_XDP zero-copy

Add support for transmitting packets via AF_XDP zero-copy mechanism.

The packet transmission itself is implemented by igc_xdp_xmit_zc() which
is called from igc_clean_tx_irq() when the ring has AF_XDP zero-copy
enabled. Likewise i40e and ice drivers, the transmission budget used is
the number of descriptors available on the ring.

A new tx buffer type is introduced to 'enum igc_tx_buffer_type' to
indicate the tx buffer uses memory from xsk pool so it can be properly
cleaned after transmission or when the ring is cleaned.

The I225 controller has only 4 Tx hardware queues so the main difference
between igc and other Intel drivers that support AF_XDP zero-copy is
that there is no tx ring dedicated exclusively to XDP. Instead, tx
rings are shared between the network stack and XDP, and netdev queue
lock is used to ensure mutual exclusion. This is the same approach
implemented to support XDP_TX and XDP_REDIRECT actions.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |   3 +
 drivers/net/ethernet/intel/igc/igc_base.h |   1 +
 drivers/net/ethernet/intel/igc/igc_main.c | 113 +++++++++++++++++++++-
 drivers/net/ethernet/intel/igc/igc_xdp.c  |  20 +++-
 4 files changed, 129 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index cd6f4c94c4dd..b6d3277c6f52 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -258,6 +258,8 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
 void igc_update_stats(struct igc_adapter *adapter);
 void igc_disable_rx_ring(struct igc_ring *ring);
 void igc_enable_rx_ring(struct igc_ring *ring);
+void igc_disable_tx_ring(struct igc_ring *ring);
+void igc_enable_tx_ring(struct igc_ring *ring);
 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 
 /* igc_dump declarations */
@@ -413,6 +415,7 @@ enum igc_boards {
 enum igc_tx_buffer_type {
 	IGC_TX_BUFFER_TYPE_SKB,
 	IGC_TX_BUFFER_TYPE_XDP,
+	IGC_TX_BUFFER_TYPE_XSK,
 };
 
 /* wrapper around a pointer to a socket buffer,
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index 2ca028c1919f..ce530f5fd7bd 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -78,6 +78,7 @@ union igc_adv_rx_desc {
 
 /* Additional Transmit Descriptor Control definitions */
 #define IGC_TXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Tx Queue */
+#define IGC_TXDCTL_SWFLUSH	0x04000000 /* Transmit Software Flush */
 
 /* Additional Receive Descriptor Control definitions */
 #define IGC_RXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Rx Queue */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 3ffc20fae4c6..ea998d2defa4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -187,24 +187,28 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+	u32 xsk_frames = 0;
 
 	while (i != tx_ring->next_to_use) {
 		union igc_adv_tx_desc *eop_desc, *tx_desc;
 
 		switch (tx_buffer->type) {
+		case IGC_TX_BUFFER_TYPE_XSK:
+			xsk_frames++;
+			break;
 		case IGC_TX_BUFFER_TYPE_XDP:
 			xdp_return_frame(tx_buffer->xdpf);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 			break;
 		case IGC_TX_BUFFER_TYPE_SKB:
 			dev_kfree_skb_any(tx_buffer->skb);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 			break;
 		default:
 			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
 			break;
 		}
 
-		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
-
 		/* check for eop_desc to determine the end of the packet */
 		eop_desc = tx_buffer->next_to_watch;
 		tx_desc = IGC_TX_DESC(tx_ring, i);
@@ -234,6 +238,9 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 		}
 	}
 
+	if (tx_ring->xsk_pool && xsk_frames)
+		xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+
 	/* reset BQL for queue */
 	netdev_tx_reset_queue(txring_txq(tx_ring));
 
@@ -676,6 +683,8 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter,
 	u64 tdba = ring->dma;
 	u32 txdctl = 0;
 
+	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
+
 	/* disable the queue */
 	wr32(IGC_TXDCTL(reg_idx), 0);
 	wrfl();
@@ -2509,6 +2518,65 @@ static void igc_update_tx_stats(struct igc_q_vector *q_vector,
 	q_vector->tx.total_packets += packets;
 }
 
+static void igc_xdp_xmit_zc(struct igc_ring *ring)
+{
+	struct xsk_buff_pool *pool = ring->xsk_pool;
+	struct netdev_queue *nq = txring_txq(ring);
+	union igc_adv_tx_desc *tx_desc = NULL;
+	int cpu = smp_processor_id();
+	u16 ntu = ring->next_to_use;
+	struct xdp_desc xdp_desc;
+	u16 budget;
+
+	if (!netif_carrier_ok(ring->netdev))
+		return;
+
+	__netif_tx_lock(nq, cpu);
+
+	budget = igc_desc_unused(ring);
+
+	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+		u32 cmd_type, olinfo_status;
+		struct igc_tx_buffer *bi;
+		dma_addr_t dma;
+
+		cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+			   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+			   xdp_desc.len;
+		olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
+
+		dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
+		xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
+
+		tx_desc = IGC_TX_DESC(ring, ntu);
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		bi = &ring->tx_buffer_info[ntu];
+		bi->type = IGC_TX_BUFFER_TYPE_XSK;
+		bi->protocol = 0;
+		bi->bytecount = xdp_desc.len;
+		bi->gso_segs = 1;
+		bi->time_stamp = jiffies;
+		bi->next_to_watch = tx_desc;
+
+		netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
+
+		ntu++;
+		if (ntu == ring->count)
+			ntu = 0;
+	}
+
+	ring->next_to_use = ntu;
+	if (tx_desc) {
+		igc_flush_tx_descriptors(ring);
+		xsk_tx_release(pool);
+	}
+
+	__netif_tx_unlock(nq);
+}
+
 /**
  * igc_clean_tx_irq - Reclaim resources after transmit completes
  * @q_vector: pointer to q_vector containing needed info
@@ -2525,6 +2593,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 	unsigned int i = tx_ring->next_to_clean;
 	struct igc_tx_buffer *tx_buffer;
 	union igc_adv_tx_desc *tx_desc;
+	u32 xsk_frames = 0;
 
 	if (test_bit(__IGC_DOWN, &adapter->state))
 		return true;
@@ -2555,19 +2624,22 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 		total_packets += tx_buffer->gso_segs;
 
 		switch (tx_buffer->type) {
+		case IGC_TX_BUFFER_TYPE_XSK:
+			xsk_frames++;
+			break;
 		case IGC_TX_BUFFER_TYPE_XDP:
 			xdp_return_frame(tx_buffer->xdpf);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 			break;
 		case IGC_TX_BUFFER_TYPE_SKB:
 			napi_consume_skb(tx_buffer->skb, napi_budget);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 			break;
 		default:
 			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
 			break;
 		}
 
-		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
-
 		/* clear last DMA location and unmap remaining buffers */
 		while (tx_desc != eop_desc) {
 			tx_buffer++;
@@ -2609,6 +2681,14 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 
 	igc_update_tx_stats(q_vector, total_packets, total_bytes);
 
+	if (tx_ring->xsk_pool) {
+		if (xsk_frames)
+			xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+		if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
+			xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
+		igc_xdp_xmit_zc(tx_ring);
+	}
+
 	if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
 		struct igc_hw *hw = &adapter->hw;
 
@@ -6336,6 +6416,31 @@ void igc_enable_rx_ring(struct igc_ring *ring)
 		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
 }
 
+static void igc_disable_tx_ring_hw(struct igc_ring *ring)
+{
+	struct igc_hw *hw = &ring->q_vector->adapter->hw;
+	u8 idx = ring->reg_idx;
+	u32 txdctl;
+
+	txdctl = rd32(IGC_TXDCTL(idx));
+	txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
+	txdctl |= IGC_TXDCTL_SWFLUSH;
+	wr32(IGC_TXDCTL(idx), txdctl);
+}
+
+void igc_disable_tx_ring(struct igc_ring *ring)
+{
+	igc_disable_tx_ring_hw(ring);
+	igc_clean_tx_ring(ring);
+}
+
+void igc_enable_tx_ring(struct igc_ring *ring)
+{
+	struct igc_adapter *adapter = ring->q_vector->adapter;
+
+	igc_configure_tx_ring(adapter, ring);
+}
+
 /**
  * igc_init_module - Driver Registration Routine
  *
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index c65d690b75bf..a8cf5374be47 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -39,13 +39,14 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
 {
 	struct net_device *ndev = adapter->netdev;
 	struct device *dev = &adapter->pdev->dev;
-	struct igc_ring *rx_ring;
+	struct igc_ring *rx_ring, *tx_ring;
 	struct napi_struct *napi;
 	bool needs_reset;
 	u32 frame_size;
 	int err;
 
-	if (queue_id >= adapter->num_rx_queues)
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
 		return -EINVAL;
 
 	frame_size = xsk_pool_get_rx_frame_size(pool);
@@ -67,18 +68,23 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
 	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
 
 	rx_ring = adapter->rx_ring[queue_id];
+	tx_ring = adapter->tx_ring[queue_id];
+	/* Rx and Tx rings share the same napi context. */
 	napi = &rx_ring->q_vector->napi;
 
 	if (needs_reset) {
 		igc_disable_rx_ring(rx_ring);
+		igc_disable_tx_ring(tx_ring);
 		napi_disable(napi);
 	}
 
 	set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
+	set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
 
 	if (needs_reset) {
 		napi_enable(napi);
 		igc_enable_rx_ring(rx_ring);
+		igc_enable_tx_ring(tx_ring);
 
 		err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX);
 		if (err) {
@@ -92,12 +98,13 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
 
 static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
 {
+	struct igc_ring *rx_ring, *tx_ring;
 	struct xsk_buff_pool *pool;
-	struct igc_ring *rx_ring;
 	struct napi_struct *napi;
 	bool needs_reset;
 
-	if (queue_id >= adapter->num_rx_queues)
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
 		return -EINVAL;
 
 	pool = xsk_get_pool_from_qid(adapter->netdev, queue_id);
@@ -107,19 +114,24 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
 	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
 
 	rx_ring = adapter->rx_ring[queue_id];
+	tx_ring = adapter->tx_ring[queue_id];
+	/* Rx and Tx rings share the same napi context. */
 	napi = &rx_ring->q_vector->napi;
 
 	if (needs_reset) {
 		igc_disable_rx_ring(rx_ring);
+		igc_disable_tx_ring(tx_ring);
 		napi_disable(napi);
 	}
 
 	xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
 	clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
+	clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
 
 	if (needs_reset) {
 		napi_enable(napi);
 		igc_enable_rx_ring(rx_ring);
+		igc_enable_tx_ring(tx_ring);
 	}
 
 	return 0;

From 2682ea324b000709dafec7e9210caa5189377c45 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Thu, 20 May 2021 10:14:11 +0800
Subject: [PATCH 0380/3292] mISDN: Remove obsolete PIPELINE_DEBUG debugging
 information

As Leon Romanovsky's tips:
The definition of macro PIPELINE_DEBUG is commented more than 10 years ago
and can be seen as a dead code that should be removed.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/dsp_pipeline.c | 46 ++-----------------------------
 1 file changed, 2 insertions(+), 44 deletions(-)

diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c
index 40588692cec7..e11ca6bbc7f4 100644
--- a/drivers/isdn/mISDN/dsp_pipeline.c
+++ b/drivers/isdn/mISDN/dsp_pipeline.c
@@ -17,9 +17,6 @@
 #include "dsp.h"
 #include "dsp_hwec.h"
 
-/* uncomment for debugging */
-/*#define PIPELINE_DEBUG*/
-
 struct dsp_pipeline_entry {
 	struct mISDN_dsp_element *elem;
 	void                *p;
@@ -104,10 +101,6 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem)
 		}
 	}
 
-#ifdef PIPELINE_DEBUG
-	printk(KERN_DEBUG "%s: %s registered\n", __func__, elem->name);
-#endif
-
 	return 0;
 
 err2:
@@ -129,10 +122,6 @@ void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem)
 	list_for_each_entry_safe(entry, n, &dsp_elements, list)
 		if (entry->elem == elem) {
 			device_unregister(&entry->dev);
-#ifdef PIPELINE_DEBUG
-			printk(KERN_DEBUG "%s: %s unregistered\n",
-			       __func__, elem->name);
-#endif
 			return;
 		}
 	printk(KERN_ERR "%s: element %s not in list.\n", __func__, elem->name);
@@ -145,10 +134,6 @@ int dsp_pipeline_module_init(void)
 	if (IS_ERR(elements_class))
 		return PTR_ERR(elements_class);
 
-#ifdef PIPELINE_DEBUG
-	printk(KERN_DEBUG "%s: dsp pipeline module initialized\n", __func__);
-#endif
-
 	dsp_hwec_init();
 
 	return 0;
@@ -168,10 +153,6 @@ void dsp_pipeline_module_exit(void)
 		       __func__, entry->elem->name);
 		kfree(entry);
 	}
-
-#ifdef PIPELINE_DEBUG
-	printk(KERN_DEBUG "%s: dsp pipeline module exited\n", __func__);
-#endif
 }
 
 int dsp_pipeline_init(struct dsp_pipeline *pipeline)
@@ -181,10 +162,6 @@ int dsp_pipeline_init(struct dsp_pipeline *pipeline)
 
 	INIT_LIST_HEAD(&pipeline->list);
 
-#ifdef PIPELINE_DEBUG
-	printk(KERN_DEBUG "%s: dsp pipeline ready\n", __func__);
-#endif
-
 	return 0;
 }
 
@@ -210,15 +187,11 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline)
 		return;
 
 	_dsp_pipeline_destroy(pipeline);
-
-#ifdef PIPELINE_DEBUG
-	printk(KERN_DEBUG "%s: dsp pipeline destroyed\n", __func__);
-#endif
 }
 
 int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
 {
-	int incomplete = 0, found = 0;
+	int found = 0;
 	char *dup, *tok, *name, *args;
 	struct dsp_element_entry *entry, *n;
 	struct dsp_pipeline_entry *pipeline_entry;
@@ -251,7 +224,6 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
 					printk(KERN_ERR "%s: failed to add "
 					       "entry to pipeline: %s (out of "
 					       "memory)\n", __func__, elem->name);
-					incomplete = 1;
 					goto _out;
 				}
 				pipeline_entry->elem = elem;
@@ -268,20 +240,12 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
 					if (pipeline_entry->p) {
 						list_add_tail(&pipeline_entry->
 							      list, &pipeline->list);
-#ifdef PIPELINE_DEBUG
-						printk(KERN_DEBUG "%s: created "
-						       "instance of %s%s%s\n",
-						       __func__, name, args ?
-						       " with args " : "", args ?
-						       args : "");
-#endif
 					} else {
 						printk(KERN_ERR "%s: failed "
 						       "to add entry to pipeline: "
 						       "%s (new() returned NULL)\n",
 						       __func__, elem->name);
 						kfree(pipeline_entry);
-						incomplete = 1;
 					}
 				}
 				found = 1;
@@ -290,11 +254,9 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg)
 
 		if (found)
 			found = 0;
-		else {
+		else
 			printk(KERN_ERR "%s: element not found, skipping: "
 			       "%s\n", __func__, name);
-			incomplete = 1;
-		}
 	}
 
 _out:
@@ -303,10 +265,6 @@ _out:
 	else
 		pipeline->inuse = 0;
 
-#ifdef PIPELINE_DEBUG
-	printk(KERN_DEBUG "%s: dsp pipeline built%s: %s\n",
-	       __func__, incomplete ? " incomplete" : "", cfg);
-#endif
 	kfree(dup);
 	return 0;
 }

From d96b0e59468dcbd61417b7dd31985a700e58d3b2 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 10:21:30 +0800
Subject: [PATCH 0381/3292] net: hns3: refactor dump reg of debugfs

Currently, the debugfs command for reg is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create some files
"bios_common/ssu/igu_egu/rpu/ncsi/rtc/ppp/rcb/tqp/mac" for it,
and query it by command "cat xxx", return the result to
userspace, rather than record in dmesg.

The display style is below:
$ cat bios_common
BP_CPU_STATE: 0x0
DFX_MSIX_INFO_NIC_0: 0xc000
DFX_MSIX_INFO_NIC_1: 0x0
DFX_MSIX_INFO_NIC_2: 0x0
DFX_MSIX_INFO_NIC_3: 0x0
DFX_MSIX_INFO_ROC_0: 0xc000
DFX_MSIX_INFO_ROC_1: 0x0
DFX_MSIX_INFO_ROC_2: 0x0
DFX_MSIX_INFO_ROC_3: 0x0

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  10 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  84 +++-
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |   1 +
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 368 ++++++++++++------
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |   4 +-
 5 files changed, 327 insertions(+), 140 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d1cdb7494b9e..9ef4132ab2ca 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -265,6 +265,16 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_RESET_INFO,
 	HNAE3_DBG_CMD_IMP_INFO,
 	HNAE3_DBG_CMD_NCL_CONFIG,
+	HNAE3_DBG_CMD_REG_BIOS_COMMON,
+	HNAE3_DBG_CMD_REG_SSU,
+	HNAE3_DBG_CMD_REG_IGU_EGU,
+	HNAE3_DBG_CMD_REG_RPU,
+	HNAE3_DBG_CMD_REG_NCSI,
+	HNAE3_DBG_CMD_REG_RTC,
+	HNAE3_DBG_CMD_REG_PPP,
+	HNAE3_DBG_CMD_REG_RCB,
+	HNAE3_DBG_CMD_REG_TQP,
+	HNAE3_DBG_CMD_REG_MAC,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index ba4ee8ca7e71..af0751e53f79 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -23,6 +23,9 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
 	{
 		.name = "mac_list"
 	},
+	{
+		.name = "reg"
+	},
 	/* keep common at the bottom and add new directory above */
 	{
 		.name = "common"
@@ -132,6 +135,76 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN_128KB,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "bios_common",
+		.cmd = HNAE3_DBG_CMD_REG_BIOS_COMMON,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "ssu",
+		.cmd = HNAE3_DBG_CMD_REG_SSU,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "igu_egu",
+		.cmd = HNAE3_DBG_CMD_REG_IGU_EGU,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "rpu",
+		.cmd = HNAE3_DBG_CMD_REG_RPU,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "ncsi",
+		.cmd = HNAE3_DBG_CMD_REG_NCSI,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "rtc",
+		.cmd = HNAE3_DBG_CMD_REG_RTC,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "ppp",
+		.cmd = HNAE3_DBG_CMD_REG_PPP,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "rcb",
+		.cmd = HNAE3_DBG_CMD_REG_RCB,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "tqp",
+		.cmd = HNAE3_DBG_CMD_REG_TQP,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "mac",
+		.cmd = HNAE3_DBG_CMD_REG_MAC,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -529,17 +602,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
 
-	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
-	strncat(printf_buf, "dump reg [[bios common] [ssu <port_id>]",
-		HNS3_DBG_BUF_LEN - 1);
-	strncat(printf_buf + strlen(printf_buf),
-		" [igu egu <port_id>] [rpu <tc_queue_num>]",
-		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
-	strncat(printf_buf + strlen(printf_buf),
-		" [rtc] [ppp] [rcb] [tqp <queue_num>] [mac]]\n",
-		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
-	dev_info(&h->pdev->dev, "%s", printf_buf);
-
 	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
 	strncat(printf_buf, "dump reg dcb <port_id> <pri_id> <pg_id>",
 		HNS3_DBG_BUF_LEN - 1);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index a7af9277ae69..6060bfc21850 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -29,6 +29,7 @@ enum hns3_dbg_dentry_type {
 	HNS3_DBG_DENTRY_TX_BD,
 	HNS3_DBG_DENTRY_RX_BD,
 	HNS3_DBG_DENTRY_MAC,
+	HNS3_DBG_DENTRY_REG,
 	HNS3_DBG_DENTRY_COMMON,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 8a92ab448a19..2f6662892edc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -15,62 +15,62 @@ static const char * const hclge_mac_state_str[] = {
 };
 
 static const struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = {
-	{ .reg_type = "bios common",
+	{ .cmd = HNAE3_DBG_CMD_REG_BIOS_COMMON,
 	  .dfx_msg = &hclge_dbg_bios_common_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_bios_common_reg),
 		       .offset = HCLGE_DBG_DFX_BIOS_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_BIOS_COMMON_REG } },
-	{ .reg_type = "ssu",
+	{ .cmd = HNAE3_DBG_CMD_REG_SSU,
 	  .dfx_msg = &hclge_dbg_ssu_reg_0[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ssu_reg_0),
 		       .offset = HCLGE_DBG_DFX_SSU_0_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_SSU_REG_0 } },
-	{ .reg_type = "ssu",
+	{ .cmd = HNAE3_DBG_CMD_REG_SSU,
 	  .dfx_msg = &hclge_dbg_ssu_reg_1[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ssu_reg_1),
 		       .offset = HCLGE_DBG_DFX_SSU_1_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_SSU_REG_1 } },
-	{ .reg_type = "ssu",
+	{ .cmd = HNAE3_DBG_CMD_REG_SSU,
 	  .dfx_msg = &hclge_dbg_ssu_reg_2[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ssu_reg_2),
 		       .offset = HCLGE_DBG_DFX_SSU_2_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_SSU_REG_2 } },
-	{ .reg_type = "igu egu",
+	{ .cmd = HNAE3_DBG_CMD_REG_IGU_EGU,
 	  .dfx_msg = &hclge_dbg_igu_egu_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_igu_egu_reg),
 		       .offset = HCLGE_DBG_DFX_IGU_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_IGU_EGU_REG } },
-	{ .reg_type = "rpu",
+	{ .cmd = HNAE3_DBG_CMD_REG_RPU,
 	  .dfx_msg = &hclge_dbg_rpu_reg_0[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rpu_reg_0),
 		       .offset = HCLGE_DBG_DFX_RPU_0_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_RPU_REG_0 } },
-	{ .reg_type = "rpu",
+	{ .cmd = HNAE3_DBG_CMD_REG_RPU,
 	  .dfx_msg = &hclge_dbg_rpu_reg_1[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rpu_reg_1),
 		       .offset = HCLGE_DBG_DFX_RPU_1_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_RPU_REG_1 } },
-	{ .reg_type = "ncsi",
+	{ .cmd = HNAE3_DBG_CMD_REG_NCSI,
 	  .dfx_msg = &hclge_dbg_ncsi_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ncsi_reg),
 		       .offset = HCLGE_DBG_DFX_NCSI_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_NCSI_REG } },
-	{ .reg_type = "rtc",
+	{ .cmd = HNAE3_DBG_CMD_REG_RTC,
 	  .dfx_msg = &hclge_dbg_rtc_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rtc_reg),
 		       .offset = HCLGE_DBG_DFX_RTC_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_RTC_REG } },
-	{ .reg_type = "ppp",
+	{ .cmd = HNAE3_DBG_CMD_REG_PPP,
 	  .dfx_msg = &hclge_dbg_ppp_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_ppp_reg),
 		       .offset = HCLGE_DBG_DFX_PPP_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_PPP_REG } },
-	{ .reg_type = "rcb",
+	{ .cmd = HNAE3_DBG_CMD_REG_RCB,
 	  .dfx_msg = &hclge_dbg_rcb_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_rcb_reg),
 		       .offset = HCLGE_DBG_DFX_RCB_OFFSET,
 		       .cmd = HCLGE_OPC_DFX_RCB_REG } },
-	{ .reg_type = "tqp",
+	{ .cmd = HNAE3_DBG_CMD_REG_TQP,
 	  .dfx_msg = &hclge_dbg_tqp_reg[0],
 	  .reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_tqp_reg),
 		       .offset = HCLGE_DBG_DFX_TQP_OFFSET,
@@ -106,7 +106,8 @@ static char *hclge_dbg_get_func_id_str(char *buf, u8 id)
 	return buf;
 }
 
-static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset)
+static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset,
+				    u32 *bd_num)
 {
 	struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT];
 	int entries_per_desc;
@@ -116,13 +117,21 @@ static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset)
 	ret = hclge_query_bd_num_cmd_send(hdev, desc);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"get dfx bdnum fail, ret = %d\n", ret);
+			"failed to get dfx bd_num, offset = %d, ret = %d\n",
+			offset, ret);
 		return ret;
 	}
 
 	entries_per_desc = ARRAY_SIZE(desc[0].data);
 	index = offset % entries_per_desc;
-	return le32_to_cpu(desc[offset / entries_per_desc].data[index]);
+
+	*bd_num = le32_to_cpu(desc[offset / entries_per_desc].data[index]);
+	if (!(*bd_num)) {
+		dev_err(&hdev->pdev->dev, "The value of dfx bd_num is 0!\n");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 static int hclge_dbg_cmd_send(struct hclge_dev *hdev,
@@ -149,66 +158,108 @@ static int hclge_dbg_cmd_send(struct hclge_dev *hdev,
 	return ret;
 }
 
-static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
-				      const struct hclge_dbg_reg_type_info *reg_info,
-				      const char *cmd_buf)
+static int
+hclge_dbg_dump_reg_tqp(struct hclge_dev *hdev,
+		       const struct hclge_dbg_reg_type_info *reg_info,
+		       char *buf, int len, int *pos)
 {
-#define IDX_OFFSET	1
-
-	const char *s = &cmd_buf[strlen(reg_info->reg_type) + IDX_OFFSET];
 	const struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg;
 	const struct hclge_dbg_reg_common_msg *reg_msg = &reg_info->reg_msg;
 	struct hclge_desc *desc_src;
+	u32 index, entry, i, cnt;
+	int bd_num, min_num, ret;
 	struct hclge_desc *desc;
-	int entries_per_desc;
-	int bd_num, buf_len;
-	int index = 0;
-	int min_num;
-	int ret, i;
 
-	if (*s) {
-		ret = kstrtouint(s, 0, &index);
-		index = (ret != 0) ? 0 : index;
-	}
+	ret = hclge_dbg_get_dfx_bd_num(hdev, reg_msg->offset, &bd_num);
+	if (ret)
+		return ret;
 
-	bd_num = hclge_dbg_get_dfx_bd_num(hdev, reg_msg->offset);
-	if (bd_num <= 0) {
-		dev_err(&hdev->pdev->dev, "get cmd(%d) bd num(%d) failed\n",
-			reg_msg->offset, bd_num);
-		return;
-	}
-
-	buf_len = sizeof(struct hclge_desc) * bd_num;
-	desc_src = kzalloc(buf_len, GFP_KERNEL);
+	desc_src = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
 	if (!desc_src)
-		return;
+		return -ENOMEM;
 
-	desc = desc_src;
-	ret = hclge_dbg_cmd_send(hdev, desc, index, bd_num, reg_msg->cmd);
-	if (ret) {
-		kfree(desc_src);
-		return;
-	}
+	min_num = min_t(int, bd_num * HCLGE_DESC_DATA_LEN, reg_msg->msg_num);
 
-	entries_per_desc = ARRAY_SIZE(desc->data);
-	min_num = min_t(int, bd_num * entries_per_desc, reg_msg->msg_num);
+	for (i = 0, cnt = 0; i < min_num; i++, dfx_message++)
+		*pos += scnprintf(buf + *pos, len - *pos, "item%u = %s\n",
+				  cnt++, dfx_message->message);
 
-	desc = desc_src;
-	for (i = 0; i < min_num; i++) {
-		if (i > 0 && (i % entries_per_desc) == 0)
-			desc++;
-		if (dfx_message->flag)
-			dev_info(&hdev->pdev->dev, "%s: 0x%x\n",
-				 dfx_message->message,
-				 le32_to_cpu(desc->data[i % entries_per_desc]));
+	for (i = 0; i < cnt; i++)
+		*pos += scnprintf(buf + *pos, len - *pos, "item%u\t", i);
 
-		dfx_message++;
+	*pos += scnprintf(buf + *pos, len - *pos, "\n");
+
+	for (index = 0; index < hdev->vport[0].alloc_tqps; index++) {
+		dfx_message = reg_info->dfx_msg;
+		desc = desc_src;
+		ret = hclge_dbg_cmd_send(hdev, desc, index, bd_num,
+					 reg_msg->cmd);
+		if (ret)
+			break;
+
+		for (i = 0; i < min_num; i++, dfx_message++) {
+			entry = i % HCLGE_DESC_DATA_LEN;
+			if (i > 0 && !entry)
+				desc++;
+
+			*pos += scnprintf(buf + *pos, len - *pos, "%#x\t",
+					  le32_to_cpu(desc->data[entry]));
+		}
+		*pos += scnprintf(buf + *pos, len - *pos, "\n");
 	}
 
 	kfree(desc_src);
+	return ret;
 }
 
-static void hclge_dbg_dump_mac_enable_status(struct hclge_dev *hdev)
+static int
+hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
+			  const struct hclge_dbg_reg_type_info *reg_info,
+			  char *buf, int len, int *pos)
+{
+	const struct hclge_dbg_reg_common_msg *reg_msg = &reg_info->reg_msg;
+	const struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg;
+	struct hclge_desc *desc_src;
+	int bd_num, min_num, ret;
+	struct hclge_desc *desc;
+	u32 entry, i;
+
+	ret = hclge_dbg_get_dfx_bd_num(hdev, reg_msg->offset, &bd_num);
+	if (ret)
+		return ret;
+
+	desc_src = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc_src)
+		return -ENOMEM;
+
+	desc = desc_src;
+
+	ret = hclge_dbg_cmd_send(hdev, desc, 0, bd_num, reg_msg->cmd);
+	if (ret) {
+		kfree(desc);
+		return ret;
+	}
+
+	min_num = min_t(int, bd_num * HCLGE_DESC_DATA_LEN, reg_msg->msg_num);
+
+	for (i = 0; i < min_num; i++, dfx_message++) {
+		entry = i % HCLGE_DESC_DATA_LEN;
+		if (i > 0 && !entry)
+			desc++;
+		if (!dfx_message->flag)
+			continue;
+
+		*pos += scnprintf(buf + *pos, len - *pos, "%s: %#x\n",
+				  dfx_message->message,
+				  le32_to_cpu(desc->data[entry]));
+	}
+
+	kfree(desc_src);
+	return 0;
+}
+
+static int  hclge_dbg_dump_mac_enable_status(struct hclge_dev *hdev, char *buf,
+					     int len, int *pos)
 {
 	struct hclge_config_mac_mode_cmd *req;
 	struct hclge_desc desc;
@@ -221,43 +272,51 @@ static void hclge_dbg_dump_mac_enable_status(struct hclge_dev *hdev)
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"failed to dump mac enable status, ret = %d\n", ret);
-		return;
+		return ret;
 	}
 
 	req = (struct hclge_config_mac_mode_cmd *)desc.data;
 	loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
 
-	dev_info(&hdev->pdev->dev, "config_mac_trans_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_TX_EN_B));
-	dev_info(&hdev->pdev->dev, "config_mac_rcv_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_EN_B));
-	dev_info(&hdev->pdev->dev, "config_pad_trans_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_PAD_TX_B));
-	dev_info(&hdev->pdev->dev, "config_pad_rcv_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_PAD_RX_B));
-	dev_info(&hdev->pdev->dev, "config_1588_trans_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_1588_TX_B));
-	dev_info(&hdev->pdev->dev, "config_1588_rcv_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_1588_RX_B));
-	dev_info(&hdev->pdev->dev, "config_mac_app_loop_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_APP_LP_B));
-	dev_info(&hdev->pdev->dev, "config_mac_line_loop_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_LINE_LP_B));
-	dev_info(&hdev->pdev->dev, "config_mac_fcs_tx_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_FCS_TX_B));
-	dev_info(&hdev->pdev->dev, "config_mac_rx_oversize_truncate_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B));
-	dev_info(&hdev->pdev->dev, "config_mac_rx_fcs_strip_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B));
-	dev_info(&hdev->pdev->dev, "config_mac_rx_fcs_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_FCS_B));
-	dev_info(&hdev->pdev->dev, "config_mac_tx_under_min_err_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B));
-	dev_info(&hdev->pdev->dev, "config_mac_tx_oversize_truncate_en: %#x\n",
-		 hnae3_get_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_trans_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_TX_EN_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_rcv_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_RX_EN_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "pad_trans_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_PAD_TX_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "pad_rcv_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_PAD_RX_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "1588_trans_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_1588_TX_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "1588_rcv_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_1588_RX_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_app_loop_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_APP_LP_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_line_loop_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_LINE_LP_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_fcs_tx_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_FCS_TX_B));
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "mac_rx_oversize_truncate_en: %#x\n",
+			  hnae3_get_bit(loop_en,
+					HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_rx_fcs_strip_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B));
+	*pos += scnprintf(buf + *pos, len - *pos, "mac_rx_fcs_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_RX_FCS_B));
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "mac_tx_under_min_err_en: %#x\n",
+			  hnae3_get_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B));
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "mac_tx_oversize_truncate_en: %#x\n",
+			  hnae3_get_bit(loop_en,
+					HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B));
+
+	return 0;
 }
 
-static void hclge_dbg_dump_mac_frame_size(struct hclge_dev *hdev)
+static int hclge_dbg_dump_mac_frame_size(struct hclge_dev *hdev, char *buf,
+					 int len, int *pos)
 {
 	struct hclge_config_max_frm_size_cmd *req;
 	struct hclge_desc desc;
@@ -269,17 +328,21 @@ static void hclge_dbg_dump_mac_frame_size(struct hclge_dev *hdev)
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"failed to dump mac frame size, ret = %d\n", ret);
-		return;
+		return ret;
 	}
 
 	req = (struct hclge_config_max_frm_size_cmd *)desc.data;
 
-	dev_info(&hdev->pdev->dev, "max_frame_size: %u\n",
-		 le16_to_cpu(req->max_frm_size));
-	dev_info(&hdev->pdev->dev, "min_frame_size: %u\n", req->min_frm_size);
+	*pos += scnprintf(buf + *pos, len - *pos, "max_frame_size: %u\n",
+			  le16_to_cpu(req->max_frm_size));
+	*pos += scnprintf(buf + *pos, len - *pos, "min_frame_size: %u\n",
+			  req->min_frm_size);
+
+	return 0;
 }
 
-static void hclge_dbg_dump_mac_speed_duplex(struct hclge_dev *hdev)
+static int hclge_dbg_dump_mac_speed_duplex(struct hclge_dev *hdev, char *buf,
+					   int len, int *pos)
 {
 #define HCLGE_MAC_SPEED_SHIFT	0
 #define HCLGE_MAC_SPEED_MASK	GENMASK(5, 0)
@@ -295,25 +358,34 @@ static void hclge_dbg_dump_mac_speed_duplex(struct hclge_dev *hdev)
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"failed to dump mac speed duplex, ret = %d\n", ret);
-		return;
+		return ret;
 	}
 
 	req = (struct hclge_config_mac_speed_dup_cmd *)desc.data;
 
-	dev_info(&hdev->pdev->dev, "speed: %#lx\n",
-		 hnae3_get_field(req->speed_dup, HCLGE_MAC_SPEED_MASK,
-				 HCLGE_MAC_SPEED_SHIFT));
-	dev_info(&hdev->pdev->dev, "duplex: %#x\n",
-		 hnae3_get_bit(req->speed_dup, HCLGE_MAC_DUPLEX_SHIFT));
+	*pos += scnprintf(buf + *pos, len - *pos, "speed: %#lx\n",
+			  hnae3_get_field(req->speed_dup, HCLGE_MAC_SPEED_MASK,
+					  HCLGE_MAC_SPEED_SHIFT));
+	*pos += scnprintf(buf + *pos, len - *pos, "duplex: %#x\n",
+			  hnae3_get_bit(req->speed_dup,
+					HCLGE_MAC_DUPLEX_SHIFT));
+	return 0;
 }
 
-static void hclge_dbg_dump_mac(struct hclge_dev *hdev)
+static int hclge_dbg_dump_mac(struct hclge_dev *hdev, char *buf, int len)
 {
-	hclge_dbg_dump_mac_enable_status(hdev);
+	int pos = 0;
+	int ret;
 
-	hclge_dbg_dump_mac_frame_size(hdev);
+	ret = hclge_dbg_dump_mac_enable_status(hdev, buf, len, &pos);
+	if (ret)
+		return ret;
 
-	hclge_dbg_dump_mac_speed_duplex(hdev);
+	ret = hclge_dbg_dump_mac_frame_size(hdev, buf, len, &pos);
+	if (ret)
+		return ret;
+
+	return hclge_dbg_dump_mac_speed_duplex(hdev, buf, len, &pos);
 }
 
 static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf)
@@ -432,35 +504,28 @@ err_dcb_cmd_send:
 		cmd, ret);
 }
 
-static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf)
+static int hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev,
+				  enum hnae3_dbg_cmd cmd, char *buf, int len)
 {
 	const struct hclge_dbg_reg_type_info *reg_info;
-	bool has_dump = false;
+	int pos = 0, ret = 0;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(hclge_dbg_reg_info); i++) {
 		reg_info = &hclge_dbg_reg_info[i];
-		if (!strncmp(cmd_buf, reg_info->reg_type,
-			     strlen(reg_info->reg_type))) {
-			hclge_dbg_dump_reg_common(hdev, reg_info, cmd_buf);
-			has_dump = true;
+		if (cmd == reg_info->cmd) {
+			if (cmd == HNAE3_DBG_CMD_REG_TQP)
+				return hclge_dbg_dump_reg_tqp(hdev, reg_info,
+							      buf, len, &pos);
+
+			ret = hclge_dbg_dump_reg_common(hdev, reg_info, buf,
+							len, &pos);
+			if (ret)
+				break;
 		}
 	}
 
-	if (strncmp(cmd_buf, "mac", strlen("mac")) == 0) {
-		hclge_dbg_dump_mac(hdev);
-		has_dump = true;
-	}
-
-	if (strncmp(cmd_buf, "dcb", 3) == 0) {
-		hclge_dbg_dump_dcb(hdev, &cmd_buf[sizeof("dcb")]);
-		has_dump = true;
-	}
-
-	if (!has_dump) {
-		dev_info(&hdev->pdev->dev, "unknown command\n");
-		return;
-	}
+	return ret;
 }
 
 static void hclge_print_tc_info(struct hclge_dev *hdev, bool flag, int index)
@@ -1807,7 +1872,7 @@ static int hclge_dbg_dump_mac_mc(struct hclge_dev *hdev, char *buf, int len)
 
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
-#define DUMP_REG	"dump reg"
+#define DUMP_REG_DCB	"dump reg dcb"
 #define DUMP_TM_MAP	"dump tm map"
 
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -1827,8 +1892,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_qos_pri_map(hdev);
 	} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
 		hclge_dbg_dump_qos_buf_cfg(hdev);
-	} else if (strncmp(cmd_buf, DUMP_REG, strlen(DUMP_REG)) == 0) {
-		hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
+	} else if (strncmp(cmd_buf, DUMP_REG_DCB, strlen(DUMP_REG_DCB)) == 0) {
+		hclge_dbg_dump_dcb(hdev, &cmd_buf[sizeof(DUMP_REG_DCB)]);
 	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
@@ -1889,18 +1954,65 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_NCL_CONFIG,
 		.dbg_dump = hclge_dbg_dump_ncl_config,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_BIOS_COMMON,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_SSU,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_IGU_EGU,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_RPU,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_NCSI,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_RTC,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_PPP,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_RCB,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_TQP,
+		.dbg_dump_reg = hclge_dbg_dump_reg_cmd,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_MAC,
+		.dbg_dump = hclge_dbg_dump_mac,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
 		       char *buf, int len)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
+	const struct hclge_dbg_func *cmd_func;
 	struct hclge_dev *hdev = vport->back;
 	u32 i;
 
 	for (i = 0; i < ARRAY_SIZE(hclge_dbg_cmd_func); i++) {
-		if (cmd == hclge_dbg_cmd_func[i].cmd)
-			return hclge_dbg_cmd_func[i].dbg_dump(hdev, buf, len);
+		if (cmd == hclge_dbg_cmd_func[i].cmd) {
+			cmd_func = &hclge_dbg_cmd_func[i];
+			if (cmd_func->dbg_dump)
+				return cmd_func->dbg_dump(hdev, buf, len);
+			else
+				return cmd_func->dbg_dump_reg(hdev, cmd, buf,
+							      len);
+		}
 	}
 
 	dev_err(&hdev->pdev->dev, "invalid command(%d)\n", cmd);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index bf6a0ff66047..933f157294fd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -77,7 +77,7 @@ struct hclge_dbg_dfx_message {
 
 #define HCLGE_DBG_MAC_REG_TYPE_LEN	32
 struct hclge_dbg_reg_type_info {
-	const char *reg_type;
+	enum hnae3_dbg_cmd cmd;
 	const struct hclge_dbg_dfx_message *dfx_msg;
 	struct hclge_dbg_reg_common_msg reg_msg;
 };
@@ -85,6 +85,8 @@ struct hclge_dbg_reg_type_info {
 struct hclge_dbg_func {
 	enum hnae3_dbg_cmd cmd;
 	int (*dbg_dump)(struct hclge_dev *hdev, char *buf, int len);
+	int (*dbg_dump_reg)(struct hclge_dev *hdev, enum hnae3_dbg_cmd cmd,
+			    char *buf, int len);
 };
 
 static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {

From 365e860aa7a74afe1ba7157b3bfc547551c5795e Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 10:21:31 +0800
Subject: [PATCH 0382/3292] net: hns3: refactor dump reg dcb info of debugfs

Currently, the debugfs command for reg dcb info is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"dcb" for it, and query it by command "cat dcb",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat dcb
qset_id  roce_qset_mask  nic_qset_mask  qset_shaping_pass  qset_bp_status
0000           0x1            0x1             0x1               0x0
0001           0x1            0x1             0x1               0x0
0002           0x1            0x1             0x1               0x0
0003           0x1            0x1             0x1               0x0
0004           0x1            0x1             0x1               0x0
0005           0x1            0x1             0x1               0x0
0006           0x1            0x1             0x1               0x0
0007           0x1            0x1             0x1               0x0
pri_id  pri_mask  pri_cshaping_pass  pri_pshaping_pass
000       0x1           0x0                0x1
001       0x1           0x0                0x0
002       0x1           0x0                0x0
003       0x1           0x0                0x0
004       0x1           0x0                0x0
005       0x1           0x0                0x0
006       0x1           0x0                0x0
007       0x1           0x0                0x0
pg_id  pg_mask  pg_cshaping_pass  pg_pshaping_pass
000      0x1           0x0               0x1

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  18 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 292 ++++++++++++------
 3 files changed, 213 insertions(+), 98 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 9ef4132ab2ca..65fd333e9374 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -275,6 +275,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_REG_RCB,
 	HNAE3_DBG_CMD_REG_TQP,
 	HNAE3_DBG_CMD_REG_MAC,
+	HNAE3_DBG_CMD_REG_DCB,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index af0751e53f79..9add38948e98 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -205,6 +205,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "dcb",
+		.cmd = HNAE3_DBG_CMD_REG_DCB,
+		.dentry = HNS3_DBG_DENTRY_REG,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -581,10 +588,6 @@ static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
 
 static void hns3_dbg_help(struct hnae3_handle *h)
 {
-#define HNS3_DBG_BUF_LEN 256
-
-	char printf_buf[HNS3_DBG_BUF_LEN];
-
 	dev_info(&h->pdev->dev, "available commands\n");
 	dev_info(&h->pdev->dev, "queue info <number>\n");
 	dev_info(&h->pdev->dev, "queue map\n");
@@ -601,13 +604,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
-
-	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
-	strncat(printf_buf, "dump reg dcb <port_id> <pri_id> <pg_id>",
-		HNS3_DBG_BUF_LEN - 1);
-	strncat(printf_buf + strlen(printf_buf), " <rq_id> <nq_id> <qset_id>\n",
-		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
-	dev_info(&h->pdev->dev, "%s", printf_buf);
 }
 
 static void
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 2f6662892edc..1ad7bffb4d62 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -388,120 +388,237 @@ static int hclge_dbg_dump_mac(struct hclge_dev *hdev, char *buf, int len)
 	return hclge_dbg_dump_mac_speed_duplex(hdev, buf, len, &pos);
 }
 
-static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf)
+static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len,
+				   int *pos)
 {
-	struct device *dev = &hdev->pdev->dev;
 	struct hclge_dbg_bitmap_cmd *bitmap;
-	enum hclge_opcode_type cmd;
-	int rq_id, pri_id, qset_id;
-	int port_id, nq_id, pg_id;
-	struct hclge_desc desc[2];
+	struct hclge_desc desc;
+	u16 qset_id, qset_num;
+	int ret;
 
-	int cnt, ret;
+	ret = hclge_tm_get_qset_num(hdev, &qset_num);
+	if (ret)
+		return ret;
 
-	cnt = sscanf(cmd_buf, "%i %i %i %i %i %i",
-		     &port_id, &pri_id, &pg_id, &rq_id, &nq_id, &qset_id);
-	if (cnt != 6) {
-		dev_err(&hdev->pdev->dev,
-			"dump dcb: bad command parameter, cnt=%d\n", cnt);
-		return;
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "qset_id  roce_qset_mask  nic_qset_mask  qset_shaping_pass  qset_bp_status\n");
+	for (qset_id = 0; qset_id < qset_num; qset_id++) {
+		ret = hclge_dbg_cmd_send(hdev, &desc, qset_id, 1,
+					 HCLGE_OPC_QSET_DFX_STS);
+		if (ret)
+			return ret;
+
+		bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+
+		*pos += scnprintf(buf + *pos, len - *pos,
+				  "%04u           %#x            %#x             %#x               %#x\n",
+				  qset_id, bitmap->bit0, bitmap->bit1,
+				  bitmap->bit2, bitmap->bit3);
 	}
 
-	cmd = HCLGE_OPC_QSET_DFX_STS;
-	ret = hclge_dbg_cmd_send(hdev, desc, qset_id, 1, cmd);
+	return 0;
+}
+
+static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len,
+				  int *pos)
+{
+	struct hclge_dbg_bitmap_cmd *bitmap;
+	struct hclge_desc desc;
+	u8 pri_id, pri_num;
+	int ret;
+
+	ret = hclge_tm_get_pri_num(hdev, &pri_num);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
-	dev_info(dev, "roce_qset_mask: 0x%x\n", bitmap->bit0);
-	dev_info(dev, "nic_qs_mask: 0x%x\n", bitmap->bit1);
-	dev_info(dev, "qs_shaping_pass: 0x%x\n", bitmap->bit2);
-	dev_info(dev, "qs_bp_sts: 0x%x\n", bitmap->bit3);
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "pri_id  pri_mask  pri_cshaping_pass  pri_pshaping_pass\n");
+	for (pri_id = 0; pri_id < pri_num; pri_id++) {
+		ret = hclge_dbg_cmd_send(hdev, &desc, pri_id, 1,
+					 HCLGE_OPC_PRI_DFX_STS);
+		if (ret)
+			return ret;
 
-	cmd = HCLGE_OPC_PRI_DFX_STS;
-	ret = hclge_dbg_cmd_send(hdev, desc, pri_id, 1, cmd);
+		bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+
+		*pos += scnprintf(buf + *pos, len - *pos,
+				  "%03u       %#x           %#x                %#x\n",
+				  pri_id, bitmap->bit0, bitmap->bit1,
+				  bitmap->bit2);
+	}
+
+	return 0;
+}
+
+static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len,
+				 int *pos)
+{
+	struct hclge_dbg_bitmap_cmd *bitmap;
+	struct hclge_desc desc;
+	u8 pg_id;
+	int ret;
+
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "pg_id  pg_mask  pg_cshaping_pass  pg_pshaping_pass\n");
+	for (pg_id = 0; pg_id < hdev->tm_info.num_pg; pg_id++) {
+		ret = hclge_dbg_cmd_send(hdev, &desc, pg_id, 1,
+					 HCLGE_OPC_PG_DFX_STS);
+		if (ret)
+			return ret;
+
+		bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
+
+		*pos += scnprintf(buf + *pos, len - *pos,
+				  "%03u      %#x           %#x               %#x\n",
+				  pg_id, bitmap->bit0, bitmap->bit1,
+				  bitmap->bit2);
+	}
+
+	return 0;
+}
+
+static int hclge_dbg_dump_dcb_queue(struct hclge_dev *hdev, char *buf, int len,
+				    int *pos)
+{
+	struct hclge_desc desc;
+	u16 nq_id;
+	int ret;
+
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "nq_id  sch_nic_queue_cnt  sch_roce_queue_cnt\n");
+	for (nq_id = 0; nq_id < hdev->num_tqps; nq_id++) {
+		ret = hclge_dbg_cmd_send(hdev, &desc, nq_id, 1,
+					 HCLGE_OPC_SCH_NQ_CNT);
+		if (ret)
+			return ret;
+
+		*pos += scnprintf(buf + *pos, len - *pos, "%04u           %#x",
+				  nq_id, le32_to_cpu(desc.data[1]));
+
+		ret = hclge_dbg_cmd_send(hdev, &desc, nq_id, 1,
+					 HCLGE_OPC_SCH_RQ_CNT);
+		if (ret)
+			return ret;
+
+		*pos += scnprintf(buf + *pos, len - *pos,
+				  "               %#x\n",
+				  le32_to_cpu(desc.data[1]));
+	}
+
+	return 0;
+}
+
+static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len,
+				   int *pos)
+{
+	struct hclge_dbg_bitmap_cmd *bitmap;
+	struct hclge_desc desc;
+	u8 port_id = 0;
+	int ret;
+
+	ret = hclge_dbg_cmd_send(hdev, &desc, port_id, 1,
+				 HCLGE_OPC_PORT_DFX_STS);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
-	dev_info(dev, "pri_mask: 0x%x\n", bitmap->bit0);
-	dev_info(dev, "pri_cshaping_pass: 0x%x\n", bitmap->bit1);
-	dev_info(dev, "pri_pshaping_pass: 0x%x\n", bitmap->bit2);
+	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1];
 
-	cmd = HCLGE_OPC_PG_DFX_STS;
-	ret = hclge_dbg_cmd_send(hdev, desc, pg_id, 1, cmd);
+	*pos += scnprintf(buf + *pos, len - *pos, "port_mask: %#x\n",
+			 bitmap->bit0);
+	*pos += scnprintf(buf + *pos, len - *pos, "port_shaping_pass: %#x\n",
+			 bitmap->bit1);
+
+	return 0;
+}
+
+static int hclge_dbg_dump_dcb_tm(struct hclge_dev *hdev, char *buf, int len,
+				 int *pos)
+{
+	struct hclge_desc desc[2];
+	u8 port_id = 0;
+	int ret;
+
+	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+				 HCLGE_OPC_TM_INTERNAL_CNT);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
-	dev_info(dev, "pg_mask: 0x%x\n", bitmap->bit0);
-	dev_info(dev, "pg_cshaping_pass: 0x%x\n", bitmap->bit1);
-	dev_info(dev, "pg_pshaping_pass: 0x%x\n", bitmap->bit2);
+	*pos += scnprintf(buf + *pos, len - *pos, "SCH_NIC_NUM: %#x\n",
+			  le32_to_cpu(desc[0].data[1]));
+	*pos += scnprintf(buf + *pos, len - *pos, "SCH_ROCE_NUM: %#x\n",
+			  le32_to_cpu(desc[0].data[2]));
 
-	cmd = HCLGE_OPC_PORT_DFX_STS;
-	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1, cmd);
+	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 2,
+				 HCLGE_OPC_TM_INTERNAL_STS);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	bitmap = (struct hclge_dbg_bitmap_cmd *)&desc[0].data[1];
-	dev_info(dev, "port_mask: 0x%x\n", bitmap->bit0);
-	dev_info(dev, "port_shaping_pass: 0x%x\n", bitmap->bit1);
+	*pos += scnprintf(buf + *pos, len - *pos, "pri_bp: %#x\n",
+			  le32_to_cpu(desc[0].data[1]));
+	*pos += scnprintf(buf + *pos, len - *pos, "fifo_dfx_info: %#x\n",
+			  le32_to_cpu(desc[0].data[2]));
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "sch_roce_fifo_afull_gap: %#x\n",
+			  le32_to_cpu(desc[0].data[3]));
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "tx_private_waterline: %#x\n",
+			  le32_to_cpu(desc[0].data[4]));
+	*pos += scnprintf(buf + *pos, len - *pos, "tm_bypass_en: %#x\n",
+			  le32_to_cpu(desc[0].data[5]));
+	*pos += scnprintf(buf + *pos, len - *pos, "SSU_TM_BYPASS_EN: %#x\n",
+			  le32_to_cpu(desc[1].data[0]));
+	*pos += scnprintf(buf + *pos, len - *pos, "SSU_RESERVE_CFG: %#x\n",
+			  le32_to_cpu(desc[1].data[1]));
 
-	cmd = HCLGE_OPC_SCH_NQ_CNT;
-	ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, cmd);
+	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER)
+		return 0;
+
+	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1,
+				 HCLGE_OPC_TM_INTERNAL_STS_1);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	dev_info(dev, "sch_nq_cnt: 0x%x\n", le32_to_cpu(desc[0].data[1]));
+	*pos += scnprintf(buf + *pos, len - *pos, "TC_MAP_SEL: %#x\n",
+			  le32_to_cpu(desc[0].data[1]));
+	*pos += scnprintf(buf + *pos, len - *pos, "IGU_PFC_PRI_EN: %#x\n",
+			  le32_to_cpu(desc[0].data[2]));
+	*pos += scnprintf(buf + *pos, len - *pos, "MAC_PFC_PRI_EN: %#x\n",
+			  le32_to_cpu(desc[0].data[3]));
+	*pos += scnprintf(buf + *pos, len - *pos, "IGU_PRI_MAP_TC_CFG: %#x\n",
+			  le32_to_cpu(desc[0].data[4]));
+	*pos += scnprintf(buf + *pos, len - *pos,
+			  "IGU_TX_PRI_MAP_TC_CFG: %#x\n",
+			  le32_to_cpu(desc[0].data[5]));
 
-	cmd = HCLGE_OPC_SCH_RQ_CNT;
-	ret = hclge_dbg_cmd_send(hdev, desc, nq_id, 1, cmd);
+	return 0;
+}
+
+static int hclge_dbg_dump_dcb(struct hclge_dev *hdev, char *buf, int len)
+{
+	int pos = 0;
+	int ret;
+
+	ret = hclge_dbg_dump_dcb_qset(hdev, buf, len, &pos);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	dev_info(dev, "sch_rq_cnt: 0x%x\n", le32_to_cpu(desc[0].data[1]));
-
-	cmd = HCLGE_OPC_TM_INTERNAL_STS;
-	ret = hclge_dbg_cmd_send(hdev, desc, 0, 2, cmd);
+	ret = hclge_dbg_dump_dcb_pri(hdev, buf, len, &pos);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	dev_info(dev, "pri_bp: 0x%x\n", le32_to_cpu(desc[0].data[1]));
-	dev_info(dev, "fifo_dfx_info: 0x%x\n", le32_to_cpu(desc[0].data[2]));
-	dev_info(dev, "sch_roce_fifo_afull_gap: 0x%x\n",
-		 le32_to_cpu(desc[0].data[3]));
-	dev_info(dev, "tx_private_waterline: 0x%x\n",
-		 le32_to_cpu(desc[0].data[4]));
-	dev_info(dev, "tm_bypass_en: 0x%x\n", le32_to_cpu(desc[0].data[5]));
-	dev_info(dev, "SSU_TM_BYPASS_EN: 0x%x\n", le32_to_cpu(desc[1].data[0]));
-	dev_info(dev, "SSU_RESERVE_CFG: 0x%x\n", le32_to_cpu(desc[1].data[1]));
-
-	cmd = HCLGE_OPC_TM_INTERNAL_CNT;
-	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1, cmd);
+	ret = hclge_dbg_dump_dcb_pg(hdev, buf, len, &pos);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	dev_info(dev, "SCH_NIC_NUM: 0x%x\n", le32_to_cpu(desc[0].data[1]));
-	dev_info(dev, "SCH_ROCE_NUM: 0x%x\n", le32_to_cpu(desc[0].data[2]));
-
-	cmd = HCLGE_OPC_TM_INTERNAL_STS_1;
-	ret = hclge_dbg_cmd_send(hdev, desc, port_id, 1, cmd);
+	ret = hclge_dbg_dump_dcb_queue(hdev, buf, len, &pos);
 	if (ret)
-		goto err_dcb_cmd_send;
+		return ret;
 
-	dev_info(dev, "TC_MAP_SEL: 0x%x\n", le32_to_cpu(desc[0].data[1]));
-	dev_info(dev, "IGU_PFC_PRI_EN: 0x%x\n", le32_to_cpu(desc[0].data[2]));
-	dev_info(dev, "MAC_PFC_PRI_EN: 0x%x\n", le32_to_cpu(desc[0].data[3]));
-	dev_info(dev, "IGU_PRI_MAP_TC_CFG: 0x%x\n",
-		 le32_to_cpu(desc[0].data[4]));
-	dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n",
-		 le32_to_cpu(desc[0].data[5]));
-	return;
+	ret = hclge_dbg_dump_dcb_port(hdev, buf, len, &pos);
+	if (ret)
+		return ret;
 
-err_dcb_cmd_send:
-	dev_err(&hdev->pdev->dev,
-		"failed to dump dcb dfx, cmd = %#x, ret = %d\n",
-		cmd, ret);
+	return hclge_dbg_dump_dcb_tm(hdev, buf, len, &pos);
 }
 
 static int hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev,
@@ -1872,7 +1989,6 @@ static int hclge_dbg_dump_mac_mc(struct hclge_dev *hdev, char *buf, int len)
 
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
-#define DUMP_REG_DCB	"dump reg dcb"
 #define DUMP_TM_MAP	"dump tm map"
 
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -1892,8 +2008,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_qos_pri_map(hdev);
 	} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
 		hclge_dbg_dump_qos_buf_cfg(hdev);
-	} else if (strncmp(cmd_buf, DUMP_REG_DCB, strlen(DUMP_REG_DCB)) == 0) {
-		hclge_dbg_dump_dcb(hdev, &cmd_buf[sizeof(DUMP_REG_DCB)]);
 	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
@@ -1994,6 +2108,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_REG_MAC,
 		.dbg_dump = hclge_dbg_dump_mac,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_REG_DCB,
+		.dbg_dump = hclge_dbg_dump_dcb,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From d2f737cf21b87d4239987da69a3b14730c6a57ad Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Thu, 20 May 2021 10:21:32 +0800
Subject: [PATCH 0383/3292] net: hns3: refactor queue map of debugfs

Currently, the debugfs command for queue map is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"queue_map" for it, and query it by command "cat queue_map",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat queue_map
local_queue_id   global_queue_id   vector_id
0                0                 341

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 58 ++++++++++++++-----
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |  1 +
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 65fd333e9374..f844eb27f7cc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -276,6 +276,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_REG_TQP,
 	HNAE3_DBG_CMD_REG_MAC,
 	HNAE3_DBG_CMD_REG_DCB,
+	HNAE3_DBG_CMD_QUEUE_MAP,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 9add38948e98..fc4e17b6d86f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -26,6 +26,9 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
 	{
 		.name = "reg"
 	},
+	{
+		.name = "queue"
+	},
 	/* keep common at the bottom and add new directory above */
 	{
 		.name = "common"
@@ -212,6 +215,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "queue_map",
+		.cmd = HNAE3_DBG_CMD_QUEUE_MAP,
+		.dentry = HNS3_DBG_DENTRY_QUEUE,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -403,27 +413,44 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 	return 0;
 }
 
-static int hns3_dbg_queue_map(struct hnae3_handle *h)
+static const struct hns3_dbg_item queue_map_items[] = {
+	{ "local_queue_id", 2 },
+	{ "global_queue_id", 2 },
+	{ "vector_id", 2 },
+};
+
+static int hns3_dbg_queue_map(struct hnae3_handle *h, char *buf, int len)
 {
+	char data_str[ARRAY_SIZE(queue_map_items)][HNS3_DBG_DATA_STR_LEN];
+	char *result[ARRAY_SIZE(queue_map_items)];
 	struct hns3_nic_priv *priv = h->priv;
-	int i;
+	char content[HNS3_DBG_INFO_LEN];
+	int pos = 0;
+	int j;
+	u32 i;
 
 	if (!h->ae_algo->ops->get_global_queue_id)
 		return -EOPNOTSUPP;
 
-	dev_info(&h->pdev->dev, "map info for queue id and vector id\n");
-	dev_info(&h->pdev->dev,
-		 "local queue id | global queue id | vector id\n");
-	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		u16 global_qid;
+	for (i = 0; i < ARRAY_SIZE(queue_map_items); i++)
+		result[i] = &data_str[i][0];
 
-		global_qid = h->ae_algo->ops->get_global_queue_id(h, i);
+	hns3_dbg_fill_content(content, sizeof(content), queue_map_items,
+			      NULL, ARRAY_SIZE(queue_map_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
 		if (!priv->ring || !priv->ring[i].tqp_vector)
 			continue;
-
-		dev_info(&h->pdev->dev,
-			 "      %4d            %4u            %4d\n",
-			 i, global_qid, priv->ring[i].tqp_vector->vector_irq);
+		j = 0;
+		sprintf(result[j++], "%u", i);
+		sprintf(result[j++], "%u",
+			h->ae_algo->ops->get_global_queue_id(h, i));
+		sprintf(result[j++], "%u",
+			priv->ring[i].tqp_vector->vector_irq);
+		hns3_dbg_fill_content(content, sizeof(content), queue_map_items,
+				      (const char **)result,
+				      ARRAY_SIZE(queue_map_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
 	}
 
 	return 0;
@@ -590,7 +617,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 {
 	dev_info(&h->pdev->dev, "available commands\n");
 	dev_info(&h->pdev->dev, "queue info <number>\n");
-	dev_info(&h->pdev->dev, "queue map\n");
 
 	if (!hns3_is_phys_func(h->pdev))
 		return;
@@ -717,8 +743,6 @@ static int hns3_dbg_check_cmd(struct hnae3_handle *handle, char *cmd_buf)
 		hns3_dbg_help(handle);
 	else if (strncmp(cmd_buf, "queue info", 10) == 0)
 		ret = hns3_dbg_queue_info(handle, cmd_buf);
-	else if (strncmp(cmd_buf, "queue map", 9) == 0)
-		ret = hns3_dbg_queue_map(handle);
 	else if (handle->ae_algo->ops->dbg_run_cmd)
 		ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
 	else
@@ -793,6 +817,10 @@ static int hns3_dbg_get_cmd_index(struct hnae3_handle *handle,
 }
 
 static const struct hns3_dbg_func hns3_dbg_cmd_func[] = {
+	{
+		.cmd = HNAE3_DBG_CMD_QUEUE_MAP,
+		.dbg_dump = hns3_dbg_queue_map,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
 		.dbg_dump = hns3_dbg_dev_info,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 6060bfc21850..4cab37a5d44a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -30,6 +30,7 @@ enum hns3_dbg_dentry_type {
 	HNS3_DBG_DENTRY_RX_BD,
 	HNS3_DBG_DENTRY_MAC,
 	HNS3_DBG_DENTRY_REG,
+	HNS3_DBG_DENTRY_QUEUE,
 	HNS3_DBG_DENTRY_COMMON,
 };
 

From e44c495d95e0dd3a5513f72a830639a72b4c14f1 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Thu, 20 May 2021 10:21:33 +0800
Subject: [PATCH 0384/3292] net: hns3: refactor queue info of debugfs

Currently, the debugfs command for queue info is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create two files
"rx_queue_info" and "tx_queue_info" for it, and query it
by command "cat rx_queue_info" and "cat tx_queue_info",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat rx_queue_info
QUEUE_ID  BD_NUM  BD_LEN  TAIL  HEAD  FBDNUM  PKTNUM   ...
0           0       0     0     0       0       0      ...
1           0       0     0     0       0       0      ...
2           0       0     0     0       0       0      ...

$ cat tx_queue_info
QUEUE_ID  BD_NUM  TC  TAIL  HEAD  FBDNUM  OFFSET  PKTNUM  ...
0           0     0     0     0       0       0        0  ...
1           0     0     0     0       0       0        0  ...
2           0     0     0     0       0       0        0  ...

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   2 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 298 ++++++++++++------
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |   1 +
 3 files changed, 198 insertions(+), 103 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index f844eb27f7cc..f4c87960e436 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -277,6 +277,8 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_REG_MAC,
 	HNAE3_DBG_CMD_REG_DCB,
 	HNAE3_DBG_CMD_QUEUE_MAP,
+	HNAE3_DBG_CMD_RX_QUEUE_INFO,
+	HNAE3_DBG_CMD_TX_QUEUE_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index fc4e17b6d86f..93455c7ed59d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -222,6 +222,20 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "rx_queue_info",
+		.cmd = HNAE3_DBG_CMD_RX_QUEUE_INFO,
+		.dentry = HNS3_DBG_DENTRY_QUEUE,
+		.buf_len = HNS3_DBG_READ_LEN_1MB,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "tx_queue_info",
+		.cmd = HNAE3_DBG_CMD_TX_QUEUE_INFO,
+		.dentry = HNS3_DBG_DENTRY_QUEUE,
+		.buf_len = HNS3_DBG_READ_LEN_1MB,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -282,39 +296,86 @@ static void hns3_dbg_fill_content(char *content, u16 len,
 	*pos++ = '\0';
 }
 
-static int hns3_dbg_queue_info(struct hnae3_handle *h,
-			       const char *cmd_buf)
+static const struct hns3_dbg_item rx_queue_info_items[] = {
+	{ "QUEUE_ID", 2 },
+	{ "BD_NUM", 2 },
+	{ "BD_LEN", 2 },
+	{ "TAIL", 2 },
+	{ "HEAD", 2 },
+	{ "FBDNUM", 2 },
+	{ "PKTNUM", 2 },
+	{ "RING_EN", 2 },
+	{ "RX_RING_EN", 2 },
+	{ "BASE_ADDR", 10 },
+};
+
+static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring,
+				    struct hnae3_ae_dev *ae_dev, char **result,
+				    u32 index)
 {
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
-	struct hns3_nic_priv *priv = h->priv;
-	struct hns3_enet_ring *ring;
 	u32 base_add_l, base_add_h;
-	u32 queue_num, queue_max;
-	u32 value, i;
-	int cnt;
+	u32 j = 0;
+
+	sprintf(result[j++], "%8u", index);
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_RX_RING_BD_NUM_REG));
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_RX_RING_BD_LEN_REG));
+
+	sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_RX_RING_TAIL_REG));
+
+	sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_RX_RING_HEAD_REG));
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_RX_RING_FBDNUM_REG));
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_RX_RING_PKTNUM_RECORD_REG));
+
+	sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_EN_REG) ? "on" : "off");
+
+	if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev))
+		sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base +
+			HNS3_RING_RX_EN_REG) ? "on" : "off");
+	else
+		sprintf(result[j++], "%10s", "NA");
+
+	base_add_h = readl_relaxed(ring->tqp->io_base +
+					HNS3_RING_RX_RING_BASEADDR_H_REG);
+	base_add_l = readl_relaxed(ring->tqp->io_base +
+					HNS3_RING_RX_RING_BASEADDR_L_REG);
+	sprintf(result[j++], "0x%08x%08x", base_add_h, base_add_l);
+}
+
+static int hns3_dbg_rx_queue_info(struct hnae3_handle *h,
+				  char *buf, int len)
+{
+	char data_str[ARRAY_SIZE(rx_queue_info_items)][HNS3_DBG_DATA_STR_LEN];
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+	char *result[ARRAY_SIZE(rx_queue_info_items)];
+	struct hns3_nic_priv *priv = h->priv;
+	char content[HNS3_DBG_INFO_LEN];
+	struct hns3_enet_ring *ring;
+	int pos = 0;
+	u32 i;
 
 	if (!priv->ring) {
 		dev_err(&h->pdev->dev, "priv->ring is NULL\n");
 		return -EFAULT;
 	}
 
-	queue_max = h->kinfo.num_tqps;
-	cnt = kstrtouint(&cmd_buf[11], 0, &queue_num);
-	if (cnt)
-		queue_num = 0;
-	else
-		queue_max = queue_num + 1;
+	for (i = 0; i < ARRAY_SIZE(rx_queue_info_items); i++)
+		result[i] = &data_str[i][0];
 
-	dev_info(&h->pdev->dev, "queue info\n");
-
-	if (queue_num >= h->kinfo.num_tqps) {
-		dev_err(&h->pdev->dev,
-			"Queue number(%u) is out of range(0-%u)\n", queue_num,
-			h->kinfo.num_tqps - 1);
-		return -EINVAL;
-	}
-
-	for (i = queue_num; i < queue_max; i++) {
+	hns3_dbg_fill_content(content, sizeof(content), rx_queue_info_items,
+			      NULL, ARRAY_SIZE(rx_queue_info_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
 		/* Each cycle needs to determine whether the instance is reset,
 		 * to prevent reference to invalid memory. And need to ensure
 		 * that the following code is executed within 100ms.
@@ -324,90 +385,116 @@ static int hns3_dbg_queue_info(struct hnae3_handle *h,
 			return -EPERM;
 
 		ring = &priv->ring[(u32)(i + h->kinfo.num_tqps)];
-		base_add_h = readl_relaxed(ring->tqp->io_base +
-					   HNS3_RING_RX_RING_BASEADDR_H_REG);
-		base_add_l = readl_relaxed(ring->tqp->io_base +
-					   HNS3_RING_RX_RING_BASEADDR_L_REG);
-		dev_info(&h->pdev->dev, "RX(%u) BASE ADD: 0x%08x%08x\n", i,
-			 base_add_h, base_add_l);
+		hns3_dump_rx_queue_info(ring, ae_dev, result, i);
+		hns3_dbg_fill_content(content, sizeof(content),
+				      rx_queue_info_items,
+				      (const char **)result,
+				      ARRAY_SIZE(rx_queue_info_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
+	}
 
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_RX_RING_BD_NUM_REG);
-		dev_info(&h->pdev->dev, "RX(%u) RING BD NUM: %u\n", i, value);
+	return 0;
+}
 
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_RX_RING_BD_LEN_REG);
-		dev_info(&h->pdev->dev, "RX(%u) RING BD LEN: %u\n", i, value);
+static const struct hns3_dbg_item tx_queue_info_items[] = {
+	{ "QUEUE_ID", 2 },
+	{ "BD_NUM", 2 },
+	{ "TC", 2 },
+	{ "TAIL", 2 },
+	{ "HEAD", 2 },
+	{ "FBDNUM", 2 },
+	{ "OFFSET", 2 },
+	{ "PKTNUM", 2 },
+	{ "RING_EN", 2 },
+	{ "TX_RING_EN", 2 },
+	{ "BASE_ADDR", 10 },
+};
 
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_RX_RING_TAIL_REG);
-		dev_info(&h->pdev->dev, "RX(%u) RING TAIL: %u\n", i, value);
+static void hns3_dump_tx_queue_info(struct hns3_enet_ring *ring,
+				    struct hnae3_ae_dev *ae_dev, char **result,
+				    u32 index)
+{
+	u32 base_add_l, base_add_h;
+	u32 j = 0;
 
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_RX_RING_HEAD_REG);
-		dev_info(&h->pdev->dev, "RX(%u) RING HEAD: %u\n", i, value);
+	sprintf(result[j++], "%8u", index);
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_BD_NUM_REG));
 
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_RX_RING_FBDNUM_REG);
-		dev_info(&h->pdev->dev, "RX(%u) RING FBDNUM: %u\n", i, value);
+	sprintf(result[j++], "%2u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_TC_REG));
 
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_RX_RING_PKTNUM_RECORD_REG);
-		dev_info(&h->pdev->dev, "RX(%u) RING PKTNUM: %u\n", i, value);
+	sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_TAIL_REG));
+
+	sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_HEAD_REG));
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_FBDNUM_REG));
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_OFFSET_REG));
+
+	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_TX_RING_PKTNUM_RECORD_REG));
+
+	sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
+		HNS3_RING_EN_REG) ? "on" : "off");
+
+	if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev))
+		sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base +
+			HNS3_RING_TX_EN_REG) ? "on" : "off");
+	else
+		sprintf(result[j++], "%10s", "NA");
+
+	base_add_h = readl_relaxed(ring->tqp->io_base +
+					HNS3_RING_TX_RING_BASEADDR_H_REG);
+	base_add_l = readl_relaxed(ring->tqp->io_base +
+					HNS3_RING_TX_RING_BASEADDR_L_REG);
+	sprintf(result[j++], "0x%08x%08x", base_add_h, base_add_l);
+}
+
+static int hns3_dbg_tx_queue_info(struct hnae3_handle *h,
+				  char *buf, int len)
+{
+	char data_str[ARRAY_SIZE(tx_queue_info_items)][HNS3_DBG_DATA_STR_LEN];
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+	char *result[ARRAY_SIZE(tx_queue_info_items)];
+	struct hns3_nic_priv *priv = h->priv;
+	char content[HNS3_DBG_INFO_LEN];
+	struct hns3_enet_ring *ring;
+	int pos = 0;
+	u32 i;
+
+	if (!priv->ring) {
+		dev_err(&h->pdev->dev, "priv->ring is NULL\n");
+		return -EFAULT;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tx_queue_info_items); i++)
+		result[i] = &data_str[i][0];
+
+	hns3_dbg_fill_content(content, sizeof(content), tx_queue_info_items,
+			      NULL, ARRAY_SIZE(tx_queue_info_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		/* Each cycle needs to determine whether the instance is reset,
+		 * to prevent reference to invalid memory. And need to ensure
+		 * that the following code is executed within 100ms.
+		 */
+		if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+		    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
+			return -EPERM;
 
 		ring = &priv->ring[i];
-		base_add_h = readl_relaxed(ring->tqp->io_base +
-					   HNS3_RING_TX_RING_BASEADDR_H_REG);
-		base_add_l = readl_relaxed(ring->tqp->io_base +
-					   HNS3_RING_TX_RING_BASEADDR_L_REG);
-		dev_info(&h->pdev->dev, "TX(%u) BASE ADD: 0x%08x%08x\n", i,
-			 base_add_h, base_add_l);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_BD_NUM_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING BD NUM: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_TC_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING TC: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_TAIL_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING TAIL: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_HEAD_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING HEAD: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_FBDNUM_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING FBDNUM: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_OFFSET_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING OFFSET: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base +
-				      HNS3_RING_TX_RING_PKTNUM_RECORD_REG);
-		dev_info(&h->pdev->dev, "TX(%u) RING PKTNUM: %u\n", i, value);
-
-		value = readl_relaxed(ring->tqp->io_base + HNS3_RING_EN_REG);
-		dev_info(&h->pdev->dev, "TX/RX(%u) RING EN: %s\n", i,
-			 value ? "enable" : "disable");
-
-		if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) {
-			value = readl_relaxed(ring->tqp->io_base +
-					      HNS3_RING_TX_EN_REG);
-			dev_info(&h->pdev->dev, "TX(%u) RING EN: %s\n", i,
-				 value ? "enable" : "disable");
-
-			value = readl_relaxed(ring->tqp->io_base +
-					      HNS3_RING_RX_EN_REG);
-			dev_info(&h->pdev->dev, "RX(%u) RING EN: %s\n", i,
-				 value ? "enable" : "disable");
-		}
-
-		dev_info(&h->pdev->dev, "\n");
+		hns3_dump_tx_queue_info(ring, ae_dev, result, i);
+		hns3_dbg_fill_content(content, sizeof(content),
+				      tx_queue_info_items,
+				      (const char **)result,
+				      ARRAY_SIZE(tx_queue_info_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
 	}
 
 	return 0;
@@ -616,7 +703,6 @@ static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
 static void hns3_dbg_help(struct hnae3_handle *h)
 {
 	dev_info(&h->pdev->dev, "available commands\n");
-	dev_info(&h->pdev->dev, "queue info <number>\n");
 
 	if (!hns3_is_phys_func(h->pdev))
 		return;
@@ -741,8 +827,6 @@ static int hns3_dbg_check_cmd(struct hnae3_handle *handle, char *cmd_buf)
 
 	if (strncmp(cmd_buf, "help", 4) == 0)
 		hns3_dbg_help(handle);
-	else if (strncmp(cmd_buf, "queue info", 10) == 0)
-		ret = hns3_dbg_queue_info(handle, cmd_buf);
 	else if (handle->ae_algo->ops->dbg_run_cmd)
 		ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
 	else
@@ -833,6 +917,14 @@ static const struct hns3_dbg_func hns3_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_RX_BD,
 		.dbg_dump_bd = hns3_dbg_rx_bd_info,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_RX_QUEUE_INFO,
+		.dbg_dump = hns3_dbg_rx_queue_info,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_TX_QUEUE_INFO,
+		.dbg_dump = hns3_dbg_tx_queue_info,
+	},
 };
 
 static int hns3_dbg_read_cmd(struct hns3_dbg_data *dbg_data,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 4cab37a5d44a..0e109b03f89e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -6,6 +6,7 @@
 
 #define HNS3_DBG_READ_LEN	65536
 #define HNS3_DBG_READ_LEN_128KB	0x20000
+#define HNS3_DBG_READ_LEN_1MB	0x100000
 #define HNS3_DBG_READ_LEN_4MB	0x400000
 #define HNS3_DBG_WRITE_LEN	1024
 

From b5a0b70d77b9be91b8e6dfa5dd3b39ea9cf6be4c Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhao288@hisilicon.com>
Date: Thu, 20 May 2021 10:21:34 +0800
Subject: [PATCH 0385/3292] net: hns3: refactor dump fd tcam of debugfs

Currently, the debugfs command for fd tcam is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"fd_tcam" for it, and query it by command "cat fd_tcam",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat fd_tcam
read result tcam key x(31):
00000000
00000000
00000000
08000000
00000600
00000000
00000000
00000000
00000000
00000000
00000000
00000000
00000000
read result tcam key y(31):
00000000
00000000
00000000
f7ff0000
0000f900
00000000
00000000
00000000
00000000
00000000
00000000
00000000
0000fff8

Signed-off-by: Hao Chen <chenhao288@hisilicon.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 11 ++-
 .../ethernet/hisilicon/hns3/hns3_debugfs.h    |  1 +
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 88 ++++++++++++-------
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |  5 ++
 5 files changed, 74 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index f4c87960e436..730f56d9ae4c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -279,6 +279,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_QUEUE_MAP,
 	HNAE3_DBG_CMD_RX_QUEUE_INFO,
 	HNAE3_DBG_CMD_TX_QUEUE_INFO,
+	HNAE3_DBG_CMD_FD_TCAM,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 93455c7ed59d..37aa891f8133 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -29,6 +29,9 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
 	{
 		.name = "queue"
 	},
+	{
+		.name = "fd"
+	},
 	/* keep common at the bottom and add new directory above */
 	{
 		.name = "common"
@@ -236,6 +239,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN_1MB,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "fd_tcam",
+		.cmd = HNAE3_DBG_CMD_FD_TCAM,
+		.dentry = HNS3_DBG_DENTRY_FD,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -707,7 +717,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	if (!hns3_is_phys_func(h->pdev))
 		return;
 
-	dev_info(&h->pdev->dev, "dump fd tcam\n");
 	dev_info(&h->pdev->dev, "dump tc\n");
 	dev_info(&h->pdev->dev, "dump tm map <q_num>\n");
 	dev_info(&h->pdev->dev, "dump tm\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 0e109b03f89e..f3766ff38bb7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -32,6 +32,7 @@ enum hns3_dbg_dentry_type {
 	HNS3_DBG_DENTRY_MAC,
 	HNS3_DBG_DENTRY_REG,
 	HNS3_DBG_DENTRY_QUEUE,
+	HNS3_DBG_DENTRY_FD,
 	HNS3_DBG_DENTRY_COMMON,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 1ad7bffb4d62..c92800d21cc1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1455,13 +1455,17 @@ static int hclge_dbg_dump_mng_table(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
-static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
-				  bool sel_x, u32 loc)
+#define HCLGE_DBG_TCAM_BUF_SIZE 256
+
+static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
+				  char *tcam_buf,
+				  struct hclge_dbg_tcam_msg tcam_msg)
 {
 	struct hclge_fd_tcam_config_1_cmd *req1;
 	struct hclge_fd_tcam_config_2_cmd *req2;
 	struct hclge_fd_tcam_config_3_cmd *req3;
 	struct hclge_desc desc[3];
+	int pos = 0;
 	int ret, i;
 	u32 *req;
 
@@ -1475,31 +1479,35 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
 	req2 = (struct hclge_fd_tcam_config_2_cmd *)desc[1].data;
 	req3 = (struct hclge_fd_tcam_config_3_cmd *)desc[2].data;
 
-	req1->stage  = stage;
+	req1->stage  = tcam_msg.stage;
 	req1->xy_sel = sel_x ? 1 : 0;
-	req1->index  = cpu_to_le32(loc);
+	req1->index  = cpu_to_le32(tcam_msg.loc);
 
 	ret = hclge_cmd_send(&hdev->hw, desc, 3);
 	if (ret)
 		return ret;
 
-	dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n",
-		 sel_x ? "x" : "y", loc);
+	pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+			 "read result tcam key %s(%u):\n", sel_x ? "x" : "y",
+			 tcam_msg.loc);
 
 	/* tcam_data0 ~ tcam_data1 */
 	req = (u32 *)req1->tcam_data;
 	for (i = 0; i < 2; i++)
-		dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+		pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+				 "%08x\n", *req++);
 
 	/* tcam_data2 ~ tcam_data7 */
 	req = (u32 *)req2->tcam_data;
 	for (i = 0; i < 6; i++)
-		dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+		pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+				 "%08x\n", *req++);
 
 	/* tcam_data8 ~ tcam_data12 */
 	req = (u32 *)req3->tcam_data;
 	for (i = 0; i < 5; i++)
-		dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+		pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+				 "%08x\n", *req++);
 
 	return ret;
 }
@@ -1517,59 +1525,75 @@ static int hclge_dbg_get_rules_location(struct hclge_dev *hdev, u16 *rule_locs)
 	}
 	spin_unlock_bh(&hdev->fd_rule_lock);
 
-	if (cnt != hdev->hclge_fd_rule_num)
+	if (cnt != hdev->hclge_fd_rule_num || cnt == 0)
 		return -EINVAL;
 
 	return cnt;
 }
 
-static void hclge_dbg_fd_tcam(struct hclge_dev *hdev)
+static int hclge_dbg_dump_fd_tcam(struct hclge_dev *hdev, char *buf, int len)
 {
+	u32 rule_num = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1];
+	struct hclge_dbg_tcam_msg tcam_msg;
 	int i, ret, rule_cnt;
 	u16 *rule_locs;
+	char *tcam_buf;
+	int pos = 0;
 
 	if (!hnae3_dev_fd_supported(hdev)) {
 		dev_err(&hdev->pdev->dev,
 			"Only FD-supported dev supports dump fd tcam\n");
-		return;
+		return -EOPNOTSUPP;
 	}
 
-	if (!hdev->hclge_fd_rule_num ||
-	    !hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
-		return;
+	if (!hdev->hclge_fd_rule_num || !rule_num)
+		return 0;
 
-	rule_locs = kcalloc(hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1],
-			    sizeof(u16), GFP_KERNEL);
+	rule_locs = kcalloc(rule_num, sizeof(u16), GFP_KERNEL);
 	if (!rule_locs)
-		return;
+		return -ENOMEM;
+
+	tcam_buf = kzalloc(HCLGE_DBG_TCAM_BUF_SIZE, GFP_KERNEL);
+	if (!tcam_buf) {
+		kfree(rule_locs);
+		return -ENOMEM;
+	}
 
 	rule_cnt = hclge_dbg_get_rules_location(hdev, rule_locs);
-	if (rule_cnt <= 0) {
+	if (rule_cnt < 0) {
+		ret = rule_cnt;
 		dev_err(&hdev->pdev->dev,
-			"failed to get rule number, ret = %d\n", rule_cnt);
-		kfree(rule_locs);
-		return;
+			"failed to get rule number, ret = %d\n", ret);
+		goto out;
 	}
 
 	for (i = 0; i < rule_cnt; i++) {
-		ret = hclge_dbg_fd_tcam_read(hdev, 0, true, rule_locs[i]);
+		tcam_msg.stage = HCLGE_FD_STAGE_1;
+		tcam_msg.loc = rule_locs[i];
+
+		ret = hclge_dbg_fd_tcam_read(hdev, true, tcam_buf, tcam_msg);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
 				"failed to get fd tcam key x, ret = %d\n", ret);
-			kfree(rule_locs);
-			return;
+			goto out;
 		}
 
-		ret = hclge_dbg_fd_tcam_read(hdev, 0, false, rule_locs[i]);
+		pos += scnprintf(buf + pos, len - pos, "%s", tcam_buf);
+
+		ret = hclge_dbg_fd_tcam_read(hdev, false, tcam_buf, tcam_msg);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
 				"failed to get fd tcam key y, ret = %d\n", ret);
-			kfree(rule_locs);
-			return;
+			goto out;
 		}
+
+		pos += scnprintf(buf + pos, len - pos, "%s", tcam_buf);
 	}
 
+out:
+	kfree(tcam_buf);
 	kfree(rule_locs);
+	return ret;
 }
 
 int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len)
@@ -1994,9 +2018,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (strncmp(cmd_buf, "dump fd tcam", 12) == 0) {
-		hclge_dbg_fd_tcam(hdev);
-	} else if (strncmp(cmd_buf, "dump tc", 7) == 0) {
+	if (strncmp(cmd_buf, "dump tc", 7) == 0) {
 		hclge_dbg_dump_tc(hdev);
 	} else if (strncmp(cmd_buf, DUMP_TM_MAP, strlen(DUMP_TM_MAP)) == 0) {
 		hclge_dbg_dump_tm_map(hdev, &cmd_buf[sizeof(DUMP_TM_MAP)]);
@@ -2112,6 +2134,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_REG_DCB,
 		.dbg_dump = hclge_dbg_dump_dcb,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_FD_TCAM,
+		.dbg_dump = hclge_dbg_dump_fd_tcam,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 933f157294fd..25b42da471e5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -69,6 +69,11 @@ struct hclge_dbg_reg_common_msg {
 	enum hclge_opcode_type cmd;
 };
 
+struct hclge_dbg_tcam_msg {
+	u8 stage;
+	u32 loc;
+};
+
 #define	HCLGE_DBG_MAX_DFX_MSG_LEN	60
 struct hclge_dbg_dfx_message {
 	int flag;

From 7679f28e32a48461be4d4c30c14d7be06300cd5d Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:35 +0800
Subject: [PATCH 0386/3292] net: hns3: refactor dump tm map of debugfs

Currently, the debugfs command for tm map is implemented by
"echo xxxx > cmd", and record the information in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"tm_map" for it, and query it by command "cat tm_map",
return the result to userspace, rather than record in dmesg.

As user can't specify queue id in cat command, driver will return info
of all queue id.

The display style is below:
$ cat tm_map
queue_id   qset_id   pri_id   tc_id
0000         0000      00       00
INDEX | TM BP QSET MAPPING:
0000  | 00000000:00000000:00000000:00000000:00000000:00000000:00000000
0256  | 00000000:00000000:00000000:00000000:00000000:00000002:00000000
0512  | 00000000:00000000:00000000:00000004:00000000:00000000:00000000
0768  | 00000000:00000008:00000000:00000000:00000000:00000000:00000000

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   8 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 169 ++++++++----------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c |  60 +++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h |   2 +
 5 files changed, 145 insertions(+), 95 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 730f56d9ae4c..5de8b11e0cb7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -254,6 +254,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_NODES,
 	HNAE3_DBG_CMD_TM_PRI,
 	HNAE3_DBG_CMD_TM_QSET,
+	HNAE3_DBG_CMD_TM_MAP,
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 37aa891f8133..39a24cc143f6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -64,6 +64,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "tm_map",
+		.cmd = HNAE3_DBG_CMD_TM_MAP,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN_1MB,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "dev_info",
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
@@ -718,7 +725,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 		return;
 
 	dev_info(&h->pdev->dev, "dump tc\n");
-	dev_info(&h->pdev->dev, "dump tm map <q_num>\n");
 	dev_info(&h->pdev->dev, "dump tm\n");
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index c92800d21cc1..58ee389342ce 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -905,115 +905,96 @@ err_tm_cmd_send:
 		cmd, ret);
 }
 
-static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
-				  const char *cmd_buf)
+static int hclge_dbg_dump_tm_bp_qset_map(struct hclge_dev *hdev, u8 tc_id,
+					 char *buf, int len)
 {
-	struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
-	struct hclge_nq_to_qs_link_cmd *nq_to_qs_map;
 	u32 qset_mapping[HCLGE_BP_EXT_GRP_NUM];
-	struct hclge_qs_to_pri_link_cmd *map;
-	struct hclge_tqp_tx_queue_tc_cmd *tc;
-	u16 group_id, queue_id, qset_id;
-	enum hclge_opcode_type cmd;
-	u8 grp_num, pri_id, tc_id;
+	struct hclge_bp_to_qs_map_cmd *map;
 	struct hclge_desc desc;
-	u16 qs_id_l;
-	u16 qs_id_h;
+	int pos = 0;
+	u8 group_id;
+	u8 grp_num;
+	u16 i = 0;
 	int ret;
-	u32 i;
-
-	ret = kstrtou16(cmd_buf, 0, &queue_id);
-	queue_id = (ret != 0) ? 0 : queue_id;
-
-	cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK;
-	nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	nq_to_qs_map->nq_id = cpu_to_le16(queue_id);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_map_cmd_send;
-	qset_id = le16_to_cpu(nq_to_qs_map->qset_id);
-
-	/* convert qset_id to the following format, drop the vld bit
-	 *            | qs_id_h | vld | qs_id_l |
-	 * qset_id:   | 15 ~ 11 |  10 |  9 ~ 0  |
-	 *             \         \   /         /
-	 *              \         \ /         /
-	 * qset_id: | 15 | 14 ~ 10 |  9 ~ 0  |
-	 */
-	qs_id_l = hnae3_get_field(qset_id, HCLGE_TM_QS_ID_L_MSK,
-				  HCLGE_TM_QS_ID_L_S);
-	qs_id_h = hnae3_get_field(qset_id, HCLGE_TM_QS_ID_H_EXT_MSK,
-				  HCLGE_TM_QS_ID_H_EXT_S);
-	qset_id = 0;
-	hnae3_set_field(qset_id, HCLGE_TM_QS_ID_L_MSK, HCLGE_TM_QS_ID_L_S,
-			qs_id_l);
-	hnae3_set_field(qset_id, HCLGE_TM_QS_ID_H_MSK, HCLGE_TM_QS_ID_H_S,
-			qs_id_h);
-
-	cmd = HCLGE_OPC_TM_QS_TO_PRI_LINK;
-	map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	map->qs_id = cpu_to_le16(qset_id);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_map_cmd_send;
-	pri_id = map->priority;
-
-	cmd = HCLGE_OPC_TQP_TX_QUEUE_TC;
-	tc = (struct hclge_tqp_tx_queue_tc_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	tc->queue_id = cpu_to_le16(queue_id);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_map_cmd_send;
-	tc_id = tc->tc_id & 0x7;
-
-	dev_info(&hdev->pdev->dev, "queue_id | qset_id | pri_id | tc_id\n");
-	dev_info(&hdev->pdev->dev, "%04u     | %04u    | %02u     | %02u\n",
-		 queue_id, qset_id, pri_id, tc_id);
-
-	if (!hnae3_dev_dcb_supported(hdev)) {
-		dev_info(&hdev->pdev->dev,
-			 "Only DCB-supported dev supports tm mapping\n");
-		return;
-	}
 
 	grp_num = hdev->num_tqps <= HCLGE_TQP_MAX_SIZE_DEV_V2 ?
 		  HCLGE_BP_GRP_NUM : HCLGE_BP_EXT_GRP_NUM;
-	cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
-	bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
+	map = (struct hclge_bp_to_qs_map_cmd *)desc.data;
 	for (group_id = 0; group_id < grp_num; group_id++) {
-		hclge_cmd_setup_basic_desc(&desc, cmd, true);
-		bp_to_qs_map_cmd->tc_id = tc_id;
-		bp_to_qs_map_cmd->qs_group_id = group_id;
+		hclge_cmd_setup_basic_desc(&desc,
+					   HCLGE_OPC_TM_BP_TO_QSET_MAPPING,
+					   true);
+		map->tc_id = tc_id;
+		map->qs_group_id = group_id;
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-		if (ret)
-			goto err_tm_map_cmd_send;
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to get bp to qset map, ret = %d\n",
+				ret);
+			return ret;
+		}
 
-		qset_mapping[group_id] =
-			le32_to_cpu(bp_to_qs_map_cmd->qs_bit_map);
+		qset_mapping[group_id] = le32_to_cpu(map->qs_bit_map);
 	}
 
-	dev_info(&hdev->pdev->dev, "index | tm bp qset maping:\n");
-
-	i = 0;
+	pos += scnprintf(buf + pos, len - pos, "INDEX | TM BP QSET MAPPING:\n");
 	for (group_id = 0; group_id < grp_num / 8; group_id++) {
-		dev_info(&hdev->pdev->dev,
+		pos += scnprintf(buf + pos, len - pos,
 			 "%04d  | %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
-			 group_id * 256, qset_mapping[(u32)(i + 7)],
-			 qset_mapping[(u32)(i + 6)], qset_mapping[(u32)(i + 5)],
-			 qset_mapping[(u32)(i + 4)], qset_mapping[(u32)(i + 3)],
-			 qset_mapping[(u32)(i + 2)], qset_mapping[(u32)(i + 1)],
+			 group_id * 256, qset_mapping[i + 7],
+			 qset_mapping[i + 6], qset_mapping[i + 5],
+			 qset_mapping[i + 4], qset_mapping[i + 3],
+			 qset_mapping[i + 2], qset_mapping[i + 1],
 			 qset_mapping[i]);
 		i += 8;
 	}
 
-	return;
+	return pos;
+}
 
-err_tm_map_cmd_send:
-	dev_err(&hdev->pdev->dev, "dump tqp map fail(0x%x), ret = %d\n",
-		cmd, ret);
+static int hclge_dbg_dump_tm_map(struct hclge_dev *hdev, char *buf, int len)
+{
+	u16 queue_id;
+	u16 qset_id;
+	u8 link_vld;
+	int pos = 0;
+	u8 pri_id;
+	u8 tc_id;
+	int ret;
+
+	for (queue_id = 0; queue_id < hdev->num_tqps; queue_id++) {
+		ret = hclge_tm_get_q_to_qs_map(hdev, queue_id, &qset_id);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_get_qset_map_pri(hdev, qset_id, &pri_id,
+						&link_vld);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_get_q_to_tc(hdev, queue_id, &tc_id);
+		if (ret)
+			return ret;
+
+		pos += scnprintf(buf + pos, len - pos,
+				 "QUEUE_ID   QSET_ID   PRI_ID   TC_ID\n");
+		pos += scnprintf(buf + pos, len - pos,
+				 "%04u        %4u       %3u      %2u\n",
+				 queue_id, qset_id, pri_id, tc_id);
+
+		if (!hnae3_dev_dcb_supported(hdev))
+			continue;
+
+		ret = hclge_dbg_dump_tm_bp_qset_map(hdev, tc_id, buf + pos,
+						    len - pos);
+		if (ret < 0)
+			return ret;
+		pos += ret;
+
+		pos += scnprintf(buf + pos, len - pos, "\n");
+	}
+
+	return 0;
 }
 
 static int hclge_dbg_dump_tm_nodes(struct hclge_dev *hdev, char *buf, int len)
@@ -2013,15 +1994,11 @@ static int hclge_dbg_dump_mac_mc(struct hclge_dev *hdev, char *buf, int len)
 
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
-#define DUMP_TM_MAP	"dump tm map"
-
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
 	if (strncmp(cmd_buf, "dump tc", 7) == 0) {
 		hclge_dbg_dump_tc(hdev);
-	} else if (strncmp(cmd_buf, DUMP_TM_MAP, strlen(DUMP_TM_MAP)) == 0) {
-		hclge_dbg_dump_tm_map(hdev, &cmd_buf[sizeof(DUMP_TM_MAP)]);
 	} else if (strncmp(cmd_buf, "dump tm", 7) == 0) {
 		hclge_dbg_dump_tm(hdev);
 	} else if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
@@ -2058,6 +2035,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_TM_QSET,
 		.dbg_dump = hclge_dbg_dump_tm_qset,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_TM_MAP,
+		.dbg_dump = hclge_dbg_dump_tm_map,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_MAC_UC,
 		.dbg_dump = hclge_dbg_dump_mac_uc,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index ebb962bad451..bd99faf19d9d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1807,3 +1807,63 @@ int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,
 	para->rate = le32_to_cpu(shap_cfg_cmd->pri_rate);
 	return 0;
 }
+
+int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id)
+{
+	struct hclge_nq_to_qs_link_cmd *map;
+	struct hclge_desc desc;
+	u16 qs_id_l;
+	u16 qs_id_h;
+	int ret;
+
+	map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NQ_TO_QS_LINK, true);
+	map->nq_id = cpu_to_le16(q_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get queue to qset map, ret = %d\n", ret);
+		return ret;
+	}
+	*qset_id = le16_to_cpu(map->qset_id);
+
+	/* convert qset_id to the following format, drop the vld bit
+	 *            | qs_id_h | vld | qs_id_l |
+	 * qset_id:   | 15 ~ 11 |  10 |  9 ~ 0  |
+	 *             \         \   /         /
+	 *              \         \ /         /
+	 * qset_id: | 15 | 14 ~ 10 |  9 ~ 0  |
+	 */
+	qs_id_l = hnae3_get_field(*qset_id, HCLGE_TM_QS_ID_L_MSK,
+				  HCLGE_TM_QS_ID_L_S);
+	qs_id_h = hnae3_get_field(*qset_id, HCLGE_TM_QS_ID_H_EXT_MSK,
+				  HCLGE_TM_QS_ID_H_EXT_S);
+	*qset_id = 0;
+	hnae3_set_field(*qset_id, HCLGE_TM_QS_ID_L_MSK, HCLGE_TM_QS_ID_L_S,
+			qs_id_l);
+	hnae3_set_field(*qset_id, HCLGE_TM_QS_ID_H_MSK, HCLGE_TM_QS_ID_H_S,
+			qs_id_h);
+	return 0;
+}
+
+int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id)
+{
+#define HCLGE_TM_TC_MASK		0x7
+
+	struct hclge_tqp_tx_queue_tc_cmd *tc;
+	struct hclge_desc desc;
+	int ret;
+
+	tc = (struct hclge_tqp_tx_queue_tc_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TQP_TX_QUEUE_TC, true);
+	tc->queue_id = cpu_to_le16(q_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get queue to tc map, ret = %d\n", ret);
+		return ret;
+	}
+
+	*tc_id = tc->tc_id & HCLGE_TM_TC_MASK;
+	return 0;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index b25d76023af0..c21e822fefbf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -242,4 +242,6 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight);
 int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,
 			    enum hclge_opcode_type cmd,
 			    struct hclge_pri_shaper_para *para);
+int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id);
+int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id);
 #endif

From cad7c215a4b1bc67920ab0d2673ac08a2cc885f1 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:36 +0800
Subject: [PATCH 0387/3292] net: hns3: refactor dump tm of debugfs

Currently, user gets some tm info by implementing debugfs command
"echo dump tm > cmd", this command will dump info in dmesg. It's
unnecessary and heavy.

In addition, the info of this command mixes info of qset, priority,
pg and port. Qset and priority have their own command to get info of
themself, so can remove info of qset and priority from this command.

To optimize it, create two new files "tm_pg", "tm_port" in tm directory
and use cat command to separately get info of pg and port.

The display style is below:
$ cat tm_pg
ID  PRI_MAP  MODE DWRR  C_IR_B  C_IR_U  C_IR_S  C_BS_B  C_BS_S ...
00   0x1f    dwrr  1       75       9       0      31      20  ...

$ cat tm_port
IR_B  IR_U  IR_S  BS_B  BS_S  FLAG  RATE(Mbps)
75     9     0    31    20    1     200000

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   2 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  15 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 328 ++++++------------
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |   2 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 125 ++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h |  15 +-
 6 files changed, 269 insertions(+), 218 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 5de8b11e0cb7..e783d167c624 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -255,6 +255,8 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_PRI,
 	HNAE3_DBG_CMD_TM_QSET,
 	HNAE3_DBG_CMD_TM_MAP,
+	HNAE3_DBG_CMD_TM_PG,
+	HNAE3_DBG_CMD_TM_PORT,
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 39a24cc143f6..4061f1f36739 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -71,6 +71,20 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN_1MB,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "tm_pg",
+		.cmd = HNAE3_DBG_CMD_TM_PG,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
+	{
+		.name = "tm_port",
+		.cmd = HNAE3_DBG_CMD_TM_PORT,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "dev_info",
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
@@ -725,7 +739,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 		return;
 
 	dev_info(&h->pdev->dev, "dump tc\n");
-	dev_info(&h->pdev->dev, "dump tm\n");
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 58ee389342ce..506f0abfe46d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -685,224 +685,120 @@ static void hclge_dbg_dump_tc(struct hclge_dev *hdev)
 		hclge_print_tc_info(hdev, ets_weight->tc_weight[i], i);
 }
 
-static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev)
+static const struct hclge_dbg_item tm_pg_items[] = {
+	{ "ID", 2 },
+	{ "PRI_MAP", 2 },
+	{ "MODE", 2 },
+	{ "DWRR", 2 },
+	{ "C_IR_B", 2 },
+	{ "C_IR_U", 2 },
+	{ "C_IR_S", 2 },
+	{ "C_BS_B", 2 },
+	{ "C_BS_S", 2 },
+	{ "C_FLAG", 2 },
+	{ "C_RATE(Mbps)", 2 },
+	{ "P_IR_B", 2 },
+	{ "P_IR_U", 2 },
+	{ "P_IR_S", 2 },
+	{ "P_BS_B", 2 },
+	{ "P_BS_S", 2 },
+	{ "P_FLAG", 2 },
+	{ "P_RATE(Mbps)", 0 }
+};
+
+static void hclge_dbg_fill_shaper_content(struct hclge_tm_shaper_para *para,
+					  char **result, u8 *index)
 {
-	struct hclge_port_shapping_cmd *port_shap_cfg_cmd;
-	struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
-	struct hclge_pg_shapping_cmd *pg_shap_cfg_cmd;
-	enum hclge_opcode_type cmd;
-	struct hclge_desc desc;
-	int ret;
-
-	cmd = HCLGE_OPC_TM_PG_C_SHAPPING;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PG_C pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
-	dev_info(&hdev->pdev->dev, "PG_C pg_shapping: 0x%x\n",
-		 le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para));
-
-	cmd = HCLGE_OPC_TM_PG_P_SHAPPING;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	pg_shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PG_P pg_id: %u\n", pg_shap_cfg_cmd->pg_id);
-	dev_info(&hdev->pdev->dev, "PG_P pg_shapping: 0x%x\n",
-		 le32_to_cpu(pg_shap_cfg_cmd->pg_shapping_para));
-	dev_info(&hdev->pdev->dev, "PG_P flag: %#x\n", pg_shap_cfg_cmd->flag);
-	dev_info(&hdev->pdev->dev, "PG_P pg_rate: %u(Mbps)\n",
-		 le32_to_cpu(pg_shap_cfg_cmd->pg_rate));
-
-	cmd = HCLGE_OPC_TM_PORT_SHAPPING;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	port_shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PORT port_shapping: 0x%x\n",
-		 le32_to_cpu(port_shap_cfg_cmd->port_shapping_para));
-	dev_info(&hdev->pdev->dev, "PORT flag: %#x\n", port_shap_cfg_cmd->flag);
-	dev_info(&hdev->pdev->dev, "PORT port_rate: %u(Mbps)\n",
-		 le32_to_cpu(port_shap_cfg_cmd->port_rate));
-
-	cmd = HCLGE_OPC_TM_PG_SCH_MODE_CFG;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	dev_info(&hdev->pdev->dev, "PG_SCH pg_id: %u\n",
-		 le32_to_cpu(desc.data[0]));
-
-	cmd = HCLGE_OPC_TM_PRI_SCH_MODE_CFG;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n",
-		 le32_to_cpu(desc.data[0]));
-
-	cmd = HCLGE_OPC_TM_QS_SCH_MODE_CFG;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n",
-		 le32_to_cpu(desc.data[0]));
-
-	if (!hnae3_dev_dcb_supported(hdev)) {
-		dev_info(&hdev->pdev->dev,
-			 "Only DCB-supported dev supports tm mapping\n");
-		return;
-	}
-
-	cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_pg_cmd_send;
-
-	bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "BP_TO_QSET tc_id: %u\n",
-		 bp_to_qs_map_cmd->tc_id);
-	dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_group_id: 0x%x\n",
-		 bp_to_qs_map_cmd->qs_group_id);
-	dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_bit_map: 0x%x\n",
-		 le32_to_cpu(bp_to_qs_map_cmd->qs_bit_map));
-	return;
-
-err_tm_pg_cmd_send:
-	dev_err(&hdev->pdev->dev, "dump tm_pg fail(0x%x), ret = %d\n",
-		cmd, ret);
+	sprintf(result[(*index)++], "%3u", para->ir_b);
+	sprintf(result[(*index)++], "%3u", para->ir_u);
+	sprintf(result[(*index)++], "%3u", para->ir_s);
+	sprintf(result[(*index)++], "%3u", para->bs_b);
+	sprintf(result[(*index)++], "%3u", para->bs_s);
+	sprintf(result[(*index)++], "%3u", para->flag);
+	sprintf(result[(*index)++], "%6u", para->rate);
 }
 
-static void hclge_dbg_dump_tm(struct hclge_dev *hdev)
+static int hclge_dbg_dump_tm_pg(struct hclge_dev *hdev, char *buf, int len)
 {
-	struct hclge_priority_weight_cmd *priority_weight;
-	struct hclge_pg_to_pri_link_cmd *pg_to_pri_map;
-	struct hclge_qs_to_pri_link_cmd *qs_to_pri_map;
-	struct hclge_nq_to_qs_link_cmd *nq_to_qs_map;
-	struct hclge_pri_shapping_cmd *shap_cfg_cmd;
-	struct hclge_pg_weight_cmd *pg_weight;
-	struct hclge_qs_weight_cmd *qs_weight;
-	enum hclge_opcode_type cmd;
-	struct hclge_desc desc;
+	char data_str[ARRAY_SIZE(tm_pg_items)][HCLGE_DBG_DATA_STR_LEN];
+	struct hclge_tm_shaper_para c_shaper_para, p_shaper_para;
+	char *result[ARRAY_SIZE(tm_pg_items)], *sch_mode_str;
+	u8 pg_id, sch_mode, weight, pri_bit_map, i, j;
+	char content[HCLGE_DBG_TM_INFO_LEN];
+	int pos = 0;
 	int ret;
 
-	cmd = HCLGE_OPC_TM_PG_TO_PRI_LINK;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	for (i = 0; i < ARRAY_SIZE(tm_pg_items); i++)
+		result[i] = &data_str[i][0];
+
+	hclge_dbg_fill_content(content, sizeof(content), tm_pg_items,
+			       NULL, ARRAY_SIZE(tm_pg_items));
+	pos += scnprintf(buf + pos, len - pos, "%s", content);
+
+	for (pg_id = 0; pg_id < hdev->tm_info.num_pg; pg_id++) {
+		ret = hclge_tm_get_pg_to_pri_map(hdev, pg_id, &pri_bit_map);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_get_pg_sch_mode(hdev, pg_id, &sch_mode);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_get_pg_weight(hdev, pg_id, &weight);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_get_pg_shaper(hdev, pg_id,
+					     HCLGE_OPC_TM_PG_C_SHAPPING,
+					     &c_shaper_para);
+		if (ret)
+			return ret;
+
+		ret = hclge_tm_get_pg_shaper(hdev, pg_id,
+					     HCLGE_OPC_TM_PG_P_SHAPPING,
+					     &p_shaper_para);
+		if (ret)
+			return ret;
+
+		sch_mode_str = sch_mode & HCLGE_TM_TX_SCHD_DWRR_MSK ? "dwrr" :
+				       "sp";
+
+		j = 0;
+		sprintf(result[j++], "%02u", pg_id);
+		sprintf(result[j++], "0x%02x", pri_bit_map);
+		sprintf(result[j++], "%4s", sch_mode_str);
+		sprintf(result[j++], "%3u", weight);
+		hclge_dbg_fill_shaper_content(&c_shaper_para, result, &j);
+		hclge_dbg_fill_shaper_content(&p_shaper_para, result, &j);
+
+		hclge_dbg_fill_content(content, sizeof(content), tm_pg_items,
+				       (const char **)result,
+				       ARRAY_SIZE(tm_pg_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
+	}
+
+	return 0;
+}
+
+static int hclge_dbg_dump_tm_port(struct hclge_dev *hdev,  char *buf, int len)
+{
+	struct hclge_tm_shaper_para shaper_para;
+	int pos = 0;
+	int ret;
+
+	ret = hclge_tm_get_port_shaper(hdev, &shaper_para);
 	if (ret)
-		goto err_tm_cmd_send;
+		return ret;
 
-	pg_to_pri_map = (struct hclge_pg_to_pri_link_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "dump tm\n");
-	dev_info(&hdev->pdev->dev, "PG_TO_PRI gp_id: %u\n",
-		 pg_to_pri_map->pg_id);
-	dev_info(&hdev->pdev->dev, "PG_TO_PRI map: 0x%x\n",
-		 pg_to_pri_map->pri_bit_map);
+	pos += scnprintf(buf + pos, len - pos,
+			 "IR_B  IR_U  IR_S  BS_B  BS_S  FLAG  RATE(Mbps)\n");
+	pos += scnprintf(buf + pos, len - pos,
+			 "%3u   %3u   %3u   %3u   %3u     %1u   %6u\n",
+			 shaper_para.ir_b, shaper_para.ir_u, shaper_para.ir_s,
+			 shaper_para.bs_b, shaper_para.bs_s, shaper_para.flag,
+			 shaper_para.rate);
 
-	cmd = HCLGE_OPC_TM_QS_TO_PRI_LINK;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	qs_to_pri_map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "QS_TO_PRI qs_id: %u\n",
-		 le16_to_cpu(qs_to_pri_map->qs_id));
-	dev_info(&hdev->pdev->dev, "QS_TO_PRI priority: %u\n",
-		 qs_to_pri_map->priority);
-	dev_info(&hdev->pdev->dev, "QS_TO_PRI link_vld: %u\n",
-		 qs_to_pri_map->link_vld);
-
-	cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n",
-		 le16_to_cpu(nq_to_qs_map->nq_id));
-	dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: 0x%x\n",
-		 le16_to_cpu(nq_to_qs_map->qset_id));
-
-	cmd = HCLGE_OPC_TM_PG_WEIGHT;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	pg_weight = (struct hclge_pg_weight_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PG pg_id: %u\n", pg_weight->pg_id);
-	dev_info(&hdev->pdev->dev, "PG dwrr: %u\n", pg_weight->dwrr);
-
-	cmd = HCLGE_OPC_TM_QS_WEIGHT;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	qs_weight = (struct hclge_qs_weight_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "QS qs_id: %u\n",
-		 le16_to_cpu(qs_weight->qs_id));
-	dev_info(&hdev->pdev->dev, "QS dwrr: %u\n", qs_weight->dwrr);
-
-	cmd = HCLGE_OPC_TM_PRI_WEIGHT;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	priority_weight = (struct hclge_priority_weight_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PRI pri_id: %u\n", priority_weight->pri_id);
-	dev_info(&hdev->pdev->dev, "PRI dwrr: %u\n", priority_weight->dwrr);
-
-	cmd = HCLGE_OPC_TM_PRI_C_SHAPPING;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PRI_C pri_id: %u\n", shap_cfg_cmd->pri_id);
-	dev_info(&hdev->pdev->dev, "PRI_C pri_shapping: 0x%x\n",
-		 le32_to_cpu(shap_cfg_cmd->pri_shapping_para));
-	dev_info(&hdev->pdev->dev, "PRI_C flag: %#x\n", shap_cfg_cmd->flag);
-	dev_info(&hdev->pdev->dev, "PRI_C pri_rate: %u(Mbps)\n",
-		 le32_to_cpu(shap_cfg_cmd->pri_rate));
-
-	cmd = HCLGE_OPC_TM_PRI_P_SHAPPING;
-	hclge_cmd_setup_basic_desc(&desc, cmd, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		goto err_tm_cmd_send;
-
-	shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "PRI_P pri_id: %u\n", shap_cfg_cmd->pri_id);
-	dev_info(&hdev->pdev->dev, "PRI_P pri_shapping: 0x%x\n",
-		 le32_to_cpu(shap_cfg_cmd->pri_shapping_para));
-	dev_info(&hdev->pdev->dev, "PRI_P flag: %#x\n", shap_cfg_cmd->flag);
-	dev_info(&hdev->pdev->dev, "PRI_P pri_rate: %u(Mbps)\n",
-		 le32_to_cpu(shap_cfg_cmd->pri_rate));
-
-	hclge_dbg_dump_tm_pg(hdev);
-
-	return;
-
-err_tm_cmd_send:
-	dev_err(&hdev->pdev->dev, "dump tm fail(0x%x), ret = %d\n",
-		cmd, ret);
+	return 0;
 }
 
 static int hclge_dbg_dump_tm_bp_qset_map(struct hclge_dev *hdev, u8 tc_id,
@@ -1031,8 +927,8 @@ static int hclge_dbg_dump_tm_nodes(struct hclge_dev *hdev, char *buf, int len)
 
 static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len)
 {
-	struct hclge_pri_shaper_para c_shaper_para;
-	struct hclge_pri_shaper_para p_shaper_para;
+	struct hclge_tm_shaper_para c_shaper_para;
+	struct hclge_tm_shaper_para p_shaper_para;
 	u8 pri_num, sch_mode, weight;
 	char *sch_mode_str;
 	int pos = 0;
@@ -1999,8 +1895,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 
 	if (strncmp(cmd_buf, "dump tc", 7) == 0) {
 		hclge_dbg_dump_tc(hdev);
-	} else if (strncmp(cmd_buf, "dump tm", 7) == 0) {
-		hclge_dbg_dump_tm(hdev);
 	} else if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
 		hclge_dbg_dump_qos_pause_cfg(hdev);
 	} else if (strncmp(cmd_buf, "dump qos pri map", 16) == 0) {
@@ -2039,6 +1933,14 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_TM_MAP,
 		.dbg_dump = hclge_dbg_dump_tm_map,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_TM_PG,
+		.dbg_dump = hclge_dbg_dump_tm_pg,
+	},
+	{
+		.cmd = HNAE3_DBG_CMD_TM_PORT,
+		.dbg_dump = hclge_dbg_dump_tm_port,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_MAC_UC,
 		.dbg_dump = hclge_dbg_dump_mac_uc,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 25b42da471e5..c4956e3d8a41 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -738,6 +738,8 @@ static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
 #define HCLGE_DBG_ID_LEN			16
 #define HCLGE_DBG_ITEM_NAME_LEN			32
 #define HCLGE_DBG_DATA_STR_LEN			32
+#define HCLGE_DBG_TM_INFO_LEN			256
+
 struct hclge_dbg_item {
 	char name[HCLGE_DBG_ITEM_NAME_LEN];
 	u16 interval; /* blank numbers after the item */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index bd99faf19d9d..45870fec0506 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1775,7 +1775,7 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight)
 
 int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,
 			    enum hclge_opcode_type cmd,
-			    struct hclge_pri_shaper_para *para)
+			    struct hclge_tm_shaper_para *para)
 {
 	struct hclge_pri_shapping_cmd *shap_cfg_cmd;
 	struct hclge_desc desc;
@@ -1867,3 +1867,126 @@ int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id)
 	*tc_id = tc->tc_id & HCLGE_TM_TC_MASK;
 	return 0;
 }
+
+int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id,
+			       u8 *pri_bit_map)
+{
+	struct hclge_pg_to_pri_link_cmd *map;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_TO_PRI_LINK, true);
+	map = (struct hclge_pg_to_pri_link_cmd *)desc.data;
+	map->pg_id = pg_id;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get pg to pri map, ret = %d\n", ret);
+		return ret;
+	}
+
+	*pri_bit_map = map->pri_bit_map;
+	return 0;
+}
+
+int hclge_tm_get_pg_weight(struct hclge_dev *hdev, u8 pg_id, u8 *weight)
+{
+	struct hclge_pg_weight_cmd *pg_weight_cmd;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_WEIGHT, true);
+	pg_weight_cmd = (struct hclge_pg_weight_cmd *)desc.data;
+	pg_weight_cmd->pg_id = pg_id;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get pg weight, ret = %d\n", ret);
+		return ret;
+	}
+
+	*weight = pg_weight_cmd->dwrr;
+	return 0;
+}
+
+int hclge_tm_get_pg_sch_mode(struct hclge_dev *hdev, u8 pg_id, u8 *mode)
+{
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_SCH_MODE_CFG, true);
+	desc.data[0] = cpu_to_le32(pg_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get pg sch mode, ret = %d\n", ret);
+		return ret;
+	}
+
+	*mode = (u8)le32_to_cpu(desc.data[1]);
+	return 0;
+}
+
+int hclge_tm_get_pg_shaper(struct hclge_dev *hdev, u8 pg_id,
+			   enum hclge_opcode_type cmd,
+			   struct hclge_tm_shaper_para *para)
+{
+	struct hclge_pg_shapping_cmd *shap_cfg_cmd;
+	struct hclge_desc desc;
+	u32 shapping_para;
+	int ret;
+
+	if (cmd != HCLGE_OPC_TM_PG_C_SHAPPING &&
+	    cmd != HCLGE_OPC_TM_PG_P_SHAPPING)
+		return -EINVAL;
+
+	hclge_cmd_setup_basic_desc(&desc, cmd, true);
+	shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
+	shap_cfg_cmd->pg_id = pg_id;
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get pg shaper(%#x), ret = %d\n",
+			cmd, ret);
+		return ret;
+	}
+
+	shapping_para = le32_to_cpu(shap_cfg_cmd->pg_shapping_para);
+	para->ir_b = hclge_tm_get_field(shapping_para, IR_B);
+	para->ir_u = hclge_tm_get_field(shapping_para, IR_U);
+	para->ir_s = hclge_tm_get_field(shapping_para, IR_S);
+	para->bs_b = hclge_tm_get_field(shapping_para, BS_B);
+	para->bs_s = hclge_tm_get_field(shapping_para, BS_S);
+	para->flag = shap_cfg_cmd->flag;
+	para->rate = le32_to_cpu(shap_cfg_cmd->pg_rate);
+	return 0;
+}
+
+int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
+			     struct hclge_tm_shaper_para *para)
+{
+	struct hclge_port_shapping_cmd *port_shap_cfg_cmd;
+	struct hclge_desc desc;
+	u32 shapping_para;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get port shaper, ret = %d\n", ret);
+		return ret;
+	}
+
+	port_shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
+	shapping_para = le32_to_cpu(port_shap_cfg_cmd->port_shapping_para);
+	para->ir_b = hclge_tm_get_field(shapping_para, IR_B);
+	para->ir_u = hclge_tm_get_field(shapping_para, IR_U);
+	para->ir_s = hclge_tm_get_field(shapping_para, IR_S);
+	para->bs_b = hclge_tm_get_field(shapping_para, BS_B);
+	para->bs_s = hclge_tm_get_field(shapping_para, BS_S);
+	para->flag = port_shap_cfg_cmd->flag;
+	para->rate = le32_to_cpu(port_shap_cfg_cmd->port_rate);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index c21e822fefbf..d6f148171353 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -199,14 +199,14 @@ struct hclge_tm_nodes_cmd {
 	__le16 queue_num;
 };
 
-struct hclge_pri_shaper_para {
+struct hclge_tm_shaper_para {
+	u32 rate;
 	u8 ir_b;
 	u8 ir_u;
 	u8 ir_s;
 	u8 bs_b;
 	u8 bs_s;
 	u8 flag;
-	u32 rate;
 };
 
 #define hclge_tm_set_field(dest, string, val) \
@@ -241,7 +241,16 @@ int hclge_tm_get_pri_sch_mode(struct hclge_dev *hdev, u8 pri_id, u8 *mode);
 int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight);
 int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,
 			    enum hclge_opcode_type cmd,
-			    struct hclge_pri_shaper_para *para);
+			    struct hclge_tm_shaper_para *para);
 int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id);
 int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id);
+int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id,
+			       u8 *pri_bit_map);
+int hclge_tm_get_pg_weight(struct hclge_dev *hdev, u8 pg_id, u8 *weight);
+int hclge_tm_get_pg_sch_mode(struct hclge_dev *hdev, u8 pg_id, u8 *mode);
+int hclge_tm_get_pg_shaper(struct hclge_dev *hdev, u8 pg_id,
+			   enum hclge_opcode_type cmd,
+			   struct hclge_tm_shaper_para *para);
+int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
+			     struct hclge_tm_shaper_para *para);
 #endif

From 0e32038dc8565e8f1c00129307d56fd336267a56 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:37 +0800
Subject: [PATCH 0388/3292] net: hns3: refactor dump tc of debugfs

Currently, user gets tc schedule info by implementing debugfs command
"echo dump tc > cmd", this command will dump info in dmesg. It's
unnecessary and heavy.

To optimize it, create a single file "tc_sch_info" and use cat command
to get info. It will return info to userspace, rather than record in
dmesg.

The display style is below:
$ cat tc_sch_info
enabled tc number: 4
weight_offset: 14
TC    MODE  WEIGHT
0     dwrr     25
1     dwrr     25
2     dwrr     25
3     dwrr     25
4     dwrr      0
5     dwrr      0
6     dwrr      0
7     dwrr      0

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 ++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 55 ++++++++++---------
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e783d167c624..dc6b8e366c04 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -257,6 +257,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_MAP,
 	HNAE3_DBG_CMD_TM_PG,
 	HNAE3_DBG_CMD_TM_PORT,
+	HNAE3_DBG_CMD_TC_SCH_INFO,
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 4061f1f36739..1719ff8b3a20 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -85,6 +85,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "tc_sch_info",
+		.cmd = HNAE3_DBG_CMD_TC_SCH_INFO,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "dev_info",
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
@@ -738,7 +745,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	if (!hns3_is_phys_func(h->pdev))
 		return;
 
-	dev_info(&h->pdev->dev, "dump tc\n");
 	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 506f0abfe46d..bd62103d4aa8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -645,44 +645,45 @@ static int hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev,
 	return ret;
 }
 
-static void hclge_print_tc_info(struct hclge_dev *hdev, bool flag, int index)
-{
-	if (flag)
-		dev_info(&hdev->pdev->dev, "tc(%d): no sp mode weight: %u\n",
-			 index, hdev->tm_info.pg_info[0].tc_dwrr[index]);
-	else
-		dev_info(&hdev->pdev->dev, "tc(%d): sp mode\n", index);
-}
-
-static void hclge_dbg_dump_tc(struct hclge_dev *hdev)
+static int hclge_dbg_dump_tc(struct hclge_dev *hdev, char *buf, int len)
 {
 	struct hclge_ets_tc_weight_cmd *ets_weight;
 	struct hclge_desc desc;
-	int i, ret;
+	char *sch_mode_str;
+	int pos = 0;
+	int ret;
+	u8 i;
 
 	if (!hnae3_dev_dcb_supported(hdev)) {
-		dev_info(&hdev->pdev->dev,
-			 "Only DCB-supported dev supports tc\n");
-		return;
+		dev_err(&hdev->pdev->dev,
+			"Only DCB-supported dev supports tc\n");
+		return -EOPNOTSUPP;
 	}
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, true);
-
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
-		dev_err(&hdev->pdev->dev, "dump tc fail, ret = %d\n", ret);
-		return;
+		dev_err(&hdev->pdev->dev, "failed to get tc weight, ret = %d\n",
+			ret);
+		return ret;
 	}
 
 	ets_weight = (struct hclge_ets_tc_weight_cmd *)desc.data;
 
-	dev_info(&hdev->pdev->dev, "dump tc: %u tc enabled\n",
-		 hdev->tm_info.num_tc);
-	dev_info(&hdev->pdev->dev, "weight_offset: %u\n",
-		 ets_weight->weight_offset);
+	pos += scnprintf(buf + pos, len - pos, "enabled tc number: %u\n",
+			 hdev->tm_info.num_tc);
+	pos += scnprintf(buf + pos, len - pos, "weight_offset: %u\n",
+			 ets_weight->weight_offset);
 
-	for (i = 0; i < HNAE3_MAX_TC; i++)
-		hclge_print_tc_info(hdev, ets_weight->tc_weight[i], i);
+	pos += scnprintf(buf + pos, len - pos, "TC    MODE  WEIGHT\n");
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		sch_mode_str = ets_weight->tc_weight[i] ? "dwrr" : "sp";
+		pos += scnprintf(buf + pos, len - pos, "%u     %4s    %3u\n",
+				 i, sch_mode_str,
+				 hdev->tm_info.pg_info[0].tc_dwrr[i]);
+	}
+
+	return 0;
 }
 
 static const struct hclge_dbg_item tm_pg_items[] = {
@@ -1893,9 +1894,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (strncmp(cmd_buf, "dump tc", 7) == 0) {
-		hclge_dbg_dump_tc(hdev);
-	} else if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
+	if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
 		hclge_dbg_dump_qos_pause_cfg(hdev);
 	} else if (strncmp(cmd_buf, "dump qos pri map", 16) == 0) {
 		hclge_dbg_dump_qos_pri_map(hdev);
@@ -1941,6 +1940,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_TM_PORT,
 		.dbg_dump = hclge_dbg_dump_tm_port,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_TC_SCH_INFO,
+		.dbg_dump = hclge_dbg_dump_tc,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_MAC_UC,
 		.dbg_dump = hclge_dbg_dump_mac_uc,

From 6571ec2eda65d4e19244bb3e001ec64a6eef41dc Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:38 +0800
Subject: [PATCH 0389/3292] net: hns3: refactor dump qos pause cfg of debugfs

Currently, user gets pause config by implementing debugfs command
"echo dump qos pause cfg > cmd", this command will dump info in dmesg.
It's unnecessary and heavy.

To optimize it, create a single file "qos_pause_cfg" in tm directory
and use cat command to get info. It will return info to userspace,
rather than record in dmesg.

The display style is below:
$ cat qos_pause_cfg
pause_trans_gap: 0x7f
pause_trans_time: 0xffff

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 ++++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 30 +++++++++++--------
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index dc6b8e366c04..dec3c772c1bf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -258,6 +258,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_PG,
 	HNAE3_DBG_CMD_TM_PORT,
 	HNAE3_DBG_CMD_TC_SCH_INFO,
+	HNAE3_DBG_CMD_QOS_PAUSE_CFG,
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 1719ff8b3a20..be2cde9ca052 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -92,6 +92,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "qos_pause_cfg",
+		.cmd = HNAE3_DBG_CMD_QOS_PAUSE_CFG,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "dev_info",
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
@@ -745,7 +752,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	if (!hns3_is_phys_func(h->pdev))
 		return;
 
-	dev_info(&h->pdev->dev, "dump qos pause cfg\n");
 	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index bd62103d4aa8..f7864f867eb6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1028,27 +1028,29 @@ static int hclge_dbg_dump_tm_qset(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
-static void hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev, char *buf,
+					int len)
 {
 	struct hclge_cfg_pause_param_cmd *pause_param;
 	struct hclge_desc desc;
+	int pos = 0;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_MAC_PARA, true);
-
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
-		dev_err(&hdev->pdev->dev, "dump checksum fail, ret = %d\n",
-			ret);
-		return;
+		dev_err(&hdev->pdev->dev,
+			"failed to dump qos pause, ret = %d\n", ret);
+		return ret;
 	}
 
 	pause_param = (struct hclge_cfg_pause_param_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "dump qos pause cfg\n");
-	dev_info(&hdev->pdev->dev, "pause_trans_gap: 0x%x\n",
-		 pause_param->pause_trans_gap);
-	dev_info(&hdev->pdev->dev, "pause_trans_time: 0x%x\n",
-		 le16_to_cpu(pause_param->pause_trans_time));
+
+	pos += scnprintf(buf + pos, len - pos, "pause_trans_gap: 0x%x\n",
+			 pause_param->pause_trans_gap);
+	pos += scnprintf(buf + pos, len - pos, "pause_trans_time: 0x%x\n",
+			 le16_to_cpu(pause_param->pause_trans_time));
+	return 0;
 }
 
 static void hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev)
@@ -1894,9 +1896,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (strncmp(cmd_buf, "dump qos pause cfg", 18) == 0) {
-		hclge_dbg_dump_qos_pause_cfg(hdev);
-	} else if (strncmp(cmd_buf, "dump qos pri map", 16) == 0) {
+	if (strncmp(cmd_buf, "dump qos pri map", 16) == 0) {
 		hclge_dbg_dump_qos_pri_map(hdev);
 	} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
 		hclge_dbg_dump_qos_buf_cfg(hdev);
@@ -1944,6 +1944,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_TC_SCH_INFO,
 		.dbg_dump = hclge_dbg_dump_tc,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_QOS_PAUSE_CFG,
+		.dbg_dump = hclge_dbg_dump_qos_pause_cfg,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_MAC_UC,
 		.dbg_dump = hclge_dbg_dump_mac_uc,

From 28d3bada7d42e324ee8558e6d22c3d768a87af7b Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:39 +0800
Subject: [PATCH 0390/3292] net: hns3: refactor dump qos pri map of debugfs

Currently, user gets priority map by implementing debugfs command
"echo dump qos pri map > cmd", this command will dump info in dmesg.
It's unnecessary and heavy.

To optimize it, create a single file "qos_pri_map" in tm directory
and use cat command to get info. It will return info to userspace,
rather than record in dmesg.

The display style is below:
$ cat qos_pri_map
vlan_to_pri: 0
PRI    TC
0       0
1       1
2       2
3       3
4       0
5       1
6       2

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  8 +++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 45 ++++++++++++-------
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index dec3c772c1bf..16a99434502e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -259,6 +259,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TM_PORT,
 	HNAE3_DBG_CMD_TC_SCH_INFO,
 	HNAE3_DBG_CMD_QOS_PAUSE_CFG,
+	HNAE3_DBG_CMD_QOS_PRI_MAP,
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index be2cde9ca052..e59060b5daad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -99,6 +99,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "qos_pri_map",
+		.cmd = HNAE3_DBG_CMD_QOS_PRI_MAP,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "dev_info",
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
@@ -752,7 +759,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	if (!hns3_is_phys_func(h->pdev))
 		return;
 
-	dev_info(&h->pdev->dev, "dump qos pri map\n");
 	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index f7864f867eb6..85129a5fef73 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1053,32 +1053,41 @@ static int hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev, char *buf,
 	return 0;
 }
 
-static void hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev)
+static int hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev, char *buf,
+				      int len)
 {
+#define HCLGE_DBG_TC_MASK		0x0F
+#define HCLGE_DBG_TC_BIT_WIDTH		4
+
 	struct hclge_qos_pri_map_cmd *pri_map;
 	struct hclge_desc desc;
+	int pos = 0;
+	u8 *pri_tc;
+	u8 tc, i;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, true);
-
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"dump qos pri map fail, ret = %d\n", ret);
-		return;
+			"failed to dump qos pri map, ret = %d\n", ret);
+		return ret;
 	}
 
 	pri_map = (struct hclge_qos_pri_map_cmd *)desc.data;
-	dev_info(&hdev->pdev->dev, "dump qos pri map\n");
-	dev_info(&hdev->pdev->dev, "vlan_to_pri: 0x%x\n", pri_map->vlan_pri);
-	dev_info(&hdev->pdev->dev, "pri_0_to_tc: 0x%x\n", pri_map->pri0_tc);
-	dev_info(&hdev->pdev->dev, "pri_1_to_tc: 0x%x\n", pri_map->pri1_tc);
-	dev_info(&hdev->pdev->dev, "pri_2_to_tc: 0x%x\n", pri_map->pri2_tc);
-	dev_info(&hdev->pdev->dev, "pri_3_to_tc: 0x%x\n", pri_map->pri3_tc);
-	dev_info(&hdev->pdev->dev, "pri_4_to_tc: 0x%x\n", pri_map->pri4_tc);
-	dev_info(&hdev->pdev->dev, "pri_5_to_tc: 0x%x\n", pri_map->pri5_tc);
-	dev_info(&hdev->pdev->dev, "pri_6_to_tc: 0x%x\n", pri_map->pri6_tc);
-	dev_info(&hdev->pdev->dev, "pri_7_to_tc: 0x%x\n", pri_map->pri7_tc);
+
+	pos += scnprintf(buf + pos, len - pos, "vlan_to_pri: 0x%x\n",
+			 pri_map->vlan_pri);
+	pos += scnprintf(buf + pos, len - pos, "PRI  TC\n");
+
+	pri_tc = (u8 *)pri_map;
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		tc = pri_tc[i >> 1] >> ((i & 1) * HCLGE_DBG_TC_BIT_WIDTH);
+		tc &= HCLGE_DBG_TC_MASK;
+		pos += scnprintf(buf + pos, len - pos, "%u     %u\n", i, tc);
+	}
+
+	return 0;
 }
 
 static int hclge_dbg_dump_tx_buf_cfg(struct hclge_dev *hdev)
@@ -1896,9 +1905,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (strncmp(cmd_buf, "dump qos pri map", 16) == 0) {
-		hclge_dbg_dump_qos_pri_map(hdev);
-	} else if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
+	if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
 		hclge_dbg_dump_qos_buf_cfg(hdev);
 	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
@@ -1948,6 +1955,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_QOS_PAUSE_CFG,
 		.dbg_dump = hclge_dbg_dump_qos_pause_cfg,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_QOS_PRI_MAP,
+		.dbg_dump = hclge_dbg_dump_qos_pri_map,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_MAC_UC,
 		.dbg_dump = hclge_dbg_dump_mac_uc,

From 95b195869b7d1eb7a1f837f024db31cd6fc07981 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:40 +0800
Subject: [PATCH 0391/3292] net: hns3: refactor dump qos buf cfg of debugfs

Currently, user gets qos buffer config by implementing debugfs command
"echo dump qos buf cfg > cmd", this command will dump info in dmesg.
It's unnecessary and heavy.

To optimize it, create a single file "qos_buf_cfg" in tm directory
and use cat command to get info. It will return info to userspace,
rather than record in dmesg.

The display style is below:
$ cat qos_buf_cfg
tx_packet_buf_tc_0: 0x120
tx_packet_buf_tc_1: 0x120
tx_packet_buf_tc_2: 0x120
tx_packet_buf_tc_3: 0x120
tx_packet_buf_tc_4: 0x0
tx_packet_buf_tc_5: 0x0
tx_packet_buf_tc_6: 0x0
tx_packet_buf_tc_7: 0x0
......

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   8 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 186 ++++++++++--------
 3 files changed, 115 insertions(+), 80 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 16a99434502e..9af1d64e45df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -260,6 +260,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TC_SCH_INFO,
 	HNAE3_DBG_CMD_QOS_PAUSE_CFG,
 	HNAE3_DBG_CMD_QOS_PRI_MAP,
+	HNAE3_DBG_CMD_QOS_BUF_CFG,
 	HNAE3_DBG_CMD_DEV_INFO,
 	HNAE3_DBG_CMD_TX_BD,
 	HNAE3_DBG_CMD_RX_BD,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index e59060b5daad..bd348c1c801b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -106,6 +106,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "qos_buf_cfg",
+		.cmd = HNAE3_DBG_CMD_QOS_BUF_CFG,
+		.dentry = HNS3_DBG_DENTRY_TM,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "dev_info",
 		.cmd = HNAE3_DBG_CMD_DEV_INFO,
@@ -759,7 +766,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	if (!hns3_is_phys_func(h->pdev))
 		return;
 
-	dev_info(&h->pdev->dev, "dump qos buf cfg\n");
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 85129a5fef73..45ccb04100c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1090,199 +1090,225 @@ static int hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev, char *buf,
 	return 0;
 }
 
-static int hclge_dbg_dump_tx_buf_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_tx_buf_cfg(struct hclge_dev *hdev, char *buf, int len)
 {
 	struct hclge_tx_buff_alloc_cmd *tx_buf_cmd;
 	struct hclge_desc desc;
+	int pos = 0;
 	int i, ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump tx buf, ret = %d\n", ret);
 		return ret;
+	}
 
-	dev_info(&hdev->pdev->dev, "dump qos buf cfg\n");
 	tx_buf_cmd = (struct hclge_tx_buff_alloc_cmd *)desc.data;
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
-		dev_info(&hdev->pdev->dev, "tx_packet_buf_tc_%d: 0x%x\n", i,
-			 le16_to_cpu(tx_buf_cmd->tx_pkt_buff[i]));
+		pos += scnprintf(buf + pos, len - pos,
+				 "tx_packet_buf_tc_%d: 0x%x\n", i,
+				 le16_to_cpu(tx_buf_cmd->tx_pkt_buff[i]));
 
-	return 0;
+	return pos;
 }
 
-static int hclge_dbg_dump_rx_priv_buf_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_rx_priv_buf_cfg(struct hclge_dev *hdev, char *buf,
+					  int len)
 {
 	struct hclge_rx_priv_buff_cmd *rx_buf_cmd;
 	struct hclge_desc desc;
+	int pos = 0;
 	int i, ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump rx priv buf, ret = %d\n", ret);
 		return ret;
+	}
+
+	pos += scnprintf(buf + pos, len - pos, "\n");
 
-	dev_info(&hdev->pdev->dev, "\n");
 	rx_buf_cmd = (struct hclge_rx_priv_buff_cmd *)desc.data;
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
-		dev_info(&hdev->pdev->dev, "rx_packet_buf_tc_%d: 0x%x\n", i,
-			 le16_to_cpu(rx_buf_cmd->buf_num[i]));
+		pos += scnprintf(buf + pos, len - pos,
+				 "rx_packet_buf_tc_%d: 0x%x\n", i,
+				 le16_to_cpu(rx_buf_cmd->buf_num[i]));
 
-	dev_info(&hdev->pdev->dev, "rx_share_buf: 0x%x\n",
-		 le16_to_cpu(rx_buf_cmd->shared_buf));
+	pos += scnprintf(buf + pos, len - pos, "rx_share_buf: 0x%x\n",
+			 le16_to_cpu(rx_buf_cmd->shared_buf));
 
-	return 0;
+	return pos;
 }
 
-static int hclge_dbg_dump_rx_common_wl_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_rx_common_wl_cfg(struct hclge_dev *hdev, char *buf,
+					   int len)
 {
 	struct hclge_rx_com_wl *rx_com_wl;
 	struct hclge_desc desc;
+	int pos = 0;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_COM_WL_ALLOC, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump rx common wl, ret = %d\n", ret);
 		return ret;
+	}
 
 	rx_com_wl = (struct hclge_rx_com_wl *)desc.data;
-	dev_info(&hdev->pdev->dev, "\n");
-	dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n",
-		 le16_to_cpu(rx_com_wl->com_wl.high),
-		 le16_to_cpu(rx_com_wl->com_wl.low));
+	pos += scnprintf(buf + pos, len - pos, "\n");
+	pos += scnprintf(buf + pos, len - pos,
+			 "rx_com_wl: high: 0x%x, low: 0x%x\n",
+			 le16_to_cpu(rx_com_wl->com_wl.high),
+			 le16_to_cpu(rx_com_wl->com_wl.low));
 
-	return 0;
+	return pos;
 }
 
-static int hclge_dbg_dump_rx_global_pkt_cnt(struct hclge_dev *hdev)
+static int hclge_dbg_dump_rx_global_pkt_cnt(struct hclge_dev *hdev, char *buf,
+					    int len)
 {
 	struct hclge_rx_com_wl *rx_packet_cnt;
 	struct hclge_desc desc;
+	int pos = 0;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_GBL_PKT_CNT, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump rx global pkt cnt, ret = %d\n", ret);
 		return ret;
+	}
 
 	rx_packet_cnt = (struct hclge_rx_com_wl *)desc.data;
-	dev_info(&hdev->pdev->dev,
-		 "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
-		 le16_to_cpu(rx_packet_cnt->com_wl.high),
-		 le16_to_cpu(rx_packet_cnt->com_wl.low));
+	pos += scnprintf(buf + pos, len - pos,
+			 "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n",
+			 le16_to_cpu(rx_packet_cnt->com_wl.high),
+			 le16_to_cpu(rx_packet_cnt->com_wl.low));
 
-	return 0;
+	return pos;
 }
 
-static int hclge_dbg_dump_rx_priv_wl_buf_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_rx_priv_wl_buf_cfg(struct hclge_dev *hdev, char *buf,
+					     int len)
 {
 	struct hclge_rx_priv_wl_buf *rx_priv_wl;
 	struct hclge_desc desc[2];
+	int pos = 0;
 	int i, ret;
 
 	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_RX_PRIV_WL_ALLOC, true);
 	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_RX_PRIV_WL_ALLOC, true);
 	ret = hclge_cmd_send(&hdev->hw, desc, 2);
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump rx priv wl buf, ret = %d\n", ret);
 		return ret;
+	}
 
 	rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[0].data;
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
-		dev_info(&hdev->pdev->dev,
+		pos += scnprintf(buf + pos, len - pos,
 			 "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n", i,
 			 le16_to_cpu(rx_priv_wl->tc_wl[i].high),
 			 le16_to_cpu(rx_priv_wl->tc_wl[i].low));
 
 	rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[1].data;
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
-		dev_info(&hdev->pdev->dev,
+		pos += scnprintf(buf + pos, len - pos,
 			 "rx_priv_wl_tc_%d: high: 0x%x, low: 0x%x\n",
 			 i + HCLGE_TC_NUM_ONE_DESC,
 			 le16_to_cpu(rx_priv_wl->tc_wl[i].high),
 			 le16_to_cpu(rx_priv_wl->tc_wl[i].low));
 
-	return 0;
+	return pos;
 }
 
-static int hclge_dbg_dump_rx_common_threshold_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_rx_common_threshold_cfg(struct hclge_dev *hdev,
+						  char *buf, int len)
 {
 	struct hclge_rx_com_thrd *rx_com_thrd;
 	struct hclge_desc desc[2];
+	int pos = 0;
 	int i, ret;
 
 	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_RX_COM_THRD_ALLOC, true);
 	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_RX_COM_THRD_ALLOC, true);
 	ret = hclge_cmd_send(&hdev->hw, desc, 2);
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump rx common threshold, ret = %d\n", ret);
 		return ret;
+	}
 
-	dev_info(&hdev->pdev->dev, "\n");
+	pos += scnprintf(buf + pos, len - pos, "\n");
 	rx_com_thrd = (struct hclge_rx_com_thrd *)desc[0].data;
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
-		dev_info(&hdev->pdev->dev,
+		pos += scnprintf(buf + pos, len - pos,
 			 "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n", i,
 			 le16_to_cpu(rx_com_thrd->com_thrd[i].high),
 			 le16_to_cpu(rx_com_thrd->com_thrd[i].low));
 
 	rx_com_thrd = (struct hclge_rx_com_thrd *)desc[1].data;
 	for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++)
-		dev_info(&hdev->pdev->dev,
+		pos += scnprintf(buf + pos, len - pos,
 			 "rx_com_thrd_tc_%d: high: 0x%x, low: 0x%x\n",
 			 i + HCLGE_TC_NUM_ONE_DESC,
 			 le16_to_cpu(rx_com_thrd->com_thrd[i].high),
 			 le16_to_cpu(rx_com_thrd->com_thrd[i].low));
 
-	return 0;
+	return pos;
 }
 
-static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev)
+static int hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev, char *buf,
+				      int len)
 {
-	enum hclge_opcode_type cmd;
+	int pos = 0;
 	int ret;
 
-	cmd = HCLGE_OPC_TX_BUFF_ALLOC;
-	ret = hclge_dbg_dump_tx_buf_cfg(hdev);
-	if (ret)
-		goto err_qos_cmd_send;
+	ret = hclge_dbg_dump_tx_buf_cfg(hdev, buf + pos, len - pos);
+	if (ret < 0)
+		return ret;
+	pos += ret;
 
-	cmd = HCLGE_OPC_RX_PRIV_BUFF_ALLOC;
-	ret = hclge_dbg_dump_rx_priv_buf_cfg(hdev);
-	if (ret)
-		goto err_qos_cmd_send;
+	ret = hclge_dbg_dump_rx_priv_buf_cfg(hdev, buf + pos, len - pos);
+	if (ret < 0)
+		return ret;
+	pos += ret;
 
-	cmd = HCLGE_OPC_RX_COM_WL_ALLOC;
-	ret = hclge_dbg_dump_rx_common_wl_cfg(hdev);
-	if (ret)
-		goto err_qos_cmd_send;
+	ret = hclge_dbg_dump_rx_common_wl_cfg(hdev, buf + pos, len - pos);
+	if (ret < 0)
+		return ret;
+	pos += ret;
 
-	cmd = HCLGE_OPC_RX_GBL_PKT_CNT;
-	ret = hclge_dbg_dump_rx_global_pkt_cnt(hdev);
-	if (ret)
-		goto err_qos_cmd_send;
+	ret = hclge_dbg_dump_rx_global_pkt_cnt(hdev, buf + pos, len - pos);
+	if (ret < 0)
+		return ret;
+	pos += ret;
 
-	dev_info(&hdev->pdev->dev, "\n");
-	if (!hnae3_dev_dcb_supported(hdev)) {
-		dev_info(&hdev->pdev->dev,
-			 "Only DCB-supported dev supports rx priv wl\n");
-		return;
-	}
+	pos += scnprintf(buf + pos, len - pos, "\n");
+	if (!hnae3_dev_dcb_supported(hdev))
+		return 0;
 
-	cmd = HCLGE_OPC_RX_PRIV_WL_ALLOC;
-	ret = hclge_dbg_dump_rx_priv_wl_buf_cfg(hdev);
-	if (ret)
-		goto err_qos_cmd_send;
+	ret = hclge_dbg_dump_rx_priv_wl_buf_cfg(hdev, buf + pos, len - pos);
+	if (ret < 0)
+		return ret;
+	pos += ret;
 
-	cmd = HCLGE_OPC_RX_COM_THRD_ALLOC;
-	ret = hclge_dbg_dump_rx_common_threshold_cfg(hdev);
-	if (ret)
-		goto err_qos_cmd_send;
+	ret = hclge_dbg_dump_rx_common_threshold_cfg(hdev, buf + pos,
+						     len - pos);
+	if (ret < 0)
+		return ret;
 
-	return;
-
-err_qos_cmd_send:
-	dev_err(&hdev->pdev->dev,
-		"dump qos buf cfg fail(0x%x), ret = %d\n", cmd, ret);
+	return 0;
 }
 
 static int hclge_dbg_dump_mng_table(struct hclge_dev *hdev, char *buf, int len)
@@ -1905,9 +1931,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (strncmp(cmd_buf, "dump qos buf cfg", 16) == 0) {
-		hclge_dbg_dump_qos_buf_cfg(hdev);
-	} else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
+	if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
 		hclge_dbg_dump_mac_tnl_status(hdev);
@@ -1959,6 +1983,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_QOS_PRI_MAP,
 		.dbg_dump = hclge_dbg_dump_qos_pri_map,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_QOS_BUF_CFG,
+		.dbg_dump = hclge_dbg_dump_qos_buf_cfg,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_MAC_UC,
 		.dbg_dump = hclge_dbg_dump_mac_uc,

From 484e1ed1b25a6cd02ab25c871e670760f4b627b6 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 20 May 2021 10:21:41 +0800
Subject: [PATCH 0392/3292] net: hns3: refactor dump qs shaper of debugfs

Currently, user gets qset shaper parameters by implementing debugfs
command "echo dump qs shaper > cmd", this command will dump info in
dmesg. It's unnecessary and heavy.

As there is "tm_qset" file in tm directory for dump qset info, to
optimize these command, merge qset shaper parameters to tm_qset
file and use cat command to get them.

The display style is below:
$ cat tm_qset
ID    MAP_PRI  LINK_VLD  MODE  DWRR  IR_B  IR_U  IR_S  BS_B  BS_S  FLAG
0000     0        1      dwrr  100   150     7     0     5    20     0
0001     0        0        sp    0   150     7     0     5    20     0

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   1 -
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 129 ++++++------------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c |  30 ++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h |   2 +
 4 files changed, 76 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index bd348c1c801b..599b405e754d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -767,7 +767,6 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 		return;
 
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
-	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
 }
 
 static void
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 45ccb04100c5..2b7acf620739 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -991,19 +991,42 @@ static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
+static const struct hclge_dbg_item tm_qset_items[] = {
+	{ "ID", 4 },
+	{ "MAP_PRI", 2 },
+	{ "LINK_VLD", 2 },
+	{ "MODE", 2 },
+	{ "DWRR", 2 },
+	{ "IR_B", 2 },
+	{ "IR_U", 2 },
+	{ "IR_S", 2 },
+	{ "BS_B", 2 },
+	{ "BS_S", 2 },
+	{ "FLAG", 2 },
+	{ "RATE(Mbps)", 0 }
+};
+
 static int hclge_dbg_dump_tm_qset(struct hclge_dev *hdev, char *buf, int len)
 {
+	char data_str[ARRAY_SIZE(tm_qset_items)][HCLGE_DBG_DATA_STR_LEN];
+	char *result[ARRAY_SIZE(tm_qset_items)], *sch_mode_str;
 	u8 priority, link_vld, sch_mode, weight;
-	char *sch_mode_str;
+	struct hclge_tm_shaper_para shaper_para;
+	char content[HCLGE_DBG_TM_INFO_LEN];
+	u16 qset_num, i;
 	int ret, pos;
-	u16 qset_num;
-	u16 i;
+	u8 j;
 
 	ret = hclge_tm_get_qset_num(hdev, &qset_num);
 	if (ret)
 		return ret;
 
-	pos = scnprintf(buf, len, "ID    MAP_PRI  LINK_VLD  MODE  DWRR\n");
+	for (i = 0; i < ARRAY_SIZE(tm_qset_items); i++)
+		result[i] = &data_str[i][0];
+
+	hclge_dbg_fill_content(content, sizeof(content), tm_qset_items,
+			       NULL, ARRAY_SIZE(tm_qset_items));
+	pos = scnprintf(buf, len, "%s", content);
 
 	for (i = 0; i < qset_num; i++) {
 		ret = hclge_tm_get_qset_map_pri(hdev, i, &priority, &link_vld);
@@ -1018,11 +1041,25 @@ static int hclge_dbg_dump_tm_qset(struct hclge_dev *hdev, char *buf, int len)
 		if (ret)
 			return ret;
 
+		ret = hclge_tm_get_qset_shaper(hdev, i, &shaper_para);
+		if (ret)
+			return ret;
+
 		sch_mode_str = sch_mode & HCLGE_TM_TX_SCHD_DWRR_MSK ? "dwrr" :
 			       "sp";
-		pos += scnprintf(buf + pos, len - pos,
-				 "%04u  %4u        %1u      %4s  %3u\n",
-				 i, priority, link_vld, sch_mode_str, weight);
+
+		j = 0;
+		sprintf(result[j++], "%04u", i);
+		sprintf(result[j++], "%4u", priority);
+		sprintf(result[j++], "%4u", link_vld);
+		sprintf(result[j++], "%4s", sch_mode_str);
+		sprintf(result[j++], "%3u", weight);
+		hclge_dbg_fill_shaper_content(&shaper_para, result, &j);
+
+		hclge_dbg_fill_content(content, sizeof(content), tm_qset_items,
+				       (const char **)result,
+				       ARRAY_SIZE(tm_qset_items));
+		pos += scnprintf(buf + pos, len - pos, "%s", content);
 	}
 
 	return 0;
@@ -1787,81 +1824,6 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev)
 	}
 }
 
-static void hclge_dbg_dump_qs_shaper_single(struct hclge_dev *hdev, u16 qsid)
-{
-	struct hclge_qs_shapping_cmd *shap_cfg_cmd;
-	u8 ir_u, ir_b, ir_s, bs_b, bs_s;
-	struct hclge_desc desc;
-	u32 shapping_para;
-	u32 rate;
-	int ret;
-
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true);
-
-	shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
-	shap_cfg_cmd->qs_id = cpu_to_le16(qsid);
-
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"qs%u failed to get tx_rate, ret=%d\n",
-			qsid, ret);
-		return;
-	}
-
-	shapping_para = le32_to_cpu(shap_cfg_cmd->qs_shapping_para);
-	ir_b = hclge_tm_get_field(shapping_para, IR_B);
-	ir_u = hclge_tm_get_field(shapping_para, IR_U);
-	ir_s = hclge_tm_get_field(shapping_para, IR_S);
-	bs_b = hclge_tm_get_field(shapping_para, BS_B);
-	bs_s = hclge_tm_get_field(shapping_para, BS_S);
-	rate = le32_to_cpu(shap_cfg_cmd->qs_rate);
-
-	dev_info(&hdev->pdev->dev,
-		 "qs%u ir_b:%u, ir_u:%u, ir_s:%u, bs_b:%u, bs_s:%u, flag:%#x, rate:%u(Mbps)\n",
-		 qsid, ir_b, ir_u, ir_s, bs_b, bs_s, shap_cfg_cmd->flag, rate);
-}
-
-static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev)
-{
-	struct hnae3_knic_private_info *kinfo;
-	struct hclge_vport *vport;
-	int vport_id, i;
-
-	for (vport_id = 0; vport_id <= pci_num_vf(hdev->pdev); vport_id++) {
-		vport = &hdev->vport[vport_id];
-		kinfo = &vport->nic.kinfo;
-
-		dev_info(&hdev->pdev->dev, "qs cfg of vport%d:\n", vport_id);
-
-		for (i = 0; i < kinfo->tc_info.num_tc; i++) {
-			u16 qsid = vport->qs_offset + i;
-
-			hclge_dbg_dump_qs_shaper_single(hdev, qsid);
-		}
-	}
-}
-
-static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev,
-				     const char *cmd_buf)
-{
-	u16 qsid;
-	int ret;
-
-	ret = kstrtou16(cmd_buf, 0, &qsid);
-	if (ret) {
-		hclge_dbg_dump_qs_shaper_all(hdev);
-		return;
-	}
-
-	if (qsid >= hdev->ae_dev->dev_specs.max_qset_num) {
-		dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-%u]\n",
-			qsid, hdev->ae_dev->dev_specs.max_qset_num - 1);
-		return;
-	}
-
-	hclge_dbg_dump_qs_shaper_single(hdev, qsid);
-}
 
 static const struct hclge_dbg_item mac_list_items[] = {
 	{ "FUNC_ID", 2 },
@@ -1935,9 +1897,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_serv_info(hdev);
 	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
 		hclge_dbg_dump_mac_tnl_status(hdev);
-	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
-		hclge_dbg_dump_qs_shaper(hdev,
-					 &cmd_buf[sizeof("dump qs shaper")]);
 	} else {
 		dev_info(&hdev->pdev->dev, "unknown command\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 45870fec0506..78d5bf1ea561 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1733,6 +1733,36 @@ int hclge_tm_get_qset_weight(struct hclge_dev *hdev, u16 qset_id, u8 *weight)
 	return 0;
 }
 
+int hclge_tm_get_qset_shaper(struct hclge_dev *hdev, u16 qset_id,
+			     struct hclge_tm_shaper_para *para)
+{
+	struct hclge_qs_shapping_cmd *shap_cfg_cmd;
+	struct hclge_desc desc;
+	u32 shapping_para;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QCN_SHAPPING_CFG, true);
+	shap_cfg_cmd = (struct hclge_qs_shapping_cmd *)desc.data;
+	shap_cfg_cmd->qs_id = cpu_to_le16(qset_id);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get qset %u shaper, ret = %d\n", qset_id,
+			ret);
+		return ret;
+	}
+
+	shapping_para = le32_to_cpu(shap_cfg_cmd->qs_shapping_para);
+	para->ir_b = hclge_tm_get_field(shapping_para, IR_B);
+	para->ir_u = hclge_tm_get_field(shapping_para, IR_U);
+	para->ir_s = hclge_tm_get_field(shapping_para, IR_S);
+	para->bs_b = hclge_tm_get_field(shapping_para, BS_B);
+	para->bs_s = hclge_tm_get_field(shapping_para, BS_S);
+	para->flag = shap_cfg_cmd->flag;
+	para->rate = le32_to_cpu(shap_cfg_cmd->qs_rate);
+	return 0;
+}
+
 int hclge_tm_get_pri_sch_mode(struct hclge_dev *hdev, u8 pri_id, u8 *mode)
 {
 	struct hclge_pri_sch_mode_cfg_cmd *pri_sch_mode;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index d6f148171353..2ee9b795f71d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -237,6 +237,8 @@ int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority,
 			      u8 *link_vld);
 int hclge_tm_get_qset_sch_mode(struct hclge_dev *hdev, u16 qset_id, u8 *mode);
 int hclge_tm_get_qset_weight(struct hclge_dev *hdev, u16 qset_id, u8 *weight);
+int hclge_tm_get_qset_shaper(struct hclge_dev *hdev, u16 qset_id,
+			     struct hclge_tm_shaper_para *para);
 int hclge_tm_get_pri_sch_mode(struct hclge_dev *hdev, u8 pri_id, u8 *mode);
 int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight);
 int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,

From 7b07ab06e6b00b4421a4dfd732e98b359e0bad91 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Thu, 20 May 2021 10:21:42 +0800
Subject: [PATCH 0393/3292] net: hns3: refactor dump mac tnl status of debugfs

Currently, the debugfs command for dump mac tnl status is
implemented by "echo xxxx > cmd", and record the information
in dmesg. It's unnecessary and heavy. To improve it, create
a single file "mac_tnl_status" for it, and query it by command
"cat mac_tnl_status", return the result to userspace, rather
than record in dmesg.

The display style is below:
$ cat mac_tnl_status
Recently generated mac tnl interruption:
[0111204.175437] status = 0x30
[0154120.329912] status = 0x30

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 12 ++++++----
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 23 +++++++++++++------
 3 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 9af1d64e45df..ed06431c290c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -287,6 +287,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_RX_QUEUE_INFO,
 	HNAE3_DBG_CMD_TX_QUEUE_INFO,
 	HNAE3_DBG_CMD_FD_TCAM,
+	HNAE3_DBG_CMD_MAC_TNL_STATUS,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 599b405e754d..04c19a0c0e39 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -190,6 +190,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN_128KB,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "mac_tnl_status",
+		.cmd = HNAE3_DBG_CMD_MAC_TNL_STATUS,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 	{
 		.name = "bios_common",
 		.cmd = HNAE3_DBG_CMD_REG_BIOS_COMMON,
@@ -762,11 +769,6 @@ static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
 static void hns3_dbg_help(struct hnae3_handle *h)
 {
 	dev_info(&h->pdev->dev, "available commands\n");
-
-	if (!hns3_is_phys_func(h->pdev))
-		return;
-
-	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 }
 
 static void
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 2b7acf620739..fe7ceab85459 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1807,21 +1807,28 @@ static int hclge_dbg_dump_loopback(struct hclge_dev *hdev, char *buf, int len)
 /* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt
  * @hdev: pointer to struct hclge_dev
  */
-static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev)
+static int
+hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev, char *buf, int len)
 {
 #define HCLGE_BILLION_NANO_SECONDS 1000000000
 
 	struct hclge_mac_tnl_stats stats;
 	unsigned long rem_nsec;
+	int pos = 0;
 
-	dev_info(&hdev->pdev->dev, "Recently generated mac tnl interruption:\n");
+	pos += scnprintf(buf + pos, len - pos,
+			 "Recently generated mac tnl interruption:\n");
 
 	while (kfifo_get(&hdev->mac_tnl_log, &stats)) {
 		rem_nsec = do_div(stats.time, HCLGE_BILLION_NANO_SECONDS);
-		dev_info(&hdev->pdev->dev, "[%07lu.%03lu] status = 0x%x\n",
-			 (unsigned long)stats.time, rem_nsec / 1000,
-			 stats.status);
+
+		pos += scnprintf(buf + pos, len - pos,
+				 "[%07lu.%03lu] status = 0x%x\n",
+				 (unsigned long)stats.time, rem_nsec / 1000,
+				 stats.status);
 	}
+
+	return 0;
 }
 
 
@@ -1895,8 +1902,6 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 
 	if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
 		hclge_dbg_dump_serv_info(hdev);
-	} else if (strncmp(cmd_buf, "dump mac tnl status", 19) == 0) {
-		hclge_dbg_dump_mac_tnl_status(hdev);
 	} else {
 		dev_info(&hdev->pdev->dev, "unknown command\n");
 		return -EINVAL;
@@ -2026,6 +2031,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_FD_TCAM,
 		.dbg_dump = hclge_dbg_dump_fd_tcam,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_MAC_TNL_STATUS,
+		.dbg_dump = hclge_dbg_dump_mac_tnl_status,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From 058c3be95235a12953d6533ef1486dc3d5879688 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 10:21:43 +0800
Subject: [PATCH 0394/3292] net: hns3: refactor dump serv info of debugfs

Currently, the debugfs command for serv info is implemented by
"echo xxxx > cmd", and record the inforamtion in dmesg. It's
unnecessary and heavy. To improve it, create a single file
"serv_info" for it, and query it by command "cat serv_info",
return the result to userspace, rather than record in dmesg.

The display style is below:
$ cat service_task_info
local_clock: [  114.203321]
delta: 784(ms)
last_service_task_processed: 4294918512(jiffies)
last_service_task_cnt: 4

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  7 ++++
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 39 ++++++++++++-------
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |  2 +
 4 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ed06431c290c..09a065856cad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -288,6 +288,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_TX_QUEUE_INFO,
 	HNAE3_DBG_CMD_FD_TCAM,
 	HNAE3_DBG_CMD_MAC_TNL_STATUS,
+	HNAE3_DBG_CMD_SERV_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 04c19a0c0e39..04102d73e89c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -302,6 +302,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "service_task_info",
+		.cmd = HNAE3_DBG_CMD_SERV_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index fe7ceab85459..e7a043a08685 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1590,12 +1590,26 @@ int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
-static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev)
+static int hclge_dbg_dump_serv_info(struct hclge_dev *hdev, char *buf, int len)
 {
-	dev_info(&hdev->pdev->dev, "last_serv_processed: %lu\n",
-		 hdev->last_serv_processed);
-	dev_info(&hdev->pdev->dev, "last_serv_cnt: %lu\n",
-		 hdev->serv_processed_cnt);
+	unsigned long rem_nsec;
+	int pos = 0;
+	u64 lc;
+
+	lc = local_clock();
+	rem_nsec = do_div(lc, HCLGE_BILLION_NANO_SECONDS);
+
+	pos += scnprintf(buf + pos, len - pos, "local_clock: [%5lu.%06lu]\n",
+			 (unsigned long)lc, rem_nsec / 1000);
+	pos += scnprintf(buf + pos, len - pos, "delta: %u(ms)\n",
+			 jiffies_to_msecs(jiffies - hdev->last_serv_processed));
+	pos += scnprintf(buf + pos, len - pos,
+			 "last_service_task_processed: %lu(jiffies)\n",
+			 hdev->last_serv_processed);
+	pos += scnprintf(buf + pos, len - pos, "last_service_task_cnt: %lu\n",
+			 hdev->serv_processed_cnt);
+
+	return 0;
 }
 
 static int hclge_dbg_dump_interrupt(struct hclge_dev *hdev, char *buf, int len)
@@ -1810,8 +1824,6 @@ static int hclge_dbg_dump_loopback(struct hclge_dev *hdev, char *buf, int len)
 static int
 hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev, char *buf, int len)
 {
-#define HCLGE_BILLION_NANO_SECONDS 1000000000
-
 	struct hclge_mac_tnl_stats stats;
 	unsigned long rem_nsec;
 	int pos = 0;
@@ -1900,14 +1912,9 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
-		hclge_dbg_dump_serv_info(hdev);
-	} else {
-		dev_info(&hdev->pdev->dev, "unknown command\n");
-		return -EINVAL;
-	}
+	dev_info(&hdev->pdev->dev, "unknown command\n");
 
-	return 0;
+	return -EINVAL;
 }
 
 static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
@@ -2035,6 +2042,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_MAC_TNL_STATUS,
 		.dbg_dump = hclge_dbg_dump_mac_tnl_status,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_SERV_INFO,
+		.dbg_dump = hclge_dbg_dump_serv_info,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index c4956e3d8a41..642752e65a7c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -740,6 +740,8 @@ static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
 #define HCLGE_DBG_DATA_STR_LEN			32
 #define HCLGE_DBG_TM_INFO_LEN			256
 
+#define HCLGE_BILLION_NANO_SECONDS	1000000000
+
 struct hclge_dbg_item {
 	char name[HCLGE_DBG_ITEM_NAME_LEN];
 	u16 interval; /* blank numbers after the item */

From b4689aaf18633ff1b9ce37b09e226a7964ce9751 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 10:21:44 +0800
Subject: [PATCH 0395/3292] net: hns3: remove the useless debugfs file node cmd

Currently, all debugfs commands have been reconstructed, and the
debugfs file node cmd is useless. So remove this debugfs file node.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 -
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 106 ------------------
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     |  10 --
 .../hisilicon/hns3/hns3pf/hclge_main.c        |   1 -
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   1 -
 5 files changed, 119 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 09a065856cad..57fa7fc97c69 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -668,7 +668,6 @@ struct hnae3_ae_ops {
 	void (*enable_fd)(struct hnae3_handle *handle, bool enable);
 	int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id,
 			      u16 flow_id, struct flow_keys *fkeys);
-	int (*dbg_run_cmd)(struct hnae3_handle *handle, const char *cmd_buf);
 	int (*dbg_read_cmd)(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
 			    char *buf, int len);
 	pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 04102d73e89c..57ba5a16ad73 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -773,11 +773,6 @@ static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len)
 	return 0;
 }
 
-static void hns3_dbg_help(struct hnae3_handle *h)
-{
-	dev_info(&h->pdev->dev, "available commands\n");
-}
-
 static void
 hns3_dbg_dev_caps(struct hnae3_handle *h, char *buf, int len, int *pos)
 {
@@ -852,97 +847,6 @@ static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
 	return 0;
 }
 
-static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer,
-				 size_t count, loff_t *ppos)
-{
-	int uncopy_bytes;
-	char *buf;
-	int len;
-
-	if (*ppos != 0)
-		return 0;
-
-	if (count < HNS3_DBG_READ_LEN)
-		return -ENOSPC;
-
-	buf = kzalloc(HNS3_DBG_READ_LEN, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	len = scnprintf(buf, HNS3_DBG_READ_LEN, "%s\n",
-			"Please echo help to cmd to get help information");
-	uncopy_bytes = copy_to_user(buffer, buf, len);
-
-	kfree(buf);
-
-	if (uncopy_bytes)
-		return -EFAULT;
-
-	return (*ppos = len);
-}
-
-static int hns3_dbg_check_cmd(struct hnae3_handle *handle, char *cmd_buf)
-{
-	int ret = 0;
-
-	if (strncmp(cmd_buf, "help", 4) == 0)
-		hns3_dbg_help(handle);
-	else if (handle->ae_algo->ops->dbg_run_cmd)
-		ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf);
-	else
-		ret = -EOPNOTSUPP;
-
-	return ret;
-}
-
-static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer,
-				  size_t count, loff_t *ppos)
-{
-	struct hnae3_handle *handle = filp->private_data;
-	struct hns3_nic_priv *priv  = handle->priv;
-	char *cmd_buf, *cmd_buf_tmp;
-	int uncopied_bytes;
-	int ret;
-
-	if (*ppos != 0)
-		return 0;
-
-	/* Judge if the instance is being reset. */
-	if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
-	    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
-		return 0;
-
-	if (count > HNS3_DBG_WRITE_LEN)
-		return -ENOSPC;
-
-	cmd_buf = kzalloc(count + 1, GFP_KERNEL);
-	if (!cmd_buf)
-		return count;
-
-	uncopied_bytes = copy_from_user(cmd_buf, buffer, count);
-	if (uncopied_bytes) {
-		kfree(cmd_buf);
-		return -EFAULT;
-	}
-
-	cmd_buf[count] = '\0';
-
-	cmd_buf_tmp = strchr(cmd_buf, '\n');
-	if (cmd_buf_tmp) {
-		*cmd_buf_tmp = '\0';
-		count = cmd_buf_tmp - cmd_buf + 1;
-	}
-
-	ret = hns3_dbg_check_cmd(handle, cmd_buf);
-	if (ret)
-		hns3_dbg_help(handle);
-
-	kfree(cmd_buf);
-	cmd_buf = NULL;
-
-	return count;
-}
-
 static int hns3_dbg_get_cmd_index(struct hnae3_handle *handle,
 				  const unsigned char *name, u32 *index)
 {
@@ -1071,13 +975,6 @@ out:
 	return ret;
 }
 
-static const struct file_operations hns3_dbg_cmd_fops = {
-	.owner = THIS_MODULE,
-	.open  = simple_open,
-	.read  = hns3_dbg_cmd_read,
-	.write = hns3_dbg_cmd_write,
-};
-
 static const struct file_operations hns3_dbg_fops = {
 	.owner = THIS_MODULE,
 	.open  = simple_open,
@@ -1140,9 +1037,6 @@ int hns3_dbg_init(struct hnae3_handle *handle)
 				debugfs_create_dir(name, hns3_dbgfs_root);
 	handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry;
 
-	debugfs_create_file("cmd", 0600, handle->hnae3_dbgfs, handle,
-			    &hns3_dbg_cmd_fops);
-
 	for (i = 0; i < HNS3_DBG_DENTRY_COMMON; i++)
 		hns3_dbg_dentry[i].dentry =
 			debugfs_create_dir(hns3_dbg_dentry[i].name,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index e7a043a08685..dd9eb6e6f5a7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1907,16 +1907,6 @@ static int hclge_dbg_dump_mac_mc(struct hclge_dev *hdev, char *buf, int len)
 	return 0;
 }
 
-int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
-{
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-
-	dev_info(&hdev->pdev->dev, "unknown command\n");
-
-	return -EINVAL;
-}
-
 static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 	{
 		.cmd = HNAE3_DBG_CMD_TM_NODES,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d4d3f0b247af..3882f829fc49 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -12610,7 +12610,6 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_fd_all_rules = hclge_get_all_rules,
 	.enable_fd = hclge_enable_fd,
 	.add_arfs_entry = hclge_add_fd_entry_by_arfs,
-	.dbg_run_cmd = hclge_dbg_run_cmd,
 	.dbg_read_cmd = hclge_dbg_read_cmd,
 	.handle_hw_ras_error = hclge_handle_hw_ras_error,
 	.get_hw_reset_stat = hclge_get_hw_reset_stat,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 8bf451ef0b05..4bdb0243a97a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1062,7 +1062,6 @@ int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
 int hclge_vport_start(struct hclge_vport *vport);
 void hclge_vport_stop(struct hclge_vport *vport);
 int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu);
-int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf);
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
 		       char *buf, int len);
 u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id);

From c169a93c8176e40f8956ca365ce466537101cd51 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:46 +0800
Subject: [PATCH 0396/3292] net: wan: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

	$ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Xie He <xie.he.0141@gmail.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lmc/lmc.h | 2 +-
 drivers/net/wan/wanxl.c   | 4 ++--
 drivers/net/wan/z85230.c  | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/lmc/lmc.h b/drivers/net/wan/lmc/lmc.h
index 38961793adad..3bd541c868d5 100644
--- a/drivers/net/wan/lmc/lmc.h
+++ b/drivers/net/wan/lmc/lmc.h
@@ -9,7 +9,7 @@
  */
 int lmc_probe(struct net_device * dev);
 unsigned lmc_mii_readreg(lmc_softc_t * const sc, unsigned
-      			  devaddr, unsigned regno);
+			  devaddr, unsigned regno);
 void lmc_mii_writereg(lmc_softc_t * const sc, unsigned devaddr,
 			       unsigned regno, unsigned data);
 void lmc_led_on(lmc_softc_t * const, u32);
diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index a83133388de9..f393684f203a 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -639,7 +639,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	card->plx = ioremap(plx_phy, 0x70);
 	if (!card->plx) {
 		pr_err("ioremap() failed\n");
- 		wanxl_pci_remove_one(pdev);
+		wanxl_pci_remove_one(pdev);
 		return -EFAULT;
 	}
 
@@ -707,7 +707,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	mem = ioremap(mem_phy, PDM_OFFSET + sizeof(firmware));
 	if (!mem) {
 		pr_err("ioremap() failed\n");
- 		wanxl_pci_remove_one(pdev);
+		wanxl_pci_remove_one(pdev);
 		return -EFAULT;
 	}
 
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 138930c66ad2..002b8c99ab5b 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1080,7 +1080,7 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	z8530_rx_done(c);
 	z8530_rx_done(c);
 
- 	/*
+	/*
 	 *	Load the DMA interfaces up
 	 */
 
@@ -1092,13 +1092,13 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	c->dma_ready=1;
 	c->dma_tx = 1;
 
- 	/*
+	/*
 	 *	Enable DMA control mode
 	 */
 
- 	/*
+	/*
 	 *	TX DMA via DIR/REQ
- 	 */
+	 */
 	c->regs[R14]|= DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);     
 	

From 094fefd663adb651833989bb3cef7d8fd56abfb8 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:47 +0800
Subject: [PATCH 0397/3292] net: usb: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/mcs7830.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index 9f9352a4522f..2469bdcb1a04 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -601,7 +601,7 @@ MODULE_DEVICE_TABLE(usb, products);
 
 static int mcs7830_reset_resume (struct usb_interface *intf)
 {
- 	/* YES, this function is successful enough that ethtool -d
+	/* YES, this function is successful enough that ethtool -d
            does show same output pre-/post-suspend */
 
 	struct usbnet		*dev = usb_get_intfdata(intf);

From 1d314fc1a157f3a39af68518c27a9e98b125053d Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:48 +0800
Subject: [PATCH 0398/3292] net: slip: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/slip/slhc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index f78ceba42e57..ba93bab948e0 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -325,7 +325,7 @@ found:
 	 * Found it -- move to the front on the connection list.
 	 */
 	if(lcs == ocs) {
- 		/* found at most recently used */
+		/* found at most recently used */
 	} else if (cs == ocs) {
 		/* found at least recently used */
 		comp->xmit_oldest = lcs->cs_this;

From 63b63138f656d2ab9e1e692b6d1e4112406741a0 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:49 +0800
Subject: [PATCH 0399/3292] net: ppp: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Tom Parkin <tparkin@katalix.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/bsd_comp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ppp/bsd_comp.c b/drivers/net/ppp/bsd_comp.c
index 61fedb23d3cf..db0dc36d12e3 100644
--- a/drivers/net/ppp/bsd_comp.c
+++ b/drivers/net/ppp/bsd_comp.c
@@ -436,7 +436,7 @@ static void *bsd_alloc (unsigned char *options, int opt_len, int decomp)
  * Initialize the data information for the compression code
  */
     db->totlen     = sizeof (struct bsd_db)   +
-      		    (sizeof (struct bsd_dict) * hsize);
+		    (sizeof (struct bsd_dict) * hsize);
 
     db->hsize      = hsize;
     db->hshift     = hshift;

From d1542f85dfc29f4a012e98730d8b465ea05cd461 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:50 +0800
Subject: [PATCH 0400/3292] net: hamradio: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/baycom_epp.c |  4 ++--
 drivers/net/hamradio/hdlcdrv.c    |  2 +-
 drivers/net/hamradio/mkiss.c      |  6 +++---
 drivers/net/hamradio/scc.c        | 20 ++++++++++----------
 drivers/net/hamradio/yam.c        |  2 +-
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index e4e4981ac1d2..4435a1195194 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -231,7 +231,7 @@ struct baycom_state {
 #if 0
 static inline void append_crc_ccitt(unsigned char *buffer, int len)
 {
- 	unsigned int crc = 0xffff;
+	unsigned int crc = 0xffff;
 
 	for (;len>0;len--)
 		crc = (crc >> 8) ^ crc_ccitt_table[(crc ^ *buffer++) & 0xff];
@@ -390,7 +390,7 @@ static void encode_hdlc(struct baycom_state *bc)
 		for (j = 0; j < 8; j++)
 			if (unlikely(!(notbitstream & (0x1f0 << j)))) {
 				bitstream &= ~(0x100 << j);
- 				bitbuf = (bitbuf & (((2 << j) << numbit) - 1)) |
+				bitbuf = (bitbuf & (((2 << j) << numbit) - 1)) |
 					((bitbuf & ~(((2 << j) << numbit) - 1)) << 1);
 				numbit++;
 				notbitstream = ~bitstream;
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index 9e0058154ac3..cbaf1cdde7cb 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -74,7 +74,7 @@
 
 static inline void append_crc_ccitt(unsigned char *buffer, int len)
 {
- 	unsigned int crc = crc_ccitt(0xffff, buffer, len) ^ 0xffff;
+	unsigned int crc = crc_ccitt(0xffff, buffer, len) ^ 0xffff;
 	buffer += len;
 	*buffer++ = crc;
 	*buffer++ = crc >> 8;
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 65154224d5b8..9933c87c1327 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -276,7 +276,7 @@ static void ax_bump(struct mkiss *ax)
 			 */
 			*ax->rbuff &= ~0x20;
 		}
- 	}
+	}
 
 	count = ax->rcount;
 
@@ -501,7 +501,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 		default:
 			count = kiss_esc(p, ax->xbuff, len);
 		}
-  	}
+	}
 	spin_unlock_bh(&ax->buflock);
 
 	set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
@@ -815,7 +815,7 @@ static int mkiss_ioctl(struct tty_struct *tty, struct file *file,
 	dev = ax->dev;
 
 	switch (cmd) {
- 	case SIOCGIFNAME:
+	case SIOCGIFNAME:
 		err = copy_to_user((void __user *) arg, ax->dev->name,
 		                   strlen(ax->dev->name) + 1) ? -EFAULT : 0;
 		break;
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 4690c6a59054..3f1edd0526a4 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1192,18 +1192,18 @@ static void t_tail(struct timer_list *t)
 	unsigned long flags;
 	
 	spin_lock_irqsave(&scc->lock, flags); 
- 	del_timer(&scc->tx_wdog);	
- 	scc_key_trx(scc, TX_OFF);
+	del_timer(&scc->tx_wdog);
+	scc_key_trx(scc, TX_OFF);
 	spin_unlock_irqrestore(&scc->lock, flags);
 
- 	if (scc->stat.tx_state == TXS_TIMEOUT)		/* we had a timeout? */
- 	{
- 		scc->stat.tx_state = TXS_WAIT;
+	if (scc->stat.tx_state == TXS_TIMEOUT)		/* we had a timeout? */
+	{
+		scc->stat.tx_state = TXS_WAIT;
 		scc_start_tx_timer(scc, t_dwait, scc->kiss.mintime*100);
- 		return;
- 	}
- 	
- 	scc->stat.tx_state = TXS_IDLE;
+		return;
+	}
+
+	scc->stat.tx_state = TXS_IDLE;
 	netif_wake_queue(scc->dev);
 }
 
@@ -1580,7 +1580,7 @@ static int scc_net_open(struct net_device *dev)
 {
 	struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
 
- 	if (!scc->init)
+	if (!scc->init)
 		return -EINVAL;
 
 	scc->tx_buff = NULL;
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 5ab53e9942f3..d4911041596c 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -668,7 +668,7 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
 			}
 			yp->tx_len = skb->len - 1;	/* strip KISS byte */
 			if (yp->tx_len >= YAM_MAX_FRAME || yp->tx_len < 2) {
-        			dev_kfree_skb_any(skb);
+				dev_kfree_skb_any(skb);
 				break;
 			}
 			skb_copy_from_linear_data_offset(skb, 1,

From a597111a3ce330f28fca9cc3806cf7a0b3d5e4c2 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:51 +0800
Subject: [PATCH 0401/3292] net: fddi: skfp: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/skfp/ess.c        | 6 +++---
 drivers/net/fddi/skfp/h/supern_2.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/fddi/skfp/ess.c b/drivers/net/fddi/skfp/ess.c
index 35110c0c00a0..41107338f0c0 100644
--- a/drivers/net/fddi/skfp/ess.c
+++ b/drivers/net/fddi/skfp/ess.c
@@ -379,17 +379,17 @@ static int process_bw_alloc(struct s_smc *smc, long int payload, long int overhe
 	 * if the payload is greater than zero.
 	 * For the SBAPayload and the SBAOverhead we have the following
 	 * unite quations
- 	 *		      _		  _
+	 *		      _		  _
 	 *		     |	     bytes |
 	 *	SBAPayload = | 8000 ------ |
 	 *		     |		s  |
 	 *		      -		  -
- 	 *		       _       _
+	 *		       _       _
 	 *		      |	 bytes	|
 	 *	SBAOverhead = | ------	|
 	 *		      |	 T-NEG	|
 	 *		       -       -
- 	 *
+	 *
 	 * T-NEG is described by the equation:
 	 *
 	 *		     (-) fddiMACT-NEG
diff --git a/drivers/net/fddi/skfp/h/supern_2.h b/drivers/net/fddi/skfp/h/supern_2.h
index 78ae8ea4007c..0bbbd411d000 100644
--- a/drivers/net/fddi/skfp/h/supern_2.h
+++ b/drivers/net/fddi/skfp/h/supern_2.h
@@ -1025,7 +1025,7 @@ struct tx_queue {
 #define	PLC_QELM_A_BIST	0x5b6b		/* BIST signature of QELM Rev. A */
 
 /*
- 	FDDI board recources	
+	FDDI board recources
  */
 
 /*

From 20a4fc3bc2849068c4119c9ce4d57e8f18a1b329 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:52 +0800
Subject: [PATCH 0402/3292] net: appletalk: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/cops.c | 30 +++++++++++++++---------------
 drivers/net/appletalk/ltpc.c |  6 +++---
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index ba8e70a8e312..992e92fb4e8e 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -609,12 +609,12 @@ static int cops_nodeid (struct net_device *dev, int nodeid)
 
 	if(lp->board == DAYNA)
         {
-        	/* Empty any pending adapter responses. */
+		/* Empty any pending adapter responses. */
                 while((inb(ioaddr+DAYNA_CARD_STATUS)&DAYNA_TX_READY)==0)
                 {
 			outb(0, ioaddr+COPS_CLEAR_INT);	/* Clear interrupts. */
-        		if((inb(ioaddr+DAYNA_CARD_STATUS)&0x03)==DAYNA_RX_REQUEST)
-                		cops_rx(dev);	/* Kick any packets waiting. */
+			if((inb(ioaddr+DAYNA_CARD_STATUS)&0x03)==DAYNA_RX_REQUEST)
+				cops_rx(dev);	/* Kick any packets waiting. */
 			schedule();
                 }
 
@@ -630,13 +630,13 @@ static int cops_nodeid (struct net_device *dev, int nodeid)
                 while(inb(ioaddr+TANG_CARD_STATUS)&TANG_RX_READY)
                 {
 			outb(0, ioaddr+COPS_CLEAR_INT);	/* Clear interrupt. */
-                	cops_rx(dev);          	/* Kick out packets waiting. */
+			cops_rx(dev);          	/* Kick out packets waiting. */
 			schedule();
                 }
 
 		/* Not sure what Tangent does if nodeid picked is used. */
                 if(nodeid == 0)	         		/* Seed. */
-                	nodeid = jiffies&0xFF;		/* Get a random try */
+			nodeid = jiffies&0xFF;		/* Get a random try */
                 outb(2, ioaddr);        		/* Command length LSB */
                 outb(0, ioaddr);       			/* Command length MSB */
                 outb(LAP_INIT, ioaddr); 		/* Send LAP_INIT byte */
@@ -651,13 +651,13 @@ static int cops_nodeid (struct net_device *dev, int nodeid)
 
 		if(lp->board == DAYNA)
 		{
-                	if((inb(ioaddr+DAYNA_CARD_STATUS)&0x03)==DAYNA_RX_REQUEST)
-                		cops_rx(dev);	/* Grab the nodeid put in lp->node_acquire. */
+			if((inb(ioaddr+DAYNA_CARD_STATUS)&0x03)==DAYNA_RX_REQUEST)
+				cops_rx(dev);	/* Grab the nodeid put in lp->node_acquire. */
 		}
 		if(lp->board == TANGENT)
 		{	
 			if(inb(ioaddr+TANG_CARD_STATUS)&TANG_RX_READY)
-                                cops_rx(dev);   /* Grab the nodeid put in lp->node_acquire. */
+				cops_rx(dev);   /* Grab the nodeid put in lp->node_acquire. */
 		}
 		schedule();
 	}
@@ -719,16 +719,16 @@ static irqreturn_t cops_interrupt(int irq, void *dev_id)
 	{
 		do {
 			outb(0, ioaddr + COPS_CLEAR_INT);
-                       	status=inb(ioaddr+DAYNA_CARD_STATUS);
-                       	if((status&0x03)==DAYNA_RX_REQUEST)
-                       	        cops_rx(dev);
-                	netif_wake_queue(dev);
+			status=inb(ioaddr+DAYNA_CARD_STATUS);
+			if((status&0x03)==DAYNA_RX_REQUEST)
+				cops_rx(dev);
+			netif_wake_queue(dev);
 		} while(++boguscount < 20);
 	}
 	else
 	{
 		do {
-                       	status=inb(ioaddr+TANG_CARD_STATUS);
+			status=inb(ioaddr+TANG_CARD_STATUS);
 			if(status & TANG_RX_READY)
 				cops_rx(dev);
 			if(status & TANG_TX_READY)
@@ -855,7 +855,7 @@ static void cops_timeout(struct net_device *dev, unsigned int txqueue)
         if(lp->board==TANGENT)
         {
 		if((inb(ioaddr+TANG_CARD_STATUS)&TANG_TX_READY)==0)
-               		printk(KERN_WARNING "%s: No TX complete interrupt.\n", dev->name);
+			printk(KERN_WARNING "%s: No TX complete interrupt.\n", dev->name);
 	}
 	printk(KERN_WARNING "%s: Transmit timed out.\n", dev->name);
 	cops_jumpstart(dev);	/* Restart the card. */
@@ -897,7 +897,7 @@ static netdev_tx_t cops_send_packet(struct sk_buff *skb,
 	outb(LAP_WRITE, ioaddr);
 
 	if(lp->board == DAYNA)	/* Check the transmit buffer again. */
-        	while((inb(ioaddr+DAYNA_CARD_STATUS)&DAYNA_TX_READY)==0);
+		while((inb(ioaddr+DAYNA_CARD_STATUS)&DAYNA_TX_READY)==0);
 
 	outsb(ioaddr, skb->data, skb->len);	/* Send out the data. */
 
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index c6f73aa3700c..f0e715a93852 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -935,10 +935,10 @@ static netdev_tx_t ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 static int __init ltpc_probe_dma(int base, int dma)
 {
 	int want = (dma == 3) ? 2 : (dma == 1) ? 1 : 3;
-  	unsigned long timeout;
-  	unsigned long f;
+	unsigned long timeout;
+	unsigned long f;
   
-  	if (want & 1) {
+	if (want & 1) {
 		if (request_dma(1,"ltpc")) {
 			want &= ~1;
 		} else {

From cf9207d77aef758efe884e3edb6bf38baacf24ec Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:53 +0800
Subject: [PATCH 0403/3292] ifb: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index ab7022582154..e9258a9f3702 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -16,10 +16,10 @@
 	by Patrick McHardy and then maintained by Andre Correa.
 
 	You need the tc action  mirror or redirect to feed this device
-       	packets.
+	packets.
 
 
-  	Authors:	Jamal Hadi Salim (2005)
+	Authors:	Jamal Hadi Salim (2005)
 
 */
 

From 9e5914cc9571fca4de2978aaa14e7d8a262a7ca4 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Thu, 20 May 2021 11:47:54 +0800
Subject: [PATCH 0404/3292] mii: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running
the following commard:

    $ find . -name '*.[ch]' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mii.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index e71ebb933266..779c3a96dba7 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -81,7 +81,7 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 	bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
 	bmsr = mii->mdio_read(dev, mii->phy_id, MII_BMSR);
 	if (mii->supports_gmii) {
- 		ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
+		ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
 		stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000);
 	}
 

From 86a5ad0a4608c96055117ae031ceb6ed387f026c Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 14:18:32 +0800
Subject: [PATCH 0405/3292] net: bonding: add some required blank lines

Add some blank lines after declarations as required.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c    | 3 +++
 drivers/net/bonding/bond_main.c   | 2 ++
 drivers/net/bonding/bond_procfs.c | 1 +
 drivers/net/bonding/bond_sysfs.c  | 7 +++++++
 4 files changed, 13 insertions(+)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 3455f2cc13f2..c63e0d1faa63 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -104,6 +104,7 @@ static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
 		index = SLAVE_TLB_INFO(slave).head;
 		while (index != TLB_NULL_INDEX) {
 			u32 next_index = tx_hash_table[index].next;
+
 			tlb_init_table_entry(&tx_hash_table[index], save_load);
 			index = next_index;
 		}
@@ -628,6 +629,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb,
 
 		if (!client_info->assigned) {
 			u32 prev_tbl_head = bond_info->rx_hashtbl_used_head;
+
 			bond_info->rx_hashtbl_used_head = hash_index;
 			client_info->used_next = prev_tbl_head;
 			if (prev_tbl_head != RLB_NULL_INDEX) {
@@ -830,6 +832,7 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
 	while (index != RLB_NULL_INDEX) {
 		struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
 		u32 next_index = entry->src_next;
+
 		if (entry->ip_src == arp->ip_src &&
 		    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src))
 				rlb_delete_table_entry(bond, index);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 20bbda1b36e1..e786a9c42bfd 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2272,6 +2272,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev,
 static void bond_info_query(struct net_device *bond_dev, struct ifbond *info)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+
 	bond_fill_ifbond(bond, info);
 }
 
@@ -4849,6 +4850,7 @@ static const struct device_type bond_type = {
 static void bond_destructor(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
 }
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 56d34be5e797..0fb1da361bb1 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -112,6 +112,7 @@ static void bond_info_show_master(struct seq_file *seq)
 	/* ARP information */
 	if (bond->params.arp_interval > 0) {
 		int printed = 0;
+
 		seq_printf(seq, "ARP Polling Interval (ms): %d\n",
 				bond->params.arp_interval);
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 2d615a93685e..5f9e9a240226 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -385,6 +385,7 @@ static ssize_t bonding_show_num_peer_notif(struct device *d,
 					   char *buf)
 {
 	struct bonding *bond = to_bond(d);
+
 	return sprintf(buf, "%d\n", bond->params.num_peer_notif);
 }
 static DEVICE_ATTR(num_grat_arp, 0644,
@@ -496,6 +497,7 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,
 
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
+
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
 				?  0 : ad_info.aggregator_id);
@@ -516,6 +518,7 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,
 
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
+
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
 				?  0 : ad_info.ports);
@@ -536,6 +539,7 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,
 
 	if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) {
 		struct ad_info ad_info;
+
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
 				?  0 : ad_info.actor_key);
@@ -556,6 +560,7 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,
 
 	if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) {
 		struct ad_info ad_info;
+
 		count = sprintf(buf, "%d\n",
 				bond_3ad_get_active_agg_info(bond, &ad_info)
 				?  0 : ad_info.partner_key);
@@ -576,6 +581,7 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,
 
 	if (BOND_MODE(bond) == BOND_MODE_8023AD && capable(CAP_NET_ADMIN)) {
 		struct ad_info ad_info;
+
 		if (!bond_3ad_get_active_agg_info(bond, &ad_info))
 			count = sprintf(buf, "%pM\n", ad_info.partner_system);
 	}
@@ -660,6 +666,7 @@ static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
 					   char *buf)
 {
 	struct bonding *bond = to_bond(d);
+
 	return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);
 }
 static DEVICE_ATTR(tlb_dynamic_lb, 0644,

From 8ce390bb985939a3bbc9a3616fa4fd046b54333b Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 14:18:33 +0800
Subject: [PATCH 0406/3292] net: bonding: fix code indent for conditional
 statements

Fix incorrect code indent for conditional statements.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c63e0d1faa63..269dad176df4 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -835,7 +835,7 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
 
 		if (entry->ip_src == arp->ip_src &&
 		    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src))
-				rlb_delete_table_entry(bond, index);
+			rlb_delete_table_entry(bond, index);
 		index = next_index;
 	}
 	spin_unlock_bh(&bond->mode_lock);

From 52333512701b56464a42f79b82570b37e7b91164 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 14:18:34 +0800
Subject: [PATCH 0407/3292] net: bonding: remove unnecessary braces

Braces {} are not necessary for single statement blocks,
so remove these braces {}.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_debugfs.c | 3 +--
 drivers/net/bonding/bond_main.c    | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index f3f86ef68ae0..4f9b4a18c74c 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -88,9 +88,8 @@ void bond_create_debugfs(void)
 {
 	bonding_debug_root = debugfs_create_dir("bonding", NULL);
 
-	if (!bonding_debug_root) {
+	if (!bonding_debug_root)
 		pr_warn("Warning: Cannot create bonding directory in debugfs\n");
-	}
 }
 
 void bond_destroy_debugfs(void)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e786a9c42bfd..dafeaef3cbd3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1013,9 +1013,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 			if (bond_is_lb(bond))
 				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
 		} else {
-			if (bond_uses_primary(bond)) {
+			if (bond_uses_primary(bond))
 				slave_info(bond->dev, new_active->dev, "making interface the new active one\n");
-			}
 		}
 	}
 

From 97a1111d9ca69efef5a248ae5b89cc264b0b04f4 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 20 May 2021 14:18:35 +0800
Subject: [PATCH 0408/3292] net: bonding: use tabs instead of space for code
 indent

Code indent should use tabs where possible, so
use tabs instead of space for code indent.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index f0f9138e967f..0561ece1ba45 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -598,7 +598,7 @@ static int bond_fill_info(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, IFLA_BOND_RESEND_IGMP,
-		        bond->params.resend_igmp))
+			bond->params.resend_igmp))
 		goto nla_put_failure;
 
 	if (nla_put_u8(skb, IFLA_BOND_NUM_PEER_NOTIF,

From 4b99b74982774b0ba3cc3e2e8e0cbcd7f920dc78 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Thu, 20 May 2021 09:05:50 +0800
Subject: [PATCH 0409/3292] NFC: st21nfca: remove unnecessary variable and
 labels

assign vlue (EIO/EPROTO) to variable r, and goto exit label,
but just return r follow exit label, so we delete exit label,
and just replace with return sentence.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/st21nfca/dep.c | 59 ++++++++++++--------------------------
 1 file changed, 18 insertions(+), 41 deletions(-)

diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c
index 8874d605b14f..1ec651e31064 100644
--- a/drivers/nfc/st21nfca/dep.c
+++ b/drivers/nfc/st21nfca/dep.c
@@ -196,38 +196,29 @@ static int st21nfca_tm_recv_atr_req(struct nfc_hci_dev *hdev,
 
 	skb_trim(skb, skb->len - 1);
 
-	if (!skb->len) {
-		r = -EIO;
-		goto exit;
-	}
+	if (!skb->len)
+		return -EIO;
 
-	if (skb->len < ST21NFCA_ATR_REQ_MIN_SIZE) {
-		r = -EPROTO;
-		goto exit;
-	}
+	if (skb->len < ST21NFCA_ATR_REQ_MIN_SIZE)
+		return -EPROTO;
 
 	atr_req = (struct st21nfca_atr_req *)skb->data;
 
-	if (atr_req->length < sizeof(struct st21nfca_atr_req)) {
-		r = -EPROTO;
-		goto exit;
-	}
+	if (atr_req->length < sizeof(struct st21nfca_atr_req))
+		return -EPROTO;
 
 	r = st21nfca_tm_send_atr_res(hdev, atr_req);
 	if (r)
-		goto exit;
+		return r;
 
 	gb_len = skb->len - sizeof(struct st21nfca_atr_req);
 
 	r = nfc_tm_activated(hdev->ndev, NFC_PROTO_NFC_DEP_MASK,
 			      NFC_COMM_PASSIVE, atr_req->gbi, gb_len);
 	if (r)
-		goto exit;
+		return r;
 
-	r = 0;
-
-exit:
-	return r;
+	return 0;
 }
 
 static int st21nfca_tm_send_psl_res(struct nfc_hci_dev *hdev,
@@ -280,25 +271,18 @@ static int st21nfca_tm_recv_psl_req(struct nfc_hci_dev *hdev,
 				    struct sk_buff *skb)
 {
 	struct st21nfca_psl_req *psl_req;
-	int r;
 
 	skb_trim(skb, skb->len - 1);
 
-	if (!skb->len) {
-		r = -EIO;
-		goto exit;
-	}
+	if (!skb->len)
+		return -EIO;
 
 	psl_req = (struct st21nfca_psl_req *)skb->data;
 
-	if (skb->len < sizeof(struct st21nfca_psl_req)) {
-		r = -EIO;
-		goto exit;
-	}
+	if (skb->len < sizeof(struct st21nfca_psl_req))
+		return -EIO;
 
-	r = st21nfca_tm_send_psl_res(hdev, psl_req);
-exit:
-	return r;
+	return st21nfca_tm_send_psl_res(hdev, psl_req);
 }
 
 int st21nfca_tm_send_dep_res(struct nfc_hci_dev *hdev, struct sk_buff *skb)
@@ -324,7 +308,6 @@ static int st21nfca_tm_recv_dep_req(struct nfc_hci_dev *hdev,
 {
 	struct st21nfca_dep_req_res *dep_req;
 	u8 size;
-	int r;
 	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
 
 	skb_trim(skb, skb->len - 1);
@@ -332,20 +315,16 @@ static int st21nfca_tm_recv_dep_req(struct nfc_hci_dev *hdev,
 	size = 4;
 
 	dep_req = (struct st21nfca_dep_req_res *)skb->data;
-	if (skb->len < size) {
-		r = -EIO;
-		goto exit;
-	}
+	if (skb->len < size)
+		return -EIO;
 
 	if (ST21NFCA_NFC_DEP_DID_BIT_SET(dep_req->pfb))
 		size++;
 	if (ST21NFCA_NFC_DEP_NAD_BIT_SET(dep_req->pfb))
 		size++;
 
-	if (skb->len < size) {
-		r = -EIO;
-		goto exit;
-	}
+	if (skb->len < size)
+		return -EIO;
 
 	/* Receiving DEP_REQ - Decoding */
 	switch (ST21NFCA_NFC_DEP_PFB_TYPE(dep_req->pfb)) {
@@ -364,8 +343,6 @@ static int st21nfca_tm_recv_dep_req(struct nfc_hci_dev *hdev,
 	skb_pull(skb, size);
 
 	return nfc_tm_data_received(hdev->ndev, skb);
-exit:
-	return r;
 }
 
 static int st21nfca_tm_event_send_data(struct nfc_hci_dev *hdev,

From 07b5dc1d515a9a9b3973e0bdc716a78adf6db8f8 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Thu, 20 May 2021 08:50:34 +0200
Subject: [PATCH 0410/3292] ibmvnic: remove default label from to_string switch

This way the compiler warns when a new value is added to the enum but
not to the string translation like:

drivers/net/ethernet/ibm/ibmvnic.c: In function 'adapter_state_to_string':
drivers/net/ethernet/ibm/ibmvnic.c:832:2: warning: enumeration value 'VNIC_FOOBAR' not handled in switch [-Wswitch]
  switch (state) {
  ^~~~~~
drivers/net/ethernet/ibm/ibmvnic.c: In function 'reset_reason_to_string':
drivers/net/ethernet/ibm/ibmvnic.c:1935:2: warning: enumeration value 'VNIC_RESET_FOOBAR' not handled in switch [-Wswitch]
  switch (reason) {
  ^~~~~~

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Acked-by: Lijun Pan <lijunp213@gmail.com>
Link: https://lore.kernel.org/netdev/CAOhMmr701LecfuNM+EozqbiTxFvDiXjFdY2aYeKJYaXq9kqVDg@mail.gmail.com/
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5788bb956d73..4d439413f6d9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -846,9 +846,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
 		return "REMOVING";
 	case VNIC_REMOVED:
 		return "REMOVED";
-	default:
-		return "UNKNOWN";
 	}
+	return "UNKNOWN";
 }
 
 static int ibmvnic_login(struct net_device *netdev)
@@ -1946,9 +1945,8 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
 		return "TIMEOUT";
 	case VNIC_RESET_CHANGE_PARAM:
 		return "CHANGE_PARAM";
-	default:
-		return "UNKNOWN";
 	}
+	return "UNKNOWN";
 }
 
 /*

From 25173dd4093a24e977e2af9cd5654c205bf13547 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Thu, 20 May 2021 18:49:54 +0800
Subject: [PATCH 0411/3292] selftests: net: devlink_port_split.py: skip the
 test if no devlink device

When there is no devlink device, the following command will return:
  $ devlink -j dev show
  {dev:{}}

This will cause IndexError when trying to access the first element
in dev of this json dataset. Use the kselftest framework skip code
to skip this test in this case.

Example output with this change:
  # selftests: net: devlink_port_split.py
  # no devlink device was found, test skipped
  ok 7 selftests: net: devlink_port_split.py # SKIP

Link: https://bugs.launchpad.net/bugs/1928889
Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/devlink_port_split.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 834066d465fc..2b5d6ff87373 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -18,6 +18,8 @@ import sys
 #
 
 
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
 Port = collections.namedtuple('Port', 'bus_info name')
 
 
@@ -239,7 +241,11 @@ def main(cmdline=None):
         assert stderr == ""
 
         devs = json.loads(stdout)['dev']
-        dev = list(devs.keys())[0]
+        if devs:
+            dev = list(devs.keys())[0]
+        else:
+            print("no devlink device was found, test skipped")
+            sys.exit(KSFT_SKIP)
 
     cmd = "devlink dev show %s" % dev
     stdout, stderr = run_command(cmd)

From 48afdaea04eb691df3244b6a361f1a0c4540ff45 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 May 2021 21:36:45 +0800
Subject: [PATCH 0412/3292] net: atm: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/atm_sysfs.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index aa1b57161f3b..0fdbdfd19474 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -11,7 +11,7 @@
 
 #define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev)
 
-static ssize_t show_type(struct device *cdev,
+static ssize_t type_show(struct device *cdev,
 			 struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -19,7 +19,7 @@ static ssize_t show_type(struct device *cdev,
 	return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type);
 }
 
-static ssize_t show_address(struct device *cdev,
+static ssize_t address_show(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -27,7 +27,7 @@ static ssize_t show_address(struct device *cdev,
 	return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi);
 }
 
-static ssize_t show_atmaddress(struct device *cdev,
+static ssize_t atmaddress_show(struct device *cdev,
 			       struct device_attribute *attr, char *buf)
 {
 	unsigned long flags;
@@ -50,7 +50,7 @@ static ssize_t show_atmaddress(struct device *cdev,
 	return count;
 }
 
-static ssize_t show_atmindex(struct device *cdev,
+static ssize_t atmindex_show(struct device *cdev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -58,7 +58,7 @@ static ssize_t show_atmindex(struct device *cdev,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number);
 }
 
-static ssize_t show_carrier(struct device *cdev,
+static ssize_t carrier_show(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -67,7 +67,7 @@ static ssize_t show_carrier(struct device *cdev,
 			 adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
 }
 
-static ssize_t show_link_rate(struct device *cdev,
+static ssize_t link_rate_show(struct device *cdev,
 			      struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
@@ -90,12 +90,12 @@ static ssize_t show_link_rate(struct device *cdev,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
 }
 
-static DEVICE_ATTR(address, 0444, show_address, NULL);
-static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
-static DEVICE_ATTR(type, 0444, show_type, NULL);
-static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
+static DEVICE_ATTR_RO(address);
+static DEVICE_ATTR_RO(atmaddress);
+static DEVICE_ATTR_RO(atmindex);
+static DEVICE_ATTR_RO(carrier);
+static DEVICE_ATTR_RO(type);
+static DEVICE_ATTR_RO(link_rate);
 
 static struct device_attribute *atm_attrs[] = {
 	&dev_attr_atmaddress,

From 7567d603b3f1c5ee799e311d0e48932bfc449028 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 May 2021 21:41:16 +0800
Subject: [PATCH 0413/3292] net: usb: hso: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 3ef4b2841402..fa30e78c6e49 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -457,9 +457,8 @@ static const struct usb_device_id hso_ids[] = {
 MODULE_DEVICE_TABLE(usb, hso_ids);
 
 /* Sysfs attribute */
-static ssize_t hso_sysfs_show_porttype(struct device *dev,
-				       struct device_attribute *attr,
-				       char *buf)
+static ssize_t hsotype_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct hso_device *hso_dev = dev_get_drvdata(dev);
 	char *port_name;
@@ -505,7 +504,7 @@ static ssize_t hso_sysfs_show_porttype(struct device *dev,
 
 	return sprintf(buf, "%s\n", port_name);
 }
-static DEVICE_ATTR(hsotype, 0444, hso_sysfs_show_porttype, NULL);
+static DEVICE_ATTR_RO(hsotype);
 
 static struct attribute *hso_serial_dev_attrs[] = {
 	&dev_attr_hsotype.attr,

From 86fe2f8aa14f6b0b76a1ce3897a3ee1433e5203a Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 May 2021 21:46:19 +0800
Subject: [PATCH 0414/3292] net: cdc_ncm: use DEVICE_ATTR_RW macro

Use DEVICE_ATTR_RW helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index b04055fd1b79..783d6139fdfa 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -192,7 +192,8 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
 	return val;
 }
 
-static ssize_t cdc_ncm_show_min_tx_pkt(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t min_tx_pkt_show(struct device *d,
+			       struct device_attribute *attr, char *buf)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -200,7 +201,8 @@ static ssize_t cdc_ncm_show_min_tx_pkt(struct device *d, struct device_attribute
 	return sprintf(buf, "%u\n", ctx->min_tx_pkt);
 }
 
-static ssize_t cdc_ncm_show_rx_max(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t rx_max_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -208,7 +210,8 @@ static ssize_t cdc_ncm_show_rx_max(struct device *d, struct device_attribute *at
 	return sprintf(buf, "%u\n", ctx->rx_max);
 }
 
-static ssize_t cdc_ncm_show_tx_max(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t tx_max_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -216,7 +219,8 @@ static ssize_t cdc_ncm_show_tx_max(struct device *d, struct device_attribute *at
 	return sprintf(buf, "%u\n", ctx->tx_max);
 }
 
-static ssize_t cdc_ncm_show_tx_timer_usecs(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t tx_timer_usecs_show(struct device *d,
+				   struct device_attribute *attr, char *buf)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -224,7 +228,9 @@ static ssize_t cdc_ncm_show_tx_timer_usecs(struct device *d, struct device_attri
 	return sprintf(buf, "%u\n", ctx->timer_interval / (u32)NSEC_PER_USEC);
 }
 
-static ssize_t cdc_ncm_store_min_tx_pkt(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t min_tx_pkt_store(struct device *d,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -238,7 +244,9 @@ static ssize_t cdc_ncm_store_min_tx_pkt(struct device *d,  struct device_attribu
 	return len;
 }
 
-static ssize_t cdc_ncm_store_rx_max(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t rx_max_store(struct device *d,
+			    struct device_attribute *attr,
+			    const char *buf, size_t len)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -251,7 +259,9 @@ static ssize_t cdc_ncm_store_rx_max(struct device *d,  struct device_attribute *
 	return len;
 }
 
-static ssize_t cdc_ncm_store_tx_max(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t tx_max_store(struct device *d,
+			    struct device_attribute *attr,
+			    const char *buf, size_t len)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -264,7 +274,9 @@ static ssize_t cdc_ncm_store_tx_max(struct device *d,  struct device_attribute *
 	return len;
 }
 
-static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d,  struct device_attribute *attr, const char *buf, size_t len)
+static ssize_t tx_timer_usecs_store(struct device *d,
+				    struct device_attribute *attr,
+				    const char *buf, size_t len)
 {
 	struct usbnet *dev = netdev_priv(to_net_dev(d));
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
@@ -285,10 +297,10 @@ static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d,  struct device_att
 	return len;
 }
 
-static DEVICE_ATTR(min_tx_pkt, 0644, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt);
-static DEVICE_ATTR(rx_max, 0644, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max);
-static DEVICE_ATTR(tx_max, 0644, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max);
-static DEVICE_ATTR(tx_timer_usecs, 0644, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs);
+static DEVICE_ATTR_RW(min_tx_pkt);
+static DEVICE_ATTR_RW(rx_max);
+static DEVICE_ATTR_RW(tx_max);
+static DEVICE_ATTR_RW(tx_timer_usecs);
 
 static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf)
 {

From 7dbc0d246891acbb8ae5840b3237881b7a0787df Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Wed, 19 May 2021 15:05:44 +0800
Subject: [PATCH 0415/3292] PM / devfreq: imx-bus: Remove
 imx_bus_get_dev_status

Current driver actually does not support simple ondemand governor
as it's unable to provide device load information. So removing
the unnecessary callback to avoid confusing.
Right now the driver is using userspace governor by default.

polling_ms was also dropped as it's not needed for non-ondemand
governor.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/imx-bus.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c
index 3fc3fd77492d..f3f6e25053ed 100644
--- a/drivers/devfreq/imx-bus.c
+++ b/drivers/devfreq/imx-bus.c
@@ -45,18 +45,6 @@ static int imx_bus_get_cur_freq(struct device *dev, unsigned long *freq)
 	return 0;
 }
 
-static int imx_bus_get_dev_status(struct device *dev,
-		struct devfreq_dev_status *stat)
-{
-	struct imx_bus *priv = dev_get_drvdata(dev);
-
-	stat->busy_time = 0;
-	stat->total_time = 0;
-	stat->current_frequency = clk_get_rate(priv->clk);
-
-	return 0;
-}
-
 static void imx_bus_exit(struct device *dev)
 {
 	struct imx_bus *priv = dev_get_drvdata(dev);
@@ -129,9 +117,7 @@ static int imx_bus_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	priv->profile.polling_ms = 1000;
 	priv->profile.target = imx_bus_target;
-	priv->profile.get_dev_status = imx_bus_get_dev_status;
 	priv->profile.exit = imx_bus_exit;
 	priv->profile.get_cur_freq = imx_bus_get_cur_freq;
 	priv->profile.initial_freq = clk_get_rate(priv->clk);

From 5e480ab94db8102baa73da33534e708a8636c2f9 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 11 May 2021 00:10:02 +0300
Subject: [PATCH 0416/3292] PM / devfreq: tegra30: Support thermal cooling

Expose ACTMON devfreq device as a cooling device in order to throttle
memory freq on overheat. Throttling of memory freq has a significant
cooling effect on NVIDIA Tegra SoCs since higher memory freqs require
higher SoC core voltage which is one of the main causes of the heating.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/tegra30-devfreq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index ce83f883ca65..10661eb2aed8 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -688,6 +688,7 @@ static struct devfreq_dev_profile tegra_devfreq_profile = {
 	.polling_ms	= ACTMON_SAMPLING_PERIOD,
 	.target		= tegra_devfreq_target,
 	.get_dev_status	= tegra_devfreq_get_dev_status,
+	.is_cooling_device = true,
 };
 
 static int tegra_governor_get_target(struct devfreq *devfreq,

From a15fc9aa5b384e305ea25f42f744bb301fe39da0 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Fri, 21 May 2021 11:16:39 +0800
Subject: [PATCH 0417/3292] PM / devfreq: imx8m-ddrc: Remove
 DEVFREQ_GOV_SIMPLE_ONDEMAND dependency

The driver can't support simple ondemand governor due to missing
.get_dev_status() capability.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 20373a893b44..e87d01c0b76a 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -103,7 +103,6 @@ config ARM_IMX8M_DDRC_DEVFREQ
 	tristate "i.MX8M DDRC DEVFREQ Driver"
 	depends on (ARCH_MXC && HAVE_ARM_SMCCC) || \
 		(COMPILE_TEST && HAVE_ARM_SMCCC)
-	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	select DEVFREQ_GOV_USERSPACE
 	help
 	  This adds the DEVFREQ driver for the i.MX8M DDR Controller. It allows

From f1b7d45d3f8f3e18e190e71cb54d4b1917300d1d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:49 -0700
Subject: [PATCH 0418/3292] x86/irq: Remove unused vectors defines

UV_BAU_MESSAGE is defined but not used anywhere in the kernel. Presumably
this is a stale vector number that can be reclaimed.

MCE_VECTOR is not an actual vector: #MC is an exception, not an interrupt
vector, and as such is correctly described as X86_TRAP_MC. MCE_VECTOR is
not used anywhere is the kernel.

Note that NMI_VECTOR *is* used; specifically it is the vector number
programmed into the APIC LVT when an NMI interrupt is configured. At
the moment it is always numerically identical to X86_TRAP_NMI, that is
not necessarily going to be the case indefinitely.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lore.kernel.org/r/20210519212154.511983-4-hpa@zytor.com
---
 arch/x86/include/asm/irq_vectors.h       | 4 ++--
 tools/arch/x86/include/asm/irq_vectors.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f..dc71b781be42 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
  * This file enumerates the exact layout of them:
  */
 
+/* This is used as an interrupt vector when programming the APIC. */
 #define NMI_VECTOR			0x02
-#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
  */
 #define IRQ_WORK_VECTOR			0xf6
 
-#define UV_BAU_MESSAGE			0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
 #define DEFERRED_ERROR_VECTOR		0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f..dc71b781be42 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
  * This file enumerates the exact layout of them:
  */
 
+/* This is used as an interrupt vector when programming the APIC. */
 #define NMI_VECTOR			0x02
-#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
  */
 #define IRQ_WORK_VECTOR			0xf6
 
-#define UV_BAU_MESSAGE			0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
 #define DEFERRED_ERROR_VECTOR		0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */

From ff851003880de9d1111498877551ba16668c38ef Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:48 -0700
Subject: [PATCH 0419/3292] x86/irq: Add and use NR_EXTERNAL_VECTORS and
 NR_SYSTEM_VECTORS

Add defines for the number of external vectors and number of system
vectors instead of requiring the use of (FIRST_SYSTEM_VECTOR -
FIRST_EXTERNAL_VECTOR) and (NR_VECTORS - FIRST_SYSTEM_VECTOR)
respectively. Clean up the usage sites.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20210519212154.511983-3-hpa@zytor.com
---
 arch/x86/include/asm/idtentry.h          | 4 ++--
 arch/x86/include/asm/irq_vectors.h       | 3 +++
 tools/arch/x86/include/asm/irq_vectors.h | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 73d45b0dfff2..c03a18cac78e 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -504,7 +504,7 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	.align 8
 SYM_CODE_START(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
-    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+    .rept NR_EXTERNAL_VECTORS
 	UNWIND_HINT_IRET_REGS
 0 :
 	.byte	0x6a, vector
@@ -520,7 +520,7 @@ SYM_CODE_END(irq_entries_start)
 	.align 8
 SYM_CODE_START(spurious_entries_start)
     vector=FIRST_SYSTEM_VECTOR
-    .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+    .rept NR_SYSTEM_VECTORS
 	UNWIND_HINT_IRET_REGS
 0 :
 	.byte	0x6a, vector
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index dc71b781be42..43dcb9284208 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,6 +114,9 @@
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
 #endif
 
+#define NR_EXTERNAL_VECTORS		(FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS		(NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
 /*
  * Size the maximum number of interrupts.
  *
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index dc71b781be42..43dcb9284208 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -114,6 +114,9 @@
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
 #endif
 
+#define NR_EXTERNAL_VECTORS		(FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS		(NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
 /*
  * Size the maximum number of interrupts.
  *

From 8ec9069a432c873e52e6f4ce1496f282a4299604 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:50 -0700
Subject: [PATCH 0420/3292] x86/idt: Remove address argument from
 idt_invalidate()

There is no reason to specify any specific address to idt_invalidate(). It
looks mostly like an artifact of unifying code done differently by
accident. The most "sensible" address to set here is a NULL pointer -
virtual address zero, just as a visual marker.

This also makes it possible to mark the struct desc_ptr in idt_invalidate()
as static const.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210519212154.511983-5-hpa@zytor.com
---
 arch/x86/include/asm/desc.h        | 2 +-
 arch/x86/kernel/idt.c              | 5 ++---
 arch/x86/kernel/machine_kexec_32.c | 2 +-
 arch/x86/kernel/reboot.c           | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 476082a83d1c..b8429ae50b71 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -427,6 +427,6 @@ static inline void idt_setup_early_pf(void) { }
 static inline void idt_setup_ist_traps(void) { }
 #endif
 
-extern void idt_invalidate(void *addr);
+extern void idt_invalidate(void);
 
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d552f177eca0..2779f5226dc2 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -331,11 +331,10 @@ void __init idt_setup_early_handler(void)
 
 /**
  * idt_invalidate - Invalidate interrupt descriptor table
- * @addr:	The virtual address of the 'invalid' IDT
  */
-void idt_invalidate(void *addr)
+void idt_invalidate(void)
 {
-	struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
+	static const struct desc_ptr idt = { .address = 0, .size = 0 };
 
 	load_idt(&idt);
 }
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 64b00b0d7fe8..1e34feebcd5d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -232,7 +232,7 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	idt_invalidate(phys_to_virt(0));
+	idt_invalidate();
 	set_gdt(phys_to_virt(0), 0);
 
 	/* now call it */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b29657b76e3f..ebfb91108232 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -669,7 +669,7 @@ static void native_machine_emergency_restart(void)
 			break;
 
 		case BOOT_TRIPLE:
-			idt_invalidate(NULL);
+			idt_invalidate();
 			__asm__ __volatile__("int3");
 
 			/* We're probably dead after this, but... */

From 283fa3b6483a84aeb62f1b97c2ec7c02eb2f5882 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:51 -0700
Subject: [PATCH 0421/3292] x86: Add native_[ig]dt_invalidate()

In some places, the native forms of descriptor table invalidation is
required. Rather than open-coding them, add explicitly native functions to
invalidate the GDT and IDT.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210519212154.511983-6-hpa@zytor.com
---
 arch/x86/include/asm/desc.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index b8429ae50b71..400c17862870 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -224,6 +224,26 @@ static inline void store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+static inline void native_gdt_invalidate(void)
+{
+	const struct desc_ptr invalid_gdt = {
+		.address = 0,
+		.size = 0
+	};
+
+	native_load_gdt(&invalid_gdt);
+}
+
+static inline void native_idt_invalidate(void)
+{
+	const struct desc_ptr invalid_idt = {
+		.address = 0,
+		.size = 0
+	};
+
+	native_load_idt(&invalid_idt);
+}
+
 /*
  * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
  * a read-only remapping. To prevent a page fault, the GDT is switched to the

From 056c52f5e824c050c58fd27ea6d717cba32239c2 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:52 -0700
Subject: [PATCH 0422/3292] x86/kexec: Set_[gi]dt() ->
 native_[gi]dt_invalidate() in machine_kexec_*.c

These files contain private set_gdt() functions which are only used to
invalid the gdt; machine_kexec_64.c also contains a set_idt()
function to invalidate the idt.

phys_to_virt(0) *really* doesn't make any sense for creating an
invalid GDT. A NULL pointer (virtual 0) makes a lot more sense;
although neither will allow any actual memory reference, a NULL
pointer stands out more.

Replace these calls with native_[gi]dt_invalidate().

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210519212154.511983-7-hpa@zytor.com
---
 arch/x86/kernel/machine_kexec_32.c | 15 ++------------
 arch/x86/kernel/machine_kexec_64.c | 33 ++----------------------------
 2 files changed, 4 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 1e34feebcd5d..1b373d79cedc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,17 +23,6 @@
 #include <asm/set_memory.h>
 #include <asm/debugreg.h>
 
-static void set_gdt(void *newgdt, __u16 limit)
-{
-	struct desc_ptr curgdt;
-
-	/* ia32 supports unaligned loads & stores */
-	curgdt.size    = limit;
-	curgdt.address = (unsigned long)newgdt;
-
-	load_gdt(&curgdt);
-}
-
 static void load_segments(void)
 {
 #define __STR(X) #X
@@ -232,8 +221,8 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	idt_invalidate();
-	set_gdt(phys_to_virt(0), 0);
+	native_idt_invalidate();
+	native_gdt_invalidate();
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c078b0d3ab0e..131f30fdcfbd 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -256,35 +256,6 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	return init_transition_pgtable(image, level4p);
 }
 
-static void set_idt(void *newidt, u16 limit)
-{
-	struct desc_ptr curidt;
-
-	/* x86-64 supports unaligned loads & stores */
-	curidt.size    = limit;
-	curidt.address = (unsigned long)newidt;
-
-	__asm__ __volatile__ (
-		"lidtq %0\n"
-		: : "m" (curidt)
-		);
-};
-
-
-static void set_gdt(void *newgdt, u16 limit)
-{
-	struct desc_ptr curgdt;
-
-	/* x86-64 supports unaligned loads & stores */
-	curgdt.size    = limit;
-	curgdt.address = (unsigned long)newgdt;
-
-	__asm__ __volatile__ (
-		"lgdtq %0\n"
-		: : "m" (curgdt)
-		);
-};
-
 static void load_segments(void)
 {
 	__asm__ __volatile__ (
@@ -379,8 +350,8 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0), 0);
-	set_idt(phys_to_virt(0), 0);
+	native_idt_invalidate();
+	native_gdt_invalidate();
 
 	/* now call it */
 	image->start = relocate_kernel((unsigned long)image->head,

From c3d175e4852bfdfd1e4021dff8715fc407dedd98 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 May 2021 16:15:48 +0200
Subject: [PATCH 0423/3292] cpufreq: intel_pstate: hybrid: Avoid exposing two
 global attributes

The turbo_pct and num_pstates sysfs attributes represent CPU
properties that may be different for differenty types of CPUs in
a hybrid processor, so avoid exposing them in that case.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst |  6 ++++++
 drivers/cpufreq/intel_pstate.c                | 15 +++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index df29b4f1f219..235f1025a7e6 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -365,6 +365,9 @@ argument is passed to the kernel in the command line.
 	inclusive) including both turbo and non-turbo P-states (see
 	`Turbo P-states Support`_).
 
+	This attribute is present only if the value exposed by it is the same
+	for all of the CPUs in the system.
+
 	The value of this attribute is not affected by the ``no_turbo``
 	setting described `below <no_turbo_attr_>`_.
 
@@ -374,6 +377,9 @@ argument is passed to the kernel in the command line.
 	Ratio of the `turbo range <turbo_>`_ size to the size of the entire
 	range of supported P-states, in percent.
 
+	This attribute is present only if the value exposed by it is the same
+	for all of the CPUs in the system.
+
 	This attribute is read-only.
 
 .. _no_turbo_attr:
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0e69dffd5a76..45f59e2827fe 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1365,8 +1365,6 @@ define_one_global_rw(energy_efficiency);
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
 	&no_turbo.attr,
-	&turbo_pct.attr,
-	&num_pstates.attr,
 	NULL
 };
 
@@ -1391,6 +1389,14 @@ static void __init intel_pstate_sysfs_expose_params(void)
 	if (WARN_ON(rc))
 		return;
 
+	if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
+		rc = sysfs_create_file(intel_pstate_kobject, &turbo_pct.attr);
+		WARN_ON(rc);
+
+		rc = sysfs_create_file(intel_pstate_kobject, &num_pstates.attr);
+		WARN_ON(rc);
+	}
+
 	/*
 	 * If per cpu limits are enforced there are no global limits, so
 	 * return without creating max/min_perf_pct attributes
@@ -1417,6 +1423,11 @@ static void __init intel_pstate_sysfs_remove(void)
 
 	sysfs_remove_group(intel_pstate_kobject, &intel_pstate_attr_group);
 
+	if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
+		sysfs_remove_file(intel_pstate_kobject, &num_pstates.attr);
+		sysfs_remove_file(intel_pstate_kobject, &turbo_pct.attr);
+	}
+
 	if (!per_cpu_limits) {
 		sysfs_remove_file(intel_pstate_kobject, &max_perf_pct.attr);
 		sysfs_remove_file(intel_pstate_kobject, &min_perf_pct.attr);

From eb3693f0521e020dd8617c7fa3ddf5c9f0d8dea0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 May 2021 16:19:30 +0200
Subject: [PATCH 0424/3292] cpufreq: intel_pstate: hybrid: CPU-specific scaling
 factor

The scaling factor between HWP performance levels and CPU frequency
may be different for different types of CPUs in a hybrid processor
and in general the HWP performance levels need not correspond to
"P-states" representing values that would be written to
MSR_IA32_PERF_CTL if HWP was disabled.

However, the policy limits control in cpufreq is defined in terms
of CPU frequency, so it is necessary to map the frequency limits set
through that interface to HWP performance levels with reasonable
accuracy and the behavior of that interface on hybrid processors
has to be compatible with its behavior on non-hybrid ones.

To address this problem, use the observations that (1) on hybrid
processors the sysfs interface can operate by mapping frequency
to "P-states" and translating those "P-states" to specific HWP
performance levels of the given CPU and (2) the scaling factor
between the MSR_IA32_PERF_CTL "P-states" and CPU frequency can be
regarded as a known value.  Moreover, the mapping between the
HWP performance levels and CPU frequency can be assumed to be
linear and such that HWP performance level 0 correspond to the
frequency value of 0, so it is only necessary to know the
frequency corresponding to one specific HWP performance level
to compute the scaling factor applicable to all of them.

One possibility is to take the nominal performance value from CPPC,
if available, and use cpu_khz as the corresponding frequency.  If
the CPPC capabilities interface is not there or the nominal
performance value provided by it is out of range, though, something
else needs to be done.

Namely, the guaranteed performance level either from CPPC or from
MSR_HWP_CAPABILITIES can be used instead, but the corresponding
frequency needs to be determined.  That can be done by computing the
product of the (known) scaling factor between the MSR_IA32_PERF_CTL
P-states and CPU frequency (the PERF_CTL scaling factor) and the
P-state value referred to as the "TDP ratio".

If the HWP-to-frequency scaling factor value obtained in one of the
ways above turns out to be euqal to the PERF_CTL scaling factor, it
can be assumed that the number of HWP performance levels is equal to
the number of P-states and the given CPU can be handled as though
this was not a hybrid processor.

Otherwise, one more adjustment may still need to be made, because the
HWP-to-frequency scaling factor computed so far may not be accurate
enough (e.g. because the CPPC information does not match the exact
behavior of the processor).  Specifically, in that case the frequency
corresponding to the highest HWP performance value from
MSR_HWP_CAPABILITIES (computed as the product of that value and the
HWP-to-frequency scaling factor) cannot exceed the frequency that
corresponds to the maximum 1-core turbo P-state value from
MSR_TURBO_RATIO_LIMIT (computed as the procuct of that value and the
PERF_CTL scaling factor) and the HWP-to-frequency scaling factor may
need to be adjusted accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 233 +++++++++++++++++++++++++++++----
 1 file changed, 210 insertions(+), 23 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 45f59e2827fe..b0afb8629767 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -121,9 +121,10 @@ struct sample {
  * @max_pstate_physical:This is physical Max P state for a processor
  *			This can be higher than the max_pstate which can
  *			be limited by platform thermal design power limits
- * @scaling:		Scaling factor to  convert frequency to cpufreq
- *			frequency units
+ * @perf_ctl_scaling:	PERF_CTL P-state to frequency scaling factor
+ * @scaling:		Scaling factor between performance and frequency
  * @turbo_pstate:	Max Turbo P state possible for this platform
+ * @min_freq:		@min_pstate frequency in cpufreq units
  * @max_freq:		@max_pstate frequency in cpufreq units
  * @turbo_freq:		@turbo_pstate frequency in cpufreq units
  *
@@ -134,8 +135,10 @@ struct pstate_data {
 	int	min_pstate;
 	int	max_pstate;
 	int	max_pstate_physical;
+	int	perf_ctl_scaling;
 	int	scaling;
 	int	turbo_pstate;
+	unsigned int min_freq;
 	unsigned int max_freq;
 	unsigned int turbo_freq;
 };
@@ -489,6 +492,149 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
+static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
+{
+	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
+}
+
+static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
+					struct cppc_perf_caps *caps)
+{
+	if (cppc_get_perf_caps(cpu->cpu, caps))
+		return false;
+
+	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
+}
+
+static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
+{
+	pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
+
+	cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
+}
+
+/**
+ * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
+ * @cpu: Target CPU.
+ *
+ * On hybrid processors, HWP may expose more performance levels than there are
+ * P-states accessible through the PERF_CTL interface.  If that happens, the
+ * scaling factor between HWP performance levels and CPU frequency will be less
+ * than the scaling factor between P-state values and CPU frequency.
+ *
+ * In that case, the scaling factor between HWP performance levels and CPU
+ * frequency needs to be determined which can be done with the help of the
+ * observation that certain HWP performance levels should correspond to certain
+ * P-states, like for example the HWP highest performance should correspond
+ * to the maximum turbo P-state of the CPU.
+ */
+static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
+{
+	struct cppc_perf_caps caps;
+	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
+	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+	int perf_ctl_turbo = pstate_funcs.get_turbo();
+	int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
+	int perf_ctl_max = pstate_funcs.get_max();
+	int max_freq = perf_ctl_max * perf_ctl_scaling;
+	int scaling = INT_MAX;
+	int freq;
+
+	pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
+	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
+	pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
+	pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
+
+	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
+	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
+
+	if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
+		if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
+			pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
+
+			/*
+			 * If the CPPC nominal performance is valid, it can be
+			 * assumed to correspond to cpu_khz.
+			 */
+			if (caps.nominal_perf == perf_ctl_max_phys) {
+				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+				return;
+			}
+			scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
+		} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
+			pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
+
+			/*
+			 * If the CPPC guaranteed performance is valid, it can
+			 * be assumed to correspond to max_freq.
+			 */
+			if (caps.guaranteed_perf == perf_ctl_max) {
+				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+				return;
+			}
+			scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
+		}
+	}
+	/*
+	 * If using the CPPC data to compute the HWP-to-frequency scaling factor
+	 * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
+	 * the assumption that it corresponds to max_freq.
+	 */
+	if (scaling > perf_ctl_scaling) {
+		pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
+
+		if (cpu->pstate.max_pstate == perf_ctl_max) {
+			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+			return;
+		}
+		scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
+		if (scaling > perf_ctl_scaling) {
+			/*
+			 * This should not happen, because it would mean that
+			 * the number of HWP perf levels was less than the
+			 * number of P-states, so use the PERF_CTL scaling in
+			 * that case.
+			 */
+			pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
+				scaling);
+
+			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+			return;
+		}
+	}
+
+	/*
+	 * If the product of the HWP performance scaling factor obtained above
+	 * and the HWP_CAP highest performance is greater than the maximum turbo
+	 * frequency corresponding to the pstate_funcs.get_turbo() return value,
+	 * the scaling factor is too high, so recompute it so that the HWP_CAP
+	 * highest performance corresponds to the maximum turbo frequency.
+	 */
+	if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
+		pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
+
+		cpu->pstate.turbo_freq = turbo_freq;
+		scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
+	}
+
+	cpu->pstate.scaling = scaling;
+
+	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+
+	cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
+					 perf_ctl_scaling);
+
+	freq = perf_ctl_max_phys * perf_ctl_scaling;
+	cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
+
+	cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+	/*
+	 * Cast the min P-state value retrieved via pstate_funcs.get_min() to
+	 * the effective range of HWP performance levels.
+	 */
+	cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
+}
+
 static inline void update_turbo_state(void)
 {
 	u64 misc_en;
@@ -795,19 +941,22 @@ cpufreq_freq_attr_rw(energy_performance_preference);
 
 static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
 {
-	struct cpudata *cpu;
-	u64 cap;
-	int ratio;
+	struct cpudata *cpu = all_cpu_data[policy->cpu];
+	int ratio, freq;
 
 	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
 	if (ratio <= 0) {
+		u64 cap;
+
 		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
 		ratio = HWP_GUARANTEED_PERF(cap);
 	}
 
-	cpu = all_cpu_data[policy->cpu];
+	freq = ratio * cpu->pstate.scaling;
+	if (cpu->pstate.scaling != cpu->pstate.perf_ctl_scaling)
+		freq = rounddown(freq, cpu->pstate.perf_ctl_scaling);
 
-	return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
+	return sprintf(buf, "%d\n", freq);
 }
 
 cpufreq_freq_attr_ro(base_frequency);
@@ -831,9 +980,20 @@ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
 
 static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
 {
+	int scaling = cpu->pstate.scaling;
+
 	__intel_pstate_get_hwp_cap(cpu);
-	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+	cpu->pstate.max_freq = cpu->pstate.max_pstate * scaling;
+	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling;
+	if (scaling != cpu->pstate.perf_ctl_scaling) {
+		int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+
+		cpu->pstate.max_freq = rounddown(cpu->pstate.max_freq,
+						 perf_ctl_scaling);
+		cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_freq,
+						   perf_ctl_scaling);
+	}
 }
 
 static void intel_pstate_hwp_set(unsigned int cpu)
@@ -1724,19 +1884,33 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
+	bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
+	int perf_ctl_max_phys = pstate_funcs.get_max_physical();
+	int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
+					    pstate_funcs.get_scaling();
+
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
-	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
-	cpu->pstate.scaling = pstate_funcs.get_scaling();
+	cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
+	cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;
 
 	if (hwp_active && !hwp_mode_bdw) {
 		__intel_pstate_get_hwp_cap(cpu);
+
+		if (hybrid_cpu)
+			intel_pstate_hybrid_hwp_calibrate(cpu);
+		else
+			cpu->pstate.scaling = perf_ctl_scaling;
 	} else {
+		cpu->pstate.scaling = perf_ctl_scaling;
 		cpu->pstate.max_pstate = pstate_funcs.get_max();
 		cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	}
 
-	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	if (cpu->pstate.scaling == perf_ctl_scaling) {
+		cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+		cpu->pstate.max_freq = cpu->pstate.max_pstate * perf_ctl_scaling;
+		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * perf_ctl_scaling;
+	}
 
 	if (pstate_funcs.get_aperf_mperf_shift)
 		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2206,23 +2380,34 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
 					    unsigned int policy_min,
 					    unsigned int policy_max)
 {
-	int scaling = cpu->pstate.scaling;
+	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
 	int32_t max_policy_perf, min_policy_perf;
 
+	max_policy_perf = policy_max / perf_ctl_scaling;
+	if (policy_max == policy_min) {
+		min_policy_perf = max_policy_perf;
+	} else {
+		min_policy_perf = policy_min / perf_ctl_scaling;
+		min_policy_perf = clamp_t(int32_t, min_policy_perf,
+					  0, max_policy_perf);
+	}
+
 	/*
 	 * HWP needs some special consideration, because HWP_REQUEST uses
 	 * abstract values to represent performance rather than pure ratios.
 	 */
-	if (hwp_active)
+	if (hwp_active) {
 		intel_pstate_get_hwp_cap(cpu);
 
-	max_policy_perf = policy_max / scaling;
-	if (policy_max == policy_min) {
-		min_policy_perf = max_policy_perf;
-	} else {
-		min_policy_perf = policy_min / scaling;
-		min_policy_perf = clamp_t(int32_t, min_policy_perf,
-					  0, max_policy_perf);
+		if (cpu->pstate.scaling != perf_ctl_scaling) {
+			int scaling = cpu->pstate.scaling;
+			int freq;
+
+			freq = max_policy_perf * perf_ctl_scaling;
+			max_policy_perf = DIV_ROUND_UP(freq, scaling);
+			freq = min_policy_perf * perf_ctl_scaling;
+			min_policy_perf = DIV_ROUND_UP(freq, scaling);
+		}
 	}
 
 	pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
@@ -2416,7 +2601,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	cpu->min_perf_ratio = 0;
 
 	/* cpuinfo and default policy values */
-	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+	policy->cpuinfo.min_freq = cpu->pstate.min_freq;
 	update_turbo_state();
 	global.turbo_disabled_mf = global.turbo_disabled;
 	policy->cpuinfo.max_freq = global.turbo_disabled ?
@@ -3146,6 +3331,8 @@ hwp_cpu_matched:
 		}
 
 		pr_info("HWP enabled\n");
+	} else if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
+		pr_warn("Problematic setup: Hybrid processor with disabled HWP\n");
 	}
 
 	return 0;

From fbdc21e9b038d00d0d56fa4e0f7701d42ae08f00 Mon Sep 17 00:00:00 2001
From: Giovanni Gherdovich <ggherdovich@suse.cz>
Date: Tue, 18 May 2021 14:34:12 +0200
Subject: [PATCH 0425/3292] cpufreq: intel_pstate: Add Icelake servers support
 in no-HWP mode

Users may disable HWP in firmware, in which case intel_pstate wouldn't load
unless the CPU model is explicitly supported.

Add ICELAKE_X to the list of CPUs that can register intel_pstate while not
advertising the HWP capability. Without this change, an ICELAKE_X in no-HWP
mode could only use the acpi_cpufreq frequency scaling driver.

See also commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers
support").

Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b0afb8629767..d36d3b72d86b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2272,6 +2272,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(ATOM_GOLDMONT,	core_funcs),
 	X86_MATCH(ATOM_GOLDMONT_PLUS,	core_funcs),
 	X86_MATCH(SKYLAKE_X,		core_funcs),
+	X86_MATCH(ICELAKE_X,		core_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);

From 706c5328851d23dec4d9b433cbf864d900a54edf Mon Sep 17 00:00:00 2001
From: Giovanni Gherdovich <ggherdovich@suse.cz>
Date: Tue, 18 May 2021 14:34:13 +0200
Subject: [PATCH 0426/3292] cpufreq: intel_pstate: Add Cometlake support in
 no-HWP mode

Users may disable HWP in firmware, in which case intel_pstate wouldn't load
unless the CPU model is explicitly supported.

See also commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers
support").

Suggested-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d36d3b72d86b..03d8516e653e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2272,6 +2272,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(ATOM_GOLDMONT,	core_funcs),
 	X86_MATCH(ATOM_GOLDMONT_PLUS,	core_funcs),
 	X86_MATCH(SKYLAKE_X,		core_funcs),
+	X86_MATCH(COMETLAKE,		core_funcs),
 	X86_MATCH(ICELAKE_X,		core_funcs),
 	{}
 };

From 9ff6774b9718d1a72d1b7c580fc579f1d9d7071f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 3 May 2021 09:18:40 -0700
Subject: [PATCH 0427/3292] cpufreq: sc520_freq: add 'fallthrough' to one case

Quieten an implicit-fallthrough warning in sc520_freq.c:

../drivers/cpufreq/sc520_freq.c: In function 'sc520_freq_get_cpu_frequency':
../include/linux/printk.h:343:2: warning: this statement may fall through [-Wimplicit-fallthrough=]
  printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
../drivers/cpufreq/sc520_freq.c:43:3: note: in expansion of macro 'pr_err'
   pr_err("error: cpuctl register has unexpected value %02x\n",
../drivers/cpufreq/sc520_freq.c:45:2: note: here
  case 0x01:

Fixes: bf6fc9fd2d848 ("[CPUFREQ] AMD Elan SC520 cpufreq driver.")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/sc520_freq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index 73a208559fe2..330c8d6cf93c 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -42,6 +42,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 	default:
 		pr_err("error: cpuctl register has unexpected value %02x\n",
 		       clockspeed_reg);
+		fallthrough;
 	case 0x01:
 		return 100000;
 	case 0x02:

From b7a732a73ac54dbccf14d90779d7e6b70ea0d653 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 14 May 2021 16:34:14 +0100
Subject: [PATCH 0428/3292] ACPI: APEI: Don't warn if ACPI is disabled

If ACPI is not enabled but support for ACPI and APEI is enabled in the
kernel, then the following warning is seen on boot ...

 WARNING KERN EINJ: ACPI disabled.

For ARM64 platforms, the 'acpi_disabled' variable is true by default
and hence, the above is often seen on ARM64. Given that it can be
normal for ACPI to be disabled, make this an informational print rather
that a warning.

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/einj.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 328e8aeece6c..2882450c443e 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -673,7 +673,7 @@ static int __init einj_init(void)
 	struct apei_exec_context ctx;
 
 	if (acpi_disabled) {
-		pr_warn("ACPI disabled.\n");
+		pr_info("ACPI disabled.\n");
 		return -ENODEV;
 	}
 

From b9370dceabb7841c5e65ce4ee4405b9db5231fc4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 14 May 2021 21:08:51 +0200
Subject: [PATCH 0429/3292] ACPI: PM / fan: Put fan device IDs into separate
 header file

The ACPI fan device IDs are shared between the fan driver and the
device power management code.  The former is modular, so it needs
to include the table of device IDs for module autoloading and the
latter needs that list to avoid attaching the generic ACPI PM domain
to fan devices (which doesn't make sense) possibly before the fan
driver module is loaded.

Unfortunately, that requires the list of fan device IDs to be
updated in two places which is prone to mistakes, so put it into
a symbol definition in a separate header file so there is only one
copy of it in case it needs to be updated again in the future.

Fixes: b9ea0bae260f ("ACPI: PM: Avoid attaching ACPI PM domain to certain devices")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c |  6 ++----
 drivers/acpi/fan.c       |  7 +++----
 drivers/acpi/fan.h       | 13 +++++++++++++
 3 files changed, 18 insertions(+), 8 deletions(-)
 create mode 100644 drivers/acpi/fan.h

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d260bc1f3e6e..9d2d3b9bb8b5 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -20,6 +20,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
 
+#include "fan.h"
 #include "internal.h"
 
 /**
@@ -1310,10 +1311,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
 	 * with the generic ACPI PM domain.
 	 */
 	static const struct acpi_device_id special_pm_ids[] = {
-		{"PNP0C0B", }, /* Generic ACPI fan */
-		{"INT3404", }, /* Fan */
-		{"INTC1044", }, /* Fan for Tiger Lake generation */
-		{"INTC1048", }, /* Fan for Alder Lake generation */
+		ACPI_FAN_DEVICE_IDS,
 		{}
 	};
 	struct acpi_device *adev = ACPI_COMPANION(dev);
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 66c3983f0ccc..5cd0ceb50bc8 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -16,6 +16,8 @@
 #include <linux/platform_device.h>
 #include <linux/sort.h>
 
+#include "fan.h"
+
 MODULE_AUTHOR("Paul Diefenbaugh");
 MODULE_DESCRIPTION("ACPI Fan Driver");
 MODULE_LICENSE("GPL");
@@ -24,10 +26,7 @@ static int acpi_fan_probe(struct platform_device *pdev);
 static int acpi_fan_remove(struct platform_device *pdev);
 
 static const struct acpi_device_id fan_device_ids[] = {
-	{"PNP0C0B", 0},
-	{"INT3404", 0},
-	{"INTC1044", 0},
-	{"INTC1048", 0},
+	ACPI_FAN_DEVICE_IDS,
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, fan_device_ids);
diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h
new file mode 100644
index 000000000000..dc9a6efa514b
--- /dev/null
+++ b/drivers/acpi/fan.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * ACPI fan device IDs are shared between the fan driver and the device power
+ * management code.
+ *
+ * Add new device IDs before the generic ACPI fan one.
+ */
+#define ACPI_FAN_DEVICE_IDS	\
+	{"INT3404", }, /* Fan */ \
+	{"INTC1044", }, /* Fan for Tiger Lake generation */ \
+	{"INTC1048", }, /* Fan for Alder Lake generation */ \
+	{"PNP0C0B", } /* Generic ACPI fan */

From 45b10e3e0170b925251464f238420c2b9ea9d998 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 17 May 2021 18:23:12 -0500
Subject: [PATCH 0430/3292] ACPI: sbshc: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a
fallthrough warning by simply dropping the empty default case at
the bottom.

This contributes to the ongoing efforts to globally enable
-Wimplicit-fallthrough for Clang.

Link: https://github.com/KSPP/linux/issues/115
Suggested-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/lkml/CAJZ5v0hLYWKX__oZdcCY0D20pNqpw8SkiTPOCNOtpqe--QLp4Q@mail.gmail.com/
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sbshc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index 53c2862c4c75..5c021c3b81d9 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -231,7 +231,6 @@ static int smbus_alarm(void *context)
 		case ACPI_SBS_BATTERY:
 			acpi_os_execute(OSL_NOTIFY_HANDLER,
 					acpi_smbus_callback, hc);
-		default:;
 	}
 	mutex_unlock(&hc->lock);
 	return 0;

From f00d2d32cc6766fdc03ccfc865f08cc081df5b5f Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 20 May 2021 10:39:08 +0800
Subject: [PATCH 0431/3292] ACPI: event: Remove redundant initialization of
 local variable

'error' will be initialized, so clean up the redundant initialization.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index 92e59f45329b..bfb16cf8f807 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -165,7 +165,7 @@ static int acpi_event_genetlink_init(void)
 
 static int __init acpi_event_init(void)
 {
-	int error = 0;
+	int error;
 
 	if (acpi_disabled)
 		return 0;

From 6306f0431914beaf220634ad36c08234006571d5 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chris.chiu@canonical.com>
Date: Thu, 20 May 2021 11:09:50 +0800
Subject: [PATCH 0432/3292] ACPI: EC: Make more Asus laptops use ECDT _GPE

More ASUS laptops have the _GPE define in the DSDT table with a
different value than the _GPE number in the ECDT.

This is causing media keys not working on ASUS X505BA/BP, X542BA/BP

Add model info to the quirks list.

Signed-off-by: Chris Chiu <chris.chiu@canonical.com>
Signed-off-by: Jian-Hong Pan <jhp@endlessos.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 13565629ce0a..e8c5da2b964a 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1846,6 +1846,22 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = {
 	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 	DMI_MATCH(DMI_PRODUCT_NAME, "GL702VMK"),}, NULL},
 	{
+	ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X505BA", {
+	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+	DMI_MATCH(DMI_PRODUCT_NAME, "X505BA"),}, NULL},
+	{
+	ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X505BP", {
+	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+	DMI_MATCH(DMI_PRODUCT_NAME, "X505BP"),}, NULL},
+	{
+	ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X542BA", {
+	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+	DMI_MATCH(DMI_PRODUCT_NAME, "X542BA"),}, NULL},
+	{
+	ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X542BP", {
+	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+	DMI_MATCH(DMI_PRODUCT_NAME, "X542BP"),}, NULL},
+	{
 	ec_honor_ecdt_gpe, "ASUS X550VXK", {
 	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 	DMI_MATCH(DMI_PRODUCT_NAME, "X550VXK"),}, NULL},

From d0d62baa7f505bd4c59cd169692ff07ec49dde37 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 19 May 2021 10:47:04 +0800
Subject: [PATCH 0433/3292] net: xilinx_emaclite: Do not print real IOMEM
 pointer

Printing kernel pointers is discouraged because they might leak kernel
memory layout.  This fixes smatch warning:

drivers/net/ethernet/xilinx/xilinx_emaclite.c:1191 xemaclite_of_probe() warn:
 argument 4 to %08lX specifier is cast from pointer

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/xilinx_emaclite.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index d9d58a7dabee..b06377fe7293 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1189,9 +1189,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
 	}
 
 	dev_info(dev,
-		 "Xilinx EmacLite at 0x%08lX mapped to 0x%08lX, irq=%d\n",
-		 (unsigned long __force)ndev->mem_start,
-		 (unsigned long __force)lp->base_addr, ndev->irq);
+		 "Xilinx EmacLite at 0x%08lX mapped to 0x%p, irq=%d\n",
+		 (unsigned long __force)ndev->mem_start, lp->base_addr, ndev->irq);
 	return 0;
 
 error:

From 48e8c6f1612b3d2dccaea2285231def830cc5b8e Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Thu, 20 May 2021 12:32:30 -0400
Subject: [PATCH 0434/3292] net: phy: add driver for Motorcomm yt8511 phy

Add a driver for the Motorcomm yt8511 phy that will be used in the
production Pine64 rk3566-quartz64 development board.
It supports gigabit transfer speeds, rgmii, and 125mhz clk output.

Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                 |   6 ++
 drivers/net/phy/Kconfig     |   6 ++
 drivers/net/phy/Makefile    |   1 +
 drivers/net/phy/motorcomm.c | 136 ++++++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)
 create mode 100644 drivers/net/phy/motorcomm.c

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..b9f329249a5a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12378,6 +12378,12 @@ F:	Documentation/userspace-api/media/drivers/meye*
 F:	drivers/media/pci/meye/
 F:	include/uapi/linux/meye.h
 
+MOTORCOMM PHY DRIVER
+M:	Peter Geis <pgwipeout@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/phy/motorcomm.c
+
 MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD
 S:	Orphan
 F:	Documentation/driver-api/serial/moxa-smartio.rst
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 1534e408505b..c56f703ae998 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -234,6 +234,12 @@ config MICROSEMI_PHY
 	help
 	  Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
 
+config MOTORCOMM_PHY
+	tristate "Motorcomm PHYs"
+	help
+	  Enables support for Motorcomm network PHYs.
+	  Currently supports the YT8511 gigabit PHY.
+
 config NATIONAL_PHY
 	tristate "National Semiconductor PHYs"
 	help
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 24328d7cf931..172bb193ae6a 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_MICREL_PHY)	+= micrel.o
 obj-$(CONFIG_MICROCHIP_PHY)	+= microchip.o
 obj-$(CONFIG_MICROCHIP_T1_PHY)	+= microchip_t1.o
 obj-$(CONFIG_MICROSEMI_PHY)	+= mscc/
+obj-$(CONFIG_MOTORCOMM_PHY)	+= motorcomm.o
 obj-$(CONFIG_NATIONAL_PHY)	+= national.o
 obj-$(CONFIG_NXP_C45_TJA11XX_PHY)	+= nxp-c45-tja11xx.o
 obj-$(CONFIG_NXP_TJA11XX_PHY)	+= nxp-tja11xx.o
diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c
new file mode 100644
index 000000000000..796b68f4b499
--- /dev/null
+++ b/drivers/net/phy/motorcomm.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Driver for Motorcomm PHYs
+ *
+ * Author: Peter Geis <pgwipeout@gmail.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+
+#define PHY_ID_YT8511		0x0000010a
+
+#define YT8511_PAGE_SELECT	0x1e
+#define YT8511_PAGE		0x1f
+#define YT8511_EXT_CLK_GATE	0x0c
+#define YT8511_EXT_DELAY_DRIVE	0x0d
+#define YT8511_EXT_SLEEP_CTRL	0x27
+
+/* 2b00 25m from pll
+ * 2b01 25m from xtl *default*
+ * 2b10 62.m from pll
+ * 2b11 125m from pll
+ */
+#define YT8511_CLK_125M		(BIT(2) | BIT(1))
+#define YT8511_PLLON_SLP	BIT(14)
+
+/* RX Delay enabled = 1.8ns 1000T, 8ns 10/100T */
+#define YT8511_DELAY_RX		BIT(0)
+
+/* TX Gig-E Delay is bits 7:4, default 0x5
+ * TX Fast-E Delay is bits 15:12, default 0xf
+ * Delay = 150ps * N - 250ps
+ * On = 2000ps, off = 50ps
+ */
+#define YT8511_DELAY_GE_TX_EN	(0xf << 4)
+#define YT8511_DELAY_GE_TX_DIS	(0x2 << 4)
+#define YT8511_DELAY_FE_TX_EN	(0xf << 12)
+#define YT8511_DELAY_FE_TX_DIS	(0x2 << 12)
+
+static int yt8511_read_page(struct phy_device *phydev)
+{
+	return __phy_read(phydev, YT8511_PAGE_SELECT);
+};
+
+static int yt8511_write_page(struct phy_device *phydev, int page)
+{
+	return __phy_write(phydev, YT8511_PAGE_SELECT, page);
+};
+
+static int yt8511_config_init(struct phy_device *phydev)
+{
+	unsigned int ge, fe;
+	int ret, oldpage;
+
+	/* set clock mode to 125mhz */
+	oldpage = phy_select_page(phydev, YT8511_EXT_CLK_GATE);
+	if (oldpage < 0)
+		goto err_restore_page;
+
+	ret = __phy_modify(phydev, YT8511_PAGE, 0, YT8511_CLK_125M);
+	if (ret < 0)
+		goto err_restore_page;
+
+	/* set rgmii delay mode */
+	switch (phydev->interface) {
+	case PHY_INTERFACE_MODE_RGMII:
+		ge = YT8511_DELAY_GE_TX_DIS;
+		fe = YT8511_DELAY_FE_TX_DIS;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+		ge = YT8511_DELAY_RX | YT8511_DELAY_GE_TX_DIS;
+		fe = YT8511_DELAY_FE_TX_DIS;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		ge = YT8511_DELAY_GE_TX_EN;
+		fe = YT8511_DELAY_FE_TX_EN;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_ID:
+		ge = YT8511_DELAY_RX | YT8511_DELAY_GE_TX_EN;
+		fe = YT8511_DELAY_FE_TX_EN;
+		break;
+	default: /* leave everything alone in other modes */
+		break;
+	}
+
+	ret = __phy_modify(phydev, YT8511_PAGE, (YT8511_DELAY_RX | YT8511_DELAY_GE_TX_EN), ge);
+	if (ret < 0)
+		goto err_restore_page;
+
+	/* fast ethernet delay is in a separate page */
+	ret = __phy_write(phydev, YT8511_PAGE_SELECT, YT8511_EXT_DELAY_DRIVE);
+	if (ret < 0)
+		goto err_restore_page;
+
+	ret = __phy_modify(phydev, YT8511_PAGE, YT8511_DELAY_FE_TX_EN, fe);
+	if (ret < 0)
+		goto err_restore_page;
+
+	/* leave pll enabled in sleep */
+	ret = __phy_write(phydev, YT8511_PAGE_SELECT, YT8511_EXT_SLEEP_CTRL);
+	if (ret < 0)
+		goto err_restore_page;
+
+	ret = __phy_modify(phydev, YT8511_PAGE, 0, YT8511_PLLON_SLP);
+	if (ret < 0)
+		goto err_restore_page;
+
+err_restore_page:
+	return phy_restore_page(phydev, oldpage, ret);
+}
+
+static struct phy_driver motorcomm_phy_drvs[] = {
+	{
+		PHY_ID_MATCH_EXACT(PHY_ID_YT8511),
+		.name		= "YT8511 Gigabit Ethernet",
+		.config_init	= yt8511_config_init,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.read_page	= yt8511_read_page,
+		.write_page	= yt8511_write_page,
+	},
+};
+
+module_phy_driver(motorcomm_phy_drvs);
+
+MODULE_DESCRIPTION("Motorcomm PHY driver");
+MODULE_AUTHOR("Peter Geis");
+MODULE_LICENSE("GPL");
+
+static const struct mdio_device_id __maybe_unused motorcomm_tbl[] = {
+	{ PHY_ID_MATCH_EXACT(PHY_ID_YT8511) },
+	{ /* sentinal */ }
+};
+
+MODULE_DEVICE_TABLE(mdio, motorcomm_tbl);

From ca021f0dd85140bc96f1381700bbcab753b74658 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 21 May 2021 00:16:56 +0300
Subject: [PATCH 0435/3292] net: dsa: sja1105: send multiple spi_messages
 instead of using cs_change

The sja1105 driver has been described by Mark Brown as "not using the
[ SPI ] API at all idiomatically" due to the use of cs_change:
https://patchwork.kernel.org/project/netdevbpf/patch/20210520135031.2969183-1-olteanv@gmail.com/

According to include/linux/spi/spi.h, the chip select is supposed to be
asserted for the entire length of a SPI message, as long as cs_change is
false for all member transfers. The cs_change flag changes the following:

(i) When a non-final SPI transfer has cs_change = true, the chip select
    should temporarily deassert and then reassert starting with the next
    transfer.
(ii) When a final SPI transfer has cs_change = true, the chip select
     should remain asserted until the following SPI message.

The sja1105 driver only uses cs_change for its first property, to form a
single SPI message whose layout can be seen below:

                                             this is an entire, single spi_message
           _______________________________________________________________________________________________
          /                                                                                               \
          +-------------+---------------+-------------+---------------+ ... +-------------+---------------+
          | hdr_xfer[0] | chunk_xfer[0] | hdr_xfer[1] | chunk_xfer[1] |     | hdr_xfer[n] | chunk_xfer[n] |
          +-------------+---------------+-------------+---------------+ ... +-------------+---------------+
cs_change      false          true           false           true                false          false

           ____________________________  _____________________________       _____________________________
CS line __/                            \/                             \ ... /                             \__

The fact of the matter is that spi_max_message_size() has an ambiguous
meaning if any non-final transfer has cs_change = true.

If the SPI master has a limitation in that it cannot keep the chip
select asserted for more than, say, 200 bytes (like the spi-sc18is602),
the normal thing for it to do is to implement .max_transfer_size and
.max_message_size, and limit both to 200: in the "worst case" where
cs_change is always false, then the controller can, indeed, not send
messages larger than 200 bytes.

But the fact that the SPI controller's max_message_size does not
necessarily mean that we cannot send messages larger than that.
Notably, if the SPI master special-cases the transfers with cs_change
and treats every chip select toggling as an entirely new transaction,
then a SPI message can easily exceed that limit. So there is a
temptation to ignore the controller's reported max_message_size when
using cs_change = true in non-final transfers.

But that can lead to false conclusions. As Mark points out, the SPI
controller might have a different kind of limitation with the max
message size, that has nothing at all to do with how long it can keep
the chip select asserted.
For example, that might be the case if the device is able to offload the
chip select changes to the hardware as part of the data stream, and it
packs the entire stream of commands+data (corresponding to a SPI
message) into a single DMA transfer that is itself limited in size.

So the only thing we can do is avoid ambiguity by not using cs_change at
all. Instead of sending a single spi_message, we now send multiple SPI
messages as follows:

                  spi_message 0                 spi_message 1                       spi_message n
           ____________________________   ___________________________        _____________________________
          /                            \ /                           \      /                             \
          +-------------+---------------+-------------+---------------+ ... +-------------+---------------+
          | hdr_xfer[0] | chunk_xfer[0] | hdr_xfer[1] | chunk_xfer[1] |     | hdr_xfer[n] | chunk_xfer[n] |
          +-------------+---------------+-------------+---------------+ ... +-------------+---------------+
cs_change      false          true           false           true                false          false

           ____________________________  _____________________________       _____________________________
CS line __/                            \/                             \ ... /                             \__

which is clearer because the max_message_size limit is now easier to
enforce. What is transmitted on the wire stays, of course, the same.

Additionally, because we send no more than 2 transfers at a time, we now
avoid dynamic memory allocation too, which might be seen as an
improvement by some.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_spi.c | 52 +++++++--------------------
 1 file changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index f7a1514f81e8..8746e3f158a0 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -29,13 +29,6 @@ sja1105_spi_message_pack(void *buf, const struct sja1105_spi_message *msg)
 	sja1105_pack(buf, &msg->address,    24,  4, size);
 }
 
-#define sja1105_hdr_xfer(xfers, chunk) \
-	((xfers) + 2 * (chunk))
-#define sja1105_chunk_xfer(xfers, chunk) \
-	((xfers) + 2 * (chunk) + 1)
-#define sja1105_hdr_buf(hdr_bufs, chunk) \
-	((hdr_bufs) + (chunk) * SJA1105_SIZE_SPI_MSG_HEADER)
-
 /* If @rw is:
  * - SPI_WRITE: creates and sends an SPI write message at absolute
  *		address reg_addr, taking @len bytes from *buf
@@ -46,41 +39,25 @@ static int sja1105_xfer(const struct sja1105_private *priv,
 			sja1105_spi_rw_mode_t rw, u64 reg_addr, u8 *buf,
 			size_t len, struct ptp_system_timestamp *ptp_sts)
 {
+	u8 hdr_buf[SJA1105_SIZE_SPI_MSG_HEADER] = {0};
 	struct sja1105_chunk chunk = {
 		.len = min_t(size_t, len, SJA1105_SIZE_SPI_MSG_MAXLEN),
 		.reg_addr = reg_addr,
 		.buf = buf,
 	};
 	struct spi_device *spi = priv->spidev;
-	struct spi_transfer *xfers;
+	struct spi_transfer xfers[2] = {0};
+	struct spi_transfer *chunk_xfer;
+	struct spi_transfer *hdr_xfer;
 	int num_chunks;
 	int rc, i = 0;
-	u8 *hdr_bufs;
 
 	num_chunks = DIV_ROUND_UP(len, SJA1105_SIZE_SPI_MSG_MAXLEN);
 
-	/* One transfer for each message header, one for each message
-	 * payload (chunk).
-	 */
-	xfers = kcalloc(2 * num_chunks, sizeof(struct spi_transfer),
-			GFP_KERNEL);
-	if (!xfers)
-		return -ENOMEM;
-
-	/* Packed buffers for the num_chunks SPI message headers,
-	 * stored as a contiguous array
-	 */
-	hdr_bufs = kcalloc(num_chunks, SJA1105_SIZE_SPI_MSG_HEADER,
-			   GFP_KERNEL);
-	if (!hdr_bufs) {
-		kfree(xfers);
-		return -ENOMEM;
-	}
+	hdr_xfer = &xfers[0];
+	chunk_xfer = &xfers[1];
 
 	for (i = 0; i < num_chunks; i++) {
-		struct spi_transfer *chunk_xfer = sja1105_chunk_xfer(xfers, i);
-		struct spi_transfer *hdr_xfer = sja1105_hdr_xfer(xfers, i);
-		u8 *hdr_buf = sja1105_hdr_buf(hdr_bufs, i);
 		struct spi_transfer *ptp_sts_xfer;
 		struct sja1105_spi_message msg;
 
@@ -129,19 +106,14 @@ static int sja1105_xfer(const struct sja1105_private *priv,
 		chunk.len = min_t(size_t, (ptrdiff_t)(buf + len - chunk.buf),
 				  SJA1105_SIZE_SPI_MSG_MAXLEN);
 
-		/* De-assert the chip select after each chunk. */
-		if (chunk.len)
-			chunk_xfer->cs_change = 1;
+		rc = spi_sync_transfer(spi, xfers, 2);
+		if (rc < 0) {
+			dev_err(&spi->dev, "SPI transfer failed: %d\n", rc);
+			return rc;
+		}
 	}
 
-	rc = spi_sync_transfer(spi, xfers, 2 * num_chunks);
-	if (rc < 0)
-		dev_err(&spi->dev, "SPI transfer failed: %d\n", rc);
-
-	kfree(hdr_bufs);
-	kfree(xfers);
-
-	return rc;
+	return 0;
 }
 
 int sja1105_xfer_buf(const struct sja1105_private *priv,

From 718bad0e4da9a637a99c13b27dcb030921961bc7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 21 May 2021 00:16:57 +0300
Subject: [PATCH 0436/3292] net: dsa: sja1105: adapt to a SPI controller with a
 limited max transfer size

The static config of the sja1105 switch is a long stream of bytes which
is programmed to the hardware in chunks (portions with the chip select
continuously asserted) of max 256 bytes each. Each chunk is a
spi_message composed of 2 spi_transfers: the buffer with the data and a
preceding buffer with the SPI access header.

Only that certain SPI controllers, such as the spi-sc18is602 I2C-to-SPI
bridge, cannot keep the chip select asserted for that long.
The spi_max_transfer_size() and spi_max_message_size() functions are how
the controller can impose its hardware limitations upon the SPI
peripheral driver.

For the sja1105 driver to work with these controllers, both buffers must
be smaller than the transfer limit, and their sum must be smaller than
the message limit.

Regression-tested on a switch connected to a controller with no
limitations (spi-fsl-dspi) as well as with one with caps for both
max_transfer_size and max_message_size (spi-sc18is602).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h             |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c        | 28 +++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_spi.c         | 16 +++++------
 .../net/dsa/sja1105/sja1105_static_config.h   |  2 ++
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index f9e87fb33da0..7ec40c4b2d5a 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -209,6 +209,7 @@ struct sja1105_private {
 	unsigned long ucast_egress_floods;
 	unsigned long bcast_egress_floods;
 	const struct sja1105_info *info;
+	size_t max_xfer_len;
 	struct gpio_desc *reset_gpio;
 	struct spi_device *spidev;
 	struct dsa_switch *ds;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 405024b637d6..802314e90e64 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3563,6 +3563,7 @@ static int sja1105_probe(struct spi_device *spi)
 	struct sja1105_tagger_data *tagger_data;
 	struct device *dev = &spi->dev;
 	struct sja1105_private *priv;
+	size_t max_xfer, max_msg;
 	struct dsa_switch *ds;
 	int rc, port;
 
@@ -3596,6 +3597,33 @@ static int sja1105_probe(struct spi_device *spi)
 		return rc;
 	}
 
+	/* In sja1105_xfer, we send spi_messages composed of two spi_transfers:
+	 * a small one for the message header and another one for the current
+	 * chunk of the packed buffer.
+	 * Check that the restrictions imposed by the SPI controller are
+	 * respected: the chunk buffer is smaller than the max transfer size,
+	 * and the total length of the chunk plus its message header is smaller
+	 * than the max message size.
+	 * We do that during probe time since the maximum transfer size is a
+	 * runtime invariant.
+	 */
+	max_xfer = spi_max_transfer_size(spi);
+	max_msg = spi_max_message_size(spi);
+
+	/* We need to send at least one 64-bit word of SPI payload per message
+	 * in order to be able to make useful progress.
+	 */
+	if (max_msg < SJA1105_SIZE_SPI_MSG_HEADER + 8) {
+		dev_err(dev, "SPI master cannot send large enough buffers, aborting\n");
+		return -EINVAL;
+	}
+
+	priv->max_xfer_len = SJA1105_SIZE_SPI_MSG_MAXLEN;
+	if (priv->max_xfer_len > max_xfer)
+		priv->max_xfer_len = max_xfer;
+	if (priv->max_xfer_len > max_msg - SJA1105_SIZE_SPI_MSG_HEADER)
+		priv->max_xfer_len = max_msg - SJA1105_SIZE_SPI_MSG_HEADER;
+
 	priv->info = of_device_get_match_data(dev);
 
 	/* Detect hardware device */
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 8746e3f158a0..5a7b404bf3ce 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -8,8 +8,6 @@
 #include "sja1105.h"
 
 #define SJA1105_SIZE_RESET_CMD		4
-#define SJA1105_SIZE_SPI_MSG_HEADER	4
-#define SJA1105_SIZE_SPI_MSG_MAXLEN	(64 * 4)
 
 struct sja1105_chunk {
 	u8	*buf;
@@ -40,19 +38,19 @@ static int sja1105_xfer(const struct sja1105_private *priv,
 			size_t len, struct ptp_system_timestamp *ptp_sts)
 {
 	u8 hdr_buf[SJA1105_SIZE_SPI_MSG_HEADER] = {0};
-	struct sja1105_chunk chunk = {
-		.len = min_t(size_t, len, SJA1105_SIZE_SPI_MSG_MAXLEN),
-		.reg_addr = reg_addr,
-		.buf = buf,
-	};
 	struct spi_device *spi = priv->spidev;
 	struct spi_transfer xfers[2] = {0};
 	struct spi_transfer *chunk_xfer;
 	struct spi_transfer *hdr_xfer;
+	struct sja1105_chunk chunk;
 	int num_chunks;
 	int rc, i = 0;
 
-	num_chunks = DIV_ROUND_UP(len, SJA1105_SIZE_SPI_MSG_MAXLEN);
+	num_chunks = DIV_ROUND_UP(len, priv->max_xfer_len);
+
+	chunk.reg_addr = reg_addr;
+	chunk.buf = buf;
+	chunk.len = min_t(size_t, len, priv->max_xfer_len);
 
 	hdr_xfer = &xfers[0];
 	chunk_xfer = &xfers[1];
@@ -104,7 +102,7 @@ static int sja1105_xfer(const struct sja1105_private *priv,
 		chunk.buf += chunk.len;
 		chunk.reg_addr += chunk.len / 4;
 		chunk.len = min_t(size_t, (ptrdiff_t)(buf + len - chunk.buf),
-				  SJA1105_SIZE_SPI_MSG_MAXLEN);
+				  priv->max_xfer_len);
 
 		rc = spi_sync_transfer(spi, xfers, 2);
 		if (rc < 0) {
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index bc7606899289..779eb6840f05 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -9,6 +9,8 @@
 #include <linux/types.h>
 #include <asm/types.h>
 
+#define SJA1105_SIZE_SPI_MSG_HEADER			4
+#define SJA1105_SIZE_SPI_MSG_MAXLEN			(64 * 4)
 #define SJA1105_SIZE_DEVICE_ID				4
 #define SJA1105_SIZE_TABLE_HEADER			12
 #define SJA1105_SIZE_SCHEDULE_ENTRY			8

From 4926257916496909154857d92413027915a30309 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 21 May 2021 09:08:12 +0800
Subject: [PATCH 0437/3292] net: wan: fix an code style issue about "foo* bar"

Fix the checkpatch error as "foo* bar" should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64572.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index 9f60e3969bf8..e7d8653c4bde 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -54,7 +54,7 @@
 
 static int sca_poll(struct napi_struct *napi, int budget);
 
-static inline port_t* dev_to_port(struct net_device *dev)
+static inline port_t *dev_to_port(struct net_device *dev)
 {
 	return dev_to_hdlc(dev)->priv;
 }
@@ -186,7 +186,7 @@ static void sca_init_port(port_t *port)
 static inline void sca_msci_intr(port_t *port)
 {
 	u16 msci = get_msci(port);
-	card_t* card = port->card;
+	card_t *card = port->card;
 
 	if (sca_in(msci + ST1, card) & ST1_CDCD) {
 		/* Reset MSCI CDCD status bit */
@@ -286,7 +286,7 @@ static inline int sca_rx_done(port_t *port, int budget)
 static inline void sca_tx_done(port_t *port)
 {
 	struct net_device *dev = port->netdev;
-	card_t* card = port->card;
+	card_t *card = port->card;
 	u8 stat;
 	unsigned count = 0;
 
@@ -366,7 +366,7 @@ static irqreturn_t sca_intr(int irq, void *dev_id)
 
 static void sca_set_port(port_t *port)
 {
-	card_t* card = port->card;
+	card_t *card = port->card;
 	u16 msci = get_msci(port);
 	u8 md2 = sca_in(msci + MD2, card);
 	unsigned int tmc, br = 10, brv = 1024;
@@ -421,7 +421,7 @@ static void sca_set_port(port_t *port)
 static void sca_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
-	card_t* card = port->card;
+	card_t *card = port->card;
 	u16 msci = get_msci(port);
 	u8 md0, md2;
 

From 974221c6cf5441107c82f8c4c0b0694345d6c568 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 21 May 2021 09:08:13 +0800
Subject: [PATCH 0438/3292] net: wan: add some required spaces

Add space required after that close brace '}'.
Add space required before the open parenthesis '('.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64572.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index e7d8653c4bde..e51d94d62e07 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -380,7 +380,7 @@ static void sca_set_port(port_t *port)
 
 			/* Baud Rate = CLOCK_BASE / TMC / 2^BR */
 			tmc = CLOCK_BASE / brv / port->settings.clock_rate;
-		}while (br > 1 && tmc <= 128);
+		} while (br > 1 && tmc <= 128);
 
 		if (tmc < 1) {
 			tmc = 1;
@@ -425,7 +425,7 @@ static void sca_open(struct net_device *dev)
 	u16 msci = get_msci(port);
 	u8 md0, md2;
 
-	switch(port->encoding) {
+	switch (port->encoding) {
 	case ENCODING_NRZ:	md2 = MD2_NRZ;		break;
 	case ENCODING_NRZI:	md2 = MD2_NRZI;		break;
 	case ENCODING_FM_MARK:	md2 = MD2_FM_MARK;	break;
@@ -436,7 +436,7 @@ static void sca_open(struct net_device *dev)
 	if (port->settings.loopback)
 		md2 |= MD2_LOOPBACK;
 
-	switch(port->parity) {
+	switch (port->parity) {
 	case PARITY_CRC16_PR0:	     md0 = MD0_HDLC | MD0_CRC_16_0;  break;
 	case PARITY_CRC16_PR1:	     md0 = MD0_HDLC | MD0_CRC_16;    break;
 	case PARITY_CRC32_PR1_CCITT: md0 = MD0_HDLC | MD0_CRC_ITU32; break;

From eab9948140d109fdf42f0477d1b3b85c3a7e3306 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 21 May 2021 09:08:14 +0800
Subject: [PATCH 0439/3292] net: wan: fix the code style issue about trailing
 statements

Trailing statements should be on next line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64572.c | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index e51d94d62e07..aa69bcaba633 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -426,22 +426,40 @@ static void sca_open(struct net_device *dev)
 	u8 md0, md2;
 
 	switch (port->encoding) {
-	case ENCODING_NRZ:	md2 = MD2_NRZ;		break;
-	case ENCODING_NRZI:	md2 = MD2_NRZI;		break;
-	case ENCODING_FM_MARK:	md2 = MD2_FM_MARK;	break;
-	case ENCODING_FM_SPACE:	md2 = MD2_FM_SPACE;	break;
-	default:		md2 = MD2_MANCHESTER;
+	case ENCODING_NRZ:
+		md2 = MD2_NRZ;
+		break;
+	case ENCODING_NRZI:
+		md2 = MD2_NRZI;
+		break;
+	case ENCODING_FM_MARK:
+		md2 = MD2_FM_MARK;
+		break;
+	case ENCODING_FM_SPACE:
+		md2 = MD2_FM_SPACE;
+		break;
+	default:
+		md2 = MD2_MANCHESTER;
 	}
 
 	if (port->settings.loopback)
 		md2 |= MD2_LOOPBACK;
 
 	switch (port->parity) {
-	case PARITY_CRC16_PR0:	     md0 = MD0_HDLC | MD0_CRC_16_0;  break;
-	case PARITY_CRC16_PR1:	     md0 = MD0_HDLC | MD0_CRC_16;    break;
-	case PARITY_CRC32_PR1_CCITT: md0 = MD0_HDLC | MD0_CRC_ITU32; break;
-	case PARITY_CRC16_PR1_CCITT: md0 = MD0_HDLC | MD0_CRC_ITU;   break;
-	default:		     md0 = MD0_HDLC | MD0_CRC_NONE;
+	case PARITY_CRC16_PR0:
+		md0 = MD0_HDLC | MD0_CRC_16_0;
+		break;
+	case PARITY_CRC16_PR1:
+		md0 = MD0_HDLC | MD0_CRC_16;
+		break;
+	case PARITY_CRC32_PR1_CCITT:
+		md0 = MD0_HDLC | MD0_CRC_ITU32;
+		break;
+	case PARITY_CRC16_PR1_CCITT:
+		md0 = MD0_HDLC | MD0_CRC_ITU;
+		break;
+	default:
+		md0 = MD0_HDLC | MD0_CRC_NONE;
 	}
 
 	sca_out(CMD_RESET, msci + CMD, card);

From 145efe6c279bbfd0795dcded592147278c22d713 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 21 May 2021 09:08:15 +0800
Subject: [PATCH 0440/3292] net: wan: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64572.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index aa69bcaba633..34acea93efdf 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -81,14 +81,12 @@ static inline u16 desc_abs_number(port_t *port, u16 desc, int transmit)
 	return port->chan * (rx_buffs + tx_buffs) + transmit * rx_buffs + desc;
 }
 
-
 static inline u16 desc_offset(port_t *port, u16 desc, int transmit)
 {
 	/* Descriptor offset always fits in 16 bits */
 	return desc_abs_number(port, desc, transmit) * sizeof(pkt_desc);
 }
 
-
 static inline pkt_desc __iomem *desc_address(port_t *port, u16 desc,
 					     int transmit)
 {
@@ -96,14 +94,12 @@ static inline pkt_desc __iomem *desc_address(port_t *port, u16 desc,
 				    desc_offset(port, desc, transmit));
 }
 
-
 static inline u32 buffer_offset(port_t *port, u16 desc, int transmit)
 {
 	return port->card->buff_offset +
 		desc_abs_number(port, desc, transmit) * (u32)HDLC_MAX_MRU;
 }
 
-
 static inline void sca_set_carrier(port_t *port)
 {
 	if (!(sca_in(get_msci(port) + ST3, port->card) & ST3_DCD)) {
@@ -121,7 +117,6 @@ static inline void sca_set_carrier(port_t *port)
 	}
 }
 
-
 static void sca_init_port(port_t *port)
 {
 	card_t *card = port->card;
@@ -181,7 +176,6 @@ static void sca_init_port(port_t *port)
 	netif_napi_add(port->netdev, &port->napi, sca_poll, NAPI_WEIGHT);
 }
 
-
 /* MSCI interrupt service */
 static inline void sca_msci_intr(port_t *port)
 {
@@ -195,7 +189,6 @@ static inline void sca_msci_intr(port_t *port)
 	}
 }
 
-
 static inline void sca_rx(card_t *card, port_t *port, pkt_desc __iomem *desc,
 			  u16 rxin)
 {
@@ -225,7 +218,6 @@ static inline void sca_rx(card_t *card, port_t *port, pkt_desc __iomem *desc,
 	netif_receive_skb(skb);
 }
 
-
 /* Receive DMA service */
 static inline int sca_rx_done(port_t *port, int budget)
 {
@@ -281,7 +273,6 @@ static inline int sca_rx_done(port_t *port, int budget)
 	return received;
 }
 
-
 /* Transmit DMA service */
 static inline void sca_tx_done(port_t *port)
 {
@@ -321,7 +312,6 @@ static inline void sca_tx_done(port_t *port)
 	spin_unlock(&port->lock);
 }
 
-
 static int sca_poll(struct napi_struct *napi, int budget)
 {
 	port_t *port = container_of(napi, port_t, napi);
@@ -363,7 +353,6 @@ static irqreturn_t sca_intr(int irq, void *dev_id)
 	return IRQ_RETVAL(handled);
 }
 
-
 static void sca_set_port(port_t *port)
 {
 	card_t *card = port->card;
@@ -371,7 +360,6 @@ static void sca_set_port(port_t *port)
 	u8 md2 = sca_in(msci + MD2, card);
 	unsigned int tmc, br = 10, brv = 1024;
 
-
 	if (port->settings.clock_rate > 0) {
 		/* Try lower br for better accuracy*/
 		do {
@@ -414,10 +402,8 @@ static void sca_set_port(port_t *port)
 		md2 &= ~MD2_LOOPBACK;
 
 	sca_out(md2, msci + MD2, card);
-
 }
 
-
 static void sca_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -494,7 +480,6 @@ static void sca_open(struct net_device *dev)
 	netif_start_queue(dev);
 }
 
-
 static void sca_close(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -506,7 +491,6 @@ static void sca_close(struct net_device *dev)
 	netif_stop_queue(dev);
 }
 
-
 static int sca_attach(struct net_device *dev, unsigned short encoding,
 		      unsigned short parity)
 {
@@ -529,7 +513,6 @@ static int sca_attach(struct net_device *dev, unsigned short encoding,
 	return 0;
 }
 
-
 #ifdef DEBUG_RINGS
 static void sca_dump_rings(struct net_device *dev)
 {
@@ -576,7 +559,6 @@ static void sca_dump_rings(struct net_device *dev)
 }
 #endif /* DEBUG_RINGS */
 
-
 static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -618,7 +600,6 @@ static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-
 static u32 sca_detect_ram(card_t *card, u8 __iomem *rambase, u32 ramsize)
 {
 	/* Round RAM size to 32 bits, fill from end to start */
@@ -637,7 +618,6 @@ static u32 sca_detect_ram(card_t *card, u8 __iomem *rambase, u32 ramsize)
 	return i;
 }
 
-
 static void sca_init(card_t *card, int wait_states)
 {
 	sca_out(wait_states, WCRL, card); /* Wait Control */

From 1bf705d4f2316ec213ada3119bc6cb352f43de64 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 21 May 2021 09:08:16 +0800
Subject: [PATCH 0441/3292] net: wan: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64572.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index 34acea93efdf..7fccf23c8bef 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -374,8 +374,9 @@ static void sca_set_port(port_t *port)
 			tmc = 1;
 			br = 0;	/* For baud=CLOCK_BASE we use tmc=1 br=0 */
 			brv = 1;
-		} else if (tmc > 255)
+		} else if (tmc > 255) {
 			tmc = 256; /* tmc=0 means 256 - low baud rates */
+		}
 
 		port->settings.clock_rate = CLOCK_BASE / brv / tmc;
 	} else {

From 70fe4523c8f6c310c4e5e2c2de5a018a22a6d928 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 21 May 2021 09:08:17 +0800
Subject: [PATCH 0442/3292] net: wan: add necessary () to macro argument

Macro argument 'card' and 'port' may be better as
'(card)' and '(port)' to avoid precedence issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64572.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index 7fccf23c8bef..b89b03a6aba7 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -41,16 +41,16 @@
 
 #define NAPI_WEIGHT		16
 
-#define get_msci(port)	  (port->chan ?   MSCI1_OFFSET :   MSCI0_OFFSET)
-#define get_dmac_rx(port) (port->chan ? DMAC1RX_OFFSET : DMAC0RX_OFFSET)
-#define get_dmac_tx(port) (port->chan ? DMAC1TX_OFFSET : DMAC0TX_OFFSET)
+#define get_msci(port)	  ((port)->chan ?   MSCI1_OFFSET :   MSCI0_OFFSET)
+#define get_dmac_rx(port) ((port)->chan ? DMAC1RX_OFFSET : DMAC0RX_OFFSET)
+#define get_dmac_tx(port) ((port)->chan ? DMAC1TX_OFFSET : DMAC0TX_OFFSET)
 
-#define sca_in(reg, card)	     readb(card->scabase + (reg))
-#define sca_out(value, reg, card)    writeb(value, card->scabase + (reg))
-#define sca_inw(reg, card)	     readw(card->scabase + (reg))
-#define sca_outw(value, reg, card)   writew(value, card->scabase + (reg))
-#define sca_inl(reg, card)	     readl(card->scabase + (reg))
-#define sca_outl(value, reg, card)   writel(value, card->scabase + (reg))
+#define sca_in(reg, card)	     readb((card)->scabase + (reg))
+#define sca_out(value, reg, card)    writeb(value, (card)->scabase + (reg))
+#define sca_inw(reg, card)	     readw((card)->scabase + (reg))
+#define sca_outw(value, reg, card)   writew(value, (card)->scabase + (reg))
+#define sca_inl(reg, card)	     readl((card)->scabase + (reg))
+#define sca_outl(value, reg, card)   writel(value, (card)->scabase + (reg))
 
 static int sca_poll(struct napi_struct *napi, int budget);
 

From ae8102b87b9a91f401841513ceab4fc2c0e14787 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Fri, 21 May 2021 11:24:55 +0800
Subject: [PATCH 0443/3292] caif_virtio: Fix some typos in caif_virtio.c

s/patckets/packets/
s/avilable/available/
s/tbe/the/

Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/caif_virtio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index 106f089eb2a8..91230894692d 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -315,7 +315,7 @@ exit:
 	case 0:
 		++cfv->stats.rx_napi_complete;
 
-		/* Really out of patckets? (stolen from virtio_net)*/
+		/* Really out of packets? (stolen from virtio_net)*/
 		napi_complete(napi);
 		if (unlikely(!vringh_notify_enable_kern(cfv->vr_rx)) &&
 		    napi_schedule_prep(napi)) {
@@ -463,7 +463,7 @@ static int cfv_netdev_close(struct net_device *netdev)
 	vringh_notify_disable_kern(cfv->vr_rx);
 	napi_disable(&cfv->napi);
 
-	/* Release any TX buffers on both used and avilable rings */
+	/* Release any TX buffers on both used and available rings */
 	cfv_release_used_buf(cfv->vq_tx);
 	spin_lock_irqsave(&cfv->tx_lock, flags);
 	while ((buf_info = virtqueue_detach_unused_buf(cfv->vq_tx)))
@@ -497,7 +497,7 @@ static struct buf_info *cfv_alloc_and_copy_to_shm(struct cfv_info *cfv,
 	if (unlikely(!buf_info))
 		goto err;
 
-	/* Make the IP header aligned in tbe buffer */
+	/* Make the IP header aligned in the buffer */
 	hdr_ofs = cfv->tx_hr + info->hdr_len;
 	pad_len = hdr_ofs & (IP_HDR_ALIGN - 1);
 	buf_info->size = cfv->tx_hr + skb->len + cfv->tx_tr + pad_len;

From 4057c58da21ceeecb71c0f2d22c50755d53320e0 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Fri, 21 May 2021 11:31:35 +0800
Subject: [PATCH 0444/3292] net: bonding: bond_alb: Fix some typos in
 bond_alb.c

s/becase/because/
s/reqeusts/requests/
s/funcions/functions/
s/addreses/addresses/

Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 269dad176df4..22e5632089ac 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -229,7 +229,7 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
 {
 	struct slave *tx_slave;
 
-	/* We don't need to disable softirq here, becase
+	/* We don't need to disable softirq here, because
 	 * tlb_choose_channel() is only called by bond_alb_xmit()
 	 * which already has softirq disabled.
 	 */
@@ -609,7 +609,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb,
 
 		client_info->ip_src = arp->ip_src;
 		client_info->ip_dst = arp->ip_dst;
-		/* arp->mac_dst is broadcast for arp reqeusts.
+		/* arp->mac_dst is broadcast for arp requests.
 		 * will be updated with clients actual unicast mac address
 		 * upon receiving an arp reply.
 		 */
@@ -1271,7 +1271,7 @@ unwind:
 	return res;
 }
 
-/************************ exported alb funcions ************************/
+/************************ exported alb functions ************************/
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
 {
@@ -1550,7 +1550,7 @@ void bond_alb_monitor(struct work_struct *work)
 
 		bond_for_each_slave_rcu(bond, slave, iter) {
 			/* If updating current_active, use all currently
-			 * user mac addreses (!strict_match).  Otherwise, only
+			 * user mac addresses (!strict_match).  Otherwise, only
 			 * use mac of the slave device.
 			 * In RLB mode, we always use strict matches.
 			 */

From 31d990cb2628448806a94e64f07b90994a716c56 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 21 May 2021 11:57:21 +0800
Subject: [PATCH 0445/3292] sfc: farch: fix compile warning in
 efx_farch_dimension_resources()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following kernel build warning when CONFIG_SFC_SRIOV is disabled:

  drivers/net/ethernet/sfc/farch.c: In function ‘efx_farch_dimension_resources’:
  drivers/net/ethernet/sfc/farch.c:1671:21: warning: variable ‘buftbl_min’ set but not used [-Wunused-but-set-variable]
    unsigned vi_count, buftbl_min, total_tx_channels;

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/farch.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 49df02ecee91..148dcd48b58d 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1668,13 +1668,17 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
  */
 void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 {
-	unsigned vi_count, buftbl_min, total_tx_channels;
-
+	unsigned vi_count, total_tx_channels;
 #ifdef CONFIG_SFC_SRIOV
-	struct siena_nic_data *nic_data = efx->nic_data;
+	struct siena_nic_data *nic_data;
+	unsigned buftbl_min;
 #endif
 
 	total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels;
+	vi_count = max(efx->n_channels, total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL);
+
+#ifdef CONFIG_SFC_SRIOV
+	nic_data = efx->nic_data;
 	/* Account for the buffer table entries backing the datapath channels
 	 * and the descriptor caches for those channels.
 	 */
@@ -1682,9 +1686,6 @@ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
 		       total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_DMAQ_SIZE +
 		       efx->n_channels * EFX_MAX_EVQ_SIZE)
 		      * sizeof(efx_qword_t) / EFX_BUF_SIZE);
-	vi_count = max(efx->n_channels, total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL);
-
-#ifdef CONFIG_SFC_SRIOV
 	if (efx->type->sriov_wanted) {
 		if (efx->type->sriov_wanted(efx)) {
 			unsigned vi_dc_entries, buftbl_free;

From b269875f91c30c8d18cf6a6fbce40b12965e120f Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 21 May 2021 17:40:14 +0800
Subject: [PATCH 0446/3292] net: phy: Fix inconsistent indenting

Eliminate the follow smatch warning:

drivers/net/phy/phy_device.c:2886 phy_probe() warn: inconsistent
indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0a2d8bedf73d..1539ea021ac0 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2883,7 +2883,7 @@ static int phy_probe(struct device *dev)
 	/* Disable the interrupt if the PHY doesn't support it
 	 * but the interrupt is still a valid one
 	 */
-	 if (!phy_drv_supports_irq(phydrv) && phy_interrupt_is_valid(phydev))
+	if (!phy_drv_supports_irq(phydrv) && phy_interrupt_is_valid(phydev))
 		phydev->irq = PHY_POLL;
 
 	if (phydrv->flags & PHY_IS_INTERNAL)

From 04fdfad68b81cab9de660190ed8c881b1e5bf5fa Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Fri, 21 May 2021 17:45:22 +0800
Subject: [PATCH 0447/3292] atm: Fix typo

Change 'contol' to 'control'.

Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/zeprom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/zeprom.h b/drivers/atm/zeprom.h
index 88e01f808a86..8e8819a3840d 100644
--- a/drivers/atm/zeprom.h
+++ b/drivers/atm/zeprom.h
@@ -12,7 +12,7 @@
 #define ZEPROM_V1_REG	PCI_VENDOR_ID	/* PCI register */
 #define ZEPROM_V2_REG	0x40
 
-/* Bits in contol register */
+/* Bits in control register */
 
 #define ZEPROM_SK	0x80000000	/* strobe (probably on raising edge) */
 #define ZEPROM_CS	0x40000000	/* Chip Select */

From 030c8198d744e4149da57bd2a73b87aa6a8aa272 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 21 May 2021 11:01:46 +0100
Subject: [PATCH 0448/3292] net: hns3: Fix return of uninitialized variable ret

In the unlikely event that rule_cnt is zero the variable ret is
not assigned a value and function hclge_dbg_dump_fd_tcam can end
up returning an unitialized value in ret. Fix this by explicitly
setting ret to zero before the for-loop.

Addresses-Coverity: ("Uninitialized scalar variable")
Fixes: b5a0b70d77b9 ("net: hns3: refactor dump fd tcam of debugfs")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index dd9eb6e6f5a7..0b7c6838d905 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1519,6 +1519,7 @@ static int hclge_dbg_dump_fd_tcam(struct hclge_dev *hdev, char *buf, int len)
 		goto out;
 	}
 
+	ret = 0;
 	for (i = 0; i < rule_cnt; i++) {
 		tcam_msg.stage = HCLGE_FD_STAGE_1;
 		tcam_msg.loc = rule_locs[i];

From 30a2e9c0f5cf8892255e21153952cd347c81b36b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 21 May 2021 16:16:07 +0300
Subject: [PATCH 0449/3292] net: dsa: sja1105: stop reporting the queue levels
 in ethtool port counters

The queue levels are not counters, but instead they represent the
occupancy of the MAC TX queues. Having these in ethtool port counters is
not helpful, so remove them.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h         |  1 -
 drivers/net/dsa/sja1105/sja1105_ethtool.c | 54 +----------------------
 drivers/net/dsa/sja1105/sja1105_spi.c     |  1 -
 3 files changed, 2 insertions(+), 54 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 7ec40c4b2d5a..6749dc21b589 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -65,7 +65,6 @@ struct sja1105_regs {
 	u64 mac_hl1[SJA1105_NUM_PORTS];
 	u64 mac_hl2[SJA1105_NUM_PORTS];
 	u64 ether_stats[SJA1105_NUM_PORTS];
-	u64 qlevel[SJA1105_NUM_PORTS];
 };
 
 struct sja1105_info {
diff --git a/drivers/net/dsa/sja1105/sja1105_ethtool.c b/drivers/net/dsa/sja1105/sja1105_ethtool.c
index 9133a831ec79..2d8e5399f698 100644
--- a/drivers/net/dsa/sja1105/sja1105_ethtool.c
+++ b/drivers/net/dsa/sja1105/sja1105_ethtool.c
@@ -6,7 +6,6 @@
 #define SJA1105_SIZE_MAC_AREA		(0x02 * 4)
 #define SJA1105_SIZE_HL1_AREA		(0x10 * 4)
 #define SJA1105_SIZE_HL2_AREA		(0x4 * 4)
-#define SJA1105_SIZE_QLEVEL_AREA	(0x8 * 4) /* 0x4 to 0xB */
 #define SJA1105_SIZE_ETHER_AREA		(0x17 * 4)
 
 struct sja1105_port_status_mac {
@@ -60,8 +59,6 @@ struct sja1105_port_status_hl2 {
 	u64 n_part_drop;
 	u64 n_egr_disabled;
 	u64 n_not_reach;
-	u64 qlevel_hwm[8]; /* Only for P/Q/R/S */
-	u64 qlevel[8];     /* Only for P/Q/R/S */
 };
 
 struct sja1105_port_status_ether {
@@ -172,20 +169,6 @@ sja1105_port_status_hl2_unpack(void *buf,
 	sja1105_unpack(p + 0x0, &status->n_not_reach,    31,  0, 4);
 }
 
-static void
-sja1105pqrs_port_status_qlevel_unpack(void *buf,
-				      struct sja1105_port_status_hl2 *status)
-{
-	/* Make pointer arithmetic work on 4 bytes */
-	u32 *p = buf;
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		sja1105_unpack(p + i, &status->qlevel_hwm[i], 24, 16, 4);
-		sja1105_unpack(p + i, &status->qlevel[i],      8,  0, 4);
-	}
-}
-
 static void
 sja1105pqrs_port_status_ether_unpack(void *buf,
 				     struct sja1105_port_status_ether *status)
@@ -280,7 +263,7 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv,
 				       int port)
 {
 	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_QLEVEL_AREA] = {0};
+	u8 packed_buf[SJA1105_SIZE_HL2_AREA] = {0};
 	int rc;
 
 	rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl2[port], packed_buf,
@@ -290,18 +273,6 @@ static int sja1105_port_status_get_hl2(struct sja1105_private *priv,
 
 	sja1105_port_status_hl2_unpack(packed_buf, status);
 
-	/* Code below is strictly P/Q/R/S specific. */
-	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
-	    priv->info->device_id == SJA1105T_DEVICE_ID)
-		return 0;
-
-	rc = sja1105_xfer_buf(priv, SPI_READ, regs->qlevel[port], packed_buf,
-			      SJA1105_SIZE_QLEVEL_AREA);
-	if (rc < 0)
-		return rc;
-
-	sja1105pqrs_port_status_qlevel_unpack(packed_buf, status);
-
 	return 0;
 }
 
@@ -375,23 +346,6 @@ static char sja1105_port_stats[][ETH_GSTRING_LEN] = {
 };
 
 static char sja1105pqrs_extra_port_stats[][ETH_GSTRING_LEN] = {
-	/* Queue Levels */
-	"qlevel_hwm_0",
-	"qlevel_hwm_1",
-	"qlevel_hwm_2",
-	"qlevel_hwm_3",
-	"qlevel_hwm_4",
-	"qlevel_hwm_5",
-	"qlevel_hwm_6",
-	"qlevel_hwm_7",
-	"qlevel_0",
-	"qlevel_1",
-	"qlevel_2",
-	"qlevel_3",
-	"qlevel_4",
-	"qlevel_5",
-	"qlevel_6",
-	"qlevel_7",
 	/* Ether Stats */
 	"n_drops_nolearn",
 	"n_drops_noroute",
@@ -422,7 +376,7 @@ void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
 {
 	struct sja1105_private *priv = ds->priv;
 	struct sja1105_port_status *status;
-	int rc, i, k = 0;
+	int rc, k = 0;
 
 	status = kzalloc(sizeof(*status), GFP_KERNEL);
 	if (!status)
@@ -482,10 +436,6 @@ void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
 
 	memset(data + k, 0, ARRAY_SIZE(sja1105pqrs_extra_port_stats) *
 			sizeof(u64));
-	for (i = 0; i < 8; i++) {
-		data[k++] = status->hl2.qlevel_hwm[i];
-		data[k++] = status->hl2.qlevel[i];
-	}
 	data[k++] = status->ether.n_drops_nolearn;
 	data[k++] = status->ether.n_drops_noroute;
 	data[k++] = status->ether.n_drops_ill_dtag;
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 5a7b404bf3ce..52d53e737c68 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -464,7 +464,6 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.rgmii_tx_clk = {0x100016, 0x10001C, 0x100022, 0x100028, 0x10002E},
 	.rmii_ref_clk = {0x100015, 0x10001B, 0x100021, 0x100027, 0x10002D},
 	.rmii_ext_tx_clk = {0x100017, 0x10001D, 0x100023, 0x100029, 0x10002F},
-	.qlevel = {0x604, 0x614, 0x624, 0x634, 0x644},
 	.ptpegr_ts = {0xC0, 0xC4, 0xC8, 0xCC, 0xD0},
 	.ptpschtm = 0x13, /* Spans 0x13 to 0x14 */
 	.ptppinst = 0x15,

From 039b167d68a3ce401114b1a520843db319277895 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 21 May 2021 16:16:08 +0300
Subject: [PATCH 0450/3292] net: dsa: sja1105: don't use burst SPI reads for
 port statistics

The current internal sja1105 driver API is optimized for retrieving many
statistics counters at once. But the switch does not do atomic snapshotting
for them anyway.

In case we start reporting the hardware port counters through
ndo_get_stats64 as well, not just ethtool, it would be good to be able
to read individual port counters and not all of them.

Additionally, since Arnd Bergmann's commit ae1804de93f6 ("dsa: sja1105:
dynamically allocate stats structure"), sja1105_get_ethtool_stats
allocates memory dynamically, since struct sja1105_port_status was
deemed to consume too much stack memory. That is not ideal.
The large structure is only needed because of the burst read.
If we read statistics one by one, we can consume less memory, and
we can avoid dynamic allocation.

Additionally, latency-sensitive interfaces such as PTP operations (for
phc2sys) might suffer if the SPI mutex is being held for too long, which
happens in the case of SPI burst reads. By reading counters one by one,
we give a chance for higher priority processes to preempt and take the
SPI bus mutex for accessing the PTP clock.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h         |   13 +-
 drivers/net/dsa/sja1105/sja1105_ethtool.c | 1049 ++++++++++++---------
 drivers/net/dsa/sja1105/sja1105_spi.c     |   14 +-
 3 files changed, 603 insertions(+), 473 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 6749dc21b589..10fc6b54f9f6 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -30,6 +30,14 @@ typedef enum {
 #include "sja1105_tas.h"
 #include "sja1105_ptp.h"
 
+enum sja1105_stats_area {
+	MAC,
+	HL1,
+	HL2,
+	ETHER,
+	__MAX_SJA1105_STATS_AREA,
+};
+
 /* Keeps the different addresses between E/T and P/Q/R/S */
 struct sja1105_regs {
 	u64 device_id;
@@ -61,10 +69,7 @@ struct sja1105_regs {
 	u64 rgmii_tx_clk[SJA1105_NUM_PORTS];
 	u64 rmii_ref_clk[SJA1105_NUM_PORTS];
 	u64 rmii_ext_tx_clk[SJA1105_NUM_PORTS];
-	u64 mac[SJA1105_NUM_PORTS];
-	u64 mac_hl1[SJA1105_NUM_PORTS];
-	u64 mac_hl2[SJA1105_NUM_PORTS];
-	u64 ether_stats[SJA1105_NUM_PORTS];
+	u64 stats[__MAX_SJA1105_STATS_AREA][SJA1105_NUM_PORTS];
 };
 
 struct sja1105_info {
diff --git a/drivers/net/dsa/sja1105/sja1105_ethtool.c b/drivers/net/dsa/sja1105/sja1105_ethtool.c
index 2d8e5399f698..decc6c931dc1 100644
--- a/drivers/net/dsa/sja1105/sja1105_ethtool.c
+++ b/drivers/net/dsa/sja1105/sja1105_ethtool.c
@@ -3,502 +3,627 @@
  */
 #include "sja1105.h"
 
-#define SJA1105_SIZE_MAC_AREA		(0x02 * 4)
-#define SJA1105_SIZE_HL1_AREA		(0x10 * 4)
-#define SJA1105_SIZE_HL2_AREA		(0x4 * 4)
-#define SJA1105_SIZE_ETHER_AREA		(0x17 * 4)
-
-struct sja1105_port_status_mac {
-	u64 n_runt;
-	u64 n_soferr;
-	u64 n_alignerr;
-	u64 n_miierr;
-	u64 typeerr;
-	u64 sizeerr;
-	u64 tctimeout;
-	u64 priorerr;
-	u64 nomaster;
-	u64 memov;
-	u64 memerr;
-	u64 invtyp;
-	u64 intcyov;
-	u64 domerr;
-	u64 pcfbagdrop;
-	u64 spcprior;
-	u64 ageprior;
-	u64 portdrop;
-	u64 lendrop;
-	u64 bagdrop;
-	u64 policeerr;
-	u64 drpnona664err;
-	u64 spcerr;
-	u64 agedrp;
+enum sja1105_counter_index {
+	__SJA1105_COUNTER_UNUSED,
+	/* MAC */
+	N_RUNT,
+	N_SOFERR,
+	N_ALIGNERR,
+	N_MIIERR,
+	TYPEERR,
+	SIZEERR,
+	TCTIMEOUT,
+	PRIORERR,
+	NOMASTER,
+	MEMOV,
+	MEMERR,
+	INVTYP,
+	INTCYOV,
+	DOMERR,
+	PCFBAGDROP,
+	SPCPRIOR,
+	AGEPRIOR,
+	PORTDROP,
+	LENDROP,
+	BAGDROP,
+	POLICEERR,
+	DRPNONA664ERR,
+	SPCERR,
+	AGEDRP,
+	/* HL1 */
+	N_N664ERR,
+	N_VLANERR,
+	N_UNRELEASED,
+	N_SIZEERR,
+	N_CRCERR,
+	N_VLNOTFOUND,
+	N_CTPOLERR,
+	N_POLERR,
+	N_RXFRM,
+	N_RXBYTE,
+	N_TXFRM,
+	N_TXBYTE,
+	/* HL2 */
+	N_QFULL,
+	N_PART_DROP,
+	N_EGR_DISABLED,
+	N_NOT_REACH,
+	__MAX_SJA1105ET_PORT_COUNTER,
+	/* P/Q/R/S only */
+	/* ETHER */
+	N_DROPS_NOLEARN = __MAX_SJA1105ET_PORT_COUNTER,
+	N_DROPS_NOROUTE,
+	N_DROPS_ILL_DTAG,
+	N_DROPS_DTAG,
+	N_DROPS_SOTAG,
+	N_DROPS_SITAG,
+	N_DROPS_UTAG,
+	N_TX_BYTES_1024_2047,
+	N_TX_BYTES_512_1023,
+	N_TX_BYTES_256_511,
+	N_TX_BYTES_128_255,
+	N_TX_BYTES_65_127,
+	N_TX_BYTES_64,
+	N_TX_MCAST,
+	N_TX_BCAST,
+	N_RX_BYTES_1024_2047,
+	N_RX_BYTES_512_1023,
+	N_RX_BYTES_256_511,
+	N_RX_BYTES_128_255,
+	N_RX_BYTES_65_127,
+	N_RX_BYTES_64,
+	N_RX_MCAST,
+	N_RX_BCAST,
+	__MAX_SJA1105PQRS_PORT_COUNTER,
 };
 
-struct sja1105_port_status_hl1 {
-	u64 n_n664err;
-	u64 n_vlanerr;
-	u64 n_unreleased;
-	u64 n_sizeerr;
-	u64 n_crcerr;
-	u64 n_vlnotfound;
-	u64 n_ctpolerr;
-	u64 n_polerr;
-	u64 n_rxfrmsh;
-	u64 n_rxfrm;
-	u64 n_rxbytesh;
-	u64 n_rxbyte;
-	u64 n_txfrmsh;
-	u64 n_txfrm;
-	u64 n_txbytesh;
-	u64 n_txbyte;
+struct sja1105_port_counter {
+	enum sja1105_stats_area area;
+	const char name[ETH_GSTRING_LEN];
+	int offset;
+	int start;
+	int end;
+	bool is_64bit;
 };
 
-struct sja1105_port_status_hl2 {
-	u64 n_qfull;
-	u64 n_part_drop;
-	u64 n_egr_disabled;
-	u64 n_not_reach;
-};
-
-struct sja1105_port_status_ether {
-	u64 n_drops_nolearn;
-	u64 n_drops_noroute;
-	u64 n_drops_ill_dtag;
-	u64 n_drops_dtag;
-	u64 n_drops_sotag;
-	u64 n_drops_sitag;
-	u64 n_drops_utag;
-	u64 n_tx_bytes_1024_2047;
-	u64 n_tx_bytes_512_1023;
-	u64 n_tx_bytes_256_511;
-	u64 n_tx_bytes_128_255;
-	u64 n_tx_bytes_65_127;
-	u64 n_tx_bytes_64;
-	u64 n_tx_mcast;
-	u64 n_tx_bcast;
-	u64 n_rx_bytes_1024_2047;
-	u64 n_rx_bytes_512_1023;
-	u64 n_rx_bytes_256_511;
-	u64 n_rx_bytes_128_255;
-	u64 n_rx_bytes_65_127;
-	u64 n_rx_bytes_64;
-	u64 n_rx_mcast;
-	u64 n_rx_bcast;
-};
-
-struct sja1105_port_status {
-	struct sja1105_port_status_mac mac;
-	struct sja1105_port_status_hl1 hl1;
-	struct sja1105_port_status_hl2 hl2;
-	struct sja1105_port_status_ether ether;
-};
-
-static void
-sja1105_port_status_mac_unpack(void *buf,
-			       struct sja1105_port_status_mac *status)
-{
-	/* Make pointer arithmetic work on 4 bytes */
-	u32 *p = buf;
-
-	sja1105_unpack(p + 0x0, &status->n_runt,       31, 24, 4);
-	sja1105_unpack(p + 0x0, &status->n_soferr,     23, 16, 4);
-	sja1105_unpack(p + 0x0, &status->n_alignerr,   15,  8, 4);
-	sja1105_unpack(p + 0x0, &status->n_miierr,      7,  0, 4);
-	sja1105_unpack(p + 0x1, &status->typeerr,      27, 27, 4);
-	sja1105_unpack(p + 0x1, &status->sizeerr,      26, 26, 4);
-	sja1105_unpack(p + 0x1, &status->tctimeout,    25, 25, 4);
-	sja1105_unpack(p + 0x1, &status->priorerr,     24, 24, 4);
-	sja1105_unpack(p + 0x1, &status->nomaster,     23, 23, 4);
-	sja1105_unpack(p + 0x1, &status->memov,        22, 22, 4);
-	sja1105_unpack(p + 0x1, &status->memerr,       21, 21, 4);
-	sja1105_unpack(p + 0x1, &status->invtyp,       19, 19, 4);
-	sja1105_unpack(p + 0x1, &status->intcyov,      18, 18, 4);
-	sja1105_unpack(p + 0x1, &status->domerr,       17, 17, 4);
-	sja1105_unpack(p + 0x1, &status->pcfbagdrop,   16, 16, 4);
-	sja1105_unpack(p + 0x1, &status->spcprior,     15, 12, 4);
-	sja1105_unpack(p + 0x1, &status->ageprior,     11,  8, 4);
-	sja1105_unpack(p + 0x1, &status->portdrop,      6,  6, 4);
-	sja1105_unpack(p + 0x1, &status->lendrop,       5,  5, 4);
-	sja1105_unpack(p + 0x1, &status->bagdrop,       4,  4, 4);
-	sja1105_unpack(p + 0x1, &status->policeerr,     3,  3, 4);
-	sja1105_unpack(p + 0x1, &status->drpnona664err, 2,  2, 4);
-	sja1105_unpack(p + 0x1, &status->spcerr,        1,  1, 4);
-	sja1105_unpack(p + 0x1, &status->agedrp,        0,  0, 4);
-}
-
-static void
-sja1105_port_status_hl1_unpack(void *buf,
-			       struct sja1105_port_status_hl1 *status)
-{
-	/* Make pointer arithmetic work on 4 bytes */
-	u32 *p = buf;
-
-	sja1105_unpack(p + 0xF, &status->n_n664err,    31,  0, 4);
-	sja1105_unpack(p + 0xE, &status->n_vlanerr,    31,  0, 4);
-	sja1105_unpack(p + 0xD, &status->n_unreleased, 31,  0, 4);
-	sja1105_unpack(p + 0xC, &status->n_sizeerr,    31,  0, 4);
-	sja1105_unpack(p + 0xB, &status->n_crcerr,     31,  0, 4);
-	sja1105_unpack(p + 0xA, &status->n_vlnotfound, 31,  0, 4);
-	sja1105_unpack(p + 0x9, &status->n_ctpolerr,   31,  0, 4);
-	sja1105_unpack(p + 0x8, &status->n_polerr,     31,  0, 4);
-	sja1105_unpack(p + 0x7, &status->n_rxfrmsh,    31,  0, 4);
-	sja1105_unpack(p + 0x6, &status->n_rxfrm,      31,  0, 4);
-	sja1105_unpack(p + 0x5, &status->n_rxbytesh,   31,  0, 4);
-	sja1105_unpack(p + 0x4, &status->n_rxbyte,     31,  0, 4);
-	sja1105_unpack(p + 0x3, &status->n_txfrmsh,    31,  0, 4);
-	sja1105_unpack(p + 0x2, &status->n_txfrm,      31,  0, 4);
-	sja1105_unpack(p + 0x1, &status->n_txbytesh,   31,  0, 4);
-	sja1105_unpack(p + 0x0, &status->n_txbyte,     31,  0, 4);
-	status->n_rxfrm  += status->n_rxfrmsh  << 32;
-	status->n_rxbyte += status->n_rxbytesh << 32;
-	status->n_txfrm  += status->n_txfrmsh  << 32;
-	status->n_txbyte += status->n_txbytesh << 32;
-}
-
-static void
-sja1105_port_status_hl2_unpack(void *buf,
-			       struct sja1105_port_status_hl2 *status)
-{
-	/* Make pointer arithmetic work on 4 bytes */
-	u32 *p = buf;
-
-	sja1105_unpack(p + 0x3, &status->n_qfull,        31,  0, 4);
-	sja1105_unpack(p + 0x2, &status->n_part_drop,    31,  0, 4);
-	sja1105_unpack(p + 0x1, &status->n_egr_disabled, 31,  0, 4);
-	sja1105_unpack(p + 0x0, &status->n_not_reach,    31,  0, 4);
-}
-
-static void
-sja1105pqrs_port_status_ether_unpack(void *buf,
-				     struct sja1105_port_status_ether *status)
-{
-	/* Make pointer arithmetic work on 4 bytes */
-	u32 *p = buf;
-
-	sja1105_unpack(p + 0x16, &status->n_drops_nolearn,      31, 0, 4);
-	sja1105_unpack(p + 0x15, &status->n_drops_noroute,      31, 0, 4);
-	sja1105_unpack(p + 0x14, &status->n_drops_ill_dtag,     31, 0, 4);
-	sja1105_unpack(p + 0x13, &status->n_drops_dtag,         31, 0, 4);
-	sja1105_unpack(p + 0x12, &status->n_drops_sotag,        31, 0, 4);
-	sja1105_unpack(p + 0x11, &status->n_drops_sitag,        31, 0, 4);
-	sja1105_unpack(p + 0x10, &status->n_drops_utag,         31, 0, 4);
-	sja1105_unpack(p + 0x0F, &status->n_tx_bytes_1024_2047, 31, 0, 4);
-	sja1105_unpack(p + 0x0E, &status->n_tx_bytes_512_1023,  31, 0, 4);
-	sja1105_unpack(p + 0x0D, &status->n_tx_bytes_256_511,   31, 0, 4);
-	sja1105_unpack(p + 0x0C, &status->n_tx_bytes_128_255,   31, 0, 4);
-	sja1105_unpack(p + 0x0B, &status->n_tx_bytes_65_127,    31, 0, 4);
-	sja1105_unpack(p + 0x0A, &status->n_tx_bytes_64,        31, 0, 4);
-	sja1105_unpack(p + 0x09, &status->n_tx_mcast,           31, 0, 4);
-	sja1105_unpack(p + 0x08, &status->n_tx_bcast,           31, 0, 4);
-	sja1105_unpack(p + 0x07, &status->n_rx_bytes_1024_2047, 31, 0, 4);
-	sja1105_unpack(p + 0x06, &status->n_rx_bytes_512_1023,  31, 0, 4);
-	sja1105_unpack(p + 0x05, &status->n_rx_bytes_256_511,   31, 0, 4);
-	sja1105_unpack(p + 0x04, &status->n_rx_bytes_128_255,   31, 0, 4);
-	sja1105_unpack(p + 0x03, &status->n_rx_bytes_65_127,    31, 0, 4);
-	sja1105_unpack(p + 0x02, &status->n_rx_bytes_64,        31, 0, 4);
-	sja1105_unpack(p + 0x01, &status->n_rx_mcast,           31, 0, 4);
-	sja1105_unpack(p + 0x00, &status->n_rx_bcast,           31, 0, 4);
-}
-
-static int
-sja1105pqrs_port_status_get_ether(struct sja1105_private *priv,
-				  struct sja1105_port_status_ether *ether,
-				  int port)
-{
-	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_ETHER_AREA] = {0};
-	int rc;
-
-	/* Ethernet statistics area */
-	rc = sja1105_xfer_buf(priv, SPI_READ, regs->ether_stats[port],
-			      packed_buf, SJA1105_SIZE_ETHER_AREA);
-	if (rc < 0)
-		return rc;
-
-	sja1105pqrs_port_status_ether_unpack(packed_buf, ether);
-
-	return 0;
-}
-
-static int sja1105_port_status_get_mac(struct sja1105_private *priv,
-				       struct sja1105_port_status_mac *status,
-				       int port)
-{
-	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_MAC_AREA] = {0};
-	int rc;
-
-	/* MAC area */
-	rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac[port], packed_buf,
-			      SJA1105_SIZE_MAC_AREA);
-	if (rc < 0)
-		return rc;
-
-	sja1105_port_status_mac_unpack(packed_buf, status);
-
-	return 0;
-}
-
-static int sja1105_port_status_get_hl1(struct sja1105_private *priv,
-				       struct sja1105_port_status_hl1 *status,
-				       int port)
-{
-	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_HL1_AREA] = {0};
-	int rc;
-
-	rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl1[port], packed_buf,
-			      SJA1105_SIZE_HL1_AREA);
-	if (rc < 0)
-		return rc;
-
-	sja1105_port_status_hl1_unpack(packed_buf, status);
-
-	return 0;
-}
-
-static int sja1105_port_status_get_hl2(struct sja1105_private *priv,
-				       struct sja1105_port_status_hl2 *status,
-				       int port)
-{
-	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_HL2_AREA] = {0};
-	int rc;
-
-	rc = sja1105_xfer_buf(priv, SPI_READ, regs->mac_hl2[port], packed_buf,
-			      SJA1105_SIZE_HL2_AREA);
-	if (rc < 0)
-		return rc;
-
-	sja1105_port_status_hl2_unpack(packed_buf, status);
-
-	return 0;
-}
-
-static int sja1105_port_status_get(struct sja1105_private *priv,
-				   struct sja1105_port_status *status,
-				   int port)
-{
-	int rc;
-
-	rc = sja1105_port_status_get_mac(priv, &status->mac, port);
-	if (rc < 0)
-		return rc;
-	rc = sja1105_port_status_get_hl1(priv, &status->hl1, port);
-	if (rc < 0)
-		return rc;
-	rc = sja1105_port_status_get_hl2(priv, &status->hl2, port);
-	if (rc < 0)
-		return rc;
-
-	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
-	    priv->info->device_id == SJA1105T_DEVICE_ID)
-		return 0;
-
-	return sja1105pqrs_port_status_get_ether(priv, &status->ether, port);
-}
-
-static char sja1105_port_stats[][ETH_GSTRING_LEN] = {
+static const struct sja1105_port_counter sja1105_port_counters[] = {
 	/* MAC-Level Diagnostic Counters */
-	"n_runt",
-	"n_soferr",
-	"n_alignerr",
-	"n_miierr",
+	[N_RUNT] = {
+		.area = MAC,
+		.name = "n_runt",
+		.offset = 0,
+		.start = 31,
+		.end = 24,
+	},
+	[N_SOFERR] = {
+		.area = MAC,
+		.name = "n_soferr",
+		.offset = 0x0,
+		.start = 23,
+		.end = 16,
+	},
+	[N_ALIGNERR] = {
+		.area = MAC,
+		.name = "n_alignerr",
+		.offset = 0x0,
+		.start = 15,
+		.end = 8,
+	},
+	[N_MIIERR] = {
+		.area = MAC,
+		.name = "n_miierr",
+		.offset = 0x0,
+		.start = 7,
+		.end = 0,
+	},
 	/* MAC-Level Diagnostic Flags */
-	"typeerr",
-	"sizeerr",
-	"tctimeout",
-	"priorerr",
-	"nomaster",
-	"memov",
-	"memerr",
-	"invtyp",
-	"intcyov",
-	"domerr",
-	"pcfbagdrop",
-	"spcprior",
-	"ageprior",
-	"portdrop",
-	"lendrop",
-	"bagdrop",
-	"policeerr",
-	"drpnona664err",
-	"spcerr",
-	"agedrp",
+	[TYPEERR] = {
+		.area = MAC,
+		.name = "typeerr",
+		.offset = 0x1,
+		.start = 27,
+		.end = 27,
+	},
+	[SIZEERR] = {
+		.area = MAC,
+		.name = "sizeerr",
+		.offset = 0x1,
+		.start = 26,
+		.end = 26,
+	},
+	[TCTIMEOUT] = {
+		.area = MAC,
+		.name = "tctimeout",
+		.offset = 0x1,
+		.start = 25,
+		.end = 25,
+	},
+	[PRIORERR] = {
+		.area = MAC,
+		.name = "priorerr",
+		.offset = 0x1,
+		.start = 24,
+		.end = 24,
+	},
+	[NOMASTER] = {
+		.area = MAC,
+		.name = "nomaster",
+		.offset = 0x1,
+		.start = 23,
+		.end = 23,
+	},
+	[MEMOV] = {
+		.area = MAC,
+		.name = "memov",
+		.offset = 0x1,
+		.start = 22,
+		.end = 22,
+	},
+	[MEMERR] = {
+		.area = MAC,
+		.name = "memerr",
+		.offset = 0x1,
+		.start = 21,
+		.end = 21,
+	},
+	[INVTYP] = {
+		.area = MAC,
+		.name = "invtyp",
+		.offset = 0x1,
+		.start = 19,
+		.end = 19,
+	},
+	[INTCYOV] = {
+		.area = MAC,
+		.name = "intcyov",
+		.offset = 0x1,
+		.start = 18,
+		.end = 18,
+	},
+	[DOMERR] = {
+		.area = MAC,
+		.name = "domerr",
+		.offset = 0x1,
+		.start = 17,
+		.end = 17,
+	},
+	[PCFBAGDROP] = {
+		.area = MAC,
+		.name = "pcfbagdrop",
+		.offset = 0x1,
+		.start = 16,
+		.end = 16,
+	},
+	[SPCPRIOR] = {
+		.area = MAC,
+		.name = "spcprior",
+		.offset = 0x1,
+		.start = 15,
+		.end = 12,
+	},
+	[AGEPRIOR] = {
+		.area = MAC,
+		.name = "ageprior",
+		.offset = 0x1,
+		.start = 11,
+		.end = 8,
+	},
+	[PORTDROP] = {
+		.area = MAC,
+		.name = "portdrop",
+		.offset = 0x1,
+		.start = 6,
+		.end = 6,
+	},
+	[LENDROP] = {
+		.area = MAC,
+		.name = "lendrop",
+		.offset = 0x1,
+		.start = 5,
+		.end = 5,
+	},
+	[BAGDROP] = {
+		.area = MAC,
+		.name = "bagdrop",
+		.offset = 0x1,
+		.start = 4,
+		.end = 4,
+	},
+	[POLICEERR] = {
+		.area = MAC,
+		.name = "policeerr",
+		.offset = 0x1,
+		.start = 3,
+		.end = 3,
+	},
+	[DRPNONA664ERR] = {
+		.area = MAC,
+		.name = "drpnona664err",
+		.offset = 0x1,
+		.start = 2,
+		.end = 2,
+	},
+	[SPCERR] = {
+		.area = MAC,
+		.name = "spcerr",
+		.offset = 0x1,
+		.start = 1,
+		.end = 1,
+	},
+	[AGEDRP] = {
+		.area = MAC,
+		.name = "agedrp",
+		.offset = 0x1,
+		.start = 0,
+		.end = 0,
+	},
 	/* High-Level Diagnostic Counters */
-	"n_n664err",
-	"n_vlanerr",
-	"n_unreleased",
-	"n_sizeerr",
-	"n_crcerr",
-	"n_vlnotfound",
-	"n_ctpolerr",
-	"n_polerr",
-	"n_rxfrm",
-	"n_rxbyte",
-	"n_txfrm",
-	"n_txbyte",
-	"n_qfull",
-	"n_part_drop",
-	"n_egr_disabled",
-	"n_not_reach",
+	[N_N664ERR] = {
+		.area = HL1,
+		.name = "n_n664err",
+		.offset = 0xF,
+		.start = 31,
+		.end = 0,
+	},
+	[N_VLANERR] = {
+		.area = HL1,
+		.name = "n_vlanerr",
+		.offset = 0xE,
+		.start = 31,
+		.end = 0,
+	},
+	[N_UNRELEASED] = {
+		.area = HL1,
+		.name = "n_unreleased",
+		.offset = 0xD,
+		.start = 31,
+		.end = 0,
+	},
+	[N_SIZEERR] = {
+		.area = HL1,
+		.name = "n_sizeerr",
+		.offset = 0xC,
+		.start = 31,
+		.end = 0,
+	},
+	[N_CRCERR] = {
+		.area = HL1,
+		.name = "n_crcerr",
+		.offset = 0xB,
+		.start = 31,
+		.end = 0,
+	},
+	[N_VLNOTFOUND] = {
+		.area = HL1,
+		.name = "n_vlnotfound",
+		.offset = 0xA,
+		.start = 31,
+		.end = 0,
+	},
+	[N_CTPOLERR] = {
+		.area = HL1,
+		.name = "n_ctpolerr",
+		.offset = 0x9,
+		.start = 31,
+		.end = 0,
+	},
+	[N_POLERR] = {
+		.area = HL1,
+		.name = "n_polerr",
+		.offset = 0x8,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RXFRM] = {
+		.area = HL1,
+		.name = "n_rxfrm",
+		.offset = 0x6,
+		.start = 31,
+		.end = 0,
+		.is_64bit = true,
+	},
+	[N_RXBYTE] = {
+		.area = HL1,
+		.name = "n_rxbyte",
+		.offset = 0x4,
+		.start = 31,
+		.end = 0,
+		.is_64bit = true,
+	},
+	[N_TXFRM] = {
+		.area = HL1,
+		.name = "n_txfrm",
+		.offset = 0x2,
+		.start = 31,
+		.end = 0,
+		.is_64bit = true,
+	},
+	[N_TXBYTE] = {
+		.area = HL1,
+		.name = "n_txbyte",
+		.offset = 0x0,
+		.start = 31,
+		.end = 0,
+		.is_64bit = true,
+	},
+	[N_QFULL] = {
+		.area = HL2,
+		.name = "n_qfull",
+		.offset = 0x3,
+		.start = 31,
+		.end = 0,
+	},
+	[N_PART_DROP] = {
+		.area = HL2,
+		.name = "n_part_drop",
+		.offset = 0x2,
+		.start = 31,
+		.end = 0,
+	},
+	[N_EGR_DISABLED] = {
+		.area = HL2,
+		.name = "n_egr_disabled",
+		.offset = 0x1,
+		.start = 31,
+		.end = 0,
+	},
+	[N_NOT_REACH] = {
+		.area = HL2,
+		.name = "n_not_reach",
+		.offset = 0x0,
+		.start = 31,
+		.end = 0,
+	},
+	/* Ether Stats */
+	[N_DROPS_NOLEARN] = {
+		.area = ETHER,
+		.name = "n_drops_nolearn",
+		.offset = 0x16,
+		.start = 31,
+		.end = 0,
+	},
+	[N_DROPS_NOROUTE] = {
+		.area = ETHER,
+		.name = "n_drops_noroute",
+		.offset = 0x15,
+		.start = 31,
+		.end = 0,
+	},
+	[N_DROPS_ILL_DTAG] = {
+		.area = ETHER,
+		.name = "n_drops_ill_dtag",
+		.offset = 0x14,
+		.start = 31,
+		.end = 0,
+	},
+	[N_DROPS_DTAG] = {
+		.area = ETHER,
+		.name = "n_drops_dtag",
+		.offset = 0x13,
+		.start = 31,
+		.end = 0,
+	},
+	[N_DROPS_SOTAG] = {
+		.area = ETHER,
+		.name = "n_drops_sotag",
+		.offset = 0x12,
+		.start = 31,
+		.end = 0,
+	},
+	[N_DROPS_SITAG] = {
+		.area = ETHER,
+		.name = "n_drops_sitag",
+		.offset = 0x11,
+		.start = 31,
+		.end = 0,
+	},
+	[N_DROPS_UTAG] = {
+		.area = ETHER,
+		.name = "n_drops_utag",
+		.offset = 0x10,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BYTES_1024_2047] = {
+		.area = ETHER,
+		.name = "n_tx_bytes_1024_2047",
+		.offset = 0x0F,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BYTES_512_1023] = {
+		.area = ETHER,
+		.name = "n_tx_bytes_512_1023",
+		.offset = 0x0E,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BYTES_256_511] = {
+		.area = ETHER,
+		.name = "n_tx_bytes_256_511",
+		.offset = 0x0D,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BYTES_128_255] = {
+		.area = ETHER,
+		.name = "n_tx_bytes_128_255",
+		.offset = 0x0C,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BYTES_65_127] = {
+		.area = ETHER,
+		.name = "n_tx_bytes_65_127",
+		.offset = 0x0B,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BYTES_64] = {
+		.area = ETHER,
+		.name = "n_tx_bytes_64",
+		.offset = 0x0A,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_MCAST] = {
+		.area = ETHER,
+		.name = "n_tx_mcast",
+		.offset = 0x09,
+		.start = 31,
+		.end = 0,
+	},
+	[N_TX_BCAST] = {
+		.area = ETHER,
+		.name = "n_tx_bcast",
+		.offset = 0x08,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BYTES_1024_2047] = {
+		.area = ETHER,
+		.name = "n_rx_bytes_1024_2047",
+		.offset = 0x07,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BYTES_512_1023] = {
+		.area = ETHER,
+		.name = "n_rx_bytes_512_1023",
+		.offset = 0x06,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BYTES_256_511] = {
+		.area = ETHER,
+		.name = "n_rx_bytes_256_511",
+		.offset = 0x05,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BYTES_128_255] = {
+		.area = ETHER,
+		.name = "n_rx_bytes_128_255",
+		.offset = 0x04,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BYTES_65_127] = {
+		.area = ETHER,
+		.name = "n_rx_bytes_65_127",
+		.offset = 0x03,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BYTES_64] = {
+		.area = ETHER,
+		.name = "n_rx_bytes_64",
+		.offset = 0x02,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_MCAST] = {
+		.area = ETHER,
+		.name = "n_rx_mcast",
+		.offset = 0x01,
+		.start = 31,
+		.end = 0,
+	},
+	[N_RX_BCAST] = {
+		.area = ETHER,
+		.name = "n_rx_bcast",
+		.offset = 0x00,
+		.start = 31,
+		.end = 0,
+	},
 };
 
-static char sja1105pqrs_extra_port_stats[][ETH_GSTRING_LEN] = {
-	/* Ether Stats */
-	"n_drops_nolearn",
-	"n_drops_noroute",
-	"n_drops_ill_dtag",
-	"n_drops_dtag",
-	"n_drops_sotag",
-	"n_drops_sitag",
-	"n_drops_utag",
-	"n_tx_bytes_1024_2047",
-	"n_tx_bytes_512_1023",
-	"n_tx_bytes_256_511",
-	"n_tx_bytes_128_255",
-	"n_tx_bytes_65_127",
-	"n_tx_bytes_64",
-	"n_tx_mcast",
-	"n_tx_bcast",
-	"n_rx_bytes_1024_2047",
-	"n_rx_bytes_512_1023",
-	"n_rx_bytes_256_511",
-	"n_rx_bytes_128_255",
-	"n_rx_bytes_65_127",
-	"n_rx_bytes_64",
-	"n_rx_mcast",
-	"n_rx_bcast",
-};
+static int sja1105_port_counter_read(struct sja1105_private *priv, int port,
+				     enum sja1105_counter_index idx, u64 *ctr)
+{
+	const struct sja1105_port_counter *c = &sja1105_port_counters[idx];
+	size_t size = c->is_64bit ? 8 : 4;
+	u8 buf[8] = {0};
+	u64 regs;
+	int rc;
+
+	regs = priv->info->regs->stats[c->area][port];
+
+	rc = sja1105_xfer_buf(priv, SPI_READ, regs + c->offset, buf, size);
+	if (rc)
+		return rc;
+
+	sja1105_unpack(buf, ctr, c->start, c->end, size);
+
+	return 0;
+}
 
 void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
 {
 	struct sja1105_private *priv = ds->priv;
-	struct sja1105_port_status *status;
+	enum sja1105_counter_index max_ctr, i;
 	int rc, k = 0;
 
-	status = kzalloc(sizeof(*status), GFP_KERNEL);
-	if (!status)
-		goto out;
-
-	rc = sja1105_port_status_get(priv, status, port);
-	if (rc < 0) {
-		dev_err(ds->dev, "Failed to read port %d counters: %d\n",
-			port, rc);
-		goto out;
-	}
-	memset(data, 0, ARRAY_SIZE(sja1105_port_stats) * sizeof(u64));
-	data[k++] = status->mac.n_runt;
-	data[k++] = status->mac.n_soferr;
-	data[k++] = status->mac.n_alignerr;
-	data[k++] = status->mac.n_miierr;
-	data[k++] = status->mac.typeerr;
-	data[k++] = status->mac.sizeerr;
-	data[k++] = status->mac.tctimeout;
-	data[k++] = status->mac.priorerr;
-	data[k++] = status->mac.nomaster;
-	data[k++] = status->mac.memov;
-	data[k++] = status->mac.memerr;
-	data[k++] = status->mac.invtyp;
-	data[k++] = status->mac.intcyov;
-	data[k++] = status->mac.domerr;
-	data[k++] = status->mac.pcfbagdrop;
-	data[k++] = status->mac.spcprior;
-	data[k++] = status->mac.ageprior;
-	data[k++] = status->mac.portdrop;
-	data[k++] = status->mac.lendrop;
-	data[k++] = status->mac.bagdrop;
-	data[k++] = status->mac.policeerr;
-	data[k++] = status->mac.drpnona664err;
-	data[k++] = status->mac.spcerr;
-	data[k++] = status->mac.agedrp;
-	data[k++] = status->hl1.n_n664err;
-	data[k++] = status->hl1.n_vlanerr;
-	data[k++] = status->hl1.n_unreleased;
-	data[k++] = status->hl1.n_sizeerr;
-	data[k++] = status->hl1.n_crcerr;
-	data[k++] = status->hl1.n_vlnotfound;
-	data[k++] = status->hl1.n_ctpolerr;
-	data[k++] = status->hl1.n_polerr;
-	data[k++] = status->hl1.n_rxfrm;
-	data[k++] = status->hl1.n_rxbyte;
-	data[k++] = status->hl1.n_txfrm;
-	data[k++] = status->hl1.n_txbyte;
-	data[k++] = status->hl2.n_qfull;
-	data[k++] = status->hl2.n_part_drop;
-	data[k++] = status->hl2.n_egr_disabled;
-	data[k++] = status->hl2.n_not_reach;
-
 	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
 	    priv->info->device_id == SJA1105T_DEVICE_ID)
-		goto out;
+		max_ctr = __MAX_SJA1105ET_PORT_COUNTER;
+	else
+		max_ctr = __MAX_SJA1105PQRS_PORT_COUNTER;
 
-	memset(data + k, 0, ARRAY_SIZE(sja1105pqrs_extra_port_stats) *
-			sizeof(u64));
-	data[k++] = status->ether.n_drops_nolearn;
-	data[k++] = status->ether.n_drops_noroute;
-	data[k++] = status->ether.n_drops_ill_dtag;
-	data[k++] = status->ether.n_drops_dtag;
-	data[k++] = status->ether.n_drops_sotag;
-	data[k++] = status->ether.n_drops_sitag;
-	data[k++] = status->ether.n_drops_utag;
-	data[k++] = status->ether.n_tx_bytes_1024_2047;
-	data[k++] = status->ether.n_tx_bytes_512_1023;
-	data[k++] = status->ether.n_tx_bytes_256_511;
-	data[k++] = status->ether.n_tx_bytes_128_255;
-	data[k++] = status->ether.n_tx_bytes_65_127;
-	data[k++] = status->ether.n_tx_bytes_64;
-	data[k++] = status->ether.n_tx_mcast;
-	data[k++] = status->ether.n_tx_bcast;
-	data[k++] = status->ether.n_rx_bytes_1024_2047;
-	data[k++] = status->ether.n_rx_bytes_512_1023;
-	data[k++] = status->ether.n_rx_bytes_256_511;
-	data[k++] = status->ether.n_rx_bytes_128_255;
-	data[k++] = status->ether.n_rx_bytes_65_127;
-	data[k++] = status->ether.n_rx_bytes_64;
-	data[k++] = status->ether.n_rx_mcast;
-	data[k++] = status->ether.n_rx_bcast;
-out:
-	kfree(status);
+	for (i = 0; i < max_ctr; i++) {
+		rc = sja1105_port_counter_read(priv, port, i, &data[k++]);
+		if (rc) {
+			dev_err(ds->dev,
+				"Failed to read port %d counters: %d\n",
+				port, rc);
+			break;
+		}
+	}
 }
 
 void sja1105_get_strings(struct dsa_switch *ds, int port,
 			 u32 stringset, u8 *data)
 {
 	struct sja1105_private *priv = ds->priv;
-	u8 *p = data;
-	int i;
+	enum sja1105_counter_index max_ctr, i;
+	char *p = data;
 
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < ARRAY_SIZE(sja1105_port_stats); i++) {
-			strlcpy(p, sja1105_port_stats[i], ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		if (priv->info->device_id == SJA1105E_DEVICE_ID ||
-		    priv->info->device_id == SJA1105T_DEVICE_ID)
-			return;
-		for (i = 0; i < ARRAY_SIZE(sja1105pqrs_extra_port_stats); i++) {
-			strlcpy(p, sja1105pqrs_extra_port_stats[i],
-				ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		break;
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
+	    priv->info->device_id == SJA1105T_DEVICE_ID)
+		max_ctr = __MAX_SJA1105ET_PORT_COUNTER;
+	else
+		max_ctr = __MAX_SJA1105PQRS_PORT_COUNTER;
+
+	for (i = 0; i < max_ctr; i++) {
+		strscpy(p, sja1105_port_counters[i].name, ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
 	}
 }
 
 int sja1105_get_sset_count(struct dsa_switch *ds, int port, int sset)
 {
-	int count = ARRAY_SIZE(sja1105_port_stats);
 	struct sja1105_private *priv = ds->priv;
+	enum sja1105_counter_index max_ctr, i;
+	int sset_count = 0;
 
 	if (sset != ETH_SS_STATS)
 		return -EOPNOTSUPP;
 
-	if (priv->info->device_id == SJA1105PR_DEVICE_ID ||
-	    priv->info->device_id == SJA1105QS_DEVICE_ID)
-		count += ARRAY_SIZE(sja1105pqrs_extra_port_stats);
+	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
+	    priv->info->device_id == SJA1105T_DEVICE_ID)
+		max_ctr = __MAX_SJA1105ET_PORT_COUNTER;
+	else
+		max_ctr = __MAX_SJA1105PQRS_PORT_COUNTER;
 
-	return count;
+	for (i = 0; i < max_ctr; i++) {
+		if (!strlen(sja1105_port_counters[i].name))
+			continue;
+
+		sset_count++;
+	}
+
+	return sset_count;
 }
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 52d53e737c68..df3a780e9dcc 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -416,9 +416,9 @@ static struct sja1105_regs sja1105et_regs = {
 	.pad_mii_rx = {0x100801, 0x100803, 0x100805, 0x100807, 0x100809},
 	.rmii_pll1 = 0x10000A,
 	.cgu_idiv = {0x10000B, 0x10000C, 0x10000D, 0x10000E, 0x10000F},
-	.mac = {0x200, 0x202, 0x204, 0x206, 0x208},
-	.mac_hl1 = {0x400, 0x410, 0x420, 0x430, 0x440},
-	.mac_hl2 = {0x600, 0x610, 0x620, 0x630, 0x640},
+	.stats[MAC] = {0x200, 0x202, 0x204, 0x206, 0x208},
+	.stats[HL1] = {0x400, 0x410, 0x420, 0x430, 0x440},
+	.stats[HL2] = {0x600, 0x610, 0x620, 0x630, 0x640},
 	/* UM10944.pdf, Table 78, CGU Register overview */
 	.mii_tx_clk = {0x100013, 0x10001A, 0x100021, 0x100028, 0x10002F},
 	.mii_rx_clk = {0x100014, 0x10001B, 0x100022, 0x100029, 0x100030},
@@ -452,10 +452,10 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.sgmii = 0x1F0000,
 	.rmii_pll1 = 0x10000A,
 	.cgu_idiv = {0x10000B, 0x10000C, 0x10000D, 0x10000E, 0x10000F},
-	.mac = {0x200, 0x202, 0x204, 0x206, 0x208},
-	.mac_hl1 = {0x400, 0x410, 0x420, 0x430, 0x440},
-	.mac_hl2 = {0x600, 0x610, 0x620, 0x630, 0x640},
-	.ether_stats = {0x1400, 0x1418, 0x1430, 0x1448, 0x1460},
+	.stats[MAC] = {0x200, 0x202, 0x204, 0x206, 0x208},
+	.stats[HL1] = {0x400, 0x410, 0x420, 0x430, 0x440},
+	.stats[HL2] = {0x600, 0x610, 0x620, 0x630, 0x640},
+	.stats[ETHER] = {0x1400, 0x1418, 0x1430, 0x1448, 0x1460},
 	/* UM11040.pdf, Table 114 */
 	.mii_tx_clk = {0x100013, 0x100019, 0x10001F, 0x100025, 0x10002B},
 	.mii_rx_clk = {0x100014, 0x10001A, 0x100020, 0x100026, 0x10002C},

From b193f2ed533f6ddffe947327dcf2e76d8beb72a4 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 21 May 2021 16:25:29 +0300
Subject: [PATCH 0451/3292] dpaa2-eth: setup the of_node field of the device

When the DPNI object is connected to a DPMAC, setup the of_node to point
to the DTS device node of that specific MAC. This enables other drivers,
for example the DSA subsystem, to find the net_device by its device
node.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-mac.c  | 25 ++++++++++---------
 .../net/ethernet/freescale/dpaa2/dpaa2-mac.h  |  1 +
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index ccaf7e35abeb..4dfadf2b70d6 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -289,17 +289,15 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 
 	mac->if_link_type = mac->attr.link_type;
 
-	dpmac_node = dpaa2_mac_get_node(mac->attr.id);
+	dpmac_node = mac->of_node;
 	if (!dpmac_node) {
 		netdev_err(net_dev, "No dpmac@%d node found.\n", mac->attr.id);
 		return -ENODEV;
 	}
 
 	err = dpaa2_mac_get_if_mode(dpmac_node, mac->attr);
-	if (err < 0) {
-		err = -EINVAL;
-		goto err_put_node;
-	}
+	if (err < 0)
+		return -EINVAL;
 	mac->if_mode = err;
 
 	/* The MAC does not have the capability to add RGMII delays so
@@ -311,8 +309,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 	     mac->if_mode == PHY_INTERFACE_MODE_RGMII_RXID ||
 	     mac->if_mode == PHY_INTERFACE_MODE_RGMII_TXID)) {
 		netdev_err(net_dev, "RGMII delay not supported\n");
-		err = -EINVAL;
-		goto err_put_node;
+		return -EINVAL;
 	}
 
 	if ((mac->attr.link_type == DPMAC_LINK_TYPE_PHY &&
@@ -320,7 +317,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 	    mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE) {
 		err = dpaa2_pcs_create(mac, dpmac_node, mac->attr.id);
 		if (err)
-			goto err_put_node;
+			return err;
 	}
 
 	mac->phylink_config.dev = &net_dev->dev;
@@ -344,16 +341,12 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 		goto err_phylink_destroy;
 	}
 
-	of_node_put(dpmac_node);
-
 	return 0;
 
 err_phylink_destroy:
 	phylink_destroy(mac->phylink);
 err_pcs_destroy:
 	dpaa2_pcs_destroy(mac);
-err_put_node:
-	of_node_put(dpmac_node);
 
 	return err;
 }
@@ -388,6 +381,12 @@ int dpaa2_mac_open(struct dpaa2_mac *mac)
 		goto err_close_dpmac;
 	}
 
+	/* Find the device node representing the MAC device and link the device
+	 * behind the associated netdev to it.
+	 */
+	mac->of_node = dpaa2_mac_get_node(mac->attr.id);
+	net_dev->dev.of_node = mac->of_node;
+
 	return 0;
 
 err_close_dpmac:
@@ -400,6 +399,8 @@ void dpaa2_mac_close(struct dpaa2_mac *mac)
 	struct fsl_mc_device *dpmac_dev = mac->mc_dev;
 
 	dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle);
+	if (mac->of_node)
+		of_node_put(mac->of_node);
 }
 
 static char dpaa2_mac_ethtool_stats[][ETH_GSTRING_LEN] = {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
index 13d42dd58ec9..8ebcb3420d02 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
@@ -24,6 +24,7 @@ struct dpaa2_mac {
 	phy_interface_t if_mode;
 	enum dpmac_link_type if_link_type;
 	struct lynx_pcs *pcs;
+	struct device_node *of_node;
 };
 
 bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,

From 30f43d6f1cab2eae349a27bd8a0faa795673a97f Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 21 May 2021 16:25:30 +0300
Subject: [PATCH 0452/3292] dpaa2-eth: name the debugfs directory after the
 DPNI object

Name the debugfs directory after the DPNI object instead of the netdev
name since this can be changed after probe by udev rules.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
index b87db0846e10..8356af4631fd 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
@@ -121,10 +121,14 @@ DEFINE_SHOW_ATTRIBUTE(dpaa2_dbg_ch);
 
 void dpaa2_dbg_add(struct dpaa2_eth_priv *priv)
 {
+	struct fsl_mc_device *dpni_dev;
 	struct dentry *dir;
+	char name[10];
 
 	/* Create a directory for the interface */
-	dir = debugfs_create_dir(priv->net_dev->name, dpaa2_dbg_root);
+	dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
+	snprintf(name, 10, "dpni.%d", dpni_dev->obj_desc.id);
+	dir = debugfs_create_dir(name, dpaa2_dbg_root);
 	priv->dbg.dir = dir;
 
 	/* per-cpu stats file */

From f5120f5998803a973b1d432ed2aa7e592527aa46 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 21 May 2021 17:12:20 +0300
Subject: [PATCH 0453/3292] dpaa2-eth: don't print error from dpaa2_mac_connect
 if that's EPROBE_DEFER

When booting a board with DPAA2 interfaces defined statically via DPL
(as opposed to creating them dynamically using restool), the driver will
print an unspecific error message.

This change adds the error code to the message, and avoids printing
altogether if the error code is EPROBE_DEFER, because that is not a
cause of alarm.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index e0c3c58e2ac7..8433aa730c42 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -4164,10 +4164,11 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
 
 	if (dpaa2_eth_is_type_phy(priv)) {
 		err = dpaa2_mac_connect(mac);
-		if (err) {
-			netdev_err(priv->net_dev, "Error connecting to the MAC endpoint\n");
+		if (err && err != -EPROBE_DEFER)
+			netdev_err(priv->net_dev, "Error connecting to the MAC endpoint: %pe",
+				   ERR_PTR(err));
+		if (err)
 			goto err_close_mac;
-		}
 	}
 
 	return 0;

From 52af13a41489d7bbc1932d17583eff6e5fffc820 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 22 May 2021 20:02:46 +0800
Subject: [PATCH 0454/3292] net: ftgmac100: add missing error return code in
 ftgmac100_probe()

The variables will be free on path err_phy_connect, it should
return error code, or it will cause double free when calling
ftgmac100_remove().

Fixes: bd466c3fb5a4 ("net/faraday: Support NCSI mode")
Fixes: 39bfab8844a0 ("net: ftgmac100: Add support for DT phy-handle property")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 04421aec2dfd..11dbbfd38770 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1830,14 +1830,17 @@ static int ftgmac100_probe(struct platform_device *pdev)
 	if (np && of_get_property(np, "use-ncsi", NULL)) {
 		if (!IS_ENABLED(CONFIG_NET_NCSI)) {
 			dev_err(&pdev->dev, "NCSI stack not enabled\n");
+			err = -EINVAL;
 			goto err_phy_connect;
 		}
 
 		dev_info(&pdev->dev, "Using NCSI interface\n");
 		priv->use_ncsi = true;
 		priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler);
-		if (!priv->ndev)
+		if (!priv->ndev) {
+			err = -EINVAL;
 			goto err_phy_connect;
+		}
 	} else if (np && of_get_property(np, "phy-handle", NULL)) {
 		struct phy_device *phy;
 
@@ -1856,6 +1859,7 @@ static int ftgmac100_probe(struct platform_device *pdev)
 					     &ftgmac100_adjust_link);
 		if (!phy) {
 			dev_err(&pdev->dev, "Failed to connect to phy\n");
+			err = -EINVAL;
 			goto err_phy_connect;
 		}
 

From 3880fc37beba5d6878ef4c8d57f21683974a211b Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 11:20:30 +0800
Subject: [PATCH 0455/3292] sfc: use DEVICE_ATTR_*() macro

Use DEVICE_ATTR_*() helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 17 ++++++++---------
 drivers/net/ethernet/sfc/efx.c        |  6 +++---
 drivers/net/ethernet/sfc/efx_common.c | 12 +++++++-----
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index c3f35da1b82a..d597c89f00ed 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -370,9 +370,9 @@ static int efx_ef10_get_mac_address_vf(struct efx_nic *efx, u8 *mac_address)
 	return 0;
 }
 
-static ssize_t efx_ef10_show_link_control_flag(struct device *dev,
-					       struct device_attribute *attr,
-					       char *buf)
+static ssize_t link_control_flag_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
 {
 	struct efx_nic *efx = dev_get_drvdata(dev);
 
@@ -382,9 +382,9 @@ static ssize_t efx_ef10_show_link_control_flag(struct device *dev,
 		       ? 1 : 0);
 }
 
-static ssize_t efx_ef10_show_primary_flag(struct device *dev,
-					  struct device_attribute *attr,
-					  char *buf)
+static ssize_t primary_flag_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
 {
 	struct efx_nic *efx = dev_get_drvdata(dev);
 
@@ -519,9 +519,8 @@ static void efx_ef10_cleanup_vlans(struct efx_nic *efx)
 	mutex_unlock(&nic_data->vlan_lock);
 }
 
-static DEVICE_ATTR(link_control_flag, 0444, efx_ef10_show_link_control_flag,
-		   NULL);
-static DEVICE_ATTR(primary_flag, 0444, efx_ef10_show_primary_flag, NULL);
+static DEVICE_ATTR_RO(link_control_flag);
+static DEVICE_ATTR_RO(primary_flag);
 
 static int efx_ef10_probe(struct efx_nic *efx)
 {
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 4fd9903ffe98..37fcf2eb0741 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -689,13 +689,13 @@ static struct notifier_block efx_netdev_notifier = {
 	.notifier_call = efx_netdev_event,
 };
 
-static ssize_t
-show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t phy_type_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct efx_nic *efx = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", efx->phy_type);
 }
-static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
+static DEVICE_ATTR_RO(phy_type);
 
 static int efx_register_netdev(struct efx_nic *efx)
 {
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index de797e1ac5a9..896b59253197 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -1160,8 +1160,9 @@ void efx_fini_io(struct efx_nic *efx)
 }
 
 #ifdef CONFIG_SFC_MCDI_LOGGING
-static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
-			     char *buf)
+static ssize_t mcdi_logging_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
 {
 	struct efx_nic *efx = dev_get_drvdata(dev);
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
@@ -1169,8 +1170,9 @@ static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
 }
 
-static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
+static ssize_t mcdi_logging_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
 {
 	struct efx_nic *efx = dev_get_drvdata(dev);
 	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
@@ -1180,7 +1182,7 @@ static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
+static DEVICE_ATTR_RW(mcdi_logging);
 
 void efx_init_mcdi_logging(struct efx_nic *efx)
 {

From 4934fb7dc409c2b14dc49a1f6d9024bec87431a4 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 11:24:09 +0800
Subject: [PATCH 0456/3292] sfc: falcon: use DEVICE_ATTR_*() macro

Use DEVICE_ATTR_*() helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/falcon/efx.c           |  4 ++--
 drivers/net/ethernet/sfc/falcon/falcon_boards.c | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 5e7a57b680ca..9ec752a43c75 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -2254,12 +2254,12 @@ static struct notifier_block ef4_netdev_notifier = {
 };
 
 static ssize_t
-show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
+phy_type_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct ef4_nic *efx = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", efx->phy_type);
 }
-static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
+static DEVICE_ATTR_RO(phy_type);
 
 static int ef4_register_netdev(struct ef4_nic *efx)
 {
diff --git a/drivers/net/ethernet/sfc/falcon/falcon_boards.c b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
index 729a05c1b0cf..2d2d8099011e 100644
--- a/drivers/net/ethernet/sfc/falcon/falcon_boards.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
@@ -354,16 +354,16 @@ fail_on:
 	return rc;
 }
 
-static ssize_t show_phy_flash_cfg(struct device *dev,
+static ssize_t phy_flash_cfg_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
 	struct ef4_nic *efx = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", !!(efx->phy_mode & PHY_MODE_SPECIAL));
 }
 
-static ssize_t set_phy_flash_cfg(struct device *dev,
-				 struct device_attribute *attr,
-				 const char *buf, size_t count)
+static ssize_t phy_flash_cfg_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
 {
 	struct ef4_nic *efx = dev_get_drvdata(dev);
 	enum ef4_phy_mode old_mode, new_mode;
@@ -396,7 +396,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev,
 	return err ? err : count;
 }
 
-static DEVICE_ATTR(phy_flash_cfg, 0644, show_phy_flash_cfg, set_phy_flash_cfg);
+static DEVICE_ATTR_RW(phy_flash_cfg);
 
 static void sfe4001_fini(struct ef4_nic *efx)
 {

From 0056982f093d6d5f12c43855754a0933b654778d Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 14:02:23 +0800
Subject: [PATCH 0457/3292] ehea: Use DEVICE_ATTR_*() macro

Use DEVICE_ATTR_*() helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ehea/ehea_main.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index ea55314b209d..8fddce769c14 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2867,14 +2867,14 @@ out:
 	return ret;
 }
 
-static ssize_t ehea_show_port_id(struct device *dev,
-				 struct device_attribute *attr, char *buf)
+static ssize_t log_port_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	struct ehea_port *port = container_of(dev, struct ehea_port, ofdev.dev);
 	return sprintf(buf, "%d", port->logical_port_id);
 }
 
-static DEVICE_ATTR(log_port_id, 0444, ehea_show_port_id, NULL);
+static DEVICE_ATTR_RO(log_port_id);
 
 static void logical_port_release(struct device *dev)
 {
@@ -3113,7 +3113,7 @@ static struct device_node *ehea_get_eth_dn(struct ehea_adapter *adapter,
 	return NULL;
 }
 
-static ssize_t ehea_probe_port(struct device *dev,
+static ssize_t probe_port_store(struct device *dev,
 			       struct device_attribute *attr,
 			       const char *buf, size_t count)
 {
@@ -3168,9 +3168,9 @@ static ssize_t ehea_probe_port(struct device *dev,
 	return (ssize_t) count;
 }
 
-static ssize_t ehea_remove_port(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
+static ssize_t remove_port_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
 {
 	struct ehea_adapter *adapter = dev_get_drvdata(dev);
 	struct ehea_port *port;
@@ -3203,8 +3203,8 @@ static ssize_t ehea_remove_port(struct device *dev,
 	return (ssize_t) count;
 }
 
-static DEVICE_ATTR(probe_port, 0200, NULL, ehea_probe_port);
-static DEVICE_ATTR(remove_port, 0200, NULL, ehea_remove_port);
+static DEVICE_ATTR_WO(probe_port);
+static DEVICE_ATTR_WO(remove_port);
 
 static int ehea_create_device_sysfs(struct platform_device *dev)
 {

From 190e6e291a4c5b9602817cffeacfd577f805e563 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 23 May 2021 08:54:10 -0700
Subject: [PATCH 0458/3292] net: r6040: Use logical or for MDIO operations

This is not a functional change, but we should be using a logical or to
assign the bits we will be writing to the MDIO read and write registers.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/rdc/r6040.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index 7c74318620b1..aff68e2cb700 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -200,7 +200,7 @@ static int r6040_phy_read(void __iomem *ioaddr, int phy_addr, int reg)
 	int limit = MAC_DEF_TIMEOUT;
 	u16 cmd;
 
-	iowrite16(MDIO_READ + reg + (phy_addr << 8), ioaddr + MMDIO);
+	iowrite16(MDIO_READ | reg | (phy_addr << 8), ioaddr + MMDIO);
 	/* Wait for the read bit to be cleared */
 	while (limit--) {
 		cmd = ioread16(ioaddr + MMDIO);
@@ -224,7 +224,7 @@ static int r6040_phy_write(void __iomem *ioaddr,
 
 	iowrite16(val, ioaddr + MMWD);
 	/* Write the command to the MDIO bus */
-	iowrite16(MDIO_WRITE + reg + (phy_addr << 8), ioaddr + MMDIO);
+	iowrite16(MDIO_WRITE | reg | (phy_addr << 8), ioaddr + MMDIO);
 	/* Wait for the write bit to be cleared */
 	while (limit--) {
 		cmd = ioread16(ioaddr + MMDIO);

From 06666907a38acdd07058014944d041856a11ebe7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 23 May 2021 08:54:11 -0700
Subject: [PATCH 0459/3292] net: r6040: Use ETH_FCS_LEN

Instead of the open coded constant 4.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/rdc/r6040.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index aff68e2cb700..ef78c2424668 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -544,7 +544,7 @@ static int r6040_rx(struct net_device *dev, int limit)
 		skb_ptr->dev = priv->dev;
 
 		/* Do not count the CRC */
-		skb_put(skb_ptr, descptr->len - 4);
+		skb_put(skb_ptr, descptr->len - ETH_FCS_LEN);
 		dma_unmap_single(&priv->pdev->dev, le32_to_cpu(descptr->buf),
 				 MAX_BUF_SIZE, DMA_FROM_DEVICE);
 		skb_ptr->protocol = eth_type_trans(skb_ptr, priv->dev);
@@ -552,7 +552,7 @@ static int r6040_rx(struct net_device *dev, int limit)
 		/* Send to upper layer */
 		netif_receive_skb(skb_ptr);
 		dev->stats.rx_packets++;
-		dev->stats.rx_bytes += descptr->len - 4;
+		dev->stats.rx_bytes += descptr->len - ETH_FCS_LEN;
 
 		/* put new skb into descriptor */
 		descptr->skb_ptr = new_skb;

From 5d6c3d91ad722bad16346bcd3e839c7649e5dca4 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 23 May 2021 08:58:42 -0700
Subject: [PATCH 0460/3292] net: r6040: Allow restarting auto-negotiation

Use phy_ethtool_nway_reset() since the driver makes use of the PHY
library.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/rdc/r6040.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index ef78c2424668..47e9998b62f0 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -943,6 +943,7 @@ static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.get_link_ksettings     = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings     = phy_ethtool_set_link_ksettings,
+	.nway_reset		= phy_ethtool_nway_reset,
 };
 
 static const struct net_device_ops r6040_netdev_ops = {

From ec7d6dd870d421a853ffa692d4bce5783a519342 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 24 May 2021 09:07:01 +0800
Subject: [PATCH 0461/3292] ethernet: ucc_geth: Use kmemdup() rather than
 kmalloc+memcpy

Issue identified with Coccinelle.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index e0936510fa34..0acfafb73db1 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3590,10 +3590,9 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	if ((ucc_num < 0) || (ucc_num > 7))
 		return -ENODEV;
 
-	ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL);
+	ug_info = kmemdup(&ugeth_primary_info, sizeof(*ug_info), GFP_KERNEL);
 	if (ug_info == NULL)
 		return -ENOMEM;
-	memcpy(ug_info, &ugeth_primary_info, sizeof(*ug_info));
 
 	ug_info->uf_info.ucc_num = ucc_num;
 

From 17818dfa8f2e90a6f40e047a3ea9c39af1a8a87d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Apr 2021 14:41:33 +0100
Subject: [PATCH 0462/3292] ath10k/ath11k: fix spelling mistake "requed" ->
 "requeued"

There are multiple occurrances of the misspelling of requeued in
the drivers with symbol names and debug text. Fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210423134133.339751-1-colin.king@canonical.com
---
 drivers/net/wireless/ath/ath10k/core.h              | 2 +-
 drivers/net/wireless/ath/ath10k/debug.c             | 4 ++--
 drivers/net/wireless/ath/ath10k/htt.h               | 4 ++--
 drivers/net/wireless/ath/ath10k/wmi.c               | 6 +++---
 drivers/net/wireless/ath/ath10k/wmi.h               | 8 ++++----
 drivers/net/wireless/ath/ath11k/core.h              | 4 ++--
 drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c | 2 +-
 drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h | 2 +-
 drivers/net/wireless/ath/ath11k/wmi.c               | 4 ++--
 drivers/net/wireless/ath/ath11k/wmi.h               | 4 ++--
 10 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 648ed36f845f..5aeff2d9f6cf 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -301,7 +301,7 @@ struct ath10k_fw_stats_pdev {
 	s32 underrun;
 	u32 hw_paused;
 	s32 tx_abort;
-	s32 mpdus_requed;
+	s32 mpdus_requeued;
 	u32 tx_ko;
 	u32 data_rc;
 	u32 self_triggers;
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index fd052f6ed019..39378e3f9b2b 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -1105,7 +1105,7 @@ static const char ath10k_gstrings_stats[][ETH_GSTRING_LEN] = {
 	"d_tx_ppdu_reaped",
 	"d_tx_fifo_underrun",
 	"d_tx_ppdu_abort",
-	"d_tx_mpdu_requed",
+	"d_tx_mpdu_requeued",
 	"d_tx_excessive_retries",
 	"d_tx_hw_rate",
 	"d_tx_dropped_sw_retries",
@@ -1205,7 +1205,7 @@ void ath10k_debug_get_et_stats(struct ieee80211_hw *hw,
 	data[i++] = pdev_stats->hw_reaped;
 	data[i++] = pdev_stats->underrun;
 	data[i++] = pdev_stats->tx_abort;
-	data[i++] = pdev_stats->mpdus_requed;
+	data[i++] = pdev_stats->mpdus_requeued;
 	data[i++] = pdev_stats->tx_ko;
 	data[i++] = pdev_stats->data_rc;
 	data[i++] = pdev_stats->sw_retry_failure;
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 956157946106..4e11ee775b4d 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -1282,8 +1282,8 @@ struct htt_dbg_stats_wal_tx_stats {
 	/* Num PPDUs cleaned up in TX abort */
 	__le32 tx_abort;
 
-	/* Num MPDUs requed by SW */
-	__le32 mpdus_requed;
+	/* Num MPDUs requeued by SW */
+	__le32 mpdus_requeued;
 
 	/* excessive retries */
 	__le32 tx_ko;
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index d48b922215eb..f42bf2c8f9e7 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2867,7 +2867,7 @@ void ath10k_wmi_pull_pdev_stats_tx(const struct wmi_pdev_stats_tx *src,
 	dst->hw_reaped = __le32_to_cpu(src->hw_reaped);
 	dst->underrun = __le32_to_cpu(src->underrun);
 	dst->tx_abort = __le32_to_cpu(src->tx_abort);
-	dst->mpdus_requed = __le32_to_cpu(src->mpdus_requed);
+	dst->mpdus_requeued = __le32_to_cpu(src->mpdus_requeued);
 	dst->tx_ko = __le32_to_cpu(src->tx_ko);
 	dst->data_rc = __le32_to_cpu(src->data_rc);
 	dst->self_triggers = __le32_to_cpu(src->self_triggers);
@@ -2895,7 +2895,7 @@ ath10k_wmi_10_4_pull_pdev_stats_tx(const struct wmi_10_4_pdev_stats_tx *src,
 	dst->hw_reaped = __le32_to_cpu(src->hw_reaped);
 	dst->underrun = __le32_to_cpu(src->underrun);
 	dst->tx_abort = __le32_to_cpu(src->tx_abort);
-	dst->mpdus_requed = __le32_to_cpu(src->mpdus_requed);
+	dst->mpdus_requeued = __le32_to_cpu(src->mpdus_requeued);
 	dst->tx_ko = __le32_to_cpu(src->tx_ko);
 	dst->data_rc = __le32_to_cpu(src->data_rc);
 	dst->self_triggers = __le32_to_cpu(src->self_triggers);
@@ -8270,7 +8270,7 @@ ath10k_wmi_fw_pdev_tx_stats_fill(const struct ath10k_fw_stats_pdev *pdev,
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
 			 "PPDUs cleaned", pdev->tx_abort);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
-			 "MPDUs requed", pdev->mpdus_requed);
+			 "MPDUs requeued", pdev->mpdus_requeued);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
 			 "Excessive retries", pdev->tx_ko);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index d870f7067cb7..dd980c81793e 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -4371,8 +4371,8 @@ struct wmi_pdev_stats_tx {
 	/* Num PPDUs cleaned up in TX abort */
 	__le32 tx_abort;
 
-	/* Num MPDUs requed by SW */
-	__le32 mpdus_requed;
+	/* Num MPDUs requeued by SW */
+	__le32 mpdus_requeued;
 
 	/* excessive retries */
 	__le32 tx_ko;
@@ -4444,8 +4444,8 @@ struct wmi_10_4_pdev_stats_tx {
 	/* Num PPDUs cleaned up in TX abort */
 	__le32 tx_abort;
 
-	/* Num MPDUs requed by SW */
-	__le32 mpdus_requed;
+	/* Num MPDUs requeued by SW */
+	__le32 mpdus_requeued;
 
 	/* excessive retries */
 	__le32 tx_ko;
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 55af982deca7..382df5318b61 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -795,8 +795,8 @@ struct ath11k_fw_stats_pdev {
 	s32 underrun;
 	/* Num PPDUs cleaned up in TX abort */
 	s32 tx_abort;
-	/* Num MPDUs requed by SW */
-	s32 mpdus_requed;
+	/* Num MPDUs requeued by SW */
+	s32 mpdus_requeued;
 	/* excessive retries */
 	u32 tx_ko;
 	/* data hw rate code */
diff --git a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c
index ec93f14e6d2a..9e0c90da99d3 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c
@@ -89,7 +89,7 @@ static inline void htt_print_tx_pdev_stats_cmn_tlv(const void *tag_buf,
 	len += HTT_DBG_OUT(buf + len, buf_len - len, "tx_abort = %u",
 			   htt_stats_buf->tx_abort);
 	len += HTT_DBG_OUT(buf + len, buf_len - len, "mpdu_requeued = %u",
-			   htt_stats_buf->mpdu_requed);
+			   htt_stats_buf->mpdu_requeued);
 	len += HTT_DBG_OUT(buf + len, buf_len - len, "tx_xretry = %u",
 			   htt_stats_buf->tx_xretry);
 	len += HTT_DBG_OUT(buf + len, buf_len - len, "data_rc = %u",
diff --git a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h
index 567a26d485a9..d428f52003a4 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h
+++ b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h
@@ -147,7 +147,7 @@ struct htt_tx_pdev_stats_cmn_tlv {
 	u32 hw_flush;
 	u32 hw_filt;
 	u32 tx_abort;
-	u32 mpdu_requed;
+	u32 mpdu_requeued;
 	u32 tx_xretry;
 	u32 data_rc;
 	u32 mpdu_dropped_xretry;
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 5ca2d80679b6..6c253eae9d06 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -5235,7 +5235,7 @@ ath11k_wmi_pull_pdev_stats_tx(const struct wmi_pdev_stats_tx *src,
 	dst->hw_reaped = src->hw_reaped;
 	dst->underrun = src->underrun;
 	dst->tx_abort = src->tx_abort;
-	dst->mpdus_requed = src->mpdus_requed;
+	dst->mpdus_requeued = src->mpdus_requeued;
 	dst->tx_ko = src->tx_ko;
 	dst->data_rc = src->data_rc;
 	dst->self_triggers = src->self_triggers;
@@ -5505,7 +5505,7 @@ ath11k_wmi_fw_pdev_tx_stats_fill(const struct ath11k_fw_stats_pdev *pdev,
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
 			 "PPDUs cleaned", pdev->tx_abort);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
-			 "MPDUs requed", pdev->mpdus_requed);
+			 "MPDUs requeued", pdev->mpdus_requeued);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
 			 "Excessive retries", pdev->tx_ko);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10u\n",
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index 3ade1ddd35c9..d35c47e0b19d 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -4171,8 +4171,8 @@ struct wmi_pdev_stats_tx {
 	/* Num PPDUs cleaned up in TX abort */
 	s32 tx_abort;
 
-	/* Num MPDUs requed by SW */
-	s32 mpdus_requed;
+	/* Num MPDUs requeued by SW */
+	s32 mpdus_requeued;
 
 	/* excessive retries */
 	u32 tx_ko;

From 21e4e15a846f86643a43c291cbed5dca3639fc2b Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 17:02:58 +0800
Subject: [PATCH 0463/3292] reiserfs: Remove unneed check in
 reiserfs_write_full_page()

Condition !A || A && B is equivalent to !A || B.

Generated by: scripts/coccinelle/misc/excluded_middle.cocci

Link: https://lore.kernel.org/r/20210523090258.27696-1-yuehaibing@huawei.com
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/reiserfs/inode.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 780bb90c1804..f49b72ccac4c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2584,9 +2584,7 @@ static int reiserfs_write_full_page(struct page *page,
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 		} else if ((checked || buffer_dirty(bh)) &&
-		           (!buffer_mapped(bh) || (buffer_mapped(bh)
-						       && bh->b_blocknr ==
-						       0))) {
+			   (!buffer_mapped(bh) || bh->b_blocknr == 0)) {
 			/*
 			 * not mapped yet, or it points to a direct item, search
 			 * the btree for the mapping info, and log any direct

From 12e0613715e1cf305fffafaf0e89d810d9a85cc0 Mon Sep 17 00:00:00 2001
From: "zhangyi (F)" <yi.zhang@huawei.com>
Date: Sat, 13 Mar 2021 11:01:44 +0800
Subject: [PATCH 0464/3292] block_dump: remove block_dump feature in
 mark_inode_dirty()

block_dump is an old debugging interface, one of it's functions is used
to print the information about who write which file on disk. If we
enable block_dump through /proc/sys/vm/block_dump and turn on debug log
level, we can gather information about write process name, target file
name and disk from kernel message. This feature is realized in
block_dump___mark_inode_dirty(), it print above information into kernel
message directly when marking inode dirty, so it is noisy and can easily
trigger log storm. At the same time, get the dentry refcount is also not
safe, we found it will lead to deadlock on ext4 file system with
data=journal mode.

After tracepoints has been introduced into the kernel, we got a
tracepoint in __mark_inode_dirty(), which is a better replacement of
block_dump___mark_inode_dirty(). The only downside is that it only trace
the inode number and not a file name, but it probably doesn't matter
because the original printed file name in block_dump is not accurate in
some cases, and we can still find it through the inode number and device
id. So this patch delete the dirting inode part of block_dump feature.

Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210313030146.2882027-2-yi.zhang@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e91980f49388..7c46d1588a19 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2205,28 +2205,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
-static noinline void block_dump___mark_inode_dirty(struct inode *inode)
-{
-	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
-		struct dentry *dentry;
-		const char *name = "?";
-
-		dentry = d_find_alias(inode);
-		if (dentry) {
-			spin_lock(&dentry->d_lock);
-			name = (const char *) dentry->d_name.name;
-		}
-		printk(KERN_DEBUG
-		       "%s(%d): dirtied inode %lu (%s) on %s\n",
-		       current->comm, task_pid_nr(current), inode->i_ino,
-		       name, inode->i_sb->s_id);
-		if (dentry) {
-			spin_unlock(&dentry->d_lock);
-			dput(dentry);
-		}
-	}
-}
-
 /**
  * __mark_inode_dirty -	internal function to mark an inode dirty
  *
@@ -2296,9 +2274,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
 		return;
 
-	if (unlikely(block_dump))
-		block_dump___mark_inode_dirty(inode);
-
 	spin_lock(&inode->i_lock);
 	if (dirtytime && (inode->i_state & I_DIRTY_INODE))
 		goto out_unlock_inode;

From 3af3d772f7216cf23081bb4176e86f1219d32ebc Mon Sep 17 00:00:00 2001
From: "zhangyi (F)" <yi.zhang@huawei.com>
Date: Sat, 13 Mar 2021 11:01:45 +0800
Subject: [PATCH 0465/3292] block_dump: remove block_dump feature

We have already delete block_dump feature in mark_inode_dirty() because
it can be replaced by tracepoints, now we also remove the part in
submit_bio() for the same reason. The part of block dump feature in
submit_bio() dump the write process, write region and sectors on the
target disk into kernel message. it can be replaced by
block_bio_queue tracepoint in submit_bio_checks(), so we do not need
block_dump anymore, remove the whole block_dump feature.

Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210313030146.2882027-3-yi.zhang@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          | 9 ---------
 include/linux/writeback.h | 1 -
 kernel/sysctl.c           | 8 --------
 mm/page-writeback.c       | 5 -----
 4 files changed, 23 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 9bcdae93f6d4..689aac2625d2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1086,15 +1086,6 @@ blk_qc_t submit_bio(struct bio *bio)
 			task_io_account_read(bio->bi_iter.bi_size);
 			count_vm_events(PGPGIN, count);
 		}
-
-		if (unlikely(block_dump)) {
-			char b[BDEVNAME_SIZE];
-			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
-			current->comm, task_pid_nr(current),
-				op_is_write(bio_op(bio)) ? "WRITE" : "READ",
-				(unsigned long long)bio->bi_iter.bi_sector,
-				bio_devname(bio, b), count);
-		}
 	}
 
 	/*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8e5c5bb16e2d..9ef50176f3a1 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -360,7 +360,6 @@ extern unsigned int dirty_writeback_interval;
 extern unsigned int dirty_expire_interval;
 extern unsigned int dirtytime_expire_interval;
 extern int vm_highmem_is_dirtyable;
-extern int block_dump;
 extern int laptop_mode;
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 14edf84cc571..08e52b1090e9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2931,14 +2931,6 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
-	{
-		.procname	= "block_dump",
-		.data		= &block_dump,
-		.maxlen		= sizeof(block_dump),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
 	{
 		.procname	= "vfs_cache_pressure",
 		.data		= &sysctl_vfs_cache_pressure,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0062d5c57d41..fe72d5f65688 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -108,11 +108,6 @@ EXPORT_SYMBOL_GPL(dirty_writeback_interval);
  */
 unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
 
-/*
- * Flag that makes the machine dump writes/reads and block dirtyings.
- */
-int block_dump;
-
 /*
  * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
  * a full sync is triggered after this time elapses without any disk activity.

From 51fd43e2801054b1321b1d81b91dc37efdff5127 Mon Sep 17 00:00:00 2001
From: "zhangyi (F)" <yi.zhang@huawei.com>
Date: Sat, 13 Mar 2021 11:01:46 +0800
Subject: [PATCH 0466/3292] block_dump: remove comments in docs

Now block_dump feature is gone, remove all comments in docs.

Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210313030146.2882027-4-yi.zhang@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/laptops/laptop-mode.rst | 11 -----------
 Documentation/admin-guide/sysctl/vm.rst           |  8 --------
 2 files changed, 19 deletions(-)

diff --git a/Documentation/admin-guide/laptops/laptop-mode.rst b/Documentation/admin-guide/laptops/laptop-mode.rst
index c984c4262f2e..b61cc601d298 100644
--- a/Documentation/admin-guide/laptops/laptop-mode.rst
+++ b/Documentation/admin-guide/laptops/laptop-mode.rst
@@ -101,17 +101,6 @@ this results in concentration of disk activity in a small time interval which
 occurs only once every 10 minutes, or whenever the disk is forced to spin up by
 a cache miss. The disk can then be spun down in the periods of inactivity.
 
-If you want to find out which process caused the disk to spin up, you can
-gather information by setting the flag /proc/sys/vm/block_dump. When this flag
-is set, Linux reports all disk read and write operations that take place, and
-all block dirtyings done to files. This makes it possible to debug why a disk
-needs to spin up, and to increase battery life even more. The output of
-block_dump is written to the kernel output, and it can be retrieved using
-"dmesg". When you use block_dump and your kernel logging level also includes
-kernel debugging messages, you probably want to turn off klogd, otherwise
-the output of block_dump will be logged, causing disk activity that is not
-normally there.
-
 
 Configuration
 -------------
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 586cd4b86428..3ca6679f16ea 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -25,7 +25,6 @@ files can be found in mm/swap.c.
 Currently, these files are in /proc/sys/vm:
 
 - admin_reserve_kbytes
-- block_dump
 - compact_memory
 - compaction_proactiveness
 - compact_unevictable_allowed
@@ -106,13 +105,6 @@ On x86_64 this is about 128MB.
 Changing this takes effect whenever an application requests memory.
 
 
-block_dump
-==========
-
-block_dump enables block I/O debugging when set to a nonzero value. More
-information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.
-
-
 compact_memory
 ==============
 

From 65a8db393a8e49ee98432cf3e641d2bd2fa88606 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 11 May 2021 19:34:40 +0800
Subject: [PATCH 0467/3292] aoe: remove unnecessary mutex_init()

The mutex ktio_spawn_lock is initialized statically.
It is unnecessary to initialize by mutex_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210511113440.3772053-1-yangyingliang@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/aoe/aoecmd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index ecd77897a761..588889bea7c3 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1701,8 +1701,6 @@ aoecmd_init(void)
 		goto ktiowq_fail;
 	}
 
-	mutex_init(&ktio_spawn_lock);
-
 	for (i = 0; i < ncpus; i++) {
 		INIT_LIST_HEAD(&iocq[i].head);
 		spin_lock_init(&iocq[i].lock);

From b5f3352e0868611b555e1dcb2e1ffb8e346c519c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 11 May 2021 14:58:04 -0400
Subject: [PATCH 0468/3292] blkcg: drop CLONE_IO check in blkcg_can_attach()

blkcg has always rejected to attach if any of the member tasks has shared
io_context. The rationale was that io_contexts can be shared across
different cgroups making it impossible to define what the appropriate
control behavior should be. However, this check causes more problems than it
solves:

* The check prevents controller enable and migrations but not CLONE_IO
  itself, which can lead to surprises as the outcome changes depending on
  the order of operations.

* Sharing within a cgroup is fine but the check can't distinguish that. This
  leads to unnecessary conflicts with the recent CLONE_IO usage in io_uring.

io_context sharing doesn't make any difference for rq_qos based controllers
and the way it's used is safe as long as tasks aren't migrated dynamically
which is the vast majority of use cases. While we can try to make the check
more precise to avoid false positives, the added complexity doesn't seem
worthwhile. Let's just drop blkcg_can_attach().

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/YJrTvHbrRDbJjw+S@slm.duckdns.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 582d2f18717e..d169e2055158 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1217,32 +1217,6 @@ void blkcg_exit_queue(struct request_queue *q)
 	blk_throtl_exit(q);
 }
 
-/*
- * We cannot support shared io contexts, as we have no mean to support
- * two tasks with the same ioc in two different groups without major rework
- * of the main cic data structures.  For now we allow a task to change
- * its cgroup only if it's the only owner of its ioc.
- */
-static int blkcg_can_attach(struct cgroup_taskset *tset)
-{
-	struct task_struct *task;
-	struct cgroup_subsys_state *dst_css;
-	struct io_context *ioc;
-	int ret = 0;
-
-	/* task_lock() is needed to avoid races with exit_io_context() */
-	cgroup_taskset_for_each(task, dst_css, tset) {
-		task_lock(task);
-		ioc = task->io_context;
-		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
-			ret = -EINVAL;
-		task_unlock(task);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-
 static void blkcg_bind(struct cgroup_subsys_state *root_css)
 {
 	int i;
@@ -1275,7 +1249,6 @@ struct cgroup_subsys io_cgrp_subsys = {
 	.css_online = blkcg_css_online,
 	.css_offline = blkcg_css_offline,
 	.css_free = blkcg_css_free,
-	.can_attach = blkcg_can_attach,
 	.css_rstat_flush = blkcg_rstat_flush,
 	.bind = blkcg_bind,
 	.dfl_cftypes = blkcg_files,

From 8c390ff910c5500fc16cca6f90ac2a60c7c84979 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Tue, 11 May 2021 15:53:19 +0000
Subject: [PATCH 0469/3292] block: remove unneeded parenthesis from blk-sysfs

Align to common code conventions.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Link: https://lore.kernel.org/r/20210511155319.1885277-1-mgurtovoy@nvidia.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e03bedf180ab..f89e2fc3963b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -91,7 +91,7 @@ static ssize_t queue_ra_show(struct request_queue *q, char *page)
 	unsigned long ra_kb = q->backing_dev_info->ra_pages <<
 					(PAGE_SHIFT - 10);
 
-	return queue_var_show(ra_kb, (page));
+	return queue_var_show(ra_kb, page);
 }
 
 static ssize_t
@@ -112,28 +112,28 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
 {
 	int max_sectors_kb = queue_max_sectors(q) >> 1;
 
-	return queue_var_show(max_sectors_kb, (page));
+	return queue_var_show(max_sectors_kb, page);
 }
 
 static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(queue_max_segments(q), (page));
+	return queue_var_show(queue_max_segments(q), page);
 }
 
 static ssize_t queue_max_discard_segments_show(struct request_queue *q,
 		char *page)
 {
-	return queue_var_show(queue_max_discard_segments(q), (page));
+	return queue_var_show(queue_max_discard_segments(q), page);
 }
 
 static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_integrity_segments, (page));
+	return queue_var_show(q->limits.max_integrity_segments, page);
 }
 
 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(queue_max_segment_size(q), (page));
+	return queue_var_show(queue_max_segment_size(q), page);
 }
 
 static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
@@ -261,12 +261,12 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 {
 	int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
 
-	return queue_var_show(max_hw_sectors_kb, (page));
+	return queue_var_show(max_hw_sectors_kb, page);
 }
 
 static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.virt_boundary_mask, (page));
+	return queue_var_show(q->limits.virt_boundary_mask, page);
 }
 
 #define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\

From 84da7acc3ba53af26f15c4b0ada446127b7a7836 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 11 May 2021 23:22:33 +0800
Subject: [PATCH 0470/3292] block: avoid double io accounting for flush request

For flush request, rq->end_io() may be called two times, one is from
timeout handling(blk_mq_check_expired()), another is from normal
completion(__blk_mq_end_request()).

Move blk_account_io_flush() after flush_rq->ref drops to zero, so
io accounting can be done just once for flush request.

Fixes: b68663186577 ("block: add iostat counters for flush requests")
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: John Garry <john.garry@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210511152236.763464-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-flush.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 7942ca6ed321..1002f6c58181 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -219,8 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 	unsigned long flags = 0;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
 
-	blk_account_io_flush(flush_rq);
-
 	/* release the tag's ownership to the req cloned from */
 	spin_lock_irqsave(&fq->mq_flush_lock, flags);
 
@@ -230,6 +228,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 		return;
 	}
 
+	blk_account_io_flush(flush_rq);
 	/*
 	 * Flush request has to be marked as IDLE when it is really ended
 	 * because its .end_io() is called from timeout code path too for

From 2e315dc07df009c3e29d6926871f62a30cfae394 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 11 May 2021 23:22:34 +0800
Subject: [PATCH 0471/3292] blk-mq: grab rq->refcount before calling ->fn in
 blk_mq_tagset_busy_iter

Grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter(), and
this way will prevent the request from being re-used when ->fn is
running. The approach is same as what we do during handling timeout.

Fix request use-after-free(UAF) related with completion race or queue
releasing:

- If one rq is referred before rq->q is frozen, then queue won't be
frozen before the request is released during iteration.

- If one rq is referred after rq->q is frozen, refcount_inc_not_zero()
will return false, and we won't iterate over this request.

However, still one request UAF not covered: refcount_inc_not_zero() may
read one freed request, and it will be handled in next patch.

Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210511152236.763464-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c | 44 +++++++++++++++++++++++++++++++++-----------
 block/blk-mq.c     | 14 +++++++++-----
 block/blk-mq.h     |  1 +
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 2a37731e8244..544edf2c56a5 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -199,6 +199,16 @@ struct bt_iter_data {
 	bool reserved;
 };
 
+static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
+		unsigned int bitnr)
+{
+	struct request *rq = tags->rqs[bitnr];
+
+	if (!rq || !refcount_inc_not_zero(&rq->ref))
+		return NULL;
+	return rq;
+}
+
 static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
 	struct bt_iter_data *iter_data = data;
@@ -206,18 +216,22 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_tags *tags = hctx->tags;
 	bool reserved = iter_data->reserved;
 	struct request *rq;
+	bool ret = true;
 
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
-	rq = tags->rqs[bitnr];
-
 	/*
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assigning ->rqs[].
 	 */
-	if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
-		return iter_data->fn(hctx, rq, iter_data->data, reserved);
-	return true;
+	rq = blk_mq_find_and_get_req(tags, bitnr);
+	if (!rq)
+		return true;
+
+	if (rq->q == hctx->queue && rq->mq_hctx == hctx)
+		ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
+	blk_mq_put_rq_ref(rq);
+	return ret;
 }
 
 /**
@@ -264,6 +278,8 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_tags *tags = iter_data->tags;
 	bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
 	struct request *rq;
+	bool ret = true;
+	bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
 
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
@@ -272,16 +288,19 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assigning ->rqs[].
 	 */
-	if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
+	if (iter_static_rqs)
 		rq = tags->static_rqs[bitnr];
 	else
-		rq = tags->rqs[bitnr];
+		rq = blk_mq_find_and_get_req(tags, bitnr);
 	if (!rq)
 		return true;
-	if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
-	    !blk_mq_request_started(rq))
-		return true;
-	return iter_data->fn(rq, iter_data->data, reserved);
+
+	if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
+	    blk_mq_request_started(rq))
+		ret = iter_data->fn(rq, iter_data->data, reserved);
+	if (!iter_static_rqs)
+		blk_mq_put_rq_ref(rq);
+	return ret;
 }
 
 /**
@@ -348,6 +367,9 @@ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
  *		indicates whether or not @rq is a reserved request. Return
  *		true to continue iterating tags, false to stop.
  * @priv:	Will be passed as second argument to @fn.
+ *
+ * We grab one request reference before calling @fn and release it after
+ * @fn returns.
  */
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 		busy_tag_iter_fn *fn, void *priv)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c86c01bfecdb..debfa5cd8025 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -909,6 +909,14 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 	return false;
 }
 
+void blk_mq_put_rq_ref(struct request *rq)
+{
+	if (is_flush_rq(rq, rq->mq_hctx))
+		rq->end_io(rq, 0);
+	else if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
+}
+
 static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
@@ -942,11 +950,7 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
 
-	if (is_flush_rq(rq, hctx))
-		rq->end_io(rq, 0);
-	else if (refcount_dec_and_test(&rq->ref))
-		__blk_mq_free_request(rq);
-
+	blk_mq_put_rq_ref(rq);
 	return true;
 }
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9ce64bc4a6c8..556368d2c5b6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -47,6 +47,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_ctx *start);
+void blk_mq_put_rq_ref(struct request *rq);
 
 /*
  * Internal helpers for allocating/freeing the request map

From bd63141d585bef14f4caf111f6d0e27fe2300ec6 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 11 May 2021 23:22:35 +0800
Subject: [PATCH 0472/3292] blk-mq: clear stale request in tags->rq[] before
 freeing one request pool

refcount_inc_not_zero() in bt_tags_iter() still may read one freed
request.

Fix the issue by the following approach:

1) hold a per-tags spinlock when reading ->rqs[tag] and calling
refcount_inc_not_zero in bt_tags_iter()

2) clearing stale request referred via ->rqs[tag] before freeing
request pool, the per-tags spinlock is held for clearing stale
->rq[tag]

So after we cleared stale requests, bt_tags_iter() won't observe
freed request any more, also the clearing will wait for pending
request reference.

The idea of clearing ->rqs[] is borrowed from John Garry's previous
patch and one recent David's patch.

Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: David Jeffery <djeffery@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210511152236.763464-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c |  9 +++++++--
 block/blk-mq-tag.h |  6 ++++++
 block/blk-mq.c     | 46 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 544edf2c56a5..1671dae43030 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -202,10 +202,14 @@ struct bt_iter_data {
 static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
 		unsigned int bitnr)
 {
-	struct request *rq = tags->rqs[bitnr];
+	struct request *rq;
+	unsigned long flags;
 
+	spin_lock_irqsave(&tags->lock, flags);
+	rq = tags->rqs[bitnr];
 	if (!rq || !refcount_inc_not_zero(&rq->ref))
-		return NULL;
+		rq = NULL;
+	spin_unlock_irqrestore(&tags->lock, flags);
 	return rq;
 }
 
@@ -538,6 +542,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
+	spin_lock_init(&tags->lock);
 
 	if (blk_mq_is_sbitmap_shared(flags))
 		return tags;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 7d3e6b333a4a..f887988e5ef6 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -20,6 +20,12 @@ struct blk_mq_tags {
 	struct request **rqs;
 	struct request **static_rqs;
 	struct list_head page_list;
+
+	/*
+	 * used to clear request reference in rqs[] before freeing one
+	 * request pool
+	 */
+	spinlock_t lock;
 };
 
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index debfa5cd8025..dd371f321d35 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2307,6 +2307,45 @@ queue_exit:
 	return BLK_QC_T_NONE;
 }
 
+static size_t order_to_size(unsigned int order)
+{
+	return (size_t)PAGE_SIZE << order;
+}
+
+/* called before freeing request pool in @tags */
+static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set,
+		struct blk_mq_tags *tags, unsigned int hctx_idx)
+{
+	struct blk_mq_tags *drv_tags = set->tags[hctx_idx];
+	struct page *page;
+	unsigned long flags;
+
+	list_for_each_entry(page, &tags->page_list, lru) {
+		unsigned long start = (unsigned long)page_address(page);
+		unsigned long end = start + order_to_size(page->private);
+		int i;
+
+		for (i = 0; i < set->queue_depth; i++) {
+			struct request *rq = drv_tags->rqs[i];
+			unsigned long rq_addr = (unsigned long)rq;
+
+			if (rq_addr >= start && rq_addr < end) {
+				WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
+				cmpxchg(&drv_tags->rqs[i], rq, NULL);
+			}
+		}
+	}
+
+	/*
+	 * Wait until all pending iteration is done.
+	 *
+	 * Request reference is cleared and it is guaranteed to be observed
+	 * after the ->lock is released.
+	 */
+	spin_lock_irqsave(&drv_tags->lock, flags);
+	spin_unlock_irqrestore(&drv_tags->lock, flags);
+}
+
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx)
 {
@@ -2325,6 +2364,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		}
 	}
 
+	blk_mq_clear_rq_mapping(set, tags, hctx_idx);
+
 	while (!list_empty(&tags->page_list)) {
 		page = list_first_entry(&tags->page_list, struct page, lru);
 		list_del_init(&page->lru);
@@ -2384,11 +2425,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	return tags;
 }
 
-static size_t order_to_size(unsigned int order)
-{
-	return (size_t)PAGE_SIZE << order;
-}
-
 static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			       unsigned int hctx_idx, int node)
 {

From 364b61818f65045479e42e76ed8dd6f051778280 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 11 May 2021 23:22:36 +0800
Subject: [PATCH 0473/3292] blk-mq: clearing flush request reference in
 tags->rqs[]

Before we free request queue, clearing flush request reference in
tags->rqs[], so that potential UAF can be avoided.

Based on one patch written by David Jeffery.

Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210511152236.763464-5-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index dd371f321d35..fbb165393790 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2643,16 +2643,49 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 					    &hctx->cpuhp_dead);
 }
 
+/*
+ * Before freeing hw queue, clearing the flush request reference in
+ * tags->rqs[] for avoiding potential UAF.
+ */
+static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
+		unsigned int queue_depth, struct request *flush_rq)
+{
+	int i;
+	unsigned long flags;
+
+	/* The hw queue may not be mapped yet */
+	if (!tags)
+		return;
+
+	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+
+	for (i = 0; i < queue_depth; i++)
+		cmpxchg(&tags->rqs[i], flush_rq, NULL);
+
+	/*
+	 * Wait until all pending iteration is done.
+	 *
+	 * Request reference is cleared and it is guaranteed to be observed
+	 * after the ->lock is released.
+	 */
+	spin_lock_irqsave(&tags->lock, flags);
+	spin_unlock_irqrestore(&tags->lock, flags);
+}
+
 /* hctx->ctxs will be freed in queue's release handler */
 static void blk_mq_exit_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct request *flush_rq = hctx->fq->flush_rq;
+
 	if (blk_mq_hw_queue_mapped(hctx))
 		blk_mq_tag_idle(hctx);
 
+	blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+			set->queue_depth, flush_rq);
 	if (set->ops->exit_request)
-		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
+		set->ops->exit_request(set, flush_rq, hctx_idx);
 
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);

From 56b68085e536eff2676108f2f8356889a7dbbf55 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 13 May 2021 20:00:57 +0800
Subject: [PATCH 0474/3292] blk-mq: Some tag allocation code refactoring

The tag allocation code to alloc the sbitmap pairs is common for regular
bitmaps tags and shared sbitmap, so refactor into a common function.

Also remove superfluous "flags" argument from blk_mq_init_shared_sbitmap().

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/1620907258-30910-2-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c | 56 ++++++++++++++++++++++++++++------------------
 block/blk-mq-tag.h |  9 +++++---
 block/blk-mq.c     |  2 +-
 3 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1671dae43030..f597d40de10b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -471,39 +471,54 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
 				       node);
 }
 
+int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
+			struct sbitmap_queue *breserved_tags,
+			unsigned int queue_depth, unsigned int reserved,
+			int node, int alloc_policy)
+{
+	unsigned int depth = queue_depth - reserved;
+	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
+
+	if (bt_alloc(bitmap_tags, depth, round_robin, node))
+		return -ENOMEM;
+	if (bt_alloc(breserved_tags, reserved, round_robin, node))
+		goto free_bitmap_tags;
+
+	return 0;
+
+free_bitmap_tags:
+	sbitmap_queue_free(bitmap_tags);
+	return -ENOMEM;
+}
+
 static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
 				   int node, int alloc_policy)
 {
-	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
-	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
+	int ret;
 
-	if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node))
-		return -ENOMEM;
-	if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags,
-		     round_robin, node))
-		goto free_bitmap_tags;
+	ret = blk_mq_init_bitmaps(&tags->__bitmap_tags,
+				  &tags->__breserved_tags,
+				  tags->nr_tags, tags->nr_reserved_tags,
+				  node, alloc_policy);
+	if (ret)
+		return ret;
 
 	tags->bitmap_tags = &tags->__bitmap_tags;
 	tags->breserved_tags = &tags->__breserved_tags;
 
 	return 0;
-free_bitmap_tags:
-	sbitmap_queue_free(&tags->__bitmap_tags);
-	return -ENOMEM;
 }
 
-int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
+int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
 {
-	unsigned int depth = set->queue_depth - set->reserved_tags;
 	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
-	int i, node = set->numa_node;
+	int i, ret;
 
-	if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node))
-		return -ENOMEM;
-	if (bt_alloc(&set->__breserved_tags, set->reserved_tags,
-		     round_robin, node))
-		goto free_bitmap_tags;
+	ret = blk_mq_init_bitmaps(&set->__bitmap_tags, &set->__breserved_tags,
+				  set->queue_depth, set->reserved_tags,
+				  set->numa_node, alloc_policy);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		struct blk_mq_tags *tags = set->tags[i];
@@ -513,9 +528,6 @@ int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
 	}
 
 	return 0;
-free_bitmap_tags:
-	sbitmap_queue_free(&set->__bitmap_tags);
-	return -ENOMEM;
 }
 
 void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index f887988e5ef6..8ed55af08427 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -32,11 +32,14 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
 					unsigned int reserved_tags,
 					int node, unsigned int flags);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
+extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
+			       struct sbitmap_queue *breserved_tags,
+			       unsigned int queue_depth,
+			       unsigned int reserved,
+			       int node, int alloc_policy);
 
-extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set,
-				      unsigned int flags);
+extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set);
 extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
-
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
 extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 			   unsigned int tag);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index fbb165393790..001e196bdebd 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3564,7 +3564,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (blk_mq_is_sbitmap_shared(set->flags)) {
 		atomic_set(&set->active_queues_shared_sbitmap, 0);
 
-		if (blk_mq_init_shared_sbitmap(set, set->flags)) {
+		if (blk_mq_init_shared_sbitmap(set)) {
 			ret = -ENOMEM;
 			goto out_free_mq_rq_maps;
 		}

From d97e594c51660bea510a387731637b894651e4b5 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 13 May 2021 20:00:58 +0800
Subject: [PATCH 0475/3292] blk-mq: Use request queue-wide tags for tagset-wide
 sbitmap

The tags used for an IO scheduler are currently per hctx.

As such, when q->nr_hw_queues grows, so does the request queue total IO
scheduler tag depth.

This may cause problems for SCSI MQ HBAs whose total driver depth is
fixed.

Ming and Yanhui report higher CPU usage and lower throughput in scenarios
where the fixed total driver tag depth is appreciably lower than the total
scheduler tag depth:
https://lore.kernel.org/linux-block/440dfcfc-1a2c-bd98-1161-cec4d78c6dfc@huawei.com/T/#mc0d6d4f95275a2743d1c8c3e4dc9ff6c9aa3a76b

In that scenario, since the scheduler tag is got first, much contention
is introduced since a driver tag may not be available after we have got
the sched tag.

Improve this scenario by introducing request queue-wide tags for when
a tagset-wide sbitmap is used. The static sched requests are still
allocated per hctx, as requests are initialised per hctx, as in
blk_mq_init_request(..., hctx_idx, ...) ->
set->ops->init_request(.., hctx_idx, ...).

For simplicity of resizing the request queue sbitmap when updating the
request queue depth, just init at the max possible size, so we don't need
to deal with the possibly with swapping out a new sbitmap for old if
we need to grow.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/1620907258-30910-3-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c   | 67 ++++++++++++++++++++++++++++++++++--------
 block/blk-mq-sched.h   |  2 ++
 block/blk-mq-tag.c     | 11 ++++---
 block/blk-mq.c         | 13 ++++++--
 include/linux/blkdev.h |  4 +++
 5 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 996a4b2f73aa..045b6878b8c5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -509,11 +509,9 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 				   struct blk_mq_hw_ctx *hctx,
 				   unsigned int hctx_idx)
 {
-	unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
-
 	if (hctx->sched_tags) {
 		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
-		blk_mq_free_rq_map(hctx->sched_tags, flags);
+		blk_mq_free_rq_map(hctx->sched_tags, set->flags);
 		hctx->sched_tags = NULL;
 	}
 }
@@ -523,12 +521,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
 				   unsigned int hctx_idx)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
-	/* Clear HCTX_SHARED so tags are init'ed */
-	unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
 	int ret;
 
 	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
-					       set->reserved_tags, flags);
+					       set->reserved_tags, set->flags);
 	if (!hctx->sched_tags)
 		return -ENOMEM;
 
@@ -546,16 +542,50 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		/* Clear HCTX_SHARED so tags are freed */
-		unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
-
 		if (hctx->sched_tags) {
-			blk_mq_free_rq_map(hctx->sched_tags, flags);
+			blk_mq_free_rq_map(hctx->sched_tags, hctx->flags);
 			hctx->sched_tags = NULL;
 		}
 	}
 }
 
+static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
+{
+	struct blk_mq_tag_set *set = queue->tag_set;
+	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
+	struct blk_mq_hw_ctx *hctx;
+	int ret, i;
+
+	/*
+	 * Set initial depth at max so that we don't need to reallocate for
+	 * updating nr_requests.
+	 */
+	ret = blk_mq_init_bitmaps(&queue->sched_bitmap_tags,
+				  &queue->sched_breserved_tags,
+				  MAX_SCHED_RQ, set->reserved_tags,
+				  set->numa_node, alloc_policy);
+	if (ret)
+		return ret;
+
+	queue_for_each_hw_ctx(queue, hctx, i) {
+		hctx->sched_tags->bitmap_tags =
+					&queue->sched_bitmap_tags;
+		hctx->sched_tags->breserved_tags =
+					&queue->sched_breserved_tags;
+	}
+
+	sbitmap_queue_resize(&queue->sched_bitmap_tags,
+			     queue->nr_requests - set->reserved_tags);
+
+	return 0;
+}
+
+static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
+{
+	sbitmap_queue_free(&queue->sched_bitmap_tags);
+	sbitmap_queue_free(&queue->sched_breserved_tags);
+}
+
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -580,12 +610,18 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_sched_alloc_tags(q, hctx, i);
 		if (ret)
-			goto err;
+			goto err_free_tags;
+	}
+
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+		ret = blk_mq_init_sched_shared_sbitmap(q);
+		if (ret)
+			goto err_free_tags;
 	}
 
 	ret = e->ops.init_sched(q, e);
 	if (ret)
-		goto err;
+		goto err_free_sbitmap;
 
 	blk_mq_debugfs_register_sched(q);
 
@@ -605,7 +641,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	return 0;
 
-err:
+err_free_sbitmap:
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
+		blk_mq_exit_sched_shared_sbitmap(q);
+err_free_tags:
 	blk_mq_sched_free_requests(q);
 	blk_mq_sched_tags_teardown(q);
 	q->elevator = NULL;
@@ -643,5 +682,7 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 	if (e->type->ops.exit_sched)
 		e->type->ops.exit_sched(e);
 	blk_mq_sched_tags_teardown(q);
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
+		blk_mq_exit_sched_shared_sbitmap(q);
 	q->elevator = NULL;
 }
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 5b18ab915c65..aff037cfd8e7 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -5,6 +5,8 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
+#define MAX_SCHED_RQ (16 * BLKDEV_MAX_RQ)
+
 void blk_mq_sched_assign_ioc(struct request *rq);
 
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index f597d40de10b..86f87346232a 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include "blk.h"
 #include "blk-mq.h"
+#include "blk-mq-sched.h"
 #include "blk-mq-tag.h"
 
 /*
@@ -590,8 +591,6 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 	 */
 	if (tdepth > tags->nr_tags) {
 		struct blk_mq_tag_set *set = hctx->queue->tag_set;
-		/* Only sched tags can grow, so clear HCTX_SHARED flag  */
-		unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
 		struct blk_mq_tags *new;
 		bool ret;
 
@@ -602,21 +601,21 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 		 * We need some sort of upper limit, set it high enough that
 		 * no valid use cases should require more.
 		 */
-		if (tdepth > 16 * BLKDEV_MAX_RQ)
+		if (tdepth > MAX_SCHED_RQ)
 			return -EINVAL;
 
 		new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
-				tags->nr_reserved_tags, flags);
+				tags->nr_reserved_tags, set->flags);
 		if (!new)
 			return -ENOMEM;
 		ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
 		if (ret) {
-			blk_mq_free_rq_map(new, flags);
+			blk_mq_free_rq_map(new, set->flags);
 			return -ENOMEM;
 		}
 
 		blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
-		blk_mq_free_rq_map(*tagsptr, flags);
+		blk_mq_free_rq_map(*tagsptr, set->flags);
 		*tagsptr = new;
 	} else {
 		/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 001e196bdebd..f11d4018ce2e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3640,15 +3640,24 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 		} else {
 			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
 							nr, true);
+			if (blk_mq_is_sbitmap_shared(set->flags)) {
+				hctx->sched_tags->bitmap_tags =
+					&q->sched_bitmap_tags;
+				hctx->sched_tags->breserved_tags =
+					&q->sched_breserved_tags;
+			}
 		}
 		if (ret)
 			break;
 		if (q->elevator && q->elevator->type->ops.depth_updated)
 			q->elevator->type->ops.depth_updated(hctx);
 	}
-
-	if (!ret)
+	if (!ret) {
 		q->nr_requests = nr;
+		if (q->elevator && blk_mq_is_sbitmap_shared(set->flags))
+			sbitmap_queue_resize(&q->sched_bitmap_tags,
+					     nr - set->reserved_tags);
+	}
 
 	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f69c75bd6d27..2c28577b50f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
 #include <linux/pm.h>
+#include <linux/sbitmap.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -493,6 +494,9 @@ struct request_queue {
 
 	atomic_t		nr_active_requests_shared_sbitmap;
 
+	struct sbitmap_queue	sched_bitmap_tags;
+	struct sbitmap_queue	sched_breserved_tags;
+
 	struct list_head	icq_list;
 #ifdef CONFIG_BLK_CGROUP
 	DECLARE_BITMAP		(blkcg_pols, BLKCG_MAX_POLS);

From 8184035805dc87dd826101b930d3dce97758f7b1 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 13 May 2021 15:37:30 -0500
Subject: [PATCH 0476/3292] rsxx: Use struct_size() in vmalloc()

Make use of the struct_size() helper instead of an open-coded version,
in order to avoid any potential type mistakes or integer overflows
that, in the worst scenario, could lead to heap overflows.

This code was detected with the help of Coccinelle and, audited and
fixed manually.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20210513203730.GA212128@embeddedor
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dma.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 0574f4495755..ed182f3dd054 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -74,9 +74,6 @@ struct dma_tracker {
 	struct rsxx_dma	*dma;
 };
 
-#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \
-		(sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS))
-
 struct dma_tracker_list {
 	spinlock_t		lock;
 	int			head;
@@ -808,7 +805,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 
 	memset(&ctrl->stats, 0, sizeof(ctrl->stats));
 
-	ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8);
+	ctrl->trackers = vmalloc(struct_size(ctrl->trackers, list,
+					     RSXX_MAX_OUTSTANDING_CMDS));
 	if (!ctrl->trackers)
 		return -ENOMEM;
 

From 558642bccede3d0e6ffebe4106b0719e29b9e4a8 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 20 May 2021 15:34:58 +0800
Subject: [PATCH 0477/3292] PM: wakeirq: Set IRQF_NO_AUTOEN when requesting the
 IRQ

request_irq() after setting IRQ_NOAUTOEN as below
irq_set_status_flags(irq, IRQ_NOAUTOEN);
request_irq(dev, irq...);
can be replaced by request_irq() with IRQF_NO_AUTOEN flag.

This change is just to simplify the code, no actual functional
changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Tony Lindgren <tony@atomide.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeirq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 8e021082dba8..3bad3266a2ad 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -182,7 +182,6 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 
 	wirq->dev = dev;
 	wirq->irq = irq;
-	irq_set_status_flags(irq, IRQ_NOAUTOEN);
 
 	/* Prevent deferred spurious wakeirqs with disable_irq_nosync() */
 	irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);
@@ -192,7 +191,8 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 	 * so we use a threaded irq.
 	 */
 	err = request_threaded_irq(irq, NULL, handle_threaded_wake_irq,
-				   IRQF_ONESHOT, wirq->name, wirq);
+				   IRQF_ONESHOT | IRQF_NO_AUTOEN,
+				   wirq->name, wirq);
 	if (err)
 		goto err_free_name;
 

From 6be2408a1ef632a48149044d1757c80ab1096213 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 24 May 2021 17:30:10 +0800
Subject: [PATCH 0478/3292] PM: hibernate: fix spelling mistakes

Fix some spelling mistakes in comments:

corresonds ==> corresponds
alocated ==> allocated
unitialized ==> uninitialized
Deompression ==> Decompression

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 8 ++++----
 kernel/power/swap.c     | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 1a221dcb3c01..af507c8c895b 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -331,7 +331,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
  *
  * Memory bitmap is a structure consisting of many linked lists of
  * objects.  The main list's elements are of type struct zone_bitmap
- * and each of them corresonds to one zone.  For each zone bitmap
+ * and each of them corresponds to one zone.  For each zone bitmap
  * object there is a list of objects of type struct bm_block that
  * represent each blocks of bitmap in which information is stored.
  *
@@ -1500,7 +1500,7 @@ static struct memory_bitmap copy_bm;
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
- * Image pages are alocated before snapshot creation, so they need to be
+ * Image pages are allocated before snapshot creation, so they need to be
  * released after resume.
  */
 void swsusp_free(void)
@@ -2326,7 +2326,7 @@ static struct memory_bitmap *safe_highmem_bm;
  * (@nr_highmem_p points to the variable containing the number of highmem image
  * pages).  The pages that are "safe" (ie. will not be overwritten when the
  * hibernation image is restored entirely) have the corresponding bits set in
- * @bm (it must be unitialized).
+ * @bm (it must be uninitialized).
  *
  * NOTE: This function should not be called if there are no highmem image pages.
  */
@@ -2483,7 +2483,7 @@ static inline void free_highmem_data(void) {}
 
 /**
  * prepare_image - Make room for loading hibernation image.
- * @new_bm: Unitialized memory bitmap structure.
+ * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index bea3cb8afa11..3cb89baebc79 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1125,7 +1125,7 @@ struct dec_data {
 };
 
 /**
- * Deompression function that runs in its own thread.
+ * Decompression function that runs in its own thread.
  */
 static int lzo_decompress_threadfn(void *data)
 {

From 64887bbddae56cb808089a7b3d5247d1a71a1e7e Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Mon, 24 May 2021 16:35:08 +0800
Subject: [PATCH 0479/3292] ACPI: ipmi: Remove address space handler in error
 path

The acpi_install_address_space_handler() is coupled with
acpi_remove_address_space_handler() in ipmi module init/exit, but
it forgets to remove the handler in acpi_ipmi_init() if the
ipmi_smi_watcher_register() call fails, so add the removal of the
address space handler in error path.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_ipmi.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c
index bbd00d96b7a8..a5fe2926bf50 100644
--- a/drivers/acpi/acpi_ipmi.c
+++ b/drivers/acpi/acpi_ipmi.c
@@ -597,9 +597,14 @@ static int __init acpi_ipmi_init(void)
 		pr_warn("Can't register IPMI opregion space handle\n");
 		return -EINVAL;
 	}
+
 	result = ipmi_smi_watcher_register(&driver_data.bmc_events);
-	if (result)
+	if (result) {
+		acpi_remove_address_space_handler(ACPI_ROOT_OBJECT,
+										  ACPI_ADR_SPACE_IPMI,
+										  &acpi_ipmi_space_handler);
 		pr_err("Can't register IPMI system interface watcher\n");
+	}
 
 	return result;
 }

From be7ae56809bf6d3e6ee80cc92f4096207640a2fb Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Mon, 24 May 2021 17:41:05 +0800
Subject: [PATCH 0480/3292] ACPI: configfs: Replace ACPI_INFO() with pr_debug()

The ACPI_INFO() macro is used for message printing in the ACPICA code.

ACPI_INFO() will be empty if the ACPICA debug is not enabled, so
replace it with pr_debug().

Also remove the not needed ACPICA header file inclusions to decouple
from ACPICA.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_configfs.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c
index 3a14859dbb75..76b83b181356 100644
--- a/drivers/acpi/acpi_configfs.c
+++ b/drivers/acpi/acpi_configfs.c
@@ -13,9 +13,6 @@
 #include <linux/acpi.h>
 #include <linux/security.h>
 
-#include "acpica/accommon.h"
-#include "acpica/actables.h"
-
 static struct config_group *acpi_table_group;
 
 struct acpi_table {
@@ -226,7 +223,7 @@ static void acpi_table_drop_item(struct config_group *group,
 {
 	struct acpi_table *table = container_of(cfg, struct acpi_table, cfg);
 
-	ACPI_INFO(("Host-directed Dynamic ACPI Table Unload"));
+	pr_debug("Host-directed Dynamic ACPI Table Unload\n");
 	acpi_unload_table(table->index);
 	config_item_put(cfg);
 }

From f9bceaa59c5c47a8a08f48e19cbe887e500a1978 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 20 May 2021 20:06:53 -0700
Subject: [PATCH 0481/3292] libbpf: Skip bpf_object__probe_loading for light
 skeleton

I'm getting the following error when running 'gen skeleton -L' as
regular user:

libbpf: Error in bpf_object__probe_loading():Operation not permitted(1).
Couldn't load trivial BPF program. Make sure your kernel supports BPF
(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough
value.

Fixes: 67234743736a ("libbpf: Generate loader program out of BPF ELF file.")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210521030653.2626513-1-sdf@google.com
---
 tools/lib/bpf/libbpf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index dc4d5fe6d9d2..b396e45b17ea 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3971,6 +3971,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
 	};
 	int ret;
 
+	if (obj->gen_loader)
+		return 0;
+
 	/* make sure basic loading works */
 
 	memset(&attr, 0, sizeof(attr));

From 62f148d8dde6239199af49e52ae43d0820765a65 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Mon, 24 May 2021 10:11:23 +0800
Subject: [PATCH 0482/3292] nfc: st-nci: remove unnecessary assignment and
 label

In function st_nci_hci_network_init, the variable r is assigned then
goto exit label, which just return r, so we use return to replace it.
and exit label only used once at here, so we remove exit label.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/st-nci/se.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c
index 1cba8f69d3ae..8657e025166f 100644
--- a/drivers/nfc/st-nci/se.c
+++ b/drivers/nfc/st-nci/se.c
@@ -534,10 +534,8 @@ static int st_nci_hci_network_init(struct nci_dev *ndev)
 	dest_params =
 		kzalloc(sizeof(struct core_conn_create_dest_spec_params) +
 			sizeof(struct dest_spec_params), GFP_KERNEL);
-	if (dest_params == NULL) {
-		r = -ENOMEM;
-		goto exit;
-	}
+	if (dest_params == NULL)
+		return -ENOMEM;
 
 	dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE;
 	dest_params->length = sizeof(struct dest_spec_params);
@@ -594,8 +592,6 @@ static int st_nci_hci_network_init(struct nci_dev *ndev)
 
 free_dest_params:
 	kfree(dest_params);
-
-exit:
 	return r;
 }
 

From 1e6e76101fd9f51319a742991778bdc3b2d992d9 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 24 May 2021 17:30:42 +0800
Subject: [PATCH 0483/3292] net: hns3: configure promisc mode for VF
 asynchronously

Currently, when host set VF untrusted, the driver will disable
the promisc mode of VF. It may be conflicted when the VF requests
the host to set promisc mode. So refactor it by changing promisc
mode for VF asynchronously. With this change, the promisc mode of
VF can be restored when the VF being trusted again.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 43 +++++++++++++------
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  5 ++-
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 40 ++++-------------
 3 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 3882f829fc49..d37767dc5d85 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11500,10 +11500,7 @@ static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
 	u32 new_trusted = enable ? 1 : 0;
-	bool en_bc_pmc;
-	int ret;
 
 	vport = hclge_get_vf_vport(hdev, vf);
 	if (!vport)
@@ -11512,18 +11509,9 @@ static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable)
 	if (vport->vf_info.trusted == new_trusted)
 		return 0;
 
-	/* Disable promisc mode for VF if it is not trusted any more. */
-	if (!enable && vport->vf_info.promisc_enable) {
-		en_bc_pmc = ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2;
-		ret = hclge_set_vport_promisc_mode(vport, false, false,
-						   en_bc_pmc);
-		if (ret)
-			return ret;
-		vport->vf_info.promisc_enable = 0;
-		hclge_inform_vf_promisc_info(vport);
-	}
-
 	vport->vf_info.trusted = new_trusted;
+	set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
+	hclge_task_schedule(hdev, 0);
 
 	return 0;
 }
@@ -12417,6 +12405,7 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 	struct hnae3_handle *handle = &vport->nic;
 	u8 tmp_flags;
 	int ret;
+	u16 i;
 
 	if (vport->last_promisc_flags != vport->overflow_promisc_flags) {
 		set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
@@ -12433,6 +12422,32 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 						 tmp_flags & HNAE3_VLAN_FLTR);
 		}
 	}
+
+	for (i = 1; i < hdev->num_alloc_vport; i++) {
+		bool uc_en = false;
+		bool mc_en = false;
+		bool bc_en;
+
+		vport = &hdev->vport[i];
+
+		if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+					&vport->state))
+			continue;
+
+		if (vport->vf_info.trusted) {
+			uc_en = vport->vf_info.request_uc_en > 0;
+			mc_en = vport->vf_info.request_mc_en > 0;
+		}
+		bc_en = vport->vf_info.request_bc_en > 0;
+
+		ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en,
+						 mc_en, bc_en);
+		if (ret) {
+			set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+				&vport->state);
+			return;
+		}
+	}
 }
 
 static bool hclge_module_existed(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 4bdb0243a97a..8425dae9e8b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -952,6 +952,7 @@ struct hclge_rss_tuple_cfg {
 enum HCLGE_VPORT_STATE {
 	HCLGE_VPORT_STATE_ALIVE,
 	HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
+	HCLGE_VPORT_STATE_PROMISC_CHANGE,
 	HCLGE_VPORT_STATE_MAX
 };
 
@@ -972,7 +973,9 @@ struct hclge_vf_info {
 	u32 spoofchk;
 	u32 max_tx_rate;
 	u32 trusted;
-	u16 promisc_enable;
+	u8 request_uc_en;
+	u8 request_mc_en;
+	u8 request_bc_en;
 };
 
 struct hclge_vport {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 8e5f9dc8791d..d86fc5eed6cd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -231,19 +231,15 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en,
 	return ret;
 }
 
-static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
-				     struct hclge_mbx_vf_to_pf_cmd *req)
+static void hclge_set_vf_promisc_mode(struct hclge_vport *vport,
+				      struct hclge_mbx_vf_to_pf_cmd *req)
 {
-	bool en_bc = req->msg.en_bc ? true : false;
-	bool en_uc = req->msg.en_uc ? true : false;
-	bool en_mc = req->msg.en_mc ? true : false;
 	struct hnae3_handle *handle = &vport->nic;
-	int ret;
+	struct hclge_dev *hdev = vport->back;
 
-	if (!vport->vf_info.trusted) {
-		en_uc = false;
-		en_mc = false;
-	}
+	vport->vf_info.request_uc_en = req->msg.en_uc;
+	vport->vf_info.request_mc_en = req->msg.en_mc;
+	vport->vf_info.request_bc_en = req->msg.en_bc;
 
 	if (req->msg.en_limit_promisc)
 		set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->priv_flags);
@@ -251,22 +247,8 @@ static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
 		clear_bit(HNAE3_PFLAG_LIMIT_PROMISC,
 			  &handle->priv_flags);
 
-	ret = hclge_set_vport_promisc_mode(vport, en_uc, en_mc, en_bc);
-
-	vport->vf_info.promisc_enable = (en_uc || en_mc) ? 1 : 0;
-
-	return ret;
-}
-
-void hclge_inform_vf_promisc_info(struct hclge_vport *vport)
-{
-	u8 dest_vfid = (u8)vport->vport_id;
-	u8 msg_data[2];
-
-	memcpy(&msg_data[0], &vport->vf_info.promisc_enable, sizeof(u16));
-
-	hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
-			   HCLGE_MBX_PUSH_PROMISC_INFO, dest_vfid);
+	set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
+	hclge_task_schedule(hdev, 0);
 }
 
 static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
@@ -748,11 +730,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 								req);
 			break;
 		case HCLGE_MBX_SET_PROMISC_MODE:
-			ret = hclge_set_vf_promisc_mode(vport, req);
-			if (ret)
-				dev_err(&hdev->pdev->dev,
-					"PF fail(%d) to set VF promisc mode\n",
-					ret);
+			hclge_set_vf_promisc_mode(vport, req);
 			break;
 		case HCLGE_MBX_SET_UNICAST:
 			ret = hclge_set_vf_uc_mac_addr(vport, req);

From 4e2471f7b6ef5a564cd05bc5fb9f3ce71b7b7942 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 24 May 2021 17:30:43 +0800
Subject: [PATCH 0484/3292] net: hns3: use HCLGE_VPORT_STATE_PROMISC_CHANGE to
 replace HCLGE_STATE_PROMISC_CHANGED

Currently, PF is using HCLGE_STATE_PROMISC_CHANGED to indicate
need synchronize the promisc mode for itself, and using flag
HCLGE_VPORT_STATE_PROMISC_CHANGE for its VF. To keep consistent,
remove flag HCLGE_STATE_PROMISC_CHANGED, and use flag
HCLGE_VPORT_STATE_PROMISC_CHANGE instead.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c  | 12 ++++++------
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h  |  1 -
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d37767dc5d85..6addeb299bba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5183,9 +5183,8 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 static void hclge_request_update_promisc_mode(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
 
-	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+	set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
 }
 
 static void hclge_sync_fd_state(struct hclge_dev *hdev)
@@ -8050,6 +8049,7 @@ int hclge_vport_start(struct hclge_vport *vport)
 	struct hclge_dev *hdev = vport->back;
 
 	set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
+	set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
 	vport->last_active_jiffies = jiffies;
 
 	if (test_bit(vport->vport_id, hdev->vport_config_block)) {
@@ -10048,7 +10048,6 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev)
 
 	hclge_restore_mac_table_common(vport);
 	hclge_restore_vport_vlan_table(vport);
-	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
 	set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
 	hclge_restore_fd_entries(handle);
 }
@@ -12408,16 +12407,17 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 	u16 i;
 
 	if (vport->last_promisc_flags != vport->overflow_promisc_flags) {
-		set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+		set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state);
 		vport->last_promisc_flags = vport->overflow_promisc_flags;
 	}
 
-	if (test_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state)) {
+	if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) {
 		tmp_flags = handle->netdev_flags | vport->last_promisc_flags;
 		ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE,
 					     tmp_flags & HNAE3_MPE);
 		if (!ret) {
-			clear_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+			clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
+				  &vport->state);
 			hclge_enable_vlan_filter(handle,
 						 tmp_flags & HNAE3_VLAN_FLTR);
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 8425dae9e8b0..9e4d02d73bf3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -224,7 +224,6 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_STATISTICS_UPDATING,
 	HCLGE_STATE_CMD_DISABLE,
 	HCLGE_STATE_LINK_UPDATING,
-	HCLGE_STATE_PROMISC_CHANGED,
 	HCLGE_STATE_RST_FAIL,
 	HCLGE_STATE_FD_TBL_CHANGED,
 	HCLGE_STATE_FD_CLEAR_ALL,

From 3e87f192b405960c0fe83e0925bd0dadf4f8cf43 Mon Sep 17 00:00:00 2001
From: Denis Salopek <denis.salopek@sartura.hr>
Date: Tue, 11 May 2021 23:00:04 +0200
Subject: [PATCH 0485/3292] bpf: Add lookup_and_delete_elem support to hashtab

Extend the existing bpf_map_lookup_and_delete_elem() functionality to
hashtab map types, in addition to stacks and queues.
Create a new hashtab bpf_map_ops function that does lookup and deletion
of the element under the same bucket lock and add the created map_ops to
bpf.h.

Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/4d18480a3e990ffbf14751ddef0325eed3be2966.1620763117.git.denis.salopek@sartura.hr
---
 include/linux/bpf.h            |  2 +
 include/uapi/linux/bpf.h       | 13 +++++
 kernel/bpf/hashtab.c           | 98 ++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           | 34 ++++++++++--
 tools/include/uapi/linux/bpf.h | 13 +++++
 5 files changed, 156 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dc44ba97584..1e9a0ff3217b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -70,6 +70,8 @@ struct bpf_map_ops {
 	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
 	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
 				union bpf_attr __user *uattr);
+	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
+					  void *value, u64 flags);
 	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
 					   const union bpf_attr *attr,
 					   union bpf_attr __user *uattr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 418b9b813d65..562adeac1d67 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
  *		Look up an element with the given *key* in the map referred to
  *		by the file descriptor *fd*, and if found, delete the element.
  *
+ *		For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *		types, the *flags* argument needs to be set to 0, but for other
+ *		map types, it may be specified as:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up and delete the value of a spin-locked map
+ *			without returning the lock. This must be specified if
+ *			the elements contain a spinlock.
+ *
  *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
  *		implement this command as a "pop" operation, deleting the top
  *		element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
  *		This command is only valid for the following map types:
  *		* **BPF_MAP_TYPE_QUEUE**
  *		* **BPF_MAP_TYPE_STACK**
+ *		* **BPF_MAP_TYPE_HASH**
+ *		* **BPF_MAP_TYPE_PERCPU_HASH**
+ *		* **BPF_MAP_TYPE_LRU_HASH**
+ *		* **BPF_MAP_TYPE_LRU_PERCPU_HASH**
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d7ebb12ffffc..9da0a0413a53 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1401,6 +1401,100 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
+static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					     void *value, bool is_lru_map,
+					     bool is_percpu, u64 flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_head *head;
+	unsigned long bflags;
+	struct htab_elem *l;
+	u32 hash, key_size;
+	struct bucket *b;
+	int ret;
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	b = __select_bucket(htab, hash);
+	head = &b->head;
+
+	ret = htab_lock_bucket(htab, b, hash, &bflags);
+	if (ret)
+		return ret;
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+	if (!l) {
+		ret = -ENOENT;
+	} else {
+		if (is_percpu) {
+			u32 roundup_value_size = round_up(map->value_size, 8);
+			void __percpu *pptr;
+			int off = 0, cpu;
+
+			pptr = htab_elem_get_ptr(l, key_size);
+			for_each_possible_cpu(cpu) {
+				bpf_long_memcpy(value + off,
+						per_cpu_ptr(pptr, cpu),
+						roundup_value_size);
+				off += roundup_value_size;
+			}
+		} else {
+			u32 roundup_key_size = round_up(map->key_size, 8);
+
+			if (flags & BPF_F_LOCK)
+				copy_map_value_locked(map, value, l->key +
+						      roundup_key_size,
+						      true);
+			else
+				copy_map_value(map, value, l->key +
+					       roundup_key_size);
+			check_and_init_map_lock(map, value);
+		}
+
+		hlist_nulls_del_rcu(&l->hash_node);
+		if (!is_lru_map)
+			free_htab_elem(htab, l);
+	}
+
+	htab_unlock_bucket(htab, b, hash, bflags);
+
+	if (is_lru_map && l)
+		bpf_lru_push_free(&htab->lru, &l->lru_node);
+
+	return ret;
+}
+
+static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					   void *value, u64 flags)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
+						 flags);
+}
+
+static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+						  void *key, void *value,
+						  u64 flags)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
+						 flags);
+}
+
+static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					       void *value, u64 flags)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
+						 flags);
+}
+
+static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+						      void *key, void *value,
+						      u64 flags)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
+						 flags);
+}
+
 static int
 __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 				   const union bpf_attr *attr,
@@ -1934,6 +2028,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2049,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
 	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
 	.map_update_elem = htab_lru_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2173,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_percpu_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2193,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1d1cd80a6e67..50457019da27 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1483,7 +1483,7 @@ free_buf:
 	return err;
 }
 
-#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
 
 static int map_lookup_and_delete_elem(union bpf_attr *attr)
 {
@@ -1499,6 +1499,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
 		return -EINVAL;
 
+	if (attr->flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
 	f = fdget(ufd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -1509,24 +1512,47 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if (attr->flags &&
+	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
+	     map->map_type == BPF_MAP_TYPE_STACK)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
 	}
 
-	value_size = map->value_size;
+	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
 	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
+	err = -ENOTSUPP;
 	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 	    map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_pop_elem(map, value);
-	} else {
-		err = -ENOTSUPP;
+	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
+		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		if (!bpf_map_is_dev_bound(map)) {
+			bpf_disable_instrumentation();
+			rcu_read_lock();
+			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
+			rcu_read_unlock();
+			bpf_enable_instrumentation();
+		}
 	}
 
 	if (err)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 418b9b813d65..562adeac1d67 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
  *		Look up an element with the given *key* in the map referred to
  *		by the file descriptor *fd*, and if found, delete the element.
  *
+ *		For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *		types, the *flags* argument needs to be set to 0, but for other
+ *		map types, it may be specified as:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up and delete the value of a spin-locked map
+ *			without returning the lock. This must be specified if
+ *			the elements contain a spinlock.
+ *
  *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
  *		implement this command as a "pop" operation, deleting the top
  *		element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
  *		This command is only valid for the following map types:
  *		* **BPF_MAP_TYPE_QUEUE**
  *		* **BPF_MAP_TYPE_STACK**
+ *		* **BPF_MAP_TYPE_HASH**
+ *		* **BPF_MAP_TYPE_PERCPU_HASH**
+ *		* **BPF_MAP_TYPE_LRU_HASH**
+ *		* **BPF_MAP_TYPE_LRU_PERCPU_HASH**
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*

From d59b9f2d1b2211e948044a099fd0a65941d06570 Mon Sep 17 00:00:00 2001
From: Denis Salopek <denis.salopek@sartura.hr>
Date: Tue, 11 May 2021 23:00:05 +0200
Subject: [PATCH 0486/3292] bpf: Extend libbpf with
 bpf_map_lookup_and_delete_elem_flags

Add bpf_map_lookup_and_delete_elem_flags() libbpf API in order to use
the BPF_F_LOCK flag with the map_lookup_and_delete_elem() function.

Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/15b05dafe46c7e0750d110f233977372029d1f62.1620763117.git.denis.salopek@sartura.hr
---
 tools/lib/bpf/bpf.c      | 13 +++++++++++++
 tools/lib/bpf/bpf.h      |  2 ++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 16 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index bba48ff4c5c0..b7c2cc12034c 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -458,6 +458,19 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+	attr.flags = flags;
+
+	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 875dde20d56e..4f758f8f50cd 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -124,6 +124,8 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
 					 __u64 flags);
 LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 					      void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
+						    void *value, __u64 flags);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 2abef6f17c06..0229e01e8ccc 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -361,6 +361,7 @@ LIBBPF_0.4.0 {
 		bpf_linker__new;
 		bpf_map__initial_value;
 		bpf_map__inner_map;
+		bpf_map_lookup_and_delete_elem_flags;
 		bpf_object__gen_loader;
 		bpf_object__set_kversion;
 		bpf_tc_attach;

From 49c299b69444d58a1d234769a13a3697841deb54 Mon Sep 17 00:00:00 2001
From: Denis Salopek <denis.salopek@sartura.hr>
Date: Tue, 11 May 2021 23:00:06 +0200
Subject: [PATCH 0487/3292] selftests/bpf: Add bpf_lookup_and_delete_elem tests

Add bpf selftests and extend existing ones for a new function
bpf_lookup_and_delete_elem() for (percpu) hash and (percpu) LRU hash map
types.
In test_lru_map and test_maps we add an element, lookup_and_delete it,
then check whether it's deleted.
The newly added lookup_and_delete prog tests practically do the same
thing but additionally use a BPF program to change the value of the
element for LRU maps.

Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/d30d3e0060c1f750e133579623cf1c60ff58f3d9.1620763117.git.denis.salopek@sartura.hr
---
 .../bpf/prog_tests/lookup_and_delete.c        | 288 ++++++++++++++++++
 .../bpf/progs/test_lookup_and_delete.c        |  26 ++
 tools/testing/selftests/bpf/test_lru_map.c    |   8 +
 tools/testing/selftests/bpf/test_maps.c       |  17 ++
 4 files changed, 339 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c

diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..beebfa9730e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+	__u64 key, value = START_VALUE;
+	int err;
+
+	for (key = 1; key < MAX_ENTRIES + 1; key++) {
+		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+	__u64 key, value[nr_cpus];
+	int i, err;
+
+	for (i = 0; i < nr_cpus; i++)
+		value[i] = START_VALUE;
+
+	for (key = 1; key < MAX_ENTRIES + 1; key++) {
+		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+						 int *map_fd)
+{
+	struct test_lookup_and_delete *skel;
+	int err;
+
+	skel = test_lookup_and_delete__open();
+	if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+		return NULL;
+
+	err = bpf_map__set_type(skel->maps.hash_map, map_type);
+	if (!ASSERT_OK(err, "bpf_map__set_type"))
+		goto cleanup;
+
+	err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+	if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+		goto cleanup;
+
+	err = test_lookup_and_delete__load(skel);
+	if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+		goto cleanup;
+
+	*map_fd = bpf_map__fd(skel->maps.hash_map);
+	if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+		goto cleanup;
+
+	return skel;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+	return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+		      __u64 value)
+{
+	int err;
+
+	skel->bss->set_pid = getpid();
+	skel->bss->set_key = key;
+	skel->bss->set_value = value;
+
+	err = test_lookup_and_delete__attach(skel);
+	if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+		return -1;
+
+	syscall(__NR_getpgid);
+
+	test_lookup_and_delete__detach(skel);
+
+	return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	__u64 key, value;
+	int map_fd, err;
+
+	/* Setup program and fill the map. */
+	skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+	if (!ASSERT_OK_PTR(skel, "setup_prog"))
+		return;
+
+	err = fill_values(map_fd);
+	if (!ASSERT_OK(err, "fill_values"))
+		goto cleanup;
+
+	/* Lookup and delete element. */
+	key = 1;
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+		goto cleanup;
+
+	/* Fetched value should match the initially set value. */
+	if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+		  "unexpected value=%lld\n", value))
+		goto cleanup;
+
+	/* Check that the entry is non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	__u64 key, val, value[nr_cpus];
+	int map_fd, err, i;
+
+	/* Setup program and fill the map. */
+	skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+	if (!ASSERT_OK_PTR(skel, "setup_prog"))
+		return;
+
+	err = fill_values_percpu(map_fd);
+	if (!ASSERT_OK(err, "fill_values_percpu"))
+		goto cleanup;
+
+	/* Lookup and delete element. */
+	key = 1;
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+		goto cleanup;
+
+	for (i = 0; i < nr_cpus; i++) {
+		val = value[i];
+
+		/* Fetched value should match the initially set value. */
+		if (CHECK(val != START_VALUE, "map value",
+			  "unexpected for cpu %d: %lld\n", i, val))
+			goto cleanup;
+	}
+
+	/* Check that the entry is non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, value);
+	if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	__u64 key, value;
+	int map_fd, err;
+
+	/* Setup program and fill the LRU map. */
+	skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+	if (!ASSERT_OK_PTR(skel, "setup_prog"))
+		return;
+
+	err = fill_values(map_fd);
+	if (!ASSERT_OK(err, "fill_values"))
+		goto cleanup;
+
+	/* Insert new element at key=3, should reuse LRU element. */
+	key = 3;
+	err = trigger_tp(skel, key, NEW_VALUE);
+	if (!ASSERT_OK(err, "trigger_tp"))
+		goto cleanup;
+
+	/* Lookup and delete element 3. */
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+		goto cleanup;
+
+	/* Value should match the new value. */
+	if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+		  "unexpected value=%lld\n", value))
+		goto cleanup;
+
+	/* Check that entries 3 and 1 are non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+	key = 1;
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	__u64 key, val, value[nr_cpus];
+	int map_fd, err, i, cpucnt = 0;
+
+	/* Setup program and fill the LRU map. */
+	skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+	if (!ASSERT_OK_PTR(skel, "setup_prog"))
+		return;
+
+	err = fill_values_percpu(map_fd);
+	if (!ASSERT_OK(err, "fill_values_percpu"))
+		goto cleanup;
+
+	/* Insert new element at key=3, should reuse LRU element 1. */
+	key = 3;
+	err = trigger_tp(skel, key, NEW_VALUE);
+	if (!ASSERT_OK(err, "trigger_tp"))
+		goto cleanup;
+
+	/* Clean value. */
+	for (i = 0; i < nr_cpus; i++)
+		value[i] = 0;
+
+	/* Lookup and delete element 3. */
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) {
+		goto cleanup;
+	}
+
+	/* Check if only one CPU has set the value. */
+	for (i = 0; i < nr_cpus; i++) {
+		val = value[i];
+		if (val) {
+			if (CHECK(val != NEW_VALUE, "map value",
+				  "unexpected for cpu %d: %lld\n", i, val))
+				goto cleanup;
+			cpucnt++;
+		}
+	}
+	if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+		  cpucnt))
+		goto cleanup;
+
+	/* Check that entries 3 and 1 are non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+	key = 1;
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+	nr_cpus = bpf_num_possible_cpus();
+
+	if (test__start_subtest("lookup_and_delete"))
+		test_lookup_and_delete_hash();
+	if (test__start_subtest("lookup_and_delete_percpu"))
+		test_lookup_and_delete_percpu_hash();
+	if (test__start_subtest("lookup_and_delete_lru"))
+		test_lookup_and_delete_lru_hash();
+	if (test__start_subtest("lookup_and_delete_lru_percpu"))
+		test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..3a193f42c7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, __u64);
+	__type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+	if (set_pid == bpf_get_current_pid_tgid() >> 32)
+		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..7e9049fa3edf 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
 	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
 	       errno == ENOENT);
 
+	/* lookup elem key=1 and delete it, then check it doesn't exist */
+	key = 1;
+	assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+	assert(value[0] == 1234);
+
+	/* remove the same element from the expected map */
+	assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
 	close(expected_map_fd);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..8410a730c82f 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -65,6 +65,13 @@ static void test_hashmap(unsigned int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
 	key = 2;
+	value = 1234;
+	/* Insert key=2 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+	/* Check that key=2 matches the value and delete it */
+	assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
 	/* Check that key=2 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
@@ -166,6 +173,16 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 	/* Insert key=1 element. */
 	assert(!(expected_key_mask & key));
 	assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+	/* Lookup and delete elem key=1 and check value. */
+	assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+	       bpf_percpu(value,0) == 100);
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(value,i) = i + 100;
+
+	/* Insert key=1 element which should not exist. */
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
 	expected_key_mask |= key;
 
 	/* BPF_NOEXIST means add new element if it doesn't exist. */

From 542043e91df452ed09f382d8c41cdf3788f31b5e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:13 +0300
Subject: [PATCH 0488/3292] net: dsa: sja1105: parameterize the number of ports

The sja1105 driver will gain support for the next-gen SJA1110 switch,
which is very similar except for the fact it has more than 5 ports.

So we need to replace the hardcoded SJA1105_NUM_PORTS in this driver
with ds->num_ports. This patch is as mechanical as possible (save for
the fact that ds->num_ports is not an integer constant expression).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_clocking.c |  3 +-
 drivers/net/dsa/sja1105/sja1105_flower.c   |  9 ++--
 drivers/net/dsa/sja1105/sja1105_main.c     | 61 +++++++++++++---------
 drivers/net/dsa/sja1105/sja1105_spi.c      |  4 +-
 drivers/net/dsa/sja1105/sja1105_tas.c      | 14 ++---
 5 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 2a9b8a6a5306..f54b4d03a002 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -721,9 +721,10 @@ int sja1105_clocking_setup_port(struct sja1105_private *priv, int port)
 
 int sja1105_clocking_setup(struct sja1105_private *priv)
 {
+	struct dsa_switch *ds = priv->ds;
 	int port, rc;
 
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		rc = sja1105_clocking_setup_port(priv, port);
 		if (rc < 0)
 			return rc;
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 973761132fc3..77c54126b3fc 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -35,6 +35,7 @@ static int sja1105_setup_bcast_policer(struct sja1105_private *priv,
 {
 	struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
 	struct sja1105_l2_policing_entry *policing;
+	struct dsa_switch *ds = priv->ds;
 	bool new_rule = false;
 	unsigned long p;
 	int rc;
@@ -59,7 +60,7 @@ static int sja1105_setup_bcast_policer(struct sja1105_private *priv,
 
 	policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
 
-	if (policing[(SJA1105_NUM_PORTS * SJA1105_NUM_TC) + port].sharindx != port) {
+	if (policing[(ds->num_ports * SJA1105_NUM_TC) + port].sharindx != port) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Port already has a broadcast policer");
 		rc = -EEXIST;
@@ -72,7 +73,7 @@ static int sja1105_setup_bcast_policer(struct sja1105_private *priv,
 	 * point to the newly allocated policer
 	 */
 	for_each_set_bit(p, &rule->port_mask, SJA1105_NUM_PORTS) {
-		int bcast = (SJA1105_NUM_PORTS * SJA1105_NUM_TC) + p;
+		int bcast = (ds->num_ports * SJA1105_NUM_TC) + p;
 
 		policing[bcast].sharindx = rule->bcast_pol.sharindx;
 	}
@@ -435,7 +436,7 @@ int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
 	policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
 
 	if (rule->type == SJA1105_RULE_BCAST_POLICER) {
-		int bcast = (SJA1105_NUM_PORTS * SJA1105_NUM_TC) + port;
+		int bcast = (ds->num_ports * SJA1105_NUM_TC) + port;
 
 		old_sharindx = policing[bcast].sharindx;
 		policing[bcast].sharindx = port;
@@ -486,7 +487,7 @@ void sja1105_flower_setup(struct dsa_switch *ds)
 
 	INIT_LIST_HEAD(&priv->flow_block.rules);
 
-	for (port = 0; port < SJA1105_NUM_PORTS; port++)
+	for (port = 0; port < ds->num_ports; port++)
 		priv->flow_block.l2_policer_used[port] = true;
 }
 
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 802314e90e64..ed98bb51596a 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -106,6 +106,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 		.ingress = false,
 	};
 	struct sja1105_mac_config_entry *mac;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	int i;
 
@@ -117,16 +118,16 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_NUM_PORTS,
+	table->entries = kcalloc(ds->num_ports,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_NUM_PORTS;
+	table->entry_count = ds->num_ports;
 
 	mac = table->entries;
 
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		mac[i] = default_mac;
 		if (i == dsa_upstream_port(priv->ds, i)) {
 			/* STP doesn't get called for CPU port, so we need to
@@ -161,6 +162,7 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 {
 	struct device *dev = &priv->spidev->dev;
 	struct sja1105_xmii_params_entry *mii;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	int i;
 
@@ -182,7 +184,7 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 
 	mii = table->entries;
 
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		if (dsa_is_unused_port(priv->ds, i))
 			continue;
 
@@ -265,8 +267,6 @@ static int sja1105_init_static_fdb(struct sja1105_private *priv)
 
 static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 {
-	struct sja1105_table *table;
-	u64 max_fdb_entries = SJA1105_MAX_L2_LOOKUP_COUNT / SJA1105_NUM_PORTS;
 	struct sja1105_l2_lookup_params_entry default_l2_lookup_params = {
 		/* Learned FDB entries are forgotten after 300 seconds */
 		.maxage = SJA1105_AGEING_TIME_MS(300000),
@@ -274,8 +274,6 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 		.dyn_tbsz = SJA1105ET_FDB_BIN_SIZE,
 		/* And the P/Q/R/S equivalent setting: */
 		.start_dynspc = 0,
-		.maxaddrp = {max_fdb_entries, max_fdb_entries, max_fdb_entries,
-			     max_fdb_entries, max_fdb_entries, },
 		/* 2^8 + 2^5 + 2^3 + 2^2 + 2^1 + 1 in Koopman notation */
 		.poly = 0x97,
 		/* This selects between Independent VLAN Learning (IVL) and
@@ -299,6 +297,15 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 		.owr_dyn = true,
 		.drpnolearn = true,
 	};
+	struct dsa_switch *ds = priv->ds;
+	struct sja1105_table *table;
+	u64 max_fdb_entries;
+	int port;
+
+	max_fdb_entries = SJA1105_MAX_L2_LOOKUP_COUNT / ds->num_ports;
+
+	for (port = 0; port < ds->num_ports; port++)
+		default_l2_lookup_params.maxaddrp[port] = max_fdb_entries;
 
 	table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
 
@@ -388,6 +395,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 {
 	struct sja1105_l2_forwarding_entry *l2fwd;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	int i, j;
 
@@ -408,7 +416,7 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 	l2fwd = table->entries;
 
 	/* First 5 entries define the forwarding rules */
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		unsigned int upstream = dsa_upstream_port(priv->ds, i);
 
 		for (j = 0; j < SJA1105_NUM_TC; j++)
@@ -436,8 +444,8 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 	 * Create a one-to-one mapping.
 	 */
 	for (i = 0; i < SJA1105_NUM_TC; i++)
-		for (j = 0; j < SJA1105_NUM_PORTS; j++)
-			l2fwd[SJA1105_NUM_PORTS + i].vlan_pmap[j] = i;
+		for (j = 0; j < ds->num_ports; j++)
+			l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
 
 	return 0;
 }
@@ -533,7 +541,7 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		 */
 		.host_port = dsa_upstream_port(priv->ds, 0),
 		/* Default to an invalid value */
-		.mirr_port = SJA1105_NUM_PORTS,
+		.mirr_port = priv->ds->num_ports,
 		/* Link-local traffic received on casc_port will be forwarded
 		 * to host_port without embedding the source port and device ID
 		 * info in the destination MAC address (presumably because it
@@ -541,7 +549,7 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		 * that). Default to an invalid port (to disable the feature)
 		 * and overwrite this if we find any DSA (cascaded) ports.
 		 */
-		.casc_port = SJA1105_NUM_PORTS,
+		.casc_port = priv->ds->num_ports,
 		/* No TTEthernet */
 		.vllupformat = SJA1105_VL_FORMAT_PSFP,
 		.vlmarker = 0,
@@ -662,6 +670,7 @@ static int sja1105_init_avb_params(struct sja1105_private *priv)
 static int sja1105_init_l2_policing(struct sja1105_private *priv)
 {
 	struct sja1105_l2_policing_entry *policing;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	int port, tc;
 
@@ -683,8 +692,8 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
 	policing = table->entries;
 
 	/* Setup shared indices for the matchall policers */
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
-		int bcast = (SJA1105_NUM_PORTS * SJA1105_NUM_TC) + port;
+	for (port = 0; port < ds->num_ports; port++) {
+		int bcast = (ds->num_ports * SJA1105_NUM_TC) + port;
 
 		for (tc = 0; tc < SJA1105_NUM_TC; tc++)
 			policing[port * SJA1105_NUM_TC + tc].sharindx = port;
@@ -693,7 +702,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
 	}
 
 	/* Setup the matchall policer parameters */
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
 
 		if (dsa_is_cpu_port(priv->ds, port))
@@ -759,9 +768,10 @@ static int sja1105_static_config_load(struct sja1105_private *priv,
 static int sja1105_parse_rgmii_delays(struct sja1105_private *priv,
 				      const struct sja1105_dt_port *ports)
 {
+	struct dsa_switch *ds = priv->ds;
 	int i;
 
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		if (ports[i].role == XMII_MAC)
 			continue;
 
@@ -1636,7 +1646,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port,
 
 	l2_fwd = priv->static_config.tables[BLK_IDX_L2_FORWARDING].entries;
 
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		/* Add this port to the forwarding matrix of the
 		 * other ports in the same bridge, and viceversa.
 		 */
@@ -1852,7 +1862,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	 * switch wants to see in the static config in order to allow us to
 	 * change it through the dynamic interface later.
 	 */
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		speed_mbps[i] = sja1105_speed[mac[i].speed];
 		mac[i].speed = SJA1105_SPEED_AUTO;
 	}
@@ -1904,7 +1914,7 @@ out_unlock_ptp:
 	if (rc < 0)
 		goto out;
 
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+	for (i = 0; i < ds->num_ports; i++) {
 		rc = sja1105_adjust_port_config(priv, i, speed_mbps[i]);
 		if (rc < 0)
 			goto out;
@@ -3022,7 +3032,7 @@ static void sja1105_teardown(struct dsa_switch *ds)
 	struct sja1105_bridge_vlan *v, *n;
 	int port;
 
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		struct sja1105_port *sp = &priv->ports[port];
 
 		if (!dsa_is_user_port(ds, port))
@@ -3225,6 +3235,7 @@ static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to,
 {
 	struct sja1105_general_params_entry *general_params;
 	struct sja1105_mac_config_entry *mac;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	bool already_enabled;
 	u64 new_mirr_port;
@@ -3235,7 +3246,7 @@ static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to,
 
 	mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
 
-	already_enabled = (general_params->mirr_port != SJA1105_NUM_PORTS);
+	already_enabled = (general_params->mirr_port != ds->num_ports);
 	if (already_enabled && enabled && general_params->mirr_port != to) {
 		dev_err(priv->ds->dev,
 			"Delete mirroring rules towards port %llu first\n",
@@ -3249,7 +3260,7 @@ static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to,
 		int port;
 
 		/* Anybody still referencing mirr_port? */
-		for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+		for (port = 0; port < ds->num_ports; port++) {
 			if (mac[port].ing_mirr || mac[port].egr_mirr) {
 				keep = true;
 				break;
@@ -3257,7 +3268,7 @@ static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to,
 		}
 		/* Unset already_enabled for next time */
 		if (!keep)
-			new_mirr_port = SJA1105_NUM_PORTS;
+			new_mirr_port = ds->num_ports;
 	}
 	if (new_mirr_port != general_params->mirr_port) {
 		general_params->mirr_port = new_mirr_port;
@@ -3679,7 +3690,7 @@ static int sja1105_probe(struct spi_device *spi)
 	}
 
 	/* Connections between dsa_port and sja1105_port */
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		struct sja1105_port *sp = &priv->ports[port];
 		struct dsa_port *dp = dsa_to_port(ds, port);
 		struct net_device *slave;
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index df3a780e9dcc..f22340e77fd5 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -309,10 +309,10 @@ int static_config_buf_prepare_for_upload(struct sja1105_private *priv,
 
 int sja1105_static_config_upload(struct sja1105_private *priv)
 {
-	unsigned long port_bitmap = GENMASK_ULL(SJA1105_NUM_PORTS - 1, 0);
 	struct sja1105_static_config *config = &priv->static_config;
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct device *dev = &priv->spidev->dev;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_status status;
 	int rc, retries = RETRIES;
 	u8 *config_buf;
@@ -333,7 +333,7 @@ int sja1105_static_config_upload(struct sja1105_private *priv)
 	 * Tx on all ports and waiting for current packet to drain.
 	 * Otherwise, the PHY will see an unterminated Ethernet packet.
 	 */
-	rc = sja1105_inhibit_tx(priv, port_bitmap, true);
+	rc = sja1105_inhibit_tx(priv, GENMASK_ULL(ds->num_ports - 1, 0), true);
 	if (rc < 0) {
 		dev_err(dev, "Failed to inhibit Tx on ports\n");
 		rc = -ENXIO;
diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c
index 31d8acff1f01..e6153848a950 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.c
+++ b/drivers/net/dsa/sja1105/sja1105_tas.c
@@ -27,7 +27,7 @@ static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
 
 	tas_data->enabled = false;
 
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		const struct tc_taprio_qopt_offload *offload;
 
 		offload = tas_data->offload[port];
@@ -164,6 +164,7 @@ int sja1105_init_scheduling(struct sja1105_private *priv)
 	struct sja1105_tas_data *tas_data = &priv->tas_data;
 	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
 	struct sja1105_schedule_entry *schedule;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	int schedule_start_idx;
 	s64 entry_point_delta;
@@ -207,7 +208,7 @@ int sja1105_init_scheduling(struct sja1105_private *priv)
 	}
 
 	/* Figure out the dimensioning of the problem */
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		if (tas_data->offload[port]) {
 			num_entries += tas_data->offload[port]->num_entries;
 			num_cycles++;
@@ -269,7 +270,7 @@ int sja1105_init_scheduling(struct sja1105_private *priv)
 	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
 	schedule_entry_points_params->actsubsch = num_cycles - 1;
 
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		const struct tc_taprio_qopt_offload *offload;
 		/* Relative base time */
 		s64 rbt;
@@ -468,6 +469,7 @@ bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
 	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
 	size_t num_entries = gating_cfg->num_entries;
 	struct tc_taprio_qopt_offload *dummy;
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_gate_entry *e;
 	bool conflict;
 	int i = 0;
@@ -491,7 +493,7 @@ bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
 	if (port != -1) {
 		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
 	} else {
-		for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+		for (port = 0; port < ds->num_ports; port++) {
 			conflict = sja1105_tas_check_conflicts(priv, port,
 							       dummy);
 			if (conflict)
@@ -554,7 +556,7 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
 		}
 	}
 
-	for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
+	for (other_port = 0; other_port < ds->num_ports; other_port++) {
 		if (other_port == port)
 			continue;
 
@@ -885,7 +887,7 @@ void sja1105_tas_teardown(struct dsa_switch *ds)
 
 	cancel_work_sync(&priv->tas_data.tas_work);
 
-	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+	for (port = 0; port < ds->num_ports; port++) {
 		offload = priv->tas_data.offload[port];
 		if (!offload)
 			continue;

From f238fef1b3de2fac2d09d925ebc75aacf5e27fd1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:14 +0300
Subject: [PATCH 0489/3292] net: dsa: sja1105: avoid some work for unused ports

Do not put unused ports in the forwarding domain, and do not allocate
FDB entries for dynamic address learning for them.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 27 +++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index ed98bb51596a..2f162765d9f3 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -298,14 +298,22 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 		.drpnolearn = true,
 	};
 	struct dsa_switch *ds = priv->ds;
+	int port, num_used_ports = 0;
 	struct sja1105_table *table;
 	u64 max_fdb_entries;
-	int port;
-
-	max_fdb_entries = SJA1105_MAX_L2_LOOKUP_COUNT / ds->num_ports;
 
 	for (port = 0; port < ds->num_ports; port++)
+		if (!dsa_is_unused_port(ds, port))
+			num_used_ports++;
+
+	max_fdb_entries = SJA1105_MAX_L2_LOOKUP_COUNT / num_used_ports;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
 		default_l2_lookup_params.maxaddrp[port] = max_fdb_entries;
+	}
 
 	table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
 
@@ -419,6 +427,9 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 	for (i = 0; i < ds->num_ports; i++) {
 		unsigned int upstream = dsa_upstream_port(priv->ds, i);
 
+		if (dsa_is_unused_port(ds, i))
+			continue;
+
 		for (j = 0; j < SJA1105_NUM_TC; j++)
 			l2fwd[i].vlan_pmap[j] = j;
 
@@ -440,12 +451,18 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 		l2fwd[upstream].bc_domain |= BIT(i);
 		l2fwd[upstream].fl_domain |= BIT(i);
 	}
+
 	/* Next 8 entries define VLAN PCP mapping from ingress to egress.
 	 * Create a one-to-one mapping.
 	 */
-	for (i = 0; i < SJA1105_NUM_TC; i++)
-		for (j = 0; j < ds->num_ports; j++)
+	for (i = 0; i < SJA1105_NUM_TC; i++) {
+		for (j = 0; j < ds->num_ports; j++) {
+			if (dsa_is_unused_port(ds, j))
+				continue;
+
 			l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
+		}
+	}
 
 	return 0;
 }

From 82760d7f2ea63829d6ab0f3de7ec98b93132c8ee Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:15 +0300
Subject: [PATCH 0490/3292] net: dsa: sja1105: dimension the data structures
 for a larger port count

Introduce a SJA1105_MAX_NUM_PORTS macro which at the moment is equal to
SJA1105_NUM_PORTS (5). With the introduction of SJA1110, these
structures will need to hold information for up to 11 ports.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h        | 33 ++++++++++++------------
 drivers/net/dsa/sja1105/sja1105_flower.c |  4 +--
 drivers/net/dsa/sja1105/sja1105_main.c   |  8 +++---
 drivers/net/dsa/sja1105/sja1105_tas.h    |  2 +-
 drivers/net/dsa/sja1105/sja1105_vl.c     |  2 +-
 5 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 10fc6b54f9f6..3737a3b38863 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -14,6 +14,7 @@
 #include "sja1105_static_config.h"
 
 #define SJA1105_NUM_PORTS		5
+#define SJA1105_MAX_NUM_PORTS		SJA1105_NUM_PORTS
 #define SJA1105_NUM_TC			8
 #define SJA1105ET_FDB_BIN_SIZE		4
 /* The hardware value is in multiples of 10 ms.
@@ -57,19 +58,19 @@ struct sja1105_regs {
 	u64 ptpclkcorp;
 	u64 ptpsyncts;
 	u64 ptpschtm;
-	u64 ptpegr_ts[SJA1105_NUM_PORTS];
-	u64 pad_mii_tx[SJA1105_NUM_PORTS];
-	u64 pad_mii_rx[SJA1105_NUM_PORTS];
-	u64 pad_mii_id[SJA1105_NUM_PORTS];
-	u64 cgu_idiv[SJA1105_NUM_PORTS];
-	u64 mii_tx_clk[SJA1105_NUM_PORTS];
-	u64 mii_rx_clk[SJA1105_NUM_PORTS];
-	u64 mii_ext_tx_clk[SJA1105_NUM_PORTS];
-	u64 mii_ext_rx_clk[SJA1105_NUM_PORTS];
-	u64 rgmii_tx_clk[SJA1105_NUM_PORTS];
-	u64 rmii_ref_clk[SJA1105_NUM_PORTS];
-	u64 rmii_ext_tx_clk[SJA1105_NUM_PORTS];
-	u64 stats[__MAX_SJA1105_STATS_AREA][SJA1105_NUM_PORTS];
+	u64 ptpegr_ts[SJA1105_MAX_NUM_PORTS];
+	u64 pad_mii_tx[SJA1105_MAX_NUM_PORTS];
+	u64 pad_mii_rx[SJA1105_MAX_NUM_PORTS];
+	u64 pad_mii_id[SJA1105_MAX_NUM_PORTS];
+	u64 cgu_idiv[SJA1105_MAX_NUM_PORTS];
+	u64 mii_tx_clk[SJA1105_MAX_NUM_PORTS];
+	u64 mii_rx_clk[SJA1105_MAX_NUM_PORTS];
+	u64 mii_ext_tx_clk[SJA1105_MAX_NUM_PORTS];
+	u64 mii_ext_rx_clk[SJA1105_MAX_NUM_PORTS];
+	u64 rgmii_tx_clk[SJA1105_MAX_NUM_PORTS];
+	u64 rmii_ref_clk[SJA1105_MAX_NUM_PORTS];
+	u64 rmii_ext_tx_clk[SJA1105_MAX_NUM_PORTS];
+	u64 stats[__MAX_SJA1105_STATS_AREA][SJA1105_MAX_NUM_PORTS];
 };
 
 struct sja1105_info {
@@ -206,8 +207,8 @@ enum sja1105_vlan_state {
 
 struct sja1105_private {
 	struct sja1105_static_config static_config;
-	bool rgmii_rx_delay[SJA1105_NUM_PORTS];
-	bool rgmii_tx_delay[SJA1105_NUM_PORTS];
+	bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS];
+	bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS];
 	bool best_effort_vlan_filtering;
 	unsigned long learn_ena;
 	unsigned long ucast_egress_floods;
@@ -220,7 +221,7 @@ struct sja1105_private {
 	struct list_head dsa_8021q_vlans;
 	struct list_head bridge_vlans;
 	struct sja1105_flow_block flow_block;
-	struct sja1105_port ports[SJA1105_NUM_PORTS];
+	struct sja1105_port ports[SJA1105_MAX_NUM_PORTS];
 	/* Serializes transmission of management frames so that
 	 * the switch doesn't confuse them with one another.
 	 */
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 77c54126b3fc..6c10ffa968ce 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -72,7 +72,7 @@ static int sja1105_setup_bcast_policer(struct sja1105_private *priv,
 	/* Make the broadcast policers of all ports attached to this block
 	 * point to the newly allocated policer
 	 */
-	for_each_set_bit(p, &rule->port_mask, SJA1105_NUM_PORTS) {
+	for_each_set_bit(p, &rule->port_mask, SJA1105_MAX_NUM_PORTS) {
 		int bcast = (ds->num_ports * SJA1105_NUM_TC) + p;
 
 		policing[bcast].sharindx = rule->bcast_pol.sharindx;
@@ -144,7 +144,7 @@ static int sja1105_setup_tc_policer(struct sja1105_private *priv,
 	/* Make the policers for traffic class @tc of all ports attached to
 	 * this block point to the newly allocated policer
 	 */
-	for_each_set_bit(p, &rule->port_mask, SJA1105_NUM_PORTS) {
+	for_each_set_bit(p, &rule->port_mask, SJA1105_MAX_NUM_PORTS) {
 		int index = (p * SJA1105_NUM_TC) + tc;
 
 		policing[index].sharindx = rule->tc_pol.sharindx;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2f162765d9f3..76fc730b341d 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1861,8 +1861,8 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 {
 	struct ptp_system_timestamp ptp_sts_before;
 	struct ptp_system_timestamp ptp_sts_after;
+	int speed_mbps[SJA1105_MAX_NUM_PORTS];
 	struct sja1105_mac_config_entry *mac;
-	int speed_mbps[SJA1105_NUM_PORTS];
 	struct dsa_switch *ds = priv->ds;
 	s64 t1, t2, t3, t4;
 	s64 t12, t34;
@@ -2639,7 +2639,7 @@ out:
 
 static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
 {
-	u16 subvlan_map[SJA1105_NUM_PORTS][DSA_8021Q_N_SUBVLAN];
+	u16 subvlan_map[SJA1105_MAX_NUM_PORTS][DSA_8021Q_N_SUBVLAN];
 	struct sja1105_retagging_entry *new_retagging;
 	struct sja1105_vlan_lookup_entry *new_vlan;
 	struct sja1105_table *table;
@@ -2975,7 +2975,7 @@ static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = {
  */
 static int sja1105_setup(struct dsa_switch *ds)
 {
-	struct sja1105_dt_port ports[SJA1105_NUM_PORTS];
+	struct sja1105_dt_port ports[SJA1105_MAX_NUM_PORTS];
 	struct sja1105_private *priv = ds->priv;
 	int rc;
 
@@ -3668,7 +3668,7 @@ static int sja1105_probe(struct spi_device *spi)
 		return -ENOMEM;
 
 	ds->dev = dev;
-	ds->num_ports = SJA1105_NUM_PORTS;
+	ds->num_ports = SJA1105_MAX_NUM_PORTS;
 	ds->ops = &sja1105_switch_ops;
 	ds->priv = priv;
 	priv->ds = ds;
diff --git a/drivers/net/dsa/sja1105/sja1105_tas.h b/drivers/net/dsa/sja1105/sja1105_tas.h
index 0c173ff51751..c05bd07e8221 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.h
+++ b/drivers/net/dsa/sja1105/sja1105_tas.h
@@ -39,7 +39,7 @@ struct sja1105_gating_config {
 };
 
 struct sja1105_tas_data {
-	struct tc_taprio_qopt_offload *offload[SJA1105_NUM_PORTS];
+	struct tc_taprio_qopt_offload *offload[SJA1105_MAX_NUM_PORTS];
 	struct sja1105_gating_config gating_cfg;
 	enum sja1105_tas_state state;
 	enum sja1105_ptp_op last_op;
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index ffc4042b4502..f6e13e6c6a18 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -386,7 +386,7 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 		if (rule->type != SJA1105_RULE_VL)
 			continue;
 
-		for_each_set_bit(port, &rule->port_mask, SJA1105_NUM_PORTS) {
+		for_each_set_bit(port, &rule->port_mask, SJA1105_MAX_NUM_PORTS) {
 			vl_lookup[k].format = SJA1105_VL_FORMAT_PSFP;
 			vl_lookup[k].port = port;
 			vl_lookup[k].macaddr = rule->key.vl.dmac;

From df2a81a35ebb507d8d614d993d3b55425d73ffee Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:16 +0300
Subject: [PATCH 0491/3292] net: dsa: sja1105: don't assign the host port using
 dsa_upstream_port()

If @port is unused, then dsa_upstream_port(ds, port) returns @port,
which means we cannot assume the CPU port can be retrieved this way.

The sja1105 switches support a single CPU port, so just iterate over the
switch ports and stop at the first CPU port we see.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 76fc730b341d..409e059b87e3 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -556,7 +556,7 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		 * receieved on host_port itself would be dropped, except
 		 * by installing a temporary 'management route'
 		 */
-		.host_port = dsa_upstream_port(priv->ds, 0),
+		.host_port = priv->ds->num_ports,
 		/* Default to an invalid value */
 		.mirr_port = priv->ds->num_ports,
 		/* Link-local traffic received on casc_port will be forwarded
@@ -579,7 +579,16 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		.tpid = ETH_P_SJA1105,
 		.tpid2 = ETH_P_SJA1105,
 	};
+	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
+	int port;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_is_cpu_port(ds, port)) {
+			default_general_params.host_port = port;
+			break;
+		}
+	}
 
 	table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 

From c50376783f23ffd2dd8833c2069e52ba08e82917 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:17 +0300
Subject: [PATCH 0492/3292] net: dsa: sja1105: skip CGU configuration if it's
 unnecessary

There are two distinct code paths which enter sja1105_clocking.c, one
through sja1105_clocking_setup() and the other through
sja1105_clocking_setup_port():

sja1105_static_config_reload      sja1105_setup
              |                         |
              |      +------------------+
              |      |
              v      v
   sja1105_clocking_setup               sja1105_adjust_port_config
                 |                                   |
                 v                                   |
      sja1105_clocking_setup_port <------------------+

As opposed to SJA1105, the SJA1110 does not need any configuration of
the Clock Generation Unit in order for xMII ports to work. Just RGMII
internal delays need to be configured, and that is done inside
sja1105_clocking_setup_port for the RGMII ports.

So this patch introduces the concept of a "reserved address", which the
CGU configuration functions from sja1105_clocking.c must check before
proceeding to do anything. The SJA1110 will have reserved addresses for
the CGU PLLs for MII/RMII/RGMII.

Additionally, make sja1105_clocking_setup() a function pointer so it can
be overridden by the SJA1110. Even though nothing port-related needs to
be done in the CGU, there are some operations such as disabling the
watchdog clock which are unique to the SJA1110.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h             |  1 +
 drivers/net/dsa/sja1105/sja1105_clocking.c    | 33 +++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_main.c        |  4 +--
 drivers/net/dsa/sja1105/sja1105_spi.c         |  6 ++++
 .../net/dsa/sja1105/sja1105_static_config.h   |  2 ++
 5 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 3737a3b38863..47cad24e6af0 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -109,6 +109,7 @@ struct sja1105_info {
 			   const unsigned char *addr, u16 vid);
 	void (*ptp_cmd_packing)(u8 *buf, struct sja1105_ptp_cmd *cmd,
 				enum packing_op op);
+	int (*clocking_setup)(struct sja1105_private *priv);
 	const char *name;
 };
 
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index f54b4d03a002..4697ac064abc 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -110,6 +110,9 @@ static int sja1105_cgu_idiv_config(struct sja1105_private *priv, int port,
 	struct sja1105_cgu_idiv idiv;
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 
+	if (regs->cgu_idiv[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	if (enabled && factor != 1 && factor != 10) {
 		dev_err(dev, "idiv factor must be 1 or 10\n");
 		return -ERANGE;
@@ -159,6 +162,9 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv,
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 	int clksrc;
 
+	if (regs->mii_tx_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	if (role == XMII_MAC)
 		clksrc = mac_clk_sources[port];
 	else
@@ -188,6 +194,9 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port)
 		CLKSRC_MII4_RX_CLK,
 	};
 
+	if (regs->mii_rx_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload for packed_buf */
 	mii_rx_clk.clksrc    = clk_sources[port];
 	mii_rx_clk.autoblock = 1;  /* Autoblock clk while changing clksrc */
@@ -212,6 +221,9 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port)
 		CLKSRC_IDIV4,
 	};
 
+	if (regs->mii_ext_tx_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload for packed_buf */
 	mii_ext_tx_clk.clksrc    = clk_sources[port];
 	mii_ext_tx_clk.autoblock = 1; /* Autoblock clk while changing clksrc */
@@ -236,6 +248,9 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port)
 		CLKSRC_IDIV4,
 	};
 
+	if (regs->mii_ext_rx_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload for packed_buf */
 	mii_ext_rx_clk.clksrc    = clk_sources[port];
 	mii_ext_rx_clk.autoblock = 1; /* Autoblock clk while changing clksrc */
@@ -320,6 +335,9 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 	int clksrc;
 
+	if (regs->rgmii_tx_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	if (speed == SJA1105_SPEED_1000MBPS) {
 		clksrc = CLKSRC_PLL0;
 	} else {
@@ -368,6 +386,9 @@ static int sja1105_rgmii_cfg_pad_tx_config(struct sja1105_private *priv,
 	struct sja1105_cfg_pad_mii pad_mii_tx = {0};
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 
+	if (regs->pad_mii_tx[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload */
 	pad_mii_tx.d32_os    = 3; /* TXD[3:2] output stage: */
 				  /*          high noise/high speed */
@@ -394,6 +415,9 @@ static int sja1105_cfg_pad_rx_config(struct sja1105_private *priv, int port)
 	struct sja1105_cfg_pad_mii pad_mii_rx = {0};
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 
+	if (regs->pad_mii_rx[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload */
 	pad_mii_rx.d32_ih    = 0; /* RXD[3:2] input stage hysteresis: */
 				  /*          non-Schmitt (default) */
@@ -572,6 +596,9 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv,
 		CLKSRC_MII4_TX_CLK,
 	};
 
+	if (regs->rmii_ref_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload for packed_buf */
 	ref_clk.clksrc    = clk_sources[port];
 	ref_clk.autoblock = 1;      /* Autoblock clk while changing clksrc */
@@ -589,6 +616,9 @@ sja1105_cgu_rmii_ext_tx_clk_config(struct sja1105_private *priv, int port)
 	struct sja1105_cgu_mii_ctrl ext_tx_clk;
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 
+	if (regs->rmii_ext_tx_clk[port] == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* Payload for packed_buf */
 	ext_tx_clk.clksrc    = CLKSRC_PLL1;
 	ext_tx_clk.autoblock = 1;   /* Autoblock clk while changing clksrc */
@@ -607,6 +637,9 @@ static int sja1105_cgu_rmii_pll_config(struct sja1105_private *priv)
 	struct device *dev = priv->ds->dev;
 	int rc;
 
+	if (regs->rmii_pll1 == SJA1105_RSV_ADDR)
+		return 0;
+
 	/* PLL1 must be enabled and output 50 Mhz.
 	 * This is done by writing first 0x0A010941 to
 	 * the PLL_1_C register and then deasserting
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 409e059b87e3..be48e45079f2 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1936,7 +1936,7 @@ out_unlock_ptp:
 	 * For these interfaces there is no dynamic configuration
 	 * needed, since PLLs have same settings at all speeds.
 	 */
-	rc = sja1105_clocking_setup(priv);
+	rc = priv->info->clocking_setup(priv);
 	if (rc < 0)
 		goto out;
 
@@ -3015,7 +3015,7 @@ static int sja1105_setup(struct dsa_switch *ds)
 		return rc;
 	}
 	/* Configure the CGU (PHY link modes and speeds) */
-	rc = sja1105_clocking_setup(priv);
+	rc = priv->info->clocking_setup(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc);
 		return rc;
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index f22340e77fd5..c08aa6fbd85d 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -489,6 +489,7 @@ const struct sja1105_info sja1105e_info = {
 	.fdb_add_cmd		= sja1105et_fdb_add,
 	.fdb_del_cmd		= sja1105et_fdb_del,
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
+	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
 	.name			= "SJA1105E",
 };
@@ -507,6 +508,7 @@ const struct sja1105_info sja1105t_info = {
 	.fdb_add_cmd		= sja1105et_fdb_add,
 	.fdb_del_cmd		= sja1105et_fdb_del,
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
+	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
 	.name			= "SJA1105T",
 };
@@ -526,6 +528,7 @@ const struct sja1105_info sja1105p_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
 	.name			= "SJA1105P",
 };
@@ -545,6 +548,7 @@ const struct sja1105_info sja1105q_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
 	.name			= "SJA1105Q",
 };
@@ -564,6 +568,7 @@ const struct sja1105_info sja1105r_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
 	.name			= "SJA1105R",
 };
@@ -584,5 +589,6 @@ const struct sja1105_info sja1105s_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1105_clocking_setup,
 	.name			= "SJA1105S",
 };
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 779eb6840f05..9bc783a2bbea 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -129,6 +129,8 @@ enum sja1105_blk_idx {
 #define SJA1105R_PART_NO				0x9A86
 #define SJA1105S_PART_NO				0x9A87
 
+#define SJA1105_RSV_ADDR		0xffffffffffffffffull
+
 struct sja1105_schedule_entry {
 	u64 winstindex;
 	u64 winend;

From fd6f2c257b0bc0c656e88dcc2c6fc7ce180fb2de Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:18 +0300
Subject: [PATCH 0493/3292] net: dsa: sja1105: dynamically choose the number of
 static config table entries

Due to the fact that the port count is different, some static config
tables have a different number of elements in SJA1105 compared to
SJA1110. Such an example is the L2 Policing table, which has 45 entries
in SJA1105 (one per port x traffic class, and one broadcast policer per
port) and 110 entries in SJA1110 (one per port x traffic class, one
broadcast and one multicast policer per port).

Similarly, the MAC Configuration Table, the L2 Forwarding table, all
have a different number of elements simply because the port count is
different, and although this can be accounted for by looking at
ds->ports, the policing table can't because of the presence of the extra
multicast policers.

The common denominator for the static config initializers for these
tables is that they must set up all the entries within that table.
So the simplest way to account for these differences in a uniform manner
is to look at struct sja1105_table_ops::max_entry_count. For the sake of
uniformity, this patch makes that change also for tables whose number of
elements did not change in SJA1110, like the xMII Mode Parameters, the
L2 Lookup Parameters, General Parameters, AVB Parameters (all of these
are singleton tables with a single entry).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 32 +++++++++++++-------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index be48e45079f2..cd9359722e5f 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -118,12 +118,12 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(ds->num_ports,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = ds->num_ports;
+	table->entry_count = table->ops->max_entry_count;
 
 	mac = table->entries;
 
@@ -174,13 +174,13 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_XMII_PARAMS_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
 	/* Override table based on PHYLINK DT bindings */
-	table->entry_count = SJA1105_MAX_XMII_PARAMS_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	mii = table->entries;
 
@@ -322,12 +322,12 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	/* This table only has a single entry */
 	((struct sja1105_l2_lookup_params_entry *)table->entries)[0] =
@@ -414,12 +414,12 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_L2_FORWARDING_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_MAX_L2_FORWARDING_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	l2fwd = table->entries;
 
@@ -484,12 +484,12 @@ static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_L2_FORWARDING_PARAMS_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_MAX_L2_FORWARDING_PARAMS_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	/* This table only has a single entry */
 	((struct sja1105_l2_forwarding_params_entry *)table->entries)[0] =
@@ -597,12 +597,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_GENERAL_PARAMS_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	/* This table only has a single entry */
 	((struct sja1105_general_params_entry *)table->entries)[0] =
@@ -624,12 +624,12 @@ static int sja1105_init_avb_params(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_AVB_PARAMS_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_MAX_AVB_PARAMS_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	avb = table->entries;
 
@@ -708,12 +708,12 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
 		table->entry_count = 0;
 	}
 
-	table->entries = kcalloc(SJA1105_MAX_L2_POLICING_COUNT,
+	table->entries = kcalloc(table->ops->max_entry_count,
 				 table->ops->unpacked_entry_size, GFP_KERNEL);
 	if (!table->entries)
 		return -ENOMEM;
 
-	table->entry_count = SJA1105_MAX_L2_POLICING_COUNT;
+	table->entry_count = table->ops->max_entry_count;
 
 	policing = table->entries;
 

From f78a2517cf73fb0de68012ba2f42c9ad65d2aa14 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:19 +0300
Subject: [PATCH 0494/3292] net: dsa: sja1105: use sja1105_xfer_u32 for the
 reset procedure

Using sja1105_xfer_buf results in a higher overhead and is harder to
read.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_spi.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index c08aa6fbd85d..79ba86096a4b 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -7,8 +7,6 @@
 #include <linux/packing.h>
 #include "sja1105.h"
 
-#define SJA1105_SIZE_RESET_CMD		4
-
 struct sja1105_chunk {
 	u8	*buf;
 	size_t	len;
@@ -179,28 +177,20 @@ static int sja1105et_reset_cmd(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
 	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_RESET_CMD] = {0};
-	const int size = SJA1105_SIZE_RESET_CMD;
-	u64 cold_rst = 1;
+	u32 cold_reset = BIT(3);
 
-	sja1105_pack(packed_buf, &cold_rst, 3, 3, size);
-
-	return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf,
-				SJA1105_SIZE_RESET_CMD);
+	/* Cold reset */
+	return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &cold_reset, NULL);
 }
 
 static int sja1105pqrs_reset_cmd(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
 	const struct sja1105_regs *regs = priv->info->regs;
-	u8 packed_buf[SJA1105_SIZE_RESET_CMD] = {0};
-	const int size = SJA1105_SIZE_RESET_CMD;
-	u64 cold_rst = 1;
+	u32 cold_reset = BIT(2);
 
-	sja1105_pack(packed_buf, &cold_rst, 2, 2, size);
-
-	return sja1105_xfer_buf(priv, SPI_WRITE, regs->rgu, packed_buf,
-				SJA1105_SIZE_RESET_CMD);
+	/* Cold reset */
+	return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &cold_reset, NULL);
 }
 
 int sja1105_inhibit_tx(const struct sja1105_private *priv,

From 38fbe91f2287c696f290d9115901aa435f7166a8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:20 +0300
Subject: [PATCH 0495/3292] net: dsa: sja1105: configure the multicast
 policers, if present

The SJA1110 policer array is similar in layout with SJA1105, except it
contains one multicast policer per port at the end.

Detect the presence of multicast policers based on the maximum number of
supported L2 Policing Table entries, and make those policers have a
shared index equal to the port's default policer. Letting the user
configure these policers is not supported at the moment.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index cd9359722e5f..c391ab00e003 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -719,12 +719,16 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
 
 	/* Setup shared indices for the matchall policers */
 	for (port = 0; port < ds->num_ports; port++) {
+		int mcast = (ds->num_ports * (SJA1105_NUM_TC + 1)) + port;
 		int bcast = (ds->num_ports * SJA1105_NUM_TC) + port;
 
 		for (tc = 0; tc < SJA1105_NUM_TC; tc++)
 			policing[port * SJA1105_NUM_TC + tc].sharindx = port;
 
 		policing[bcast].sharindx = port;
+		/* Only SJA1110 has multicast policers */
+		if (mcast <= table->ops->max_entry_count)
+			policing[mcast].sharindx = port;
 	}
 
 	/* Setup the matchall policer parameters */

From 1bf658eefe38cc26801b5861bbb6dbf3259ba8c1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 16:14:21 +0300
Subject: [PATCH 0496/3292] net: dsa: sja1105: allow the frame buffer size to
 be customized

The shared frame buffer of the SJA1110 is larger than that of SJA1105,
which is natural due to the fact that there are more ports.

Introduce yet another property in struct sja1105_info which encodes the
maximum number of 128 byte blocks that can be used for frame buffers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h             |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c        | 21 ++++++++-----------
 drivers/net/dsa/sja1105/sja1105_spi.c         |  9 +++++++-
 .../net/dsa/sja1105/sja1105_static_config.c   | 13 ++++++------
 .../net/dsa/sja1105/sja1105_static_config.h   |  5 +++--
 5 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 47cad24e6af0..2ec03917feb3 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -90,6 +90,7 @@ struct sja1105_info {
 	 */
 	int ptpegr_ts_bytes;
 	int num_cbs_shapers;
+	int max_frame_mem;
 	const struct sja1105_dynamic_table_ops *dyn_ops;
 	const struct sja1105_table_ops *static_ops;
 	const struct sja1105_regs *regs;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index c391ab00e003..a105e174b3af 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -469,12 +469,7 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 
 static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv)
 {
-	struct sja1105_l2_forwarding_params_entry default_l2fwd_params = {
-		/* Disallow dynamic reconfiguration of vlan_pmap */
-		.max_dynp = 0,
-		/* Use a single memory partition for all ingress queues */
-		.part_spc = { SJA1105_MAX_FRAME_MEMORY, 0, 0, 0, 0, 0, 0, 0 },
-	};
+	struct sja1105_l2_forwarding_params_entry *l2fwd_params;
 	struct sja1105_table *table;
 
 	table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
@@ -492,8 +487,12 @@ static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv)
 	table->entry_count = table->ops->max_entry_count;
 
 	/* This table only has a single entry */
-	((struct sja1105_l2_forwarding_params_entry *)table->entries)[0] =
-				default_l2fwd_params;
+	l2fwd_params = table->entries;
+
+	/* Disallow dynamic reconfiguration of vlan_pmap */
+	l2fwd_params->max_dynp = 0;
+	/* Use a single memory partition for all ingress queues */
+	l2fwd_params->part_spc[0] = priv->info->max_frame_mem;
 
 	return 0;
 }
@@ -502,16 +501,14 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv)
 {
 	struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
 	struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
+	int max_mem = priv->info->max_frame_mem;
 	struct sja1105_table *table;
-	int max_mem;
 
 	/* VLAN retagging is implemented using a loopback port that consumes
 	 * frame buffers. That leaves less for us.
 	 */
 	if (priv->vlan_state == SJA1105_VLAN_BEST_EFFORT)
-		max_mem = SJA1105_MAX_FRAME_MEMORY_RETAGGING;
-	else
-		max_mem = SJA1105_MAX_FRAME_MEMORY;
+		max_mem -= SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD;
 
 	table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
 	l2_fwd_params = table->entries;
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 79ba86096a4b..d0bc6cf90bfd 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -271,7 +271,8 @@ int static_config_buf_prepare_for_upload(struct sja1105_private *priv,
 	char *final_header_ptr;
 	int crc_len;
 
-	valid = sja1105_static_config_check_valid(config);
+	valid = sja1105_static_config_check_valid(config,
+						  priv->info->max_frame_mem);
 	if (valid != SJA1105_CONFIG_OK) {
 		dev_err(&priv->spidev->dev,
 			sja1105_static_config_error_msg[valid]);
@@ -474,6 +475,7 @@ const struct sja1105_info sja1105e_info = {
 	.can_limit_mcast_flood	= false,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
+	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
 	.num_cbs_shapers	= SJA1105ET_MAX_CBS_COUNT,
 	.reset_cmd		= sja1105et_reset_cmd,
 	.fdb_add_cmd		= sja1105et_fdb_add,
@@ -493,6 +495,7 @@ const struct sja1105_info sja1105t_info = {
 	.can_limit_mcast_flood	= false,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
+	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
 	.num_cbs_shapers	= SJA1105ET_MAX_CBS_COUNT,
 	.reset_cmd		= sja1105et_reset_cmd,
 	.fdb_add_cmd		= sja1105et_fdb_add,
@@ -512,6 +515,7 @@ const struct sja1105_info sja1105p_info = {
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -532,6 +536,7 @@ const struct sja1105_info sja1105q_info = {
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -552,6 +557,7 @@ const struct sja1105_info sja1105r_info = {
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -573,6 +579,7 @@ const struct sja1105_info sja1105s_info = {
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index a8efb7fac395..33f91ecbe07b 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -657,11 +657,11 @@ const char *sja1105_static_config_error_msg[] = {
 };
 
 static sja1105_config_valid_t
-static_config_check_memory_size(const struct sja1105_table *tables)
+static_config_check_memory_size(const struct sja1105_table *tables, int max_mem)
 {
 	const struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
 	const struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
-	int i, max_mem, mem = 0;
+	int i, mem = 0;
 
 	l2_fwd_params = tables[BLK_IDX_L2_FORWARDING_PARAMS].entries;
 
@@ -675,9 +675,7 @@ static_config_check_memory_size(const struct sja1105_table *tables)
 	}
 
 	if (tables[BLK_IDX_RETAGGING].entry_count)
-		max_mem = SJA1105_MAX_FRAME_MEMORY_RETAGGING;
-	else
-		max_mem = SJA1105_MAX_FRAME_MEMORY;
+		max_mem -= SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD;
 
 	if (mem > max_mem)
 		return SJA1105_OVERCOMMITTED_FRAME_MEMORY;
@@ -686,7 +684,8 @@ static_config_check_memory_size(const struct sja1105_table *tables)
 }
 
 sja1105_config_valid_t
-sja1105_static_config_check_valid(const struct sja1105_static_config *config)
+sja1105_static_config_check_valid(const struct sja1105_static_config *config,
+				  int max_mem)
 {
 	const struct sja1105_table *tables = config->tables;
 #define IS_FULL(blk_idx) \
@@ -754,7 +753,7 @@ sja1105_static_config_check_valid(const struct sja1105_static_config *config)
 	if (!IS_FULL(BLK_IDX_XMII_PARAMS))
 		return SJA1105_MISSING_XMII_TABLE;
 
-	return static_config_check_memory_size(tables);
+	return static_config_check_memory_size(tables, max_mem);
 #undef IS_FULL
 }
 
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 9bc783a2bbea..4ddb06bd8e92 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -115,7 +115,7 @@ enum sja1105_blk_idx {
 #define SJA1105PQRS_MAX_CBS_COUNT			16
 
 #define SJA1105_MAX_FRAME_MEMORY			929
-#define SJA1105_MAX_FRAME_MEMORY_RETAGGING		910
+#define SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD		19
 #define SJA1105_VL_FRAME_MEMORY				100
 
 #define SJA1105E_DEVICE_ID				0x9C00000Cull
@@ -416,7 +416,8 @@ typedef enum {
 extern const char *sja1105_static_config_error_msg[];
 
 sja1105_config_valid_t
-sja1105_static_config_check_valid(const struct sja1105_static_config *config);
+sja1105_static_config_check_valid(const struct sja1105_static_config *config,
+				  int max_mem);
 void
 sja1105_static_config_pack(void *buf, struct sja1105_static_config *config);
 int sja1105_static_config_init(struct sja1105_static_config *config,

From 8890d0a1891aea989e23e357eac4c8a206152d58 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:08 +0800
Subject: [PATCH 0497/3292] net: wan: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 38 --------------------------------------
 1 file changed, 38 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index f393684f203a..676dd813d36d 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -50,7 +50,6 @@ static const char* version = "wanXL serial card driver version: 0.48";
 /* MAILBOX #2 - DRAM SIZE */
 #define MBX2_MEMSZ_MASK 0xFFFF0000 /* PUTS Memory Size Register mask */
 
-
 struct port {
 	struct net_device *dev;
 	struct card *card;
@@ -61,13 +60,11 @@ struct port {
 	struct sk_buff *tx_skbs[TX_BUFFERS];
 };
 
-
 struct card_status {
 	desc_t rx_descs[RX_QUEUE_LENGTH];
 	port_status_t port_status[4];
 };
 
-
 struct card {
 	int n_ports;		/* 1, 2 or 4 ports */
 	u8 irq;
@@ -81,20 +78,16 @@ struct card {
 	struct port ports[];	/* 1 - 4 port structures follow */
 };
 
-
-
 static inline struct port *dev_to_port(struct net_device *dev)
 {
 	return (struct port *)dev_to_hdlc(dev)->priv;
 }
 
-
 static inline port_status_t *get_status(struct port *port)
 {
 	return &port->card->status->port_status[port->node];
 }
 
-
 #ifdef DEBUG_PCI
 static inline dma_addr_t pci_map_single_debug(struct pci_dev *pdev, void *ptr,
 					      size_t size, int direction)
@@ -110,7 +103,6 @@ static inline dma_addr_t pci_map_single_debug(struct pci_dev *pdev, void *ptr,
 #define pci_map_single pci_map_single_debug
 #endif
 
-
 /* Cable and/or personality module change interrupt service */
 static inline void wanxl_cable_intr(struct port *port)
 {
@@ -154,8 +146,6 @@ static inline void wanxl_cable_intr(struct port *port)
 		netif_carrier_off(port->dev);
 }
 
-
-
 /* Transmit complete interrupt service */
 static inline void wanxl_tx_intr(struct port *port)
 {
@@ -187,8 +177,6 @@ static inline void wanxl_tx_intr(struct port *port)
         }
 }
 
-
-
 /* Receive complete interrupt service */
 static inline void wanxl_rx_intr(struct card *card)
 {
@@ -239,8 +227,6 @@ static inline void wanxl_rx_intr(struct card *card)
 	}
 }
 
-
-
 static irqreturn_t wanxl_intr(int irq, void* dev_id)
 {
 	struct card *card = dev_id;
@@ -248,7 +234,6 @@ static irqreturn_t wanxl_intr(int irq, void* dev_id)
         u32 stat;
         int handled = 0;
 
-
         while((stat = readl(card->plx + PLX_DOORBELL_FROM_CARD)) != 0) {
                 handled = 1;
 		writel(stat, card->plx + PLX_DOORBELL_FROM_CARD);
@@ -266,8 +251,6 @@ static irqreturn_t wanxl_intr(int irq, void* dev_id)
         return IRQ_RETVAL(handled);
 }
 
-
-
 static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct port *port = dev_to_port(dev);
@@ -312,8 +295,6 @@ static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-
-
 static int wanxl_attach(struct net_device *dev, unsigned short encoding,
 			unsigned short parity)
 {
@@ -335,8 +316,6 @@ static int wanxl_attach(struct net_device *dev, unsigned short encoding,
 	return 0;
 }
 
-
-
 static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	const size_t size = sizeof(sync_serial_settings);
@@ -387,8 +366,6 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
         }
 }
 
-
-
 static int wanxl_open(struct net_device *dev)
 {
 	struct port *port = dev_to_port(dev);
@@ -423,8 +400,6 @@ static int wanxl_open(struct net_device *dev)
 	return -EFAULT;
 }
 
-
-
 static int wanxl_close(struct net_device *dev)
 {
 	struct port *port = dev_to_port(dev);
@@ -461,8 +436,6 @@ static int wanxl_close(struct net_device *dev)
 	return 0;
 }
 
-
-
 static struct net_device_stats *wanxl_get_stats(struct net_device *dev)
 {
 	struct port *port = dev_to_port(dev);
@@ -474,8 +447,6 @@ static struct net_device_stats *wanxl_get_stats(struct net_device *dev)
 	return &dev->stats;
 }
 
-
-
 static int wanxl_puts_command(struct card *card, u32 cmd)
 {
 	unsigned long timeout = jiffies + 5 * HZ;
@@ -491,8 +462,6 @@ static int wanxl_puts_command(struct card *card, u32 cmd)
 	return -1;
 }
 
-
-
 static void wanxl_reset(struct card *card)
 {
 	u32 old_value = readl(card->plx + PLX_CONTROL) & ~PLX_CTL_RESET;
@@ -505,8 +474,6 @@ static void wanxl_reset(struct card *card)
 	readl(card->plx + PLX_CONTROL); /* wait for posted write */
 }
 
-
-
 static void wanxl_pci_remove_one(struct pci_dev *pdev)
 {
 	struct card *card = pci_get_drvdata(pdev);
@@ -543,7 +510,6 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev)
 	kfree(card);
 }
 
-
 #include "wanxlfw.inc"
 
 static const struct net_device_ops wanxl_ops = {
@@ -677,7 +643,6 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	/* set up on-board RAM mapping */
 	mem_phy = pci_resource_start(pdev, 2);
 
-
 	/* sanity check the board's reported memory size */
 	if (ramsize < BUFFERS_ADDR +
 	    (TX_BUFFERS + RX_BUFFERS) * BUFFER_LENGTH * ports) {
@@ -813,7 +778,6 @@ static const struct pci_device_id wanxl_pci_tbl[] = {
 	{ 0, }
 };
 
-
 static struct pci_driver wanxl_pci_driver = {
 	.name		= "wanXL",
 	.id_table	= wanxl_pci_tbl,
@@ -821,7 +785,6 @@ static struct pci_driver wanxl_pci_driver = {
 	.remove		= wanxl_pci_remove_one,
 };
 
-
 static int __init wanxl_init_module(void)
 {
 #ifdef MODULE
@@ -835,7 +798,6 @@ static void __exit wanxl_cleanup_module(void)
 	pci_unregister_driver(&wanxl_pci_driver);
 }
 
-
 MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
 MODULE_DESCRIPTION("SBE Inc. wanXL serial port driver");
 MODULE_LICENSE("GPL v2");

From b32db030b96e380a86b0d8827a902bdf41f7035a Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:09 +0800
Subject: [PATCH 0498/3292] net: wan: fix an code style issue about "foo* bar"

Fix the checkpatch error as "foo* bar" and should be "foo *bar",
and "(foo*)" should be "(foo *)".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 676dd813d36d..afca54cf3e82 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -32,7 +32,7 @@
 
 #include "wanxl.h"
 
-static const char* version = "wanXL serial card driver version: 0.48";
+static const char *version = "wanXL serial card driver version: 0.48";
 
 #define PLX_CTL_RESET   0x40000000 /* adapter reset */
 
@@ -227,7 +227,7 @@ static inline void wanxl_rx_intr(struct card *card)
 	}
 }
 
-static irqreturn_t wanxl_intr(int irq, void* dev_id)
+static irqreturn_t wanxl_intr(int irq, void *dev_id)
 {
 	struct card *card = dev_id;
         int i;
@@ -677,7 +677,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	}
 
 	for (i = 0; i < sizeof(firmware); i += 4)
-		writel(ntohl(*(__be32*)(firmware + i)), mem + PDM_OFFSET + i);
+		writel(ntohl(*(__be32 *)(firmware + i)), mem + PDM_OFFSET + i);
 
 	for (i = 0; i < ports; i++)
 		writel(card->status_address +

From f0328a1922906be3540611e344914b9682fff350 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:10 +0800
Subject: [PATCH 0499/3292] net: wan: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index afca54cf3e82..566c519c6f65 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -93,6 +93,7 @@ static inline dma_addr_t pci_map_single_debug(struct pci_dev *pdev, void *ptr,
 					      size_t size, int direction)
 {
 	dma_addr_t addr = dma_map_single(&pdev->dev, ptr, size, direction);
+
 	if (addr + size > 0x100000000LL)
 		pr_crit("%s: pci_map_single() returned memory at 0x%llx!\n",
 			pci_name(pdev), (unsigned long long)addr);
@@ -150,6 +151,7 @@ static inline void wanxl_cable_intr(struct port *port)
 static inline void wanxl_tx_intr(struct port *port)
 {
 	struct net_device *dev = port->dev;
+
 	while (1) {
                 desc_t *desc = &get_status(port)->tx_descs[port->tx_in];
 		struct sk_buff *skb = port->tx_skbs[port->tx_in];
@@ -181,6 +183,7 @@ static inline void wanxl_tx_intr(struct port *port)
 static inline void wanxl_rx_intr(struct card *card)
 {
 	desc_t *desc;
+
 	while (desc = &card->status->rx_descs[card->rx_in],
 	       desc->stat != PACKET_EMPTY) {
 		if ((desc->stat & PACKET_PORT_MASK) > card->n_ports)
@@ -662,6 +665,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 
 	for (i = 0; i < RX_QUEUE_LENGTH; i++) {
 		struct sk_buff *skb = dev_alloc_skb(BUFFER_LENGTH);
+
 		card->rx_skbs[i] = skb;
 		if (skb)
 			card->status->rx_descs[i].address =
@@ -729,6 +733,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 		hdlc_device *hdlc;
 		struct port *port = &card->ports[i];
 		struct net_device *dev = alloc_hdlcdev(port);
+
 		if (!dev) {
 			pr_err("%s: unable to allocate memory\n",
 			       pci_name(pdev));

From 261795f4113bba9b26e76b27c0522c403d90bfe7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:11 +0800
Subject: [PATCH 0500/3292] net: wan: code indent use tabs where possible

Code indent should use tabs where possible.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 566c519c6f65..0bcb21ddcc62 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -54,7 +54,7 @@ struct port {
 	struct net_device *dev;
 	struct card *card;
 	spinlock_t lock;	/* for wanxl_xmit */
-        int node;		/* physical port #0 - 3 */
+	int node;		/* physical port #0 - 3 */
 	unsigned int clock_type;
 	int tx_in, tx_out;
 	struct sk_buff *tx_skbs[TX_BUFFERS];
@@ -153,7 +153,7 @@ static inline void wanxl_tx_intr(struct port *port)
 	struct net_device *dev = port->dev;
 
 	while (1) {
-                desc_t *desc = &get_status(port)->tx_descs[port->tx_in];
+		desc_t *desc = &get_status(port)->tx_descs[port->tx_in];
 		struct sk_buff *skb = port->tx_skbs[port->tx_in];
 
 		switch (desc->stat) {
@@ -171,12 +171,12 @@ static inline void wanxl_tx_intr(struct port *port)
 			dev->stats.tx_packets++;
 			dev->stats.tx_bytes += skb->len;
 		}
-                desc->stat = PACKET_EMPTY; /* Free descriptor */
+		desc->stat = PACKET_EMPTY; /* Free descriptor */
 		dma_unmap_single(&port->card->pdev->dev, desc->address,
 				 skb->len, DMA_TO_DEVICE);
 		dev_consume_skb_irq(skb);
-                port->tx_in = (port->tx_in + 1) % TX_BUFFERS;
-        }
+		port->tx_in = (port->tx_in + 1) % TX_BUFFERS;
+	}
 }
 
 /* Receive complete interrupt service */
@@ -233,15 +233,15 @@ static inline void wanxl_rx_intr(struct card *card)
 static irqreturn_t wanxl_intr(int irq, void *dev_id)
 {
 	struct card *card = dev_id;
-        int i;
-        u32 stat;
-        int handled = 0;
+	int i;
+	u32 stat;
+	int handled = 0;
 
-        while((stat = readl(card->plx + PLX_DOORBELL_FROM_CARD)) != 0) {
-                handled = 1;
+	while ((stat = readl(card->plx + PLX_DOORBELL_FROM_CARD)) != 0) {
+		handled = 1;
 		writel(stat, card->plx + PLX_DOORBELL_FROM_CARD);
 
-                for (i = 0; i < card->n_ports; i++) {
+		for (i = 0; i < card->n_ports; i++) {
 			if (stat & (1 << (DOORBELL_FROM_CARD_TX_0 + i)))
 				wanxl_tx_intr(&card->ports[i]);
 			if (stat & (1 << (DOORBELL_FROM_CARD_CABLE_0 + i)))
@@ -249,9 +249,9 @@ static irqreturn_t wanxl_intr(int irq, void *dev_id)
 		}
 		if (stat & (1 << DOORBELL_FROM_CARD_RX))
 			wanxl_rx_intr(card);
-        }
+	}
 
-        return IRQ_RETVAL(handled);
+	return IRQ_RETVAL(handled);
 }
 
 static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -259,11 +259,11 @@ static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct port *port = dev_to_port(dev);
 	desc_t *desc;
 
-        spin_lock(&port->lock);
+	spin_lock(&port->lock);
 
 	desc = &get_status(port)->tx_descs[port->tx_out];
-        if (desc->stat != PACKET_EMPTY) {
-                /* should never happen - previous xmit should stop queue */
+	if (desc->stat != PACKET_EMPTY) {
+		/* should never happen - previous xmit should stop queue */
 #ifdef DEBUG_PKT
                 printk(KERN_DEBUG "%s: transmitter buffer full\n", dev->name);
 #endif
@@ -366,7 +366,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 	default:
 		return hdlc_ioctl(dev, ifr, cmd);
-        }
+	}
 }
 
 static int wanxl_open(struct net_device *dev)

From e5877104b5ec315167e0234110725f6a7c8351ab Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:12 +0800
Subject: [PATCH 0501/3292] net: wan: fix the code style issue about trailing
 statements

Trailing statements should be on next line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 59 ++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 0bcb21ddcc62..7e5bf207a142 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -112,21 +112,45 @@ static inline void wanxl_cable_intr(struct port *port)
 	const char *cable, *pm, *dte = "", *dsr = "", *dcd = "";
 
 	switch(value & 0x7) {
-	case STATUS_CABLE_V35: cable = "V.35"; break;
-	case STATUS_CABLE_X21: cable = "X.21"; break;
-	case STATUS_CABLE_V24: cable = "V.24"; break;
-	case STATUS_CABLE_EIA530: cable = "EIA530"; break;
-	case STATUS_CABLE_NONE: cable = "no"; break;
-	default: cable = "invalid";
+	case STATUS_CABLE_V35:
+		cable = "V.35";
+		break;
+	case STATUS_CABLE_X21:
+		cable = "X.21";
+		break;
+	case STATUS_CABLE_V24:
+		cable = "V.24";
+		break;
+	case STATUS_CABLE_EIA530:
+		cable = "EIA530";
+		break;
+	case STATUS_CABLE_NONE:
+		cable = "no";
+		break;
+	default:
+		cable = "invalid";
 	}
 
 	switch((value >> STATUS_CABLE_PM_SHIFT) & 0x7) {
-	case STATUS_CABLE_V35: pm = "V.35"; break;
-	case STATUS_CABLE_X21: pm = "X.21"; break;
-	case STATUS_CABLE_V24: pm = "V.24"; break;
-	case STATUS_CABLE_EIA530: pm = "EIA530"; break;
-	case STATUS_CABLE_NONE: pm = "no personality"; valid = 0; break;
-	default: pm = "invalid personality"; valid = 0;
+	case STATUS_CABLE_V35:
+		pm = "V.35";
+		break;
+	case STATUS_CABLE_X21:
+		pm = "X.21";
+		break;
+	case STATUS_CABLE_V24:
+		pm = "V.24";
+		break;
+	case STATUS_CABLE_EIA530:
+		pm = "EIA530";
+		break;
+	case STATUS_CABLE_NONE:
+		pm = "no personality";
+		valid = 0;
+		break;
+	default:
+		pm = "invalid personality";
+		valid = 0;
 	}
 
 	if (valid) {
@@ -563,9 +587,14 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	}
 
 	switch (pdev->device) {
-	case PCI_DEVICE_ID_SBE_WANXL100: ports = 1; break;
-	case PCI_DEVICE_ID_SBE_WANXL200: ports = 2; break;
-	default: ports = 4;
+	case PCI_DEVICE_ID_SBE_WANXL100:
+		ports = 1;
+		break;
+	case PCI_DEVICE_ID_SBE_WANXL200:
+		ports = 2;
+		break;
+	default:
+		ports = 4;
 	}
 
 	card = kzalloc(struct_size(card, ports, ports), GFP_KERNEL);

From c3b6b5c64f394ce381ae7ce12060dd61768d9dd7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:13 +0800
Subject: [PATCH 0502/3292] net: wan: add some required spaces

Add space required after that close brace '}'.
Add space required before the open parenthesis '('.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 7e5bf207a142..7965c648f3eb 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -111,7 +111,7 @@ static inline void wanxl_cable_intr(struct port *port)
 	int valid = 1;
 	const char *cable, *pm, *dte = "", *dsr = "", *dcd = "";
 
-	switch(value & 0x7) {
+	switch (value & 0x7) {
 	case STATUS_CABLE_V35:
 		cable = "V.35";
 		break;
@@ -131,7 +131,7 @@ static inline void wanxl_cable_intr(struct port *port)
 		cable = "invalid";
 	}
 
-	switch((value >> STATUS_CABLE_PM_SHIFT) & 0x7) {
+	switch ((value >> STATUS_CABLE_PM_SHIFT) & 0x7) {
 	case STATUS_CABLE_V35:
 		pm = "V.35";
 		break;
@@ -484,7 +484,7 @@ static int wanxl_puts_command(struct card *card, u32 cmd)
 			return 0;
 
 		schedule();
-	}while (time_after(timeout, jiffies));
+	} while (time_after(timeout, jiffies));
 
 	return -1;
 }
@@ -654,7 +654,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 			return -ENODEV;
 		}
 
-		switch(stat & 0xC0) {
+		switch (stat & 0xC0) {
 		case 0x00:	/* hmm - PUTS completed with non-zero code? */
 		case 0x80:	/* PUTS still testing the hardware */
 			break;
@@ -733,7 +733,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 		if ((stat = readl(card->plx + PLX_MAILBOX_5)) != 0)
 			break;
 		schedule();
-	}while (time_after(timeout, jiffies));
+	} while (time_after(timeout, jiffies));
 
 	if (!stat) {
 		pr_warn("%s: timeout while initializing card firmware\n",

From 87feef1cfbbe9233c53d5c4ff03277b70b58c458 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:14 +0800
Subject: [PATCH 0503/3292] net: wan: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 7965c648f3eb..a5f0aae30e0c 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -404,7 +404,9 @@ static int wanxl_open(struct net_device *dev)
 		netdev_err(dev, "port already open\n");
 		return -EIO;
 	}
-	if ((i = hdlc_open(dev)) != 0)
+
+	i = hdlc_open(dev);
+	if (i)
 		return i;
 
 	port->tx_in = port->tx_out = 0;
@@ -730,7 +732,8 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 
 	timeout = jiffies + 5 * HZ;
 	do {
-		if ((stat = readl(card->plx + PLX_MAILBOX_5)) != 0)
+		stat = readl(card->plx + PLX_MAILBOX_5);
+		if (stat)
 			break;
 		schedule();
 	} while (time_after(timeout, jiffies));

From 336d781bd952beb1a043b786ced65883d67c34bd Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:15 +0800
Subject: [PATCH 0504/3292] net: wan: replace comparison to NULL with "!card"

According to the chackpatch.pl, comparison to NULL could
be written "!card".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index a5f0aae30e0c..5a89b6d4d92e 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -600,7 +600,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	}
 
 	card = kzalloc(struct_size(card, ports, ports), GFP_KERNEL);
-	if (card == NULL) {
+	if (!card) {
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
 		return -ENOBUFS;
@@ -612,7 +612,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	card->status = dma_alloc_coherent(&pdev->dev,
 					  sizeof(struct card_status),
 					  &card->status_address, GFP_KERNEL);
-	if (card->status == NULL) {
+	if (!card->status) {
 		wanxl_pci_remove_one(pdev);
 		return -ENOBUFS;
 	}

From 80d67b95d1fe3aa629efb453f57dea935e304421 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:16 +0800
Subject: [PATCH 0505/3292] net: wan: fix the comments style issue

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 5a89b6d4d92e..18de5f1bb0ed 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -569,12 +569,14 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 		return i;
 
 	/* QUICC can only access first 256 MB of host RAM directly,
-	   but PLX9060 DMA does 32-bits for actual packet data transfers */
+	 * but PLX9060 DMA does 32-bits for actual packet data transfers
+	 */
 
 	/* FIXME when PCI/DMA subsystems are fixed.
-	   We set both dma_mask and consistent_dma_mask to 28 bits
-	   and pray pci_alloc_consistent() will use this info. It should
-	   work on most platforms */
+	 * We set both dma_mask and consistent_dma_mask to 28 bits
+	 * and pray pci_alloc_consistent() will use this info. It should
+	 * work on most platforms
+	 */
 	if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(28)) ||
 	    dma_set_mask(&pdev->dev, DMA_BIT_MASK(28))) {
 		pr_err("No usable DMA configuration\n");
@@ -624,8 +626,9 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 #endif
 
 	/* FIXME when PCI/DMA subsystems are fixed.
-	   We set both dma_mask and consistent_dma_mask back to 32 bits
-	   to indicate the card can do 32-bit DMA addressing */
+	 * We set both dma_mask and consistent_dma_mask back to 32 bits
+	 * to indicate the card can do 32-bit DMA addressing
+	 */
 	if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) ||
 	    dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
 		pr_err("No usable DMA configuration\n");

From d1406175f96869b653ee1071266a78cb8c70ab80 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 24 May 2021 22:47:17 +0800
Subject: [PATCH 0506/3292] net: wan: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/wanxl.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index 18de5f1bb0ed..f22e48415e6f 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -210,18 +210,18 @@ static inline void wanxl_rx_intr(struct card *card)
 
 	while (desc = &card->status->rx_descs[card->rx_in],
 	       desc->stat != PACKET_EMPTY) {
-		if ((desc->stat & PACKET_PORT_MASK) > card->n_ports)
+		if ((desc->stat & PACKET_PORT_MASK) > card->n_ports) {
 			pr_crit("%s: received packet for nonexistent port\n",
 				pci_name(card->pdev));
-		else {
+		} else {
 			struct sk_buff *skb = card->rx_skbs[card->rx_in];
 			struct port *port = &card->ports[desc->stat &
 						    PACKET_PORT_MASK];
 			struct net_device *dev = port->dev;
 
-			if (!skb)
+			if (!skb) {
 				dev->stats.rx_dropped++;
-			else {
+			} else {
 				dma_unmap_single(&card->pdev->dev,
 						 desc->address, BUFFER_LENGTH,
 						 DMA_FROM_DEVICE);

From 9f0c317f6aa12b160103ee3946d79276c14b95e2 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 22 May 2021 09:23:41 -0700
Subject: [PATCH 0507/3292] libbpf: Add support for new llvm bpf relocations

LLVM patch https://reviews.llvm.org/D102712
narrowed the scope of existing R_BPF_64_64
and R_BPF_64_32 relocations, and added three
new relocations, R_BPF_64_ABS64, R_BPF_64_ABS32
and R_BPF_64_NODYLD32. The main motivation is
to make relocations linker friendly.

This change, unfortunately, breaks libbpf build,
and we will see errors like below:
  libbpf: ELF relo #0 in section #6 has unexpected type 2 in
     /home/yhs/work/bpf-next/tools/testing/selftests/bpf/bpf_tcp_nogpl.o
  Error: failed to link
     '/home/yhs/work/bpf-next/tools/testing/selftests/bpf/bpf_tcp_nogpl.o':
     Unknown error -22 (-22)
The new relocation R_BPF_64_ABS64 is generated
and libbpf linker sanity check doesn't understand it.
Relocation section '.rel.struct_ops' at offset 0x1410 contains 1 entries:
    Offset             Info             Type               Symbol's Value  Symbol's Name
0000000000000018  0000000700000002 R_BPF_64_ABS64         0000000000000000 nogpltcp_init

Look at the selftests/bpf/bpf_tcp_nogpl.c,
  void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
  {
  }

  SEC(".struct_ops")
  struct tcp_congestion_ops bpf_nogpltcp = {
          .init           = (void *)nogpltcp_init,
          .name           = "bpf_nogpltcp",
  };
The new llvm relocation scheme categorizes 'nogpltcp_init' reference
as R_BPF_64_ABS64 instead of R_BPF_64_64 which is used to specify
ld_imm64 relocation in the new scheme.

Let us fix the linker sanity checking by including
R_BPF_64_ABS64 and R_BPF_64_ABS32. There is no need to
check R_BPF_64_NODYLD32 which is used for .BTF and .BTF.ext.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210522162341.3687617-1-yhs@fb.com
---
 tools/lib/bpf/libbpf_internal.h | 6 ++++++
 tools/lib/bpf/linker.c          | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 55d9b4dca64f..e2db08573bf0 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -28,6 +28,12 @@
 #ifndef R_BPF_64_64
 #define R_BPF_64_64 1
 #endif
+#ifndef R_BPF_64_ABS64
+#define R_BPF_64_ABS64 2
+#endif
+#ifndef R_BPF_64_ABS32
+#define R_BPF_64_ABS32 3
+#endif
 #ifndef R_BPF_64_32
 #define R_BPF_64_32 10
 #endif
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index b594a88620ce..1dca41a24f75 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -892,7 +892,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se
 		size_t sym_idx = ELF64_R_SYM(relo->r_info);
 		size_t sym_type = ELF64_R_TYPE(relo->r_info);
 
-		if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+		if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
+		    sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
 			pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
 				i, sec->sec_idx, sym_type, obj->filename);
 			return -EINVAL;

From 4ce7d68beb9e63d0a0a427cc2b89ec0c68f24b3d Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Sun, 23 May 2021 20:44:08 +0530
Subject: [PATCH 0508/3292] samples: bpf: Ix kernel-doc syntax in file header

The opening comment mark '/**' is used for highlighting the beginning of
kernel-doc comments.
The header for samples/bpf/ibumad_kern.c follows this syntax, but
the content inside does not comply with kernel-doc.

This line was probably not meant for kernel-doc parsing, but is parsed
due to the presence of kernel-doc like comment syntax(i.e, '/**'), which
causes unexpected warnings from kernel-doc:
warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * ibumad BPF sample kernel side

Provide a simple fix by replacing this occurrence with general comment
format, i.e. '/*', to prevent kernel-doc from parsing it.

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/bpf/20210523151408.22280-1-yashsri421@gmail.com
---
 samples/bpf/ibumad_kern.c | 2 +-
 samples/bpf/ibumad_user.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c
index 26dcd4dde946..9b193231024a 100644
--- a/samples/bpf/ibumad_kern.c
+++ b/samples/bpf/ibumad_kern.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 
-/**
+/*
  * ibumad BPF sample kernel side
  *
  * This program is free software; you can redistribute it and/or
diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c
index d83d8102f489..0746ca516097 100644
--- a/samples/bpf/ibumad_user.c
+++ b/samples/bpf/ibumad_user.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 
-/**
+/*
  * ibumad BPF sample user side
  *
  * This program is free software; you can redistribute it and/or

From 8fb33b6055300a23f26868680c22a5726834785e Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 25 May 2021 10:56:59 +0800
Subject: [PATCH 0509/3292] bpf: Fix spelling mistakes

Fix some spelling mistakes in comments:
aother ==> another
Netiher ==> Neither
desribe ==> describe
intializing ==> initializing
funciton ==> function
wont ==> won't and move the word 'the' at the end to the next line
accross ==> across
pathes ==> paths
triggerred ==> triggered
excute ==> execute
ether ==> either
conervative ==> conservative
convetion ==> convention
markes ==> marks
interpeter ==> interpreter

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210525025659.8898-2-thunder.leizhen@huawei.com
---
 include/linux/bpf_local_storage.h |  4 ++--
 kernel/bpf/bpf_inode_storage.c    |  2 +-
 kernel/bpf/btf.c                  |  6 +++---
 kernel/bpf/devmap.c               |  4 ++--
 kernel/bpf/hashtab.c              |  4 ++--
 kernel/bpf/reuseport_array.c      |  2 +-
 kernel/bpf/trampoline.c           |  2 +-
 kernel/bpf/verifier.c             | 12 ++++++------
 8 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index b902c580c48d..24496bc28e7b 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -58,7 +58,7 @@ struct bpf_local_storage_data {
 	 * from the object's bpf_local_storage.
 	 *
 	 * Put it in the same cacheline as the data to minimize
-	 * the number of cachelines access during the cache hit case.
+	 * the number of cachelines accessed during the cache hit case.
 	 */
 	struct bpf_local_storage_map __rcu *smap;
 	u8 data[] __aligned(8);
@@ -71,7 +71,7 @@ struct bpf_local_storage_elem {
 	struct bpf_local_storage __rcu *local_storage;
 	struct rcu_head rcu;
 	/* 8 bytes hole */
-	/* The data is stored in aother cacheline to minimize
+	/* The data is stored in another cacheline to minimize
 	 * the number of cachelines access during a cache hit.
 	 */
 	struct bpf_local_storage_data sdata ____cacheline_aligned;
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 2921ca39a93e..96ceed0e0fb5 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -72,7 +72,7 @@ void bpf_inode_storage_free(struct inode *inode)
 		return;
 	}
 
-	/* Netiher the bpf_prog nor the bpf-map's syscall
+	/* Neither the bpf_prog nor the bpf-map's syscall
 	 * could be modifying the local_storage->list now.
 	 * Thus, no elem can be added-to or deleted-from the
 	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 85716327c375..a6e39c5ea0bf 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -51,7 +51,7 @@
  * The BTF type section contains a list of 'struct btf_type' objects.
  * Each one describes a C type.  Recall from the above section
  * that a 'struct btf_type' object could be immediately followed by extra
- * data in order to desribe some particular C types.
+ * data in order to describe some particular C types.
  *
  * type_id:
  * ~~~~~~~
@@ -1143,7 +1143,7 @@ static void *btf_show_obj_safe(struct btf_show *show,
 
 	/*
 	 * We need a new copy to our safe object, either because we haven't
-	 * yet copied and are intializing safe data, or because the data
+	 * yet copied and are initializing safe data, or because the data
 	 * we want falls outside the boundaries of the safe object.
 	 */
 	if (!safe) {
@@ -3417,7 +3417,7 @@ static struct btf_kind_operations func_proto_ops = {
 	 * BTF_KIND_FUNC_PROTO cannot be directly referred by
 	 * a struct's member.
 	 *
-	 * It should be a funciton pointer instead.
+	 * It should be a function pointer instead.
 	 * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO)
 	 *
 	 * Hence, there is no btf_func_check_member().
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index aa516472ce46..d60d617ec0d7 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -382,8 +382,8 @@ void __dev_flush(void)
 }
 
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
- * update happens in parallel here a dev_put wont happen until after reading the
- * ifindex.
+ * update happens in parallel here a dev_put won't happen until after reading
+ * the ifindex.
  */
 static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 9da0a0413a53..6f6681b07364 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -46,12 +46,12 @@
  * events, kprobes and tracing to be invoked before the prior invocation
  * from one of these contexts completed. sys_bpf() uses the same mechanism
  * by pinning the task to the current CPU and incrementing the recursion
- * protection accross the map operation.
+ * protection across the map operation.
  *
  * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
  * operations like memory allocations (even with GFP_ATOMIC) from atomic
  * contexts. This is required because even with GFP_ATOMIC the memory
- * allocator calls into code pathes which acquire locks with long held lock
+ * allocator calls into code paths which acquire locks with long held lock
  * sections. To ensure the deterministic behaviour these locks are regular
  * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
  * true atomic contexts on an RT kernel are the low level hardware
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 4838922f723d..93a55391791a 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -102,7 +102,7 @@ static void reuseport_array_free(struct bpf_map *map)
 	/*
 	 * ops->map_*_elem() will not be able to access this
 	 * array now. Hence, this function only races with
-	 * bpf_sk_reuseport_detach() which was triggerred by
+	 * bpf_sk_reuseport_detach() which was triggered by
 	 * close() or disconnect().
 	 *
 	 * This function and bpf_sk_reuseport_detach() are
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 2d44b5aa0057..28a3630c48ee 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -552,7 +552,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
  * __bpf_prog_enter returns:
  * 0 - skip execution of the bpf prog
  * 1 - execute bpf prog
- * [2..MAX_U64] - excute bpf prog and record execution time.
+ * [2..MAX_U64] - execute bpf prog and record execution time.
  *     This is start time.
  */
 u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9189eecb26dd..1de4b8c6ee42 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -47,7 +47,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * - unreachable insns exist (shouldn't be a forest. program = one function)
  * - out of bounds or malformed jumps
  * The second pass is all possible path descent from the 1st insn.
- * Since it's analyzing all pathes through the program, the length of the
+ * Since it's analyzing all paths through the program, the length of the
  * analysis is limited to 64k insn, which may be hit even if total number of
  * insn is less then 4K, but there are too many branches that change stack/regs.
  * Number of 'branches to be analyzed' is limited to 1k
@@ -132,7 +132,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * If it's ok, then verifier allows this BPF_CALL insn and looks at
  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
- * returns ether pointer to map value or NULL.
+ * returns either pointer to map value or NULL.
  *
  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
  * insn, the register holding that pointer in the true branch changes state to
@@ -2616,7 +2616,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 		if (dst_reg != BPF_REG_FP) {
 			/* The backtracking logic can only recognize explicit
 			 * stack slot address like [fp - 8]. Other spill of
-			 * scalar via different register has to be conervative.
+			 * scalar via different register has to be conservative.
 			 * Backtrack from here and mark all registers as precise
 			 * that contributed into 'reg' being a constant.
 			 */
@@ -9053,7 +9053,7 @@ static int check_return_code(struct bpf_verifier_env *env)
 	    !prog->aux->attach_func_proto->type)
 		return 0;
 
-	/* eBPF calling convetion is such that R0 is used
+	/* eBPF calling convention is such that R0 is used
 	 * to return the value from eBPF program.
 	 * Make sure that it's readable at this time
 	 * of bpf_exit, which means that program wrote
@@ -9844,7 +9844,7 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
  * Since the verifier pushes the branch states as it sees them while exploring
  * the program the condition of walking the branch instruction for the second
  * time means that all states below this branch were already explored and
- * their final liveness markes are already propagated.
+ * their final liveness marks are already propagated.
  * Hence when the verifier completes the search of state list in is_state_visited()
  * we can call this clean_live_states() function to mark all liveness states
  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
@@ -12464,7 +12464,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
 
 			/* mark bpf_tail_call as different opcode to avoid
-			 * conditional branch in the interpeter for every normal
+			 * conditional branch in the interpreter for every normal
 			 * call and to prevent accidental JITing by JIT compiler
 			 * that doesn't support bpf_tail_call yet
 			 */

From 2978996f620001f4e748c79af0fe89be729ef58d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:03 -0700
Subject: [PATCH 0510/3292] x86/entry: Use int everywhere for system call
 numbers

System call numbers are defined as int, so use int everywhere for system
call numbers. This is strictly a cleanup; it should not change anything
user visible; all ABI changes have been done in the preceeding patches.

[ tglx: Replaced the unsigned long cast ]

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-7-hpa@zytor.com
---
 arch/x86/entry/common.c        | 87 +++++++++++++++++++++++-----------
 arch/x86/include/asm/syscall.h |  2 +-
 2 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index f51bc17262db..ee95fe3f1518 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -36,49 +36,81 @@
 #include <asm/irq_stack.h>
 
 #ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr)
+
+static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
+{
+	/*
+	 * Convert negative numbers to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int unr = nr;
+
+	if (likely(unr < NR_syscalls)) {
+		unr = array_index_nospec(unr, NR_syscalls);
+		regs->ax = sys_call_table[unr](regs);
+		return true;
+	}
+	return false;
+}
+
+static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
+{
+	/*
+	 * Adjust the starting offset of the table, and convert numbers
+	 * < __X32_SYSCALL_BIT to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int xnr = nr - __X32_SYSCALL_BIT;
+
+	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
+		xnr = array_index_nospec(xnr, X32_NR_syscalls);
+		regs->ax = x32_sys_call_table[xnr](regs);
+		return true;
+	}
+	return false;
+}
+
+__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
 {
 	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
-	if (likely(nr < NR_syscalls)) {
-		nr = array_index_nospec(nr, NR_syscalls);
-		regs->ax = sys_call_table[nr](regs);
-#ifdef CONFIG_X86_X32_ABI
-	} else if (likely((nr & __X32_SYSCALL_BIT) &&
-			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
-		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
-					X32_NR_syscalls);
-		regs->ax = x32_sys_call_table[nr](regs);
-#endif
-	} else if (unlikely((int)nr != -1)) {
+
+	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
+		/* Invalid system call, but still a system call. */
 		regs->ax = __x64_sys_ni_syscall(regs);
 	}
+
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
 }
 #endif
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
+static __always_inline int syscall_32_enter(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
 		current_thread_info()->status |= TS_COMPAT;
 
-	return (unsigned int)regs->orig_ax;
+	return (int)regs->orig_ax;
 }
 
 /*
  * Invoke a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.
  */
-static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
-						  unsigned int nr)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 {
-	if (likely(nr < IA32_NR_syscalls)) {
-		nr = array_index_nospec(nr, IA32_NR_syscalls);
-		regs->ax = ia32_sys_call_table[nr](regs);
-	} else if (unlikely((int)nr != -1)) {
+	/*
+	 * Convert negative numbers to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int unr = nr;
+
+	if (likely(unr < IA32_NR_syscalls)) {
+		unr = array_index_nospec(unr, IA32_NR_syscalls);
+		regs->ax = ia32_sys_call_table[unr](regs);
+	} else if (nr != -1) {
 		regs->ax = __ia32_sys_ni_syscall(regs);
 	}
 }
@@ -86,15 +118,15 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
 /* Handles int $0x80 */
 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
-	unsigned int nr = syscall_32_enter(regs);
+	int nr = syscall_32_enter(regs);
 
 	add_random_kstack_offset();
 	/*
-	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
-	 * orig_ax, the unsigned int return value truncates it.  This may
-	 * or may not be necessary, but it matches the old asm behavior.
+	 * Subtlety here: if ptrace pokes something larger than 2^31-1 into
+	 * orig_ax, the int return value truncates it. This matches
+	 * the semantics of syscall_get_nr().
 	 */
-	nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+	nr = syscall_enter_from_user_mode(regs, nr);
 	instrumentation_begin();
 
 	do_syscall_32_irqs_on(regs, nr);
@@ -105,7 +137,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
-	unsigned int nr = syscall_32_enter(regs);
+	int nr = syscall_32_enter(regs);
 	int res;
 
 	add_random_kstack_offset();
@@ -140,8 +172,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 		return false;
 	}
 
-	/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
-	nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+	nr = syscall_enter_from_user_mode_work(regs, nr);
 
 	/* Now this is just like a normal syscall. */
 	do_syscall_32_irqs_on(regs, nr);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f6593cafdbd9..f7e2d82d24fb 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -159,7 +159,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(struct pt_regs *regs, unsigned long nr);
+void do_syscall_64(struct pt_regs *regs, int nr);
 void do_int80_syscall_32(struct pt_regs *regs);
 long do_fast_syscall_32(struct pt_regs *regs);
 

From a720a2a0ad6cb6f769b6c7cbc3c54287a7d54ff8 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 21 May 2021 10:33:01 +0200
Subject: [PATCH 0511/3292] xsk: Use kvcalloc to support large umems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use kvcalloc() instead of kcalloc() to support large umems with, on my
server, one million pages or more in the umem.

Reported-by: Dan Siemon <dan@coverfire.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210521083301.26921-1-magnus.karlsson@gmail.com
---
 net/xdp/xdp_umem.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 56a28a686988..f01ef6bda390 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -27,7 +27,7 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
 	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
-	kfree(umem->pgs);
+	kvfree(umem->pgs);
 	umem->pgs = NULL;
 }
 
@@ -99,8 +99,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 	long npgs;
 	int err;
 
-	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
-			    GFP_KERNEL | __GFP_NOWARN);
+	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
 	if (!umem->pgs)
 		return -ENOMEM;
 
@@ -123,7 +122,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 out_pin:
 	xdp_umem_unpin_pages(umem);
 out_pgs:
-	kfree(umem->pgs);
+	kvfree(umem->pgs);
 	umem->pgs = NULL;
 	return err;
 }

From c937aabbd7f46fa3b283744169a6b48dafbd6cc7 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:33 -0400
Subject: [PATCH 0512/3292] fs: dlm: always run complete for possible waiters

This patch changes the ping_members() result that we always run
complete() for possible waiters. We handle the -EINTR error code as
successful. This error code is returned if the recovery is stopped which
is likely that a new recovery is triggered with a new members
configuration and ping_members() runs again.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/member.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index ceef3f2074ff..48245ba3c640 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -576,12 +576,18 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	*neg_out = neg;
 
 	error = ping_members(ls);
-	if (!error || error == -EPROTO) {
-		/* new_lockspace() may be waiting to know if the config
-		   is good or bad */
-		ls->ls_members_result = error;
-		complete(&ls->ls_members_done);
-	}
+	/* error -EINTR means that a new recovery action is triggered.
+	 * We ignore this recovery action and let run the new one which might
+	 * have new member configuration.
+	 */
+	if (error == -EINTR)
+		error = 0;
+
+	/* new_lockspace() may be waiting to know if the config
+	 * is good or bad
+	 */
+	ls->ls_members_result = error;
+	complete(&ls->ls_members_done);
 
 	log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes);
 	return error;

From 2df6b7627a81b1407378125246eacdb1d8c90036 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:34 -0400
Subject: [PATCH 0513/3292] fs: dlm: add dlm macros for ratelimit log

This patch add ratelimit macro to dlm subsystem and will set the
connecting log message to ratelimit. In non blocking connecting cases it
will print out this message a lot.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h | 2 ++
 fs/dlm/lowcomms.c     | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 04fe9f525ac7..ae3fdf6d9cda 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -60,6 +60,8 @@ struct dlm_mhandle;
 
 #define log_print(fmt, args...) \
 	printk(KERN_ERR "dlm: "fmt"\n" , ##args)
+#define log_print_ratelimited(fmt, args...) \
+	printk_ratelimited(KERN_ERR "dlm: "fmt"\n", ##args)
 #define log_error(ls, fmt, args...) \
 	printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 166e36fcf3e4..15810701b13e 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1075,7 +1075,7 @@ static void sctp_connect_to_sock(struct connection *con)
 
 	make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len);
 
-	log_print("connecting to %d", con->nodeid);
+	log_print_ratelimited("connecting to %d", con->nodeid);
 
 	/* Turn off Nagle's algorithm */
 	sctp_sock_set_nodelay(sock->sk);
@@ -1171,7 +1171,7 @@ static void tcp_connect_to_sock(struct connection *con)
 
 	make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
 
-	log_print("connecting to %d", con->nodeid);
+	log_print_ratelimited("connecting to %d", con->nodeid);
 
 	/* Turn off Nagle's algorithm */
 	tcp_sock_set_nodelay(sock->sk);

From b38bc9c2b3171f4411d80015ecb876bc6f9bcd26 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:35 -0400
Subject: [PATCH 0514/3292] fs: dlm: fix srcu read lock usage

This patch holds the srcu connection read lock in cases where we lookup
the connections and accessing it. We don't hold the srcu lock in workers
function where the scheduled worker is part of the connection itself.
The connection should not be freed if any worker is scheduled or
pending.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 75 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 23 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 15810701b13e..7c7a31377f27 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -113,6 +113,7 @@ struct writequeue_entry {
 	int len;
 	int end;
 	int users;
+	int idx; /* get()/commit() idx exchange */
 	struct connection *con;
 };
 
@@ -163,21 +164,14 @@ static inline int nodeid_hash(int nodeid)
 	return nodeid & (CONN_HASH_SIZE-1);
 }
 
-static struct connection *__find_con(int nodeid)
+static struct connection *__find_con(int nodeid, int r)
 {
-	int r, idx;
 	struct connection *con;
 
-	r = nodeid_hash(nodeid);
-
-	idx = srcu_read_lock(&connections_srcu);
 	hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
-		if (con->nodeid == nodeid) {
-			srcu_read_unlock(&connections_srcu, idx);
+		if (con->nodeid == nodeid)
 			return con;
-		}
 	}
-	srcu_read_unlock(&connections_srcu, idx);
 
 	return NULL;
 }
@@ -216,7 +210,8 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
 	struct connection *con, *tmp;
 	int r, ret;
 
-	con = __find_con(nodeid);
+	r = nodeid_hash(nodeid);
+	con = __find_con(nodeid, r);
 	if (con || !alloc)
 		return con;
 
@@ -230,8 +225,6 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
 		return NULL;
 	}
 
-	r = nodeid_hash(nodeid);
-
 	spin_lock(&connections_lock);
 	/* Because multiple workqueues/threads calls this function it can
 	 * race on multiple cpu's. Instead of locking hot path __find_con()
@@ -239,7 +232,7 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
 	 * under protection of connections_lock. If this is the case we
 	 * abort our connection creation and return the existing connection.
 	 */
-	tmp = __find_con(nodeid);
+	tmp = __find_con(nodeid, r);
 	if (tmp) {
 		spin_unlock(&connections_lock);
 		kfree(con->rx_buf);
@@ -256,15 +249,13 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
 /* Loop round all connections */
 static void foreach_conn(void (*conn_func)(struct connection *c))
 {
-	int i, idx;
+	int i;
 	struct connection *con;
 
-	idx = srcu_read_lock(&connections_srcu);
 	for (i = 0; i < CONN_HASH_SIZE; i++) {
 		hlist_for_each_entry_rcu(con, &connection_hash[i], list)
 			conn_func(con);
 	}
-	srcu_read_unlock(&connections_srcu, idx);
 }
 
 static struct dlm_node_addr *find_node_addr(int nodeid)
@@ -518,14 +509,21 @@ static void lowcomms_state_change(struct sock *sk)
 int dlm_lowcomms_connect_node(int nodeid)
 {
 	struct connection *con;
+	int idx;
 
 	if (nodeid == dlm_our_nodeid())
 		return 0;
 
+	idx = srcu_read_lock(&connections_srcu);
 	con = nodeid2con(nodeid, GFP_NOFS);
-	if (!con)
+	if (!con) {
+		srcu_read_unlock(&connections_srcu, idx);
 		return -ENOMEM;
+	}
+
 	lowcomms_connect_sock(con);
+	srcu_read_unlock(&connections_srcu, idx);
+
 	return 0;
 }
 
@@ -864,7 +862,7 @@ static int accept_from_sock(struct listen_connection *con)
 	int result;
 	struct sockaddr_storage peeraddr;
 	struct socket *newsock;
-	int len;
+	int len, idx;
 	int nodeid;
 	struct connection *newcon;
 	struct connection *addcon;
@@ -907,8 +905,10 @@ static int accept_from_sock(struct listen_connection *con)
 	 *  the same time and the connections cross on the wire.
 	 *  In this case we store the incoming one in "othercon"
 	 */
+	idx = srcu_read_lock(&connections_srcu);
 	newcon = nodeid2con(nodeid, GFP_NOFS);
 	if (!newcon) {
+		srcu_read_unlock(&connections_srcu, idx);
 		result = -ENOMEM;
 		goto accept_err;
 	}
@@ -924,6 +924,7 @@ static int accept_from_sock(struct listen_connection *con)
 			if (!othercon) {
 				log_print("failed to allocate incoming socket");
 				mutex_unlock(&newcon->sock_mutex);
+				srcu_read_unlock(&connections_srcu, idx);
 				result = -ENOMEM;
 				goto accept_err;
 			}
@@ -932,6 +933,7 @@ static int accept_from_sock(struct listen_connection *con)
 			if (result < 0) {
 				kfree(othercon);
 				mutex_unlock(&newcon->sock_mutex);
+				srcu_read_unlock(&connections_srcu, idx);
 				goto accept_err;
 			}
 
@@ -966,6 +968,8 @@ static int accept_from_sock(struct listen_connection *con)
 	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
 		queue_work(recv_workqueue, &addcon->rwork);
 
+	srcu_read_unlock(&connections_srcu, idx);
+
 	return 0;
 
 accept_err:
@@ -1403,7 +1407,9 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 
 void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 {
+	struct writequeue_entry *e;
 	struct connection *con;
+	int idx;
 
 	if (len > DEFAULT_BUFFER_SIZE ||
 	    len < sizeof(struct dlm_header)) {
@@ -1413,11 +1419,23 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 		return NULL;
 	}
 
+	idx = srcu_read_lock(&connections_srcu);
 	con = nodeid2con(nodeid, allocation);
-	if (!con)
+	if (!con) {
+		srcu_read_unlock(&connections_srcu, idx);
 		return NULL;
+	}
 
-	return new_wq_entry(con, len, allocation, ppc);
+	e = new_wq_entry(con, len, allocation, ppc);
+	if (!e) {
+		srcu_read_unlock(&connections_srcu, idx);
+		return NULL;
+	}
+
+	/* we assume if successful commit must called */
+	e->idx = idx;
+
+	return e;
 }
 
 void dlm_lowcomms_commit_buffer(void *mh)
@@ -1435,10 +1453,12 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	spin_unlock(&con->writequeue_lock);
 
 	queue_work(send_workqueue, &con->swork);
+	srcu_read_unlock(&connections_srcu, e->idx);
 	return;
 
 out:
 	spin_unlock(&con->writequeue_lock);
+	srcu_read_unlock(&connections_srcu, e->idx);
 	return;
 }
 
@@ -1532,8 +1552,10 @@ int dlm_lowcomms_close(int nodeid)
 {
 	struct connection *con;
 	struct dlm_node_addr *na;
+	int idx;
 
 	log_print("closing connection to node %d", nodeid);
+	idx = srcu_read_lock(&connections_srcu);
 	con = nodeid2con(nodeid, 0);
 	if (con) {
 		set_bit(CF_CLOSE, &con->flags);
@@ -1542,6 +1564,7 @@ int dlm_lowcomms_close(int nodeid)
 		if (con->othercon)
 			clean_one_writequeue(con->othercon);
 	}
+	srcu_read_unlock(&connections_srcu, idx);
 
 	spin_lock(&dlm_node_addrs_spin);
 	na = find_node_addr(nodeid);
@@ -1621,6 +1644,8 @@ static void shutdown_conn(struct connection *con)
 
 void dlm_lowcomms_shutdown(void)
 {
+	int idx;
+
 	/* Set all the flags to prevent any
 	 * socket activity.
 	 */
@@ -1633,7 +1658,9 @@ void dlm_lowcomms_shutdown(void)
 
 	dlm_close_sock(&listen_con.sock);
 
+	idx = srcu_read_lock(&connections_srcu);
 	foreach_conn(shutdown_conn);
+	srcu_read_unlock(&connections_srcu, idx);
 }
 
 static void _stop_conn(struct connection *con, bool and_other)
@@ -1682,7 +1709,7 @@ static void free_conn(struct connection *con)
 
 static void work_flush(void)
 {
-	int ok, idx;
+	int ok;
 	int i;
 	struct connection *con;
 
@@ -1693,7 +1720,6 @@ static void work_flush(void)
 			flush_workqueue(recv_workqueue);
 		if (send_workqueue)
 			flush_workqueue(send_workqueue);
-		idx = srcu_read_lock(&connections_srcu);
 		for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
 			hlist_for_each_entry_rcu(con, &connection_hash[i],
 						 list) {
@@ -1707,14 +1733,17 @@ static void work_flush(void)
 				}
 			}
 		}
-		srcu_read_unlock(&connections_srcu, idx);
 	} while (!ok);
 }
 
 void dlm_lowcomms_stop(void)
 {
+	int idx;
+
+	idx = srcu_read_lock(&connections_srcu);
 	work_flush();
 	foreach_conn(free_conn);
+	srcu_read_unlock(&connections_srcu, idx);
 	work_stop();
 	deinit_local();
 }

From 7443bc962509912c70c587db71449daff26b9678 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:36 -0400
Subject: [PATCH 0515/3292] fs: dlm: set is othercon flag

There is a is othercon flag which is never used, this patch will set it
and printout a warning if the othercon ever sends a dlm message which
should never be the case.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 7c7a31377f27..4944aef24aa5 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -938,6 +938,7 @@ static int accept_from_sock(struct listen_connection *con)
 			}
 
 			lockdep_set_subclass(&othercon->sock_mutex, 1);
+			set_bit(CF_IS_OTHERCON, &othercon->flags);
 			newcon->othercon = othercon;
 		} else {
 			/* close other sock con if we have something new */
@@ -1601,6 +1602,8 @@ static void process_send_sockets(struct work_struct *work)
 {
 	struct connection *con = container_of(work, struct connection, swork);
 
+	WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
+
 	clear_bit(CF_WRITE_PENDING, &con->flags);
 	if (con->sock == NULL) /* not mutex protected so check it inside too */
 		con->connect_action(con);

From ba868d9deaab2bb1c09e50650127823925154802 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:37 -0400
Subject: [PATCH 0516/3292] fs: dlm: reconnect if socket error report occurs

This patch will change the reconnect handling that if an error occurs
if a socket error callback is occurred. This will also handle reconnects
in a non blocking connecting case which is currently missing. If error
ECONNREFUSED is reported we delay the reconnect by one second.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 60 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4944aef24aa5..051f22dbb83a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -79,6 +79,8 @@ struct connection {
 #define CF_CLOSING 8
 #define CF_SHUTDOWN 9
 #define CF_CONNECTED 10
+#define CF_RECONNECT 11
+#define CF_DELAY_CONNECT 12
 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
 	spinlock_t writequeue_lock;
 	void (*connect_action) (struct connection *);	/* What to do to connect */
@@ -87,6 +89,7 @@ struct connection {
 #define MAX_CONNECT_RETRIES 3
 	struct hlist_node list;
 	struct connection *othercon;
+	struct connection *sendcon;
 	struct work_struct rwork; /* Receive workqueue */
 	struct work_struct swork; /* Send workqueue */
 	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
@@ -585,6 +588,22 @@ static void lowcomms_error_report(struct sock *sk)
 				   dlm_config.ci_tcp_port, sk->sk_err,
 				   sk->sk_err_soft);
 	}
+
+	/* below sendcon only handling */
+	if (test_bit(CF_IS_OTHERCON, &con->flags))
+		con = con->sendcon;
+
+	switch (sk->sk_err) {
+	case ECONNREFUSED:
+		set_bit(CF_DELAY_CONNECT, &con->flags);
+		break;
+	default:
+		break;
+	}
+
+	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
+		queue_work(send_workqueue, &con->swork);
+
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	if (orig_report)
@@ -702,6 +721,8 @@ static void close_connection(struct connection *con, bool and_other,
 	con->rx_leftover = 0;
 	con->retries = 0;
 	clear_bit(CF_CONNECTED, &con->flags);
+	clear_bit(CF_DELAY_CONNECT, &con->flags);
+	clear_bit(CF_RECONNECT, &con->flags);
 	mutex_unlock(&con->sock_mutex);
 	clear_bit(CF_CLOSING, &con->flags);
 }
@@ -840,18 +861,15 @@ out_resched:
 
 out_close:
 	mutex_unlock(&con->sock_mutex);
-	if (ret != -EAGAIN) {
-		/* Reconnect when there is something to send */
+	if (ret == 0) {
 		close_connection(con, false, true, false);
-		if (ret == 0) {
-			log_print("connection %p got EOF from %d",
-				  con, con->nodeid);
-			/* handling for tcp shutdown */
-			clear_bit(CF_SHUTDOWN, &con->flags);
-			wake_up(&con->shutdown_wait);
-			/* signal to breaking receive worker */
-			ret = -1;
-		}
+		log_print("connection %p got EOF from %d",
+			  con, con->nodeid);
+		/* handling for tcp shutdown */
+		clear_bit(CF_SHUTDOWN, &con->flags);
+		wake_up(&con->shutdown_wait);
+		/* signal to breaking receive worker */
+		ret = -1;
 	}
 	return ret;
 }
@@ -940,6 +958,7 @@ static int accept_from_sock(struct listen_connection *con)
 			lockdep_set_subclass(&othercon->sock_mutex, 1);
 			set_bit(CF_IS_OTHERCON, &othercon->flags);
 			newcon->othercon = othercon;
+			othercon->sendcon = newcon;
 		} else {
 			/* close other sock con if we have something new */
 			close_connection(othercon, false, true, false);
@@ -1504,7 +1523,7 @@ static void send_to_sock(struct connection *con)
 				cond_resched();
 				goto out;
 			} else if (ret < 0)
-				goto send_error;
+				goto out;
 		}
 
 		/* Don't starve people filling buffers */
@@ -1521,14 +1540,6 @@ out:
 	mutex_unlock(&con->sock_mutex);
 	return;
 
-send_error:
-	mutex_unlock(&con->sock_mutex);
-	close_connection(con, false, false, true);
-	/* Requeue the send work. When the work daemon runs again, it will try
-	   a new connection, then call this function again. */
-	queue_work(send_workqueue, &con->swork);
-	return;
-
 out_connect:
 	mutex_unlock(&con->sock_mutex);
 	queue_work(send_workqueue, &con->swork);
@@ -1605,8 +1616,15 @@ static void process_send_sockets(struct work_struct *work)
 	WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
 
 	clear_bit(CF_WRITE_PENDING, &con->flags);
-	if (con->sock == NULL) /* not mutex protected so check it inside too */
+
+	if (test_and_clear_bit(CF_RECONNECT, &con->flags))
+		close_connection(con, false, false, true);
+
+	if (con->sock == NULL) { /* not mutex protected so check it inside too */
+		if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
+			msleep(1000);
 		con->connect_action(con);
+	}
 	if (!list_empty(&con->writequeue))
 		send_to_sock(con);
 }

From c6aa00e3d20c2767ba3f57b64eb862572b9744b3 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:38 -0400
Subject: [PATCH 0517/3292] fs: dlm: cancel work sync othercon

These rx tx flags arguments are for signaling close_connection() from
which worker they are called. Obviously the receive worker cannot cancel
itself and vice versa for swork. For the othercon the receive worker
should only be used, however to avoid deadlocks we should pass the same
flags as the original close_connection() was called.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 051f22dbb83a..0a4851b3cd4b 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -715,7 +715,7 @@ static void close_connection(struct connection *con, bool and_other,
 
 	if (con->othercon && and_other) {
 		/* Will only re-enter once. */
-		close_connection(con->othercon, false, true, true);
+		close_connection(con->othercon, false, tx, rx);
 	}
 
 	con->rx_leftover = 0;

From 8aa31cbf20ad168c35dd83476629402aacbf5a44 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:39 -0400
Subject: [PATCH 0518/3292] fs: dlm: fix connection tcp EOF handling

This patch fixes the EOF handling for TCP that if and EOF is received we
will close the socket next time the writequeue runs empty. This is a
half-closed socket functionality which doesn't exists in SCTP. The
midcomms layer will do a half closed socket functionality on DLM side to
solve this problem for the SCTP case. However there is still the last ack
flying around but other reset functionality will take care of it if it got
lost.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 48 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0a4851b3cd4b..14ca3eda6a83 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -81,10 +81,13 @@ struct connection {
 #define CF_CONNECTED 10
 #define CF_RECONNECT 11
 #define CF_DELAY_CONNECT 12
+#define CF_EOF 13
 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
 	spinlock_t writequeue_lock;
+	atomic_t writequeue_cnt;
 	void (*connect_action) (struct connection *);	/* What to do to connect */
 	void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
+	bool (*eof_condition)(struct connection *con); /* What to do to eof check */
 	int retries;
 #define MAX_CONNECT_RETRIES 3
 	struct hlist_node list;
@@ -179,6 +182,11 @@ static struct connection *__find_con(int nodeid, int r)
 	return NULL;
 }
 
+static bool tcp_eof_condition(struct connection *con)
+{
+	return atomic_read(&con->writequeue_cnt);
+}
+
 static int dlm_con_init(struct connection *con, int nodeid)
 {
 	con->rx_buflen = dlm_config.ci_buffer_size;
@@ -190,6 +198,7 @@ static int dlm_con_init(struct connection *con, int nodeid)
 	mutex_init(&con->sock_mutex);
 	INIT_LIST_HEAD(&con->writequeue);
 	spin_lock_init(&con->writequeue_lock);
+	atomic_set(&con->writequeue_cnt, 0);
 	INIT_WORK(&con->swork, process_send_sockets);
 	INIT_WORK(&con->rwork, process_recv_sockets);
 	init_waitqueue_head(&con->shutdown_wait);
@@ -197,6 +206,7 @@ static int dlm_con_init(struct connection *con, int nodeid)
 	if (dlm_config.ci_protocol == 0) {
 		con->connect_action = tcp_connect_to_sock;
 		con->shutdown_action = dlm_tcp_shutdown;
+		con->eof_condition = tcp_eof_condition;
 	} else {
 		con->connect_action = sctp_connect_to_sock;
 	}
@@ -723,6 +733,7 @@ static void close_connection(struct connection *con, bool and_other,
 	clear_bit(CF_CONNECTED, &con->flags);
 	clear_bit(CF_DELAY_CONNECT, &con->flags);
 	clear_bit(CF_RECONNECT, &con->flags);
+	clear_bit(CF_EOF, &con->flags);
 	mutex_unlock(&con->sock_mutex);
 	clear_bit(CF_CLOSING, &con->flags);
 }
@@ -860,16 +871,26 @@ out_resched:
 	return -EAGAIN;
 
 out_close:
-	mutex_unlock(&con->sock_mutex);
 	if (ret == 0) {
-		close_connection(con, false, true, false);
 		log_print("connection %p got EOF from %d",
 			  con, con->nodeid);
-		/* handling for tcp shutdown */
-		clear_bit(CF_SHUTDOWN, &con->flags);
-		wake_up(&con->shutdown_wait);
+
+		if (con->eof_condition && con->eof_condition(con)) {
+			set_bit(CF_EOF, &con->flags);
+			mutex_unlock(&con->sock_mutex);
+		} else {
+			mutex_unlock(&con->sock_mutex);
+			close_connection(con, false, true, false);
+
+			/* handling for tcp shutdown */
+			clear_bit(CF_SHUTDOWN, &con->flags);
+			wake_up(&con->shutdown_wait);
+		}
+
 		/* signal to breaking receive worker */
 		ret = -1;
+	} else {
+		mutex_unlock(&con->sock_mutex);
 	}
 	return ret;
 }
@@ -1021,6 +1042,7 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
 
 	if (e->len == 0 && e->users == 0) {
 		list_del(&e->list);
+		atomic_dec(&e->con->writequeue_cnt);
 		free_entry(e);
 	}
 }
@@ -1417,6 +1439,7 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 
 	*ppc = page_address(e->page);
 	e->end += len;
+	atomic_inc(&con->writequeue_cnt);
 
 	spin_lock(&con->writequeue_lock);
 	list_add_tail(&e->list, &con->writequeue);
@@ -1536,6 +1559,21 @@ static void send_to_sock(struct connection *con)
 		writequeue_entry_complete(e, ret);
 	}
 	spin_unlock(&con->writequeue_lock);
+
+	/* close if we got EOF */
+	if (test_and_clear_bit(CF_EOF, &con->flags)) {
+		mutex_unlock(&con->sock_mutex);
+		close_connection(con, false, false, true);
+
+		/* handling for tcp shutdown */
+		clear_bit(CF_SHUTDOWN, &con->flags);
+		wake_up(&con->shutdown_wait);
+	} else {
+		mutex_unlock(&con->sock_mutex);
+	}
+
+	return;
+
 out:
 	mutex_unlock(&con->sock_mutex);
 	return;

From 6fb5cf9d4206f2cdccb05be1bf2307dab4e5babe Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:40 -0400
Subject: [PATCH 0519/3292] fs: dlm: public header in out utility

This patch allows to use header_out() and header_in() outside of dlm
util functionality.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/util.c | 4 ++--
 fs/dlm/util.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index cfd0d00b19ae..74a8c5bfe9b5 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -20,7 +20,7 @@
 #define DLM_ERRNO_ETIMEDOUT	       110
 #define DLM_ERRNO_EINPROGRESS	       115
 
-static void header_out(struct dlm_header *hd)
+void header_out(struct dlm_header *hd)
 {
 	hd->h_version		= cpu_to_le32(hd->h_version);
 	hd->h_lockspace		= cpu_to_le32(hd->h_lockspace);
@@ -28,7 +28,7 @@ static void header_out(struct dlm_header *hd)
 	hd->h_length		= cpu_to_le16(hd->h_length);
 }
 
-static void header_in(struct dlm_header *hd)
+void header_in(struct dlm_header *hd)
 {
 	hd->h_version		= le32_to_cpu(hd->h_version);
 	hd->h_lockspace		= le32_to_cpu(hd->h_lockspace);
diff --git a/fs/dlm/util.h b/fs/dlm/util.h
index cc719ca9397e..d46f23c7a6a0 100644
--- a/fs/dlm/util.h
+++ b/fs/dlm/util.h
@@ -15,6 +15,8 @@ void dlm_message_out(struct dlm_message *ms);
 void dlm_message_in(struct dlm_message *ms);
 void dlm_rcom_out(struct dlm_rcom *rc);
 void dlm_rcom_in(struct dlm_rcom *rc);
+void header_out(struct dlm_header *hd);
+void header_in(struct dlm_header *hd);
 
 #endif
 

From a070a91cf1402b5328d3517d1fccbdeec58d3f2d Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:41 -0400
Subject: [PATCH 0520/3292] fs: dlm: add more midcomms hooks

This patch prepares hooks to redirect to the midcomms layer which will
be used by the midcomms re-transmit handling.

There exists the new concept of stateless buffers allocation and
commits. This can be used to bypass the midcomms re-transmit handling. It
is used by RCOM_STATUS and RCOM_NAMES messages, because they have their
own ping-like re-transmit handling. As well these two messages will be
used to determine the DLM version per node, because these two messages
are per observation the first messages which are exchanged.

Cluster manager events for node membership are added to add support for
half-closed connections in cases that the peer connection get to
an end of file but DLM still holds membership of the node. In
this time DLM can still trigger new message which we should allow. After
the cluster manager node removal event occurs it safe to close the
connection.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c    |   3 +-
 fs/dlm/lock.c      |   8 ++--
 fs/dlm/lockspace.c |   7 ++--
 fs/dlm/member.c    |  17 ++++++--
 fs/dlm/midcomms.c  |  31 +++++++++++++-
 fs/dlm/midcomms.h  |   8 ++++
 fs/dlm/rcom.c      | 101 +++++++++++++++++++++++++++++++--------------
 7 files changed, 133 insertions(+), 42 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88d95d96e36c..01ae294743e9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -20,6 +20,7 @@
 #include <net/sock.h>
 
 #include "config.h"
+#include "midcomms.h"
 #include "lowcomms.h"
 
 /*
@@ -532,7 +533,7 @@ static void drop_comm(struct config_group *g, struct config_item *i)
 	struct dlm_comm *cm = config_item_to_comm(i);
 	if (local_comm == cm)
 		local_comm = NULL;
-	dlm_lowcomms_close(cm->nodeid);
+	dlm_midcomms_close(cm->nodeid);
 	while (cm->addr_count--)
 		kfree(cm->addr[cm->addr_count]);
 	config_item_put(i);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index b93df39d0915..b625ce92464a 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -59,7 +59,7 @@
 #include "dlm_internal.h"
 #include <linux/dlm_device.h>
 #include "memory.h"
-#include "lowcomms.h"
+#include "midcomms.h"
 #include "requestqueue.h"
 #include "util.h"
 #include "dir.h"
@@ -3534,10 +3534,10 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
 	char *mb;
 
 	/* get_buffer gives us a message handle (mh) that we need to
-	   pass into lowcomms_commit and a message buffer (mb) that we
+	   pass into midcomms_commit and a message buffer (mb) that we
 	   write our data into */
 
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+	mh = dlm_midcomms_get_mhandle(to_nodeid, mb_len, GFP_NOFS, &mb);
 	if (!mh)
 		return -ENOBUFS;
 
@@ -3589,7 +3589,7 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
 static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
 {
 	dlm_message_out(ms);
-	dlm_lowcomms_commit_buffer(mh);
+	dlm_midcomms_commit_mhandle(mh);
 	return 0;
 }
 
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index c14cf2b7faab..bf5c55ef9d0d 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -16,6 +16,7 @@
 #include "member.h"
 #include "recoverd.h"
 #include "dir.h"
+#include "midcomms.h"
 #include "lowcomms.h"
 #include "config.h"
 #include "memory.h"
@@ -390,7 +391,7 @@ static int threads_start(void)
 	}
 
 	/* Thread for sending/receiving messages for all lockspace's */
-	error = dlm_lowcomms_start();
+	error = dlm_midcomms_start();
 	if (error) {
 		log_print("cannot start dlm lowcomms %d", error);
 		goto scand_fail;
@@ -698,7 +699,7 @@ int dlm_new_lockspace(const char *name, const char *cluster,
 		error = 0;
 	if (!ls_count) {
 		dlm_scand_stop();
-		dlm_lowcomms_shutdown();
+		dlm_midcomms_shutdown();
 		dlm_lowcomms_stop();
 	}
  out:
@@ -787,7 +788,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 
 	if (ls_count == 1) {
 		dlm_scand_stop();
-		dlm_lowcomms_shutdown();
+		dlm_midcomms_shutdown();
 	}
 
 	dlm_callback_stop(ls);
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 48245ba3c640..63971c594bdc 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -15,6 +15,7 @@
 #include "recover.h"
 #include "rcom.h"
 #include "config.h"
+#include "midcomms.h"
 #include "lowcomms.h"
 
 int dlm_slots_version(struct dlm_header *h)
@@ -329,6 +330,7 @@ static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node)
 	memb->nodeid = node->nodeid;
 	memb->weight = node->weight;
 	memb->comm_seq = node->comm_seq;
+	dlm_midcomms_add_member(node->nodeid);
 	add_ordered_member(ls, memb);
 	ls->ls_num_nodes++;
 	return 0;
@@ -359,26 +361,34 @@ int dlm_is_removed(struct dlm_ls *ls, int nodeid)
 	return 0;
 }
 
-static void clear_memb_list(struct list_head *head)
+static void clear_memb_list(struct list_head *head,
+			    void (*after_del)(int nodeid))
 {
 	struct dlm_member *memb;
 
 	while (!list_empty(head)) {
 		memb = list_entry(head->next, struct dlm_member, list);
 		list_del(&memb->list);
+		if (after_del)
+			after_del(memb->nodeid);
 		kfree(memb);
 	}
 }
 
+static void clear_members_cb(int nodeid)
+{
+	dlm_midcomms_remove_member(nodeid);
+}
+
 void dlm_clear_members(struct dlm_ls *ls)
 {
-	clear_memb_list(&ls->ls_nodes);
+	clear_memb_list(&ls->ls_nodes, clear_members_cb);
 	ls->ls_num_nodes = 0;
 }
 
 void dlm_clear_members_gone(struct dlm_ls *ls)
 {
-	clear_memb_list(&ls->ls_nodes_gone);
+	clear_memb_list(&ls->ls_nodes_gone, NULL);
 }
 
 static void make_member_array(struct dlm_ls *ls)
@@ -552,6 +562,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 
 		neg++;
 		list_move(&memb->list, &ls->ls_nodes_gone);
+		dlm_midcomms_remove_member(memb->nodeid);
 		ls->ls_num_nodes--;
 		dlm_lsop_recover_slot(ls, memb);
 	}
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 1c6654a21ec4..1a280dda99d3 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -28,6 +28,36 @@
 #include "lock.h"
 #include "midcomms.h"
 
+struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
+					     gfp_t allocation, char **ppc)
+{
+	return dlm_lowcomms_get_buffer(nodeid, len, allocation, ppc);
+}
+
+void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
+{
+	dlm_lowcomms_commit_buffer(mh);
+}
+
+void dlm_midcomms_add_member(int nodeid) { }
+
+void dlm_midcomms_remove_member(int nodeid) { }
+
+int dlm_midcomms_start(void)
+{
+	return dlm_lowcomms_start();
+}
+
+void dlm_midcomms_shutdown(void)
+{
+	dlm_lowcomms_shutdown();
+}
+
+int dlm_midcomms_close(int nodeid)
+{
+	return dlm_lowcomms_close(nodeid);
+}
+
 /*
  * Called from the low-level comms layer to process a buffer of
  * commands.
@@ -101,4 +131,3 @@ skip:
 
 	return ret;
 }
-
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 61e90a921849..9ac1190ce277 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -13,6 +13,14 @@
 #define __MIDCOMMS_DOT_H__
 
 int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int buflen);
+struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
+					     gfp_t allocation, char **ppc);
+void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh);
+int dlm_midcomms_close(int nodeid);
+int dlm_midcomms_start(void);
+void dlm_midcomms_shutdown(void);
+void dlm_midcomms_add_member(int nodeid);
+void dlm_midcomms_remove_member(int nodeid);
 
 #endif				/* __MIDCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index f5b1bd65728d..2661674364af 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -27,20 +27,10 @@ static int rcom_response(struct dlm_ls *ls)
 	return test_bit(LSFL_RCOM_READY, &ls->ls_flags);
 }
 
-static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
-		       struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
+static void _create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
+			 struct dlm_rcom **rc_ret, char *mb, int mb_len)
 {
 	struct dlm_rcom *rc;
-	struct dlm_mhandle *mh;
-	char *mb;
-	int mb_len = sizeof(struct dlm_rcom) + len;
-
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
-	if (!mh) {
-		log_print("create_rcom to %d type %d len %d ENOBUFS",
-			  to_nodeid, type, len);
-		return -ENOBUFS;
-	}
 
 	rc = (struct dlm_rcom *) mb;
 
@@ -56,15 +46,64 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 	rc->rc_seq = ls->ls_recover_seq;
 	spin_unlock(&ls->ls_recover_lock);
 
-	*mh_ret = mh;
 	*rc_ret = rc;
+}
+
+static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
+		       struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
+{
+	int mb_len = sizeof(struct dlm_rcom) + len;
+	struct dlm_mhandle *mh;
+	char *mb;
+
+	mh = dlm_midcomms_get_mhandle(to_nodeid, mb_len, GFP_NOFS, &mb);
+	if (!mh) {
+		log_print("%s to %d type %d len %d ENOBUFS",
+			  __func__, to_nodeid, type, len);
+		return -ENOBUFS;
+	}
+
+	_create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len);
+	*mh_ret = mh;
 	return 0;
 }
 
+static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type,
+				 int len, struct dlm_rcom **rc_ret,
+				 void **mh_ret)
+{
+	int mb_len = sizeof(struct dlm_rcom) + len;
+	void *mh;
+	char *mb;
+
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+	if (!mh) {
+		log_print("create_rcom to %d type %d len %d ENOBUFS",
+			  to_nodeid, type, len);
+		return -ENOBUFS;
+	}
+
+	_create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len);
+	*mh_ret = mh;
+	return 0;
+}
+
+static void _send_rcom(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+	dlm_rcom_out(rc);
+}
+
 static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
 		      struct dlm_rcom *rc)
 {
-	dlm_rcom_out(rc);
+	_send_rcom(ls, rc);
+	dlm_midcomms_commit_mhandle(mh);
+}
+
+static void send_rcom_stateless(struct dlm_ls *ls, void *mh,
+				struct dlm_rcom *rc)
+{
+	_send_rcom(ls, rc);
 	dlm_lowcomms_commit_buffer(mh);
 }
 
@@ -141,8 +180,8 @@ static void disallow_sync_reply(struct dlm_ls *ls)
 int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 {
 	struct dlm_rcom *rc;
-	struct dlm_mhandle *mh;
 	int error = 0;
+	void *mh;
 
 	ls->ls_recover_nodeid = nodeid;
 
@@ -153,8 +192,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 	}
 
 retry:
-	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
-			    sizeof(struct rcom_status), &rc, &mh);
+	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS,
+				      sizeof(struct rcom_status), &rc, &mh);
 	if (error)
 		goto out;
 
@@ -163,7 +202,7 @@ retry:
 	allow_sync_reply(ls, &rc->rc_id);
 	memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
 
-	send_rcom(ls, mh, rc);
+	send_rcom_stateless(ls, mh, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
 	disallow_sync_reply(ls);
@@ -191,13 +230,13 @@ retry:
 static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
-	struct dlm_mhandle *mh;
 	struct rcom_status *rs;
 	uint32_t status;
 	int nodeid = rc_in->rc_header.h_nodeid;
 	int len = sizeof(struct rcom_config);
 	int num_slots = 0;
 	int error;
+	void *mh;
 
 	if (!dlm_slots_version(&rc_in->rc_header)) {
 		status = dlm_recover_status(ls);
@@ -218,8 +257,8 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	len += num_slots * sizeof(struct rcom_slot);
 
  do_create:
-	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
-			    len, &rc, &mh);
+	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS_REPLY,
+				      len, &rc, &mh);
 	if (error)
 		return;
 
@@ -246,7 +285,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	spin_unlock(&ls->ls_recover_lock);
 
  do_send:
-	send_rcom(ls, mh, rc);
+	send_rcom_stateless(ls, mh, rc);
 }
 
 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -271,13 +310,14 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 {
 	struct dlm_rcom *rc;
-	struct dlm_mhandle *mh;
 	int error = 0;
+	void *mh;
 
 	ls->ls_recover_nodeid = nodeid;
 
 retry:
-	error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
+	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES, last_len,
+				      &rc, &mh);
 	if (error)
 		goto out;
 	memcpy(rc->rc_buf, last_name, last_len);
@@ -285,7 +325,7 @@ retry:
 	allow_sync_reply(ls, &rc->rc_id);
 	memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
 
-	send_rcom(ls, mh, rc);
+	send_rcom_stateless(ls, mh, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
 	disallow_sync_reply(ls);
@@ -298,14 +338,15 @@ retry:
 static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
-	struct dlm_mhandle *mh;
 	int error, inlen, outlen, nodeid;
+	void *mh;
 
 	nodeid = rc_in->rc_header.h_nodeid;
 	inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
 	outlen = LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom);
 
-	error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
+	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen,
+				      &rc, &mh);
 	if (error)
 		return;
 	rc->rc_id = rc_in->rc_id;
@@ -313,7 +354,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 	dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
 			      nodeid);
-	send_rcom(ls, mh, rc);
+	send_rcom_stateless(ls, mh, rc);
 }
 
 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
@@ -458,7 +499,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
-	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
+	mh = dlm_midcomms_get_mhandle(nodeid, mb_len, GFP_NOFS, &mb);
 	if (!mh)
 		return -ENOBUFS;
 
@@ -479,7 +520,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	rf->rf_lvblen = cpu_to_le32(~0U);
 
 	dlm_rcom_out(rc);
-	dlm_lowcomms_commit_buffer(mh);
+	dlm_midcomms_commit_mhandle(mh);
 
 	return 0;
 }

From 8f2dc78dbc2010b497bb58e0460cb44c678a3c5b Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:42 -0400
Subject: [PATCH 0521/3292] fs: dlm: make buffer handling per msg

This patch makes the void pointer handle for lowcomms functionality per
message and not per page allocation entry. A refcount handling for the
handle was added to keep the message alive until the user doesn't need
it anymore.

There exists now a per message callback which will be called when
allocating a new buffer. This callback will be guaranteed to be called
according the order of the sending buffer, which can be used that the
caller increments a sequence number for the dlm message handle.

For transition process we cast the dlm_mhandle to dlm_msg and vice versa
until the midcomms layer will implement a specific dlm_mhandle structure.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h |   1 +
 fs/dlm/lowcomms.c     | 105 ++++++++++++++++++++++++++++++++++--------
 fs/dlm/lowcomms.h     |   7 ++-
 fs/dlm/midcomms.c     |   7 ++-
 fs/dlm/rcom.c         |  40 ++++++++--------
 5 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index ae3fdf6d9cda..e8dc5f4f1f9e 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -57,6 +57,7 @@ struct dlm_header;
 struct dlm_message;
 struct dlm_rcom;
 struct dlm_mhandle;
+struct dlm_msg;
 
 #define log_print(fmt, args...) \
 	printk(KERN_ERR "dlm: "fmt"\n" , ##args)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 14ca3eda6a83..d222e6088ab2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -119,8 +119,19 @@ struct writequeue_entry {
 	int len;
 	int end;
 	int users;
-	int idx; /* get()/commit() idx exchange */
 	struct connection *con;
+	struct list_head msgs;
+	struct kref ref;
+};
+
+struct dlm_msg {
+	struct writequeue_entry *entry;
+	void *ppc;
+	int len;
+	int idx; /* new()/commit() idx exchange */
+
+	struct list_head list;
+	struct kref ref;
 };
 
 struct dlm_node_addr {
@@ -1022,12 +1033,37 @@ accept_err:
 	return result;
 }
 
-static void free_entry(struct writequeue_entry *e)
+static void dlm_page_release(struct kref *kref)
 {
+	struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
+						  ref);
+
 	__free_page(e->page);
 	kfree(e);
 }
 
+static void dlm_msg_release(struct kref *kref)
+{
+	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
+
+	kref_put(&msg->entry->ref, dlm_page_release);
+	kfree(msg);
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+	struct dlm_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
+		list_del(&msg->list);
+		kref_put(&msg->ref, dlm_msg_release);
+	}
+
+	list_del(&e->list);
+	atomic_dec(&e->con->writequeue_cnt);
+	kref_put(&e->ref, dlm_page_release);
+}
+
 /*
  * writequeue_entry_complete - try to delete and free write queue entry
  * @e: write queue entry to try to delete
@@ -1040,11 +1076,8 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
 	e->offset += completed;
 	e->len -= completed;
 
-	if (e->len == 0 && e->users == 0) {
-		list_del(&e->list);
-		atomic_dec(&e->con->writequeue_cnt);
+	if (e->len == 0 && e->users == 0)
 		free_entry(e);
-	}
 }
 
 /*
@@ -1410,12 +1443,16 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con,
 
 	entry->con = con;
 	entry->users = 1;
+	kref_init(&entry->ref);
+	INIT_LIST_HEAD(&entry->msgs);
 
 	return entry;
 }
 
 static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
-					     gfp_t allocation, char **ppc)
+					     gfp_t allocation, char **ppc,
+					     void (*cb)(struct dlm_mhandle *mh),
+					     struct dlm_mhandle *mh)
 {
 	struct writequeue_entry *e;
 
@@ -1423,7 +1460,12 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 	if (!list_empty(&con->writequeue)) {
 		e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
 		if (DLM_WQ_REMAIN_BYTES(e) >= len) {
+			kref_get(&e->ref);
+
 			*ppc = page_address(e->page) + e->end;
+			if (cb)
+				cb(mh);
+
 			e->end += len;
 			e->users++;
 			spin_unlock(&con->writequeue_lock);
@@ -1437,21 +1479,28 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 	if (!e)
 		return NULL;
 
+	kref_get(&e->ref);
 	*ppc = page_address(e->page);
 	e->end += len;
 	atomic_inc(&con->writequeue_cnt);
 
 	spin_lock(&con->writequeue_lock);
+	if (cb)
+		cb(mh);
+
 	list_add_tail(&e->list, &con->writequeue);
 	spin_unlock(&con->writequeue_lock);
 
 	return e;
 };
 
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
+				     char **ppc, void (*cb)(struct dlm_mhandle *mh),
+				     struct dlm_mhandle *mh)
 {
 	struct writequeue_entry *e;
 	struct connection *con;
+	struct dlm_msg *msg;
 	int idx;
 
 	if (len > DEFAULT_BUFFER_SIZE ||
@@ -1469,25 +1518,41 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 		return NULL;
 	}
 
-	e = new_wq_entry(con, len, allocation, ppc);
-	if (!e) {
+	msg = kzalloc(sizeof(*msg), allocation);
+	if (!msg) {
 		srcu_read_unlock(&connections_srcu, idx);
 		return NULL;
 	}
 
-	/* we assume if successful commit must called */
-	e->idx = idx;
+	kref_init(&msg->ref);
 
-	return e;
+	e = new_wq_entry(con, len, allocation, ppc, cb, mh);
+	if (!e) {
+		srcu_read_unlock(&connections_srcu, idx);
+		kfree(msg);
+		return NULL;
+	}
+
+	msg->ppc = *ppc;
+	msg->len = len;
+	msg->entry = e;
+
+	/* we assume if successful commit must called */
+	msg->idx = idx;
+
+	return msg;
 }
 
-void dlm_lowcomms_commit_buffer(void *mh)
+void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
 {
-	struct writequeue_entry *e = (struct writequeue_entry *)mh;
+	struct writequeue_entry *e = msg->entry;
 	struct connection *con = e->con;
 	int users;
 
 	spin_lock(&con->writequeue_lock);
+	kref_get(&msg->ref);
+	list_add(&msg->list, &e->msgs);
+
 	users = --e->users;
 	if (users)
 		goto out;
@@ -1496,15 +1561,20 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	spin_unlock(&con->writequeue_lock);
 
 	queue_work(send_workqueue, &con->swork);
-	srcu_read_unlock(&connections_srcu, e->idx);
+	srcu_read_unlock(&connections_srcu, msg->idx);
 	return;
 
 out:
 	spin_unlock(&con->writequeue_lock);
-	srcu_read_unlock(&connections_srcu, e->idx);
+	srcu_read_unlock(&connections_srcu, msg->idx);
 	return;
 }
 
+void dlm_lowcomms_put_msg(struct dlm_msg *msg)
+{
+	kref_put(&msg->ref, dlm_msg_release);
+}
+
 /* Send a message */
 static void send_to_sock(struct connection *con)
 {
@@ -1590,7 +1660,6 @@ static void clean_one_writequeue(struct connection *con)
 
 	spin_lock(&con->writequeue_lock);
 	list_for_each_entry_safe(e, safe, &con->writequeue, list) {
-		list_del(&e->list);
 		free_entry(e);
 	}
 	spin_unlock(&con->writequeue_lock);
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 48bbc4e18761..cdb8f066f0d8 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -22,8 +22,11 @@ void dlm_lowcomms_shutdown(void);
 void dlm_lowcomms_stop(void);
 void dlm_lowcomms_exit(void);
 int dlm_lowcomms_close(int nodeid);
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
-void dlm_lowcomms_commit_buffer(void *mh);
+struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
+				     char **ppc, void (*cb)(struct dlm_mhandle *mh),
+				     struct dlm_mhandle *mh);
+void dlm_lowcomms_commit_msg(struct dlm_msg *msg);
+void dlm_lowcomms_put_msg(struct dlm_msg *msg);
 int dlm_lowcomms_connect_node(int nodeid);
 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 1a280dda99d3..aadb3781bebe 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -31,12 +31,15 @@
 struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 					     gfp_t allocation, char **ppc)
 {
-	return dlm_lowcomms_get_buffer(nodeid, len, allocation, ppc);
+	return (struct dlm_mhandle *)dlm_lowcomms_new_msg(nodeid, len,
+							  allocation, ppc,
+							  NULL, NULL);
 }
 
 void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
 {
-	dlm_lowcomms_commit_buffer(mh);
+	dlm_lowcomms_commit_msg((struct dlm_msg *)mh);
+	dlm_lowcomms_put_msg((struct dlm_msg *)mh);
 }
 
 void dlm_midcomms_add_member(int nodeid) { }
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 2661674364af..6f653a339bea 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -70,21 +70,22 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 
 static int create_rcom_stateless(struct dlm_ls *ls, int to_nodeid, int type,
 				 int len, struct dlm_rcom **rc_ret,
-				 void **mh_ret)
+				 struct dlm_msg **msg_ret)
 {
 	int mb_len = sizeof(struct dlm_rcom) + len;
-	void *mh;
+	struct dlm_msg *msg;
 	char *mb;
 
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
-	if (!mh) {
+	msg = dlm_lowcomms_new_msg(to_nodeid, mb_len, GFP_NOFS, &mb,
+				   NULL, NULL);
+	if (!msg) {
 		log_print("create_rcom to %d type %d len %d ENOBUFS",
 			  to_nodeid, type, len);
 		return -ENOBUFS;
 	}
 
 	_create_rcom(ls, to_nodeid, type, len, rc_ret, mb, mb_len);
-	*mh_ret = mh;
+	*msg_ret = msg;
 	return 0;
 }
 
@@ -100,11 +101,12 @@ static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
 	dlm_midcomms_commit_mhandle(mh);
 }
 
-static void send_rcom_stateless(struct dlm_ls *ls, void *mh,
+static void send_rcom_stateless(struct dlm_ls *ls, struct dlm_msg *msg,
 				struct dlm_rcom *rc)
 {
 	_send_rcom(ls, rc);
-	dlm_lowcomms_commit_buffer(mh);
+	dlm_lowcomms_commit_msg(msg);
+	dlm_lowcomms_put_msg(msg);
 }
 
 static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs,
@@ -180,8 +182,8 @@ static void disallow_sync_reply(struct dlm_ls *ls)
 int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 {
 	struct dlm_rcom *rc;
+	struct dlm_msg *msg;
 	int error = 0;
-	void *mh;
 
 	ls->ls_recover_nodeid = nodeid;
 
@@ -193,7 +195,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
 
 retry:
 	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS,
-				      sizeof(struct rcom_status), &rc, &mh);
+				      sizeof(struct rcom_status), &rc, &msg);
 	if (error)
 		goto out;
 
@@ -202,7 +204,7 @@ retry:
 	allow_sync_reply(ls, &rc->rc_id);
 	memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
 
-	send_rcom_stateless(ls, mh, rc);
+	send_rcom_stateless(ls, msg, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
 	disallow_sync_reply(ls);
@@ -234,9 +236,9 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	uint32_t status;
 	int nodeid = rc_in->rc_header.h_nodeid;
 	int len = sizeof(struct rcom_config);
+	struct dlm_msg *msg;
 	int num_slots = 0;
 	int error;
-	void *mh;
 
 	if (!dlm_slots_version(&rc_in->rc_header)) {
 		status = dlm_recover_status(ls);
@@ -258,7 +260,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
  do_create:
 	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_STATUS_REPLY,
-				      len, &rc, &mh);
+				      len, &rc, &msg);
 	if (error)
 		return;
 
@@ -285,7 +287,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	spin_unlock(&ls->ls_recover_lock);
 
  do_send:
-	send_rcom_stateless(ls, mh, rc);
+	send_rcom_stateless(ls, msg, rc);
 }
 
 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -310,14 +312,14 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 {
 	struct dlm_rcom *rc;
+	struct dlm_msg *msg;
 	int error = 0;
-	void *mh;
 
 	ls->ls_recover_nodeid = nodeid;
 
 retry:
 	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES, last_len,
-				      &rc, &mh);
+				      &rc, &msg);
 	if (error)
 		goto out;
 	memcpy(rc->rc_buf, last_name, last_len);
@@ -325,7 +327,7 @@ retry:
 	allow_sync_reply(ls, &rc->rc_id);
 	memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
 
-	send_rcom_stateless(ls, mh, rc);
+	send_rcom_stateless(ls, msg, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
 	disallow_sync_reply(ls);
@@ -339,14 +341,14 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
 	int error, inlen, outlen, nodeid;
-	void *mh;
+	struct dlm_msg *msg;
 
 	nodeid = rc_in->rc_header.h_nodeid;
 	inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
 	outlen = LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom);
 
 	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen,
-				      &rc, &mh);
+				      &rc, &msg);
 	if (error)
 		return;
 	rc->rc_id = rc_in->rc_id;
@@ -354,7 +356,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 	dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
 			      nodeid);
-	send_rcom_stateless(ls, mh, rc);
+	send_rcom_stateless(ls, msg, rc);
 }
 
 int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)

From 2874d1a68c4ec5623a05c8118f5dbaefb30b37ff Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:43 -0400
Subject: [PATCH 0522/3292] fs: dlm: add functionality to re-transmit a message

This patch introduces a retransmit functionality for a lowcomms message
handle. It's just allocates a new buffer and transmit it again, no
special handling about prioritize it because keeping bytestream in order.

To avoid another connection look some refactor was done to make a new
buffer allocation with a preexisting connection pointer.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 85 ++++++++++++++++++++++++++++++++++++-----------
 fs/dlm/lowcomms.h |  1 +
 2 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index d222e6088ab2..df9827ec12f3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -126,6 +126,8 @@ struct writequeue_entry {
 
 struct dlm_msg {
 	struct writequeue_entry *entry;
+	struct dlm_msg *orig_msg;
+	bool retransmit;
 	void *ppc;
 	int len;
 	int idx; /* new()/commit() idx exchange */
@@ -1055,6 +1057,10 @@ static void free_entry(struct writequeue_entry *e)
 	struct dlm_msg *msg, *tmp;
 
 	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
+		if (msg->orig_msg) {
+			msg->orig_msg->retransmit = false;
+			kref_put(&msg->orig_msg->ref, dlm_msg_release);
+		}
 		list_del(&msg->list);
 		kref_put(&msg->ref, dlm_msg_release);
 	}
@@ -1494,11 +1500,37 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 	return e;
 };
 
+static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
+						gfp_t allocation, char **ppc,
+						void (*cb)(struct dlm_mhandle *mh),
+						struct dlm_mhandle *mh)
+{
+	struct writequeue_entry *e;
+	struct dlm_msg *msg;
+
+	msg = kzalloc(sizeof(*msg), allocation);
+	if (!msg)
+		return NULL;
+
+	kref_init(&msg->ref);
+
+	e = new_wq_entry(con, len, allocation, ppc, cb, mh);
+	if (!e) {
+		kfree(msg);
+		return NULL;
+	}
+
+	msg->ppc = *ppc;
+	msg->len = len;
+	msg->entry = e;
+
+	return msg;
+}
+
 struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
 				     char **ppc, void (*cb)(struct dlm_mhandle *mh),
 				     struct dlm_mhandle *mh)
 {
-	struct writequeue_entry *e;
 	struct connection *con;
 	struct dlm_msg *msg;
 	int idx;
@@ -1518,32 +1550,18 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
 		return NULL;
 	}
 
-	msg = kzalloc(sizeof(*msg), allocation);
+	msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh);
 	if (!msg) {
 		srcu_read_unlock(&connections_srcu, idx);
 		return NULL;
 	}
 
-	kref_init(&msg->ref);
-
-	e = new_wq_entry(con, len, allocation, ppc, cb, mh);
-	if (!e) {
-		srcu_read_unlock(&connections_srcu, idx);
-		kfree(msg);
-		return NULL;
-	}
-
-	msg->ppc = *ppc;
-	msg->len = len;
-	msg->entry = e;
-
 	/* we assume if successful commit must called */
 	msg->idx = idx;
-
 	return msg;
 }
 
-void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
+static void _dlm_lowcomms_commit_msg(struct dlm_msg *msg)
 {
 	struct writequeue_entry *e = msg->entry;
 	struct connection *con = e->con;
@@ -1561,20 +1579,49 @@ void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
 	spin_unlock(&con->writequeue_lock);
 
 	queue_work(send_workqueue, &con->swork);
-	srcu_read_unlock(&connections_srcu, msg->idx);
 	return;
 
 out:
 	spin_unlock(&con->writequeue_lock);
-	srcu_read_unlock(&connections_srcu, msg->idx);
 	return;
 }
 
+void dlm_lowcomms_commit_msg(struct dlm_msg *msg)
+{
+	_dlm_lowcomms_commit_msg(msg);
+	srcu_read_unlock(&connections_srcu, msg->idx);
+}
+
 void dlm_lowcomms_put_msg(struct dlm_msg *msg)
 {
 	kref_put(&msg->ref, dlm_msg_release);
 }
 
+/* does not held connections_srcu, usage workqueue only */
+int dlm_lowcomms_resend_msg(struct dlm_msg *msg)
+{
+	struct dlm_msg *msg_resend;
+	char *ppc;
+
+	if (msg->retransmit)
+		return 1;
+
+	msg_resend = dlm_lowcomms_new_msg_con(msg->entry->con, msg->len,
+					      GFP_ATOMIC, &ppc, NULL, NULL);
+	if (!msg_resend)
+		return -ENOMEM;
+
+	msg->retransmit = true;
+	kref_get(&msg->ref);
+	msg_resend->orig_msg = msg;
+
+	memcpy(ppc, msg->ppc, msg->len);
+	_dlm_lowcomms_commit_msg(msg_resend);
+	dlm_lowcomms_put_msg(msg_resend);
+
+	return 0;
+}
+
 /* Send a message */
 static void send_to_sock(struct connection *con)
 {
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index cdb8f066f0d8..a4384826442c 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -27,6 +27,7 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
 				     struct dlm_mhandle *mh);
 void dlm_lowcomms_commit_msg(struct dlm_msg *msg);
 void dlm_lowcomms_put_msg(struct dlm_msg *msg);
+int dlm_lowcomms_resend_msg(struct dlm_msg *msg);
 int dlm_lowcomms_connect_node(int nodeid);
 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);

From 37a247da517f4315eed21585be8aa516e0b9cec9 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:44 -0400
Subject: [PATCH 0523/3292] fs: dlm: move out some hash functionality

This patch moves out some lowcomms hash functionality into lowcomms
header to provide them to other layers like midcomms as well.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c |  9 ---------
 fs/dlm/lowcomms.h | 10 ++++++++++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index df9827ec12f3..1f2759cfda09 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -59,7 +59,6 @@
 #include "config.h"
 
 #define NEEDED_RMEM (4*1024*1024)
-#define CONN_HASH_SIZE 32
 
 /* Number of messages to send before rescheduling */
 #define MAX_SEND_MSG_COUNT 25
@@ -175,14 +174,6 @@ static void sctp_connect_to_sock(struct connection *con);
 static void tcp_connect_to_sock(struct connection *con);
 static void dlm_tcp_shutdown(struct connection *con);
 
-/* This is deliberately very simple because most clusters have simple
-   sequential nodeids, so we should be able to go straight to a connection
-   struct in the array */
-static inline int nodeid_hash(int nodeid)
-{
-	return nodeid & (CONN_HASH_SIZE-1);
-}
-
 static struct connection *__find_con(int nodeid, int r)
 {
 	struct connection *con;
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index a4384826442c..66dc1bb3de7f 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -13,6 +13,16 @@
 #define __LOWCOMMS_DOT_H__
 
 #define LOWCOMMS_MAX_TX_BUFFER_LEN	4096
+#define CONN_HASH_SIZE 32
+
+/* This is deliberately very simple because most clusters have simple
+ * sequential nodeids, so we should be able to go straight to a connection
+ * struct in the array
+ */
+static inline int nodeid_hash(int nodeid)
+{
+	return nodeid & (CONN_HASH_SIZE-1);
+}
 
 /* switch to check if dlm is running */
 extern int dlm_allow_conn;

From 8e2e40860c7f67c0b19b13d92cfea03a19232ce2 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:45 -0400
Subject: [PATCH 0524/3292] fs: dlm: add union in dlm header for lockspace id

This patch adds union inside the lockspace id to handle it also for
another use case for a different dlm command.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h | 5 ++++-
 fs/dlm/lock.c         | 8 ++++----
 fs/dlm/rcom.c         | 4 ++--
 fs/dlm/util.c         | 6 ++++--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index e8dc5f4f1f9e..8f5980909d80 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -380,7 +380,10 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
 
 struct dlm_header {
 	uint32_t		h_version;
-	uint32_t		h_lockspace;
+	union {
+		/* for DLM_MSG and DLM_RCOM */
+		uint32_t	h_lockspace;
+	} u;
 	uint32_t		h_nodeid;	/* nodeid of sender */
 	uint16_t		h_length;
 	uint8_t			h_cmd;		/* DLM_MSG, DLM_RCOM */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index b625ce92464a..c502c065d007 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3544,7 +3544,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
 	ms = (struct dlm_message *) mb;
 
 	ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
-	ms->m_header.h_lockspace = ls->ls_global_id;
+	ms->m_header.u.h_lockspace = ls->ls_global_id;
 	ms->m_header.h_nodeid = dlm_our_nodeid();
 	ms->m_header.h_length = mb_len;
 	ms->m_header.h_cmd = DLM_MSG;
@@ -5038,16 +5038,16 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
 
 	if (hd->h_nodeid != nodeid) {
 		log_print("invalid h_nodeid %d from %d lockspace %x",
-			  hd->h_nodeid, nodeid, hd->h_lockspace);
+			  hd->h_nodeid, nodeid, hd->u.h_lockspace);
 		return;
 	}
 
-	ls = dlm_find_lockspace_global(hd->h_lockspace);
+	ls = dlm_find_lockspace_global(hd->u.h_lockspace);
 	if (!ls) {
 		if (dlm_config.ci_log_debug) {
 			printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace "
 				"%u from %d cmd %d type %d\n",
-				hd->h_lockspace, nodeid, hd->h_cmd, type);
+				hd->u.h_lockspace, nodeid, hd->h_cmd, type);
 		}
 
 		if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6f653a339bea..7c1a06bcd418 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -35,7 +35,7 @@ static void _create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 	rc = (struct dlm_rcom *) mb;
 
 	rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
-	rc->rc_header.h_lockspace = ls->ls_global_id;
+	rc->rc_header.u.h_lockspace = ls->ls_global_id;
 	rc->rc_header.h_nodeid = dlm_our_nodeid();
 	rc->rc_header.h_length = mb_len;
 	rc->rc_header.h_cmd = DLM_RCOM;
@@ -508,7 +508,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	rc = (struct dlm_rcom *) mb;
 
 	rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
-	rc->rc_header.h_lockspace = rc_in->rc_header.h_lockspace;
+	rc->rc_header.u.h_lockspace = rc_in->rc_header.u.h_lockspace;
 	rc->rc_header.h_nodeid = dlm_our_nodeid();
 	rc->rc_header.h_length = mb_len;
 	rc->rc_header.h_cmd = DLM_RCOM;
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 74a8c5bfe9b5..58acbcc2081a 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -23,7 +23,8 @@
 void header_out(struct dlm_header *hd)
 {
 	hd->h_version		= cpu_to_le32(hd->h_version);
-	hd->h_lockspace		= cpu_to_le32(hd->h_lockspace);
+	/* does it for others u32 in union as well */
+	hd->u.h_lockspace	= cpu_to_le32(hd->u.h_lockspace);
 	hd->h_nodeid		= cpu_to_le32(hd->h_nodeid);
 	hd->h_length		= cpu_to_le16(hd->h_length);
 }
@@ -31,7 +32,8 @@ void header_out(struct dlm_header *hd)
 void header_in(struct dlm_header *hd)
 {
 	hd->h_version		= le32_to_cpu(hd->h_version);
-	hd->h_lockspace		= le32_to_cpu(hd->h_lockspace);
+	/* does it for others u32 in union as well */
+	hd->u.h_lockspace	= le32_to_cpu(hd->u.h_lockspace);
 	hd->h_nodeid		= le32_to_cpu(hd->h_nodeid);
 	hd->h_length		= le16_to_cpu(hd->h_length);
 }

From 489d8e559c6596eb08e16447d9830bc39afbe54e Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:46 -0400
Subject: [PATCH 0525/3292] fs: dlm: add reliable connection if reconnect

This patch introduce to make a tcp lowcomms connection reliable even if
reconnects occurs. This is done by an application layer re-transmission
handling and sequence numbers in dlm protocols. There are three new dlm
commands:

DLM_OPTS:

This will encapsulate an existing dlm message (and rcom message if they
don't have an own application side re-transmission handling). As optional
handling additional tlv's (type length fields) can be appended. This can
be for example a sequence number field. However because in DLM_OPTS the
lockspace field is unused and a sequence number is a mandatory field it
isn't made as a tlv and we put the sequence number inside the lockspace
id. The possibility to add optional options are still there for future
purposes.

DLM_ACK:

Just a dlm header to acknowledge the receive of a DLM_OPTS message to
it's sender.

DLM_FIN:

This provides a 4 way handshake for connection termination inclusive
support for half-closed connections. It's provided on application layer
because SCTP doesn't support half-closed sockets, the shutdown() call
can interrupted by e.g. TCP resets itself and a hard logic to implement
it because the othercon paradigm in lowcomms. The 4-way termination
handshake also solve problems to synchronize peer EOF arrival and that
the cluster manager removes the peer in the node membership handling of
DLM. In some cases messages can be still transmitted in this time and we
need to wait for the node membership event.

To provide a reliable connection the node will retransmit all
unacknowledges message to it's peer on reconnect. The receiver will then
filtering out the next received message and drop all messages which are
duplicates.

As RCOM_STATUS and RCOM_NAMES messages are the first messages which are
exchanged and they have they own re-transmission handling, there exists
logic that these messages must be first. If these messages arrives we
store the dlm version field. This handling is on DLM 3.1 and after this
patch 3.2 the same. A backwards compatibility handling has been added
which seems to work on tests without tcpkill, however it's not recommended
to use DLM 3.1 and 3.2 at the same time, because DLM 3.2 tries to fix long
term bugs in the DLM protocol.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h |   30 +-
 fs/dlm/lockspace.c    |    7 +-
 fs/dlm/lowcomms.c     |    4 +-
 fs/dlm/lowcomms.h     |    7 +-
 fs/dlm/midcomms.c     | 1294 +++++++++++++++++++++++++++++++++++++++--
 fs/dlm/midcomms.h     |    1 +
 fs/dlm/rcom.c         |    4 +-
 7 files changed, 1294 insertions(+), 53 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 8f5980909d80..40917c878370 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -371,18 +371,26 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
 /* dlm_header is first element of all structs sent between nodes */
 
 #define DLM_HEADER_MAJOR	0x00030000
-#define DLM_HEADER_MINOR	0x00000001
+#define DLM_HEADER_MINOR	0x00000002
+
+#define DLM_VERSION_3_1		0x00030001
+#define DLM_VERSION_3_2		0x00030002
 
 #define DLM_HEADER_SLOTS	0x00000001
 
 #define DLM_MSG			1
 #define DLM_RCOM		2
+#define DLM_OPTS		3
+#define DLM_ACK			4
+#define DLM_FIN			5
 
 struct dlm_header {
 	uint32_t		h_version;
 	union {
 		/* for DLM_MSG and DLM_RCOM */
 		uint32_t	h_lockspace;
+		/* for DLM_ACK and DLM_OPTS */
+		uint32_t	h_seq;
 	} u;
 	uint32_t		h_nodeid;	/* nodeid of sender */
 	uint16_t		h_length;
@@ -390,7 +398,6 @@ struct dlm_header {
 	uint8_t			h_pad;
 };
 
-
 #define DLM_MSG_REQUEST		1
 #define DLM_MSG_CONVERT		2
 #define DLM_MSG_UNLOCK		3
@@ -458,10 +465,29 @@ struct dlm_rcom {
 	char			rc_buf[];
 };
 
+struct dlm_opt_header {
+	uint16_t	t_type;
+	uint16_t	t_length;
+	uint32_t	o_pad;
+	/* need to be 8 byte aligned */
+	char		t_value[];
+};
+
+/* encapsulation header */
+struct dlm_opts {
+	struct dlm_header	o_header;
+	uint8_t			o_nextcmd;
+	uint8_t			o_pad;
+	uint16_t		o_optlen;
+	uint32_t		o_pad2;
+	char			o_opts[];
+};
+
 union dlm_packet {
 	struct dlm_header	header;		/* common to other two */
 	struct dlm_message	message;
 	struct dlm_rcom		rcom;
+	struct dlm_opts		opts;
 };
 
 #define DLM_RSF_NEED_SLOTS	0x00000001
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index bf5c55ef9d0d..2b738be8d7e4 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -567,7 +567,12 @@ static int new_lockspace(const char *name, const char *cluster,
 	mutex_init(&ls->ls_requestqueue_mutex);
 	mutex_init(&ls->ls_clear_proc_locks);
 
-	ls->ls_recover_buf = kmalloc(LOWCOMMS_MAX_TX_BUFFER_LEN, GFP_NOFS);
+	/* Due backwards compatibility with 3.1 we need to use maximum
+	 * possible dlm message size to be sure the message will fit and
+	 * not having out of bounds issues. However on sending side 3.2
+	 * might send less.
+	 */
+	ls->ls_recover_buf = kmalloc(DEFAULT_BUFFER_SIZE, GFP_NOFS);
 	if (!ls->ls_recover_buf)
 		goto out_lkbidr;
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 1f2759cfda09..fe9113bd5ba0 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1762,8 +1762,10 @@ static void process_send_sockets(struct work_struct *work)
 
 	clear_bit(CF_WRITE_PENDING, &con->flags);
 
-	if (test_and_clear_bit(CF_RECONNECT, &con->flags))
+	if (test_and_clear_bit(CF_RECONNECT, &con->flags)) {
 		close_connection(con, false, false, true);
+		dlm_midcomms_unack_msg_resend(con->nodeid);
+	}
 
 	if (con->sock == NULL) { /* not mutex protected so check it inside too */
 		if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 66dc1bb3de7f..730c34317183 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -12,7 +12,12 @@
 #ifndef __LOWCOMMS_DOT_H__
 #define __LOWCOMMS_DOT_H__
 
-#define LOWCOMMS_MAX_TX_BUFFER_LEN	4096
+#include "dlm_internal.h"
+
+#define DLM_MIDCOMMS_OPT_LEN		sizeof(struct dlm_opts)
+#define LOWCOMMS_MAX_TX_BUFFER_LEN	(DEFAULT_BUFFER_SIZE - \
+					 DLM_MIDCOMMS_OPT_LEN)
+
 #define CONN_HASH_SIZE 32
 
 /* This is deliberately very simple because most clusters have simple
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index aadb3781bebe..eef3938a363e 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -3,7 +3,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2021 Red Hat, Inc.  All rights reserved.
 **
 **
 *******************************************************************************
@@ -12,53 +12,817 @@
 /*
  * midcomms.c
  *
- * This is the appallingly named "mid-level" comms layer.
+ * This is the appallingly named "mid-level" comms layer. It takes care about
+ * deliver an on application layer "reliable" communication above the used
+ * lowcomms transport layer.
  *
- * Its purpose is to take packets from the "real" comms layer,
- * split them up into packets and pass them to the interested
- * part of the locking mechanism.
+ * How it works:
  *
- * It also takes messages from the locking layer, formats them
- * into packets and sends them to the comms layer.
+ * Each nodes keeps track of all send DLM messages in send_queue with a sequence
+ * number. The receive will send an DLM_ACK message back for every DLM message
+ * received at the other side. If a reconnect happens in lowcomms we will send
+ * all unacknowledged dlm messages again. The receiving side might drop any already
+ * received message by comparing sequence numbers.
+ *
+ * How version detection works:
+ *
+ * Due the fact that dlm has pre-configured node addresses on every side
+ * it is in it's nature that every side connects at starts to transmit
+ * dlm messages which ends in a race. However DLM_RCOM_NAMES, DLM_RCOM_STATUS
+ * and their replies are the first messages which are exchanges. Due backwards
+ * compatibility these messages are not covered by the midcomms re-transmission
+ * layer. These messages have their own re-transmission handling in the dlm
+ * application layer. The version field of every node will be set on these RCOM
+ * messages as soon as they arrived and the node isn't yet part of the nodes
+ * hash. There exists also logic to detect version mismatched if something weird
+ * going on or the first messages isn't an expected one.
+ *
+ * Termination:
+ *
+ * The midcomms layer does a 4 way handshake for termination on DLM protocol
+ * like TCP supports it with half-closed socket support. SCTP doesn't support
+ * half-closed socket, so we do it on DLM layer. Also socket shutdown() can be
+ * interrupted by .e.g. tcp reset itself. Additional there exists the othercon
+ * paradigm in lowcomms which cannot be easily without breaking backwards
+ * compatibility. A node cannot send anything to another node when a DLM_FIN
+ * message was send. There exists additional logic to print a warning if
+ * DLM wants to do it. There exists a state handling like RFC 793 but reduced
+ * to termination only. The event "member removal event" describes the cluster
+ * manager removed the node from internal lists, at this point DLM does not
+ * send any message to the other node. There exists two cases:
+ *
+ * 1. The cluster member was removed and we received a FIN
+ * OR
+ * 2. We received a FIN but the member was not removed yet
+ *
+ * One of these cases will do the CLOSE_WAIT to LAST_ACK change.
+ *
+ *
+ *                              +---------+
+ *                              | CLOSED  |
+ *                              +---------+
+ *                                   | add member/receive RCOM version
+ *                                   |            detection msg
+ *                                   V
+ *                              +---------+
+ *                              |  ESTAB  |
+ *                              +---------+
+ *                       CLOSE    |     |    rcv FIN
+ *                      -------   |     |    -------
+ * +---------+          snd FIN  /       \   snd ACK          +---------+
+ * |  FIN    |<-----------------           ------------------>|  CLOSE  |
+ * | WAIT-1  |------------------                              |   WAIT  |
+ * +---------+          rcv FIN  \                            +---------+
+ * | rcv ACK of FIN   -------   |                            CLOSE  | member
+ * | --------------   snd ACK   |                           ------- | removal
+ * V        x                   V                           snd FIN V event
+ * +---------+                  +---------+                   +---------+
+ * |FINWAIT-2|                  | CLOSING |                   | LAST-ACK|
+ * +---------+                  +---------+                   +---------+
+ * |                rcv ACK of FIN |                 rcv ACK of FIN |
+ * |  rcv FIN       -------------- |                 -------------- |
+ * |  -------              x       V                        x       V
+ *  \ snd ACK                 +---------+                   +---------+
+ *   ------------------------>| CLOSED  |                   | CLOSED  |
+ *                            +---------+                   +---------+
+ *
+ * NOTE: any state can interrupted by midcomms_close() and state will be
+ * switched to CLOSED in case of fencing. There exists also some timeout
+ * handling when we receive the version detection RCOM messages which is
+ * made by observation.
+ *
+ * Future improvements:
+ *
+ * There exists some known issues/improvements of the dlm handling. Some
+ * of them should be done in a next major dlm version bump which makes
+ * it incompatible with previous versions.
+ *
+ * Unaligned memory access:
+ *
+ * There exists cases when the dlm message buffer length is not aligned
+ * to 8 byte. However seems nobody detected any problem with it. This
+ * can be fixed in the next major version bump of dlm.
+ *
+ * Version detection:
+ *
+ * The version detection and how it's done is related to backwards
+ * compatibility. There exists better ways to make a better handling.
+ * However this should be changed in the next major version bump of dlm.
+ *
+ * Ack handling:
+ *
+ * Currently we send an ack message for every dlm message. However we
+ * can ack multiple dlm messages with one ack by just delaying the ack
+ * message. Will reduce some traffic but makes the drop detection slower.
+ *
+ * Tail Size checking:
+ *
+ * There exists a message tail payload in e.g. DLM_MSG however we don't
+ * check it against the message length yet regarding to the receive buffer
+ * length. That need to be validated.
+ *
+ * Fencing bad nodes:
+ *
+ * At timeout places or weird sequence number behaviours we should send
+ * a fencing request to the cluster manager.
  */
 
+/* Debug switch to enable a 5 seconds sleep waiting of a termination.
+ * This can be useful to test fencing while termination is running.
+ * This requires a setup with only gfs2 as dlm user, so that the
+ * last umount will terminate the connection.
+ *
+ * However it became useful to test, while the 5 seconds block in umount
+ * just press the reset button. In a lot of dropping the termination
+ * process can could take several seconds.
+ */
+#define DLM_DEBUG_FENCE_TERMINATION	0
+
+#include <net/tcp.h>
+
 #include "dlm_internal.h"
 #include "lowcomms.h"
 #include "config.h"
 #include "lock.h"
+#include "util.h"
 #include "midcomms.h"
 
-struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
-					     gfp_t allocation, char **ppc)
+/* init value for sequence numbers for testing purpose only e.g. overflows */
+#define DLM_SEQ_INIT		0
+/* 3 minutes wait to sync ending of dlm */
+#define DLM_SHUTDOWN_TIMEOUT	msecs_to_jiffies(3 * 60 * 1000)
+#define DLM_VERSION_NOT_SET	0
+
+struct midcomms_node {
+	int nodeid;
+	uint32_t version;
+	uint32_t seq_send;
+	uint32_t seq_next;
+	/* These queues are unbound because we cannot drop any message in dlm.
+	 * We could send a fence signal for a specific node to the cluster
+	 * manager if queues hits some maximum value, however this handling
+	 * not supported yet.
+	 */
+	struct list_head send_queue;
+	spinlock_t send_queue_lock;
+	atomic_t send_queue_cnt;
+#define DLM_NODE_FLAG_CLOSE	1
+#define DLM_NODE_FLAG_STOP_TX	2
+#define DLM_NODE_FLAG_STOP_RX	3
+	unsigned long flags;
+	wait_queue_head_t shutdown_wait;
+
+	/* dlm tcp termination state */
+#define DLM_CLOSED	1
+#define DLM_ESTABLISHED	2
+#define DLM_FIN_WAIT1	3
+#define DLM_FIN_WAIT2	4
+#define DLM_CLOSE_WAIT	5
+#define DLM_LAST_ACK	6
+#define DLM_CLOSING	7
+	int state;
+	spinlock_t state_lock;
+
+	/* counts how many lockspaces are using this node
+	 * this refcount is necessary to determine if the
+	 * node wants to disconnect.
+	 */
+	int users;
+
+	struct hlist_node hlist;
+	struct rcu_head rcu;
+};
+
+struct dlm_mhandle {
+	const struct dlm_header *inner_hd;
+	struct midcomms_node *node;
+	struct dlm_opts *opts;
+	struct dlm_msg *msg;
+	bool committed;
+	uint32_t seq;
+
+	void (*ack_rcv)(struct midcomms_node *node);
+
+	/* get_mhandle/commit srcu idx exchange */
+	int idx;
+
+	struct list_head list;
+	struct rcu_head rcu;
+};
+
+static struct hlist_head node_hash[CONN_HASH_SIZE];
+static DEFINE_SPINLOCK(nodes_lock);
+DEFINE_STATIC_SRCU(nodes_srcu);
+
+/* This mutex prevents that midcomms_close() is running while
+ * stop() or remove(). As I experienced invalid memory access
+ * behaviours when DLM_DEBUG_FENCE_TERMINATION is enabled and
+ * resetting machines. I will end in some double deletion in nodes
+ * datastructure.
+ */
+static DEFINE_MUTEX(close_lock);
+
+static inline const char *dlm_state_str(int state)
 {
-	return (struct dlm_mhandle *)dlm_lowcomms_new_msg(nodeid, len,
-							  allocation, ppc,
-							  NULL, NULL);
+	switch (state) {
+	case DLM_CLOSED:
+		return "CLOSED";
+	case DLM_ESTABLISHED:
+		return "ESTABLISHED";
+	case DLM_FIN_WAIT1:
+		return "FIN_WAIT1";
+	case DLM_FIN_WAIT2:
+		return "FIN_WAIT2";
+	case DLM_CLOSE_WAIT:
+		return "CLOSE_WAIT";
+	case DLM_LAST_ACK:
+		return "LAST_ACK";
+	case DLM_CLOSING:
+		return "CLOSING";
+	default:
+		return "UNKNOWN";
+	}
 }
 
-void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
+static struct midcomms_node *__find_node(int nodeid, int r)
 {
-	dlm_lowcomms_commit_msg((struct dlm_msg *)mh);
-	dlm_lowcomms_put_msg((struct dlm_msg *)mh);
+	struct midcomms_node *node;
+
+	hlist_for_each_entry_rcu(node, &node_hash[r], hlist) {
+		if (node->nodeid == nodeid)
+			return node;
+	}
+
+	return NULL;
 }
 
-void dlm_midcomms_add_member(int nodeid) { }
-
-void dlm_midcomms_remove_member(int nodeid) { }
-
-int dlm_midcomms_start(void)
+static void dlm_mhandle_release(struct rcu_head *rcu)
 {
-	return dlm_lowcomms_start();
+	struct dlm_mhandle *mh = container_of(rcu, struct dlm_mhandle, rcu);
+
+	dlm_lowcomms_put_msg(mh->msg);
+	kfree(mh);
 }
 
-void dlm_midcomms_shutdown(void)
+static void dlm_send_queue_flush(struct midcomms_node *node)
 {
-	dlm_lowcomms_shutdown();
+	struct dlm_mhandle *mh;
+
+	pr_debug("flush midcomms send queue of node %d\n", node->nodeid);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(mh, &node->send_queue, list) {
+		spin_lock(&node->send_queue_lock);
+		list_del_rcu(&mh->list);
+		spin_unlock(&node->send_queue_lock);
+
+		atomic_dec(&node->send_queue_cnt);
+
+		call_rcu(&mh->rcu, dlm_mhandle_release);
+	}
+	rcu_read_unlock();
 }
 
-int dlm_midcomms_close(int nodeid)
+static void midcomms_node_reset(struct midcomms_node *node)
 {
-	return dlm_lowcomms_close(nodeid);
+	pr_debug("reset node %d\n", node->nodeid);
+
+	node->seq_next = DLM_SEQ_INIT;
+	node->seq_send = DLM_SEQ_INIT;
+	node->version = DLM_VERSION_NOT_SET;
+	node->flags = 0;
+
+	dlm_send_queue_flush(node);
+	node->state = DLM_CLOSED;
+	wake_up(&node->shutdown_wait);
+}
+
+static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc)
+{
+	struct midcomms_node *node, *tmp;
+	int r = nodeid_hash(nodeid);
+
+	node = __find_node(nodeid, r);
+	if (node || !alloc)
+		return node;
+
+	node = kmalloc(sizeof(*node), alloc);
+	if (!node)
+		return NULL;
+
+	node->nodeid = nodeid;
+	spin_lock_init(&node->state_lock);
+	spin_lock_init(&node->send_queue_lock);
+	atomic_set(&node->send_queue_cnt, 0);
+	INIT_LIST_HEAD(&node->send_queue);
+	init_waitqueue_head(&node->shutdown_wait);
+	node->users = 0;
+	midcomms_node_reset(node);
+
+	spin_lock(&nodes_lock);
+	/* check again if there was somebody else
+	 * earlier here to add the node
+	 */
+	tmp = __find_node(nodeid, r);
+	if (tmp) {
+		spin_unlock(&nodes_lock);
+		kfree(node);
+		return tmp;
+	}
+
+	hlist_add_head_rcu(&node->hlist, &node_hash[r]);
+	spin_unlock(&nodes_lock);
+	return node;
+}
+
+static int dlm_send_ack(int nodeid, uint32_t seq)
+{
+	int mb_len = sizeof(struct dlm_header);
+	struct dlm_header *m_header;
+	struct dlm_msg *msg;
+	char *ppc;
+
+	msg = dlm_lowcomms_new_msg(nodeid, mb_len, GFP_NOFS, &ppc,
+				   NULL, NULL);
+	if (!msg)
+		return -ENOMEM;
+
+	m_header = (struct dlm_header *)ppc;
+
+	m_header->h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+	m_header->h_nodeid = dlm_our_nodeid();
+	m_header->h_length = mb_len;
+	m_header->h_cmd = DLM_ACK;
+	m_header->u.h_seq = seq;
+
+	header_out(m_header);
+	dlm_lowcomms_commit_msg(msg);
+	dlm_lowcomms_put_msg(msg);
+
+	return 0;
+}
+
+static int dlm_send_fin(struct midcomms_node *node,
+			void (*ack_rcv)(struct midcomms_node *node))
+{
+	int mb_len = sizeof(struct dlm_header);
+	struct dlm_header *m_header;
+	struct dlm_mhandle *mh;
+	char *ppc;
+
+	mh = dlm_midcomms_get_mhandle(node->nodeid, mb_len, GFP_NOFS, &ppc);
+	if (!mh)
+		return -ENOMEM;
+
+	mh->ack_rcv = ack_rcv;
+
+	m_header = (struct dlm_header *)ppc;
+
+	m_header->h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+	m_header->h_nodeid = dlm_our_nodeid();
+	m_header->h_length = mb_len;
+	m_header->h_cmd = DLM_FIN;
+
+	header_out(m_header);
+
+	pr_debug("sending fin msg to node %d\n", node->nodeid);
+	dlm_midcomms_commit_mhandle(mh);
+	set_bit(DLM_NODE_FLAG_STOP_TX, &node->flags);
+
+	return 0;
+}
+
+static void dlm_receive_ack(struct midcomms_node *node, uint32_t seq)
+{
+	struct dlm_mhandle *mh;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(mh, &node->send_queue, list) {
+		if (before(mh->seq, seq)) {
+			spin_lock(&node->send_queue_lock);
+			list_del_rcu(&mh->list);
+			spin_unlock(&node->send_queue_lock);
+
+			atomic_dec(&node->send_queue_cnt);
+
+			if (mh->ack_rcv)
+				mh->ack_rcv(node);
+
+			call_rcu(&mh->rcu, dlm_mhandle_release);
+		} else {
+			/* send queue should be ordered */
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void dlm_pas_fin_ack_rcv(struct midcomms_node *node)
+{
+	spin_lock(&node->state_lock);
+	pr_debug("receive passive fin ack from node %d with state %s\n",
+		 node->nodeid, dlm_state_str(node->state));
+
+	switch (node->state) {
+	case DLM_LAST_ACK:
+		/* DLM_CLOSED */
+		midcomms_node_reset(node);
+		break;
+	case DLM_CLOSED:
+		/* not valid but somehow we got what we want */
+		wake_up(&node->shutdown_wait);
+		break;
+	default:
+		spin_unlock(&node->state_lock);
+		log_print("%s: unexpected state: %d\n",
+			  __func__, node->state);
+		WARN_ON(1);
+		return;
+	}
+	spin_unlock(&node->state_lock);
+}
+
+static void dlm_midcomms_receive_buffer(union dlm_packet *p,
+					struct midcomms_node *node,
+					uint32_t seq)
+{
+	if (seq == node->seq_next) {
+		node->seq_next++;
+		/* send ack before fin */
+		dlm_send_ack(node->nodeid, node->seq_next);
+
+		switch (p->header.h_cmd) {
+		case DLM_FIN:
+			spin_lock(&node->state_lock);
+			pr_debug("receive fin msg from node %d with state %s\n",
+				 node->nodeid, dlm_state_str(node->state));
+
+			switch (node->state) {
+			case DLM_ESTABLISHED:
+				node->state = DLM_CLOSE_WAIT;
+				pr_debug("switch node %d to state %s\n",
+					 node->nodeid, dlm_state_str(node->state));
+				/* passive shutdown DLM_LAST_ACK case 1
+				 * additional we check if the node is used by
+				 * cluster manager events at all.
+				 */
+				if (node->users == 0) {
+					node->state = DLM_LAST_ACK;
+					pr_debug("switch node %d to state %s case 1\n",
+						 node->nodeid, dlm_state_str(node->state));
+					spin_unlock(&node->state_lock);
+					goto send_fin;
+				}
+				break;
+			case DLM_FIN_WAIT1:
+				node->state = DLM_CLOSING;
+				pr_debug("switch node %d to state %s\n",
+					 node->nodeid, dlm_state_str(node->state));
+				break;
+			case DLM_FIN_WAIT2:
+				midcomms_node_reset(node);
+				pr_debug("switch node %d to state %s\n",
+					 node->nodeid, dlm_state_str(node->state));
+				wake_up(&node->shutdown_wait);
+				break;
+			case DLM_LAST_ACK:
+				/* probably remove_member caught it, do nothing */
+				break;
+			default:
+				spin_unlock(&node->state_lock);
+				log_print("%s: unexpected state: %d\n",
+					  __func__, node->state);
+				WARN_ON(1);
+				return;
+			}
+			spin_unlock(&node->state_lock);
+
+			set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+			break;
+		default:
+			WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
+			dlm_receive_buffer(p, node->nodeid);
+			break;
+		}
+	} else {
+		/* retry to ack message which we already have by sending back
+		 * current node->seq_next number as ack.
+		 */
+		if (seq < node->seq_next)
+			dlm_send_ack(node->nodeid, node->seq_next);
+
+		log_print_ratelimited("ignore dlm msg because seq mismatch, seq: %u, expected: %u, nodeid: %d",
+				      seq, node->seq_next, node->nodeid);
+	}
+
+	return;
+
+send_fin:
+	set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+	dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+}
+
+static struct midcomms_node *
+dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
+			      uint16_t msglen, int (*cb)(struct midcomms_node *node))
+{
+	struct midcomms_node *node = NULL;
+	gfp_t allocation = 0;
+	int ret;
+
+	switch (p->header.h_cmd) {
+	case DLM_RCOM:
+		if (msglen < sizeof(struct dlm_rcom)) {
+			log_print("rcom msg too small: %u, will skip this message from node %d",
+				  msglen, nodeid);
+			return NULL;
+		}
+
+		switch (le32_to_cpu(p->rcom.rc_type)) {
+		case DLM_RCOM_NAMES:
+			fallthrough;
+		case DLM_RCOM_NAMES_REPLY:
+			fallthrough;
+		case DLM_RCOM_STATUS:
+			fallthrough;
+		case DLM_RCOM_STATUS_REPLY:
+			node = nodeid2node(nodeid, 0);
+			if (node) {
+				spin_lock(&node->state_lock);
+				if (node->state != DLM_ESTABLISHED)
+					pr_debug("receive begin RCOM msg from node %d with state %s\n",
+						 node->nodeid, dlm_state_str(node->state));
+
+				switch (node->state) {
+				case DLM_CLOSED:
+					node->state = DLM_ESTABLISHED;
+					pr_debug("switch node %d to state %s\n",
+						 node->nodeid, dlm_state_str(node->state));
+					break;
+				case DLM_ESTABLISHED:
+					break;
+				default:
+					/* some invalid state passive shutdown
+					 * was failed, we try to reset and
+					 * hope it will go on.
+					 */
+					log_print("reset node %d because shutdown stucked",
+						  node->nodeid);
+
+					midcomms_node_reset(node);
+					node->state = DLM_ESTABLISHED;
+					break;
+				}
+				spin_unlock(&node->state_lock);
+			}
+
+			allocation = GFP_NOFS;
+			break;
+		default:
+			break;
+		}
+
+		break;
+	default:
+		break;
+	}
+
+	node = nodeid2node(nodeid, allocation);
+	if (!node) {
+		log_print_ratelimited("received dlm message cmd %d nextcmd %d from node %d in an invalid sequence",
+				      p->header.h_cmd, p->opts.o_nextcmd, nodeid);
+		return NULL;
+	}
+
+	ret = cb(node);
+	if (ret < 0)
+		return NULL;
+
+	return node;
+}
+
+static int dlm_midcomms_version_check_3_2(struct midcomms_node *node)
+{
+	switch (node->version) {
+	case DLM_VERSION_NOT_SET:
+		node->version = DLM_VERSION_3_2;
+		log_print("version 0x%08x for node %d detected", DLM_VERSION_3_2,
+			  node->nodeid);
+		break;
+	case DLM_VERSION_3_2:
+		break;
+	default:
+		log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
+				      DLM_VERSION_3_2, node->nodeid, node->version);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int dlm_opts_check_msglen(union dlm_packet *p, uint16_t msglen, int nodeid)
+{
+	int len = msglen;
+
+	/* we only trust outer header msglen because
+	 * it's checked against receive buffer length.
+	 */
+	if (len < sizeof(struct dlm_opts))
+		return -1;
+	len -= sizeof(struct dlm_opts);
+
+	if (len < le16_to_cpu(p->opts.o_optlen))
+		return -1;
+	len -= le16_to_cpu(p->opts.o_optlen);
+
+	switch (p->opts.o_nextcmd) {
+	case DLM_FIN:
+		if (len < sizeof(struct dlm_header)) {
+			log_print("fin too small: %d, will skip this message from node %d",
+				  len, nodeid);
+			return -1;
+		}
+
+		break;
+	case DLM_MSG:
+		if (len < sizeof(struct dlm_message)) {
+			log_print("msg too small: %d, will skip this message from node %d",
+				  msglen, nodeid);
+			return -1;
+		}
+
+		break;
+	case DLM_RCOM:
+		if (len < sizeof(struct dlm_rcom)) {
+			log_print("rcom msg too small: %d, will skip this message from node %d",
+				  len, nodeid);
+			return -1;
+		}
+
+		break;
+	default:
+		log_print("unsupported o_nextcmd received: %u, will skip this message from node %d",
+			  p->opts.o_nextcmd, nodeid);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void dlm_midcomms_receive_buffer_3_2(union dlm_packet *p, int nodeid)
+{
+	uint16_t msglen = le16_to_cpu(p->header.h_length);
+	struct midcomms_node *node;
+	uint32_t seq;
+	int ret, idx;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen,
+					     dlm_midcomms_version_check_3_2);
+	if (!node)
+		goto out;
+
+	switch (p->header.h_cmd) {
+	case DLM_RCOM:
+		/* these rcom message we use to determine version.
+		 * they have their own retransmission handling and
+		 * are the first messages of dlm.
+		 *
+		 * length already checked.
+		 */
+		switch (le32_to_cpu(p->rcom.rc_type)) {
+		case DLM_RCOM_NAMES:
+			fallthrough;
+		case DLM_RCOM_NAMES_REPLY:
+			fallthrough;
+		case DLM_RCOM_STATUS:
+			fallthrough;
+		case DLM_RCOM_STATUS_REPLY:
+			break;
+		default:
+			log_print("unsupported rcom type received: %u, will skip this message from node %d",
+				  le32_to_cpu(p->rcom.rc_type), nodeid);
+			goto out;
+		}
+
+		WARN_ON(test_bit(DLM_NODE_FLAG_STOP_RX, &node->flags));
+		dlm_receive_buffer(p, nodeid);
+		break;
+	case DLM_OPTS:
+		seq = le32_to_cpu(p->header.u.h_seq);
+
+		ret = dlm_opts_check_msglen(p, msglen, nodeid);
+		if (ret < 0) {
+			log_print("opts msg too small: %u, will skip this message from node %d",
+				  msglen, nodeid);
+			goto out;
+		}
+
+		p = (union dlm_packet *)((unsigned char *)p->opts.o_opts +
+					 le16_to_cpu(p->opts.o_optlen));
+
+		/* recheck inner msglen just if it's not garbage */
+		msglen = le16_to_cpu(p->header.h_length);
+		switch (p->header.h_cmd) {
+		case DLM_RCOM:
+			if (msglen < sizeof(struct dlm_rcom)) {
+				log_print("inner rcom msg too small: %u, will skip this message from node %d",
+					  msglen, nodeid);
+				goto out;
+			}
+
+			break;
+		case DLM_MSG:
+			if (msglen < sizeof(struct dlm_message)) {
+				log_print("inner msg too small: %u, will skip this message from node %d",
+					  msglen, nodeid);
+				goto out;
+			}
+
+			break;
+		case DLM_FIN:
+			if (msglen < sizeof(struct dlm_header)) {
+				log_print("inner fin too small: %u, will skip this message from node %d",
+					  msglen, nodeid);
+				goto out;
+			}
+
+			break;
+		default:
+			log_print("unsupported inner h_cmd received: %u, will skip this message from node %d",
+				  msglen, nodeid);
+			goto out;
+		}
+
+		dlm_midcomms_receive_buffer(p, node, seq);
+		break;
+	case DLM_ACK:
+		seq = le32_to_cpu(p->header.u.h_seq);
+		dlm_receive_ack(node, seq);
+		break;
+	default:
+		log_print("unsupported h_cmd received: %u, will skip this message from node %d",
+			  p->header.h_cmd, nodeid);
+		break;
+	}
+
+out:
+	srcu_read_unlock(&nodes_srcu, idx);
+}
+
+static int dlm_midcomms_version_check_3_1(struct midcomms_node *node)
+{
+	switch (node->version) {
+	case DLM_VERSION_NOT_SET:
+		node->version = DLM_VERSION_3_1;
+		log_print("version 0x%08x for node %d detected", DLM_VERSION_3_1,
+			  node->nodeid);
+		break;
+	case DLM_VERSION_3_1:
+		break;
+	default:
+		log_print_ratelimited("version mismatch detected, assumed 0x%08x but node %d has 0x%08x",
+				      DLM_VERSION_3_1, node->nodeid, node->version);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void dlm_midcomms_receive_buffer_3_1(union dlm_packet *p, int nodeid)
+{
+	uint16_t msglen = le16_to_cpu(p->header.h_length);
+	struct midcomms_node *node;
+	int idx;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = dlm_midcomms_recv_node_lookup(nodeid, p, msglen,
+					     dlm_midcomms_version_check_3_1);
+	if (!node) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+	srcu_read_unlock(&nodes_srcu, idx);
+
+	switch (p->header.h_cmd) {
+	case DLM_RCOM:
+		/* length already checked */
+		break;
+	case DLM_MSG:
+		if (msglen < sizeof(struct dlm_message)) {
+			log_print("msg too small: %u, will skip this message from node %d",
+				  msglen, nodeid);
+			return;
+		}
+
+		break;
+	default:
+		log_print("unsupported h_cmd received: %u, will skip this message from node %d",
+			  p->header.h_cmd, nodeid);
+		return;
+	}
+
+	dlm_receive_buffer(p, nodeid);
 }
 
 /*
@@ -101,32 +865,19 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
 		if (msglen > len)
 			break;
 
-		switch (hd->h_cmd) {
-		case DLM_MSG:
-			if (msglen < sizeof(struct dlm_message)) {
-				log_print("dlm msg too small: %u, will skip this message",
-					  msglen);
-				goto skip;
-			}
-
+		switch (le32_to_cpu(hd->h_version)) {
+		case DLM_VERSION_3_1:
+			dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid);
 			break;
-		case DLM_RCOM:
-			if (msglen < sizeof(struct dlm_rcom)) {
-				log_print("dlm rcom msg too small: %u, will skip this message",
-					  msglen);
-				goto skip;
-			}
-
+		case DLM_VERSION_3_2:
+			dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid);
 			break;
 		default:
-			log_print("unsupported h_cmd received: %u, will skip this message",
-				  hd->h_cmd);
-			goto skip;
+			log_print("received invalid version header: %u from node %d, will skip this message",
+				  le32_to_cpu(hd->h_version), nodeid);
+			break;
 		}
 
-		dlm_receive_buffer((union dlm_packet *)ptr, nodeid);
-
-skip:
 		ret += msglen;
 		len -= msglen;
 		ptr += msglen;
@@ -134,3 +885,454 @@ skip:
 
 	return ret;
 }
+
+void dlm_midcomms_unack_msg_resend(int nodeid)
+{
+	struct midcomms_node *node;
+	struct dlm_mhandle *mh;
+	int idx, ret;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = nodeid2node(nodeid, 0);
+	if (!node) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	/* old protocol, we don't support to retransmit on failure */
+	switch (node->version) {
+	case DLM_VERSION_3_2:
+		break;
+	default:
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(mh, &node->send_queue, list) {
+		if (!mh->committed)
+			continue;
+
+		ret = dlm_lowcomms_resend_msg(mh->msg);
+		if (!ret)
+			log_print_ratelimited("retransmit dlm msg, seq %u, nodeid %d",
+					      mh->seq, node->nodeid);
+	}
+	rcu_read_unlock();
+	srcu_read_unlock(&nodes_srcu, idx);
+}
+
+static void dlm_fill_opts_header(struct dlm_opts *opts, uint16_t inner_len,
+				 uint32_t seq)
+{
+	opts->o_header.h_cmd = DLM_OPTS;
+	opts->o_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
+	opts->o_header.h_nodeid = dlm_our_nodeid();
+	opts->o_header.h_length = DLM_MIDCOMMS_OPT_LEN + inner_len;
+	opts->o_header.u.h_seq = seq;
+	header_out(&opts->o_header);
+}
+
+static void midcomms_new_msg_cb(struct dlm_mhandle *mh)
+{
+	atomic_inc(&mh->node->send_queue_cnt);
+
+	spin_lock(&mh->node->send_queue_lock);
+	list_add_tail_rcu(&mh->list, &mh->node->send_queue);
+	spin_unlock(&mh->node->send_queue_lock);
+
+	mh->seq = mh->node->seq_send++;
+}
+
+static struct dlm_msg *dlm_midcomms_get_msg_3_2(struct dlm_mhandle *mh, int nodeid,
+						int len, gfp_t allocation, char **ppc)
+{
+	struct dlm_opts *opts;
+	struct dlm_msg *msg;
+
+	msg = dlm_lowcomms_new_msg(nodeid, len + DLM_MIDCOMMS_OPT_LEN,
+				   allocation, ppc, midcomms_new_msg_cb, mh);
+	if (!msg)
+		return NULL;
+
+	opts = (struct dlm_opts *)*ppc;
+	mh->opts = opts;
+
+	/* add possible options here */
+	dlm_fill_opts_header(opts, len, mh->seq);
+
+	*ppc += sizeof(*opts);
+	mh->inner_hd = (const struct dlm_header *)*ppc;
+	return msg;
+}
+
+struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
+					     gfp_t allocation, char **ppc)
+{
+	struct midcomms_node *node;
+	struct dlm_mhandle *mh;
+	struct dlm_msg *msg;
+	int idx;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = nodeid2node(nodeid, 0);
+	if (!node) {
+		WARN_ON_ONCE(1);
+		goto err;
+	}
+
+	/* this is a bug, however we going on and hope it will be resolved */
+	WARN_ON(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags));
+
+	mh = kzalloc(sizeof(*mh), GFP_NOFS);
+	if (!mh)
+		goto err;
+
+	mh->idx = idx;
+	mh->node = node;
+
+	switch (node->version) {
+	case DLM_VERSION_3_1:
+		msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc,
+					   NULL, NULL);
+		if (!msg) {
+			kfree(mh);
+			goto err;
+		}
+
+		break;
+	case DLM_VERSION_3_2:
+		msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation,
+					       ppc);
+		if (!msg) {
+			kfree(mh);
+			goto err;
+		}
+
+		break;
+	default:
+		kfree(mh);
+		WARN_ON(1);
+		goto err;
+	}
+
+	mh->msg = msg;
+
+	/* keep in mind that is a must to call
+	 * dlm_midcomms_commit_msg() which releases
+	 * nodes_srcu using mh->idx which is assumed
+	 * here that the application will call it.
+	 */
+	return mh;
+
+err:
+	srcu_read_unlock(&nodes_srcu, idx);
+	return NULL;
+}
+
+static void dlm_midcomms_commit_msg_3_2(struct dlm_mhandle *mh)
+{
+	/* nexthdr chain for fast lookup */
+	mh->opts->o_nextcmd = mh->inner_hd->h_cmd;
+	mh->committed = true;
+	dlm_lowcomms_commit_msg(mh->msg);
+}
+
+void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
+{
+	switch (mh->node->version) {
+	case DLM_VERSION_3_1:
+		srcu_read_unlock(&nodes_srcu, mh->idx);
+
+		dlm_lowcomms_commit_msg(mh->msg);
+		dlm_lowcomms_put_msg(mh->msg);
+		/* mh is not part of rcu list in this case */
+		kfree(mh);
+		break;
+	case DLM_VERSION_3_2:
+		dlm_midcomms_commit_msg_3_2(mh);
+		srcu_read_unlock(&nodes_srcu, mh->idx);
+		break;
+	default:
+		srcu_read_unlock(&nodes_srcu, mh->idx);
+		WARN_ON(1);
+		break;
+	}
+}
+
+int dlm_midcomms_start(void)
+{
+	int i;
+
+	for (i = 0; i < CONN_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&node_hash[i]);
+
+	return dlm_lowcomms_start();
+}
+
+static void dlm_act_fin_ack_rcv(struct midcomms_node *node)
+{
+	spin_lock(&node->state_lock);
+	pr_debug("receive active fin ack from node %d with state %s\n",
+		 node->nodeid, dlm_state_str(node->state));
+
+	switch (node->state) {
+	case DLM_FIN_WAIT1:
+		node->state = DLM_FIN_WAIT2;
+		pr_debug("switch node %d to state %s\n",
+			 node->nodeid, dlm_state_str(node->state));
+		break;
+	case DLM_CLOSING:
+		midcomms_node_reset(node);
+		pr_debug("switch node %d to state %s\n",
+			 node->nodeid, dlm_state_str(node->state));
+		wake_up(&node->shutdown_wait);
+		break;
+	case DLM_CLOSED:
+		/* not valid but somehow we got what we want */
+		wake_up(&node->shutdown_wait);
+		break;
+	default:
+		spin_unlock(&node->state_lock);
+		log_print("%s: unexpected state: %d\n",
+			  __func__, node->state);
+		WARN_ON(1);
+		return;
+	}
+	spin_unlock(&node->state_lock);
+}
+
+void dlm_midcomms_add_member(int nodeid)
+{
+	struct midcomms_node *node;
+	int idx;
+
+	if (nodeid == dlm_our_nodeid())
+		return;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = nodeid2node(nodeid, GFP_NOFS);
+	if (!node) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	spin_lock(&node->state_lock);
+	if (!node->users) {
+		pr_debug("receive add member from node %d with state %s\n",
+			 node->nodeid, dlm_state_str(node->state));
+		switch (node->state) {
+		case DLM_ESTABLISHED:
+			break;
+		case DLM_CLOSED:
+			node->state = DLM_ESTABLISHED;
+			pr_debug("switch node %d to state %s\n",
+				 node->nodeid, dlm_state_str(node->state));
+			break;
+		default:
+			/* some invalid state passive shutdown
+			 * was failed, we try to reset and
+			 * hope it will go on.
+			 */
+			log_print("reset node %d because shutdown stucked",
+				  node->nodeid);
+
+			midcomms_node_reset(node);
+			node->state = DLM_ESTABLISHED;
+			break;
+		}
+	}
+
+	node->users++;
+	pr_debug("users inc count %d\n", node->users);
+	spin_unlock(&node->state_lock);
+
+	srcu_read_unlock(&nodes_srcu, idx);
+}
+
+void dlm_midcomms_remove_member(int nodeid)
+{
+	struct midcomms_node *node;
+	int idx;
+
+	if (nodeid == dlm_our_nodeid())
+		return;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	node = nodeid2node(nodeid, 0);
+	if (!node) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
+	spin_lock(&node->state_lock);
+	node->users--;
+	pr_debug("users dec count %d\n", node->users);
+
+	/* hitting users count to zero means the
+	 * other side is running dlm_midcomms_stop()
+	 * we meet us to have a clean disconnect.
+	 */
+	if (node->users == 0) {
+		pr_debug("receive remove member from node %d with state %s\n",
+			 node->nodeid, dlm_state_str(node->state));
+		switch (node->state) {
+		case DLM_ESTABLISHED:
+			break;
+		case DLM_CLOSE_WAIT:
+			/* passive shutdown DLM_LAST_ACK case 2 */
+			node->state = DLM_LAST_ACK;
+			spin_unlock(&node->state_lock);
+
+			pr_debug("switch node %d to state %s case 2\n",
+				 node->nodeid, dlm_state_str(node->state));
+			goto send_fin;
+		case DLM_LAST_ACK:
+			/* probably receive fin caught it, do nothing */
+			break;
+		case DLM_CLOSED:
+			/* already gone, do nothing */
+			break;
+		default:
+			log_print("%s: unexpected state: %d\n",
+				  __func__, node->state);
+			break;
+		}
+	}
+	spin_unlock(&node->state_lock);
+
+	srcu_read_unlock(&nodes_srcu, idx);
+	return;
+
+send_fin:
+	set_bit(DLM_NODE_FLAG_STOP_RX, &node->flags);
+	dlm_send_fin(node, dlm_pas_fin_ack_rcv);
+	srcu_read_unlock(&nodes_srcu, idx);
+}
+
+static void midcomms_node_release(struct rcu_head *rcu)
+{
+	struct midcomms_node *node = container_of(rcu, struct midcomms_node, rcu);
+
+	WARN_ON(atomic_read(&node->send_queue_cnt));
+	kfree(node);
+}
+
+static void midcomms_shutdown(struct midcomms_node *node)
+{
+	int ret;
+
+	/* old protocol, we don't wait for pending operations */
+	switch (node->version) {
+	case DLM_VERSION_3_2:
+		break;
+	default:
+		return;
+	}
+
+	spin_lock(&node->state_lock);
+	pr_debug("receive active shutdown for node %d with state %s\n",
+		 node->nodeid, dlm_state_str(node->state));
+	switch (node->state) {
+	case DLM_ESTABLISHED:
+		node->state = DLM_FIN_WAIT1;
+		pr_debug("switch node %d to state %s case 2\n",
+			 node->nodeid, dlm_state_str(node->state));
+		break;
+	case DLM_CLOSED:
+		/* we have what we want */
+		spin_unlock(&node->state_lock);
+		return;
+	default:
+		/* busy to enter DLM_FIN_WAIT1, wait until passive
+		 * done in shutdown_wait to enter DLM_CLOSED.
+		 */
+		break;
+	}
+	spin_unlock(&node->state_lock);
+
+	if (node->state == DLM_FIN_WAIT1) {
+		dlm_send_fin(node, dlm_act_fin_ack_rcv);
+
+		if (DLM_DEBUG_FENCE_TERMINATION)
+			msleep(5000);
+	}
+
+	/* wait for other side dlm + fin */
+	ret = wait_event_timeout(node->shutdown_wait,
+				 node->state == DLM_CLOSED ||
+				 test_bit(DLM_NODE_FLAG_CLOSE, &node->flags),
+				 DLM_SHUTDOWN_TIMEOUT);
+	if (!ret || test_bit(DLM_NODE_FLAG_CLOSE, &node->flags)) {
+		pr_debug("active shutdown timed out for node %d with state %s\n",
+			 node->nodeid, dlm_state_str(node->state));
+		midcomms_node_reset(node);
+		return;
+	}
+
+	pr_debug("active shutdown done for node %d with state %s\n",
+		 node->nodeid, dlm_state_str(node->state));
+}
+
+void dlm_midcomms_shutdown(void)
+{
+	struct midcomms_node *node;
+	int i, idx;
+
+	mutex_lock(&close_lock);
+	idx = srcu_read_lock(&nodes_srcu);
+	for (i = 0; i < CONN_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
+			midcomms_shutdown(node);
+
+			spin_lock(&nodes_lock);
+			hlist_del_rcu(&node->hlist);
+			spin_unlock(&nodes_lock);
+
+			call_srcu(&nodes_srcu, &node->rcu, midcomms_node_release);
+		}
+	}
+	srcu_read_unlock(&nodes_srcu, idx);
+	mutex_unlock(&close_lock);
+
+	dlm_lowcomms_shutdown();
+}
+
+int dlm_midcomms_close(int nodeid)
+{
+	struct midcomms_node *node;
+	int idx, ret;
+
+	if (nodeid == dlm_our_nodeid())
+		return 0;
+
+	idx = srcu_read_lock(&nodes_srcu);
+	/* Abort pending close/remove operation */
+	node = nodeid2node(nodeid, 0);
+	if (node) {
+		/* let shutdown waiters leave */
+		set_bit(DLM_NODE_FLAG_CLOSE, &node->flags);
+		wake_up(&node->shutdown_wait);
+	}
+	srcu_read_unlock(&nodes_srcu, idx);
+
+	synchronize_srcu(&nodes_srcu);
+
+	idx = srcu_read_lock(&nodes_srcu);
+	mutex_lock(&close_lock);
+	node = nodeid2node(nodeid, 0);
+	if (!node) {
+		mutex_unlock(&close_lock);
+		srcu_read_unlock(&nodes_srcu, idx);
+		return dlm_lowcomms_close(nodeid);
+	}
+
+	ret = dlm_lowcomms_close(nodeid);
+	spin_lock(&node->state_lock);
+	midcomms_node_reset(node);
+	spin_unlock(&node->state_lock);
+	srcu_read_unlock(&nodes_srcu, idx);
+	mutex_unlock(&close_lock);
+
+	return ret;
+}
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 9ac1190ce277..1178b836315b 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -21,6 +21,7 @@ int dlm_midcomms_start(void);
 void dlm_midcomms_shutdown(void);
 void dlm_midcomms_add_member(int nodeid);
 void dlm_midcomms_remove_member(int nodeid);
+void dlm_midcomms_unack_msg_resend(int nodeid);
 
 #endif				/* __MIDCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 7c1a06bcd418..085f21966c72 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -202,7 +202,7 @@ retry:
 	set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags);
 
 	allow_sync_reply(ls, &rc->rc_id);
-	memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
+	memset(ls->ls_recover_buf, 0, DEFAULT_BUFFER_SIZE);
 
 	send_rcom_stateless(ls, msg, rc);
 
@@ -325,7 +325,7 @@ retry:
 	memcpy(rc->rc_buf, last_name, last_len);
 
 	allow_sync_reply(ls, &rc->rc_id);
-	memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
+	memset(ls->ls_recover_buf, 0, DEFAULT_BUFFER_SIZE);
 
 	send_rcom_stateless(ls, msg, rc);
 

From 5b2f981fde8b0dbf0bfa117bb4322342fcfb7174 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:47 -0400
Subject: [PATCH 0526/3292] fs: dlm: add midcomms debugfs functionality

This patch adds functionality to debug midcomms per connection state
inside a comms directory which is similar like dlm configfs. Currently
there exists the possibility to read out two attributes which is the
send queue counter and the version of each midcomms node state.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/debug_fs.c     | 54 +++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/dlm_internal.h |  4 ++++
 fs/dlm/midcomms.c     | 27 ++++++++++++++++++++++
 fs/dlm/midcomms.h     |  6 +++++
 4 files changed, 91 insertions(+)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index d5bd990bcab8..47e9d57e4cae 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 
 #include "dlm_internal.h"
+#include "midcomms.h"
 #include "lock.h"
 
 #define DLM_DEBUG_BUF_LEN 4096
@@ -23,6 +24,7 @@ static char debug_buf[DLM_DEBUG_BUF_LEN];
 static struct mutex debug_buf_lock;
 
 static struct dentry *dlm_root;
+static struct dentry *dlm_comms;
 
 static char *print_lockmode(int mode)
 {
@@ -738,6 +740,57 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
 	debugfs_remove(ls->ls_debug_toss_dentry);
 }
 
+static int dlm_state_show(struct seq_file *file, void *offset)
+{
+	seq_printf(file, "%s\n", dlm_midcomms_state(file->private));
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dlm_state);
+
+static int dlm_flags_show(struct seq_file *file, void *offset)
+{
+	seq_printf(file, "%lu\n", dlm_midcomms_flags(file->private));
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dlm_flags);
+
+static int dlm_send_queue_cnt_show(struct seq_file *file, void *offset)
+{
+	seq_printf(file, "%d\n", dlm_midcomms_send_queue_cnt(file->private));
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dlm_send_queue_cnt);
+
+static int dlm_version_show(struct seq_file *file, void *offset)
+{
+	seq_printf(file, "0x%08x\n", dlm_midcomms_version(file->private));
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dlm_version);
+
+void *dlm_create_debug_comms_file(int nodeid, void *data)
+{
+	struct dentry *d_node;
+	char name[256];
+
+	memset(name, 0, sizeof(name));
+	snprintf(name, 256, "%d", nodeid);
+
+	d_node = debugfs_create_dir(name, dlm_comms);
+	debugfs_create_file("state", 0444, d_node, data, &dlm_state_fops);
+	debugfs_create_file("flags", 0444, d_node, data, &dlm_flags_fops);
+	debugfs_create_file("send_queue_count", 0444, d_node, data,
+			    &dlm_send_queue_cnt_fops);
+	debugfs_create_file("version", 0444, d_node, data, &dlm_version_fops);
+
+	return d_node;
+}
+
+void dlm_delete_debug_comms_file(void *ctx)
+{
+	debugfs_remove(ctx);
+}
+
 void dlm_create_debug_file(struct dlm_ls *ls)
 {
 	char name[DLM_LOCKSPACE_LEN + 8];
@@ -797,6 +850,7 @@ void __init dlm_register_debugfs(void)
 {
 	mutex_init(&debug_buf_lock);
 	dlm_root = debugfs_create_dir("dlm", NULL);
+	dlm_comms = debugfs_create_dir("comms", dlm_root);
 }
 
 void dlm_unregister_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 40917c878370..91d1ca3a121a 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -754,11 +754,15 @@ void dlm_register_debugfs(void);
 void dlm_unregister_debugfs(void);
 void dlm_create_debug_file(struct dlm_ls *ls);
 void dlm_delete_debug_file(struct dlm_ls *ls);
+void *dlm_create_debug_comms_file(int nodeid, void *data);
+void dlm_delete_debug_comms_file(void *ctx);
 #else
 static inline void dlm_register_debugfs(void) { }
 static inline void dlm_unregister_debugfs(void) { }
 static inline void dlm_create_debug_file(struct dlm_ls *ls) { }
 static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
+static inline void *dlm_create_debug_comms_file(int nodeid, void *data) { return NULL; }
+static inline void dlm_delete_debug_comms_file(void *ctx) { }
 #endif
 
 #endif				/* __DLM_INTERNAL_DOT_H__ */
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index eef3938a363e..35664950f6b7 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -189,6 +189,9 @@ struct midcomms_node {
 	 */
 	int users;
 
+	/* not protected by srcu, node_hash lifetime */
+	void *debugfs;
+
 	struct hlist_node hlist;
 	struct rcu_head rcu;
 };
@@ -244,6 +247,26 @@ static inline const char *dlm_state_str(int state)
 	}
 }
 
+const char *dlm_midcomms_state(struct midcomms_node *node)
+{
+	return dlm_state_str(node->state);
+}
+
+unsigned long dlm_midcomms_flags(struct midcomms_node *node)
+{
+	return node->flags;
+}
+
+int dlm_midcomms_send_queue_cnt(struct midcomms_node *node)
+{
+	return atomic_read(&node->send_queue_cnt);
+}
+
+uint32_t dlm_midcomms_version(struct midcomms_node *node)
+{
+	return node->version;
+}
+
 static struct midcomms_node *__find_node(int nodeid, int r)
 {
 	struct midcomms_node *node;
@@ -332,6 +355,8 @@ static struct midcomms_node *nodeid2node(int nodeid, gfp_t alloc)
 
 	hlist_add_head_rcu(&node->hlist, &node_hash[r]);
 	spin_unlock(&nodes_lock);
+
+	node->debugfs = dlm_create_debug_comms_file(nodeid, node);
 	return node;
 }
 
@@ -1285,6 +1310,8 @@ void dlm_midcomms_shutdown(void)
 		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
 			midcomms_shutdown(node);
 
+			dlm_delete_debug_comms_file(node->debugfs);
+
 			spin_lock(&nodes_lock);
 			hlist_del_rcu(&node->hlist);
 			spin_unlock(&nodes_lock);
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 1178b836315b..579abc6929be 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -12,6 +12,8 @@
 #ifndef __MIDCOMMS_DOT_H__
 #define __MIDCOMMS_DOT_H__
 
+struct midcomms_node;
+
 int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int buflen);
 struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 					     gfp_t allocation, char **ppc);
@@ -22,6 +24,10 @@ void dlm_midcomms_shutdown(void);
 void dlm_midcomms_add_member(int nodeid);
 void dlm_midcomms_remove_member(int nodeid);
 void dlm_midcomms_unack_msg_resend(int nodeid);
+const char *dlm_midcomms_state(struct midcomms_node *node);
+unsigned long dlm_midcomms_flags(struct midcomms_node *node);
+int dlm_midcomms_send_queue_cnt(struct midcomms_node *node);
+uint32_t dlm_midcomms_version(struct midcomms_node *node);
 
 #endif				/* __MIDCOMMS_DOT_H__ */
 

From 706474fbc5fedd7799b488962aad3541b235165b Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 21 May 2021 15:08:48 -0400
Subject: [PATCH 0527/3292] fs: dlm: don't allow half transmitted messages

This patch will clean a dirty page buffer if a reconnect occurs. If a page
buffer was half transmitted we cannot start inside the middle of a dlm
message if a node connects again. I observed invalid length receptions
errors and was guessing that this behaviour occurs, after this patch I
never saw an invalid message length again. This patch might drops more
messages for dlm version 3.1 but 3.1 can't deal with half messages as
well, for 3.2 it might trigger more re-transmissions but will not leave dlm
in a broken state.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 95 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 35 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index fe9113bd5ba0..36adccc4f849 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -118,6 +118,7 @@ struct writequeue_entry {
 	int len;
 	int end;
 	int users;
+	bool dirty;
 	struct connection *con;
 	struct list_head msgs;
 	struct kref ref;
@@ -700,6 +701,42 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
 }
 
+static void dlm_page_release(struct kref *kref)
+{
+	struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
+						  ref);
+
+	__free_page(e->page);
+	kfree(e);
+}
+
+static void dlm_msg_release(struct kref *kref)
+{
+	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
+
+	kref_put(&msg->entry->ref, dlm_page_release);
+	kfree(msg);
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+	struct dlm_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
+		if (msg->orig_msg) {
+			msg->orig_msg->retransmit = false;
+			kref_put(&msg->orig_msg->ref, dlm_msg_release);
+		}
+
+		list_del(&msg->list);
+		kref_put(&msg->ref, dlm_msg_release);
+	}
+
+	list_del(&e->list);
+	atomic_dec(&e->con->writequeue_cnt);
+	kref_put(&e->ref, dlm_page_release);
+}
+
 static void dlm_close_sock(struct socket **sock)
 {
 	if (*sock) {
@@ -714,6 +751,7 @@ static void close_connection(struct connection *con, bool and_other,
 			     bool tx, bool rx)
 {
 	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);
+	struct writequeue_entry *e;
 
 	if (tx && !closing && cancel_work_sync(&con->swork)) {
 		log_print("canceled swork for node %d", con->nodeid);
@@ -732,6 +770,26 @@ static void close_connection(struct connection *con, bool and_other,
 		close_connection(con->othercon, false, tx, rx);
 	}
 
+	/* if we send a writequeue entry only a half way, we drop the
+	 * whole entry because reconnection and that we not start of the
+	 * middle of a msg which will confuse the other end.
+	 *
+	 * we can always drop messages because retransmits, but what we
+	 * cannot allow is to transmit half messages which may be processed
+	 * at the other side.
+	 *
+	 * our policy is to start on a clean state when disconnects, we don't
+	 * know what's send/received on transport layer in this case.
+	 */
+	spin_lock(&con->writequeue_lock);
+	if (!list_empty(&con->writequeue)) {
+		e = list_first_entry(&con->writequeue, struct writequeue_entry,
+				     list);
+		if (e->dirty)
+			free_entry(e);
+	}
+	spin_unlock(&con->writequeue_lock);
+
 	con->rx_leftover = 0;
 	con->retries = 0;
 	clear_bit(CF_CONNECTED, &con->flags);
@@ -1026,41 +1084,6 @@ accept_err:
 	return result;
 }
 
-static void dlm_page_release(struct kref *kref)
-{
-	struct writequeue_entry *e = container_of(kref, struct writequeue_entry,
-						  ref);
-
-	__free_page(e->page);
-	kfree(e);
-}
-
-static void dlm_msg_release(struct kref *kref)
-{
-	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
-
-	kref_put(&msg->entry->ref, dlm_page_release);
-	kfree(msg);
-}
-
-static void free_entry(struct writequeue_entry *e)
-{
-	struct dlm_msg *msg, *tmp;
-
-	list_for_each_entry_safe(msg, tmp, &e->msgs, list) {
-		if (msg->orig_msg) {
-			msg->orig_msg->retransmit = false;
-			kref_put(&msg->orig_msg->ref, dlm_msg_release);
-		}
-		list_del(&msg->list);
-		kref_put(&msg->ref, dlm_msg_release);
-	}
-
-	list_del(&e->list);
-	atomic_dec(&e->con->writequeue_cnt);
-	kref_put(&e->ref, dlm_page_release);
-}
-
 /*
  * writequeue_entry_complete - try to delete and free write queue entry
  * @e: write queue entry to try to delete
@@ -1072,6 +1095,8 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
 {
 	e->offset += completed;
 	e->len -= completed;
+	/* signal that page was half way transmitted */
+	e->dirty = true;
 
 	if (e->len == 0 && e->users == 0)
 		free_entry(e);

From 1eb8a49836949a77c4f7d738786719e7fde0c333 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 25 May 2021 20:54:20 +0900
Subject: [PATCH 0528/3292] x86/syscalls: Clear 'offset' and 'prefix' in case
 they are set in env

If the environment variable 'prefix' is set on the build host, it is
wrongly used as syscall macro prefixes.

  $ export prefix=/usr
  $ make -s defconfig all
  In file included from ./arch/x86/include/asm/unistd.h:20,
                   from <stdin>:2:
  ./arch/x86/include/generated/uapi/asm/unistd_64.h:4:9: warning: missing whitespace after the macro name
      4 | #define __NR_/usrread 0
        |         ^~~~~

arch/x86/entry/syscalls/Makefile should clear 'offset' and 'prefix'.

Fixes: 3cba325b358f ("x86/syscalls: Switch to generic syscallhdr.sh")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210525115420.679416-1-masahiroy@kernel.org
---
 arch/x86/entry/syscalls/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 8eb014bca8c9..5b3efed0e4e8 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -11,6 +11,8 @@ syscall64 := $(src)/syscall_64.tbl
 
 syshdr := $(srctree)/scripts/syscallhdr.sh
 systbl := $(srctree)/scripts/syscalltbl.sh
+offset :=
+prefix :=
 
 quiet_cmd_syshdr = SYSHDR  $@
       cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) --emit-nr \

From d48ca5b98fa5d21444e04bb17373d339200b679a Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 24 May 2021 14:17:05 -0400
Subject: [PATCH 0529/3292] x86/uml/syscalls: Remove array index from syscall
 initializers

The recent syscall table generator rework removed the index from the
initializers for native x86 syscall tables, but missed the UML syscall
tables.

Fixes: 44fe4895f47c ("Stop filling syscall arrays with *_sys_ni_syscall")
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210524181707.132844-2-brgerst@gmail.com
---
 arch/x86/um/sys_call_table_32.c | 2 +-
 arch/x86/um/sys_call_table_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index f8323104e353..0575decb5e54 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -31,7 +31,7 @@
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
+#define __SYSCALL(nr, sym) sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 5ed665dc785f..95725b5a41ac 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -39,7 +39,7 @@
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
+#define __SYSCALL(nr, sym) sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 

From fd9e8691f38712892fa2ac73132dcc8b85b07a8f Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 24 May 2021 14:17:06 -0400
Subject: [PATCH 0530/3292] x86/syscalls: Remove -Wno-override-init for syscall
 tables

Commit 44fe4895f47c ("Stop filling syscall arrays with *_sys_ni_syscall")
removes the need for -Wno-override-init, since the table is now filled
sequentially instead of overriding a default value.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210524181707.132844-3-brgerst@gmail.com
---
 arch/x86/entry/Makefile | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 08bf95dbc911..94d2843ce80c 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -17,10 +17,6 @@ CFLAGS_syscall_64.o		+= -fno-stack-protector
 CFLAGS_syscall_32.o		+= -fno-stack-protector
 CFLAGS_syscall_x32.o		+= -fno-stack-protector
 
-CFLAGS_syscall_64.o		+= $(call cc-option,-Wno-override-init,)
-CFLAGS_syscall_32.o		+= $(call cc-option,-Wno-override-init,)
-CFLAGS_syscall_x32.o		+= $(call cc-option,-Wno-override-init,)
-
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 

From 48f7eee81cd53a94699d28959566b41a9dcac1d9 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 24 May 2021 14:17:07 -0400
Subject: [PATCH 0531/3292] x86/syscalls: Don't adjust CFLAGS for syscall
 tables

The syscall_*.c files only contain data (the syscall tables).  There
is no need to adjust CFLAGS for tracing and stack protector since they
contain no code.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210524181707.132844-4-brgerst@gmail.com
---
 arch/x86/entry/Makefile | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 94d2843ce80c..7fec5dcf6438 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -8,14 +8,8 @@ UBSAN_SANITIZE := n
 KCOV_INSTRUMENT := n
 
 CFLAGS_REMOVE_common.o		= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_syscall_64.o	= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_syscall_32.o	= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_syscall_x32.o	= $(CC_FLAGS_FTRACE)
 
 CFLAGS_common.o			+= -fno-stack-protector
-CFLAGS_syscall_64.o		+= -fno-stack-protector
-CFLAGS_syscall_32.o		+= -fno-stack-protector
-CFLAGS_syscall_x32.o		+= -fno-stack-protector
 
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o

From 130708331bc6b03a3c3a78599333faddfebbd0f3 Mon Sep 17 00:00:00 2001
From: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
Date: Sun, 23 May 2021 21:31:30 +0800
Subject: [PATCH 0532/3292] cpu/hotplug: Simplify access to percpu cpuhp_state

It is unnecessary to invoke per_cpu_ptr() everytime to access cpuhp_state.
Use the available pointer instead.

Signed-off-by: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lore.kernel.org/r/1621776690-13264-1-git-send-email-yuanzhaoxiong@baidu.com
---
 kernel/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index e538518556f4..2942cb4644c5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -961,7 +961,7 @@ static int takedown_cpu(unsigned int cpu)
 	int err;
 
 	/* Park the smpboot threads */
-	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+	kthread_park(st->thread);
 
 	/*
 	 * Prevent irq alloc/free while the dying cpu reorganizes the
@@ -977,7 +977,7 @@ static int takedown_cpu(unsigned int cpu)
 		/* CPU refused to die */
 		irq_unlock_sparse();
 		/* Unpark the hotplug thread so we can rollback there */
-		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+		kthread_unpark(st->thread);
 		return err;
 	}
 	BUG_ON(cpu_online(cpu));

From e8049c4aa5d83e2a853e01f2a5543788d3a49777 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Tue, 25 May 2021 19:48:34 +0000
Subject: [PATCH 0533/3292] MAINTAINERS: Make Yazen Ghannam maintainer for
 EDAC-AMD64

Also, include the mce_amd* files under this section.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210525194834.2710362-1-Yazen.Ghannam@amd.com
---
 MAINTAINERS | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..fbcd649b2cc4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6437,10 +6437,11 @@ F:	Documentation/filesystems/ecryptfs.rst
 F:	fs/ecryptfs/
 
 EDAC-AMD64
-M:	Borislav Petkov <bp@alien8.de>
+M:	Yazen Ghannam <yazen.ghannam@amd.com>
 L:	linux-edac@vger.kernel.org
-S:	Maintained
+S:	Supported
 F:	drivers/edac/amd64_edac*
+F:	drivers/edac/mce_amd*
 
 EDAC-ARMADA
 M:	Jan Luebbe <jlu@pengutronix.de>

From ccc882f0d838cb45a1a78ea4e48c219887f920dc Mon Sep 17 00:00:00 2001
From: Nigel Christian <nigel.l.christian@gmail.com>
Date: Mon, 24 May 2021 23:05:58 -0500
Subject: [PATCH 0534/3292] net: bridge: remove redundant assignment

The variable br is assigned a value that is not being read after
exiting case IFLA_STATS_LINK_XSTATS_SLAVE. The assignment is
redundant and can be removed.

Addresses-Coverity ("Unused value")
Signed-off-by: Nigel Christian <nigel.l.christian@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e4e6e991313e..8642e56059fb 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1644,7 +1644,6 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
 		p = br_port_get_rtnl(dev);
 		if (!p)
 			return 0;
-		br = p->br;
 		vg = nbp_vlan_group(p);
 		break;
 	default:

From 307ea4ce3edd3f7d1130d3c35955aa77063296cc Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 25 May 2021 17:22:03 +0800
Subject: [PATCH 0535/3292] net: hns3: switch to dim algorithm for adaptive
 interrupt moderation

The Linux kernel has support for a dynamic interrupt moderation
algorithm known as "dimlib". Replace the custom driver-specific
implementation of dynamic interrupt moderation with the kernel's
algorithm.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/Kconfig        |   1 +
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 199 +++++++-----------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |   4 +-
 3 files changed, 76 insertions(+), 128 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 44f9279cdde1..fa6025dc4cdb 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -130,6 +130,7 @@ config HNS3_ENET
 	default m
 	depends on 64BIT && PCI
 	depends on INET
+	select DIMLIB
 	help
 	  This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
 	  family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index de0e2d215879..f256ed18b4ab 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -368,6 +368,7 @@ static irqreturn_t hns3_irq_handle(int irq, void *vector)
 	struct hns3_enet_tqp_vector *tqp_vector = vector;
 
 	napi_schedule_irqoff(&tqp_vector->napi);
+	tqp_vector->event_cnt++;
 
 	return IRQ_HANDLED;
 }
@@ -471,6 +472,8 @@ static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector)
 
 	disable_irq(tqp_vector->vector_irq);
 	napi_disable(&tqp_vector->napi);
+	cancel_work_sync(&tqp_vector->rx_group.dim.work);
+	cancel_work_sync(&tqp_vector->tx_group.dim.work);
 }
 
 void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
@@ -3772,139 +3775,30 @@ out:
 	return recv_pkts;
 }
 
-static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group)
-{
-#define HNS3_RX_LOW_BYTE_RATE 10000
-#define HNS3_RX_MID_BYTE_RATE 20000
-#define HNS3_RX_ULTRA_PACKET_RATE 40
-
-	enum hns3_flow_level_range new_flow_level;
-	struct hns3_enet_tqp_vector *tqp_vector;
-	int packets_per_msecs, bytes_per_msecs;
-	u32 time_passed_ms;
-
-	tqp_vector = ring_group->ring->tqp_vector;
-	time_passed_ms =
-		jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
-	if (!time_passed_ms)
-		return false;
-
-	do_div(ring_group->total_packets, time_passed_ms);
-	packets_per_msecs = ring_group->total_packets;
-
-	do_div(ring_group->total_bytes, time_passed_ms);
-	bytes_per_msecs = ring_group->total_bytes;
-
-	new_flow_level = ring_group->coal.flow_level;
-
-	/* Simple throttlerate management
-	 * 0-10MB/s   lower     (50000 ints/s)
-	 * 10-20MB/s   middle    (20000 ints/s)
-	 * 20-1249MB/s high      (18000 ints/s)
-	 * > 40000pps  ultra     (8000 ints/s)
-	 */
-	switch (new_flow_level) {
-	case HNS3_FLOW_LOW:
-		if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
-			new_flow_level = HNS3_FLOW_MID;
-		break;
-	case HNS3_FLOW_MID:
-		if (bytes_per_msecs > HNS3_RX_MID_BYTE_RATE)
-			new_flow_level = HNS3_FLOW_HIGH;
-		else if (bytes_per_msecs <= HNS3_RX_LOW_BYTE_RATE)
-			new_flow_level = HNS3_FLOW_LOW;
-		break;
-	case HNS3_FLOW_HIGH:
-	case HNS3_FLOW_ULTRA:
-	default:
-		if (bytes_per_msecs <= HNS3_RX_MID_BYTE_RATE)
-			new_flow_level = HNS3_FLOW_MID;
-		break;
-	}
-
-	if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
-	    &tqp_vector->rx_group == ring_group)
-		new_flow_level = HNS3_FLOW_ULTRA;
-
-	ring_group->total_bytes = 0;
-	ring_group->total_packets = 0;
-	ring_group->coal.flow_level = new_flow_level;
-
-	return true;
-}
-
-static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
-{
-	struct hns3_enet_tqp_vector *tqp_vector;
-	u16 new_int_gl;
-
-	if (!ring_group->ring)
-		return false;
-
-	tqp_vector = ring_group->ring->tqp_vector;
-	if (!tqp_vector->last_jiffies)
-		return false;
-
-	if (ring_group->total_packets == 0) {
-		ring_group->coal.int_gl = HNS3_INT_GL_50K;
-		ring_group->coal.flow_level = HNS3_FLOW_LOW;
-		return true;
-	}
-
-	if (!hns3_get_new_flow_lvl(ring_group))
-		return false;
-
-	new_int_gl = ring_group->coal.int_gl;
-	switch (ring_group->coal.flow_level) {
-	case HNS3_FLOW_LOW:
-		new_int_gl = HNS3_INT_GL_50K;
-		break;
-	case HNS3_FLOW_MID:
-		new_int_gl = HNS3_INT_GL_20K;
-		break;
-	case HNS3_FLOW_HIGH:
-		new_int_gl = HNS3_INT_GL_18K;
-		break;
-	case HNS3_FLOW_ULTRA:
-		new_int_gl = HNS3_INT_GL_8K;
-		break;
-	default:
-		break;
-	}
-
-	if (new_int_gl != ring_group->coal.int_gl) {
-		ring_group->coal.int_gl = new_int_gl;
-		return true;
-	}
-	return false;
-}
-
-static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
+static void hns3_update_rx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
 {
 	struct hns3_enet_ring_group *rx_group = &tqp_vector->rx_group;
-	struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
-	bool rx_update, tx_update;
+	struct dim_sample sample = {};
 
-	/* update param every 1000ms */
-	if (time_before(jiffies,
-			tqp_vector->last_jiffies + msecs_to_jiffies(1000)))
+	if (!rx_group->coal.adapt_enable)
 		return;
 
-	if (rx_group->coal.adapt_enable) {
-		rx_update = hns3_get_new_int_gl(rx_group);
-		if (rx_update)
-			hns3_set_vector_coalesce_rx_gl(tqp_vector,
-						       rx_group->coal.int_gl);
-	}
+	dim_update_sample(tqp_vector->event_cnt, rx_group->total_packets,
+			  rx_group->total_bytes, &sample);
+	net_dim(&rx_group->dim, sample);
+}
 
-	if (tx_group->coal.adapt_enable) {
-		tx_update = hns3_get_new_int_gl(tx_group);
-		if (tx_update)
-			hns3_set_vector_coalesce_tx_gl(tqp_vector,
-						       tx_group->coal.int_gl);
-	}
+static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
+{
+	struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
+	struct dim_sample sample = {};
 
-	tqp_vector->last_jiffies = jiffies;
+	if (!tx_group->coal.adapt_enable)
+		return;
+
+	dim_update_sample(tqp_vector->event_cnt, tx_group->total_packets,
+			  tx_group->total_bytes, &sample);
+	net_dim(&tx_group->dim, sample);
 }
 
 static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
@@ -3949,7 +3843,9 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
 
 	if (napi_complete(napi) &&
 	    likely(!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))) {
-		hns3_update_new_int_gl(tqp_vector);
+		hns3_update_rx_int_coalesce(tqp_vector);
+		hns3_update_tx_int_coalesce(tqp_vector);
+
 		hns3_mask_vector_irq(tqp_vector, 1);
 	}
 
@@ -4080,6 +3976,54 @@ static void hns3_nic_set_cpumask(struct hns3_nic_priv *priv)
 	}
 }
 
+static void hns3_rx_dim_work(struct work_struct *work)
+{
+	struct dim *dim = container_of(work, struct dim, work);
+	struct hns3_enet_ring_group *group = container_of(dim,
+		struct hns3_enet_ring_group, dim);
+	struct hns3_enet_tqp_vector *tqp_vector = group->ring->tqp_vector;
+	struct dim_cq_moder cur_moder =
+		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+
+	hns3_set_vector_coalesce_rx_gl(group->ring->tqp_vector, cur_moder.usec);
+	tqp_vector->rx_group.coal.int_gl = cur_moder.usec;
+
+	if (cur_moder.pkts < tqp_vector->rx_group.coal.int_ql_max) {
+		hns3_set_vector_coalesce_rx_ql(tqp_vector, cur_moder.pkts);
+		tqp_vector->rx_group.coal.int_ql = cur_moder.pkts;
+	}
+
+	dim->state = DIM_START_MEASURE;
+}
+
+static void hns3_tx_dim_work(struct work_struct *work)
+{
+	struct dim *dim = container_of(work, struct dim, work);
+	struct hns3_enet_ring_group *group = container_of(dim,
+		struct hns3_enet_ring_group, dim);
+	struct hns3_enet_tqp_vector *tqp_vector = group->ring->tqp_vector;
+	struct dim_cq_moder cur_moder =
+		net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+	hns3_set_vector_coalesce_tx_gl(tqp_vector, cur_moder.usec);
+	tqp_vector->tx_group.coal.int_gl = cur_moder.usec;
+
+	if (cur_moder.pkts < tqp_vector->tx_group.coal.int_ql_max) {
+		hns3_set_vector_coalesce_tx_ql(tqp_vector, cur_moder.pkts);
+		tqp_vector->tx_group.coal.int_ql = cur_moder.pkts;
+	}
+
+	dim->state = DIM_START_MEASURE;
+}
+
+static void hns3_nic_init_dim(struct hns3_enet_tqp_vector *tqp_vector)
+{
+	INIT_WORK(&tqp_vector->rx_group.dim.work, hns3_rx_dim_work);
+	tqp_vector->rx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	INIT_WORK(&tqp_vector->tx_group.dim.work, hns3_tx_dim_work);
+	tqp_vector->tx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
 static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
@@ -4093,6 +4037,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 		tqp_vector = &priv->tqp_vector[i];
 		hns3_vector_coalesce_init_hw(tqp_vector, priv);
 		tqp_vector->num_tqps = 0;
+		hns3_nic_init_dim(tqp_vector);
 	}
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 79ff2fa61d47..b038441907f9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -4,6 +4,7 @@
 #ifndef __HNS3_ENET_H
 #define __HNS3_ENET_H
 
+#include <linux/dim.h>
 #include <linux/if_vlan.h>
 
 #include "hnae3.h"
@@ -482,6 +483,7 @@ struct hns3_enet_ring_group {
 	u64 total_packets;	/* total packets processed this group */
 	u16 count;
 	struct hns3_enet_coalesce coal;
+	struct dim dim;
 };
 
 struct hns3_enet_tqp_vector {
@@ -503,7 +505,7 @@ struct hns3_enet_tqp_vector {
 
 	char name[HNAE3_INT_NAME_LEN];
 
-	unsigned long last_jiffies;
+	u64 event_cnt;
 } ____cacheline_internodealigned_in_smp;
 
 struct hns3_nic_priv {

From 687c87adc11a6b1bfae115ee6e7bcf822e7228b3 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 25 May 2021 18:55:45 +0800
Subject: [PATCH 0536/3292] net/hamradio/6pack: Fix inconsistent indenting

Eliminate the follow smatch warning:

drivers/net/hamradio/6pack.c:728 sixpack_ioctl() warn: inconsistent
indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/6pack.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 80f41945709f..a15cc5e50290 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -716,11 +716,11 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file,
 		err = 0;
 		break;
 
-	 case SIOCSIFHWADDR: {
-		char addr[AX25_ADDR_LEN];
+	case SIOCSIFHWADDR: {
+			char addr[AX25_ADDR_LEN];
 
-		if (copy_from_user(&addr,
-		                   (void __user *) arg, AX25_ADDR_LEN)) {
+			if (copy_from_user(&addr,
+					   (void __user *)arg, AX25_ADDR_LEN)) {
 				err = -EFAULT;
 				break;
 			}
@@ -728,11 +728,9 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file,
 			netif_tx_lock_bh(dev);
 			memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN);
 			netif_tx_unlock_bh(dev);
-
 			err = 0;
 			break;
 		}
-
 	default:
 		err = tty_mode_ioctl(tty, file, cmd, arg);
 	}

From 98d728232c988da7109576a23e6e0295acb5cf2d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 25 May 2021 22:07:53 +0800
Subject: [PATCH 0537/3292] net: wan: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/n2.c | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 5bf4463873b1..dd39789ebfa0 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -32,7 +32,6 @@
 #include <asm/io.h>
 #include "hd64570.h"
 
-
 static const char* version = "SDL RISCom/N2 driver version: 1.15";
 static const char* devname = "RISCom/N2";
 
@@ -64,11 +63,9 @@ static char *hw;	/* pointer to hw=xxx command line string */
 #define PCR_ENWIN  4     /* Open window */
 #define PCR_BUS16  8     /* 16-bit bus */
 
-
 /* Memory Base Address Register */
 #define N2_BAR 2
 
-
 /* Page Scan Register  */
 #define N2_PSR 4
 #define WIN16K       0x00
@@ -78,7 +75,6 @@ static char *hw;	/* pointer to hw=xxx command line string */
 #define PSR_DMAEN    0x80
 #define PSR_PAGEBITS 0x0F
 
-
 /* Modem Control Reg */
 #define N2_MCR 6
 #define CLOCK_OUT_PORT1 0x80
@@ -90,7 +86,6 @@ static char *hw;	/* pointer to hw=xxx command line string */
 #define DTR_PORT1       0x02
 #define DTR_PORT0       0x01
 
-
 typedef struct port_s {
 	struct net_device *dev;
 	struct card_s *card;
@@ -108,8 +103,6 @@ typedef struct port_s {
 	u8 log_node;		/* logical port # */
 }port_t;
 
-
-
 typedef struct card_s {
 	u8 __iomem *winbase;		/* ISA window base address */
 	u32 phy_winbase;	/* ISA physical base address */
@@ -124,11 +117,9 @@ typedef struct card_s {
 	struct card_s *next_card;
 }card_t;
 
-
 static card_t *first_card;
 static card_t **new_card = &first_card;
 
-
 #define sca_reg(reg, card) (0x8000 | (card)->io | \
 			    ((reg) & 0x0F) | (((reg) & 0xF0) << 6))
 #define sca_in(reg, card)		inb(sca_reg(reg, card))
@@ -144,23 +135,19 @@ static card_t **new_card = &first_card;
 #define get_port(card, port)		((card)->ports[port].valid ? \
 					 &(card)->ports[port] : NULL)
 
-
 static __inline__ u8 sca_get_page(card_t *card)
 {
 	return inb(card->io + N2_PSR) & PSR_PAGEBITS;
 }
 
-
 static __inline__ void openwin(card_t *card, u8 page)
 {
 	u8 psr = inb(card->io + N2_PSR);
 	outb((psr & ~PSR_PAGEBITS) | page, card->io + N2_PSR);
 }
 
-
 #include "hd64570.c"
 
-
 static void n2_set_iface(port_t *port)
 {
 	card_t *card = port->card;
@@ -203,8 +190,6 @@ static void n2_set_iface(port_t *port)
 	sca_set_port(port);
 }
 
-
-
 static int n2_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -226,8 +211,6 @@ static int n2_open(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int n2_close(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -241,8 +224,6 @@ static int n2_close(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	const size_t size = sizeof(sync_serial_settings);
@@ -295,8 +276,6 @@ static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	}
 }
 
-
-
 static void n2_destroy_card(card_t *card)
 {
 	int cnt;
@@ -486,8 +465,6 @@ static int __init n2_run(unsigned long io, unsigned long irq,
 	return 0;
 }
 
-
-
 static int __init n2_init(void)
 {
 	if (hw==NULL) {
@@ -539,7 +516,6 @@ static int __init n2_init(void)
 	return first_card ? 0 : -EINVAL;
 }
 
-
 static void __exit n2_cleanup(void)
 {
 	card_t *card = first_card;
@@ -551,7 +527,6 @@ static void __exit n2_cleanup(void)
 	}
 }
 
-
 module_init(n2_init);
 module_exit(n2_cleanup);
 

From 9e7ee10f169f790ee1a7146cf2c0befa970f19a5 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 25 May 2021 22:07:54 +0800
Subject: [PATCH 0538/3292] net: wan: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/n2.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index dd39789ebfa0..5ad8c5032900 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -143,6 +143,7 @@ static __inline__ u8 sca_get_page(card_t *card)
 static __inline__ void openwin(card_t *card, u8 page)
 {
 	u8 psr = inb(card->io + N2_PSR);
+
 	outb((psr & ~PSR_PAGEBITS) | page, card->io + N2_PSR);
 }
 
@@ -283,6 +284,7 @@ static void n2_destroy_card(card_t *card)
 	for (cnt = 0; cnt < 2; cnt++)
 		if (card->ports[cnt].card) {
 			struct net_device *dev = port_to_dev(&card->ports[cnt]);
+
 			unregister_hdlc_device(dev);
 		}
 
@@ -522,6 +524,7 @@ static void __exit n2_cleanup(void)
 
 	while (card) {
 		card_t *ptr = card;
+
 		card = card->next_card;
 		n2_destroy_card(ptr);
 	}

From c4fdef99d17bad52d268ffe7a67b9ed2410cbd34 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 25 May 2021 22:07:55 +0800
Subject: [PATCH 0539/3292] net: wan: fix an code style issue about "foo* bar

Fix the checkpatch error as "foo* bar" and should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/n2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 5ad8c5032900..180fb2c9a442 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -32,8 +32,8 @@
 #include <asm/io.h>
 #include "hd64570.h"
 
-static const char* version = "SDL RISCom/N2 driver version: 1.15";
-static const char* devname = "RISCom/N2";
+static const char *version = "SDL RISCom/N2 driver version: 1.15";
+static const char *devname = "RISCom/N2";
 
 #undef DEBUG_PKT
 #define DEBUG_RINGS

From 69542276e2b12d2a02af5fb523fe409bd0983f10 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 25 May 2021 22:07:56 +0800
Subject: [PATCH 0540/3292] net: wan: add some required spaces

Add space required after that close brace '}'.
Add space required before the open parenthesis '(' and '{'

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/n2.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 180fb2c9a442..2f602171cbc6 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -101,7 +101,7 @@ typedef struct port_s {
 	u8 rxs, txs, tmc;	/* SCA registers */
 	u8 phy_node;		/* physical port # - 0 or 1 */
 	u8 log_node;		/* logical port # */
-}port_t;
+} port_t;
 
 typedef struct card_s {
 	u8 __iomem *winbase;		/* ISA window base address */
@@ -115,7 +115,7 @@ typedef struct card_s {
 
 	port_t ports[2];
 	struct card_s *next_card;
-}card_t;
+} card_t;
 
 static card_t *first_card;
 static card_t **new_card = &first_card;
@@ -158,7 +158,7 @@ static void n2_set_iface(port_t *port)
 	u8 rxs = port->rxs & CLK_BRG_MASK;
 	u8 txs = port->txs & CLK_BRG_MASK;
 
-	switch(port->settings.clock_type) {
+	switch (port->settings.clock_type) {
 	case CLOCK_INT:
 		mcr |= port->phy_node ? CLOCK_OUT_PORT1 : CLOCK_OUT_PORT0;
 		rxs |= CLK_BRG_RX; /* BRG output */
@@ -241,7 +241,7 @@ static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (cmd != SIOCWANDEV)
 		return hdlc_ioctl(dev, ifr, cmd);
 
-	switch(ifr->ifr_settings.type) {
+	switch (ifr->ifr_settings.type) {
 	case IF_GET_IFACE:
 		ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
 		if (ifr->ifr_settings.size < size) {
@@ -253,7 +253,7 @@ static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return 0;
 
 	case IF_IFACE_SYNC_SERIAL:
-		if(!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
 		if (copy_from_user(&new_line, line, size))
@@ -494,7 +494,7 @@ static int __init n2_init(void)
 
 		if (*hw++ != ',')
 			break;
-		while(1) {
+		while (1) {
 			if (*hw == '0' && !valid[0])
 				valid[0] = 1; /* Port 0 enabled */
 			else if (*hw == '1' && !valid[1])
@@ -512,7 +512,7 @@ static int __init n2_init(void)
 
 		if (*hw == '\x0')
 			return first_card ? 0 : -EINVAL;
-	}while(*hw++ == ':');
+	} while (*hw++ == ':');
 
 	pr_err("invalid hardware parameters\n");
 	return first_card ? 0 : -EINVAL;

From 2aea27bae89b14e624bdc53a5e5af5a004e68058 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 25 May 2021 22:07:57 +0800
Subject: [PATCH 0541/3292] net: wan: replace comparison to NULL with "!card"

According to the chackpatch.pl, comparison to NULL could
be written "!card".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/n2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 2f602171cbc6..76ef808c2769 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -335,7 +335,7 @@ static int __init n2_run(unsigned long io, unsigned long irq,
 	}
 
 	card = kzalloc(sizeof(card_t), GFP_KERNEL);
-	if (card == NULL)
+	if (!card)
 		return -ENOBUFS;
 
 	card->ports[0].dev = alloc_hdlcdev(&card->ports[0]);
@@ -469,7 +469,7 @@ static int __init n2_run(unsigned long io, unsigned long irq,
 
 static int __init n2_init(void)
 {
-	if (hw==NULL) {
+	if (!hw) {
 #ifdef MODULE
 		pr_info("no card initialized\n");
 #endif

From 30cbb0107e98308a5cb1a27b4c8d4049c07290aa Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 25 May 2021 22:07:58 +0800
Subject: [PATCH 0542/3292] net: wan: add spaces required around that ':' and
 '+'

This patch adds spaces required around that ':' and '+'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/n2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c
index 76ef808c2769..bdb6dc2409bc 100644
--- a/drivers/net/wan/n2.c
+++ b/drivers/net/wan/n2.c
@@ -195,7 +195,8 @@ static int n2_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
 	int io = port->card->io;
-	u8 mcr = inb(io + N2_MCR) | (port->phy_node ? TX422_PORT1:TX422_PORT0);
+	u8 mcr = inb(io + N2_MCR) |
+		(port->phy_node ? TX422_PORT1 : TX422_PORT0);
 	int result;
 
 	result = hdlc_open(dev);
@@ -216,7 +217,8 @@ static int n2_close(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
 	int io = port->card->io;
-	u8 mcr = inb(io+N2_MCR) | (port->phy_node ? TX422_PORT1 : TX422_PORT0);
+	u8 mcr = inb(io + N2_MCR) |
+		(port->phy_node ? TX422_PORT1 : TX422_PORT0);
 
 	sca_close(dev);
 	mcr |= port->phy_node ? DTR_PORT1 : DTR_PORT0; /* set DTR OFF */

From e4e92ee78702b13ad55118d8b66f06e1aef62586 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 25 May 2021 18:31:18 +0200
Subject: [PATCH 0543/3292] net: wwan: core: Add WWAN device index sysfs
 attribute

Add index sysfs attribute for WWAN devices. This index is used to
uniquely indentify and reference a WWAN device. 'index' is the
attribute name that other device classes use (wireless, v4l2-dev,
rfkill, etc...).

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 92a8a6ffc567..6e8f19c71a9e 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -63,6 +63,20 @@ struct wwan_port {
 	wait_queue_head_t waitqueue;
 };
 
+static ssize_t index_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct wwan_device *wwan = to_wwan_dev(dev);
+
+	return sprintf(buf, "%d\n", wwan->id);
+}
+static DEVICE_ATTR_RO(index);
+
+static struct attribute *wwan_dev_attrs[] = {
+	&dev_attr_index.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(wwan_dev);
+
 static void wwan_dev_destroy(struct device *dev)
 {
 	struct wwan_device *wwandev = to_wwan_dev(dev);
@@ -74,6 +88,7 @@ static void wwan_dev_destroy(struct device *dev)
 static const struct device_type wwan_dev_type = {
 	.name    = "wwan_dev",
 	.release = wwan_dev_destroy,
+	.groups = wwan_dev_groups,
 };
 
 static int wwan_dev_parent_match(struct device *dev, const void *parent)

From 5981881d21dff612abf8fce484f8efa67f49aae4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 24 May 2021 20:59:31 -0700
Subject: [PATCH 0544/3292] libbpf: Add libbpf_set_strict_mode() API to turn on
 libbpf 1.0 behaviors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add libbpf_set_strict_mode() API that allows application to simulate libbpf
1.0 breaking changes before libbpf 1.0 is released. This will help users
migrate gradually and with confidence.

For now only ALL or NONE options are available, subsequent patches will add
more flags. This patch is preliminary for selftests/bpf changes.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210525035935.1461796-2-andrii@kernel.org
---
 tools/lib/bpf/Makefile        |  1 +
 tools/lib/bpf/libbpf.c        | 17 +++++++++++++
 tools/lib/bpf/libbpf.h        |  1 +
 tools/lib/bpf/libbpf.map      |  5 ++++
 tools/lib/bpf/libbpf_legacy.h | 47 +++++++++++++++++++++++++++++++++++
 5 files changed, 71 insertions(+)
 create mode 100644 tools/lib/bpf/libbpf_legacy.h

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e43e1896cb4b..15420303cf06 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -229,6 +229,7 @@ install_headers: $(BPF_HELPER_DEFS)
 		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,btf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
+		$(call do_install,libbpf_legacy.h,$(prefix)/include/bpf,644); \
 		$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
 		$(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b396e45b17ea..1425d7ed0f2f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -151,6 +151,23 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
+/* this goes away in libbpf 1.0 */
+enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
+
+int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
+{
+	/* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to
+	 * get all possible values we compensate last +1, and then (2*x - 1)
+	 * to get the bit mask
+	 */
+	if (mode != LIBBPF_STRICT_ALL
+	    && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
+		return errno = EINVAL, -EINVAL;
+
+	libbpf_mode = mode;
+	return 0;
+}
+
 enum kern_feature_id {
 	/* v4.14: kernel support for program & map names. */
 	FEAT_PROG_NAME,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index d98523558f39..6e61342ba56c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -18,6 +18,7 @@
 #include <linux/bpf.h>
 
 #include "libbpf_common.h"
+#include "libbpf_legacy.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 0229e01e8ccc..bbe99b1db1a9 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -370,3 +370,8 @@ LIBBPF_0.4.0 {
 		bpf_tc_hook_destroy;
 		bpf_tc_query;
 } LIBBPF_0.3.0;
+
+LIBBPF_0.5.0 {
+	global:
+		libbpf_set_strict_mode;
+} LIBBPF_0.4.0;
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
new file mode 100644
index 000000000000..7482cfe22ab2
--- /dev/null
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0])
+ *
+ *   [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY
+ *
+ * Copyright (C) 2021 Facebook
+ */
+#ifndef __LIBBPF_LEGACY_BPF_H
+#define __LIBBPF_LEGACY_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum libbpf_strict_mode {
+	/* Turn on all supported strict features of libbpf to simulate libbpf
+	 * v1.0 behavior.
+	 * This will be the default behavior in libbpf v1.0.
+	 */
+	LIBBPF_STRICT_ALL = 0xffffffff,
+
+	/*
+	 * Disable any libbpf 1.0 behaviors. This is the default before libbpf
+	 * v1.0. It won't be supported anymore in v1.0, please update your
+	 * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
+	 */
+	LIBBPF_STRICT_NONE = 0x00,
+
+	__LIBBPF_STRICT_LAST,
+};
+
+LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LEGACY_BPF_H */

From bad2e478af3b4df9fd84b4db7779ea91bd618c16 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 24 May 2021 20:59:32 -0700
Subject: [PATCH 0545/3292] selftests/bpf: Turn on libbpf 1.0 mode and fix all
 IS_ERR checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turn ony libbpf 1.0 mode. Fix all the explicit IS_ERR checks that now will be
broken because libbpf returns NULL on error (and sets errno). Fix
ASSERT_OK_PTR and ASSERT_ERR_PTR to work for both old mode and new modes and
use them throughout selftests. This is trivial to do by using
libbpf_get_error() API that all libbpf users are supposed to use, instead of
IS_ERR checks.

A bunch of checks also did explicit -1 comparison for various fd-returning
APIs. Such checks are replaced with >= 0 or < 0 cases.

There were also few misuses of bpf_object__find_map_by_name() in test_maps.
Those are fixed in this patch as well.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210525035935.1461796-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/bench.c           |   1 +
 .../selftests/bpf/benchs/bench_rename.c       |   2 +-
 .../selftests/bpf/benchs/bench_ringbufs.c     |   6 +-
 .../selftests/bpf/benchs/bench_trigger.c      |   2 +-
 .../selftests/bpf/prog_tests/attach_probe.c   |  12 +-
 .../selftests/bpf/prog_tests/bpf_iter.c       |  31 ++--
 .../selftests/bpf/prog_tests/bpf_tcp_ca.c     |   8 +-
 tools/testing/selftests/bpf/prog_tests/btf.c  |  93 +++++-----
 .../selftests/bpf/prog_tests/btf_dump.c       |   8 +-
 .../selftests/bpf/prog_tests/btf_write.c      |   4 +-
 .../bpf/prog_tests/cg_storage_multi.c         |  84 +++------
 .../bpf/prog_tests/cgroup_attach_multi.c      |   2 +-
 .../selftests/bpf/prog_tests/cgroup_link.c    |  14 +-
 .../bpf/prog_tests/cgroup_skb_sk_lookup.c     |   2 +-
 .../selftests/bpf/prog_tests/check_mtu.c      |   2 +-
 .../selftests/bpf/prog_tests/core_reloc.c     |  15 +-
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c  |  25 +--
 .../selftests/bpf/prog_tests/flow_dissector.c |   2 +-
 .../bpf/prog_tests/flow_dissector_reattach.c  |  10 +-
 .../bpf/prog_tests/get_stack_raw_tp.c         |  10 +-
 .../prog_tests/get_stackid_cannot_attach.c    |   9 +-
 .../selftests/bpf/prog_tests/hashmap.c        |   9 +-
 .../selftests/bpf/prog_tests/kfree_skb.c      |  19 +-
 .../selftests/bpf/prog_tests/ksyms_btf.c      |   3 +-
 .../selftests/bpf/prog_tests/link_pinning.c   |   7 +-
 .../selftests/bpf/prog_tests/obj_name.c       |   8 +-
 .../selftests/bpf/prog_tests/perf_branches.c  |   4 +-
 .../selftests/bpf/prog_tests/perf_buffer.c    |   2 +-
 .../bpf/prog_tests/perf_event_stackmap.c      |   3 +-
 .../selftests/bpf/prog_tests/probe_user.c     |   7 +-
 .../selftests/bpf/prog_tests/prog_run_xattr.c |   4 +-
 .../bpf/prog_tests/raw_tp_test_run.c          |   4 +-
 .../selftests/bpf/prog_tests/rdonly_maps.c    |   7 +-
 .../bpf/prog_tests/reference_tracking.c       |   2 +-
 .../selftests/bpf/prog_tests/resolve_btfids.c |   2 +-
 .../selftests/bpf/prog_tests/ringbuf_multi.c  |   2 +-
 .../bpf/prog_tests/select_reuseport.c         |  53 +++---
 .../selftests/bpf/prog_tests/send_signal.c    |   3 +-
 .../selftests/bpf/prog_tests/sk_lookup.c      |   2 +-
 .../selftests/bpf/prog_tests/sock_fields.c    |  14 +-
 .../selftests/bpf/prog_tests/sockmap_basic.c  |   8 +-
 .../selftests/bpf/prog_tests/sockmap_ktls.c   |   2 +-
 .../selftests/bpf/prog_tests/sockmap_listen.c |  10 +-
 .../bpf/prog_tests/stacktrace_build_id_nmi.c  |   3 +-
 .../selftests/bpf/prog_tests/stacktrace_map.c |   2 +-
 .../bpf/prog_tests/stacktrace_map_raw_tp.c    |   5 +-
 .../bpf/prog_tests/tcp_hdr_options.c          |  15 +-
 .../selftests/bpf/prog_tests/test_overhead.c  |  12 +-
 .../bpf/prog_tests/trampoline_count.c         |  14 +-
 .../selftests/bpf/prog_tests/udp_limit.c      |   7 +-
 .../selftests/bpf/prog_tests/xdp_bpf2bpf.c    |   2 +-
 .../selftests/bpf/prog_tests/xdp_link.c       |   8 +-
 tools/testing/selftests/bpf/test_maps.c       | 168 +++++++++---------
 tools/testing/selftests/bpf/test_progs.c      |   3 +
 tools/testing/selftests/bpf/test_progs.h      |   9 +-
 .../selftests/bpf/test_tcpnotify_user.c       |   7 +-
 56 files changed, 347 insertions(+), 425 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 332ed2f7b402..6ea15b93a2f8 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -43,6 +43,7 @@ void setup_libbpf()
 {
 	int err;
 
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 	libbpf_set_print(libbpf_print_fn);
 
 	err = bump_memlock_rlimit();
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index a967674098ad..c7ec114eca56 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog)
 	struct bpf_link *link;
 
 	link = bpf_program__attach(prog);
-	if (IS_ERR(link)) {
+	if (!link) {
 		fprintf(stderr, "failed to attach program!\n");
 		exit(1);
 	}
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index bde6c9d4cbd4..d167bffac679 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
 	}
 
 	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
-	if (IS_ERR(link)) {
+	if (!link) {
 		fprintf(stderr, "failed to attach program!\n");
 		exit(1);
 	}
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
 	}
 
 	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
-	if (IS_ERR(link)) {
+	if (!link) {
 		fprintf(stderr, "failed to attach program\n");
 		exit(1);
 	}
@@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup()
 	}
 
 	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
-	if (IS_ERR(link)) {
+	if (!link) {
 		fprintf(stderr, "failed to attach program\n");
 		exit(1);
 	}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 2a0b6c9885a4..f41a491a8cc0 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog)
 	struct bpf_link *link;
 
 	link = bpf_program__attach(prog);
-	if (IS_ERR(link)) {
+	if (!link) {
 		fprintf(stderr, "failed to attach program!\n");
 		exit(1);
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 9dc4e3dfbcf3..ec11e20d2b92 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -85,16 +85,14 @@ void test_attach_probe(void)
 	kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
 						 false /* retprobe */,
 						 SYS_NANOSLEEP_KPROBE_NAME);
-	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
-		  "err %ld\n", PTR_ERR(kprobe_link)))
+	if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
 		goto cleanup;
 	skel->links.handle_kprobe = kprobe_link;
 
 	kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
 						    true /* retprobe */,
 						    SYS_NANOSLEEP_KPROBE_NAME);
-	if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
-		  "err %ld\n", PTR_ERR(kretprobe_link)))
+	if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
 		goto cleanup;
 	skel->links.handle_kretprobe = kretprobe_link;
 
@@ -103,8 +101,7 @@ void test_attach_probe(void)
 						 0 /* self pid */,
 						 "/proc/self/exe",
 						 uprobe_offset);
-	if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
-		  "err %ld\n", PTR_ERR(uprobe_link)))
+	if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
 		goto cleanup;
 	skel->links.handle_uprobe = uprobe_link;
 
@@ -113,8 +110,7 @@ void test_attach_probe(void)
 						    -1 /* any pid */,
 						    "/proc/self/exe",
 						    uprobe_offset);
-	if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
-		  "err %ld\n", PTR_ERR(uretprobe_link)))
+	if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
 		goto cleanup;
 	skel->links.handle_uretprobe = uretprobe_link;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 2d3590cfb5e1..1f1aade56504 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog)
 	int iter_fd, len;
 
 	link = bpf_program__attach_iter(prog, NULL);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		return;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
 	int ret = 0;
 
 	link = bpf_program__attach_iter(prog, NULL);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		return ret;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -396,7 +396,7 @@ static void test_file_iter(void)
 		return;
 
 	link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	/* unlink this path if it exists. */
@@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	skel->bss->map2_id = map_info.id;
 
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto free_map2;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -607,14 +607,12 @@ static void test_bpf_hash_map(void)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-	if (CHECK(!IS_ERR(link), "attach_iter",
-		  "attach_iter for hashmap2 unexpected succeeded\n"))
+	if (!ASSERT_ERR_PTR(link, "attach_iter"))
 		goto out;
 
 	linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-	if (CHECK(!IS_ERR(link), "attach_iter",
-		  "attach_iter for hashmap3 unexpected succeeded\n"))
+	if (!ASSERT_ERR_PTR(link, "attach_iter"))
 		goto out;
 
 	/* hashmap1 should be good, update map values here */
@@ -636,7 +634,7 @@ static void test_bpf_hash_map(void)
 
 	linfo.map.map_fd = map_fd;
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -798,7 +796,7 @@ static void test_bpf_array_map(void)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void)
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
 					&opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-	if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+	if (!ASSERT_ERR_PTR(link, "attach_iter"))
 		bpf_link__destroy(link);
 
 	bpf_iter_test_kern5__destroy(skel);
@@ -1186,8 +1184,7 @@ static void test_task_vma(void)
 	skel->links.proc_maps = bpf_program__attach_iter(
 		skel->progs.proc_maps, NULL);
 
-	if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
-		  "attach iterator failed\n")) {
+	if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
 		skel->links.proc_maps = NULL;
 		goto out;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e25917f04602..efe1e979affb 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -82,7 +82,7 @@ static void *server(void *arg)
 	      bytes, total_bytes, nr_sent, errno);
 
 done:
-	if (fd != -1)
+	if (fd >= 0)
 		close(fd);
 	if (err) {
 		WRITE_ONCE(stop, 1);
@@ -191,8 +191,7 @@ static void test_cubic(void)
 		return;
 
 	link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
-	if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
-		  PTR_ERR(link))) {
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
 		bpf_cubic__destroy(cubic_skel);
 		return;
 	}
@@ -213,8 +212,7 @@ static void test_dctcp(void)
 		return;
 
 	link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
-	if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
-		  PTR_ERR(link))) {
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
 		bpf_dctcp__destroy(dctcp_skel);
 		return;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 0457ae32b270..857e3f26086f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num)
 			      always_log);
 	free(raw_btf);
 
-	err = ((btf_fd == -1) != test->btf_load_err);
+	err = ((btf_fd < 0) != test->btf_load_err);
 	if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
 		  btf_fd, test->btf_load_err) ||
 	    CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num)
 		goto done;
 	}
 
-	if (err || btf_fd == -1)
+	if (err || btf_fd < 0)
 		goto done;
 
 	create_attr.name = test->map_name;
@@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num)
 
 	map_fd = bpf_create_map_xattr(&create_attr);
 
-	err = ((map_fd == -1) != test->map_create_err);
+	err = ((map_fd < 0) != test->map_create_err);
 	CHECK(err, "map_fd:%d test->map_create_err:%u",
 	      map_fd, test->map_create_err);
 
 done:
 	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
-	if (btf_fd != -1)
+	if (btf_fd >= 0)
 		close(btf_fd);
-	if (map_fd != -1)
+	if (map_fd >= 0)
 		close(map_fd);
 }
 
@@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num)
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
 			      always_log);
-	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+	if (CHECK(btf_fd < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -3987,7 +3987,7 @@ done:
 	free(raw_btf);
 	free(user_btf);
 
-	if (btf_fd != -1)
+	if (btf_fd >= 0)
 		close(btf_fd);
 
 	return err;
@@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num)
 	btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
 				 btf_log_buf, BTF_LOG_BUF_SIZE,
 				 always_log);
-	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+	if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num)
 	}
 
 	btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
-	if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+	if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num)
 	create_attr.btf_value_type_id = 2;
 
 	map_fd = bpf_create_map_xattr(&create_attr);
-	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+	if (CHECK(map_fd < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num)
 
 	/* Test BTF ID is removed from the kernel */
 	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
-	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+	if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num)
 	close(map_fd);
 	map_fd = -1;
 	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
-	if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+	if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
 		err = -1;
 		goto done;
 	}
@@ -4117,11 +4117,11 @@ done:
 		fprintf(stderr, "\n%s", btf_log_buf);
 
 	free(raw_btf);
-	if (map_fd != -1)
+	if (map_fd >= 0)
 		close(map_fd);
 	for (i = 0; i < 2; i++) {
 		free(user_btf[i]);
-		if (btf_fd[i] != -1)
+		if (btf_fd[i] >= 0)
 			close(btf_fd[i]);
 	}
 
@@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num)
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
 			      always_log);
-	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+	if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4212,7 +4212,7 @@ done:
 	free(raw_btf);
 	free(user_btf);
 
-	if (btf_fd != -1)
+	if (btf_fd >= 0)
 		close(btf_fd);
 }
 
@@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num)
 		return;
 
 	btf = btf__parse_elf(test->file, &btf_ext);
-	if (IS_ERR(btf)) {
-		if (PTR_ERR(btf) == -ENOENT) {
+	err = libbpf_get_error(btf);
+	if (err) {
+		if (err == -ENOENT) {
 			printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
 			test__skip();
 			return;
@@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num)
 	btf_ext__free(btf_ext);
 
 	obj = bpf_object__open(test->file);
-	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+	err = libbpf_get_error(obj);
+	if (CHECK(err, "obj: %d", err))
 		return;
 
 	prog = bpf_program__next(NULL, obj);
@@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num)
 	info_len = sizeof(struct bpf_prog_info);
 	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 
-	if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+	if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
 		err = -1;
 		goto done;
@@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num)
 
 	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 
-	if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+	if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
 		err = -1;
 		goto done;
@@ -4886,7 +4888,7 @@ static void do_test_pprint(int test_num)
 			      always_log);
 	free(raw_btf);
 
-	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+	if (CHECK(btf_fd < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num)
 	create_attr.btf_value_type_id = test->value_type_id;
 
 	map_fd = bpf_create_map_xattr(&create_attr);
-	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+	if (CHECK(map_fd < 0, "errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num)
 
 					err = check_line(expected_line, nexpected_line,
 							 sizeof(expected_line), line);
-					if (err == -1)
+					if (err < 0)
 						goto done;
 				}
 
@@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num)
 								  cpu, cmapv);
 			err = check_line(expected_line, nexpected_line,
 					 sizeof(expected_line), line);
-			if (err == -1)
+			if (err < 0)
 				goto done;
 
 			cmapv = cmapv + rounded_value_size;
@@ -5036,9 +5038,9 @@ done:
 		fprintf(stderr, "OK");
 	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
-	if (btf_fd != -1)
+	if (btf_fd >= 0)
 		close(btf_fd);
-	if (map_fd != -1)
+	if (map_fd >= 0)
 		close(map_fd);
 	if (pin_file)
 		fclose(pin_file);
@@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
 	/* get necessary lens */
 	info_len = sizeof(struct bpf_prog_info);
 	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-	if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+	if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
 		return -1;
 	}
@@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
 	info.func_info_rec_size = rec_size;
 	info.func_info = ptr_to_u64(func_info);
 	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-	if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+	if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
 		err = -1;
 		goto done;
@@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
 
 	info_len = sizeof(struct bpf_prog_info);
 	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-	if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+	if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
 		err = -1;
 		goto done;
 	}
@@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
 	 * Only recheck the info.*line_info* fields.
 	 * Other fields are not the concern of this test.
 	 */
-	if (CHECK(err == -1 ||
+	if (CHECK(err < 0 ||
 		  info.nr_line_info != cnt ||
 		  (jited_cnt && !info.jited_line_info) ||
 		  info.nr_jited_line_info != jited_cnt ||
@@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num)
 			      always_log);
 	free(raw_btf);
 
-	if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+	if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
 		err = -1;
 		goto done;
 	}
@@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num)
 	patched_linfo = patch_name_tbd(test->line_info,
 				       test->str_sec, linfo_str_off,
 				       test->str_sec_size, &linfo_size);
-	if (IS_ERR(patched_linfo)) {
+	err = libbpf_get_error(patched_linfo);
+	if (err) {
 		fprintf(stderr, "error in creating raw bpf_line_info");
 		err = -1;
 		goto done;
@@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num)
 	}
 
 	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
-	err = ((prog_fd == -1) != test->expected_prog_load_failure);
+	err = ((prog_fd < 0) != test->expected_prog_load_failure);
 	if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
 		  prog_fd, test->expected_prog_load_failure, errno) ||
 	    CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num)
 		goto done;
 	}
 
-	if (prog_fd == -1)
+	if (prog_fd < 0)
 		goto done;
 
 	err = test_get_finfo(test, prog_fd);
@@ -6323,12 +6326,12 @@ done:
 	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
 
-	if (btf_fd != -1)
+	if (btf_fd >= 0)
 		close(btf_fd);
-	if (prog_fd != -1)
+	if (prog_fd >= 0)
 		close(prog_fd);
 
-	if (!IS_ERR(patched_linfo))
+	if (!libbpf_get_error(patched_linfo))
 		free(patched_linfo);
 }
 
@@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num)
 		return;
 
 	test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+	err = libbpf_get_error(test_btf);
 	free(raw_btf);
-	if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
-		  PTR_ERR(test_btf))) {
+	if (CHECK(err, "invalid test_btf errno:%d", err)) {
 		err = -1;
 		goto done;
 	}
@@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num)
 	if (!raw_btf)
 		return;
 	expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+	err = libbpf_get_error(expect_btf);
 	free(raw_btf);
-	if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
-		  PTR_ERR(expect_btf))) {
+	if (CHECK(err, "invalid expect_btf errno:%d", err)) {
 		err = -1;
 		goto done;
 	}
@@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num)
 	}
 
 done:
-	if (!IS_ERR(test_btf))
-		btf__free(test_btf);
-	if (!IS_ERR(expect_btf))
-		btf__free(expect_btf);
+	btf__free(test_btf);
+	btf__free(expect_btf);
 }
 
 void test_btf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 5e129dc2073c..1b90e684ff13 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf,
 	int err = 0, id;
 
 	d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
-	if (IS_ERR(d))
-		return PTR_ERR(d);
+	err = libbpf_get_error(d);
+	if (err)
+		return err;
 
 	for (id = 1; id <= type_cnt; id++) {
 		err = btf_dump__dump_type(d, id);
@@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
 	snprintf(test_file, sizeof(test_file), "%s.o", t->file);
 
 	btf = btf__parse_elf(test_file, NULL);
-	if (CHECK(IS_ERR(btf), "btf_parse_elf",
-	    "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+	if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
 		err = -PTR_ERR(btf);
 		btf = NULL;
 		goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index f36da15b134f..022c7d89d6f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -4,8 +4,6 @@
 #include <bpf/btf.h>
 #include "btf_helpers.h"
 
-static int duration = 0;
-
 void test_btf_write() {
 	const struct btf_var_secinfo *vi;
 	const struct btf_type *t;
@@ -16,7 +14,7 @@ void test_btf_write() {
 	int id, err, str_off;
 
 	btf = btf__new_empty();
-	if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+	if (!ASSERT_OK_PTR(btf, "new_empty"))
 		return;
 
 	str_off = btf__find_str(btf, "int");
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 643dfa35419c..876be0ecb654 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 	 */
 	parent_link = bpf_program__attach_cgroup(obj->progs.egress,
 						 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
-		  "err %ld", PTR_ERR(parent_link)))
+	if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
 		goto close_bpf_object;
 	err = connect_send(CHILD_CGROUP);
 	if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 	 */
 	child_link = bpf_program__attach_cgroup(obj->progs.egress,
 						child_cgroup_fd);
-	if (CHECK(IS_ERR(child_link), "child-cg-attach",
-		  "err %ld", PTR_ERR(child_link)))
+	if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
 		goto close_bpf_object;
 	err = connect_send(CHILD_CGROUP);
 	if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 		goto close_bpf_object;
 
 close_bpf_object:
-	if (!IS_ERR(parent_link))
-		bpf_link__destroy(parent_link);
-	if (!IS_ERR(child_link))
-		bpf_link__destroy(child_link);
+	bpf_link__destroy(parent_link);
+	bpf_link__destroy(child_link);
 
 	cg_storage_multi_egress_only__destroy(obj);
 }
@@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
 	 */
 	parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
 							 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
-		  "err %ld", PTR_ERR(parent_egress1_link)))
+	if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
 		goto close_bpf_object;
 	parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
 							 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
-		  "err %ld", PTR_ERR(parent_egress2_link)))
+	if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
 		goto close_bpf_object;
 	parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
 							 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
-		  "err %ld", PTR_ERR(parent_ingress_link)))
+	if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
 		goto close_bpf_object;
 	err = connect_send(CHILD_CGROUP);
 	if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
 	 */
 	child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
 							child_cgroup_fd);
-	if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
-		  "err %ld", PTR_ERR(child_egress1_link)))
+	if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
 		goto close_bpf_object;
 	child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
 							child_cgroup_fd);
-	if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
-		  "err %ld", PTR_ERR(child_egress2_link)))
+	if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
 		goto close_bpf_object;
 	child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
 							child_cgroup_fd);
-	if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
-		  "err %ld", PTR_ERR(child_ingress_link)))
+	if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
 		goto close_bpf_object;
 	err = connect_send(CHILD_CGROUP);
 	if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
 		goto close_bpf_object;
 
 close_bpf_object:
-	if (!IS_ERR(parent_egress1_link))
-		bpf_link__destroy(parent_egress1_link);
-	if (!IS_ERR(parent_egress2_link))
-		bpf_link__destroy(parent_egress2_link);
-	if (!IS_ERR(parent_ingress_link))
-		bpf_link__destroy(parent_ingress_link);
-	if (!IS_ERR(child_egress1_link))
-		bpf_link__destroy(child_egress1_link);
-	if (!IS_ERR(child_egress2_link))
-		bpf_link__destroy(child_egress2_link);
-	if (!IS_ERR(child_ingress_link))
-		bpf_link__destroy(child_ingress_link);
+	bpf_link__destroy(parent_egress1_link);
+	bpf_link__destroy(parent_egress2_link);
+	bpf_link__destroy(parent_ingress_link);
+	bpf_link__destroy(child_egress1_link);
+	bpf_link__destroy(child_egress2_link);
+	bpf_link__destroy(child_ingress_link);
 
 	cg_storage_multi_isolated__destroy(obj);
 }
@@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
 	 */
 	parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
 							 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
-		  "err %ld", PTR_ERR(parent_egress1_link)))
+	if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
 		goto close_bpf_object;
 	parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
 							 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
-		  "err %ld", PTR_ERR(parent_egress2_link)))
+	if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
 		goto close_bpf_object;
 	parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
 							 parent_cgroup_fd);
-	if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
-		  "err %ld", PTR_ERR(parent_ingress_link)))
+	if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
 		goto close_bpf_object;
 	err = connect_send(CHILD_CGROUP);
 	if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
 	 */
 	child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
 							child_cgroup_fd);
-	if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
-		  "err %ld", PTR_ERR(child_egress1_link)))
+	if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
 		goto close_bpf_object;
 	child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
 							child_cgroup_fd);
-	if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
-		  "err %ld", PTR_ERR(child_egress2_link)))
+	if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
 		goto close_bpf_object;
 	child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
 							child_cgroup_fd);
-	if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
-		  "err %ld", PTR_ERR(child_ingress_link)))
+	if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
 		goto close_bpf_object;
 	err = connect_send(CHILD_CGROUP);
 	if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
 		goto close_bpf_object;
 
 close_bpf_object:
-	if (!IS_ERR(parent_egress1_link))
-		bpf_link__destroy(parent_egress1_link);
-	if (!IS_ERR(parent_egress2_link))
-		bpf_link__destroy(parent_egress2_link);
-	if (!IS_ERR(parent_ingress_link))
-		bpf_link__destroy(parent_ingress_link);
-	if (!IS_ERR(child_egress1_link))
-		bpf_link__destroy(child_egress1_link);
-	if (!IS_ERR(child_egress2_link))
-		bpf_link__destroy(child_egress2_link);
-	if (!IS_ERR(child_ingress_link))
-		bpf_link__destroy(child_ingress_link);
+	bpf_link__destroy(parent_egress1_link);
+	bpf_link__destroy(parent_egress2_link);
+	bpf_link__destroy(parent_ingress_link);
+	bpf_link__destroy(child_egress1_link);
+	bpf_link__destroy(child_egress2_link);
+	bpf_link__destroy(child_ingress_link);
 
 	cg_storage_multi_shared__destroy(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 0a1fc9816cef..20bb8831dda6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
 	prog_cnt = 2;
 	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
 				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
-				  prog_ids, &prog_cnt) != -1);
+				  prog_ids, &prog_cnt) >= 0);
 	CHECK_FAIL(errno != ENOSPC);
 	CHECK_FAIL(prog_cnt != 4);
 	/* check that prog_ids are returned even when buffer is too small */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 736796e56ed1..9091524131d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -65,8 +65,7 @@ void test_cgroup_link(void)
 	for (i = 0; i < cg_nr; i++) {
 		links[i] = bpf_program__attach_cgroup(skel->progs.egress,
 						      cgs[i].fd);
-		if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
-				 i, PTR_ERR(links[i])))
+		if (!ASSERT_OK_PTR(links[i], "cg_attach"))
 			goto cleanup;
 	}
 
@@ -121,8 +120,7 @@ void test_cgroup_link(void)
 
 	links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
 						    cgs[last_cg].fd);
-	if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
-		  PTR_ERR(links[last_cg])))
+	if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
 		goto cleanup;
 
 	ping_and_check(cg_nr + 1, 0);
@@ -147,7 +145,7 @@ void test_cgroup_link(void)
 	/* attempt to mix in with multi-attach bpf_link */
 	tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
 					      cgs[last_cg].fd);
-	if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+	if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
 		bpf_link__destroy(tmp_link);
 		goto cleanup;
 	}
@@ -165,8 +163,7 @@ void test_cgroup_link(void)
 	/* attach back link-based one */
 	links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
 						    cgs[last_cg].fd);
-	if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
-		  PTR_ERR(links[last_cg])))
+	if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
 		goto cleanup;
 
 	ping_and_check(cg_nr, 0);
@@ -249,8 +246,7 @@ cleanup:
 				 BPF_CGROUP_INET_EGRESS);
 
 	for (i = 0; i < cg_nr; i++) {
-		if (!IS_ERR(links[i]))
-			bpf_link__destroy(links[i]);
+		bpf_link__destroy(links[i]);
 	}
 	test_cgroup_link__destroy(skel);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 464edc1c1708..b9dc4ec655b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
 		goto cleanup;
 
 	link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
-	if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "cgroup_attach"))
 		goto cleanup;
 
 	run_lookup_test(&skel->bss->g_serv_port, out_sk);
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index b62a39315336..012068f33a0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void)
 	prog = skel->progs.xdp_use_helper_basic;
 
 	link = bpf_program__attach_xdp(prog, IFINDEX_LO);
-	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "link_attach"))
 		goto out;
 	skel->links.xdp_use_helper_basic = link;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 607710826dca..d02e064c535f 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
 	const char *name;
 	int i;
 
-	if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
-	    CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+	if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
 		btf__free(local_btf);
 		btf__free(targ_btf);
 		return -EINVAL;
@@ -848,8 +847,7 @@ void test_core_reloc(void)
 		}
 
 		obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
-		if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
-			  test_case->bpf_obj_file, PTR_ERR(obj)))
+		if (!ASSERT_OK_PTR(obj, "obj_open"))
 			continue;
 
 		probe_name = "raw_tracepoint/sys_enter";
@@ -899,8 +897,7 @@ void test_core_reloc(void)
 		data->my_pid_tgid = my_pid_tgid;
 
 		link = bpf_program__attach_raw_tracepoint(prog, tp_name);
-		if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
-			  PTR_ERR(link)))
+		if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
 			goto cleanup;
 
 		/* trigger test run */
@@ -941,10 +938,8 @@ cleanup:
 			CHECK_FAIL(munmap(mmap_data, mmap_sz));
 			mmap_data = NULL;
 		}
-		if (!IS_ERR_OR_NULL(link)) {
-			bpf_link__destroy(link);
-			link = NULL;
-		}
+		bpf_link__destroy(link);
+		link = NULL;
 		bpf_object__close(obj);
 	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 63990842d20f..73b4c76e6b86 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 
 close_prog:
 	for (i = 0; i < prog_cnt; i++)
-		if (!IS_ERR_OR_NULL(link[i]))
-			bpf_link__destroy(link[i]);
-	if (!IS_ERR_OR_NULL(obj))
-		bpf_object__close(obj);
+		bpf_link__destroy(link[i]);
+	bpf_object__close(obj);
 	bpf_object__close(tgt_obj);
 	free(link);
 	free(prog);
@@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj)
 		return err;
 
 	link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
-	if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+	if (!ASSERT_OK_PTR(link, "second_link"))
 		goto out;
 
 	err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
@@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void)
 	opts.attach_prog_fd = pkt_fd;
 
 	freplace_obj = bpf_object__open_file(freplace_name, &opts);
-	if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
-		  "failed to open %s: %ld\n", freplace_name,
-		  PTR_ERR(freplace_obj)))
+	if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
 		goto out;
 
 	err = bpf_object__load(freplace_obj);
@@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void)
 
 	prog = bpf_program__next(NULL, freplace_obj);
 	freplace_link = bpf_program__attach_trace(prog);
-	if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+	if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
 		goto out;
 
 	opts.attach_prog_fd = bpf_program__fd(prog);
 	fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
-	if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
-		  "failed to open %s: %ld\n", fmod_ret_name,
-		  PTR_ERR(fmod_obj)))
+	if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
 		goto out;
 
 	err = bpf_object__load(fmod_obj);
@@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file,
 			   );
 
 	obj = bpf_object__open_file(obj_file, &opts);
-	if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
-		  "failed to open %s: %ld\n", obj_file,
-		  PTR_ERR(obj)))
+	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		goto close_prog;
 
 	/* It should fail to load the program */
@@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file,
 		goto close_prog;
 
 close_prog:
-	if (!IS_ERR_OR_NULL(obj))
-		bpf_object__close(obj);
+	bpf_object__close(obj);
 	bpf_object__close(pkt_obj);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cd6dc80edf18..225714f71ac6 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
 		return;
 
 	link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
-	if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_netns"))
 		goto out_close;
 
 	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 172c586b6996..3931ede5c534 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
 	/* Expect failure creating link when another link exists */
 	errno = 0;
 	link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
-	if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+	if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
 		perror("bpf_prog_attach(prog2) expected E2BIG");
-	if (link2 != -1)
+	if (link2 >= 0)
 		close(link2);
 	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
 	/* Expect failure creating link when prog attached */
 	errno = 0;
 	link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
-	if (CHECK_FAIL(link != -1 || errno != EEXIST))
+	if (CHECK_FAIL(link >= 0 || errno != EEXIST))
 		perror("bpf_link_create(prog2) expected EEXIST");
-	if (link != -1)
+	if (link >= 0)
 		close(link);
 	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
@@ -623,7 +623,7 @@ static void run_tests(int netns)
 	}
 out_close:
 	for (i = 0; i < ARRAY_SIZE(progs); i++) {
-		if (progs[i] != -1)
+		if (progs[i] >= 0)
 			CHECK_FAIL(close(progs[i]));
 	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 925722217edf..522237aa4470 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void)
 		goto close_prog;
 
 	link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
-	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
 		goto close_prog;
 
 	pb_opts.sample_cb = get_stack_print_output;
 	pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
-	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
 		goto close_prog;
 
 	/* trigger some syscall action */
@@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void)
 	}
 
 close_prog:
-	if (!IS_ERR_OR_NULL(link))
-		bpf_link__destroy(link);
-	if (!IS_ERR_OR_NULL(pb))
-		perf_buffer__free(pb);
+	bpf_link__destroy(link);
+	perf_buffer__free(pb);
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index d884b2ed5bc5..8d5a6023a1bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
 
 	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
 							   pmu_fd);
-	CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
-	      "should have failed\n");
+	ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
 	close(pmu_fd);
 
 	/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void)
 
 	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
 							   pmu_fd);
-	CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
-	      "err: %ld\n", PTR_ERR(skel->links.oncpu));
+	ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
 	close(pmu_fd);
 
 	/* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void)
 
 	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
 							   pmu_fd);
-	CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
-	      "should have failed\n");
+	ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
 	close(pmu_fd);
 
 cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 428d488830c6..4747ab18f97f 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -48,8 +48,7 @@ static void test_hashmap_generic(void)
 	struct hashmap *map;
 
 	map = hashmap__new(hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "hashmap__new",
-		  "failed to create map: %ld\n", PTR_ERR(map)))
+	if (!ASSERT_OK_PTR(map, "hashmap__new"))
 		return;
 
 	for (i = 0; i < ELEM_CNT; i++) {
@@ -267,8 +266,7 @@ static void test_hashmap_multimap(void)
 
 	/* force collisions */
 	map = hashmap__new(collision_hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "hashmap__new",
-		  "failed to create map: %ld\n", PTR_ERR(map)))
+	if (!ASSERT_OK_PTR(map, "hashmap__new"))
 		return;
 
 	/* set up multimap:
@@ -339,8 +337,7 @@ static void test_hashmap_empty()
 
 	/* force collisions */
 	map = hashmap__new(hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "hashmap__new",
-		  "failed to create map: %ld\n", PTR_ERR(map)))
+	if (!ASSERT_OK_PTR(map, "hashmap__new"))
 		goto cleanup;
 
 	if (CHECK(hashmap__size(map) != 0, "hashmap__size",
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index d65107919998..ddfb6bf97152 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -97,15 +97,13 @@ void test_kfree_skb(void)
 		goto close_prog;
 
 	link = bpf_program__attach_raw_tracepoint(prog, NULL);
-	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
 		goto close_prog;
 	link_fentry = bpf_program__attach_trace(fentry);
-	if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
-		  PTR_ERR(link_fentry)))
+	if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
 		goto close_prog;
 	link_fexit = bpf_program__attach_trace(fexit);
-	if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
-		  PTR_ERR(link_fexit)))
+	if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
 		goto close_prog;
 
 	perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
@@ -116,7 +114,7 @@ void test_kfree_skb(void)
 	pb_opts.sample_cb = on_sample;
 	pb_opts.ctx = &passed;
 	pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
-	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
 		goto close_prog;
 
 	memcpy(skb.cb, &cb, sizeof(cb));
@@ -144,12 +142,9 @@ void test_kfree_skb(void)
 	CHECK_FAIL(!test_ok[0] || !test_ok[1]);
 close_prog:
 	perf_buffer__free(pb);
-	if (!IS_ERR_OR_NULL(link))
-		bpf_link__destroy(link);
-	if (!IS_ERR_OR_NULL(link_fentry))
-		bpf_link__destroy(link_fentry);
-	if (!IS_ERR_OR_NULL(link_fexit))
-		bpf_link__destroy(link_fexit);
+	bpf_link__destroy(link);
+	bpf_link__destroy(link_fentry);
+	bpf_link__destroy(link_fexit);
 	bpf_object__close(obj);
 	bpf_object__close(obj2);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index b58b775d19f3..67bebd324147 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -87,8 +87,7 @@ void test_ksyms_btf(void)
 	struct btf *btf;
 
 	btf = libbpf_find_kernel_btf();
-	if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
-		  PTR_ERR(btf)))
+	if (!ASSERT_OK_PTR(btf, "btf_exists"))
 		return;
 
 	percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
index a743288cf384..6fc97c45f71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
 	int err, i;
 
 	link = bpf_program__attach(prog);
-	if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "link_attach"))
 		goto cleanup;
 
 	bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
 
 	/* re-open link from BPFFS */
 	link = bpf_link__open(link_pin_path);
-	if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "link_open"))
 		goto cleanup;
 
 	CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
 	CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
 
 cleanup:
-	if (!IS_ERR(link))
-		bpf_link__destroy(link);
+	bpf_link__destroy(link);
 }
 
 void test_link_pinning(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index e178416bddad..6194b776a28b 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -38,13 +38,13 @@ void test_obj_name(void)
 
 		fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 		CHECK((tests[i].success && fd < 0) ||
-		      (!tests[i].success && fd != -1) ||
+		      (!tests[i].success && fd >= 0) ||
 		      (!tests[i].success && errno != tests[i].expected_errno),
 		      "check-bpf-prog-name",
 		      "fd %d(%d) errno %d(%d)\n",
 		       fd, tests[i].success, errno, tests[i].expected_errno);
 
-		if (fd != -1)
+		if (fd >= 0)
 			close(fd);
 
 		/* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
 		memcpy(attr.map_name, tests[i].name, ncopy);
 		fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 		CHECK((tests[i].success && fd < 0) ||
-		      (!tests[i].success && fd != -1) ||
+		      (!tests[i].success && fd >= 0) ||
 		      (!tests[i].success && errno != tests[i].expected_errno),
 		      "check-bpf-map-name",
 		      "fd %d(%d) errno %d(%d)\n",
 		      fd, tests[i].success, errno, tests[i].expected_errno);
 
-		if (fd != -1)
+		if (fd >= 0)
 			close(fd);
 	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index e35c444902a7..12c4f45cee1a 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
 
 	/* attach perf_event */
 	link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
-	if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_perf_event"))
 		goto out_destroy_skel;
 
 	/* generate some branches on cpu 0 */
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
 	 * Some setups don't support branch records (virtual machines, !x86),
 	 * so skip test in this case.
 	 */
-	if (pfd == -1) {
+	if (pfd < 0) {
 		if (errno == ENOENT || errno == EOPNOTSUPP) {
 			printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
 			       __func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index ca9f0895ec84..6490e9673002 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -80,7 +80,7 @@ void test_perf_buffer(void)
 	pb_opts.sample_cb = on_sample;
 	pb_opts.ctx = &cpu_seen;
 	pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
-	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
 		goto out_close;
 
 	CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
index 72c3690844fb..33144c9432ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -97,8 +97,7 @@ void test_perf_event_stackmap(void)
 
 	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
 							   pmu_fd);
-	if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
-		  "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+	if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
 		close(pmu_fd);
 		goto cleanup;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 7aecfd9e87d1..95bd12097358 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -15,7 +15,7 @@ void test_probe_user(void)
 	static const int zero = 0;
 
 	obj = bpf_object__open_file(obj_file, &opts);
-	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
 
 	kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
@@ -33,11 +33,8 @@ void test_probe_user(void)
 		goto cleanup;
 
 	kprobe_link = bpf_program__attach(kprobe_prog);
-	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
-		  "err %ld\n", PTR_ERR(kprobe_link))) {
-		kprobe_link = NULL;
+	if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
 		goto cleanup;
-	}
 
 	memset(&curr, 0, sizeof(curr));
 	in->sin_family = AF_INET;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 131d7f7eeb42..89fc98faf19e 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -46,7 +46,7 @@ void test_prog_run_xattr(void)
 	tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
 
 	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
+	CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
 	      "err %d errno %d retval %d\n", err, errno, tattr.retval);
 
 	CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
@@ -78,6 +78,6 @@ void test_prog_run_xattr(void)
 cleanup:
 	if (skel)
 		test_pkt_access__destroy(skel);
-	if (stats_fd != -1)
+	if (stats_fd >= 0)
 		close(stats_fd);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index c5fb191874ac..41720a62c4fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -77,7 +77,7 @@ void test_raw_tp_test_run(void)
 	/* invalid cpu ID should fail with ENXIO */
 	opts.cpu = 0xffffffff;
 	err = bpf_prog_test_run_opts(prog_fd, &opts);
-	CHECK(err != -1 || errno != ENXIO,
+	CHECK(err >= 0 || errno != ENXIO,
 	      "test_run_opts_fail",
 	      "should failed with ENXIO\n");
 
@@ -85,7 +85,7 @@ void test_raw_tp_test_run(void)
 	opts.cpu = 1;
 	opts.flags = 0;
 	err = bpf_prog_test_run_opts(prog_fd, &opts);
-	CHECK(err != -1 || errno != EINVAL,
+	CHECK(err >= 0 || errno != EINVAL,
 	      "test_run_opts_fail",
 	      "should failed with EINVAL\n");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 563e12120e77..5f9eaa3ab584 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -30,7 +30,7 @@ void test_rdonly_maps(void)
 	struct bss bss;
 
 	obj = bpf_object__open_file(file, NULL);
-	if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		return;
 
 	err = bpf_object__load(obj);
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
 			goto cleanup;
 
 		link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
-		if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
-			  t->prog_name, PTR_ERR(link))) {
-			link = NULL;
+		if (!ASSERT_OK_PTR(link, "attach_prog"))
 			goto cleanup;
-		}
 
 		/* trigger probe */
 		usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index ac1ee10cffd8..de2688166696 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -15,7 +15,7 @@ void test_reference_tracking(void)
 	int err = 0;
 
 	obj = bpf_object__open_file(file, &open_opts);
-	if (CHECK_FAIL(IS_ERR(obj)))
+	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
 
 	if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index d3c2de2c24d1..f62361306f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
-		if (test_symbols[i].id != -1)
+		if (test_symbols[i].id >= 0)
 			continue;
 
 		if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index cef63e703924..167cd8a2edfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -63,7 +63,7 @@ void test_ringbuf_multi(void)
 		goto cleanup;
 
 	proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
-	if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+	if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
 		goto cleanup;
 
 	err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 821b4146b7b6..4efd337d6a3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type)
 	attr.max_entries = REUSEPORT_ARRAY_SIZE;
 
 	reuseport_array = bpf_create_map_xattr(&attr);
-	RET_ERR(reuseport_array == -1, "creating reuseport_array",
+	RET_ERR(reuseport_array < 0, "creating reuseport_array",
 		"reuseport_array:%d errno:%d\n", reuseport_array, errno);
 
 	/* Creating outer_map */
@@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type)
 	attr.max_entries = 1;
 	attr.inner_map_fd = reuseport_array;
 	outer_map = bpf_create_map_xattr(&attr);
-	RET_ERR(outer_map == -1, "creating outer_map",
+	RET_ERR(outer_map < 0, "creating outer_map",
 		"outer_map:%d errno:%d\n", outer_map, errno);
 
 	return 0;
@@ -102,8 +102,9 @@ static int prepare_bpf_obj(void)
 	int err;
 
 	obj = bpf_object__open("test_select_reuseport_kern.o");
-	RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
-		"obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+	err = libbpf_get_error(obj);
+	RET_ERR(err, "open test_select_reuseport_kern.o",
+		"obj:%p PTR_ERR(obj):%d\n", obj, err);
 
 	map = bpf_object__find_map_by_name(obj, "outer_map");
 	RET_ERR(!map, "find outer_map", "!map\n");
@@ -116,31 +117,31 @@ static int prepare_bpf_obj(void)
 	prog = bpf_program__next(NULL, obj);
 	RET_ERR(!prog, "get first bpf_program", "!prog\n");
 	select_by_skb_data_prog = bpf_program__fd(prog);
-	RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+	RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
 		"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
 
 	map = bpf_object__find_map_by_name(obj, "result_map");
 	RET_ERR(!map, "find result_map", "!map\n");
 	result_map = bpf_map__fd(map);
-	RET_ERR(result_map == -1, "get result_map fd",
+	RET_ERR(result_map < 0, "get result_map fd",
 		"result_map:%d\n", result_map);
 
 	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
 	RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
 	tmp_index_ovr_map = bpf_map__fd(map);
-	RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+	RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
 		"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
 
 	map = bpf_object__find_map_by_name(obj, "linum_map");
 	RET_ERR(!map, "find linum_map", "!map\n");
 	linum_map = bpf_map__fd(map);
-	RET_ERR(linum_map == -1, "get linum_map fd",
+	RET_ERR(linum_map < 0, "get linum_map fd",
 		"linum_map:%d\n", linum_map);
 
 	map = bpf_object__find_map_by_name(obj, "data_check_map");
 	RET_ERR(!map, "find data_check_map", "!map\n");
 	data_check_map = bpf_map__fd(map);
-	RET_ERR(data_check_map == -1, "get data_check_map fd",
+	RET_ERR(data_check_map < 0, "get data_check_map fd",
 		"data_check_map:%d\n", data_check_map);
 
 	return 0;
@@ -237,7 +238,7 @@ static long get_linum(void)
 	int err;
 
 	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
-	RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+	RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
 		err, errno);
 
 	return linum;
@@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
 	addrlen = sizeof(cli_sa);
 	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
 			  &addrlen);
-	RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+	RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
 	       err, errno);
 
 	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
-	RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+	RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
 	       err, errno);
 
 	if (type == SOCK_STREAM) {
@@ -347,7 +348,7 @@ static void check_results(void)
 
 	for (i = 0; i < NR_RESULTS; i++) {
 		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
-		RET_IF(err == -1, "lookup_elem(result_map)",
+		RET_IF(err < 0, "lookup_elem(result_map)",
 		       "i:%u err:%d errno:%d\n", i, err, errno);
 	}
 
@@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family)
 	 */
 	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
 				  &tmp_index, BPF_ANY);
-	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+	RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
 	       "err:%d errno:%d\n", err, errno);
 	do_test(type, family, &cmd, PASS);
 	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
 				  &tmp_index);
-	RET_IF(err == -1 || tmp_index != -1,
+	RET_IF(err < 0 || tmp_index >= 0,
 	       "lookup_elem(tmp_index_ovr_map)",
 	       "err:%d errno:%d tmp_index:%d\n",
 	       err, errno, tmp_index);
@@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family)
 
 	for (i = 0; i < NR_RESULTS; i++) {
 		err = bpf_map_lookup_elem(result_map, &i, &tmp);
-		RET_IF(err == -1, "lookup_elem(result_map)",
+		RET_IF(err < 0, "lookup_elem(result_map)",
 		       "i:%u err:%d errno:%d\n", i, err, errno);
 		nr_run_before += tmp;
 	}
@@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family)
 
 	for (i = 0; i < NR_RESULTS; i++) {
 		err = bpf_map_lookup_elem(result_map, &i, &tmp);
-		RET_IF(err == -1, "lookup_elem(result_map)",
+		RET_IF(err < 0, "lookup_elem(result_map)",
 		       "i:%u err:%d errno:%d\n", i, err, errno);
 		nr_run_after += tmp;
 	}
@@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
 					 SO_ATTACH_REUSEPORT_EBPF,
 					 &select_by_skb_data_prog,
 					 sizeof(select_by_skb_data_prog));
-			RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+			RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
 			       "err:%d errno:%d\n", err, errno);
 		}
 
 		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
-		RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+		RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
 		       i, err, errno);
 
 		if (type == SOCK_STREAM) {
 			err = listen(sk_fds[i], 10);
-			RET_IF(err == -1, "listen()",
+			RET_IF(err < 0, "listen()",
 			       "sk_fds[%d] err:%d errno:%d\n",
 			       i, err, errno);
 		}
 
 		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
 					  BPF_NOEXIST);
-		RET_IF(err == -1, "update_elem(reuseport_array)",
+		RET_IF(err < 0, "update_elem(reuseport_array)",
 		       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
 
 		if (i == first) {
@@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
 	prepare_sk_fds(type, family, inany);
 	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
 				  BPF_ANY);
-	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+	RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
 	       "err:%d errno:%d\n", err, errno);
 
 	/* Install reuseport_array to outer_map? */
@@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
 
 	err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
 				  BPF_ANY);
-	RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+	RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
 	       "err:%d errno:%d\n", err, errno);
 }
 
@@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map)
 		return;
 
 	err = bpf_map_delete_elem(outer_map, &index_zero);
-	RET_IF(err == -1, "delete_elem(outer_map)",
+	RET_IF(err < 0, "delete_elem(outer_map)",
 	       "err:%d errno:%d\n", err, errno);
 }
 
 static void cleanup(void)
 {
-	if (outer_map != -1) {
+	if (outer_map >= 0) {
 		close(outer_map);
 		outer_map = -1;
 	}
 
-	if (reuseport_array != -1) {
+	if (reuseport_array >= 0) {
 		close(reuseport_array);
 		reuseport_array = -1;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index a1eade51d440..023cc532992d 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 
 		skel->links.send_signal_perf =
 			bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
-		if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
-			  "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+		if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
 			goto disable_pmu;
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 45c82db3c58c..aee41547e7f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
 	}
 
 	link = bpf_program__attach_netns(prog, net_fd);
-	if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
 		errno = -PTR_ERR(link);
 		log_err("failed to attach program '%s' to netns",
 			bpf_program__name(prog));
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index af87118e748e..577d619fb07e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -97,12 +97,12 @@ static void check_result(void)
 
 	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
 				  &egress_linum);
-	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
 	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
 				  &ingress_linum);
-	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
 	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
@@ -355,14 +355,12 @@ void test_sock_fields(void)
 
 	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
 						 child_cg_fd);
-	if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
-		  PTR_ERR(egress_link)))
+	if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
 		goto done;
 
 	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
 						  child_cg_fd);
-	if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
-		  PTR_ERR(ingress_link)))
+	if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
 		goto done;
 
 	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
@@ -375,8 +373,8 @@ done:
 	bpf_link__destroy(egress_link);
 	bpf_link__destroy(ingress_link);
 	test_sock_fields__destroy(skel);
-	if (child_cg_fd != -1)
+	if (child_cg_fd >= 0)
 		close(child_cg_fd);
-	if (parent_cg_fd != -1)
+	if (parent_cg_fd >= 0)
 		close(parent_cg_fd);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index ab77596b64e3..1352ec104149 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
 	int s, map, err;
 
 	s = connected_socket_v4();
-	if (CHECK_FAIL(s == -1))
+	if (CHECK_FAIL(s < 0))
 		return;
 
 	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
-	if (CHECK_FAIL(map == -1)) {
+	if (CHECK_FAIL(map < 0)) {
 		perror("bpf_create_map");
 		goto out;
 	}
@@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(skel->progs.copy, &opts);
-	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
 	}
 
 	err = bpf_prog_attach(verdict, map, second, 0);
-	assert(err == -1 && errno == EBUSY);
+	ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
 
 	err = bpf_prog_detach2(verdict, map, first);
 	if (CHECK_FAIL(err)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 06b86addc181..7a0d64fdc192 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
 	int map;
 
 	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
-	if (CHECK_FAIL(map == -1)) {
+	if (CHECK_FAIL(map < 0)) {
 		perror("bpf_map_create");
 		return;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 648d9ae898d2..0f066b89b4af 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -139,7 +139,7 @@
 #define xbpf_map_delete_elem(fd, key)                                          \
 	({                                                                     \
 		int __ret = bpf_map_delete_elem((fd), (key));                  \
-		if (__ret == -1)                                               \
+		if (__ret < 0)                                               \
 			FAIL_ERRNO("map_delete");                              \
 		__ret;                                                         \
 	})
@@ -147,7 +147,7 @@
 #define xbpf_map_lookup_elem(fd, key, val)                                     \
 	({                                                                     \
 		int __ret = bpf_map_lookup_elem((fd), (key), (val));           \
-		if (__ret == -1)                                               \
+		if (__ret < 0)                                               \
 			FAIL_ERRNO("map_lookup");                              \
 		__ret;                                                         \
 	})
@@ -155,7 +155,7 @@
 #define xbpf_map_update_elem(fd, key, val, flags)                              \
 	({                                                                     \
 		int __ret = bpf_map_update_elem((fd), (key), (val), (flags));  \
-		if (__ret == -1)                                               \
+		if (__ret < 0)                                               \
 			FAIL_ERRNO("map_update");                              \
 		__ret;                                                         \
 	})
@@ -164,7 +164,7 @@
 	({                                                                     \
 		int __ret =                                                    \
 			bpf_prog_attach((prog), (target), (type), (flags));    \
-		if (__ret == -1)                                               \
+		if (__ret < 0)                                               \
 			FAIL_ERRNO("prog_attach(" #type ")");                  \
 		__ret;                                                         \
 	})
@@ -172,7 +172,7 @@
 #define xbpf_prog_detach2(prog, target, type)                                  \
 	({                                                                     \
 		int __ret = bpf_prog_detach2((prog), (target), (type));        \
-		if (__ret == -1)                                               \
+		if (__ret < 0)                                               \
 			FAIL_ERRNO("prog_detach2(" #type ")");                 \
 		__ret;                                                         \
 	})
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 11a769e18f5d..0a91d8d9954b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -62,8 +62,7 @@ retry:
 
 	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
 							   pmu_fd);
-	if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
-		  "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+	if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
 		close(pmu_fd);
 		goto cleanup;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 37269d23df93..04b476bd62b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -21,7 +21,7 @@ void test_stacktrace_map(void)
 		goto close_prog;
 
 	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
-	if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_tp"))
 		goto close_prog;
 
 	/* find map fds */
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 404a5498e1a3..4fd30bb651ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void)
 		goto close_prog;
 
 	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
-	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
 		goto close_prog;
 
 	/* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
 		goto close_prog;
 
 close_prog:
-	if (!IS_ERR_OR_NULL(link))
-		bpf_link__destroy(link);
+	bpf_link__destroy(link);
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 08d19cafd5e8..1fa772079967 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -353,8 +353,7 @@ static void fastopen_estab(void)
 		return;
 
 	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-		  PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
 		return;
 
 	if (sk_fds_connect(&sk_fds, true)) {
@@ -398,8 +397,7 @@ static void syncookie_estab(void)
 		return;
 
 	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-		  PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
 		return;
 
 	if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +429,7 @@ static void fin(void)
 		return;
 
 	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-		  PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
 		return;
 
 	if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +468,7 @@ static void __simple_estab(bool exprm)
 		return;
 
 	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-		  PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
 		return;
 
 	if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +505,7 @@ static void misc(void)
 		return;
 
 	link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
-	if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
-		  PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
 		return;
 
 	if (sk_fds_connect(&sk_fds, false)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 9966685866fd..123c68c1917d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -73,7 +73,7 @@ void test_test_overhead(void)
 		return;
 
 	obj = bpf_object__open_file("./test_overhead.o", NULL);
-	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
 
 	kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
@@ -108,7 +108,7 @@ void test_test_overhead(void)
 	/* attach kprobe */
 	link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
 					  kprobe_func);
-	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_kprobe"))
 		goto cleanup;
 	test_run("kprobe");
 	bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
 	/* attach kretprobe */
 	link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
 					  kprobe_func);
-	if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
 		goto cleanup;
 	test_run("kretprobe");
 	bpf_link__destroy(link);
 
 	/* attach raw_tp */
 	link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
-	if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
 		goto cleanup;
 	test_run("raw_tp");
 	bpf_link__destroy(link);
 
 	/* attach fentry */
 	link = bpf_program__attach_trace(fentry_prog);
-	if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_fentry"))
 		goto cleanup;
 	test_run("fentry");
 	bpf_link__destroy(link);
 
 	/* attach fexit */
 	link = bpf_program__attach_trace(fexit_prog);
-	if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "attach_fexit"))
 		goto cleanup;
 	test_run("fexit");
 	bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index f3022d934e2d..d7f5a931d7f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,7 +55,7 @@ void test_trampoline_count(void)
 	/* attach 'allowed' trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
 		obj = bpf_object__open_file(object, NULL);
-		if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+		if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
 			obj = NULL;
 			goto cleanup;
 		}
@@ -68,14 +68,14 @@ void test_trampoline_count(void)
 
 		if (rand() % 2) {
 			link = load(inst[i].obj, fentry_name);
-			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+			if (!ASSERT_OK_PTR(link, "attach_prog")) {
 				link = NULL;
 				goto cleanup;
 			}
 			inst[i].link_fentry = link;
 		} else {
 			link = load(inst[i].obj, fexit_name);
-			if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+			if (!ASSERT_OK_PTR(link, "attach_prog")) {
 				link = NULL;
 				goto cleanup;
 			}
@@ -85,7 +85,7 @@ void test_trampoline_count(void)
 
 	/* and try 1 extra.. */
 	obj = bpf_object__open_file(object, NULL);
-	if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+	if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
 		obj = NULL;
 		goto cleanup;
 	}
@@ -96,13 +96,15 @@ void test_trampoline_count(void)
 
 	/* ..that needs to fail */
 	link = load(obj, fentry_name);
-	if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+	err = libbpf_get_error(link);
+	if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
 		bpf_link__destroy(link);
 		goto cleanup_extra;
 	}
 
 	/* with E2BIG error */
-	CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+	ASSERT_EQ(err, -E2BIG, "proper error check");
+	ASSERT_EQ(link, NULL, "ptr_is_null");
 
 	/* and finaly execute the probe */
 	if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
index 2aba09d4d01b..56c9d6bd38a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -22,11 +22,10 @@ void test_udp_limit(void)
 		goto close_cgroup_fd;
 
 	skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+		goto close_skeleton;
 	skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
-	if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
-		  "cg-attach", "sock %ld sock_release %ld",
-		  PTR_ERR(skel->links.sock),
-		  PTR_ERR(skel->links.sock_release)))
+	if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
 		goto close_skeleton;
 
 	/* BPF program enforces a single UDP socket per cgroup,
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 2c6c570b21f8..3bd5904b4db5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void)
 	pb_opts.ctx = &passed;
 	pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
 			      1, &pb_opts);
-	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
 		goto out;
 
 	/* Run test program */
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 6f814999b395..46eed0a33c23 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -51,7 +51,7 @@ void test_xdp_link(void)
 
 	/* BPF link is not allowed to replace prog attachment */
 	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
-	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
 		bpf_link__destroy(link);
 		/* best-effort detach prog */
 		opts.old_fd = prog_fd1;
@@ -67,7 +67,7 @@ void test_xdp_link(void)
 
 	/* now BPF link should attach successfully */
 	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
-	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "link_attach"))
 		goto cleanup;
 	skel1->links.xdp_handler = link;
 
@@ -95,7 +95,7 @@ void test_xdp_link(void)
 
 	/* BPF link is not allowed to replace another BPF link */
 	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
-	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
 		bpf_link__destroy(link);
 		goto cleanup;
 	}
@@ -105,7 +105,7 @@ void test_xdp_link(void)
 
 	/* new link attach should succeed */
 	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
-	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+	if (!ASSERT_OK_PTR(link, "link_attach"))
 		goto cleanup;
 	skel2->links.xdp_handler = link;
 
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 8410a730c82f..30cbf5d98f7d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -53,12 +53,12 @@ static void test_hashmap(unsigned int task, void *data)
 
 	value = 0;
 	/* BPF_NOEXIST means add new element if it doesn't exist. */
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
 	       /* key=1 already exists. */
 	       errno == EEXIST);
 
 	/* -1 is an invalid flag. */
-	assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
 	       errno == EINVAL);
 
 	/* Check that key=1 can be found. */
@@ -73,10 +73,10 @@ static void test_hashmap(unsigned int task, void *data)
 	assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
 
 	/* Check that key=2 is not found. */
-	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
 	/* BPF_EXIST means update existing element. */
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
 	       /* key=2 is not there. */
 	       errno == ENOENT);
 
@@ -87,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data)
 	 * inserted due to max_entries limit.
 	 */
 	key = 0;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
 	       errno == E2BIG);
 
 	/* Update existing element, though the map is full. */
@@ -96,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data)
 	key = 2;
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 	key = 3;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
 	       errno == E2BIG);
 
 	/* Check that key = 0 doesn't exist. */
 	key = 0;
-	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -111,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data)
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
 	       (next_key == 1 || next_key == 2) &&
 	       (next_key != first_key));
-	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
 	       errno == ENOENT);
 
 	/* Delete both elements. */
@@ -119,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == 0);
 	key = 2;
 	assert(bpf_map_delete_elem(fd, &key) == 0);
-	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
 	key = 0;
 	/* Check that map is empty. */
-	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
 	       errno == ENOENT);
-	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
 	       errno == ENOENT);
 
 	close(fd);
@@ -186,12 +186,12 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 	expected_key_mask |= key;
 
 	/* BPF_NOEXIST means add new element if it doesn't exist. */
-	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
 	       /* key=1 already exists. */
 	       errno == EEXIST);
 
 	/* -1 is an invalid flag. */
-	assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
 	       errno == EINVAL);
 
 	/* Check that key=1 can be found. Value could be 0 if the lookup
@@ -203,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 
 	key = 2;
 	/* Check that key=2 is not found. */
-	assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
 
 	/* BPF_EXIST means update existing element. */
-	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
 	       /* key=2 is not there. */
 	       errno == ENOENT);
 
@@ -219,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 	 * inserted due to max_entries limit.
 	 */
 	key = 0;
-	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
 	       errno == E2BIG);
 
 	/* Check that key = 0 doesn't exist. */
-	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -254,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == 0);
 	key = 2;
 	assert(bpf_map_delete_elem(fd, &key) == 0);
-	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
 	key = 0;
 	/* Check that map is empty. */
-	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
 	       errno == ENOENT);
-	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
 	       errno == ENOENT);
 
 	close(fd);
@@ -377,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data)
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
 	value = 0;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
@@ -391,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data)
 	 * due to max_entries limit.
 	 */
 	key = 2;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
 	       errno == E2BIG);
 
 	/* Check that key = 2 doesn't exist. */
-	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -404,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data)
 	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
 	       next_key == 1);
-	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
 	       errno == ENOENT);
 
 	/* Delete shouldn't succeed. */
 	key = 1;
-	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+	assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
 
 	close(fd);
 }
@@ -435,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
 	assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
 	bpf_percpu(values, 0) = 0;
-	assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
@@ -450,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
 
 	/* Check that key=2 cannot be inserted due to max_entries limit. */
 	key = 2;
-	assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
 	       errno == E2BIG);
 
 	/* Check that key = 2 doesn't exist. */
-	assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -463,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
 	       next_key == 0);
 	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
 	       next_key == 1);
-	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
 	       errno == ENOENT);
 
 	/* Delete shouldn't succeed. */
 	key = 1;
-	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+	assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
 
 	close(fd);
 }
@@ -572,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data)
 		assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
 
 	/* Check that element cannot be pushed due to max_entries limit */
-	assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+	assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
 	       errno == E2BIG);
 
 	/* Peek element */
@@ -588,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data)
 		       val == vals[i]);
 
 	/* Check that there are not elements left */
-	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
 	       errno == ENOENT);
 
 	/* Check that non supported functions set errno to EINVAL */
-	assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
-	assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+	assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+	assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
 
 	close(fd);
 }
@@ -630,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data)
 		assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
 
 	/* Check that element cannot be pushed due to max_entries limit */
-	assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+	assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
 	       errno == E2BIG);
 
 	/* Peek element */
@@ -646,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data)
 		       val == vals[i]);
 
 	/* Check that there are not elements left */
-	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
 	       errno == ENOENT);
 
 	/* Check that non supported functions set errno to EINVAL */
-	assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
-	assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+	assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+	assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
 
 	close(fd);
 }
@@ -852,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 	}
 
 	bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
-	if (IS_ERR(bpf_map_rx)) {
+	if (!bpf_map_rx) {
 		printf("Failed to load map rx from verdict prog\n");
 		goto out_sockmap;
 	}
@@ -864,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 	}
 
 	bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
-	if (IS_ERR(bpf_map_tx)) {
+	if (!bpf_map_tx) {
 		printf("Failed to load map tx from verdict prog\n");
 		goto out_sockmap;
 	}
@@ -876,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 	}
 
 	bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
-	if (IS_ERR(bpf_map_msg)) {
+	if (!bpf_map_msg) {
 		printf("Failed to load map msg from msg_verdict prog\n");
 		goto out_sockmap;
 	}
@@ -888,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 	}
 
 	bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
-	if (IS_ERR(bpf_map_break)) {
+	if (!bpf_map_break) {
 		printf("Failed to load map tx from verdict prog\n");
 		goto out_sockmap;
 	}
@@ -1170,7 +1170,7 @@ static void test_map_in_map(void)
 	}
 
 	map = bpf_object__find_map_by_name(obj, "mim_array");
-	if (IS_ERR(map)) {
+	if (!map) {
 		printf("Failed to load array of maps from test prog\n");
 		goto out_map_in_map;
 	}
@@ -1181,7 +1181,7 @@ static void test_map_in_map(void)
 	}
 
 	map = bpf_object__find_map_by_name(obj, "mim_hash");
-	if (IS_ERR(map)) {
+	if (!map) {
 		printf("Failed to load hash of maps from test prog\n");
 		goto out_map_in_map;
 	}
@@ -1194,7 +1194,7 @@ static void test_map_in_map(void)
 	bpf_object__load(obj);
 
 	map = bpf_object__find_map_by_name(obj, "mim_array");
-	if (IS_ERR(map)) {
+	if (!map) {
 		printf("Failed to load array of maps from test prog\n");
 		goto out_map_in_map;
 	}
@@ -1211,7 +1211,7 @@ static void test_map_in_map(void)
 	}
 
 	map = bpf_object__find_map_by_name(obj, "mim_hash");
-	if (IS_ERR(map)) {
+	if (!map) {
 		printf("Failed to load hash of maps from test prog\n");
 		goto out_map_in_map;
 	}
@@ -1263,7 +1263,7 @@ static void test_map_large(void)
 	}
 
 	key.c = -1;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
 	       errno == E2BIG);
 
 	/* Iterate through all elements. */
@@ -1271,12 +1271,12 @@ static void test_map_large(void)
 	key.c = -1;
 	for (i = 0; i < MAP_SIZE; i++)
 		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
-	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 
 	key.c = 0;
 	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
 	key.a = 1;
-	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
 	close(fd);
 }
@@ -1408,7 +1408,7 @@ static void test_map_parallel(void)
 	run_parallel(TASKS, test_update_delete, data);
 
 	/* Check that key=0 is already there. */
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
 	       errno == EEXIST);
 
 	/* Check that all elements were inserted. */
@@ -1416,7 +1416,7 @@ static void test_map_parallel(void)
 	key = -1;
 	for (i = 0; i < MAP_SIZE; i++)
 		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
-	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 
 	/* Another check for all elements */
 	for (i = 0; i < MAP_SIZE; i++) {
@@ -1432,8 +1432,8 @@ static void test_map_parallel(void)
 
 	/* Nothing should be left. */
 	key = -1;
-	assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
-	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 }
 
 static void test_map_rdonly(void)
@@ -1451,12 +1451,12 @@ static void test_map_rdonly(void)
 	key = 1;
 	value = 1234;
 	/* Try to insert key=1 element. */
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
 	       errno == EPERM);
 
 	/* Check that key=1 is not found. */
-	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
-	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
 
 	close(fd);
 }
@@ -1479,8 +1479,8 @@ static void test_map_wronly_hash(void)
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
 	/* Check that reading elements and keys from the map is not allowed. */
-	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
-	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+	assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+	assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
 
 	close(fd);
 }
@@ -1507,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
 	assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
 
 	/* Peek element should fail */
-	assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+	assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
 
 	/* Pop element should fail */
-	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
 	       errno == EPERM);
 
 	close(fd);
@@ -1564,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
 			value = &fd32;
 		}
 		err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
-		CHECK(err != -1 || errno != EINVAL,
+		CHECK(err >= 0 || errno != EINVAL,
 		      "reuseport array update unbound sk",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
@@ -1593,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
 			 */
 			err = bpf_map_update_elem(map_fd, &index0, value,
 						  BPF_ANY);
-			CHECK(err != -1 || errno != EINVAL,
+			CHECK(err >= 0 || errno != EINVAL,
 			      "reuseport array update non-listening sk",
 			      "sock_type:%d err:%d errno:%d\n",
 			      type, err, errno);
@@ -1623,31 +1623,31 @@ static void test_reuseport_array(void)
 
 	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 				sizeof(__u32), sizeof(__u64), array_size, 0);
-	CHECK(map_fd == -1, "reuseport array create",
+	CHECK(map_fd < 0, "reuseport array create",
 	      "map_fd:%d, errno:%d\n", map_fd, errno);
 
 	/* Test lookup/update/delete with invalid index */
 	err = bpf_map_delete_elem(map_fd, &bad_index);
-	CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+	CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
 	      "err:%d errno:%d\n", err, errno);
 
 	err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
-	CHECK(err != -1 || errno != E2BIG,
+	CHECK(err >= 0 || errno != E2BIG,
 	      "reuseport array update >=max_entries",
 	      "err:%d errno:%d\n", err, errno);
 
 	err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
-	CHECK(err != -1 || errno != ENOENT,
+	CHECK(err >= 0 || errno != ENOENT,
 	      "reuseport array update >=max_entries",
 	      "err:%d errno:%d\n", err, errno);
 
 	/* Test lookup/delete non existence elem */
 	err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-	CHECK(err != -1 || errno != ENOENT,
+	CHECK(err >= 0 || errno != ENOENT,
 	      "reuseport array lookup not-exist elem",
 	      "err:%d errno:%d\n", err, errno);
 	err = bpf_map_delete_elem(map_fd, &index3);
-	CHECK(err != -1 || errno != ENOENT,
+	CHECK(err >= 0 || errno != ENOENT,
 	      "reuseport array del not-exist elem",
 	      "err:%d errno:%d\n", err, errno);
 
@@ -1661,7 +1661,7 @@ static void test_reuseport_array(void)
 		/* BPF_EXIST failure case */
 		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
 					  BPF_EXIST);
-		CHECK(err != -1 || errno != ENOENT,
+		CHECK(err >= 0 || errno != ENOENT,
 		      "reuseport array update empty elem BPF_EXIST",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
@@ -1670,7 +1670,7 @@ static void test_reuseport_array(void)
 		/* BPF_NOEXIST success case */
 		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
 					  BPF_NOEXIST);
-		CHECK(err == -1,
+		CHECK(err < 0,
 		      "reuseport array update empty elem BPF_NOEXIST",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
@@ -1679,7 +1679,7 @@ static void test_reuseport_array(void)
 		/* BPF_EXIST success case. */
 		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
 					  BPF_EXIST);
-		CHECK(err == -1,
+		CHECK(err < 0,
 		      "reuseport array update same elem BPF_EXIST",
 		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1687,7 +1687,7 @@ static void test_reuseport_array(void)
 		/* BPF_NOEXIST failure case */
 		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
 					  BPF_NOEXIST);
-		CHECK(err != -1 || errno != EEXIST,
+		CHECK(err >= 0 || errno != EEXIST,
 		      "reuseport array update non-empty elem BPF_NOEXIST",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
@@ -1696,7 +1696,7 @@ static void test_reuseport_array(void)
 		/* BPF_ANY case (always succeed) */
 		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
 					  BPF_ANY);
-		CHECK(err == -1,
+		CHECK(err < 0,
 		      "reuseport array update same sk with BPF_ANY",
 		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 
@@ -1705,32 +1705,32 @@ static void test_reuseport_array(void)
 
 		/* The same sk cannot be added to reuseport_array twice */
 		err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
-		CHECK(err != -1 || errno != EBUSY,
+		CHECK(err >= 0 || errno != EBUSY,
 		      "reuseport array update same sk with same index",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
 
 		err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
-		CHECK(err != -1 || errno != EBUSY,
+		CHECK(err >= 0 || errno != EBUSY,
 		      "reuseport array update same sk with different index",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
 
 		/* Test delete elem */
 		err = bpf_map_delete_elem(map_fd, &index3);
-		CHECK(err == -1, "reuseport array delete sk",
+		CHECK(err < 0, "reuseport array delete sk",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
 
 		/* Add it back with BPF_NOEXIST */
 		err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
-		CHECK(err == -1,
+		CHECK(err < 0,
 		      "reuseport array re-add with BPF_NOEXIST after del",
 		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 
 		/* Test cookie */
 		err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-		CHECK(err == -1 || sk_cookie != map_cookie,
+		CHECK(err < 0 || sk_cookie != map_cookie,
 		      "reuseport array lookup re-added sk",
 		      "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
 		      type, err, errno, sk_cookie, map_cookie);
@@ -1739,7 +1739,7 @@ static void test_reuseport_array(void)
 		for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
 			close(grpa_fds64[f]);
 		err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-		CHECK(err != -1 || errno != ENOENT,
+		CHECK(err >= 0 || errno != ENOENT,
 		      "reuseport array lookup after close()",
 		      "sock_type:%d err:%d errno:%d\n",
 		      type, err, errno);
@@ -1750,7 +1750,7 @@ static void test_reuseport_array(void)
 	CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
 	      err, errno);
 	err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
-	CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+	CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
 	      "err:%d errno:%d\n", err, errno);
 	close(fd64);
 
@@ -1760,16 +1760,16 @@ static void test_reuseport_array(void)
 	/* Test 32 bit fd */
 	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 				sizeof(__u32), sizeof(__u32), array_size, 0);
-	CHECK(map_fd == -1, "reuseport array create",
+	CHECK(map_fd < 0, "reuseport array create",
 	      "map_fd:%d, errno:%d\n", map_fd, errno);
 	prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
 			      &sk_cookie, 1);
 	fd = fd64;
 	err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
-	CHECK(err == -1, "reuseport array update 32 bit fd",
+	CHECK(err < 0, "reuseport array update 32 bit fd",
 	      "err:%d errno:%d\n", err, errno);
 	err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-	CHECK(err != -1 || errno != ENOSPC,
+	CHECK(err >= 0 || errno != ENOSPC,
 	      "reuseport array lookup 32 bit fd",
 	      "err:%d errno:%d\n", err, errno);
 	close(fd);
@@ -1815,6 +1815,8 @@ int main(void)
 {
 	srand(time(NULL));
 
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	map_flags = 0;
 	run_all_tests();
 
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6396932b97e2..6f103106a39b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -737,6 +737,9 @@ int main(int argc, char **argv)
 	if (err)
 		return err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	libbpf_set_print(libbpf_print_fn);
 
 	srand(time(NULL));
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dda52cb649dc..8ef7f334e715 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path);
 #define ASSERT_OK_PTR(ptr, name) ({					\
 	static int duration = 0;					\
 	const void *___res = (ptr);					\
-	bool ___ok = !IS_ERR_OR_NULL(___res);				\
-	CHECK(!___ok, (name),						\
-	      "unexpected error: %ld\n", PTR_ERR(___res));		\
+	int ___err = libbpf_get_error(___res);				\
+	bool ___ok = ___err == 0;					\
+	CHECK(!___ok, (name), "unexpected error: %d\n", ___err);	\
 	___ok;								\
 })
 
 #define ASSERT_ERR_PTR(ptr, name) ({					\
 	static int duration = 0;					\
 	const void *___res = (ptr);					\
-	bool ___ok = IS_ERR(___res);					\
+	int ___err = libbpf_get_error(___res);				\
+	bool ___ok = ___err != 0;					\
 	CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);	\
 	___ok;								\
 })
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 73da7fe8c152..4a39304cc5a6 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -82,6 +82,8 @@ int main(int argc, char **argv)
 	cpu_set_t cpuset;
 	__u32 key = 0;
 
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	CPU_ZERO(&cpuset);
 	CPU_SET(0, &cpuset);
 	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
@@ -116,7 +118,7 @@ int main(int argc, char **argv)
 
 	pb_opts.sample_cb = dummyfn;
 	pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
-	if (IS_ERR(pb))
+	if (!pb)
 		goto err;
 
 	pthread_create(&tid, NULL, poller_thread, pb);
@@ -163,7 +165,6 @@ err:
 	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
 	close(cg_fd);
 	cleanup_cgroup_environment();
-	if (!IS_ERR_OR_NULL(pb))
-		perf_buffer__free(pb);
+	perf_buffer__free(pb);
 	return error;
 }

From f12b654327283d158de0af170943ec5dd8cd02e5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 24 May 2021 20:59:33 -0700
Subject: [PATCH 0546/3292] libbpf: Streamline error reporting for low-level
 APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ensure that low-level APIs behave uniformly across the libbpf as follows:
  - in case of an error, errno is always set to the correct error code;
  - when libbpf 1.0 mode is enabled with LIBBPF_STRICT_DIRECT_ERRS option to
    libbpf_set_strict_mode(), return -Exxx error value directly, instead of -1;
  - by default, until libbpf 1.0 is released, keep returning -1 directly.

More context, justification, and discussion can be found in "Libbpf: the road
to v1.0" document ([0]).

  [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210525035935.1461796-4-andrii@kernel.org
---
 tools/lib/bpf/bpf.c             | 168 ++++++++++++++++++++++----------
 tools/lib/bpf/libbpf_internal.h |  26 +++++
 tools/lib/bpf/libbpf_legacy.h   |  12 +++
 3 files changed, 156 insertions(+), 50 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index b7c2cc12034c..86dcac44f32f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -80,6 +80,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
 int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, '\0', sizeof(attr));
 
@@ -102,7 +103,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 	else
 		attr.inner_map_fd = create_attr->inner_map_fd;
 
-	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
@@ -160,6 +162,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
 			       __u32 map_flags, int node)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, '\0', sizeof(attr));
 
@@ -178,7 +181,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
 		attr.numa_node = node;
 	}
 
-	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
@@ -222,10 +226,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 	int fd;
 
 	if (!load_attr->log_buf != !load_attr->log_buf_sz)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
@@ -281,8 +285,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 							load_attr->func_info_cnt,
 							load_attr->func_info_rec_size,
 							attr.func_info_rec_size);
-			if (!finfo)
+			if (!finfo) {
+				errno = E2BIG;
 				goto done;
+			}
 
 			attr.func_info = ptr_to_u64(finfo);
 			attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -293,8 +299,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 							load_attr->line_info_cnt,
 							load_attr->line_info_rec_size,
 							attr.line_info_rec_size);
-			if (!linfo)
+			if (!linfo) {
+				errno = E2BIG;
 				goto done;
+			}
 
 			attr.line_info = ptr_to_u64(linfo);
 			attr.line_info_rec_size = load_attr->line_info_rec_size;
@@ -318,9 +326,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 done:
+	/* free() doesn't affect errno, so we don't need to restore it */
 	free(finfo);
 	free(linfo);
-	return fd;
+	return libbpf_err_errno(fd);
 }
 
 int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
@@ -329,7 +338,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	struct bpf_prog_load_params p = {};
 
 	if (!load_attr || !log_buf != !log_buf_sz)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	p.prog_type = load_attr->prog_type;
 	p.expected_attach_type = load_attr->expected_attach_type;
@@ -391,6 +400,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		       int log_level)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = type;
@@ -404,13 +414,15 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	attr.kern_version = kern_version;
 	attr.prog_flags = prog_flags;
 
-	return sys_bpf_prog_load(&attr, sizeof(attr));
+	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
@@ -418,24 +430,28 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
 	attr.value = ptr_to_u64(value);
 	attr.flags = flags;
 
-	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 
-	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
@@ -443,19 +459,22 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 	attr.value = ptr_to_u64(value);
 	attr.flags = flags;
 
-	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 
-	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
@@ -474,34 +493,40 @@ int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, _
 int bpf_map_delete_elem(int fd, const void *key)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 
-	return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.next_key = ptr_to_u64(next_key);
 
-	return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_freeze(int fd)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 
-	return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
@@ -513,7 +538,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_map_batch_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.batch.map_fd = fd;
@@ -528,7 +553,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
 	ret = sys_bpf(cmd, &attr, sizeof(attr));
 	*count = attr.batch.count;
 
-	return ret;
+	return libbpf_err_errno(ret);
 }
 
 int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
@@ -565,22 +590,26 @@ int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
 int bpf_obj_pin(int fd, const char *pathname)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.pathname = ptr_to_u64((void *)pathname);
 	attr.bpf_fd = fd;
 
-	return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_obj_get(const char *pathname)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.pathname = ptr_to_u64((void *)pathname);
 
-	return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
@@ -598,9 +627,10 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
 			  const struct bpf_prog_attach_opts *opts)
 {
 	union bpf_attr attr;
+	int ret;
 
 	if (!OPTS_VALID(opts, bpf_prog_attach_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.target_fd	   = target_fd;
@@ -609,30 +639,35 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
 	attr.attach_flags  = OPTS_GET(opts, flags, 0);
 	attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
 
-	return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.target_fd	 = target_fd;
 	attr.attach_type = type;
 
-	return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.target_fd	 = target_fd;
 	attr.attach_bpf_fd = prog_fd;
 	attr.attach_type = type;
 
-	return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_link_create(int prog_fd, int target_fd,
@@ -641,15 +676,16 @@ int bpf_link_create(int prog_fd, int target_fd,
 {
 	__u32 target_btf_id, iter_info_len;
 	union bpf_attr attr;
+	int fd;
 
 	if (!OPTS_VALID(opts, bpf_link_create_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	iter_info_len = OPTS_GET(opts, iter_info_len, 0);
 	target_btf_id = OPTS_GET(opts, target_btf_id, 0);
 
 	if (iter_info_len && target_btf_id)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.link_create.prog_fd = prog_fd;
@@ -665,26 +701,30 @@ int bpf_link_create(int prog_fd, int target_fd,
 		attr.link_create.target_btf_id = target_btf_id;
 	}
 
-	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_link_detach(int link_fd)
 {
 	union bpf_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.link_detach.link_fd = link_fd;
 
-	return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_link_update(int link_fd, int new_prog_fd,
 		    const struct bpf_link_update_opts *opts)
 {
 	union bpf_attr attr;
+	int ret;
 
 	if (!OPTS_VALID(opts, bpf_link_update_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.link_update.link_fd = link_fd;
@@ -692,17 +732,20 @@ int bpf_link_update(int link_fd, int new_prog_fd,
 	attr.link_update.flags = OPTS_GET(opts, flags, 0);
 	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
 
-	return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
 
 int bpf_iter_create(int link_fd)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.iter_create.link_fd = link_fd;
 
-	return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
@@ -719,10 +762,12 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 	attr.query.prog_ids	= ptr_to_u64(prog_ids);
 
 	ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+
 	if (attach_flags)
 		*attach_flags = attr.query.attach_flags;
 	*prog_cnt = attr.query.prog_cnt;
-	return ret;
+
+	return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
@@ -740,13 +785,15 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
 	attr.test.repeat = repeat;
 
 	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
 	if (size_out)
 		*size_out = attr.test.data_size_out;
 	if (retval)
 		*retval = attr.test.retval;
 	if (duration)
 		*duration = attr.test.duration;
-	return ret;
+
+	return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
@@ -755,7 +802,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
 	int ret;
 
 	if (!test_attr->data_out && test_attr->data_size_out > 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.test.prog_fd = test_attr->prog_fd;
@@ -770,11 +817,13 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
 	attr.test.repeat = test_attr->repeat;
 
 	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
 	test_attr->data_size_out = attr.test.data_size_out;
 	test_attr->ctx_size_out = attr.test.ctx_size_out;
 	test_attr->retval = attr.test.retval;
 	test_attr->duration = attr.test.duration;
-	return ret;
+
+	return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
@@ -783,7 +832,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_test_run_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.test.prog_fd = prog_fd;
@@ -801,11 +850,13 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
 	attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
 
 	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
 	OPTS_SET(opts, data_size_out, attr.test.data_size_out);
 	OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
 	OPTS_SET(opts, duration, attr.test.duration);
 	OPTS_SET(opts, retval, attr.test.retval);
-	return ret;
+
+	return libbpf_err_errno(ret);
 }
 
 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
@@ -820,7 +871,7 @@ static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
 	if (!err)
 		*next_id = attr.next_id;
 
-	return err;
+	return libbpf_err_errno(err);
 }
 
 int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
@@ -846,41 +897,49 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
 int bpf_prog_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_id = id;
 
-	return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_map_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.map_id = id;
 
-	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_btf_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.btf_id = id;
 
-	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_link_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.link_id = id;
 
-	return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
@@ -894,21 +953,24 @@ int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
 	attr.info.info = ptr_to_u64(info);
 
 	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+
 	if (!err)
 		*info_len = attr.info.info_len;
 
-	return err;
+	return libbpf_err_errno(err);
 }
 
 int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.raw_tracepoint.name = ptr_to_u64(name);
 	attr.raw_tracepoint.prog_fd = prog_fd;
 
-	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
@@ -928,12 +990,13 @@ retry:
 	}
 
 	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
-	if (fd == -1 && !do_log && log_buf && log_buf_size) {
+
+	if (fd < 0 && !do_log && log_buf && log_buf_size) {
 		do_log = true;
 		goto retry;
 	}
 
-	return fd;
+	return libbpf_err_errno(fd);
 }
 
 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
@@ -950,37 +1013,42 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
 	attr.task_fd_query.buf_len = *buf_len;
 
 	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+
 	*buf_len = attr.task_fd_query.buf_len;
 	*prog_id = attr.task_fd_query.prog_id;
 	*fd_type = attr.task_fd_query.fd_type;
 	*probe_offset = attr.task_fd_query.probe_offset;
 	*probe_addr = attr.task_fd_query.probe_addr;
 
-	return err;
+	return libbpf_err_errno(err);
 }
 
 int bpf_enable_stats(enum bpf_stats_type type)
 {
 	union bpf_attr attr;
+	int fd;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.enable_stats.type = type;
 
-	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+	fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+	return libbpf_err_errno(fd);
 }
 
 int bpf_prog_bind_map(int prog_fd, int map_fd,
 		      const struct bpf_prog_bind_opts *opts)
 {
 	union bpf_attr attr;
+	int ret;
 
 	if (!OPTS_VALID(opts, bpf_prog_bind_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_bind_map.prog_fd = prog_fd;
 	attr.prog_bind_map.map_fd = map_fd;
 	attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
 
-	return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
 }
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index e2db08573bf0..8d59683ab246 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -11,6 +11,9 @@
 
 #include <stdlib.h>
 #include <limits.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "libbpf_legacy.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -436,4 +439,27 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
 int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
 int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
 
+extern enum libbpf_strict_mode libbpf_mode;
+
+/* handle direct returned errors */
+static inline int libbpf_err(int ret)
+{
+	if (ret < 0)
+		errno = -ret;
+	return ret;
+}
+
+/* handle errno-based (e.g., syscall or libc) errors according to libbpf's
+ * strict mode settings
+ */
+static inline int libbpf_err_errno(int ret)
+{
+	if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
+		/* errno is already assumed to be set on error */
+		return ret < 0 ? -errno : ret;
+
+	/* legacy: on error return -1 directly and don't touch errno */
+	return ret;
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 7482cfe22ab2..df0d03dcffab 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -33,6 +33,18 @@ enum libbpf_strict_mode {
 	 * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
 	 */
 	LIBBPF_STRICT_NONE = 0x00,
+	/*
+	 * Return NULL pointers on error, not ERR_PTR(err).
+	 * Additionally, libbpf also always sets errno to corresponding Exx
+	 * (positive) error code.
+	 */
+	LIBBPF_STRICT_CLEAN_PTRS = 0x01,
+	/*
+	 * Return actual error codes from low-level APIs directly, not just -1.
+	 * Additionally, libbpf also always sets errno to corresponding Exx
+	 * (positive) error code.
+	 */
+	LIBBPF_STRICT_DIRECT_ERRS = 0x02,
 
 	__LIBBPF_STRICT_LAST,
 };

From e9fc3ce99b3485586e7e4803b63df8b4c681f897 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 24 May 2021 20:59:34 -0700
Subject: [PATCH 0547/3292] libbpf: Streamline error reporting for high-level
 APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement changes to error reporting for high-level libbpf APIs to make them
less surprising and less error-prone to users:
  - in all the cases when error happens, errno is set to an appropriate error
    value;
  - in libbpf 1.0 mode, all pointer-returning APIs return NULL on error and
    error code is communicated through errno; this applies both to APIs that
    already returned NULL before (so now they communicate more detailed error
    codes), as well as for many APIs that used ERR_PTR() macro and encoded
    error numbers as fake pointers.
  - in legacy (default) mode, those APIs that were returning ERR_PTR(err),
    continue doing so, but still set errno.

With these changes, errno can be always used to extract actual error,
regardless of legacy or libbpf 1.0 modes. This is utilized internally in
libbpf in places where libbpf uses it's own high-level APIs.
libbpf_get_error() is adapted to handle both cases completely transparently to
end-users (and is used by libbpf consistently as well).

More context, justification, and discussion can be found in "Libbpf: the road
to v1.0" document ([0]).

  [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210525035935.1461796-5-andrii@kernel.org
---
 tools/lib/bpf/bpf_prog_linfo.c  |  18 +-
 tools/lib/bpf/btf.c             | 302 +++++++++----------
 tools/lib/bpf/btf_dump.c        |  14 +-
 tools/lib/bpf/libbpf.c          | 502 +++++++++++++++++---------------
 tools/lib/bpf/libbpf_errno.c    |   7 +-
 tools/lib/bpf/libbpf_internal.h |  27 ++
 tools/lib/bpf/linker.c          |  22 +-
 tools/lib/bpf/netlink.c         |  81 +++---
 tools/lib/bpf/ringbuf.c         |  26 +-
 9 files changed, 531 insertions(+), 468 deletions(-)

diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 3ed1a27b5f7c..5c503096ef43 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
 	nr_linfo = info->nr_line_info;
 
 	if (!nr_linfo)
-		return NULL;
+		return errno = EINVAL, NULL;
 
 	/*
 	 * The min size that bpf_prog_linfo has to access for
@@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
 	 */
 	if (info->line_info_rec_size <
 	    offsetof(struct bpf_line_info, file_name_off))
-		return NULL;
+		return errno = EINVAL, NULL;
 
 	prog_linfo = calloc(1, sizeof(*prog_linfo));
 	if (!prog_linfo)
-		return NULL;
+		return errno = ENOMEM, NULL;
 
 	/* Copy xlated line_info */
 	prog_linfo->nr_linfo = nr_linfo;
@@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
 
 err_free:
 	bpf_prog_linfo__free(prog_linfo);
-	return NULL;
+	return errno = EINVAL, NULL;
 }
 
 const struct bpf_line_info *
@@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
 	const __u64 *jited_linfo;
 
 	if (func_idx >= prog_linfo->nr_jited_func)
-		return NULL;
+		return errno = ENOENT, NULL;
 
 	nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
 	if (nr_skip >= nr_linfo)
-		return NULL;
+		return errno = ENOENT, NULL;
 
 	start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
 	jited_rec_size = prog_linfo->jited_rec_size;
@@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
 		(start * jited_rec_size);
 	jited_linfo = raw_jited_linfo;
 	if (addr < *jited_linfo)
-		return NULL;
+		return errno = ENOENT, NULL;
 
 	nr_linfo -= nr_skip;
 	rec_size = prog_linfo->rec_size;
@@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
 
 	nr_linfo = prog_linfo->nr_linfo;
 	if (nr_skip >= nr_linfo)
-		return NULL;
+		return errno = ENOENT, NULL;
 
 	rec_size = prog_linfo->rec_size;
 	raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
 	linfo = raw_linfo;
 	if (insn_off < linfo->insn_off)
-		return NULL;
+		return errno = ENOENT, NULL;
 
 	nr_linfo -= nr_skip;
 	for (i = 0; i < nr_linfo; i++) {
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d57e13a13798..b46760b93bb4 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -443,7 +443,7 @@ struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
 	if (type_id >= btf->start_id + btf->nr_types)
-		return NULL;
+		return errno = EINVAL, NULL;
 	return btf_type_by_id((struct btf *)btf, type_id);
 }
 
@@ -510,7 +510,7 @@ size_t btf__pointer_size(const struct btf *btf)
 int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
 {
 	if (ptr_sz != 4 && ptr_sz != 8)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	btf->ptr_sz = ptr_sz;
 	return 0;
 }
@@ -537,7 +537,7 @@ enum btf_endianness btf__endianness(const struct btf *btf)
 int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
 {
 	if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
 	if (!btf->swapped_endian) {
@@ -568,8 +568,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 	int i;
 
 	t = btf__type_by_id(btf, type_id);
-	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
-	     i++) {
+	for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
 		switch (btf_kind(t)) {
 		case BTF_KIND_INT:
 		case BTF_KIND_STRUCT:
@@ -592,12 +591,12 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_ARRAY:
 			array = btf_array(t);
 			if (nelems && array->nelems > UINT32_MAX / nelems)
-				return -E2BIG;
+				return libbpf_err(-E2BIG);
 			nelems *= array->nelems;
 			type_id = array->type;
 			break;
 		default:
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		}
 
 		t = btf__type_by_id(btf, type_id);
@@ -605,9 +604,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 
 done:
 	if (size < 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (nelems && size > UINT32_MAX / nelems)
-		return -E2BIG;
+		return libbpf_err(-E2BIG);
 
 	return nelems * size;
 }
@@ -640,7 +639,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
 		for (i = 0; i < vlen; i++, m++) {
 			align = btf__align_of(btf, m->type);
 			if (align <= 0)
-				return align;
+				return libbpf_err(align);
 			max_align = max(max_align, align);
 		}
 
@@ -648,7 +647,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
 	}
 	default:
 		pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
-		return 0;
+		return errno = EINVAL, 0;
 	}
 }
 
@@ -667,7 +666,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 	}
 
 	if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	return type_id;
 }
@@ -687,7 +686,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 			return i;
 	}
 
-	return -ENOENT;
+	return libbpf_err(-ENOENT);
 }
 
 __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
@@ -709,7 +708,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
 			return i;
 	}
 
-	return -ENOENT;
+	return libbpf_err(-ENOENT);
 }
 
 static bool btf_is_modifiable(const struct btf *btf)
@@ -785,12 +784,12 @@ static struct btf *btf_new_empty(struct btf *base_btf)
 
 struct btf *btf__new_empty(void)
 {
-	return btf_new_empty(NULL);
+	return libbpf_ptr(btf_new_empty(NULL));
 }
 
 struct btf *btf__new_empty_split(struct btf *base_btf)
 {
-	return btf_new_empty(base_btf);
+	return libbpf_ptr(btf_new_empty(base_btf));
 }
 
 static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
@@ -846,7 +845,7 @@ done:
 
 struct btf *btf__new(const void *data, __u32 size)
 {
-	return btf_new(data, size, NULL);
+	return libbpf_ptr(btf_new(data, size, NULL));
 }
 
 static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
@@ -937,7 +936,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
 		goto done;
 	}
 	btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
-	if (IS_ERR(btf))
+	err = libbpf_get_error(btf);
+	if (err)
 		goto done;
 
 	switch (gelf_getclass(elf)) {
@@ -953,9 +953,9 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
 	}
 
 	if (btf_ext && btf_ext_data) {
-		*btf_ext = btf_ext__new(btf_ext_data->d_buf,
-					btf_ext_data->d_size);
-		if (IS_ERR(*btf_ext))
+		*btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+		err = libbpf_get_error(*btf_ext);
+		if (err)
 			goto done;
 	} else if (btf_ext) {
 		*btf_ext = NULL;
@@ -965,30 +965,24 @@ done:
 		elf_end(elf);
 	close(fd);
 
-	if (err)
-		return ERR_PTR(err);
-	/*
-	 * btf is always parsed before btf_ext, so no need to clean up
-	 * btf_ext, if btf loading failed
-	 */
-	if (IS_ERR(btf))
+	if (!err)
 		return btf;
-	if (btf_ext && IS_ERR(*btf_ext)) {
-		btf__free(btf);
-		err = PTR_ERR(*btf_ext);
-		return ERR_PTR(err);
-	}
-	return btf;
+
+	if (btf_ext)
+		btf_ext__free(*btf_ext);
+	btf__free(btf);
+
+	return ERR_PTR(err);
 }
 
 struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 {
-	return btf_parse_elf(path, NULL, btf_ext);
+	return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext));
 }
 
 struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
 {
-	return btf_parse_elf(path, base_btf, NULL);
+	return libbpf_ptr(btf_parse_elf(path, base_btf, NULL));
 }
 
 static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
@@ -1056,36 +1050,39 @@ err_out:
 
 struct btf *btf__parse_raw(const char *path)
 {
-	return btf_parse_raw(path, NULL);
+	return libbpf_ptr(btf_parse_raw(path, NULL));
 }
 
 struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
 {
-	return btf_parse_raw(path, base_btf);
+	return libbpf_ptr(btf_parse_raw(path, base_btf));
 }
 
 static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
 {
 	struct btf *btf;
+	int err;
 
 	if (btf_ext)
 		*btf_ext = NULL;
 
 	btf = btf_parse_raw(path, base_btf);
-	if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+	err = libbpf_get_error(btf);
+	if (!err)
 		return btf;
-
+	if (err != -EPROTO)
+		return ERR_PTR(err);
 	return btf_parse_elf(path, base_btf, btf_ext);
 }
 
 struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
 {
-	return btf_parse(path, NULL, btf_ext);
+	return libbpf_ptr(btf_parse(path, NULL, btf_ext));
 }
 
 struct btf *btf__parse_split(const char *path, struct btf *base_btf)
 {
-	return btf_parse(path, base_btf, NULL);
+	return libbpf_ptr(btf_parse(path, base_btf, NULL));
 }
 
 static int compare_vsi_off(const void *_a, const void *_b)
@@ -1178,7 +1175,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
 		}
 	}
 
-	return err;
+	return libbpf_err(err);
 }
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
@@ -1191,13 +1188,13 @@ int btf__load(struct btf *btf)
 	int err = 0;
 
 	if (btf->fd >= 0)
-		return -EEXIST;
+		return libbpf_err(-EEXIST);
 
 retry_load:
 	if (log_buf_size) {
 		log_buf = malloc(log_buf_size);
 		if (!log_buf)
-			return -ENOMEM;
+			return libbpf_err(-ENOMEM);
 
 		*log_buf = 0;
 	}
@@ -1229,7 +1226,7 @@ retry_load:
 
 done:
 	free(log_buf);
-	return err;
+	return libbpf_err(err);
 }
 
 int btf__fd(const struct btf *btf)
@@ -1305,7 +1302,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
 
 	data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
 	if (!data)
-		return NULL;
+		return errno = -ENOMEM, NULL;
 
 	btf->raw_size = data_sz;
 	if (btf->swapped_endian)
@@ -1323,7 +1320,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 	else if (offset - btf->start_str_off < btf->hdr->str_len)
 		return btf_strs_data(btf) + (offset - btf->start_str_off);
 	else
-		return NULL;
+		return errno = EINVAL, NULL;
 }
 
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -1388,17 +1385,20 @@ exit_free:
 int btf__get_from_id(__u32 id, struct btf **btf)
 {
 	struct btf *res;
-	int btf_fd;
+	int err, btf_fd;
 
 	*btf = NULL;
 	btf_fd = bpf_btf_get_fd_by_id(id);
 	if (btf_fd < 0)
-		return -errno;
+		return libbpf_err(-errno);
 
 	res = btf_get_from_fd(btf_fd, NULL);
+	err = libbpf_get_error(res);
+
 	close(btf_fd);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
+
+	if (err)
+		return libbpf_err(err);
 
 	*btf = res;
 	return 0;
@@ -1415,31 +1415,30 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 	__s64 key_size, value_size;
 	__s32 container_id;
 
-	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
-	    max_name) {
+	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
 		pr_warn("map:%s length of '____btf_map_%s' is too long\n",
 			map_name, map_name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	container_id = btf__find_by_name(btf, container_name);
 	if (container_id < 0) {
 		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
 			 map_name, container_name);
-		return container_id;
+		return libbpf_err(container_id);
 	}
 
 	container_type = btf__type_by_id(btf, container_id);
 	if (!container_type) {
 		pr_warn("map:%s cannot find BTF type for container_id:%u\n",
 			map_name, container_id);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
 		pr_warn("map:%s container_name:%s is an invalid container struct\n",
 			map_name, container_name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	key = btf_members(container_type);
@@ -1448,25 +1447,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 	key_size = btf__resolve_size(btf, key->type);
 	if (key_size < 0) {
 		pr_warn("map:%s invalid BTF key_type_size\n", map_name);
-		return key_size;
+		return libbpf_err(key_size);
 	}
 
 	if (expected_key_size != key_size) {
 		pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
 			map_name, (__u32)key_size, expected_key_size);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	value_size = btf__resolve_size(btf, value->type);
 	if (value_size < 0) {
 		pr_warn("map:%s invalid BTF value_type_size\n", map_name);
-		return value_size;
+		return libbpf_err(value_size);
 	}
 
 	if (expected_value_size != value_size) {
 		pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
 			map_name, (__u32)value_size, expected_value_size);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	*key_type_id = key->type;
@@ -1563,11 +1562,11 @@ int btf__find_str(struct btf *btf, const char *s)
 
 	/* BTF needs to be in a modifiable state to build string lookup index */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	off = strset__find_str(btf->strs_set, s);
 	if (off < 0)
-		return off;
+		return libbpf_err(off);
 
 	return btf->start_str_off + off;
 }
@@ -1588,11 +1587,11 @@ int btf__add_str(struct btf *btf, const char *s)
 	}
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	off = strset__add_str(btf->strs_set, s);
 	if (off < 0)
-		return off;
+		return libbpf_err(off);
 
 	btf->hdr->str_len = strset__data_size(btf->strs_set);
 
@@ -1616,7 +1615,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 
 	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	btf->hdr->type_len += data_sz;
 	btf->hdr->str_off += data_sz;
@@ -1653,21 +1652,21 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
 
 	sz = btf_type_size(src_type);
 	if (sz < 0)
-		return sz;
+		return libbpf_err(sz);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	memcpy(t, src_type, sz);
 
 	err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	return btf_commit_type(btf, sz);
 }
@@ -1688,21 +1687,21 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 
 	/* non-empty name */
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type) + sizeof(int);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	/* if something goes wrong later, we might end up with an extra string,
 	 * but that shouldn't be a problem, because BTF can't be constructed
@@ -1736,20 +1735,20 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
 
 	/* non-empty name */
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* byte_sz must be one of the explicitly allowed values */
 	if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
 	    byte_sz != 16)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	name_off = btf__add_str(btf, name);
 	if (name_off < 0)
@@ -1780,15 +1779,15 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
 	int sz, name_off = 0;
 
 	if (validate_type_id(ref_type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	if (name && name[0]) {
 		name_off = btf__add_str(btf, name);
@@ -1831,15 +1830,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
 	int sz;
 
 	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type) + sizeof(struct btf_array);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	t->name_off = 0;
 	t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
@@ -1860,12 +1859,12 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
 	int sz, name_off = 0;
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	if (name && name[0]) {
 		name_off = btf__add_str(btf, name);
@@ -1943,30 +1942,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 
 	/* last type should be union/struct */
 	if (btf->nr_types == 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	t = btf_last_type(btf);
 	if (!btf_is_composite(t))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (validate_type_id(type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	/* best-effort bit field offset/size enforcement */
 	is_bitfield = bit_size || (bit_offset % 8 != 0);
 	if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* only offset 0 is allowed for unions */
 	if (btf_is_union(t) && bit_offset)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_member);
 	m = btf_add_type_mem(btf, sz);
 	if (!m)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	if (name && name[0]) {
 		name_off = btf__add_str(btf, name);
@@ -2008,15 +2007,15 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 
 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	if (name && name[0]) {
 		name_off = btf__add_str(btf, name);
@@ -2048,25 +2047,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 
 	/* last type should be BTF_KIND_ENUM */
 	if (btf->nr_types == 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	t = btf_last_type(btf);
 	if (!btf_is_enum(t))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* non-empty name */
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (value < INT_MIN || value > UINT_MAX)
-		return -E2BIG;
+		return libbpf_err(-E2BIG);
 
 	/* decompose and invalidate raw data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_enum);
 	v = btf_add_type_mem(btf, sz);
 	if (!v)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	name_off = btf__add_str(btf, name);
 	if (name_off < 0)
@@ -2096,7 +2095,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 {
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	switch (fwd_kind) {
 	case BTF_FWD_STRUCT:
@@ -2117,7 +2116,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 		 */
 		return btf__add_enum(btf, name, sizeof(int));
 	default:
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 }
 
@@ -2132,7 +2131,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
 {
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
 }
@@ -2187,10 +2186,10 @@ int btf__add_func(struct btf *btf, const char *name,
 	int id;
 
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
 	    linkage != BTF_FUNC_EXTERN)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
 	if (id > 0) {
@@ -2198,7 +2197,7 @@ int btf__add_func(struct btf *btf, const char *name,
 
 		t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
 	}
-	return id;
+	return libbpf_err(id);
 }
 
 /*
@@ -2219,15 +2218,15 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
 	int sz;
 
 	if (validate_type_id(ret_type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	/* start out with vlen=0; this will be adjusted when adding enum
 	 * values, if necessary
@@ -2254,23 +2253,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	int sz, name_off = 0;
 
 	if (validate_type_id(type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* last type should be BTF_KIND_FUNC_PROTO */
 	if (btf->nr_types == 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	t = btf_last_type(btf);
 	if (!btf_is_func_proto(t))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_param);
 	p = btf_add_type_mem(btf, sz);
 	if (!p)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	if (name && name[0]) {
 		name_off = btf__add_str(btf, name);
@@ -2308,21 +2307,21 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 
 	/* non-empty name */
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
 	    linkage != BTF_VAR_GLOBAL_EXTERN)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (validate_type_id(type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type) + sizeof(struct btf_var);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	name_off = btf__add_str(btf, name);
 	if (name_off < 0)
@@ -2357,15 +2356,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 
 	/* non-empty name */
 	if (!name || !name[0])
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	name_off = btf__add_str(btf, name);
 	if (name_off < 0)
@@ -2397,22 +2396,22 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 
 	/* last type should be BTF_KIND_DATASEC */
 	if (btf->nr_types == 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	t = btf_last_type(btf);
 	if (!btf_is_datasec(t))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (validate_type_id(var_type_id))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	sz = sizeof(struct btf_var_secinfo);
 	v = btf_add_type_mem(btf, sz);
 	if (!v)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	v->type = var_type_id;
 	v->offset = offset;
@@ -2614,11 +2613,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 
 	err = btf_ext_parse_hdr(data, size);
 	if (err)
-		return ERR_PTR(err);
+		return libbpf_err_ptr(err);
 
 	btf_ext = calloc(1, sizeof(struct btf_ext));
 	if (!btf_ext)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 
 	btf_ext->data_size = size;
 	btf_ext->data = malloc(size);
@@ -2628,9 +2627,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	}
 	memcpy(btf_ext->data, data, size);
 
-	if (btf_ext->hdr->hdr_len <
-	    offsetofend(struct btf_ext_header, line_info_len))
+	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+		err = -EINVAL;
 		goto done;
+	}
+
 	err = btf_ext_setup_func_info(btf_ext);
 	if (err)
 		goto done;
@@ -2639,8 +2640,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	if (err)
 		goto done;
 
-	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
+		err = -EINVAL;
 		goto done;
+	}
+
 	err = btf_ext_setup_core_relos(btf_ext);
 	if (err)
 		goto done;
@@ -2648,7 +2652,7 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 done:
 	if (err) {
 		btf_ext__free(btf_ext);
-		return ERR_PTR(err);
+		return libbpf_err_ptr(err);
 	}
 
 	return btf_ext;
@@ -2687,7 +2691,7 @@ static int btf_ext_reloc_info(const struct btf *btf,
 		existing_len = (*cnt) * record_size;
 		data = realloc(*info, existing_len + records_len);
 		if (!data)
-			return -ENOMEM;
+			return libbpf_err(-ENOMEM);
 
 		memcpy(data + existing_len, sinfo->data, records_len);
 		/* adjust insn_off only, the rest data will be passed
@@ -2697,15 +2701,14 @@ static int btf_ext_reloc_info(const struct btf *btf,
 			__u32 *insn_off;
 
 			insn_off = data + existing_len + (i * record_size);
-			*insn_off = *insn_off / sizeof(struct bpf_insn) +
-				insns_cnt;
+			*insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
 		}
 		*info = data;
 		*cnt += sinfo->num_info;
 		return 0;
 	}
 
-	return -ENOENT;
+	return libbpf_err(-ENOENT);
 }
 
 int btf_ext__reloc_func_info(const struct btf *btf,
@@ -2894,11 +2897,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 
 	if (IS_ERR(d)) {
 		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (btf_ensure_modifiable(btf))
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	err = btf_dedup_prep(d);
 	if (err) {
@@ -2938,7 +2941,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 
 done:
 	btf_dedup_free(d);
-	return err;
+	return libbpf_err(err);
 }
 
 #define BTF_UNPROCESSED_ID ((__u32)-1)
@@ -4411,7 +4414,7 @@ struct btf *libbpf_find_kernel_btf(void)
 	char path[PATH_MAX + 1];
 	struct utsname buf;
 	struct btf *btf;
-	int i;
+	int i, err;
 
 	uname(&buf);
 
@@ -4425,17 +4428,16 @@ struct btf *libbpf_find_kernel_btf(void)
 			btf = btf__parse_raw(path);
 		else
 			btf = btf__parse_elf(path, NULL);
-
-		pr_debug("loading kernel BTF '%s': %ld\n",
-			 path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
-		if (IS_ERR(btf))
+		err = libbpf_get_error(btf);
+		pr_debug("loading kernel BTF '%s': %d\n", path, err);
+		if (err)
 			continue;
 
 		return btf;
 	}
 
 	pr_warn("failed to find valid kernel BTF\n");
-	return ERR_PTR(-ESRCH);
+	return libbpf_err_ptr(-ESRCH);
 }
 
 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5e2809d685bf..5dc6b5172bb3 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -128,7 +128,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
 
 	d = calloc(1, sizeof(struct btf_dump));
 	if (!d)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 
 	d->btf = btf;
 	d->btf_ext = btf_ext;
@@ -156,7 +156,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
 	return d;
 err:
 	btf_dump__free(d);
-	return ERR_PTR(err);
+	return libbpf_err_ptr(err);
 }
 
 static int btf_dump_resize(struct btf_dump *d)
@@ -236,16 +236,16 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
 	int err, i;
 
 	if (id > btf__get_nr_types(d->btf))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	err = btf_dump_resize(d);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	d->emit_queue_cnt = 0;
 	err = btf_dump_order_type(d, id, false);
 	if (err < 0)
-		return err;
+		return libbpf_err(err);
 
 	for (i = 0; i < d->emit_queue_cnt; i++)
 		btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
@@ -1075,11 +1075,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
 	int lvl, err;
 
 	if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	err = btf_dump_resize(d);
 	if (err)
-		return -EINVAL;
+		return libbpf_err(err);
 
 	fname = OPTS_GET(opts, field_name, "");
 	lvl = OPTS_GET(opts, indent_level, 0);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1425d7ed0f2f..1c4e20e75237 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2579,16 +2579,14 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 
 	if (btf_data) {
 		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
-		if (IS_ERR(obj->btf)) {
-			err = PTR_ERR(obj->btf);
+		err = libbpf_get_error(obj->btf);
+		if (err) {
 			obj->btf = NULL;
-			pr_warn("Error loading ELF section %s: %d.\n",
-				BTF_ELF_SEC, err);
+			pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
 			goto out;
 		}
 		/* enforce 8-byte pointers for BPF-targeted BTFs */
 		btf__set_pointer_size(obj->btf, 8);
-		err = 0;
 	}
 	if (btf_ext_data) {
 		if (!obj->btf) {
@@ -2596,11 +2594,11 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
 			goto out;
 		}
-		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
-					    btf_ext_data->d_size);
-		if (IS_ERR(obj->btf_ext)) {
-			pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
-				BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+		err = libbpf_get_error(obj->btf_ext);
+		if (err) {
+			pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
+				BTF_EXT_ELF_SEC, err);
 			obj->btf_ext = NULL;
 			goto out;
 		}
@@ -2684,8 +2682,8 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
 		return 0;
 
 	obj->btf_vmlinux = libbpf_find_kernel_btf();
-	if (IS_ERR(obj->btf_vmlinux)) {
-		err = PTR_ERR(obj->btf_vmlinux);
+	err = libbpf_get_error(obj->btf_vmlinux);
+	if (err) {
 		pr_warn("Error loading vmlinux BTF: %d\n", err);
 		obj->btf_vmlinux = NULL;
 		return err;
@@ -2751,8 +2749,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 		/* clone BTF to sanitize a copy and leave the original intact */
 		raw_data = btf__get_raw_data(obj->btf, &sz);
 		kern_btf = btf__new(raw_data, sz);
-		if (IS_ERR(kern_btf))
-			return PTR_ERR(kern_btf);
+		err = libbpf_get_error(kern_btf);
+		if (err)
+			return err;
 
 		/* enforce 8-byte pointers for BPF-targeted BTFs */
 		btf__set_pointer_size(obj->btf, 8);
@@ -3523,7 +3522,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
 		if (pos->sec_name && !strcmp(pos->sec_name, title))
 			return pos;
 	}
-	return NULL;
+	return errno = ENOENT, NULL;
 }
 
 static bool prog_is_subprog(const struct bpf_object *obj,
@@ -3556,7 +3555,7 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
 		if (!strcmp(prog->name, name))
 			return prog;
 	}
-	return NULL;
+	return errno = ENOENT, NULL;
 }
 
 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
@@ -3903,11 +3902,11 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	new_name = strdup(info.name);
 	if (!new_name)
-		return -errno;
+		return libbpf_err(-errno);
 
 	new_fd = open("/", O_RDONLY | O_CLOEXEC);
 	if (new_fd < 0) {
@@ -3945,7 +3944,7 @@ err_close_new_fd:
 	close(new_fd);
 err_free_new_name:
 	free(new_name);
-	return err;
+	return libbpf_err(err);
 }
 
 __u32 bpf_map__max_entries(const struct bpf_map *map)
@@ -3956,7 +3955,7 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 {
 	if (!bpf_map_type__is_map_in_map(map->def.type))
-		return NULL;
+		return errno = EINVAL, NULL;
 
 	return map->inner_map;
 }
@@ -3964,7 +3963,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->def.max_entries = max_entries;
 	return 0;
 }
@@ -3972,7 +3971,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
 {
 	if (!map || !max_entries)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	return bpf_map__set_max_entries(map, max_entries);
 }
@@ -5103,10 +5102,10 @@ static int load_module_btfs(struct bpf_object *obj)
 		}
 
 		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
-		if (IS_ERR(btf)) {
-			pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
-				name, id, PTR_ERR(btf));
-			err = PTR_ERR(btf);
+		err = libbpf_get_error(btf);
+		if (err) {
+			pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
+				name, id, err);
 			goto err_out;
 		}
 
@@ -6366,8 +6365,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 
 	if (targ_btf_path) {
 		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
-		if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
-			err = PTR_ERR(obj->btf_vmlinux_override);
+		err = libbpf_get_error(obj->btf_vmlinux_override);
+		if (err) {
 			pr_warn("failed to parse target BTF: %d\n", err);
 			return err;
 		}
@@ -7424,7 +7423,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 
 	if (prog->obj->loaded) {
 		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if ((prog->type == BPF_PROG_TYPE_TRACING ||
@@ -7434,7 +7433,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 
 		err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
 		if (err)
-			return err;
+			return libbpf_err(err);
 
 		prog->attach_btf_obj_fd = btf_obj_fd;
 		prog->attach_btf_id = btf_type_id;
@@ -7444,13 +7443,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 		if (prog->preprocessor) {
 			pr_warn("Internal error: can't load program '%s'\n",
 				prog->name);
-			return -LIBBPF_ERRNO__INTERNAL;
+			return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
 		}
 
 		prog->instances.fds = malloc(sizeof(int));
 		if (!prog->instances.fds) {
 			pr_warn("Not enough memory for BPF fds\n");
-			return -ENOMEM;
+			return libbpf_err(-ENOMEM);
 		}
 		prog->instances.nr = 1;
 		prog->instances.fds[0] = -1;
@@ -7509,7 +7508,7 @@ out:
 		pr_warn("failed to load program '%s'\n", prog->name);
 	zfree(&prog->insns);
 	prog->insns_cnt = 0;
-	return err;
+	return libbpf_err(err);
 }
 
 static int
@@ -7642,7 +7641,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
 
 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 {
-	return __bpf_object__open_xattr(attr, 0);
+	return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
 }
 
 struct bpf_object *bpf_object__open(const char *path)
@@ -7652,18 +7651,18 @@ struct bpf_object *bpf_object__open(const char *path)
 		.prog_type	= BPF_PROG_TYPE_UNSPEC,
 	};
 
-	return bpf_object__open_xattr(&attr);
+	return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
 }
 
 struct bpf_object *
 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 {
 	if (!path)
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 
 	pr_debug("loading %s\n", path);
 
-	return __bpf_object__open(path, NULL, 0, opts);
+	return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
 }
 
 struct bpf_object *
@@ -7671,9 +7670,9 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 		     const struct bpf_object_open_opts *opts)
 {
 	if (!obj_buf || obj_buf_sz == 0)
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 
-	return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+	return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
 }
 
 struct bpf_object *
@@ -7688,9 +7687,9 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
 
 	/* returning NULL is wrong, but backwards-compatible */
 	if (!obj_buf || obj_buf_sz == 0)
-		return NULL;
+		return errno = EINVAL, NULL;
 
-	return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+	return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -7698,7 +7697,7 @@ int bpf_object__unload(struct bpf_object *obj)
 	size_t i;
 
 	if (!obj)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	for (i = 0; i < obj->nr_maps; i++) {
 		zclose(obj->maps[i].fd);
@@ -8031,14 +8030,14 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	int err, i;
 
 	if (!attr)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	obj = attr->obj;
 	if (!obj)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (obj->loaded) {
 		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (obj->gen_loader)
@@ -8089,7 +8088,7 @@ out:
 
 	bpf_object__unload(obj);
 	pr_warn("failed to load object '%s'\n", obj->path);
-	return err;
+	return libbpf_err(err);
 }
 
 int bpf_object__load(struct bpf_object *obj)
@@ -8161,28 +8160,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 
 	err = make_parent_dir(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	err = check_path(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	if (prog == NULL) {
 		pr_warn("invalid program pointer\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (instance < 0 || instance >= prog->instances.nr) {
 		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
 			instance, prog->name, prog->instances.nr);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
 		err = -errno;
 		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 		pr_warn("failed to pin program: %s\n", cp);
-		return err;
+		return libbpf_err(err);
 	}
 	pr_debug("pinned program '%s'\n", path);
 
@@ -8196,22 +8195,23 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
 
 	err = check_path(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	if (prog == NULL) {
 		pr_warn("invalid program pointer\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (instance < 0 || instance >= prog->instances.nr) {
 		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
 			instance, prog->name, prog->instances.nr);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	err = unlink(path);
 	if (err != 0)
-		return -errno;
+		return libbpf_err(-errno);
+
 	pr_debug("unpinned program '%s'\n", path);
 
 	return 0;
@@ -8223,20 +8223,20 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 
 	err = make_parent_dir(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	err = check_path(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	if (prog == NULL) {
 		pr_warn("invalid program pointer\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (prog->instances.nr <= 0) {
 		pr_warn("no instances of prog %s to pin\n", prog->name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (prog->instances.nr == 1) {
@@ -8280,7 +8280,7 @@ err_unpin:
 
 	rmdir(path);
 
-	return err;
+	return libbpf_err(err);
 }
 
 int bpf_program__unpin(struct bpf_program *prog, const char *path)
@@ -8289,16 +8289,16 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
 	err = check_path(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	if (prog == NULL) {
 		pr_warn("invalid program pointer\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (prog->instances.nr <= 0) {
 		pr_warn("no instances of prog %s to pin\n", prog->name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (prog->instances.nr == 1) {
@@ -8312,9 +8312,9 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
 		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
 		if (len < 0)
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		else if (len >= PATH_MAX)
-			return -ENAMETOOLONG;
+			return libbpf_err(-ENAMETOOLONG);
 
 		err = bpf_program__unpin_instance(prog, buf, i);
 		if (err)
@@ -8323,7 +8323,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
 	err = rmdir(path);
 	if (err)
-		return -errno;
+		return libbpf_err(-errno);
 
 	return 0;
 }
@@ -8335,14 +8335,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 
 	if (map == NULL) {
 		pr_warn("invalid map pointer\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (map->pin_path) {
 		if (path && strcmp(path, map->pin_path)) {
 			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 				bpf_map__name(map), map->pin_path, path);
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		} else if (map->pinned) {
 			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
 				 bpf_map__name(map), map->pin_path);
@@ -8352,10 +8352,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 		if (!path) {
 			pr_warn("missing a path to pin map '%s' at\n",
 				bpf_map__name(map));
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		} else if (map->pinned) {
 			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
-			return -EEXIST;
+			return libbpf_err(-EEXIST);
 		}
 
 		map->pin_path = strdup(path);
@@ -8367,11 +8367,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 
 	err = make_parent_dir(map->pin_path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	err = check_path(map->pin_path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	if (bpf_obj_pin(map->fd, map->pin_path)) {
 		err = -errno;
@@ -8386,7 +8386,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 out_err:
 	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 	pr_warn("failed to pin map: %s\n", cp);
-	return err;
+	return libbpf_err(err);
 }
 
 int bpf_map__unpin(struct bpf_map *map, const char *path)
@@ -8395,29 +8395,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
 
 	if (map == NULL) {
 		pr_warn("invalid map pointer\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	if (map->pin_path) {
 		if (path && strcmp(path, map->pin_path)) {
 			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 				bpf_map__name(map), map->pin_path, path);
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		}
 		path = map->pin_path;
 	} else if (!path) {
 		pr_warn("no path to unpin map '%s' from\n",
 			bpf_map__name(map));
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	err = check_path(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	err = unlink(path);
 	if (err != 0)
-		return -errno;
+		return libbpf_err(-errno);
 
 	map->pinned = false;
 	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
@@ -8432,7 +8432,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
 	if (path) {
 		new = strdup(path);
 		if (!new)
-			return -errno;
+			return libbpf_err(-errno);
 	}
 
 	free(map->pin_path);
@@ -8466,11 +8466,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 	int err;
 
 	if (!obj)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 
 	if (!obj->loaded) {
 		pr_warn("object not yet loaded; load it first\n");
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 	}
 
 	bpf_object__for_each_map(map, obj) {
@@ -8510,7 +8510,7 @@ err_unpin_maps:
 		bpf_map__unpin(map, NULL);
 	}
 
-	return err;
+	return libbpf_err(err);
 }
 
 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
@@ -8519,7 +8519,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 	int err;
 
 	if (!obj)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 
 	bpf_object__for_each_map(map, obj) {
 		char *pin_path = NULL;
@@ -8531,9 +8531,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 			len = snprintf(buf, PATH_MAX, "%s/%s", path,
 				       bpf_map__name(map));
 			if (len < 0)
-				return -EINVAL;
+				return libbpf_err(-EINVAL);
 			else if (len >= PATH_MAX)
-				return -ENAMETOOLONG;
+				return libbpf_err(-ENAMETOOLONG);
 			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
@@ -8542,7 +8542,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 
 		err = bpf_map__unpin(map, pin_path);
 		if (err)
-			return err;
+			return libbpf_err(err);
 	}
 
 	return 0;
@@ -8554,11 +8554,11 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 	int err;
 
 	if (!obj)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 
 	if (!obj->loaded) {
 		pr_warn("object not yet loaded; load it first\n");
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 	}
 
 	bpf_object__for_each_program(prog, obj) {
@@ -8597,7 +8597,7 @@ err_unpin_programs:
 		bpf_program__unpin(prog, buf);
 	}
 
-	return err;
+	return libbpf_err(err);
 }
 
 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
@@ -8606,7 +8606,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 	int err;
 
 	if (!obj)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 
 	bpf_object__for_each_program(prog, obj) {
 		char buf[PATH_MAX];
@@ -8615,13 +8615,13 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 		len = snprintf(buf, PATH_MAX, "%s/%s", path,
 			       prog->pin_name);
 		if (len < 0)
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		else if (len >= PATH_MAX)
-			return -ENAMETOOLONG;
+			return libbpf_err(-ENAMETOOLONG);
 
 		err = bpf_program__unpin(prog, buf);
 		if (err)
-			return err;
+			return libbpf_err(err);
 	}
 
 	return 0;
@@ -8633,12 +8633,12 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 
 	err = bpf_object__pin_maps(obj, path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	err = bpf_object__pin_programs(obj, path);
 	if (err) {
 		bpf_object__unpin_maps(obj, path);
-		return err;
+		return libbpf_err(err);
 	}
 
 	return 0;
@@ -8735,7 +8735,7 @@ bpf_object__next(struct bpf_object *prev)
 
 const char *bpf_object__name(const struct bpf_object *obj)
 {
-	return obj ? obj->name : ERR_PTR(-EINVAL);
+	return obj ? obj->name : libbpf_err_ptr(-EINVAL);
 }
 
 unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -8756,7 +8756,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 {
 	if (obj->loaded)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	obj->kern_version = kern_version;
 
@@ -8776,7 +8776,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 
 void *bpf_object__priv(const struct bpf_object *obj)
 {
-	return obj ? obj->priv : ERR_PTR(-EINVAL);
+	return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
 }
 
 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
@@ -8812,7 +8812,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 
 	if (p->obj != obj) {
 		pr_warn("error: program handler doesn't match object\n");
-		return NULL;
+		return errno = EINVAL, NULL;
 	}
 
 	idx = (p - obj->programs) + (forward ? 1 : -1);
@@ -8858,7 +8858,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 
 void *bpf_program__priv(const struct bpf_program *prog)
 {
-	return prog ? prog->priv : ERR_PTR(-EINVAL);
+	return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
 }
 
 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
@@ -8885,7 +8885,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
 		title = strdup(title);
 		if (!title) {
 			pr_warn("failed to strdup program title\n");
-			return ERR_PTR(-ENOMEM);
+			return libbpf_err_ptr(-ENOMEM);
 		}
 	}
 
@@ -8900,7 +8900,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 {
 	if (prog->obj->loaded)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	prog->load = autoload;
 	return 0;
@@ -8922,17 +8922,17 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
 	int *instances_fds;
 
 	if (nr_instances <= 0 || !prep)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (prog->instances.nr > 0 || prog->instances.fds) {
 		pr_warn("Can't set pre-processor after loading\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	instances_fds = malloc(sizeof(int) * nr_instances);
 	if (!instances_fds) {
 		pr_warn("alloc memory failed for fds\n");
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 	}
 
 	/* fill all fd with -1 */
@@ -8949,19 +8949,19 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
 	int fd;
 
 	if (!prog)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (n >= prog->instances.nr || n < 0) {
 		pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
 			n, prog->name, prog->instances.nr);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	fd = prog->instances.fds[n];
 	if (fd < 0) {
 		pr_warn("%dth instance of program '%s' is invalid\n",
 			n, prog->name);
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 	}
 
 	return fd;
@@ -8987,7 +8987,7 @@ static bool bpf_program__is_type(const struct bpf_program *prog,
 int bpf_program__set_##NAME(struct bpf_program *prog)		\
 {								\
 	if (!prog)						\
-		return -EINVAL;					\
+		return libbpf_err(-EINVAL);			\
 	bpf_program__set_type(prog, TYPE);			\
 	return 0;						\
 }								\
@@ -9274,7 +9274,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 	char *type_names;
 
 	if (!name)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	sec_def = find_sec_def(name);
 	if (sec_def) {
@@ -9290,7 +9290,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 		free(type_names);
 	}
 
-	return -ESRCH;
+	return libbpf_err(-ESRCH);
 }
 
 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
@@ -9488,9 +9488,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 	int err;
 
 	btf = libbpf_find_kernel_btf();
-	if (IS_ERR(btf)) {
+	err = libbpf_get_error(btf);
+	if (err) {
 		pr_warn("vmlinux BTF is not found\n");
-		return -EINVAL;
+		return libbpf_err(err);
 	}
 
 	err = find_attach_btf_id(btf, name, attach_type);
@@ -9498,7 +9499,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 		pr_warn("%s is not found in vmlinux BTF\n", name);
 
 	btf__free(btf);
-	return err;
+	return libbpf_err(err);
 }
 
 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -9509,10 +9510,11 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 	int err = -EINVAL;
 
 	info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
-	if (IS_ERR_OR_NULL(info_linear)) {
+	err = libbpf_get_error(info_linear);
+	if (err) {
 		pr_warn("failed get_prog_info_linear for FD %d\n",
 			attach_prog_fd);
-		return -EINVAL;
+		return err;
 	}
 	info = &info_linear->info;
 	if (!info->btf_id) {
@@ -9633,13 +9635,13 @@ int libbpf_attach_type_by_name(const char *name,
 	int i;
 
 	if (!name)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
 		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
 			continue;
 		if (!section_defs[i].is_attachable)
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		*attach_type = section_defs[i].expected_attach_type;
 		return 0;
 	}
@@ -9650,17 +9652,17 @@ int libbpf_attach_type_by_name(const char *name,
 		free(type_names);
 	}
 
-	return -EINVAL;
+	return libbpf_err(-EINVAL);
 }
 
 int bpf_map__fd(const struct bpf_map *map)
 {
-	return map ? map->fd : -EINVAL;
+	return map ? map->fd : libbpf_err(-EINVAL);
 }
 
 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
 {
-	return map ? &map->def : ERR_PTR(-EINVAL);
+	return map ? &map->def : libbpf_err_ptr(-EINVAL);
 }
 
 const char *bpf_map__name(const struct bpf_map *map)
@@ -9676,7 +9678,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map)
 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->def.type = type;
 	return 0;
 }
@@ -9689,7 +9691,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map)
 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->def.map_flags = flags;
 	return 0;
 }
@@ -9702,7 +9704,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map)
 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->numa_node = numa_node;
 	return 0;
 }
@@ -9715,7 +9717,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map)
 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->def.key_size = size;
 	return 0;
 }
@@ -9728,7 +9730,7 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->def.value_size = size;
 	return 0;
 }
@@ -9747,7 +9749,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
 		     bpf_map_clear_priv_t clear_priv)
 {
 	if (!map)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (map->priv) {
 		if (map->clear_priv)
@@ -9761,7 +9763,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
 
 void *bpf_map__priv(const struct bpf_map *map)
 {
-	return map ? map->priv : ERR_PTR(-EINVAL);
+	return map ? map->priv : libbpf_err_ptr(-EINVAL);
 }
 
 int bpf_map__set_initial_value(struct bpf_map *map,
@@ -9769,7 +9771,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 {
 	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
 	    size != map->def.value_size || map->fd >= 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	memcpy(map->mmaped, data, size);
 	return 0;
@@ -9801,7 +9803,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map)
 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 {
 	if (map->fd >= 0)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	map->map_ifindex = ifindex;
 	return 0;
 }
@@ -9810,11 +9812,11 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 {
 	if (!bpf_map_type__is_map_in_map(map->def.type)) {
 		pr_warn("error: unsupported map type\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 	if (map->inner_map_fd != -1) {
 		pr_warn("error: inner_map_fd already specified\n");
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 	zfree(&map->inner_map);
 	map->inner_map_fd = fd;
@@ -9828,7 +9830,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 	struct bpf_map *s, *e;
 
 	if (!obj || !obj->maps)
-		return NULL;
+		return errno = EINVAL, NULL;
 
 	s = obj->maps;
 	e = obj->maps + obj->nr_maps;
@@ -9836,7 +9838,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 	if ((m < s) || (m >= e)) {
 		pr_warn("error in %s: map handler doesn't belong to object\n",
 			 __func__);
-		return NULL;
+		return errno = EINVAL, NULL;
 	}
 
 	idx = (m - obj->maps) + i;
@@ -9875,7 +9877,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
 		if (pos->name && !strcmp(pos->name, name))
 			return pos;
 	}
-	return NULL;
+	return errno = ENOENT, NULL;
 }
 
 int
@@ -9887,12 +9889,23 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 {
-	return ERR_PTR(-ENOTSUP);
+	return libbpf_err_ptr(-ENOTSUP);
 }
 
 long libbpf_get_error(const void *ptr)
 {
-	return PTR_ERR_OR_ZERO(ptr);
+	if (!IS_ERR_OR_NULL(ptr))
+		return 0;
+
+	if (IS_ERR(ptr))
+		errno = -PTR_ERR(ptr);
+
+	/* If ptr == NULL, then errno should be already set by the failing
+	 * API, because libbpf never returns NULL on success and it now always
+	 * sets errno on error. So no extra errno handling for ptr == NULL
+	 * case.
+	 */
+	return -errno;
 }
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
@@ -9918,16 +9931,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	int err;
 
 	if (!attr)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (!attr->file)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	open_attr.file = attr->file;
 	open_attr.prog_type = attr->prog_type;
 
 	obj = bpf_object__open_xattr(&open_attr);
-	if (IS_ERR_OR_NULL(obj))
-		return -ENOENT;
+	err = libbpf_get_error(obj);
+	if (err)
+		return libbpf_err(-ENOENT);
 
 	bpf_object__for_each_program(prog, obj) {
 		enum bpf_attach_type attach_type = attr->expected_attach_type;
@@ -9947,7 +9961,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 			 * didn't provide a fallback type, too bad...
 			 */
 			bpf_object__close(obj);
-			return -EINVAL;
+			return libbpf_err(-EINVAL);
 		}
 
 		prog->prog_ifindex = attr->ifindex;
@@ -9965,13 +9979,13 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	if (!first_prog) {
 		pr_warn("object file doesn't contain bpf program\n");
 		bpf_object__close(obj);
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 	}
 
 	err = bpf_object__load(obj);
 	if (err) {
 		bpf_object__close(obj);
-		return err;
+		return libbpf_err(err);
 	}
 
 	*pobj = obj;
@@ -9990,7 +10004,10 @@ struct bpf_link {
 /* Replace link's underlying BPF program with the new one */
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
-	return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+	int ret;
+	
+	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+	return libbpf_err_errno(ret);
 }
 
 /* Release "ownership" of underlying BPF resource (typically, BPF program
@@ -10023,7 +10040,7 @@ int bpf_link__destroy(struct bpf_link *link)
 		free(link->pin_path);
 	free(link);
 
-	return err;
+	return libbpf_err(err);
 }
 
 int bpf_link__fd(const struct bpf_link *link)
@@ -10038,7 +10055,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link)
 
 static int bpf_link__detach_fd(struct bpf_link *link)
 {
-	return close(link->fd);
+	return libbpf_err_errno(close(link->fd));
 }
 
 struct bpf_link *bpf_link__open(const char *path)
@@ -10050,13 +10067,13 @@ struct bpf_link *bpf_link__open(const char *path)
 	if (fd < 0) {
 		fd = -errno;
 		pr_warn("failed to open link at %s: %d\n", path, fd);
-		return ERR_PTR(fd);
+		return libbpf_err_ptr(fd);
 	}
 
 	link = calloc(1, sizeof(*link));
 	if (!link) {
 		close(fd);
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	}
 	link->detach = &bpf_link__detach_fd;
 	link->fd = fd;
@@ -10064,7 +10081,7 @@ struct bpf_link *bpf_link__open(const char *path)
 	link->pin_path = strdup(path);
 	if (!link->pin_path) {
 		bpf_link__destroy(link);
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	}
 
 	return link;
@@ -10080,22 +10097,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path)
 	int err;
 
 	if (link->pin_path)
-		return -EBUSY;
+		return libbpf_err(-EBUSY);
 	err = make_parent_dir(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 	err = check_path(path);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	link->pin_path = strdup(path);
 	if (!link->pin_path)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 
 	if (bpf_obj_pin(link->fd, link->pin_path)) {
 		err = -errno;
 		zfree(&link->pin_path);
-		return err;
+		return libbpf_err(err);
 	}
 
 	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
@@ -10107,11 +10124,11 @@ int bpf_link__unpin(struct bpf_link *link)
 	int err;
 
 	if (!link->pin_path)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	err = unlink(link->pin_path);
 	if (err != 0)
-		return -errno;
+		return libbpf_err_errno(err);
 
 	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
 	zfree(&link->pin_path);
@@ -10127,11 +10144,10 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
 		err = -errno;
 
 	close(link->fd);
-	return err;
+	return libbpf_err(err);
 }
 
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
-						int pfd)
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
 {
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
@@ -10140,18 +10156,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 	if (pfd < 0) {
 		pr_warn("prog '%s': invalid perf event FD %d\n",
 			prog->name, pfd);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
 			prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	link = calloc(1, sizeof(*link));
 	if (!link)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_perf_event;
 	link->fd = pfd;
 
@@ -10163,14 +10179,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 		if (err == -EPROTO)
 			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
 				prog->name, pfd);
-		return ERR_PTR(err);
+		return libbpf_err_ptr(err);
 	}
 	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
 		err = -errno;
 		free(link);
 		pr_warn("prog '%s': failed to enable pfd %d: %s\n",
 			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		return ERR_PTR(err);
+		return libbpf_err_ptr(err);
 	}
 	return link;
 }
@@ -10294,16 +10310,16 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(pfd);
+		return libbpf_err_ptr(pfd);
 	}
 	link = bpf_program__attach_perf_event(prog, pfd);
-	if (IS_ERR(link)) {
+	err = libbpf_get_error(link);
+	if (err) {
 		close(pfd);
-		err = PTR_ERR(link);
 		pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		return link;
+		return libbpf_err_ptr(err);
 	}
 	return link;
 }
@@ -10336,17 +10352,17 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
 			prog->name, retprobe ? "uretprobe" : "uprobe",
 			binary_path, func_offset,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(pfd);
+		return libbpf_err_ptr(pfd);
 	}
 	link = bpf_program__attach_perf_event(prog, pfd);
-	if (IS_ERR(link)) {
+	err = libbpf_get_error(link);
+	if (err) {
 		close(pfd);
-		err = PTR_ERR(link);
 		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
 			prog->name, retprobe ? "uretprobe" : "uprobe",
 			binary_path, func_offset,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		return link;
+		return libbpf_err_ptr(err);
 	}
 	return link;
 }
@@ -10414,16 +10430,16 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
 		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
 			prog->name, tp_category, tp_name,
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(pfd);
+		return libbpf_err_ptr(pfd);
 	}
 	link = bpf_program__attach_perf_event(prog, pfd);
-	if (IS_ERR(link)) {
+	err = libbpf_get_error(link);
+	if (err) {
 		close(pfd);
-		err = PTR_ERR(link);
 		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
 			prog->name, tp_category, tp_name,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		return link;
+		return libbpf_err_ptr(err);
 	}
 	return link;
 }
@@ -10436,20 +10452,19 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
 
 	sec_name = strdup(prog->sec_name);
 	if (!sec_name)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 
 	/* extract "tp/<category>/<name>" */
 	tp_cat = sec_name + sec->len;
 	tp_name = strchr(tp_cat, '/');
 	if (!tp_name) {
-		link = ERR_PTR(-EINVAL);
-		goto out;
+		free(sec_name);
+		return libbpf_err_ptr(-EINVAL);
 	}
 	*tp_name = '\0';
 	tp_name++;
 
 	link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
-out:
 	free(sec_name);
 	return link;
 }
@@ -10464,12 +10479,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	link = calloc(1, sizeof(*link));
 	if (!link)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
 	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
@@ -10478,7 +10493,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 		free(link);
 		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
 			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(pfd);
+		return libbpf_err_ptr(pfd);
 	}
 	link->fd = pfd;
 	return link;
@@ -10502,12 +10517,12 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	link = calloc(1, sizeof(*link));
 	if (!link)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
 	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
@@ -10516,7 +10531,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
 		free(link);
 		pr_warn("prog '%s': failed to attach: %s\n",
 			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(pfd);
+		return libbpf_err_ptr(pfd);
 	}
 	link->fd = pfd;
 	return (struct bpf_link *)link;
@@ -10544,12 +10559,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
 	return bpf_program__attach_lsm(prog);
 }
 
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
-				    struct bpf_program *prog)
-{
-	return bpf_program__attach_iter(prog, NULL);
-}
-
 static struct bpf_link *
 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
 		       const char *target_name)
@@ -10564,12 +10573,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	link = calloc(1, sizeof(*link));
 	if (!link)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
 	attach_type = bpf_program__get_expected_attach_type(prog);
@@ -10580,7 +10589,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
 		pr_warn("prog '%s': failed to attach to %s: %s\n",
 			prog->name, target_name,
 			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(link_fd);
+		return libbpf_err_ptr(link_fd);
 	}
 	link->fd = link_fd;
 	return link;
@@ -10613,19 +10622,19 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
 	if (!!target_fd != !!attach_func_name) {
 		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
 			prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	if (prog->type != BPF_PROG_TYPE_EXT) {
 		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
 			prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	if (target_fd) {
 		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
 		if (btf_id < 0)
-			return ERR_PTR(btf_id);
+			return libbpf_err_ptr(btf_id);
 
 		return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
 	} else {
@@ -10647,7 +10656,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
 	__u32 target_fd = 0;
 
 	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 
 	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
 	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
@@ -10655,12 +10664,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 	}
 
 	link = calloc(1, sizeof(*link));
 	if (!link)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
 	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
@@ -10670,19 +10679,25 @@ bpf_program__attach_iter(struct bpf_program *prog,
 		free(link);
 		pr_warn("prog '%s': failed to attach to iterator: %s\n",
 			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
-		return ERR_PTR(link_fd);
+		return libbpf_err_ptr(link_fd);
 	}
 	link->fd = link_fd;
 	return link;
 }
 
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+				    struct bpf_program *prog)
+{
+	return bpf_program__attach_iter(prog, NULL);
+}
+
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
 	const struct bpf_sec_def *sec_def;
 
 	sec_def = find_sec_def(prog->sec_name);
 	if (!sec_def || !sec_def->attach_fn)
-		return ERR_PTR(-ESRCH);
+		return libbpf_err_ptr(-ESRCH);
 
 	return sec_def->attach_fn(sec_def, prog);
 }
@@ -10705,11 +10720,11 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
 	int err;
 
 	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 
 	link = calloc(1, sizeof(*link));
 	if (!link)
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 
 	st_ops = map->st_ops;
 	for (i = 0; i < btf_vlen(st_ops->type); i++) {
@@ -10729,7 +10744,7 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
 	if (err) {
 		err = -errno;
 		free(link);
-		return ERR_PTR(err);
+		return libbpf_err_ptr(err);
 	}
 
 	link->detach = bpf_link__detach_struct_ops;
@@ -10783,7 +10798,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	}
 
 	ring_buffer_write_tail(header, data_tail);
-	return ret;
+	return libbpf_err(ret);
 }
 
 struct perf_buffer;
@@ -10936,7 +10951,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
 	p.lost_cb = opts ? opts->lost_cb : NULL;
 	p.ctx = opts ? opts->ctx : NULL;
 
-	return __perf_buffer__new(map_fd, page_cnt, &p);
+	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
 struct perf_buffer *
@@ -10952,7 +10967,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
 	p.cpus = opts->cpus;
 	p.map_keys = opts->map_keys;
 
-	return __perf_buffer__new(map_fd, page_cnt, &p);
+	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
@@ -11173,16 +11188,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
 	int i, cnt, err;
 
 	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+	if (cnt < 0)
+		return libbpf_err_errno(cnt);
+
 	for (i = 0; i < cnt; i++) {
 		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
 
 		err = perf_buffer__process_records(pb, cpu_buf);
 		if (err) {
 			pr_warn("error while processing records: %d\n", err);
-			return err;
+			return libbpf_err(err);
 		}
 	}
-	return cnt < 0 ? -errno : cnt;
+	return cnt;
 }
 
 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
@@ -11203,11 +11221,11 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
 	struct perf_cpu_buf *cpu_buf;
 
 	if (buf_idx >= pb->cpu_cnt)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	cpu_buf = pb->cpu_bufs[buf_idx];
 	if (!cpu_buf)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 
 	return cpu_buf->fd;
 }
@@ -11225,11 +11243,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
 	struct perf_cpu_buf *cpu_buf;
 
 	if (buf_idx >= pb->cpu_cnt)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	cpu_buf = pb->cpu_bufs[buf_idx];
 	if (!cpu_buf)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 
 	return perf_buffer__process_records(pb, cpu_buf);
 }
@@ -11247,7 +11265,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
 		err = perf_buffer__process_records(pb, cpu_buf);
 		if (err) {
 			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
-			return err;
+			return libbpf_err(err);
 		}
 	}
 	return 0;
@@ -11359,13 +11377,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
 	void *ptr;
 
 	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
-		return ERR_PTR(-EINVAL);
+		return libbpf_err_ptr(-EINVAL);
 
 	/* step 1: get array dimensions */
 	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
 	if (err) {
 		pr_debug("can't get prog info: %s", strerror(errno));
-		return ERR_PTR(-EFAULT);
+		return libbpf_err_ptr(-EFAULT);
 	}
 
 	/* step 2: calculate total size of all arrays */
@@ -11397,7 +11415,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
 	data_len = roundup(data_len, sizeof(__u64));
 	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
 	if (!info_linear)
-		return ERR_PTR(-ENOMEM);
+		return libbpf_err_ptr(-ENOMEM);
 
 	/* step 4: fill data to info_linear->info */
 	info_linear->arrays = arrays;
@@ -11429,7 +11447,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
 	if (err) {
 		pr_debug("can't get prog info: %s", strerror(errno));
 		free(info_linear);
-		return ERR_PTR(-EFAULT);
+		return libbpf_err_ptr(-EFAULT);
 	}
 
 	/* step 6: verify the data */
@@ -11508,26 +11526,26 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 	int btf_obj_fd = 0, btf_id = 0, err;
 
 	if (!prog || attach_prog_fd < 0 || !attach_func_name)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (prog->obj->loaded)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (attach_prog_fd) {
 		btf_id = libbpf_find_prog_btf_id(attach_func_name,
 						 attach_prog_fd);
 		if (btf_id < 0)
-			return btf_id;
+			return libbpf_err(btf_id);
 	} else {
 		/* load btf_vmlinux, if not yet */
 		err = bpf_object__load_vmlinux_btf(prog->obj, true);
 		if (err)
-			return err;
+			return libbpf_err(err);
 		err = find_kernel_btf_id(prog->obj, attach_func_name,
 					 prog->expected_attach_type,
 					 &btf_obj_fd, &btf_id);
 		if (err)
-			return err;
+			return libbpf_err(err);
 	}
 
 	prog->attach_btf_id = btf_id;
@@ -11626,7 +11644,7 @@ int libbpf_num_possible_cpus(void)
 
 	err = parse_cpu_mask_file(fcpu, &mask, &n);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	tmp_cpus = 0;
 	for (i = 0; i < n; i++) {
@@ -11646,7 +11664,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
 		.object_name = s->name,
 	);
 	struct bpf_object *obj;
-	int i;
+	int i, err;
 
 	/* Attempt to preserve opts->object_name, unless overriden by user
 	 * explicitly. Overwriting object name for skeletons is discouraged,
@@ -11661,10 +11679,11 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
 	}
 
 	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
-	if (IS_ERR(obj)) {
-		pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
-			s->name, PTR_ERR(obj));
-		return PTR_ERR(obj);
+	err = libbpf_get_error(obj);
+	if (err) {
+		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
+			s->name, err);
+		return libbpf_err(err);
 	}
 
 	*s->obj = obj;
@@ -11677,7 +11696,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
 		*map = bpf_object__find_map_by_name(obj, name);
 		if (!*map) {
 			pr_warn("failed to find skeleton map '%s'\n", name);
-			return -ESRCH;
+			return libbpf_err(-ESRCH);
 		}
 
 		/* externs shouldn't be pre-setup from user code */
@@ -11692,7 +11711,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
 		*prog = bpf_object__find_program_by_name(obj, name);
 		if (!*prog) {
 			pr_warn("failed to find skeleton program '%s'\n", name);
-			return -ESRCH;
+			return libbpf_err(-ESRCH);
 		}
 	}
 
@@ -11706,7 +11725,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 	err = bpf_object__load(*s->obj);
 	if (err) {
 		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
-		return err;
+		return libbpf_err(err);
 	}
 
 	for (i = 0; i < s->map_cnt; i++) {
@@ -11745,7 +11764,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 			*mmaped = NULL;
 			pr_warn("failed to re-mmap() map '%s': %d\n",
 				 bpf_map__name(map), err);
-			return err;
+			return libbpf_err(err);
 		}
 	}
 
@@ -11754,7 +11773,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 
 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 {
-	int i;
+	int i, err;
 
 	for (i = 0; i < s->prog_cnt; i++) {
 		struct bpf_program *prog = *s->progs[i].prog;
@@ -11769,10 +11788,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 			continue;
 
 		*link = sec_def->attach_fn(sec_def, prog);
-		if (IS_ERR(*link)) {
-			pr_warn("failed to auto-attach program '%s': %ld\n",
-				bpf_program__name(prog), PTR_ERR(*link));
-			return PTR_ERR(*link);
+		err = libbpf_get_error(*link);
+		if (err) {
+			pr_warn("failed to auto-attach program '%s': %d\n",
+				bpf_program__name(prog), err);
+			return libbpf_err(err);
 		}
 	}
 
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index 0afb51f7a919..96f67a772a1b 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -12,6 +12,7 @@
 #include <string.h>
 
 #include "libbpf.h"
+#include "libbpf_internal.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -39,7 +40,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
 int libbpf_strerror(int err, char *buf, size_t size)
 {
 	if (!buf || !size)
-		return -1;
+		return libbpf_err(-EINVAL);
 
 	err = err > 0 ? err : -err;
 
@@ -48,7 +49,7 @@ int libbpf_strerror(int err, char *buf, size_t size)
 
 		ret = strerror_r(err, buf, size);
 		buf[size - 1] = '\0';
-		return ret;
+		return libbpf_err_errno(ret);
 	}
 
 	if (err < __LIBBPF_ERRNO__END) {
@@ -62,5 +63,5 @@ int libbpf_strerror(int err, char *buf, size_t size)
 
 	snprintf(buf, size, "Unknown libbpf error %d", err);
 	buf[size - 1] = '\0';
-	return -1;
+	return libbpf_err(-ENOENT);
 }
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 8d59683ab246..bc3a6b402e8e 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -462,4 +462,31 @@ static inline int libbpf_err_errno(int ret)
 	return ret;
 }
 
+/* handle error for pointer-returning APIs, err is assumed to be < 0 always */
+static inline void *libbpf_err_ptr(int err)
+{
+	/* set errno on error, this doesn't break anything */
+	errno = -err;
+
+	if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+		return NULL;
+
+	/* legacy: encode err as ptr */
+	return ERR_PTR(err);
+}
+
+/* handle pointer-returning APIs' error handling */
+static inline void *libbpf_ptr(void *ret)
+{
+	/* set errno on error, this doesn't break anything */
+	if (IS_ERR(ret))
+		errno = -PTR_ERR(ret);
+
+	if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+		return IS_ERR(ret) ? NULL : ret;
+
+	/* legacy: pass-through original pointer */
+	return ret;
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 1dca41a24f75..10911a8cad0f 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -220,16 +220,16 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
 	int err;
 
 	if (!OPTS_VALID(opts, bpf_linker_opts))
-		return NULL;
+		return errno = EINVAL, NULL;
 
 	if (elf_version(EV_CURRENT) == EV_NONE) {
 		pr_warn_elf("libelf initialization failed");
-		return NULL;
+		return errno = EINVAL, NULL;
 	}
 
 	linker = calloc(1, sizeof(*linker));
 	if (!linker)
-		return NULL;
+		return errno = ENOMEM, NULL;
 
 	linker->fd = -1;
 
@@ -241,7 +241,7 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
 
 err_out:
 	bpf_linker__free(linker);
-	return NULL;
+	return errno = -err, NULL;
 }
 
 static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
@@ -444,10 +444,10 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
 	int err = 0;
 
 	if (!OPTS_VALID(opts, bpf_linker_file_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (!linker->elf)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
 	err = err ?: linker_append_sec_data(linker, &obj);
@@ -467,7 +467,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
 	if (obj.fd >= 0)
 		close(obj.fd);
 
-	return err;
+	return libbpf_err(err);
 }
 
 static bool is_dwarf_sec_name(const char *name)
@@ -2548,11 +2548,11 @@ int bpf_linker__finalize(struct bpf_linker *linker)
 	int err, i;
 
 	if (!linker->elf)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	err = finalize_btf(linker);
 	if (err)
-		return err;
+		return libbpf_err(err);
 
 	/* Finalize strings */
 	strs_sz = strset__data_size(linker->strtab_strs);
@@ -2584,14 +2584,14 @@ int bpf_linker__finalize(struct bpf_linker *linker)
 	if (elf_update(linker->elf, ELF_C_NULL) < 0) {
 		err = -errno;
 		pr_warn_elf("failed to finalize ELF layout");
-		return err;
+		return libbpf_err(err);
 	}
 
 	/* Write out final ELF contents */
 	if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
 		err = -errno;
 		pr_warn_elf("failed to write ELF contents");
-		return err;
+		return libbpf_err(err);
 	}
 
 	elf_end(linker->elf);
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 47444588e0d2..d743c8721aa7 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -225,22 +225,26 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
 			     const struct bpf_xdp_set_link_opts *opts)
 {
-	int old_fd = -1;
+	int old_fd = -1, ret;
 
 	if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (OPTS_HAS(opts, old_fd)) {
 		old_fd = OPTS_GET(opts, old_fd, -1);
 		flags |= XDP_FLAGS_REPLACE;
 	}
 
-	return __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+	ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+	return libbpf_err(ret);
 }
 
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 {
-	return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+	int ret;
+
+	ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+	return libbpf_err(ret);
 }
 
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
@@ -321,13 +325,13 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 	};
 
 	if (flags & ~XDP_FLAGS_MASK || !info_size)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	/* Check whether the single {HW,DRV,SKB} mode is set */
 	flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
 	mask = flags - 1;
 	if (flags && flags & mask)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	xdp_id.ifindex = ifindex;
 	xdp_id.flags = flags;
@@ -341,7 +345,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 		memset((void *) info + sz, 0, info_size - sz);
 	}
 
-	return ret;
+	return libbpf_err(ret);
 }
 
 static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
@@ -369,7 +373,7 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
 	if (!ret)
 		*prog_id = get_xdp_id(&info, flags);
 
-	return ret;
+	return libbpf_err(ret);
 }
 
 typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t,
@@ -463,11 +467,14 @@ static int tc_qdisc_delete(struct bpf_tc_hook *hook)
 
 int bpf_tc_hook_create(struct bpf_tc_hook *hook)
 {
+	int ret;
+
 	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
 	    OPTS_GET(hook, ifindex, 0) <= 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
-	return tc_qdisc_create_excl(hook);
+	ret = tc_qdisc_create_excl(hook);
+	return libbpf_err(ret);
 }
 
 static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
@@ -478,18 +485,18 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
 {
 	if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
 	    OPTS_GET(hook, ifindex, 0) <= 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	switch (OPTS_GET(hook, attach_point, 0)) {
 	case BPF_TC_INGRESS:
 	case BPF_TC_EGRESS:
-		return __bpf_tc_detach(hook, NULL, true);
+		return libbpf_err(__bpf_tc_detach(hook, NULL, true));
 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
-		return tc_qdisc_delete(hook);
+		return libbpf_err(tc_qdisc_delete(hook));
 	case BPF_TC_CUSTOM:
-		return -EOPNOTSUPP;
+		return libbpf_err(-EOPNOTSUPP);
 	default:
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 }
 
@@ -574,7 +581,7 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 	if (!hook || !opts ||
 	    !OPTS_VALID(hook, bpf_tc_hook) ||
 	    !OPTS_VALID(opts, bpf_tc_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	ifindex      = OPTS_GET(hook, ifindex, 0);
 	parent       = OPTS_GET(hook, parent, 0);
@@ -587,11 +594,11 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 	flags        = OPTS_GET(opts, flags, 0);
 
 	if (ifindex <= 0 || !prog_fd || prog_id)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (priority > UINT16_MAX)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (flags & ~BPF_TC_F_REPLACE)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
 	protocol = ETH_P_ALL;
@@ -608,32 +615,32 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
 	ret = tc_get_tcm_parent(attach_point, &parent);
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	req.tc.tcm_parent = parent;
 
 	ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS);
 	if (!nla)
-		return -EMSGSIZE;
+		return libbpf_err(-EMSGSIZE);
 	ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd);
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
 	ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags,
 			 sizeof(bpf_flags));
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	nlattr_end_nested(&req.nh, nla);
 
 	info.opts = opts;
 
 	ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	if (!info.processed)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 	return ret;
 }
 
@@ -708,7 +715,13 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 int bpf_tc_detach(const struct bpf_tc_hook *hook,
 		  const struct bpf_tc_opts *opts)
 {
-	return !opts ? -EINVAL : __bpf_tc_detach(hook, opts, false);
+	int ret;
+
+	if (!opts)
+		return libbpf_err(-EINVAL);
+
+	ret = __bpf_tc_detach(hook, opts, false);
+	return libbpf_err(ret);
 }
 
 int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
@@ -725,7 +738,7 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 	if (!hook || !opts ||
 	    !OPTS_VALID(hook, bpf_tc_hook) ||
 	    !OPTS_VALID(opts, bpf_tc_opts))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	ifindex      = OPTS_GET(hook, ifindex, 0);
 	parent       = OPTS_GET(hook, parent, 0);
@@ -739,9 +752,9 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
 	if (ifindex <= 0 || flags || prog_fd || prog_id ||
 	    !handle || !priority)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (priority > UINT16_MAX)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	protocol = ETH_P_ALL;
 
@@ -756,19 +769,19 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
 	ret = tc_get_tcm_parent(attach_point, &parent);
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	req.tc.tcm_parent = parent;
 
 	ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 
 	info.opts = opts;
 
 	ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
 	if (ret < 0)
-		return ret;
+		return libbpf_err(ret);
 	if (!info.processed)
-		return -ENOENT;
+		return libbpf_err(-ENOENT);
 	return ret;
 }
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 1d80ad4e0de8..8bc117bcc7bc 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -69,23 +69,23 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		err = -errno;
 		pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
 			map_fd, err);
-		return err;
+		return libbpf_err(err);
 	}
 
 	if (info.type != BPF_MAP_TYPE_RINGBUF) {
 		pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
 			map_fd);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
 	if (!tmp)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 	rb->rings = tmp;
 
 	tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
 	if (!tmp)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 	rb->events = tmp;
 
 	r = &rb->rings[rb->ring_cnt];
@@ -103,7 +103,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		err = -errno;
 		pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
 			map_fd, err);
-		return err;
+		return libbpf_err(err);
 	}
 	r->consumer_pos = tmp;
 
@@ -118,7 +118,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		ringbuf_unmap_ring(rb, r);
 		pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
 			map_fd, err);
-		return err;
+		return libbpf_err(err);
 	}
 	r->producer_pos = tmp;
 	r->data = tmp + rb->page_size;
@@ -133,7 +133,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 		ringbuf_unmap_ring(rb, r);
 		pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
 			map_fd, err);
-		return err;
+		return libbpf_err(err);
 	}
 
 	rb->ring_cnt++;
@@ -165,11 +165,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
 	int err;
 
 	if (!OPTS_VALID(opts, ring_buffer_opts))
-		return NULL;
+		return errno = EINVAL, NULL;
 
 	rb = calloc(1, sizeof(*rb));
 	if (!rb)
-		return NULL;
+		return errno = ENOMEM, NULL;
 
 	rb->page_size = getpagesize();
 
@@ -188,7 +188,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
 
 err_out:
 	ring_buffer__free(rb);
-	return NULL;
+	return errno = -err, NULL;
 }
 
 static inline int roundup_len(__u32 len)
@@ -260,7 +260,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
 
 		err = ringbuf_process_ring(ring);
 		if (err < 0)
-			return err;
+			return libbpf_err(err);
 		res += err;
 	}
 	if (res > INT_MAX)
@@ -279,7 +279,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
 	cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
 	if (cnt < 0)
-		return -errno;
+		return libbpf_err(-errno);
 
 	for (i = 0; i < cnt; i++) {
 		__u32 ring_id = rb->events[i].data.fd;
@@ -287,7 +287,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
 		err = ringbuf_process_ring(ring);
 		if (err < 0)
-			return err;
+			return libbpf_err(err);
 		res += err;
 	}
 	if (res > INT_MAX)

From 9c6c0449deb41dbe3a66ab9adfd08020bba6c43d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 24 May 2021 20:59:35 -0700
Subject: [PATCH 0548/3292] bpftool: Set errno on skeleton failures and
 propagate errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow libbpf's error handling conventions and pass through errors and errno
properly. Skeleton code always returned NULL on errors (not ERR_PTR(err)), so
there are no backwards compatibility concerns. But now we also set errno
properly, so it's possible to distinguish different reasons for failure, if
necessary.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210525035935.1461796-6-andrii@kernel.org
---
 tools/bpf/bpftool/gen.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 13b0aa789178..1d71ff8c52fa 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -713,6 +713,7 @@ static int do_skeleton(int argc, char **argv)
 		#ifndef %2$s						    \n\
 		#define %2$s						    \n\
 									    \n\
+		#include <errno.h>					    \n\
 		#include <stdlib.h>					    \n\
 		#include <bpf/libbpf.h>					    \n\
 									    \n\
@@ -793,18 +794,23 @@ static int do_skeleton(int argc, char **argv)
 		%1$s__open_opts(const struct bpf_object_open_opts *opts)    \n\
 		{							    \n\
 			struct %1$s *obj;				    \n\
+			int err;					    \n\
 									    \n\
 			obj = (struct %1$s *)calloc(1, sizeof(*obj));	    \n\
-			if (!obj)					    \n\
+			if (!obj) {					    \n\
+				errno = ENOMEM;				    \n\
 				return NULL;				    \n\
-			if (%1$s__create_skeleton(obj))			    \n\
-				goto err;				    \n\
-			if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
-				goto err;				    \n\
+			}						    \n\
+									    \n\
+			err = %1$s__create_skeleton(obj);		    \n\
+			err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\
+			if (err)					    \n\
+				goto err_out;				    \n\
 									    \n\
 			return obj;					    \n\
-		err:							    \n\
+		err_out:						    \n\
 			%1$s__destroy(obj);				    \n\
+			errno = -err;					    \n\
 			return NULL;					    \n\
 		}							    \n\
 									    \n\
@@ -824,12 +830,15 @@ static int do_skeleton(int argc, char **argv)
 		%1$s__open_and_load(void)				    \n\
 		{							    \n\
 			struct %1$s *obj;				    \n\
+			int err;					    \n\
 									    \n\
 			obj = %1$s__open();				    \n\
 			if (!obj)					    \n\
 				return NULL;				    \n\
-			if (%1$s__load(obj)) {				    \n\
+			err = %1$s__load(obj);				    \n\
+			if (err) {					    \n\
 				%1$s__destroy(obj);			    \n\
+				errno = -err;				    \n\
 				return NULL;				    \n\
 			}						    \n\
 			return obj;					    \n\
@@ -860,7 +869,7 @@ static int do_skeleton(int argc, char **argv)
 									    \n\
 			s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
 			if (!s)						    \n\
-				return -1;				    \n\
+				goto err;				    \n\
 			obj->skeleton = s;				    \n\
 									    \n\
 			s->sz = sizeof(*s);				    \n\
@@ -949,7 +958,7 @@ static int do_skeleton(int argc, char **argv)
 			return 0;					    \n\
 		err:							    \n\
 			bpf_object__destroy_skeleton(s);		    \n\
-			return -1;					    \n\
+			return -ENOMEM;					    \n\
 		}							    \n\
 									    \n\
 		#endif /* %s */						    \n\

From a925316af80ae93186f00d80163f5a3c7f5b4782 Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Fri, 14 May 2021 15:55:13 +0800
Subject: [PATCH 0549/3292] net: Remove unnecessary variables

It is not necessary to define variables to receive -ENOMEM,
directly return -ENOMEM.

Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index ef9b4ac03e7b..de24a7d474df 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -141,7 +141,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
 	struct sock *sk;
 	struct pfkey_sock *pfk;
-	int err;
 
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -150,10 +149,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
 	if (protocol != PF_KEY_V2)
 		return -EPROTONOSUPPORT;
 
-	err = -ENOMEM;
 	sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
 	if (sk == NULL)
-		goto out;
+		return -ENOMEM;
 
 	pfk = pfkey_sk(sk);
 	mutex_init(&pfk->dump_lock);
@@ -169,8 +167,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
 	pfkey_insert(sk);
 
 	return 0;
-out:
-	return err;
 }
 
 static int pfkey_release(struct socket *sock)

From cb261b594b4108668e00f565184c7c221efe0359 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 19 May 2021 17:07:44 +0800
Subject: [PATCH 0550/3292] bpf: Run devmap xdp_prog on flush instead of bulk
 enqueue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This changes the devmap XDP program support to run the program when the
bulk queue is flushed instead of before the frame is enqueued. This has
a couple of benefits:

- It "sorts" the packets by destination devmap entry, and then runs the
  same BPF program on all the packets in sequence. This ensures that we
  keep the XDP program and destination device properties hot in I-cache.

- It makes the multicast implementation simpler because it can just
  enqueue packets using bq_enqueue() without having to deal with the
  devmap program at all.

The drawback is that if the devmap program drops the packet, the enqueue
step is redundant. However, arguably this is mostly visible in a
micro-benchmark, and with more mixed traffic the I-cache benefit should
win out. The performance impact of just this patch is as follows:

Using 2 10Gb i40e NIC, redirecting one to another, or into a veth interface,
which do XDP_DROP on veth peer. With xdp_redirect_map in sample/bpf, send
pkts via pktgen cmd:
./pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -t 10 -s 64

There are about +/- 0.1M deviation for native testing, the performance
improved for the base-case, but some drop back with xdp devmap prog attached.

Version          | Test                           | Generic | Native | Native + 2nd xdp_prog
5.12 rc4         | xdp_redirect_map   i40e->i40e  |    1.9M |   9.6M |  8.4M
5.12 rc4         | xdp_redirect_map   i40e->veth  |    1.7M |  11.7M |  9.8M
5.12 rc4 + patch | xdp_redirect_map   i40e->i40e  |    1.9M |   9.8M |  8.0M
5.12 rc4 + patch | xdp_redirect_map   i40e->veth  |    1.7M |  12.0M |  9.4M

When bq_xmit_all() is called from bq_enqueue(), another packet will
always be enqueued immediately after, so clearing dev_rx, xdp_prog and
flush_node in bq_xmit_all() is redundant. Move the clear to __dev_flush(),
and only check them once in bq_enqueue() since they are all modified
together.

This change also has the side effect of extending the lifetime of the
RCU-protected xdp_prog that lives inside the devmap entries: Instead of
just living for the duration of the XDP program invocation, the
reference now lives all the way until the bq is flushed. This is safe
because the bq flush happens at the end of the NAPI poll loop, so
everything happens between a local_bh_disable()/local_bh_enable() pair.
However, this is by no means obvious from looking at the call sites; in
particular, some drivers have an additional rcu_read_lock() around only
the XDP program invocation, which only confuses matters further.
Cleaning this up will be done in a separate patch series.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210519090747.1655268-2-liuhangbin@gmail.com
---
 kernel/bpf/devmap.c | 127 ++++++++++++++++++++++++++------------------
 1 file changed, 76 insertions(+), 51 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d60d617ec0d7..642264e32abd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
 	struct list_head flush_node;
 	struct net_device *dev;
 	struct net_device *dev_rx;
+	struct bpf_prog *xdp_prog;
 	unsigned int count;
 };
 
@@ -326,22 +327,71 @@ bool dev_map_can_have_prog(struct bpf_map *map)
 	return false;
 }
 
+static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
+				struct xdp_frame **frames, int n,
+				struct net_device *dev)
+{
+	struct xdp_txq_info txq = { .dev = dev };
+	struct xdp_buff xdp;
+	int i, nframes = 0;
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		u32 act;
+		int err;
+
+		xdp_convert_frame_to_buff(xdpf, &xdp);
+		xdp.txq = &txq;
+
+		act = bpf_prog_run_xdp(xdp_prog, &xdp);
+		switch (act) {
+		case XDP_PASS:
+			err = xdp_update_frame_from_buff(&xdp, xdpf);
+			if (unlikely(err < 0))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				frames[nframes++] = xdpf;
+			break;
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			fallthrough;
+		case XDP_ABORTED:
+			trace_xdp_exception(dev, xdp_prog, act);
+			fallthrough;
+		case XDP_DROP:
+			xdp_return_frame_rx_napi(xdpf);
+			break;
+		}
+	}
+	return nframes; /* sent frames count */
+}
+
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
-	int sent = 0, err = 0;
+	int sent = 0, drops = 0, err = 0;
+	unsigned int cnt = bq->count;
+	int to_send = cnt;
 	int i;
 
-	if (unlikely(!bq->count))
+	if (unlikely(!cnt))
 		return;
 
-	for (i = 0; i < bq->count; i++) {
+	for (i = 0; i < cnt; i++) {
 		struct xdp_frame *xdpf = bq->q[i];
 
 		prefetch(xdpf);
 	}
 
-	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
+	if (bq->xdp_prog) {
+		to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+		if (!to_send)
+			goto out;
+
+		drops = cnt - to_send;
+	}
+
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags);
 	if (sent < 0) {
 		/* If ndo_xdp_xmit fails with an errno, no frames have
 		 * been xmit'ed.
@@ -353,13 +403,13 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 	/* If not all frames have been transmitted, it is our
 	 * responsibility to free them
 	 */
-	for (i = sent; unlikely(i < bq->count); i++)
+	for (i = sent; unlikely(i < to_send); i++)
 		xdp_return_frame_rx_napi(bq->q[i]);
 
-	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, bq->count - sent, err);
-	bq->dev_rx = NULL;
+out:
+	drops = cnt - sent;
 	bq->count = 0;
-	__list_del_clearprev(&bq->flush_node);
+	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
 }
 
 /* __dev_flush is called from xdp_do_flush() which _must_ be signaled
@@ -377,8 +427,12 @@ void __dev_flush(void)
 	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
 	struct xdp_dev_bulk_queue *bq, *tmp;
 
-	list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
+	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
 		bq_xmit_all(bq, XDP_XMIT_FLUSH);
+		bq->dev_rx = NULL;
+		bq->xdp_prog = NULL;
+		__list_del_clearprev(&bq->flush_node);
+	}
 }
 
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
@@ -401,7 +455,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
  * Thus, safe percpu variable access.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
-		       struct net_device *dev_rx)
+		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
 	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
 	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
@@ -412,18 +466,22 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 	/* Ingress dev_rx will be the same for all xdp_frame's in
 	 * bulk_queue, because bq stored per-CPU and must be flushed
 	 * from net_device drivers NAPI func end.
+	 *
+	 * Do the same with xdp_prog and flush_list since these fields
+	 * are only ever modified together.
 	 */
-	if (!bq->dev_rx)
+	if (!bq->dev_rx) {
 		bq->dev_rx = dev_rx;
+		bq->xdp_prog = xdp_prog;
+		list_add(&bq->flush_node, flush_list);
+	}
 
 	bq->q[bq->count++] = xdpf;
-
-	if (!bq->flush_node.prev)
-		list_add(&bq->flush_node, flush_list);
 }
 
 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-			       struct net_device *dev_rx)
+				struct net_device *dev_rx,
+				struct bpf_prog *xdp_prog)
 {
 	struct xdp_frame *xdpf;
 	int err;
@@ -439,42 +497,14 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	bq_enqueue(dev, xdpf, dev_rx);
+	bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
 	return 0;
 }
 
-static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
-					 struct xdp_buff *xdp,
-					 struct bpf_prog *xdp_prog)
-{
-	struct xdp_txq_info txq = { .dev = dev };
-	u32 act;
-
-	xdp_set_data_meta_invalid(xdp);
-	xdp->txq = &txq;
-
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
-	switch (act) {
-	case XDP_PASS:
-		return xdp;
-	case XDP_DROP:
-		break;
-	default:
-		bpf_warn_invalid_xdp_action(act);
-		fallthrough;
-	case XDP_ABORTED:
-		trace_xdp_exception(dev, xdp_prog, act);
-		break;
-	}
-
-	xdp_return_buff(xdp);
-	return NULL;
-}
-
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
-	return __xdp_enqueue(dev, xdp, dev_rx);
+	return __xdp_enqueue(dev, xdp, dev_rx, NULL);
 }
 
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
@@ -482,12 +512,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 
-	if (dst->xdp_prog) {
-		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
-		if (!xdp)
-			return 0;
-	}
-	return __xdp_enqueue(dev, xdp, dev_rx);
+	return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
 }
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,

From e624d4ed4aa8cc3c69d1359b0aaea539203ed266 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 19 May 2021 17:07:45 +0800
Subject: [PATCH 0551/3292] xdp: Extend xdp_redirect_map with broadcast support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds two flags BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS to
extend xdp_redirect_map for broadcast support.

With BPF_F_BROADCAST the packet will be broadcasted to all the interfaces
in the map. with BPF_F_EXCLUDE_INGRESS the ingress interface will be
excluded when do broadcasting.

When getting the devices in dev hash map via dev_map_hash_get_next_key(),
there is a possibility that we fall back to the first key when a device
was removed. This will duplicate packets on some interfaces. So just walk
the whole buckets to avoid this issue. For dev array map, we also walk the
whole map to find valid interfaces.

Function bpf_clear_redirect_map() was removed in
commit ee75aef23afe ("bpf, xdp: Restructure redirect actions").
Add it back as we need to use ri->map again.

With test topology:
  +-------------------+             +-------------------+
  | Host A (i40e 10G) |  ---------- | eno1(i40e 10G)    |
  +-------------------+             |                   |
                                    |   Host B          |
  +-------------------+             |                   |
  | Host C (i40e 10G) |  ---------- | eno2(i40e 10G)    |
  +-------------------+             |                   |
                                    |          +------+ |
                                    | veth0 -- | Peer | |
                                    | veth1 -- |      | |
                                    | veth2 -- |  NS  | |
                                    |          +------+ |
                                    +-------------------+

On Host A:
 # pktgen/pktgen_sample03_burst_single_flow.sh -i eno1 -d $dst_ip -m $dst_mac -s 64

On Host B(Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 128G Memory):
Use xdp_redirect_map and xdp_redirect_map_multi in samples/bpf for testing.
All the veth peers in the NS have a XDP_DROP program loaded. The
forward_map max_entries in xdp_redirect_map_multi is modify to 4.

Testing the performance impact on the regular xdp_redirect path with and
without patch (to check impact of additional check for broadcast mode):

5.12 rc4         | redirect_map        i40e->i40e      |    2.0M |  9.7M
5.12 rc4         | redirect_map        i40e->veth      |    1.7M | 11.8M
5.12 rc4 + patch | redirect_map        i40e->i40e      |    2.0M |  9.6M
5.12 rc4 + patch | redirect_map        i40e->veth      |    1.7M | 11.7M

Testing the performance when cloning packets with the redirect_map_multi
test, using a redirect map size of 4, filled with 1-3 devices:

5.12 rc4 + patch | redirect_map multi  i40e->veth (x1) |    1.7M | 11.4M
5.12 rc4 + patch | redirect_map multi  i40e->veth (x2) |    1.1M |  4.3M
5.12 rc4 + patch | redirect_map multi  i40e->veth (x3) |    0.8M |  2.6M

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20210519090747.1655268-3-liuhangbin@gmail.com
---
 include/linux/bpf.h            |  20 ++++
 include/linux/filter.h         |  19 +++-
 include/net/xdp.h              |   1 +
 include/trace/events/xdp.h     |   6 +-
 include/uapi/linux/bpf.h       |  14 ++-
 kernel/bpf/cpumap.c            |   3 +-
 kernel/bpf/devmap.c            | 183 ++++++++++++++++++++++++++++++++-
 net/core/filter.c              |  37 ++++++-
 net/core/xdp.c                 |  28 +++++
 net/xdp/xskmap.c               |   3 +-
 tools/include/uapi/linux/bpf.h |  14 ++-
 11 files changed, 313 insertions(+), 15 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1e9a0ff3217b..86dec5001ae2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1501,8 +1501,13 @@ int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+			   struct bpf_prog *xdp_prog, struct bpf_map *map,
+			   bool exclude_ingress);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
 void __cpu_map_flush(void);
@@ -1670,6 +1675,13 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return 0;
 }
 
+static inline
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, bool exclude_ingress)
+{
+	return 0;
+}
+
 struct sk_buff;
 
 static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
@@ -1679,6 +1691,14 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
 	return 0;
 }
 
+static inline
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+			   struct bpf_prog *xdp_prog, struct bpf_map *map,
+			   bool exclude_ingress)
+{
+	return 0;
+}
+
 static inline void __cpu_map_flush(void)
 {
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9a09547bc7ba..c5ad7df029ed 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,6 +646,7 @@ struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
+	struct bpf_map *map;
 	u32 map_id;
 	enum bpf_map_type map_type;
 	u32 kern_flags;
@@ -1464,17 +1465,19 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+						  u64 flags, const u64 flag_mask,
 						  void *lookup_elem(struct bpf_map *map, u32 key))
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
 
 	/* Lower bits of the flags are used as return code on lookup failure */
-	if (unlikely(flags > XDP_TX))
+	if (unlikely(flags & ~(action_mask | flag_mask)))
 		return XDP_ABORTED;
 
 	ri->tgt_value = lookup_elem(map, ifindex);
-	if (unlikely(!ri->tgt_value)) {
+	if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
 		/* If the lookup fails we want to clear out the state in the
 		 * redirect_info struct completely, so that if an eBPF program
 		 * performs multiple lookups, the last one always takes
@@ -1482,13 +1485,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
 		 */
 		ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
 		ri->map_type = BPF_MAP_TYPE_UNSPEC;
-		return flags;
+		return flags & action_mask;
 	}
 
 	ri->tgt_index = ifindex;
 	ri->map_id = map->id;
 	ri->map_type = map->map_type;
 
+	if (flags & BPF_F_BROADCAST) {
+		WRITE_ONCE(ri->map, map);
+		ri->flags = flags;
+	} else {
+		WRITE_ONCE(ri->map, NULL);
+		ri->flags = 0;
+	}
+
 	return XDP_REDIRECT;
 }
 
diff --git a/include/net/xdp.h b/include/net/xdp.h
index a5bc214a49d9..5533f0ab2afc 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -170,6 +170,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 					 struct net_device *dev);
 int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
 
 static inline
 void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index fcad3645a70b..c40fc97f9417 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -110,7 +110,11 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
 		u32 ifindex = 0, map_index = index;
 
 		if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
-			ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+			/* Just leave to_ifindex to 0 if do broadcast redirect,
+			 * as tgt will be NULL.
+			 */
+			if (tgt)
+				ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
 		} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
 			ifindex = index;
 			map_index = 0;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 562adeac1d67..2c1ba70abbf1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2555,8 +2555,12 @@ union bpf_attr {
  * 		The lower two bits of *flags* are used as the return code if
  * 		the map lookup fails. This is so that the return value can be
  * 		one of the XDP program return codes up to **XDP_TX**, as chosen
- * 		by the caller. Any higher bits in the *flags* argument must be
- * 		unset.
+ * 		by the caller. The higher bits of *flags* can be set to
+ * 		BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ * 		With BPF_F_BROADCAST the packet will be broadcasted to all the
+ * 		interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
+ * 		interface will be excluded when do broadcasting.
  *
  * 		See also **bpf_redirect**\ (), which only supports redirecting
  * 		to an ifindex, but doesn't require a map to do so.
@@ -5122,6 +5126,12 @@ enum {
 	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
 };
 
+/* Flags for bpf_redirect_map helper */
+enum {
+	BPF_F_BROADCAST		= (1ULL << 3),
+	BPF_F_EXCLUDE_INGRESS	= (1ULL << 4),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 5dd3e866599a..a1a0c4e791c6 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -601,7 +601,8 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+				      __cpu_map_lookup_elem);
 }
 
 static int cpu_map_btf_id;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 642264e32abd..f9148daab0e3 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -198,6 +198,7 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* Make sure prior __dev_map_entry_free() have completed. */
@@ -515,6 +516,99 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
 }
 
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
+			 int exclude_ifindex)
+{
+	if (!obj || obj->dev->ifindex == exclude_ifindex ||
+	    !obj->dev->netdev_ops->ndo_xdp_xmit)
+		return false;
+
+	if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data))
+		return false;
+
+	return true;
+}
+
+static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
+				 struct net_device *dev_rx,
+				 struct xdp_frame *xdpf)
+{
+	struct xdp_frame *nxdpf;
+
+	nxdpf = xdpf_clone(xdpf);
+	if (!nxdpf)
+		return -ENOMEM;
+
+	bq_enqueue(obj->dev, nxdpf, dev_rx, obj->xdp_prog);
+
+	return 0;
+}
+
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, bool exclude_ingress)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
+	struct bpf_dtab_netdev *dst, *last_dst = NULL;
+	struct hlist_head *head;
+	struct xdp_frame *xdpf;
+	unsigned int i;
+	int err;
+
+	xdpf = xdp_convert_buff_to_frame(xdp);
+	if (unlikely(!xdpf))
+		return -EOVERFLOW;
+
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+		for (i = 0; i < map->max_entries; i++) {
+			dst = READ_ONCE(dtab->netdev_map[i]);
+			if (!is_valid_dst(dst, xdp, exclude_ifindex))
+				continue;
+
+			/* we only need n-1 clones; last_dst enqueued below */
+			if (!last_dst) {
+				last_dst = dst;
+				continue;
+			}
+
+			err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
+			if (err)
+				return err;
+
+			last_dst = dst;
+		}
+	} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
+		for (i = 0; i < dtab->n_buckets; i++) {
+			head = dev_map_index_hash(dtab, i);
+			hlist_for_each_entry_rcu(dst, head, index_hlist,
+						 lockdep_is_held(&dtab->index_lock)) {
+				if (!is_valid_dst(dst, xdp, exclude_ifindex))
+					continue;
+
+				/* we only need n-1 clones; last_dst enqueued below */
+				if (!last_dst) {
+					last_dst = dst;
+					continue;
+				}
+
+				err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
+				if (err)
+					return err;
+
+				last_dst = dst;
+			}
+		}
+	}
+
+	/* consume the last copy of the frame */
+	if (last_dst)
+		bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
+	else
+		xdp_return_frame_rx_napi(xdpf); /* dtab is empty */
+
+	return 0;
+}
+
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog)
 {
@@ -529,6 +623,87 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 	return 0;
 }
 
+static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst,
+				  struct sk_buff *skb,
+				  struct bpf_prog *xdp_prog)
+{
+	struct sk_buff *nskb;
+	int err;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return -ENOMEM;
+
+	err = dev_map_generic_redirect(dst, nskb, xdp_prog);
+	if (unlikely(err)) {
+		consume_skb(nskb);
+		return err;
+	}
+
+	return 0;
+}
+
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+			   struct bpf_prog *xdp_prog, struct bpf_map *map,
+			   bool exclude_ingress)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
+	struct bpf_dtab_netdev *dst, *last_dst = NULL;
+	struct hlist_head *head;
+	struct hlist_node *next;
+	unsigned int i;
+	int err;
+
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+		for (i = 0; i < map->max_entries; i++) {
+			dst = READ_ONCE(dtab->netdev_map[i]);
+			if (!dst || dst->dev->ifindex == exclude_ifindex)
+				continue;
+
+			/* we only need n-1 clones; last_dst enqueued below */
+			if (!last_dst) {
+				last_dst = dst;
+				continue;
+			}
+
+			err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
+			if (err)
+				return err;
+
+			last_dst = dst;
+		}
+	} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
+		for (i = 0; i < dtab->n_buckets; i++) {
+			head = dev_map_index_hash(dtab, i);
+			hlist_for_each_entry_safe(dst, next, head, index_hlist) {
+				if (!dst || dst->dev->ifindex == exclude_ifindex)
+					continue;
+
+				/* we only need n-1 clones; last_dst enqueued below */
+				if (!last_dst) {
+					last_dst = dst;
+					continue;
+				}
+
+				err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
+				if (err)
+					return err;
+
+				last_dst = dst;
+			}
+		}
+	}
+
+	/* consume the first skb and return */
+	if (last_dst)
+		return dev_map_generic_redirect(last_dst, skb, xdp_prog);
+
+	/* dtab is empty */
+	consume_skb(skb);
+	return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
@@ -755,12 +930,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
 
 static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags,
+				      BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
+				      __dev_map_lookup_elem);
 }
 
 static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags,
+				      BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
+				      __dev_map_hash_lookup_elem);
 }
 
 static int dev_map_btf_id;
diff --git a/net/core/filter.c b/net/core/filter.c
index 582ac196fd94..caa88955562e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3930,6 +3930,23 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
+void bpf_clear_redirect_map(struct bpf_map *map)
+{
+	struct bpf_redirect_info *ri;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+		/* Avoid polluting remote cacheline due to writes if
+		 * not needed. Once we pass this test, we need the
+		 * cmpxchg() to make sure it hasn't been changed in
+		 * the meantime by remote CPU.
+		 */
+		if (unlikely(READ_ONCE(ri->map) == map))
+			cmpxchg(&ri->map, map, NULL);
+	}
+}
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
@@ -3937,6 +3954,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	enum bpf_map_type map_type = ri->map_type;
 	void *fwd = ri->tgt_value;
 	u32 map_id = ri->map_id;
+	struct bpf_map *map;
 	int err;
 
 	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
@@ -3946,7 +3964,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	case BPF_MAP_TYPE_DEVMAP:
 		fallthrough;
 	case BPF_MAP_TYPE_DEVMAP_HASH:
-		err = dev_map_enqueue(fwd, xdp, dev);
+		map = READ_ONCE(ri->map);
+		if (unlikely(map)) {
+			WRITE_ONCE(ri->map, NULL);
+			err = dev_map_enqueue_multi(xdp, dev, map,
+						    ri->flags & BPF_F_EXCLUDE_INGRESS);
+		} else {
+			err = dev_map_enqueue(fwd, xdp, dev);
+		}
 		break;
 	case BPF_MAP_TYPE_CPUMAP:
 		err = cpu_map_enqueue(fwd, xdp, dev);
@@ -3988,13 +4013,21 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       enum bpf_map_type map_type, u32 map_id)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map;
 	int err;
 
 	switch (map_type) {
 	case BPF_MAP_TYPE_DEVMAP:
 		fallthrough;
 	case BPF_MAP_TYPE_DEVMAP_HASH:
-		err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+		map = READ_ONCE(ri->map);
+		if (unlikely(map)) {
+			WRITE_ONCE(ri->map, NULL);
+			err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
+						     ri->flags & BPF_F_EXCLUDE_INGRESS);
+		} else {
+			err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+		}
 		if (unlikely(err))
 			goto err;
 		break;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 858276e72c68..725d20f1b100 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -584,3 +584,31 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 	return __xdp_build_skb_from_frame(xdpf, skb, dev);
 }
 EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
+
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
+{
+	unsigned int headroom, totalsize;
+	struct xdp_frame *nxdpf;
+	struct page *page;
+	void *addr;
+
+	headroom = xdpf->headroom + sizeof(*xdpf);
+	totalsize = headroom + xdpf->len;
+
+	if (unlikely(totalsize > PAGE_SIZE))
+		return NULL;
+	page = dev_alloc_page();
+	if (!page)
+		return NULL;
+	addr = page_to_virt(page);
+
+	memcpy(addr, xdpf, totalsize);
+
+	nxdpf = addr;
+	nxdpf->data = addr + headroom;
+	nxdpf->frame_sz = PAGE_SIZE;
+	nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
+	nxdpf->mem.id = 0;
+
+	return nxdpf;
+}
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 67b4ce504852..9df75ea4a567 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -226,7 +226,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 
 static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+	return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+				      __xsk_map_lookup_elem);
 }
 
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 562adeac1d67..2c1ba70abbf1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2555,8 +2555,12 @@ union bpf_attr {
  * 		The lower two bits of *flags* are used as the return code if
  * 		the map lookup fails. This is so that the return value can be
  * 		one of the XDP program return codes up to **XDP_TX**, as chosen
- * 		by the caller. Any higher bits in the *flags* argument must be
- * 		unset.
+ * 		by the caller. The higher bits of *flags* can be set to
+ * 		BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ * 		With BPF_F_BROADCAST the packet will be broadcasted to all the
+ * 		interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
+ * 		interface will be excluded when do broadcasting.
  *
  * 		See also **bpf_redirect**\ (), which only supports redirecting
  * 		to an ifindex, but doesn't require a map to do so.
@@ -5122,6 +5126,12 @@ enum {
 	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
 };
 
+/* Flags for bpf_redirect_map helper */
+enum {
+	BPF_F_BROADCAST		= (1ULL << 3),
+	BPF_F_EXCLUDE_INGRESS	= (1ULL << 4),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\

From e48cfe4bbfadd7b88821fe98f625a6b5a6d1cbb4 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 19 May 2021 17:07:46 +0800
Subject: [PATCH 0552/3292] sample/bpf: Add xdp_redirect_map_multi for
 redirect_map broadcast test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a sample for xdp redirect broadcast. In the sample we could forward
all packets between given interfaces. There is also an option -X that could
enable 2nd xdp_prog on egress interface.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210519090747.1655268-4-liuhangbin@gmail.com
---
 samples/bpf/Makefile                      |   3 +
 samples/bpf/xdp_redirect_map_multi_kern.c |  88 +++++++
 samples/bpf/xdp_redirect_map_multi_user.c | 302 ++++++++++++++++++++++
 3 files changed, 393 insertions(+)
 create mode 100644 samples/bpf/xdp_redirect_map_multi_kern.c
 create mode 100644 samples/bpf/xdp_redirect_map_multi_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 45ceca4e2c70..520434ea966f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -41,6 +41,7 @@ tprogs-y += test_map_in_map
 tprogs-y += per_socket_stats_example
 tprogs-y += xdp_redirect
 tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect_map_multi
 tprogs-y += xdp_redirect_cpu
 tprogs-y += xdp_monitor
 tprogs-y += xdp_rxq_info
@@ -99,6 +100,7 @@ test_map_in_map-objs := test_map_in_map_user.o
 per_socket_stats_example-objs := cookie_uid_helper_example.o
 xdp_redirect-objs := xdp_redirect_user.o
 xdp_redirect_map-objs := xdp_redirect_map_user.o
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o
 xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
 xdp_monitor-objs := xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
@@ -160,6 +162,7 @@ always-y += tcp_tos_reflect_kern.o
 always-y += tcp_dumpstats_kern.o
 always-y += xdp_redirect_kern.o
 always-y += xdp_redirect_map_kern.o
+always-y += xdp_redirect_map_multi_kern.o
 always-y += xdp_redirect_cpu_kern.o
 always-y += xdp_monitor_kern.o
 always-y += xdp_rxq_info_kern.o
diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi_kern.c
new file mode 100644
index 000000000000..71aa23d1cb2b
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_multi_kern.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, 32);
+} forward_map_general SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__uint(max_entries, 32);
+} forward_map_native SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, long);
+	__uint(max_entries, 1);
+} rxcnt SEC(".maps");
+
+/* map to store egress interfaces mac addresses, set the
+ * max_entries to 1 and extend it in user sapce prog.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, __be64);
+	__uint(max_entries, 1);
+} mac_map SEC(".maps");
+
+static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
+{
+	long *value;
+	u32 key = 0;
+
+	/* count packet in global counter */
+	value = bpf_map_lookup_elem(&rxcnt, &key);
+	if (value)
+		*value += 1;
+
+	return bpf_redirect_map(forward_map, key,
+				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_redirect_general")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+	return xdp_redirect_map(ctx, &forward_map_general);
+}
+
+SEC("xdp_redirect_native")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+	return xdp_redirect_map(ctx, &forward_map_native);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	u32 key = ctx->egress_ifindex;
+	struct ethhdr *eth = data;
+	__be64 *mac;
+	u64 nh_off;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+
+	mac = bpf_map_lookup_elem(&mac_map, &key);
+	if (mac)
+		__builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
new file mode 100644
index 000000000000..84cdbbed20b7
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_multi_user.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+static int rxcnt_map_fd;
+
+static void int_exit(int sig)
+{
+	__u32 prog_id = 0;
+	int i;
+
+	for (i = 0; ifaces[i] > 0; i++) {
+		if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		if (prog_id)
+			bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+	}
+
+	exit(0);
+}
+
+static void poll_stats(int interval)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	__u64 values[nr_cpus], prev[nr_cpus];
+
+	memset(prev, 0, sizeof(prev));
+
+	while (1) {
+		__u64 sum = 0;
+		__u32 key = 0;
+		int i;
+
+		sleep(interval);
+		assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
+		for (i = 0; i < nr_cpus; i++)
+			sum += (values[i] - prev[i]);
+		if (sum)
+			printf("Forwarding %10llu pkt/s\n", sum / interval);
+		memcpy(prev, values, sizeof(values));
+	}
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+	char ifname[IF_NAMESIZE];
+	struct ifreq ifr;
+	int fd, ret = -1;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return ret;
+
+	if (!if_indextoname(ifindex, ifname))
+		goto err_out;
+
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+		goto err_out;
+
+	memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+	ret = 0;
+
+err_out:
+	close(fd);
+	return ret;
+}
+
+static int update_mac_map(struct bpf_object *obj)
+{
+	int i, ret = -1, mac_map_fd;
+	unsigned char mac_addr[6];
+	unsigned int ifindex;
+
+	mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map");
+	if (mac_map_fd < 0) {
+		printf("find mac map fd failed\n");
+		return ret;
+	}
+
+	for (i = 0; ifaces[i] > 0; i++) {
+		ifindex = ifaces[i];
+
+		ret = get_mac_addr(ifindex, mac_addr);
+		if (ret < 0) {
+			printf("get interface %d mac failed\n", ifindex);
+			return ret;
+		}
+
+		ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
+		if (ret) {
+			perror("bpf_update_elem mac_map_fd");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+		"OPTS:\n"
+		"    -S    use skb-mode\n"
+		"    -N    enforce native mode\n"
+		"    -F    force loading prog\n"
+		"    -X    load xdp program on egress\n",
+		prog);
+}
+
+int main(int argc, char **argv)
+{
+	int i, ret, opt, forward_map_fd, max_ifindex = 0;
+	struct bpf_program *ingress_prog, *egress_prog;
+	int ingress_prog_fd, egress_prog_fd = 0;
+	struct bpf_devmap_val devmap_val;
+	bool attach_egress_prog = false;
+	char ifname[IF_NAMESIZE];
+	struct bpf_map *mac_map;
+	struct bpf_object *obj;
+	unsigned int ifindex;
+	char filename[256];
+
+	while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+		switch (opt) {
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			/* default, set below */
+			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
+		case 'X':
+			attach_egress_prog = true;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+	} else if (attach_egress_prog) {
+		printf("Load xdp program on egress with SKB mode not supported yet\n");
+		return 1;
+	}
+
+	if (optind == argc) {
+		printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+		return 1;
+	}
+
+	printf("Get interfaces");
+	for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+		ifaces[i] = if_nametoindex(argv[optind + i]);
+		if (!ifaces[i])
+			ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+		if (!if_indextoname(ifaces[i], ifname)) {
+			perror("Invalid interface name or i");
+			return 1;
+		}
+
+		/* Find the largest index number */
+		if (ifaces[i] > max_ifindex)
+			max_ifindex = ifaces[i];
+
+		printf(" %d", ifaces[i]);
+	}
+	printf("\n");
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	obj = bpf_object__open(filename);
+	if (libbpf_get_error(obj)) {
+		printf("ERROR: opening BPF object file failed\n");
+		obj = NULL;
+		goto err_out;
+	}
+
+	/* Reset the map size to max ifindex + 1 */
+	if (attach_egress_prog) {
+		mac_map = bpf_object__find_map_by_name(obj, "mac_map");
+		ret = bpf_map__resize(mac_map, max_ifindex + 1);
+		if (ret < 0) {
+			printf("ERROR: reset mac map size failed\n");
+			goto err_out;
+		}
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		printf("ERROR: loading BPF object file failed\n");
+		goto err_out;
+	}
+
+	if (xdp_flags & XDP_FLAGS_SKB_MODE) {
+		ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
+		forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general");
+	} else {
+		ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
+		forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native");
+	}
+	if (!ingress_prog || forward_map_fd < 0) {
+		printf("finding ingress_prog/forward_map in obj file failed\n");
+		goto err_out;
+	}
+
+	ingress_prog_fd = bpf_program__fd(ingress_prog);
+	if (ingress_prog_fd < 0) {
+		printf("find ingress_prog fd failed\n");
+		goto err_out;
+	}
+
+	rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+	if (rxcnt_map_fd < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
+		goto err_out;
+	}
+
+	if (attach_egress_prog) {
+		/* Update mac_map with all egress interfaces' mac addr */
+		if (update_mac_map(obj) < 0) {
+			printf("Error: update mac map failed");
+			goto err_out;
+		}
+
+		/* Find egress prog fd */
+		egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+		if (!egress_prog) {
+			printf("finding egress_prog in obj file failed\n");
+			goto err_out;
+		}
+		egress_prog_fd = bpf_program__fd(egress_prog);
+		if (egress_prog_fd < 0) {
+			printf("find egress_prog fd failed\n");
+			goto err_out;
+		}
+	}
+
+	/* Remove attached program when program is interrupted or killed */
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	/* Init forward multicast groups */
+	for (i = 0; ifaces[i] > 0; i++) {
+		ifindex = ifaces[i];
+
+		/* bind prog_fd to each interface */
+		ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags);
+		if (ret) {
+			printf("Set xdp fd failed on %d\n", ifindex);
+			goto err_out;
+		}
+
+		/* Add all the interfaces to forward group and attach
+		 * egress devmap programe if exist
+		 */
+		devmap_val.ifindex = ifindex;
+		devmap_val.bpf_prog.fd = egress_prog_fd;
+		ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0);
+		if (ret) {
+			perror("bpf_map_update_elem forward_map");
+			goto err_out;
+		}
+	}
+
+	poll_stats(2);
+
+	return 0;
+
+err_out:
+	return 1;
+}

From d232924762971fe2698011bc244e05949e544541 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 19 May 2021 17:07:47 +0800
Subject: [PATCH 0553/3292] selftests/bpf: Add xdp_redirect_multi test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a bpf selftest for new helper xdp_redirect_map_multi(). In this
test there are 3 forward groups and 1 exclude group. The test will
redirect each interface's packets to all the interfaces in the forward
group, and exclude the interface in exclude map.

Two maps (DEVMAP, DEVMAP_HASH) and two xdp modes (generic, drive) will
be tested. XDP egress program will also be tested by setting pkt src MAC
to egress interface's MAC address.

For more test details, you can find it in the test script. Here is
the test result.
]# time ./test_xdp_redirect_multi.sh
Pass: xdpgeneric arp(F_BROADCAST) ns1-1
Pass: xdpgeneric arp(F_BROADCAST) ns1-2
Pass: xdpgeneric arp(F_BROADCAST) ns1-3
Pass: xdpgeneric IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1
Pass: xdpgeneric IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2
Pass: xdpgeneric IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3
Pass: xdpgeneric IPv6 (no flags) ns1-1
Pass: xdpgeneric IPv6 (no flags) ns1-2
Pass: xdpdrv arp(F_BROADCAST) ns1-1
Pass: xdpdrv arp(F_BROADCAST) ns1-2
Pass: xdpdrv arp(F_BROADCAST) ns1-3
Pass: xdpdrv IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1
Pass: xdpdrv IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2
Pass: xdpdrv IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3
Pass: xdpdrv IPv6 (no flags) ns1-1
Pass: xdpdrv IPv6 (no flags) ns1-2
Pass: xdpegress mac ns1-2
Pass: xdpegress mac ns1-3
Summary: PASS 18, FAIL 0

real    1m18.321s
user    0m0.123s
sys     0m0.350s

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210519090747.1655268-5-liuhangbin@gmail.com
---
 tools/testing/selftests/bpf/Makefile          |   3 +-
 .../bpf/progs/xdp_redirect_multi_kern.c       |  94 ++++++++
 .../selftests/bpf/test_xdp_redirect_multi.sh  | 204 ++++++++++++++++
 .../selftests/bpf/xdp_redirect_multi.c        | 226 ++++++++++++++++++
 4 files changed, 526 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
 create mode 100755 tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
 create mode 100644 tools/testing/selftests/bpf/xdp_redirect_multi.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 525e4b3fb514..f405b20c1e6c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
 	test_xdp_redirect.sh \
+	test_xdp_redirect_multi.sh \
 	test_xdp_meta.sh \
 	test_xdp_veth.sh \
 	test_offload.py \
@@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
 	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-	xdpxceiver
+	xdpxceiver xdp_redirect_multi
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644
index 000000000000..880debcbcd65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map use devmap, another one use devmap_hash for testing */
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, __be64);
+	__uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp_redirect_map_multi")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	int if_index = ctx->ingress_ifindex;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+	__u64 nh_off;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	/* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+	if (h_proto == bpf_htons(ETH_P_IP))
+		return bpf_redirect_map(&map_all, 0,
+					BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+	/* Using IPv6 for none flag testing */
+	else if (h_proto == bpf_htons(ETH_P_IPV6))
+		return bpf_redirect_map(&map_all, if_index, 0);
+	/* All others for BPF_F_BROADCAST testing */
+	else
+		return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp_redirect_map_ingress")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+	return bpf_redirect_map(&map_egress, 0,
+				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	__u32 key = ctx->egress_ifindex;
+	struct ethhdr *eth = data;
+	__u64 nh_off;
+	__be64 *mac;
+
+	nh_off = sizeof(*eth);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+
+	mac = bpf_map_lookup_elem(&mac_map, &key);
+	if (mac)
+		__builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755
index 000000000000..1538373157e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+#     - - - - - - - - - - - - - - - - - - - - - - - - -
+#    | veth1         veth2         veth3 |  ... init net
+#     - -| - - - - - - | - - - - - - | - -
+#    ---------     ---------     ---------
+#    | veth0 |     | veth0 |     | veth0 |  ...
+#    ---------     ---------     ---------
+#       ns1           ns2           ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+#   ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
+#   the redirects.
+#      ns1 -> gw: ns1, ns2, ns3, should receive the arp request
+#   IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+#   interface should not receive the redirects.
+#      ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
+#   IPv6: Testing none flag, all the pkts should be redirected back
+#      ping test: ns1 -> ns2 (block), echo requests will be redirect back
+#   egress_prog:
+#      all src mac should be egress interface's mac
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+
+test_pass()
+{
+	echo "Pass: $@"
+	PASS=$((PASS + 1))
+}
+
+test_fail()
+{
+	echo "fail: $@"
+	FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+	for i in $(seq $NUM); do
+		ip link del veth$i 2> /dev/null
+		ip netns del ns$i 2> /dev/null
+	done
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_env()
+{
+	ip link set dev lo xdpgeneric off &>/dev/null
+	if [ $? -ne 0 ];then
+		echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+		exit 4
+	fi
+
+	which tcpdump &>/dev/null
+	if [ $? -ne 0 ];then
+		echo "selftests: [SKIP] Could not run test without tcpdump"
+		exit 4
+	fi
+}
+
+setup_ns()
+{
+	local mode=$1
+	IFACES=""
+
+	if [ "$mode" = "xdpegress" ]; then
+		mode="xdpdrv"
+	fi
+
+	for i in $(seq $NUM); do
+	        ip netns add ns$i
+	        ip link add veth$i type veth peer name veth0 netns ns$i
+		ip link set veth$i up
+		ip -n ns$i link set veth0 up
+
+		ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+		ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+		# Add a neigh entry for IPv4 ping test
+		ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+		ip -n ns$i link set veth0 $mode obj \
+			xdp_dummy.o sec xdp_dummy &> /dev/null || \
+			{ test_fail "Unable to load dummy xdp" && exit 1; }
+		IFACES="$IFACES veth$i"
+		veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
+	done
+}
+
+do_egress_tests()
+{
+	local mode=$1
+
+	# mac test
+	ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log &
+	ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log &
+	sleep 0.5
+	ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+	sleep 0.5
+	pkill -9 tcpdump
+
+	# mac check
+	grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \
+	       test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+	grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \
+		test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+}
+
+do_ping_tests()
+{
+	local mode=$1
+
+	# ping6 test: echo request should be redirect back to itself, not others
+	ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+	ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log &
+	ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log &
+	ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log &
+	sleep 0.5
+	# ARP test
+	ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+	# IPv4 test
+	ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+	# IPv6 test
+	ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+	sleep 0.5
+	pkill -9 tcpdump
+
+	# All netns should receive the redirect arp requests
+	[ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \
+		test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+		test_fail "$mode arp(F_BROADCAST) ns1-1"
+	[ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \
+		test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+		test_fail "$mode arp(F_BROADCAST) ns1-2"
+	[ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \
+		test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+		test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+	# ns1 should not receive the redirect echo request, others should
+	[ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \
+		test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+		test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+	[ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \
+		test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+		test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+	[ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \
+		test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+		test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+	# ns1 should receive the echo request, ns2 should not
+	[ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \
+		test_pass "$mode IPv6 (no flags) ns1-1" || \
+		test_fail "$mode IPv6 (no flags) ns1-1"
+	[ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \
+		test_pass "$mode IPv6 (no flags) ns1-2" || \
+		test_fail "$mode IPv6 (no flags) ns1-2"
+}
+
+do_tests()
+{
+	local mode=$1
+	local drv_p
+
+	case ${mode} in
+		xdpdrv)  drv_p="-N";;
+		xdpegress) drv_p="-X";;
+		xdpgeneric) drv_p="-S";;
+	esac
+
+	./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
+	xdp_pid=$!
+	sleep 1
+
+	if [ "$mode" = "xdpegress" ]; then
+		do_egress_tests $mode
+	else
+		do_ping_tests $mode
+	fi
+
+	kill $xdp_pid
+}
+
+trap clean_up 0 2 3 6 9
+
+check_env
+rm -f xdp_redirect_*.log ns*.log mac_ns*.log
+
+for mode in ${DRV_MODE}; do
+	setup_ns $mode
+	do_tests $mode
+	clean_up
+done
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644
index 000000000000..3696a8f32c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static void int_exit(int sig)
+{
+	__u32 prog_id = 0;
+	int i;
+
+	for (i = 0; ifaces[i] > 0; i++) {
+		if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
+			exit(1);
+		}
+		if (prog_id)
+			bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+	}
+
+	exit(0);
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+	char ifname[IF_NAMESIZE];
+	struct ifreq ifr;
+	int fd, ret = -1;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return ret;
+
+	if (!if_indextoname(ifindex, ifname))
+		goto err_out;
+
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+		goto err_out;
+
+	memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+	ret = 0;
+
+err_out:
+	close(fd);
+	return ret;
+}
+
+static void usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+		"OPTS:\n"
+		"    -S    use skb-mode\n"
+		"    -N    enforce native mode\n"
+		"    -F    force loading prog\n"
+		"    -X    load xdp program on egress\n",
+		prog);
+}
+
+int main(int argc, char **argv)
+{
+	int prog_fd, group_all, mac_map;
+	struct bpf_program *ingress_prog, *egress_prog;
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type = BPF_PROG_TYPE_UNSPEC,
+	};
+	int i, ret, opt, egress_prog_fd = 0;
+	struct bpf_devmap_val devmap_val;
+	bool attach_egress_prog = false;
+	unsigned char mac_addr[6];
+	char ifname[IF_NAMESIZE];
+	struct bpf_object *obj;
+	unsigned int ifindex;
+	char filename[256];
+
+	while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+		switch (opt) {
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'N':
+			/* default, set below */
+			break;
+		case 'F':
+			xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+			break;
+		case 'X':
+			attach_egress_prog = true;
+			break;
+		default:
+			usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+	} else if (attach_egress_prog) {
+		printf("Load xdp program on egress with SKB mode not supported yet\n");
+		goto err_out;
+	}
+
+	if (optind == argc) {
+		printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+		goto err_out;
+	}
+
+	printf("Get interfaces");
+	for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+		ifaces[i] = if_nametoindex(argv[optind + i]);
+		if (!ifaces[i])
+			ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+		if (!if_indextoname(ifaces[i], ifname)) {
+			perror("Invalid interface name or i");
+			goto err_out;
+		}
+		if (ifaces[i] > MAX_INDEX_NUM) {
+			printf("Interface index to large\n");
+			goto err_out;
+		}
+		printf(" %d", ifaces[i]);
+	}
+	printf("\n");
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
+
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		goto err_out;
+
+	if (attach_egress_prog)
+		group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+	else
+		group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+	mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+	if (group_all < 0 || mac_map < 0) {
+		printf("bpf_object__find_map_fd_by_name failed\n");
+		goto err_out;
+	}
+
+	if (attach_egress_prog) {
+		/* Find ingress/egress prog for 2nd xdp prog */
+		ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+		egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+		if (!ingress_prog || !egress_prog) {
+			printf("finding ingress/egress_prog in obj file failed\n");
+			goto err_out;
+		}
+		prog_fd = bpf_program__fd(ingress_prog);
+		egress_prog_fd = bpf_program__fd(egress_prog);
+		if (prog_fd < 0 || egress_prog_fd < 0) {
+			printf("find egress_prog fd failed\n");
+			goto err_out;
+		}
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	/* Init forward multicast groups and exclude group */
+	for (i = 0; ifaces[i] > 0; i++) {
+		ifindex = ifaces[i];
+
+		if (attach_egress_prog) {
+			ret = get_mac_addr(ifindex, mac_addr);
+			if (ret < 0) {
+				printf("get interface %d mac failed\n", ifindex);
+				goto err_out;
+			}
+			ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+			if (ret) {
+				perror("bpf_update_elem mac_map failed\n");
+				goto err_out;
+			}
+		}
+
+		/* Add all the interfaces to group all */
+		devmap_val.ifindex = ifindex;
+		devmap_val.bpf_prog.fd = egress_prog_fd;
+		ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+		if (ret) {
+			perror("bpf_map_update_elem");
+			goto err_out;
+		}
+
+		/* bind prog_fd to each interface */
+		ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+		if (ret) {
+			printf("Set xdp fd failed on %d\n", ifindex);
+			goto err_out;
+		}
+	}
+
+	/* sleep some time for testing */
+	sleep(999);
+
+	return 0;
+
+err_out:
+	return 1;
+}

From b15b253c908235bb10bdbc36f7e33ab40758215f Mon Sep 17 00:00:00 2001
From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Date: Mon, 24 May 2021 23:57:43 +0900
Subject: [PATCH 0554/3292] HID: usbmouse: Avoid GFP_ATOMIC when GFP_KERNEL is
 possible

probe in usb don't need to be atomic. So GFP_KERNEL can be used here,
instead of GFP_ATOMIC.

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/usbmouse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/usbhid/usbmouse.c b/drivers/hid/usbhid/usbmouse.c
index 073127e65ac1..c89332017d5d 100644
--- a/drivers/hid/usbhid/usbmouse.c
+++ b/drivers/hid/usbhid/usbmouse.c
@@ -130,7 +130,7 @@ static int usb_mouse_probe(struct usb_interface *intf, const struct usb_device_i
 	if (!mouse || !input_dev)
 		goto fail1;
 
-	mouse->data = usb_alloc_coherent(dev, 8, GFP_ATOMIC, &mouse->data_dma);
+	mouse->data = usb_alloc_coherent(dev, 8, GFP_KERNEL, &mouse->data_dma);
 	if (!mouse->data)
 		goto fail1;
 

From f6089981d07e6e1cc053f4c239e458eed122c092 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 26 May 2021 15:46:05 +0100
Subject: [PATCH 0555/3292] fs: dlm: Fix memory leak of object mh

There is an error return path that is not kfree'ing mh after
it has been successfully allocates.  Fix this by moving the
call to create_rcom to after the check on rc_in->rc_id check
to avoid this.

Thanks to Alexander Ahring Oder Aring for suggesting the
correct way to fix this.

Addresses-Coverity: ("Resource leak")
Fixes: a070a91cf140 ("fs: dlm: add more midcomms hooks")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/rcom.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 085f21966c72..a7727b9e5e83 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -385,10 +385,6 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
 	int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
 
-	error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
-	if (error)
-		return;
-
 	/* Old code would send this special id to trigger a debug dump. */
 	if (rc_in->rc_id == 0xFFFFFFFF) {
 		log_error(ls, "receive_rcom_lookup dump from %d", nodeid);
@@ -396,6 +392,10 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 		return;
 	}
 
+	error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
+	if (error)
+		return;
+
 	error = dlm_master_lookup(ls, nodeid, rc_in->rc_buf, len,
 				  DLM_LU_RECOVER_MASTER, &ret_nodeid, NULL);
 	if (error)

From 7d3848c03e09ea9cdfde8bb2b82282d252943ee6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 26 May 2021 09:53:39 +0100
Subject: [PATCH 0556/3292] fs: dlm: Fix spelling mistake "stucked" -> "stuck"

There are spelling mistake in log messages. Fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 35664950f6b7..4e36e418b6bf 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -591,7 +591,7 @@ dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
 					 * was failed, we try to reset and
 					 * hope it will go on.
 					 */
-					log_print("reset node %d because shutdown stucked",
+					log_print("reset node %d because shutdown stuck",
 						  node->nodeid);
 
 					midcomms_node_reset(node);
@@ -1159,7 +1159,7 @@ void dlm_midcomms_add_member(int nodeid)
 			 * was failed, we try to reset and
 			 * hope it will go on.
 			 */
-			log_print("reset node %d because shutdown stucked",
+			log_print("reset node %d because shutdown stuck",
 				  node->nodeid);
 
 			midcomms_node_reset(node);

From d4ef55288aa2e1b76033717242728ac98ddc4721 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:24 -0700
Subject: [PATCH 0557/3292] e100: handle eeprom as little endian

Sparse tool was warning on some implicit conversions from
little endian data read from the EEPROM on the e100 cards.

Fix these by being explicit about the conversions using
le16_to_cpu().

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e100.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index f8d78af76d7d..1b0958bd24f6 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1395,7 +1395,7 @@ static int e100_phy_check_without_mii(struct nic *nic)
 	u8 phy_type;
 	int without_mii;
 
-	phy_type = (nic->eeprom[eeprom_phy_iface] >> 8) & 0x0f;
+	phy_type = (le16_to_cpu(nic->eeprom[eeprom_phy_iface]) >> 8) & 0x0f;
 
 	switch (phy_type) {
 	case NoSuchPhy: /* Non-MII PHY; UNTESTED! */
@@ -1515,7 +1515,7 @@ static int e100_phy_init(struct nic *nic)
 		mdio_write(netdev, nic->mii.phy_id, MII_BMCR, bmcr);
 	} else if ((nic->mac >= mac_82550_D102) || ((nic->flags & ich) &&
 	   (mdio_read(netdev, nic->mii.phy_id, MII_TPISTATUS) & 0x8000) &&
-		(nic->eeprom[eeprom_cnfg_mdix] & eeprom_mdix_enabled))) {
+	   (le16_to_cpu(nic->eeprom[eeprom_cnfg_mdix]) & eeprom_mdix_enabled))) {
 		/* enable/disable MDI/MDI-X auto-switching. */
 		mdio_write(netdev, nic->mii.phy_id, MII_NCONFIG,
 				nic->mii.force_media ? 0 : NCONFIG_AUTO_SWITCH);
@@ -2269,9 +2269,9 @@ static int e100_asf(struct nic *nic)
 {
 	/* ASF can be enabled from eeprom */
 	return (nic->pdev->device >= 0x1050) && (nic->pdev->device <= 0x1057) &&
-	   (nic->eeprom[eeprom_config_asf] & eeprom_asf) &&
-	   !(nic->eeprom[eeprom_config_asf] & eeprom_gcl) &&
-	   ((nic->eeprom[eeprom_smbus_addr] & 0xFF) != 0xFE);
+	   (le16_to_cpu(nic->eeprom[eeprom_config_asf]) & eeprom_asf) &&
+	   !(le16_to_cpu(nic->eeprom[eeprom_config_asf]) & eeprom_gcl) &&
+	   ((le16_to_cpu(nic->eeprom[eeprom_smbus_addr]) & 0xFF) != 0xFE);
 }
 
 static int e100_up(struct nic *nic)
@@ -2926,7 +2926,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Wol magic packet can be enabled from eeprom */
 	if ((nic->mac >= mac_82558_D101_A4) &&
-	   (nic->eeprom[eeprom_id] & eeprom_id_wol)) {
+	   (le16_to_cpu(nic->eeprom[eeprom_id]) & eeprom_id_wol)) {
 		nic->flags |= wol_magic;
 		device_set_wakeup_enable(&pdev->dev, true);
 	}

From c40591cc3d48194faa80bda652d86a1ed8e221be Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:25 -0700
Subject: [PATCH 0558/3292] intel: remove checker warning

The sparse checker (C=2) found an assignment where we were mixing
types when trying to convert from data read directly from the
device NVM, to an array in CPU order in-memory, which
unfortunately the driver tries to do in-place.

This is easily solved by using the swap operation instead of an
assignment, and is already proven in other Intel drivers to be
functionally correct and the same code, just without a sparse
warning.

The change is the same in all three drivers.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 2 +-
 drivers/net/ethernet/intel/igb/igb_ethtool.c     | 2 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index f976e9daa3d8..3c51ee94fa00 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -513,7 +513,7 @@ static int e1000_set_eeprom(struct net_device *netdev,
 	memcpy(ptr, bytes, eeprom->len);
 
 	for (i = 0; i < last_word - first_word + 1; i++)
-		eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]);
+		cpu_to_le16s(&eeprom_buff[i]);
 
 	ret_val = e1000_write_eeprom(hw, first_word,
 				     last_word - first_word + 1, eeprom_buff);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 7545da216d8b..636a1b1fb7e1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -831,7 +831,7 @@ static int igb_set_eeprom(struct net_device *netdev,
 	memcpy(ptr, bytes, eeprom->len);
 
 	for (i = 0; i < last_word - first_word + 1; i++)
-		eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]);
+		cpu_to_le16s(&eeprom_buff[i]);
 
 	ret_val = hw->nvm.ops.write(hw, first_word,
 				    last_word - first_word + 1, eeprom_buff);
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 9722449d7633..2cb12431c371 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -554,7 +554,7 @@ static int igc_ethtool_set_eeprom(struct net_device *netdev,
 	memcpy(ptr, bytes, eeprom->len);
 
 	for (i = 0; i < last_word - first_word + 1; i++)
-		eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]);
+		cpu_to_le16s(&eeprom_buff[i]);
 
 	ret_val = hw->nvm.ops.write(hw, first_word,
 				    last_word - first_word + 1, eeprom_buff);

From 0a5d8a9d226fe5ef54b08a8c5f42add49d78484e Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:26 -0700
Subject: [PATCH 0559/3292] fm10k: move error check

The error check and set_bit are placed in such a way that sparse (C=2)
warns:
.../fm10k_pci.c:1395:9: warning: context imbalance in 'fm10k_msix_mbx_pf' - different lock contexts for basic block

Which seems a little odd, but the code can obviously be moved
to where the variable is being set without changing functionality
at all, and it even seems to make a bit more sense with the check
closer to the set.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 9e3103fae723..dbcae92bb18d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1370,7 +1370,6 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data)
 	struct fm10k_hw *hw = &interface->hw;
 	struct fm10k_mbx_info *mbx = &hw->mbx;
 	u32 eicr;
-	s32 err = 0;
 
 	/* unmask any set bits related to this interrupt */
 	eicr = fm10k_read_reg(hw, FM10K_EICR);
@@ -1386,15 +1385,16 @@ static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data)
 
 	/* service mailboxes */
 	if (fm10k_mbx_trylock(interface)) {
-		err = mbx->ops.process(hw, mbx);
+		s32 err = mbx->ops.process(hw, mbx);
+
+		if (err == FM10K_ERR_RESET_REQUESTED)
+			set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
 		/* handle VFLRE events */
 		fm10k_iov_event(interface);
 		fm10k_mbx_unlock(interface);
 	}
 
-	if (err == FM10K_ERR_RESET_REQUESTED)
-		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
-
 	/* if switch toggled state we should reset GLORTs */
 	if (eicr & FM10K_EICR_SWITCHNOTREADY) {
 		/* force link down for at least 4 seconds */

From 88c228b22e001cce1d36112b883bd320c0893ef8 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:27 -0700
Subject: [PATCH 0560/3292] igb/igc: use strongly typed pointer

The igb and igc driver both use a trick of creating a local type
pointer on the stack to ease dealing with a receive descriptor in
64 bit chunks for printing.  Sparse however was not taken into
account and receive descriptors are always in little endian
order, so just make the unions use __le64 instead of u64.

No functional change.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
 drivers/net/ethernet/intel/igc/igc_dump.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 038a9fd1af44..cf91e3624a89 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -356,7 +356,7 @@ static void igb_dump(struct igb_adapter *adapter)
 	struct igb_reg_info *reginfo;
 	struct igb_ring *tx_ring;
 	union e1000_adv_tx_desc *tx_desc;
-	struct my_u0 { u64 a; u64 b; } *u0;
+	struct my_u0 { __le64 a; __le64 b; } *u0;
 	struct igb_ring *rx_ring;
 	union e1000_adv_rx_desc *rx_desc;
 	u32 staterr;
diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c
index 495bed47ed0a..c09c95cc5f70 100644
--- a/drivers/net/ethernet/intel/igc/igc_dump.c
+++ b/drivers/net/ethernet/intel/igc/igc_dump.c
@@ -112,7 +112,7 @@ static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo)
 void igc_rings_dump(struct igc_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct my_u0 { u64 a; u64 b; } *u0;
+	struct my_u0 { __le64 a; __le64 b; } *u0;
 	union igc_adv_tx_desc *tx_desc;
 	union igc_adv_rx_desc *rx_desc;
 	struct igc_ring *tx_ring;

From c7cbfb028b95360403d579c47aaaeef1ff140964 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:28 -0700
Subject: [PATCH 0561/3292] igb: handle vlan types with checker enabled

The sparse build (C=2) finds some issues with how the driver
dealt with the (very difficult) hardware that in some generations
uses little-endian, and in others uses big endian, for the VLAN
field. The code as written picks __le16 as a type and for some
hardware revisions we override it to __be16 as done in this
patch. This impacted the VF driver as well so fix it there too.

Also change the vlan_tci assignment to override the sparse
warning without changing functionality.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 5 +++--
 drivers/net/ethernet/intel/igbvf/netdev.c | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index cf91e3624a89..3a96b61a7229 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2643,7 +2643,8 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter,
 			}
 
 			input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
-			input->filter.vlan_tci = match.key->vlan_priority;
+			input->filter.vlan_tci =
+				(__force __be16)match.key->vlan_priority;
 		}
 	}
 
@@ -8597,7 +8598,7 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring,
 
 		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
 		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
-			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
+			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
 		else
 			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
 
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index fb3fbcb13331..630c1155f196 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -83,14 +83,14 @@ static int igbvf_desc_unused(struct igbvf_ring *ring)
 static void igbvf_receive_skb(struct igbvf_adapter *adapter,
 			      struct net_device *netdev,
 			      struct sk_buff *skb,
-			      u32 status, u16 vlan)
+			      u32 status, __le16 vlan)
 {
 	u16 vid;
 
 	if (status & E1000_RXD_STAT_VP) {
 		if ((adapter->flags & IGBVF_FLAG_RX_LB_VLAN_BSWAP) &&
 		    (status & E1000_RXDEXT_STATERR_LB))
-			vid = be16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
+			vid = be16_to_cpu((__force __be16)vlan) & E1000_RXD_SPC_VLAN_MASK;
 		else
 			vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
 		if (test_bit(vid, adapter->active_vlans))

From b514958dd1a3bd57638b0e63b8e5152b1960e6aa Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:29 -0700
Subject: [PATCH 0562/3292] igb: fix assignment on big endian machines

The igb driver was trying hard to be sparse correct, but somehow
ended up converting a variable into little endian order and then
tries to OR something with it.

A much plainer way of doing things is to leave all variables and
OR operations in CPU (non-endian) mode, and then convert to
little endian only once, which is what this change does.

This probably fixes a bug that might have been seen only on
big endian systems.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 3a96b61a7229..f555670e9271 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6276,12 +6276,12 @@ int igb_xmit_xdp_ring(struct igb_adapter *adapter,
 	cmd_type |= len | IGB_TXD_DCMD;
 	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
 
-	olinfo_status = cpu_to_le32(len << E1000_ADVTXD_PAYLEN_SHIFT);
+	olinfo_status = len << E1000_ADVTXD_PAYLEN_SHIFT;
 	/* 82575 requires a unique index per ring */
 	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
 		olinfo_status |= tx_ring->reg_idx << 4;
 
-	tx_desc->read.olinfo_status = olinfo_status;
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
 
 	netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer->bytecount);
 

From 9fb8602e565d71e0747ad149690d536d20a70b58 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:30 -0700
Subject: [PATCH 0563/3292] igb: override two checker warnings

The igb PTP code was using htons() on a constant to try to
byte swap the value before writing it to a register. This byte
swap has the consequence of triggering sparse conflicts between
the register write which expect cpu ordered input, and the code
which generated a big endian constant. Just override the cast
to make sure code doesn't change but silence the warning.

Can't do a __swab16 in this case because big endian systems
would then write the wrong value.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index ba61fe9bfaf4..de08ae8db4d5 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1134,12 +1134,12 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter,
 			| E1000_FTQF_MASK); /* mask all inputs */
 		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
 
-		wr32(E1000_IMIR(3), htons(PTP_EV_PORT));
+		wr32(E1000_IMIR(3), (__force unsigned int)htons(PTP_EV_PORT));
 		wr32(E1000_IMIREXT(3),
 		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
 		if (hw->mac.type == e1000_82576) {
 			/* enable source port check */
-			wr32(E1000_SPQF(3), htons(PTP_EV_PORT));
+			wr32(E1000_SPQF(3), (__force unsigned int)htons(PTP_EV_PORT));
 			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
 		}
 		wr32(E1000_FTQF(3), ftqf);

From de8447131d2b1923a91c4c30bf094422dfcc16bf Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:31 -0700
Subject: [PATCH 0564/3292] intel: call csum functions with well formatted
 arguments

The sparse build (C=2) found that there were two drivers
who had not been convered to call the csum_replace_by_diff() function
with sparse clean arguments.  Most if not all drivers force the cast
like this patch does. So these drivers are now joining the party
(a bit late), but with no functional change.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igbvf/netdev.c         | 2 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 630c1155f196..1bbe9862a758 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2056,7 +2056,7 @@ static int igbvf_tso(struct igbvf_ring *tx_ring,
 
 	/* remove payload length from inner checksum */
 	paylen = skb->len - l4_offset;
-	csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
+	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
 	/* MSS L4LEN IDX */
 	mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index ba2ed8a43d2d..588c3aa50d94 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3814,7 +3814,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 
 	/* remove payload length from inner checksum */
 	paylen = skb->len - l4_offset;
-	csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
+	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
 	/* update gso size and bytecount with header size */
 	first->gso_segs = skb_shinfo(skb)->gso_segs;

From b6ce4a1c4ba4fd6fd27fe8d917b3d062ff4ebbdd Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:32 -0700
Subject: [PATCH 0565/3292] igbvf: convert to strongly typed descriptors

The igbvf driver for some reason never strongly typed it's descriptor
formats. Make this driver like the rest of the Intel drivers and use
__le* for our little endian descriptors.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igbvf/vf.h | 42 +++++++++++++--------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h
index c71b0d7dbcee..ba9bb3132d5d 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.h
+++ b/drivers/net/ethernet/intel/igbvf/vf.h
@@ -35,31 +35,31 @@ struct e1000_hw;
 /* Receive Descriptor - Advanced */
 union e1000_adv_rx_desc {
 	struct {
-		u64 pkt_addr; /* Packet buffer address */
-		u64 hdr_addr; /* Header buffer address */
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
 	} read;
 	struct {
 		struct {
 			union {
-				u32 data;
+				__le32 data;
 				struct {
-					u16 pkt_info; /* RSS/Packet type */
+					__le16 pkt_info; /* RSS/Packet type */
 					/* Split Header, hdr buffer length */
-					u16 hdr_info;
+					__le16 hdr_info;
 				} hs_rss;
 			} lo_dword;
 			union {
-				u32 rss; /* RSS Hash */
+				__le32 rss; /* RSS Hash */
 				struct {
-					u16 ip_id; /* IP id */
-					u16 csum;  /* Packet Checksum */
+					__le16 ip_id; /* IP id */
+					__le16 csum;  /* Packet Checksum */
 				} csum_ip;
 			} hi_dword;
 		} lower;
 		struct {
-			u32 status_error; /* ext status/error */
-			u16 length; /* Packet length */
-			u16 vlan;   /* VLAN tag */
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
 		} upper;
 	} wb;  /* writeback */
 };
@@ -70,14 +70,14 @@ union e1000_adv_rx_desc {
 /* Transmit Descriptor - Advanced */
 union e1000_adv_tx_desc {
 	struct {
-		u64 buffer_addr; /* Address of descriptor's data buf */
-		u32 cmd_type_len;
-		u32 olinfo_status;
+		__le64 buffer_addr; /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
 	} read;
 	struct {
-		u64 rsvd; /* Reserved */
-		u32 nxtseq_seed;
-		u32 status;
+		__le64 rsvd; /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
 	} wb;
 };
 
@@ -94,10 +94,10 @@ union e1000_adv_tx_desc {
 
 /* Context descriptors */
 struct e1000_adv_tx_context_desc {
-	u32 vlan_macip_lens;
-	u32 seqnum_seed;
-	u32 type_tucmd_mlhl;
-	u32 mss_l4len_idx;
+	__le32 vlan_macip_lens;
+	__le32 seqnum_seed;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
 };
 
 #define E1000_ADVTXD_MACLEN_SHIFT	9  /* Adv ctxt desc mac len shift */

From b16dc6c2f1786157a93c7ab560f8da1e69b69256 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:33 -0700
Subject: [PATCH 0566/3292] ixgbe: use checker safe conversions

The ixgbe hardware needs some very specific programming for
certain registers, which led to some misguided usage of ntohs
instead of using be16_to_cpu(), as well as a home grown swap
followed by an ntohs. Sparse didn't like this at all, and this
fixes the C=2 build, with code that uses native kernel interface.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index e324e42fab2d..58ea959a4482 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1514,8 +1514,7 @@ static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
 #define IXGBE_WRITE_REG_BE32(a, reg, value) \
 	IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(ntohl(value)))
 
-#define IXGBE_STORE_AS_BE16(_value) \
-	ntohs(((u16)(_value) >> 8) | ((u16)(_value) << 8))
+#define IXGBE_STORE_AS_BE16(_value) __swab16(ntohs((_value)))
 
 s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
 				    union ixgbe_atr_input *input_mask)
@@ -1651,13 +1650,13 @@ s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
 	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
 
 	/* record source and destination port (little-endian)*/
-	fdirport = ntohs(input->formatted.dst_port);
+	fdirport = be16_to_cpu(input->formatted.dst_port);
 	fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT;
-	fdirport |= ntohs(input->formatted.src_port);
+	fdirport |= be16_to_cpu(input->formatted.src_port);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
 
 	/* record vlan (little-endian) and flex_bytes(big-endian) */
-	fdirvlan = IXGBE_STORE_AS_BE16((__force u16)input->formatted.flex_bytes);
+	fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
 	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
 	fdirvlan |= ntohs(input->formatted.vlan_id);
 	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);

From 205523bc06ceceac2a22a28a4f27b2737e318c92 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 25 Mar 2021 17:38:34 -0700
Subject: [PATCH 0567/3292] ixgbe: reduce checker warnings

Fix the sparse warnings in the ixgbe crypto offload code. These
changes were made in the most conservative way (force cast)
in order to hopefully not break the code. I suspect that the
code might still be broken on big-endian architectures, but
no one is complaining, so I'm just leaving it functionally
the same.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Shannon Nelson <snelson@pensando.io>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 54d47265a7ac..e596e1a9fc75 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -511,14 +511,14 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)
 					continue;
 
 				reg = IXGBE_READ_REG(hw, MIPAF_ARR(3, i));
-				if (reg == xs->id.daddr.a4)
+				if (reg == (__force u32)xs->id.daddr.a4)
 					return 1;
 			}
 		}
 
 		if ((bmcipval & BMCIP_MASK) == BMCIP_V4) {
 			reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(3));
-			if (reg == xs->id.daddr.a4)
+			if (reg == (__force u32)xs->id.daddr.a4)
 				return 1;
 		}
 
@@ -533,7 +533,7 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)
 
 			for (j = 0; j < 4; j++) {
 				reg = IXGBE_READ_REG(hw, MIPAF_ARR(i, j));
-				if (reg != xs->id.daddr.a6[j])
+				if (reg != (__force u32)xs->id.daddr.a6[j])
 					break;
 			}
 			if (j == 4)   /* did we match all 4 words? */
@@ -543,7 +543,7 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)
 		if ((bmcipval & BMCIP_MASK) == BMCIP_V6) {
 			for (j = 0; j < 4; j++) {
 				reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(j));
-				if (reg != xs->id.daddr.a6[j])
+				if (reg != (__force u32)xs->id.daddr.a6[j])
 					break;
 			}
 			if (j == 4)   /* did we match all 4 words? */

From d6a6a55518c16040a369360255b355b7a2a261de Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Wed, 26 May 2021 18:46:43 +0200
Subject: [PATCH 0568/3292] libbpf: Move BPF_SEQ_PRINTF and BPF_SNPRINTF to
 bpf_helpers.h

These macros are convenient wrappers around the bpf_seq_printf and
bpf_snprintf helpers. They are currently provided by bpf_tracing.h which
targets low level tracing primitives. bpf_helpers.h is a better fit.

The __bpf_narg and __bpf_apply are needed in both files and provided
twice. __bpf_empty isn't used anywhere and is removed from bpf_tracing.h

Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210526164643.2881368-1-revest@chromium.org
---
 kernel/bpf/preload/iterators/iterators.bpf.c  |  1 -
 tools/lib/bpf/bpf_helpers.h                   | 66 +++++++++++++++++++
 tools/lib/bpf/bpf_tracing.h                   | 62 +++--------------
 .../bpf/progs/bpf_iter_bpf_hash_map.c         |  1 -
 .../selftests/bpf/progs/bpf_iter_bpf_map.c    |  1 -
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c |  1 -
 .../selftests/bpf/progs/bpf_iter_netlink.c    |  1 -
 .../selftests/bpf/progs/bpf_iter_task.c       |  1 -
 .../selftests/bpf/progs/bpf_iter_task_btf.c   |  1 -
 .../selftests/bpf/progs/bpf_iter_task_file.c  |  1 -
 .../selftests/bpf/progs/bpf_iter_task_stack.c |  1 -
 .../selftests/bpf/progs/bpf_iter_task_vma.c   |  1 -
 .../selftests/bpf/progs/bpf_iter_tcp4.c       |  1 -
 .../selftests/bpf/progs/bpf_iter_tcp6.c       |  1 -
 .../selftests/bpf/progs/bpf_iter_udp4.c       |  1 -
 .../selftests/bpf/progs/bpf_iter_udp6.c       |  1 -
 .../selftests/bpf/progs/test_snprintf.c       |  1 -
 17 files changed, 74 insertions(+), 69 deletions(-)

diff --git a/kernel/bpf/preload/iterators/iterators.bpf.c b/kernel/bpf/preload/iterators/iterators.bpf.c
index 52aa7b38e8b8..03af863314ea 100644
--- a/kernel/bpf/preload/iterators/iterators.bpf.c
+++ b/kernel/bpf/preload/iterators/iterators.bpf.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
 #pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 9720dc0b4605..b9987c3efa3c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -158,4 +158,70 @@ enum libbpf_tristate {
 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
 
+#ifndef ___bpf_concat
+#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) \
+	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+	___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
+ * in a structure.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...)			\
+({								\
+	static const char ___fmt[] = fmt;			\
+	unsigned long long ___param[___bpf_narg(args)];		\
+								\
+	_Pragma("GCC diagnostic push")				\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
+	___bpf_fill(___param, args);				\
+	_Pragma("GCC diagnostic pop")				\
+								\
+	bpf_seq_printf(seq, ___fmt, sizeof(___fmt),		\
+		       ___param, sizeof(___param));		\
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...)		\
+({								\
+	static const char ___fmt[] = fmt;			\
+	unsigned long long ___param[___bpf_narg(args)];		\
+								\
+	_Pragma("GCC diagnostic push")				\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
+	___bpf_fill(___param, args);				\
+	_Pragma("GCC diagnostic pop")				\
+								\
+	bpf_snprintf(out, out_size, ___fmt,			\
+		     ___param, sizeof(___param));		\
+})
+
 #endif
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 8c954ebc0c7c..c0f3a26aa582 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -295,13 +295,19 @@ struct pt_regs;
 			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
 
+#ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
 #define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
 #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
 #define ___bpf_narg(...) \
 	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
-	___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+#endif
 
 #define ___bpf_ctx_cast0() ctx
 #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
@@ -413,56 +419,4 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 
-#define ___bpf_fill0(arr, p, x) do {} while (0)
-#define ___bpf_fill1(arr, p, x) arr[p] = x
-#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
-#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
-#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
-#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
-#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
-#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
-#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
-#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
-#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
-#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
-#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
-#define ___bpf_fill(arr, args...) \
-	___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
-
-/*
- * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
- * in a structure.
- */
-#define BPF_SEQ_PRINTF(seq, fmt, args...)			\
-({								\
-	static const char ___fmt[] = fmt;			\
-	unsigned long long ___param[___bpf_narg(args)];		\
-								\
-	_Pragma("GCC diagnostic push")				\
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
-	___bpf_fill(___param, args);				\
-	_Pragma("GCC diagnostic pop")				\
-								\
-	bpf_seq_printf(seq, ___fmt, sizeof(___fmt),		\
-		       ___param, sizeof(___param));		\
-})
-
-/*
- * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
- * an array of u64.
- */
-#define BPF_SNPRINTF(out, out_size, fmt, args...)		\
-({								\
-	static const char ___fmt[] = fmt;			\
-	unsigned long long ___param[___bpf_narg(args)];		\
-								\
-	_Pragma("GCC diagnostic push")				\
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
-	___bpf_fill(___param, args);				\
-	_Pragma("GCC diagnostic pop")				\
-								\
-	bpf_snprintf(out, out_size, ___fmt,			\
-		     ___param, sizeof(___param));		\
-})
-
 #endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 6dfce3fd68bc..0aa3cd34cbe3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b83b5d2e17dc..6c39e86b666f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index d58d9f1642b5..784a610ce039 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 95989f4c99b5..a28e51e2dcee 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index b7f32c160f4e..c86b93f33b32 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
index a1ddc36f13ec..bca8b889cb10 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020, Oracle and/or its affiliates. */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
 #include <errno.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index b2f7c7c5f952..6e7b400888fe 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 43c36f5f7649..f2b8167b72a8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
index 11d1aa37cf11..4ea6a37d1345 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 54380c5e1069..2e4775c35414 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index b4fbddfa4e10..943f7bba180e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index f258583afbbd..cf0c485b1ed7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 65f93bb03f0f..5031e21c433f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e35129bea0a0..e2ad26150f9b 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -3,7 +3,6 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 __u32 pid = 0;
 

From 568e7142a15f821867a39797f5b098070df4a9c8 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Wed, 26 May 2021 08:56:51 +0800
Subject: [PATCH 0569/3292] nfc: st95hf: remove unnecessary assignment and
 label

In function st95hf_in_send_cmd, the variable rc is assigned then goto
error label, which just returns rc, so we use return to replace it.
Since error label only used once in the function, so we remove error label.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210526005651.12652-1-samirweng1979@163.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st95hf/core.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 457854765983..88924be8decb 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -926,10 +926,8 @@ static int st95hf_in_send_cmd(struct nfc_digital_dev *ddev,
 	int len_data_to_tag = 0;
 
 	skb_resp = nfc_alloc_recv_skb(MAX_RESPONSE_BUFFER_SIZE, GFP_KERNEL);
-	if (!skb_resp) {
-		rc = -ENOMEM;
-		goto error;
-	}
+	if (!skb_resp)
+		return -ENOMEM;
 
 	switch (stcontext->current_rf_tech) {
 	case NFC_DIGITAL_RF_TECH_106A:
@@ -986,7 +984,6 @@ static int st95hf_in_send_cmd(struct nfc_digital_dev *ddev,
 
 free_skb_resp:
 	kfree_skb(skb_resp);
-error:
 	return rc;
 }
 

From c7a551b2e44a65170b5dceaca0afbd59f3715f11 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Wed, 26 May 2021 09:16:24 +0800
Subject: [PATCH 0570/3292] nfc: st-nci: remove unnecessary labels

Some labels are only used once, so we delete them and use the
return statement instead of the goto statement.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210526011624.11204-1-samirweng1979@163.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st-nci/vendor_cmds.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/nfc/st-nci/vendor_cmds.c b/drivers/nfc/st-nci/vendor_cmds.c
index c6a9d30a4dba..94b600029a2a 100644
--- a/drivers/nfc/st-nci/vendor_cmds.c
+++ b/drivers/nfc/st-nci/vendor_cmds.c
@@ -98,7 +98,7 @@ static int st_nci_hci_dm_get_info(struct nfc_dev *dev, void *data,
 	r = nci_hci_send_cmd(ndev, ST_NCI_DEVICE_MGNT_GATE, ST_NCI_HCI_DM_GETINFO,
 			     data, data_len, &skb);
 	if (r)
-		goto exit;
+		return r;
 
 	msg = nfc_vendor_cmd_alloc_reply_skb(dev, ST_NCI_VENDOR_OUI,
 					     HCI_DM_GET_INFO, skb->len);
@@ -117,7 +117,6 @@ static int st_nci_hci_dm_get_info(struct nfc_dev *dev, void *data,
 
 free_skb:
 	kfree_skb(skb);
-exit:
 	return r;
 }
 
@@ -131,7 +130,7 @@ static int st_nci_hci_dm_get_data(struct nfc_dev *dev, void *data,
 	r = nci_hci_send_cmd(ndev, ST_NCI_DEVICE_MGNT_GATE, ST_NCI_HCI_DM_GETDATA,
 			     data, data_len, &skb);
 	if (r)
-		goto exit;
+		return r;
 
 	msg = nfc_vendor_cmd_alloc_reply_skb(dev, ST_NCI_VENDOR_OUI,
 					     HCI_DM_GET_DATA, skb->len);
@@ -150,7 +149,6 @@ static int st_nci_hci_dm_get_data(struct nfc_dev *dev, void *data,
 
 free_skb:
 	kfree_skb(skb);
-exit:
 	return r;
 }
 
@@ -216,7 +214,7 @@ static int st_nci_hci_get_param(struct nfc_dev *dev, void *data,
 
 	r = nci_hci_get_param(ndev, param->gate, param->data, &skb);
 	if (r)
-		goto exit;
+		return r;
 
 	msg = nfc_vendor_cmd_alloc_reply_skb(dev, ST_NCI_VENDOR_OUI,
 					     HCI_GET_PARAM, skb->len);
@@ -235,7 +233,6 @@ static int st_nci_hci_get_param(struct nfc_dev *dev, void *data,
 
 free_skb:
 	kfree_skb(skb);
-exit:
 	return r;
 }
 
@@ -262,7 +259,7 @@ static int st_nci_hci_dm_vdc_measurement_value(struct nfc_dev *dev, void *data,
 			     ST_NCI_HCI_DM_VDC_MEASUREMENT_VALUE,
 			     data, data_len, &skb);
 	if (r)
-		goto exit;
+		return r;
 
 	msg = nfc_vendor_cmd_alloc_reply_skb(dev, ST_NCI_VENDOR_OUI,
 				HCI_DM_VDC_MEASUREMENT_VALUE, skb->len);
@@ -281,7 +278,6 @@ static int st_nci_hci_dm_vdc_measurement_value(struct nfc_dev *dev, void *data,
 
 free_skb:
 	kfree_skb(skb);
-exit:
 	return r;
 }
 
@@ -299,7 +295,7 @@ static int st_nci_hci_dm_vdc_value_comparison(struct nfc_dev *dev, void *data,
 			     ST_NCI_HCI_DM_VDC_VALUE_COMPARISON,
 			     data, data_len, &skb);
 	if (r)
-		goto exit;
+		return r;
 
 	msg = nfc_vendor_cmd_alloc_reply_skb(dev, ST_NCI_VENDOR_OUI,
 					HCI_DM_VDC_VALUE_COMPARISON, skb->len);
@@ -318,7 +314,6 @@ static int st_nci_hci_dm_vdc_value_comparison(struct nfc_dev *dev, void *data,
 
 free_skb:
 	kfree_skb(skb);
-exit:
 	return r;
 }
 

From 125217e0967fc905be35a3b2c9ba4db9a8616b92 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 25 May 2021 18:00:38 -0500
Subject: [PATCH 0571/3292] i40e: Replace one-element array with flexible-array
 member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code
should always use “flexible array members”[1] for these cases. The older
style of one-element or zero-length arrays should no longer be used[2].

Refactor the code according to the use of a flexible-array member in struct
i40e_qvlist_info instead of one-element array, and use the struct_size()
helper.

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays

Link: https://github.com/KSPP/linux/issues/79
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Acked-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/infiniband/hw/i40iw/i40iw_main.c      | 5 ++---
 drivers/net/ethernet/intel/i40e/i40e_client.c | 2 +-
 include/linux/net/intel/i40e_client.h         | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index b496f30ce066..364f69cd620f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1423,7 +1423,7 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
 	struct i40e_qv_info *iw_qvinfo;
 	u32 ceq_idx;
 	u32 i;
-	u32 size;
+	size_t size;
 
 	if (!ldev->msix_count) {
 		i40iw_pr_err("No MSI-X vectors\n");
@@ -1433,8 +1433,7 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
 	iwdev->msix_count = ldev->msix_count;
 
 	size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
-	size += sizeof(struct i40e_qvlist_info);
-	size +=  sizeof(struct i40e_qv_info) * iwdev->msix_count - 1;
+	size += struct_size(iw_qvlist, qv_info, iwdev->msix_count);
 	iwdev->iw_msixtbl = kzalloc(size, GFP_KERNEL);
 
 	if (!iwdev->iw_msixtbl)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 32f3facbed1a..63eab14a26df 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -579,7 +579,7 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev,
 	u32 v_idx, i, reg_idx, reg;
 
 	ldev->qvlist_info = kzalloc(struct_size(ldev->qvlist_info, qv_info,
-				    qvlist_info->num_vectors - 1), GFP_KERNEL);
+				    qvlist_info->num_vectors), GFP_KERNEL);
 	if (!ldev->qvlist_info)
 		return -ENOMEM;
 	ldev->qvlist_info->num_vectors = qvlist_info->num_vectors;
diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h
index f41387a8969f..fd7bc860a241 100644
--- a/include/linux/net/intel/i40e_client.h
+++ b/include/linux/net/intel/i40e_client.h
@@ -48,7 +48,7 @@ struct i40e_qv_info {
 
 struct i40e_qvlist_info {
 	u32 num_vectors;
-	struct i40e_qv_info qv_info[1];
+	struct i40e_qv_info qv_info[];
 };
 
 

From 18c8d3044d9c1ad2c3f447f30e4a25fd96ae3603 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 26 May 2021 18:21:04 +0800
Subject: [PATCH 0572/3292] net/appletalk: Fix inconsistent indenting

Eliminate the follow smatch warning:

drivers/net/appletalk/ltpc.c:588 idle() warn: inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1622024464-29896-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/appletalk/ltpc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index f0e715a93852..69c270885ff0 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -584,11 +584,13 @@ loop:
 						printk("%02x ",ltdmacbuf[i]);
 					printk("\n");
 				}
+
 				handlecommand(dev);
-					if(0xfa==inb_p(base+6)) {
-						/* we timed out, so return */
-						goto done;
-					} 
+
+				if (0xfa == inb_p(base + 6)) {
+					/* we timed out, so return */
+					goto done;
+				}
 			} else {
 				/* we don't seem to have a command */
 				if (!mboxinuse[0]) {

From 75a78026ea1307ef6d6924cc22be3ce9bf453c63 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 6 May 2021 19:55:53 +0200
Subject: [PATCH 0573/3292] dt-bindings: can: rcar_can: Convert to json-schema

Convert the Renesas R-Car CAN Controller Device Tree binding
documentation to json-schema.

Document missing properties.
Update the example to match reality.

Link: https://lore.kernel.org/r/561c35648e22a3c1e3b5477ae27fd1a50da7fe98.1620323639.git.geert+renesas@glider.be
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Ulrich Hecht <uli+renesas@fpond.eu>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .../devicetree/bindings/net/can/rcar_can.txt  |  80 ----------
 .../bindings/net/can/renesas,rcar-can.yaml    | 139 ++++++++++++++++++
 2 files changed, 139 insertions(+), 80 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/can/rcar_can.txt
 create mode 100644 Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml

diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
deleted file mode 100644
index 90ac4fef23f5..000000000000
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ /dev/null
@@ -1,80 +0,0 @@
-Renesas R-Car CAN controller Device Tree Bindings
--------------------------------------------------
-
-Required properties:
-- compatible: "renesas,can-r8a7742" if CAN controller is a part of R8A7742 SoC.
-	      "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
-	      "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
-	      "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
-	      "renesas,can-r8a77470" if CAN controller is a part of R8A77470 SoC.
-	      "renesas,can-r8a774a1" if CAN controller is a part of R8A774A1 SoC.
-	      "renesas,can-r8a774b1" if CAN controller is a part of R8A774B1 SoC.
-	      "renesas,can-r8a774c0" if CAN controller is a part of R8A774C0 SoC.
-	      "renesas,can-r8a774e1" if CAN controller is a part of R8A774E1 SoC.
-	      "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
-	      "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
-	      "renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
-	      "renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC.
-	      "renesas,can-r8a7792" if CAN controller is a part of R8A7792 SoC.
-	      "renesas,can-r8a7793" if CAN controller is a part of R8A7793 SoC.
-	      "renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC.
-	      "renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
-	      "renesas,can-r8a7796" if CAN controller is a part of R8A77960 SoC.
-	      "renesas,can-r8a77961" if CAN controller is a part of R8A77961 SoC.
-	      "renesas,can-r8a77965" if CAN controller is a part of R8A77965 SoC.
-	      "renesas,can-r8a77990" if CAN controller is a part of R8A77990 SoC.
-	      "renesas,can-r8a77995" if CAN controller is a part of R8A77995 SoC.
-	      "renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
-	      "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
-	      compatible device.
-	      "renesas,rcar-gen3-can" for a generic R-Car Gen3 or RZ/G2
-	      compatible device.
-	      When compatible with the generic version, nodes must list the
-	      SoC-specific version corresponding to the platform first
-	      followed by the generic version.
-
-- reg: physical base address and size of the R-Car CAN register map.
-- interrupts: interrupt specifier for the sole interrupt.
-- clocks: phandles and clock specifiers for 3 CAN clock inputs.
-- clock-names: 3 clock input name strings: "clkp1", "clkp2", and "can_clk".
-- pinctrl-0: pin control group to be used for this controller.
-- pinctrl-names: must be "default".
-
-Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795,
-R8A77960, R8A77961, R8A77965, R8A77990, and R8A77995:
-For the denoted SoCs, "clkp2" can be CANFD clock. This is a div6 clock and can
-be used by both CAN and CAN FD controller at the same time. It needs to be
-scaled to maximum frequency if any of these controllers use it. This is done
-using the below properties:
-
-- assigned-clocks: phandle of clkp2(CANFD) clock.
-- assigned-clock-rates: maximum frequency of this clock.
-
-Optional properties:
-- renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are:
-			    <0x0> (default) : Peripheral clock (clkp1)
-			    <0x1> : Peripheral clock (clkp2)
-			    <0x3> : External input clock
-
-Example
--------
-
-SoC common .dtsi file:
-
-	can0: can@e6e80000 {
-		compatible = "renesas,can-r8a7791", "renesas,rcar-gen2-can";
-		reg = <0 0xe6e80000 0 0x1000>;
-		interrupts = <0 186 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp9_clks R8A7791_CLK_RCAN0>,
-			 <&cpg_clocks R8A7791_CLK_RCAN>, <&can_clk>;
-		clock-names = "clkp1", "clkp2", "can_clk";
-		status = "disabled";
-	};
-
-Board specific .dts file:
-
-&can0 {
-	pinctrl-0 = <&can0_pins>;
-	pinctrl-names = "default";
-	status = "okay";
-};
diff --git a/Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml b/Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml
new file mode 100644
index 000000000000..fadc871fd6b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/renesas,rcar-can.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car CAN Controller
+
+maintainers:
+  - Sergei Shtylyov <sergei.shtylyov@gmail.com>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - renesas,can-r8a7778      # R-Car M1-A
+              - renesas,can-r8a7779      # R-Car H1
+          - const: renesas,rcar-gen1-can # R-Car Gen1
+
+      - items:
+          - enum:
+              - renesas,can-r8a7742      # RZ/G1H
+              - renesas,can-r8a7743      # RZ/G1M
+              - renesas,can-r8a7744      # RZ/G1N
+              - renesas,can-r8a7745      # RZ/G1E
+              - renesas,can-r8a77470     # RZ/G1C
+              - renesas,can-r8a7790      # R-Car H2
+              - renesas,can-r8a7791      # R-Car M2-W
+              - renesas,can-r8a7792      # R-Car V2H
+              - renesas,can-r8a7793      # R-Car M2-N
+              - renesas,can-r8a7794      # R-Car E2
+          - const: renesas,rcar-gen2-can # R-Car Gen2 and RZ/G1
+
+      - items:
+          - enum:
+              - renesas,can-r8a774a1     # RZ/G2M
+              - renesas,can-r8a774b1     # RZ/G2N
+              - renesas,can-r8a774c0     # RZ/G2E
+              - renesas,can-r8a774e1     # RZ/G2H
+              - renesas,can-r8a7795      # R-Car H3
+              - renesas,can-r8a7796      # R-Car M3-W
+              - renesas,can-r8a77961     # R-Car M3-W+
+              - renesas,can-r8a77965     # R-Car M3-N
+              - renesas,can-r8a77990     # R-Car E3
+              - renesas,can-r8a77995     # R-Car D3
+          - const: renesas,rcar-gen3-can # R-Car Gen3 and RZ/G2
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: clkp1
+      - const: clkp2
+      - const: can_clk
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  renesas,can-clock-select:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1, 3 ]
+    default: 0
+    description: |
+      R-Car CAN Clock Source Select.  Valid values are:
+        <0x0> (default) : Peripheral clock (clkp1)
+        <0x1> : Peripheral clock (clkp2)
+        <0x3> : External input clock
+
+  assigned-clocks:
+    description:
+      Reference to the clkp2 (CANFD) clock.
+      On R-Car Gen3 and RZ/G2 SoCs, "clkp2" is the CANFD clock.  This is a div6
+      clock and can be used by both CAN and CAN FD controllers at the same
+      time.  It needs to be scaled to maximum frequency if any of these
+      controllers use it.
+
+  assigned-clock-rates:
+    description: Maximum frequency of the CANFD clock.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - power-domains
+
+allOf:
+  - $ref: can-controller.yaml#
+
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              const: renesas,rcar-gen1-can
+    then:
+      required:
+        - resets
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: renesas,rcar-gen3-can
+    then:
+      required:
+        - assigned-clocks
+        - assigned-clock-rates
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7791-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a7791-sysc.h>
+
+    can0: can@e6e80000 {
+            compatible = "renesas,can-r8a7791", "renesas,rcar-gen2-can";
+            reg = <0xe6e80000 0x1000>;
+            interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&cpg CPG_MOD 916>,
+                     <&cpg CPG_CORE R8A7791_CLK_RCAN>, <&can_clk>;
+            clock-names = "clkp1", "clkp2", "can_clk";
+            power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
+            resets = <&cpg 916>;
+    };

From 8a5e7d19c8c747e3e7bfa0283a54742b103afcb5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 6 May 2021 19:55:54 +0200
Subject: [PATCH 0574/3292] dt-bindings: can: rcar_canfd: Convert to
 json-schema

Convert the Renesas R-Car CAN FD Controller Device Tree binding
documentation to json-schema.

Document missing properties.
The CANFD clock needs to be configured for the maximum frequency on
R-Car V3M and V3H, too.
Update the example to match reality.

Link: https://lore.kernel.org/r/905134c87f72e2d8e37c309e0ce28ecd7d4f3992.1620323639.git.geert+renesas@glider.be
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Ulrich Hecht <uli+renesas@fpond.eu>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .../bindings/net/can/rcar_canfd.txt           | 107 ---------------
 .../bindings/net/can/renesas,rcar-canfd.yaml  | 122 ++++++++++++++++++
 2 files changed, 122 insertions(+), 107 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/can/rcar_canfd.txt
 create mode 100644 Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml

diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
deleted file mode 100644
index 248c4ed97a0a..000000000000
--- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-Renesas R-Car CAN FD controller Device Tree Bindings
-----------------------------------------------------
-
-Required properties:
-- compatible: Must contain one or more of the following:
-  - "renesas,rcar-gen3-canfd" for R-Car Gen3 and RZ/G2 compatible controllers.
-  - "renesas,r8a774a1-canfd" for R8A774A1 (RZ/G2M) compatible controller.
-  - "renesas,r8a774b1-canfd" for R8A774B1 (RZ/G2N) compatible controller.
-  - "renesas,r8a774c0-canfd" for R8A774C0 (RZ/G2E) compatible controller.
-  - "renesas,r8a774e1-canfd" for R8A774E1 (RZ/G2H) compatible controller.
-  - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
-  - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller.
-  - "renesas,r8a77965-canfd" for R8A77965 (R-Car M3-N) compatible controller.
-  - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
-  - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller.
-  - "renesas,r8a77990-canfd" for R8A77990 (R-Car E3) compatible controller.
-  - "renesas,r8a77995-canfd" for R8A77995 (R-Car D3) compatible controller.
-
-  When compatible with the generic version, nodes must list the
-  SoC-specific version corresponding to the platform first, followed by the
-  family-specific and/or generic versions.
-
-- reg: physical base address and size of the R-Car CAN FD register map.
-- interrupts: interrupt specifiers for the Channel & Global interrupts
-- clocks: phandles and clock specifiers for 3 clock inputs.
-- clock-names: 3 clock input name strings: "fck", "canfd", "can_clk".
-- pinctrl-0: pin control group to be used for this controller.
-- pinctrl-names: must be "default".
-
-Required child nodes:
-The controller supports two channels and each is represented as a child node.
-The name of the child nodes are "channel0" and "channel1" respectively. Each
-child node supports the "status" property only, which is used to
-enable/disable the respective channel.
-
-Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795,
-R8A7796, R8A77965, R8A77990, and R8A77995:
-In the denoted SoCs, canfd clock is a div6 clock and can be used by both CAN
-and CAN FD controller at the same time. It needs to be scaled to maximum
-frequency if any of these controllers use it. This is done using the below
-properties:
-
-- assigned-clocks: phandle of canfd clock.
-- assigned-clock-rates: maximum frequency of this clock.
-
-Optional property:
-The controller can operate in either CAN FD only mode (default) or
-Classical CAN only mode. The mode is global to both the channels. In order to
-enable the later, define the following optional property.
- - renesas,no-can-fd: puts the controller in Classical CAN only mode.
-
-Example
--------
-
-SoC common .dtsi file:
-
-		canfd: can@e66c0000 {
-			compatible = "renesas,r8a7795-canfd",
-				     "renesas,rcar-gen3-canfd";
-			reg = <0 0xe66c0000 0 0x8000>;
-			interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
-				   <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-			clocks = <&cpg CPG_MOD 914>,
-			       <&cpg CPG_CORE R8A7795_CLK_CANFD>,
-			       <&can_clk>;
-			clock-names = "fck", "canfd", "can_clk";
-			assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>;
-			assigned-clock-rates = <40000000>;
-			power-domains = <&cpg>;
-			status = "disabled";
-
-			channel0 {
-				status = "disabled";
-			};
-
-			channel1 {
-				status = "disabled";
-			};
-		};
-
-Board specific .dts file:
-
-E.g. below enables Channel 1 alone in the board in Classical CAN only mode.
-
-&canfd {
-	pinctrl-0 = <&canfd1_pins>;
-	pinctrl-names = "default";
-	renesas,no-can-fd;
-	status = "okay";
-
-	channel1 {
-		status = "okay";
-	};
-};
-
-E.g. below enables Channel 0 alone in the board using External clock
-as fCAN clock.
-
-&canfd {
-	pinctrl-0 = <&canfd0_pins>, <&can_clk_pins>;
-	pinctrl-names = "default";
-	status = "okay";
-
-	channel0 {
-		status = "okay";
-	};
-};
diff --git a/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml
new file mode 100644
index 000000000000..0b33ba9ccb47
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/renesas,rcar-canfd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car CAN FD Controller
+
+maintainers:
+  - Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+
+allOf:
+  - $ref: can-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - renesas,r8a774a1-canfd     # RZ/G2M
+              - renesas,r8a774b1-canfd     # RZ/G2N
+              - renesas,r8a774c0-canfd     # RZ/G2E
+              - renesas,r8a774e1-canfd     # RZ/G2H
+              - renesas,r8a7795-canfd      # R-Car H3
+              - renesas,r8a7796-canfd      # R-Car M3-W
+              - renesas,r8a77965-canfd     # R-Car M3-N
+              - renesas,r8a77970-canfd     # R-Car V3M
+              - renesas,r8a77980-canfd     # R-Car V3H
+              - renesas,r8a77990-canfd     # R-Car E3
+              - renesas,r8a77995-canfd     # R-Car D3
+          - const: renesas,rcar-gen3-canfd # R-Car Gen3 and RZ/G2
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: Channel interrupt
+      - description: Global interrupt
+
+  clocks:
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: fck
+      - const: canfd
+      - const: can_clk
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  renesas,no-can-fd:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The controller can operate in either CAN FD only mode (default) or
+      Classical CAN only mode.  The mode is global to both the channels.
+      Specify this property to put the controller in Classical CAN only mode.
+
+  assigned-clocks:
+    description:
+      Reference to the CANFD clock.  The CANFD clock is a div6 clock and can be
+      used by both CAN (if present) and CAN FD controllers at the same time.
+      It needs to be scaled to maximum frequency if any of these controllers
+      use it.
+
+  assigned-clock-rates:
+    description: Maximum frequency of the CANFD clock.
+
+patternProperties:
+  "^channel[01]$":
+    type: object
+    description:
+      The controller supports two channels and each is represented as a child
+      node.  Each child node supports the "status" property only, which
+      is used to enable/disable the respective channel.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - power-domains
+  - resets
+  - assigned-clocks
+  - assigned-clock-rates
+  - channel0
+  - channel1
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a7795-sysc.h>
+
+    canfd: can@e66c0000 {
+            compatible = "renesas,r8a7795-canfd",
+                         "renesas,rcar-gen3-canfd";
+            reg = <0xe66c0000 0x8000>;
+            interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+                         <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&cpg CPG_MOD 914>,
+                     <&cpg CPG_CORE R8A7795_CLK_CANFD>,
+                     <&can_clk>;
+            clock-names = "fck", "canfd", "can_clk";
+            assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>;
+            assigned-clock-rates = <40000000>;
+            power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+            resets = <&cpg 914>;
+
+            channel0 {
+            };
+
+            channel1 {
+            };
+    };

From 7e97d274db920df479e222fed10e7b242f90ffb0 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 17 May 2021 13:24:25 +0200
Subject: [PATCH 0575/3292] can: uapi: update CAN-FD frame description

Since an early version of the CAN-FD specification the bit that
defines a CAN-FD frame on the wire, has been renamed from Extended
Data Length (EDL) to FD Frame (FDF).

To avoid confusion, update the struct canfd_frame description in the
UAPI headers accordingly.

Link: https://lore.kernel.org/r/20210517113727.77597-1-mkl@pengutronix.de
Suggested-by: Ayoub Kaanich <kayoub5@live.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index c7535352fef6..ac5d7a31671f 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -123,8 +123,8 @@ struct can_frame {
 /*
  * defined bits for canfd_frame.flags
  *
- * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to
- * be set in the CAN frame bitstream on the wire. The EDL bit switch turns
+ * The use of struct canfd_frame implies the FD Frame (FDF) bit to
+ * be set in the CAN frame bitstream on the wire. The FDF bit switch turns
  * the CAN controllers bitstream processor into the CAN FD mode which creates
  * two new options within the CAN FD frame specification:
  *

From 02546884221279da2725e87e35348290470363d7 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 11 Apr 2017 15:43:43 +0200
Subject: [PATCH 0576/3292] can: uapi: introduce CANFD_FDF flag for mixed
 content in struct canfd_frame

The struct can_frame and struct canfd_frame intentionally share the
same layout to be able to write CAN frame content into a CAN FD frame
structure. When this is done the former differentiation via CAN_MTU /
CANFD_MTU is lost. CANFD_FDF allows programmers to mark CAN FD frames
in the case of using struct canfd_frame for mixed CAN/CAN FD
content (dual use).

N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside
of struct canfd_frame therefore the CANFD_FDF flag is disregarded by
Linux.

Link: https://lore.kernel.org/r/20170411134343.3089-1-socketcan@hartkopp.net
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index ac5d7a31671f..90801ada2bbe 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -135,9 +135,18 @@ struct can_frame {
  * controller only the CANFD_BRS bit is relevant for real CAN controllers when
  * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make
  * sense for virtual CAN interfaces to test applications with echoed frames.
+ *
+ * The struct can_frame and struct canfd_frame intentionally share the same
+ * layout to be able to write CAN frame content into a CAN FD frame structure.
+ * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets
+ * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of
+ * using struct canfd_frame for mixed CAN / CAN FD content (dual use).
+ * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of
+ * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux.
  */
 #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */
 #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */
+#define CANFD_FDF 0x04 /* mark CAN FD for dual use of struct canfd_frame */
 
 /**
  * struct canfd_frame - CAN flexible data rate frame structure

From 24a774a4f9750ecd37d7aaeacfc04a844b9cf20b Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Fri, 14 May 2021 18:08:06 +0800
Subject: [PATCH 0577/3292] can: proc: remove unnecessary variables

There is no need to define the variable "rate" to receive, just return
directly.

Link: https://lore.kernel.org/r/20210514100806.792-1-zuoqilin1@163.com
Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/proc.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/can/proc.c b/net/can/proc.c
index d1fe49e6f16d..b3099f0a3cb8 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -99,8 +99,6 @@ static void can_init_stats(struct net *net)
 static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 			       unsigned long count)
 {
-	unsigned long rate;
-
 	if (oldjif == newjif)
 		return 0;
 
@@ -111,9 +109,7 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 		return 99999999;
 	}
 
-	rate = (count * HZ) / (newjif - oldjif);
-
-	return rate;
+	return (count * HZ) / (newjif - oldjif);
 }
 
 void can_stat_update(struct timer_list *t)

From 46d8657a6b284e32b6b3bf1a6c93ee507fdd3cdb Mon Sep 17 00:00:00 2001
From: Patrick Menschel <menschel.p@posteo.de>
Date: Tue, 27 Apr 2021 05:21:47 +0000
Subject: [PATCH 0578/3292] can: isotp: change error format from decimal to
 symbolic error names

This patch changes the format string for errors from decimal %d to
symbolic error names %pe to achieve more comprehensive log messages.

Link: https://lore.kernel.org/r/20210427052150.2308-2-menschel.p@posteo.de
Signed-off-by: Patrick Menschel <menschel.p@posteo.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 9f94ad3caee9..2c4f84fac70a 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -221,8 +221,8 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
 
 	can_send_ret = can_send(nskb, 1);
 	if (can_send_ret)
-		pr_notice_once("can-isotp: %s: can_send_ret %d\n",
-			       __func__, can_send_ret);
+		pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+			       __func__, ERR_PTR(can_send_ret));
 
 	dev_put(dev);
 
@@ -798,8 +798,8 @@ isotp_tx_burst:
 
 		can_send_ret = can_send(skb, 1);
 		if (can_send_ret)
-			pr_notice_once("can-isotp: %s: can_send_ret %d\n",
-				       __func__, can_send_ret);
+			pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+				       __func__, ERR_PTR(can_send_ret));
 
 		if (so->tx.idx >= so->tx.len) {
 			/* we are done */
@@ -946,8 +946,8 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	err = can_send(skb, 1);
 	dev_put(dev);
 	if (err) {
-		pr_notice_once("can-isotp: %s: can_send_ret %d\n",
-			       __func__, err);
+		pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+			       __func__, ERR_PTR(err));
 		return err;
 	}
 

From 6a5ddae578842652719fb926b22f1d510fe50bee Mon Sep 17 00:00:00 2001
From: Patrick Menschel <menschel.p@posteo.de>
Date: Tue, 27 Apr 2021 05:21:48 +0000
Subject: [PATCH 0579/3292] can: isotp: add symbolic error message to
 isotp_module_init()

This patch adds the value of err with format %pe to the already
existing error message.

Link: https://lore.kernel.org/r/20210427052150.2308-3-menschel.p@posteo.de
Signed-off-by: Patrick Menschel <menschel.p@posteo.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 2c4f84fac70a..2075d8d9e6b6 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1433,7 +1433,7 @@ static __init int isotp_module_init(void)
 
 	err = can_proto_register(&isotp_can_proto);
 	if (err < 0)
-		pr_err("can: registration of isotp protocol failed\n");
+		pr_err("can: registration of isotp protocol failed %pe\n", ERR_PTR(err));
 
 	return err;
 }

From c69d190f7bb9a03cf5237d45a457993730d01605 Mon Sep 17 00:00:00 2001
From: Patrick Menschel <menschel.p@posteo.de>
Date: Tue, 27 Apr 2021 05:21:49 +0000
Subject: [PATCH 0580/3292] can: isotp: Add error message if txqueuelen is too
 small

This patch adds an additional error message in case that txqueuelen is
set too small and advices the user to increase txqueuelen.

This is likely to happen even with small transfers if txqueuelen is at
default value 10 frames.

Link: https://lore.kernel.org/r/20210427052150.2308-4-menschel.p@posteo.de
Signed-off-by: Patrick Menschel <menschel.p@posteo.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 2075d8d9e6b6..5ff11aaf0a79 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -797,10 +797,12 @@ isotp_tx_burst:
 		can_skb_set_owner(skb, sk);
 
 		can_send_ret = can_send(skb, 1);
-		if (can_send_ret)
+		if (can_send_ret) {
 			pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
 				       __func__, ERR_PTR(can_send_ret));
-
+			if (can_send_ret == -ENOBUFS)
+				pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n");
+		}
 		if (so->tx.idx >= so->tx.len) {
 			/* we are done */
 			so->tx.state = ISOTP_IDLE;

From 9208f7bf053a6fb32f9276a3b78c2b7fb3c0b0cb Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 27 Apr 2021 18:52:47 +0800
Subject: [PATCH 0581/3292] can: softing: Remove redundant variable ptr

The value stored to ptr in the calculations this patch removes is not
used, so the calculation and the assignment can be removed.

Cleans up the following clang-analyzer warning:

drivers/net/can/softing/softing_main.c:279:3: warning: Value stored to
'ptr' is never read [clang-analyzer-deadcode.DeadStores].

drivers/net/can/softing/softing_main.c:242:3: warning: Value stored to
'ptr' is never read [clang-analyzer-deadcode.DeadStores].

Link: https://lore.kernel.org/r/1619520767-80948-1-git-send-email-jiapeng.chong@linux.alibaba.com
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/softing/softing_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index c44f3411e561..cfc1325aad10 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -239,7 +239,6 @@ static int softing_handle_1(struct softing *card)
 				DPRAM_INFO_BUSSTATE2 : DPRAM_INFO_BUSSTATE]);
 		/* timestamp */
 		tmp_u32 = le32_to_cpup((void *)ptr);
-		ptr += 4;
 		ktime = softing_raw2ktime(card, tmp_u32);
 
 		++netdev->stats.rx_errors;
@@ -276,7 +275,6 @@ static int softing_handle_1(struct softing *card)
 		ktime = softing_raw2ktime(card, tmp_u32);
 		if (!(msg.can_id & CAN_RTR_FLAG))
 			memcpy(&msg.data[0], ptr, 8);
-		ptr += 8;
 		/* update socket */
 		if (cmd & CMD_ACK) {
 			/* acknowledge, was tx msg */

From 83415669d8d830034c96e1de8ffb09b153a53504 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 4 May 2021 21:55:10 +0200
Subject: [PATCH 0582/3292] can: hi311x: hi3110_can_probe(): silence clang
 warning

This patch silences the following clang warning:

| drivers/net/can/spi/hi311x.c:874:17: warning: cast to smaller integer type
| 'enum hi3110_model' from 'const void *' [-Wvoid-pointer-to-enum-cast]
|                 priv->model = (enum hi3110_model)of_id->data;
|                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 57e83fb9b746 ("can: hi311x: Add Holt HI-311x CAN driver")
Link: https://lore.kernel.org/r/20210504200520.1179635-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/hi311x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index 6f5d6d04a8b9..dd17b8c53e1c 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -871,7 +871,7 @@ static int hi3110_can_probe(struct spi_device *spi)
 		CAN_CTRLMODE_BERR_REPORTING;
 
 	if (of_id)
-		priv->model = (enum hi3110_model)of_id->data;
+		priv->model = (enum hi3110_model)(uintptr_t)of_id->data;
 	else
 		priv->model = spi_get_device_id(spi)->driver_data;
 	priv->net = net;

From 10462b3558d418929dad8312dd8ae5d40116c1eb Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 4 May 2021 21:55:10 +0200
Subject: [PATCH 0583/3292] can: mcp251x: mcp251x_can_probe(): silence clang
 warning

This patch silences the following clang warning:

| drivers/net/can/spi/mcp251x.c:1333:17: warning: cast to smaller integer type
| 'enum mcp251x_model' from 'const void *' [-Wvoid-pointer-to-enum-cast]
|                 priv->model = (enum mcp251x_model)match;
|                               ^~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 8de29a5c34a5 ("can: mcp251x: Make use of device property API")
Link: https://lore.kernel.org/r/20210504200520.1179635-2-mkl@pengutronix.de
Reported-by: kernel test robot <lkp@intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 173c6614086f..0579ab74f728 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -1330,7 +1330,7 @@ static int mcp251x_can_probe(struct spi_device *spi)
 	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES |
 		CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY;
 	if (match)
-		priv->model = (enum mcp251x_model)match;
+		priv->model = (enum mcp251x_model)(uintptr_t)match;
 	else
 		priv->model = spi_get_device_id(spi)->driver_data;
 	priv->net = net;

From b558e200d626b1761e4642e1acd2268fd30bddc6 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 5 May 2021 10:07:48 +0200
Subject: [PATCH 0584/3292] can: mcp251xfd: silence clang warning

This patch fixes the following clang warning, by marking the functions
as maybe unused. gcc doesn't complain about unused inline functions.

| drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c:564:1: warning: unused function 'mcp251xfd_chip_set_mode_nowait' [-Wunused-function]
| mcp251xfd_chip_set_mode_nowait(const struct mcp251xfd_priv *priv,
| ^
| 1 warning generated.

Link: https://lore.kernel.org/r/20210514153741.1958041-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index e0ae00e34c7b..47c3f408a799 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -560,7 +560,7 @@ mcp251xfd_chip_set_mode(const struct mcp251xfd_priv *priv,
 	return __mcp251xfd_chip_set_mode(priv, mode_req, false);
 }
 
-static inline int
+static inline int __maybe_unused
 mcp251xfd_chip_set_mode_nowait(const struct mcp251xfd_priv *priv,
 			       const u8 mode_req)
 {

From 4318b1aa22b7b44209ec5d079c83e3bae3b9c077 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 5 May 2021 10:06:24 +0200
Subject: [PATCH 0585/3292] can: at91_can: silence clang warning

This patch fixes the following clang warning, by marking the functions
as maybe unused. gcc doesn't complain about unused inline functions.

| drivers/net/can/at91_can.c:178:1: warning: unused function 'at91_is_sam9X5' [-Wunused-function]
| AT91_IS(9X5);
| ^
| drivers/net/can/at91_can.c:172:19: note: expanded from macro 'AT91_IS'
| static inline int at91_is_sam##_model(const struct at91_priv *priv) \
|                   ^
| <scratch space>:66:1: note: expanded from here
| at91_is_sam9X5
| ^

Link: https://lore.kernel.org/r/20210514153741.1958041-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/at91_can.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 9ad9b39f480e..04d0bb3ffe89 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -169,7 +169,7 @@ static const struct can_bittiming_const at91_bittiming_const = {
 };
 
 #define AT91_IS(_model) \
-static inline int at91_is_sam##_model(const struct at91_priv *priv) \
+static inline int __maybe_unused at91_is_sam##_model(const struct at91_priv *priv) \
 { \
 	return priv->devtype_data.type == AT91_DEVTYPE_SAM##_model; \
 }

From 893974d9b5655792f3b8f490f5499d3b8886be18 Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <extja@kvaser.com>
Date: Thu, 29 Apr 2021 11:37:29 +0200
Subject: [PATCH 0586/3292] can: kvaser_usb: Rename define
 USB_HYBRID_{,PRO_}CANLIN_PRODUCT_ID

Rename define USB_HYBRID_{,PRO_}CANLIN_PRODUCT_ID to
USB_HYBRID_{,PRO_}2CANLIN_PRODUCT_ID, to reflect the channel count.

Link: https://lore.kernel.org/r/20210429093730.499263-1-extja@kvaser.com
Signed-off-by: Jimmy Assarsson <extja@kvaser.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index 90ebcae13409..b2236bf63b41 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -79,10 +79,10 @@
 #define USB_USBCAN_PRO_2HS_V2_PRODUCT_ID	264
 #define USB_MEMO_2HS_PRODUCT_ID			265
 #define USB_MEMO_PRO_2HS_V2_PRODUCT_ID		266
-#define USB_HYBRID_CANLIN_PRODUCT_ID		267
+#define USB_HYBRID_2CANLIN_PRODUCT_ID		267
 #define USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID	268
 #define USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID	269
-#define USB_HYBRID_PRO_CANLIN_PRODUCT_ID	270
+#define USB_HYBRID_PRO_2CANLIN_PRODUCT_ID	270
 #define USB_U100_PRODUCT_ID			273
 #define USB_U100P_PRODUCT_ID			274
 #define USB_U100S_PRODUCT_ID			275
@@ -187,10 +187,10 @@ static const struct usb_device_id kvaser_usb_table[] = {
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) },
-	{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) },
-	{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID) },

From ee6bb641bc70accfedb8d78fc957df73e6770858 Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <extja@kvaser.com>
Date: Thu, 29 Apr 2021 11:37:30 +0200
Subject: [PATCH 0587/3292] can: kvaser_usb: Add new Kvaser hydra devices

Add new Kvaser hydra devices.

Link: https://lore.kernel.org/r/20210429093730.499263-2-extja@kvaser.com
Signed-off-by: Jimmy Assarsson <extja@kvaser.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig                      | 2 ++
 drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 3deb9f1cd292..f959215c9d53 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -76,7 +76,9 @@ config CAN_KVASER_USB
 	    - Scania VCI2 (if you have the Kvaser logo on top)
 	    - Kvaser BlackBird v2
 	    - Kvaser Leaf Pro HS v2
+	    - Kvaser Hybrid CAN/LIN
 	    - Kvaser Hybrid 2xCAN/LIN
+	    - Kvaser Hybrid Pro CAN/LIN
 	    - Kvaser Hybrid Pro 2xCAN/LIN
 	    - Kvaser Memorator 2xHS v2
 	    - Kvaser Memorator Pro 2xHS v2
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index b2236bf63b41..0cc0fc866a2a 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -87,8 +87,10 @@
 #define USB_U100P_PRODUCT_ID			274
 #define USB_U100S_PRODUCT_ID			275
 #define USB_USBCAN_PRO_4HS_PRODUCT_ID		276
+#define USB_HYBRID_CANLIN_PRODUCT_ID		277
+#define USB_HYBRID_PRO_CANLIN_PRODUCT_ID	278
 #define USB_HYDRA_PRODUCT_ID_END \
-	USB_USBCAN_PRO_4HS_PRODUCT_ID
+	USB_HYBRID_PRO_CANLIN_PRODUCT_ID
 
 static inline bool kvaser_is_leaf(const struct usb_device_id *id)
 {
@@ -195,6 +197,8 @@ static const struct usb_device_id kvaser_usb_table[] = {
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) },
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, kvaser_usb_table);

From c7b0f6887d90665ac1aefd503c2eba1294f83473 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Sun, 9 May 2021 14:43:07 +0200
Subject: [PATCH 0588/3292] can: c_can: remove unused variable struct
 c_can_priv::rxmasked

The member rxmasked of struct c_can_priv is initialized by
c_can_chip_config(), but's it's never used, so remove it.

Link: https://lore.kernel.org/r/20210509124309.30024-2-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 1 -
 drivers/net/can/c_can/c_can.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 313793f6922d..1fa47968c2ec 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -599,7 +599,6 @@ static int c_can_chip_config(struct net_device *dev)
 
 	/* Clear all internal status */
 	atomic_set(&priv->tx_active, 0);
-	priv->rxmasked = 0;
 	priv->tx_dir = 0;
 
 	/* set bittiming params */
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 06045f610f0e..517845c4571e 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -205,7 +205,6 @@ struct c_can_priv {
 	struct c_can_raminit raminit_sys;	/* RAMINIT via syscon regmap */
 	void (*raminit)(const struct c_can_priv *priv, bool enable);
 	u32 comm_rcv_high;
-	u32 rxmasked;
 	u32 dlc[];
 };
 

From 2722ac986e93c4cabbefde299d01ed24db40a645 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Fri, 14 May 2021 18:55:47 +0200
Subject: [PATCH 0589/3292] can: c_can: add ethtool support

With commit 132f2d45fb23 ("can: c_can: add support to 64 message
objects") the number of message objects used for reception /
transmission depends on FIFO size.

The ethtools API support allows you to retrieve this info. Driver info
has been added too.

Link: https://lore.kernel.org/r/20210514165549.14365-2-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/Makefile                |  5 +++
 drivers/net/can/c_can/c_can.h                 |  2 +
 drivers/net/can/c_can/c_can_ethtool.c         | 43 +++++++++++++++++++
 .../net/can/c_can/{c_can.c => c_can_main.c}   |  1 +
 4 files changed, 51 insertions(+)
 create mode 100644 drivers/net/can/c_can/c_can_ethtool.c
 rename drivers/net/can/c_can/{c_can.c => c_can_main.c} (99%)

diff --git a/drivers/net/can/c_can/Makefile b/drivers/net/can/c_can/Makefile
index e6a94c948531..6fa3b2b9e4b9 100644
--- a/drivers/net/can/c_can/Makefile
+++ b/drivers/net/can/c_can/Makefile
@@ -4,5 +4,10 @@
 #
 
 obj-$(CONFIG_CAN_C_CAN) += c_can.o
+
+c_can-objs :=
+c_can-objs += c_can_ethtool.o
+c_can-objs += c_can_main.o
+
 obj-$(CONFIG_CAN_C_CAN_PLATFORM) += c_can_platform.o
 obj-$(CONFIG_CAN_C_CAN_PCI) += c_can_pci.o
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 517845c4571e..4247ff80a29c 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -218,4 +218,6 @@ int c_can_power_up(struct net_device *dev);
 int c_can_power_down(struct net_device *dev);
 #endif
 
+void c_can_set_ethtool_ops(struct net_device *dev);
+
 #endif /* C_CAN_H */
diff --git a/drivers/net/can/c_can/c_can_ethtool.c b/drivers/net/can/c_can/c_can_ethtool.c
new file mode 100644
index 000000000000..cd5f07fca2a5
--- /dev/null
+++ b/drivers/net/can/c_can/c_can_ethtool.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Dario Binacchi <dariobin@libero.it>
+ */
+
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/netdevice.h>
+#include <linux/can/dev.h>
+
+#include "c_can.h"
+
+static void c_can_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *info)
+{
+	struct c_can_priv *priv = netdev_priv(netdev);
+	struct platform_device *pdev = to_platform_device(priv->device);
+
+	strscpy(info->driver, "c_can", sizeof(info->driver));
+	strscpy(info->bus_info, pdev->name, sizeof(info->bus_info));
+}
+
+static void c_can_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	struct c_can_priv *priv = netdev_priv(netdev);
+
+	ring->rx_max_pending = priv->msg_obj_num;
+	ring->tx_max_pending = priv->msg_obj_num;
+	ring->rx_pending = priv->msg_obj_rx_num;
+	ring->tx_pending = priv->msg_obj_tx_num;
+}
+
+static const struct ethtool_ops c_can_ethtool_ops = {
+	.get_drvinfo = c_can_get_drvinfo,
+	.get_ringparam = c_can_get_ringparam,
+};
+
+void c_can_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &c_can_ethtool_ops;
+}
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can_main.c
similarity index 99%
rename from drivers/net/can/c_can/c_can.c
rename to drivers/net/can/c_can/c_can_main.c
index 1fa47968c2ec..7588f70ca0fe 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can_main.c
@@ -1334,6 +1334,7 @@ int register_c_can_dev(struct net_device *dev)
 
 	dev->flags |= IFF_ECHO;	/* we support local echo */
 	dev->netdev_ops = &c_can_netdev_ops;
+	c_can_set_ethtool_ops(dev);
 
 	err = register_candev(dev);
 	if (!err)

From 20779943a080c5ac9b9e291b76afbac12fdec023 Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Tue, 4 May 2021 13:51:20 +0100
Subject: [PATCH 0590/3292] can: m_can: use bits.h macros for all regmasks

This updates m_can.c to exclusively use GENMASK, FIELD_GET, FIELD_PREP
and FIELD_MAX for regmask ops, as is convention in the current kernel
(far less error-prone, far more concise).

Link: https://lore.kernel.org/r/20210504125123.500553-2-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 199 +++++++++++++++-------------------
 1 file changed, 86 insertions(+), 113 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 3cf6de21d19c..5bed59b1083f 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -83,41 +83,30 @@ enum m_can_reg {
 #define MRAM_CFG_LEN	8
 
 /* Core Release Register (CREL) */
-#define CREL_REL_SHIFT		28
-#define CREL_REL_MASK		(0xF << CREL_REL_SHIFT)
-#define CREL_STEP_SHIFT		24
-#define CREL_STEP_MASK		(0xF << CREL_STEP_SHIFT)
-#define CREL_SUBSTEP_SHIFT	20
-#define CREL_SUBSTEP_MASK	(0xF << CREL_SUBSTEP_SHIFT)
+#define CREL_REL_MASK		GENMASK(31, 28)
+#define CREL_STEP_MASK		GENMASK(27, 24)
+#define CREL_SUBSTEP_MASK	GENMASK(23, 20)
 
 /* Data Bit Timing & Prescaler Register (DBTP) */
 #define DBTP_TDC		BIT(23)
-#define DBTP_DBRP_SHIFT		16
-#define DBTP_DBRP_MASK		(0x1f << DBTP_DBRP_SHIFT)
-#define DBTP_DTSEG1_SHIFT	8
-#define DBTP_DTSEG1_MASK	(0x1f << DBTP_DTSEG1_SHIFT)
-#define DBTP_DTSEG2_SHIFT	4
-#define DBTP_DTSEG2_MASK	(0xf << DBTP_DTSEG2_SHIFT)
-#define DBTP_DSJW_SHIFT		0
-#define DBTP_DSJW_MASK		(0xf << DBTP_DSJW_SHIFT)
+#define DBTP_DBRP_MASK		GENMASK(20, 16)
+#define DBTP_DTSEG1_MASK	GENMASK(12, 8)
+#define DBTP_DTSEG2_MASK	GENMASK(7, 4)
+#define DBTP_DSJW_MASK		GENMASK(3, 0)
 
 /* Transmitter Delay Compensation Register (TDCR) */
-#define TDCR_TDCO_SHIFT		8
-#define TDCR_TDCO_MASK		(0x7F << TDCR_TDCO_SHIFT)
-#define TDCR_TDCF_SHIFT		0
-#define TDCR_TDCF_MASK		(0x7F << TDCR_TDCF_SHIFT)
+#define TDCR_TDCO_MASK		GENMASK(14, 8)
+#define TDCR_TDCF_MASK		GENMASK(6, 0)
 
 /* Test Register (TEST) */
 #define TEST_LBCK		BIT(4)
 
 /* CC Control Register(CCCR) */
-#define CCCR_CMR_MASK		0x3
-#define CCCR_CMR_SHIFT		10
+#define CCCR_CMR_MASK		GENMASK(11, 10)
 #define CCCR_CMR_CANFD		0x1
 #define CCCR_CMR_CANFD_BRS	0x2
 #define CCCR_CMR_CAN		0x3
-#define CCCR_CME_MASK		0x3
-#define CCCR_CME_SHIFT		8
+#define CCCR_CME_MASK		GENMASK(9, 8)
 #define CCCR_CME_CAN		0
 #define CCCR_CME_CANFD		0x1
 #define CCCR_CME_CANFD_BRS	0x2
@@ -130,7 +119,7 @@ enum m_can_reg {
 #define CCCR_ASM		BIT(2)
 #define CCCR_CCE		BIT(1)
 #define CCCR_INIT		BIT(0)
-#define CCCR_CANFD		0x10
+#define CCCR_CANFD		BIT(4)
 /* for version >=3.1.x */
 #define CCCR_EFBI		BIT(13)
 #define CCCR_PXHD		BIT(12)
@@ -140,14 +129,10 @@ enum m_can_reg {
 #define CCCR_NISO		BIT(15)
 
 /* Nominal Bit Timing & Prescaler Register (NBTP) */
-#define NBTP_NSJW_SHIFT		25
-#define NBTP_NSJW_MASK		(0x7f << NBTP_NSJW_SHIFT)
-#define NBTP_NBRP_SHIFT		16
-#define NBTP_NBRP_MASK		(0x1ff << NBTP_NBRP_SHIFT)
-#define NBTP_NTSEG1_SHIFT	8
-#define NBTP_NTSEG1_MASK	(0xff << NBTP_NTSEG1_SHIFT)
-#define NBTP_NTSEG2_SHIFT	0
-#define NBTP_NTSEG2_MASK	(0x7f << NBTP_NTSEG2_SHIFT)
+#define NBTP_NSJW_MASK		GENMASK(31, 25)
+#define NBTP_NBRP_MASK		GENMASK(24, 16)
+#define NBTP_NTSEG1_MASK	GENMASK(15, 8)
+#define NBTP_NTSEG2_MASK	GENMASK(6, 0)
 
 /* Timestamp Counter Configuration Register (TSCC) */
 #define TSCC_TCP_MASK		GENMASK(19, 16)
@@ -161,16 +146,14 @@ enum m_can_reg {
 
 /* Error Counter Register(ECR) */
 #define ECR_RP			BIT(15)
-#define ECR_REC_SHIFT		8
-#define ECR_REC_MASK		(0x7f << ECR_REC_SHIFT)
-#define ECR_TEC_SHIFT		0
-#define ECR_TEC_MASK		0xff
+#define ECR_REC_MASK		GENMASK(14, 8)
+#define ECR_TEC_MASK		GENMASK(7, 0)
 
 /* Protocol Status Register(PSR) */
 #define PSR_BO		BIT(7)
 #define PSR_EW		BIT(6)
 #define PSR_EP		BIT(5)
-#define PSR_LEC_MASK	0x7
+#define PSR_LEC_MASK	GENMASK(2, 0)
 
 /* Interrupt Register(IR) */
 #define IR_ALL_INT	0xffffffff
@@ -221,6 +204,7 @@ enum m_can_reg {
 			 IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \
 			 IR_RF1L | IR_RF0L)
 #define IR_ERR_ALL_30X	(IR_ERR_STATE | IR_ERR_BUS_30X)
+
 /* Interrupts for version >= 3.1.x */
 #define IR_ERR_LEC_31X	(IR_PED | IR_PEA)
 #define IR_ERR_BUS_31X      (IR_ERR_LEC_31X | IR_WDI | IR_ELO | IR_BEU | \
@@ -237,58 +221,45 @@ enum m_can_reg {
 #define ILE_EINT0	BIT(0)
 
 /* Rx FIFO 0/1 Configuration (RXF0C/RXF1C) */
-#define RXFC_FWM_SHIFT	24
-#define RXFC_FWM_MASK	(0x7f << RXFC_FWM_SHIFT)
-#define RXFC_FS_SHIFT	16
-#define RXFC_FS_MASK	(0x7f << RXFC_FS_SHIFT)
+#define RXFC_FWM_MASK	GENMASK(30, 24)
+#define RXFC_FS_MASK	GENMASK(22, 16)
 
 /* Rx FIFO 0/1 Status (RXF0S/RXF1S) */
 #define RXFS_RFL	BIT(25)
 #define RXFS_FF		BIT(24)
-#define RXFS_FPI_SHIFT	16
-#define RXFS_FPI_MASK	0x3f0000
-#define RXFS_FGI_SHIFT	8
-#define RXFS_FGI_MASK	0x3f00
-#define RXFS_FFL_MASK	0x7f
+#define RXFS_FPI_MASK	GENMASK(21, 16)
+#define RXFS_FGI_MASK	GENMASK(13, 8)
+#define RXFS_FFL_MASK	GENMASK(6, 0)
 
 /* Rx Buffer / FIFO Element Size Configuration (RXESC) */
 #define M_CAN_RXESC_8BYTES	0x0
 #define M_CAN_RXESC_64BYTES	0x777
 
-/* Tx Buffer Configuration(TXBC) */
-#define TXBC_NDTB_SHIFT		16
-#define TXBC_NDTB_MASK		(0x3f << TXBC_NDTB_SHIFT)
-#define TXBC_TFQS_SHIFT		24
-#define TXBC_TFQS_MASK		(0x3f << TXBC_TFQS_SHIFT)
+/* Tx Buffer Configuration (TXBC) */
+#define TXBC_TFQS_MASK		GENMASK(29, 24)
+#define TXBC_NDTB_MASK		GENMASK(21, 16)
 
 /* Tx FIFO/Queue Status (TXFQS) */
 #define TXFQS_TFQF		BIT(21)
-#define TXFQS_TFQPI_SHIFT	16
-#define TXFQS_TFQPI_MASK	(0x1f << TXFQS_TFQPI_SHIFT)
-#define TXFQS_TFGI_SHIFT	8
-#define TXFQS_TFGI_MASK		(0x1f << TXFQS_TFGI_SHIFT)
-#define TXFQS_TFFL_SHIFT	0
-#define TXFQS_TFFL_MASK		(0x3f << TXFQS_TFFL_SHIFT)
+#define TXFQS_TFQPI_MASK	GENMASK(20, 16)
+#define TXFQS_TFGI_MASK		GENMASK(12, 8)
+#define TXFQS_TFFL_MASK		GENMASK(5, 0)
 
 /* Tx Buffer Element Size Configuration(TXESC) */
 #define TXESC_TBDS_8BYTES	0x0
 #define TXESC_TBDS_64BYTES	0x7
 
 /* Tx Event FIFO Configuration (TXEFC) */
-#define TXEFC_EFS_SHIFT		16
-#define TXEFC_EFS_MASK		(0x3f << TXEFC_EFS_SHIFT)
+#define TXEFC_EFS_MASK		GENMASK(21, 16)
 
 /* Tx Event FIFO Status (TXEFS) */
 #define TXEFS_TEFL		BIT(25)
 #define TXEFS_EFF		BIT(24)
-#define TXEFS_EFGI_SHIFT	8
-#define	TXEFS_EFGI_MASK		(0x1f << TXEFS_EFGI_SHIFT)
-#define TXEFS_EFFL_SHIFT	0
-#define TXEFS_EFFL_MASK		(0x3f << TXEFS_EFFL_SHIFT)
+#define TXEFS_EFGI_MASK		GENMASK(12, 8)
+#define TXEFS_EFFL_MASK		GENMASK(5, 0)
 
 /* Tx Event FIFO Acknowledge (TXEFA) */
-#define TXEFA_EFAI_SHIFT	0
-#define TXEFA_EFAI_MASK		(0x1f << TXEFA_EFAI_SHIFT)
+#define TXEFA_EFAI_MASK		GENMASK(4, 0)
 
 /* Message RAM Configuration (in bytes) */
 #define SIDF_ELEMENT_SIZE	4
@@ -324,13 +295,12 @@ enum m_can_reg {
 #define TX_BUF_EFC		BIT(23)
 #define TX_BUF_FDF		BIT(21)
 #define TX_BUF_BRS		BIT(20)
-#define TX_BUF_MM_SHIFT		24
-#define TX_BUF_MM_MASK		(0xff << TX_BUF_MM_SHIFT)
+#define TX_BUF_MM_MASK		GENMASK(31, 24)
+#define TX_BUF_DLC_MASK		GENMASK(19, 16)
 
 /* Tx event FIFO Element */
 /* E1 */
-#define TX_EVENT_MM_SHIFT	TX_BUF_MM_SHIFT
-#define TX_EVENT_MM_MASK	(0xff << TX_EVENT_MM_SHIFT)
+#define TX_EVENT_MM_MASK	GENMASK(31, 24)
 #define TX_EVENT_TXTS_MASK	GENMASK(15, 0)
 
 static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg)
@@ -449,8 +419,8 @@ static void m_can_clean(struct net_device *net)
 
 		net->stats.tx_errors++;
 		if (cdev->version > 30)
-			putidx = ((m_can_read(cdev, M_CAN_TXFQS) &
-				   TXFQS_TFQPI_MASK) >> TXFQS_TFQPI_SHIFT);
+			putidx = FIELD_GET(TXFQS_TFQPI_MASK,
+					   m_can_read(cdev, M_CAN_TXFQS));
 
 		can_free_echo_skb(cdev->net, putidx, NULL);
 		cdev->tx_skb = NULL;
@@ -490,7 +460,7 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
 	int i;
 
 	/* calculate the fifo get index for where to read data */
-	fgi = (rxfs & RXFS_FGI_MASK) >> RXFS_FGI_SHIFT;
+	fgi = FIELD_GET(RXFS_FGI_MASK, rxfs);
 	dlc = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DLC);
 	if (dlc & RX_BUF_FDF)
 		skb = alloc_canfd_skb(dev, &cf);
@@ -663,8 +633,8 @@ static int __m_can_get_berr_counter(const struct net_device *dev,
 	unsigned int ecr;
 
 	ecr = m_can_read(cdev, M_CAN_ECR);
-	bec->rxerr = (ecr & ECR_REC_MASK) >> ECR_REC_SHIFT;
-	bec->txerr = (ecr & ECR_TEC_MASK) >> ECR_TEC_SHIFT;
+	bec->rxerr = FIELD_GET(ECR_REC_MASK, ecr);
+	bec->txerr = FIELD_GET(ECR_TEC_MASK, ecr);
 
 	return 0;
 }
@@ -1004,24 +974,23 @@ static void m_can_echo_tx_event(struct net_device *dev)
 	m_can_txefs = m_can_read(cdev, M_CAN_TXEFS);
 
 	/* Get Tx Event fifo element count */
-	txe_count = (m_can_txefs & TXEFS_EFFL_MASK) >> TXEFS_EFFL_SHIFT;
+	txe_count = FIELD_GET(TXEFS_EFFL_MASK, m_can_txefs);
 
 	/* Get and process all sent elements */
 	for (i = 0; i < txe_count; i++) {
 		u32 txe, timestamp = 0;
 
 		/* retrieve get index */
-		fgi = (m_can_read(cdev, M_CAN_TXEFS) & TXEFS_EFGI_MASK) >>
-			TXEFS_EFGI_SHIFT;
+		fgi = FIELD_GET(TXEFS_EFGI_MASK, m_can_read(cdev, M_CAN_TXEFS));
 
 		/* get message marker, timestamp */
 		txe = m_can_txe_fifo_read(cdev, fgi, 4);
-		msg_mark = (txe & TX_EVENT_MM_MASK) >> TX_EVENT_MM_SHIFT;
+		msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe);
 		timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe);
 
 		/* ack txe element */
-		m_can_write(cdev, M_CAN_TXEFA, (TXEFA_EFAI_MASK &
-						(fgi << TXEFA_EFAI_SHIFT)));
+		m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK,
+							  fgi));
 
 		/* update stats */
 		m_can_tx_update_stats(cdev, msg_mark, timestamp);
@@ -1147,8 +1116,10 @@ static int m_can_set_bittiming(struct net_device *dev)
 	sjw = bt->sjw - 1;
 	tseg1 = bt->prop_seg + bt->phase_seg1 - 1;
 	tseg2 = bt->phase_seg2 - 1;
-	reg_btp = (brp << NBTP_NBRP_SHIFT) | (sjw << NBTP_NSJW_SHIFT) |
-		(tseg1 << NBTP_NTSEG1_SHIFT) | (tseg2 << NBTP_NTSEG2_SHIFT);
+	reg_btp = FIELD_PREP(NBTP_NBRP_MASK, brp) |
+		  FIELD_PREP(NBTP_NSJW_MASK, sjw) |
+		  FIELD_PREP(NBTP_NTSEG1_MASK, tseg1) |
+		  FIELD_PREP(NBTP_NTSEG2_MASK, tseg2);
 	m_can_write(cdev, M_CAN_NBTP, reg_btp);
 
 	if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) {
@@ -1185,13 +1156,13 @@ static int m_can_set_bittiming(struct net_device *dev)
 
 			reg_btp |= DBTP_TDC;
 			m_can_write(cdev, M_CAN_TDCR,
-				    tdco << TDCR_TDCO_SHIFT);
+				    FIELD_PREP(TDCR_TDCO_MASK, tdco));
 		}
 
-		reg_btp |= (brp << DBTP_DBRP_SHIFT) |
-			(sjw << DBTP_DSJW_SHIFT) |
-			(tseg1 << DBTP_DTSEG1_SHIFT) |
-			(tseg2 << DBTP_DTSEG2_SHIFT);
+		reg_btp = FIELD_PREP(NBTP_NBRP_MASK, brp) |
+			  FIELD_PREP(NBTP_NSJW_MASK, sjw) |
+			  FIELD_PREP(NBTP_NTSEG1_MASK, tseg1) |
+			  FIELD_PREP(NBTP_NTSEG2_MASK, tseg2);
 
 		m_can_write(cdev, M_CAN_DBTP, reg_btp);
 	}
@@ -1224,13 +1195,14 @@ static void m_can_chip_config(struct net_device *dev)
 
 	if (cdev->version == 30) {
 		/* only support one Tx Buffer currently */
-		m_can_write(cdev, M_CAN_TXBC, (1 << TXBC_NDTB_SHIFT) |
+		m_can_write(cdev, M_CAN_TXBC, FIELD_PREP(TXBC_NDTB_MASK, 1) |
 			    cdev->mcfg[MRAM_TXB].off);
 	} else {
 		/* TX FIFO is used for newer IP Core versions */
 		m_can_write(cdev, M_CAN_TXBC,
-			    (cdev->mcfg[MRAM_TXB].num << TXBC_TFQS_SHIFT) |
-			    (cdev->mcfg[MRAM_TXB].off));
+			    FIELD_PREP(TXBC_TFQS_MASK,
+				       cdev->mcfg[MRAM_TXB].num) |
+			    cdev->mcfg[MRAM_TXB].off);
 	}
 
 	/* support 64 bytes payload */
@@ -1238,23 +1210,24 @@ static void m_can_chip_config(struct net_device *dev)
 
 	/* TX Event FIFO */
 	if (cdev->version == 30) {
-		m_can_write(cdev, M_CAN_TXEFC, (1 << TXEFC_EFS_SHIFT) |
+		m_can_write(cdev, M_CAN_TXEFC,
+			    FIELD_PREP(TXEFC_EFS_MASK, 1) |
 			    cdev->mcfg[MRAM_TXE].off);
 	} else {
 		/* Full TX Event FIFO is used */
 		m_can_write(cdev, M_CAN_TXEFC,
-			    ((cdev->mcfg[MRAM_TXE].num << TXEFC_EFS_SHIFT)
-			     & TXEFC_EFS_MASK) |
+			    FIELD_PREP(TXEFC_EFS_MASK,
+				       cdev->mcfg[MRAM_TXE].num) |
 			    cdev->mcfg[MRAM_TXE].off);
 	}
 
 	/* rx fifo configuration, blocking mode, fifo size 1 */
 	m_can_write(cdev, M_CAN_RXF0C,
-		    (cdev->mcfg[MRAM_RXF0].num << RXFC_FS_SHIFT) |
+		    FIELD_PREP(RXFC_FS_MASK, cdev->mcfg[MRAM_RXF0].num) |
 		    cdev->mcfg[MRAM_RXF0].off);
 
 	m_can_write(cdev, M_CAN_RXF1C,
-		    (cdev->mcfg[MRAM_RXF1].num << RXFC_FS_SHIFT) |
+		    FIELD_PREP(RXFC_FS_MASK, cdev->mcfg[MRAM_RXF1].num) |
 		    cdev->mcfg[MRAM_RXF1].off);
 
 	cccr = m_can_read(cdev, M_CAN_CCCR);
@@ -1264,11 +1237,11 @@ static void m_can_chip_config(struct net_device *dev)
 		/* Version 3.0.x */
 
 		cccr &= ~(CCCR_TEST | CCCR_MON | CCCR_DAR |
-			  (CCCR_CMR_MASK << CCCR_CMR_SHIFT) |
-			  (CCCR_CME_MASK << CCCR_CME_SHIFT));
+			  FIELD_PREP(CCCR_CMR_MASK, FIELD_MAX(CCCR_CMR_MASK)) |
+			  FIELD_PREP(CCCR_CME_MASK, FIELD_MAX(CCCR_CME_MASK)));
 
 		if (cdev->can.ctrlmode & CAN_CTRLMODE_FD)
-			cccr |= CCCR_CME_CANFD_BRS << CCCR_CME_SHIFT;
+			cccr |= FIELD_PREP(CCCR_CME_MASK, CCCR_CME_CANFD_BRS);
 
 	} else {
 		/* Version 3.1.x or 3.2.x */
@@ -1372,8 +1345,8 @@ static int m_can_check_core_release(struct m_can_classdev *cdev)
 	 * Example: Version 3.2.1 => rel = 3; step = 2; substep = 1;
 	 */
 	crel_reg = m_can_read(cdev, M_CAN_CREL);
-	rel = (u8)((crel_reg & CREL_REL_MASK) >> CREL_REL_SHIFT);
-	step = (u8)((crel_reg & CREL_STEP_MASK) >> CREL_STEP_SHIFT);
+	rel = (u8)FIELD_GET(CREL_REL_MASK, crel_reg);
+	step = (u8)FIELD_GET(CREL_STEP_MASK, crel_reg);
 
 	if (rel == 3) {
 		/* M_CAN v3.x.y: create return value */
@@ -1593,16 +1566,16 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
 
 		if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) {
 			cccr = m_can_read(cdev, M_CAN_CCCR);
-			cccr &= ~(CCCR_CMR_MASK << CCCR_CMR_SHIFT);
+			cccr &= ~CCCR_CMR_MASK;
 			if (can_is_canfd_skb(skb)) {
 				if (cf->flags & CANFD_BRS)
-					cccr |= CCCR_CMR_CANFD_BRS <<
-						CCCR_CMR_SHIFT;
+					cccr |= FIELD_PREP(CCCR_CMR_MASK,
+							   CCCR_CMR_CANFD_BRS);
 				else
-					cccr |= CCCR_CMR_CANFD <<
-						CCCR_CMR_SHIFT;
+					cccr |= FIELD_PREP(CCCR_CMR_MASK,
+							   CCCR_CMR_CANFD);
 			} else {
-				cccr |= CCCR_CMR_CAN << CCCR_CMR_SHIFT;
+				cccr |= FIELD_PREP(CCCR_CMR_MASK, CCCR_CMR_CAN);
 			}
 			m_can_write(cdev, M_CAN_CCCR, cccr);
 		}
@@ -1629,8 +1602,8 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
 		}
 
 		/* get put index for frame */
-		putidx = ((m_can_read(cdev, M_CAN_TXFQS) & TXFQS_TFQPI_MASK)
-			  >> TXFQS_TFQPI_SHIFT);
+		putidx = FIELD_GET(TXFQS_TFQPI_MASK,
+				   m_can_read(cdev, M_CAN_TXFQS));
 		/* Write ID Field to FIFO Element */
 		m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, id);
 
@@ -1648,9 +1621,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
 		 * sending the correct echo frame
 		 */
 		m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DLC,
-				 ((putidx << TX_BUF_MM_SHIFT) &
-				  TX_BUF_MM_MASK) |
-				 (can_fd_len2dlc(cf->len) << 16) |
+				 FIELD_PREP(TX_BUF_MM_MASK, putidx) |
+				 FIELD_PREP(TX_BUF_DLC_MASK,
+					    can_fd_len2dlc(cf->len)) |
 				 fdflags | TX_BUF_EFC);
 
 		for (i = 0; i < cf->len; i += 4)
@@ -1810,11 +1783,11 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev,
 	cdev->mcfg[MRAM_RXF0].off = cdev->mcfg[MRAM_XIDF].off +
 		cdev->mcfg[MRAM_XIDF].num * XIDF_ELEMENT_SIZE;
 	cdev->mcfg[MRAM_RXF0].num = mram_config_vals[3] &
-		(RXFC_FS_MASK >> RXFC_FS_SHIFT);
+		FIELD_MAX(RXFC_FS_MASK);
 	cdev->mcfg[MRAM_RXF1].off = cdev->mcfg[MRAM_RXF0].off +
 		cdev->mcfg[MRAM_RXF0].num * RXF0_ELEMENT_SIZE;
 	cdev->mcfg[MRAM_RXF1].num = mram_config_vals[4] &
-		(RXFC_FS_MASK >> RXFC_FS_SHIFT);
+		FIELD_MAX(RXFC_FS_MASK);
 	cdev->mcfg[MRAM_RXB].off = cdev->mcfg[MRAM_RXF1].off +
 		cdev->mcfg[MRAM_RXF1].num * RXF1_ELEMENT_SIZE;
 	cdev->mcfg[MRAM_RXB].num = mram_config_vals[5];
@@ -1824,7 +1797,7 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev,
 	cdev->mcfg[MRAM_TXB].off = cdev->mcfg[MRAM_TXE].off +
 		cdev->mcfg[MRAM_TXE].num * TXE_ELEMENT_SIZE;
 	cdev->mcfg[MRAM_TXB].num = mram_config_vals[7] &
-		(TXBC_NDTB_MASK >> TXBC_NDTB_SHIFT);
+		FIELD_MAX(TXBC_NDTB_MASK);
 
 	dev_dbg(cdev->dev,
 		"sidf 0x%x %d xidf 0x%x %d rxf0 0x%x %d rxf1 0x%x %d rxb 0x%x %d txe 0x%x %d txb 0x%x %d\n",

From 38395f302f4d63b8373d2340fddd225f7b644882 Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Tue, 4 May 2021 13:51:21 +0100
Subject: [PATCH 0591/3292] can: m_can: clean up CCCR reg defs, order by revs

Ensures that the different CCCR regmasks for m_can revs 3.0.x, 3.1.x,
3.2.x and 3.3.x are clearly distinguishable. Removes incorrect
CCCR_CANFD define. Adds bit fields UTSU and WMM for rev 3.3.x, for
completeness.

Link: https://lore.kernel.org/r/20210504125123.500553-3-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 5bed59b1083f..cee542c0fdd5 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -102,14 +102,6 @@ enum m_can_reg {
 #define TEST_LBCK		BIT(4)
 
 /* CC Control Register(CCCR) */
-#define CCCR_CMR_MASK		GENMASK(11, 10)
-#define CCCR_CMR_CANFD		0x1
-#define CCCR_CMR_CANFD_BRS	0x2
-#define CCCR_CMR_CAN		0x3
-#define CCCR_CME_MASK		GENMASK(9, 8)
-#define CCCR_CME_CAN		0
-#define CCCR_CME_CANFD		0x1
-#define CCCR_CME_CANFD_BRS	0x2
 #define CCCR_TXP		BIT(14)
 #define CCCR_TEST		BIT(7)
 #define CCCR_DAR		BIT(6)
@@ -119,14 +111,25 @@ enum m_can_reg {
 #define CCCR_ASM		BIT(2)
 #define CCCR_CCE		BIT(1)
 #define CCCR_INIT		BIT(0)
-#define CCCR_CANFD		BIT(4)
+/* for version 3.0.x */
+#define CCCR_CMR_MASK		GENMASK(11, 10)
+#define CCCR_CMR_CANFD		0x1
+#define CCCR_CMR_CANFD_BRS	0x2
+#define CCCR_CMR_CAN		0x3
+#define CCCR_CME_MASK		GENMASK(9, 8)
+#define CCCR_CME_CAN		0
+#define CCCR_CME_CANFD		0x1
+#define CCCR_CME_CANFD_BRS	0x2
 /* for version >=3.1.x */
 #define CCCR_EFBI		BIT(13)
 #define CCCR_PXHD		BIT(12)
 #define CCCR_BRSE		BIT(9)
 #define CCCR_FDOE		BIT(8)
-/* only for version >=3.2.x */
+/* for version >=3.2.x */
 #define CCCR_NISO		BIT(15)
+/* for version >=3.3.x */
+#define CCCR_WMM		BIT(11)
+#define CCCR_UTSU		BIT(10)
 
 /* Nominal Bit Timing & Prescaler Register (NBTP) */
 #define NBTP_NSJW_MASK		GENMASK(31, 25)

From 0f31571668914f421dab628c45eeb391aaa127ef Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Tue, 4 May 2021 13:51:22 +0100
Subject: [PATCH 0592/3292] can: m_can: make TXESC, RXESC config more explicit

Introduce masks for the three RXESC fields (RBDS, F1DS, F0DS) and the
one TXESC field (TBDS). Update m_can_chip_config() to explicitly set all
four fields to the 64-byte option (0x7) (and these defs are renamed to
be more concise).

This is an improvement in maintainability, and also makes it easier to
implement more flexible configuration of the M_CAN buffers in the
future.

Link: https://lore.kernel.org/r/20210504125123.500553-4-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index cee542c0fdd5..ce7722229964 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -235,8 +235,10 @@ enum m_can_reg {
 #define RXFS_FFL_MASK	GENMASK(6, 0)
 
 /* Rx Buffer / FIFO Element Size Configuration (RXESC) */
-#define M_CAN_RXESC_8BYTES	0x0
-#define M_CAN_RXESC_64BYTES	0x777
+#define RXESC_RBDS_MASK		GENMASK(10, 8)
+#define RXESC_F1DS_MASK		GENMASK(6, 4)
+#define RXESC_F0DS_MASK		GENMASK(2, 0)
+#define RXESC_64B		0x7
 
 /* Tx Buffer Configuration (TXBC) */
 #define TXBC_TFQS_MASK		GENMASK(29, 24)
@@ -249,8 +251,8 @@ enum m_can_reg {
 #define TXFQS_TFFL_MASK		GENMASK(5, 0)
 
 /* Tx Buffer Element Size Configuration(TXESC) */
-#define TXESC_TBDS_8BYTES	0x0
-#define TXESC_TBDS_64BYTES	0x7
+#define TXESC_TBDS_MASK		GENMASK(2, 0)
+#define TXESC_TBDS_64B		0x7
 
 /* Tx Event FIFO Configuration (TXEFC) */
 #define TXEFC_EFS_MASK		GENMASK(21, 16)
@@ -1191,7 +1193,10 @@ static void m_can_chip_config(struct net_device *dev)
 	m_can_config_endisable(cdev, true);
 
 	/* RX Buffer/FIFO Element Size 64 bytes data field */
-	m_can_write(cdev, M_CAN_RXESC, M_CAN_RXESC_64BYTES);
+	m_can_write(cdev, M_CAN_RXESC,
+		    FIELD_PREP(RXESC_RBDS_MASK, RXESC_64B) |
+		    FIELD_PREP(RXESC_F1DS_MASK, RXESC_64B) |
+		    FIELD_PREP(RXESC_F0DS_MASK, RXESC_64B));
 
 	/* Accept Non-matching Frames Into FIFO 0 */
 	m_can_write(cdev, M_CAN_GFC, 0x0);
@@ -1209,7 +1214,8 @@ static void m_can_chip_config(struct net_device *dev)
 	}
 
 	/* support 64 bytes payload */
-	m_can_write(cdev, M_CAN_TXESC, TXESC_TBDS_64BYTES);
+	m_can_write(cdev, M_CAN_TXESC,
+		    FIELD_PREP(TXESC_TBDS_MASK, TXESC_TBDS_64B));
 
 	/* TX Event FIFO */
 	if (cdev->version == 30) {

From 50fe7547b637b3cf51876ce9ec829e79d76e5de0 Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Tue, 4 May 2021 13:51:23 +0100
Subject: [PATCH 0593/3292] can: m_can: fix whitespace in a few comments

Fixes whitespace in comments titling sections of register masks.

Link: https://lore.kernel.org/r/20210504125123.500553-5-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index ce7722229964..bba2a449ac70 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -101,7 +101,7 @@ enum m_can_reg {
 /* Test Register (TEST) */
 #define TEST_LBCK		BIT(4)
 
-/* CC Control Register(CCCR) */
+/* CC Control Register (CCCR) */
 #define CCCR_TXP		BIT(14)
 #define CCCR_TEST		BIT(7)
 #define CCCR_DAR		BIT(6)
@@ -147,18 +147,18 @@ enum m_can_reg {
 /* Timestamp Counter Value Register (TSCV) */
 #define TSCV_TSC_MASK		GENMASK(15, 0)
 
-/* Error Counter Register(ECR) */
+/* Error Counter Register (ECR) */
 #define ECR_RP			BIT(15)
 #define ECR_REC_MASK		GENMASK(14, 8)
 #define ECR_TEC_MASK		GENMASK(7, 0)
 
-/* Protocol Status Register(PSR) */
+/* Protocol Status Register (PSR) */
 #define PSR_BO		BIT(7)
 #define PSR_EW		BIT(6)
 #define PSR_EP		BIT(5)
 #define PSR_LEC_MASK	GENMASK(2, 0)
 
-/* Interrupt Register(IR) */
+/* Interrupt Register (IR) */
 #define IR_ALL_INT	0xffffffff
 
 /* Renamed bits for versions > 3.1.x */
@@ -250,7 +250,7 @@ enum m_can_reg {
 #define TXFQS_TFGI_MASK		GENMASK(12, 8)
 #define TXFQS_TFFL_MASK		GENMASK(5, 0)
 
-/* Tx Buffer Element Size Configuration(TXESC) */
+/* Tx Buffer Element Size Configuration (TXESC) */
 #define TXESC_TBDS_MASK		GENMASK(2, 0)
 #define TXESC_TBDS_64B		0x7
 

From 1b932689c77766b68e2ead51ca0fb84ec5bb8965 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 25 May 2021 13:20:11 +0100
Subject: [PATCH 0594/3292] lib: test_scanf: Remove pointless use of type_min()
 with unsigned types

sparse was producing warnings of the form:

 sparse: cast truncates bits from constant value (ffff0001 becomes 1)

There is no actual problem here. Using type_min() on an unsigned type
results in an (expected) truncation.

However, there is no need to test an unsigned value against type_min().
The minimum value of an unsigned is obviously 0, and any value cast to
an unsigned type is >= 0, so for unsigneds only type_max() need be tested.

This patch also takes the opportunity to clean up the implementation of
simple_numbers_loop() to use a common pattern for the positive and
negative test.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210525122012.6336-2-rf@opensource.cirrus.com
---
 lib/test_scanf.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index 8d577aec6c28..48ff5747a4da 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -187,8 +187,8 @@ static const unsigned long long numbers[] __initconst = {
 #define value_representable_in_type(T, val)					 \
 (is_signed_type(T)								 \
 	? ((long long)(val) >= type_min(T)) && ((long long)(val) <= type_max(T)) \
-	: ((unsigned long long)(val) >= type_min(T)) &&				 \
-	  ((unsigned long long)(val) <= type_max(T)))
+	: ((unsigned long long)(val) <= type_max(T)))
+
 
 #define test_one_number(T, gen_fmt, scan_fmt, val, fn)			\
 do {									\
@@ -204,12 +204,11 @@ do {									\
 	int i;								\
 									\
 	for (i = 0; i < ARRAY_SIZE(numbers); i++) {			\
-		if (!value_representable_in_type(T, numbers[i]))	\
-			continue;					\
+		if (value_representable_in_type(T, numbers[i]))		\
+			test_one_number(T, gen_fmt, scan_fmt,		\
+					numbers[i], fn);		\
 									\
-		test_one_number(T, gen_fmt, scan_fmt, numbers[i], fn);	\
-									\
-		if (is_signed_type(T))					\
+		if (value_representable_in_type(T, -numbers[i]))	\
 			test_one_number(T, gen_fmt, scan_fmt,		\
 					-numbers[i], fn);		\
 	}								\

From d327ea15a305024ef0085252fa3657bbb1ce25f5 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 25 May 2021 13:20:12 +0100
Subject: [PATCH 0595/3292] random32: Fix implicit truncation warning in
 prandom_seed_state()

sparse generates the following warning:

 include/linux/prandom.h:114:45: sparse: sparse: cast truncates bits from
 constant value

This is because the 64-bit seed value is manipulated and then placed in a
u32, causing an implicit cast and truncation. A forced cast to u32 doesn't
prevent this warning, which is reasonable because a typecast doesn't prove
that truncation was expected.

Logical-AND the value with 0xffffffff to make explicit that truncation to
32-bit is intended.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210525122012.6336-3-rf@opensource.cirrus.com
---
 include/linux/prandom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index bbf4b4ad61df..056d31317e49 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -111,7 +111,7 @@ static inline u32 __seed(u32 x, u32 m)
  */
 static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
-	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
+	u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL;
 
 	state->s1 = __seed(i,   2U);
 	state->s2 = __seed(i,   8U);

From 74097a0dcd1e47d3ccdd066422f28300ad508eee Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Thu, 11 Mar 2021 09:53:32 +0200
Subject: [PATCH 0596/3292] net/mlx5e: CT, Remove newline from ct_dbg call

ct_dbg() already adds a newline.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 5da5e5323a44..edf19f1c19ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -918,7 +918,7 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
 	}
 
 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
-		ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
+		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
 		shared_counter = rev_entry->counter;
 		spin_unlock_bh(&ct_priv->ht_lock);
 

From 7fac5c2eced36f335ee19ff316bd3182fbeda823 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Mon, 19 Apr 2021 15:50:58 +0300
Subject: [PATCH 0597/3292] net/mlx5: CT: Avoid reusing modify header context
 for natted entries

Currently the driver is designed to reuse header modify context entries.
Natted entries will always have a unique modify header, as such the
modify header hashtable lookup is introducing an overhead. When the
hashtable size exceeded 200k entries the tested insertion rate dropped
from ~10k entries/sec to ~300 entries/sec.

Don't use the re-use mechanism when creating modify headers
for natted tuples.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c    | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index edf19f1c19ff..e3b0fd78184e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -150,6 +150,11 @@ struct mlx5_ct_entry {
 	unsigned long flags;
 };
 
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+				 struct mlx5_flow_attr *attr,
+				 struct mlx5e_mod_hdr_handle *mh);
+
 static const struct rhashtable_params cts_ht_params = {
 	.head_offset = offsetof(struct mlx5_ct_entry, node),
 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
@@ -458,8 +463,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
 
 	mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
-	mlx5e_mod_hdr_detach(ct_priv->dev,
-			     ct_priv->mod_hdr_tbl, zone_rule->mh);
+	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
 	kfree(attr);
 }
@@ -686,15 +690,27 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
 	if (err)
 		goto err_mapping;
 
-	*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
-				   ct_priv->mod_hdr_tbl,
-				   ct_priv->ns_type,
-				   &mod_acts);
-	if (IS_ERR(*mh)) {
-		err = PTR_ERR(*mh);
-		goto err_mapping;
+	if (nat) {
+		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
+							    mod_acts.num_actions,
+							    mod_acts.actions);
+		if (IS_ERR(attr->modify_hdr)) {
+			err = PTR_ERR(attr->modify_hdr);
+			goto err_mapping;
+		}
+
+		*mh = NULL;
+	} else {
+		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
+					   ct_priv->mod_hdr_tbl,
+					   ct_priv->ns_type,
+					   &mod_acts);
+		if (IS_ERR(*mh)) {
+			err = PTR_ERR(*mh);
+			goto err_mapping;
+		}
+		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
 	}
-	attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
 
 	dealloc_mod_hdr_actions(&mod_acts);
 	return 0;
@@ -705,6 +721,17 @@ err_mapping:
 	return err;
 }
 
+static void
+mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+				 struct mlx5_flow_attr *attr,
+				 struct mlx5e_mod_hdr_handle *mh)
+{
+	if (mh)
+		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
+	else
+		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
+}
+
 static int
 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 			  struct flow_rule *flow_rule,
@@ -767,8 +794,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 	return 0;
 
 err_rule:
-	mlx5e_mod_hdr_detach(ct_priv->dev,
-			     ct_priv->mod_hdr_tbl, zone_rule->mh);
+	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
 err_mod_hdr:
 	kfree(attr);

From ed2fe7ba7b9f550ec03e89e3f423bdd97de248d6 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Wed, 10 Mar 2021 15:00:05 +0200
Subject: [PATCH 0598/3292] net/mlx5e: TC: Use bit counts for register mapping

To prepare for next patch where we will use a non-byte
aligned mapping, change all byte counts in register
mapping to bits.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c    |  6 +-
 .../ethernet/mellanox/mlx5/core/en/tc_ct.h    | 23 +++--
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 86 ++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_tc.h   |  6 +-
 .../mellanox/mlx5/core/lib/fs_chains.c        |  5 +-
 5 files changed, 77 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index e3b0fd78184e..91e7a01e32be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -23,7 +23,7 @@
 #include "en_tc.h"
 #include "en_rep.h"
 
-#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
+#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
 #define MLX5_CT_STATE_TRK_BIT BIT(2)
@@ -32,11 +32,11 @@
 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
 
-#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
+#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
 
-#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
+#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
 
 #define ct_dbg(fmt, args...)\
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 69e618d17071..644cf1641cde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -33,15 +33,15 @@ struct mlx5_ct_attr {
 #define zone_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
 	.moffset = 0,\
-	.mlen = 2,\
+	.mlen = 16,\
 	.soffset = MLX5_BYTE_OFF(fte_match_param,\
-				 misc_parameters_2.metadata_reg_c_2) + 2,\
+				 misc_parameters_2.metadata_reg_c_2),\
 }
 
 #define ctstate_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_2,\
-	.moffset = 2,\
-	.mlen = 2,\
+	.moffset = 16,\
+	.mlen = 16,\
 	.soffset = MLX5_BYTE_OFF(fte_match_param,\
 				 misc_parameters_2.metadata_reg_c_2),\
 }
@@ -49,7 +49,7 @@ struct mlx5_ct_attr {
 #define mark_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_3,\
 	.moffset = 0,\
-	.mlen = 4,\
+	.mlen = 32,\
 	.soffset = MLX5_BYTE_OFF(fte_match_param,\
 				 misc_parameters_2.metadata_reg_c_3),\
 }
@@ -57,7 +57,7 @@ struct mlx5_ct_attr {
 #define labels_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_4,\
 	.moffset = 0,\
-	.mlen = 4,\
+	.mlen = 32,\
 	.soffset = MLX5_BYTE_OFF(fte_match_param,\
 				 misc_parameters_2.metadata_reg_c_4),\
 }
@@ -65,7 +65,7 @@ struct mlx5_ct_attr {
 #define fteid_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\
 	.moffset = 0,\
-	.mlen = 4,\
+	.mlen = 32,\
 	.soffset = MLX5_BYTE_OFF(fte_match_param,\
 				 misc_parameters_2.metadata_reg_c_5),\
 }
@@ -73,20 +73,19 @@ struct mlx5_ct_attr {
 #define zone_restore_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\
 	.moffset = 0,\
-	.mlen = (ESW_ZONE_ID_BITS / 8),\
+	.mlen = ESW_ZONE_ID_BITS,\
 	.soffset = MLX5_BYTE_OFF(fte_match_param,\
-				 misc_parameters_2.metadata_reg_c_1) + 3,\
+				 misc_parameters_2.metadata_reg_c_1),\
 }
 
 #define nic_zone_restore_to_reg_ct {\
 	.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\
-	.moffset = 2,\
-	.mlen = (ESW_ZONE_ID_BITS / 8),\
+	.moffset = 16,\
+	.mlen = ESW_ZONE_ID_BITS,\
 }
 
 #define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen)
 #define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset)
-#define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8)
 
 #if IS_ENABLED(CONFIG_MLX5_TC_CT)
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 47a9c49b25fd..7d5c9b69ea37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -83,17 +83,17 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
 	[CHAIN_TO_REG] = {
 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
 		.moffset = 0,
-		.mlen = 2,
+		.mlen = 16,
 	},
 	[VPORT_TO_REG] = {
 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
-		.moffset = 2,
-		.mlen = 2,
+		.moffset = 16,
+		.mlen = 16,
 	},
 	[TUNNEL_TO_REG] = {
 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
-		.moffset = 1,
-		.mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8),
+		.moffset = 8,
+		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
 		.soffset = MLX5_BYTE_OFF(fte_match_param,
 					 misc_parameters_2.metadata_reg_c_1),
 	},
@@ -110,7 +110,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
 	[NIC_CHAIN_TO_REG] = {
 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
 		.moffset = 0,
-		.mlen = 2,
+		.mlen = 16,
 	},
 	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
 };
@@ -128,23 +128,46 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
 void
 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
 			    enum mlx5e_tc_attr_to_reg type,
-			    u32 data,
+			    u32 val,
 			    u32 mask)
 {
+	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
-	void *headers_c = spec->match_criteria;
-	void *headers_v = spec->match_value;
-	void *fmask, *fval;
+	u32 max_mask = GENMASK(match_len - 1, 0);
+	__be32 curr_mask_be, curr_val_be;
+	u32 curr_mask, curr_val;
 
 	fmask = headers_c + soffset;
 	fval = headers_v + soffset;
 
-	mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
-	data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));
+	memcpy(&curr_mask_be, fmask, 4);
+	memcpy(&curr_val_be, fval, 4);
 
-	memcpy(fmask, &mask, match_len);
-	memcpy(fval, &data, match_len);
+	curr_mask = be32_to_cpu(curr_mask_be);
+	curr_val = be32_to_cpu(curr_val_be);
+
+	//move to correct offset
+	WARN_ON(mask > max_mask);
+	mask <<= moffset;
+	val <<= moffset;
+	max_mask <<= moffset;
+
+	//zero val and mask
+	curr_mask &= ~max_mask;
+	curr_val &= ~max_mask;
+
+	//add current to mask
+	curr_mask |= mask;
+	curr_val |= val;
+
+	//back to be32 and write
+	curr_mask_be = cpu_to_be32(curr_mask);
+	curr_val_be = cpu_to_be32(curr_val);
+
+	memcpy(fmask, &curr_mask_be, 4);
+	memcpy(fval, &curr_val_be, 4);
 
 	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
 }
@@ -152,23 +175,28 @@ mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
 void
 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
 				enum mlx5e_tc_attr_to_reg type,
-				u32 *data,
+				u32 *val,
 				u32 *mask)
 {
+	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
 	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
 	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
-	void *headers_c = spec->match_criteria;
-	void *headers_v = spec->match_value;
-	void *fmask, *fval;
+	u32 max_mask = GENMASK(match_len - 1, 0);
+	__be32 curr_mask_be, curr_val_be;
+	u32 curr_mask, curr_val;
 
 	fmask = headers_c + soffset;
 	fval = headers_v + soffset;
 
-	memcpy(mask, fmask, match_len);
-	memcpy(data, fval, match_len);
+	memcpy(&curr_mask_be, fmask, 4);
+	memcpy(&curr_val_be, fval, 4);
 
-	*mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8))));
-	*data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8))));
+	curr_mask = be32_to_cpu(curr_mask_be);
+	curr_val = be32_to_cpu(curr_val_be);
+
+	*mask = (curr_mask >> moffset) & max_mask;
+	*val = (curr_val >> moffset) & max_mask;
 }
 
 int
@@ -192,13 +220,13 @@ mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
 		 (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
 
 	/* Firmware has 5bit length field and 0 means 32bits */
-	if (mlen == 4)
+	if (mlen == 32)
 		mlen = 0;
 
 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
 	MLX5_SET(set_action_in, modact, field, mfield);
-	MLX5_SET(set_action_in, modact, offset, moffset * 8);
-	MLX5_SET(set_action_in, modact, length, mlen * 8);
+	MLX5_SET(set_action_in, modact, offset, moffset);
+	MLX5_SET(set_action_in, modact, length, mlen);
 	MLX5_SET(set_action_in, modact, data, data);
 	err = mod_hdr_acts->num_actions;
 	mod_hdr_acts->num_actions++;
@@ -296,13 +324,13 @@ void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
 	modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
 
 	/* Firmware has 5bit length field and 0 means 32bits */
-	if (mlen == 4)
+	if (mlen == 32)
 		mlen = 0;
 
 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
 	MLX5_SET(set_action_in, modact, field, mfield);
-	MLX5_SET(set_action_in, modact, offset, moffset * 8);
-	MLX5_SET(set_action_in, modact, length, mlen * 8);
+	MLX5_SET(set_action_in, modact, offset, moffset);
+	MLX5_SET(set_action_in, modact, length, mlen);
 	MLX5_SET(set_action_in, modact, data, data);
 }
 
@@ -5080,7 +5108,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 		tc_skb_ext->chain = chain;
 
-		zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
+		zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
 			ESW_ZONE_ID_MASK;
 
 		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 25c091795bcd..3534d14d7d5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -198,10 +198,10 @@ enum mlx5e_tc_attr_to_reg {
 
 struct mlx5e_tc_attr_to_reg_mapping {
 	int mfield; /* rewrite field */
-	int moffset; /* offset of mfield */
-	int mlen; /* bytes to rewrite/match */
+	int moffset; /* bit offset of mfield */
+	int mlen; /* bits to rewrite/match */
 
-	int soffset; /* offset of spec for match */
+	int soffset; /* byte offset of spec for match */
 };
 
 extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 00ef10a1a9f8..4c60c540bf9d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -336,9 +336,10 @@ create_chain_restore(struct fs_chain *chain)
 	MLX5_SET(set_action_in, modact, field,
 		 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield);
 	MLX5_SET(set_action_in, modact, offset,
-		 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset * 8);
+		 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset);
 	MLX5_SET(set_action_in, modact, length,
-		 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen * 8);
+		 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ?
+		 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen);
 	MLX5_SET(set_action_in, modact, data, chain->id);
 	mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
 					   1, modact);

From b973cf32453f78d8661a640d0a0167d1d41ea331 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@nvidia.com>
Date: Mon, 23 Nov 2020 14:48:22 -0600
Subject: [PATCH 0599/3292] net/mlx5e: TC: Reserved bit 31 of REG_C1 for IPsec
 offload

Currently ASAP features fully utilize all the bits of the CQE's flow tag
and ft_metadata field. The flow tag field cannot be used because the
flow table tagging in FTE does not allow partial write.

We agree to reserve bit 31 of CQE's ft_metadata for IPsec to avoid
ASAP CT from dropping IPsec offloaded packet

Here is the new bit layout of REG_C1. Tunnel option id is reduced to
11 bits:
< IPSEC MARKER (1) | ESW_TUN_ID(12) | ESW_TUN_OPTS(11) | ESW_ZONE_ID(8) >

Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Signed-off-by: Raed Salem <raeds@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Paul Blakey <paulb@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h |  2 +-
 include/linux/mlx5/eswitch.h                    | 17 ++++++++++-------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 6cdc52d50a48..8cef4e7cfa4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -617,7 +617,7 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
 			      struct mlx5e_tc_update_priv *tc_priv)
 {
 	struct mlx5e_priv *priv = netdev_priv(skb->dev);
-	u32 tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
+	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
 
 	if (chain) {
 		struct mlx5_rep_uplink_priv *uplink_priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 3534d14d7d5c..721093b55acc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -129,7 +129,7 @@ struct tunnel_match_enc_opts {
  */
 #define TUNNEL_INFO_BITS 12
 #define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
-#define ENC_OPTS_BITS 12
+#define ENC_OPTS_BITS 11
 #define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
 #define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
 #define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 17109b65c1ac..bc7db2e059eb 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -98,10 +98,11 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 					    u16 vport_num);
 
 /* Reg C1 usage:
- * Reg C1 = < ESW_TUN_ID(12) | ESW_TUN_OPTS(12) | ESW_ZONE_ID(8) >
+ * Reg C1 = < Reserved(1) | ESW_TUN_ID(12) | ESW_TUN_OPTS(11) | ESW_ZONE_ID(8) >
  *
- * Highest 12 bits of reg c1 is the encapsulation tunnel id, next 12 bits is
- * encapsulation tunnel options, and the lowest 8 bits are used for zone id.
+ * Highest bit is reserved for other offloads as marker bit, next 12 bits of reg c1
+ * is the encapsulation tunnel id, next 11 bits is encapsulation tunnel options,
+ * and the lowest 8 bits are used for zone id.
  *
  * Zone id is used to restore CT flow when packet misses on chain.
  *
@@ -109,16 +110,18 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
  * on miss and to support inner header rewrite by means of implicit chain 0
  * flows.
  */
+#define ESW_RESERVED_BITS 1
 #define ESW_ZONE_ID_BITS 8
-#define ESW_TUN_OPTS_BITS 12
+#define ESW_TUN_OPTS_BITS 11
 #define ESW_TUN_ID_BITS 12
 #define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS
 #define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET
 #define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0)
-#define ESW_TUN_OPTS_MASK GENMASK(32 - ESW_TUN_ID_BITS - 1, ESW_TUN_OPTS_OFFSET)
-#define ESW_TUN_MASK GENMASK(31, ESW_TUN_OFFSET)
+#define ESW_TUN_OPTS_MASK GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, ESW_TUN_OPTS_OFFSET)
+#define ESW_TUN_MASK GENMASK(31 - ESW_RESERVED_BITS, ESW_TUN_OFFSET)
 #define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */
-#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT 0xFFF /* 0xFFF is a reserved mapping */
+/* 0x7FF is a reserved mapping */
+#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT GENMASK(ESW_TUN_OPTS_BITS - 1, 0)
 #define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \
 				       ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
 #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK

From c07274ab1ab2c38fb128e32643c22c89cb319384 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@nvidia.com>
Date: Tue, 15 Dec 2020 10:58:54 -0600
Subject: [PATCH 0600/3292] net/mlx5e: IPsec/rep_tc: Fix rep_tc_update_skb
 drops IPsec packet

rep_tc copy REG_C1 to REG_B. IPsec crypto utilizes the whole REG_B
register with BIT31 as IPsec marker. rep_tc_update_skb drops
IPsec because it thought REG_B contains bad value.

In previous patch, BIT 31 of REG_C1 is reserved for IPsec.
Skip the rep_tc_update_skb if BIT31 of REG_B is set.

Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index f90894eea9e0..5346271974f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1310,7 +1310,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (rep->vlan && skb_vlan_tag_present(skb))
 		skb_vlan_pop(skb);
 
-	if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) {
+	if (unlikely(!mlx5_ipsec_is_rx_flow(cqe) &&
+		     !mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))) {
 		dev_kfree_skb_any(skb);
 		goto free_wqe;
 	}
@@ -1367,7 +1368,8 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
-	if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) {
+	if (unlikely(!mlx5_ipsec_is_rx_flow(cqe) &&
+		     !mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))) {
 		dev_kfree_skb_any(skb);
 		goto mpwrq_cqe_out;
 	}

From 2ef9c7c613cfed2bba64dab194a5649b9c1e6685 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Thu, 22 Apr 2021 18:34:57 +0300
Subject: [PATCH 0601/3292] net/mlx5e: RX, Remove unnecessary check in RX CQE
 compression handling

There are two reasons for exiting mlx5e_decompress_cqes_cont():
1. The compression session is completed (cqd.left == 0).
2. The budget is exhausted (work_done == budget).

If after calling mlx5e_decompress_cqes_cont() we have cqd.left > 0,
it necessarily implies that budget is exhausted.

The first part of the complex condition is covered by the second,
hence we remove it here.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5346271974f5..e88429356018 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1560,7 +1560,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 
 	if (rq->cqd.left) {
 		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
-		if (rq->cqd.left || work_done >= budget)
+		if (work_done >= budget)
 			goto out;
 	}
 

From b72ce870f57ea7ddd1c03b000b31bb20ab52f44b Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Tue, 23 Mar 2021 03:08:56 +0200
Subject: [PATCH 0602/3292] net/mlx5: DR, Remove unused field of send_ring
 struct

Remove unused field of struct mlx5dr_send_ring

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 67460c42a99b..7600004d79a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -1252,7 +1252,6 @@ struct mlx5dr_send_ring {
 	u32 tx_head;
 	void *buf;
 	u32 buf_size;
-	struct ib_wc wc[MAX_SEND_CQE];
 	u8 sync_buff[MIN_READ_SYNC];
 	struct mlx5dr_mr *sync_mr;
 	spinlock_t lock; /* Protect the data path of the send ring */

From e01b58e9d5c47d027086f35cdd5fc953c6f66c4f Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Tue, 30 Mar 2021 20:59:50 +0300
Subject: [PATCH 0603/3292] net/mlx5: Add case for FS_FT_NIC_TX FT in
 MLX5_CAP_FLOWTABLE_TYPE

Commit 16f1c5bb3ed7 ("net/mlx5: Check device capability for maximum flow
counters") added MLX5_CAP_FLOWTABLE_TYPE but forgot to account
for FS_FT_NIC_TX case in the expression.

Although the expression will return 1 for this case instead of the
actual cap, there isn't currently no known side affects of
missing this case.

Add the FS_FT_NIC_TX case.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index e577a2c424af..7317cdeab661 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -331,6 +331,7 @@ void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
 
 #define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) (		\
 	(type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) :		\
+	(type == FS_FT_NIC_TX) ? MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) :		\
 	(type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) :		\
 	(type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) :		\
 	(type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) :		\

From 04745afb2ae3ee496d339a5511291def2be5c7c1 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Mon, 8 Mar 2021 14:20:24 +0200
Subject: [PATCH 0604/3292] net/mlx5: Move table size calculation to steering
 cmd layer

Currently the table size is calculated by the fs_core layer. However, each
steering cmd instance has a different allocation logic. FW steering uses
a predefined pools of multiple sizes. SW steering doesn't have a pool,
and can allocate any size of tables.

Move the table size calculation to the steering cmd layer as a pre-step
for moving fs_chains pool logic globally to firmware steering, and
increasing table sizes for software steering. In addition, change the
size parameter to absolute size to allow the special zero value to
mean "get next available maximum size".

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c       | 10 ++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c      |  8 ++------
 .../net/ethernet/mellanox/mlx5/core/steering/fs_dr.c   |  6 ++++--
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 8e06731d3cb3..94712a10ef9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -49,9 +49,11 @@ static int mlx5_cmd_stub_update_root_ft(struct mlx5_flow_root_namespace *ns,
 
 static int mlx5_cmd_stub_create_flow_table(struct mlx5_flow_root_namespace *ns,
 					   struct mlx5_flow_table *ft,
-					   unsigned int log_size,
+					   unsigned int size,
 					   struct mlx5_flow_table *next_ft)
 {
+	ft->max_fte = size ? roundup_pow_of_two(size) : 1;
+
 	return 0;
 }
 
@@ -181,7 +183,7 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 
 static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
-				      unsigned int log_size,
+				      unsigned int size,
 				      struct mlx5_flow_table *next_ft)
 {
 	int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
@@ -190,8 +192,12 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
+	unsigned int log_size = 0;
 	int err;
 
+	log_size = size ? ilog2(roundup_pow_of_two(size)) : 0;
+	ft->max_fte = 1 << log_size;
+
 	MLX5_SET(create_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index d62de642eca9..c2e102ed82ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -38,7 +38,7 @@
 struct mlx5_flow_cmds {
 	int (*create_flow_table)(struct mlx5_flow_root_namespace *ns,
 				 struct mlx5_flow_table *ft,
-				 unsigned int log_size,
+				 unsigned int size,
 				 struct mlx5_flow_table *next_ft);
 	int (*destroy_flow_table)(struct mlx5_flow_root_namespace *ns,
 				  struct mlx5_flow_table *ft);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f74d2c834037..378990c933e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -752,7 +752,7 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f
 	return fg;
 }
 
-static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
+static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport,
 						enum fs_flow_table_type table_type,
 						enum fs_flow_table_op_mod op_mod,
 						u32 flags)
@@ -775,7 +775,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
 	ft->op_mod = op_mod;
 	ft->type = table_type;
 	ft->vport = vport;
-	ft->max_fte = max_fte;
 	ft->flags = flags;
 	INIT_LIST_HEAD(&ft->fwd_rules);
 	mutex_init(&ft->lock);
@@ -1070,7 +1069,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	struct mlx5_flow_table *next_ft;
 	struct fs_prio *fs_prio = NULL;
 	struct mlx5_flow_table *ft;
-	int log_table_sz;
 	int err;
 
 	if (!root) {
@@ -1101,7 +1099,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	 */
 	ft = alloc_flow_table(ft_attr->level,
 			      vport,
-			      ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
 			      root->table_type,
 			      op_mod, ft_attr->flags);
 	if (IS_ERR(ft)) {
@@ -1110,12 +1107,11 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	}
 
 	tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
-	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = unmanaged ? ft_attr->next_ft :
 			      find_next_chained_ft(fs_prio);
 	ft->def_miss_action = ns->def_miss_action;
 	ft->ns = ns;
-	err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
+	err = root->cmds->create_flow_table(root, ft, ft_attr->max_fte, next_ft);
 	if (err)
 		goto free_ft;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 96c39a17d026..ee45d698cd9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -62,7 +62,7 @@ static int set_miss_action(struct mlx5_flow_root_namespace *ns,
 
 static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
 					 struct mlx5_flow_table *ft,
-					 unsigned int log_size,
+					 unsigned int size,
 					 struct mlx5_flow_table *next_ft)
 {
 	struct mlx5dr_table *tbl;
@@ -71,7 +71,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
 
 	if (mlx5_dr_is_fw_table(ft->flags))
 		return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft,
-								    log_size,
+								    size,
 								    next_ft);
 	flags = ft->flags;
 	/* turn off encap/decap if not supported for sw-str by fw */
@@ -97,6 +97,8 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
 		}
 	}
 
+	ft->max_fte = size ? roundup_pow_of_two(size) : 1;
+
 	return 0;
 }
 

From 4a98544d182761873381d46bb1a498703ca85bf0 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Mon, 8 Mar 2021 14:16:02 +0200
Subject: [PATCH 0605/3292] net/mlx5: Move chains ft pool to be used by all
 firmware steering

Firmware FT pool is per device, but the software tracking of this pool
only services fs_chains users, and if another layer takes a flow table,
the pool will not be updated, and fs_chains will fail creating a flow
table, with no recovery till the flow table is returned.

Move FT pool to be global per device, and stored at the cmd level,
so all layers can use it.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 25 ++++--
 .../net/ethernet/mellanox/mlx5/core/fs_core.c | 27 ++++--
 .../ethernet/mellanox/mlx5/core/fs_ft_pool.c  | 83 +++++++++++++++++
 .../ethernet/mellanox/mlx5/core/fs_ft_pool.h  | 21 +++++
 .../mellanox/mlx5/core/lib/fs_chains.c        | 89 ++-----------------
 include/linux/mlx5/driver.h                   |  2 +
 7 files changed, 151 insertions(+), 98 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a1223e904190..8dbdf1aef00f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
-		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
+		fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
 		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
 		fw_reset.o qos.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 94712a10ef9a..b7aae8b75760 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -36,6 +36,7 @@
 
 #include "fs_core.h"
 #include "fs_cmd.h"
+#include "fs_ft_pool.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 
@@ -192,18 +193,20 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
-	unsigned int log_size = 0;
 	int err;
 
-	log_size = size ? ilog2(roundup_pow_of_two(size)) : 0;
-	ft->max_fte = 1 << log_size;
+	if (size != POOL_NEXT_SIZE)
+		size = roundup_pow_of_two(size);
+	size = mlx5_ft_pool_get_avail_sz(dev, ft->type, size);
+	if (!size)
+		return -ENOSPC;
 
 	MLX5_SET(create_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
 
 	MLX5_SET(create_flow_table_in, in, table_type, ft->type);
 	MLX5_SET(create_flow_table_in, in, flow_table_context.level, ft->level);
-	MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size);
+	MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, size ? ilog2(size) : 0);
 	MLX5_SET(create_flow_table_in, in, vport_number, ft->vport);
 	MLX5_SET(create_flow_table_in, in, other_vport,
 		 !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
@@ -240,9 +243,14 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 	}
 
 	err = mlx5_cmd_exec_inout(dev, create_flow_table, in, out);
-	if (!err)
+	if (!err) {
 		ft->id = MLX5_GET(create_flow_table_out, out,
 				  table_id);
+		ft->max_fte = size;
+	} else {
+		mlx5_ft_pool_put_sz(ns->dev, size);
+	}
+
 	return err;
 }
 
@@ -251,6 +259,7 @@ static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
+	int err;
 
 	MLX5_SET(destroy_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
@@ -260,7 +269,11 @@ static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
 	MLX5_SET(destroy_flow_table_in, in, other_vport,
 		 !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
 
-	return mlx5_cmd_exec_in(dev, destroy_flow_table, in);
+	err = mlx5_cmd_exec_in(dev, destroy_flow_table, in);
+	if (!err)
+		mlx5_ft_pool_put_sz(ns->dev, ft->max_fte);
+
+	return err;
 }
 
 static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 378990c933e5..6e20cbb4656a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -38,6 +38,7 @@
 #include "mlx5_core.h"
 #include "fs_core.h"
 #include "fs_cmd.h"
+#include "fs_ft_pool.h"
 #include "diag/fs_tracepoint.h"
 #include "accel/ipsec.h"
 #include "fpga/ipsec.h"
@@ -1166,6 +1167,8 @@ mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
 
 	ft_attr.level = level;
 	ft_attr.prio  = prio;
+	ft_attr.max_fte = 1;
+
 	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
 }
 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
@@ -1175,19 +1178,20 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 				    struct mlx5_flow_table_attr *ft_attr)
 {
 	int num_reserved_entries = ft_attr->autogroup.num_reserved_entries;
-	int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries;
 	int max_num_groups = ft_attr->autogroup.max_num_groups;
 	struct mlx5_flow_table *ft;
-
-	if (max_num_groups > autogroups_max_fte)
-		return ERR_PTR(-EINVAL);
-	if (num_reserved_entries > ft_attr->max_fte)
-		return ERR_PTR(-EINVAL);
+	int autogroups_max_fte;
 
 	ft = mlx5_create_flow_table(ns, ft_attr);
 	if (IS_ERR(ft))
 		return ft;
 
+	autogroups_max_fte = ft->max_fte - num_reserved_entries;
+	if (max_num_groups > autogroups_max_fte)
+		goto err_validate;
+	if (num_reserved_entries > ft->max_fte)
+		goto err_validate;
+
 	ft->autogroup.active = true;
 	ft->autogroup.required_groups = max_num_groups;
 	ft->autogroup.max_fte = autogroups_max_fte;
@@ -1195,6 +1199,10 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 	ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1);
 
 	return ft;
+
+err_validate:
+	mlx5_destroy_flow_table(ft);
+	return ERR_PTR(-ENOSPC);
 }
 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
 
@@ -2588,6 +2596,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 	mlx5_cleanup_fc_stats(dev);
 	kmem_cache_destroy(steering->ftes_cache);
 	kmem_cache_destroy(steering->fgs_cache);
+	mlx5_ft_pool_destroy(dev);
 	kfree(steering);
 }
 
@@ -2938,9 +2947,13 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 	if (err)
 		return err;
 
+	err = mlx5_ft_pool_init(dev);
+	if (err)
+		return err;
+
 	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
 	if (!steering)
-		return -ENOMEM;
+		goto err;
 	steering->dev = dev;
 	dev->priv.steering = steering;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
new file mode 100644
index 000000000000..526fbb669142
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include "fs_ft_pool.h"
+
+/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS),
+ * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated
+ * for each flow table pool. We can allocate up to 16M of each pool,
+ * and we keep track of how much we used via mlx5_ft_pool_get_avail_sz.
+ * Firmware doesn't report any of this for now.
+ * ESW_POOL is expected to be sorted from large to small and match firmware
+ * pools.
+ */
+#define FT_SIZE (16 * 1024 * 1024)
+static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024,
+					 1 * 1024 * 1024,
+					 64 * 1024,
+					 128,
+					 1 /* size for termination tables */ };
+struct mlx5_ft_pool {
+	int ft_left[ARRAY_SIZE(FT_POOLS)];
+};
+
+int mlx5_ft_pool_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_ft_pool *ft_pool;
+	int i;
+
+	ft_pool = kzalloc(sizeof(*ft_pool), GFP_KERNEL);
+
+	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--)
+		ft_pool->ft_left[i] = FT_SIZE / FT_POOLS[i];
+
+	dev->priv.ft_pool = ft_pool;
+	return 0;
+}
+
+void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev)
+{
+	kfree(dev->priv.ft_pool);
+}
+
+int
+mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type,
+			  int desired_size)
+{
+	u32 max_ft_size = 1 << MLX5_CAP_FLOWTABLE_TYPE(dev, log_max_ft_size, table_type);
+	int i, found_i = -1;
+
+	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+		if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size &&
+		    FT_POOLS[i] <= max_ft_size) {
+			found_i = i;
+			if (desired_size != POOL_NEXT_SIZE)
+				break;
+		}
+	}
+
+	if (found_i != -1) {
+		--dev->priv.ft_pool->ft_left[found_i];
+		return FT_POOLS[found_i];
+	}
+
+	return 0;
+}
+
+void
+mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz)
+{
+	int i;
+
+	if (!sz)
+		return;
+
+	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
+		if (sz == FT_POOLS[i]) {
+			++dev->priv.ft_pool->ft_left[i];
+			return;
+		}
+	}
+
+	WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h
new file mode 100644
index 000000000000..25f4274b372b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_FT_POOL_H__
+#define __MLX5_FS_FT_POOL_H__
+
+#include <linux/mlx5/driver.h>
+#include "fs_core.h"
+
+#define POOL_NEXT_SIZE 0
+
+int mlx5_ft_pool_init(struct mlx5_core_dev *dev);
+void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev);
+
+int
+mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type table_type,
+			  int desired_size);
+void
+mlx5_ft_pool_put_sz(struct mlx5_core_dev *dev, int sz);
+
+#endif /* __MLX5_FS_FT_POOL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 4c60c540bf9d..d0cfe7adb8a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -6,6 +6,7 @@
 #include <linux/mlx5/fs.h>
 
 #include "lib/fs_chains.h"
+#include "fs_ft_pool.h"
 #include "en/mapping.h"
 #include "fs_core.h"
 #include "en_tc.h"
@@ -13,25 +14,10 @@
 #define chains_lock(chains) ((chains)->lock)
 #define chains_ht(chains) ((chains)->chains_ht)
 #define prios_ht(chains) ((chains)->prios_ht)
-#define ft_pool_left(chains) ((chains)->ft_left)
 #define tc_default_ft(chains) ((chains)->tc_default_ft)
 #define tc_end_ft(chains) ((chains)->tc_end_ft)
 #define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \
 				  FDB_TC_OFFLOAD : MLX5E_TC_PRIO)
-
-/* Firmware currently has 4 pool of 4 sizes that it supports (FT_POOLS),
- * and a virtual memory region of 16M (MLX5_FT_SIZE), this region is duplicated
- * for each flow table pool. We can allocate up to 16M of each pool,
- * and we keep track of how much we used via get_next_avail_sz_from_pool.
- * Firmware doesn't report any of this for now.
- * ESW_POOL is expected to be sorted from large to small and match firmware
- * pools.
- */
-#define FT_SIZE (16 * 1024 * 1024)
-static const unsigned int FT_POOLS[] = { 4 * 1024 * 1024,
-					  1 * 1024 * 1024,
-					  64 * 1024,
-					  128 };
 #define FT_TBL_SZ (64 * 1024)
 
 struct mlx5_fs_chains {
@@ -49,8 +35,6 @@ struct mlx5_fs_chains {
 	enum mlx5_flow_namespace_type ns;
 	u32 group_num;
 	u32 flags;
-
-	int ft_left[ARRAY_SIZE(FT_POOLS)];
 };
 
 struct fs_chain {
@@ -160,54 +144,6 @@ mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
 	tc_end_ft(chains) = ft;
 }
 
-#define POOL_NEXT_SIZE 0
-static int
-mlx5_chains_get_avail_sz_from_pool(struct mlx5_fs_chains *chains,
-				   int desired_size)
-{
-	int i, found_i = -1;
-
-	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
-		if (ft_pool_left(chains)[i] && FT_POOLS[i] > desired_size) {
-			found_i = i;
-			if (desired_size != POOL_NEXT_SIZE)
-				break;
-		}
-	}
-
-	if (found_i != -1) {
-		--ft_pool_left(chains)[found_i];
-		return FT_POOLS[found_i];
-	}
-
-	return 0;
-}
-
-static void
-mlx5_chains_put_sz_to_pool(struct mlx5_fs_chains *chains, int sz)
-{
-	int i;
-
-	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) {
-		if (sz == FT_POOLS[i]) {
-			++ft_pool_left(chains)[i];
-			return;
-		}
-	}
-
-	WARN_ONCE(1, "Couldn't find size %d in flow table size pool", sz);
-}
-
-static void
-mlx5_chains_init_sz_pool(struct mlx5_fs_chains *chains, u32 ft_max)
-{
-	int i;
-
-	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--)
-		ft_pool_left(chains)[i] =
-			FT_POOLS[i] <= ft_max ? FT_SIZE / FT_POOLS[i] : 0;
-}
-
 static struct mlx5_flow_table *
 mlx5_chains_create_table(struct mlx5_fs_chains *chains,
 			 u32 chain, u32 prio, u32 level)
@@ -221,11 +157,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
 		ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
 				  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 
-	sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
-	     mlx5_chains_get_avail_sz_from_pool(chains, FT_TBL_SZ) :
-	     mlx5_chains_get_avail_sz_from_pool(chains, POOL_NEXT_SIZE);
-	if (!sz)
-		return ERR_PTR(-ENOSPC);
+	sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE;
 	ft_attr.max_fte = sz;
 
 	/* We use tc_default_ft(chains) as the table's next_ft till
@@ -266,21 +198,12 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
 	if (IS_ERR(ft)) {
 		mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
 			       (int)PTR_ERR(ft), chain, prio, level, sz);
-		mlx5_chains_put_sz_to_pool(chains, sz);
 		return ft;
 	}
 
 	return ft;
 }
 
-static void
-mlx5_chains_destroy_table(struct mlx5_fs_chains *chains,
-			  struct mlx5_flow_table *ft)
-{
-	mlx5_chains_put_sz_to_pool(chains, ft->max_fte);
-	mlx5_destroy_flow_table(ft);
-}
-
 static int
 create_chain_restore(struct fs_chain *chain)
 {
@@ -637,7 +560,7 @@ err_insert:
 err_miss_rule:
 	mlx5_destroy_flow_group(miss_group);
 err_group:
-	mlx5_chains_destroy_table(chains, ft);
+	mlx5_destroy_flow_table(ft);
 err_create:
 err_alloc:
 	kvfree(prio_s);
@@ -660,7 +583,7 @@ mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains,
 			       prio_params);
 	mlx5_del_flow_rules(prio->miss_rule);
 	mlx5_destroy_flow_group(prio->miss_group);
-	mlx5_chains_destroy_table(chains, prio->ft);
+	mlx5_destroy_flow_table(prio->ft);
 	mlx5_chains_put_chain(chain);
 	kvfree(prio);
 }
@@ -785,7 +708,7 @@ void
 mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
 				 struct mlx5_flow_table *ft)
 {
-	mlx5_chains_destroy_table(chains, ft);
+	mlx5_destroy_flow_table(ft);
 }
 
 static struct mlx5_fs_chains *
@@ -817,8 +740,6 @@ mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
 		       mlx5_chains_get_chain_range(chains_priv),
 		       mlx5_chains_get_prio_range(chains_priv));
 
-	mlx5_chains_init_sz_pool(chains_priv, attr->max_ft_sz);
-
 	err = rhashtable_init(&chains_ht(chains_priv), &chain_params);
 	if (err)
 		goto init_chains_ht_err;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f8e8d7e90616..6a7749c21b82 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -550,6 +550,7 @@ struct mlx5_adev {
 	int idx;
 };
 
+struct mlx5_ft_pool;
 struct mlx5_priv {
 	/* IRQ table valid only for real pci devices PF or VF */
 	struct mlx5_irq_table   *irq_table;
@@ -602,6 +603,7 @@ struct mlx5_priv {
 	struct mlx5_core_roce	roce;
 	struct mlx5_fc_stats		fc_stats;
 	struct mlx5_rl_table            rl_table;
+	struct mlx5_ft_pool		*ft_pool;
 
 	struct mlx5_bfreg_data		bfregs;
 	struct mlx5_uars_page	       *uar;

From 9e117998409c740fdf921392acb048360fcb62a9 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Mon, 8 Mar 2021 14:52:41 +0200
Subject: [PATCH 0606/3292] net/mlx5: DR, Set max table size to 2G entries

SW steering has no table size limitations.
However, fs_core API is size aware.

Set SW steering tables to the maximum possible table size (INT_MAX).

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index ee45d698cd9c..ee0e9d79aaec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -97,7 +97,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
 		}
 	}
 
-	ft->max_fte = size ? roundup_pow_of_two(size) : 1;
+	ft->max_fte = INT_MAX;
 
 	return 0;
 }

From 71513c05a97fa2c930d79661a32759e6875850d4 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Thu, 11 Mar 2021 11:35:39 +0200
Subject: [PATCH 0607/3292] net/mlx5: Cap the maximum flow group size to 16M
 entries

The maximum number of large flow groups applies to both small and large
tables. For very large tables (such as the 2G SW steering tables) this may
create a small number of flow groups each with an unrealistic entries
domain (> 16M).

Set the maximum number of large flow groups to at least what user
requested, but with a maximum per group size of 16M entries.
For software steering, if user requested less than 128 large flow
groups, it will gives us about 128 16M groups in a 2G
entries tables.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6e20cbb4656a..1b7a1cde097c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1173,6 +1173,7 @@ mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
 }
 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
 
+#define MAX_FLOW_GROUP_SIZE BIT(24)
 struct mlx5_flow_table*
 mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 				    struct mlx5_flow_table_attr *ft_attr)
@@ -1192,6 +1193,10 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 	if (num_reserved_entries > ft->max_fte)
 		goto err_validate;
 
+	/* Align the number of groups according to the largest group size */
+	if (autogroups_max_fte / (max_num_groups + 1) > MAX_FLOW_GROUP_SIZE)
+		max_num_groups = (autogroups_max_fte / MAX_FLOW_GROUP_SIZE) - 1;
+
 	ft->autogroup.active = true;
 	ft->autogroup.required_groups = max_num_groups;
 	ft->autogroup.max_fte = autogroups_max_fte;

From a546432f2f0491d53b3beaffe33813e96a6644c8 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Mon, 3 May 2021 09:22:43 +0300
Subject: [PATCH 0608/3292] net/mlx5: Remove unnecessary spin lock protection

Taking lag_lock to access ldev->tracker is meaningless in the context of
do_bond() and mlx5_lag_netdev_event().

Signed-off-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index b8748390335f..c9c00163d918 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -280,9 +280,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	if (!mlx5_lag_is_ready(ldev))
 		return;
 
-	spin_lock(&lag_lock);
 	tracker = ldev->tracker;
-	spin_unlock(&lag_lock);
 
 	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
 
@@ -481,9 +479,7 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
 		break;
 	}
 
-	spin_lock(&lag_lock);
 	ldev->tracker = tracker;
-	spin_unlock(&lag_lock);
 
 	if (changed)
 		mlx5_queue_bond_work(ldev, 0);

From 2b1476752521aae142d4c8bc37dfd2be66b2acfa Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Mon, 3 May 2021 12:35:05 +0300
Subject: [PATCH 0609/3292] net/mlx5: Use boolean arithmetic to evaluate
 roce_lag

Avoid mixing boolean and bit arithmetic when evaluating validity of
roce_lag.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index c9c00163d918..e52e2144ab12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -289,8 +289,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 			   !mlx5_sriov_is_enabled(dev1);
 
 #ifdef CONFIG_MLX5_ESWITCH
-		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
-			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+		roce_lag = roce_lag &&
+			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
+			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
 #endif
 
 		if (roce_lag)

From 8613641063617c1dfc731b403b3ee4935ef15f87 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Mon, 3 May 2021 14:39:58 +0300
Subject: [PATCH 0610/3292] net/mlx5: Fix lag port remapping logic

Fix the logic so that if both ports netdevices are enabled or disabled,
use the trivial mapping without swapping.

If only one of the netdevice's tx is enabled, use it to remap traffic to
that port.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index e52e2144ab12..1fb70524d067 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -118,17 +118,24 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
 					   u8 *port1, u8 *port2)
 {
+	bool p1en;
+	bool p2en;
+
+	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
+	       tracker->netdev_state[MLX5_LAG_P1].link_up;
+
+	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
+	       tracker->netdev_state[MLX5_LAG_P2].link_up;
+
 	*port1 = 1;
 	*port2 = 2;
-	if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
-	    !tracker->netdev_state[MLX5_LAG_P1].link_up) {
-		*port1 = 2;
+	if ((!p1en && !p2en) || (p1en && p2en))
 		return;
-	}
 
-	if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
-	    !tracker->netdev_state[MLX5_LAG_P2].link_up)
+	if (p1en)
 		*port2 = 1;
+	else
+		*port1 = 2;
 }
 
 void mlx5_modify_lag(struct mlx5_lag *ldev,

From f285f37cb1e6b29e7dc732c81510aa115463730f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 27 May 2021 12:48:19 +0200
Subject: [PATCH 0611/3292] devlink: append split port number to the port name

Instead of doing sprintf twice in case the port is split or not, append
the split port suffix in case the port is split.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20210527104819.789840-1-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/devlink.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4eb969518ee0..69681f19388e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8632,12 +8632,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 	switch (attrs->flavour) {
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
 	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
-		if (!attrs->split)
-			n = snprintf(name, len, "p%u", attrs->phys.port_number);
-		else
-			n = snprintf(name, len, "p%us%u",
-				     attrs->phys.port_number,
-				     attrs->phys.split_subport_number);
+		n = snprintf(name, len, "p%u", attrs->phys.port_number);
+		if (n < len && attrs->split)
+			n += snprintf(name + n, len - n, "s%u",
+				      attrs->phys.split_subport_number);
 		break;
 	case DEVLINK_PORT_FLAVOUR_CPU:
 	case DEVLINK_PORT_FLAVOUR_DSA:

From 557c4d2f780ce73ffad8dfe4d03a27dc9e13dfde Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Thu, 27 May 2021 12:55:15 +0200
Subject: [PATCH 0612/3292] selftests: devlink_lib: add check for devlink
 device existence

If user passes devlink handle over DEVLINK_DEV variable, check if the
device exists.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20210527105515.790330-1-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/devlink_lib.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 9c12c4fd3afc..c19e001f138b 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -18,6 +18,12 @@ if [[ ! -v DEVLINK_DEV ]]; then
 
 	DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
 			 -n | cut -d" " -f3)
+else
+	devlink dev show $DEVLINK_DEV &> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
+		exit 1
+	fi
 fi
 
 ##############################################################################

From 2754125ebd334268d815277e2909dc9fbb504356 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 26 May 2021 12:45:07 +0200
Subject: [PATCH 0613/3292] net/mlx5: Expose FW version over defined keyword

To be aligned with the rest of the drivers, expose FW version under "fw"
keyword in devlink dev info, in addition to the existing "fw.version",
which is currently Mellanox-specific.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 44c458443428..d791d351b489 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -63,6 +63,11 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 	err = devlink_info_version_running_put(req, "fw.version", version_str);
 	if (err)
 		return err;
+	err = devlink_info_version_running_put(req,
+					       DEVLINK_INFO_VERSION_GENERIC_FW,
+					       version_str);
+	if (err)
+		return err;
 
 	/* no pending version, return running (stored) version */
 	if (stored_fw == 0)
@@ -74,8 +79,9 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 	err = devlink_info_version_stored_put(req, "fw.version", version_str);
 	if (err)
 		return err;
-
-	return 0;
+	return devlink_info_version_stored_put(req,
+					       DEVLINK_INFO_VERSION_GENERIC_FW,
+					       version_str);
 }
 
 static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack)

From f55c998c274e3b59f8b07234f16521524c564a23 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 26 May 2021 12:45:08 +0200
Subject: [PATCH 0614/3292] mlxsw: core: Expose FW version over defined keyword

To be aligned with the rest of the drivers, expose FW version under "fw"
keyword in devlink dev info, in addition to the existing "fw.version",
which is currently Mellanox-specific.

devlink output before:
       running:
         fw.version 30.2008.2018
after:
       running:
         fw.version 30.2008.2018
         fw 30.2008.2018

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index ad93e01b2cda..b543d4e87951 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1453,7 +1453,9 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 	if (err)
 		return err;
 
-	return 0;
+	return devlink_info_version_running_put(req,
+						DEVLINK_INFO_VERSION_GENERIC_FW,
+						buf);
 }
 
 static int

From 7dafcc4c9dfb417fc1ad19881c802af2101d0bfd Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Wed, 26 May 2021 12:45:09 +0200
Subject: [PATCH 0615/3292] mlxsw: core: use PSID string define in devlink info

Instead of having the string spelled out in the driver, use the global
define with the same value.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index b543d4e87951..e775f08fb464 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1444,7 +1444,9 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 	if (err)
 		return err;
 
-	err = devlink_info_version_fixed_put(req, "fw.psid", fw_info_psid);
+	err = devlink_info_version_fixed_put(req,
+					     DEVLINK_INFO_VERSION_GENERIC_FW_PSID,
+					     fw_info_psid);
 	if (err)
 		return err;
 

From bf3be85dbe59e2a4972caf0163113a1040b73f72 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 27 May 2021 17:44:20 +0300
Subject: [PATCH 0616/3292] atl1c: detect NIC type early

To support NICs that allow for more than one tx queue it is
required to detect NIC type early during probe. This is moves
NIC type detection before netdev_alloc to prepare for that.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/atheros/atl1c/atl1c_main.c   | 56 +++++++++----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 77da1c54c49f..e3a77d81fecb 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -646,33 +646,26 @@ static int atl1c_alloc_queues(struct atl1c_adapter *adapter)
 	return 0;
 }
 
-static void atl1c_set_mac_type(struct atl1c_hw *hw)
+static enum atl1c_nic_type atl1c_get_mac_type(struct pci_dev *pdev,
+					      u8 __iomem *hw_addr)
 {
-	u32 magic;
-	switch (hw->device_id) {
+	switch (pdev->device) {
 	case PCI_DEVICE_ID_ATTANSIC_L2C:
-		hw->nic_type = athr_l2c;
-		break;
+		return athr_l2c;
 	case PCI_DEVICE_ID_ATTANSIC_L1C:
-		hw->nic_type = athr_l1c;
-		break;
+		return athr_l1c;
 	case PCI_DEVICE_ID_ATHEROS_L2C_B:
-		hw->nic_type = athr_l2c_b;
-		break;
+		return athr_l2c_b;
 	case PCI_DEVICE_ID_ATHEROS_L2C_B2:
-		hw->nic_type = athr_l2c_b2;
-		break;
+		return athr_l2c_b2;
 	case PCI_DEVICE_ID_ATHEROS_L1D:
-		hw->nic_type = athr_l1d;
-		break;
+		return athr_l1d;
 	case PCI_DEVICE_ID_ATHEROS_L1D_2_0:
-		hw->nic_type = athr_l1d_2;
-		AT_READ_REG(hw, REG_MT_MAGIC, &magic);
-		if (magic == MT_MAGIC)
-			hw->nic_type = athr_mt;
-		break;
+		if (readl(hw_addr + REG_MT_MAGIC) == MT_MAGIC)
+			return athr_mt;
+		return athr_l1d_2;
 	default:
-		break;
+		return athr_l1c;
 	}
 }
 
@@ -680,7 +673,6 @@ static int atl1c_setup_mac_funcs(struct atl1c_hw *hw)
 {
 	u32 link_ctrl_data;
 
-	atl1c_set_mac_type(hw);
 	AT_READ_REG(hw, REG_LINK_CTRL, &link_ctrl_data);
 
 	hw->ctrl_flags = ATL1C_INTR_MODRT_ENABLE  |
@@ -2568,7 +2560,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct net_device *netdev;
 	struct atl1c_adapter *adapter;
 	static int cards_found;
-
+	u8 __iomem *hw_addr;
+	enum atl1c_nic_type nic_type;
 	int err = 0;
 
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
@@ -2602,6 +2595,15 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
+	hw_addr = pci_ioremap_bar(pdev, 0);
+	if (!hw_addr) {
+		err = -EIO;
+		dev_err(&pdev->dev, "cannot map device registers\n");
+		goto err_ioremap;
+	}
+
+	nic_type = atl1c_get_mac_type(pdev, hw_addr);
+
 	netdev = alloc_etherdev(sizeof(struct atl1c_adapter));
 	if (netdev == NULL) {
 		err = -ENOMEM;
@@ -2618,13 +2620,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->netdev = netdev;
 	adapter->pdev = pdev;
 	adapter->hw.adapter = adapter;
+	adapter->hw.nic_type = nic_type;
 	adapter->msg_enable = netif_msg_init(-1, atl1c_default_msg);
-	adapter->hw.hw_addr = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
-	if (!adapter->hw.hw_addr) {
-		err = -EIO;
-		dev_err(&pdev->dev, "cannot map device registers\n");
-		goto err_ioremap;
-	}
+	adapter->hw.hw_addr = hw_addr;
 
 	/* init mii data */
 	adapter->mii.dev = netdev;
@@ -2687,11 +2685,11 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 err_reset:
 err_register:
 err_sw_init:
-	iounmap(adapter->hw.hw_addr);
 err_init_netdev:
-err_ioremap:
 	free_netdev(netdev);
 err_alloc_etherdev:
+	iounmap(hw_addr);
+err_ioremap:
 	pci_release_regions(pdev);
 err_pci_reg:
 err_dma:

From 20a1b6bdca15477c95800f50867a4dc1699ab548 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 27 May 2021 17:44:21 +0300
Subject: [PATCH 0617/3292] atl1c: move tx napi into tpd_ring

To get more performance from using multiple tx queues one needs
a per tx queue napi.

Move tx napi from per adapter struct into per tx queue struct.
Patch that actually enables multiple tx queues will follow.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/atl1c/atl1c.h     |  4 +++-
 .../net/ethernet/atheros/atl1c/atl1c_main.c    | 18 ++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 9d70cb7544f1..2c8b72a7db03 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -475,13 +475,16 @@ struct atl1c_buffer {
 
 /* transimit packet descriptor (tpd) ring */
 struct atl1c_tpd_ring {
+	struct atl1c_adapter *adapter;
 	void *desc;		/* descriptor ring virtual address */
 	dma_addr_t dma;		/* descriptor ring physical address */
+	u16 num;
 	u16 size;		/* descriptor ring length in bytes */
 	u16 count;		/* number of descriptors in the ring */
 	u16 next_to_use;
 	atomic_t next_to_clean;
 	struct atl1c_buffer *buffer_info;
+	struct napi_struct napi;
 };
 
 /* receive free descriptor (rfd) ring */
@@ -510,7 +513,6 @@ struct atl1c_adapter {
 	struct net_device   *netdev;
 	struct pci_dev      *pdev;
 	struct napi_struct  napi;
-	struct napi_struct  tx_napi;
 	struct page         *rx_page;
 	unsigned int	    rx_page_offset;
 	unsigned int	    rx_frag_size;
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index e3a77d81fecb..db60c1f706ae 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -983,6 +983,8 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 		goto err_nomem;
 
 	for (i = 0; i < AT_MAX_TRANSMIT_QUEUE; i++) {
+		tpd_ring[i].adapter = adapter;
+		tpd_ring[i].num = i;
 		tpd_ring[i].buffer_info =
 			(tpd_ring->buffer_info + count);
 		count += tpd_ring[i].count;
@@ -1533,9 +1535,9 @@ static inline void atl1c_clear_phy_int(struct atl1c_adapter *adapter)
 
 static int atl1c_clean_tx(struct napi_struct *napi, int budget)
 {
-	struct atl1c_adapter *adapter =
-		container_of(napi, struct atl1c_adapter, tx_napi);
-	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[atl1c_trans_normal];
+	struct atl1c_tpd_ring *tpd_ring =
+		container_of(napi, struct atl1c_tpd_ring, napi);
+	struct atl1c_adapter *adapter = tpd_ring->adapter;
 	struct atl1c_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
 	u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean);
@@ -1615,12 +1617,12 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 			}
 		}
 		if (status & ISR_TX_PKT) {
-			if (napi_schedule_prep(&adapter->tx_napi)) {
+			if (napi_schedule_prep(&adapter->tpd_ring[0].napi)) {
 				spin_lock(&hw->intr_mask_lock);
 				hw->intr_mask &= ~ISR_TX_PKT;
 				AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
 				spin_unlock(&hw->intr_mask_lock);
-				__napi_schedule(&adapter->tx_napi);
+				__napi_schedule(&adapter->tpd_ring[0].napi);
 			}
 		}
 
@@ -2354,7 +2356,7 @@ static int atl1c_up(struct atl1c_adapter *adapter)
 	atl1c_check_link_status(adapter);
 	clear_bit(__AT_DOWN, &adapter->flags);
 	napi_enable(&adapter->napi);
-	napi_enable(&adapter->tx_napi);
+	napi_enable(&adapter->tpd_ring[0].napi);
 	atl1c_irq_enable(adapter);
 	netif_start_queue(netdev);
 	return err;
@@ -2375,7 +2377,7 @@ static void atl1c_down(struct atl1c_adapter *adapter)
 	set_bit(__AT_DOWN, &adapter->flags);
 	netif_carrier_off(netdev);
 	napi_disable(&adapter->napi);
-	napi_disable(&adapter->tx_napi);
+	napi_disable(&adapter->tpd_ring[0].napi);
 	atl1c_irq_disable(adapter);
 	atl1c_free_irq(adapter);
 	/* disable ASPM if device inactive */
@@ -2632,7 +2634,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK;
 	dev_set_threaded(netdev, true);
 	netif_napi_add(netdev, &adapter->napi, atl1c_clean, 64);
-	netif_napi_add(netdev, &adapter->tx_napi, atl1c_clean_tx, 64);
+	netif_napi_add(netdev, &adapter->tpd_ring[0].napi, atl1c_clean_tx, 64);
 	timer_setup(&adapter->phy_config_timer, atl1c_phy_config, 0);
 	/* setup the private structure */
 	err = atl1c_sw_init(adapter);

From 8042824a3c0bcf7fc96c62b932b44b0b54714642 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 27 May 2021 17:44:22 +0300
Subject: [PATCH 0618/3292] atl1c: prepare for multiple rx queues

Move napi and other per queue members into per rx queue struct.
Allocate max rx queues that any hw supported by the driver might have.
Patch that actually enables multiple rx queues will follow.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/atl1c/atl1c.h    |  12 +-
 .../net/ethernet/atheros/atl1c/atl1c_main.c   | 106 +++++++++---------
 2 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 2c8b72a7db03..9edf90e1f028 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -500,21 +500,23 @@ struct atl1c_rfd_ring {
 
 /* receive return descriptor (rrd) ring */
 struct atl1c_rrd_ring {
+	struct atl1c_adapter *adapter;
 	void *desc;		/* descriptor ring virtual address */
 	dma_addr_t dma;		/* descriptor ring physical address */
+	u16 num;
 	u16 size;		/* descriptor ring length in bytes */
 	u16 count;		/* number of descriptors in the ring */
 	u16 next_to_use;
 	u16 next_to_clean;
+	struct napi_struct napi;
+	struct page *rx_page;
+	unsigned int rx_page_offset;
 };
 
 /* board specific private data structure */
 struct atl1c_adapter {
 	struct net_device   *netdev;
 	struct pci_dev      *pdev;
-	struct napi_struct  napi;
-	struct page         *rx_page;
-	unsigned int	    rx_page_offset;
 	unsigned int	    rx_frag_size;
 	struct atl1c_hw        hw;
 	struct atl1c_hw_stats  hw_stats;
@@ -545,8 +547,8 @@ struct atl1c_adapter {
 	/* All Descriptor memory */
 	struct atl1c_ring_header ring_header;
 	struct atl1c_tpd_ring tpd_ring[AT_MAX_TRANSMIT_QUEUE];
-	struct atl1c_rfd_ring rfd_ring;
-	struct atl1c_rrd_ring rrd_ring;
+	struct atl1c_rfd_ring rfd_ring[AT_MAX_RECEIVE_QUEUE];
+	struct atl1c_rrd_ring rrd_ring[AT_MAX_RECEIVE_QUEUE];
 	u32 bd_number;     /* board number;*/
 };
 
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index db60c1f706ae..79984735a2fd 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -40,8 +40,6 @@ static int atl1c_stop_mac(struct atl1c_hw *hw);
 static void atl1c_disable_l0s_l1(struct atl1c_hw *hw);
 static void atl1c_set_aspm(struct atl1c_hw *hw, u16 link_speed);
 static void atl1c_start_mac(struct atl1c_adapter *adapter);
-static void atl1c_clean_rx_irq(struct atl1c_adapter *adapter,
-		   int *work_done, int work_to_do);
 static int atl1c_up(struct atl1c_adapter *adapter);
 static void atl1c_down(struct atl1c_adapter *adapter);
 static int atl1c_reset_mac(struct atl1c_hw *hw);
@@ -770,7 +768,7 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter)
 	adapter->link_speed = SPEED_0;
 	adapter->link_duplex = FULL_DUPLEX;
 	adapter->tpd_ring[0].count = 1024;
-	adapter->rfd_ring.count = 512;
+	adapter->rfd_ring[0].count = 512;
 
 	hw->vendor_id = pdev->vendor;
 	hw->device_id = pdev->device;
@@ -878,8 +876,8 @@ static void atl1c_clean_tx_ring(struct atl1c_adapter *adapter,
  */
 static void atl1c_clean_rx_ring(struct atl1c_adapter *adapter)
 {
-	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring;
+	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
+	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
 	struct atl1c_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
 	int j;
@@ -902,8 +900,8 @@ static void atl1c_clean_rx_ring(struct atl1c_adapter *adapter)
 static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter)
 {
 	struct atl1c_tpd_ring *tpd_ring = adapter->tpd_ring;
-	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring;
+	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
+	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
 	struct atl1c_buffer *buffer_info;
 	int i, j;
 
@@ -945,9 +943,9 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
 		kfree(adapter->tpd_ring[0].buffer_info);
 		adapter->tpd_ring[0].buffer_info = NULL;
 	}
-	if (adapter->rx_page) {
-		put_page(adapter->rx_page);
-		adapter->rx_page = NULL;
+	if (adapter->rrd_ring[0].rx_page) {
+		put_page(adapter->rrd_ring[0].rx_page);
+		adapter->rrd_ring[0].rx_page = NULL;
 	}
 }
 
@@ -961,8 +959,8 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
 	struct atl1c_tpd_ring *tpd_ring = adapter->tpd_ring;
-	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring;
+	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
+	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
 	struct atl1c_ring_header *ring_header = &adapter->ring_header;
 	int size;
 	int i;
@@ -1030,6 +1028,8 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 	offset += roundup(rfd_ring->size, 8);
 
 	/* init RRD ring */
+	rrd_ring->adapter = adapter;
+	rrd_ring->num = 0;
 	rrd_ring->dma = ring_header->dma + offset;
 	rrd_ring->desc = (u8 *) ring_header->desc + offset;
 	rrd_ring->size = sizeof(struct atl1c_recv_ret_status) *
@@ -1046,10 +1046,9 @@ err_nomem:
 static void atl1c_configure_des_ring(struct atl1c_adapter *adapter)
 {
 	struct atl1c_hw *hw = &adapter->hw;
-	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring;
-	struct atl1c_tpd_ring *tpd_ring = (struct atl1c_tpd_ring *)
-				adapter->tpd_ring;
+	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
+	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
+	struct atl1c_tpd_ring *tpd_ring = adapter->tpd_ring;
 
 	/* TPD */
 	AT_WRITE_REG(hw, REG_TX_BASE_ADDR_HI,
@@ -1608,12 +1607,12 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 		/* Ack ISR */
 		AT_WRITE_REG(hw, REG_ISR, status | ISR_DIS_INT);
 		if (status & ISR_RX_PKT) {
-			if (likely(napi_schedule_prep(&adapter->napi))) {
+			if (napi_schedule_prep(&adapter->rrd_ring[0].napi)) {
 				spin_lock(&hw->intr_mask_lock);
 				hw->intr_mask &= ~ISR_RX_PKT;
 				AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
 				spin_unlock(&hw->intr_mask_lock);
-				__napi_schedule(&adapter->napi);
+				__napi_schedule(&adapter->rrd_ring[0].napi);
 			}
 		}
 		if (status & ISR_TX_PKT) {
@@ -1677,33 +1676,35 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
 static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
 				       bool napi_mode)
 {
+	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[0];
 	struct sk_buff *skb;
 	struct page *page;
 
 	if (adapter->rx_frag_size > PAGE_SIZE) {
 		if (likely(napi_mode))
-			return napi_alloc_skb(&adapter->napi,
+			return napi_alloc_skb(&rrd_ring->napi,
 					      adapter->rx_buffer_len);
 		else
 			return netdev_alloc_skb_ip_align(adapter->netdev,
 							 adapter->rx_buffer_len);
 	}
 
-	page = adapter->rx_page;
+	page = rrd_ring->rx_page;
 	if (!page) {
-		adapter->rx_page = page = alloc_page(GFP_ATOMIC);
+		page = alloc_page(GFP_ATOMIC);
 		if (unlikely(!page))
 			return NULL;
-		adapter->rx_page_offset = 0;
+		rrd_ring->rx_page = page;
+		rrd_ring->rx_page_offset = 0;
 	}
 
-	skb = build_skb(page_address(page) + adapter->rx_page_offset,
+	skb = build_skb(page_address(page) + rrd_ring->rx_page_offset,
 			adapter->rx_frag_size);
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-		adapter->rx_page_offset += adapter->rx_frag_size;
-		if (adapter->rx_page_offset >= PAGE_SIZE)
-			adapter->rx_page = NULL;
+		rrd_ring->rx_page_offset += adapter->rx_frag_size;
+		if (rrd_ring->rx_page_offset >= PAGE_SIZE)
+			rrd_ring->rx_page = NULL;
 		else
 			get_page(page);
 	}
@@ -1712,7 +1713,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
 
 static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode)
 {
-	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
+	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	struct atl1c_buffer *buffer_info, *next_info;
 	struct sk_buff *skb;
@@ -1812,22 +1813,34 @@ static void atl1c_clean_rfd(struct atl1c_rfd_ring *rfd_ring,
 	rfd_ring->next_to_clean = rfd_index;
 }
 
-static void atl1c_clean_rx_irq(struct atl1c_adapter *adapter,
-		   int *work_done, int work_to_do)
+/**
+ * atl1c_clean_rx - NAPI Rx polling callback
+ * @napi: napi info
+ * @budget: limit of packets to clean
+ */
+static int atl1c_clean_rx(struct napi_struct *napi, int budget)
 {
+	struct atl1c_rrd_ring *rrd_ring =
+		container_of(napi, struct atl1c_rrd_ring, napi);
+	struct atl1c_adapter *adapter = rrd_ring->adapter;
 	u16 rfd_num, rfd_index;
 	u16 count = 0;
 	u16 length;
 	struct pci_dev *pdev = adapter->pdev;
 	struct net_device *netdev  = adapter->netdev;
-	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring;
+	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[rrd_ring->num];
 	struct sk_buff *skb;
 	struct atl1c_recv_ret_status *rrs;
 	struct atl1c_buffer *buffer_info;
+	int work_done = 0;
+	unsigned long flags;
+
+	/* Keep link state information with original netdev */
+	if (!netif_carrier_ok(adapter->netdev))
+		goto quit_polling;
 
 	while (1) {
-		if (*work_done >= work_to_do)
+		if (work_done >= budget)
 			break;
 		rrs = ATL1C_RRD_DESC(rrd_ring, rrd_ring->next_to_clean);
 		if (likely(RRS_RXD_IS_VALID(rrs->word3))) {
@@ -1881,32 +1894,13 @@ rrs_checked:
 			vlan = le16_to_cpu(vlan);
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
 		}
-		napi_gro_receive(&adapter->napi, skb);
+		napi_gro_receive(napi, skb);
 
-		(*work_done)++;
+		work_done++;
 		count++;
 	}
 	if (count)
 		atl1c_alloc_rx_buffer(adapter, true);
-}
-
-/**
- * atl1c_clean - NAPI Rx polling callback
- * @napi: napi info
- * @budget: limit of packets to clean
- */
-static int atl1c_clean(struct napi_struct *napi, int budget)
-{
-	struct atl1c_adapter *adapter =
-			container_of(napi, struct atl1c_adapter, napi);
-	int work_done = 0;
-	unsigned long flags;
-
-	/* Keep link state information with original netdev */
-	if (!netif_carrier_ok(adapter->netdev))
-		goto quit_polling;
-	/* just enable one RXQ */
-	atl1c_clean_rx_irq(adapter, &work_done, budget);
 
 	if (work_done < budget) {
 quit_polling:
@@ -2355,7 +2349,7 @@ static int atl1c_up(struct atl1c_adapter *adapter)
 
 	atl1c_check_link_status(adapter);
 	clear_bit(__AT_DOWN, &adapter->flags);
-	napi_enable(&adapter->napi);
+	napi_enable(&adapter->rrd_ring[0].napi);
 	napi_enable(&adapter->tpd_ring[0].napi);
 	atl1c_irq_enable(adapter);
 	netif_start_queue(netdev);
@@ -2376,7 +2370,7 @@ static void atl1c_down(struct atl1c_adapter *adapter)
 	 * reschedule our watchdog timer */
 	set_bit(__AT_DOWN, &adapter->flags);
 	netif_carrier_off(netdev);
-	napi_disable(&adapter->napi);
+	napi_disable(&adapter->rrd_ring[0].napi);
 	napi_disable(&adapter->tpd_ring[0].napi);
 	atl1c_irq_disable(adapter);
 	atl1c_free_irq(adapter);
@@ -2633,7 +2627,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->mii.phy_id_mask = 0x1f;
 	adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK;
 	dev_set_threaded(netdev, true);
-	netif_napi_add(netdev, &adapter->napi, atl1c_clean, 64);
+	netif_napi_add(netdev, &adapter->rrd_ring[0].napi, atl1c_clean_rx, 64);
 	netif_napi_add(netdev, &adapter->tpd_ring[0].napi, atl1c_clean_tx, 64);
 	timer_setup(&adapter->phy_config_timer, atl1c_phy_config, 0);
 	/* setup the private structure */

From 057f4af2b1716f6d6cef285a1c9b7a9bb63d822b Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Thu, 27 May 2021 17:44:23 +0300
Subject: [PATCH 0619/3292] atl1c: add 4 RX/TX queue support for Mikrotik
 10/25G NIC

More RX/TX queues on a network card help spread the CPU load among
cores and achieve higher overall networking performance. The new
Mikrotik 10/25G NIC supports 4 RX and 4 TX queues. TX queues are
treated with equal priority. RX queue balancing is fixed based on
L2/L3/L4 hash.

This adds support for 4 RX/TX queues while maintaining backwards
compatibility with older hardware.

Simultaneous TX + RX performance on AMD Threadripper 3960X
with Mikrotik 10/25G NIC improved from 1.6Mpps to 3.2Mpps per port.

Backwards compatiblitiy was verified with AR8151 and AR8131 based
NICs.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/atl1c/atl1c.h    |   9 +-
 drivers/net/ethernet/atheros/atl1c/atl1c_hw.h |  34 +-
 .../net/ethernet/atheros/atl1c/atl1c_main.c   | 402 +++++++++++-------
 3 files changed, 289 insertions(+), 156 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index 9edf90e1f028..43d821fe7a54 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -63,7 +63,7 @@
 
 #define AT_MAX_RECEIVE_QUEUE    4
 #define AT_DEF_RECEIVE_QUEUE	1
-#define AT_MAX_TRANSMIT_QUEUE	2
+#define AT_MAX_TRANSMIT_QUEUE  4
 
 #define AT_DMA_HI_ADDR_MASK     0xffffffff00000000ULL
 #define AT_DMA_LO_ADDR_MASK     0x00000000ffffffffULL
@@ -294,11 +294,6 @@ enum atl1c_nic_type {
 	athr_mt,
 };
 
-enum atl1c_trans_queue {
-	atl1c_trans_normal = 0,
-	atl1c_trans_high = 1
-};
-
 struct atl1c_hw_stats {
 	/* rx */
 	unsigned long rx_ok;		/* The number of good packet received. */
@@ -522,6 +517,8 @@ struct atl1c_adapter {
 	struct atl1c_hw_stats  hw_stats;
 	struct mii_if_info  mii;    /* MII interface info */
 	u16 rx_buffer_len;
+	unsigned int tx_queue_count;
+	unsigned int rx_queue_count;
 
 	unsigned long flags;
 #define __AT_TESTING        0x0001
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
index c263b326cec5..c567c920628f 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.h
@@ -528,15 +528,24 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 #define REG_RX_BASE_ADDR_HI		0x1540
 #define REG_TX_BASE_ADDR_HI		0x1544
 #define REG_RFD0_HEAD_ADDR_LO		0x1550
+#define REG_RFD1_HEAD_ADDR_LO          0x1554
+#define REG_RFD2_HEAD_ADDR_LO          0x1558
+#define REG_RFD3_HEAD_ADDR_LO          0x155C
 #define REG_RFD_RING_SIZE		0x1560
 #define RFD_RING_SIZE_MASK		0x0FFF
 #define REG_RX_BUF_SIZE			0x1564
 #define RX_BUF_SIZE_MASK		0xFFFF
 #define REG_RRD0_HEAD_ADDR_LO		0x1568
+#define REG_RRD1_HEAD_ADDR_LO          0x156C
+#define REG_RRD2_HEAD_ADDR_LO          0x1570
+#define REG_RRD3_HEAD_ADDR_LO          0x1574
 #define REG_RRD_RING_SIZE		0x1578
 #define RRD_RING_SIZE_MASK		0x0FFF
 #define REG_TPD_PRI1_ADDR_LO		0x157C
 #define REG_TPD_PRI0_ADDR_LO		0x1580
+#define REG_TPD_PRI2_ADDR_LO           0x1F10
+#define REG_TPD_PRI3_ADDR_LO           0x1F14
+
 #define REG_TPD_RING_SIZE		0x1584
 #define TPD_RING_SIZE_MASK		0xFFFF
 
@@ -655,15 +664,26 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 /* Mail box */
 #define MB_RFDX_PROD_IDX_MASK		0xFFFF
 #define REG_MB_RFD0_PROD_IDX		0x15E0
+#define REG_MB_RFD1_PROD_IDX           0x15E4
+#define REG_MB_RFD2_PROD_IDX           0x15E8
+#define REG_MB_RFD3_PROD_IDX           0x15EC
 
 #define REG_TPD_PRI1_PIDX               0x15F0	/* 16bit,hi-tpd producer idx */
 #define REG_TPD_PRI0_PIDX		0x15F2	/* 16bit,lo-tpd producer idx */
 #define REG_TPD_PRI1_CIDX		0x15F4	/* 16bit,hi-tpd consumer idx */
 #define REG_TPD_PRI0_CIDX		0x15F6	/* 16bit,lo-tpd consumer idx */
+#define REG_TPD_PRI3_PIDX              0x1F18
+#define REG_TPD_PRI2_PIDX              0x1F1A
+#define REG_TPD_PRI3_CIDX              0x1F1C
+#define REG_TPD_PRI2_CIDX              0x1F1E
+
 
 #define REG_MB_RFD01_CONS_IDX		0x15F8
 #define MB_RFD0_CONS_IDX_MASK		0x0000FFFF
 #define MB_RFD1_CONS_IDX_MASK		0xFFFF0000
+#define REG_MB_RFD23_CONS_IDX          0x15FC
+#define MB_RFD2_CONS_IDX_MASK          0x0000FFFF
+#define MB_RFD3_CONS_IDX_MASK          0xFFFF0000
 
 /* Interrupt Status Register */
 #define REG_ISR    			0x1600
@@ -687,7 +707,7 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 /* GPHY low power state interrupt */
 #define ISR_GPHY_LPW           		0x00002000
 #define ISR_TXQ_TO_RST			0x00004000
-#define ISR_TX_PKT			0x00008000
+#define ISR_TX_PKT_0                   0x00008000
 #define ISR_RX_PKT_0			0x00010000
 #define ISR_RX_PKT_1			0x00020000
 #define ISR_RX_PKT_2			0x00040000
@@ -699,6 +719,9 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 #define ISR_NFERR_DETECTED		0x01000000
 #define ISR_CERR_DETECTED		0x02000000
 #define ISR_PHY_LINKDOWN		0x04000000
+#define ISR_TX_PKT_1                   0x10000000
+#define ISR_TX_PKT_2                   0x20000000
+#define ISR_TX_PKT_3                   0x40000000
 #define ISR_DIS_INT			0x80000000
 
 /* Interrupt Mask Register */
@@ -713,11 +736,15 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 		ISR_TXQ_TO_RST  |\
 		ISR_DMAW_TO_RST	|\
 		ISR_GPHY	|\
-		ISR_TX_PKT	|\
-		ISR_RX_PKT_0	|\
 		ISR_GPHY_LPW    |\
 		ISR_PHY_LINKDOWN)
 
+#define ISR_TX_PKT     (			\
+	ISR_TX_PKT_0    |			\
+	ISR_TX_PKT_1    |			\
+	ISR_TX_PKT_2    |			\
+	ISR_TX_PKT_3)
+
 #define ISR_RX_PKT 	(\
 	ISR_RX_PKT_0    |\
 	ISR_RX_PKT_1    |\
@@ -771,6 +798,7 @@ void atl1c_post_phy_linkchg(struct atl1c_hw *hw, u16 link_speed);
 #define REG_MT_VERSION			0x1F0C
 
 #define MT_MAGIC			0xaabb1234
+#define MT_MODE_4Q			BIT(0)
 
 #define L1D_MPW_PHYID1			0xD01C  /* V7 */
 #define L1D_MPW_PHYID2			0xD01D  /* V1-V6 */
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 79984735a2fd..1c6246a5dc22 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -36,6 +36,40 @@ MODULE_AUTHOR("Qualcomm Atheros Inc.");
 MODULE_DESCRIPTION("Qualcomm Atheros 100/1000M Ethernet Network Driver");
 MODULE_LICENSE("GPL");
 
+struct atl1c_qregs {
+	u16 tpd_addr_lo;
+	u16 tpd_prod;
+	u16 tpd_cons;
+	u16 rfd_addr_lo;
+	u16 rrd_addr_lo;
+	u16 rfd_prod;
+	u32 tx_isr;
+	u32 rx_isr;
+};
+
+static struct atl1c_qregs atl1c_qregs[AT_MAX_TRANSMIT_QUEUE] = {
+	{
+		REG_TPD_PRI0_ADDR_LO, REG_TPD_PRI0_PIDX, REG_TPD_PRI0_CIDX,
+		REG_RFD0_HEAD_ADDR_LO, REG_RRD0_HEAD_ADDR_LO,
+		REG_MB_RFD0_PROD_IDX, ISR_TX_PKT_0, ISR_RX_PKT_0
+	},
+	{
+		REG_TPD_PRI1_ADDR_LO, REG_TPD_PRI1_PIDX, REG_TPD_PRI1_CIDX,
+		REG_RFD1_HEAD_ADDR_LO, REG_RRD1_HEAD_ADDR_LO,
+		REG_MB_RFD1_PROD_IDX, ISR_TX_PKT_1, ISR_RX_PKT_1
+	},
+	{
+		REG_TPD_PRI2_ADDR_LO, REG_TPD_PRI2_PIDX, REG_TPD_PRI2_CIDX,
+		REG_RFD2_HEAD_ADDR_LO, REG_RRD2_HEAD_ADDR_LO,
+		REG_MB_RFD2_PROD_IDX, ISR_TX_PKT_2, ISR_RX_PKT_2
+	},
+	{
+		REG_TPD_PRI3_ADDR_LO, REG_TPD_PRI3_PIDX, REG_TPD_PRI3_CIDX,
+		REG_RFD3_HEAD_ADDR_LO, REG_RRD3_HEAD_ADDR_LO,
+		REG_MB_RFD3_PROD_IDX, ISR_TX_PKT_3, ISR_RX_PKT_3
+	},
+};
+
 static int atl1c_stop_mac(struct atl1c_hw *hw);
 static void atl1c_disable_l0s_l1(struct atl1c_hw *hw);
 static void atl1c_set_aspm(struct atl1c_hw *hw, u16 link_speed);
@@ -45,7 +79,8 @@ static void atl1c_down(struct atl1c_adapter *adapter);
 static int atl1c_reset_mac(struct atl1c_hw *hw);
 static void atl1c_reset_dma_ring(struct atl1c_adapter *adapter);
 static int atl1c_configure(struct atl1c_adapter *adapter);
-static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode);
+static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue,
+				 bool napi_mode);
 
 
 static const u32 atl1c_default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -761,7 +796,7 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter)
 	struct atl1c_hw *hw   = &adapter->hw;
 	struct pci_dev	*pdev = adapter->pdev;
 	u32 revision;
-
+	int i;
 
 	adapter->wol = 0;
 	device_set_wakeup_enable(&pdev->dev, false);
@@ -786,6 +821,10 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter)
 	atl1c_patch_assign(hw);
 
 	hw->intr_mask = IMR_NORMAL_MASK;
+	for (i = 0; i < adapter->tx_queue_count; ++i)
+		hw->intr_mask |= atl1c_qregs[i].tx_isr;
+	for (i = 0; i < adapter->rx_queue_count; ++i)
+		hw->intr_mask |= atl1c_qregs[i].rx_isr;
 	hw->phy_configured = false;
 	hw->preamble_len = 7;
 	hw->max_frame_size = adapter->netdev->mtu;
@@ -845,12 +884,12 @@ static inline void atl1c_clean_buffer(struct pci_dev *pdev,
 /**
  * atl1c_clean_tx_ring - Free Tx-skb
  * @adapter: board private structure
- * @type: type of transmit queue
+ * @queue: idx of transmit queue
  */
 static void atl1c_clean_tx_ring(struct atl1c_adapter *adapter,
-				enum atl1c_trans_queue type)
+				u32 queue)
 {
-	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
+	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[queue];
 	struct atl1c_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
 	u16 index, ring_count;
@@ -873,11 +912,12 @@ static void atl1c_clean_tx_ring(struct atl1c_adapter *adapter,
 /**
  * atl1c_clean_rx_ring - Free rx-reservation skbs
  * @adapter: board private structure
+ * @queue: idx of transmit queue
  */
-static void atl1c_clean_rx_ring(struct atl1c_adapter *adapter)
+static void atl1c_clean_rx_ring(struct atl1c_adapter *adapter, u32 queue)
 {
-	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
-	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
+	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue];
+	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue];
 	struct atl1c_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
 	int j;
@@ -905,21 +945,23 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter)
 	struct atl1c_buffer *buffer_info;
 	int i, j;
 
-	for (i = 0; i < AT_MAX_TRANSMIT_QUEUE; i++) {
+	for (i = 0; i < adapter->tx_queue_count; i++) {
 		tpd_ring[i].next_to_use = 0;
 		atomic_set(&tpd_ring[i].next_to_clean, 0);
 		buffer_info = tpd_ring[i].buffer_info;
 		for (j = 0; j < tpd_ring->count; j++)
 			ATL1C_SET_BUFFER_STATE(&buffer_info[i],
-					ATL1C_BUFFER_FREE);
+					       ATL1C_BUFFER_FREE);
 	}
-	rfd_ring->next_to_use = 0;
-	rfd_ring->next_to_clean = 0;
-	rrd_ring->next_to_use = 0;
-	rrd_ring->next_to_clean = 0;
-	for (j = 0; j < rfd_ring->count; j++) {
-		buffer_info = &rfd_ring->buffer_info[j];
-		ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_FREE);
+	for (i = 0; i < adapter->rx_queue_count; i++) {
+		rfd_ring[i].next_to_use = 0;
+		rfd_ring[i].next_to_clean = 0;
+		rrd_ring[i].next_to_use = 0;
+		rrd_ring[i].next_to_clean = 0;
+		for (j = 0; j < rfd_ring[i].count; j++) {
+			buffer_info = &rfd_ring[i].buffer_info[j];
+			ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_FREE);
+		}
 	}
 }
 
@@ -932,20 +974,24 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter)
 static void atl1c_free_ring_resources(struct atl1c_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
+	int i;
 
 	dma_free_coherent(&pdev->dev, adapter->ring_header.size,
 			  adapter->ring_header.desc, adapter->ring_header.dma);
 	adapter->ring_header.desc = NULL;
 
 	/* Note: just free tdp_ring.buffer_info,
-	*  it contain rfd_ring.buffer_info, do not double free */
+	 * it contain rfd_ring.buffer_info, do not double free
+	 */
 	if (adapter->tpd_ring[0].buffer_info) {
 		kfree(adapter->tpd_ring[0].buffer_info);
 		adapter->tpd_ring[0].buffer_info = NULL;
 	}
-	if (adapter->rrd_ring[0].rx_page) {
-		put_page(adapter->rrd_ring[0].rx_page);
-		adapter->rrd_ring[0].rx_page = NULL;
+	for (i = 0; i < adapter->rx_queue_count; ++i) {
+		if (adapter->rrd_ring[i].rx_page) {
+			put_page(adapter->rrd_ring[i].rx_page);
+			adapter->rrd_ring[i].rx_page = NULL;
+		}
 	}
 }
 
@@ -962,36 +1008,43 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
 	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
 	struct atl1c_ring_header *ring_header = &adapter->ring_header;
+	int tqc = adapter->tx_queue_count;
+	int rqc = adapter->rx_queue_count;
 	int size;
 	int i;
 	int count = 0;
-	int rx_desc_count = 0;
 	u32 offset = 0;
 
-	rrd_ring->count = rfd_ring->count;
-	for (i = 1; i < AT_MAX_TRANSMIT_QUEUE; i++)
+	/* Even though only one tpd queue is actually used, the "high"
+	 * priority tpd queue also gets initialized
+	 */
+	if (tqc == 1)
+		tqc = 2;
+
+	for (i = 1; i < tqc; i++)
 		tpd_ring[i].count = tpd_ring[0].count;
 
-	/* 2 tpd queue, one high priority queue,
-	 * another normal priority queue */
-	size = sizeof(struct atl1c_buffer) * (tpd_ring->count * 2 +
-		rfd_ring->count);
+	size = sizeof(struct atl1c_buffer) * (tpd_ring->count * tqc +
+					      rfd_ring->count * rqc);
 	tpd_ring->buffer_info = kzalloc(size, GFP_KERNEL);
 	if (unlikely(!tpd_ring->buffer_info))
 		goto err_nomem;
 
-	for (i = 0; i < AT_MAX_TRANSMIT_QUEUE; i++) {
+	for (i = 0; i < tqc; i++) {
 		tpd_ring[i].adapter = adapter;
 		tpd_ring[i].num = i;
-		tpd_ring[i].buffer_info =
-			(tpd_ring->buffer_info + count);
+		tpd_ring[i].buffer_info = (tpd_ring->buffer_info + count);
 		count += tpd_ring[i].count;
 	}
 
-	rfd_ring->buffer_info =
-		(tpd_ring->buffer_info + count);
-	count += rfd_ring->count;
-	rx_desc_count += rfd_ring->count;
+	for (i = 0; i < rqc; i++) {
+		rrd_ring[i].adapter = adapter;
+		rrd_ring[i].num = i;
+		rrd_ring[i].count = rfd_ring[0].count;
+		rfd_ring[i].count = rfd_ring[0].count;
+		rfd_ring[i].buffer_info = (tpd_ring->buffer_info + count);
+		count += rfd_ring->count;
+	}
 
 	/*
 	 * real ring DMA buffer
@@ -999,9 +1052,9 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 	 * additional bytes tacked onto the end.
 	 */
 	ring_header->size = size =
-		sizeof(struct atl1c_tpd_desc) * tpd_ring->count * 2 +
-		sizeof(struct atl1c_rx_free_desc) * rx_desc_count +
-		sizeof(struct atl1c_recv_ret_status) * rx_desc_count +
+		sizeof(struct atl1c_tpd_desc) * tpd_ring->count * tqc +
+		sizeof(struct atl1c_rx_free_desc) * rfd_ring->count * rqc +
+		sizeof(struct atl1c_recv_ret_status) * rfd_ring->count * rqc +
 		8 * 4;
 
 	ring_header->desc = dma_alloc_coherent(&pdev->dev, ring_header->size,
@@ -1014,27 +1067,28 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
 
 	tpd_ring[0].dma = roundup(ring_header->dma, 8);
 	offset = tpd_ring[0].dma - ring_header->dma;
-	for (i = 0; i < AT_MAX_TRANSMIT_QUEUE; i++) {
+	for (i = 0; i < tqc; i++) {
 		tpd_ring[i].dma = ring_header->dma + offset;
-		tpd_ring[i].desc = (u8 *) ring_header->desc + offset;
+		tpd_ring[i].desc = (u8 *)ring_header->desc + offset;
 		tpd_ring[i].size =
 			sizeof(struct atl1c_tpd_desc) * tpd_ring[i].count;
 		offset += roundup(tpd_ring[i].size, 8);
 	}
-	/* init RFD ring */
-	rfd_ring->dma = ring_header->dma + offset;
-	rfd_ring->desc = (u8 *) ring_header->desc + offset;
-	rfd_ring->size = sizeof(struct atl1c_rx_free_desc) * rfd_ring->count;
-	offset += roundup(rfd_ring->size, 8);
+	for (i = 0; i < rqc; i++) {
+		/* init RFD ring */
+		rfd_ring[i].dma = ring_header->dma + offset;
+		rfd_ring[i].desc = (u8 *)ring_header->desc + offset;
+		rfd_ring[i].size = sizeof(struct atl1c_rx_free_desc) *
+			rfd_ring[i].count;
+		offset += roundup(rfd_ring[i].size, 8);
 
-	/* init RRD ring */
-	rrd_ring->adapter = adapter;
-	rrd_ring->num = 0;
-	rrd_ring->dma = ring_header->dma + offset;
-	rrd_ring->desc = (u8 *) ring_header->desc + offset;
-	rrd_ring->size = sizeof(struct atl1c_recv_ret_status) *
-		rrd_ring->count;
-	offset += roundup(rrd_ring->size, 8);
+		/* init RRD ring */
+		rrd_ring[i].dma = ring_header->dma + offset;
+		rrd_ring[i].desc = (u8 *)ring_header->desc + offset;
+		rrd_ring[i].size = sizeof(struct atl1c_recv_ret_status) *
+			rrd_ring[i].count;
+		offset += roundup(rrd_ring[i].size, 8);
+	}
 
 	return 0;
 
@@ -1049,27 +1103,31 @@ static void atl1c_configure_des_ring(struct atl1c_adapter *adapter)
 	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
 	struct atl1c_rrd_ring *rrd_ring = adapter->rrd_ring;
 	struct atl1c_tpd_ring *tpd_ring = adapter->tpd_ring;
+	int i;
+	int tx_queue_count = adapter->tx_queue_count;
+
+	if (tx_queue_count == 1)
+		tx_queue_count = 2;
 
 	/* TPD */
 	AT_WRITE_REG(hw, REG_TX_BASE_ADDR_HI,
-			(u32)((tpd_ring[atl1c_trans_normal].dma &
-				AT_DMA_HI_ADDR_MASK) >> 32));
+		     (u32)((tpd_ring[0].dma & AT_DMA_HI_ADDR_MASK) >> 32));
 	/* just enable normal priority TX queue */
-	AT_WRITE_REG(hw, REG_TPD_PRI0_ADDR_LO,
-			(u32)(tpd_ring[atl1c_trans_normal].dma &
-				AT_DMA_LO_ADDR_MASK));
-	AT_WRITE_REG(hw, REG_TPD_PRI1_ADDR_LO,
-			(u32)(tpd_ring[atl1c_trans_high].dma &
-				AT_DMA_LO_ADDR_MASK));
+	for (i = 0; i < tx_queue_count; i++) {
+		AT_WRITE_REG(hw, atl1c_qregs[i].tpd_addr_lo,
+			     (u32)(tpd_ring[i].dma & AT_DMA_LO_ADDR_MASK));
+	}
 	AT_WRITE_REG(hw, REG_TPD_RING_SIZE,
 			(u32)(tpd_ring[0].count & TPD_RING_SIZE_MASK));
 
 
 	/* RFD */
 	AT_WRITE_REG(hw, REG_RX_BASE_ADDR_HI,
-			(u32)((rfd_ring->dma & AT_DMA_HI_ADDR_MASK) >> 32));
-	AT_WRITE_REG(hw, REG_RFD0_HEAD_ADDR_LO,
-			(u32)(rfd_ring->dma & AT_DMA_LO_ADDR_MASK));
+		     (u32)((rfd_ring->dma & AT_DMA_HI_ADDR_MASK) >> 32));
+	for (i = 0; i < adapter->rx_queue_count; i++) {
+		AT_WRITE_REG(hw, atl1c_qregs[i].rfd_addr_lo,
+			     (u32)(rfd_ring[i].dma & AT_DMA_LO_ADDR_MASK));
+	}
 
 	AT_WRITE_REG(hw, REG_RFD_RING_SIZE,
 			rfd_ring->count & RFD_RING_SIZE_MASK);
@@ -1077,8 +1135,10 @@ static void atl1c_configure_des_ring(struct atl1c_adapter *adapter)
 			adapter->rx_buffer_len & RX_BUF_SIZE_MASK);
 
 	/* RRD */
-	AT_WRITE_REG(hw, REG_RRD0_HEAD_ADDR_LO,
-			(u32)(rrd_ring->dma & AT_DMA_LO_ADDR_MASK));
+	for (i = 0; i < adapter->rx_queue_count; i++) {
+		AT_WRITE_REG(hw, atl1c_qregs[i].rrd_addr_lo,
+			     (u32)(rrd_ring[i].dma & AT_DMA_LO_ADDR_MASK));
+	}
 	AT_WRITE_REG(hw, REG_RRD_RING_SIZE,
 			(rrd_ring->count & RRD_RING_SIZE_MASK));
 
@@ -1431,14 +1491,28 @@ static int atl1c_configure(struct atl1c_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	int num;
+	int i;
+
+	if (adapter->hw.nic_type == athr_mt) {
+		u32 mode;
+
+		AT_READ_REG(&adapter->hw, REG_MT_MODE, &mode);
+		if (adapter->rx_queue_count == 4)
+			mode |= MT_MODE_4Q;
+		else
+			mode &= ~MT_MODE_4Q;
+		AT_WRITE_REG(&adapter->hw, REG_MT_MODE, mode);
+	}
 
 	atl1c_init_ring_ptrs(adapter);
 	atl1c_set_multi(netdev);
 	atl1c_restore_vlan(adapter);
 
-	num = atl1c_alloc_rx_buffer(adapter, false);
-	if (unlikely(num == 0))
-		return -ENOMEM;
+	for (i = 0; i < adapter->rx_queue_count; ++i) {
+		num = atl1c_alloc_rx_buffer(adapter, i, false);
+		if (unlikely(num == 0))
+			return -ENOMEM;
+	}
 
 	if (atl1c_configure_mac(adapter))
 		return -EIO;
@@ -1537,6 +1611,8 @@ static int atl1c_clean_tx(struct napi_struct *napi, int budget)
 	struct atl1c_tpd_ring *tpd_ring =
 		container_of(napi, struct atl1c_tpd_ring, napi);
 	struct atl1c_adapter *adapter = tpd_ring->adapter;
+	struct netdev_queue *txq =
+		netdev_get_tx_queue(napi->dev, tpd_ring->num);
 	struct atl1c_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
 	u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean);
@@ -1544,7 +1620,8 @@ static int atl1c_clean_tx(struct napi_struct *napi, int budget)
 	unsigned int total_bytes = 0, total_packets = 0;
 	unsigned long flags;
 
-	AT_READ_REGW(&adapter->hw, REG_TPD_PRI0_CIDX, &hw_next_to_clean);
+	AT_READ_REGW(&adapter->hw, atl1c_qregs[tpd_ring->num].tpd_cons,
+		     &hw_next_to_clean);
 
 	while (next_to_clean != hw_next_to_clean) {
 		buffer_info = &tpd_ring->buffer_info[next_to_clean];
@@ -1558,17 +1635,15 @@ static int atl1c_clean_tx(struct napi_struct *napi, int budget)
 		atomic_set(&tpd_ring->next_to_clean, next_to_clean);
 	}
 
-	netdev_completed_queue(adapter->netdev, total_packets, total_bytes);
+	netdev_tx_completed_queue(txq, total_packets, total_bytes);
 
-	if (netif_queue_stopped(adapter->netdev) &&
-			netif_carrier_ok(adapter->netdev)) {
-		netif_wake_queue(adapter->netdev);
-	}
+	if (netif_tx_queue_stopped(txq) && netif_carrier_ok(adapter->netdev))
+		netif_tx_wake_queue(txq);
 
 	if (total_packets < budget) {
 		napi_complete_done(napi, total_packets);
 		spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags);
-		adapter->hw.intr_mask |= ISR_TX_PKT;
+		adapter->hw.intr_mask |= atl1c_qregs[tpd_ring->num].tx_isr;
 		AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask);
 		spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags);
 		return total_packets;
@@ -1576,6 +1651,38 @@ static int atl1c_clean_tx(struct napi_struct *napi, int budget)
 	return budget;
 }
 
+static void atl1c_intr_rx_tx(struct atl1c_adapter *adapter, u32 status)
+{
+	struct atl1c_hw *hw = &adapter->hw;
+	u32 intr_mask;
+	int i;
+
+	spin_lock(&hw->intr_mask_lock);
+	intr_mask = hw->intr_mask;
+	for (i = 0; i < adapter->rx_queue_count; ++i) {
+		if (!(status & atl1c_qregs[i].rx_isr))
+			continue;
+		if (napi_schedule_prep(&adapter->rrd_ring[i].napi)) {
+			intr_mask &= ~atl1c_qregs[i].rx_isr;
+			__napi_schedule(&adapter->rrd_ring[i].napi);
+		}
+	}
+	for (i = 0; i < adapter->tx_queue_count; ++i) {
+		if (!(status & atl1c_qregs[i].tx_isr))
+			continue;
+		if (napi_schedule_prep(&adapter->tpd_ring[i].napi)) {
+			intr_mask &= ~atl1c_qregs[i].tx_isr;
+			__napi_schedule(&adapter->tpd_ring[i].napi);
+		}
+	}
+
+	if (hw->intr_mask != intr_mask) {
+		hw->intr_mask = intr_mask;
+		AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
+	}
+	spin_unlock(&hw->intr_mask_lock);
+}
+
 /**
  * atl1c_intr - Interrupt Handler
  * @irq: interrupt number
@@ -1606,24 +1713,8 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 			atl1c_clear_phy_int(adapter);
 		/* Ack ISR */
 		AT_WRITE_REG(hw, REG_ISR, status | ISR_DIS_INT);
-		if (status & ISR_RX_PKT) {
-			if (napi_schedule_prep(&adapter->rrd_ring[0].napi)) {
-				spin_lock(&hw->intr_mask_lock);
-				hw->intr_mask &= ~ISR_RX_PKT;
-				AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
-				spin_unlock(&hw->intr_mask_lock);
-				__napi_schedule(&adapter->rrd_ring[0].napi);
-			}
-		}
-		if (status & ISR_TX_PKT) {
-			if (napi_schedule_prep(&adapter->tpd_ring[0].napi)) {
-				spin_lock(&hw->intr_mask_lock);
-				hw->intr_mask &= ~ISR_TX_PKT;
-				AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
-				spin_unlock(&hw->intr_mask_lock);
-				__napi_schedule(&adapter->tpd_ring[0].napi);
-			}
-		}
+		if (status & (ISR_RX_PKT | ISR_TX_PKT))
+			atl1c_intr_rx_tx(adapter, status);
 
 		handled = IRQ_HANDLED;
 		/* check if PCIE PHY Link down */
@@ -1674,9 +1765,9 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
 }
 
 static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
-				       bool napi_mode)
+				       u32 queue, bool napi_mode)
 {
-	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[0];
+	struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue];
 	struct sk_buff *skb;
 	struct page *page;
 
@@ -1711,9 +1802,10 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
 	return skb;
 }
 
-static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode)
+static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue,
+				 bool napi_mode)
 {
-	struct atl1c_rfd_ring *rfd_ring = adapter->rfd_ring;
+	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue];
 	struct pci_dev *pdev = adapter->pdev;
 	struct atl1c_buffer *buffer_info, *next_info;
 	struct sk_buff *skb;
@@ -1732,7 +1824,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode)
 	while (next_info->flags & ATL1C_BUFFER_FREE) {
 		rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use);
 
-		skb = atl1c_alloc_skb(adapter, napi_mode);
+		skb = atl1c_alloc_skb(adapter, queue, napi_mode);
 		if (unlikely(!skb)) {
 			if (netif_msg_rx_err(adapter))
 				dev_warn(&pdev->dev, "alloc rx buffer failed\n");
@@ -1774,8 +1866,8 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode)
 		/* TODO: update mailbox here */
 		wmb();
 		rfd_ring->next_to_use = rfd_next_to_use;
-		AT_WRITE_REG(&adapter->hw, REG_MB_RFD0_PROD_IDX,
-			rfd_ring->next_to_use & MB_RFDX_PROD_IDX_MASK);
+		AT_WRITE_REG(&adapter->hw, atl1c_qregs[queue].rfd_prod,
+			     rfd_ring->next_to_use & MB_RFDX_PROD_IDX_MASK);
 	}
 
 	return num_alloc;
@@ -1824,7 +1916,6 @@ static int atl1c_clean_rx(struct napi_struct *napi, int budget)
 		container_of(napi, struct atl1c_rrd_ring, napi);
 	struct atl1c_adapter *adapter = rrd_ring->adapter;
 	u16 rfd_num, rfd_index;
-	u16 count = 0;
 	u16 length;
 	struct pci_dev *pdev = adapter->pdev;
 	struct net_device *netdev  = adapter->netdev;
@@ -1897,16 +1988,15 @@ rrs_checked:
 		napi_gro_receive(napi, skb);
 
 		work_done++;
-		count++;
 	}
-	if (count)
-		atl1c_alloc_rx_buffer(adapter, true);
+	if (work_done)
+		atl1c_alloc_rx_buffer(adapter, rrd_ring->num, true);
 
 	if (work_done < budget) {
 quit_polling:
 		napi_complete_done(napi, work_done);
 		spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags);
-		adapter->hw.intr_mask |= ISR_RX_PKT;
+		adapter->hw.intr_mask |= atl1c_qregs[rrd_ring->num].rx_isr;
 		AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask);
 		spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags);
 	}
@@ -1930,9 +2020,9 @@ static void atl1c_netpoll(struct net_device *netdev)
 }
 #endif
 
-static inline u16 atl1c_tpd_avail(struct atl1c_adapter *adapter, enum atl1c_trans_queue type)
+static inline u16 atl1c_tpd_avail(struct atl1c_adapter *adapter, u32 queue)
 {
-	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
+	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[queue];
 	u16 next_to_use = 0;
 	u16 next_to_clean = 0;
 
@@ -1950,9 +2040,9 @@ static inline u16 atl1c_tpd_avail(struct atl1c_adapter *adapter, enum atl1c_tran
  * there is enough tpd to use
  */
 static struct atl1c_tpd_desc *atl1c_get_tpd(struct atl1c_adapter *adapter,
-	enum atl1c_trans_queue type)
+					    u32 queue)
 {
-	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
+	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[queue];
 	struct atl1c_tpd_desc *tpd_desc;
 	u16 next_to_use = 0;
 
@@ -1994,7 +2084,7 @@ static u16 atl1c_cal_tpd_req(const struct sk_buff *skb)
 static int atl1c_tso_csum(struct atl1c_adapter *adapter,
 			  struct sk_buff *skb,
 			  struct atl1c_tpd_desc **tpd,
-			  enum atl1c_trans_queue type)
+			  u32 queue)
 {
 	struct pci_dev *pdev = adapter->pdev;
 	unsigned short offload_type;
@@ -2039,7 +2129,7 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter,
 				*(struct atl1c_tpd_ext_desc **)(tpd);
 
 			memset(etpd, 0, sizeof(struct atl1c_tpd_ext_desc));
-			*tpd = atl1c_get_tpd(adapter, type);
+			*tpd = atl1c_get_tpd(adapter, queue);
 			ipv6_hdr(skb)->payload_len = 0;
 			/* check payload == 0 byte ? */
 			hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb));
@@ -2091,9 +2181,9 @@ check_sum:
 
 static void atl1c_tx_rollback(struct atl1c_adapter *adpt,
 			      struct atl1c_tpd_desc *first_tpd,
-			      enum atl1c_trans_queue type)
+			      u32 queue)
 {
-	struct atl1c_tpd_ring *tpd_ring = &adpt->tpd_ring[type];
+	struct atl1c_tpd_ring *tpd_ring = &adpt->tpd_ring[queue];
 	struct atl1c_buffer *buffer_info;
 	struct atl1c_tpd_desc *tpd;
 	u16 first_index, index;
@@ -2112,8 +2202,8 @@ static void atl1c_tx_rollback(struct atl1c_adapter *adpt,
 }
 
 static int atl1c_tx_map(struct atl1c_adapter *adapter,
-		      struct sk_buff *skb, struct atl1c_tpd_desc *tpd,
-			enum atl1c_trans_queue type)
+			struct sk_buff *skb, struct atl1c_tpd_desc *tpd,
+			u32 queue)
 {
 	struct atl1c_tpd_desc *use_tpd = NULL;
 	struct atl1c_buffer *buffer_info = NULL;
@@ -2153,7 +2243,7 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
 		if (mapped_len == 0)
 			use_tpd = tpd;
 		else {
-			use_tpd = atl1c_get_tpd(adapter, type);
+			use_tpd = atl1c_get_tpd(adapter, queue);
 			memcpy(use_tpd, tpd, sizeof(struct atl1c_tpd_desc));
 		}
 		buffer_info = atl1c_get_tx_buffer(adapter, use_tpd);
@@ -2175,7 +2265,7 @@ static int atl1c_tx_map(struct atl1c_adapter *adapter,
 	for (f = 0; f < nr_frags; f++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 
-		use_tpd = atl1c_get_tpd(adapter, type);
+		use_tpd = atl1c_get_tpd(adapter, queue);
 		memcpy(use_tpd, tpd, sizeof(struct atl1c_tpd_desc));
 
 		buffer_info = atl1c_get_tx_buffer(adapter, use_tpd);
@@ -2208,23 +2298,22 @@ err_dma:
 	return -1;
 }
 
-static void atl1c_tx_queue(struct atl1c_adapter *adapter,
-			   enum atl1c_trans_queue type)
+static void atl1c_tx_queue(struct atl1c_adapter *adapter, u32 queue)
 {
-	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
-	u16 reg;
+	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[queue];
 
-	reg = type == atl1c_trans_high ? REG_TPD_PRI1_PIDX : REG_TPD_PRI0_PIDX;
-	AT_WRITE_REGW(&adapter->hw, reg, tpd_ring->next_to_use);
+	AT_WRITE_REGW(&adapter->hw, atl1c_qregs[queue].tpd_prod,
+		      tpd_ring->next_to_use);
 }
 
 static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 					  struct net_device *netdev)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
-	u16 tpd_req;
+	u32 queue = skb_get_queue_mapping(skb);
+	struct netdev_queue *txq = netdev_get_tx_queue(netdev, queue);
 	struct atl1c_tpd_desc *tpd;
-	enum atl1c_trans_queue type = atl1c_trans_normal;
+	u16 tpd_req;
 
 	if (test_bit(__AT_DOWN, &adapter->flags)) {
 		dev_kfree_skb_any(skb);
@@ -2233,18 +2322,18 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 
 	tpd_req = atl1c_cal_tpd_req(skb);
 
-	if (atl1c_tpd_avail(adapter, type) < tpd_req) {
+	if (atl1c_tpd_avail(adapter, queue) < tpd_req) {
 		/* no enough descriptor, just stop queue */
-		atl1c_tx_queue(adapter, type);
-		netif_stop_queue(netdev);
+		atl1c_tx_queue(adapter, queue);
+		netif_tx_stop_queue(txq);
 		return NETDEV_TX_BUSY;
 	}
 
-	tpd = atl1c_get_tpd(adapter, type);
+	tpd = atl1c_get_tpd(adapter, queue);
 
 	/* do TSO and check sum */
-	if (atl1c_tso_csum(adapter, skb, &tpd, type) != 0) {
-		atl1c_tx_queue(adapter, type);
+	if (atl1c_tso_csum(adapter, skb, &tpd, queue) != 0) {
+		atl1c_tx_queue(adapter, queue);
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
@@ -2262,17 +2351,17 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 	if (skb_network_offset(skb) != ETH_HLEN)
 		tpd->word1 |= 1 << TPD_ETH_TYPE_SHIFT; /* Ethernet frame */
 
-	if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
+	if (atl1c_tx_map(adapter, skb, tpd, queue) < 0) {
 		netif_info(adapter, tx_done, adapter->netdev,
 			   "tx-skb dropped due to dma error\n");
 		/* roll back tpd/buffer */
-		atl1c_tx_rollback(adapter, tpd, type);
+		atl1c_tx_rollback(adapter, tpd, queue);
 		dev_kfree_skb_any(skb);
 	} else {
 		bool more = netdev_xmit_more();
 
-		if (__netdev_sent_queue(adapter->netdev, skb->len, more))
-			atl1c_tx_queue(adapter, type);
+		if (__netdev_tx_sent_queue(txq, skb->len, more))
+			atl1c_tx_queue(adapter, queue);
 	}
 
 	return NETDEV_TX_OK;
@@ -2326,16 +2415,19 @@ static int atl1c_request_irq(struct atl1c_adapter *adapter)
 
 static void atl1c_reset_dma_ring(struct atl1c_adapter *adapter)
 {
+	int i;
 	/* release tx-pending skbs and reset tx/rx ring index */
-	atl1c_clean_tx_ring(adapter, atl1c_trans_normal);
-	atl1c_clean_tx_ring(adapter, atl1c_trans_high);
-	atl1c_clean_rx_ring(adapter);
+	for (i = 0; i < adapter->tx_queue_count; ++i)
+		atl1c_clean_tx_ring(adapter, i);
+	for (i = 0; i < adapter->rx_queue_count; ++i)
+		atl1c_clean_rx_ring(adapter, i);
 }
 
 static int atl1c_up(struct atl1c_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	int err;
+	int i;
 
 	netif_carrier_off(netdev);
 
@@ -2349,20 +2441,24 @@ static int atl1c_up(struct atl1c_adapter *adapter)
 
 	atl1c_check_link_status(adapter);
 	clear_bit(__AT_DOWN, &adapter->flags);
-	napi_enable(&adapter->rrd_ring[0].napi);
-	napi_enable(&adapter->tpd_ring[0].napi);
+	for (i = 0; i < adapter->tx_queue_count; ++i)
+		napi_enable(&adapter->tpd_ring[i].napi);
+	for (i = 0; i < adapter->rx_queue_count; ++i)
+		napi_enable(&adapter->rrd_ring[i].napi);
 	atl1c_irq_enable(adapter);
 	netif_start_queue(netdev);
 	return err;
 
 err_up:
-	atl1c_clean_rx_ring(adapter);
+	for (i = 0; i < adapter->rx_queue_count; ++i)
+		atl1c_clean_rx_ring(adapter, i);
 	return err;
 }
 
 static void atl1c_down(struct atl1c_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	int i;
 
 	atl1c_del_timer(adapter);
 	adapter->work_event = 0; /* clear all event */
@@ -2370,8 +2466,10 @@ static void atl1c_down(struct atl1c_adapter *adapter)
 	 * reschedule our watchdog timer */
 	set_bit(__AT_DOWN, &adapter->flags);
 	netif_carrier_off(netdev);
-	napi_disable(&adapter->rrd_ring[0].napi);
-	napi_disable(&adapter->tpd_ring[0].napi);
+	for (i = 0; i < adapter->tx_queue_count; ++i)
+		napi_disable(&adapter->tpd_ring[i].napi);
+	for (i = 0; i < adapter->rx_queue_count; ++i)
+		napi_disable(&adapter->rrd_ring[i].napi);
 	atl1c_irq_disable(adapter);
 	atl1c_free_irq(adapter);
 	/* disable ASPM if device inactive */
@@ -2558,7 +2656,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	static int cards_found;
 	u8 __iomem *hw_addr;
 	enum atl1c_nic_type nic_type;
+	u32 queue_count = 1;
 	int err = 0;
+	int i;
 
 	/* enable device (incl. PCI PM wakeup and hotplug setup) */
 	err = pci_enable_device_mem(pdev);
@@ -2599,8 +2699,10 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	nic_type = atl1c_get_mac_type(pdev, hw_addr);
+	if (nic_type == athr_mt)
+		queue_count = 4;
 
-	netdev = alloc_etherdev(sizeof(struct atl1c_adapter));
+	netdev = alloc_etherdev_mq(sizeof(struct atl1c_adapter), queue_count);
 	if (netdev == NULL) {
 		err = -ENOMEM;
 		goto err_alloc_etherdev;
@@ -2619,6 +2721,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->hw.nic_type = nic_type;
 	adapter->msg_enable = netif_msg_init(-1, atl1c_default_msg);
 	adapter->hw.hw_addr = hw_addr;
+	adapter->tx_queue_count = queue_count;
+	adapter->rx_queue_count = queue_count;
 
 	/* init mii data */
 	adapter->mii.dev = netdev;
@@ -2627,8 +2731,12 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->mii.phy_id_mask = 0x1f;
 	adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK;
 	dev_set_threaded(netdev, true);
-	netif_napi_add(netdev, &adapter->rrd_ring[0].napi, atl1c_clean_rx, 64);
-	netif_napi_add(netdev, &adapter->tpd_ring[0].napi, atl1c_clean_tx, 64);
+	for (i = 0; i < adapter->rx_queue_count; ++i)
+		netif_napi_add(netdev, &adapter->rrd_ring[i].napi,
+			       atl1c_clean_rx, 64);
+	for (i = 0; i < adapter->tx_queue_count; ++i)
+		netif_napi_add(netdev, &adapter->tpd_ring[i].napi,
+			       atl1c_clean_tx, 64);
 	timer_setup(&adapter->phy_config_timer, atl1c_phy_config, 0);
 	/* setup the private structure */
 	err = atl1c_sw_init(adapter);

From eefa5311c3f7d6600306470585cbd8d9ffd28af4 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 27 May 2021 10:34:24 -0700
Subject: [PATCH 0620/3292] ixgbe: Fix out-bounds warning in
 ixgbe_host_interface_command()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace union with a couple of pointers in order to fix the following
out-of-bounds warning:

  CC [M]  drivers/net/ethernet/intel/ixgbe/ixgbe_common.o
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c: In function ‘ixgbe_host_interface_command’:
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c:3729:13: warning: array subscript 1 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
 3729 |   bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
      |   ~~~~~~~~~~^~~~
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c:3682:7: note: while referencing ‘u32arr’
 3682 |   u32 u32arr[1];
      |       ^~~~~~

This helps with the ongoing efforts to globally enable -Warray-bounds.

Link: https://github.com/KSPP/linux/issues/109
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://lore.kernel.org/r/20210527173424.362456-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 03ccbe6b66d2..e90b5047e695 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3678,10 +3678,8 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
 				 bool return_data)
 {
 	u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
-	union {
-		struct ixgbe_hic_hdr hdr;
-		u32 u32arr[1];
-	} *bp = buffer;
+	struct ixgbe_hic_hdr *hdr = buffer;
+	u32 *u32arr = buffer;
 	u16 buf_len, dword_len;
 	s32 status;
 	u32 bi;
@@ -3707,12 +3705,12 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
 
 	/* first pull in the header so we know the buffer length */
 	for (bi = 0; bi < dword_len; bi++) {
-		bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
-		le32_to_cpus(&bp->u32arr[bi]);
+		u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+		le32_to_cpus(&u32arr[bi]);
 	}
 
 	/* If there is any thing in data position pull it in */
-	buf_len = bp->hdr.buf_len;
+	buf_len = hdr->buf_len;
 	if (!buf_len)
 		goto rel_out;
 
@@ -3727,8 +3725,8 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
 
 	/* Pull in the rest of the buffer (bi is where we left off) */
 	for (; bi <= dword_len; bi++) {
-		bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
-		le32_to_cpus(&bp->u32arr[bi]);
+		u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+		le32_to_cpus(&u32arr[bi]);
 	}
 
 rel_out:

From 5c350aa11b441b32baf3bfe4018168cb8d10cef7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 28 May 2021 11:24:15 +0200
Subject: [PATCH 0621/3292] fcntl: remove unused VALID_UPGRADE_FLAGS

We currently do not maky use of this feature and should we implement
something like this in the future it's trivial to add it back.

Link: https://lore.kernel.org/r/20210528092417.3942079-2-brauner@kernel.org
Cc: Christoph Hellwig <hch@lst.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Suggested-by: Richard Guy Briggs <rgb@redhat.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/fcntl.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 766fcd973beb..a332e79b3207 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -12,10 +12,6 @@
 	 FASYNC	| O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
 	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
 
-/* List of all valid flags for the how->upgrade_mask argument: */
-#define VALID_UPGRADE_FLAGS \
-	(UPGRADE_NOWRITE | UPGRADE_NOREAD)
-
 /* List of all valid flags for the how->resolve argument: */
 #define VALID_RESOLVE_FLAGS \
 	(RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \

From cfe80306a0dd6d363934913e47c3f30d71b721e5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 28 May 2021 11:24:16 +0200
Subject: [PATCH 0622/3292] open: don't silently ignore unknown O-flags in
 openat2()

The new openat2() syscall verifies that no unknown O-flag values are
set and returns an error to userspace if they are while the older open
syscalls like open() and openat() simply ignore unknown flag values:

  #define O_FLAG_CURRENTLY_INVALID (1 << 31)
  struct open_how how = {
          .flags = O_RDONLY | O_FLAG_CURRENTLY_INVALID,
          .resolve = 0,
  };

  /* fails */
  fd = openat2(-EBADF, "/dev/null", &how, sizeof(how));

  /* succeeds */
  fd = openat(-EBADF, "/dev/null", O_RDONLY | O_FLAG_CURRENTLY_INVALID);

However, openat2() silently truncates the upper 32 bits meaning:

  #define O_FLAG_CURRENTLY_INVALID_LOWER32 (1 << 31)
  #define O_FLAG_CURRENTLY_INVALID_UPPER32 (1 << 40)

  struct open_how how_lowe32 = {
          .flags = O_RDONLY | O_FLAG_CURRENTLY_INVALID_LOWER32,
  };

  struct open_how how_upper32 = {
          .flags = O_RDONLY | O_FLAG_CURRENTLY_INVALID_UPPER32,
  };

  /* fails */
  fd = openat2(-EBADF, "/dev/null", &how_lower32, sizeof(how_lower32));

  /* succeeds */
  fd = openat2(-EBADF, "/dev/null", &how_upper32, sizeof(how_upper32));

Fix this by preventing the immediate truncation in build_open_flags().

There's a snafu here though stripping FMODE_* directly from flags would
cause the upper 32 bits to be truncated as well due to integer promotion
rules since FMODE_* is unsigned int, O_* are signed ints (yuck).

In addition, struct open_flags currently defines flags to be 32 bit
which is reasonable. If we simply were to bump it to 64 bit we would
need to change a lot of code preemptively which doesn't seem worth it.
So simply add a compile-time check verifying that all currently known
O_* flags are within the 32 bit range and fail to build if they aren't
anymore.

This change shouldn't regress old open syscalls since they silently
truncate any unknown values anyway. It is a tiny semantic change for
openat2() but it is very unlikely people pass ing > 32 bit unknown flags
and the syscall is relatively new too.

Link: https://lore.kernel.org/r/20210528092417.3942079-3-brauner@kernel.org
Cc: Christoph Hellwig <hch@lst.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Reported-by: Richard Guy Briggs <rgb@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 fs/open.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index e53af13b5835..53bc0573c0ec 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1002,12 +1002,20 @@ inline struct open_how build_open_how(int flags, umode_t mode)
 
 inline int build_open_flags(const struct open_how *how, struct open_flags *op)
 {
-	int flags = how->flags;
+	u64 flags = how->flags;
+	u64 strip = FMODE_NONOTIFY | O_CLOEXEC;
 	int lookup_flags = 0;
 	int acc_mode = ACC_MODE(flags);
 
-	/* Must never be set by userspace */
-	flags &= ~(FMODE_NONOTIFY | O_CLOEXEC);
+	BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
+			 "struct open_flags doesn't yet handle flags > 32 bits");
+
+	/*
+	 * Strip flags that either shouldn't be set by userspace like
+	 * FMODE_NONOTIFY or that aren't relevant in determining struct
+	 * open_flags like O_CLOEXEC.
+	 */
+	flags &= ~strip;
 
 	/*
 	 * Older syscalls implicitly clear all of the invalid flags or argument

From 15845cbcd12a571869c6703892427f9e5839d5fb Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 28 May 2021 11:24:17 +0200
Subject: [PATCH 0623/3292] test: add openat2() test for invalid upper 32 bit
 flag value

Test that openat2() rejects unknown flags in the upper 32 bit range.

Link: https://lore.kernel.org/r/20210528092417.3942079-4-brauner@kernel.org
Cc: Richard Guy Briggs <rgb@redhat.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: linux-fsdevel@vger.kernel.org
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/openat2/openat2_test.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 381d874cce99..d7ec1e7da0d0 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -155,7 +155,7 @@ struct flag_test {
 	int err;
 };
 
-#define NUM_OPENAT2_FLAG_TESTS 24
+#define NUM_OPENAT2_FLAG_TESTS 25
 
 void test_openat2_flags(void)
 {
@@ -229,6 +229,11 @@ void test_openat2_flags(void)
 		{ .name = "invalid how.resolve and O_PATH",
 		  .how.flags = O_PATH,
 		  .how.resolve = 0x1337, .err = -EINVAL },
+
+		/* currently unknown upper 32 bit rejected. */
+		{ .name = "currently unknown bit (1 << 63)",
+		  .how.flags = O_RDONLY | (1ULL << 63),
+		  .how.resolve = 0, .err = -EINVAL },
 	};
 
 	BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);

From 5e6b8a50a7cec5686ee2c4bda1d49899c79a7eae Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 26 May 2021 22:38:05 +0800
Subject: [PATCH 0624/3292] cred: add missing return error code when
 set_cred_ucounts() failed

If set_cred_ucounts() failed, we need return the error code.

Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lkml.kernel.org/r/20210526143805.2549649-1-yangyingliang@huawei.com
Reviewed-by: Alexey Gladkov <legion@kernel.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/cred.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/cred.c b/kernel/cred.c
index dcfa30b337c5..5a1d9702658e 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -372,7 +372,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		ret = create_user_ns(new);
 		if (ret < 0)
 			goto error_put;
-		if (set_cred_ucounts(new) < 0)
+		ret = set_cred_ucounts(new);
+		if (ret < 0)
 			goto error_put;
 	}
 

From 133dc203d77dff617d9c4673973ef3859be2c476 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Tue, 4 May 2021 17:54:06 +0200
Subject: [PATCH 0625/3292] netfilter: nft_exthdr: Support SCTP chunks

Chunks are SCTP header extensions similar in implementation to IPv6
extension headers or TCP options. Reusing exthdr expression to find and
extract field values from them is therefore pretty straightforward.

For now, this supports extracting data from chunks at a fixed offset
(and length) only - chunks themselves are an extensible data structure;
in order to make all fields available, a nested extension search is
needed.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 +
 net/netfilter/nft_exthdr.c               | 51 ++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 1fb4ca18ffbb..19715e2679d1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -813,11 +813,13 @@ enum nft_exthdr_flags {
  * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers
  * @NFT_EXTHDR_OP_TCP: match against tcp options
  * @NFT_EXTHDR_OP_IPV4: match against ipv4 options
+ * @NFT_EXTHDR_OP_SCTP: match against sctp chunks
  */
 enum nft_exthdr_op {
 	NFT_EXTHDR_OP_IPV6,
 	NFT_EXTHDR_OP_TCPOPT,
 	NFT_EXTHDR_OP_IPV4,
+	NFT_EXTHDR_OP_SCTP,
 	__NFT_EXTHDR_OP_MAX
 };
 #define NFT_EXTHDR_OP_MAX	(__NFT_EXTHDR_OP_MAX - 1)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index f64f0017e9a5..4d0b8e1c40c0 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -10,8 +10,10 @@
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
+#include <linux/sctp.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/sctp/sctp.h>
 #include <net/tcp.h>
 
 struct nft_exthdr {
@@ -300,6 +302,43 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
 	}
 }
 
+static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	unsigned int offset = pkt->xt.thoff + sizeof(struct sctphdr);
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+	const struct sctp_chunkhdr *sch;
+	struct sctp_chunkhdr _sch;
+
+	do {
+		sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
+		if (!sch || !sch->length)
+			break;
+
+		if (sch->type == priv->type) {
+			if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+				nft_reg_store8(dest, true);
+				return;
+			}
+			if (priv->offset + priv->len > ntohs(sch->length) ||
+			    offset + ntohs(sch->length) > pkt->skb->len)
+				break;
+
+			dest[priv->len / NFT_REG32_SIZE] = 0;
+			memcpy(dest, (char *)sch + priv->offset, priv->len);
+			return;
+		}
+		offset += SCTP_PAD4(ntohs(sch->length));
+	} while (offset < pkt->skb->len);
+
+	if (priv->flags & NFT_EXTHDR_F_PRESENT)
+		nft_reg_store8(dest, false);
+	else
+		regs->verdict.code = NFT_BREAK;
+}
+
 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
 	[NFTA_EXTHDR_DREG]		= { .type = NLA_U32 },
 	[NFTA_EXTHDR_TYPE]		= { .type = NLA_U8 },
@@ -499,6 +538,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
 	.dump		= nft_exthdr_dump_set,
 };
 
+static const struct nft_expr_ops nft_exthdr_sctp_ops = {
+	.type		= &nft_exthdr_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.eval		= nft_exthdr_sctp_eval,
+	.init		= nft_exthdr_init,
+	.dump		= nft_exthdr_dump,
+};
+
 static const struct nft_expr_ops *
 nft_exthdr_select_ops(const struct nft_ctx *ctx,
 		      const struct nlattr * const tb[])
@@ -529,6 +576,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
 				return &nft_exthdr_ipv4_ops;
 		}
 		break;
+	case NFT_EXTHDR_OP_SCTP:
+		if (tb[NFTA_EXTHDR_DREG])
+			return &nft_exthdr_sctp_ops;
+		break;
 	}
 
 	return ERR_PTR(-EOPNOTSUPP);

From a58db7ad80e89dab014bd2d769c233eeca1bf519 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Mon, 10 May 2021 07:58:52 +0200
Subject: [PATCH 0626/3292] netfilter: nft_set_pipapo_avx2: Skip LDMXCSR, we
 don't need a valid MXCSR state

We don't need a valid MXCSR state for the lookup routines, none of
the instructions we use rely on or affect any bit in the MXCSR
register.

Instead of calling kernel_fpu_begin(), we can pass 0 as mask to
kernel_fpu_begin_mask() and spare one LDMXCSR instruction.

Commit 49200d17d27d ("x86/fpu/64: Don't FNINIT in kernel_fpu_begin()")
already speeds up lookups considerably, and by dropping the MCXSR
initialisation we can now get a much smaller, but measurable, increase
in matching rates.

The table below reports matching rates and a wild approximation of
clock cycles needed for a match in a "port,net" test with 10 entries
from selftests/netfilter/nft_concat_range.sh, limited to the first
field, i.e. the port (with nft_set_rbtree initialisation skipped), run
on a single AMD Epyc 7351 thread (2.9GHz, 512 KiB L1D$, 8 MiB L2$).

The (very rough) estimation of clock cycles is obtained by simply
dividing frequency by matching rate. The "cycles spared" column refers
to the difference in cycles compared to the previous row, and the rate
increase also refers to the previous row. Results are averages of six
runs.

Merely for context, I'm also reporting packet rates obtained by
skipping kernel_fpu_begin() and kernel_fpu_end() altogether (which
shows a very limited impact now), as well as skipping the whole lookup
function, compared to simply counting and dropping all packets using
the netdev hook drop (see nft_concat_range.sh for details). This
workload also includes packet generation with pktgen and the receive
path of veth.

                                      |matching|  est.  | cycles |  rate  |
                                      |  rate  | cycles | spared |increase|
                                      | (Mpps) |        |        |        |
--------------------------------------|--------|--------|--------|--------|
FNINIT, LDMXCSR (before 49200d17d27d) |  5.245 |    553 |      - |      - |
LDMXCSR only (with 49200d17d27d)      |  6.347 |    457 |     96 |  21.0% |
Without LDMXCSR (this patch)          |  6.461 |    449 |      8 |   1.8% |
-------- for reference only: ---------|--------|--------|--------|--------|
Without kernel_fpu_begin()            |  6.513 |    445 |      4 |   0.8% |
Without actual matching (return true) |  7.649 |    379 |     66 |  17.4% |
Without lookup operation (netdev drop)| 10.320 |    281 |     98 |  34.9% |

The clock cycles spared by avoiding LDMXCSR appear to be in line with CPI
and latency indicated in the manuals of comparable architectures: Intel
Skylake (CPI: 1, latency: 7) and AMD 12h (latency: 12) -- I couldn't find
this information for AMD 17h.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_pipapo_avx2.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index eabdb8d552ee..1c2620923a61 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1136,8 +1136,13 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
 
 	m = rcu_dereference(priv->match);
 
-	/* This also protects access to all data related to scratch maps */
-	kernel_fpu_begin();
+	/* This also protects access to all data related to scratch maps.
+	 *
+	 * Note that we don't need a valid MXCSR state for any of the
+	 * operations we use here, so pass 0 as mask and spare a LDMXCSR
+	 * instruction.
+	 */
+	kernel_fpu_begin_mask(0);
 
 	scratch = *raw_cpu_ptr(m->scratch_aligned);
 	if (unlikely(!scratch)) {

From 0974cff3eb66764cc86b60f1071958acc432a4e8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 May 2021 22:29:55 +0200
Subject: [PATCH 0627/3292] netfilter: add and use nft_set_do_lookup helper

Followup patch will add a CONFIG_RETPOLINE wrapper to avoid
the ops->lookup() indirection cost for retpoline builds.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 7 +++++++
 net/netfilter/nft_lookup.c             | 4 ++--
 net/netfilter/nft_objref.c             | 4 ++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index fd10a7862fdc..5eb699454490 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -88,6 +88,13 @@ extern const struct nft_set_type nft_set_bitmap_type;
 extern const struct nft_set_type nft_set_pipapo_type;
 extern const struct nft_set_type nft_set_pipapo_avx2_type;
 
+static inline bool
+nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+		  const u32 *key, const struct nft_set_ext **ext)
+{
+	return set->ops->lookup(net, set, key, ext);
+}
+
 struct nft_expr;
 struct nft_regs;
 struct nft_pktinfo;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index a479f8a1270c..1a8581879af5 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -33,8 +33,8 @@ void nft_lookup_eval(const struct nft_expr *expr,
 	const struct net *net = nft_net(pkt);
 	bool found;
 
-	found = set->ops->lookup(net, set, &regs->data[priv->sreg], &ext) ^
-				 priv->invert;
+	found =	nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^
+				  priv->invert;
 	if (!found) {
 		ext = nft_set_catchall_lookup(net, set);
 		if (!ext) {
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 7e47edee88ee..94b2327e71dc 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -9,7 +9,7 @@
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
 
 #define nft_objref_priv(expr)	*((struct nft_object **)nft_expr_priv(expr))
 
@@ -110,7 +110,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
 	struct nft_object *obj;
 	bool found;
 
-	found = set->ops->lookup(net, set, &regs->data[priv->sreg], &ext);
+	found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext);
 	if (!found) {
 		ext = nft_set_catchall_lookup(net, set);
 		if (!ext) {

From fc8c262e0eb5aa248af5051377e4ff3348841ac5 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 26 May 2021 08:24:57 -0700
Subject: [PATCH 0628/3292] bpf, docs: Add llvm_reloc.rst to explain llvm bpf
 relocations

LLVM upstream commit https://reviews.llvm.org/D102712 made some changes
to bpf relocations to make them llvm linker lld friendly. The scope of
existing relocations R_BPF_64_{64,32} is narrowed and new relocations
R_BPF_64_{ABS32,ABS64,NODYLD32} are introduced.

Let us add some documentation about llvm bpf relocations so people can
understand how to resolve them properly in their respective tools.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210526152457.335210-1-yhs@fb.com
---
 Documentation/bpf/index.rst            |   1 +
 Documentation/bpf/llvm_reloc.rst       | 240 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/README.rst |  19 ++
 3 files changed, 260 insertions(+)
 create mode 100644 Documentation/bpf/llvm_reloc.rst

diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index a702f67dd45f..93e8cf12a6d4 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -84,6 +84,7 @@ Other
    :maxdepth: 1
 
    ringbuf
+   llvm_reloc
 
 .. Links:
 .. _networking-filter: ../networking/filter.rst
diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst
new file mode 100644
index 000000000000..ca8957d5b671
--- /dev/null
+++ b/Documentation/bpf/llvm_reloc.rst
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+====================
+BPF LLVM Relocations
+====================
+
+This document describes LLVM BPF backend relocation types.
+
+Relocation Record
+=================
+
+LLVM BPF backend records each relocation with the following 16-byte
+ELF structure::
+
+  typedef struct
+  {
+    Elf64_Addr    r_offset;  // Offset from the beginning of section.
+    Elf64_Xword   r_info;    // Relocation type and symbol index.
+  } Elf64_Rel;
+
+For example, for the following code::
+
+  int g1 __attribute__((section("sec")));
+  int g2 __attribute__((section("sec")));
+  static volatile int l1 __attribute__((section("sec")));
+  static volatile int l2 __attribute__((section("sec")));
+  int test() {
+    return g1 + g2 + l1 + l2;
+  }
+
+Compiled with ``clang -target bpf -O2 -c test.c``, the following is
+the code with ``llvm-objdump -dr test.o``::
+
+       0:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+                0000000000000000:  R_BPF_64_64  g1
+       2:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+       3:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
+                0000000000000018:  R_BPF_64_64  g2
+       5:       61 20 00 00 00 00 00 00 r0 = *(u32 *)(r2 + 0)
+       6:       0f 10 00 00 00 00 00 00 r0 += r1
+       7:       18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll
+                0000000000000038:  R_BPF_64_64  sec
+       9:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+      10:       0f 10 00 00 00 00 00 00 r0 += r1
+      11:       18 01 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 r1 = 12 ll
+                0000000000000058:  R_BPF_64_64  sec
+      13:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+      14:       0f 10 00 00 00 00 00 00 r0 += r1
+      15:       95 00 00 00 00 00 00 00 exit
+
+There are four relations in the above for four ``LD_imm64`` instructions.
+The following ``llvm-readelf -r test.o`` shows the binary values of the four
+relocations::
+
+  Relocation section '.rel.text' at offset 0x190 contains 4 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000000  0000000600000001 R_BPF_64_64            0000000000000000 g1
+  0000000000000018  0000000700000001 R_BPF_64_64            0000000000000004 g2
+  0000000000000038  0000000400000001 R_BPF_64_64            0000000000000000 sec
+  0000000000000058  0000000400000001 R_BPF_64_64            0000000000000000 sec
+
+Each relocation is represented by ``Offset`` (8 bytes) and ``Info`` (8 bytes).
+For example, the first relocation corresponds to the first instruction
+(Offset 0x0) and the corresponding ``Info`` indicates the relocation type
+of ``R_BPF_64_64`` (type 1) and the entry in the symbol table (entry 6).
+The following is the symbol table with ``llvm-readelf -s test.o``::
+
+  Symbol table '.symtab' contains 8 entries:
+     Num:    Value          Size Type    Bind   Vis       Ndx Name
+       0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT   UND
+       1: 0000000000000000     0 FILE    LOCAL  DEFAULT   ABS test.c
+       2: 0000000000000008     4 OBJECT  LOCAL  DEFAULT     4 l1
+       3: 000000000000000c     4 OBJECT  LOCAL  DEFAULT     4 l2
+       4: 0000000000000000     0 SECTION LOCAL  DEFAULT     4 sec
+       5: 0000000000000000   128 FUNC    GLOBAL DEFAULT     2 test
+       6: 0000000000000000     4 OBJECT  GLOBAL DEFAULT     4 g1
+       7: 0000000000000004     4 OBJECT  GLOBAL DEFAULT     4 g2
+
+The 6th entry is global variable ``g1`` with value 0.
+
+Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
+for global variable ``g2`` which has a symbol value 4, the offset
+from the start of ``.data`` section.
+
+The third and fourth relocations refers to static variables ``l1``
+and ``l2``. From ``.rel.text`` section above, it is not clear
+which symbols they really refers to as they both refers to
+symbol table entry 4, symbol ``sec``, which has ``STT_SECTION`` type
+and represents a section. So for static variable or function,
+the section offset is written to the original insn
+buffer, which is called ``A`` (addend). Looking at
+above insn ``7`` and ``11``, they have section offset ``8`` and ``12``.
+From symbol table, we can find that they correspond to entries ``2``
+and ``3`` for ``l1`` and ``l2``.
+
+In general, the ``A`` is 0 for global variables and functions,
+and is the section offset or some computation result based on
+section offset for static variables/functions. The non-section-offset
+case refers to function calls. See below for more details.
+
+Different Relocation Types
+==========================
+
+Six relocation types are supported. The following is an overview and
+``S`` represents the value of the symbol in the symbol table::
+
+  Enum  ELF Reloc Type     Description      BitSize  Offset        Calculation
+  0     R_BPF_NONE         None
+  1     R_BPF_64_64        ld_imm64 insn    32       r_offset + 4  S + A
+  2     R_BPF_64_ABS64     normal data      64       r_offset      S + A
+  3     R_BPF_64_ABS32     normal data      32       r_offset      S + A
+  4     R_BPF_64_NODYLD32  .BTF[.ext] data  32       r_offset      S + A
+  10    R_BPF_64_32        call insn        32       r_offset + 4  (S + A) / 8 - 1
+
+For example, ``R_BPF_64_64`` relocation type is used for ``ld_imm64`` instruction.
+The actual to-be-relocated data (0 or section offset)
+is stored at ``r_offset + 4`` and the read/write
+data bitsize is 32 (4 bytes). The relocation can be resolved with
+the symbol value plus implicit addend. Note that the ``BitSize`` is 32 which
+means the section offset must be less than or equal to ``UINT32_MAX`` and this
+is enforced by LLVM BPF backend.
+
+In another case, ``R_BPF_64_ABS64`` relocation type is used for normal 64-bit data.
+The actual to-be-relocated data is stored at ``r_offset`` and the read/write data
+bitsize is 64 (8 bytes). The relocation can be resolved with
+the symbol value plus implicit addend.
+
+Both ``R_BPF_64_ABS32`` and ``R_BPF_64_NODYLD32`` types are for 32-bit data.
+But ``R_BPF_64_NODYLD32`` specifically refers to relocations in ``.BTF`` and
+``.BTF.ext`` sections. For cases like bcc where llvm ``ExecutionEngine RuntimeDyld``
+is involved, ``R_BPF_64_NODYLD32`` types of relocations should not be resolved
+to actual function/variable address. Otherwise, ``.BTF`` and ``.BTF.ext``
+become unusable by bcc and kernel.
+
+Type ``R_BPF_64_32`` is used for call instruction. The call target section
+offset is stored at ``r_offset + 4`` (32bit) and calculated as
+``(S + A) / 8 - 1``.
+
+Examples
+========
+
+Types ``R_BPF_64_64`` and ``R_BPF_64_32`` are used to resolve ``ld_imm64``
+and ``call`` instructions. For example::
+
+  __attribute__((noinline)) __attribute__((section("sec1")))
+  int gfunc(int a, int b) {
+    return a * b;
+  }
+  static __attribute__((noinline)) __attribute__((section("sec1")))
+  int lfunc(int a, int b) {
+    return a + b;
+  }
+  int global __attribute__((section("sec2")));
+  int test(int a, int b) {
+    return gfunc(a, b) +  lfunc(a, b) + global;
+  }
+
+Compiled with ``clang -target bpf -O2 -c test.c``, we will have
+following code with `llvm-objdump -dr test.o``::
+
+  Disassembly of section .text:
+
+  0000000000000000 <test>:
+         0:       bf 26 00 00 00 00 00 00 r6 = r2
+         1:       bf 17 00 00 00 00 00 00 r7 = r1
+         2:       85 10 00 00 ff ff ff ff call -1
+                  0000000000000010:  R_BPF_64_32  gfunc
+         3:       bf 08 00 00 00 00 00 00 r8 = r0
+         4:       bf 71 00 00 00 00 00 00 r1 = r7
+         5:       bf 62 00 00 00 00 00 00 r2 = r6
+         6:       85 10 00 00 02 00 00 00 call 2
+                  0000000000000030:  R_BPF_64_32  sec1
+         7:       0f 80 00 00 00 00 00 00 r0 += r8
+         8:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+                  0000000000000040:  R_BPF_64_64  global
+        10:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+        11:       0f 10 00 00 00 00 00 00 r0 += r1
+        12:       95 00 00 00 00 00 00 00 exit
+
+  Disassembly of section sec1:
+
+  0000000000000000 <gfunc>:
+         0:       bf 20 00 00 00 00 00 00 r0 = r2
+         1:       2f 10 00 00 00 00 00 00 r0 *= r1
+         2:       95 00 00 00 00 00 00 00 exit
+
+  0000000000000018 <lfunc>:
+         3:       bf 20 00 00 00 00 00 00 r0 = r2
+         4:       0f 10 00 00 00 00 00 00 r0 += r1
+         5:       95 00 00 00 00 00 00 00 exit
+
+The first relocation corresponds to ``gfunc(a, b)`` where ``gfunc`` has a value of 0,
+so the ``call`` instruction offset is ``(0 + 0)/8 - 1 = -1``.
+The second relocation corresponds to ``lfunc(a, b)`` where ``lfunc`` has a section
+offset ``0x18``, so the ``call`` instruction offset is ``(0 + 0x18)/8 - 1 = 2``.
+The third relocation corresponds to ld_imm64 of ``global``, which has a section
+offset ``0``.
+
+The following is an example to show how R_BPF_64_ABS64 could be generated::
+
+  int global() { return 0; }
+  struct t { void *g; } gbl = { global };
+
+Compiled with ``clang -target bpf -O2 -g -c test.c``, we will see a
+relocation below in ``.data`` section with command
+``llvm-readelf -r test.o``::
+
+  Relocation section '.rel.data' at offset 0x458 contains 1 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000000  0000000700000002 R_BPF_64_ABS64         0000000000000000 global
+
+The relocation says the first 8-byte of ``.data`` section should be
+filled with address of ``global`` variable.
+
+With ``llvm-readelf`` output, we can see that dwarf sections have a bunch of
+``R_BPF_64_ABS32`` and ``R_BPF_64_ABS64`` relocations::
+
+  Relocation section '.rel.debug_info' at offset 0x468 contains 13 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000006  0000000300000003 R_BPF_64_ABS32         0000000000000000 .debug_abbrev
+  000000000000000c  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000012  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000016  0000000600000003 R_BPF_64_ABS32         0000000000000000 .debug_line
+  000000000000001a  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  000000000000001e  0000000200000002 R_BPF_64_ABS64         0000000000000000 .text
+  000000000000002b  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000037  0000000800000002 R_BPF_64_ABS64         0000000000000000 gbl
+  0000000000000040  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  ......
+
+The .BTF/.BTF.ext sections has R_BPF_64_NODYLD32 relocations::
+
+  Relocation section '.rel.BTF' at offset 0x538 contains 1 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000084  0000000800000004 R_BPF_64_NODYLD32      0000000000000000 gbl
+
+  Relocation section '.rel.BTF.ext' at offset 0x548 contains 2 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  000000000000002c  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
+  0000000000000040  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3353778c30f8..8deec1ca9150 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use
 Clang that contains the fix.
 
 __ https://reviews.llvm.org/D100362
+
+Clang relocation changes
+========================
+
+Clang 13 patch `clang reloc patch`_  made some changes on relocations such
+that existing relocation types are broken into more types and
+each new type corresponds to only one way to resolve relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 to compile old libbpf which has static linker support,
+there will be a compilation failure::
+
+  libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+
+Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, user newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst

From e8e0f0f484780d7b90a63ea50020ac4bb027178d Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 27 May 2021 22:43:56 -0400
Subject: [PATCH 0629/3292] bpf, devmap: Remove drops variable from
 bq_xmit_all()

As Colin pointed out, the first drops assignment after declaration will
be overwritten by the second drops assignment before using, which makes
it useless.

Since the drops variable will be used only once. Just remove it and
use "cnt - sent" in trace_xdp_devmap_xmit().

Fixes: cb261b594b41 ("bpf: Run devmap xdp_prog on flush instead of bulk enqueue")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210528024356.24333-1-liuhangbin@gmail.com
---
 kernel/bpf/devmap.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f9148daab0e3..2a75e6c2d27d 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -370,8 +370,8 @@ static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
-	int sent = 0, drops = 0, err = 0;
 	unsigned int cnt = bq->count;
+	int sent = 0, err = 0;
 	int to_send = cnt;
 	int i;
 
@@ -388,8 +388,6 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 		to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
 		if (!to_send)
 			goto out;
-
-		drops = cnt - to_send;
 	}
 
 	sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags);
@@ -408,9 +406,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 		xdp_return_frame_rx_napi(bq->q[i]);
 
 out:
-	drops = cnt - sent;
 	bq->count = 0;
-	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
+	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err);
 }
 
 /* __dev_flush is called from xdp_do_flush() which _must_ be signaled

From 2f1af441fd5dd5caf0807bb19ce9bbf9325ce534 Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 27 May 2021 16:54:24 -0700
Subject: [PATCH 0630/3292] mptcp: fix pr_debug in mptcp_token_new_connect

After commit 2c5ebd001d4f ("mptcp: refactor token container"),
pr_debug() is called before mptcp_crypto_key_gen_sha() in
mptcp_token_new_connect(), so the output local_key, token and
idsn are 0, like:

  MPTCP: ssk=00000000f6b3c4a2, local_key=0, token=0, idsn=0

Move pr_debug() after mptcp_crypto_key_gen_sha().

Fixes: 2c5ebd001d4f ("mptcp: refactor token container")
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/token.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 8f0270a780ce..72a24e63b131 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -156,9 +156,6 @@ int mptcp_token_new_connect(struct sock *sk)
 	int retries = TOKEN_MAX_RETRIES;
 	struct token_bucket *bucket;
 
-	pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
-		 sk, subflow->local_key, subflow->token, subflow->idsn);
-
 again:
 	mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
 				 &subflow->idsn);
@@ -172,6 +169,9 @@ again:
 		goto again;
 	}
 
+	pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
+		 sk, subflow->local_key, subflow->token, subflow->idsn);
+
 	WRITE_ONCE(msk->token, subflow->token);
 	__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
 	bucket->chain_len++;

From c68a0cd1735fe09fa7c1a7de1f11a5b674f1c549 Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 27 May 2021 16:54:25 -0700
Subject: [PATCH 0631/3292] mptcp: using TOKEN_MAX_RETRIES instead of magic
 number

We have macro TOKEN_MAX_RETRIES for the number of token generate retries,
so using TOKEN_MAX_RETRIES in subflow_check_req().

And rename TOKEN_MAX_RETRIES to MPTCP_TOKEN_MAX_RETRIES as it is now
exposed.

Fixes: 535fb8152f31 ("mptcp: token: move retry to caller")
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.h | 2 ++
 net/mptcp/subflow.c  | 2 +-
 net/mptcp/token.c    | 3 +--
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0c6f99c67345..89f6b73783d5 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -627,6 +627,8 @@ static inline void mptcp_write_space(struct sock *sk)
 
 void mptcp_destroy_common(struct mptcp_sock *msk);
 
+#define MPTCP_TOKEN_MAX_RETRIES	4
+
 void __init mptcp_token_init(void);
 static inline void mptcp_token_init_request(struct request_sock *req)
 {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bde6be77ea73..a50a97908866 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -162,7 +162,7 @@ static int subflow_check_req(struct request_sock *req,
 	}
 
 	if (mp_opt.mp_capable && listener->request_mptcp) {
-		int err, retries = 4;
+		int err, retries = MPTCP_TOKEN_MAX_RETRIES;
 
 		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
 again:
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 72a24e63b131..a98e554b034f 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -33,7 +33,6 @@
 #include <net/mptcp.h>
 #include "protocol.h"
 
-#define TOKEN_MAX_RETRIES	4
 #define TOKEN_MAX_CHAIN_LEN	4
 
 struct token_bucket {
@@ -153,7 +152,7 @@ int mptcp_token_new_connect(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-	int retries = TOKEN_MAX_RETRIES;
+	int retries = MPTCP_TOKEN_MAX_RETRIES;
 	struct token_bucket *bucket;
 
 again:

From 0a4d8e96e4fd687af92b961d5cdcea0fdbde05fe Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 27 May 2021 16:54:26 -0700
Subject: [PATCH 0632/3292] mptcp: generate subflow hmac after
 mptcp_finish_join()

For outgoing subflow join, when recv SYNACK, in subflow_finish_connect(),
the mptcp_finish_join() may return false in some cases, and send a RESET
to remote, and no local hmac is required.
So generate subflow hmac after mptcp_finish_join().

Fixes: ec3edaa7ca6c ("mptcp: Add handling of outgoing MP_JOIN requests")
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/subflow.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a50a97908866..2a58503e55bd 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -430,15 +430,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 			goto do_reset;
 		}
 
+		if (!mptcp_finish_join(sk))
+			goto do_reset;
+
 		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
 				      subflow->local_nonce,
 				      subflow->remote_nonce,
 				      hmac);
 		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
 
-		if (!mptcp_finish_join(sk))
-			goto do_reset;
-
 		subflow->mp_join = 1;
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
 

From ae514983f2e416cb1476bdd8e23b6d9be4ce94cd Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 27 May 2021 16:54:27 -0700
Subject: [PATCH 0633/3292] mptcp: remove redundant initialization in
 pm_nl_init_net()

Memory of struct pm_nl_pernet{} is allocated by kzalloc()
in setup_net()->ops_init(), so it's no need to reset counters
and zero bitmap in pm_nl_init_net().

Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/pm_netlink.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2469e06a3a9d..7dbc4f308dbe 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1913,10 +1913,13 @@ static int __net_init pm_nl_init_net(struct net *net)
 	struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
 
 	INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
-	__reset_counters(pernet);
 	pernet->next_id = 1;
-	bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
 	spin_lock_init(&pernet->lock);
+
+	/* No need to initialize other pernet fields, the struct is zeroed at
+	 * allocation time.
+	 */
+
 	return 0;
 }
 

From eb5fb629f56da3f40f496c807da44a7ce7644779 Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 27 May 2021 16:54:28 -0700
Subject: [PATCH 0634/3292] mptcp: make sure flag signal is set when add addr
 with port

When add address with port, it is mean to create a listening socket,
and send an ADD_ADDR to remote, so it must have flag signal set,
add this check in mptcp_pm_parse_addr().

Fixes: a77e9179c7651 ("mptcp: deal with MPTCP_PM_ADDR_ATTR_PORT in PM netlink")
Acked-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/pm_netlink.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 7dbc4f308dbe..09722598994d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -971,8 +971,14 @@ skip_family:
 	if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
 		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
 
-	if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+	if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
+		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+			NL_SET_ERR_MSG_ATTR(info->extack, attr,
+					    "flags must have signal when using port");
+			return -EINVAL;
+		}
 		entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+	}
 
 	return 0;
 }

From 804c72eeecd2cd38567b64f868cc8c63202cf1a2 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 27 May 2021 16:54:29 -0700
Subject: [PATCH 0635/3292] mptcp: support SYSCTL only if enabled

Since the introduction of the sysctl support in MPTCP with
commit 784325e9f037 ("mptcp: new sysctl to control the activation per NS"),
we don't check CONFIG_SYSCTL.

Until now, that was not an issue: the register and unregister functions
were replaced by NO-OP one if SYSCTL was not enabled in the config. The
only thing we could have avoid is not to reserve memory for the table
but that's for the moment only a small table per net-ns.

But the following commit is going to use SYSCTL_ZERO and SYSCTL_ONE
which are not be defined if SYSCTL is not enabled in the config. This
causes 'undefined reference' errors from the linker.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/ctrl.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 96ba616f59bf..a3b15ed60b77 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -4,7 +4,9 @@
  * Copyright (c) 2019, Tessares SA.
  */
 
+#ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
+#endif
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -15,7 +17,9 @@
 
 static int mptcp_pernet_id;
 struct mptcp_pernet {
+#ifdef CONFIG_SYSCTL
 	struct ctl_table_header *ctl_table_hdr;
+#endif
 
 	int mptcp_enabled;
 	unsigned int add_addr_timeout;
@@ -36,6 +40,13 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net)
 	return mptcp_get_pernet(net)->add_addr_timeout;
 }
 
+static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
+{
+	pernet->mptcp_enabled = 1;
+	pernet->add_addr_timeout = TCP_RTO_MAX;
+}
+
+#ifdef CONFIG_SYSCTL
 static struct ctl_table mptcp_sysctl_table[] = {
 	{
 		.procname = "enabled",
@@ -55,12 +66,6 @@ static struct ctl_table mptcp_sysctl_table[] = {
 	{}
 };
 
-static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
-{
-	pernet->mptcp_enabled = 1;
-	pernet->add_addr_timeout = TCP_RTO_MAX;
-}
-
 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 {
 	struct ctl_table_header *hdr;
@@ -100,6 +105,17 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
 	kfree(table);
 }
 
+#else
+
+static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
+{
+	return 0;
+}
+
+static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
+
+#endif /* CONFIG_SYSCTL */
+
 static int __net_init mptcp_net_init(struct net *net)
 {
 	struct mptcp_pernet *pernet = mptcp_get_pernet(net);

From 744ee14054c8ca5ad0fe3ab9172709c17d8a240a Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 27 May 2021 16:54:30 -0700
Subject: [PATCH 0636/3292] mptcp: restrict values of 'enabled' sysctl

To avoid confusions, it seems better to parse this sysctl parameter as a
boolean. We use it as a boolean, no need to parse an integer and bring
confusions if we see a value different from 0 and 1, especially with
this parameter name: enabled.

It seems fine to do this modification because the default value is 1
(enabled). Then the only other interesting value to set is 0 (disabled).
All other values would not have changed the default behaviour.

Suggested-by: Florian Westphal <fw@strlen.de>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/mptcp-sysctl.rst | 8 ++++----
 net/mptcp/ctrl.c                          | 8 +++++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index 6af0196c4297..3b352e5f6300 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -7,13 +7,13 @@ MPTCP Sysfs variables
 /proc/sys/net/mptcp/* Variables
 ===============================
 
-enabled - INTEGER
+enabled - BOOLEAN
 	Control whether MPTCP sockets can be created.
 
-	MPTCP sockets can be created if the value is nonzero. This is
-	a per-namespace sysctl.
+	MPTCP sockets can be created if the value is 1. This is a
+	per-namespace sysctl.
 
-	Default: 1
+	Default: 1 (enabled)
 
 add_addr_timeout - INTEGER (seconds)
 	Set the timeout after which an ADD_ADDR control message will be
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index a3b15ed60b77..1ec4d36a39f0 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -21,7 +21,7 @@ struct mptcp_pernet {
 	struct ctl_table_header *ctl_table_hdr;
 #endif
 
-	int mptcp_enabled;
+	u8 mptcp_enabled;
 	unsigned int add_addr_timeout;
 };
 
@@ -50,12 +50,14 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 static struct ctl_table mptcp_sysctl_table[] = {
 	{
 		.procname = "enabled",
-		.maxlen = sizeof(int),
+		.maxlen = sizeof(u8),
 		.mode = 0644,
 		/* users with CAP_NET_ADMIN or root (not and) can change this
 		 * value, same as other sysctl or the 'net' tree.
 		 */
-		.proc_handler = proc_dointvec,
+		.proc_handler = proc_dou8vec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE
 	},
 	{
 		.procname = "add_addr_timeout",

From b11faec368704d5e18e345e67ea3ba0d58628113 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:40 +0800
Subject: [PATCH 0637/3292] net: hdlc_fr: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 42 ---------------------------------------
 1 file changed, 42 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 0720f5f92caa..0b6e133de4ad 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -60,7 +60,6 @@
 #define NLPID_CCITT_ANSI_LMI	0x08
 #define NLPID_CISCO_LMI		0x09
 
-
 #define LMI_CCITT_ANSI_DLCI	   0 /* LMI DLCI */
 #define LMI_CISCO_DLCI		1023
 
@@ -86,7 +85,6 @@
 #define LMI_CCITT_CISCO_LENGTH	  13 /* LMI frame lengths */
 #define LMI_ANSI_LENGTH		  14
 
-
 struct fr_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	unsigned ea1:	1;
@@ -111,7 +109,6 @@ struct fr_hdr {
 #endif
 } __packed;
 
-
 struct pvc_device {
 	struct net_device *frad;
 	struct net_device *main;
@@ -149,29 +146,24 @@ struct frad_state {
 	u8 rxseq; /* RX sequence number */
 };
 
-
 static int fr_ioctl(struct net_device *dev, struct ifreq *ifr);
 
-
 static inline u16 q922_to_dlci(u8 *hdr)
 {
 	return ((hdr[0] & 0xFC) << 2) | ((hdr[1] & 0xF0) >> 4);
 }
 
-
 static inline void dlci_to_q922(u8 *hdr, u16 dlci)
 {
 	hdr[0] = (dlci >> 2) & 0xFC;
 	hdr[1] = ((dlci << 4) & 0xF0) | 0x01;
 }
 
-
 static inline struct frad_state* state(hdlc_device *hdlc)
 {
 	return(struct frad_state *)(hdlc->state);
 }
 
-
 static inline struct pvc_device *find_pvc(hdlc_device *hdlc, u16 dlci)
 {
 	struct pvc_device *pvc = state(hdlc)->first_pvc;
@@ -187,7 +179,6 @@ static inline struct pvc_device *find_pvc(hdlc_device *hdlc, u16 dlci)
 	return NULL;
 }
 
-
 static struct pvc_device *add_pvc(struct net_device *dev, u16 dlci)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -215,13 +206,11 @@ static struct pvc_device *add_pvc(struct net_device *dev, u16 dlci)
 	return pvc;
 }
 
-
 static inline int pvc_is_used(struct pvc_device *pvc)
 {
 	return pvc->main || pvc->ether;
 }
 
-
 static inline void pvc_carrier(int on, struct pvc_device *pvc)
 {
 	if (on) {
@@ -241,7 +230,6 @@ static inline void pvc_carrier(int on, struct pvc_device *pvc)
 	}
 }
 
-
 static inline void delete_unused_pvcs(hdlc_device *hdlc)
 {
 	struct pvc_device **pvc_p = &state(hdlc)->first_pvc;
@@ -260,7 +248,6 @@ static inline void delete_unused_pvcs(hdlc_device *hdlc)
 	}
 }
 
-
 static inline struct net_device **get_dev_p(struct pvc_device *pvc,
 					    int type)
 {
@@ -270,7 +257,6 @@ static inline struct net_device **get_dev_p(struct pvc_device *pvc,
 		return &pvc->main;
 }
 
-
 static int fr_hard_header(struct sk_buff *skb, u16 dlci)
 {
 	if (!skb->dev) { /* Control packets */
@@ -334,8 +320,6 @@ static int fr_hard_header(struct sk_buff *skb, u16 dlci)
 	return 0;
 }
 
-
-
 static int pvc_open(struct net_device *dev)
 {
 	struct pvc_device *pvc = dev->ml_priv;
@@ -354,8 +338,6 @@ static int pvc_open(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int pvc_close(struct net_device *dev)
 {
 	struct pvc_device *pvc = dev->ml_priv;
@@ -373,8 +355,6 @@ static int pvc_close(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct pvc_device *pvc = dev->ml_priv;
@@ -465,15 +445,12 @@ static inline void fr_log_dlci_active(struct pvc_device *pvc)
 		    pvc->state.active ? "active" : "inactive");
 }
 
-
-
 static inline u8 fr_lmi_nextseq(u8 x)
 {
 	x++;
 	return x ? x : 1;
 }
 
-
 static void fr_lmi_send(struct net_device *dev, int fullrep)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -569,8 +546,6 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 	dev_queue_xmit(skb);
 }
 
-
-
 static void fr_set_link_state(int reliable, struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -603,7 +578,6 @@ static void fr_set_link_state(int reliable, struct net_device *dev)
 	}
 }
 
-
 static void fr_timer(struct timer_list *t)
 {
 	struct frad_state *st = from_timer(st, t, timer);
@@ -655,7 +629,6 @@ static void fr_timer(struct timer_list *t)
 	add_timer(&state(hdlc)->timer);
 }
 
-
 static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -962,7 +935,6 @@ static int fr_rx(struct sk_buff *skb)
 		pvc->state.becn ^= 1;
 	}
 
-
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
 		frad->stats.rx_dropped++;
 		return NET_RX_DROP;
@@ -1018,8 +990,6 @@ rx_drop:
 	return NET_RX_DROP;
 }
 
-
-
 static void fr_start(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -1044,7 +1014,6 @@ static void fr_start(struct net_device *dev)
 		fr_set_link_state(1, dev);
 }
 
-
 static void fr_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -1056,7 +1025,6 @@ static void fr_stop(struct net_device *dev)
 	fr_set_link_state(0, dev);
 }
 
-
 static void fr_close(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -1071,7 +1039,6 @@ static void fr_close(struct net_device *dev)
 	}
 }
 
-
 static void pvc_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_DLCI;
@@ -1147,8 +1114,6 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 	return 0;
 }
 
-
-
 static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 {
 	struct pvc_device *pvc;
@@ -1174,8 +1139,6 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 	return 0;
 }
 
-
-
 static void fr_destroy(struct net_device *frad)
 {
 	hdlc_device *hdlc = dev_to_hdlc(frad);
@@ -1198,7 +1161,6 @@ static void fr_destroy(struct net_device *frad)
 	}
 }
 
-
 static struct hdlc_proto proto = {
 	.close		= fr_close,
 	.start		= fr_start,
@@ -1209,7 +1171,6 @@ static struct hdlc_proto proto = {
 	.module		= THIS_MODULE,
 };
 
-
 static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	fr_proto __user *fr_s = ifr->ifr_settings.ifs_ifsu.fr;
@@ -1309,20 +1270,17 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 	return -EINVAL;
 }
 
-
 static int __init mod_init(void)
 {
 	register_hdlc_protocol(&proto);
 	return 0;
 }
 
-
 static void __exit mod_exit(void)
 {
 	unregister_hdlc_protocol(&proto);
 }
 
-
 module_init(mod_init);
 module_exit(mod_exit);
 

From 4a9ab454ae9bda262802179cae4f5700736e8bd6 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:41 +0800
Subject: [PATCH 0638/3292] net: hdlc_fr: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 0b6e133de4ad..96e4a89fa923 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -329,6 +329,7 @@ static int pvc_open(struct net_device *dev)
 
 	if (pvc->open_count++ == 0) {
 		hdlc_device *hdlc = dev_to_hdlc(pvc->frad);
+
 		if (state(hdlc)->settings.lmi == LMI_NONE)
 			pvc->state.active = netif_carrier_ok(pvc->frad);
 
@@ -344,6 +345,7 @@ static int pvc_close(struct net_device *dev)
 
 	if (--pvc->open_count == 0) {
 		hdlc_device *hdlc = dev_to_hdlc(pvc->frad);
+
 		if (state(hdlc)->settings.lmi == LMI_NONE)
 			pvc->state.active = 0;
 
@@ -1143,6 +1145,7 @@ static void fr_destroy(struct net_device *frad)
 {
 	hdlc_device *hdlc = dev_to_hdlc(frad);
 	struct pvc_device *pvc = state(hdlc)->first_pvc;
+
 	state(hdlc)->first_pvc = NULL; /* All PVCs destroyed */
 	state(hdlc)->dce_pvc_count = 0;
 	state(hdlc)->dce_changed = 1;

From 7aad0642599162771653f8433ad473c24f556c83 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:42 +0800
Subject: [PATCH 0639/3292] net: hdlc_fr: fix an code style issue about "foo*
 bar"

Fix the checkpatch error as "foo* bar" and should be "foo *bar",
and "(foo*)" should be "(foo *)".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 96e4a89fa923..4a6172cca682 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -159,7 +159,7 @@ static inline void dlci_to_q922(u8 *hdr, u16 dlci)
 	hdr[1] = ((dlci << 4) & 0xF0) | 0x01;
 }
 
-static inline struct frad_state* state(hdlc_device *hdlc)
+static inline struct frad_state *state(hdlc_device *hdlc)
 {
 	return(struct frad_state *)(hdlc->state);
 }
@@ -1090,7 +1090,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 		eth_hw_addr_random(dev);
 	} else {
-		*(__be16*)dev->dev_addr = htons(dlci);
+		*(__be16 *)dev->dev_addr = htons(dlci);
 		dlci_to_q922(dev->broadcast, dlci);
 	}
 	dev->netdev_ops = &pvc_ops;

From 30e7720d379ad868ae7a510457ad8f2bf44ef056 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:43 +0800
Subject: [PATCH 0640/3292] net: hdlc_fr: add some required spaces

Add spaces required after that close brace '}'.
Add spaces required before the open parenthesis '('.
Add spaces required after that ','.
Add spaces required around that '='.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 4a6172cca682..512ef79459da 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -125,7 +125,7 @@ struct pvc_device {
 		unsigned int fecn: 1;
 		unsigned int becn: 1;
 		unsigned int bandwidth;	/* Cisco LMI reporting only */
-	}state;
+	} state;
 };
 
 struct frad_state {
@@ -161,7 +161,7 @@ static inline void dlci_to_q922(u8 *hdr, u16 dlci)
 
 static inline struct frad_state *state(hdlc_device *hdlc)
 {
-	return(struct frad_state *)(hdlc->state);
+	return (struct frad_state *)(hdlc->state);
 }
 
 static inline struct pvc_device *find_pvc(hdlc_device *hdlc, u16 dlci)
@@ -1223,7 +1223,8 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
 		     new_settings.dce != 1))
 			return -EINVAL;
 
-		result=hdlc->attach(dev, ENCODING_NRZ,PARITY_CRC16_PR1_CCITT);
+		result = hdlc->attach(dev, ENCODING_NRZ,
+				      PARITY_CRC16_PR1_CCITT);
 		if (result)
 			return result;
 

From 168a196ffcff6aee6834cf73fc60d5cc387a0d85 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:44 +0800
Subject: [PATCH 0641/3292] net: hdlc_fr: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 512ef79459da..a39e5082c20f 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -937,7 +937,8 @@ static int fr_rx(struct sk_buff *skb)
 		pvc->state.becn ^= 1;
 	}
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb) {
 		frad->stats.rx_dropped++;
 		return NET_RX_DROP;
 	}
@@ -1064,7 +1065,8 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 	struct net_device *dev;
 	int used;
 
-	if ((pvc = add_pvc(frad, dlci)) == NULL) {
+	pvc = add_pvc(frad, dlci);
+	if (!pvc) {
 		netdev_warn(frad, "Memory squeeze on fr_add_pvc()\n");
 		return -ENOBUFS;
 	}
@@ -1121,10 +1123,12 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 	struct pvc_device *pvc;
 	struct net_device *dev;
 
-	if ((pvc = find_pvc(hdlc, dlci)) == NULL)
+	pvc = find_pvc(hdlc, dlci);
+	if (!pvc)
 		return -ENOENT;
 
-	if ((dev = *get_dev_p(pvc, type)) == NULL)
+	dev = *get_dev_p(pvc, type);
+	if (!dev)
 		return -ENOENT;
 
 	if (dev->flags & IFF_UP)

From 683b54bb468fdae45659cbd082267053d32865ba Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:45 +0800
Subject: [PATCH 0642/3292] net: hdlc_fr: code indent use tabs where possible

Code indent should use tabs where possible.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index a39e5082c20f..fa10eea88fbc 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -6,16 +6,16 @@
  * Copyright (C) 1999 - 2006 Krzysztof Halasa <khc@pm.waw.pl>
  *
 
-            Theory of PVC state
+	Theory of PVC state
 
  DCE mode:
 
  (exist,new) -> 0,0 when "PVC create" or if "link unreliable"
-         0,x -> 1,1 if "link reliable" when sending FULL STATUS
-         1,1 -> 1,0 if received FULL STATUS ACK
+	 0,x -> 1,1 if "link reliable" when sending FULL STATUS
+	 1,1 -> 1,0 if received FULL STATUS ACK
 
  (active)    -> 0 when "ifconfig PVC down" or "link unreliable" or "PVC create"
-             -> 1 when "PVC up" and (exist,new) = 1,0
+	     -> 1 when "PVC up" and (exist,new) = 1,0
 
  DTE mode:
  (exist,new,active) = FULL STATUS if "link reliable"

From 8f032c6535fecb29a90915c566f6851146e9ebb6 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:46 +0800
Subject: [PATCH 0643/3292] net: hdlc_fr: remove space after '!'

According to the chackpatch.pl, space prohibited after that '!'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index fa10eea88fbc..77b4f65cc4e0 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -789,8 +789,8 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 		}
 		i++;
 
-		new = !! (skb->data[i + 2] & 0x08);
-		active = !! (skb->data[i + 2] & 0x02);
+		new = !!(skb->data[i + 2] & 0x08);
+		active = !!(skb->data[i + 2] & 0x02);
 		if (lmi == LMI_CISCO) {
 			dlci = (skb->data[i] << 8) | skb->data[i + 1];
 			bw = (skb->data[i + 3] << 16) |

From 5d650a6c7b9cb75c48a75b2593a48a307c8b91f1 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:47 +0800
Subject: [PATCH 0644/3292] net: hdlc_fr: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 77b4f65cc4e0..5c2a2ec6de5f 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -613,10 +613,10 @@ static void fr_timer(struct timer_list *t)
 		fr_set_link_state(reliable, dev);
 	}
 
-	if (state(hdlc)->settings.dce)
+	if (state(hdlc)->settings.dce) {
 		state(hdlc)->timer.expires = jiffies +
 			state(hdlc)->settings.t392 * HZ;
-	else {
+	} else {
 		if (state(hdlc)->n391cnt)
 			state(hdlc)->n391cnt--;
 
@@ -671,8 +671,9 @@ static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 			return 1;
 		}
 		i = 7;
-	} else
+	} else {
 		i = 6;
+	}
 
 	if (skb->data[i] != (lmi == LMI_CCITT ? LMI_CCITT_REPTYPE :
 			     LMI_ANSI_CISCO_REPTYPE)) {
@@ -1013,8 +1014,9 @@ static void fr_start(struct net_device *dev)
 		/* First poll after 1 s */
 		state(hdlc)->timer.expires = jiffies + HZ;
 		add_timer(&state(hdlc)->timer);
-	} else
+	} else {
 		fr_set_link_state(1, dev);
+	}
 }
 
 static void fr_stop(struct net_device *dev)

From c9a2ca5d7e58860d56ce5e7b74f7621cebf819de Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:48 +0800
Subject: [PATCH 0645/3292] net: hdlc_fr: remove redundant braces {}

This patch removes redundant braces {}, to fix the
checkpatch.pl warning:
"braces {} are not necessary for any arm of this statement"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 5c2a2ec6de5f..de7fbdc77588 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -480,11 +480,11 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 	}
 	memset(skb->data, 0, len);
 	skb_reserve(skb, 4);
-	if (lmi == LMI_CISCO) {
+	if (lmi == LMI_CISCO)
 		fr_hard_header(skb, LMI_CISCO_DLCI);
-	} else {
+	else
 		fr_hard_header(skb, LMI_CCITT_ANSI_DLCI);
-	}
+
 	data = skb_tail_pointer(skb);
 	data[i++] = LMI_CALLREF;
 	data[i++] = dce ? LMI_STATUS : LMI_STATUS_ENQUIRY;

From 2744fa2dfdcd390c03056b50de8563da9d0f1660 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 28 May 2021 08:12:49 +0800
Subject: [PATCH 0646/3292] net: hdlc_fr: remove unnecessary out of memory
 message

This patch removes unnecessary out of memory message,
to fix the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/hdlc_fr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index de7fbdc77588..72250fe0a1df 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -474,10 +474,9 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 	}
 
 	skb = dev_alloc_skb(len);
-	if (!skb) {
-		netdev_warn(dev, "Memory squeeze on fr_lmi_send()\n");
+	if (!skb)
 		return;
-	}
+
 	memset(skb->data, 0, len);
 	skb_reserve(skb, 4);
 	if (lmi == LMI_CISCO)

From 460a9aa23de6eda55734411e3301838a9033b8b9 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 28 May 2021 16:06:35 +0200
Subject: [PATCH 0647/3292] samples: pktgen: add UDP tx checksum support

Introduce k parameter in pktgen samples in order to toggle UDP tx
checksum

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/r/cf16417902062c6ea2fd3c79e00510e36a40c31a.1622210713.git.lorenzo@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 samples/pktgen/parameters.sh                               | 7 ++++++-
 samples/pktgen/pktgen_sample01_simple.sh                   | 2 ++
 samples/pktgen/pktgen_sample02_multiqueue.sh               | 2 ++
 samples/pktgen/pktgen_sample03_burst_single_flow.sh        | 2 ++
 samples/pktgen/pktgen_sample04_many_flows.sh               | 2 ++
 samples/pktgen/pktgen_sample05_flow_per_thread.sh          | 2 ++
 .../pktgen_sample06_numa_awared_queue_irq_affinity.sh      | 2 ++
 7 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index b4c1b371e4b8..81906f199454 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -11,6 +11,7 @@ function usage() {
     echo "  -d : (\$DEST_IP)   destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed"
     echo "  -m : (\$DST_MAC)   destination MAC-addr"
     echo "  -p : (\$DST_PORT)  destination PORT range (e.g. 433-444) is also allowed"
+    echo "  -k : (\$UDP_CSUM)  enable UDP tx checksum"
     echo "  -t : (\$THREADS)   threads to start"
     echo "  -f : (\$F_THREAD)  index of first thread (zero indexed CPU number)"
     echo "  -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
@@ -26,7 +27,7 @@ function usage() {
 
 ##  --- Parse command line arguments / parameters ---
 ## echo "Commandline options:"
-while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6a" option; do
+while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6ak" option; do
     case $option in
         i) # interface
           export DEV=$OPTARG
@@ -88,6 +89,10 @@ while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6a" option; do
           export APPEND=yes
           info "Append mode: APPEND=$APPEND"
           ;;
+        k)
+          export UDP_CSUM=yes
+          info "UDP tx checksum: UDP_CSUM=$UDP_CSUM"
+          ;;
         h|?|*)
           usage;
           err 2 "[ERROR] Unknown parameters!!!"
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index a09f3422fbcc..246cfe02bb82 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -72,6 +72,8 @@ if [ -n "$DST_PORT" ]; then
     pg_set $DEV "udp_dst_max $UDP_DST_MAX"
 fi
 
+[ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
 # Setup random UDP port src range
 pg_set $DEV "flag UDPSRC_RND"
 pg_set $DEV "udp_src_min $UDP_SRC_MIN"
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index acae8ede0d6c..c6af3d9d5171 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -75,6 +75,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
+    [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
     # Setup random UDP port src range
     pg_set $dev "flag UDPSRC_RND"
     pg_set $dev "udp_src_min $UDP_SRC_MIN"
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index 5adcf954de73..ab87de440277 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -73,6 +73,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
+    [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
     # Setup burst, for easy testing -b 0 disable bursting
     # (internally in pktgen default and minimum burst=1)
     if [[ ${BURST} -ne 0 ]]; then
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index ddce876635aa..56c5f5af350f 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -72,6 +72,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
+    [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
     # Randomize source IP-addresses
     pg_set $dev "flag IPSRC_RND"
     pg_set $dev "src_min $SRC_MIN"
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 4a65fe2fcee9..6e0effabca59 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -62,6 +62,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 	pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
+    [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
     # Setup source IP-addresses based on thread number
     pg_set $dev "src_min 198.18.$((thread+1)).1"
     pg_set $dev "src_max 198.18.$((thread+1)).1"
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 10f1da571f40..7c27923083a6 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -92,6 +92,8 @@ for ((i = 0; i < $THREADS; i++)); do
 	pg_set $dev "udp_dst_max $UDP_DST_MAX"
     fi
 
+    [ ! -z "$UDP_CSUM" ] && pg_set $dev "flag UDPCSUM"
+
     # Setup random UDP port src range
     pg_set $dev "flag UDPSRC_RND"
     pg_set $dev "udp_src_min $UDP_SRC_MIN"

From cd4375d621aa0adda527640b421fad969bf0c9bd Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:49 -0400
Subject: [PATCH 0648/3292] nfc: fdp: correct kerneldoc for structure

Since structure comments are not kerneldoc, remove the double ** to fix
W=1 warnings:

    warning: This comment starts with '/**', but isn't a kernel-doc comment.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/fdp/fdp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index fe0719ed81a0..125d71c27b8b 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -149,7 +149,7 @@ static void fdp_nci_send_patch_cb(struct nci_dev *ndev)
 	wake_up(&info->setup_wq);
 }
 
-/**
+/*
  * Register a packet sent counter and a callback
  *
  * We have no other way of knowing when all firmware packets were sent out
@@ -167,7 +167,7 @@ static void fdp_nci_set_data_pkt_counter(struct nci_dev *ndev,
 	info->data_pkt_counter_cb = cb;
 }
 
-/**
+/*
  * The device is expecting a stream of packets. All packets need to
  * have the PBF flag set to 0x0 (last packet) even if the firmware
  * file is segmented and there are multiple packets. If we give the

From 466e1c889c7134462aca62c683aab5512fea2f93 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:50 -0400
Subject: [PATCH 0649/3292] nfc: fdp: drop ACPI_PTR from device ID table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match only via the ACPI ID table so the table should be
always used and the ACPI_PTR does not have any sense.  This fixes fixes
compile warning (!CONFIG_ACPI):

    drivers/nfc/fdp/i2c.c:362:36: warning:
        ‘fdp_nci_i2c_acpi_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-2-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/fdp/i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index adaa1a7147f9..997e0806821a 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -368,7 +368,7 @@ MODULE_DEVICE_TABLE(acpi, fdp_nci_i2c_acpi_match);
 static struct i2c_driver fdp_nci_i2c_driver = {
 	.driver = {
 		   .name = FDP_I2C_DRIVER_NAME,
-		   .acpi_match_table = ACPI_PTR(fdp_nci_i2c_acpi_match),
+		   .acpi_match_table = fdp_nci_i2c_acpi_match,
 		  },
 	.probe_new = fdp_nci_i2c_probe,
 	.remove = fdp_nci_i2c_remove,

From a548bee9ffe89018932886680b09e2eedeefb14e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:51 -0400
Subject: [PATCH 0650/3292] nfc: port100: correct kerneldoc for structure

The port100_in_rf_setting structure does not contain valid kerneldoc
docummentation, unlike the port100_tg_rf_setting structure.  Correct the
kerneldoc to fix W=1 warnings:

    warning: This comment starts with '/**', but isn't a kernel-doc comment.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-3-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/port100.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index 8e4d355dc3ae..4df926cc37d0 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -94,7 +94,7 @@ struct port100;
 typedef void (*port100_send_async_complete_t)(struct port100 *dev, void *arg,
 					      struct sk_buff *resp);
 
-/**
+/*
  * Setting sets structure for in_set_rf command
  *
  * @in_*_set_number: Represent the entry indexes in the port-100 RF Base Table.
@@ -145,7 +145,7 @@ static const struct port100_in_rf_setting in_rf_settings[] = {
 };
 
 /**
- * Setting sets structure for tg_set_rf command
+ * struct port100_tg_rf_setting - Setting sets structure for tg_set_rf command
  *
  * @tg_set_number: Represents the entry index in the port-100 RF Base Table.
  *                 This table contains multiple RF setting sets required for RF

From a70bbbe387d0944975d5016f1ba3be1e6b4c3971 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:52 -0400
Subject: [PATCH 0651/3292] nfc: pn533: drop of_match_ptr from device ID table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match only via the DT table so the table should be always
used and the of_match_ptr does not have any sense (this also allows ACPI
matching via PRP0001, even though it might be not relevant here).  This
fixes compile warning (!CONFIG_OF):

    drivers/nfc/pn533/i2c.c:252:34: warning:
      ‘of_pn533_i2c_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-4-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/uart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
index a0665d8ea85b..7bdaf8263070 100644
--- a/drivers/nfc/pn533/uart.c
+++ b/drivers/nfc/pn533/uart.c
@@ -319,7 +319,7 @@ static struct serdev_device_driver pn532_uart_driver = {
 	.remove = pn532_uart_remove,
 	.driver = {
 		.name = "pn532_uart",
-		.of_match_table = of_match_ptr(pn532_uart_of_match),
+		.of_match_table = pn532_uart_of_match,
 	},
 };
 

From 26f20ff5e207ed87340fc574cb3b360a2581272c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:53 -0400
Subject: [PATCH 0652/3292] nfc: mrvl: mark OF device ID tables as maybe unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or I2C ID tables.  If OF is disabled,
the table will be unused:

    drivers/nfc/nfcmrvl/spi.c:199:34: warning:
        ‘of_nfcmrvl_spi_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-5-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/nfcmrvl/i2c.c | 2 +-
 drivers/nfc/nfcmrvl/spi.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index c5420616b7bc..bafd9b500b2c 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -260,7 +260,7 @@ static int nfcmrvl_i2c_remove(struct i2c_client *client)
 }
 
 
-static const struct of_device_id of_nfcmrvl_i2c_match[] = {
+static const struct of_device_id of_nfcmrvl_i2c_match[] __maybe_unused = {
 	{ .compatible = "marvell,nfc-i2c", },
 	{},
 };
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index dec0d3eb3648..0647b85930a6 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -196,7 +196,7 @@ static int nfcmrvl_spi_remove(struct spi_device *spi)
 	return 0;
 }
 
-static const struct of_device_id of_nfcmrvl_spi_match[] = {
+static const struct of_device_id of_nfcmrvl_spi_match[] __maybe_unused = {
 	{ .compatible = "marvell,nfc-spi", },
 	{},
 };

From 41a6bf50ee04a604f84ba8989055781d68061ed6 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:54 -0400
Subject: [PATCH 0653/3292] nfc: mrvl: skip impossible NCI_MAX_PAYLOAD_SIZE
 check

The nci_ctrl_hdr.plen field us u8, so checkign if it is bigger than
NCI_MAX_PAYLOAD_SIZE does not make any sense.  Fix warning reported by
Smatch:

    drivers/nfc/nfcmrvl/i2c.c:52 nfcmrvl_i2c_read() warn:
        impossible condition '(nci_hdr.plen > 255) => (0-255 > 255)'

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-6-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/nfcmrvl/i2c.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index bafd9b500b2c..3c9bbee98237 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -49,11 +49,6 @@ static int nfcmrvl_i2c_read(struct nfcmrvl_i2c_drv_data *drv_data,
 		return -EBADMSG;
 	}
 
-	if (nci_hdr.plen > NCI_MAX_PAYLOAD_SIZE) {
-		nfc_err(&drv_data->i2c->dev, "invalid packet payload size\n");
-		return -EBADMSG;
-	}
-
 	*skb = nci_skb_alloc(drv_data->priv->ndev,
 			     nci_hdr.plen + NCI_CTRL_HDR_SIZE, GFP_KERNEL);
 	if (!*skb)

From b3a790d4374981732202fd13d6634c439bad9cfe Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:55 -0400
Subject: [PATCH 0654/3292] nfc: pn533: mark OF device ID tables as maybe
 unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or I2C ID tables.  If OF is disabled,
the table will be unused:

    drivers/nfc/pn533/i2c.c:252:34: warning:
        ‘of_pn533_i2c_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-7-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c
index 795da9b85d56..bfc617acabae 100644
--- a/drivers/nfc/pn533/i2c.c
+++ b/drivers/nfc/pn533/i2c.c
@@ -249,7 +249,7 @@ static int pn533_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-static const struct of_device_id of_pn533_i2c_match[] = {
+static const struct of_device_id of_pn533_i2c_match[] __maybe_unused = {
 	{ .compatible = "nxp,pn532", },
 	/*
 	 * NOTE: The use of the compatibles with the trailing "...-i2c" is

From 5edc94265e195e7e8cbc19dd4ee09f001a46fb34 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:56 -0400
Subject: [PATCH 0655/3292] nfc: s3fwrn5: mark OF device ID tables as maybe
 unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or I2C ID tables.  If OF is disabled,
the table will be unused:

    drivers/nfc/s3fwrn5/i2c.c:265:34: warning:
        ‘of_s3fwrn5_i2c_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-8-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/s3fwrn5/i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c
index 38b8d6cab593..4d1cf1bb55b0 100644
--- a/drivers/nfc/s3fwrn5/i2c.c
+++ b/drivers/nfc/s3fwrn5/i2c.c
@@ -262,7 +262,7 @@ static const struct i2c_device_id s3fwrn5_i2c_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, s3fwrn5_i2c_id_table);
 
-static const struct of_device_id of_s3fwrn5_i2c_match[] = {
+static const struct of_device_id of_s3fwrn5_i2c_match[] __maybe_unused = {
 	{ .compatible = "samsung,s3fwrn5-i2c", },
 	{}
 };

From aa1405772fe13f4ec74d50610facd3fb7f6d998d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:57 -0400
Subject: [PATCH 0656/3292] nfc: pn544: mark ACPI and OF device ID tables as
 maybe unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or ACPI ID tables.  If one
configuration is disabled, the table will be unused:

    drivers/nfc/pn544/i2c.c:53:36: warning:
        ‘pn544_hci_i2c_acpi_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-9-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn544/i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index 4ac8cb262559..aac778c5ddd2 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -50,7 +50,7 @@ static const struct i2c_device_id pn544_hci_i2c_id_table[] = {
 
 MODULE_DEVICE_TABLE(i2c, pn544_hci_i2c_id_table);
 
-static const struct acpi_device_id pn544_hci_i2c_acpi_match[] = {
+static const struct acpi_device_id pn544_hci_i2c_acpi_match[] __maybe_unused = {
 	{"NXP5440", 0},
 	{}
 };
@@ -951,7 +951,7 @@ static int pn544_hci_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-static const struct of_device_id of_pn544_i2c_match[] = {
+static const struct of_device_id of_pn544_i2c_match[] __maybe_unused = {
 	{ .compatible = "nxp,pn544-i2c", },
 	{},
 };

From 255fcc7b71666bd3275ac0a4476e91d175ac9223 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:58 -0400
Subject: [PATCH 0657/3292] nfc: st-nci: mark ACPI and OF device ID tables as
 maybe unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or ACPI ID tables.  If one
configuration is disabled, the table will be unused:

    drivers/nfc/st-nci/spi.c:296:34: warning:
        ‘of_st_nci_spi_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-10-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st-nci/i2c.c | 4 ++--
 drivers/nfc/st-nci/spi.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 55d600cd3861..663d1cc19b81 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -274,14 +274,14 @@ static const struct i2c_device_id st_nci_i2c_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, st_nci_i2c_id_table);
 
-static const struct acpi_device_id st_nci_i2c_acpi_match[] = {
+static const struct acpi_device_id st_nci_i2c_acpi_match[] __maybe_unused = {
 	{"SMO2101"},
 	{"SMO2102"},
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, st_nci_i2c_acpi_match);
 
-static const struct of_device_id of_st_nci_i2c_match[] = {
+static const struct of_device_id of_st_nci_i2c_match[] __maybe_unused = {
 	{ .compatible = "st,st21nfcb-i2c", },
 	{ .compatible = "st,st21nfcb_i2c", },
 	{ .compatible = "st,st21nfcc-i2c", },
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 09df6ea65840..5f1a2173b2e7 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -287,13 +287,13 @@ static struct spi_device_id st_nci_spi_id_table[] = {
 };
 MODULE_DEVICE_TABLE(spi, st_nci_spi_id_table);
 
-static const struct acpi_device_id st_nci_spi_acpi_match[] = {
+static const struct acpi_device_id st_nci_spi_acpi_match[] __maybe_unused = {
 	{"SMO2101", 0},
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, st_nci_spi_acpi_match);
 
-static const struct of_device_id of_st_nci_spi_match[] = {
+static const struct of_device_id of_st_nci_spi_match[] __maybe_unused = {
 	{ .compatible = "st,st21nfcb-spi", },
 	{}
 };

From 80627802349242de2387a768475e2429e40e061c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:41:59 -0400
Subject: [PATCH 0658/3292] nfc: st21nfca: mark ACPI and OF device ID tables as
 maybe unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or ACPI ID tables.  If one
configuration is disabled, the table will be unused:

    drivers/nfc/st21nfca/i2c.c:593:34: warning:
        ‘of_st21nfca_i2c_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-11-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st21nfca/i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index 23ed11f91213..cebc6c06a1b6 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -584,13 +584,13 @@ static const struct i2c_device_id st21nfca_hci_i2c_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, st21nfca_hci_i2c_id_table);
 
-static const struct acpi_device_id st21nfca_hci_i2c_acpi_match[] = {
+static const struct acpi_device_id st21nfca_hci_i2c_acpi_match[] __maybe_unused = {
 	{"SMO2100", 0},
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, st21nfca_hci_i2c_acpi_match);
 
-static const struct of_device_id of_st21nfca_i2c_match[] = {
+static const struct of_device_id of_st21nfca_i2c_match[] __maybe_unused = {
 	{ .compatible = "st,st21nfca-i2c", },
 	{ .compatible = "st,st21nfca_i2c", },
 	{}

From 1ab4fe09977e9f32a6992072c648865fcd36b750 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Fri, 28 May 2021 08:42:00 -0400
Subject: [PATCH 0659/3292] nfc: st95hf: mark ACPI and OF device ID tables as
 maybe unused
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match either via OF or ACPI ID tables.  If one
configuration is disabled, the table will be unused:

    drivers/nfc/st95hf/core.c:1059:34: warning:
        ‘st95hf_spi_of_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210528124200.79655-12-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st95hf/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 88924be8decb..0d99181b6ce3 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1056,7 +1056,7 @@ static const struct spi_device_id st95hf_id[] = {
 };
 MODULE_DEVICE_TABLE(spi, st95hf_id);
 
-static const struct of_device_id st95hf_spi_of_match[] = {
+static const struct of_device_id st95hf_spi_of_match[] __maybe_unused = {
         { .compatible = "st,st95hf" },
         { },
 };

From 4751d2aa321f2828d8c5d2f7ce4ed18a01e47f46 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 27 May 2021 18:59:59 +0300
Subject: [PATCH 0660/3292] net: stmmac: the XPCS obscures a potential "PHY not
 found" error

stmmac_mdio_register() has logic to search for PHYs on the MDIO bus and
assign them IRQ lines, as well as to set priv->plat->phy_addr.

If no PHY is found, the "found" variable remains set to 0 and the
function errors out.

After the introduction of commit f213bbe8a9d6 ("net: stmmac: Integrate
it with DesignWare XPCS"), the "found" variable was immediately reused
for searching for a PCS on the same MDIO bus.

This can result in 2 types of potential problems (none of them seems to
be seen on the only Intel system that sets has_xpcs = true, otherwise it
would have been reported):

1. If a PCS is found but a PHY is not, then the code happily exits with
   no error. One might say "yes, but this is not possible, because
   of_mdiobus_register will probe a PHY for all MDIO addresses,
   including for the XPCS, so if an XPCS exists, then a PHY certainly
   exists too". Well, that is not true, see intel_mgbe_common_data():

	/* Ensure mdio bus scan skips intel serdes and pcs-xpcs */
	plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR;
	plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR;

2. A PHY is found but an MDIO device with the XPCS PHY ID isn't, and in
   that case, the error message will be "No PHY found". Confusing.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210527155959.3270478-1-olteanv@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index b750074f8f9c..e293bf1ce9f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -503,6 +503,12 @@ int stmmac_mdio_register(struct net_device *ndev)
 		found = 1;
 	}
 
+	if (!found && !mdio_node) {
+		dev_warn(dev, "No PHY found\n");
+		err = -ENODEV;
+		goto no_phy_found;
+	}
+
 	/* Try to probe the XPCS by scanning all addresses. */
 	if (priv->hw->xpcs) {
 		struct mdio_xpcs_args *xpcs = &priv->hw->xpcs_args;
@@ -511,6 +517,7 @@ int stmmac_mdio_register(struct net_device *ndev)
 
 		xpcs->bus = new_bus;
 
+		found = 0;
 		for (addr = 0; addr < max_addr; addr++) {
 			xpcs->addr = addr;
 
@@ -520,13 +527,12 @@ int stmmac_mdio_register(struct net_device *ndev)
 				break;
 			}
 		}
-	}
 
-	if (!found && !mdio_node) {
-		dev_warn(dev, "No PHY found\n");
-		mdiobus_unregister(new_bus);
-		mdiobus_free(new_bus);
-		return -ENODEV;
+		if (!found && !mdio_node) {
+			dev_warn(dev, "No XPCS found\n");
+			err = -ENODEV;
+			goto no_xpcs_found;
+		}
 	}
 
 bus_register_done:
@@ -534,6 +540,9 @@ bus_register_done:
 
 	return 0;
 
+no_xpcs_found:
+no_phy_found:
+	mdiobus_unregister(new_bus);
 bus_register_fail:
 	mdiobus_free(new_bus);
 	return err;

From ffb35c679842b471d8bc42c6986daa3b373f57b2 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 28 May 2021 15:23:27 -0500
Subject: [PATCH 0661/3292] r8169: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Acked-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://lore.kernel.org/r/20210528202327.GA39994@embeddedor
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 1663e0486496..64f94a3fe646 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4115,6 +4115,7 @@ static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp,
 	case RTL_GIGA_MAC_VER_61:
 	case RTL_GIGA_MAC_VER_63:
 		padto = max_t(unsigned int, padto, ETH_ZLEN);
+		break;
 	default:
 		break;
 	}

From f227925e53c3ecc168027e0015ab0a953d1bf013 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 May 2021 22:29:56 +0200
Subject: [PATCH 0662/3292] netfilter: nf_tables: prefer direct calls for set
 lookups

Extend nft_set_do_lookup() to use direct calls when retpoline feature
is enabled.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 24 ++++++++++++++++++++
 net/netfilter/nft_lookup.c             | 31 ++++++++++++++++++++++++++
 net/netfilter/nft_set_bitmap.c         |  5 +++--
 net/netfilter/nft_set_hash.c           | 17 ++++++++------
 net/netfilter/nft_set_pipapo.h         |  2 --
 net/netfilter/nft_set_pipapo_avx2.h    |  2 --
 net/netfilter/nft_set_rbtree.c         |  5 +++--
 7 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 5eb699454490..46c8d5bb5d8d 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -3,6 +3,7 @@
 #define _NET_NF_TABLES_CORE_H
 
 #include <net/netfilter/nf_tables.h>
+#include <linux/indirect_call_wrapper.h>
 
 extern struct nft_expr_type nft_imm_type;
 extern struct nft_expr_type nft_cmp_type;
@@ -88,12 +89,35 @@ extern const struct nft_set_type nft_set_bitmap_type;
 extern const struct nft_set_type nft_set_pipapo_type;
 extern const struct nft_set_type nft_set_pipapo_avx2_type;
 
+#ifdef CONFIG_RETPOLINE
+bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+		      const u32 *key, const struct nft_set_ext **ext);
+bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext);
+bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext);
+bool nft_hash_lookup_fast(const struct net *net,
+			  const struct nft_set *set,
+			  const u32 *key, const struct nft_set_ext **ext);
+bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+		     const u32 *key, const struct nft_set_ext **ext);
+bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext);
+#else
 static inline bool
 nft_set_do_lookup(const struct net *net, const struct nft_set *set,
 		  const u32 *key, const struct nft_set_ext **ext)
 {
 	return set->ops->lookup(net, set, key, ext);
 }
+#endif
+
+/* called from nft_pipapo_avx2.c */
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext);
+/* called from nft_set_pipapo.c */
+bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
+			    const u32 *key, const struct nft_set_ext **ext);
 
 struct nft_expr;
 struct nft_regs;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 1a8581879af5..90becbf5bff3 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -23,6 +23,37 @@ struct nft_lookup {
 	struct nft_set_binding		binding;
 };
 
+#ifdef CONFIG_RETPOLINE
+bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext)
+{
+	if (set->ops == &nft_set_hash_fast_type.ops)
+		return nft_hash_lookup_fast(net, set, key, ext);
+	if (set->ops == &nft_set_hash_type.ops)
+		return nft_hash_lookup(net, set, key, ext);
+
+	if (set->ops == &nft_set_rhash_type.ops)
+		return nft_rhash_lookup(net, set, key, ext);
+
+	if (set->ops == &nft_set_bitmap_type.ops)
+		return nft_bitmap_lookup(net, set, key, ext);
+
+	if (set->ops == &nft_set_pipapo_type.ops)
+		return nft_pipapo_lookup(net, set, key, ext);
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+	if (set->ops == &nft_set_pipapo_avx2_type.ops)
+		return nft_pipapo_avx2_lookup(net, set, key, ext);
+#endif
+
+	if (set->ops == &nft_set_rbtree_type.ops)
+		return nft_rbtree_lookup(net, set, key, ext);
+
+	WARN_ON_ONCE(1);
+	return set->ops->lookup(net, set, key, ext);
+}
+EXPORT_SYMBOL_GPL(nft_set_do_lookup);
+#endif
+
 void nft_lookup_eval(const struct nft_expr *expr,
 		     struct nft_regs *regs,
 		     const struct nft_pktinfo *pkt)
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 2a81ea421819..e7ae5914971e 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -73,8 +73,9 @@ nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
 	return (bitmap[idx] & (0x3 << off)) & (genmask << off);
 }
 
-static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
-			      const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext)
 {
 	const struct nft_bitmap *priv = nft_set_priv(set);
 	u8 genmask = nft_genmask_cur(net);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 7b3d0a78c569..df40314de21f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -74,8 +74,9 @@ static const struct rhashtable_params nft_rhash_params = {
 	.automatic_shrinking	= true,
 };
 
-static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
-			     const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+		      const u32 *key, const struct nft_set_ext **ext)
 {
 	struct nft_rhash *priv = nft_set_priv(set);
 	const struct nft_rhash_elem *he;
@@ -446,8 +447,9 @@ struct nft_hash_elem {
 	struct nft_set_ext		ext;
 };
 
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
-			    const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+		     const u32 *key, const struct nft_set_ext **ext)
 {
 	struct nft_hash *priv = nft_set_priv(set);
 	u8 genmask = nft_genmask_cur(net);
@@ -484,9 +486,10 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set,
 	return ERR_PTR(-ENOENT);
 }
 
-static bool nft_hash_lookup_fast(const struct net *net,
-				 const struct nft_set *set,
-				 const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_hash_lookup_fast(const struct net *net,
+			  const struct nft_set *set,
+			  const u32 *key, const struct nft_set_ext **ext)
 {
 	struct nft_hash *priv = nft_set_priv(set);
 	u8 genmask = nft_genmask_cur(net);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index d84afb8fa79a..25a75591583e 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -178,8 +178,6 @@ struct nft_pipapo_elem {
 
 int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
 		  union nft_pipapo_map_bucket *mt, bool match_only);
-bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
-		       const u32 *key, const struct nft_set_ext **ext);
 
 /**
  * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h
index 394bcb704db7..dbb6aaca8a7a 100644
--- a/net/netfilter/nft_set_pipapo_avx2.h
+++ b/net/netfilter/nft_set_pipapo_avx2.h
@@ -5,8 +5,6 @@
 #include <asm/fpu/xstate.h>
 #define NFT_PIPAPO_ALIGN	(XSAVE_YMM_SIZE / BITS_PER_BYTE)
 
-bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
-			    const u32 *key, const struct nft_set_ext **ext);
 bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
 			      struct nft_set_estimate *est);
 #endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9e36eb4a7429..d600a566da32 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -107,8 +107,9 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
 	return false;
 }
 
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
-			      const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext)
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
 	unsigned int seq = read_seqcount_begin(&priv->count);

From 06f029930264ee8013fb76cfb591c6e1ad2f0dd0 Mon Sep 17 00:00:00 2001
From: Juerg Haefliger <juerg.haefliger@canonical.com>
Date: Mon, 17 May 2021 11:58:50 +0200
Subject: [PATCH 0663/3292] netfilter: Remove leading spaces in Kconfig

Remove leading spaces before tabs in Kconfig file(s) by running the
following command:

  $ find net/netfilter -name 'Kconfig*' | xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig      | 2 +-
 net/netfilter/ipvs/Kconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56a2531a3402..172d74560632 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -816,7 +816,7 @@ config NETFILTER_XT_TARGET_CLASSIFY
 	  the priority of a packet. Some qdiscs can use this value for
 	  classification, among these are:
 
-  	  atm, cbq, dsmark, pfifo_fast, htb, prio
+	  atm, cbq, dsmark, pfifo_fast, htb, prio
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index d61886874940..271da8447b29 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -318,7 +318,7 @@ config IP_VS_MH_TAB_INDEX
 comment 'IPVS application helper'
 
 config	IP_VS_FTP
-  	tristate "FTP protocol helper"
+	tristate "FTP protocol helper"
 	depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
 		NF_CONNTRACK_FTP
 	select IP_VS_NFCT

From 07df3fc90a03919b5f1bc3b2fad0046a0aa0e2cb Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 29 Apr 2021 12:26:13 -0400
Subject: [PATCH 0664/3292] netfilter: x_tables: improve limit_mt scalability

We've seen this spin_lock show up high in profiles. Let's introduce a
lockless version. I've tested this using pktgen_sample01_simple.sh.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_limit.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 24d4afb9988d..8b4fd27857f2 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -8,16 +8,14 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <linux/spinlock.h>
 #include <linux/interrupt.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_limit.h>
 
 struct xt_limit_priv {
-	spinlock_t lock;
 	unsigned long prev;
-	uint32_t credit;
+	u32 credit;
 };
 
 MODULE_LICENSE("GPL");
@@ -66,22 +64,31 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv = r->master;
-	unsigned long now = jiffies;
+	unsigned long now;
+	u32 old_credit, new_credit, credit_increase = 0;
+	bool ret;
 
-	spin_lock_bh(&priv->lock);
-	priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
-	if (priv->credit > r->credit_cap)
-		priv->credit = r->credit_cap;
+	/* fastpath if there is nothing to update */
+	if ((READ_ONCE(priv->credit) < r->cost) && (READ_ONCE(priv->prev) == jiffies))
+		return false;
 
-	if (priv->credit >= r->cost) {
-		/* We're not limited. */
-		priv->credit -= r->cost;
-		spin_unlock_bh(&priv->lock);
-		return true;
-	}
+	do {
+		now = jiffies;
+		credit_increase += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
+		old_credit = READ_ONCE(priv->credit);
+		new_credit = old_credit;
+		new_credit += credit_increase;
+		if (new_credit > r->credit_cap)
+			new_credit = r->credit_cap;
+		if (new_credit >= r->cost) {
+			ret = true;
+			new_credit -= r->cost;
+		} else {
+			ret = false;
+		}
+	} while (cmpxchg(&priv->credit, old_credit, new_credit) != old_credit);
 
-	spin_unlock_bh(&priv->lock);
-	return false;
+	return ret;
 }
 
 /* Precision saver. */
@@ -122,7 +129,6 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
 		r->credit_cap = priv->credit; /* Credits full. */
 		r->cost = user2credits(r->avg);
 	}
-	spin_lock_init(&priv->lock);
 
 	return 0;
 }

From 02d85142670b6676abcfd95023c8d28288dc5ad9 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 30 Apr 2021 17:25:10 +0800
Subject: [PATCH 0665/3292] netfilter: xt_CT: Remove redundant assignment to
 ret

Variable 'ret' is set to zero but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed

Clean up the following clang-analyzer warning:

net/netfilter/xt_CT.c:175:2: warning: Value stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_CT.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index d4deee39158b..12404d221026 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -172,7 +172,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 		goto err2;
 	}
 
-	ret = 0;
 	if ((info->ct_events || info->exp_events) &&
 	    !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
 				  GFP_KERNEL)) {

From e0241ae6ac59ffa318255640c047f7c90457fbe5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 17 May 2021 14:43:08 +0200
Subject: [PATCH 0666/3292] netfilter: use nfnetlink_unicast()

Replace netlink_unicast() calls by nfnetlink_unicast() which already
deals with translating EAGAIN to ENOBUFS as the nfnetlink core expects.

nfnetlink_unicast() calls nlmsg_unicast() which returns zero in case of
success, otherwise the netlink core function netlink_rcv_skb() turns
err > 0 into an acknowlegment.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_core.c    | 50 +++++----------------
 net/netfilter/nf_conntrack_netlink.c | 65 ++++++++--------------------
 net/netfilter/nfnetlink_acct.c       |  9 ++--
 net/netfilter/nfnetlink_cthelper.c   | 10 ++---
 net/netfilter/nfnetlink_cttimeout.c  | 34 +++++----------
 5 files changed, 44 insertions(+), 124 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index de2d20c37cda..16ae92054baa 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1685,8 +1685,8 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static int
-call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
-	struct nlattr *tb[], enum ipset_adt adt,
+call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+	struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt,
 	u32 flags, bool use_lineno)
 {
 	int ret;
@@ -1738,8 +1738,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 
 		*errline = lineno;
 
-		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
-				MSG_DONTWAIT);
+		nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
 		/* Signal netlink not to send its ACK/errmsg.  */
 		return -EINTR;
 	}
@@ -1783,7 +1782,7 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
 				     attr[IPSET_ATTR_DATA],
 				     set->type->adt_policy, NULL))
 			return -IPSET_ERR_PROTOCOL;
-		ret = call_ad(ctnl, skb, set, tb, adt, flags,
+		ret = call_ad(net, ctnl, skb, set, tb, adt, flags,
 			      use_lineno);
 	} else {
 		int nla_rem;
@@ -1794,7 +1793,7 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
 			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
 					     set->type->adt_policy, NULL))
 				return -IPSET_ERR_PROTOCOL;
-			ret = call_ad(ctnl, skb, set, tb, adt,
+			ret = call_ad(net, ctnl, skb, set, tb, adt,
 				      flags, use_lineno);
 			if (ret < 0)
 				return ret;
@@ -1859,7 +1858,6 @@ static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
 	const struct ip_set *set;
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
-	int ret = 0;
 
 	if (unlikely(protocol_min_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME]))
@@ -1885,12 +1883,7 @@ static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 
 nla_put_failure:
 	nlmsg_cancel(skb2, nlh2);
@@ -1945,12 +1938,7 @@ static int ip_set_type(struct sk_buff *skb, const struct nfnl_info *info,
 	nlmsg_end(skb2, nlh2);
 
 	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
-	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 
 nla_put_failure:
 	nlmsg_cancel(skb2, nlh2);
@@ -1971,7 +1959,6 @@ static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
-	int ret = 0;
 
 	if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
 		return -IPSET_ERR_PROTOCOL;
@@ -1990,12 +1977,7 @@ static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 
 nla_put_failure:
 	nlmsg_cancel(skb2, nlh2);
@@ -2014,7 +1996,6 @@ static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
 	struct nlmsghdr *nlh2;
 	ip_set_id_t id = IPSET_INVALID_ID;
 	const struct ip_set *set;
-	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
 		     !attr[IPSET_ATTR_SETNAME]))
@@ -2038,12 +2019,7 @@ static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 
 nla_put_failure:
 	nlmsg_cancel(skb2, nlh2);
@@ -2065,7 +2041,6 @@ static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
 	struct nlmsghdr *nlh2;
 	ip_set_id_t id = IPSET_INVALID_ID;
 	const struct ip_set *set;
-	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
 		     !attr[IPSET_ATTR_INDEX]))
@@ -2091,12 +2066,7 @@ static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 
 nla_put_failure:
 	nlmsg_cancel(skb2, nlh2);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8690fc07030f..220f51f055ab 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1628,9 +1628,8 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
 
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
-	err = -ENOMEM;
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL) {
+	if (!skb2) {
 		nf_ct_put(ct);
 		return -ENOMEM;
 	}
@@ -1640,21 +1639,12 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
 				  NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct,
 				  true, 0);
 	nf_ct_put(ct);
-	if (err <= 0)
-		goto free;
+	if (err <= 0) {
+		kfree_skb(skb2);
+		return -ENOMEM;
+	}
 
-	err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (err < 0)
-		goto out;
-
-	return 0;
-
-free:
-	kfree_skb(skb2);
-out:
-	/* this avoids a loop in nfnetlink. */
-	return err == -EAGAIN ? -ENOBUFS : err;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 }
 
 static int ctnetlink_done_list(struct netlink_callback *cb)
@@ -2590,21 +2580,12 @@ static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info,
 					  info->nlh->nlmsg_seq,
 					  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 					  sock_net(skb->sk));
-	if (err <= 0)
-		goto free;
+	if (err <= 0) {
+		kfree_skb(skb2);
+		return -ENOMEM;
+	}
 
-	err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (err < 0)
-		goto out;
-
-	return 0;
-
-free:
-	kfree_skb(skb2);
-out:
-	/* this avoids a loop in nfnetlink. */
-	return err == -EAGAIN ? -ENOBUFS : err;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 }
 
 static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
@@ -3329,11 +3310,10 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
 		}
 	}
 
-	err = -ENOMEM;
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL) {
+	if (!skb2) {
 		nf_ct_expect_put(exp);
-		goto out;
+		return -ENOMEM;
 	}
 
 	rcu_read_lock();
@@ -3342,21 +3322,12 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
 				      exp);
 	rcu_read_unlock();
 	nf_ct_expect_put(exp);
-	if (err <= 0)
-		goto free;
+	if (err <= 0) {
+		kfree_skb(skb2);
+		return -ENOMEM;
+	}
 
-	err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (err < 0)
-		goto out;
-
-	return 0;
-
-free:
-	kfree_skb(skb2);
-out:
-	/* this avoids a loop in nfnetlink. */
-	return err == -EAGAIN ? -ENOBUFS : err;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 }
 
 static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 3c8cf8748cfb..505f46a32173 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -314,14 +314,11 @@ static int nfnl_acct_get(struct sk_buff *skb, const struct nfnl_info *info,
 			kfree_skb(skb2);
 			break;
 		}
-		ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-				      MSG_DONTWAIT);
-		if (ret > 0)
-			ret = 0;
 
-		/* this avoids a loop in nfnetlink. */
-		return ret == -EAGAIN ? -ENOBUFS : ret;
+		ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+		break;
 	}
+
 	return ret;
 }
 
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 322ac5dd5402..df58cd534ff5 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -663,14 +663,10 @@ static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info,
 			break;
 		}
 
-		ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-				      MSG_DONTWAIT);
-		if (ret > 0)
-			ret = 0;
-
-		/* this avoids a loop in nfnetlink. */
-		return ret == -EAGAIN ? -ENOBUFS : ret;
+		ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+		break;
 	}
+
 	return ret;
 }
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 38848ad68899..c57673d499be 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -287,14 +287,11 @@ static int cttimeout_get_timeout(struct sk_buff *skb,
 			kfree_skb(skb2);
 			break;
 		}
-		ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-				      MSG_DONTWAIT);
-		if (ret > 0)
-			ret = 0;
 
-		/* this avoids a loop in nfnetlink. */
-		return ret == -EAGAIN ? -ENOBUFS : ret;
+		ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+		break;
 	}
+
 	return ret;
 }
 
@@ -427,9 +424,9 @@ static int cttimeout_default_get(struct sk_buff *skb,
 	const struct nf_conntrack_l4proto *l4proto;
 	unsigned int *timeouts = NULL;
 	struct sk_buff *skb2;
-	int ret, err;
 	__u16 l3num;
 	__u8 l4num;
+	int ret;
 
 	if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
 		return -EINVAL;
@@ -438,9 +435,8 @@ static int cttimeout_default_get(struct sk_buff *skb,
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
 	l4proto = nf_ct_l4proto_find(l4num);
 
-	err = -EOPNOTSUPP;
 	if (l4proto->l4proto != l4num)
-		goto err;
+		return -EOPNOTSUPP;
 
 	switch (l4proto->l4proto) {
 	case IPPROTO_ICMP:
@@ -480,13 +476,11 @@ static int cttimeout_default_get(struct sk_buff *skb,
 	}
 
 	if (!timeouts)
-		goto err;
+		return -EOPNOTSUPP;
 
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
+	if (!skb2)
+		return -ENOMEM;
 
 	ret = cttimeout_default_fill_info(info->net, skb2,
 					  NETLINK_CB(skb).portid,
@@ -496,18 +490,10 @@ static int cttimeout_default_get(struct sk_buff *skb,
 					  l3num, l4proto, timeouts);
 	if (ret <= 0) {
 		kfree_skb(skb2);
-		err = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
-	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-			      MSG_DONTWAIT);
-	if (ret > 0)
-		ret = 0;
 
-	/* this avoids a loop in nfnetlink. */
-	return ret == -EAGAIN ? -ENOBUFS : ret;
-err:
-	return err;
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 }
 
 static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,

From 586d5a8bcede47fda7bebf4b36be917c5010db16 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 May 2021 12:30:03 +0200
Subject: [PATCH 0667/3292] netfilter: x_tables: reduce xt_action_param by 8
 byte

The fragment offset in ipv4/ipv6 is a 16bit field, so use
u16 instead of unsigned int.

On 64bit: 40 bytes to 32 bytes. By extension this also reduces
nft_pktinfo (56 to 48 byte).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/ipv6/netfilter/ip6_tables.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 07c6ad8f2a02..28d7027cd460 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -36,8 +36,8 @@ struct xt_action_param {
 		const void *matchinfo, *targinfo;
 	};
 	const struct nf_hook_state *state;
-	int fragoff;
 	unsigned int thoff;
+	u16 fragoff;
 	bool hotdrop;
 };
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e810a23baf99..de2cf3943b91 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -51,7 +51,7 @@ ip6_packet_match(const struct sk_buff *skb,
 		 const char *outdev,
 		 const struct ip6t_ip6 *ip6info,
 		 unsigned int *protoff,
-		 int *fragoff, bool *hotdrop)
+		 u16 *fragoff, bool *hotdrop)
 {
 	unsigned long ret;
 	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);

From 6802db48fc27b8d7f601e96a85771f2205702941 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 May 2021 12:30:04 +0200
Subject: [PATCH 0668/3292] netfilter: reduce size of nf_hook_state on 32bit
 platforms

Reduce size from 28 to 24 bytes on 32bit platforms.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index f0f3a8354c3c..f161569fbe2f 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -65,8 +65,8 @@ struct nf_hook_ops;
 struct sock;
 
 struct nf_hook_state {
-	unsigned int hook;
-	u_int8_t pf;
+	u8 hook;
+	u8 pf;
 	struct net_device *in;
 	struct net_device *out;
 	struct sock *sk;

From 85554eb981e5a8b0b8947611193aef1737081ef2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 May 2021 12:30:05 +0200
Subject: [PATCH 0669/3292] netfilter: nf_tables: add and use nft_sk helper

This allows to change storage placement later on without changing readers.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h    | 5 +++++
 net/ipv4/netfilter/nft_reject_ipv4.c | 2 +-
 net/ipv6/netfilter/nft_reject_ipv6.c | 2 +-
 net/netfilter/nft_reject_inet.c      | 4 ++--
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 27eeb613bb4e..af1228f58e5a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -29,6 +29,11 @@ struct nft_pktinfo {
 	struct xt_action_param		xt;
 };
 
+static inline struct sock *nft_sk(const struct nft_pktinfo *pkt)
+{
+	return pkt->xt.state->sk;
+}
+
 static inline struct net *nft_net(const struct nft_pktinfo *pkt)
 {
 	return pkt->xt.state->net;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index ff437e4ed6db..55fc23a8f7a7 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,7 +27,7 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
 		nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt));
 		break;
 	case NFT_REJECT_TCP_RST:
-		nf_send_reset(nft_net(pkt), pkt->xt.state->sk, pkt->skb,
+		nf_send_reset(nft_net(pkt), nft_sk(pkt), pkt->skb,
 			      nft_hook(pkt));
 		break;
 	default:
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 7969d1f3018d..ed69c768797e 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -28,7 +28,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
 				 nft_hook(pkt));
 		break;
 	case NFT_REJECT_TCP_RST:
-		nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb,
+		nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb,
 			       nft_hook(pkt));
 		break;
 	default:
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 95090186ee90..554caf967baa 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -28,7 +28,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
 					nft_hook(pkt));
 			break;
 		case NFT_REJECT_TCP_RST:
-			nf_send_reset(nft_net(pkt), pkt->xt.state->sk,
+			nf_send_reset(nft_net(pkt), nft_sk(pkt),
 				      pkt->skb, nft_hook(pkt));
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:
@@ -45,7 +45,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
 					 priv->icmp_code, nft_hook(pkt));
 			break;
 		case NFT_REJECT_TCP_RST:
-			nf_send_reset6(nft_net(pkt), pkt->xt.state->sk,
+			nf_send_reset6(nft_net(pkt), nft_sk(pkt),
 				       pkt->skb, nft_hook(pkt));
 			break;
 		case NFT_REJECT_ICMPX_UNREACH:

From 2d7b4ace0754ebaaf71c6824880178d46aa0ab33 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 May 2021 12:30:06 +0200
Subject: [PATCH 0670/3292] netfilter: nf_tables: add and use nft_thoff helper

This allows to change storage placement later on without changing readers.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  5 +++++
 net/netfilter/nf_tables_core.c    |  2 +-
 net/netfilter/nf_tables_trace.c   |  6 +++---
 net/netfilter/nft_exthdr.c        |  8 ++++----
 net/netfilter/nft_flow_offload.c  |  2 +-
 net/netfilter/nft_payload.c       | 10 +++++-----
 net/netfilter/nft_synproxy.c      |  4 ++--
 net/netfilter/nft_tproxy.c        |  4 ++--
 8 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index af1228f58e5a..10c1b8759990 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -34,6 +34,11 @@ static inline struct sock *nft_sk(const struct nft_pktinfo *pkt)
 	return pkt->xt.state->sk;
 }
 
+static inline unsigned int nft_thoff(const struct nft_pktinfo *pkt)
+{
+	return pkt->xt.thoff;
+}
+
 static inline struct net *nft_net(const struct nft_pktinfo *pkt)
 {
 	return pkt->xt.state->net;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index dbc2e945c98e..7780342e2f2d 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -81,7 +81,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
 	else {
 		if (!pkt->tprot_set)
 			return false;
-		ptr = skb_network_header(skb) + pkt->xt.thoff;
+		ptr = skb_network_header(skb) + nft_thoff(pkt);
 	}
 
 	ptr += priv->offset;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 0cf3278007ba..e4fe2f0780eb 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -113,17 +113,17 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
 	int off = skb_network_offset(skb);
 	unsigned int len, nh_end;
 
-	nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len;
+	nh_end = pkt->tprot_set ? nft_thoff(pkt) : skb->len;
 	len = min_t(unsigned int, nh_end - skb_network_offset(skb),
 		    NFT_TRACETYPE_NETWORK_HSIZE);
 	if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
 		return -1;
 
 	if (pkt->tprot_set) {
-		len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+		len = min_t(unsigned int, skb->len - nft_thoff(pkt),
 			    NFT_TRACETYPE_TRANSPORT_HSIZE);
 		if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
-				      pkt->xt.thoff, len))
+				      nft_thoff(pkt), len))
 			return -1;
 	}
 
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 4d0b8e1c40c0..1b0579cb62d0 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
 	if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
 		return NULL;
 
-	tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+	tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
 	if (!tcph)
 		return NULL;
 
@@ -175,7 +175,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
 	if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
 		return NULL;
 
-	return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+	return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer);
 }
 
 static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
@@ -251,7 +251,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
 			return;
 
 		if (skb_ensure_writable(pkt->skb,
-					pkt->xt.thoff + i + priv->len))
+					nft_thoff(pkt) + i + priv->len))
 			return;
 
 		tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
@@ -306,7 +306,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt)
 {
-	unsigned int offset = pkt->xt.thoff + sizeof(struct sctphdr);
+	unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr);
 	struct nft_exthdr *priv = nft_expr_priv(expr);
 	u32 *dest = &regs->data[priv->dreg];
 	const struct sctp_chunkhdr *sch;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 4843dd2b410c..0af34ad41479 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -291,7 +291,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 
 	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
 	case IPPROTO_TCP:
-		tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+		tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
 					  sizeof(_tcph), &_tcph);
 		if (unlikely(!tcph || tcph->fin || tcph->rst))
 			goto out;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 501c5b24cc39..a44b14f6c0dc 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -110,7 +110,7 @@ void nft_payload_eval(const struct nft_expr *expr,
 	case NFT_PAYLOAD_TRANSPORT_HEADER:
 		if (!pkt->tprot_set)
 			goto err;
-		offset = pkt->xt.thoff;
+		offset = nft_thoff(pkt);
 		break;
 	default:
 		BUG();
@@ -507,7 +507,7 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
 		*l4csum_offset = offsetof(struct tcphdr, check);
 		break;
 	case IPPROTO_UDP:
-		if (!nft_payload_udp_checksum(skb, pkt->xt.thoff))
+		if (!nft_payload_udp_checksum(skb, nft_thoff(pkt)))
 			return -1;
 		fallthrough;
 	case IPPROTO_UDPLITE:
@@ -520,7 +520,7 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
 		return -1;
 	}
 
-	*l4csum_offset += pkt->xt.thoff;
+	*l4csum_offset += nft_thoff(pkt);
 	return 0;
 }
 
@@ -612,7 +612,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
 	case NFT_PAYLOAD_TRANSPORT_HEADER:
 		if (!pkt->tprot_set)
 			goto err;
-		offset = pkt->xt.thoff;
+		offset = nft_thoff(pkt);
 		break;
 	default:
 		BUG();
@@ -643,7 +643,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
 	if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
 	    pkt->tprot == IPPROTO_SCTP &&
 	    skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (nft_payload_csum_sctp(skb, pkt->xt.thoff))
+		if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
 			goto err;
 	}
 
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 4fda8b3f1762..a0109fa1e92d 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -109,7 +109,7 @@ static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
 {
 	struct synproxy_options opts = {};
 	struct sk_buff *skb = pkt->skb;
-	int thoff = pkt->xt.thoff;
+	int thoff = nft_thoff(pkt);
 	const struct tcphdr *tcp;
 	struct tcphdr _tcph;
 
@@ -123,7 +123,7 @@ static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
 		return;
 	}
 
-	tcp = skb_header_pointer(skb, pkt->xt.thoff,
+	tcp = skb_header_pointer(skb, thoff,
 				 sizeof(struct tcphdr),
 				 &_tcph);
 	if (!tcp) {
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index accef672088c..18e79c0fd3cf 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -82,9 +82,9 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
 	const struct nft_tproxy *priv = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
-	struct in6_addr taddr;
-	int thoff = pkt->xt.thoff;
+	int thoff = nft_thoff(pkt);
 	struct udphdr _hdr, *hp;
+	struct in6_addr taddr;
 	__be16 tport = 0;
 	struct sock *sk;
 	int l4proto;

From f06ad944b6a92dd9ce95f2e5f4164a8e70d32af5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 May 2021 12:30:07 +0200
Subject: [PATCH 0671/3292] netfilter: nf_tables: remove unused arg in
 nft_set_pktinfo_unspec()

The functions pass extra skb arg, but either its not used or the helpers
can already access it via pkt->skb.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h      |  3 +--
 include/net/netfilter/nf_tables_ipv4.h | 28 +++++++++++-------------
 include/net/netfilter/nf_tables_ipv6.h | 30 +++++++++++---------------
 net/netfilter/nft_chain_filter.c       | 26 +++++++++++-----------
 net/netfilter/nft_chain_nat.c          |  4 ++--
 net/netfilter/nft_chain_route.c        |  4 ++--
 6 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 10c1b8759990..958b8e68bb1a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -72,8 +72,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
 	pkt->xt.state = state;
 }
 
-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
-					  struct sk_buff *skb)
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt)
 {
 	pkt->tprot_set = false;
 	pkt->tprot = 0;
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index 1f7bea39ad1b..b185a9216bf1 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -5,8 +5,7 @@
 #include <net/netfilter/nf_tables.h>
 #include <net/ip.h>
 
-static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
-					struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt)
 {
 	struct iphdr *ip;
 
@@ -17,14 +16,13 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
 	pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 }
 
-static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
-						  struct sk_buff *skb)
+static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
 {
 	struct iphdr *iph, _iph;
 	u32 len, thoff;
 
-	iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
-				 &_iph);
+	iph = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb),
+				 sizeof(*iph), &_iph);
 	if (!iph)
 		return -1;
 
@@ -33,7 +31,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
 
 	len = ntohs(iph->tot_len);
 	thoff = iph->ihl * 4;
-	if (skb->len < len)
+	if (pkt->skb->len < len)
 		return -1;
 	else if (len < thoff)
 		return -1;
@@ -46,29 +44,27 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
 	return 0;
 }
 
-static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
-						 struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
 {
-	if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
-		nft_set_pktinfo_unspec(pkt, skb);
+	if (__nft_set_pktinfo_ipv4_validate(pkt) < 0)
+		nft_set_pktinfo_unspec(pkt);
 }
 
-static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt,
-					       struct sk_buff *skb)
+static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
 {
 	struct iphdr *iph;
 	u32 len, thoff;
 
-	if (!pskb_may_pull(skb, sizeof(*iph)))
+	if (!pskb_may_pull(pkt->skb, sizeof(*iph)))
 		return -1;
 
-	iph = ip_hdr(skb);
+	iph = ip_hdr(pkt->skb);
 	if (iph->ihl < 5 || iph->version != 4)
 		goto inhdr_error;
 
 	len = ntohs(iph->tot_len);
 	thoff = iph->ihl * 4;
-	if (skb->len < len) {
+	if (pkt->skb->len < len) {
 		__IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INTRUNCATEDPKTS);
 		return -1;
 	} else if (len < thoff) {
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index 867de29f3f7a..bf132d488b17 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -6,8 +6,7 @@
 #include <net/ipv6.h>
 #include <net/netfilter/nf_tables.h>
 
-static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-					struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt)
 {
 	unsigned int flags = IP6_FH_F_AUTH;
 	int protohdr, thoff = 0;
@@ -15,7 +14,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 
 	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
 	if (protohdr < 0) {
-		nft_set_pktinfo_unspec(pkt, skb);
+		nft_set_pktinfo_unspec(pkt);
 		return;
 	}
 
@@ -25,8 +24,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 	pkt->xt.fragoff = frag_off;
 }
 
-static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
-						  struct sk_buff *skb)
+static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	unsigned int flags = IP6_FH_F_AUTH;
@@ -36,8 +34,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
 	int protohdr;
 	u32 pkt_len;
 
-	ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
-				  &_ip6h);
+	ip6h = skb_header_pointer(pkt->skb, skb_network_offset(pkt->skb),
+				  sizeof(*ip6h), &_ip6h);
 	if (!ip6h)
 		return -1;
 
@@ -45,7 +43,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
 		return -1;
 
 	pkt_len = ntohs(ip6h->payload_len);
-	if (pkt_len + sizeof(*ip6h) > skb->len)
+	if (pkt_len + sizeof(*ip6h) > pkt->skb->len)
 		return -1;
 
 	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
@@ -63,15 +61,13 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
 #endif
 }
 
-static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
-						 struct sk_buff *skb)
+static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
 {
-	if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
-		nft_set_pktinfo_unspec(pkt, skb);
+	if (__nft_set_pktinfo_ipv6_validate(pkt) < 0)
+		nft_set_pktinfo_unspec(pkt);
 }
 
-static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt,
-					       struct sk_buff *skb)
+static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	unsigned int flags = IP6_FH_F_AUTH;
@@ -82,15 +78,15 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt,
 	int protohdr;
 	u32 pkt_len;
 
-	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+	if (!pskb_may_pull(pkt->skb, sizeof(*ip6h)))
 		return -1;
 
-	ip6h = ipv6_hdr(skb);
+	ip6h = ipv6_hdr(pkt->skb);
 	if (ip6h->version != 6)
 		goto inhdr_error;
 
 	pkt_len = ntohs(ip6h->payload_len);
-	if (pkt_len + sizeof(*ip6h) > skb->len) {
+	if (pkt_len + sizeof(*ip6h) > pkt->skb->len) {
 		idev = __in6_dev_get(nft_in(pkt));
 		__IP6_INC_STATS(nft_net(pkt), idev, IPSTATS_MIB_INTRUNCATEDPKTS);
 		return -1;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 363bdd7044ec..5b02408a920b 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -18,7 +18,7 @@ static unsigned int nft_do_chain_ipv4(void *priv,
 	struct nft_pktinfo pkt;
 
 	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv4(&pkt, skb);
+	nft_set_pktinfo_ipv4(&pkt);
 
 	return nft_do_chain(&pkt, priv);
 }
@@ -62,7 +62,7 @@ static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
 	struct nft_pktinfo pkt;
 
 	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_unspec(&pkt, skb);
+	nft_set_pktinfo_unspec(&pkt);
 
 	return nft_do_chain(&pkt, priv);
 }
@@ -102,7 +102,7 @@ static unsigned int nft_do_chain_ipv6(void *priv,
 	struct nft_pktinfo pkt;
 
 	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv6(&pkt, skb);
+	nft_set_pktinfo_ipv6(&pkt);
 
 	return nft_do_chain(&pkt, priv);
 }
@@ -149,10 +149,10 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
 
 	switch (state->pf) {
 	case NFPROTO_IPV4:
-		nft_set_pktinfo_ipv4(&pkt, skb);
+		nft_set_pktinfo_ipv4(&pkt);
 		break;
 	case NFPROTO_IPV6:
-		nft_set_pktinfo_ipv6(&pkt, skb);
+		nft_set_pktinfo_ipv6(&pkt);
 		break;
 	default:
 		break;
@@ -174,7 +174,7 @@ static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
 		ingress_state.hook = NF_INET_INGRESS;
 		nft_set_pktinfo(&pkt, skb, &ingress_state);
 
-		if (nft_set_pktinfo_ipv4_ingress(&pkt, skb) < 0)
+		if (nft_set_pktinfo_ipv4_ingress(&pkt) < 0)
 			return NF_DROP;
 		break;
 	case htons(ETH_P_IPV6):
@@ -182,7 +182,7 @@ static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
 		ingress_state.hook = NF_INET_INGRESS;
 		nft_set_pktinfo(&pkt, skb, &ingress_state);
 
-		if (nft_set_pktinfo_ipv6_ingress(&pkt, skb) < 0)
+		if (nft_set_pktinfo_ipv6_ingress(&pkt) < 0)
 			return NF_DROP;
 		break;
 	default:
@@ -238,13 +238,13 @@ nft_do_chain_bridge(void *priv,
 
 	switch (eth_hdr(skb)->h_proto) {
 	case htons(ETH_P_IP):
-		nft_set_pktinfo_ipv4_validate(&pkt, skb);
+		nft_set_pktinfo_ipv4_validate(&pkt);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_set_pktinfo_ipv6_validate(&pkt, skb);
+		nft_set_pktinfo_ipv6_validate(&pkt);
 		break;
 	default:
-		nft_set_pktinfo_unspec(&pkt, skb);
+		nft_set_pktinfo_unspec(&pkt);
 		break;
 	}
 
@@ -293,13 +293,13 @@ static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		nft_set_pktinfo_ipv4_validate(&pkt, skb);
+		nft_set_pktinfo_ipv4_validate(&pkt);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_set_pktinfo_ipv6_validate(&pkt, skb);
+		nft_set_pktinfo_ipv6_validate(&pkt);
 		break;
 	default:
-		nft_set_pktinfo_unspec(&pkt, skb);
+		nft_set_pktinfo_unspec(&pkt);
 		break;
 	}
 
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index eac4a901233f..98e4946100c5 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -17,12 +17,12 @@ static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
 	switch (state->pf) {
 #ifdef CONFIG_NF_TABLES_IPV4
 	case NFPROTO_IPV4:
-		nft_set_pktinfo_ipv4(&pkt, skb);
+		nft_set_pktinfo_ipv4(&pkt);
 		break;
 #endif
 #ifdef CONFIG_NF_TABLES_IPV6
 	case NFPROTO_IPV6:
-		nft_set_pktinfo_ipv6(&pkt, skb);
+		nft_set_pktinfo_ipv6(&pkt);
 		break;
 #endif
 	default:
diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c
index edd02cda57fc..925db0dce48d 100644
--- a/net/netfilter/nft_chain_route.c
+++ b/net/netfilter/nft_chain_route.c
@@ -26,7 +26,7 @@ static unsigned int nf_route_table_hook4(void *priv,
 	u8 tos;
 
 	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv4(&pkt, skb);
+	nft_set_pktinfo_ipv4(&pkt);
 
 	mark = skb->mark;
 	iph = ip_hdr(skb);
@@ -74,7 +74,7 @@ static unsigned int nf_route_table_hook6(void *priv,
 	int err;
 
 	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv6(&pkt, skb);
+	nft_set_pktinfo_ipv6(&pkt);
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority */
 	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));

From 015dbf5662fd689d581c0bc980711b073ca09a1a Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Fri, 28 May 2021 16:55:55 +0800
Subject: [PATCH 0672/3292] ehea: fix error return code in ehea_restart_qps()

Fix to return -EFAULT from the error handling case instead of 0, as done
elsewhere in this function.

By the way, when get_zeroed_page() fails, directly return -ENOMEM to
simplify code.

Fixes: 2c69448bbced ("ehea: DLPAR memory add fix")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210528085555.9390-1-thunder.leizhen@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ibm/ehea/ehea_main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 8fddce769c14..d5df131b183c 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2618,10 +2618,8 @@ static int ehea_restart_qps(struct net_device *dev)
 	u16 dummy16 = 0;
 
 	cb0 = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!cb0) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!cb0)
+		return -ENOMEM;
 
 	for (i = 0; i < (port->num_def_qps); i++) {
 		struct ehea_port_res *pr =  &port->port_res[i];
@@ -2641,6 +2639,7 @@ static int ehea_restart_qps(struct net_device *dev)
 					    cb0);
 		if (hret != H_SUCCESS) {
 			netdev_err(dev, "query_ehea_qp failed (1)\n");
+			ret = -EFAULT;
 			goto out;
 		}
 
@@ -2653,6 +2652,7 @@ static int ehea_restart_qps(struct net_device *dev)
 					     &dummy64, &dummy16, &dummy16);
 		if (hret != H_SUCCESS) {
 			netdev_err(dev, "modify_ehea_qp failed (1)\n");
+			ret = -EFAULT;
 			goto out;
 		}
 
@@ -2661,6 +2661,7 @@ static int ehea_restart_qps(struct net_device *dev)
 					    cb0);
 		if (hret != H_SUCCESS) {
 			netdev_err(dev, "query_ehea_qp failed (2)\n");
+			ret = -EFAULT;
 			goto out;
 		}
 

From e860fa9b69e1bf077ba4725ee4be7b9443a3682a Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Thu, 20 May 2021 09:37:48 -0500
Subject: [PATCH 0673/3292] iidc: Introduce iidc.h

Introduce a shared header file used by the 'ice' Intel networking driver
providing RDMA support and the 'irdma' driver to provide a private
interface.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 MAINTAINERS                    |   1 +
 include/linux/net/intel/iidc.h | 100 +++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 include/linux/net/intel/iidc.h

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f..34d2bf36b5ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9130,6 +9130,7 @@ F:	Documentation/networking/device_drivers/ethernet/intel/
 F:	drivers/net/ethernet/intel/
 F:	drivers/net/ethernet/intel/*/
 F:	include/linux/avf/virtchnl.h
+F:	include/linux/net/intel/iidc.h
 
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 M:	Maik Broemme <mbroemme@libmpq.org>
diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h
new file mode 100644
index 000000000000..e32f6712aee0
--- /dev/null
+++ b/include/linux/net/intel/iidc.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _IIDC_H_
+#define _IIDC_H_
+
+#include <linux/auxiliary_bus.h>
+#include <linux/dcbnl.h>
+#include <linux/device.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+
+enum iidc_event_type {
+	IIDC_EVENT_BEFORE_MTU_CHANGE,
+	IIDC_EVENT_AFTER_MTU_CHANGE,
+	IIDC_EVENT_BEFORE_TC_CHANGE,
+	IIDC_EVENT_AFTER_TC_CHANGE,
+	IIDC_EVENT_CRIT_ERR,
+	IIDC_EVENT_NBITS		/* must be last */
+};
+
+enum iidc_reset_type {
+	IIDC_PFR,
+	IIDC_CORER,
+	IIDC_GLOBR,
+};
+
+#define IIDC_MAX_USER_PRIORITY		8
+
+/* Struct to hold per RDMA Qset info */
+struct iidc_rdma_qset_params {
+	/* Qset TEID returned to the RDMA driver in
+	 * ice_add_rdma_qset and used by RDMA driver
+	 * for calls to ice_del_rdma_qset
+	 */
+	u32 teid;	/* Qset TEID */
+	u16 qs_handle; /* RDMA driver provides this */
+	u16 vport_id; /* VSI index */
+	u8 tc; /* TC branch the Qset should belong to */
+};
+
+struct iidc_qos_info {
+	u64 tc_ctx;
+	u8 rel_bw;
+	u8 prio_type;
+	u8 egress_virt_up;
+	u8 ingress_virt_up;
+};
+
+/* Struct to pass QoS info */
+struct iidc_qos_params {
+	struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS];
+	u8 up2tc[IIDC_MAX_USER_PRIORITY];
+	u8 vport_relative_bw;
+	u8 vport_priority_type;
+	u8 num_tc;
+};
+
+struct iidc_event {
+	DECLARE_BITMAP(type, IIDC_EVENT_NBITS);
+	u32 reg;
+};
+
+struct ice_pf;
+
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset);
+int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type);
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable);
+void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos);
+
+#define IIDC_RDMA_ROCE_NAME	"roce"
+
+/* Structure representing auxiliary driver tailored information about the core
+ * PCI dev, each auxiliary driver using the IIDC interface will have an
+ * instance of this struct dedicated to it.
+ */
+
+struct iidc_auxiliary_dev {
+	struct auxiliary_device adev;
+	struct ice_pf *pf;
+};
+
+/* structure representing the auxiliary driver. This struct is to be
+ * allocated and populated by the auxiliary driver's owner. The core PCI
+ * driver will access these ops by performing a container_of on the
+ * auxiliary_device->dev.driver.
+ */
+struct iidc_auxiliary_drv {
+	struct auxiliary_driver adrv;
+	/* This event_handler is meant to be a blocking call.  For instance,
+	 * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not
+	 * return until the auxiliary driver is ready for the MTU change to
+	 * happen.
+	 */
+	void (*event_handler)(struct ice_pf *pf, struct iidc_event *event);
+};
+
+#endif /* _IIDC_H_*/

From d25a0fc41c1f927bb914e72a03c1898052557406 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Thu, 20 May 2021 09:37:49 -0500
Subject: [PATCH 0674/3292] ice: Initialize RDMA support

Probe the device's capabilities to see if it supports RDMA. If so, allocate
and reserve resources to support its operation; populate structures with
initial values.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile       |  1 +
 drivers/net/ethernet/intel/ice/ice.h          | 30 ++++++-
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  1 +
 drivers/net/ethernet/intel/ice/ice_common.c   | 17 +++-
 drivers/net/ethernet/intel/ice/ice_idc.c      | 43 ++++++++++
 drivers/net/ethernet/intel/ice/ice_lag.c      |  2 +
 drivers/net/ethernet/intel/ice/ice_lib.c      | 11 +++
 drivers/net/ethernet/intel/ice/ice_lib.h      |  2 +-
 drivers/net/ethernet/intel/ice/ice_main.c     | 84 +++++++++++++++++--
 drivers/net/ethernet/intel/ice/ice_type.h     |  1 +
 10 files changed, 183 insertions(+), 9 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_idc.c

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 07fe857e9e3a..dfb64fb504a2 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -22,6 +22,7 @@ ice-y := ice_main.o	\
 	 ice_ethtool_fdir.o \
 	 ice_flex_pipe.o \
 	 ice_flow.o	\
+	 ice_idc.o	\
 	 ice_devlink.o	\
 	 ice_fw_update.o \
 	 ice_lag.o	\
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index e35db3ff583b..64e3633493c1 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -78,6 +78,8 @@
 #define ICE_MIN_LAN_OICR_MSIX	1
 #define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
 #define ICE_FDIR_MSIX		2
+#define ICE_RDMA_NUM_AEQ_MSIX	4
+#define ICE_MIN_RDMA_MSIX	2
 #define ICE_NO_VSI		0xffff
 #define ICE_VSI_MAP_CONTIG	0
 #define ICE_VSI_MAP_SCATTER	1
@@ -88,8 +90,9 @@
 #define ICE_MAX_LG_RSS_QS	256
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
+#define ICE_RES_RDMA_VEC_ID	(ICE_RES_MISC_VEC_ID - 1)
 /* All VF control VSIs share the same IRQ, so assign a unique ID for them */
-#define ICE_RES_VF_CTRL_VEC_ID	(ICE_RES_MISC_VEC_ID - 1)
+#define ICE_RES_VF_CTRL_VEC_ID	(ICE_RES_RDMA_VEC_ID - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
 #define ICE_INVAL_VFID		256
 
@@ -373,12 +376,14 @@ struct ice_q_vector {
 
 enum ice_pf_flags {
 	ICE_FLAG_FLTR_SYNC,
+	ICE_FLAG_RDMA_ENA,
 	ICE_FLAG_RSS_ENA,
 	ICE_FLAG_SRIOV_ENA,
 	ICE_FLAG_SRIOV_CAPABLE,
 	ICE_FLAG_DCB_CAPABLE,
 	ICE_FLAG_DCB_ENA,
 	ICE_FLAG_FD_ENA,
+	ICE_FLAG_AUX_ENA,
 	ICE_FLAG_ADV_FEATURES,
 	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
 	ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
@@ -439,6 +444,8 @@ struct ice_pf {
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
 	struct mutex tc_mutex;		/* lock to protect TC changes */
 	u32 msg_enable;
+	u16 num_rdma_msix;		/* Total MSIX vectors for RDMA driver */
+	u16 rdma_base_vector;
 
 	/* spinlock to protect the AdminQ wait list */
 	spinlock_t aq_wait_lock;
@@ -471,6 +478,8 @@ struct ice_pf {
 	unsigned long tx_timeout_last_recovery;
 	u32 tx_timeout_recovery_level;
 	char int_name[ICE_INT_NAME_STR_LEN];
+	struct auxiliary_device *adev;
+	int aux_idx;
 	u32 sw_int_count;
 
 	__le64 nvm_phy_type_lo; /* NVM PHY type low */
@@ -636,6 +645,7 @@ int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+int ice_init_rdma(struct ice_pf *pf);
 const char *ice_stat_str(enum ice_status stat_err);
 const char *ice_aq_str(enum ice_aq_err aq_err);
 bool ice_is_wol_supported(struct ice_hw *hw);
@@ -660,4 +670,22 @@ int ice_open_internal(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
 void ice_service_task_schedule(struct ice_pf *pf);
 
+/**
+ * ice_set_rdma_cap - enable RDMA support
+ * @pf: PF struct
+ */
+static inline void ice_set_rdma_cap(struct ice_pf *pf)
+{
+	if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix)
+		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+}
+
+/**
+ * ice_clear_rdma_cap - disable RDMA support
+ * @pf: PF struct
+ */
+static inline void ice_clear_rdma_cap(struct ice_pf *pf)
+{
+	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+}
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 5cdfe406af84..cba6933a7a0e 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -115,6 +115,7 @@ struct ice_aqc_list_caps_elem {
 #define ICE_AQC_CAPS_PENDING_OROM_VER			0x004B
 #define ICE_AQC_CAPS_NET_VER				0x004C
 #define ICE_AQC_CAPS_PENDING_NET_VER			0x004D
+#define ICE_AQC_CAPS_RDMA				0x0051
 #define ICE_AQC_CAPS_NVM_MGMT				0x0080
 
 	u8 major_ver;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index e93b1e40f627..6d649e5d1a19 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1062,7 +1062,8 @@ enum ice_status ice_check_reset(struct ice_hw *hw)
 				 GLNVM_ULD_POR_DONE_1_M |\
 				 GLNVM_ULD_PCIER_DONE_2_M)
 
-	uld_mask = ICE_RESET_DONE_MASK;
+	uld_mask = ICE_RESET_DONE_MASK | (hw->func_caps.common_cap.rdma ?
+					  GLNVM_ULD_PE_DONE_M : 0);
 
 	/* Device is Active; check Global Reset processes are done */
 	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
@@ -1938,6 +1939,10 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
 		ice_debug(hw, ICE_DBG_INIT, "%s: nvm_unified_update = %d\n", prefix,
 			  caps->nvm_unified_update);
 		break;
+	case ICE_AQC_CAPS_RDMA:
+		caps->rdma = (number == 1);
+		ice_debug(hw, ICE_DBG_INIT, "%s: rdma = %d\n", prefix, caps->rdma);
+		break;
 	case ICE_AQC_CAPS_MAX_MTU:
 		caps->max_mtu = number;
 		ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
@@ -1971,6 +1976,16 @@ ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
 		caps->maxtc = 4;
 		ice_debug(hw, ICE_DBG_INIT, "reducing maxtc to %d (based on #ports)\n",
 			  caps->maxtc);
+		if (caps->rdma) {
+			ice_debug(hw, ICE_DBG_INIT, "forcing RDMA off\n");
+			caps->rdma = 0;
+		}
+
+		/* print message only when processing device capabilities
+		 * during initialization.
+		 */
+		if (caps == &hw->dev_caps.common_cap)
+			dev_info(ice_hw_to_dev(hw), "RDMA functionality is not available with the current device configuration.\n");
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
new file mode 100644
index 000000000000..c419c9cb316d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+/* Inter-Driver Communication */
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver
+ * @pf: board private structure to initialize
+ */
+static int ice_reserve_rdma_qvector(struct ice_pf *pf)
+{
+	if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) {
+		int index;
+
+		index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix,
+				    ICE_RES_RDMA_VEC_ID);
+		if (index < 0)
+			return index;
+		pf->num_avail_sw_msix -= pf->num_rdma_msix;
+		pf->rdma_base_vector = (u16)index;
+	}
+	return 0;
+}
+
+/**
+ * ice_init_rdma - initializes PF for RDMA use
+ * @pf: ptr to ice_pf
+ */
+int ice_init_rdma(struct ice_pf *pf)
+{
+	struct device *dev = &pf->pdev->dev;
+	int ret;
+
+	/* Reserve vector resources */
+	ret = ice_reserve_rdma_qvector(pf);
+	if (ret < 0)
+		dev_err(dev, "failed to reserve vectors for RDMA\n");
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index 4599fc3b4ed8..37c18c66b5c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -172,6 +172,7 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info)
 	}
 
 	ice_clear_sriov_cap(pf);
+	ice_clear_rdma_cap(pf);
 
 	lag->bonded = true;
 	lag->role = ICE_LAG_UNSET;
@@ -222,6 +223,7 @@ ice_lag_unlink(struct ice_lag *lag,
 	}
 
 	ice_set_sriov_cap(pf);
+	ice_set_rdma_cap(pf);
 	lag->bonded = false;
 	lag->role = ICE_LAG_NONE;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 82e2ce23df3d..56e1ae558761 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -616,6 +616,17 @@ bool ice_is_safe_mode(struct ice_pf *pf)
 	return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
 }
 
+/**
+ * ice_is_aux_ena
+ * @pf: pointer to the PF struct
+ *
+ * returns true if AUX devices/drivers are supported, false otherwise
+ */
+bool ice_is_aux_ena(struct ice_pf *pf)
+{
+	return test_bit(ICE_FLAG_AUX_ENA, pf->flags);
+}
+
 /**
  * ice_vsi_clean_rss_flow_fld - Delete RSS configuration
  * @vsi: the VSI being cleaned up
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 511c2316c40c..5ec857f71459 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -102,7 +102,7 @@ enum ice_status
 ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
 
 bool ice_is_safe_mode(struct ice_pf *pf);
-
+bool ice_is_aux_ena(struct ice_pf *pf);
 bool ice_is_dflt_vsi_in_use(struct ice_sw *sw);
 
 bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4ee85a217c6f..e307317e819a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -35,6 +35,8 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX
 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 
+static DEFINE_IDA(ice_aux_ida);
+
 static struct workqueue_struct *ice_wq;
 static const struct net_device_ops ice_netdev_safe_mode_ops;
 static const struct net_device_ops ice_netdev_ops;
@@ -3276,6 +3278,12 @@ static void ice_set_pf_caps(struct ice_pf *pf)
 {
 	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
 
+	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+	clear_bit(ICE_FLAG_AUX_ENA, pf->flags);
+	if (func_caps->common_cap.rdma) {
+		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+		set_bit(ICE_FLAG_AUX_ENA, pf->flags);
+	}
 	clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
 	if (func_caps->common_cap.dcb)
 		set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
@@ -3355,11 +3363,12 @@ static int ice_init_pf(struct ice_pf *pf)
  */
 static int ice_ena_msix_range(struct ice_pf *pf)
 {
-	int v_left, v_actual, v_other, v_budget = 0;
+	int num_cpus, v_left, v_actual, v_other, v_budget = 0;
 	struct device *dev = ice_pf_to_dev(pf);
 	int needed, err, i;
 
 	v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
+	num_cpus = num_online_cpus();
 
 	/* reserve for LAN miscellaneous handler */
 	needed = ICE_MIN_LAN_OICR_MSIX;
@@ -3381,13 +3390,23 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 	v_other = v_budget;
 
 	/* reserve vectors for LAN traffic */
-	needed = min_t(int, num_online_cpus(), v_left);
+	needed = num_cpus;
 	if (v_left < needed)
 		goto no_hw_vecs_left_err;
 	pf->num_lan_msix = needed;
 	v_budget += needed;
 	v_left -= needed;
 
+	/* reserve vectors for RDMA auxiliary driver */
+	if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) {
+		needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
+		if (v_left < needed)
+			goto no_hw_vecs_left_err;
+		pf->num_rdma_msix = needed;
+		v_budget += needed;
+		v_left -= needed;
+	}
+
 	pf->msix_entries = devm_kcalloc(dev, v_budget,
 					sizeof(*pf->msix_entries), GFP_KERNEL);
 	if (!pf->msix_entries) {
@@ -3417,16 +3436,46 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 			err = -ERANGE;
 			goto msix_err;
 		} else {
-			int v_traffic = v_actual - v_other;
+			int v_remain = v_actual - v_other;
+			int v_rdma = 0, v_min_rdma = 0;
+
+			if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) {
+				/* Need at least 1 interrupt in addition to
+				 * AEQ MSIX
+				 */
+				v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;
+				v_min_rdma = ICE_MIN_RDMA_MSIX;
+			}
 
 			if (v_actual == ICE_MIN_MSIX ||
-			    v_traffic < ICE_MIN_LAN_TXRX_MSIX)
+			    v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) {
+				dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n");
+				clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+
+				pf->num_rdma_msix = 0;
 				pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
-			else
-				pf->num_lan_msix = v_traffic;
+			} else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
+				   (v_remain - v_rdma < v_rdma)) {
+				/* Support minimum RDMA and give remaining
+				 * vectors to LAN MSIX
+				 */
+				pf->num_rdma_msix = v_min_rdma;
+				pf->num_lan_msix = v_remain - v_min_rdma;
+			} else {
+				/* Split remaining MSIX with RDMA after
+				 * accounting for AEQ MSIX
+				 */
+				pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
+						    ICE_RDMA_NUM_AEQ_MSIX;
+				pf->num_lan_msix = v_remain - pf->num_rdma_msix;
+			}
 
 			dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
 				   pf->num_lan_msix);
+
+			if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+				dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
+					   pf->num_rdma_msix);
 		}
 	}
 
@@ -3441,6 +3490,7 @@ no_hw_vecs_left_err:
 		needed, v_left);
 	err = -ERANGE;
 exit_err:
+	pf->num_rdma_msix = 0;
 	pf->num_lan_msix = 0;
 	return err;
 }
@@ -4268,8 +4318,29 @@ probe_done:
 
 	/* ready to go, so clear down state bit */
 	clear_bit(ICE_DOWN, pf->state);
+	if (ice_is_aux_ena(pf)) {
+		pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL);
+		if (pf->aux_idx < 0) {
+			dev_err(dev, "Failed to allocate device ID for AUX driver\n");
+			err = -ENOMEM;
+			goto err_netdev_reg;
+		}
+
+		err = ice_init_rdma(pf);
+		if (err) {
+			dev_err(dev, "Failed to initialize RDMA: %d\n", err);
+			err = -EIO;
+			goto err_init_aux_unroll;
+		}
+	} else {
+		dev_warn(dev, "RDMA is not supported on this device\n");
+	}
+
 	return 0;
 
+err_init_aux_unroll:
+	pf->adev = NULL;
+	ida_free(&ice_aux_ida, pf->aux_idx);
 err_netdev_reg:
 err_send_version_unroll:
 	ice_vsi_release_all(pf);
@@ -4383,6 +4454,7 @@ static void ice_remove(struct pci_dev *pdev)
 	ice_service_task_stop(pf);
 
 	ice_aq_cancel_waiting_tasks(pf);
+	ida_free(&ice_aux_ida, pf->aux_idx);
 
 	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_deinit_lag(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 4474dd6a7ba1..b86ae7910a02 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -262,6 +262,7 @@ struct ice_hw_common_caps {
 	u8 rss_table_entry_width;	/* RSS Entry width in bits */
 
 	u8 dcb;
+	u8 rdma;
 
 	bool nvm_update_pending_nvm;
 	bool nvm_update_pending_orom;

From 348048e724a0e8f08b63948d728d27596f6d3769 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Thu, 20 May 2021 09:37:50 -0500
Subject: [PATCH 0675/3292] ice: Implement iidc operations

Add implementations for supporting iidc operations for device operation
such as allocation of resources and event notifications.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |   8 +-
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  32 +++
 drivers/net/ethernet/intel/ice/ice_common.c   | 203 ++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |   9 +
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c  |  19 ++
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |   3 +-
 drivers/net/ethernet/intel/ice/ice_idc.c      | 224 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_idc_int.h  |  14 ++
 drivers/net/ethernet/intel/ice/ice_main.c     |  47 ++--
 drivers/net/ethernet/intel/ice/ice_sched.c    |  69 +++++-
 drivers/net/ethernet/intel/ice/ice_switch.c   |  28 +++
 drivers/net/ethernet/intel/ice/ice_switch.h   |   4 +
 drivers/net/ethernet/intel/ice/ice_type.h     |   3 +
 13 files changed, 639 insertions(+), 24 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_idc_int.h

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 64e3633493c1..225f8a55eb3f 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -55,6 +55,7 @@
 #include "ice_switch.h"
 #include "ice_common.h"
 #include "ice_sched.h"
+#include "ice_idc_int.h"
 #include "ice_virtchnl_pf.h"
 #include "ice_sriov.h"
 #include "ice_fdir.h"
@@ -206,9 +207,9 @@ enum ice_pf_state {
 	ICE_NEEDS_RESTART,
 	ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
 	ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
-	ICE_PFR_REQ,			/* set by driver and peers */
-	ICE_CORER_REQ,		/* set by driver and peers */
-	ICE_GLOBR_REQ,		/* set by driver and peers */
+	ICE_PFR_REQ,		/* set by driver */
+	ICE_CORER_REQ,		/* set by driver */
+	ICE_GLOBR_REQ,		/* set by driver */
 	ICE_CORER_RECV,		/* set by OICR handler */
 	ICE_GLOBR_RECV,		/* set by OICR handler */
 	ICE_EMPR_RECV,		/* set by OICR handler */
@@ -335,6 +336,7 @@ struct ice_vsi {
 	u16 req_rxq;			 /* User requested Rx queues */
 	u16 num_rx_desc;
 	u16 num_tx_desc;
+	u16 qset_handle[ICE_MAX_TRAFFIC_CLASS];
 	struct ice_tc_cfg tc_cfg;
 	struct bpf_prog *xdp_prog;
 	struct ice_ring **xdp_rings;	 /* XDP ring array */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index cba6933a7a0e..ff11a618bef7 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1685,6 +1685,36 @@ struct ice_aqc_dis_txq_item {
 	__le16 q_id[];
 } __packed;
 
+/* Add Tx RDMA Queue Set (indirect 0x0C33) */
+struct ice_aqc_add_rdma_qset {
+	u8 num_qset_grps;
+	u8 reserved[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is the descriptor of each Qset entry for the Add Tx RDMA Queue Set
+ * command (0x0C33). Only used within struct ice_aqc_add_rdma_qset.
+ */
+struct ice_aqc_add_tx_rdma_qset_entry {
+	__le16 tx_qset_id;
+	u8 rsvd[2];
+	__le32 qset_teid;
+	struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx RDMA Queue Set(0x0C33)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_rdma_qset is variable due to the variable
+ * number of queues in each group!
+ */
+struct ice_aqc_add_rdma_qset_data {
+	__le32 parent_teid;
+	__le16 num_qsets;
+	u8 rsvd[2];
+	struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[];
+};
+
 /* Configure Firmware Logging Command (indirect 0xFF09)
  * Logging Information Read Response (indirect 0xFF10)
  * Note: The 0xFF10 command has no input parameters.
@@ -1881,6 +1911,7 @@ struct ice_aq_desc {
 		struct ice_aqc_get_set_rss_key get_set_rss_key;
 		struct ice_aqc_add_txqs add_txqs;
 		struct ice_aqc_dis_txqs dis_txqs;
+		struct ice_aqc_add_rdma_qset add_rdma_qset;
 		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
 		struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
 		struct ice_aqc_fw_logging fw_logging;
@@ -2029,6 +2060,7 @@ enum ice_adminq_opc {
 	/* Tx queue handling commands/events */
 	ice_aqc_opc_add_txqs				= 0x0C30,
 	ice_aqc_opc_dis_txqs				= 0x0C31,
+	ice_aqc_opc_add_rdma_qset			= 0x0C33,
 
 	/* package commands */
 	ice_aqc_opc_download_pkg			= 0x0C40,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 6d649e5d1a19..b8cc737ea261 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice_common.h"
+#include "ice_lib.h"
 #include "ice_sched.h"
 #include "ice_adminq_cmd.h"
 #include "ice_flow.h"
@@ -3650,6 +3651,52 @@ do_aq:
 	return status;
 }
 
+/**
+ * ice_aq_add_rdma_qsets
+ * @hw: pointer to the hardware structure
+ * @num_qset_grps: Number of RDMA Qset groups
+ * @qset_list: list of Qset groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx RDMA Qsets (0x0C33)
+ */
+static int
+ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
+		      struct ice_aqc_add_rdma_qset_data *qset_list,
+		      u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_rdma_qset_data *list;
+	struct ice_aqc_add_rdma_qset *cmd;
+	struct ice_aq_desc desc;
+	u16 i, sum_size = 0;
+
+	cmd = &desc.params.add_rdma_qset;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_rdma_qset);
+
+	if (num_qset_grps > ICE_LAN_TXQ_MAX_QGRPS)
+		return -EINVAL;
+
+	for (i = 0, list = qset_list; i < num_qset_grps; i++) {
+		u16 num_qsets = le16_to_cpu(list->num_qsets);
+
+		sum_size += struct_size(list, rdma_qsets, num_qsets);
+		list = (struct ice_aqc_add_rdma_qset_data *)(list->rdma_qsets +
+							     num_qsets);
+	}
+
+	if (buf_size != sum_size)
+		return -EINVAL;
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_qset_grps = num_qset_grps;
+
+	return ice_status_to_errno(ice_aq_send_cmd(hw, &desc, qset_list,
+						   buf_size, cd));
+}
+
 /* End of FW Admin Queue command wrappers */
 
 /**
@@ -4147,6 +4194,162 @@ ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
 			      ICE_SCHED_NODE_OWNER_LAN);
 }
 
+/**
+ * ice_cfg_vsi_rdma - configure the VSI RDMA queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_rdmaqs: max RDMA queues array per TC
+ *
+ * This function adds/updates the VSI RDMA queues per TC.
+ */
+int
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+		 u16 *max_rdmaqs)
+{
+	return ice_status_to_errno(ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap,
+						  max_rdmaqs,
+						  ICE_SCHED_NODE_OWNER_RDMA));
+}
+
+/**
+ * ice_ena_vsi_rdma_qset
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @rdma_qset: pointer to RDMA Qset
+ * @num_qsets: number of RDMA Qsets
+ * @qset_teid: pointer to Qset node TEIDs
+ *
+ * This function adds RDMA Qset
+ */
+int
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 *rdma_qset, u16 num_qsets, u32 *qset_teid)
+{
+	struct ice_aqc_txsched_elem_data node = { 0 };
+	struct ice_aqc_add_rdma_qset_data *buf;
+	struct ice_sched_node *parent;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u16 i, buf_size;
+	int ret;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+	hw = pi->hw;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	buf_size = struct_size(buf, rdma_qsets, num_qsets);
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	mutex_lock(&pi->sched_lock);
+
+	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+					    ICE_SCHED_NODE_OWNER_RDMA);
+	if (!parent) {
+		ret = -EINVAL;
+		goto rdma_error_exit;
+	}
+	buf->parent_teid = parent->info.node_teid;
+	node.parent_teid = parent->info.node_teid;
+
+	buf->num_qsets = cpu_to_le16(num_qsets);
+	for (i = 0; i < num_qsets; i++) {
+		buf->rdma_qsets[i].tx_qset_id = cpu_to_le16(rdma_qset[i]);
+		buf->rdma_qsets[i].info.valid_sections =
+			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+			ICE_AQC_ELEM_VALID_EIR;
+		buf->rdma_qsets[i].info.generic = 0;
+		buf->rdma_qsets[i].info.cir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->rdma_qsets[i].info.cir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+		buf->rdma_qsets[i].info.eir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->rdma_qsets[i].info.eir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+	}
+	ret = ice_aq_add_rdma_qsets(hw, 1, buf, buf_size, NULL);
+	if (ret) {
+		ice_debug(hw, ICE_DBG_RDMA, "add RDMA qset failed\n");
+		goto rdma_error_exit;
+	}
+	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+	for (i = 0; i < num_qsets; i++) {
+		node.node_teid = buf->rdma_qsets[i].qset_teid;
+		status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1,
+					    &node);
+		if (status) {
+			ret = ice_status_to_errno(status);
+			break;
+		}
+		qset_teid[i] = le32_to_cpu(node.node_teid);
+	}
+rdma_error_exit:
+	mutex_unlock(&pi->sched_lock);
+	kfree(buf);
+	return ret;
+}
+
+/**
+ * ice_dis_vsi_rdma_qset - free RDMA resources
+ * @pi: port_info struct
+ * @count: number of RDMA Qsets to free
+ * @qset_teid: TEID of Qset node
+ * @q_id: list of queue IDs being disabled
+ */
+int
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+		      u16 *q_id)
+{
+	struct ice_aqc_dis_txq_item *qg_list;
+	enum ice_status status = 0;
+	struct ice_hw *hw;
+	u16 qg_size;
+	int i;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+
+	hw = pi->hw;
+
+	qg_size = struct_size(qg_list, q_id, 1);
+	qg_list = kzalloc(qg_size, GFP_KERNEL);
+	if (!qg_list)
+		return -ENOMEM;
+
+	mutex_lock(&pi->sched_lock);
+
+	for (i = 0; i < count; i++) {
+		struct ice_sched_node *node;
+
+		node = ice_sched_find_node_by_teid(pi->root, qset_teid[i]);
+		if (!node)
+			continue;
+
+		qg_list->parent_teid = node->info.parent_teid;
+		qg_list->num_qs = 1;
+		qg_list->q_id[0] =
+			cpu_to_le16(q_id[i] |
+				    ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET);
+
+		status = ice_aq_dis_lan_txq(hw, 1, qg_list, qg_size,
+					    ICE_NO_RESET, 0, NULL);
+		if (status)
+			break;
+
+		ice_free_sched_node(pi, node);
+	}
+
+	mutex_unlock(&pi->sched_lock);
+	kfree(qg_list);
+	return ice_status_to_errno(status);
+}
+
 /**
  * ice_replay_pre_init - replay pre initialization
  * @hw: pointer to the HW struct
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 7a9d2dfb21a2..0fdda597fbc8 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -147,6 +147,15 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
 		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
 		  bool write, struct ice_sq_cd *cd);
 
+int
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+		 u16 *max_rdmaqs);
+int
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 *rdma_qset, u16 num_qsets, u32 *qset_teid);
+int
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+		      u16 *q_id);
 enum ice_status
 ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
 		u16 *q_handle, u16 *q_ids, u32 *q_teids,
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index df02cffdf209..857dc62da7a8 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -275,6 +275,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
 	struct device *dev = ice_pf_to_dev(pf);
 	int ret = ICE_DCB_NO_HW_CHG;
+	struct iidc_event *event;
 	struct ice_vsi *pf_vsi;
 
 	curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
@@ -313,6 +314,15 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 		goto free_cfg;
 	}
 
+	/* Notify AUX drivers about impending change to TCs */
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event)
+		return -ENOMEM;
+
+	set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type);
+	ice_send_event_to_aux(pf, event);
+	kfree(event);
+
 	/* avoid race conditions by holding the lock while disabling and
 	 * re-enabling the VSI
 	 */
@@ -640,6 +650,7 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
 void ice_pf_dcb_recfg(struct ice_pf *pf)
 {
 	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	struct iidc_event *event;
 	u8 tc_map = 0;
 	int v, ret;
 
@@ -675,6 +686,14 @@ void ice_pf_dcb_recfg(struct ice_pf *pf)
 		if (vsi->type == ICE_VSI_PF)
 			ice_dcbnl_set_all(vsi);
 	}
+	/* Notify the AUX drivers that TC change is finished */
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event)
+		return;
+
+	set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
+	ice_send_event_to_aux(pf, event);
+	kfree(event);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index de38a0fc9665..65b18b3e2bcc 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -110,8 +110,6 @@
 #define VPGEN_VFRSTAT_VFRD_M			BIT(0)
 #define VPGEN_VFRTRIG(_VF)			(0x00090000 + ((_VF) * 4))
 #define VPGEN_VFRTRIG_VFSWR_M			BIT(0)
-#define PFHMC_ERRORDATA				0x00520500
-#define PFHMC_ERRORINFO				0x00520400
 #define GLINT_CTL				0x0016CC54
 #define GLINT_CTL_DIS_AUTOMASK_M		BIT(0)
 #define GLINT_CTL_ITR_GRAN_200_S		16
@@ -160,6 +158,7 @@
 #define PFINT_OICR_GRST_M			BIT(20)
 #define PFINT_OICR_PCI_EXCEPTION_M		BIT(21)
 #define PFINT_OICR_HMC_ERR_M			BIT(26)
+#define PFINT_OICR_PE_PUSH_M			BIT(27)
 #define PFINT_OICR_PE_CRITERR_M			BIT(28)
 #define PFINT_OICR_VFLR_M			BIT(29)
 #define PFINT_OICR_SWINT_M			BIT(31)
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index c419c9cb316d..adc9015ed0ed 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -6,6 +6,230 @@
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
+/**
+ * ice_get_auxiliary_drv - retrieve iidc_auxiliary_drv struct
+ * @pf: pointer to PF struct
+ *
+ * This function has to be called with a device_lock on the
+ * pf->adev.dev to avoid race conditions.
+ */
+static struct iidc_auxiliary_drv *ice_get_auxiliary_drv(struct ice_pf *pf)
+{
+	struct auxiliary_device *adev;
+
+	adev = pf->adev;
+	if (!adev || !adev->dev.driver)
+		return NULL;
+
+	return container_of(adev->dev.driver, struct iidc_auxiliary_drv,
+			    adrv.driver);
+}
+
+/**
+ * ice_send_event_to_aux - send event to RDMA AUX driver
+ * @pf: pointer to PF struct
+ * @event: event struct
+ */
+void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
+{
+	struct iidc_auxiliary_drv *iadrv;
+
+	if (!pf->adev)
+		return;
+
+	device_lock(&pf->adev->dev);
+	iadrv = ice_get_auxiliary_drv(pf);
+	if (iadrv && iadrv->event_handler)
+		iadrv->event_handler(pf, event);
+	device_unlock(&pf->adev->dev);
+}
+
+/**
+ * ice_find_vsi - Find the VSI from VSI ID
+ * @pf: The PF pointer to search in
+ * @vsi_num: The VSI ID to search for
+ */
+static struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num)
+{
+	int i;
+
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->vsi_num == vsi_num)
+			return  pf->vsi[i];
+	return NULL;
+}
+
+/**
+ * ice_add_rdma_qset - Add Leaf Node for RDMA Qset
+ * @pf: PF struct
+ * @qset: Resource to be allocated
+ */
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset)
+{
+	u16 max_rdmaqs[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_vsi *vsi;
+	struct device *dev;
+	u32 qset_teid;
+	u16 qs_handle;
+	int status;
+	int i;
+
+	if (WARN_ON(!pf || !qset))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+		return -EINVAL;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi) {
+		dev_err(dev, "RDMA QSet invalid VSI\n");
+		return -EINVAL;
+	}
+
+	ice_for_each_traffic_class(i)
+		max_rdmaqs[i] = 0;
+
+	max_rdmaqs[qset->tc]++;
+	qs_handle = qset->qs_handle;
+
+	status = ice_cfg_vsi_rdma(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				  max_rdmaqs);
+	if (status) {
+		dev_err(dev, "Failed VSI RDMA Qset config\n");
+		return status;
+	}
+
+	status = ice_ena_vsi_rdma_qset(vsi->port_info, vsi->idx, qset->tc,
+				       &qs_handle, 1, &qset_teid);
+	if (status) {
+		dev_err(dev, "Failed VSI RDMA Qset enable\n");
+		return status;
+	}
+	vsi->qset_handle[qset->tc] = qset->qs_handle;
+	qset->teid = qset_teid;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ice_add_rdma_qset);
+
+/**
+ * ice_del_rdma_qset - Delete leaf node for RDMA Qset
+ * @pf: PF struct
+ * @qset: Resource to be freed
+ */
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset)
+{
+	struct ice_vsi *vsi;
+	u32 teid;
+	u16 q_id;
+
+	if (WARN_ON(!pf || !qset))
+		return -EINVAL;
+
+	vsi = ice_find_vsi(pf, qset->vport_id);
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "RDMA Invalid VSI\n");
+		return -EINVAL;
+	}
+
+	q_id = qset->qs_handle;
+	teid = qset->teid;
+
+	vsi->qset_handle[qset->tc] = 0;
+
+	return ice_dis_vsi_rdma_qset(vsi->port_info, 1, &teid, &q_id);
+}
+EXPORT_SYMBOL_GPL(ice_del_rdma_qset);
+
+/**
+ * ice_rdma_request_reset - accept request from RDMA to perform a reset
+ * @pf: struct for PF
+ * @reset_type: type of reset
+ */
+int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type)
+{
+	enum ice_reset_req reset;
+
+	if (WARN_ON(!pf))
+		return -EINVAL;
+
+	switch (reset_type) {
+	case IIDC_PFR:
+		reset = ICE_RESET_PFR;
+		break;
+	case IIDC_CORER:
+		reset = ICE_RESET_CORER;
+		break;
+	case IIDC_GLOBR:
+		reset = ICE_RESET_GLOBR;
+		break;
+	default:
+		dev_err(ice_pf_to_dev(pf), "incorrect reset request\n");
+		return -EINVAL;
+	}
+
+	return ice_schedule_reset(pf, reset);
+}
+EXPORT_SYMBOL_GPL(ice_rdma_request_reset);
+
+/**
+ * ice_rdma_update_vsi_filter - update main VSI filters for RDMA
+ * @pf: pointer to struct for PF
+ * @vsi_id: VSI HW idx to update filter on
+ * @enable: bool whether to enable or disable filters
+ */
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable)
+{
+	struct ice_vsi *vsi;
+	int status;
+
+	if (WARN_ON(!pf))
+		return -EINVAL;
+
+	vsi = ice_find_vsi(pf, vsi_id);
+	if (!vsi)
+		return -EINVAL;
+
+	status = ice_cfg_rdma_fltr(&pf->hw, vsi->idx, enable);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Failed to  %sable RDMA filtering\n",
+			enable ? "en" : "dis");
+	} else {
+		if (enable)
+			vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+		else
+			vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+	}
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(ice_rdma_update_vsi_filter);
+
+/**
+ * ice_get_qos_params - parse QoS params for RDMA consumption
+ * @pf: pointer to PF struct
+ * @qos: set of QoS values
+ */
+void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos)
+{
+	struct ice_dcbx_cfg *dcbx_cfg;
+	unsigned int i;
+	u32 up2tc;
+
+	dcbx_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+
+	qos->num_tc = ice_dcb_get_num_tc(dcbx_cfg);
+	for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
+		qos->up2tc[i] = (up2tc >> (i * 3)) & 0x7;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i];
+}
+EXPORT_SYMBOL_GPL(ice_get_qos_params);
+
 /**
  * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver
  * @pf: board private structure to initialize
diff --git a/drivers/net/ethernet/intel/ice/ice_idc_int.h b/drivers/net/ethernet/intel/ice/ice_idc_int.h
new file mode 100644
index 000000000000..b7796b8aecbd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_idc_int.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _ICE_IDC_INT_H_
+#define _ICE_IDC_INT_H_
+
+#include <linux/net/intel/iidc.h>
+#include "ice.h"
+
+struct ice_pf;
+
+void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event);
+
+#endif /* !_ICE_IDC_INT_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index e307317e819a..9d4570b862aa 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2610,6 +2610,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf)
 	       PFINT_OICR_PCI_EXCEPTION_M |
 	       PFINT_OICR_VFLR_M |
 	       PFINT_OICR_HMC_ERR_M |
+	       PFINT_OICR_PE_PUSH_M |
 	       PFINT_OICR_PE_CRITERR_M);
 
 	wr32(hw, PFINT_OICR_ENA, val);
@@ -2680,8 +2681,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 
 		/* If a reset cycle isn't already in progress, we set a bit in
 		 * pf->state so that the service task can start a reset/rebuild.
-		 * We also make note of which reset happened so that peer
-		 * devices/drivers can be informed.
 		 */
 		if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
 			if (reset == ICE_RESET_CORER)
@@ -2708,11 +2707,19 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		}
 	}
 
-	if (oicr & PFINT_OICR_HMC_ERR_M) {
-		ena_mask &= ~PFINT_OICR_HMC_ERR_M;
-		dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n",
-			rd32(hw, PFHMC_ERRORINFO),
-			rd32(hw, PFHMC_ERRORDATA));
+#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
+	if (oicr & ICE_AUX_CRIT_ERR) {
+		struct iidc_event *event;
+
+		ena_mask &= ~ICE_AUX_CRIT_ERR;
+		event = kzalloc(sizeof(*event), GFP_KERNEL);
+		if (event) {
+			set_bit(IIDC_EVENT_CRIT_ERR, event->type);
+			/* report the entire OICR value to AUX driver */
+			event->reg = oicr;
+			ice_send_event_to_aux(pf, event);
+			kfree(event);
+		}
 	}
 
 	/* Report any remaining unexpected interrupts */
@@ -2722,8 +2729,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		/* If a critical error is pending there is no choice but to
 		 * reset the device.
 		 */
-		if (oicr & (PFINT_OICR_PE_CRITERR_M |
-			    PFINT_OICR_PCI_EXCEPTION_M |
+		if (oicr & (PFINT_OICR_PCI_EXCEPTION_M |
 			    PFINT_OICR_ECC_ERR_M)) {
 			set_bit(ICE_PFR_REQ, pf->state);
 			ice_service_task_schedule(pf);
@@ -6318,7 +6324,9 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
+	struct iidc_event *event;
 	u8 count = 0;
+	int err = 0;
 
 	if (new_mtu == (int)netdev->mtu) {
 		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
@@ -6351,27 +6359,38 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		return -EBUSY;
 	}
 
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event)
+		return -ENOMEM;
+
+	set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
+	ice_send_event_to_aux(pf, event);
+	clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type);
+
 	netdev->mtu = (unsigned int)new_mtu;
 
 	/* if VSI is up, bring it down and then back up */
 	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
-		int err;
-
 		err = ice_down(vsi);
 		if (err) {
 			netdev_err(netdev, "change MTU if_down err %d\n", err);
-			return err;
+			goto event_after;
 		}
 
 		err = ice_up(vsi);
 		if (err) {
 			netdev_err(netdev, "change MTU if_up err %d\n", err);
-			return err;
+			goto event_after;
 		}
 	}
 
 	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
-	return 0;
+event_after:
+	set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+	ice_send_event_to_aux(pf, event);
+	kfree(event);
+
+	return err;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2f097637e405..a17e24e54cf3 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -595,6 +595,50 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
 	return 0;
 }
 
+/**
+ * ice_alloc_rdma_q_ctx - allocate RDMA queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static enum ice_status
+ice_alloc_rdma_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_q_ctx *q_ctx;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return ICE_ERR_PARAM;
+	/* allocate RDMA queue contexts */
+	if (!vsi_ctx->rdma_q_ctx[tc]) {
+		vsi_ctx->rdma_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
+						       new_numqs,
+						       sizeof(*q_ctx),
+						       GFP_KERNEL);
+		if (!vsi_ctx->rdma_q_ctx[tc])
+			return ICE_ERR_NO_MEMORY;
+		vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+		return 0;
+	}
+	/* num queues are increased, update the queue contexts */
+	if (new_numqs > vsi_ctx->num_rdma_q_entries[tc]) {
+		u16 prev_num = vsi_ctx->num_rdma_q_entries[tc];
+
+		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+				     sizeof(*q_ctx), GFP_KERNEL);
+		if (!q_ctx)
+			return ICE_ERR_NO_MEMORY;
+		memcpy(q_ctx, vsi_ctx->rdma_q_ctx[tc],
+		       prev_num * sizeof(*q_ctx));
+		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->rdma_q_ctx[tc]);
+		vsi_ctx->rdma_q_ctx[tc] = q_ctx;
+		vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+	}
+	return 0;
+}
+
 /**
  * ice_aq_rl_profile - performs a rate limiting task
  * @hw: pointer to the HW struct
@@ -1774,13 +1818,22 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 	if (!vsi_ctx)
 		return ICE_ERR_PARAM;
 
-	prev_numqs = vsi_ctx->sched.max_lanq[tc];
+	if (owner == ICE_SCHED_NODE_OWNER_LAN)
+		prev_numqs = vsi_ctx->sched.max_lanq[tc];
+	else
+		prev_numqs = vsi_ctx->sched.max_rdmaq[tc];
 	/* num queues are not changed or less than the previous number */
 	if (new_numqs <= prev_numqs)
 		return status;
-	status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
-	if (status)
-		return status;
+	if (owner == ICE_SCHED_NODE_OWNER_LAN) {
+		status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
+		if (status)
+			return status;
+	} else {
+		status = ice_alloc_rdma_q_ctx(hw, vsi_handle, tc, new_numqs);
+		if (status)
+			return status;
+	}
 
 	if (new_numqs)
 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
@@ -1795,7 +1848,10 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
 					       new_num_nodes, owner);
 	if (status)
 		return status;
-	vsi_ctx->sched.max_lanq[tc] = new_numqs;
+	if (owner == ICE_SCHED_NODE_OWNER_LAN)
+		vsi_ctx->sched.max_lanq[tc] = new_numqs;
+	else
+		vsi_ctx->sched.max_rdmaq[tc] = new_numqs;
 
 	return 0;
 }
@@ -1861,6 +1917,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 		 * recreate the child nodes all the time in these cases.
 		 */
 		vsi_ctx->sched.max_lanq[tc] = 0;
+		vsi_ctx->sched.max_rdmaq[tc] = 0;
 	}
 
 	/* update the VSI child nodes */
@@ -1990,6 +2047,8 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
 		}
 		if (owner == ICE_SCHED_NODE_OWNER_LAN)
 			vsi_ctx->sched.max_lanq[i] = 0;
+		else
+			vsi_ctx->sched.max_rdmaq[i] = 0;
 	}
 	status = 0;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 357d3073d814..3b6c1420aa7b 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2018, Intel Corporation. */
 
+#include "ice_lib.h"
 #include "ice_switch.h"
 
 #define ICE_ETH_DA_OFFSET		0
@@ -302,6 +303,10 @@ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
 			devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
 			vsi->lan_q_ctx[i] = NULL;
 		}
+		if (vsi->rdma_q_ctx[i]) {
+			devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]);
+			vsi->rdma_q_ctx[i] = NULL;
+		}
 	}
 }
 
@@ -422,6 +427,29 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
 	return ice_aq_update_vsi(hw, vsi_ctx, cd);
 }
 
+/**
+ * ice_cfg_rdma_fltr - enable/disable RDMA filtering on VSI
+ * @hw: pointer to HW struct
+ * @vsi_handle: VSI SW index
+ * @enable: boolean for enable/disable
+ */
+int
+ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable)
+{
+	struct ice_vsi_ctx *ctx;
+
+	ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!ctx)
+		return -EIO;
+
+	if (enable)
+		ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+	else
+		ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+
+	return ice_status_to_errno(ice_update_vsi(hw, vsi_handle, ctx, NULL));
+}
+
 /**
  * ice_aq_alloc_free_vsi_list
  * @hw: pointer to the HW struct
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 8b4f9d35c860..6bb7358ff67b 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -26,6 +26,8 @@ struct ice_vsi_ctx {
 	u8 vf_num;
 	u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
 	struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+	u16 num_rdma_q_entries[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS];
 };
 
 enum ice_sw_fwd_act_type {
@@ -223,6 +225,8 @@ enum ice_status
 ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list);
 enum ice_status
 ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+int
+ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable);
 void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle);
 enum ice_status
 ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index b86ae7910a02..c580b87c76ee 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -45,6 +45,7 @@ static inline u32 ice_round_to_num(u32 N, u32 R)
 #define ICE_DBG_FLOW		BIT_ULL(9)
 #define ICE_DBG_SW		BIT_ULL(13)
 #define ICE_DBG_SCHED		BIT_ULL(14)
+#define ICE_DBG_RDMA		BIT_ULL(15)
 #define ICE_DBG_PKG		BIT_ULL(16)
 #define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
@@ -441,6 +442,7 @@ struct ice_sched_node {
 	u8 tc_num;
 	u8 owner;
 #define ICE_SCHED_NODE_OWNER_LAN	0
+#define ICE_SCHED_NODE_OWNER_RDMA	2
 };
 
 /* Access Macros for Tx Sched Elements data */
@@ -512,6 +514,7 @@ struct ice_sched_vsi_info {
 	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
 	struct list_head list_entry;
 	u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+	u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS];
 };
 
 /* driver defines the policy */

From f9f5301e7e2d4fa2445aab3ec889dac6b34ea63e Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Thu, 20 May 2021 09:37:51 -0500
Subject: [PATCH 0676/3292] ice: Register auxiliary device to provide RDMA

Register ice client auxiliary RDMA device on the auxiliary bus per
PCIe device function for the auxiliary driver (irdma) to attach to.
It allows to realize a single RDMA driver (irdma) capable of working with
multiple netdev drivers over multi-generation Intel HW supporting RDMA.
There is no load ordering dependencies between ice and irdma.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/Kconfig        |  1 +
 drivers/net/ethernet/intel/ice/ice.h      |  8 ++-
 drivers/net/ethernet/intel/ice/ice_idc.c  | 71 ++++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_main.c | 11 +++-
 4 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index c1d155690341..d8a12da5c49a 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -294,6 +294,7 @@ config ICE
 	tristate "Intel(R) Ethernet Connection E800 Series Support"
 	default n
 	depends on PCI_MSI
+	select AUXILIARY_BUS
 	select DIMLIB
 	select NET_DEVLINK
 	select PLDMFW
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 225f8a55eb3f..228055e8f33b 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -34,6 +34,7 @@
 #include <linux/if_bridge.h>
 #include <linux/ctype.h>
 #include <linux/bpf.h>
+#include <linux/auxiliary_bus.h>
 #include <linux/avf/virtchnl.h>
 #include <linux/cpu_rmap.h>
 #include <linux/dim.h>
@@ -647,6 +648,8 @@ int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+int ice_plug_aux_dev(struct ice_pf *pf);
+void ice_unplug_aux_dev(struct ice_pf *pf);
 int ice_init_rdma(struct ice_pf *pf);
 const char *ice_stat_str(enum ice_status stat_err);
 const char *ice_aq_str(enum ice_aq_err aq_err);
@@ -678,8 +681,10 @@ void ice_service_task_schedule(struct ice_pf *pf);
  */
 static inline void ice_set_rdma_cap(struct ice_pf *pf)
 {
-	if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix)
+	if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
 		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+		ice_plug_aux_dev(pf);
+	}
 }
 
 /**
@@ -688,6 +693,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
  */
 static inline void ice_clear_rdma_cap(struct ice_pf *pf)
 {
+	ice_unplug_aux_dev(pf);
 	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
 }
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index adc9015ed0ed..1f2afdf6cd48 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -249,6 +249,71 @@ static int ice_reserve_rdma_qvector(struct ice_pf *pf)
 	return 0;
 }
 
+/**
+ * ice_adev_release - function to be mapped to AUX dev's release op
+ * @dev: pointer to device to free
+ */
+static void ice_adev_release(struct device *dev)
+{
+	struct iidc_auxiliary_dev *iadev;
+
+	iadev = container_of(dev, struct iidc_auxiliary_dev, adev.dev);
+	kfree(iadev);
+}
+
+/**
+ * ice_plug_aux_dev - allocate and register AUX device
+ * @pf: pointer to pf struct
+ */
+int ice_plug_aux_dev(struct ice_pf *pf)
+{
+	struct iidc_auxiliary_dev *iadev;
+	struct auxiliary_device *adev;
+	int ret;
+
+	iadev = kzalloc(sizeof(*iadev), GFP_KERNEL);
+	if (!iadev)
+		return -ENOMEM;
+
+	adev = &iadev->adev;
+	pf->adev = adev;
+	iadev->pf = pf;
+
+	adev->id = pf->aux_idx;
+	adev->dev.release = ice_adev_release;
+	adev->dev.parent = &pf->pdev->dev;
+	adev->name = IIDC_RDMA_ROCE_NAME;
+
+	ret = auxiliary_device_init(adev);
+	if (ret) {
+		pf->adev = NULL;
+		kfree(iadev);
+		return ret;
+	}
+
+	ret = auxiliary_device_add(adev);
+	if (ret) {
+		pf->adev = NULL;
+		auxiliary_device_uninit(adev);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* ice_unplug_aux_dev - unregister and free AUX device
+ * @pf: pointer to pf struct
+ */
+void ice_unplug_aux_dev(struct ice_pf *pf)
+{
+	if (!pf->adev)
+		return;
+
+	auxiliary_device_delete(pf->adev);
+	auxiliary_device_uninit(pf->adev);
+	pf->adev = NULL;
+}
+
 /**
  * ice_init_rdma - initializes PF for RDMA use
  * @pf: ptr to ice_pf
@@ -260,8 +325,10 @@ int ice_init_rdma(struct ice_pf *pf)
 
 	/* Reserve vector resources */
 	ret = ice_reserve_rdma_qvector(pf);
-	if (ret < 0)
+	if (ret < 0) {
 		dev_err(dev, "failed to reserve vectors for RDMA\n");
+		return ret;
+	}
 
-	return ret;
+	return ice_plug_aux_dev(pf);
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9d4570b862aa..254cfc14d6b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -456,6 +456,8 @@ ice_prepare_for_reset(struct ice_pf *pf)
 	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
 		return;
 
+	ice_unplug_aux_dev(pf);
+
 	/* Notify VFs of impending reset */
 	if (ice_check_sq_alive(hw, &hw->mailboxq))
 		ice_vc_notify_reset(pf);
@@ -2120,6 +2122,8 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
 		return -EBUSY;
 	}
 
+	ice_unplug_aux_dev(pf);
+
 	switch (reset) {
 	case ICE_RESET_PFR:
 		set_bit(ICE_PFR_REQ, pf->state);
@@ -4456,11 +4460,12 @@ static void ice_remove(struct pci_dev *pdev)
 		ice_free_vfs(pf);
 	}
 
-	set_bit(ICE_DOWN, pf->state);
 	ice_service_task_stop(pf);
 
 	ice_aq_cancel_waiting_tasks(pf);
+	ice_unplug_aux_dev(pf);
 	ida_free(&ice_aux_ida, pf->aux_idx);
+	set_bit(ICE_DOWN, pf->state);
 
 	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_deinit_lag(pf);
@@ -4616,6 +4621,8 @@ static int __maybe_unused ice_suspend(struct device *dev)
 	 */
 	disabled = ice_service_task_stop(pf);
 
+	ice_unplug_aux_dev(pf);
+
 	/* Already suspended?, then there is nothing to do */
 	if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
 		if (!disabled)
@@ -6286,6 +6293,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 	/* if we get here, reset flow is successful */
 	clear_bit(ICE_RESET_FAILED, pf->state);
+
+	ice_plug_aux_dev(pf);
 	return;
 
 err_vsi_rebuild:

From 9ed7533121219cb25408888cf7fbb929cedc033c Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 21 May 2021 10:10:59 -0700
Subject: [PATCH 0677/3292] i40e: Prep i40e header for aux bus conversion

Add the definitions to the i40e client header file in
preparation to convert i40e to use the new auxiliary bus
infrastructure. This header is shared between the 'i40e'
Intel networking driver providing RDMA support and the
'irdma' driver.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/net/intel/i40e_client.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h
index fd7bc860a241..41f24b5241ab 100644
--- a/include/linux/net/intel/i40e_client.h
+++ b/include/linux/net/intel/i40e_client.h
@@ -4,6 +4,8 @@
 #ifndef _I40E_CLIENT_H_
 #define _I40E_CLIENT_H_
 
+#include <linux/auxiliary_bus.h>
+
 #define I40E_CLIENT_STR_LENGTH 10
 
 /* Client interface version should be updated anytime there is a change in the
@@ -78,6 +80,7 @@ struct i40e_info {
 	u8 lanmac[6];
 	struct net_device *netdev;
 	struct pci_dev *pcidev;
+	struct auxiliary_device *aux_dev;
 	u8 __iomem *hw_addr;
 	u8 fid;	/* function id, PF id or VF id */
 #define I40E_CLIENT_FTYPE_PF 0
@@ -100,6 +103,11 @@ struct i40e_info {
 	u32 fw_build;                   /* firmware build number */
 };
 
+struct i40e_auxiliary_device {
+	struct auxiliary_device aux_dev;
+	struct i40e_info *ldev;
+};
+
 #define I40E_CLIENT_RESET_LEVEL_PF   1
 #define I40E_CLIENT_RESET_LEVEL_CORE 2
 #define I40E_CLIENT_VSI_FLAG_TCP_ENABLE  BIT(1)
@@ -187,6 +195,8 @@ static inline bool i40e_client_is_registered(struct i40e_client *client)
 	return test_bit(__I40E_CLIENT_REGISTERED, &client->state);
 }
 
+void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client);
+void i40e_client_device_unregister(struct i40e_info *ldev);
 /* used by clients */
 int i40e_register_client(struct i40e_client *client);
 int i40e_unregister_client(struct i40e_client *client);

From f4370a85d62e645107afc8a35a979be962b19258 Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 21 May 2021 10:11:20 -0700
Subject: [PATCH 0678/3292] i40e: Register auxiliary devices to provide RDMA

Convert i40e to use the auxiliary bus infrastructure to export
the RDMA functionality of the device to the RDMA driver.
Register i40e client auxiliary RDMA device on the auxiliary bus per
PCIe device function for the new auxiliary rdma driver (irdma) to
attach to.

The global i40e_register_client and i40e_unregister_client symbols
will be obsoleted once irdma replaces i40iw in the kernel
for the X722 device.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/Kconfig            |   1 +
 drivers/net/ethernet/intel/i40e/i40e.h        |   2 +
 drivers/net/ethernet/intel/i40e/i40e_client.c | 130 +++++++++++++++---
 drivers/net/ethernet/intel/i40e/i40e_main.c   |   1 +
 4 files changed, 114 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index d8a12da5c49a..eae1b42e48db 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -241,6 +241,7 @@ config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	imply PTP_1588_CLOCK
 	depends on PCI
+	select AUXILIARY_BUS
 	help
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
 	  devices.  For more information on how to identify your adapter, go
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 85d3dd3a3339..b9417dc0007c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -870,6 +870,8 @@ struct i40e_netdev_priv {
 	struct i40e_vsi *vsi;
 };
 
+extern struct ida i40e_client_ida;
+
 /* struct that defines an interrupt vector */
 struct i40e_q_vector {
 	struct i40e_vsi *vsi;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 63eab14a26df..e07ed065d3a4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -12,6 +12,7 @@ static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
 static struct i40e_client *registered_client;
 static LIST_HEAD(i40e_devices);
 static DEFINE_MUTEX(i40e_device_mutex);
+DEFINE_IDA(i40e_client_ida);
 
 static int i40e_client_virtchnl_send(struct i40e_info *ldev,
 				     struct i40e_client *client,
@@ -275,6 +276,57 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
 	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
 }
 
+static void i40e_auxiliary_dev_release(struct device *dev)
+{
+	struct i40e_auxiliary_device *i40e_aux_dev =
+			container_of(dev, struct i40e_auxiliary_device, aux_dev.dev);
+
+	ida_free(&i40e_client_ida, i40e_aux_dev->aux_dev.id);
+	kfree(i40e_aux_dev);
+}
+
+static int i40e_register_auxiliary_dev(struct i40e_info *ldev, const char *name)
+{
+	struct i40e_auxiliary_device *i40e_aux_dev;
+	struct pci_dev *pdev = ldev->pcidev;
+	struct auxiliary_device *aux_dev;
+	int ret;
+
+	i40e_aux_dev = kzalloc(sizeof(*i40e_aux_dev), GFP_KERNEL);
+	if (!i40e_aux_dev)
+		return -ENOMEM;
+
+	i40e_aux_dev->ldev = ldev;
+
+	aux_dev = &i40e_aux_dev->aux_dev;
+	aux_dev->name = name;
+	aux_dev->dev.parent = &pdev->dev;
+	aux_dev->dev.release = i40e_auxiliary_dev_release;
+	ldev->aux_dev = aux_dev;
+
+	ret = ida_alloc(&i40e_client_ida, GFP_KERNEL);
+	if (ret < 0) {
+		kfree(i40e_aux_dev);
+		return ret;
+	}
+	aux_dev->id = ret;
+
+	ret = auxiliary_device_init(aux_dev);
+	if (ret < 0) {
+		ida_free(&i40e_client_ida, aux_dev->id);
+		kfree(i40e_aux_dev);
+		return ret;
+	}
+
+	ret = auxiliary_device_add(aux_dev);
+	if (ret) {
+		auxiliary_device_uninit(aux_dev);
+		return ret;
+	}
+
+	return ret;
+}
+
 /**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
@@ -286,9 +338,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 	struct netdev_hw_addr *mac = NULL;
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 
-	if (!registered_client || pf->cinst)
-		return;
-
 	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
 	if (!cdev)
 		return;
@@ -308,11 +357,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 	cdev->lan_info.fw_build = pf->hw.aq.fw_build;
 	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
 
-	if (i40e_client_get_params(vsi, &cdev->lan_info.params)) {
-		kfree(cdev);
-		cdev = NULL;
-		return;
-	}
+	if (i40e_client_get_params(vsi, &cdev->lan_info.params))
+		goto free_cdev;
 
 	mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
 			       struct netdev_hw_addr, list);
@@ -324,7 +370,17 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 	cdev->client = registered_client;
 	pf->cinst = cdev;
 
-	i40e_client_update_msix_info(pf);
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+
+	if (i40e_register_auxiliary_dev(&cdev->lan_info, "iwarp"))
+		goto free_cdev;
+
+	return;
+
+free_cdev:
+	kfree(cdev);
+	pf->cinst = NULL;
 }
 
 /**
@@ -345,7 +401,7 @@ void i40e_client_del_instance(struct i40e_pf *pf)
  **/
 void i40e_client_subtask(struct i40e_pf *pf)
 {
-	struct i40e_client *client = registered_client;
+	struct i40e_client *client;
 	struct i40e_client_instance *cdev;
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	int ret = 0;
@@ -359,9 +415,11 @@ void i40e_client_subtask(struct i40e_pf *pf)
 	    test_bit(__I40E_CONFIG_BUSY, pf->state))
 		return;
 
-	if (!client || !cdev)
+	if (!cdev || !cdev->client)
 		return;
 
+	client = cdev->client;
+
 	/* Here we handle client opens. If the client is down, and
 	 * the netdev is registered, then open the client.
 	 */
@@ -423,16 +481,8 @@ int i40e_lan_add_device(struct i40e_pf *pf)
 		 pf->hw.pf_id, pf->hw.bus.bus_id,
 		 pf->hw.bus.device, pf->hw.bus.func);
 
-	/* If a client has already been registered, we need to add an instance
-	 * of it to our new LAN device.
-	 */
-	if (registered_client)
-		i40e_client_add_instance(pf);
+	i40e_client_add_instance(pf);
 
-	/* Since in some cases register may have happened before a device gets
-	 * added, we can schedule a subtask to go initiate the clients if
-	 * they can be launched at probe time.
-	 */
 	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
 	i40e_service_event_schedule(pf);
 
@@ -449,9 +499,13 @@ out:
  **/
 int i40e_lan_del_device(struct i40e_pf *pf)
 {
+	struct auxiliary_device *aux_dev = pf->cinst->lan_info.aux_dev;
 	struct i40e_device *ldev, *tmp;
 	int ret = -ENODEV;
 
+	auxiliary_device_delete(aux_dev);
+	auxiliary_device_uninit(aux_dev);
+
 	/* First, remove any client instance. */
 	i40e_client_del_instance(pf);
 
@@ -732,6 +786,42 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
 	return err;
 }
 
+void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client)
+{
+	struct i40e_pf *pf = ldev->pf;
+
+	pf->cinst->client = client;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	i40e_service_event_schedule(pf);
+}
+EXPORT_SYMBOL_GPL(i40e_client_device_register);
+
+void i40e_client_device_unregister(struct i40e_info *ldev)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev)
+		return;
+
+	while (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
+		usleep_range(500, 1000);
+
+	if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+		cdev->client->ops->close(&cdev->lan_info, cdev->client, false);
+		clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+		i40e_client_release_qvlist(&cdev->lan_info);
+	}
+
+	pf->cinst->client = NULL;
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+}
+EXPORT_SYMBOL_GPL(i40e_client_device_unregister);
+
+/* Retain these legacy global registration/unregistration calls till i40iw is
+ * removed from the kernel. The irdma unified driver does not use these
+ * exported symbols.
+ */
 /**
  * i40e_register_client - Register a i40e client driver with the L2 driver
  * @client: pointer to the i40e_client struct
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 704e474879c5..9db1968fc491 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16270,6 +16270,7 @@ static void __exit i40e_exit_module(void)
 {
 	pci_unregister_driver(&i40e_driver);
 	destroy_workqueue(i40e_wq);
+	ida_destroy(&i40e_client_ida);
 	i40e_dbg_exit();
 }
 module_exit(i40e_exit_module);

From 897389de48283d413728dae7520973872cef8eb2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 28 May 2021 12:30:08 +0200
Subject: [PATCH 0679/3292] netfilter: nf_tables: remove xt_action_param from
 nft_pktinfo

Init it on demand in the nft_compat expression.  This reduces size
of nft_pktinfo from 48 to 24 bytes on x86_64.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h      | 25 ++++++++++++-----------
 include/net/netfilter/nf_tables_ipv4.h | 12 +++++------
 include/net/netfilter/nf_tables_ipv6.h | 12 +++++------
 net/netfilter/nft_compat.c             | 28 +++++++++++++++++---------
 4 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 958b8e68bb1a..6783164428f1 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -23,45 +23,46 @@ struct module;
 
 struct nft_pktinfo {
 	struct sk_buff			*skb;
+	const struct nf_hook_state	*state;
 	bool				tprot_set;
 	u8				tprot;
-	/* for x_tables compatibility */
-	struct xt_action_param		xt;
+	u16				fragoff;
+	unsigned int			thoff;
 };
 
 static inline struct sock *nft_sk(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.state->sk;
+	return pkt->state->sk;
 }
 
 static inline unsigned int nft_thoff(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.thoff;
+	return pkt->thoff;
 }
 
 static inline struct net *nft_net(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.state->net;
+	return pkt->state->net;
 }
 
 static inline unsigned int nft_hook(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.state->hook;
+	return pkt->state->hook;
 }
 
 static inline u8 nft_pf(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.state->pf;
+	return pkt->state->pf;
 }
 
 static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.state->in;
+	return pkt->state->in;
 }
 
 static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt)
 {
-	return pkt->xt.state->out;
+	return pkt->state->out;
 }
 
 static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
@@ -69,15 +70,15 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
 				   const struct nf_hook_state *state)
 {
 	pkt->skb = skb;
-	pkt->xt.state = state;
+	pkt->state = state;
 }
 
 static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt)
 {
 	pkt->tprot_set = false;
 	pkt->tprot = 0;
-	pkt->xt.thoff = 0;
-	pkt->xt.fragoff = 0;
+	pkt->thoff = 0;
+	pkt->fragoff = 0;
 }
 
 /**
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index b185a9216bf1..eb4c094cd54d 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -12,8 +12,8 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt)
 	ip = ip_hdr(pkt->skb);
 	pkt->tprot_set = true;
 	pkt->tprot = ip->protocol;
-	pkt->xt.thoff = ip_hdrlen(pkt->skb);
-	pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+	pkt->thoff = ip_hdrlen(pkt->skb);
+	pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 }
 
 static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
@@ -38,8 +38,8 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
 
 	pkt->tprot_set = true;
 	pkt->tprot = iph->protocol;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+	pkt->thoff = thoff;
+	pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
 
 	return 0;
 }
@@ -73,8 +73,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
 
 	pkt->tprot_set = true;
 	pkt->tprot = iph->protocol;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+	pkt->thoff = thoff;
+	pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET;
 
 	return 0;
 
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index bf132d488b17..7595e02b00ba 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -20,8 +20,8 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt)
 
 	pkt->tprot_set = true;
 	pkt->tprot = protohdr;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = frag_off;
+	pkt->thoff = thoff;
+	pkt->fragoff = frag_off;
 }
 
 static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
@@ -52,8 +52,8 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
 
 	pkt->tprot_set = true;
 	pkt->tprot = protohdr;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = frag_off;
+	pkt->thoff = thoff;
+	pkt->fragoff = frag_off;
 
 	return 0;
 #else
@@ -98,8 +98,8 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
 
 	pkt->tprot_set = true;
 	pkt->tprot = protohdr;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = frag_off;
+	pkt->thoff = thoff;
+	pkt->fragoff = frag_off;
 
 	return 0;
 
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5415ab14400d..3144a9ad2f6a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -57,8 +57,13 @@ union nft_entry {
 };
 
 static inline void
-nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
+nft_compat_set_par(struct xt_action_param *par,
+		   const struct nft_pktinfo *pkt,
+		   const void *xt, const void *xt_info)
 {
+	par->state	= pkt->state;
+	par->thoff	= nft_thoff(pkt);
+	par->fragoff	= pkt->fragoff;
 	par->target	= xt;
 	par->targinfo	= xt_info;
 	par->hotdrop	= false;
@@ -71,13 +76,14 @@ static void nft_target_eval_xt(const struct nft_expr *expr,
 	void *info = nft_expr_priv(expr);
 	struct xt_target *target = expr->ops->data;
 	struct sk_buff *skb = pkt->skb;
+	struct xt_action_param xt;
 	int ret;
 
-	nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+	nft_compat_set_par(&xt, pkt, target, info);
 
-	ret = target->target(skb, &pkt->xt);
+	ret = target->target(skb, &xt);
 
-	if (pkt->xt.hotdrop)
+	if (xt.hotdrop)
 		ret = NF_DROP;
 
 	switch (ret) {
@@ -97,13 +103,14 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
 	void *info = nft_expr_priv(expr);
 	struct xt_target *target = expr->ops->data;
 	struct sk_buff *skb = pkt->skb;
+	struct xt_action_param xt;
 	int ret;
 
-	nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+	nft_compat_set_par(&xt, pkt, target, info);
 
-	ret = target->target(skb, &pkt->xt);
+	ret = target->target(skb, &xt);
 
-	if (pkt->xt.hotdrop)
+	if (xt.hotdrop)
 		ret = NF_DROP;
 
 	switch (ret) {
@@ -350,13 +357,14 @@ static void __nft_match_eval(const struct nft_expr *expr,
 {
 	struct xt_match *match = expr->ops->data;
 	struct sk_buff *skb = pkt->skb;
+	struct xt_action_param xt;
 	bool ret;
 
-	nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info);
+	nft_compat_set_par(&xt, pkt, match, info);
 
-	ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
+	ret = match->match(skb, &xt);
 
-	if (pkt->xt.hotdrop) {
+	if (xt.hotdrop) {
 		regs->verdict.code = NF_DROP;
 		return;
 	}

From 3a7244152f9c22f32f37dfba2a9b070a90bf877a Mon Sep 17 00:00:00 2001
From: Stanislaw Kardach <skardach@marvell.com>
Date: Thu, 27 May 2021 15:14:35 +0530
Subject: [PATCH 0680/3292] octeontx2-af: add support for custom KPU entries

Add ability to load a set of custom KPU entries. This
allows for flexible support for custom protocol parsing.

AF driver will attempt to load the profile and verify if it can fit
hardware capabilities. If not, it will revert to the built-in profile.

Next it will replace the first KPU_MAX_CST_LT (2) entries in each KPU
in default profile with entries read from the profile image.
The built-in profile should always contain KPU_MAX_CSR_LT first no-match
entries and AF driver will disable those in the KPU unless custom
profile is loaded.

Profile file contains also a list of default protocol overrides to
allow for custom protocols to be used there.

Signed-off-by: Stanislaw Kardach <skardach@marvell.com>
Signed-off-by: George Cherian <george.cherian@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/marvell/octeontx2/af/npc.h   |  44 ++++-
 .../marvell/octeontx2/af/npc_profile.h        | 156 ++++++++++++++----
 .../net/ethernet/marvell/octeontx2/af/rvu.c   |   6 +
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |   4 +
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 116 ++++++++++++-
 5 files changed, 286 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 1e012e787260..6579ad19f684 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -213,7 +213,7 @@ struct npc_kpu_profile_cam {
 	u16 dp1_mask;
 	u16 dp2;
 	u16 dp2_mask;
-};
+} __packed;
 
 struct npc_kpu_profile_action {
 	u8 errlev;
@@ -233,13 +233,13 @@ struct npc_kpu_profile_action {
 	u8 mask;
 	u8 right;
 	u8 shift;
-};
+} __packed;
 
 struct npc_kpu_profile {
 	int cam_entries;
 	int action_entries;
-	const struct npc_kpu_profile_cam *cam;
-	const struct npc_kpu_profile_action *action;
+	struct npc_kpu_profile_cam *cam;
+	struct npc_kpu_profile_action *action;
 };
 
 /* NPC KPU register formats */
@@ -445,6 +445,15 @@ struct npc_mcam_kex {
 	u64 intf_ld_flags[NPC_MAX_INTF][NPC_MAX_LD][NPC_MAX_LFL];
 } __packed;
 
+struct npc_kpu_fwdata {
+	int	entries;
+	/* What follows is:
+	 * struct npc_kpu_profile_cam[entries];
+	 * struct npc_kpu_profile_action[entries];
+	 */
+	u8	data[0];
+} __packed;
+
 struct npc_lt_def {
 	u8	ltype_mask;
 	u8	ltype_match;
@@ -478,6 +487,33 @@ struct npc_lt_def_cfg {
 	struct npc_lt_def	pck_iip4;
 };
 
+/* Loadable KPU profile firmware data */
+struct npc_kpu_profile_fwdata {
+#define KPU_SIGN	0x00666f727075706b
+#define KPU_NAME_LEN	32
+/** Maximum number of custom KPU entries supported by the built-in profile. */
+#define KPU_MAX_CST_ENT	2
+	/* KPU Profle Header */
+	__le64	signature; /* "kpuprof\0" (8 bytes/ASCII characters) */
+	u8	name[KPU_NAME_LEN]; /* KPU Profile name */
+	__le64	version; /* KPU profile version */
+	u8	kpus;
+	u8	reserved[7];
+
+	/* Default MKEX profile to be used with this KPU profile. May be
+	 * overridden with mkex_profile module parameter. Format is same as for
+	 * the MKEX profile to streamline processing.
+	 */
+	struct npc_mcam_kex	mkex;
+	/* LTYPE values for specific HW offloaded protocols. */
+	struct npc_lt_def_cfg	lt_def;
+	/* Dynamically sized data:
+	 *  Custom KPU CAM and ACTION configuration entries.
+	 * struct npc_kpu_fwdata kpu[kpus];
+	 */
+	u8	data[0];
+} __packed;
+
 struct rvu_npc_mcam_rule {
 	struct flow_msg packet;
 	struct flow_msg mask;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index 5c372d2c24a1..de3a60c12392 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -11,7 +11,10 @@
 #ifndef NPC_PROFILE_H
 #define NPC_PROFILE_H
 
-#define NPC_KPU_PROFILE_VER    0x0000000100050000
+#define NPC_KPU_PROFILE_VER	0x0000000100050000
+#define NPC_KPU_VER_MAJ(ver)	((u16)(((ver) >> 32) & 0xFFFF))
+#define NPC_KPU_VER_MIN(ver)	((u16)(((ver) >> 16) & 0xFFFF))
+#define NPC_KPU_VER_PATCH(ver)	((u16)((ver) & 0xFFFF))
 
 #define NPC_IH_W		0x8000
 #define NPC_IH_UTAG		0x2000
@@ -442,7 +445,28 @@ enum NPC_ERRLEV_E {
 	NPC_ERRLEV_ENUM_LAST = 16,
 };
 
-static const struct npc_kpu_profile_action ikpu_action_entries[] = {
+#define NPC_KPU_NOP_CAM		\
+	{			\
+		NPC_S_NA, 0xff,	\
+		0x0000,		\
+		0x0000,		\
+		0x0000,		\
+		0x0000,		\
+		0x0000,		\
+		0x0000,		\
+	}
+
+#define NPC_KPU_NOP_ACTION			\
+	{					\
+		NPC_ERRLEV_RE, NPC_EC_NOERR,	\
+		0, 0, 0, 0, 0,			\
+		NPC_S_NA, 0, 0,			\
+		NPC_LID_LA, NPC_LT_NA,		\
+		0,				\
+		0, 0, 0, 0,			\
+	}
+
+static struct npc_kpu_profile_action ikpu_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		12, 16, 20, 0, 0,
@@ -1021,7 +1045,9 @@ static const struct npc_kpu_profile_action ikpu_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU1_ETHER, 0xff,
 		NPC_ETYPE_IP,
@@ -1699,7 +1725,9 @@ static const struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu2_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU2_CTAG, 0xff,
 		NPC_ETYPE_IP,
@@ -2827,7 +2855,9 @@ static const struct npc_kpu_profile_cam kpu2_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu3_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu3_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU3_CTAG, 0xff,
 		NPC_ETYPE_IP,
@@ -3946,7 +3976,9 @@ static const struct npc_kpu_profile_cam kpu3_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu4_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu4_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU4_MPLS, 0xff,
 		NPC_MPLS_S,
@@ -4102,7 +4134,9 @@ static const struct npc_kpu_profile_cam kpu4_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu5_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU5_IP, 0xff,
 		0x0000,
@@ -4672,7 +4706,9 @@ static const struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu6_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU6_IP6_EXT, 0xff,
 		0x0000,
@@ -5017,7 +5053,9 @@ static const struct npc_kpu_profile_cam kpu6_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu7_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu7_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU7_IP6_EXT, 0xff,
 		0x0000,
@@ -5236,7 +5274,9 @@ static const struct npc_kpu_profile_cam kpu7_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu8_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU8_TCP, 0xff,
 		0x0000,
@@ -5977,7 +6017,9 @@ static const struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu9_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu9_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU9_TU_MPLS_IN_GRE, 0xff,
 		NPC_MPLS_S,
@@ -6448,7 +6490,9 @@ static const struct npc_kpu_profile_cam kpu9_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu10_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu10_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU10_TU_MPLS, 0xff,
 		NPC_MPLS_S,
@@ -6613,7 +6657,9 @@ static const struct npc_kpu_profile_cam kpu10_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu11_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu11_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU11_TU_ETHER, 0xff,
 		NPC_ETYPE_IP,
@@ -6922,7 +6968,9 @@ static const struct npc_kpu_profile_cam kpu11_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu12_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_TCP,
@@ -7177,7 +7225,9 @@ static const struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu13_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu13_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU13_TU_IP6_EXT, 0xff,
 		0x0000,
@@ -7189,7 +7239,9 @@ static const struct npc_kpu_profile_cam kpu13_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu14_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu14_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU14_TU_IP6_EXT, 0xff,
 		0x0000,
@@ -7201,7 +7253,9 @@ static const struct npc_kpu_profile_cam kpu14_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu15_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu15_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU15_TU_TCP, 0xff,
 		0x0000,
@@ -7402,7 +7456,9 @@ static const struct npc_kpu_profile_cam kpu15_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_cam kpu16_cam_entries[] = {
+static struct npc_kpu_profile_cam kpu16_cam_entries[] = {
+	NPC_KPU_NOP_CAM,
+	NPC_KPU_NOP_CAM,
 	{
 		NPC_S_KPU16_TCP_DATA, 0xff,
 		0x0000,
@@ -7459,7 +7515,9 @@ static const struct npc_kpu_profile_cam kpu16_cam_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu1_action_entries[] = {
+static struct npc_kpu_profile_action kpu1_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 3, 0,
@@ -8084,7 +8142,9 @@ static const struct npc_kpu_profile_action kpu1_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu2_action_entries[] = {
+static struct npc_kpu_profile_action kpu2_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 2, 0,
@@ -9087,7 +9147,9 @@ static const struct npc_kpu_profile_action kpu2_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu3_action_entries[] = {
+static struct npc_kpu_profile_action kpu3_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 1, 0,
@@ -10082,7 +10144,9 @@ static const struct npc_kpu_profile_action kpu3_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu4_action_entries[] = {
+static struct npc_kpu_profile_action kpu4_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 0,
@@ -10221,7 +10285,9 @@ static const struct npc_kpu_profile_action kpu4_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu5_action_entries[] = {
+static struct npc_kpu_profile_action kpu5_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_LC, NPC_EC_IP_TTL_0,
 		0, 0, 0, 0, 1,
@@ -10728,7 +10794,9 @@ static const struct npc_kpu_profile_action kpu5_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu6_action_entries[] = {
+static struct npc_kpu_profile_action kpu6_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -11035,7 +11103,9 @@ static const struct npc_kpu_profile_action kpu6_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu7_action_entries[] = {
+static struct npc_kpu_profile_action kpu7_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -11230,7 +11300,9 @@ static const struct npc_kpu_profile_action kpu7_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu8_action_entries[] = {
+static struct npc_kpu_profile_action kpu8_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_LD, NPC_EC_TCP_FLAGS_FIN_ONLY,
 		0, 0, 0, 0, 1,
@@ -11889,7 +11961,9 @@ static const struct npc_kpu_profile_action kpu8_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu9_action_entries[] = {
+static struct npc_kpu_profile_action kpu9_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 0,
@@ -12308,7 +12382,9 @@ static const struct npc_kpu_profile_action kpu9_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu10_action_entries[] = {
+static struct npc_kpu_profile_action kpu10_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 1, 0,
@@ -12455,7 +12531,9 @@ static const struct npc_kpu_profile_action kpu10_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu11_action_entries[] = {
+static struct npc_kpu_profile_action kpu11_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 0, 0,
@@ -12730,7 +12808,9 @@ static const struct npc_kpu_profile_action kpu11_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu12_action_entries[] = {
+static struct npc_kpu_profile_action kpu12_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 12, 0, 2, 0,
@@ -12957,7 +13037,9 @@ static const struct npc_kpu_profile_action kpu12_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu13_action_entries[] = {
+static struct npc_kpu_profile_action kpu13_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -12968,7 +13050,9 @@ static const struct npc_kpu_profile_action kpu13_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu14_action_entries[] = {
+static struct npc_kpu_profile_action kpu14_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -12979,7 +13063,9 @@ static const struct npc_kpu_profile_action kpu14_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu15_action_entries[] = {
+static struct npc_kpu_profile_action kpu15_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_LG, NPC_EC_TCP_FLAGS_FIN_ONLY,
 		0, 0, 0, 0, 1,
@@ -13158,7 +13244,9 @@ static const struct npc_kpu_profile_action kpu15_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile_action kpu16_action_entries[] = {
+static struct npc_kpu_profile_action kpu16_action_entries[] = {
+	NPC_KPU_NOP_ACTION,
+	NPC_KPU_NOP_ACTION,
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index ab24a5e8ee8a..bc71a9c462de 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -57,6 +57,10 @@ static char *mkex_profile; /* MKEX profile name */
 module_param(mkex_profile, charp, 0000);
 MODULE_PARM_DESC(mkex_profile, "MKEX profile name string");
 
+static char *kpu_profile; /* KPU profile name */
+module_param(kpu_profile, charp, 0000);
+MODULE_PARM_DESC(kpu_profile, "KPU profile name string");
+
 static void rvu_setup_hw_capabilities(struct rvu *rvu)
 {
 	struct rvu_hwinfo *hw = rvu->hw;
@@ -2842,6 +2846,8 @@ static void rvu_update_module_params(struct rvu *rvu)
 
 	strscpy(rvu->mkex_pfl_name,
 		mkex_profile ? mkex_profile : default_pfl_name, MKEX_NAME_LEN);
+	strscpy(rvu->kpu_pfl_name,
+		kpu_profile ? kpu_profile : default_pfl_name, KPU_NAME_LEN);
 }
 
 static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index c2cc4806d13c..fb142520e309 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -386,6 +386,7 @@ struct npc_kpu_profile_adapter {
 	const struct npc_kpu_profile_action	*ikpu; /* array[pkinds] */
 	const struct npc_kpu_profile	*kpu; /* array[kpus] */
 	struct npc_mcam_kex		*mkex;
+	bool				custom;
 	size_t				pkinds;
 	size_t				kpus;
 };
@@ -435,9 +436,12 @@ struct rvu {
 	struct mutex		cgx_cfg_lock; /* serialize cgx configuration */
 
 	char mkex_pfl_name[MKEX_NAME_LEN]; /* Configured MKEX profile name */
+	char kpu_pfl_name[KPU_NAME_LEN]; /* Configured KPU profile name */
 
 	/* Firmware data */
 	struct rvu_fwdata	*fwdata;
+	void			*kpu_fwdata;
+	size_t			kpu_fwdata_sz;
 
 	/* NPC KPU data */
 	struct npc_kpu_profile_adapter kpu;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 0bc4529691ec..254b768155cd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1145,7 +1145,8 @@ static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr,
 	u64 prfl_addr, prfl_sz;
 
 	/* If user not selected mkex profile */
-	if (!strncmp(mkex_profile, def_pfl_name, MKEX_NAME_LEN))
+	if (rvu->kpu_fwdata_sz ||
+	    !strncmp(mkex_profile, def_pfl_name, MKEX_NAME_LEN))
 		goto program_mkex;
 
 	if (!rvu->fwdata)
@@ -1263,6 +1264,7 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu,
 				    const struct npc_kpu_profile *profile)
 {
 	int entry, num_entries, max_entries;
+	u64 entry_mask;
 
 	if (profile->cam_entries != profile->action_entries) {
 		dev_err(rvu->dev,
@@ -1286,8 +1288,12 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu,
 
 	/* Enable all programmed entries */
 	num_entries = min_t(int, profile->action_entries, profile->cam_entries);
+	entry_mask = enable_mask(num_entries);
+	/* Disable first KPU_MAX_CST_ENT entries for built-in profile */
+	if (!rvu->kpu.custom)
+		entry_mask |= GENMASK_ULL(KPU_MAX_CST_ENT - 1, 0);
 	rvu_write64(rvu, blkaddr,
-		    NPC_AF_KPUX_ENTRY_DISX(kpu, 0), enable_mask(num_entries));
+		    NPC_AF_KPUX_ENTRY_DISX(kpu, 0), entry_mask);
 	if (num_entries > 64) {
 		rvu_write64(rvu, blkaddr,
 			    NPC_AF_KPUX_ENTRY_DISX(kpu, 1),
@@ -1300,6 +1306,7 @@ static void npc_program_kpu_profile(struct rvu *rvu, int blkaddr, int kpu,
 
 static int npc_prepare_default_kpu(struct npc_kpu_profile_adapter *profile)
 {
+	profile->custom = 0;
 	profile->name = def_pfl_name;
 	profile->version = NPC_KPU_PROFILE_VER;
 	profile->ikpu = ikpu_action_entries;
@@ -1312,10 +1319,114 @@ static int npc_prepare_default_kpu(struct npc_kpu_profile_adapter *profile)
 	return 0;
 }
 
+static int npc_apply_custom_kpu(struct rvu *rvu,
+				struct npc_kpu_profile_adapter *profile)
+{
+	size_t hdr_sz = sizeof(struct npc_kpu_profile_fwdata), offset = 0;
+	struct npc_kpu_profile_fwdata *fw = rvu->kpu_fwdata;
+	struct npc_kpu_profile_action *action;
+	struct npc_kpu_profile_cam *cam;
+	struct npc_kpu_fwdata *fw_kpu;
+	int entries;
+	u16 kpu, entry;
+
+	if (rvu->kpu_fwdata_sz < hdr_sz) {
+		dev_warn(rvu->dev, "Invalid KPU profile size\n");
+		return -EINVAL;
+	}
+	if (le64_to_cpu(fw->signature) != KPU_SIGN) {
+		dev_warn(rvu->dev, "Invalid KPU profile signature %llx\n",
+			 fw->signature);
+		return -EINVAL;
+	}
+	/* Verify if the using known profile structure */
+	if (NPC_KPU_VER_MAJ(profile->version) >
+	    NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER)) {
+		dev_warn(rvu->dev, "Not supported Major version: %d > %d\n",
+			 NPC_KPU_VER_MAJ(profile->version),
+			 NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER));
+		return -EINVAL;
+	}
+	/* Verify if profile fits the HW */
+	if (fw->kpus > profile->kpus) {
+		dev_warn(rvu->dev, "Not enough KPUs: %d > %ld\n", fw->kpus,
+			 profile->kpus);
+		return -EINVAL;
+	}
+
+	profile->custom = 1;
+	profile->name = fw->name;
+	profile->version = le64_to_cpu(fw->version);
+	profile->mkex = &fw->mkex;
+	profile->lt_def = &fw->lt_def;
+
+	for (kpu = 0; kpu < fw->kpus; kpu++) {
+		fw_kpu = (struct npc_kpu_fwdata *)(fw->data + offset);
+		if (fw_kpu->entries > KPU_MAX_CST_ENT)
+			dev_warn(rvu->dev,
+				 "Too many custom entries on KPU%d: %d > %d\n",
+				 kpu, fw_kpu->entries, KPU_MAX_CST_ENT);
+		entries = min(fw_kpu->entries, KPU_MAX_CST_ENT);
+		cam = (struct npc_kpu_profile_cam *)fw_kpu->data;
+		offset += sizeof(*fw_kpu) + fw_kpu->entries * sizeof(*cam);
+		action = (struct npc_kpu_profile_action *)(fw->data + offset);
+		offset += fw_kpu->entries * sizeof(*action);
+		if (rvu->kpu_fwdata_sz < hdr_sz + offset) {
+			dev_warn(rvu->dev,
+				 "Profile size mismatch on KPU%i parsing.\n",
+				 kpu + 1);
+			return -EINVAL;
+		}
+		for (entry = 0; entry < entries; entry++) {
+			profile->kpu[kpu].cam[entry] = cam[entry];
+			profile->kpu[kpu].action[entry] = action[entry];
+		}
+	}
+
+	return 0;
+}
+
 static void npc_load_kpu_profile(struct rvu *rvu)
 {
 	struct npc_kpu_profile_adapter *profile = &rvu->kpu;
+	const char *kpu_profile = rvu->kpu_pfl_name;
+	const struct firmware *fw = NULL;
 
+	/* If user not specified profile customization */
+	if (!strncmp(kpu_profile, def_pfl_name, KPU_NAME_LEN))
+		goto revert_to_default;
+	/* First prepare default KPU, then we'll customize top entries. */
+	npc_prepare_default_kpu(profile);
+
+	dev_info(rvu->dev, "Loading KPU profile from firmware: %s\n",
+		 kpu_profile);
+	if (!request_firmware(&fw, kpu_profile, rvu->dev)) {
+		rvu->kpu_fwdata = kzalloc(fw->size, GFP_KERNEL);
+		if (rvu->kpu_fwdata) {
+			memcpy(rvu->kpu_fwdata, fw->data, fw->size);
+			rvu->kpu_fwdata_sz = fw->size;
+		}
+		release_firmware(fw);
+	}
+
+	/* Apply profile customization if firmware was loaded. */
+	if (!rvu->kpu_fwdata_sz || npc_apply_custom_kpu(rvu, profile)) {
+		dev_warn(rvu->dev,
+			 "Can't load KPU profile %s. Using default.\n",
+			 kpu_profile);
+		kfree(rvu->kpu_fwdata);
+		rvu->kpu_fwdata = NULL;
+		goto revert_to_default;
+	}
+
+	dev_info(rvu->dev, "Using custom profile '%s', version %d.%d.%d\n",
+		 profile->name, NPC_KPU_VER_MAJ(profile->version),
+		 NPC_KPU_VER_MIN(profile->version),
+		 NPC_KPU_VER_PATCH(profile->version));
+
+	return;
+
+revert_to_default:
 	npc_prepare_default_kpu(profile);
 }
 
@@ -1654,6 +1765,7 @@ void rvu_npc_freemem(struct rvu *rvu)
 
 	kfree(pkind->rsrc.bmap);
 	kfree(mcam->counters.bmap);
+	kfree(rvu->kpu_fwdata);
 	mutex_destroy(&mcam->lock);
 }
 

From 5d16250b605963b8b45bf824b1889a6b3d64c662 Mon Sep 17 00:00:00 2001
From: Harman Kalra <hkalra@marvell.com>
Date: Thu, 27 May 2021 15:14:36 +0530
Subject: [PATCH 0681/3292] octeontx2-af: load NPC profile via firmware
 database

Currently NPC profile (KPU + MKEX) can be loaded using firmware
binary in filesystem scheme. Enhancing the functionality to load
NPC profile image from system firmware database. It uses the same
technique as used for loading MKEX profile. Firstly firmware binary
in kernel is checked for a valid image else tries to load NPC profile
from firmware database and at last uses default profile if no proper
image found.

Signed-off-by: Harman Kalra <hkalra@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: George Cherian <george.cherian@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |   1 +
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 117 +++++++++++++++---
 2 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index fb142520e309..74ed929f101b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -442,6 +442,7 @@ struct rvu {
 	struct rvu_fwdata	*fwdata;
 	void			*kpu_fwdata;
 	size_t			kpu_fwdata_sz;
+	void __iomem		*kpu_prfl_addr;
 
 	/* NPC KPU data */
 	struct npc_kpu_profile_adapter kpu;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 254b768155cd..91e9467c5f69 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1134,6 +1134,30 @@ static void npc_program_mkex_profile(struct rvu *rvu, int blkaddr,
 	}
 }
 
+static int npc_fwdb_prfl_img_map(struct rvu *rvu, void __iomem **prfl_img_addr,
+				 u64 *size)
+{
+	u64 prfl_addr, prfl_sz;
+
+	if (!rvu->fwdata)
+		return -EINVAL;
+
+	prfl_addr = rvu->fwdata->mcam_addr;
+	prfl_sz = rvu->fwdata->mcam_sz;
+
+	if (!prfl_addr || !prfl_sz)
+		return -EINVAL;
+
+	*prfl_img_addr = ioremap_wc(prfl_addr, prfl_sz);
+	if (!(*prfl_img_addr))
+		return -ENOMEM;
+
+	*size = prfl_sz;
+
+	return 0;
+}
+
+/* strtoull of "mkexprof" with base:36 */
 #define MKEX_END_SIGN  0xdeadbeef
 
 static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr,
@@ -1141,27 +1165,21 @@ static void npc_load_mkex_profile(struct rvu *rvu, int blkaddr,
 {
 	struct device *dev = &rvu->pdev->dev;
 	struct npc_mcam_kex *mcam_kex;
-	void *mkex_prfl_addr = NULL;
-	u64 prfl_addr, prfl_sz;
+	void __iomem *mkex_prfl_addr = NULL;
+	u64 prfl_sz;
+	int ret;
 
 	/* If user not selected mkex profile */
 	if (rvu->kpu_fwdata_sz ||
 	    !strncmp(mkex_profile, def_pfl_name, MKEX_NAME_LEN))
 		goto program_mkex;
 
-	if (!rvu->fwdata)
-		goto program_mkex;
-	prfl_addr = rvu->fwdata->mcam_addr;
-	prfl_sz = rvu->fwdata->mcam_sz;
-
-	if (!prfl_addr || !prfl_sz)
+	/* Setting up the mapping for mkex profile image */
+	ret = npc_fwdb_prfl_img_map(rvu, &mkex_prfl_addr, &prfl_sz);
+	if (ret < 0)
 		goto program_mkex;
 
-	mkex_prfl_addr = memremap(prfl_addr, prfl_sz, MEMREMAP_WC);
-	if (!mkex_prfl_addr)
-		goto program_mkex;
-
-	mcam_kex = (struct npc_mcam_kex *)mkex_prfl_addr;
+	mcam_kex = (struct npc_mcam_kex __force *)mkex_prfl_addr;
 
 	while (((s64)prfl_sz > 0) && (mcam_kex->mkex_sign != MKEX_END_SIGN)) {
 		/* Compare with mkex mod_param name string */
@@ -1187,7 +1205,7 @@ program_mkex:
 	/* Program selected mkex profile */
 	npc_program_mkex_profile(rvu, blkaddr, rvu->kpu.mkex);
 	if (mkex_prfl_addr)
-		memunmap(mkex_prfl_addr);
+		iounmap(mkex_prfl_addr);
 }
 
 static void npc_config_kpuaction(struct rvu *rvu, int blkaddr,
@@ -1386,6 +1404,40 @@ static int npc_apply_custom_kpu(struct rvu *rvu,
 	return 0;
 }
 
+static int npc_load_kpu_profile_fwdb(struct rvu *rvu, const char *kpu_profile)
+{
+	struct npc_kpu_profile_fwdata *kpu_fw = NULL;
+	u64 prfl_sz;
+	int ret;
+
+	/* Setting up the mapping for NPC profile image */
+	ret = npc_fwdb_prfl_img_map(rvu, &rvu->kpu_prfl_addr, &prfl_sz);
+	if (ret < 0)
+		return ret;
+
+	rvu->kpu_fwdata =
+		(struct npc_kpu_profile_fwdata __force *)rvu->kpu_prfl_addr;
+	rvu->kpu_fwdata_sz = prfl_sz;
+
+	kpu_fw = rvu->kpu_fwdata;
+	if (le64_to_cpu(kpu_fw->signature) == KPU_SIGN &&
+	    !strncmp(kpu_fw->name, kpu_profile, KPU_NAME_LEN)) {
+		dev_info(rvu->dev, "Loading KPU profile from firmware db: %s\n",
+			 kpu_profile);
+		return 0;
+	}
+
+	/* Cleaning up if KPU profile image from fwdata is not valid. */
+	if (rvu->kpu_prfl_addr) {
+		iounmap(rvu->kpu_prfl_addr);
+		rvu->kpu_prfl_addr = NULL;
+		rvu->kpu_fwdata_sz = 0;
+		rvu->kpu_fwdata = NULL;
+	}
+
+	return -EINVAL;
+}
+
 static void npc_load_kpu_profile(struct rvu *rvu)
 {
 	struct npc_kpu_profile_adapter *profile = &rvu->kpu;
@@ -1398,19 +1450,47 @@ static void npc_load_kpu_profile(struct rvu *rvu)
 	/* First prepare default KPU, then we'll customize top entries. */
 	npc_prepare_default_kpu(profile);
 
-	dev_info(rvu->dev, "Loading KPU profile from firmware: %s\n",
-		 kpu_profile);
+	/* Order of preceedence for load loading NPC profile (high to low)
+	 * Firmware binary in filesystem.
+	 * Firmware database method.
+	 * Default KPU profile.
+	 */
 	if (!request_firmware(&fw, kpu_profile, rvu->dev)) {
+		dev_info(rvu->dev, "Loading KPU profile from firmware: %s\n",
+			 kpu_profile);
 		rvu->kpu_fwdata = kzalloc(fw->size, GFP_KERNEL);
 		if (rvu->kpu_fwdata) {
 			memcpy(rvu->kpu_fwdata, fw->data, fw->size);
 			rvu->kpu_fwdata_sz = fw->size;
 		}
 		release_firmware(fw);
+		goto program_kpu;
 	}
 
+load_image_fwdb:
+	/* Loading the KPU profile using firmware database */
+	if (npc_load_kpu_profile_fwdb(rvu, kpu_profile))
+		goto revert_to_default;
+
+program_kpu:
 	/* Apply profile customization if firmware was loaded. */
 	if (!rvu->kpu_fwdata_sz || npc_apply_custom_kpu(rvu, profile)) {
+		/* If image from firmware filesystem fails to load or invalid
+		 * retry with firmware database method.
+		 */
+		if (rvu->kpu_fwdata || rvu->kpu_fwdata_sz) {
+			/* Loading image from firmware database failed. */
+			if (rvu->kpu_prfl_addr) {
+				iounmap(rvu->kpu_prfl_addr);
+				rvu->kpu_prfl_addr = NULL;
+			} else {
+				kfree(rvu->kpu_fwdata);
+			}
+			rvu->kpu_fwdata = NULL;
+			rvu->kpu_fwdata_sz = 0;
+			goto load_image_fwdb;
+		}
+
 		dev_warn(rvu->dev,
 			 "Can't load KPU profile %s. Using default.\n",
 			 kpu_profile);
@@ -1765,7 +1845,10 @@ void rvu_npc_freemem(struct rvu *rvu)
 
 	kfree(pkind->rsrc.bmap);
 	kfree(mcam->counters.bmap);
-	kfree(rvu->kpu_fwdata);
+	if (rvu->kpu_prfl_addr)
+		iounmap(rvu->kpu_prfl_addr);
+	else
+		kfree(rvu->kpu_fwdata);
 	mutex_destroy(&mcam->lock);
 }
 

From c87e6b1395792d25697927e2a565547ec7a62681 Mon Sep 17 00:00:00 2001
From: Harman Kalra <hkalra@marvell.com>
Date: Thu, 27 May 2021 15:14:37 +0530
Subject: [PATCH 0682/3292] octeontx2-af: adding new lt def registers support

CN10k introduces following new LT DEF registers:
1. APAD (alignment padding) LT DEF registers are
enhancement to existing apad calculation algorithm
where not just ipv4 and ipv6 but also other protocols
can be matched and required alignment can be added by NIX.

2. ET LT DEF register defines layer information in NPC_RESULT_S
to identify the Ethertype location in L2 header. Used for
Ethertype overwriting in inline IPsec flow.

This patch adds required structures and some header changes. Also
strict version check (based on minor field) is imposed to highlight
version mismatch between the kernel headers and KPU profile.

Signed-off-by: Harman Kalra <hkalra@marvell.com>
Signed-off-by: Jerin Jacob Kollanukkaran <jerinj@marvell.com>
Signed-off-by: Kiran Kumar Kokkilagadda <kirankumark@marvell.com>
Signed-off-by: George Cherian <george.cherian@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/marvell/octeontx2/af/npc.h   | 32 ++++++++++++++++-
 .../marvell/octeontx2/af/npc_profile.h        | 26 +++++++++++++-
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 34 +++++++++++++++++++
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 20 ++++++++++-
 .../ethernet/marvell/octeontx2/af/rvu_reg.h   |  4 ++-
 5 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 6579ad19f684..8114c5fb0c2c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -468,6 +468,29 @@ struct npc_lt_def_ipsec {
 	u8	spi_nz;
 };
 
+struct npc_lt_def_apad {
+	u8	ltype_mask;
+	u8	ltype_match;
+	u8	lid;
+	u8	valid;
+} __packed;
+
+struct npc_lt_def_color {
+	u8	ltype_mask;
+	u8	ltype_match;
+	u8	lid;
+	u8	noffset;
+	u8	offset;
+} __packed;
+
+struct npc_lt_def_et {
+	u8	ltype_mask;
+	u8	ltype_match;
+	u8	lid;
+	u8	valid;
+	u8	offset;
+} __packed;
+
 struct npc_lt_def_cfg {
 	struct npc_lt_def	rx_ol2;
 	struct npc_lt_def	rx_oip4;
@@ -485,7 +508,14 @@ struct npc_lt_def_cfg {
 	struct npc_lt_def	pck_oip4;
 	struct npc_lt_def	pck_oip6;
 	struct npc_lt_def	pck_iip4;
-};
+	struct npc_lt_def_apad	rx_apad0;
+	struct npc_lt_def_apad	rx_apad1;
+	struct npc_lt_def_color	rx_ovlan;
+	struct npc_lt_def_color	rx_ivlan;
+	struct npc_lt_def_color	rx_gen0_color;
+	struct npc_lt_def_color	rx_gen1_color;
+	struct npc_lt_def_et	rx_et[2];
+} __packed;
 
 /* Loadable KPU profile firmware data */
 struct npc_kpu_profile_fwdata {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index de3a60c12392..980435e5a00a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -11,7 +11,7 @@
 #ifndef NPC_PROFILE_H
 #define NPC_PROFILE_H
 
-#define NPC_KPU_PROFILE_VER	0x0000000100050000
+#define NPC_KPU_PROFILE_VER	0x0000000100060000
 #define NPC_KPU_VER_MAJ(ver)	((u16)(((ver) >> 32) & 0xFFFF))
 #define NPC_KPU_VER_MIN(ver)	((u16)(((ver) >> 16) & 0xFFFF))
 #define NPC_KPU_VER_PATCH(ver)	((u16)((ver) & 0xFFFF))
@@ -13480,6 +13480,30 @@ static const struct npc_lt_def_cfg npc_lt_defaults = {
 			.ltype_match = NPC_LT_LG_TU_IP,
 			.ltype_mask = 0x0F,
 	},
+	.rx_apad0 = {
+		.valid = 0,
+		.lid = NPC_LID_LC,
+		.ltype_match = NPC_LT_LC_IP6,
+		.ltype_mask = 0x0F,
+	},
+	.rx_apad1 = {
+		.valid = 0,
+		.lid = NPC_LID_LC,
+		.ltype_match = NPC_LT_LC_IP6,
+		.ltype_mask = 0x0F,
+	},
+	.rx_et = {
+		{
+			.lid = NPC_LID_LB,
+			.ltype_match = NPC_LT_NA,
+			.ltype_mask = 0x0,
+		},
+		{
+			.lid = NPC_LID_LB,
+			.ltype_match = NPC_LT_NA,
+			.ltype_mask = 0x0,
+		},
+	},
 };
 
 static struct npc_mcam_kex npc_mkex_default = {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 0a8bd667cb11..174ef09f9069 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3523,6 +3523,40 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
 			    (ltdefs->rx_isctp.lid << 8) | (ltdefs->rx_isctp.ltype_match << 4) |
 			    ltdefs->rx_isctp.ltype_mask);
 
+		if (!is_rvu_otx2(rvu)) {
+			/* Enable APAD calculation for other protocols
+			 * matching APAD0 and APAD1 lt def registers.
+			 */
+			rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_CST_APAD0,
+				    (ltdefs->rx_apad0.valid << 11) |
+				    (ltdefs->rx_apad0.lid << 8) |
+				    (ltdefs->rx_apad0.ltype_match << 4) |
+				    ltdefs->rx_apad0.ltype_mask);
+			rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_CST_APAD1,
+				    (ltdefs->rx_apad1.valid << 11) |
+				    (ltdefs->rx_apad1.lid << 8) |
+				    (ltdefs->rx_apad1.ltype_match << 4) |
+				    ltdefs->rx_apad1.ltype_mask);
+
+			/* Receive ethertype defination register defines layer
+			 * information in NPC_RESULT_S to identify the Ethertype
+			 * location in L2 header. Used for Ethertype overwriting
+			 * in inline IPsec flow.
+			 */
+			rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_ET(0),
+				    (ltdefs->rx_et[0].offset << 12) |
+				    (ltdefs->rx_et[0].valid << 11) |
+				    (ltdefs->rx_et[0].lid << 8) |
+				    (ltdefs->rx_et[0].ltype_match << 4) |
+				    ltdefs->rx_et[0].ltype_mask);
+			rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_ET(1),
+				    (ltdefs->rx_et[1].offset << 12) |
+				    (ltdefs->rx_et[1].valid << 11) |
+				    (ltdefs->rx_et[1].lid << 8) |
+				    (ltdefs->rx_et[1].ltype_match << 4) |
+				    ltdefs->rx_et[1].ltype_mask);
+		}
+
 		err = nix_rx_flowkey_alg_cfg(rvu, blkaddr);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 91e9467c5f69..52ee58ce9339 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1365,6 +1365,19 @@ static int npc_apply_custom_kpu(struct rvu *rvu,
 			 NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER));
 		return -EINVAL;
 	}
+	/* Verify if profile is aligned with the required kernel changes */
+	if (NPC_KPU_VER_MIN(profile->version) <
+	    NPC_KPU_VER_MIN(NPC_KPU_PROFILE_VER)) {
+		dev_warn(rvu->dev,
+			 "Invalid KPU profile version: %d.%d.%d expected vesion <= %d.%d.%d\n",
+			 NPC_KPU_VER_MAJ(profile->version),
+			 NPC_KPU_VER_MIN(profile->version),
+			 NPC_KPU_VER_PATCH(profile->version),
+			 NPC_KPU_VER_MAJ(NPC_KPU_PROFILE_VER),
+			 NPC_KPU_VER_MIN(NPC_KPU_PROFILE_VER),
+			 NPC_KPU_VER_PATCH(NPC_KPU_PROFILE_VER));
+		return -EINVAL;
+	}
 	/* Verify if profile fits the HW */
 	if (fw->kpus > profile->kpus) {
 		dev_warn(rvu->dev, "Not enough KPUs: %d > %ld\n", fw->kpus,
@@ -1443,6 +1456,7 @@ static void npc_load_kpu_profile(struct rvu *rvu)
 	struct npc_kpu_profile_adapter *profile = &rvu->kpu;
 	const char *kpu_profile = rvu->kpu_pfl_name;
 	const struct firmware *fw = NULL;
+	bool retry_fwdb = false;
 
 	/* If user not specified profile customization */
 	if (!strncmp(kpu_profile, def_pfl_name, KPU_NAME_LEN))
@@ -1464,6 +1478,7 @@ static void npc_load_kpu_profile(struct rvu *rvu)
 			rvu->kpu_fwdata_sz = fw->size;
 		}
 		release_firmware(fw);
+		retry_fwdb = true;
 		goto program_kpu;
 	}
 
@@ -1488,7 +1503,10 @@ program_kpu:
 			}
 			rvu->kpu_fwdata = NULL;
 			rvu->kpu_fwdata_sz = 0;
-			goto load_image_fwdb;
+			if (retry_fwdb) {
+				retry_fwdb = false;
+				goto load_image_fwdb;
+			}
 		}
 
 		dev_warn(rvu->dev,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index ac71c0f2f960..ce365ae80352 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -208,7 +208,7 @@
 #define NIX_AF_RVU_INT_ENA_W1S		(0x01D0)
 #define NIX_AF_RVU_INT_ENA_W1C		(0x01D8)
 #define NIX_AF_TCP_TIMER		(0x01E0)
-#define NIX_AF_RX_WQE_TAG_CTL		(0x01F0)
+#define NIX_AF_RX_DEF_ET(a)		(0x01F0ull | (uint64_t)(a) << 3)
 #define NIX_AF_RX_DEF_OL2		(0x0200)
 #define NIX_AF_RX_DEF_OIP4		(0x0210)
 #define NIX_AF_RX_DEF_IIP4		(0x0220)
@@ -219,8 +219,10 @@
 #define NIX_AF_RX_DEF_OUDP		(0x0270)
 #define NIX_AF_RX_DEF_IUDP		(0x0280)
 #define NIX_AF_RX_DEF_OSCTP		(0x0290)
+#define NIX_AF_RX_DEF_CST_APAD0		(0x0298)
 #define NIX_AF_RX_DEF_ISCTP		(0x02A0)
 #define NIX_AF_RX_DEF_IPSECX		(0x02B0)
+#define NIX_AF_RX_DEF_CST_APAD1		(0x02A8)
 #define NIX_AF_RX_IPSEC_GEN_CFG		(0x0300)
 #define NIX_AF_RX_CPTX_INST_ADDR	(0x0310)
 #define NIX_AF_NDC_TX_SYNC		(0x03F0)

From 11c730bfbf5b9eecdf0de1267314ab3e5ea4d896 Mon Sep 17 00:00:00 2001
From: Harman Kalra <hkalra@marvell.com>
Date: Thu, 27 May 2021 15:14:38 +0530
Subject: [PATCH 0683/3292] octeontx2-af: support for coalescing KPU profiles

Adding support to load a new type of KPU image, known as coalesced/
consolidated KPU image via firmware database. This image is a
consolidation of multiple KPU profiles into a single image.

During kernel bootup this coalesced image will be read via
firmware database and only the relevant KPU profile will be loaded.
Existing functionality of loading single KPU/MKEX profile
is intact as the images are differentiated based on the image signature.

Signed-off-by: Harman Kalra <hkalra@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: George Cherian <george.cherian@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/marvell/octeontx2/af/npc.h   | 11 +++
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 83 +++++++++++++++----
 2 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 8114c5fb0c2c..8afa1c6691f6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -427,6 +427,17 @@ struct nix_tx_action {
 #define NIXLF_BCAST_ENTRY	1
 #define NIXLF_PROMISC_ENTRY	2
 
+struct npc_coalesced_kpu_prfl {
+#define NPC_SIGN	0x00666f727063706e
+#define NPC_PRFL_NAME   "npc_prfls_array"
+#define NPC_NAME_LEN	32
+	__le64 signature; /* "npcprof\0" (8 bytes/ASCII characters) */
+	u8 name[NPC_NAME_LEN]; /* KPU Profile name */
+	u64 version; /* KPU firmware/profile version */
+	u8 num_prfl; /* No of NPC profiles. */
+	u16 prfl_sz[0];
+};
+
 struct npc_mcam_kex {
 	/* MKEX Profle Header */
 	u64 mkex_sign; /* "mcam-kex-profile" (8 bytes/ASCII characters) */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 52ee58ce9339..bd63305ba6d2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -27,6 +27,8 @@
 #define NPC_KEX_CHAN_MASK		0xFFFULL
 #define NPC_KEX_PF_FUNC_MASK		0xFFFFULL
 
+#define ALIGN_8B_CEIL(__a)	(((__a) + 7) & (-8))
+
 static const char def_pfl_name[] = "default";
 
 static void npc_mcam_free_all_entries(struct rvu *rvu, struct npc_mcam *mcam,
@@ -1417,28 +1419,78 @@ static int npc_apply_custom_kpu(struct rvu *rvu,
 	return 0;
 }
 
+static int npc_load_kpu_prfl_img(struct rvu *rvu, void __iomem *prfl_addr,
+				 u64 prfl_sz, const char *kpu_profile)
+{
+	struct npc_kpu_profile_fwdata *kpu_data = NULL;
+	int rc = -EINVAL;
+
+	kpu_data = (struct npc_kpu_profile_fwdata __force *)prfl_addr;
+	if (le64_to_cpu(kpu_data->signature) == KPU_SIGN &&
+	    !strncmp(kpu_data->name, kpu_profile, KPU_NAME_LEN)) {
+		dev_info(rvu->dev, "Loading KPU profile from firmware db: %s\n",
+			 kpu_profile);
+		rvu->kpu_fwdata = kpu_data;
+		rvu->kpu_fwdata_sz = prfl_sz;
+		rvu->kpu_prfl_addr = prfl_addr;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz,
+					 const char *kpu_profile)
+{
+	struct npc_coalesced_kpu_prfl *img_data = NULL;
+	int i = 0, rc = -EINVAL;
+	void __iomem *kpu_prfl_addr;
+	u16 offset;
+
+	img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
+	if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
+	    !strncmp(img_data->name, kpu_profile, KPU_NAME_LEN)) {
+		/* Loaded profile is a single KPU profile. */
+		rc = npc_load_kpu_prfl_img(rvu, rvu->kpu_prfl_addr,
+					   prfl_sz, kpu_profile);
+		goto done;
+	}
+
+	/* Loaded profile is coalesced image, offset of first KPU profile.*/
+	offset = offsetof(struct npc_coalesced_kpu_prfl, prfl_sz) +
+		(img_data->num_prfl * sizeof(uint16_t));
+	/* Check if mapped image is coalesced image. */
+	while (i < img_data->num_prfl) {
+		/* Profile image offsets are rounded up to next 8 multiple.*/
+		offset = ALIGN_8B_CEIL(offset);
+		kpu_prfl_addr = (void __iomem *)((uintptr_t)rvu->kpu_prfl_addr +
+					 offset);
+		rc = npc_load_kpu_prfl_img(rvu, kpu_prfl_addr,
+					   img_data->prfl_sz[i], kpu_profile);
+		if (!rc)
+			break;
+		/* Calculating offset of profile image based on profile size.*/
+		offset += img_data->prfl_sz[i];
+		i++;
+	}
+done:
+	return rc;
+}
+
 static int npc_load_kpu_profile_fwdb(struct rvu *rvu, const char *kpu_profile)
 {
-	struct npc_kpu_profile_fwdata *kpu_fw = NULL;
+	int ret = -EINVAL;
 	u64 prfl_sz;
-	int ret;
 
 	/* Setting up the mapping for NPC profile image */
 	ret = npc_fwdb_prfl_img_map(rvu, &rvu->kpu_prfl_addr, &prfl_sz);
 	if (ret < 0)
-		return ret;
+		goto done;
 
-	rvu->kpu_fwdata =
-		(struct npc_kpu_profile_fwdata __force *)rvu->kpu_prfl_addr;
-	rvu->kpu_fwdata_sz = prfl_sz;
-
-	kpu_fw = rvu->kpu_fwdata;
-	if (le64_to_cpu(kpu_fw->signature) == KPU_SIGN &&
-	    !strncmp(kpu_fw->name, kpu_profile, KPU_NAME_LEN)) {
-		dev_info(rvu->dev, "Loading KPU profile from firmware db: %s\n",
-			 kpu_profile);
-		return 0;
-	}
+	/* Detect if profile is coalesced or single KPU profile and load */
+	ret = npc_fwdb_detect_load_prfl_img(rvu, prfl_sz, kpu_profile);
+	if (ret == 0)
+		goto done;
 
 	/* Cleaning up if KPU profile image from fwdata is not valid. */
 	if (rvu->kpu_prfl_addr) {
@@ -1448,7 +1500,8 @@ static int npc_load_kpu_profile_fwdb(struct rvu *rvu, const char *kpu_profile)
 		rvu->kpu_fwdata = NULL;
 	}
 
-	return -EINVAL;
+done:
+	return ret;
 }
 
 static void npc_load_kpu_profile(struct rvu *rvu)

From f9c49be90c0536c7f51dfab788639586e0ccc2a8 Mon Sep 17 00:00:00 2001
From: George Cherian <george.cherian@marvell.com>
Date: Thu, 27 May 2021 15:14:39 +0530
Subject: [PATCH 0684/3292] octeontx2-af: Update the default KPU profile and
 fixes

Add support for parsing following
 1. NGIO
 2. PPPOE
 3. 24 byte custom L2 header
 4. CPT Header
 5. Fragmented CPT packets
 6. VLAN EXDSA

Fix for
 1. EDSA VLAN parsing
 2. Enhance FDSA
 3. CPT Header parsing

Remove ITAG support

Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: Harman Kalra <hkalra@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
Signed-off-by: George Cherian <george.cherian@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/marvell/octeontx2/af/npc.h   |   21 +-
 .../marvell/octeontx2/af/npc_profile.h        | 3753 +++++++++++++----
 2 files changed, 2917 insertions(+), 857 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 8afa1c6691f6..fe19704173a1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -33,6 +33,10 @@ enum npc_kpu_la_ltype {
 	NPC_LT_LA_IH_2_ETHER,
 	NPC_LT_LA_HIGIG2_ETHER,
 	NPC_LT_LA_IH_NIX_HIGIG2_ETHER,
+	NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+	NPC_LT_LA_CH_LEN_90B_ETHER,
+	NPC_LT_LA_CPT_HDR,
+	NPC_LT_LA_CUSTOM_L2_24B_ETHER,
 	NPC_LT_LA_CUSTOM0 = 0xE,
 	NPC_LT_LA_CUSTOM1 = 0xF,
 };
@@ -42,7 +46,7 @@ enum npc_kpu_lb_ltype {
 	NPC_LT_LB_CTAG,
 	NPC_LT_LB_STAG_QINQ,
 	NPC_LT_LB_BTAG,
-	NPC_LT_LB_ITAG,
+	NPC_LT_LB_PPPOE,
 	NPC_LT_LB_DSA,
 	NPC_LT_LB_DSA_VLAN,
 	NPC_LT_LB_EDSA,
@@ -50,6 +54,7 @@ enum npc_kpu_lb_ltype {
 	NPC_LT_LB_EXDSA,
 	NPC_LT_LB_EXDSA_VLAN,
 	NPC_LT_LB_FDSA,
+	NPC_LT_LB_VLAN_EXDSA,
 	NPC_LT_LB_CUSTOM0 = 0xE,
 	NPC_LT_LB_CUSTOM1 = 0xF,
 };
@@ -65,6 +70,7 @@ enum npc_kpu_lc_ltype {
 	NPC_LT_LC_NSH,
 	NPC_LT_LC_PTP,
 	NPC_LT_LC_FCOE,
+	NPC_LT_LC_NGIO,
 	NPC_LT_LC_CUSTOM0 = 0xE,
 	NPC_LT_LC_CUSTOM1 = 0xF,
 };
@@ -146,7 +152,14 @@ enum npc_kpu_lh_ltype {
  * Ethernet interfaces, LBK interfaces, etc.
  */
 enum npc_pkind_type {
-	NPC_TX_DEF_PKIND = 63ULL,	/* NIX-TX PKIND */
+	NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
+	NPC_RX_CHLEN24B_PKIND = 57ULL,
+	NPC_RX_CPT_HDR_PKIND,
+	NPC_RX_CHLEN90B_PKIND,
+	NPC_TX_HIGIG_PKIND,
+	NPC_RX_HIGIG_PKIND,
+	NPC_RX_EDSA_PKIND,
+	NPC_TX_DEF_PKIND,	/* NIX-TX PKIND */
 };
 
 /* list of known and supported fields in packet header and
@@ -521,8 +534,8 @@ struct npc_lt_def_cfg {
 	struct npc_lt_def	pck_iip4;
 	struct npc_lt_def_apad	rx_apad0;
 	struct npc_lt_def_apad	rx_apad1;
-	struct npc_lt_def_color	rx_ovlan;
-	struct npc_lt_def_color	rx_ivlan;
+	struct npc_lt_def_color	ovlan;
+	struct npc_lt_def_color	ivlan;
 	struct npc_lt_def_color	rx_gen0_color;
 	struct npc_lt_def_color	rx_gen1_color;
 	struct npc_lt_def_et	rx_et[2];
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index 980435e5a00a..fee655cc7523 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -23,6 +23,7 @@
 #define NPC_ETYPE_IP6		0x86dd
 #define NPC_ETYPE_ARP		0x0806
 #define NPC_ETYPE_RARP		0x8035
+#define NPC_ETYPE_NGIO		0x8842
 #define NPC_ETYPE_MPLSU		0x8847
 #define NPC_ETYPE_MPLSM		0x8848
 #define NPC_ETYPE_ETAG		0x893f
@@ -36,6 +37,10 @@
 #define NPC_ETYPE_PPP		0x880b
 #define NPC_ETYPE_NSH		0x894f
 #define NPC_ETYPE_DSA		0xdada
+#define NPC_ETYPE_PPPOE		0x8864
+
+#define NPC_PPP_IP		0x0021
+#define NPC_PPP_IP6		0x0057
 
 #define NPC_IPNH_HOP		0
 #define NPC_IPNH_ICMP		1
@@ -145,14 +150,15 @@
 #define NPC_DSA_EDSA		0x8000
 #define NPC_DSA_FDSA		0xc000
 
-#define NPC_KEXOF_DMAC	8
-#define MKEX_SIGN	0x19bbfdbd15f /* strtoull of "mkexprof" with base:36 */
+#define NPC_KEXOF_DMAC	9
+#define MKEX_SIGN      0x19bbfdbd15f
 #define KEX_LD_CFG(bytesm1, hdr_ofs, ena, flags_ena, key_ofs)		\
 			(((bytesm1) << 16) | ((hdr_ofs) << 8) | ((ena) << 7) | \
 			 ((flags_ena) << 6) | ((key_ofs) & 0x3F))
 
 /* Rx parse key extract nibble enable */
 #define NPC_PARSE_NIBBLE_INTF_RX	(NPC_PARSE_NIBBLE_CHAN | \
+					 NPC_PARSE_NIBBLE_ERRCODE | \
 					 NPC_PARSE_NIBBLE_LA_LTYPE | \
 					 NPC_PARSE_NIBBLE_LB_LTYPE | \
 					 NPC_PARSE_NIBBLE_LC_LTYPE | \
@@ -173,25 +179,31 @@ enum npc_kpu_parser_state {
 	NPC_S_KPU1_EXDSA,
 	NPC_S_KPU1_HIGIG2,
 	NPC_S_KPU1_IH_NIX_HIGIG2,
+	NPC_S_KPU1_CUSTOM_L2_90B,
+	NPC_S_KPU1_CPT_HDR,
+	NPC_S_KPU1_CUSTOM_L2_24B,
+	NPC_S_KPU1_VLAN_EXDSA,
 	NPC_S_KPU2_CTAG,
 	NPC_S_KPU2_CTAG2,
 	NPC_S_KPU2_SBTAG,
 	NPC_S_KPU2_QINQ,
 	NPC_S_KPU2_ETAG,
-	NPC_S_KPU2_ITAG,
 	NPC_S_KPU2_PREHEADER,
 	NPC_S_KPU2_EXDSA,
+	NPC_S_KPU2_NGIO,
 	NPC_S_KPU3_CTAG,
 	NPC_S_KPU3_STAG,
 	NPC_S_KPU3_QINQ,
-	NPC_S_KPU3_ITAG,
 	NPC_S_KPU3_CTAG_C,
 	NPC_S_KPU3_STAG_C,
 	NPC_S_KPU3_QINQ_C,
 	NPC_S_KPU3_DSA,
+	NPC_S_KPU3_VLAN_EXDSA,
 	NPC_S_KPU4_MPLS,
 	NPC_S_KPU4_NSH,
 	NPC_S_KPU4_FDSA,
+	NPC_S_KPU4_VLAN_EXDSA,
+	NPC_S_KPU4_PPPOE,
 	NPC_S_KPU5_IP,
 	NPC_S_KPU5_IP6,
 	NPC_S_KPU5_ARP,
@@ -201,13 +213,19 @@ enum npc_kpu_parser_state {
 	NPC_S_KPU5_MPLS,
 	NPC_S_KPU5_MPLS_PL,
 	NPC_S_KPU5_NSH,
+	NPC_S_KPU5_CPT_IP,
+	NPC_S_KPU5_CPT_IP6,
 	NPC_S_KPU6_IP6_EXT,
 	NPC_S_KPU6_IP6_HOP_DEST,
 	NPC_S_KPU6_IP6_ROUT,
 	NPC_S_KPU6_IP6_FRAG,
+	NPC_S_KPU6_IP6_CPT_FRAG,
+	NPC_S_KPU6_IP6_CPT_HOP_DEST,
+	NPC_S_KPU6_IP6_CPT_ROUT,
 	NPC_S_KPU7_IP6_EXT,
 	NPC_S_KPU7_IP6_ROUT,
 	NPC_S_KPU7_IP6_FRAG,
+	NPC_S_KPU7_CPT_IP6_FRAG,
 	NPC_S_KPU8_TCP,
 	NPC_S_KPU8_UDP,
 	NPC_S_KPU8_SCTP,
@@ -268,7 +286,6 @@ enum npc_kpu_la_lflag {
 	NPC_F_LA_L_UNK_ETYPE = 1,
 	NPC_F_LA_L_WITH_VLAN,
 	NPC_F_LA_L_WITH_ETAG,
-	NPC_F_LA_L_WITH_ITAG,
 	NPC_F_LA_L_WITH_MPLS,
 	NPC_F_LA_L_WITH_NSH,
 };
@@ -974,7 +991,7 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		12, 16, 20, 0, 0,
-		NPC_S_KPU1_ETHER, 0, 0,
+		NPC_S_KPU1_VLAN_EXDSA, 0, 0,
 		NPC_LID_LA, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -982,8 +999,8 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
 	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		12, 16, 20, 0, 0,
-		NPC_S_KPU1_ETHER, 0, 0,
+		36, 40, 44, 0, 0,
+		NPC_S_KPU1_CUSTOM_L2_24B, 0, 0,
 		NPC_LID_LA, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -991,8 +1008,8 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
 	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		12, 16, 20, 0, 0,
-		NPC_S_KPU1_ETHER, 0, 0,
+		40, 54, 58, 0, 0,
+		NPC_S_KPU1_CPT_HDR, 0, 0,
 		NPC_LID_LA, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -1000,8 +1017,8 @@ static struct npc_kpu_profile_action ikpu_action_entries[] = {
 	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		12, 16, 20, 0, 0,
-		NPC_S_KPU1_ETHER, 0, 0,
+		102, 106, 110, 0, 0,
+		NPC_S_KPU1_CUSTOM_L2_90B, 0, 0,
 		NPC_LID_LA, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -1102,6 +1119,15 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU1_ETHER, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_NGIO,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_KPU1_ETHER, 0xff,
 		NPC_ETYPE_CTAG,
@@ -1147,15 +1173,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU1_ETHER, 0xff,
-		NPC_ETYPE_ITAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU1_ETHER, 0xff,
 		NPC_ETYPE_MPLSU,
@@ -1192,6 +1209,15 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU1_ETHER, 0xff,
+		NPC_ETYPE_PPPOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_KPU1_ETHER, 0xff,
 		0x0000,
@@ -1318,15 +1344,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU1_IH_NIX, 0xff,
-		NPC_ETYPE_ITAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU1_IH_NIX, 0xff,
 		NPC_ETYPE_MPLSU,
@@ -1365,8 +1382,8 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 	},
 	{
 		NPC_S_KPU1_IH, 0xff,
-		NPC_IH_W|NPC_IH_UTAG,
-		NPC_IH_W|NPC_IH_UTAG,
+		NPC_IH_W | NPC_IH_UTAG,
+		NPC_IH_W | NPC_IH_UTAG,
 		0x0000,
 		0x0000,
 		0x0000,
@@ -1375,7 +1392,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 	{
 		NPC_S_KPU1_IH, 0xff,
 		NPC_IH_W,
-		NPC_IH_W|NPC_IH_UTAG,
+		NPC_IH_W | NPC_IH_UTAG,
 		0x0000,
 		0x0000,
 		0x0000,
@@ -1384,7 +1401,7 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 	{
 		NPC_S_KPU1_IH, 0xff,
 		0x0000,
-		NPC_IH_W|NPC_IH_UTAG,
+		NPC_IH_W | NPC_IH_UTAG,
 		0x0000,
 		0x0000,
 		0x0000,
@@ -1525,15 +1542,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU1_HIGIG2, 0xff,
-		NPC_ETYPE_ITAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU1_HIGIG2, 0xff,
 		NPC_ETYPE_MPLSU,
@@ -1669,15 +1677,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU1_IH_NIX_HIGIG2, 0xff,
-		NPC_ETYPE_ITAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU1_IH_NIX_HIGIG2, 0xff,
 		NPC_ETYPE_MPLSU,
@@ -1714,6 +1713,366 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_IP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_IP6,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_ARP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_RARP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_PTP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_FCOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_SBTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_QINQ,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_ETAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_MPLSU,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_MPLSM,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		NPC_ETYPE_NSH,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_90B, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		NPC_ETYPE_IP,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		NPC_ETYPE_IP6,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		NPC_ETYPE_QINQ,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_ETYPE_IP,
+		0xffff,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_ETYPE_IP6,
+		0xffff,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_ETYPE_CTAG,
+		0xffff,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_ETYPE_QINQ,
+		0xffff,
+	},
+	{
+		NPC_S_KPU1_CPT_HDR, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_IP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_IP6,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_ARP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_RARP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_PTP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_FCOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_SBTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_QINQ,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_ETAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_MPLSU,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_MPLSM,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		NPC_ETYPE_NSH,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_CUSTOM_L2_24B, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU1_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -1809,6 +2168,24 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU2_CTAG, 0xff,
+		NPC_ETYPE_PPPOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_PPP_IP,
+		0xffff,
+	},
+	{
+		NPC_S_KPU2_CTAG, 0xff,
+		NPC_ETYPE_PPPOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_PPP_IP6,
+		0xffff,
+	},
 	{
 		NPC_S_KPU2_CTAG, 0xff,
 		0x0000,
@@ -2250,15 +2627,6 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU2_ETAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_ITAG,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU2_ETAG, 0xff,
 		NPC_ETYPE_SBTAG,
@@ -2340,159 +2708,6 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_IP,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_IP6,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_ARP,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_RARP,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP,
-		0xffff,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP6,
-		0xffff,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_ARP,
-		0xffff,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_IP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_IP6,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_ARP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP6,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_ARP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU2_ITAG, 0xff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU2_CTAG2, 0xff,
 		NPC_ETYPE_IP,
@@ -2844,6 +3059,15 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU2_NGIO, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -3272,159 +3496,6 @@ static struct npc_kpu_profile_cam kpu3_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_IP,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_IP6,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_ARP,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_RARP,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP,
-		0xffff,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP6,
-		0xffff,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_ARP,
-		0xffff,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_IP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_IP6,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_ARP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_SBTAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_IP6,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		NPC_ETYPE_ARP,
-		0xffff,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		NPC_ETYPE_CTAG,
-		0xffff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
-	{
-		NPC_S_KPU3_ITAG, 0xff,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-		0x0000,
-	},
 	{
 		NPC_S_KPU3_CTAG_C, 0xff,
 		NPC_ETYPE_IP,
@@ -3965,6 +4036,15 @@ static struct npc_kpu_profile_cam kpu3_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU3_VLAN_EXDSA, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -4114,6 +4194,78 @@ static struct npc_kpu_profile_cam kpu4_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_IP,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_IP6,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_ARP,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_RARP,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_PTP,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_CTAG,
+		0xffff,
+		NPC_ETYPE_FCOE,
+		0xffff,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_PPPOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_PPP_IP,
+		0xffff,
+	},
+	{
+		NPC_S_KPU4_FDSA, 0xff,
+		NPC_ETYPE_PPPOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		NPC_PPP_IP6,
+		0xffff,
+	},
 	{
 		NPC_S_KPU4_FDSA, 0xff,
 		0x0000,
@@ -4123,6 +4275,87 @@ static struct npc_kpu_profile_cam kpu4_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_IP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_IP6,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_ARP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_RARP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_PTP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		NPC_ETYPE_FCOE,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_VLAN_EXDSA, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_PPPOE, 0xff,
+		NPC_PPP_IP,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU4_PPPOE, 0xff,
+		NPC_PPP_IP6,
+		0xffff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -4159,116 +4392,116 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_TCP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_UDP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_SCTP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_ICMP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_IGMP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_ESP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_AH,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_GRE,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_IP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_IP6,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		NPC_IPNH_MPLS,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -4279,7 +4512,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4288,7 +4521,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4297,7 +4530,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4306,7 +4539,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4315,7 +4548,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4324,7 +4557,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4333,7 +4566,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4342,7 +4575,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4351,7 +4584,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4360,7 +4593,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4369,7 +4602,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4378,7 +4611,7 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		NPC_IP_VER_4,
 		NPC_IP_VER_MASK,
 		0x0000,
-		NPC_IP_HDR_MF|NPC_IP_HDR_FRAGOFF,
+		NPC_IP_HDR_MF | NPC_IP_HDR_FRAGOFF,
 	},
 	{
 		NPC_S_KPU5_IP, 0xff,
@@ -4695,6 +4928,429 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		0x0000,
+		NPC_IP_TTL_MASK,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0001,
+		NPC_IP_HDR_FRAGOFF,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_TCP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_UDP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_SCTP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_ICMP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_IGMP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_ESP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_AH,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_GRE,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_IP,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_IP6,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_MPLS,
+		0x00ff,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		0x0000,
+		0x0000,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_TCP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_UDP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_SCTP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_ICMP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_IGMP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_ESP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_AH,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_GRE,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_IP,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_IP6,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		NPC_IPNH_MPLS,
+		0x00ff,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		0x0000,
+		0x0000,
+		NPC_IP_VER_4,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		0x0000,
+		NPC_IP6_HOP_MASK,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_TCP << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_UDP << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_SCTP << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_ICMP << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_ICMP6 << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_GRE << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_IP6 << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_MPLS << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_HOP << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_DEST << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_ROUT << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_FRAG << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_ESP << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_AH << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_MOBILITY << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_HOSTID << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		NPC_IPNH_SHIM6 << 8,
+		0xff00,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		0x0000,
+		0x0000,
+		NPC_IP_VER_6,
+		NPC_IP_VER_MASK,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU5_CPT_IP6, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -5042,6 +5698,330 @@ static struct npc_kpu_profile_cam kpu6_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_TCP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_UDP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_SCTP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_ICMP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_ICMP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_ESP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_AH << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_GRE << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_IP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		NPC_IPNH_MPLS << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_FRAG, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_TCP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_UDP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_SCTP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_ICMP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_ICMP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_ESP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_AH << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_GRE << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_IP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_MPLS << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_ROUT << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		NPC_IPNH_FRAG << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_TCP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_UDP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_SCTP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_ICMP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_ICMP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_ESP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_AH << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_GRE << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_IP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_MPLS << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		NPC_IPNH_FRAG << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU6_IP6_CPT_ROUT, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -5263,6 +6243,105 @@ static struct npc_kpu_profile_cam kpu7_cam_entries[] = {
 		0x0000,
 		0x0000,
 	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_TCP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_UDP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_SCTP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_ICMP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_ICMP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_ESP << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_AH << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_GRE << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_IP6 << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		NPC_IPNH_MPLS << 8,
+		0xff00,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
+	{
+		NPC_S_KPU7_CPT_IP6_FRAG, 0xff,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+		0x0000,
+	},
 	{
 		NPC_S_NA, 0X00,
 		0x0000,
@@ -5299,8 +6378,8 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_FIN,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_FIN,
 		0x0000,
 		0x0000,
 	},
@@ -5308,8 +6387,8 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_URG|NPC_TCP_FLAGS_SYN,
-		NPC_TCP_FLAGS_URG|NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_URG | NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_URG | NPC_TCP_FLAGS_SYN,
 		0x0000,
 		0x0000,
 	},
@@ -5317,8 +6396,8 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_SYN,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_SYN,
 		0x0000,
 		0x0000,
 	},
@@ -5326,8 +6405,8 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_SYN|NPC_TCP_FLAGS_FIN,
-		NPC_TCP_FLAGS_SYN|NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_SYN | NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_SYN | NPC_TCP_FLAGS_FIN,
 		0x0000,
 		0x0000,
 	},
@@ -5605,7 +6684,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSU,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5614,7 +6693,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSU,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5623,7 +6702,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSU,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5632,7 +6711,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSU,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5677,7 +6756,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSM,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5686,7 +6765,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSM,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5695,7 +6774,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSM,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5704,7 +6783,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_MPLSM,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5749,7 +6828,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_NSH,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5758,7 +6837,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_NSH,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5767,7 +6846,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_NSH,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5776,7 +6855,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_NSH,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5821,7 +6900,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5830,7 +6909,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5839,7 +6918,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5848,7 +6927,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5893,7 +6972,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP6,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5902,7 +6981,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP6,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5911,7 +6990,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP6,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5920,7 +6999,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_IP6,
 		0xffff,
-		NPC_GRE_F_CSUM|NPC_GRE_F_KEY|NPC_GRE_F_SEQ,
+		NPC_GRE_F_CSUM | NPC_GRE_F_KEY | NPC_GRE_F_SEQ,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5956,7 +7035,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_PPP,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_VER_1,
+		NPC_GRE_F_KEY | NPC_GRE_VER_1,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5965,7 +7044,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_PPP,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ|NPC_GRE_VER_1,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ | NPC_GRE_VER_1,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5974,7 +7053,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_PPP,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_ACK|NPC_GRE_VER_1,
+		NPC_GRE_F_KEY | NPC_GRE_F_ACK | NPC_GRE_VER_1,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -5983,7 +7062,7 @@ static struct npc_kpu_profile_cam kpu8_cam_entries[] = {
 		NPC_S_KPU8_GRE, 0xff,
 		NPC_ETYPE_PPP,
 		0xffff,
-		NPC_GRE_F_KEY|NPC_GRE_F_SEQ|NPC_GRE_F_ACK|NPC_GRE_VER_1,
+		NPC_GRE_F_KEY | NPC_GRE_F_SEQ | NPC_GRE_F_ACK | NPC_GRE_VER_1,
 		0xffff,
 		0x0000,
 		0x0000,
@@ -6429,8 +7508,8 @@ static struct npc_kpu_profile_cam kpu9_cam_entries[] = {
 		NPC_S_KPU9_GTPU, 0xff,
 		0x0000,
 		0x0000,
-		0x0000,
-		0x0000,
+		NPC_GTP_PT_GTP | NPC_GTP_VER1,
+		NPC_GTP_PT_MASK | NPC_GTP_VER_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -6975,8 +8054,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_TCP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -6984,8 +8063,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_UDP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -6993,8 +8072,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_SCTP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -7002,8 +8081,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_ICMP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -7011,8 +8090,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_IGMP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -7020,8 +8099,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_ESP,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -7029,8 +8108,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		NPC_IPNH_AH,
 		0x00ff,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -7038,8 +8117,8 @@ static struct npc_kpu_profile_cam kpu12_cam_entries[] = {
 		NPC_S_KPU12_TU_IP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_IP_VER_4|NPC_IP_HDR_LEN_5,
-		NPC_IP_VER_MASK|NPC_IP_HDR_LEN_MASK,
+		NPC_IP_VER_4 | NPC_IP_HDR_LEN_5,
+		NPC_IP_VER_MASK | NPC_IP_HDR_LEN_MASK,
 		0x0000,
 		0x0000,
 	},
@@ -7278,8 +8357,8 @@ static struct npc_kpu_profile_cam kpu15_cam_entries[] = {
 		NPC_S_KPU15_TU_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_FIN,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_FIN,
 		0x0000,
 		0x0000,
 	},
@@ -7287,8 +8366,8 @@ static struct npc_kpu_profile_cam kpu15_cam_entries[] = {
 		NPC_S_KPU15_TU_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_URG|NPC_TCP_FLAGS_SYN,
-		NPC_TCP_FLAGS_URG|NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_URG | NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_URG | NPC_TCP_FLAGS_SYN,
 		0x0000,
 		0x0000,
 	},
@@ -7296,8 +8375,8 @@ static struct npc_kpu_profile_cam kpu15_cam_entries[] = {
 		NPC_S_KPU15_TU_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_SYN,
-		NPC_TCP_FLAGS_RST|NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_SYN,
+		NPC_TCP_FLAGS_RST | NPC_TCP_FLAGS_SYN,
 		0x0000,
 		0x0000,
 	},
@@ -7305,8 +8384,8 @@ static struct npc_kpu_profile_cam kpu15_cam_entries[] = {
 		NPC_S_KPU15_TU_TCP, 0xff,
 		0x0000,
 		0x0000,
-		NPC_TCP_FLAGS_SYN|NPC_TCP_FLAGS_FIN,
-		NPC_TCP_FLAGS_SYN|NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_SYN | NPC_TCP_FLAGS_FIN,
+		NPC_TCP_FLAGS_SYN | NPC_TCP_FLAGS_FIN,
 		0x0000,
 		0x0000,
 	},
@@ -7566,6 +8645,14 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 12, 0, 0, 0,
+		NPC_S_KPU2_NGIO, 12, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 12, 0, 0, 0,
@@ -7576,7 +8663,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		4, 8, 0, 0, 0,
+		4, 8, 12, 0, 0,
 		NPC_S_KPU2_CTAG, 12, 1,
 		NPC_LID_LA, NPC_LT_LA_ETHER,
 		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
@@ -7606,14 +8693,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ETAG,
 		0, 0, 0, 0,
 	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		18, 22, 26, 0, 0,
-		NPC_S_KPU2_ITAG, 12, 1,
-		NPC_LID_LA, NPC_LT_LA_ETHER,
-		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ITAG,
-		0, 0, 0, 0,
-	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 6, 10, 2, 0,
@@ -7646,6 +8725,14 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 12, 0, 2, 0,
+		NPC_S_KPU4_PPPOE, 12, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -7763,15 +8850,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 			| NPC_F_LA_L_WITH_ETAG,
 		0, 0, 0, 0,
 	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		18, 22, 26, 0, 0,
-		NPC_S_KPU2_ITAG, 20, 1,
-		NPC_LID_LA, NPC_LT_LA_IH_NIX_ETHER,
-		NPC_F_LA_U_HAS_IH_NIX | NPC_F_LA_U_HAS_TAG
-			| NPC_F_LA_L_WITH_ITAG,
-		0, 0, 0, 0,
-	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 6, 10, 2, 0,
@@ -7846,7 +8924,7 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		4, 8, 16, 2, 0,
+		4, 8, 12, 2, 0,
 		NPC_S_KPU4_FDSA, 12, 1,
 		NPC_LID_LA, NPC_LT_LA_ETHER,
 		0,
@@ -7953,15 +9031,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 			| NPC_F_LA_L_WITH_ETAG,
 		0, 0, 0, 0,
 	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		18, 22, 26, 0, 0,
-		NPC_S_KPU2_ITAG, 28, 1,
-		NPC_LID_LA, NPC_LT_LA_HIGIG2_ETHER,
-		NPC_F_LA_U_HAS_HIGIG2 | NPC_F_LA_U_HAS_TAG
-			| NPC_F_LA_L_WITH_ITAG,
-		0, 0, 0, 0,
-	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 6, 10, 2, 0,
@@ -8087,15 +9156,6 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 			| NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ETAG,
 		0, 0, 0, 0,
 	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		18, 22, 26, 0, 0,
-		NPC_S_KPU2_ITAG, 36, 1,
-		NPC_LID_LA, NPC_LT_LA_IH_NIX_HIGIG2_ETHER,
-		NPC_F_LA_U_HAS_IH_NIX | NPC_F_LA_U_HAS_HIGIG2
-			| NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ITAG,
-		0, 0, 0, 0,
-	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 6, 10, 2, 0,
@@ -8132,6 +9192,326 @@ static struct npc_kpu_profile_action kpu1_action_entries[] = {
 			| NPC_F_LA_L_UNK_ETYPE,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 3, 0,
+		NPC_S_KPU5_IP, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 3, 0,
+		NPC_S_KPU5_IP6, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_ARP, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_RARP, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_PTP, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_FCOE, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 12, 0, 0, 0,
+		NPC_S_KPU2_CTAG2, 102, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_CTAG, 102, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 22, 0, 0,
+		NPC_S_KPU2_SBTAG, 102, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_QINQ, 102, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 12, 26, 0, 0,
+		NPC_S_KPU2_ETAG, 102, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ETAG,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU4_MPLS, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_L_WITH_MPLS,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU4_MPLS, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_L_WITH_MPLS,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 0, 0, 2, 0,
+		NPC_S_KPU4_NSH, 104, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_L_WITH_NSH,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_90B_ETHER,
+		NPC_F_LA_L_UNK_ETYPE,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 3, 0,
+		NPC_S_KPU5_CPT_IP, 56, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 3, 0,
+		NPC_S_KPU5_CPT_IP6, 56, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_CTAG, 54, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_QINQ, 54, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 3, 0,
+		NPC_S_KPU5_CPT_IP, 60, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 3, 0,
+		NPC_S_KPU5_CPT_IP6, 60, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_CTAG, 58, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_QINQ, 58, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_CPT_HDR,
+		NPC_F_LA_L_UNK_ETYPE,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 3, 0,
+		NPC_S_KPU5_IP, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 3, 0,
+		NPC_S_KPU5_IP6, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_ARP, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_RARP, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_PTP, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU5_FCOE, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 12, 0, 0, 0,
+		NPC_S_KPU2_CTAG2, 36, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_CTAG, 36, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 22, 0, 0,
+		NPC_S_KPU2_SBTAG, 36, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		4, 8, 0, 0, 0,
+		NPC_S_KPU2_QINQ, 36, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_VLAN,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 12, 26, 0, 0,
+		NPC_S_KPU2_ETAG, 36, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_U_HAS_TAG | NPC_F_LA_L_WITH_ETAG,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU4_MPLS, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_L_WITH_MPLS,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU4_MPLS, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_L_WITH_MPLS,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 0, 0, 2, 0,
+		NPC_S_KPU4_NSH, 38, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_L_WITH_NSH,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LA, NPC_LT_LA_CUSTOM_L2_24B_ETHER,
+		NPC_F_LA_L_UNK_ETYPE,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		12, 0, 0, 1, 0,
+		NPC_S_KPU3_VLAN_EXDSA, 12, 1,
+		NPC_LID_LA, NPC_LT_LA_ETHER,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LA, NPC_EC_L2_K1,
 		0, 0, 0, 0, 1,
@@ -8217,6 +9597,22 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 2, 0,
+		NPC_S_KPU5_IP, 14, 1,
+		NPC_LID_LB, NPC_LT_LB_PPPOE,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 2, 0,
+		NPC_S_KPU5_IP6, 14, 1,
+		NPC_LID_LB, NPC_LT_LB_PPPOE,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -8230,7 +9626,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		8, 0, 6, 2, 0,
 		NPC_S_KPU5_IP, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8238,7 +9634,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		6, 0, 0, 2, 0,
 		NPC_S_KPU5_IP6, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8246,7 +9642,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_ARP, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8254,7 +9650,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_RARP, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8262,7 +9658,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_PTP, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8270,7 +9666,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_FCOE, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8278,7 +9674,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 10, 1, 0,
 		NPC_S_KPU4_MPLS, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8286,7 +9682,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 10, 1, 0,
 		NPC_S_KPU4_MPLS, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8294,7 +9690,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 1, 0,
 		NPC_S_KPU4_NSH, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8302,7 +9698,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 0, 1,
 		NPC_S_NA, 0, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG_UNK,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG_UNK,
 		0, 0, 0, 0,
 	},
 	{
@@ -8310,7 +9706,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 0, 0, 0,
 		NPC_S_KPU3_CTAG, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_STAG_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8318,7 +9714,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 0, 0, 0,
 		NPC_S_KPU3_STAG, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_STAG_QINQ,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG_STAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_STAG_STAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8326,7 +9722,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		8, 0, 6, 2, 0,
 		NPC_S_KPU5_IP, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8334,7 +9730,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		6, 0, 0, 2, 0,
 		NPC_S_KPU5_IP6, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8342,7 +9738,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_ARP, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8350,7 +9746,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_RARP, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8358,7 +9754,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_PTP, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8366,7 +9762,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_FCOE, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8374,7 +9770,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 10, 1, 0,
 		NPC_S_KPU4_MPLS, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8382,7 +9778,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 10, 1, 0,
 		NPC_S_KPU4_MPLS, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8390,7 +9786,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 1, 0,
 		NPC_S_KPU4_NSH, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8398,7 +9794,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 0, 0,
 		NPC_S_KPU3_STAG, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG_STAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG_STAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8406,7 +9802,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 0, 0,
 		NPC_S_KPU3_CTAG, 24, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8414,7 +9810,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 0, 1,
 		NPC_S_NA, 0, 1,
 		NPC_LID_LB, NPC_LT_LB_BTAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG_UNK,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG_UNK,
 		0, 0, 0, 0,
 	},
 	{
@@ -8606,15 +10002,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 0, 0,
 		NPC_S_KPU3_CTAG, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		16, 20, 24, 0, 0,
-		NPC_S_KPU3_ITAG, 14, 1,
-		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_BTAG_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8622,7 +10010,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 0, 0, 0,
 		NPC_S_KPU3_STAG, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_STAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8630,7 +10018,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 6, 0, 0, 0,
 		NPC_S_KPU3_QINQ, 10, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_QINQ,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_QINQ,
 		0, 0, 0, 0,
 	},
 	{
@@ -8638,7 +10026,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		8, 0, 6, 2, 0,
 		NPC_S_KPU5_IP, 28, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8646,7 +10034,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		6, 0, 0, 2, 0,
 		NPC_S_KPU5_IP6, 28, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8654,7 +10042,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 2, 0,
 		NPC_S_KPU5_ARP, 28, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8662,7 +10050,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 0, 0,
 		NPC_S_KPU3_STAG, 28, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG_STAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG_STAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8670,7 +10058,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		2, 0, 0, 0, 0,
 		NPC_S_KPU3_CTAG, 28, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG_CTAG,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG_CTAG,
 		0, 0, 0, 0,
 	},
 	{
@@ -8678,7 +10066,7 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		0, 0, 0, 0, 1,
 		NPC_S_NA, 0, 1,
 		NPC_LID_LB, NPC_LT_LB_ETAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_ITAG_UNK,
+		NPC_F_LB_U_MORE_TAG | NPC_F_LB_L_WITH_ITAG_UNK,
 		0, 0, 0, 0,
 	},
 	{
@@ -8689,142 +10077,6 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		NPC_F_LB_U_UNK_ETYPE,
 		0, 0, 0, 0,
 	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 2, 0,
-		NPC_S_KPU5_IP, 20, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 2, 0,
-		NPC_S_KPU5_IP6, 20, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_ARP, 20, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_RARP, 20, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 2, 0,
-		NPC_S_KPU5_IP, 28, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 2, 0,
-		NPC_S_KPU5_IP6, 28, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_ARP, 28, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 2, 0,
-		NPC_S_KPU5_IP, 24, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 2, 0,
-		NPC_S_KPU5_IP6, 24, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_ARP, 24, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_STAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 2, 0,
-		NPC_S_KPU5_IP, 24, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 2, 0,
-		NPC_S_KPU5_IP6, 24, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_ARP, 24, 1,
-		NPC_LID_LB, NPC_LT_LB_ITAG,
-		NPC_F_LB_U_MORE_TAG|NPC_F_LB_L_WITH_CTAG,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 2, 0,
@@ -9137,6 +10389,14 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = {
 		NPC_F_LB_U_UNK_ETYPE | NPC_F_LB_L_EXDSA,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_NGIO,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LB, NPC_EC_L2_K3,
 		0, 0, 0, 0, 1,
@@ -9153,7 +10413,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 1, 0,
-		NPC_S_KPU5_IP, 4, 0,
+		NPC_S_KPU5_IP, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9161,7 +10421,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		6, 0, 0, 1, 0,
-		NPC_S_KPU5_IP6, 4, 0,
+		NPC_S_KPU5_IP6, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9169,7 +10429,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 1, 0,
-		NPC_S_KPU5_ARP, 4, 0,
+		NPC_S_KPU5_ARP, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9177,7 +10437,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 1, 0,
-		NPC_S_KPU5_RARP, 4, 0,
+		NPC_S_KPU5_RARP, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9185,7 +10445,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 1, 0,
-		NPC_S_KPU5_PTP, 4, 0,
+		NPC_S_KPU5_PTP, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9193,7 +10453,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 1, 0,
-		NPC_S_KPU5_FCOE, 4, 0,
+		NPC_S_KPU5_FCOE, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9201,7 +10461,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 6, 10, 0, 0,
-		NPC_S_KPU4_MPLS, 4, 0,
+		NPC_S_KPU4_MPLS, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9209,7 +10469,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 6, 10, 0, 0,
-		NPC_S_KPU4_MPLS, 4, 0,
+		NPC_S_KPU4_MPLS, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9217,7 +10477,7 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		2, 0, 0, 0, 0,
-		NPC_S_KPU4_NSH, 4, 0,
+		NPC_S_KPU4_NSH, 6, 0,
 		NPC_LID_LB, NPC_LT_NA,
 		0,
 		0, 0, 0, 0,
@@ -9518,142 +10778,6 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 2, 0,
-		NPC_S_KPU5_IP, 18, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 2, 0,
-		NPC_S_KPU5_IP6, 18, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_ARP, 18, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 2, 0,
-		NPC_S_KPU5_RARP, 18, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 1, 0,
-		NPC_S_KPU5_IP, 26, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 1, 0,
-		NPC_S_KPU5_IP6, 26, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 1, 0,
-		NPC_S_KPU5_ARP, 26, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 1, 0,
-		NPC_S_KPU5_IP, 22, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 1, 0,
-		NPC_S_KPU5_IP6, 22, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 1, 0,
-		NPC_S_KPU5_ARP, 22, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		8, 0, 6, 1, 0,
-		NPC_S_KPU5_IP, 22, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		6, 0, 0, 1, 0,
-		NPC_S_KPU5_IP6, 22, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 1, 0,
-		NPC_S_KPU5_ARP, 22, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
-	{
-		NPC_ERRLEV_LB, NPC_EC_L2_K3_ETYPE_UNK,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 0,
-		NPC_LID_LB, NPC_LT_NA,
-		0,
-		0, 0, 0, 0,
-	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		8, 0, 6, 1, 0,
@@ -10134,6 +11258,14 @@ static struct npc_kpu_profile_action kpu3_action_entries[] = {
 		NPC_F_LB_U_UNK_ETYPE | NPC_F_LB_L_DSA_VLAN,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU4_VLAN_EXDSA, 12, 1,
+		NPC_LID_LB, NPC_LT_LB_VLAN_EXDSA,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LB, NPC_EC_L2_K3,
 		0, 0, 0, 0, 1,
@@ -10267,6 +11399,70 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
 		NPC_F_LB_L_FDSA,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 0, 0,
+		NPC_S_KPU5_IP, 10, 1,
+		NPC_LID_LB, NPC_LT_LB_FDSA,
+		NPC_F_LB_L_FDSA,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 0, 0,
+		NPC_S_KPU5_IP6, 10, 1,
+		NPC_LID_LB, NPC_LT_LB_FDSA,
+		NPC_F_LB_L_FDSA,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU5_ARP, 10, 1,
+		NPC_LID_LB, NPC_LT_LB_FDSA,
+		NPC_F_LB_L_FDSA,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 0, 0,
+		NPC_S_KPU5_RARP, 10, 1,
+		NPC_LID_LB, NPC_LT_LB_FDSA,
+		NPC_F_LB_L_FDSA,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 0, 0,
+		NPC_S_KPU5_PTP, 10, 1,
+		NPC_LID_LB, NPC_LT_LB_FDSA,
+		NPC_F_LB_L_FDSA,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU5_FCOE, 10, 1,
+		NPC_LID_LB, NPC_LT_LB_FDSA,
+		NPC_F_LB_L_FDSA,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 0, 0,
+		NPC_S_KPU5_IP, 14, 1,
+		NPC_LID_LB, NPC_LT_LB_PPPOE,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 0, 0,
+		NPC_S_KPU5_IP6, 14, 1,
+		NPC_LID_LB, NPC_LT_LB_PPPOE,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
 		0, 0, 0, 0, 1,
@@ -10275,6 +11471,78 @@ static struct npc_kpu_profile_action kpu4_action_entries[] = {
 		NPC_F_LB_U_UNK_ETYPE | NPC_F_LB_L_FDSA,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 0, 0,
+		NPC_S_KPU5_IP, 2, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 0, 0,
+		NPC_S_KPU5_IP6, 2, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU5_ARP, 2, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 0, 0,
+		NPC_S_KPU5_RARP, 2, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 0, 0,
+		NPC_S_KPU5_PTP, 2, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU5_FCOE, 2, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 0, 0,
+		NPC_S_KPU5_IP, 10, 0,
+		NPC_LID_LB, NPC_LT_LB_PPPOE,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 0, 0,
+		NPC_S_KPU5_IP6, 10, 0,
+		NPC_LID_LB, NPC_LT_LB_PPPOE,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LB, NPC_EC_L2_K4,
 		0, 0, 0, 0, 1,
@@ -10784,6 +12052,382 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP_TTL_0,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP_FRAG_OFFSET_1,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		NPC_F_LC_U_IP_FRAG,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 2, 0,
+		NPC_S_KPU8_TCP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 0, 0, 2, 0,
+		NPC_S_KPU8_UDP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_SCTP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_ICMP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_IGMP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU9_ESP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_AH, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 0, 0, 2, 0,
+		NPC_S_KPU8_GRE, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 6, 0,
+		NPC_S_KPU12_TU_IP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		NPC_F_LC_L_IP_IN_IP,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 6, 0,
+		NPC_S_KPU12_TU_IP6, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		NPC_F_LC_L_6TO4,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 3, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		NPC_F_LC_L_MPLS_IN_IP,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		NPC_F_LC_U_UNK_PROTO,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 2, 0,
+		NPC_S_KPU8_TCP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 8, 10, 2, 0,
+		NPC_S_KPU8_UDP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_SCTP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_ICMP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_IGMP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU9_ESP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_AH, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 0, 0, 2, 0,
+		NPC_S_KPU8_GRE, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		0,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		8, 0, 6, 6, 0,
+		NPC_S_KPU12_TU_IP, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		NPC_F_LC_L_IP_IN_IP,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 6, 0,
+		NPC_S_KPU12_TU_IP6, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		NPC_F_LC_L_6TO4,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 3, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 20, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		NPC_F_LC_L_MPLS_IN_IP,
+		0, 0xf, 0, 2,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP_OPT,
+		NPC_F_LC_U_UNK_PROTO,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP_VER,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP6_HOP_0,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 2, 0,
+		NPC_S_KPU8_TCP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 0, 0, 2, 0,
+		NPC_S_KPU8_UDP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_SCTP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_ICMP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_ICMP6, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_GRE, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 6, 0,
+		NPC_S_KPU12_TU_IP6, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		NPC_F_LC_L_IP6_TUN_IP6,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 3, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		NPC_F_LC_L_IP6_MPLS_IN_IP,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_L_EXT_HOP,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU6_IP6_CPT_HOP_DEST, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_L_EXT_DEST,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU6_IP6_CPT_ROUT, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_L_EXT_ROUT,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 2, 0, 0, 0,
+		NPC_S_KPU6_IP6_CPT_FRAG, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_U_IP6_FRAG,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 3, 0,
+		NPC_S_KPU9_ESP, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU8_AH, 40, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_L_EXT_MOBILITY,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_L_EXT_HOSTID,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6_EXT,
+		NPC_F_LC_L_EXT_SHIM6,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		NPC_F_LC_U_UNK_PROTO,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_LC, NPC_EC_IP6_VER,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 1,
+		NPC_LID_LC, NPC_LT_LC_IP6,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LC, NPC_EC_UNK,
 		0, 0, 0, 0, 1,
@@ -11093,6 +12737,294 @@ static struct npc_kpu_profile_action kpu6_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 1, 0,
+		NPC_S_KPU8_TCP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 8, 10, 1, 0,
+		NPC_S_KPU8_UDP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_SCTP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_ICMP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_ICMP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU9_ESP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_AH, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_GRE, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 5, 0,
+		NPC_S_KPU12_TU_IP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 1, 0,
+		NPC_S_KPU8_TCP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 8, 10, 1, 0,
+		NPC_S_KPU8_UDP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_SCTP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_ICMP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_ICMP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU9_ESP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_AH, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_GRE, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 5, 0,
+		NPC_S_KPU12_TU_IP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU7_IP6_ROUT, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 2, 0, 0, 0,
+		NPC_S_KPU7_CPT_IP6_FRAG, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 1, 0,
+		NPC_S_KPU8_TCP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 8, 10, 1, 0,
+		NPC_S_KPU8_UDP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_SCTP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_ICMP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_ICMP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 2, 0,
+		NPC_S_KPU9_ESP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_AH, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU8_GRE, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 5, 0,
+		NPC_S_KPU12_TU_IP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 2, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 2, 0, 0, 0,
+		NPC_S_KPU7_CPT_IP6_FRAG, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		1, 0xff, 0, 3,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LC, NPC_EC_UNK,
 		0, 0, 0, 0, 1,
@@ -11290,6 +13222,94 @@ static struct npc_kpu_profile_action kpu7_action_entries[] = {
 		0,
 		0, 0, 0, 0,
 	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 12, 0, 0, 0,
+		NPC_S_KPU8_TCP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 8, 10, 0, 0,
+		NPC_S_KPU8_UDP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU8_SCTP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU8_ICMP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU8_ICMP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 1, 0,
+		NPC_S_KPU9_ESP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU8_AH, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 0,
+		NPC_S_KPU8_GRE, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		6, 0, 0, 4, 0,
+		NPC_S_KPU12_TU_IP6, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		2, 6, 10, 1, 0,
+		NPC_S_KPU9_TU_MPLS_IN_IP, 8, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
+	{
+		NPC_ERRLEV_RE, NPC_EC_NOERR,
+		0, 0, 0, 0, 1,
+		NPC_S_NA, 0, 0,
+		NPC_LID_LC, NPC_LT_NA,
+		0,
+		0, 0, 0, 0,
+	},
 	{
 		NPC_ERRLEV_LC, NPC_EC_UNK,
 		0, 0, 0, 0, 1,
@@ -12326,10 +14346,10 @@ static struct npc_kpu_profile_action kpu9_action_entries[] = {
 	},
 	{
 		NPC_ERRLEV_RE, NPC_EC_NOERR,
-		0, 0, 0, 0, 1,
-		NPC_S_NA, 0, 1,
+		8, 0, 6, 2, 0,
+		NPC_S_KPU12_TU_IP, 8, 1,
 		NPC_LID_LE, NPC_LT_LE_GTPU,
-		NPC_F_LE_L_GTPU_UNK,
+		0,
 		0, 0, 0, 0,
 	},
 	{
@@ -13297,7 +15317,7 @@ static struct npc_kpu_profile_action kpu16_action_entries[] = {
 	},
 };
 
-static const struct npc_kpu_profile npc_kpu_profiles[] = {
+static struct npc_kpu_profile npc_kpu_profiles[] = {
 	{
 		ARRAY_SIZE(kpu1_cam_entries),
 		ARRAY_SIZE(kpu1_action_entries),
@@ -13396,12 +15416,22 @@ static const struct npc_kpu_profile npc_kpu_profiles[] = {
 	},
 };
 
-static const struct npc_lt_def_cfg npc_lt_defaults = {
+static struct npc_lt_def_cfg npc_lt_defaults = {
 	.rx_ol2 = {
 		.lid = NPC_LID_LA,
 		.ltype_match = NPC_LT_LA_ETHER,
 		.ltype_mask = 0x0F,
 	},
+	.ovlan = {
+		.lid = NPC_LID_LB,
+		.ltype_match = NPC_LT_LB_CTAG,
+		.ltype_mask = 0x0F,
+	},
+	.ivlan = {
+		.lid = NPC_LID_LB,
+		.ltype_match = NPC_LT_LB_STAG_QINQ,
+		.ltype_mask = 0x0F,
+	},
 	.rx_oip4 = {
 		.lid = NPC_LID_LC,
 		.ltype_match = NPC_LT_LC_IP,
@@ -13511,7 +15541,7 @@ static struct npc_mcam_kex npc_mkex_default = {
 	.name = "default",
 	.kpu_version = NPC_KPU_PROFILE_VER,
 	.keyx_cfg = {
-		/* nibble: LA..LE (ltype only) + channel */
+		/* nibble: LA..LE (ltype only) + Error code + Channel */
 		[NIX_INTF_RX] = ((u64)NPC_MCAM_KEY_X2 << 32) | NPC_PARSE_NIBBLE_INTF_RX,
 		/* nibble: LA..LE (ltype only) */
 		[NIX_INTF_TX] = ((u64)NPC_MCAM_KEY_X2 << 32) | NPC_PARSE_NIBBLE_INTF_TX,
@@ -13522,30 +15552,40 @@ static struct npc_mcam_kex npc_mkex_default = {
 		[NPC_LID_LA] = {
 			/* Layer A: Ethernet: */
 			[NPC_LT_LA_ETHER] = {
-				/* DMAC: 6 bytes, KW1[47:0] */
+				/* DMAC: 6 bytes, KW1[55:8] */
 				KEX_LD_CFG(0x05, 0x0, 0x1, 0x0, NPC_KEXOF_DMAC),
-				/* Ethertype: 2 bytes, KW0[47:32] */
-				KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x4),
+				/* Ethertype: 2 bytes, KW0[55:40] */
+				KEX_LD_CFG(0x01, 0xc, 0x1, 0x0, 0x5),
+			},
+			/* Layer A: HiGig2: */
+			[NPC_LT_LA_HIGIG2_ETHER] = {
+				/* Classification: 2 bytes, KW1[23:8] */
+				KEX_LD_CFG(0x01, 0x8, 0x1, 0x0, NPC_KEXOF_DMAC),
+				/* VID: 2 bytes, KW1[39:24] */
+				KEX_LD_CFG(0x01, 0xc, 0x1, 0x0,
+					   NPC_KEXOF_DMAC + 2),
 			},
 		},
 		[NPC_LID_LB] = {
 			/* Layer B: Single VLAN (CTAG) */
-			/* CTAG VLAN[2..3] + Ethertype, 4 bytes, KW0[63:32] */
 			[NPC_LT_LB_CTAG] = {
-				KEX_LD_CFG(0x03, 0x2, 0x1, 0x0, 0x4),
+				/* CTAG VLAN: 2 bytes, KW1[7:0], KW0[63:56] */
+				KEX_LD_CFG(0x01, 0x2, 0x1, 0x0, 0x7),
+				/* Ethertype: 2 bytes, KW0[55:40] */
+				KEX_LD_CFG(0x01, 0x4, 0x1, 0x0, 0x5),
 			},
 			/* Layer B: Stacked VLAN (STAG|QinQ) */
 			[NPC_LT_LB_STAG_QINQ] = {
-				/* Outer VLAN: 2 bytes, KW0[63:48] */
-				KEX_LD_CFG(0x01, 0x2, 0x1, 0x0, 0x6),
-				/* Ethertype: 2 bytes, KW0[47:32] */
-				KEX_LD_CFG(0x01, 0x8, 0x1, 0x0, 0x4),
+				/* Outer VLAN: 2 bytes, KW1[7:0], KW0[63:56] */
+				KEX_LD_CFG(0x01, 0x2, 0x1, 0x0, 0x7),
+				/* Ethertype: 2 bytes, KW0[55:40] */
+				KEX_LD_CFG(0x01, 0x8, 0x1, 0x0, 0x5),
 			},
 			[NPC_LT_LB_FDSA] = {
-				/* SWITCH PORT: 1 byte, KW0[63:48] */
-				KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0x6),
-				/* Ethertype: 2 bytes, KW0[47:32] */
-				KEX_LD_CFG(0x01, 0x4, 0x1, 0x0, 0x4),
+				/* SWITCH PORT: 1 byte, KW0[63:56] */
+				KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0x7),
+				/* Ethertype: 2 bytes, KW0[55:40] */
+				KEX_LD_CFG(0x01, 0x4, 0x1, 0x0, 0x5),
 			},
 		},
 		[NPC_LID_LC] = {
@@ -13589,6 +15629,13 @@ static struct npc_mcam_kex npc_mkex_default = {
 				/* DMAC: 6 bytes, KW1[63:16] */
 				KEX_LD_CFG(0x05, 0x8, 0x1, 0x0, 0xa),
 			},
+			/* Layer A: HiGig2: */
+			[NPC_LT_LA_IH_NIX_HIGIG2_ETHER] = {
+				/* PF_FUNC: 2B , KW0 [47:32] */
+				KEX_LD_CFG(0x01, 0x0, 0x1, 0x0, 0x4),
+				/* VID: 2 bytes, KW1[31:16] */
+				KEX_LD_CFG(0x01, 0x10, 0x1, 0x0, 0xa),
+			},
 		},
 		[NPC_LID_LB] = {
 			/* Layer B: Single VLAN (CTAG) */

From 238ebd8b487b7fc995284e9580257801f2c76aa5 Mon Sep 17 00:00:00 2001
From: Philipp Borgers <borgers@mi.fu-berlin.de>
Date: Mon, 24 May 2021 08:52:11 +0300
Subject: [PATCH 0685/3292] ath9k: ar9003_mac: read STBC indicator from rx
 descriptor

The rx descriptor contains a STBC indicator. If the indicator is set the
frame was received with STBC.

Signed-off-by: Philipp Borgers <borgers@mi.fu-berlin.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517172426.15919-1-borgers@mi.fu-berlin.de
---
 drivers/net/wireless/ath/ath9k/ar9003_mac.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index 76b538942a79..5184a0aacfe2 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -522,6 +522,8 @@ int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs,
 	rxs->rs_moreaggr = (rxsp->status11 & AR_RxMoreAggr) ? 1 : 0;
 	rxs->rs_antenna = (MS(rxsp->status4, AR_RxAntenna) & 0x7);
 	rxs->enc_flags |= (rxsp->status4 & AR_GI) ? RX_ENC_FLAG_SHORT_GI : 0;
+	rxs->enc_flags |=
+		(rxsp->status4 & AR_STBC) ? (1 << RX_ENC_FLAG_STBC_SHIFT) : 0;
 	rxs->bw = (rxsp->status4 & AR_2040) ? RATE_INFO_BW_40 : RATE_INFO_BW_20;
 
 	rxs->evm0 = rxsp->status6;

From 7a68cc16b82c963c096387917ced11a27c352261 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Sat, 15 May 2021 17:22:08 +0200
Subject: [PATCH 0686/3292] batman-adv: mcast: add MRD + routable IPv4
 multicast with bridges support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds support for routable IPv4 multicast addresses
(224.0.0.0/4, excluding 224.0.0.0/24) in bridged setups.

This utilizes the Multicast Router Discovery (MRD, RFC4286) support
in the Linux bridge. batman-adv will now query the Linux bridge for
IPv4 multicast routers, which the bridge has previously learned about
via MRD.

This allows us to then safely send routable IPv4 multicast packets in
bridged setups to multicast listeners and multicast routers only. Before
we had to flood such packets to avoid potential multicast packet loss to
IPv4 multicast routers, which we were not able to detect before.

With the bridge MRD integration, we are now also able to perform more
fine-grained detection of IPv6 multicast routers in bridged setups:
Before we were "guessing" IPv6 multicast routers by looking up multicast
listeners for the link-local All Routers multicast address (ff02::2),
which every IPv6 multicast router is listening to. However this would
also include more nodes than necessary: For instance nodes which are
just a router for unicast, but not multicast would be included, too.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/multicast.c | 41 +++++---------------------------------
 1 file changed, 5 insertions(+), 36 deletions(-)

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 1d63c8cbbfe7..923e2197c2db 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -193,53 +193,22 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv,
  *	BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
  *	The former two OR'd: no multicast router is present
  */
-#if IS_ENABLED(CONFIG_IPV6)
 static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
 						struct net_device *bridge)
 {
-	struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
 	struct net_device *dev = bat_priv->soft_iface;
-	struct br_ip_list *br_ip_entry, *tmp;
-	u8 flags = BATADV_MCAST_WANT_NO_RTR6;
-	int ret;
+	u8 flags = BATADV_NO_FLAGS;
 
 	if (!bridge)
 		return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
 
-	/* TODO: ask the bridge if a multicast router is present (the bridge
-	 * is capable of performing proper RFC4286 multicast router
-	 * discovery) instead of searching for a ff02::2 listener here
-	 */
-	ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
-	if (ret < 0)
-		return BATADV_NO_FLAGS;
-
-	list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
-		/* the bridge snooping does not maintain IPv4 link-local
-		 * addresses - therefore we won't find any IPv4 multicast router
-		 * address here, only IPv6 ones
-		 */
-		if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
-		    ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.dst.ip6))
-			flags &= ~BATADV_MCAST_WANT_NO_RTR6;
-
-		list_del(&br_ip_entry->list);
-		kfree(br_ip_entry);
-	}
+	if (!br_multicast_has_router_adjacent(dev, ETH_P_IP))
+		flags |= BATADV_MCAST_WANT_NO_RTR4;
+	if (!br_multicast_has_router_adjacent(dev, ETH_P_IPV6))
+		flags |= BATADV_MCAST_WANT_NO_RTR6;
 
 	return flags;
 }
-#else
-static inline u8
-batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
-				      struct net_device *bridge)
-{
-	if (bridge)
-		return BATADV_NO_FLAGS;
-	else
-		return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
-}
-#endif
 
 /**
  * batadv_mcast_mla_rtr_flags_get() - get multicast router flags

From 1cf1ef60a1a64cb5e00148b35bb35abad0984234 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Fri, 28 May 2021 17:56:25 +0800
Subject: [PATCH 0687/3292] batman-adv: Remove the repeated declaration

Function 'batadv_bla_claim_dump' is declared twice, so remove the
repeated declaration.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 5c22955bb9d5..8673a265995f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -52,7 +52,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 void batadv_bla_status_update(struct net_device *net_dev);
 int batadv_bla_init(struct batadv_priv *bat_priv);
 void batadv_bla_free(struct batadv_priv *bat_priv);
-int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 #ifdef CONFIG_BATMAN_ADV_DAT
 bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
 			    unsigned short vid);

From 03a6ef31f2bc3e14522298a9617e240fb0f5954b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 28 May 2021 14:58:31 -0500
Subject: [PATCH 0688/3292] net: axienet: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding a fallthrough statement instead of just
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20210528195831.GA39131@embeddedor
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index b508c9453f40..e29ad9a86a3c 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1543,6 +1543,7 @@ static void axienet_validate(struct phylink_config *config,
 	case PHY_INTERFACE_MODE_MII:
 		phylink_set(mask, 100baseT_Full);
 		phylink_set(mask, 10baseT_Full);
+		fallthrough;
 	default:
 		break;
 	}

From 320daffdf249bb41531c01445054443c80440904 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 28 May 2021 15:22:25 -0500
Subject: [PATCH 0689/3292] octeontx2-pf: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20210528202225.GA39855@embeddedor
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 0b4fa92ba821..80b769079d51 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -551,6 +551,7 @@ static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp,
 			req->features |= BIT_ULL(NPC_IPPROTO_AH);
 		else
 			req->features |= BIT_ULL(NPC_IPPROTO_ESP);
+		break;
 	default:
 		break;
 	}

From 7c9896e37807862e276064dd9331860f5d27affc Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 29 May 2021 11:04:38 +0800
Subject: [PATCH 0690/3292] net: dsa: qca8k: check return value of read
 functions correctly

Current return type of qca8k_mii_read32() and qca8k_read() are
unsigned, it can't be negative, so the return value check is
unuseful. For check the return value correctly, change return
type of the read functions and add a output parameter to store
the read value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/qca8k.c | 130 +++++++++++++++++++---------------------
 1 file changed, 60 insertions(+), 70 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 1f1b7c4dda13..d761c5947222 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -89,26 +89,26 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
 	*page = regaddr & 0x3ff;
 }
 
-static u32
-qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum)
+static int
+qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val)
 {
-	u32 val;
 	int ret;
 
 	ret = bus->read(bus, phy_id, regnum);
 	if (ret >= 0) {
-		val = ret;
+		*val = ret;
 		ret = bus->read(bus, phy_id, regnum + 1);
-		val |= ret << 16;
+		*val |= ret << 16;
 	}
 
 	if (ret < 0) {
 		dev_err_ratelimited(&bus->dev,
 				    "failed to read qca8k 32bit register\n");
+		*val = 0;
 		return ret;
 	}
 
-	return val;
+	return 0;
 }
 
 static void
@@ -148,26 +148,26 @@ qca8k_set_page(struct mii_bus *bus, u16 page)
 	return 0;
 }
 
-static u32
-qca8k_read(struct qca8k_priv *priv, u32 reg)
+static int
+qca8k_read(struct qca8k_priv *priv, u32 reg, u32 *val)
 {
 	struct mii_bus *bus = priv->bus;
 	u16 r1, r2, page;
-	u32 val;
+	int ret;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
 	mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED);
 
-	val = qca8k_set_page(bus, page);
-	if (val < 0)
+	ret = qca8k_set_page(bus, page);
+	if (ret < 0)
 		goto exit;
 
-	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
+	ret = qca8k_mii_read32(bus, 0x10 | r2, r1, val);
 
 exit:
 	mutex_unlock(&bus->mdio_lock);
-	return val;
+	return ret;
 }
 
 static int
@@ -208,11 +208,9 @@ qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 write_val)
 	if (ret < 0)
 		goto exit;
 
-	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
-	if (val < 0) {
-		ret = val;
+	ret = qca8k_mii_read32(bus, 0x10 | r2, r1, &val);
+	if (ret < 0)
 		goto exit;
-	}
 
 	val &= ~mask;
 	val |= write_val;
@@ -240,15 +238,8 @@ static int
 qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
-	int ret;
 
-	ret = qca8k_read(priv, reg);
-	if (ret < 0)
-		return ret;
-
-	*val = ret;
-
-	return 0;
+	return qca8k_read(priv, reg, val);
 }
 
 static int
@@ -296,18 +287,18 @@ static struct regmap_config qca8k_regmap_config = {
 static int
 qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
 {
+	int ret, ret1;
 	u32 val;
-	int ret;
 
-	ret = read_poll_timeout(qca8k_read, val, !(val & mask),
+	ret = read_poll_timeout(qca8k_read, ret1, !(val & mask),
 				0, QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
-				priv, reg);
+				priv, reg, &val);
 
 	/* Check if qca8k_read has failed for a different reason
 	 * before returning -ETIMEDOUT
 	 */
-	if (ret < 0 && val < 0)
-		return val;
+	if (ret < 0 && ret1 < 0)
+		return ret1;
 
 	return ret;
 }
@@ -316,13 +307,13 @@ static int
 qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
 {
 	u32 reg[4], val;
-	int i;
+	int i, ret;
 
 	/* load the ARL table into an array */
 	for (i = 0; i < 4; i++) {
-		val = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4));
-		if (val < 0)
-			return val;
+		ret = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4), &val);
+		if (ret < 0)
+			return ret;
 
 		reg[i] = val;
 	}
@@ -396,9 +387,9 @@ qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
 
 	/* Check for table full violation when adding an entry */
 	if (cmd == QCA8K_FDB_LOAD) {
-		reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC);
-		if (reg < 0)
-			return reg;
+		ret = qca8k_read(priv, QCA8K_REG_ATU_FUNC, &reg);
+		if (ret < 0)
+			return ret;
 		if (reg & QCA8K_ATU_FUNC_FULL)
 			return -1;
 	}
@@ -477,9 +468,9 @@ qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid)
 
 	/* Check for table full violation when adding an entry */
 	if (cmd == QCA8K_VLAN_LOAD) {
-		reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC1);
-		if (reg < 0)
-			return reg;
+		ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC1, &reg);
+		if (ret < 0)
+			return ret;
 		if (reg & QCA8K_VTU_FUNC1_FULL)
 			return -ENOMEM;
 	}
@@ -505,11 +496,9 @@ qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool untagged)
 	if (ret < 0)
 		goto out;
 
-	reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
-	if (reg < 0) {
-		ret = reg;
+	ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC0, &reg);
+	if (ret < 0)
 		goto out;
-	}
 	reg |= QCA8K_VTU_FUNC0_VALID | QCA8K_VTU_FUNC0_IVL_EN;
 	reg &= ~(QCA8K_VTU_FUNC0_EG_MODE_MASK << QCA8K_VTU_FUNC0_EG_MODE_S(port));
 	if (untagged)
@@ -542,11 +531,9 @@ qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid)
 	if (ret < 0)
 		goto out;
 
-	reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
-	if (reg < 0) {
-		ret = reg;
+	ret = qca8k_read(priv, QCA8K_REG_VTU_FUNC0, &reg);
+	if (ret < 0)
 		goto out;
-	}
 	reg &= ~(3 << QCA8K_VTU_FUNC0_EG_MODE_S(port));
 	reg |= QCA8K_VTU_FUNC0_EG_MODE_NOT <<
 			QCA8K_VTU_FUNC0_EG_MODE_S(port);
@@ -638,19 +625,19 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask)
 {
 	u16 r1, r2, page;
 	u32 val;
-	int ret;
+	int ret, ret1;
 
 	qca8k_split_addr(reg, &r1, &r2, &page);
 
-	ret = read_poll_timeout(qca8k_mii_read32, val, !(val & mask), 0,
+	ret = read_poll_timeout(qca8k_mii_read32, ret1, !(val & mask), 0,
 				QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false,
-				bus, 0x10 | r2, r1);
+				bus, 0x10 | r2, r1, &val);
 
 	/* Check if qca8k_read has failed for a different reason
 	 * before returnting -ETIMEDOUT
 	 */
-	if (ret < 0 && val < 0)
-		return val;
+	if (ret < 0 && ret1 < 0)
+		return ret1;
 
 	return ret;
 }
@@ -725,7 +712,7 @@ qca8k_mdio_read(struct mii_bus *salve_bus, int phy, int regnum)
 	if (ret)
 		goto exit;
 
-	val = qca8k_mii_read32(bus, 0x10 | r2, r1);
+	ret = qca8k_mii_read32(bus, 0x10 | r2, r1, &val);
 
 exit:
 	/* even if the busy_wait timeouts try to clear the MASTER_EN */
@@ -733,10 +720,10 @@ exit:
 
 	mutex_unlock(&bus->mdio_lock);
 
-	if (val >= 0)
-		val &= QCA8K_MDIO_MASTER_DATA_MASK;
+	if (ret >= 0)
+		ret = val & QCA8K_MDIO_MASTER_DATA_MASK;
 
-	return val;
+	return ret;
 }
 
 static int
@@ -1211,7 +1198,7 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 		qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN);
 
 		/* Enable/disable SerDes auto-negotiation as necessary */
-		val = qca8k_read(priv, QCA8K_REG_PWS);
+		qca8k_read(priv, QCA8K_REG_PWS, &val);
 		if (phylink_autoneg_inband(mode))
 			val &= ~QCA8K_PWS_SERDES_AEN_DIS;
 		else
@@ -1219,7 +1206,7 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 		qca8k_write(priv, QCA8K_REG_PWS, val);
 
 		/* Configure the SGMII parameters */
-		val = qca8k_read(priv, QCA8K_REG_SGMII_CTRL);
+		qca8k_read(priv, QCA8K_REG_SGMII_CTRL, &val);
 
 		val |= QCA8K_SGMII_EN_PLL | QCA8K_SGMII_EN_RX |
 			QCA8K_SGMII_EN_TX | QCA8K_SGMII_EN_SD;
@@ -1314,10 +1301,11 @@ qca8k_phylink_mac_link_state(struct dsa_switch *ds, int port,
 {
 	struct qca8k_priv *priv = ds->priv;
 	u32 reg;
+	int ret;
 
-	reg = qca8k_read(priv, QCA8K_REG_PORT_STATUS(port));
-	if (reg < 0)
-		return reg;
+	ret = qca8k_read(priv, QCA8K_REG_PORT_STATUS(port), &reg);
+	if (ret < 0)
+		return ret;
 
 	state->link = !!(reg & QCA8K_PORT_STATUS_LINK_UP);
 	state->an_complete = state->link;
@@ -1419,19 +1407,20 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	const struct qca8k_mib_desc *mib;
 	u32 reg, i, val;
-	u64 hi;
+	u64 hi = 0;
+	int ret;
 
 	for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) {
 		mib = &ar8327_mib[i];
 		reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset;
 
-		val = qca8k_read(priv, reg);
-		if (val < 0)
+		ret = qca8k_read(priv, reg, &val);
+		if (ret < 0)
 			continue;
 
 		if (mib->size == 2) {
-			hi = qca8k_read(priv, reg + 4);
-			if (hi < 0)
+			ret = qca8k_read(priv, reg + 4, (u32 *)&hi);
+			if (ret < 0)
 				continue;
 		}
 
@@ -1459,7 +1448,7 @@ qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 	int ret;
 
 	mutex_lock(&priv->reg_mutex);
-	reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
+	ret = qca8k_read(priv, QCA8K_REG_EEE_CTRL, &reg);
 	if (reg < 0) {
 		ret = reg;
 		goto exit;
@@ -1793,14 +1782,15 @@ static int qca8k_read_switch_id(struct qca8k_priv *priv)
 	const struct qca8k_match_data *data;
 	u32 val;
 	u8 id;
+	int ret;
 
 	/* get the switches ID from the compatible */
 	data = of_device_get_match_data(priv->dev);
 	if (!data)
 		return -ENODEV;
 
-	val = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
-	if (val < 0)
+	ret = qca8k_read(priv, QCA8K_REG_MASK_CTRL, &val);
+	if (ret < 0)
 		return -ENODEV;
 
 	id = QCA8K_MASK_CTRL_DEVICE_ID(val & QCA8K_MASK_CTRL_DEVICE_ID_MASK);

From 9fe99de01440d9ede74d447ac76e9c445d8daae9 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 29 May 2021 11:04:39 +0800
Subject: [PATCH 0691/3292] net: dsa: qca8k: add missing check return value in
 qca8k_phylink_mac_config()

Now we can check qca8k_read() return value correctly, so if
it fails, we need return directly.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/qca8k.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index d761c5947222..6fe963ba23e8 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1128,6 +1128,7 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 {
 	struct qca8k_priv *priv = ds->priv;
 	u32 reg, val;
+	int ret;
 
 	switch (port) {
 	case 0: /* 1st CPU port */
@@ -1198,7 +1199,9 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 		qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN);
 
 		/* Enable/disable SerDes auto-negotiation as necessary */
-		qca8k_read(priv, QCA8K_REG_PWS, &val);
+		ret = qca8k_read(priv, QCA8K_REG_PWS, &val);
+		if (ret)
+			return;
 		if (phylink_autoneg_inband(mode))
 			val &= ~QCA8K_PWS_SERDES_AEN_DIS;
 		else
@@ -1206,7 +1209,9 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 		qca8k_write(priv, QCA8K_REG_PWS, val);
 
 		/* Configure the SGMII parameters */
-		qca8k_read(priv, QCA8K_REG_SGMII_CTRL, &val);
+		ret = qca8k_read(priv, QCA8K_REG_SGMII_CTRL, &val);
+		if (ret)
+			return;
 
 		val |= QCA8K_SGMII_EN_PLL | QCA8K_SGMII_EN_RX |
 			QCA8K_SGMII_EN_TX | QCA8K_SGMII_EN_SD;

From 546d6bad18c04926c4d0eba4222654a9a60ea830 Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Sat, 29 May 2021 07:05:55 -0400
Subject: [PATCH 0692/3292] net: phy: fix yt8511 clang uninitialized variable
 warning

clang doesn't preinitialize variables. If phy_select_page failed and
returned an error, phy_restore_page would be called with `ret` being
uninitialized.
Even though phy_restore_page won't use `ret` in this scenario,
initialize `ret` to silence the warning.

Fixes: 48e8c6f1612b ("net: phy: add driver for Motorcomm yt8511 phy")
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/motorcomm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c
index 796b68f4b499..68cd19540c67 100644
--- a/drivers/net/phy/motorcomm.c
+++ b/drivers/net/phy/motorcomm.c
@@ -50,8 +50,8 @@ static int yt8511_write_page(struct phy_device *phydev, int page)
 
 static int yt8511_config_init(struct phy_device *phydev)
 {
+	int oldpage, ret = 0;
 	unsigned int ge, fe;
-	int ret, oldpage;
 
 	/* set clock mode to 125mhz */
 	oldpage = phy_select_page(phydev, YT8511_EXT_CLK_GATE);

From 0cc8bddb5b0665283baba6d89684630663c0ccbd Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Sat, 29 May 2021 07:05:56 -0400
Subject: [PATCH 0693/3292] net: phy: abort loading yt8511 driver in
 unsupported modes

While investigating the clang `ge` uninitialized variable report, it was
discovered the default switch would have unintended consequences. Due to
the switch to __phy_modify, the driver would modify the ID values in the
default scenario.

Fix this by promoting the interface mode switch and aborting when the
mode is not a supported RGMII mode.

This prevents the `ge` and `fe` variables from ever being used
uninitialized.

Fixes: 48e8c6f1612b ("net: phy: add driver for Motorcomm yt8511 phy")
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/motorcomm.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/motorcomm.c b/drivers/net/phy/motorcomm.c
index 68cd19540c67..7e6ac2c5e27e 100644
--- a/drivers/net/phy/motorcomm.c
+++ b/drivers/net/phy/motorcomm.c
@@ -53,15 +53,10 @@ static int yt8511_config_init(struct phy_device *phydev)
 	int oldpage, ret = 0;
 	unsigned int ge, fe;
 
-	/* set clock mode to 125mhz */
 	oldpage = phy_select_page(phydev, YT8511_EXT_CLK_GATE);
 	if (oldpage < 0)
 		goto err_restore_page;
 
-	ret = __phy_modify(phydev, YT8511_PAGE, 0, YT8511_CLK_125M);
-	if (ret < 0)
-		goto err_restore_page;
-
 	/* set rgmii delay mode */
 	switch (phydev->interface) {
 	case PHY_INTERFACE_MODE_RGMII:
@@ -80,14 +75,20 @@ static int yt8511_config_init(struct phy_device *phydev)
 		ge = YT8511_DELAY_RX | YT8511_DELAY_GE_TX_EN;
 		fe = YT8511_DELAY_FE_TX_EN;
 		break;
-	default: /* leave everything alone in other modes */
-		break;
+	default: /* do not support other modes */
+		ret = -EOPNOTSUPP;
+		goto err_restore_page;
 	}
 
 	ret = __phy_modify(phydev, YT8511_PAGE, (YT8511_DELAY_RX | YT8511_DELAY_GE_TX_EN), ge);
 	if (ret < 0)
 		goto err_restore_page;
 
+	/* set clock mode to 125mhz */
+	ret = __phy_modify(phydev, YT8511_PAGE, 0, YT8511_CLK_125M);
+	if (ret < 0)
+		goto err_restore_page;
+
 	/* fast ethernet delay is in a separate page */
 	ret = __phy_write(phydev, YT8511_PAGE_SELECT, YT8511_EXT_DELAY_DRIVE);
 	if (ret < 0)

From 587024b8210d4eeeab457323141c92e134cd3dbf Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 24 May 2021 17:24:16 +0200
Subject: [PATCH 0694/3292] ACPI: power: Use u8 as the power resource state
 data type

Use u8 as the data type for representing the state of an ACPI
power resource.

It is s not necessary to use int for that and because subsequent
changes are going to use ACPI_POWER_RESOURCE_STATE_UNKNOWN, it is
better to adjust the data type so that the "unknown" state is
represented by the "all ones" value.

While at it, clean up acpi_power_get_state() somewhat.

No intentional functional impact.

Tested-by: Dave Olsthoorn <dave@bewaar.me>
Tested-by: Shujun Wang <wsj20369@163.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/power.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 97c9a94a1a30..106a5d37486c 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -182,10 +182,11 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 	return err;
 }
 
-static int acpi_power_get_state(acpi_handle handle, int *state)
+static int acpi_power_get_state(acpi_handle handle, u8 *state)
 {
 	acpi_status status = AE_OK;
 	unsigned long long sta = 0;
+	u8 cur_state;
 
 	if (!handle || !state)
 		return -EINVAL;
@@ -194,25 +195,24 @@ static int acpi_power_get_state(acpi_handle handle, int *state)
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	*state = (sta & 0x01)?ACPI_POWER_RESOURCE_STATE_ON:
-			      ACPI_POWER_RESOURCE_STATE_OFF;
+	cur_state = sta & ACPI_POWER_RESOURCE_STATE_ON;
 
 	acpi_handle_debug(handle, "Power resource is %s\n",
-			  *state ? "on" : "off");
+			  cur_state ? "on" : "off");
 
+	*state = cur_state;
 	return 0;
 }
 
-static int acpi_power_get_list_state(struct list_head *list, int *state)
+static int acpi_power_get_list_state(struct list_head *list, u8 *state)
 {
 	struct acpi_power_resource_entry *entry;
-	int cur_state;
+	u8 cur_state = ACPI_POWER_RESOURCE_STATE_OFF;
 
 	if (!list || !state)
 		return -EINVAL;
 
 	/* The state of the list is 'on' IFF all resources are 'on'. */
-	cur_state = 0;
 	list_for_each_entry(entry, list, node) {
 		struct acpi_power_resource *resource = entry->resource;
 		acpi_handle handle = resource->device.handle;
@@ -592,7 +592,7 @@ int acpi_power_wakeup_list_init(struct list_head *list, int *system_level_p)
 		struct acpi_power_resource *resource = entry->resource;
 		acpi_handle handle = resource->device.handle;
 		int result;
-		int state;
+		u8 state;
 
 		mutex_lock(&resource->resource_lock);
 
@@ -789,8 +789,8 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
 
 int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
 {
+	u8 list_state = ACPI_POWER_RESOURCE_STATE_OFF;
 	int result = 0;
-	int list_state = 0;
 	int i = 0;
 
 	if (!device || !state)
@@ -919,7 +919,8 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle)
 	union acpi_object acpi_object;
 	struct acpi_buffer buffer = { sizeof(acpi_object), &acpi_object };
 	acpi_status status;
-	int state, result = -ENODEV;
+	int result;
+	u8 state;
 
 	acpi_bus_get_device(handle, &device);
 	if (device)
@@ -979,7 +980,8 @@ void acpi_resume_power_resources(void)
 	mutex_lock(&power_resource_list_lock);
 
 	list_for_each_entry(resource, &acpi_power_resource_list, list_node) {
-		int result, state;
+		int result;
+		u8 state;
 
 		mutex_lock(&resource->resource_lock);
 
@@ -1012,7 +1014,8 @@ static void acpi_power_turn_off_if_unused(struct acpi_power_resource *resource,
 		if (resource->users > 0)
 			return;
 	} else {
-		int result, state;
+		int result;
+		u8 state;
 
 		result = acpi_power_get_state(resource->device.handle, &state);
 		if (result || state == ACPI_POWER_RESOURCE_STATE_OFF)

From ca84f18798a457e9a92c52882813901e15a3b38b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 24 May 2021 17:25:23 +0200
Subject: [PATCH 0695/3292] ACPI: power: Save the last known state of each
 power resource

Currently, there are two ways to check the state of an ACPI power
resource and they may not be consistent with each other.  The first
one is to evaluate the power resource's _STA object and the other one
is to check its reference counter value.  However, on some systems
the value returned by _STA may not be consistent with the value of
the power resource's reference counter (for example, on some systems
it returns the same value every time for certain power resources).

Moreover, evaluating _STA is unnecessary overhead for a power
resource for which it has been evaluated already or whose state is
otherwise known, because either the _ON or the _OFF method has been
executed for it.

For this reason, save the state of each power resource in its
struct acpi_power_resource object and use the saved value whenever
its state needs to be checked, except when its stats is unknown, in
which case the _STA method is evaluated for it and the value
returned by that method is saved as the last known state of
the power resource.

Moreover, drop the power resource _STA method evaluation from
acpi_add_power_resource(), so as to avoid doing that unnecessarily
for power resources that will never be used.

Tested-by: Dave Olsthoorn <dave@bewaar.me>
Tested-by: Shujun Wang <wsj20369@163.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/power.c | 50 ++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 106a5d37486c..6aeea7649852 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -53,6 +53,7 @@ struct acpi_power_resource {
 	u32 order;
 	unsigned int ref_count;
 	unsigned int users;
+	u8 state;
 	bool wakeup_enabled;
 	struct mutex resource_lock;
 	struct list_head dependents;
@@ -182,15 +183,12 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 	return err;
 }
 
-static int acpi_power_get_state(acpi_handle handle, u8 *state)
+static int __get_state(acpi_handle handle, u8 *state)
 {
 	acpi_status status = AE_OK;
 	unsigned long long sta = 0;
 	u8 cur_state;
 
-	if (!handle || !state)
-		return -EINVAL;
-
 	status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
@@ -204,6 +202,20 @@ static int acpi_power_get_state(acpi_handle handle, u8 *state)
 	return 0;
 }
 
+static int acpi_power_get_state(struct acpi_power_resource *resource, u8 *state)
+{
+	if (resource->state == ACPI_POWER_RESOURCE_STATE_UNKNOWN) {
+		int ret;
+
+		ret = __get_state(resource->device.handle, &resource->state);
+		if (ret)
+			return ret;
+	}
+
+	*state = resource->state;
+	return 0;
+}
+
 static int acpi_power_get_list_state(struct list_head *list, u8 *state)
 {
 	struct acpi_power_resource_entry *entry;
@@ -215,11 +227,10 @@ static int acpi_power_get_list_state(struct list_head *list, u8 *state)
 	/* The state of the list is 'on' IFF all resources are 'on'. */
 	list_for_each_entry(entry, list, node) {
 		struct acpi_power_resource *resource = entry->resource;
-		acpi_handle handle = resource->device.handle;
 		int result;
 
 		mutex_lock(&resource->resource_lock);
-		result = acpi_power_get_state(handle, &cur_state);
+		result = acpi_power_get_state(resource, &cur_state);
 		mutex_unlock(&resource->resource_lock);
 		if (result)
 			return result;
@@ -352,8 +363,12 @@ static int __acpi_power_on(struct acpi_power_resource *resource)
 	acpi_status status = AE_OK;
 
 	status = acpi_evaluate_object(resource->device.handle, "_ON", NULL, NULL);
-	if (ACPI_FAILURE(status))
+	if (ACPI_FAILURE(status)) {
+		resource->state = ACPI_POWER_RESOURCE_STATE_UNKNOWN;
 		return -ENODEV;
+	}
+
+	resource->state = ACPI_POWER_RESOURCE_STATE_ON;
 
 	pr_debug("Power resource [%s] turned on\n", resource->name);
 
@@ -405,8 +420,12 @@ static int __acpi_power_off(struct acpi_power_resource *resource)
 
 	status = acpi_evaluate_object(resource->device.handle, "_OFF",
 				      NULL, NULL);
-	if (ACPI_FAILURE(status))
+	if (ACPI_FAILURE(status)) {
+		resource->state = ACPI_POWER_RESOURCE_STATE_UNKNOWN;
 		return -ENODEV;
+	}
+
+	resource->state = ACPI_POWER_RESOURCE_STATE_OFF;
 
 	pr_debug("Power resource [%s] turned off\n", resource->name);
 
@@ -590,13 +609,12 @@ int acpi_power_wakeup_list_init(struct list_head *list, int *system_level_p)
 
 	list_for_each_entry(entry, list, node) {
 		struct acpi_power_resource *resource = entry->resource;
-		acpi_handle handle = resource->device.handle;
 		int result;
 		u8 state;
 
 		mutex_lock(&resource->resource_lock);
 
-		result = acpi_power_get_state(handle, &state);
+		result = acpi_power_get_state(resource, &state);
 		if (result) {
 			mutex_unlock(&resource->resource_lock);
 			return result;
@@ -920,7 +938,6 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle)
 	struct acpi_buffer buffer = { sizeof(acpi_object), &acpi_object };
 	acpi_status status;
 	int result;
-	u8 state;
 
 	acpi_bus_get_device(handle, &device);
 	if (device)
@@ -947,13 +964,9 @@ struct acpi_device *acpi_add_power_resource(acpi_handle handle)
 
 	resource->system_level = acpi_object.power_resource.system_level;
 	resource->order = acpi_object.power_resource.resource_order;
+	resource->state = ACPI_POWER_RESOURCE_STATE_UNKNOWN;
 
-	result = acpi_power_get_state(handle, &state);
-	if (result)
-		goto err;
-
-	pr_info("%s [%s] (%s)\n", acpi_device_name(device),
-		acpi_device_bid(device), state ? "on" : "off");
+	pr_info("%s [%s]\n", acpi_device_name(device), acpi_device_bid(device));
 
 	device->flags.match_driver = true;
 	result = acpi_device_add(device, acpi_release_power_resource);
@@ -985,7 +998,8 @@ void acpi_resume_power_resources(void)
 
 		mutex_lock(&resource->resource_lock);
 
-		result = acpi_power_get_state(resource->device.handle, &state);
+		resource->state = ACPI_POWER_RESOURCE_STATE_UNKNOWN;
+		result = acpi_power_get_state(resource, &state);
 		if (result) {
 			mutex_unlock(&resource->resource_lock);
 			continue;

From 6381195ad7d06ef979528c7452f3ff93659f86b1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 24 May 2021 17:26:16 +0200
Subject: [PATCH 0696/3292] ACPI: power: Rework turning off unused power
 resources

Make turning off unused power resources (after the enumeration of
devices and during system-wide resume from S3) more straightforward
by using the observation that the power resource state stored in
struct acpi_power_resource can be used to determine whether or not
the give power resource has any users.

Namely, when the state of the power resource is unknown, its _STA
method has never been evaluated (or the evaluation of it has failed)
and its _ON and _OFF methods have never been executed (or they have
failed to execute), so for all practical purposes it can be assumed
to have no users (or to be unusable).  Therefore, instead of checking
the number of power resource users, it is sufficient to check if its
state is known.

Moreover, if the last known state of a given power resource is "off",
it is not necessary to turn it off, because it has been used to
initialize the power state or the wakeup power resources list of at
least one device and either its _STA method has returned 0 ("off"),
or its _OFF method has been successfully executed already.

Accordingly, modify acpi_turn_off_unused_power_resources() to do the
above checks (which are suitable for both uses of it) instead of
using the number of power resource users or evaluating its _STA
method, drop its argument (which is not useful any more) and update
its callers.

Also drop the users field from struct acpi_power_resource as it is
not useful any more.

Tested-by: Dave Olsthoorn <dave@bewaar.me>
Tested-by: Shujun Wang <wsj20369@163.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h |  2 +-
 drivers/acpi/power.c    | 45 ++++++++++-------------------------------
 drivers/acpi/scan.c     |  2 +-
 drivers/acpi/sleep.c    |  2 +-
 4 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index e21611c9a170..bc05a7c976f8 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -142,7 +142,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
 int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
 int acpi_power_on_resources(struct acpi_device *device, int state);
 int acpi_power_transition(struct acpi_device *device, int state);
-void acpi_turn_off_unused_power_resources(bool init);
+void acpi_turn_off_unused_power_resources(void);
 
 /* --------------------------------------------------------------------------
                               Device Power Management
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 6aeea7649852..f145f645ab06 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -52,7 +52,6 @@ struct acpi_power_resource {
 	u32 system_level;
 	u32 order;
 	unsigned int ref_count;
-	unsigned int users;
 	u8 state;
 	bool wakeup_enabled;
 	struct mutex resource_lock;
@@ -174,8 +173,6 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 		err = acpi_power_resources_list_add(rhandle, list);
 		if (err)
 			break;
-
-		to_power_resource(rdev)->users++;
 	}
 	if (err)
 		acpi_power_resources_list_free(list);
@@ -1018,39 +1015,10 @@ void acpi_resume_power_resources(void)
 }
 #endif
 
-static void acpi_power_turn_off_if_unused(struct acpi_power_resource *resource,
-				       bool init)
-{
-	if (resource->ref_count > 0)
-		return;
-
-	if (init) {
-		if (resource->users > 0)
-			return;
-	} else {
-		int result;
-		u8 state;
-
-		result = acpi_power_get_state(resource->device.handle, &state);
-		if (result || state == ACPI_POWER_RESOURCE_STATE_OFF)
-			return;
-	}
-
-	dev_info(&resource->device.dev, "Turning OFF\n");
-	__acpi_power_off(resource);
-}
-
 /**
  * acpi_turn_off_unused_power_resources - Turn off power resources not in use.
- * @init: Control switch.
- *
- * If @ainit is set, unconditionally turn off all of the ACPI power resources
- * without any users.
- *
- * Otherwise, turn off all ACPI power resources without active references (that
- * is, the ones that should be "off" at the moment) that are "on".
  */
-void acpi_turn_off_unused_power_resources(bool init)
+void acpi_turn_off_unused_power_resources(void)
 {
 	struct acpi_power_resource *resource;
 
@@ -1059,7 +1027,16 @@ void acpi_turn_off_unused_power_resources(bool init)
 	list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) {
 		mutex_lock(&resource->resource_lock);
 
-		acpi_power_turn_off_if_unused(resource, init);
+		/*
+		 * Turn off power resources in an unknown state too, because the
+		 * platform firmware on some system expects the OS to turn off
+		 * power resources without any users unconditionally.
+		 */
+		if (!resource->ref_count &&
+		    resource->state != ACPI_POWER_RESOURCE_STATE_OFF) {
+			dev_info(&resource->device.dev, "Turning OFF\n");
+			__acpi_power_off(resource);
+		}
 
 		mutex_unlock(&resource->resource_lock);
 	}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e10d38ac7cf2..453eff8ec8c3 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2360,7 +2360,7 @@ int __init acpi_scan_init(void)
 		}
 	}
 
-	acpi_turn_off_unused_power_resources(true);
+	acpi_turn_off_unused_power_resources();
 
 	acpi_scan_initialized = true;
 
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index df386571da98..09fd13757b65 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -504,7 +504,7 @@ static void acpi_pm_start(u32 acpi_state)
  */
 static void acpi_pm_end(void)
 {
-	acpi_turn_off_unused_power_resources(false);
+	acpi_turn_off_unused_power_resources();
 	acpi_scan_lock_release();
 	/*
 	 * This is necessary in case acpi_pm_finish() is not called during a

From c58e7ed28b4534ed073371843d03c433d6a9fe34 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 26 May 2021 12:22:51 -0400
Subject: [PATCH 0697/3292] PM: runtime: document common mistake with
 pm_runtime_get_sync()

pm_runtime_get_sync(), contradictory to intuition, does not drop the
runtime PM usage counter on errors which lead to several wrong usages in
drivers (missing the put).  pm_runtime_resume_and_get() was added as a
better implementation so document the preference of using it, hoping it
will stop bad patterns.

Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
[ rjw: Documentation change edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.rst | 6 +++++-
 include/linux/pm_runtime.h         | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index 18ae21bf7f92..b48cac5f9048 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -378,7 +378,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
 
   `int pm_runtime_get_sync(struct device *dev);`
     - increment the device's usage counter, run pm_runtime_resume(dev) and
-      return its result
+      return its result;
+      note that it does not drop the device's usage counter on errors, so
+      consider using pm_runtime_resume_and_get() instead of it, especially
+      if its return value is checked by the caller, as this is likely to
+      result in cleaner code.
 
   `int pm_runtime_get_if_in_use(struct device *dev);`
     - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 6c08a085367b..aab8b35e9f8a 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -380,6 +380,9 @@ static inline int pm_runtime_get(struct device *dev)
  * The possible return values of this function are the same as for
  * pm_runtime_resume() and the runtime PM usage counter of @dev remains
  * incremented in all cases, even if it returns an error code.
+ * Consider using pm_runtime_resume_and_get() instead of it, especially
+ * if its return value is checked by the caller, as this is likely to result
+ * in cleaner code.
  */
 static inline int pm_runtime_get_sync(struct device *dev)
 {

From 695efefb2e31cade92864d736596d3fbcd563b60 Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Sun, 30 May 2021 04:53:28 +0000
Subject: [PATCH 0698/3292] microblaze: Cleanup unused functions

These functions haven't been used, so just remove them. The patch
just uses grep to verify.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Michal Simek <monstr@monstr.eu>
Link: https://lore.kernel.org/r/1622350408-44875-3-git-send-email-guoren@kernel.org
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/include/asm/page.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index bf681f272f72..ce550978f4fc 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -35,9 +35,6 @@
 
 #define ARCH_SLAB_MINALIGN	L1_CACHE_BYTES
 
-#define PAGE_UP(addr)	(((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
-#define PAGE_DOWN(addr)	((addr)&(~((PAGE_SIZE)-1)))
-
 /*
  * PAGE_OFFSET -- the first address of the first page of memory. With MMU
  * it is set to the kernel start address (aligned on a page boundary).

From 1fa98d96ea0ff6c8770eeba90417aab4b4e07f52 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 14:58:25 +0800
Subject: [PATCH 0699/3292] clockevents: Use DEVICE_ATTR_[RO|WO] macros

Use the DEVICE_ATTR_[RO|WO] helpers instead of plain DEVICE_ATTR, which
makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210523065825.19684-1-yuehaibing@huawei.com
---
 kernel/time/clockevents.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f5490222e134..0056d2bed53e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -668,9 +668,9 @@ static struct bus_type clockevents_subsys = {
 static DEFINE_PER_CPU(struct device, tick_percpu_dev);
 static struct tick_device *tick_get_tick_dev(struct device *dev);
 
-static ssize_t sysfs_show_current_tick_dev(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
+static ssize_t current_device_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
 {
 	struct tick_device *td;
 	ssize_t count = 0;
@@ -682,12 +682,12 @@ static ssize_t sysfs_show_current_tick_dev(struct device *dev,
 	raw_spin_unlock_irq(&clockevents_lock);
 	return count;
 }
-static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
+static DEVICE_ATTR_RO(current_device);
 
 /* We don't support the abomination of removable broadcast devices */
-static ssize_t sysfs_unbind_tick_dev(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
+static ssize_t unbind_device_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
 {
 	char name[CS_NAME_LEN];
 	ssize_t ret = sysfs_get_uname(buf, name, count);
@@ -714,7 +714,7 @@ static ssize_t sysfs_unbind_tick_dev(struct device *dev,
 	mutex_unlock(&clockevents_mutex);
 	return ret ? ret : count;
 }
-static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
+static DEVICE_ATTR_WO(unbind_device);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static struct device tick_bc_dev = {

From c2d4fee3f6d170dee5ee7c337a0ba5e92fad7a64 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:14 +0100
Subject: [PATCH 0700/3292] tick/broadcast: Drop unneeded
 CONFIG_GENERIC_CLOCKEVENTS_BROADCAST guard

tick-broadcast.o is only built if CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
so remove the redundant #ifdef guards around the definition of
tick_receive_broadcast().

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-2-will@kernel.org
---
 kernel/time/tick-broadcast.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index a44055228796..fb794ff4855e 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -253,7 +253,6 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	return ret;
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 int tick_receive_broadcast(void)
 {
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
@@ -268,7 +267,6 @@ int tick_receive_broadcast(void)
 	evt->event_handler(evt);
 	return 0;
 }
-#endif
 
 /*
  * Broadcast the event to the cpus, which are set in the mask (mangled).

From e5007c288e7981e0b0cf8ea3dea443f0b8c34345 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:15 +0100
Subject: [PATCH 0701/3292] tick/broadcast: Split
 __tick_broadcast_oneshot_control() into a helper

In preparation for adding support for per-cpu wakeup timers, split
_tick_broadcast_oneshot_control() into a helper function which deals
only with the broadcast timer management across idle transitions.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-3-will@kernel.org
---
 kernel/time/tick-broadcast.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index fb794ff4855e..f3f2f4ba4321 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -717,24 +717,16 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
 	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
+					     struct tick_device *td,
+					     int cpu)
 {
-	struct clock_event_device *bc, *dev;
-	int cpu, ret = 0;
+	struct clock_event_device *bc, *dev = td->evtdev;
+	int ret = 0;
 	ktime_t now;
 
-	/*
-	 * If there is no broadcast device, tell the caller not to go
-	 * into deep idle.
-	 */
-	if (!tick_broadcast_device.evtdev)
-		return -EBUSY;
-
-	dev = this_cpu_ptr(&tick_cpu_device)->evtdev;
-
 	raw_spin_lock(&tick_broadcast_lock);
 	bc = tick_broadcast_device.evtdev;
-	cpu = smp_processor_id();
 
 	if (state == TICK_BROADCAST_ENTER) {
 		/*
@@ -863,6 +855,21 @@ out:
 	return ret;
 }
 
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+	int cpu = smp_processor_id();
+
+	if (tick_broadcast_device.evtdev)
+		return ___tick_broadcast_oneshot_control(state, td, cpu);
+
+	/*
+	 * If there is no broadcast device, tell the caller not
+	 * to go into deep idle.
+	 */
+	return -EBUSY;
+}
+
 /*
  * Reset the one shot broadcast for a cpu
  *

From c94a8537df12708cc03da9120c3c3561ae744ce1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:16 +0100
Subject: [PATCH 0702/3292] tick/broadcast: Prefer per-cpu oneshot wakeup
 timers to broadcast

Some SoCs have two per-cpu timer implementations where the timer with the
higher rating stops in deep idle (i.e. suffers from CLOCK_EVT_FEAT_C3STOP)
but is otherwise preferable to the timer with the lower rating. In such a
design, selecting the higher rated devices relies on a global broadcast
timer and IPIs to wake up from deep idle states.

To avoid the reliance on a global broadcast timer and also to reduce the
overhead associated with the IPI wakeups, extend
tick_install_broadcast_device() to manage per-cpu wakeup timers separately
from the broadcast device.

For now, these timers remain unused.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-4-will@kernel.org
---
 kernel/time/tick-broadcast.c | 59 +++++++++++++++++++++++++++++++++++-
 kernel/time/tick-common.c    |  2 +-
 kernel/time/tick-internal.h  |  4 +--
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f3f2f4ba4321..0e9e06d6cc5c 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -33,6 +33,8 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);
+
 static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
@@ -88,13 +90,65 @@ static bool tick_check_broadcast_device(struct clock_event_device *curdev,
 	return !curdev || newdev->rating > curdev->rating;
 }
 
+#ifdef CONFIG_TICK_ONESHOT
+static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
+{
+	return per_cpu(tick_oneshot_wakeup_device, cpu);
+}
+
+static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
+					   int cpu)
+{
+	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);
+
+	if (!newdev)
+		goto set_device;
+
+	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
+	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
+		 return false;
+
+	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
+	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
+		return false;
+
+	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
+		return false;
+
+	if (curdev && newdev->rating <= curdev->rating)
+		return false;
+
+	if (!try_module_get(newdev->owner))
+		return false;
+
+set_device:
+	clockevents_exchange_device(curdev, newdev);
+	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
+	return true;
+}
+#else
+static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
+{
+	return NULL;
+}
+
+static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
+					   int cpu)
+{
+	return false;
+}
+#endif
+
 /*
  * Conditionally install/replace broadcast device
  */
-void tick_install_broadcast_device(struct clock_event_device *dev)
+void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
 {
 	struct clock_event_device *cur = tick_broadcast_device.evtdev;
 
+	if (tick_set_oneshot_wakeup_device(dev, cpu))
+		return;
+
 	if (!tick_check_broadcast_device(cur, dev))
 		return;
 
@@ -996,6 +1050,9 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
  */
 static void tick_broadcast_oneshot_offline(unsigned int cpu)
 {
+	if (tick_get_oneshot_wakeup_device(cpu))
+		tick_set_oneshot_wakeup_device(NULL, cpu);
+
 	/*
 	 * Clear the broadcast masks for the dead cpu, but do not stop
 	 * the broadcast device!
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index e15bc0ef1912..d663249652ef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -373,7 +373,7 @@ out_bc:
 	/*
 	 * Can the new device be used as a broadcast device ?
 	 */
-	tick_install_broadcast_device(newdev);
+	tick_install_broadcast_device(newdev, cpu);
 }
 
 /**
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 7a981c9e87a4..30c89639e305 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -61,7 +61,7 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 /* Broadcasting support */
 # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern void tick_install_broadcast_device(struct clock_event_device *dev);
+extern void tick_install_broadcast_device(struct clock_event_device *dev, int cpu);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
 extern void tick_suspend_broadcast(void);
 extern void tick_resume_broadcast(void);
@@ -72,7 +72,7 @@ extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
 extern struct tick_device *tick_get_broadcast_device(void);
 extern struct cpumask *tick_get_broadcast_mask(void);
 # else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
-static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
+static inline void tick_install_broadcast_device(struct clock_event_device *dev, int cpu) { }
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
 static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
 static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }

From ea5c7f1b9aa1a7c9d1bb9440084ac1256789fadb Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:17 +0100
Subject: [PATCH 0703/3292] tick/broadcast: Program wakeup timer when entering
 idle if required

When configuring the broadcast timer on entry to and exit from deep idle
states, prefer a per-CPU wakeup timer if one exists.

On entry to idle, stop the tick device and transfer the next event into
the oneshot wakeup device, which will serve as the wakeup from idle. To
avoid the overhead of additional hardware accesses on exit from idle,
leave the timer armed and treat the inevitable interrupt as a (possibly
spurious) tick event.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-5-will@kernel.org
---
 kernel/time/tick-broadcast.c | 44 +++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 0e9e06d6cc5c..9b845212430b 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -96,6 +96,15 @@ static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
 	return per_cpu(tick_oneshot_wakeup_device, cpu);
 }
 
+static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
+{
+	/*
+	 * If we woke up early and the tick was reprogrammed in the
+	 * meantime then this may be spurious but harmless.
+	 */
+	tick_receive_broadcast();
+}
+
 static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
 					   int cpu)
 {
@@ -121,6 +130,7 @@ static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
 	if (!try_module_get(newdev->owner))
 		return false;
 
+	newdev->event_handler = tick_oneshot_wakeup_handler;
 set_device:
 	clockevents_exchange_device(curdev, newdev);
 	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
@@ -909,16 +919,48 @@ out:
 	return ret;
 }
 
+static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
+				       struct tick_device *td,
+				       int cpu)
+{
+	struct clock_event_device *dev, *wd;
+
+	dev = td->evtdev;
+	if (td->mode != TICKDEV_MODE_ONESHOT)
+		return -EINVAL;
+
+	wd = tick_get_oneshot_wakeup_device(cpu);
+	if (!wd)
+		return -ENODEV;
+
+	switch (state) {
+	case TICK_BROADCAST_ENTER:
+		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
+		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
+		clockevents_program_event(wd, dev->next_event, 1);
+		break;
+	case TICK_BROADCAST_EXIT:
+		/* We may have transitioned to oneshot mode while idle */
+		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
+			return -ENODEV;
+	}
+
+	return 0;
+}
+
 int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 	int cpu = smp_processor_id();
 
+	if (!tick_oneshot_wakeup_control(state, td, cpu))
+		return 0;
+
 	if (tick_broadcast_device.evtdev)
 		return ___tick_broadcast_oneshot_control(state, td, cpu);
 
 	/*
-	 * If there is no broadcast device, tell the caller not
+	 * If there is no broadcast or wakeup device, tell the caller not
 	 * to go into deep idle.
 	 */
 	return -EBUSY;

From 245a057fee18be08d6ac12357463579d06bea077 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:18 +0100
Subject: [PATCH 0704/3292] timer_list: Print name of per-cpu wakeup device

With the introduction of per-cpu wakeup devices that can be used in
preference to the broadcast timer, print the name of such devices when
they are available.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-6-will@kernel.org
---
 kernel/time/tick-broadcast.c |  7 +++++++
 kernel/time/tick-internal.h  |  1 +
 kernel/time/timer_list.c     | 10 +++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9b845212430b..f7fe6fe36173 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -63,6 +63,13 @@ struct cpumask *tick_get_broadcast_mask(void)
 	return tick_broadcast_mask;
 }
 
+static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);
+
+const struct clock_event_device *tick_get_wakeup_device(int cpu)
+{
+	return tick_get_oneshot_wakeup_device(cpu);
+}
+
 /*
  * Start the device in periodic mode
  */
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 30c89639e305..6a742a29e545 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -71,6 +71,7 @@ extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadc
 extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
 extern struct tick_device *tick_get_broadcast_device(void);
 extern struct cpumask *tick_get_broadcast_mask(void);
+extern const struct clock_event_device *tick_get_wakeup_device(int cpu);
 # else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
 static inline void tick_install_broadcast_device(struct clock_event_device *dev, int cpu) { }
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 6939140ab7c5..ed7d6ad694fb 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,14 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 	SEQ_printf(m, " event_handler:  %ps\n", dev->event_handler);
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, " retries:        %lu\n", dev->retries);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	if (cpu >= 0) {
+		const struct clock_event_device *wd = tick_get_wakeup_device(cpu);
+
+		SEQ_printf(m, "Wakeup Device: %s\n", wd ? wd->name : "<NULL>");
+	}
+#endif
 	SEQ_printf(m, "\n");
 }
 
@@ -248,7 +256,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
 
 static inline void timer_list_header(struct seq_file *m, u64 now)
 {
-	SEQ_printf(m, "Timer List Version: v0.8\n");
+	SEQ_printf(m, "Timer List Version: v0.9\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 	SEQ_printf(m, "\n");

From 9571289ddf71694de0e023afc5e88d90cfd067b5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:35:12 +0200
Subject: [PATCH 0705/3292] nfc: fdp: drop ftrace-like debugging messages

Now that the kernel has ftrace, any debugging calls that just do "made
it to this function!" and "leaving this function!" can be removed.
Better to use standard debugging tools.

This allows also to remove several local variables and entire
fdp_nci_recv_frame() function (whose purpose was only to log).

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073522.6720-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/fdp/fdp.c | 31 -------------------------------
 drivers/nfc/fdp/fdp.h |  1 -
 drivers/nfc/fdp/i2c.c | 12 +-----------
 3 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 125d71c27b8b..7863b2536999 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -237,28 +237,18 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 static int fdp_nci_open(struct nci_dev *ndev)
 {
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
-
-	dev_dbg(dev, "%s\n", __func__);
 
 	return info->phy_ops->enable(info->phy);
 }
 
 static int fdp_nci_close(struct nci_dev *ndev)
 {
-	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
-
-	dev_dbg(dev, "%s\n", __func__);
 	return 0;
 }
 
 static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
 {
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
-
-	dev_dbg(dev, "%s\n", __func__);
 
 	if (atomic_dec_and_test(&info->data_pkt_counter))
 		info->data_pkt_counter_cb(ndev);
@@ -266,16 +256,6 @@ static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
 	return info->phy_ops->write(info->phy, skb);
 }
 
-int fdp_nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb)
-{
-	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
-
-	dev_dbg(dev, "%s\n", __func__);
-	return nci_recv_frame(ndev, skb);
-}
-EXPORT_SYMBOL(fdp_nci_recv_frame);
-
 static int fdp_nci_request_firmware(struct nci_dev *ndev)
 {
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
@@ -476,8 +456,6 @@ static int fdp_nci_setup(struct nci_dev *ndev)
 	int r;
 	u8 patched = 0;
 
-	dev_dbg(dev, "%s\n", __func__);
-
 	r = nci_core_init(ndev);
 	if (r)
 		goto error;
@@ -585,9 +563,7 @@ static int fdp_nci_core_reset_ntf_packet(struct nci_dev *ndev,
 					  struct sk_buff *skb)
 {
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
 
-	dev_dbg(dev, "%s\n", __func__);
 	info->setup_reset_ntf = 1;
 	wake_up(&info->setup_wq);
 
@@ -598,9 +574,7 @@ static int fdp_nci_prop_patch_ntf_packet(struct nci_dev *ndev,
 					  struct sk_buff *skb)
 {
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
 
-	dev_dbg(dev, "%s\n", __func__);
 	info->setup_patch_ntf = 1;
 	info->setup_patch_status = skb->data[0];
 	wake_up(&info->setup_wq);
@@ -773,11 +747,6 @@ EXPORT_SYMBOL(fdp_nci_probe);
 
 void fdp_nci_remove(struct nci_dev *ndev)
 {
-	struct fdp_nci_info *info = nci_get_drvdata(ndev);
-	struct device *dev = &info->phy->i2c_dev->dev;
-
-	dev_dbg(dev, "%s\n", __func__);
-
 	nci_unregister_device(ndev);
 	nci_free_device(ndev);
 }
diff --git a/drivers/nfc/fdp/fdp.h b/drivers/nfc/fdp/fdp.h
index 9bd1f3f23e2d..ead3b21ccae6 100644
--- a/drivers/nfc/fdp/fdp.h
+++ b/drivers/nfc/fdp/fdp.h
@@ -25,6 +25,5 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
 		  struct nci_dev **ndev, int tx_headroom, int tx_tailroom,
 		  u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg);
 void fdp_nci_remove(struct nci_dev *ndev);
-int fdp_nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb);
 
 #endif /* __LOCAL_FDP_H_ */
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 997e0806821a..c5596e514648 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -49,7 +49,6 @@ static int fdp_nci_i2c_enable(void *phy_id)
 {
 	struct fdp_i2c_phy *phy = phy_id;
 
-	dev_dbg(&phy->i2c_dev->dev, "%s\n", __func__);
 	fdp_nci_i2c_reset(phy);
 
 	return 0;
@@ -59,7 +58,6 @@ static void fdp_nci_i2c_disable(void *phy_id)
 {
 	struct fdp_i2c_phy *phy = phy_id;
 
-	dev_dbg(&phy->i2c_dev->dev, "%s\n", __func__);
 	fdp_nci_i2c_reset(phy);
 }
 
@@ -197,7 +195,6 @@ flush:
 static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
 {
 	struct fdp_i2c_phy *phy = phy_id;
-	struct i2c_client *client;
 	struct sk_buff *skb;
 	int r;
 
@@ -206,9 +203,6 @@ static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
 		return IRQ_NONE;
 	}
 
-	client = phy->i2c_dev;
-	dev_dbg(&client->dev, "%s\n", __func__);
-
 	r = fdp_nci_i2c_read(phy, &skb);
 
 	if (r == -EREMOTEIO)
@@ -217,7 +211,7 @@ static irqreturn_t fdp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
 		return IRQ_HANDLED;
 
 	if (skb != NULL)
-		fdp_nci_recv_frame(phy->ndev, skb);
+		nci_recv_frame(phy->ndev, skb);
 
 	return IRQ_HANDLED;
 }
@@ -288,8 +282,6 @@ static int fdp_nci_i2c_probe(struct i2c_client *client)
 	u32 clock_freq;
 	int r = 0;
 
-	dev_dbg(dev, "%s\n", __func__);
-
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		nfc_err(dev, "No I2C_FUNC_I2C support\n");
 		return -ENODEV;
@@ -351,8 +343,6 @@ static int fdp_nci_i2c_remove(struct i2c_client *client)
 {
 	struct fdp_i2c_phy *phy = i2c_get_clientdata(client);
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-
 	fdp_nci_remove(phy->ndev);
 	fdp_nci_i2c_disable(phy);
 

From 6a7fdad7021b3eed9c6d68a483dc8f294accd1d5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:35:13 +0200
Subject: [PATCH 0706/3292] nfc: mei_phy: drop ftrace-like debugging messages

Now that the kernel has ftrace, any debugging calls that just do "made
it to this function!" and "leaving this function!" can be removed.
Better to use standard debugging tools.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073522.6720-2-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/mei_phy.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c
index 0f43bb389566..e56cea716cd2 100644
--- a/drivers/nfc/mei_phy.c
+++ b/drivers/nfc/mei_phy.c
@@ -98,8 +98,6 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy)
 	size_t if_version_length;
 	int bytes_recv, r;
 
-	pr_info("%s\n", __func__);
-
 	memset(&cmd, 0, sizeof(struct mei_nfc_cmd));
 	cmd.hdr.cmd = MEI_NFC_CMD_MAINTENANCE;
 	cmd.hdr.data_size = 1;
@@ -146,8 +144,6 @@ static int mei_nfc_connect(struct nfc_mei_phy *phy)
 	size_t connect_length, connect_resp_length;
 	int bytes_recv, r;
 
-	pr_info("%s\n", __func__);
-
 	connect_length = sizeof(struct mei_nfc_cmd) +
 			sizeof(struct mei_nfc_connect);
 
@@ -320,8 +316,6 @@ static int nfc_mei_phy_enable(void *phy_id)
 	int r;
 	struct nfc_mei_phy *phy = phy_id;
 
-	pr_info("%s\n", __func__);
-
 	if (phy->powered == 1)
 		return 0;
 
@@ -363,8 +357,6 @@ static void nfc_mei_phy_disable(void *phy_id)
 {
 	struct nfc_mei_phy *phy = phy_id;
 
-	pr_info("%s\n", __func__);
-
 	mei_cldev_disable(phy->cldev);
 
 	phy->powered = 0;

From be3d162ad02303f7bcb00b99b010fd4babe630b0 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:35:14 +0200
Subject: [PATCH 0707/3292] nfc: mrvl: use SPDX-License-Identifier

Use SPDX-License-Identifier: GPL-2.0-only, instead of hand writing it.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073522.6720-3-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/nfcmrvl/fw_dnld.c | 13 +------------
 drivers/nfc/nfcmrvl/fw_dnld.h | 15 ++-------------
 drivers/nfc/nfcmrvl/i2c.c     | 15 ++-------------
 drivers/nfc/nfcmrvl/main.c    | 13 +------------
 drivers/nfc/nfcmrvl/nfcmrvl.h | 15 ++-------------
 drivers/nfc/nfcmrvl/spi.c     | 15 ++-------------
 drivers/nfc/nfcmrvl/uart.c    | 13 +------------
 drivers/nfc/nfcmrvl/usb.c     | 15 ++-------------
 8 files changed, 13 insertions(+), 101 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index 52c8ae504e32..05df7ad224d5 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell NFC driver: Firmware downloader
  *
  * Copyright (C) 2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
  */
 
 #include <linux/module.h>
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.h b/drivers/nfc/nfcmrvl/fw_dnld.h
index 058ce77b3cbc..7c4d91b01910 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.h
+++ b/drivers/nfc/nfcmrvl/fw_dnld.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Marvell NFC driver: Firmware downloader
  *
  * Copyright (C) 2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
- **/
+ */
 
 #ifndef __NFCMRVL_FW_DNLD_H__
 #define __NFCMRVL_FW_DNLD_H__
diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index 3c9bbee98237..59a529e72d96 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell NFC-over-I2C driver: I2C interface related functions
  *
  * Copyright (C) 2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
- **/
+ */
 
 #include <linux/module.h>
 #include <linux/interrupt.h>
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index 529be35ac178..a4620b480c4f 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell NFC driver: major functions
  *
  * Copyright (C) 2014-2015 Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
  */
 
 #include <linux/module.h>
diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h
index e84ee18c73ae..0b4220bb91bc 100644
--- a/drivers/nfc/nfcmrvl/nfcmrvl.h
+++ b/drivers/nfc/nfcmrvl/nfcmrvl.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Marvell NFC driver
  *
  * Copyright (C) 2014-2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
- **/
+ */
 
 #ifndef _NFCMRVL_H_
 #define _NFCMRVL_H_
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index 0647b85930a6..66696321c645 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell NFC-over-SPI driver: SPI interface related functions
  *
  * Copyright (C) 2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
- **/
+ */
 
 #include <linux/module.h>
 #include <linux/interrupt.h>
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 7194dd7ef0f1..d7ba5b5c653c 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell NFC-over-UART driver
  *
  * Copyright (C) 2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
  */
 
 #include <linux/module.h>
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index bcd563cb556c..50f06dd1ba25 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell NFC-over-USB driver: USB interface related functions
  *
  * Copyright (C) 2014, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License").  You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
- * this warranty disclaimer.
- **/
+ */
 
 #include <linux/module.h>
 #include <linux/usb.h>

From 8f99528ef5e2aaee5b17bd97271a4e1dd4ee9b35 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:35:15 +0200
Subject: [PATCH 0708/3292] nfc: mrvl: correct minor coding style violations

Correct block comments and usage of tab in function definition.  No
functional change.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073522.6720-4-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/nfcmrvl/fw_dnld.c | 12 ++++++------
 drivers/nfc/nfcmrvl/nfcmrvl.h | 12 ++++++------
 drivers/nfc/nfcmrvl/uart.c    | 16 ++++++++--------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c
index 05df7ad224d5..aaccb8b76b3e 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -39,8 +39,8 @@ enum {
 };
 
 /*
-** Patterns for responses
-*/
+ * Patterns for responses
+ */
 
 static const uint8_t nci_pattern_core_reset_ntf[] = {
 	0x60, 0x00, 0x02, 0xA0, 0x01
@@ -440,7 +440,7 @@ static void fw_dnld_rx_work(struct work_struct *work)
 	}
 }
 
-int	nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv)
+int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv)
 {
 	char name[32];
 
@@ -454,13 +454,13 @@ int	nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv)
 	return 0;
 }
 
-void	nfcmrvl_fw_dnld_deinit(struct nfcmrvl_private *priv)
+void nfcmrvl_fw_dnld_deinit(struct nfcmrvl_private *priv)
 {
 	destroy_workqueue(priv->fw_dnld.rx_wq);
 }
 
-void	nfcmrvl_fw_dnld_recv_frame(struct nfcmrvl_private *priv,
-				   struct sk_buff *skb)
+void nfcmrvl_fw_dnld_recv_frame(struct nfcmrvl_private *priv,
+				struct sk_buff *skb)
 {
 	/* Discard command timer */
 	if (timer_pending(&priv->ndev->cmd_timer))
diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h
index 0b4220bb91bc..a715543bc9bf 100644
--- a/drivers/nfc/nfcmrvl/nfcmrvl.h
+++ b/drivers/nfc/nfcmrvl/nfcmrvl.h
@@ -25,16 +25,16 @@
 #define NFCMRVL_NCI_MAX_EVENT_SIZE		260
 
 /*
-** NCI FW Parmaters
-*/
+ * NCI FW Parameters
+ */
 
 #define NFCMRVL_PB_BAIL_OUT			0x11
 #define NFCMRVL_PROP_REF_CLOCK			0xF0
 #define NFCMRVL_PROP_SET_HI_CONFIG		0xF1
 
 /*
-** HCI defines
-*/
+ * HCI defines
+ */
 
 #define NFCMRVL_HCI_EVENT_HEADER_SIZE		0x04
 #define NFCMRVL_HCI_EVENT_CODE			0x04
@@ -67,8 +67,8 @@ struct nfcmrvl_private {
 	bool support_fw_dnld;
 
 	/*
-	** PHY related information
-	*/
+	 * PHY related information
+	 */
 
 	/* PHY driver context */
 	void *drv_data;
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index d7ba5b5c653c..ed85645eb885 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -18,8 +18,8 @@ static unsigned int break_control;
 static int reset_n_io = -EINVAL;
 
 /*
-** NFCMRVL NCI OPS
-*/
+ * NFCMRVL NCI OPS
+ */
 
 static int nfcmrvl_uart_nci_open(struct nfcmrvl_private *priv)
 {
@@ -92,8 +92,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
 }
 
 /*
-** NCI UART OPS
-*/
+ * NCI UART OPS
+ */
 
 static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
 {
@@ -167,10 +167,10 @@ static void nfcmrvl_nci_uart_tx_done(struct nci_uart *nu)
 		return;
 
 	/*
-	** To ensure that if the NFCC goes in DEEP SLEEP sate we can wake him
-	** up. we set BREAK. Once we will be ready to send again we will remove
-	** it.
-	*/
+	 * To ensure that if the NFCC goes in DEEP SLEEP sate we can wake him
+	 * up. we set BREAK. Once we will be ready to send again we will remove
+	 * it.
+	 */
 	if (priv->config.break_control && nu->tty->ops->break_ctl) {
 		nu->tty->ops->break_ctl(nu->tty, -1);
 		usleep_range(1000, 3000);

From e3bf5531e6e6d5a7a498cb21079a135726fd7fb8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:38:56 +0200
Subject: [PATCH 0709/3292] nfc: mrvl: simplify with module_driver

Remove standard module init/exit boilerplate with module_driver() which
also annotates the functions with __init.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/nfcmrvl/uart.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index ed85645eb885..50d86c90b9dd 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -189,23 +189,7 @@ static struct nci_uart nfcmrvl_nci_uart = {
 		.tx_done	= nfcmrvl_nci_uart_tx_done,
 	}
 };
-
-/*
-** Module init
-*/
-
-static int nfcmrvl_uart_init_module(void)
-{
-	return nci_uart_register(&nfcmrvl_nci_uart);
-}
-
-static void nfcmrvl_uart_exit_module(void)
-{
-	nci_uart_unregister(&nfcmrvl_nci_uart);
-}
-
-module_init(nfcmrvl_uart_init_module);
-module_exit(nfcmrvl_uart_exit_module);
+module_driver(nfcmrvl_nci_uart, nci_uart_register, nci_uart_unregister);
 
 MODULE_AUTHOR("Marvell International Ltd.");
 MODULE_DESCRIPTION("Marvell NFC-over-UART");

From 62f64417afd6babfa3a45800c11b5a8fce447c71 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:38:57 +0200
Subject: [PATCH 0710/3292] nfc: pn533: drop ftrace-like debugging messages

Now that the kernel has ftrace, any debugging calls that just do "made
it to this function!" and "leaving this function!" can be removed.
Better to use standard debugging tools.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-2-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/i2c.c   |  5 -----
 drivers/nfc/pn533/pn533.c | 46 ---------------------------------------
 drivers/nfc/pn533/usb.c   |  4 ----
 3 files changed, 55 deletions(-)

diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c
index bfc617acabae..bb04fddb0504 100644
--- a/drivers/nfc/pn533/i2c.c
+++ b/drivers/nfc/pn533/i2c.c
@@ -174,9 +174,6 @@ static int pn533_i2c_probe(struct i2c_client *client,
 	struct pn533 *priv;
 	int r = 0;
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-	dev_dbg(&client->dev, "IRQ: %d\n", client->irq);
-
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		nfc_err(&client->dev, "Need I2C_FUNC_I2C\n");
 		return -ENODEV;
@@ -239,8 +236,6 @@ static int pn533_i2c_remove(struct i2c_client *client)
 {
 	struct pn533_i2c_phy *phy = i2c_get_clientdata(client);
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-
 	free_irq(client->irq, phy);
 
 	pn53x_unregister_nfc(phy->priv);
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 2c7f9916f206..cd64bfe20402 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -1075,8 +1075,6 @@ static int pn533_tm_get_data_complete(struct pn533 *dev, void *arg,
 	u8 status, ret, mi;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (IS_ERR(resp)) {
 		skb_queue_purge(&dev->resp_q);
 		return PTR_ERR(resp);
@@ -1124,8 +1122,6 @@ static void pn533_wq_tm_mi_recv(struct work_struct *work)
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb = pn533_alloc_skb(dev, 0);
 	if (!skb)
 		return;
@@ -1148,8 +1144,6 @@ static void pn533_wq_tm_mi_send(struct work_struct *work)
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	/* Grab the first skb in the queue */
 	skb = skb_dequeue(&dev->fragment_skb);
 	if (skb == NULL) {	/* No more data */
@@ -1186,8 +1180,6 @@ static void pn533_wq_tg_get_data(struct work_struct *work)
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb = pn533_alloc_skb(dev, 0);
 	if (!skb)
 		return;
@@ -1206,8 +1198,6 @@ static int pn533_init_target_complete(struct pn533 *dev, struct sk_buff *resp)
 	size_t gb_len;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (resp->len < ATR_REQ_GB_OFFSET + 1)
 		return -EINVAL;
 
@@ -1260,8 +1250,6 @@ static int pn533_rf_complete(struct pn533 *dev, void *arg,
 {
 	int rc = 0;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (IS_ERR(resp)) {
 		rc = PTR_ERR(resp);
 
@@ -1283,8 +1271,6 @@ static void pn533_wq_rf(struct work_struct *work)
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb = pn533_alloc_skb(dev, 2);
 	if (!skb)
 		return;
@@ -1360,8 +1346,6 @@ static int pn533_poll_dep(struct nfc_dev *nfc_dev)
 	u8 *next, nfcid3[NFC_NFCID3_MAXSIZE];
 	u8 passive_data[PASSIVE_DATA_LEN] = {0x00, 0xff, 0xff, 0x00, 0x3};
 
-	dev_dbg(dev->dev, "%s", __func__);
-
 	if (!dev->gb) {
 		dev->gb = nfc_get_local_general_bytes(nfc_dev, &dev->gb_len);
 
@@ -1511,8 +1495,6 @@ static int pn533_poll_complete(struct pn533 *dev, void *arg,
 	struct pn533_poll_modulations *cur_mod;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (IS_ERR(resp)) {
 		rc = PTR_ERR(resp);
 
@@ -1783,8 +1765,6 @@ static int pn533_activate_target_nfcdep(struct pn533 *dev)
 	struct sk_buff *skb;
 	struct sk_buff *resp;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb = pn533_alloc_skb(dev, sizeof(u8) * 2); /*TG + Next*/
 	if (!skb)
 		return -ENOMEM;
@@ -1866,8 +1846,6 @@ static int pn533_deactivate_target_complete(struct pn533 *dev, void *arg,
 {
 	int rc = 0;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (IS_ERR(resp)) {
 		rc = PTR_ERR(resp);
 
@@ -1892,8 +1870,6 @@ static void pn533_deactivate_target(struct nfc_dev *nfc_dev,
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (!dev->tgt_active_prot) {
 		nfc_err(dev->dev, "There is no active target\n");
 		return;
@@ -1988,8 +1964,6 @@ static int pn533_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	u8 *next, *arg, nfcid3[NFC_NFCID3_MAXSIZE];
 	u8 passive_data[PASSIVE_DATA_LEN] = {0x00, 0xff, 0xff, 0x00, 0x3};
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (dev->poll_mod_count) {
 		nfc_err(dev->dev,
 			"Cannot bring the DEP link up while polling\n");
@@ -2067,8 +2041,6 @@ static int pn533_dep_link_down(struct nfc_dev *nfc_dev)
 {
 	struct pn533 *dev = nfc_get_drvdata(nfc_dev);
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	pn533_poll_reset_mod_list(dev);
 
 	if (dev->tgt_mode || dev->tgt_active_prot)
@@ -2092,8 +2064,6 @@ static struct sk_buff *pn533_build_response(struct pn533 *dev)
 	struct sk_buff *skb, *tmp, *t;
 	unsigned int skb_len = 0, tmp_len = 0;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (skb_queue_empty(&dev->resp_q))
 		return NULL;
 
@@ -2133,8 +2103,6 @@ static int pn533_data_exchange_complete(struct pn533 *dev, void *_arg,
 	int rc = 0;
 	u8 status, ret, mi;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (IS_ERR(resp)) {
 		rc = PTR_ERR(resp);
 		goto _error;
@@ -2288,8 +2256,6 @@ static int pn533_transceive(struct nfc_dev *nfc_dev,
 	struct pn533_data_exchange_arg *arg = NULL;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (!dev->tgt_active_prot) {
 		nfc_err(dev->dev,
 			"Can't exchange data if there is no active target\n");
@@ -2356,8 +2322,6 @@ static int pn533_tm_send_complete(struct pn533 *dev, void *arg,
 {
 	u8 status;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	if (IS_ERR(resp))
 		return PTR_ERR(resp);
 
@@ -2388,8 +2352,6 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb)
 	struct pn533 *dev = nfc_get_drvdata(nfc_dev);
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	/* let's split in multiple chunks if size's too big */
 	if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) {
 		rc = pn533_fill_fragment_skbs(dev, skb);
@@ -2426,8 +2388,6 @@ static void pn533_wq_mi_recv(struct work_struct *work)
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb = pn533_alloc_skb(dev, PN533_CMD_DATAEXCH_HEAD_LEN);
 	if (!skb)
 		goto error;
@@ -2476,8 +2436,6 @@ static void pn533_wq_mi_send(struct work_struct *work)
 	struct sk_buff *skb;
 	int rc;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	/* Grab the first skb in the queue */
 	skb = skb_dequeue(&dev->fragment_skb);
 
@@ -2533,8 +2491,6 @@ static int pn533_set_configuration(struct pn533 *dev, u8 cfgitem, u8 *cfgdata,
 	struct sk_buff *resp;
 	int skb_len;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb_len = sizeof(cfgitem) + cfgdata_len; /* cfgitem + cfgdata */
 
 	skb = pn533_alloc_skb(dev, skb_len);
@@ -2580,8 +2536,6 @@ static int pn533_pasori_fw_reset(struct pn533 *dev)
 	struct sk_buff *skb;
 	struct sk_buff *resp;
 
-	dev_dbg(dev->dev, "%s\n", __func__);
-
 	skb = pn533_alloc_skb(dev, sizeof(u8));
 	if (!skb)
 		return -ENOMEM;
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index 84d2bfabf42b..bd7f7478d189 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -354,8 +354,6 @@ static void pn533_acr122_poweron_rdr_resp(struct urb *urb)
 {
 	struct pn533_acr122_poweron_rdr_arg *arg = urb->context;
 
-	dev_dbg(&urb->dev->dev, "%s\n", __func__);
-
 	print_hex_dump_debug("ACR122 RX: ", DUMP_PREFIX_NONE, 16, 1,
 		       urb->transfer_buffer, urb->transfer_buffer_length,
 		       false);
@@ -375,8 +373,6 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy)
 	void *cntx;
 	struct pn533_acr122_poweron_rdr_arg arg;
 
-	dev_dbg(&phy->udev->dev, "%s\n", __func__);
-
 	buffer = kmemdup(cmd, sizeof(cmd), GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;

From feab6ba21da73a86ed406fe2c7d075147f73eb44 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:38:58 +0200
Subject: [PATCH 0711/3292] nfc: pn533: drop unneeded braces {} in if

{} braces are not needed over single if-statement.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-3-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/i2c.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c
index bb04fddb0504..e6bf8cfe3aa7 100644
--- a/drivers/nfc/pn533/i2c.c
+++ b/drivers/nfc/pn533/i2c.c
@@ -192,9 +192,8 @@ static int pn533_i2c_probe(struct i2c_client *client,
 				phy, &i2c_phy_ops, NULL,
 				&phy->i2c_dev->dev);
 
-	if (IS_ERR(priv)) {
+	if (IS_ERR(priv))
 		return PTR_ERR(priv);
-	}
 
 	phy->priv = priv;
 	r = pn532_i2c_nfc_alloc(priv, PN533_NO_TYPE_B_PROTOCOLS, &client->dev);

From aa93b4bcdffab33b23edb71d6e237b0d85b7688a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:38:59 +0200
Subject: [PATCH 0712/3292] nfc: pn544: drop ftrace-like debugging messages

Now that the kernel has ftrace, any debugging calls that just do "made
it to this function!" and "leaving this function!" can be removed.
Better to use standard debugging tools.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-4-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn544/i2c.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c
index aac778c5ddd2..de59e439c369 100644
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -241,8 +241,6 @@ static int pn544_hci_i2c_enable(void *phy_id)
 {
 	struct pn544_i2c_phy *phy = phy_id;
 
-	pr_info("%s\n", __func__);
-
 	pn544_hci_i2c_enable_mode(phy, PN544_HCI_MODE);
 
 	phy->powered = 1;
@@ -875,9 +873,6 @@ static int pn544_hci_i2c_probe(struct i2c_client *client,
 	struct pn544_i2c_phy *phy;
 	int r = 0;
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-	dev_dbg(&client->dev, "IRQ: %d\n", client->irq);
-
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		nfc_err(&client->dev, "Need I2C_FUNC_I2C\n");
 		return -ENODEV;
@@ -937,8 +932,6 @@ static int pn544_hci_i2c_remove(struct i2c_client *client)
 {
 	struct pn544_i2c_phy *phy = i2c_get_clientdata(client);
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-
 	cancel_work_sync(&phy->fw_work);
 	if (phy->fw_work_state != FW_WORK_STATE_IDLE)
 		pn544_hci_i2c_fw_work_complete(phy, -ENODEV);

From e83a26473a7bc79498c486a542a7b933b2994466 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:39:00 +0200
Subject: [PATCH 0713/3292] nfc: st21nfca: drop ftrace-like debugging messages

Now that the kernel has ftrace, any debugging calls that just do "made
it to this function!" and "leaving this function!" can be removed.
Better to use standard debugging tools.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-5-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st21nfca/i2c.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
index cebc6c06a1b6..7a9f4d71707e 100644
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -502,9 +502,6 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
 	struct st21nfca_i2c_phy *phy;
 	int r;
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-	dev_dbg(&client->dev, "IRQ: %d\n", client->irq);
-
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		nfc_err(&client->dev, "Need I2C_FUNC_I2C\n");
 		return -ENODEV;
@@ -568,8 +565,6 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client)
 {
 	struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-
 	st21nfca_hci_remove(phy->hdev);
 
 	if (phy->powered)

From 1952fa424dcbfa75a58bbd4f7a0551a3ad5bb172 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:39:01 +0200
Subject: [PATCH 0714/3292] nfc: st-nci: drop ftrace-like debugging messages

Now that the kernel has ftrace, any debugging calls that just do "made
it to this function!" and "leaving this function!" can be removed.
Better to use standard debugging tools.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-6-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st-nci/i2c.c | 5 -----
 drivers/nfc/st-nci/se.c  | 6 ------
 drivers/nfc/st-nci/spi.c | 5 -----
 3 files changed, 16 deletions(-)

diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c
index 663d1cc19b81..46981405e8b1 100644
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -206,9 +206,6 @@ static int st_nci_i2c_probe(struct i2c_client *client,
 	struct st_nci_i2c_phy *phy;
 	int r;
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-	dev_dbg(&client->dev, "IRQ: %d\n", client->irq);
-
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		nfc_err(&client->dev, "Need I2C_FUNC_I2C\n");
 		return -ENODEV;
@@ -261,8 +258,6 @@ static int st_nci_i2c_remove(struct i2c_client *client)
 {
 	struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
 
-	dev_dbg(&client->dev, "%s\n", __func__);
-
 	ndlc_remove(phy->ndlc);
 
 	return 0;
diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c
index 8657e025166f..5fd89f72969d 100644
--- a/drivers/nfc/st-nci/se.c
+++ b/drivers/nfc/st-nci/se.c
@@ -470,8 +470,6 @@ int st_nci_disable_se(struct nci_dev *ndev, u32 se_idx)
 {
 	int r;
 
-	pr_debug("st_nci_disable_se\n");
-
 	/*
 	 * According to upper layer, se_idx == NFC_SE_UICC when
 	 * info->se_info.se_status->is_uicc_enable is true should never happen
@@ -496,8 +494,6 @@ int st_nci_enable_se(struct nci_dev *ndev, u32 se_idx)
 {
 	int r;
 
-	pr_debug("st_nci_enable_se\n");
-
 	/*
 	 * According to upper layer, se_idx == NFC_SE_UICC when
 	 * info->se_info.se_status->is_uicc_enable is true should never happen.
@@ -602,8 +598,6 @@ int st_nci_discover_se(struct nci_dev *ndev)
 	int se_count = 0;
 	struct st_nci_info *info = nci_get_drvdata(ndev);
 
-	pr_debug("st_nci_discover_se\n");
-
 	r = st_nci_hci_network_init(ndev);
 	if (r != 0)
 		return r;
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 5f1a2173b2e7..250d56f204c3 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -216,9 +216,6 @@ static int st_nci_spi_probe(struct spi_device *dev)
 	struct st_nci_spi_phy *phy;
 	int r;
 
-	dev_dbg(&dev->dev, "%s\n", __func__);
-	dev_dbg(&dev->dev, "IRQ: %d\n", dev->irq);
-
 	/* Check SPI platform functionnalities */
 	if (!dev) {
 		pr_debug("%s: dev is NULL. Device is not accessible.\n",
@@ -274,8 +271,6 @@ static int st_nci_spi_remove(struct spi_device *dev)
 {
 	struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
 
-	dev_dbg(&dev->dev, "%s\n", __func__);
-
 	ndlc_remove(phy->ndlc);
 
 	return 0;

From e099f3e8b71c212779089429a105b16d51ff6d58 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:39:02 +0200
Subject: [PATCH 0715/3292] nfc: st95hf: fix indentation to tabs

Use tabs to indent instead of spaces. No functional change.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531073902.7111-7-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st95hf/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 0d99181b6ce3..2dc788c363fd 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1057,8 +1057,8 @@ static const struct spi_device_id st95hf_id[] = {
 MODULE_DEVICE_TABLE(spi, st95hf_id);
 
 static const struct of_device_id st95hf_spi_of_match[] __maybe_unused = {
-        { .compatible = "st,st95hf" },
-        { },
+	{ .compatible = "st,st95hf" },
+	{},
 };
 MODULE_DEVICE_TABLE(of, st95hf_spi_of_match);
 

From ad993a95c508417acdeb15244109e009e50d8758 Mon Sep 17 00:00:00 2001
From: Xie Yongji <xieyongji@bytedance.com>
Date: Mon, 31 May 2021 21:58:52 +0800
Subject: [PATCH 0716/3292] virtio-net: Add validation for used length

This adds validation for used length (might come
from an untrusted device) to avoid data corruption
or loss.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20210531135852.113-1-xieyongji@bytedance.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 073fec4c0df1..ed969b65126e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -730,6 +730,12 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	len -= vi->hdr_len;
 	stats->bytes += len;
 
+	if (unlikely(len > GOOD_PACKET_LEN)) {
+		pr_debug("%s: rx error: len %u exceeds max size %d\n",
+			 dev->name, len, GOOD_PACKET_LEN);
+		dev->stats.rx_length_errors++;
+		goto err_len;
+	}
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -833,6 +839,7 @@ err:
 err_xdp:
 	rcu_read_unlock();
 	stats->xdp_drops++;
+err_len:
 	stats->drops++;
 	put_page(page);
 xdp_xmit:
@@ -886,6 +893,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	head_skb = NULL;
 	stats->bytes += len - vi->hdr_len;
 
+	if (unlikely(len > truesize)) {
+		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+			 dev->name, len, (unsigned long)ctx);
+		dev->stats.rx_length_errors++;
+		goto err_skb;
+	}
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -1012,13 +1025,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 	rcu_read_unlock();
 
-	if (unlikely(len > truesize)) {
-		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-			 dev->name, len, (unsigned long)ctx);
-		dev->stats.rx_length_errors++;
-		goto err_skb;
-	}
-
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
 			       metasize, !!headroom);
 	curr_skb = head_skb;

From 62f20e068ccc50d6ab66fdb72ba90da2b9418c99 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sat, 29 May 2021 13:07:46 +0200
Subject: [PATCH 0717/3292] ipv6: use prandom_u32() for ID generation

This is a complement to commit aa6dd211e4b1 ("inet: use bigger hash
table for IP ID generation"), but focusing on some specific aspects
of IPv6.

Contary to IPv4, IPv6 only uses packet IDs with fragments, and with a
minimum MTU of 1280, it's much less easy to force a remote peer to
produce many fragments to explore its ID sequence. In addition packet
IDs are 32-bit in IPv6, which further complicates their analysis. On
the other hand, it is often easier to choose among plenty of possible
source addresses and partially work around the bigger hash table the
commit above permits, which leaves IPv6 partially exposed to some
possibilities of remote analysis at the risk of weakening some
protocols like DNS if some IDs can be predicted with a good enough
probability.

Given the wide range of permitted IDs, the risk of collision is extremely
low so there's no need to rely on the positive increment algorithm that
is shared with the IPv4 code via ip_idents_reserve(). We have a fast
PRNG, so let's simply call prandom_u32() and be done with it.

Performance measurements at 10 Gbps couldn't show any difference with
the previous code, even when using a single core, because due to the
large fragments, we're limited to only ~930 kpps at 10 Gbps and the cost
of the random generation is completely offset by other operations and by
the network transfer time. In addition, this change removes the need to
update a shared entry in the idents table so it may even end up being
slightly faster on large scale systems where this matters.

The risk of at least one collision here is about 1/80 million among
10 IDs, 1/850k among 100 IDs, and still only 1/8.5k among 1000 IDs,
which remains very low compared to IPv4 where all IDs are reused
every 4 to 80ms on a 10 Gbps flow depending on packet sizes.

Reported-by: Amit Klein <aksecurity@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20210529110746.6796-1-w@1wt.eu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/output_core.c | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index af36acc1a644..2880dc7d9a49 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -15,29 +15,11 @@ static u32 __ipv6_select_ident(struct net *net,
 			       const struct in6_addr *dst,
 			       const struct in6_addr *src)
 {
-	const struct {
-		struct in6_addr dst;
-		struct in6_addr src;
-	} __aligned(SIPHASH_ALIGNMENT) combined = {
-		.dst = *dst,
-		.src = *src,
-	};
-	u32 hash, id;
+	u32 id;
 
-	/* Note the following code is not safe, but this is okay. */
-	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
-		get_random_bytes(&net->ipv4.ip_id_key,
-				 sizeof(net->ipv4.ip_id_key));
-
-	hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
-
-	/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
-	 * set the hight order instead thus minimizing possible future
-	 * collisions.
-	 */
-	id = ip_idents_reserve(hash, 1);
-	if (unlikely(!id))
-		id = 1 << 31;
+	do {
+		id = prandom_u32();
+	} while (!id);
 
 	return id;
 }

From 801f0a1cf96f314ed88b51a2ceb243952ab37e7c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:25 +0800
Subject: [PATCH 0718/3292] net: sealevel: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 7dddc9dcbe23..62cc59892d1e 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -29,14 +29,12 @@
 #include <asm/byteorder.h>
 #include "z85230.h"
 
-
 struct slvl_device
 {
 	struct z8530_channel *chan;
 	int channel;
 };
 
-
 struct slvl_board
 {
 	struct slvl_device dev[2];
@@ -195,7 +193,6 @@ static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
 	return 0;
 }
 
-
 /*
  *	Allocate and setup Sealevel board.
  */
@@ -256,7 +253,6 @@ static __init struct slvl_board *slvl_init(int iobase, int irq,
 
 	outb(3 | (1 << 7), b->iobase + 4);
 
-
 	/* We want a fast IRQ for this device. Actually we'd like an even faster
 	   IRQ ;) - This is one driver RtLinux is made for */
 
@@ -351,7 +347,6 @@ static void __exit slvl_shutdown(struct slvl_board *b)
 	kfree(b);
 }
 
-
 static int io=0x238;
 static int txdma=1;
 static int rxdma=3;

From 58f30eea85a336d4931fa76099f24fa51797666d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:26 +0800
Subject: [PATCH 0719/3292] net: sealevel: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 62cc59892d1e..829e9bc01a49 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -174,6 +174,7 @@ static const struct net_device_ops sealevel_ops = {
 static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
 {
 	struct net_device *dev = alloc_hdlcdev(sv);
+
 	if (!dev)
 		return -1;
 
@@ -334,6 +335,7 @@ static void __exit slvl_shutdown(struct slvl_board *b)
 
 	for (u = 0; u < 2; u++) {
 		struct net_device *d = b->dev[u].chan->netdevice;
+
 		unregister_hdlc_device(d);
 		free_netdev(d);
 	}

From 8be88e3ccee49716a45972c7451b8033737fc9f0 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:27 +0800
Subject: [PATCH 0720/3292] net: sealevel: fix the code style issue about "foo*
 bar"

Fix the checkpatch error as "(foo*)" should be "(foo *)".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 829e9bc01a49..465c9ace1dc7 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -46,7 +46,7 @@ struct slvl_board
  *	Network driver support routines
  */
 
-static inline struct slvl_device* dev_to_chan(struct net_device *dev)
+static inline struct slvl_device *dev_to_chan(struct net_device *dev)
 {
 	return (struct slvl_device *)dev_to_hdlc(dev)->priv;
 }

From f090d1c38107dd964e933f7be9885baee15325f6 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:28 +0800
Subject: [PATCH 0721/3292] net: sealevel: open brace '{' following struct go
 on the same line

Fix the checkpatch error as open brace '{' following struct should
go on the same line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 465c9ace1dc7..b484d1f7b176 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -29,14 +29,12 @@
 #include <asm/byteorder.h>
 #include "z85230.h"
 
-struct slvl_device
-{
+struct slvl_device {
 	struct z8530_channel *chan;
 	int channel;
 };
 
-struct slvl_board
-{
+struct slvl_board {
 	struct slvl_device dev[2];
 	struct z8530_dev board;
 	int iobase;

From 52499d202dc6ecb995fb56e9933e503f6d484495 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:29 +0800
Subject: [PATCH 0722/3292] net: sealevel: add some required spaces

Add spaces required around that '='.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index b484d1f7b176..be618fd0b9a8 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -347,10 +347,10 @@ static void __exit slvl_shutdown(struct slvl_board *b)
 	kfree(b);
 }
 
-static int io=0x238;
-static int txdma=1;
-static int rxdma=3;
-static int irq=5;
+static int io = 0x238;
+static int txdma = 1;
+static int rxdma = 3;
+static int irq = 5;
 static bool slow=false;
 
 module_param_hw(io, int, ioport, 0);

From 40e8ee9d467dda815b10e9c2eb77ab7cf715ca1a Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:30 +0800
Subject: [PATCH 0723/3292] net: sealevel: remove redundant initialization for
 statics

Should not initialise statics to false.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index be618fd0b9a8..d19e9024865f 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -351,7 +351,7 @@ static int io = 0x238;
 static int txdma = 1;
 static int rxdma = 3;
 static int irq = 5;
-static bool slow=false;
+static bool slow;
 
 module_param_hw(io, int, ioport, 0);
 MODULE_PARM_DESC(io, "The I/O base of the Sealevel card");

From cc51e3f36e6268697cf760dc770dc51154e13a51 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:31 +0800
Subject: [PATCH 0724/3292] net: sealevel: fix a code style issue about switch
 and case

According to the chackpatch.pl, switch and case should be
at the same indent.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index d19e9024865f..e07309e0d971 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -79,12 +79,12 @@ static int sealevel_open(struct net_device *d)
 	 */
 
 	switch (unit) {
-		case 0:
-			err = z8530_sync_dma_open(d, slvl->chan);
-			break;
-		case 1:
-			err = z8530_sync_open(d, slvl->chan);
-			break;
+	case 0:
+		err = z8530_sync_dma_open(d, slvl->chan);
+		break;
+	case 1:
+		err = z8530_sync_open(d, slvl->chan);
+		break;
 	}
 
 	if (err)
@@ -93,12 +93,12 @@ static int sealevel_open(struct net_device *d)
 	err = hdlc_open(d);
 	if (err) {
 		switch (unit) {
-			case 0:
-				z8530_sync_dma_close(d, slvl->chan);
-				break;
-			case 1:
-				z8530_sync_close(d, slvl->chan);
-				break;
+		case 0:
+			z8530_sync_dma_close(d, slvl->chan);
+			break;
+		case 1:
+			z8530_sync_close(d, slvl->chan);
+			break;
 		}
 		return err;
 	}
@@ -127,12 +127,12 @@ static int sealevel_close(struct net_device *d)
 	netif_stop_queue(d);
 
 	switch (unit) {
-		case 0:
-			z8530_sync_dma_close(d, slvl->chan);
-			break;
-		case 1:
-			z8530_sync_close(d, slvl->chan);
-			break;
+	case 0:
+		z8530_sync_dma_close(d, slvl->chan);
+		break;
+	case 1:
+		z8530_sync_close(d, slvl->chan);
+		break;
 	}
 	return 0;
 }

From 04d7ad8cca9c33df22b3f5ef964ca9cb86d8ee8e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:32 +0800
Subject: [PATCH 0725/3292] net: sealevel: remove meaningless comments

Remove the meaningless stylistically wrong comment.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index e07309e0d971..6665732f96ce 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -105,9 +105,6 @@ static int sealevel_open(struct net_device *d)
 
 	slvl->chan->rx_function = sealevel_input;
 
-	/*
-	 *	Go go go
-	 */
 	netif_start_queue(d);
 	return 0;
 }

From b086ebfce34f78de02c1966d21931f681955778d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:33 +0800
Subject: [PATCH 0726/3292] net: sealevel: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 60 ++++++++++++--------------------------
 1 file changed, 18 insertions(+), 42 deletions(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 6665732f96ce..60028cfaaab5 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *	Sealevel Systems 4021 driver.
+/*	Sealevel Systems 4021 driver.
  *
  *	(c) Copyright 1999, 2001 Alan Cox
  *	(c) Copyright 2001 Red Hat Inc.
@@ -40,17 +39,14 @@ struct slvl_board {
 	int iobase;
 };
 
-/*
- *	Network driver support routines
- */
+ /*	Network driver support routines */
 
 static inline struct slvl_device *dev_to_chan(struct net_device *dev)
 {
 	return (struct slvl_device *)dev_to_hdlc(dev)->priv;
 }
 
-/*
- *	Frame receive. Simple for our card as we do HDLC and there
+/*	Frame receive. Simple for our card as we do HDLC and there
  *	is no funny garbage involved
  */
 
@@ -64,9 +60,7 @@ static void sealevel_input(struct z8530_channel *c, struct sk_buff *skb)
 	netif_rx(skb);
 }
 
-/*
- *	We've been placed in the UP state
- */
+ /*	We've been placed in the UP state */
 
 static int sealevel_open(struct net_device *d)
 {
@@ -74,9 +68,7 @@ static int sealevel_open(struct net_device *d)
 	int err = -1;
 	int unit = slvl->channel;
 
-	/*
-	 *	Link layer up.
-	 */
+	 /*	Link layer up. */
 
 	switch (unit) {
 	case 0:
@@ -114,9 +106,7 @@ static int sealevel_close(struct net_device *d)
 	struct slvl_device *slvl = dev_to_chan(d);
 	int unit = slvl->channel;
 
-	/*
-	 *	Discard new frames
-	 */
+	/*	Discard new frames */
 
 	slvl->chan->rx_function = z8530_null_rx;
 
@@ -137,13 +127,12 @@ static int sealevel_close(struct net_device *d)
 static int sealevel_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
 {
 	/* struct slvl_device *slvl=dev_to_chan(d);
-	   z8530_ioctl(d,&slvl->sync.chanA,ifr,cmd) */
+	 * z8530_ioctl(d,&slvl->sync.chanA,ifr,cmd)
+	 */
 	return hdlc_ioctl(d, ifr, cmd);
 }
 
-/*
- *	Passed network frames, fire them downwind.
- */
+/*	Passed network frames, fire them downwind. */
 
 static netdev_tx_t sealevel_queue_xmit(struct sk_buff *skb,
 					     struct net_device *d)
@@ -189,9 +178,7 @@ static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
 	return 0;
 }
 
-/*
- *	Allocate and setup Sealevel board.
- */
+/*	Allocate and setup Sealevel board. */
 
 static __init struct slvl_board *slvl_init(int iobase, int irq,
 					   int txdma, int rxdma, int slow)
@@ -199,9 +186,7 @@ static __init struct slvl_board *slvl_init(int iobase, int irq,
 	struct z8530_dev *dev;
 	struct slvl_board *b;
 
-	/*
-	 *	Get the needed I/O space
-	 */
+	/*	Get the needed I/O space */
 
 	if (!request_region(iobase, 8, "Sealevel 4021")) {
 		pr_warn("I/O 0x%X already in use\n", iobase);
@@ -220,17 +205,13 @@ static __init struct slvl_board *slvl_init(int iobase, int irq,
 
 	dev = &b->board;
 
-	/*
-	 *	Stuff in the I/O addressing
-	 */
+	/*	Stuff in the I/O addressing */
 
 	dev->active = 0;
 
 	b->iobase = iobase;
 
-	/*
-	 *	Select 8530 delays for the old board
-	 */
+	/*	Select 8530 delays for the old board */
 
 	if (slow)
 		iobase |= Z8530_PORT_SLEEP;
@@ -243,14 +224,13 @@ static __init struct slvl_board *slvl_init(int iobase, int irq,
 	dev->chanA.irqs = &z8530_nop;
 	dev->chanB.irqs = &z8530_nop;
 
-	/*
-	 *	Assert DTR enable DMA
-	 */
+	/*	Assert DTR enable DMA */
 
 	outb(3 | (1 << 7), b->iobase + 4);
 
 	/* We want a fast IRQ for this device. Actually we'd like an even faster
-	   IRQ ;) - This is one driver RtLinux is made for */
+	 * IRQ ;) - This is one driver RtLinux is made for
+	 */
 
 	if (request_irq(irq, z8530_interrupt, 0,
 			"SeaLevel", dev) < 0) {
@@ -274,9 +254,7 @@ static __init struct slvl_board *slvl_init(int iobase, int irq,
 
 	disable_irq(irq);
 
-	/*
-	 *	Begin normal initialise
-	 */
+	/*	Begin normal initialise */
 
 	if (z8530_init(dev) != 0) {
 		pr_err("Z8530 series device not found\n");
@@ -291,9 +269,7 @@ static __init struct slvl_board *slvl_init(int iobase, int irq,
 		z8530_channel_load(&dev->chanB, z8530_hdlc_kilostream_85230);
 	}
 
-	/*
-	 *	Now we can take the IRQ
-	 */
+	/*	Now we can take the IRQ */
 
 	enable_irq(irq);
 

From e24b608519363712a00327b0dbb4f12861cb6b55 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sun, 30 May 2021 14:24:34 +0800
Subject: [PATCH 0727/3292] net: sealevel: fix the alignment issue

Alignment should match open parenthesis.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wan/sealevel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 60028cfaaab5..4403e219ca03 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -135,7 +135,7 @@ static int sealevel_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
 /*	Passed network frames, fire them downwind. */
 
 static netdev_tx_t sealevel_queue_xmit(struct sk_buff *skb,
-					     struct net_device *d)
+				       struct net_device *d)
 {
 	return z8530_queue_xmit(dev_to_chan(d)->chan, skb);
 }

From 12e64b3bb9a8cd376a1a92434302e5aa7f1ab1e5 Mon Sep 17 00:00:00 2001
From: Rocco Yue <rocco.yue@mediatek.com>
Date: Sun, 30 May 2021 19:38:11 +0800
Subject: [PATCH 0728/3292] ipv6: align code with context

The Tab key is used three times, causing the code block to
be out of alignment with the context.

Signed-off-by: Rocco Yue <rocco.yue@mediatek.com>
Link: https://lore.kernel.org/r/20210530113811.8817-1-rocco.yue@mediatek.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/addrconf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b0ef65eb9bd2..048570900fdf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6903,10 +6903,10 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.proc_handler   = proc_dointvec,
 	},
 	{
-		.procname		= "addr_gen_mode",
-		.data			= &ipv6_devconf.addr_gen_mode,
-		.maxlen			= sizeof(int),
-		.mode			= 0644,
+		.procname	= "addr_gen_mode",
+		.data		= &ipv6_devconf.addr_gen_mode,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
 		.proc_handler	= addrconf_sysctl_addr_gen_mode,
 	},
 	{

From 44fdd2edb36f0da66758cd355840d357078110fe Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 31 May 2021 16:29:16 +0800
Subject: [PATCH 0729/3292] bnx2x: Remove the repeated declaration

Function 'bnx2x_vfpf_release' is declared twice, so remove the
repeated declaration.

Cc: Ariel Elior <aelior@marvell.com>
Cc: GR-everest-linux-l2@marvell.com
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Acked-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Link: https://lore.kernel.org/r/1622449756-2627-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 3a716c015415..966d5722c5e2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -504,7 +504,6 @@ enum sample_bulletin_result bnx2x_sample_bulletin(struct bnx2x *bp);
 /* VF side vfpf channel functions */
 int bnx2x_vfpf_acquire(struct bnx2x *bp, u8 tx_count, u8 rx_count);
 int bnx2x_vfpf_release(struct bnx2x *bp);
-int bnx2x_vfpf_release(struct bnx2x *bp);
 int bnx2x_vfpf_init(struct bnx2x *bp);
 void bnx2x_vfpf_close_vf(struct bnx2x *bp);
 int bnx2x_vfpf_setup_q(struct bnx2x *bp, struct bnx2x_fastpath *fp,

From 15074a361fee151ee4207d20020a2de178cf490f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:32 +0300
Subject: [PATCH 0730/3292] net: dsa: sja1105: be compatible with
 "ethernet-ports" OF node name

Since commit f2f3e09396be ("net: dsa: sja1105: be compatible with
"ethernet-ports" OF node name"), DSA supports the "ethernet-ports" name
for the container node of the ports, but the sja1105 driver doesn't,
because it handles some device tree parsing of its own.

Add the second node name as a fallback.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2080f36ff25b..4c776bd7ce25 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -885,6 +885,8 @@ static int sja1105_parse_dt(struct sja1105_private *priv,
 	int rc;
 
 	ports_node = of_get_child_by_name(switch_node, "ports");
+	if (!ports_node)
+		ports_node = of_get_child_by_name(switch_node, "ethernet-ports");
 	if (!ports_node) {
 		dev_err(dev, "Incorrect bindings: absent \"ports\" node\n");
 		return -ENODEV;

From 84db00f2c04338da329e2cc9a055d5a0b82fa159 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:33 +0300
Subject: [PATCH 0731/3292] net: dsa: sja1105: allow SGMII PCS configuration to
 be per port

The SJA1105 R and S switches have 1 SGMII port (port 4). Because there
is only one such port, there is no "port" parameter in the configuration
code for the SGMII PCS.

However, the SJA1110 can have up to 4 SGMII ports, each with its own
SGMII register map. So we need to generalize the logic.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 75 +++++++++++++++-----------
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 4c776bd7ce25..1e4e05d429af 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -898,36 +898,43 @@ static int sja1105_parse_dt(struct sja1105_private *priv,
 	return rc;
 }
 
-static int sja1105_sgmii_read(struct sja1105_private *priv, int pcs_reg)
+static int sja1105_sgmii_read(struct sja1105_private *priv, int port,
+			      int pcs_reg)
 {
 	const struct sja1105_regs *regs = priv->info->regs;
 	u32 val;
 	int rc;
 
-	rc = sja1105_xfer_u32(priv, SPI_READ, regs->sgmii + pcs_reg, &val,
-			      NULL);
+	if (port != SJA1105_SGMII_PORT)
+		return -ENODEV;
+
+	rc = sja1105_xfer_u32(priv, SPI_READ, regs->sgmii + pcs_reg,
+			      &val, NULL);
 	if (rc < 0)
 		return rc;
 
 	return val;
 }
 
-static int sja1105_sgmii_write(struct sja1105_private *priv, int pcs_reg,
-			       u16 pcs_val)
+static int sja1105_sgmii_write(struct sja1105_private *priv, int port,
+			       int pcs_reg, u16 pcs_val)
 {
 	const struct sja1105_regs *regs = priv->info->regs;
 	u32 val = pcs_val;
 	int rc;
 
-	rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->sgmii + pcs_reg, &val,
-			      NULL);
+	if (port != SJA1105_SGMII_PORT)
+		return -ENODEV;
+
+	rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->sgmii + pcs_reg,
+			      &val, NULL);
 	if (rc < 0)
 		return rc;
 
 	return val;
 }
 
-static void sja1105_sgmii_pcs_config(struct sja1105_private *priv,
+static void sja1105_sgmii_pcs_config(struct sja1105_private *priv, int port,
 				     bool an_enabled, bool an_master)
 {
 	u16 ac = SJA1105_AC_AUTONEG_MODE_SGMII;
@@ -936,27 +943,29 @@ static void sja1105_sgmii_pcs_config(struct sja1105_private *priv,
 	 * stop the clock during LPI mode, make the MAC reconfigure
 	 * autonomously after PCS autoneg is done, flush the internal FIFOs.
 	 */
-	sja1105_sgmii_write(priv, SJA1105_DC1, SJA1105_DC1_EN_VSMMD1 |
-					       SJA1105_DC1_CLOCK_STOP_EN |
-					       SJA1105_DC1_MAC_AUTO_SW |
-					       SJA1105_DC1_INIT);
+	sja1105_sgmii_write(priv, port, SJA1105_DC1,
+			    SJA1105_DC1_EN_VSMMD1 |
+			    SJA1105_DC1_CLOCK_STOP_EN |
+			    SJA1105_DC1_MAC_AUTO_SW |
+			    SJA1105_DC1_INIT);
 	/* DIGITAL_CONTROL_2: No polarity inversion for TX and RX lanes */
-	sja1105_sgmii_write(priv, SJA1105_DC2, SJA1105_DC2_TX_POL_INV_DISABLE);
+	sja1105_sgmii_write(priv, port, SJA1105_DC2,
+			    SJA1105_DC2_TX_POL_INV_DISABLE);
 	/* AUTONEG_CONTROL: Use SGMII autoneg */
 	if (an_master)
 		ac |= SJA1105_AC_PHY_MODE | SJA1105_AC_SGMII_LINK;
-	sja1105_sgmii_write(priv, SJA1105_AC, ac);
+	sja1105_sgmii_write(priv, port, SJA1105_AC, ac);
 	/* BASIC_CONTROL: enable in-band AN now, if requested. Otherwise,
 	 * sja1105_sgmii_pcs_force_speed must be called later for the link
 	 * to become operational.
 	 */
 	if (an_enabled)
-		sja1105_sgmii_write(priv, MII_BMCR,
+		sja1105_sgmii_write(priv, port, MII_BMCR,
 				    BMCR_ANENABLE | BMCR_ANRESTART);
 }
 
 static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv,
-					  int speed)
+					  int port, int speed)
 {
 	int pcs_speed;
 
@@ -974,7 +983,7 @@ static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv,
 		dev_err(priv->ds->dev, "Invalid speed %d\n", speed);
 		return;
 	}
-	sja1105_sgmii_write(priv, MII_BMCR, pcs_speed | BMCR_FULLDPLX);
+	sja1105_sgmii_write(priv, port, MII_BMCR, pcs_speed | BMCR_FULLDPLX);
 }
 
 /* Convert link speed from SJA1105 to ethtool encoding */
@@ -1115,7 +1124,8 @@ static void sja1105_mac_config(struct dsa_switch *ds, int port,
 	}
 
 	if (is_sgmii)
-		sja1105_sgmii_pcs_config(priv, phylink_autoneg_inband(mode),
+		sja1105_sgmii_pcs_config(priv, port,
+					 phylink_autoneg_inband(mode),
 					 false);
 }
 
@@ -1138,7 +1148,7 @@ static void sja1105_mac_link_up(struct dsa_switch *ds, int port,
 	sja1105_adjust_port_config(priv, port, speed);
 
 	if (sja1105_supports_sgmii(priv, port) && !phylink_autoneg_inband(mode))
-		sja1105_sgmii_pcs_force_speed(priv, speed);
+		sja1105_sgmii_pcs_force_speed(priv, port, speed);
 
 	sja1105_inhibit_tx(priv, BIT(port), false);
 }
@@ -1191,7 +1201,7 @@ static int sja1105_mac_pcs_get_state(struct dsa_switch *ds, int port,
 	int ais;
 
 	/* Read the vendor-specific AUTONEG_INTR_STATUS register */
-	ais = sja1105_sgmii_read(priv, SJA1105_AIS);
+	ais = sja1105_sgmii_read(priv, port, SJA1105_AIS);
 	if (ais < 0)
 		return ais;
 
@@ -1873,11 +1883,11 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	struct ptp_system_timestamp ptp_sts_before;
 	struct ptp_system_timestamp ptp_sts_after;
 	int speed_mbps[SJA1105_MAX_NUM_PORTS];
+	u16 bmcr[SJA1105_MAX_NUM_PORTS] = {0};
 	struct sja1105_mac_config_entry *mac;
 	struct dsa_switch *ds = priv->ds;
 	s64 t1, t2, t3, t4;
 	s64 t12, t34;
-	u16 bmcr = 0;
 	int rc, i;
 	s64 now;
 
@@ -1893,10 +1903,10 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	for (i = 0; i < ds->num_ports; i++) {
 		speed_mbps[i] = sja1105_speed[mac[i].speed];
 		mac[i].speed = SJA1105_SPEED_AUTO;
-	}
 
-	if (sja1105_supports_sgmii(priv, SJA1105_SGMII_PORT))
-		bmcr = sja1105_sgmii_read(priv, MII_BMCR);
+		if (sja1105_supports_sgmii(priv, i))
+			bmcr[i] = sja1105_sgmii_read(priv, i, MII_BMCR);
+	}
 
 	/* No PTP operations can run right now */
 	mutex_lock(&priv->ptp_data.lock);
@@ -1943,27 +1953,30 @@ out_unlock_ptp:
 		goto out;
 
 	for (i = 0; i < ds->num_ports; i++) {
+		bool an_enabled;
+
 		rc = sja1105_adjust_port_config(priv, i, speed_mbps[i]);
 		if (rc < 0)
 			goto out;
-	}
 
-	if (sja1105_supports_sgmii(priv, SJA1105_SGMII_PORT)) {
-		bool an_enabled = !!(bmcr & BMCR_ANENABLE);
+		if (!sja1105_supports_sgmii(priv, i))
+			continue;
 
-		sja1105_sgmii_pcs_config(priv, an_enabled, false);
+		an_enabled = !!(bmcr[i] & BMCR_ANENABLE);
+
+		sja1105_sgmii_pcs_config(priv, i, an_enabled, false);
 
 		if (!an_enabled) {
 			int speed = SPEED_UNKNOWN;
 
-			if (bmcr & BMCR_SPEED1000)
+			if (bmcr[i] & BMCR_SPEED1000)
 				speed = SPEED_1000;
-			else if (bmcr & BMCR_SPEED100)
+			else if (bmcr[i] & BMCR_SPEED100)
 				speed = SPEED_100;
 			else
 				speed = SPEED_10;
 
-			sja1105_sgmii_pcs_force_speed(priv, speed);
+			sja1105_sgmii_pcs_force_speed(priv, i, speed);
 		}
 	}
 

From 4c7ee010cf75e5769037d4f152a8192dcf5eb49c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:34 +0300
Subject: [PATCH 0732/3292] net: dsa: sja1105: the 0x1F0000 SGMII "base
 address" is actually MDIO_MMD_VEND2

Looking at the SGMII PCS from SJA1110, which is accessed indirectly
through a different base address as can be seen in the next patch, it
appears odd that the address accessed through indirection still
references the base address from the SJA1105S register map (first MDIO
register is at 0x1f0000), when it could index the SGMII registers
starting from zero.

Except that the 0x1f0000 is not a base address at all, it seems. It is
0x1f << 16 | 0x0000, and 0x1f is coding for the vendor-specific MMD2.
So, it turns out, the Synopsys PCS implements all its registers inside
the vendor-specific MMDs 1 and 2 (0x1e and 0x1f). This explains why the
PCS has no overlaps (for the other MMDs) with other register regions of
the switch (because no other MMDs are implemented).

Change the code to remove the SGMII "base address" and explicitly encode
the MMD for reads/writes. This will become necessary for SJA1110 support.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105.h      |  1 -
 drivers/net/dsa/sja1105/sja1105_main.c | 31 +++++++++++++-------------
 drivers/net/dsa/sja1105/sja1105_spi.c  |  1 -
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 2ec03917feb3..830ea5ca359f 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -48,7 +48,6 @@ struct sja1105_regs {
 	u64 rgu;
 	u64 vl_status;
 	u64 config;
-	u64 sgmii;
 	u64 rmii_pll1;
 	u64 ptppinst;
 	u64 ptppindur;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 1e4e05d429af..c8d93d810421 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -898,36 +898,34 @@ static int sja1105_parse_dt(struct sja1105_private *priv,
 	return rc;
 }
 
-static int sja1105_sgmii_read(struct sja1105_private *priv, int port,
+static int sja1105_sgmii_read(struct sja1105_private *priv, int port, int mmd,
 			      int pcs_reg)
 {
-	const struct sja1105_regs *regs = priv->info->regs;
+	u64 addr = (mmd << 16) | pcs_reg;
 	u32 val;
 	int rc;
 
 	if (port != SJA1105_SGMII_PORT)
 		return -ENODEV;
 
-	rc = sja1105_xfer_u32(priv, SPI_READ, regs->sgmii + pcs_reg,
-			      &val, NULL);
+	rc = sja1105_xfer_u32(priv, SPI_READ, addr, &val, NULL);
 	if (rc < 0)
 		return rc;
 
 	return val;
 }
 
-static int sja1105_sgmii_write(struct sja1105_private *priv, int port,
+static int sja1105_sgmii_write(struct sja1105_private *priv, int port, int mmd,
 			       int pcs_reg, u16 pcs_val)
 {
-	const struct sja1105_regs *regs = priv->info->regs;
+	u64 addr = (mmd << 16) | pcs_reg;
 	u32 val = pcs_val;
 	int rc;
 
 	if (port != SJA1105_SGMII_PORT)
 		return -ENODEV;
 
-	rc = sja1105_xfer_u32(priv, SPI_WRITE, regs->sgmii + pcs_reg,
-			      &val, NULL);
+	rc = sja1105_xfer_u32(priv, SPI_WRITE, addr, &val, NULL);
 	if (rc < 0)
 		return rc;
 
@@ -943,24 +941,24 @@ static void sja1105_sgmii_pcs_config(struct sja1105_private *priv, int port,
 	 * stop the clock during LPI mode, make the MAC reconfigure
 	 * autonomously after PCS autoneg is done, flush the internal FIFOs.
 	 */
-	sja1105_sgmii_write(priv, port, SJA1105_DC1,
+	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, SJA1105_DC1,
 			    SJA1105_DC1_EN_VSMMD1 |
 			    SJA1105_DC1_CLOCK_STOP_EN |
 			    SJA1105_DC1_MAC_AUTO_SW |
 			    SJA1105_DC1_INIT);
 	/* DIGITAL_CONTROL_2: No polarity inversion for TX and RX lanes */
-	sja1105_sgmii_write(priv, port, SJA1105_DC2,
+	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, SJA1105_DC2,
 			    SJA1105_DC2_TX_POL_INV_DISABLE);
 	/* AUTONEG_CONTROL: Use SGMII autoneg */
 	if (an_master)
 		ac |= SJA1105_AC_PHY_MODE | SJA1105_AC_SGMII_LINK;
-	sja1105_sgmii_write(priv, port, SJA1105_AC, ac);
+	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, SJA1105_AC, ac);
 	/* BASIC_CONTROL: enable in-band AN now, if requested. Otherwise,
 	 * sja1105_sgmii_pcs_force_speed must be called later for the link
 	 * to become operational.
 	 */
 	if (an_enabled)
-		sja1105_sgmii_write(priv, port, MII_BMCR,
+		sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, MDIO_CTRL1,
 				    BMCR_ANENABLE | BMCR_ANRESTART);
 }
 
@@ -983,7 +981,8 @@ static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv,
 		dev_err(priv->ds->dev, "Invalid speed %d\n", speed);
 		return;
 	}
-	sja1105_sgmii_write(priv, port, MII_BMCR, pcs_speed | BMCR_FULLDPLX);
+	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, MDIO_CTRL1,
+			    pcs_speed | BMCR_FULLDPLX);
 }
 
 /* Convert link speed from SJA1105 to ethtool encoding */
@@ -1201,7 +1200,7 @@ static int sja1105_mac_pcs_get_state(struct dsa_switch *ds, int port,
 	int ais;
 
 	/* Read the vendor-specific AUTONEG_INTR_STATUS register */
-	ais = sja1105_sgmii_read(priv, port, SJA1105_AIS);
+	ais = sja1105_sgmii_read(priv, port, MDIO_MMD_VEND2, SJA1105_AIS);
 	if (ais < 0)
 		return ais;
 
@@ -1905,7 +1904,9 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 		mac[i].speed = SJA1105_SPEED_AUTO;
 
 		if (sja1105_supports_sgmii(priv, i))
-			bmcr[i] = sja1105_sgmii_read(priv, i, MII_BMCR);
+			bmcr[i] = sja1105_sgmii_read(priv, i,
+						     MDIO_MMD_VEND2,
+						     MDIO_CTRL1);
 	}
 
 	/* No PTP operations can run right now */
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index d0bc6cf90bfd..615e0906b1fa 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -440,7 +440,6 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.pad_mii_tx = {0x100800, 0x100802, 0x100804, 0x100806, 0x100808},
 	.pad_mii_rx = {0x100801, 0x100803, 0x100805, 0x100807, 0x100809},
 	.pad_mii_id = {0x100810, 0x100811, 0x100812, 0x100813, 0x100814},
-	.sgmii = 0x1F0000,
 	.rmii_pll1 = 0x10000A,
 	.cgu_idiv = {0x10000B, 0x10000C, 0x10000D, 0x10000E, 0x10000F},
 	.stats[MAC] = {0x200, 0x202, 0x204, 0x206, 0x208},

From bf4edf4afb87e67bed8678c09a01110148830483 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:35 +0300
Subject: [PATCH 0733/3292] net: dsa: sja1105: cache the phy-mode port property

So far we've succeeded in operating without keeping a copy of the
phy-mode in the driver, since we already have the static config and we
can look at the xMII Mode Parameters Table which already holds that
information.

But with the SJA1110, we cannot make the distinction between sgmii and
2500base-x, because to the hardware's static config, it's all SGMII.
So add a phy_mode property per port inside struct sja1105_private.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105.h      |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c | 24 +++---------------------
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 830ea5ca359f..d5c0217b1f65 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -210,6 +210,7 @@ struct sja1105_private {
 	struct sja1105_static_config static_config;
 	bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS];
 	bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS];
+	phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
 	bool best_effort_vlan_filtering;
 	unsigned long learn_ena;
 	unsigned long ucast_egress_floods;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index c8d93d810421..e4932243d0d3 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -871,6 +871,8 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv,
 			ports[index].role = XMII_MAC;
 		else if (of_property_read_bool(child, "sja1105,role-phy"))
 			ports[index].role = XMII_PHY;
+
+		priv->phy_mode[index] = phy_mode;
 	}
 
 	return 0;
@@ -1081,27 +1083,7 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 static bool sja1105_phy_mode_mismatch(struct sja1105_private *priv, int port,
 				      phy_interface_t interface)
 {
-	struct sja1105_xmii_params_entry *mii;
-	sja1105_phy_interface_t phy_mode;
-
-	mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries;
-	phy_mode = mii->xmii_mode[port];
-
-	switch (interface) {
-	case PHY_INTERFACE_MODE_MII:
-		return (phy_mode != XMII_MODE_MII);
-	case PHY_INTERFACE_MODE_RMII:
-		return (phy_mode != XMII_MODE_RMII);
-	case PHY_INTERFACE_MODE_RGMII:
-	case PHY_INTERFACE_MODE_RGMII_ID:
-	case PHY_INTERFACE_MODE_RGMII_RXID:
-	case PHY_INTERFACE_MODE_RGMII_TXID:
-		return (phy_mode != XMII_MODE_RGMII);
-	case PHY_INTERFACE_MODE_SGMII:
-		return (phy_mode != XMII_MODE_SGMII);
-	default:
-		return true;
-	}
+	return priv->phy_mode[port] != interface;
 }
 
 static void sja1105_mac_config(struct dsa_switch *ds, int port,

From 91a050782cbfc8f81c7c6aa0c5ce396bea51df3b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:36 +0300
Subject: [PATCH 0734/3292] net: dsa: sja1105: add a PHY interface type
 compatibility matrix

On the SJA1105, all ports support the parallel "xMII" protocols (MII,
RMII, RGMII) except for port 4 on SJA1105R/S which supports only SGMII.
This was relatively easy to model, by special-casing the SGMII port.

On the SJA1110, certain ports can be pinmuxed between SGMII and xMII, or
between SGMII and an internal 100base-TX PHY. This creates problems,
because the driver's assumption so far was that if a port supports
SGMII, it uses SGMII.

We allow the device tree to tell us how the port pinmuxing is done, and
check that against a PHY interface type compatibility matrix for
plausibility.

The other big change is that instead of doing SGMII configuration based
on what the port supports, we do it based on what is the configured
phy_mode of the port.

The 2500base-x support added in this patch is not complete.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105.h      |  5 +++
 drivers/net/dsa/sja1105/sja1105_main.c | 59 +++++++++++++-------------
 drivers/net/dsa/sja1105/sja1105_spi.c  | 20 +++++++++
 3 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index d5c0217b1f65..a27841642693 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -111,6 +111,11 @@ struct sja1105_info {
 				enum packing_op op);
 	int (*clocking_setup)(struct sja1105_private *priv);
 	const char *name;
+	bool supports_mii[SJA1105_MAX_NUM_PORTS];
+	bool supports_rmii[SJA1105_MAX_NUM_PORTS];
+	bool supports_rgmii[SJA1105_MAX_NUM_PORTS];
+	bool supports_sgmii[SJA1105_MAX_NUM_PORTS];
+	bool supports_2500basex[SJA1105_MAX_NUM_PORTS];
 };
 
 enum sja1105_key_type {
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index e4932243d0d3..3682b04d37cb 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -143,21 +143,6 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 	return 0;
 }
 
-static bool sja1105_supports_sgmii(struct sja1105_private *priv, int port)
-{
-	if (priv->info->part_no != SJA1105R_PART_NO &&
-	    priv->info->part_no != SJA1105S_PART_NO)
-		return false;
-
-	if (port != SJA1105_SGMII_PORT)
-		return false;
-
-	if (dsa_is_unused_port(priv->ds, port))
-		return false;
-
-	return true;
-}
-
 static int sja1105_init_mii_settings(struct sja1105_private *priv,
 				     struct sja1105_dt_port *ports)
 {
@@ -191,25 +176,42 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 
 		switch (ports[i].phy_mode) {
 		case PHY_INTERFACE_MODE_MII:
+			if (!priv->info->supports_mii[i])
+				goto unsupported;
+
 			mii->xmii_mode[i] = XMII_MODE_MII;
 			break;
 		case PHY_INTERFACE_MODE_RMII:
+			if (!priv->info->supports_rmii[i])
+				goto unsupported;
+
 			mii->xmii_mode[i] = XMII_MODE_RMII;
 			break;
 		case PHY_INTERFACE_MODE_RGMII:
 		case PHY_INTERFACE_MODE_RGMII_ID:
 		case PHY_INTERFACE_MODE_RGMII_RXID:
 		case PHY_INTERFACE_MODE_RGMII_TXID:
+			if (!priv->info->supports_rgmii[i])
+				goto unsupported;
+
 			mii->xmii_mode[i] = XMII_MODE_RGMII;
 			break;
 		case PHY_INTERFACE_MODE_SGMII:
-			if (!sja1105_supports_sgmii(priv, i))
-				return -EINVAL;
+			if (!priv->info->supports_sgmii[i])
+				goto unsupported;
+
 			mii->xmii_mode[i] = XMII_MODE_SGMII;
 			break;
+		case PHY_INTERFACE_MODE_2500BASEX:
+			if (!priv->info->supports_2500basex[i])
+				goto unsupported;
+
+			mii->xmii_mode[i] = XMII_MODE_SGMII;
+			break;
+unsupported:
 		default:
-			dev_err(dev, "Unsupported PHY mode %s!\n",
-				phy_modes(ports[i].phy_mode));
+			dev_err(dev, "Unsupported PHY mode %s on port %d!\n",
+				phy_modes(ports[i].phy_mode), i);
 			return -EINVAL;
 		}
 
@@ -999,10 +1001,8 @@ static int sja1105_speed[] = {
 static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 				      int speed_mbps)
 {
-	struct sja1105_xmii_params_entry *mii;
 	struct sja1105_mac_config_entry *mac;
 	struct device *dev = priv->ds->dev;
-	sja1105_phy_interface_t phy_mode;
 	sja1105_speed_t speed;
 	int rc;
 
@@ -1013,7 +1013,6 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	 * reasonable approximation for both E/T and P/Q/R/S.
 	 */
 	mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
-	mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries;
 
 	switch (speed_mbps) {
 	case SPEED_UNKNOWN:
@@ -1047,7 +1046,7 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	 * Actually for the SGMII port, the MAC is fixed at 1 Gbps and
 	 * we need to configure the PCS only (if even that).
 	 */
-	if (sja1105_supports_sgmii(priv, port))
+	if (priv->phy_mode[port] == PHY_INTERFACE_MODE_SGMII)
 		mac[port].speed = SJA1105_SPEED_1000MBPS;
 	else
 		mac[port].speed = speed;
@@ -1066,8 +1065,7 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	 * the clock setup does interrupt the clock signal for a certain time
 	 * which causes trouble for all PHYs relying on this signal.
 	 */
-	phy_mode = mii->xmii_mode[port];
-	if (phy_mode != XMII_MODE_RGMII)
+	if (!phy_interface_mode_is_rgmii(priv->phy_mode[port]))
 		return 0;
 
 	return sja1105_clocking_setup_port(priv, port);
@@ -1091,7 +1089,9 @@ static void sja1105_mac_config(struct dsa_switch *ds, int port,
 			       const struct phylink_link_state *state)
 {
 	struct sja1105_private *priv = ds->priv;
-	bool is_sgmii = sja1105_supports_sgmii(priv, port);
+	bool is_sgmii;
+
+	is_sgmii = (state->interface == PHY_INTERFACE_MODE_SGMII);
 
 	if (sja1105_phy_mode_mismatch(priv, port, state->interface)) {
 		dev_err(ds->dev, "Changing PHY mode to %s not supported!\n",
@@ -1128,7 +1128,8 @@ static void sja1105_mac_link_up(struct dsa_switch *ds, int port,
 
 	sja1105_adjust_port_config(priv, port, speed);
 
-	if (sja1105_supports_sgmii(priv, port) && !phylink_autoneg_inband(mode))
+	if (priv->phy_mode[port] == PHY_INTERFACE_MODE_SGMII &&
+	    !phylink_autoneg_inband(mode))
 		sja1105_sgmii_pcs_force_speed(priv, port, speed);
 
 	sja1105_inhibit_tx(priv, BIT(port), false);
@@ -1885,7 +1886,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 		speed_mbps[i] = sja1105_speed[mac[i].speed];
 		mac[i].speed = SJA1105_SPEED_AUTO;
 
-		if (sja1105_supports_sgmii(priv, i))
+		if (priv->phy_mode[i] == PHY_INTERFACE_MODE_SGMII)
 			bmcr[i] = sja1105_sgmii_read(priv, i,
 						     MDIO_MMD_VEND2,
 						     MDIO_CTRL1);
@@ -1942,7 +1943,7 @@ out_unlock_ptp:
 		if (rc < 0)
 			goto out;
 
-		if (!sja1105_supports_sgmii(priv, i))
+		if (priv->phy_mode[i] != PHY_INTERFACE_MODE_SGMII)
 			continue;
 
 		an_enabled = !!(bmcr[i] & BMCR_ANENABLE);
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 615e0906b1fa..565b594efa7d 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -482,6 +482,9 @@ const struct sja1105_info sja1105e_info = {
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
+	.supports_mii		= {true, true, true, true, true},
+	.supports_rmii		= {true, true, true, true, true},
+	.supports_rgmii		= {true, true, true, true, true},
 	.name			= "SJA1105E",
 };
 
@@ -502,6 +505,9 @@ const struct sja1105_info sja1105t_info = {
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
+	.supports_mii		= {true, true, true, true, true},
+	.supports_rmii		= {true, true, true, true, true},
+	.supports_rgmii		= {true, true, true, true, true},
 	.name			= "SJA1105T",
 };
 
@@ -523,6 +529,9 @@ const struct sja1105_info sja1105p_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
+	.supports_mii		= {true, true, true, true, true},
+	.supports_rmii		= {true, true, true, true, true},
+	.supports_rgmii		= {true, true, true, true, true},
 	.name			= "SJA1105P",
 };
 
@@ -544,6 +553,9 @@ const struct sja1105_info sja1105q_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
+	.supports_mii		= {true, true, true, true, true},
+	.supports_rmii		= {true, true, true, true, true},
+	.supports_rgmii		= {true, true, true, true, true},
 	.name			= "SJA1105Q",
 };
 
@@ -565,6 +577,10 @@ const struct sja1105_info sja1105r_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
+	.supports_mii		= {true, true, true, true, true},
+	.supports_rmii		= {true, true, true, true, true},
+	.supports_rgmii		= {true, true, true, true, true},
+	.supports_sgmii		= {false, false, false, false, true},
 	.name			= "SJA1105R",
 };
 
@@ -586,5 +602,9 @@ const struct sja1105_info sja1105s_info = {
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
+	.supports_mii		= {true, true, true, true, true},
+	.supports_rmii		= {true, true, true, true, true},
+	.supports_rgmii		= {true, true, true, true, true},
+	.supports_sgmii		= {false, false, false, false, true},
 	.name			= "SJA1105S",
 };

From 41fed17fdbe5311b141d39c40f028a594b0a0c72 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:37 +0300
Subject: [PATCH 0735/3292] net: dsa: sja1105: add a translation table for port
 speeds

In order to support the new speed of 2500Mbps, the SJA1110 has achieved
the great performance of changing the encoding in the MAC Configuration
Table for the port speeds of 10, 100, 1000 compared to SJA1105.

Because this is a common driver, we need a layer of indirection in order
to program the hardware with the right values irrespective of switch
generation.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105.h          | 17 +++++----
 drivers/net/dsa/sja1105/sja1105_clocking.c | 22 +++++-------
 drivers/net/dsa/sja1105/sja1105_main.c     | 38 ++++++++++++--------
 drivers/net/dsa/sja1105/sja1105_spi.c      | 42 ++++++++++++++++++++++
 4 files changed, 84 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index a27841642693..867cda832e77 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -72,6 +72,15 @@ struct sja1105_regs {
 	u64 stats[__MAX_SJA1105_STATS_AREA][SJA1105_MAX_NUM_PORTS];
 };
 
+enum {
+	SJA1105_SPEED_AUTO,
+	SJA1105_SPEED_10MBPS,
+	SJA1105_SPEED_100MBPS,
+	SJA1105_SPEED_1000MBPS,
+	SJA1105_SPEED_2500MBPS,
+	SJA1105_SPEED_MAX,
+};
+
 struct sja1105_info {
 	u64 device_id;
 	/* Needed for distinction between P and R, and between Q and S
@@ -116,6 +125,7 @@ struct sja1105_info {
 	bool supports_rgmii[SJA1105_MAX_NUM_PORTS];
 	bool supports_sgmii[SJA1105_MAX_NUM_PORTS];
 	bool supports_2500basex[SJA1105_MAX_NUM_PORTS];
+	const u64 port_speed[SJA1105_SPEED_MAX];
 };
 
 enum sja1105_key_type {
@@ -314,13 +324,6 @@ typedef enum {
 	XMII_MODE_SGMII		= 3,
 } sja1105_phy_interface_t;
 
-typedef enum {
-	SJA1105_SPEED_10MBPS	= 3,
-	SJA1105_SPEED_100MBPS	= 2,
-	SJA1105_SPEED_1000MBPS	= 1,
-	SJA1105_SPEED_AUTO	= 0,
-} sja1105_speed_t;
-
 int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port);
 int sja1105_clocking_setup_port(struct sja1105_private *priv, int port);
 int sja1105_clocking_setup(struct sja1105_private *priv);
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 4697ac064abc..03173397d950 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -328,7 +328,7 @@ sja1105_cgu_pll_control_packing(void *buf, struct sja1105_cgu_pll_ctrl *cmd,
 }
 
 static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
-					   int port, sja1105_speed_t speed)
+					   int port, u64 speed)
 {
 	const struct sja1105_regs *regs = priv->info->regs;
 	struct sja1105_cgu_mii_ctrl txc;
@@ -338,7 +338,7 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
 	if (regs->rgmii_tx_clk[port] == SJA1105_RSV_ADDR)
 		return 0;
 
-	if (speed == SJA1105_SPEED_1000MBPS) {
+	if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS]) {
 		clksrc = CLKSRC_PLL0;
 	} else {
 		int clk_sources[] = {CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2,
@@ -524,35 +524,31 @@ static int sja1105_rgmii_clocking_setup(struct sja1105_private *priv, int port,
 {
 	struct device *dev = priv->ds->dev;
 	struct sja1105_mac_config_entry *mac;
-	sja1105_speed_t speed;
+	u64 speed;
 	int rc;
 
 	mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
 	speed = mac[port].speed;
 
-	dev_dbg(dev, "Configuring port %d RGMII at speed %dMbps\n",
+	dev_dbg(dev, "Configuring port %d RGMII at speed %lldMbps\n",
 		port, speed);
 
-	switch (speed) {
-	case SJA1105_SPEED_1000MBPS:
+	if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS]) {
 		/* 1000Mbps, IDIV disabled (125 MHz) */
 		rc = sja1105_cgu_idiv_config(priv, port, false, 1);
-		break;
-	case SJA1105_SPEED_100MBPS:
+	} else if (speed == priv->info->port_speed[SJA1105_SPEED_100MBPS]) {
 		/* 100Mbps, IDIV enabled, divide by 1 (25 MHz) */
 		rc = sja1105_cgu_idiv_config(priv, port, true, 1);
-		break;
-	case SJA1105_SPEED_10MBPS:
+	} else if (speed == priv->info->port_speed[SJA1105_SPEED_10MBPS]) {
 		/* 10Mbps, IDIV enabled, divide by 10 (2.5 MHz) */
 		rc = sja1105_cgu_idiv_config(priv, port, true, 10);
-		break;
-	case SJA1105_SPEED_AUTO:
+	} else if (speed == priv->info->port_speed[SJA1105_SPEED_AUTO]) {
 		/* Skip CGU configuration if there is no speed available
 		 * (e.g. link is not established yet)
 		 */
 		dev_dbg(dev, "Speed not available, skipping CGU config\n");
 		return 0;
-	default:
+	} else {
 		rc = -EINVAL;
 	}
 
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 3682b04d37cb..5beafe003268 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -80,7 +80,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 		/* Always put the MAC speed in automatic mode, where it can be
 		 * adjusted at runtime by PHYLINK.
 		 */
-		.speed = SJA1105_SPEED_AUTO,
+		.speed = priv->info->port_speed[SJA1105_SPEED_AUTO],
 		/* No static correction for 1-step 1588 events */
 		.tp_delin = 0,
 		.tp_delout = 0,
@@ -990,12 +990,19 @@ static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv,
 }
 
 /* Convert link speed from SJA1105 to ethtool encoding */
-static int sja1105_speed[] = {
-	[SJA1105_SPEED_AUTO]		= SPEED_UNKNOWN,
-	[SJA1105_SPEED_10MBPS]		= SPEED_10,
-	[SJA1105_SPEED_100MBPS]		= SPEED_100,
-	[SJA1105_SPEED_1000MBPS]	= SPEED_1000,
-};
+static int sja1105_port_speed_to_ethtool(struct sja1105_private *priv,
+					 u64 speed)
+{
+	if (speed == priv->info->port_speed[SJA1105_SPEED_10MBPS])
+		return SPEED_10;
+	if (speed == priv->info->port_speed[SJA1105_SPEED_100MBPS])
+		return SPEED_100;
+	if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS])
+		return SPEED_1000;
+	if (speed == priv->info->port_speed[SJA1105_SPEED_2500MBPS])
+		return SPEED_2500;
+	return SPEED_UNKNOWN;
+}
 
 /* Set link speed in the MAC configuration for a specific port. */
 static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
@@ -1003,7 +1010,7 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 {
 	struct sja1105_mac_config_entry *mac;
 	struct device *dev = priv->ds->dev;
-	sja1105_speed_t speed;
+	u64 speed;
 	int rc;
 
 	/* On P/Q/R/S, one can read from the device via the MAC reconfiguration
@@ -1023,16 +1030,16 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 		 * ok for power consumption in case AN will never complete -
 		 * otherwise PHYLINK should come back with a new update.
 		 */
-		speed = SJA1105_SPEED_AUTO;
+		speed = priv->info->port_speed[SJA1105_SPEED_AUTO];
 		break;
 	case SPEED_10:
-		speed = SJA1105_SPEED_10MBPS;
+		speed = priv->info->port_speed[SJA1105_SPEED_10MBPS];
 		break;
 	case SPEED_100:
-		speed = SJA1105_SPEED_100MBPS;
+		speed = priv->info->port_speed[SJA1105_SPEED_100MBPS];
 		break;
 	case SPEED_1000:
-		speed = SJA1105_SPEED_1000MBPS;
+		speed = priv->info->port_speed[SJA1105_SPEED_1000MBPS];
 		break;
 	default:
 		dev_err(dev, "Invalid speed %iMbps\n", speed_mbps);
@@ -1047,7 +1054,7 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	 * we need to configure the PCS only (if even that).
 	 */
 	if (priv->phy_mode[port] == PHY_INTERFACE_MODE_SGMII)
-		mac[port].speed = SJA1105_SPEED_1000MBPS;
+		mac[port].speed = priv->info->port_speed[SJA1105_SPEED_1000MBPS];
 	else
 		mac[port].speed = speed;
 
@@ -1883,8 +1890,9 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	 * change it through the dynamic interface later.
 	 */
 	for (i = 0; i < ds->num_ports; i++) {
-		speed_mbps[i] = sja1105_speed[mac[i].speed];
-		mac[i].speed = SJA1105_SPEED_AUTO;
+		speed_mbps[i] = sja1105_port_speed_to_ethtool(priv,
+							      mac[i].speed);
+		mac[i].speed = priv->info->port_speed[SJA1105_SPEED_AUTO];
 
 		if (priv->phy_mode[i] == PHY_INTERFACE_MODE_SGMII)
 			bmcr[i] = sja1105_sgmii_read(priv, i,
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 565b594efa7d..786c16a77e46 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -482,6 +482,13 @@ const struct sja1105_info sja1105e_info = {
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 3,
+		[SJA1105_SPEED_100MBPS] = 2,
+		[SJA1105_SPEED_1000MBPS] = 1,
+		[SJA1105_SPEED_2500MBPS] = 0, /* Not supported */
+	},
 	.supports_mii		= {true, true, true, true, true},
 	.supports_rmii		= {true, true, true, true, true},
 	.supports_rgmii		= {true, true, true, true, true},
@@ -505,6 +512,13 @@ const struct sja1105_info sja1105t_info = {
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 3,
+		[SJA1105_SPEED_100MBPS] = 2,
+		[SJA1105_SPEED_1000MBPS] = 1,
+		[SJA1105_SPEED_2500MBPS] = 0, /* Not supported */
+	},
 	.supports_mii		= {true, true, true, true, true},
 	.supports_rmii		= {true, true, true, true, true},
 	.supports_rgmii		= {true, true, true, true, true},
@@ -529,6 +543,13 @@ const struct sja1105_info sja1105p_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 3,
+		[SJA1105_SPEED_100MBPS] = 2,
+		[SJA1105_SPEED_1000MBPS] = 1,
+		[SJA1105_SPEED_2500MBPS] = 0, /* Not supported */
+	},
 	.supports_mii		= {true, true, true, true, true},
 	.supports_rmii		= {true, true, true, true, true},
 	.supports_rgmii		= {true, true, true, true, true},
@@ -553,6 +574,13 @@ const struct sja1105_info sja1105q_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 3,
+		[SJA1105_SPEED_100MBPS] = 2,
+		[SJA1105_SPEED_1000MBPS] = 1,
+		[SJA1105_SPEED_2500MBPS] = 0, /* Not supported */
+	},
 	.supports_mii		= {true, true, true, true, true},
 	.supports_rmii		= {true, true, true, true, true},
 	.supports_rgmii		= {true, true, true, true, true},
@@ -577,6 +605,13 @@ const struct sja1105_info sja1105r_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 3,
+		[SJA1105_SPEED_100MBPS] = 2,
+		[SJA1105_SPEED_1000MBPS] = 1,
+		[SJA1105_SPEED_2500MBPS] = 0, /* Not supported */
+	},
 	.supports_mii		= {true, true, true, true, true},
 	.supports_rmii		= {true, true, true, true, true},
 	.supports_rgmii		= {true, true, true, true, true},
@@ -602,6 +637,13 @@ const struct sja1105_info sja1105s_info = {
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.clocking_setup		= sja1105_clocking_setup,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 3,
+		[SJA1105_SPEED_100MBPS] = 2,
+		[SJA1105_SPEED_1000MBPS] = 1,
+		[SJA1105_SPEED_2500MBPS] = 0, /* Not supported */
+	},
 	.supports_mii		= {true, true, true, true, true},
 	.supports_rmii		= {true, true, true, true, true},
 	.supports_rgmii		= {true, true, true, true, true},

From f41fad3cb8b76cc0e5deebd4d535d0c7c7b43b43 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:38 +0300
Subject: [PATCH 0736/3292] net: dsa: sja1105: always keep RGMII ports in the
 MAC role

In SJA1105, the xMII Mode Parameters Table field called PHY_MAC denotes
the 'role' of the port, be it a PHY or a MAC. This makes a difference in
the MII and RMII protocols, but RGMII is symmetric, so either PHY or MAC
settings result in the same hardware behavior.

The SJA1110 is different, and the RGMII ports only work when configured
in MAC mode, so keep the port roles in MAC mode unconditionally.

Why we had an RGMII port in the PHY role in the first place was because
we wanted to have a way in the driver to denote whether RGMII delays
should be applied based on the phy-mode property or not. This is already
done in sja1105_parse_rgmii_delays() based on an intermediary
struct sja1105_dt_port (which contains the port role). So it is a
logical fallacy to use the hardware configuration as a scratchpad for
driver data, it isn't necessary.

We can also remove the gating condition for applying RGMII delays only
for ports in the PHY role. The .setup_rgmii_delay() method looks at
the priv->rgmii_rx_delay[port] and priv->rgmii_tx_delay[port] properties
which are already populated properly (in the case of a port in the MAC
role they are false). Removing this condition generates a few more SPI
writes for these ports (clearing the RGMII delays) which are perhaps
useless for SJA1105P/Q/R/S, where we know that the delays are disabled
by default. But for SJA1110, the firmware on the embedded microcontroller
might have done something funny, so it's always a good idea to clear the
RGMII delays if that's what Linux expects.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_clocking.c | 7 +------
 drivers/net/dsa/sja1105/sja1105_main.c     | 8 +++++++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 03173397d950..ae297648611f 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -566,14 +566,9 @@ static int sja1105_rgmii_clocking_setup(struct sja1105_private *priv, int port,
 		dev_err(dev, "Failed to configure Tx pad registers\n");
 		return rc;
 	}
+
 	if (!priv->info->setup_rgmii_delay)
 		return 0;
-	/* The role has no hardware effect for RGMII. However we use it as
-	 * a proxy for this interface being a MAC-to-MAC connection, with
-	 * the RGMII internal delays needing to be applied by us.
-	 */
-	if (role == XMII_MAC)
-		return 0;
 
 	return priv->info->setup_rgmii_delay(priv, port);
 }
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 5beafe003268..84edd054781b 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -218,8 +218,14 @@ unsupported:
 		/* Even though the SerDes port is able to drive SGMII autoneg
 		 * like a PHY would, from the perspective of the XMII tables,
 		 * the SGMII port should always be put in MAC mode.
+		 * Similarly, RGMII is a symmetric protocol electrically
+		 * speaking, and the 'RGMII PHY' role does not mean anything to
+		 * hardware. Just keep the 'PHY role' notation relevant to the
+		 * driver to mean 'the switch port should apply RGMII delays',
+		 * but unconditionally put the port in the MAC role.
 		 */
-		if (ports[i].phy_mode == PHY_INTERFACE_MODE_SGMII)
+		if (ports[i].phy_mode == PHY_INTERFACE_MODE_SGMII ||
+		    phy_interface_mode_is_rgmii(ports[i].phy_mode))
 			mii->phy_mac[i] = XMII_MAC;
 		else
 			mii->phy_mac[i] = ports[i].role;

From 96c85f51f1236d0eed3c8cd075ce144faed6a0ca Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 01:59:39 +0300
Subject: [PATCH 0737/3292] net: dsa: sja1105: some table entries are always
 present when read dynamically

The SJA1105 has a static configuration comprised of a number of tables
with entries. Some of these can be read and modified at runtime as well,
through the dynamic configuration interface.

As a careful reader can notice from the comments in this file, the
software interface for accessing a table entry through the dynamic
reconfiguration is a bit of a no man's land, and varies wildly across
switch generations and even from one kind of table to another.

I have tried my best to come up with a software representation of a
'common denominator' SPI command to access a table entry through the
dynamic configuration interface:

struct sja1105_dyn_cmd {
	bool search;
	u64 valid; /* must be set to 1 */
	u64 rdwrset; /* 0 to read, 1 to write */
	u64 errors;
	u64 valident; /* 0 if entry is invalid, 1 if valid */
	u64 index;
};

Relevant to this patch is the VALIDENT bit, which for READ commands is
populated by the switch and lets us know if we're looking at junk or at
a real table entry.

In SJA1105, the dynamic reconfiguration interface for management routes
has notably not implemented the VALIDENT bit, leading to a workaround to
ignore this field in sja1105_dynamic_config_read(), as it will be set to
zero, but the data is valid nonetheless.

In SJA1110, this pattern has sadly been abused to death, and while there
are many more tables which can be read back over the dynamic config
interface compared to SJA1105, their handling isn't in any way more
uniform. Generally speaking, if there is a single possible entry in a
given table, and loading that table in the static config is mandatory as
per the documentation, then the VALIDENT bit is deemed as redundant and
more than likely not implemented.

So it is time to make the workaround more official, and add a bit to the
flags implemented by dynamic config tables. It will be used by more
tables when SJA1110 support arrives.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 12cd04b56803..ff2742f53de3 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -78,6 +78,9 @@
  *		   on its ENTRY portion, as a result of a SPI write command.
  *		   Only the TCAM-based FDB table on SJA1105 P/Q/R/S supports
  *		   this.
+ *	OP_VALID_ANYWAY: Reading some tables through the dynamic config
+ *			 interface is possible even if the VALIDENT bit is not
+ *			 set in the writeback. So don't error out in that case.
  * - .max_entry_count: The number of entries, counting from zero, that can be
  *		       reconfigured through the dynamic interface. If a static
  *		       table can be reconfigured at all dynamically, this
@@ -651,6 +654,7 @@ static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
 #define OP_WRITE	BIT(1)
 #define OP_DEL		BIT(2)
 #define OP_SEARCH	BIT(3)
+#define OP_VALID_ANYWAY	BIT(4)
 
 /* SJA1105E/T: First generation */
 const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
@@ -673,7 +677,7 @@ const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_MGMT_ROUTE] = {
 		.entry_packing = sja1105et_mgmt_route_entry_packing,
 		.cmd_packing = sja1105et_mgmt_route_cmd_packing,
-		.access = (OP_READ | OP_WRITE),
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
 		.max_entry_count = SJA1105_NUM_PORTS,
 		.packed_size = SJA1105ET_SIZE_L2_LOOKUP_DYN_CMD,
 		.addr = 0x20,
@@ -757,7 +761,7 @@ const struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_MGMT_ROUTE] = {
 		.entry_packing = sja1105pqrs_mgmt_route_entry_packing,
 		.cmd_packing = sja1105pqrs_mgmt_route_cmd_packing,
-		.access = (OP_READ | OP_WRITE | OP_DEL | OP_SEARCH),
+		.access = (OP_READ | OP_WRITE | OP_DEL | OP_SEARCH | OP_VALID_ANYWAY),
 		.max_entry_count = SJA1105_NUM_PORTS,
 		.packed_size = SJA1105PQRS_SIZE_L2_LOOKUP_DYN_CMD,
 		.addr = 0x24,
@@ -911,11 +915,8 @@ int sja1105_dynamic_config_read(struct sja1105_private *priv,
 
 		cmd = (struct sja1105_dyn_cmd) {0};
 		ops->cmd_packing(packed_buf, &cmd, UNPACK);
-		/* UM10944: [valident] will always be found cleared
-		 * during a read access with MGMTROUTE set.
-		 * So don't error out in that case.
-		 */
-		if (!cmd.valident && blk_idx != BLK_IDX_MGMT_ROUTE)
+
+		if (!cmd.valident && !(ops->access & OP_VALID_ANYWAY))
 			return -ENOENT;
 		cpu_relax();
 	} while (cmd.valid && --retries);

From 01709d0977d464b862968ef063899e576615e5e5 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 31 May 2021 10:00:19 +0800
Subject: [PATCH 0738/3292] nfc: hci: Fix spelling mistakes

Fix some spelling mistakes in comments:
occured  ==> occurred
negociate  ==> negotiate

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531020019.2919799-1-zhengyongjun3@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/nfc/hci/command.c   | 2 +-
 net/nfc/hci/core.c      | 2 +-
 net/nfc/hci/llc_shdlc.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index e02b9befce0b..3a89bd9b89fc 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -34,7 +34,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
  * HCI command execution completion callback.
  * err will be a standard linux error (may be converted from HCI response)
  * skb contains the response data and must be disposed, or may be NULL if
- * an error occured
+ * an error occurred
  */
 static void nfc_hci_execute_cb(void *context, struct sk_buff *skb, int err)
 {
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 43811b5219b5..3481941be70b 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -705,7 +705,7 @@ static void hci_transceive_cb(void *context, struct sk_buff *skb, int err)
 		/*
 		 * TODO: Check RF Error indicator to make sure data is valid.
 		 * It seems that HCI cmd can complete without error, but data
-		 * can be invalid if an RF error occured? Ignore for now.
+		 * can be invalid if an RF error occurred? Ignore for now.
 		 */
 		if (err == 0)
 			skb_trim(skb, skb->len - 1); /* RF Err ind */
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index c0c8fea3a186..1e3a90049da9 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -406,7 +406,7 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc,
 		case SHDLC_NEGOTIATING:
 		case SHDLC_CONNECTING:
 			/*
-			 * We sent RSET, but chip wants to negociate or we
+			 * We sent RSET, but chip wants to negotiate or we
 			 * got RSET before we managed to send out our.
 			 */
 			if (skb->len > 0)

From 37f2ad2b9018c5e23455536e5c240cac1334f20a Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 31 May 2021 10:00:48 +0800
Subject: [PATCH 0739/3292] net: sched: Fix spelling mistakes

Fix some spelling mistakes in comments:
sevaral  ==> several
sugestion  ==> suggestion
unregster  ==> unregister
suplied  ==> supplied
cirsumstances  ==> circumstances

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Link: https://lore.kernel.org/r/20210531020048.2920054-1-zhengyongjun3@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/cls_rsvp.h | 2 +-
 net/sched/ematch.c   | 2 +-
 net/sched/sch_gred.c | 2 +-
 net/sched/sch_htb.c  | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 2e288f88ff02..27a4b6dbcf57 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -7,7 +7,7 @@
 
 /*
    Comparing to general packet classification problem,
-   RSVP needs only sevaral relatively simple rules:
+   RSVP needs only several relatively simple rules:
 
    * (dst, protocol) are always specified,
      so that we are able to hash them.
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index f885bea5b452..4ce681361851 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -141,7 +141,7 @@ errout:
 EXPORT_SYMBOL(tcf_em_register);
 
 /**
- * tcf_em_unregister - unregster and extended match
+ * tcf_em_unregister - unregister and extended match
  *
  * @ops: ematch operations lookup table
  *
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index f4132dc25ac0..621dc6afde8f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -6,7 +6,7 @@
  *
  *             991129: -  Bug fix with grio mode
  *		       - a better sing. AvgQ mode with Grio(WRED)
- *		       - A finer grained VQ dequeue based on sugestion
+ *		       - A finer grained VQ dequeue based on suggestion
  *		         from Ren Liu
  *		       - More error checks
  *
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 081c11d5717c..282614614905 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -52,7 +52,7 @@
 */
 
 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
@@ -523,7 +523,7 @@ htb_class_mode(struct htb_class *cl, s64 *diff)
  * htb_change_class_mode - changes classe's mode
  *
  * This should be the only way how to change classe's mode under normal
- * cirsumstances. Routine will update feed lists linkage, change mode
+ * circumstances. Routine will update feed lists linkage, change mode
  * and add class to the wait event queue if appropriate. New mode should
  * be different from old one and cl->pq_key has to be valid if changing
  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).

From 379aecbce08f7187feab9aa12609d049ce2675b4 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 31 May 2021 14:36:17 +0800
Subject: [PATCH 0740/3292] rds: Fix spelling mistakes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix some spelling mistakes in comments:
alloced  ==> allocated

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Link: https://lore.kernel.org/r/20210531063617.3018637-1-zhengyongjun3@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/rds/ib_ring.c  | 2 +-
 net/rds/tcp_recv.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c
index ff97e8eda858..006b2e441418 100644
--- a/net/rds/ib_ring.c
+++ b/net/rds/ib_ring.c
@@ -141,7 +141,7 @@ int rds_ib_ring_low(struct rds_ib_work_ring *ring)
 }
 
 /*
- * returns the oldest alloced ring entry.  This will be the next one
+ * returns the oldest allocated ring entry.  This will be the next one
  * freed.  This can't be called if there are none allocated.
  */
 u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring)
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 42c5ff1eda95..f4ee13da90c7 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -177,7 +177,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 				goto out;
 			}
 			tc->t_tinc = tinc;
-			rdsdebug("alloced tinc %p\n", tinc);
+			rdsdebug("allocated tinc %p\n", tinc);
 			rds_inc_path_init(&tinc->ti_inc, cp,
 					  &cp->cp_conn->c_faddr);
 			tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =

From 0c2c366e0ec55533decb00d0f1ea1cbc42247e7b Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 10:08:01 +0800
Subject: [PATCH 0741/3292] sctp: sm_statefuns: Fix spelling mistakes

Fix some spelling mistakes in comments:
genereate ==> generate
correclty ==> correctly
boundries ==> boundaries
failes ==> fails
isses ==> issues
assocition ==> association
signe ==> sign
assocaition ==> association
managemement ==> management
restransmissions ==> retransmission
sideffect ==> sideeffect
bomming ==> booming
chukns ==> chunks
SHUDOWN ==> SHUTDOWN
violationg ==> violating
explcitly ==> explicitly
CHunk ==> Chunk

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Link: https://lore.kernel.org/r/20210601020801.3625358-1-zhengyongjun3@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sctp/sm_statefuns.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index fd1e319eda00..4f30388a0dd0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -361,7 +361,7 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
 
 	/* If the INIT is coming toward a closing socket, we'll send back
 	 * and ABORT.  Essentially, this catches the race of INIT being
-	 * backloged to the socket at the same time as the user isses close().
+	 * backloged to the socket at the same time as the user issues close().
 	 * Since the socket and all its associations are going away, we
 	 * can treat this OOTB
 	 */
@@ -608,8 +608,8 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_COOKIE_ECHOED));
 
-	/* SCTP-AUTH: genereate the assocition shared keys so that
-	 * we can potentially signe the COOKIE-ECHO.
+	/* SCTP-AUTH: generate the association shared keys so that
+	 * we can potentially sign the COOKIE-ECHO.
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL());
 
@@ -787,7 +787,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 		goto nomem_init;
 
 	/* SCTP-AUTH:  Now that we've populate required fields in
-	 * sctp_process_init, set up the assocaition shared keys as
+	 * sctp_process_init, set up the association shared keys as
 	 * necessary so that we can potentially authenticate the ACK
 	 */
 	error = sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC);
@@ -838,7 +838,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 
 	/* Add all the state machine commands now since we've created
 	 * everything.  This way we don't introduce memory corruptions
-	 * during side-effect processing and correclty count established
+	 * during side-effect processing and correctly count established
 	 * associations.
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -923,7 +923,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 						  commands);
 
 	/* Reset init error count upon receipt of COOKIE-ACK,
-	 * to avoid problems with the managemement of this
+	 * to avoid problems with the management of this
 	 * counter in stale cookie situations when a transition back
 	 * from the COOKIE-ECHOED state to the COOKIE-WAIT
 	 * state is performed.
@@ -2950,7 +2950,7 @@ enum sctp_disposition sctp_sf_do_9_2_reshutack(
 						  commands);
 
 	/* Since we are not going to really process this INIT, there
-	 * is no point in verifying chunk boundries.  Just generate
+	 * is no point in verifying chunk boundaries.  Just generate
 	 * the SHUTDOWN ACK.
 	 */
 	reply = sctp_make_shutdown_ack(asoc, chunk);
@@ -3560,7 +3560,7 @@ enum sctp_disposition sctp_sf_do_9_2_final(struct net *net,
 		goto nomem_chunk;
 
 	/* Do all the commands now (after allocation), so that we
-	 * have consistent state if memory allocation failes
+	 * have consistent state if memory allocation fails
 	 */
 	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
 
@@ -3747,7 +3747,7 @@ static enum sctp_disposition sctp_sf_shut_8_4_5(
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* We need to discard the rest of the packet to prevent
-	 * potential bomming attacks from additional bundled chunks.
+	 * potential boomming attacks from additional bundled chunks.
 	 * This is documented in SCTP Threats ID.
 	 */
 	return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -4257,7 +4257,7 @@ gen_shutdown:
 }
 
 /*
- * SCTP-AUTH Section 6.3 Receiving authenticated chukns
+ * SCTP-AUTH Section 6.3 Receiving authenticated chunks
  *
  *    The receiver MUST use the HMAC algorithm indicated in the HMAC
  *    Identifier field.  If this algorithm was not specified by the
@@ -4812,7 +4812,7 @@ static enum sctp_disposition sctp_sf_violation_ctsn(
 
 /* Handle protocol violation of an invalid chunk bundling.  For example,
  * when we have an association and we receive bundled INIT-ACK, or
- * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle"
+ * SHUTDOWN-COMPLETE, our peer is clearly violating the "MUST NOT bundle"
  * statement from the specs.  Additionally, there might be an attacker
  * on the path and we may not want to continue this communication.
  */
@@ -5208,7 +5208,7 @@ enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown(
  * Inputs
  * (endpoint, asoc)
  *
- * The RFC does not explcitly address this issue, but is the route through the
+ * The RFC does not explicitly address this issue, but is the route through the
  * state table when someone issues a shutdown while in COOKIE_ECHOED state.
  *
  * Outputs
@@ -5932,7 +5932,7 @@ enum sctp_disposition sctp_sf_t1_cookie_timer_expire(
 /* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN
  * with the updated last sequential TSN received from its peer.
  *
- * An endpoint should limit the number of retransmissions of the
+ * An endpoint should limit the number of retransmission of the
  * SHUTDOWN chunk to the protocol parameter 'Association.Max.Retrans'.
  * If this threshold is exceeded the endpoint should destroy the TCB and
  * MUST report the peer endpoint unreachable to the upper layer (and
@@ -6010,7 +6010,7 @@ nomem:
 }
 
 /*
- * ADDIP Section 4.1 ASCONF CHunk Procedures
+ * ADDIP Section 4.1 ASCONF Chunk Procedures
  * If the T4 RTO timer expires the endpoint should do B1 to B5
  */
 enum sctp_disposition sctp_sf_t4_timer_expire(
@@ -6441,7 +6441,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 		chunk->ecn_ce_done = 1;
 
 		if (af->is_ce(sctp_gso_headskb(chunk->skb))) {
-			/* Do real work as sideffect. */
+			/* Do real work as side effect. */
 			sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE,
 					SCTP_U32(tsn));
 		}

From d7b0408934c749f546b01f2b33d07421a49b6f3e Mon Sep 17 00:00:00 2001
From: Varad Gautam <varad.gautam@suse.com>
Date: Fri, 28 May 2021 18:04:06 +0200
Subject: [PATCH 0742/3292] xfrm: policy: Read seqcount outside of rcu-read
 side in xfrm_policy_lookup_bytype

xfrm_policy_lookup_bytype loops on seqcount mutex xfrm_policy_hash_generation
within an RCU read side critical section. Although ill advised, this is fine if
the loop is bounded.

xfrm_policy_hash_generation wraps mutex hash_resize_mutex, which is used to
serialize writers (xfrm_hash_resize, xfrm_hash_rebuild). This is fine too.

On PREEMPT_RT=y, the read_seqcount_begin call within xfrm_policy_lookup_bytype
emits a mutex lock/unlock for hash_resize_mutex. Mutex locking is fine, since
RCU read side critical sections are allowed to sleep with PREEMPT_RT.

xfrm_hash_resize can, however, block on synchronize_rcu while holding
hash_resize_mutex.

This leads to the following situation on PREEMPT_RT, where the writer is
blocked on RCU grace period expiry, while the reader is blocked on a lock held
by the writer:

Thead 1 (xfrm_hash_resize)	Thread 2 (xfrm_policy_lookup_bytype)

				rcu_read_lock();
mutex_lock(&hash_resize_mutex);
				read_seqcount_begin(&xfrm_policy_hash_generation);
				mutex_lock(&hash_resize_mutex); // block
xfrm_bydst_resize();
synchronize_rcu(); // block
		<RCU stalls in xfrm_policy_lookup_bytype>

Move the read_seqcount_begin call outside of the RCU read side critical section,
and do an rcu_read_unlock/retry if we got stale data within the critical section.

On non-PREEMPT_RT, this shortens the time spent within RCU read side critical
section in case the seqcount needs a retry, and avoids unbounded looping.

Fixes: 77cc278f7b20 ("xfrm: policy: Use sequence counters with associated lock")
Signed-off-by: Varad Gautam <varad.gautam@suse.com>
Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
Cc: netdev@vger.kernel.org
Cc: stable@vger.kernel.org # v4.9
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Florian Westphal <fw@strlen.de>
Cc: "Ahmed S. Darwish" <a.darwish@linutronix.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Ahmed S. Darwish <a.darwish@linutronix.de>
---
 net/xfrm/xfrm_policy.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b74f28cabe24..8c56e3e59c3c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2092,12 +2092,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	if (unlikely(!daddr || !saddr))
 		return NULL;
 
-	rcu_read_lock();
  retry:
-	do {
-		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
-		chain = policy_hash_direct(net, daddr, saddr, family, dir);
-	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+	sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+	rcu_read_lock();
+
+	chain = policy_hash_direct(net, daddr, saddr, family, dir);
+	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
+		rcu_read_unlock();
+		goto retry;
+	}
 
 	ret = NULL;
 	hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2128,11 +2131,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	}
 
 skip_inexact:
-	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
+		rcu_read_unlock();
 		goto retry;
+	}
 
-	if (ret && !xfrm_pol_hold_rcu(ret))
+	if (ret && !xfrm_pol_hold_rcu(ret)) {
+		rcu_read_unlock();
 		goto retry;
+	}
 fail:
 	rcu_read_unlock();
 

From b44eb28d44a65370e77d0bcd9a87cee3fa1daaca Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:38 +0800
Subject: [PATCH 0743/3292] net: hns3: add 'QoS' support for port based VLAN
 configuration

Currently, option "qos" is igored by HNS3 driver for command
"ip link set ethx vf <vf id> vlan <vlan id> qos <qos value>".
Add support for it.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 91 +++++++++++++------
 1 file changed, 63 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6addeb299bba..af5b278a0643 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9724,7 +9724,7 @@ static int hclge_set_vlan_rx_offload_cfg(struct hclge_vport *vport)
 
 static int hclge_vlan_offload_cfg(struct hclge_vport *vport,
 				  u16 port_base_vlan_state,
-				  u16 vlan_tag)
+				  u16 vlan_tag, u8 qos)
 {
 	int ret;
 
@@ -9738,7 +9738,8 @@ static int hclge_vlan_offload_cfg(struct hclge_vport *vport,
 		vport->txvlan_cfg.accept_tag1 =
 			ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3;
 		vport->txvlan_cfg.insert_tag1_en = true;
-		vport->txvlan_cfg.default_tag1 = vlan_tag;
+		vport->txvlan_cfg.default_tag1 = (qos << VLAN_PRIO_SHIFT) |
+						 vlan_tag;
 	}
 
 	vport->txvlan_cfg.accept_untag1 = true;
@@ -9867,13 +9868,15 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 
 	for (i = 0; i < hdev->num_alloc_vport; i++) {
 		u16 vlan_tag;
+		u8 qos;
 
 		vport = &hdev->vport[i];
 		vlan_tag = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
+		qos = vport->port_base_vlan_cfg.vlan_info.qos;
 
 		ret = hclge_vlan_offload_cfg(vport,
 					     vport->port_base_vlan_cfg.state,
-					     vlan_tag);
+					     vlan_tag, qos);
 		if (ret)
 			return ret;
 	}
@@ -10084,6 +10087,10 @@ static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
 
 	if (port_base_vlan_state == HNAE3_PORT_BASE_VLAN_ENABLE) {
 		hclge_rm_vport_all_vlan_table(vport, false);
+		/* force clear VLAN 0 */
+		ret = hclge_set_vf_vlan_common(hdev, vport->vport_id, true, 0);
+		if (ret)
+			return ret;
 		return hclge_set_vlan_filter_hw(hdev,
 						 htons(new_info->vlan_proto),
 						 vport->vport_id,
@@ -10091,6 +10098,11 @@ static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
 						 false);
 	}
 
+	/* force add VLAN 0 */
+	ret = hclge_set_vf_vlan_common(hdev, vport->vport_id, false, 0);
+	if (ret)
+		return ret;
+
 	ret = hclge_set_vlan_filter_hw(hdev, htons(old_info->vlan_proto),
 				       vport->vport_id, old_info->vlan_tag,
 				       true);
@@ -10100,6 +10112,18 @@ static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
 	return hclge_add_vport_all_vlan_table(vport);
 }
 
+static bool hclge_need_update_vlan_filter(const struct hclge_vlan_info *new_cfg,
+					  const struct hclge_vlan_info *old_cfg)
+{
+	if (new_cfg->vlan_tag != old_cfg->vlan_tag)
+		return true;
+
+	if (new_cfg->vlan_tag == 0 && (new_cfg->qos == 0 || old_cfg->qos == 0))
+		return true;
+
+	return false;
+}
+
 int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 				    struct hclge_vlan_info *vlan_info)
 {
@@ -10110,10 +10134,14 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 
 	old_vlan_info = &vport->port_base_vlan_cfg.vlan_info;
 
-	ret = hclge_vlan_offload_cfg(vport, state, vlan_info->vlan_tag);
+	ret = hclge_vlan_offload_cfg(vport, state, vlan_info->vlan_tag,
+				     vlan_info->qos);
 	if (ret)
 		return ret;
 
+	if (!hclge_need_update_vlan_filter(vlan_info, old_vlan_info))
+		goto out;
+
 	if (state == HNAE3_PORT_BASE_VLAN_MODIFY) {
 		/* add new VLAN tag */
 		ret = hclge_set_vlan_filter_hw(hdev,
@@ -10125,15 +10153,23 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 			return ret;
 
 		/* remove old VLAN tag */
-		ret = hclge_set_vlan_filter_hw(hdev,
-					       htons(old_vlan_info->vlan_proto),
-					       vport->vport_id,
-					       old_vlan_info->vlan_tag,
-					       true);
-		if (ret)
+		if (old_vlan_info->vlan_tag == 0)
+			ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
+						       true, 0);
+		else
+			ret = hclge_set_vlan_filter_hw(hdev,
+						       htons(ETH_P_8021Q),
+						       vport->vport_id,
+						       old_vlan_info->vlan_tag,
+						       true);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to clear vport%u port base vlan %u, ret = %d.\n",
+				vport->vport_id, old_vlan_info->vlan_tag, ret);
 			return ret;
+		}
 
-		goto update;
+		goto out;
 	}
 
 	ret = hclge_update_vlan_filter_entries(vport, state, vlan_info,
@@ -10141,38 +10177,37 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 	if (ret)
 		return ret;
 
-	/* update state only when disable/enable port based VLAN */
+out:
 	vport->port_base_vlan_cfg.state = state;
 	if (state == HNAE3_PORT_BASE_VLAN_DISABLE)
 		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_DISABLE;
 	else
 		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
 
-update:
-	vport->port_base_vlan_cfg.vlan_info.vlan_tag = vlan_info->vlan_tag;
-	vport->port_base_vlan_cfg.vlan_info.qos = vlan_info->qos;
-	vport->port_base_vlan_cfg.vlan_info.vlan_proto = vlan_info->vlan_proto;
+	vport->port_base_vlan_cfg.vlan_info = *vlan_info;
 
 	return 0;
 }
 
 static u16 hclge_get_port_base_vlan_state(struct hclge_vport *vport,
 					  enum hnae3_port_base_vlan_state state,
-					  u16 vlan)
+					  u16 vlan, u8 qos)
 {
 	if (state == HNAE3_PORT_BASE_VLAN_DISABLE) {
-		if (!vlan)
+		if (!vlan && !qos)
 			return HNAE3_PORT_BASE_VLAN_NOCHANGE;
-		else
-			return HNAE3_PORT_BASE_VLAN_ENABLE;
-	} else {
-		if (!vlan)
-			return HNAE3_PORT_BASE_VLAN_DISABLE;
-		else if (vport->port_base_vlan_cfg.vlan_info.vlan_tag == vlan)
-			return HNAE3_PORT_BASE_VLAN_NOCHANGE;
-		else
-			return HNAE3_PORT_BASE_VLAN_MODIFY;
+
+		return HNAE3_PORT_BASE_VLAN_ENABLE;
 	}
+
+	if (!vlan && !qos)
+		return HNAE3_PORT_BASE_VLAN_DISABLE;
+
+	if (vport->port_base_vlan_cfg.vlan_info.vlan_tag == vlan &&
+	    vport->port_base_vlan_cfg.vlan_info.qos == qos)
+		return HNAE3_PORT_BASE_VLAN_NOCHANGE;
+
+	return HNAE3_PORT_BASE_VLAN_MODIFY;
 }
 
 static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
@@ -10200,7 +10235,7 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
 
 	state = hclge_get_port_base_vlan_state(vport,
 					       vport->port_base_vlan_cfg.state,
-					       vlan);
+					       vlan, qos);
 	if (state == HNAE3_PORT_BASE_VLAN_NOCHANGE)
 		return 0;
 

From f2dbf0ed4e0c1789cc7c74ab3798bd0cdb7a2bf1 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:39 +0800
Subject: [PATCH 0744/3292] net: hns3: refine for
 hclge_push_vf_port_base_vlan_info()

Use struct "hclge_vlan_info" instead of separately parameters
for function hclge_push_vf_port_base_vlan_info(), to make it
more concise.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c    |  3 +--
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  4 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 10 +++++-----
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index af5b278a0643..7c6b51f8d2b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10258,8 +10258,7 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
 	    test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
 		hclge_push_vf_port_base_vlan_info(&hdev->vport[0],
 						  vport->vport_id, state,
-						  vlan, qos,
-						  ntohs(proto));
+						  &vlan_info);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9e4d02d73bf3..e3dc2167ebd9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1084,8 +1084,8 @@ void hclge_restore_vport_vlan_table(struct hclge_vport *vport);
 int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 				    struct hclge_vlan_info *vlan_info);
 int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
-				      u16 state, u16 vlan_tag, u16 qos,
-				      u16 vlan_proto);
+				      u16 state,
+				      struct hclge_vlan_info *vlan_info);
 void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time);
 int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev,
 				struct hclge_desc *desc);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 851408bcbc4f..16b42cee778e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -318,17 +318,17 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
 }
 
 int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
-				      u16 state, u16 vlan_tag, u16 qos,
-				      u16 vlan_proto)
+				      u16 state,
+				      struct hclge_vlan_info *vlan_info)
 {
 #define MSG_DATA_SIZE	8
 
 	u8 msg_data[MSG_DATA_SIZE];
 
 	memcpy(&msg_data[0], &state, sizeof(u16));
-	memcpy(&msg_data[2], &vlan_proto, sizeof(u16));
-	memcpy(&msg_data[4], &qos, sizeof(u16));
-	memcpy(&msg_data[6], &vlan_tag, sizeof(u16));
+	memcpy(&msg_data[2], &vlan_info->vlan_proto, sizeof(u16));
+	memcpy(&msg_data[4], &vlan_info->qos, sizeof(u16));
+	memcpy(&msg_data[6], &vlan_info->vlan_tag, sizeof(u16));
 
 	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
 				  HCLGE_MBX_PUSH_VLAN_INFO, vfid);

From 132023de7149e7dde4b457328cb233dc58561b54 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:40 +0800
Subject: [PATCH 0745/3292] net: hns3: remove unnecessary updating port based
 VLAN

For the PF have called hclge_update_port_base_vlan_cfg() already
before notify VF, it's unnecessary to update port based VLAN again
when received mailbox request from VF.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 16b42cee778e..3f7d1f2cbe2d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -360,15 +360,6 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
 		bool en = msg_cmd->is_kill ? true : false;
 
 		status = hclge_en_hw_strip_rxvtag(handle, en);
-	} else if (msg_cmd->subcode == HCLGE_MBX_PORT_BASE_VLAN_CFG) {
-		struct hclge_vlan_info *vlan_info;
-		u16 *state;
-
-		state = (u16 *)&mbx_req->msg.data[HCLGE_MBX_VLAN_STATE_OFFSET];
-		vlan_info = (struct hclge_vlan_info *)
-			&mbx_req->msg.data[HCLGE_MBX_VLAN_INFO_OFFSET];
-		status = hclge_update_port_base_vlan_cfg(vport, *state,
-							 vlan_info);
 	} else if (msg_cmd->subcode == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) {
 		struct hnae3_ae_dev *ae_dev = pci_get_drvdata(vport->nic.pdev);
 		/* vf does not need to know about the port based VLAN state

From 060e9accaa743d701e653213651cf3feee1ae921 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:41 +0800
Subject: [PATCH 0746/3292] net: hns3: refine function hclge_set_vf_vlan_cfg()

The struct hclge_vf_vlan_cfg is firstly designed for setting
VLAN filter tag. And it's reused for enable RX VLAN offload
later. It's strange to use member "is_kill" to indicate "enable".
So redefine the struct hclge_vf_vlan_cfg to adapt it.

For there are already 3 subcodes being used in function
hclge_set_vf_vlan_cfg(), use "switch-case" style for each
branch, rather than "if-else". Also simplify the assignment for
each branch to make it more clearly.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../hisilicon/hns3/hns3pf/hclge_main.h        | 11 ++++--
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 35 ++++++++-----------
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index e3dc2167ebd9..cd1e40152a67 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -759,9 +759,14 @@ struct hclge_mac_tnl_stats {
 struct hclge_vf_vlan_cfg {
 	u8 mbx_cmd;
 	u8 subcode;
-	u8 is_kill;
-	u16 vlan;
-	u16 proto;
+	union {
+		struct {
+			u8 is_kill;
+			u16 vlan;
+			u16 proto;
+		};
+		u8 enable;
+	};
 };
 
 #pragma pack()
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 3f7d1f2cbe2d..54eee94df47a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -341,40 +341,33 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
 #define HCLGE_MBX_VLAN_STATE_OFFSET	0
 #define HCLGE_MBX_VLAN_INFO_OFFSET	2
 
+	struct hnae3_handle *handle = &vport->nic;
+	struct hclge_dev *hdev = vport->back;
 	struct hclge_vf_vlan_cfg *msg_cmd;
-	int status = 0;
 
 	msg_cmd = (struct hclge_vf_vlan_cfg *)&mbx_req->msg;
-	if (msg_cmd->subcode == HCLGE_MBX_VLAN_FILTER) {
-		struct hnae3_handle *handle = &vport->nic;
-		u16 vlan, proto;
-		bool is_kill;
-
-		is_kill = !!msg_cmd->is_kill;
-		vlan =  msg_cmd->vlan;
-		proto =  msg_cmd->proto;
-		status = hclge_set_vlan_filter(handle, cpu_to_be16(proto),
-					       vlan, is_kill);
-	} else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) {
-		struct hnae3_handle *handle = &vport->nic;
-		bool en = msg_cmd->is_kill ? true : false;
-
-		status = hclge_en_hw_strip_rxvtag(handle, en);
-	} else if (msg_cmd->subcode == HCLGE_MBX_GET_PORT_BASE_VLAN_STATE) {
-		struct hnae3_ae_dev *ae_dev = pci_get_drvdata(vport->nic.pdev);
+	switch (msg_cmd->subcode) {
+	case HCLGE_MBX_VLAN_FILTER:
+		return hclge_set_vlan_filter(handle,
+					     cpu_to_be16(msg_cmd->proto),
+					     msg_cmd->vlan, msg_cmd->is_kill);
+	case HCLGE_MBX_VLAN_RX_OFF_CFG:
+		return hclge_en_hw_strip_rxvtag(handle, msg_cmd->enable);
+	case HCLGE_MBX_GET_PORT_BASE_VLAN_STATE:
 		/* vf does not need to know about the port based VLAN state
 		 * on device HNAE3_DEVICE_VERSION_V3. So always return disable
 		 * on device HNAE3_DEVICE_VERSION_V3 if vf queries the port
 		 * based VLAN state.
 		 */
 		resp_msg->data[0] =
-			ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3 ?
+			hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3 ?
 			HNAE3_PORT_BASE_VLAN_DISABLE :
 			vport->port_base_vlan_cfg.state;
 		resp_msg->len = sizeof(u8);
+		return 0;
+	default:
+		return 0;
 	}
-
-	return status;
 }
 
 static int hclge_set_vf_alive(struct hclge_vport *vport,

From 2ba306627f5950c9a7850f3b0499d39e522dc249 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:42 +0800
Subject: [PATCH 0747/3292] net: hns3: add support for modify VLAN filter state

Previously, with hardware limitation, the port VLAN filter are
effective for both PF and its VFs simultaneously, so a single
function is not able to enable/disable separately, and the VLAN
filter state is default enabled. Now some device supports each
function to bypass port VLAN filter, then each function can
switch VLAN filter separately. Add capability flag to check
whether the device supports modify VLAN filter state. If flag
on, user will be able to modify the VLAN filter state by ethtool
-K.

Furtherly, the default VLAN filter state is also changed
according to whether non-zero VLAN used. Then the device can
receive packet with any VLAN tag if only VLAN 0 used.

The function hclge_need_enable_vport_vlan_filter() is used to
help implement above changes. And the VLAN filter handle for
promisc mode can also be simplified by this function.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   5 +-
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   8 +-
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   |  39 ++--
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |   1 -
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    |   8 +-
 .../hisilicon/hns3/hns3pf/hclge_cmd.c         |   4 +
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         |  12 ++
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 183 ++++++++++++++++--
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   8 +
 9 files changed, 214 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 57fa7fc97c69..c79fef937ade 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -92,6 +92,8 @@ enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
 	HNAE3_DEV_SUPPORT_PAUSE_B,
 	HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
+	HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B,
+	HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B,
 };
 
 #define hnae3_dev_fd_supported(hdev) \
@@ -631,7 +633,7 @@ struct hnae3_ae_ops {
 	void (*get_mdix_mode)(struct hnae3_handle *handle,
 			      u8 *tp_mdix_ctrl, u8 *tp_mdix);
 
-	void (*enable_vlan_filter)(struct hnae3_handle *handle, bool enable);
+	int (*enable_vlan_filter)(struct hnae3_handle *handle, bool enable);
 	int (*set_vlan_filter)(struct hnae3_handle *handle, __be16 proto,
 			       u16 vlan_id, bool is_kill);
 	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
@@ -783,7 +785,6 @@ struct hnae3_roce_private_info {
 #define HNAE3_BPE		BIT(2)	/* broadcast promisc enable */
 #define HNAE3_OVERFLOW_UPE	BIT(3)	/* unicast mac vlan overflow */
 #define HNAE3_OVERFLOW_MPE	BIT(4)	/* multicast mac vlan overflow */
-#define HNAE3_VLAN_FLTR		BIT(5)	/* enable vlan filter */
 #define HNAE3_UPE		(HNAE3_USER_UPE | HNAE3_OVERFLOW_UPE)
 #define HNAE3_MPE		(HNAE3_USER_MPE | HNAE3_OVERFLOW_MPE)
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 57ba5a16ad73..3feba43586e0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -345,7 +345,13 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
 	}, {
 		.name = "support rxd advanced layout",
 		.cap_bit = HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
-	},
+	}, {
+		.name = "support port vlan bypass",
+		.cap_bit = HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B,
+	}, {
+		.name = "support modify vlan filter state",
+		.cap_bit = HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B,
+	}
 };
 
 static void hns3_dbg_fill_content(char *content, u16 len,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 43dcf3f0dbe2..393979bec170 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -908,13 +908,10 @@ static u8 hns3_get_netdev_flags(struct net_device *netdev)
 {
 	u8 flags = 0;
 
-	if (netdev->flags & IFF_PROMISC) {
+	if (netdev->flags & IFF_PROMISC)
 		flags = HNAE3_USER_UPE | HNAE3_USER_MPE | HNAE3_BPE;
-	} else {
-		flags |= HNAE3_VLAN_FLTR;
-		if (netdev->flags & IFF_ALLMULTI)
-			flags |= HNAE3_USER_MPE;
-	}
+	else if (netdev->flags & IFF_ALLMULTI)
+		flags = HNAE3_USER_MPE;
 
 	return flags;
 }
@@ -944,25 +941,6 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle)
 		ops->request_update_promisc_mode(handle);
 }
 
-void hns3_enable_vlan_filter(struct net_device *netdev, bool enable)
-{
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
-	bool last_state;
-
-	if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
-	    h->ae_algo->ops->enable_vlan_filter) {
-		last_state = h->netdev_flags & HNAE3_VLAN_FLTR ? true : false;
-		if (enable != last_state) {
-			netdev_info(netdev,
-				    "%s vlan filter\n",
-				    enable ? "enable" : "disable");
-			h->ae_algo->ops->enable_vlan_filter(h, enable);
-		}
-	}
-}
-
 static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs,
 			u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes)
 {
@@ -1980,6 +1958,14 @@ static int hns3_nic_set_features(struct net_device *netdev,
 		return -EINVAL;
 	}
 
+	if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+	    h->ae_algo->ops->enable_vlan_filter) {
+		enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER);
+		ret = h->ae_algo->ops->enable_vlan_filter(h, enable);
+		if (ret)
+			return ret;
+	}
+
 	netdev->features = features;
 	return 0;
 }
@@ -2825,6 +2811,9 @@ static void hns3_set_default_feature(struct net_device *netdev)
 		netdev->hw_features |= NETIF_F_HW_TC;
 		netdev->features |= NETIF_F_HW_TC;
 	}
+
+	if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
+		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 }
 
 static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index b038441907f9..5698a14a804e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -643,7 +643,6 @@ void hns3_set_vector_coalesce_rx_ql(struct hns3_enet_tqp_vector *tqp_vector,
 void hns3_set_vector_coalesce_tx_ql(struct hns3_enet_tqp_vector *tqp_vector,
 				    u32 ql_value);
 
-void hns3_enable_vlan_filter(struct net_device *netdev, bool enable);
 void hns3_request_update_promisc_mode(struct hnae3_handle *handle);
 
 #ifdef CONFIG_HNS3_DCB
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index c1ea403d2b56..bb7c2ec7ed6f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -88,7 +88,6 @@ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
-	bool vlan_filter_enable;
 	int ret;
 
 	if (!h->ae_algo->ops->set_loopback ||
@@ -110,14 +109,11 @@ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
 	if (ret || ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
 		return ret;
 
-	if (en) {
+	if (en)
 		h->ae_algo->ops->set_promisc_mode(h, true, true);
-	} else {
+	else
 		/* recover promisc mode before loopback test */
 		hns3_request_update_promisc_mode(h);
-		vlan_filter_enable = ndev->flags & IFF_PROMISC ? false : true;
-		hns3_enable_vlan_filter(ndev, vlan_filter_enable);
-	}
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 6aed30cc22f2..8f6ed8577aea 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -388,6 +388,10 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
 		set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_PORT_VLAN_BYPASS_B)) {
+		set_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps);
+		set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
+	}
 }
 
 static __le32 hclge_build_api_caps(void)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 12558aa0fe0a..da78a6477e46 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -236,6 +236,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_VLAN_FILTER_CTRL	    = 0x1100,
 	HCLGE_OPC_VLAN_FILTER_PF_CFG	= 0x1101,
 	HCLGE_OPC_VLAN_FILTER_VF_CFG	= 0x1102,
+	HCLGE_OPC_PORT_VLAN_BYPASS	= 0x1103,
 
 	/* Flow Director commands */
 	HCLGE_OPC_FD_MODE_CTRL		= 0x1200,
@@ -392,6 +393,7 @@ enum HCLGE_CAP_BITS {
 	HCLGE_CAP_FEC_B = 13,
 	HCLGE_CAP_PAUSE_B = 14,
 	HCLGE_CAP_RXD_ADV_LAYOUT_B = 15,
+	HCLGE_CAP_PORT_VLAN_BYPASS_B = 17,
 };
 
 enum HCLGE_API_CAP_BITS {
@@ -527,6 +529,8 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_SPEED_ABILITY_M	GENMASK(7, 0)
 #define HCLGE_CFG_SPEED_ABILITY_EXT_S	10
 #define HCLGE_CFG_SPEED_ABILITY_EXT_M	GENMASK(15, 10)
+#define HCLGE_CFG_VLAN_FLTR_CAP_S	8
+#define HCLGE_CFG_VLAN_FLTR_CAP_M	GENMASK(9, 8)
 #define HCLGE_CFG_UMV_TBL_SPACE_S	16
 #define HCLGE_CFG_UMV_TBL_SPACE_M	GENMASK(31, 16)
 #define HCLGE_CFG_PF_RSS_SIZE_S		0
@@ -811,6 +815,14 @@ struct hclge_vlan_filter_vf_cfg_cmd {
 	u8  vf_bitmap[HCLGE_MAX_VF_BYTES];
 };
 
+#define HCLGE_INGRESS_BYPASS_B		0
+struct hclge_port_vlan_filter_bypass_cmd {
+	u8 bypass_state;
+	u8 rsv1[3];
+	u8 vf_id;
+	u8 rsv2[19];
+};
+
 #define HCLGE_SWITCH_ANTI_SPOOF_B	0U
 #define HCLGE_SWITCH_ALW_LPBK_B		1U
 #define HCLGE_SWITCH_ALW_LCL_LPBK_B	2U
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 7c6b51f8d2b0..c6444d258328 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1334,6 +1334,10 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 					    HCLGE_CFG_SPEED_ABILITY_EXT_S);
 	cfg->speed_ability |= speed_ability_ext << SPEED_ABILITY_EXT_SHIFT;
 
+	cfg->vlan_fliter_cap = hnae3_get_field(__le32_to_cpu(req->param[1]),
+					       HCLGE_CFG_VLAN_FLTR_CAP_M,
+					       HCLGE_CFG_VLAN_FLTR_CAP_S);
+
 	cfg->umv_space = hnae3_get_field(__le32_to_cpu(req->param[1]),
 					 HCLGE_CFG_UMV_TBL_SPACE_M,
 					 HCLGE_CFG_UMV_TBL_SPACE_S);
@@ -1513,6 +1517,7 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev)
 
 static int hclge_configure(struct hclge_dev *hdev)
 {
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	struct hclge_cfg cfg;
 	unsigned int i;
 	int ret;
@@ -1534,6 +1539,8 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->tc_max = cfg.tc_num;
 	hdev->tm_info.hw_pfc_map = 0;
 	hdev->wanted_umv_size = cfg.umv_space;
+	if (cfg.vlan_fliter_cap == HCLGE_VLAN_FLTR_CAN_MDF)
+		set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
 
 	if (hnae3_dev_fd_supported(hdev)) {
 		hdev->fd_en = true;
@@ -1843,6 +1850,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
 		vport->mps = HCLGE_MAC_DEFAULT_FRAME;
 		vport->port_base_vlan_cfg.state = HNAE3_PORT_BASE_VLAN_DISABLE;
 		vport->rxvlan_cfg.rx_vlan_offload_en = true;
+		vport->req_vlan_fltr_en = true;
 		INIT_LIST_HEAD(&vport->vlan_list);
 		INIT_LIST_HEAD(&vport->uc_mac_list);
 		INIT_LIST_HEAD(&vport->mc_mac_list);
@@ -9381,6 +9389,28 @@ static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
 	return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd);
 }
 
+static int hclge_set_port_vlan_filter_bypass(struct hclge_dev *hdev, u8 vf_id,
+					     bool bypass_en)
+{
+	struct hclge_port_vlan_filter_bypass_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PORT_VLAN_BYPASS, false);
+	req = (struct hclge_port_vlan_filter_bypass_cmd *)desc.data;
+	req->vf_id = vf_id;
+	hnae3_set_bit(req->bypass_state, HCLGE_INGRESS_BYPASS_B,
+		      bypass_en ? 1 : 0);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to set vport%u port vlan filter bypass state, ret = %d.\n",
+			vf_id, ret);
+
+	return ret;
+}
+
 static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
 				      u8 fe_type, bool filter_en, u8 vf_id)
 {
@@ -9426,25 +9456,100 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
 #define HCLGE_FILTER_FE_INGRESS		(HCLGE_FILTER_FE_NIC_INGRESS_B \
 					| HCLGE_FILTER_FE_ROCE_INGRESS_B)
 
-static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
+static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	int ret;
+
+	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
+		return hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+						  HCLGE_FILTER_FE_EGRESS_V1_B,
+						  enable, vport->vport_id);
+
+	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
+					 HCLGE_FILTER_FE_EGRESS, enable,
+					 vport->vport_id);
+	if (ret)
+		return ret;
+
+	if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps))
+		ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id,
+							!enable);
+	else if (!vport->vport_id)
+		ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT,
+						 HCLGE_FILTER_FE_INGRESS,
+						 enable, 0);
+
+	return ret;
+}
+
+static bool hclge_need_enable_vport_vlan_filter(struct hclge_vport *vport)
+{
+	struct hnae3_handle *handle = &vport->nic;
+	struct hclge_vport_vlan_cfg *vlan, *tmp;
 	struct hclge_dev *hdev = vport->back;
 
-	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
-		hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
-					   HCLGE_FILTER_FE_EGRESS, enable, 0);
-		hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT,
-					   HCLGE_FILTER_FE_INGRESS, enable, 0);
-	} else {
-		hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_VF,
-					   HCLGE_FILTER_FE_EGRESS_V1_B, enable,
-					   0);
+	if (vport->vport_id) {
+		if (vport->port_base_vlan_cfg.state !=
+			HNAE3_PORT_BASE_VLAN_DISABLE)
+			return true;
+
+		if (vport->vf_info.trusted && vport->vf_info.request_uc_en)
+			return false;
+	} else if (handle->netdev_flags & HNAE3_USER_UPE) {
+		return false;
 	}
-	if (enable)
-		handle->netdev_flags |= HNAE3_VLAN_FLTR;
-	else
-		handle->netdev_flags &= ~HNAE3_VLAN_FLTR;
+
+	if (!vport->req_vlan_fltr_en)
+		return false;
+
+	/* compatible with former device, always enable vlan filter */
+	if (!test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, hdev->ae_dev->caps))
+		return true;
+
+	list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node)
+		if (vlan->vlan_id != 0)
+			return true;
+
+	return false;
+}
+
+static int hclge_enable_vport_vlan_filter(struct hclge_vport *vport,
+					  bool request_en)
+{
+	struct hclge_dev *hdev = vport->back;
+	bool need_en;
+	int ret;
+
+	mutex_lock(&hdev->vport_lock);
+
+	vport->req_vlan_fltr_en = request_en;
+
+	need_en = hclge_need_enable_vport_vlan_filter(vport);
+	if (need_en == vport->cur_vlan_fltr_en) {
+		mutex_unlock(&hdev->vport_lock);
+		return 0;
+	}
+
+	ret = hclge_set_vport_vlan_filter(vport, need_en);
+	if (ret) {
+		mutex_unlock(&hdev->vport_lock);
+		return ret;
+	}
+
+	vport->cur_vlan_fltr_en = need_en;
+
+	mutex_unlock(&hdev->vport_lock);
+
+	return 0;
+}
+
+static int hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+
+	return hclge_enable_vport_vlan_filter(vport, enable);
 }
 
 static int hclge_set_vf_vlan_filter_cmd(struct hclge_dev *hdev, u16 vfid,
@@ -9838,6 +9943,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 							 vport->vport_id);
 			if (ret)
 				return ret;
+			vport->cur_vlan_fltr_en = true;
 		}
 
 		ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT,
@@ -9853,8 +9959,6 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 			return ret;
 	}
 
-	handle->netdev_flags |= HNAE3_VLAN_FLTR;
-
 	hdev->vlan_type_cfg.rx_in_fst_vlan_type = HCLGE_DEF_VLAN_TYPE;
 	hdev->vlan_type_cfg.rx_in_sec_vlan_type = HCLGE_DEF_VLAN_TYPE;
 	hdev->vlan_type_cfg.rx_ot_fst_vlan_type = HCLGE_DEF_VLAN_TYPE;
@@ -10077,6 +10181,14 @@ int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 	return hclge_set_vlan_rx_offload_cfg(vport);
 }
 
+static void hclge_set_vport_vlan_fltr_change(struct hclge_vport *vport)
+{
+	struct hclge_dev *hdev = vport->back;
+
+	if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, hdev->ae_dev->caps))
+		set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, &vport->state);
+}
+
 static int hclge_update_vlan_filter_entries(struct hclge_vport *vport,
 					    u16 port_base_vlan_state,
 					    struct hclge_vlan_info *new_info,
@@ -10185,6 +10297,7 @@ out:
 		nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE;
 
 	vport->port_base_vlan_cfg.vlan_info = *vlan_info;
+	hclge_set_vport_vlan_fltr_change(vport);
 
 	return 0;
 }
@@ -10328,9 +10441,37 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
 		 */
 		set_bit(vlan_id, vport->vlan_del_fail_bmap);
 	}
+
+	hclge_set_vport_vlan_fltr_change(vport);
+
 	return ret;
 }
 
+static void hclge_sync_vlan_fltr_state(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport;
+	int ret;
+	u16 i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		if (!test_and_clear_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
+					&vport->state))
+			continue;
+
+		ret = hclge_enable_vport_vlan_filter(vport,
+						     vport->req_vlan_fltr_en);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to sync vlan filter state for vport%u, ret = %d\n",
+				vport->vport_id, ret);
+			set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
+				&vport->state);
+			return;
+		}
+	}
+}
+
 static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
 {
 #define HCLGE_MAX_SYNC_COUNT	60
@@ -10353,6 +10494,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
 
 			clear_bit(vlan_id, vport->vlan_del_fail_bmap);
 			hclge_rm_vport_vlan_table(vport, vlan_id, false);
+			hclge_set_vport_vlan_fltr_change(vport);
 
 			sync_cnt++;
 			if (sync_cnt >= HCLGE_MAX_SYNC_COUNT)
@@ -10362,6 +10504,8 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
 						 VLAN_N_VID);
 		}
 	}
+
+	hclge_sync_vlan_fltr_state(hdev);
 }
 
 static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps)
@@ -12452,8 +12596,8 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 		if (!ret) {
 			clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE,
 				  &vport->state);
-			hclge_enable_vlan_filter(handle,
-						 tmp_flags & HNAE3_VLAN_FLTR);
+			set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
+				&vport->state);
 		}
 	}
 
@@ -12481,6 +12625,7 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 				&vport->state);
 			return;
 		}
+		hclge_set_vport_vlan_fltr_change(vport);
 	}
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index cd1e40152a67..eb0365231e4e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -321,6 +321,10 @@ enum hclge_fc_mode {
 	HCLGE_FC_DEFAULT
 };
 
+enum hclge_vlan_fltr_cap {
+	HCLGE_VLAN_FLTR_DEF,
+	HCLGE_VLAN_FLTR_CAN_MDF,
+};
 enum hclge_link_fail_code {
 	HCLGE_LF_NORMAL,
 	HCLGE_LF_REF_CLOCK_LOST,
@@ -351,6 +355,7 @@ struct hclge_tc_info {
 
 struct hclge_cfg {
 	u8 tc_num;
+	u8 vlan_fliter_cap;
 	u16 tqp_desc_num;
 	u16 rx_buf_len;
 	u16 vf_rss_size_max;
@@ -957,6 +962,7 @@ enum HCLGE_VPORT_STATE {
 	HCLGE_VPORT_STATE_ALIVE,
 	HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
 	HCLGE_VPORT_STATE_PROMISC_CHANGE,
+	HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE,
 	HCLGE_VPORT_STATE_MAX
 };
 
@@ -998,6 +1004,8 @@ struct hclge_vport {
 	u32 bw_limit;		/* VSI BW Limit (0 = disabled) */
 	u8  dwrr;
 
+	bool req_vlan_fltr_en;
+	bool cur_vlan_fltr_en;
 	unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)];
 	struct hclge_port_base_vlan_config port_base_vlan_cfg;
 	struct hclge_tx_vtag_cfg  txvlan_cfg;

From 32e6d104c6fe01713a039a98842e4d2f6bb505ec Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:43 +0800
Subject: [PATCH 0748/3292] net: hns3: add query basic info support for VF

There are some features of VF depend on PF, so it's necessary
for VF to know whether PF supports. For compatibility, modify
the mailbox HCLGE_MBX_GET_TCINFO, extend its function, use to
get the basic information of PF, including mailbox api version
and PF capabilities.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/hisilicon/hns3/hclge_mbx.h   |  9 ++++-
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  3 ++
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 19 +++++++----
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 33 +++++++++++--------
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |  1 +
 5 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index a2c17af57fde..d75286202e7c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -20,7 +20,7 @@ enum HCLGE_MBX_OPCODE {
 	HCLGE_MBX_API_NEGOTIATE,	/* (VF -> PF) negotiate API version */
 	HCLGE_MBX_GET_QINFO,		/* (VF -> PF) get queue config */
 	HCLGE_MBX_GET_QDEPTH,		/* (VF -> PF) get queue depth */
-	HCLGE_MBX_GET_TCINFO,		/* (VF -> PF) get TC config */
+	HCLGE_MBX_GET_BASIC_INFO,	/* (VF -> PF) get basic info */
 	HCLGE_MBX_GET_RETA,		/* (VF -> PF) get RETA */
 	HCLGE_MBX_GET_RSS_KEY,		/* (VF -> PF) get RSS key */
 	HCLGE_MBX_GET_MAC_ADDR,		/* (VF -> PF) get MAC addr */
@@ -85,6 +85,13 @@ struct hclge_ring_chain_param {
 	u8 int_gl_index;
 };
 
+struct hclge_basic_info {
+	u8 hw_tc_map;
+	u8 rsv;
+	u16 mbx_api_version;
+	u32 pf_caps;
+};
+
 struct hclgevf_mbx_resp_status {
 	struct mutex mbx_mutex; /* protects against contending sync cmd resp */
 	u32 origin_mbx_msg;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index c79fef937ade..0ce353da2b35 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -147,6 +147,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_ae_dev_rxd_adv_layout_supported(ae_dev) \
 	test_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, (ae_dev)->caps)
 
+enum HNAE3_PF_CAP_BITS {
+	HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0,
+};
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
 #define ring_ptr_move_bw(ring, p) \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 54eee94df47a..59951949ec2d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -384,16 +384,23 @@ static int hclge_set_vf_alive(struct hclge_vport *vport,
 	return ret;
 }
 
-static void hclge_get_vf_tcinfo(struct hclge_vport *vport,
-				struct hclge_respond_to_vf_msg *resp_msg)
+static void hclge_get_basic_info(struct hclge_vport *vport,
+				 struct hclge_respond_to_vf_msg *resp_msg)
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hnae3_ae_dev *ae_dev = vport->back->ae_dev;
+	struct hclge_basic_info *basic_info;
 	unsigned int i;
 
+	basic_info = (struct hclge_basic_info *)resp_msg->data;
 	for (i = 0; i < kinfo->tc_info.num_tc; i++)
-		resp_msg->data[0] |= BIT(i);
+		basic_info->hw_tc_map |= BIT(i);
 
-	resp_msg->len = sizeof(u8);
+	if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
+		hnae3_set_bit(basic_info->pf_caps,
+			      HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B, 1);
+
+	resp_msg->len = HCLGE_MBX_MAX_RESP_DATA_SIZE;
 }
 
 static void hclge_get_vf_queue_info(struct hclge_vport *vport,
@@ -752,8 +759,8 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		case HCLGE_MBX_GET_QDEPTH:
 			hclge_get_vf_queue_depth(vport, &resp_msg);
 			break;
-		case HCLGE_MBX_GET_TCINFO:
-			hclge_get_vf_tcinfo(vport, &resp_msg);
+		case HCLGE_MBX_GET_BASIC_INFO:
+			hclge_get_basic_info(vport, &resp_msg);
 			break;
 		case HCLGE_MBX_GET_LINK_STATUS:
 			ret = hclge_push_vf_link_status(vport);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 7bef6b24e610..7c101455f1e4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -243,23 +243,31 @@ static void hclgevf_build_send_msg(struct hclge_vf_to_pf_msg *msg, u8 code,
 	}
 }
 
-static int hclgevf_get_tc_info(struct hclgevf_dev *hdev)
+static int hclgevf_get_basic_info(struct hclgevf_dev *hdev)
 {
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	u8 resp_msg[HCLGE_MBX_MAX_RESP_DATA_SIZE];
+	struct hclge_basic_info *basic_info;
 	struct hclge_vf_to_pf_msg send_msg;
-	u8 resp_msg;
+	unsigned long caps;
 	int status;
 
-	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_TCINFO, 0);
-	status = hclgevf_send_mbx_msg(hdev, &send_msg, true, &resp_msg,
+	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_GET_BASIC_INFO, 0);
+	status = hclgevf_send_mbx_msg(hdev, &send_msg, true, resp_msg,
 				      sizeof(resp_msg));
 	if (status) {
 		dev_err(&hdev->pdev->dev,
-			"VF request to get TC info from PF failed %d",
-			status);
+			"failed to get basic info from pf, ret = %d", status);
 		return status;
 	}
 
-	hdev->hw_tc_map = resp_msg;
+	basic_info = (struct hclge_basic_info *)resp_msg;
+
+	hdev->hw_tc_map = basic_info->hw_tc_map;
+	hdev->mbx_api_version = basic_info->mbx_api_version;
+	caps = basic_info->pf_caps;
+	if (test_bit(HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B, &caps))
+		set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
 
 	return 0;
 }
@@ -2466,6 +2474,10 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
 {
 	int ret;
 
+	ret = hclgevf_get_basic_info(hdev);
+	if (ret)
+		return ret;
+
 	/* get current port based vlan state from PF */
 	ret = hclgevf_get_port_base_vlan_filter_state(hdev);
 	if (ret)
@@ -2481,12 +2493,7 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
 	if (ret)
 		return ret;
 
-	ret = hclgevf_get_pf_media_type(hdev);
-	if (ret)
-		return ret;
-
-	/* get tc configuration from PF */
-	return hclgevf_get_tc_info(hdev);
+	return hclgevf_get_pf_media_type(hdev);
 }
 
 static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index b146d04526de..d7d02848d674 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -285,6 +285,7 @@ struct hclgevf_dev {
 	struct semaphore reset_sem;	/* protect reset process */
 
 	u32 fw_version;
+	u16 mbx_api_version;
 	u16 num_tqps;		/* num task queue pairs of this VF */
 
 	u16 alloc_rss_size;	/* allocated RSS task queue */

From fa6a262a255003ebb1f514fffd3399f3946d4fc9 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:44 +0800
Subject: [PATCH 0749/3292] net: hns3: add support for VF modify VLAN filter
 state

Previously, there is hardware limitation for VF to modify
the VLAN filter state, and the VLAN filter state is default
enabled. Now the limitation has been removed in some device,
so add capability flag to check whether the device supports
modify VLAN filter state. If flag on, user will be able to
modify the VLAN filter state by ethtool -K.
VF needs to send mailbox to request the PF to modify the VLAN
filter state for it.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h |  1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c |  3 +--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h |  1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c  |  2 ++
 .../hisilicon/hns3/hns3vf/hclgevf_main.c        | 17 +++++++++++++++++
 5 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index d75286202e7c..0a6cda309b24 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -69,6 +69,7 @@ enum hclge_mbx_vlan_cfg_subcode {
 	HCLGE_MBX_VLAN_RX_OFF_CFG,	/* set rx side vlan offload */
 	HCLGE_MBX_PORT_BASE_VLAN_CFG,	/* set port based vlan configuration */
 	HCLGE_MBX_GET_PORT_BASE_VLAN_STATE,	/* get port based vlan state */
+	HCLGE_MBX_ENABLE_VLAN_FILTER,
 };
 
 enum hclge_mbx_tbl_cfg_subcode {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c6444d258328..35aa4ac4b09b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9515,8 +9515,7 @@ static bool hclge_need_enable_vport_vlan_filter(struct hclge_vport *vport)
 	return false;
 }
 
-static int hclge_enable_vport_vlan_filter(struct hclge_vport *vport,
-					  bool request_en)
+int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en)
 {
 	struct hclge_dev *hdev = vport->back;
 	bool need_en;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index eb0365231e4e..bb778433f63d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1107,4 +1107,5 @@ void hclge_report_hw_error(struct hclge_dev *hdev,
 void hclge_inform_vf_promisc_info(struct hclge_vport *vport);
 int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len);
 int hclge_push_vf_link_status(struct hclge_vport *vport);
+int hclge_enable_vport_vlan_filter(struct hclge_vport *vport, bool request_en);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 59951949ec2d..e10a2c36b706 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -365,6 +365,8 @@ static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
 			vport->port_base_vlan_cfg.state;
 		resp_msg->len = sizeof(u8);
 		return 0;
+	case HCLGE_MBX_ENABLE_VLAN_FILTER:
+		return hclge_enable_vport_vlan_filter(vport, msg_cmd->enable);
 	default:
 		return 0;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 7c101455f1e4..f84b3a135c06 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1650,6 +1650,22 @@ static void hclgevf_uninit_mac_list(struct hclgevf_dev *hdev)
 	spin_unlock_bh(&hdev->mac_table.mac_list_lock);
 }
 
+static int hclgevf_enable_vlan_filter(struct hnae3_handle *handle, bool enable)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct hclge_vf_to_pf_msg send_msg;
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
+		return -EOPNOTSUPP;
+
+	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN,
+			       HCLGE_MBX_ENABLE_VLAN_FILTER);
+	send_msg.data[0] = enable ? 1 : 0;
+
+	return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0);
+}
+
 static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
 				   __be16 proto, u16 vlan_id,
 				   bool is_kill)
@@ -3808,6 +3824,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.get_tc_size = hclgevf_get_tc_size,
 	.get_fw_version = hclgevf_get_fw_version,
 	.set_vlan_filter = hclgevf_set_vlan_filter,
+	.enable_vlan_filter = hclgevf_enable_vlan_filter,
 	.enable_hw_strip_rxvtag = hclgevf_en_hw_strip_rxvtag,
 	.reset_event = hclgevf_reset_event,
 	.set_default_reset_request = hclgevf_set_def_reset_request,

From 0ca821da86a5ec24eb2ece24fe87e5bf518c5939 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 31 May 2021 10:38:45 +0800
Subject: [PATCH 0750/3292] net: hns3: add debugfs support for vlan
 configuration

Add debugfs support for vlan configuraion. create a single file
"vlan_config" for it, and query it by command "cat vlan_config",
return the result to userspace.

The new display style is below:
$ cat vlan_config
I_PORT_VLAN_FILTER: on
E_PORT_VLAN_FILTER: off
FUNC_ID  I_VF_VLAN_FILTER  E_VF_VLAN_FILTER  PORT_VLAN_FILTER_BYPASS
pf       off               on                off
vf0      off               on                off
FUNC_ID  PVID    ACCEPT_TAG1  ACCEPT_TAG2  ACCEPT_UNTAG1  ACCEPT_UNTAG2
pf       0       on           on           on             on
vf0      0       on           on           on             on

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |   7 +
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 283 ++++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |  19 ++
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  12 -
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  12 +
 6 files changed, 322 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0ce353da2b35..89b2b7fa7b8b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -288,6 +288,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_REG_TQP,
 	HNAE3_DBG_CMD_REG_MAC,
 	HNAE3_DBG_CMD_REG_DCB,
+	HNAE3_DBG_CMD_VLAN_CONFIG,
 	HNAE3_DBG_CMD_QUEUE_MAP,
 	HNAE3_DBG_CMD_RX_QUEUE_INFO,
 	HNAE3_DBG_CMD_TX_QUEUE_INFO,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 3feba43586e0..cf1efd2f4a0f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -309,6 +309,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "vlan_config",
+		.cmd = HNAE3_DBG_CMD_VLAN_CONFIG,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 0b7c6838d905..0d433a5ff807 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1894,6 +1894,285 @@ static void hclge_dbg_dump_mac_list(struct hclge_dev *hdev, char *buf, int len,
 	}
 }
 
+static int hclge_get_vlan_rx_offload_cfg(struct hclge_dev *hdev, u8 vf_id,
+					 struct hclge_dbg_vlan_cfg *vlan_cfg)
+{
+	struct hclge_vport_vtag_rx_cfg_cmd *req;
+	struct hclge_desc desc;
+	u16 bmap_index;
+	u8 rx_cfg;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_RX_CFG, true);
+
+	req = (struct hclge_vport_vtag_rx_cfg_cmd *)desc.data;
+	req->vf_offset = vf_id / HCLGE_VF_NUM_PER_CMD;
+	bmap_index = vf_id % HCLGE_VF_NUM_PER_CMD / HCLGE_VF_NUM_PER_BYTE;
+	req->vf_bitmap[bmap_index] = 1U << (vf_id % HCLGE_VF_NUM_PER_BYTE);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get vport%u rxvlan cfg, ret = %d\n",
+			vf_id, ret);
+		return ret;
+	}
+
+	rx_cfg = req->vport_vlan_cfg;
+	vlan_cfg->strip_tag1 = hnae3_get_bit(rx_cfg, HCLGE_REM_TAG1_EN_B);
+	vlan_cfg->strip_tag2 = hnae3_get_bit(rx_cfg, HCLGE_REM_TAG2_EN_B);
+	vlan_cfg->drop_tag1 = hnae3_get_bit(rx_cfg, HCLGE_DISCARD_TAG1_EN_B);
+	vlan_cfg->drop_tag2 = hnae3_get_bit(rx_cfg, HCLGE_DISCARD_TAG2_EN_B);
+	vlan_cfg->pri_only1 = hnae3_get_bit(rx_cfg, HCLGE_SHOW_TAG1_EN_B);
+	vlan_cfg->pri_only2 = hnae3_get_bit(rx_cfg, HCLGE_SHOW_TAG2_EN_B);
+
+	return 0;
+}
+
+static int hclge_get_vlan_tx_offload_cfg(struct hclge_dev *hdev, u8 vf_id,
+					 struct hclge_dbg_vlan_cfg *vlan_cfg)
+{
+	struct hclge_vport_vtag_tx_cfg_cmd *req;
+	struct hclge_desc desc;
+	u16 bmap_index;
+	u8 tx_cfg;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_PORT_TX_CFG, true);
+	req = (struct hclge_vport_vtag_tx_cfg_cmd *)desc.data;
+	req->vf_offset = vf_id / HCLGE_VF_NUM_PER_CMD;
+	bmap_index = vf_id % HCLGE_VF_NUM_PER_CMD / HCLGE_VF_NUM_PER_BYTE;
+	req->vf_bitmap[bmap_index] = 1U << (vf_id % HCLGE_VF_NUM_PER_BYTE);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get vport%u txvlan cfg, ret = %d\n",
+			vf_id, ret);
+		return ret;
+	}
+
+	tx_cfg = req->vport_vlan_cfg;
+	vlan_cfg->pvid = le16_to_cpu(req->def_vlan_tag1);
+
+	vlan_cfg->accept_tag1 = hnae3_get_bit(tx_cfg, HCLGE_ACCEPT_TAG1_B);
+	vlan_cfg->accept_tag2 = hnae3_get_bit(tx_cfg, HCLGE_ACCEPT_TAG2_B);
+	vlan_cfg->accept_untag1 = hnae3_get_bit(tx_cfg, HCLGE_ACCEPT_UNTAG1_B);
+	vlan_cfg->accept_untag2 = hnae3_get_bit(tx_cfg, HCLGE_ACCEPT_UNTAG2_B);
+	vlan_cfg->insert_tag1 = hnae3_get_bit(tx_cfg, HCLGE_PORT_INS_TAG1_EN_B);
+	vlan_cfg->insert_tag2 = hnae3_get_bit(tx_cfg, HCLGE_PORT_INS_TAG2_EN_B);
+	vlan_cfg->shift_tag = hnae3_get_bit(tx_cfg, HCLGE_TAG_SHIFT_MODE_EN_B);
+
+	return 0;
+}
+
+static int hclge_get_vlan_filter_config_cmd(struct hclge_dev *hdev,
+					    u8 vlan_type, u8 vf_id,
+					    struct hclge_desc *desc)
+{
+	struct hclge_vlan_filter_ctrl_cmd *req;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(desc, HCLGE_OPC_VLAN_FILTER_CTRL, true);
+	req = (struct hclge_vlan_filter_ctrl_cmd *)desc->data;
+	req->vlan_type = vlan_type;
+	req->vf_id = vf_id;
+
+	ret = hclge_cmd_send(&hdev->hw, desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to get vport%u vlan filter config, ret = %d.\n",
+			vf_id, ret);
+
+	return ret;
+}
+
+static int hclge_get_vlan_filter_state(struct hclge_dev *hdev, u8 vlan_type,
+				       u8 vf_id, u8 *vlan_fe)
+{
+	struct hclge_vlan_filter_ctrl_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	ret = hclge_get_vlan_filter_config_cmd(hdev, vlan_type, vf_id, &desc);
+	if (ret)
+		return ret;
+
+	req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data;
+	*vlan_fe = req->vlan_fe;
+
+	return 0;
+}
+
+static int hclge_get_port_vlan_filter_bypass_state(struct hclge_dev *hdev,
+						   u8 vf_id, u8 *bypass_en)
+{
+	struct hclge_port_vlan_filter_bypass_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, hdev->ae_dev->caps))
+		return 0;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PORT_VLAN_BYPASS, true);
+	req = (struct hclge_port_vlan_filter_bypass_cmd *)desc.data;
+	req->vf_id = vf_id;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get vport%u port vlan filter bypass state, ret = %d.\n",
+			vf_id, ret);
+		return ret;
+	}
+
+	*bypass_en = hnae3_get_bit(req->bypass_state, HCLGE_INGRESS_BYPASS_B);
+
+	return 0;
+}
+
+static const struct hclge_dbg_item vlan_filter_items[] = {
+	{ "FUNC_ID", 2 },
+	{ "I_VF_VLAN_FILTER", 2 },
+	{ "E_VF_VLAN_FILTER", 2 },
+	{ "PORT_VLAN_FILTER_BYPASS", 0 }
+};
+
+static const struct hclge_dbg_item vlan_offload_items[] = {
+	{ "FUNC_ID", 2 },
+	{ "PVID", 4 },
+	{ "ACCEPT_TAG1", 2 },
+	{ "ACCEPT_TAG2", 2 },
+	{ "ACCEPT_UNTAG1", 2 },
+	{ "ACCEPT_UNTAG2", 2 },
+	{ "INSERT_TAG1", 2 },
+	{ "INSERT_TAG2", 2 },
+	{ "SHIFT_TAG", 2 },
+	{ "STRIP_TAG1", 2 },
+	{ "STRIP_TAG2", 2 },
+	{ "DROP_TAG1", 2 },
+	{ "DROP_TAG2", 2 },
+	{ "PRI_ONLY_TAG1", 2 },
+	{ "PRI_ONLY_TAG2", 0 }
+};
+
+static int hclge_dbg_dump_vlan_filter_config(struct hclge_dev *hdev, char *buf,
+					     int len, int *pos)
+{
+	char content[HCLGE_DBG_VLAN_FLTR_INFO_LEN], str_id[HCLGE_DBG_ID_LEN];
+	const char *result[ARRAY_SIZE(vlan_filter_items)];
+	u8 i, j, vlan_fe, bypass, ingress, egress;
+	u8 func_num = pci_num_vf(hdev->pdev) + 1; /* pf and enabled vf num */
+	int ret;
+
+	ret = hclge_get_vlan_filter_state(hdev, HCLGE_FILTER_TYPE_PORT, 0,
+					  &vlan_fe);
+	if (ret)
+		return ret;
+	ingress = vlan_fe & HCLGE_FILTER_FE_NIC_INGRESS_B;
+	egress = vlan_fe & HCLGE_FILTER_FE_NIC_EGRESS_B ? 1 : 0;
+
+	*pos += scnprintf(buf, len, "I_PORT_VLAN_FILTER: %s\n",
+			  state_str[ingress]);
+	*pos += scnprintf(buf + *pos, len - *pos, "E_PORT_VLAN_FILTER: %s\n",
+			  state_str[egress]);
+
+	hclge_dbg_fill_content(content, sizeof(content), vlan_filter_items,
+			       NULL, ARRAY_SIZE(vlan_filter_items));
+	*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+
+	for (i = 0; i < func_num; i++) {
+		ret = hclge_get_vlan_filter_state(hdev, HCLGE_FILTER_TYPE_VF, i,
+						  &vlan_fe);
+		if (ret)
+			return ret;
+
+		ingress = vlan_fe & HCLGE_FILTER_FE_NIC_INGRESS_B;
+		egress = vlan_fe & HCLGE_FILTER_FE_NIC_EGRESS_B ? 1 : 0;
+		ret = hclge_get_port_vlan_filter_bypass_state(hdev, i, &bypass);
+		if (ret)
+			return ret;
+		j = 0;
+		result[j++] = hclge_dbg_get_func_id_str(str_id, i);
+		result[j++] = state_str[ingress];
+		result[j++] = state_str[egress];
+		result[j++] =
+			test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B,
+				 hdev->ae_dev->caps) ? state_str[bypass] : "NA";
+		hclge_dbg_fill_content(content, sizeof(content),
+				       vlan_filter_items, result,
+				       ARRAY_SIZE(vlan_filter_items));
+		*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+	}
+	*pos += scnprintf(buf + *pos, len - *pos, "\n");
+
+	return 0;
+}
+
+static int hclge_dbg_dump_vlan_offload_config(struct hclge_dev *hdev, char *buf,
+					      int len, int *pos)
+{
+	char str_id[HCLGE_DBG_ID_LEN], str_pvid[HCLGE_DBG_ID_LEN];
+	const char *result[ARRAY_SIZE(vlan_offload_items)];
+	char content[HCLGE_DBG_VLAN_OFFLOAD_INFO_LEN];
+	u8 func_num = pci_num_vf(hdev->pdev) + 1; /* pf and enabled vf num */
+	struct hclge_dbg_vlan_cfg vlan_cfg;
+	int ret;
+	u8 i, j;
+
+	hclge_dbg_fill_content(content, sizeof(content), vlan_offload_items,
+			       NULL, ARRAY_SIZE(vlan_offload_items));
+	*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+
+	for (i = 0; i < func_num; i++) {
+		ret = hclge_get_vlan_tx_offload_cfg(hdev, i, &vlan_cfg);
+		if (ret)
+			return ret;
+
+		ret = hclge_get_vlan_rx_offload_cfg(hdev, i, &vlan_cfg);
+		if (ret)
+			return ret;
+
+		sprintf(str_pvid, "%u", vlan_cfg.pvid);
+		j = 0;
+		result[j++] = hclge_dbg_get_func_id_str(str_id, i);
+		result[j++] = str_pvid;
+		result[j++] = state_str[vlan_cfg.accept_tag1];
+		result[j++] = state_str[vlan_cfg.accept_tag2];
+		result[j++] = state_str[vlan_cfg.accept_untag1];
+		result[j++] = state_str[vlan_cfg.accept_untag2];
+		result[j++] = state_str[vlan_cfg.insert_tag1];
+		result[j++] = state_str[vlan_cfg.insert_tag2];
+		result[j++] = state_str[vlan_cfg.shift_tag];
+		result[j++] = state_str[vlan_cfg.strip_tag1];
+		result[j++] = state_str[vlan_cfg.strip_tag2];
+		result[j++] = state_str[vlan_cfg.drop_tag1];
+		result[j++] = state_str[vlan_cfg.drop_tag2];
+		result[j++] = state_str[vlan_cfg.pri_only1];
+		result[j++] = state_str[vlan_cfg.pri_only2];
+
+		hclge_dbg_fill_content(content, sizeof(content),
+				       vlan_offload_items, result,
+				       ARRAY_SIZE(vlan_offload_items));
+		*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+	}
+
+	return 0;
+}
+
+static int hclge_dbg_dump_vlan_config(struct hclge_dev *hdev, char *buf,
+				      int len)
+{
+	int pos = 0;
+	int ret;
+
+	ret = hclge_dbg_dump_vlan_filter_config(hdev, buf, len, &pos);
+	if (ret)
+		return ret;
+
+	return hclge_dbg_dump_vlan_offload_config(hdev, buf, len, &pos);
+}
+
 static int hclge_dbg_dump_mac_uc(struct hclge_dev *hdev, char *buf, int len)
 {
 	hclge_dbg_dump_mac_list(hdev, buf, len, true);
@@ -2037,6 +2316,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_SERV_INFO,
 		.dbg_dump = hclge_dbg_dump_serv_info,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_VLAN_CONFIG,
+		.dbg_dump = hclge_dbg_dump_vlan_config,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 642752e65a7c..c526591a7240 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -735,6 +735,8 @@ static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
 };
 
 #define HCLGE_DBG_INFO_LEN			256
+#define HCLGE_DBG_VLAN_FLTR_INFO_LEN		256
+#define HCLGE_DBG_VLAN_OFFLOAD_INFO_LEN		512
 #define HCLGE_DBG_ID_LEN			16
 #define HCLGE_DBG_ITEM_NAME_LEN			32
 #define HCLGE_DBG_DATA_STR_LEN			32
@@ -747,4 +749,21 @@ struct hclge_dbg_item {
 	u16 interval; /* blank numbers after the item */
 };
 
+struct hclge_dbg_vlan_cfg {
+	u16 pvid;
+	u8 accept_tag1;
+	u8 accept_tag2;
+	u8 accept_untag1;
+	u8 accept_untag2;
+	u8 insert_tag1;
+	u8 insert_tag2;
+	u8 shift_tag;
+	u8 strip_tag1;
+	u8 strip_tag2;
+	u8 drop_tag1;
+	u8 drop_tag2;
+	u8 pri_only1;
+	u8 pri_only2;
+};
+
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 35aa4ac4b09b..6ecc106af334 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9444,18 +9444,6 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
 	return ret;
 }
 
-#define HCLGE_FILTER_TYPE_VF		0
-#define HCLGE_FILTER_TYPE_PORT		1
-#define HCLGE_FILTER_FE_EGRESS_V1_B	BIT(0)
-#define HCLGE_FILTER_FE_NIC_INGRESS_B	BIT(0)
-#define HCLGE_FILTER_FE_NIC_EGRESS_B	BIT(1)
-#define HCLGE_FILTER_FE_ROCE_INGRESS_B	BIT(2)
-#define HCLGE_FILTER_FE_ROCE_EGRESS_B	BIT(3)
-#define HCLGE_FILTER_FE_EGRESS		(HCLGE_FILTER_FE_NIC_EGRESS_B \
-					| HCLGE_FILTER_FE_ROCE_EGRESS_B)
-#define HCLGE_FILTER_FE_INGRESS		(HCLGE_FILTER_FE_NIC_INGRESS_B \
-					| HCLGE_FILTER_FE_ROCE_INGRESS_B)
-
 static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable)
 {
 	struct hclge_dev *hdev = vport->back;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index bb778433f63d..7595f841aaac 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -321,6 +321,18 @@ enum hclge_fc_mode {
 	HCLGE_FC_DEFAULT
 };
 
+#define HCLGE_FILTER_TYPE_VF		0
+#define HCLGE_FILTER_TYPE_PORT		1
+#define HCLGE_FILTER_FE_EGRESS_V1_B	BIT(0)
+#define HCLGE_FILTER_FE_NIC_INGRESS_B	BIT(0)
+#define HCLGE_FILTER_FE_NIC_EGRESS_B	BIT(1)
+#define HCLGE_FILTER_FE_ROCE_INGRESS_B	BIT(2)
+#define HCLGE_FILTER_FE_ROCE_EGRESS_B	BIT(3)
+#define HCLGE_FILTER_FE_EGRESS		(HCLGE_FILTER_FE_NIC_EGRESS_B \
+					| HCLGE_FILTER_FE_ROCE_EGRESS_B)
+#define HCLGE_FILTER_FE_INGRESS		(HCLGE_FILTER_FE_NIC_INGRESS_B \
+					| HCLGE_FILTER_FE_ROCE_INGRESS_B)
+
 enum hclge_vlan_fltr_cap {
 	HCLGE_VLAN_FLTR_DEF,
 	HCLGE_VLAN_FLTR_CAN_MDF,

From 6e1e89418a5ccdfb325aed538307c2f9dba6ef51 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Sat, 29 May 2021 15:52:02 +0800
Subject: [PATCH 0751/3292] xfrm: Remove the repeated declaration

Function 'xfrm_parse_spi' is declared twice, so remove the
repeated declaration.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6232a5f048bd..b30623678430 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1571,7 +1571,6 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
 int xfrm4_transport_finish(struct sk_buff *skb, int async);
 int xfrm4_rcv(struct sk_buff *skb);
-int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq);
 
 static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {

From eebd49a4ffb420a991c606e54aa3c9f02857a334 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 29 May 2021 16:23:18 -0400
Subject: [PATCH 0752/3292] xfrm: remove the fragment check for ipv6 beet mode

In commit 68dc022d04eb ("xfrm: BEET mode doesn't support fragments
for inner packets"), it tried to fix the issue that in TX side the
packet is fragmented before the ESP encapping while in the RX side
the fragments always get reassembled before decapping with ESP.

This is not true for IPv6. IPv6 is different, and it's using exthdr
to save fragment info, as well as the ESP info. Exthdrs are added
in TX and processed in RX both in order. So in the above case, the
ESP decapping will be done earlier than the fragment reassembling
in TX side.

Here just remove the fragment check for the IPv6 inner packets to
recover the fragments support for BEET mode.

Fixes: 68dc022d04eb ("xfrm: BEET mode doesn't support fragments for inner packets")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_output.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4cb0ff4dcf4..ac907b9d32d1 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -711,15 +711,8 @@ out:
 static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-	unsigned int ptr = 0;
 	int err;
 
-	if (x->outer_mode.encap == XFRM_MODE_BEET &&
-	    ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) {
-		net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n");
-		return -EAFNOSUPPORT;
-	}
-
 	err = xfrm6_tunnel_check_size(skb);
 	if (err)
 		return err;

From dd8b477f9a3d8edb136207acb3652e1a34a661b7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Tue, 1 Jun 2021 11:33:59 +0200
Subject: [PATCH 0753/3292] mount: Support "nosymfollow" in new mount api

Commit dab741e0e02b ("Add a "nosymfollow" mount option.") added support
for the "nosymfollow" mount option allowing to block following symlinks
when resolving paths. The mount option so far was only available in the
old mount api. Make it available in the new mount api as well. Bonus is
that it can be applied to a whole subtree not just a single mount.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Mattias Nissler <mnissler@chromium.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ross Zwisler <zwisler@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 fs/namespace.c             | 9 ++++++---
 include/uapi/linux/mount.h | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index c3f1a78ba369..ab4174a3c802 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3464,9 +3464,10 @@ out_type:
 	return ret;
 }
 
-#define FSMOUNT_VALID_FLAGS \
-	(MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
-	 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME)
+#define FSMOUNT_VALID_FLAGS                                                    \
+	(MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV |            \
+	 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME |       \
+	 MOUNT_ATTR_NOSYMFOLLOW)
 
 #define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
 
@@ -3487,6 +3488,8 @@ static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
 		mnt_flags |= MNT_NOEXEC;
 	if (attr_flags & MOUNT_ATTR_NODIRATIME)
 		mnt_flags |= MNT_NODIRATIME;
+	if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
+		mnt_flags |= MNT_NOSYMFOLLOW;
 
 	return mnt_flags;
 }
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index e6524ead2b7b..dd7a166fdf9c 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -120,6 +120,7 @@ enum fsconfig_command {
 #define MOUNT_ATTR_STRICTATIME	0x00000020 /* - Always perform atime updates */
 #define MOUNT_ATTR_NODIRATIME	0x00000080 /* Do not update directory access times */
 #define MOUNT_ATTR_IDMAP	0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+#define MOUNT_ATTR_NOSYMFOLLOW	0x00200000 /* Do not follow symlinks */
 
 /*
  * mount_setattr()

From 05924717ac704a868053652b20036aa3a2273e26 Mon Sep 17 00:00:00 2001
From: Harishankar Vishwanathan <harishankar.vishwanathan@rutgers.edu>
Date: Sun, 30 May 2021 22:01:57 -0400
Subject: [PATCH 0754/3292] bpf, tnums: Provably sound, faster, and more
 precise algorithm for tnum_mul

This patch introduces a new algorithm for multiplication of tristate
numbers (tnums) that is provably sound. It is faster and more precise when
compared to the existing method.

Like the existing method, this new algorithm follows the long
multiplication algorithm. The idea is to generate partial products by
multiplying each bit in the multiplier (tnum a) with the multiplicand
(tnum b), and adding the partial products after appropriately bit-shifting
them. The new algorithm, however, uses just a single loop over the bits of
the multiplier (tnum a) and accumulates only the uncertain components of
the multiplicand (tnum b) into a mask-only tnum. The following paper
explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.

A natural way to construct the tnum product is by performing a tnum
addition on all the partial products. This algorithm presents another
method of doing this: decompose each partial product into two tnums,
consisting of the values and the masks separately. The mask-sum is
accumulated within the loop in acc_m. The value-sum tnum is generated
using a.value * b.value. The tnum constructed by tnum addition of the
value-sum and the mask-sum contains all possible summations of concrete
values drawn from the partial product tnums pairwise. We prove this result
in the paper.

Our evaluations show that the new algorithm is overall more precise
(producing tnums with less uncertain components) than the existing method.
As an illustrative example, consider the input tnums A and B. The numbers
in the parenthesis correspond to (value;mask).

  A                = 000000x1 (1;2)
  B                = 0010011x (38;1)
  A * B (existing) = xxxxxxxx (0;255)
  A * B (new)      = 0x1xxxxx (32;95)

Importantly, we present a proof of soundness of the new algorithm in the
aforementioned paper. Additionally, we show that this new algorithm is
empirically faster than the existing method.

Co-developed-by: Matan Shachnai <m.shachnai@rutgers.edu>
Co-developed-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Co-developed-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Matan Shachnai <m.shachnai@rutgers.edu>
Signed-off-by: Srinivas Narayana <srinivas.narayana@rutgers.edu>
Signed-off-by: Santosh Nagarakatte <santosh.nagarakatte@rutgers.edu>
Signed-off-by: Harishankar Vishwanathan <harishankar.vishwanathan@rutgers.edu>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Edward Cree <ecree.xilinx@gmail.com>
Link: https://arxiv.org/abs/2105.05398
Link: https://lore.kernel.org/bpf/20210531020157.7386-1-harishankar.vishwanathan@rutgers.edu
---
 kernel/bpf/tnum.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index ceac5281bd31..3d7127f439a1 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -111,28 +111,31 @@ struct tnum tnum_xor(struct tnum a, struct tnum b)
 	return TNUM(v & ~mu, mu);
 }
 
-/* half-multiply add: acc += (unknown * mask * value).
- * An intermediate step in the multiply algorithm.
+/* Generate partial products by multiplying each bit in the multiplier (tnum a)
+ * with the multiplicand (tnum b), and add the partial products after
+ * appropriately bit-shifting them. Instead of directly performing tnum addition
+ * on the generated partial products, equivalenty, decompose each partial
+ * product into two tnums, consisting of the value-sum (acc_v) and the
+ * mask-sum (acc_m) and then perform tnum addition on them. The following paper
+ * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
  */
-static struct tnum hma(struct tnum acc, u64 value, u64 mask)
-{
-	while (mask) {
-		if (mask & 1)
-			acc = tnum_add(acc, TNUM(0, value));
-		mask >>= 1;
-		value <<= 1;
-	}
-	return acc;
-}
-
 struct tnum tnum_mul(struct tnum a, struct tnum b)
 {
-	struct tnum acc;
-	u64 pi;
+	u64 acc_v = a.value * b.value;
+	struct tnum acc_m = TNUM(0, 0);
 
-	pi = a.value * b.value;
-	acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
-	return hma(acc, b.mask, a.value);
+	while (a.value || a.mask) {
+		/* LSB of tnum a is a certain 1 */
+		if (a.value & 1)
+			acc_m = tnum_add(acc_m, TNUM(0, b.mask));
+		/* LSB of tnum a is uncertain */
+		else if (a.mask & 1)
+			acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
+		/* Note: no case for LSB is certain 0 */
+		a = tnum_rshift(a, 1);
+		b = tnum_lshift(b, 1);
+	}
+	return tnum_add(TNUM(acc_v, 0), acc_m);
 }
 
 /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has

From 5990b5d770cbfe2b4254d870240e9863aca421e3 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Tue, 1 Jun 2021 11:55:04 +0200
Subject: [PATCH 0755/3292] tests: test MOUNT_ATTR_NOSYMFOLLOW with
 mount_setattr()

Add tests to verify that MOUNT_ATTR_NOSYMFOLLOW is honored.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Mattias Nissler <mnissler@chromium.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ross Zwisler <zwisler@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 .../mount_setattr/mount_setattr_test.c        | 88 ++++++++++++++++++-
 1 file changed, 85 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 4e94e566e040..f31205f04ee0 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -136,6 +136,10 @@ struct mount_attr {
 #define MOUNT_ATTR_IDMAP 0x00100000
 #endif
 
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
 static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
 				    struct mount_attr *attr, size_t size)
 {
@@ -235,6 +239,10 @@ static int prepare_unpriv_mountns(void)
 	return 0;
 }
 
+#ifndef ST_NOSYMFOLLOW
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
+#endif
+
 static int read_mnt_flags(const char *path)
 {
 	int ret;
@@ -245,9 +253,9 @@ static int read_mnt_flags(const char *path)
 	if (ret != 0)
 		return -EINVAL;
 
-	if (stat.f_flag &
-	    ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
-	      ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
+			    ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+			    ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
 		return -EINVAL;
 
 	mnt_flags = 0;
@@ -269,6 +277,8 @@ static int read_mnt_flags(const char *path)
 		mnt_flags |= MS_SYNCHRONOUS;
 	if (stat.f_flag & ST_MANDLOCK)
 		mnt_flags |= ST_MANDLOCK;
+	if (stat.f_flag & ST_NOSYMFOLLOW)
+		mnt_flags |= ST_NOSYMFOLLOW;
 
 	return mnt_flags;
 }
@@ -368,8 +378,13 @@ static bool mount_setattr_supported(void)
 FIXTURE(mount_setattr) {
 };
 
+#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
+#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
+
 FIXTURE_SETUP(mount_setattr)
 {
+	int fd = -EBADF;
+
 	if (!mount_setattr_supported())
 		SKIP(return, "mount_setattr syscall not supported");
 
@@ -412,6 +427,11 @@ FIXTURE_SETUP(mount_setattr)
 
 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
+	ASSERT_GT(fd, 0);
+	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
+	ASSERT_EQ(close(fd), 0);
 }
 
 FIXTURE_TEARDOWN(mount_setattr)
@@ -1421,4 +1441,66 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
 }
 
+TEST_F(mount_setattr, mount_attr_nosymfollow)
+{
+	int fd;
+	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+	struct mount_attr attr = {
+		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
+	};
+
+	if (!mount_setattr_supported())
+		SKIP(return, "mount_setattr syscall not supported");
+
+	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+	ASSERT_GT(fd, 0);
+	ASSERT_EQ(close(fd), 0);
+
+	old_flags = read_mnt_flags("/mnt/A");
+	ASSERT_GT(old_flags, 0);
+
+	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+	expected_flags = old_flags;
+	expected_flags |= ST_NOSYMFOLLOW;
+
+	new_flags = read_mnt_flags("/mnt/A");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	new_flags = read_mnt_flags("/mnt/A/AA");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	new_flags = read_mnt_flags("/mnt/A/AA/B");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+	ASSERT_LT(fd, 0);
+	ASSERT_EQ(errno, ELOOP);
+
+	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
+	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
+
+	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+	expected_flags &= ~ST_NOSYMFOLLOW;
+	new_flags = read_mnt_flags("/mnt/A");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	new_flags = read_mnt_flags("/mnt/A/AA");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	new_flags = read_mnt_flags("/mnt/A/AA/B");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+	ASSERT_EQ(new_flags, expected_flags);
+
+	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+	ASSERT_GT(fd, 0);
+	ASSERT_EQ(close(fd), 0);
+}
+
 TEST_HARNESS_MAIN

From 7c3f828b522b07adb341b08fde1660685c5ba3eb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:51 +0200
Subject: [PATCH 0756/3292] block: refactor device number setup in
 __device_add_disk

Untangle the mess around blk_alloc_devt by moving the check for
the used allocation scheme into the callers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h             |  4 +-
 block/genhd.c           | 96 ++++++++++++++++-------------------------
 block/partitions/core.c | 15 +++++--
 3 files changed, 49 insertions(+), 66 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index 8b3591aee0a5..cba3a94aabfa 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -343,8 +343,8 @@ static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
 static inline void blk_queue_clear_zone_settings(struct request_queue *q) {}
 #endif
 
-int blk_alloc_devt(struct block_device *part, dev_t *devt);
-void blk_free_devt(dev_t devt);
+int blk_alloc_ext_minor(void);
+void blk_free_ext_minor(unsigned int minor);
 char *disk_name(struct gendisk *hd, int partno, char *buf);
 #define ADDPART_FLAG_NONE	0
 #define ADDPART_FLAG_RAID	1
diff --git a/block/genhd.c b/block/genhd.c
index 9f8cb7beaad1..3daab80201df 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -333,52 +333,22 @@ static int blk_mangle_minor(int minor)
 	return minor;
 }
 
-/**
- * blk_alloc_devt - allocate a dev_t for a block device
- * @bdev: block device to allocate dev_t for
- * @devt: out parameter for resulting dev_t
- *
- * Allocate a dev_t for block device.
- *
- * RETURNS:
- * 0 on success, allocated dev_t is returned in *@devt.  -errno on
- * failure.
- *
- * CONTEXT:
- * Might sleep.
- */
-int blk_alloc_devt(struct block_device *bdev, dev_t *devt)
+int blk_alloc_ext_minor(void)
 {
-	struct gendisk *disk = bdev->bd_disk;
 	int idx;
 
-	/* in consecutive minor range? */
-	if (bdev->bd_partno < disk->minors) {
-		*devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
-		return 0;
-	}
-
 	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
-	if (idx < 0)
-		return idx == -ENOSPC ? -EBUSY : idx;
-
-	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
-	return 0;
+	if (idx < 0) {
+		if (idx == -ENOSPC)
+			return -EBUSY;
+		return idx;
+	}
+	return blk_mangle_minor(idx);
 }
 
-/**
- * blk_free_devt - free a dev_t
- * @devt: dev_t to free
- *
- * Free @devt which was allocated using blk_alloc_devt().
- *
- * CONTEXT:
- * Might sleep.
- */
-void blk_free_devt(dev_t devt)
+void blk_free_ext_minor(unsigned int minor)
 {
-	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
-		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
+	ida_free(&ext_devt_ida, blk_mangle_minor(minor));
 }
 
 static char *bdevt_str(dev_t devt, char *buf)
@@ -499,8 +469,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 			      const struct attribute_group **groups,
 			      bool register_queue)
 {
-	dev_t devt;
-	int retval;
+	int ret;
 
 	/*
 	 * The disk queue should now be all set with enough information about
@@ -511,24 +480,30 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	if (register_queue)
 		elevator_init_mq(disk->queue);
 
-	/* minors == 0 indicates to use ext devt from part0 and should
-	 * be accompanied with EXT_DEVT flag.  Make sure all
-	 * parameters make sense.
+	/*
+	 * If the driver provides an explicit major number it also must provide
+	 * the number of minors numbers supported, and those will be used to
+	 * setup the gendisk.
+	 * Otherwise just allocate the device numbers for both the whole device
+	 * and all partitions from the extended dev_t space.
 	 */
-	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
-	WARN_ON(!disk->minors &&
-		!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
+	if (disk->major) {
+		WARN_ON(!disk->minors);
+	} else {
+		WARN_ON(disk->minors);
+		WARN_ON(!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
+
+		ret = blk_alloc_ext_minor();
+		if (ret < 0) {
+			WARN_ON(1);
+			return;
+		}
+		disk->major = BLOCK_EXT_MAJOR;
+		disk->first_minor = MINOR(ret);
+	}
 
 	disk->flags |= GENHD_FL_UP;
 
-	retval = blk_alloc_devt(disk->part0, &devt);
-	if (retval) {
-		WARN_ON(1);
-		return;
-	}
-	disk->major = MAJOR(devt);
-	disk->first_minor = MINOR(devt);
-
 	disk_alloc_events(disk);
 
 	if (disk->flags & GENHD_FL_HIDDEN) {
@@ -541,14 +516,14 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	} else {
 		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
 		struct device *dev = disk_to_dev(disk);
-		int ret;
 
 		/* Register BDI before referencing it from bdev */
-		dev->devt = devt;
-		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
+		dev->devt = MKDEV(disk->major, disk->first_minor);
+		ret = bdi_register(bdi, "%u:%u",
+				   disk->major, disk->first_minor);
 		WARN_ON(ret);
 		bdi_set_owner(bdi, dev);
-		bdev_add(disk->part0, devt);
+		bdev_add(disk->part0, dev->devt);
 	}
 	register_disk(parent, disk, groups);
 	if (register_queue)
@@ -1120,7 +1095,8 @@ static void disk_release(struct device *dev)
 
 	might_sleep();
 
-	blk_free_devt(dev->devt);
+	if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
+		blk_free_ext_minor(MINOR(dev->devt));
 	disk_release_events(disk);
 	kfree(disk->random);
 	xa_destroy(&disk->part_tbl);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index dc60ecf46fe6..504297bdc8bf 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -260,7 +260,8 @@ static const struct attribute_group *part_attr_groups[] = {
 
 static void part_release(struct device *dev)
 {
-	blk_free_devt(dev->devt);
+	if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
+		blk_free_ext_minor(MINOR(dev->devt));
 	bdput(dev_to_bdev(dev));
 }
 
@@ -379,9 +380,15 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
 	pdev->type = &part_type;
 	pdev->parent = ddev;
 
-	err = blk_alloc_devt(bdev, &devt);
-	if (err)
-		goto out_put;
+	/* in consecutive minor range? */
+	if (bdev->bd_partno < disk->minors) {
+		devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
+	} else {
+		err = blk_alloc_ext_minor();
+		if (err < 0)
+			goto out_put;
+		devt = MKDEV(BLOCK_EXT_MAJOR, err);
+	}
 	pdev->devt = devt;
 
 	/* delay uevent until 'holders' subdir is created */

From 2e3c73fa0c419f62fd588731be30fb0d1bca9ad6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:52 +0200
Subject: [PATCH 0757/3292] block: move the DISK_MAX_PARTS sanity check into
 __device_add_disk

Keep this together with the first place that actually looks at
->minors and prepare for not passing a minors argument to
alloc_disk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 3daab80201df..8c1816d2929e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -489,6 +489,12 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	 */
 	if (disk->major) {
 		WARN_ON(!disk->minors);
+
+		if (disk->minors > DISK_MAX_PARTS) {
+			pr_err("block: can't allocate more than %d partitions\n",
+				DISK_MAX_PARTS);
+			disk->minors = DISK_MAX_PARTS;
+		}
 	} else {
 		WARN_ON(disk->minors);
 		WARN_ON(!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
@@ -1255,13 +1261,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 {
 	struct gendisk *disk;
 
-	if (minors > DISK_MAX_PARTS) {
-		printk(KERN_ERR
-			"block: can't allocate more than %d partitions\n",
-			DISK_MAX_PARTS);
-		minors = DISK_MAX_PARTS;
-	}
-
 	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
 	if (!disk)
 		return NULL;

From 0d1feb72ffd8578f6f167ca15b2096c276c1f6df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:53 +0200
Subject: [PATCH 0758/3292] block: automatically enable GENHD_FL_EXT_DEVT

Automatically set the GENHD_FL_EXT_DEVT flag for all disks allocated
without an explicit number of minors.  This is what all new block
drivers should do, so make sure it is the default without boilerplate
code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c                    | 2 +-
 block/partitions/core.c          | 4 ----
 drivers/block/n64cart.c          | 2 +-
 drivers/lightnvm/core.c          | 1 -
 drivers/memstick/core/ms_block.c | 1 -
 drivers/nvdimm/blk.c             | 1 -
 drivers/nvdimm/btt.c             | 1 -
 drivers/nvdimm/pmem.c            | 1 -
 drivers/nvme/host/core.c         | 1 -
 drivers/nvme/host/multipath.c    | 1 -
 10 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 8c1816d2929e..9fa734cb9cbd 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -497,7 +497,6 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 		}
 	} else {
 		WARN_ON(disk->minors);
-		WARN_ON(!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));
 
 		ret = blk_alloc_ext_minor();
 		if (ret < 0) {
@@ -506,6 +505,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 		}
 		disk->major = BLOCK_EXT_MAJOR;
 		disk->first_minor = MINOR(ret);
+		disk->flags |= GENHD_FL_EXT_DEVT;
 	}
 
 	disk->flags |= GENHD_FL_UP;
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 504297bdc8bf..ada3e1e66989 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -326,10 +326,6 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
 	const char *dname;
 	int err;
 
-	/*
-	 * disk_max_parts() won't be zero, either GENHD_FL_EXT_DEVT is set
-	 * or 'minors' is passed to alloc_disk().
-	 */
 	if (partno >= disk_max_parts(disk))
 		return ERR_PTR(-EINVAL);
 
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 47bdf324e962..3dae4b631dea 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -141,7 +141,7 @@ static int __init n64cart_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	disk->first_minor = 0;
-	disk->flags = GENHD_FL_NO_PART_SCAN | GENHD_FL_EXT_DEVT;
+	disk->flags = GENHD_FL_NO_PART_SCAN;
 	disk->fops = &n64cart_fops;
 	disk->private_data = &pdev->dev;
 	strcpy(disk->disk_name, "n64cart");
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 40a948c08a0b..e7dc539fc0ac 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -383,7 +383,6 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	}
 
 	strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
-	tdisk->flags = GENHD_FL_EXT_DEVT;
 	tdisk->major = 0;
 	tdisk->first_minor = 0;
 	tdisk->fops = tt->bops;
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 8004dd64d09a..0bacf4268f83 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2136,7 +2136,6 @@ static int msb_init_disk(struct memstick_dev *card)
 	msb->disk->fops = &msb_bdops;
 	msb->disk->private_data = msb;
 	msb->disk->queue = msb->queue;
-	msb->disk->flags |= GENHD_FL_EXT_DEVT;
 
 	capacity = msb->pages_in_block * msb->logical_block_count;
 	capacity *= (msb->page_size / 512);
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 7b9556291eb1..7ba446d224fb 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -267,7 +267,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	disk->first_minor	= 0;
 	disk->fops		= &nd_blk_fops;
 	disk->queue		= q;
-	disk->flags		= GENHD_FL_EXT_DEVT;
 	disk->private_data	= nsblk;
 	nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
 
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 18a267d5073f..1741a7b0b30f 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1537,7 +1537,6 @@ static int btt_blk_init(struct btt *btt)
 	btt->btt_disk->fops = &btt_fops;
 	btt->btt_disk->private_data = btt;
 	btt->btt_disk->queue = btt->btt_queue;
-	btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
 
 	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
 	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ed10a8b66068..968b8483c763 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -477,7 +477,6 @@ static int pmem_attach_disk(struct device *dev,
 
 	disk->fops		= &pmem_fops;
 	disk->queue		= q;
-	disk->flags		= GENHD_FL_EXT_DEVT;
 	disk->private_data	= pmem;
 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
 	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 762125f2905f..24bcae88587a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3699,7 +3699,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	disk->fops = &nvme_bdev_ops;
 	disk->private_data = ns;
 	disk->queue = ns->queue;
-	disk->flags = GENHD_FL_EXT_DEVT;
 	/*
 	 * Without the multipath code enabled, multiple controller per
 	 * subsystems are visible as devices and thus we cannot use the
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f81871c7128a..a5d02f236cca 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -462,7 +462,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	head->disk->fops = &nvme_ns_head_ops;
 	head->disk->private_data = head;
 	head->disk->queue = q;
-	head->disk->flags = GENHD_FL_EXT_DEVT;
 	sprintf(head->disk->disk_name, "nvme%dn%d",
 			ctrl->subsys->instance, head->instance);
 	return 0;

From 958229a7c55f219b1cff99f939dabbc1b6ba7161 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:54 +0200
Subject: [PATCH 0759/3292] block: add a flag to make put_disk on partially
 initalized disks safer

Add a flag to indicate that __device_add_disk did grab a queue reference
so that disk_release only drops it if we actually had it.  This sort
out one of the major pitfals with partially initialized gendisk that
a lot of drivers did get wrong or still do.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 7 +++++--
 include/linux/genhd.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 9fa734cb9cbd..c826db33a73e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -539,7 +539,10 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	 * Take an extra ref on queue which will be put on disk_release()
 	 * so that it sticks around as long as @disk is there.
 	 */
-	WARN_ON_ONCE(!blk_get_queue(disk->queue));
+	if (blk_get_queue(disk->queue))
+		set_bit(GD_QUEUE_REF, &disk->state);
+	else
+		WARN_ON_ONCE(1);
 
 	disk_add_events(disk);
 	blk_integrity_add(disk);
@@ -1107,7 +1110,7 @@ static void disk_release(struct device *dev)
 	kfree(disk->random);
 	xa_destroy(&disk->part_tbl);
 	bdput(disk->part0);
-	if (disk->queue)
+	if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
 		blk_put_queue(disk->queue);
 	kfree(disk);
 }
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6fc26f7bdf71..4d3ee8b6b297 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -153,6 +153,7 @@ struct gendisk {
 	unsigned long state;
 #define GD_NEED_PART_SCAN		0
 #define GD_READ_ONLY			1
+#define GD_QUEUE_REF			2
 	struct kobject *slave_dir;
 
 	struct timer_rand_state *random;

From f525464a8000f092c20b00eead3eaa9d849c599e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:55 +0200
Subject: [PATCH 0760/3292] block: add blk_alloc_disk and blk_cleanup_disk APIs

Add two new APIs to allocate and free a gendisk including the
request_queue for use with BIO based drivers.  This is to avoid
boilerplate code in drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 35 +++++++++++++++++++++++++++++++++++
 include/linux/genhd.h | 22 ++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index c826db33a73e..efe0db4d62f0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1293,6 +1293,25 @@ out_free_disk:
 }
 EXPORT_SYMBOL(__alloc_disk_node);
 
+struct gendisk *__blk_alloc_disk(int node)
+{
+	struct request_queue *q;
+	struct gendisk *disk;
+
+	q = blk_alloc_queue(node);
+	if (!q)
+		return NULL;
+
+	disk = __alloc_disk_node(0, node);
+	if (!disk) {
+		blk_cleanup_queue(q);
+		return NULL;
+	}
+	disk->queue = q;
+	return disk;
+}
+EXPORT_SYMBOL(__blk_alloc_disk);
+
 /**
  * put_disk - decrements the gendisk refcount
  * @disk: the struct gendisk to decrement the refcount for
@@ -1310,6 +1329,22 @@ void put_disk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(put_disk);
 
+/**
+ * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
+ * @disk: gendisk to shutdown
+ *
+ * Mark the queue hanging off @disk DYING, drain all pending requests, then mark
+ * the queue DEAD, destroy and put it and the gendisk structure.
+ *
+ * Context: can sleep
+ */
+void blk_cleanup_disk(struct gendisk *disk)
+{
+	blk_cleanup_queue(disk->queue);
+	put_disk(disk);
+}
+EXPORT_SYMBOL(blk_cleanup_disk);
+
 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
 {
 	char event[] = "DISK_RO=1";
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4d3ee8b6b297..782f0171d104 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -278,6 +278,28 @@ extern void put_disk(struct gendisk *disk);
 
 #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE)
 
+/**
+ * blk_alloc_disk - allocate a gendisk structure
+ * @node_id: numa node to allocate on
+ *
+ * Allocate and pre-initialize a gendisk structure for use with BIO based
+ * drivers.
+ *
+ * Context: can sleep
+ */
+#define blk_alloc_disk(node_id)						\
+({									\
+	struct gendisk *__disk = __blk_alloc_disk(node_id);		\
+	static struct lock_class_key __key;				\
+									\
+	if (__disk)							\
+		lockdep_init_map(&__disk->lockdep_map,			\
+			"(bio completion)", &__key, 0);			\
+	__disk;								\
+})
+struct gendisk *__blk_alloc_disk(int node);
+void blk_cleanup_disk(struct gendisk *disk);
+
 int __register_blkdev(unsigned int major, const char *name,
 		void (*probe)(dev_t devt));
 #define register_blkdev(major, name) \

From 7f9b348cb5e94259acdcbafbcaed55d3bb515304 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:56 +0200
Subject: [PATCH 0761/3292] brd: convert to blk_alloc_disk/blk_cleanup_disk

Convert the brd driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.  This also
allows to remove the request_queue pointer in struct request_queue,
and to simplify the initialization as blk_cleanup_disk can be called
on any disk returned from blk_alloc_disk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c | 94 ++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 61 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 7562cf30b14e..95694113e38e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -38,9 +38,7 @@
  * device).
  */
 struct brd_device {
-	int		brd_number;
-
-	struct request_queue	*brd_queue;
+	int			brd_number;
 	struct gendisk		*brd_disk;
 	struct list_head	brd_list;
 
@@ -372,7 +370,7 @@ static LIST_HEAD(brd_devices);
 static DEFINE_MUTEX(brd_devices_mutex);
 static struct dentry *brd_debugfs_dir;
 
-static struct brd_device *brd_alloc(int i)
+static int brd_alloc(int i)
 {
 	struct brd_device *brd;
 	struct gendisk *disk;
@@ -380,64 +378,55 @@ static struct brd_device *brd_alloc(int i)
 
 	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
 	if (!brd)
-		goto out;
+		return -ENOMEM;
 	brd->brd_number		= i;
 	spin_lock_init(&brd->brd_lock);
 	INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
 
-	brd->brd_queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!brd->brd_queue)
-		goto out_free_dev;
-
 	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
 	if (!IS_ERR_OR_NULL(brd_debugfs_dir))
 		debugfs_create_u64(buf, 0444, brd_debugfs_dir,
 				&brd->brd_nr_pages);
 
-	/* This is so fdisk will align partitions on 4k, because of
-	 * direct_access API needing 4k alignment, returning a PFN
-	 * (This is only a problem on very small devices <= 4M,
-	 *  otherwise fdisk will align on 1M. Regardless this call
-	 *  is harmless)
-	 */
-	blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
-	disk = brd->brd_disk = alloc_disk(max_part);
+	disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
-		goto out_free_queue;
+		goto out_free_dev;
+
 	disk->major		= RAMDISK_MAJOR;
 	disk->first_minor	= i * max_part;
+	disk->minors		= max_part;
 	disk->fops		= &brd_fops;
 	disk->private_data	= brd;
 	disk->flags		= GENHD_FL_EXT_DEVT;
 	strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
 	set_capacity(disk, rd_size * 2);
+	
+	/*
+	 * This is so fdisk will align partitions on 4k, because of
+	 * direct_access API needing 4k alignment, returning a PFN
+	 * (This is only a problem on very small devices <= 4M,
+	 *  otherwise fdisk will align on 1M. Regardless this call
+	 *  is harmless)
+	 */
+	blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
 
 	/* Tell the block layer that this is not a rotational device */
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+	add_disk(disk);
+	list_add_tail(&brd->brd_list, &brd_devices);
 
-	return brd;
+	return 0;
 
-out_free_queue:
-	blk_cleanup_queue(brd->brd_queue);
 out_free_dev:
 	kfree(brd);
-out:
-	return NULL;
-}
-
-static void brd_free(struct brd_device *brd)
-{
-	put_disk(brd->brd_disk);
-	blk_cleanup_queue(brd->brd_queue);
-	brd_free_pages(brd);
-	kfree(brd);
+	return -ENOMEM;
 }
 
 static void brd_probe(dev_t dev)
 {
-	struct brd_device *brd;
 	int i = MINOR(dev) / max_part;
+	struct brd_device *brd;
 
 	mutex_lock(&brd_devices_mutex);
 	list_for_each_entry(brd, &brd_devices, brd_list) {
@@ -445,13 +434,7 @@ static void brd_probe(dev_t dev)
 			goto out_unlock;
 	}
 
-	brd = brd_alloc(i);
-	if (brd) {
-		brd->brd_disk->queue = brd->brd_queue;
-		add_disk(brd->brd_disk);
-		list_add_tail(&brd->brd_list, &brd_devices);
-	}
-
+	brd_alloc(i);
 out_unlock:
 	mutex_unlock(&brd_devices_mutex);
 }
@@ -460,7 +443,9 @@ static void brd_del_one(struct brd_device *brd)
 {
 	list_del(&brd->brd_list);
 	del_gendisk(brd->brd_disk);
-	brd_free(brd);
+	blk_cleanup_disk(brd->brd_disk);
+	brd_free_pages(brd);
+	kfree(brd);
 }
 
 static inline void brd_check_and_reset_par(void)
@@ -485,7 +470,7 @@ static inline void brd_check_and_reset_par(void)
 static int __init brd_init(void)
 {
 	struct brd_device *brd, *next;
-	int i;
+	int err, i;
 
 	/*
 	 * brd module now has a feature to instantiate underlying device
@@ -511,22 +496,11 @@ static int __init brd_init(void)
 
 	mutex_lock(&brd_devices_mutex);
 	for (i = 0; i < rd_nr; i++) {
-		brd = brd_alloc(i);
-		if (!brd)
+		err = brd_alloc(i);
+		if (err)
 			goto out_free;
-		list_add_tail(&brd->brd_list, &brd_devices);
 	}
 
-	/* point of no return */
-
-	list_for_each_entry(brd, &brd_devices, brd_list) {
-		/*
-		 * associate with queue just before adding disk for
-		 * avoiding to mess up failure path
-		 */
-		brd->brd_disk->queue = brd->brd_queue;
-		add_disk(brd->brd_disk);
-	}
 	mutex_unlock(&brd_devices_mutex);
 
 	pr_info("brd: module loaded\n");
@@ -535,15 +509,13 @@ static int __init brd_init(void)
 out_free:
 	debugfs_remove_recursive(brd_debugfs_dir);
 
-	list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
-		list_del(&brd->brd_list);
-		brd_free(brd);
-	}
+	list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
+		brd_del_one(brd);
 	mutex_unlock(&brd_devices_mutex);
 	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
 
 	pr_info("brd: module NOT loaded !!!\n");
-	return -ENOMEM;
+	return err;
 }
 
 static void __exit brd_exit(void)

From b647ad024841d02d67e78716f51f355d8d3e9656 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:57 +0200
Subject: [PATCH 0762/3292] drbd: convert to blk_alloc_disk/blk_cleanup_disk

Convert the drbd driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index de463773b530..55234a558e98 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2231,8 +2231,7 @@ void drbd_destroy_device(struct kref *kref)
 	if (device->bitmap) /* should no longer be there. */
 		drbd_bm_cleanup(device);
 	__free_page(device->md_io.page);
-	put_disk(device->vdisk);
-	blk_cleanup_queue(device->rq_queue);
+	blk_cleanup_disk(device->vdisk);
 	kfree(device->rs_plan_s);
 
 	/* not for_each_connection(connection, resource):
@@ -2701,7 +2700,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 	struct drbd_device *device;
 	struct drbd_peer_device *peer_device, *tmp_peer_device;
 	struct gendisk *disk;
-	struct request_queue *q;
 	int id;
 	int vnr = adm_ctx->volume;
 	enum drbd_ret_code err = ERR_NOMEM;
@@ -2723,29 +2721,26 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 
 	drbd_init_set_defaults(device);
 
-	q = blk_alloc_queue(NUMA_NO_NODE);
-	if (!q)
-		goto out_no_q;
-	device->rq_queue = q;
-
-	disk = alloc_disk(1);
+	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
 		goto out_no_disk;
+
 	device->vdisk = disk;
+	device->rq_queue = disk->queue;
 
 	set_disk_ro(disk, true);
 
-	disk->queue = q;
 	disk->major = DRBD_MAJOR;
 	disk->first_minor = minor;
+	disk->minors = 1;
 	disk->fops = &drbd_ops;
 	sprintf(disk->disk_name, "drbd%d", minor);
 	disk->private_data = device;
 
-	blk_queue_write_cache(q, true, true);
+	blk_queue_write_cache(disk->queue, true, true);
 	/* Setting the max_hw_sectors to an odd value of 8kibyte here
 	   This triggers a max_bio_size message upon first attach or connect */
-	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
+	blk_queue_max_hw_sectors(disk->queue, DRBD_MAX_BIO_SIZE_SAFE >> 8);
 
 	device->md_io.page = alloc_page(GFP_KERNEL);
 	if (!device->md_io.page)
@@ -2834,10 +2829,8 @@ out_no_minor_idr:
 out_no_bitmap:
 	__free_page(device->md_io.page);
 out_no_io_page:
-	put_disk(disk);
+	blk_cleanup_disk(disk);
 out_no_disk:
-	blk_cleanup_queue(q);
-out_no_q:
 	kref_put(&resource->kref, drbd_destroy_resource);
 	kfree(device);
 	return err;

From 444134845277ad37c8ca7d1321d3dd57b96b5ae0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:58 +0200
Subject: [PATCH 0763/3292] pktcdvd: convert to blk_alloc_disk/blk_cleanup_disk

Convert the pktcdvd driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/pktcdvd.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index bd3556585122..f69b5c69c2a6 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2711,19 +2711,17 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	pd->write_congestion_off = write_congestion_off;
 
 	ret = -ENOMEM;
-	disk = alloc_disk(1);
+	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
 		goto out_mem;
 	pd->disk = disk;
 	disk->major = pktdev_major;
 	disk->first_minor = idx;
+	disk->minors = 1;
 	disk->fops = &pktcdvd_ops;
 	disk->flags = GENHD_FL_REMOVABLE;
 	strcpy(disk->disk_name, pd->name);
 	disk->private_data = pd;
-	disk->queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!disk->queue)
-		goto out_mem2;
 
 	pd->pkt_dev = MKDEV(pktdev_major, idx);
 	ret = pkt_new_dev(pd, dev);
@@ -2746,7 +2744,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	return 0;
 
 out_mem2:
-	put_disk(disk);
+	blk_cleanup_disk(disk);
 out_mem:
 	mempool_exit(&pd->rb_pool);
 	kfree(pd);
@@ -2796,8 +2794,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	pkt_dbg(1, pd, "writer unmapped\n");
 
 	del_gendisk(pd->disk);
-	blk_cleanup_queue(pd->disk->queue);
-	put_disk(pd->disk);
+	blk_cleanup_disk(pd->disk);
 
 	mempool_exit(&pd->rb_pool);
 	kfree(pd);

From 0be7966e7e6e8c57c3f63b16ddeed73e68313a89 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:50:59 +0200
Subject: [PATCH 0764/3292] rsxx: convert to blk_alloc_disk/blk_cleanup_disk

Convert the rsxx driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dev.c       | 39 +++++++++++++---------------------
 drivers/block/rsxx/rsxx_priv.h |  1 -
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9a28322a8cd8..1cc40b0ea761 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -236,47 +236,40 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 		return -ENOMEM;
 	}
 
-	card->queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!card->queue) {
-		dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
-		unregister_blkdev(card->major, DRIVER_NAME);
-		return -ENOMEM;
-	}
-
-	card->gendisk = alloc_disk(blkdev_minors);
+	card->gendisk = blk_alloc_disk(blkdev_minors);
 	if (!card->gendisk) {
 		dev_err(CARD_TO_DEV(card), "Failed disk alloc\n");
-		blk_cleanup_queue(card->queue);
 		unregister_blkdev(card->major, DRIVER_NAME);
 		return -ENOMEM;
 	}
 
 	if (card->config_valid) {
 		blk_size = card->config.data.block_size;
-		blk_queue_dma_alignment(card->queue, blk_size - 1);
-		blk_queue_logical_block_size(card->queue, blk_size);
+		blk_queue_dma_alignment(card->gendisk->queue, blk_size - 1);
+		blk_queue_logical_block_size(card->gendisk->queue, blk_size);
 	}
 
-	blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
-	blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
+	blk_queue_max_hw_sectors(card->gendisk->queue, blkdev_max_hw_sectors);
+	blk_queue_physical_block_size(card->gendisk->queue, RSXX_HW_BLK_SIZE);
 
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue);
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, card->gendisk->queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->gendisk->queue);
 	if (rsxx_discard_supported(card)) {
-		blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue);
-		blk_queue_max_discard_sectors(card->queue,
+		blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->gendisk->queue);
+		blk_queue_max_discard_sectors(card->gendisk->queue,
 						RSXX_HW_BLK_SIZE >> 9);
-		card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
-		card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
+		card->gendisk->queue->limits.discard_granularity =
+			RSXX_HW_BLK_SIZE;
+		card->gendisk->queue->limits.discard_alignment =
+			RSXX_HW_BLK_SIZE;
 	}
 
 	snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
 		 "rsxx%d", card->disk_id);
 	card->gendisk->major = card->major;
-	card->gendisk->first_minor = 0;
+	card->gendisk->minors = blkdev_minors;
 	card->gendisk->fops = &rsxx_fops;
 	card->gendisk->private_data = card;
-	card->gendisk->queue = card->queue;
 
 	return 0;
 }
@@ -286,10 +279,8 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
 	if (!enable_blkdev)
 		return;
 
-	put_disk(card->gendisk);
+	blk_cleanup_disk(card->gendisk);
 	card->gendisk = NULL;
-
-	blk_cleanup_queue(card->queue);
 	unregister_blkdev(card->major, DRIVER_NAME);
 }
 
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6147977994ff..26c320c0d924 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -154,7 +154,6 @@ struct rsxx_cardinfo {
 	bool			bdev_attached;
 	int			disk_id;
 	int			major;
-	struct request_queue	*queue;
 	struct gendisk		*gendisk;
 	struct {
 		/* Used to convert a byte address to a device address. */

From 7681750bd35fe92dd915f4df177d45265e78a933 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:00 +0200
Subject: [PATCH 0765/3292] zram: convert to blk_alloc_disk/blk_cleanup_disk

Convert the zram driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/zram/zram_drv.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index cf8deecc39ef..006416cc4969 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1890,7 +1890,6 @@ static const struct attribute_group *zram_disk_attr_groups[] = {
 static int zram_add(void)
 {
 	struct zram *zram;
-	struct request_queue *queue;
 	int ret, device_id;
 
 	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
@@ -1906,27 +1905,20 @@ static int zram_add(void)
 #ifdef CONFIG_ZRAM_WRITEBACK
 	spin_lock_init(&zram->wb_limit_lock);
 #endif
-	queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!queue) {
-		pr_err("Error allocating disk queue for device %d\n",
+
+	/* gendisk structure */
+	zram->disk = blk_alloc_disk(NUMA_NO_NODE);
+	if (!zram->disk) {
+		pr_err("Error allocating disk structure for device %d\n",
 			device_id);
 		ret = -ENOMEM;
 		goto out_free_idr;
 	}
 
-	/* gendisk structure */
-	zram->disk = alloc_disk(1);
-	if (!zram->disk) {
-		pr_err("Error allocating disk structure for device %d\n",
-			device_id);
-		ret = -ENOMEM;
-		goto out_free_queue;
-	}
-
 	zram->disk->major = zram_major;
 	zram->disk->first_minor = device_id;
+	zram->disk->minors = 1;
 	zram->disk->fops = &zram_devops;
-	zram->disk->queue = queue;
 	zram->disk->private_data = zram;
 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
 
@@ -1969,8 +1961,6 @@ static int zram_add(void)
 	pr_info("Added device: %s\n", zram->disk->disk_name);
 	return device_id;
 
-out_free_queue:
-	blk_cleanup_queue(queue);
 out_free_idr:
 	idr_remove(&zram_index_idr, device_id);
 out_free_dev:
@@ -2000,8 +1990,7 @@ static int zram_remove(struct zram *zram)
 	pr_info("Removed device: %s\n", zram->disk->disk_name);
 
 	del_gendisk(zram->disk);
-	blk_cleanup_queue(zram->disk->queue);
-	put_disk(zram->disk);
+	blk_cleanup_disk(zram->disk);
 	kfree(zram);
 	return 0;
 }

From 1aabd53a4b49adaf65319aa622e612b6edb2b663 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:01 +0200
Subject: [PATCH 0766/3292] lightnvm: convert to
 blk_alloc_disk/blk_cleanup_disk

Convert the lightnvm driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index e7dc539fc0ac..cf8a75494833 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -305,7 +305,6 @@ static int __nvm_config_extended(struct nvm_dev *dev,
 static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 {
 	struct nvm_ioctl_create_extended e;
-	struct request_queue *tqueue;
 	struct gendisk *tdisk;
 	struct nvm_tgt_type *tt;
 	struct nvm_target *t;
@@ -370,23 +369,16 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 		goto err_t;
 	}
 
-	tdisk = alloc_disk(0);
+	tdisk = blk_alloc_disk(dev->q->node);
 	if (!tdisk) {
 		ret = -ENOMEM;
 		goto err_dev;
 	}
 
-	tqueue = blk_alloc_queue(dev->q->node);
-	if (!tqueue) {
-		ret = -ENOMEM;
-		goto err_disk;
-	}
-
 	strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
 	tdisk->major = 0;
 	tdisk->first_minor = 0;
 	tdisk->fops = tt->bops;
-	tdisk->queue = tqueue;
 
 	targetdata = tt->init(tgt_dev, tdisk, create->flags);
 	if (IS_ERR(targetdata)) {
@@ -395,14 +387,14 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	}
 
 	tdisk->private_data = targetdata;
-	tqueue->queuedata = targetdata;
+	tdisk->queue->queuedata = targetdata;
 
 	mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
 	if (dev->geo.mdts) {
 		mdts = min_t(u32, dev->geo.mdts,
 				(dev->geo.csecs >> 9) * NVM_MAX_VLBA);
 	}
-	blk_queue_max_hw_sectors(tqueue, mdts);
+	blk_queue_max_hw_sectors(tdisk->queue, mdts);
 
 	set_capacity(tdisk, tt->capacity(targetdata));
 	add_disk(tdisk);
@@ -427,10 +419,7 @@ err_sysfs:
 	if (tt->exit)
 		tt->exit(targetdata, true);
 err_init:
-	blk_cleanup_queue(tqueue);
-	tdisk->queue = NULL;
-err_disk:
-	put_disk(tdisk);
+	blk_cleanup_disk(tdisk);
 err_dev:
 	nvm_remove_tgt_dev(tgt_dev, 0);
 err_t:
@@ -444,10 +433,8 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
 {
 	struct nvm_tgt_type *tt = t->type;
 	struct gendisk *tdisk = t->disk;
-	struct request_queue *q = tdisk->queue;
 
 	del_gendisk(tdisk);
-	blk_cleanup_queue(q);
 
 	if (tt->sysfs_exit)
 		tt->sysfs_exit(tdisk);
@@ -456,7 +443,7 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
 		tt->exit(tdisk->private_data, graceful);
 
 	nvm_remove_tgt_dev(t->dev, 1);
-	put_disk(tdisk);
+	blk_cleanup_disk(tdisk);
 	module_put(t->type->owner);
 
 	list_del(&t->list);

From bc70852fd10415cda727577f12ea93e502eb1027 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:02 +0200
Subject: [PATCH 0767/3292] bcache: convert to blk_alloc_disk/blk_cleanup_disk

Convert the bcache driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Coly Li <colyli@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/super.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bea8c4429ae8..185246a0d855 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -890,13 +890,9 @@ static void bcache_device_free(struct bcache_device *d)
 		if (disk_added)
 			del_gendisk(disk);
 
-		if (disk->queue)
-			blk_cleanup_queue(disk->queue);
-
+		blk_cleanup_disk(disk);
 		ida_simple_remove(&bcache_device_idx,
 				  first_minor_to_idx(disk->first_minor));
-		if (disk_added)
-			put_disk(disk);
 	}
 
 	bioset_exit(&d->bio_split);
@@ -946,7 +942,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
 		goto err;
 
-	d->disk = alloc_disk(BCACHE_MINORS);
+	d->disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!d->disk)
 		goto err;
 
@@ -955,14 +951,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
 	d->disk->major		= bcache_major;
 	d->disk->first_minor	= idx_to_first_minor(idx);
+	d->disk->minors		= BCACHE_MINORS;
 	d->disk->fops		= ops;
 	d->disk->private_data	= d;
 
-	q = blk_alloc_queue(NUMA_NO_NODE);
-	if (!q)
-		return -ENOMEM;
-
-	d->disk->queue			= q;
+	q = d->disk->queue;
 	q->limits.max_hw_sectors	= UINT_MAX;
 	q->limits.max_sectors		= UINT_MAX;
 	q->limits.max_segment_size	= UINT_MAX;

From 74fe6ba9239497e5fa383a15efa9f5ffc23b11f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:03 +0200
Subject: [PATCH 0768/3292] dm: convert to blk_alloc_disk/blk_cleanup_disk

Convert the dm driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ca2aedd8ee7d..3c7c2d257018 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1801,13 +1801,13 @@ static void cleanup_mapped_device(struct mapped_device *md)
 		md->disk->private_data = NULL;
 		spin_unlock(&_minor_lock);
 		del_gendisk(md->disk);
-		put_disk(md->disk);
 	}
 
-	if (md->queue) {
+	if (md->queue)
 		dm_queue_destroy_keyslot_manager(md->queue);
-		blk_cleanup_queue(md->queue);
-	}
+
+	if (md->disk)
+		blk_cleanup_disk(md->disk);
 
 	cleanup_srcu_struct(&md->io_barrier);
 
@@ -1869,13 +1869,10 @@ static struct mapped_device *alloc_dev(int minor)
 	 * established. If request-based table is loaded: blk-mq will
 	 * override accordingly.
 	 */
-	md->queue = blk_alloc_queue(numa_node_id);
-	if (!md->queue)
-		goto bad;
-
-	md->disk = alloc_disk_node(1, md->numa_node_id);
+	md->disk = blk_alloc_disk(md->numa_node_id);
 	if (!md->disk)
 		goto bad;
+	md->queue = md->disk->queue;
 
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
@@ -1888,6 +1885,7 @@ static struct mapped_device *alloc_dev(int minor)
 
 	md->disk->major = _major;
 	md->disk->first_minor = minor;
+	md->disk->minors = 1;
 	md->disk->fops = &dm_blk_dops;
 	md->disk->queue = md->queue;
 	md->disk->private_data = md;

From 0f1d2e0643c544df50dbc436da930201218fa1e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:04 +0200
Subject: [PATCH 0769/3292] md: convert to blk_alloc_disk/blk_cleanup_disk

Convert the md driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/md.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 49f897fbb89b..d806be8cc210 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5598,12 +5598,10 @@ static void md_free(struct kobject *ko)
 	if (mddev->sysfs_level)
 		sysfs_put(mddev->sysfs_level);
 
-	if (mddev->gendisk)
+	if (mddev->gendisk) {
 		del_gendisk(mddev->gendisk);
-	if (mddev->queue)
-		blk_cleanup_queue(mddev->queue);
-	if (mddev->gendisk)
-		put_disk(mddev->gendisk);
+		blk_cleanup_disk(mddev->gendisk);
+	}
 	percpu_ref_exit(&mddev->writes_pending);
 
 	bioset_exit(&mddev->bio_set);
@@ -5711,20 +5709,13 @@ static int md_alloc(dev_t dev, char *name)
 		goto abort;
 
 	error = -ENOMEM;
-	mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!mddev->queue)
+	disk = blk_alloc_disk(NUMA_NO_NODE);
+	if (!disk)
 		goto abort;
 
-	blk_set_stacking_limits(&mddev->queue->limits);
-
-	disk = alloc_disk(1 << shift);
-	if (!disk) {
-		blk_cleanup_queue(mddev->queue);
-		mddev->queue = NULL;
-		goto abort;
-	}
 	disk->major = MAJOR(mddev->unit);
 	disk->first_minor = unit << shift;
+	disk->minors = 1 << shift;
 	if (name)
 		strcpy(disk->disk_name, name);
 	else if (partitioned)
@@ -5733,7 +5724,9 @@ static int md_alloc(dev_t dev, char *name)
 		sprintf(disk->disk_name, "md%d", unit);
 	disk->fops = &md_fops;
 	disk->private_data = mddev;
-	disk->queue = mddev->queue;
+
+	mddev->queue = disk->queue;
+	blk_set_stacking_limits(&mddev->queue->limits);
 	blk_queue_write_cache(mddev->queue, true, true);
 	/* Allow extended partitions.  This makes the
 	 * 'mdp' device redundant, but we can't really

From 6c552ceabf39797fba1d3088af9481511f02393f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:05 +0200
Subject: [PATCH 0770/3292] nvdimm-blk: convert to
 blk_alloc_disk/blk_cleanup_disk

Convert the nvdimm-blk driver to use the blk_alloc_disk and
blk_cleanup_disk helpers to simplify gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-16-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/blk.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 7ba446d224fb..088d3dd6f6fa 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -228,48 +228,34 @@ static const struct block_device_operations nd_blk_fops = {
 	.submit_bio =  nd_blk_submit_bio,
 };
 
-static void nd_blk_release_queue(void *q)
-{
-	blk_cleanup_queue(q);
-}
-
 static void nd_blk_release_disk(void *disk)
 {
 	del_gendisk(disk);
-	put_disk(disk);
+	blk_cleanup_disk(disk);
 }
 
 static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 {
 	struct device *dev = &nsblk->common.dev;
 	resource_size_t available_disk_size;
-	struct request_queue *q;
 	struct gendisk *disk;
 	u64 internal_nlba;
 
 	internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
 	available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
 
-	q = blk_alloc_queue(NUMA_NO_NODE);
-	if (!q)
-		return -ENOMEM;
-	if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
-		return -ENOMEM;
-
-	blk_queue_max_hw_sectors(q, UINT_MAX);
-	blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-
-	disk = alloc_disk(0);
+	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
 		return -ENOMEM;
 
-	disk->first_minor	= 0;
 	disk->fops		= &nd_blk_fops;
-	disk->queue		= q;
 	disk->private_data	= nsblk;
 	nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
 
+	blk_queue_max_hw_sectors(disk->queue, UINT_MAX);
+	blk_queue_logical_block_size(disk->queue, nsblk_sector_size(nsblk));
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+
 	if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
 		return -ENOMEM;
 

From d4e4e5835f29fa1a1dcdecc5bea125050274d0f6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:06 +0200
Subject: [PATCH 0771/3292] nvdimm-btt: convert to
 blk_alloc_disk/blk_cleanup_disk

Convert the nvdimm-btt driver to use the blk_alloc_disk and
blk_cleanup_disk helpers to simplify gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-17-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/btt.c | 24 +++++++-----------------
 drivers/nvdimm/btt.h |  2 --
 2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 1741a7b0b30f..92dec4952297 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1521,34 +1521,25 @@ static int btt_blk_init(struct btt *btt)
 	struct nd_btt *nd_btt = btt->nd_btt;
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
-	/* create a new disk and request queue for btt */
-	btt->btt_queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!btt->btt_queue)
+	btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE);
+	if (!btt->btt_disk)
 		return -ENOMEM;
 
-	btt->btt_disk = alloc_disk(0);
-	if (!btt->btt_disk) {
-		blk_cleanup_queue(btt->btt_queue);
-		return -ENOMEM;
-	}
-
 	nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
 	btt->btt_disk->first_minor = 0;
 	btt->btt_disk->fops = &btt_fops;
 	btt->btt_disk->private_data = btt;
-	btt->btt_disk->queue = btt->btt_queue;
 
-	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
-	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
+	blk_queue_logical_block_size(btt->btt_disk->queue, btt->sector_size);
+	blk_queue_max_hw_sectors(btt->btt_disk->queue, UINT_MAX);
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
 
 	if (btt_meta_size(btt)) {
 		int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
 
 		if (rc) {
 			del_gendisk(btt->btt_disk);
-			put_disk(btt->btt_disk);
-			blk_cleanup_queue(btt->btt_queue);
+			blk_cleanup_disk(btt->btt_disk);
 			return rc;
 		}
 	}
@@ -1563,8 +1554,7 @@ static int btt_blk_init(struct btt *btt)
 static void btt_blk_cleanup(struct btt *btt)
 {
 	del_gendisk(btt->btt_disk);
-	put_disk(btt->btt_disk);
-	blk_cleanup_queue(btt->btt_queue);
+	blk_cleanup_disk(btt->btt_disk);
 }
 
 /**
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index aa53e0b769bd..0c76c0333f6e 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -201,7 +201,6 @@ struct badblocks;
 /**
  * struct btt - handle for a BTT instance
  * @btt_disk:		Pointer to the gendisk for BTT device
- * @btt_queue:		Pointer to the request queue for the BTT device
  * @arena_list:		Head of the list of arenas
  * @debugfs_dir:	Debugfs dentry
  * @nd_btt:		Parent nd_btt struct
@@ -219,7 +218,6 @@ struct badblocks;
  */
 struct btt {
 	struct gendisk *btt_disk;
-	struct request_queue *btt_queue;
 	struct list_head arena_list;
 	struct dentry *debugfs_dir;
 	struct nd_btt *nd_btt;

From 87eb73b2ca7c1b913e84d6efe46810fd301e7a66 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:07 +0200
Subject: [PATCH 0772/3292] nvdimm-pmem: convert to
 blk_alloc_disk/blk_cleanup_disk

Convert the nvdimm-pmem driver to use the blk_alloc_disk and
blk_cleanup_disk helpers to simplify gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-18-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/pmem.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 968b8483c763..9fcd05084564 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -338,7 +338,7 @@ static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
 	struct request_queue *q =
 		container_of(pgmap->ref, struct request_queue, q_usage_counter);
 
-	blk_cleanup_queue(q);
+	blk_cleanup_disk(queue_to_disk(q));
 }
 
 static void pmem_release_queue(void *pgmap)
@@ -361,7 +361,6 @@ static void pmem_release_disk(void *__pmem)
 	kill_dax(pmem->dax_dev);
 	put_dax(pmem->dax_dev);
 	del_gendisk(pmem->disk);
-	put_disk(pmem->disk);
 }
 
 static const struct dev_pagemap_ops fsdax_pagemap_ops = {
@@ -422,10 +421,12 @@ static int pmem_attach_disk(struct device *dev,
 		return -EBUSY;
 	}
 
-	q = blk_alloc_queue(dev_to_node(dev));
-	if (!q)
+	disk = blk_alloc_disk(nid);
+	if (!disk)
 		return -ENOMEM;
+	q = disk->queue;
 
+	pmem->disk = disk;
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {
@@ -470,11 +471,6 @@ static int pmem_attach_disk(struct device *dev,
 	if (pmem->pfn_flags & PFN_MAP)
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
 
-	disk = alloc_disk_node(0, nid);
-	if (!disk)
-		return -ENOMEM;
-	pmem->disk = disk;
-
 	disk->fops		= &pmem_fops;
 	disk->queue		= q;
 	disk->private_data	= pmem;
@@ -490,7 +486,6 @@ static int pmem_attach_disk(struct device *dev,
 		flags = DAXDEV_F_SYNC;
 	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
 	if (IS_ERR(dax_dev)) {
-		put_disk(disk);
 		return PTR_ERR(dax_dev);
 	}
 	dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));

From f165fb89b71facbef833c6244abf8b9887b899d1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:08 +0200
Subject: [PATCH 0773/3292] nvme-multipath: convert to
 blk_alloc_disk/blk_cleanup_disk

Convert the nvme-multipath driver to use the blk_alloc_disk and
blk_cleanup_disk helpers to simplify gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-19-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/pmem.c         |  1 -
 drivers/nvme/host/multipath.c | 47 +++++++++++------------------------
 2 files changed, 14 insertions(+), 34 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 9fcd05084564..31f3c4bd6f72 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -472,7 +472,6 @@ static int pmem_attach_disk(struct device *dev,
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
 
 	disk->fops		= &pmem_fops;
-	disk->queue		= q;
 	disk->private_data	= pmem;
 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
 	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a5d02f236cca..b5fbdb416022 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -427,7 +427,6 @@ static void nvme_requeue_work(struct work_struct *work)
 
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 {
-	struct request_queue *q;
 	bool vwc = false;
 
 	mutex_init(&head->lock);
@@ -443,33 +442,24 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
 		return 0;
 
-	q = blk_alloc_queue(ctrl->numa_node);
-	if (!q)
-		goto out;
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-	/* set to a default value for 512 until disk is validated */
-	blk_queue_logical_block_size(q, 512);
-	blk_set_stacking_limits(&q->limits);
+	head->disk = blk_alloc_disk(ctrl->numa_node);
+	if (!head->disk)
+		return -ENOMEM;
+	head->disk->fops = &nvme_ns_head_ops;
+	head->disk->private_data = head;
+	sprintf(head->disk->disk_name, "nvme%dn%d",
+			ctrl->subsys->instance, head->instance);
+
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
+	/* set to a default value of 512 until the disk is validated */
+	blk_queue_logical_block_size(head->disk->queue, 512);
+	blk_set_stacking_limits(&head->disk->queue->limits);
 
 	/* we need to propagate up the VMC settings */
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
 		vwc = true;
-	blk_queue_write_cache(q, vwc, vwc);
-
-	head->disk = alloc_disk(0);
-	if (!head->disk)
-		goto out_cleanup_queue;
-	head->disk->fops = &nvme_ns_head_ops;
-	head->disk->private_data = head;
-	head->disk->queue = q;
-	sprintf(head->disk->disk_name, "nvme%dn%d",
-			ctrl->subsys->instance, head->instance);
+	blk_queue_write_cache(head->disk->queue, vwc, vwc);
 	return 0;
-
-out_cleanup_queue:
-	blk_cleanup_queue(q);
-out:
-	return -ENOMEM;
 }
 
 static void nvme_mpath_set_live(struct nvme_ns *ns)
@@ -768,16 +758,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
-	blk_cleanup_queue(head->disk->queue);
-	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
-		/*
-		 * if device_add_disk wasn't called, prevent
-		 * disk release to put a bogus reference on the
-		 * request queue
-		 */
-		head->disk->queue = NULL;
-	}
-	put_disk(head->disk);
+	blk_cleanup_disk(head->disk);
 }
 
 void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)

From c3e235383645de20796efd2821c60bf6a7ab5f2e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:09 +0200
Subject: [PATCH 0774/3292] nfblock: convert to blk_alloc_disk/blk_cleanup_disk

Convert the nfblock driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-20-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/emu/nfblock.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index ba808543161a..9a8394e96388 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -55,7 +55,6 @@ struct nfhd_device {
 	int id;
 	u32 blocks, bsize;
 	int bshift;
-	struct request_queue *queue;
 	struct gendisk *disk;
 };
 
@@ -119,32 +118,24 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
 	dev->bsize = bsize;
 	dev->bshift = ffs(bsize) - 10;
 
-	dev->queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (dev->queue == NULL)
-		goto free_dev;
-
-	blk_queue_logical_block_size(dev->queue, bsize);
-
-	dev->disk = alloc_disk(16);
+	dev->disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!dev->disk)
-		goto free_queue;
+		goto free_dev;
 
 	dev->disk->major = major_num;
 	dev->disk->first_minor = dev_id * 16;
+	dev->disk->minors = 16;
 	dev->disk->fops = &nfhd_ops;
 	dev->disk->private_data = dev;
 	sprintf(dev->disk->disk_name, "nfhd%u", dev_id);
 	set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
-	dev->disk->queue = dev->queue;
-
+	blk_queue_logical_block_size(dev->disk->queue, bsize);
 	add_disk(dev->disk);
 
 	list_add_tail(&dev->list, &nfhd_list);
 
 	return 0;
 
-free_queue:
-	blk_cleanup_queue(dev->queue);
 free_dev:
 	kfree(dev);
 out:
@@ -186,8 +177,7 @@ static void __exit nfhd_exit(void)
 	list_for_each_entry_safe(dev, next, &nfhd_list, list) {
 		list_del(&dev->list);
 		del_gendisk(dev->disk);
-		put_disk(dev->disk);
-		blk_cleanup_queue(dev->queue);
+		blk_cleanup_disk(dev->disk);
 		kfree(dev);
 	}
 	unregister_blkdev(major_num, "nfhd");

From b1833edc4c95d801b249159be361af6d3c3ea44d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:10 +0200
Subject: [PATCH 0775/3292] simdisk: convert to blk_alloc_disk/blk_cleanup_disk

Convert the simdisk driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-21-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/xtensa/platforms/iss/simdisk.c | 29 +++++++----------------------
 1 file changed, 7 insertions(+), 22 deletions(-)

diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index fc09be7b1347..3cdfa00738e0 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -27,7 +27,6 @@
 struct simdisk {
 	const char *filename;
 	spinlock_t lock;
-	struct request_queue *queue;
 	struct gendisk *gd;
 	struct proc_dir_entry *procfile;
 	int users;
@@ -266,21 +265,13 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
 	spin_lock_init(&dev->lock);
 	dev->users = 0;
 
-	dev->queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (dev->queue == NULL) {
-		pr_err("blk_alloc_queue failed\n");
-		goto out_alloc_queue;
-	}
-
-	dev->gd = alloc_disk(SIMDISK_MINORS);
-	if (dev->gd == NULL) {
-		pr_err("alloc_disk failed\n");
-		goto out_alloc_disk;
-	}
+	dev->gd = blk_alloc_disk(NUMA_NO_NODE);
+	if (!dev->gd)
+		return -ENOMEM;
 	dev->gd->major = simdisk_major;
 	dev->gd->first_minor = which;
+	dev->gd->minors = SIMDISK_MINORS;
 	dev->gd->fops = &simdisk_ops;
-	dev->gd->queue = dev->queue;
 	dev->gd->private_data = dev;
 	snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
 	set_capacity(dev->gd, 0);
@@ -288,12 +279,6 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
 
 	dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev);
 	return 0;
-
-out_alloc_disk:
-	blk_cleanup_queue(dev->queue);
-	dev->queue = NULL;
-out_alloc_queue:
-	return -ENOMEM;
 }
 
 static int __init simdisk_init(void)
@@ -343,10 +328,10 @@ static void simdisk_teardown(struct simdisk *dev, int which,
 	char tmp[2] = { '0' + which, 0 };
 
 	simdisk_detach(dev);
-	if (dev->gd)
+	if (dev->gd) {
 		del_gendisk(dev->gd);
-	if (dev->queue)
-		blk_cleanup_queue(dev->queue);
+		blk_cleanup_disk(dev->gd);
+	}
 	remove_proc_entry(tmp, procdir);
 }
 

From f9dc931de80664eb78cbc8c85052bd0856d4aa9c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:11 +0200
Subject: [PATCH 0776/3292] n64cart: convert to blk_alloc_disk

Convert the n64cart driver to use the blk_alloc_disk helper to simplify
gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-22-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/n64cart.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 3dae4b631dea..7b4dd10af9ec 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -132,14 +132,10 @@ static int __init n64cart_probe(struct platform_device *pdev)
 	if (!reg_base)
 		return -EINVAL;
 
-	disk = alloc_disk(0);
+	disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (!disk)
 		return -ENOMEM;
 
-	disk->queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!disk->queue)
-		return -ENOMEM;
-
 	disk->first_minor = 0;
 	disk->flags = GENHD_FL_NO_PART_SCAN;
 	disk->fops = &n64cart_fops;

From 684bf9cd8d29503ee47a6942e34e1d2f0a4774fa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:12 +0200
Subject: [PATCH 0777/3292] ps3vram: convert to blk_alloc_disk/blk_cleanup_disk

Convert the ps3vram driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-23-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/ps3vram.c | 31 ++++++++-----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 1d738999fb69..7fbf469651c4 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -67,7 +67,6 @@ struct ps3vram_cache {
 };
 
 struct ps3vram_priv {
-	struct request_queue *queue;
 	struct gendisk *gendisk;
 
 	u64 size;
@@ -613,7 +612,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 {
 	struct ps3vram_priv *priv;
 	int error, status;
-	struct request_queue *queue;
 	struct gendisk *gendisk;
 	u64 ddr_size, ddr_lpar, ctrl_lpar, info_lpar, reports_lpar,
 	    reports_size, xdr_lpar;
@@ -736,33 +734,23 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 
 	ps3vram_proc_init(dev);
 
-	queue = blk_alloc_queue(NUMA_NO_NODE);
-	if (!queue) {
-		dev_err(&dev->core, "blk_alloc_queue failed\n");
+	gendisk = blk_alloc_disk(NUMA_NO_NODE);
+	if (!gendisk) {
+		dev_err(&dev->core, "blk_alloc_disk failed\n");
 		error = -ENOMEM;
 		goto out_cache_cleanup;
 	}
 
-	priv->queue = queue;
-	blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
-	blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
-	blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
-
-	gendisk = alloc_disk(1);
-	if (!gendisk) {
-		dev_err(&dev->core, "alloc_disk failed\n");
-		error = -ENOMEM;
-		goto fail_cleanup_queue;
-	}
-
 	priv->gendisk = gendisk;
 	gendisk->major = ps3vram_major;
-	gendisk->first_minor = 0;
+	gendisk->minors = 1;
 	gendisk->fops = &ps3vram_fops;
-	gendisk->queue = queue;
 	gendisk->private_data = dev;
 	strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
 	set_capacity(gendisk, priv->size >> 9);
+	blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS);
+	blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE);
+	blk_queue_max_hw_sectors(gendisk->queue, BLK_SAFE_MAX_SECTORS);
 
 	dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n",
 		 gendisk->disk_name, get_capacity(gendisk) >> 11);
@@ -770,8 +758,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 	device_add_disk(&dev->core, gendisk, NULL);
 	return 0;
 
-fail_cleanup_queue:
-	blk_cleanup_queue(queue);
 out_cache_cleanup:
 	remove_proc_entry(DEVICE_NAME, NULL);
 	ps3vram_cache_cleanup(dev);
@@ -802,8 +788,7 @@ static void ps3vram_remove(struct ps3_system_bus_device *dev)
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 
 	del_gendisk(priv->gendisk);
-	put_disk(priv->gendisk);
-	blk_cleanup_queue(priv->queue);
+	blk_cleanup_disk(priv->gendisk);
 	remove_proc_entry(DEVICE_NAME, NULL);
 	ps3vram_cache_cleanup(dev);
 	iounmap(priv->reports);

From 0692ef289f067d1de416cd33c9cf8d7e006293dc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:13 +0200
Subject: [PATCH 0778/3292] dcssblk: convert to blk_alloc_disk/blk_cleanup_disk

Convert the dcssblk driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-24-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/s390/block/dcssblk.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index da33cb4cba28..7faa56399999 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -90,7 +90,6 @@ struct dcssblk_dev_info {
 	int segment_type;
 	unsigned char save_pending;
 	unsigned char is_shared;
-	struct request_queue *dcssblk_queue;
 	int num_of_segments;
 	struct list_head seg_list;
 	struct dax_device *dax_dev;
@@ -429,9 +428,7 @@ removeseg:
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
-	blk_cleanup_queue(dev_info->dcssblk_queue);
-	dev_info->gd->queue = NULL;
-	put_disk(dev_info->gd);
+	blk_cleanup_disk(dev_info->gd);
 	up_write(&dcssblk_devices_sem);
 
 	if (device_remove_file_self(dev, attr)) {
@@ -644,18 +641,17 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	dev_info->dev.release = dcssblk_release_segment;
 	dev_info->dev.groups = dcssblk_dev_attr_groups;
 	INIT_LIST_HEAD(&dev_info->lh);
-	dev_info->gd = alloc_disk(DCSSBLK_MINORS_PER_DISK);
+	dev_info->gd = blk_alloc_disk(NUMA_NO_NODE);
 	if (dev_info->gd == NULL) {
 		rc = -ENOMEM;
 		goto seg_list_del;
 	}
 	dev_info->gd->major = dcssblk_major;
+	dev_info->gd->minors = DCSSBLK_MINORS_PER_DISK;
 	dev_info->gd->fops = &dcssblk_devops;
-	dev_info->dcssblk_queue = blk_alloc_queue(NUMA_NO_NODE);
-	dev_info->gd->queue = dev_info->dcssblk_queue;
 	dev_info->gd->private_data = dev_info;
-	blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096);
-	blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue);
+	blk_queue_logical_block_size(dev_info->gd->queue, 4096);
+	blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);
 
 	seg_byte_size = (dev_info->end - dev_info->start + 1);
 	set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
@@ -719,9 +715,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 
 put_dev:
 	list_del(&dev_info->lh);
-	blk_cleanup_queue(dev_info->dcssblk_queue);
-	dev_info->gd->queue = NULL;
-	put_disk(dev_info->gd);
+	blk_cleanup_disk(dev_info->gd);
 	list_for_each_entry(seg_info, &dev_info->seg_list, lh) {
 		segment_unload(seg_info->segment_name);
 	}
@@ -731,9 +725,7 @@ put_dev:
 dev_list_del:
 	list_del(&dev_info->lh);
 release_gd:
-	blk_cleanup_queue(dev_info->dcssblk_queue);
-	dev_info->gd->queue = NULL;
-	put_disk(dev_info->gd);
+	blk_cleanup_disk(dev_info->gd);
 	up_write(&dcssblk_devices_sem);
 seg_list_del:
 	if (dev_info == NULL)
@@ -801,9 +793,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
 	kill_dax(dev_info->dax_dev);
 	put_dax(dev_info->dax_dev);
 	del_gendisk(dev_info->gd);
-	blk_cleanup_queue(dev_info->dcssblk_queue);
-	dev_info->gd->queue = NULL;
-	put_disk(dev_info->gd);
+	blk_cleanup_disk(dev_info->gd);
 
 	/* unload all related segments */
 	list_for_each_entry(entry, &dev_info->seg_list, lh)

From ef35885400481b46ede9cbdcc1fce902e06002c5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:14 +0200
Subject: [PATCH 0779/3292] xpram: convert to blk_alloc_disk/blk_cleanup_disk

Convert the xpram driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-25-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/s390/block/xpram.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index d1ed39162943..91ef710edfd2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -56,7 +56,6 @@ typedef struct {
 static xpram_device_t xpram_devices[XPRAM_MAX_DEVS];
 static unsigned int xpram_sizes[XPRAM_MAX_DEVS];
 static struct gendisk *xpram_disks[XPRAM_MAX_DEVS];
-static struct request_queue *xpram_queues[XPRAM_MAX_DEVS];
 static unsigned int xpram_pages;
 static int xpram_devs;
 
@@ -341,17 +340,13 @@ static int __init xpram_setup_blkdev(void)
 	int i, rc = -ENOMEM;
 
 	for (i = 0; i < xpram_devs; i++) {
-		xpram_disks[i] = alloc_disk(1);
+		xpram_disks[i] = blk_alloc_disk(NUMA_NO_NODE);
 		if (!xpram_disks[i])
 			goto out;
-		xpram_queues[i] = blk_alloc_queue(NUMA_NO_NODE);
-		if (!xpram_queues[i]) {
-			put_disk(xpram_disks[i]);
-			goto out;
-		}
-		blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]);
-		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]);
-		blk_queue_logical_block_size(xpram_queues[i], 4096);
+		blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_disks[i]->queue);
+		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM,
+				xpram_disks[i]->queue);
+		blk_queue_logical_block_size(xpram_disks[i]->queue, 4096);
 	}
 
 	/*
@@ -373,9 +368,9 @@ static int __init xpram_setup_blkdev(void)
 		offset += xpram_devices[i].size;
 		disk->major = XPRAM_MAJOR;
 		disk->first_minor = i;
+		disk->minors = 1;
 		disk->fops = &xpram_devops;
 		disk->private_data = &xpram_devices[i];
-		disk->queue = xpram_queues[i];
 		sprintf(disk->disk_name, "slram%d", i);
 		set_capacity(disk, xpram_sizes[i] << 1);
 		add_disk(disk);
@@ -383,10 +378,8 @@ static int __init xpram_setup_blkdev(void)
 
 	return 0;
 out:
-	while (i--) {
-		blk_cleanup_queue(xpram_queues[i]);
-		put_disk(xpram_disks[i]);
-	}
+	while (i--)
+		blk_cleanup_disk(xpram_disks[i]);
 	return rc;
 }
 
@@ -434,8 +427,7 @@ static void __exit xpram_exit(void)
 	int i;
 	for (i = 0; i < xpram_devs; i++) {
 		del_gendisk(xpram_disks[i]);
-		blk_cleanup_queue(xpram_queues[i]);
-		put_disk(xpram_disks[i]);
+		blk_cleanup_disk(xpram_disks[i]);
 	}
 	unregister_blkdev(XPRAM_MAJOR, XPRAM_NAME);
 	platform_device_unregister(xpram_pdev);

From 132226b301b545198515fb8c6b7f537c13b71f4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:15 +0200
Subject: [PATCH 0780/3292] null_blk: convert to
 blk_alloc_disk/blk_cleanup_disk

Convert the null_blk driver to use the blk_alloc_disk and blk_cleanup_disk
helpers to simplify gendisk and request_queue allocation.  Note that the
blk-mq mode is left with its own allocations scheme, to be handled later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-26-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 38 +++++++++++++++++------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 5f006d9e1472..d8e098f1e5b5 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1597,11 +1597,10 @@ static void null_del_dev(struct nullb *nullb)
 		null_restart_queue_async(nullb);
 	}
 
-	blk_cleanup_queue(nullb->q);
+	blk_cleanup_disk(nullb->disk);
 	if (dev->queue_mode == NULL_Q_MQ &&
 	    nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);
-	put_disk(nullb->disk);
 	cleanup_queues(nullb);
 	if (null_cache_active(nullb))
 		null_free_device_storage(nullb->dev, true);
@@ -1700,22 +1699,19 @@ static int init_driver_queues(struct nullb *nullb)
 static int null_gendisk_register(struct nullb *nullb)
 {
 	sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
-	struct gendisk *disk;
+	struct gendisk *disk = nullb->disk;
 
-	disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
-	if (!disk)
-		return -ENOMEM;
 	set_capacity(disk, size);
 
 	disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
 	disk->major		= null_major;
 	disk->first_minor	= nullb->index;
+	disk->minors		= 1;
 	if (queue_is_mq(nullb->q))
 		disk->fops		= &null_rq_ops;
 	else
 		disk->fops		= &null_bio_ops;
 	disk->private_data	= nullb;
-	disk->queue		= nullb->q;
 	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
 
 	if (nullb->dev->zoned) {
@@ -1851,23 +1847,27 @@ static int null_add_dev(struct nullb_device *dev)
 			goto out_cleanup_queues;
 
 		if (!null_setup_fault())
-			goto out_cleanup_queues;
+			goto out_cleanup_tags;
 
+		rv = -ENOMEM;
 		nullb->tag_set->timeout = 5 * HZ;
 		nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
-		if (IS_ERR(nullb->q)) {
-			rv = -ENOMEM;
+		if (IS_ERR(nullb->q))
 			goto out_cleanup_tags;
-		}
+		nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
+		if (!nullb->disk)
+			goto out_cleanup_disk;
+		nullb->disk->queue = nullb->q;
 	} else if (dev->queue_mode == NULL_Q_BIO) {
-		nullb->q = blk_alloc_queue(dev->home_node);
-		if (!nullb->q) {
-			rv = -ENOMEM;
+		rv = -ENOMEM;
+		nullb->disk = blk_alloc_disk(nullb->dev->home_node);
+		if (!nullb->disk)
 			goto out_cleanup_queues;
-		}
+
+		nullb->q = nullb->disk->queue;
 		rv = init_driver_queues(nullb);
 		if (rv)
-			goto out_cleanup_blk_queue;
+			goto out_cleanup_disk;
 	}
 
 	if (dev->mbps) {
@@ -1883,7 +1883,7 @@ static int null_add_dev(struct nullb_device *dev)
 	if (dev->zoned) {
 		rv = null_init_zoned_dev(dev, nullb->q);
 		if (rv)
-			goto out_cleanup_blk_queue;
+			goto out_cleanup_disk;
 	}
 
 	nullb->q->queuedata = nullb;
@@ -1921,8 +1921,8 @@ static int null_add_dev(struct nullb_device *dev)
 	return 0;
 out_cleanup_zone:
 	null_free_zoned_dev(dev);
-out_cleanup_blk_queue:
-	blk_cleanup_queue(nullb->q);
+out_cleanup_disk:
+	blk_cleanup_disk(nullb->disk);
 out_cleanup_tags:
 	if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);

From da7ba72960ca2a9b968e47fcf414d16f3d4c0c42 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 May 2021 07:51:16 +0200
Subject: [PATCH 0781/3292] block: unexport blk_alloc_queue

blk_alloc_queue is just an internal helper now, unexport it and remove
it from the public header.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210521055116.1053587-27-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 1 -
 block/blk.h            | 2 ++
 include/linux/blkdev.h | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 689aac2625d2..3515a66022d7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -599,7 +599,6 @@ fail_q:
 	kmem_cache_free(blk_requestq_cachep, q);
 	return NULL;
 }
-EXPORT_SYMBOL(blk_alloc_queue);
 
 /**
  * blk_get_queue - increment the request_queue refcount
diff --git a/block/blk.h b/block/blk.h
index cba3a94aabfa..3440142f029b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -359,4 +359,6 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
 		unsigned int max_sectors, bool *same_page);
 
+struct request_queue *blk_alloc_queue(int node_id);
+
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2c28577b50f4..d66d0da72529 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1213,7 +1213,6 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 extern void blk_dump_rq_flags(struct request *, char *);
 
 bool __must_check blk_get_queue(struct request_queue *);
-struct request_queue *blk_alloc_queue(int node_id);
 extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
 

From 362529d9280af4b08d2c25a4b39b8e5ae7658f9a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:12:54 +0200
Subject: [PATCH 0782/3292] block: split __blkdev_get

Split __blkdev_get into one helper for the whole device, and one for
opening partitions.  This removes the (bounded) recursion when opening
a partition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20210525061301.2242282-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 126 ++++++++++++++++++++++++-------------------------
 1 file changed, 61 insertions(+), 65 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6cc4d4cfe0c2..2b5073e3c923 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1290,78 +1290,68 @@ rescan:
  */
 EXPORT_SYMBOL_GPL(bdev_disk_changed);
 
-/*
- * bd_mutex locking:
- *
- *  mutex_lock(part->bd_mutex)
- *    mutex_lock_nested(whole->bd_mutex, 1)
- */
-static int __blkdev_get(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
-	if (!(disk->flags & GENHD_FL_UP))
-		return -ENXIO;
-
-	if (!bdev->bd_openers) {
-		if (!bdev_is_partition(bdev)) {
-			ret = 0;
-			if (disk->fops->open)
-				ret = disk->fops->open(bdev, mode);
-
-			if (!ret)
-				set_init_blocksize(bdev);
-
-			/*
-			 * If the device is invalidated, rescan partition
-			 * if open succeeded or failed with -ENOMEDIUM.
-			 * The latter is necessary to prevent ghost
-			 * partitions on a removed medium.
-			 */
-			if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
-			    (!ret || ret == -ENOMEDIUM))
-				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
-
-			if (ret)
-				return ret;
-		} else {
-			struct block_device *whole = bdgrab(disk->part0);
-
-			mutex_lock_nested(&whole->bd_mutex, 1);
-			ret = __blkdev_get(whole, mode);
-			if (ret) {
-				mutex_unlock(&whole->bd_mutex);
-				bdput(whole);
-				return ret;
-			}
-			whole->bd_part_count++;
-			mutex_unlock(&whole->bd_mutex);
-
-			if (!bdev_nr_sectors(bdev)) {
-				__blkdev_put(whole, mode, 1);
-				bdput(whole);
-				return -ENXIO;
-			}
-			set_init_blocksize(bdev);
-		}
-
-		if (bdev->bd_bdi == &noop_backing_dev_info)
-			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
-	} else {
-		if (!bdev_is_partition(bdev)) {
-			if (bdev->bd_disk->fops->open)
-				ret = bdev->bd_disk->fops->open(bdev, mode);
-			/* the same as first opener case, read comment there */
-			if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
-			    (!ret || ret == -ENOMEDIUM))
-				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
-			if (ret)
-				return ret;
+	if (disk->fops->open) {
+		ret = disk->fops->open(bdev, mode);
+		if (ret) {
+			/* avoid ghost partitions on a removed medium */
+			if (ret == -ENOMEDIUM &&
+			     test_bit(GD_NEED_PART_SCAN, &disk->state))
+				bdev_disk_changed(bdev, true);
+			return ret;
 		}
 	}
+
+	if (!bdev->bd_openers) {
+		set_init_blocksize(bdev);
+		if (bdev->bd_bdi == &noop_backing_dev_info)
+			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
+	}
+	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
+		bdev_disk_changed(bdev, false);
 	bdev->bd_openers++;
+	return 0;;
+}
+
+static int blkdev_get_part(struct block_device *part, fmode_t mode)
+{
+	struct gendisk *disk = part->bd_disk;
+	struct block_device *whole;
+	int ret;
+
+	if (part->bd_openers)
+		goto done;
+
+	whole = bdgrab(disk->part0);
+	mutex_lock_nested(&whole->bd_mutex, 1);
+	ret = blkdev_get_whole(whole, mode);
+	if (ret) {
+		mutex_unlock(&whole->bd_mutex);
+		goto out_put_whole;
+	}
+	whole->bd_part_count++;
+	mutex_unlock(&whole->bd_mutex);
+
+	ret = -ENXIO;
+	if (!bdev_nr_sectors(part))
+		goto out_blkdev_put;
+
+	set_init_blocksize(part);
+	if (part->bd_bdi == &noop_backing_dev_info)
+		part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
+done:
+	part->bd_openers++;
 	return 0;
+
+out_blkdev_put:
+	__blkdev_put(whole, mode, 1);
+out_put_whole:
+	bdput(whole);
+	return ret;
 }
 
 struct block_device *blkdev_get_no_open(dev_t dev)
@@ -1448,7 +1438,13 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 	disk_block_events(disk);
 
 	mutex_lock(&bdev->bd_mutex);
-	ret =__blkdev_get(bdev, mode);
+	ret = -ENXIO;
+	if (!(disk->flags & GENHD_FL_UP))
+		goto abort_claiming;
+	if (bdev_is_partition(bdev))
+		ret = blkdev_get_part(bdev, mode);
+	else
+		ret = blkdev_get_whole(bdev, mode);
 	if (ret)
 		goto abort_claiming;
 	if (mode & FMODE_EXCL) {

From 210a6d756f20f33fc546ec8682a538fbcb84ee8e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:12:55 +0200
Subject: [PATCH 0783/3292] block: move sync_blockdev from __blkdev_put to
 blkdev_put

Do the early unlocked syncing even earlier to move more code out of
the recursive path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20210525061301.2242282-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2b5073e3c923..41d2d9708bf8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1552,16 +1552,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	/*
-	 * Sync early if it looks like we're the last one.  If someone else
-	 * opens the block device between now and the decrement of bd_openers
-	 * then we did a sync that we didn't need to, but that's not the end
-	 * of the world and we want to avoid long (could be several minute)
-	 * syncs while holding the mutex.
-	 */
-	if (bdev->bd_openers == 1)
-		sync_blockdev(bdev);
-
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (for_part)
 		bdev->bd_part_count--;
@@ -1588,6 +1578,16 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 
+	/*
+	 * Sync early if it looks like we're the last one.  If someone else
+	 * opens the block device between now and the decrement of bd_openers
+	 * then we did a sync that we didn't need to, but that's not the end
+	 * of the world and we want to avoid long (could be several minute)
+	 * syncs while holding the mutex.
+	 */
+	if (bdev->bd_openers == 1)
+		sync_blockdev(bdev);
+
 	mutex_lock(&bdev->bd_mutex);
 
 	if (mode & FMODE_EXCL) {

From a8698707a1835be3abd12a3b28079a80999f8dee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:12:56 +0200
Subject: [PATCH 0784/3292] block: move bd_mutex to struct gendisk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the per-block device bd_mutex with a per-gendisk open_mutex,
thus simplifying locking wherever we deal with partitions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/filesystems/locking.rst |  2 +-
 block/genhd.c                         |  7 ++---
 block/partitions/core.c               | 24 ++++++++---------
 drivers/block/loop.c                  | 14 +++++-----
 drivers/block/xen-blkfront.c          |  8 +++---
 drivers/block/zram/zram_drv.c         | 18 ++++++-------
 drivers/block/zram/zram_drv.h         |  2 +-
 drivers/md/md.h                       |  6 ++---
 drivers/s390/block/dasd_genhd.c       |  8 +++---
 drivers/scsi/sd.c                     |  4 +--
 fs/block_dev.c                        | 37 +++++++++++----------------
 fs/btrfs/volumes.c                    |  2 +-
 fs/super.c                            |  8 +++---
 include/linux/blk_types.h             |  1 -
 include/linux/genhd.h                 |  3 +++
 15 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 1e894480115b..2183fd8cc350 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -480,7 +480,7 @@ prototypes::
 locking rules:
 
 ======================= ===================
-ops			bd_mutex
+ops			open_mutex
 ======================= ===================
 open:			yes
 release:		yes
diff --git a/block/genhd.c b/block/genhd.c
index efe0db4d62f0..38d136a19484 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -591,10 +591,10 @@ void del_gendisk(struct gendisk *disk)
 	blk_integrity_del(disk);
 	disk_del_events(disk);
 
-	mutex_lock(&disk->part0->bd_mutex);
+	mutex_lock(&disk->open_mutex);
 	disk->flags &= ~GENHD_FL_UP;
 	blk_drop_partitions(disk);
-	mutex_unlock(&disk->part0->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 
 	fsync_bdev(disk->part0);
 	__invalidate_device(disk->part0, true);
@@ -1273,6 +1273,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 		goto out_free_disk;
 
 	disk->node_id = node_id;
+	mutex_init(&disk->open_mutex);
 	xa_init(&disk->part_tbl);
 	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
 		goto out_destroy_part_tbl;
@@ -1525,7 +1526,7 @@ void disk_unblock_events(struct gendisk *disk)
  * doesn't clear the events from @disk->ev.
  *
  * CONTEXT:
- * If @mask is non-zero must be called with bdev->bd_mutex held.
+ * If @mask is non-zero must be called with disk->open_mutex held.
  */
 void disk_flush_events(struct gendisk *disk, unsigned int mask)
 {
diff --git a/block/partitions/core.c b/block/partitions/core.c
index ada3e1e66989..4fde8e0dd7cd 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -283,7 +283,7 @@ struct device_type part_type = {
 };
 
 /*
- * Must be called either with bd_mutex held, before a disk can be opened or
+ * Must be called either with open_mutex held, before a disk can be opened or
  * after all disk users are gone.
  */
 static void delete_partition(struct block_device *part)
@@ -312,7 +312,7 @@ static ssize_t whole_disk_show(struct device *dev,
 static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
 
 /*
- * Must be called either with bd_mutex held, before a disk can be opened or
+ * Must be called either with open_mutex held, before a disk can be opened or
  * after all disk users are gone.
  */
 static struct block_device *add_partition(struct gendisk *disk, int partno,
@@ -453,15 +453,15 @@ int bdev_add_partition(struct block_device *bdev, int partno,
 {
 	struct block_device *part;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 		return -EBUSY;
 	}
 
 	part = add_partition(bdev->bd_disk, partno, start, length,
 			ADDPART_FLAG_NONE, NULL);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	return PTR_ERR_OR_ZERO(part);
 }
 
@@ -474,8 +474,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
 	if (!part)
 		return -ENXIO;
 
-	mutex_lock(&part->bd_mutex);
-	mutex_lock_nested(&bdev->bd_mutex, 1);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	ret = -EBUSY;
 	if (part->bd_openers)
@@ -484,8 +483,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
 	delete_partition(part);
 	ret = 0;
 out_unlock:
-	mutex_unlock(&bdev->bd_mutex);
-	mutex_unlock(&part->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	bdput(part);
 	return ret;
 }
@@ -500,8 +498,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
 	if (!part)
 		return -ENXIO;
 
-	mutex_lock(&part->bd_mutex);
-	mutex_lock_nested(&bdev->bd_mutex, 1);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	ret = -EINVAL;
 	if (start != part->bd_start_sect)
 		goto out_unlock;
@@ -514,8 +511,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
 
 	ret = 0;
 out_unlock:
-	mutex_unlock(&part->bd_mutex);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	bdput(part);
 	return ret;
 }
@@ -541,7 +537,7 @@ void blk_drop_partitions(struct gendisk *disk)
 	struct block_device *part;
 	unsigned long idx;
 
-	lockdep_assert_held(&disk->part0->bd_mutex);
+	lockdep_assert_held(&disk->open_mutex);
 
 	xa_for_each_start(&disk->part_tbl, idx, part, 1) {
 		if (!bdgrab(part))
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d58d68f3c7cd..95c570f5923f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -652,9 +652,9 @@ static void loop_reread_partitions(struct loop_device *lo,
 {
 	int rc;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	rc = bdev_disk_changed(bdev, false);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -747,7 +747,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	mutex_unlock(&lo->lo_mutex);
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
-	 * the file ref can take bd_mutex which creates circular locking
+	 * the file ref can take open_mutex which creates circular locking
 	 * dependency.
 	 */
 	fput(old_file);
@@ -1260,7 +1260,7 @@ out_unlock:
 	mutex_unlock(&lo->lo_mutex);
 	if (partscan) {
 		/*
-		 * bd_mutex has been held already in release path, so don't
+		 * open_mutex has been held already in release path, so don't
 		 * acquire it if this function is called in such case.
 		 *
 		 * If the reread partition isn't from release path, lo_refcnt
@@ -1268,10 +1268,10 @@ out_unlock:
 		 * current holder is released.
 		 */
 		if (!release)
-			mutex_lock(&bdev->bd_mutex);
+			mutex_lock(&bdev->bd_disk->open_mutex);
 		err = bdev_disk_changed(bdev, false);
 		if (!release)
-			mutex_unlock(&bdev->bd_mutex);
+			mutex_unlock(&bdev->bd_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo_number, err);
@@ -1298,7 +1298,7 @@ out_unlock:
 	/*
 	 * Need not hold lo_mutex to fput backing file. Calling fput holding
 	 * lo_mutex triggers a circular lock dependency possibility warning as
-	 * fput can take bd_mutex which is usually taken before lo_mutex.
+	 * fput can take open_mutex which is usually taken before lo_mutex.
 	 */
 	if (filp)
 		fput(filp);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 10df39a8b18d..f2c1aedcdf5a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2163,7 +2163,7 @@ static void blkfront_closing(struct blkfront_info *info)
 		return;
 	}
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	if (bdev->bd_openers) {
 		xenbus_dev_error(xbdev, -EBUSY,
@@ -2174,7 +2174,7 @@ static void blkfront_closing(struct blkfront_info *info)
 		xenbus_frontend_closed(xbdev);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	bdput(bdev);
 }
 
@@ -2531,7 +2531,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
 	 * isn't closed yet, we let release take care of it.
 	 */
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&disk->open_mutex);
 	info = disk->private_data;
 
 	dev_warn(disk_to_dev(disk),
@@ -2546,7 +2546,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
 		mutex_unlock(&blkfront_mutex);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 	bdput(bdev);
 
 	return 0;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 006416cc4969..fcaf2750f68f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1781,24 +1781,24 @@ static ssize_t reset_store(struct device *dev,
 	zram = dev_to_zram(dev);
 	bdev = zram->disk->part0;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	/* Do not reset an active device or claimed device */
 	if (bdev->bd_openers || zram->claim) {
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 		return -EBUSY;
 	}
 
 	/* From now on, anyone can't open /dev/zram[0-9] */
 	zram->claim = true;
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	/* Make sure all the pending I/O are finished */
 	fsync_bdev(bdev);
 	zram_reset_device(zram);
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	zram->claim = false;
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	return len;
 }
@@ -1808,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
 	int ret = 0;
 	struct zram *zram;
 
-	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+	WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
 
 	zram = bdev->bd_disk->private_data;
 	/* zram was claimed to reset so open request fails */
@@ -1972,14 +1972,14 @@ static int zram_remove(struct zram *zram)
 {
 	struct block_device *bdev = zram->disk->part0;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	if (bdev->bd_openers || zram->claim) {
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 		return -EBUSY;
 	}
 
 	zram->claim = true;
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	zram_debugfs_unregister(zram);
 
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 419a7e8281ee..74c411911b6e 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -112,7 +112,7 @@ struct zram {
 	/*
 	 * zram is claimed so open request will be failed
 	 */
-	bool claim; /* Protected by bdev->bd_mutex */
+	bool claim; /* Protected by disk->open_mutex */
 	struct file *backing_dev;
 #ifdef CONFIG_ZRAM_WRITEBACK
 	spinlock_t wb_limit_lock;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fb7eab58cfd5..a88086d4110c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -395,10 +395,10 @@ struct mddev {
 	 * that we are never stopping an array while it is open.
 	 * 'reconfig_mutex' protects all other reconfiguration.
 	 * These locks are separate due to conflicting interactions
-	 * with bdev->bd_mutex.
+	 * with disk->open_mutex.
 	 * Lock ordering is:
-	 *  reconfig_mutex -> bd_mutex
-	 *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+	 *  reconfig_mutex -> disk->open_mutex
+	 *  disk->open_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
 	 */
 	struct mutex			open_mutex;
 	struct mutex			reconfig_mutex;
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 8d6587ec73e2..bf2082d461c7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -109,9 +109,9 @@ int dasd_scan_partitions(struct dasd_block *block)
 		return -ENODEV;
 	}
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&block->gdp->open_mutex);
 	rc = bdev_disk_changed(bdev, false);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&block->gdp->open_mutex);
 	if (rc)
 		DBF_DEV_EVENT(DBF_ERR, block->base,
 				"scan partitions error, rc %d", rc);
@@ -145,9 +145,9 @@ void dasd_destroy_partitions(struct dasd_block *block)
 	bdev = block->bdev;
 	block->bdev = NULL;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	bdev_disk_changed(bdev, true);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
 	blkdev_put(bdev, FMODE_READ);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cb3c37d1e009..d3ff723af879 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1400,7 +1400,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
  *	In the latter case @inode and @filp carry an abridged amount
  *	of information as noted above.
  *
- *	Locking: called with bdev->bd_mutex held.
+ *	Locking: called with bdev->bd_disk->open_mutex held.
  **/
 static int sd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -1476,7 +1476,7 @@ error_out:
  *	Note: may block (uninterruptible) if error recovery is underway
  *	on this disk.
  *
- *	Locking: called with bdev->bd_mutex held.
+ *	Locking: called with bdev->bd_disk->open_mutex held.
  **/
 static void sd_release(struct gendisk *disk, fmode_t mode)
 {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 41d2d9708bf8..e094806c3a0c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
 
 	bdev = I_BDEV(inode);
-	mutex_init(&bdev->bd_mutex);
 	mutex_init(&bdev->bd_fsfreeze_mutex);
 	spin_lock_init(&bdev->bd_size_lock);
 	bdev->bd_disk = disk;
@@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	struct bd_holder_disk *holder;
 	int ret = 0;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	WARN_ON_ONCE(!bdev->bd_holder);
 
@@ -1199,7 +1198,7 @@ out_del:
 out_free:
 	kfree(holder);
 out_unlock:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 {
 	struct bd_holder_disk *holder;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	holder = bd_find_holder_disk(bdev, disk);
 
@@ -1230,7 +1229,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 		kfree(holder);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 }
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
@@ -1242,7 +1241,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
-	lockdep_assert_held(&bdev->bd_mutex);
+	lockdep_assert_held(&disk->open_mutex);
 
 	if (!(disk->flags & GENHD_FL_UP))
 		return -ENXIO;
@@ -1327,14 +1326,10 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 		goto done;
 
 	whole = bdgrab(disk->part0);
-	mutex_lock_nested(&whole->bd_mutex, 1);
 	ret = blkdev_get_whole(whole, mode);
-	if (ret) {
-		mutex_unlock(&whole->bd_mutex);
+	if (ret)
 		goto out_put_whole;
-	}
 	whole->bd_part_count++;
-	mutex_unlock(&whole->bd_mutex);
 
 	ret = -ENXIO;
 	if (!bdev_nr_sectors(part))
@@ -1437,7 +1432,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 
 	disk_block_events(disk);
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&disk->open_mutex);
 	ret = -ENXIO;
 	if (!(disk->flags & GENHD_FL_UP))
 		goto abort_claiming;
@@ -1463,7 +1458,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 			unblock_events = false;
 		}
 	}
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 
 	if (unblock_events)
 		disk_unblock_events(disk);
@@ -1472,7 +1467,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 abort_claiming:
 	if (mode & FMODE_EXCL)
 		bd_abort_claiming(bdev, holder);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 	disk_unblock_events(disk);
 put_blkdev:
 	blkdev_put_no_open(bdev);
@@ -1552,7 +1547,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (for_part)
 		bdev->bd_part_count--;
 
@@ -1567,7 +1561,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 
 	if (!bdev_is_partition(bdev) && disk->fops->release)
 		disk->fops->release(disk, mode);
-	mutex_unlock(&bdev->bd_mutex);
 	if (victim) {
 		__blkdev_put(victim, mode, 1);
 		bdput(victim);
@@ -1588,15 +1581,14 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	if (bdev->bd_openers == 1)
 		sync_blockdev(bdev);
 
-	mutex_lock(&bdev->bd_mutex);
-
+	mutex_lock(&disk->open_mutex);
 	if (mode & FMODE_EXCL) {
 		struct block_device *whole = bdev_whole(bdev);
 		bool bdev_free;
 
 		/*
 		 * Release a claim on the device.  The holder fields
-		 * are protected with bdev_lock.  bd_mutex is to
+		 * are protected with bdev_lock.  open_mutex is to
 		 * synchronize disk_holder unlinking.
 		 */
 		spin_lock(&bdev_lock);
@@ -1627,9 +1619,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	 * from userland - e.g. eject(1).
 	 */
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
-	mutex_unlock(&bdev->bd_mutex);
 
 	__blkdev_put(bdev, mode, 0);
+	mutex_unlock(&disk->open_mutex);
+
 	blkdev_put_no_open(bdev);
 }
 EXPORT_SYMBOL(blkdev_put);
@@ -1936,10 +1929,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 		old_inode = inode;
 		bdev = I_BDEV(inode);
 
-		mutex_lock(&bdev->bd_mutex);
+		mutex_lock(&bdev->bd_disk->open_mutex);
 		if (bdev->bd_openers)
 			func(bdev, arg);
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 
 		spin_lock(&blockdev_superblock->s_inode_list_lock);
 	}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 47d27059d064..f246eb2772e9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1247,7 +1247,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 	lockdep_assert_held(&uuid_mutex);
 	/*
 	 * The device_list_mutex cannot be taken here in case opening the
-	 * underlying device takes further locks like bd_mutex.
+	 * underlying device takes further locks like open_mutex.
 	 *
 	 * We also don't need the lock here as this is called during mount and
 	 * exclusion is provided by uuid_mutex
diff --git a/fs/super.c b/fs/super.c
index 11b7e7213fd1..91b7f156735b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1277,9 +1277,9 @@ int get_tree_bdev(struct fs_context *fc,
 		}
 
 		/*
-		 * s_umount nests inside bd_mutex during
+		 * s_umount nests inside open_mutex during
 		 * __invalidate_device().  blkdev_put() acquires
-		 * bd_mutex and can't be called under s_umount.  Drop
+		 * open_mutex and can't be called under s_umount.  Drop
 		 * s_umount temporarily.  This is safe as we're
 		 * holding an active reference.
 		 */
@@ -1352,9 +1352,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		}
 
 		/*
-		 * s_umount nests inside bd_mutex during
+		 * s_umount nests inside open_mutex during
 		 * __invalidate_device().  blkdev_put() acquires
-		 * bd_mutex and can't be called under s_umount.  Drop
+		 * open_mutex and can't be called under s_umount.  Drop
 		 * s_umount temporarily.  This is safe as we're
 		 * holding an active reference.
 		 */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db026b6ec15a..a09660671fa4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -29,7 +29,6 @@ struct block_device {
 	int			bd_openers;
 	struct inode *		bd_inode;	/* will die */
 	struct super_block *	bd_super;
-	struct mutex		bd_mutex;	/* open/close mutex */
 	void *			bd_claiming;
 	struct device		bd_device;
 	void *			bd_holder;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 782f0171d104..1fabb1559110 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -154,6 +154,9 @@ struct gendisk {
 #define GD_NEED_PART_SCAN		0
 #define GD_READ_ONLY			1
 #define GD_QUEUE_REF			2
+
+	struct mutex open_mutex;	/* open/close mutex */
+
 	struct kobject *slave_dir;
 
 	struct timer_rand_state *random;

From e54069acac1a302c1adc26694963547f8b73c2b0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:12:57 +0200
Subject: [PATCH 0785/3292] block: move adjusting bd_part_count out of
 __blkdev_get

Keep in the callers and thus remove the for_part argument.  This mirrors
what is done on the blkdev_get side and slightly simplifies
blkdev_get_part as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@rehat.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index e094806c3a0c..43dce929e7ee 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,7 +1234,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+static void __blkdev_put(struct block_device *bdev, fmode_t mode);
 
 int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 {
@@ -1329,12 +1329,12 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	ret = blkdev_get_whole(whole, mode);
 	if (ret)
 		goto out_put_whole;
-	whole->bd_part_count++;
 
 	ret = -ENXIO;
 	if (!bdev_nr_sectors(part))
 		goto out_blkdev_put;
 
+	whole->bd_part_count++;
 	set_init_blocksize(part);
 	if (part->bd_bdi == &noop_backing_dev_info)
 		part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
@@ -1343,7 +1343,7 @@ done:
 	return 0;
 
 out_blkdev_put:
-	__blkdev_put(whole, mode, 1);
+	__blkdev_put(whole, mode);
 out_put_whole:
 	bdput(whole);
 	return ret;
@@ -1542,14 +1542,11 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
+static void __blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	if (for_part)
-		bdev->bd_part_count--;
-
 	if (!--bdev->bd_openers) {
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
@@ -1562,7 +1559,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	if (!bdev_is_partition(bdev) && disk->fops->release)
 		disk->fops->release(disk, mode);
 	if (victim) {
-		__blkdev_put(victim, mode, 1);
+		victim->bd_part_count--;
+		__blkdev_put(victim, mode);
 		bdput(victim);
 	}
 }
@@ -1620,7 +1618,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	 */
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
 
-	__blkdev_put(bdev, mode, 0);
+	__blkdev_put(bdev, mode);
 	mutex_unlock(&disk->open_mutex);
 
 	blkdev_put_no_open(bdev);

From c8276b954d2dacbabe587c0421a9344529af5bad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:12:58 +0200
Subject: [PATCH 0786/3292] block: split __blkdev_put

Split __blkdev_put into one helper for the whole device, and one for
partitions as well as another shared helper for flushing the block
device inode mapping.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 58 ++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 43dce929e7ee..cd45b54e86b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,7 +1234,13 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode);
+static void blkdev_flush_mapping(struct block_device *bdev)
+{
+	WARN_ON_ONCE(bdev->bd_holders);
+	sync_blockdev(bdev);
+	kill_bdev(bdev);
+	bdev_write_inode(bdev);
+}
 
 int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 {
@@ -1316,6 +1322,14 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 	return 0;;
 }
 
+static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+{
+	if (!--bdev->bd_openers)
+		blkdev_flush_mapping(bdev);
+	if (bdev->bd_disk->fops->release)
+		bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+}
+
 static int blkdev_get_part(struct block_device *part, fmode_t mode)
 {
 	struct gendisk *disk = part->bd_disk;
@@ -1343,12 +1357,24 @@ done:
 	return 0;
 
 out_blkdev_put:
-	__blkdev_put(whole, mode);
+	blkdev_put_whole(whole, mode);
 out_put_whole:
 	bdput(whole);
 	return ret;
 }
 
+static void blkdev_put_part(struct block_device *part, fmode_t mode)
+{
+	struct block_device *whole = bdev_whole(part);
+
+	if (--part->bd_openers)
+		return;
+	blkdev_flush_mapping(part);
+	whole->bd_part_count--;
+	blkdev_put_whole(whole, mode);
+	bdput(whole);
+}
+
 struct block_device *blkdev_get_no_open(dev_t dev)
 {
 	struct block_device *bdev;
@@ -1542,29 +1568,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct block_device *victim = NULL;
-
-	if (!--bdev->bd_openers) {
-		WARN_ON_ONCE(bdev->bd_holders);
-		sync_blockdev(bdev);
-		kill_bdev(bdev);
-		bdev_write_inode(bdev);
-		if (bdev_is_partition(bdev))
-			victim = bdev_whole(bdev);
-	}
-
-	if (!bdev_is_partition(bdev) && disk->fops->release)
-		disk->fops->release(disk, mode);
-	if (victim) {
-		victim->bd_part_count--;
-		__blkdev_put(victim, mode);
-		bdput(victim);
-	}
-}
-
 void blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
@@ -1618,7 +1621,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	 */
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
 
-	__blkdev_put(bdev, mode);
+	if (bdev_is_partition(bdev))
+		blkdev_put_part(bdev, mode);
+	else
+		blkdev_put_whole(bdev, mode);
 	mutex_unlock(&disk->open_mutex);
 
 	blkdev_put_no_open(bdev);

From ab4b57057d744861f670b47b163209727b26418b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:12:59 +0200
Subject: [PATCH 0787/3292] block: move bd_part_count to struct gendisk

The bd_part_count value only makes sense for whole devices, so move it
to struct gendisk and give it a more descriptive name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/ioctl.c             | 2 +-
 fs/block_dev.c            | 6 +++---
 include/linux/blk_types.h | 3 ---
 include/linux/genhd.h     | 1 +
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index 8ba1ed8defd0..24beec9ca9c9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -89,7 +89,7 @@ static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	if (bdev->bd_part_count)
+	if (bdev->bd_disk->open_partitions)
 		return -EBUSY;
 
 	/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index cd45b54e86b4..ac9b3c158a77 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1253,7 +1253,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 		return -ENXIO;
 
 rescan:
-	if (bdev->bd_part_count)
+	if (disk->open_partitions)
 		return -EBUSY;
 	sync_blockdev(bdev);
 	invalidate_bdev(bdev);
@@ -1348,7 +1348,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	if (!bdev_nr_sectors(part))
 		goto out_blkdev_put;
 
-	whole->bd_part_count++;
+	disk->open_partitions++;
 	set_init_blocksize(part);
 	if (part->bd_bdi == &noop_backing_dev_info)
 		part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
@@ -1370,7 +1370,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
 	if (--part->bd_openers)
 		return;
 	blkdev_flush_mapping(part);
-	whole->bd_part_count--;
+	whole->bd_disk->open_partitions--;
 	blkdev_put_whole(whole, mode);
 	bdput(whole);
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a09660671fa4..fd3860d18d7e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,9 +39,6 @@ struct block_device {
 #endif
 	struct kobject		*bd_holder_dir;
 	u8			bd_partno;
-	/* number of times partitions within this device have been opened. */
-	unsigned		bd_part_count;
-
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct gendisk *	bd_disk;
 	struct backing_dev_info *bd_bdi;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 1fabb1559110..47d4605c0e7e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -156,6 +156,7 @@ struct gendisk {
 #define GD_QUEUE_REF			2
 
 	struct mutex open_mutex;	/* open/close mutex */
+	unsigned open_partitions;	/* number of open partitions */
 
 	struct kobject *slave_dir;
 

From c97d93c31e5734a16bfe663085ec91b8c9fb20f9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:13:00 +0200
Subject: [PATCH 0788/3292] block: factor out a part_devt helper

Add a helper to find the dev_t for a disk + partno tuple.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 25 +++++++++++++++++--------
 include/linux/genhd.h |  1 +
 init/do_mounts.c      | 10 ++--------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 38d136a19484..3f7b1c92c7f3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1227,6 +1227,19 @@ static int __init proc_genhd_init(void)
 module_init(proc_genhd_init);
 #endif /* CONFIG_PROC_FS */
 
+dev_t part_devt(struct gendisk *disk, u8 partno)
+{
+	struct block_device *part = bdget_disk(disk, partno);
+	dev_t devt = 0;
+
+	if (part) {
+		devt = part->bd_dev;
+		bdput(part);
+	}
+
+	return devt;
+}
+
 dev_t blk_lookup_devt(const char *name, int partno)
 {
 	dev_t devt = MKDEV(0, 0);
@@ -1236,7 +1249,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 	while ((dev = class_dev_iter_next(&iter))) {
 		struct gendisk *disk = dev_to_disk(dev);
-		struct block_device *part;
 
 		if (strcmp(dev_name(dev), name))
 			continue;
@@ -1247,13 +1259,10 @@ dev_t blk_lookup_devt(const char *name, int partno)
 			 */
 			devt = MKDEV(MAJOR(dev->devt),
 				     MINOR(dev->devt) + partno);
-			break;
-		}
-		part = bdget_disk(disk, partno);
-		if (part) {
-			devt = part->bd_dev;
-			bdput(part);
-			break;
+		} else {
+			devt = part_devt(disk, partno);
+			if (devt)
+				break;
 		}
 	}
 	class_dev_iter_exit(&iter);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 47d4605c0e7e..64a8431202b7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -333,6 +333,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
 }
 #endif /* CONFIG_SYSFS */
 
+dev_t part_devt(struct gendisk *disk, u8 partno);
 dev_t blk_lookup_devt(const char *name, int partno);
 void blk_request_module(dev_t devt);
 #ifdef CONFIG_BLOCK
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a78e44ee6adb..74aede860de7 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -133,14 +133,8 @@ static dev_t devt_from_partuuid(const char *uuid_str)
 		 * Attempt to find the requested partition by adding an offset
 		 * to the partition number found by UUID.
 		 */
-		struct block_device *part;
-
-		part = bdget_disk(dev_to_disk(dev),
-				  dev_to_bdev(dev)->bd_partno + offset);
-		if (part) {
-			devt = part->bd_dev;
-			bdput(part);
-		}
+		devt = part_devt(dev_to_disk(dev),
+				 dev_to_bdev(dev)->bd_partno + offset);
 	} else {
 		devt = dev->devt;
 	}

From 0e0ccdecb3cff95a350b4364e7ebbaa754d0e47d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 May 2021 08:13:01 +0200
Subject: [PATCH 0789/3292] block: remove bdget_disk

Just opencode the xa_load in the callers, as none of them actually
needs a reference to the bdev.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210525061301.2242282-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c           | 35 +++++------------------------------
 block/partitions/core.c | 27 +++++++++++++--------------
 include/linux/genhd.h   |  1 -
 3 files changed, 18 insertions(+), 45 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 3f7b1c92c7f3..5f5628216295 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -676,32 +676,6 @@ void blk_request_module(dev_t devt)
 		request_module("block-major-%d", MAJOR(devt));
 }
 
-/**
- * bdget_disk - do bdget() by gendisk and partition number
- * @disk: gendisk of interest
- * @partno: partition number
- *
- * Find partition @partno from @disk, do bdget() on it.
- *
- * CONTEXT:
- * Don't care.
- *
- * RETURNS:
- * Resulting block_device on success, NULL on failure.
- */
-struct block_device *bdget_disk(struct gendisk *disk, int partno)
-{
-	struct block_device *bdev = NULL;
-
-	rcu_read_lock();
-	bdev = xa_load(&disk->part_tbl, partno);
-	if (bdev && !bdgrab(bdev))
-		bdev = NULL;
-	rcu_read_unlock();
-
-	return bdev;
-}
-
 /*
  * print a full list of all partitions - intended for places where the root
  * filesystem can't be mounted and thus to give the victim some idea of what
@@ -1229,13 +1203,14 @@ module_init(proc_genhd_init);
 
 dev_t part_devt(struct gendisk *disk, u8 partno)
 {
-	struct block_device *part = bdget_disk(disk, partno);
+	struct block_device *part;
 	dev_t devt = 0;
 
-	if (part) {
+	rcu_read_lock();
+	part = xa_load(&disk->part_tbl, partno);
+	if (part)
 		devt = part->bd_dev;
-		bdput(part);
-	}
+	rcu_read_unlock();
 
 	return devt;
 }
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 4fde8e0dd7cd..186d4fbd9f09 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -326,6 +326,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
 	const char *dname;
 	int err;
 
+	lockdep_assert_held(&disk->open_mutex);
+
 	if (partno >= disk_max_parts(disk))
 		return ERR_PTR(-EINVAL);
 
@@ -467,14 +469,13 @@ int bdev_add_partition(struct block_device *bdev, int partno,
 
 int bdev_del_partition(struct block_device *bdev, int partno)
 {
-	struct block_device *part;
-	int ret;
-
-	part = bdget_disk(bdev->bd_disk, partno);
-	if (!part)
-		return -ENXIO;
+	struct block_device *part = NULL;
+	int ret = -ENXIO;
 
 	mutex_lock(&bdev->bd_disk->open_mutex);
+	part = xa_load(&bdev->bd_disk->part_tbl, partno);
+	if (!part)
+		goto out_unlock;
 
 	ret = -EBUSY;
 	if (part->bd_openers)
@@ -484,21 +485,20 @@ int bdev_del_partition(struct block_device *bdev, int partno)
 	ret = 0;
 out_unlock:
 	mutex_unlock(&bdev->bd_disk->open_mutex);
-	bdput(part);
 	return ret;
 }
 
 int bdev_resize_partition(struct block_device *bdev, int partno,
 		sector_t start, sector_t length)
 {
-	struct block_device *part;
-	int ret = 0;
-
-	part = bdget_disk(bdev->bd_disk, partno);
-	if (!part)
-		return -ENXIO;
+	struct block_device *part = NULL;
+	int ret = -ENXIO;
 
 	mutex_lock(&bdev->bd_disk->open_mutex);
+	part = xa_load(&bdev->bd_disk->part_tbl, partno);
+	if (!part)
+		goto out_unlock;
+
 	ret = -EINVAL;
 	if (start != part->bd_start_sect)
 		goto out_unlock;
@@ -512,7 +512,6 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
 	ret = 0;
 out_unlock:
 	mutex_unlock(&bdev->bd_disk->open_mutex);
-	bdput(part);
 	return ret;
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 64a8431202b7..03d684f0498f 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -223,7 +223,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
 }
 
 extern void del_gendisk(struct gendisk *gp);
-extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
 void set_disk_ro(struct gendisk *disk, bool read_only);
 

From 89258f8e4148630a7d327d23ce55b6f80b290ff4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 29 May 2021 18:50:45 +0200
Subject: [PATCH 0790/3292] netfilter: nft_set_pipapo_avx2: fix up description
 warnings

W=1:
net/netfilter/nft_set_pipapo_avx2.c:159: warning: Excess function parameter 'len' description in 'nft_pipapo_avx2_refill'
net/netfilter/nft_set_pipapo_avx2.c:1124: warning: Function parameter or member 'key' not described in 'nft_pipapo_avx2_lookup'
net/netfilter/nft_set_pipapo_avx2.c:1124: warning: Excess function parameter 'elem' description in 'nft_pipapo_avx2_lookup'

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_pipapo_avx2.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 1c2620923a61..e517663e0cd1 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -142,7 +142,6 @@ static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len)
  * @map:	Bitmap to be scanned for set bits
  * @dst:	Destination bitmap
  * @mt:		Mapping table containing bit set specifiers
- * @len:	Length of bitmap in longs
  * @last:	Return index of first set bit, if this is the last field
  *
  * This is an alternative implementation of pipapo_refill() suitable for usage
@@ -1109,7 +1108,7 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
  * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
  * @net:	Network namespace
  * @set:	nftables API set representation
- * @elem:	nftables API element representation containing key data
+ * @key:	nftables API element representation containing key data
  * @ext:	nftables API extension pointer, filled with matching reference
  *
  * For more details, see DOC: Theory of Operation in nft_set_pipapo.c.

From 8a1c08ad19b6ecb7254eca5c7275cb5d6fa1b0cb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 29 May 2021 18:53:25 +0200
Subject: [PATCH 0791/3292] netfilter: fix clang-12 fmt string warnings

nf_conntrack_h323_main.c:198:6: warning: format specifies type 'unsigned short' but
xt_AUDIT.c:121:9: warning: format specifies type 'unsigned char' but the argument has type 'int' [-Wformat]

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_h323_main.c | 2 +-
 net/netfilter/xt_AUDIT.c               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index aafaff00baf1..2eb31ffb3d14 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -194,7 +194,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 		if (tcpdatalen == 4) {	/* Separate TPKT header */
 			/* Netmeeting sends TPKT header and data separately */
 			pr_debug("nf_ct_h323: separate TPKT header indicates "
-				 "there will be TPKT data of %hu bytes\n",
+				 "there will be TPKT data of %d bytes\n",
 				 tpktlen - 4);
 			info->tpkt_len[dir] = tpktlen - 4;
 			return 0;
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 9cdc16b0d0d8..b6a015aee0ce 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -117,7 +117,7 @@ static int audit_tg_check(const struct xt_tgchk_param *par)
 	const struct xt_audit_info *info = par->targinfo;
 
 	if (info->type > XT_AUDIT_TYPE_MAX) {
-		pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n",
+		pr_info_ratelimited("Audit type out of range (valid range: 0..%u)\n",
 				    XT_AUDIT_TYPE_MAX);
 		return -ERANGE;
 	}

From 0bf4d9af2efe118263ddb6b80bc5176629f20781 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 31 May 2021 20:48:59 +0800
Subject: [PATCH 0792/3292] net: neterion: fix doc warnings in s2io.c

Add description for may_sleep to fix the W=1 warnings:

  drivers/net/ethernet/neterion/s2io.c:1110: warning: Function parameter or member 'may_sleep' not described in 'init_tti'
  drivers/net/ethernet/neterion/s2io.c:3335: warning: Function parameter or member 'may_sleep' not described in 'wait_for_cmd_complete'
  drivers/net/ethernet/neterion/s2io.c:4881: warning: Function parameter or member 'may_sleep' not described in 's2io_set_multicast'

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/neterion/s2io.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 27a65ab3d501..0b017d4f5c08 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -1101,6 +1101,8 @@ static int s2io_print_pci_mode(struct s2io_nic *nic)
  *  @nic: device private variable
  *  @link: link status (UP/DOWN) used to enable/disable continuous
  *  transmit interrupts
+ *  @may_sleep: parameter indicates if sleeping when waiting for
+ *  command complete
  *  Description: The function configures transmit traffic interrupts
  *  Return Value:  SUCCESS on success and
  *  '-1' on failure
@@ -3323,6 +3325,8 @@ static void s2io_updt_xpak_counter(struct net_device *dev)
  *  @addr: address
  *  @busy_bit: bit to check for busy
  *  @bit_state: state to check
+ *  @may_sleep: parameter indicates if sleeping when waiting for
+ *  command complete
  *  Description: Function that waits for a command to Write into RMAC
  *  ADDR DATA registers to be completed and returns either success or
  *  error depending on whether the command was complete or not.
@@ -4868,6 +4872,8 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
 /**
  *  s2io_set_multicast - entry point for multicast address enable/disable.
  *  @dev : pointer to the device structure
+ *  @may_sleep: parameter indicates if sleeping when waiting for command
+ *  complete
  *  Description:
  *  This function is a driver entry point which gets called by the kernel
  *  whenever multicast addresses must be enabled/disabled. This also gets

From 263805c8840de4f44564b14eb9b94b1ffed2823e Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Mon, 31 May 2021 17:32:43 +0300
Subject: [PATCH 0793/3292] net: marvell: prestera: disable events interrupt
 while handling

There are change in firmware which requires that receiver will
disable event interrupts before handling them and enable them
after finish with handling. Events still may come into the queue
but without receiver interruption.

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/prestera/prestera_pci.c  | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index 298110119272..dba6cacd7d9c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
 /* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */
 
+#include <linux/bitfield.h>
 #include <linux/circ_buf.h>
 #include <linux/device.h>
 #include <linux/firmware.h>
@@ -144,6 +145,11 @@ struct prestera_fw_regs {
 /* PRESTERA_CMD_RCV_CTL_REG flags */
 #define PRESTERA_CMD_F_REPL_SENT	BIT(0)
 
+#define PRESTERA_FW_EVT_CTL_STATUS_MASK	GENMASK(1, 0)
+
+#define PRESTERA_FW_EVT_CTL_STATUS_ON	0
+#define PRESTERA_FW_EVT_CTL_STATUS_OFF	1
+
 #define PRESTERA_EVTQ_REG_OFFSET(q, f)			\
 	(PRESTERA_FW_REG_OFFSET(evtq_list) +		\
 	 (q) * sizeof(struct prestera_fw_evtq_regs) +	\
@@ -260,6 +266,15 @@ static u8 prestera_fw_evtq_pick(struct prestera_fw *fw)
 	return PRESTERA_EVT_QNUM_MAX;
 }
 
+static void prestera_fw_evt_ctl_status_set(struct prestera_fw *fw, u32 val)
+{
+	u32 status = prestera_fw_read(fw, PRESTERA_FW_STATUS_REG);
+
+	u32p_replace_bits(&status, val, PRESTERA_FW_EVT_CTL_STATUS_MASK);
+
+	prestera_fw_write(fw, PRESTERA_FW_STATUS_REG, status);
+}
+
 static void prestera_fw_evt_work_fn(struct work_struct *work)
 {
 	struct prestera_fw *fw;
@@ -269,6 +284,8 @@ static void prestera_fw_evt_work_fn(struct work_struct *work)
 	fw = container_of(work, struct prestera_fw, evt_work);
 	msg = fw->evt_msg;
 
+	prestera_fw_evt_ctl_status_set(fw, PRESTERA_FW_EVT_CTL_STATUS_OFF);
+
 	while ((qid = prestera_fw_evtq_pick(fw)) < PRESTERA_EVT_QNUM_MAX) {
 		u32 idx;
 		u32 len;
@@ -288,6 +305,8 @@ static void prestera_fw_evt_work_fn(struct work_struct *work)
 		if (fw->dev.recv_msg)
 			fw->dev.recv_msg(&fw->dev, msg, len);
 	}
+
+	prestera_fw_evt_ctl_status_set(fw, PRESTERA_FW_EVT_CTL_STATUS_ON);
 }
 
 static int prestera_fw_wait_reg32(struct prestera_fw *fw, u32 reg, u32 cmp,

From c00e8a69fe429f164161d85466adb9bcb9ea0809 Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Mon, 31 May 2021 17:32:44 +0300
Subject: [PATCH 0794/3292] net: marvell: prestera: align flood setting
 according to latest firmware version

Latest FW IPC flood message format was changed to configure uc/mc
flooding separately, so change code according to this.

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/prestera/prestera_hw.c   | 85 ++++++++++++++++++-
 .../ethernet/marvell/prestera/prestera_hw.h   |  3 +-
 .../marvell/prestera/prestera_switchdev.c     | 17 ++--
 3 files changed, 94 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 0424718d5998..96ce73b50fec 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved */
 
 #include <linux/etherdevice.h>
+#include <linux/if_bridge.h>
 #include <linux/ethtool.h>
 #include <linux/list.h>
 
@@ -85,6 +86,11 @@ enum {
 	PRESTERA_PORT_TP_AUTO,
 };
 
+enum {
+	PRESTERA_PORT_FLOOD_TYPE_UC = 0,
+	PRESTERA_PORT_FLOOD_TYPE_MC = 1,
+};
+
 enum {
 	PRESTERA_PORT_GOOD_OCTETS_RCV_CNT,
 	PRESTERA_PORT_BAD_OCTETS_RCV_CNT,
@@ -188,6 +194,11 @@ struct prestera_msg_port_mdix_param {
 	u8 admin_mode;
 };
 
+struct prestera_msg_port_flood_param {
+	u8 type;
+	u8 enable;
+};
+
 union prestera_msg_port_param {
 	u8  admin_state;
 	u8  oper_state;
@@ -205,6 +216,7 @@ union prestera_msg_port_param {
 	struct prestera_msg_port_mdix_param mdix;
 	struct prestera_msg_port_autoneg_param autoneg;
 	struct prestera_msg_port_cap_param cap;
+	struct prestera_msg_port_flood_param flood_ext;
 };
 
 struct prestera_msg_port_attr_req {
@@ -988,7 +1000,43 @@ int prestera_hw_port_learning_set(struct prestera_port *port, bool enable)
 			    &req.cmd, sizeof(req));
 }
 
-int prestera_hw_port_flood_set(struct prestera_port *port, bool flood)
+static int prestera_hw_port_uc_flood_set(struct prestera_port *port, bool flood)
+{
+	struct prestera_msg_port_attr_req req = {
+		.attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.param = {
+			.flood_ext = {
+				.type = PRESTERA_PORT_FLOOD_TYPE_UC,
+				.enable = flood,
+			}
+		}
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+			    &req.cmd, sizeof(req));
+}
+
+static int prestera_hw_port_mc_flood_set(struct prestera_port *port, bool flood)
+{
+	struct prestera_msg_port_attr_req req = {
+		.attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.param = {
+			.flood_ext = {
+				.type = PRESTERA_PORT_FLOOD_TYPE_MC,
+				.enable = flood,
+			}
+		}
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_PORT_ATTR_SET,
+			    &req.cmd, sizeof(req));
+}
+
+static int prestera_hw_port_flood_set_v2(struct prestera_port *port, bool flood)
 {
 	struct prestera_msg_port_attr_req req = {
 		.attr = PRESTERA_CMD_PORT_ATTR_FLOOD,
@@ -1003,6 +1051,41 @@ int prestera_hw_port_flood_set(struct prestera_port *port, bool flood)
 			    &req.cmd, sizeof(req));
 }
 
+int prestera_hw_port_flood_set(struct prestera_port *port, unsigned long mask,
+			       unsigned long val)
+{
+	int err;
+
+	if (port->sw->dev->fw_rev.maj <= 2) {
+		if (!(mask & BR_FLOOD))
+			return 0;
+
+		return prestera_hw_port_flood_set_v2(port, val & BR_FLOOD);
+	}
+
+	if (mask & BR_FLOOD) {
+		err = prestera_hw_port_uc_flood_set(port, val & BR_FLOOD);
+		if (err)
+			goto err_uc_flood;
+	}
+
+	if (mask & BR_MCAST_FLOOD) {
+		err = prestera_hw_port_mc_flood_set(port, val & BR_MCAST_FLOOD);
+		if (err)
+			goto err_mc_flood;
+	}
+
+	return 0;
+
+err_mc_flood:
+	prestera_hw_port_mc_flood_set(port, 0);
+err_uc_flood:
+	if (mask & BR_FLOOD)
+		prestera_hw_port_uc_flood_set(port, 0);
+
+	return err;
+}
+
 int prestera_hw_vlan_create(struct prestera_switch *sw, u16 vid)
 {
 	struct prestera_msg_vlan_req req = {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index b2b5ac95b4e3..e8dd0e2b81d2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -138,7 +138,8 @@ int prestera_hw_port_mdix_get(const struct prestera_port *port, u8 *status,
 int prestera_hw_port_mdix_set(const struct prestera_port *port, u8 mode);
 int prestera_hw_port_speed_get(const struct prestera_port *port, u32 *speed);
 int prestera_hw_port_learning_set(struct prestera_port *port, bool enable);
-int prestera_hw_port_flood_set(struct prestera_port *port, bool flood);
+int prestera_hw_port_flood_set(struct prestera_port *port, unsigned long mask,
+			       unsigned long val);
 int prestera_hw_port_accept_frm_type(struct prestera_port *port,
 				     enum prestera_accept_frm_type type);
 /* Vlan API */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index cb564890a3dc..6442dc411285 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -404,7 +404,8 @@ prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
 	if (err)
 		return err;
 
-	err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD);
+	err = prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD,
+					 br_port->flags);
 	if (err)
 		goto err_port_flood_set;
 
@@ -415,7 +416,6 @@ prestera_bridge_1d_port_join(struct prestera_bridge_port *br_port)
 	return 0;
 
 err_port_learning_set:
-	prestera_hw_port_flood_set(port, false);
 err_port_flood_set:
 	prestera_hw_bridge_port_delete(port, bridge->bridge_id);
 
@@ -528,7 +528,7 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
 		prestera_bridge_1d_port_leave(br_port);
 
 	prestera_hw_port_learning_set(port, false);
-	prestera_hw_port_flood_set(port, false);
+	prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD, 0);
 	prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING);
 	prestera_bridge_port_put(br_port);
 }
@@ -590,11 +590,9 @@ static int prestera_port_attr_br_flags_set(struct prestera_port *port,
 	if (!br_port)
 		return 0;
 
-	if (flags.mask & BR_FLOOD) {
-		err = prestera_hw_port_flood_set(port, flags.val & BR_FLOOD);
-		if (err)
-			return err;
-	}
+	err = prestera_hw_port_flood_set(port, flags.mask, flags.val);
+	if (err)
+		return err;
 
 	if (flags.mask & BR_LEARNING) {
 		err = prestera_hw_port_learning_set(port,
@@ -901,7 +899,8 @@ prestera_port_vlan_bridge_join(struct prestera_port_vlan *port_vlan,
 	if (port_vlan->br_port)
 		return 0;
 
-	err = prestera_hw_port_flood_set(port, br_port->flags & BR_FLOOD);
+	err = prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD,
+					 br_port->flags);
 	if (err)
 		return err;
 

From f1e1b26301782ef236d17947311e6f02c78eb8f0 Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Mon, 31 May 2021 17:32:45 +0300
Subject: [PATCH 0795/3292] net: marvell: prestera: bump supported firmware
 version to 3.0

New firmware version has some ABI and feature changes like:

    - LAG support
    - initial L3 support
    - changed events handling logic

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/prestera/prestera_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index dba6cacd7d9c..5edd4d2ac672 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -14,7 +14,7 @@
 
 #define PRESTERA_MSG_MAX_SIZE 1500
 
-#define PRESTERA_SUPP_FW_MAJ_VER	2
+#define PRESTERA_SUPP_FW_MAJ_VER	3
 #define PRESTERA_SUPP_FW_MIN_VER	0
 
 #define PRESTERA_FW_PATH_FMT	"mrvl/prestera/mvsw_prestera_fw-v%u.%u.img"

From 47f26018a414ff640527be4ca814168e0b71eaf3 Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Mon, 31 May 2021 17:32:46 +0300
Subject: [PATCH 0796/3292] net: marvell: prestera: try to load previous fw
 version

Lets try to load previous fw version in case the latest one is missing on
existing system.

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/prestera/prestera_pci.c  | 83 ++++++++++++++-----
 1 file changed, 61 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index 5edd4d2ac672..a250d394da38 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -17,6 +17,9 @@
 #define PRESTERA_SUPP_FW_MAJ_VER	3
 #define PRESTERA_SUPP_FW_MIN_VER	0
 
+#define PRESTERA_PREV_FW_MAJ_VER	2
+#define PRESTERA_PREV_FW_MIN_VER	0
+
 #define PRESTERA_FW_PATH_FMT	"mrvl/prestera/mvsw_prestera_fw-v%u.%u.img"
 
 #define PRESTERA_FW_HDR_MAGIC		0x351D9D06
@@ -172,6 +175,8 @@ struct prestera_fw_evtq {
 };
 
 struct prestera_fw {
+	struct prestera_fw_rev rev_supp;
+	const struct firmware *bin;
 	struct workqueue_struct *wq;
 	struct prestera_device dev;
 	u8 __iomem *ldr_regs;
@@ -595,25 +600,24 @@ static void prestera_fw_rev_parse(const struct prestera_fw_header *hdr,
 static int prestera_fw_rev_check(struct prestera_fw *fw)
 {
 	struct prestera_fw_rev *rev = &fw->dev.fw_rev;
-	u16 maj_supp = PRESTERA_SUPP_FW_MAJ_VER;
-	u16 min_supp = PRESTERA_SUPP_FW_MIN_VER;
 
-	if (rev->maj == maj_supp && rev->min >= min_supp)
+	if (rev->maj == fw->rev_supp.maj && rev->min >= fw->rev_supp.min)
 		return 0;
 
 	dev_err(fw->dev.dev, "Driver supports FW version only '%u.%u.x'",
-		PRESTERA_SUPP_FW_MAJ_VER, PRESTERA_SUPP_FW_MIN_VER);
+		fw->rev_supp.maj, fw->rev_supp.min);
 
 	return -EINVAL;
 }
 
-static int prestera_fw_hdr_parse(struct prestera_fw *fw,
-				 const struct firmware *img)
+static int prestera_fw_hdr_parse(struct prestera_fw *fw)
 {
-	struct prestera_fw_header *hdr = (struct prestera_fw_header *)img->data;
 	struct prestera_fw_rev *rev = &fw->dev.fw_rev;
+	struct prestera_fw_header *hdr;
 	u32 magic;
 
+	hdr = (struct prestera_fw_header *)fw->bin->data;
+
 	magic = be32_to_cpu(hdr->magic_number);
 	if (magic != PRESTERA_FW_HDR_MAGIC) {
 		dev_err(fw->dev.dev, "FW img hdr magic is invalid");
@@ -628,11 +632,52 @@ static int prestera_fw_hdr_parse(struct prestera_fw *fw,
 	return prestera_fw_rev_check(fw);
 }
 
+static int prestera_fw_get(struct prestera_fw *fw)
+{
+	int ver_maj = PRESTERA_SUPP_FW_MAJ_VER;
+	int ver_min = PRESTERA_SUPP_FW_MIN_VER;
+	char fw_path[128];
+	int err;
+
+pick_fw_ver:
+	snprintf(fw_path, sizeof(fw_path), PRESTERA_FW_PATH_FMT,
+		 ver_maj, ver_min);
+
+	err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev);
+	if (err) {
+		if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) {
+			ver_maj = PRESTERA_PREV_FW_MAJ_VER;
+			ver_min = PRESTERA_PREV_FW_MIN_VER;
+
+			dev_warn(fw->dev.dev,
+				 "missing latest %s firmware, fall-back to previous %u.%u version\n",
+				 fw_path, ver_maj, ver_min);
+
+			goto pick_fw_ver;
+		} else {
+			dev_err(fw->dev.dev, "failed to request previous firmware: %s\n",
+				fw_path);
+			return err;
+		}
+	}
+
+	dev_info(fw->dev.dev, "Loading %s ...", fw_path);
+
+	fw->rev_supp.maj = ver_maj;
+	fw->rev_supp.min = ver_min;
+	fw->rev_supp.sub = 0;
+
+	return 0;
+}
+
+static void prestera_fw_put(struct prestera_fw *fw)
+{
+	release_firmware(fw->bin);
+}
+
 static int prestera_fw_load(struct prestera_fw *fw)
 {
 	size_t hlen = sizeof(struct prestera_fw_header);
-	const struct firmware *f;
-	char fw_path[128];
 	int err;
 
 	err = prestera_ldr_wait_reg32(fw, PRESTERA_LDR_READY_REG,
@@ -651,30 +696,24 @@ static int prestera_fw_load(struct prestera_fw *fw)
 
 	fw->ldr_wr_idx = 0;
 
-	snprintf(fw_path, sizeof(fw_path), PRESTERA_FW_PATH_FMT,
-		 PRESTERA_SUPP_FW_MAJ_VER, PRESTERA_SUPP_FW_MIN_VER);
-
-	err = request_firmware_direct(&f, fw_path, fw->dev.dev);
-	if (err) {
-		dev_err(fw->dev.dev, "failed to request firmware file\n");
+	err = prestera_fw_get(fw);
+	if (err)
 		return err;
-	}
 
-	err = prestera_fw_hdr_parse(fw, f);
+	err = prestera_fw_hdr_parse(fw);
 	if (err) {
 		dev_err(fw->dev.dev, "FW image header is invalid\n");
 		goto out_release;
 	}
 
-	prestera_ldr_write(fw, PRESTERA_LDR_IMG_SIZE_REG, f->size - hlen);
+	prestera_ldr_write(fw, PRESTERA_LDR_IMG_SIZE_REG, fw->bin->size - hlen);
 	prestera_ldr_write(fw, PRESTERA_LDR_CTL_REG, PRESTERA_LDR_CTL_DL_START);
 
-	dev_info(fw->dev.dev, "Loading %s ...", fw_path);
-
-	err = prestera_ldr_fw_send(fw, f->data + hlen, f->size - hlen);
+	err = prestera_ldr_fw_send(fw, fw->bin->data + hlen,
+				   fw->bin->size - hlen);
 
 out_release:
-	release_firmware(f);
+	prestera_fw_put(fw);
 	return err;
 }
 

From b0f6c9ac8088a01cd9b6bfba8ede22f1bd0ff72f Mon Sep 17 00:00:00 2001
From: Wander Lairson Costa <wander@redhat.com>
Date: Mon, 31 May 2021 12:23:23 -0300
Subject: [PATCH 0797/3292] netpoll: don't require irqs disabled in rt kernels

write_msg(netconsole.c:836) calls netpoll_send_udp after a call to
spin_lock_irqsave, which normally disables interrupts; but in PREEMPT_RT
this call just locks an rt_mutex without disabling irqs. In this case,
netpoll_send_udp is called with interrupts enabled.

Signed-off-by: Wander Lairson Costa <wander@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c310c7c1cef7..0a6b04714558 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -36,6 +36,7 @@
 #include <net/ip6_checksum.h>
 #include <asm/unaligned.h>
 #include <trace/events/napi.h>
+#include <linux/kconfig.h>
 
 /*
  * We maintain a small pool of fully-sized skbs, to make sure the
@@ -389,7 +390,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	static atomic_t ip_ident;
 	struct ipv6hdr *ip6h;
 
-	WARN_ON_ONCE(!irqs_disabled());
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		WARN_ON_ONCE(!irqs_disabled());
 
 	udp_len = len + sizeof(*udph);
 	if (np->ipv6)

From 37d4b3fdc55dbeb32e3ed4d76d36d8ea4c1c479d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 19:17:07 +0300
Subject: [PATCH 0798/3292] net: enetc: catch negative return code from
 enetc_pf_to_port()

After the refactoring introduced in commit 87614b931c24 ("net: enetc:
create a common enetc_pf_to_port helper"), enetc_pf_to_port was coded up
to return -1 in case the passed PCIe device does not have a recognized
BDF.

Make sure the -1 value is checked by the callers, to appease static
checkers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/enetc/enetc_qos.c  | 31 ++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index af699f2ad095..4577226d3c6a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -465,8 +465,13 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
 	struct streamid_conf *si_conf;
 	u16 data_size;
 	dma_addr_t dma;
+	int port;
 	int err;
 
+	port = enetc_pf_to_port(priv->si->pdev);
+	if (port < 0)
+		return -EINVAL;
+
 	if (sid->index >= priv->psfp_cap.max_streamid)
 		return -EINVAL;
 
@@ -499,7 +504,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
 
 	si_conf = &cbd.sid_set;
 	/* Only one port supported for one entry, set itself */
-	si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev));
+	si_conf->iports = cpu_to_le32(1 << port);
 	si_conf->id_type = 1;
 	si_conf->oui[2] = 0x0;
 	si_conf->oui[1] = 0x80;
@@ -524,7 +529,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
 
 	si_conf->en = 0x80;
 	si_conf->stream_handle = cpu_to_le32(sid->handle);
-	si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev));
+	si_conf->iports = cpu_to_le32(1 << port);
 	si_conf->id_type = sid->filtertype;
 	si_conf->oui[2] = 0x0;
 	si_conf->oui[1] = 0x80;
@@ -567,6 +572,11 @@ static int enetc_streamfilter_hw_set(struct enetc_ndev_priv *priv,
 {
 	struct enetc_cbd cbd = {.cmd = 0};
 	struct sfi_conf *sfi_config;
+	int port;
+
+	port = enetc_pf_to_port(priv->si->pdev);
+	if (port < 0)
+		return -EINVAL;
 
 	cbd.index = cpu_to_le16(sfi->index);
 	cbd.cls = BDCR_CMD_STREAM_FILTER;
@@ -586,8 +596,7 @@ static int enetc_streamfilter_hw_set(struct enetc_ndev_priv *priv,
 	}
 
 	sfi_config->sg_inst_table_index = cpu_to_le16(sfi->gate_id);
-	sfi_config->input_ports =
-		cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev));
+	sfi_config->input_ports = cpu_to_le32(1 << port);
 
 	/* The priority value which may be matched against the
 	 * frame’s priority value to determine a match for this entry.
@@ -1548,7 +1557,7 @@ int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 	struct flow_block_offload *f = type_data;
-	int err;
+	int port, err;
 
 	err = flow_block_cb_setup_simple(f, &enetc_block_cb_list,
 					 enetc_setup_tc_block_cb,
@@ -1558,10 +1567,18 @@ int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data)
 
 	switch (f->command) {
 	case FLOW_BLOCK_BIND:
-		set_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap);
+		port = enetc_pf_to_port(priv->si->pdev);
+		if (port < 0)
+			return -EINVAL;
+
+		set_bit(port, &epsfp.dev_bitmap);
 		break;
 	case FLOW_BLOCK_UNBIND:
-		clear_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap);
+		port = enetc_pf_to_port(priv->si->pdev);
+		if (port < 0)
+			return -EINVAL;
+
+		clear_bit(port, &epsfp.dev_bitmap);
 		if (!epsfp.dev_bitmap)
 			clean_psfp_all();
 		break;

From d153ef5ce7db6d9801d418638d05df92cc63797a Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 1 Jun 2021 14:27:36 +0800
Subject: [PATCH 0799/3292] fjes: Use DEFINE_RES_MEM() and DEFINE_RES_IRQ() to
 simplify code

No functional change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fjes/fjes_main.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index 466622664424..d098b1fcf006 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -90,16 +90,8 @@ static struct platform_driver fjes_driver = {
 };
 
 static struct resource fjes_resource[] = {
-	{
-		.flags = IORESOURCE_MEM,
-		.start = 0,
-		.end = 0,
-	},
-	{
-		.flags = IORESOURCE_IRQ,
-		.start = 0,
-		.end = 0,
-	},
+	DEFINE_RES_MEM(0, 1),
+	DEFINE_RES_IRQ(0)
 };
 
 static bool is_extended_socket_device(struct acpi_device *device)

From 68b8c55a701e4e7aba254688b483ef79da8338ed Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Tue, 1 Jun 2021 14:57:58 +0800
Subject: [PATCH 0800/3292] qlcnic: Remove the repeated declaration

Function 'qlcnic_82xx_hw_write_wx_2M' is declared twice, so remove the
repeated declaration.

Cc: Shahed Shaikh <shshaikh@marvell.com>
Cc: Manish Chopra <manishc@marvell.com>
Cc: GR-Linux-NIC-Dev@marvell.com
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
index 601d22495a88..95ecc84dddcd 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
@@ -203,7 +203,6 @@ int qlcnic_82xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *);
 int qlcnic_82xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info*);
 int qlcnic_82xx_alloc_mbx_args(struct qlcnic_cmd_args *,
 			       struct qlcnic_adapter *, u32);
-int qlcnic_82xx_hw_write_wx_2M(struct qlcnic_adapter *, ulong, u32);
 int qlcnic_82xx_get_board_info(struct qlcnic_adapter *);
 int qlcnic_82xx_config_led(struct qlcnic_adapter *, u32, u32);
 void qlcnic_82xx_get_func_no(struct qlcnic_adapter *);

From 163d01c56e80fdbd3e386162b969d6cb43af3b5c Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Tue, 1 Jun 2021 15:37:12 +0800
Subject: [PATCH 0801/3292] r8152: support pauseparam of ethtool_ops

Support get_pauseparam and set_pauseparam of ethtool_ops.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 75 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index f6abb2fbf972..21e6b9b6776c 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -8967,6 +8967,79 @@ static int rtl8152_set_ringparam(struct net_device *netdev,
 	return 0;
 }
 
+static void rtl8152_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct r8152 *tp = netdev_priv(netdev);
+	u16 bmcr, lcladv, rmtadv;
+	u8 cap;
+
+	if (usb_autopm_get_interface(tp->intf) < 0)
+		return;
+
+	mutex_lock(&tp->control);
+
+	bmcr = r8152_mdio_read(tp, MII_BMCR);
+	lcladv = r8152_mdio_read(tp, MII_ADVERTISE);
+	rmtadv = r8152_mdio_read(tp, MII_LPA);
+
+	mutex_unlock(&tp->control);
+
+	usb_autopm_put_interface(tp->intf);
+
+	if (!(bmcr & BMCR_ANENABLE)) {
+		pause->autoneg = 0;
+		pause->rx_pause = 0;
+		pause->tx_pause = 0;
+		return;
+	}
+
+	pause->autoneg = 1;
+
+	cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv);
+
+	if (cap & FLOW_CTRL_RX)
+		pause->rx_pause = 1;
+
+	if (cap & FLOW_CTRL_TX)
+		pause->tx_pause = 1;
+}
+
+static int rtl8152_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct r8152 *tp = netdev_priv(netdev);
+	u16 old, new1;
+	u8 cap = 0;
+	int ret;
+
+	ret = usb_autopm_get_interface(tp->intf);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&tp->control);
+
+	if (pause->autoneg && !(r8152_mdio_read(tp, MII_BMCR) & BMCR_ANENABLE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (pause->rx_pause)
+		cap |= FLOW_CTRL_RX;
+
+	if (pause->tx_pause)
+		cap |= FLOW_CTRL_TX;
+
+	old = r8152_mdio_read(tp, MII_ADVERTISE);
+	new1 = (old & ~(ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)) | mii_advertise_flowctrl(cap);
+	if (old != new1)
+		r8152_mdio_write(tp, MII_ADVERTISE, new1);
+
+out:
+	mutex_unlock(&tp->control);
+	usb_autopm_put_interface(tp->intf);
+
+	return ret;
+}
+
 static const struct ethtool_ops ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
 	.get_drvinfo = rtl8152_get_drvinfo,
@@ -8989,6 +9062,8 @@ static const struct ethtool_ops ops = {
 	.set_tunable = rtl8152_set_tunable,
 	.get_ringparam = rtl8152_get_ringparam,
 	.set_ringparam = rtl8152_set_ringparam,
+	.get_pauseparam = rtl8152_get_pauseparam,
+	.set_pauseparam = rtl8152_set_pauseparam,
 };
 
 static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)

From 52aa0b189288c5d44cb5f2500372e474d6623c18 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 16:23:04 +0800
Subject: [PATCH 0802/3292] net: vxge: Declare the function
 vxge_reset_all_vpaths as void

variable 'status' is unneeded and it's noneed to check the
return value of function vxge_reset_all_vpaths,so declare
it as void.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/neterion/vxge/vxge-main.c    | 27 +++++--------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index b113c158d6e3..0528b8f49061 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -87,7 +87,7 @@ static unsigned int bw_percentage[VXGE_HW_MAX_VIRTUAL_PATHS] =
 module_param_array(bw_percentage, uint, NULL, 0);
 
 static struct vxge_drv_config *driver_config;
-static enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev);
+static void vxge_reset_all_vpaths(struct vxgedev *vdev);
 
 static inline int is_vxge_card_up(struct vxgedev *vdev)
 {
@@ -1606,7 +1606,6 @@ static void vxge_config_ci_for_tti_rti(struct vxgedev *vdev)
 
 static int do_vxge_reset(struct vxgedev *vdev, int event)
 {
-	enum vxge_hw_status status;
 	int ret = 0, vp_id, i;
 
 	vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__);
@@ -1709,14 +1708,7 @@ static int do_vxge_reset(struct vxgedev *vdev, int event)
 		netif_tx_stop_all_queues(vdev->ndev);
 
 	if (event == VXGE_LL_FULL_RESET) {
-		status = vxge_reset_all_vpaths(vdev);
-		if (status != VXGE_HW_OK) {
-			vxge_debug_init(VXGE_ERR,
-				"fatal: %s: can not reset vpaths",
-				vdev->ndev->name);
-			ret = -EPERM;
-			goto out;
-		}
+		vxge_reset_all_vpaths(vdev);
 	}
 
 	if (event == VXGE_LL_COMPL_RESET) {
@@ -1969,9 +1961,8 @@ static enum vxge_hw_status vxge_rth_configure(struct vxgedev *vdev)
 }
 
 /* reset vpaths */
-static enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev)
+static void vxge_reset_all_vpaths(struct vxgedev *vdev)
 {
-	enum vxge_hw_status status = VXGE_HW_OK;
 	struct vxge_vpath *vpath;
 	int i;
 
@@ -1986,18 +1977,16 @@ static enum vxge_hw_status vxge_reset_all_vpaths(struct vxgedev *vdev)
 						"vxge_hw_vpath_recover_"
 						"from_reset failed for vpath: "
 						"%d", i);
-					return status;
+					return;
 				}
 			} else {
 				vxge_debug_init(VXGE_ERR,
 					"vxge_hw_vpath_reset failed for "
 					"vpath:%d", i);
-				return status;
+				return;
 			}
 		}
 	}
-
-	return status;
 }
 
 /* close vpaths */
@@ -2676,11 +2665,7 @@ static int vxge_set_features(struct net_device *dev, netdev_features_t features)
 	/* !netif_running() ensured by vxge_fix_features() */
 
 	vdev->devh->config.rth_en = !!(features & NETIF_F_RXHASH);
-	if (vxge_reset_all_vpaths(vdev) != VXGE_HW_OK) {
-		dev->features = features ^ NETIF_F_RXHASH;
-		vdev->devh->config.rth_en = !!(dev->features & NETIF_F_RXHASH);
-		return -EIO;
-	}
+	vxge_reset_all_vpaths(vdev);
 
 	return 0;
 }

From 7cf85f8caa042db0e33d70dbd72d8b92b1051f93 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 1 Jun 2021 17:49:50 +0800
Subject: [PATCH 0803/3292] NFC: nci: Remove redundant assignment to len

Variable 'len' is set to conn_info->max_pkt_payload_len but this
value is never read as it is overwritten with a new value later on,
hence it is a redundant assignment and can be removed.

Clean up the following clang-analyzer warning:

net/nfc/nci/hci.c:164:3: warning: Value stored to 'len' is never read
[clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/nci/hci.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 96865142104f..d6732e5e8958 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -161,8 +161,6 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 	*(u8 *)skb_push(skb, 1) = data_type;
 
 	do {
-		len = conn_info->max_pkt_payload_len;
-
 		/* If last packet add NCI_HFP_NO_CHAINING */
 		if (i + conn_info->max_pkt_payload_len -
 		    (skb->len + 1) >= data_len) {

From b934b6d1d9332fb31c4c899ce63d4aac6ee9f1da Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 1 Jun 2021 11:31:44 +0100
Subject: [PATCH 0804/3292] octeontx2-af: Fix spelling mistake "vesion" ->
 "version"

There is a spelling mistake in a dev_warning message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index bd63305ba6d2..053cc872d0cc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1371,7 +1371,7 @@ static int npc_apply_custom_kpu(struct rvu *rvu,
 	if (NPC_KPU_VER_MIN(profile->version) <
 	    NPC_KPU_VER_MIN(NPC_KPU_PROFILE_VER)) {
 		dev_warn(rvu->dev,
-			 "Invalid KPU profile version: %d.%d.%d expected vesion <= %d.%d.%d\n",
+			 "Invalid KPU profile version: %d.%d.%d expected version <= %d.%d.%d\n",
 			 NPC_KPU_VER_MAJ(profile->version),
 			 NPC_KPU_VER_MIN(profile->version),
 			 NPC_KPU_VER_PATCH(profile->version),

From 9c5eee0afca09cbde6bd00f77876754aaa552970 Mon Sep 17 00:00:00 2001
From: Boris Sukholitko <boris.sukholitko@broadcom.com>
Date: Tue, 1 Jun 2021 15:30:50 +0300
Subject: [PATCH 0805/3292] net/sched: act_vlan: Fix modify to allow 0

Currently vlan modification action checks existence of vlan priority by
comparing it to 0. Therefore it is impossible to modify existing vlan
tag to have priority 0.

For example, the following tc command will change the vlan id but will
not affect vlan priority:

tc filter add dev eth1 ingress matchall action vlan modify id 300 \
        priority 0 pipe mirred egress redirect dev eth2

The incoming packet on eth1:

ethertype 802.1Q (0x8100), vlan 200, p 4, ethertype IPv4

will be changed to:

ethertype 802.1Q (0x8100), vlan 300, p 4, ethertype IPv4

although the user has intended to have p == 0.

The fix is to add tcfv_push_prio_exists flag to struct tcf_vlan_params
and rely on it when deciding to set the priority.

Fixes: 45a497f2d149a4a8061c (net/sched: act_vlan: Introduce TCA_VLAN_ACT_MODIFY vlan action)
Signed-off-by: Boris Sukholitko <boris.sukholitko@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_vlan.h | 1 +
 net/sched/act_vlan.c         | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index f051046ba034..f94b8bc26f9e 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -16,6 +16,7 @@ struct tcf_vlan_params {
 	u16               tcfv_push_vid;
 	__be16            tcfv_push_proto;
 	u8                tcfv_push_prio;
+	bool              tcfv_push_prio_exists;
 	struct rcu_head   rcu;
 };
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 1cac3c6fbb49..a108469c664f 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -70,7 +70,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
 		/* replace the vid */
 		tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
 		/* replace prio bits, if tcfv_push_prio specified */
-		if (p->tcfv_push_prio) {
+		if (p->tcfv_push_prio_exists) {
 			tci &= ~VLAN_PRIO_MASK;
 			tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
 		}
@@ -121,6 +121,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
 	struct tcf_chain *goto_ch = NULL;
+	bool push_prio_exists = false;
 	struct tcf_vlan_params *p;
 	struct tc_vlan *parm;
 	struct tcf_vlan *v;
@@ -189,7 +190,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			push_proto = htons(ETH_P_8021Q);
 		}
 
-		if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+		push_prio_exists = !!tb[TCA_VLAN_PUSH_VLAN_PRIORITY];
+		if (push_prio_exists)
 			push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
 		break;
 	case TCA_VLAN_ACT_POP_ETH:
@@ -241,6 +243,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	p->tcfv_action = action;
 	p->tcfv_push_vid = push_vid;
 	p->tcfv_push_prio = push_prio;
+	p->tcfv_push_prio_exists = push_prio_exists || action == TCA_VLAN_ACT_PUSH;
 	p->tcfv_push_proto = push_proto;
 
 	if (action == TCA_VLAN_ACT_PUSH_ETH) {

From 8323b20f1d76b10fb413daae6abf76b7b903c8de Mon Sep 17 00:00:00 2001
From: Boris Sukholitko <boris.sukholitko@broadcom.com>
Date: Tue, 1 Jun 2021 15:30:51 +0300
Subject: [PATCH 0806/3292] net/sched: act_vlan: No dump for unset priority

Dump vlan priority only if it has been previously set.

Fix the tests accordingly.

Signed-off-by: Boris Sukholitko <boris.sukholitko@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_vlan.c                                          | 4 ++--
 tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index a108469c664f..71f2015c70ca 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -307,8 +307,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
 	     nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
 			  p->tcfv_push_proto) ||
-	     (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
-					      p->tcfv_push_prio))))
+	     (p->tcfv_push_prio_exists &&
+	      nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, p->tcfv_push_prio))))
 		goto nla_put_failure;
 
 	if (p->tcfv_action == TCA_VLAN_ACT_PUSH_ETH) {
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 41d783254b08..eccbf0d7c7b9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -463,7 +463,7 @@
         "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action vlan index 100",
-        "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref",
         "matchCount": "0",
         "teardown": [
             "$TC actions flush action vlan"
@@ -487,7 +487,7 @@
         "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
         "expExitCode": "0",
         "verifyCmd": "$TC actions get action vlan index 12",
-        "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action vlan"

From 8fd52b1f923c3ebd41da678b537021b88830494e Mon Sep 17 00:00:00 2001
From: Boris Sukholitko <boris.sukholitko@broadcom.com>
Date: Tue, 1 Jun 2021 15:30:52 +0300
Subject: [PATCH 0807/3292] net/sched: act_vlan: Test priority 0 modification

Because explicitly being set, the priority 0 should appear
in the output.

Signed-off-by: Boris Sukholitko <boris.sukholitko@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/vlan.json     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index eccbf0d7c7b9..2aad4caa8581 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -445,6 +445,30 @@
         "matchCount": "0",
         "teardown": []
     },
+    {
+        "id": "ba5b",
+        "name": "Add vlan modify action for protocol 802.1Q setting priority 0",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 100",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
     {
         "id": "6812",
         "name": "Add vlan modify action for protocol 802.1Q",

From 30cd458be244a929e929512fa63bbcf45225e752 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:16 +0800
Subject: [PATCH 0808/3292] net: hdlc: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 1bdd3df0867a..08833023b116 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -36,7 +36,6 @@
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 
-
 static const char* version = "HDLC support module revision 1.22";
 
 #undef DEBUG_LINK
@@ -82,8 +81,6 @@ static inline void hdlc_proto_start(struct net_device *dev)
 		hdlc->proto->start(dev);
 }
 
-
-
 static inline void hdlc_proto_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -91,8 +88,6 @@ static inline void hdlc_proto_stop(struct net_device *dev)
 		hdlc->proto->stop(dev);
 }
 
-
-
 static int hdlc_device_event(struct notifier_block *this, unsigned long event,
 			     void *ptr)
 {
@@ -141,8 +136,6 @@ carrier_exit:
 	return NOTIFY_DONE;
 }
 
-
-
 /* Must be called by hardware driver when HDLC device is being opened */
 int hdlc_open(struct net_device *dev)
 {
@@ -175,8 +168,6 @@ int hdlc_open(struct net_device *dev)
 	return 0;
 }
 
-
-
 /* Must be called by hardware driver when HDLC device is being closed */
 void hdlc_close(struct net_device *dev)
 {
@@ -198,8 +189,6 @@ void hdlc_close(struct net_device *dev)
 		hdlc->proto->close(dev);
 }
 
-
-
 int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct hdlc_proto *proto = first_proto;
@@ -271,8 +260,6 @@ void unregister_hdlc_device(struct net_device *dev)
 	rtnl_unlock();
 }
 
-
-
 int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 			 size_t size)
 {
@@ -297,7 +284,6 @@ int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 	return 0;
 }
 
-
 int detach_hdlc_protocol(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -323,7 +309,6 @@ int detach_hdlc_protocol(struct net_device *dev)
 	return 0;
 }
 
-
 void register_hdlc_protocol(struct hdlc_proto *proto)
 {
 	rtnl_lock();
@@ -332,7 +317,6 @@ void register_hdlc_protocol(struct hdlc_proto *proto)
 	rtnl_unlock();
 }
 
-
 void unregister_hdlc_protocol(struct hdlc_proto *proto)
 {
 	struct hdlc_proto **p;
@@ -347,8 +331,6 @@ void unregister_hdlc_protocol(struct hdlc_proto *proto)
 	rtnl_unlock();
 }
 
-
-
 MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
 MODULE_DESCRIPTION("HDLC support module");
 MODULE_LICENSE("GPL v2");
@@ -369,12 +351,10 @@ static struct packet_type hdlc_packet_type __read_mostly = {
 	.func = hdlc_rcv,
 };
 
-
 static struct notifier_block hdlc_notifier = {
 	.notifier_call = hdlc_device_event,
 };
 
-
 static int __init hdlc_module_init(void)
 {
 	int result;
@@ -386,14 +366,11 @@ static int __init hdlc_module_init(void)
 	return 0;
 }
 
-
-
 static void __exit hdlc_module_exit(void)
 {
 	dev_remove_pack(&hdlc_packet_type);
 	unregister_netdevice_notifier(&hdlc_notifier);
 }
 
-
 module_init(hdlc_module_init);
 module_exit(hdlc_module_exit);

From 04cc04f07bb25e5555b27f0d6069599f0da23cad Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:17 +0800
Subject: [PATCH 0809/3292] net: hdlc: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 08833023b116..6199a705d3df 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -77,6 +77,7 @@ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static inline void hdlc_proto_start(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
+
 	if (hdlc->proto->start)
 		hdlc->proto->start(dev);
 }
@@ -84,6 +85,7 @@ static inline void hdlc_proto_start(struct net_device *dev)
 static inline void hdlc_proto_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
+
 	if (hdlc->proto->stop)
 		hdlc->proto->stop(dev);
 }
@@ -150,6 +152,7 @@ int hdlc_open(struct net_device *dev)
 
 	if (hdlc->proto->open) {
 		int result = hdlc->proto->open(dev);
+
 		if (result)
 			return result;
 	}
@@ -245,6 +248,7 @@ static void hdlc_setup(struct net_device *dev)
 struct net_device *alloc_hdlcdev(void *priv)
 {
 	struct net_device *dev;
+
 	dev = alloc_netdev(sizeof(struct hdlc_device), "hdlc%d",
 			   NET_NAME_UNKNOWN, hdlc_setup);
 	if (dev)

From 68fd73925bce2d81e7144caf16519e6b7e80b6a1 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:18 +0800
Subject: [PATCH 0810/3292] net: hdlc: fix an code style issue about "foo* bar"

Fix the checkpatch error as "foo* bar" and should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 6199a705d3df..3cdb6417ffdb 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -36,7 +36,7 @@
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 
-static const char* version = "HDLC support module revision 1.22";
+static const char *version = "HDLC support module revision 1.22";
 
 #undef DEBUG_LINK
 

From 01506939cc8466635fb05711a79f8eb2b5df1ff3 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:19 +0800
Subject: [PATCH 0811/3292] net: hdlc: fix an code style issue about
 EXPORT_SYMBOL(foo)

According to the chackpatch.pl,
EXPORT_SYMBOL(foo); should immediately follow its function/variable.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 3cdb6417ffdb..13388bad88a3 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -73,6 +73,7 @@ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	return hdlc->xmit(skb, dev); /* call hardware driver directly */
 }
+EXPORT_SYMBOL(hdlc_start_xmit);
 
 static inline void hdlc_proto_start(struct net_device *dev)
 {
@@ -170,6 +171,7 @@ int hdlc_open(struct net_device *dev)
 	spin_unlock_irq(&hdlc->state_lock);
 	return 0;
 }
+EXPORT_SYMBOL(hdlc_open);
 
 /* Must be called by hardware driver when HDLC device is being closed */
 void hdlc_close(struct net_device *dev)
@@ -191,6 +193,7 @@ void hdlc_close(struct net_device *dev)
 	if (hdlc->proto->close)
 		hdlc->proto->close(dev);
 }
+EXPORT_SYMBOL(hdlc_close);
 
 int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -215,6 +218,7 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	}
 	return -EINVAL;
 }
+EXPORT_SYMBOL(hdlc_ioctl);
 
 static const struct header_ops hdlc_null_ops;
 
@@ -255,6 +259,7 @@ struct net_device *alloc_hdlcdev(void *priv)
 		dev_to_hdlc(dev)->priv = priv;
 	return dev;
 }
+EXPORT_SYMBOL(alloc_hdlcdev);
 
 void unregister_hdlc_device(struct net_device *dev)
 {
@@ -263,6 +268,7 @@ void unregister_hdlc_device(struct net_device *dev)
 	unregister_netdevice(dev);
 	rtnl_unlock();
 }
+EXPORT_SYMBOL(unregister_hdlc_device);
 
 int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 			 size_t size)
@@ -287,6 +293,7 @@ int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 
 	return 0;
 }
+EXPORT_SYMBOL(attach_hdlc_protocol);
 
 int detach_hdlc_protocol(struct net_device *dev)
 {
@@ -312,6 +319,7 @@ int detach_hdlc_protocol(struct net_device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL(detach_hdlc_protocol);
 
 void register_hdlc_protocol(struct hdlc_proto *proto)
 {
@@ -320,6 +328,7 @@ void register_hdlc_protocol(struct hdlc_proto *proto)
 	first_proto = proto;
 	rtnl_unlock();
 }
+EXPORT_SYMBOL(register_hdlc_protocol);
 
 void unregister_hdlc_protocol(struct hdlc_proto *proto)
 {
@@ -334,22 +343,12 @@ void unregister_hdlc_protocol(struct hdlc_proto *proto)
 	*p = proto->next;
 	rtnl_unlock();
 }
+EXPORT_SYMBOL(unregister_hdlc_protocol);
 
 MODULE_AUTHOR("Krzysztof Halasa <khc@pm.waw.pl>");
 MODULE_DESCRIPTION("HDLC support module");
 MODULE_LICENSE("GPL v2");
 
-EXPORT_SYMBOL(hdlc_start_xmit);
-EXPORT_SYMBOL(hdlc_open);
-EXPORT_SYMBOL(hdlc_close);
-EXPORT_SYMBOL(hdlc_ioctl);
-EXPORT_SYMBOL(alloc_hdlcdev);
-EXPORT_SYMBOL(unregister_hdlc_device);
-EXPORT_SYMBOL(register_hdlc_protocol);
-EXPORT_SYMBOL(unregister_hdlc_protocol);
-EXPORT_SYMBOL(attach_hdlc_protocol);
-EXPORT_SYMBOL(detach_hdlc_protocol);
-
 static struct packet_type hdlc_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_HDLC),
 	.func = hdlc_rcv,

From 387847f295c86fe72693178ed1eab000154f98ff Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:20 +0800
Subject: [PATCH 0812/3292] net: hdlc: replace comparison to NULL with "!param"

According to the chackpatch.pl, comparison to NULL could
be written "!param".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index 13388bad88a3..fefc7321c0c4 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -148,7 +148,7 @@ int hdlc_open(struct net_device *dev)
 	       hdlc->carrier, hdlc->open);
 #endif
 
-	if (hdlc->proto == NULL)
+	if (!hdlc->proto)
 		return -ENOSYS;	/* no protocol attached */
 
 	if (hdlc->proto->open) {
@@ -284,7 +284,7 @@ int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 
 	if (size) {
 		dev_to_hdlc(dev)->state = kmalloc(size, GFP_KERNEL);
-		if (dev_to_hdlc(dev)->state == NULL) {
+		if (!dev_to_hdlc(dev)->state) {
 			module_put(proto->module);
 			return -ENOBUFS;
 		}

From e50eb6c3578c6bd7c43c30bfa40a882f0815a2d2 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:21 +0800
Subject: [PATCH 0813/3292] net: hdlc: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index fefc7321c0c4..f48d70e7f3ba 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -212,7 +212,8 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	/* Not handled by currently attached protocol (if any) */
 
 	while (proto) {
-		if ((result = proto->ioctl(dev, ifr)) != -EINVAL)
+		result = proto->ioctl(dev, ifr);
+		if (result != -EINVAL)
 			return result;
 		proto = proto->next;
 	}
@@ -363,7 +364,8 @@ static int __init hdlc_module_init(void)
 	int result;
 
 	pr_info("%s\n", version);
-	if ((result = register_netdevice_notifier(&hdlc_notifier)) != 0)
+	result = register_netdevice_notifier(&hdlc_notifier);
+	if (result)
 		return result;
 	dev_add_pack(&hdlc_packet_type);
 	return 0;

From 1bb52182526552e31367ea521cf36d37ebb26966 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 1 Jun 2021 21:23:22 +0800
Subject: [PATCH 0814/3292] net: hdlc: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c
index f48d70e7f3ba..dd6312b69861 100644
--- a/drivers/net/wan/hdlc.c
+++ b/drivers/net/wan/hdlc.c
@@ -163,8 +163,9 @@ int hdlc_open(struct net_device *dev)
 	if (hdlc->carrier) {
 		netdev_info(dev, "Carrier detected\n");
 		hdlc_proto_start(dev);
-	} else
+	} else {
 		netdev_info(dev, "No carrier\n");
+	}
 
 	hdlc->open = 1;
 

From e5432cc71ab64b10100a290b7bf32804981c9cb4 Mon Sep 17 00:00:00 2001
From: Nigel Christian <nigel.l.christian@gmail.com>
Date: Tue, 1 Jun 2021 09:35:33 -0400
Subject: [PATCH 0815/3292] NFC: microread: Remove redundant assignment to
 variable err

In the case MICROREAD_CB_TYPE_READER_ALL clang reports a dead code
warning. The error code assigned to variable err is already passed
to async_cb(). The assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Nigel Christian <nigel.l.christian@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/microread/microread.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
index 8d3988457c58..b1d3975e8a81 100644
--- a/drivers/nfc/microread/microread.c
+++ b/drivers/nfc/microread/microread.c
@@ -364,7 +364,6 @@ static void microread_im_transceive_cb(void *context, struct sk_buff *skb,
 	case MICROREAD_CB_TYPE_READER_ALL:
 		if (err == 0) {
 			if (skb->len == 0) {
-				err = -EPROTO;
 				kfree_skb(skb);
 				info->async_cb(info->async_cb_context, NULL,
 					       -EPROTO);

From 5ac712dcdfefb1a783384db85e0507d161e87812 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Tue, 1 Jun 2021 21:52:35 +0800
Subject: [PATCH 0816/3292] net: stmmac: enable platform specific safety
 features

On Intel platforms, not all safety features are enabled on the hardware.
The current implementation enable all safety features by default. This
will cause mass error and warning printouts after the module is loaded.

Introduce platform specific safety features flag to enable or disable
each safety features.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 26 ++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c  | 30 ++++++++++++-------
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h  |  3 +-
 .../ethernet/stmicro/stmmac/dwxgmac2_core.c   |  4 ++-
 drivers/net/ethernet/stmicro/stmmac/hwif.h    |  3 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  3 +-
 .../net/ethernet/stmicro/stmmac/stmmac_pci.c  | 16 ++++++++++
 include/linux/stmmac.h                        | 13 ++++++++
 8 files changed, 84 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index e36a8cc59ad0..2ecf93c84b9d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -568,6 +568,16 @@ static int ehl_common_data(struct pci_dev *pdev,
 	plat->tx_queues_to_use = 8;
 	plat->clk_ptp_rate = 200000000;
 
+	plat->safety_feat_cfg->tsoee = 1;
+	plat->safety_feat_cfg->mrxpee = 1;
+	plat->safety_feat_cfg->mestee = 1;
+	plat->safety_feat_cfg->mrxee = 1;
+	plat->safety_feat_cfg->mtxee = 1;
+	plat->safety_feat_cfg->epsi = 0;
+	plat->safety_feat_cfg->edpp = 0;
+	plat->safety_feat_cfg->prtyen = 0;
+	plat->safety_feat_cfg->tmouten = 0;
+
 	return intel_mgbe_common_data(pdev, plat);
 }
 
@@ -683,6 +693,16 @@ static int tgl_common_data(struct pci_dev *pdev,
 	plat->tx_queues_to_use = 4;
 	plat->clk_ptp_rate = 200000000;
 
+	plat->safety_feat_cfg->tsoee = 1;
+	plat->safety_feat_cfg->mrxpee = 0;
+	plat->safety_feat_cfg->mestee = 1;
+	plat->safety_feat_cfg->mrxee = 1;
+	plat->safety_feat_cfg->mtxee = 1;
+	plat->safety_feat_cfg->epsi = 0;
+	plat->safety_feat_cfg->edpp = 0;
+	plat->safety_feat_cfg->prtyen = 0;
+	plat->safety_feat_cfg->tmouten = 0;
+
 	return intel_mgbe_common_data(pdev, plat);
 }
 
@@ -959,6 +979,12 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	if (!plat->dma_cfg)
 		return -ENOMEM;
 
+	plat->safety_feat_cfg = devm_kzalloc(&pdev->dev,
+					     sizeof(*plat->safety_feat_cfg),
+					     GFP_KERNEL);
+	if (!plat->safety_feat_cfg)
+		return -ENOMEM;
+
 	/* Enable pci device */
 	ret = pcim_enable_device(pdev);
 	if (ret) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index d8c6ff725237..9c2d40f853ed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -183,7 +183,8 @@ static void dwmac5_handle_dma_err(struct net_device *ndev,
 			STAT_OFF(dma_errors), stats);
 }
 
-int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
+int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
+			      struct stmmac_safety_feature_cfg *safety_feat_cfg)
 {
 	u32 value;
 
@@ -193,11 +194,16 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
 	/* 1. Enable Safety Features */
 	value = readl(ioaddr + MTL_ECC_CONTROL);
 	value |= MEEAO; /* MTL ECC Error Addr Status Override */
-	value |= TSOEE; /* TSO ECC */
-	value |= MRXPEE; /* MTL RX Parser ECC */
-	value |= MESTEE; /* MTL EST ECC */
-	value |= MRXEE; /* MTL RX FIFO ECC */
-	value |= MTXEE; /* MTL TX FIFO ECC */
+	if (safety_feat_cfg->tsoee)
+		value |= TSOEE; /* TSO ECC */
+	if (safety_feat_cfg->mrxpee)
+		value |= MRXPEE; /* MTL RX Parser ECC */
+	if (safety_feat_cfg->mestee)
+		value |= MESTEE; /* MTL EST ECC */
+	if (safety_feat_cfg->mrxee)
+		value |= MRXEE; /* MTL RX FIFO ECC */
+	if (safety_feat_cfg->mtxee)
+		value |= MTXEE; /* MTL TX FIFO ECC */
 	writel(value, ioaddr + MTL_ECC_CONTROL);
 
 	/* 2. Enable MTL Safety Interrupts */
@@ -219,13 +225,16 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
 
 	/* 5. Enable Parity and Timeout for FSM */
 	value = readl(ioaddr + MAC_FSM_CONTROL);
-	value |= PRTYEN; /* FSM Parity Feature */
-	value |= TMOUTEN; /* FSM Timeout Feature */
+	if (safety_feat_cfg->prtyen)
+		value |= PRTYEN; /* FSM Parity Feature */
+	if (safety_feat_cfg->tmouten)
+		value |= TMOUTEN; /* FSM Timeout Feature */
 	writel(value, ioaddr + MAC_FSM_CONTROL);
 
 	/* 4. Enable Data Parity Protection */
 	value = readl(ioaddr + MTL_DPP_CONTROL);
-	value |= EDPP;
+	if (safety_feat_cfg->edpp)
+		value |= EDPP;
 	writel(value, ioaddr + MTL_DPP_CONTROL);
 
 	/*
@@ -235,7 +244,8 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
 	if (asp <= 0x2)
 		return 0;
 
-	value |= EPSI;
+	if (safety_feat_cfg->epsi)
+		value |= EPSI;
 	writel(value, ioaddr + MTL_DPP_CONTROL);
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 6b2fd37b29ad..53c138d0ff48 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -137,7 +137,8 @@
 
 #define GMAC_INT_FPE_EN			BIT(17)
 
-int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp);
+int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
+			      struct stmmac_safety_feature_cfg *safety_cfg);
 int dwmac5_safety_feat_irq_status(struct net_device *ndev,
 		void __iomem *ioaddr, unsigned int asp,
 		struct stmmac_safety_stats *stats);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index ad4df9bddcf3..c4d78fa93663 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -801,7 +801,9 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
 			   dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
 }
 
-static int dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
+static int
+dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
+			    struct stmmac_safety_feature_cfg *safety_cfg)
 {
 	u32 value;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 75a8b90c202a..dbafedb24290 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -348,7 +348,8 @@ struct stmmac_ops {
 	void (*pcs_rane)(void __iomem *ioaddr, bool restart);
 	void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv);
 	/* Safety Features */
-	int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp);
+	int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp,
+				  struct stmmac_safety_feature_cfg *safety_cfg);
 	int (*safety_feat_irq_status)(struct net_device *ndev,
 			void __iomem *ioaddr, unsigned int asp,
 			struct stmmac_safety_stats *stats);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9962a1041d35..13720bf6f6ff 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3172,7 +3172,8 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
 {
 	if (priv->dma_cap.asp) {
 		netdev_info(priv->dev, "Enabling Safety Features\n");
-		stmmac_safety_feat_config(priv, priv->ioaddr, priv->dma_cap.asp);
+		stmmac_safety_feat_config(priv, priv->ioaddr, priv->dma_cap.asp,
+					  priv->plat->safety_feat_cfg);
 	} else {
 		netdev_info(priv->dev, "No Safety Features support found\n");
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 95e0e4d6f74d..fcf17d8a0494 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -174,6 +174,12 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
 	if (!plat->dma_cfg)
 		return -ENOMEM;
 
+	plat->safety_feat_cfg = devm_kzalloc(&pdev->dev,
+					     sizeof(*plat->safety_feat_cfg),
+					     GFP_KERNEL);
+	if (!plat->safety_feat_cfg)
+		return -ENOMEM;
+
 	/* Enable pci device */
 	ret = pci_enable_device(pdev);
 	if (ret) {
@@ -203,6 +209,16 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
 	res.wol_irq = pdev->irq;
 	res.irq = pdev->irq;
 
+	plat->safety_feat_cfg->tsoee = 1;
+	plat->safety_feat_cfg->mrxpee = 1;
+	plat->safety_feat_cfg->mestee = 1;
+	plat->safety_feat_cfg->mrxee = 1;
+	plat->safety_feat_cfg->mtxee = 1;
+	plat->safety_feat_cfg->epsi = 1;
+	plat->safety_feat_cfg->edpp = 1;
+	plat->safety_feat_cfg->prtyen = 1;
+	plat->safety_feat_cfg->tmouten = 1;
+
 	return stmmac_dvr_probe(&pdev->dev, plat, &res);
 }
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index e14a12df381b..e55a4807e3ea 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -172,6 +172,18 @@ struct stmmac_fpe_cfg {
 	enum stmmac_fpe_state lo_fpe_state;	/* Local station FPE state */
 };
 
+struct stmmac_safety_feature_cfg {
+	u32 tsoee;
+	u32 mrxpee;
+	u32 mestee;
+	u32 mrxee;
+	u32 mtxee;
+	u32 epsi;
+	u32 edpp;
+	u32 prtyen;
+	u32 tmouten;
+};
+
 struct plat_stmmacenet_data {
 	int bus_id;
 	int phy_addr;
@@ -184,6 +196,7 @@ struct plat_stmmacenet_data {
 	struct stmmac_dma_cfg *dma_cfg;
 	struct stmmac_est *est;
 	struct stmmac_fpe_cfg *fpe_cfg;
+	struct stmmac_safety_feature_cfg *safety_feat_cfg;
 	int clk_csr;
 	int has_gmac;
 	int enh_desc;

From e516f5be5b17cdae68d14206451b64d36f9588e4 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 1 Jun 2021 22:00:52 +0800
Subject: [PATCH 0817/3292] hamradio: bpqether: Fix -Wunused-const-variable
 warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_PROC_FS is n, gcc warns:

drivers/net/hamradio/bpqether.c:437:36:
 warning: ‘bpq_seqops’ defined but not used [-Wunused-const-variable=]
 static const struct seq_operations bpq_seqops = {
                                    ^~~~~~~~~~
Use #ifdef macro to gurad this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/bpqether.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index 1ad6085994b1..0e623c2e8b2d 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -368,7 +368,7 @@ static int bpq_close(struct net_device *dev)
 
 /* ------------------------------------------------------------------------ */
 
-
+#ifdef CONFIG_PROC_FS
 /*
  *	Proc filesystem
  */
@@ -440,7 +440,7 @@ static const struct seq_operations bpq_seqops = {
 	.stop = bpq_seq_stop,
 	.show = bpq_seq_show,
 };
-
+#endif
 /* ------------------------------------------------------------------------ */
 
 static const struct net_device_ops bpq_netdev_ops = {

From 6990c7f44c0dee4de6cf43fee0e5e7680c1a973a Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 1 Jun 2021 22:01:48 +0800
Subject: [PATCH 0818/3292] cxgb4: Fix -Wunused-const-variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_PCI_IOV is n, make W=1 warns:

drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:3909:33:
 warning: ‘cxgb4_mgmt_ethtool_ops’ defined but not used [-Wunused-const-variable=]
 static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
                                 ^~~~~~~~~~~~~~~~~~~~~~

Move it into #ifdef block to fix this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 421bd9b88028..b730aa1cb141 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3894,7 +3894,6 @@ static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
 	.ndo_set_vf_vlan        = cxgb4_mgmt_set_vf_vlan,
 	.ndo_set_vf_link_state	= cxgb4_mgmt_set_vf_link_state,
 };
-#endif
 
 static void cxgb4_mgmt_get_drvinfo(struct net_device *dev,
 				   struct ethtool_drvinfo *info)
@@ -3909,6 +3908,7 @@ static void cxgb4_mgmt_get_drvinfo(struct net_device *dev,
 static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
 	.get_drvinfo       = cxgb4_mgmt_get_drvinfo,
 };
+#endif
 
 static void notify_fatal_err(struct work_struct *work)
 {

From 0a206f9d9e23967639b204094c8dc073eb2997a4 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 1 Jun 2021 22:02:38 +0800
Subject: [PATCH 0819/3292] igb: Fix -Wunused-const-variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_IGB_HWMON is n, gcc warns:

drivers/net/ethernet/intel/igb/e1000_82575.c:2765:17:
 warning: ‘e1000_emc_therm_limit’ defined but not used [-Wunused-const-variable=]
 static const u8 e1000_emc_therm_limit[4] = {
                 ^~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/intel/igb/e1000_82575.c:2759:17:
 warning: ‘e1000_emc_temp_data’ defined but not used [-Wunused-const-variable=]
 static const u8 e1000_emc_temp_data[4] = {
                 ^~~~~~~~~~~~~~~~~~~

Move it into #ifdef block to fix this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igb/e1000_82575.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 50863fd87d53..cbe92fd23a70 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -2756,6 +2756,7 @@ out:
 	return ret_val;
 }
 
+#ifdef CONFIG_IGB_HWMON
 static const u8 e1000_emc_temp_data[4] = {
 	E1000_EMC_INTERNAL_DATA,
 	E1000_EMC_DIODE1_DATA,
@@ -2769,7 +2770,6 @@ static const u8 e1000_emc_therm_limit[4] = {
 	E1000_EMC_DIODE3_THERM_LIMIT
 };
 
-#ifdef CONFIG_IGB_HWMON
 /**
  *  igb_get_thermal_sensor_data_generic - Gathers thermal sensor data
  *  @hw: pointer to hardware structure

From b923cda9638860d6fbb688cfc4c939ff13df31b5 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:13:58 +0800
Subject: [PATCH 0820/3292] net: dcb: Return the correct errno code

When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 51f80a2f8194..b441ab330fd3 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1381,7 +1381,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd,
 
 	skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh);
 	if (!skb)
-		return -ENOBUFS;
+		return -ENOMEM;
 
 	if (dcbx_ver == DCB_CAP_DCBX_VER_IEEE)
 		err = dcbnl_ieee_fill(skb, dev);
@@ -1781,7 +1781,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq,
 				 nlh->nlmsg_flags, &reply_nlh);
 	if (!reply_skb)
-		return -ENOBUFS;
+		return -ENOMEM;
 
 	ret = fn->cb(netdev, nlh, nlh->nlmsg_seq, tb, reply_skb);
 	if (ret < 0) {

From ca746c55a7e6e597cc2d29a094082d345b2c33c9 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:14:07 +0800
Subject: [PATCH 0821/3292] net: Return the correct errno code

When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d9bccad65e2b..750f388a4a68 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -318,7 +318,7 @@ lookup_protocol:
 
 	WARN_ON(!answer_prot->slab);
 
-	err = -ENOBUFS;
+	err = -ENOMEM;
 	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
 	if (!sk)
 		goto out;

From 26d3f69c500cf950299e9b3d76b6e8e2c6fe00bb Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:16:10 +0800
Subject: [PATCH 0822/3292] macvlan: Fix a typo

underlaying  ==> underlying

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 1b998aa481f8..80de9768ecd4 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1781,7 +1781,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 		unregister_netdevice_many(&list_kill);
 		break;
 	case NETDEV_PRE_TYPE_CHANGE:
-		/* Forbid underlaying device to change its type. */
+		/* Forbid underlying device to change its type. */
 		return NOTIFY_BAD;
 
 	case NETDEV_NOTIFY_PEERS:

From ec674565fbc6af6fce8451cfd7f215c577216661 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:16:25 +0800
Subject: [PATCH 0823/3292] gtp: Fix a typo

Suppport  ==> Support

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 39c00f050fbd..1c9023d47e00 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -436,7 +436,7 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
 	gtp1->length	= htons(payload_len);
 	gtp1->tid	= htonl(pctx->u.v1.o_tei);
 
-	/* TODO: Suppport for extension header, sequence number and N-PDU.
+	/* TODO: Support for extension header, sequence number and N-PDU.
 	 *	 Update the length field if any of them is available.
 	 */
 }

From e9a0bf6d002f28bae3f3fcca314d4ea79b21fff0 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:16:35 +0800
Subject: [PATCH 0824/3292] vrf: Fix a typo

possibile  ==> possible

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 503e2fd7ce51..07eaef5e73c2 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -274,7 +274,7 @@ vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack)
 	int res;
 
 	/* we pre-allocate elements used in the spin-locked section (so that we
-	 * keep the spinlock as short as possibile).
+	 * keep the spinlock as short as possible).
 	 */
 	new_me = vrf_map_elem_alloc(GFP_KERNEL);
 	if (!new_me)

From f62c4f3870d8114029d3ebfc7ec4421728f07f83 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:18:13 +0800
Subject: [PATCH 0825/3292] net: usb: Fix spelling mistakes

wierdness  ==> weirdness
multicat  ==> multicast
limite  ==> limit
adddress  ==> address
operater  ==> operator
intial  ==> initial
smaler  ==> smaller
Communcation  ==> Communication
funcitons  ==> functions
everytime  ==> every time
Neigbor  ==> Neighbor
performace  ==> performance

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c  | 2 +-
 drivers/net/usb/cdc_mbim.c   | 6 +++---
 drivers/net/usb/cdc_ncm.c    | 4 ++--
 drivers/net/usb/int51x1.c    | 2 +-
 drivers/net/usb/lan78xx.c    | 2 +-
 drivers/net/usb/lg-vl600.c   | 4 ++--
 drivers/net/usb/r8152.c      | 4 ++--
 drivers/net/usb/rndis_host.c | 2 +-
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 7eb0109e9baa..eb3817d70f2b 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -217,7 +217,7 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
 		goto bad_desc;
 	}
 skip:
-	/* Communcation class functions with bmCapabilities are not
+	/* Communication class functions with bmCapabilities are not
 	 * RNDIS.  But some Wireless class RNDIS functions use
 	 * bmCapabilities for their own purpose. The failsafe is
 	 * therefore applied only to Communication class RNDIS
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 42fb75057c15..4c4ab7b38d78 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -301,8 +301,8 @@ error:
 	return NULL;
 }
 
-/* Some devices are known to send Neigbor Solicitation messages and
- * require Neigbor Advertisement replies.  The IPv6 core will not
+/* Some devices are known to send Neighbor Solicitation messages and
+ * require Neighbor Advertisement replies.  The IPv6 core will not
  * respond since IFF_NOARP is set, so we must handle them ourselves.
  */
 static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci)
@@ -589,7 +589,7 @@ static const struct driver_info cdc_mbim_info_zlp = {
  *
  * Note: The current implementation of this feature restricts each NTB
  * to a single NDP, implying that multiplexed sessions cannot share an
- * NTB. This might affect performace for multiplexed sessions.
+ * NTB. This might affect performance for multiplexed sessions.
  */
 static const struct driver_info cdc_mbim_info_ndp_to_end = {
 	.description = "CDC MBIM",
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 783d6139fdfa..c67f11e0e9a7 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -640,7 +640,7 @@ out:
 	/* set MTU to max supported by the device if necessary */
 	dev->net->mtu = min_t(int, dev->net->mtu, ctx->max_datagram_size - cdc_ncm_eth_hlen(dev));
 
-	/* do not exceed operater preferred MTU */
+	/* do not exceed operator preferred MTU */
 	if (ctx->mbim_extended_desc) {
 		mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU);
 		if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu)
@@ -697,7 +697,7 @@ static int cdc_ncm_setup(struct usbnet *dev)
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
 	u32 def_rx, def_tx;
 
-	/* be conservative when selecting intial buffer size to
+	/* be conservative when selecting initial buffer size to
 	 * increase the number of hosts this will work for
 	 */
 	def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX,
diff --git a/drivers/net/usb/int51x1.c b/drivers/net/usb/int51x1.c
index ed05f992c612..6fde41550de1 100644
--- a/drivers/net/usb/int51x1.c
+++ b/drivers/net/usb/int51x1.c
@@ -61,7 +61,7 @@ static struct sk_buff *int51x1_tx_fixup(struct usbnet *dev,
 	int need_tail = 0;
 	__le16 *len;
 
-	/* if packet and our header is smaler than 64 pad to 64 (+ ZLP) */
+	/* if packet and our header is smaller than 64 pad to 64 (+ ZLP) */
 	if ((pack_with_header_len) < dev->maxpacket)
 		need_tail = dev->maxpacket - pack_with_header_len + 1;
 	/*
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 02bce40a67e5..25489389ea49 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -298,7 +298,7 @@ struct lan78xx_net;
 struct lan78xx_priv {
 	struct lan78xx_net *dev;
 	u32 rfe_ctl;
-	u32 mchash_table[DP_SEL_VHF_HASH_LEN]; /* multicat hash table */
+	u32 mchash_table[DP_SEL_VHF_HASH_LEN]; /* multicast hash table */
 	u32 pfilter_table[NUM_OF_MAF][2]; /* perfect filter table */
 	u32 vlan_table[DP_SEL_VHF_VLAN_LEN];
 	struct mutex dataport_mutex; /* for dataport access */
diff --git a/drivers/net/usb/lg-vl600.c b/drivers/net/usb/lg-vl600.c
index 217a2d8fa47b..b2495fa80171 100644
--- a/drivers/net/usb/lg-vl600.c
+++ b/drivers/net/usb/lg-vl600.c
@@ -31,7 +31,7 @@
  * Windows/Mac drivers do send a couple of such frames to the device
  * during initialisation, with protocol set to 0x0906 or 0x0b06 and (what
  * seems to be) a flag in the .dummy_flags.  This doesn't seem necessary
- * for modem operation but can possibly be used for GPS or other funcitons.
+ * for modem operation but can possibly be used for GPS or other functions.
  */
 
 struct vl600_frame_hdr {
@@ -72,7 +72,7 @@ static int vl600_bind(struct usbnet *dev, struct usb_interface *intf)
 	/* ARP packets don't go through, but they're also of no use.  The
 	 * subnet has only two hosts anyway: us and the gateway / DHCP
 	 * server (probably simulated by modem firmware or network operator)
-	 * whose address changes everytime we connect to the intarwebz and
+	 * whose address changes every time we connect to the intarwebz and
 	 * who doesn't bother answering ARP requests either.  So hardware
 	 * addresses have no meaning, the destination and the source of every
 	 * packet depend only on whether it is on the IN or OUT endpoint.  */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 21e6b9b6776c..85039e17f4cd 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -2445,7 +2445,7 @@ static int rx_bottom(struct r8152 *tp, int budget)
 			unsigned int pkt_len, rx_frag_head_sz;
 			struct sk_buff *skb;
 
-			/* limite the skb numbers for rx_queue */
+			/* limit the skb numbers for rx_queue */
 			if (unlikely(skb_queue_len(&tp->rx_queue) >= 1000))
 				break;
 
@@ -8211,7 +8211,7 @@ static int rtl8152_post_reset(struct usb_interface *intf)
 	if (!tp)
 		return 0;
 
-	/* reset the MAC adddress in case of policy change */
+	/* reset the MAC address in case of policy change */
 	if (determine_ethernet_addr(tp, &sa) >= 0) {
 		rtnl_lock();
 		dev_set_mac_address (tp->netdev, &sa, NULL);
diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index f813ca9dec53..85a8b96e39a6 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -324,7 +324,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
 	 * For RX we handle drivers that zero-pad to end-of-packet.
 	 * Don't let userspace change these settings.
 	 *
-	 * NOTE: there still seems to be wierdness here, as if we need
+	 * NOTE: there still seems to be weirdness here, as if we need
 	 * to do some more things to make sure WinCE targets accept this.
 	 * They default to jumbograms of 8KB or 16KB, which is absurd
 	 * for such low data rates and which is also more than Linux

From e65c27938d8e3dd67d55049a82f27e56ca31e728 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 1 Jun 2021 22:18:59 +0800
Subject: [PATCH 0826/3292] net: mdio: Fix spelling mistakes

informations  ==> information
typicaly  ==> typically
derrive  ==> derive
eventhough  ==> even though

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-bcm-unimac.c     | 2 +-
 drivers/net/mdio/mdio-mux-bcm-iproc.c  | 2 +-
 drivers/net/mdio/mdio-mux-meson-g12a.c | 2 +-
 drivers/net/mdio/of_mdio.c             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index 5d171e7f118d..bfc9be23c973 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -203,7 +203,7 @@ static void unimac_mdio_clk_set(struct unimac_mdio_priv *priv)
 		return;
 	}
 
-	/* The MDIO clock is the reference clock (typicaly 250Mhz) divided by
+	/* The MDIO clock is the reference clock (typically 250Mhz) divided by
 	 * 2 x (MDIO_CLK_DIV + 1)
 	 */
 	reg = unimac_mdio_readl(priv, MDIO_CFG);
diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 03261e6b9ceb..239e88c7a272 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -65,7 +65,7 @@ static void mdio_mux_iproc_config(struct iproc_mdiomux_desc *md)
 	writel(val, md->base + MDIO_SCAN_CTRL_OFFSET);
 
 	if (md->core_clk) {
-		/* use rate adjust regs to derrive the mdio's operating
+		/* use rate adjust regs to derive the mdio's operating
 		 * frequency from the specified core clock
 		 */
 		divisor = clk_get_rate(md->core_clk) / MDIO_OPERATING_FREQUENCY;
diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c
index bf86c9c7a288..b8866bc3f2e8 100644
--- a/drivers/net/mdio/mdio-mux-meson-g12a.c
+++ b/drivers/net/mdio/mdio-mux-meson-g12a.c
@@ -95,7 +95,7 @@ static int g12a_ephy_pll_enable(struct clk_hw *hw)
 
 	/* Poll on the digital lock instead of the usual analog lock
 	 * This is done because bit 31 is unreliable on some SoC. Bit
-	 * 31 may indicate that the PLL is not lock eventhough the clock
+	 * 31 may indicate that the PLL is not lock even though the clock
 	 * is actually running
 	 */
 	return readl_poll_timeout(pll->base + ETH_PLL_CTL0, val,
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 094494a68ddf..8e97d5b825f5 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -466,7 +466,7 @@ EXPORT_SYMBOL(of_phy_get_and_connect);
  * of_phy_is_fixed_link() and of_phy_register_fixed_link() must
  * support two DT bindings:
  * - the old DT binding, where 'fixed-link' was a property with 5
- *   cells encoding various informations about the fixed PHY
+ *   cells encoding various information about the fixed PHY
  * - the new DT binding, where 'fixed-link' is a sub-node of the
  *   Ethernet device.
  */

From 710b797cf61b318995db6d31767a532162db113d Mon Sep 17 00:00:00 2001
From: Sharath Chandra Vurukala <sharathv@codeaurora.org>
Date: Wed, 2 Jun 2021 00:58:34 +0530
Subject: [PATCH 0827/3292] docs: networking: Add documentation for MAPv5

Adding documentation explaining the new MAPv4/v5 packet formats
and the corresponding checksum offload headers.

Signed-off-by: Sharath Chandra Vurukala <sharathv@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../cellular/qualcomm/rmnet.rst               | 126 ++++++++++++++++--
 1 file changed, 114 insertions(+), 12 deletions(-)

diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst
index 70643b58de05..4118384cf8eb 100644
--- a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst
+++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst
@@ -27,34 +27,136 @@ these MAP frames and send them to appropriate PDN's.
 2. Packet format
 ================
 
-a. MAP packet (data / control)
+a. MAP packet v1 (data / control)
 
-MAP header has the same endianness of the IP packet.
+MAP header fields are in big endian format.
 
 Packet format::
 
-  Bit             0             1           2-7      8 - 15           16 - 31
+  Bit             0             1           2-7      8-15           16-31
   Function   Command / Data   Reserved     Pad   Multiplexer ID    Payload length
-  Bit            32 - x
-  Function     Raw  Bytes
+
+  Bit            32-x
+  Function      Raw bytes
 
 Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
-or data packet. Control packet is used for transport level flow control. Data
+or data packet. Command packet is used for transport level flow control. Data
 packets are standard IP packets.
 
-Reserved bits are usually zeroed out and to be ignored by receiver.
+Reserved bits must be zero when sent and ignored when received.
 
-Padding is number of bytes to be added for 4 byte alignment if required by
-hardware.
+Padding is the number of bytes to be appended to the payload to
+ensure 4 byte alignment.
 
 Multiplexer ID is to indicate the PDN on which data has to be sent.
 
 Payload length includes the padding length but does not include MAP header
 length.
 
-b. MAP packet (command specific)::
+b. Map packet v4 (data / control)
 
-    Bit             0             1           2-7      8 - 15           16 - 31
+MAP header fields are in big endian format.
+
+Packet format::
+
+  Bit             0             1           2-7      8-15           16-31
+  Function   Command / Data   Reserved     Pad   Multiplexer ID    Payload length
+
+  Bit            32-(x-33)      (x-32)-x
+  Function      Raw bytes      Checksum offload header
+
+Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
+or data packet. Command packet is used for transport level flow control. Data
+packets are standard IP packets.
+
+Reserved bits must be zero when sent and ignored when received.
+
+Padding is the number of bytes to be appended to the payload to
+ensure 4 byte alignment.
+
+Multiplexer ID is to indicate the PDN on which data has to be sent.
+
+Payload length includes the padding length but does not include MAP header
+length.
+
+Checksum offload header, has the information about the checksum processing done
+by the hardware.Checksum offload header fields are in big endian format.
+
+Packet format::
+
+  Bit             0-14        15              16-31
+  Function      Reserved   Valid     Checksum start offset
+
+  Bit                31-47                    48-64
+  Function      Checksum length           Checksum value
+
+Reserved bits must be zero when sent and ignored when received.
+
+Valid bit indicates whether the partial checksum is calculated and is valid.
+Set to 1, if its is valid. Set to 0 otherwise.
+
+Padding is the number of bytes to be appended to the payload to
+ensure 4 byte alignment.
+
+Checksum start offset, Indicates the offset in bytes from the beginning of the
+IP header, from which modem computed checksum.
+
+Checksum length is the Length in bytes starting from CKSUM_START_OFFSET,
+over which checksum is computed.
+
+Checksum value, indicates the checksum computed.
+
+c. MAP packet v5 (data / control)
+
+MAP header fields are in big endian format.
+
+Packet format::
+
+  Bit             0             1         2-7      8-15           16-31
+  Function   Command / Data  Next header  Pad   Multiplexer ID   Payload length
+
+  Bit            32-x
+  Function      Raw bytes
+
+Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
+or data packet. Command packet is used for transport level flow control. Data
+packets are standard IP packets.
+
+Next header is used to indicate the presence of another header, currently is
+limited to checksum header.
+
+Padding is the number of bytes to be appended to the payload to
+ensure 4 byte alignment.
+
+Multiplexer ID is to indicate the PDN on which data has to be sent.
+
+Payload length includes the padding length but does not include MAP header
+length.
+
+d. Checksum offload header v5
+
+Checksum offload header fields are in big endian format.
+
+  Bit            0 - 6          7               8-15              16-31
+  Function     Header Type    Next Header     Checksum Valid    Reserved
+
+Header Type is to indicate the type of header, this usually is set to CHECKSUM
+
+Header types
+= ==========================================
+0 Reserved
+1 Reserved
+2 checksum header
+
+Checksum Valid is to indicate whether the header checksum is valid. Value of 1
+implies that checksum is calculated on this packet and is valid, value of 0
+indicates that the calculated packet checksum is invalid.
+
+Reserved bits must be zero when sent and ignored when received.
+
+e. MAP packet v1/v5 (command specific)::
+
+    Bit             0             1         2-7      8 - 15           16 - 31
     Function   Command         Reserved     Pad   Multiplexer ID    Payload length
     Bit          32 - 39        40 - 45    46 - 47       48 - 63
     Function   Command name    Reserved   Command Type   Reserved
@@ -74,7 +176,7 @@ Command types
 3 is for error during processing of commands
 = ==========================================
 
-c. Aggregation
+f. Aggregation
 
 Aggregation is multiple MAP packets (can be data or command) delivered to
 rmnet in a single linear skb. rmnet will process the individual

From e1d9a90a9bfdb0735062d3adb16b07314b4b7b01 Mon Sep 17 00:00:00 2001
From: Sharath Chandra Vurukala <sharathv@codeaurora.org>
Date: Wed, 2 Jun 2021 00:58:35 +0530
Subject: [PATCH 0828/3292] net: ethernet: rmnet: Support for ingress MAPv5
 checksum offload

Adding support for processing of MAPv5 downlink packets.
It involves parsing the Mapv5 packet and checking the csum header
to know whether the hardware has validated the checksum and is
valid or not.

Based on the checksum valid bit the corresponding stats are
incremented and skb->ip_summed is marked either CHECKSUM_UNNECESSARY
or left as CHEKSUM_NONE to let network stack revalidate the checksum
and update the respective snmp stats.

Current MAPV1 header has been modified, the reserved field in the
Mapv1 header is now used for next header indication.

Signed-off-by: Sharath Chandra Vurukala <sharathv@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_handlers.c  | 17 ++++--
 .../net/ethernet/qualcomm/rmnet/rmnet_map.h   |  3 +-
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 57 ++++++++++++++++++-
 include/linux/if_rmnet.h                      | 30 +++++++++-
 include/uapi/linux/if_link.h                  |  1 +
 5 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 0be5ac7ab261..706a225075a3 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, 2021, The Linux Foundation. All rights reserved.
  *
  * RMNET Data ingress/egress handler
  */
@@ -82,11 +82,16 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 
 	skb->dev = ep->egress_dev;
 
-	/* Subtract MAP header */
-	skb_pull(skb, sizeof(struct rmnet_map_header));
-	rmnet_set_skb_proto(skb);
-
-	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
+	if ((port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV5) &&
+	    (map_header->flags & MAP_NEXT_HEADER_FLAG)) {
+		if (rmnet_map_process_next_hdr_packet(skb, len))
+			goto free_skb;
+		skb_pull(skb, sizeof(*map_header));
+		rmnet_set_skb_proto(skb);
+	} else if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
+		/* Subtract MAP header */
+		skb_pull(skb, sizeof(*map_header));
+		rmnet_set_skb_proto(skb);
 		if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 2aea153f4247..1a399bfa07d2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, 2021, The Linux Foundation. All rights reserved.
  */
 
 #ifndef _RMNET_MAP_H_
@@ -48,5 +48,6 @@ void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port);
 int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len);
 void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
 				      struct net_device *orig_dev);
+int rmnet_map_process_next_hdr_packet(struct sk_buff *skb, u16 len);
 
 #endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 0ac2ff828320..5c018bd64689 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, 2021, The Linux Foundation. All rights reserved.
  *
  * RMNET Data MAP protocol
  */
@@ -8,6 +8,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
+#include <linux/bitfield.h>
 #include "rmnet_config.h"
 #include "rmnet_map.h"
 #include "rmnet_private.h"
@@ -300,8 +301,11 @@ done:
 struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 				      struct rmnet_port *port)
 {
+	struct rmnet_map_v5_csum_header *next_hdr = NULL;
 	struct rmnet_map_header *maph;
+	void *data = skb->data;
 	struct sk_buff *skbn;
+	u8 nexthdr_type;
 	u32 packet_len;
 
 	if (skb->len == 0)
@@ -310,8 +314,18 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 	maph = (struct rmnet_map_header *)skb->data;
 	packet_len = ntohs(maph->pkt_len) + sizeof(*maph);
 
-	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
+	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
 		packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
+	} else if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV5) {
+		if (!(maph->flags & MAP_CMD_FLAG)) {
+			packet_len += sizeof(*next_hdr);
+			if (maph->flags & MAP_NEXT_HEADER_FLAG)
+				next_hdr = data + sizeof(*maph);
+			else
+				/* Mapv5 data pkt without csum hdr is invalid */
+				return NULL;
+		}
+	}
 
 	if (((int)skb->len - (int)packet_len) < 0)
 		return NULL;
@@ -320,6 +334,13 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 	if (!maph->pkt_len)
 		return NULL;
 
+	if (next_hdr) {
+		nexthdr_type = u8_get_bits(next_hdr->header_info,
+					   MAPV5_HDRINFO_HDR_TYPE_FMASK);
+		if (nexthdr_type != RMNET_MAP_HEADER_TYPE_CSUM_OFFLOAD)
+			return NULL;
+	}
+
 	skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC);
 	if (!skbn)
 		return NULL;
@@ -414,3 +435,35 @@ sw_csum:
 
 	priv->stats.csum_sw++;
 }
+
+/* Process a MAPv5 packet header */
+int rmnet_map_process_next_hdr_packet(struct sk_buff *skb,
+				      u16 len)
+{
+	struct rmnet_priv *priv = netdev_priv(skb->dev);
+	struct rmnet_map_v5_csum_header *next_hdr;
+	u8 nexthdr_type;
+
+	next_hdr = (struct rmnet_map_v5_csum_header *)(skb->data +
+			sizeof(struct rmnet_map_header));
+
+	nexthdr_type = u8_get_bits(next_hdr->header_info,
+				   MAPV5_HDRINFO_HDR_TYPE_FMASK);
+
+	if (nexthdr_type != RMNET_MAP_HEADER_TYPE_CSUM_OFFLOAD)
+		return -EINVAL;
+
+	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
+		priv->stats.csum_sw++;
+	} else if (next_hdr->csum_info & MAPV5_CSUMINFO_VALID_FLAG) {
+		priv->stats.csum_ok++;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	} else {
+		priv->stats.csum_valid_unset++;
+	}
+
+	/* Pull csum v5 header */
+	skb_pull(skb, sizeof(*next_hdr));
+
+	return 0;
+}
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index 4efb537f57f3..be17610a981e 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only
- * Copyright (c) 2013-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2013-2019, 2021 The Linux Foundation. All rights reserved.
  */
 
 #ifndef _LINUX_IF_RMNET_H_
@@ -12,10 +12,12 @@ struct rmnet_map_header {
 }  __aligned(1);
 
 /* rmnet_map_header flags field:
- *  PAD_LEN:	number of pad bytes following packet data
- *  CMD:	1 = packet contains a MAP command; 0 = packet contains data
+ *  PAD_LEN:	  number of pad bytes following packet data
+ *  CMD:	  1 = packet contains a MAP command; 0 = packet contains data
+ *  NEXT_HEADER: 1 = packet contains V5 CSUM header 0 = no V5 CSUM header
  */
 #define MAP_PAD_LEN_MASK		GENMASK(5, 0)
+#define MAP_NEXT_HEADER_FLAG		BIT(6)
 #define MAP_CMD_FLAG			BIT(7)
 
 struct rmnet_map_dl_csum_trailer {
@@ -45,4 +47,26 @@ struct rmnet_map_ul_csum_header {
 #define MAP_CSUM_UL_UDP_FLAG		BIT(14)
 #define MAP_CSUM_UL_ENABLED_FLAG	BIT(15)
 
+/* MAP CSUM headers */
+struct rmnet_map_v5_csum_header {
+	u8 header_info;
+	u8 csum_info;
+	__be16 reserved;
+} __aligned(1);
+
+/* v5 header_info field
+ * NEXT_HEADER: represents whether there is any next header
+ * HEADER_TYPE: represents the type of this header
+ *
+ * csum_info field
+ * CSUM_VALID_OR_REQ:
+ * 1 = for UL, checksum computation is requested.
+ * 1 = for DL, validated the checksum and has found it valid
+ */
+
+#define MAPV5_HDRINFO_NXT_HDR_FLAG	BIT(0)
+#define MAPV5_HDRINFO_HDR_TYPE_FMASK	GENMASK(7, 1)
+#define MAPV5_CSUMINFO_VALID_FLAG	BIT(7)
+
+#define RMNET_MAP_HEADER_TYPE_CSUM_OFFLOAD 2
 #endif /* !(_LINUX_IF_RMNET_H_) */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index cd5b382a4138..1f753dcd85e1 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1236,6 +1236,7 @@ enum {
 #define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
 #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5           (1U << 4)
 
 enum {
 	IFLA_RMNET_UNSPEC,

From b6e5d27e32ef6089d316ce7e1ecaf595584d4b84 Mon Sep 17 00:00:00 2001
From: Sharath Chandra Vurukala <sharathv@codeaurora.org>
Date: Wed, 2 Jun 2021 00:58:36 +0530
Subject: [PATCH 0829/3292] net: ethernet: rmnet: Add support for MAPv5 egress
 packets

Adding support for MAPv5 egress packets.

This involves adding the MAPv5 header and setting the csum_valid_required
in the checksum header to request HW compute the checksum.

Corresponding stats are incremented based on whether the checksum is
computed in software or HW.

New stat has been added which represents the count of packets whose
checksum is calculated by the HW.

Signed-off-by: Sharath Chandra Vurukala <sharathv@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_config.h    |  4 +-
 .../ethernet/qualcomm/rmnet/rmnet_handlers.c  | 23 +++--
 .../net/ethernet/qualcomm/rmnet/rmnet_map.h   |  8 +-
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 92 +++++++++++++++++--
 .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c   |  1 +
 include/uapi/linux/if_link.h                  |  1 +
 6 files changed, 111 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 8d8d4690a074..8e64ca98068d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018, 2021 The Linux Foundation.
+ * All rights reserved.
  *
  * RMNET Data configuration engine
  */
@@ -56,6 +57,7 @@ struct rmnet_priv_stats {
 	u64 csum_fragmented_pkt;
 	u64 csum_skipped;
 	u64 csum_sw;
+	u64 csum_hw;
 };
 
 struct rmnet_priv {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 706a225075a3..2504d0363b6b 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -133,7 +133,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 				    struct rmnet_port *port, u8 mux_id,
 				    struct net_device *orig_dev)
 {
-	int required_headroom, additional_header_len;
+	int required_headroom, additional_header_len, csum_type = 0;
 	struct rmnet_map_header *map_header;
 
 	additional_header_len = 0;
@@ -141,18 +141,23 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 
 	if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4) {
 		additional_header_len = sizeof(struct rmnet_map_ul_csum_header);
-		required_headroom += additional_header_len;
+		csum_type = RMNET_FLAGS_EGRESS_MAP_CKSUMV4;
+	} else if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV5) {
+		additional_header_len = sizeof(struct rmnet_map_v5_csum_header);
+		csum_type = RMNET_FLAGS_EGRESS_MAP_CKSUMV5;
 	}
 
-	if (skb_headroom(skb) < required_headroom) {
-		if (pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC))
-			return -ENOMEM;
-	}
+	required_headroom += additional_header_len;
 
-	if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4)
-		rmnet_map_checksum_uplink_packet(skb, orig_dev);
+	if (skb_cow_head(skb, required_headroom) < 0)
+		return -ENOMEM;
 
-	map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
+	if (csum_type)
+		rmnet_map_checksum_uplink_packet(skb, port, orig_dev,
+						 csum_type);
+
+	map_header = rmnet_map_add_map_header(skb, additional_header_len,
+					      port, 0);
 	if (!map_header)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 1a399bfa07d2..e5a0b38f7dbe 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -43,11 +43,15 @@ enum rmnet_map_commands {
 struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 				      struct rmnet_port *port);
 struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
-						  int hdrlen, int pad);
+						  int hdrlen,
+						  struct rmnet_port *port,
+						  int pad);
 void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port);
 int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len);
 void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
-				      struct net_device *orig_dev);
+				      struct rmnet_port *port,
+				      struct net_device *orig_dev,
+				      int csum_type);
 int rmnet_map_process_next_hdr_packet(struct sk_buff *skb, u16 len);
 
 #endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 5c018bd64689..6492ec5bdec4 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -251,12 +251,69 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 }
 #endif
 
+static void rmnet_map_v5_checksum_uplink_packet(struct sk_buff *skb,
+						struct rmnet_port *port,
+						struct net_device *orig_dev)
+{
+	struct rmnet_priv *priv = netdev_priv(orig_dev);
+	struct rmnet_map_v5_csum_header *ul_header;
+
+	ul_header = skb_push(skb, sizeof(*ul_header));
+	memset(ul_header, 0, sizeof(*ul_header));
+	ul_header->header_info = u8_encode_bits(RMNET_MAP_HEADER_TYPE_CSUM_OFFLOAD,
+						MAPV5_HDRINFO_HDR_TYPE_FMASK);
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		void *iph = ip_hdr(skb);
+		__sum16 *check;
+		void *trans;
+		u8 proto;
+
+		if (skb->protocol != htons(ETH_P_IP) &&
+		    skb->protocol != htons(ETH_P_IPV6)) {
+			priv->stats.csum_err_invalid_ip_version++;
+			goto sw_csum;
+		}
+
+		if (skb->protocol == htons(ETH_P_IP)) {
+			u16 ip_len = ((struct iphdr *)iph)->ihl * 4;
+
+			proto = ((struct iphdr *)iph)->protocol;
+			trans = iph + ip_len;
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+#if IS_ENABLED(CONFIG_IPV6)
+			u16 ip_len = sizeof(struct ipv6hdr);
+
+			proto = ((struct ipv6hdr *)iph)->nexthdr;
+			trans = iph + ip_len;
+#else
+			priv->stats.csum_err_invalid_ip_version++;
+			goto sw_csum;
+#endif /* CONFIG_IPV6 */
+		}
+
+		check = rmnet_map_get_csum_field(proto, trans);
+		if (check) {
+			skb->ip_summed = CHECKSUM_NONE;
+			/* Ask for checksum offloading */
+			ul_header->csum_info |= MAPV5_CSUMINFO_VALID_FLAG;
+			priv->stats.csum_hw++;
+			return;
+		}
+	}
+
+sw_csum:
+	priv->stats.csum_sw++;
+}
+
 /* Adds MAP header to front of skb->data
  * Padding is calculated and set appropriately in MAP header. Mux ID is
  * initialized to 0.
  */
 struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
-						  int hdrlen, int pad)
+						  int hdrlen,
+						  struct rmnet_port *port,
+						  int pad)
 {
 	struct rmnet_map_header *map_header;
 	u32 padding, map_datalen;
@@ -267,6 +324,10 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 			skb_push(skb, sizeof(struct rmnet_map_header));
 	memset(map_header, 0, sizeof(struct rmnet_map_header));
 
+	/* Set next_hdr bit for csum offload packets */
+	if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV5)
+		map_header->flags |= MAP_NEXT_HEADER_FLAG;
+
 	if (pad == RMNET_MAP_NO_PAD_BYTES) {
 		map_header->pkt_len = htons(map_datalen);
 		return map_header;
@@ -393,11 +454,8 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 	return 0;
 }
 
-/* Generates UL checksum meta info header for IPv4 and IPv6 over TCP and UDP
- * packets that are supported for UL checksum offload.
- */
-void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
-				      struct net_device *orig_dev)
+static void rmnet_map_v4_checksum_uplink_packet(struct sk_buff *skb,
+						struct net_device *orig_dev)
 {
 	struct rmnet_priv *priv = netdev_priv(orig_dev);
 	struct rmnet_map_ul_csum_header *ul_header;
@@ -416,10 +474,12 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
 
 		if (skb->protocol == htons(ETH_P_IP)) {
 			rmnet_map_ipv4_ul_csum_header(iphdr, ul_header, skb);
+			priv->stats.csum_hw++;
 			return;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 #if IS_ENABLED(CONFIG_IPV6)
 			rmnet_map_ipv6_ul_csum_header(iphdr, ul_header, skb);
+			priv->stats.csum_hw++;
 			return;
 #else
 			priv->stats.csum_err_invalid_ip_version++;
@@ -436,6 +496,26 @@ sw_csum:
 	priv->stats.csum_sw++;
 }
 
+/* Generates UL checksum meta info header for IPv4 and IPv6 over TCP and UDP
+ * packets that are supported for UL checksum offload.
+ */
+void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
+				      struct rmnet_port *port,
+				      struct net_device *orig_dev,
+				      int csum_type)
+{
+	switch (csum_type) {
+	case RMNET_FLAGS_EGRESS_MAP_CKSUMV4:
+		rmnet_map_v4_checksum_uplink_packet(skb, orig_dev);
+		break;
+	case RMNET_FLAGS_EGRESS_MAP_CKSUMV5:
+		rmnet_map_v5_checksum_uplink_packet(skb, port, orig_dev);
+		break;
+	default:
+		break;
+	}
+}
+
 /* Process a MAPv5 packet header */
 int rmnet_map_process_next_hdr_packet(struct sk_buff *skb,
 				      u16 len)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 41fbd2ceeede..fe13017e9a41 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -174,6 +174,7 @@ static const char rmnet_gstrings_stats[][ETH_GSTRING_LEN] = {
 	"Checksum skipped on ip fragment",
 	"Checksum skipped",
 	"Checksum computed in software",
+	"Checksum computed in hardware",
 };
 
 static void rmnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 1f753dcd85e1..a5a7f0e64865 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1237,6 +1237,7 @@ enum {
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
 #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV5           (1U << 4)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5            (1U << 5)
 
 enum {
 	IFLA_RMNET_UNSPEC,

From 791ad7f5c17ea3d0887b94eed1e7812777f8e496 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:56:03 +0800
Subject: [PATCH 0830/3292] batman-adv: Fix spelling mistakes

Fix some spelling mistakes in comments:
containg  ==> containing
dont  ==> don't
datas  ==> data
brodcast  ==> broadcast

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 4 ++--
 net/batman-adv/hard-interface.c        | 2 +-
 net/batman-adv/hash.h                  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 7dc133cfc363..63d42dcc9324 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
 		break;
 	case BATADV_CLAIM_TYPE_ANNOUNCE:
 		/* announcement frame
-		 * set HW SRC to the special mac containg the crc
+		 * set HW SRC to the special mac containing the crc
 		 */
 		ether_addr_copy(hw_src, mac);
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
@@ -1040,7 +1040,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
 	/* lets see if this originator is in our mesh */
 	orig_node = batadv_orig_hash_find(bat_priv, backbone_addr);
 
-	/* dont accept claims from gateways which are not in
+	/* don't accept claims from gateways which are not in
 	 * the same mesh or group.
 	 */
 	if (!orig_node)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4a6a25d551a8..b99f64f483fc 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -403,7 +403,7 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
 		goto out;
 	}
 
-	/* >1 neighbors -> (re)brodcast */
+	/* >1 neighbors -> (re)broadcast */
 	if (rcu_dereference(hlist_next_rcu(first)))
 		goto out;
 
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 46696759f194..fb251c385a1b 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -18,7 +18,7 @@
 #include <linux/stddef.h>
 #include <linux/types.h>
 
-/* callback to a compare function.  should compare 2 element datas for their
+/* callback to a compare function.  should compare 2 element data for their
  * keys
  *
  * Return: true if same and false if not same

From 14a832498c23cf480243222189066a8006182b9d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 1 Jun 2021 11:37:07 +0100
Subject: [PATCH 0831/3292] arch: microblaze: Fix spelling mistake "vesion" ->
 "version"

There is a spelling mistake in the comment. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210601103707.9701-1-colin.king@canonical.com
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index b41f323e1fde..6d4af39e3890 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -3,7 +3,7 @@ KBUILD_DEFCONFIG := mmu_defconfig
 
 UTS_SYSNAME = -DUTS_SYSNAME=\"Linux\"
 
-# What CPU vesion are we building for, and crack it open
+# What CPU version are we building for, and crack it open
 # as major.minor.rev
 CPU_VER   := $(shell echo $(CONFIG_XILINX_MICROBLAZE0_HW_VER))
 CPU_MAJOR := $(shell echo $(CPU_VER) | cut -d '.' -f 1)

From 450605c28d571eddca39a65fdbc1338add44c6d9 Mon Sep 17 00:00:00 2001
From: Praveen Kumar <kumarpraveen@linux.microsoft.com>
Date: Mon, 31 May 2021 13:10:46 +0530
Subject: [PATCH 0832/3292] x86/hyperv: fix logical processor creation

Microsoft Hypervisor expects the logical processor index to be the same
as CPU's index during logical processor creation. Using cpu_physical_id
confuses hypervisor's scheduler. That causes the root partition not boot
when core scheduler is used.

This patch removes the call to cpu_physical_id and uses the CPU index
directly for bringing up logical processor. This scheme works for both
classic scheduler and core scheduler.

Fixes: 333abaf5abb3 (x86/hyperv: implement and use hv_smp_prepare_cpus)
Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>
Link: https://lore.kernel.org/r/20210531074046.113452-1-kumarpraveen@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/kernel/cpu/mshyperv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 22f13343b5da..4fa0a4280895 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -236,7 +236,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
 	for_each_present_cpu(i) {
 		if (i == 0)
 			continue;
-		ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
+		ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i);
 		BUG_ON(ret);
 	}
 

From 9de6655cc5a6a1febc514465c87c24a0e96d8dba Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 25 May 2021 18:58:41 +0800
Subject: [PATCH 0833/3292] drivers: hv: Fix missing error code in
 vmbus_connect()

Eliminate the follow smatch warning:

drivers/hv/connection.c:236 vmbus_connect() warn: missing error code
'ret'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1621940321-72353-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/connection.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 311cd005b3be..5e479d54918c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -232,8 +232,10 @@ int vmbus_connect(void)
 	 */
 
 	for (i = 0; ; i++) {
-		if (i == ARRAY_SIZE(vmbus_versions))
+		if (i == ARRAY_SIZE(vmbus_versions)) {
+			ret = -EDOM;
 			goto cleanup;
+		}
 
 		version = vmbus_versions[i];
 		if (version > max_version)

From fcef0e6c27ce109d2c617aa12f0bfd9f7ff47d38 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 2 Jun 2021 09:45:15 -0400
Subject: [PATCH 0834/3292] fs: dlm: fix lowcomms_start error case

This patch fixes the error path handling in lowcomms_start(). We need to
cleanup some static allocated data structure and cleanup possible
workqueue if these have started.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 36adccc4f849..b71f7eafb808 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1803,10 +1803,15 @@ static void process_send_sockets(struct work_struct *work)
 
 static void work_stop(void)
 {
-	if (recv_workqueue)
+	if (recv_workqueue) {
 		destroy_workqueue(recv_workqueue);
-	if (send_workqueue)
+		recv_workqueue = NULL;
+	}
+
+	if (send_workqueue) {
 		destroy_workqueue(send_workqueue);
+		send_workqueue = NULL;
+	}
 }
 
 static int work_start(void)
@@ -1823,6 +1828,7 @@ static int work_start(void)
 	if (!send_workqueue) {
 		log_print("can't start dlm_send");
 		destroy_workqueue(recv_workqueue);
+		recv_workqueue = NULL;
 		return -ENOMEM;
 	}
 
@@ -1960,7 +1966,7 @@ int dlm_lowcomms_start(void)
 
 	error = work_start();
 	if (error)
-		goto fail;
+		goto fail_local;
 
 	dlm_allow_conn = 1;
 
@@ -1977,6 +1983,9 @@ int dlm_lowcomms_start(void)
 fail_unlisten:
 	dlm_allow_conn = 0;
 	dlm_close_sock(&listen_con.sock);
+	work_stop();
+fail_local:
+	deinit_local();
 fail:
 	return error;
 }

From 700ab1c363c7b54c9ea3222379b33fc00ab02f7b Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 2 Jun 2021 09:45:16 -0400
Subject: [PATCH 0835/3292] fs: dlm: fix memory leak when fenced

I got some kmemleak report when a node was fenced. The user space tool
dlm_controld will therefore run some rmdir() in dlm configfs which was
triggering some memleaks. This patch stores the sps and cms attributes
which stores some handling for subdirectories of the configfs cluster
entry and free them if they get released as the parent directory gets
freed.

unreferenced object 0xffff88810d9e3e00 (size 192):
  comm "dlm_controld", pid 342, jiffies 4294698126 (age 55438.801s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 73 70 61 63 65 73 00 00  ........spaces..
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000db8b640b>] make_cluster+0x5d/0x360
    [<000000006a571db4>] configfs_mkdir+0x274/0x730
    [<00000000b094501c>] vfs_mkdir+0x27e/0x340
    [<0000000058b0adaf>] do_mkdirat+0xff/0x1b0
    [<00000000d1ffd156>] do_syscall_64+0x40/0x80
    [<00000000ab1408c8>] entry_SYSCALL_64_after_hwframe+0x44/0xae
unreferenced object 0xffff88810d9e3a00 (size 192):
  comm "dlm_controld", pid 342, jiffies 4294698126 (age 55438.801s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 63 6f 6d 6d 73 00 00 00  ........comms...
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000a7ef6ad2>] make_cluster+0x82/0x360
    [<000000006a571db4>] configfs_mkdir+0x274/0x730
    [<00000000b094501c>] vfs_mkdir+0x27e/0x340
    [<0000000058b0adaf>] do_mkdirat+0xff/0x1b0
    [<00000000d1ffd156>] do_syscall_64+0x40/0x80
    [<00000000ab1408c8>] entry_SYSCALL_64_after_hwframe+0x44/0xae

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 01ae294743e9..db717a879537 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -80,6 +80,9 @@ struct dlm_cluster {
 	unsigned int cl_new_rsb_count;
 	unsigned int cl_recover_callbacks;
 	char cl_cluster_name[DLM_LOCKSPACE_LEN];
+
+	struct dlm_spaces *sps;
+	struct dlm_comms *cms;
 };
 
 static struct dlm_cluster *config_item_to_cluster(struct config_item *i)
@@ -410,6 +413,9 @@ static struct config_group *make_cluster(struct config_group *g,
 	if (!cl || !sps || !cms)
 		goto fail;
 
+	cl->sps = sps;
+	cl->cms = cms;
+
 	config_group_init_type_name(&cl->group, name, &cluster_type);
 	config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type);
 	config_group_init_type_name(&cms->cs_group, "comms", &comms_type);
@@ -459,6 +465,9 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
 static void release_cluster(struct config_item *i)
 {
 	struct dlm_cluster *cl = config_item_to_cluster(i);
+
+	kfree(cl->sps);
+	kfree(cl->cms);
 	kfree(cl);
 }
 

From 6c6a1cc666956cbb3ac6db79ed401ee027e6f950 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 2 Jun 2021 09:45:17 -0400
Subject: [PATCH 0836/3292] fs: dlm: use alloc_ordered_workqueue

The proper way to allocate ordered workqueues is to use
alloc_ordered_workqueue() function. The current way implies an ordered
workqueue which is also required by dlm.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index b71f7eafb808..02b636d113fb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1816,15 +1816,13 @@ static void work_stop(void)
 
 static int work_start(void)
 {
-	recv_workqueue = alloc_workqueue("dlm_recv",
-					 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	recv_workqueue = alloc_ordered_workqueue("dlm_recv", WQ_MEM_RECLAIM);
 	if (!recv_workqueue) {
 		log_print("can't start dlm_recv");
 		return -ENOMEM;
 	}
 
-	send_workqueue = alloc_workqueue("dlm_send",
-					 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	send_workqueue = alloc_ordered_workqueue("dlm_send", WQ_MEM_RECLAIM);
 	if (!send_workqueue) {
 		log_print("can't start dlm_send");
 		destroy_workqueue(recv_workqueue);

From 9a4139a79403161f190cf30be7d89ac877ae3b12 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 2 Jun 2021 09:45:18 -0400
Subject: [PATCH 0837/3292] fs: dlm: move dlm allow conn

This patch checks if possible allowing new connections is allowed before
queueing the listen socket to accept new connections.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 02b636d113fb..6b150e3aa30c 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -471,6 +471,9 @@ static void lowcomms_data_ready(struct sock *sk)
 
 static void lowcomms_listen_data_ready(struct sock *sk)
 {
+	if (!dlm_allow_conn)
+		return;
+
 	queue_work(recv_workqueue, &listen_con.rwork);
 }
 
@@ -969,10 +972,6 @@ static int accept_from_sock(struct listen_connection *con)
 	struct connection *addcon;
 	unsigned int mark;
 
-	if (!dlm_allow_conn) {
-		return -1;
-	}
-
 	if (!con->sock)
 		return -ENOTCONN;
 

From ac7d5d036dc93710971f532ed57f9a6858a2b262 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 2 Jun 2021 09:45:19 -0400
Subject: [PATCH 0838/3292] fs: dlm: introduce proto values

Currently the dlm protocol values are that TCP is 0 and everything else
is SCTP. This makes it difficult to introduce possible other transport
layers. The only one user space tool dlm_controld, which I am aware of,
handles the protocol value 1 for SCTP. We change it now to handle SCTP
as 1, this will break user space API but it will fix it so we can add
possible other transport layers.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c   |  2 +-
 fs/dlm/config.h   |  3 +++
 fs/dlm/lowcomms.c | 23 +++++++++++++++++++----
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index db717a879537..c91c1c73ed9d 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -952,7 +952,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_LOG_INFO           1
-#define DEFAULT_PROTOCOL           0
+#define DEFAULT_PROTOCOL           DLM_PROTO_TCP
 #define DEFAULT_MARK               0
 #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 #define DEFAULT_WAITWARN_US	   0
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index d2cd4bd20313..00374b45c748 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -23,6 +23,9 @@ struct dlm_config_node {
 
 #define DLM_MAX_ADDR_COUNT 3
 
+#define DLM_PROTO_TCP	0
+#define DLM_PROTO_SCTP	1
+
 struct dlm_config_info {
 	int ci_tcp_port;
 	int ci_buffer_size;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 6b150e3aa30c..f2a3b0401b9c 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -208,12 +208,18 @@ static int dlm_con_init(struct connection *con, int nodeid)
 	INIT_WORK(&con->rwork, process_recv_sockets);
 	init_waitqueue_head(&con->shutdown_wait);
 
-	if (dlm_config.ci_protocol == 0) {
+	switch (dlm_config.ci_protocol) {
+	case DLM_PROTO_TCP:
 		con->connect_action = tcp_connect_to_sock;
 		con->shutdown_action = dlm_tcp_shutdown;
 		con->eof_condition = tcp_eof_condition;
-	} else {
+		break;
+	case DLM_PROTO_SCTP:
 		con->connect_action = sctp_connect_to_sock;
+		break;
+	default:
+		kfree(con->rx_buf);
+		return -EINVAL;
 	}
 
 	return 0;
@@ -1968,10 +1974,19 @@ int dlm_lowcomms_start(void)
 	dlm_allow_conn = 1;
 
 	/* Start listening */
-	if (dlm_config.ci_protocol == 0)
+	switch (dlm_config.ci_protocol) {
+	case DLM_PROTO_TCP:
 		error = tcp_listen_for_all();
-	else
+		break;
+	case DLM_PROTO_SCTP:
 		error = sctp_listen_for_all(&listen_con);
+		break;
+	default:
+		log_print("Invalid protocol identifier %d set",
+			  dlm_config.ci_protocol);
+		error = -EINVAL;
+		break;
+	}
 	if (error)
 		goto fail_unlisten;
 

From d10a0b88751a0954c14e11fd988da00d3b0d5445 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 2 Jun 2021 09:45:20 -0400
Subject: [PATCH 0839/3292] fs: dlm: rename socket and app buffer defines

This patch renames DEFAULT_BUFFER_SIZE to DLM_MAX_SOCKET_BUFSIZE and
LOWCOMMS_MAX_TX_BUFFER_LEN to DLM_MAX_APP_BUFSIZE as they are proper
names to define what's behind those values. The DLM_MAX_SOCKET_BUFSIZE
defines the maximum size of buffer which can be handled on socket layer,
the DLM_MAX_APP_BUFSIZE defines the maximum size of buffer which can be
handled by the DLM application layer.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c    | 4 ++--
 fs/dlm/config.h    | 2 +-
 fs/dlm/lockspace.c | 2 +-
 fs/dlm/lowcomms.c  | 4 ++--
 fs/dlm/lowcomms.h  | 2 +-
 fs/dlm/member.c    | 2 +-
 fs/dlm/midcomms.c  | 4 ++--
 fs/dlm/rcom.c      | 6 +++---
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index c91c1c73ed9d..42eee2783756 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -208,7 +208,7 @@ static int dlm_check_zero(unsigned int x)
 
 static int dlm_check_buffer_size(unsigned int x)
 {
-	if (x < DEFAULT_BUFFER_SIZE)
+	if (x < DLM_MAX_SOCKET_BUFSIZE)
 		return -EINVAL;
 
 	return 0;
@@ -962,7 +962,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
-	.ci_buffer_size = DEFAULT_BUFFER_SIZE,
+	.ci_buffer_size = DLM_MAX_SOCKET_BUFSIZE,
 	.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
 	.ci_recover_timer = DEFAULT_RECOVER_TIMER,
 	.ci_toss_secs = DEFAULT_TOSS_SECS,
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 00374b45c748..df92b0a07fc6 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -12,7 +12,7 @@
 #ifndef __CONFIG_DOT_H__
 #define __CONFIG_DOT_H__
 
-#define DEFAULT_BUFFER_SIZE     4096
+#define DLM_MAX_SOCKET_BUFSIZE	4096
 
 struct dlm_config_node {
 	int nodeid;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 2b738be8d7e4..d71aba8c3e64 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -572,7 +572,7 @@ static int new_lockspace(const char *name, const char *cluster,
 	 * not having out of bounds issues. However on sending side 3.2
 	 * might send less.
 	 */
-	ls->ls_recover_buf = kmalloc(DEFAULT_BUFFER_SIZE, GFP_NOFS);
+	ls->ls_recover_buf = kmalloc(DLM_MAX_SOCKET_BUFSIZE, GFP_NOFS);
 	if (!ls->ls_recover_buf)
 		goto out_lkbidr;
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index f2a3b0401b9c..0ea9ae35da0b 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1556,9 +1556,9 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
 	struct dlm_msg *msg;
 	int idx;
 
-	if (len > DEFAULT_BUFFER_SIZE ||
+	if (len > DLM_MAX_SOCKET_BUFSIZE ||
 	    len < sizeof(struct dlm_header)) {
-		BUILD_BUG_ON(PAGE_SIZE < DEFAULT_BUFFER_SIZE);
+		BUILD_BUG_ON(PAGE_SIZE < DLM_MAX_SOCKET_BUFSIZE);
 		log_print("failed to allocate a buffer of size %d", len);
 		WARN_ON(1);
 		return NULL;
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 730c34317183..aaae7115c00d 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -15,7 +15,7 @@
 #include "dlm_internal.h"
 
 #define DLM_MIDCOMMS_OPT_LEN		sizeof(struct dlm_opts)
-#define LOWCOMMS_MAX_TX_BUFFER_LEN	(DEFAULT_BUFFER_SIZE - \
+#define DLM_MAX_APP_BUFSIZE		(DLM_MAX_SOCKET_BUFSIZE - \
 					 DLM_MIDCOMMS_OPT_LEN)
 
 #define CONN_HASH_SIZE 32
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 63971c594bdc..d9e1e4170eb1 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -271,7 +271,7 @@ int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
 
 	log_slots(ls, gen, num, NULL, array, array_size);
 
-	max_slots = (LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom) -
+	max_slots = (DLM_MAX_APP_BUFSIZE - sizeof(struct dlm_rcom) -
 		     sizeof(struct rcom_config)) / sizeof(struct rcom_slot);
 
 	if (num > max_slots) {
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 4e36e418b6bf..7d217234b697 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -865,7 +865,7 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
 	while (len >= sizeof(struct dlm_header)) {
 		hd = (struct dlm_header *)ptr;
 
-		/* no message should be more than DEFAULT_BUFFER_SIZE or
+		/* no message should be more than DLM_MAX_SOCKET_BUFSIZE or
 		 * less than dlm_header size.
 		 *
 		 * Some messages does not have a 8 byte length boundary yet
@@ -877,7 +877,7 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
 		 * the next major version bump.
 		 */
 		msglen = le16_to_cpu(hd->h_length);
-		if (msglen > DEFAULT_BUFFER_SIZE ||
+		if (msglen > DLM_MAX_SOCKET_BUFSIZE ||
 		    msglen < sizeof(struct dlm_header)) {
 			log_print("received invalid length header: %u from node %d, will abort message parsing",
 				  msglen, nodeid);
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index a7727b9e5e83..5651933f54a4 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -202,7 +202,7 @@ retry:
 	set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags);
 
 	allow_sync_reply(ls, &rc->rc_id);
-	memset(ls->ls_recover_buf, 0, DEFAULT_BUFFER_SIZE);
+	memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE);
 
 	send_rcom_stateless(ls, msg, rc);
 
@@ -325,7 +325,7 @@ retry:
 	memcpy(rc->rc_buf, last_name, last_len);
 
 	allow_sync_reply(ls, &rc->rc_id);
-	memset(ls->ls_recover_buf, 0, DEFAULT_BUFFER_SIZE);
+	memset(ls->ls_recover_buf, 0, DLM_MAX_SOCKET_BUFSIZE);
 
 	send_rcom_stateless(ls, msg, rc);
 
@@ -345,7 +345,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 	nodeid = rc_in->rc_header.h_nodeid;
 	inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
-	outlen = LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom);
+	outlen = DLM_MAX_APP_BUFSIZE - sizeof(struct dlm_rcom);
 
 	error = create_rcom_stateless(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen,
 				      &rc, &msg);

From 011ab4dffe965c16c2ba27c0b97d42d41a97b4da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Thu, 27 May 2021 22:28:15 +0200
Subject: [PATCH 0840/3292] dt-bindings: net: brcm,iproc-mdio: convert to the
 json-schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This helps validating DTS files.

Introduced changes:
1. Swapped #address-cells and #size-cells values
2. Renamed node: s/enet-gphy/ethernet-phy@/

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/brcm,iproc-mdio.txt          | 23 -----------
 .../bindings/net/brcm,iproc-mdio.yaml         | 38 +++++++++++++++++++
 2 files changed, 38 insertions(+), 23 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt
 create mode 100644 Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml

diff --git a/Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt b/Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt
deleted file mode 100644
index 8ba9ed11d716..000000000000
--- a/Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-* Broadcom iProc MDIO bus controller
-
-Required properties:
-- compatible: should be "brcm,iproc-mdio"
-- reg: address and length of the register set for the MDIO interface
-- #size-cells: must be 1
-- #address-cells: must be 0
-
-Child nodes of this MDIO bus controller node are standard Ethernet PHY device
-nodes as described in Documentation/devicetree/bindings/net/phy.txt
-
-Example:
-
-mdio@18002000 {
-	compatible = "brcm,iproc-mdio";
-	reg = <0x18002000 0x8>;
-	#size-cells = <1>;
-	#address-cells = <0>;
-
-	enet-gphy@0 {
-		reg = <0>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml
new file mode 100644
index 000000000000..3031395f7e6e
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,iproc-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom iProc MDIO bus controller
+
+maintainers:
+  - Rafał Miłecki <rafal@milecki.pl>
+
+allOf:
+  - $ref: mdio.yaml#
+
+properties:
+  compatible:
+    const: brcm,iproc-mdio
+
+  reg:
+    maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+  - reg
+
+examples:
+  - |
+    mdio@18002000 {
+        compatible = "brcm,iproc-mdio";
+        reg = <0x18002000 0x8>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethernet-phy@0 {
+            reg = <0>;
+        };
+    };

From bf6b260b8a9654db99761cde74c6b16356b9b441 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 1 Jun 2021 23:40:16 +0200
Subject: [PATCH 0841/3292] batman-adv: Drop implicit creation of batadv
 net_devices

The sysfs code in batman-adv was could create a new batadv interfaces on
demand when a string (interface name) was written to the
batman-adv/mesh_iface file. But the code no longer exists in the current
batman-adv codebase. The helper code to implement this behavior must be
considered as unused and can be dropped.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/hard-interface.c | 12 ++----------
 net/batman-adv/soft-interface.c | 34 +--------------------------------
 net/batman-adv/soft-interface.h |  2 --
 3 files changed, 3 insertions(+), 45 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index b99f64f483fc..a638f35598f0 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -725,17 +725,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	kref_get(&hard_iface->refcount);
 
 	soft_iface = dev_get_by_name(net, iface_name);
-
 	if (!soft_iface) {
-		soft_iface = batadv_softif_create(net, iface_name);
-
-		if (!soft_iface) {
-			ret = -ENOMEM;
-			goto err;
-		}
-
-		/* dev_get_by_name() increases the reference counter for us */
-		dev_hold(soft_iface);
+		ret = -EINVAL;
+		goto err;
 	}
 
 	if (!batadv_softif_is_valid(soft_iface)) {
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index a21884c0d47f..0c5b34251a6d 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -26,7 +26,6 @@
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/percpu.h>
-#include <linux/printk.h>
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -37,6 +36,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
@@ -1086,38 +1086,6 @@ static int batadv_softif_newlink(struct net *src_net, struct net_device *dev,
 	return register_netdevice(dev);
 }
 
-/**
- * batadv_softif_create() - Create and register soft interface
- * @net: the applicable net namespace
- * @name: name of the new soft interface
- *
- * Return: newly allocated soft_interface, NULL on errors
- */
-struct net_device *batadv_softif_create(struct net *net, const char *name)
-{
-	struct net_device *soft_iface;
-	int ret;
-
-	soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
-				  NET_NAME_UNKNOWN, batadv_softif_init_early);
-	if (!soft_iface)
-		return NULL;
-
-	dev_net_set(soft_iface, net);
-
-	soft_iface->rtnl_link_ops = &batadv_link_ops;
-
-	ret = register_netdevice(soft_iface);
-	if (ret < 0) {
-		pr_err("Unable to register the batman interface '%s': %i\n",
-		       name, ret);
-		free_netdev(soft_iface);
-		return NULL;
-	}
-
-	return soft_iface;
-}
-
 /**
  * batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via
  *  netlink
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 38b0ad182584..67a2ddd6832f 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -12,14 +12,12 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
-#include <net/net_namespace.h>
 #include <net/rtnetlink.h>
 
 int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
 void batadv_interface_rx(struct net_device *soft_iface,
 			 struct sk_buff *skb, int hdr_size,
 			 struct batadv_orig_node *orig_node);
-struct net_device *batadv_softif_create(struct net *net, const char *name);
 bool batadv_softif_is_valid(const struct net_device *net_dev);
 extern struct rtnl_link_ops batadv_link_ops;
 int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);

From fa205602d46e0b66c0c90672bce8b36e5de449df Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 1 Jun 2021 23:50:35 +0200
Subject: [PATCH 0842/3292] batman-adv: Avoid name based attaching of hard
 interfaces

The sysfs code for the batman-adv/mesh_iface file was receiving a string of
the batadv interface. This interface name was then provided to the code
which shared sysfs+rtnetlink code for attaching an hard-interface to an
batadv interface. The rtnetlink code was also using the (extracted)
interface name from the ndo_add_slave callback to increase the shared code
- even when it would have been more efficient to use the provided
net_device object directly instead of searching it again (based on its
name) in batadv_hardif_enable_interface.

But this indirect handling is no longer necessary because the sysfs code
was dropped. There is now only a single code path which is using
batadv_hardif_enable_interface.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/hard-interface.c | 14 ++++----------
 net/batman-adv/hard-interface.h |  3 +--
 net/batman-adv/soft-interface.c |  3 +--
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index a638f35598f0..81d201cc343d 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -705,16 +705,15 @@ static int batadv_master_del_slave(struct batadv_hard_iface *slave,
 /**
  * batadv_hardif_enable_interface() - Enslave hard interface to soft interface
  * @hard_iface: hard interface to add to soft interface
- * @net: the applicable net namespace
- * @iface_name: name of the soft interface
+ * @soft_iface: netdev struct of the mesh interface
  *
  * Return: 0 on success or negative error number in case of failure
  */
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
-				   struct net *net, const char *iface_name)
+				   struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv;
-	struct net_device *soft_iface, *master;
+	struct net_device *master;
 	__be16 ethertype = htons(ETH_P_BATMAN);
 	int max_header_len = batadv_max_header_len();
 	int ret;
@@ -724,11 +723,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 
 	kref_get(&hard_iface->refcount);
 
-	soft_iface = dev_get_by_name(net, iface_name);
-	if (!soft_iface) {
-		ret = -EINVAL;
-		goto err;
-	}
+	dev_hold(soft_iface);
 
 	if (!batadv_softif_is_valid(soft_iface)) {
 		pr_err("Can't create batman mesh interface %s: already exists as regular interface\n",
@@ -802,7 +797,6 @@ err_upper:
 err_dev:
 	hard_iface->soft_iface = NULL;
 	dev_put(soft_iface);
-err:
 	batadv_hardif_put(hard_iface);
 	return ret;
 }
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 83d11b46a9d8..8cb2a1f10080 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -16,7 +16,6 @@
 #include <linux/rcupdate.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
-#include <net/net_namespace.h>
 
 /**
  * enum batadv_hard_if_state - State of a hard interface
@@ -75,7 +74,7 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
 struct batadv_hard_iface*
 batadv_hardif_get_by_netdev(const struct net_device *net_dev);
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
-				   struct net *net, const char *iface_name);
+				   struct net_device *soft_iface);
 void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface);
 int batadv_hardif_min_mtu(struct net_device *soft_iface);
 void batadv_update_min_mtu(struct net_device *soft_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0c5b34251a6d..ae368a42a4ad 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -842,14 +842,13 @@ static int batadv_softif_slave_add(struct net_device *dev,
 				   struct netlink_ext_ack *extack)
 {
 	struct batadv_hard_iface *hard_iface;
-	struct net *net = dev_net(dev);
 	int ret = -EINVAL;
 
 	hard_iface = batadv_hardif_get_by_netdev(slave_dev);
 	if (!hard_iface || hard_iface->soft_iface)
 		goto out;
 
-	ret = batadv_hardif_enable_interface(hard_iface, net, dev->name);
+	ret = batadv_hardif_enable_interface(hard_iface, dev);
 
 out:
 	if (hard_iface)

From 170258ce1c71dc4e03d5cb92b5f03cfb01941514 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 1 Jun 2021 23:52:48 +0200
Subject: [PATCH 0843/3292] batman-adv: Don't manually reattach hard-interface

The batadv_hardif_enable_interface is now only called from the callback
ndo_add_slave. This callback is only used by do_set_master in the rtnetlink
code which only does two things:

1. remove the net_device from its old master
2. add the net_device to its new batadv master

The code to replicate the first step in batman-adv is therefore unused
since the sysfs code was dropped.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/hard-interface.c | 34 ---------------------------------
 1 file changed, 34 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 81d201cc343d..44b0aa30c30a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -677,31 +677,6 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
 	batadv_update_min_mtu(hard_iface->soft_iface);
 }
 
-/**
- * batadv_master_del_slave() - remove hard_iface from the current master iface
- * @slave: the interface enslaved in another master
- * @master: the master from which slave has to be removed
- *
- * Invoke ndo_del_slave on master passing slave as argument. In this way the
- * slave is free'd and the master can correctly change its internal state.
- *
- * Return: 0 on success, a negative value representing the error otherwise
- */
-static int batadv_master_del_slave(struct batadv_hard_iface *slave,
-				   struct net_device *master)
-{
-	int ret;
-
-	if (!master)
-		return 0;
-
-	ret = -EBUSY;
-	if (master->netdev_ops->ndo_del_slave)
-		ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
-
-	return ret;
-}
-
 /**
  * batadv_hardif_enable_interface() - Enslave hard interface to soft interface
  * @hard_iface: hard interface to add to soft interface
@@ -713,7 +688,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 				   struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv;
-	struct net_device *master;
 	__be16 ethertype = htons(ETH_P_BATMAN);
 	int max_header_len = batadv_max_header_len();
 	int ret;
@@ -732,14 +706,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 		goto err_dev;
 	}
 
-	/* check if the interface is enslaved in another virtual one and
-	 * in that case unlink it first
-	 */
-	master = netdev_master_upper_dev_get(hard_iface->net_dev);
-	ret = batadv_master_del_slave(hard_iface, master);
-	if (ret)
-		goto err_dev;
-
 	hard_iface->soft_iface = soft_iface;
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 

From 020577f879be736bc87e1f48dfad7220923401c0 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 2 Jun 2021 21:54:30 +0200
Subject: [PATCH 0844/3292] batman-adv: Drop reduntant batadv interface check

If batadv_hardif_enable_interface is called then its called from its
callback ndo_add_slave. It is therefore not necessary to check if it is a
batadv interface.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/hard-interface.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 44b0aa30c30a..55d97e18aa4a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,7 +9,6 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
-#include <linux/errno.h>
 #include <linux/gfp.h>
 #include <linux/if.h>
 #include <linux/if_arp.h>
@@ -698,14 +697,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	kref_get(&hard_iface->refcount);
 
 	dev_hold(soft_iface);
-
-	if (!batadv_softif_is_valid(soft_iface)) {
-		pr_err("Can't create batman mesh interface %s: already exists as regular interface\n",
-		       soft_iface->name);
-		ret = -EINVAL;
-		goto err_dev;
-	}
-
 	hard_iface->soft_iface = soft_iface;
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 

From 7f0e869c4e3902abc44ae6a9c9fc4fba6867408e Mon Sep 17 00:00:00 2001
From: zhang kai <zhangkaiheb@126.com>
Date: Wed, 2 Jun 2021 09:50:39 +0800
Subject: [PATCH 0845/3292] sit: replace 68 with micro IPV4_MIN_MTU

Use meaningfull micro IPV4_MIN_MTU

Signed-off-by: zhang kai <zhangkaiheb@126.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index aa98294a3ad3..71b57bdb1519 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -970,7 +970,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	if (df) {
 		mtu = dst_mtu(&rt->dst) - t_hlen;
 
-		if (mtu < 68) {
+		if (mtu < IPV4_MIN_MTU) {
 			dev->stats.collisions++;
 			ip_rt_put(rt);
 			goto tx_error;

From b676c7f1c383390bfd940582e493b374541f2dc2 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:54:28 +0800
Subject: [PATCH 0846/3292] ethtool: Fix a typo

atribute  ==> attribute

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 8abcbc10796c..90b10966b16b 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -138,7 +138,7 @@ static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr,
 }
 
 /**
- * ethnl_update_binary() - update binary data from NLA_BINARY atribute
+ * ethnl_update_binary() - update binary data from NLA_BINARY attribute
  * @dst:  value to update
  * @len:  destination buffer length
  * @attr: netlink attribute with new value or null

From 8ab1784df65176ad24b6d1e1376b8dd53ce2b695 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:54:42 +0800
Subject: [PATCH 0847/3292] 9p/trans_virtio: Fix spelling mistakes

reseting  ==> resetting
alloced  ==> allocated
accomodate  ==> accommodate

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_virtio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 93f2f8654882..2bbd7dce0f1d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -99,7 +99,7 @@ static unsigned int rest_of_page(void *data)
  * @client: client instance
  *
  * This reclaims a channel by freeing its resources and
- * reseting its inuse flag.
+ * resetting its inuse flag.
  *
  */
 
@@ -463,7 +463,7 @@ req_retry_pinned:
 	 * For example TREAD have 11.
 	 * 11 is the read/write header = PDU Header(7) + IO Size (4).
 	 * Arrange in such a way that server places header in the
-	 * alloced memory and payload onto the user buffer.
+	 * allocated memory and payload onto the user buffer.
 	 */
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
@@ -760,7 +760,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.cancelled = p9_virtio_cancelled,
 	/*
 	 * We leave one entry for input and one entry for response
-	 * headers. We also skip one more entry to accomodate, address
+	 * headers. We also skip one more entry to accommodate, address
 	 * that are not at page boundary, that can result in an extra
 	 * page in zero copy.
 	 */

From 91641b79e1e1538eed5dac1a00770c2b94e07a33 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:54:58 +0800
Subject: [PATCH 0848/3292] Bluetooth: Fix spelling mistakes

Fix some spelling mistakes in comments:
udpate  ==> update
retreive  ==> retrieve
accidentially  ==> accidentally
correspondig  ==> corresponding
adddress  ==> address
estabilish  ==> establish
commplete  ==> complete
Unkown  ==> Unknown
triggerd  ==> triggered
transtion  ==> transition

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_conn.c  | 2 +-
 net/bluetooth/hci_core.c  | 8 ++++----
 net/bluetooth/hci_event.c | 2 +-
 net/bluetooth/hci_sock.c  | 6 +++---
 net/bluetooth/mgmt.c      | 2 +-
 net/bluetooth/smp.c       | 6 +++---
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 88ec08978ff4..0ceb72d32208 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -758,7 +758,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
 	conn->state = BT_CLOSED;
 
 	/* If the status indicates successful cancellation of
-	 * the attempt (i.e. Unkown Connection Id) there's no point of
+	 * the attempt (i.e. Unknown Connection Id) there's no point of
 	 * notifying failure since we'll go back to keep trying to
 	 * connect. The only exception is explicit connect requests
 	 * where a timeout + cancel does indicate an actual failure.
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fd12f1652bdf..aa214834c374 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -648,7 +648,7 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 						 */
 
 		/* If the controller supports Extended Scanner Filter
-		 * Policies, enable the correspondig event.
+		 * Policies, enable the corresponding event.
 		 */
 		if (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)
 			events[1] |= 0x04;	/* LE Direct Advertising
@@ -1454,7 +1454,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		}
 
 		/* Check for valid public address or a configured static
-		 * random adddress, but let the HCI setup proceed to
+		 * random address, but let the HCI setup proceed to
 		 * be able to determine if there is a public address
 		 * or not.
 		 *
@@ -3544,7 +3544,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
 		if (params->auto_connect != HCI_AUTO_CONN_DISABLED)
 			continue;
 
-		/* If trying to estabilish one time connection to disabled
+		/* If trying to establish one time connection to disabled
 		 * device, leave the params, but mark them as just once.
 		 */
 		if (params->explicit_connect) {
@@ -4279,7 +4279,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
 	return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
 }
 
-/* Send HCI command and wait for command commplete event */
+/* Send HCI command and wait for command complete event */
 struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
 			     const void *param, u32 timeout)
 {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 016b2999f219..ea06b010ccad 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -6032,7 +6032,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
 		return true;
 	}
 
-	/* Check if request ended in Command Status - no way to retreive
+	/* Check if request ended in Command Status - no way to retrieve
 	 * any extra parameters in this case.
 	 */
 	if (hdr->evt == HCI_EV_CMD_STATUS)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 251b9128f530..6ef98a887571 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1130,7 +1130,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 		if (!hci_sock_gen_cookie(sk)) {
 			/* In the case when a cookie has already been assigned,
 			 * then there has been already an ioctl issued against
-			 * an unbound socket and with that triggerd an open
+			 * an unbound socket and with that triggered an open
 			 * notification. Send a close notification first to
 			 * allow the state transition to bounded.
 			 */
@@ -1326,9 +1326,9 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 		if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) {
 			if (!hci_sock_gen_cookie(sk)) {
 				/* In the case when a cookie has already been
-				 * assigned, this socket will transtion from
+				 * assigned, this socket will transition from
 				 * a raw socket into a control socket. To
-				 * allow for a clean transtion, send the
+				 * allow for a clean transition, send the
 				 * close notification first.
 				 */
 				skb = create_monitor_ctrl_close(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f9be7f9084d6..f290d0c54d32 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3341,7 +3341,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
 	}
 
 	/* The name is stored in the scan response data and so
-	 * no need to udpate the advertising data here.
+	 * no need to update the advertising data here.
 	 */
 	if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		__hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 372e3b25aaa4..93144e0c7efa 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -40,7 +40,7 @@
 	((struct smp_dev *)((struct l2cap_chan *)((hdev)->smp_data))->data)
 
 /* Low-level debug macros to be used for stuff that we don't want
- * accidentially in dmesg, i.e. the values of the various crypto keys
+ * accidentally in dmesg, i.e. the values of the various crypto keys
  * and the inputs & outputs of crypto functions.
  */
 #ifdef DEBUG
@@ -560,7 +560,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 				return err;
 
 			/* This is unlikely, but we need to check that
-			 * we didn't accidentially generate a debug key.
+			 * we didn't accidentally generate a debug key.
 			 */
 			if (crypto_memneq(smp->local_pk, debug_pk, 64))
 				break;
@@ -1902,7 +1902,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 				return SMP_UNSPECIFIED;
 
 			/* This is unlikely, but we need to check that
-			 * we didn't accidentially generate a debug key.
+			 * we didn't accidentally generate a debug key.
 			 */
 			if (crypto_memneq(smp->local_pk, debug_pk, 64))
 				break;

From fe6c0262bdf96623e4b088d4bd32a9454a37eb3a Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:55:08 +0800
Subject: [PATCH 0849/3292] rxrpc: Fix a typo

targetted  ==> targeted

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/local_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 3ce6d628cd75..19e929c7c38b 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -77,7 +77,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
 }
 
 /*
- * Process event packets targetted at a local endpoint.
+ * Process event packets targeted at a local endpoint.
  */
 void rxrpc_process_local_events(struct rxrpc_local *local)
 {

From 5debe0b30bac2b921722d7419f02acee6f02fa71 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:55:44 +0800
Subject: [PATCH 0850/3292] decnet: Fix spelling mistakes

Fix some spelling mistakes in comments:
thats  ==> that's
serivce  ==> service
varience  ==> variance

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_nsp_in.c  | 2 +-
 net/decnet/dn_nsp_out.c | 2 +-
 net/decnet/dn_route.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 1a12912b88d6..7ab788f41a3f 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -870,7 +870,7 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 
 		/*
 		 * Read out ack data here, this applies equally
-		 * to data, other data, link serivce and both
+		 * to data, other data, link service and both
 		 * ack data and ack otherdata.
 		 */
 		dn_process_ack(sk, skb, other);
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 00f2ed721ec1..eadc89583168 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -179,7 +179,7 @@ static void dn_nsp_rtt(struct sock *sk, long rtt)
 		scp->nsp_srtt = 1;
 
 	/*
-	 * Add new rtt varience to smoothed varience
+	 * Add new rtt variance to smoothed varience
 	 */
 	delta >>= 1;
 	rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 32b1bed8ae51..729d3de6020d 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -604,7 +604,7 @@ drop_it:
 static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	/*
-	 * I know we drop the packet here, but thats considered success in
+	 * I know we drop the packet here, but that's considered success in
 	 * this case
 	 */
 	kfree_skb(skb);

From 2bda0a5e3bf8924cc6dc5955df610defa10d70e7 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:45 +0200
Subject: [PATCH 0851/3292] nfp: flower: move non-zero chain check

This is in preparation for conntrack offload support which makes
used of different chains. Add explicit checks for conntrack and
non-zero chains in the add_offload path.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/offload.c   | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index e95969c462e4..16ef960a150d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1276,6 +1276,20 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
 	return 0;
 }
 
+static bool offload_pre_check(struct flow_cls_offload *flow)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+	struct flow_dissector *dissector = rule->match.dissector;
+
+	if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT))
+		return false;
+
+	if (flow->common.chain_index)
+		return false;
+
+	return true;
+}
+
 /**
  * nfp_flower_add_offload() - Adds a new flow to hardware.
  * @app:	Pointer to the APP handle
@@ -1302,6 +1316,9 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	if (nfp_netdev_is_nfp_repr(netdev))
 		port = nfp_port_from_netdev(netdev);
 
+	if (!offload_pre_check(flow))
+		return -EOPNOTSUPP;
+
 	key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
 	if (!key_layer)
 		return -ENOMEM;
@@ -1646,9 +1663,10 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
 static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type,
 					void *type_data, void *cb_priv)
 {
+	struct flow_cls_common_offload *common = type_data;
 	struct nfp_repr *repr = cb_priv;
 
-	if (!tc_cls_can_offload_and_chain0(repr->netdev, type_data))
+	if (!tc_can_offload_extack(repr->netdev, common->extack))
 		return -EOPNOTSUPP;
 
 	switch (type) {
@@ -1746,10 +1764,6 @@ static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type,
 					  void *type_data, void *cb_priv)
 {
 	struct nfp_flower_indr_block_cb_priv *priv = cb_priv;
-	struct flow_cls_offload *flower = type_data;
-
-	if (flower->common.chain_index)
-		return -EOPNOTSUPP;
 
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:

From c8b034fbeba55847be999595d63ad7d04f06c141 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:46 +0200
Subject: [PATCH 0852/3292] nfp: flower-ct: add pre and post ct checks

Add checks to see if a flow is a conntrack flow we can potentially
handle. Just stub out the handling the different conntrack flows.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/Makefile   |  3 +-
 .../ethernet/netronome/nfp/flower/conntrack.c | 48 +++++++++++++++++++
 .../ethernet/netronome/nfp/flower/conntrack.h | 45 +++++++++++++++++
 .../ethernet/netronome/nfp/flower/offload.c   |  7 +++
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/flower/conntrack.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/flower/conntrack.h

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index d31772ae511d..9cff3d48acbc 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -51,7 +51,8 @@ nfp-objs += \
 	    flower/metadata.o \
 	    flower/offload.o \
 	    flower/tunnel_conf.o \
-	    flower/qos_conf.o
+	    flower/qos_conf.o \
+	    flower/conntrack.o
 endif
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
new file mode 100644
index 000000000000..aeea37a0135e
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2021 Corigine, Inc. */
+
+#include "conntrack.h"
+
+bool is_pre_ct_flow(struct flow_cls_offload *flow)
+{
+	struct flow_action_entry *act;
+	int i;
+
+	flow_action_for_each(i, act, &flow->rule->action) {
+		if (act->id == FLOW_ACTION_CT && !act->ct.action)
+			return true;
+	}
+	return false;
+}
+
+bool is_post_ct_flow(struct flow_cls_offload *flow)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_match_ct ct;
+
+	if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) {
+		flow_rule_match_ct(rule, &ct);
+		if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)
+			return true;
+	}
+	return false;
+}
+
+int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
+			    struct net_device *netdev,
+			    struct flow_cls_offload *flow,
+			    struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack action not supported");
+	return -EOPNOTSUPP;
+}
+
+int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+			     struct net_device *netdev,
+			     struct flow_cls_offload *flow,
+			     struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack match not supported");
+	return -EOPNOTSUPP;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
new file mode 100644
index 000000000000..e8d034bb9807
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2021 Corigine, Inc. */
+
+#ifndef __NFP_FLOWER_CONNTRACK_H__
+#define __NFP_FLOWER_CONNTRACK_H__ 1
+
+#include "main.h"
+
+bool is_pre_ct_flow(struct flow_cls_offload *flow);
+bool is_post_ct_flow(struct flow_cls_offload *flow);
+
+/**
+ * nfp_fl_ct_handle_pre_ct() - Handles -trk conntrack rules
+ * @priv:	Pointer to app priv
+ * @netdev:	netdev structure.
+ * @flow:	TC flower classifier offload structure.
+ * @extack:	Extack pointer for errors
+ *
+ * Adds a new entry to the relevant zone table and tries to
+ * merge with other +trk+est entries and offload if possible.
+ *
+ * Return: negative value on error, 0 if configured successfully.
+ */
+int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
+			    struct net_device *netdev,
+			    struct flow_cls_offload *flow,
+			    struct netlink_ext_ack *extack);
+/**
+ * nfp_fl_ct_handle_post_ct() - Handles +trk+est conntrack rules
+ * @priv:	Pointer to app priv
+ * @netdev:	netdev structure.
+ * @flow:	TC flower classifier offload structure.
+ * @extack:	Extack pointer for errors
+ *
+ * Adds a new entry to the relevant zone table and tries to
+ * merge with other -trk entries and offload if possible.
+ *
+ * Return: negative value on error, 0 if configured successfully.
+ */
+int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
+			     struct net_device *netdev,
+			     struct flow_cls_offload *flow,
+			     struct netlink_ext_ack *extack);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 16ef960a150d..7e4ad5d58859 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -7,6 +7,7 @@
 
 #include "cmsg.h"
 #include "main.h"
+#include "conntrack.h"
 #include "../nfpcore/nfp_cpp.h"
 #include "../nfpcore/nfp_nsp.h"
 #include "../nfp_app.h"
@@ -1316,6 +1317,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	if (nfp_netdev_is_nfp_repr(netdev))
 		port = nfp_port_from_netdev(netdev);
 
+	if (is_pre_ct_flow(flow))
+		return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack);
+
+	if (is_post_ct_flow(flow))
+		return nfp_fl_ct_handle_post_ct(priv, netdev, flow, extack);
+
 	if (!offload_pre_check(flow))
 		return -EOPNOTSUPP;
 

From e236e4849b583d0e77b856417ce2f03b1d6b31db Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:47 +0200
Subject: [PATCH 0853/3292] nfp: flower-ct: add ct zone table

Add initial zone table to nfp_flower_priv. This table will be used
to store all the information required to offload conntrack.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.h | 17 ++++++++++++++
 .../net/ethernet/netronome/nfp/flower/main.h  |  2 ++
 .../ethernet/netronome/nfp/flower/metadata.c  | 22 ++++++++++++++++++-
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index e8d034bb9807..5f1f54ccc5a1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -6,6 +6,23 @@
 
 #include "main.h"
 
+extern const struct rhashtable_params nfp_zone_table_params;
+
+/**
+ * struct nfp_fl_ct_zone_entry - Zone entry containing conntrack flow information
+ * @zone:	The zone number, used as lookup key in hashtable
+ * @hash_node:	Used by the hashtable
+ * @priv:	Pointer to nfp_flower_priv data
+ * @nft:	Pointer to nf_flowtable for this zone
+ */
+struct nfp_fl_ct_zone_entry {
+	u16 zone;
+	struct rhash_head hash_node;
+
+	struct nfp_flower_priv *priv;
+	struct nf_flowtable *nft;
+};
+
 bool is_pre_ct_flow(struct flow_cls_offload *flow);
 bool is_post_ct_flow(struct flow_cls_offload *flow);
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 31377923ea3d..0073851f31d7 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -193,6 +193,7 @@ struct nfp_fl_internal_ports {
  * @qos_stats_lock:	Lock on qos stats updates
  * @pre_tun_rule_cnt:	Number of pre-tunnel rules offloaded
  * @merge_table:	Hash table to store merged flows
+ * @ct_zone_table:	Hash table used to store the different zones
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -227,6 +228,7 @@ struct nfp_flower_priv {
 	spinlock_t qos_stats_lock; /* Protect the qos stats */
 	int pre_tun_rule_cnt;
 	struct rhashtable merge_table;
+	struct rhashtable ct_zone_table;
 };
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 327bb56b3ef5..4a00ce803df1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -9,6 +9,7 @@
 #include <net/pkt_cls.h>
 
 #include "cmsg.h"
+#include "conntrack.h"
 #include "main.h"
 #include "../nfp_app.h"
 
@@ -496,6 +497,13 @@ const struct rhashtable_params merge_table_params = {
 	.key_len	= sizeof(u64),
 };
 
+const struct rhashtable_params nfp_zone_table_params = {
+	.head_offset		= offsetof(struct nfp_fl_ct_zone_entry, hash_node),
+	.key_len		= sizeof(u16),
+	.key_offset		= offsetof(struct nfp_fl_ct_zone_entry, zone),
+	.automatic_shrinking	= false,
+};
+
 int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
 			     unsigned int host_num_mems)
 {
@@ -516,6 +524,10 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
 	if (err)
 		goto err_free_stats_ctx_table;
 
+	err = rhashtable_init(&priv->ct_zone_table, &nfp_zone_table_params);
+	if (err)
+		goto err_free_merge_table;
+
 	get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
 
 	/* Init ring buffer and unallocated mask_ids. */
@@ -523,7 +535,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
 		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
 			      NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
 	if (!priv->mask_ids.mask_id_free_list.buf)
-		goto err_free_merge_table;
+		goto err_free_ct_zone_table;
 
 	priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
 
@@ -560,6 +572,8 @@ err_free_last_used:
 	kfree(priv->mask_ids.last_used);
 err_free_mask_id:
 	kfree(priv->mask_ids.mask_id_free_list.buf);
+err_free_ct_zone_table:
+	rhashtable_destroy(&priv->ct_zone_table);
 err_free_merge_table:
 	rhashtable_destroy(&priv->merge_table);
 err_free_stats_ctx_table:
@@ -569,6 +583,10 @@ err_free_flow_table:
 	return -ENOMEM;
 }
 
+static void nfp_free_zone_table_entry(void *ptr, void *arg)
+{
+}
+
 void nfp_flower_metadata_cleanup(struct nfp_app *app)
 {
 	struct nfp_flower_priv *priv = app->priv;
@@ -582,6 +600,8 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app)
 				    nfp_check_rhashtable_empty, NULL);
 	rhashtable_free_and_destroy(&priv->merge_table,
 				    nfp_check_rhashtable_empty, NULL);
+	rhashtable_free_and_destroy(&priv->ct_zone_table,
+				    nfp_free_zone_table_entry, NULL);
 	kvfree(priv->stats);
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	kfree(priv->mask_ids.last_used);

From bd0fe7f96a3c44c6ed0ab645abbad8b43a8cc5c1 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:48 +0200
Subject: [PATCH 0854/3292] nfp: flower-ct: add zone table entry when handling
 pre/post_ct flows

Start populating the pre/post_ct handler functions. Add a zone entry
to the zone table, based on the zone information from the flow. In
the case of a post_ct flow which has a wildcarded match on the zone
create a special entry.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 125 ++++++++++++++++++
 .../net/ethernet/netronome/nfp/flower/main.h  |   2 +
 .../ethernet/netronome/nfp/flower/metadata.c  |   4 +
 3 files changed, 131 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index aeea37a0135e..9d63a8f89397 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -3,6 +3,33 @@
 
 #include "conntrack.h"
 
+/**
+ * get_hashentry() - Wrapper around hashtable lookup.
+ * @ht:		hashtable where entry could be found
+ * @key:	key to lookup
+ * @params:	hashtable params
+ * @size:	size of entry to allocate if not in table
+ *
+ * Returns an entry from a hashtable. If entry does not exist
+ * yet allocate the memory for it and return the new entry.
+ */
+static void *get_hashentry(struct rhashtable *ht, void *key,
+			   const struct rhashtable_params params, size_t size)
+{
+	void *result;
+
+	result = rhashtable_lookup_fast(ht, key, params);
+
+	if (result)
+		return result;
+
+	result = kzalloc(size, GFP_KERNEL);
+	if (!result)
+		return ERR_PTR(-ENOMEM);
+
+	return result;
+}
+
 bool is_pre_ct_flow(struct flow_cls_offload *flow)
 {
 	struct flow_action_entry *act;
@@ -29,11 +56,88 @@ bool is_post_ct_flow(struct flow_cls_offload *flow)
 	return false;
 }
 
+static struct
+nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
+					 u16 zone, bool wildcarded)
+{
+	struct nfp_fl_ct_zone_entry *zt;
+	int err;
+
+	if (wildcarded && priv->ct_zone_wc)
+		return priv->ct_zone_wc;
+
+	if (!wildcarded) {
+		zt = get_hashentry(&priv->ct_zone_table, &zone,
+				   nfp_zone_table_params, sizeof(*zt));
+
+		/* If priv is set this is an existing entry, just return it */
+		if (IS_ERR(zt) || zt->priv)
+			return zt;
+	} else {
+		zt = kzalloc(sizeof(*zt), GFP_KERNEL);
+		if (!zt)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	zt->zone = zone;
+	zt->priv = priv;
+	zt->nft = NULL;
+
+	if (wildcarded) {
+		priv->ct_zone_wc = zt;
+	} else {
+		err = rhashtable_insert_fast(&priv->ct_zone_table,
+					     &zt->hash_node,
+					     nfp_zone_table_params);
+		if (err)
+			goto err_zone_insert;
+	}
+
+	return zt;
+
+err_zone_insert:
+	kfree(zt);
+	return ERR_PTR(err);
+}
+
+static struct flow_action_entry *get_flow_act(struct flow_cls_offload *flow,
+					      enum flow_action_id act_id)
+{
+	struct flow_action_entry *act = NULL;
+	int i;
+
+	flow_action_for_each(i, act, &flow->rule->action) {
+		if (act->id == act_id)
+			return act;
+	}
+	return NULL;
+}
+
 int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 			    struct net_device *netdev,
 			    struct flow_cls_offload *flow,
 			    struct netlink_ext_ack *extack)
 {
+	struct flow_action_entry *ct_act;
+	struct nfp_fl_ct_zone_entry *zt;
+
+	ct_act = get_flow_act(flow, FLOW_ACTION_CT);
+	if (!ct_act) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "unsupported offload: Conntrack action empty in conntrack offload");
+		return -EOPNOTSUPP;
+	}
+
+	zt = get_nfp_zone_entry(priv, ct_act->ct.zone, false);
+	if (IS_ERR(zt)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "offload error: Could not create zone table entry");
+		return PTR_ERR(zt);
+	}
+
+	if (!zt->nft)
+		zt->nft = ct_act->ct.flow_table;
+
 	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack action not supported");
 	return -EOPNOTSUPP;
 }
@@ -43,6 +147,27 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 			     struct flow_cls_offload *flow,
 			     struct netlink_ext_ack *extack)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+	struct nfp_fl_ct_zone_entry *zt;
+	bool wildcarded = false;
+	struct flow_match_ct ct;
+
+	flow_rule_match_ct(rule, &ct);
+	if (!ct.mask->ct_zone) {
+		wildcarded = true;
+	} else if (ct.mask->ct_zone != U16_MAX) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "unsupported offload: partially wildcarded ct_zone is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	zt = get_nfp_zone_entry(priv, ct.key->ct_zone, wildcarded);
+	if (IS_ERR(zt)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "offload error: Could not create zone table entry");
+		return PTR_ERR(zt);
+	}
+
 	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack match not supported");
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 0073851f31d7..060c6de36c02 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -194,6 +194,7 @@ struct nfp_fl_internal_ports {
  * @pre_tun_rule_cnt:	Number of pre-tunnel rules offloaded
  * @merge_table:	Hash table to store merged flows
  * @ct_zone_table:	Hash table used to store the different zones
+ * @ct_zone_wc:		Special zone entry for wildcarded zone matches
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -229,6 +230,7 @@ struct nfp_flower_priv {
 	int pre_tun_rule_cnt;
 	struct rhashtable merge_table;
 	struct rhashtable ct_zone_table;
+	struct nfp_fl_ct_zone_entry *ct_zone_wc;
 };
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 4a00ce803df1..10d84ebf77bf 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -585,6 +585,9 @@ err_free_flow_table:
 
 static void nfp_free_zone_table_entry(void *ptr, void *arg)
 {
+	struct nfp_fl_ct_zone_entry *zt = ptr;
+
+	kfree(zt);
 }
 
 void nfp_flower_metadata_cleanup(struct nfp_app *app)
@@ -602,6 +605,7 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app)
 				    nfp_check_rhashtable_empty, NULL);
 	rhashtable_free_and_destroy(&priv->ct_zone_table,
 				    nfp_free_zone_table_entry, NULL);
+	kfree(priv->ct_zone_wc);
 	kvfree(priv->stats);
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	kfree(priv->mask_ids.last_used);

From 072c089ca536a87591a21741ffa972ae7626c96a Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:49 +0200
Subject: [PATCH 0855/3292] nfp: flower-ct: add nfp_fl_ct_flow_entries

This commit starts adding the structures and lists that will
be used in follow up commits to enable offloading of conntrack.
Some stub functions are also introduced as placeholders by
this commit.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 130 +++++++++++++++++-
 .../ethernet/netronome/nfp/flower/conntrack.h |  51 +++++++
 .../ethernet/netronome/nfp/flower/metadata.c  |  31 ++++-
 3 files changed, 209 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 9d63a8f89397..57a5ba5f2761 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -83,6 +83,10 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 	zt->priv = priv;
 	zt->nft = NULL;
 
+	/* init the various hash tables and lists*/
+	INIT_LIST_HEAD(&zt->pre_ct_list);
+	INIT_LIST_HEAD(&zt->post_ct_list);
+
 	if (wildcarded) {
 		priv->ct_zone_wc = zt;
 	} else {
@@ -100,6 +104,100 @@ err_zone_insert:
 	return ERR_PTR(err);
 }
 
+static struct
+nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
+					 struct net_device *netdev,
+					 struct flow_cls_offload *flow)
+{
+	struct nfp_fl_ct_flow_entry *entry;
+	struct flow_action_entry *act;
+	int err, i;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	entry->zt = zt;
+	entry->netdev = netdev;
+	entry->cookie = flow->cookie;
+	entry->rule = flow_rule_alloc(flow->rule->action.num_entries);
+	if (!entry->rule) {
+		err = -ENOMEM;
+		goto err_pre_ct_act;
+	}
+	entry->rule->match.dissector = flow->rule->match.dissector;
+	entry->rule->match.mask = flow->rule->match.mask;
+	entry->rule->match.key = flow->rule->match.key;
+	entry->chain_index = flow->common.chain_index;
+	entry->tun_offset = NFP_FL_CT_NO_TUN;
+
+	/* Copy over action data. Unfortunately we do not get a handle to the
+	 * original tcf_action data, and the flow objects gets destroyed, so we
+	 * cannot just save a pointer to this either, so need to copy over the
+	 * data unfortunately.
+	 */
+	entry->rule->action.num_entries = flow->rule->action.num_entries;
+	flow_action_for_each(i, act, &flow->rule->action) {
+		struct flow_action_entry *new_act;
+
+		new_act = &entry->rule->action.entries[i];
+		memcpy(new_act, act, sizeof(struct flow_action_entry));
+		/* Entunnel is a special case, need to allocate and copy
+		 * tunnel info.
+		 */
+		if (act->id == FLOW_ACTION_TUNNEL_ENCAP) {
+			struct ip_tunnel_info *tun = act->tunnel;
+			size_t tun_size = sizeof(*tun) + tun->options_len;
+
+			new_act->tunnel = kmemdup(tun, tun_size, GFP_ATOMIC);
+			if (!new_act->tunnel) {
+				err = -ENOMEM;
+				goto err_pre_ct_tun_cp;
+			}
+			entry->tun_offset = i;
+		}
+	}
+
+	INIT_LIST_HEAD(&entry->children);
+
+	/* Creation of a ct_map_entry and adding it to a hashtable
+	 * will happen here in follow up patches.
+	 */
+
+	return entry;
+
+err_pre_ct_tun_cp:
+	kfree(entry->rule);
+err_pre_ct_act:
+	kfree(entry);
+	return ERR_PTR(err);
+}
+
+static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry)
+{
+}
+
+static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow)
+{
+}
+
+void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
+{
+	list_del(&entry->list_node);
+
+	if (!list_empty(&entry->children)) {
+		if (entry->type == CT_TYPE_NFT)
+			nfp_free_nft_merge_children(entry, true);
+		else
+			nfp_free_tc_merge_children(entry);
+	}
+
+	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
+		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);
+	kfree(entry->rule);
+	kfree(entry);
+}
+
 static struct flow_action_entry *get_flow_act(struct flow_cls_offload *flow,
 					      enum flow_action_id act_id)
 {
@@ -118,7 +216,8 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 			    struct flow_cls_offload *flow,
 			    struct netlink_ext_ack *extack)
 {
-	struct flow_action_entry *ct_act;
+	struct flow_action_entry *ct_act, *ct_goto;
+	struct nfp_fl_ct_flow_entry *ct_entry;
 	struct nfp_fl_ct_zone_entry *zt;
 
 	ct_act = get_flow_act(flow, FLOW_ACTION_CT);
@@ -128,6 +227,13 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 		return -EOPNOTSUPP;
 	}
 
+	ct_goto = get_flow_act(flow, FLOW_ACTION_GOTO);
+	if (!ct_goto) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "unsupported offload: Conntrack requires ACTION_GOTO");
+		return -EOPNOTSUPP;
+	}
+
 	zt = get_nfp_zone_entry(priv, ct_act->ct.zone, false);
 	if (IS_ERR(zt)) {
 		NL_SET_ERR_MSG_MOD(extack,
@@ -138,7 +244,17 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 	if (!zt->nft)
 		zt->nft = ct_act->ct.flow_table;
 
+	/* Add entry to pre_ct_list */
+	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow);
+	if (IS_ERR(ct_entry))
+		return PTR_ERR(ct_entry);
+	ct_entry->type = CT_TYPE_PRE_CT;
+	ct_entry->chain_index = ct_goto->chain_index;
+	list_add(&ct_entry->list_node, &zt->pre_ct_list);
+	zt->pre_ct_count++;
+
 	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack action not supported");
+	nfp_fl_ct_clean_flow_entry(ct_entry);
 	return -EOPNOTSUPP;
 }
 
@@ -148,6 +264,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 			     struct netlink_ext_ack *extack)
 {
 	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+	struct nfp_fl_ct_flow_entry *ct_entry;
 	struct nfp_fl_ct_zone_entry *zt;
 	bool wildcarded = false;
 	struct flow_match_ct ct;
@@ -168,6 +285,17 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 		return PTR_ERR(zt);
 	}
 
+	/* Add entry to post_ct_list */
+	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow);
+	if (IS_ERR(ct_entry))
+		return PTR_ERR(ct_entry);
+
+	ct_entry->type = CT_TYPE_POST_CT;
+	ct_entry->chain_index = flow->common.chain_index;
+	list_add(&ct_entry->list_node, &zt->post_ct_list);
+	zt->post_ct_count++;
+
 	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack match not supported");
+	nfp_fl_ct_clean_flow_entry(ct_entry);
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index 5f1f54ccc5a1..46437de4d75f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -6,6 +6,8 @@
 
 #include "main.h"
 
+#define NFP_FL_CT_NO_TUN	0xff
+
 extern const struct rhashtable_params nfp_zone_table_params;
 
 /**
@@ -14,6 +16,12 @@ extern const struct rhashtable_params nfp_zone_table_params;
  * @hash_node:	Used by the hashtable
  * @priv:	Pointer to nfp_flower_priv data
  * @nft:	Pointer to nf_flowtable for this zone
+ *
+ * @pre_ct_list:	The pre_ct_list of nfp_fl_ct_flow_entry entries
+ * @pre_ct_count:	Keep count of the number of pre_ct entries
+ *
+ * @post_ct_list:	The post_ct_list of nfp_fl_ct_flow_entry entries
+ * @post_ct_count:	Keep count of the number of post_ct entries
  */
 struct nfp_fl_ct_zone_entry {
 	u16 zone;
@@ -21,6 +29,44 @@ struct nfp_fl_ct_zone_entry {
 
 	struct nfp_flower_priv *priv;
 	struct nf_flowtable *nft;
+
+	struct list_head pre_ct_list;
+	unsigned int pre_ct_count;
+
+	struct list_head post_ct_list;
+	unsigned int post_ct_count;
+};
+
+enum ct_entry_type {
+	CT_TYPE_PRE_CT,
+	CT_TYPE_NFT,
+	CT_TYPE_POST_CT,
+};
+
+/**
+ * struct nfp_fl_ct_flow_entry - Flow entry containing conntrack flow information
+ * @cookie:	Flow cookie, same as original TC flow, used as key
+ * @list_node:	Used by the list
+ * @chain_index:	Chain index of the original flow
+ * @netdev:	netdev structure.
+ * @type:	Type of pre-entry from enum ct_entry_type
+ * @zt:		Reference to the zone table this belongs to
+ * @children:	List of tc_merge flows this flow forms part of
+ * @rule:	Reference to the original TC flow rule
+ * @stats:	Used to cache stats for updating
+ * @tun_offset: Used to indicate tunnel action offset in action list
+ */
+struct nfp_fl_ct_flow_entry {
+	unsigned long cookie;
+	struct list_head list_node;
+	u32 chain_index;
+	enum ct_entry_type type;
+	struct net_device *netdev;
+	struct nfp_fl_ct_zone_entry *zt;
+	struct list_head children;
+	struct flow_rule *rule;
+	struct flow_stats stats;
+	u8 tun_offset;		// Set to NFP_FL_CT_NO_TUN if no tun
 };
 
 bool is_pre_ct_flow(struct flow_cls_offload *flow);
@@ -59,4 +105,9 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 			     struct flow_cls_offload *flow,
 			     struct netlink_ext_ack *extack);
 
+/**
+ * nfp_fl_ct_clean_flow_entry() - Free a nfp_fl_ct_flow_entry
+ * @entry:	Flow entry to cleanup
+ */
+void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 10d84ebf77bf..062e963a8838 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -583,11 +583,38 @@ err_free_flow_table:
 	return -ENOMEM;
 }
 
+static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt)
+{
+	if (!zt)
+		return;
+
+	if (!list_empty(&zt->pre_ct_list)) {
+		struct nfp_fl_ct_flow_entry *entry, *tmp;
+
+		WARN_ONCE(1, "pre_ct_list not empty as expected, cleaning up\n");
+		list_for_each_entry_safe(entry, tmp, &zt->pre_ct_list,
+					 list_node) {
+			nfp_fl_ct_clean_flow_entry(entry);
+		}
+	}
+
+	if (!list_empty(&zt->post_ct_list)) {
+		struct nfp_fl_ct_flow_entry *entry, *tmp;
+
+		WARN_ONCE(1, "post_ct_list not empty as expected, cleaning up\n");
+		list_for_each_entry_safe(entry, tmp, &zt->post_ct_list,
+					 list_node) {
+			nfp_fl_ct_clean_flow_entry(entry);
+		}
+	}
+	kfree(zt);
+}
+
 static void nfp_free_zone_table_entry(void *ptr, void *arg)
 {
 	struct nfp_fl_ct_zone_entry *zt = ptr;
 
-	kfree(zt);
+	nfp_zone_table_entry_destroy(zt);
 }
 
 void nfp_flower_metadata_cleanup(struct nfp_app *app)
@@ -605,7 +632,7 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app)
 				    nfp_check_rhashtable_empty, NULL);
 	rhashtable_free_and_destroy(&priv->ct_zone_table,
 				    nfp_free_zone_table_entry, NULL);
-	kfree(priv->ct_zone_wc);
+	nfp_zone_table_entry_destroy(priv->ct_zone_wc);
 	kvfree(priv->stats);
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	kfree(priv->mask_ids.last_used);

From fa81d6d214a4202b539a815d6c9d6b3c8e70ca95 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:50 +0200
Subject: [PATCH 0856/3292] nfp: flower-ct: add a table to map flow cookies to
 ct flows

Add a hashtable which contains entries to map flow cookies to ct
flow entries. Currently the entries are added and not used, but
follow-up patches will use this for stats updates and flow deletes.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 35 +++++++++++---
 .../ethernet/netronome/nfp/flower/conntrack.h | 13 ++++++
 .../net/ethernet/netronome/nfp/flower/main.h  |  2 +
 .../ethernet/netronome/nfp/flower/metadata.c  | 46 ++++++++++++++++++-
 4 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 57a5ba5f2761..f6f97224e773 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -107,9 +107,11 @@ err_zone_insert:
 static struct
 nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
 					 struct net_device *netdev,
-					 struct flow_cls_offload *flow)
+					 struct flow_cls_offload *flow,
+					 struct netlink_ext_ack *extack)
 {
 	struct nfp_fl_ct_flow_entry *entry;
+	struct nfp_fl_ct_map_entry *map;
 	struct flow_action_entry *act;
 	int err, i;
 
@@ -160,12 +162,33 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
 
 	INIT_LIST_HEAD(&entry->children);
 
-	/* Creation of a ct_map_entry and adding it to a hashtable
-	 * will happen here in follow up patches.
-	 */
+	/* Now add a ct map entry to flower-priv */
+	map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie,
+			    nfp_ct_map_params, sizeof(*map));
+	if (IS_ERR(map)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "offload error: ct map entry creation failed");
+		err = -ENOMEM;
+		goto err_ct_flow_insert;
+	}
+	map->cookie = flow->cookie;
+	map->ct_entry = entry;
+	err = rhashtable_insert_fast(&zt->priv->ct_map_table,
+				     &map->hash_node,
+				     nfp_ct_map_params);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "offload error: ct map entry table add failed");
+		goto err_map_insert;
+	}
 
 	return entry;
 
+err_map_insert:
+	kfree(map);
+err_ct_flow_insert:
+	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
+		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);
 err_pre_ct_tun_cp:
 	kfree(entry->rule);
 err_pre_ct_act:
@@ -245,7 +268,7 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 		zt->nft = ct_act->ct.flow_table;
 
 	/* Add entry to pre_ct_list */
-	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow);
+	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, extack);
 	if (IS_ERR(ct_entry))
 		return PTR_ERR(ct_entry);
 	ct_entry->type = CT_TYPE_PRE_CT;
@@ -286,7 +309,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 	}
 
 	/* Add entry to post_ct_list */
-	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow);
+	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, extack);
 	if (IS_ERR(ct_entry))
 		return PTR_ERR(ct_entry);
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index 46437de4d75f..a7f0d7c76b72 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -9,6 +9,7 @@
 #define NFP_FL_CT_NO_TUN	0xff
 
 extern const struct rhashtable_params nfp_zone_table_params;
+extern const struct rhashtable_params nfp_ct_map_params;
 
 /**
  * struct nfp_fl_ct_zone_entry - Zone entry containing conntrack flow information
@@ -69,6 +70,18 @@ struct nfp_fl_ct_flow_entry {
 	u8 tun_offset;		// Set to NFP_FL_CT_NO_TUN if no tun
 };
 
+/**
+ * struct nfp_fl_ct_map_entry - Map between flow cookie and specific ct_flow
+ * @cookie:	Flow cookie, same as original TC flow, used as key
+ * @hash_node:	Used by the hashtable
+ * @ct_entry:	Pointer to corresponding ct_entry
+ */
+struct nfp_fl_ct_map_entry {
+	unsigned long cookie;
+	struct rhash_head hash_node;
+	struct nfp_fl_ct_flow_entry *ct_entry;
+};
+
 bool is_pre_ct_flow(struct flow_cls_offload *flow);
 bool is_post_ct_flow(struct flow_cls_offload *flow);
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 060c6de36c02..0fbd682ccf72 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -195,6 +195,7 @@ struct nfp_fl_internal_ports {
  * @merge_table:	Hash table to store merged flows
  * @ct_zone_table:	Hash table used to store the different zones
  * @ct_zone_wc:		Special zone entry for wildcarded zone matches
+ * @ct_map_table:	Hash table used to referennce ct flows
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -231,6 +232,7 @@ struct nfp_flower_priv {
 	struct rhashtable merge_table;
 	struct rhashtable ct_zone_table;
 	struct nfp_fl_ct_zone_entry *ct_zone_wc;
+	struct rhashtable ct_map_table;
 };
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 062e963a8838..7654cf6a3222 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -504,6 +504,13 @@ const struct rhashtable_params nfp_zone_table_params = {
 	.automatic_shrinking	= false,
 };
 
+const struct rhashtable_params nfp_ct_map_params = {
+	.head_offset		= offsetof(struct nfp_fl_ct_map_entry, hash_node),
+	.key_len		= sizeof(unsigned long),
+	.key_offset		= offsetof(struct nfp_fl_ct_map_entry, cookie),
+	.automatic_shrinking	= true,
+};
+
 int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
 			     unsigned int host_num_mems)
 {
@@ -528,6 +535,10 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
 	if (err)
 		goto err_free_merge_table;
 
+	err = rhashtable_init(&priv->ct_map_table, &nfp_ct_map_params);
+	if (err)
+		goto err_free_ct_zone_table;
+
 	get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
 
 	/* Init ring buffer and unallocated mask_ids. */
@@ -535,7 +546,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count,
 		kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
 			      NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
 	if (!priv->mask_ids.mask_id_free_list.buf)
-		goto err_free_ct_zone_table;
+		goto err_free_ct_map_table;
 
 	priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
 
@@ -572,6 +583,8 @@ err_free_last_used:
 	kfree(priv->mask_ids.last_used);
 err_free_mask_id:
 	kfree(priv->mask_ids.mask_id_free_list.buf);
+err_free_ct_map_table:
+	rhashtable_destroy(&priv->ct_map_table);
 err_free_ct_zone_table:
 	rhashtable_destroy(&priv->ct_zone_table);
 err_free_merge_table:
@@ -589,22 +602,40 @@ static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt)
 		return;
 
 	if (!list_empty(&zt->pre_ct_list)) {
+		struct rhashtable *m_table = &zt->priv->ct_map_table;
 		struct nfp_fl_ct_flow_entry *entry, *tmp;
+		struct nfp_fl_ct_map_entry *map;
 
 		WARN_ONCE(1, "pre_ct_list not empty as expected, cleaning up\n");
 		list_for_each_entry_safe(entry, tmp, &zt->pre_ct_list,
 					 list_node) {
+			map = rhashtable_lookup_fast(m_table,
+						     &entry->cookie,
+						     nfp_ct_map_params);
+			WARN_ON_ONCE(rhashtable_remove_fast(m_table,
+							    &map->hash_node,
+							    nfp_ct_map_params));
 			nfp_fl_ct_clean_flow_entry(entry);
+			kfree(map);
 		}
 	}
 
 	if (!list_empty(&zt->post_ct_list)) {
+		struct rhashtable *m_table = &zt->priv->ct_map_table;
 		struct nfp_fl_ct_flow_entry *entry, *tmp;
+		struct nfp_fl_ct_map_entry *map;
 
 		WARN_ONCE(1, "post_ct_list not empty as expected, cleaning up\n");
 		list_for_each_entry_safe(entry, tmp, &zt->post_ct_list,
 					 list_node) {
+			map = rhashtable_lookup_fast(m_table,
+						     &entry->cookie,
+						     nfp_ct_map_params);
+			WARN_ON_ONCE(rhashtable_remove_fast(m_table,
+							    &map->hash_node,
+							    nfp_ct_map_params));
 			nfp_fl_ct_clean_flow_entry(entry);
+			kfree(map);
 		}
 	}
 	kfree(zt);
@@ -617,6 +648,16 @@ static void nfp_free_zone_table_entry(void *ptr, void *arg)
 	nfp_zone_table_entry_destroy(zt);
 }
 
+static void nfp_free_map_table_entry(void *ptr, void *arg)
+{
+	struct nfp_fl_ct_map_entry *map = ptr;
+
+	if (!map)
+		return;
+
+	kfree(map);
+}
+
 void nfp_flower_metadata_cleanup(struct nfp_app *app)
 {
 	struct nfp_flower_priv *priv = app->priv;
@@ -633,6 +674,9 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app)
 	rhashtable_free_and_destroy(&priv->ct_zone_table,
 				    nfp_free_zone_table_entry, NULL);
 	nfp_zone_table_entry_destroy(priv->ct_zone_wc);
+
+	rhashtable_free_and_destroy(&priv->ct_map_table,
+				    nfp_free_map_table_entry, NULL);
 	kvfree(priv->stats);
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	kfree(priv->mask_ids.last_used);

From f7ae12e2f95dc5bed1a0c6cfd73cf6690d465855 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:51 +0200
Subject: [PATCH 0857/3292] nfp: flower-ct: add tc_merge_tb

Add the table required to store the merge result of pre_ct and post_ct
flows. This is just the initial setup and teardown of the table,
the implementation will be in follow-up patches.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 14 +++++++++
 .../ethernet/netronome/nfp/flower/conntrack.h | 29 +++++++++++++++++++
 .../ethernet/netronome/nfp/flower/metadata.c  |  4 +++
 3 files changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index f6f97224e773..afa024971c08 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -3,6 +3,14 @@
 
 #include "conntrack.h"
 
+const struct rhashtable_params nfp_tc_ct_merge_params = {
+	.head_offset		= offsetof(struct nfp_fl_ct_tc_merge,
+					   hash_node),
+	.key_len		= sizeof(unsigned long) * 2,
+	.key_offset		= offsetof(struct nfp_fl_ct_tc_merge, cookie),
+	.automatic_shrinking	= true,
+};
+
 /**
  * get_hashentry() - Wrapper around hashtable lookup.
  * @ht:		hashtable where entry could be found
@@ -87,6 +95,10 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 	INIT_LIST_HEAD(&zt->pre_ct_list);
 	INIT_LIST_HEAD(&zt->post_ct_list);
 
+	err = rhashtable_init(&zt->tc_merge_tb, &nfp_tc_ct_merge_params);
+	if (err)
+		goto err_tc_merge_tb_init;
+
 	if (wildcarded) {
 		priv->ct_zone_wc = zt;
 	} else {
@@ -100,6 +112,8 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 	return zt;
 
 err_zone_insert:
+	rhashtable_destroy(&zt->tc_merge_tb);
+err_tc_merge_tb_init:
 	kfree(zt);
 	return ERR_PTR(err);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index a7f0d7c76b72..3d7d260c6e5c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -10,6 +10,7 @@
 
 extern const struct rhashtable_params nfp_zone_table_params;
 extern const struct rhashtable_params nfp_ct_map_params;
+extern const struct rhashtable_params nfp_tc_ct_merge_params;
 
 /**
  * struct nfp_fl_ct_zone_entry - Zone entry containing conntrack flow information
@@ -23,6 +24,9 @@ extern const struct rhashtable_params nfp_ct_map_params;
  *
  * @post_ct_list:	The post_ct_list of nfp_fl_ct_flow_entry entries
  * @post_ct_count:	Keep count of the number of post_ct entries
+ *
+ * @tc_merge_tb:	The table of merged tc flows
+ * @tc_merge_count:	Keep count of the number of merged tc entries
  */
 struct nfp_fl_ct_zone_entry {
 	u16 zone;
@@ -36,6 +40,9 @@ struct nfp_fl_ct_zone_entry {
 
 	struct list_head post_ct_list;
 	unsigned int post_ct_count;
+
+	struct rhashtable tc_merge_tb;
+	unsigned int tc_merge_count;
 };
 
 enum ct_entry_type {
@@ -70,6 +77,28 @@ struct nfp_fl_ct_flow_entry {
 	u8 tun_offset;		// Set to NFP_FL_CT_NO_TUN if no tun
 };
 
+/**
+ * struct nfp_fl_ct_tc_merge - Merge of two flows from tc
+ * @cookie:		Flow cookie, combination of pre and post ct cookies
+ * @hash_node:		Used by the hashtable
+ * @pre_ct_list:	This entry is part of a pre_ct_list
+ * @post_ct_list:	This entry is part of a post_ct_list
+ * @zt:			Reference to the zone table this belongs to
+ * @pre_ct_parent:	The pre_ct_parent
+ * @post_ct_parent:	The post_ct_parent
+ * @children:		List of nft merged entries
+ */
+struct nfp_fl_ct_tc_merge {
+	unsigned long cookie[2];
+	struct rhash_head hash_node;
+	struct list_head pre_ct_list;
+	struct list_head post_ct_list;
+	struct nfp_fl_ct_zone_entry *zt;
+	struct nfp_fl_ct_flow_entry *pre_ct_parent;
+	struct nfp_fl_ct_flow_entry *post_ct_parent;
+	struct list_head children;
+};
+
 /**
  * struct nfp_fl_ct_map_entry - Map between flow cookie and specific ct_flow
  * @cookie:	Flow cookie, same as original TC flow, used as key
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 7654cf6a3222..8658c5cedf91 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -638,6 +638,10 @@ static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt)
 			kfree(map);
 		}
 	}
+
+	rhashtable_free_and_destroy(&zt->tc_merge_tb,
+				    nfp_check_rhashtable_empty, NULL);
+
 	kfree(zt);
 }
 

From 3c863c300c0959aeeef13f797c85ea58f6b291c6 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 2 Jun 2021 13:59:52 +0200
Subject: [PATCH 0858/3292] nfp: flower-ct: add tc merge functionality

Add merging of pre/post_ct flow rules into the tc_merge table.
Pre_ct flows needs to be merge with post_ct flows and vice versa.

This needs to be done for all flows in the same zone table, as well
as with the wc_zone_table, which is for flows masking out ct_zone
info.

Cleanup is happening when all the tables are cleared up and prints
a warning traceback as this is not expected in the final version.
At this point we are not actually returning success for the offload,
so we do not get any delete requests for flows, so we can't delete
them that way yet. This means that cleanup happens in what would
usually be an exception path.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 162 +++++++++++++++++-
 1 file changed, 158 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index afa024971c08..b1709affb52d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -64,6 +64,77 @@ bool is_post_ct_flow(struct flow_cls_offload *flow)
 	return false;
 }
 
+static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
+			      struct nfp_fl_ct_flow_entry *entry2)
+{
+	return 0;
+}
+
+static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
+			      struct nfp_fl_ct_flow_entry *ct_entry1,
+			      struct nfp_fl_ct_flow_entry *ct_entry2)
+{
+	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
+	struct nfp_fl_ct_tc_merge *m_entry;
+	unsigned long new_cookie[2];
+	int err;
+
+	if (ct_entry1->type == CT_TYPE_PRE_CT) {
+		pre_ct_entry = ct_entry1;
+		post_ct_entry = ct_entry2;
+	} else {
+		post_ct_entry = ct_entry1;
+		pre_ct_entry = ct_entry2;
+	}
+
+	if (post_ct_entry->netdev != pre_ct_entry->netdev)
+		return -EINVAL;
+	/* Checks that the chain_index of the filter matches the
+	 * chain_index of the GOTO action.
+	 */
+	if (post_ct_entry->chain_index != pre_ct_entry->chain_index)
+		return -EINVAL;
+
+	err = nfp_ct_merge_check(post_ct_entry, pre_ct_entry);
+	if (err)
+		return err;
+
+	new_cookie[0] = pre_ct_entry->cookie;
+	new_cookie[1] = post_ct_entry->cookie;
+	m_entry = get_hashentry(&zt->tc_merge_tb, &new_cookie,
+				nfp_tc_ct_merge_params, sizeof(*m_entry));
+	if (IS_ERR(m_entry))
+		return PTR_ERR(m_entry);
+
+	/* m_entry already present, not merging again */
+	if (!memcmp(&new_cookie, m_entry->cookie, sizeof(new_cookie)))
+		return 0;
+
+	memcpy(&m_entry->cookie, &new_cookie, sizeof(new_cookie));
+	m_entry->zt = zt;
+	m_entry->post_ct_parent = post_ct_entry;
+	m_entry->pre_ct_parent = pre_ct_entry;
+
+	/* Add this entry to the pre_ct and post_ct lists */
+	list_add(&m_entry->post_ct_list, &post_ct_entry->children);
+	list_add(&m_entry->pre_ct_list, &pre_ct_entry->children);
+	INIT_LIST_HEAD(&m_entry->children);
+
+	err = rhashtable_insert_fast(&zt->tc_merge_tb, &m_entry->hash_node,
+				     nfp_tc_ct_merge_params);
+	if (err)
+		goto err_ct_tc_merge_insert;
+	zt->tc_merge_count++;
+
+	return 0;
+
+err_ct_tc_merge_insert:
+	list_del(&m_entry->post_ct_list);
+	list_del(&m_entry->pre_ct_list);
+	kfree(m_entry);
+	return err;
+}
+
 static struct
 nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 					 u16 zone, bool wildcarded)
@@ -210,12 +281,48 @@ err_pre_ct_act:
 	return ERR_PTR(err);
 }
 
-static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry)
+static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow)
 {
 }
 
-static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow)
+static void nfp_del_tc_merge_entry(struct nfp_fl_ct_tc_merge *m_ent)
 {
+	struct nfp_fl_ct_zone_entry *zt;
+	int err;
+
+	zt = m_ent->zt;
+	err = rhashtable_remove_fast(&zt->tc_merge_tb,
+				     &m_ent->hash_node,
+				     nfp_tc_ct_merge_params);
+	if (err)
+		pr_warn("WARNING: could not remove merge_entry from hashtable\n");
+	zt->tc_merge_count--;
+	list_del(&m_ent->post_ct_list);
+	list_del(&m_ent->pre_ct_list);
+
+	if (!list_empty(&m_ent->children))
+		nfp_free_nft_merge_children(m_ent, false);
+	kfree(m_ent);
+}
+
+static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry)
+{
+	struct nfp_fl_ct_tc_merge *m_ent, *tmp;
+
+	switch (entry->type) {
+	case CT_TYPE_PRE_CT:
+		list_for_each_entry_safe(m_ent, tmp, &entry->children, pre_ct_list) {
+			nfp_del_tc_merge_entry(m_ent);
+		}
+		break;
+	case CT_TYPE_POST_CT:
+		list_for_each_entry_safe(m_ent, tmp, &entry->children, post_ct_list) {
+			nfp_del_tc_merge_entry(m_ent);
+		}
+		break;
+	default:
+		break;
+	}
 }
 
 void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
@@ -248,6 +355,27 @@ static struct flow_action_entry *get_flow_act(struct flow_cls_offload *flow,
 	return NULL;
 }
 
+static void
+nfp_ct_merge_tc_entries(struct nfp_fl_ct_flow_entry *ct_entry1,
+			struct nfp_fl_ct_zone_entry *zt_src,
+			struct nfp_fl_ct_zone_entry *zt_dst)
+{
+	struct nfp_fl_ct_flow_entry *ct_entry2, *ct_tmp;
+	struct list_head *ct_list;
+
+	if (ct_entry1->type == CT_TYPE_PRE_CT)
+		ct_list = &zt_src->post_ct_list;
+	else if (ct_entry1->type == CT_TYPE_POST_CT)
+		ct_list = &zt_src->pre_ct_list;
+	else
+		return;
+
+	list_for_each_entry_safe(ct_entry2, ct_tmp, ct_list,
+				 list_node) {
+		nfp_ct_do_tc_merge(zt_dst, ct_entry2, ct_entry1);
+	}
+}
+
 int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 			    struct net_device *netdev,
 			    struct flow_cls_offload *flow,
@@ -290,8 +418,13 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 	list_add(&ct_entry->list_node, &zt->pre_ct_list);
 	zt->pre_ct_count++;
 
+	nfp_ct_merge_tc_entries(ct_entry, zt, zt);
+
+	/* Need to check and merge with tables in the wc_zone as well */
+	if (priv->ct_zone_wc)
+		nfp_ct_merge_tc_entries(ct_entry, priv->ct_zone_wc, zt);
+
 	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack action not supported");
-	nfp_fl_ct_clean_flow_entry(ct_entry);
 	return -EOPNOTSUPP;
 }
 
@@ -332,7 +465,28 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 	list_add(&ct_entry->list_node, &zt->post_ct_list);
 	zt->post_ct_count++;
 
+	if (wildcarded) {
+		/* Iterate through all zone tables if not empty, look for merges with
+		 * pre_ct entries and merge them.
+		 */
+		struct rhashtable_iter iter;
+		struct nfp_fl_ct_zone_entry *zone_table;
+
+		rhashtable_walk_enter(&priv->ct_zone_table, &iter);
+		rhashtable_walk_start(&iter);
+		while ((zone_table = rhashtable_walk_next(&iter)) != NULL) {
+			if (IS_ERR(zone_table))
+				continue;
+			rhashtable_walk_stop(&iter);
+			nfp_ct_merge_tc_entries(ct_entry, zone_table, zone_table);
+			rhashtable_walk_start(&iter);
+		}
+		rhashtable_walk_stop(&iter);
+		rhashtable_walk_exit(&iter);
+	} else {
+		nfp_ct_merge_tc_entries(ct_entry, zt, zt);
+	}
+
 	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack match not supported");
-	nfp_fl_ct_clean_flow_entry(ct_entry);
 	return -EOPNOTSUPP;
 }

From d395381909a32060927e3f90116f938379be0636 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:14 +0300
Subject: [PATCH 0859/3292] netdevsim: Add max_vfs to bus_dev

Currently there is no limit to the number of VFs netdevsim can enable.
In a real systems this value exist and used by the driver.
Fore example, some features might need to consider this value when
allocating memory.

Expose max_vfs variable to debugfs as configurable resource. If are VFs
configured (num_vfs != 0) then changing of max_vfs not allowed.

Co-developed-by: Yuval Avnery <yuvalav@nvidia.com>
Signed-off-by: Yuval Avnery <yuvalav@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bus.c       | 97 ++++++++++++++++++++++++++++---
 drivers/net/netdevsim/dev.c       | 13 +++++
 drivers/net/netdevsim/netdevsim.h | 10 ++++
 3 files changed, 112 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 0e9511661601..4bd7ef3c04be 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -27,9 +27,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
 static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
 				   unsigned int num_vfs)
 {
-	nsim_bus_dev->vfconfigs = kcalloc(num_vfs,
-					  sizeof(struct nsim_vf_config),
-					  GFP_KERNEL | __GFP_NOWARN);
+	if (nsim_bus_dev->max_vfs < num_vfs)
+		return -ENOMEM;
+
 	if (!nsim_bus_dev->vfconfigs)
 		return -ENOMEM;
 	nsim_bus_dev->num_vfs = num_vfs;
@@ -39,8 +39,6 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
 
 static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
 {
-	kfree(nsim_bus_dev->vfconfigs);
-	nsim_bus_dev->vfconfigs = NULL;
 	nsim_bus_dev->num_vfs = 0;
 }
 
@@ -56,7 +54,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,
 	if (ret)
 		return ret;
 
-	rtnl_lock();
+	mutex_lock(&nsim_bus_dev->vfs_lock);
 	if (nsim_bus_dev->num_vfs == num_vfs)
 		goto exit_good;
 	if (nsim_bus_dev->num_vfs && num_vfs) {
@@ -74,7 +72,7 @@ nsim_bus_dev_numvfs_store(struct device *dev, struct device_attribute *attr,
 exit_good:
 	ret = count;
 exit_unlock:
-	rtnl_unlock();
+	mutex_unlock(&nsim_bus_dev->vfs_lock);
 
 	return ret;
 }
@@ -92,6 +90,73 @@ static struct device_attribute nsim_bus_dev_numvfs_attr =
 	__ATTR(sriov_numvfs, 0664, nsim_bus_dev_numvfs_show,
 	       nsim_bus_dev_numvfs_store);
 
+ssize_t nsim_bus_dev_max_vfs_read(struct file *file,
+				  char __user *data,
+				  size_t count, loff_t *ppos)
+{
+	struct nsim_bus_dev *nsim_bus_dev = file->private_data;
+	char buf[11];
+	size_t len;
+
+	len = snprintf(buf, sizeof(buf), "%u\n", nsim_bus_dev->max_vfs);
+	if (len < 0)
+		return len;
+
+	return simple_read_from_buffer(data, count, ppos, buf, len);
+}
+
+ssize_t nsim_bus_dev_max_vfs_write(struct file *file,
+				   const char __user *data,
+				   size_t count, loff_t *ppos)
+{
+	struct nsim_bus_dev *nsim_bus_dev = file->private_data;
+	struct nsim_vf_config *vfconfigs;
+	ssize_t ret;
+	char buf[10];
+	u32 val;
+
+	if (*ppos != 0)
+		return 0;
+
+	if (count >= sizeof(buf))
+		return -ENOSPC;
+
+	mutex_lock(&nsim_bus_dev->vfs_lock);
+	/* Reject if VFs are configured */
+	if (nsim_bus_dev->num_vfs) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	ret = copy_from_user(buf, data, count);
+	if (ret) {
+		ret = -EFAULT;
+		goto unlock;
+	}
+
+	buf[count] = '\0';
+	ret = kstrtouint(buf, 10, &val);
+	if (ret) {
+		ret = -EIO;
+		goto unlock;
+	}
+
+	vfconfigs = kcalloc(val, sizeof(struct nsim_vf_config), GFP_KERNEL | __GFP_NOWARN);
+	if (!vfconfigs) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	kfree(nsim_bus_dev->vfconfigs);
+	nsim_bus_dev->vfconfigs = vfconfigs;
+	nsim_bus_dev->max_vfs = val;
+	*ppos += count;
+	ret = count;
+unlock:
+	mutex_unlock(&nsim_bus_dev->vfs_lock);
+	return ret;
+}
+
 static ssize_t
 new_port_store(struct device *dev, struct device_attribute *attr,
 	       const char *buf, size_t count)
@@ -311,6 +376,8 @@ static struct bus_type nsim_bus = {
 	.num_vf		= nsim_num_vf,
 };
 
+#define NSIM_BUS_DEV_MAX_VFS 4
+
 static struct nsim_bus_dev *
 nsim_bus_dev_new(unsigned int id, unsigned int port_count)
 {
@@ -329,15 +396,28 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
 	nsim_bus_dev->dev.type = &nsim_bus_dev_type;
 	nsim_bus_dev->port_count = port_count;
 	nsim_bus_dev->initial_net = current->nsproxy->net_ns;
+	nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS;
 	mutex_init(&nsim_bus_dev->nsim_bus_reload_lock);
+	mutex_init(&nsim_bus_dev->vfs_lock);
 	/* Disallow using nsim_bus_dev */
 	smp_store_release(&nsim_bus_dev->init, false);
 
+	nsim_bus_dev->vfconfigs = kcalloc(nsim_bus_dev->max_vfs,
+					  sizeof(struct nsim_vf_config),
+					  GFP_KERNEL | __GFP_NOWARN);
+	if (!nsim_bus_dev->vfconfigs) {
+		err = -ENOMEM;
+		goto err_nsim_bus_dev_id_free;
+	}
+
 	err = device_register(&nsim_bus_dev->dev);
 	if (err)
-		goto err_nsim_bus_dev_id_free;
+		goto err_nsim_vfs_free;
+
 	return nsim_bus_dev;
 
+err_nsim_vfs_free:
+	kfree(nsim_bus_dev->vfconfigs);
 err_nsim_bus_dev_id_free:
 	ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
 err_nsim_bus_dev_free:
@@ -351,6 +431,7 @@ static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev)
 	smp_store_release(&nsim_bus_dev->init, false);
 	device_unregister(&nsim_bus_dev->dev);
 	ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id);
+	kfree(nsim_bus_dev->vfconfigs);
 	kfree(nsim_bus_dev);
 }
 
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 6189a4c0d39e..12df93a34bfd 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -192,6 +192,14 @@ static const struct file_operations nsim_dev_trap_fa_cookie_fops = {
 	.owner = THIS_MODULE,
 };
 
+static const struct file_operations nsim_dev_max_vfs_fops = {
+	.open = simple_open,
+	.read = nsim_bus_dev_max_vfs_read,
+	.write = nsim_bus_dev_max_vfs_write,
+	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
+};
+
 static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 {
 	char dev_ddir_name[sizeof(DRV_NAME) + 10];
@@ -231,6 +239,11 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 	debugfs_create_bool("fail_trap_policer_counter_get", 0600,
 			    nsim_dev->ddir,
 			    &nsim_dev->fail_trap_policer_counter_get);
+	nsim_dev->max_vfs = debugfs_create_file("max_vfs",
+						0600,
+						nsim_dev->ddir,
+						nsim_dev->nsim_bus_dev,
+						&nsim_dev_max_vfs_fops);
 	nsim_udp_tunnels_debugfs_create(nsim_dev);
 	return 0;
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 7ff24e03577b..12f56f2f85e8 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -212,6 +212,7 @@ struct nsim_dev {
 	struct dentry *ddir;
 	struct dentry *ports_ddir;
 	struct dentry *take_snapshot;
+	struct dentry *max_vfs;
 	struct bpf_offload_dev *bpf_dev;
 	bool bpf_bind_accept;
 	bool bpf_bind_verifier_accept;
@@ -269,6 +270,13 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *fib_data);
 u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
 		     enum nsim_resource_id res_id, bool max);
 
+ssize_t nsim_bus_dev_max_vfs_read(struct file *file,
+				  char __user *data,
+				  size_t count, loff_t *ppos);
+ssize_t nsim_bus_dev_max_vfs_write(struct file *file,
+				   const char __user *data,
+				   size_t count, loff_t *ppos);
+
 #if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
 void nsim_ipsec_init(struct netdevsim *ns);
 void nsim_ipsec_teardown(struct netdevsim *ns);
@@ -308,7 +316,9 @@ struct nsim_bus_dev {
 	struct net *initial_net; /* Purpose of this is to carry net pointer
 				  * during the probe time only.
 				  */
+	unsigned int max_vfs;
 	unsigned int num_vfs;
+	struct mutex vfs_lock;  /* Protects vfconfigs */
 	struct nsim_vf_config *vfconfigs;
 	/* Lock for devlink->reload_enabled in netdevsim module */
 	struct mutex nsim_bus_reload_lock;

From 32ac15d8fd804615e79e365d6825da2a371f91f9 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:15 +0300
Subject: [PATCH 0860/3292] netdevsim: Disable VFs on nsim_dev_reload_destroy()
 call

Move VFs disabling from device release() to nsim_dev_reload_destroy() to
make VFs disabling and ports removal simultaneous.
This is a requirement for VFs ports implemented in next patches.

Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bus.c       | 5 +----
 drivers/net/netdevsim/dev.c       | 6 ++++++
 drivers/net/netdevsim/netdevsim.h | 1 +
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index 4bd7ef3c04be..d5c547c35e2f 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -37,7 +37,7 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
 	return 0;
 }
 
-static void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
+void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
 {
 	nsim_bus_dev->num_vfs = 0;
 }
@@ -233,9 +233,6 @@ static const struct attribute_group *nsim_bus_dev_attr_groups[] = {
 
 static void nsim_bus_dev_release(struct device *dev)
 {
-	struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-
-	nsim_bus_dev_vfs_disable(nsim_bus_dev);
 }
 
 static struct device_type nsim_bus_dev_type = {
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 12df93a34bfd..cd50c05b1e6e 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -1182,6 +1182,12 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
 	if (devlink_is_reload_failed(devlink))
 		return;
 	debugfs_remove(nsim_dev->take_snapshot);
+
+	mutex_lock(&nsim_dev->nsim_bus_dev->vfs_lock);
+	if (nsim_dev->nsim_bus_dev->num_vfs)
+		nsim_bus_dev_vfs_disable(nsim_dev->nsim_bus_dev);
+	mutex_unlock(&nsim_dev->nsim_bus_dev->vfs_lock);
+
 	nsim_dev_port_del_all(nsim_dev);
 	nsim_dev_psample_exit(nsim_dev);
 	nsim_dev_health_exit(nsim_dev);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 12f56f2f85e8..a1b49c8c0175 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -276,6 +276,7 @@ ssize_t nsim_bus_dev_max_vfs_read(struct file *file,
 ssize_t nsim_bus_dev_max_vfs_write(struct file *file,
 				   const char __user *data,
 				   size_t count, loff_t *ppos);
+void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev);
 
 #if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
 void nsim_ipsec_init(struct netdevsim *ns);

From 814b9ce65ec3b53404eeda7a11e1abb4af8d7df3 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:16 +0300
Subject: [PATCH 0861/3292] netdevsim: Implement port types and indexing

Define type of ports, which netdevsim driver currently operates with as
PF. Define new port type - VF, which will be implemented in following
patches. Add helper functions to distinguish them. Add helper function
to get VF index from port index.

Add port indexing logic where PFs' indexes starts from 0, VFs' - from
NSIM_DEV_VF_PORT_INDEX_BASE.
All ports uses same index pool, which means that PF port may be created
with index from VFs' indexes range.
Maximum number of VFs, which the driver can allocate, is limited by
UINT_MAX - BASE.

Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bus.c       | 10 ++++++--
 drivers/net/netdevsim/dev.c       | 42 +++++++++++++++++++++++--------
 drivers/net/netdevsim/netdevsim.h | 20 +++++++++++++++
 3 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index d5c547c35e2f..e29146d9eddb 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -141,6 +141,12 @@ ssize_t nsim_bus_dev_max_vfs_write(struct file *file,
 		goto unlock;
 	}
 
+	/* max_vfs limited by the maximum number of provided port indexes */
+	if (val > NSIM_DEV_VF_PORT_INDEX_MAX - NSIM_DEV_VF_PORT_INDEX_BASE) {
+		ret = -ERANGE;
+		goto unlock;
+	}
+
 	vfconfigs = kcalloc(val, sizeof(struct nsim_vf_config), GFP_KERNEL | __GFP_NOWARN);
 	if (!vfconfigs) {
 		ret = -ENOMEM;
@@ -178,7 +184,7 @@ new_port_store(struct device *dev, struct device_attribute *attr,
 
 	mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
 	devlink_reload_disable(devlink);
-	ret = nsim_dev_port_add(nsim_bus_dev, port_index);
+	ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
 	devlink_reload_enable(devlink);
 	mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
 	return ret ? ret : count;
@@ -207,7 +213,7 @@ del_port_store(struct device *dev, struct device_attribute *attr,
 
 	mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
 	devlink_reload_disable(devlink);
-	ret = nsim_dev_port_del(nsim_bus_dev, port_index);
+	ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
 	devlink_reload_enable(devlink);
 	mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
 	return ret ? ret : count;
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index cd50c05b1e6e..93d6f3d54d11 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -35,6 +35,25 @@
 
 #include "netdevsim.h"
 
+static unsigned int
+nsim_dev_port_index(enum nsim_dev_port_type type, unsigned int port_index)
+{
+	switch (type) {
+	case NSIM_DEV_PORT_TYPE_VF:
+		port_index = NSIM_DEV_VF_PORT_INDEX_BASE + port_index;
+		break;
+	case NSIM_DEV_PORT_TYPE_PF:
+		break;
+	}
+
+	return port_index;
+}
+
+static inline unsigned int nsim_dev_port_index_to_vf_index(unsigned int port_index)
+{
+	return port_index - NSIM_DEV_VF_PORT_INDEX_BASE;
+}
+
 static struct dentry *nsim_dev_ddir;
 
 #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32)
@@ -923,7 +942,7 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 #define NSIM_DEV_MAX_MACS_DEFAULT 32
 #define NSIM_DEV_TEST1_DEFAULT true
 
-static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
+static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
 			       unsigned int port_index)
 {
 	struct devlink_port_attrs attrs = {};
@@ -934,7 +953,8 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
 	nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL);
 	if (!nsim_dev_port)
 		return -ENOMEM;
-	nsim_dev_port->port_index = port_index;
+	nsim_dev_port->port_index = nsim_dev_port_index(type, port_index);
+	nsim_dev_port->port_type = type;
 
 	devlink_port = &nsim_dev_port->devlink_port;
 	attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
@@ -943,7 +963,7 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
 	attrs.switch_id.id_len = nsim_dev->switch_id.id_len;
 	devlink_port_attrs_set(devlink_port, &attrs);
 	err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port,
-				    port_index);
+				    nsim_dev_port->port_index);
 	if (err)
 		goto err_port_free;
 
@@ -1000,7 +1020,7 @@ static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev,
 	int i, err;
 
 	for (i = 0; i < port_count; i++) {
-		err = __nsim_dev_port_add(nsim_dev, i);
+		err = __nsim_dev_port_add(nsim_dev, NSIM_DEV_PORT_TYPE_PF, i);
 		if (err)
 			goto err_port_del_all;
 	}
@@ -1216,32 +1236,34 @@ void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev)
 }
 
 static struct nsim_dev_port *
-__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, unsigned int port_index)
+__nsim_dev_port_lookup(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
+		       unsigned int port_index)
 {
 	struct nsim_dev_port *nsim_dev_port;
 
+	port_index = nsim_dev_port_index(type, port_index);
 	list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
 		if (nsim_dev_port->port_index == port_index)
 			return nsim_dev_port;
 	return NULL;
 }
 
-int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev,
+int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type,
 		      unsigned int port_index)
 {
 	struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
 	int err;
 
 	mutex_lock(&nsim_dev->port_list_lock);
-	if (__nsim_dev_port_lookup(nsim_dev, port_index))
+	if (__nsim_dev_port_lookup(nsim_dev, type, port_index))
 		err = -EEXIST;
 	else
-		err = __nsim_dev_port_add(nsim_dev, port_index);
+		err = __nsim_dev_port_add(nsim_dev, type, port_index);
 	mutex_unlock(&nsim_dev->port_list_lock);
 	return err;
 }
 
-int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
+int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, enum nsim_dev_port_type type,
 		      unsigned int port_index)
 {
 	struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
@@ -1249,7 +1271,7 @@ int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
 	int err = 0;
 
 	mutex_lock(&nsim_dev->port_list_lock);
-	nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, port_index);
+	nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, type, port_index);
 	if (!nsim_dev_port)
 		err = -ENOENT;
 	else
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index a1b49c8c0175..e025c1bc1c26 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -197,10 +197,19 @@ static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev)
 }
 #endif
 
+enum nsim_dev_port_type {
+	NSIM_DEV_PORT_TYPE_PF,
+	NSIM_DEV_PORT_TYPE_VF,
+};
+
+#define NSIM_DEV_VF_PORT_INDEX_BASE 128
+#define NSIM_DEV_VF_PORT_INDEX_MAX UINT_MAX
+
 struct nsim_dev_port {
 	struct list_head list;
 	struct devlink_port devlink_port;
 	unsigned int port_index;
+	enum nsim_dev_port_type port_type;
 	struct dentry *ddir;
 	struct netdevsim *ns;
 };
@@ -260,8 +269,10 @@ void nsim_dev_exit(void);
 int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev);
 void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev);
 int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev,
+		      enum nsim_dev_port_type type,
 		      unsigned int port_index);
 int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
+		      enum nsim_dev_port_type type,
 		      unsigned int port_index);
 
 struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
@@ -278,6 +289,15 @@ ssize_t nsim_bus_dev_max_vfs_write(struct file *file,
 				   size_t count, loff_t *ppos);
 void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev);
 
+static inline bool nsim_dev_port_is_pf(struct nsim_dev_port *nsim_dev_port)
+{
+	return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_PF;
+}
+
+static inline bool nsim_dev_port_is_vf(struct nsim_dev_port *nsim_dev_port)
+{
+	return nsim_dev_port->port_type == NSIM_DEV_PORT_TYPE_VF;
+}
 #if IS_ENABLED(CONFIG_XFRM_OFFLOAD)
 void nsim_ipsec_init(struct netdevsim *ns);
 void nsim_ipsec_teardown(struct netdevsim *ns);

From 92ba1f29e6e2f16eb93a0a2c7c01985920b89222 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:17 +0300
Subject: [PATCH 0862/3292] netdevsim: Implement VFs

Allow creation of netdevsim ports for VFs along with allocations of
corresponding net devices and devlink ports.
Add enums and helpers to distinguish PFs' ports from VFs' ports.

Ports creation/deletion debugfs API intended to be used with physical
ports only.
VFs instantiation will be done in one of the next patches.

Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c    | 14 +++++-
 drivers/net/netdevsim/netdev.c | 90 ++++++++++++++++++++++++----------
 2 files changed, 77 insertions(+), 27 deletions(-)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 93d6f3d54d11..8bd7654f4dca 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -945,11 +945,15 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_type type,
 			       unsigned int port_index)
 {
+	struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev;
 	struct devlink_port_attrs attrs = {};
 	struct nsim_dev_port *nsim_dev_port;
 	struct devlink_port *devlink_port;
 	int err;
 
+	if (type == NSIM_DEV_PORT_TYPE_VF && !nsim_bus_dev->num_vfs)
+		return -EINVAL;
+
 	nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL);
 	if (!nsim_dev_port)
 		return -ENOMEM;
@@ -957,8 +961,14 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ
 	nsim_dev_port->port_type = type;
 
 	devlink_port = &nsim_dev_port->devlink_port;
-	attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
-	attrs.phys.port_number = port_index + 1;
+	if (nsim_dev_port_is_pf(nsim_dev_port)) {
+		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+		attrs.phys.port_number = port_index + 1;
+	} else {
+		attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF;
+		attrs.pci_vf.pf = 0;
+		attrs.pci_vf.vf = port_index;
+	}
 	memcpy(attrs.switch_id.id, nsim_dev->switch_id.id, nsim_dev->switch_id.id_len);
 	attrs.switch_id.id_len = nsim_dev->switch_id.id_len;
 	devlink_port_attrs_set(devlink_port, &attrs);
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 659d3dceb687..9352e18b4db9 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -261,6 +261,18 @@ static const struct net_device_ops nsim_netdev_ops = {
 	.ndo_get_devlink_port	= nsim_get_devlink_port,
 };
 
+static const struct net_device_ops nsim_vf_netdev_ops = {
+	.ndo_start_xmit		= nsim_start_xmit,
+	.ndo_set_rx_mode	= nsim_set_rx_mode,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= nsim_change_mtu,
+	.ndo_get_stats64	= nsim_get_stats64,
+	.ndo_setup_tc		= nsim_setup_tc,
+	.ndo_set_features	= nsim_set_features,
+	.ndo_get_devlink_port	= nsim_get_devlink_port,
+};
+
 static void nsim_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -280,6 +292,49 @@ static void nsim_setup(struct net_device *dev)
 	dev->max_mtu = ETH_MAX_MTU;
 }
 
+static int nsim_init_netdevsim(struct netdevsim *ns)
+{
+	int err;
+
+	ns->netdev->netdev_ops = &nsim_netdev_ops;
+
+	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
+	if (err)
+		return err;
+
+	rtnl_lock();
+	err = nsim_bpf_init(ns);
+	if (err)
+		goto err_utn_destroy;
+
+	nsim_ipsec_init(ns);
+
+	err = register_netdevice(ns->netdev);
+	if (err)
+		goto err_ipsec_teardown;
+	rtnl_unlock();
+	return 0;
+
+err_ipsec_teardown:
+	nsim_ipsec_teardown(ns);
+	nsim_bpf_uninit(ns);
+err_utn_destroy:
+	rtnl_unlock();
+	nsim_udp_tunnels_info_destroy(ns->netdev);
+	return err;
+}
+
+static int nsim_init_netdevsim_vf(struct netdevsim *ns)
+{
+	int err;
+
+	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
+	rtnl_lock();
+	err = register_netdevice(ns->netdev);
+	rtnl_unlock();
+	return err;
+}
+
 struct netdevsim *
 nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 {
@@ -299,33 +354,15 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 	ns->nsim_dev_port = nsim_dev_port;
 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
-	dev->netdev_ops = &nsim_netdev_ops;
 	nsim_ethtool_init(ns);
-
-	err = nsim_udp_tunnels_info_create(nsim_dev, dev);
+	if (nsim_dev_port_is_pf(nsim_dev_port))
+		err = nsim_init_netdevsim(ns);
+	else
+		err = nsim_init_netdevsim_vf(ns);
 	if (err)
 		goto err_free_netdev;
-
-	rtnl_lock();
-	err = nsim_bpf_init(ns);
-	if (err)
-		goto err_utn_destroy;
-
-	nsim_ipsec_init(ns);
-
-	err = register_netdevice(dev);
-	if (err)
-		goto err_ipsec_teardown;
-	rtnl_unlock();
-
 	return ns;
 
-err_ipsec_teardown:
-	nsim_ipsec_teardown(ns);
-	nsim_bpf_uninit(ns);
-err_utn_destroy:
-	rtnl_unlock();
-	nsim_udp_tunnels_info_destroy(dev);
 err_free_netdev:
 	free_netdev(dev);
 	return ERR_PTR(err);
@@ -337,10 +374,13 @@ void nsim_destroy(struct netdevsim *ns)
 
 	rtnl_lock();
 	unregister_netdevice(dev);
-	nsim_ipsec_teardown(ns);
-	nsim_bpf_uninit(ns);
+	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
+		nsim_ipsec_teardown(ns);
+		nsim_bpf_uninit(ns);
+	}
 	rtnl_unlock();
-	nsim_udp_tunnels_info_destroy(dev);
+	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
+		nsim_udp_tunnels_info_destroy(dev);
 	free_netdev(dev);
 }
 

From 160dc373eead2143ea51b7f8e2a6bf1e383f24f8 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:18 +0300
Subject: [PATCH 0863/3292] netdevsim: Implement legacy/switchdev mode for VFs

Implement callbacks to set/get eswitch mode value. Add helpers to check
current mode.

Instantiate VFs' net devices and devlink ports on switchdev enabling and
remove them on legacy enabling. Changing number of VFs while in
switchdev mode triggers VFs creation/deletion.

Also disable NDO API callback to set VF rate, since it's legacy API.
Switchdev API to set VF rate will be implemented in one of the next
patches.

Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bus.c       | 17 +++++++-
 drivers/net/netdevsim/dev.c       | 69 +++++++++++++++++++++++++++++++
 drivers/net/netdevsim/netdev.c    |  5 +++
 drivers/net/netdevsim/netdevsim.h | 14 +++++++
 4 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index e29146d9eddb..b56003dfe3cc 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -27,6 +27,9 @@ static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev)
 static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
 				   unsigned int num_vfs)
 {
+	struct nsim_dev *nsim_dev;
+	int err = 0;
+
 	if (nsim_bus_dev->max_vfs < num_vfs)
 		return -ENOMEM;
 
@@ -34,12 +37,24 @@ static int nsim_bus_dev_vfs_enable(struct nsim_bus_dev *nsim_bus_dev,
 		return -ENOMEM;
 	nsim_bus_dev->num_vfs = num_vfs;
 
-	return 0;
+	nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
+	if (nsim_esw_mode_is_switchdev(nsim_dev)) {
+		err = nsim_esw_switchdev_enable(nsim_dev, NULL);
+		if (err)
+			nsim_bus_dev->num_vfs = 0;
+	}
+
+	return err;
 }
 
 void nsim_bus_dev_vfs_disable(struct nsim_bus_dev *nsim_bus_dev)
 {
+	struct nsim_dev *nsim_dev;
+
 	nsim_bus_dev->num_vfs = 0;
+	nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
+	if (nsim_esw_mode_is_switchdev(nsim_dev))
+		nsim_esw_legacy_enable(nsim_dev, NULL);
 }
 
 static ssize_t
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 8bd7654f4dca..ed9ce083d0ad 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -439,6 +439,72 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
 	devlink_region_destroy(nsim_dev->dummy_region);
 }
 
+static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port);
+int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack)
+{
+	struct nsim_dev_port *nsim_dev_port, *tmp;
+
+	mutex_lock(&nsim_dev->port_list_lock);
+	list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list)
+		if (nsim_dev_port_is_vf(nsim_dev_port))
+			__nsim_dev_port_del(nsim_dev_port);
+	mutex_unlock(&nsim_dev->port_list_lock);
+	nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+	return 0;
+}
+
+int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack)
+{
+	struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev;
+	int i, err;
+
+	for (i = 0; i < nsim_bus_dev->num_vfs; i++) {
+		err = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to initialize VFs' netdevsim ports");
+			pr_err("Failed to initialize VF id=%d. %d.\n", i, err);
+			goto err_port_add_vfs;
+		}
+	}
+	nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+	return 0;
+
+err_port_add_vfs:
+	for (i--; i >= 0; i--)
+		nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_VF, i);
+	return err;
+}
+
+static int nsim_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+					 struct netlink_ext_ack *extack)
+{
+	struct nsim_dev *nsim_dev = devlink_priv(devlink);
+	int err = 0;
+
+	mutex_lock(&nsim_dev->nsim_bus_dev->vfs_lock);
+	if (mode == nsim_dev->esw_mode)
+		goto unlock;
+
+	if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
+		err = nsim_esw_legacy_enable(nsim_dev, extack);
+	else if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		err = nsim_esw_switchdev_enable(nsim_dev, extack);
+	else
+		err = -EINVAL;
+
+unlock:
+	mutex_unlock(&nsim_dev->nsim_bus_dev->vfs_lock);
+	return err;
+}
+
+static int nsim_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct nsim_dev *nsim_dev = devlink_priv(devlink);
+
+	*mode = nsim_dev->esw_mode;
+	return 0;
+}
+
 struct nsim_trap_item {
 	void *trap_ctx;
 	enum devlink_trap_action action;
@@ -925,6 +991,8 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink,
 }
 
 static const struct devlink_ops nsim_dev_devlink_ops = {
+	.eswitch_mode_set = nsim_devlink_eswitch_mode_set,
+	.eswitch_mode_get = nsim_devlink_eswitch_mode_get,
 	.supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT |
 					 DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
 	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
@@ -1177,6 +1245,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
 
 	devlink_params_publish(devlink);
 	devlink_reload_enable(devlink);
+	nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY;
 	return 0;
 
 err_psample_exit:
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 9352e18b4db9..c3aeb15843e2 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -113,6 +113,11 @@ static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
 	struct netdevsim *ns = netdev_priv(dev);
 	struct nsim_bus_dev *nsim_bus_dev = ns->nsim_bus_dev;
 
+	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
+		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (vf >= nsim_bus_dev->num_vfs)
 		return -EINVAL;
 
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index e025c1bc1c26..13a0042124f7 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -257,8 +257,22 @@ struct nsim_dev {
 		u32 sleep;
 	} udp_ports;
 	struct nsim_dev_psample *psample;
+	u16 esw_mode;
 };
 
+int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack);
+int nsim_esw_switchdev_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack);
+
+static inline bool nsim_esw_mode_is_legacy(struct nsim_dev *nsim_dev)
+{
+	return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_LEGACY;
+}
+
+static inline bool nsim_esw_mode_is_switchdev(struct nsim_dev *nsim_dev)
+{
+	return nsim_dev->esw_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV;
+}
+
 static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev)
 {
 	return devlink_net(priv_to_devlink(nsim_dev));

From 4677efc486e1872f62d4632c50f7183f82296fa6 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:19 +0300
Subject: [PATCH 0864/3292] devlink: Introduce rate object

Allow registering rate object for devlink ports with dedicated
devlink_rate_leaf_{create|destroy}() API. Implement new netlink
DEVLINK_CMD_RATE_GET command that is used to retrieve rate object info.
Add new DEVLINK_CMD_RATE_{NEW|DEL} commands that are used for
notifications when creating/deleting leaf rate object.

Rate API is intended to be used for rate limiting of individual
devlink ports (leafs) and their aggregates (nodes).

Example:

$ devlink port show
pci/0000:03:00.0/0
pci/0000:03:00.0/1

$ devlink port function rate show
pci/0000:03:00.0/0: type leaf
pci/0000:03:00.0/1: type leaf

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  14 +++
 include/uapi/linux/devlink.h |  11 ++
 net/core/devlink.c           | 229 ++++++++++++++++++++++++++++++++++-
 3 files changed, 253 insertions(+), 1 deletion(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7c984cadfec4..2f5954d96c3e 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -34,6 +34,7 @@ struct devlink_ops;
 struct devlink {
 	struct list_head list;
 	struct list_head port_list;
+	struct list_head rate_list;
 	struct list_head sb_list;
 	struct list_head dpipe_table_list;
 	struct list_head resource_list;
@@ -133,6 +134,15 @@ struct devlink_port_attrs {
 	};
 };
 
+struct devlink_rate {
+	struct list_head list;
+	enum devlink_rate_type type;
+	struct devlink *devlink;
+	void *priv;
+
+	struct devlink_port *devlink_port;
+};
+
 struct devlink_port {
 	struct list_head list;
 	struct list_head param_list;
@@ -152,6 +162,8 @@ struct devlink_port {
 	struct delayed_work type_warn_dw;
 	struct list_head reporter_list;
 	struct mutex reporters_lock; /* Protects reporter_list */
+
+	struct devlink_rate *devlink_rate;
 };
 
 struct devlink_port_new_attrs {
@@ -1512,6 +1524,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
 void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
 				   u32 controller, u16 pf, u32 sf,
 				   bool external);
+int devlink_rate_leaf_create(struct devlink_port *port, void *priv);
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
 			u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index f6008b2fa60f..0c27b45c47db 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -126,6 +126,11 @@ enum devlink_command {
 
 	DEVLINK_CMD_HEALTH_REPORTER_TEST,
 
+	DEVLINK_CMD_RATE_GET,		/* can dump */
+	DEVLINK_CMD_RATE_SET,
+	DEVLINK_CMD_RATE_NEW,
+	DEVLINK_CMD_RATE_DEL,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -206,6 +211,10 @@ enum devlink_port_flavour {
 				      */
 };
 
+enum devlink_rate_type {
+	DEVLINK_RATE_TYPE_LEAF,
+};
+
 enum devlink_param_cmode {
 	DEVLINK_PARAM_CMODE_RUNTIME,
 	DEVLINK_PARAM_CMODE_DRIVERINIT,
@@ -534,6 +543,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_RELOAD_ACTION_STATS,       /* nested */
 
 	DEVLINK_ATTR_PORT_PCI_SF_NUMBER,	/* u32 */
+
+	DEVLINK_ATTR_RATE_TYPE,			/* u16 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 69681f19388e..3b785f51156f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -190,6 +190,25 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
 	return devlink_port_get_from_attrs(devlink, info->attrs);
 }
 
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+	return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	struct devlink_rate *devlink_rate;
+	struct devlink_port *devlink_port;
+
+	devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+	if (IS_ERR(devlink_port))
+		return ERR_CAST(devlink_port);
+	devlink_rate = devlink_port->devlink_rate;
+	return devlink_rate ?: ERR_PTR(-ENODEV);
+}
+
 struct devlink_sb {
 	struct list_head list;
 	unsigned int index;
@@ -408,12 +427,13 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
 
 #define DEVLINK_NL_FLAG_NEED_PORT		BIT(0)
 #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT	BIT(1)
+#define DEVLINK_NL_FLAG_NEED_RATE		BIT(2)
 
 /* The per devlink instance lock is taken by default in the pre-doit
  * operation, yet several commands do not require this. The global
  * devlink lock is taken and protects from disruption by user-calls.
  */
-#define DEVLINK_NL_FLAG_NO_LOCK			BIT(2)
+#define DEVLINK_NL_FLAG_NO_LOCK			BIT(3)
 
 static int devlink_nl_pre_doit(const struct genl_ops *ops,
 			       struct sk_buff *skb, struct genl_info *info)
@@ -442,6 +462,15 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 		devlink_port = devlink_port_get_from_info(devlink, info);
 		if (!IS_ERR(devlink_port))
 			info->user_ptr[1] = devlink_port;
+	} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
+		struct devlink_rate *devlink_rate;
+
+		devlink_rate = devlink_rate_leaf_get_from_info(devlink, info);
+		if (IS_ERR(devlink_rate)) {
+			err = PTR_ERR(devlink_rate);
+			goto unlock;
+		}
+		info->user_ptr[1] = devlink_rate;
 	}
 	return 0;
 
@@ -749,6 +778,39 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
 	return 0;
 }
 
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+				struct devlink *devlink,
+				struct devlink_rate *devlink_rate,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags,
+				struct netlink_ext_ack *extack)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+		goto nla_put_failure;
+
+	if (devlink_rate_is_leaf(devlink_rate)) {
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+				devlink_rate->devlink_port->index))
+			goto nla_put_failure;
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
 static bool
 devlink_port_fn_state_valid(enum devlink_port_fn_state state)
 {
@@ -920,6 +982,99 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
 				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+				enum devlink_command cmd)
+{
+	struct devlink *devlink = devlink_rate->devlink;
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
+		cmd != DEVLINK_CMD_RATE_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
+				   cmd, 0, 0, 0, NULL);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink_rate *devlink_rate;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err = 0;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+			enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+			u32 id = NETLINK_CB(cb->skb).portid;
+
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_rate_fill(msg, devlink,
+						   devlink_rate,
+						   cmd, id,
+						   cb->nlh->nlmsg_seq,
+						   NLM_F_MULTI, NULL);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+	if (err != -EMSGSIZE)
+		return err;
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink_rate *devlink_rate = info->user_ptr[1];
+	struct devlink *devlink = devlink_rate->devlink;
+	struct sk_buff *msg;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
+				   DEVLINK_CMD_RATE_NEW,
+				   info->snd_portid, info->snd_seq, 0,
+				   info->extack);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
 static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
@@ -7802,6 +7957,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
 	[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
 };
 
 static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7827,6 +7983,13 @@ static const struct genl_small_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
 	},
+	{
+		.cmd = DEVLINK_CMD_RATE_GET,
+		.doit = devlink_nl_cmd_rate_get_doit,
+		.dumpit = devlink_nl_cmd_rate_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+		/* can be retrieved by unprivileged users */
+	},
 	{
 		.cmd = DEVLINK_CMD_PORT_SPLIT,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -8202,6 +8365,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
 	__devlink_net_set(devlink, &init_net);
 	INIT_LIST_HEAD(&devlink->port_list);
+	INIT_LIST_HEAD(&devlink->rate_list);
 	INIT_LIST_HEAD(&devlink->sb_list);
 	INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
 	INIT_LIST_HEAD(&devlink->resource_list);
@@ -8304,6 +8468,7 @@ void devlink_free(struct devlink *devlink)
 	WARN_ON(!list_empty(&devlink->resource_list));
 	WARN_ON(!list_empty(&devlink->dpipe_table_list));
 	WARN_ON(!list_empty(&devlink->sb_list));
+	WARN_ON(!list_empty(&devlink->rate_list));
 	WARN_ON(!list_empty(&devlink->port_list));
 
 	xa_destroy(&devlink->snapshot_ids);
@@ -8620,6 +8785,68 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
 }
 EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
 
+/**
+ * devlink_rate_leaf_create - create devlink rate leaf
+ *
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ *
+ * Create devlink rate object of type leaf on provided @devlink_port.
+ * Throws call trace if @devlink_port already has a devlink rate object.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ *
+ * Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
+ */
+int
+devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+{
+	struct devlink *devlink = devlink_port->devlink;
+	struct devlink_rate *devlink_rate;
+
+	devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
+	if (!devlink_rate)
+		return -ENOMEM;
+
+	mutex_lock(&devlink->lock);
+	WARN_ON(devlink_port->devlink_rate);
+	devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+	devlink_rate->devlink = devlink;
+	devlink_rate->devlink_port = devlink_port;
+	devlink_rate->priv = priv;
+	list_add_tail(&devlink_rate->list, &devlink->rate_list);
+	devlink_port->devlink_rate = devlink_rate;
+	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+	mutex_unlock(&devlink->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
+
+/**
+ * devlink_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+	struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+	struct devlink *devlink = devlink_port->devlink;
+
+	if (!devlink_rate)
+		return;
+
+	mutex_lock(&devlink->lock);
+	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+	list_del(&devlink_rate->list);
+	devlink_port->devlink_rate = NULL;
+	mutex_unlock(&devlink->lock);
+	kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
+
 static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 					     char *name, size_t len)
 {

From 885dfe121b3862d0cc1de98a69f1ca37d18e3495 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:20 +0300
Subject: [PATCH 0865/3292] netdevsim: Register devlink rate leaf objects per
 VF

Register devlink rate leaf objects per VF.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index ed9ce083d0ad..356287a2c320 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -1055,11 +1055,20 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ
 		goto err_port_debugfs_exit;
 	}
 
+	if (nsim_dev_port_is_vf(nsim_dev_port)) {
+		err = devlink_rate_leaf_create(&nsim_dev_port->devlink_port,
+					       nsim_dev_port);
+		if (err)
+			goto err_nsim_destroy;
+	}
+
 	devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev);
 	list_add(&nsim_dev_port->list, &nsim_dev->port_list);
 
 	return 0;
 
+err_nsim_destroy:
+	nsim_destroy(nsim_dev_port->ns);
 err_port_debugfs_exit:
 	nsim_dev_port_debugfs_exit(nsim_dev_port);
 err_dl_port_unregister:
@@ -1074,6 +1083,8 @@ static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port)
 	struct devlink_port *devlink_port = &nsim_dev_port->devlink_port;
 
 	list_del(&nsim_dev_port->list);
+	if (nsim_dev_port_is_vf(nsim_dev_port))
+		devlink_rate_leaf_destroy(&nsim_dev_port->devlink_port);
 	devlink_port_type_clear(devlink_port);
 	nsim_destroy(nsim_dev_port->ns);
 	nsim_dev_port_debugfs_exit(nsim_dev_port);

From a27d8e352bf252eb44b04c9e628a8d759956bdd2 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:21 +0300
Subject: [PATCH 0866/3292] selftest: netdevsim: Add devlink rate test

Test verifies that all netdevsim VF ports have rate leaf object created
by default.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/netdevsim/devlink.sh          | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 40909c254365..c654be0e8300 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding
 
 ALL_TESTS="fw_flash_test params_test regions_test reload_test \
 	   netns_reload_test resource_test dev_info_test \
-	   empty_reporter_test dummy_reporter_test"
+	   empty_reporter_test dummy_reporter_test rate_test"
 NUM_NETIFS=0
 source $lib_dir/lib.sh
 
 BUS_ADDR=10
 PORT_COUNT=4
+VF_COUNT=4
 DEV_NAME=netdevsim$BUS_ADDR
 SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
 DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
@@ -507,6 +508,28 @@ dummy_reporter_test()
 	log_test "dummy reporter test"
 }
 
+rate_leafs_get()
+{
+	local handle=$1
+
+	cmd_jq "devlink port function rate show -j" \
+	       '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+rate_test()
+{
+	RET=0
+
+	echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+	devlink dev eswitch set $DL_HANDLE mode switchdev
+	local leafs=`rate_leafs_get $DL_HANDLE`
+	local num_leafs=`echo $leafs | wc -w`
+	[ "$num_leafs" == "$VF_COUNT" ]
+	check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+	log_test "rate test"
+}
+
 setup_prepare()
 {
 	modprobe netdevsim

From 1897db2ec3109eb1dd07b357c95c5e03d54e41b9 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:22 +0300
Subject: [PATCH 0867/3292] devlink: Allow setting tx rate for devlink rate
 leaf objects

Implement support for DEVLINK_CMD_RATE_SET command with new attributes
DEVLINK_ATTR_RATE_TX_{SHARE|MAX} that are used to set devlink rate
shared/max tx rate values. Extend devlink ops with new callbacks
rate_leaf_tx_{share|max}_set() to allow supporting drivers to implement
rate control through devlink.

New attributes are optional. Driver implementations are allowed to
support either or both of them.

Shared rate example:

$ devlink port function rate set netdevsim/netdevsim10/0 tx_share 10mbit

$ devlink port function rate show netdevsim/netdevsim10/0
netdevsim/netdevsim10/0: type leaf tx_share 10mbit

Max rate example:

$ devlink port function rate set netdevsim/netdevsim10/0 tx_max 100mbit

$ devlink port function rate show netdevsim/netdevsim10/0
netdevsim/netdevsim10/0: type leaf tx_max 100mbit

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 10 +++++
 include/uapi/linux/devlink.h |  2 +
 net/core/devlink.c           | 86 ++++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 2f5954d96c3e..46d553545f83 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -139,6 +139,8 @@ struct devlink_rate {
 	enum devlink_rate_type type;
 	struct devlink *devlink;
 	void *priv;
+	u64 tx_share;
+	u64 tx_max;
 
 	struct devlink_port *devlink_port;
 };
@@ -1465,6 +1467,14 @@ struct devlink_ops {
 				 struct devlink_port *port,
 				 enum devlink_port_fn_state state,
 				 struct netlink_ext_ack *extack);
+
+	/**
+	 * Rate control callbacks.
+	 */
+	int (*rate_leaf_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
+				      u64 tx_share, struct netlink_ext_ack *extack);
+	int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
+				    u64 tx_max, struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 0c27b45c47db..ae94cd2a1078 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -545,6 +545,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_PCI_SF_NUMBER,	/* u32 */
 
 	DEVLINK_ATTR_RATE_TYPE,			/* u16 */
+	DEVLINK_ATTR_RATE_TX_SHARE,		/* u64 */
+	DEVLINK_ATTR_RATE_TX_MAX,		/* u64 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3b785f51156f..37839fd5ca73 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -803,6 +803,14 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
 			goto nla_put_failure;
 	}
 
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+			      devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
+			      devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -1495,6 +1503,76 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
 	return devlink->ops->port_del(devlink, port_index, extack);
 }
 
+static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
+			       const struct devlink_ops *ops,
+			       struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+	u64 rate;
+	int err;
+
+	if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
+		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
+		err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+						  rate, info->extack);
+		if (err)
+			return err;
+		devlink_rate->tx_share = rate;
+	}
+
+	if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
+		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
+		err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+						rate, info->extack);
+		if (err)
+			return err;
+		devlink_rate->tx_max = rate;
+	}
+
+	return 0;
+}
+
+static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
+					   struct genl_info *info,
+					   enum devlink_rate_type type)
+{
+	struct nlattr **attrs = info->attrs;
+
+	if (type == DEVLINK_RATE_TYPE_LEAF) {
+		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
+			NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
+			NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
+			return false;
+		}
+	} else {
+		WARN_ON("Unknown type of rate object");
+		return false;
+	}
+
+	return true;
+}
+
+static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink_rate *devlink_rate = info->user_ptr[1];
+	struct devlink *devlink = devlink_rate->devlink;
+	const struct devlink_ops *ops = devlink->ops;
+	int err;
+
+	if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
+		return -EOPNOTSUPP;
+
+	err = devlink_nl_rate_set(devlink_rate, ops, info);
+
+	if (!err)
+		devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+	return err;
+}
+
 static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
 			      struct devlink_sb *devlink_sb,
 			      enum devlink_command cmd, u32 portid,
@@ -7958,6 +8036,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
+	[DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
+	[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
 };
 
 static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7990,6 +8070,12 @@ static const struct genl_small_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = DEVLINK_CMD_RATE_SET,
+		.doit = devlink_nl_cmd_rate_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+	},
 	{
 		.cmd = DEVLINK_CMD_PORT_SPLIT,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,

From 605c4f8f199b8374cf01624822aa0b5f2c09d1c7 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:23 +0300
Subject: [PATCH 0868/3292] netdevsim: Implement devlink rate leafs tx rate
 support

Implement new devlink ops that allow shared and max tx rate control for
devlink port rate objects (leafs) through devlink API.

Expose rate values of VF ports to netdevsim debugfs.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c | 78 +++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 3 deletions(-)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 356287a2c320..5be6f7e86711 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -276,17 +276,26 @@ static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
 static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 				      struct nsim_dev_port *nsim_dev_port)
 {
+	struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev;
+	unsigned int port_index = nsim_dev_port->port_index;
 	char port_ddir_name[16];
 	char dev_link_name[32];
 
-	sprintf(port_ddir_name, "%u", nsim_dev_port->port_index);
+	sprintf(port_ddir_name, "%u", port_index);
 	nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name,
 						 nsim_dev->ports_ddir);
 	if (IS_ERR(nsim_dev_port->ddir))
 		return PTR_ERR(nsim_dev_port->ddir);
 
-	sprintf(dev_link_name, "../../../" DRV_NAME "%u",
-		nsim_dev->nsim_bus_dev->dev.id);
+	sprintf(dev_link_name, "../../../" DRV_NAME "%u", nsim_bus_dev->dev.id);
+	if (nsim_dev_port_is_vf(nsim_dev_port)) {
+		unsigned int vf_id = nsim_dev_port_index_to_vf_index(port_index);
+
+		debugfs_create_u16("tx_share", 0400, nsim_dev_port->ddir,
+				   &nsim_bus_dev->vfconfigs[vf_id].min_tx_rate);
+		debugfs_create_u16("tx_max", 0400, nsim_dev_port->ddir,
+				   &nsim_bus_dev->vfconfigs[vf_id].max_tx_rate);
+	}
 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
 
 	return 0;
@@ -990,6 +999,67 @@ nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink,
 	return 0;
 }
 
+#define NSIM_LINK_SPEED_MAX     5000 /* Mbps */
+#define NSIM_LINK_SPEED_UNIT    125000 /* 1 Mbps given in bytes/sec to avoid
+					* u64 overflow during conversion from
+					* bytes to bits.
+					*/
+
+static int nsim_rate_bytes_to_units(char *name, u64 *rate, struct netlink_ext_ack *extack)
+{
+	u64 val;
+	u32 rem;
+
+	val = div_u64_rem(*rate, NSIM_LINK_SPEED_UNIT, &rem);
+	if (rem) {
+		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
+		       name, *rate);
+		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps.");
+		return -EINVAL;
+	}
+
+	if (val > NSIM_LINK_SPEED_MAX) {
+		pr_err("%s rate value %lluMbps exceed link maximum speed 5000Mbps.\n",
+		       name, val);
+		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed 5000Mbps.");
+		return -EINVAL;
+	}
+	*rate = val;
+	return 0;
+}
+
+static int nsim_leaf_tx_share_set(struct devlink_rate *devlink_rate, void *priv,
+				  u64 tx_share, struct netlink_ext_ack *extack)
+{
+	struct nsim_dev_port *nsim_dev_port = priv;
+	struct nsim_bus_dev *nsim_bus_dev = nsim_dev_port->ns->nsim_bus_dev;
+	int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index);
+	int err;
+
+	err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack);
+	if (err)
+		return err;
+
+	nsim_bus_dev->vfconfigs[vf_id].min_tx_rate = tx_share;
+	return 0;
+}
+
+static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv,
+				u64 tx_max, struct netlink_ext_ack *extack)
+{
+	struct nsim_dev_port *nsim_dev_port = priv;
+	struct nsim_bus_dev *nsim_bus_dev = nsim_dev_port->ns->nsim_bus_dev;
+	int vf_id = nsim_dev_port_index_to_vf_index(nsim_dev_port->port_index);
+	int err;
+
+	err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack);
+	if (err)
+		return err;
+
+	nsim_bus_dev->vfconfigs[vf_id].max_tx_rate = tx_max;
+	return 0;
+}
+
 static const struct devlink_ops nsim_dev_devlink_ops = {
 	.eswitch_mode_set = nsim_devlink_eswitch_mode_set,
 	.eswitch_mode_get = nsim_devlink_eswitch_mode_get,
@@ -1005,6 +1075,8 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 	.trap_group_set = nsim_dev_devlink_trap_group_set,
 	.trap_policer_set = nsim_dev_devlink_trap_policer_set,
 	.trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get,
+	.rate_leaf_tx_share_set = nsim_leaf_tx_share_set,
+	.rate_leaf_tx_max_set = nsim_leaf_tx_max_set,
 };
 
 #define NSIM_DEV_MAX_MACS_DEFAULT 32

From 31f0723336063f20ce81cc16ca4775979ff0a26b Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:24 +0300
Subject: [PATCH 0869/3292] selftest: netdevsim: Add devlink port shared/max tx
 rate test

Test verifies that netdevsim VFs can set and retrieve shared/max tx
rate through new devlink API.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/netdevsim/devlink.sh          | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index c654be0e8300..05dcefc054d1 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -516,6 +516,45 @@ rate_leafs_get()
 	       '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
 }
 
+rate_attr_set()
+{
+	local handle=$1
+	local name=$2
+	local value=$3
+	local units=$4
+
+	devlink port function rate set $handle $name $value$units
+}
+
+rate_attr_get()
+{
+	local handle=$1
+	local name=$2
+
+	cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+rate_attr_tx_rate_check()
+{
+	local handle=$1
+	local name=$2
+	local rate=$3
+	local debug_file=$4
+
+	rate_attr_set $handle $name $rate mbit
+	check_err $? "Failed to set $name value"
+
+	local debug_value=$(cat $debug_file)
+	check_err $? "Failed to read $name value from debugfs"
+	[ "$debug_value" == "$rate" ]
+	check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+	local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 ))
+	check_err $? "Failed to get $name attr value"
+	[ "$api_value" == "$rate" ]
+	check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
 rate_test()
 {
 	RET=0
@@ -527,6 +566,22 @@ rate_test()
 	[ "$num_leafs" == "$VF_COUNT" ]
 	check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
 
+	rate=10
+	for r_obj in $leafs
+	do
+		rate_attr_tx_rate_check $r_obj tx_share $rate \
+			$DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+		rate=$(($rate+10))
+	done
+
+	rate=100
+	for r_obj in $leafs
+	do
+		rate_attr_tx_rate_check $r_obj tx_max $rate \
+			$DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+		rate=$(($rate+100))
+	done
+
 	log_test "rate test"
 }
 

From a8ecb93ef03de4c59fb6289f99bc9616a852c917 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:25 +0300
Subject: [PATCH 0870/3292] devlink: Introduce rate nodes

Implement support for DEVLINK_CMD_RATE_{NEW|DEL} commands that are used
to create and delete devlink rate nodes. Add new attribute
DEVLINK_ATTR_RATE_NODE_NAME that specify node name string. The node name
is an alphanumeric identifier. No valid node name can be a devlink port
index, eg. decimal number. Extend devlink ops with new callbacks
rate_node_{new|del}() and rate_node_tx_{share|max}_set() to allow
supporting drivers to implement ports rate grouping and setting tx rate
of rate nodes through devlink.
Expose devlink_rate_nodes_destroy() function to allow vendor driver do
proper cleanup of internally allocated resources for the nodes if the
driver goes down or due to any other reasons which requires nodes to be
destroyed.
Disallow moving device from switchdev to legacy mode if any node exists
on that device. User must explicitly delete nodes before switching mode.

Example:

$ devlink port function rate add netdevsim/netdevsim10/group1

$ devlink port function rate set netdevsim/netdevsim10/group1 \
        tx_share 10mbit tx_max 100mbit

Add + set command can be combined:

$ devlink port function rate add netdevsim/netdevsim10/group1 \
        tx_share 10mbit tx_max 100mbit

$ devlink port function rate show netdevsim/netdevsim10/group1
netdevsim/netdevsim10/group1: type node tx_share 10mbit tx_max 100mbit

$ devlink port function rate del netdevsim/netdevsim10/group1

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  14 ++-
 include/uapi/linux/devlink.h |   3 +
 net/core/devlink.c           | 238 +++++++++++++++++++++++++++++++++--
 3 files changed, 247 insertions(+), 8 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 46d553545f83..13162b579124 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -142,7 +142,10 @@ struct devlink_rate {
 	u64 tx_share;
 	u64 tx_max;
 
-	struct devlink_port *devlink_port;
+	union {
+		struct devlink_port *devlink_port;
+		char *name;
+	};
 };
 
 struct devlink_port {
@@ -1475,6 +1478,14 @@ struct devlink_ops {
 				      u64 tx_share, struct netlink_ext_ack *extack);
 	int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
 				    u64 tx_max, struct netlink_ext_ack *extack);
+	int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
+				      u64 tx_share, struct netlink_ext_ack *extack);
+	int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
+				    u64 tx_max, struct netlink_ext_ack *extack);
+	int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
+			     struct netlink_ext_ack *extack);
+	int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
+			     struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
@@ -1536,6 +1547,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
 				   bool external);
 int devlink_rate_leaf_create(struct devlink_port *port, void *priv);
 void devlink_rate_leaf_destroy(struct devlink_port *devlink_port);
+void devlink_rate_nodes_destroy(struct devlink *devlink);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
 			u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ae94cd2a1078..7e15853b77fe 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -213,6 +213,7 @@ enum devlink_port_flavour {
 
 enum devlink_rate_type {
 	DEVLINK_RATE_TYPE_LEAF,
+	DEVLINK_RATE_TYPE_NODE,
 };
 
 enum devlink_param_cmode {
@@ -547,6 +548,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_RATE_TYPE,			/* u16 */
 	DEVLINK_ATTR_RATE_TX_SHARE,		/* u64 */
 	DEVLINK_ATTR_RATE_TX_MAX,		/* u64 */
+	DEVLINK_ATTR_RATE_NODE_NAME,		/* string */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 37839fd5ca73..589d750b70e4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -196,6 +196,12 @@ devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
 	return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
 }
 
+static inline bool
+devlink_rate_is_node(struct devlink_rate *devlink_rate)
+{
+	return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
+}
+
 static struct devlink_rate *
 devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
 {
@@ -209,6 +215,55 @@ devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
 	return devlink_rate ?: ERR_PTR(-ENODEV);
 }
 
+static struct devlink_rate *
+devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
+{
+	static struct devlink_rate *devlink_rate;
+
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+		if (devlink_rate_is_node(devlink_rate) &&
+		    !strcmp(node_name, devlink_rate->name))
+			return devlink_rate;
+	}
+	return ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+	const char *rate_node_name;
+	size_t len;
+
+	if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+		return ERR_PTR(-EINVAL);
+	rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
+	len = strlen(rate_node_name);
+	/* Name cannot be empty or decimal number */
+	if (!len || strspn(rate_node_name, "0123456789") == len)
+		return ERR_PTR(-EINVAL);
+
+	return devlink_rate_node_get_by_name(devlink, rate_node_name);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	return devlink_rate_node_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+
+	if (attrs[DEVLINK_ATTR_PORT_INDEX])
+		return devlink_rate_leaf_get_from_info(devlink, info);
+	else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+		return devlink_rate_node_get_from_info(devlink, info);
+	else
+		return ERR_PTR(-EINVAL);
+}
+
 struct devlink_sb {
 	struct list_head list;
 	unsigned int index;
@@ -428,12 +483,13 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
 #define DEVLINK_NL_FLAG_NEED_PORT		BIT(0)
 #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_RATE		BIT(2)
+#define DEVLINK_NL_FLAG_NEED_RATE_NODE		BIT(3)
 
 /* The per devlink instance lock is taken by default in the pre-doit
  * operation, yet several commands do not require this. The global
  * devlink lock is taken and protects from disruption by user-calls.
  */
-#define DEVLINK_NL_FLAG_NO_LOCK			BIT(3)
+#define DEVLINK_NL_FLAG_NO_LOCK			BIT(4)
 
 static int devlink_nl_pre_doit(const struct genl_ops *ops,
 			       struct sk_buff *skb, struct genl_info *info)
@@ -465,12 +521,21 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 	} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
 		struct devlink_rate *devlink_rate;
 
-		devlink_rate = devlink_rate_leaf_get_from_info(devlink, info);
+		devlink_rate = devlink_rate_get_from_info(devlink, info);
 		if (IS_ERR(devlink_rate)) {
 			err = PTR_ERR(devlink_rate);
 			goto unlock;
 		}
 		info->user_ptr[1] = devlink_rate;
+	} else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
+		struct devlink_rate *rate_node;
+
+		rate_node = devlink_rate_node_get_from_info(devlink, info);
+		if (IS_ERR(rate_node)) {
+			err = PTR_ERR(rate_node);
+			goto unlock;
+		}
+		info->user_ptr[1] = rate_node;
 	}
 	return 0;
 
@@ -801,6 +866,10 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
 		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
 				devlink_rate->devlink_port->index))
 			goto nla_put_failure;
+	} else if (devlink_rate_is_node(devlink_rate)) {
+		if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
+				   devlink_rate->name))
+			goto nla_put_failure;
 	}
 
 	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
@@ -1508,13 +1577,17 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
 			       struct genl_info *info)
 {
 	struct nlattr **attrs = info->attrs;
+	int err = -EOPNOTSUPP;
 	u64 rate;
-	int err;
 
 	if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
 		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
-		err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
-						  rate, info->extack);
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+							  rate, info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
+							  rate, info->extack);
 		if (err)
 			return err;
 		devlink_rate->tx_share = rate;
@@ -1522,8 +1595,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
 
 	if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
 		rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
-		err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
-						rate, info->extack);
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+							rate, info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
+							rate, info->extack);
 		if (err)
 			return err;
 		devlink_rate->tx_max = rate;
@@ -1547,6 +1624,15 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
 			NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
 			return false;
 		}
+	} else if (type == DEVLINK_RATE_TYPE_NODE) {
+		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
+			NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
+			return false;
+		}
+		if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
+			NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
+			return false;
+		}
 	} else {
 		WARN_ON("Unknown type of rate object");
 		return false;
@@ -1573,6 +1659,78 @@ static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
 	return err;
 }
 
+static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_rate *rate_node;
+	const struct devlink_ops *ops;
+	int err;
+
+	ops = devlink->ops;
+	if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
+		return -EOPNOTSUPP;
+
+	rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
+	if (!IS_ERR(rate_node))
+		return -EEXIST;
+	else if (rate_node == ERR_PTR(-EINVAL))
+		return -EINVAL;
+
+	rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+	if (!rate_node)
+		return -ENOMEM;
+
+	rate_node->devlink = devlink;
+	rate_node->type = DEVLINK_RATE_TYPE_NODE;
+	rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
+	if (!rate_node->name) {
+		err = -ENOMEM;
+		goto err_strdup;
+	}
+
+	err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
+	if (err)
+		goto err_node_new;
+
+	err = devlink_nl_rate_set(rate_node, ops, info);
+	if (err)
+		goto err_rate_set;
+
+	list_add(&rate_node->list, &devlink->rate_list);
+	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+	return 0;
+
+err_rate_set:
+	ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+err_node_new:
+	kfree(rate_node->name);
+err_strdup:
+	kfree(rate_node);
+	return err;
+}
+
+static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink_rate *rate_node = info->user_ptr[1];
+	struct devlink *devlink = rate_node->devlink;
+	const struct devlink_ops *ops = devlink->ops;
+	int err;
+
+	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+	err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+	list_del(&rate_node->list);
+	kfree(rate_node->name);
+	kfree(rate_node);
+	return err;
+}
+
 static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
 			      struct devlink_sb *devlink_sb,
 			      enum devlink_command cmd, u32 portid,
@@ -2441,6 +2599,30 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
 	return genlmsg_reply(msg, info);
 }
 
+static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+				    struct netlink_ext_ack *extack)
+{
+	struct devlink_rate *devlink_rate;
+	u16 old_mode;
+	int err;
+
+	if (!devlink->ops->eswitch_mode_get)
+		return -EOPNOTSUPP;
+	err = devlink->ops->eswitch_mode_get(devlink, &old_mode);
+	if (err)
+		return err;
+
+	if (old_mode == mode)
+		return 0;
+
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list)
+		if (devlink_rate_is_node(devlink_rate)) {
+			NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
+			return -EBUSY;
+		}
+	return 0;
+}
+
 static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
 					   struct genl_info *info)
 {
@@ -2455,6 +2637,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
 		if (!ops->eswitch_mode_set)
 			return -EOPNOTSUPP;
 		mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+		err = devlink_rate_nodes_check(devlink, mode, info->extack);
+		if (err)
+			return err;
 		err = ops->eswitch_mode_set(devlink, mode, info->extack);
 		if (err)
 			return err;
@@ -8038,6 +8223,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
 	[DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
+	[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_small_ops devlink_nl_ops[] = {
@@ -8076,6 +8262,17 @@ static const struct genl_small_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
 	},
+	{
+		.cmd = DEVLINK_CMD_RATE_NEW,
+		.doit = devlink_nl_cmd_rate_new_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = DEVLINK_CMD_RATE_DEL,
+		.doit = devlink_nl_cmd_rate_del_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
+	},
 	{
 		.cmd = DEVLINK_CMD_PORT_SPLIT,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -8933,6 +9130,33 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
 }
 EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
 
+/**
+ * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
+ *
+ * @devlink: devlink instance
+ *
+ * Destroy all rate nodes on specified device
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_rate_nodes_destroy(struct devlink *devlink)
+{
+	static struct devlink_rate *devlink_rate, *tmp;
+	const struct devlink_ops *ops = devlink->ops;
+
+	mutex_lock(&devlink->lock);
+	list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
+		if (devlink_rate_is_node(devlink_rate)) {
+			ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
+			list_del(&devlink_rate->list);
+			kfree(devlink_rate->name);
+			kfree(devlink_rate);
+		}
+	}
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
+
 static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 					     char *name, size_t len)
 {

From 885226f5680e099ec54564332ea85412372b4958 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:26 +0300
Subject: [PATCH 0871/3292] netdevsim: Implement support for devlink rate nodes

Implement new devlink ops that allow creation, deletion and setting of
shared/max tx rate of devlink rate nodes through devlink API.
Expose rate node and it's tx rates to netdevsim debugfs.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c       | 80 +++++++++++++++++++++++++++++++
 drivers/net/netdevsim/netdevsim.h |  1 +
 2 files changed, 81 insertions(+)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 5be6f7e86711..9f01b6c04cfd 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -263,12 +263,16 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 						nsim_dev->ddir,
 						nsim_dev->nsim_bus_dev,
 						&nsim_dev_max_vfs_fops);
+	nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir);
+	if (IS_ERR(nsim_dev->nodes_ddir))
+		return PTR_ERR(nsim_dev->nodes_ddir);
 	nsim_udp_tunnels_debugfs_create(nsim_dev);
 	return 0;
 }
 
 static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
 {
+	debugfs_remove_recursive(nsim_dev->nodes_ddir);
 	debugfs_remove_recursive(nsim_dev->ports_ddir);
 	debugfs_remove_recursive(nsim_dev->ddir);
 }
@@ -451,8 +455,10 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
 static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port);
 int nsim_esw_legacy_enable(struct nsim_dev *nsim_dev, struct netlink_ext_ack *extack)
 {
+	struct devlink *devlink = priv_to_devlink(nsim_dev);
 	struct nsim_dev_port *nsim_dev_port, *tmp;
 
+	devlink_rate_nodes_destroy(devlink);
 	mutex_lock(&nsim_dev->port_list_lock);
 	list_for_each_entry_safe(nsim_dev_port, tmp, &nsim_dev->port_list, list)
 		if (nsim_dev_port_is_vf(nsim_dev_port))
@@ -1060,6 +1066,76 @@ static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv,
 	return 0;
 }
 
+struct nsim_rate_node {
+	struct dentry *ddir;
+	u16 tx_share;
+	u16 tx_max;
+};
+
+static int nsim_node_tx_share_set(struct devlink_rate *devlink_rate, void *priv,
+				  u64 tx_share, struct netlink_ext_ack *extack)
+{
+	struct nsim_rate_node *nsim_node = priv;
+	int err;
+
+	err = nsim_rate_bytes_to_units("tx_share", &tx_share, extack);
+	if (err)
+		return err;
+
+	nsim_node->tx_share = tx_share;
+	return 0;
+}
+
+static int nsim_node_tx_max_set(struct devlink_rate *devlink_rate, void *priv,
+				u64 tx_max, struct netlink_ext_ack *extack)
+{
+	struct nsim_rate_node *nsim_node = priv;
+	int err;
+
+	err = nsim_rate_bytes_to_units("tx_max", &tx_max, extack);
+	if (err)
+		return err;
+
+	nsim_node->tx_max = tx_max;
+	return 0;
+}
+
+static int nsim_rate_node_new(struct devlink_rate *node, void **priv,
+			      struct netlink_ext_ack *extack)
+{
+	struct nsim_dev *nsim_dev = devlink_priv(node->devlink);
+	struct nsim_rate_node *nsim_node;
+
+	if (!nsim_esw_mode_is_switchdev(nsim_dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode.");
+		return -EOPNOTSUPP;
+	}
+
+	nsim_node = kzalloc(sizeof(*nsim_node), GFP_KERNEL);
+	if (!nsim_node)
+		return -ENOMEM;
+
+	nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir);
+	if (!nsim_node->ddir) {
+		kfree(nsim_node);
+		return -ENOMEM;
+	}
+	debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share);
+	debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max);
+	*priv = nsim_node;
+	return 0;
+}
+
+static int nsim_rate_node_del(struct devlink_rate *node, void *priv,
+			      struct netlink_ext_ack *extack)
+{
+	struct nsim_rate_node *nsim_node = priv;
+
+	debugfs_remove_recursive(nsim_node->ddir);
+	kfree(nsim_node);
+	return 0;
+}
+
 static const struct devlink_ops nsim_dev_devlink_ops = {
 	.eswitch_mode_set = nsim_devlink_eswitch_mode_set,
 	.eswitch_mode_get = nsim_devlink_eswitch_mode_get,
@@ -1077,6 +1153,10 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 	.trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get,
 	.rate_leaf_tx_share_set = nsim_leaf_tx_share_set,
 	.rate_leaf_tx_max_set = nsim_leaf_tx_max_set,
+	.rate_node_tx_share_set = nsim_node_tx_share_set,
+	.rate_node_tx_max_set = nsim_node_tx_max_set,
+	.rate_node_new = nsim_rate_node_new,
+	.rate_node_del = nsim_rate_node_del,
 };
 
 #define NSIM_DEV_MAX_MACS_DEFAULT 32
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 13a0042124f7..d62a1386f9f1 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -222,6 +222,7 @@ struct nsim_dev {
 	struct dentry *ports_ddir;
 	struct dentry *take_snapshot;
 	struct dentry *max_vfs;
+	struct dentry *nodes_ddir;
 	struct bpf_offload_dev *bpf_dev;
 	bool bpf_bind_accept;
 	bool bpf_bind_verifier_accept;

From 413ee943d788610b0675cb343fac5a23d7877613 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:27 +0300
Subject: [PATCH 0872/3292] selftest: netdevsim: Add devlink rate nodes test

Test verifies that it is possible to create, delete and set min/max tx
rate of devlink rate node on netdevsim VF.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/netdevsim/devlink.sh          | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 05dcefc054d1..301d92069f99 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -516,6 +516,14 @@ rate_leafs_get()
 	       '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
 }
 
+rate_nodes_get()
+{
+	local handle=$1
+
+	cmd_jq "devlink port function rate show -j" \
+		'.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
 rate_attr_set()
 {
 	local handle=$1
@@ -555,6 +563,20 @@ rate_attr_tx_rate_check()
 	check_err $? "Unexpected $name attr value $api_value != $rate"
 }
 
+rate_node_add()
+{
+	local handle=$1
+
+	devlink port function rate add $handle
+}
+
+rate_node_del()
+{
+	local handle=$1
+
+	devlink port function rate del $handle
+}
+
 rate_test()
 {
 	RET=0
@@ -582,6 +604,29 @@ rate_test()
 		rate=$(($rate+100))
 	done
 
+	local node1_name='group1'
+	local node1="$DL_HANDLE/$node1_name"
+	rate_node_add "$node1"
+	check_err $? "Failed to add node $node1"
+
+	local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+	[ $num_nodes == 1 ]
+	check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+	local node_tx_share=10
+	rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+		$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+	local node_tx_max=100
+	rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+		$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+	rate_node_del "$node1"
+	check_err $? "Failed to delete node $node1"
+	local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+	[ $num_nodes == 0 ]
+	check_err $? "Expected 0 rate node but got $num_nodes"
+
 	log_test "rate test"
 }
 

From d7555984507822458b32a6405881038241d140be Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:28 +0300
Subject: [PATCH 0873/3292] devlink: Allow setting parent node of rate objects

Refactor DEVLINK_CMD_RATE_{GET|SET} command handlers to support setting
a node as a parent for another rate object (leaf or node) by means of
new attribute DEVLINK_ATTR_RATE_PARENT_NODE_NAME. Extend devlink ops
with new callbacks rate_{leaf|node}_parent_set() to set node as a parent
for rate object to allow supporting drivers to implement rate grouping
through devlink. Driver implementations are allowed to support leafs
or node children only. Invoking callback with NULL as parent should be
threated by the driver as unset parent action.
Extend rate object struct with reference counter to disallow deleting a
node with any child pointing to it. User should unset parent for the
child explicitly.

Example:

$ devlink port function rate add netdevsim/netdevsim10/group1

$ devlink port function rate add netdevsim/netdevsim10/group2

$ devlink port function rate set netdevsim/netdevsim10/group1 parent group2

$ devlink port function rate show netdevsim/netdevsim10/group1
netdevsim/netdevsim10/group1: type node parent group2

$ devlink port function rate set netdevsim/netdevsim10/group1 noparent

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  14 +++-
 include/uapi/linux/devlink.h |   1 +
 net/core/devlink.c           | 125 ++++++++++++++++++++++++++++++++++-
 3 files changed, 137 insertions(+), 3 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 13162b579124..eb045f1b5d1d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -142,9 +142,13 @@ struct devlink_rate {
 	u64 tx_share;
 	u64 tx_max;
 
+	struct devlink_rate *parent;
 	union {
 		struct devlink_port *devlink_port;
-		char *name;
+		struct {
+			char *name;
+			refcount_t refcnt;
+		};
 	};
 };
 
@@ -1486,6 +1490,14 @@ struct devlink_ops {
 			     struct netlink_ext_ack *extack);
 	int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
 			     struct netlink_ext_ack *extack);
+	int (*rate_leaf_parent_set)(struct devlink_rate *child,
+				    struct devlink_rate *parent,
+				    void *priv_child, void *priv_parent,
+				    struct netlink_ext_ack *extack);
+	int (*rate_node_parent_set)(struct devlink_rate *child,
+				    struct devlink_rate *parent,
+				    void *priv_child, void *priv_parent,
+				    struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 7e15853b77fe..32f53a0069d6 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -549,6 +549,7 @@ enum devlink_attr {
 	DEVLINK_ATTR_RATE_TX_SHARE,		/* u64 */
 	DEVLINK_ATTR_RATE_TX_MAX,		/* u64 */
 	DEVLINK_ATTR_RATE_NODE_NAME,		/* string */
+	DEVLINK_ATTR_RATE_PARENT_NODE_NAME,	/* string */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 589d750b70e4..464f56408247 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -880,6 +880,11 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
 			      devlink_rate->tx_max, DEVLINK_ATTR_PAD))
 		goto nla_put_failure;
 
+	if (devlink_rate->parent)
+		if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
+				   devlink_rate->parent->name))
+			goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -1152,6 +1157,18 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
 	return genlmsg_reply(msg, info);
 }
 
+static bool
+devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
+			    struct devlink_rate *parent)
+{
+	while (parent) {
+		if (parent == devlink_rate)
+			return true;
+		parent = parent->parent;
+	}
+	return false;
+}
+
 static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
@@ -1572,11 +1589,75 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
 	return devlink->ops->port_del(devlink, port_index, extack);
 }
 
+static int
+devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
+				struct genl_info *info,
+				struct nlattr *nla_parent)
+{
+	struct devlink *devlink = devlink_rate->devlink;
+	const char *parent_name = nla_data(nla_parent);
+	const struct devlink_ops *ops = devlink->ops;
+	size_t len = strlen(parent_name);
+	struct devlink_rate *parent;
+	int err = -EOPNOTSUPP;
+
+	parent = devlink_rate->parent;
+	if (parent && len) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent.");
+		return -EBUSY;
+	} else if (parent && !len) {
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_parent_set(devlink_rate, NULL,
+							devlink_rate->priv, NULL,
+							info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_parent_set(devlink_rate, NULL,
+							devlink_rate->priv, NULL,
+							info->extack);
+		if (err)
+			return err;
+
+		refcount_dec(&parent->refcnt);
+		devlink_rate->parent = NULL;
+	} else if (!parent && len) {
+		parent = devlink_rate_node_get_by_name(devlink, parent_name);
+		if (IS_ERR(parent))
+			return -ENODEV;
+
+		if (parent == devlink_rate) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed");
+			return -EINVAL;
+		}
+
+		if (devlink_rate_is_node(devlink_rate) &&
+		    devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node.");
+			return -EEXIST;
+		}
+
+		if (devlink_rate_is_leaf(devlink_rate))
+			err = ops->rate_leaf_parent_set(devlink_rate, parent,
+							devlink_rate->priv, parent->priv,
+							info->extack);
+		else if (devlink_rate_is_node(devlink_rate))
+			err = ops->rate_node_parent_set(devlink_rate, parent,
+							devlink_rate->priv, parent->priv,
+							info->extack);
+		if (err)
+			return err;
+
+		refcount_inc(&parent->refcnt);
+		devlink_rate->parent = parent;
+	}
+
+	return 0;
+}
+
 static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
 			       const struct devlink_ops *ops,
 			       struct genl_info *info)
 {
-	struct nlattr **attrs = info->attrs;
+	struct nlattr *nla_parent, **attrs = info->attrs;
 	int err = -EOPNOTSUPP;
 	u64 rate;
 
@@ -1606,6 +1687,14 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
 		devlink_rate->tx_max = rate;
 	}
 
+	nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
+	if (nla_parent) {
+		err = devlink_nl_rate_parent_node_set(devlink_rate, info,
+						      nla_parent);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1624,6 +1713,11 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
 			NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
 			return false;
 		}
+		if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+		    !ops->rate_leaf_parent_set) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs");
+			return false;
+		}
 	} else if (type == DEVLINK_RATE_TYPE_NODE) {
 		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
 			NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
@@ -1633,6 +1727,11 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
 			NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
 			return false;
 		}
+		if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+		    !ops->rate_node_parent_set) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes");
+			return false;
+		}
 	} else {
 		WARN_ON("Unknown type of rate object");
 		return false;
@@ -1702,6 +1801,7 @@ static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
 	if (err)
 		goto err_rate_set;
 
+	refcount_set(&rate_node->refcnt, 1);
 	list_add(&rate_node->list, &devlink->rate_list);
 	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
 	return 0;
@@ -1723,8 +1823,15 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
 	const struct devlink_ops *ops = devlink->ops;
 	int err;
 
+	if (refcount_read(&rate_node->refcnt) > 1) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node.");
+		return -EBUSY;
+	}
+
 	devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
 	err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+	if (rate_node->parent)
+		refcount_dec(&rate_node->parent->refcnt);
 	list_del(&rate_node->list);
 	kfree(rate_node->name);
 	kfree(rate_node);
@@ -8224,6 +8331,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_small_ops devlink_nl_ops[] = {
@@ -9135,7 +9243,8 @@ EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
  *
  * @devlink: devlink instance
  *
- * Destroy all rate nodes on specified device
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
  *
  * Context: Takes and release devlink->lock <mutex>.
  */
@@ -9145,6 +9254,18 @@ void devlink_rate_nodes_destroy(struct devlink *devlink)
 	const struct devlink_ops *ops = devlink->ops;
 
 	mutex_lock(&devlink->lock);
+	list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+		if (!devlink_rate->parent)
+			continue;
+
+		refcount_dec(&devlink_rate->parent->refcnt);
+		if (devlink_rate_is_leaf(devlink_rate))
+			ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
+						  NULL, NULL);
+		else if (devlink_rate_is_node(devlink_rate))
+			ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
+						  NULL, NULL);
+	}
 	list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
 		if (devlink_rate_is_node(devlink_rate)) {
 			ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);

From f3d101b485ca2c831088d72878fe6e7416676cb8 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:29 +0300
Subject: [PATCH 0874/3292] netdevsim: Allow setting parent node of rate
 objects

Implement new devlink ops that allow setting rate node as a parent for
devlink port (leaf) or another devlink node through devlink API.
Expose parent names to netdevsim debugfs in read only mode.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c       | 91 +++++++++++++++++++++++++++++--
 drivers/net/netdevsim/netdevsim.h |  2 +
 2 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 9f01b6c04cfd..527b019ae0b2 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -222,6 +222,7 @@ static const struct file_operations nsim_dev_max_vfs_fops = {
 static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 {
 	char dev_ddir_name[sizeof(DRV_NAME) + 10];
+	int err;
 
 	sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id);
 	nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir);
@@ -264,10 +265,17 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 						nsim_dev->nsim_bus_dev,
 						&nsim_dev_max_vfs_fops);
 	nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir);
-	if (IS_ERR(nsim_dev->nodes_ddir))
-		return PTR_ERR(nsim_dev->nodes_ddir);
+	if (IS_ERR(nsim_dev->nodes_ddir)) {
+		err = PTR_ERR(nsim_dev->nodes_ddir);
+		goto err_out;
+	}
 	nsim_udp_tunnels_debugfs_create(nsim_dev);
 	return 0;
+
+err_out:
+	debugfs_remove_recursive(nsim_dev->ports_ddir);
+	debugfs_remove_recursive(nsim_dev->ddir);
+	return err;
 }
 
 static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
@@ -277,6 +285,27 @@ static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
 	debugfs_remove_recursive(nsim_dev->ddir);
 }
 
+static ssize_t nsim_dev_rate_parent_read(struct file *file,
+					 char __user *data,
+					 size_t count, loff_t *ppos)
+{
+	char **name_ptr = file->private_data;
+	size_t len;
+
+	if (!*name_ptr)
+		return 0;
+
+	len = strlen(*name_ptr);
+	return simple_read_from_buffer(data, count, ppos, *name_ptr, len);
+}
+
+static const struct file_operations nsim_dev_rate_parent_fops = {
+	.open = simple_open,
+	.read = nsim_dev_rate_parent_read,
+	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
+};
+
 static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 				      struct nsim_dev_port *nsim_dev_port)
 {
@@ -299,6 +328,11 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
 				   &nsim_bus_dev->vfconfigs[vf_id].min_tx_rate);
 		debugfs_create_u16("tx_max", 0400, nsim_dev_port->ddir,
 				   &nsim_bus_dev->vfconfigs[vf_id].max_tx_rate);
+		nsim_dev_port->rate_parent = debugfs_create_file("rate_parent",
+								 0400,
+								 nsim_dev_port->ddir,
+								 &nsim_dev_port->parent_name,
+								 &nsim_dev_rate_parent_fops);
 	}
 	debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
 
@@ -1068,6 +1102,8 @@ static int nsim_leaf_tx_max_set(struct devlink_rate *devlink_rate, void *priv,
 
 struct nsim_rate_node {
 	struct dentry *ddir;
+	struct dentry *rate_parent;
+	char *parent_name;
 	u16 tx_share;
 	u16 tx_max;
 };
@@ -1105,6 +1141,7 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv,
 {
 	struct nsim_dev *nsim_dev = devlink_priv(node->devlink);
 	struct nsim_rate_node *nsim_node;
+	int err;
 
 	if (!nsim_esw_mode_is_switchdev(nsim_dev)) {
 		NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode.");
@@ -1117,13 +1154,28 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv,
 
 	nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir);
 	if (!nsim_node->ddir) {
-		kfree(nsim_node);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto err_node;
 	}
 	debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share);
 	debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max);
+	nsim_node->rate_parent = debugfs_create_file("rate_parent", 0400,
+						     nsim_node->ddir,
+						     &nsim_node->parent_name,
+						     &nsim_dev_rate_parent_fops);
+	if (IS_ERR(nsim_node->rate_parent)) {
+		err = PTR_ERR(nsim_node->rate_parent);
+		goto err_ddir;
+	}
+
 	*priv = nsim_node;
 	return 0;
+
+err_ddir:
+	debugfs_remove_recursive(nsim_node->ddir);
+err_node:
+	kfree(nsim_node);
+	return err;
 }
 
 static int nsim_rate_node_del(struct devlink_rate *node, void *priv,
@@ -1131,11 +1183,40 @@ static int nsim_rate_node_del(struct devlink_rate *node, void *priv,
 {
 	struct nsim_rate_node *nsim_node = priv;
 
+	debugfs_remove(nsim_node->rate_parent);
 	debugfs_remove_recursive(nsim_node->ddir);
 	kfree(nsim_node);
 	return 0;
 }
 
+static int nsim_rate_leaf_parent_set(struct devlink_rate *child,
+				     struct devlink_rate *parent,
+				     void *priv_child, void *priv_parent,
+				     struct netlink_ext_ack *extack)
+{
+	struct nsim_dev_port *nsim_dev_port = priv_child;
+
+	if (parent)
+		nsim_dev_port->parent_name = parent->name;
+	else
+		nsim_dev_port->parent_name = NULL;
+	return 0;
+}
+
+static int nsim_rate_node_parent_set(struct devlink_rate *child,
+				     struct devlink_rate *parent,
+				     void *priv_child, void *priv_parent,
+				     struct netlink_ext_ack *extack)
+{
+	struct nsim_rate_node *nsim_node = priv_child;
+
+	if (parent)
+		nsim_node->parent_name = parent->name;
+	else
+		nsim_node->parent_name = NULL;
+	return 0;
+}
+
 static const struct devlink_ops nsim_dev_devlink_ops = {
 	.eswitch_mode_set = nsim_devlink_eswitch_mode_set,
 	.eswitch_mode_get = nsim_devlink_eswitch_mode_get,
@@ -1157,6 +1238,8 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 	.rate_node_tx_max_set = nsim_node_tx_max_set,
 	.rate_node_new = nsim_rate_node_new,
 	.rate_node_del = nsim_rate_node_del,
+	.rate_leaf_parent_set = nsim_rate_leaf_parent_set,
+	.rate_node_parent_set = nsim_rate_node_parent_set,
 };
 
 #define NSIM_DEV_MAX_MACS_DEFAULT 32
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index d62a1386f9f1..cdfdf2a99578 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -211,6 +211,8 @@ struct nsim_dev_port {
 	unsigned int port_index;
 	enum nsim_dev_port_type port_type;
 	struct dentry *ddir;
+	struct dentry *rate_parent;
+	char *parent_name;
 	struct netdevsim *ns;
 };
 

From 1a9c0482f5557f5906294d3327a981bf842ba436 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:30 +0300
Subject: [PATCH 0875/3292] selftest: netdevsim: Add devlink rate grouping test

Test verifies that netdevsim correctly implements devlink ops callbacks
that set node as a parent of devlink leaf or node rate object.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/netdevsim/devlink.sh          | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 301d92069f99..9de1d123f4f5 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -563,6 +563,26 @@ rate_attr_tx_rate_check()
 	check_err $? "Unexpected $name attr value $api_value != $rate"
 }
 
+rate_attr_parent_check()
+{
+	local handle=$1
+	local parent=$2
+	local debug_file=$3
+
+	rate_attr_set $handle parent $parent
+	check_err $? "Failed to set parent"
+
+	debug_value=$(cat $debug_file)
+	check_err $? "Failed to get parent debugfs value"
+	[ "$debug_value" == "$parent" ]
+	check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+	api_value=$(rate_attr_get $r_obj parent)
+	check_err $? "Failed to get parent attr value"
+	[ "$api_value" == "$parent" ]
+	check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
 rate_node_add()
 {
 	local handle=$1
@@ -627,6 +647,28 @@ rate_test()
 	[ $num_nodes == 0 ]
 	check_err $? "Expected 0 rate node but got $num_nodes"
 
+	local node1_name='group1'
+	local node1="$DL_HANDLE/$node1_name"
+	rate_node_add "$node1"
+	check_err $? "Failed to add node $node1"
+
+	rate_attr_parent_check $r_obj $node1_name \
+		$DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+	local node2_name='group2'
+	local node2="$DL_HANDLE/$node2_name"
+	rate_node_add "$node2"
+	check_err $? "Failed to add node $node2"
+
+	rate_attr_parent_check $node2 $node1_name \
+		$DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+	rate_node_del "$node2"
+	check_err $? "Failed to delete node $node2"
+	rate_attr_set "$r_obj" noparent
+	check_err $? "Failed to unset $r_obj parent node"
+	rate_node_del "$node1"
+	check_err $? "Failed to delete node $node1"
+
 	log_test "rate test"
 }
 

From b62767e7bab3a397166a2fa36b409e5e2859f100 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 2 Jun 2021 15:17:31 +0300
Subject: [PATCH 0876/3292] Documentation: devlink rate objects

Add devlink rate objects section at devlink port documentation.
Add devlink rate support info at netdevsim devlink documentation.

Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/devlink/devlink-port.rst       | 35 +++++++++++++++++++
 .../networking/devlink/netdevsim.rst          | 26 ++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst
index ab790e7980b8..7627b1da01f2 100644
--- a/Documentation/networking/devlink/devlink-port.rst
+++ b/Documentation/networking/devlink/devlink-port.rst
@@ -164,6 +164,41 @@ device to instantiate the subfunction device on particular PCI function.
 A subfunction device is created on the :ref:`Documentation/driver-api/auxiliary_bus.rst <auxiliary_bus>`.
 At this point a matching subfunction driver binds to the subfunction's auxiliary device.
 
+Rate object management
+======================
+
+Devlink provides API to manage tx rates of single devlink port or a group.
+This is done through rate objects, which can be one of the two types:
+
+``leaf``
+  Represents a single devlink port; created/destroyed by the driver. Since leaf
+  have 1to1 mapping to its devlink port, in user space it is referred as
+  ``pci/<bus_addr>/<port_index>``;
+
+``node``
+  Represents a group of rate objects (leafs and/or nodes); created/deleted by
+  request from the userspace; initially empty (no rate objects added). In
+  userspace it is referred as ``pci/<bus_addr>/<node_name>``, where
+  ``node_name`` can be any identifier, except decimal number, to avoid
+  collisions with leafs.
+
+API allows to configure following rate object's parameters:
+
+``tx_share``
+  Minimum TX rate value shared among all other rate objects, or rate objects
+  that parts of the parent group, if it is a part of the same group.
+
+``tx_max``
+  Maximum TX rate value.
+
+``parent``
+  Parent node name. Parent node rate limits are considered as additional limits
+  to all node children limits. ``tx_max`` is an upper limit for children.
+  ``tx_share`` is a total bandwidth distributed among children.
+
+Driver implementations are allowed to support both or either rate object types
+and setting methods of their parameters.
+
 Terms and Definitions
 =====================
 
diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst
index 02c2d20dc673..8a292fb5aaea 100644
--- a/Documentation/networking/devlink/netdevsim.rst
+++ b/Documentation/networking/devlink/netdevsim.rst
@@ -57,6 +57,32 @@ entries, FIB rule entries and nexthops that the driver will allow.
     $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16
     $ devlink dev reload netdevsim/netdevsim0
 
+Rate objects
+============
+
+The ``netdevsim`` driver supports rate objects management, which includes:
+
+- registerging/unregistering leaf rate objects per VF devlink port;
+- creation/deletion node rate objects;
+- setting tx_share and tx_max rate values for any rate object type;
+- setting parent node for any rate object type.
+
+Rate nodes and it's parameters are exposed in ``netdevsim`` debugfs in RO mode.
+For example created rate node with name ``some_group``:
+
+.. code:: shell
+
+    $ ls /sys/kernel/debug/netdevsim/netdevsim0/rate_groups/some_group
+    rate_parent  tx_max  tx_share
+
+Same parameters are exposed for leaf objects in corresponding ports directories.
+For ex.:
+
+.. code:: shell
+
+    $ ls /sys/kernel/debug/netdevsim/netdevsim0/ports/1
+    dev  ethtool  rate_parent  tx_max  tx_share
+
 Driver-specific Traps
 =====================
 

From 13ce7e625a3383004181217985a70d16c3cbe8be Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 13 May 2021 12:59:52 +0100
Subject: [PATCH 0877/3292] nvme: remove redundant initialization of variable
 ret

The variable ret is being initialized with a value that is never read,
it is being updated later on. The assignment is redundant and can be
removed.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 37943dc4c2c1..74bf2c7f2b80 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1088,7 +1088,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
 
 static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 {
-	int ret = -EINVAL;
+	int ret;
 	bool changed;
 
 	ret = nvme_rdma_configure_admin_queue(ctrl, new);

From ebd8a93aa4f50e9e013e6aa7fe601b4ce7565c28 Mon Sep 17 00:00:00 2001
From: Alexey Bogoslavsky <Alexey.Bogoslavsky@wdc.com>
Date: Wed, 28 Apr 2021 09:27:36 +0000
Subject: [PATCH 0878/3292] nvme: extend and modify the APST configuration
 algorithm

The algorithm that was used until now for building the APST configuration
table has been found to produce entries with excessively long ITPT
(idle time prior to transition) for devices declaring relatively long
entry and exit latencies for non-operational power states. This leads
to unnecessary waste of power and, as a result, failure to pass
mandatory power consumption tests on Chromebook platforms.

The new algorithm is based on two predefined ITPT values and two
predefined latency tolerances. Based on these values, as well as on
exit and entry latencies reported by the device, the algorithm looks
for up to 2 suitable non-operational power states to use as primary
and secondary APST transition targets. The predefined values are
supplied to the nvme driver as module parameters:

 - apst_primary_timeout_ms (default: 100)
 - apst_secondary_timeout_ms (default: 2000)
 - apst_primary_latency_tol_us (default: 15000)
 - apst_secondary_latency_tol_us (default: 100000)

The algorithm echoes the approach used by Intel's and Microsoft's drivers
on Windows. The specific default parameter values are also based on those
drivers. Yet, this patch doesn't introduce the ability to dynamically
regenerate the APST table in the event of switching the power source from
AC to battery and back. Adding this functionality may be considered in the
future. In the meantime, the timeouts and tolerances reflect a compromise
between values used by Microsoft for AC and battery scenarios.

In most NVMe devices the new algorithm causes them to implement a more
aggressive power saving policy. While beneficial in most cases, this
sometimes comes at the price of a higher IO processing latency in certain
scenarios as well as at the price of a potential impact on the drive's
endurance (due to more frequent context saving when entering deep non-
operational states). So in order to provide a fallback for systems where
these regressions cannot be tolerated, the patch allows to revert to
the legacy behavior by setting either apst_primary_timeout_ms or
apst_primary_latency_tol_us parameter to 0. Eventually (and possibly after
fine tuning the default values of the module parameters) the legacy behavior
can be removed.

TESTING.

The new algorithm has been extensively tested. Initially, simulations were
used to compare APST tables generated by old and new algorithms for a wide
range of devices. After that, power consumption, performance and latencies
were measured under different workloads on devices from multiple vendors
(WD, Intel, Samsung, Hynix, Kioxia). Below is the description of the tests
and the findings.

General observations.
The effect the patch has on the APST table varies depending on the entry and
exit latencies advertised by the devices. For some devices, the effect is
negligible (e.g. Kioxia KBG40ZNS), for some significant, making the
transitions to PS3 and PS4 much quicker (e.g. WD SN530, Intel 760P), or making
the sleep deeper, PS4 rather than PS3 after a similar amount of time (e.g.
SK Hynix BC511). For some devices (e.g. Samsung PM991) the effect is mixed:
the initial transition happens after a longer idle time, but takes the device
to a lower power state.

Workflows.
In order to evaluate the patch's effect on the power consumption and latency,
7 workflows were used for each device. The workflows were designed to test
the scenarios where significant differences between the old and new behaviors
are most likely. Each workflow was tested twice: with the new and with the
old APST table generation implementation. Power consumption, performance and
latency were measured in the process. The following workflows were used:
1) Consecutive write at the maximum rate with IO depth of 2, with no pauses
2) Repeated pattern of 1000 consecutive writes of 4K packets followed by 50ms
   idle time
3) Repeated pattern of 1000 consecutive writes of 4K packets followed by 150ms
   idle time
4) Repeated pattern of 1000 consecutive writes of 4K packets followed by 500ms
   idle time
5) Repeated pattern of 1000 consecutive writes of 4K packets followed by 1.5s
   idle time
6) Repeated pattern of 1000 consecutive writes of 4K packets followed by 5s
   idle time
7) Repeated pattern of a single random read of a 4K packet followed by 150ms
   idle time

Power consumption
Actual power consumption measurements produced predictable results in
accordance with the APST mechanism's theory of operation.
Devices with long entry and exit latencies such as WD SN530 showed huge
improvement on scenarios 4,5 and 6 of up to 62%. Devices such as Kioxia
KBG40ZNS where the resulting APST table looks virtually identical with
both legacy and new algorithms, showed little or no change in the average power
consumption on all workflows. Devices with extra short latencies such as
Samsung PM991 showed moderate increase in power consumption of up to 18% in
worst case scenarios.
In addition, on Intel and Samsung devices a more complex impact was observed
on scenarios 3, 4 and 7. Our understanding is that due to longer stay in deep
non-operational states between the writes the devices start performing background
operations leading to an increase of power consumption. With the old APST tables
part of these operations are delayed until the scenario is over and a longer idle
period begins, but eventually this extra power is consumed anyway.

Performance.
In terms of performance measured on sustained write or read scenarios, the
effect of the patch is minimal as in this case the device doesn't enter low power
states.

Latency
As expected, in devices where the patch causes a more aggressive power saving
policy (e.g. WD SN530, Intel 760P), an increase in latency was observed in
certain scenarios. Workflow number 7, specifically designed to simulate the
worst case scenario as far as latency is concerned, indeed shows a sharp
increase in average latency (~2ms -> ~53ms on Intel 760P and 0.6 -> 10ms on
WD SN530). The latency increase on other workloads and other devices is much
milder or non-existent.

Signed-off-by: Alexey Bogoslavsky <alexey.bogoslavsky@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 89 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 78 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 762125f2905f..e7441ccaa8db 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -57,6 +57,26 @@ static bool force_apst;
 module_param(force_apst, bool, 0644);
 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
 
+static unsigned long apst_primary_timeout_ms = 100;
+module_param(apst_primary_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(apst_primary_timeout_ms,
+	"primary APST timeout in ms");
+
+static unsigned long apst_secondary_timeout_ms = 2000;
+module_param(apst_secondary_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(apst_secondary_timeout_ms,
+	"secondary APST timeout in ms");
+
+static unsigned long apst_primary_latency_tol_us = 15000;
+module_param(apst_primary_latency_tol_us, ulong, 0644);
+MODULE_PARM_DESC(apst_primary_latency_tol_us,
+	"primary APST latency tolerance in us");
+
+static unsigned long apst_secondary_latency_tol_us = 100000;
+module_param(apst_secondary_latency_tol_us, ulong, 0644);
+MODULE_PARM_DESC(apst_secondary_latency_tol_us,
+	"secondary APST latency tolerance in us");
+
 static bool streams;
 module_param(streams, bool, 0644);
 MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
@@ -2217,14 +2237,54 @@ static int nvme_configure_acre(struct nvme_ctrl *ctrl)
 	return ret;
 }
 
+/*
+ * The function checks whether the given total (exlat + enlat) latency of
+ * a power state allows the latter to be used as an APST transition target.
+ * It does so by comparing the latency to the primary and secondary latency
+ * tolerances defined by module params. If there's a match, the corresponding
+ * timeout value is returned and the matching tolerance index (1 or 2) is
+ * reported.
+ */
+static bool nvme_apst_get_transition_time(u64 total_latency,
+		u64 *transition_time, unsigned *last_index)
+{
+	if (total_latency <= apst_primary_latency_tol_us) {
+		if (*last_index == 1)
+			return false;
+		*last_index = 1;
+		*transition_time = apst_primary_timeout_ms;
+		return true;
+	}
+	if (apst_secondary_timeout_ms &&
+		total_latency <= apst_secondary_latency_tol_us) {
+		if (*last_index <= 2)
+			return false;
+		*last_index = 2;
+		*transition_time = apst_secondary_timeout_ms;
+		return true;
+	}
+	return false;
+}
+
 /*
  * APST (Autonomous Power State Transition) lets us program a table of power
  * state transitions that the controller will perform automatically.
- * We configure it with a simple heuristic: we are willing to spend at most 2%
- * of the time transitioning between power states.  Therefore, when running in
- * any given state, we will enter the next lower-power non-operational state
- * after waiting 50 * (enlat + exlat) microseconds, as long as that state's exit
- * latency is under the requested maximum latency.
+ *
+ * Depending on module params, one of the two supported techniques will be used:
+ *
+ * - If the parameters provide explicit timeouts and tolerances, they will be
+ *   used to build a table with up to 2 non-operational states to transition to.
+ *   The default parameter values were selected based on the values used by
+ *   Microsoft's and Intel's NVMe drivers. Yet, since we don't implement dynamic
+ *   regeneration of the APST table in the event of switching between external
+ *   and battery power, the timeouts and tolerances reflect a compromise
+ *   between values used by Microsoft for AC and battery scenarios.
+ * - If not, we'll configure the table with a simple heuristic: we are willing
+ *   to spend at most 2% of the time transitioning between power states.
+ *   Therefore, when running in any given state, we will enter the next
+ *   lower-power non-operational state after waiting 50 * (enlat + exlat)
+ *   microseconds, as long as that state's exit latency is under the requested
+ *   maximum latency.
  *
  * We will not autonomously enter any non-operational state for which the total
  * latency exceeds ps_max_latency_us.
@@ -2240,6 +2300,7 @@ static int nvme_configure_apst(struct nvme_ctrl *ctrl)
 	int max_ps = -1;
 	int state;
 	int ret;
+	unsigned last_lt_index = UINT_MAX;
 
 	/*
 	 * If APST isn't supported or if we haven't been initialized yet,
@@ -2298,13 +2359,19 @@ static int nvme_configure_apst(struct nvme_ctrl *ctrl)
 			le32_to_cpu(ctrl->psd[state].entry_lat);
 
 		/*
-		 * This state is good.  Use it as the APST idle target for
-		 * higher power states.
+		 * This state is good. It can be used as the APST idle target
+		 * for higher power states.
 		 */
-		transition_ms = total_latency_us + 19;
-		do_div(transition_ms, 20);
-		if (transition_ms > (1 << 24) - 1)
-			transition_ms = (1 << 24) - 1;
+		if (apst_primary_timeout_ms && apst_primary_latency_tol_us) {
+			if (!nvme_apst_get_transition_time(total_latency_us,
+					&transition_ms, &last_lt_index))
+				continue;
+		} else {
+			transition_ms = total_latency_us + 19;
+			do_div(transition_ms, 20);
+			if (transition_ms > (1 << 24) - 1)
+				transition_ms = (1 << 24) - 1;
+		}
 
 		target = cpu_to_le64((state << 3) | (transition_ms << 8));
 		if (max_ps == -1)

From e21e0243e7b0f1c2a21d21f4d115f7b37175772a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Fri, 28 May 2021 11:02:34 -0500
Subject: [PATCH 0879/3292] nvme-pci: look for StorageD3Enable on companion
 ACPI device instead

The documentation around the StorageD3Enable property hints that it
should be made on the PCI device.  This is where newer AMD systems set
the property and it's required for S0i3 support.

So rather than look for nodes of the root port only present on Intel
systems, switch to the companion ACPI device for all systems.
David Box from Intel indicated this should work on Intel as well.

Link: https://lore.kernel.org/linux-nvme/YK6gmAWqaRmvpJXb@google.com/T/#m900552229fa455867ee29c33b854845fce80ba70
Link: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/power-management-for-storage-hardware-devices-intro
Fixes: df4f9bc4fb9c ("nvme-pci: add support for ACPI StorageD3Enable property")
Suggested-by: Liang Prike <Prike.Liang@amd.com>
Acked-by: Raul E Rangel <rrangel@chromium.org>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: David E. Box <david.e.box@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a29b170701fc..3aa7245a505f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2831,10 +2831,7 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 #ifdef CONFIG_ACPI
 static bool nvme_acpi_storage_d3(struct pci_dev *dev)
 {
-	struct acpi_device *adev;
-	struct pci_dev *root;
-	acpi_handle handle;
-	acpi_status status;
+	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
 	u8 val;
 
 	/*
@@ -2842,28 +2839,9 @@ static bool nvme_acpi_storage_d3(struct pci_dev *dev)
 	 * must use D3 to support deep platform power savings during
 	 * suspend-to-idle.
 	 */
-	root = pcie_find_root_port(dev);
-	if (!root)
-		return false;
 
-	adev = ACPI_COMPANION(&root->dev);
 	if (!adev)
 		return false;
-
-	/*
-	 * The property is defined in the PXSX device for South complex ports
-	 * and in the PEGP device for North complex ports.
-	 */
-	status = acpi_get_handle(adev->handle, "PXSX", &handle);
-	if (ACPI_FAILURE(status)) {
-		status = acpi_get_handle(adev->handle, "PEGP", &handle);
-		if (ACPI_FAILURE(status))
-			return false;
-	}
-
-	if (acpi_bus_get_device(handle, &adev))
-		return false;
-
 	if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable",
 			&val))
 		return false;

From 3ede8f72a9a2825efca23a3552e80a1202ea88fd Mon Sep 17 00:00:00 2001
From: Martin Belanger <martin.belanger@dell.com>
Date: Thu, 20 May 2021 15:09:34 -0400
Subject: [PATCH 0880/3292] nvme-tcp: allow selecting the network interface for
 connections

In our application, we need a way to force TCP connections to go out a
specific IP interface instead of letting Linux select the interface
based on the routing tables.

Add the 'host-iface' option to allow specifying the interface to use.
When the option host-iface is specified, the driver uses the specified
interface to set the option SO_BINDTODEVICE on the TCP socket before
connecting.

This new option is needed in addtion to the existing host-traddr for
the following reasons:

Specifying an IP interface by its associated IP address is less
intuitive than specifying the actual interface name and, in some cases,
simply doesn't work. That's because the association between interfaces
and IP addresses is not predictable. IP addresses can be changed or can
change by themselves over time (e.g. DHCP). Interface names are
predictable [1] and will persist over time. Consider the following
configuration.

1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state ...
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 100.0.0.100/24 scope global lo
       valid_lft forever preferred_lft forever
2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc ...
    link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
    inet 100.0.0.100/24 scope global enp0s3
       valid_lft forever preferred_lft forever
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc ...
    link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
    inet 100.0.0.100/24 scope global enp0s8
       valid_lft forever preferred_lft forever

The above is a VM that I configured with the same IP address
(100.0.0.100) on all interfaces. Doing a reverse lookup to identify the
unique interface associated with 100.0.0.100 does not work here. And
this is why the option host_iface is required. I understand that the
above config does not represent a standard host system, but I'm using
this to prove a point: "We can never know how users will configure
their systems". By te way, The above configuration is perfectly fine
by Linux.

The current TCP implementation for host_traddr performs a
bind()-before-connect(). This is a common construct to set the source
IP address on a TCP socket before connecting. This has no effect on how
Linux selects the interface for the connection. That's because Linux
uses the Weak End System model as described in RFC1122 [2]. On the other
hand, setting the Source IP Address has benefits and should be supported
by linux-nvme. In fact, setting the Source IP Address is a mandatory
FedGov requirement (e.g. connection to a RADIUS/TACACS+ server).
Consider the following configuration.

$ ip addr list dev enp0s8
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc ...
    link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
    inet 192.168.56.101/24 brd 192.168.56.255 scope global enp0s8
       valid_lft 426sec preferred_lft 426sec
    inet 192.168.56.102/24 scope global secondary enp0s8
       valid_lft forever preferred_lft forever
    inet 192.168.56.103/24 scope global secondary enp0s8
       valid_lft forever preferred_lft forever
    inet 192.168.56.104/24 scope global secondary enp0s8
       valid_lft forever preferred_lft forever

Here we can see that several addresses are associated with interface
enp0s8. By default, Linux always selects the default IP address,
192.168.56.101, as the source address when connecting over interface
enp0s8. Some users, however, want the ability to specify a different
source address (e.g., 192.168.56.102, 192.168.56.103, ...). The option
host_traddr can be used as-is to perform this function.

In conclusion, I believe that we need 2 options for TCP connections.
One that can be used to specify an interface (host-iface). And one that
can be used to set the source address (host-traddr). Users should be
allowed to use one or the other, or both, or none. Of course, the
documentation for host_traddr will need some clarification. It should
state that when used for TCP connection, this option only sets the
source address. And the documentation for host_iface should say that
this option is only available for TCP connections.

References:
[1] https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/
[2] https://tools.ietf.org/html/rfc1122

Tested both IPv4 and IPv6 connections.

Signed-off-by: Martin Belanger <martin.belanger@dell.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c    |  5 +++++
 drivers/nvme/host/fabrics.c | 14 ++++++++++++++
 drivers/nvme/host/fabrics.h |  6 +++++-
 drivers/nvme/host/tcp.c     | 27 ++++++++++++++++++++++++++-
 4 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e7441ccaa8db..bb8b242594f9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4134,6 +4134,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 		ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
 				opts->host_traddr ?: "none");
+		if (ret)
+			return ret;
+
+		ret = add_uevent_var(env, "NVME_HOST_IFACE=%s",
+				opts->host_iface ?: "none");
 	}
 	return ret;
 }
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index a2bb7fc63a73..76dc3eaf46f3 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -112,6 +112,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
 		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
 				(len) ? "," : "", ctrl->opts->host_traddr);
+	if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)
+		len += scnprintf(buf + len, size - len, "%shost_iface=%s",
+				(len) ? "," : "", ctrl->opts->host_iface);
 	len += scnprintf(buf + len, size - len, "\n");
 
 	return len;
@@ -545,6 +548,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
+	{ NVMF_OPT_HOST_IFACE,		"host_iface=%s"		},
 	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
 	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
 	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
@@ -754,6 +758,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			kfree(opts->host_traddr);
 			opts->host_traddr = p;
 			break;
+		case NVMF_OPT_HOST_IFACE:
+			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			kfree(opts->host_iface);
+			opts->host_iface = p;
+			break;
 		case NVMF_OPT_HOST_ID:
 			p = match_strdup(args);
 			if (!p) {
@@ -938,6 +951,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 	kfree(opts->trsvcid);
 	kfree(opts->subsysnqn);
 	kfree(opts->host_traddr);
+	kfree(opts->host_iface);
 	kfree(opts);
 }
 EXPORT_SYMBOL_GPL(nvmf_free_options);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index d7f7974dc208..c31dad69a773 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -66,6 +66,7 @@ enum {
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
 	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
+	NVMF_OPT_HOST_IFACE	= 1 << 21,
 };
 
 /**
@@ -83,7 +84,9 @@ enum {
  * @trsvcid:	The transport-specific TRSVCID field for a port on the
  *              subsystem which is adding a controller.
  * @host_traddr: A transport-specific field identifying the NVME host port
- *              to use for the connection to the controller.
+ *     to use for the connection to the controller.
+ * @host_iface: A transport-specific field identifying the NVME host
+ *     interface to use for the connection to the controller.
  * @queue_size: Number of IO queue elements.
  * @nr_io_queues: Number of controller IO queues that will be established.
  * @reconnect_delay: Time between two consecutive reconnect attempts.
@@ -108,6 +111,7 @@ struct nvmf_ctrl_options {
 	char			*traddr;
 	char			*trsvcid;
 	char			*host_traddr;
+	char			*host_iface;
 	size_t			queue_size;
 	unsigned int		nr_io_queues;
 	unsigned int		reconnect_delay;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 34f4b3402f7c..5fc6c568c626 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -123,6 +123,7 @@ struct nvme_tcp_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct sockaddr_storage addr;
 	struct sockaddr_storage src_addr;
+	struct net_device	*ndev;
 	struct nvme_ctrl	ctrl;
 
 	struct work_struct	err_work;
@@ -1455,6 +1456,20 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 		}
 	}
 
+	if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) {
+		char *iface = nctrl->opts->host_iface;
+		sockptr_t optval = KERNEL_SOCKPTR(iface);
+
+		ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
+				      optval, strlen(iface));
+		if (ret) {
+			dev_err(nctrl->device,
+			  "failed to bind to interface %s queue %d err %d\n",
+			  iface, qid, ret);
+			goto err_sock;
+		}
+	}
+
 	queue->hdr_digest = nctrl->opts->hdr_digest;
 	queue->data_digest = nctrl->opts->data_digest;
 	if (queue->hdr_digest || queue->data_digest) {
@@ -2515,6 +2530,16 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 		}
 	}
 
+	if (opts->mask & NVMF_OPT_HOST_IFACE) {
+		ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface);
+		if (!ctrl->ndev) {
+			pr_err("invalid interface passed: %s\n",
+			       opts->host_iface);
+			ret = -ENODEV;
+			goto out_free_ctrl;
+		}
+	}
+
 	if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
 		ret = -EALREADY;
 		goto out_free_ctrl;
@@ -2571,7 +2596,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
 			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
 			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
-			  NVMF_OPT_TOS,
+			  NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE,
 	.create_ctrl	= nvme_tcp_create_ctrl,
 };
 

From 25e1de8c40c57bb6be4ecd601641691cfd8a7923 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Fri, 21 May 2021 15:41:57 -0700
Subject: [PATCH 0881/3292] nvme-fabrics: fix the kerneldco comment for
 nvmf_log_connect_error()

Fix the comment style that matches existing code.

No functionality change in this patch.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 76dc3eaf46f3..1d20105bb283 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -257,19 +257,15 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 EXPORT_SYMBOL_GPL(nvmf_reg_write32);
 
 /**
- * nvmf_log_connect_error() - Error-parsing-diagnostic print
- * out function for connect() errors.
- *
- * @ctrl: the specific /dev/nvmeX device that had the error.
- *
- * @errval: Error code to be decoded in a more human-friendly
- *	    printout.
- *
- * @offset: For use with the NVMe error code NVME_SC_CONNECT_INVALID_PARAM.
- *
- * @cmd: This is the SQE portion of a submission capsule.
- *
- * @data: This is the "Data" portion of a submission capsule.
+ * nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
+ * 				connect() errors.
+ * @ctrl:	The specific /dev/nvmeX device that had the error.
+ * @errval:	Error code to be decoded in a more human-friendly
+ * 		printout.
+ * @offset:	For use with the NVMe error code
+ * 		NVME_SC_CONNECT_INVALID_PARAM.
+ * @cmd:	This is the SQE portion of a submission capsule.
+ * @data:	This is the "Data" portion of a submission capsule.
  */
 static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 		int errval, int offset, struct nvme_command *cmd,

From 63d20f54a3d0cff17145716caff03a0d161abf44 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Thu, 3 Jun 2021 10:28:03 +0300
Subject: [PATCH 0882/3292] nvme-fabrics: remove extra new lines in the switch

Remove the extra lines in the switch block that is not common practice
in the kernel code.

No functionality change in this patch.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 1d20105bb283..d71ffcbc3296 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -274,7 +274,6 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 	int err_sctype = errval & (~NVME_SC_DNR);
 
 	switch (err_sctype) {
-
 	case (NVME_SC_CONNECT_INVALID_PARAM):
 		if (offset >> 16) {
 			char *inv_data = "Connect Invalid Data Parameter";
@@ -317,24 +316,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 			}
 		}
 		break;
-
 	case NVME_SC_CONNECT_INVALID_HOST:
 		dev_err(ctrl->device,
 			"Connect for subsystem %s is not allowed, hostnqn: %s\n",
 			data->subsysnqn, data->hostnqn);
 		break;
-
 	case NVME_SC_CONNECT_CTRL_BUSY:
 		dev_err(ctrl->device,
 			"Connect command failed: controller is busy or not available\n");
 		break;
-
 	case NVME_SC_CONNECT_FORMAT:
 		dev_err(ctrl->device,
 			"Connect incompatible format: %d",
 			cmd->connect.recfmt);
 		break;
-
+	case NVME_SC_HOST_PATH_ERROR:
+		dev_err(ctrl->device,
+			"Connect command failed: host path error\n");
+		break;
 	default:
 		dev_err(ctrl->device,
 			"Connect command failed, error wo/DNR bit: %d\n",

From 6f860c922532afaae33a968b0d1df3ddf9a8d8a7 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Fri, 21 May 2021 15:41:59 -0700
Subject: [PATCH 0883/3292] nvme-fabrics: remove an extra comment

Remove the comment at the end of the switch that is not needed as
function is small enough.

No functionality change in this patch.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index d71ffcbc3296..78527690c947 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -339,7 +339,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 			"Connect command failed, error wo/DNR bit: %d\n",
 			err_sctype);
 		break;
-	} /* switch (err_sctype) */
+	}
 }
 
 /**

From 97ba6931ba881ea23f3758bbbde7a07a98bff4f9 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Fri, 21 May 2021 15:42:00 -0700
Subject: [PATCH 0884/3292] nvme-fabrics: remove extra braces

No need to use the braces around ~ operator.

No functionality change in this patch.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 78527690c947..1239a63e3ac2 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -271,7 +271,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 		int errval, int offset, struct nvme_command *cmd,
 		struct nvmf_connect_data *data)
 {
-	int err_sctype = errval & (~NVME_SC_DNR);
+	int err_sctype = errval & ~NVME_SC_DNR;
 
 	switch (err_sctype) {
 	case (NVME_SC_CONNECT_INVALID_PARAM):

From f423c85cd392241f1521887b1396038cd1e4c68e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:02:59 +0200
Subject: [PATCH 0885/3292] nvme: open code nvme_put_ns_from_disk in
 nvme_ns_head_chr_ioctl

nvme_ns_head_chr_ioctl is always used on multipath nodes, so just call
srcu_read_unlock and consolidate the two unlock paths.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/ioctl.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 9557ead02de1..0341767ff2e7 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -419,21 +419,19 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 		container_of(cdev, struct nvme_ns_head, cdev);
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
-	int srcu_idx, ret;
+	int srcu_idx, ret = -EWOULDBLOCK;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = nvme_find_path(head);
-	if (!ns) {
-		srcu_read_unlock(&head->srcu, srcu_idx);
-		return -EWOULDBLOCK;
-	}
+	if (!ns)
+		goto out_unlock;
 
 	if (is_ctrl_ioctl(cmd))
 		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
 
 	ret = nvme_ns_ioctl(ns, cmd, argp);
-	nvme_put_ns_from_disk(head, srcu_idx);
-
+out_unlock:
+	srcu_read_unlock(&head->srcu, srcu_idx);
 	return ret;
 }
 #endif /* CONFIG_NVME_MULTIPATH */

From 86b4284d98d6a47033b7bfc5b029a4fc45e4d370 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:04:26 +0200
Subject: [PATCH 0886/3292] nvme: open code nvme_{get,put}_ns_from_disk in
 nvme_ns_head_ioctl

nvme_ns_head_ioctl is always used on multipath nodes, no need to
deal with the de-multiplexers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/ioctl.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 0341767ff2e7..3f84bd3b9259 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -387,14 +387,15 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
-	struct nvme_ns_head *head = NULL;
+	struct nvme_ns_head *head = bdev->bd_disk->private_data;
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
-	int srcu_idx, ret;
+	int srcu_idx, ret = -EWOULDBLOCK;
 
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
-	if (unlikely(!ns))
-		return -EWOULDBLOCK;
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head);
+	if (!ns)
+		goto out_unlock;
 
 	/*
 	 * Handle ioctls that apply to the controller instead of the namespace
@@ -402,12 +403,11 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
 	 * deadlock when deleting namespaces using the passthrough interface.
 	 */
 	if (is_ctrl_ioctl(cmd))
-		ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
-	else {
-		ret = nvme_ns_ioctl(ns, cmd, argp);
-		nvme_put_ns_from_disk(head, srcu_idx);
-	}
+		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
 
+	ret = nvme_ns_ioctl(ns, cmd, argp);
+out_unlock:
+	srcu_read_unlock(&head->srcu, srcu_idx);
 	return ret;
 }
 

From 3e7d1a55165bdce2aaf1139ee8889e68eb29c263 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:08:41 +0200
Subject: [PATCH 0887/3292] nvme: open code nvme_put_ns_from_disk in
 nvme_ns_head_ctrl_ioctl

nvme_ns_head_ctrl_ioctl is always used on multipath nodes, so just call
srcu_read_unlock directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 3f84bd3b9259..2c6969ffe85c 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -377,7 +377,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 	int ret;
 
 	nvme_get_ctrl(ns->ctrl);
-	nvme_put_ns_from_disk(head, srcu_idx);
+	srcu_read_unlock(&head->srcu, srcu_idx);
 	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
 
 	nvme_put_ctrl(ctrl);

From 85b790a7ae0518dd745bbb97d532b83840d2db04 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:09:56 +0200
Subject: [PATCH 0888/3292] nvme: add a sparse annotation to
 nvme_ns_head_ctrl_ioctl

Add the __releases annotation to tell sparse that nvme_ns_head_ctrl_ioctl
is expected to unlock head->srcu.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 2c6969ffe85c..2e7780ea0354 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -372,6 +372,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 		void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+	__releases(&head->srcu)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	int ret;

From d8ca66e82191a9a95926f7f129028bd362202d5d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:11:54 +0200
Subject: [PATCH 0889/3292] nvme: move the CSI sanity check into
 nvme_ns_report_zones

Move the CSI check into nvme_ns_report_zones to clean up the code
a little bit and prepare for further refactoring.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/zns.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 475dd45c3db4..31e789ecd940 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -180,6 +180,9 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
 	unsigned int nz, i;
 	size_t buflen;
 
+	if (ns->head->ids.csi != NVME_CSI_ZNS)
+		return -EINVAL;
+
 	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
 	if (!report)
 		return -ENOMEM;
@@ -237,11 +240,7 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
 	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
 	if (unlikely(!ns))
 		return -EWOULDBLOCK;
-
-	if (ns->head->ids.csi == NVME_CSI_ZNS)
-		ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
-	else
-		ret = -EINVAL;
+	ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
 	nvme_put_ns_from_disk(head, srcu_idx);
 
 	return ret;

From 8b4fb0f968ffe73f619c06cb4040ecaa60327098 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:17:06 +0200
Subject: [PATCH 0890/3292] nvme: split nvme_report_zones

Split multipath support out of nvme_report_zones into a separate helper
and simplify the non-multipath version as a result.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c      | 11 +++++++++++
 drivers/nvme/host/multipath.c | 21 ++++++++++++++++++++-
 drivers/nvme/host/nvme.h      |  7 ++-----
 drivers/nvme/host/zns.c       | 20 ++------------------
 4 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bb8b242594f9..47cfc8a28e45 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2073,6 +2073,17 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 EXPORT_SYMBOL_GPL(nvme_sec_submit);
 #endif /* CONFIG_BLK_SED_OPAL */
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int nvme_report_zones(struct gendisk *disk, sector_t sector,
+		unsigned int nr_zones, report_zones_cb cb, void *data)
+{
+	return nvme_ns_report_zones(disk->private_data, sector, nr_zones, cb,
+			data);
+}
+#else
+#define nvme_report_zones	NULL
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 static const struct block_device_operations nvme_bdev_ops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= nvme_ioctl,
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f81871c7128a..127a17b4c13d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -349,6 +349,25 @@ static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
 	nvme_put_ns_head(disk->private_data);
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector,
+		unsigned int nr_zones, report_zones_cb cb, void *data)
+{
+	struct nvme_ns_head *head = disk->private_data;
+	struct nvme_ns *ns;
+	int srcu_idx, ret = -EWOULDBLOCK;
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head);
+	if (ns)
+		ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
+	srcu_read_unlock(&head->srcu, srcu_idx);
+	return ret;
+}
+#else
+#define nvme_ns_head_report_zones	NULL
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 const struct block_device_operations nvme_ns_head_ops = {
 	.owner		= THIS_MODULE,
 	.submit_bio	= nvme_ns_head_submit_bio,
@@ -356,7 +375,7 @@ const struct block_device_operations nvme_ns_head_ops = {
 	.release	= nvme_ns_head_release,
 	.ioctl		= nvme_ns_head_ioctl,
 	.getgeo		= nvme_getgeo,
-	.report_zones	= nvme_report_zones,
+	.report_zones	= nvme_ns_head_report_zones,
 	.pr_ops		= &nvme_pr_ops,
 };
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 0015860ec12b..01f41b2bf915 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -810,17 +810,14 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
 #endif /* CONFIG_NVME_MULTIPATH */
 
 int nvme_revalidate_zones(struct nvme_ns *ns);
+int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
+		unsigned int nr_zones, report_zones_cb cb, void *data);
 #ifdef CONFIG_BLK_DEV_ZONED
 int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
-int nvme_report_zones(struct gendisk *disk, sector_t sector,
-		      unsigned int nr_zones, report_zones_cb cb, void *data);
-
 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
 				       struct nvme_command *cmnd,
 				       enum nvme_zone_mgmt_action action);
 #else
-#define nvme_report_zones NULL
-
 static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
 		struct request *req, struct nvme_command *cmnd,
 		enum nvme_zone_mgmt_action action)
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 31e789ecd940..d95010481fce 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -171,8 +171,8 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
 	return cb(&zone, idx, data);
 }
 
-static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
-			unsigned int nr_zones, report_zones_cb cb, void *data)
+int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
+		unsigned int nr_zones, report_zones_cb cb, void *data)
 {
 	struct nvme_zone_report *report;
 	struct nvme_command c = { };
@@ -230,22 +230,6 @@ out_free:
 	return ret;
 }
 
-int nvme_report_zones(struct gendisk *disk, sector_t sector,
-		      unsigned int nr_zones, report_zones_cb cb, void *data)
-{
-	struct nvme_ns_head *head = NULL;
-	struct nvme_ns *ns;
-	int srcu_idx, ret;
-
-	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
-	if (unlikely(!ns))
-		return -EWOULDBLOCK;
-	ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
-	nvme_put_ns_from_disk(head, srcu_idx);
-
-	return ret;
-}
-
 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
 {

From f1cf35e17ec308c0e76f55c6bccf84fff1a2d71a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 09:22:35 +0200
Subject: [PATCH 0891/3292] nvme: remove nvme_{get,put}_ns_from_disk

Now that only one caller is left remove the helpers by restructuring
nvme_pr_command so that it has two helpers for sending a command of to a
given nsid using either the ns_head for multipath, or the namespace
stored in the gendisk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c | 68 ++++++++++++++++------------------------
 drivers/nvme/host/nvme.h |  5 +--
 2 files changed, 28 insertions(+), 45 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 47cfc8a28e45..177cae44b612 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1542,36 +1542,6 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 
-/*
- * Issue ioctl requests on the first available path.  Note that unlike normal
- * block layer requests we will not retry failed request on another controller.
- */
-struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
-		struct nvme_ns_head **head, int *srcu_idx)
-{
-#ifdef CONFIG_NVME_MULTIPATH
-	if (disk->fops == &nvme_ns_head_ops) {
-		struct nvme_ns *ns;
-
-		*head = disk->private_data;
-		*srcu_idx = srcu_read_lock(&(*head)->srcu);
-		ns = nvme_find_path(*head);
-		if (!ns)
-			srcu_read_unlock(&(*head)->srcu, *srcu_idx);
-		return ns;
-	}
-#endif
-	*head = NULL;
-	*srcu_idx = -1;
-	return disk->private_data;
-}
-
-void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
-{
-	if (head)
-		srcu_read_unlock(&head->srcu, idx);
-}
-
 static int nvme_ns_open(struct nvme_ns *ns)
 {
 
@@ -1968,30 +1938,46 @@ static char nvme_pr_type(enum pr_type type)
 	}
 };
 
+static int nvme_send_ns_head_pr_command(struct block_device *bdev,
+		struct nvme_command *c, u8 data[16])
+{
+	struct nvme_ns_head *head = bdev->bd_disk->private_data;
+	int srcu_idx = srcu_read_lock(&head->srcu);
+	struct nvme_ns *ns = nvme_find_path(head);
+	int ret = -EWOULDBLOCK;
+
+	if (ns) {
+		c->common.nsid = cpu_to_le32(ns->head->ns_id);
+		ret = nvme_submit_sync_cmd(ns->queue, c, data, 16);
+	}
+	srcu_read_unlock(&head->srcu, srcu_idx);
+	return ret;
+}
+	
+static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
+		u8 data[16])
+{
+	c->common.nsid = cpu_to_le32(ns->head->ns_id);
+	return nvme_submit_sync_cmd(ns->queue, c, data, 16);
+}
+
 static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
 				u64 key, u64 sa_key, u8 op)
 {
-	struct nvme_ns_head *head = NULL;
-	struct nvme_ns *ns;
 	struct nvme_command c;
-	int srcu_idx, ret;
 	u8 data[16] = { 0, };
 
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
-	if (unlikely(!ns))
-		return -EWOULDBLOCK;
-
 	put_unaligned_le64(key, &data[0]);
 	put_unaligned_le64(sa_key, &data[8]);
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = op;
-	c.common.nsid = cpu_to_le32(ns->head->ns_id);
 	c.common.cdw10 = cpu_to_le32(cdw10);
 
-	ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
-	nvme_put_ns_from_disk(head, srcu_idx);
-	return ret;
+	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+	    bdev->bd_disk->fops == &nvme_ns_head_ops)
+		return nvme_send_ns_head_pr_command(bdev, &c, data);
+	return nvme_send_ns_pr_command(bdev->bd_disk->private_data, &c, data);
 }
 
 static int nvme_pr_register(struct block_device *bdev, u64 old,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 01f41b2bf915..1f397ecba16c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -674,9 +674,6 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 void nvme_queue_scan(struct nvme_ctrl *ctrl);
 int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
 		void *log, size_t size, u64 offset);
-struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
-		struct nvme_ns_head **head, int *srcu_idx);
-void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
 bool nvme_tryget_ns_head(struct nvme_ns_head *head);
 void nvme_put_ns_head(struct nvme_ns_head *head);
 int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
@@ -697,6 +694,7 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[];
 extern const struct pr_ops nvme_pr_ops;
 extern const struct block_device_operations nvme_ns_head_ops;
 
+struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
 #ifdef CONFIG_NVME_MULTIPATH
 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
 {
@@ -718,7 +716,6 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
-struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
 
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 {

From f6e8bd59c4e84820fc5f6c404730ef872439548a Mon Sep 17 00:00:00 2001
From: Amit Engel <amit.engel@dell.com>
Date: Thu, 22 Apr 2021 15:33:16 +0300
Subject: [PATCH 0892/3292] nvmet: move ka_work initialization to
 nvmet_alloc_ctrl

Initialize keep-alive work only once, as part of alloc_ctrl
and not each time that nvmet_start_keep_alive_timer is being called

Signed-off-by: Amit Engel <amit.engel@dell.com>
Reviewed-by: Hou Pu <houpu.main@gmail.com>
---
 drivers/nvme/target/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 1853db38b682..4ae4bea6625d 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -412,7 +412,6 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
 		ctrl->cntlid, ctrl->kato);
 
-	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 
@@ -1352,6 +1351,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	INIT_LIST_HEAD(&ctrl->async_events);
 	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
 	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
+	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 
 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

From 346ac785badf66120d8b4c7b48f87b0a536f691e Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 2 Jun 2021 17:37:58 -0700
Subject: [PATCH 0893/3292] nvmet: remove a superfluous variable

Remove the superfluous variable "bdev" that is only used once in the
nvmet_bdev_alloc_bip() and use req->ns->bdev that is used everywhere in
the code to access the nvmet request's bdev.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-bdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 429263ca9b97..f673679d258a 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -174,11 +174,10 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 {
 	struct blk_integrity *bi;
 	struct bio_integrity_payload *bip;
-	struct block_device *bdev = req->ns->bdev;
 	int rc;
 	size_t resid, len;
 
-	bi = bdev_get_integrity(bdev);
+	bi = bdev_get_integrity(req->ns->bdev);
 	if (unlikely(!bi)) {
 		pr_err("Unable to locate bio_integrity\n");
 		return -ENODEV;

From da16f5be45d0458e5240737fe90194ee33314bdf Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 11:42:03 +0800
Subject: [PATCH 0894/3292] b43: phy_n: Delete some useless TODO code

These TODO empty code are added by
commit 9442e5b58edb ("b43: N-PHY: partly implement SPUR workaround"). It's
been more than a decade now. I don't think anyone who wants to perfect
this workaround can follow this TODO tip exactly. Instead, it limits them
to new thinking. Remove it will be better.

No functional change.

By the way, this helps reduce some binary code size.
Before:
text    data    bss     dec     hex
74472   9967    0       84439   149d7

After:
text    data    bss     dec     hex
74408   9919    0       84327   14967

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511034203.4122-2-thunder.leizhen@huawei.com
---
 drivers/net/wireless/broadcom/b43/phy_n.c | 47 -----------------------
 1 file changed, 47 deletions(-)

diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c
index 665b737fbb0d..cf3ccf4ddfe7 100644
--- a/drivers/net/wireless/broadcom/b43/phy_n.c
+++ b/drivers/net/wireless/broadcom/b43/phy_n.c
@@ -4592,58 +4592,11 @@ static void b43_nphy_spur_workaround(struct b43_wldev *dev)
 {
 	struct b43_phy_n *nphy = dev->phy.n;
 
-	u8 channel = dev->phy.channel;
-	int tone[2] = { 57, 58 };
-	u32 noise[2] = { 0x3FF, 0x3FF };
-
 	B43_WARN_ON(dev->phy.rev < 3);
 
 	if (nphy->hang_avoid)
 		b43_nphy_stay_in_carrier_search(dev, 1);
 
-	if (nphy->gband_spurwar_en) {
-		/* TODO: N PHY Adjust Analog Pfbw (7) */
-		if (channel == 11 && b43_is_40mhz(dev)) {
-			; /* TODO: N PHY Adjust Min Noise Var(2, tone, noise)*/
-		} else {
-			; /* TODO: N PHY Adjust Min Noise Var(0, NULL, NULL)*/
-		}
-		/* TODO: N PHY Adjust CRS Min Power (0x1E) */
-	}
-
-	if (nphy->aband_spurwar_en) {
-		if (channel == 54) {
-			tone[0] = 0x20;
-			noise[0] = 0x25F;
-		} else if (channel == 38 || channel == 102 || channel == 118) {
-			if (0 /* FIXME */) {
-				tone[0] = 0x20;
-				noise[0] = 0x21F;
-			} else {
-				tone[0] = 0;
-				noise[0] = 0;
-			}
-		} else if (channel == 134) {
-			tone[0] = 0x20;
-			noise[0] = 0x21F;
-		} else if (channel == 151) {
-			tone[0] = 0x10;
-			noise[0] = 0x23F;
-		} else if (channel == 153 || channel == 161) {
-			tone[0] = 0x30;
-			noise[0] = 0x23F;
-		} else {
-			tone[0] = 0;
-			noise[0] = 0;
-		}
-
-		if (!tone[0] && !noise[0]) {
-			; /* TODO: N PHY Adjust Min Noise Var(1, tone, noise)*/
-		} else {
-			; /* TODO: N PHY Adjust Min Noise Var(0, NULL, NULL)*/
-		}
-	}
-
 	if (nphy->hang_avoid)
 		b43_nphy_stay_in_carrier_search(dev, 0);
 }

From d1dbaa54191e0014493cb4065aadb6764b404dd1 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 29 May 2021 19:51:31 +0800
Subject: [PATCH 0895/3292] b43legacy: Remove unused inline function
 txring_to_priority()

commit 5d07a3d62f63 ("b43legacy: Avoid packet losses in the dma worker code")
left behind this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210529115131.6028-1-yuehaibing@huawei.com
---
 drivers/net/wireless/broadcom/b43legacy/dma.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/net/wireless/broadcom/b43legacy/dma.c b/drivers/net/wireless/broadcom/b43legacy/dma.c
index 7e2f70c4207c..6869f2bf1bae 100644
--- a/drivers/net/wireless/broadcom/b43legacy/dma.c
+++ b/drivers/net/wireless/broadcom/b43legacy/dma.c
@@ -213,19 +213,6 @@ return dev->dma.tx_ring1;
 	return ring;
 }
 
-/* Bcm4301-ring to mac80211-queue mapping */
-static inline int txring_to_priority(struct b43legacy_dmaring *ring)
-{
-	static const u8 idx_to_prio[] =
-		{ 3, 2, 1, 0, 4, 5, };
-
-/*FIXME: have only one queue, for now */
-return 0;
-
-	return idx_to_prio[ring->index];
-}
-
-
 static u16 b43legacy_dmacontroller_base(enum b43legacy_dmatype type,
 					int controller_idx)
 {

From fef1cdbba4d12fb67555364c22cc8d7c500600aa Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 1 Jun 2021 11:28:55 +0100
Subject: [PATCH 0896/3292] b43legacy: Fix spelling mistake "overflew" ->
 "overflowed"

There is a spelling mistake in a comment. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210601102855.8884-1-colin.king@canonical.com
---
 drivers/net/wireless/broadcom/b43legacy/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
index f64ebff68308..eec3af9c3745 100644
--- a/drivers/net/wireless/broadcom/b43legacy/main.c
+++ b/drivers/net/wireless/broadcom/b43legacy/main.c
@@ -391,7 +391,7 @@ void b43legacy_tsf_read(struct b43legacy_wldev *dev, u64 *tsf)
 	 * registers, we should take care of register overflows.
 	 * In theory, the whole tsf read process should be atomic.
 	 * We try to be atomic here, by restaring the read process,
-	 * if any of the high registers changed (overflew).
+	 * if any of the high registers changed (overflowed).
 	 */
 	if (dev->dev->id.revision >= 3) {
 		u32 low;

From 080f9c10c773df39ccebe8dc414179d9179005a9 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Sun, 25 Apr 2021 16:20:42 +0530
Subject: [PATCH 0897/3292] ipw2x00: Minor documentation update

Kernel test robot throws below warning ->

drivers/net/wireless/intel/ipw2x00/ipw2100.c:5359: warning: This comment
starts with '/**', but isn't a kernel-doc comment. Refer
Documentation/doc-guide/kernel-doc.rst

Minor update in documentation.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1619347842-6638-1-git-send-email-jrdr.linux@gmail.com
---
 drivers/net/wireless/intel/ipw2x00/ipw2100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index 23fbddd0c1f8..47eb89b773cf 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -5356,7 +5356,7 @@ struct ipw2100_wep_key {
 #define WEP_STR_128(x) x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7],x[8],x[9],x[10]
 
 /**
- * Set a the wep key
+ * ipw2100_set_key() - Set a the wep key
  *
  * @priv: struct to work on
  * @idx: index of the key we want to set

From 16cac0060680c11bb82c325c4fe95cb66fc8dfaf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 2 Jun 2021 17:40:23 -0700
Subject: [PATCH 0898/3292] libbpf: Move few APIs from 0.4 to 0.5 version

Official libbpf 0.4 release doesn't include three APIs that were tentatively
put into 0.4 section. Fix libbpf.map and move these three APIs:

  - bpf_map__initial_value;
  - bpf_map_lookup_and_delete_elem_flags;
  - bpf_object__gen_loader.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210603004026.2698513-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.map | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index bbe99b1db1a9..944c99d1ded3 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,10 +359,7 @@ LIBBPF_0.4.0 {
 		bpf_linker__finalize;
 		bpf_linker__free;
 		bpf_linker__new;
-		bpf_map__initial_value;
 		bpf_map__inner_map;
-		bpf_map_lookup_and_delete_elem_flags;
-		bpf_object__gen_loader;
 		bpf_object__set_kversion;
 		bpf_tc_attach;
 		bpf_tc_detach;
@@ -373,5 +370,8 @@ LIBBPF_0.4.0 {
 
 LIBBPF_0.5.0 {
 	global:
+		bpf_map__initial_value;
+		bpf_map_lookup_and_delete_elem_flags;
+		bpf_object__gen_loader;
 		libbpf_set_strict_mode;
 } LIBBPF_0.4.0;

From 232c9e8bd5ebfb43563b58f31e685fde06d9441f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 2 Jun 2021 17:40:24 -0700
Subject: [PATCH 0899/3292] libbpf: Refactor header installation portions of
 Makefile

As we gradually get more headers that have to be installed, it's quite
annoying to copy/paste long $(call) commands. So extract that logic and do
a simple $(foreach) over the list of headers.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210603004026.2698513-3-andrii@kernel.org
---
 tools/lib/bpf/Makefile | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 15420303cf06..d1b909e005dc 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -223,19 +223,14 @@ install_lib: all_cmd
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
+INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+		  bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h	     \
+		  bpf_endian.h bpf_core_read.h
+
 install_headers: $(BPF_HELPER_DEFS)
-	$(call QUIET_INSTALL, headers) \
-		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
-		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
-		$(call do_install,btf.h,$(prefix)/include/bpf,644); \
-		$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
-		$(call do_install,libbpf_legacy.h,$(prefix)/include/bpf,644); \
-		$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
-		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
-		$(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
-		$(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
-		$(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
-		$(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+	$(call QUIET_INSTALL, headers)					     \
+		$(foreach hdr,$(INSTALL_HEADERS),			     \
+			$(call do_install,$(hdr),$(prefix)/include/bpf,644);)
 
 install_pkgconfig: $(PC_FILE)
 	$(call QUIET_INSTALL, $(PC_FILE)) \

From 7d8a819dd31672f02ece93b6a9b9491daba4f0f2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 2 Jun 2021 17:40:25 -0700
Subject: [PATCH 0900/3292] libbpf: Install skel_internal.h header used from
 light skeletons

Light skeleton code assumes skel_internal.h header to be installed system-wide
by libbpf package. Make sure it is actually installed.

Fixes: 67234743736a ("libbpf: Generate loader program out of BPF ELF file.")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210603004026.2698513-4-andrii@kernel.org
---
 tools/lib/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index d1b909e005dc..ec14aa725bb0 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -225,7 +225,7 @@ install_lib: all_cmd
 
 INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
 		  bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h	     \
-		  bpf_endian.h bpf_core_read.h
+		  bpf_endian.h bpf_core_read.h skel_internal.h
 
 install_headers: $(BPF_HELPER_DEFS)
 	$(call QUIET_INSTALL, headers)					     \

From 56b8b7f9533b5c40cbc1266b5cc6a3b19dfd2aad Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 2 Jun 2021 17:40:26 -0700
Subject: [PATCH 0901/3292] selftests/bpf: Add xdp_redirect_multi into
 .gitignore

When xdp_redirect_multi test binary was added recently, it wasn't added to
.gitignore. Fix that.

Fixes: d23292476297 ("selftests/bpf: Add xdp_redirect_multi test")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210603004026.2698513-5-andrii@kernel.org
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a030aa4a8a9e..a18f57044014 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -38,3 +38,4 @@ test_cpp
 /bench
 *.ko
 xdpxceiver
+xdp_redirect_multi

From 90bf3e28ef51aa3f480d2f2151813be669ba69ce Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 2 Jun 2021 11:06:59 +0100
Subject: [PATCH 0902/3292] null_blk: Fix null pointer dereference on
 nullb->disk on blk_cleanup_disk call

The error handling on a nullb->disk allocation currently jumps to
out_cleanup_disk that calls blk_cleanup_disk with a null pointer causing
a null pointer dereference issue. Fix this by jumping to out_cleanup_tags
instead.

Addresses-Coverity: ("Dereference after null check")
Fixes: 132226b301b5 ("null_blk: convert to blk_alloc_disk/blk_cleanup_disk")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210602100659.11058-1-colin.king@canonical.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index d8e098f1e5b5..83d803cb57c8 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1856,7 +1856,7 @@ static int null_add_dev(struct nullb_device *dev)
 			goto out_cleanup_tags;
 		nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
 		if (!nullb->disk)
-			goto out_cleanup_disk;
+			goto out_cleanup_tags;
 		nullb->disk->queue = nullb->q;
 	} else if (dev->queue_mode == NULL_Q_BIO) {
 		rv = -ENOMEM;

From 613471549f366cdf4170b81ce0f99f3867ec4d16 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 3 Jun 2021 12:47:21 +0200
Subject: [PATCH 0903/3292] block: Do not pull requests from the scheduler when
 we cannot dispatch them

Provided the device driver does not implement dispatch budget accounting
(which only SCSI does) the loop in __blk_mq_do_dispatch_sched() pulls
requests from the IO scheduler as long as it is willing to give out any.
That defeats scheduling heuristics inside the scheduler by creating
false impression that the device can take more IO when it in fact
cannot.

For example with BFQ IO scheduler on top of virtio-blk device setting
blkio cgroup weight has barely any impact on observed throughput of
async IO because __blk_mq_do_dispatch_sched() always sucks out all the
IO queued in BFQ. BFQ first submits IO from higher weight cgroups but
when that is all dispatched, it will give out IO of lower weight cgroups
as well. And then we have to wait for all this IO to be dispatched to
the disk (which means lot of it actually has to complete) before the
IO scheduler is queried again for dispatching more requests. This
completely destroys any service differentiation.

So grab request tag for a request pulled out of the IO scheduler already
in __blk_mq_do_dispatch_sched() and do not pull any more requests if we
cannot get it because we are unlikely to be able to dispatch it. That
way only single request is going to wait in the dispatch list for some
tag to free.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210603104721.6309-1-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 12 +++++++++++-
 block/blk-mq.c       |  2 +-
 block/blk-mq.h       |  2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 045b6878b8c5..a9182d2f8ad3 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -168,9 +168,19 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 		 * in blk_mq_dispatch_rq_list().
 		 */
 		list_add_tail(&rq->queuelist, &rq_list);
+		count++;
 		if (rq->mq_hctx != hctx)
 			multi_hctxs = true;
-	} while (++count < max_dispatch);
+
+		/*
+		 * If we cannot get tag for the request, stop dequeueing
+		 * requests from the IO scheduler. We are unlikely to be able
+		 * to submit them anyway and it creates false impression for
+		 * scheduling heuristics that the device can take more IO.
+		 */
+		if (!blk_mq_get_driver_tag(rq))
+			break;
+	} while (count < max_dispatch);
 
 	if (!count) {
 		if (run_queue)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f11d4018ce2e..4261adee9964 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1104,7 +1104,7 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
 	return true;
 }
 
-static bool blk_mq_get_driver_tag(struct request *rq)
+bool blk_mq_get_driver_tag(struct request *rq)
 {
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 556368d2c5b6..4b1ca7b7bbeb 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -260,6 +260,8 @@ static inline void blk_mq_put_driver_tag(struct request *rq)
 	__blk_mq_put_driver_tag(rq->mq_hctx, rq);
 }
 
+bool blk_mq_get_driver_tag(struct request *rq);
+
 static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
 {
 	int cpu;

From 490dcecabbf93e705006af498fa6815251404a54 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 19 May 2021 10:18:25 -0700
Subject: [PATCH 0904/3292] mlx5: count all link events

mlx5 devices were observed generating MLX5_PORT_CHANGE_SUBTYPE_ACTIVE
events without an intervening MLX5_PORT_CHANGE_SUBTYPE_DOWN. This
breaks link flap detection based on Linux carrier state transition
count as netif_carrier_on() does nothing if carrier is already on.
Make sure we count such events.

netif_carrier_event() increments the counters and fires the linkwatch
events. The latter is not necessary for the use case but seems like
the right thing to do.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c  |  6 +++++-
 include/linux/netdevice.h                      |  2 +-
 net/sched/sch_generic.c                        | 18 ++++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ad0f69480b9c..e36d0c6a08db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -91,12 +91,16 @@ void mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u8 port_state;
+	bool up;
 
 	port_state = mlx5_query_vport_state(mdev,
 					    MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
 					    0);
 
-	if (port_state == VPORT_STATE_UP) {
+	up = port_state == VPORT_STATE_UP;
+	if (up == netif_carrier_ok(priv->netdev))
+		netif_carrier_event(priv->netdev);
+	if (up) {
 		netdev_info(priv->netdev, "Link up\n");
 		netif_carrier_on(priv->netdev);
 	} else {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5cbc950b34df..be1dcceda5e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4187,8 +4187,8 @@ unsigned long dev_trans_start(struct net_device *dev);
 void __netdev_watchdog_up(struct net_device *dev);
 
 void netif_carrier_on(struct net_device *dev);
-
 void netif_carrier_off(struct net_device *dev);
+void netif_carrier_event(struct net_device *dev);
 
 /**
  *	netif_dormant_on - mark device as dormant.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fc8b56bcabf3..e9c0afc8becc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -540,6 +540,24 @@ void netif_carrier_off(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_carrier_off);
 
+/**
+ *	netif_carrier_event - report carrier state event
+ *	@dev: network device
+ *
+ * Device has detected a carrier event but the carrier state wasn't changed.
+ * Use in drivers when querying carrier state asynchronously, to avoid missing
+ * events (link flaps) if link recovers before it's queried.
+ */
+void netif_carrier_event(struct net_device *dev)
+{
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		return;
+	atomic_inc(&dev->carrier_up_count);
+	atomic_inc(&dev->carrier_down_count);
+	linkwatch_fire_event(dev);
+}
+EXPORT_SYMBOL_GPL(netif_carrier_event);
+
 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
    under all circumstances. It is difficult to invent anything faster or
    cheaper.

From e6dfa4a54a908d788858be4cef16e82c3bfa75d3 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 13 May 2021 19:00:14 +0800
Subject: [PATCH 0905/3292] net/mlx5: Fix duplicate included vhca_event.h

Clean up the following includecheck warning:

./drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c: vhca_event.h is
included more than once.

No functional change.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index ef5f892aafad..500c71fb6f6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -6,7 +6,6 @@
 #include "sf.h"
 #include "mlx5_ifc_vhca_event.h"
 #include "ecpf.h"
-#include "vhca_event.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 

From b74fc1ca6a45897e97f69a12b381bac415b75a5f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 3 Jun 2021 15:39:24 +0300
Subject: [PATCH 0906/3292] net/mlx5: check for allocation failure in
 mlx5_ft_pool_init()

Add a check for if the kzalloc() fails.

Fixes: 4a98544d1827 ("net/mlx5: Move chains ft pool to be used by all firmware steering")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
index 526fbb669142..c14590acc772 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c
@@ -27,6 +27,8 @@ int mlx5_ft_pool_init(struct mlx5_core_dev *dev)
 	int i;
 
 	ft_pool = kzalloc(sizeof(*ft_pool), GFP_KERNEL);
+	if (!ft_pool)
+		return -ENOMEM;
 
 	for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--)
 		ft_pool->ft_left[i] = FT_SIZE / FT_POOLS[i];

From c4cf987ebe146ab5a9571f7ebc143cf5cf41bfb9 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 31 May 2021 16:52:10 +0800
Subject: [PATCH 0907/3292] net/mlx5e: Remove the repeated declaration

Function 'mlx5e_deactivate_rq' is declared twice, so remove the
repeated declaration.

Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b636d63358d2..d966d5f40e78 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -974,7 +974,6 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
 		  struct mlx5e_xsk_param *xsk, int node,
 		  struct mlx5e_rq *rq);
 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
-void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
 void mlx5e_close_rq(struct mlx5e_rq *rq);
 int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param);
 void mlx5e_destroy_rq(struct mlx5e_rq *rq);

From ab57a912befe7f824b186f27d251d97f31fd3856 Mon Sep 17 00:00:00 2001
From: Meir Lichtinger <meirl@nvidia.com>
Date: Sat, 1 May 2021 16:15:24 +0300
Subject: [PATCH 0908/3292] net/mlx5e: IPoIB, Add support for NDR speed

Add NDR IB PTYS coding and NDR speed 100GHz.

Fixes: 235b6ac30695 ("RDMA/ipoib: Add 50Gb and 100Gb link speeds to ethtool")
Signed-off-by: Meir Lichtinger <meirl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 97d96fc38a65..0e487ec57d5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -150,6 +150,7 @@ enum mlx5_ptys_rate {
 	MLX5_PTYS_RATE_FDR	= 1 << 4,
 	MLX5_PTYS_RATE_EDR	= 1 << 5,
 	MLX5_PTYS_RATE_HDR	= 1 << 6,
+	MLX5_PTYS_RATE_NDR	= 1 << 7,
 };
 
 static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
@@ -162,6 +163,7 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
 	case MLX5_PTYS_RATE_FDR:   return 14000;
 	case MLX5_PTYS_RATE_EDR:   return 25000;
 	case MLX5_PTYS_RATE_HDR:   return 50000;
+	case MLX5_PTYS_RATE_NDR:   return 100000;
 	default:		   return -1;
 	}
 }

From 771a563ea05b08dee1a1d7c6128e3307c0ba3830 Mon Sep 17 00:00:00 2001
From: Lama Kayal <lkayal@nvidia.com>
Date: Wed, 19 May 2021 17:28:23 +0300
Subject: [PATCH 0909/3292] net/mlx5e: Zero-init DIM structures

Initialize structs to avoid unexpected behavior.

No immediate issue in current code, structs are return values, it's
safer to initialize.

Signed-off-by: Lama Kayal <lkayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index f410c1268422..69cdc4e41a46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -201,7 +201,7 @@ int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *param
 
 static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
 {
-	struct dim_cq_moder moder;
+	struct dim_cq_moder moder = {};
 
 	moder.cq_period_mode = cq_period_mode;
 	moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
@@ -214,7 +214,7 @@ static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
 
 static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
 {
-	struct dim_cq_moder moder;
+	struct dim_cq_moder moder = {};
 
 	moder.cq_period_mode = cq_period_mode;
 	moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;

From 8ec5d438a3c24e0ebd5df4e94b41a22e4bc0e028 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 19 May 2021 15:16:52 +0300
Subject: [PATCH 0910/3292] net/mlx5e: RX, Re-place page pool numa node change
 logic

Move the logic that updates the page pool upon changes in numa node.
Before this patch, logic was placed in the RX polling function, being
called also when no RX traffic, wasting cpu cycles.  Here we move it to
the RX post_wqes function, to be called only when new RX descriptors are
going to be allocated.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index e88429356018..3c65fd0bcf31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -579,6 +579,9 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 	if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
 		return false;
 
+	if (rq->page_pool)
+		page_pool_nid_changed(rq->page_pool, numa_mem_id());
+
 	do {
 		u16 head = mlx5_wq_cyc_get_head(wq);
 
@@ -734,6 +737,9 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 	if (likely(missing < UMR_WQE_BULK))
 		return false;
 
+	if (rq->page_pool)
+		page_pool_nid_changed(rq->page_pool, numa_mem_id());
+
 	head = rq->mpwqe.actual_wq_head;
 	i = missing;
 	do {
@@ -1555,9 +1561,6 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return 0;
 
-	if (rq->page_pool)
-		page_pool_nid_changed(rq->page_pool, numa_mem_id());
-
 	if (rq->cqd.left) {
 		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
 		if (work_done >= budget)

From 040ee6172e77b2366d0c622f75eba26e4e49481f Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@nvidia.com>
Date: Mon, 10 May 2021 14:17:55 +0300
Subject: [PATCH 0911/3292] net/mlx5e: Disable TX MPWQE in kdump mode

Under kdump environment we want to use the smallest possible amount
of resources, that includes setting SQ size to minimum.
However, when running on a device that supports TX MPWQE, then the SQ stop
room becomes larger than with non-capable device and requires increasing
the SQ size.

Since TX MPWQE offload is not necessary in kdump mode, disable it to
reduce the memory requirements for capable devices.

With this change, the needed SQ stop room size drops by 31.

Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h         | 7 +++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    | 6 ++----
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d966d5f40e78..b1b51bbba054 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1162,6 +1162,13 @@ mlx5e_calc_max_nch(struct mlx5e_priv *priv, const struct mlx5e_profile *profile)
 	return priv->netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
 }
 
+static inline bool
+mlx5e_tx_mpwqe_supported(struct mlx5_core_dev *mdev)
+{
+	return !is_kdump_kernel() &&
+		MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe);
+}
+
 int mlx5e_priv_init(struct mlx5e_priv *priv,
 		    struct net_device *netdev,
 		    struct mlx5_core_dev *mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 8360289813f0..5daf7185b035 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1984,7 +1984,7 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_params new_params;
 
-	if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+	if (enable && !mlx5e_tx_mpwqe_supported(mdev))
 		return -EOPNOTSUPP;
 
 	new_params = priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e36d0c6a08db..b1981dc9cc7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4620,12 +4620,10 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
 	params->log_sq_size = is_kdump_kernel() ?
 		MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
 		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
-	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE,
-			MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
 
 	/* XDP SQ */
-	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
-			MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
 
 	/* set CQE compression */
 	params->rx_cqe_compress_def = false;

From 39e8cc6d757af7ee5edf5826102c95e3f5bb374b Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@nvidia.com>
Date: Tue, 27 Apr 2021 12:51:41 +0300
Subject: [PATCH 0912/3292] net/mlx5e: Disable TLS device offload in kdump mode

Under kdump environment we want to use the smallest possible amount
of resources, that includes setting SQ size to minimum.
However, when running on a device that supports TLS device offload,
then the SQ stop room becomes larger than with non-capable device and
requires increasing the SQ size.

Since TLS device offload is not necessary in kdump mode, disable it to
reduce the memory requirements for capable devices.

With this change, the needed SQ stop room size drops by 33.

Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/params.c   |  4 ++--
 .../mellanox/mlx5/core/en_accel/ktls.c        | 11 +++++----
 .../mellanox/mlx5/core/en_accel/ktls.h        | 24 +++++++++++++++++++
 .../mellanox/mlx5/core/en_accel/ktls_tx.c     |  5 +++-
 .../mellanox/mlx5/core/en_accel/ktls_txrx.h   |  2 +-
 .../mellanox/mlx5/core/en_accel/tls.c         |  6 ++---
 .../mellanox/mlx5/core/en_accel/tls.h         | 10 +++++++-
 .../mellanox/mlx5/core/en_accel/tls_rxtx.c    |  8 +++----
 .../mellanox/mlx5/core/en_accel/tls_stats.c   |  4 ++--
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  2 +-
 10 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 69cdc4e41a46..150c8e82c738 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -614,7 +614,7 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
 
 static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
 {
-	if (mlx5_accel_is_ktls_rx(mdev))
+	if (mlx5e_accel_is_ktls_rx(mdev))
 		return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
 	return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
@@ -643,7 +643,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
 
 	mlx5e_build_sq_param_common(mdev, param);
 	param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
-	param->is_tls = mlx5_accel_is_ktls_rx(mdev);
+	param->is_tls = mlx5e_accel_is_ktls_rx(mdev);
 	if (param->is_tls)
 		param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */
 	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index 95293ee0d38d..d93aadbf10da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -59,12 +59,15 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
 	struct net_device *netdev = priv->netdev;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
-	if (mlx5_accel_is_ktls_tx(mdev)) {
+	if (!mlx5e_accel_is_ktls_tx(mdev) && !mlx5e_accel_is_ktls_rx(mdev))
+		return;
+
+	if (mlx5e_accel_is_ktls_tx(mdev)) {
 		netdev->hw_features |= NETIF_F_HW_TLS_TX;
 		netdev->features    |= NETIF_F_HW_TLS_TX;
 	}
 
-	if (mlx5_accel_is_ktls_rx(mdev))
+	if (mlx5e_accel_is_ktls_rx(mdev))
 		netdev->hw_features |= NETIF_F_HW_TLS_RX;
 
 	netdev->tlsdev_ops = &mlx5e_ktls_ops;
@@ -89,7 +92,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
 {
 	int err;
 
-	if (!mlx5_accel_is_ktls_rx(priv->mdev))
+	if (!mlx5e_accel_is_ktls_rx(priv->mdev))
 		return 0;
 
 	priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx");
@@ -109,7 +112,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
 
 void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
 {
-	if (!mlx5_accel_is_ktls_rx(priv->mdev))
+	if (!mlx5e_accel_is_ktls_rx(priv->mdev))
 		return;
 
 	if (priv->netdev->features & NETIF_F_HW_TLS_RX)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index aaa579bf9a39..5833deb2354c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -15,6 +15,25 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable);
 struct mlx5e_ktls_resync_resp *
 mlx5e_ktls_rx_resync_create_resp_list(void);
 void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list);
+
+static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
+{
+	return !is_kdump_kernel() &&
+		mlx5_accel_is_ktls_tx(mdev);
+}
+
+static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+	return !is_kdump_kernel() &&
+		mlx5_accel_is_ktls_rx(mdev);
+}
+
+static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+	return !is_kdump_kernel() &&
+		mlx5_accel_is_ktls_device(mdev);
+}
+
 #else
 
 static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
@@ -44,6 +63,11 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
 
 static inline void
 mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {}
+
+static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
+
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 51bdf71073f3..2c0a9344338a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -23,10 +23,13 @@ mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags,
 	return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu));
 }
 
-u16 mlx5e_ktls_get_stop_room(struct mlx5e_params *params)
+u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
 	u16 num_dumps, stop_room = 0;
 
+	if (!mlx5e_accel_is_ktls_tx(mdev))
+		return 0;
+
 	num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
 
 	stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index 8f79335057dc..08c9d5134479 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -14,7 +14,7 @@ struct mlx5e_accel_tx_tls_state {
 	u32 tls_tisn;
 };
 
-u16 mlx5e_ktls_get_stop_room(struct mlx5e_params *params);
+u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 
 bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
 			      struct sk_buff *skb, int datalen,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
index d6b21b899dbc..b8fc863aa68d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -192,13 +192,13 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
 	struct net_device *netdev = priv->netdev;
 	u32 caps;
 
-	if (mlx5_accel_is_ktls_device(priv->mdev)) {
+	if (mlx5e_accel_is_ktls_device(priv->mdev)) {
 		mlx5e_ktls_build_netdev(priv);
 		return;
 	}
 
 	/* FPGA */
-	if (!mlx5_accel_is_tls_device(priv->mdev))
+	if (!mlx5e_accel_is_tls_device(priv->mdev))
 		return;
 
 	caps = mlx5_accel_tls_device_caps(priv->mdev);
@@ -224,7 +224,7 @@ int mlx5e_tls_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tls *tls;
 
-	if (!mlx5_accel_is_tls_device(priv->mdev))
+	if (!mlx5e_accel_is_tls_device(priv->mdev))
 		return 0;
 
 	tls = kzalloc(sizeof(*tls), GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index 4c9274d390da..3fd6fd69bbd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -103,11 +103,18 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv);
 int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
 int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
 
+static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+	return !is_kdump_kernel() &&
+		mlx5_accel_is_tls_device(mdev);
+}
+
 #else
 
 static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
 {
-	if (mlx5_accel_is_ktls_device(priv->mdev))
+	if (!is_kdump_kernel() &&
+	    mlx5_accel_is_ktls_device(priv->mdev))
 		mlx5e_ktls_build_netdev(priv);
 }
 
@@ -117,6 +124,7 @@ static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
 static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
 static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
 static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
+static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
 
 #endif
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 82dc09aaa7fc..7a700f913582 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -273,7 +273,7 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 	if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
 		goto err_out;
 
-	if (mlx5_accel_is_ktls_tx(sq->mdev))
+	if (mlx5e_accel_is_ktls_tx(sq->mdev))
 		return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state);
 
 	/* FPGA */
@@ -378,11 +378,11 @@ void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb,
 
 u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
-	if (!mlx5_accel_is_tls_device(mdev))
+	if (!mlx5e_accel_is_tls_device(mdev))
 		return 0;
 
-	if (mlx5_accel_is_ktls_device(mdev))
-		return mlx5e_ktls_get_stop_room(params);
+	if (mlx5e_accel_is_ktls_device(mdev))
+		return mlx5e_ktls_get_stop_room(mdev, params);
 
 	/* FPGA */
 	/* Resync SKB. */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
index 29463bdb7715..ffc84f9b41b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
@@ -58,7 +58,7 @@ static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv)
 {
 	if (!priv->tls)
 		return NULL;
-	if (mlx5_accel_is_ktls_device(priv->mdev))
+	if (mlx5e_accel_is_ktls_device(priv->mdev))
 		return mlx5e_ktls_sw_stats_desc;
 	return mlx5e_tls_sw_stats_desc;
 }
@@ -67,7 +67,7 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv)
 {
 	if (!priv->tls)
 		return 0;
-	if (mlx5_accel_is_ktls_device(priv->mdev))
+	if (mlx5e_accel_is_ktls_device(priv->mdev))
 		return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
 	return ARRAY_SIZE(mlx5e_tls_sw_stats_desc);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b1981dc9cc7b..0d59639f8ac0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -857,7 +857,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
 	if (err)
 		goto err_destroy_rq;
 
-	if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev))
+	if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev))
 		__set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
 
 	if (MLX5_CAP_ETH(mdev, cqe_checksum_full))

From f68406ca3b77c90d249e7f50e8f3015408d9ad4a Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Tue, 11 May 2021 17:56:26 +0300
Subject: [PATCH 0913/3292] net/mlx5e: Remove unreachable code in mlx5e_xmit()

After some commits, mlx5e_txwqe_build_eseg() lost its ability to return
boolean value and became effectively void.

Change its return type to void and remove unreachable branches.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/en_accel.h      |  4 +---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 17 ++++-------------
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 00af0b831a28..d964665eaa63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -162,7 +162,7 @@ static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
 /* Part of the eseg touched by TX offloads */
 #define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
 
-static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
+static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
 				       struct sk_buff *skb,
 				       struct mlx5_wqe_eth_seg *eseg, u16 ihs)
 {
@@ -175,8 +175,6 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 		mlx5e_tx_tunnel_accel(skb, eseg, ihs);
 #endif
-
-	return true;
 }
 
 static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 8ba62671f5f1..669ff58107e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -706,16 +706,12 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
 		mlx5e_tx_mpwqe_session_complete(sq);
 }
 
-static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
+static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
 				   struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
 				   struct mlx5_wqe_eth_seg *eseg, u16 ihs)
 {
-	if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs)))
-		return false;
-
+	mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
 	mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
-
-	return true;
 }
 
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -744,10 +740,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
 			struct mlx5_wqe_eth_seg eseg = {};
 
-			if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg,
-							     attr.ihs)))
-				return NETDEV_TX_OK;
-
+			mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
 			mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
 			return NETDEV_TX_OK;
 		}
@@ -762,9 +755,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* May update the WQE, but may not post other WQEs. */
 	mlx5e_accel_tx_finish(sq, wqe, &accel,
 			      (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
-	if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs)))
-		return NETDEV_TX_OK;
-
+	mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
 	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
 
 	return NETDEV_TX_OK;

From 4f9f4f0f6261e4b162dfcaf91e08824a7c93da07 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Tue, 11 May 2021 15:48:56 +0800
Subject: [PATCH 0914/3292] clocksource/drivers/arm_arch_timer: Remove
 arch_timer_rate1

This variable is added by my mistake, it's not used at all.

Fixes: e2bf384d4329 ("clocksource/drivers/arm_arch_timer: Add __ro_after_init and __init")
Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Reported-by: Hulk Robot <hulkci@huawei.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210511154856.6afbcb65@xhacker.debian
---
 drivers/clocksource/arm_arch_timer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index fe1a82627d57..89a9e0524555 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -64,7 +64,6 @@ struct arch_timer {
 #define to_arch_timer(e) container_of(e, struct arch_timer, evt)
 
 static u32 arch_timer_rate __ro_after_init;
-u32 arch_timer_rate1 __ro_after_init;
 static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;
 
 static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {

From d467d0bc7ab8062197158658c456e1f2f6c3fcf1 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:56:23 +0800
Subject: [PATCH 0915/3292] rtnetlink: Fix spelling mistakes

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 04b4f0f2a3d2..dbf59b2d5700 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -9,7 +9,7 @@
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  *	Fixes:
- *	Vitaly E. Lavrov		RTA_OK arithmetics was wrong.
+ *	Vitaly E. Lavrov		RTA_OK arithmetic was wrong.
  */
 
 #include <linux/bitops.h>
@@ -234,7 +234,7 @@ unlock:
  * @msgtype: rtnetlink message type
  * @doit: Function pointer called for each request message
  * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
  *
  * Like rtnl_register, but for use by removable modules.
  */
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(rtnl_register_module);
  * @msgtype: rtnetlink message type
  * @doit: Function pointer called for each request message
  * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
  *
  * Registers the specified function pointers (at least one of them has
  * to be non-NULL) to be called whenever a request message for the
@@ -2567,7 +2567,7 @@ static int do_set_proto_down(struct net_device *dev,
 	if (nl_proto_down) {
 		proto_down = nla_get_u8(nl_proto_down);
 
-		/* Dont turn off protodown if there are active reasons */
+		/* Don't turn off protodown if there are active reasons */
 		if (!proto_down && dev->proto_down_reason) {
 			NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
 			return -EBUSY;

From dd0d91b9139899ba2546290ab282767600e0f358 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 14:56:35 +0800
Subject: [PATCH 0916/3292] libceph: Fix spelling mistakes

Fix some spelling mistakes in comments:
enconding  ==> encoding
ambigous  ==> ambiguous
orignal  ==> original
encyption  ==> encryption

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ceph/auth_x_protocol.h | 2 +-
 net/ceph/mon_client.c      | 2 +-
 net/ceph/osdmap.c          | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 792fcb974dc3..9c60feeb1bcb 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -87,7 +87,7 @@ struct ceph_x_authorize_reply {
 
 
 /*
- * encyption bundle
+ * encryption bundle
  */
 #define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull
 
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 195ceb8afb06..013cbdb6cfe2 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1508,7 +1508,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
 			return get_generic_reply(con, hdr, skip);
 
 		/*
-		 * Older OSDs don't set reply tid even if the orignal
+		 * Older OSDs don't set reply tid even if the original
 		 * request had a non-zero tid.  Work around this weirdness
 		 * by allocating a new message.
 		 */
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index c959320c4775..75b738083523 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1309,7 +1309,7 @@ static int get_osdmap_client_data_v(void **p, void *end,
 			return -EINVAL;
 		}
 
-		/* old osdmap enconding */
+		/* old osdmap encoding */
 		struct_v = 0;
 	}
 
@@ -3010,7 +3010,7 @@ static bool is_valid_crush_name(const char *name)
  * parent, returns 0.
  *
  * Does a linear search, as there are no parent pointers of any
- * kind.  Note that the result is ambigous for items that occur
+ * kind.  Note that the result is ambiguous for items that occur
  * multiple times in the map.
  */
 static int get_immediate_parent(struct crush_map *c, int id,

From 5abaf211c4a56ec5272b49a78adf2e0b21e5fd37 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 2 Jun 2021 19:01:11 +0800
Subject: [PATCH 0917/3292] net: hdlc_cisco: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_cisco.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index cb5898f7d68c..5fc0f8d1e425 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -28,14 +28,12 @@
 #define CISCO_ADDR_REPLY	1	/* Cisco address reply */
 #define CISCO_KEEPALIVE_REQ	2	/* Cisco keepalive request */
 
-
 struct hdlc_header {
 	u8 address;
 	u8 control;
 	__be16 protocol;
 }__packed;
 
-
 struct cisco_packet {
 	__be32 type;		/* code */
 	__be32 par1;
@@ -46,7 +44,6 @@ struct cisco_packet {
 #define	CISCO_PACKET_LEN	18
 #define	CISCO_BIG_PACKET_LEN	20
 
-
 struct cisco_state {
 	cisco_proto settings;
 
@@ -59,16 +56,13 @@ struct cisco_state {
 	u32 rxseq; /* RX sequence number */
 };
 
-
 static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
 
-
 static inline struct cisco_state* state(hdlc_device *hdlc)
 {
 	return (struct cisco_state *)hdlc->state;
 }
 
-
 static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev,
 			     u16 type, const void *daddr, const void *saddr,
 			     unsigned int len)
@@ -90,8 +84,6 @@ static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return sizeof(struct hdlc_header);
 }
 
-
-
 static void cisco_keepalive_send(struct net_device *dev, u32 type,
 				 __be32 par1, __be32 par2)
 {
@@ -124,8 +116,6 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	dev_queue_xmit(skb);
 }
 
-
-
 static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hdlc_header *data = (struct hdlc_header*)skb->data;
@@ -148,7 +138,6 @@ static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev)
 	}
 }
 
-
 static int cisco_rx(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -253,8 +242,6 @@ rx_error:
 	return NET_RX_DROP;
 }
 
-
-
 static void cisco_timer(struct timer_list *t)
 {
 	struct cisco_state *st = from_timer(st, t, timer);
@@ -276,8 +263,6 @@ static void cisco_timer(struct timer_list *t)
 	add_timer(&st->timer);
 }
 
-
-
 static void cisco_start(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -294,8 +279,6 @@ static void cisco_start(struct net_device *dev)
 	add_timer(&st->timer);
 }
 
-
-
 static void cisco_stop(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -310,7 +293,6 @@ static void cisco_stop(struct net_device *dev)
 	spin_unlock_irqrestore(&st->lock, flags);
 }
 
-
 static struct hdlc_proto proto = {
 	.start		= cisco_start,
 	.stop		= cisco_stop,
@@ -381,21 +363,17 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 	return -EINVAL;
 }
 
-
 static int __init mod_init(void)
 {
 	register_hdlc_protocol(&proto);
 	return 0;
 }
 
-
-
 static void __exit mod_exit(void)
 {
 	unregister_hdlc_protocol(&proto);
 }
 
-
 module_init(mod_init);
 module_exit(mod_exit);
 

From 001aa274300db079fe79a03b84bc07407a9f43e0 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 2 Jun 2021 19:01:12 +0800
Subject: [PATCH 0918/3292] net: hdlc_cisco: fix the code style issue about
 "foo* bar"

Fix the checkpatch error as "foo* bar" and should be "foo *bar",
and "(foo*)" should be "(foo *)".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_cisco.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 5fc0f8d1e425..227939dc7874 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -58,7 +58,7 @@ struct cisco_state {
 
 static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
 
-static inline struct cisco_state* state(hdlc_device *hdlc)
+static inline struct cisco_state *state(hdlc_device *hdlc)
 {
 	return (struct cisco_state *)hdlc->state;
 }
@@ -73,7 +73,7 @@ static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev,
 #endif
 
 	skb_push(skb, sizeof(struct hdlc_header));
-	data = (struct hdlc_header*)skb->data;
+	data = (struct hdlc_header *)skb->data;
 	if (type == CISCO_KEEPALIVE)
 		data->address = CISCO_MULTICAST;
 	else
@@ -98,7 +98,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	}
 	skb_reserve(skb, 4);
 	cisco_hard_header(skb, dev, CISCO_KEEPALIVE, NULL, NULL, 0);
-	data = (struct cisco_packet*)(skb->data + 4);
+	data = (struct cisco_packet *)(skb->data + 4);
 
 	data->type = htonl(type);
 	data->par1 = par1;
@@ -118,7 +118,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 
 static __be16 cisco_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
-	struct hdlc_header *data = (struct hdlc_header*)skb->data;
+	struct hdlc_header *data = (struct hdlc_header *)skb->data;
 
 	if (skb->len < sizeof(struct hdlc_header))
 		return cpu_to_be16(ETH_P_HDLC);
@@ -143,7 +143,7 @@ static int cisco_rx(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	struct cisco_state *st = state(hdlc);
-	struct hdlc_header *data = (struct hdlc_header*)skb->data;
+	struct hdlc_header *data = (struct hdlc_header *)skb->data;
 	struct cisco_packet *cisco_data;
 	struct in_device *in_dev;
 	__be32 addr, mask;
@@ -172,7 +172,7 @@ static int cisco_rx(struct sk_buff *skb)
 			goto rx_error;
 		}
 
-		cisco_data = (struct cisco_packet*)(skb->data + sizeof
+		cisco_data = (struct cisco_packet *)(skb->data + sizeof
 						    (struct hdlc_header));
 
 		switch (ntohl (cisco_data->type)) {

From c1300f37ea99c92b1759caf3418afb491b6852ce Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 2 Jun 2021 19:01:13 +0800
Subject: [PATCH 0919/3292] net: hdlc_cisco: add some required spaces

Add spaces required after the close parenthesis '}'.
Add spaces required after that ','.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_cisco.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 227939dc7874..3f6a51af02a2 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -32,7 +32,7 @@ struct hdlc_header {
 	u8 address;
 	u8 control;
 	__be16 protocol;
-}__packed;
+} __packed;
 
 struct cisco_packet {
 	__be32 type;		/* code */
@@ -40,7 +40,7 @@ struct cisco_packet {
 	__be32 par2;
 	__be16 rel;		/* reliability */
 	__be32 time;
-}__packed;
+} __packed;
 #define	CISCO_PACKET_LEN	18
 #define	CISCO_BIG_PACKET_LEN	20
 
@@ -341,7 +341,8 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
 		    new_settings.timeout < 2)
 			return -EINVAL;
 
-		result = hdlc->attach(dev, ENCODING_NRZ,PARITY_CRC16_PR1_CCITT);
+		result = hdlc->attach(dev, ENCODING_NRZ,
+				      PARITY_CRC16_PR1_CCITT);
 		if (result)
 			return result;
 

From 05ff5525aa824c433fbd47e790f181055f0127ae Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 2 Jun 2021 19:01:14 +0800
Subject: [PATCH 0920/3292] net: hdlc_cisco: remove unnecessary out of memory
 message

This patch removes unnecessary out of memory message,
to fix the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_cisco.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 3f6a51af02a2..0d29a2ceecdb 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -92,10 +92,9 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 
 	skb = dev_alloc_skb(sizeof(struct hdlc_header) +
 			    sizeof(struct cisco_packet));
-	if (!skb) {
-		netdev_warn(dev, "Memory squeeze on %s()\n", __func__);
+	if (!skb)
 		return;
-	}
+
 	skb_reserve(skb, 4);
 	cisco_hard_header(skb, dev, CISCO_KEEPALIVE, NULL, NULL, 0);
 	data = (struct cisco_packet *)(skb->data + 4);

From 4e38d514788c218306e35a63d147b721132a5466 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 2 Jun 2021 19:01:15 +0800
Subject: [PATCH 0921/3292] net: hdlc_cisco: add blank line after declaration

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_cisco.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 0d29a2ceecdb..d33771104326 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -214,6 +214,7 @@ static int cisco_rx(struct sk_buff *skb)
 				st->last_poll = jiffies;
 				if (!st->up) {
 					u32 sec, min, hrs, days;
+
 					sec = ntohl(cisco_data->time) / 1000;
 					min = sec / 60; sec -= min * 60;
 					hrs = min / 60; min -= hrs * 60;

From 4a20f8ecbf61baae90948480ff521e1f5909c8b8 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 2 Jun 2021 19:01:16 +0800
Subject: [PATCH 0922/3292] net: hdlc_cisco: remove redundant space

Space prohibited between function name and open parenthesis '('.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_cisco.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index d33771104326..349ca18088e8 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -174,7 +174,7 @@ static int cisco_rx(struct sk_buff *skb)
 		cisco_data = (struct cisco_packet *)(skb->data + sizeof
 						    (struct hdlc_header));
 
-		switch (ntohl (cisco_data->type)) {
+		switch (ntohl(cisco_data->type)) {
 		case CISCO_ADDR_REQ: /* Stolen from syncppp.c :-) */
 			rcu_read_lock();
 			in_dev = __in_dev_get_rcu(dev);

From b81017aeee4eb9159296cdb68889932649317b9b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:11 +0300
Subject: [PATCH 0923/3292] net: pcs: xpcs: delete shim definition for
 mdio_xpcs_get_ops()

CONFIG_STMMAC_ETH selects CONFIG_PCS_XPCS, so there should be no
situation where the shim should be needed.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 5938ced805f4..c4d0a2c469c7 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -36,13 +36,6 @@ struct mdio_xpcs_ops {
 			  int enable);
 };
 
-#if IS_ENABLED(CONFIG_PCS_XPCS)
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
-#else
-static inline struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
-{
-	return NULL;
-}
-#endif
 
 #endif /* __LINUX_PCS_XPCS_H */

From a54a8b71f6faca9a794c285e2ecde74a0dbec65a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:12 +0300
Subject: [PATCH 0924/3292] net: pcs: xpcs: there is only one PHY ID

The xpcs driver has an apparently inadequate structure for the actual
hardware it drives.

These defines and the xpcs_probe() function would suggest that there is
one PHY ID per supported PHY interface type, and the driver simply
validates whether the mode it should operate in (the argument of
xpcs_probe) matches what the hardware is capable of:

	#define SYNOPSYS_XPCS_USXGMII_ID	0x7996ced0
	#define SYNOPSYS_XPCS_10GKR_ID		0x7996ced0
	#define SYNOPSYS_XPCS_XLGMII_ID		0x7996ced0
	#define SYNOPSYS_XPCS_SGMII_ID		0x7996ced0
	#define SYNOPSYS_XPCS_MASK		0xffffffff

but that is not the case, because upon closer inspection, all the above
4 PHY ID definitions are in fact equal.

So it is the same XPCS that is compatible with all 4 sets of PHY
interface types.

This change introduces an array of struct xpcs_compat which is populated
by the single struct xpcs_id instance. It also eliminates the bogus
defines for multiple Synopsys XPCS PHY IDs and replaces them with a
single XPCS_ID, which better reflects the way in which the hardware
operates.

Because we are touching this area of the code anyway, the new array of
struct xpcs_compat, as well as the array of xpcs_id, have been moved
towards the end of the file, since they are variable declarations not
definitions. If whichever of struct xpcs_compat or struct xpcs_id need
to gain a function pointer member in the future, it is easier to
reference functions (no forward declarations needed) if we have the
const variable declarations at the end of the file.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c | 133 ++++++++++++++++++++++---------------
 1 file changed, 80 insertions(+), 53 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index aa985a5aae8d..9f2da9e873c4 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -12,10 +12,7 @@
 #include <linux/phylink.h>
 #include <linux/workqueue.h>
 
-#define SYNOPSYS_XPCS_USXGMII_ID	0x7996ced0
-#define SYNOPSYS_XPCS_10GKR_ID		0x7996ced0
-#define SYNOPSYS_XPCS_XLGMII_ID		0x7996ced0
-#define SYNOPSYS_XPCS_SGMII_ID		0x7996ced0
+#define SYNOPSYS_XPCS_ID		0x7996ced0
 #define SYNOPSYS_XPCS_MASK		0xffffffff
 
 /* Vendor regs access */
@@ -163,56 +160,39 @@ static const int xpcs_sgmii_features[] = {
 
 static const phy_interface_t xpcs_usxgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_USXGMII,
-	PHY_INTERFACE_MODE_MAX,
 };
 
 static const phy_interface_t xpcs_10gkr_interfaces[] = {
 	PHY_INTERFACE_MODE_10GKR,
-	PHY_INTERFACE_MODE_MAX,
 };
 
 static const phy_interface_t xpcs_xlgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_XLGMII,
-	PHY_INTERFACE_MODE_MAX,
 };
 
 static const phy_interface_t xpcs_sgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_SGMII,
-	PHY_INTERFACE_MODE_MAX,
 };
 
-static struct xpcs_id {
-	u32 id;
-	u32 mask;
+enum {
+	DW_XPCS_USXGMII,
+	DW_XPCS_10GKR,
+	DW_XPCS_XLGMII,
+	DW_XPCS_SGMII,
+	DW_XPCS_INTERFACE_MAX,
+};
+
+struct xpcs_compat {
 	const int *supported;
 	const phy_interface_t *interface;
+	int num_interfaces;
 	int an_mode;
-} xpcs_id_list[] = {
-	{
-		.id = SYNOPSYS_XPCS_USXGMII_ID,
-		.mask = SYNOPSYS_XPCS_MASK,
-		.supported = xpcs_usxgmii_features,
-		.interface = xpcs_usxgmii_interfaces,
-		.an_mode = DW_AN_C73,
-	}, {
-		.id = SYNOPSYS_XPCS_10GKR_ID,
-		.mask = SYNOPSYS_XPCS_MASK,
-		.supported = xpcs_10gkr_features,
-		.interface = xpcs_10gkr_interfaces,
-		.an_mode = DW_AN_C73,
-	}, {
-		.id = SYNOPSYS_XPCS_XLGMII_ID,
-		.mask = SYNOPSYS_XPCS_MASK,
-		.supported = xpcs_xlgmii_features,
-		.interface = xpcs_xlgmii_interfaces,
-		.an_mode = DW_AN_C73,
-	}, {
-		.id = SYNOPSYS_XPCS_SGMII_ID,
-		.mask = SYNOPSYS_XPCS_MASK,
-		.supported = xpcs_sgmii_features,
-		.interface = xpcs_sgmii_interfaces,
-		.an_mode = DW_AN_C37_SGMII,
-	},
+};
+
+struct xpcs_id {
+	u32 id;
+	u32 mask;
+	const struct xpcs_compat *compat;
 };
 
 static int xpcs_read(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
@@ -911,35 +891,82 @@ static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
 }
 
 static bool xpcs_check_features(struct mdio_xpcs_args *xpcs,
-				struct xpcs_id *match,
+				const struct xpcs_id *match,
 				phy_interface_t interface)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; match->interface[i] != PHY_INTERFACE_MODE_MAX; i++) {
-		if (match->interface[i] == interface)
-			break;
+	for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+		const struct xpcs_compat *compat = &match->compat[i];
+		bool supports_interface = false;
+
+		for (j = 0; j < compat->num_interfaces; j++) {
+			if (compat->interface[j] == interface) {
+				supports_interface = true;
+				break;
+			}
+		}
+
+		if (!supports_interface)
+			continue;
+
+		/* Populate the supported link modes for this
+		 * PHY interface type
+		 */
+		for (j = 0; compat->supported[j] != __ETHTOOL_LINK_MODE_MASK_NBITS; j++)
+			set_bit(compat->supported[j], xpcs->supported);
+
+		xpcs->an_mode = compat->an_mode;
+
+		return true;
 	}
 
-	if (match->interface[i] == PHY_INTERFACE_MODE_MAX)
-		return false;
-
-	for (i = 0; match->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
-		set_bit(match->supported[i], xpcs->supported);
-
-	xpcs->an_mode = match->an_mode;
-
-	return true;
+	return false;
 }
 
+static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
+	[DW_XPCS_USXGMII] = {
+		.supported = xpcs_usxgmii_features,
+		.interface = xpcs_usxgmii_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_usxgmii_interfaces),
+		.an_mode = DW_AN_C73,
+	},
+	[DW_XPCS_10GKR] = {
+		.supported = xpcs_10gkr_features,
+		.interface = xpcs_10gkr_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_10gkr_interfaces),
+		.an_mode = DW_AN_C73,
+	},
+	[DW_XPCS_XLGMII] = {
+		.supported = xpcs_xlgmii_features,
+		.interface = xpcs_xlgmii_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_xlgmii_interfaces),
+		.an_mode = DW_AN_C73,
+	},
+	[DW_XPCS_SGMII] = {
+		.supported = xpcs_sgmii_features,
+		.interface = xpcs_sgmii_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces),
+		.an_mode = DW_AN_C37_SGMII,
+	},
+};
+
+static const struct xpcs_id xpcs_id_list[] = {
+	{
+		.id = SYNOPSYS_XPCS_ID,
+		.mask = SYNOPSYS_XPCS_MASK,
+		.compat = synopsys_xpcs_compat,
+	},
+};
+
 static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 {
+	const struct xpcs_id *match = NULL;
 	u32 xpcs_id = xpcs_get_id(xpcs);
-	struct xpcs_id *match = NULL;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(xpcs_id_list); i++) {
-		struct xpcs_id *entry = &xpcs_id_list[i];
+		const struct xpcs_id *entry = &xpcs_id_list[i];
 
 		if ((xpcs_id & entry->mask) == entry->id) {
 			match = entry;

From 9900074ecccec472c9d89929c3d37c235f45d33a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:13 +0300
Subject: [PATCH 0925/3292] net: pcs: xpcs: make the checks related to the PHY
 interface mode stateless

The operating mode of the driver is currently to populate its
struct mdio_xpcs_args::supported and struct mdio_xpcs_args::an_mode
statically in xpcs_probe(), based on the passed phy_interface_t,
and work with those.

However this is not the operation that phylink expects from a PCS
driver, because the port might be attached to an SFP cage that triggers
changes of the phy_interface_t dynamically as one SFP module is
unpluggged and another is plugged.

To migrate towards that model, the struct mdio_xpcs_args should not
cache anything related to the phy_interface_t, but just look up the
statically defined, const struct xpcs_compat structure corresponding to
the detected PCS OUI/model number.

So we delete the "supported" and "an_mode" members of struct
mdio_xpcs_args, and add the "id" structure there (since the ID is not
expected to change at runtime).

Since xpcs->supported is used deep in the code in _xpcs_config_aneg_c73(),
we need to modify some function headers to pass the xpcs_compat from all
callers. In turn, the xpcs_compat is always supplied externally to the
xpcs module:
- Most of the time by phylink
- In xpcs_probe() it is needed because xpcs_soft_reset() writes to
  MDIO_MMD_PCS or to MDIO_MMD_VEND2 depending on whether an_mode is clause
  37 or clause 73. In order to not introduce functional changes related
  to when the soft reset is issued, we continue to require the initial
  phy_interface_t argument to be passed to xpcs_probe() so we can pass
  this on to xpcs_soft_reset().
- stmmac_open() wants to know whether to call stmmac_init_phy() or not,
  and for that it looks inside xpcs->an_mode, because the clause 73
  (backplane) AN modes supposedly do not have a PHY. Because we moved
  an_mode outside of struct mdio_xpcs_args, this is now no longer
  directly possible, so we introduce a helper function xpcs_get_an_mode()
  which protects the data encapsulation of the xpcs module and requires
  a phy_interface_t to be passed as argument. This function can look up
  the appropriate compat based on the phy_interface_t.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |   4 +-
 drivers/net/pcs/pcs-xpcs.c                    | 175 +++++++++++-------
 include/linux/pcs/pcs-xpcs.h                  |   6 +-
 3 files changed, 120 insertions(+), 65 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 13720bf6f6ff..c96a89fa4e3c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3638,6 +3638,7 @@ static int stmmac_request_irq(struct net_device *dev)
 int stmmac_open(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	int mode = priv->plat->phy_interface;
 	int bfsize = 0;
 	u32 chan;
 	int ret;
@@ -3650,7 +3651,8 @@ int stmmac_open(struct net_device *dev)
 
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI &&
-	    priv->hw->xpcs_args.an_mode != DW_AN_C73) {
+	    (!priv->hw->xpcs ||
+	     xpcs_get_an_mode(&priv->hw->xpcs_args, mode) != DW_AN_C73)) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
 			netdev_err(priv->dev,
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 9f2da9e873c4..610073cb55d0 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -195,6 +195,49 @@ struct xpcs_id {
 	const struct xpcs_compat *compat;
 };
 
+static const struct xpcs_compat *xpcs_find_compat(const struct xpcs_id *id,
+						  phy_interface_t interface)
+{
+	int i, j;
+
+	for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
+		const struct xpcs_compat *compat = &id->compat[i];
+
+		for (j = 0; j < compat->num_interfaces; j++)
+			if (compat->interface[j] == interface)
+				return compat;
+	}
+
+	return NULL;
+}
+
+int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
+{
+	const struct xpcs_compat *compat;
+
+	compat = xpcs_find_compat(xpcs->id, interface);
+	if (!compat)
+		return -ENODEV;
+
+	return compat->an_mode;
+}
+EXPORT_SYMBOL_GPL(xpcs_get_an_mode);
+
+static bool __xpcs_linkmode_supported(const struct xpcs_compat *compat,
+				      enum ethtool_link_mode_bit_indices linkmode)
+{
+	int i;
+
+	for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
+		if (compat->supported[i] == linkmode)
+			return true;
+
+	return false;
+}
+
+#define xpcs_linkmode_supported(compat, mode) \
+	__xpcs_linkmode_supported(compat, ETHTOOL_LINK_MODE_ ## mode ## _BIT)
+
 static int xpcs_read(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
 {
 	u32 reg_addr = MII_ADDR_C45 | dev << 16 | reg;
@@ -246,11 +289,12 @@ static int xpcs_poll_reset(struct mdio_xpcs_args *xpcs, int dev)
 	return (ret & MDIO_CTRL1_RESET) ? -ETIMEDOUT : 0;
 }
 
-static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs)
+static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs,
+			   const struct xpcs_compat *compat)
 {
 	int ret, dev;
 
-	switch (xpcs->an_mode) {
+	switch (compat->an_mode) {
 	case DW_AN_C73:
 		dev = MDIO_MMD_PCS;
 		break;
@@ -419,7 +463,8 @@ static int xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
 	return xpcs_write_vpcs(xpcs, MDIO_CTRL1, ret | DW_USXGMII_RST);
 }
 
-static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
+static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
+				 const struct xpcs_compat *compat)
 {
 	int ret, adv;
 
@@ -431,7 +476,7 @@ static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 
 	/* SR_AN_ADV3 */
 	adv = 0;
-	if (phylink_test(xpcs->supported, 2500baseX_Full))
+	if (xpcs_linkmode_supported(compat, 2500baseX_Full))
 		adv |= DW_C73_2500KX;
 
 	/* TODO: 5000baseKR */
@@ -442,11 +487,11 @@ static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 
 	/* SR_AN_ADV2 */
 	adv = 0;
-	if (phylink_test(xpcs->supported, 1000baseKX_Full))
+	if (xpcs_linkmode_supported(compat, 1000baseKX_Full))
 		adv |= DW_C73_1000KX;
-	if (phylink_test(xpcs->supported, 10000baseKX4_Full))
+	if (xpcs_linkmode_supported(compat, 10000baseKX4_Full))
 		adv |= DW_C73_10000KX4;
-	if (phylink_test(xpcs->supported, 10000baseKR_Full))
+	if (xpcs_linkmode_supported(compat, 10000baseKR_Full))
 		adv |= DW_C73_10000KR;
 
 	ret = xpcs_write(xpcs, MDIO_MMD_AN, DW_SR_AN_ADV2, adv);
@@ -455,19 +500,20 @@ static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 
 	/* SR_AN_ADV1 */
 	adv = DW_C73_AN_ADV_SF;
-	if (phylink_test(xpcs->supported, Pause))
+	if (xpcs_linkmode_supported(compat, Pause))
 		adv |= DW_C73_PAUSE;
-	if (phylink_test(xpcs->supported, Asym_Pause))
+	if (xpcs_linkmode_supported(compat, Asym_Pause))
 		adv |= DW_C73_ASYM_PAUSE;
 
 	return xpcs_write(xpcs, MDIO_MMD_AN, DW_SR_AN_ADV1, adv);
 }
 
-static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
+static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
+				const struct xpcs_compat *compat)
 {
 	int ret;
 
-	ret = _xpcs_config_aneg_c73(xpcs);
+	ret = _xpcs_config_aneg_c73(xpcs, compat);
 	if (ret < 0)
 		return ret;
 
@@ -481,7 +527,8 @@ static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 }
 
 static int xpcs_aneg_done_c73(struct mdio_xpcs_args *xpcs,
-			      struct phylink_link_state *state)
+			      struct phylink_link_state *state,
+			      const struct xpcs_compat *compat)
 {
 	int ret;
 
@@ -496,7 +543,7 @@ static int xpcs_aneg_done_c73(struct mdio_xpcs_args *xpcs,
 
 		/* Check if Aneg outcome is valid */
 		if (!(ret & DW_C73_AN_ADV_SF)) {
-			xpcs_config_aneg_c73(xpcs);
+			xpcs_config_aneg_c73(xpcs, compat);
 			return 0;
 		}
 
@@ -642,8 +689,31 @@ static int xpcs_validate(struct mdio_xpcs_args *xpcs,
 			 unsigned long *supported,
 			 struct phylink_link_state *state)
 {
-	linkmode_and(supported, supported, xpcs->supported);
-	linkmode_and(state->advertising, state->advertising, xpcs->supported);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
+	const struct xpcs_compat *compat;
+	int i;
+
+	/* phylink expects us to report all supported modes with
+	 * PHY_INTERFACE_MODE_NA, just don't limit the supported and
+	 * advertising masks and exit.
+	 */
+	if (state->interface == PHY_INTERFACE_MODE_NA)
+		return 0;
+
+	bitmap_zero(xpcs_supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	compat = xpcs_find_compat(xpcs->id, state->interface);
+
+	/* Populate the supported link modes for this
+	 * PHY interface type
+	 */
+	if (compat)
+		for (i = 0; compat->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
+			set_bit(compat->supported[i], xpcs_supported);
+
+	linkmode_and(supported, supported, xpcs_supported);
+	linkmode_and(state->advertising, state->advertising, xpcs_supported);
+
 	return 0;
 }
 
@@ -724,12 +794,17 @@ static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
 static int xpcs_config(struct mdio_xpcs_args *xpcs,
 		       const struct phylink_link_state *state)
 {
+	const struct xpcs_compat *compat;
 	int ret;
 
-	switch (xpcs->an_mode) {
+	compat = xpcs_find_compat(xpcs->id, state->interface);
+	if (!compat)
+		return -ENODEV;
+
+	switch (compat->an_mode) {
 	case DW_AN_C73:
 		if (state->an_enabled) {
-			ret = xpcs_config_aneg_c73(xpcs);
+			ret = xpcs_config_aneg_c73(xpcs, compat);
 			if (ret)
 				return ret;
 		}
@@ -747,7 +822,8 @@ static int xpcs_config(struct mdio_xpcs_args *xpcs,
 }
 
 static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
-			      struct phylink_link_state *state)
+			      struct phylink_link_state *state,
+			      const struct xpcs_compat *compat)
 {
 	int ret;
 
@@ -757,7 +833,7 @@ static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
 	/* ... and then we check the faults. */
 	ret = xpcs_read_fault_c73(xpcs, state);
 	if (ret) {
-		ret = xpcs_soft_reset(xpcs);
+		ret = xpcs_soft_reset(xpcs, compat);
 		if (ret)
 			return ret;
 
@@ -766,7 +842,7 @@ static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
 		return xpcs_config(xpcs, state);
 	}
 
-	if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state)) {
+	if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state, compat)) {
 		state->an_complete = true;
 		xpcs_read_lpa_c73(xpcs, state);
 		xpcs_resolve_lpa_c73(xpcs, state);
@@ -823,11 +899,16 @@ static int xpcs_get_state_c37_sgmii(struct mdio_xpcs_args *xpcs,
 static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 			  struct phylink_link_state *state)
 {
+	const struct xpcs_compat *compat;
 	int ret;
 
-	switch (xpcs->an_mode) {
+	compat = xpcs_find_compat(xpcs->id, state->interface);
+	if (!compat)
+		return -ENODEV;
+
+	switch (compat->an_mode) {
 	case DW_AN_C73:
-		ret = xpcs_get_state_c73(xpcs, state);
+		ret = xpcs_get_state_c73(xpcs, state, compat);
 		if (ret)
 			return ret;
 		break;
@@ -890,40 +971,6 @@ static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
 	return 0xffffffff;
 }
 
-static bool xpcs_check_features(struct mdio_xpcs_args *xpcs,
-				const struct xpcs_id *match,
-				phy_interface_t interface)
-{
-	int i, j;
-
-	for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) {
-		const struct xpcs_compat *compat = &match->compat[i];
-		bool supports_interface = false;
-
-		for (j = 0; j < compat->num_interfaces; j++) {
-			if (compat->interface[j] == interface) {
-				supports_interface = true;
-				break;
-			}
-		}
-
-		if (!supports_interface)
-			continue;
-
-		/* Populate the supported link modes for this
-		 * PHY interface type
-		 */
-		for (j = 0; compat->supported[j] != __ETHTOOL_LINK_MODE_MASK_NBITS; j++)
-			set_bit(compat->supported[j], xpcs->supported);
-
-		xpcs->an_mode = compat->an_mode;
-
-		return true;
-	}
-
-	return false;
-}
-
 static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
 	[DW_XPCS_USXGMII] = {
 		.supported = xpcs_usxgmii_features,
@@ -961,19 +1008,23 @@ static const struct xpcs_id xpcs_id_list[] = {
 
 static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 {
-	const struct xpcs_id *match = NULL;
 	u32 xpcs_id = xpcs_get_id(xpcs);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(xpcs_id_list); i++) {
 		const struct xpcs_id *entry = &xpcs_id_list[i];
+		const struct xpcs_compat *compat;
 
-		if ((xpcs_id & entry->mask) == entry->id) {
-			match = entry;
+		if ((xpcs_id & entry->mask) != entry->id)
+			continue;
 
-			if (xpcs_check_features(xpcs, match, interface))
-				return xpcs_soft_reset(xpcs);
-		}
+		xpcs->id = entry;
+
+		compat = xpcs_find_compat(entry, interface);
+		if (!compat)
+			return -ENODEV;
+
+		return xpcs_soft_reset(xpcs, compat);
 	}
 
 	return -ENODEV;
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index c4d0a2c469c7..c2ec440d2c5d 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -14,11 +14,12 @@
 #define DW_AN_C73			1
 #define DW_AN_C37_SGMII			2
 
+struct xpcs_id;
+
 struct mdio_xpcs_args {
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 	struct mii_bus *bus;
+	const struct xpcs_id *id;
 	int addr;
-	int an_mode;
 };
 
 struct mdio_xpcs_ops {
@@ -36,6 +37,7 @@ struct mdio_xpcs_ops {
 			  int enable);
 };
 
+int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
 
 #endif /* __LINUX_PCS_XPCS_H */

From a1a753ed1d4ae46c1c1874fb1af899f6579a7547 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:14 +0300
Subject: [PATCH 0926/3292] net: pcs: xpcs: export xpcs_validate

Calling a function pointer with a single implementation through
struct mdio_xpcs_ops is clunky, and the stmmac_do_callback system forces
this to return int, even though it always returns zero.

Simply remove the "validate" function pointer from struct mdio_xpcs_ops
and replace it with an exported xpcs_validate symbol which is called
directly by stmmac.

priv->hw->xpcs is of the type "const struct mdio_xpcs_ops *" and is used
as a placeholder/synonym for priv->plat->mdio_bus_data->has_xpcs. It is
done that way because the mdio_bus_data pointer might or might not be
populated in all stmmac instantiations.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |  2 --
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  3 ++-
 drivers/net/pcs/pcs-xpcs.c                        | 11 ++++-------
 include/linux/pcs/pcs-xpcs.h                      |  5 ++---
 4 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index dbafedb24290..441985f9cf49 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -614,8 +614,6 @@ struct stmmac_mmc_ops {
 	stmmac_do_void_callback(__priv, mmc, read, __args)
 
 /* XPCS callbacks */
-#define stmmac_xpcs_validate(__priv, __args...) \
-	stmmac_do_callback(__priv, xpcs, validate, __args)
 #define stmmac_xpcs_config(__priv, __args...) \
 	stmmac_do_callback(__priv, xpcs, config, __args)
 #define stmmac_xpcs_get_state(__priv, __args...) \
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c96a89fa4e3c..b7e6ab05ddd9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -996,7 +996,8 @@ static void stmmac_validate(struct phylink_config *config,
 	linkmode_andnot(state->advertising, state->advertising, mask);
 
 	/* If PCS is supported, check which modes it supports. */
-	stmmac_xpcs_validate(priv, &priv->hw->xpcs_args, supported, state);
+	if (priv->hw->xpcs)
+		xpcs_validate(&priv->hw->xpcs_args, supported, state);
 }
 
 static void stmmac_mac_pcs_get_state(struct phylink_config *config,
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 610073cb55d0..2f7791bcf07b 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -685,9 +685,8 @@ static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs,
 	}
 }
 
-static int xpcs_validate(struct mdio_xpcs_args *xpcs,
-			 unsigned long *supported,
-			 struct phylink_link_state *state)
+void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
+		   struct phylink_link_state *state)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
 	const struct xpcs_compat *compat;
@@ -698,7 +697,7 @@ static int xpcs_validate(struct mdio_xpcs_args *xpcs,
 	 * advertising masks and exit.
 	 */
 	if (state->interface == PHY_INTERFACE_MODE_NA)
-		return 0;
+		return;
 
 	bitmap_zero(xpcs_supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 
@@ -713,9 +712,8 @@ static int xpcs_validate(struct mdio_xpcs_args *xpcs,
 
 	linkmode_and(supported, supported, xpcs_supported);
 	linkmode_and(state->advertising, state->advertising, xpcs_supported);
-
-	return 0;
 }
+EXPORT_SYMBOL_GPL(xpcs_validate);
 
 static int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
 			   int enable)
@@ -1031,7 +1029,6 @@ static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 }
 
 static struct mdio_xpcs_ops xpcs_ops = {
-	.validate = xpcs_validate,
 	.config = xpcs_config,
 	.get_state = xpcs_get_state,
 	.link_up = xpcs_link_up,
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index c2ec440d2c5d..5ec9aaca01fe 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -23,9 +23,6 @@ struct mdio_xpcs_args {
 };
 
 struct mdio_xpcs_ops {
-	int (*validate)(struct mdio_xpcs_args *xpcs,
-			unsigned long *supported,
-			struct phylink_link_state *state);
 	int (*config)(struct mdio_xpcs_args *xpcs,
 		      const struct phylink_link_state *state);
 	int (*get_state)(struct mdio_xpcs_args *xpcs,
@@ -39,5 +36,7 @@ struct mdio_xpcs_ops {
 
 int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
+void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
+		   struct phylink_link_state *state);
 
 #endif /* __LINUX_PCS_XPCS_H */

From 14b517cb62d6efc8866f176c922de03dfe1564f3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:15 +0300
Subject: [PATCH 0927/3292] net: pcs: xpcs: export xpcs_config_eee

There is no good reason why we need to go through:

stmmac_xpcs_config_eee
-> stmmac_do_callback
   -> mdio_xpcs_ops->config_eee
      -> xpcs_config_eee

when we can simply call xpcs_config_eee.

priv->hw->xpcs is of the type "const struct mdio_xpcs_ops *" and is used
as a placeholder/synonym for priv->plat->mdio_bus_data->has_xpcs. It is
done that way because the mdio_bus_data pointer might or might not be
populated in all stmmac instantiations.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/hwif.h           |  2 --
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 12 +++++++-----
 drivers/net/pcs/pcs-xpcs.c                           |  6 +++---
 include/linux/pcs/pcs-xpcs.h                         |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 441985f9cf49..c10d11dbde61 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -622,8 +622,6 @@ struct stmmac_mmc_ops {
 	stmmac_do_callback(__priv, xpcs, link_up, __args)
 #define stmmac_xpcs_probe(__priv, __args...) \
 	stmmac_do_callback(__priv, xpcs, probe, __args)
-#define stmmac_xpcs_config_eee(__priv, __args...) \
-	stmmac_do_callback(__priv, xpcs, config_eee, __args)
 
 struct stmmac_regs_off {
 	u32 ptp_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 1f6d749fd9a3..ba7d0f40723a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -720,11 +720,13 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 		netdev_warn(priv->dev,
 			    "Setting EEE tx-lpi is not supported\n");
 
-	ret = stmmac_xpcs_config_eee(priv, &priv->hw->xpcs_args,
-				     priv->plat->mult_fact_100ns,
-				     edata->eee_enabled);
-	if (ret)
-		return ret;
+	if (priv->hw->xpcs) {
+		ret = xpcs_config_eee(&priv->hw->xpcs_args,
+				      priv->plat->mult_fact_100ns,
+				      edata->eee_enabled);
+		if (ret)
+			return ret;
+	}
 
 	if (!edata->eee_enabled)
 		stmmac_disable_eee_mode(priv);
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 2f7791bcf07b..2f2ffab855aa 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -715,8 +715,8 @@ void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
 }
 EXPORT_SYMBOL_GPL(xpcs_validate);
 
-static int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
-			   int enable)
+int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
+		    int enable)
 {
 	int ret;
 
@@ -747,6 +747,7 @@ static int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
 	ret |= DW_VR_MII_EEE_TRN_LPI;
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_EEE_MCTRL1, ret);
 }
+EXPORT_SYMBOL_GPL(xpcs_config_eee);
 
 static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
 {
@@ -1033,7 +1034,6 @@ static struct mdio_xpcs_ops xpcs_ops = {
 	.get_state = xpcs_get_state,
 	.link_up = xpcs_link_up,
 	.probe = xpcs_probe,
-	.config_eee = xpcs_config_eee,
 };
 
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 5ec9aaca01fe..ae74a336dcb9 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -30,13 +30,13 @@ struct mdio_xpcs_ops {
 	int (*link_up)(struct mdio_xpcs_args *xpcs, int speed,
 		       phy_interface_t interface);
 	int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
-	int (*config_eee)(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
-			  int enable);
 };
 
 int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
 void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state);
+int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
+		    int enable);
 
 #endif /* __LINUX_PCS_XPCS_H */

From 8e2bb9569942f9cb2ef816dbf66fbf3e8d722720 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:16 +0300
Subject: [PATCH 0928/3292] net: pcs: xpcs: export xpcs_probe

Similar to the other recently functions, it is not necessary for
xpcs_probe to be a function pointer, so export it so that it can be
called directly.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/hwif.h        | 2 --
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +-
 drivers/net/pcs/pcs-xpcs.c                        | 4 ++--
 include/linux/pcs/pcs-xpcs.h                      | 2 +-
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index c10d11dbde61..3dc291ff9f32 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -620,8 +620,6 @@ struct stmmac_mmc_ops {
 	stmmac_do_callback(__priv, xpcs, get_state, __args)
 #define stmmac_xpcs_link_up(__priv, __args...) \
 	stmmac_do_callback(__priv, xpcs, link_up, __args)
-#define stmmac_xpcs_probe(__priv, __args...) \
-	stmmac_do_callback(__priv, xpcs, probe, __args)
 
 struct stmmac_regs_off {
 	u32 ptp_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index e293bf1ce9f3..56deb92a8430 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -521,7 +521,7 @@ int stmmac_mdio_register(struct net_device *ndev)
 		for (addr = 0; addr < max_addr; addr++) {
 			xpcs->addr = addr;
 
-			ret = stmmac_xpcs_probe(priv, xpcs, mode);
+			ret = xpcs_probe(xpcs, mode);
 			if (!ret) {
 				found = 1;
 				break;
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 2f2ffab855aa..7f51eb4bbaa4 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -1005,7 +1005,7 @@ static const struct xpcs_id xpcs_id_list[] = {
 	},
 };
 
-static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
+int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 {
 	u32 xpcs_id = xpcs_get_id(xpcs);
 	int i;
@@ -1028,12 +1028,12 @@ static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 
 	return -ENODEV;
 }
+EXPORT_SYMBOL_GPL(xpcs_probe);
 
 static struct mdio_xpcs_ops xpcs_ops = {
 	.config = xpcs_config,
 	.get_state = xpcs_get_state,
 	.link_up = xpcs_link_up,
-	.probe = xpcs_probe,
 };
 
 struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index ae74a336dcb9..1d8581b74d81 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -29,7 +29,6 @@ struct mdio_xpcs_ops {
 			 struct phylink_link_state *state);
 	int (*link_up)(struct mdio_xpcs_args *xpcs, int speed,
 		       phy_interface_t interface);
-	int (*probe)(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
 };
 
 int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
@@ -38,5 +37,6 @@ void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state);
 int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
 		    int enable);
+int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
 
 #endif /* __LINUX_PCS_XPCS_H */

From 679e283ec7d6cfdf997cbb911d6857c115029007 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:17 +0300
Subject: [PATCH 0929/3292] net: pcs: xpcs: use mdiobus_c45_addr in
 xpcs_{read,write}

Use the dedicated helper for abstracting away how the clause 45 address
is packed in reg_addr.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 7f51eb4bbaa4..afabb9209c52 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -240,14 +240,14 @@ static bool __xpcs_linkmode_supported(const struct xpcs_compat *compat,
 
 static int xpcs_read(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
 {
-	u32 reg_addr = MII_ADDR_C45 | dev << 16 | reg;
+	u32 reg_addr = mdiobus_c45_addr(dev, reg);
 
 	return mdiobus_read(xpcs->bus, xpcs->addr, reg_addr);
 }
 
 static int xpcs_write(struct mdio_xpcs_args *xpcs, int dev, u32 reg, u16 val)
 {
-	u32 reg_addr = MII_ADDR_C45 | dev << 16 | reg;
+	u32 reg_addr = mdiobus_c45_addr(dev, reg);
 
 	return mdiobus_write(xpcs->bus, xpcs->addr, reg_addr, val);
 }

From 2cac15dae2f6e2f86bef1acc2a7f78fc97a0a060 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:18 +0300
Subject: [PATCH 0930/3292] net: pcs: xpcs: convert to mdio_device

Unify the 2 existing PCS drivers (lynx and xpcs) by doing a similar
thing on probe, which is to have a *_create function that takes a
struct mdio_device * given by the caller, and builds a private PCS
structure around that.

This changes stmmac to hold only a pointer to the xpcs, as opposed to
the full structure. This will be used in the next patch when struct
mdio_xpcs_ops is removed. Currently a pointer to struct mdio_xpcs_ops
is used as a shorthand to determine whether the port has an XPCS or not.
We can do the same now with the mdio_xpcs_args pointer.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h  |  2 +-
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  |  2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++--
 .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 35 +++++++-----
 drivers/net/pcs/pcs-xpcs.c                    | 53 +++++++++++++++----
 include/linux/pcs/pcs-xpcs.h                  |  7 +--
 6 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 619e3c0760d6..4bcd1d340766 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -504,7 +504,7 @@ struct mac_device_info {
 	const struct stmmac_tc_ops *tc;
 	const struct stmmac_mmc_ops *mmc;
 	const struct mdio_xpcs_ops *xpcs;
-	struct mdio_xpcs_args xpcs_args;
+	struct mdio_xpcs_args *xpcs_args;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	void __iomem *pcsr;     /* vpointer to device CSRs */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index ba7d0f40723a..050576ee704d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -721,7 +721,7 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 			    "Setting EEE tx-lpi is not supported\n");
 
 	if (priv->hw->xpcs) {
-		ret = xpcs_config_eee(&priv->hw->xpcs_args,
+		ret = xpcs_config_eee(priv->hw->xpcs_args,
 				      priv->plat->mult_fact_100ns,
 				      edata->eee_enabled);
 		if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b7e6ab05ddd9..e9e5bcb79d48 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -997,7 +997,7 @@ static void stmmac_validate(struct phylink_config *config,
 
 	/* If PCS is supported, check which modes it supports. */
 	if (priv->hw->xpcs)
-		xpcs_validate(&priv->hw->xpcs_args, supported, state);
+		xpcs_validate(priv->hw->xpcs_args, supported, state);
 }
 
 static void stmmac_mac_pcs_get_state(struct phylink_config *config,
@@ -1006,7 +1006,7 @@ static void stmmac_mac_pcs_get_state(struct phylink_config *config,
 	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
 
 	state->link = 0;
-	stmmac_xpcs_get_state(priv, &priv->hw->xpcs_args, state);
+	stmmac_xpcs_get_state(priv, priv->hw->xpcs_args, state);
 }
 
 static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
@@ -1014,7 +1014,7 @@ static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
 {
 	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
 
-	stmmac_xpcs_config(priv, &priv->hw->xpcs_args, state);
+	stmmac_xpcs_config(priv, priv->hw->xpcs_args, state);
 }
 
 static void stmmac_mac_an_restart(struct phylink_config *config)
@@ -1061,7 +1061,7 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
 	u32 ctrl;
 
-	stmmac_xpcs_link_up(priv, &priv->hw->xpcs_args, speed, interface);
+	stmmac_xpcs_link_up(priv, priv->hw->xpcs_args, speed, interface);
 
 	ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
 	ctrl &= ~priv->hw->link.speed_mask;
@@ -3653,7 +3653,7 @@ int stmmac_open(struct net_device *dev)
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI &&
 	    (!priv->hw->xpcs ||
-	     xpcs_get_an_mode(&priv->hw->xpcs_args, mode) != DW_AN_C73)) {
+	     xpcs_get_an_mode(priv->hw->xpcs_args, mode) != DW_AN_C73)) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
 			netdev_err(priv->dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 56deb92a8430..9b4bf78d2eaa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -510,25 +510,27 @@ int stmmac_mdio_register(struct net_device *ndev)
 	}
 
 	/* Try to probe the XPCS by scanning all addresses. */
-	if (priv->hw->xpcs) {
-		struct mdio_xpcs_args *xpcs = &priv->hw->xpcs_args;
-		int ret, mode = priv->plat->phy_interface;
-		max_addr = PHY_MAX_ADDR;
+	if (mdio_bus_data->has_xpcs) {
+		int mode = priv->plat->phy_interface;
+		struct mdio_device *mdiodev;
+		struct mdio_xpcs_args *xpcs;
 
-		xpcs->bus = new_bus;
+		for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+			mdiodev = mdio_device_create(new_bus, addr);
+			if (IS_ERR(mdiodev))
+				continue;
 
-		found = 0;
-		for (addr = 0; addr < max_addr; addr++) {
-			xpcs->addr = addr;
-
-			ret = xpcs_probe(xpcs, mode);
-			if (!ret) {
-				found = 1;
-				break;
+			xpcs = xpcs_create(mdiodev, mode);
+			if (IS_ERR_OR_NULL(xpcs)) {
+				mdio_device_free(mdiodev);
+				continue;
 			}
+
+			priv->hw->xpcs_args = xpcs;
+			break;
 		}
 
-		if (!found && !mdio_node) {
+		if (!priv->hw->xpcs_args) {
 			dev_warn(dev, "No XPCS found\n");
 			err = -ENODEV;
 			goto no_xpcs_found;
@@ -560,6 +562,11 @@ int stmmac_mdio_unregister(struct net_device *ndev)
 	if (!priv->mii)
 		return 0;
 
+	if (priv->hw->xpcs) {
+		mdio_device_free(priv->hw->xpcs_args->mdiodev);
+		xpcs_destroy(priv->hw->xpcs_args);
+	}
+
 	mdiobus_unregister(priv->mii);
 	priv->mii->priv = NULL;
 	mdiobus_free(priv->mii);
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index afabb9209c52..e17e72175ebb 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -241,15 +241,19 @@ static bool __xpcs_linkmode_supported(const struct xpcs_compat *compat,
 static int xpcs_read(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
 {
 	u32 reg_addr = mdiobus_c45_addr(dev, reg);
+	struct mii_bus *bus = xpcs->mdiodev->bus;
+	int addr = xpcs->mdiodev->addr;
 
-	return mdiobus_read(xpcs->bus, xpcs->addr, reg_addr);
+	return mdiobus_read(bus, addr, reg_addr);
 }
 
 static int xpcs_write(struct mdio_xpcs_args *xpcs, int dev, u32 reg, u16 val)
 {
 	u32 reg_addr = mdiobus_c45_addr(dev, reg);
+	struct mii_bus *bus = xpcs->mdiodev->bus;
+	int addr = xpcs->mdiodev->addr;
 
-	return mdiobus_write(xpcs->bus, xpcs->addr, reg_addr, val);
+	return mdiobus_write(bus, addr, reg_addr, val);
 }
 
 static int xpcs_read_vendor(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
@@ -315,7 +319,7 @@ static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs,
 #define xpcs_warn(__xpcs, __state, __args...) \
 ({ \
 	if ((__state)->link) \
-		dev_warn(&(__xpcs)->bus->dev, ##__args); \
+		dev_warn(&(__xpcs)->mdiodev->dev, ##__args); \
 })
 
 static int xpcs_read_fault_c73(struct mdio_xpcs_args *xpcs,
@@ -1005,10 +1009,20 @@ static const struct xpcs_id xpcs_id_list[] = {
 	},
 };
 
-int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
+struct mdio_xpcs_args *xpcs_create(struct mdio_device *mdiodev,
+				   phy_interface_t interface)
 {
-	u32 xpcs_id = xpcs_get_id(xpcs);
-	int i;
+	struct mdio_xpcs_args *xpcs;
+	u32 xpcs_id;
+	int i, ret;
+
+	xpcs = kzalloc(sizeof(*xpcs), GFP_KERNEL);
+	if (!xpcs)
+		return NULL;
+
+	xpcs->mdiodev = mdiodev;
+
+	xpcs_id = xpcs_get_id(xpcs);
 
 	for (i = 0; i < ARRAY_SIZE(xpcs_id_list); i++) {
 		const struct xpcs_id *entry = &xpcs_id_list[i];
@@ -1020,15 +1034,32 @@ int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 		xpcs->id = entry;
 
 		compat = xpcs_find_compat(entry, interface);
-		if (!compat)
-			return -ENODEV;
+		if (!compat) {
+			ret = -ENODEV;
+			goto out;
+		}
 
-		return xpcs_soft_reset(xpcs, compat);
+		ret = xpcs_soft_reset(xpcs, compat);
+		if (ret)
+			goto out;
+
+		return xpcs;
 	}
 
-	return -ENODEV;
+	ret = -ENODEV;
+
+out:
+	kfree(xpcs);
+
+	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(xpcs_probe);
+EXPORT_SYMBOL_GPL(xpcs_create);
+
+void xpcs_destroy(struct mdio_xpcs_args *xpcs)
+{
+	kfree(xpcs);
+}
+EXPORT_SYMBOL_GPL(xpcs_destroy);
 
 static struct mdio_xpcs_ops xpcs_ops = {
 	.config = xpcs_config,
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 1d8581b74d81..57a199393d63 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -17,9 +17,8 @@
 struct xpcs_id;
 
 struct mdio_xpcs_args {
-	struct mii_bus *bus;
+	struct mdio_device *mdiodev;
 	const struct xpcs_id *id;
-	int addr;
 };
 
 struct mdio_xpcs_ops {
@@ -37,6 +36,8 @@ void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state);
 int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
 		    int enable);
-int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
+struct mdio_xpcs_args *xpcs_create(struct mdio_device *mdiodev,
+				   phy_interface_t interface);
+void xpcs_destroy(struct mdio_xpcs_args *xpcs);
 
 #endif /* __LINUX_PCS_XPCS_H */

From 11059740e616f4d83d8d9e3f8a63dafefdc2ae5d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 2 Jun 2021 19:20:19 +0300
Subject: [PATCH 0931/3292] net: pcs: xpcs: convert to phylink_pcs_ops

Since all the remaining members of struct mdio_xpcs_ops have direct
equivalents in struct phylink_pcs_ops, it is about time we remove it
altogether.

Since the phylink ops return void, we need to remove the error
propagation from the various xpcs methods and simply print an error
message where appropriate.

Since xpcs_get_state_c73() detects link faults and attempts to reset the
link on its own by calling xpcs_config(), but xpcs_config() now has a
lot of phylink arguments which are not needed and cannot be simply
fabricated by anybody else except phylink, the actual implementation has
been moved into a smaller xpcs_do_config().

The const struct mdio_xpcs_ops *priv->hw->xpcs has been removed, so we
need to look at the struct mdio_xpcs_args pointer now as an indication
whether the port has an XPCS or not.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h  |  3 +-
 drivers/net/ethernet/stmicro/stmmac/hwif.h    |  8 --
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  |  2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 36 +++----
 .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 16 +--
 drivers/net/pcs/pcs-xpcs.c                    | 99 +++++++++++--------
 include/linux/pcs/pcs-xpcs.h                  | 11 +--
 7 files changed, 78 insertions(+), 97 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 4bcd1d340766..8a83f9e1e95b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -503,8 +503,7 @@ struct mac_device_info {
 	const struct stmmac_hwtimestamp *ptp;
 	const struct stmmac_tc_ops *tc;
 	const struct stmmac_mmc_ops *mmc;
-	const struct mdio_xpcs_ops *xpcs;
-	struct mdio_xpcs_args *xpcs_args;
+	struct mdio_xpcs_args *xpcs;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	void __iomem *pcsr;     /* vpointer to device CSRs */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 3dc291ff9f32..6dc1c98ebec8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -613,14 +613,6 @@ struct stmmac_mmc_ops {
 #define stmmac_mmc_read(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mmc, read, __args)
 
-/* XPCS callbacks */
-#define stmmac_xpcs_config(__priv, __args...) \
-	stmmac_do_callback(__priv, xpcs, config, __args)
-#define stmmac_xpcs_get_state(__priv, __args...) \
-	stmmac_do_callback(__priv, xpcs, get_state, __args)
-#define stmmac_xpcs_link_up(__priv, __args...) \
-	stmmac_do_callback(__priv, xpcs, link_up, __args)
-
 struct stmmac_regs_off {
 	u32 ptp_off;
 	u32 mmc_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 050576ee704d..d0ce608b81c3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -721,7 +721,7 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 			    "Setting EEE tx-lpi is not supported\n");
 
 	if (priv->hw->xpcs) {
-		ret = xpcs_config_eee(priv->hw->xpcs_args,
+		ret = xpcs_config_eee(priv->hw->xpcs,
 				      priv->plat->mult_fact_100ns,
 				      edata->eee_enabled);
 		if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e9e5bcb79d48..6d41dd6f9f7a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -997,29 +997,13 @@ static void stmmac_validate(struct phylink_config *config,
 
 	/* If PCS is supported, check which modes it supports. */
 	if (priv->hw->xpcs)
-		xpcs_validate(priv->hw->xpcs_args, supported, state);
-}
-
-static void stmmac_mac_pcs_get_state(struct phylink_config *config,
-				     struct phylink_link_state *state)
-{
-	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
-
-	state->link = 0;
-	stmmac_xpcs_get_state(priv, priv->hw->xpcs_args, state);
+		xpcs_validate(priv->hw->xpcs, supported, state);
 }
 
 static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
 			      const struct phylink_link_state *state)
 {
-	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
-
-	stmmac_xpcs_config(priv, priv->hw->xpcs_args, state);
-}
-
-static void stmmac_mac_an_restart(struct phylink_config *config)
-{
-	/* Not Supported */
+	/* Nothing to do, xpcs_config() handles everything */
 }
 
 static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up)
@@ -1061,8 +1045,6 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 	struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
 	u32 ctrl;
 
-	stmmac_xpcs_link_up(priv, priv->hw->xpcs_args, speed, interface);
-
 	ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
 	ctrl &= ~priv->hw->link.speed_mask;
 
@@ -1155,9 +1137,7 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
 	.validate = stmmac_validate,
-	.mac_pcs_get_state = stmmac_mac_pcs_get_state,
 	.mac_config = stmmac_mac_config,
-	.mac_an_restart = stmmac_mac_an_restart,
 	.mac_link_down = stmmac_mac_link_down,
 	.mac_link_up = stmmac_mac_link_up,
 };
@@ -1234,6 +1214,7 @@ static int stmmac_init_phy(struct net_device *dev)
 
 static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
+	struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
 	struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
 	int mode = priv->plat->phy_interface;
 	struct phylink *phylink;
@@ -1241,8 +1222,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 	priv->phylink_config.dev = &priv->dev->dev;
 	priv->phylink_config.type = PHYLINK_NETDEV;
 	priv->phylink_config.pcs_poll = true;
-	priv->phylink_config.ovr_an_inband =
-		priv->plat->mdio_bus_data->xpcs_an_inband;
+	priv->phylink_config.ovr_an_inband = mdio_bus_data->xpcs_an_inband;
 
 	if (!fwnode)
 		fwnode = dev_fwnode(priv->device);
@@ -1252,6 +1232,12 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 	if (IS_ERR(phylink))
 		return PTR_ERR(phylink);
 
+	if (mdio_bus_data->has_xpcs) {
+		struct mdio_xpcs_args *xpcs = priv->hw->xpcs;
+
+		phylink_set_pcs(phylink, &xpcs->pcs);
+	}
+
 	priv->phylink = phylink;
 	return 0;
 }
@@ -3653,7 +3639,7 @@ int stmmac_open(struct net_device *dev)
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI &&
 	    (!priv->hw->xpcs ||
-	     xpcs_get_an_mode(priv->hw->xpcs_args, mode) != DW_AN_C73)) {
+	     xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
 			netdev_err(priv->dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 9b4bf78d2eaa..6312a152c8ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -444,14 +444,6 @@ int stmmac_mdio_register(struct net_device *ndev)
 		max_addr = PHY_MAX_ADDR;
 	}
 
-	if (mdio_bus_data->has_xpcs) {
-		priv->hw->xpcs = mdio_xpcs_get_ops();
-		if (!priv->hw->xpcs) {
-			err = -ENODEV;
-			goto bus_register_fail;
-		}
-	}
-
 	if (mdio_bus_data->needs_reset)
 		new_bus->reset = &stmmac_mdio_reset;
 
@@ -526,11 +518,11 @@ int stmmac_mdio_register(struct net_device *ndev)
 				continue;
 			}
 
-			priv->hw->xpcs_args = xpcs;
+			priv->hw->xpcs = xpcs;
 			break;
 		}
 
-		if (!priv->hw->xpcs_args) {
+		if (!priv->hw->xpcs) {
 			dev_warn(dev, "No XPCS found\n");
 			err = -ENODEV;
 			goto no_xpcs_found;
@@ -563,8 +555,8 @@ int stmmac_mdio_unregister(struct net_device *ndev)
 		return 0;
 
 	if (priv->hw->xpcs) {
-		mdio_device_free(priv->hw->xpcs_args->mdiodev);
-		xpcs_destroy(priv->hw->xpcs_args);
+		mdio_device_free(priv->hw->xpcs->mdiodev);
+		xpcs_destroy(priv->hw->xpcs);
 	}
 
 	mdiobus_unregister(priv->mii);
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index e17e72175ebb..34164437c135 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -100,6 +100,9 @@
 /* VR MII EEE Control 1 defines */
 #define DW_VR_MII_EEE_TRN_LPI		BIT(0)	/* Transparent Mode Enable */
 
+#define phylink_pcs_to_xpcs(pl_pcs) \
+	container_of((pl_pcs), struct mdio_xpcs_args, pcs)
+
 static const int xpcs_usxgmii_features[] = {
 	ETHTOOL_LINK_MODE_Pause_BIT,
 	ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -413,7 +416,7 @@ static int xpcs_get_max_usxgmii_speed(const unsigned long *supported)
 	return max;
 }
 
-static int xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
+static void xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
 {
 	int ret, speed_sel;
 
@@ -438,33 +441,40 @@ static int xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
 		break;
 	default:
 		/* Nothing to do here */
-		return -EINVAL;
+		return;
 	}
 
 	ret = xpcs_read_vpcs(xpcs, MDIO_CTRL1);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	ret = xpcs_write_vpcs(xpcs, MDIO_CTRL1, ret | DW_USXGMII_EN);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	ret &= ~DW_USXGMII_SS_MASK;
 	ret |= speed_sel | DW_USXGMII_FULL;
 
 	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, ret);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	ret = xpcs_read_vpcs(xpcs, MDIO_CTRL1);
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	return xpcs_write_vpcs(xpcs, MDIO_CTRL1, ret | DW_USXGMII_RST);
+	ret = xpcs_write_vpcs(xpcs, MDIO_CTRL1, ret | DW_USXGMII_RST);
+	if (ret < 0)
+		goto out;
+
+	return;
+
+out:
+	pr_err("%s: XPCS access returned %pe\n", __func__, ERR_PTR(ret));
 }
 
 static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
@@ -794,19 +804,19 @@ static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
 }
 
-static int xpcs_config(struct mdio_xpcs_args *xpcs,
-		       const struct phylink_link_state *state)
+static int xpcs_do_config(struct mdio_xpcs_args *xpcs,
+			  phy_interface_t interface, unsigned int mode)
 {
 	const struct xpcs_compat *compat;
 	int ret;
 
-	compat = xpcs_find_compat(xpcs->id, state->interface);
+	compat = xpcs_find_compat(xpcs->id, interface);
 	if (!compat)
 		return -ENODEV;
 
 	switch (compat->an_mode) {
 	case DW_AN_C73:
-		if (state->an_enabled) {
+		if (phylink_autoneg_inband(mode)) {
 			ret = xpcs_config_aneg_c73(xpcs, compat);
 			if (ret)
 				return ret;
@@ -824,6 +834,16 @@ static int xpcs_config(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_config(struct phylink_pcs *pcs, unsigned int mode,
+		       phy_interface_t interface,
+		       const unsigned long *advertising,
+		       bool permit_pause_to_mac)
+{
+	struct mdio_xpcs_args *xpcs = phylink_pcs_to_xpcs(pcs);
+
+	return xpcs_do_config(xpcs, interface, mode);
+}
+
 static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
 			      struct phylink_link_state *state,
 			      const struct xpcs_compat *compat)
@@ -842,7 +862,7 @@ static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
 
 		state->link = 0;
 
-		return xpcs_config(xpcs, state);
+		return xpcs_do_config(xpcs, state->interface, MLO_AN_INBAND);
 	}
 
 	if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state, compat)) {
@@ -899,41 +919,45 @@ static int xpcs_get_state_c37_sgmii(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
-			  struct phylink_link_state *state)
+static void xpcs_get_state(struct phylink_pcs *pcs,
+			   struct phylink_link_state *state)
 {
+	struct mdio_xpcs_args *xpcs = phylink_pcs_to_xpcs(pcs);
 	const struct xpcs_compat *compat;
 	int ret;
 
 	compat = xpcs_find_compat(xpcs->id, state->interface);
 	if (!compat)
-		return -ENODEV;
+		return;
 
 	switch (compat->an_mode) {
 	case DW_AN_C73:
 		ret = xpcs_get_state_c73(xpcs, state, compat);
-		if (ret)
-			return ret;
+		if (ret) {
+			pr_err("xpcs_get_state_c73 returned %pe\n",
+			       ERR_PTR(ret));
+			return;
+		}
 		break;
 	case DW_AN_C37_SGMII:
 		ret = xpcs_get_state_c37_sgmii(xpcs, state);
-		if (ret)
-			return ret;
+		if (ret) {
+			pr_err("xpcs_get_state_c37_sgmii returned %pe\n",
+			       ERR_PTR(ret));
+		}
 		break;
 	default:
-		return -1;
+		return;
 	}
-
-	return 0;
 }
 
-static int xpcs_link_up(struct mdio_xpcs_args *xpcs, int speed,
-			phy_interface_t interface)
+static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+			 phy_interface_t interface, int speed, int duplex)
 {
+	struct mdio_xpcs_args *xpcs = phylink_pcs_to_xpcs(pcs);
+
 	if (interface == PHY_INTERFACE_MODE_USXGMII)
 		return xpcs_config_usxgmii(xpcs, speed);
-
-	return 0;
 }
 
 static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
@@ -1009,6 +1033,12 @@ static const struct xpcs_id xpcs_id_list[] = {
 	},
 };
 
+static const struct phylink_pcs_ops xpcs_phylink_ops = {
+	.pcs_config = xpcs_config,
+	.pcs_get_state = xpcs_get_state,
+	.pcs_link_up = xpcs_link_up,
+};
+
 struct mdio_xpcs_args *xpcs_create(struct mdio_device *mdiodev,
 				   phy_interface_t interface)
 {
@@ -1039,6 +1069,9 @@ struct mdio_xpcs_args *xpcs_create(struct mdio_device *mdiodev,
 			goto out;
 		}
 
+		xpcs->pcs.ops = &xpcs_phylink_ops;
+		xpcs->pcs.poll = true;
+
 		ret = xpcs_soft_reset(xpcs, compat);
 		if (ret)
 			goto out;
@@ -1061,16 +1094,4 @@ void xpcs_destroy(struct mdio_xpcs_args *xpcs)
 }
 EXPORT_SYMBOL_GPL(xpcs_destroy);
 
-static struct mdio_xpcs_ops xpcs_ops = {
-	.config = xpcs_config,
-	.get_state = xpcs_get_state,
-	.link_up = xpcs_link_up,
-};
-
-struct mdio_xpcs_ops *mdio_xpcs_get_ops(void)
-{
-	return &xpcs_ops;
-}
-EXPORT_SYMBOL_GPL(mdio_xpcs_get_ops);
-
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 57a199393d63..0860a5b59f10 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -19,19 +19,10 @@ struct xpcs_id;
 struct mdio_xpcs_args {
 	struct mdio_device *mdiodev;
 	const struct xpcs_id *id;
-};
-
-struct mdio_xpcs_ops {
-	int (*config)(struct mdio_xpcs_args *xpcs,
-		      const struct phylink_link_state *state);
-	int (*get_state)(struct mdio_xpcs_args *xpcs,
-			 struct phylink_link_state *state);
-	int (*link_up)(struct mdio_xpcs_args *xpcs, int speed,
-		       phy_interface_t interface);
+	struct phylink_pcs pcs;
 };
 
 int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
-struct mdio_xpcs_ops *mdio_xpcs_get_ops(void);
 void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state);
 int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,

From 7cc2623d1c84935f06fbdf727f41d70f4c779ef6 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 19 May 2021 10:52:26 -0700
Subject: [PATCH 0932/3292] block: Update blk_update_request() documentation

Although the original intent was to use blk_update_request() in stacking
block drivers only, it is used much more widely today. Reflect this in the
documentation block above this function. See also:
* commit 32fab448e5e8 ("block: add request update interface").
* commit 2e60e02297cf ("block: clean up request completion API").
* commit ed6565e73424 ("block: handle partial completions for special
  payload requests").

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210519175226.8853-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3515a66022d7..514838ccab2d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1384,26 +1384,22 @@ void blk_steal_bios(struct bio_list *list, struct request *rq)
 EXPORT_SYMBOL_GPL(blk_steal_bios);
 
 /**
- * blk_update_request - Special helper function for request stacking drivers
+ * blk_update_request - Complete multiple bytes without completing the request
  * @req:      the request being processed
  * @error:    block status code
- * @nr_bytes: number of bytes to complete @req
+ * @nr_bytes: number of bytes to complete for @req
  *
  * Description:
  *     Ends I/O on a number of bytes attached to @req, but doesn't complete
  *     the request structure even if @req doesn't have leftover.
  *     If @req has leftover, sets it up for the next range of segments.
  *
- *     This special helper function is only for request stacking drivers
- *     (e.g. request-based dm) so that they can handle partial completion.
- *     Actual device drivers should use blk_mq_end_request instead.
- *
  *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
  *     %false return from this function.
  *
  * Note:
- *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
- *	blk_rq_bytes() and in blk_update_request().
+ *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function
+ *      except in the consistency check at the end of this function.
  *
  * Return:
  *     %false - this request doesn't have any more data

From f8e0a68babae3f612799178c718ec5358eac41cf Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 2 Jun 2021 10:56:25 +0200
Subject: [PATCH 0933/3292] net/smc: avoid possible duplicate dmb
 unregistration

smc_lgr_cleanup() calls smcd_unregister_all_dmbs() as part of the link
group termination process. This is a leftover from the times when
smc_lgr_cleanup() scheduled a worker to actually free the link group.
Nowadays smc_lgr_cleanup() directly calls smc_lgr_free() without any
delay so an earlier dmb unregistration is no longer needed.
So remove smcd_unregister_all_dmbs() and the call to it.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 0df85a12651e..317bc2c90fab 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1235,20 +1235,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 	kfree(lgr);
 }
 
-static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
-{
-	int i;
-
-	for (i = 0; i < SMC_RMBE_SIZES; i++) {
-		struct smc_buf_desc *buf_desc;
-
-		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
-			buf_desc->len += sizeof(struct smcd_cdc_msg);
-			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
-		}
-	}
-}
-
 static void smc_sk_wake_ups(struct smc_sock *smc)
 {
 	smc->sk.sk_write_space(&smc->sk);
@@ -1285,7 +1271,6 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 {
 	if (lgr->is_smcd) {
 		smc_ism_signal_shutdown(lgr);
-		smcd_unregister_all_dmbs(lgr);
 	} else {
 		u32 rsn = lgr->llc_termination_rsn;
 

From 5e4a43ceb22a6fd2d372fde923a6a95ef6728fd7 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 2 Jun 2021 10:56:26 +0200
Subject: [PATCH 0934/3292] net/smc: no need to flush smcd_dev's event_wq
 before destroying it

destroy_workqueue() already calls drain_workqueue(), which is a stronger
variant of flush_workqueue().

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ism.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 967712ba52a0..9cb2df289963 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -470,7 +470,6 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
 	mutex_unlock(&smcd_dev_list.mutex);
 	smcd->going_away = 1;
 	smc_smcd_terminate_all(smcd);
-	flush_workqueue(smcd->event_wq);
 	destroy_workqueue(smcd->event_wq);
 
 	device_del(&smcd->dev);

From a58224040f2df8381146e7cfba9d657d5683ded1 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 2 Jun 2021 13:20:10 +0200
Subject: [PATCH 0935/3292] nfc: mrvl: remove useless "continue" at end of loop

The "continue" statement at the end of a for loop does not have an
effect.  Entire loop contents can be slightly simplified to increase
code readability.  No functional change.

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nfcmrvl/usb.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 50f06dd1ba25..559814869c73 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -308,13 +308,9 @@ static int nfcmrvl_probe(struct usb_interface *intf,
 		if (!drv_data->bulk_tx_ep &&
 		    usb_endpoint_is_bulk_out(ep_desc)) {
 			drv_data->bulk_tx_ep = ep_desc;
-			continue;
-		}
-
-		if (!drv_data->bulk_rx_ep &&
-		    usb_endpoint_is_bulk_in(ep_desc)) {
+		} else if (!drv_data->bulk_rx_ep &&
+			   usb_endpoint_is_bulk_in(ep_desc)) {
 			drv_data->bulk_rx_ep = ep_desc;
-			continue;
 		}
 	}
 

From 2c95e6c7e558f20d309c1385a8bf3ed9da48491e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 2 Jun 2021 13:20:11 +0200
Subject: [PATCH 0936/3292] nfc: mrvl: reduce the scope of local variables

In two places the 'ep_desc' and 'skb' local variables are used only
within if() or for() block, so they scope can be reduced which makes the
entire code slightly easier to follow.  No functional change.

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nfcmrvl/usb.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 559814869c73..9d649b45300b 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -57,7 +57,6 @@ static int nfcmrvl_inc_tx(struct nfcmrvl_usb_drv_data *drv_data)
 static void nfcmrvl_bulk_complete(struct urb *urb)
 {
 	struct nfcmrvl_usb_drv_data *drv_data = urb->context;
-	struct sk_buff *skb;
 	int err;
 
 	dev_dbg(&drv_data->udev->dev, "urb %p status %d count %d\n",
@@ -67,6 +66,8 @@ static void nfcmrvl_bulk_complete(struct urb *urb)
 		return;
 
 	if (!urb->status) {
+		struct sk_buff *skb;
+
 		skb = nci_skb_alloc(drv_data->priv->ndev, urb->actual_length,
 				    GFP_ATOMIC);
 		if (!skb) {
@@ -285,7 +286,6 @@ static void nfcmrvl_waker(struct work_struct *work)
 static int nfcmrvl_probe(struct usb_interface *intf,
 			 const struct usb_device_id *id)
 {
-	struct usb_endpoint_descriptor *ep_desc;
 	struct nfcmrvl_usb_drv_data *drv_data;
 	struct nfcmrvl_private *priv;
 	int i;
@@ -303,6 +303,8 @@ static int nfcmrvl_probe(struct usb_interface *intf,
 		return -ENOMEM;
 
 	for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; i++) {
+		struct usb_endpoint_descriptor *ep_desc;
+
 		ep_desc = &intf->cur_altsetting->endpoint[i].desc;
 
 		if (!drv_data->bulk_tx_ep &&

From 1bd4f5716fc3bb4882033fbeeb97472503f1c7e2 Mon Sep 17 00:00:00 2001
From: Omkar Kulkarni <okulkarni@marvell.com>
Date: Wed, 2 Jun 2021 20:16:49 +0300
Subject: [PATCH 0937/3292] qed: Add TCP_ULP FW resource layout

Add TCP_ULP as a storage common TCP offload FW resource layout.
This will be used by the core driver (QED) for both the NVMeTCP and iSCSI.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c     | 18 ++++++++---------
 drivers/net/ethernet/qlogic/qed/qed_cxt.h     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c   | 20 +++++++++----------
 drivers/net/ethernet/qlogic/qed/qed_ll2.c     |  8 ++++----
 drivers/net/ethernet/qlogic/qed/qed_ooo.c     |  2 +-
 .../net/ethernet/qlogic/qed/qed_sp_commands.c |  2 +-
 include/linux/qed/common_hsi.h                |  2 +-
 include/linux/qed/qed_ll2_if.h                |  2 +-
 10 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 0a22f8ce9a2c..fcabbaa518df 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -94,14 +94,14 @@ struct src_ent {
 
 static bool src_proto(enum protocol_type type)
 {
-	return type == PROTOCOLID_ISCSI ||
+	return type == PROTOCOLID_TCP_ULP ||
 	       type == PROTOCOLID_FCOE ||
 	       type == PROTOCOLID_IWARP;
 }
 
 static bool tm_cid_proto(enum protocol_type type)
 {
-	return type == PROTOCOLID_ISCSI ||
+	return type == PROTOCOLID_TCP_ULP ||
 	       type == PROTOCOLID_FCOE ||
 	       type == PROTOCOLID_ROCE ||
 	       type == PROTOCOLID_IWARP;
@@ -2090,13 +2090,13 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 
 		if (p_params->num_cons && p_params->num_tasks) {
 			qed_cxt_set_proto_cid_count(p_hwfn,
-						    PROTOCOLID_ISCSI,
+						    PROTOCOLID_TCP_ULP,
 						    p_params->num_cons,
 						    0);
 
 			qed_cxt_set_proto_tid_count(p_hwfn,
-						    PROTOCOLID_ISCSI,
-						    QED_CXT_ISCSI_TID_SEG,
+						    PROTOCOLID_TCP_ULP,
+						    QED_CXT_TCP_ULP_TID_SEG,
 						    0,
 						    p_params->num_tasks,
 						    true);
@@ -2129,8 +2129,8 @@ int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn,
 		seg = QED_CXT_FCOE_TID_SEG;
 		break;
 	case QED_PCI_ISCSI:
-		proto = PROTOCOLID_ISCSI;
-		seg = QED_CXT_ISCSI_TID_SEG;
+		proto = PROTOCOLID_TCP_ULP;
+		seg = QED_CXT_TCP_ULP_TID_SEG;
 		break;
 	default:
 		return -EINVAL;
@@ -2455,8 +2455,8 @@ int qed_cxt_get_task_ctx(struct qed_hwfn *p_hwfn,
 		seg = QED_CXT_FCOE_TID_SEG;
 		break;
 	case QED_PCI_ISCSI:
-		proto = PROTOCOLID_ISCSI;
-		seg = QED_CXT_ISCSI_TID_SEG;
+		proto = PROTOCOLID_TCP_ULP;
+		seg = QED_CXT_TCP_ULP_TID_SEG;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
index 056e79620a0e..8adb7ed0c12d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
@@ -50,7 +50,7 @@ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn,
 int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn,
 			     struct qed_tid_mem *p_info);
 
-#define QED_CXT_ISCSI_TID_SEG	PROTOCOLID_ISCSI
+#define QED_CXT_TCP_ULP_TID_SEG	PROTOCOLID_TCP_ULP
 #define QED_CXT_ROCE_TID_SEG	PROTOCOLID_ROCE
 #define QED_CXT_FCOE_TID_SEG	PROTOCOLID_FCOE
 enum qed_cxt_elem_type {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index d2f5855b2ea7..c231d0e56571 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2266,7 +2266,7 @@ int qed_resc_alloc(struct qed_dev *cdev)
 		} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
 			num_cons =
 			    qed_cxt_get_proto_cid_count(p_hwfn,
-							PROTOCOLID_ISCSI,
+							PROTOCOLID_TCP_ULP,
 							NULL);
 			n_eqes += 2 * num_cons;
 		}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 559df9f4d656..9dbeb2efdc51 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -1118,7 +1118,7 @@ struct outer_tag_config_struct {
 /* personality per PF */
 enum personality_type {
 	BAD_PERSONALITY_TYP,
-	PERSONALITY_ISCSI,
+	PERSONALITY_TCP_ULP,
 	PERSONALITY_FCOE,
 	PERSONALITY_RDMA_AND_ETH,
 	PERSONALITY_RDMA,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 448567a1f520..db926d8b3033 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -158,7 +158,7 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_INIT_FUNC,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
@@ -250,7 +250,7 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
 	p_hwfn->p_iscsi_info->event_context = event_context;
 	p_hwfn->p_iscsi_info->event_cb = async_event_cb;
 
-	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_ISCSI,
+	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_TCP_ULP,
 				  qed_iscsi_async_event);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
@@ -286,7 +286,7 @@ static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
@@ -465,7 +465,7 @@ static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_UPDATE_CONN,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
@@ -506,7 +506,7 @@ qed_sp_iscsi_mac_update(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_MAC_UPDATE,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
@@ -548,7 +548,7 @@ static int qed_sp_iscsi_conn_terminate(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_TERMINATION_CONN,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
@@ -582,7 +582,7 @@ static int qed_sp_iscsi_conn_clear_sq(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_CLEAR_SQ,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
@@ -606,13 +606,13 @@ static int qed_sp_iscsi_func_stop(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 ISCSI_RAMROD_CMD_ID_DESTROY_FUNC,
-				 PROTOCOLID_ISCSI, &init_data);
+				 PROTOCOLID_TCP_ULP, &init_data);
 	if (rc)
 		return rc;
 
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
-	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ISCSI);
+	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_TCP_ULP);
 	return rc;
 }
 
@@ -786,7 +786,7 @@ static int qed_iscsi_acquire_connection(struct qed_hwfn *p_hwfn,
 	u32 icid;
 
 	spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ISCSI, &icid);
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_TCP_ULP, &icid);
 	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
 	if (rc)
 		return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 49783f365079..286e53927866 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1037,8 +1037,8 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	case QED_LL2_TYPE_FCOE:
 		p_ramrod->conn_type = PROTOCOLID_FCOE;
 		break;
-	case QED_LL2_TYPE_ISCSI:
-		p_ramrod->conn_type = PROTOCOLID_ISCSI;
+	case QED_LL2_TYPE_TCP_ULP:
+		p_ramrod->conn_type = PROTOCOLID_TCP_ULP;
 		break;
 	case QED_LL2_TYPE_ROCE:
 		p_ramrod->conn_type = PROTOCOLID_ROCE;
@@ -1048,7 +1048,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 		break;
 	case QED_LL2_TYPE_OOO:
 		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
-			p_ramrod->conn_type = PROTOCOLID_ISCSI;
+			p_ramrod->conn_type = PROTOCOLID_TCP_ULP;
 		else
 			p_ramrod->conn_type = PROTOCOLID_IWARP;
 		break;
@@ -2442,7 +2442,7 @@ static int __qed_ll2_start(struct qed_hwfn *p_hwfn,
 		conn_type = QED_LL2_TYPE_FCOE;
 		break;
 	case QED_PCI_ISCSI:
-		conn_type = QED_LL2_TYPE_ISCSI;
+		conn_type = QED_LL2_TYPE_TCP_ULP;
 		break;
 	case QED_PCI_ETH_ROCE:
 		conn_type = QED_LL2_TYPE_ROCE;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 88353aa404dc..599da0d7366b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -83,7 +83,7 @@ int qed_ooo_alloc(struct qed_hwfn *p_hwfn)
 
 	switch (p_hwfn->hw_info.personality) {
 	case QED_PCI_ISCSI:
-		proto = PROTOCOLID_ISCSI;
+		proto = PROTOCOLID_TCP_ULP;
 		break;
 	case QED_PCI_ETH_RDMA:
 	case QED_PCI_ETH_IWARP:
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index aa71adcf31ee..ee7dc0a7da6c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -385,7 +385,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->personality = PERSONALITY_FCOE;
 		break;
 	case QED_PCI_ISCSI:
-		p_ramrod->personality = PERSONALITY_ISCSI;
+		p_ramrod->personality = PERSONALITY_TCP_ULP;
 		break;
 	case QED_PCI_ETH_ROCE:
 	case QED_PCI_ETH_IWARP:
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 977807e1be53..0a3807e927c5 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -702,7 +702,7 @@ enum mf_mode {
 
 /* Per-protocol connection types */
 enum protocol_type {
-	PROTOCOLID_ISCSI,
+	PROTOCOLID_TCP_ULP,
 	PROTOCOLID_FCOE,
 	PROTOCOLID_ROCE,
 	PROTOCOLID_CORE,
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index ea273ba1c991..ff808d248883 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -18,7 +18,7 @@
 
 enum qed_ll2_conn_type {
 	QED_LL2_TYPE_FCOE,
-	QED_LL2_TYPE_ISCSI,
+	QED_LL2_TYPE_TCP_ULP,
 	QED_LL2_TYPE_TEST,
 	QED_LL2_TYPE_OOO,
 	QED_LL2_TYPE_RESERVED2,

From 897e87a10c35fb37a20886af6f731748d92c1836 Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 2 Jun 2021 20:16:50 +0300
Subject: [PATCH 0938/3292] qed: Add NVMeTCP Offload PF Level FW and HW HSI

This patch introduces the NVMeTCP device and PF level HSI and HSI
functionality in order to initialize and interact with the HW device.
The patch also adds qed NVMeTCP personality.

This patch is based on the qede, qedr, qedi, qedf drivers HSI.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Dean Balandin <dbalandin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/Kconfig           |   3 +
 drivers/net/ethernet/qlogic/qed/Makefile      |   2 +
 drivers/net/ethernet/qlogic/qed/qed.h         |   6 +
 drivers/net/ethernet/qlogic/qed/qed_cxt.c     |  27 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c     |  48 +++-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h     |   4 +-
 drivers/net/ethernet/qlogic/qed/qed_ll2.c     |  32 ++-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c     |   3 +
 drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c |   3 +-
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c | 266 ++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h |  51 ++++
 drivers/net/ethernet/qlogic/qed/qed_ooo.c     |   3 +-
 drivers/net/ethernet/qlogic/qed/qed_sp.h      |   2 +
 .../net/ethernet/qlogic/qed/qed_sp_commands.c |   1 +
 include/linux/qed/nvmetcp_common.h            |  54 ++++
 include/linux/qed/qed_if.h                    |  18 ++
 include/linux/qed/qed_nvmetcp_if.h            |  71 +++++
 17 files changed, 572 insertions(+), 22 deletions(-)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h
 create mode 100644 include/linux/qed/nvmetcp_common.h
 create mode 100644 include/linux/qed/qed_nvmetcp_if.h

diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 6b5ddb07ee83..98f430905ffa 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -110,6 +110,9 @@ config QED_RDMA
 config QED_ISCSI
 	bool
 
+config QED_NVMETCP
+	bool
+
 config QED_FCOE
 	bool
 
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 8251755ec18c..7cb0db67ba5b 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -28,6 +28,8 @@ qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
 qed-$(CONFIG_QED_OOO) += qed_ooo.o
 
+qed-$(CONFIG_QED_NVMETCP) += qed_nvmetcp.o
+
 qed-$(CONFIG_QED_RDMA) +=	\
 	qed_iwarp.o		\
 	qed_rdma.o		\
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index a20cb8a0c377..bc9bdb9d1bb9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -200,6 +200,7 @@ enum qed_pci_personality {
 	QED_PCI_ETH,
 	QED_PCI_FCOE,
 	QED_PCI_ISCSI,
+	QED_PCI_NVMETCP,
 	QED_PCI_ETH_ROCE,
 	QED_PCI_ETH_IWARP,
 	QED_PCI_ETH_RDMA,
@@ -239,6 +240,7 @@ enum QED_FEATURE {
 	QED_PF_L2_QUE,
 	QED_VF,
 	QED_RDMA_CNQ,
+	QED_NVMETCP_CQ,
 	QED_ISCSI_CQ,
 	QED_FCOE_CQ,
 	QED_VF_L2_QUE,
@@ -284,6 +286,8 @@ struct qed_hw_info {
 	((dev)->hw_info.personality == QED_PCI_FCOE)
 #define QED_IS_ISCSI_PERSONALITY(dev)					\
 	((dev)->hw_info.personality == QED_PCI_ISCSI)
+#define QED_IS_NVMETCP_PERSONALITY(dev)					\
+	((dev)->hw_info.personality == QED_PCI_NVMETCP)
 
 	/* Resource Allocation scheme results */
 	u32				resc_start[QED_MAX_RESC];
@@ -592,6 +596,7 @@ struct qed_hwfn {
 	struct qed_ooo_info		*p_ooo_info;
 	struct qed_rdma_info		*p_rdma_info;
 	struct qed_iscsi_info		*p_iscsi_info;
+	struct qed_nvmetcp_info		*p_nvmetcp_info;
 	struct qed_fcoe_info		*p_fcoe_info;
 	struct qed_pf_params		pf_params;
 
@@ -828,6 +833,7 @@ struct qed_dev {
 		struct qed_eth_cb_ops		*eth;
 		struct qed_fcoe_cb_ops		*fcoe;
 		struct qed_iscsi_cb_ops		*iscsi;
+		struct qed_nvmetcp_cb_ops	*nvmetcp;
 	} protocol_ops;
 	void				*ops_cookie;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index fcabbaa518df..5a0a3cbcc1c1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2072,7 +2072,6 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 						    PROTOCOLID_FCOE,
 						    p_params->num_cons,
 						    0);
-
 			qed_cxt_set_proto_tid_count(p_hwfn, PROTOCOLID_FCOE,
 						    QED_CXT_FCOE_TID_SEG, 0,
 						    p_params->num_tasks, true);
@@ -2093,7 +2092,6 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 						    PROTOCOLID_TCP_ULP,
 						    p_params->num_cons,
 						    0);
-
 			qed_cxt_set_proto_tid_count(p_hwfn,
 						    PROTOCOLID_TCP_ULP,
 						    QED_CXT_TCP_ULP_TID_SEG,
@@ -2106,6 +2104,29 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
 		}
 		break;
 	}
+	case QED_PCI_NVMETCP:
+	{
+		struct qed_nvmetcp_pf_params *p_params;
+
+		p_params = &p_hwfn->pf_params.nvmetcp_pf_params;
+
+		if (p_params->num_cons && p_params->num_tasks) {
+			qed_cxt_set_proto_cid_count(p_hwfn,
+						    PROTOCOLID_TCP_ULP,
+						    p_params->num_cons,
+						    0);
+			qed_cxt_set_proto_tid_count(p_hwfn,
+						    PROTOCOLID_TCP_ULP,
+						    QED_CXT_TCP_ULP_TID_SEG,
+						    0,
+						    p_params->num_tasks,
+						    true);
+		} else {
+			DP_INFO(p_hwfn->cdev,
+				"NvmeTCP personality used without setting params!\n");
+		}
+		break;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -2129,6 +2150,7 @@ int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn,
 		seg = QED_CXT_FCOE_TID_SEG;
 		break;
 	case QED_PCI_ISCSI:
+	case QED_PCI_NVMETCP:
 		proto = PROTOCOLID_TCP_ULP;
 		seg = QED_CXT_TCP_ULP_TID_SEG;
 		break;
@@ -2455,6 +2477,7 @@ int qed_cxt_get_task_ctx(struct qed_hwfn *p_hwfn,
 		seg = QED_CXT_FCOE_TID_SEG;
 		break;
 	case QED_PCI_ISCSI:
+	case QED_PCI_NVMETCP:
 		proto = PROTOCOLID_TCP_ULP;
 		seg = QED_CXT_TCP_ULP_TID_SEG;
 		break;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index c231d0e56571..932b892f1ef1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -37,6 +37,7 @@
 #include "qed_sriov.h"
 #include "qed_vf.h"
 #include "qed_rdma.h"
+#include "qed_nvmetcp.h"
 
 static DEFINE_SPINLOCK(qm_lock);
 
@@ -667,7 +668,8 @@ qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	}
 
 	/* Storage PF is bound to a single engine while L2 PF uses both */
-	if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn))
+	if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn) ||
+	    QED_IS_NVMETCP_PERSONALITY(p_hwfn))
 		eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
 	else			/* L2_PERSONALITY */
 		eng = QED_BOTH_ENG;
@@ -1164,6 +1166,9 @@ void qed_llh_remove_mac_filter(struct qed_dev *cdev,
 	if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
 		goto out;
 
+	if (QED_IS_NVMETCP_PERSONALITY(p_hwfn))
+		return;
+
 	ether_addr_copy(filter.mac.addr, mac_addr);
 	rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
 					  &ref_cnt);
@@ -1381,6 +1386,11 @@ void qed_resc_free(struct qed_dev *cdev)
 			qed_ooo_free(p_hwfn);
 		}
 
+		if (p_hwfn->hw_info.personality == QED_PCI_NVMETCP) {
+			qed_nvmetcp_free(p_hwfn);
+			qed_ooo_free(p_hwfn);
+		}
+
 		if (QED_IS_RDMA_PERSONALITY(p_hwfn) && rdma_info) {
 			qed_spq_unregister_async_cb(p_hwfn, rdma_info->proto);
 			qed_rdma_info_free(p_hwfn);
@@ -1423,6 +1433,7 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
 		flags |= PQ_FLAGS_OFLD;
 		break;
 	case QED_PCI_ISCSI:
+	case QED_PCI_NVMETCP:
 		flags |= PQ_FLAGS_ACK | PQ_FLAGS_OOO | PQ_FLAGS_OFLD;
 		break;
 	case QED_PCI_ETH_ROCE:
@@ -2263,7 +2274,8 @@ int qed_resc_alloc(struct qed_dev *cdev)
 			 * at the same time
 			 */
 			n_eqes += num_cons + 2 * MAX_NUM_VFS_BB + n_srq;
-		} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+		} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI ||
+			   p_hwfn->hw_info.personality == QED_PCI_NVMETCP) {
 			num_cons =
 			    qed_cxt_get_proto_cid_count(p_hwfn,
 							PROTOCOLID_TCP_ULP,
@@ -2313,6 +2325,15 @@ int qed_resc_alloc(struct qed_dev *cdev)
 				goto alloc_err;
 		}
 
+		if (p_hwfn->hw_info.personality == QED_PCI_NVMETCP) {
+			rc = qed_nvmetcp_alloc(p_hwfn);
+			if (rc)
+				goto alloc_err;
+			rc = qed_ooo_alloc(p_hwfn);
+			if (rc)
+				goto alloc_err;
+		}
+
 		if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
 			rc = qed_rdma_info_alloc(p_hwfn);
 			if (rc)
@@ -2393,6 +2414,11 @@ void qed_resc_setup(struct qed_dev *cdev)
 			qed_iscsi_setup(p_hwfn);
 			qed_ooo_setup(p_hwfn);
 		}
+
+		if (p_hwfn->hw_info.personality == QED_PCI_NVMETCP) {
+			qed_nvmetcp_setup(p_hwfn);
+			qed_ooo_setup(p_hwfn);
+		}
 	}
 }
 
@@ -2854,7 +2880,8 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 
 	/* Protocol Configuration */
 	STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_TCP_RT_OFFSET,
-		     (p_hwfn->hw_info.personality == QED_PCI_ISCSI) ? 1 : 0);
+		     ((p_hwfn->hw_info.personality == QED_PCI_ISCSI) ||
+			 (p_hwfn->hw_info.personality == QED_PCI_NVMETCP)) ? 1 : 0);
 	STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_FCOE_RT_OFFSET,
 		     (p_hwfn->hw_info.personality == QED_PCI_FCOE) ? 1 : 0);
 	STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_ROCE_RT_OFFSET, 0);
@@ -3535,14 +3562,21 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 		feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt,
 					       RESC_NUM(p_hwfn,
 							QED_CMDQS_CQS));
+
+	if (QED_IS_NVMETCP_PERSONALITY(p_hwfn))
+		feat_num[QED_NVMETCP_CQ] = min_t(u32, sb_cnt.cnt,
+						 RESC_NUM(p_hwfn,
+							  QED_CMDQS_CQS));
+
 	DP_VERBOSE(p_hwfn,
 		   NETIF_MSG_PROBE,
-		   "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d FCOE_CQ=%d ISCSI_CQ=%d #SBS=%d\n",
+		   "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d FCOE_CQ=%d ISCSI_CQ=%d NVMETCP_CQ=%d #SBS=%d\n",
 		   (int)FEAT_NUM(p_hwfn, QED_PF_L2_QUE),
 		   (int)FEAT_NUM(p_hwfn, QED_VF_L2_QUE),
 		   (int)FEAT_NUM(p_hwfn, QED_RDMA_CNQ),
 		   (int)FEAT_NUM(p_hwfn, QED_FCOE_CQ),
 		   (int)FEAT_NUM(p_hwfn, QED_ISCSI_CQ),
+		   (int)FEAT_NUM(p_hwfn, QED_NVMETCP_CQ),
 		   (int)sb_cnt.cnt);
 }
 
@@ -3734,7 +3768,8 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn,
 		break;
 	case QED_BDQ:
 		if (p_hwfn->hw_info.personality != QED_PCI_ISCSI &&
-		    p_hwfn->hw_info.personality != QED_PCI_FCOE)
+		    p_hwfn->hw_info.personality != QED_PCI_FCOE &&
+			p_hwfn->hw_info.personality != QED_PCI_NVMETCP)
 			*p_resc_num = 0;
 		else
 			*p_resc_num = 1;
@@ -3755,7 +3790,8 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn,
 			*p_resc_start = 0;
 		else if (p_hwfn->cdev->num_ports_in_engine == 4)
 			*p_resc_start = p_hwfn->port_id;
-		else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+		else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI ||
+			 p_hwfn->hw_info.personality == QED_PCI_NVMETCP)
 			*p_resc_start = p_hwfn->port_id;
 		else if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
 			*p_resc_start = p_hwfn->port_id + 2;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 9dbeb2efdc51..fb1baa2da2d0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -20,6 +20,7 @@
 #include <linux/qed/fcoe_common.h>
 #include <linux/qed/eth_common.h>
 #include <linux/qed/iscsi_common.h>
+#include <linux/qed/nvmetcp_common.h>
 #include <linux/qed/iwarp_common.h>
 #include <linux/qed/rdma_common.h>
 #include <linux/qed/roce_common.h>
@@ -12147,7 +12148,8 @@ struct public_func {
 #define FUNC_MF_CFG_PROTOCOL_ISCSI              0x00000010
 #define FUNC_MF_CFG_PROTOCOL_FCOE               0x00000020
 #define FUNC_MF_CFG_PROTOCOL_ROCE               0x00000030
-#define FUNC_MF_CFG_PROTOCOL_MAX	0x00000030
+#define FUNC_MF_CFG_PROTOCOL_NVMETCP    0x00000040
+#define FUNC_MF_CFG_PROTOCOL_MAX	0x00000040
 
 #define FUNC_MF_CFG_MIN_BW_MASK		0x0000ff00
 #define FUNC_MF_CFG_MIN_BW_SHIFT	8
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 286e53927866..02a4610d9330 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -960,7 +960,8 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 
 	if (test_bit(QED_MF_LL2_NON_UNICAST, &p_hwfn->cdev->mf_bits) &&
 	    p_ramrod->main_func_queue && conn_type != QED_LL2_TYPE_ROCE &&
-	    conn_type != QED_LL2_TYPE_IWARP) {
+	    conn_type != QED_LL2_TYPE_IWARP &&
+		(!QED_IS_NVMETCP_PERSONALITY(p_hwfn))) {
 		p_ramrod->mf_si_bcast_accept_all = 1;
 		p_ramrod->mf_si_mcast_accept_all = 1;
 	} else {
@@ -1047,7 +1048,8 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->conn_type = PROTOCOLID_IWARP;
 		break;
 	case QED_LL2_TYPE_OOO:
-		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI ||
+		    p_hwfn->hw_info.personality == QED_PCI_NVMETCP)
 			p_ramrod->conn_type = PROTOCOLID_TCP_ULP;
 		else
 			p_ramrod->conn_type = PROTOCOLID_IWARP;
@@ -1634,7 +1636,8 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 	if (rc)
 		goto out;
 
-	if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
+	if (!QED_IS_RDMA_PERSONALITY(p_hwfn) &&
+	    !QED_IS_NVMETCP_PERSONALITY(p_hwfn))
 		qed_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1);
 
 	qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
@@ -2376,7 +2379,8 @@ out:
 static bool qed_ll2_is_storage_eng1(struct qed_dev *cdev)
 {
 	return (QED_IS_FCOE_PERSONALITY(QED_LEADING_HWFN(cdev)) ||
-		QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev))) &&
+		QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev)) ||
+		QED_IS_NVMETCP_PERSONALITY(QED_LEADING_HWFN(cdev))) &&
 		(QED_AFFIN_HWFN(cdev) != QED_LEADING_HWFN(cdev));
 }
 
@@ -2402,11 +2406,13 @@ static int qed_ll2_stop(struct qed_dev *cdev)
 
 	if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
 		return 0;
+	if (!QED_IS_NVMETCP_PERSONALITY(p_hwfn))
+		qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address);
 
 	qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address);
 	eth_zero_addr(cdev->ll2_mac_address);
 
-	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn) || QED_IS_NVMETCP_PERSONALITY(p_hwfn))
 		qed_ll2_stop_ooo(p_hwfn);
 
 	/* In CMT mode, LL2 is always started on engine 0 for a storage PF */
@@ -2442,6 +2448,7 @@ static int __qed_ll2_start(struct qed_hwfn *p_hwfn,
 		conn_type = QED_LL2_TYPE_FCOE;
 		break;
 	case QED_PCI_ISCSI:
+	case QED_PCI_NVMETCP:
 		conn_type = QED_LL2_TYPE_TCP_ULP;
 		break;
 	case QED_PCI_ETH_ROCE:
@@ -2567,7 +2574,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 		}
 	}
 
-	if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) {
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn) || QED_IS_NVMETCP_PERSONALITY(p_hwfn)) {
 		DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
 		rc = qed_ll2_start_ooo(p_hwfn, params);
 		if (rc) {
@@ -2576,10 +2583,13 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 		}
 	}
 
-	rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address);
-	if (rc) {
-		DP_NOTICE(cdev, "Failed to add an LLH filter\n");
-		goto err3;
+	if (!QED_IS_NVMETCP_PERSONALITY(p_hwfn)) {
+		rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address);
+		if (rc) {
+			DP_NOTICE(cdev, "Failed to add an LLH filter\n");
+			goto err3;
+		}
+
 	}
 
 	ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
@@ -2587,7 +2597,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	return 0;
 
 err3:
-	if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+	if (QED_IS_ISCSI_PERSONALITY(p_hwfn) || QED_IS_NVMETCP_PERSONALITY(p_hwfn))
 		qed_ll2_stop_ooo(p_hwfn);
 err2:
 	if (b_is_storage_eng1)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index cd882c453394..4387292c37e2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2446,6 +2446,9 @@ qed_mcp_get_shmem_proto(struct qed_hwfn *p_hwfn,
 	case FUNC_MF_CFG_PROTOCOL_ISCSI:
 		*p_proto = QED_PCI_ISCSI;
 		break;
+	case FUNC_MF_CFG_PROTOCOL_NVMETCP:
+		*p_proto = QED_PCI_NVMETCP;
+		break;
 	case FUNC_MF_CFG_PROTOCOL_FCOE:
 		*p_proto = QED_PCI_FCOE;
 		break;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
index 3e3192a3ad9b..6190adf965bc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
@@ -1306,7 +1306,8 @@ int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	}
 
 	if ((tlv_group & QED_MFW_TLV_ISCSI) &&
-	    p_hwfn->hw_info.personality != QED_PCI_ISCSI) {
+	    p_hwfn->hw_info.personality != QED_PCI_ISCSI &&
+		p_hwfn->hw_info.personality != QED_PCI_NVMETCP) {
 		DP_VERBOSE(p_hwfn, QED_MSG_SP,
 			   "Skipping iSCSI TLVs for non-iSCSI function\n");
 		tlv_group &= ~QED_MFW_TLV_ISCSI;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
new file mode 100644
index 000000000000..cb9c71109b2d
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/* Copyright 2021 Marvell. All rights reserved. */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/param.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/qed/qed_nvmetcp_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_dev_api.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_int.h"
+#include "qed_nvmetcp.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+#include "qed_reg_addr.h"
+
+static int qed_nvmetcp_async_event(struct qed_hwfn *p_hwfn, u8 fw_event_code,
+				   u16 echo, union event_ring_data *data,
+				   u8 fw_return_code)
+{
+	if (p_hwfn->p_nvmetcp_info->event_cb) {
+		struct qed_nvmetcp_info *p_nvmetcp = p_hwfn->p_nvmetcp_info;
+
+		return p_nvmetcp->event_cb(p_nvmetcp->event_context,
+					 fw_event_code, data);
+	} else {
+		DP_NOTICE(p_hwfn, "nvmetcp async completion is not set\n");
+
+		return -EINVAL;
+	}
+}
+
+static int qed_sp_nvmetcp_func_start(struct qed_hwfn *p_hwfn,
+				     enum spq_mode comp_mode,
+				     struct qed_spq_comp_cb *p_comp_addr,
+				     void *event_context,
+				     nvmetcp_event_cb_t async_event_cb)
+{
+	struct nvmetcp_init_ramrod_params *p_ramrod = NULL;
+	struct qed_nvmetcp_pf_params *p_params = NULL;
+	struct scsi_init_func_queues *p_queue = NULL;
+	struct nvmetcp_spe_func_init *p_init = NULL;
+	struct qed_sp_init_data init_data = {};
+	struct qed_spq_entry *p_ent = NULL;
+	int rc = 0;
+	u16 val;
+	u8 i;
+
+	/* Get SPQ entry */
+	init_data.cid = qed_spq_get_cid(p_hwfn);
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 NVMETCP_RAMROD_CMD_ID_INIT_FUNC,
+				 PROTOCOLID_TCP_ULP, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.nvmetcp_init;
+	p_init = &p_ramrod->nvmetcp_init_spe;
+	p_params = &p_hwfn->pf_params.nvmetcp_pf_params;
+	p_queue = &p_init->q_params;
+	p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
+	p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
+	p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+	p_init->ll2_rx_queue_id = RESC_START(p_hwfn, QED_LL2_RAM_QUEUE) +
+					p_params->ll2_ooo_queue_id;
+	SET_FIELD(p_init->flags, NVMETCP_SPE_FUNC_INIT_NVMETCP_MODE, 1);
+	p_init->func_params.log_page_size = ilog2(PAGE_SIZE);
+	p_init->func_params.num_tasks = cpu_to_le16(p_params->num_tasks);
+	p_init->debug_flags = p_params->debug_mode;
+	DMA_REGPAIR_LE(p_queue->glbl_q_params_addr,
+		       p_params->glbl_q_params_addr);
+	p_queue->cq_num_entries = cpu_to_le16(QED_NVMETCP_FW_CQ_SIZE);
+	p_queue->num_queues = p_params->num_queues;
+	val = RESC_START(p_hwfn, QED_CMDQS_CQS);
+	p_queue->queue_relative_offset = cpu_to_le16((u16)val);
+	p_queue->cq_sb_pi = p_params->gl_rq_pi;
+
+	for (i = 0; i < p_params->num_queues; i++) {
+		val = qed_get_igu_sb_id(p_hwfn, i);
+		p_queue->cq_cmdq_sb_num_arr[i] = cpu_to_le16(val);
+	}
+
+	SET_FIELD(p_queue->q_validity,
+		  SCSI_INIT_FUNC_QUEUES_CMD_VALID, 0);
+	p_queue->cmdq_num_entries = 0;
+	p_queue->bdq_resource_id = (u8)RESC_START(p_hwfn, QED_BDQ);
+	p_ramrod->tcp_init.two_msl_timer = cpu_to_le32(QED_TCP_TWO_MSL_TIMER);
+	p_ramrod->tcp_init.tx_sws_timer = cpu_to_le16(QED_TCP_SWS_TIMER);
+	p_init->half_way_close_timeout = cpu_to_le16(QED_TCP_HALF_WAY_CLOSE_TIMEOUT);
+	p_ramrod->tcp_init.max_fin_rt = QED_TCP_MAX_FIN_RT;
+	SET_FIELD(p_ramrod->nvmetcp_init_spe.params,
+		  NVMETCP_SPE_FUNC_INIT_MAX_SYN_RT, QED_TCP_MAX_FIN_RT);
+	p_hwfn->p_nvmetcp_info->event_context = event_context;
+	p_hwfn->p_nvmetcp_info->event_cb = async_event_cb;
+	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_TCP_ULP,
+				  qed_nvmetcp_async_event);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_nvmetcp_func_stop(struct qed_hwfn *p_hwfn,
+				    enum spq_mode comp_mode,
+				    struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qed_spq_get_cid(p_hwfn);
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 NVMETCP_RAMROD_CMD_ID_DESTROY_FUNC,
+				 PROTOCOLID_TCP_ULP, &init_data);
+	if (rc)
+		return rc;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_TCP_ULP);
+
+	return rc;
+}
+
+static int qed_fill_nvmetcp_dev_info(struct qed_dev *cdev,
+				     struct qed_dev_nvmetcp_info *info)
+{
+	struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
+	int rc;
+
+	memset(info, 0, sizeof(*info));
+	rc = qed_fill_dev_info(cdev, &info->common);
+	info->port_id = MFW_PORT(hwfn);
+	info->num_cqs = FEAT_NUM(hwfn, QED_NVMETCP_CQ);
+
+	return rc;
+}
+
+static void qed_register_nvmetcp_ops(struct qed_dev *cdev,
+				     struct qed_nvmetcp_cb_ops *ops,
+				     void *cookie)
+{
+	cdev->protocol_ops.nvmetcp = ops;
+	cdev->ops_cookie = cookie;
+}
+
+static int qed_nvmetcp_stop(struct qed_dev *cdev)
+{
+	int rc;
+
+	if (!(cdev->flags & QED_FLAG_STORAGE_STARTED)) {
+		DP_NOTICE(cdev, "nvmetcp already stopped\n");
+
+		return 0;
+	}
+
+	if (!hash_empty(cdev->connections)) {
+		DP_NOTICE(cdev,
+			  "Can't stop nvmetcp - not all connections were returned\n");
+
+		return -EINVAL;
+	}
+
+	/* Stop the nvmetcp */
+	rc = qed_sp_nvmetcp_func_stop(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+				      NULL);
+	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
+
+	return rc;
+}
+
+static int qed_nvmetcp_start(struct qed_dev *cdev,
+			     struct qed_nvmetcp_tid *tasks,
+			     void *event_context,
+			     nvmetcp_event_cb_t async_event_cb)
+{
+	struct qed_tid_mem *tid_info;
+	int rc;
+
+	if (cdev->flags & QED_FLAG_STORAGE_STARTED) {
+		DP_NOTICE(cdev, "nvmetcp already started;\n");
+
+		return 0;
+	}
+
+	rc = qed_sp_nvmetcp_func_start(QED_AFFIN_HWFN(cdev),
+				       QED_SPQ_MODE_EBLOCK, NULL,
+				       event_context, async_event_cb);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to start nvmetcp\n");
+
+		return rc;
+	}
+
+	cdev->flags |= QED_FLAG_STORAGE_STARTED;
+	hash_init(cdev->connections);
+
+	if (!tasks)
+		return 0;
+
+	tid_info = kzalloc(sizeof(*tid_info), GFP_KERNEL);
+	if (!tid_info) {
+		qed_nvmetcp_stop(cdev);
+
+		return -ENOMEM;
+	}
+
+	rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to gather task information\n");
+		qed_nvmetcp_stop(cdev);
+		kfree(tid_info);
+
+		return rc;
+	}
+
+	/* Fill task information */
+	tasks->size = tid_info->tid_size;
+	tasks->num_tids_per_block = tid_info->num_tids_per_block;
+	memcpy(tasks->blocks, tid_info->blocks,
+	       MAX_TID_BLOCKS_NVMETCP * sizeof(u8 *));
+	kfree(tid_info);
+
+	return 0;
+}
+
+static const struct qed_nvmetcp_ops qed_nvmetcp_ops_pass = {
+	.common = &qed_common_ops_pass,
+	.ll2 = &qed_ll2_ops_pass,
+	.fill_dev_info = &qed_fill_nvmetcp_dev_info,
+	.register_ops = &qed_register_nvmetcp_ops,
+	.start = &qed_nvmetcp_start,
+	.stop = &qed_nvmetcp_stop,
+
+	/* Placeholder - Connection level ops */
+};
+
+const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void)
+{
+	return &qed_nvmetcp_ops_pass;
+}
+EXPORT_SYMBOL(qed_get_nvmetcp_ops);
+
+void qed_put_nvmetcp_ops(void)
+{
+}
+EXPORT_SYMBOL(qed_put_nvmetcp_ops);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h
new file mode 100644
index 000000000000..774b46ade408
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/* Copyright 2021 Marvell. All rights reserved. */
+
+#ifndef _QED_NVMETCP_H
+#define _QED_NVMETCP_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/qed/tcp_common.h>
+#include <linux/qed/qed_nvmetcp_if.h>
+#include <linux/qed/qed_chain.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+
+#define QED_NVMETCP_FW_CQ_SIZE (4 * 1024)
+
+/* tcp parameters */
+#define QED_TCP_TWO_MSL_TIMER 4000
+#define QED_TCP_HALF_WAY_CLOSE_TIMEOUT 10
+#define QED_TCP_MAX_FIN_RT 2
+#define QED_TCP_SWS_TIMER 5000
+
+struct qed_nvmetcp_info {
+	spinlock_t lock; /* Connection resources. */
+	struct list_head free_list;
+	u16 max_num_outstanding_tasks;
+	void *event_context;
+	nvmetcp_event_cb_t event_cb;
+};
+
+#if IS_ENABLED(CONFIG_QED_NVMETCP)
+int qed_nvmetcp_alloc(struct qed_hwfn *p_hwfn);
+void qed_nvmetcp_setup(struct qed_hwfn *p_hwfn);
+void qed_nvmetcp_free(struct qed_hwfn *p_hwfn);
+
+#else /* IS_ENABLED(CONFIG_QED_NVMETCP) */
+static inline int qed_nvmetcp_alloc(struct qed_hwfn *p_hwfn)
+{
+	return -EINVAL;
+}
+
+static inline void qed_nvmetcp_setup(struct qed_hwfn *p_hwfn) {}
+static inline void qed_nvmetcp_free(struct qed_hwfn *p_hwfn) {}
+
+#endif /* IS_ENABLED(CONFIG_QED_NVMETCP) */
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 599da0d7366b..b8c5641b29a8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -16,7 +16,7 @@
 #include "qed_ll2.h"
 #include "qed_ooo.h"
 #include "qed_cxt.h"
-
+#include "qed_nvmetcp.h"
 static struct qed_ooo_archipelago
 *qed_ooo_seek_archipelago(struct qed_hwfn *p_hwfn,
 			  struct qed_ooo_info
@@ -83,6 +83,7 @@ int qed_ooo_alloc(struct qed_hwfn *p_hwfn)
 
 	switch (p_hwfn->hw_info.personality) {
 	case QED_PCI_ISCSI:
+	case QED_PCI_NVMETCP:
 		proto = PROTOCOLID_TCP_ULP;
 		break;
 	case QED_PCI_ETH_RDMA:
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 993f1357b6fc..525159e747a5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -100,6 +100,8 @@ union ramrod_data {
 	struct iscsi_spe_conn_mac_update iscsi_conn_mac_update;
 	struct iscsi_spe_conn_termination iscsi_conn_terminate;
 
+	struct nvmetcp_init_ramrod_params nvmetcp_init;
+
 	struct vf_start_ramrod_data vf_start;
 	struct vf_stop_ramrod_data vf_stop;
 };
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index ee7dc0a7da6c..b4ed54ffef9b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -385,6 +385,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->personality = PERSONALITY_FCOE;
 		break;
 	case QED_PCI_ISCSI:
+	case QED_PCI_NVMETCP:
 		p_ramrod->personality = PERSONALITY_TCP_ULP;
 		break;
 	case QED_PCI_ETH_ROCE:
diff --git a/include/linux/qed/nvmetcp_common.h b/include/linux/qed/nvmetcp_common.h
new file mode 100644
index 000000000000..e9ccfc07041d
--- /dev/null
+++ b/include/linux/qed/nvmetcp_common.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/* Copyright 2021 Marvell. All rights reserved. */
+
+#ifndef __NVMETCP_COMMON__
+#define __NVMETCP_COMMON__
+
+#include "tcp_common.h"
+
+/* NVMeTCP firmware function init parameters */
+struct nvmetcp_spe_func_init {
+	__le16 half_way_close_timeout;
+	u8 num_sq_pages_in_ring;
+	u8 num_r2tq_pages_in_ring;
+	u8 num_uhq_pages_in_ring;
+	u8 ll2_rx_queue_id;
+	u8 flags;
+#define NVMETCP_SPE_FUNC_INIT_COUNTERS_EN_MASK 0x1
+#define NVMETCP_SPE_FUNC_INIT_COUNTERS_EN_SHIFT 0
+#define NVMETCP_SPE_FUNC_INIT_NVMETCP_MODE_MASK 0x1
+#define NVMETCP_SPE_FUNC_INIT_NVMETCP_MODE_SHIFT 1
+#define NVMETCP_SPE_FUNC_INIT_RESERVED0_MASK 0x3F
+#define NVMETCP_SPE_FUNC_INIT_RESERVED0_SHIFT 2
+	u8 debug_flags;
+	__le16 reserved1;
+	u8 params;
+#define NVMETCP_SPE_FUNC_INIT_MAX_SYN_RT_MASK	0xF
+#define NVMETCP_SPE_FUNC_INIT_MAX_SYN_RT_SHIFT	0
+#define NVMETCP_SPE_FUNC_INIT_RESERVED1_MASK	0xF
+#define NVMETCP_SPE_FUNC_INIT_RESERVED1_SHIFT	4
+	u8 reserved2[5];
+	struct scsi_init_func_params func_params;
+	struct scsi_init_func_queues q_params;
+};
+
+/* NVMeTCP init params passed by driver to FW in NVMeTCP init ramrod. */
+struct nvmetcp_init_ramrod_params {
+	struct nvmetcp_spe_func_init nvmetcp_init_spe;
+	struct tcp_init_params tcp_init;
+};
+
+/* NVMeTCP Ramrod Command IDs */
+enum nvmetcp_ramrod_cmd_id {
+	NVMETCP_RAMROD_CMD_ID_UNUSED = 0,
+	NVMETCP_RAMROD_CMD_ID_INIT_FUNC = 1,
+	NVMETCP_RAMROD_CMD_ID_DESTROY_FUNC = 2,
+	MAX_NVMETCP_RAMROD_CMD_ID
+};
+
+struct nvmetcp_glbl_queue_entry {
+	struct regpair cq_pbl_addr;
+	struct regpair reserved;
+};
+
+#endif /* __NVMETCP_COMMON__ */
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 68d17a4fbf20..850b98991670 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -542,6 +542,22 @@ struct qed_iscsi_pf_params {
 	u8 bdq_pbl_num_entries[3];
 };
 
+struct qed_nvmetcp_pf_params {
+	u64 glbl_q_params_addr;
+	u16 cq_num_entries;
+	u16 num_cons;
+	u16 num_tasks;
+	u8 num_sq_pages_in_ring;
+	u8 num_r2tq_pages_in_ring;
+	u8 num_uhq_pages_in_ring;
+	u8 num_queues;
+	u8 gl_rq_pi;
+	u8 gl_cmd_pi;
+	u8 debug_mode;
+	u8 ll2_ooo_queue_id;
+	u16 min_rto;
+};
+
 struct qed_rdma_pf_params {
 	/* Supplied to QED during resource allocation (may affect the ILT and
 	 * the doorbell BAR).
@@ -560,6 +576,7 @@ struct qed_pf_params {
 	struct qed_eth_pf_params eth_pf_params;
 	struct qed_fcoe_pf_params fcoe_pf_params;
 	struct qed_iscsi_pf_params iscsi_pf_params;
+	struct qed_nvmetcp_pf_params nvmetcp_pf_params;
 	struct qed_rdma_pf_params rdma_pf_params;
 };
 
@@ -662,6 +679,7 @@ enum qed_sb_type {
 enum qed_protocol {
 	QED_PROTOCOL_ETH,
 	QED_PROTOCOL_ISCSI,
+	QED_PROTOCOL_NVMETCP = QED_PROTOCOL_ISCSI,
 	QED_PROTOCOL_FCOE,
 };
 
diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
new file mode 100644
index 000000000000..76868bdf0883
--- /dev/null
+++ b/include/linux/qed/qed_nvmetcp_if.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/* Copyright 2021 Marvell. All rights reserved. */
+
+#ifndef _QED_NVMETCP_IF_H
+#define _QED_NVMETCP_IF_H
+#include <linux/types.h>
+#include <linux/qed/qed_if.h>
+
+#define QED_NVMETCP_MAX_IO_SIZE	0x800000
+
+typedef int (*nvmetcp_event_cb_t) (void *context,
+				   u8 fw_event_code, void *fw_handle);
+
+struct qed_dev_nvmetcp_info {
+	struct qed_dev_info common;
+	u8 port_id;  /* Physical port */
+	u8 num_cqs;
+};
+
+#define MAX_TID_BLOCKS_NVMETCP (512)
+struct qed_nvmetcp_tid {
+	u32 size;		/* In bytes per task */
+	u32 num_tids_per_block;
+	u8 *blocks[MAX_TID_BLOCKS_NVMETCP];
+};
+
+struct qed_nvmetcp_cb_ops {
+	struct qed_common_cb_ops common;
+};
+
+/**
+ * struct qed_nvmetcp_ops - qed NVMeTCP operations.
+ * @common:		common operations pointer
+ * @ll2:		light L2 operations pointer
+ * @fill_dev_info:	fills NVMeTCP specific information
+ *			@param cdev
+ *			@param info
+ *			@return 0 on success, otherwise error value.
+ * @register_ops:	register nvmetcp operations
+ *			@param cdev
+ *			@param ops - specified using qed_nvmetcp_cb_ops
+ *			@param cookie - driver private
+ * @start:		nvmetcp in FW
+ *			@param cdev
+ *			@param tasks - qed will fill information about tasks
+ *			return 0 on success, otherwise error value.
+ * @stop:		nvmetcp in FW
+ *			@param cdev
+ *			return 0 on success, otherwise error value.
+ */
+struct qed_nvmetcp_ops {
+	const struct qed_common_ops *common;
+
+	const struct qed_ll2_ops *ll2;
+
+	int (*fill_dev_info)(struct qed_dev *cdev,
+			     struct qed_dev_nvmetcp_info *info);
+
+	void (*register_ops)(struct qed_dev *cdev,
+			     struct qed_nvmetcp_cb_ops *ops, void *cookie);
+
+	int (*start)(struct qed_dev *cdev,
+		     struct qed_nvmetcp_tid *tasks,
+		     void *event_context, nvmetcp_event_cb_t async_event_cb);
+
+	int (*stop)(struct qed_dev *cdev);
+};
+
+const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void);
+void qed_put_nvmetcp_ops(void);
+#endif

From 76684ab8f4f95394df6a752cee37b197b4c8732b Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 2 Jun 2021 20:16:51 +0300
Subject: [PATCH 0939/3292] qed: Add NVMeTCP Offload Connection Level FW and HW
 HSI

This patch introduces the NVMeTCP HSI and HSI functionality in order to
initialize and interact with the HW device as part of the connection level
HSI.

This includes:
- Connection offload: offload a TCP connection to the FW.
- Connection update: update the ICReq-ICResp params
- Connection clear SQ: outstanding IOs FW flush.
- Connection termination: terminate the TCP connection and flush the FW.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c | 557 +++++++++++++++++-
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h |  52 ++
 drivers/net/ethernet/qlogic/qed/qed_sp.h      |   3 +
 include/linux/qed/nvmetcp_common.h            | 143 +++++
 include/linux/qed/qed_nvmetcp_if.h            |  94 +++
 5 files changed, 847 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
index cb9c71109b2d..7943804e88cd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
@@ -243,6 +243,555 @@ static int qed_nvmetcp_start(struct qed_dev *cdev,
 	return 0;
 }
 
+static struct qed_hash_nvmetcp_con *qed_nvmetcp_get_hash(struct qed_dev *cdev,
+							 u32 handle)
+{
+	struct qed_hash_nvmetcp_con *hash_con = NULL;
+
+	if (!(cdev->flags & QED_FLAG_STORAGE_STARTED))
+		return NULL;
+
+	hash_for_each_possible(cdev->connections, hash_con, node, handle) {
+		if (hash_con->con->icid == handle)
+			break;
+	}
+
+	if (!hash_con || hash_con->con->icid != handle)
+		return NULL;
+
+	return hash_con;
+}
+
+static int qed_sp_nvmetcp_conn_offload(struct qed_hwfn *p_hwfn,
+				       struct qed_nvmetcp_conn *p_conn,
+				       enum spq_mode comp_mode,
+				       struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct nvmetcp_spe_conn_offload *p_ramrod = NULL;
+	struct tcp_offload_params_opt2 *p_tcp = NULL;
+	struct qed_sp_init_data init_data = { 0 };
+	struct qed_spq_entry *p_ent = NULL;
+	dma_addr_t r2tq_pbl_addr;
+	dma_addr_t xhq_pbl_addr;
+	dma_addr_t uhq_pbl_addr;
+	u16 physical_q;
+	int rc = 0;
+	u8 i;
+
+	/* Get SPQ entry */
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 NVMETCP_RAMROD_CMD_ID_OFFLOAD_CONN,
+				 PROTOCOLID_TCP_ULP, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.nvmetcp_conn_offload;
+
+	/* Transmission PQ is the first of the PF */
+	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+	p_conn->physical_q0 = cpu_to_le16(physical_q);
+	p_ramrod->nvmetcp.physical_q0 = cpu_to_le16(physical_q);
+
+	/* nvmetcp Pure-ACK PQ */
+	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+	p_conn->physical_q1 = cpu_to_le16(physical_q);
+	p_ramrod->nvmetcp.physical_q1 = cpu_to_le16(physical_q);
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	DMA_REGPAIR_LE(p_ramrod->nvmetcp.sq_pbl_addr, p_conn->sq_pbl_addr);
+	r2tq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->r2tq);
+	DMA_REGPAIR_LE(p_ramrod->nvmetcp.r2tq_pbl_addr, r2tq_pbl_addr);
+	xhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->xhq);
+	DMA_REGPAIR_LE(p_ramrod->nvmetcp.xhq_pbl_addr, xhq_pbl_addr);
+	uhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->uhq);
+	DMA_REGPAIR_LE(p_ramrod->nvmetcp.uhq_pbl_addr, uhq_pbl_addr);
+	p_ramrod->nvmetcp.flags = p_conn->offl_flags;
+	p_ramrod->nvmetcp.default_cq = p_conn->default_cq;
+	p_ramrod->nvmetcp.initial_ack = 0;
+	DMA_REGPAIR_LE(p_ramrod->nvmetcp.nvmetcp.cccid_itid_table_addr,
+		       p_conn->nvmetcp_cccid_itid_table_addr);
+	p_ramrod->nvmetcp.nvmetcp.cccid_max_range =
+		 cpu_to_le16(p_conn->nvmetcp_cccid_max_range);
+	p_tcp = &p_ramrod->tcp;
+	qed_set_fw_mac_addr(&p_tcp->remote_mac_addr_hi,
+			    &p_tcp->remote_mac_addr_mid,
+			    &p_tcp->remote_mac_addr_lo, p_conn->remote_mac);
+	qed_set_fw_mac_addr(&p_tcp->local_mac_addr_hi,
+			    &p_tcp->local_mac_addr_mid,
+			    &p_tcp->local_mac_addr_lo, p_conn->local_mac);
+	p_tcp->vlan_id = cpu_to_le16(p_conn->vlan_id);
+	p_tcp->flags = cpu_to_le16(p_conn->tcp_flags);
+	p_tcp->ip_version = p_conn->ip_version;
+	if (p_tcp->ip_version == TCP_IPV6) {
+		for (i = 0; i < 4; i++) {
+			p_tcp->remote_ip[i] = cpu_to_le32(p_conn->remote_ip[i]);
+			p_tcp->local_ip[i] = cpu_to_le32(p_conn->local_ip[i]);
+		}
+	} else {
+		p_tcp->remote_ip[0] = cpu_to_le32(p_conn->remote_ip[0]);
+		p_tcp->local_ip[0] = cpu_to_le32(p_conn->local_ip[0]);
+	}
+
+	p_tcp->flow_label = cpu_to_le32(p_conn->flow_label);
+	p_tcp->ttl = p_conn->ttl;
+	p_tcp->tos_or_tc = p_conn->tos_or_tc;
+	p_tcp->remote_port = cpu_to_le16(p_conn->remote_port);
+	p_tcp->local_port = cpu_to_le16(p_conn->local_port);
+	p_tcp->mss = cpu_to_le16(p_conn->mss);
+	p_tcp->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+	p_tcp->connect_mode = p_conn->connect_mode;
+	p_tcp->cwnd = cpu_to_le32(p_conn->cwnd);
+	p_tcp->ka_max_probe_cnt = p_conn->ka_max_probe_cnt;
+	p_tcp->ka_timeout = cpu_to_le32(p_conn->ka_timeout);
+	p_tcp->max_rt_time = cpu_to_le32(p_conn->max_rt_time);
+	p_tcp->ka_interval = cpu_to_le32(p_conn->ka_interval);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_nvmetcp_conn_update(struct qed_hwfn *p_hwfn,
+				      struct qed_nvmetcp_conn *p_conn,
+				      enum spq_mode comp_mode,
+				      struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct nvmetcp_conn_update_ramrod_params *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+	u32 dval;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 NVMETCP_RAMROD_CMD_ID_UPDATE_CONN,
+				 PROTOCOLID_TCP_ULP, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.nvmetcp_conn_update;
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->flags = p_conn->update_flag;
+	p_ramrod->max_seq_size = cpu_to_le32(p_conn->max_seq_size);
+	dval = p_conn->max_recv_pdu_length;
+	p_ramrod->max_recv_pdu_length = cpu_to_le32(dval);
+	dval = p_conn->max_send_pdu_length;
+	p_ramrod->max_send_pdu_length = cpu_to_le32(dval);
+	p_ramrod->first_seq_length = cpu_to_le32(p_conn->first_seq_length);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_nvmetcp_conn_terminate(struct qed_hwfn *p_hwfn,
+					 struct qed_nvmetcp_conn *p_conn,
+					 enum spq_mode comp_mode,
+					 struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct nvmetcp_spe_conn_termination *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 NVMETCP_RAMROD_CMD_ID_TERMINATION_CONN,
+				 PROTOCOLID_TCP_ULP, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.nvmetcp_conn_terminate;
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->abortive = p_conn->abortive_dsconnect;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_nvmetcp_conn_clear_sq(struct qed_hwfn *p_hwfn,
+					struct qed_nvmetcp_conn *p_conn,
+					enum spq_mode comp_mode,
+					struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 NVMETCP_RAMROD_CMD_ID_CLEAR_SQ,
+				 PROTOCOLID_TCP_ULP, &init_data);
+	if (rc)
+		return rc;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static void __iomem *qed_nvmetcp_get_db_addr(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	return (u8 __iomem *)p_hwfn->doorbells +
+			     qed_db_addr(cid, DQ_DEMS_LEGACY);
+}
+
+static int qed_nvmetcp_allocate_connection(struct qed_hwfn *p_hwfn,
+					   struct qed_nvmetcp_conn **p_out_conn)
+{
+	struct qed_chain_init_params params = {
+		.mode		= QED_CHAIN_MODE_PBL,
+		.intended_use	= QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+		.cnt_type	= QED_CHAIN_CNT_TYPE_U16,
+	};
+	struct qed_nvmetcp_pf_params *p_params = NULL;
+	struct qed_nvmetcp_conn *p_conn = NULL;
+	int rc = 0;
+
+	/* Try finding a free connection that can be used */
+	spin_lock_bh(&p_hwfn->p_nvmetcp_info->lock);
+	if (!list_empty(&p_hwfn->p_nvmetcp_info->free_list))
+		p_conn = list_first_entry(&p_hwfn->p_nvmetcp_info->free_list,
+					  struct qed_nvmetcp_conn, list_entry);
+	if (p_conn) {
+		list_del(&p_conn->list_entry);
+		spin_unlock_bh(&p_hwfn->p_nvmetcp_info->lock);
+		*p_out_conn = p_conn;
+
+		return 0;
+	}
+	spin_unlock_bh(&p_hwfn->p_nvmetcp_info->lock);
+
+	/* Need to allocate a new connection */
+	p_params = &p_hwfn->pf_params.nvmetcp_pf_params;
+	p_conn = kzalloc(sizeof(*p_conn), GFP_KERNEL);
+	if (!p_conn)
+		return -ENOMEM;
+
+	params.num_elems = p_params->num_r2tq_pages_in_ring *
+			   QED_CHAIN_PAGE_SIZE / sizeof(struct nvmetcp_wqe);
+	params.elem_size = sizeof(struct nvmetcp_wqe);
+	rc = qed_chain_alloc(p_hwfn->cdev, &p_conn->r2tq, &params);
+	if (rc)
+		goto nomem_r2tq;
+
+	params.num_elems = p_params->num_uhq_pages_in_ring *
+			   QED_CHAIN_PAGE_SIZE / sizeof(struct iscsi_uhqe);
+	params.elem_size = sizeof(struct iscsi_uhqe);
+	rc = qed_chain_alloc(p_hwfn->cdev, &p_conn->uhq, &params);
+	if (rc)
+		goto nomem_uhq;
+
+	params.elem_size = sizeof(struct iscsi_xhqe);
+	rc = qed_chain_alloc(p_hwfn->cdev, &p_conn->xhq, &params);
+	if (rc)
+		goto nomem;
+
+	p_conn->free_on_delete = true;
+	*p_out_conn = p_conn;
+
+	return 0;
+
+nomem:
+	qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+nomem_uhq:
+	qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+nomem_r2tq:
+	kfree(p_conn);
+
+	return -ENOMEM;
+}
+
+static int qed_nvmetcp_acquire_connection(struct qed_hwfn *p_hwfn,
+					  struct qed_nvmetcp_conn **p_out_conn)
+{
+	struct qed_nvmetcp_conn *p_conn = NULL;
+	int rc = 0;
+	u32 icid;
+
+	spin_lock_bh(&p_hwfn->p_nvmetcp_info->lock);
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_TCP_ULP, &icid);
+	spin_unlock_bh(&p_hwfn->p_nvmetcp_info->lock);
+
+	if (rc)
+		return rc;
+
+	rc = qed_nvmetcp_allocate_connection(p_hwfn, &p_conn);
+	if (rc) {
+		spin_lock_bh(&p_hwfn->p_nvmetcp_info->lock);
+		qed_cxt_release_cid(p_hwfn, icid);
+		spin_unlock_bh(&p_hwfn->p_nvmetcp_info->lock);
+
+		return rc;
+	}
+
+	p_conn->icid = icid;
+	p_conn->conn_id = (u16)icid;
+	p_conn->fw_cid = (p_hwfn->hw_info.opaque_fid << 16) | icid;
+	*p_out_conn = p_conn;
+
+	return rc;
+}
+
+static void qed_nvmetcp_release_connection(struct qed_hwfn *p_hwfn,
+					   struct qed_nvmetcp_conn *p_conn)
+{
+	spin_lock_bh(&p_hwfn->p_nvmetcp_info->lock);
+	list_add_tail(&p_conn->list_entry, &p_hwfn->p_nvmetcp_info->free_list);
+	qed_cxt_release_cid(p_hwfn, p_conn->icid);
+	spin_unlock_bh(&p_hwfn->p_nvmetcp_info->lock);
+}
+
+static void qed_nvmetcp_free_connection(struct qed_hwfn *p_hwfn,
+					struct qed_nvmetcp_conn *p_conn)
+{
+	qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+	qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+	qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+	kfree(p_conn);
+}
+
+int qed_nvmetcp_alloc(struct qed_hwfn *p_hwfn)
+{
+	struct qed_nvmetcp_info *p_nvmetcp_info;
+
+	p_nvmetcp_info = kzalloc(sizeof(*p_nvmetcp_info), GFP_KERNEL);
+	if (!p_nvmetcp_info)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&p_nvmetcp_info->free_list);
+	p_hwfn->p_nvmetcp_info = p_nvmetcp_info;
+
+	return 0;
+}
+
+void qed_nvmetcp_setup(struct qed_hwfn *p_hwfn)
+{
+	spin_lock_init(&p_hwfn->p_nvmetcp_info->lock);
+}
+
+void qed_nvmetcp_free(struct qed_hwfn *p_hwfn)
+{
+	struct qed_nvmetcp_conn *p_conn = NULL;
+
+	if (!p_hwfn->p_nvmetcp_info)
+		return;
+
+	while (!list_empty(&p_hwfn->p_nvmetcp_info->free_list)) {
+		p_conn = list_first_entry(&p_hwfn->p_nvmetcp_info->free_list,
+					  struct qed_nvmetcp_conn, list_entry);
+		if (p_conn) {
+			list_del(&p_conn->list_entry);
+			qed_nvmetcp_free_connection(p_hwfn, p_conn);
+		}
+	}
+
+	kfree(p_hwfn->p_nvmetcp_info);
+	p_hwfn->p_nvmetcp_info = NULL;
+}
+
+static int qed_nvmetcp_acquire_conn(struct qed_dev *cdev,
+				    u32 *handle,
+				    u32 *fw_cid, void __iomem **p_doorbell)
+{
+	struct qed_hash_nvmetcp_con *hash_con;
+	int rc;
+
+	/* Allocate a hashed connection */
+	hash_con = kzalloc(sizeof(*hash_con), GFP_ATOMIC);
+	if (!hash_con)
+		return -ENOMEM;
+
+	/* Acquire the connection */
+	rc = qed_nvmetcp_acquire_connection(QED_AFFIN_HWFN(cdev),
+					    &hash_con->con);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to acquire Connection\n");
+		kfree(hash_con);
+
+		return rc;
+	}
+
+	/* Added the connection to hash table */
+	*handle = hash_con->con->icid;
+	*fw_cid = hash_con->con->fw_cid;
+	hash_add(cdev->connections, &hash_con->node, *handle);
+	if (p_doorbell)
+		*p_doorbell = qed_nvmetcp_get_db_addr(QED_AFFIN_HWFN(cdev),
+						      *handle);
+
+	return 0;
+}
+
+static int qed_nvmetcp_release_conn(struct qed_dev *cdev, u32 handle)
+{
+	struct qed_hash_nvmetcp_con *hash_con;
+
+	hash_con = qed_nvmetcp_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+
+		return -EINVAL;
+	}
+
+	hlist_del(&hash_con->node);
+	qed_nvmetcp_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
+	kfree(hash_con);
+
+	return 0;
+}
+
+static int qed_nvmetcp_offload_conn(struct qed_dev *cdev, u32 handle,
+				    struct qed_nvmetcp_params_offload *conn_info)
+{
+	struct qed_hash_nvmetcp_con *hash_con;
+	struct qed_nvmetcp_conn *con;
+
+	hash_con = qed_nvmetcp_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+
+		return -EINVAL;
+	}
+
+	/* Update the connection with information from the params */
+	con = hash_con->con;
+
+	/* FW initializations */
+	con->layer_code = NVMETCP_SLOW_PATH_LAYER_CODE;
+	con->sq_pbl_addr = conn_info->sq_pbl_addr;
+	con->nvmetcp_cccid_max_range = conn_info->nvmetcp_cccid_max_range;
+	con->nvmetcp_cccid_itid_table_addr = conn_info->nvmetcp_cccid_itid_table_addr;
+	con->default_cq = conn_info->default_cq;
+	SET_FIELD(con->offl_flags, NVMETCP_CONN_OFFLOAD_PARAMS_TARGET_MODE, 0);
+	SET_FIELD(con->offl_flags, NVMETCP_CONN_OFFLOAD_PARAMS_NVMETCP_MODE, 1);
+	SET_FIELD(con->offl_flags, NVMETCP_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B, 1);
+
+	/* Networking and TCP stack initializations */
+	ether_addr_copy(con->local_mac, conn_info->src.mac);
+	ether_addr_copy(con->remote_mac, conn_info->dst.mac);
+	memcpy(con->local_ip, conn_info->src.ip, sizeof(con->local_ip));
+	memcpy(con->remote_ip, conn_info->dst.ip, sizeof(con->remote_ip));
+	con->local_port = conn_info->src.port;
+	con->remote_port = conn_info->dst.port;
+	con->vlan_id = conn_info->vlan_id;
+
+	if (conn_info->timestamp_en)
+		SET_FIELD(con->tcp_flags, TCP_OFFLOAD_PARAMS_OPT2_TS_EN, 1);
+
+	if (conn_info->delayed_ack_en)
+		SET_FIELD(con->tcp_flags, TCP_OFFLOAD_PARAMS_OPT2_DA_EN, 1);
+
+	if (conn_info->tcp_keep_alive_en)
+		SET_FIELD(con->tcp_flags, TCP_OFFLOAD_PARAMS_OPT2_KA_EN, 1);
+
+	if (conn_info->ecn_en)
+		SET_FIELD(con->tcp_flags, TCP_OFFLOAD_PARAMS_OPT2_ECN_EN, 1);
+
+	con->ip_version = conn_info->ip_version;
+	con->flow_label = QED_TCP_FLOW_LABEL;
+	con->ka_max_probe_cnt = conn_info->ka_max_probe_cnt;
+	con->ka_timeout = conn_info->ka_timeout;
+	con->ka_interval = conn_info->ka_interval;
+	con->max_rt_time = conn_info->max_rt_time;
+	con->ttl = conn_info->ttl;
+	con->tos_or_tc = conn_info->tos_or_tc;
+	con->mss = conn_info->mss;
+	con->cwnd = conn_info->cwnd;
+	con->rcv_wnd_scale = conn_info->rcv_wnd_scale;
+	con->connect_mode = 0;
+
+	return qed_sp_nvmetcp_conn_offload(QED_AFFIN_HWFN(cdev), con,
+					 QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_nvmetcp_update_conn(struct qed_dev *cdev,
+				   u32 handle,
+				   struct qed_nvmetcp_params_update *conn_info)
+{
+	struct qed_hash_nvmetcp_con *hash_con;
+	struct qed_nvmetcp_conn *con;
+
+	hash_con = qed_nvmetcp_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+
+		return -EINVAL;
+	}
+
+	/* Update the connection with information from the params */
+	con = hash_con->con;
+	SET_FIELD(con->update_flag,
+		  ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T, 0);
+	SET_FIELD(con->update_flag,
+		  ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA, 1);
+	if (conn_info->hdr_digest_en)
+		SET_FIELD(con->update_flag, ISCSI_CONN_UPDATE_RAMROD_PARAMS_HD_EN, 1);
+
+	if (conn_info->data_digest_en)
+		SET_FIELD(con->update_flag, ISCSI_CONN_UPDATE_RAMROD_PARAMS_DD_EN, 1);
+
+	/* Placeholder - initialize pfv, cpda, hpda */
+
+	con->max_seq_size = conn_info->max_io_size;
+	con->max_recv_pdu_length = conn_info->max_recv_pdu_length;
+	con->max_send_pdu_length = conn_info->max_send_pdu_length;
+	con->first_seq_length = conn_info->max_io_size;
+
+	return qed_sp_nvmetcp_conn_update(QED_AFFIN_HWFN(cdev), con,
+					QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_nvmetcp_clear_conn_sq(struct qed_dev *cdev, u32 handle)
+{
+	struct qed_hash_nvmetcp_con *hash_con;
+
+	hash_con = qed_nvmetcp_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+
+		return -EINVAL;
+	}
+
+	return qed_sp_nvmetcp_conn_clear_sq(QED_AFFIN_HWFN(cdev), hash_con->con,
+					    QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+static int qed_nvmetcp_destroy_conn(struct qed_dev *cdev,
+				    u32 handle, u8 abrt_conn)
+{
+	struct qed_hash_nvmetcp_con *hash_con;
+
+	hash_con = qed_nvmetcp_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+
+		return -EINVAL;
+	}
+
+	hash_con->con->abortive_dsconnect = abrt_conn;
+
+	return qed_sp_nvmetcp_conn_terminate(QED_AFFIN_HWFN(cdev), hash_con->con,
+					   QED_SPQ_MODE_EBLOCK, NULL);
+}
+
 static const struct qed_nvmetcp_ops qed_nvmetcp_ops_pass = {
 	.common = &qed_common_ops_pass,
 	.ll2 = &qed_ll2_ops_pass,
@@ -250,8 +799,12 @@ static const struct qed_nvmetcp_ops qed_nvmetcp_ops_pass = {
 	.register_ops = &qed_register_nvmetcp_ops,
 	.start = &qed_nvmetcp_start,
 	.stop = &qed_nvmetcp_stop,
-
-	/* Placeholder - Connection level ops */
+	.acquire_conn = &qed_nvmetcp_acquire_conn,
+	.release_conn = &qed_nvmetcp_release_conn,
+	.offload_conn = &qed_nvmetcp_offload_conn,
+	.update_conn = &qed_nvmetcp_update_conn,
+	.destroy_conn = &qed_nvmetcp_destroy_conn,
+	.clear_sq = &qed_nvmetcp_clear_conn_sq,
 };
 
 const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h
index 774b46ade408..e5e9d075bf4f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.h
@@ -19,6 +19,7 @@
 #define QED_NVMETCP_FW_CQ_SIZE (4 * 1024)
 
 /* tcp parameters */
+#define QED_TCP_FLOW_LABEL 0
 #define QED_TCP_TWO_MSL_TIMER 4000
 #define QED_TCP_HALF_WAY_CLOSE_TIMEOUT 10
 #define QED_TCP_MAX_FIN_RT 2
@@ -32,6 +33,57 @@ struct qed_nvmetcp_info {
 	nvmetcp_event_cb_t event_cb;
 };
 
+struct qed_hash_nvmetcp_con {
+	struct hlist_node node;
+	struct qed_nvmetcp_conn *con;
+};
+
+struct qed_nvmetcp_conn {
+	struct list_head list_entry;
+	bool free_on_delete;
+	u16 conn_id;
+	u32 icid;
+	u32 fw_cid;
+	u8 layer_code;
+	u8 offl_flags;
+	u8 connect_mode;
+	dma_addr_t sq_pbl_addr;
+	struct qed_chain r2tq;
+	struct qed_chain xhq;
+	struct qed_chain uhq;
+	u8 local_mac[6];
+	u8 remote_mac[6];
+	u8 ip_version;
+	u8 ka_max_probe_cnt;
+	u16 vlan_id;
+	u16 tcp_flags;
+	u32 remote_ip[4];
+	u32 local_ip[4];
+	u32 flow_label;
+	u32 ka_timeout;
+	u32 ka_interval;
+	u32 max_rt_time;
+	u8 ttl;
+	u8 tos_or_tc;
+	u16 remote_port;
+	u16 local_port;
+	u16 mss;
+	u8 rcv_wnd_scale;
+	u32 rcv_wnd;
+	u32 cwnd;
+	u8 update_flag;
+	u8 default_cq;
+	u8 abortive_dsconnect;
+	u32 max_seq_size;
+	u32 max_recv_pdu_length;
+	u32 max_send_pdu_length;
+	u32 first_seq_length;
+	u16 physical_q0;
+	u16 physical_q1;
+	u16 nvmetcp_cccid_max_range;
+	dma_addr_t nvmetcp_cccid_itid_table_addr;
+};
+
 #if IS_ENABLED(CONFIG_QED_NVMETCP)
 int qed_nvmetcp_alloc(struct qed_hwfn *p_hwfn);
 void qed_nvmetcp_setup(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 525159e747a5..60ff3222bf55 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -101,6 +101,9 @@ union ramrod_data {
 	struct iscsi_spe_conn_termination iscsi_conn_terminate;
 
 	struct nvmetcp_init_ramrod_params nvmetcp_init;
+	struct nvmetcp_spe_conn_offload nvmetcp_conn_offload;
+	struct nvmetcp_conn_update_ramrod_params nvmetcp_conn_update;
+	struct nvmetcp_spe_conn_termination nvmetcp_conn_terminate;
 
 	struct vf_start_ramrod_data vf_start;
 	struct vf_stop_ramrod_data vf_stop;
diff --git a/include/linux/qed/nvmetcp_common.h b/include/linux/qed/nvmetcp_common.h
index e9ccfc07041d..c8836b71b866 100644
--- a/include/linux/qed/nvmetcp_common.h
+++ b/include/linux/qed/nvmetcp_common.h
@@ -6,6 +6,8 @@
 
 #include "tcp_common.h"
 
+#define NVMETCP_SLOW_PATH_LAYER_CODE (6)
+
 /* NVMeTCP firmware function init parameters */
 struct nvmetcp_spe_func_init {
 	__le16 half_way_close_timeout;
@@ -43,6 +45,10 @@ enum nvmetcp_ramrod_cmd_id {
 	NVMETCP_RAMROD_CMD_ID_UNUSED = 0,
 	NVMETCP_RAMROD_CMD_ID_INIT_FUNC = 1,
 	NVMETCP_RAMROD_CMD_ID_DESTROY_FUNC = 2,
+	NVMETCP_RAMROD_CMD_ID_OFFLOAD_CONN = 3,
+	NVMETCP_RAMROD_CMD_ID_UPDATE_CONN = 4,
+	NVMETCP_RAMROD_CMD_ID_TERMINATION_CONN = 5,
+	NVMETCP_RAMROD_CMD_ID_CLEAR_SQ = 6,
 	MAX_NVMETCP_RAMROD_CMD_ID
 };
 
@@ -51,4 +57,141 @@ struct nvmetcp_glbl_queue_entry {
 	struct regpair reserved;
 };
 
+/* NVMeTCP conn level EQEs */
+enum nvmetcp_eqe_opcode {
+	NVMETCP_EVENT_TYPE_INIT_FUNC = 0, /* Response after init Ramrod */
+	NVMETCP_EVENT_TYPE_DESTROY_FUNC, /* Response after destroy Ramrod */
+	NVMETCP_EVENT_TYPE_OFFLOAD_CONN,/* Response after option 2 offload Ramrod */
+	NVMETCP_EVENT_TYPE_UPDATE_CONN, /* Response after update Ramrod */
+	NVMETCP_EVENT_TYPE_CLEAR_SQ, /* Response after clear sq Ramrod */
+	NVMETCP_EVENT_TYPE_TERMINATE_CONN, /* Response after termination Ramrod */
+	NVMETCP_EVENT_TYPE_RESERVED0,
+	NVMETCP_EVENT_TYPE_RESERVED1,
+	NVMETCP_EVENT_TYPE_ASYN_CONNECT_COMPLETE, /* Connect completed (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_TERMINATE_DONE, /* Termination completed (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_START_OF_ERROR_TYPES = 10, /* Separate EQs from err EQs */
+	NVMETCP_EVENT_TYPE_ASYN_ABORT_RCVD, /* TCP RST packet receive (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_CLOSE_RCVD, /* TCP FIN packet receive (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_SYN_RCVD, /* TCP SYN+ACK packet receive (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_MAX_RT_TIME, /* TCP max retransmit time (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_MAX_RT_CNT, /* TCP max retransmit count (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_MAX_KA_PROBES_CNT, /* TCP ka probes count (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_ASYN_FIN_WAIT2, /* TCP fin wait 2 (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_NVMETCP_CONN_ERROR, /* NVMeTCP error response (A-syn EQE) */
+	NVMETCP_EVENT_TYPE_TCP_CONN_ERROR, /* NVMeTCP error - tcp error (A-syn EQE) */
+	MAX_NVMETCP_EQE_OPCODE
+};
+
+struct nvmetcp_conn_offload_section {
+	struct regpair cccid_itid_table_addr; /* CCCID to iTID table address */
+	__le16 cccid_max_range; /* CCCID max value - used for validation */
+	__le16 reserved[3];
+};
+
+/* NVMe TCP connection offload params passed by driver to FW in NVMeTCP offload ramrod */
+struct nvmetcp_conn_offload_params {
+	struct regpair sq_pbl_addr;
+	struct regpair r2tq_pbl_addr;
+	struct regpair xhq_pbl_addr;
+	struct regpair uhq_pbl_addr;
+	__le16 physical_q0;
+	__le16 physical_q1;
+	u8 flags;
+#define NVMETCP_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_MASK 0x1
+#define NVMETCP_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_SHIFT 0
+#define NVMETCP_CONN_OFFLOAD_PARAMS_TARGET_MODE_MASK 0x1
+#define NVMETCP_CONN_OFFLOAD_PARAMS_TARGET_MODE_SHIFT 1
+#define NVMETCP_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_MASK 0x1
+#define NVMETCP_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_SHIFT 2
+#define NVMETCP_CONN_OFFLOAD_PARAMS_NVMETCP_MODE_MASK 0x1
+#define NVMETCP_CONN_OFFLOAD_PARAMS_NVMETCP_MODE_SHIFT 3
+#define NVMETCP_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0xF
+#define NVMETCP_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 4
+	u8 default_cq;
+	__le16 reserved0;
+	__le32 reserved1;
+	__le32 initial_ack;
+
+	struct nvmetcp_conn_offload_section nvmetcp; /* NVMe/TCP section */
+};
+
+/* NVMe TCP and TCP connection offload params passed by driver to FW in NVMeTCP offload ramrod. */
+struct nvmetcp_spe_conn_offload {
+	__le16 reserved;
+	__le16 conn_id;
+	__le32 fw_cid;
+	struct nvmetcp_conn_offload_params nvmetcp;
+	struct tcp_offload_params_opt2 tcp;
+};
+
+/* NVMeTCP connection update params passed by driver to FW in NVMETCP update ramrod. */
+struct nvmetcp_conn_update_ramrod_params {
+	__le16 reserved0;
+	__le16 conn_id;
+	__le32 reserved1;
+	u8 flags;
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_HD_EN_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_HD_EN_SHIFT 0
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_DD_EN_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_DD_EN_SHIFT 1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED0_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED0_SHIFT 2
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_DATA_SHIFT 3
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED2_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED2_SHIFT 4
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED3_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED3_SHIFT 5
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED4_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED4_SHIFT 6
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED5_MASK 0x1
+#define NVMETCP_CONN_UPDATE_RAMROD_PARAMS_RESERVED5_SHIFT 7
+	u8 reserved3[3];
+	__le32 max_seq_size;
+	__le32 max_send_pdu_length;
+	__le32 max_recv_pdu_length;
+	__le32 first_seq_length;
+	__le32 reserved4[5];
+};
+
+/* NVMeTCP connection termination request */
+struct nvmetcp_spe_conn_termination {
+	__le16 reserved0;
+	__le16 conn_id;
+	__le32 reserved1;
+	u8 abortive;
+	u8 reserved2[7];
+	struct regpair reserved3;
+	struct regpair reserved4;
+};
+
+struct nvmetcp_dif_flags {
+	u8 flags;
+};
+
+enum nvmetcp_wqe_type {
+	NVMETCP_WQE_TYPE_NORMAL,
+	NVMETCP_WQE_TYPE_TASK_CLEANUP,
+	NVMETCP_WQE_TYPE_MIDDLE_PATH,
+	NVMETCP_WQE_TYPE_IC,
+	MAX_NVMETCP_WQE_TYPE
+};
+
+struct nvmetcp_wqe {
+	__le16 task_id;
+	u8 flags;
+#define NVMETCP_WQE_WQE_TYPE_MASK 0x7 /* [use nvmetcp_wqe_type] */
+#define NVMETCP_WQE_WQE_TYPE_SHIFT 0
+#define NVMETCP_WQE_NUM_SGES_MASK 0xF
+#define NVMETCP_WQE_NUM_SGES_SHIFT 3
+#define NVMETCP_WQE_RESPONSE_MASK 0x1
+#define NVMETCP_WQE_RESPONSE_SHIFT 7
+	struct nvmetcp_dif_flags prot_flags;
+	__le32 contlen_cdbsize;
+#define NVMETCP_WQE_CONT_LEN_MASK 0xFFFFFF
+#define NVMETCP_WQE_CONT_LEN_SHIFT 0
+#define NVMETCP_WQE_CDB_SIZE_OR_NVMETCP_CMD_MASK 0xFF
+#define NVMETCP_WQE_CDB_SIZE_OR_NVMETCP_CMD_SHIFT 24
+};
+
 #endif /* __NVMETCP_COMMON__ */
diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
index 76868bdf0883..5baf1c5ce798 100644
--- a/include/linux/qed/qed_nvmetcp_if.h
+++ b/include/linux/qed/qed_nvmetcp_if.h
@@ -24,6 +24,50 @@ struct qed_nvmetcp_tid {
 	u8 *blocks[MAX_TID_BLOCKS_NVMETCP];
 };
 
+struct qed_nvmetcp_id_params {
+	u8 mac[ETH_ALEN];
+	u32 ip[4];
+	u16 port;
+};
+
+struct qed_nvmetcp_params_offload {
+	/* FW initializations */
+	dma_addr_t sq_pbl_addr;
+	dma_addr_t nvmetcp_cccid_itid_table_addr;
+	u16 nvmetcp_cccid_max_range;
+	u8 default_cq;
+
+	/* Networking and TCP stack initializations */
+	struct qed_nvmetcp_id_params src;
+	struct qed_nvmetcp_id_params dst;
+	u32 ka_timeout;
+	u32 ka_interval;
+	u32 max_rt_time;
+	u32 cwnd;
+	u16 mss;
+	u16 vlan_id;
+	bool timestamp_en;
+	bool delayed_ack_en;
+	bool tcp_keep_alive_en;
+	bool ecn_en;
+	u8 ip_version;
+	u8 ka_max_probe_cnt;
+	u8 ttl;
+	u8 tos_or_tc;
+	u8 rcv_wnd_scale;
+};
+
+struct qed_nvmetcp_params_update {
+	u32 max_io_size;
+	u32 max_recv_pdu_length;
+	u32 max_send_pdu_length;
+
+	/* Placeholder: pfv, cpda, hpda */
+
+	bool hdr_digest_en;
+	bool data_digest_en;
+};
+
 struct qed_nvmetcp_cb_ops {
 	struct qed_common_cb_ops common;
 };
@@ -47,6 +91,38 @@ struct qed_nvmetcp_cb_ops {
  * @stop:		nvmetcp in FW
  *			@param cdev
  *			return 0 on success, otherwise error value.
+ * @acquire_conn:	acquire a new nvmetcp connection
+ *			@param cdev
+ *			@param handle - qed will fill handle that should be
+ *				used henceforth as identifier of the
+ *				connection.
+ *			@param p_doorbell - qed will fill the address of the
+ *				doorbell.
+ *			@return 0 on sucesss, otherwise error value.
+ * @release_conn:	release a previously acquired nvmetcp connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@return 0 on success, otherwise error value.
+ * @offload_conn:	configures an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param conn_info - the configuration to use for the
+ *				offload.
+ *			@return 0 on success, otherwise error value.
+ * @update_conn:	updates an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@param conn_info - the configuration to use for the
+ *				offload.
+ *			@return 0 on success, otherwise error value.
+ * @destroy_conn:	stops an offloaded connection
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@return 0 on success, otherwise error value.
+ * @clear_sq:		clear all task in sq
+ *			@param cdev
+ *			@param handle - the connection handle.
+ *			@return 0 on success, otherwise error value.
  */
 struct qed_nvmetcp_ops {
 	const struct qed_common_ops *common;
@@ -64,6 +140,24 @@ struct qed_nvmetcp_ops {
 		     void *event_context, nvmetcp_event_cb_t async_event_cb);
 
 	int (*stop)(struct qed_dev *cdev);
+
+	int (*acquire_conn)(struct qed_dev *cdev,
+			    u32 *handle,
+			    u32 *fw_cid, void __iomem **p_doorbell);
+
+	int (*release_conn)(struct qed_dev *cdev, u32 handle);
+
+	int (*offload_conn)(struct qed_dev *cdev,
+			    u32 handle,
+			    struct qed_nvmetcp_params_offload *conn_info);
+
+	int (*update_conn)(struct qed_dev *cdev,
+			   u32 handle,
+			   struct qed_nvmetcp_params_update *conn_info);
+
+	int (*destroy_conn)(struct qed_dev *cdev, u32 handle, u8 abrt_conn);
+
+	int (*clear_sq)(struct qed_dev *cdev, u32 handle);
 };
 
 const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void);

From 203d136e8958a7c65834601f669bdd0fcaa6fcbd Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Wed, 2 Jun 2021 20:16:52 +0300
Subject: [PATCH 0940/3292] qed: Add support of HW filter block

This patch introduces the functionality of HW filter block.
It adds and removes filters based on source and target TCP port.

It also add functionality to clear all filters at once.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h         |  8 ++
 drivers/net/ethernet/qlogic/qed/qed_dev.c     | 90 +++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c |  5 ++
 include/linux/qed/qed_nvmetcp_if.h            | 24 +++++
 4 files changed, 127 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index bc9bdb9d1bb9..b590c70539b5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -49,6 +49,8 @@ extern const struct qed_common_ops qed_common_ops_pass;
 #define QED_MIN_WIDS		(4)
 #define QED_PF_DEMS_SIZE        (4)
 
+#define QED_LLH_DONT_CARE 0
+
 /* cau states */
 enum qed_coalescing_mode {
 	QED_COAL_MODE_DISABLE,
@@ -1005,4 +1007,10 @@ int qed_mfw_fill_tlv_data(struct qed_hwfn *hwfn,
 void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc);
 
 void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn);
+
+int qed_llh_add_src_tcp_port_filter(struct qed_dev *cdev, u16 src_port);
+int qed_llh_add_dst_tcp_port_filter(struct qed_dev *cdev, u16 dest_port);
+void qed_llh_remove_src_tcp_port_filter(struct qed_dev *cdev, u16 src_port);
+void qed_llh_remove_dst_tcp_port_filter(struct qed_dev *cdev, u16 src_port);
+void qed_llh_clear_all_filters(struct qed_dev *cdev);
 #endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 932b892f1ef1..0410c3604abd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -5362,3 +5362,93 @@ void qed_set_fw_mac_addr(__le16 *fw_msb,
 	((u8 *)fw_lsb)[0] = mac[5];
 	((u8 *)fw_lsb)[1] = mac[4];
 }
+
+static int qed_llh_shadow_remove_all_filters(struct qed_dev *cdev, u8 ppfid)
+{
+	struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+	struct qed_llh_filter_info *p_filters;
+	int rc;
+
+	rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "remove_all");
+	if (rc)
+		return rc;
+
+	p_filters = p_llh_info->pp_filters[ppfid];
+	memset(p_filters, 0, NIG_REG_LLH_FUNC_FILTER_EN_SIZE *
+	       sizeof(*p_filters));
+
+	return 0;
+}
+
+static void qed_llh_clear_ppfid_filters(struct qed_dev *cdev, u8 ppfid)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u8 filter_idx, abs_ppfid;
+	int rc = 0;
+
+	if (!p_ptt)
+		return;
+
+	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits) &&
+	    !test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+		goto out;
+
+	rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+	if (rc)
+		goto out;
+
+	rc = qed_llh_shadow_remove_all_filters(cdev, ppfid);
+	if (rc)
+		goto out;
+
+	for (filter_idx = 0; filter_idx < NIG_REG_LLH_FUNC_FILTER_EN_SIZE;
+	     filter_idx++) {
+		rc = qed_llh_remove_filter(p_hwfn, p_ptt,
+					   abs_ppfid, filter_idx);
+		if (rc)
+			goto out;
+	}
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+}
+
+int qed_llh_add_src_tcp_port_filter(struct qed_dev *cdev, u16 src_port)
+{
+	return qed_llh_add_protocol_filter(cdev, 0,
+					   QED_LLH_FILTER_TCP_SRC_PORT,
+					   src_port, QED_LLH_DONT_CARE);
+}
+
+void qed_llh_remove_src_tcp_port_filter(struct qed_dev *cdev, u16 src_port)
+{
+	qed_llh_remove_protocol_filter(cdev, 0,
+				       QED_LLH_FILTER_TCP_SRC_PORT,
+				       src_port, QED_LLH_DONT_CARE);
+}
+
+int qed_llh_add_dst_tcp_port_filter(struct qed_dev *cdev, u16 dest_port)
+{
+	return qed_llh_add_protocol_filter(cdev, 0,
+					   QED_LLH_FILTER_TCP_DEST_PORT,
+					   QED_LLH_DONT_CARE, dest_port);
+}
+
+void qed_llh_remove_dst_tcp_port_filter(struct qed_dev *cdev, u16 dest_port)
+{
+	qed_llh_remove_protocol_filter(cdev, 0,
+				       QED_LLH_FILTER_TCP_DEST_PORT,
+				       QED_LLH_DONT_CARE, dest_port);
+}
+
+void qed_llh_clear_all_filters(struct qed_dev *cdev)
+{
+	u8 ppfid;
+
+	if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits) &&
+	    !test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+		return;
+
+	for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++)
+		qed_llh_clear_ppfid_filters(cdev, ppfid);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
index 7943804e88cd..d4d609a4d3a3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
@@ -805,6 +805,11 @@ static const struct qed_nvmetcp_ops qed_nvmetcp_ops_pass = {
 	.update_conn = &qed_nvmetcp_update_conn,
 	.destroy_conn = &qed_nvmetcp_destroy_conn,
 	.clear_sq = &qed_nvmetcp_clear_conn_sq,
+	.add_src_tcp_port_filter = &qed_llh_add_src_tcp_port_filter,
+	.remove_src_tcp_port_filter = &qed_llh_remove_src_tcp_port_filter,
+	.add_dst_tcp_port_filter = &qed_llh_add_dst_tcp_port_filter,
+	.remove_dst_tcp_port_filter = &qed_llh_remove_dst_tcp_port_filter,
+	.clear_all_filters = &qed_llh_clear_all_filters
 };
 
 const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void)
diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
index 5baf1c5ce798..5180edad24e5 100644
--- a/include/linux/qed/qed_nvmetcp_if.h
+++ b/include/linux/qed/qed_nvmetcp_if.h
@@ -123,6 +123,20 @@ struct qed_nvmetcp_cb_ops {
  *			@param cdev
  *			@param handle - the connection handle.
  *			@return 0 on success, otherwise error value.
+ * @add_src_tcp_port_filter: Add source tcp port filter
+ *			@param cdev
+ *			@param src_port
+ * @remove_src_tcp_port_filter: Remove source tcp port filter
+ *			@param cdev
+ *			@param src_port
+ * @add_dst_tcp_port_filter: Add destination tcp port filter
+ *			@param cdev
+ *			@param dest_port
+ * @remove_dst_tcp_port_filter: Remove destination tcp port filter
+ *			@param cdev
+ *			@param dest_port
+ * @clear_all_filters: Clear all filters.
+ *			@param cdev
  */
 struct qed_nvmetcp_ops {
 	const struct qed_common_ops *common;
@@ -158,6 +172,16 @@ struct qed_nvmetcp_ops {
 	int (*destroy_conn)(struct qed_dev *cdev, u32 handle, u8 abrt_conn);
 
 	int (*clear_sq)(struct qed_dev *cdev, u32 handle);
+
+	int (*add_src_tcp_port_filter)(struct qed_dev *cdev, u16 src_port);
+
+	void (*remove_src_tcp_port_filter)(struct qed_dev *cdev, u16 src_port);
+
+	int (*add_dst_tcp_port_filter)(struct qed_dev *cdev, u16 dest_port);
+
+	void (*remove_dst_tcp_port_filter)(struct qed_dev *cdev, u16 dest_port);
+
+	void (*clear_all_filters)(struct qed_dev *cdev);
 };
 
 const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void);

From ab47bdfd2e2e9670172a737d12ebfc94bf9d299d Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 2 Jun 2021 20:16:53 +0300
Subject: [PATCH 0941/3292] qed: Add NVMeTCP Offload IO Level FW and HW HSI

This patch introduces the NVMeTCP Offload FW and HW  HSI in order
to initialize the IO level configuration into a per IO HW
resource ("task") as part of the IO path flow.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/nvmetcp_common.h | 335 ++++++++++++++++++++++++++++-
 include/linux/qed/qed_nvmetcp_if.h |  31 +++
 2 files changed, 365 insertions(+), 1 deletion(-)

diff --git a/include/linux/qed/nvmetcp_common.h b/include/linux/qed/nvmetcp_common.h
index c8836b71b866..ad745a9c2264 100644
--- a/include/linux/qed/nvmetcp_common.h
+++ b/include/linux/qed/nvmetcp_common.h
@@ -7,6 +7,7 @@
 #include "tcp_common.h"
 
 #define NVMETCP_SLOW_PATH_LAYER_CODE (6)
+#define NVMETCP_WQE_NUM_SGES_SLOWIO (0xf)
 
 /* NVMeTCP firmware function init parameters */
 struct nvmetcp_spe_func_init {
@@ -194,4 +195,336 @@ struct nvmetcp_wqe {
 #define NVMETCP_WQE_CDB_SIZE_OR_NVMETCP_CMD_SHIFT 24
 };
 
-#endif /* __NVMETCP_COMMON__ */
+struct nvmetcp_host_cccid_itid_entry {
+	__le16 itid;
+};
+
+struct nvmetcp_connect_done_results {
+	__le16 icid;
+	__le16 conn_id;
+	struct tcp_ulp_connect_done_params params;
+};
+
+struct nvmetcp_eqe_data {
+	__le16 icid;
+	__le16 conn_id;
+	__le16 reserved;
+	u8 error_code;
+	u8 error_pdu_opcode_reserved;
+#define NVMETCP_EQE_DATA_ERROR_PDU_OPCODE_MASK 0x3F
+#define NVMETCP_EQE_DATA_ERROR_PDU_OPCODE_SHIFT  0
+#define NVMETCP_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK  0x1
+#define NVMETCP_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT  6
+#define NVMETCP_EQE_DATA_RESERVED0_MASK 0x1
+#define NVMETCP_EQE_DATA_RESERVED0_SHIFT 7
+};
+
+enum nvmetcp_task_type {
+	NVMETCP_TASK_TYPE_HOST_WRITE,
+	NVMETCP_TASK_TYPE_HOST_READ,
+	NVMETCP_TASK_TYPE_INIT_CONN_REQUEST,
+	NVMETCP_TASK_TYPE_RESERVED0,
+	NVMETCP_TASK_TYPE_CLEANUP,
+	NVMETCP_TASK_TYPE_HOST_READ_NO_CQE,
+	MAX_NVMETCP_TASK_TYPE
+};
+
+struct nvmetcp_db_data {
+	u8 params;
+#define NVMETCP_DB_DATA_DEST_MASK 0x3 /* destination of doorbell (use enum db_dest) */
+#define NVMETCP_DB_DATA_DEST_SHIFT 0
+#define NVMETCP_DB_DATA_AGG_CMD_MASK 0x3 /* aggregative command to CM (use enum db_agg_cmd_sel) */
+#define NVMETCP_DB_DATA_AGG_CMD_SHIFT 2
+#define NVMETCP_DB_DATA_BYPASS_EN_MASK 0x1 /* enable QM bypass */
+#define NVMETCP_DB_DATA_BYPASS_EN_SHIFT 4
+#define NVMETCP_DB_DATA_RESERVED_MASK 0x1
+#define NVMETCP_DB_DATA_RESERVED_SHIFT 5
+#define NVMETCP_DB_DATA_AGG_VAL_SEL_MASK 0x3 /* aggregative value selection */
+#define NVMETCP_DB_DATA_AGG_VAL_SEL_SHIFT 6
+	u8 agg_flags; /* bit for every DQ counter flags in CM context that DQ can increment */
+	__le16 sq_prod;
+};
+
+struct nvmetcp_fw_nvmf_cqe {
+	__le32 reserved[4];
+};
+
+struct nvmetcp_icresp_mdata {
+	u8  digest;
+	u8  cpda;
+	__le16  pfv;
+	__le32 maxdata;
+	__le16 rsvd[4];
+};
+
+union nvmetcp_fw_cqe_data {
+	struct nvmetcp_fw_nvmf_cqe nvme_cqe;
+	struct nvmetcp_icresp_mdata icresp_mdata;
+};
+
+struct nvmetcp_fw_cqe {
+	__le16 conn_id;
+	u8 cqe_type;
+	u8 cqe_error_status_bits;
+#define CQE_ERROR_BITMAP_DIF_ERR_BITS_MASK 0x7
+#define CQE_ERROR_BITMAP_DIF_ERR_BITS_SHIFT 0
+#define CQE_ERROR_BITMAP_DATA_DIGEST_ERR_MASK 0x1
+#define CQE_ERROR_BITMAP_DATA_DIGEST_ERR_SHIFT 3
+#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN_MASK 0x1
+#define CQE_ERROR_BITMAP_RCV_ON_INVALID_CONN_SHIFT 4
+	__le16 itid;
+	u8 task_type;
+	u8 fw_dbg_field;
+	u8 caused_conn_err;
+	u8 reserved0[3];
+	__le32 reserved1;
+	union nvmetcp_fw_cqe_data cqe_data;
+	struct regpair task_opaque;
+	__le32 reserved[6];
+};
+
+enum nvmetcp_fw_cqes_type {
+	NVMETCP_FW_CQE_TYPE_NORMAL = 1,
+	NVMETCP_FW_CQE_TYPE_RESERVED0,
+	NVMETCP_FW_CQE_TYPE_RESERVED1,
+	NVMETCP_FW_CQE_TYPE_CLEANUP,
+	NVMETCP_FW_CQE_TYPE_DUMMY,
+	MAX_NVMETCP_FW_CQES_TYPE
+};
+
+struct ystorm_nvmetcp_task_state {
+	struct scsi_cached_sges data_desc;
+	struct scsi_sgl_params sgl_params;
+	__le32 resrved0;
+	__le32 buffer_offset;
+	__le16 cccid;
+	struct nvmetcp_dif_flags dif_flags;
+	u8 flags;
+#define YSTORM_NVMETCP_TASK_STATE_LOCAL_COMP_MASK 0x1
+#define YSTORM_NVMETCP_TASK_STATE_LOCAL_COMP_SHIFT 0
+#define YSTORM_NVMETCP_TASK_STATE_SLOW_IO_MASK 0x1
+#define YSTORM_NVMETCP_TASK_STATE_SLOW_IO_SHIFT 1
+#define YSTORM_NVMETCP_TASK_STATE_SET_DIF_OFFSET_MASK 0x1
+#define YSTORM_NVMETCP_TASK_STATE_SET_DIF_OFFSET_SHIFT 2
+#define YSTORM_NVMETCP_TASK_STATE_SEND_W_RSP_MASK 0x1
+#define YSTORM_NVMETCP_TASK_STATE_SEND_W_RSP_SHIFT 3
+};
+
+struct ystorm_nvmetcp_task_rxmit_opt {
+	__le32 reserved[4];
+};
+
+struct nvmetcp_task_hdr {
+	__le32 reg[18];
+};
+
+struct nvmetcp_task_hdr_aligned {
+	struct nvmetcp_task_hdr task_hdr;
+	__le32 reserved[2];	/* HSI_COMMENT: Align to QREG */
+};
+
+struct e5_tdif_task_context {
+	__le32 reserved[16];
+};
+
+struct e5_rdif_task_context {
+	__le32 reserved[12];
+};
+
+struct ystorm_nvmetcp_task_st_ctx {
+	struct ystorm_nvmetcp_task_state state;
+	struct ystorm_nvmetcp_task_rxmit_opt rxmit_opt;
+	struct nvmetcp_task_hdr_aligned pdu_hdr;
+};
+
+struct mstorm_nvmetcp_task_st_ctx {
+	struct scsi_cached_sges data_desc;
+	struct scsi_sgl_params sgl_params;
+	__le32 rem_task_size;
+	__le32 data_buffer_offset;
+	u8 task_type;
+	struct nvmetcp_dif_flags dif_flags;
+	__le16 dif_task_icid;
+	struct regpair reserved0;
+	__le32 expected_itt;
+	__le32 reserved1;
+};
+
+struct ustorm_nvmetcp_task_st_ctx {
+	__le32 rem_rcv_len;
+	__le32 exp_data_transfer_len;
+	__le32 exp_data_sn;
+	struct regpair reserved0;
+	__le32 reg1_map;
+#define REG1_NUM_SGES_MASK 0xF
+#define REG1_NUM_SGES_SHIFT 0
+#define REG1_RESERVED1_MASK 0xFFFFFFF
+#define REG1_RESERVED1_SHIFT 4
+	u8 flags2;
+#define USTORM_NVMETCP_TASK_ST_CTX_AHS_EXIST_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_AHS_EXIST_SHIFT 0
+#define USTORM_NVMETCP_TASK_ST_CTX_RESERVED1_MASK 0x7F
+#define USTORM_NVMETCP_TASK_ST_CTX_RESERVED1_SHIFT 1
+	struct nvmetcp_dif_flags dif_flags;
+	__le16 reserved3;
+	__le16 tqe_opaque[2];
+	__le32 reserved5;
+	__le32 nvme_tcp_opaque_lo;
+	__le32 nvme_tcp_opaque_hi;
+	u8 task_type;
+	u8 error_flags;
+#define USTORM_NVMETCP_TASK_ST_CTX_DATA_DIGEST_ERROR_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_DATA_DIGEST_ERROR_SHIFT 0
+#define USTORM_NVMETCP_TASK_ST_CTX_DATA_TRUNCATED_ERROR_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_DATA_TRUNCATED_ERROR_SHIFT 1
+#define USTORM_NVMETCP_TASK_ST_CTX_UNDER_RUN_ERROR_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_UNDER_RUN_ERROR_SHIFT 2
+#define USTORM_NVMETCP_TASK_ST_CTX_NVME_TCP_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_NVME_TCP_SHIFT 3
+	u8 flags;
+#define USTORM_NVMETCP_TASK_ST_CTX_CQE_WRITE_MASK 0x3
+#define USTORM_NVMETCP_TASK_ST_CTX_CQE_WRITE_SHIFT 0
+#define USTORM_NVMETCP_TASK_ST_CTX_LOCAL_COMP_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_LOCAL_COMP_SHIFT 2
+#define USTORM_NVMETCP_TASK_ST_CTX_Q0_R2TQE_WRITE_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_Q0_R2TQE_WRITE_SHIFT 3
+#define USTORM_NVMETCP_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_SHIFT 4
+#define USTORM_NVMETCP_TASK_ST_CTX_HQ_SCANNED_DONE_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_HQ_SCANNED_DONE_SHIFT 5
+#define USTORM_NVMETCP_TASK_ST_CTX_R2T2RECV_DONE_MASK 0x1
+#define USTORM_NVMETCP_TASK_ST_CTX_R2T2RECV_DONE_SHIFT 6
+	u8 cq_rss_number;
+};
+
+struct e5_ystorm_nvmetcp_task_ag_ctx {
+	u8 reserved /* cdu_validation */;
+	u8 byte1 /* state_and_core_id */;
+	__le16 word0 /* icid */;
+	u8 flags0;
+	u8 flags1;
+	u8 flags2;
+	u8 flags3;
+	__le32 TTT;
+	u8 byte2;
+	u8 byte3;
+	u8 byte4;
+	u8 e4_reserved7;
+};
+
+struct e5_mstorm_nvmetcp_task_ag_ctx {
+	u8 cdu_validation;
+	u8 byte1;
+	__le16 task_cid;
+	u8 flags0;
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_VALID_MASK 0x1
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_VALID_SHIFT 6
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK 0x1
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT 7
+	u8 flags1;
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_TASK_CLEANUP_CF_MASK 0x3
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT 0
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CF1_MASK 0x3
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CF1_SHIFT 2
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CF2_MASK 0x3
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CF2_SHIFT 4
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK 0x1
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT 6
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CF1EN_MASK 0x1
+#define E5_MSTORM_NVMETCP_TASK_AG_CTX_CF1EN_SHIFT 7
+	u8 flags2;
+	u8 flags3;
+	__le32 reg0;
+	u8 byte2;
+	u8 byte3;
+	u8 byte4;
+	u8 e4_reserved7;
+};
+
+struct e5_ustorm_nvmetcp_task_ag_ctx {
+	u8 reserved;
+	u8 state_and_core_id;
+	__le16 icid;
+	u8 flags0;
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT 5
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_HQ_SCANNED_CF_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT 6
+	u8 flags1;
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED1_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED1_SHIFT 0
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_R2T2RECV_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_R2T2RECV_SHIFT 2
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CF3_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CF3_SHIFT 4
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6
+	u8 flags2;
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT 0
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT 1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_R2T2RECV_EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_R2T2RECV_EN_SHIFT 2
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CF3EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CF3EN_SHIFT 3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT 5
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RULE1EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RULE1EN_SHIFT 6
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7
+	u8 flags3;
+	u8 flags4;
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED5_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED5_SHIFT 0
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED6_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED6_SHIFT 2
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED7_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED7_SHIFT 3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4
+	u8 byte2;
+	u8 byte3;
+	u8 e4_reserved8;
+	__le32 dif_err_intervals;
+	__le32 dif_error_1st_interval;
+	__le32 rcv_cont_len;
+	__le32 exp_cont_len;
+	__le32 total_data_acked;
+	__le32 exp_data_acked;
+	__le16 word1;
+	__le16 next_tid;
+	__le32 hdr_residual_count;
+	__le32 exp_r2t_sn;
+};
+
+struct e5_nvmetcp_task_context {
+	struct ystorm_nvmetcp_task_st_ctx ystorm_st_context;
+	struct e5_ystorm_nvmetcp_task_ag_ctx ystorm_ag_context;
+	struct regpair ystorm_ag_padding[2];
+	struct e5_tdif_task_context tdif_context;
+	struct e5_mstorm_nvmetcp_task_ag_ctx mstorm_ag_context;
+	struct regpair mstorm_ag_padding[2];
+	struct e5_ustorm_nvmetcp_task_ag_ctx ustorm_ag_context;
+	struct regpair ustorm_ag_padding[2];
+	struct mstorm_nvmetcp_task_st_ctx mstorm_st_context;
+	struct regpair mstorm_st_padding[2];
+	struct ustorm_nvmetcp_task_st_ctx ustorm_st_context;
+	struct regpair ustorm_st_padding[2];
+	struct e5_rdif_task_context rdif_context;
+};
+
+#endif /* __NVMETCP_COMMON__*/
diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
index 5180edad24e5..606427ebb63c 100644
--- a/include/linux/qed/qed_nvmetcp_if.h
+++ b/include/linux/qed/qed_nvmetcp_if.h
@@ -5,6 +5,8 @@
 #define _QED_NVMETCP_IF_H
 #include <linux/types.h>
 #include <linux/qed/qed_if.h>
+#include <linux/qed/storage_common.h>
+#include <linux/qed/nvmetcp_common.h>
 
 #define QED_NVMETCP_MAX_IO_SIZE	0x800000
 
@@ -72,6 +74,35 @@ struct qed_nvmetcp_cb_ops {
 	struct qed_common_cb_ops common;
 };
 
+struct nvmetcp_sge {
+	struct regpair sge_addr; /* SGE address */
+	__le32 sge_len; /* SGE length */
+	__le32 reserved;
+};
+
+/* IO path HSI function SGL params */
+struct storage_sgl_task_params {
+	struct nvmetcp_sge *sgl;
+	struct regpair sgl_phys_addr;
+	u32 total_buffer_size;
+	u16 num_sges;
+	bool small_mid_sge;
+};
+
+/* IO path HSI function FW task context params */
+struct nvmetcp_task_params {
+	void *context; /* Output parameter - set/filled by the HSI function */
+	struct nvmetcp_wqe *sqe;
+	u32 tx_io_size; /* in bytes (Without DIF, if exists) */
+	u32 rx_io_size; /* in bytes (Without DIF, if exists) */
+	u16 conn_icid;
+	u16 itid;
+	struct regpair opq; /* qedn_task_ctx address */
+	u16 host_cccid;
+	u8 cq_rss_number;
+	bool send_write_incapsule;
+};
+
 /**
  * struct qed_nvmetcp_ops - qed NVMeTCP operations.
  * @common:		common operations pointer

From 826da4861430898495fa49f072335e795e8adfd3 Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 2 Jun 2021 20:16:54 +0300
Subject: [PATCH 0942/3292] qed: Add NVMeTCP Offload IO Level FW
 Initializations

This patch introduces the NVMeTCP FW initializations which is used
to initialize the IO level configuration into a per IO HW
resource ("task") as part of the IO path flow.

This includes:
- Write IO FW initialization
- Read IO FW initialization.
- IC-Req and IC-Resp FW exchange.
- FW Cleanup flow (Flush IO).

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/Makefile      |   5 +-
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c |   7 +-
 .../qlogic/qed/qed_nvmetcp_fw_funcs.c         | 376 ++++++++++++++++++
 .../qlogic/qed/qed_nvmetcp_fw_funcs.h         |  40 ++
 include/linux/qed/nvmetcp_common.h            |   1 +
 include/linux/qed/qed_nvmetcp_if.h            |  20 +
 6 files changed, 447 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h

diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 7cb0db67ba5b..0d9c2fe0245d 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -28,7 +28,10 @@ qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
 qed-$(CONFIG_QED_OOO) += qed_ooo.o
 
-qed-$(CONFIG_QED_NVMETCP) += qed_nvmetcp.o
+qed-$(CONFIG_QED_NVMETCP) +=	\
+	qed_nvmetcp.o		\
+	qed_nvmetcp_fw_funcs.o	\
+	qed_nvmetcp_ip_services.o
 
 qed-$(CONFIG_QED_RDMA) +=	\
 	qed_iwarp.o		\
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
index d4d609a4d3a3..f19128c8d9cc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp.c
@@ -27,6 +27,7 @@
 #include "qed_mcp.h"
 #include "qed_sp.h"
 #include "qed_reg_addr.h"
+#include "qed_nvmetcp_fw_funcs.h"
 
 static int qed_nvmetcp_async_event(struct qed_hwfn *p_hwfn, u8 fw_event_code,
 				   u16 echo, union event_ring_data *data,
@@ -809,7 +810,11 @@ static const struct qed_nvmetcp_ops qed_nvmetcp_ops_pass = {
 	.remove_src_tcp_port_filter = &qed_llh_remove_src_tcp_port_filter,
 	.add_dst_tcp_port_filter = &qed_llh_add_dst_tcp_port_filter,
 	.remove_dst_tcp_port_filter = &qed_llh_remove_dst_tcp_port_filter,
-	.clear_all_filters = &qed_llh_clear_all_filters
+	.clear_all_filters = &qed_llh_clear_all_filters,
+	.init_read_io = &init_nvmetcp_host_read_task,
+	.init_write_io = &init_nvmetcp_host_write_task,
+	.init_icreq_exchange = &init_nvmetcp_init_conn_req_task,
+	.init_task_cleanup = &init_cleanup_task_nvmetcp
 };
 
 const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
new file mode 100644
index 000000000000..c1dd71d19f3f
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/* Copyright 2021 Marvell. All rights reserved. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/qed/common_hsi.h>
+#include <linux/qed/storage_common.h>
+#include <linux/qed/nvmetcp_common.h>
+#include <linux/qed/qed_nvmetcp_if.h>
+#include "qed_nvmetcp_fw_funcs.h"
+
+#define NVMETCP_NUM_SGES_IN_CACHE 0x4
+
+bool nvmetcp_is_slow_sgl(u16 num_sges, bool small_mid_sge)
+{
+	return (num_sges > SCSI_NUM_SGES_SLOW_SGL_THR && small_mid_sge);
+}
+
+void init_scsi_sgl_context(struct scsi_sgl_params *ctx_sgl_params,
+			   struct scsi_cached_sges *ctx_data_desc,
+			   struct storage_sgl_task_params *sgl_params)
+{
+	u8 num_sges_to_init = (u8)(sgl_params->num_sges > NVMETCP_NUM_SGES_IN_CACHE ?
+				   NVMETCP_NUM_SGES_IN_CACHE : sgl_params->num_sges);
+	u8 sge_index;
+
+	/* sgl params */
+	ctx_sgl_params->sgl_addr.lo = cpu_to_le32(sgl_params->sgl_phys_addr.lo);
+	ctx_sgl_params->sgl_addr.hi = cpu_to_le32(sgl_params->sgl_phys_addr.hi);
+	ctx_sgl_params->sgl_total_length = cpu_to_le32(sgl_params->total_buffer_size);
+	ctx_sgl_params->sgl_num_sges = cpu_to_le16(sgl_params->num_sges);
+
+	for (sge_index = 0; sge_index < num_sges_to_init; sge_index++) {
+		ctx_data_desc->sge[sge_index].sge_addr.lo =
+			cpu_to_le32(sgl_params->sgl[sge_index].sge_addr.lo);
+		ctx_data_desc->sge[sge_index].sge_addr.hi =
+			cpu_to_le32(sgl_params->sgl[sge_index].sge_addr.hi);
+		ctx_data_desc->sge[sge_index].sge_len =
+			cpu_to_le32(sgl_params->sgl[sge_index].sge_len);
+	}
+}
+
+static inline u32 calc_rw_task_size(struct nvmetcp_task_params *task_params,
+				    enum nvmetcp_task_type task_type)
+{
+	u32 io_size;
+
+	if (task_type == NVMETCP_TASK_TYPE_HOST_WRITE)
+		io_size = task_params->tx_io_size;
+	else
+		io_size = task_params->rx_io_size;
+
+	if (unlikely(!io_size))
+		return 0;
+
+	return io_size;
+}
+
+static inline void init_sqe(struct nvmetcp_task_params *task_params,
+			    struct storage_sgl_task_params *sgl_task_params,
+			    enum nvmetcp_task_type task_type)
+{
+	if (!task_params->sqe)
+		return;
+
+	memset(task_params->sqe, 0, sizeof(*task_params->sqe));
+	task_params->sqe->task_id = cpu_to_le16(task_params->itid);
+
+	switch (task_type) {
+	case NVMETCP_TASK_TYPE_HOST_WRITE: {
+		u32 buf_size = 0;
+		u32 num_sges = 0;
+
+		SET_FIELD(task_params->sqe->contlen_cdbsize,
+			  NVMETCP_WQE_CDB_SIZE_OR_NVMETCP_CMD, 1);
+		SET_FIELD(task_params->sqe->flags, NVMETCP_WQE_WQE_TYPE,
+			  NVMETCP_WQE_TYPE_NORMAL);
+		if (task_params->tx_io_size) {
+			if (task_params->send_write_incapsule)
+				buf_size = calc_rw_task_size(task_params, task_type);
+
+			if (nvmetcp_is_slow_sgl(sgl_task_params->num_sges,
+						sgl_task_params->small_mid_sge))
+				num_sges = NVMETCP_WQE_NUM_SGES_SLOWIO;
+			else
+				num_sges = min((u16)sgl_task_params->num_sges,
+					       (u16)SCSI_NUM_SGES_SLOW_SGL_THR);
+		}
+		SET_FIELD(task_params->sqe->flags, NVMETCP_WQE_NUM_SGES, num_sges);
+		SET_FIELD(task_params->sqe->contlen_cdbsize, NVMETCP_WQE_CONT_LEN, buf_size);
+	} break;
+
+	case NVMETCP_TASK_TYPE_HOST_READ: {
+		SET_FIELD(task_params->sqe->flags, NVMETCP_WQE_WQE_TYPE,
+			  NVMETCP_WQE_TYPE_NORMAL);
+		SET_FIELD(task_params->sqe->contlen_cdbsize,
+			  NVMETCP_WQE_CDB_SIZE_OR_NVMETCP_CMD, 1);
+	} break;
+
+	case NVMETCP_TASK_TYPE_INIT_CONN_REQUEST: {
+		SET_FIELD(task_params->sqe->flags, NVMETCP_WQE_WQE_TYPE,
+			  NVMETCP_WQE_TYPE_MIDDLE_PATH);
+
+		if (task_params->tx_io_size) {
+			SET_FIELD(task_params->sqe->contlen_cdbsize, NVMETCP_WQE_CONT_LEN,
+				  task_params->tx_io_size);
+			SET_FIELD(task_params->sqe->flags, NVMETCP_WQE_NUM_SGES,
+				  min((u16)sgl_task_params->num_sges,
+				      (u16)SCSI_NUM_SGES_SLOW_SGL_THR));
+		}
+	} break;
+
+	case NVMETCP_TASK_TYPE_CLEANUP:
+		SET_FIELD(task_params->sqe->flags, NVMETCP_WQE_WQE_TYPE,
+			  NVMETCP_WQE_TYPE_TASK_CLEANUP);
+
+	default:
+		break;
+	}
+}
+
+/* The following function initializes of NVMeTCP task params */
+static inline void
+init_nvmetcp_task_params(struct e5_nvmetcp_task_context *context,
+			 struct nvmetcp_task_params *task_params,
+			 enum nvmetcp_task_type task_type)
+{
+	context->ystorm_st_context.state.cccid = task_params->host_cccid;
+	SET_FIELD(context->ustorm_st_context.error_flags, USTORM_NVMETCP_TASK_ST_CTX_NVME_TCP, 1);
+	context->ustorm_st_context.nvme_tcp_opaque_lo = cpu_to_le32(task_params->opq.lo);
+	context->ustorm_st_context.nvme_tcp_opaque_hi = cpu_to_le32(task_params->opq.hi);
+}
+
+/* The following function initializes default values to all tasks */
+static inline void
+init_default_nvmetcp_task(struct nvmetcp_task_params *task_params,
+			  void *pdu_header, void *nvme_cmd,
+			  enum nvmetcp_task_type task_type)
+{
+	struct e5_nvmetcp_task_context *context = task_params->context;
+	const u8 val_byte = context->mstorm_ag_context.cdu_validation;
+	u8 dw_index;
+
+	memset(context, 0, sizeof(*context));
+	init_nvmetcp_task_params(context, task_params,
+				 (enum nvmetcp_task_type)task_type);
+
+	/* Swapping requirements used below, will be removed in future FW versions */
+	if (task_type == NVMETCP_TASK_TYPE_HOST_WRITE ||
+	    task_type == NVMETCP_TASK_TYPE_HOST_READ) {
+		for (dw_index = 0;
+		     dw_index < QED_NVMETCP_CMN_HDR_SIZE / sizeof(u32);
+		     dw_index++)
+			context->ystorm_st_context.pdu_hdr.task_hdr.reg[dw_index] =
+				cpu_to_le32(__swab32(((u32 *)pdu_header)[dw_index]));
+
+		for (dw_index = QED_NVMETCP_CMN_HDR_SIZE / sizeof(u32);
+		     dw_index < QED_NVMETCP_CMD_HDR_SIZE / sizeof(u32);
+		     dw_index++)
+			context->ystorm_st_context.pdu_hdr.task_hdr.reg[dw_index] =
+				cpu_to_le32(__swab32(((u32 *)nvme_cmd)[dw_index - 2]));
+	} else {
+		for (dw_index = 0;
+		     dw_index < QED_NVMETCP_NON_IO_HDR_SIZE / sizeof(u32);
+		     dw_index++)
+			context->ystorm_st_context.pdu_hdr.task_hdr.reg[dw_index] =
+				cpu_to_le32(__swab32(((u32 *)pdu_header)[dw_index]));
+	}
+
+	/* M-Storm Context: */
+	context->mstorm_ag_context.cdu_validation = val_byte;
+	context->mstorm_st_context.task_type = (u8)(task_type);
+	context->mstorm_ag_context.task_cid = cpu_to_le16(task_params->conn_icid);
+
+	/* Ustorm Context: */
+	SET_FIELD(context->ustorm_ag_context.flags1, E5_USTORM_NVMETCP_TASK_AG_CTX_R2T2RECV, 1);
+	context->ustorm_st_context.task_type = (u8)(task_type);
+	context->ustorm_st_context.cq_rss_number = task_params->cq_rss_number;
+	context->ustorm_ag_context.icid = cpu_to_le16(task_params->conn_icid);
+}
+
+/* The following function initializes the U-Storm Task Contexts */
+static inline void
+init_ustorm_task_contexts(struct ustorm_nvmetcp_task_st_ctx *ustorm_st_context,
+			  struct e5_ustorm_nvmetcp_task_ag_ctx *ustorm_ag_context,
+			  u32 remaining_recv_len,
+			  u32 expected_data_transfer_len, u8 num_sges,
+			  bool tx_dif_conn_err_en)
+{
+	/* Remaining data to be received in bytes. Used in validations*/
+	ustorm_st_context->rem_rcv_len = cpu_to_le32(remaining_recv_len);
+	ustorm_ag_context->exp_data_acked = cpu_to_le32(expected_data_transfer_len);
+	ustorm_st_context->exp_data_transfer_len = cpu_to_le32(expected_data_transfer_len);
+	SET_FIELD(ustorm_st_context->reg1_map, REG1_NUM_SGES, num_sges);
+	SET_FIELD(ustorm_ag_context->flags2, E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_CF_EN,
+		  tx_dif_conn_err_en ? 1 : 0);
+}
+
+/* The following function initializes Local Completion Contexts: */
+static inline void
+set_local_completion_context(struct e5_nvmetcp_task_context *context)
+{
+	SET_FIELD(context->ystorm_st_context.state.flags,
+		  YSTORM_NVMETCP_TASK_STATE_LOCAL_COMP, 1);
+	SET_FIELD(context->ustorm_st_context.flags,
+		  USTORM_NVMETCP_TASK_ST_CTX_LOCAL_COMP, 1);
+}
+
+/* Common Fastpath task init function: */
+static inline void
+init_rw_nvmetcp_task(struct nvmetcp_task_params *task_params,
+		     enum nvmetcp_task_type task_type,
+		     void *pdu_header, void *nvme_cmd,
+		     struct storage_sgl_task_params *sgl_task_params)
+{
+	struct e5_nvmetcp_task_context *context = task_params->context;
+	u32 task_size = calc_rw_task_size(task_params, task_type);
+	bool slow_io = false;
+	u8 num_sges = 0;
+
+	init_default_nvmetcp_task(task_params, pdu_header, nvme_cmd, task_type);
+
+	/* Tx/Rx: */
+	if (task_params->tx_io_size) {
+		/* if data to transmit: */
+		init_scsi_sgl_context(&context->ystorm_st_context.state.sgl_params,
+				      &context->ystorm_st_context.state.data_desc,
+				      sgl_task_params);
+		slow_io = nvmetcp_is_slow_sgl(sgl_task_params->num_sges,
+					      sgl_task_params->small_mid_sge);
+		num_sges =
+			(u8)(!slow_io ? min((u32)sgl_task_params->num_sges,
+					    (u32)SCSI_NUM_SGES_SLOW_SGL_THR) :
+					    NVMETCP_WQE_NUM_SGES_SLOWIO);
+		if (slow_io) {
+			SET_FIELD(context->ystorm_st_context.state.flags,
+				  YSTORM_NVMETCP_TASK_STATE_SLOW_IO, 1);
+		}
+	} else if (task_params->rx_io_size) {
+		/* if data to receive: */
+		init_scsi_sgl_context(&context->mstorm_st_context.sgl_params,
+				      &context->mstorm_st_context.data_desc,
+				      sgl_task_params);
+		num_sges =
+			(u8)(!nvmetcp_is_slow_sgl(sgl_task_params->num_sges,
+						  sgl_task_params->small_mid_sge) ?
+						  min((u32)sgl_task_params->num_sges,
+						      (u32)SCSI_NUM_SGES_SLOW_SGL_THR) :
+						      NVMETCP_WQE_NUM_SGES_SLOWIO);
+		context->mstorm_st_context.rem_task_size = cpu_to_le32(task_size);
+	}
+
+	/* Ustorm context: */
+	init_ustorm_task_contexts(&context->ustorm_st_context,
+				  &context->ustorm_ag_context,
+				  /* Remaining Receive length is the Task Size */
+				  task_size,
+				  /* The size of the transmitted task */
+				  task_size,
+				  /* num_sges */
+				  num_sges,
+				  false);
+
+	/* Set exp_data_acked */
+	if (task_type == NVMETCP_TASK_TYPE_HOST_WRITE) {
+		if (task_params->send_write_incapsule)
+			context->ustorm_ag_context.exp_data_acked = task_size;
+		else
+			context->ustorm_ag_context.exp_data_acked = 0;
+	} else if (task_type == NVMETCP_TASK_TYPE_HOST_READ) {
+		context->ustorm_ag_context.exp_data_acked = 0;
+	}
+
+	context->ustorm_ag_context.exp_cont_len = 0;
+	init_sqe(task_params, sgl_task_params, task_type);
+}
+
+static void
+init_common_initiator_read_task(struct nvmetcp_task_params *task_params,
+				struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+				struct nvme_command *nvme_cmd,
+				struct storage_sgl_task_params *sgl_task_params)
+{
+	init_rw_nvmetcp_task(task_params, NVMETCP_TASK_TYPE_HOST_READ,
+			     cmd_pdu_header, nvme_cmd, sgl_task_params);
+}
+
+void init_nvmetcp_host_read_task(struct nvmetcp_task_params *task_params,
+				 struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+				 struct nvme_command *nvme_cmd,
+				 struct storage_sgl_task_params *sgl_task_params)
+{
+	init_common_initiator_read_task(task_params, (void *)cmd_pdu_header,
+					(void *)nvme_cmd, sgl_task_params);
+}
+
+static void
+init_common_initiator_write_task(struct nvmetcp_task_params *task_params,
+				 struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+				 struct nvme_command *nvme_cmd,
+				 struct storage_sgl_task_params *sgl_task_params)
+{
+	init_rw_nvmetcp_task(task_params, NVMETCP_TASK_TYPE_HOST_WRITE,
+			     cmd_pdu_header, nvme_cmd, sgl_task_params);
+}
+
+void init_nvmetcp_host_write_task(struct nvmetcp_task_params *task_params,
+				  struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+				  struct nvme_command *nvme_cmd,
+				  struct storage_sgl_task_params *sgl_task_params)
+{
+	init_common_initiator_write_task(task_params, (void *)cmd_pdu_header,
+					 (void *)nvme_cmd, sgl_task_params);
+}
+
+static void
+init_common_login_request_task(struct nvmetcp_task_params *task_params,
+			       void *login_req_pdu_header,
+			       struct storage_sgl_task_params *tx_sgl_task_params,
+			       struct storage_sgl_task_params *rx_sgl_task_params)
+{
+	struct e5_nvmetcp_task_context *context = task_params->context;
+
+	init_default_nvmetcp_task(task_params, (void *)login_req_pdu_header, NULL,
+				  NVMETCP_TASK_TYPE_INIT_CONN_REQUEST);
+
+	/* Ustorm Context: */
+	init_ustorm_task_contexts(&context->ustorm_st_context,
+				  &context->ustorm_ag_context,
+
+				  /* Remaining Receive length is the Task Size */
+				  task_params->rx_io_size ?
+				  rx_sgl_task_params->total_buffer_size : 0,
+
+				  /* The size of the transmitted task */
+				  task_params->tx_io_size ?
+				  tx_sgl_task_params->total_buffer_size : 0,
+				  0, /* num_sges */
+				  0); /* tx_dif_conn_err_en */
+
+	/* SGL context: */
+	if (task_params->tx_io_size)
+		init_scsi_sgl_context(&context->ystorm_st_context.state.sgl_params,
+				      &context->ystorm_st_context.state.data_desc,
+				      tx_sgl_task_params);
+	if (task_params->rx_io_size)
+		init_scsi_sgl_context(&context->mstorm_st_context.sgl_params,
+				      &context->mstorm_st_context.data_desc,
+				      rx_sgl_task_params);
+
+	context->mstorm_st_context.rem_task_size =
+		cpu_to_le32(task_params->rx_io_size ?
+				 rx_sgl_task_params->total_buffer_size : 0);
+	init_sqe(task_params, tx_sgl_task_params, NVMETCP_TASK_TYPE_INIT_CONN_REQUEST);
+}
+
+/* The following function initializes Login task in Host mode: */
+void init_nvmetcp_init_conn_req_task(struct nvmetcp_task_params *task_params,
+				     struct nvme_tcp_icreq_pdu *init_conn_req_pdu_hdr,
+				     struct storage_sgl_task_params *tx_sgl_task_params,
+				     struct storage_sgl_task_params *rx_sgl_task_params)
+{
+	init_common_login_request_task(task_params, init_conn_req_pdu_hdr,
+				       tx_sgl_task_params, rx_sgl_task_params);
+}
+
+void init_cleanup_task_nvmetcp(struct nvmetcp_task_params *task_params)
+{
+	init_sqe(task_params, NULL, NVMETCP_TASK_TYPE_CLEANUP);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h
new file mode 100644
index 000000000000..4c7ac2bd2ea5
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/* Copyright 2021 Marvell. All rights reserved. */
+
+#ifndef _QED_NVMETCP_FW_FUNCS_H
+#define _QED_NVMETCP_FW_FUNCS_H
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/qed/common_hsi.h>
+#include <linux/qed/storage_common.h>
+#include <linux/qed/nvmetcp_common.h>
+#include <linux/qed/qed_nvmetcp_if.h>
+
+#if IS_ENABLED(CONFIG_QED_NVMETCP)
+
+void init_nvmetcp_host_read_task(struct nvmetcp_task_params *task_params,
+				 struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+				 struct nvme_command *nvme_cmd,
+				 struct storage_sgl_task_params *sgl_task_params);
+void init_nvmetcp_host_write_task(struct nvmetcp_task_params *task_params,
+				  struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+				  struct nvme_command *nvme_cmd,
+				  struct storage_sgl_task_params *sgl_task_params);
+void init_nvmetcp_init_conn_req_task(struct nvmetcp_task_params *task_params,
+				     struct nvme_tcp_icreq_pdu *init_conn_req_pdu_hdr,
+				     struct storage_sgl_task_params *tx_sgl_task_params,
+				     struct storage_sgl_task_params *rx_sgl_task_params);
+void init_cleanup_task_nvmetcp(struct nvmetcp_task_params *task_params);
+
+#else /* IS_ENABLED(CONFIG_QED_NVMETCP) */
+
+#endif /* IS_ENABLED(CONFIG_QED_NVMETCP) */
+
+#endif /* _QED_NVMETCP_FW_FUNCS_H */
diff --git a/include/linux/qed/nvmetcp_common.h b/include/linux/qed/nvmetcp_common.h
index ad745a9c2264..5a2ab0606308 100644
--- a/include/linux/qed/nvmetcp_common.h
+++ b/include/linux/qed/nvmetcp_common.h
@@ -5,6 +5,7 @@
 #define __NVMETCP_COMMON__
 
 #include "tcp_common.h"
+#include <linux/nvme-tcp.h>
 
 #define NVMETCP_SLOW_PATH_LAYER_CODE (6)
 #define NVMETCP_WQE_NUM_SGES_SLOWIO (0xf)
diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
index 606427ebb63c..14671bc19ed1 100644
--- a/include/linux/qed/qed_nvmetcp_if.h
+++ b/include/linux/qed/qed_nvmetcp_if.h
@@ -9,6 +9,9 @@
 #include <linux/qed/nvmetcp_common.h>
 
 #define QED_NVMETCP_MAX_IO_SIZE	0x800000
+#define QED_NVMETCP_CMN_HDR_SIZE (sizeof(struct nvme_tcp_hdr))
+#define QED_NVMETCP_CMD_HDR_SIZE (sizeof(struct nvme_tcp_cmd_pdu))
+#define QED_NVMETCP_NON_IO_HDR_SIZE ((QED_NVMETCP_CMN_HDR_SIZE + 16))
 
 typedef int (*nvmetcp_event_cb_t) (void *context,
 				   u8 fw_event_code, void *fw_handle);
@@ -213,6 +216,23 @@ struct qed_nvmetcp_ops {
 	void (*remove_dst_tcp_port_filter)(struct qed_dev *cdev, u16 dest_port);
 
 	void (*clear_all_filters)(struct qed_dev *cdev);
+
+	void (*init_read_io)(struct nvmetcp_task_params *task_params,
+			     struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+			     struct nvme_command *nvme_cmd,
+			     struct storage_sgl_task_params *sgl_task_params);
+
+	void (*init_write_io)(struct nvmetcp_task_params *task_params,
+			      struct nvme_tcp_cmd_pdu *cmd_pdu_header,
+			      struct nvme_command *nvme_cmd,
+			      struct storage_sgl_task_params *sgl_task_params);
+
+	void (*init_icreq_exchange)(struct nvmetcp_task_params *task_params,
+				    struct nvme_tcp_icreq_pdu *init_conn_req_pdu_hdr,
+				    struct storage_sgl_task_params *tx_sgl_task_params,
+				    struct storage_sgl_task_params *rx_sgl_task_params);
+
+	void (*init_task_cleanup)(struct nvmetcp_task_params *task_params);
 };
 
 const struct qed_nvmetcp_ops *qed_get_nvmetcp_ops(void);

From 806ee7f81a2b037e3f57275adcdf974453cc3254 Mon Sep 17 00:00:00 2001
From: Nikolay Assa <nassa@marvell.com>
Date: Wed, 2 Jun 2021 20:16:55 +0300
Subject: [PATCH 0943/3292] qed: Add IP services APIs support

This patch introduces APIs which the NVMeTCP Offload device (qedn)
will use through the paired net-device (qede).
It includes APIs for:
- ipv4/ipv6 routing
- get VLAN from net-device
- TCP ports reservation

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Nikolay Assa <nassa@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../qlogic/qed/qed_nvmetcp_ip_services.c      | 238 ++++++++++++++++++
 .../linux/qed/qed_nvmetcp_ip_services_if.h    |  29 +++
 2 files changed, 267 insertions(+)
 create mode 100644 drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c
 create mode 100644 include/linux/qed/qed_nvmetcp_ip_services_if.h

diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c
new file mode 100644
index 000000000000..96a2077fd315
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright 2021 Marvell. All rights reserved.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/param.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/errno.h>
+
+#include <net/tcp.h>
+
+#include <linux/qed/qed_nvmetcp_ip_services_if.h>
+
+#define QED_IP_RESOL_TIMEOUT  4
+
+int qed_route_ipv4(struct sockaddr_storage *local_addr,
+		   struct sockaddr_storage *remote_addr,
+		   struct sockaddr *hardware_address,
+		   struct net_device **ndev)
+{
+	struct neighbour *neigh = NULL;
+	__be32 *loc_ip, *rem_ip;
+	struct rtable *rt;
+	int rc = -ENXIO;
+	int retry;
+
+	loc_ip = &((struct sockaddr_in *)local_addr)->sin_addr.s_addr;
+	rem_ip = &((struct sockaddr_in *)remote_addr)->sin_addr.s_addr;
+	*ndev = NULL;
+	rt = ip_route_output(&init_net, *rem_ip, *loc_ip, 0/*tos*/, 0/*oif*/);
+	if (IS_ERR(rt)) {
+		pr_err("lookup route failed\n");
+		rc = PTR_ERR(rt);
+		goto return_err;
+	}
+
+	neigh = dst_neigh_lookup(&rt->dst, rem_ip);
+	if (!neigh) {
+		rc = -ENOMEM;
+		ip_rt_put(rt);
+		goto return_err;
+	}
+
+	*ndev = rt->dst.dev;
+	ip_rt_put(rt);
+
+	/* If not resolved, kick-off state machine towards resolution */
+	if (!(neigh->nud_state & NUD_VALID))
+		neigh_event_send(neigh, NULL);
+
+	/* query neighbor until resolved or timeout */
+	retry = QED_IP_RESOL_TIMEOUT;
+	while (!(neigh->nud_state & NUD_VALID) && retry > 0) {
+		msleep(1000);
+		retry--;
+	}
+
+	if (neigh->nud_state & NUD_VALID) {
+		/* copy resolved MAC address */
+		neigh_ha_snapshot(hardware_address->sa_data, neigh, *ndev);
+		hardware_address->sa_family = (*ndev)->type;
+		rc = 0;
+	}
+
+	neigh_release(neigh);
+	if (!(*loc_ip)) {
+		*loc_ip = inet_select_addr(*ndev, *rem_ip, RT_SCOPE_UNIVERSE);
+		local_addr->ss_family = AF_INET;
+	}
+
+return_err:
+
+	return rc;
+}
+EXPORT_SYMBOL(qed_route_ipv4);
+
+int qed_route_ipv6(struct sockaddr_storage *local_addr,
+		   struct sockaddr_storage *remote_addr,
+		   struct sockaddr *hardware_address,
+		   struct net_device **ndev)
+{
+	struct neighbour *neigh = NULL;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+	int rc = -ENXIO;
+	int retry;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.saddr = ((struct sockaddr_in6 *)local_addr)->sin6_addr;
+	fl6.daddr = ((struct sockaddr_in6 *)remote_addr)->sin6_addr;
+	dst = ip6_route_output(&init_net, NULL, &fl6);
+	if (!dst || dst->error) {
+		if (dst) {
+			dst_release(dst);
+			pr_err("lookup route failed %d\n", dst->error);
+		}
+
+		goto out;
+	}
+
+	neigh = dst_neigh_lookup(dst, &fl6.daddr);
+	if (neigh) {
+		*ndev = ip6_dst_idev(dst)->dev;
+
+		/* If not resolved, kick-off state machine towards resolution */
+		if (!(neigh->nud_state & NUD_VALID))
+			neigh_event_send(neigh, NULL);
+
+		/* query neighbor until resolved or timeout */
+		retry = QED_IP_RESOL_TIMEOUT;
+		while (!(neigh->nud_state & NUD_VALID) && retry > 0) {
+			msleep(1000);
+			retry--;
+		}
+
+		if (neigh->nud_state & NUD_VALID) {
+			neigh_ha_snapshot((u8 *)hardware_address->sa_data,
+					  neigh, *ndev);
+			hardware_address->sa_family = (*ndev)->type;
+			rc = 0;
+		}
+
+		neigh_release(neigh);
+
+		if (ipv6_addr_any(&fl6.saddr)) {
+			if (ipv6_dev_get_saddr(dev_net(*ndev), *ndev,
+					       &fl6.daddr, 0, &fl6.saddr)) {
+				pr_err("Unable to find source IP address\n");
+				goto out;
+			}
+
+			local_addr->ss_family = AF_INET6;
+			((struct sockaddr_in6 *)local_addr)->sin6_addr =
+								fl6.saddr;
+		}
+	}
+
+	dst_release(dst);
+
+out:
+
+	return rc;
+}
+EXPORT_SYMBOL(qed_route_ipv6);
+
+void qed_vlan_get_ndev(struct net_device **ndev, u16 *vlan_id)
+{
+	if (is_vlan_dev(*ndev)) {
+		*vlan_id = vlan_dev_vlan_id(*ndev);
+		*ndev = vlan_dev_real_dev(*ndev);
+	}
+}
+EXPORT_SYMBOL(qed_vlan_get_ndev);
+
+struct pci_dev *qed_validate_ndev(struct net_device *ndev)
+{
+	struct pci_dev *pdev = NULL;
+	struct net_device *upper;
+
+	for_each_pci_dev(pdev) {
+		if (pdev && pdev->driver &&
+		    !strcmp(pdev->driver->name, "qede")) {
+			upper = pci_get_drvdata(pdev);
+			if (upper->ifindex == ndev->ifindex)
+				return pdev;
+		}
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(qed_validate_ndev);
+
+__be16 qed_get_in_port(struct sockaddr_storage *sa)
+{
+	return sa->ss_family == AF_INET
+		? ((struct sockaddr_in *)sa)->sin_port
+		: ((struct sockaddr_in6 *)sa)->sin6_port;
+}
+EXPORT_SYMBOL(qed_get_in_port);
+
+int qed_fetch_tcp_port(struct sockaddr_storage local_ip_addr,
+		       struct socket **sock, u16 *port)
+{
+	struct sockaddr_storage sa;
+	int rc = 0;
+
+	rc = sock_create(local_ip_addr.ss_family, SOCK_STREAM, IPPROTO_TCP,
+			 sock);
+	if (rc) {
+		pr_warn("failed to create socket: %d\n", rc);
+		goto err;
+	}
+
+	(*sock)->sk->sk_allocation = GFP_KERNEL;
+	sk_set_memalloc((*sock)->sk);
+
+	rc = kernel_bind(*sock, (struct sockaddr *)&local_ip_addr,
+			 sizeof(local_ip_addr));
+
+	if (rc) {
+		pr_warn("failed to bind socket: %d\n", rc);
+		goto err_sock;
+	}
+
+	rc = kernel_getsockname(*sock, (struct sockaddr *)&sa);
+	if (rc < 0) {
+		pr_warn("getsockname() failed: %d\n", rc);
+		goto err_sock;
+	}
+
+	*port = ntohs(qed_get_in_port(&sa));
+
+	return 0;
+
+err_sock:
+	sock_release(*sock);
+	sock = NULL;
+err:
+
+	return rc;
+}
+EXPORT_SYMBOL(qed_fetch_tcp_port);
+
+void qed_return_tcp_port(struct socket *sock)
+{
+	if (sock && sock->sk) {
+		tcp_set_state(sock->sk, TCP_CLOSE);
+		sock_release(sock);
+	}
+}
+EXPORT_SYMBOL(qed_return_tcp_port);
diff --git a/include/linux/qed/qed_nvmetcp_ip_services_if.h b/include/linux/qed/qed_nvmetcp_ip_services_if.h
new file mode 100644
index 000000000000..3604aee53796
--- /dev/null
+++ b/include/linux/qed/qed_nvmetcp_ip_services_if.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * Copyright 2021 Marvell. All rights reserved.
+ */
+
+#ifndef _QED_IP_SERVICES_IF_H
+#define _QED_IP_SERVICES_IF_H
+
+#include <linux/types.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+#include <linux/inetdevice.h>
+
+int qed_route_ipv4(struct sockaddr_storage *local_addr,
+		   struct sockaddr_storage *remote_addr,
+		   struct sockaddr *hardware_address,
+		   struct net_device **ndev);
+int qed_route_ipv6(struct sockaddr_storage *local_addr,
+		   struct sockaddr_storage *remote_addr,
+		   struct sockaddr *hardware_address,
+		   struct net_device **ndev);
+void qed_vlan_get_ndev(struct net_device **ndev, u16 *vlan_id);
+struct pci_dev *qed_validate_ndev(struct net_device *ndev);
+void qed_return_tcp_port(struct socket *sock);
+int qed_fetch_tcp_port(struct sockaddr_storage local_ip_addr,
+		       struct socket **sock, u16 *port);
+__be16 qed_get_in_port(struct sockaddr_storage *sa);
+
+#endif /* _QED_IP_SERVICES_IF_H */

From 14623e005a1e74864afca1261a3aa8e6e8017df9 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 2 Jun 2021 13:44:24 -0400
Subject: [PATCH 0944/3292] tipc: eliminate redundant fields in struct
 tipc_sock

We eliminate the redundant fields conn_type and conn_instance in
struct tipc_sock. On the connecting side, this information is already
present in the unused (after the connection is established) part of
the pre-allocated header, and on the accepting side, we put it there
when the new socket is created.

Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 53 ++++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 53af72824c9c..cb2d9fffbc5d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -73,9 +73,6 @@ struct sockaddr_pair {
 /**
  * struct tipc_sock - TIPC socket structure
  * @sk: socket - interacts with 'port' and with user via the socket API
- * @conn_type: TIPC type used when connection was established
- * @conn_instance: TIPC instance used when connection was established
- * @published: non-zero if port has one or more associated names
  * @max_pkt: maximum packet size "hint" used when building messages sent by port
  * @maxnagle: maximum size of msg which can be subject to nagle
  * @portid: unique port identity in TIPC socket hash table
@@ -106,11 +103,11 @@ struct sockaddr_pair {
  * @expect_ack: whether this TIPC socket is expecting an ack
  * @nodelay: setsockopt() TIPC_NODELAY setting
  * @group_is_open: TIPC socket group is fully open (FIXME)
+ * @published: true if port has one or more associated names
+ * @conn_addrtype: address type used when establishing connection
  */
 struct tipc_sock {
 	struct sock sk;
-	u32 conn_type;
-	u32 conn_instance;
 	u32 max_pkt;
 	u32 maxnagle;
 	u32 portid;
@@ -141,6 +138,7 @@ struct tipc_sock {
 	bool nodelay;
 	bool group_is_open;
 	bool published;
+	u8 conn_addrtype;
 };
 
 static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -1463,10 +1461,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 			return -EISCONN;
 		if (tsk->published)
 			return -EOPNOTSUPP;
-		if (atype == TIPC_SERVICE_ADDR) {
-			tsk->conn_type = ua->sa.type;
-			tsk->conn_instance = ua->sa.instance;
-		}
+		if (atype == TIPC_SERVICE_ADDR)
+			tsk->conn_addrtype = atype;
 		msg_set_syn(hdr, 1);
 	}
 
@@ -1783,10 +1779,10 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
 		anc_data[2] = msg_nameupper(msg);
 		break;
 	case TIPC_CONN_MSG:
-		has_name = (tsk->conn_type != 0);
-		anc_data[0] = tsk->conn_type;
-		anc_data[1] = tsk->conn_instance;
-		anc_data[2] = tsk->conn_instance;
+		has_name = !!tsk->conn_addrtype;
+		anc_data[0] = msg_nametype(&tsk->phdr);
+		anc_data[1] = msg_nameinst(&tsk->phdr);
+		anc_data[2] = anc_data[1];
 		break;
 	default:
 		has_name = 0;
@@ -2750,8 +2746,9 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 
 	tsk_set_importance(new_sk, msg_importance(msg));
 	if (msg_named(msg)) {
-		new_tsock->conn_type = msg_nametype(msg);
-		new_tsock->conn_instance = msg_nameinst(msg);
+		new_tsock->conn_addrtype = TIPC_SERVICE_ADDR;
+		msg_set_nametype(&new_tsock->phdr, msg_nametype(msg));
+		msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg));
 	}
 
 	/*
@@ -3455,13 +3452,14 @@ void tipc_socket_stop(void)
 /* Caller should hold socket lock for the passed tipc socket. */
 static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
 {
-	u32 peer_node;
-	u32 peer_port;
+	u32 peer_node, peer_port;
+	u32 conn_type, conn_instance;
 	struct nlattr *nest;
 
 	peer_node = tsk_peer_node(tsk);
 	peer_port = tsk_peer_port(tsk);
-
+	conn_type = msg_nametype(&tsk->phdr);
+	conn_instance = msg_nameinst(&tsk->phdr);
 	nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON);
 	if (!nest)
 		return -EMSGSIZE;
@@ -3471,12 +3469,12 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
 	if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
 		goto msg_full;
 
-	if (tsk->conn_type != 0) {
+	if (tsk->conn_addrtype != 0) {
 		if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
 			goto msg_full;
-		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
+		if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type))
 			goto msg_full;
-		if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
+		if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance))
 			goto msg_full;
 	}
 	nla_nest_end(skb, nest);
@@ -3866,9 +3864,9 @@ bool tipc_sk_filtering(struct sock *sk)
 	}
 
 	if (!tipc_sk_type_connectionless(sk)) {
-		type = tsk->conn_type;
-		lower = tsk->conn_instance;
-		upper = tsk->conn_instance;
+		type = msg_nametype(&tsk->phdr);
+		lower = msg_nameinst(&tsk->phdr);
+		upper = lower;
 	}
 
 	if ((_type && _type != type) || (_lower && _lower != lower) ||
@@ -3933,6 +3931,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
 {
 	int i = 0;
 	size_t sz = (dqueues) ? SK_LMAX : SK_LMIN;
+	u32 conn_type, conn_instance;
 	struct tipc_sock *tsk;
 	struct publication *p;
 	bool tsk_connected;
@@ -3953,8 +3952,10 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
 	if (tsk_connected) {
 		i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk));
 		i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk));
-		i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type);
-		i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance);
+		conn_type = msg_nametype(&tsk->phdr);
+		conn_instance = msg_nameinst(&tsk->phdr);
+		i += scnprintf(buf + i, sz - i, " %u", conn_type);
+		i += scnprintf(buf + i, sz - i, " %u", conn_instance);
 	}
 	i += scnprintf(buf + i, sz - i, " | %u", tsk->published);
 	if (tsk->published) {

From 62633c2f17f1f0e6dd6932f990ade9525204ea24 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 2 Jun 2021 13:44:25 -0400
Subject: [PATCH 0945/3292] tipc: refactor function tipc_sk_anc_data_recv()

We refactor tipc_sk_anc_data_recv() to make it slightly more
comprehensible, but also to facilitate application of some additions
to the code in a future commit.

Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 85 +++++++++++++++++++++--------------------------
 1 file changed, 38 insertions(+), 47 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cb2d9fffbc5d..c635fd27fb38 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1733,67 +1733,58 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
 static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
 				 struct tipc_sock *tsk)
 {
-	struct tipc_msg *msg;
-	u32 anc_data[3];
-	u32 err;
-	u32 dest_type;
-	int has_name;
-	int res;
+	struct tipc_msg *hdr;
+	u32 data[3] = {0,};
+	bool has_addr;
+	int dlen, rc;
 
 	if (likely(m->msg_controllen == 0))
 		return 0;
-	msg = buf_msg(skb);
 
-	/* Optionally capture errored message object(s) */
-	err = msg ? msg_errcode(msg) : 0;
-	if (unlikely(err)) {
-		anc_data[0] = err;
-		anc_data[1] = msg_data_sz(msg);
-		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
-		if (res)
-			return res;
-		if (anc_data[1]) {
-			if (skb_linearize(skb))
-				return -ENOMEM;
-			msg = buf_msg(skb);
-			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
-				       msg_data(msg));
-			if (res)
-				return res;
-		}
+	hdr = buf_msg(skb);
+	dlen = msg_data_sz(hdr);
+
+	/* Capture errored message object, if any */
+	if (msg_errcode(hdr)) {
+		if (skb_linearize(skb))
+			return -ENOMEM;
+		hdr = buf_msg(skb);
+		data[0] = msg_errcode(hdr);
+		data[1] = dlen;
+		rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data);
+		if (rc || !dlen)
+			return rc;
+		rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr));
+		if (rc)
+			return rc;
 	}
 
-	/* Optionally capture message destination object */
-	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
-	switch (dest_type) {
+	/* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */
+	switch (msg_type(hdr)) {
 	case TIPC_NAMED_MSG:
-		has_name = 1;
-		anc_data[0] = msg_nametype(msg);
-		anc_data[1] = msg_namelower(msg);
-		anc_data[2] = msg_namelower(msg);
+		has_addr = true;
+		data[0] = msg_nametype(hdr);
+		data[1] = msg_namelower(hdr);
+		data[2] = data[1];
 		break;
 	case TIPC_MCAST_MSG:
-		has_name = 1;
-		anc_data[0] = msg_nametype(msg);
-		anc_data[1] = msg_namelower(msg);
-		anc_data[2] = msg_nameupper(msg);
+		has_addr = true;
+		data[0] = msg_nametype(hdr);
+		data[1] = msg_namelower(hdr);
+		data[2] = msg_nameupper(hdr);
 		break;
 	case TIPC_CONN_MSG:
-		has_name = !!tsk->conn_addrtype;
-		anc_data[0] = msg_nametype(&tsk->phdr);
-		anc_data[1] = msg_nameinst(&tsk->phdr);
-		anc_data[2] = anc_data[1];
+		has_addr = !!tsk->conn_addrtype;
+		data[0] = msg_nametype(&tsk->phdr);
+		data[1] = msg_nameinst(&tsk->phdr);
+		data[2] = data[1];
 		break;
 	default:
-		has_name = 0;
+		has_addr = false;
 	}
-	if (has_name) {
-		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
-		if (res)
-			return res;
-	}
-
-	return 0;
+	if (!has_addr)
+		return 0;
+	return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data);
 }
 
 static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)

From 5ef213258ddf38fc8b6de5d7aea3d514ff13f71a Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Wed, 2 Jun 2021 13:44:26 -0400
Subject: [PATCH 0946/3292] tipc: simplify handling of lookup scope during
 multicast message reception

We introduce a new macro TIPC_ANY_SCOPE to make the handling of the
lookup scope value more comprehensible during multicast reception.

The (unchanged) rules go as follows:

1) Multicast messages sent from own node are delivered to all matching
   sockets on the own node, irrespective of their binding scope.

2) Multicast messages sent from other nodes arrive here because they
   have found TIPC_CLUSTER_SCOPE bindings emanating from this node.
   Those messages should be delivered to exactly those sockets, but not
   to local sockets bound with TIPC_NODE_SCOPE, since the latter
   obviously were not meant to be visible for those senders.

3) Group multicast/broadcast messages are delivered to the sockets with
   a binding scope matching exactly the lookup scope indicated in the
   message header, and nobody else.

Reviewed-by: Xin Long <lucien.xin@gmail.com>
Tested-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c |  6 +++---
 net/tipc/name_table.h |  4 +++-
 net/tipc/socket.c     | 26 ++++++++++----------------
 3 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index fecab516bf41..01396dd1c899 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -673,12 +673,12 @@ exit:
  * Returns a list of local sockets
  */
 void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
-				       bool exact, struct list_head *dports)
+				       struct list_head *dports)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
-	u32 scope = ua->scope;
+	u8 scope = ua->scope;
 
 	rcu_read_lock();
 	sc = tipc_service_find(net, ua);
@@ -688,7 +688,7 @@ void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
 	spin_lock_bh(&sc->lock);
 	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		list_for_each_entry(p, &sr->local_publ, local_publ) {
-			if (p->scope == scope || (!exact && p->scope < scope))
+			if (scope == p->scope || scope == TIPC_ANY_SCOPE)
 				tipc_dest_push(dports, 0, p->sk.ref);
 		}
 	}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index c7c9a3ddd420..259f95e3d99c 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -51,6 +51,8 @@ struct tipc_uaddr;
 #define TIPC_PUBL_SCOPE_NUM	(TIPC_NODE_SCOPE + 1)
 #define TIPC_NAMETBL_SIZE	1024	/* must be a power of 2 */
 
+#define TIPC_ANY_SCOPE 10      /* Both node and cluster scope will match */
+
 /**
  * struct publication - info about a published service address or range
  * @sr: service range represented by this publication
@@ -113,7 +115,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
 				 struct tipc_socket_addr *sk);
 void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
-				       bool exact, struct list_head *dports);
+				       struct list_head *dports);
 void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
 				     struct tipc_nlist *nodes);
 bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c635fd27fb38..575a0238deb2 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1200,12 +1200,12 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 	struct tipc_msg *hdr;
 	struct tipc_uaddr ua;
 	int user, mtyp, hlen;
-	bool exact;
 
 	__skb_queue_head_init(&tmpq);
 	INIT_LIST_HEAD(&dports);
 	ua.addrtype = TIPC_SERVICE_RANGE;
 
+	/* tipc_skb_peek() increments the head skb's reference counter */
 	skb = tipc_skb_peek(arrvq, &inputq->lock);
 	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
 		hdr = buf_msg(skb);
@@ -1214,6 +1214,12 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
 		onode = msg_orignode(hdr);
 		ua.sr.type = msg_nametype(hdr);
+		ua.sr.lower = msg_namelower(hdr);
+		ua.sr.upper = msg_nameupper(hdr);
+		if (onode == self)
+			ua.scope = TIPC_ANY_SCOPE;
+		else
+			ua.scope = TIPC_CLUSTER_SCOPE;
 
 		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
 			spin_lock_bh(&inputq->lock);
@@ -1231,20 +1237,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 			ua.sr.lower = 0;
 			ua.sr.upper = ~0;
 			ua.scope = msg_lookup_scope(hdr);
-			exact = true;
-		} else {
-			/* TIPC_NODE_SCOPE means "any scope" in this context */
-			if (onode == self)
-				ua.scope = TIPC_NODE_SCOPE;
-			else
-				ua.scope = TIPC_CLUSTER_SCOPE;
-			exact = false;
-			ua.sr.lower = msg_namelower(hdr);
-			ua.sr.upper = msg_nameupper(hdr);
 		}
 
 		/* Create destination port list: */
-		tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
+		tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports);
 
 		/* Clone message per destination */
 		while (tipc_dest_pop(&dports, NULL, &portid)) {
@@ -1256,13 +1252,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 			}
 			pr_warn("Failed to clone mcast rcv buffer\n");
 		}
-		/* Append to inputq if not already done by other thread */
+		/* Append clones to inputq only if skb is still head of arrvq */
 		spin_lock_bh(&inputq->lock);
 		if (skb_peek(arrvq) == skb) {
 			skb_queue_splice_tail_init(&tmpq, inputq);
-			/* Decrease the skb's refcnt as increasing in the
-			 * function tipc_skb_peek
-			 */
+			/* Decrement the skb's refcnt */
 			kfree_skb(__skb_dequeue(arrvq));
 		}
 		spin_unlock_bh(&inputq->lock);

From f0e8cb6106da27039cdc23ecf5b5a776d7c7e66e Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 2 Jun 2021 21:42:39 +0300
Subject: [PATCH 0947/3292] nvme-tcp-offload: Add nvme-tcp-offload - NVMeTCP HW
 offload ULP

This patch will present the structure for the NVMeTCP offload common
layer driver. This module is added under "drivers/nvme/host/" and future
offload drivers which will register to it will be placed under
"drivers/nvme/hw".
This new driver will be enabled by the Kconfig "NVM Express over Fabrics
TCP offload commmon layer".
In order to support the new transport type, for host mode, no change is
needed.

Each new vendor-specific offload driver will register to this ULP during
its probe function, by filling out the nvme_tcp_ofld_dev->ops and
nvme_tcp_ofld_dev->private_data and calling nvme_tcp_ofld_register_dev
with the initialized struct.

The internal implementation:
- tcp-offload.h:
  Includes all common structs and ops to be used and shared by offload
  drivers.

- tcp-offload.c:
  Includes the init function which registers as a NVMf transport just
  like any other transport.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Dean Balandin <dbalandin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                     |   8 ++
 drivers/nvme/host/Kconfig       |  17 +++
 drivers/nvme/host/Makefile      |   3 +
 drivers/nvme/host/tcp-offload.c | 124 ++++++++++++++++++++
 drivers/nvme/host/tcp-offload.h | 199 ++++++++++++++++++++++++++++++++
 5 files changed, 351 insertions(+)
 create mode 100644 drivers/nvme/host/tcp-offload.c
 create mode 100644 drivers/nvme/host/tcp-offload.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 9cbc3766fd74..d8e882229a48 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13107,6 +13107,14 @@ F:	drivers/nvme/host/
 F:	include/linux/nvme.h
 F:	include/uapi/linux/nvme_ioctl.h
 
+NVM EXPRESS TCP OFFLOAD TRANSPORT DRIVERS
+M:	Shai Malin <smalin@marvell.com>
+M:	Ariel Elior <aelior@marvell.com>
+L:	linux-nvme@lists.infradead.org
+S:	Supported
+F:	drivers/nvme/host/tcp-offload.c
+F:	drivers/nvme/host/tcp-offload.h
+
 NVM EXPRESS FC TRANSPORT DRIVERS
 M:	James Smart <james.smart@broadcom.com>
 L:	linux-nvme@lists.infradead.org
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index a44d49d63968..caedc35e1f0d 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -84,3 +84,20 @@ config NVME_TCP
 	  from https://github.com/linux-nvme/nvme-cli.
 
 	  If unsure, say N.
+
+config NVME_TCP_OFFLOAD
+	tristate "NVM Express over Fabrics TCP offload common layer"
+	default m
+	depends on BLOCK
+	depends on INET
+	select NVME_CORE
+	select NVME_FABRICS
+	help
+	  This provides support for the NVMe over Fabrics protocol using
+	  the TCP offload transport. This allows you to use remote block devices
+	  exported using the NVMe protocol set.
+
+	  To configure a NVMe over Fabrics controller use the nvme-cli tool
+	  from https://github.com/linux-nvme/nvme-cli.
+
+	  If unsure, say N.
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index cbc509784b2e..3c3fdf83ce38 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_NVME_FABRICS)		+= nvme-fabrics.o
 obj-$(CONFIG_NVME_RDMA)			+= nvme-rdma.o
 obj-$(CONFIG_NVME_FC)			+= nvme-fc.o
 obj-$(CONFIG_NVME_TCP)			+= nvme-tcp.o
+obj-$(CONFIG_NVME_TCP_OFFLOAD)	+= nvme-tcp-offload.o
 
 nvme-core-y				:= core.o ioctl.o
 nvme-core-$(CONFIG_TRACING)		+= trace.o
@@ -26,3 +27,5 @@ nvme-rdma-y				+= rdma.o
 nvme-fc-y				+= fc.o
 
 nvme-tcp-y				+= tcp.o
+
+nvme-tcp-offload-y		+= tcp-offload.o
diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
new file mode 100644
index 000000000000..f7aa49f337dc
--- /dev/null
+++ b/drivers/nvme/host/tcp-offload.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2021 Marvell. All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/* Kernel includes */
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* Driver includes */
+#include "tcp-offload.h"
+
+static LIST_HEAD(nvme_tcp_ofld_devices);
+static DEFINE_MUTEX(nvme_tcp_ofld_devices_mutex);
+
+/**
+ * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration
+ * function.
+ * @dev:	NVMeTCP offload device instance to be registered to the
+ *		common tcp offload instance.
+ *
+ * API function that registers the type of vendor specific driver
+ * being implemented to the common NVMe over TCP offload library. Part of
+ * the overall init sequence of starting up an offload driver.
+ */
+int nvme_tcp_ofld_register_dev(struct nvme_tcp_ofld_dev *dev)
+{
+	struct nvme_tcp_ofld_ops *ops = dev->ops;
+
+	if (!ops->claim_dev ||
+	    !ops->setup_ctrl ||
+	    !ops->release_ctrl ||
+	    !ops->create_queue ||
+	    !ops->drain_queue ||
+	    !ops->destroy_queue ||
+	    !ops->poll_queue ||
+	    !ops->send_req)
+		return -EINVAL;
+
+	mutex_lock(&nvme_tcp_ofld_devices_mutex);
+	list_add_tail(&dev->entry, &nvme_tcp_ofld_devices);
+	mutex_unlock(&nvme_tcp_ofld_devices_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_tcp_ofld_register_dev);
+
+/**
+ * nvme_tcp_ofld_unregister_dev() - NVMeTCP Offload Library unregistration
+ * function.
+ * @dev:	NVMeTCP offload device instance to be unregistered from the
+ *		common tcp offload instance.
+ *
+ * API function that unregisters the type of vendor specific driver being
+ * implemented from the common NVMe over TCP offload library.
+ * Part of the overall exit sequence of unloading the implemented driver.
+ */
+void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev)
+{
+	mutex_lock(&nvme_tcp_ofld_devices_mutex);
+	list_del(&dev->entry);
+	mutex_unlock(&nvme_tcp_ofld_devices_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_tcp_ofld_unregister_dev);
+
+/**
+ * nvme_tcp_ofld_report_queue_err() - NVMeTCP Offload report error event
+ * callback function. Pointed to by nvme_tcp_ofld_queue->report_err.
+ * @queue:	NVMeTCP offload queue instance on which the error has occurred.
+ *
+ * API function that allows the vendor specific offload driver to reports errors
+ * to the common offload layer, to invoke error recovery.
+ */
+int nvme_tcp_ofld_report_queue_err(struct nvme_tcp_ofld_queue *queue)
+{
+	/* Placeholder - invoke error recovery flow */
+
+	return 0;
+}
+
+/**
+ * nvme_tcp_ofld_req_done() - NVMeTCP Offload request done callback
+ * function. Pointed to by nvme_tcp_ofld_req->done.
+ * Handles both NVME_TCP_F_DATA_SUCCESS flag and NVMe CQ.
+ * @req:	NVMeTCP offload request to complete.
+ * @result:     The nvme_result.
+ * @status:     The completion status.
+ *
+ * API function that allows the vendor specific offload driver to report request
+ * completions to the common offload layer.
+ */
+void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
+			    union nvme_result *result,
+			    __le16 status)
+{
+	/* Placeholder - complete request with/without error */
+}
+
+static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
+	.name		= "tcp_offload",
+	.module		= THIS_MODULE,
+	.required_opts	= NVMF_OPT_TRADDR,
+	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_NR_WRITE_QUEUES  |
+			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
+			  NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |
+			  NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |
+			  NVMF_OPT_TOS,
+};
+
+static int __init nvme_tcp_ofld_init_module(void)
+{
+	nvmf_register_transport(&nvme_tcp_ofld_transport);
+
+	return 0;
+}
+
+static void __exit nvme_tcp_ofld_cleanup_module(void)
+{
+	nvmf_unregister_transport(&nvme_tcp_ofld_transport);
+}
+
+module_init(nvme_tcp_ofld_init_module);
+module_exit(nvme_tcp_ofld_cleanup_module);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/tcp-offload.h b/drivers/nvme/host/tcp-offload.h
new file mode 100644
index 000000000000..520a0ea6f4b8
--- /dev/null
+++ b/drivers/nvme/host/tcp-offload.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2021 Marvell. All rights reserved.
+ */
+
+/* Linux includes */
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/types.h>
+#include <linux/nvme-tcp.h>
+
+/* Driver includes */
+#include "nvme.h"
+#include "fabrics.h"
+
+/* Forward declarations */
+struct nvme_tcp_ofld_ops;
+
+/* Representation of a vendor-specific device. This is the struct used to
+ * register to the offload layer by the vendor-specific driver during its probe
+ * function.
+ * Allocated by vendor-specific driver.
+ */
+struct nvme_tcp_ofld_dev {
+	struct list_head entry;
+	struct net_device *ndev;
+	struct nvme_tcp_ofld_ops *ops;
+
+	/* Vendor specific driver context */
+	int num_hw_vectors;
+};
+
+/* Per IO struct holding the nvme_request and command
+ * Allocated by blk-mq.
+ */
+struct nvme_tcp_ofld_req {
+	struct nvme_request req;
+	struct nvme_command nvme_cmd;
+	struct list_head queue_entry;
+	struct nvme_tcp_ofld_queue *queue;
+
+	/* Vendor specific driver context */
+	void *private_data;
+
+	/* async flag is used to distinguish between async and IO flow
+	 * in common send_req() of nvme_tcp_ofld_ops.
+	 */
+	bool async;
+
+	void (*done)(struct nvme_tcp_ofld_req *req,
+		     union nvme_result *result,
+		     __le16 status);
+};
+
+enum nvme_tcp_ofld_queue_flags {
+	NVME_TCP_OFLD_Q_ALLOCATED = 0,
+	NVME_TCP_OFLD_Q_LIVE = 1,
+};
+
+/* Allocated by nvme_tcp_ofld */
+struct nvme_tcp_ofld_queue {
+	/* Offload device associated to this queue */
+	struct nvme_tcp_ofld_dev *dev;
+	struct nvme_tcp_ofld_ctrl *ctrl;
+	unsigned long flags;
+	size_t cmnd_capsule_len;
+
+	u8 hdr_digest;
+	u8 data_digest;
+	u8 tos;
+
+	/* Vendor specific driver context */
+	void *private_data;
+
+	/* Error callback function */
+	int (*report_err)(struct nvme_tcp_ofld_queue *queue);
+};
+
+/* Connectivity (routing) params used for establishing a connection */
+struct nvme_tcp_ofld_ctrl_con_params {
+	struct sockaddr_storage remote_ip_addr;
+
+	/* If NVMF_OPT_HOST_TRADDR is provided it will be set in local_ip_addr
+	 * in nvme_tcp_ofld_create_ctrl().
+	 * If NVMF_OPT_HOST_TRADDR is not provided the local_ip_addr will be
+	 * initialized by claim_dev().
+	 */
+	struct sockaddr_storage local_ip_addr;
+};
+
+/* Allocated by nvme_tcp_ofld */
+struct nvme_tcp_ofld_ctrl {
+	struct nvme_ctrl nctrl;
+	struct list_head list;
+	struct nvme_tcp_ofld_dev *dev;
+
+	/* admin and IO queues */
+	struct blk_mq_tag_set tag_set;
+	struct blk_mq_tag_set admin_tag_set;
+	struct nvme_tcp_ofld_queue *queues;
+
+	struct work_struct err_work;
+	struct delayed_work connect_work;
+
+	/*
+	 * Each entry in the array indicates the number of queues of
+	 * corresponding type.
+	 */
+	u32 io_queues[HCTX_MAX_TYPES];
+
+	/* Connectivity params */
+	struct nvme_tcp_ofld_ctrl_con_params conn_params;
+
+	/* Vendor specific driver context */
+	void *private_data;
+};
+
+struct nvme_tcp_ofld_ops {
+	const char *name;
+	struct module *module;
+
+	/* For vendor-specific driver to report what opts it supports.
+	 * It could be different than the ULP supported opts due to hardware
+	 * limitations. Also it could be different among different vendor
+	 * drivers.
+	 */
+	int required_opts; /* bitmap using enum nvmf_parsing_opts */
+	int allowed_opts; /* bitmap using enum nvmf_parsing_opts */
+
+	/* For vendor-specific max num of segments and IO sizes */
+	u32 max_hw_sectors;
+	u32 max_segments;
+
+	/**
+	 * claim_dev: Return True if addr is reachable via offload device.
+	 * @dev: The offload device to check.
+	 * @ctrl: The offload ctrl have the conn_params field. The
+	 * conn_params is to be filled with routing params by the lower
+	 * driver.
+	 */
+	int (*claim_dev)(struct nvme_tcp_ofld_dev *dev,
+			 struct nvme_tcp_ofld_ctrl *ctrl);
+
+	/**
+	 * setup_ctrl: Setup device specific controller structures.
+	 * @ctrl: The offload ctrl.
+	 */
+	int (*setup_ctrl)(struct nvme_tcp_ofld_ctrl *ctrl);
+
+	/**
+	 * release_ctrl: Release/Free device specific controller structures.
+	 * @ctrl: The offload ctrl.
+	 */
+	int (*release_ctrl)(struct nvme_tcp_ofld_ctrl *ctrl);
+
+	/**
+	 * create_queue: Create offload queue and establish TCP + NVMeTCP
+	 * (icreq+icresp) connection. Return true on successful connection.
+	 * Based on nvme_tcp_alloc_queue.
+	 * @queue: The queue itself - used as input and output.
+	 * @qid: The queue ID associated with the requested queue.
+	 * @q_size: The queue depth.
+	 */
+	int (*create_queue)(struct nvme_tcp_ofld_queue *queue, int qid,
+			    size_t queue_size);
+
+	/**
+	 * drain_queue: Drain a given queue - blocking function call.
+	 * Return from this function ensures that no additional
+	 * completions will arrive on this queue and that the HW will
+	 * not access host memory.
+	 * @queue: The queue to drain.
+	 */
+	void (*drain_queue)(struct nvme_tcp_ofld_queue *queue);
+
+	/**
+	 * destroy_queue: Close the TCP + NVMeTCP connection of a given queue
+	 * and make sure its no longer active (no completions will arrive on the
+	 * queue).
+	 * @queue: The queue to destroy.
+	 */
+	void (*destroy_queue)(struct nvme_tcp_ofld_queue *queue);
+
+	/**
+	 * poll_queue: Poll a given queue for completions.
+	 * @queue: The queue to poll.
+	 */
+	int (*poll_queue)(struct nvme_tcp_ofld_queue *queue);
+
+	/**
+	 * send_req: Dispatch a request. Returns the execution status.
+	 * @req: Ptr to request to be sent.
+	 */
+	int (*send_req)(struct nvme_tcp_ofld_req *req);
+};
+
+/* Exported functions for lower vendor specific offload drivers */
+int nvme_tcp_ofld_register_dev(struct nvme_tcp_ofld_dev *dev);
+void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev);

From 98a5097d1e08f473e2af13bd94e5b4533d51cfd5 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Wed, 2 Jun 2021 21:42:40 +0300
Subject: [PATCH 0948/3292] nvme-fabrics: Move NVMF_ALLOWED_OPTS and
 NVMF_REQUIRED_OPTS definitions

Move NVMF_ALLOWED_OPTS and NVMF_REQUIRED_OPTS definitions
to header file, so it can be used by the different HW devices.

NVMeTCP offload devices might have different limitations of the
allowed options, for example, a device that does not support all the
queue types. With tcp and rdma, only the nvme-tcp and nvme-rdma layers
handle those attributes and the HW devices do not create any limitations
for the allowed options.

An alternative design could be to add separate fields in
nvme_tcp_ofld_ops such as max_hw_sectors and max_segments that
we already have in this series.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Arie Gershberg <agershberg@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/fabrics.c | 7 -------
 drivers/nvme/host/fabrics.h | 7 +++++++
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index a2bb7fc63a73..e1e05aa2fada 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -942,13 +942,6 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 }
 EXPORT_SYMBOL_GPL(nvmf_free_options);
 
-#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
-#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
-				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW |\
-				 NVMF_OPT_FAIL_FAST_TMO)
-
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
 {
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index d7f7974dc208..ce7fe3a842b1 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -68,6 +68,13 @@ enum {
 	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };
 
+#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
+#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
+				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
+				 NVMF_OPT_DISABLE_SQFLOW |\
+				 NVMF_OPT_FAIL_FAST_TMO)
+
 /**
  * struct nvmf_ctrl_options - Used to hold the options specified
  *			      with the parsing opts enum.

From af527935bd5a3eb88af425a2a973211e06df6423 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Wed, 2 Jun 2021 21:42:41 +0300
Subject: [PATCH 0949/3292] nvme-fabrics: Expose nvmf_check_required_opts()
 globally

nvmf_check_required_opts() is used to check if user provided opts has
the required_opts or not. if not, it will log which options are not
provided.

It can be leveraged by nvme-tcp-offload to check if provided opts are
supported by this specific vendor driver or not.

So expose nvmf_check_required_opts() globally.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/fabrics.c | 5 +++--
 drivers/nvme/host/fabrics.h | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index e1e05aa2fada..ceb263eb50fb 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -860,8 +860,8 @@ out:
 	return ret;
 }
 
-static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
-		unsigned int required_opts)
+int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
+			     unsigned int required_opts)
 {
 	if ((opts->mask & required_opts) != required_opts) {
 		int i;
@@ -879,6 +879,7 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nvmf_check_required_opts);
 
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 		struct nvmf_ctrl_options *opts)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index ce7fe3a842b1..8399fcc063ef 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -193,5 +193,7 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 		struct nvmf_ctrl_options *opts);
+int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
+			     unsigned int required_opts);
 
 #endif /* _NVME_FABRICS_H */

From 4b8178ec5794f93c71950fdfb1cb5465f6f2e154 Mon Sep 17 00:00:00 2001
From: Dean Balandin <dbalandin@marvell.com>
Date: Wed, 2 Jun 2021 21:42:42 +0300
Subject: [PATCH 0950/3292] nvme-tcp-offload: Add device scan implementation

As part of create_ctrl(), it scans the registered devices and calls
the claim_dev op on each of them, to find the first devices that matches
the connection params. Once the correct devices is found (claim_dev
returns true), we raise the refcnt of that device and return that device
as the device to be used for ctrl currently being created.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Dean Balandin <dbalandin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp-offload.c | 77 +++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index f7aa49f337dc..5ee3bbca8770 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -13,6 +13,11 @@
 static LIST_HEAD(nvme_tcp_ofld_devices);
 static DEFINE_MUTEX(nvme_tcp_ofld_devices_mutex);
 
+static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)
+{
+	return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);
+}
+
 /**
  * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration
  * function.
@@ -96,6 +101,77 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
 	/* Placeholder - complete request with/without error */
 }
 
+static struct nvme_tcp_ofld_dev *
+nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)
+{
+	struct nvme_tcp_ofld_dev *dev;
+
+	mutex_lock(&nvme_tcp_ofld_devices_mutex);
+	list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) {
+		if (dev->ops->claim_dev(dev, ctrl))
+			goto out;
+	}
+
+	dev = NULL;
+out:
+	mutex_unlock(&nvme_tcp_ofld_devices_mutex);
+
+	return dev;
+}
+
+static struct nvme_ctrl *
+nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl;
+	struct nvme_tcp_ofld_dev *dev;
+	struct nvme_ctrl *nctrl;
+	int rc = 0;
+
+	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		return ERR_PTR(-ENOMEM);
+
+	nctrl = &ctrl->nctrl;
+
+	/* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */
+
+	/* Find device that can reach the dest addr */
+	dev = nvme_tcp_ofld_lookup_dev(ctrl);
+	if (!dev) {
+		pr_info("no device found for addr %s:%s.\n",
+			opts->traddr, opts->trsvcid);
+		rc = -EINVAL;
+		goto out_free_ctrl;
+	}
+
+	/* Increase driver refcnt */
+	if (!try_module_get(dev->ops->module)) {
+		pr_err("try_module_get failed\n");
+		dev = NULL;
+		goto out_free_ctrl;
+	}
+
+	ctrl->dev = dev;
+
+	if (ctrl->dev->ops->max_hw_sectors)
+		nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors;
+	if (ctrl->dev->ops->max_segments)
+		nctrl->max_segments = ctrl->dev->ops->max_segments;
+
+	/* Init queues */
+
+	/* Call nvme_init_ctrl */
+
+	/* Setup ctrl */
+
+	return nctrl;
+
+out_free_ctrl:
+	kfree(ctrl);
+
+	return ERR_PTR(rc);
+}
+
 static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
 	.name		= "tcp_offload",
 	.module		= THIS_MODULE,
@@ -105,6 +181,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
 			  NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |
 			  NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |
 			  NVMF_OPT_TOS,
+	.create_ctrl	= nvme_tcp_ofld_create_ctrl,
 };
 
 static int __init nvme_tcp_ofld_init_module(void)

From 5aadd5f9311e2aa18c3892bf47df7825fee0b457 Mon Sep 17 00:00:00 2001
From: Arie Gershberg <agershberg@marvell.com>
Date: Wed, 2 Jun 2021 21:42:43 +0300
Subject: [PATCH 0951/3292] nvme-tcp-offload: Add controller level
 implementation

In this patch we implement controller level functionality including:
- create_ctrl.
- delete_ctrl.
- free_ctrl.

The implementation is similar to other nvme fabrics modules, the main
difference being that the nvme-tcp-offload ULP calls the vendor specific
claim_dev() op with the given TCP/IP parameters to determine which device
will be used for this controller.
Once found, the vendor specific device and controller will be paired and
kept in a controller list managed by the ULP.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Arie Gershberg <agershberg@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp-offload.c | 481 +++++++++++++++++++++++++++++++-
 1 file changed, 476 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index 5ee3bbca8770..d05dec9c0add 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -12,6 +12,10 @@
 
 static LIST_HEAD(nvme_tcp_ofld_devices);
 static DEFINE_MUTEX(nvme_tcp_ofld_devices_mutex);
+static LIST_HEAD(nvme_tcp_ofld_ctrl_list);
+static DEFINE_MUTEX(nvme_tcp_ofld_ctrl_mutex);
+static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops;
+static struct blk_mq_ops nvme_tcp_ofld_mq_ops;
 
 static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)
 {
@@ -119,21 +123,439 @@ out:
 	return dev;
 }
 
+static struct blk_mq_tag_set *
+nvme_tcp_ofld_alloc_tagset(struct nvme_ctrl *nctrl, bool admin)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct blk_mq_tag_set *set;
+	int rc;
+
+	if (admin) {
+		set = &ctrl->admin_tag_set;
+		memset(set, 0, sizeof(*set));
+		set->ops = &nvme_tcp_ofld_admin_mq_ops;
+		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
+		set->reserved_tags = NVMF_RESERVED_TAGS;
+		set->numa_node = nctrl->numa_node;
+		set->flags = BLK_MQ_F_BLOCKING;
+		set->cmd_size = sizeof(struct nvme_tcp_ofld_req);
+		set->driver_data = ctrl;
+		set->nr_hw_queues = 1;
+		set->timeout = NVME_ADMIN_TIMEOUT;
+	} else {
+		set = &ctrl->tag_set;
+		memset(set, 0, sizeof(*set));
+		set->ops = &nvme_tcp_ofld_mq_ops;
+		set->queue_depth = nctrl->sqsize + 1;
+		set->reserved_tags = NVMF_RESERVED_TAGS;
+		set->numa_node = nctrl->numa_node;
+		set->flags = BLK_MQ_F_SHOULD_MERGE;
+		set->cmd_size = sizeof(struct nvme_tcp_ofld_req);
+		set->driver_data = ctrl;
+		set->nr_hw_queues = nctrl->queue_count - 1;
+		set->timeout = NVME_IO_TIMEOUT;
+		set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+	}
+
+	rc = blk_mq_alloc_tag_set(set);
+	if (rc)
+		return ERR_PTR(rc);
+
+	return set;
+}
+
+static int nvme_tcp_ofld_configure_admin_queue(struct nvme_ctrl *nctrl,
+					       bool new)
+{
+	int rc;
+
+	/* Placeholder - alloc_admin_queue */
+	if (new) {
+		nctrl->admin_tagset =
+				nvme_tcp_ofld_alloc_tagset(nctrl, true);
+		if (IS_ERR(nctrl->admin_tagset)) {
+			rc = PTR_ERR(nctrl->admin_tagset);
+			nctrl->admin_tagset = NULL;
+			goto out_destroy_queue;
+		}
+
+		nctrl->fabrics_q = blk_mq_init_queue(nctrl->admin_tagset);
+		if (IS_ERR(nctrl->fabrics_q)) {
+			rc = PTR_ERR(nctrl->fabrics_q);
+			nctrl->fabrics_q = NULL;
+			goto out_free_tagset;
+		}
+
+		nctrl->admin_q = blk_mq_init_queue(nctrl->admin_tagset);
+		if (IS_ERR(nctrl->admin_q)) {
+			rc = PTR_ERR(nctrl->admin_q);
+			nctrl->admin_q = NULL;
+			goto out_cleanup_fabrics_q;
+		}
+	}
+
+	/* Placeholder - nvme_tcp_ofld_start_queue */
+
+	rc = nvme_enable_ctrl(nctrl);
+	if (rc)
+		goto out_stop_queue;
+
+	blk_mq_unquiesce_queue(nctrl->admin_q);
+
+	rc = nvme_init_ctrl_finish(nctrl);
+	if (rc)
+		goto out_quiesce_queue;
+
+	return 0;
+
+out_quiesce_queue:
+	blk_mq_quiesce_queue(nctrl->admin_q);
+	blk_sync_queue(nctrl->admin_q);
+
+out_stop_queue:
+	/* Placeholder - stop offload queue */
+	nvme_cancel_admin_tagset(nctrl);
+
+out_cleanup_fabrics_q:
+	if (new)
+		blk_cleanup_queue(nctrl->fabrics_q);
+out_free_tagset:
+	if (new)
+		blk_mq_free_tag_set(nctrl->admin_tagset);
+out_destroy_queue:
+	/* Placeholder - free admin queue */
+
+	return rc;
+}
+
+static int
+nvme_tcp_ofld_configure_io_queues(struct nvme_ctrl *nctrl, bool new)
+{
+	int rc;
+
+	/* Placeholder - alloc_io_queues */
+
+	if (new) {
+		nctrl->tagset = nvme_tcp_ofld_alloc_tagset(nctrl, false);
+		if (IS_ERR(nctrl->tagset)) {
+			rc = PTR_ERR(nctrl->tagset);
+			nctrl->tagset = NULL;
+			goto out_free_io_queues;
+		}
+
+		nctrl->connect_q = blk_mq_init_queue(nctrl->tagset);
+		if (IS_ERR(nctrl->connect_q)) {
+			rc = PTR_ERR(nctrl->connect_q);
+			nctrl->connect_q = NULL;
+			goto out_free_tag_set;
+		}
+	}
+
+	/* Placeholder - start_io_queues */
+
+	if (!new) {
+		nvme_start_queues(nctrl);
+		if (!nvme_wait_freeze_timeout(nctrl, NVME_IO_TIMEOUT)) {
+			/*
+			 * If we timed out waiting for freeze we are likely to
+			 * be stuck.  Fail the controller initialization just
+			 * to be safe.
+			 */
+			rc = -ENODEV;
+			goto out_wait_freeze_timed_out;
+		}
+		blk_mq_update_nr_hw_queues(nctrl->tagset, nctrl->queue_count - 1);
+		nvme_unfreeze(nctrl);
+	}
+
+	return 0;
+
+out_wait_freeze_timed_out:
+	nvme_stop_queues(nctrl);
+	nvme_sync_io_queues(nctrl);
+
+	/* Placeholder - Stop IO queues */
+
+	if (new)
+		blk_cleanup_queue(nctrl->connect_q);
+out_free_tag_set:
+	if (new)
+		blk_mq_free_tag_set(nctrl->tagset);
+out_free_io_queues:
+	/* Placeholder - free_io_queues */
+
+	return rc;
+}
+
+static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvmf_ctrl_options *opts = nctrl->opts;
+	int rc = 0;
+
+	rc = ctrl->dev->ops->setup_ctrl(ctrl);
+	if (rc)
+		return rc;
+
+	rc = nvme_tcp_ofld_configure_admin_queue(nctrl, new);
+	if (rc)
+		goto out_release_ctrl;
+
+	if (nctrl->icdoff) {
+		dev_err(nctrl->device, "icdoff is not supported!\n");
+		rc = -EINVAL;
+		goto destroy_admin;
+	}
+
+	if (!(nctrl->sgls & ((1 << 0) | (1 << 1)))) {
+		dev_err(nctrl->device, "Mandatory sgls are not supported!\n");
+		goto destroy_admin;
+	}
+
+	if (opts->queue_size > nctrl->sqsize + 1)
+		dev_warn(nctrl->device,
+			 "queue_size %zu > ctrl sqsize %u, clamping down\n",
+			 opts->queue_size, nctrl->sqsize + 1);
+
+	if (nctrl->sqsize + 1 > nctrl->maxcmd) {
+		dev_warn(nctrl->device,
+			 "sqsize %u > ctrl maxcmd %u, clamping down\n",
+			 nctrl->sqsize + 1, nctrl->maxcmd);
+		nctrl->sqsize = nctrl->maxcmd - 1;
+	}
+
+	if (nctrl->queue_count > 1) {
+		rc = nvme_tcp_ofld_configure_io_queues(nctrl, new);
+		if (rc)
+			goto destroy_admin;
+	}
+
+	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_LIVE)) {
+		/*
+		 * state change failure is ok if we started ctrl delete,
+		 * unless we're during creation of a new controller to
+		 * avoid races with teardown flow.
+		 */
+		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
+			     nctrl->state != NVME_CTRL_DELETING_NOIO);
+		WARN_ON_ONCE(new);
+		rc = -EINVAL;
+		goto destroy_io;
+	}
+
+	nvme_start_ctrl(nctrl);
+
+	return 0;
+
+destroy_io:
+	/* Placeholder - stop and destroy io queues*/
+destroy_admin:
+	/* Placeholder - stop and destroy admin queue*/
+out_release_ctrl:
+	ctrl->dev->ops->release_ctrl(ctrl);
+
+	return rc;
+}
+
+static int
+nvme_tcp_ofld_check_dev_opts(struct nvmf_ctrl_options *opts,
+			     struct nvme_tcp_ofld_ops *ofld_ops)
+{
+	unsigned int nvme_tcp_ofld_opt_mask = NVMF_ALLOWED_OPTS |
+			ofld_ops->allowed_opts | ofld_ops->required_opts;
+	struct nvmf_ctrl_options dev_opts_mask;
+
+	if (opts->mask & ~nvme_tcp_ofld_opt_mask) {
+		pr_warn("One or more nvmf options missing from ofld drvr %s.\n",
+			ofld_ops->name);
+
+		dev_opts_mask.mask = nvme_tcp_ofld_opt_mask;
+
+		return nvmf_check_required_opts(&dev_opts_mask, opts->mask);
+	}
+
+	return 0;
+}
+
+static void nvme_tcp_ofld_free_ctrl(struct nvme_ctrl *nctrl)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvme_tcp_ofld_dev *dev = ctrl->dev;
+
+	if (list_empty(&ctrl->list))
+		goto free_ctrl;
+
+	ctrl->dev->ops->release_ctrl(ctrl);
+
+	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
+	list_del(&ctrl->list);
+	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
+
+	nvmf_free_options(nctrl->opts);
+free_ctrl:
+	module_put(dev->ops->module);
+	kfree(ctrl->queues);
+	kfree(ctrl);
+}
+
+static void
+nvme_tcp_ofld_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove)
+{
+	/* Placeholder - teardown_admin_queue */
+}
+
+static void
+nvme_tcp_ofld_teardown_io_queues(struct nvme_ctrl *nctrl, bool remove)
+{
+	/* Placeholder - teardown_io_queues */
+}
+
+static void
+nvme_tcp_ofld_teardown_ctrl(struct nvme_ctrl *nctrl, bool shutdown)
+{
+	/* Placeholder - err_work and connect_work */
+	nvme_tcp_ofld_teardown_io_queues(nctrl, shutdown);
+	blk_mq_quiesce_queue(nctrl->admin_q);
+	if (shutdown)
+		nvme_shutdown_ctrl(nctrl);
+	else
+		nvme_disable_ctrl(nctrl);
+	nvme_tcp_ofld_teardown_admin_queue(nctrl, shutdown);
+}
+
+static void nvme_tcp_ofld_delete_ctrl(struct nvme_ctrl *nctrl)
+{
+	nvme_tcp_ofld_teardown_ctrl(nctrl, true);
+}
+
+static int
+nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
+			   struct request *rq,
+			   unsigned int hctx_idx,
+			   unsigned int numa_node)
+{
+	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
+
+	/* Placeholder - init request */
+
+	req->done = nvme_tcp_ofld_req_done;
+
+	return 0;
+}
+
+static blk_status_t
+nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
+		       const struct blk_mq_queue_data *bd)
+{
+	/* Call nvme_setup_cmd(...) */
+
+	/* Call ops->send_req(...) */
+
+	return BLK_STS_OK;
+}
+
+static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
+	.queue_rq	= nvme_tcp_ofld_queue_rq,
+	.init_request	= nvme_tcp_ofld_init_request,
+	/*
+	 * All additional ops will be also implemented and registered similar to
+	 * tcp.c
+	 */
+};
+
+static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops = {
+	.queue_rq	= nvme_tcp_ofld_queue_rq,
+	.init_request	= nvme_tcp_ofld_init_request,
+	/*
+	 * All additional ops will be also implemented and registered similar to
+	 * tcp.c
+	 */
+};
+
+static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
+	.name			= "tcp_offload",
+	.module			= THIS_MODULE,
+	.flags			= NVME_F_FABRICS,
+	.reg_read32		= nvmf_reg_read32,
+	.reg_read64		= nvmf_reg_read64,
+	.reg_write32		= nvmf_reg_write32,
+	.free_ctrl		= nvme_tcp_ofld_free_ctrl,
+	.delete_ctrl		= nvme_tcp_ofld_delete_ctrl,
+	.get_address		= nvmf_get_address,
+};
+
+static bool
+nvme_tcp_ofld_existing_controller(struct nvmf_ctrl_options *opts)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl;
+	bool found = false;
+
+	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
+	list_for_each_entry(ctrl, &nvme_tcp_ofld_ctrl_list, list) {
+		found = nvmf_ip_options_match(&ctrl->nctrl, opts);
+		if (found)
+			break;
+	}
+	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
+
+	return found;
+}
+
 static struct nvme_ctrl *
 nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
 {
+	struct nvme_tcp_ofld_queue *queue;
 	struct nvme_tcp_ofld_ctrl *ctrl;
 	struct nvme_tcp_ofld_dev *dev;
 	struct nvme_ctrl *nctrl;
-	int rc = 0;
+	int i, rc = 0;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (!ctrl)
 		return ERR_PTR(-ENOMEM);
 
+	INIT_LIST_HEAD(&ctrl->list);
 	nctrl = &ctrl->nctrl;
+	nctrl->opts = opts;
+	nctrl->queue_count = opts->nr_io_queues + opts->nr_write_queues +
+			     opts->nr_poll_queues + 1;
+	nctrl->sqsize = opts->queue_size - 1;
+	nctrl->kato = opts->kato;
+	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
+		opts->trsvcid =
+			kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
+		if (!opts->trsvcid) {
+			rc = -ENOMEM;
+			goto out_free_ctrl;
+		}
+		opts->mask |= NVMF_OPT_TRSVCID;
+	}
 
-	/* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */
+	rc = inet_pton_with_scope(&init_net, AF_UNSPEC, opts->traddr,
+				  opts->trsvcid,
+				  &ctrl->conn_params.remote_ip_addr);
+	if (rc) {
+		pr_err("malformed address passed: %s:%s\n",
+		       opts->traddr, opts->trsvcid);
+		goto out_free_ctrl;
+	}
+
+	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
+		rc = inet_pton_with_scope(&init_net, AF_UNSPEC,
+					  opts->host_traddr, NULL,
+					  &ctrl->conn_params.local_ip_addr);
+		if (rc) {
+			pr_err("malformed src address passed: %s\n",
+			       opts->host_traddr);
+			goto out_free_ctrl;
+		}
+	}
+
+	if (!opts->duplicate_connect &&
+	    nvme_tcp_ofld_existing_controller(opts)) {
+		rc = -EALREADY;
+		goto out_free_ctrl;
+	}
 
 	/* Find device that can reach the dest addr */
 	dev = nvme_tcp_ofld_lookup_dev(ctrl);
@@ -151,6 +573,10 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
 		goto out_free_ctrl;
 	}
 
+	rc = nvme_tcp_ofld_check_dev_opts(opts, dev->ops);
+	if (rc)
+		goto out_module_put;
+
 	ctrl->dev = dev;
 
 	if (ctrl->dev->ops->max_hw_sectors)
@@ -158,14 +584,51 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
 	if (ctrl->dev->ops->max_segments)
 		nctrl->max_segments = ctrl->dev->ops->max_segments;
 
-	/* Init queues */
+	ctrl->queues = kcalloc(nctrl->queue_count,
+			       sizeof(struct nvme_tcp_ofld_queue),
+			       GFP_KERNEL);
+	if (!ctrl->queues) {
+		rc = -ENOMEM;
+		goto out_module_put;
+	}
 
-	/* Call nvme_init_ctrl */
+	for (i = 0; i < nctrl->queue_count; ++i) {
+		queue = &ctrl->queues[i];
+		queue->ctrl = ctrl;
+		queue->dev = dev;
+		queue->report_err = nvme_tcp_ofld_report_queue_err;
+	}
 
-	/* Setup ctrl */
+	rc = nvme_init_ctrl(nctrl, ndev, &nvme_tcp_ofld_ctrl_ops, 0);
+	if (rc)
+		goto out_free_queues;
+
+	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
+		WARN_ON_ONCE(1);
+		rc = -EINTR;
+		goto out_uninit_ctrl;
+	}
+
+	rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);
+	if (rc)
+		goto out_uninit_ctrl;
+
+	dev_info(nctrl->device, "new ctrl: NQN \"%s\", addr %pISp\n",
+		 opts->subsysnqn, &ctrl->conn_params.remote_ip_addr);
+
+	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
+	list_add_tail(&ctrl->list, &nvme_tcp_ofld_ctrl_list);
+	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
 
 	return nctrl;
 
+out_uninit_ctrl:
+	nvme_uninit_ctrl(nctrl);
+	nvme_put_ctrl(nctrl);
+out_free_queues:
+	kfree(ctrl->queues);
+out_module_put:
+	module_put(dev->ops->module);
 out_free_ctrl:
 	kfree(ctrl);
 
@@ -193,7 +656,15 @@ static int __init nvme_tcp_ofld_init_module(void)
 
 static void __exit nvme_tcp_ofld_cleanup_module(void)
 {
+	struct nvme_tcp_ofld_ctrl *ctrl;
+
 	nvmf_unregister_transport(&nvme_tcp_ofld_transport);
+
+	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
+	list_for_each_entry(ctrl, &nvme_tcp_ofld_ctrl_list, list)
+		nvme_delete_ctrl(&ctrl->nctrl);
+	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
+	flush_workqueue(nvme_delete_wq);
 }
 
 module_init(nvme_tcp_ofld_init_module);

From 5faf6d68554808ef1b9c8647a7e8fd8a4e8cb0a5 Mon Sep 17 00:00:00 2001
From: Arie Gershberg <agershberg@marvell.com>
Date: Wed, 2 Jun 2021 21:42:44 +0300
Subject: [PATCH 0952/3292] nvme-tcp-offload: Add controller level error
 recovery implementation

In this patch, we implement controller level error handling and recovery.
Upon an error discovered by the ULP or reset controller initiated by the
nvme-core (using reset_ctrl workqueue), the ULP will initiate a controller
recovery which includes teardown and re-connect of all queues.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Arie Gershberg <agershberg@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp-offload.c | 127 +++++++++++++++++++++++++++++++-
 drivers/nvme/host/tcp-offload.h |   1 +
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index d05dec9c0add..97c1fd33adb9 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -72,6 +72,23 @@ void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev)
 }
 EXPORT_SYMBOL_GPL(nvme_tcp_ofld_unregister_dev);
 
+/**
+ * nvme_tcp_ofld_error_recovery() - NVMeTCP Offload library error recovery.
+ * function.
+ * @nctrl:	NVMe controller instance to change to resetting.
+ *
+ * API function that change the controller state to resseting.
+ * Part of the overall controller reset sequence.
+ */
+void nvme_tcp_ofld_error_recovery(struct nvme_ctrl *nctrl)
+{
+	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_RESETTING))
+		return;
+
+	queue_work(nvme_reset_wq, &to_tcp_ofld_ctrl(nctrl)->err_work);
+}
+EXPORT_SYMBOL_GPL(nvme_tcp_ofld_error_recovery);
+
 /**
  * nvme_tcp_ofld_report_queue_err() - NVMeTCP Offload report error event
  * callback function. Pointed to by nvme_tcp_ofld_queue->report_err.
@@ -82,7 +99,8 @@ EXPORT_SYMBOL_GPL(nvme_tcp_ofld_unregister_dev);
  */
 int nvme_tcp_ofld_report_queue_err(struct nvme_tcp_ofld_queue *queue)
 {
-	/* Placeholder - invoke error recovery flow */
+	pr_err("nvme-tcp-offload queue error\n");
+	nvme_tcp_ofld_error_recovery(&queue->ctrl->nctrl);
 
 	return 0;
 }
@@ -287,6 +305,28 @@ out_free_io_queues:
 	return rc;
 }
 
+static void nvme_tcp_ofld_reconnect_or_remove(struct nvme_ctrl *nctrl)
+{
+	/* If we are resetting/deleting then do nothing */
+	if (nctrl->state != NVME_CTRL_CONNECTING) {
+		WARN_ON_ONCE(nctrl->state == NVME_CTRL_NEW ||
+			     nctrl->state == NVME_CTRL_LIVE);
+
+		return;
+	}
+
+	if (nvmf_should_reconnect(nctrl)) {
+		dev_info(nctrl->device, "Reconnecting in %d seconds...\n",
+			 nctrl->opts->reconnect_delay);
+		queue_delayed_work(nvme_wq,
+				   &to_tcp_ofld_ctrl(nctrl)->connect_work,
+				   nctrl->opts->reconnect_delay * HZ);
+	} else {
+		dev_info(nctrl->device, "Removing controller...\n");
+		nvme_delete_ctrl(nctrl);
+	}
+}
+
 static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
 {
 	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
@@ -410,10 +450,63 @@ nvme_tcp_ofld_teardown_io_queues(struct nvme_ctrl *nctrl, bool remove)
 	/* Placeholder - teardown_io_queues */
 }
 
+static void nvme_tcp_ofld_reconnect_ctrl_work(struct work_struct *work)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl =
+				container_of(to_delayed_work(work),
+					     struct nvme_tcp_ofld_ctrl,
+					     connect_work);
+	struct nvme_ctrl *nctrl = &ctrl->nctrl;
+
+	++nctrl->nr_reconnects;
+
+	if (nvme_tcp_ofld_setup_ctrl(nctrl, false))
+		goto requeue;
+
+	dev_info(nctrl->device, "Successfully reconnected (%d attempt)\n",
+		 nctrl->nr_reconnects);
+
+	nctrl->nr_reconnects = 0;
+
+	return;
+
+requeue:
+	dev_info(nctrl->device, "Failed reconnect attempt %d\n",
+		 nctrl->nr_reconnects);
+	nvme_tcp_ofld_reconnect_or_remove(nctrl);
+}
+
+static void nvme_tcp_ofld_error_recovery_work(struct work_struct *work)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl =
+		container_of(work, struct nvme_tcp_ofld_ctrl, err_work);
+	struct nvme_ctrl *nctrl = &ctrl->nctrl;
+
+	nvme_stop_keep_alive(nctrl);
+	nvme_tcp_ofld_teardown_io_queues(nctrl, false);
+	/* unquiesce to fail fast pending requests */
+	nvme_start_queues(nctrl);
+	nvme_tcp_ofld_teardown_admin_queue(nctrl, false);
+	blk_mq_unquiesce_queue(nctrl->admin_q);
+
+	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
+		/* state change failure is ok if we started nctrl delete */
+		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
+			     nctrl->state != NVME_CTRL_DELETING_NOIO);
+
+		return;
+	}
+
+	nvme_tcp_ofld_reconnect_or_remove(nctrl);
+}
+
 static void
 nvme_tcp_ofld_teardown_ctrl(struct nvme_ctrl *nctrl, bool shutdown)
 {
-	/* Placeholder - err_work and connect_work */
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+
+	cancel_work_sync(&ctrl->err_work);
+	cancel_delayed_work_sync(&ctrl->connect_work);
 	nvme_tcp_ofld_teardown_io_queues(nctrl, shutdown);
 	blk_mq_quiesce_queue(nctrl->admin_q);
 	if (shutdown)
@@ -428,6 +521,32 @@ static void nvme_tcp_ofld_delete_ctrl(struct nvme_ctrl *nctrl)
 	nvme_tcp_ofld_teardown_ctrl(nctrl, true);
 }
 
+static void nvme_tcp_ofld_reset_ctrl_work(struct work_struct *work)
+{
+	struct nvme_ctrl *nctrl =
+		container_of(work, struct nvme_ctrl, reset_work);
+
+	nvme_stop_ctrl(nctrl);
+	nvme_tcp_ofld_teardown_ctrl(nctrl, false);
+
+	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
+		/* state change failure is ok if we started ctrl delete */
+		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
+			     nctrl->state != NVME_CTRL_DELETING_NOIO);
+
+		return;
+	}
+
+	if (nvme_tcp_ofld_setup_ctrl(nctrl, false))
+		goto out_fail;
+
+	return;
+
+out_fail:
+	++nctrl->nr_reconnects;
+	nvme_tcp_ofld_reconnect_or_remove(nctrl);
+}
+
 static int
 nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
 			   struct request *rq,
@@ -521,6 +640,10 @@ nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
 			     opts->nr_poll_queues + 1;
 	nctrl->sqsize = opts->queue_size - 1;
 	nctrl->kato = opts->kato;
+	INIT_DELAYED_WORK(&ctrl->connect_work,
+			  nvme_tcp_ofld_reconnect_ctrl_work);
+	INIT_WORK(&ctrl->err_work, nvme_tcp_ofld_error_recovery_work);
+	INIT_WORK(&nctrl->reset_work, nvme_tcp_ofld_reset_ctrl_work);
 	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
 		opts->trsvcid =
 			kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
diff --git a/drivers/nvme/host/tcp-offload.h b/drivers/nvme/host/tcp-offload.h
index 520a0ea6f4b8..4281d1dacc94 100644
--- a/drivers/nvme/host/tcp-offload.h
+++ b/drivers/nvme/host/tcp-offload.h
@@ -197,3 +197,4 @@ struct nvme_tcp_ofld_ops {
 /* Exported functions for lower vendor specific offload drivers */
 int nvme_tcp_ofld_register_dev(struct nvme_tcp_ofld_dev *dev);
 void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev);
+void nvme_tcp_ofld_error_recovery(struct nvme_ctrl *nctrl);

From e4ba452ded39caae59dcecba7412c34750b6e229 Mon Sep 17 00:00:00 2001
From: Dean Balandin <dbalandin@marvell.com>
Date: Wed, 2 Jun 2021 21:42:45 +0300
Subject: [PATCH 0953/3292] nvme-tcp-offload: Add queue level implementation

In this patch we implement queue level functionality.
The implementation is similar to the nvme-tcp module, the main
difference being that we call the vendor specific create_queue op which
creates the TCP connection, and NVMeTPC connection including
icreq+icresp negotiation.
Once create_queue returns successfully, we can move on to the fabrics
connect.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Dean Balandin <dbalandin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp-offload.c | 417 +++++++++++++++++++++++++++++---
 drivers/nvme/host/tcp-offload.h |   4 +
 2 files changed, 393 insertions(+), 28 deletions(-)

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index 97c1fd33adb9..1700cdf42433 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -22,6 +22,11 @@ static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctr
 	return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);
 }
 
+static inline int nvme_tcp_ofld_qid(struct nvme_tcp_ofld_queue *queue)
+{
+	return queue - queue->ctrl->queues;
+}
+
 /**
  * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration
  * function.
@@ -182,19 +187,124 @@ nvme_tcp_ofld_alloc_tagset(struct nvme_ctrl *nctrl, bool admin)
 	return set;
 }
 
+static void __nvme_tcp_ofld_stop_queue(struct nvme_tcp_ofld_queue *queue)
+{
+	queue->dev->ops->drain_queue(queue);
+}
+
+static void nvme_tcp_ofld_stop_queue(struct nvme_ctrl *nctrl, int qid)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[qid];
+
+	mutex_lock(&queue->queue_lock);
+	if (test_and_clear_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags))
+		__nvme_tcp_ofld_stop_queue(queue);
+	mutex_unlock(&queue->queue_lock);
+}
+
+static void nvme_tcp_ofld_stop_io_queues(struct nvme_ctrl *ctrl)
+{
+	int i;
+
+	for (i = 1; i < ctrl->queue_count; i++)
+		nvme_tcp_ofld_stop_queue(ctrl, i);
+}
+
+static void __nvme_tcp_ofld_free_queue(struct nvme_tcp_ofld_queue *queue)
+{
+	queue->dev->ops->destroy_queue(queue);
+}
+
+static void nvme_tcp_ofld_free_queue(struct nvme_ctrl *nctrl, int qid)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[qid];
+
+	if (test_and_clear_bit(NVME_TCP_OFLD_Q_ALLOCATED, &queue->flags)) {
+		__nvme_tcp_ofld_free_queue(queue);
+		mutex_destroy(&queue->queue_lock);
+	}
+}
+
+static void
+nvme_tcp_ofld_free_io_queues(struct nvme_ctrl *nctrl)
+{
+	int i;
+
+	for (i = 1; i < nctrl->queue_count; i++)
+		nvme_tcp_ofld_free_queue(nctrl, i);
+}
+
+static void nvme_tcp_ofld_destroy_io_queues(struct nvme_ctrl *nctrl, bool remove)
+{
+	nvme_tcp_ofld_stop_io_queues(nctrl);
+	if (remove) {
+		blk_cleanup_queue(nctrl->connect_q);
+		blk_mq_free_tag_set(nctrl->tagset);
+	}
+	nvme_tcp_ofld_free_io_queues(nctrl);
+}
+
+static void nvme_tcp_ofld_destroy_admin_queue(struct nvme_ctrl *nctrl, bool remove)
+{
+	nvme_tcp_ofld_stop_queue(nctrl, 0);
+	if (remove) {
+		blk_cleanup_queue(nctrl->admin_q);
+		blk_cleanup_queue(nctrl->fabrics_q);
+		blk_mq_free_tag_set(nctrl->admin_tagset);
+	}
+	nvme_tcp_ofld_free_queue(nctrl, 0);
+}
+
+static int nvme_tcp_ofld_start_queue(struct nvme_ctrl *nctrl, int qid)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[qid];
+	int rc;
+
+	queue = &ctrl->queues[qid];
+	if (qid) {
+		queue->cmnd_capsule_len = nctrl->ioccsz * 16;
+		rc = nvmf_connect_io_queue(nctrl, qid, false);
+	} else {
+		queue->cmnd_capsule_len = sizeof(struct nvme_command) + NVME_TCP_ADMIN_CCSZ;
+		rc = nvmf_connect_admin_queue(nctrl);
+	}
+
+	if (!rc) {
+		set_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags);
+	} else {
+		if (test_bit(NVME_TCP_OFLD_Q_ALLOCATED, &queue->flags))
+			__nvme_tcp_ofld_stop_queue(queue);
+		dev_err(nctrl->device,
+			"failed to connect queue: %d ret=%d\n", qid, rc);
+	}
+
+	return rc;
+}
+
 static int nvme_tcp_ofld_configure_admin_queue(struct nvme_ctrl *nctrl,
 					       bool new)
 {
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[0];
 	int rc;
 
-	/* Placeholder - alloc_admin_queue */
+	mutex_init(&queue->queue_lock);
+
+	rc = ctrl->dev->ops->create_queue(queue, 0, NVME_AQ_DEPTH);
+	if (rc)
+		return rc;
+
+	set_bit(NVME_TCP_OFLD_Q_ALLOCATED, &queue->flags);
 	if (new) {
 		nctrl->admin_tagset =
 				nvme_tcp_ofld_alloc_tagset(nctrl, true);
 		if (IS_ERR(nctrl->admin_tagset)) {
 			rc = PTR_ERR(nctrl->admin_tagset);
 			nctrl->admin_tagset = NULL;
-			goto out_destroy_queue;
+			goto out_free_queue;
 		}
 
 		nctrl->fabrics_q = blk_mq_init_queue(nctrl->admin_tagset);
@@ -212,7 +322,9 @@ static int nvme_tcp_ofld_configure_admin_queue(struct nvme_ctrl *nctrl,
 		}
 	}
 
-	/* Placeholder - nvme_tcp_ofld_start_queue */
+	rc = nvme_tcp_ofld_start_queue(nctrl, 0);
+	if (rc)
+		goto out_cleanup_queue;
 
 	rc = nvme_enable_ctrl(nctrl);
 	if (rc)
@@ -229,19 +341,143 @@ static int nvme_tcp_ofld_configure_admin_queue(struct nvme_ctrl *nctrl,
 out_quiesce_queue:
 	blk_mq_quiesce_queue(nctrl->admin_q);
 	blk_sync_queue(nctrl->admin_q);
-
 out_stop_queue:
-	/* Placeholder - stop offload queue */
+	nvme_tcp_ofld_stop_queue(nctrl, 0);
 	nvme_cancel_admin_tagset(nctrl);
-
+out_cleanup_queue:
+	if (new)
+		blk_cleanup_queue(nctrl->admin_q);
 out_cleanup_fabrics_q:
 	if (new)
 		blk_cleanup_queue(nctrl->fabrics_q);
 out_free_tagset:
 	if (new)
 		blk_mq_free_tag_set(nctrl->admin_tagset);
-out_destroy_queue:
-	/* Placeholder - free admin queue */
+out_free_queue:
+	nvme_tcp_ofld_free_queue(nctrl, 0);
+
+	return rc;
+}
+
+static unsigned int nvme_tcp_ofld_nr_io_queues(struct nvme_ctrl *nctrl)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvme_tcp_ofld_dev *dev = ctrl->dev;
+	u32 hw_vectors = dev->num_hw_vectors;
+	u32 nr_write_queues, nr_poll_queues;
+	u32 nr_io_queues, nr_total_queues;
+
+	nr_io_queues = min3(nctrl->opts->nr_io_queues, num_online_cpus(),
+			    hw_vectors);
+	nr_write_queues = min3(nctrl->opts->nr_write_queues, num_online_cpus(),
+			       hw_vectors);
+	nr_poll_queues = min3(nctrl->opts->nr_poll_queues, num_online_cpus(),
+			      hw_vectors);
+
+	nr_total_queues = nr_io_queues + nr_write_queues + nr_poll_queues;
+
+	return nr_total_queues;
+}
+
+static void
+nvme_tcp_ofld_set_io_queues(struct nvme_ctrl *nctrl, unsigned int nr_io_queues)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	struct nvmf_ctrl_options *opts = nctrl->opts;
+
+	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
+		/*
+		 * separate read/write queues
+		 * hand out dedicated default queues only after we have
+		 * sufficient read queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(opts->nr_write_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	} else {
+		/*
+		 * shared read/write queues
+		 * either no write queues were requested, or we don't have
+		 * sufficient queue count to have dedicated default queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(opts->nr_io_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	}
+
+	if (opts->nr_poll_queues && nr_io_queues) {
+		/* map dedicated poll queues only if we have queues left */
+		ctrl->io_queues[HCTX_TYPE_POLL] =
+			min(opts->nr_poll_queues, nr_io_queues);
+	}
+}
+
+static int nvme_tcp_ofld_create_io_queues(struct nvme_ctrl *nctrl)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
+	int i, rc;
+
+	for (i = 1; i < nctrl->queue_count; i++) {
+		mutex_init(&ctrl->queues[i].queue_lock);
+
+		rc = ctrl->dev->ops->create_queue(&ctrl->queues[i],
+						  i, nctrl->sqsize + 1);
+		if (rc)
+			goto out_free_queues;
+
+		set_bit(NVME_TCP_OFLD_Q_ALLOCATED, &ctrl->queues[i].flags);
+	}
+
+	return 0;
+
+out_free_queues:
+	for (i--; i >= 1; i--)
+		nvme_tcp_ofld_free_queue(nctrl, i);
+
+	return rc;
+}
+
+static int nvme_tcp_ofld_alloc_io_queues(struct nvme_ctrl *nctrl)
+{
+	unsigned int nr_io_queues;
+	int rc;
+
+	nr_io_queues = nvme_tcp_ofld_nr_io_queues(nctrl);
+	rc = nvme_set_queue_count(nctrl, &nr_io_queues);
+	if (rc)
+		return rc;
+
+	nctrl->queue_count = nr_io_queues + 1;
+	if (nctrl->queue_count < 2) {
+		dev_err(nctrl->device,
+			"unable to set any I/O queues\n");
+
+		return -ENOMEM;
+	}
+
+	dev_info(nctrl->device, "creating %d I/O queues.\n", nr_io_queues);
+	nvme_tcp_ofld_set_io_queues(nctrl, nr_io_queues);
+
+	return nvme_tcp_ofld_create_io_queues(nctrl);
+}
+
+static int nvme_tcp_ofld_start_io_queues(struct nvme_ctrl *nctrl)
+{
+	int i, rc = 0;
+
+	for (i = 1; i < nctrl->queue_count; i++) {
+		rc = nvme_tcp_ofld_start_queue(nctrl, i);
+		if (rc)
+			goto out_stop_queues;
+	}
+
+	return 0;
+
+out_stop_queues:
+	for (i--; i >= 1; i--)
+		nvme_tcp_ofld_stop_queue(nctrl, i);
 
 	return rc;
 }
@@ -249,9 +485,10 @@ out_destroy_queue:
 static int
 nvme_tcp_ofld_configure_io_queues(struct nvme_ctrl *nctrl, bool new)
 {
-	int rc;
+	int rc = nvme_tcp_ofld_alloc_io_queues(nctrl);
 
-	/* Placeholder - alloc_io_queues */
+	if (rc)
+		return rc;
 
 	if (new) {
 		nctrl->tagset = nvme_tcp_ofld_alloc_tagset(nctrl, false);
@@ -269,7 +506,9 @@ nvme_tcp_ofld_configure_io_queues(struct nvme_ctrl *nctrl, bool new)
 		}
 	}
 
-	/* Placeholder - start_io_queues */
+	rc = nvme_tcp_ofld_start_io_queues(nctrl);
+	if (rc)
+		goto out_cleanup_connect_q;
 
 	if (!new) {
 		nvme_start_queues(nctrl);
@@ -291,16 +530,16 @@ nvme_tcp_ofld_configure_io_queues(struct nvme_ctrl *nctrl, bool new)
 out_wait_freeze_timed_out:
 	nvme_stop_queues(nctrl);
 	nvme_sync_io_queues(nctrl);
-
-	/* Placeholder - Stop IO queues */
-
+	nvme_tcp_ofld_stop_io_queues(nctrl);
+out_cleanup_connect_q:
+	nvme_cancel_tagset(nctrl);
 	if (new)
 		blk_cleanup_queue(nctrl->connect_q);
 out_free_tag_set:
 	if (new)
 		blk_mq_free_tag_set(nctrl->tagset);
 out_free_io_queues:
-	/* Placeholder - free_io_queues */
+	nvme_tcp_ofld_free_io_queues(nctrl);
 
 	return rc;
 }
@@ -327,6 +566,17 @@ static void nvme_tcp_ofld_reconnect_or_remove(struct nvme_ctrl *nctrl)
 	}
 }
 
+static int
+nvme_tcp_ofld_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+			      unsigned int hctx_idx)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = data;
+
+	hctx->driver_data = &ctrl->queues[0];
+
+	return 0;
+}
+
 static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
 {
 	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
@@ -388,9 +638,19 @@ static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
 	return 0;
 
 destroy_io:
-	/* Placeholder - stop and destroy io queues*/
+	if (nctrl->queue_count > 1) {
+		nvme_stop_queues(nctrl);
+		nvme_sync_io_queues(nctrl);
+		nvme_tcp_ofld_stop_io_queues(nctrl);
+		nvme_cancel_tagset(nctrl);
+		nvme_tcp_ofld_destroy_io_queues(nctrl, new);
+	}
 destroy_admin:
-	/* Placeholder - stop and destroy admin queue*/
+	blk_mq_quiesce_queue(nctrl->admin_q);
+	blk_sync_queue(nctrl->admin_q);
+	nvme_tcp_ofld_stop_queue(nctrl, 0);
+	nvme_cancel_admin_tagset(nctrl);
+	nvme_tcp_ofld_destroy_admin_queue(nctrl, new);
 out_release_ctrl:
 	ctrl->dev->ops->release_ctrl(ctrl);
 
@@ -439,15 +699,37 @@ free_ctrl:
 }
 
 static void
-nvme_tcp_ofld_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove)
+nvme_tcp_ofld_teardown_admin_queue(struct nvme_ctrl *nctrl, bool remove)
 {
-	/* Placeholder - teardown_admin_queue */
+	blk_mq_quiesce_queue(nctrl->admin_q);
+	blk_sync_queue(nctrl->admin_q);
+
+	nvme_tcp_ofld_stop_queue(nctrl, 0);
+	nvme_cancel_admin_tagset(nctrl);
+
+	if (remove)
+		blk_mq_unquiesce_queue(nctrl->admin_q);
+
+	nvme_tcp_ofld_destroy_admin_queue(nctrl, remove);
 }
 
 static void
 nvme_tcp_ofld_teardown_io_queues(struct nvme_ctrl *nctrl, bool remove)
 {
-	/* Placeholder - teardown_io_queues */
+	if (nctrl->queue_count <= 1)
+		return;
+
+	blk_mq_quiesce_queue(nctrl->admin_q);
+	nvme_start_freeze(nctrl);
+	nvme_stop_queues(nctrl);
+	nvme_sync_io_queues(nctrl);
+	nvme_tcp_ofld_stop_io_queues(nctrl);
+	nvme_cancel_tagset(nctrl);
+
+	if (remove)
+		nvme_start_queues(nctrl);
+
+	nvme_tcp_ofld_destroy_io_queues(nctrl, remove);
 }
 
 static void nvme_tcp_ofld_reconnect_ctrl_work(struct work_struct *work)
@@ -562,6 +844,12 @@ nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
 	return 0;
 }
 
+inline size_t nvme_tcp_ofld_inline_data_size(struct nvme_tcp_ofld_queue *queue)
+{
+	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
+}
+EXPORT_SYMBOL_GPL(nvme_tcp_ofld_inline_data_size);
+
 static blk_status_t
 nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
 		       const struct blk_mq_queue_data *bd)
@@ -573,22 +861,95 @@ nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_OK;
 }
 
+static void
+nvme_tcp_ofld_exit_request(struct blk_mq_tag_set *set,
+			   struct request *rq, unsigned int hctx_idx)
+{
+	/*
+	 * Nothing is allocated in nvme_tcp_ofld_init_request,
+	 * hence empty.
+	 */
+}
+
+static int
+nvme_tcp_ofld_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+			unsigned int hctx_idx)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = data;
+
+	hctx->driver_data = &ctrl->queues[hctx_idx + 1];
+
+	return 0;
+}
+
+static int nvme_tcp_ofld_map_queues(struct blk_mq_tag_set *set)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = set->driver_data;
+	struct nvmf_ctrl_options *opts = ctrl->nctrl.opts;
+
+	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
+		/* separate read/write queues */
+		set->map[HCTX_TYPE_DEFAULT].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_READ];
+		set->map[HCTX_TYPE_READ].queue_offset =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	} else {
+		/* shared read/write queues */
+		set->map[HCTX_TYPE_DEFAULT].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_READ].queue_offset = 0;
+	}
+	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
+		/* map dedicated poll queues only if we have queues left */
+		set->map[HCTX_TYPE_POLL].nr_queues =
+				ctrl->io_queues[HCTX_TYPE_POLL];
+		set->map[HCTX_TYPE_POLL].queue_offset =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ];
+		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+	}
+
+	dev_info(ctrl->nctrl.device,
+		 "mapped %d/%d/%d default/read/poll queues.\n",
+		 ctrl->io_queues[HCTX_TYPE_DEFAULT],
+		 ctrl->io_queues[HCTX_TYPE_READ],
+		 ctrl->io_queues[HCTX_TYPE_POLL]);
+
+	return 0;
+}
+
+static int nvme_tcp_ofld_poll(struct blk_mq_hw_ctx *hctx)
+{
+	/* Placeholder - Implement polling mechanism */
+
+	return 0;
+}
+
 static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
 	.queue_rq	= nvme_tcp_ofld_queue_rq,
+	.complete	= nvme_complete_rq,
 	.init_request	= nvme_tcp_ofld_init_request,
-	/*
-	 * All additional ops will be also implemented and registered similar to
-	 * tcp.c
-	 */
+	.exit_request	= nvme_tcp_ofld_exit_request,
+	.init_hctx	= nvme_tcp_ofld_init_hctx,
+	.map_queues	= nvme_tcp_ofld_map_queues,
+	.poll		= nvme_tcp_ofld_poll,
 };
 
 static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops = {
 	.queue_rq	= nvme_tcp_ofld_queue_rq,
+	.complete	= nvme_complete_rq,
 	.init_request	= nvme_tcp_ofld_init_request,
-	/*
-	 * All additional ops will be also implemented and registered similar to
-	 * tcp.c
-	 */
+	.exit_request	= nvme_tcp_ofld_exit_request,
+	.init_hctx	= nvme_tcp_ofld_init_admin_hctx,
 };
 
 static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
diff --git a/drivers/nvme/host/tcp-offload.h b/drivers/nvme/host/tcp-offload.h
index 4281d1dacc94..875fcd3ec04a 100644
--- a/drivers/nvme/host/tcp-offload.h
+++ b/drivers/nvme/host/tcp-offload.h
@@ -65,6 +65,9 @@ struct nvme_tcp_ofld_queue {
 	unsigned long flags;
 	size_t cmnd_capsule_len;
 
+	/* mutex used during stop_queue */
+	struct mutex queue_lock;
+
 	u8 hdr_digest;
 	u8 data_digest;
 	u8 tos;
@@ -198,3 +201,4 @@ struct nvme_tcp_ofld_ops {
 int nvme_tcp_ofld_register_dev(struct nvme_tcp_ofld_dev *dev);
 void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev);
 void nvme_tcp_ofld_error_recovery(struct nvme_ctrl *nctrl);
+inline size_t nvme_tcp_ofld_inline_data_size(struct nvme_tcp_ofld_queue *queue);

From 35155e2626dcae187df7071550fbfd94b7113d6c Mon Sep 17 00:00:00 2001
From: Dean Balandin <dbalandin@marvell.com>
Date: Wed, 2 Jun 2021 21:42:46 +0300
Subject: [PATCH 0954/3292] nvme-tcp-offload: Add IO level implementation

In this patch, we present the IO level functionality.
The nvme-tcp-offload shall work on the IO-level, meaning the
nvme-tcp-offload ULP module shall pass the request to the nvme-tcp-offload
vendor driver and shall expect for the request completion.
No additional handling is needed in between, this design will reduce the
CPU utilization as we will describe below.

The nvme-tcp-offload vendor driver shall register to nvme-tcp-offload ULP
with the following IO-path ops:
 - send_req - in order to pass the request to the handling of the offload
   driver that shall pass it to the vendor specific device
 - poll_queue

The vendor driver will manage the context from which the request will be
executed and the request aggregations.
Once the IO completed, the nvme-tcp-offload vendor driver shall call
command.done() that shall invoke the nvme-tcp-offload ULP layer for
completing the request.

This patch also add support for the nvme-tcp-offload timeout and
nvme-tcp-offload ASYNC flow.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Dean Balandin <dbalandin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp-offload.c | 176 ++++++++++++++++++++++++++++++--
 drivers/nvme/host/tcp-offload.h |   2 +
 2 files changed, 171 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index 1700cdf42433..c76822e5ada7 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -125,7 +125,30 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
 			    union nvme_result *result,
 			    __le16 status)
 {
-	/* Placeholder - complete request with/without error */
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), *result))
+		nvme_complete_rq(rq);
+}
+
+/**
+ * nvme_tcp_ofld_async_req_done() - NVMeTCP Offload request done callback
+ * function for async request. Pointed to by nvme_tcp_ofld_req->done.
+ * Handles both NVME_TCP_F_DATA_SUCCESS flag and NVMe CQ.
+ * @req:	NVMeTCP offload request to complete.
+ * @result:     The nvme_result.
+ * @status:     The completion status.
+ *
+ * API function that allows the vendor specific offload driver to report request
+ * completions to the common offload layer.
+ */
+void nvme_tcp_ofld_async_req_done(struct nvme_tcp_ofld_req *req,
+				  union nvme_result *result, __le16 status)
+{
+	struct nvme_tcp_ofld_queue *queue = req->queue;
+	struct nvme_tcp_ofld_ctrl *ctrl = queue->ctrl;
+
+	nvme_complete_async_event(&ctrl->nctrl, status, result);
 }
 
 static struct nvme_tcp_ofld_dev *
@@ -698,6 +721,54 @@ free_ctrl:
 	kfree(ctrl);
 }
 
+static void nvme_tcp_ofld_set_sg_null(struct nvme_command *c)
+{
+	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+
+	sg->addr = 0;
+	sg->length = 0;
+	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | NVME_SGL_FMT_TRANSPORT_A;
+}
+
+inline void nvme_tcp_ofld_set_sg_inline(struct nvme_tcp_ofld_queue *queue,
+					struct nvme_command *c, u32 data_len)
+{
+	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+
+	sg->addr = cpu_to_le64(queue->ctrl->nctrl.icdoff);
+	sg->length = cpu_to_le32(data_len);
+	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
+}
+
+static void nvme_tcp_ofld_map_data(struct nvme_command *c, u32 data_len)
+{
+	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
+
+	sg->addr = 0;
+	sg->length = cpu_to_le32(data_len);
+	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | NVME_SGL_FMT_TRANSPORT_A;
+}
+
+static void nvme_tcp_ofld_submit_async_event(struct nvme_ctrl *arg)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(arg);
+	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[0];
+	struct nvme_tcp_ofld_dev *dev = queue->dev;
+	struct nvme_tcp_ofld_ops *ops = dev->ops;
+
+	ctrl->async_req.nvme_cmd.common.opcode = nvme_admin_async_event;
+	ctrl->async_req.nvme_cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
+	ctrl->async_req.nvme_cmd.common.flags |= NVME_CMD_SGL_METABUF;
+
+	nvme_tcp_ofld_set_sg_null(&ctrl->async_req.nvme_cmd);
+
+	ctrl->async_req.async = true;
+	ctrl->async_req.queue = queue;
+	ctrl->async_req.done = nvme_tcp_ofld_async_req_done;
+
+	ops->send_req(&ctrl->async_req);
+}
+
 static void
 nvme_tcp_ofld_teardown_admin_queue(struct nvme_ctrl *nctrl, bool remove)
 {
@@ -836,9 +907,13 @@ nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
 			   unsigned int numa_node)
 {
 	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_tcp_ofld_ctrl *ctrl = set->driver_data;
+	int qid;
 
-	/* Placeholder - init request */
-
+	qid = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+	req->queue = &ctrl->queues[qid];
+	nvme_req(rq)->ctrl = &ctrl->nctrl;
+	nvme_req(rq)->cmd = &req->nvme_cmd;
 	req->done = nvme_tcp_ofld_req_done;
 
 	return 0;
@@ -854,9 +929,46 @@ static blk_status_t
 nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
 		       const struct blk_mq_queue_data *bd)
 {
-	/* Call nvme_setup_cmd(...) */
+	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(bd->rq);
+	struct nvme_tcp_ofld_queue *queue = hctx->driver_data;
+	struct nvme_tcp_ofld_ctrl *ctrl = queue->ctrl;
+	struct nvme_ns *ns = hctx->queue->queuedata;
+	struct nvme_tcp_ofld_dev *dev = queue->dev;
+	struct nvme_tcp_ofld_ops *ops = dev->ops;
+	struct nvme_command *nvme_cmd;
+	struct request *rq = bd->rq;
+	bool queue_ready;
+	u32 data_len;
+	int rc;
 
-	/* Call ops->send_req(...) */
+	queue_ready = test_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags);
+
+	req->async = false;
+
+	if (!nvme_check_ready(&ctrl->nctrl, rq, queue_ready))
+		return nvme_fail_nonready_command(&ctrl->nctrl, rq);
+
+	rc = nvme_setup_cmd(ns, rq);
+	if (unlikely(rc))
+		return rc;
+
+	blk_mq_start_request(rq);
+
+	nvme_cmd = &req->nvme_cmd;
+	nvme_cmd->common.flags |= NVME_CMD_SGL_METABUF;
+
+	data_len = blk_rq_nr_phys_segments(rq) ? blk_rq_payload_bytes(rq) : 0;
+	if (!data_len)
+		nvme_tcp_ofld_set_sg_null(&req->nvme_cmd);
+	else if ((rq_data_dir(rq) == WRITE) &&
+		 data_len <= nvme_tcp_ofld_inline_data_size(queue))
+		nvme_tcp_ofld_set_sg_inline(queue, nvme_cmd, data_len);
+	else
+		nvme_tcp_ofld_map_data(nvme_cmd, data_len);
+
+	rc = ops->send_req(req);
+	if (unlikely(rc))
+		return rc;
 
 	return BLK_STS_OK;
 }
@@ -929,9 +1041,56 @@ static int nvme_tcp_ofld_map_queues(struct blk_mq_tag_set *set)
 
 static int nvme_tcp_ofld_poll(struct blk_mq_hw_ctx *hctx)
 {
-	/* Placeholder - Implement polling mechanism */
+	struct nvme_tcp_ofld_queue *queue = hctx->driver_data;
+	struct nvme_tcp_ofld_dev *dev = queue->dev;
+	struct nvme_tcp_ofld_ops *ops = dev->ops;
 
-	return 0;
+	return ops->poll_queue(queue);
+}
+
+static void nvme_tcp_ofld_complete_timed_out(struct request *rq)
+{
+	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_ctrl *nctrl = &req->queue->ctrl->nctrl;
+
+	nvme_tcp_ofld_stop_queue(nctrl, nvme_tcp_ofld_qid(req->queue));
+	if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
+		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
+		blk_mq_complete_request(rq);
+	}
+}
+
+static enum blk_eh_timer_return nvme_tcp_ofld_timeout(struct request *rq, bool reserved)
+{
+	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_tcp_ofld_ctrl *ctrl = req->queue->ctrl;
+
+	dev_warn(ctrl->nctrl.device,
+		 "queue %d: timeout request %#x type %d\n",
+		 nvme_tcp_ofld_qid(req->queue), rq->tag, req->nvme_cmd.common.opcode);
+
+	if (ctrl->nctrl.state != NVME_CTRL_LIVE) {
+		/*
+		 * If we are resetting, connecting or deleting we should
+		 * complete immediately because we may block controller
+		 * teardown or setup sequence
+		 * - ctrl disable/shutdown fabrics requests
+		 * - connect requests
+		 * - initialization admin requests
+		 * - I/O requests that entered after unquiescing and
+		 *   the controller stopped responding
+		 *
+		 * All other requests should be cancelled by the error
+		 * recovery work, so it's fine that we fail it here.
+		 */
+		nvme_tcp_ofld_complete_timed_out(rq);
+
+		return BLK_EH_DONE;
+	}
+
+	nvme_tcp_ofld_error_recovery(&ctrl->nctrl);
+
+	return BLK_EH_RESET_TIMER;
 }
 
 static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
@@ -940,6 +1099,7 @@ static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
 	.init_request	= nvme_tcp_ofld_init_request,
 	.exit_request	= nvme_tcp_ofld_exit_request,
 	.init_hctx	= nvme_tcp_ofld_init_hctx,
+	.timeout	= nvme_tcp_ofld_timeout,
 	.map_queues	= nvme_tcp_ofld_map_queues,
 	.poll		= nvme_tcp_ofld_poll,
 };
@@ -950,6 +1110,7 @@ static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops = {
 	.init_request	= nvme_tcp_ofld_init_request,
 	.exit_request	= nvme_tcp_ofld_exit_request,
 	.init_hctx	= nvme_tcp_ofld_init_admin_hctx,
+	.timeout	= nvme_tcp_ofld_timeout,
 };
 
 static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
@@ -960,6 +1121,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
 	.free_ctrl		= nvme_tcp_ofld_free_ctrl,
+	.submit_async_event     = nvme_tcp_ofld_submit_async_event,
 	.delete_ctrl		= nvme_tcp_ofld_delete_ctrl,
 	.get_address		= nvmf_get_address,
 };
diff --git a/drivers/nvme/host/tcp-offload.h b/drivers/nvme/host/tcp-offload.h
index 875fcd3ec04a..2ac5b2428612 100644
--- a/drivers/nvme/host/tcp-offload.h
+++ b/drivers/nvme/host/tcp-offload.h
@@ -114,6 +114,8 @@ struct nvme_tcp_ofld_ctrl {
 	/* Connectivity params */
 	struct nvme_tcp_ofld_ctrl_con_params conn_params;
 
+	struct nvme_tcp_ofld_req async_req;
+
 	/* Vendor specific driver context */
 	void *private_data;
 };

From 9c153d3889767eb347a9b1719cc6f336faccdba9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Jun 2021 13:27:41 -0700
Subject: [PATCH 0955/3292] net: vlan: Avoid using strncpy()

Use strscpy_pad() instead of strncpy() which is considered deprecated:
https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 3 ++-
 net/8021q/vlan.h     | 3 ++-
 net/8021q/vlan_dev.c | 6 +++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index fb3d3262dc1a..4cdf8416869d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -638,7 +638,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
 
 	case GET_VLAN_REALDEV_NAME_CMD:
 		err = 0;
-		vlan_dev_get_realdev_name(dev, args.u.device2);
+		vlan_dev_get_realdev_name(dev, args.u.device2,
+					  sizeof(args.u.device2));
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args)))
 			err = -EFAULT;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index fa3ad3d4d58c..e3f6ff05a528 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -129,7 +129,8 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio);
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
-void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
+void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
+			       size_t size);
 
 int vlan_check_real_dev(struct net_device *real_dev,
 			__be16 protocol, u16 vlan_id,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4db3f0621959..a0367b37512d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -239,9 +239,9 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 	return 0;
 }
 
-void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
+void vlan_dev_get_realdev_name(const struct net_device *dev, char *result, size_t size)
 {
-	strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23);
+	strscpy_pad(result, vlan_dev_priv(dev)->real_dev->name, size);
 }
 
 bool vlan_dev_inherit_address(struct net_device *dev,
@@ -360,7 +360,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct ifreq ifrr;
 	int err = -EOPNOTSUPP;
 
-	strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
+	strscpy_pad(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
 	ifrr.ifr_ifru = ifr->ifr_ifru;
 
 	switch (cmd) {

From 43902070fb7b73a0148eef63f5ece3a100e821ae Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Jun 2021 13:58:20 -0700
Subject: [PATCH 0956/3292] net: bonding: Use strscpy_pad() instead of
 manually-truncated strncpy()

Silence this warning by using strscpy_pad() directly:

drivers/net/bonding/bond_main.c:4877:3: warning: 'strncpy' specified bound 16 equals destination size [-Wstringop-truncation]
    4877 |   strncpy(params->primary, primary, IFNAMSIZ);
         |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Additionally replace other strncpy() uses, as it is considered deprecated:
https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202102150705.fdR6obB0-lkp@intel.com
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c    | 8 +++-----
 drivers/net/bonding/bond_options.c | 3 +--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7e469c203ca5..eb79a9f05914 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -620,7 +620,7 @@ static int bond_check_dev_link(struct bonding *bond,
 		 */
 
 		/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
-		strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
+		strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
 		mii = if_mii(&ifr);
 		if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
 			mii->reg_num = MII_BMSR;
@@ -5330,10 +5330,8 @@ static int bond_check_params(struct bond_params *params)
 			(struct reciprocal_value) { 0 };
 	}
 
-	if (primary) {
-		strncpy(params->primary, primary, IFNAMSIZ);
-		params->primary[IFNAMSIZ - 1] = 0;
-	}
+	if (primary)
+		strscpy_pad(params->primary, primary, sizeof(params->primary));
 
 	memcpy(params->arp_targets, arp_target, sizeof(arp_target));
 
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index c9d3604ae129..81c039531e66 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1206,8 +1206,7 @@ static int bond_option_primary_set(struct bonding *bond,
 		RCU_INIT_POINTER(bond->primary_slave, NULL);
 		bond_select_active_slave(bond);
 	}
-	strncpy(bond->params.primary, primary, IFNAMSIZ);
-	bond->params.primary[IFNAMSIZ - 1] = 0;
+	strscpy_pad(bond->params.primary, primary, IFNAMSIZ);
 
 	netdev_dbg(bond->dev, "Recording %s as primary, but it has not been enslaved yet\n",
 		   primary);

From a29cb6914681a55667436a9eb7a42e28da8cf387 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 2 Jun 2021 17:51:21 -0700
Subject: [PATCH 0957/3292] net: tcp better handling of reordering then loss
 cases

This patch aims to improve the situation when reordering and loss are
ocurring in the same flight of packets.

Previously the reordering would first induce a spurious recovery, then
the subsequent ACK may undo the cwnd (based on the timestamps e.g.).
However the current loss recovery does not proceed to invoke
RACK to install a reordering timer. If some packets are also lost, this
may lead to a long RTO-based recovery. An example is
https://groups.google.com/g/bbr-dev/c/OFHADvJbTEI

The solution is to after reverting the recovery, always invoke RACK
to either mount the RACK timer to fast retransmit after the reordering
window, or restarts the recovery if new loss is identified. Hence
it is possible the sender may go from Recovery to Disorder/Open to
Recovery again in one ACK.

Reported-by: mingkun bian <bianmingkun@gmail.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 45 +++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cd52ce0a2a85..7d5e59f688de 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2816,8 +2816,17 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
 	*rexmit = REXMIT_LOST;
 }
 
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	return after(tcp_highest_sack_seq(tp),
+		     tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
 /* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
+				 bool *do_lost)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2842,7 +2851,9 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
 		tcp_undo_cwnd_reduction(sk, true);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
 		tcp_try_keep_open(sk);
-		return true;
+	} else {
+		/* Partial ACK arrived. Force fast retransmit. */
+		*do_lost = tcp_force_fast_retransmit(sk);
 	}
 	return false;
 }
@@ -2866,14 +2877,6 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
 	}
 }
 
-static bool tcp_force_fast_retransmit(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	return after(tcp_highest_sack_seq(tp),
-		     tp->snd_una + tp->reordering * tp->mss_cache);
-}
-
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2943,17 +2946,21 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
 			if (tcp_is_reno(tp))
 				tcp_add_reno_sack(sk, num_dupack, ece_ack);
-		} else {
-			if (tcp_try_undo_partial(sk, prior_snd_una))
-				return;
-			/* Partial ACK arrived. Force fast retransmit. */
-			do_lost = tcp_force_fast_retransmit(sk);
-		}
-		if (tcp_try_undo_dsack(sk)) {
-			tcp_try_keep_open(sk);
+		} else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
 			return;
-		}
+
+		if (tcp_try_undo_dsack(sk))
+			tcp_try_keep_open(sk);
+
 		tcp_identify_packet_loss(sk, ack_flag);
+		if (icsk->icsk_ca_state != TCP_CA_Recovery) {
+			if (!tcp_time_to_recover(sk, flag))
+				return;
+			/* Undo reverts the recovery state. If loss is evident,
+			 * starts a new recovery (e.g. reordering then loss);
+			 */
+			tcp_enter_recovery(sk, ece_ack);
+		}
 		break;
 	case TCP_CA_Loss:
 		tcp_process_loss(sk, flag, num_dupack, rexmit);

From 5e0b8928927fa0bac688221a4b2636ef593a0599 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Thu, 3 Jun 2021 08:34:29 +0200
Subject: [PATCH 0958/3292] net:cxgb3: replace tasklets with works
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

OFLD and CTRL TX queues can be stopped if there is no room in
their DMA rings. If this happens, they're tried to be restarted
later after having made some room in the corresponding ring.

The tasks of restarting these queues were triggered using
tasklets, but they can be replaced for workqueue works, getting
them out of softirq context.

This queues stop/restart probably doesn't happen often and they
can be quite lengthy because they try to send all pending skbs.
Moreover, given that probably the ring is not empty yet, so the
DMA still has work to do, we don't need to be so fast to justify
using tasklets/softirq instead of running in a thread.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/adapter.h |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/common.h  |  2 ++
 drivers/net/ethernet/chelsio/cxgb3/sge.c     | 38 +++++++++++---------
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/adapter.h b/drivers/net/ethernet/chelsio/cxgb3/adapter.h
index f80fbd81b609..6d682b7c7aac 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/adapter.h
@@ -178,7 +178,7 @@ struct sge_txq {		/* state for an SGE Tx queue */
 	unsigned int token;	/* WR token */
 	dma_addr_t phys_addr;	/* physical address of the ring */
 	struct sk_buff_head sendq;	/* List of backpressured offload packets */
-	struct tasklet_struct qresume_tsk;	/* restarts the queue */
+	struct work_struct qresume_task;	/* restarts the queue */
 	unsigned int cntxt_id;	/* SGE context id for the Tx q */
 	unsigned long stops;	/* # of times q has been stopped */
 	unsigned long restarts;	/* # of queue restarts */
diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h
index 1bd7d89666c4..b706f2fbe4f4 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/common.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/common.h
@@ -770,4 +770,6 @@ int t3_xaui_direct_phy_prep(struct cphy *phy, struct adapter *adapter,
 			    int phy_addr, const struct mdio_ops *mdio_ops);
 int t3_aq100x_phy_prep(struct cphy *phy, struct adapter *adapter,
 			    int phy_addr, const struct mdio_ops *mdio_ops);
+
+extern struct workqueue_struct *cxgb3_wq;
 #endif				/* __CHELSIO_COMMON_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 11d3b6218ed7..115c03b0feb6 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -1518,14 +1518,15 @@ static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
 
 /**
  *	restart_ctrlq - restart a suspended control queue
- *	@t: pointer to the tasklet associated with this handler
+ *	@w: pointer to the work associated with this handler
  *
  *	Resumes transmission on a suspended Tx control queue.
  */
-static void restart_ctrlq(struct tasklet_struct *t)
+static void restart_ctrlq(struct work_struct *w)
 {
 	struct sk_buff *skb;
-	struct sge_qset *qs = from_tasklet(qs, t, txq[TXQ_CTRL].qresume_tsk);
+	struct sge_qset *qs = container_of(w, struct sge_qset,
+					   txq[TXQ_CTRL].qresume_task);
 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
 
 	spin_lock(&q->lock);
@@ -1736,14 +1737,15 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 
 /**
  *	restart_offloadq - restart a suspended offload queue
- *	@t: pointer to the tasklet associated with this handler
+ *	@w: pointer to the work associated with this handler
  *
  *	Resumes transmission on a suspended Tx offload queue.
  */
-static void restart_offloadq(struct tasklet_struct *t)
+static void restart_offloadq(struct work_struct *w)
 {
 	struct sk_buff *skb;
-	struct sge_qset *qs = from_tasklet(qs, t, txq[TXQ_OFLD].qresume_tsk);
+	struct sge_qset *qs = container_of(w, struct sge_qset,
+					   txq[TXQ_OFLD].qresume_task);
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
 	const struct port_info *pi = netdev_priv(qs->netdev);
 	struct adapter *adap = pi->adapter;
@@ -1998,13 +2000,17 @@ static void restart_tx(struct sge_qset *qs)
 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
 		qs->txq[TXQ_OFLD].restarts++;
-		tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
+
+		/* The work can be quite lengthy so we use driver's own queue */
+		queue_work(cxgb3_wq, &qs->txq[TXQ_OFLD].qresume_task);
 	}
 	if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
 		qs->txq[TXQ_CTRL].restarts++;
-		tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
+
+		/* The work can be quite lengthy so we use driver's own queue */
+		queue_work(cxgb3_wq, &qs->txq[TXQ_CTRL].qresume_task);
 	}
 }
 
@@ -3085,8 +3091,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 		skb_queue_head_init(&q->txq[i].sendq);
 	}
 
-	tasklet_setup(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq);
-	tasklet_setup(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq);
+	INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq);
+	INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq);
 
 	q->fl[0].gen = q->fl[1].gen = 1;
 	q->fl[0].size = p->fl_size;
@@ -3276,11 +3282,11 @@ void t3_sge_start(struct adapter *adap)
  *
  *	Can be invoked from interrupt context e.g.  error handler.
  *
- *	Note that this function cannot disable the restart of tasklets as
+ *	Note that this function cannot disable the restart of works as
  *	it cannot wait if called from interrupt context, however the
- *	tasklets will have no effect since the doorbells are disabled. The
+ *	works will have no effect since the doorbells are disabled. The
  *	driver will call tg3_sge_stop() later from process context, at
- *	which time the tasklets will be stopped if they are still running.
+ *	which time the works will be stopped if they are still running.
  */
 void t3_sge_stop_dma(struct adapter *adap)
 {
@@ -3292,7 +3298,7 @@ void t3_sge_stop_dma(struct adapter *adap)
  *	@adap: the adapter
  *
  *	Called from process context. Disables the DMA engine and any
- *	pending queue restart tasklets.
+ *	pending queue restart works.
  */
 void t3_sge_stop(struct adapter *adap)
 {
@@ -3303,8 +3309,8 @@ void t3_sge_stop(struct adapter *adap)
 	for (i = 0; i < SGE_QSETS; ++i) {
 		struct sge_qset *qs = &adap->sge.qs[i];
 
-		tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
-		tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
+		cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
+		cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
 	}
 }
 

From 6a8dd8b2fa5b7cec4b13f5f5b2589d9abbac0fab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Thu, 3 Jun 2021 08:34:30 +0200
Subject: [PATCH 0959/3292] net:cxgb3: fix code style issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb3/cxgb3_main.c   | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 84ad7261e243..57f210c53afc 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -1273,14 +1273,14 @@ static int cxgb_up(struct adapter *adap)
 			free_irq(adap->msix_info[0].vec, adap);
 			goto irq_err;
 		}
-	} else if ((err = request_irq(adap->pdev->irq,
-				      t3_intr_handler(adap,
-						      adap->sge.qs[0].rspq.
-						      polling),
-				      (adap->flags & USING_MSI) ?
-				       0 : IRQF_SHARED,
-				      adap->name, adap)))
-		goto irq_err;
+	} else {
+		err = request_irq(adap->pdev->irq,
+				  t3_intr_handler(adap, adap->sge.qs[0].rspq.polling),
+				  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
+				  adap->name, adap);
+		if (err)
+			goto irq_err;
+	}
 
 	enable_all_napi(adap);
 	t3_sge_start(adap);
@@ -3098,8 +3098,9 @@ static void set_nqsets(struct adapter *adap)
 			nqsets = num_cpus;
 		if (nqsets < 1 || hwports == 4)
 			nqsets = 1;
-	} else
+	} else {
 		nqsets = 1;
+	}
 
 	for_each_port(adap, i) {
 		struct port_info *pi = adap2pinfo(adap, i);

From 5567d4d9e7381230462a564d4f466177f3ba9dd5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 2 Jun 2021 07:41:30 -0500
Subject: [PATCH 0960/3292] net: ipa: add support for inline checksum offload

Starting with IPA v4.5, IP payload checksum offload is implemented
differently.

Prior to v4.5, the IPA hardware appends an rmnet_map_dl_csum_trailer
structure to each packet if checksum offload is enabled in the
download direction (modem->AP).  In the upload direction (AP->modem)
a rmnet_map_ul_csum_header structure is prepended before each sent
packet.

Starting with IPA v4.5, checksum offload is implemented using a
single new rmnet_map_v5_csum_header structure which sits between
the QMAP header and the packet data.  The same header structure
is used in both directions.

The new header contains a header type (CSUM_OFFLOAD); a checksum
flag; and a flag indicating whether any other headers follow this
one.  The checksum flag indicates whether the hardware should
compute (and insert) the checksum on a sent packet.  On a received
packet the checksum flag indicates whether the hardware confirms the
checksum value in the payload is correct.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 47 ++++++++++++++++++++++++++--------
 drivers/net/ipa/ipa_reg.h      |  1 +
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index ccc99ad983eb..03719fb6a15a 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -457,28 +457,34 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa)
 static void ipa_endpoint_init_cfg(struct ipa_endpoint *endpoint)
 {
 	u32 offset = IPA_REG_ENDP_INIT_CFG_N_OFFSET(endpoint->endpoint_id);
+	enum ipa_cs_offload_en enabled;
 	u32 val = 0;
 
 	/* FRAG_OFFLOAD_EN is 0 */
 	if (endpoint->data->checksum) {
+		enum ipa_version version = endpoint->ipa->version;
+
 		if (endpoint->toward_ipa) {
 			u32 checksum_offset;
 
-			val |= u32_encode_bits(IPA_CS_OFFLOAD_UL,
-					       CS_OFFLOAD_EN_FMASK);
 			/* Checksum header offset is in 4-byte units */
 			checksum_offset = sizeof(struct rmnet_map_header);
 			checksum_offset /= sizeof(u32);
 			val |= u32_encode_bits(checksum_offset,
 					       CS_METADATA_HDR_OFFSET_FMASK);
+
+			enabled = version < IPA_VERSION_4_5
+					? IPA_CS_OFFLOAD_UL
+					: IPA_CS_OFFLOAD_INLINE;
 		} else {
-			val |= u32_encode_bits(IPA_CS_OFFLOAD_DL,
-					       CS_OFFLOAD_EN_FMASK);
+			enabled = version < IPA_VERSION_4_5
+					? IPA_CS_OFFLOAD_DL
+					: IPA_CS_OFFLOAD_INLINE;
 		}
 	} else {
-		val |= u32_encode_bits(IPA_CS_OFFLOAD_NONE,
-				       CS_OFFLOAD_EN_FMASK);
+		enabled = IPA_CS_OFFLOAD_NONE;
 	}
+	val |= u32_encode_bits(enabled, CS_OFFLOAD_EN_FMASK);
 	/* CS_GEN_QMB_MASTER_SEL is 0 */
 
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
@@ -498,6 +504,27 @@ static void ipa_endpoint_init_nat(struct ipa_endpoint *endpoint)
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
 
+static u32
+ipa_qmap_header_size(enum ipa_version version, struct ipa_endpoint *endpoint)
+{
+	u32 header_size = sizeof(struct rmnet_map_header);
+
+	/* Without checksum offload, we just have the MAP header */
+	if (!endpoint->data->checksum)
+		return header_size;
+
+	if (version < IPA_VERSION_4_5) {
+		/* Checksum header inserted for AP TX endpoints only */
+		if (endpoint->toward_ipa)
+			header_size += sizeof(struct rmnet_map_ul_csum_header);
+	} else {
+		/* Checksum header is used in both directions */
+		header_size += sizeof(struct rmnet_map_v5_csum_header);
+	}
+
+	return header_size;
+}
+
 /**
  * ipa_endpoint_init_hdr() - Initialize HDR endpoint configuration register
  * @endpoint:	Endpoint pointer
@@ -526,13 +553,11 @@ static void ipa_endpoint_init_hdr(struct ipa_endpoint *endpoint)
 	u32 val = 0;
 
 	if (endpoint->data->qmap) {
-		size_t header_size = sizeof(struct rmnet_map_header);
 		enum ipa_version version = ipa->version;
+		size_t header_size;
 
-		/* We might supply a checksum header after the QMAP header */
-		if (endpoint->toward_ipa && endpoint->data->checksum)
-			header_size += sizeof(struct rmnet_map_ul_csum_header);
-		val |= ipa_header_size_encoded(version, header_size);
+		header_size = ipa_qmap_header_size(version, endpoint);
+		val = ipa_header_size_encoded(version, header_size);
 
 		/* Define how to fill fields in a received QMAP header */
 		if (!endpoint->toward_ipa) {
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 286ea9634c49..b89dec5865a5 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -368,6 +368,7 @@ enum ipa_cs_offload_en {
 	IPA_CS_OFFLOAD_NONE		= 0x0,
 	IPA_CS_OFFLOAD_UL		= 0x1,	/* Before IPA v4.5 (TX) */
 	IPA_CS_OFFLOAD_DL		= 0x2,	/* Before IPA v4.5 (RX) */
+	IPA_CS_OFFLOAD_INLINE		= 0x1,	/* IPA v4.5 (TX and RX) */
 };
 
 /* Valid only for TX (IPA consumer) endpoints */

From d15ec1933309a4677d0a667738dc64329ec3fd69 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 2 Jun 2021 07:41:31 -0500
Subject: [PATCH 0961/3292] Revert "net: ipa: disable checksum offload for IPA
 v4.5+"

This reverts commit c88c34fcf8f501d588c0a999aa7e51e18552c5f0.

The RMNet driver now supports inline checksum offload.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 03719fb6a15a..07a81b73306f 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -88,11 +88,6 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
 	if (ipa_gsi_endpoint_data_empty(data))
 		return true;
 
-	/* IPA v4.5+ uses checksum offload, not yet supported by RMNet */
-	if (ipa->version >= IPA_VERSION_4_5)
-		if (data->endpoint.config.checksum)
-			return false;
-
 	if (!data->toward_ipa) {
 		if (data->endpoint.filter_support) {
 			dev_err(dev, "filtering not supported for "
@@ -235,17 +230,6 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
 static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
 				    const struct ipa_gsi_endpoint_data *data)
 {
-	const struct ipa_gsi_endpoint_data *dp = data;
-	enum ipa_endpoint_name name;
-
-	if (ipa->version < IPA_VERSION_4_5)
-		return true;
-
-	/* IPA v4.5+ uses checksum offload, not yet supported by RMNet */
-	for (name = 0; name < count; name++, dp++)
-		if (data->endpoint.config.checksum)
-			return false;
-
 	return true;
 }
 

From feb938fad63fb6fdd92ab082d0888ed5694af818 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 3 Jun 2021 14:01:10 +0100
Subject: [PATCH 0962/3292] net: phy: marvell: use phy_modify_changed() for
 marvell_set_polarity()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than open-coding the phy_modify_changed() sequence, use this
helper in marvell_set_polarity().

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e6721c1c26c2..23751d95855b 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -367,39 +367,24 @@ static irqreturn_t marvell_handle_interrupt(struct phy_device *phydev)
 
 static int marvell_set_polarity(struct phy_device *phydev, int polarity)
 {
-	int reg;
-	int err;
-	int val;
+	u16 val;
 
-	/* get the current settings */
-	reg = phy_read(phydev, MII_M1011_PHY_SCR);
-	if (reg < 0)
-		return reg;
-
-	val = reg;
-	val &= ~MII_M1011_PHY_SCR_AUTO_CROSS;
 	switch (polarity) {
 	case ETH_TP_MDI:
-		val |= MII_M1011_PHY_SCR_MDI;
+		val = MII_M1011_PHY_SCR_MDI;
 		break;
 	case ETH_TP_MDI_X:
-		val |= MII_M1011_PHY_SCR_MDI_X;
+		val = MII_M1011_PHY_SCR_MDI_X;
 		break;
 	case ETH_TP_MDI_AUTO:
 	case ETH_TP_MDI_INVALID:
 	default:
-		val |= MII_M1011_PHY_SCR_AUTO_CROSS;
+		val = MII_M1011_PHY_SCR_AUTO_CROSS;
 		break;
 	}
 
-	if (val != reg) {
-		/* Set the new polarity value in the register */
-		err = phy_write(phydev, MII_M1011_PHY_SCR, val);
-		if (err)
-			return err;
-	}
-
-	return val != reg;
+	return phy_modify_changed(phydev, MII_M1011_PHY_SCR,
+				  MII_M1011_PHY_SCR_AUTO_CROSS, val);
 }
 
 static int marvell_config_aneg(struct phy_device *phydev)

From 92e1b57c3865c508e0ecd9824b7a64256329b8fd Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 3 Jun 2021 14:19:04 +0100
Subject: [PATCH 0963/3292] bonding: remove redundant initialization of
 variable ret

The variable ret is being initialized with a value that is never read,
it is being updated later on.  The assignment is redundant and can be
removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 81c039531e66..0cf25de6f46d 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -705,7 +705,7 @@ out:
 int __bond_opt_set_notify(struct bonding *bond,
 			  unsigned int option, struct bond_opt_value *val)
 {
-	int ret = -ENOENT;
+	int ret;
 
 	ASSERT_RTNL();
 

From a10541f5d9fa2aab5ff54311473b05ba75b84226 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Thu, 3 Jun 2021 22:07:49 +0800
Subject: [PATCH 0964/3292] sch_htb: fix doc warning in htb_add_to_id_tree()

Add description for parameters of htb_add_to_id_tree() to fix
gcc W=1 warnings:
net/sched/sch_htb.c:282: warning: Function parameter or member 'root' not described in 'htb_add_to_id_tree'
net/sched/sch_htb.c:282: warning: Function parameter or member 'cl' not described in 'htb_add_to_id_tree'
net/sched/sch_htb.c:282: warning: Function parameter or member 'prio' not described in 'htb_add_to_id_tree'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 282614614905..4f9304567dcc 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -273,6 +273,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 
 /**
  * htb_add_to_id_tree - adds class to the round robin list
+ * @root: the root of the tree
+ * @cl: the class to add
+ * @prio: the give prio in class
  *
  * Routine adds class to the list (actually tree) sorted by classid.
  * Make sure that class is not already on such list for given prio.

From 819fb78f695527fc015e0c93b23c6492f7257015 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 3 Jun 2021 09:56:13 -0700
Subject: [PATCH 0965/3292] net: ks8851: Make ks8851_read_selftest() return
 void

clang points out that ret in ks8851_read_selftest() is set but unused:

drivers/net/ethernet/micrel/ks8851_common.c:1028:6: warning: variable
'ret' set but not used [-Wunused-but-set-variable]
        int ret = 0;
            ^
1 warning generated.

The return code of this function has never been checked so just remove
ret and make the function return void.

Fixes: 3ba81f3ece3c ("net: Micrel KS8851 SPI network driver")
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851_common.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 13eef6e9bd2d..831518466de2 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -1022,30 +1022,23 @@ static int ks8851_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
  *
  * Read and check the TX/RX memory selftest information.
  */
-static int ks8851_read_selftest(struct ks8851_net *ks)
+static void ks8851_read_selftest(struct ks8851_net *ks)
 {
 	unsigned both_done = MBIR_TXMBF | MBIR_RXMBF;
-	int ret = 0;
 	unsigned rd;
 
 	rd = ks8851_rdreg16(ks, KS_MBIR);
 
 	if ((rd & both_done) != both_done) {
 		netdev_warn(ks->netdev, "Memory selftest not finished\n");
-		return 0;
+		return;
 	}
 
-	if (rd & MBIR_TXMBFA) {
+	if (rd & MBIR_TXMBFA)
 		netdev_err(ks->netdev, "TX memory selftest fail\n");
-		ret |= 1;
-	}
 
-	if (rd & MBIR_RXMBFA) {
+	if (rd & MBIR_RXMBFA)
 		netdev_err(ks->netdev, "RX memory selftest fail\n");
-		ret |= 2;
-	}
-
-	return 0;
 }
 
 /* driver bus management functions */

From 118de6106735cfeb04daf9de1d5a9f953ac034ba Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 3 Jun 2021 10:34:10 -0700
Subject: [PATCH 0966/3292] net: ethernet: rmnet: Restructure if checks to
 avoid uninitialized warning

Clang warns that proto in rmnet_map_v5_checksum_uplink_packet() might be
used uninitialized:

drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:283:14: warning:
variable 'proto' is used uninitialized whenever 'if' condition is false
[-Wsometimes-uninitialized]
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:295:36: note:
uninitialized use occurs here
                check = rmnet_map_get_csum_field(proto, trans);
                                                 ^~~~~
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:283:10: note:
remove the 'if' if its condition is always true
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:270:11: note:
initialize the variable 'proto' to silence this warning
                u8 proto;
                        ^
                         = '\0'
1 warning generated.

This is technically a false positive because there is an if statement
above this one that checks skb->protocol for not being either
ETH_P_IP{,V6}. However, it is more obvious to sink that into the if
statement as an else branch, which makes the code clearer and fixes the
warning.

At the same time, move the "IS_ENABLED(CONFIG_IPV6)" into the else if
condition so that the else branch of the preprocessor conditional can
be shared, since there is no build failure with CONFIG_IPV6 disabled.

Fixes: b6e5d27e32ef ("net: ethernet: rmnet: Add support for MAPv5 egress packets")
Link: https://github.com/ClangBuiltLinux/linux/issues/1390
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c    | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 6492ec5bdec4..cecf72be5102 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -269,27 +269,20 @@ static void rmnet_map_v5_checksum_uplink_packet(struct sk_buff *skb,
 		void *trans;
 		u8 proto;
 
-		if (skb->protocol != htons(ETH_P_IP) &&
-		    skb->protocol != htons(ETH_P_IPV6)) {
-			priv->stats.csum_err_invalid_ip_version++;
-			goto sw_csum;
-		}
-
 		if (skb->protocol == htons(ETH_P_IP)) {
 			u16 ip_len = ((struct iphdr *)iph)->ihl * 4;
 
 			proto = ((struct iphdr *)iph)->protocol;
 			trans = iph + ip_len;
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-#if IS_ENABLED(CONFIG_IPV6)
+		} else if (IS_ENABLED(CONFIG_IPV6) &&
+			   skb->protocol == htons(ETH_P_IPV6)) {
 			u16 ip_len = sizeof(struct ipv6hdr);
 
 			proto = ((struct ipv6hdr *)iph)->nexthdr;
 			trans = iph + ip_len;
-#else
+		} else {
 			priv->stats.csum_err_invalid_ip_version++;
 			goto sw_csum;
-#endif /* CONFIG_IPV6 */
 		}
 
 		check = rmnet_map_get_csum_field(proto, trans);

From e32ea44c7ae476f4c90e35ab0a29dc8ff082bc11 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Thu, 3 Jun 2021 16:22:11 -0500
Subject: [PATCH 0967/3292] icmp: fix lib conflict with trinity

Including <linux/in.h> and <netinet/in.h> in the dependencies breaks
compilation of trinity due to multiple definitions. <linux/in.h> is only
used in <linux/icmp.h> to provide the definition of the struct in_addr,
but this can be substituted out by using the datatype __be32.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/icmp.h | 3 +--
 net/ipv4/icmp.c           | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h
index c1da8244c5e1..163c0998aec9 100644
--- a/include/uapi/linux/icmp.h
+++ b/include/uapi/linux/icmp.h
@@ -20,7 +20,6 @@
 
 #include <linux/types.h>
 #include <asm/byteorder.h>
-#include <linux/in.h>
 #include <linux/if.h>
 #include <linux/in6.h>
 
@@ -154,7 +153,7 @@ struct icmp_ext_echo_iio {
 		struct {
 			struct icmp_ext_echo_ctype3_hdr ctype3_hdr;
 			union {
-				struct in_addr	ipv4_addr;
+				__be32		ipv4_addr;
 				struct in6_addr	ipv6_addr;
 			} ip_addr;
 		} addr;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7b6931a4d775..2e09d62d59e3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1059,7 +1059,7 @@ static bool icmp_echo(struct sk_buff *skb)
 			if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
 					 sizeof(struct in_addr))
 				goto send_mal_query;
-			dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr);
+			dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
 			break;
 #if IS_ENABLED(CONFIG_IPV6)
 		case ICMP_AFI_IP6:

From ebbf5fcb94a7f3499747b282420a1c5f7e8d1c6f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 3 Jun 2021 22:56:57 +0100
Subject: [PATCH 0968/3292] netdevsim: Fix unsigned being compared to less than
 zero

The comparison of len < 0 is always false because len is a size_t. Fix
this by making len a ssize_t instead.

Addresses-Coverity: ("Unsigned compared against 0")
Fixes: d395381909a3 ("netdevsim: Add max_vfs to bus_dev")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index b56003dfe3cc..ccec29970d5b 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -111,7 +111,7 @@ ssize_t nsim_bus_dev_max_vfs_read(struct file *file,
 {
 	struct nsim_bus_dev *nsim_bus_dev = file->private_data;
 	char buf[11];
-	size_t len;
+	ssize_t len;
 
 	len = snprintf(buf, sizeof(buf), "%u\n", nsim_bus_dev->max_vfs);
 	if (len < 0)

From a0143f5ac0594d73ef91c2336d8172217ff9cd72 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:25 -0400
Subject: [PATCH 0969/3292] clocksource/drivers/samsung_pwm: Minor whitespace
 cleanup

Cleanup the code to be slightly more readable and follow coding
convention - only whitespace.  This fixes checkpatch warnings:

  WARNING: Block comments should align the * on each line
  WARNING: please, no space before tabs
  WARNING: Missing a blank line after declarations
  CHECK: Alignment should match open parenthesis

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-1-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 19 +++++++++++--------
 include/clocksource/samsung_pwm.h       |  3 ++-
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index f760229d0c7f..69bf79c7f462 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -4,7 +4,7 @@
  *		http://www.samsung.com/
  *
  * samsung - Common hr-timer support (s3c and s5p)
-*/
+ */
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
@@ -22,7 +22,6 @@
 
 #include <clocksource/samsung_pwm.h>
 
-
 /*
  * Clocksource driver
  */
@@ -38,8 +37,8 @@
 #define TCFG0_PRESCALER_MASK		0xff
 #define TCFG0_PRESCALER1_SHIFT		8
 
-#define TCFG1_SHIFT(x)	  		((x) * 4)
-#define TCFG1_MUX_MASK	  		0xf
+#define TCFG1_SHIFT(x)			((x) * 4)
+#define TCFG1_MUX_MASK			0xf
 
 /*
  * Each channel occupies 4 bits in TCON register, but there is a gap of 4
@@ -183,7 +182,7 @@ static void samsung_time_start(unsigned int channel, bool periodic)
 }
 
 static int samsung_set_next_event(unsigned long cycles,
-				struct clock_event_device *evt)
+				  struct clock_event_device *evt)
 {
 	/*
 	 * This check is needed to account for internal rounding
@@ -225,6 +224,7 @@ static void samsung_clockevent_resume(struct clock_event_device *cev)
 
 	if (pwm.variant.has_tint_cstat) {
 		u32 mask = (1 << pwm.event_id);
+
 		writel(mask | (mask << 5), pwm.base + REG_TINT_CSTAT);
 	}
 }
@@ -248,6 +248,7 @@ static irqreturn_t samsung_clock_event_isr(int irq, void *dev_id)
 
 	if (pwm.variant.has_tint_cstat) {
 		u32 mask = (1 << pwm.event_id);
+
 		writel(mask | (mask << 5), pwm.base + REG_TINT_CSTAT);
 	}
 
@@ -272,7 +273,7 @@ static void __init samsung_clockevent_init(void)
 
 	time_event_device.cpumask = cpumask_of(0);
 	clockevents_config_and_register(&time_event_device,
-						clock_rate, 1, pwm.tcnt_max);
+					clock_rate, 1, pwm.tcnt_max);
 
 	irq_number = pwm.irq[pwm.event_id];
 	if (request_irq(irq_number, samsung_clock_event_isr,
@@ -282,6 +283,7 @@ static void __init samsung_clockevent_init(void)
 
 	if (pwm.variant.has_tint_cstat) {
 		u32 mask = (1 << pwm.event_id);
+
 		writel(mask | (mask << 5), pwm.base + REG_TINT_CSTAT);
 	}
 }
@@ -347,7 +349,7 @@ static int __init samsung_clocksource_init(void)
 		pwm.source_reg = pwm.base + pwm.source_id * 0x0c + 0x14;
 
 	sched_clock_register(samsung_read_sched_clock,
-						pwm.variant.bits, clock_rate);
+			     pwm.variant.bits, clock_rate);
 
 	samsung_clocksource.mask = CLOCKSOURCE_MASK(pwm.variant.bits);
 	return clocksource_register_hz(&samsung_clocksource, clock_rate);
@@ -398,7 +400,8 @@ static int __init _samsung_pwm_clocksource_init(void)
 }
 
 void __init samsung_pwm_clocksource_init(void __iomem *base,
-			unsigned int *irqs, struct samsung_pwm_variant *variant)
+					 unsigned int *irqs,
+					 struct samsung_pwm_variant *variant)
 {
 	pwm.base = base;
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
diff --git a/include/clocksource/samsung_pwm.h b/include/clocksource/samsung_pwm.h
index c395238d0922..76341988fb4f 100644
--- a/include/clocksource/samsung_pwm.h
+++ b/include/clocksource/samsung_pwm.h
@@ -27,6 +27,7 @@ struct samsung_pwm_variant {
 };
 
 void samsung_pwm_clocksource_init(void __iomem *base,
-		unsigned int *irqs, struct samsung_pwm_variant *variant);
+				  unsigned int *irqs,
+				  struct samsung_pwm_variant *variant);
 
 #endif /* __CLOCKSOURCE_SAMSUNG_PWM_H */

From bb08e96575dbbd49acb49999dd0d7ffedb5c1608 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:26 -0400
Subject: [PATCH 0970/3292] clocksource/drivers/samsung_pwm: Constify passed
 structure

The 'struct samsung_pwm_variant' argument passed to initialization
functions is not modified, so it can be made const for safety.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-2-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 2 +-
 include/clocksource/samsung_pwm.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 69bf79c7f462..bfad61b509f9 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -401,7 +401,7 @@ static int __init _samsung_pwm_clocksource_init(void)
 
 void __init samsung_pwm_clocksource_init(void __iomem *base,
 					 unsigned int *irqs,
-					 struct samsung_pwm_variant *variant)
+					 const struct samsung_pwm_variant *variant)
 {
 	pwm.base = base;
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
diff --git a/include/clocksource/samsung_pwm.h b/include/clocksource/samsung_pwm.h
index 76341988fb4f..9b435caa95fe 100644
--- a/include/clocksource/samsung_pwm.h
+++ b/include/clocksource/samsung_pwm.h
@@ -28,6 +28,6 @@ struct samsung_pwm_variant {
 
 void samsung_pwm_clocksource_init(void __iomem *base,
 				  unsigned int *irqs,
-				  struct samsung_pwm_variant *variant);
+				  const struct samsung_pwm_variant *variant);
 
 #endif /* __CLOCKSOURCE_SAMSUNG_PWM_H */

From 63e83bd8cd848a3d1b4777d90635a309fa9cb2c7 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:27 -0400
Subject: [PATCH 0971/3292] clocksource/drivers/samsung_pwm: Cleanup on init
 error

Failure of timer initialization is likely to be fatal for the system, so
cleanup in such case is not strictly necessary.  However the code might
be refactored or reused, so better not to rely on such assumption that
system won't continue init failure.

Unmap the IO memory and put the clock on initialization failures from
devicetree.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-3-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index bfad61b509f9..55e2f9fa2a15 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -421,7 +421,7 @@ static int __init samsung_pwm_alloc(struct device_node *np,
 	struct property *prop;
 	const __be32 *cur;
 	u32 val;
-	int i;
+	int i, ret;
 
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
 	for (i = 0; i < SAMSUNG_PWM_NUM; ++i)
@@ -444,10 +444,24 @@ static int __init samsung_pwm_alloc(struct device_node *np,
 	pwm.timerclk = of_clk_get_by_name(np, "timers");
 	if (IS_ERR(pwm.timerclk)) {
 		pr_crit("failed to get timers clock for timer\n");
-		return PTR_ERR(pwm.timerclk);
+		ret = PTR_ERR(pwm.timerclk);
+		goto err_clk;
 	}
 
-	return _samsung_pwm_clocksource_init();
+	ret = _samsung_pwm_clocksource_init();
+	if (ret)
+		goto err_clocksource;
+
+	return 0;
+
+err_clocksource:
+	clk_put(pwm.timerclk);
+	pwm.timerclk = NULL;
+err_clk:
+	iounmap(pwm.base);
+	pwm.base = NULL;
+
+	return ret;
 }
 
 static const struct samsung_pwm_variant s3c24xx_variant = {

From b4318ce203db8f8b7004e7ab82a957f894660c88 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:28 -0400
Subject: [PATCH 0972/3292] clocksource/drivers/samsung_pwm: Constify source IO
 memory

The 'source_reg' IO memory is only read, so the pointer can point to
const for safety.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-4-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 55e2f9fa2a15..6e46781bc9ac 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -61,7 +61,7 @@ EXPORT_SYMBOL(samsung_pwm_lock);
 
 struct samsung_pwm_clocksource {
 	void __iomem *base;
-	void __iomem *source_reg;
+	const void __iomem *source_reg;
 	unsigned int irq[SAMSUNG_PWM_NUM];
 	struct samsung_pwm_variant variant;
 

From 43582f29b161d820717bc13f562bca27af12e3cf Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Thu, 3 Jun 2021 23:40:04 +0100
Subject: [PATCH 0973/3292] gpiolib: acpi: Introduce
 acpi_get_and_request_gpiod() helper

We need to be able to translate GPIO resources in an ACPI device's _CRS
into GPIO descriptor array. Those are represented in _CRS as a pathname
to a GPIO device plus the pin's index number: the acpi_get_gpiod()
function is perfect for that purpose.

As it's currently only used internally within the GPIO layer, provide and
export a wrapper function that additionally holds a reference to the GPIO
device.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Daniel Scally <djrscally@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib-acpi.c   | 28 ++++++++++++++++++++++++++++
 include/linux/gpio/consumer.h |  2 ++
 2 files changed, 30 insertions(+)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 3ef22a3c104d..75cd0c5a5cc4 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -128,6 +128,34 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin)
 	return gpiochip_get_desc(chip, pin);
 }
 
+/**
+ * acpi_get_and_request_gpiod - Translate ACPI GPIO pin to GPIO descriptor and
+ *                              hold a refcount to the GPIO device.
+ * @path:      ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1")
+ * @pin:       ACPI GPIO pin number (0-based, controller-relative)
+ * @label:     Label to pass to gpiod_request()
+ *
+ * This function is a simple pass-through to acpi_get_gpiod(), except that
+ * as it is intended for use outside of the GPIO layer (in a similar fashion to
+ * gpiod_get_index() for example) it also holds a reference to the GPIO device.
+ */
+struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label)
+{
+	struct gpio_desc *gpio;
+	int ret;
+
+	gpio = acpi_get_gpiod(path, pin);
+	if (IS_ERR(gpio))
+		return gpio;
+
+	ret = gpiod_request(gpio, label);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return gpio;
+}
+EXPORT_SYMBOL_GPL(acpi_get_and_request_gpiod);
+
 static irqreturn_t acpi_gpio_irq_handler(int irq, void *data)
 {
 	struct acpi_gpio_event *event = data;
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index c73b25bc9213..566feb56601f 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -692,6 +692,8 @@ int devm_acpi_dev_add_driver_gpios(struct device *dev,
 				   const struct acpi_gpio_mapping *gpios);
 void devm_acpi_dev_remove_driver_gpios(struct device *dev);
 
+struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label);
+
 #else  /* CONFIG_GPIOLIB && CONFIG_ACPI */
 
 struct acpi_device;

From 043d7f09bf614809c10c4acbf0695ef731958300 Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Thu, 3 Jun 2021 23:40:05 +0100
Subject: [PATCH 0974/3292] gpiolib: acpi: Add acpi_gpio_get_io_resource()

Add a function to verify that a given ACPI resource represents a GpioIo()
type of resource, and return it if so.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Daniel Scally <djrscally@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib-acpi.c | 23 +++++++++++++++++++++++
 include/linux/acpi.h        |  7 +++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 75cd0c5a5cc4..cf99a5752fe0 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -196,6 +196,29 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
 }
 EXPORT_SYMBOL_GPL(acpi_gpio_get_irq_resource);
 
+/**
+ * acpi_gpio_get_io_resource - Fetch details of an ACPI resource if it is a GPIO
+ *			       I/O resource or return False if not.
+ * @ares:	Pointer to the ACPI resource to fetch
+ * @agpio:	Pointer to a &struct acpi_resource_gpio to store the output pointer
+ */
+bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
+			       struct acpi_resource_gpio **agpio)
+{
+	struct acpi_resource_gpio *gpio;
+
+	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
+		return false;
+
+	gpio = &ares->data.gpio;
+	if (gpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_IO)
+		return false;
+
+	*agpio = gpio;
+	return true;
+}
+EXPORT_SYMBOL_GPL(acpi_gpio_get_io_resource);
+
 static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio,
 				      struct acpi_gpio_event *event)
 {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..a74d37a3b618 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1096,6 +1096,8 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c
 #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB)
 bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
 				struct acpi_resource_gpio **agpio);
+bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
+			       struct acpi_resource_gpio **agpio);
 int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index);
 #else
 static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
@@ -1103,6 +1105,11 @@ static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
 {
 	return false;
 }
+static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
+					     struct acpi_resource_gpio **agpio)
+{
+	return false;
+}
 static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev,
 					   const char *name, int index)
 {

From 4eafdb1515a708d97e4659bd488ddac19f274c4f Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 8 Apr 2021 13:47:08 +0100
Subject: [PATCH 0975/3292] dm btree: improve btree residency

This commit improves the residency of btrees built in the metadata for
dm-thin and dm-cache.

When inserting a new entry into a full btree node the current code
splits the node into two.  This can result in very many half full nodes,
particularly if the insertions are occurring in an ascending order (as
happens in dm-thin with large writes).

With this commit, when we insert into a full node we first try and move
some entries to a neighbouring node that has space, failing that it
tries to split two neighbouring nodes into three.

Results are given below.  'Residency' is how full nodes are on average
as a percentage.  Average instruction counts for the operations
are given to show the extra processing has little overhead.

                         +--------------------------+--------------------------+
                         |         Before           |         After            |
+------------+-----------+-----------+--------------+-----------+--------------+
|    Test    |   Phase   | Residency | Instructions | Residency | Instructions |
+------------+-----------+-----------+--------------+-----------+--------------+
| Ascending  | insert    |        50 |         1876 |        96 |         1930 |
|            | overwrite |        50 |         1789 |        96 |         1746 |
|            | lookup    |        50 |          778 |        96 |          778 |
| Descending | insert    |        50 |         3024 |        96 |         3181 |
|            | overwrite |        50 |         1789 |        96 |         1746 |
|            | lookup    |        50 |          778 |        96 |          778 |
| Random     | insert    |        68 |         3800 |        84 |         3736 |
|            | overwrite |        68 |         4254 |        84 |         3911 |
|            | lookup    |        68 |          779 |        84 |          779 |
| Runs       | insert    |        63 |         2546 |        82 |         2815 |
|            | overwrite |        63 |         2013 |        82 |         1986 |
|            | lookup    |        63 |          778 |        82 |          779 |
+------------+-----------+-----------+--------------+-----------+--------------+

   Ascending - keys are inserted in ascending order.
   Descending - keys are inserted in descending order.
   Random - keys are inserted in random order.
   Runs - keys are split into ascending runs of ~20 length.  Then
          the runs are shuffled.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Colin Ian King <colin.king@canonical.com> # contains_key() fix
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/persistent-data/dm-btree.c         | 451 ++++++++++++++++--
 .../persistent-data/dm-transaction-manager.c  |   9 +
 .../persistent-data/dm-transaction-manager.h  |  10 +-
 3 files changed, 439 insertions(+), 31 deletions(-)

diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index ef6e78d45d5b..18282932bedc 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -500,6 +500,122 @@ out:
 
 EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
 
+/*----------------------------------------------------------------*/
+
+/*
+ * Copies entries from one region of a btree node to another.  The regions
+ * must not overlap.
+ */
+static void copy_entries(struct btree_node *dest, unsigned dest_offset,
+			 struct btree_node *src, unsigned src_offset,
+			 unsigned count)
+{
+	size_t value_size = le32_to_cpu(dest->header.value_size);
+	memcpy(dest->keys + dest_offset, src->keys + src_offset, count * sizeof(uint64_t));
+	memcpy(value_ptr(dest, dest_offset), value_ptr(src, src_offset), count * value_size);
+}
+
+/*
+ * Moves entries from one region fo a btree node to another.  The regions
+ * may overlap.
+ */
+static void move_entries(struct btree_node *dest, unsigned dest_offset,
+			 struct btree_node *src, unsigned src_offset,
+			 unsigned count)
+{
+	size_t value_size = le32_to_cpu(dest->header.value_size);
+	memmove(dest->keys + dest_offset, src->keys + src_offset, count * sizeof(uint64_t));
+	memmove(value_ptr(dest, dest_offset), value_ptr(src, src_offset), count * value_size);
+}
+
+/*
+ * Erases the first 'count' entries of a btree node, shifting following
+ * entries down into their place.
+ */
+static void shift_down(struct btree_node *n, unsigned count)
+{
+	move_entries(n, 0, n, count, le32_to_cpu(n->header.nr_entries) - count);
+}
+
+/*
+ * Moves entries in a btree node up 'count' places, making space for
+ * new entries at the start of the node.
+ */
+static void shift_up(struct btree_node *n, unsigned count)
+{
+	move_entries(n, count, n, 0, le32_to_cpu(n->header.nr_entries));
+}
+
+/*
+ * Redistributes entries between two btree nodes to make them
+ * have similar numbers of entries.
+ */
+static void redistribute2(struct btree_node *left, struct btree_node *right)
+{
+	unsigned nr_left = le32_to_cpu(left->header.nr_entries);
+	unsigned nr_right = le32_to_cpu(right->header.nr_entries);
+	unsigned total = nr_left + nr_right;
+	unsigned target_left = total / 2;
+	unsigned target_right = total - target_left;
+
+	if (nr_left < target_left) {
+		unsigned delta = target_left - nr_left;
+		copy_entries(left, nr_left, right, 0, delta);
+		shift_down(right, delta);
+	} else if (nr_left > target_left) {
+		unsigned delta = nr_left - target_left;
+		if (nr_right)
+			shift_up(right, delta);
+		copy_entries(right, 0, left, target_left, delta);
+	}
+
+	left->header.nr_entries = cpu_to_le32(target_left);
+	right->header.nr_entries = cpu_to_le32(target_right);
+}
+
+/*
+ * Redistribute entries between three nodes.  Assumes the central
+ * node is empty.
+ */
+static void redistribute3(struct btree_node *left, struct btree_node *center,
+			  struct btree_node *right)
+{
+	unsigned nr_left = le32_to_cpu(left->header.nr_entries);
+	unsigned nr_center = le32_to_cpu(center->header.nr_entries);
+	unsigned nr_right = le32_to_cpu(right->header.nr_entries);
+	unsigned total, target_left, target_center, target_right;
+
+	BUG_ON(nr_center);
+
+	total = nr_left + nr_right;
+	target_left = total / 3;
+	target_center = (total - target_left) / 2;
+	target_right = (total - target_left - target_center);
+
+	if (nr_left < target_left) {
+		unsigned left_short = target_left - nr_left;
+		copy_entries(left, nr_left, right, 0, left_short);
+		copy_entries(center, 0, right, left_short, target_center);
+		shift_down(right, nr_right - target_right);
+
+	} else if (nr_left < (target_left + target_center)) {
+		unsigned left_to_center = nr_left - target_left;
+		copy_entries(center, 0, left, target_left, left_to_center);
+		copy_entries(center, left_to_center, right, 0, target_center - left_to_center);
+		shift_down(right, nr_right - target_right);
+
+	} else {
+		unsigned right_short = target_right - nr_right;
+		shift_up(right, right_short);
+		copy_entries(right, 0, left, nr_left - right_short, right_short);
+		copy_entries(center, 0, left, target_left, nr_left - target_left);
+	}
+
+	left->header.nr_entries = cpu_to_le32(target_left);
+	center->header.nr_entries = cpu_to_le32(target_center);
+	right->header.nr_entries = cpu_to_le32(target_right);
+}
+
 /*
  * Splits a node by creating a sibling node and shifting half the nodes
  * contents across.  Assumes there is a parent node, and it has room for
@@ -530,12 +646,10 @@ EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
  *
  * Where A* is a shadow of A.
  */
-static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
-			       uint64_t key)
+static int split_one_into_two(struct shadow_spine *s, unsigned parent_index,
+			      struct dm_btree_value_type *vt, uint64_t key)
 {
 	int r;
-	size_t size;
-	unsigned nr_left, nr_right;
 	struct dm_block *left, *right, *parent;
 	struct btree_node *ln, *rn, *pn;
 	__le64 location;
@@ -549,36 +663,18 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
 	ln = dm_block_data(left);
 	rn = dm_block_data(right);
 
-	nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
-	nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
-
-	ln->header.nr_entries = cpu_to_le32(nr_left);
-
 	rn->header.flags = ln->header.flags;
-	rn->header.nr_entries = cpu_to_le32(nr_right);
+	rn->header.nr_entries = cpu_to_le32(0);
 	rn->header.max_entries = ln->header.max_entries;
 	rn->header.value_size = ln->header.value_size;
-	memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
+	redistribute2(ln, rn);
 
-	size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
-		sizeof(uint64_t) : s->info->value_type.size;
-	memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
-	       size * nr_right);
-
-	/*
-	 * Patch up the parent
-	 */
+	/* patch up the parent */
 	parent = shadow_parent(s);
-
 	pn = dm_block_data(parent);
-	location = cpu_to_le64(dm_block_location(left));
-	__dm_bless_for_disk(&location);
-	memcpy_disk(value_ptr(pn, parent_index),
-		    &location, sizeof(__le64));
 
 	location = cpu_to_le64(dm_block_location(right));
 	__dm_bless_for_disk(&location);
-
 	r = insert_at(sizeof(__le64), pn, parent_index + 1,
 		      le64_to_cpu(rn->keys[0]), &location);
 	if (r) {
@@ -586,6 +682,7 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
 		return r;
 	}
 
+	/* patch up the spine */
 	if (key < le64_to_cpu(rn->keys[0])) {
 		unlock_block(s->info, right);
 		s->nodes[1] = left;
@@ -597,6 +694,121 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
 	return 0;
 }
 
+/*
+ * We often need to modify a sibling node.  This function shadows a particular
+ * child of the given parent node.  Making sure to update the parent to point
+ * to the new shadow.
+ */
+static int shadow_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
+			struct btree_node *parent, unsigned index,
+			struct dm_block **result)
+{
+	int r, inc;
+	dm_block_t root;
+	struct btree_node *node;
+
+	root = value64(parent, index);
+
+	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
+			       result, &inc);
+	if (r)
+		return r;
+
+	node = dm_block_data(*result);
+
+	if (inc)
+		inc_children(info->tm, node, vt);
+
+	*((__le64 *) value_ptr(parent, index)) =
+		cpu_to_le64(dm_block_location(*result));
+
+	return 0;
+}
+
+/*
+ * Splits two nodes into three.  This is more work, but results in fuller
+ * nodes, so saves metadata space.
+ */
+static int split_two_into_three(struct shadow_spine *s, unsigned parent_index,
+                                struct dm_btree_value_type *vt, uint64_t key)
+{
+	int r;
+	unsigned middle_index;
+	struct dm_block *left, *middle, *right, *parent;
+	struct btree_node *ln, *rn, *mn, *pn;
+	__le64 location;
+
+	parent = shadow_parent(s);
+	pn = dm_block_data(parent);
+
+	if (parent_index == 0) {
+		middle_index = 1;
+		left = shadow_current(s);
+		r = shadow_child(s->info, vt, pn, parent_index + 1, &right);
+		if (r)
+			return r;
+	} else {
+		middle_index = parent_index;
+		right = shadow_current(s);
+		r = shadow_child(s->info, vt, pn, parent_index - 1, &left);
+		if (r)
+			return r;
+	}
+
+	r = new_block(s->info, &middle);
+	if (r < 0)
+		return r;
+
+	ln = dm_block_data(left);
+	mn = dm_block_data(middle);
+	rn = dm_block_data(right);
+
+	mn->header.nr_entries = cpu_to_le32(0);
+	mn->header.flags = ln->header.flags;
+	mn->header.max_entries = ln->header.max_entries;
+	mn->header.value_size = ln->header.value_size;
+
+	redistribute3(ln, mn, rn);
+
+	/* patch up the parent */
+	pn->keys[middle_index] = rn->keys[0];
+	location = cpu_to_le64(dm_block_location(middle));
+	__dm_bless_for_disk(&location);
+	r = insert_at(sizeof(__le64), pn, middle_index,
+		      le64_to_cpu(mn->keys[0]), &location);
+	if (r) {
+		if (shadow_current(s) != left)
+			unlock_block(s->info, left);
+
+		unlock_block(s->info, middle);
+
+		if (shadow_current(s) != right)
+			unlock_block(s->info, right);
+
+	        return r;
+	}
+
+
+	/* patch up the spine */
+	if (key < le64_to_cpu(mn->keys[0])) {
+		unlock_block(s->info, middle);
+		unlock_block(s->info, right);
+		s->nodes[1] = left;
+	} else if (key < le64_to_cpu(rn->keys[0])) {
+		unlock_block(s->info, left);
+		unlock_block(s->info, right);
+		s->nodes[1] = middle;
+	} else {
+		unlock_block(s->info, left);
+		unlock_block(s->info, middle);
+		s->nodes[1] = right;
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------*/
+
 /*
  * Splits a node by creating two new children beneath the given node.
  *
@@ -690,6 +902,186 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
 	return 0;
 }
 
+/*----------------------------------------------------------------*/
+
+/*
+ * Redistributes a node's entries with its left sibling.
+ */
+static int rebalance_left(struct shadow_spine *s, struct dm_btree_value_type *vt,
+			  unsigned parent_index, uint64_t key)
+{
+	int r;
+	struct dm_block *sib;
+	struct btree_node *left, *right, *parent = dm_block_data(shadow_parent(s));
+
+	r = shadow_child(s->info, vt, parent, parent_index - 1, &sib);
+	if (r)
+		return r;
+
+	left = dm_block_data(sib);
+	right = dm_block_data(shadow_current(s));
+	redistribute2(left, right);
+	*key_ptr(parent, parent_index) = right->keys[0];
+
+	if (key < le64_to_cpu(right->keys[0])) {
+		unlock_block(s->info, s->nodes[1]);
+		s->nodes[1] = sib;
+	} else {
+		unlock_block(s->info, sib);
+	}
+
+	return 0;
+}
+
+/*
+ * Redistributes a nodes entries with its right sibling.
+ */
+static int rebalance_right(struct shadow_spine *s, struct dm_btree_value_type *vt,
+			   unsigned parent_index, uint64_t key)
+{
+	int r;
+	struct dm_block *sib;
+	struct btree_node *left, *right, *parent = dm_block_data(shadow_parent(s));
+
+	r = shadow_child(s->info, vt, parent, parent_index + 1, &sib);
+	if (r)
+		return r;
+
+	left = dm_block_data(shadow_current(s));
+	right = dm_block_data(sib);
+	redistribute2(left, right);
+	*key_ptr(parent, parent_index + 1) = right->keys[0];
+
+	if (key < le64_to_cpu(right->keys[0])) {
+		unlock_block(s->info, sib);
+	} else {
+		unlock_block(s->info, s->nodes[1]);
+		s->nodes[1] = sib;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns the number of spare entries in a node.
+ */
+static int get_node_free_space(struct dm_btree_info *info, dm_block_t b, unsigned *space)
+{
+	int r;
+	unsigned nr_entries;
+	struct dm_block *block;
+	struct btree_node *node;
+
+	r = bn_read_lock(info, b, &block);
+	if (r)
+		return r;
+
+	node = dm_block_data(block);
+	nr_entries = le32_to_cpu(node->header.nr_entries);
+	*space = le32_to_cpu(node->header.max_entries) - nr_entries;
+
+	unlock_block(info, block);
+	return 0;
+}
+
+/*
+ * Make space in a node, either by moving some entries to a sibling,
+ * or creating a new sibling node.  SPACE_THRESHOLD defines the minimum
+ * number of free entries that must be in the sibling to make the move
+ * worth while.  If the siblings are shared (eg, part of a snapshot),
+ * then they are not touched, since this break sharing and so consume
+ * more space than we save.
+ */
+#define SPACE_THRESHOLD 8
+static int rebalance_or_split(struct shadow_spine *s, struct dm_btree_value_type *vt,
+			      unsigned parent_index, uint64_t key)
+{
+	int r;
+	struct btree_node *parent = dm_block_data(shadow_parent(s));
+	unsigned nr_parent = le32_to_cpu(parent->header.nr_entries);
+	unsigned free_space;
+	int left_shared = 0, right_shared = 0;
+
+	/* Should we move entries to the left sibling? */
+	if (parent_index > 0) {
+		dm_block_t left_b = value64(parent, parent_index - 1);
+		r = dm_tm_block_is_shared(s->info->tm, left_b, &left_shared);
+		if (r)
+			return r;
+
+		if (!left_shared) {
+			r = get_node_free_space(s->info, left_b, &free_space);
+			if (r)
+				return r;
+
+			if (free_space >= SPACE_THRESHOLD)
+				return rebalance_left(s, vt, parent_index, key);
+		}
+	}
+
+	/* Should we move entries to the right sibling? */
+	if (parent_index < (nr_parent - 1)) {
+		dm_block_t right_b = value64(parent, parent_index + 1);
+		r = dm_tm_block_is_shared(s->info->tm, right_b, &right_shared);
+		if (r)
+			return r;
+
+		if (!right_shared) {
+			r = get_node_free_space(s->info, right_b, &free_space);
+			if (r)
+				return r;
+
+			if (free_space >= SPACE_THRESHOLD)
+				return rebalance_right(s, vt, parent_index, key);
+		}
+	}
+
+	/*
+	 * We need to split the node, normally we split two nodes
+	 * into three.	But when inserting a sequence that is either
+	 * monotonically increasing or decreasing it's better to split
+	 * a single node into two.
+	 */
+	if (left_shared || right_shared || (nr_parent <= 2) ||
+	    (parent_index == 0) || (parent_index + 1 == nr_parent)) {
+		return split_one_into_two(s, parent_index, vt, key);
+	} else {
+		return split_two_into_three(s, parent_index, vt, key);
+	}
+}
+
+/*
+ * Does the node contain a particular key?
+ */
+static bool contains_key(struct btree_node *node, uint64_t key)
+{
+	int i = lower_bound(node, key);
+
+	if (i >= 0 && le64_to_cpu(node->keys[i]) == key)
+		return true;
+
+	return false;
+}
+
+/*
+ * In general we preemptively make sure there's a free entry in every
+ * node on the spine when doing an insert.  But we can avoid that with
+ * leaf nodes if we know it's an overwrite.
+ */
+static bool has_space_for_insert(struct btree_node *node, uint64_t key)
+{
+	if (node->header.nr_entries == node->header.max_entries) {
+		if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+			/* we don't need space if it's an overwrite */
+			return contains_key(node, key);
+		}
+
+		return false;
+	}
+
+	return true;
+}
+
 static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
 			    struct dm_btree_value_type *vt,
 			    uint64_t key, unsigned *index)
@@ -719,17 +1111,18 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
 
 		node = dm_block_data(shadow_current(s));
 
-		if (node->header.nr_entries == node->header.max_entries) {
+		if (!has_space_for_insert(node, key)) {
 			if (top)
 				r = btree_split_beneath(s, key);
 			else
-				r = btree_split_sibling(s, i, key);
+				r = rebalance_or_split(s, vt, i, key);
 
 			if (r < 0)
 				return r;
-		}
 
-		node = dm_block_data(shadow_current(s));
+			/* making space can cause the current node to change */
+			node = dm_block_data(shadow_current(s));
+		}
 
 		i = lower_bound(node, key);
 
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index abe2c5dd0993..4353e1146d73 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -379,6 +379,15 @@ int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
 	return dm_sm_get_count(tm->sm, b, result);
 }
 
+int dm_tm_block_is_shared(struct dm_transaction_manager *tm, dm_block_t b,
+			  int *result)
+{
+	if (tm->is_clone)
+		return -EWOULDBLOCK;
+
+	return dm_sm_count_is_more_than_one(tm->sm, b, result);
+}
+
 struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
 {
 	return tm->bm;
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index f3a18be68f30..3d75cc59bbb8 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -103,8 +103,14 @@ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b);
 
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b);
 
-int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
-	      uint32_t *result);
+int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result);
+
+/*
+ * Finds out if a given block is shared (ie. has a reference count higher
+ * than one).
+ */
+int dm_tm_block_is_shared(struct dm_transaction_manager *tm, dm_block_t b,
+			  int *result);
 
 struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm);
 

From 5faafc77f7de69147d1e818026b9a0cbf036a7b2 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 13 Apr 2021 09:03:49 +0100
Subject: [PATCH 0976/3292] dm space maps: don't reset space map allocation
 cursor when committing

Current commit code resets the place where the search for free blocks
will begin back to the start of the metadata device.  There are a couple
of repercussions to this:

- The first allocation after the commit is likely to take longer than
  normal as it searches for a free block in an area that is likely to
  have very few free blocks (if any).

- Any free blocks it finds will have been recently freed.  Reusing them
  means we have fewer old copies of the metadata to aid recovery from
  hardware error.

Fix these issues by leaving the cursor alone, only resetting when the
search hits the end of the metadata device.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/persistent-data/dm-space-map-disk.c     | 9 ++++++++-
 drivers/md/persistent-data/dm-space-map-metadata.c | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index 61f56909e00b..4f8069bb0481 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -171,6 +171,14 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 	 * Any block we allocate has to be free in both the old and current ll.
 	 */
 	r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b);
+	if (r == -ENOSPC) {
+		/*
+		 * There's no free block between smd->begin and the end of the metadata device.
+		 * We search before smd->begin in case something has been freed.
+		 */
+		r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, 0, smd->begin, b);
+	}
+
 	if (r)
 		return r;
 
@@ -194,7 +202,6 @@ static int sm_disk_commit(struct dm_space_map *sm)
 		return r;
 
 	memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll));
-	smd->begin = 0;
 	smd->nr_allocated_this_transaction = 0;
 
 	return 0;
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 9e3c64ec2026..da439ac85796 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -452,6 +452,14 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 	 * Any block we allocate has to be free in both the old and current ll.
 	 */
 	r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
+	if (r == -ENOSPC) {
+		/*
+		 * There's no free block between smm->begin and the end of the metadata device.
+		 * We search before smm->begin in case something has been freed.
+		 */
+		r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b);
+	}
+
 	if (r)
 		return r;
 
@@ -503,7 +511,6 @@ static int sm_metadata_commit(struct dm_space_map *sm)
 		return r;
 
 	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
-	smm->begin = 0;
 	smm->allocated_this_transaction = 0;
 
 	return 0;

From be500ed721a6ec8d49bf0814c277ce7162acee0e Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 13 Apr 2021 11:03:45 +0100
Subject: [PATCH 0977/3292] dm space maps: improve performance with inc/dec on
 ranges of blocks

When we break sharing on btree nodes we typically need to increment
the reference counts to every value held in the node.  This can
cause a lot of repeated calls to the space maps.  Fix this by changing
the interface to the space map inc/dec methods to take ranges of
adjacent blocks to be operated on.

For installations that are using a lot of snapshots this will reduce
cpu overhead of fundamental operations such as provisioning a new block,
or deleting a snapshot, by as much as 10 times.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-era-target.c                    |  24 +-
 drivers/md/dm-thin-metadata.c                 |  95 ++--
 drivers/md/persistent-data/dm-array.c         |  52 +-
 .../md/persistent-data/dm-btree-internal.h    |  13 +
 drivers/md/persistent-data/dm-btree-remove.c  |   4 +-
 drivers/md/persistent-data/dm-btree-spine.c   |  16 +-
 drivers/md/persistent-data/dm-btree.c         |  91 +++-
 drivers/md/persistent-data/dm-btree.h         |  10 +-
 .../md/persistent-data/dm-space-map-common.c  | 452 ++++++++++++++++--
 .../md/persistent-data/dm-space-map-common.h  |  18 +-
 .../md/persistent-data/dm-space-map-disk.c    |  74 +--
 .../persistent-data/dm-space-map-metadata.c   |  96 ++--
 drivers/md/persistent-data/dm-space-map.h     |  18 +-
 .../persistent-data/dm-transaction-manager.c  |  52 ++
 .../persistent-data/dm-transaction-manager.h  |  12 +-
 15 files changed, 778 insertions(+), 249 deletions(-)

diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index d9ac7372108c..3b748393fca5 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -363,28 +363,32 @@ static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata
 	core->root = le64_to_cpu(disk->root);
 }
 
-static void ws_inc(void *context, const void *value)
+static void ws_inc(void *context, const void *value, unsigned count)
 {
 	struct era_metadata *md = context;
 	struct writeset_disk ws_d;
 	dm_block_t b;
+	unsigned i;
 
-	memcpy(&ws_d, value, sizeof(ws_d));
-	b = le64_to_cpu(ws_d.root);
-
-	dm_tm_inc(md->tm, b);
+	for (i = 0; i < count; i++) {
+		memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
+		b = le64_to_cpu(ws_d.root);
+		dm_tm_inc(md->tm, b);
+	}
 }
 
-static void ws_dec(void *context, const void *value)
+static void ws_dec(void *context, const void *value, unsigned count)
 {
 	struct era_metadata *md = context;
 	struct writeset_disk ws_d;
 	dm_block_t b;
+	unsigned i;
 
-	memcpy(&ws_d, value, sizeof(ws_d));
-	b = le64_to_cpu(ws_d.root);
-
-	dm_bitset_del(&md->bitset_info, b);
+	for (i = 0; i < count; i++) {
+		memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d));
+		b = le64_to_cpu(ws_d.root);
+		dm_bitset_del(&md->bitset_info, b);
+	}
 }
 
 static int ws_eq(void *context, const void *value1, const void *value2)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index e75b20480e46..c88ed14d49e6 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -311,28 +311,53 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
 	*t = v & ((1 << 24) - 1);
 }
 
-static void data_block_inc(void *context, const void *value_le)
-{
-	struct dm_space_map *sm = context;
-	__le64 v_le;
-	uint64_t b;
-	uint32_t t;
+/*
+ * It's more efficient to call dm_sm_{inc,dec}_blocks as few times as
+ * possible.  'with_runs' reads contiguous runs of blocks, and calls the
+ * given sm function.
+ */
+typedef int (*run_fn)(struct dm_space_map *, dm_block_t, dm_block_t);
 
-	memcpy(&v_le, value_le, sizeof(v_le));
-	unpack_block_time(le64_to_cpu(v_le), &b, &t);
-	dm_sm_inc_block(sm, b);
+static void with_runs(struct dm_space_map *sm, const __le64 *value_le, unsigned count, run_fn fn)
+{
+	uint64_t b, begin, end;
+	uint32_t t;
+	bool in_run = false;
+	unsigned i;
+
+	for (i = 0; i < count; i++, value_le++) {
+		/* We know value_le is 8 byte aligned */
+		unpack_block_time(le64_to_cpu(*value_le), &b, &t);
+
+		if (in_run) {
+			if (b == end) {
+				end++;
+			} else {
+				fn(sm, begin, end);
+				begin = b;
+				end = b + 1;
+			}
+		} else {
+			in_run = true;
+			begin = b;
+			end = b + 1;
+		}
+	}
+
+	if (in_run)
+		fn(sm, begin, end);
 }
 
-static void data_block_dec(void *context, const void *value_le)
+static void data_block_inc(void *context, const void *value_le, unsigned count)
 {
-	struct dm_space_map *sm = context;
-	__le64 v_le;
-	uint64_t b;
-	uint32_t t;
+	with_runs((struct dm_space_map *) context,
+		  (const __le64 *) value_le, count, dm_sm_inc_blocks);
+}
 
-	memcpy(&v_le, value_le, sizeof(v_le));
-	unpack_block_time(le64_to_cpu(v_le), &b, &t);
-	dm_sm_dec_block(sm, b);
+static void data_block_dec(void *context, const void *value_le, unsigned count)
+{
+	with_runs((struct dm_space_map *) context,
+		  (const __le64 *) value_le, count, dm_sm_dec_blocks);
 }
 
 static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
@@ -349,27 +374,25 @@ static int data_block_equal(void *context, const void *value1_le, const void *va
 	return b1 == b2;
 }
 
-static void subtree_inc(void *context, const void *value)
+static void subtree_inc(void *context, const void *value, unsigned count)
 {
 	struct dm_btree_info *info = context;
-	__le64 root_le;
-	uint64_t root;
+	const __le64 *root_le = value;
+	unsigned i;
 
-	memcpy(&root_le, value, sizeof(root_le));
-	root = le64_to_cpu(root_le);
-	dm_tm_inc(info->tm, root);
+	for (i = 0; i < count; i++, root_le++)
+		dm_tm_inc(info->tm, le64_to_cpu(*root_le));
 }
 
-static void subtree_dec(void *context, const void *value)
+static void subtree_dec(void *context, const void *value, unsigned count)
 {
 	struct dm_btree_info *info = context;
-	__le64 root_le;
-	uint64_t root;
+	const __le64 *root_le = value;
+	unsigned i;
 
-	memcpy(&root_le, value, sizeof(root_le));
-	root = le64_to_cpu(root_le);
-	if (dm_btree_del(info, root))
-		DMERR("btree delete failed");
+	for (i = 0; i < count; i++, root_le++)
+		if (dm_btree_del(info, le64_to_cpu(*root_le)))
+			DMERR("btree delete failed");
 }
 
 static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
@@ -1761,11 +1784,7 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
 	int r = 0;
 
 	pmd_write_lock(pmd);
-	for (; b != e; b++) {
-		r = dm_sm_inc_block(pmd->data_sm, b);
-		if (r)
-			break;
-	}
+	r = dm_sm_inc_blocks(pmd->data_sm, b, e);
 	pmd_write_unlock(pmd);
 
 	return r;
@@ -1776,11 +1795,7 @@ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
 	int r = 0;
 
 	pmd_write_lock(pmd);
-	for (; b != e; b++) {
-		r = dm_sm_dec_block(pmd->data_sm, b);
-		if (r)
-			break;
-	}
+	r = dm_sm_dec_blocks(pmd->data_sm, b, e);
 	pmd_write_unlock(pmd);
 
 	return r;
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 185dc60360b5..3a963d783a86 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -108,12 +108,10 @@ static void *element_at(struct dm_array_info *info, struct array_block *ab,
  * in an array block.
  */
 static void on_entries(struct dm_array_info *info, struct array_block *ab,
-		       void (*fn)(void *, const void *))
+		       void (*fn)(void *, const void *, unsigned))
 {
-	unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
-
-	for (i = 0; i < nr_entries; i++)
-		fn(info->value_type.context, element_at(info, ab, i));
+	unsigned nr_entries = le32_to_cpu(ab->nr_entries);
+	fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
 }
 
 /*
@@ -175,19 +173,18 @@ static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
 static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
 			const void *value, unsigned new_nr)
 {
-	unsigned i;
-	uint32_t nr_entries;
+	uint32_t nr_entries, delta, i;
 	struct dm_btree_value_type *vt = &info->value_type;
 
 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 	BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
 
 	nr_entries = le32_to_cpu(ab->nr_entries);
-	for (i = nr_entries; i < new_nr; i++) {
-		if (vt->inc)
-			vt->inc(vt->context, value);
+	delta = new_nr - nr_entries;
+	if (vt->inc)
+		vt->inc(vt->context, value, delta);
+	for (i = nr_entries; i < new_nr; i++)
 		memcpy(element_at(info, ab, i), value, vt->size);
-	}
 	ab->nr_entries = cpu_to_le32(new_nr);
 }
 
@@ -199,17 +196,16 @@ static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
 static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
 			unsigned new_nr)
 {
-	unsigned i;
-	uint32_t nr_entries;
+	uint32_t nr_entries, delta;
 	struct dm_btree_value_type *vt = &info->value_type;
 
 	BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
 	BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
 
 	nr_entries = le32_to_cpu(ab->nr_entries);
-	for (i = nr_entries; i > new_nr; i--)
-		if (vt->dec)
-			vt->dec(vt->context, element_at(info, ab, i - 1));
+	delta = nr_entries - new_nr;
+	if (vt->dec)
+		vt->dec(vt->context, element_at(info, ab, new_nr - 1), delta);
 	ab->nr_entries = cpu_to_le32(new_nr);
 }
 
@@ -573,16 +569,17 @@ static int grow(struct resize *resize)
  * These are the value_type functions for the btree elements, which point
  * to array blocks.
  */
-static void block_inc(void *context, const void *value)
+static void block_inc(void *context, const void *value, unsigned count)
 {
-	__le64 block_le;
+	const __le64 *block_le = value;
 	struct dm_array_info *info = context;
+	unsigned i;
 
-	memcpy(&block_le, value, sizeof(block_le));
-	dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+	for (i = 0; i < count; i++, block_le++)
+		dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
 }
 
-static void block_dec(void *context, const void *value)
+static void __block_dec(void *context, const void *value)
 {
 	int r;
 	uint64_t b;
@@ -621,6 +618,13 @@ static void block_dec(void *context, const void *value)
 	dm_tm_dec(info->btree_info.tm, b);
 }
 
+static void block_dec(void *context, const void *value, unsigned count)
+{
+	unsigned i;
+	for (i = 0; i < count; i++, value += sizeof(__le64))
+		__block_dec(context, value);
+}
+
 static int block_equal(void *context, const void *value1, const void *value2)
 {
 	return !memcmp(value1, value2, sizeof(__le64));
@@ -711,7 +715,7 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
 			return r;
 
 		if (vt->inc)
-			vt->inc(vt->context, element_at(info, ab, i));
+			vt->inc(vt->context, element_at(info, ab, i), 1);
 	}
 
 	ab->nr_entries = cpu_to_le32(new_nr);
@@ -822,9 +826,9 @@ static int array_set_value(struct dm_array_info *info, dm_block_t root,
 	old_value = element_at(info, ab, entry);
 	if (vt->dec &&
 	    (!vt->equal || !vt->equal(vt->context, old_value, value))) {
-		vt->dec(vt->context, old_value);
+		vt->dec(vt->context, old_value, 1);
 		if (vt->inc)
-			vt->inc(vt->context, value);
+			vt->inc(vt->context, value, 1);
 	}
 
 	memcpy(old_value, value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index b1788853a355..893edb426dba 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -144,4 +144,17 @@ extern struct dm_block_validator btree_node_validator;
 extern void init_le64_type(struct dm_transaction_manager *tm,
 			   struct dm_btree_value_type *vt);
 
+/*
+ * This returns a shadowed btree leaf that you may modify.  In practise
+ * this means overwrites only, since an insert could cause a node to
+ * be split.  Useful if you need access to the old value to calculate the
+ * new one.
+ *
+ * This only works with single level btrees.  The given key must be present in
+ * the tree, otherwise -EINVAL will be returned.
+ */
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+			     uint64_t key, int *index,
+			     dm_block_t *new_root, struct dm_block **leaf);
+
 #endif	/* DM_BTREE_INTERNAL_H */
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index eff04fa23dfa..b34af195bf2a 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -544,7 +544,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 
 		if (info->value_type.dec)
 			info->value_type.dec(info->value_type.context,
-					     value_ptr(n, index));
+					     value_ptr(n, index), 1);
 
 		delete_at(n, index);
 	}
@@ -653,7 +653,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
 	if (k >= keys[last_level] && k < end_key) {
 		if (info->value_type.dec)
 			info->value_type.dec(info->value_type.context,
-					     value_ptr(n, index));
+					     value_ptr(n, index), 1);
 
 		delete_at(n, index);
 		keys[last_level] = k + 1ull;
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index 2061ab865567..f5bd76ed8fe6 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -236,22 +236,14 @@ dm_block_t shadow_root(struct shadow_spine *s)
 	return s->root;
 }
 
-static void le64_inc(void *context, const void *value_le)
+static void le64_inc(void *context, const void *value_le, unsigned count)
 {
-	struct dm_transaction_manager *tm = context;
-	__le64 v_le;
-
-	memcpy(&v_le, value_le, sizeof(v_le));
-	dm_tm_inc(tm, le64_to_cpu(v_le));
+	dm_tm_with_runs(context, value_le, count, dm_tm_inc_range);
 }
 
-static void le64_dec(void *context, const void *value_le)
+static void le64_dec(void *context, const void *value_le, unsigned count)
 {
-	struct dm_transaction_manager *tm = context;
-	__le64 v_le;
-
-	memcpy(&v_le, value_le, sizeof(v_le));
-	dm_tm_dec(tm, le64_to_cpu(v_le));
+	dm_tm_with_runs(context, value_le, count, dm_tm_dec_range);
 }
 
 static int le64_equal(void *context, const void *value1_le, const void *value2_le)
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 18282932bedc..0703ca7a7d9a 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -71,15 +71,13 @@ static int upper_bound(struct btree_node *n, uint64_t key)
 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
 		  struct dm_btree_value_type *vt)
 {
-	unsigned i;
 	uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
 
 	if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
-		for (i = 0; i < nr_entries; i++)
-			dm_tm_inc(tm, value64(n, i));
+		dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range);
+
 	else if (vt->inc)
-		for (i = 0; i < nr_entries; i++)
-			vt->inc(vt->context, value_ptr(n, i));
+		vt->inc(vt->context, value_ptr(n, 0), nr_entries);
 }
 
 static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
@@ -318,13 +316,9 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
 				goto out;
 
 		} else {
-			if (info->value_type.dec) {
-				unsigned i;
-
-				for (i = 0; i < f->nr_children; i++)
-					info->value_type.dec(info->value_type.context,
-							     value_ptr(f->n, i));
-			}
+			if (info->value_type.dec)
+				info->value_type.dec(info->value_type.context,
+						     value_ptr(f->n, 0), f->nr_children);
 			pop_frame(s);
 		}
 	}
@@ -1146,6 +1140,77 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
 	return 0;
 }
 
+static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root,
+				      uint64_t key, int *index)
+{
+	int r, i = -1;
+	struct btree_node *node;
+
+	*index = 0;
+	for (;;) {
+		r = shadow_step(s, root, &s->info->value_type);
+		if (r < 0)
+			return r;
+
+		node = dm_block_data(shadow_current(s));
+
+		/*
+		 * We have to patch up the parent node, ugly, but I don't
+		 * see a way to do this automatically as part of the spine
+		 * op.
+		 */
+		if (shadow_has_parent(s) && i >= 0) {
+			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+			__dm_bless_for_disk(&location);
+			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
+				    &location, sizeof(__le64));
+		}
+
+		node = dm_block_data(shadow_current(s));
+		i = lower_bound(node, key);
+
+		BUG_ON(i < 0);
+		BUG_ON(i >= le32_to_cpu(node->header.nr_entries));
+
+		if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+			if (key != le64_to_cpu(node->keys[i]))
+				return -EINVAL;
+			break;
+		}
+
+		root = value64(node, i);
+	}
+
+	*index = i;
+	return 0;
+}
+
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+			     uint64_t key, int *index,
+			     dm_block_t *new_root, struct dm_block **leaf)
+{
+	int r;
+	struct shadow_spine spine;
+
+	BUG_ON(info->levels > 1);
+	init_shadow_spine(&spine, info);
+	r = __btree_get_overwrite_leaf(&spine, root, key, index);
+	if (!r) {
+		*new_root = shadow_root(&spine);
+		*leaf = shadow_current(&spine);
+
+		/*
+		 * Decrement the count so exit_shadow_spine() doesn't
+		 * unlock the leaf.
+		 */
+		spine.count--;
+	}
+	exit_shadow_spine(&spine);
+
+	return r;
+}
+
 static bool need_insert(struct btree_node *node, uint64_t *keys,
 			unsigned level, unsigned index)
 {
@@ -1222,7 +1287,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
 			     value_ptr(n, index),
 			     value))) {
 			info->value_type.dec(info->value_type.context,
-					     value_ptr(n, index));
+					     value_ptr(n, index), 1);
 		}
 		memcpy_disk(value_ptr(n, index),
 			    value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 3dc5bb1a4748..d2ae5aa4d00b 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -51,21 +51,21 @@ struct dm_btree_value_type {
 	 */
 
 	/*
-	 * The btree is making a duplicate of the value, for instance
+	 * The btree is making a duplicate of a run of values, for instance
 	 * because previously-shared btree nodes have now diverged.
 	 * @value argument is the new copy that the copy function may modify.
 	 * (Probably it just wants to increment a reference count
 	 * somewhere.) This method is _not_ called for insertion of a new
 	 * value: It is assumed the ref count is already 1.
 	 */
-	void (*inc)(void *context, const void *value);
+	void (*inc)(void *context, const void *value, unsigned count);
 
 	/*
-	 * This value is being deleted.  The btree takes care of freeing
+	 * These values are being deleted.  The btree takes care of freeing
 	 * the memory pointed to by @value.  Often the del function just
-	 * needs to decrement a reference count somewhere.
+	 * needs to decrement a reference counts somewhere.
 	 */
-	void (*dec)(void *context, const void *value);
+	void (*dec)(void *context, const void *value, unsigned count);
 
 	/*
 	 * A test for equality between two values.  When a value is
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index a213bf11738f..5552941912af 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -6,6 +6,7 @@
 
 #include "dm-space-map-common.h"
 #include "dm-transaction-manager.h"
+#include "dm-btree-internal.h"
 
 #include <linux/bitops.h>
 #include <linux/device-mapper.h>
@@ -409,12 +410,13 @@ int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
 	return r;
 }
 
-static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
-			int (*mutator)(void *context, uint32_t old, uint32_t *new),
-			void *context, enum allocation_event *ev)
+/*----------------------------------------------------------------*/
+
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+		 uint32_t ref_count, int32_t *nr_allocations)
 {
 	int r;
-	uint32_t bit, old, ref_count;
+	uint32_t bit, old;
 	struct dm_block *nb;
 	dm_block_t index = b;
 	struct disk_index_entry ie_disk;
@@ -433,10 +435,9 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 		return r;
 	}
 	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
-
 	bm_le = dm_bitmap_data(nb);
-	old = sm_lookup_bitmap(bm_le, bit);
 
+	old = sm_lookup_bitmap(bm_le, bit);
 	if (old > 2) {
 		r = sm_ll_lookup_big_ref_count(ll, b, &old);
 		if (r < 0) {
@@ -445,7 +446,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 		}
 	}
 
-	r = mutator(context, old, &ref_count);
 	if (r) {
 		dm_tm_unlock(ll->tm, nb);
 		return r;
@@ -453,7 +453,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 
 	if (ref_count <= 2) {
 		sm_set_bitmap(bm_le, bit, ref_count);
-
 		dm_tm_unlock(ll->tm, nb);
 
 		if (old > 2) {
@@ -480,62 +479,459 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 	}
 
 	if (ref_count && !old) {
-		*ev = SM_ALLOC;
+		*nr_allocations = 1;
 		ll->nr_allocated++;
 		le32_add_cpu(&ie_disk.nr_free, -1);
 		if (le32_to_cpu(ie_disk.none_free_before) == bit)
 			ie_disk.none_free_before = cpu_to_le32(bit + 1);
 
 	} else if (old && !ref_count) {
-		*ev = SM_FREE;
+		*nr_allocations = -1;
 		ll->nr_allocated--;
 		le32_add_cpu(&ie_disk.nr_free, 1);
 		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
 	} else
-		*ev = SM_NONE;
+		*nr_allocations = 0;
 
 	return ll->save_ie(ll, index, &ie_disk);
 }
 
-static int set_ref_count(void *context, uint32_t old, uint32_t *new)
+/*----------------------------------------------------------------*/
+
+/*
+ * Holds useful intermediate results for the range based inc and dec
+ * operations.
+ */
+struct inc_context {
+	struct disk_index_entry ie_disk;
+	struct dm_block *bitmap_block;
+	void *bitmap;
+
+	struct dm_block *overflow_leaf;
+};
+
+static inline void init_inc_context(struct inc_context *ic)
 {
-	*new = *((uint32_t *) context);
-	return 0;
+	ic->bitmap_block = NULL;
+	ic->bitmap = NULL;
+	ic->overflow_leaf = NULL;
 }
 
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
-		 uint32_t ref_count, enum allocation_event *ev)
+static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
 {
-	return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
+	if (ic->bitmap_block)
+		dm_tm_unlock(ll->tm, ic->bitmap_block);
+	if (ic->overflow_leaf)
+		dm_tm_unlock(ll->tm, ic->overflow_leaf);
 }
 
-static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
+static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
 {
-	*new = old + 1;
-	return 0;
+	exit_inc_context(ll, ic);
+	init_inc_context(ic);
 }
 
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+/*
+ * Confirms a btree node contains a particular key at an index.
+ */
+static bool contains_key(struct btree_node *n, uint64_t key, int index)
 {
-	return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+	return index >= 0 &&
+		index < le32_to_cpu(n->header.nr_entries) &&
+		le64_to_cpu(n->keys[index]) == key;
 }
 
-static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
+static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
 {
-	if (!old) {
-		DMERR_LIMIT("unable to decrement a reference count below 0");
+	int r;
+	int index;
+	struct btree_node *n;
+	__le32 *v_ptr;
+	uint32_t rc;
+
+	/*
+	 * bitmap_block needs to be unlocked because getting the
+	 * overflow_leaf may need to allocate, and thus use the space map.
+	 */
+	reset_inc_context(ll, ic);
+
+	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+	if (r < 0)
+		return r;
+
+	n = dm_block_data(ic->overflow_leaf);
+
+	if (!contains_key(n, b, index)) {
+		DMERR("overflow btree is missing an entry");
 		return -EINVAL;
 	}
 
-	*new = old - 1;
+	v_ptr = value_ptr(n, index);
+	rc = le32_to_cpu(*v_ptr) + 1;
+	*v_ptr = cpu_to_le32(rc);
+
 	return 0;
 }
 
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
 {
-	return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
+	int index;
+	struct btree_node *n;
+	__le32 *v_ptr;
+	uint32_t rc;
+
+	/*
+	 * Do we already have the correct overflow leaf?
+	 */
+	if (ic->overflow_leaf) {
+		n = dm_block_data(ic->overflow_leaf);
+		index = lower_bound(n, b);
+		if (contains_key(n, b, index)) {
+			v_ptr = value_ptr(n, index);
+			rc = le32_to_cpu(*v_ptr) + 1;
+			*v_ptr = cpu_to_le32(rc);
+
+			return 0;
+		}
+	}
+
+	return __sm_ll_inc_overflow(ll, b, ic);
 }
 
+static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
+{
+	int r, inc;
+	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
+			       &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
+	if (r < 0) {
+		DMERR("dm_tm_shadow_block() failed");
+		return r;
+	}
+	ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
+	ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+	return 0;
+}
+
+/*
+ * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
+ * we can reopen the bitmap with a simple write lock, rather than re calling
+ * dm_tm_shadow_block().
+ */
+static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
+{
+	if (!ic->bitmap_block) {
+		int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
+					 &dm_sm_bitmap_validator, &ic->bitmap_block);
+		if (r) {
+			DMERR("unable to re-get write lock for bitmap");
+			return r;
+		}
+		ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+	}
+
+	return 0;
+}
+
+/*
+ * Loops round incrementing entries in a single bitmap.
+ */
+static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
+				   uint32_t bit, uint32_t bit_end,
+				   int32_t *nr_allocations, dm_block_t *new_b,
+				   struct inc_context *ic)
+{
+	int r;
+	__le32 le_rc;
+	uint32_t old;
+
+	for (; bit != bit_end; bit++, b++) {
+		/*
+		 * We only need to drop the bitmap if we need to find a new btree
+		 * leaf for the overflow.  So if it was dropped last iteration,
+		 * we now re-get it.
+		 */
+		r = ensure_bitmap(ll, ic);
+		if (r)
+			return r;
+
+		old = sm_lookup_bitmap(ic->bitmap, bit);
+		switch (old) {
+		case 0:
+			/* inc bitmap, adjust nr_allocated */
+			sm_set_bitmap(ic->bitmap, bit, 1);
+			(*nr_allocations)++;
+			ll->nr_allocated++;
+			le32_add_cpu(&ic->ie_disk.nr_free, -1);
+			if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
+				ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
+			break;
+
+		case 1:
+			/* inc bitmap */
+			sm_set_bitmap(ic->bitmap, bit, 2);
+			break;
+
+		case 2:
+			/* inc bitmap and insert into overflow */
+			sm_set_bitmap(ic->bitmap, bit, 3);
+			reset_inc_context(ll, ic);
+
+			le_rc = cpu_to_le32(3);
+			__dm_bless_for_disk(&le_rc);
+			r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
+					    &b, &le_rc, &ll->ref_count_root);
+			if (r < 0) {
+				DMERR("ref count insert failed");
+				return r;
+			}
+			break;
+
+		default:
+			/*
+			 * inc within the overflow tree only.
+			 */
+			r = sm_ll_inc_overflow(ll, b, ic);
+			if (r < 0)
+				return r;
+		}
+	}
+
+	*new_b = b;
+	return 0;
+}
+
+/*
+ * Finds a bitmap that contains entries in the block range, and increments
+ * them.
+ */
+static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+		       int32_t *nr_allocations, dm_block_t *new_b)
+{
+	int r;
+	struct inc_context ic;
+	uint32_t bit, bit_end;
+	dm_block_t index = b;
+
+	init_inc_context(&ic);
+
+	bit = do_div(index, ll->entries_per_block);
+	r = ll->load_ie(ll, index, &ic.ie_disk);
+	if (r < 0)
+		return r;
+
+	r = shadow_bitmap(ll, &ic);
+	if (r)
+		return r;
+
+	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+	r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
+
+	exit_inc_context(ll, &ic);
+
+	if (r)
+		return r;
+
+	return ll->save_ie(ll, index, &ic.ie_disk);
+}
+
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+	      int32_t *nr_allocations)
+{
+	*nr_allocations = 0;
+	while (b != e) {
+		int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
+				struct inc_context *ic)
+{
+	reset_inc_context(ll, ic);
+	return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
+			       &b, &ll->ref_count_root);
+}
+
+static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+				struct inc_context *ic, uint32_t *old_rc)
+{
+	int r;
+	int index = -1;
+	struct btree_node *n;
+	__le32 *v_ptr;
+	uint32_t rc;
+
+	reset_inc_context(ll, ic);
+	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+	if (r < 0)
+		return r;
+
+	n = dm_block_data(ic->overflow_leaf);
+
+	if (!contains_key(n, b, index)) {
+		DMERR("overflow btree is missing an entry");
+		return -EINVAL;
+	}
+
+	v_ptr = value_ptr(n, index);
+	rc = le32_to_cpu(*v_ptr);
+	*old_rc = rc;
+
+	if (rc == 3) {
+		return __sm_ll_del_overflow(ll, b, ic);
+	} else {
+		rc--;
+		*v_ptr = cpu_to_le32(rc);
+		return 0;
+	}
+}
+
+static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+			      struct inc_context *ic, uint32_t *old_rc)
+{
+	/*
+	 * Do we already have the correct overflow leaf?
+	 */
+	if (ic->overflow_leaf) {
+		int index;
+		struct btree_node *n;
+		__le32 *v_ptr;
+		uint32_t rc;
+
+		n = dm_block_data(ic->overflow_leaf);
+		index = lower_bound(n, b);
+		if (contains_key(n, b, index)) {
+			v_ptr = value_ptr(n, index);
+			rc = le32_to_cpu(*v_ptr);
+			*old_rc = rc;
+
+			if (rc > 3) {
+				rc--;
+				*v_ptr = cpu_to_le32(rc);
+				return 0;
+			} else {
+				return __sm_ll_del_overflow(ll, b, ic);
+			}
+
+		}
+	}
+
+	return __sm_ll_dec_overflow(ll, b, ic, old_rc);
+}
+
+/*
+ * Loops round incrementing entries in a single bitmap.
+ */
+static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
+				   uint32_t bit, uint32_t bit_end,
+				   struct inc_context *ic,
+				   int32_t *nr_allocations, dm_block_t *new_b)
+{
+	int r;
+	uint32_t old;
+
+	for (; bit != bit_end; bit++, b++) {
+		/*
+		 * We only need to drop the bitmap if we need to find a new btree
+		 * leaf for the overflow.  So if it was dropped last iteration,
+		 * we now re-get it.
+		 */
+		r = ensure_bitmap(ll, ic);
+		if (r)
+			return r;
+
+		old = sm_lookup_bitmap(ic->bitmap, bit);
+		switch (old) {
+		case 0:
+			DMERR("unable to decrement block");
+			return -EINVAL;
+
+		case 1:
+			/* dec bitmap */
+			sm_set_bitmap(ic->bitmap, bit, 0);
+			(*nr_allocations)--;
+			ll->nr_allocated--;
+			le32_add_cpu(&ic->ie_disk.nr_free, 1);
+			ic->ie_disk.none_free_before =
+				cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
+			break;
+
+		case 2:
+			/* dec bitmap and insert into overflow */
+			sm_set_bitmap(ic->bitmap, bit, 1);
+			break;
+
+		case 3:
+			r = sm_ll_dec_overflow(ll, b, ic, &old);
+			if (r < 0)
+				return r;
+
+			if (old == 3) {
+				r = ensure_bitmap(ll, ic);
+				if (r)
+					return r;
+
+				sm_set_bitmap(ic->bitmap, bit, 2);
+			}
+			break;
+		}
+	}
+
+	*new_b = b;
+	return 0;
+}
+
+static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+		       int32_t *nr_allocations, dm_block_t *new_b)
+{
+	int r;
+	uint32_t bit, bit_end;
+	struct inc_context ic;
+	dm_block_t index = b;
+
+	init_inc_context(&ic);
+
+	bit = do_div(index, ll->entries_per_block);
+	r = ll->load_ie(ll, index, &ic.ie_disk);
+	if (r < 0)
+		return r;
+
+	r = shadow_bitmap(ll, &ic);
+	if (r)
+		return r;
+
+	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
+	exit_inc_context(ll, &ic);
+
+	if (r)
+		return r;
+
+	return ll->save_ie(ll, index, &ic.ie_disk);
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+	      int32_t *nr_allocations)
+{
+	*nr_allocations = 0;
+	while (b != e) {
+		int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------*/
+
 int sm_ll_commit(struct ll_disk *ll)
 {
 	int r = 0;
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
index 87e17909ef52..4a22183e78b7 100644
--- a/drivers/md/persistent-data/dm-space-map-common.h
+++ b/drivers/md/persistent-data/dm-space-map-common.h
@@ -96,12 +96,6 @@ struct disk_bitmap_header {
 	__le64 blocknr;
 } __attribute__ ((packed, aligned(8)));
 
-enum allocation_event {
-	SM_NONE,
-	SM_ALLOC,
-	SM_FREE,
-};
-
 /*----------------------------------------------------------------*/
 
 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks);
@@ -111,9 +105,15 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
 			  dm_block_t end, dm_block_t *result);
 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
 	                         dm_block_t begin, dm_block_t end, dm_block_t *result);
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev);
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
+
+/*
+ * The next three functions return (via nr_allocations) the net number of
+ * allocations that were made.  This number may be negative if there were
+ * more frees than allocs.
+ */
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, int32_t *nr_allocations);
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
 int sm_ll_commit(struct ll_disk *ll);
 
 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm);
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index 4f8069bb0481..d0a8d5e73c28 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -87,76 +87,39 @@ static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
 			     uint32_t count)
 {
 	int r;
-	uint32_t old_count;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_insert(&smd->ll, b, count, &ev);
+	r = sm_ll_insert(&smd->ll, b, count, &nr_allocations);
 	if (!r) {
-		switch (ev) {
-		case SM_NONE:
-			break;
-
-		case SM_ALLOC:
-			/*
-			 * This _must_ be free in the prior transaction
-			 * otherwise we've lost atomicity.
-			 */
-			smd->nr_allocated_this_transaction++;
-			break;
-
-		case SM_FREE:
-			/*
-			 * It's only free if it's also free in the last
-			 * transaction.
-			 */
-			r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-			if (r)
-				return r;
-
-			if (!old_count)
-				smd->nr_allocated_this_transaction--;
-			break;
-		}
+		smd->nr_allocated_this_transaction += nr_allocations;
 	}
 
 	return r;
 }
 
-static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_inc(&smd->ll, b, &ev);
-	if (!r && (ev == SM_ALLOC))
-		/*
-		 * This _must_ be free in the prior transaction
-		 * otherwise we've lost atomicity.
-		 */
-		smd->nr_allocated_this_transaction++;
+	r = sm_ll_inc(&smd->ll, b, e, &nr_allocations);
+	if (!r)
+		smd->nr_allocated_this_transaction += nr_allocations;
 
 	return r;
 }
 
-static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r;
-	uint32_t old_count;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_dec(&smd->ll, b, &ev);
-	if (!r && (ev == SM_FREE)) {
-		/*
-		 * It's only free if it's also free in the last
-		 * transaction.
-		 */
-		r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-		if (!r && !old_count)
-			smd->nr_allocated_this_transaction--;
-	}
+	r = sm_ll_dec(&smd->ll, b, e, &nr_allocations);
+	if (!r)
+		smd->nr_allocated_this_transaction += nr_allocations;
 
 	return r;
 }
@@ -164,7 +127,7 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 {
 	int r;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
 	/*
@@ -183,10 +146,9 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 		return r;
 
 	smd->begin = *b + 1;
-	r = sm_ll_inc(&smd->ll, *b, &ev);
+	r = sm_ll_inc(&smd->ll, *b, *b + 1, &nr_allocations);
 	if (!r) {
-		BUG_ON(ev != SM_ALLOC);
-		smd->nr_allocated_this_transaction++;
+		smd->nr_allocated_this_transaction += nr_allocations;
 	}
 
 	return r;
@@ -242,8 +204,8 @@ static struct dm_space_map ops = {
 	.get_count = sm_disk_get_count,
 	.count_is_more_than_one = sm_disk_count_is_more_than_one,
 	.set_count = sm_disk_set_count,
-	.inc_block = sm_disk_inc_block,
-	.dec_block = sm_disk_dec_block,
+	.inc_blocks = sm_disk_inc_blocks,
+	.dec_blocks = sm_disk_dec_blocks,
 	.new_block = sm_disk_new_block,
 	.commit = sm_disk_commit,
 	.root_size = sm_disk_root_size,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index da439ac85796..392ae26134a4 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -89,7 +89,8 @@ enum block_op_type {
 
 struct block_op {
 	enum block_op_type type;
-	dm_block_t block;
+	dm_block_t b;
+	dm_block_t e;
 };
 
 struct bop_ring_buffer {
@@ -116,7 +117,7 @@ static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
 }
 
 static int brb_push(struct bop_ring_buffer *brb,
-		    enum block_op_type type, dm_block_t b)
+		    enum block_op_type type, dm_block_t b, dm_block_t e)
 {
 	struct block_op *bop;
 	unsigned next = brb_next(brb, brb->end);
@@ -130,7 +131,8 @@ static int brb_push(struct bop_ring_buffer *brb,
 
 	bop = brb->bops + brb->end;
 	bop->type = type;
-	bop->block = b;
+	bop->b = b;
+	bop->e = e;
 
 	brb->end = next;
 
@@ -145,9 +147,7 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
 		return -ENODATA;
 
 	bop = brb->bops + brb->begin;
-	result->type = bop->type;
-	result->block = bop->block;
-
+	memcpy(result, bop, sizeof(*result));
 	return 0;
 }
 
@@ -178,10 +178,9 @@ struct sm_metadata {
 	struct threshold threshold;
 };
 
-static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
+static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
 {
-	int r = brb_push(&smm->uncommitted, type, b);
-
+	int r = brb_push(&smm->uncommitted, type, b, e);
 	if (r) {
 		DMERR("too many recursive allocations");
 		return -ENOMEM;
@@ -193,15 +192,15 @@ static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t
 static int commit_bop(struct sm_metadata *smm, struct block_op *op)
 {
 	int r = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 
 	switch (op->type) {
 	case BOP_INC:
-		r = sm_ll_inc(&smm->ll, op->block, &ev);
+		r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
 		break;
 
 	case BOP_DEC:
-		r = sm_ll_dec(&smm->ll, op->block, &ev);
+		r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
 		break;
 	}
 
@@ -314,7 +313,7 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
 	     i = brb_next(&smm->uncommitted, i)) {
 		struct block_op *op = smm->uncommitted.bops + i;
 
-		if (op->block != b)
+		if (b < op->b || b >= op->e)
 			continue;
 
 		switch (op->type) {
@@ -355,7 +354,7 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
 
 		struct block_op *op = smm->uncommitted.bops + i;
 
-		if (op->block != b)
+		if (b < op->b || b >= op->e)
 			continue;
 
 		switch (op->type) {
@@ -393,7 +392,7 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
 				 uint32_t count)
 {
 	int r, r2;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	if (smm->recursion_count) {
@@ -402,40 +401,42 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
 	}
 
 	in(smm);
-	r = sm_ll_insert(&smm->ll, b, count, &ev);
+	r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
 	r2 = out(smm);
 
 	return combine_errors(r, r2);
 }
 
-static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r, r2 = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-	if (recursing(smm))
-		r = add_bop(smm, BOP_INC, b);
-	else {
+	if (recursing(smm)) {
+		r = add_bop(smm, BOP_INC, b, e);
+		if (r)
+			return r;
+	} else {
 		in(smm);
-		r = sm_ll_inc(&smm->ll, b, &ev);
+		r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
 		r2 = out(smm);
 	}
 
 	return combine_errors(r, r2);
 }
 
-static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
 	int r, r2 = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	if (recursing(smm))
-		r = add_bop(smm, BOP_DEC, b);
+		r = add_bop(smm, BOP_DEC, b, e);
 	else {
 		in(smm);
-		r = sm_ll_dec(&smm->ll, b, &ev);
+		r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
 		r2 = out(smm);
 	}
 
@@ -445,7 +446,7 @@ static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
 static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 {
 	int r, r2 = 0;
-	enum allocation_event ev;
+	int32_t nr_allocations;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	/*
@@ -466,10 +467,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
 	smm->begin = *b + 1;
 
 	if (recursing(smm))
-		r = add_bop(smm, BOP_INC, *b);
+		r = add_bop(smm, BOP_INC, *b, *b + 1);
 	else {
 		in(smm);
-		r = sm_ll_inc(&smm->ll, *b, &ev);
+		r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
 		r2 = out(smm);
 	}
 
@@ -563,8 +564,8 @@ static const struct dm_space_map ops = {
 	.get_count = sm_metadata_get_count,
 	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
 	.set_count = sm_metadata_set_count,
-	.inc_block = sm_metadata_inc_block,
-	.dec_block = sm_metadata_dec_block,
+	.inc_blocks = sm_metadata_inc_blocks,
+	.dec_blocks = sm_metadata_dec_blocks,
 	.new_block = sm_metadata_new_block,
 	.commit = sm_metadata_commit,
 	.root_size = sm_metadata_root_size,
@@ -648,18 +649,28 @@ static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
 	return 0;
 }
 
-static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
+	int r;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-	return add_bop(smm, BOP_INC, b);
+	r = add_bop(smm, BOP_INC, b, e);
+	if (r)
+		return r;
+
+	return 0;
 }
 
-static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
 {
+	int r;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-	return add_bop(smm, BOP_DEC, b);
+	r = add_bop(smm, BOP_DEC, b, e);
+	if (r)
+		return r;
+
+	return 0;
 }
 
 static int sm_bootstrap_commit(struct dm_space_map *sm)
@@ -690,8 +701,8 @@ static const struct dm_space_map bootstrap_ops = {
 	.get_count = sm_bootstrap_get_count,
 	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
 	.set_count = sm_bootstrap_set_count,
-	.inc_block = sm_bootstrap_inc_block,
-	.dec_block = sm_bootstrap_dec_block,
+	.inc_blocks = sm_bootstrap_inc_blocks,
+	.dec_blocks = sm_bootstrap_dec_blocks,
 	.new_block = sm_bootstrap_new_block,
 	.commit = sm_bootstrap_commit,
 	.root_size = sm_bootstrap_root_size,
@@ -703,7 +714,7 @@ static const struct dm_space_map bootstrap_ops = {
 
 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 {
-	int r, i;
+	int r;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 	dm_block_t old_len = smm->ll.nr_blocks;
 
@@ -725,9 +736,7 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 	 * allocate any new blocks.
 	 */
 	do {
-		for (i = old_len; !r && i < smm->begin; i++)
-			r = add_bop(smm, BOP_INC, i);
-
+		r = add_bop(smm, BOP_INC, old_len, smm->begin);
 		if (r)
 			goto out;
 
@@ -774,7 +783,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 			  dm_block_t superblock)
 {
 	int r;
-	dm_block_t i;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	smm->begin = superblock + 1;
@@ -799,9 +807,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 	 * Now we need to update the newly created data structures with the
 	 * allocated blocks that they were built from.
 	 */
-	for (i = superblock; !r && i < smm->begin; i++)
-		r = add_bop(smm, BOP_INC, i);
-
+	r = add_bop(smm, BOP_INC, superblock, smm->begin);
 	if (r)
 		return r;
 
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
index 3e6d1153b7c4..a015cd11f6e9 100644
--- a/drivers/md/persistent-data/dm-space-map.h
+++ b/drivers/md/persistent-data/dm-space-map.h
@@ -46,8 +46,8 @@ struct dm_space_map {
 
 	int (*commit)(struct dm_space_map *sm);
 
-	int (*inc_block)(struct dm_space_map *sm, dm_block_t b);
-	int (*dec_block)(struct dm_space_map *sm, dm_block_t b);
+	int (*inc_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
+	int (*dec_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
 
 	/*
 	 * new_block will increment the returned block.
@@ -117,14 +117,24 @@ static inline int dm_sm_commit(struct dm_space_map *sm)
 	return sm->commit(sm);
 }
 
+static inline int dm_sm_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+	return sm->inc_blocks(sm, b, e);
+}
+
 static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b)
 {
-	return sm->inc_block(sm, b);
+	return dm_sm_inc_blocks(sm, b, b + 1);
+}
+
+static inline int dm_sm_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+	return sm->dec_blocks(sm, b, e);
 }
 
 static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
-	return sm->dec_block(sm, b);
+	return dm_sm_dec_blocks(sm, b, b + 1);
 }
 
 static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 4353e1146d73..16643fc974e8 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -359,6 +359,17 @@ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
 }
 EXPORT_SYMBOL_GPL(dm_tm_inc);
 
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+	/*
+	 * The non-blocking clone doesn't support this.
+	 */
+	BUG_ON(tm->is_clone);
+
+	dm_sm_inc_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_inc_range);
+
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 {
 	/*
@@ -370,6 +381,47 @@ void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 }
 EXPORT_SYMBOL_GPL(dm_tm_dec);
 
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+	/*
+	 * The non-blocking clone doesn't support this.
+	 */
+	BUG_ON(tm->is_clone);
+
+	dm_sm_dec_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_dec_range);
+
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+		     const __le64 *value_le, unsigned count, dm_tm_run_fn fn)
+{
+	uint64_t b, begin, end;
+	bool in_run = false;
+	unsigned i;
+
+	for (i = 0; i < count; i++, value_le++) {
+		b = le64_to_cpu(*value_le);
+
+		if (in_run) {
+			if (b == end)
+				end++;
+			else {
+				fn(tm, begin, end);
+				begin = b;
+				end = b + 1;
+			}
+		} else {
+			in_run = true;
+			begin = b;
+			end = b + 1;
+		}
+	}
+
+	if (in_run)
+		fn(tm, begin, end);
+}
+EXPORT_SYMBOL_GPL(dm_tm_with_runs);
+
 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
 	      uint32_t *result)
 {
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index 3d75cc59bbb8..906c02ed0365 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -100,8 +100,18 @@ void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
  * Functions for altering the reference count of a block directly.
  */
 void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b);
-
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
 void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b);
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
+
+/*
+ * Builds up runs of adjacent blocks, and then calls the given fn
+ * (typically dm_tm_inc/dec).  Very useful when you have to perform
+ * the same tm operation on all values in a btree leaf.
+ */
+typedef void (*dm_tm_run_fn)(struct dm_transaction_manager *, dm_block_t, dm_block_t);
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+		     const __le64 *value_le, unsigned count, dm_tm_run_fn fn);
 
 int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result);
 

From 6b06dd5a972288d011a49d63eb9f6a5003d2e932 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Tue, 13 Apr 2021 13:09:32 +0100
Subject: [PATCH 0978/3292] dm space map disk: cache a small number of index
 entries

The disk space map stores it's index entries in a btree, these are
accessed very frequently, so having a few cached makes a big difference
to performance.

With this change provisioning a new block takes roughly 20% less cpu.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 .../md/persistent-data/dm-space-map-common.c  | 86 +++++++++++++++++--
 .../md/persistent-data/dm-space-map-common.h  | 16 ++++
 2 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 5552941912af..4a6a2a9b4eb4 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -7,6 +7,7 @@
 #include "dm-space-map-common.h"
 #include "dm-transaction-manager.h"
 #include "dm-btree-internal.h"
+#include "dm-persistent-data-internal.h"
 
 #include <linux/bitops.h>
 #include <linux/device-mapper.h>
@@ -1083,28 +1084,92 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
 
 /*----------------------------------------------------------------*/
 
+static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
+{
+	iec->dirty = false;
+	__dm_bless_for_disk(iec->ie);
+	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
+			       &iec->index, &iec->ie, &ll->bitmap_root);
+}
+
+static inline unsigned hash_index(dm_block_t index)
+{
+	return dm_hash_block(index, IE_CACHE_MASK);
+}
+
 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
 			   struct disk_index_entry *ie)
 {
-	return dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
+	int r;
+	unsigned h = hash_index(index);
+	struct ie_cache *iec = ll->ie_cache + h;
+
+	if (iec->valid) {
+		if (iec->index == index) {
+			memcpy(ie, &iec->ie, sizeof(*ie));
+			return 0;
+		}
+
+		if (iec->dirty) {
+			r = ie_cache_writeback(ll, iec);
+			if (r)
+				return r;
+		}
+	}
+
+	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
+	if (!r) {
+		iec->valid = true;
+		iec->dirty = false;
+		iec->index = index;
+		memcpy(&iec->ie, ie, sizeof(*ie));
+	}
+
+	return r;
 }
 
 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
 			   struct disk_index_entry *ie)
 {
-	__dm_bless_for_disk(ie);
-	return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
-			       &index, ie, &ll->bitmap_root);
+	int r;
+	unsigned h = hash_index(index);
+	struct ie_cache *iec = ll->ie_cache + h;
+
+	ll->bitmap_index_changed = true;
+	if (iec->valid) {
+		if (iec->index == index) {
+			memcpy(&iec->ie, ie, sizeof(*ie));
+			iec->dirty = true;
+			return 0;
+		}
+
+		if (iec->dirty) {
+			r = ie_cache_writeback(ll, iec);
+			if (r)
+				return r;
+		}
+	}
+
+	iec->valid = true;
+	iec->dirty = true;
+	iec->index = index;
+	memcpy(&iec->ie, ie, sizeof(*ie));
+	return 0;
 }
 
 static int disk_ll_init_index(struct ll_disk *ll)
 {
+	unsigned i;
+	for (i = 0; i < IE_CACHE_SIZE; i++) {
+		struct ie_cache *iec = ll->ie_cache + i;
+		iec->valid = false;
+		iec->dirty = false;
+	}
 	return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
 }
 
 static int disk_ll_open(struct ll_disk *ll)
 {
-	/* nothing to do */
 	return 0;
 }
 
@@ -1115,7 +1180,16 @@ static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
 
 static int disk_ll_commit(struct ll_disk *ll)
 {
-	return 0;
+	int r = 0;
+	unsigned i;
+
+	for (i = 0; i < IE_CACHE_SIZE; i++) {
+		struct ie_cache *iec = ll->ie_cache + i;
+		if (iec->valid && iec->dirty)
+			r = ie_cache_writeback(ll, iec);
+	}
+
+	return r;
 }
 
 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
index 4a22183e78b7..706ceb85d680 100644
--- a/drivers/md/persistent-data/dm-space-map-common.h
+++ b/drivers/md/persistent-data/dm-space-map-common.h
@@ -54,6 +54,20 @@ typedef int (*open_index_fn)(struct ll_disk *ll);
 typedef dm_block_t (*max_index_entries_fn)(struct ll_disk *ll);
 typedef int (*commit_fn)(struct ll_disk *ll);
 
+/*
+ * A lot of time can be wasted reading and writing the same
+ * index entry.  So we cache a few entries.
+ */
+#define IE_CACHE_SIZE 64
+#define IE_CACHE_MASK (IE_CACHE_SIZE - 1)
+
+struct ie_cache {
+	bool valid;
+	bool dirty;
+	dm_block_t index;
+	struct disk_index_entry ie;
+};
+
 struct ll_disk {
 	struct dm_transaction_manager *tm;
 	struct dm_btree_info bitmap_info;
@@ -79,6 +93,8 @@ struct ll_disk {
 	max_index_entries_fn max_entries;
 	commit_fn commit;
 	bool bitmap_index_changed:1;
+
+	struct ie_cache ie_cache[IE_CACHE_SIZE];
 };
 
 struct disk_sm_root {

From db2351eb22e42c5e29ce0caa967a10bb34efabb5 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 26 May 2021 10:16:01 -0400
Subject: [PATCH 0979/3292] dm kcopyd: avoid useless atomic operations

The functions set_bit and clear_bit are atomic. We don't need
atomicity when making flags for dm-kcopyd. So, change them to direct
manipulation of the flags.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-kcopyd.c        | 20 ++++++++++----------
 drivers/md/dm-raid1.c         |  2 +-
 drivers/md/dm-zoned-reclaim.c |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 1bbe4a34ef4c..d85a65a4274e 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -341,7 +341,7 @@ static void client_free_pages(struct dm_kcopyd_client *kc)
 struct kcopyd_job {
 	struct dm_kcopyd_client *kc;
 	struct list_head list;
-	unsigned long flags;
+	unsigned flags;
 
 	/*
 	 * Error state of the job.
@@ -418,7 +418,7 @@ static struct kcopyd_job *pop_io_job(struct list_head *jobs,
 	 * constraint and sequential writes that are at the right position.
 	 */
 	list_for_each_entry(job, jobs, list) {
-		if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
+		if (job->rw == READ || !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
 			list_del(&job->list);
 			return job;
 		}
@@ -525,7 +525,7 @@ static void complete_io(unsigned long error, void *context)
 		else
 			job->read_err = 1;
 
-		if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
+		if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) {
 			push(&kc->complete_jobs, job);
 			wake(kc);
 			return;
@@ -565,7 +565,7 @@ static int run_io_job(struct kcopyd_job *job)
 	 * If we need to write sequentially and some reads or writes failed,
 	 * no point in continuing.
 	 */
-	if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
+	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
 	    job->master_job->write_err) {
 		job->write_err = job->master_job->write_err;
 		return -EIO;
@@ -709,7 +709,7 @@ static void segment_complete(int read_err, unsigned long write_err,
 	 * Only dispatch more work if there hasn't been an error.
 	 */
 	if ((!job->read_err && !job->write_err) ||
-	    test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) {
+	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) {
 		/* get the next chunk of work */
 		progress = job->progress;
 		count = job->source.count - progress;
@@ -801,10 +801,10 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 	 * we need to write sequentially. If one of the destination is a
 	 * host-aware device, then leave it to the caller to choose what to do.
 	 */
-	if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) {
+	if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
 		for (i = 0; i < job->num_dests; i++) {
 			if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
-				set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags);
+				job->flags |= BIT(DM_KCOPYD_WRITE_SEQ);
 				break;
 			}
 		}
@@ -813,9 +813,9 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 	/*
 	 * If we need to write sequentially, errors cannot be ignored.
 	 */
-	if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
-	    test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags))
-		clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags);
+	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
+	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))
+		job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR);
 
 	if (from) {
 		job->source = *from;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index b0a82f29a2e4..ebb4810cc3b4 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -364,7 +364,7 @@ static void recover(struct mirror_set *ms, struct dm_region *reg)
 
 	/* hand to kcopyd */
 	if (!errors_handled(ms))
-		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
+		flags |= BIT(DM_KCOPYD_IGNORE_ERROR);
 
 	dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
 		       flags, recovery_complete, reg);
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index 9c0ecc9568a4..d58db9a27e6c 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -134,7 +134,7 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
 	dst_zone_block = dmz_start_block(zmd, dst_zone);
 
 	if (dmz_is_seq(dst_zone))
-		set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
+		flags |= BIT(DM_KCOPYD_WRITE_SEQ);
 
 	while (block < end_block) {
 		if (src_zone->dev->flags & DMZ_BDEV_DYING)

From 6bcd658f2a2a13fb63c38fc018e1ab210396aefc Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 26 May 2021 10:18:06 -0400
Subject: [PATCH 0980/3292] dm kcopyd: avoid spin_lock_irqsave from process
 context

The functions "pop", "push_head", "do_work" can only be called from
process context. Therefore, replace spin_lock_irq{save,restore} with
spin_{lock,unlock}_irq.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-kcopyd.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index d85a65a4274e..e50625ce74ec 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -437,9 +437,8 @@ static struct kcopyd_job *pop(struct list_head *jobs,
 			      struct dm_kcopyd_client *kc)
 {
 	struct kcopyd_job *job = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&kc->job_lock, flags);
+	spin_lock_irq(&kc->job_lock);
 
 	if (!list_empty(jobs)) {
 		if (jobs == &kc->io_jobs)
@@ -449,7 +448,7 @@ static struct kcopyd_job *pop(struct list_head *jobs,
 			list_del(&job->list);
 		}
 	}
-	spin_unlock_irqrestore(&kc->job_lock, flags);
+	spin_unlock_irq(&kc->job_lock);
 
 	return job;
 }
@@ -467,12 +466,11 @@ static void push(struct list_head *jobs, struct kcopyd_job *job)
 
 static void push_head(struct list_head *jobs, struct kcopyd_job *job)
 {
-	unsigned long flags;
 	struct dm_kcopyd_client *kc = job->kc;
 
-	spin_lock_irqsave(&kc->job_lock, flags);
+	spin_lock_irq(&kc->job_lock);
 	list_add(&job->list, jobs);
-	spin_unlock_irqrestore(&kc->job_lock, flags);
+	spin_unlock_irq(&kc->job_lock);
 }
 
 /*
@@ -648,7 +646,6 @@ static void do_work(struct work_struct *work)
 	struct dm_kcopyd_client *kc = container_of(work,
 					struct dm_kcopyd_client, kcopyd_work);
 	struct blk_plug plug;
-	unsigned long flags;
 
 	/*
 	 * The order that these are called is *very* important.
@@ -657,9 +654,9 @@ static void do_work(struct work_struct *work)
 	 * list.  io jobs call wake when they complete and it all
 	 * starts again.
 	 */
-	spin_lock_irqsave(&kc->job_lock, flags);
+	spin_lock_irq(&kc->job_lock);
 	list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
-	spin_unlock_irqrestore(&kc->job_lock, flags);
+	spin_unlock_irq(&kc->job_lock);
 
 	blk_start_plug(&plug);
 	process_jobs(&kc->complete_jobs, kc, run_complete_job);

From ee50cc19d80e9b9a8283d1fb517a778faf2f6899 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 26 May 2021 10:29:45 -0400
Subject: [PATCH 0981/3292] dm writecache: don't split bios when overwriting
 contiguous cache content

If dm-writecache overwrites existing cached data, it splits the
incoming bio into many block-sized bios. The I/O scheduler does merge
these bios into one large request but this needless splitting and
merging causes performance degradation.

Fix this by avoiding bio splitting if the cache target area that is
being overwritten is contiguous.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index aecc246ade26..a44007297e63 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1360,14 +1360,18 @@ read_next_block:
 	} else {
 		do {
 			bool found_entry = false;
+			bool search_used = false;
 			if (writecache_has_error(wc))
 				goto unlock_error;
 			e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
 			if (e) {
-				if (!writecache_entry_is_committed(wc, e))
+				if (!writecache_entry_is_committed(wc, e)) {
+					search_used = true;
 					goto bio_copy;
+				}
 				if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
 					wc->overwrote_committed = true;
+					search_used = true;
 					goto bio_copy;
 				}
 				found_entry = true;
@@ -1404,13 +1408,31 @@ bio_copy:
 				sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
 
 				while (bio_size < bio->bi_iter.bi_size) {
-					struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
-					if (!f)
-						break;
-					write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
-									(bio_size >> SECTOR_SHIFT), wc->seq_count);
-					writecache_insert_entry(wc, f);
-					wc->uncommitted_blocks++;
+					if (!search_used) {
+						struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+						if (!f)
+							break;
+						write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
+										(bio_size >> SECTOR_SHIFT), wc->seq_count);
+						writecache_insert_entry(wc, f);
+						wc->uncommitted_blocks++;
+					} else {
+						struct wc_entry *f;
+						struct rb_node *next = rb_next(&e->rb_node);
+						if (!next)
+							break;
+						f = container_of(next, struct wc_entry, rb_node);
+						if (f != e + 1)
+							break;
+						if (read_original_sector(wc, f) !=
+						    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
+							break;
+						if (unlikely(f->write_in_progress))
+							break;
+						if (writecache_entry_is_committed(wc, f))
+							wc->overwrote_committed = true;
+						e = f;
+					}
 					bio_size += wc->block_size;
 					current_cache_sec += wc->block_size >> SECTOR_SHIFT;
 				}

From af4f6cabcc5a2449e6b7663d45227bfcb6b725ec Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 26 May 2021 15:49:03 -0400
Subject: [PATCH 0982/3292] dm writecache: interrupt writeback if suspended

If the DM device is suspended, interrupt the writeback sequence so
that there is no excessive suspend delay.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index a44007297e63..ea9f0d8fff1d 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1844,8 +1844,9 @@ restart:
 
 		n_walked++;
 		if (unlikely(n_walked > WRITEBACK_LATENCY) &&
-		    likely(!wc->writeback_all) && likely(!dm_suspended(wc->ti))) {
-			queue_work(wc->writeback_wq, &wc->writeback_work);
+		    likely(!wc->writeback_all)) {
+			if (likely(!dm_suspended(wc->ti)))
+				queue_work(wc->writeback_wq, &wc->writeback_work);
 			break;
 		}
 

From ccde2cbfa31c4d41818a493c1126df05336f8c5a Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Wed, 26 May 2021 23:06:37 +0200
Subject: [PATCH 0983/3292] dm table: Constify static struct blk_ksm_ll_ops

The only usage of dm_ksm_ll_ops is to make a copy of it to the ksm_ll_ops
field in the blk_keyslot_manager struct. Make it const to allow the
compiler to put it in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ee47a332b462..7e88e5e06922 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1244,7 +1244,7 @@ static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
 	return args.err;
 }
 
-static struct blk_ksm_ll_ops dm_ksm_ll_ops = {
+static const struct blk_ksm_ll_ops dm_ksm_ll_ops = {
 	.keyslot_evict = dm_keyslot_evict,
 };
 

From bab68499428ed934f0493ac74197ed6f36204260 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 19 May 2021 10:26:16 +0900
Subject: [PATCH 0984/3292] dm zoned: check zone capacity

The dm-zoned target cannot support zoned block devices with zones that
have a capacity smaller than the zone size (e.g. NVMe zoned namespaces)
due to the current chunk zone mapping implementation as it is assumed
that zones and chunks have the same size with all blocks usable.
If a zoned drive is found to have zones with a capacity different from
the zone size, fail the target initialization.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org # v5.9+
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-zoned-metadata.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 039d17b28938..ee4626d08557 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1390,6 +1390,13 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
 		return -ENXIO;
 	}
 
+	/*
+	 * Devices that have zones with a capacity smaller than the zone size
+	 * (e.g. NVMe zoned namespaces) are not supported.
+	 */
+	if (blkz->capacity != blkz->len)
+		return -ENXIO;
+
 	switch (blkz->type) {
 	case BLK_ZONE_TYPE_CONVENTIONAL:
 		set_bit(DMZ_RND, &zone->flags);

From 6842d264aa5205da338b6dcc6acfa2a6732558f1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:54 +0900
Subject: [PATCH 0985/3292] dm: Fix dm_accept_partial_bio() relative to zone
 management commands

Fix dm_accept_partial_bio() to actually check that zone management
commands are not passed as explained in the function documentation
comment. Also, since a zone append operation cannot be split, add
REQ_OP_ZONE_APPEND as a forbidden command.

White lines are added around the group of BUG_ON() calls to make the
code more legible.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ca2aedd8ee7d..11af20080639 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1237,8 +1237,8 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
 
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
- * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
- * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH.
+ * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
+ * operations and REQ_OP_ZONE_APPEND (zone append writes).
  *
  * dm_accept_partial_bio informs the dm that the target only wants to process
  * additional n_sectors sectors of the bio and the rest of the data should be
@@ -1268,9 +1268,13 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 {
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
 	BUG_ON(bio->bi_opf & REQ_PREFLUSH);
+	BUG_ON(op_is_zone_mgmt(bio_op(bio)));
+	BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
 	BUG_ON(bi_size > *tio->len_ptr);
 	BUG_ON(n_sectors > bi_size);
+
 	*tio->len_ptr -= bi_size - n_sectors;
 	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
 }

From dd73c320ec3089149b802a1316321c3e0f6a6aaf Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:55 +0900
Subject: [PATCH 0986/3292] dm: cleanup device_area_is_invalid()

In device_area_is_invalid(), use bdev_is_zoned() instead of open
coding the test on the zoned model returned by bdev_zoned_model().

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 7e88e5e06922..123d1a3a358e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -249,7 +249,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 	 * If the target is mapped to zoned block device(s), check
 	 * that the zones are not partially mapped.
 	 */
-	if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) {
+	if (bdev_is_zoned(bdev)) {
 		unsigned int zone_sectors = bdev_zone_sectors(bdev);
 
 		if (start & (zone_sectors - 1)) {

From 7fc18728482b1a29bd7b8439a0ae7b3f23e097d1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:56 +0900
Subject: [PATCH 0987/3292] dm: move zone related code to dm-zone.c

Move core and table code used for zoned targets and conditionally
defined with #ifdef CONFIG_BLK_DEV_ZONED to the new file dm-zone.c.
This file is conditionally compiled depending on CONFIG_BLK_DEV_ZONED.
The small helper dm_set_zones_restrictions() is introduced to
initialize a mapped device request queue zone attributes in
dm_table_set_restrictions().

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/Makefile   |   4 ++
 drivers/md/dm-table.c |  14 ++----
 drivers/md/dm-zone.c  | 101 ++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm.c       |  78 --------------------------------
 drivers/md/dm.h       |  11 +++++
 5 files changed, 119 insertions(+), 89 deletions(-)
 create mode 100644 drivers/md/dm-zone.c

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index ef7ddc27685c..a74aaf8b1445 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -92,6 +92,10 @@ ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
 endif
 
+ifeq ($(CONFIG_BLK_DEV_ZONED),y)
+dm-mod-objs			+= dm-zone.o
+endif
+
 ifeq ($(CONFIG_DM_VERITY_FEC),y)
 dm-verity-objs			+= dm-verity-fec.o
 endif
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 123d1a3a358e..1134ceed800f 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2064,17 +2064,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	    dm_table_any_dev_attr(t, device_is_not_random, NULL))
 		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
 
-	/*
-	 * For a zoned target, the number of zones should be updated for the
-	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
-	 * target, this is all that is needed.
-	 */
-#ifdef CONFIG_BLK_DEV_ZONED
-	if (blk_queue_is_zoned(q)) {
-		WARN_ON_ONCE(queue_is_mq(q));
-		q->nr_zones = blkdev_nr_zones(t->md->disk);
-	}
-#endif
+	/* For a zoned target, setup the zones related queue attributes */
+	if (blk_queue_is_zoned(q))
+		dm_set_zones_restrictions(t, q);
 
 	dm_update_keyslot_manager(q, t);
 	blk_queue_update_readahead(q);
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
new file mode 100644
index 000000000000..9a34d0f319fd
--- /dev/null
+++ b/drivers/md/dm-zone.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/blkdev.h>
+
+#include "dm-core.h"
+
+/*
+ * User facing dm device block device report zone operation. This calls the
+ * report_zones operation for each target of a device table. This operation is
+ * generally implemented by targets using dm_report_zones().
+ */
+int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
+			unsigned int nr_zones, report_zones_cb cb, void *data)
+{
+	struct mapped_device *md = disk->private_data;
+	struct dm_table *map;
+	int srcu_idx, ret;
+	struct dm_report_zones_args args = {
+		.next_sector = sector,
+		.orig_data = data,
+		.orig_cb = cb,
+	};
+
+	if (dm_suspended_md(md))
+		return -EAGAIN;
+
+	map = dm_get_live_table(md, &srcu_idx);
+	if (!map) {
+		ret = -EIO;
+		goto out;
+	}
+
+	do {
+		struct dm_target *tgt;
+
+		tgt = dm_table_find_target(map, args.next_sector);
+		if (WARN_ON_ONCE(!tgt->type->report_zones)) {
+			ret = -EIO;
+			goto out;
+		}
+
+		args.tgt = tgt;
+		ret = tgt->type->report_zones(tgt, &args,
+					      nr_zones - args.zone_idx);
+		if (ret < 0)
+			goto out;
+	} while (args.zone_idx < nr_zones &&
+		 args.next_sector < get_capacity(disk));
+
+	ret = args.zone_idx;
+out:
+	dm_put_live_table(md, srcu_idx);
+	return ret;
+}
+
+int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
+{
+	struct dm_report_zones_args *args = data;
+	sector_t sector_diff = args->tgt->begin - args->start;
+
+	/*
+	 * Ignore zones beyond the target range.
+	 */
+	if (zone->start >= args->start + args->tgt->len)
+		return 0;
+
+	/*
+	 * Remap the start sector and write pointer position of the zone
+	 * to match its position in the target range.
+	 */
+	zone->start += sector_diff;
+	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+		if (zone->cond == BLK_ZONE_COND_FULL)
+			zone->wp = zone->start + zone->len;
+		else if (zone->cond == BLK_ZONE_COND_EMPTY)
+			zone->wp = zone->start;
+		else
+			zone->wp += sector_diff;
+	}
+
+	args->next_sector = zone->start + zone->len;
+	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
+}
+EXPORT_SYMBOL_GPL(dm_report_zones_cb);
+
+void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
+{
+	if (!blk_queue_is_zoned(q))
+		return;
+
+	/*
+	 * For a zoned target, the number of zones should be updated for the
+	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
+	 * target, this is all that is needed.
+	 */
+	WARN_ON_ONCE(queue_is_mq(q));
+	q->nr_zones = blkdev_nr_zones(t->md->disk);
+}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 11af20080639..c49976cc4e44 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -444,84 +444,6 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return dm_get_geometry(md, geo);
 }
 
-#ifdef CONFIG_BLK_DEV_ZONED
-int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
-{
-	struct dm_report_zones_args *args = data;
-	sector_t sector_diff = args->tgt->begin - args->start;
-
-	/*
-	 * Ignore zones beyond the target range.
-	 */
-	if (zone->start >= args->start + args->tgt->len)
-		return 0;
-
-	/*
-	 * Remap the start sector and write pointer position of the zone
-	 * to match its position in the target range.
-	 */
-	zone->start += sector_diff;
-	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
-		if (zone->cond == BLK_ZONE_COND_FULL)
-			zone->wp = zone->start + zone->len;
-		else if (zone->cond == BLK_ZONE_COND_EMPTY)
-			zone->wp = zone->start;
-		else
-			zone->wp += sector_diff;
-	}
-
-	args->next_sector = zone->start + zone->len;
-	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
-}
-EXPORT_SYMBOL_GPL(dm_report_zones_cb);
-
-static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
-		unsigned int nr_zones, report_zones_cb cb, void *data)
-{
-	struct mapped_device *md = disk->private_data;
-	struct dm_table *map;
-	int srcu_idx, ret;
-	struct dm_report_zones_args args = {
-		.next_sector = sector,
-		.orig_data = data,
-		.orig_cb = cb,
-	};
-
-	if (dm_suspended_md(md))
-		return -EAGAIN;
-
-	map = dm_get_live_table(md, &srcu_idx);
-	if (!map) {
-		ret = -EIO;
-		goto out;
-	}
-
-	do {
-		struct dm_target *tgt;
-
-		tgt = dm_table_find_target(map, args.next_sector);
-		if (WARN_ON_ONCE(!tgt->type->report_zones)) {
-			ret = -EIO;
-			goto out;
-		}
-
-		args.tgt = tgt;
-		ret = tgt->type->report_zones(tgt, &args,
-					      nr_zones - args.zone_idx);
-		if (ret < 0)
-			goto out;
-	} while (args.zone_idx < nr_zones &&
-		 args.next_sector < get_capacity(disk));
-
-	ret = args.zone_idx;
-out:
-	dm_put_live_table(md, srcu_idx);
-	return ret;
-}
-#else
-#define dm_blk_report_zones		NULL
-#endif /* CONFIG_BLK_DEV_ZONED */
-
 static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
 			    struct block_device **bdev)
 {
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index b441ad772c18..fdf1536a4b62 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -100,6 +100,17 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
  */
 #define dm_target_hybrid(t) (dm_target_bio_based(t) && dm_target_request_based(t))
 
+/*
+ * Zoned targets related functions.
+ */
+void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
+#ifdef CONFIG_BLK_DEV_ZONED
+int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
+			unsigned int nr_zones, report_zones_cb cb, void *data);
+#else
+#define dm_blk_report_zones	NULL
+#endif
+
 /*-----------------------------------------------------------------
  * A registry of target types.
  *---------------------------------------------------------------*/

From 912e887505a07123917e537b657859723ce5d472 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:57 +0900
Subject: [PATCH 0988/3292] dm: Introduce dm_report_zones()

To simplify the implementation of the report_zones operation of a zoned
target, introduce the function dm_report_zones() to set a target
mapping start sector in struct dm_report_zones_args and call
blkdev_report_zones(). This new function is exported and the report
zones callback function dm_report_zones_cb() is not.

dm-linear, dm-flakey and dm-crypt are modified to use dm_report_zones().

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-crypt.c         |  7 +++----
 drivers/md/dm-flakey.c        |  7 +++----
 drivers/md/dm-linear.c        |  7 +++----
 drivers/md/dm-zone.c          | 22 ++++++++++++++++++++--
 include/linux/device-mapper.h |  3 ++-
 5 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index b0ab080f2567..f410ceee51d7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -3138,11 +3138,10 @@ static int crypt_report_zones(struct dm_target *ti,
 		struct dm_report_zones_args *args, unsigned int nr_zones)
 {
 	struct crypt_config *cc = ti->private;
-	sector_t sector = cc->start + dm_target_offset(ti, args->next_sector);
 
-	args->start = cc->start;
-	return blkdev_report_zones(cc->dev->bdev, sector, nr_zones,
-				   dm_report_zones_cb, args);
+	return dm_report_zones(cc->dev->bdev, cc->start,
+			cc->start + dm_target_offset(ti, args->next_sector),
+			args, nr_zones);
 }
 #else
 #define crypt_report_zones NULL
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index b7fee9936f05..5877220c01ed 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -463,11 +463,10 @@ static int flakey_report_zones(struct dm_target *ti,
 		struct dm_report_zones_args *args, unsigned int nr_zones)
 {
 	struct flakey_c *fc = ti->private;
-	sector_t sector = flakey_map_sector(ti, args->next_sector);
 
-	args->start = fc->start;
-	return blkdev_report_zones(fc->dev->bdev, sector, nr_zones,
-				   dm_report_zones_cb, args);
+	return dm_report_zones(fc->dev->bdev, fc->start,
+			       flakey_map_sector(ti, args->next_sector),
+			       args, nr_zones);
 }
 #else
 #define flakey_report_zones NULL
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 92db0f5e7f28..c91f1e2e2f65 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -140,11 +140,10 @@ static int linear_report_zones(struct dm_target *ti,
 		struct dm_report_zones_args *args, unsigned int nr_zones)
 {
 	struct linear_c *lc = ti->private;
-	sector_t sector = linear_map_sector(ti, args->next_sector);
 
-	args->start = lc->start;
-	return blkdev_report_zones(lc->dev->bdev, sector, nr_zones,
-				   dm_report_zones_cb, args);
+	return dm_report_zones(lc->dev->bdev, lc->start,
+			       linear_map_sector(ti, args->next_sector),
+			       args, nr_zones);
 }
 #else
 #define linear_report_zones NULL
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 9a34d0f319fd..b42474043249 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -56,7 +56,8 @@ out:
 	return ret;
 }
 
-int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
+static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
+			      void *data)
 {
 	struct dm_report_zones_args *args = data;
 	sector_t sector_diff = args->tgt->begin - args->start;
@@ -84,7 +85,24 @@ int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
 	args->next_sector = zone->start + zone->len;
 	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
 }
-EXPORT_SYMBOL_GPL(dm_report_zones_cb);
+
+/*
+ * Helper for drivers of zoned targets to implement struct target_type
+ * report_zones operation.
+ */
+int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
+		    struct dm_report_zones_args *args, unsigned int nr_zones)
+{
+	/*
+	 * Set the target mapping start sector first so that
+	 * dm_report_zones_cb() can correctly remap zone information.
+	 */
+	args->start = start;
+
+	return blkdev_report_zones(bdev, sector, nr_zones,
+				   dm_report_zones_cb, args);
+}
+EXPORT_SYMBOL_GPL(dm_report_zones);
 
 void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 {
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ff700fb6ce1d..caea0a079d2d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -478,7 +478,8 @@ struct dm_report_zones_args {
 	/* must be filled by ->report_zones before calling dm_report_zones_cb */
 	sector_t start;
 };
-int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data);
+int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
+		    struct dm_report_zones_args *args, unsigned int nr_zones);
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 /*

From bf14e2b250e4ff94392bbe87c523effdec687b0b Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:58 +0900
Subject: [PATCH 0989/3292] dm: Forbid requeue of writes to zones

A target map method requesting the requeue of a bio with
DM_MAPIO_REQUEUE or completing it with DM_ENDIO_REQUEUE can cause
unaligned write errors if the bio is a write operation targeting a
sequential zone. If a zoned target request such a requeue, warn about
it and kill the IO.

The function dm_is_zone_write() is introduced to detect write operations
to zoned targets.

This change does not affect the target drivers supporting zoned devices
and exposing a zoned device, namely dm-crypt, dm-linear and dm-flakey as
none of these targets ever request a requeue.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-zone.c | 17 +++++++++++++++++
 drivers/md/dm.c      | 25 +++++++++++++++++++------
 drivers/md/dm.h      |  5 +++++
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index b42474043249..edc3bbb45637 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -104,6 +104,23 @@ int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(dm_report_zones);
 
+bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
+{
+	struct request_queue *q = md->queue;
+
+	if (!blk_queue_is_zoned(q))
+		return false;
+
+	switch (bio_op(bio)) {
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE:
+		return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
+	default:
+		return false;
+	}
+}
+
 void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 {
 	if (!blk_queue_is_zoned(q))
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c49976cc4e44..6134a97f9016 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -841,22 +841,27 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
 	}
 
 	if (atomic_dec_and_test(&io->io_count)) {
+		bio = io->orig_bio;
 		if (io->status == BLK_STS_DM_REQUEUE) {
 			/*
 			 * Target requested pushing back the I/O.
 			 */
 			spin_lock_irqsave(&md->deferred_lock, flags);
-			if (__noflush_suspending(md))
+			if (__noflush_suspending(md) &&
+			    !WARN_ON_ONCE(dm_is_zone_write(md, bio))) {
 				/* NOTE early return due to BLK_STS_DM_REQUEUE below */
-				bio_list_add_head(&md->deferred, io->orig_bio);
-			else
-				/* noflush suspend was interrupted. */
+				bio_list_add_head(&md->deferred, bio);
+			} else {
+				/*
+				 * noflush suspend was interrupted or this is
+				 * a write to a zoned target.
+				 */
 				io->status = BLK_STS_IOERR;
+			}
 			spin_unlock_irqrestore(&md->deferred_lock, flags);
 		}
 
 		io_error = io->status;
-		bio = io->orig_bio;
 		end_io_acct(io);
 		free_io(md, io);
 
@@ -947,7 +952,15 @@ static void clone_endio(struct bio *bio)
 		int r = endio(tio->ti, bio, &error);
 		switch (r) {
 		case DM_ENDIO_REQUEUE:
-			error = BLK_STS_DM_REQUEUE;
+			/*
+			 * Requeuing writes to a sequential zone of a zoned
+			 * target will break the sequential write pattern:
+			 * fail such IO.
+			 */
+			if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
+				error = BLK_STS_IOERR;
+			else
+				error = BLK_STS_DM_REQUEUE;
 			fallthrough;
 		case DM_ENDIO_DONE:
 			break;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index fdf1536a4b62..39c243258e24 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -107,8 +107,13 @@ void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
 #ifdef CONFIG_BLK_DEV_ZONED
 int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
+bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
 #else
 #define dm_blk_report_zones	NULL
+static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
+{
+	return false;
+}
 #endif
 
 /*-----------------------------------------------------------------

From 1ee533eca70bb8867ad1e6f5ef8a86c8897d67d7 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:51 +0900
Subject: [PATCH 0990/3292] block: improve handling of all zones reset
 operation

SCSI, ZNS and null_blk zoned devices support resetting all zones using
a single command (REQ_OP_ZONE_RESET_ALL), as indicated using the device
request queue flag QUEUE_FLAG_ZONE_RESETALL. This flag is not set for
device mapper targets creating zoned devices. In this case, a user
request for resetting all zones of a device is processed in
blkdev_zone_mgmt() by issuing a REQ_OP_ZONE_RESET operation for each
zone of the device. This leads to different behaviors of the
BLKRESETZONE ioctl() depending on the target device support for the
reset all operation. E.g.

blkzone reset /dev/sdX

will reset all zones of a SCSI device using a single command that will
ignore conventional, read-only or offline zones.

But a dm-linear device including conventional, read-only or offline
zones cannot be reset in the same manner as some of the single zone
reset operations issued by blkdev_zone_mgmt() will fail. E.g.:

blkzone reset /dev/dm-Y
blkzone: /dev/dm-0: BLKRESETZONE ioctl failed: Remote I/O error

To simplify applications and tools development, unify the behavior of
the all-zone reset operation by modifying blkdev_zone_mgmt() to not
issue a zone reset operation for conventional, read-only and offline
zones, thus mimicking what an actual reset-all device command does on a
device supporting REQ_OP_ZONE_RESET_ALL. This emulation is done using
the new function blkdev_zone_reset_all_emulated(). The zones needing a
reset are identified using a bitmap that is initialized using a zone
report. Since empty zones do not need a reset, also ignore these zones.
The function blkdev_zone_reset_all() is introduced for block devices
natively supporting reset all operations. blkdev_zone_mgmt() is modified
to call either function to execute an all zone reset request.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
[hch: split into multiple functions]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-zoned.c | 119 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 92 insertions(+), 27 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 250cb76ee615..86fce751bb17 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -161,18 +161,89 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
-static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
-						sector_t sector,
-						sector_t nr_sectors)
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+						   unsigned int nr_zones)
 {
-	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
-		return false;
+	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+			    GFP_NOIO, node);
+}
 
+static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+				  void *data)
+{
 	/*
-	 * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
-	 * of the applicable zone range is the entire disk.
+	 * For an all-zones reset, ignore conventional, empty, read-only
+	 * and offline zones.
 	 */
-	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
+	switch (zone->cond) {
+	case BLK_ZONE_COND_NOT_WP:
+	case BLK_ZONE_COND_EMPTY:
+	case BLK_ZONE_COND_READONLY:
+	case BLK_ZONE_COND_OFFLINE:
+		return 0;
+	default:
+		set_bit(idx, (unsigned long *)data);
+		return 0;
+	}
+}
+
+static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
+					  gfp_t gfp_mask)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	sector_t capacity = get_capacity(bdev->bd_disk);
+	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	unsigned long *need_reset;
+	struct bio *bio = NULL;
+	sector_t sector = 0;
+	int ret;
+
+	need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
+	if (!need_reset)
+		return -ENOMEM;
+
+	ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
+				q->nr_zones, blk_zone_need_reset_cb,
+				need_reset);
+	if (ret < 0)
+		goto out_free_need_reset;
+
+	ret = 0;
+	while (sector < capacity) {
+		if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) {
+			sector += zone_sectors;
+			continue;
+		}
+
+		bio = blk_next_bio(bio, 0, gfp_mask);
+		bio_set_dev(bio, bdev);
+		bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
+		bio->bi_iter.bi_sector = sector;
+		sector += zone_sectors;
+
+		/* This may take a while, so be nice to others */
+		cond_resched();
+	}
+
+	if (bio) {
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+
+out_free_need_reset:
+	kfree(need_reset);
+	return ret;
+}
+
+static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
+{
+	struct bio bio;
+
+	bio_init(&bio, NULL, 0);
+	bio_set_dev(&bio, bdev);
+	bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
+
+	return submit_bio_wait(&bio);
 }
 
 /**
@@ -200,7 +271,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	sector_t capacity = get_capacity(bdev->bd_disk);
 	sector_t end_sector = sector + nr_sectors;
 	struct bio *bio = NULL;
-	int ret;
+	int ret = 0;
 
 	if (!blk_queue_is_zoned(q))
 		return -EOPNOTSUPP;
@@ -222,20 +293,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
 		return -EINVAL;
 
+	/*
+	 * In the case of a zone reset operation over all zones,
+	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
+	 * command. For other devices, we emulate this command behavior by
+	 * identifying the zones needing a reset.
+	 */
+	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
+		if (!blk_queue_zone_resetall(q))
+			return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
+		return blkdev_zone_reset_all(bdev, gfp_mask);
+	}
+
 	while (sector < end_sector) {
 		bio = blk_next_bio(bio, 0, gfp_mask);
 		bio_set_dev(bio, bdev);
-
-		/*
-		 * Special case for the zone reset operation that reset all
-		 * zones, this is useful for applications like mkfs.
-		 */
-		if (op == REQ_OP_ZONE_RESET &&
-		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
-			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
-			break;
-		}
-
 		bio->bi_opf = op | REQ_SYNC;
 		bio->bi_iter.bi_sector = sector;
 		sector += zone_sectors;
@@ -396,13 +468,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
-						   unsigned int nr_zones)
-{
-	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
-			    GFP_NOIO, node);
-}
-
 void blk_queue_free_zone_bitmaps(struct request_queue *q)
 {
 	kfree(q->conv_zones_bitmap);

From d0ea6bde141df9311bc36e7b07ad37b449f2c4f5 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:52 +0900
Subject: [PATCH 0991/3292] block: introduce bio zone helpers

Introduce the helper functions bio_zone_no() and bio_zone_is_seq().
Both are the BIO counterparts of the request helpers blk_rq_zone_no()
and blk_rq_zone_is_seq(), respectively returning the number of the
target zone of a bio and true if the BIO target zone is sequential.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/linux/blkdev.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f69c75bd6d27..2db0f376f5d9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1008,6 +1008,18 @@ static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
 /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
 const char *blk_zone_cond_str(enum blk_zone_cond zone_cond);
 
+static inline unsigned int bio_zone_no(struct bio *bio)
+{
+	return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev),
+				 bio->bi_iter.bi_sector);
+}
+
+static inline unsigned int bio_zone_is_seq(struct bio *bio)
+{
+	return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev),
+				     bio->bi_iter.bi_sector);
+}
+
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
 	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));

From 9ffbbb435d8f566a0924ce4b5dc7fc1bceb6dbf8 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:53 +0900
Subject: [PATCH 0992/3292] block: introduce BIO_ZONE_WRITE_LOCKED bio flag

Introduce the BIO flag BIO_ZONE_WRITE_LOCKED to indicate that a BIO owns
the write lock of the zone it is targeting. This is the counterpart of
the struct request flag RQF_ZONE_WRITE_LOCKED.

This new BIO flag is reserved for now for zone write locking control
for device mapper targets exposing a zoned block device. Since in this
case, the lock flag must not be propagated to the struct request that
will be used to process the BIO, a BIO private flag is used rather than
changing the RQF_ZONE_WRITE_LOCKED request flag into a common REQ_XXX
flag that could be used for both BIO and request. This avoids conflicts
down the stack with the block IO scheduler zone write locking
(in mq-deadline).

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/linux/blk_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db026b6ec15a..e5cf12f102a2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -304,6 +304,7 @@ enum {
 	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
 	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
 	BIO_REMAPPED,
+	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
 	BIO_FLAG_LAST
 };
 

From e2118b3c3d94289852417f70ec128c25f4833aad Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:24:59 +0900
Subject: [PATCH 0993/3292] dm: rearrange core declarations for extended use
 from dm-zone.c

Move the definitions of struct dm_target_io, struct dm_io and the bits
of the flags field of struct mapped_device from dm.c to dm-core.h to
make them usable from dm-zone.c. For the same reason, declare
dec_pending() in dm-core.h after renaming it to dm_io_dec_pending().
And for symmetry of the function names, introduce the inline helper
dm_io_inc_pending() instead of directly using atomic_inc() calls.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-core.h | 52 ++++++++++++++++++++++++++++++++++++++
 drivers/md/dm.c      | 59 ++++++--------------------------------------
 2 files changed, 59 insertions(+), 52 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 5953ff2bd260..cfabc1c91f9f 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -116,6 +116,19 @@ struct mapped_device {
 	struct srcu_struct io_barrier;
 };
 
+/*
+ * Bits for the flags field of struct mapped_device.
+ */
+#define DMF_BLOCK_IO_FOR_SUSPEND 0
+#define DMF_SUSPENDED 1
+#define DMF_FROZEN 2
+#define DMF_FREEING 3
+#define DMF_DELETING 4
+#define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_DEFERRED_REMOVE 6
+#define DMF_SUSPENDED_INTERNALLY 7
+#define DMF_POST_SUSPENDING 8
+
 void disable_discard(struct mapped_device *md);
 void disable_write_same(struct mapped_device *md);
 void disable_write_zeroes(struct mapped_device *md);
@@ -173,6 +186,45 @@ struct dm_table {
 #endif
 };
 
+/*
+ * One of these is allocated per clone bio.
+ */
+#define DM_TIO_MAGIC 7282014
+struct dm_target_io {
+	unsigned int magic;
+	struct dm_io *io;
+	struct dm_target *ti;
+	unsigned int target_bio_nr;
+	unsigned int *len_ptr;
+	bool inside_dm_io;
+	struct bio clone;
+};
+
+/*
+ * One of these is allocated per original bio.
+ * It contains the first clone used for that original.
+ */
+#define DM_IO_MAGIC 5191977
+struct dm_io {
+	unsigned int magic;
+	struct mapped_device *md;
+	blk_status_t status;
+	atomic_t io_count;
+	struct bio *orig_bio;
+	unsigned long start_time;
+	spinlock_t endio_lock;
+	struct dm_stats_aux stats_aux;
+	/* last member of dm_target_io is 'struct bio' */
+	struct dm_target_io tio;
+};
+
+static inline void dm_io_inc_pending(struct dm_io *io)
+{
+	atomic_inc(&io->io_count);
+}
+
+void dm_io_dec_pending(struct dm_io *io, blk_status_t error);
+
 static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
 {
 	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6134a97f9016..49bd18e99af6 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -74,38 +74,6 @@ struct clone_info {
 	unsigned sector_count;
 };
 
-/*
- * One of these is allocated per clone bio.
- */
-#define DM_TIO_MAGIC 7282014
-struct dm_target_io {
-	unsigned magic;
-	struct dm_io *io;
-	struct dm_target *ti;
-	unsigned target_bio_nr;
-	unsigned *len_ptr;
-	bool inside_dm_io;
-	struct bio clone;
-};
-
-/*
- * One of these is allocated per original bio.
- * It contains the first clone used for that original.
- */
-#define DM_IO_MAGIC 5191977
-struct dm_io {
-	unsigned magic;
-	struct mapped_device *md;
-	blk_status_t status;
-	atomic_t io_count;
-	struct bio *orig_bio;
-	unsigned long start_time;
-	spinlock_t endio_lock;
-	struct dm_stats_aux stats_aux;
-	/* last member of dm_target_io is 'struct bio' */
-	struct dm_target_io tio;
-};
-
 #define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
 #define DM_IO_BIO_OFFSET \
 	(offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
@@ -137,19 +105,6 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
 
 #define MINOR_ALLOCED ((void *)-1)
 
-/*
- * Bits for the md->flags field.
- */
-#define DMF_BLOCK_IO_FOR_SUSPEND 0
-#define DMF_SUSPENDED 1
-#define DMF_FROZEN 2
-#define DMF_FREEING 3
-#define DMF_DELETING 4
-#define DMF_NOFLUSH_SUSPENDING 5
-#define DMF_DEFERRED_REMOVE 6
-#define DMF_SUSPENDED_INTERNALLY 7
-#define DMF_POST_SUSPENDING 8
-
 #define DM_NUMA_NODE NUMA_NO_NODE
 static int dm_numa_node = DM_NUMA_NODE;
 
@@ -825,7 +780,7 @@ static int __noflush_suspending(struct mapped_device *md)
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
  */
-static void dec_pending(struct dm_io *io, blk_status_t error)
+void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
 {
 	unsigned long flags;
 	blk_status_t io_error;
@@ -979,7 +934,7 @@ static void clone_endio(struct bio *bio)
 	}
 
 	free_tio(tio);
-	dec_pending(io, error);
+	dm_io_dec_pending(io, error);
 }
 
 /*
@@ -1247,7 +1202,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 	 * anything, the target has assumed ownership of
 	 * this io.
 	 */
-	atomic_inc(&io->io_count);
+	dm_io_inc_pending(io);
 	sector = clone->bi_iter.bi_sector;
 
 	if (unlikely(swap_bios_limit(ti, clone))) {
@@ -1273,7 +1228,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 			up(&md->swap_bios_semaphore);
 		}
 		free_tio(tio);
-		dec_pending(io, BLK_STS_IOERR);
+		dm_io_dec_pending(io, BLK_STS_IOERR);
 		break;
 	case DM_MAPIO_REQUEUE:
 		if (unlikely(swap_bios_limit(ti, clone))) {
@@ -1281,7 +1236,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 			up(&md->swap_bios_semaphore);
 		}
 		free_tio(tio);
-		dec_pending(io, BLK_STS_DM_REQUEUE);
+		dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
 		break;
 	default:
 		DMWARN("unimplemented target map return value: %d", r);
@@ -1570,7 +1525,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 
 	if (bio->bi_opf & REQ_PREFLUSH) {
 		error = __send_empty_flush(&ci);
-		/* dec_pending submits any data associated with flush */
+		/* dm_io_dec_pending submits any data associated with flush */
 	} else if (op_is_zone_mgmt(bio_op(bio))) {
 		ci.bio = bio;
 		ci.sector_count = 0;
@@ -1611,7 +1566,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 	}
 
 	/* drop the extra reference count */
-	dec_pending(ci.io, errno_to_blk_status(error));
+	dm_io_dec_pending(ci.io, errno_to_blk_status(error));
 	return ret;
 }
 

From bb37d77239af25cde59693dbe3fac04dd17d7b29 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:25:00 +0900
Subject: [PATCH 0994/3292] dm: introduce zone append emulation

For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.

To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intersepted in __map_bio() and
processed using the new functions dm_zone_map_bio().

Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.

Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.

The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().

For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.

All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-core.h          |  13 +
 drivers/md/dm-table.c         |  19 +-
 drivers/md/dm-zone.c          | 584 ++++++++++++++++++++++++++++++++--
 drivers/md/dm.c               |  38 ++-
 drivers/md/dm.h               |  16 +-
 include/linux/device-mapper.h |   6 +
 6 files changed, 620 insertions(+), 56 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index cfabc1c91f9f..edc1553c4eea 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -114,6 +114,11 @@ struct mapped_device {
 	bool init_tio_pdu:1;
 
 	struct srcu_struct io_barrier;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+	unsigned int nr_zones;
+	unsigned int *zwp_offset;
+#endif
 };
 
 /*
@@ -128,6 +133,7 @@ struct mapped_device {
 #define DMF_DEFERRED_REMOVE 6
 #define DMF_SUSPENDED_INTERNALLY 7
 #define DMF_POST_SUSPENDING 8
+#define DMF_EMULATE_ZONE_APPEND 9
 
 void disable_discard(struct mapped_device *md);
 void disable_write_same(struct mapped_device *md);
@@ -143,6 +149,13 @@ static inline struct dm_stats *dm_get_stats(struct mapped_device *md)
 	return &md->stats;
 }
 
+static inline bool dm_emulate_zone_append(struct mapped_device *md)
+{
+	if (blk_queue_is_zoned(md->queue))
+		return test_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+	return false;
+}
+
 #define DM_TABLE_MAX_DEPTH 16
 
 struct dm_table {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 1134ceed800f..0543cdf89e92 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1981,11 +1981,12 @@ static int device_requires_stable_pages(struct dm_target *ti,
 	return blk_queue_stable_writes(q);
 }
 
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
-			       struct queue_limits *limits)
+int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			      struct queue_limits *limits)
 {
 	bool wc = false, fua = false;
 	int page_size = PAGE_SIZE;
+	int r;
 
 	/*
 	 * Copy table's limits to the DM device's request_queue
@@ -2064,12 +2065,20 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	    dm_table_any_dev_attr(t, device_is_not_random, NULL))
 		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
 
-	/* For a zoned target, setup the zones related queue attributes */
-	if (blk_queue_is_zoned(q))
-		dm_set_zones_restrictions(t, q);
+	/*
+	 * For a zoned target, setup the zones related queue attributes
+	 * and resources necessary for zone append emulation if necessary.
+	 */
+	if (blk_queue_is_zoned(q)) {
+		r = dm_set_zones_restrictions(t, q);
+		if (r)
+			return r;
+	}
 
 	dm_update_keyslot_manager(q, t);
 	blk_queue_update_readahead(q);
+
+	return 0;
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index edc3bbb45637..c2f26949f5ee 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -4,9 +4,49 @@
  */
 
 #include <linux/blkdev.h>
+#include <linux/mm.h>
+#include <linux/sched/mm.h>
+#include <linux/slab.h>
 
 #include "dm-core.h"
 
+#define DM_MSG_PREFIX "zone"
+
+#define DM_ZONE_INVALID_WP_OFST		UINT_MAX
+
+/*
+ * For internal zone reports bypassing the top BIO submission path.
+ */
+static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
+				  sector_t sector, unsigned int nr_zones,
+				  report_zones_cb cb, void *data)
+{
+	struct gendisk *disk = md->disk;
+	int ret;
+	struct dm_report_zones_args args = {
+		.next_sector = sector,
+		.orig_data = data,
+		.orig_cb = cb,
+	};
+
+	do {
+		struct dm_target *tgt;
+
+		tgt = dm_table_find_target(t, args.next_sector);
+		if (WARN_ON_ONCE(!tgt->type->report_zones))
+			return -EIO;
+
+		args.tgt = tgt;
+		ret = tgt->type->report_zones(tgt, &args,
+					      nr_zones - args.zone_idx);
+		if (ret < 0)
+			return ret;
+	} while (args.zone_idx < nr_zones &&
+		 args.next_sector < get_capacity(disk));
+
+	return args.zone_idx;
+}
+
 /*
  * User facing dm device block device report zone operation. This calls the
  * report_zones operation for each target of a device table. This operation is
@@ -18,41 +58,18 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
 	struct mapped_device *md = disk->private_data;
 	struct dm_table *map;
 	int srcu_idx, ret;
-	struct dm_report_zones_args args = {
-		.next_sector = sector,
-		.orig_data = data,
-		.orig_cb = cb,
-	};
 
 	if (dm_suspended_md(md))
 		return -EAGAIN;
 
 	map = dm_get_live_table(md, &srcu_idx);
-	if (!map) {
-		ret = -EIO;
-		goto out;
-	}
+	if (!map)
+		return -EIO;
 
-	do {
-		struct dm_target *tgt;
+	ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);
 
-		tgt = dm_table_find_target(map, args.next_sector);
-		if (WARN_ON_ONCE(!tgt->type->report_zones)) {
-			ret = -EIO;
-			goto out;
-		}
-
-		args.tgt = tgt;
-		ret = tgt->type->report_zones(tgt, &args,
-					      nr_zones - args.zone_idx);
-		if (ret < 0)
-			goto out;
-	} while (args.zone_idx < nr_zones &&
-		 args.next_sector < get_capacity(disk));
-
-	ret = args.zone_idx;
-out:
 	dm_put_live_table(md, srcu_idx);
+
 	return ret;
 }
 
@@ -121,16 +138,517 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
 	}
 }
 
-void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_cleanup_zoned_dev(struct mapped_device *md)
 {
-	if (!blk_queue_is_zoned(q))
-		return;
+	struct request_queue *q = md->queue;
+
+	if (q) {
+		kfree(q->conv_zones_bitmap);
+		q->conv_zones_bitmap = NULL;
+		kfree(q->seq_zones_wlock);
+		q->seq_zones_wlock = NULL;
+	}
+
+	kvfree(md->zwp_offset);
+	md->zwp_offset = NULL;
+	md->nr_zones = 0;
+}
+
+static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
+{
+	switch (zone->cond) {
+	case BLK_ZONE_COND_IMP_OPEN:
+	case BLK_ZONE_COND_EXP_OPEN:
+	case BLK_ZONE_COND_CLOSED:
+		return zone->wp - zone->start;
+	case BLK_ZONE_COND_FULL:
+		return zone->len;
+	case BLK_ZONE_COND_EMPTY:
+	case BLK_ZONE_COND_NOT_WP:
+	case BLK_ZONE_COND_OFFLINE:
+	case BLK_ZONE_COND_READONLY:
+	default:
+		/*
+		 * Conventional, offline and read-only zones do not have a valid
+		 * write pointer. Use 0 as for an empty zone.
+		 */
+		return 0;
+	}
+}
+
+static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
+				 void *data)
+{
+	struct mapped_device *md = data;
+	struct request_queue *q = md->queue;
+
+	switch (zone->type) {
+	case BLK_ZONE_TYPE_CONVENTIONAL:
+		if (!q->conv_zones_bitmap) {
+			q->conv_zones_bitmap =
+				kcalloc(BITS_TO_LONGS(q->nr_zones),
+					sizeof(unsigned long), GFP_NOIO);
+			if (!q->conv_zones_bitmap)
+				return -ENOMEM;
+		}
+		set_bit(idx, q->conv_zones_bitmap);
+		break;
+	case BLK_ZONE_TYPE_SEQWRITE_REQ:
+	case BLK_ZONE_TYPE_SEQWRITE_PREF:
+		if (!q->seq_zones_wlock) {
+			q->seq_zones_wlock =
+				kcalloc(BITS_TO_LONGS(q->nr_zones),
+					sizeof(unsigned long), GFP_NOIO);
+			if (!q->seq_zones_wlock)
+				return -ENOMEM;
+		}
+		if (!md->zwp_offset) {
+			md->zwp_offset =
+				kvcalloc(q->nr_zones, sizeof(unsigned int),
+					 GFP_NOIO);
+			if (!md->zwp_offset)
+				return -ENOMEM;
+		}
+		md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);
+
+		break;
+	default:
+		DMERR("Invalid zone type 0x%x at sectors %llu",
+		      (int)zone->type, zone->start);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Revalidate the zones of a mapped device to initialize resource necessary
+ * for zone append emulation. Note that we cannot simply use the block layer
+ * blk_revalidate_disk_zones() function here as the mapped device is suspended
+ * (this is called from __bind() context).
+ */
+static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
+{
+	struct request_queue *q = md->queue;
+	int ret;
+
+	/*
+	 * Check if something changed. If yes, cleanup the current resources
+	 * and reallocate everything.
+	 */
+	if (!q->nr_zones || q->nr_zones != md->nr_zones)
+		dm_cleanup_zoned_dev(md);
+	if (md->nr_zones)
+		return 0;
+
+	/* Scan all zones to initialize everything */
+	ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones,
+				     dm_zone_revalidate_cb, md);
+	if (ret < 0)
+		goto err;
+	if (ret != q->nr_zones) {
+		ret = -EIO;
+		goto err;
+	}
+
+	md->nr_zones = q->nr_zones;
+
+	return 0;
+
+err:
+	DMERR("Revalidate zones failed %d", ret);
+	dm_cleanup_zoned_dev(md);
+	return ret;
+}
+
+static int device_not_zone_append_capable(struct dm_target *ti,
+					  struct dm_dev *dev, sector_t start,
+					  sector_t len, void *data)
+{
+	return !blk_queue_is_zoned(bdev_get_queue(dev->bdev));
+}
+
+static bool dm_table_supports_zone_append(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned int i;
+
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+
+		if (ti->emulate_zone_append)
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
+			return false;
+	}
+
+	return true;
+}
+
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
+{
+	struct mapped_device *md = t->md;
 
 	/*
 	 * For a zoned target, the number of zones should be updated for the
-	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
-	 * target, this is all that is needed.
+	 * correct value to be exposed in sysfs queue/nr_zones.
 	 */
 	WARN_ON_ONCE(queue_is_mq(q));
-	q->nr_zones = blkdev_nr_zones(t->md->disk);
+	q->nr_zones = blkdev_nr_zones(md->disk);
+
+	/* Check if zone append is natively supported */
+	if (dm_table_supports_zone_append(t)) {
+		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+		dm_cleanup_zoned_dev(md);
+		return 0;
+	}
+
+	/*
+	 * Mark the mapped device as needing zone append emulation and
+	 * initialize the emulation resources once the capacity is set.
+	 */
+	set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+	if (!get_capacity(md->disk))
+		return 0;
+
+	return dm_revalidate_zones(md, t);
+}
+
+static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
+				       void *data)
+{
+	unsigned int *wp_offset = data;
+
+	*wp_offset = dm_get_zone_wp_offset(zone);
+
+	return 0;
+}
+
+static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
+				    unsigned int *wp_ofst)
+{
+	sector_t sector = zno * blk_queue_zone_sectors(md->queue);
+	unsigned int noio_flag;
+	struct dm_table *t;
+	int srcu_idx, ret;
+
+	t = dm_get_live_table(md, &srcu_idx);
+	if (!t)
+		return -EIO;
+
+	/*
+	 * Ensure that all memory allocations in this context are done as if
+	 * GFP_NOIO was specified.
+	 */
+	noio_flag = memalloc_noio_save();
+	ret = dm_blk_do_report_zones(md, t, sector, 1,
+				     dm_update_zone_wp_offset_cb, wp_ofst);
+	memalloc_noio_restore(noio_flag);
+
+	dm_put_live_table(md, srcu_idx);
+
+	if (ret != 1)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * First phase of BIO mapping for targets with zone append emulation:
+ * check all BIO that change a zone writer pointer and change zone
+ * append operations into regular write operations.
+ */
+static bool dm_zone_map_bio_begin(struct mapped_device *md,
+				  struct bio *orig_bio, struct bio *clone)
+{
+	sector_t zsectors = blk_queue_zone_sectors(md->queue);
+	unsigned int zno = bio_zone_no(orig_bio);
+	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
+
+	/*
+	 * If the target zone is in an error state, recover by inspecting the
+	 * zone to get its current write pointer position. Note that since the
+	 * target zone is already locked, a BIO issuing context should never
+	 * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
+	 */
+	if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
+		if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
+			return false;
+		WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
+	}
+
+	switch (bio_op(orig_bio)) {
+	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_FINISH:
+		return true;
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE:
+		/* Writes must be aligned to the zone write pointer */
+		if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
+			return false;
+		break;
+	case REQ_OP_ZONE_APPEND:
+		/*
+		 * Change zone append operations into a non-mergeable regular
+		 * writes directed at the current write pointer position of the
+		 * target zone.
+		 */
+		clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
+			(orig_bio->bi_opf & (~REQ_OP_MASK));
+		clone->bi_iter.bi_sector =
+			orig_bio->bi_iter.bi_sector + zwp_offset;
+		break;
+	default:
+		DMWARN_LIMIT("Invalid BIO operation");
+		return false;
+	}
+
+	/* Cannot write to a full zone */
+	if (zwp_offset >= zsectors)
+		return false;
+
+	return true;
+}
+
+/*
+ * Second phase of BIO mapping for targets with zone append emulation:
+ * update the zone write pointer offset array to account for the additional
+ * data written to a zone. Note that at this point, the remapped clone BIO
+ * may already have completed, so we do not touch it.
+ */
+static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
+					struct bio *orig_bio,
+					unsigned int nr_sectors)
+{
+	unsigned int zno = bio_zone_no(orig_bio);
+	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
+
+	/* The clone BIO may already have been completed and failed */
+	if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
+		return BLK_STS_IOERR;
+
+	/* Update the zone wp offset */
+	switch (bio_op(orig_bio)) {
+	case REQ_OP_ZONE_RESET:
+		WRITE_ONCE(md->zwp_offset[zno], 0);
+		return BLK_STS_OK;
+	case REQ_OP_ZONE_FINISH:
+		WRITE_ONCE(md->zwp_offset[zno],
+			   blk_queue_zone_sectors(md->queue));
+		return BLK_STS_OK;
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE:
+		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
+		return BLK_STS_OK;
+	case REQ_OP_ZONE_APPEND:
+		/*
+		 * Check that the target did not truncate the write operation
+		 * emulating a zone append.
+		 */
+		if (nr_sectors != bio_sectors(orig_bio)) {
+			DMWARN_LIMIT("Truncated write for zone append");
+			return BLK_STS_IOERR;
+		}
+		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
+		return BLK_STS_OK;
+	default:
+		DMWARN_LIMIT("Invalid BIO operation");
+		return BLK_STS_IOERR;
+	}
+}
+
+static inline void dm_zone_lock(struct request_queue *q,
+				unsigned int zno, struct bio *clone)
+{
+	if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
+		return;
+
+	wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
+	bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
+}
+
+static inline void dm_zone_unlock(struct request_queue *q,
+				  unsigned int zno, struct bio *clone)
+{
+	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
+		return;
+
+	WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock));
+	clear_bit_unlock(zno, q->seq_zones_wlock);
+	smp_mb__after_atomic();
+	wake_up_bit(q->seq_zones_wlock, zno);
+
+	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
+}
+
+static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
+{
+	/*
+	 * Special processing is not needed for operations that do not need the
+	 * zone write lock, that is, all operations that target conventional
+	 * zones and all operations that do not modify directly a sequential
+	 * zone write pointer.
+	 */
+	if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio))
+		return false;
+	switch (bio_op(orig_bio)) {
+	case REQ_OP_WRITE_ZEROES:
+	case REQ_OP_WRITE_SAME:
+	case REQ_OP_WRITE:
+	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_FINISH:
+	case REQ_OP_ZONE_APPEND:
+		return bio_zone_is_seq(orig_bio);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Special IO mapping for targets needing zone append emulation.
+ */
+int dm_zone_map_bio(struct dm_target_io *tio)
+{
+	struct dm_io *io = tio->io;
+	struct dm_target *ti = tio->ti;
+	struct mapped_device *md = io->md;
+	struct request_queue *q = md->queue;
+	struct bio *orig_bio = io->orig_bio;
+	struct bio *clone = &tio->clone;
+	unsigned int zno;
+	blk_status_t sts;
+	int r;
+
+	/*
+	 * IOs that do not change a zone write pointer do not need
+	 * any additional special processing.
+	 */
+	if (!dm_need_zone_wp_tracking(orig_bio))
+		return ti->type->map(ti, clone);
+
+	/* Lock the target zone */
+	zno = bio_zone_no(orig_bio);
+	dm_zone_lock(q, zno, clone);
+
+	/*
+	 * Check that the bio and the target zone write pointer offset are
+	 * both valid, and if the bio is a zone append, remap it to a write.
+	 */
+	if (!dm_zone_map_bio_begin(md, orig_bio, clone)) {
+		dm_zone_unlock(q, zno, clone);
+		return DM_MAPIO_KILL;
+	}
+
+	/*
+	 * The target map function may issue and complete the IO quickly.
+	 * Take an extra reference on the IO to make sure it does disappear
+	 * until we run dm_zone_map_bio_end().
+	 */
+	dm_io_inc_pending(io);
+
+	/* Let the target do its work */
+	r = ti->type->map(ti, clone);
+	switch (r) {
+	case DM_MAPIO_SUBMITTED:
+		/*
+		 * The target submitted the clone BIO. The target zone will
+		 * be unlocked on completion of the clone.
+		 */
+		sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
+		break;
+	case DM_MAPIO_REMAPPED:
+		/*
+		 * The target only remapped the clone BIO. In case of error,
+		 * unlock the target zone here as the clone will not be
+		 * submitted.
+		 */
+		sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
+		if (sts != BLK_STS_OK)
+			dm_zone_unlock(q, zno, clone);
+		break;
+	case DM_MAPIO_REQUEUE:
+	case DM_MAPIO_KILL:
+	default:
+		dm_zone_unlock(q, zno, clone);
+		sts = BLK_STS_IOERR;
+		break;
+	}
+
+	/* Drop the extra reference on the IO */
+	dm_io_dec_pending(io, sts);
+
+	if (sts != BLK_STS_OK)
+		return DM_MAPIO_KILL;
+
+	return r;
+}
+
+/*
+ * IO completion callback called from clone_endio().
+ */
+void dm_zone_endio(struct dm_io *io, struct bio *clone)
+{
+	struct mapped_device *md = io->md;
+	struct request_queue *q = md->queue;
+	struct bio *orig_bio = io->orig_bio;
+	unsigned int zwp_offset;
+	unsigned int zno;
+
+	/*
+	 * For targets that do not emulate zone append, we only need to
+	 * handle native zone-append bios.
+	 */
+	if (!dm_emulate_zone_append(md)) {
+		/*
+		 * Get the offset within the zone of the written sector
+		 * and add that to the original bio sector position.
+		 */
+		if (clone->bi_status == BLK_STS_OK &&
+		    bio_op(clone) == REQ_OP_ZONE_APPEND) {
+			sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1;
+
+			orig_bio->bi_iter.bi_sector +=
+				clone->bi_iter.bi_sector & mask;
+		}
+
+		return;
+	}
+
+	/*
+	 * For targets that do emulate zone append, if the clone BIO does not
+	 * own the target zone write lock, we have nothing to do.
+	 */
+	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
+		return;
+
+	zno = bio_zone_no(orig_bio);
+
+	if (clone->bi_status != BLK_STS_OK) {
+		/*
+		 * BIOs that modify a zone write pointer may leave the zone
+		 * in an unknown state in case of failure (e.g. the write
+		 * pointer was only partially advanced). In this case, set
+		 * the target zone write pointer as invalid unless it is
+		 * already being updated.
+		 */
+		WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
+	} else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
+		/*
+		 * Get the written sector for zone append operation that were
+		 * emulated using regular write operations.
+		 */
+		zwp_offset = READ_ONCE(md->zwp_offset[zno]);
+		if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
+			WRITE_ONCE(md->zwp_offset[zno],
+				   DM_ZONE_INVALID_WP_OFST);
+		else
+			orig_bio->bi_iter.bi_sector +=
+				zwp_offset - bio_sectors(orig_bio);
+	}
+
+	dm_zone_unlock(q, zno, clone);
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 49bd18e99af6..420a12b42708 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -876,7 +876,6 @@ static void clone_endio(struct bio *bio)
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
-	struct bio *orig_bio = io->orig_bio;
 	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
 
 	if (unlikely(error == BLK_STS_TARGET)) {
@@ -891,17 +890,8 @@ static void clone_endio(struct bio *bio)
 			disable_write_zeroes(md);
 	}
 
-	/*
-	 * For zone-append bios get offset in zone of the written
-	 * sector and add that to the original bio sector pos.
-	 */
-	if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
-		sector_t written_sector = bio->bi_iter.bi_sector;
-		struct request_queue *q = orig_bio->bi_bdev->bd_disk->queue;
-		u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
-
-		orig_bio->bi_iter.bi_sector += written_sector & mask;
-	}
+	if (blk_queue_is_zoned(q))
+		dm_zone_endio(io, bio);
 
 	if (endio) {
 		int r = endio(tio->ti, bio, &error);
@@ -1213,7 +1203,16 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 		down(&md->swap_bios_semaphore);
 	}
 
-	r = ti->type->map(ti, clone);
+	/*
+	 * Check if the IO needs a special mapping due to zone append emulation
+	 * on zoned target. In this case, dm_zone_map_bio() calls the target
+	 * map operation.
+	 */
+	if (dm_emulate_zone_append(io->md))
+		r = dm_zone_map_bio(tio);
+	else
+		r = ti->type->map(ti, clone);
+
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		break;
@@ -1711,6 +1710,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 	mutex_destroy(&md->swap_bios_lock);
 
 	dm_mq_cleanup_mapped_device(md);
+	dm_cleanup_zoned_dev(md);
 }
 
 /*
@@ -1956,11 +1956,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 		goto out;
 	}
 
+	ret = dm_table_set_restrictions(t, q, limits);
+	if (ret) {
+		old_map = ERR_PTR(ret);
+		goto out;
+	}
+
 	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
 	rcu_assign_pointer(md->map, (void *)t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
 
-	dm_table_set_restrictions(t, q, limits);
 	if (old_map)
 		dm_sync_table(md);
 
@@ -2079,7 +2084,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 		DMERR("Cannot calculate initial queue limits");
 		return r;
 	}
-	dm_table_set_restrictions(t, md->queue, &limits);
+	r = dm_table_set_restrictions(t, md->queue, &limits);
+	if (r)
+		return r;
+
 	blk_register_queue(md->disk);
 
 	return 0;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 39c243258e24..742d9c80efe1 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -45,6 +45,8 @@ struct dm_dev_internal {
 
 struct dm_table;
 struct dm_md_mempools;
+struct dm_target_io;
+struct dm_io;
 
 /*-----------------------------------------------------------------
  * Internal table functions.
@@ -56,8 +58,8 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
 bool dm_table_has_no_data_devices(struct dm_table *table);
 int dm_calculate_queue_limits(struct dm_table *table,
 			      struct queue_limits *limits);
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
-			       struct queue_limits *limits);
+int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			      struct queue_limits *limits);
 struct list_head *dm_table_get_devices(struct dm_table *t);
 void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_presuspend_undo_targets(struct dm_table *t);
@@ -103,17 +105,25 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
 /*
  * Zoned targets related functions.
  */
-void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
+void dm_zone_endio(struct dm_io *io, struct bio *clone);
 #ifdef CONFIG_BLK_DEV_ZONED
+void dm_cleanup_zoned_dev(struct mapped_device *md);
 int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
 bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
+int dm_zone_map_bio(struct dm_target_io *io);
 #else
+static inline void dm_cleanup_zoned_dev(struct mapped_device *md) {}
 #define dm_blk_report_zones	NULL
 static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
 {
 	return false;
 }
+static inline int dm_zone_map_bio(struct dm_target_io *tio)
+{
+	return DM_MAPIO_KILL;
+}
 #endif
 
 /*-----------------------------------------------------------------
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index caea0a079d2d..7457d49acf9a 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -361,6 +361,12 @@ struct dm_target {
 	 * Set if we need to limit the number of in-flight bios when swapping.
 	 */
 	bool limit_swap_bios:1;
+
+	/*
+	 * Set if this target implements a a zoned device and needs emulation of
+	 * zone append operations using regular writes.
+	 */
+	bool emulate_zone_append:1;
 };
 
 void *dm_per_bio_data(struct bio *bio, size_t data_size);

From f34ee1dce642c67104a56d562e6ec71efe901f77 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 26 May 2021 06:25:01 +0900
Subject: [PATCH 0995/3292] dm crypt: Fix zoned block device support

Zone append BIOs (REQ_OP_ZONE_APPEND) always specify the start sector
of the zone to be written instead of the actual sector location to
write. The write location is determined by the device and returned to
the host upon completion of the operation. This interface, while simple
and efficient for writing into sequential zones of a zoned block
device, is incompatible with the use of sector values to calculate a
cypher block IV. All data written in a zone end up using the same IV
values corresponding to the first sectors of the zone, but read
operation will specify any sector within the zone resulting in an IV
mismatch between encryption and decryption.

To solve this problem, report to DM core that zone append operations are
not supported. This result in the zone append operations being emulated
using regular write operations.

Reported-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-crypt.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f410ceee51d7..50f4cbd600d5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -3280,14 +3280,28 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	cc->start = tmpll;
 
-	/*
-	 * For zoned block devices, we need to preserve the issuer write
-	 * ordering. To do so, disable write workqueues and force inline
-	 * encryption completion.
-	 */
 	if (bdev_is_zoned(cc->dev->bdev)) {
+		/*
+		 * For zoned block devices, we need to preserve the issuer write
+		 * ordering. To do so, disable write workqueues and force inline
+		 * encryption completion.
+		 */
 		set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
 		set_bit(DM_CRYPT_WRITE_INLINE, &cc->flags);
+
+		/*
+		 * All zone append writes to a zone of a zoned block device will
+		 * have the same BIO sector, the start of the zone. When the
+		 * cypher IV mode uses sector values, all data targeting a
+		 * zone will be encrypted using the first sector numbers of the
+		 * zone. This will not result in write errors but will
+		 * cause most reads to fail as reads will use the sector values
+		 * for the actual data locations, resulting in IV mismatch.
+		 * To avoid this problem, ask DM core to emulate zone append
+		 * operations with regular writes.
+		 */
+		DMDEBUG("Zone append operations will be emulated");
+		ti->emulate_zone_append = true;
 	}
 
 	if (crypt_integrity_aead(cc) || cc->integrity_iv_size) {

From 220ef1f97ec19894b121f40012031c405ffc388c Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 12 Apr 2021 17:41:07 +0300
Subject: [PATCH 0996/3292] igc: Update driver to use ethtool_sprintf

Complete to commit c8d4725e985d ("intel: Update drivers to use
ethtool_sprintf")
Update the igc driver to make use of ethtool_sprintf. The general idea
is to reduce code size and overhead by replacing the repeated pattern of
string printf statements and ETH_STRING_LEN counter increments.

Suggested-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 39 +++++++-------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 2cb12431c371..fa4171860623 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -765,35 +765,22 @@ static void igc_ethtool_get_strings(struct net_device *netdev, u32 stringset,
 		       IGC_TEST_LEN * ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_STATS:
-		for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, igc_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < IGC_NETDEV_STATS_LEN; i++) {
-			memcpy(p, igc_gstrings_net_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p, igc_gstrings_stats[i].stat_string);
+		for (i = 0; i < IGC_NETDEV_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igc_gstrings_net_stats[i].stat_string);
 		for (i = 0; i < adapter->num_tx_queues; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_restart", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "tx_queue_%u_restart", i);
 		}
 		for (i = 0; i < adapter->num_rx_queues; i++) {
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_drops", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_csum_err", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_alloc_failed", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_drops", i);
+			ethtool_sprintf(&p, "rx_queue_%u_csum_err", i);
+			ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i);
 		}
 		/* BUG_ON(p - data != IGC_STATS_LEN * ETH_GSTRING_LEN); */
 		break;

From cca2c030b2a7f9107c925bfb506600b924751256 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 18 Apr 2021 14:40:56 +0300
Subject: [PATCH 0997/3292] igc: Remove unused asymmetric pause bit from igc
 defines

The CR_1000T_ASYM_PAUSE bit from igc defines is not used so remove it.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 0103dda32f39..af2f5e16e994 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -128,7 +128,6 @@
 #define NWAY_LPAR_ASM_DIR	0x0800 /* LP Asymmetric Pause Direction bit */
 
 /* 1000BASE-T Control Register */
-#define CR_1000T_ASYM_PAUSE	0x0080 /* Advertise asymmetric pause bit */
 #define CR_1000T_HD_CAPS	0x0100 /* Advertise 1000T HD capability */
 #define CR_1000T_FD_CAPS	0x0200 /* Advertise 1000T FD capability  */
 

From 6fdef25db3d48b1c3f735b49125397498b3f7033 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 2 May 2021 13:57:08 +0300
Subject: [PATCH 0998/3292] igc: Remove unused MDICNFG register

The MDICNFG register from igc registers is not used so remove it.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_regs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index cc174853554b..2491d565d758 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -10,7 +10,6 @@
 #define IGC_EECD		0x00010  /* EEPROM/Flash Control - RW */
 #define IGC_CTRL_EXT		0x00018  /* Extended Device Control - RW */
 #define IGC_MDIC		0x00020  /* MDI Control - RW */
-#define IGC_MDICNFG		0x00E04  /* MDC/MDIO Configuration - RW */
 #define IGC_CONNSW		0x00034  /* Copper/Fiber switch control - RW */
 #define IGC_I225_PHPM		0x00E14  /* I225 PHY Power Management */
 #define IGC_GPHY_VERSION	0x0001E  /* I225 gPHY Firmware Version */

From 5cde7beb27affd0ccd96ccb671adb1539866738b Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 24 May 2021 09:09:01 +0300
Subject: [PATCH 0999/3292] igc: Indentation fixes

Minor fix of indentation in igc_defines.h

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index af2f5e16e994..526a4c83711f 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -98,8 +98,8 @@
 #define IGC_CTRL_RFCE		0x08000000  /* Receive Flow Control enable */
 #define IGC_CTRL_TFCE		0x10000000  /* Transmit flow control enable */
 
-#define IGC_CTRL_SDP0_DIR 0x00400000	/* SDP0 Data direction */
-#define IGC_CTRL_SDP1_DIR 0x00800000	/* SDP1 Data direction */
+#define IGC_CTRL_SDP0_DIR	0x00400000  /* SDP0 Data direction */
+#define IGC_CTRL_SDP1_DIR	0x00800000  /* SDP1 Data direction */
 
 /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
 #define MAX_JUMBO_FRAME_SIZE	0x2600

From 8d7449630e3450bc0546dc0cb692fbb57d1852c0 Mon Sep 17 00:00:00 2001
From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Date: Fri, 4 Jun 2021 01:44:54 +0800
Subject: [PATCH 1000/3292] igc: Enable HW VLAN Insertion and HW VLAN Stripping

Add HW VLAN acceleration protocol handling. In case of HW VLAN tagging,
we need that protocol available in the ndo_start_xmit(), so that it will be
stored in a new fields in the skb.

HW offloading is set to OFF by default.
Users are allow to turn on/off Rx/Tx HW VLAN acceleration via ethtool.

Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Acked-by: Sasha Neftin <sasha.neftin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  1 +
 drivers/net/ethernet/intel/igc/igc_defines.h |  4 ++
 drivers/net/ethernet/intel/igc/igc_main.c    | 74 +++++++++++++++++++-
 drivers/net/ethernet/intel/igc/igc_regs.h    |  1 +
 4 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index b6d3277c6f52..9e0bbb2e55e3 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -372,6 +372,7 @@ extern char igc_driver_name[];
 
 /* VLAN info */
 #define IGC_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IGC_TX_FLAGS_VLAN_SHIFT	16
 
 /* igc_test_staterr - tests bits within Rx descriptor status and error fields */
 static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc,
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 526a4c83711f..c3a5a5518790 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -94,6 +94,7 @@
 #define IGC_CTRL_SLU		0x00000040  /* Set link up (Force Link) */
 #define IGC_CTRL_FRCSPD		0x00000800  /* Force Speed */
 #define IGC_CTRL_FRCDPX		0x00001000  /* Force Duplex */
+#define IGC_CTRL_VME		0x40000000  /* IEEE VLAN mode enable */
 
 #define IGC_CTRL_RFCE		0x08000000  /* Receive Flow Control enable */
 #define IGC_CTRL_TFCE		0x10000000  /* Transmit flow control enable */
@@ -322,6 +323,9 @@
 #define IGC_RXD_STAT_IXSM	0x04	/* Ignore checksum */
 #define IGC_RXD_STAT_UDPCS	0x10	/* UDP xsum calculated */
 #define IGC_RXD_STAT_TCPCS	0x20	/* TCP xsum calculated */
+#define IGC_RXD_STAT_VP		0x08	/* IEEE VLAN Packet */
+
+#define IGC_RXDEXT_STATERR_LB	0x00040000
 
 /* Advanced Receive Descriptor bit definitions */
 #define IGC_RXDADV_STAT_TSIP	0x08000 /* timestamp in packet */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ea998d2defa4..2e71b006b660 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -111,6 +111,9 @@ void igc_reset(struct igc_adapter *adapter)
 	if (!netif_running(adapter->netdev))
 		igc_power_down_phy_copper_base(&adapter->hw);
 
+	/* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
+	wr32(IGC_VET, ETH_P_8021Q);
+
 	/* Re-enable PTP, where applicable. */
 	igc_ptp_reset(adapter);
 
@@ -1122,13 +1125,17 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
 	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :	\
 	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
 
-static u32 igc_tx_cmd_type(u32 tx_flags)
+static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
 {
 	/* set type for advanced descriptor with frame checksum insertion */
 	u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
 		       IGC_ADVTXD_DCMD_DEXT |
 		       IGC_ADVTXD_DCMD_IFCS;
 
+	/* set HW vlan bit if vlan is present */
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
+				 IGC_ADVTXD_DCMD_VLE);
+
 	/* set segmentation bits for TSO */
 	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
 				 (IGC_ADVTXD_DCMD_TSE));
@@ -1137,6 +1144,9 @@ static u32 igc_tx_cmd_type(u32 tx_flags)
 	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
 				 (IGC_ADVTXD_MAC_TSTAMP));
 
+	/* insert frame checksum */
+	cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
+
 	return cmd_type;
 }
 
@@ -1171,8 +1181,9 @@ static int igc_tx_map(struct igc_ring *tx_ring,
 	u16 i = tx_ring->next_to_use;
 	unsigned int data_len, size;
 	dma_addr_t dma;
-	u32 cmd_type = igc_tx_cmd_type(tx_flags);
+	u32 cmd_type;
 
+	cmd_type = igc_tx_cmd_type(skb, tx_flags);
 	tx_desc = IGC_TX_DESC(tx_ring, i);
 
 	igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
@@ -1443,6 +1454,11 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 		}
 	}
 
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= IGC_TX_FLAGS_VLAN;
+		tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
+	}
+
 	/* record initial flags and protocol */
 	first->tx_flags = tx_flags;
 	first->protocol = protocol;
@@ -1542,6 +1558,25 @@ static inline void igc_rx_hash(struct igc_ring *ring,
 			     PKT_HASH_TYPE_L3);
 }
 
+static void igc_rx_vlan(struct igc_ring *rx_ring,
+			union igc_adv_rx_desc *rx_desc,
+			struct sk_buff *skb)
+{
+	struct net_device *dev = rx_ring->netdev;
+	u16 vid;
+
+	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
+		if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
+		    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
+			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
+		else
+			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+}
+
 /**
  * igc_process_skb_fields - Populate skb header fields from Rx descriptor
  * @rx_ring: rx descriptor ring packet is being transacted on
@@ -1560,11 +1595,37 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring,
 
 	igc_rx_checksum(rx_ring, rx_desc, skb);
 
+	igc_rx_vlan(rx_ring, rx_desc, skb);
+
 	skb_record_rx_queue(skb, rx_ring->queue_index);
 
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
+static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
+{
+	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+
+	if (enable) {
+		/* enable VLAN tag insert/strip */
+		ctrl |= IGC_CTRL_VME;
+	} else {
+		/* disable VLAN tag insert/strip */
+		ctrl &= ~IGC_CTRL_VME;
+	}
+	wr32(IGC_CTRL, ctrl);
+}
+
+static void igc_restore_vlan(struct igc_adapter *adapter)
+{
+	igc_vlan_mode(adapter->netdev, adapter->netdev->features);
+}
+
 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
 					       const unsigned int size,
 					       int *rx_buffer_pgcnt)
@@ -3248,6 +3309,8 @@ static void igc_configure(struct igc_adapter *adapter)
 	igc_get_hw_control(adapter);
 	igc_set_rx_mode(netdev);
 
+	igc_restore_vlan(adapter);
+
 	igc_setup_tctl(adapter);
 	igc_setup_mrqc(adapter);
 	igc_setup_rctl(adapter);
@@ -4547,6 +4610,9 @@ static int igc_set_features(struct net_device *netdev,
 	netdev_features_t changed = netdev->features ^ features;
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+		igc_vlan_mode(netdev, features);
+
 	/* Add VLAN support */
 	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
 		return 0;
@@ -5873,11 +5939,15 @@ static int igc_probe(struct pci_dev *pdev,
 
 	/* copy netdev features into list of user selectable features */
 	netdev->hw_features |= NETIF_F_NTUPLE;
+	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
 	netdev->hw_features |= netdev->features;
 
 	if (pci_using_dac)
 		netdev->features |= NETIF_F_HIGHDMA;
 
+	netdev->vlan_features |= netdev->features;
+
 	/* MTU range: 68 - 9216 */
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 2491d565d758..0f82990567d9 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -11,6 +11,7 @@
 #define IGC_CTRL_EXT		0x00018  /* Extended Device Control - RW */
 #define IGC_MDIC		0x00020  /* MDI Control - RW */
 #define IGC_CONNSW		0x00034  /* Copper/Fiber switch control - RW */
+#define IGC_VET			0x00038  /* VLAN Ether Type - RW */
 #define IGC_I225_PHPM		0x00E14  /* I225 PHY Power Management */
 #define IGC_GPHY_VERSION	0x0001E  /* I225 gPHY Firmware Version */
 

From 371087aa476ab0ac0072303ac94a3bba2d7b0a1d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:27 -0700
Subject: [PATCH 1001/3292] sock: expose so_timestamp options for mptcp

This exports SO_TIMESTAMP_* function for re-use by MPTCP.

Without this there is too much copy & paste needed to support
this from mptcp setsockopt path.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  1 +
 net/core/sock.c    | 26 +++++++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 0e962d8bc73b..7e0116b1a73f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2743,6 +2743,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 void sock_def_readable(struct sock *sk);
 
 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
+void sock_set_timestamp(struct sock *sk, int optname, bool valbool);
 void sock_enable_timestamps(struct sock *sk);
 void sock_no_linger(struct sock *sk);
 void sock_set_keepalive(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 958614ea16ed..5b85dd37b562 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -776,6 +776,24 @@ void sock_enable_timestamps(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_enable_timestamps);
 
+void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
+{
+	switch (optname) {
+	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
+		break;
+	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
+	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
+	case SO_TIMESTAMPNS_NEW:
+		__sock_set_timestamps(sk, valbool, true, true);
+		break;
+	}
+}
+
 void sock_set_keepalive(struct sock *sk)
 {
 	lock_sock(sk);
@@ -989,16 +1007,10 @@ set_sndbuf:
 		break;
 
 	case SO_TIMESTAMP_OLD:
-		__sock_set_timestamps(sk, valbool, false, false);
-		break;
 	case SO_TIMESTAMP_NEW:
-		__sock_set_timestamps(sk, valbool, true, false);
-		break;
 	case SO_TIMESTAMPNS_OLD:
-		__sock_set_timestamps(sk, valbool, false, true);
-		break;
 	case SO_TIMESTAMPNS_NEW:
-		__sock_set_timestamps(sk, valbool, true, true);
+		sock_set_timestamp(sk, valbool, optname);
 		break;
 	case SO_TIMESTAMPING_NEW:
 	case SO_TIMESTAMPING_OLD:

From ced122d90f52eb6ff37272e32941845d46ac64c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:28 -0700
Subject: [PATCH 1002/3292] sock: expose so_timestamping options for mptcp

Similar to previous patch: expose SO_TIMESTAMPING helper so we do not
have to copy & paste this into the mptcp core.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  2 ++
 net/core/sock.c    | 71 +++++++++++++++++++++++-----------------------
 2 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 7e0116b1a73f..9b341c2c924f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2744,6 +2744,8 @@ void sock_def_readable(struct sock *sk);
 
 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
 void sock_set_timestamp(struct sock *sk, int optname, bool valbool);
+int sock_set_timestamping(struct sock *sk, int optname, int val);
+
 void sock_enable_timestamps(struct sock *sk);
 void sock_no_linger(struct sock *sk);
 void sock_set_keepalive(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 5b85dd37b562..bd887cb075ce 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -794,6 +794,40 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
 	}
 }
 
+int sock_set_timestamping(struct sock *sk, int optname, int val)
+{
+	if (val & ~SOF_TIMESTAMPING_MASK)
+		return -EINVAL;
+
+	if (val & SOF_TIMESTAMPING_OPT_ID &&
+	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
+		if (sk->sk_protocol == IPPROTO_TCP &&
+		    sk->sk_type == SOCK_STREAM) {
+			if ((1 << sk->sk_state) &
+			    (TCPF_CLOSE | TCPF_LISTEN))
+				return -EINVAL;
+			sk->sk_tskey = tcp_sk(sk)->snd_una;
+		} else {
+			sk->sk_tskey = 0;
+		}
+	}
+
+	if (val & SOF_TIMESTAMPING_OPT_STATS &&
+	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
+		return -EINVAL;
+
+	sk->sk_tsflags = val;
+	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
+	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+		sock_enable_timestamp(sk,
+				      SOCK_TIMESTAMPING_RX_SOFTWARE);
+	else
+		sock_disable_timestamp(sk,
+				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+	return 0;
+}
+
 void sock_set_keepalive(struct sock *sk)
 {
 	lock_sock(sk);
@@ -1012,43 +1046,10 @@ set_sndbuf:
 	case SO_TIMESTAMPNS_NEW:
 		sock_set_timestamp(sk, valbool, optname);
 		break;
+
 	case SO_TIMESTAMPING_NEW:
 	case SO_TIMESTAMPING_OLD:
-		if (val & ~SOF_TIMESTAMPING_MASK) {
-			ret = -EINVAL;
-			break;
-		}
-
-		if (val & SOF_TIMESTAMPING_OPT_ID &&
-		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
-			if (sk->sk_protocol == IPPROTO_TCP &&
-			    sk->sk_type == SOCK_STREAM) {
-				if ((1 << sk->sk_state) &
-				    (TCPF_CLOSE | TCPF_LISTEN)) {
-					ret = -EINVAL;
-					break;
-				}
-				sk->sk_tskey = tcp_sk(sk)->snd_una;
-			} else {
-				sk->sk_tskey = 0;
-			}
-		}
-
-		if (val & SOF_TIMESTAMPING_OPT_STATS &&
-		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
-			ret = -EINVAL;
-			break;
-		}
-
-		sk->sk_tsflags = val;
-		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
-
-		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
-			sock_enable_timestamp(sk,
-					      SOCK_TIMESTAMPING_RX_SOFTWARE);
-		else
-			sock_disable_timestamp(sk,
-					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+		ret = sock_set_timestamping(sk, optname, val);
 		break;
 
 	case SO_RCVLOWAT:

From 9061f24bf82ec2e92dd1e7c10b98b680db023d31 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:29 -0700
Subject: [PATCH 1003/3292] mptcp: sockopt: propagate timestamp request to
 subflows

This adds support for TIMESTAMP(NS) setsockopt.

This doesn't make things work yet, because the mptcp receive path
doesn't convert the skb timestamps to cmsgs for userspace consumption.

receive path cmsg support is added ina followup patch.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 50 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a79798189599..3168ad4a9298 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -140,6 +140,43 @@ static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
 	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
 }
 
+static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
+{
+	sockptr_t optval = KERNEL_SOCKPTR(&val);
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	int ret;
+
+	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
+			      optval, sizeof(val));
+	if (ret)
+		return ret;
+
+	lock_sock(sk);
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow = lock_sock_fast(ssk);
+
+		switch (optname) {
+		case SO_TIMESTAMP_OLD:
+		case SO_TIMESTAMP_NEW:
+		case SO_TIMESTAMPNS_OLD:
+		case SO_TIMESTAMPNS_NEW:
+			sock_set_timestamp(sk, optname, !!val);
+			break;
+		case SO_TIMESTAMPING_NEW:
+		case SO_TIMESTAMPING_OLD:
+			sock_set_timestamping(sk, optname, val);
+			break;
+		}
+
+		unlock_sock_fast(ssk, slow);
+	}
+
+	release_sock(sk);
+	return 0;
+}
+
 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 					   sockptr_t optval, unsigned int optlen)
 {
@@ -164,6 +201,13 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 	case SO_INCOMING_CPU:
 		mptcp_so_incoming_cpu(msk, val);
 		return 0;
+	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMP_NEW:
+	case SO_TIMESTAMPNS_OLD:
+	case SO_TIMESTAMPNS_NEW:
+	case SO_TIMESTAMPING_OLD:
+	case SO_TIMESTAMPING_NEW:
+		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
 	}
 
 	return -ENOPROTOOPT;
@@ -251,6 +295,12 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 	case SO_MARK:
 	case SO_INCOMING_CPU:
 	case SO_DEBUG:
+	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMP_NEW:
+	case SO_TIMESTAMPNS_OLD:
+	case SO_TIMESTAMPNS_NEW:
+	case SO_TIMESTAMPING_OLD:
+	case SO_TIMESTAMPING_NEW:
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	case SO_LINGER:
 		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);

From 7a009a70ff8adcba3b31dc8922a3671e5a8e1361 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:30 -0700
Subject: [PATCH 1004/3292] mptcp: setsockopt: handle SOL_SOCKET in one place
 only

Move the pre-check to the function that handles all SOL_SOCKET values.

At this point there is complete coverage for all values that were
accepted by the pre-check.

BUSYPOLL functions are accepted but will not have any functionality
yet until its clear how the expected mptcp behaviour should look like.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 99 +++++++++++++--------------------------------
 1 file changed, 29 insertions(+), 70 deletions(-)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 3168ad4a9298..092d1f635d27 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -304,6 +304,14 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	case SO_LINGER:
 		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
+	case SO_RCVLOWAT:
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+	case SO_BUSY_POLL:
+	case SO_PREFER_BUSY_POLL:
+	case SO_BUSY_POLL_BUDGET:
+		/* No need to copy: only relevant for msk */
+		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
 	case SO_NO_CHECK:
 	case SO_DONTROUTE:
 	case SO_BROADCAST:
@@ -317,7 +325,24 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 		return 0;
 	}
 
-	return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
+	/* SO_OOBINLINE is not supported, let's avoid the related mess
+	 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
+	 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
+	 * we must be careful with subflows
+	 *
+	 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
+	 * explicitly the sk_protocol field
+	 *
+	 * SO_PEEK_OFF is unsupported, as it is for plain TCP
+	 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
+	 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
+	 * but likely needs careful design
+	 *
+	 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
+	 * SO_TXTIME is currently unsupported
+	 */
+
+	return -EOPNOTSUPP;
 }
 
 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
@@ -349,72 +374,6 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 
 static bool mptcp_supported_sockopt(int level, int optname)
 {
-	if (level == SOL_SOCKET) {
-		switch (optname) {
-		case SO_DEBUG:
-		case SO_REUSEPORT:
-		case SO_REUSEADDR:
-
-		/* the following ones need a better implementation,
-		 * but are quite common we want to preserve them
-		 */
-		case SO_BINDTODEVICE:
-		case SO_SNDBUF:
-		case SO_SNDBUFFORCE:
-		case SO_RCVBUF:
-		case SO_RCVBUFFORCE:
-		case SO_KEEPALIVE:
-		case SO_PRIORITY:
-		case SO_LINGER:
-		case SO_TIMESTAMP_OLD:
-		case SO_TIMESTAMP_NEW:
-		case SO_TIMESTAMPNS_OLD:
-		case SO_TIMESTAMPNS_NEW:
-		case SO_TIMESTAMPING_OLD:
-		case SO_TIMESTAMPING_NEW:
-		case SO_RCVLOWAT:
-		case SO_RCVTIMEO_OLD:
-		case SO_RCVTIMEO_NEW:
-		case SO_SNDTIMEO_OLD:
-		case SO_SNDTIMEO_NEW:
-		case SO_MARK:
-		case SO_INCOMING_CPU:
-		case SO_BINDTOIFINDEX:
-		case SO_BUSY_POLL:
-		case SO_PREFER_BUSY_POLL:
-		case SO_BUSY_POLL_BUDGET:
-
-		/* next ones are no-op for plain TCP */
-		case SO_NO_CHECK:
-		case SO_DONTROUTE:
-		case SO_BROADCAST:
-		case SO_BSDCOMPAT:
-		case SO_PASSCRED:
-		case SO_PASSSEC:
-		case SO_RXQ_OVFL:
-		case SO_WIFI_STATUS:
-		case SO_NOFCS:
-		case SO_SELECT_ERR_QUEUE:
-			return true;
-		}
-
-		/* SO_OOBINLINE is not supported, let's avoid the related mess */
-		/* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
-		 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
-		 * we must be careful with subflows
-		 */
-		/* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
-		 * explicitly the sk_protocol field
-		 */
-		/* SO_PEEK_OFF is unsupported, as it is for plain TCP */
-		/* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */
-		/* SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
-		 * but likely needs careful design
-		 */
-		/* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */
-		/* SO_TXTIME is currently unsupported */
-		return false;
-	}
 	if (level == SOL_IP) {
 		switch (optname) {
 		/* should work fine */
@@ -624,12 +583,12 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
 
 	pr_debug("msk=%p", msk);
 
-	if (!mptcp_supported_sockopt(level, optname))
-		return -ENOPROTOOPT;
-
 	if (level == SOL_SOCKET)
 		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 
+	if (!mptcp_supported_sockopt(level, optname))
+		return -ENOPROTOOPT;
+
 	/* @@ the meaning of setsockopt() when the socket is connected and
 	 * there are multiple subflows is not yet defined. It is up to the
 	 * MPTCP-level socket to configure the subflows until the subflow

From 892bfd3ded0ef0f895ed6356d0f85e2009421747 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:31 -0700
Subject: [PATCH 1005/3292] tcp: export timestamp helpers for mptcp

MPTCP is builtin, so no need to add EXPORT_SYMBOL()s.

It will be used to support SO_TIMESTAMP(NS) ancillary
messages in the mptcp receive path.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h |  4 ++++
 net/ipv4/tcp.c    | 10 ++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d05193cb0d99..e668f1bf780d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -412,6 +412,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		int flags, int *addr_len);
 int tcp_set_rcvlowat(struct sock *sk, int val);
 int tcp_set_window_clamp(struct sock *sk, int val);
+void tcp_update_recv_tstamps(struct sk_buff *skb,
+			     struct scm_timestamping_internal *tss);
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+			struct scm_timestamping_internal *tss);
 void tcp_data_ready(struct sock *sk);
 #ifdef CONFIG_MMU
 int tcp_mmap(struct file *file, struct socket *sock,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1c1f9e3de72..0e3f0e0e5b51 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1738,8 +1738,8 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_set_rcvlowat);
 
-static void tcp_update_recv_tstamps(struct sk_buff *skb,
-				    struct scm_timestamping_internal *tss)
+void tcp_update_recv_tstamps(struct sk_buff *skb,
+			     struct scm_timestamping_internal *tss)
 {
 	if (skb->tstamp)
 		tss->ts[0] = ktime_to_timespec64(skb->tstamp);
@@ -2024,8 +2024,6 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
 }
 
 #define TCP_VALID_ZC_MSG_FLAGS   (TCP_CMSG_TS)
-static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
-			       struct scm_timestamping_internal *tss);
 static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
 				      struct tcp_zerocopy_receive *zc,
 				      struct scm_timestamping_internal *tss)
@@ -2197,8 +2195,8 @@ out:
 #endif
 
 /* Similar to __sock_recv_timestamp, but does not require an skb */
-static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
-			       struct scm_timestamping_internal *tss)
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+			struct scm_timestamping_internal *tss)
 {
 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 	bool has_timestamping = false;

From b7f653b297a4b2ed16a11883044077e6bf3e3481 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:32 -0700
Subject: [PATCH 1006/3292] mptcp: receive path cmsg support

This adds support for SO_TIMESTAMP(NS).  Timestamps are passed to
userspace in the same way as for plain tcp sockets.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2bc199549a88..3897d35fd9df 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -39,10 +39,15 @@ struct mptcp_skb_cb {
 	u64 map_seq;
 	u64 end_seq;
 	u32 offset;
+	u8  has_rxtstamp:1;
 };
 
 #define MPTCP_SKB_CB(__skb)	((struct mptcp_skb_cb *)&((__skb)->cb[0]))
 
+enum {
+	MPTCP_CMSG_TS = BIT(0),
+};
+
 static struct percpu_counter mptcp_sockets_allocated;
 
 static void __mptcp_destroy_sock(struct sock *sk);
@@ -272,6 +277,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct sock *sk = (struct sock *)msk;
 	struct sk_buff *tail;
+	bool has_rxtstamp;
 
 	__skb_unlink(skb, &ssk->sk_receive_queue);
 
@@ -287,6 +293,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 			goto drop;
 	}
 
+	has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
 	/* the skb map_seq accounts for the skb offset:
 	 * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq
 	 * value
@@ -294,6 +302,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 	MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow);
 	MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
 	MPTCP_SKB_CB(skb)->offset = offset;
+	MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
 
 	if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
 		/* in sequence */
@@ -1757,7 +1766,9 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
 
 static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 				struct msghdr *msg,
-				size_t len, int flags)
+				size_t len, int flags,
+				struct scm_timestamping_internal *tss,
+				int *cmsg_flags)
 {
 	struct sk_buff *skb, *tmp;
 	int copied = 0;
@@ -1777,6 +1788,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 			}
 		}
 
+		if (MPTCP_SKB_CB(skb)->has_rxtstamp) {
+			tcp_update_recv_tstamps(skb, tss);
+			*cmsg_flags |= MPTCP_CMSG_TS;
+		}
+
 		copied += count;
 
 		if (count < data_len) {
@@ -1964,7 +1980,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			 int nonblock, int flags, int *addr_len)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	int copied = 0;
+	struct scm_timestamping_internal tss;
+	int copied = 0, cmsg_flags = 0;
 	int target;
 	long timeo;
 
@@ -1986,7 +2003,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	while (copied < len) {
 		int bytes_read;
 
-		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags);
+		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
 		if (unlikely(bytes_read < 0)) {
 			if (!copied)
 				copied = bytes_read;
@@ -2067,6 +2084,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		set_bit(MPTCP_DATA_READY, &msk->flags);
 	}
 out_err:
+	if (cmsg_flags && copied >= 0) {
+		if (cmsg_flags & MPTCP_CMSG_TS)
+			tcp_recv_timestamp(msg, sk, &tss);
+	}
+
 	pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
 		 msk, test_bit(MPTCP_DATA_READY, &msk->flags),
 		 skb_queue_empty_lockless(&sk->sk_receive_queue), copied);

From 5e6af0a729b669b1da6f9600867e2e4910505a6d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2021 16:24:33 -0700
Subject: [PATCH 1007/3292] selftests: mptcp_connect: add SO_TIMESTAMPNS cmsg
 support

This extends the existing setsockopt test case to also check for cmsg
timestamps.

mptcp_connect will abort/fail if the setockopt was passed but the
timestamp cmsg isn't present after successful recvmsg().

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/mptcp/mptcp_connect.c       | 125 +++++++++++++++++-
 .../selftests/net/mptcp/mptcp_sockopt.sh      |   4 +-
 2 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d88e1fdfb147..89c4753c2760 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -6,6 +6,7 @@
 #include <limits.h>
 #include <fcntl.h>
 #include <string.h>
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -25,6 +26,7 @@
 #include <netinet/in.h>
 
 #include <linux/tcp.h>
+#include <linux/time_types.h>
 
 extern int optind;
 
@@ -66,6 +68,13 @@ static unsigned int cfg_do_w;
 static int cfg_wait;
 static uint32_t cfg_mark;
 
+struct cfg_cmsg_types {
+	unsigned int cmsg_enabled:1;
+	unsigned int timestampns:1;
+};
+
+static struct cfg_cmsg_types cfg_cmsg_types;
+
 static void die_usage(void)
 {
 	fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
@@ -80,11 +89,22 @@ static void die_usage(void)
 	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
 	fprintf(stderr, "\t-u -- check mptcp ulp\n");
 	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
+	fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
 	fprintf(stderr,
 		"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
 	exit(1);
 }
 
+static void xerror(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
 static void handle_signal(int nr)
 {
 	quit = true;
@@ -338,6 +358,58 @@ static size_t do_write(const int fd, char *buf, const size_t len)
 	return offset;
 }
 
+static void process_cmsg(struct msghdr *msgh)
+{
+	struct __kernel_timespec ts;
+	bool ts_found = false;
+	struct cmsghdr *cmsg;
+
+	for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
+			memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
+			ts_found = true;
+			continue;
+		}
+	}
+
+	if (cfg_cmsg_types.timestampns) {
+		if (!ts_found)
+			xerror("TIMESTAMPNS not present\n");
+	}
+}
+
+static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
+{
+	char msg_buf[8192];
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = msg_buf,
+		.msg_controllen = sizeof(msg_buf),
+	};
+	int flags = 0;
+	int ret = recvmsg(fd, &msg, flags);
+
+	if (ret <= 0)
+		return ret;
+
+	if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
+		xerror("got %lu bytes of cmsg data, expected 0\n",
+		       (unsigned long)msg.msg_controllen);
+
+	if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
+		xerror("%s\n", "got no cmsg data");
+
+	if (msg.msg_controllen)
+		process_cmsg(&msg);
+
+	return ret;
+}
+
 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
 {
 	int ret = 0;
@@ -357,6 +429,8 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
 	} else if (cfg_peek == CFG_AFTER_PEEK) {
 		ret = recv(fd, buf, cap, MSG_PEEK);
 		ret = (ret < 0) ? ret : read(fd, buf, cap);
+	} else if (cfg_cmsg_types.cmsg_enabled) {
+		ret = do_recvmsg_cmsg(fd, buf, cap);
 	} else {
 		ret = read(fd, buf, cap);
 	}
@@ -786,6 +860,48 @@ static void init_rng(void)
 	srand(foo);
 }
 
+static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
+{
+	int err;
+
+	err = setsockopt(fd, level, optname, optval, optlen);
+	if (err) {
+		perror("setsockopt");
+		exit(1);
+	}
+}
+
+static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
+{
+	static const unsigned int on = 1;
+
+	if (cmsg->timestampns)
+		xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+}
+
+static void parse_cmsg_types(const char *type)
+{
+	char *next = strchr(type, ',');
+	unsigned int len = 0;
+
+	cfg_cmsg_types.cmsg_enabled = 1;
+
+	if (next) {
+		parse_cmsg_types(next + 1);
+		len = next - type;
+	} else {
+		len = strlen(type);
+	}
+
+	if (strncmp(type, "TIMESTAMPNS", len) == 0) {
+		cfg_cmsg_types.timestampns = 1;
+		return;
+	}
+
+	fprintf(stderr, "Unrecognized cmsg option %s\n", type);
+	exit(1);
+}
+
 int main_loop(void)
 {
 	int fd;
@@ -801,6 +917,8 @@ int main_loop(void)
 		set_rcvbuf(fd, cfg_rcvbuf);
 	if (cfg_sndbuf)
 		set_sndbuf(fd, cfg_sndbuf);
+	if (cfg_cmsg_types.cmsg_enabled)
+		apply_cmsg_types(fd, &cfg_cmsg_types);
 
 	return copyfd_io(0, fd, 1);
 }
@@ -887,7 +1005,7 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) {
+	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
@@ -943,6 +1061,9 @@ static void parse_opts(int argc, char **argv)
 		case 'P':
 			cfg_peek = parse_peek(optarg);
 			break;
+		case 'c':
+			parse_cmsg_types(optarg);
+			break;
 		}
 	}
 
@@ -976,6 +1097,8 @@ int main(int argc, char *argv[])
 			set_sndbuf(fd, cfg_sndbuf);
 		if (cfg_mark)
 			set_mark(fd, cfg_mark);
+		if (cfg_cmsg_types.cmsg_enabled)
+			apply_cmsg_types(fd, &cfg_cmsg_types);
 
 		return main_loop_s(fd);
 	}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 2fa13946ac04..1579e471a5e7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -178,7 +178,7 @@ do_transfer()
 
 	timeout ${timeout_test} \
 		ip netns exec ${listener_ns} \
-			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
+			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \
 				${local_addr} < "$sin" > "$sout" &
 	spid=$!
 
@@ -186,7 +186,7 @@ do_transfer()
 
 	timeout ${timeout_test} \
 		ip netns exec ${connector_ns} \
-			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
+			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \
 				$connect_addr < "$cin" > "$cout" &
 
 	cpid=$!

From 0efea3c649f0a50d473a4afe2d17c2bbcee639e1 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Fri, 4 Jun 2021 09:47:02 +0800
Subject: [PATCH 1008/3292] tipc: Return the correct errno code

When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index c44b4bfaaee6..5b6181277cc5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -912,7 +912,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
 	skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
 			      dnode, l->addr, dport, 0, 0);
 	if (!skb)
-		return -ENOBUFS;
+		return -ENOMEM;
 	msg_set_dest_droppable(buf_msg(skb), true);
 	TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
 	skb_queue_tail(&l->wakeupq, skb);
@@ -1030,7 +1030,7 @@ void tipc_link_reset(struct tipc_link *l)
  *
  * Consumes the buffer chain.
  * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM
  */
 int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 		   struct sk_buff_head *xmitq)
@@ -1088,7 +1088,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 			if (!_skb) {
 				kfree_skb(skb);
 				__skb_queue_purge(list);
-				return -ENOBUFS;
+				return -ENOMEM;
 			}
 			__skb_queue_tail(transmq, skb);
 			tipc_link_set_skb_retransmit_time(skb, l);

From 1c906e369815c9f35628c492a13e33e6ccbe0e4b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 4 Jun 2021 15:32:07 +0800
Subject: [PATCH 1009/3292] net: hdlc_x25: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_x25.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index ba8c36c7ea91..86b88f2a7028 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -70,22 +70,16 @@ static void x25_connect_disconnect(struct net_device *dev, int reason, int code)
 	tasklet_schedule(&x25st->rx_tasklet);
 }
 
-
-
 static void x25_connected(struct net_device *dev, int reason)
 {
 	x25_connect_disconnect(dev, reason, X25_IFACE_CONNECT);
 }
 
-
-
 static void x25_disconnected(struct net_device *dev, int reason)
 {
 	x25_connect_disconnect(dev, reason, X25_IFACE_DISCONNECT);
 }
 
-
-
 static int x25_data_indication(struct net_device *dev, struct sk_buff *skb)
 {
 	struct x25_state *x25st = state(dev_to_hdlc(dev));
@@ -108,8 +102,6 @@ static int x25_data_indication(struct net_device *dev, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-
-
 static void x25_data_transmit(struct net_device *dev, struct sk_buff *skb)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -123,8 +115,6 @@ static void x25_data_transmit(struct net_device *dev, struct sk_buff *skb)
 	hdlc->xmit(skb, dev); /* Ignore return value :-( */
 }
 
-
-
 static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -185,8 +175,6 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-
-
 static int x25_open(struct net_device *dev)
 {
 	static const struct lapb_register_struct cb = {
@@ -232,8 +220,6 @@ static int x25_open(struct net_device *dev)
 	return 0;
 }
 
-
-
 static void x25_close(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
@@ -247,8 +233,6 @@ static void x25_close(struct net_device *dev)
 	tasklet_kill(&x25st->rx_tasklet);
 }
 
-
-
 static int x25_rx(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -279,7 +263,6 @@ static int x25_rx(struct sk_buff *skb)
 	return NET_RX_DROP;
 }
 
-
 static struct hdlc_proto proto = {
 	.open		= x25_open,
 	.close		= x25_close,
@@ -289,7 +272,6 @@ static struct hdlc_proto proto = {
 	.module		= THIS_MODULE,
 };
 
-
 static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 {
 	x25_hdlc_proto __user *x25_s = ifr->ifr_settings.ifs_ifsu.x25;
@@ -380,21 +362,17 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 	return -EINVAL;
 }
 
-
 static int __init mod_init(void)
 {
 	register_hdlc_protocol(&proto);
 	return 0;
 }
 
-
-
 static void __exit mod_exit(void)
 {
 	unregister_hdlc_protocol(&proto);
 }
 
-
 module_init(mod_init);
 module_exit(mod_exit);
 

From 579ebffe797368b47b7e677fba2ecfc108ae154b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 4 Jun 2021 15:32:08 +0800
Subject: [PATCH 1010/3292] net: hdlc_x25: remove unnecessary out of memory
 message

This patch removes unnecessary out of memory message,
to fix the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_x25.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index 86b88f2a7028..525eb424fade 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -56,10 +56,8 @@ static void x25_connect_disconnect(struct net_device *dev, int reason, int code)
 	unsigned char *ptr;
 
 	skb = __dev_alloc_skb(1, GFP_ATOMIC | __GFP_NOMEMALLOC);
-	if (!skb) {
-		netdev_err(dev, "out of memory\n");
+	if (!skb)
 		return;
-	}
 
 	ptr = skb_put(skb, 1);
 	*ptr = code;

From ec1f377412444432e3585ff65d3b563d1c148738 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 4 Jun 2021 15:32:09 +0800
Subject: [PATCH 1011/3292] net: hdlc_x25: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_x25.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index 525eb424fade..24fdb6c8b840 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -137,13 +137,15 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
 	switch (skb->data[0]) {
 	case X25_IFACE_DATA:	/* Data to be transmitted */
 		skb_pull(skb, 1);
-		if ((result = lapb_data_request(dev, skb)) != LAPB_OK)
+		result = lapb_data_request(dev, skb);
+		if (result != LAPB_OK)
 			dev_kfree_skb(skb);
 		spin_unlock_bh(&x25st->up_lock);
 		return NETDEV_TX_OK;
 
 	case X25_IFACE_CONNECT:
-		if ((result = lapb_connect_request(dev))!= LAPB_OK) {
+		result = lapb_connect_request(dev);
+		if (result != LAPB_OK) {
 			if (result == LAPB_CONNECTED)
 				/* Send connect confirm. msg to level 3 */
 				x25_connected(dev, 0);
@@ -154,7 +156,8 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
 		break;
 
 	case X25_IFACE_DISCONNECT:
-		if ((result = lapb_disconnect_request(dev)) != LAPB_OK) {
+		result = lapb_disconnect_request(dev);
+		if (result != LAPB_OK) {
 			if (result == LAPB_NOTCONNECTED)
 				/* Send disconnect confirm. msg to level 3 */
 				x25_disconnected(dev, 0);
@@ -237,7 +240,8 @@ static int x25_rx(struct sk_buff *skb)
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	struct x25_state *x25st = state(hdlc);
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb) {
 		dev->stats.rx_dropped++;
 		return NET_RX_DROP;
 	}
@@ -333,8 +337,9 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 		if (result)
 			return result;
 
-		if ((result = attach_hdlc_protocol(dev, &proto,
-						   sizeof(struct x25_state))))
+		result = attach_hdlc_protocol(dev, &proto,
+					      sizeof(struct x25_state));
+		if (result)
 			return result;
 
 		memcpy(&state(hdlc)->settings, &new_settings, size);

From 5de446075c8e8c218302f8f819db88ccf0584aed Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 4 Jun 2021 15:32:10 +0800
Subject: [PATCH 1012/3292] net: hdlc_x25: add some required spaces

Add spaces required around that '='.
Add space required after that ','.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_x25.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index 24fdb6c8b840..4e771f789e10 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -333,7 +333,8 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 				return -EINVAL;
 		}
 
-		result=hdlc->attach(dev, ENCODING_NRZ,PARITY_CRC16_PR1_CCITT);
+		result = hdlc->attach(dev, ENCODING_NRZ,
+				      PARITY_CRC16_PR1_CCITT);
 		if (result)
 			return result;
 

From 792b070fca8fc945c2be7f9dd965d79c238d81c7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 4 Jun 2021 15:32:11 +0800
Subject: [PATCH 1013/3292] net: hdlc_x25: fix the code issue about
 "if..else.."

According to the chackpatch.pl, else should follow close brace '}'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_x25.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index 4e771f789e10..bd4fad3e6ff1 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -310,8 +310,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 			new_settings.t1 = 3;
 			new_settings.t2 = 1;
 			new_settings.n2 = 10;
-		}
-		else {
+		} else {
 			if (copy_from_user(&new_settings, x25_s, size))
 				return -EFAULT;
 

From 316fe3cc7de365f1c6164bbab8ef724c9ea1c6d3 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 4 Jun 2021 15:32:12 +0800
Subject: [PATCH 1014/3292] net: hdlc_x25: fix the alignment issue

Alignment should match open parenthesis.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_x25.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index bd4fad3e6ff1..d2bf72bf3bd7 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -315,20 +315,20 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 				return -EFAULT;
 
 			if ((new_settings.dce != 0 &&
-			new_settings.dce != 1) ||
-			(new_settings.modulo != 8 &&
-			new_settings.modulo != 128) ||
-			new_settings.window < 1 ||
-			(new_settings.modulo == 8 &&
-			new_settings.window > 7) ||
-			(new_settings.modulo == 128 &&
-			new_settings.window > 127) ||
-			new_settings.t1 < 1 ||
-			new_settings.t1 > 255 ||
-			new_settings.t2 < 1 ||
-			new_settings.t2 > 255 ||
-			new_settings.n2 < 1 ||
-			new_settings.n2 > 255)
+			     new_settings.dce != 1) ||
+			    (new_settings.modulo != 8 &&
+			     new_settings.modulo != 128) ||
+			    new_settings.window < 1 ||
+			    (new_settings.modulo == 8 &&
+			     new_settings.window > 7) ||
+			    (new_settings.modulo == 128 &&
+			     new_settings.window > 127) ||
+			    new_settings.t1 < 1 ||
+			    new_settings.t1 > 255 ||
+			    new_settings.t2 < 1 ||
+			    new_settings.t2 > 255 ||
+			    new_settings.n2 < 1 ||
+			    new_settings.n2 > 255)
 				return -EINVAL;
 		}
 

From ecb0605810f395961ea70c34e8946198bacd2f2d Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 4 Jun 2021 15:42:12 +0200
Subject: [PATCH 1015/3292] net: enetc: use get/put_unaligned helpers for MAC
 address handling

The supplied buffer for the MAC address might not be aligned. Thus
doing a 32bit (or 16bit) access could be on an unaligned address. For
now, enetc is only used on aarch64 which can do unaligned accesses, thus
there is no error. In any case, be correct and use the get/put_unaligned
helpers.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 31274325159a..c84f6c226743 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2017-2019 NXP */
 
+#include <asm/unaligned.h>
 #include <linux/mdio.h>
 #include <linux/module.h>
 #include <linux/fsl/enetc_mdio.h>
@@ -17,15 +18,15 @@ static void enetc_pf_get_primary_mac_addr(struct enetc_hw *hw, int si, u8 *addr)
 	u32 upper = __raw_readl(hw->port + ENETC_PSIPMAR0(si));
 	u16 lower = __raw_readw(hw->port + ENETC_PSIPMAR1(si));
 
-	*(u32 *)addr = upper;
-	*(u16 *)(addr + 4) = lower;
+	put_unaligned_le32(upper, addr);
+	put_unaligned_le16(lower, addr + 4);
 }
 
 static void enetc_pf_set_primary_mac_addr(struct enetc_hw *hw, int si,
 					  const u8 *addr)
 {
-	u32 upper = *(const u32 *)addr;
-	u16 lower = *(const u16 *)(addr + 4);
+	u32 upper = get_unaligned_le32(addr);
+	u16 lower = get_unaligned_le16(addr + 4);
 
 	__raw_writel(upper, hw->port + ENETC_PSIPMAR0(si));
 	__raw_writew(lower, hw->port + ENETC_PSIPMAR1(si));

From fcb34635854a5a5814227628867ea914a9805384 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Fri, 4 Jun 2021 12:37:47 +0200
Subject: [PATCH 1016/3292] net: bridge: mrp: Update ring transitions.

According to the standard IEC 62439-2, the number of transitions needs
to be counted for each transition 'between' ring state open and ring
state closed and not from open state to closed state.

Therefore fix this for both ring and interconnect ring.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index cd2b1e424e54..f7012b7d7ce4 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -627,8 +627,7 @@ int br_mrp_set_ring_state(struct net_bridge *br,
 	if (!mrp)
 		return -EINVAL;
 
-	if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED &&
-	    state->ring_state != BR_MRP_RING_STATE_CLOSED)
+	if (mrp->ring_state != state->ring_state)
 		mrp->ring_transitions++;
 
 	mrp->ring_state = state->ring_state;
@@ -715,8 +714,7 @@ int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state)
 	if (!mrp)
 		return -EINVAL;
 
-	if (mrp->in_state == BR_MRP_IN_STATE_CLOSED &&
-	    state->in_state != BR_MRP_IN_STATE_CLOSED)
+	if (mrp->in_state != state->in_state)
 		mrp->in_transitions++;
 
 	mrp->in_state = state->in_state;

From d5a73dcf09010f97ebdff197991f77eb9992f09e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Fri, 4 Jun 2021 13:36:33 +0200
Subject: [PATCH 1017/3292] net:cxgb3: fix incorrect work cancellation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In my last changes in commit 5e0b8928927f I introduced a copy-paste bug,
leading to cancel twice qresume_task work for OFLD queue, and never the
one for CTRL queue. This patch cancels correctly both works.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/sge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 115c03b0feb6..cb5c79c43bc9 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -3310,7 +3310,7 @@ void t3_sge_stop(struct adapter *adap)
 		struct sge_qset *qs = &adap->sge.qs[i];
 
 		cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
-		cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
+		cancel_work_sync(&qs->txq[TXQ_CTRL].qresume_task);
 	}
 }
 

From 1a42624aecba438f1d114430a14b640cdfa51c87 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Fri, 4 Jun 2021 11:29:22 -0500
Subject: [PATCH 1018/3292] net: dsa: xrs700x: allow HSR/PRP supervision dupes
 for node_table

Add an inbound policy filter which matches the HSR/PRP supervision
MAC range and forwards to the CPU port without discarding duplicates.
This is required to correctly populate time_in[A] and time_in[B] in the
HSR/PRP node_table. Leave the policy disabled by default and
enable/disable it when joining/leaving hsr.

Signed-off-by: George McCollister <george.mccollister@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/xrs700x/xrs700x.c | 67 +++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
index fde6e99274b6..a79066174a77 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.c
+++ b/drivers/net/dsa/xrs700x/xrs700x.c
@@ -79,6 +79,9 @@ static const struct xrs700x_mib xrs700x_mibs[] = {
 	XRS700X_MIB(XRS_EARLY_DROP_L, "early_drop", tx_dropped),
 };
 
+static const u8 eth_hsrsup_addr[ETH_ALEN] = {
+	0x01, 0x15, 0x4e, 0x00, 0x01, 0x00};
+
 static void xrs700x_get_strings(struct dsa_switch *ds, int port,
 				u32 stringset, u8 *data)
 {
@@ -329,6 +332,50 @@ static int xrs700x_port_add_bpdu_ipf(struct dsa_switch *ds, int port)
 	return 0;
 }
 
+/* Add an inbound policy filter which matches the HSR/PRP supervision MAC
+ * range and forwards to the CPU port without discarding duplicates.
+ * This is required to correctly populate the HSR/PRP node_table.
+ * Leave the policy disabled, it will be enabled as needed.
+ */
+static int xrs700x_port_add_hsrsup_ipf(struct dsa_switch *ds, int port)
+{
+	struct xrs700x *priv = ds->priv;
+	unsigned int val = 0;
+	int i = 0;
+	int ret;
+
+	/* Compare 40 bits of the destination MAC address. */
+	ret = regmap_write(priv->regmap, XRS_ETH_ADDR_CFG(port, 1), 40 << 2);
+	if (ret)
+		return ret;
+
+	/* match HSR/PRP supervision destination 01:15:4e:00:01:XX */
+	for (i = 0; i < sizeof(eth_hsrsup_addr); i += 2) {
+		ret = regmap_write(priv->regmap, XRS_ETH_ADDR_0(port, 1) + i,
+				   eth_hsrsup_addr[i] |
+				   (eth_hsrsup_addr[i + 1] << 8));
+		if (ret)
+			return ret;
+	}
+
+	/* Mirror HSR/PRP supervision to CPU port */
+	for (i = 0; i < ds->num_ports; i++) {
+		if (dsa_is_cpu_port(ds, i))
+			val |= BIT(i);
+	}
+
+	ret = regmap_write(priv->regmap, XRS_ETH_ADDR_FWD_MIRROR(port, 1), val);
+	if (ret)
+		return ret;
+
+	/* Allow must be set prevent duplicate discard */
+	ret = regmap_write(priv->regmap, XRS_ETH_ADDR_FWD_ALLOW(port, 1), val);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int xrs700x_port_setup(struct dsa_switch *ds, int port)
 {
 	bool cpu_port = dsa_is_cpu_port(ds, port);
@@ -358,6 +405,10 @@ static int xrs700x_port_setup(struct dsa_switch *ds, int port)
 		ret = xrs700x_port_add_bpdu_ipf(ds, port);
 		if (ret)
 			return ret;
+
+		ret = xrs700x_port_add_hsrsup_ipf(ds, port);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -565,6 +616,14 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port,
 			    XRS_PORT_FORWARDING);
 	regmap_fields_write(priv->ps_forward, port, XRS_PORT_FORWARDING);
 
+	/* Enable inbound policy added by xrs700x_port_add_hsrsup_ipf()
+	 * which allows HSR/PRP supervision forwarding to the CPU port without
+	 * discarding duplicates.
+	 */
+	regmap_update_bits(priv->regmap,
+			   XRS_ETH_ADDR_CFG(partner->index, 1), 1, 1);
+	regmap_update_bits(priv->regmap, XRS_ETH_ADDR_CFG(port, 1), 1, 1);
+
 	hsr_pair[0] = port;
 	hsr_pair[1] = partner->index;
 	for (i = 0; i < ARRAY_SIZE(hsr_pair); i++) {
@@ -611,6 +670,14 @@ static int xrs700x_hsr_leave(struct dsa_switch *ds, int port,
 			    XRS_PORT_FORWARDING);
 	regmap_fields_write(priv->ps_forward, port, XRS_PORT_FORWARDING);
 
+	/* Disable inbound policy added by xrs700x_port_add_hsrsup_ipf()
+	 * which allows HSR/PRP supervision forwarding to the CPU port without
+	 * discarding duplicates.
+	 */
+	regmap_update_bits(priv->regmap,
+			   XRS_ETH_ADDR_CFG(partner->index, 1), 1, 0);
+	regmap_update_bits(priv->regmap, XRS_ETH_ADDR_CFG(port, 1), 1, 0);
+
 	hsr_pair[0] = port;
 	hsr_pair[1] = partner->index;
 	for (i = 0; i < ARRAY_SIZE(hsr_pair); i++) {

From a4d7e8ae4a541557d7a2c815835b786c18c3613c Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Wed, 2 Jun 2021 14:36:44 -0700
Subject: [PATCH 1019/3292] Drivers: hv: Move Hyper-V extended capability check
 to arch neutral code

The extended capability query code is currently under arch/x86, but it
is architecture neutral, and is used by arch neutral code in the Hyper-V
balloon driver. Hence the balloon driver fails to build on other
architectures.

Fix by moving the ext cap code out from arch/x86.  Because it is also
called from built-in architecture specific code, it can't be in a module,
so the Makefile treats as built-in even when CONFIG_HYPERV is "m".  Also
drivers/Makefile is tweaked because this is the first occurrence of a
Hyper-V file that is built-in even when CONFIG_HYPERV is "m".

While here, update the hypercall status check to use the new helper
function instead of open coding. No functional change.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Reviewed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Link: https://lore.kernel.org/r/1622669804-2016-1-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/hyperv/hv_init.c | 47 ----------------------------
 drivers/Makefile          |  2 +-
 drivers/hv/Makefile       |  3 ++
 drivers/hv/hv_common.c    | 66 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 48 deletions(-)
 create mode 100644 drivers/hv/hv_common.c

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index bb0ae4b5c00f..6952e219cba3 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -614,50 +614,3 @@ bool hv_is_isolation_supported(void)
 	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
 }
 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
-
-/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
-bool hv_query_ext_cap(u64 cap_query)
-{
-	/*
-	 * The address of the 'hv_extended_cap' variable will be used as an
-	 * output parameter to the hypercall below and so it should be
-	 * compatible with 'virt_to_phys'. Which means, it's address should be
-	 * directly mapped. Use 'static' to keep it compatible; stack variables
-	 * can be virtually mapped, making them imcompatible with
-	 * 'virt_to_phys'.
-	 * Hypercall input/output addresses should also be 8-byte aligned.
-	 */
-	static u64 hv_extended_cap __aligned(8);
-	static bool hv_extended_cap_queried;
-	u64 status;
-
-	/*
-	 * Querying extended capabilities is an extended hypercall. Check if the
-	 * partition supports extended hypercall, first.
-	 */
-	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
-		return false;
-
-	/* Extended capabilities do not change at runtime. */
-	if (hv_extended_cap_queried)
-		return hv_extended_cap & cap_query;
-
-	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
-				 &hv_extended_cap);
-
-	/*
-	 * The query extended capabilities hypercall should not fail under
-	 * any normal circumstances. Avoid repeatedly making the hypercall, on
-	 * error.
-	 */
-	hv_extended_cap_queried = true;
-	status &= HV_HYPERCALL_RESULT_MASK;
-	if (status != HV_STATUS_SUCCESS) {
-		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
-		       status);
-		return false;
-	}
-
-	return hv_extended_cap & cap_query;
-}
-EXPORT_SYMBOL_GPL(hv_query_ext_cap);
diff --git a/drivers/Makefile b/drivers/Makefile
index 5a6d613e868d..1c2e1acbd098 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -161,7 +161,7 @@ obj-$(CONFIG_SOUNDWIRE)		+= soundwire/
 
 # Virtualization drivers
 obj-$(CONFIG_VIRT_DRIVERS)	+= virt/
-obj-$(CONFIG_HYPERV)		+= hv/
+obj-$(subst m,y,$(CONFIG_HYPERV))	+= hv/
 
 obj-$(CONFIG_PM_DEVFREQ)	+= devfreq/
 obj-$(CONFIG_EXTCON)		+= extcon/
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 94daf8240c95..d76df5c8c2a9 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -11,3 +11,6 @@ hv_vmbus-y := vmbus_drv.o \
 		 channel_mgmt.o ring_buffer.o hv_trace.o
 hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o
+
+# Code that must be built-in
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
new file mode 100644
index 000000000000..f0053c786891
--- /dev/null
+++ b/drivers/hv/hv_common.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Architecture neutral utility routines for interacting with
+ * Hyper-V. This file is specifically for code that must be
+ * built-in to the kernel image when CONFIG_HYPERV is set
+ * (vs. being in a module) because it is called from architecture
+ * specific code under arch/.
+ *
+ * Copyright (C) 2021, Microsoft, Inc.
+ *
+ * Author : Michael Kelley <mikelley@microsoft.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/bitfield.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+
+
+/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
+bool hv_query_ext_cap(u64 cap_query)
+{
+	/*
+	 * The address of the 'hv_extended_cap' variable will be used as an
+	 * output parameter to the hypercall below and so it should be
+	 * compatible with 'virt_to_phys'. Which means, it's address should be
+	 * directly mapped. Use 'static' to keep it compatible; stack variables
+	 * can be virtually mapped, making them imcompatible with
+	 * 'virt_to_phys'.
+	 * Hypercall input/output addresses should also be 8-byte aligned.
+	 */
+	static u64 hv_extended_cap __aligned(8);
+	static bool hv_extended_cap_queried;
+	u64 status;
+
+	/*
+	 * Querying extended capabilities is an extended hypercall. Check if the
+	 * partition supports extended hypercall, first.
+	 */
+	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
+		return false;
+
+	/* Extended capabilities do not change at runtime. */
+	if (hv_extended_cap_queried)
+		return hv_extended_cap & cap_query;
+
+	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
+				 &hv_extended_cap);
+
+	/*
+	 * The query extended capabilities hypercall should not fail under
+	 * any normal circumstances. Avoid repeatedly making the hypercall, on
+	 * error.
+	 */
+	hv_extended_cap_queried = true;
+	if (!hv_result_success(status)) {
+		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
+		       status);
+		return false;
+	}
+
+	return hv_extended_cap & cap_query;
+}
+EXPORT_SYMBOL_GPL(hv_query_ext_cap);

From e22808071d4d23596e6cc8f62588225515789031 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 1 Jun 2021 13:31:55 +0200
Subject: [PATCH 1020/3292] dt-bindings: irqchip: renesas-irqc: Add R-Car M3-W+
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document support for the Interrupt Controller for External Devices
(INT-EC) in the Renesas R-Car M3-W+ (r8a77961) SoC.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/55d2c30cb14b2e10193a7fd4aa7670c70f360037.1622546880.git.geert+renesas@glider.be
---
 .../devicetree/bindings/interrupt-controller/renesas,irqc.yaml   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
index b67b8cbd33fc..abb22db3bb28 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
@@ -29,6 +29,7 @@ properties:
           - renesas,intc-ex-r8a774c0    # RZ/G2E
           - renesas,intc-ex-r8a7795     # R-Car H3
           - renesas,intc-ex-r8a7796     # R-Car M3-W
+          - renesas,intc-ex-r8a77961    # R-Car M3-W+
           - renesas,intc-ex-r8a77965    # R-Car M3-N
           - renesas,intc-ex-r8a77970    # R-Car V3M
           - renesas,intc-ex-r8a77980    # R-Car V3H

From 4acd8a4be614a6c191273f2247aff7374a92f318 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Tue, 18 May 2021 21:19:21 +0000
Subject: [PATCH 1021/3292] irqchip/qcom-pdc: Switch to IRQCHIP_PLATFORM_DRIVER
 and allow as a module

This patch revives changes from Saravana Kannan to switch the
qcom-pdc driver to use IRQCHIP_PLATFORM_DRIVER helper macros,
and allows qcom-pdc driver to be loaded as a permanent module.

Earlier attempts at this ran into trouble with loading
dependencies, but with Saravana's fw_devlink=on set by default
now we should avoid those.

[jstultz: Folded in with my changes to allow the driver to be
 loadable as a permenent module]

Cc: Andy Gross <agross@kernel.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Maulik Shah <mkshah@codeaurora.org>
Cc: Lina Iyer <ilina@codeaurora.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Todd Kjos <tkjos@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-arm-msm@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: linux-gpio@vger.kernel.org
Signed-off-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210518211922.3474368-1-john.stultz@linaro.org
---
 drivers/irqchip/Kconfig    | 2 +-
 drivers/irqchip/qcom-pdc.c | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 62543a4eccc0..4d5924e9f766 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -415,7 +415,7 @@ config GOLDFISH_PIC
          for Goldfish based virtual platforms.
 
 config QCOM_PDC
-	bool "QCOM PDC"
+	tristate "QCOM PDC"
 	depends on ARCH_QCOM
 	select IRQ_DOMAIN_HIERARCHY
 	help
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 5dc63c20b67e..32d59202d408 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -11,9 +11,11 @@
 #include <linux/irqdomain.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
 #include <linux/soc/qcom/irq.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
@@ -459,4 +461,8 @@ fail:
 	return ret;
 }
 
-IRQCHIP_DECLARE(qcom_pdc, "qcom,pdc", qcom_pdc_init);
+IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_pdc)
+IRQCHIP_MATCH("qcom,pdc", qcom_pdc_init)
+IRQCHIP_PLATFORM_DRIVER_END(qcom_pdc)
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Power Domain Controller");
+MODULE_LICENSE("GPL v2");

From c96d6abbec52d6723bef6b50846f40f7fb27e93c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 19 May 2021 13:04:55 +0800
Subject: [PATCH 1022/3292] irqchip/mbigen: Fix compile warning when
 CONFIG_ACPI is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compile warning:

  drivers/irqchip/irq-mbigen.c:372:36: warning: ‘mbigen_acpi_match’ defined but not used [-Wunused-const-variable=]
   static const struct acpi_device_id mbigen_acpi_match[] = {

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210519050455.1693953-1-yangyingliang@huawei.com
---
 drivers/irqchip/irq-mbigen.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 2cb45c6b8501..f565317a3da3 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -273,6 +273,12 @@ static int mbigen_of_create_domain(struct platform_device *pdev,
 }
 
 #ifdef CONFIG_ACPI
+static const struct acpi_device_id mbigen_acpi_match[] = {
+	{ "HISI0152", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match);
+
 static int mbigen_acpi_create_domain(struct platform_device *pdev,
 				     struct mbigen_device *mgn_chip)
 {
@@ -369,12 +375,6 @@ static const struct of_device_id mbigen_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, mbigen_of_match);
 
-static const struct acpi_device_id mbigen_acpi_match[] = {
-	{ "HISI0152", 0 },
-	{}
-};
-MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match);
-
 static struct platform_driver mbigen_platform_driver = {
 	.driver = {
 		.name		= "Hisilicon MBIGEN-V2",

From 64c2c2c62f92339b176ea24403d8db16db36f9e6 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 May 2021 16:07:48 +0200
Subject: [PATCH 1023/3292] quota: Change quotactl_path() systcall to an
 fd-based one

Some users have pointed out that path-based syscalls are problematic in
some environments and at least directory fd argument and possibly also
resolve flags are desirable for such syscalls. Rather than
reimplementing all details of pathname lookup and following where it may
eventually evolve, let's go for full file descriptor based syscall
similar to how ioctl(2) works since the beginning. Managing of quotas
isn't performance sensitive so the extra overhead of open does not
matter and we are able to consume O_PATH descriptors as well which makes
open cheap anyway. Also for frequent operations (such as retrieving
usage information for all users) we can reuse single fd and in fact get
even better performance as well as avoiding races with possible remounts
etc.

Tested-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/quota.c                  | 28 +++++++++++++---------------
 include/linux/syscalls.h          |  4 ++--
 include/uapi/asm-generic/unistd.h |  4 ++--
 kernel/sys_ni.c                   |  2 +-
 4 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 05e4bd9ab6d6..2bcc9a6f1bfc 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -968,31 +968,30 @@ out:
 	return ret;
 }
 
-SYSCALL_DEFINE4(quotactl_path, unsigned int, cmd, const char __user *,
-		mountpoint, qid_t, id, void __user *, addr)
+SYSCALL_DEFINE4(quotactl_fd, unsigned int, fd, unsigned int, cmd,
+		qid_t, id, void __user *, addr)
 {
 	struct super_block *sb;
-	struct path mountpath;
 	unsigned int cmds = cmd >> SUBCMDSHIFT;
 	unsigned int type = cmd & SUBCMDMASK;
+	struct fd f;
 	int ret;
 
+	f = fdget_raw(fd);
+	if (!f.file)
+		return -EBADF;
+
+	ret = -EINVAL;
 	if (type >= MAXQUOTAS)
-		return -EINVAL;
-
-	ret = user_path_at(AT_FDCWD, mountpoint,
-			     LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT, &mountpath);
-	if (ret)
-		return ret;
-
-	sb = mountpath.mnt->mnt_sb;
+		goto out;
 
 	if (quotactl_cmd_write(cmds)) {
-		ret = mnt_want_write(mountpath.mnt);
+		ret = mnt_want_write(f.file->f_path.mnt);
 		if (ret)
 			goto out;
 	}
 
+	sb = f.file->f_path.mnt->mnt_sb;
 	if (quotactl_cmd_onoff(cmds))
 		down_write(&sb->s_umount);
 	else
@@ -1006,9 +1005,8 @@ SYSCALL_DEFINE4(quotactl_path, unsigned int, cmd, const char __user *,
 		up_read(&sb->s_umount);
 
 	if (quotactl_cmd_write(cmds))
-		mnt_drop_write(mountpath.mnt);
+		mnt_drop_write(f.file->f_path.mnt);
 out:
-	path_put(&mountpath);
-
+	fdput(f);
 	return ret;
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 050511e8f1f8..586128d5c3b8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -485,8 +485,8 @@ asmlinkage long sys_pipe2(int __user *fildes, int flags);
 /* fs/quota.c */
 asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special,
 				qid_t id, void __user *addr);
-asmlinkage long sys_quotactl_path(unsigned int cmd, const char __user *mountpoint,
-				  qid_t id, void __user *addr);
+asmlinkage long sys_quotactl_fd(unsigned int fd, unsigned int cmd, qid_t id,
+				void __user *addr);
 
 /* fs/readdir.c */
 asmlinkage long sys_getdents64(unsigned int fd,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b..f211961ce1da 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -863,8 +863,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+#define __NR_quotactl_fd 443
+__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0ea8128468c3..dad4d994641e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -99,7 +99,7 @@ COND_SYSCALL(flock);
 
 /* fs/quota.c */
 COND_SYSCALL(quotactl);
-COND_SYSCALL(quotactl_path);
+COND_SYSCALL(quotactl_fd);
 
 /* fs/readdir.c */
 

From 65ffb3d69ed3da28af85b1e4b2aaacd6c13ba28b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 31 May 2021 18:42:58 +0200
Subject: [PATCH 1024/3292] quota: Wire up quotactl_fd syscall

Wire up the quotactl_fd syscall.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 2 +-
 arch/arm/tools/syscall.tbl                  | 2 +-
 arch/arm64/include/asm/unistd32.h           | 3 ++-
 arch/ia64/kernel/syscalls/syscall.tbl       | 2 +-
 arch/m68k/kernel/syscalls/syscall.tbl       | 2 +-
 arch/microblaze/kernel/syscalls/syscall.tbl | 2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 2 +-
 arch/parisc/kernel/syscalls/syscall.tbl     | 2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl    | 2 +-
 arch/s390/kernel/syscalls/syscall.tbl       | 2 +-
 arch/sh/kernel/syscalls/syscall.tbl         | 2 +-
 arch/sparc/kernel/syscalls/syscall.tbl      | 2 +-
 arch/x86/entry/syscalls/syscall_32.tbl      | 2 +-
 arch/x86/entry/syscalls/syscall_64.tbl      | 2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl     | 2 +-
 17 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 3000a2e8ee21..a17687ed4b51 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -482,7 +482,7 @@
 550	common	process_madvise			sys_process_madvise
 551	common	epoll_pwait2			sys_epoll_pwait2
 552	common	mount_setattr			sys_mount_setattr
-# 553 reserved for quotactl_path
+553	common	quotactl_fd			sys_quotactl_fd
 554	common	landlock_create_ruleset		sys_landlock_create_ruleset
 555	common	landlock_add_rule		sys_landlock_add_rule
 556	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 28e03b5fec00..c5df1179fc5d 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -456,7 +456,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5dab69d2c22b..99ffcafc736c 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,7 +893,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-/* 443 is reserved for quotactl_path */
+#define __NR_quotactl_fd 443
+__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
 #define __NR_landlock_add_rule 445
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index bb11fe4c875a..6d07742c57b8 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 79c2d24c89dd..541bc1b3a8f9 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -442,7 +442,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index b11395a20c20..a176faca2927 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -448,7 +448,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 9220909526f9..c2d2e19abea8 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -381,7 +381,7 @@
 440	n32	process_madvise			sys_process_madvise
 441	n32	epoll_pwait2			compat_sys_epoll_pwait2
 442	n32	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	n32	quotactl_fd			sys_quotactl_fd
 444	n32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n32	landlock_add_rule		sys_landlock_add_rule
 446	n32	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 9cd1c34f31b5..ac653d08b1ea 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
 442	n64	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	n64	quotactl_fd			sys_quotactl_fd
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index d560c467a8c6..253f2cd70b6b 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -430,7 +430,7 @@
 440	o32	process_madvise			sys_process_madvise
 441	o32	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	o32	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	o32	quotactl_fd			sys_quotactl_fd
 444	o32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	o32	landlock_add_rule		sys_landlock_add_rule
 446	o32	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index aabc37f8cae3..e26187b9ab87 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -440,7 +440,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 8f052ff4058c..aef2a290e71a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 0690263df1dd..64d51ab5a8b4 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
-# 443 reserved for quotactl_path
+443  common	quotactl_fd		sys_quotactl_fd			sys_quotactl_fd
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 0b91499ebdcf..e0a70be77d84 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index e34cc30ef22c..603f5a821502 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -488,7 +488,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4bbc267fb36b..fba2f615119a 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -447,7 +447,7 @@
 440	i386	process_madvise		sys_process_madvise
 441	i386	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	i386	mount_setattr		sys_mount_setattr
-# 443 reserved for quotactl_path
+443	i386	quotactl_fd		sys_quotactl_fd
 444	i386	landlock_create_ruleset	sys_landlock_create_ruleset
 445	i386	landlock_add_rule	sys_landlock_add_rule
 446	i386	landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index ce18119ea0d0..af973e400053 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd		sys_quotactl_fd
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index fd2f30227d96..235d67d6ceb4 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -413,7 +413,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-# 443 reserved for quotactl_path
+443	common	quotactl_fd			sys_quotactl_fd
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self

From ef4b65e53cc77e2b3ca4667b461047ad04fb45fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 31 May 2021 00:08:09 +0200
Subject: [PATCH 1025/3292] netfilter: nfnetlink: add struct nfgenmsg to struct
 nfnl_info and use it

Update the nfnl_info structure to add a pointer to the nfnetlink header.
This simplifies the existing codebase since this header is usually
accessed. Update existing clients to use this new field.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  1 +
 net/netfilter/nf_conntrack_netlink.c | 23 +++++-------
 net/netfilter/nf_tables_api.c        | 55 ++++++++++------------------
 net/netfilter/nfnetlink.c            |  2 +
 net/netfilter/nfnetlink_log.c        |  5 +--
 net/netfilter/nfnetlink_queue.c      |  9 ++---
 net/netfilter/nft_compat.c           | 17 +++------
 7 files changed, 42 insertions(+), 70 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 515ce53aa20d..241e005f290a 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -11,6 +11,7 @@ struct nfnl_info {
 	struct net		*net;
 	struct sock		*sk;
 	const struct nlmsghdr	*nlh;
+	const struct nfgenmsg	*nfmsg;
 	struct netlink_ext_ack	*extack;
 };
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 220f51f055ab..4e1a9dba7077 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1528,7 +1528,7 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
 				   const struct nfnl_info *info,
 				   const struct nlattr * const cda[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u8 family = info->nfmsg->nfgen_family;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_zone zone;
@@ -1541,12 +1541,12 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
 
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
-					    nfmsg->nfgen_family, &zone);
+					    family, &zone);
 	else if (cda[CTA_TUPLE_REPLY])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
-					    nfmsg->nfgen_family, &zone);
+					    family, &zone);
 	else {
-		u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
+		u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC;
 
 		return ctnetlink_flush_conntrack(info->net, cda,
 						 NETLINK_CB(skb).portid,
@@ -1586,8 +1586,7 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
 				   const struct nfnl_info *info,
 				   const struct nlattr * const cda[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
+	u_int8_t u3 = info->nfmsg->nfgen_family;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_zone zone;
@@ -2363,10 +2362,9 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
 				   const struct nfnl_info *info,
 				   const struct nlattr * const cda[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
-	u_int8_t u3 = nfmsg->nfgen_family;
+	u_int8_t u3 = info->nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
 	struct nf_conn *ct;
 	int err;
@@ -3259,8 +3257,7 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
 				const struct nfnl_info *info,
 				const struct nlattr * const cda[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
+	u_int8_t u3 = info->nfmsg->nfgen_family;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_zone zone;
@@ -3349,8 +3346,7 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
 				const struct nfnl_info *info,
 				const struct nlattr * const cda[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
+	u_int8_t u3 = info->nfmsg->nfgen_family;
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_zone zone;
@@ -3601,8 +3597,7 @@ static int ctnetlink_new_expect(struct sk_buff *skb,
 				const struct nfnl_info *info,
 				const struct nlattr * const cda[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
+	u_int8_t u3 = info->nfmsg->nfgen_family;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_zone zone;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d63d2d8f769c..b2b4e03ce036 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -861,10 +861,9 @@ static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
 static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nft_table *table;
 	struct net *net = info->net;
 	struct sk_buff *skb2;
@@ -1059,10 +1058,9 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nla[])
 {
 	struct nftables_pernet *nft_net = nft_pernet(info->net);
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
@@ -1254,10 +1252,9 @@ out:
 static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
@@ -1627,10 +1624,9 @@ done:
 static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nft_chain *chain;
 	struct net *net = info->net;
 	struct nft_table *table;
@@ -2355,10 +2351,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nla[])
 {
 	struct nftables_pernet *nft_net = nft_pernet(info->net);
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct nft_chain *chain = NULL;
 	struct net *net = info->net;
 	const struct nlattr *attr;
@@ -2453,10 +2448,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
 static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
@@ -3080,10 +3074,9 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
 			     const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nft_chain *chain;
 	const struct nft_rule *rule;
 	struct net *net = info->net;
@@ -3221,13 +3214,12 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 			     const struct nlattr * const nla[])
 {
 	struct nftables_pernet *nft_net = nft_pernet(info->net);
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	unsigned int size, i, n, ulen = 0, usize = 0;
 	u8 genmask = nft_genmask_next(info->net);
 	struct nft_rule *rule, *old_rule = NULL;
 	struct nft_expr_info *expr_info = NULL;
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	struct nft_flow_rule *flow;
 	struct nft_userdata *udata;
@@ -3459,15 +3451,15 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
 			     const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
-	int family = nfmsg->nfgen_family, err = 0;
 	u8 genmask = nft_genmask_next(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	struct nft_chain *chain = NULL;
 	struct net *net = info->net;
 	struct nft_table *table;
 	struct nft_rule *rule;
 	struct nft_ctx ctx;
+	int err = 0;
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
 				 NETLINK_CB(skb).portid);
@@ -4050,7 +4042,6 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
 static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
 	struct net *net = info->net;
@@ -4078,7 +4069,7 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
 	}
 
 	/* Only accept unspec with dump */
-	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+	if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
 		return -EAFNOSUPPORT;
 	if (!nla[NFTA_SET_TABLE])
 		return -EINVAL;
@@ -4171,11 +4162,10 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
 static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	u32 ktype, dtype, flags, policy, gc_int, objtype;
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nft_set_ops *ops;
 	struct nft_expr *expr = NULL;
 	struct net *net = info->net;
@@ -4475,7 +4465,6 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
 	struct net *net = info->net;
@@ -4484,7 +4473,7 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
 	struct nft_ctx ctx;
 	int err;
 
-	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+	if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
 		return -EAFNOSUPPORT;
 	if (nla[NFTA_SET_TABLE] == NULL)
 		return -EINVAL;
@@ -6527,11 +6516,10 @@ err_free_trans:
 static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nft_object_type *type;
-	int family = nfmsg->nfgen_family;
 	struct net *net = info->net;
 	struct nft_table *table;
 	struct nft_object *obj;
@@ -6783,10 +6771,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nft_table *table;
 	struct net *net = info->net;
 	struct nft_object *obj;
@@ -6873,10 +6860,9 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
 static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
@@ -7304,12 +7290,11 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
 				  const struct nfnl_info *info,
 				  const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	struct nft_flowtable_hook flowtable_hook;
 	u8 genmask = nft_genmask_next(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	const struct nf_flowtable_type *type;
-	int family = nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
 	struct nft_hook *hook, *next;
 	struct net *net = info->net;
@@ -7493,10 +7478,9 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
 				  const struct nfnl_info *info,
 				  const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
 	struct net *net = info->net;
 	const struct nlattr *attr;
@@ -7688,9 +7672,8 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
 				  const struct nfnl_info *info,
 				  const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	u8 genmask = nft_genmask_cur(info->net);
-	int family = nfmsg->nfgen_family;
+	u8 family = info->nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
 	const struct nft_table *table;
 	struct net *net = info->net;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e8dbd8379027..028a1f39318b 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -256,6 +256,7 @@ replay:
 			.net	= net,
 			.sk	= nfnlnet->nfnl,
 			.nlh	= nlh,
+			.nfmsg	= nlmsg_data(nlh),
 			.extack	= extack,
 		};
 
@@ -491,6 +492,7 @@ replay_abort:
 				.net	= net,
 				.sk	= nfnlnet->nfnl,
 				.nlh	= nlh,
+				.nfmsg	= nlmsg_data(nlh),
 				.extack	= &extack,
 			};
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 587086b18c36..691ef4cffdd9 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -871,15 +871,14 @@ static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nfula[])
 {
 	struct nfnl_log_net *log = nfnl_log_pernet(info->net);
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int16_t group_num = ntohs(nfmsg->res_id);
+	u_int16_t group_num = ntohs(info->nfmsg->res_id);
 	struct nfulnl_msg_config_cmd *cmd = NULL;
 	struct nfulnl_instance *inst;
 	u16 flags = 0;
 	int ret = 0;
 
 	if (nfula[NFULA_CFG_CMD]) {
-		u_int8_t pf = nfmsg->nfgen_family;
+		u_int8_t pf = info->nfmsg->nfgen_family;
 		cmd = nla_data(nfula[NFULA_CFG_CMD]);
 
 		/* Commands without queue context */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f37a575ebd7f..f774de0fc24f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1051,8 +1051,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
 				    const struct nlattr * const nfqa[])
 {
 	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u16 queue_num = ntohs(nfmsg->res_id);
+	u16 queue_num = ntohs(info->nfmsg->res_id);
 	struct nf_queue_entry *entry, *tmp;
 	struct nfqnl_msg_verdict_hdr *vhdr;
 	struct nfqnl_instance *queue;
@@ -1160,8 +1159,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
 			      const struct nlattr * const nfqa[])
 {
 	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int16_t queue_num = ntohs(nfmsg->res_id);
+	u_int16_t queue_num = ntohs(info->nfmsg->res_id);
 	struct nfqnl_msg_verdict_hdr *vhdr;
 	enum ip_conntrack_info ctinfo;
 	struct nfqnl_instance *queue;
@@ -1243,8 +1241,7 @@ static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
 			     const struct nlattr * const nfqa[])
 {
 	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
-	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
-	u_int16_t queue_num = ntohs(nfmsg->res_id);
+	u_int16_t queue_num = ntohs(info->nfmsg->res_id);
 	struct nfqnl_msg_config_cmd *cmd = NULL;
 	struct nfqnl_instance *queue;
 	__u32 flags = 0, mask = 0;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 3144a9ad2f6a..639c337c885b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -625,7 +625,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
 			       const struct nfnl_info *info,
 			       const struct nlattr * const tb[])
 {
-	struct nfgenmsg *nfmsg;
+	u8 family = info->nfmsg->nfgen_family;
 	const char *name, *fmt;
 	struct sk_buff *skb2;
 	int ret = 0, target;
@@ -640,9 +640,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
 	rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
 	target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
 
-	nfmsg = nlmsg_data(info->nlh);
-
-	switch(nfmsg->nfgen_family) {
+	switch(family) {
 	case AF_INET:
 		fmt = "ipt_%s";
 		break;
@@ -656,8 +654,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
 		fmt = "arpt_%s";
 		break;
 	default:
-		pr_err("nft_compat: unsupported protocol %d\n",
-			nfmsg->nfgen_family);
+		pr_err("nft_compat: unsupported protocol %d\n", family);
 		return -EINVAL;
 	}
 
@@ -665,9 +662,8 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
 		return -EINVAL;
 
 	rcu_read_unlock();
-	try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
-						 rev, target, &ret),
-						 fmt, name);
+	try_then_request_module(xt_find_revision(family, name, rev, target, &ret),
+				fmt, name);
 	if (ret < 0)
 		goto out_put;
 
@@ -682,8 +678,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
 				  info->nlh->nlmsg_seq,
 				  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 				  NFNL_MSG_COMPAT_GET,
-				  nfmsg->nfgen_family,
-				  name, ret, target) <= 0) {
+				  family, name, ret, target) <= 0) {
 		kfree_skb(skb2);
 		goto out_put;
 	}

From e2b750d78b55d783f6ff4a1ab1e96f01e3e2ccfb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 31 May 2021 00:08:10 +0200
Subject: [PATCH 1026/3292] netfilter: nf_tables: remove
 nft_ctx_init_from_elemattr()

Replace nft_ctx_init_from_elemattr() by nft_table_lookup() and set up
the context structure right before it is really needed.

Moreover, nft_ctx_init_from_elemattr() is setting up the context
structure for codepaths where this is not really needed at all.

This helper function is also not helping to consolidate code, removing
it saves us 4 LoC.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 72 +++++++++++++++++------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b2b4e03ce036..2fbcb2543795 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4703,28 +4703,6 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
 	[NFTA_SET_ELEM_LIST_SET_ID]	= { .type = NLA_U32 },
 };
 
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
-				      const struct sk_buff *skb,
-				      const struct nlmsghdr *nlh,
-				      const struct nlattr * const nla[],
-				      struct netlink_ext_ack *extack,
-				      u8 genmask, u32 nlpid)
-{
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	int family = nfmsg->nfgen_family;
-	struct nft_table *table;
-
-	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
-				 genmask, nlpid);
-	if (IS_ERR(table)) {
-		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
-		return PTR_ERR(table);
-	}
-
-	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
-	return 0;
-}
-
 static int nft_set_elem_expr_dump(struct sk_buff *skb,
 				  const struct nft_set *set,
 				  const struct nft_set_ext *ext)
@@ -5182,21 +5160,27 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
 {
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nlattr *attr;
 	struct nft_ctx ctx;
 	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
-					 genmask, NETLINK_CB(skb).portid);
-	if (err < 0)
-		return err;
+	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+				 genmask, NETLINK_CB(skb).portid);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+		return PTR_ERR(table);
+	}
 
-	set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+	set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
 	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = nf_tables_dump_set_start,
@@ -5965,8 +5949,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
 	struct nftables_pernet *nft_net = nft_pernet(info->net);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	int rem, err;
@@ -5974,12 +5960,14 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
 	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
-					 genmask, NETLINK_CB(skb).portid);
-	if (err < 0)
-		return err;
+	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+				 genmask, NETLINK_CB(skb).portid);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+		return PTR_ERR(table);
+	}
 
-	set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+	set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
 				    nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
@@ -5987,6 +5975,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
 		return -EBUSY;
 
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
 		err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
 		if (err < 0)
@@ -5994,7 +5984,7 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
 	}
 
 	if (nft_net->validate_state == NFT_VALIDATE_DO)
-		return nft_table_validate(net, ctx.table);
+		return nft_table_validate(net, table);
 
 	return 0;
 }
@@ -6232,23 +6222,29 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
 {
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
-					 genmask, NETLINK_CB(skb).portid);
-	if (err < 0)
-		return err;
+	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+				 genmask, NETLINK_CB(skb).portid);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+		return PTR_ERR(table);
+	}
 
-	set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+	set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
 		return -EBUSY;
 
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
 	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
 		return nft_set_flush(&ctx, set, genmask);
 

From 670866512f971d6fdb9ef2dad19db97d400d1161 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 31 May 2021 00:08:11 +0200
Subject: [PATCH 1027/3292] netfilter: nf_tables: remove
 nft_ctx_init_from_setattr()

Replace nft_ctx_init_from_setattr() by nft_table_lookup().

This patch also disentangles nf_tables_delset() where NFTA_SET_TABLE is
required while nft_ctx_init_from_setattr() allows it to be optional.

From the nf_tables_delset() path, this also allows to set up the context
structure when it is needed.

Removing this helper function saves us 14 LoC, so it is not helping to
consolidate code.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 64 ++++++++++++++---------------------
 1 file changed, 25 insertions(+), 39 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2fbcb2543795..6c2000a11c7e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3639,30 +3639,6 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
 	[NFTA_SET_DESC_CONCAT]		= { .type = NLA_NESTED },
 };
 
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
-				     const struct sk_buff *skb,
-				     const struct nlmsghdr *nlh,
-				     const struct nlattr * const nla[],
-				     struct netlink_ext_ack *extack,
-				     u8 genmask, u32 nlpid)
-{
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	int family = nfmsg->nfgen_family;
-	struct nft_table *table = NULL;
-
-	if (nla[NFTA_SET_TABLE] != NULL) {
-		table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
-					 genmask, nlpid);
-		if (IS_ERR(table)) {
-			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
-			return PTR_ERR(table);
-		}
-	}
-
-	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
-	return 0;
-}
-
 static struct nft_set *nft_set_lookup(const struct nft_table *table,
 				      const struct nlattr *nla, u8 genmask)
 {
@@ -4044,17 +4020,24 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
 {
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
+	u8 family = info->nfmsg->nfgen_family;
+	struct nft_table *table = NULL;
 	struct net *net = info->net;
 	const struct nft_set *set;
 	struct sk_buff *skb2;
 	struct nft_ctx ctx;
 	int err;
 
-	/* Verify existence before starting dump */
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
-					genmask, 0);
-	if (err < 0)
-		return err;
+	if (nla[NFTA_SET_TABLE]) {
+		table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+					 genmask, 0);
+		if (IS_ERR(table)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+			return PTR_ERR(table);
+		}
+	}
+
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -4074,7 +4057,7 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
 	if (!nla[NFTA_SET_TABLE])
 		return -EINVAL;
 
-	set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+	set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
@@ -4467,28 +4450,29 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
 {
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
+	u8 family = info->nfmsg->nfgen_family;
 	struct net *net = info->net;
 	const struct nlattr *attr;
+	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	int err;
 
 	if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
 		return -EAFNOSUPPORT;
-	if (nla[NFTA_SET_TABLE] == NULL)
-		return -EINVAL;
 
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
-					genmask, NETLINK_CB(skb).portid);
-	if (err < 0)
-		return err;
+	table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+				 genmask, NETLINK_CB(skb).portid);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+		return PTR_ERR(table);
+	}
 
 	if (nla[NFTA_SET_HANDLE]) {
 		attr = nla[NFTA_SET_HANDLE];
-		set = nft_set_lookup_byhandle(ctx.table, attr, genmask);
+		set = nft_set_lookup_byhandle(table, attr, genmask);
 	} else {
 		attr = nla[NFTA_SET_NAME];
-		set = nft_set_lookup(ctx.table, attr, genmask);
+		set = nft_set_lookup(table, attr, genmask);
 	}
 
 	if (IS_ERR(set)) {
@@ -4502,6 +4486,8 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
 		return -EBUSY;
 	}
 
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
 	return nft_delset(&ctx, set);
 }
 

From 0418b989a467885292c294923dec66951c1c1398 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 2 Jun 2021 12:39:07 +0200
Subject: [PATCH 1028/3292] netfilter: nftables: add nf_ct_pernet() helper
 function

Consolidate call to net_generic(net, nf_conntrack_net_id) in this
wrapper function.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h    |  7 +++++++
 net/netfilter/nf_conntrack_core.c       | 22 +++++++++-------------
 net/netfilter/nf_conntrack_ecache.c     |  8 +++-----
 net/netfilter/nf_conntrack_expect.c     | 12 +++++-------
 net/netfilter/nf_conntrack_helper.c     |  6 ++----
 net/netfilter/nf_conntrack_proto.c      |  6 ++----
 net/netfilter/nf_conntrack_standalone.c |  8 +++-----
 7 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 06dc6db70d18..cc663c68ddc4 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -346,6 +346,13 @@ nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
 	skb_set_nfct(skb, (unsigned long)ct | info);
 }
 
+extern unsigned int nf_conntrack_net_id;
+
+static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net)
+{
+	return net_generic(net, nf_conntrack_net_id);
+}
+
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e0befcf8113a..96ba19fc8155 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -55,8 +55,6 @@
 
 #include "nf_internals.h"
 
-extern unsigned int nf_conntrack_net_id;
-
 __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 
@@ -87,8 +85,6 @@ static __read_mostly bool nf_conntrack_locks_all;
 
 static struct conntrack_gc_work conntrack_gc_work;
 
-extern unsigned int nf_conntrack_net_id;
-
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
 	/* 1) Acquire the lock */
@@ -1404,7 +1400,7 @@ static void gc_worker(struct work_struct *work)
 				continue;
 
 			net = nf_ct_net(tmp);
-			cnet = net_generic(net, nf_conntrack_net_id);
+			cnet = nf_ct_pernet(net);
 			if (atomic_read(&cnet->count) < nf_conntrack_max95)
 				continue;
 
@@ -1484,7 +1480,7 @@ __nf_conntrack_alloc(struct net *net,
 		     const struct nf_conntrack_tuple *repl,
 		     gfp_t gfp, u32 hash)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	unsigned int ct_count;
 	struct nf_conn *ct;
 
@@ -1556,7 +1552,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 
 	nf_ct_ext_destroy(ct);
 	kmem_cache_free(nf_conntrack_cachep, ct);
-	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet = nf_ct_pernet(net);
 
 	smp_mb__before_atomic();
 	atomic_dec(&cnet->count);
@@ -1614,7 +1610,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 			     GFP_ATOMIC);
 
 	local_bh_disable();
-	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet = nf_ct_pernet(net);
 	if (cnet->expect_count) {
 		spin_lock(&nf_conntrack_expect_lock);
 		exp = nf_ct_find_expectation(net, zone, tuple);
@@ -2317,7 +2313,7 @@ __nf_ct_unconfirmed_destroy(struct net *net)
 
 void nf_ct_unconfirmed_destroy(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	might_sleep();
 
@@ -2333,7 +2329,7 @@ void nf_ct_iterate_cleanup_net(struct net *net,
 			       int (*iter)(struct nf_conn *i, void *data),
 			       void *data, u32 portid, int report)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	struct iter_data d;
 
 	might_sleep();
@@ -2367,7 +2363,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 
 	down_read(&net_rwsem);
 	for_each_net(net) {
-		struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+		struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 		if (atomic_read(&cnet->count) == 0)
 			continue;
@@ -2449,7 +2445,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 i_see_dead_people:
 	busy = 0;
 	list_for_each_entry(net, net_exit_list, exit_list) {
-		struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+		struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 		nf_ct_iterate_cleanup(kill_all, net, 0, 0);
 		if (atomic_read(&cnet->count) != 0)
@@ -2733,7 +2729,7 @@ void nf_conntrack_init_end(void)
 
 int nf_conntrack_init_net(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	int ret = -ENOMEM;
 	int cpu;
 
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 759d87aef95f..296e4a171bd1 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -27,8 +27,6 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
-extern unsigned int nf_conntrack_net_id;
-
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
 #define ECACHE_RETRY_WAIT (HZ/10)
@@ -348,7 +346,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
 
 void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	if (state == NFCT_ECACHE_DESTROY_FAIL &&
 	    !delayed_work_pending(&cnet->ecache_dwork)) {
@@ -371,7 +369,7 @@ static const struct nf_ct_ext_type event_extend = {
 
 void nf_conntrack_ecache_pernet_init(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	net->ct.sysctl_events = nf_ct_events;
 	cnet->ct_net = &net->ct;
@@ -380,7 +378,7 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
 
 void nf_conntrack_ecache_pernet_fini(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	cancel_delayed_work_sync(&cnet->ecache_dwork);
 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index efdd391b3f72..1e851bc2e61a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -43,8 +43,6 @@ unsigned int nf_ct_expect_max __read_mostly;
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 static unsigned int nf_ct_expect_hashrnd __read_mostly;
 
-extern unsigned int nf_conntrack_net_id;
-
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 				u32 portid, int report)
@@ -58,7 +56,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 
 	hlist_del_rcu(&exp->hnode);
 
-	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet = nf_ct_pernet(net);
 	cnet->expect_count--;
 
 	hlist_del_rcu(&exp->lnode);
@@ -123,7 +121,7 @@ __nf_ct_expect_find(struct net *net,
 		    const struct nf_conntrack_zone *zone,
 		    const struct nf_conntrack_tuple *tuple)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	struct nf_conntrack_expect *i;
 	unsigned int h;
 
@@ -164,7 +162,7 @@ nf_ct_find_expectation(struct net *net,
 		       const struct nf_conntrack_zone *zone,
 		       const struct nf_conntrack_tuple *tuple)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	struct nf_conntrack_expect *i, *exp = NULL;
 	unsigned int h;
 
@@ -397,7 +395,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	master_help->expecting[exp->class]++;
 
 	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
-	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet = nf_ct_pernet(net);
 	cnet->expect_count++;
 
 	NF_CT_STAT_INC(net, expect_create);
@@ -468,7 +466,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
 		}
 	}
 
-	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet = nf_ct_pernet(net);
 	if (cnet->expect_count >= nf_ct_expect_max) {
 		net_warn_ratelimited("nf_conntrack: expectation table full\n");
 		ret = -EMFILE;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ac396cc8bfae..ae4488a13c70 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -43,8 +43,6 @@ MODULE_PARM_DESC(nf_conntrack_helper,
 static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
 static struct list_head nf_ct_nat_helpers __read_mostly;
 
-extern unsigned int nf_conntrack_net_id;
-
 /* Stupid hash, but collision free for the default registrations of the
  * helpers currently in the kernel. */
 static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
@@ -214,7 +212,7 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
 static struct nf_conntrack_helper *
 nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	if (!cnet->sysctl_auto_assign_helper) {
 		if (cnet->auto_assign_helper_warned)
@@ -560,7 +558,7 @@ static const struct nf_ct_ext_type helper_extend = {
 
 void nf_conntrack_helper_pernet_init(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
 }
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 89e5bac384d7..fbc1fa36d2c2 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -42,8 +42,6 @@
 #include <net/ipv6.h>
 #include <net/inet_frag.h>
 
-extern unsigned int nf_conntrack_net_id;
-
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
 #ifdef CONFIG_SYSCTL
@@ -446,7 +444,7 @@ static struct nf_ct_bridge_info *nf_ct_bridge_info;
 
 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	bool fixup_needed = false, retry = true;
 	int err = 0;
 retry:
@@ -531,7 +529,7 @@ retry:
 
 static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	mutex_lock(&nf_ct_proto_mutex);
 	switch (nfproto) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index aaa55246d0ca..bce93656fad9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -512,9 +512,7 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
 
 u32 nf_conntrack_count(const struct net *net)
 {
-	const struct nf_conntrack_net *cnet;
-
-	cnet = net_generic(net, nf_conntrack_net_id);
+	const struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
 	return atomic_read(&cnet->count);
 }
@@ -1032,7 +1030,7 @@ static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
 
 static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	struct nf_udp_net *un = nf_udp_pernet(net);
 	struct ctl_table *table;
 
@@ -1085,7 +1083,7 @@ out_unregister_netfilter:
 
 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
-	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 	struct ctl_table *table;
 
 	table = cnet->sysctl_header->ctl_table_arg;

From ef8ed5ea091bf21648d0c4c1fa4a962d079eab2b Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Thu, 3 Jun 2021 15:12:33 +0300
Subject: [PATCH 1029/3292] netfilter: conntrack: Introduce tcp offload timeout
 configuration

TCP connections may be offloaded from nf conntrack to nf flow table.
Offloaded connections are aged after 30 seconds of inactivity.
Once aged, ownership is returned to conntrack with a hard coded pickup
time of 120 seconds, after which the connection may be deleted.
eted. The current aging intervals may be too aggressive for some users.

Provide users with the ability to control the nf flow table offload
aging and pickup time intervals via sysctl parameter as a pre-step for
configuring the nf flow table GC timeout intervals.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/conntrack.h           |  4 ++++
 net/netfilter/nf_conntrack_proto_tcp.c  |  5 +++++
 net/netfilter/nf_conntrack_standalone.c | 24 ++++++++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index ad0a95c2335e..3a391e27ec60 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -27,6 +27,10 @@ struct nf_tcp_net {
 	u8 tcp_loose;
 	u8 tcp_be_liberal;
 	u8 tcp_max_retrans;
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	unsigned int offload_timeout;
+	unsigned int offload_pickup;
+#endif
 };
 
 enum udp_conntrack {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 34e22416a721..de840fc41a2e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1441,6 +1441,11 @@ void nf_conntrack_tcp_init_net(struct net *net)
 	 * will be started.
 	 */
 	tn->tcp_max_retrans = 3;
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	tn->offload_timeout = 30 * HZ;
+	tn->offload_pickup = 120 * HZ;
+#endif
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index bce93656fad9..67b0fcd1a787 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -573,6 +573,10 @@ enum nf_ct_sysctl_index {
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
+#endif
 	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
 	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
 	NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
@@ -760,6 +764,20 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
+		.procname	= "nf_flowtable_tcp_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
+		.procname	= "nf_flowtable_tcp_pickup",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#endif
 	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
 		.procname	= "nf_conntrack_tcp_loose",
 		.maxlen		= sizeof(u8),
@@ -969,6 +987,12 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
 	XASSIGN(LIBERAL, &tn->tcp_be_liberal);
 	XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
 #undef XASSIGN
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
+#endif
+
 }
 
 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,

From 975c57504da1114551fdb3a91ed61dda7739613e Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Thu, 3 Jun 2021 15:12:34 +0300
Subject: [PATCH 1030/3292] netfilter: conntrack: Introduce udp offload timeout
 configuration

UDP connections may be offloaded from nf conntrack to nf flow table.
Offloaded connections are aged after 30 seconds of inactivity.
Once aged, ownership is returned to conntrack with a hard coded pickup
time of 30 seconds, after which the connection may be deleted.
eted. The current aging intervals may be too aggressive for some users.

Provide users with the ability to control the nf flow table offload
aging and pickup time intervals via sysctl parameter as a pre-step for
configuring the nf flow table GC timeout intervals.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/conntrack.h           |  4 ++++
 net/netfilter/nf_conntrack_proto_udp.c  |  5 +++++
 net/netfilter/nf_conntrack_standalone.c | 22 ++++++++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3a391e27ec60..c3094b83a525 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -41,6 +41,10 @@ enum udp_conntrack {
 
 struct nf_udp_net {
 	unsigned int timeouts[UDP_CT_MAX];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	unsigned int offload_timeout;
+	unsigned int offload_pickup;
+#endif
 };
 
 struct nf_icmp_net {
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index af402f458ee0..68911fcaa0f1 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -270,6 +270,11 @@ void nf_conntrack_udp_init_net(struct net *net)
 
 	for (i = 0; i < UDP_CT_MAX; i++)
 		un->timeouts[i] = udp_timeouts[i];
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	un->offload_timeout = 30 * HZ;
+	un->offload_pickup = 30 * HZ;
+#endif
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 67b0fcd1a787..f57a951c9b5e 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -582,6 +582,10 @@ enum nf_ct_sysctl_index {
 	NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
+	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
+#endif
 	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
 #ifdef CONFIG_NF_CT_PROTO_SCTP
@@ -812,6 +816,20 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
+#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
+		.procname	= "nf_flowtable_udp_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
+		.procname	= "nf_flowtable_udp_pickup",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+#endif
 	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
 		.procname	= "nf_conntrack_icmp_timeout",
 		.maxlen		= sizeof(unsigned int),
@@ -1081,6 +1099,10 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
+	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
+#endif
 
 	nf_conntrack_standalone_init_tcp_sysctl(net, table);
 	nf_conntrack_standalone_init_sctp_sysctl(net, table);

From 1d91d2e1a7f767aa8c11d8507ecf268f787734ec Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Thu, 3 Jun 2021 15:12:35 +0300
Subject: [PATCH 1031/3292] netfilter: flowtable: Set offload timeouts
 according to proto values

Currently the aging period for tcp/udp connections is hard coded to
30 seconds. Aged tcp/udp connections configure a hard coded 120/30
seconds pickup timeout for conntrack.
This configuration may be too aggressive or permissive for some users.

Dynamically configure the nf flow table GC timeout intervals according
to the user defined values.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  2 ++
 net/netfilter/nf_flow_table_core.c    | 47 +++++++++++++++++++++------
 net/netfilter/nf_flow_table_offload.c |  4 +--
 3 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 48ef7460ff30..a3647fadf1cc 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -177,6 +177,8 @@ struct flow_offload {
 #define NF_FLOW_TIMEOUT (30 * HZ)
 #define nf_flowtable_time_stamp	(u32)jiffies
 
+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
+
 static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 {
 	return (__s32)(timeout - nf_flowtable_time_stamp);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1d02650dd715..1e50908b1b7e 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -178,12 +178,10 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 	tcp->seen[1].td_maxwin = 0;
 }
 
-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)
-
 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 {
 	const struct nf_conntrack_l4proto *l4proto;
+	struct net *net = nf_ct_net(ct);
 	int l4num = nf_ct_protonum(ct);
 	unsigned int timeout;
 
@@ -191,12 +189,17 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 	if (!l4proto)
 		return;
 
-	if (l4num == IPPROTO_TCP)
-		timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
-	else if (l4num == IPPROTO_UDP)
-		timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
-	else
+	if (l4num == IPPROTO_TCP) {
+		struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+		timeout = tn->offload_pickup;
+	} else if (l4num == IPPROTO_UDP) {
+		struct nf_udp_net *tn = nf_udp_pernet(net);
+
+		timeout = tn->offload_pickup;
+	} else {
 		return;
+	}
 
 	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
 		ct->timeout = nfct_time_stamp + timeout;
@@ -268,11 +271,35 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
 	.automatic_shrinking	= true,
 };
 
+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
+{
+	const struct nf_conntrack_l4proto *l4proto;
+	unsigned long timeout = NF_FLOW_TIMEOUT;
+	struct net *net = nf_ct_net(flow->ct);
+	int l4num = nf_ct_protonum(flow->ct);
+
+	l4proto = nf_ct_l4proto_find(l4num);
+	if (!l4proto)
+		return timeout;
+
+	if (l4num == IPPROTO_TCP) {
+		struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+		timeout = tn->offload_timeout;
+	} else if (l4num == IPPROTO_UDP) {
+		struct nf_udp_net *tn = nf_udp_pernet(net);
+
+		timeout = tn->offload_timeout;
+	}
+
+	return timeout;
+}
+
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
 	int err;
 
-	flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
+	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
 
 	err = rhashtable_insert_fast(&flow_table->rhashtable,
 				     &flow->tuplehash[0].node,
@@ -304,7 +331,7 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 void flow_offload_refresh(struct nf_flowtable *flow_table,
 			  struct flow_offload *flow)
 {
-	flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
+	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
 
 	if (likely(!nf_flowtable_hw_offload(flow_table)))
 		return;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 528b2f172684..f92006cec94c 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -937,7 +937,7 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
 
 	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
 	offload->flow->timeout = max_t(u64, offload->flow->timeout,
-				       lastused + NF_FLOW_TIMEOUT);
+				       lastused + flow_offload_get_timeout(offload->flow));
 
 	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
 		if (stats[0].pkts)
@@ -1041,7 +1041,7 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
 	__s32 delta;
 
 	delta = nf_flow_timeout_delta(flow->timeout);
-	if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
+	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
 		return;
 
 	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);

From 7b4b2fa37587394fb89fa51a4bea0820a1b37a5d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 4 Jun 2021 12:27:06 +0200
Subject: [PATCH 1032/3292] netfilter: annotate nf_tables base hook ops

This will allow a followup patch to treat the 'ops->priv' pointer
as nft_chain argument without having to first walk the table/chains
to check if there is a matching base chain pointer.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h     | 8 +++++++-
 net/netfilter/nf_tables_api.c | 4 +++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index f161569fbe2f..3fda1a508733 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -77,12 +77,18 @@ struct nf_hook_state {
 typedef unsigned int nf_hookfn(void *priv,
 			       struct sk_buff *skb,
 			       const struct nf_hook_state *state);
+enum nf_hook_ops_type {
+	NF_HOOK_OP_UNDEFINED,
+	NF_HOOK_OP_NF_TABLES,
+};
+
 struct nf_hook_ops {
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
 	struct net_device	*dev;
 	void			*priv;
-	u_int8_t		pf;
+	u8			pf;
+	enum nf_hook_ops_type	hook_ops_type:8;
 	unsigned int		hooknum;
 	/* Hooks are ordered in ascending priority. */
 	int			priority;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6c2000a11c7e..c9308241b688 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2168,8 +2168,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	}
 
 	nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
-	if (nft_is_base_chain(chain))
+	if (nft_is_base_chain(chain)) {
+		basechain->ops.hook_ops_type = NF_HOOK_OP_NF_TABLES;
 		nft_trans_chain_policy(trans) = policy;
+	}
 
 	err = nft_chain_add(table, chain);
 	if (err < 0) {

From e2cf17d3774c323ef6dab6e9f7c0cfc5e742afd9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 4 Jun 2021 12:27:07 +0200
Subject: [PATCH 1033/3292] netfilter: add new hook nfnl subsystem

This nfnl subsystem allows to dump the list of all active netfiler hooks,
e.g. defrag, conntrack, nf/ip/arp/ip6tables and so on.

This helps to see what kind of features are currently enabled in
the network stack.

Sample output from nft tool using this infra:

 $ nft list hook ip input
 family ip hook input {
   +0000000010 nft_do_chain_inet [nf_tables] # nft table firewalld INPUT
   +0000000100 nf_nat_ipv4_local_in [nf_nat]
   +2147483647 ipv4_confirm [nf_conntrack]
 }

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink.h      |   3 +-
 include/uapi/linux/netfilter/nfnetlink_hook.h |  55 +++
 net/netfilter/Kconfig                         |   9 +
 net/netfilter/Makefile                        |   1 +
 net/netfilter/nfnetlink.c                     |   1 +
 net/netfilter/nfnetlink_hook.c                | 375 ++++++++++++++++++
 6 files changed, 443 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/netfilter/nfnetlink_hook.h
 create mode 100644 net/netfilter/nfnetlink_hook.c

diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 5bc960f220b3..6cd58cd2a6f0 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -60,7 +60,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_CTHELPER		9
 #define NFNL_SUBSYS_NFTABLES		10
 #define NFNL_SUBSYS_NFT_COMPAT		11
-#define NFNL_SUBSYS_COUNT		12
+#define NFNL_SUBSYS_HOOK		12
+#define NFNL_SUBSYS_COUNT		13
 
 /* Reserved control nfnetlink messages */
 #define NFNL_MSG_BATCH_BEGIN		NLMSG_MIN_TYPE
diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h
new file mode 100644
index 000000000000..912ec60b26b0
--- /dev/null
+++ b/include/uapi/linux/netfilter/nfnetlink_hook.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _NFNL_HOOK_H_
+#define _NFNL_HOOK_H_
+
+enum nfnl_hook_msg_types {
+	NFNL_MSG_HOOK_GET,
+	NFNL_MSG_HOOK_MAX,
+};
+
+/**
+ * enum nfnl_hook_attributes - netfilter hook netlink attributes
+ *
+ * @NFNLA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
+ * @NFNLA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ * @NFNLA_HOOK_DEV: netdevice name (NLA_STRING)
+ * @NFNLA_HOOK_FUNCTION_NAME: hook function name (NLA_STRING)
+ * @NFNLA_HOOK_MODULE_NAME: kernel module that registered this hook (NLA_STRING)
+ * @NFNLA_HOOK_CHAIN_INFO: basechain hook metadata (NLA_NESTED)
+ */
+enum nfnl_hook_attributes {
+	NFNLA_HOOK_UNSPEC,
+	NFNLA_HOOK_HOOKNUM,
+	NFNLA_HOOK_PRIORITY,
+	NFNLA_HOOK_DEV,
+	NFNLA_HOOK_FUNCTION_NAME,
+	NFNLA_HOOK_MODULE_NAME,
+	NFNLA_HOOK_CHAIN_INFO,
+	__NFNLA_HOOK_MAX
+};
+#define NFNLA_HOOK_MAX		(__NFNLA_HOOK_MAX - 1)
+
+/**
+ * enum nfnl_hook_chain_info_attributes - chain description
+ *
+ * NFNLA_HOOK_INFO_DESC: nft chain and table name (enum nft_table_attributes) (NLA_NESTED)
+ * NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32)
+ */
+enum nfnl_hook_chain_info_attributes {
+	NFNLA_HOOK_INFO_UNSPEC,
+	NFNLA_HOOK_INFO_DESC,
+	NFNLA_HOOK_INFO_TYPE,
+	__NFNLA_HOOK_INFO_MAX,
+};
+#define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1)
+
+/**
+ * enum nfnl_hook_chaintype - chain type
+ *
+ * @NFNL_HOOK_TYPE_NFTABLES nf_tables base chain
+ */
+enum nfnl_hook_chaintype {
+	NFNL_HOOK_TYPE_NFTABLES = 0x1,
+};
+
+#endif /* _NFNL_HOOK_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 172d74560632..c81321372198 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -19,6 +19,15 @@ config NETFILTER_FAMILY_BRIDGE
 config NETFILTER_FAMILY_ARP
 	bool
 
+config NETFILTER_NETLINK_HOOK
+	tristate "Netfilter base hook dump support"
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK
+	help
+	  If this option is enabled, the kernel will include support
+	  to list the base netfilter hooks via NFNETLINK.
+	  This is helpful for debugging.
+
 config NETFILTER_NETLINK_ACCT
 	tristate "Netfilter NFACCT over NFNETLINK interface"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e80e010354b1..87112dad1fd4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
 obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o
+obj-$(CONFIG_NETFILTER_NETLINK_HOOK) += nfnetlink_hook.o
 
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 028a1f39318b..7e2c8dd01408 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -68,6 +68,7 @@ static const char *const nfnl_lockdep_names[NFNL_SUBSYS_COUNT] = {
 	[NFNL_SUBSYS_CTHELPER] = "nfnl_subsys_cthelper",
 	[NFNL_SUBSYS_NFTABLES] = "nfnl_subsys_nftables",
 	[NFNL_SUBSYS_NFT_COMPAT] = "nfnl_subsys_nftcompat",
+	[NFNL_SUBSYS_HOOK] = "nfnl_subsys_hook",
 };
 
 static const int nfnl_group2type[NFNLGRP_MAX+1] = {
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
new file mode 100644
index 000000000000..04586dfa2acd
--- /dev/null
+++ b/net/netfilter/nfnetlink_hook.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2021 Red Hat GmbH
+ *
+ * Author: Florian Westphal <fw@strlen.de>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_hook.h>
+
+#include <net/netfilter/nf_tables.h>
+#include <net/sock.h>
+
+static const struct nla_policy nfnl_hook_nla_policy[NFNLA_HOOK_MAX + 1] = {
+	[NFNLA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
+	[NFNLA_HOOK_PRIORITY]	= { .type = NLA_U32 },
+	[NFNLA_HOOK_DEV]	= { .type = NLA_STRING,
+				    .len = IFNAMSIZ - 1 },
+	[NFNLA_HOOK_FUNCTION_NAME] = { .type = NLA_NUL_STRING,
+				       .len = KSYM_NAME_LEN, },
+	[NFNLA_HOOK_MODULE_NAME] = { .type = NLA_NUL_STRING,
+				     .len = MODULE_NAME_LEN, },
+	[NFNLA_HOOK_CHAIN_INFO] = { .type = NLA_NESTED, },
+};
+
+static int nf_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
+				     const struct nlmsghdr *nlh,
+				     struct netlink_dump_control *c)
+{
+	int err;
+
+	if (!try_module_get(THIS_MODULE))
+		return -EINVAL;
+
+	rcu_read_unlock();
+	err = netlink_dump_start(nlsk, skb, nlh, c);
+	rcu_read_lock();
+	module_put(THIS_MODULE);
+
+	return err;
+}
+
+struct nfnl_dump_hook_data {
+	char devname[IFNAMSIZ];
+	unsigned long headv;
+	u8 hook;
+};
+
+static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
+					const struct nfnl_dump_hook_data *ctx,
+					unsigned int seq,
+					const struct nf_hook_ops *ops)
+{
+	struct net *net = sock_net(nlskb->sk);
+	struct nlattr *nest, *nest2;
+	struct nft_chain *chain;
+	int ret = 0;
+
+	if (ops->hook_ops_type != NF_HOOK_OP_NF_TABLES)
+		return 0;
+
+	chain = ops->priv;
+	if (WARN_ON_ONCE(!chain))
+		return 0;
+
+	if (!nft_is_active(net, chain))
+		return 0;
+
+	nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO);
+	if (!nest)
+		return -EMSGSIZE;
+
+	ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE,
+			   htonl(NFNL_HOOK_TYPE_NFTABLES));
+	if (ret)
+		goto cancel_nest;
+
+	nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
+	if (!nest2)
+		goto cancel_nest;
+
+	ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name);
+	if (ret)
+		goto cancel_nest;
+
+	ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name);
+	if (ret)
+		goto cancel_nest;
+
+	nla_nest_end(nlskb, nest2);
+	nla_nest_end(nlskb, nest);
+	return ret;
+
+cancel_nest:
+	nla_nest_cancel(nlskb, nest);
+	return -EMSGSIZE;
+}
+
+static int nfnl_hook_dump_one(struct sk_buff *nlskb,
+			      const struct nfnl_dump_hook_data *ctx,
+			      const struct nf_hook_ops *ops,
+			      unsigned int seq)
+{
+	u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
+	unsigned int portid = NETLINK_CB(nlskb).portid;
+	struct nlmsghdr *nlh;
+	int ret = -EMSGSIZE;
+#ifdef CONFIG_KALLSYMS
+	char sym[KSYM_SYMBOL_LEN];
+	char *module_name;
+#endif
+	nlh = nfnl_msg_put(nlskb, portid, seq, event,
+			   NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0);
+	if (!nlh)
+		goto nla_put_failure;
+
+#ifdef CONFIG_KALLSYMS
+	ret = snprintf(sym, sizeof(sym), "%ps", ops->hook);
+	if (ret < 0 || ret > (int)sizeof(sym))
+		goto nla_put_failure;
+
+	module_name = strstr(sym, " [");
+	if (module_name) {
+		char *end;
+
+		module_name += 2;
+		end = strchr(module_name, ']');
+		if (end) {
+			*end = 0;
+
+			ret = nla_put_string(nlskb, NFNLA_HOOK_MODULE_NAME, module_name);
+			if (ret)
+				goto nla_put_failure;
+		}
+	}
+
+	ret = nla_put_string(nlskb, NFNLA_HOOK_FUNCTION_NAME, sym);
+	if (ret)
+		goto nla_put_failure;
+#endif
+
+	ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum));
+	if (ret)
+		goto nla_put_failure;
+
+	ret = nla_put_be32(nlskb, NFNLA_HOOK_PRIORITY, htonl(ops->priority));
+	if (ret)
+		goto nla_put_failure;
+
+	ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops);
+	if (ret)
+		goto nla_put_failure;
+
+	nlmsg_end(nlskb, nlh);
+	return 0;
+nla_put_failure:
+	nlmsg_trim(nlskb, nlh);
+	return ret;
+}
+
+static const struct nf_hook_entries *
+nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
+{
+	const struct nf_hook_entries *hook_head = NULL;
+	struct net_device *netdev;
+
+	switch (pf) {
+	case NFPROTO_IPV4:
+		if (hook >= ARRAY_SIZE(net->nf.hooks_ipv4))
+			return ERR_PTR(-EINVAL);
+		hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
+		break;
+	case NFPROTO_IPV6:
+		hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
+		if (hook >= ARRAY_SIZE(net->nf.hooks_ipv6))
+			return ERR_PTR(-EINVAL);
+		break;
+	case NFPROTO_ARP:
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+		if (hook >= ARRAY_SIZE(net->nf.hooks_arp))
+			return ERR_PTR(-EINVAL);
+		hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
+#endif
+		break;
+	case NFPROTO_BRIDGE:
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+		if (hook >= ARRAY_SIZE(net->nf.hooks_bridge))
+			return ERR_PTR(-EINVAL);
+		hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
+#endif
+		break;
+#if IS_ENABLED(CONFIG_DECNET)
+	case NFPROTO_DECNET:
+		if (hook >= ARRAY_SIZE(net->nf.hooks_decnet))
+			return ERR_PTR(-EINVAL);
+		hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
+		break;
+#endif
+#ifdef CONFIG_NETFILTER_INGRESS
+	case NFPROTO_NETDEV:
+		if (hook != NF_NETDEV_INGRESS)
+			return ERR_PTR(-EOPNOTSUPP);
+
+		if (!dev)
+			return ERR_PTR(-ENODEV);
+
+		netdev = dev_get_by_name_rcu(net, dev);
+		if (!netdev)
+			return ERR_PTR(-ENODEV);
+
+		return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+	default:
+		return ERR_PTR(-EPROTONOSUPPORT);
+	}
+
+	return hook_head;
+}
+
+static int nfnl_hook_dump(struct sk_buff *nlskb,
+			  struct netlink_callback *cb)
+{
+	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	struct nfnl_dump_hook_data *ctx = cb->data;
+	int err, family = nfmsg->nfgen_family;
+	struct net *net = sock_net(nlskb->sk);
+	struct nf_hook_ops * const *ops;
+	const struct nf_hook_entries *e;
+	unsigned int i = cb->args[0];
+
+	rcu_read_lock();
+
+	e = nfnl_hook_entries_head(family, ctx->hook, net, ctx->devname);
+	if (!e)
+		goto done;
+
+	if (IS_ERR(e)) {
+		cb->seq++;
+		goto done;
+	}
+
+	if ((unsigned long)e != ctx->headv || i >= e->num_hook_entries)
+		cb->seq++;
+
+	ops = nf_hook_entries_get_hook_ops(e);
+
+	for (; i < e->num_hook_entries; i++) {
+		err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq);
+		if (err)
+			break;
+	}
+
+done:
+	nl_dump_check_consistent(cb, nlmsg_hdr(nlskb));
+	rcu_read_unlock();
+	cb->args[0] = i;
+	return nlskb->len;
+}
+
+static int nfnl_hook_dump_start(struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	const struct nlattr * const *nla = cb->data;
+	struct nfnl_dump_hook_data *ctx = NULL;
+	struct net *net = sock_net(cb->skb->sk);
+	u8 family = nfmsg->nfgen_family;
+	char name[IFNAMSIZ] = "";
+	const void *head;
+	u32 hooknum;
+
+	hooknum = ntohl(nla_get_be32(nla[NFNLA_HOOK_HOOKNUM]));
+	if (hooknum > 255)
+		return -EINVAL;
+
+	if (family == NFPROTO_NETDEV) {
+		if (!nla[NFNLA_HOOK_DEV])
+			return -EINVAL;
+
+		nla_strscpy(name, nla[NFNLA_HOOK_DEV], sizeof(name));
+	}
+
+	rcu_read_lock();
+	/* Not dereferenced; for consistency check only */
+	head = nfnl_hook_entries_head(family, hooknum, net, name);
+	rcu_read_unlock();
+
+	if (head && IS_ERR(head))
+		return PTR_ERR(head);
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	strscpy(ctx->devname, name, sizeof(ctx->devname));
+	ctx->headv = (unsigned long)head;
+	ctx->hook = hooknum;
+
+	cb->seq = 1;
+	cb->data = ctx;
+
+	return 0;
+}
+
+static int nfnl_hook_dump_stop(struct netlink_callback *cb)
+{
+	kfree(cb->data);
+	return 0;
+}
+
+static int nfnl_hook_get(struct sk_buff *skb,
+			 const struct nfnl_info *info,
+			 const struct nlattr * const nla[])
+{
+	if (!nla[NFNLA_HOOK_HOOKNUM])
+		return -EINVAL;
+
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start = nfnl_hook_dump_start,
+			.done = nfnl_hook_dump_stop,
+			.dump = nfnl_hook_dump,
+			.module = THIS_MODULE,
+			.data = (void *)nla,
+		};
+
+		return nf_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static const struct nfnl_callback nfnl_hook_cb[NFNL_MSG_HOOK_MAX] = {
+	[NFNL_MSG_HOOK_GET] = {
+		.call		= nfnl_hook_get,
+		.type		= NFNL_CB_RCU,
+		.attr_count	= NFNLA_HOOK_MAX,
+		.policy		= nfnl_hook_nla_policy
+	},
+};
+
+static const struct nfnetlink_subsystem nfhook_subsys = {
+	.name				= "nfhook",
+	.subsys_id			= NFNL_SUBSYS_HOOK,
+	.cb_count			= NFNL_MSG_HOOK_MAX,
+	.cb				= nfnl_hook_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_HOOK);
+
+static int __init nfnetlink_hook_init(void)
+{
+	return nfnetlink_subsys_register(&nfhook_subsys);
+}
+
+static void __exit nfnetlink_hook_exit(void)
+{
+	nfnetlink_subsys_unregister(&nfhook_subsys);
+}
+
+module_init(nfnetlink_hook_init);
+module_exit(nfnetlink_hook_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("nfnetlink_hook: list registered netfilter hooks");

From 8df71a7dc5e1e0d8f1bb13145e00bf375fa2082e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 May 2021 19:30:58 +0200
Subject: [PATCH 1034/3292] cpufreq: intel_pstate: hybrid: Fix build with
 CONFIG_ACPI unset

One of the previous commits introducing hybrid processor support to
intel_pstate broke build with CONFIG_ACPI unset.

Fix that and while at it make empty stubs of two functions related
to ACPI CPPC static inline and fix a spelling mistake in the name of
one of them.

Fixes: eb3693f0521e ("cpufreq: intel_pstate: hybrid: CPU-specific scaling factor")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
---
 drivers/cpufreq/intel_pstate.c | 87 ++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 41 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 03d8516e653e..6012964df51b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -369,7 +369,7 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	}
 }
 
-static int intel_pstate_get_cppc_guranteed(int cpu)
+static int intel_pstate_get_cppc_guaranteed(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
 	int ret;
@@ -385,7 +385,7 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
 }
 
 #else /* CONFIG_ACPI_CPPC_LIB */
-static void intel_pstate_set_itmt_prio(int cpu)
+static inline void intel_pstate_set_itmt_prio(int cpu)
 {
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
@@ -470,6 +470,20 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
 	acpi_processor_unregister_performance(policy->cpu);
 }
+
+static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
+{
+	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
+}
+
+static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
+					struct cppc_perf_caps *caps)
+{
+	if (cppc_get_perf_caps(cpu->cpu, caps))
+		return false;
+
+	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
+}
 #else /* CONFIG_ACPI */
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -486,26 +500,12 @@ static inline bool intel_pstate_acpi_pm_profile_server(void)
 #endif /* CONFIG_ACPI */
 
 #ifndef CONFIG_ACPI_CPPC_LIB
-static int intel_pstate_get_cppc_guranteed(int cpu)
+static inline int intel_pstate_get_cppc_guaranteed(int cpu)
 {
 	return -ENOTSUPP;
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
-static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
-{
-	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
-}
-
-static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
-					struct cppc_perf_caps *caps)
-{
-	if (cppc_get_perf_caps(cpu->cpu, caps))
-		return false;
-
-	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
-}
-
 static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
 {
 	pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
@@ -530,7 +530,6 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
  */
 static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
 {
-	struct cppc_perf_caps caps;
 	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
 	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
 	int perf_ctl_turbo = pstate_funcs.get_turbo();
@@ -548,33 +547,39 @@ static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
 	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
 	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
 
-	if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
-		if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
-			pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
+#ifdef CONFIG_ACPI
+	if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
+		struct cppc_perf_caps caps;
 
-			/*
-			 * If the CPPC nominal performance is valid, it can be
-			 * assumed to correspond to cpu_khz.
-			 */
-			if (caps.nominal_perf == perf_ctl_max_phys) {
-				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-				return;
-			}
-			scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
-		} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
-			pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
+		if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
+			if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
+				pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
 
-			/*
-			 * If the CPPC guaranteed performance is valid, it can
-			 * be assumed to correspond to max_freq.
-			 */
-			if (caps.guaranteed_perf == perf_ctl_max) {
-				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-				return;
+				/*
+				 * If the CPPC nominal performance is valid, it
+				 * can be assumed to correspond to cpu_khz.
+				 */
+				if (caps.nominal_perf == perf_ctl_max_phys) {
+					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+					return;
+				}
+				scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
+			} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
+				pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
+
+				/*
+				 * If the CPPC guaranteed performance is valid,
+				 * it can be assumed to correspond to max_freq.
+				 */
+				if (caps.guaranteed_perf == perf_ctl_max) {
+					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+					return;
+				}
+				scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
 			}
-			scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
 		}
 	}
+#endif
 	/*
 	 * If using the CPPC data to compute the HWP-to-frequency scaling factor
 	 * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
@@ -944,7 +949,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 	int ratio, freq;
 
-	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
+	ratio = intel_pstate_get_cppc_guaranteed(policy->cpu);
 	if (ratio <= 0) {
 		u64 cap;
 

From 5de1262500708bcf6eef753f5eb9d8adb3d32d33 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 31 May 2021 15:16:07 +0800
Subject: [PATCH 1035/3292] cpufreq: stats: Clean up local variable in
 cpufreq_stats_create_table()

Local variable 'count' will be initialized and 'ret' is also not
required, so remove the redundant initialization and get rid of
'ret'.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_stats.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index da717f7cd9a9..1570d6f3e75d 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -211,7 +211,7 @@ void cpufreq_stats_free_table(struct cpufreq_policy *policy)
 
 void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 {
-	unsigned int i = 0, count = 0, ret = -ENOMEM;
+	unsigned int i = 0, count;
 	struct cpufreq_stats *stats;
 	unsigned int alloc_size;
 	struct cpufreq_frequency_table *pos;
@@ -253,8 +253,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	stats->last_index = freq_table_get_index(stats, policy->cur);
 
 	policy->stats = stats;
-	ret = sysfs_create_group(&policy->kobj, &stats_attr_group);
-	if (!ret)
+	if (!sysfs_create_group(&policy->kobj, &stats_attr_group))
 		return;
 
 	/* We failed, release resources */

From 42d96e169a66151a87f6f2f9a10fdd5e262fa6a9 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Fri, 4 Jun 2021 14:25:55 -0700
Subject: [PATCH 1036/3292] ACPICA: ACPI 6.4: MADT: add Multiprocessor Wakeup
 Mailbox Structure

ACPICA commit f1ee04207a212f6c519441e7e25397649ebc4cea

Add Multiprocessor Wakeup Mailbox Structure definition. It is useful
in parsing MADT Wake table.

Link: https://github.com/acpica/acpica/commit/f1ee0420
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 18cafe3ebddc..3b6f45a2edc8 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -763,6 +763,20 @@ struct acpi_madt_multiproc_wakeup {
 	u64 base_address;
 };
 
+#define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE	2032
+#define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE	2048
+
+struct acpi_madt_multiproc_wakeup_mailbox {
+	u16 command;
+	u16 reserved;		/* reserved - must be zero */
+	u32 apic_id;
+	u64 wakeup_vector;
+	u8 reserved_os[ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE];	/* reserved for OS use */
+	u8 reserved_firmware[ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE];	/* reserved for firmware use */
+};
+
+#define ACPI_MP_WAKE_COMMAND_WAKEUP    1
+
 /*
  * Common flags fields for MADT subtables
  */

From 8288f69e47f9780d9b5e0447a3160a1fbeae9c8d Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Fri, 4 Jun 2021 14:25:56 -0700
Subject: [PATCH 1037/3292] ACPICA: Add SVKL table headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ACPICA commit b5e6bcf69dbb9877481992d5ce86008cfb94f5b8

SVKL (Storage Volume Key Location Table) is used by BIOS/Firmware
to share storage volume encryption key's with OS. It will be used
by userspace to decrypt and mount encrypted drives.

So add SVKL table signature and add it to known signatures array
support SVKL.

You can find details about the SVKL table in TDX specfication
titled "Guest-Host-Communication Interface (GHCI) for Intel
Trust Domain Extensions (Intel® TDX)", sec 4.4 and in ACPI
specification r6.4, sec 5.2.6.

Link: https://software.intel.com/content/dam/develop/external/us/en/documents/intel-tdx-guest-hypervisor-communication-interface.pdf
Link: https://github.com/acpica/acpica/commit/b5e6bcf6
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 3b6f45a2edc8..d8fec67e4c8c 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -44,6 +44,7 @@
 #define ACPI_SIG_SDEI           "SDEI"	/* Software Delegated Exception Interface Table */
 #define ACPI_SIG_SDEV           "SDEV"	/* Secure Devices table */
 #define ACPI_SIG_NHLT           "NHLT"	/* Non-HDAudio Link Table */
+#define ACPI_SIG_SVKL           "SVKL"	/* Storage Volume Key Location Table */
 
 /*
  * All tables must be byte-packed to match the ACPI specification, since
@@ -1913,6 +1914,35 @@ struct acpi_sdev_pcie_path {
 	u8 function;
 };
 
+/*******************************************************************************
+ *
+ * SVKL - Storage Volume Key Location Table (ACPI 6.4)
+ *        Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_svkl {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u32 count;
+};
+
+struct acpi_svkl_header {
+	u16 type;
+	u16 format;
+	u32 size;
+	u64 address;
+};
+
+enum acpi_svkl_type {
+	ACPI_SVKL_TYPE_MAIN_STORAGE = 0,
+	ACPI_SVKL_TYPE_RESERVED = 1	/* 1 and greater are reserved */
+};
+
+enum acpi_svkl_format {
+	ACPI_SVKL_FORMAT_RAW_BINARY = 0,
+	ACPI_SVKL_FORMAT_RESERVED = 1	/* 1 and greater are reserved */
+};
+
 /* Reset to default packing */
 
 #pragma pack()

From c27bac0314131b11bccd735f7e8415ac6444b667 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Fri, 4 Jun 2021 14:25:57 -0700
Subject: [PATCH 1038/3292] ACPICA: Fix memory leak caused by _CID repair
 function

ACPICA commit 180cb53963aa876c782a6f52cc155d951b26051a

According to the ACPI spec, _CID returns a package containing
hardware ID's. Each element of an ASL package contains a reference
count from the parent package as well as the element itself.

Name (TEST, Package() {
    "String object" // this package element has a reference count of 2
})

A memory leak was caused in the _CID repair function because it did
not decrement the reference count created by the package. Fix the
memory leak by calling acpi_ut_remove_reference on _CID package elements
that represent a hardware ID (_HID).

Link: https://github.com/acpica/acpica/commit/180cb539
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/nsrepair2.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 14b71b41e845..38e10ab976e6 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -379,6 +379,13 @@ acpi_ns_repair_CID(struct acpi_evaluate_info *info,
 
 			(*element_ptr)->common.reference_count =
 			    original_ref_count;
+
+			/*
+			 * The original_element holds a reference from the package object
+			 * that represents _HID. Since a new element was created by _HID,
+			 * remove the reference from the _CID package.
+			 */
+			acpi_ut_remove_reference(original_element);
 		}
 
 		element_ptr++;

From c160b7d21ae5df7b489f3109f54bad84030cbce3 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 4 Jun 2021 14:25:58 -0700
Subject: [PATCH 1039/3292] ACPICA: iASL: Finish support for the IVRS ACPI
 table

 1) Add compiler support for IVRS.
 2) Update disassembler support for IVRS.
 3) Add a new utility, ut_is_id_integer to determine if a HID/CID is
    an integer or a string.

ACPICA commit 7eb0b770cb0efcf089cb217b5f8bafc0c6395a3d

Link: https://github.com/acpica/acpica/commit/7eb0b770
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index d8fec67e4c8c..42c78fecca7c 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -447,6 +447,12 @@ struct acpi_ivrs_device_hid {
 	u8 uid_length;
 };
 
+/* Values for uid_type above */
+
+#define ACPI_IVRS_UID_NOT_PRESENT   0
+#define ACPI_IVRS_UID_IS_INTEGER    1
+#define ACPI_IVRS_UID_IS_STRING     2
+
 /* 0x20, 0x21, 0x22: I/O Virtualization Memory Definition Block (IVMD) */
 
 struct acpi_ivrs_memory {

From 6496f03e36ce832137733b39f6e670434af3a1c5 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 4 Jun 2021 14:25:59 -0700
Subject: [PATCH 1040/3292] ACPICA: iASL: Add support for the SVKL table

Includes the table compiler, the disassembler and the template
generator.

ACPICA commit 27a434379e3ecafea5340c0c384789ea2062c4fb

Link: https://github.com/acpica/acpica/commit/27a43437
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 42c78fecca7c..d042341e8ba9 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1923,6 +1923,8 @@ struct acpi_sdev_pcie_path {
 /*******************************************************************************
  *
  * SVKL - Storage Volume Key Location Table (ACPI 6.4)
+ *        From: "Guest-Host-Communication Interface (GHCI) for Intel
+ *        Trust Domain Extensions (Intel TDX)".
  *        Version 1
  *
  ******************************************************************************/
@@ -1932,7 +1934,7 @@ struct acpi_table_svkl {
 	u32 count;
 };
 
-struct acpi_svkl_header {
+struct acpi_svkl_key {
 	u16 type;
 	u16 format;
 	u32 size;

From 536e35c938c67941d4279e09dc3a2825119715fd Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 4 Jun 2021 14:26:00 -0700
Subject: [PATCH 1041/3292] ACPICA: iASL Table Compiler: Add full support for
 RGRT ACPI table

ACPICA commit 6949e1dd2d92788a994ce657857fe8809159e71e

Includes compiler, disassembler, and template generator.

Link: https://github.com/acpica/acpica/commit/6949e1dd
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index d042341e8ba9..f1d5e8950527 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -40,6 +40,7 @@
 #define ACPI_SIG_PMTT           "PMTT"	/* Platform Memory Topology Table */
 #define ACPI_SIG_PPTT           "PPTT"	/* Processor Properties Topology Table */
 #define ACPI_SIG_RASF           "RASF"	/* RAS Feature table */
+#define ACPI_SIG_RGRT           "RGRT"	/* Regulatory Graphics Resource Table */
 #define ACPI_SIG_SBST           "SBST"	/* Smart Battery Specification Table */
 #define ACPI_SIG_SDEI           "SDEI"	/* Software Delegated Exception Interface Table */
 #define ACPI_SIG_SDEV           "SDEV"	/* Secure Devices table */
@@ -1790,6 +1791,32 @@ enum acpi_rasf_status {
 #define ACPI_RASF_ERROR                 (1<<2)
 #define ACPI_RASF_STATUS                (0x1F<<3)
 
+/*******************************************************************************
+ *
+ * RGRT - Regulatory Graphics Resource Table
+ *        Version 1
+ *
+ * Conforms to "ACPI RGRT" available at:
+ * https://microsoft.github.io/mu/dyn/mu_plus/ms_core_pkg/acpi_RGRT/feature_acpi_rgrt/
+ *
+ ******************************************************************************/
+
+struct acpi_table_rgrt {
+	struct acpi_table_header header;	/* Common ACPI table header */
+	u16 version;
+	u8 image_type;
+	u8 reserved;
+	u8 image[0];
+};
+
+/* image_type values */
+
+enum acpi_rgrt_image_type {
+	ACPI_RGRT_TYPE_RESERVED0 = 0,
+	ACPI_RGRT_IMAGE_TYPE_PNG = 1,
+	ACPI_RGRT_TYPE_RESERVED = 2	/* 2 and greater are reserved */
+};
+
 /*******************************************************************************
  *
  * SBST - Smart Battery Specification Table

From b5e774039629d56f6a8a64013a885e284c4b3785 Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Fri, 4 Jun 2021 14:26:01 -0700
Subject: [PATCH 1042/3292] ACPICA: Use ACPI_FALLTHROUGH

ACPICA commit 2296edd39b4ce2d2dd691c1f309c4da00843ecc9

Replace /* FALLTHROUGH */ comment with ACPI_FALLTHROUGH

Link: https://github.com/acpica/acpica/commit/2296edd3
Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utprint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index e37d612e8db5..05426596d1f4 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -475,7 +475,7 @@ int vsnprintf(char *string, acpi_size size, const char *format, va_list args)
 		case 'X':
 
 			type |= ACPI_FORMAT_UPPER;
-			/* FALLTHROUGH */
+			ACPI_FALLTHROUGH;
 
 		case 'x':
 

From 6814a524857f2da9624dedbcac9659675406f441 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20W=C3=BCthrich?= <me@fabwu.ch>
Date: Fri, 4 Jun 2021 14:26:02 -0700
Subject: [PATCH 1043/3292] ACPICA: Add _PLD panel positions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ACPICA commit 1d36b551fa6749e650da1dfd3e809146e6ac6a2e

The ACPI specification v6.3 defines the panel positions in chapter
6.1.8 "_PLD (Physical Location of Device)"

Link: https://github.com/acpica/acpica/commit/1d36b551
Signed-off-by: Fabian Wüthrich <me@fabwu.ch>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acbuffer.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/acpi/acbuffer.h b/include/acpi/acbuffer.h
index 18197c16149f..3e8d969b22fe 100644
--- a/include/acpi/acbuffer.h
+++ b/include/acpi/acbuffer.h
@@ -207,4 +207,14 @@ struct acpi_pld_info {
 #define ACPI_PLD_GET_HORIZ_OFFSET(dword)        ACPI_GET_BITS (dword, 16, ACPI_16BIT_MASK)
 #define ACPI_PLD_SET_HORIZ_OFFSET(dword,value)  ACPI_SET_BITS (dword, 16, ACPI_16BIT_MASK, value)	/* Offset 128+16=144, Len 16 */
 
+/* Panel position defined in _PLD section of ACPI Specification 6.3 */
+
+#define ACPI_PLD_PANEL_TOP      0
+#define ACPI_PLD_PANEL_BOTTOM   1
+#define ACPI_PLD_PANEL_LEFT     2
+#define ACPI_PLD_PANEL_RIGHT    3
+#define ACPI_PLD_PANEL_FRONT    4
+#define ACPI_PLD_PANEL_BACK     5
+#define ACPI_PLD_PANEL_UNKNOWN  6
+
 #endif				/* ACBUFFER_H */

From 9401eafaff836c1b828cd5300fb4bd35a548609b Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 4 Jun 2021 14:26:03 -0700
Subject: [PATCH 1044/3292] ACPICA: iASL: Add support for the BDAT ACPI table

ACPICA commit 81eb9c383e6dee0f1b6620e91e5c3dbb48234831

Includes: Table compiler, disassembler, and template generator.

Link: https://github.com/acpica/acpica/commit/81eb9c38
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index f1d5e8950527..68a607961b2f 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -24,6 +24,7 @@
  * file. Useful because they make it more difficult to inadvertently type in
  * the wrong signature.
  */
+#define ACPI_SIG_BDAT           "BDAT"	/* BIOS Data ACPI Table */
 #define ACPI_SIG_IORT           "IORT"	/* IO Remapping Table */
 #define ACPI_SIG_IVRS           "IVRS"	/* I/O Virtualization Reporting Structure */
 #define ACPI_SIG_LPIT           "LPIT"	/* Low Power Idle Table */
@@ -65,6 +66,20 @@
  * See http://stackoverflow.com/a/1053662/41661
  */
 
+/*******************************************************************************
+ *
+ * BDAT - BIOS Data ACPI Table
+ *
+ * Conforms to "BIOS Data ACPI Table", Interface Specification v4.0 Draft 5
+ * Nov 2020
+ *
+ ******************************************************************************/
+
+struct acpi_table_bdat {
+	struct acpi_table_header header;
+	struct acpi_generic_address gas;
+};
+
 /*******************************************************************************
  *
  * IORT - IO Remapping Table

From 160c768e1cad405479e40d327e04c312da1b2384 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Fri, 4 Jun 2021 14:26:04 -0700
Subject: [PATCH 1045/3292] ACPICA: Add defines for the CXL Host Bridge
 Structure (CHBS)

ACPICA commit 5ace82441a34f8d45725f12f6bd2677e79c186a6

CXL 2.0 defines length and version field values for the CHBS.
Include them in the ACPI CEDT table definition.

Link: https://github.com/acpica/acpica/commit/5ace8244
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl1.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index ce59903c2695..8532f56a51d6 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -330,6 +330,16 @@ enum acpi_cedt_type {
 	ACPI_CEDT_TYPE_RESERVED = 1
 };
 
+/* Values for version field above */
+
+#define ACPI_CEDT_CHBS_VERSION_CXL11    (0)
+#define ACPI_CEDT_CHBS_VERSION_CXL20    (1)
+
+/* Values for length field above */
+
+#define ACPI_CEDT_CHBS_LENGTH_CXL11     (0x2000)
+#define ACPI_CEDT_CHBS_LENGTH_CXL20     (0x10000)
+
 /*
  * CEDT subtables
  */

From 4a2c1dcfaf59be4b357400d893c3f5daff6cab6c Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Fri, 4 Jun 2021 14:26:05 -0700
Subject: [PATCH 1046/3292] ACPICA: Add the CFMWS structure definition to the
 CEDT table

ACPICA commit 699fc72e56936bebf3b9ba39b6e91bd957b44452

The CXL Fixed Memory Window Structure (CFMWS) is added to the
CXL Early Discovery Table (CEDT). This new structure is defined
in an ECN to the CXL 2.0 specification.

https://www.computeexpresslink.org/spec-landing

Link: https://github.com/acpica/acpica/commit/699fc72e
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl1.h | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 8532f56a51d6..ef2872dea01c 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -327,7 +327,8 @@ struct acpi_cedt_header {
 
 enum acpi_cedt_type {
 	ACPI_CEDT_TYPE_CHBS = 0,
-	ACPI_CEDT_TYPE_RESERVED = 1
+	ACPI_CEDT_TYPE_CFMWS = 1,
+	ACPI_CEDT_TYPE_RESERVED = 2,
 };
 
 /* Values for version field above */
@@ -355,6 +356,34 @@ struct acpi_cedt_chbs {
 	u64 length;
 };
 
+/* 1: CXL Fixed Memory Window Structure */
+
+struct acpi_cedt_cfmws {
+	struct acpi_cedt_header header;
+	u32 reserved1;
+	u64 base_hpa;
+	u64 window_size;
+	u8 interleave_ways;
+	u8 interleave_arithmetic;
+	u16 reserved2;
+	u32 granularity;
+	u16 restrictions;
+	u16 qtg_id;
+	u32 interleave_targets[];
+};
+
+/* Values for Interleave Arithmetic field above */
+
+#define ACPI_CEDT_CFMWS_ARITHMETIC_MODULO	(0)
+
+/* Values for Restrictions field above */
+
+#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2		(1)
+#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3		(1<<1)
+#define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE	(1<<2)
+#define ACPI_CEDT_CFMWS_RESTRICT_PMEM		(1<<3)
+#define ACPI_CEDT_CFMWS_RESTRICT_FIXED		(1<<4)
+
 /*******************************************************************************
  *
  * CPEP - Corrected Platform Error Polling table (ACPI 4.0)

From d71df85aacd26fe4ac5fbfd383e01e7552ccfcc3 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Fri, 4 Jun 2021 14:26:06 -0700
Subject: [PATCH 1047/3292] ACPICA: iASL: add disassembler support for PRMT

ACPICA commit f70e7593e37c9e29f19be8ad3ef93f3f34799368

Link: https://github.com/acpica/acpica/commit/f70e7593
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 68a607961b2f..05b086d4d94a 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -40,6 +40,7 @@
 #define ACPI_SIG_PHAT           "PHAT"	/* Platform Health Assessment Table */
 #define ACPI_SIG_PMTT           "PMTT"	/* Platform Memory Topology Table */
 #define ACPI_SIG_PPTT           "PPTT"	/* Processor Properties Topology Table */
+#define ACPI_SIG_PRMT           "PRMT"	/* Platform Runtime Mechanism Table */
 #define ACPI_SIG_RASF           "RASF"	/* RAS Feature table */
 #define ACPI_SIG_RGRT           "RGRT"	/* Regulatory Graphics Resource Table */
 #define ACPI_SIG_SBST           "SBST"	/* Smart Battery Specification Table */
@@ -1710,6 +1711,43 @@ struct acpi_pptt_id {
 	u16 spin_rev;
 };
 
+/*******************************************************************************
+ *
+ * PRMT - Platform Runtime Mechanism Table
+ *        Version 1
+ *
+ ******************************************************************************/
+
+struct acpi_table_prmt {
+	struct acpi_table_header header;	/* Common ACPI table header */
+};
+
+struct acpi_table_prmt_header {
+	u8 platform_guid[16];
+	u32 module_info_offset;
+	u32 module_info_count;
+};
+
+struct acpi_prmt_module_info {
+	u16 revision;
+	u16 length;
+	u8 module_guid[16];
+	u16 major_rev;
+	u16 minor_rev;
+	u16 handler_info_count;
+	u32 handler_info_offset;
+	u64 mmio_list_pointer;
+};
+
+struct acpi_prmt_handler_info {
+	u16 revision;
+	u16 length;
+	u8 handler_guid[16];
+	u64 handler_address;
+	u64 static_data_buffer_address;
+	u64 acpi_param_buffer_address;
+};
+
 /*******************************************************************************
  *
  * RASF - RAS Feature Table (ACPI 5.0)

From 04da290dd22c806c401913bcc1ed6356599b09c3 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Fri, 4 Jun 2021 14:26:07 -0700
Subject: [PATCH 1048/3292] ACPICA: Add support for PlatformRtMechanism
 OperationRegion handler

ACPICA commit cdf48b141d7da38e47fe4020310033ddd1971f9e

Writing a buffer to a PlatformRtMechanism FieldUnit invokes a
bidirectional transaction. The input buffer contains 26 bytes
containing 9 bytes of status, a command byte and a 16-byte UUID.
This change will will simply pass this incoming buffer to a handler
registered by the OS.

Link: https://github.com/acpica/acpica/commit/cdf48b14
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acutils.h  |  2 ++
 drivers/acpi/acpica/exfield.c  |  8 +++++--
 drivers/acpi/acpica/exserial.c | 12 ++++++++++
 drivers/acpi/acpica/utuuid.c   | 41 ++++++++++++++++++++++++++++++++++
 include/acpi/acconfig.h        |  2 ++
 5 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index bccae0d3db75..59d6ded01614 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -737,6 +737,8 @@ const char *acpi_ah_match_uuid(u8 *data);
  */
 #if (defined ACPI_ASL_COMPILER || defined ACPI_EXEC_APP || defined ACPI_HELP_APP)
 void acpi_ut_convert_string_to_uuid(char *in_string, u8 *uuid_buffer);
+
+acpi_status acpi_ut_convert_uuid_to_string(char *uuid_buffer, char *out_string);
 #endif
 
 #endif				/* _ACUTILS_H */
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index 32f03ee81785..06f3c9df1e22 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -139,7 +139,9 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 		    || obj_desc->field.region_obj->region.space_id ==
 		    ACPI_ADR_SPACE_GSBUS
 		    || obj_desc->field.region_obj->region.space_id ==
-		    ACPI_ADR_SPACE_IPMI)) {
+		    ACPI_ADR_SPACE_IPMI
+		    || obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_PLATFORM_RT)) {
 
 		/* SMBus, GSBus, IPMI serial */
 
@@ -301,7 +303,9 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
 		    || obj_desc->field.region_obj->region.space_id ==
 		    ACPI_ADR_SPACE_GSBUS
 		    || obj_desc->field.region_obj->region.space_id ==
-		    ACPI_ADR_SPACE_IPMI)) {
+		    ACPI_ADR_SPACE_IPMI
+		    || obj_desc->field.region_obj->region.space_id ==
+		    ACPI_ADR_SPACE_PLATFORM_RT)) {
 
 		/* SMBus, GSBus, IPMI serial */
 
diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index 8e8d95f7947b..10d68a5f76a3 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -195,6 +195,12 @@ acpi_ex_read_serial_bus(union acpi_operand_object *obj_desc,
 		function = ACPI_READ | (accessor_type << 16);
 		break;
 
+	case ACPI_ADR_SPACE_PLATFORM_RT:
+
+		buffer_length = ACPI_PRM_INPUT_BUFFER_SIZE;
+		function = ACPI_READ;
+		break;
+
 	default:
 		return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID);
 	}
@@ -311,6 +317,12 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 		function = ACPI_WRITE | (accessor_type << 16);
 		break;
 
+	case ACPI_ADR_SPACE_PLATFORM_RT:
+
+		buffer_length = ACPI_PRM_INPUT_BUFFER_SIZE;
+		function = ACPI_WRITE;
+		break;
+
 	default:
 		return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID);
 	}
diff --git a/drivers/acpi/acpica/utuuid.c b/drivers/acpi/acpica/utuuid.c
index 090e44b6b6c7..dca9061518ab 100644
--- a/drivers/acpi/acpica/utuuid.c
+++ b/drivers/acpi/acpica/utuuid.c
@@ -61,4 +61,45 @@ void acpi_ut_convert_string_to_uuid(char *in_string, u8 *uuid_buffer)
 					       1]);
 	}
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_convert_uuid_to_string
+ *
+ * PARAMETERS:  uuid_buffer         - 16-byte UUID buffer
+ *              out_string          - 36-byte formatted UUID string
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Convert 16-byte UUID buffer to 36-byte formatted UUID string
+ *              out_string must be 37 bytes to include null terminator.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ut_convert_uuid_to_string(char *uuid_buffer, char *out_string)
+{
+	u32 i;
+
+	if (!uuid_buffer || !out_string) {
+		return (AE_BAD_PARAMETER);
+	}
+
+	for (i = 0; i < UUID_BUFFER_LENGTH; i++) {
+		out_string[acpi_gbl_map_to_uuid_offset[i]] =
+		    acpi_ut_hex_to_ascii_char(uuid_buffer[i], 4);
+
+		out_string[acpi_gbl_map_to_uuid_offset[i] + 1] =
+		    acpi_ut_hex_to_ascii_char(uuid_buffer[i], 0);
+	}
+
+	/* Insert required hyphens (dashes) */
+
+	out_string[UUID_HYPHEN1_OFFSET] =
+	    out_string[UUID_HYPHEN2_OFFSET] =
+	    out_string[UUID_HYPHEN3_OFFSET] =
+	    out_string[UUID_HYPHEN4_OFFSET] = '-';
+
+	out_string[UUID_STRING_LENGTH] = 0;	/* Null terminate */
+	return (AE_OK);
+}
 #endif
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index e92f84fa8c68..0362cbb72359 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -188,6 +188,8 @@
 #define ACPI_MAX_GSBUS_DATA_SIZE        255
 #define ACPI_MAX_GSBUS_BUFFER_SIZE      ACPI_SERIAL_HEADER_SIZE + ACPI_MAX_GSBUS_DATA_SIZE
 
+#define ACPI_PRM_INPUT_BUFFER_SIZE      26
+
 /* _sx_d and _sx_w control methods */
 
 #define ACPI_NUM_sx_d_METHODS           4

From 24fa16924021858ab9a0418363a2a0ee4cf1915d Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 4 Jun 2021 14:26:08 -0700
Subject: [PATCH 1049/3292] ACPICA: Update version to 20210604

ACPICA commit ffceba1df23f8dbbc64a1023314ec179b4f5331e

Version 20210604.

Link: https://github.com/acpica/acpica/commit/ffceba1d
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index f8d44b06f3e3..a43335961e30 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20210331
+#define ACPI_CA_VERSION                 0x20210604
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>

From 5c1a72a0fbe1b02c3ce0537f85f92ea935e0beec Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 4 Jun 2021 19:50:45 +0300
Subject: [PATCH 1050/3292] ACPI: property: Constify stubs for CONFIG_ACPI=n
 case

There is a few stubs that left untouched during constification of
the fwnode related APIs. Constify three more stubs here.

Fixes: 8b9d6802583a ("ACPI: Constify acpi_bus helper functions, switch to macros")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[ rjw: Subject edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..40657f220f8b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -765,7 +765,7 @@ static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
 	return false;
 }
 
-static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode)
+static inline struct acpi_device *to_acpi_device_node(const struct fwnode_handle *fwnode)
 {
 	return NULL;
 }
@@ -775,12 +775,12 @@ static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
 	return false;
 }
 
-static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode)
+static inline struct acpi_data_node *to_acpi_data_node(const struct fwnode_handle *fwnode)
 {
 	return NULL;
 }
 
-static inline bool acpi_data_node_match(struct fwnode_handle *fwnode,
+static inline bool acpi_data_node_match(const struct fwnode_handle *fwnode,
 					const char *name)
 {
 	return false;

From 3d7c821c1d8071e517048c8b4afdf33109441c0f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 4 Jun 2021 19:50:46 +0300
Subject: [PATCH 1051/3292] ACPI: scan: Constify acpi_dma_supported() helper
 function

Constify arguments to acpi_dma_supported(). The function doesn't need
to change the content of the passed argument and when it's const it
allows to supply the result of other functions that may return a pointer
to a constant object.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[ rjw: Subject edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c     | 2 +-
 include/acpi/acpi_bus.h | 2 +-
 include/linux/acpi.h    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a22778e880c2..609405ca11e2 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1404,7 +1404,7 @@ void acpi_free_pnp_ids(struct acpi_device_pnp *pnp)
  *
  * Return false if DMA is not supported. Otherwise, return true
  */
-bool acpi_dma_supported(struct acpi_device *adev)
+bool acpi_dma_supported(const struct acpi_device *adev)
 {
 	if (!adev)
 		return false;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a82faac5767..2fbd2c2e0568 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -586,7 +586,7 @@ struct acpi_pci_root {
 
 /* helper */
 
-bool acpi_dma_supported(struct acpi_device *adev);
+bool acpi_dma_supported(const struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
 int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
 		       u64 *size);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 40657f220f8b..97f1c5588b5a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -911,7 +911,7 @@ acpi_create_platform_device(struct acpi_device *adev,
 	return NULL;
 }
 
-static inline bool acpi_dma_supported(struct acpi_device *adev)
+static inline bool acpi_dma_supported(const struct acpi_device *adev)
 {
 	return false;
 }

From fb38f314fbd173e2e9f9f0f2e720a5f4889562da Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 4 Jun 2021 19:50:47 +0300
Subject: [PATCH 1052/3292] device property: Unify access to of_node

Historically we have a few variants how we access dev->fwnode
and dev->of_node. Some of the functions during development
gained different versions of the getters. Unify access to of_node
and as a side change slightly refactor ACPI specific branches.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/property.c  | 29 +++++++++++++----------------
 include/linux/property.h |  2 +-
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index dd98759d688b..1f533b314efc 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -759,13 +759,8 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node);
 struct fwnode_handle *device_get_next_child_node(struct device *dev,
 						 struct fwnode_handle *child)
 {
-	struct acpi_device *adev = ACPI_COMPANION(dev);
-	struct fwnode_handle *fwnode = NULL, *next;
-
-	if (dev->of_node)
-		fwnode = of_fwnode_handle(dev->of_node);
-	else if (adev)
-		fwnode = acpi_fwnode_handle(adev);
+	const struct fwnode_handle *fwnode = dev_fwnode(dev);
+	struct fwnode_handle *next;
 
 	/* Try to find a child in primary fwnode */
 	next = fwnode_get_next_child_node(fwnode, child);
@@ -868,28 +863,31 @@ EXPORT_SYMBOL_GPL(device_get_child_node_count);
 
 bool device_dma_supported(struct device *dev)
 {
+	const struct fwnode_handle *fwnode = dev_fwnode(dev);
+
 	/* For DT, this is always supported.
 	 * For ACPI, this depends on CCA, which
 	 * is determined by the acpi_dma_supported().
 	 */
-	if (IS_ENABLED(CONFIG_OF) && dev->of_node)
+	if (is_of_node(fwnode))
 		return true;
 
-	return acpi_dma_supported(ACPI_COMPANION(dev));
+	return acpi_dma_supported(to_acpi_device_node(fwnode));
 }
 EXPORT_SYMBOL_GPL(device_dma_supported);
 
 enum dev_dma_attr device_get_dma_attr(struct device *dev)
 {
+	const struct fwnode_handle *fwnode = dev_fwnode(dev);
 	enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED;
 
-	if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
-		if (of_dma_is_coherent(dev->of_node))
+	if (is_of_node(fwnode)) {
+		if (of_dma_is_coherent(to_of_node(fwnode)))
 			attr = DEV_DMA_COHERENT;
 		else
 			attr = DEV_DMA_NON_COHERENT;
 	} else
-		attr = acpi_get_dma_attr(ACPI_COMPANION(dev));
+		attr = acpi_get_dma_attr(to_acpi_device_node(fwnode));
 
 	return attr;
 }
@@ -1007,14 +1005,13 @@ EXPORT_SYMBOL(device_get_mac_address);
  * Returns Linux IRQ number on success. Other values are determined
  * accordingly to acpi_/of_ irq_get() operation.
  */
-int fwnode_irq_get(struct fwnode_handle *fwnode, unsigned int index)
+int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index)
 {
-	struct device_node *of_node = to_of_node(fwnode);
 	struct resource res;
 	int ret;
 
-	if (IS_ENABLED(CONFIG_OF) && of_node)
-		return of_irq_get(of_node, index);
+	if (is_of_node(fwnode))
+		return of_irq_get(to_of_node(fwnode), index);
 
 	ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), index, &res);
 	if (ret)
diff --git a/include/linux/property.h b/include/linux/property.h
index 0d876316e61d..073e680c35e2 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -119,7 +119,7 @@ struct fwnode_handle *device_get_named_child_node(struct device *dev,
 struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode);
 void fwnode_handle_put(struct fwnode_handle *fwnode);
 
-int fwnode_irq_get(struct fwnode_handle *fwnode, unsigned int index);
+int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index);
 
 unsigned int device_get_child_node_count(struct device *dev);
 

From 606e56c6eced3135aecd8144b6d57b4b49e7ef89 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:23 +0800
Subject: [PATCH 1053/3292] ACPI: cmos_rtc: Using pr_fmt() and remove PREFIX

Introduce pr_fmt() and remove printk PREFIX to unify the
log message printing.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_cmos_rtc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c
index 67f1d33d15c4..4cf4aef7ce0c 100644
--- a/drivers/acpi/acpi_cmos_rtc.c
+++ b/drivers/acpi/acpi_cmos_rtc.c
@@ -6,6 +6,8 @@
  * Authors: Lan Tianyu <tianyu.lan@intel.com>
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -59,7 +61,7 @@ static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev,
 			&acpi_cmos_rtc_space_handler,
 			NULL, NULL);
 	if (ACPI_FAILURE(status)) {
-		pr_err(PREFIX "Error installing CMOS-RTC region handler\n");
+		pr_err("Error installing CMOS-RTC region handler\n");
 		return -ENODEV;
 	}
 
@@ -70,7 +72,7 @@ static void acpi_remove_cmos_rtc_space_handler(struct acpi_device *adev)
 {
 	if (ACPI_FAILURE(acpi_remove_address_space_handler(adev->handle,
 			ACPI_ADR_SPACE_CMOS, &acpi_cmos_rtc_space_handler)))
-		pr_err(PREFIX "Error removing CMOS-RTC region handler\n");
+		pr_err("Error removing CMOS-RTC region handler\n");
 }
 
 static struct acpi_scan_handler cmos_rtc_handler = {

From 007b3e53f3a47b3cefe6224f89baac300e8d0265 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:24 +0800
Subject: [PATCH 1054/3292] ACPI: blacklist: Unify the message printing

Intoduce pr_fmt() and use pr_*() macros to replace printk(), to generate
a unified format string for prefix, then remove the PREFIX.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/blacklist.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index a86a770c9b79..a558d24fb788 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -10,6 +10,8 @@
  *  Copyright (C) 2002 Andy Grover <andrew.grover@intel.com>
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
@@ -49,12 +51,12 @@ int __init acpi_blacklisted(void)
 
 	i = acpi_match_platform_list(acpi_blacklist);
 	if (i >= 0) {
-		pr_err(PREFIX "Vendor \"%6.6s\" System \"%8.8s\" Revision 0x%x has a known ACPI BIOS problem.\n",
+		pr_err("Vendor \"%6.6s\" System \"%8.8s\" Revision 0x%x has a known ACPI BIOS problem.\n",
 		       acpi_blacklist[i].oem_id,
 		       acpi_blacklist[i].oem_table_id,
 		       acpi_blacklist[i].oem_revision);
 
-		pr_err(PREFIX "Reason: %s. This is a %s error\n",
+		pr_err("Reason: %s. This is a %s error\n",
 		       acpi_blacklist[i].reason,
 		       (acpi_blacklist[i].data ?
 			"non-recoverable" : "recoverable"));
@@ -73,8 +75,7 @@ int __init acpi_blacklisted(void)
 #ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
 static int __init dmi_enable_rev_override(const struct dmi_system_id *d)
 {
-	printk(KERN_NOTICE PREFIX "DMI detected: %s (force ACPI _REV to 5)\n",
-	       d->ident);
+	pr_notice("DMI detected: %s (force ACPI _REV to 5)\n", d->ident);
 	acpi_rev_override_setup(NULL);
 	return 0;
 }

From 8e173cbb6a776cb1a3540be17780a5616b5c815a Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:25 +0800
Subject: [PATCH 1055/3292] ACPI: bus: Use pr_*() macros to replace printk()

In commit ee98460b2ff9 ("ACPI: bus: Clean up printing messages"),
direct printk() invocations was replaced with the matching pr_*()
calls, but the left two printk() calls was merged at the same time
with the above cleaup commit, so we missed them for cleanup, let's
replace them now and we can remove the use of PREFIX later.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76..60787d9ca0cd 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -370,7 +370,7 @@ EXPORT_SYMBOL_GPL(osc_sb_native_usb4_control);
 
 static void acpi_bus_decode_usb_osc(const char *msg, u32 bits)
 {
-	printk(KERN_INFO PREFIX "%s USB3%c DisplayPort%c PCIe%c XDomain%c\n", msg,
+	pr_info("%s USB3%c DisplayPort%c PCIe%c XDomain%c\n", msg,
 	       (bits & OSC_USB_USB3_TUNNELING) ? '+' : '-',
 	       (bits & OSC_USB_DP_TUNNELING) ? '+' : '-',
 	       (bits & OSC_USB_PCIE_TUNNELING) ? '+' : '-',
@@ -409,7 +409,7 @@ static void acpi_bus_osc_negotiate_usb_control(void)
 		return;
 
 	if (context.ret.length != sizeof(capbuf)) {
-		printk(KERN_INFO PREFIX "USB4 _OSC: returned invalid length buffer\n");
+		pr_info("USB4 _OSC: returned invalid length buffer\n");
 		goto out_free;
 	}
 

From ad319565d62fa42220439efe29cc5d7b8c248dac Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:26 +0800
Subject: [PATCH 1056/3292] ACPI: event: Use pr_*() macros to replace printk()

Introduce pr_fmt() and replace direct printk() invocation with
the matching pr_*() call to prepare for removing PREFIX.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/event.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c
index 92e59f45329b..df38f3c94861 100644
--- a/drivers/acpi/event.c
+++ b/drivers/acpi/event.c
@@ -7,6 +7,8 @@
  *
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/proc_fs.h>
@@ -173,8 +175,8 @@ static int __init acpi_event_init(void)
 	/* create genetlink for acpi event */
 	error = acpi_event_genetlink_init();
 	if (error)
-		printk(KERN_WARNING PREFIX
-		       "Failed to create genetlink family for ACPI event\n");
+		pr_warn("Failed to create genetlink family for ACPI event\n");
+
 	return 0;
 }
 

From e2935abb3a3ae88f5ab832158d6ed10c599a871f Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:27 +0800
Subject: [PATCH 1057/3292] ACPI: glue: Clean up the printing messages

Remove the in house ACPI_GLUE_DEBUG and its related debug message
printing, using pr_debug() instead.

While at it, replace printk() with pr_* to simplify the code.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/glue.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 0715e3be99a0..fce3f3bba714 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -6,6 +6,8 @@
  * Copyright (c) 2005 Intel Corp.
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/acpi_iort.h>
 #include <linux/export.h>
 #include <linux/init.h>
@@ -19,17 +21,6 @@
 
 #include "internal.h"
 
-#define ACPI_GLUE_DEBUG	0
-#if ACPI_GLUE_DEBUG
-#define DBG(fmt, ...)						\
-	printk(KERN_DEBUG PREFIX fmt, ##__VA_ARGS__)
-#else
-#define DBG(fmt, ...)						\
-do {								\
-	if (0)							\
-		printk(KERN_DEBUG PREFIX fmt, ##__VA_ARGS__);	\
-} while (0)
-#endif
 static LIST_HEAD(bus_type_list);
 static DECLARE_RWSEM(bus_type_sem);
 
@@ -44,7 +35,7 @@ int register_acpi_bus_type(struct acpi_bus_type *type)
 		down_write(&bus_type_sem);
 		list_add_tail(&type->list, &bus_type_list);
 		up_write(&bus_type_sem);
-		printk(KERN_INFO PREFIX "bus type %s registered\n", type->name);
+		pr_info("bus type %s registered\n", type->name);
 		return 0;
 	}
 	return -ENODEV;
@@ -59,8 +50,7 @@ int unregister_acpi_bus_type(struct acpi_bus_type *type)
 		down_write(&bus_type_sem);
 		list_del_init(&type->list);
 		up_write(&bus_type_sem);
-		printk(KERN_INFO PREFIX "bus type %s unregistered\n",
-		       type->name);
+		pr_info("bus type %s unregistered\n", type->name);
 		return 0;
 	}
 	return -ENODEV;
@@ -307,7 +297,7 @@ static int acpi_device_notify(struct device *dev)
 
 		adev = type->find_companion(dev);
 		if (!adev) {
-			DBG("Unable to get handle for %s\n", dev_name(dev));
+			pr_debug("Unable to get handle for %s\n", dev_name(dev));
 			ret = -ENODEV;
 			goto out;
 		}
@@ -328,16 +318,15 @@ static int acpi_device_notify(struct device *dev)
 		adev->handler->bind(dev);
 
  out:
-#if ACPI_GLUE_DEBUG
 	if (!ret) {
 		struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 
 		acpi_get_name(ACPI_HANDLE(dev), ACPI_FULL_PATHNAME, &buffer);
-		DBG("Device %s -> %s\n", dev_name(dev), (char *)buffer.pointer);
+		pr_debug("Device %s -> %s\n", dev_name(dev), (char *)buffer.pointer);
 		kfree(buffer.pointer);
-	} else
-		DBG("Device %s -> No ACPI support\n", dev_name(dev));
-#endif
+	} else {
+		pr_debug("Device %s -> No ACPI support\n", dev_name(dev));
+	}
 
 	return ret;
 }

From 4f59927d5de483f99d26bbf0c3e8089adc9f139e Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:28 +0800
Subject: [PATCH 1058/3292] ACPI: nvs: Unify the message printing

The message printing in nvs.c is mixed with pr_*() and
printk(), but with no prefix and also no pr_fmt() defined.

Introduce pr_fmt() and use pr_*() macros to replace printk(),
to generate a unified format string for prefix.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/nvs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c
index 9f8712a557b3..7f02e399047c 100644
--- a/drivers/acpi/nvs.c
+++ b/drivers/acpi/nvs.c
@@ -5,6 +5,8 @@
  * Copyright (C) 2008-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
  */
 
+#define pr_fmt(fmt) "ACPI: PM: " fmt
+
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -94,7 +96,7 @@ static int suspend_nvs_register(unsigned long start, unsigned long size)
 {
 	struct nvs_page *entry, *next;
 
-	pr_info("PM: Registering ACPI NVS region [mem %#010lx-%#010lx] (%ld bytes)\n",
+	pr_info("Registering ACPI NVS region [mem %#010lx-%#010lx] (%ld bytes)\n",
 		start, start + size - 1, size);
 
 	while (size > 0) {
@@ -170,7 +172,7 @@ int suspend_nvs_save(void)
 {
 	struct nvs_page *entry;
 
-	printk(KERN_INFO "PM: Saving platform NVS memory\n");
+	pr_info("Saving platform NVS memory\n");
 
 	list_for_each_entry(entry, &nvs_list, node)
 		if (entry->data) {
@@ -202,7 +204,7 @@ void suspend_nvs_restore(void)
 {
 	struct nvs_page *entry;
 
-	printk(KERN_INFO "PM: Restoring platform NVS memory\n");
+	pr_info("Restoring platform NVS memory\n");
 
 	list_for_each_entry(entry, &nvs_list, node)
 		if (entry->data)

From 2e670deddaa5b8b6d98554664ebc2fa723a30e9b Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:29 +0800
Subject: [PATCH 1059/3292] ACPI: osl: Remove the duplicated PREFIX for message
 printing

We have pr_fmt() in osl.c, so pr_err(PREFIX ...) is duplicated
and wrong, fix it by removing the PREFIX.

Also remove the using of PREFIX in WARN() and just add the plain
"ACPI: " in message to keep it unchanged.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/osl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 327e1b4eb6b0..1207490b0a50 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -212,7 +212,7 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
 			return efi.acpi20;
 		if (efi.acpi != EFI_INVALID_TABLE_ADDR)
 			return efi.acpi;
-		pr_err(PREFIX "System description tables not found\n");
+		pr_err("System description tables not found\n");
 	} else if (IS_ENABLED(CONFIG_ACPI_LEGACY_TABLES_LOOKUP)) {
 		acpi_find_root_pointer(&pa);
 	}
@@ -430,7 +430,7 @@ void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size)
 	map = acpi_map_lookup_virt(virt, size);
 	if (!map) {
 		mutex_unlock(&acpi_ioremap_lock);
-		WARN(true, PREFIX "%s: bad address %p\n", __func__, virt);
+		WARN(true, "ACPI: %s: bad address %p\n", __func__, virt);
 		return;
 	}
 	acpi_os_drop_map_ref(map);

From ccde83e318a58d89e2d4d3856b5b90ff745bf28d Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:30 +0800
Subject: [PATCH 1060/3292] ACPI: pci_root: Unify the message printing

In acpi_pci_root_add(), pr_info() is added with PREFIX, but
in acpi_pci_root_remap_iospace() the pr_info() with no
PREFIX.

Introduce pr_fmt() to unify the message printing and remove
the PREFIX.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/pci_root.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index dcd593766a64..d7deedf3548e 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -6,6 +6,8 @@
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -574,7 +576,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
 		goto end;
 	}
 
-	pr_info(PREFIX "%s [%s] (domain %04x %pR)\n",
+	pr_info("%s [%s] (domain %04x %pR)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
 	       root->segment, &root->secondary);
 

From 673a0796b1237d1cbe4947e711daa196858a138a Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:31 +0800
Subject: [PATCH 1061/3292] ACPI: processor_thermal: Remove unused PREFIX for
 printing

The PREFIX "ACPI: " is not used in this file, remove it.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_thermal.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 677a132be242..a3d34e3f9f94 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -17,8 +17,6 @@
 #include <acpi/processor.h>
 #include <linux/uaccess.h>
 
-#define PREFIX "ACPI: "
-
 #ifdef CONFIG_CPU_FREQ
 
 /* If a passive cooling situation is detected, primarily CPUfreq is used, as it

From 6183a684377f9340ff9460743f87f01216af3a6b Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:32 +0800
Subject: [PATCH 1062/3292] ACPI: processor_perflib: Cleanup print messages

The log messages in processor_perflib.c is not in consistency,
we have some printk() calls with PREFIX, but some are not; we
use pr_*() functions without prefix. So add pr_fmt() and unify
them with pr_*() functions.

While at it, fix some obvious coding style issues when going
through the functions.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_perflib.c | 38 +++++++++++++++-----------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index d088a0089ee9..757a98f6d7a2 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -9,6 +9,8 @@
  *  			- Added processor hotplug support
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -20,8 +22,6 @@
 #include <asm/cpufeature.h>
 #endif
 
-#define PREFIX "ACPI: "
-
 #define ACPI_PROCESSOR_FILE_PERFORMANCE	"performance"
 
 static DEFINE_MUTEX(performance_mutex);
@@ -194,7 +194,6 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	union acpi_object *pct = NULL;
 	union acpi_object obj = { 0 };
 
-
 	status = acpi_evaluate_object(pr->handle, "_PCT", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		acpi_evaluation_failure_warn(pr->handle, "_PCT", status);
@@ -204,7 +203,7 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	pct = (union acpi_object *)buffer.pointer;
 	if (!pct || (pct->type != ACPI_TYPE_PACKAGE)
 	    || (pct->package.count != 2)) {
-		printk(KERN_ERR PREFIX "Invalid _PCT data\n");
+		pr_err("Invalid _PCT data\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -218,7 +217,7 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	if ((obj.type != ACPI_TYPE_BUFFER)
 	    || (obj.buffer.length < sizeof(struct acpi_pct_register))
 	    || (obj.buffer.pointer == NULL)) {
-		printk(KERN_ERR PREFIX "Invalid _PCT data (control_register)\n");
+		pr_err("Invalid _PCT data (control_register)\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -234,7 +233,7 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	if ((obj.type != ACPI_TYPE_BUFFER)
 	    || (obj.buffer.length < sizeof(struct acpi_pct_register))
 	    || (obj.buffer.pointer == NULL)) {
-		printk(KERN_ERR PREFIX "Invalid _PCT data (status_register)\n");
+		pr_err("Invalid _PCT data (status_register)\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -242,7 +241,7 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	memcpy(&pr->performance->status_register, obj.buffer.pointer,
 	       sizeof(struct acpi_pct_register));
 
-      end:
+end:
 	kfree(buffer.pointer);
 
 	return result;
@@ -294,7 +293,6 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 	int i;
 	int last_invalid = -1;
 
-
 	status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		acpi_evaluation_failure_warn(pr->handle, "_PSS", status);
@@ -303,7 +301,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 
 	pss = buffer.pointer;
 	if (!pss || (pss->type != ACPI_TYPE_PACKAGE)) {
-		printk(KERN_ERR PREFIX "Invalid _PSS data\n");
+		pr_err("Invalid _PSS data\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -357,7 +355,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 		if (!px->core_frequency ||
 		    ((u32)(px->core_frequency * 1000) !=
 		     (px->core_frequency * 1000))) {
-			printk(KERN_ERR FW_BUG PREFIX
+			pr_err(FW_BUG
 			       "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n",
 			       pr->id, px->core_frequency);
 			if (last_invalid == -1)
@@ -375,8 +373,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 	}
 
 	if (last_invalid == 0) {
-		printk(KERN_ERR FW_BUG PREFIX
-		       "No valid BIOS _PSS frequency found for processor %d\n", pr->id);
+		pr_err(FW_BUG
+			   "No valid BIOS _PSS frequency found for processor %d\n", pr->id);
 		result = -EFAULT;
 		kfree(pr->performance->states);
 		pr->performance->states = NULL;
@@ -385,7 +383,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 	if (last_invalid > 0)
 		pr->performance->state_count = last_invalid;
 
-      end:
+end:
 	kfree(buffer.pointer);
 
 	return result;
@@ -426,7 +424,7 @@ int acpi_processor_get_performance_info(struct acpi_processor *pr)
 #ifdef CONFIG_X86
 	if (acpi_has_method(pr->handle, "_PPC")) {
 		if(boot_cpu_has(X86_FEATURE_EST))
-			printk(KERN_WARNING FW_BUG "BIOS needs update for CPU "
+			pr_warn(FW_BUG "BIOS needs update for CPU "
 			       "frequency support\n");
 	}
 #endif
@@ -520,13 +518,13 @@ int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
 
 	psd = buffer.pointer;
 	if (!psd || (psd->type != ACPI_TYPE_PACKAGE)) {
-		printk(KERN_ERR PREFIX "Invalid _PSD data\n");
+		pr_err("Invalid _PSD data\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if (psd->package.count != 1) {
-		printk(KERN_ERR PREFIX "Invalid _PSD data\n");
+		pr_err("Invalid _PSD data\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -537,19 +535,19 @@ int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
 	status = acpi_extract_package(&(psd->package.elements[0]),
 		&format, &state);
 	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR PREFIX "Invalid _PSD data\n");
+		pr_err("Invalid _PSD data\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) {
-		printk(KERN_ERR PREFIX "Unknown _PSD:num_entries\n");
+		pr_err("Unknown _PSD:num_entries\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if (pdomain->revision != ACPI_PSD_REV0_REVISION) {
-		printk(KERN_ERR PREFIX "Unknown _PSD:revision\n");
+		pr_err("Unknown _PSD:revision\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -557,7 +555,7 @@ int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
 	if (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL &&
 	    pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY &&
 	    pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL) {
-		printk(KERN_ERR PREFIX "Invalid _PSD:coord_type\n");
+		pr_err("Invalid _PSD:coord_type\n");
 		result = -EFAULT;
 		goto end;
 	}

From 4140054af069be3a7c3fd82dafaccc51fb52b1b6 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:33 +0800
Subject: [PATCH 1063/3292] ACPI: processor_throttling: Cleanup the printing
 messages

The log messages in processor_throttling.c is not in consistency,
we have some printk() calls with PREFIX, but some are not; also we
use pr_*() functions without prefix. So add pr_fmt() and unify
them with pr_*() functions.

While at it, fix some obvious coding style issues when going
through the functions.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_throttling.c | 60 +++++++++++++----------------
 1 file changed, 26 insertions(+), 34 deletions(-)

diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index e61b8f038364..0086afe7d65d 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -9,6 +9,8 @@
  *  			- Added processor hotplug support
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -20,8 +22,6 @@
 #include <asm/io.h>
 #include <linux/uaccess.h>
 
-#define PREFIX "ACPI: "
-
 /* ignore_tpc:
  *  0 -> acpi processor driver doesn't ignore _TPC values
  *  1 -> acpi processor driver ignores _TPC values
@@ -236,8 +236,7 @@ static int acpi_processor_throttling_notifier(unsigned long event, void *data)
 		if (pr->throttling_platform_limit > target_state)
 			target_state = pr->throttling_platform_limit;
 		if (target_state >= p_throttling->state_count) {
-			printk(KERN_WARNING
-				"Exceed the limit of T-state \n");
+			pr_warn("Exceed the limit of T-state \n");
 			target_state = p_throttling->state_count - 1;
 		}
 		p_tstate->target_state = target_state;
@@ -256,8 +255,7 @@ static int acpi_processor_throttling_notifier(unsigned long event, void *data)
 				  cpu, target_state);
 		break;
 	default:
-		printk(KERN_WARNING
-			"Unsupported Throttling notifier event\n");
+		pr_warn("Unsupported Throttling notifier event\n");
 		break;
 	}
 
@@ -422,7 +420,7 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
 	ptc = (union acpi_object *)buffer.pointer;
 	if (!ptc || (ptc->type != ACPI_TYPE_PACKAGE)
 	    || (ptc->package.count != 2)) {
-		printk(KERN_ERR PREFIX "Invalid _PTC data\n");
+		pr_err("Invalid _PTC data\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -436,8 +434,7 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
 	if ((obj.type != ACPI_TYPE_BUFFER)
 	    || (obj.buffer.length < sizeof(struct acpi_ptc_register))
 	    || (obj.buffer.pointer == NULL)) {
-		printk(KERN_ERR PREFIX
-		       "Invalid _PTC data (control_register)\n");
+		pr_err("Invalid _PTC data (control_register)\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -453,7 +450,7 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
 	if ((obj.type != ACPI_TYPE_BUFFER)
 	    || (obj.buffer.length < sizeof(struct acpi_ptc_register))
 	    || (obj.buffer.pointer == NULL)) {
-		printk(KERN_ERR PREFIX "Invalid _PTC data (status_register)\n");
+		pr_err("Invalid _PTC data (status_register)\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -465,14 +462,14 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
 
 	if ((throttling->control_register.bit_width +
 		throttling->control_register.bit_offset) > 32) {
-		printk(KERN_ERR PREFIX "Invalid _PTC control register\n");
+		pr_err("Invalid _PTC control register\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if ((throttling->status_register.bit_width +
 		throttling->status_register.bit_offset) > 32) {
-		printk(KERN_ERR PREFIX "Invalid _PTC status register\n");
+		pr_err("Invalid _PTC status register\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -506,7 +503,7 @@ static int acpi_processor_get_throttling_states(struct acpi_processor *pr)
 
 	tss = buffer.pointer;
 	if (!tss || (tss->type != ACPI_TYPE_PACKAGE)) {
-		printk(KERN_ERR PREFIX "Invalid _TSS data\n");
+		pr_err("Invalid _TSS data\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -546,15 +543,14 @@ static int acpi_processor_get_throttling_states(struct acpi_processor *pr)
 		}
 
 		if (!tx->freqpercentage) {
-			printk(KERN_ERR PREFIX
-			       "Invalid _TSS data: freq is zero\n");
+			pr_err("Invalid _TSS data: freq is zero\n");
 			result = -EFAULT;
 			kfree(pr->throttling.states_tss);
 			goto end;
 		}
 	}
 
-      end:
+end:
 	kfree(buffer.pointer);
 
 	return result;
@@ -587,13 +583,13 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr)
 
 	tsd = buffer.pointer;
 	if (!tsd || (tsd->type != ACPI_TYPE_PACKAGE)) {
-		printk(KERN_ERR PREFIX "Invalid _TSD data\n");
+		pr_err("Invalid _TSD data\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if (tsd->package.count != 1) {
-		printk(KERN_ERR PREFIX "Invalid _TSD data\n");
+		pr_err("Invalid _TSD data\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -606,19 +602,19 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr)
 	status = acpi_extract_package(&(tsd->package.elements[0]),
 				      &format, &state);
 	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR PREFIX "Invalid _TSD data\n");
+		pr_err("Invalid _TSD data\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if (pdomain->num_entries != ACPI_TSD_REV0_ENTRIES) {
-		printk(KERN_ERR PREFIX "Unknown _TSD:num_entries\n");
+		pr_err("Unknown _TSD:num_entries\n");
 		result = -EFAULT;
 		goto end;
 	}
 
 	if (pdomain->revision != ACPI_TSD_REV0_REVISION) {
-		printk(KERN_ERR PREFIX "Unknown _TSD:revision\n");
+		pr_err("Unknown _TSD:revision\n");
 		result = -EFAULT;
 		goto end;
 	}
@@ -639,7 +635,7 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr)
 		pthrottling->shared_type = DOMAIN_COORD_TYPE_SW_ALL;
 	}
 
-      end:
+end:
 	kfree(buffer.pointer);
 	return result;
 }
@@ -711,8 +707,7 @@ static int acpi_throttling_rdmsr(u64 *value)
 
 	if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
 		!this_cpu_has(X86_FEATURE_ACPI)) {
-		printk(KERN_ERR PREFIX
-			"HARDWARE addr space,NOT supported yet\n");
+		pr_err("HARDWARE addr space,NOT supported yet\n");
 	} else {
 		msr_low = 0;
 		msr_high = 0;
@@ -732,8 +727,7 @@ static int acpi_throttling_wrmsr(u64 value)
 
 	if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
 		!this_cpu_has(X86_FEATURE_ACPI)) {
-		printk(KERN_ERR PREFIX
-			"HARDWARE addr space,NOT supported yet\n");
+		pr_err("HARDWARE addr space,NOT supported yet\n");
 	} else {
 		msr = value;
 		wrmsr_safe(MSR_IA32_THERM_CONTROL,
@@ -745,15 +739,13 @@ static int acpi_throttling_wrmsr(u64 value)
 #else
 static int acpi_throttling_rdmsr(u64 *value)
 {
-	printk(KERN_ERR PREFIX
-		"HARDWARE addr space,NOT supported yet\n");
+	pr_err("HARDWARE addr space,NOT supported yet\n");
 	return -1;
 }
 
 static int acpi_throttling_wrmsr(u64 value)
 {
-	printk(KERN_ERR PREFIX
-		"HARDWARE addr space,NOT supported yet\n");
+	pr_err("HARDWARE addr space,NOT supported yet\n");
 	return -1;
 }
 #endif
@@ -784,7 +776,7 @@ static int acpi_read_throttling_status(struct acpi_processor *pr,
 		ret = acpi_throttling_rdmsr(value);
 		break;
 	default:
-		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
+		pr_err("Unknown addr space %d\n",
 		       (u32) (throttling->status_register.space_id));
 	}
 	return ret;
@@ -817,7 +809,7 @@ static int acpi_write_throttling_state(struct acpi_processor *pr,
 		ret = acpi_throttling_wrmsr(value);
 		break;
 	default:
-		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
+		pr_err("Unknown addr space %d\n",
 		       (u32) (throttling->control_register.space_id));
 	}
 	return ret;
@@ -926,7 +918,7 @@ static int acpi_processor_get_fadt_info(struct acpi_processor *pr)
 	}
 	/* TBD: Support duty_cycle values that span bit 4. */
 	else if ((pr->throttling.duty_offset + pr->throttling.duty_width) > 4) {
-		printk(KERN_WARNING PREFIX "duty_cycle spans bit 4\n");
+		pr_warn("duty_cycle spans bit 4\n");
 		return -EINVAL;
 	}
 
@@ -1246,7 +1238,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
 			goto end;
 	}
 
-      end:
+end:
 	if (result)
 		pr->flags.throttling = 0;
 

From 6ecfe60a13b1b27c7bc60892fa8116b223ce4a6b Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:34 +0800
Subject: [PATCH 1064/3292] ACPI: reboot: Unify the message printing

The meesage printing in this file is mixed with pr_*() and
printk() but with no prefix and no pr_fmt() defined.

Intoduce pr_fmt() and use pr_*() macros to replace printk(),
to generate a unified format string for prefix.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/reboot.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c
index 2a61f884e222..b79b7c99c237 100644
--- a/drivers/acpi/reboot.c
+++ b/drivers/acpi/reboot.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <acpi/reboot.h>
@@ -63,7 +65,7 @@ void acpi_reboot(void)
 
 	case ACPI_ADR_SPACE_SYSTEM_MEMORY:
 	case ACPI_ADR_SPACE_SYSTEM_IO:
-		printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n");
+		pr_debug("ACPI MEMORY or I/O RESET_REG.\n");
 		acpi_reset();
 		break;
 	}

From 86ca3b0ab41f7172b963a38074612f8e5f1851e1 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:35 +0800
Subject: [PATCH 1065/3292] ACPI: sysfs: Cleanup message printing

We have pr_fmt() in sysfs.c but we still use pr_err(PREFIX ...) which
is wrong, remove the duplicated PREFIX and also using pr_* to replace
printk to simlify the code.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index d25927195d6d..88629d26bd48 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -359,8 +359,7 @@ static int acpi_table_attr_init(struct kobject *tables_obj,
 	}
 	table_attr->instance++;
 	if (table_attr->instance > ACPI_MAX_TABLE_INSTANCES) {
-		pr_warn("%4.4s: too many table instances\n",
-			table_attr->name);
+		pr_warn("%4.4s: too many table instances\n", table_attr->name);
 		return -ERANGE;
 	}
 
@@ -737,8 +736,7 @@ static ssize_t counter_set(struct kobject *kobj,
 		goto end;
 
 	if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER)) {
-		printk(KERN_WARNING PREFIX
-		       "Can not change Invalid GPE/Fixed Event status\n");
+		pr_warn("Can not change Invalid GPE/Fixed Event status\n");
 		return -EINVAL;
 	}
 
@@ -983,7 +981,7 @@ void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug,
 	return;
 
  err_out:
-	pr_err(PREFIX "Unable to add hotplug profile '%s'\n", name);
+	pr_err("Unable to add hotplug profile '%s'\n", name);
 }
 
 static ssize_t force_remove_show(struct kobject *kobj,

From bd10c13b7775d79e5925c66aeaa6ff64c10c3992 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:36 +0800
Subject: [PATCH 1066/3292] ACPI: sbshc: Unify the message printing

Using pr_fmt() and pr_*() macros to unify the message printing.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sbshc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index 53c2862c4c75..d69a2a84c83c 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -5,6 +5,8 @@
  * Copyright (c) 2007 Alexey Starikovskiy
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/acpi.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
@@ -13,8 +15,6 @@
 #include <linux/interrupt.h>
 #include "sbshc.h"
 
-#define PREFIX "ACPI: "
-
 #define ACPI_SMB_HC_CLASS	"smbus_host_ctl"
 #define ACPI_SMB_HC_DEVICE_NAME	"ACPI SMBus HC"
 
@@ -109,7 +109,7 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol,
 	u8 temp, sz = 0;
 
 	if (!hc) {
-		printk(KERN_ERR PREFIX "host controller is not configured\n");
+		pr_err("host controller is not configured\n");
 		return ret;
 	}
 
@@ -254,7 +254,7 @@ static int acpi_smbus_hc_add(struct acpi_device *device)
 
 	status = acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
 	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR PREFIX "error obtaining _EC.\n");
+		pr_err("error obtaining _EC.\n");
 		return -EIO;
 	}
 

From 8acf4108aabb025223d9fda416500c12ec6f6107 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:37 +0800
Subject: [PATCH 1067/3292] ACPI: scan: Unify the log message printing

The log messages in scan.c is not in consistency, some pr_*() calls
have PREFIX, but some don't.

Using pr_fmt() and remove PREFIX, also replace printk() with pr_*()
macro to unify the message printing.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e10d38ac7cf2..0c916c18a250 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -3,6 +3,8 @@
  * scan.c - support for transforming the ACPI namespace into individual objects
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -729,7 +731,7 @@ int acpi_device_add(struct acpi_device *device,
 
 	result = acpi_device_setup_files(device);
 	if (result)
-		printk(KERN_ERR PREFIX "Error creating sysfs interface for device %s\n",
+		pr_err("Error creating sysfs interface for device %s\n",
 		       dev_name(&device->dev));
 
 	return 0;
@@ -1320,8 +1322,7 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp,
 
 		acpi_get_object_info(handle, &info);
 		if (!info) {
-			pr_err(PREFIX "%s: Error reading device info\n",
-					__func__);
+			pr_err("%s: Error reading device info\n", __func__);
 			return;
 		}
 
@@ -2278,7 +2279,7 @@ static void __init acpi_get_spcr_uart_addr(void)
 	status = acpi_get_table(ACPI_SIG_SPCR, 0,
 				(struct acpi_table_header **)&spcr_ptr);
 	if (ACPI_FAILURE(status)) {
-		pr_warn(PREFIX "STAO table present, but SPCR is missing\n");
+		pr_warn("STAO table present, but SPCR is missing\n");
 		return;
 	}
 
@@ -2319,7 +2320,7 @@ int __init acpi_scan_init(void)
 				(struct acpi_table_header **)&stao_ptr);
 	if (ACPI_SUCCESS(status)) {
 		if (stao_ptr->header.length > sizeof(struct acpi_table_stao))
-			pr_info(PREFIX "STAO Name List not yet supported.\n");
+			pr_info("STAO Name List not yet supported.\n");
 
 		if (stao_ptr->ignore_uart)
 			acpi_get_spcr_uart_addr();

From f7e02c8d2344c9c2f124f71f53a900feb946eb8c Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:38 +0800
Subject: [PATCH 1068/3292] ACPI: sbs: Unify the message printing

Using pr_fmt() and pr_*() macros to unify the message printing.

While at it, fix the obvious coding style issue when scanning
the code.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sbs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 3b0b6dd34914..4938010fcac7 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -7,6 +7,8 @@
  *  Copyright (c) 2005 Rich Townsend <rhdt@bartol.udel.edu>
  */
 
+#define pr_fmt(fmt) "ACPI: " fmt
+
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -23,8 +25,6 @@
 
 #include "sbshc.h"
 
-#define PREFIX "ACPI: "
-
 #define ACPI_SBS_CLASS			"sbs"
 #define ACPI_AC_CLASS			"ac_adapter"
 #define ACPI_SBS_DEVICE_NAME		"Smart Battery System"
@@ -544,7 +544,7 @@ static int acpi_battery_add(struct acpi_sbs *sbs, int id)
 		goto end;
 	battery->have_sysfs_alarm = 1;
       end:
-	printk(KERN_INFO PREFIX "%s [%s]: Battery Slot [%s] (battery %s)\n",
+	pr_info("%s [%s]: Battery Slot [%s] (battery %s)\n",
 	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
 	       battery->name, battery->present ? "present" : "absent");
 	return result;
@@ -577,10 +577,10 @@ static int acpi_charger_add(struct acpi_sbs *sbs)
 		result = PTR_ERR(sbs->charger);
 		sbs->charger = NULL;
 	}
-	printk(KERN_INFO PREFIX "%s [%s]: AC Adapter [%s] (%s)\n",
+	pr_info("%s [%s]: AC Adapter [%s] (%s)\n",
 	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
 	       ACPI_AC_DIR_NAME, sbs->charger_present ? "on-line" : "off-line");
-      end:
+end:
 	return result;
 }
 
@@ -658,7 +658,7 @@ static int acpi_sbs_add(struct acpi_device *device)
 		acpi_battery_add(sbs, 0);
 
 	acpi_smbus_register_callback(sbs->hc, acpi_sbs_callback, sbs);
-      end:
+end:
 	if (result)
 		acpi_sbs_remove(device);
 	return result;

From f5ee87df7a4dabadf7d560e943cbae24ed8be455 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:39 +0800
Subject: [PATCH 1069/3292] ACPI: sleep: Unify the message printing

Intoduce pr_fmt() and use pr_*() macros to replace printk(), also
remove all the PREFIX for pr_*() calls to generate a unified format
string for prefix.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sleep.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index df386571da98..e1773c29e414 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -8,6 +8,8 @@
  * Copyright (c) 2003 Open Source Development Lab
  */
 
+#define pr_fmt(fmt) "ACPI: PM: " fmt
+
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/dmi.h>
@@ -41,7 +43,7 @@ static void acpi_sleep_tts_switch(u32 acpi_state)
 		 * OS can't evaluate the _TTS object correctly. Some warning
 		 * message will be printed. But it won't break anything.
 		 */
-		printk(KERN_NOTICE "Failure in evaluating _TTS object\n");
+		pr_notice("Failure in evaluating _TTS object\n");
 	}
 }
 
@@ -73,8 +75,7 @@ static int acpi_sleep_prepare(u32 acpi_state)
 	}
 	ACPI_FLUSH_CPU_CACHE();
 #endif
-	printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n",
-		acpi_state);
+	pr_info("Preparing to enter system sleep state S%d\n", acpi_state);
 	acpi_enable_wakeup_devices(acpi_state);
 	acpi_enter_sleep_state_prep(acpi_state);
 	return 0;
@@ -459,8 +460,7 @@ static void acpi_pm_finish(void)
 	if (acpi_state == ACPI_STATE_S0)
 		return;
 
-	printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n",
-		acpi_state);
+	pr_info("Waking up from system sleep state S%d\n", acpi_state);
 	acpi_disable_wakeup_devices(acpi_state);
 	acpi_leave_sleep_state(acpi_state);
 
@@ -581,7 +581,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
 		error = acpi_suspend_lowlevel();
 		if (error)
 			return error;
-		pr_info(PREFIX "Low-level resume complete\n");
+		pr_info("Low-level resume complete\n");
 		pm_set_resume_via_firmware();
 		break;
 	}
@@ -921,7 +921,7 @@ static void acpi_hibernation_leave(void)
 	acpi_leave_sleep_state_prep(ACPI_STATE_S4);
 	/* Check the hardware signature */
 	if (facs && s4_hardware_signature != facs->hardware_signature)
-		pr_crit("ACPI: Hardware changed while hibernated, success doubtful!\n");
+		pr_crit("Hardware changed while hibernated, success doubtful!\n");
 	/* Restore the NVS memory area */
 	suspend_nvs_restore();
 	/* Allow EC transactions to happen. */
@@ -1029,7 +1029,7 @@ static void acpi_power_off_prepare(void)
 static void acpi_power_off(void)
 {
 	/* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */
-	printk(KERN_DEBUG "%s called\n", __func__);
+	pr_debug("%s called\n", __func__);
 	local_irq_disable();
 	acpi_enter_sleep_state(ACPI_STATE_S5);
 }
@@ -1061,7 +1061,7 @@ int __init acpi_sleep_init(void)
 		if (sleep_states[i])
 			pos += sprintf(pos, " S%d", i);
 	}
-	pr_info(PREFIX "(supports%s)\n", supported);
+	pr_info("(supports%s)\n", supported);
 
 	/*
 	 * Register the tts_notifier to reboot notifier list so that the _TTS

From 0ac2c0e4ff4b41693977ebf624ba5952344cd7ac Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 16:54:40 +0800
Subject: [PATCH 1070/3292] ACPI: Remove the macro PREFIX "ACPI: "

Now the macro PREFIX for ACPI message printing is not used
anymore, remove it.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index e21611c9a170..bbe1064a7495 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -11,8 +11,6 @@
 
 #include <linux/idr.h>
 
-#define PREFIX "ACPI: "
-
 int early_acpi_osi_init(void);
 int acpi_osi_init(void);
 acpi_status acpi_os_initialize1(void);

From 9b64560134a0032d2de6bb565a76418ad90386fe Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 17:36:49 +0800
Subject: [PATCH 1071/3292] ACPI: bus: Remove unneeded assignment

When acpi_kobj is NULL already, assigning NULL to it is redundant,
so don't do that.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76..a2e814a9ad99 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1325,10 +1325,8 @@ static int __init acpi_init(void)
 	}
 
 	acpi_kobj = kobject_create_and_add("acpi", firmware_kobj);
-	if (!acpi_kobj) {
+	if (!acpi_kobj)
 		pr_debug("%s: kset create error\n", __func__);
-		acpi_kobj = NULL;
-	}
 
 	result = acpi_bus_init();
 	if (result) {

From 4ac7a817f1992103d4e68e9837304f860b5e7300 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <guohanjun@huawei.com>
Date: Wed, 2 Jun 2021 17:36:50 +0800
Subject: [PATCH 1072/3292] ACPI: bus: Call kobject_put() in acpi_init() error
 path

Although the system will not be in a good condition or it will not
boot if acpi_bus_init() fails, it is still necessary to put the
kobject in the error path before returning to avoid leaking memory.

Signed-off-by: Hanjun Guo <guohanjun@huawei.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index a2e814a9ad99..c69470ec16b2 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1330,6 +1330,7 @@ static int __init acpi_init(void)
 
 	result = acpi_bus_init();
 	if (result) {
+		kobject_put(acpi_kobj);
 		disable_acpi();
 		return result;
 	}

From 01c3d593be8d3e45fce3644011c60b0645cbdd78 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 1 Jun 2021 14:50:32 +0800
Subject: [PATCH 1073/3292] ACPI: OSL: Use DEFINE_RES_IO_NAMED() to simplify
 code

No functional change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/osl.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 327e1b4eb6b0..f40d34776a35 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1487,12 +1487,7 @@ EXPORT_SYMBOL(acpi_check_resource_conflict);
 int acpi_check_region(resource_size_t start, resource_size_t n,
 		      const char *name)
 {
-	struct resource res = {
-		.start = start,
-		.end   = start + n - 1,
-		.name  = name,
-		.flags = IORESOURCE_IO,
-	};
+	struct resource res = DEFINE_RES_IO_NAMED(start, n, name);
 
 	return acpi_check_resource_conflict(&res);
 }

From 7ca1a8014d860d23001605f63c1402f1092a58d5 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 1 Jun 2021 13:17:35 +0100
Subject: [PATCH 1074/3292] ACPI: tables: PPTT: Populate cache-id if provided
 by firmware

ACPI 6.4 adds a 'cache id' to the PPTT Cache Type Structure.

Copy this property across into the cacheinfo leaf when it was
provided by firmware.

This value gets exposed to userspace as:
/sys/devices/system/cpu/cpu*/cache/index*/id.

See the "Cache IDs" section of Documentation/x86/resctrl.rst.

Co-authored-by: Joey Gouly <joey.gouly@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/pptt.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 4ae93350b70d..fe69dc518f31 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -347,6 +347,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
  * @this_leaf: Kernel cache info structure being updated
  * @found_cache: The PPTT node describing this cache instance
  * @cpu_node: A unique reference to describe this cache instance
+ * @revision: The revision of the PPTT table
  *
  * The ACPI spec implies that the fields in the cache structures are used to
  * extend and correct the information probed from the hardware. Lets only
@@ -356,8 +357,11 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
  */
 static void update_cache_properties(struct cacheinfo *this_leaf,
 				    struct acpi_pptt_cache *found_cache,
-				    struct acpi_pptt_processor *cpu_node)
+				    struct acpi_pptt_processor *cpu_node,
+				    u8 revision)
 {
+	struct acpi_pptt_cache_v1* found_cache_v1;
+
 	this_leaf->fw_token = cpu_node;
 	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID)
 		this_leaf->size = found_cache->size;
@@ -405,6 +409,13 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
 	if (this_leaf->type == CACHE_TYPE_NOCACHE &&
 	    found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID)
 		this_leaf->type = CACHE_TYPE_UNIFIED;
+
+	if (revision >= 3 && (found_cache->flags & ACPI_PPTT_CACHE_ID_VALID)) {
+		found_cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
+	                                      found_cache, sizeof(struct acpi_pptt_cache));
+		this_leaf->id = found_cache_v1->cache_id;
+		this_leaf->attributes |= CACHE_ID;
+	}
 }
 
 static void cache_setup_acpi_cpu(struct acpi_table_header *table,
@@ -425,9 +436,8 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
 						   &cpu_node);
 		pr_debug("found = %p %p\n", found_cache, cpu_node);
 		if (found_cache)
-			update_cache_properties(this_leaf,
-						found_cache,
-						cpu_node);
+			update_cache_properties(this_leaf, found_cache,
+			                        cpu_node, table->revision);
 
 		index++;
 	}

From dd9eaa23e72572d4f1c03f2e5d2e14a5b5793e79 Mon Sep 17 00:00:00 2001
From: Jing Xiangfeng <jingxiangfeng@huawei.com>
Date: Wed, 2 Jun 2021 19:58:12 +0800
Subject: [PATCH 1075/3292] ACPI: tables: FPDT: Add missing acpi_put_table() in
 acpi_init_fpdt()

acpi_init_fpdt() forgets to call acpi_put_table() in an error path.

Add the missing function call to fix it.

Fixes: d1eb86e59be0 ("ACPI: tables: introduce support for FPDT table")
Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_fpdt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c
index a89a806a7a2a..4ee2ad234e3d 100644
--- a/drivers/acpi/acpi_fpdt.c
+++ b/drivers/acpi/acpi_fpdt.c
@@ -240,8 +240,10 @@ static int __init acpi_init_fpdt(void)
 		return 0;
 
 	fpdt_kobj = kobject_create_and_add("fpdt", acpi_kobj);
-	if (!fpdt_kobj)
+	if (!fpdt_kobj) {
+		acpi_put_table(header);
 		return -ENOMEM;
+	}
 
 	while (offset < header->length) {
 		subtable = (void *)header + offset;

From 8e3ecc68e33ffe3a168f765a8f07377258615709 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Thu, 3 Jun 2021 17:12:04 +0800
Subject: [PATCH 1076/3292] ACPI: LPSS: Use kstrtol() instead of
 simple_strtol()

The simple_strtol() function is not reliable in some situation, since
it does not check for the range overflow. Use kstrtol() instead.

While at it, modify the code to avoid evaluating _SEM unnecessarily
if uid_str is NULL or kstrtol() fails to convert that string to a
nonzero number.

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
[ rjw: Check uid right after calling kstrtol() ]
[ rjw: Rewrite subject and changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index ca742f16a507..894b7e6ae144 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -186,13 +186,12 @@ static void byt_i2c_setup(struct lpss_private_data *pdata)
 	long uid = 0;
 
 	/* Expected to always be true, but better safe then sorry */
-	if (uid_str)
-		uid = simple_strtol(uid_str, NULL, 10);
-
-	/* Detect I2C bus shared with PUNIT and ignore its d3 status */
-	status = acpi_evaluate_integer(handle, "_SEM", NULL, &shared_host);
-	if (ACPI_SUCCESS(status) && shared_host && uid)
-		pmc_atom_d3_mask &= ~(BIT_LPSS2_F1_I2C1 << (uid - 1));
+	if (uid_str && !kstrtol(uid_str, 10, &uid) && uid) {
+		/* Detect I2C bus shared with PUNIT and ignore its d3 status */
+		status = acpi_evaluate_integer(handle, "_SEM", NULL, &shared_host);
+		if (ACPI_SUCCESS(status) && shared_host)
+			pmc_atom_d3_mask &= ~(BIT_LPSS2_F1_I2C1 << (uid - 1));
+	}
 
 	lpss_deassert_reset(pdata);
 

From 3935787ebd5f4117d39c6fda6d73ecfdb747349f Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 2 Jun 2021 16:15:46 +0800
Subject: [PATCH 1077/3292] PNP: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO macro helper instead of plain DEVICE_ATTR, which makes
the code a bit shorter and easier to read.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/card.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c
index 2430c14f472d..d40ed8621571 100644
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -181,8 +181,8 @@ struct pnp_card *pnp_alloc_card(struct pnp_protocol *protocol, int id, char *pnp
 	return card;
 }
 
-static ssize_t pnp_show_card_name(struct device *dmdev,
-				  struct device_attribute *attr, char *buf)
+static ssize_t name_show(struct device *dmdev,
+			 struct device_attribute *attr, char *buf)
 {
 	char *str = buf;
 	struct pnp_card *card = to_pnp_card(dmdev);
@@ -191,10 +191,10 @@ static ssize_t pnp_show_card_name(struct device *dmdev,
 	return (str - buf);
 }
 
-static DEVICE_ATTR(name, S_IRUGO, pnp_show_card_name, NULL);
+static DEVICE_ATTR_RO(name);
 
-static ssize_t pnp_show_card_ids(struct device *dmdev,
-				 struct device_attribute *attr, char *buf)
+static ssize_t card_id_show(struct device *dmdev,
+			    struct device_attribute *attr, char *buf)
 {
 	char *str = buf;
 	struct pnp_card *card = to_pnp_card(dmdev);
@@ -207,7 +207,7 @@ static ssize_t pnp_show_card_ids(struct device *dmdev,
 	return (str - buf);
 }
 
-static DEVICE_ATTR(card_id, S_IRUGO, pnp_show_card_ids, NULL);
+static DEVICE_ATTR_RO(card_id);
 
 static int pnp_interface_attach_card(struct pnp_card *card)
 {

From 888be6067b97132c3992866bbcf647572253ab3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= <kw@linux.com>
Date: Thu, 3 Jun 2021 17:12:01 +0000
Subject: [PATCH 1078/3292] ACPI: sysfs: Fix a buffer overrun problem with
 description_show()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, a device description can be obtained using ACPI, if the _STR
method exists for a particular device, and then exposed to the userspace
via a sysfs object as a string value.

If the _STR method is available for a given device then the data
(usually a Unicode string) is read and stored in a buffer (of the
ACPI_TYPE_BUFFER type) with a pointer to said buffer cached in the
struct acpi_device_pnp for later access.

The description_show() function is responsible for exposing the device
description to the userspace via a corresponding sysfs object and
internally calls the utf16s_to_utf8s() function with a pointer to the
buffer that contains the Unicode string so that it can be converted from
UTF16 encoding to UTF8 and thus allowing for the value to be safely
stored and later displayed.

When invoking the utf16s_to_utf8s() function, the description_show()
function also sets a limit of the data that can be saved into a provided
buffer as a result of the character conversion to be a total of
PAGE_SIZE, and upon completion, the utf16s_to_utf8s() function returns
an integer value denoting the number of bytes that have been written
into the provided buffer.

Following the execution of the utf16s_to_utf8s() a newline character
will be added at the end of the resulting buffer so that when the value
is read in the userspace through the sysfs object then it would include
newline making it more accessible when working with the sysfs file
system in the shell, etc.  Normally, this wouldn't be a problem, but if
the function utf16s_to_utf8s() happens to return the number of bytes
written to be precisely PAGE_SIZE, then we would overrun the buffer and
write the newline character outside the allotted space which can have
undefined consequences or result in a failure.

To fix this buffer overrun, ensure that there always is enough space
left for the newline character to be safely appended.

Fixes: d1efe3c324ea ("ACPI: Add new sysfs interface to export device description")
Signed-off-by: Krzysztof Wilczyński <kw@linux.com>
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index fa2c1c93072c..a393e0e09381 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -448,7 +448,7 @@ static ssize_t description_show(struct device *dev,
 		(wchar_t *)acpi_dev->pnp.str_obj->buffer.pointer,
 		acpi_dev->pnp.str_obj->buffer.length,
 		UTF16_LITTLE_ENDIAN, buf,
-		PAGE_SIZE);
+		PAGE_SIZE - 1);
 
 	buf[result++] = '\n';
 

From 237a47ebc39de7f3763e2fd11e88774239a88b77 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 5 Jun 2021 09:30:19 +0200
Subject: [PATCH 1079/3292] ACPI: NUMA: fix typo in a comment

Fix a typo in comment related to the closing #endif of an include-guard.
s/__ACP_NUMA_H/__ACPI_NUMA_H/

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_numa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index 40a91ce87e04..68e4d80c1b32 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -43,4 +43,4 @@ static inline void disable_hmat(void)
 {
 }
 #endif				/* CONFIG_ACPI_HMAT */
-#endif				/* __ACP_NUMA_H */
+#endif				/* __ACPI_NUMA_H */

From a9e10e58730432e5de840eb3ddd55c75f29341b3 Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Thu, 3 Jun 2021 23:40:02 +0100
Subject: [PATCH 1080/3292] ACPI: scan: Extend acpi_walk_dep_device_list()

The acpi_walk_dep_device_list() function is not as generic as its
name implies, serving only to decrement the dependency count for each
dependent device of the input.

Extend it to accept a callback which can be applied to all the
dependencies in acpi_dep_list.

Replace all existing calls to the function with calls to a wrapper,
passing a callback that applies the same dependency reduction.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Maximilian Luz <luzmaximilian@gmail.com>  # for platform/surface parts
Signed-off-by: Daniel Scally <djrscally@gmail.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c                             |  2 +-
 drivers/acpi/pmic/intel_pmic_chtdc_ti.c       |  2 +-
 drivers/acpi/scan.c                           | 69 ++++++++++++++-----
 drivers/gpio/gpiolib-acpi.c                   | 10 +--
 drivers/i2c/i2c-core-acpi.c                   |  8 +--
 drivers/platform/surface/aggregator/core.c    |  6 +-
 drivers/platform/surface/surface3_power.c     | 22 +++---
 .../platform/surface/surface_acpi_notify.c    |  7 +-
 include/acpi/acpi_bus.h                       |  7 ++
 include/linux/acpi.h                          |  4 +-
 10 files changed, 90 insertions(+), 47 deletions(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 13565629ce0a..3f7680a007a3 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1627,7 +1627,7 @@ static int acpi_ec_add(struct acpi_device *device)
 	WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr);
 
 	/* Reprobe devices depending on the EC */
-	acpi_walk_dep_device_list(ec->handle);
+	acpi_dev_clear_dependencies(device);
 
 	acpi_handle_debug(ec->handle, "enumerated.\n");
 	return 0;
diff --git a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
index a5101b07611a..fef7831d0d63 100644
--- a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
+++ b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
@@ -117,7 +117,7 @@ static int chtdc_ti_pmic_opregion_probe(struct platform_device *pdev)
 		return err;
 
 	/* Re-enumerate devices depending on PMIC */
-	acpi_walk_dep_device_list(ACPI_HANDLE(pdev->dev.parent));
+	acpi_dev_clear_dependencies(ACPI_COMPANION(pdev->dev.parent));
 	return 0;
 }
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a2df7bcf4d07..2277add6da2f 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -47,12 +47,6 @@ static DEFINE_MUTEX(acpi_hp_context_lock);
  */
 static u64 spcr_uart_addr;
 
-struct acpi_dep_data {
-	struct list_head node;
-	acpi_handle supplier;
-	acpi_handle consumer;
-};
-
 void acpi_scan_lock_acquire(void)
 {
 	mutex_lock(&acpi_scan_lock);
@@ -2107,30 +2101,69 @@ static void acpi_bus_attach(struct acpi_device *device, bool first_pass)
 		device->handler->hotplug.notify_online(device);
 }
 
-void acpi_walk_dep_device_list(acpi_handle handle)
+static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
+{
+	struct acpi_device *adev;
+
+	acpi_bus_get_device(dep->consumer, &adev);
+
+	if (adev) {
+		adev->dep_unmet--;
+		if (!adev->dep_unmet)
+			acpi_bus_attach(adev, true);
+	}
+
+	list_del(&dep->node);
+	kfree(dep);
+
+	return 0;
+}
+
+/**
+ * acpi_walk_dep_device_list - Apply a callback to every entry in acpi_dep_list
+ * @handle:	The ACPI handle of the supplier device
+ * @callback:	Pointer to the callback function to apply
+ * @data:	Pointer to some data to pass to the callback
+ *
+ * The return value of the callback determines this function's behaviour. If 0
+ * is returned we continue to iterate over acpi_dep_list. If a positive value
+ * is returned then the loop is broken but this function returns 0. If a
+ * negative value is returned by the callback then the loop is broken and that
+ * value is returned as the final error.
+ */
+int acpi_walk_dep_device_list(acpi_handle handle,
+			      int (*callback)(struct acpi_dep_data *, void *),
+			      void *data)
 {
 	struct acpi_dep_data *dep, *tmp;
-	struct acpi_device *adev;
+	int ret;
 
 	mutex_lock(&acpi_dep_list_lock);
 	list_for_each_entry_safe(dep, tmp, &acpi_dep_list, node) {
 		if (dep->supplier == handle) {
-			acpi_bus_get_device(dep->consumer, &adev);
-
-			if (adev) {
-				adev->dep_unmet--;
-				if (!adev->dep_unmet)
-					acpi_bus_attach(adev, true);
-			}
-
-			list_del(&dep->node);
-			kfree(dep);
+			ret = callback(dep, data);
+			if (ret)
+				break;
 		}
 	}
 	mutex_unlock(&acpi_dep_list_lock);
+
+	return ret > 0 ? 0 : ret;
 }
 EXPORT_SYMBOL_GPL(acpi_walk_dep_device_list);
 
+/**
+ * acpi_dev_clear_dependencies - Inform consumers that the device is now active
+ * @supplier: Pointer to the supplier &struct acpi_device
+ *
+ * Clear dependencies on the given device.
+ */
+void acpi_dev_clear_dependencies(struct acpi_device *supplier)
+{
+	acpi_walk_dep_device_list(supplier->handle, acpi_scan_clear_dep, NULL);
+}
+EXPORT_SYMBOL_GPL(acpi_dev_clear_dependencies);
+
 /**
  * acpi_bus_scan - Add ACPI device node objects in a given namespace scope.
  * @handle: Root of the namespace scope to scan.
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 3ef22a3c104d..5b4111e4be3f 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1233,14 +1233,14 @@ static void acpi_gpiochip_scan_gpios(struct acpi_gpio_chip *achip)
 void acpi_gpiochip_add(struct gpio_chip *chip)
 {
 	struct acpi_gpio_chip *acpi_gpio;
-	acpi_handle handle;
+	struct acpi_device *adev;
 	acpi_status status;
 
 	if (!chip || !chip->parent)
 		return;
 
-	handle = ACPI_HANDLE(chip->parent);
-	if (!handle)
+	adev = ACPI_COMPANION(chip->parent);
+	if (!adev)
 		return;
 
 	acpi_gpio = kzalloc(sizeof(*acpi_gpio), GFP_KERNEL);
@@ -1254,7 +1254,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
 	INIT_LIST_HEAD(&acpi_gpio->events);
 	INIT_LIST_HEAD(&acpi_gpio->deferred_req_irqs_list_entry);
 
-	status = acpi_attach_data(handle, acpi_gpio_chip_dh, acpi_gpio);
+	status = acpi_attach_data(adev->handle, acpi_gpio_chip_dh, acpi_gpio);
 	if (ACPI_FAILURE(status)) {
 		dev_err(chip->parent, "Failed to attach ACPI GPIO chip\n");
 		kfree(acpi_gpio);
@@ -1263,7 +1263,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
 
 	acpi_gpiochip_request_regions(acpi_gpio);
 	acpi_gpiochip_scan_gpios(acpi_gpio);
-	acpi_walk_dep_device_list(handle);
+	acpi_dev_clear_dependencies(adev);
 }
 
 void acpi_gpiochip_remove(struct gpio_chip *chip)
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 8ceaa88dd78f..6f0aa0ed3241 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -259,8 +259,8 @@ static acpi_status i2c_acpi_add_device(acpi_handle handle, u32 level,
  */
 void i2c_acpi_register_devices(struct i2c_adapter *adap)
 {
+	struct acpi_device *adev;
 	acpi_status status;
-	acpi_handle handle;
 
 	if (!has_acpi_companion(&adap->dev))
 		return;
@@ -275,11 +275,11 @@ void i2c_acpi_register_devices(struct i2c_adapter *adap)
 	if (!adap->dev.parent)
 		return;
 
-	handle = ACPI_HANDLE(adap->dev.parent);
-	if (!handle)
+	adev = ACPI_COMPANION(adap->dev.parent);
+	if (!adev)
 		return;
 
-	acpi_walk_dep_device_list(handle);
+	acpi_dev_clear_dependencies(adev);
 }
 
 static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = {
diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c
index 8dc2c267bcd6..517f774a6e60 100644
--- a/drivers/platform/surface/aggregator/core.c
+++ b/drivers/platform/surface/aggregator/core.c
@@ -621,8 +621,8 @@ static const struct acpi_gpio_mapping ssam_acpi_gpios[] = {
 
 static int ssam_serial_hub_probe(struct serdev_device *serdev)
 {
+	struct acpi_device *ssh = ACPI_COMPANION(&serdev->dev);
 	struct ssam_controller *ctrl;
-	acpi_handle *ssh = ACPI_HANDLE(&serdev->dev);
 	acpi_status astatus;
 	int status;
 
@@ -652,7 +652,7 @@ static int ssam_serial_hub_probe(struct serdev_device *serdev)
 	if (status)
 		goto err_devopen;
 
-	astatus = ssam_serdev_setup_via_acpi(ssh, serdev);
+	astatus = ssam_serdev_setup_via_acpi(ssh->handle, serdev);
 	if (ACPI_FAILURE(astatus)) {
 		status = -ENXIO;
 		goto err_devinit;
@@ -706,7 +706,7 @@ static int ssam_serial_hub_probe(struct serdev_device *serdev)
 	 *       For now let's thus default power/wakeup to false.
 	 */
 	device_set_wakeup_capable(&serdev->dev, true);
-	acpi_walk_dep_device_list(ssh);
+	acpi_dev_clear_dependencies(ssh);
 
 	return 0;
 
diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c
index cc4f9cba6856..dea82aa1abd4 100644
--- a/drivers/platform/surface/surface3_power.c
+++ b/drivers/platform/surface/surface3_power.c
@@ -446,12 +446,12 @@ mshw0011_space_handler(u32 function, acpi_physical_address command,
 
 static int mshw0011_install_space_handler(struct i2c_client *client)
 {
-	acpi_handle handle;
+	struct acpi_device *adev;
 	struct mshw0011_handler_data *data;
 	acpi_status status;
 
-	handle = ACPI_HANDLE(&client->dev);
-	if (!handle)
+	adev = ACPI_COMPANION(&client->dev);
+	if (!adev)
 		return -ENODEV;
 
 	data = kzalloc(sizeof(struct mshw0011_handler_data),
@@ -460,25 +460,25 @@ static int mshw0011_install_space_handler(struct i2c_client *client)
 		return -ENOMEM;
 
 	data->client = client;
-	status = acpi_bus_attach_private_data(handle, (void *)data);
+	status = acpi_bus_attach_private_data(adev->handle, (void *)data);
 	if (ACPI_FAILURE(status)) {
 		kfree(data);
 		return -ENOMEM;
 	}
 
-	status = acpi_install_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&mshw0011_space_handler,
-				NULL,
-				data);
+	status = acpi_install_address_space_handler(adev->handle,
+						    ACPI_ADR_SPACE_GSBUS,
+						    &mshw0011_space_handler,
+						    NULL,
+						    data);
 	if (ACPI_FAILURE(status)) {
 		dev_err(&client->dev, "Error installing i2c space handler\n");
-		acpi_bus_detach_private_data(handle);
+		acpi_bus_detach_private_data(adev->handle);
 		kfree(data);
 		return -ENOMEM;
 	}
 
-	acpi_walk_dep_device_list(handle);
+	acpi_dev_clear_dependencies(adev);
 	return 0;
 }
 
diff --git a/drivers/platform/surface/surface_acpi_notify.c b/drivers/platform/surface/surface_acpi_notify.c
index ef9c1f8e8336..8339988d95c1 100644
--- a/drivers/platform/surface/surface_acpi_notify.c
+++ b/drivers/platform/surface/surface_acpi_notify.c
@@ -798,7 +798,7 @@ static int san_consumer_links_setup(struct platform_device *pdev)
 
 static int san_probe(struct platform_device *pdev)
 {
-	acpi_handle san = ACPI_HANDLE(&pdev->dev);
+	struct acpi_device *san = ACPI_COMPANION(&pdev->dev);
 	struct ssam_controller *ctrl;
 	struct san_data *data;
 	acpi_status astatus;
@@ -821,7 +821,8 @@ static int san_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, data);
 
-	astatus = acpi_install_address_space_handler(san, ACPI_ADR_SPACE_GSBUS,
+	astatus = acpi_install_address_space_handler(san->handle,
+						     ACPI_ADR_SPACE_GSBUS,
 						     &san_opreg_handler, NULL,
 						     &data->info);
 	if (ACPI_FAILURE(astatus))
@@ -835,7 +836,7 @@ static int san_probe(struct platform_device *pdev)
 	if (status)
 		goto err_install_dev;
 
-	acpi_walk_dep_device_list(san);
+	acpi_dev_clear_dependencies(san);
 	return 0;
 
 err_install_dev:
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a82faac5767..0b2c4f170f4d 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -280,6 +280,12 @@ struct acpi_device_power {
 	struct acpi_device_power_state states[ACPI_D_STATE_COUNT];	/* Power states (D0-D3Cold) */
 };
 
+struct acpi_dep_data {
+	struct list_head node;
+	acpi_handle supplier;
+	acpi_handle consumer;
+};
+
 /* Performance Management */
 
 struct acpi_device_perf_flags {
@@ -685,6 +691,7 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev)
 
 bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2);
 
+void acpi_dev_clear_dependencies(struct acpi_device *supplier);
 struct acpi_device *
 acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv);
 struct acpi_device *
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..170b9bebdb2b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -666,7 +666,9 @@ extern bool acpi_driver_match_device(struct device *dev,
 				     const struct device_driver *drv);
 int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
-void acpi_walk_dep_device_list(acpi_handle handle);
+int acpi_walk_dep_device_list(acpi_handle handle,
+			      int (*callback)(struct acpi_dep_data *, void *),
+			      void *data);
 
 struct platform_device *acpi_create_platform_device(struct acpi_device *,
 						    struct property_entry *);

From b83e2b306736cb0d108df791fd4ee39f6d52184f Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Thu, 3 Jun 2021 23:40:03 +0100
Subject: [PATCH 1081/3292] ACPI: scan: Add function to fetch dependent of ACPI
 device

In some ACPI tables we encounter, devices use the _DEP method to assert
a dependence on other ACPI devices as opposed to the OpRegions that the
specification intends.

We need to be able to find those devices "from" the dependee, so add
a callback and a wrapper to walk over the acpi_dep_list and return
the dependent ACPI device.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Daniel Scally <djrscally@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c     | 35 +++++++++++++++++++++++++++++++++++
 include/acpi/acpi_bus.h |  1 +
 2 files changed, 36 insertions(+)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 2277add6da2f..708c1d84e7bf 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2101,6 +2101,20 @@ static void acpi_bus_attach(struct acpi_device *device, bool first_pass)
 		device->handler->hotplug.notify_online(device);
 }
 
+static int acpi_dev_get_first_consumer_dev_cb(struct acpi_dep_data *dep, void *data)
+{
+	struct acpi_device *adev;
+
+	adev = acpi_bus_get_acpi_device(dep->consumer);
+	if (!adev)
+		/* If we don't find an adev then we want to continue parsing */
+		return 0;
+
+	*(struct acpi_device **)data = adev;
+
+	return 1;
+}
+
 static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
 {
 	struct acpi_device *adev;
@@ -2164,6 +2178,27 @@ void acpi_dev_clear_dependencies(struct acpi_device *supplier)
 }
 EXPORT_SYMBOL_GPL(acpi_dev_clear_dependencies);
 
+/**
+ * acpi_dev_get_first_consumer_dev - Return ACPI device dependent on @supplier
+ * @supplier: Pointer to the dependee device
+ *
+ * Returns the first &struct acpi_device which declares itself dependent on
+ * @supplier via the _DEP buffer, parsed from the acpi_dep_list.
+ *
+ * The caller is responsible for putting the reference to adev when it is no
+ * longer needed.
+ */
+struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier)
+{
+	struct acpi_device *adev = NULL;
+
+	acpi_walk_dep_device_list(supplier->handle,
+				  acpi_dev_get_first_consumer_dev_cb, &adev);
+
+	return adev;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_get_first_consumer_dev);
+
 /**
  * acpi_bus_scan - Add ACPI device node objects in a given namespace scope.
  * @handle: Root of the namespace scope to scan.
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 0b2c4f170f4d..4bed30e61c5b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -692,6 +692,7 @@ static inline bool acpi_device_can_poweroff(struct acpi_device *adev)
 bool acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2);
 
 void acpi_dev_clear_dependencies(struct acpi_device *supplier);
+struct acpi_device *acpi_dev_get_first_consumer_dev(struct acpi_device *supplier);
 struct acpi_device *
 acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const char *uid, s64 hrv);
 struct acpi_device *

From eb550f53099bf5ff8dc5de93e275378510c891c9 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Thu, 17 Sep 2020 13:13:33 -0700
Subject: [PATCH 1082/3292] virtchnl: Use pad byte in virtchnl_ether_addr to
 specify MAC type

Currently, there is no way for a VF driver to specify that it wants to
change its device/primary unicast MAC address. This makes it
difficult/impossible for the PF driver to track the VF's device/primary
unicast MAC address, which is used for VM/VF reboot and displaying on
the host. Fix this by using 2 bits of a pad byte in the
virtchnl_ether_addr structure so the VF can specify what type of MAC
it's adding/deleting.

Below are the values that should be used by all VF drivers going
forward.

VIRTCHNL_ETHER_ADDR_LEGACY(0):
	- The type should only ever be 0 for legacy AVF drivers (i.e.
	  drivers that don't support the new type bits). The PF drivers
	  will track VF's device/primary unicast MAC, but this will only
	  be a best effort.

VIRTCHNL_ETHER_ADDR_PRIMARY(1):
	- This type should only be used when the VF is changing their
	  device/primary unicast MAC. It should be used for both delete
	  and add cases related to the device/primary unicast MAC.

VIRTCHNL_ETHER_ADDR_EXTRA(2):
	- This type should be used when the VF is adding and/or deleting
	  MAC addresses that are not the device/primary unicast MAC. For
	  example, extra unicast addresses and multicast addresses
	  assuming the PF supports "extra" addresses at all.

If a PF is parsing the type field of the virtchnl_ether_addr, then it
should use the VIRTCHNL_ETHER_ADDR_TYPE_MASK to mask the first two bits
of the type field since 0, 1, and 2 are the only valid values.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/avf/virtchnl.h | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 565deea6ffe8..1fc07f3f99ab 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -412,9 +412,36 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_select);
  * PF removes the filters and returns status.
  */
 
+/* VIRTCHNL_ETHER_ADDR_LEGACY
+ * Prior to adding the @type member to virtchnl_ether_addr, there were 2 pad
+ * bytes. Moving forward all VF drivers should not set type to
+ * VIRTCHNL_ETHER_ADDR_LEGACY. This is only here to not break previous/legacy
+ * behavior. The control plane function (i.e. PF) can use a best effort method
+ * of tracking the primary/device unicast in this case, but there is no
+ * guarantee and functionality depends on the implementation of the PF.
+ */
+
+/* VIRTCHNL_ETHER_ADDR_PRIMARY
+ * All VF drivers should set @type to VIRTCHNL_ETHER_ADDR_PRIMARY for the
+ * primary/device unicast MAC address filter for VIRTCHNL_OP_ADD_ETH_ADDR and
+ * VIRTCHNL_OP_DEL_ETH_ADDR. This allows for the underlying control plane
+ * function (i.e. PF) to accurately track and use this MAC address for
+ * displaying on the host and for VM/function reset.
+ */
+
+/* VIRTCHNL_ETHER_ADDR_EXTRA
+ * All VF drivers should set @type to VIRTCHNL_ETHER_ADDR_EXTRA for any extra
+ * unicast and/or multicast filters that are being added/deleted via
+ * VIRTCHNL_OP_DEL_ETH_ADDR/VIRTCHNL_OP_ADD_ETH_ADDR respectively.
+ */
 struct virtchnl_ether_addr {
 	u8 addr[ETH_ALEN];
-	u8 pad[2];
+	u8 type;
+#define VIRTCHNL_ETHER_ADDR_LEGACY	0
+#define VIRTCHNL_ETHER_ADDR_PRIMARY	1
+#define VIRTCHNL_ETHER_ADDR_EXTRA	2
+#define VIRTCHNL_ETHER_ADDR_TYPE_MASK	3 /* first two bits of type are valid */
+	u8 pad;
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr);

From 51efbbdf1dca3b5a9aa39ff1548abe43eafe0b3c Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Thu, 17 Sep 2020 13:13:34 -0700
Subject: [PATCH 1083/3292] ice: Manage VF's MAC address for both legacy and
 new cases

Currently there is no way for a VF driver to specify if it wants to
change it's hardware address. New bits are being added to virtchnl.h
in struct virtchnl_ether_addr that allow for the VF to correctly
communicate this information. However, legacy VF drivers that don't
support the new virtchnl.h bits still need to be supported. Make a
best effort attempt at saving the VF's primary/device address in the
legacy case and depend on the VIRTCHNL_ETHER_ADDR_PRIMARY type for
the new case.

Legacy case - If a unicast MAC is being added and the
hw_lan_addr.addr is empty, then populate it. This assumes that the
address is the VF's hardware address. If a unicast MAC is being
added and the hw_lan_addr.addr is not empty, then cache it in the
legacy_last_added_umac.addr. If a unicast MAC is being deleted and it
matches the hw_lan_addr.addr, then zero the hw_lan_addr.addr.
Also, if the legacy_last_added_umac.addr has not expired, copy the
legacy_last_added_umac.addr into the hw_lan_addr.addr. This is done
because we cannot guarantee the order of VIRTCHNL_OP_ADD_ETH_ADDR and
VIRTCHNL_OP_DEL_ETH_ADDR.

New case - If a unicast MAC is being added and it's specified as
VIRTCHNL_ETHER_ADDR_PRIMARY, then replace the current
hw_lan_addr.addr. If a unicast MAC is being deleted and it's type
is specified as VIRTCHNL_ETHER_ADDR_PRIMARY, then zero the
hw_lan_addr.addr.

Untrusted VFs - Only allow above legacy/new changes to their
hardware address if the PF has not set it administratively via
iproute2.

Trusted VFs - Always allow above legacy/new changes to their
hardware address even if the PF has administratively set it via
iproute2.

Also, change the variable dflt_lan_addr to hw_lan_addr to clearly
represent the purpose of this variable since it's purpose is to
act as a hardware programmed MAC address for the VF.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_virtchnl_pf.c  | 160 +++++++++++++++---
 .../net/ethernet/intel/ice/ice_virtchnl_pf.h  |   8 +-
 2 files changed, 141 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a1d22d2aa0bd..b0a15b821b15 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -937,12 +937,12 @@ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
 
 	vf->num_mac++;
 
-	if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) {
-		status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr,
+	if (is_valid_ether_addr(vf->hw_lan_addr.addr)) {
+		status = ice_fltr_add_mac(vsi, vf->hw_lan_addr.addr,
 					  ICE_FWD_TO_VSI);
 		if (status) {
 			dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %s\n",
-				&vf->dflt_lan_addr.addr[0], vf->vf_id,
+				&vf->hw_lan_addr.addr[0], vf->vf_id,
 				ice_stat_str(status));
 			return ice_status_to_errno(status);
 		}
@@ -2379,7 +2379,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
 	vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
 	ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
-			vf->dflt_lan_addr.addr);
+			vf->hw_lan_addr.addr);
 
 	/* match guest capabilities */
 	vf->driver_caps = vfres->vf_cap_flags;
@@ -3659,20 +3659,94 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf)
 	return true;
 }
 
+/**
+ * ice_vc_ether_addr_type - get type of virtchnl_ether_addr
+ * @vc_ether_addr: used to extract the type
+ */
+static u8
+ice_vc_ether_addr_type(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	return (vc_ether_addr->type & VIRTCHNL_ETHER_ADDR_TYPE_MASK);
+}
+
+/**
+ * ice_is_vc_addr_legacy - check if the MAC address is from an older VF
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ */
+static bool
+ice_is_vc_addr_legacy(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 type = ice_vc_ether_addr_type(vc_ether_addr);
+
+	return (type == VIRTCHNL_ETHER_ADDR_LEGACY);
+}
+
+/**
+ * ice_is_vc_addr_primary - check if the MAC address is the VF's primary MAC
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ *
+ * This function should only be called when the MAC address in
+ * virtchnl_ether_addr is a valid unicast MAC
+ */
+static bool
+ice_is_vc_addr_primary(struct virtchnl_ether_addr __maybe_unused *vc_ether_addr)
+{
+	u8 type = ice_vc_ether_addr_type(vc_ether_addr);
+
+	return (type == VIRTCHNL_ETHER_ADDR_PRIMARY);
+}
+
+/**
+ * ice_vfhw_mac_add - update the VF's cached hardware MAC if allowed
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to add
+ */
+static void
+ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 *mac_addr = vc_ether_addr->addr;
+
+	if (!is_valid_ether_addr(mac_addr))
+		return;
+
+	/* only allow legacy VF drivers to set the hardware MAC if it is zero
+	 * and allow new VF drivers to set the hardware MAC if the type was
+	 * correctly specified over VIRTCHNL
+	 */
+	if ((ice_is_vc_addr_legacy(vc_ether_addr) &&
+	     is_zero_ether_addr(vf->hw_lan_addr.addr)) ||
+	    ice_is_vc_addr_primary(vc_ether_addr))
+		ether_addr_copy(vf->hw_lan_addr.addr, mac_addr);
+
+	/* hardware MAC is already set, but its possible that the VF driver sent
+	 * the VIRTCHNL_OP_ADD_ETH_ADDR message before the
+	 * VIRTCHNL_OP_DEL_ETH_ADDR when trying to update its MAC, so save it
+	 * away for the legacy VF driver case as it will be updated in the
+	 * delete flow for this case
+	 */
+	if (ice_is_vc_addr_legacy(vc_ether_addr)) {
+		ether_addr_copy(vf->legacy_last_added_umac.addr,
+				mac_addr);
+		vf->legacy_last_added_umac.time_modified = jiffies;
+	}
+}
+
 /**
  * ice_vc_add_mac_addr - attempt to add the MAC address passed in
  * @vf: pointer to the VF info
  * @vsi: pointer to the VF's VSI
- * @mac_addr: MAC address to add
+ * @vc_ether_addr: VIRTCHNL MAC address structure used to add MAC
  */
 static int
-ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
+ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+		    struct virtchnl_ether_addr *vc_ether_addr)
 {
 	struct device *dev = ice_pf_to_dev(vf->pf);
+	u8 *mac_addr = vc_ether_addr->addr;
 	enum ice_status status;
 
 	/* default unicast MAC already added */
-	if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+	if (ether_addr_equal(mac_addr, vf->hw_lan_addr.addr))
 		return 0;
 
 	if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
@@ -3691,32 +3765,66 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 		return -EIO;
 	}
 
-	/* Set the default LAN address to the latest unicast MAC address added
-	 * by the VF. The default LAN address is reported by the PF via
-	 * ndo_get_vf_config.
-	 */
-	if (is_unicast_ether_addr(mac_addr))
-		ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr);
+	ice_vfhw_mac_add(vf, vc_ether_addr);
 
 	vf->num_mac++;
 
 	return 0;
 }
 
+/**
+ * ice_is_legacy_umac_expired - check if last added legacy unicast MAC expired
+ * @last_added_umac: structure used to check expiration
+ */
+static bool ice_is_legacy_umac_expired(struct ice_time_mac *last_added_umac)
+{
+#define ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME	msecs_to_jiffies(3000)
+	return time_is_before_jiffies(last_added_umac->time_modified +
+				      ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME);
+}
+
+/**
+ * ice_vfhw_mac_del - update the VF's cached hardware MAC if allowed
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to delete
+ */
+static void
+ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 *mac_addr = vc_ether_addr->addr;
+
+	if (!is_valid_ether_addr(mac_addr) ||
+	    !ether_addr_equal(vf->hw_lan_addr.addr, mac_addr))
+		return;
+
+	/* allow the hardware MAC to be repopulated in the add flow */
+	eth_zero_addr(vf->hw_lan_addr.addr);
+
+	/* only update cached hardware MAC for legacy VF drivers on delete
+	 * because we cannot guarantee order/type of MAC from the VF driver
+	 */
+	if (ice_is_vc_addr_legacy(vc_ether_addr) &&
+	    !ice_is_legacy_umac_expired(&vf->legacy_last_added_umac))
+		ether_addr_copy(vf->hw_lan_addr.addr,
+				vf->legacy_last_added_umac.addr);
+}
+
 /**
  * ice_vc_del_mac_addr - attempt to delete the MAC address passed in
  * @vf: pointer to the VF info
  * @vsi: pointer to the VF's VSI
- * @mac_addr: MAC address to delete
+ * @vc_ether_addr: VIRTCHNL MAC address structure used to delete MAC
  */
 static int
-ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
+ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+		    struct virtchnl_ether_addr *vc_ether_addr)
 {
 	struct device *dev = ice_pf_to_dev(vf->pf);
+	u8 *mac_addr = vc_ether_addr->addr;
 	enum ice_status status;
 
 	if (!ice_can_vf_change_mac(vf) &&
-	    ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+	    ether_addr_equal(mac_addr, vf->hw_lan_addr.addr))
 		return 0;
 
 	status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
@@ -3730,8 +3838,7 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 		return -EIO;
 	}
 
-	if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
-		eth_zero_addr(vf->dflt_lan_addr.addr);
+	ice_vfhw_mac_del(vf, vc_ether_addr);
 
 	vf->num_mac--;
 
@@ -3750,7 +3857,8 @@ static int
 ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 {
 	int (*ice_vc_cfg_mac)
-		(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr);
+		(struct ice_vf *vf, struct ice_vsi *vsi,
+		 struct virtchnl_ether_addr *virtchnl_ether_addr);
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
 	struct virtchnl_ether_addr_list *al =
 	    (struct virtchnl_ether_addr_list *)msg;
@@ -3799,7 +3907,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 		    is_zero_ether_addr(mac_addr))
 			continue;
 
-		result = ice_vc_cfg_mac(vf, vsi, mac_addr);
+		result = ice_vc_cfg_mac(vf, vsi, &al->list[i]);
 		if (result == -EEXIST || result == -ENOENT) {
 			continue;
 		} else if (result) {
@@ -4437,7 +4545,7 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
 		return -EBUSY;
 
 	ivi->vf = vf_id;
-	ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr);
+	ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr);
 
 	/* VF configuration for VLAN and applicable QoS */
 	ivi->vlan = vf->port_vlan_info & VLAN_VID_MASK;
@@ -4513,7 +4621,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 
 	vf = &pf->vf[vf_id];
 	/* nothing left to do, unicast MAC already set */
-	if (ether_addr_equal(vf->dflt_lan_addr.addr, mac))
+	if (ether_addr_equal(vf->hw_lan_addr.addr, mac))
 		return 0;
 
 	ret = ice_check_vf_ready_for_cfg(vf);
@@ -4529,7 +4637,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	/* VF is notified of its new MAC via the PF's response to the
 	 * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
 	 */
-	ether_addr_copy(vf->dflt_lan_addr.addr, mac);
+	ether_addr_copy(vf->hw_lan_addr.addr, mac);
 	if (is_zero_ether_addr(mac)) {
 		/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
 		vf->pf_set_mac = false;
@@ -4682,7 +4790,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
 
 	dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
 		 vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
-		 vf->dflt_lan_addr.addr,
+		 vf->hw_lan_addr.addr,
 		 test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)
 			  ? "on" : "off");
 }
@@ -4726,7 +4834,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
 
 			dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n",
 				 vf->mdd_tx_events.count, hw->pf_id, i,
-				 vf->dflt_lan_addr.addr);
+				 vf->hw_lan_addr.addr);
 		}
 	}
 }
@@ -4816,7 +4924,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
 
 			if (pf_vsi)
 				dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
-					 &vf->dflt_lan_addr.addr[0],
+					 &vf->hw_lan_addr.addr[0],
 					 pf_vsi->netdev->dev_addr);
 		}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index d800ed83d6c3..91749c67129b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -58,6 +58,11 @@ enum ice_virtchnl_cap {
 	ICE_VIRTCHNL_VF_CAP_PRIVILEGE,
 };
 
+struct ice_time_mac {
+	unsigned long time_modified;
+	u8 addr[ETH_ALEN];
+};
+
 /* VF MDD events print structure */
 struct ice_mdd_vf_events {
 	u16 count;			/* total count of Rx|Tx events */
@@ -78,7 +83,8 @@ struct ice_vf {
 	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
 	struct virtchnl_version_info vf_ver;
 	u32 driver_caps;		/* reported by VF driver */
-	struct virtchnl_ether_addr dflt_lan_addr;
+	struct virtchnl_ether_addr hw_lan_addr;
+	struct ice_time_mac legacy_last_added_umac;
 	DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF);
 	DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF);
 	u16 port_vlan_info;		/* Port VLAN ID and QoS */

From f28cd5ce1a60949149f9870292879e0685892a22 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Thu, 17 Sep 2020 13:13:35 -0700
Subject: [PATCH 1084/3292] ice: Save VF's MAC across reboot

If a VM reboots and/or VF driver is unloaded, its cached hardware MAC
address (hw_lan_addr.addr) is cleared in some cases. If the VF is
trusted, then the PF driver allows the VF to clear its old MAC address
even if this MAC was configured by a host administrator. If the VF is
untrusted, then the PF driver allows the VF to clear its old MAC
address only if the host admin did not set it.

For the trusted VF case, this is unexpected and will cause issues
because the host configured MAC (i.e. via XML) will be cleared on VM
reboot. For the untrusted VF case, this is done to be consistent and it
will allow the VF to keep the same MAC across VM reboot.

Fix this by introducing dev_lan_addr to the VF structure. This will be
the VF's MAC address when it's up and running and in most cases will be
the same as the hw_lan_addr. However, to address the VM reboot and
unload/reload problem, the driver will never allow the hw_lan_addr to be
cleared via VIRTCHNL_OP_DEL_ETH_ADDR. When the VF's MAC is changed, the
dev_lan_addr and hw_lan_addr will always be updated with the same value.
The only ways the VF's MAC can change are the following:

- Set the VF's MAC administratively on the host via iproute2.
- If the VF is trusted and changes/sets its own MAC.
- If the VF is untrusted and the host has not set the MAC via iproute2.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_virtchnl_pf.c  | 47 ++++++++++++-------
 .../net/ethernet/intel/ice/ice_virtchnl_pf.h  |  1 +
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b0a15b821b15..677d29fd0885 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -947,6 +947,8 @@ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
 			return ice_status_to_errno(status);
 		}
 		vf->num_mac++;
+
+		ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr);
 	}
 
 	return 0;
@@ -3709,17 +3711,19 @@ ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
 	if (!is_valid_ether_addr(mac_addr))
 		return;
 
-	/* only allow legacy VF drivers to set the hardware MAC if it is zero
-	 * and allow new VF drivers to set the hardware MAC if the type was
-	 * correctly specified over VIRTCHNL
+	/* only allow legacy VF drivers to set the device and hardware MAC if it
+	 * is zero and allow new VF drivers to set the hardware MAC if the type
+	 * was correctly specified over VIRTCHNL
 	 */
 	if ((ice_is_vc_addr_legacy(vc_ether_addr) &&
 	     is_zero_ether_addr(vf->hw_lan_addr.addr)) ||
-	    ice_is_vc_addr_primary(vc_ether_addr))
+	    ice_is_vc_addr_primary(vc_ether_addr)) {
+		ether_addr_copy(vf->dev_lan_addr.addr, mac_addr);
 		ether_addr_copy(vf->hw_lan_addr.addr, mac_addr);
+	}
 
-	/* hardware MAC is already set, but its possible that the VF driver sent
-	 * the VIRTCHNL_OP_ADD_ETH_ADDR message before the
+	/* hardware and device MACs are already set, but its possible that the
+	 * VF driver sent the VIRTCHNL_OP_ADD_ETH_ADDR message before the
 	 * VIRTCHNL_OP_DEL_ETH_ADDR when trying to update its MAC, so save it
 	 * away for the legacy VF driver case as it will be updated in the
 	 * delete flow for this case
@@ -3745,8 +3749,8 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
 	u8 *mac_addr = vc_ether_addr->addr;
 	enum ice_status status;
 
-	/* default unicast MAC already added */
-	if (ether_addr_equal(mac_addr, vf->hw_lan_addr.addr))
+	/* device MAC already added */
+	if (ether_addr_equal(mac_addr, vf->dev_lan_addr.addr))
 		return 0;
 
 	if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
@@ -3794,19 +3798,26 @@ ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
 	u8 *mac_addr = vc_ether_addr->addr;
 
 	if (!is_valid_ether_addr(mac_addr) ||
-	    !ether_addr_equal(vf->hw_lan_addr.addr, mac_addr))
+	    !ether_addr_equal(vf->dev_lan_addr.addr, mac_addr))
 		return;
 
-	/* allow the hardware MAC to be repopulated in the add flow */
-	eth_zero_addr(vf->hw_lan_addr.addr);
+	/* allow the device MAC to be repopulated in the add flow and don't
+	 * clear the hardware MAC (i.e. hw_lan_addr.addr) here as that is meant
+	 * to be persistent on VM reboot and across driver unload/load, which
+	 * won't work if we clear the hardware MAC here
+	 */
+	eth_zero_addr(vf->dev_lan_addr.addr);
 
 	/* only update cached hardware MAC for legacy VF drivers on delete
 	 * because we cannot guarantee order/type of MAC from the VF driver
 	 */
 	if (ice_is_vc_addr_legacy(vc_ether_addr) &&
-	    !ice_is_legacy_umac_expired(&vf->legacy_last_added_umac))
+	    !ice_is_legacy_umac_expired(&vf->legacy_last_added_umac)) {
+		ether_addr_copy(vf->dev_lan_addr.addr,
+				vf->legacy_last_added_umac.addr);
 		ether_addr_copy(vf->hw_lan_addr.addr,
 				vf->legacy_last_added_umac.addr);
+	}
 }
 
 /**
@@ -3824,7 +3835,7 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
 	enum ice_status status;
 
 	if (!ice_can_vf_change_mac(vf) &&
-	    ether_addr_equal(mac_addr, vf->hw_lan_addr.addr))
+	    ether_addr_equal(vf->dev_lan_addr.addr, mac_addr))
 		return 0;
 
 	status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
@@ -4621,7 +4632,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 
 	vf = &pf->vf[vf_id];
 	/* nothing left to do, unicast MAC already set */
-	if (ether_addr_equal(vf->hw_lan_addr.addr, mac))
+	if (ether_addr_equal(vf->dev_lan_addr.addr, mac) &&
+	    ether_addr_equal(vf->hw_lan_addr.addr, mac))
 		return 0;
 
 	ret = ice_check_vf_ready_for_cfg(vf);
@@ -4637,6 +4649,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	/* VF is notified of its new MAC via the PF's response to the
 	 * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
 	 */
+	ether_addr_copy(vf->dev_lan_addr.addr, mac);
 	ether_addr_copy(vf->hw_lan_addr.addr, mac);
 	if (is_zero_ether_addr(mac)) {
 		/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
@@ -4790,7 +4803,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
 
 	dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
 		 vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
-		 vf->hw_lan_addr.addr,
+		 vf->dev_lan_addr.addr,
 		 test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)
 			  ? "on" : "off");
 }
@@ -4834,7 +4847,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
 
 			dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n",
 				 vf->mdd_tx_events.count, hw->pf_id, i,
-				 vf->hw_lan_addr.addr);
+				 vf->dev_lan_addr.addr);
 		}
 	}
 }
@@ -4924,7 +4937,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
 
 			if (pf_vsi)
 				dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
-					 &vf->hw_lan_addr.addr[0],
+					 &vf->dev_lan_addr.addr[0],
 					 pf_vsi->netdev->dev_addr);
 		}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 91749c67129b..77ff0023f7be 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -83,6 +83,7 @@ struct ice_vf {
 	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
 	struct virtchnl_version_info vf_ver;
 	u32 driver_caps;		/* reported by VF driver */
+	struct virtchnl_ether_addr dev_lan_addr;
 	struct virtchnl_ether_addr hw_lan_addr;
 	struct ice_time_mac legacy_last_added_umac;
 	DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF);

From 019694f5c1b9cc444e6a3fd3005f556d0c5a6b14 Mon Sep 17 00:00:00 2001
From: Hailong Liu <liu.hailong6@zte.com.cn>
Date: Sun, 6 Jun 2021 19:58:28 +0800
Subject: [PATCH 1085/3292] cpufreq: sh: Remove unused linux/sched.h headers

Since commit '205dcc1ecbc5(cpufreq/sh: Replace racy task affinity logic)'
the header <linux/sched.h> is useless in sh-cpufreq.c, so remove it.

Signed-off-by: Hailong Liu <liu.hailong6@zte.com.cn>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/sh-cpufreq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 0ac265d47ef0..1a251e635ebd 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -23,7 +23,6 @@
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
-#include <linux/sched.h>	/* set_cpus_allowed() */
 #include <linux/clk.h>
 #include <linux/percpu.h>
 #include <linux/sh_clk.h>

From bcc936c5d5159b4d1891d58f89301f74ff61a67d Mon Sep 17 00:00:00 2001
From: Hailong Liu <liu.hailong6@zte.com.cn>
Date: Thu, 3 Jun 2021 21:57:52 +0800
Subject: [PATCH 1086/3292] cpufreq: loongson2: Remove unused linux/sched.h
 headers

Since commit 759f534e93ac(CPUFREQ: Loongson2: drop set_cpus_allowed_ptr()),
the header <linux/sched.h> is useless in oongson2_cpufreq.c, so remove it.

Signed-off-by: Hailong Liu <liu.hailong6@zte.com.cn>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/loongson2_cpufreq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index d05e761d9572..afc59b292153 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -16,7 +16,6 @@
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/err.h>
-#include <linux/sched.h>	/* set_cpus_allowed() */
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 

From 43c7f9198deb855b7fee1ecb2c2f98f2bfd757c8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Date: Fri, 20 Nov 2020 16:39:32 -0800
Subject: [PATCH 1087/3292] ice: Refactor ice_setup_rx_ctx

Move AF_XDP logic and buffer allocation out of ice_setup_rx_ctx() to a
new function ice_vsi_cfg_rxq(), so the function actually sets up the Rx
context.

Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Co-developed-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c | 120 +++++++++++++---------
 drivers/net/ethernet/intel/ice/ice_base.h |   2 +-
 drivers/net/ethernet/intel/ice/ice_lib.c  |  10 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c  |   2 +-
 4 files changed, 78 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 5985a7e5ca8a..142d660010c6 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -319,11 +319,9 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
  *
  * Configure the Rx descriptor ring in RLAN context.
  */
-int ice_setup_rx_ctx(struct ice_ring *ring)
+static int ice_setup_rx_ctx(struct ice_ring *ring)
 {
-	struct device *dev = ice_pf_to_dev(ring->vsi->back);
 	int chain_len = ICE_MAX_CHAINED_RX_BUFS;
-	u16 num_bufs = ICE_DESC_UNUSED(ring);
 	struct ice_vsi *vsi = ring->vsi;
 	u32 rxdid = ICE_RXDID_FLEX_NIC;
 	struct ice_rlan_ctx rlan_ctx;
@@ -339,48 +337,6 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	/* clear the context structure first */
 	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
 
-	ring->rx_buf_len = vsi->rx_buf_len;
-
-	if (ring->vsi->type == ICE_VSI_PF) {
-		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
-			/* coverity[check_return] */
-			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
-					 ring->q_index, ring->q_vector->napi.napi_id);
-
-		ring->xsk_pool = ice_xsk_pool(ring);
-		if (ring->xsk_pool) {
-			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
-
-			ring->rx_buf_len =
-				xsk_pool_get_rx_frame_size(ring->xsk_pool);
-			/* For AF_XDP ZC, we disallow packets to span on
-			 * multiple buffers, thus letting us skip that
-			 * handling in the fast-path.
-			 */
-			chain_len = 1;
-			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-							 MEM_TYPE_XSK_BUFF_POOL,
-							 NULL);
-			if (err)
-				return err;
-			xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
-
-			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
-				 ring->q_index);
-		} else {
-			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
-				/* coverity[check_return] */
-				xdp_rxq_info_reg(&ring->xdp_rxq,
-						 ring->netdev,
-						 ring->q_index, ring->q_vector->napi.napi_id);
-
-			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-							 MEM_TYPE_PAGE_SHARED,
-							 NULL);
-			if (err)
-				return err;
-		}
-	}
 	/* Receive Queue Base Address.
 	 * Indicates the starting address of the descriptor queue defined in
 	 * 128 Byte units.
@@ -415,6 +371,12 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	 */
 	rlan_ctx.showiv = 0;
 
+	/* For AF_XDP ZC, we disallow packets to span on
+	 * multiple buffers, thus letting us skip that
+	 * handling in the fast-path.
+	 */
+	if (ring->xsk_pool)
+		chain_len = 1;
 	/* Max packet size for this queue - must not be set to a larger value
 	 * than 5 x DBUF
 	 */
@@ -438,7 +400,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	/* Absolute queue number out of 2K needs to be passed */
 	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
 	if (err) {
-		dev_err(dev, "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
 			pf_q, err);
 		return -EIO;
 	}
@@ -458,6 +420,66 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
 	writel(0, ring->tail);
 
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_rxq - Configure an Rx queue
+ * @ring: the ring being configured
+ *
+ * Return 0 on success and a negative value on error.
+ */
+int ice_vsi_cfg_rxq(struct ice_ring *ring)
+{
+	struct device *dev = ice_pf_to_dev(ring->vsi->back);
+	u16 num_bufs = ICE_DESC_UNUSED(ring);
+	int err;
+
+	ring->rx_buf_len = ring->vsi->rx_buf_len;
+
+	if (ring->vsi->type == ICE_VSI_PF) {
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+			/* coverity[check_return] */
+			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->q_index, ring->q_vector->napi.napi_id);
+
+		ring->xsk_pool = ice_xsk_pool(ring);
+		if (ring->xsk_pool) {
+			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+			ring->rx_buf_len =
+				xsk_pool_get_rx_frame_size(ring->xsk_pool);
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_XSK_BUFF_POOL,
+							 NULL);
+			if (err)
+				return err;
+			xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+
+			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
+				 ring->q_index);
+		} else {
+			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+				/* coverity[check_return] */
+				xdp_rxq_info_reg(&ring->xdp_rxq,
+						 ring->netdev,
+						 ring->q_index, ring->q_vector->napi.napi_id);
+
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (err)
+				return err;
+		}
+	}
+
+	err = ice_setup_rx_ctx(ring);
+	if (err) {
+		dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+			ring->q_index, err);
+		return err;
+	}
+
 	if (ring->xsk_pool) {
 		bool ok;
 
@@ -470,9 +492,13 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 		}
 
 		ok = ice_alloc_rx_bufs_zc(ring, num_bufs);
-		if (!ok)
+		if (!ok) {
+			u16 pf_q = ring->vsi->rxq_map[ring->q_index];
+
 			dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n",
 				 ring->q_index, pf_q);
+		}
+
 		return 0;
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
index 44efdb627043..20e1c29aa68a 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.h
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -6,7 +6,7 @@
 
 #include "ice.h"
 
-int ice_setup_rx_ctx(struct ice_ring *ring);
+int ice_vsi_cfg_rxq(struct ice_ring *ring);
 int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
 int
 ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 56e1ae558761..bd84c1f09296 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1698,15 +1698,11 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
 	ice_vsi_cfg_frame_size(vsi);
 setup_rings:
 	/* set up individual rings */
-	for (i = 0; i < vsi->num_rxq; i++) {
-		int err;
+	ice_for_each_rxq(vsi, i) {
+		int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
 
-		err = ice_setup_rx_ctx(vsi->rx_rings[i]);
-		if (err) {
-			dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
-				i, err);
+		if (err)
 			return err;
-		}
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index faa7b8d96adb..b0576415ffdb 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -236,7 +236,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
 		xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring);
 	}
 
-	err = ice_setup_rx_ctx(rx_ring);
+	err = ice_vsi_cfg_rxq(rx_ring);
 	if (err)
 		goto free_buf;
 

From 7ad15440acf8b5a889c60216dfc91370b90f455d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:15:40 -0800
Subject: [PATCH 1088/3292] ice: Refactor VIRTCHNL_OP_CONFIG_VSI_QUEUES
 handling

Currently, when a VF requests queue configuration via
VIRTCHNL_OP_CONFIG_VSI_QUEUES the PF driver expects that this message
will only be called once and we always assume the queues being
configured start from 0. This is incorrect and is causing issues when
a VF tries to send this message for multiple queue blocks. Fix this by
using the queue_id specified in the virtchnl message and allowing for
individual Rx and/or Tx queues to be configured.

Also, reduce the duplicated for loops for configuring the queues by
moving all the logic into a single for loop.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c      | 27 ++++++++++
 drivers/net/ethernet/intel/ice/ice_lib.h      |  4 ++
 .../net/ethernet/intel/ice/ice_virtchnl_pf.c  | 51 ++++++++++---------
 3 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index bd84c1f09296..135c4d9fd01c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1681,6 +1681,33 @@ ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio)
 	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
 }
 
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
+{
+	if (q_idx >= vsi->num_rxq)
+		return -EINVAL;
+
+	return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]);
+}
+
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	int err;
+
+	if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx])
+		return -EINVAL;
+
+	qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf);
+	kfree(qg_buf);
+	return err;
+}
+
 /**
  * ice_vsi_cfg_rxqs - Configure the VSI for Rx
  * @vsi: the VSI being configured
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 5ec857f71459..9bd619e2399a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -12,6 +12,10 @@ bool ice_pf_state_is_nominal(struct ice_pf *pf);
 
 void ice_update_eth_stats(struct ice_vsi *vsi);
 
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx);
+
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx);
+
 int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
 
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 677d29fd0885..5c68f11b83bb 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -3537,10 +3537,9 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 	struct virtchnl_vsi_queue_config_info *qci =
 	    (struct virtchnl_vsi_queue_config_info *)msg;
 	struct virtchnl_queue_pair_info *qpi;
-	u16 num_rxq = 0, num_txq = 0;
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
-	int i;
+	int i, q_idx;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -3578,18 +3577,31 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 			goto error_param;
 		}
+
+		q_idx = qpi->rxq.queue_id;
+
+		/* make sure selected "q_idx" is in valid range of queues
+		 * for selected "vsi"
+		 */
+		if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
 		/* copy Tx queue info from VF into VSI */
 		if (qpi->txq.ring_len > 0) {
-			num_txq++;
 			vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
 			vsi->tx_rings[i]->count = qpi->txq.ring_len;
+			if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
 		}
 
 		/* copy Rx queue info from VF into VSI */
 		if (qpi->rxq.ring_len > 0) {
 			u16 max_frame_size = ice_vc_get_max_frame_size(vf);
 
-			num_rxq++;
 			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
 			vsi->rx_rings[i]->count = qpi->rxq.ring_len;
 
@@ -3606,28 +3618,21 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 				goto error_param;
 			}
+
+			vsi->max_frame = qpi->rxq.max_pkt_size;
+			/* add space for the port VLAN since the VF driver is not
+			 * expected to account for it in the MTU calculation
+			 */
+			if (vf->port_vlan_info)
+				vsi->max_frame += VLAN_HLEN;
+
+			if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
 		}
-
-		vsi->max_frame = qpi->rxq.max_pkt_size;
-		/* add space for the port VLAN since the VF driver is not
-		 * expected to account for it in the MTU calculation
-		 */
-		if (vf->port_vlan_info)
-			vsi->max_frame += VLAN_HLEN;
 	}
 
-	/* VF can request to configure less than allocated queues or default
-	 * allocated queues. So update the VSI with new number
-	 */
-	vsi->num_txq = num_txq;
-	vsi->num_rxq = num_rxq;
-	/* All queues of VF VSI are in TC 0 */
-	vsi->tc_cfg.tc_info[0].qcount_tx = num_txq;
-	vsi->tc_cfg.tc_info[0].qcount_rx = num_rxq;
-
-	if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi))
-		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
-
 error_param:
 	/* send the response to the VF */
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,

From fb3612840d4f587a0af9511a11d7989d1fa48206 Mon Sep 17 00:00:00 2001
From: Liwei Song <liwei.song@windriver.com>
Date: Mon, 19 Apr 2021 17:31:06 +0800
Subject: [PATCH 1089/3292] ice: set the value of global config lock timeout
 longer

It may need hold Global Config Lock a longer time when download DDP
package file, extend the timeout value to 5000ms to ensure that
download can be finished before other AQ command got time to run,
this will fix the issue below when probe the device, 5000ms is a test
value that work with both Backplane and BreakoutCable NVM image:

ice 0000:f4:00.0: VSI 12 failed lan queue config, error ICE_ERR_CFG
ice 0000:f4:00.0: Failed to delete VSI 12 in FW - error: ICE_ERR_AQ_TIMEOUT
ice 0000:f4:00.0: probe failed due to setup PF switch: -12
ice: probe of 0000:f4:00.0 failed with error -12

Signed-off-by: Liwei Song <liwei.song@windriver.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_type.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index c580b87c76ee..2e235646ede2 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -64,7 +64,7 @@ enum ice_aq_res_ids {
 /* FW update timeout definitions are in milliseconds */
 #define ICE_NVM_TIMEOUT			180000
 #define ICE_CHANGE_LOCK_TIMEOUT		1000
-#define ICE_GLOBAL_CFG_LOCK_TIMEOUT	3000
+#define ICE_GLOBAL_CFG_LOCK_TIMEOUT	5000
 
 enum ice_aq_res_access_type {
 	ICE_RES_READ = 1,

From b38b7f2bb418510397714c7fb3bed64b6fd024e5 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Fri, 23 Apr 2021 01:00:18 +0100
Subject: [PATCH 1090/3292] ice: Re-organizes reqstd/avail {R, T}XQ check/code
 for efficiency

If user has explicitly requested the number of {R,T}XQs, then it is
unnecessary to get the count of already available {R,T}XQs from the
PF avail_{r,t}xqs bitmap. This value will get overridden by user specified
value in any case.

Re-organize this code for improving the flow, readability and efficiency.
This scope of improvement was found during the review of the ICE driver
code.

Fixes: 87324e747fde ("ice: Implement ethtool ops for channels")
Cc: intel-wired-lan@lists.osuosl.org
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 135c4d9fd01c..357c5d39913d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -163,12 +163,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 
 	switch (vsi->type) {
 	case ICE_VSI_PF:
-		vsi->alloc_txq = min3(pf->num_lan_msix,
-				      ice_get_avail_txq_count(pf),
-				      (u16)num_online_cpus());
 		if (vsi->req_txq) {
 			vsi->alloc_txq = vsi->req_txq;
 			vsi->num_txq = vsi->req_txq;
+		} else {
+			vsi->alloc_txq = min3(pf->num_lan_msix,
+					      ice_get_avail_txq_count(pf),
+					      (u16)num_online_cpus());
 		}
 
 		pf->num_lan_tx = vsi->alloc_txq;
@@ -177,12 +178,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
 			vsi->alloc_rxq = 1;
 		} else {
-			vsi->alloc_rxq = min3(pf->num_lan_msix,
-					      ice_get_avail_rxq_count(pf),
-					      (u16)num_online_cpus());
 			if (vsi->req_rxq) {
 				vsi->alloc_rxq = vsi->req_rxq;
 				vsi->num_rxq = vsi->req_rxq;
+			} else {
+				vsi->alloc_rxq = min3(pf->num_lan_msix,
+						      ice_get_avail_rxq_count(pf),
+						      (u16)num_online_cpus());
 			}
 		}
 

From 96cf4f689bf7e074c8ab8917a82a24114370cbeb Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 6 May 2021 08:39:56 -0700
Subject: [PATCH 1091/3292] ice: use static inline for dummy functions

Trivial:
The driver had previously attempted to use #define
macros to make functions that have no use in certain
configs disappear. Using static inlines instead allows
for certain static checkers to process the code better,
and results in no functional change.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_arfs.h     | 12 +++++-----
 drivers/net/ethernet/intel/ice/ice_dcb_lib.h  | 15 +++++++------
 drivers/net/ethernet/intel/ice/ice_dcb_nl.h   |  9 ++++----
 .../net/ethernet/intel/ice/ice_virtchnl_pf.h  | 22 ++++++++++---------
 drivers/net/ethernet/intel/ice/ice_xsk.h      |  4 ++--
 5 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h
index f39cd16403ed..80ed76f0cace 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.h
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.h
@@ -52,12 +52,12 @@ bool
 ice_is_arfs_using_perfect_flow(struct ice_hw *hw,
 			       enum ice_fltr_ptype flow_type);
 #else
-#define ice_sync_arfs_fltrs(pf) do {} while (0)
-#define ice_init_arfs(vsi) do {} while (0)
-#define ice_clear_arfs(vsi) do {} while (0)
-#define ice_remove_arfs(pf) do {} while (0)
-#define ice_free_cpu_rx_rmap(vsi) do {} while (0)
-#define ice_rebuild_arfs(pf) do {} while (0)
+static inline void ice_clear_arfs(struct ice_vsi *vsi) { }
+static inline void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { }
+static inline void ice_init_arfs(struct ice_vsi *vsi) { }
+static inline void ice_sync_arfs_fltrs(struct ice_pf *pf) { }
+static inline void ice_remove_arfs(struct ice_pf *pf) { }
+static inline void ice_rebuild_arfs(struct ice_pf *pf) { }
 
 static inline int ice_set_cpu_rx_rmap(struct ice_vsi __always_unused *vsi)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 35c21d9ae009..261b6e2ed7bc 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -60,7 +60,7 @@ static inline bool ice_is_dcb_active(struct ice_pf *pf)
 		test_bit(ICE_FLAG_DCB_ENA, pf->flags));
 }
 #else
-#define ice_dcb_rebuild(pf) do {} while (0)
+static inline void ice_dcb_rebuild(struct ice_pf *pf) { }
 
 static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
 {
@@ -113,11 +113,12 @@ ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf,
 	return false;
 }
 
-#define ice_update_dcb_stats(pf) do {} while (0)
-#define ice_pf_dcb_recfg(pf) do {} while (0)
-#define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
-#define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0)
-#define ice_set_cgd_num(tlan_ctx, ring) do {} while (0)
-#define ice_vsi_cfg_netdev_tc(vsi, ena_tc) do {} while (0)
+static inline void ice_pf_dcb_recfg(struct ice_pf *pf) { }
+static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
+static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
+static inline void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { }
+static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) { }
 #endif /* CONFIG_DCB */
 #endif /* _ICE_DCB_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
index 6c630a362293..eac2f34bdcdd 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -11,9 +11,10 @@ void
 ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
 		     struct ice_dcbx_cfg *new_cfg);
 #else
-#define ice_dcbnl_setup(vsi) do {} while (0)
-#define ice_dcbnl_set_all(vsi) do {} while (0)
-#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0)
+static inline void ice_dcbnl_setup(struct ice_vsi *vsi) { }
+static inline void ice_dcbnl_set_all(struct ice_vsi *vsi) { }
+static inline void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg) { }
 #endif /* CONFIG_DCB */
-
 #endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 77ff0023f7be..842cb077df86 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -158,16 +158,18 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
 bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
 #else /* CONFIG_PCI_IOV */
-#define ice_process_vflr_event(pf) do {} while (0)
-#define ice_free_vfs(pf) do {} while (0)
-#define ice_vc_process_vf_msg(pf, event) do {} while (0)
-#define ice_vc_notify_link_state(pf) do {} while (0)
-#define ice_vc_notify_reset(pf) do {} while (0)
-#define ice_set_vf_state_qs_dis(vf) do {} while (0)
-#define ice_vf_lan_overflow_event(pf, event) do {} while (0)
-#define ice_print_vfs_mdd_events(pf) do {} while (0)
-#define ice_print_vf_rx_mdd_event(vf) do {} while (0)
-#define ice_restore_all_vfs_msi_state(pdev) do {} while (0)
+static inline void ice_process_vflr_event(struct ice_pf *pf) { }
+static inline void ice_free_vfs(struct ice_pf *pf) { }
+static inline
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_vc_notify_link_state(struct ice_pf *pf) { }
+static inline void ice_vc_notify_reset(struct ice_pf *pf) { }
+static inline void ice_set_vf_state_qs_dis(struct ice_vf *vf) { }
+static inline
+void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { }
+static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { }
+static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { }
 
 static inline bool
 ice_is_malicious_vf(struct ice_pf __always_unused *pf,
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index fad783690134..ea208808623a 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -60,7 +60,7 @@ ice_xsk_wakeup(struct net_device __always_unused *netdev,
 	return -EOPNOTSUPP;
 }
 
-#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
-#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+static inline void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) { }
+static inline void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) { }
 #endif /* CONFIG_XDP_SOCKETS */
 #endif /* !_ICE_XSK_H_ */

From d5f84ae95f1db8e3d3ff3ece8ddbfd6f0a8ceb61 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 6 May 2021 08:39:57 -0700
Subject: [PATCH 1092/3292] ice: add extack when unable to read device caps

When filling out information for the DEVLINK_CMD_INFO_GET, the driver
needs to read some device capabilities. Add an extack message to
properly inform the caller of the failure, as we do for other failures
in this function.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_devlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index cf685eeea198..2923591d01ea 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -283,6 +283,9 @@ static int ice_devlink_info_get(struct devlink *devlink,
 	/* discover capabilities first */
 	status = ice_discover_dev_caps(hw, &ctx->dev_caps);
 	if (status) {
+		dev_dbg(dev, "Failed to discover device capabilities, status %s aq_err %s\n",
+			ice_stat_str(status), ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities");
 		err = -EIO;
 		goto out_free_ctx;
 	}

From e872b94f9cf0521e93d1b91b8c71ba417d59944e Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 6 May 2021 08:39:58 -0700
Subject: [PATCH 1093/3292] ice: add error message when pldmfw_flash_image
 fails

When flashing a new firmware image onto the device, the pldmfw library
parses the image contents looking for a matching record. If no record
can be found, the function reports an error of -ENOENT. This can produce
a very confusing error message and experience for the user:

  $devlink dev flash pci/0000:ab:00.0 file image.bin
  devlink answers: No such file or directory

This is because the ENOENT error code is interpreted as a missing file
or directory. The pldmfw library does not have direct access to the
extack pointer as it is generic and non-netdevice specific. The only way
that ENOENT is returned by the pldmfw library is when no record matches.

Catch this specific error and report a suitable extended ack message:

  $devlink dev flash pci/0000:ab:00.0 file image.bin
  Error: ice: Firmware image has no record matching this device
  devlink answers: No such file or directory

In addition, ensure that we log an error message to the console whenever
this function fails. Because our driver specific PLDM operation
functions potentially set the extended ACK message, avoid overwriting
this with a generic message.

This change should result in an improved experience when attempting to
flash an image that does not have a compatible record.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fw_update.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
index dcec0360ce55..f8601d5b0b19 100644
--- a/drivers/net/ethernet/intel/ice/ice_fw_update.c
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -702,6 +702,16 @@ int ice_flash_pldm_image(struct ice_pf *pf, const struct firmware *fw,
 	}
 
 	err = pldmfw_flash_image(&priv.context, fw);
+	if (err == -ENOENT) {
+		dev_err(dev, "Firmware image has no record matching this device\n");
+		NL_SET_ERR_MSG_MOD(extack, "Firmware image has no record matching this device");
+	} else if (err) {
+		/* Do not set a generic extended ACK message here. A more
+		 * specific message may already have been set by one of our
+		 * ops.
+		 */
+		dev_err(dev, "Failed to flash PLDM image, err %d", err);
+	}
 
 	ice_release_nvm(hw);
 

From 1c08052ec49e4ef4549ebbc7a43f27694e08935d Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 6 May 2021 08:39:59 -0700
Subject: [PATCH 1094/3292] ice: wait for reset before reporting devlink info

Requesting device firmware information while the device is busy cleaning
up after a reset can result in an unexpected failure:

This occurs because the command is attempting to access the device
AdminQ while it is down. Resolve this by having the command wait for
a while until the reset is complete. To do this, introduce
a reset_wait_queue and associated helper function "ice_wait_for_reset".

This helper will use the wait queue to sleep until the driver is done
rebuilding. Use of a wait queue is preferred because the potential sleep
duration can be several seconds.

To ensure that the thread wakes up properly, a new wake_up call is added
during all code paths which clear the reset state bits associated with
the driver rebuild flow.

Using this ensures that tools can request device information without
worrying about whether the driver is cleaning up from a reset.
Specifically, it is expected that a flash update could result in
a device reset, and it is better to delay the response for information
until the reset is complete rather than exit with an immediate failure.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h         |  2 ++
 drivers/net/ethernet/intel/ice/ice_devlink.c |  6 +++++
 drivers/net/ethernet/intel/ice/ice_lib.c     | 28 ++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_lib.h     |  1 +
 drivers/net/ethernet/intel/ice/ice_main.c    |  5 ++++
 5 files changed, 42 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 228055e8f33b..21f8b36df11a 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -455,6 +455,8 @@ struct ice_pf {
 	struct hlist_head aq_wait_list;
 	wait_queue_head_t aq_wait_queue;
 
+	wait_queue_head_t reset_wait_queue;
+
 	u32 hw_csum_rx_error;
 	u16 oicr_idx;		/* Other interrupt cause MSIX vector index */
 	u16 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 2923591d01ea..91b545ab8b8f 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -276,6 +276,12 @@ static int ice_devlink_info_get(struct devlink *devlink,
 	size_t i;
 	int err;
 
+	err = ice_wait_for_reset(pf, 10 * HZ);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting");
+		return err;
+	}
+
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 357c5d39913d..1c636f4bb4fc 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3218,6 +3218,34 @@ bool ice_is_reset_in_progress(unsigned long *state)
 	       test_bit(ICE_GLOBR_REQ, state);
 }
 
+/**
+ * ice_wait_for_reset - Wait for driver to finish reset and rebuild
+ * @pf: pointer to the PF structure
+ * @timeout: length of time to wait, in jiffies
+ *
+ * Wait (sleep) for a short time until the driver finishes cleaning up from
+ * a device reset. The caller must be able to sleep. Use this to delay
+ * operations that could fail while the driver is cleaning up after a device
+ * reset.
+ *
+ * Returns 0 on success, -EBUSY if the reset is not finished within the
+ * timeout, and -ERESTARTSYS if the thread was interrupted.
+ */
+int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout)
+{
+	long ret;
+
+	ret = wait_event_interruptible_timeout(pf->reset_wait_queue,
+					       !ice_is_reset_in_progress(pf->state),
+					       timeout);
+	if (ret < 0)
+		return ret;
+	else if (!ret)
+		return -EBUSY;
+	else
+		return 0;
+}
+
 #ifdef CONFIG_DCB
 /**
  * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 9bd619e2399a..6e2b8c2c8aa0 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -77,6 +77,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
 int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi);
 
 bool ice_is_reset_in_progress(unsigned long *state);
+int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout);
 
 void
 ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 254cfc14d6b4..a89ca799109f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -503,6 +503,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 		clear_bit(ICE_PFR_REQ, pf->state);
 		clear_bit(ICE_CORER_REQ, pf->state);
 		clear_bit(ICE_GLOBR_REQ, pf->state);
+		wake_up(&pf->reset_wait_queue);
 		return;
 	}
 
@@ -515,6 +516,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 		ice_rebuild(pf, reset_type);
 		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
 		clear_bit(ICE_PFR_REQ, pf->state);
+		wake_up(&pf->reset_wait_queue);
 		ice_reset_all_vfs(pf, true);
 	}
 }
@@ -565,6 +567,7 @@ static void ice_reset_subtask(struct ice_pf *pf)
 			clear_bit(ICE_PFR_REQ, pf->state);
 			clear_bit(ICE_CORER_REQ, pf->state);
 			clear_bit(ICE_GLOBR_REQ, pf->state);
+			wake_up(&pf->reset_wait_queue);
 			ice_reset_all_vfs(pf, true);
 		}
 
@@ -3343,6 +3346,8 @@ static int ice_init_pf(struct ice_pf *pf)
 	spin_lock_init(&pf->aq_wait_lock);
 	init_waitqueue_head(&pf->aq_wait_queue);
 
+	init_waitqueue_head(&pf->reset_wait_queue);
+
 	/* setup service timer and periodic service task */
 	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
 	pf->serv_tmr_period = HZ;

From 97a4ec0107057a577b63568f31d35e31c39a5b7b Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 6 May 2021 08:40:00 -0700
Subject: [PATCH 1095/3292] ice: (re)initialize NVM fields when rebuilding

After performing a flash update, a device EMP reset may occur. This
reset will cause the newly downloaded firmware to be initialized. When
this happens, the driver still reports the previous NVM version
information.

This is because the NVM versions are cached within the hw structure.
This can be confusing, as the new firmware is in fact running in this
case.

Handle this by calling ice_init_nvm when rebuilding the driver state.
This will update the flash version information and ensures that the
current values are displayed when reporting the NVM versions to the
stack.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index a89ca799109f..7606ded59a84 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6218,6 +6218,12 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 	ice_clear_pxe_mode(hw);
 
+	ret = ice_init_nvm(hw);
+	if (ret) {
+		dev_err(dev, "ice_init_nvm failed %s\n", ice_stat_str(ret));
+		goto err_init_ctrlq;
+	}
+
 	ret = ice_get_caps(hw);
 	if (ret) {
 		dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret));

From c77849f5460994f8c5b27907af13c60b533add5b Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 6 May 2021 08:40:01 -0700
Subject: [PATCH 1096/3292] ice: Detect and report unsupported module power
 levels

Determine whether an unsupported power configuration is preventing link
establishment by storing and checking the link_cfg_err_byte. Print error
messages when module power levels are unsupported. Also add a new flag
bit to prevent spamming said error messages.

Co-developed-by: Jeb Cramer <jeb.j.cramer@intel.com>
Signed-off-by: Jeb Cramer <jeb.j.cramer@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  1 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  6 ++-
 drivers/net/ethernet/intel/ice/ice_common.c   |  2 +
 drivers/net/ethernet/intel/ice/ice_main.c     | 40 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_type.h     |  1 +
 5 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 21f8b36df11a..1fd2362f73b6 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -392,6 +392,7 @@ enum ice_pf_flags {
 	ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
 	ICE_FLAG_NO_MEDIA,
 	ICE_FLAG_FW_LLDP_AGENT,
+	ICE_FLAG_MOD_POWER_UNSUPPORTED,
 	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
 	ICE_FLAG_LEGACY_RX,
 	ICE_FLAG_VF_TRUE_PROMISC_ENA,
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index ff11a618bef7..a9a7d2d1aca7 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1123,7 +1123,9 @@ struct ice_aqc_get_link_status_data {
 #define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT	BIT(5)
 #define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA	BIT(6)
 #define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA	BIT(7)
-	u8 reserved1;
+	u8 link_cfg_err;
+#define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED	BIT(5)
+#define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT	BIT(7)
 	u8 link_info;
 #define ICE_AQ_LINK_UP			BIT(0)	/* Link Status */
 #define ICE_AQ_LINK_FAULT		BIT(1)
@@ -1166,7 +1168,7 @@ struct ice_aqc_get_link_status_data {
 #define ICE_AQ_CFG_PACING_TYPE_FIXED	ICE_AQ_CFG_PACING_TYPE_M
 	/* External Device Power Ability */
 	u8 power_desc;
-#define ICE_AQ_PWR_CLASS_M		0x3
+#define ICE_AQ_PWR_CLASS_M		0x3F
 #define ICE_AQ_LINK_PWR_BASET_LOW_HIGH	0
 #define ICE_AQ_LINK_PWR_BASET_HIGH	1
 #define ICE_AQ_LINK_PWR_QSFP_CLASS_1	0
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index b8cc737ea261..f687d1f6b765 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -425,6 +425,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	li->phy_type_high = le64_to_cpu(link_data.phy_type_high);
 	*hw_media_type = ice_get_media_type(pi);
 	li->link_info = link_data.link_info;
+	li->link_cfg_err = link_data.link_cfg_err;
 	li->an_info = link_data.an_info;
 	li->ext_info = link_data.ext_info;
 	li->max_frame_size = le16_to_cpu(link_data.max_frame_size);
@@ -455,6 +456,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		  (unsigned long long)li->phy_type_high);
 	ice_debug(hw, ICE_DBG_LINK, "	media_type = 0x%x\n", *hw_media_type);
 	ice_debug(hw, ICE_DBG_LINK, "	link_info = 0x%x\n", li->link_info);
+	ice_debug(hw, ICE_DBG_LINK, "	link_cfg_err = 0x%x\n", li->link_cfg_err);
 	ice_debug(hw, ICE_DBG_LINK, "	an_info = 0x%x\n", li->an_info);
 	ice_debug(hw, ICE_DBG_LINK, "	ext_info = 0x%x\n", li->ext_info);
 	ice_debug(hw, ICE_DBG_LINK, "	fec_info = 0x%x\n", li->fec_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 7606ded59a84..4c0412d87b1a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -864,6 +864,38 @@ static void ice_set_dflt_mib(struct ice_pf *pf)
 	kfree(lldpmib);
 }
 
+/**
+ * ice_check_module_power
+ * @pf: pointer to PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * check module power level returned by a previous call to aq_get_link_info
+ * and print error messages if module power level is not supported
+ */
+static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
+{
+	/* if module power level is supported, clear the flag */
+	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
+			      ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
+		clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+		return;
+	}
+
+	/* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
+	 * above block didn't clear this bit, there's nothing to do
+	 */
+	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
+		return;
+
+	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
+		dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
+		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
+		dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
+		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+	}
+}
+
 /**
  * ice_link_event - process the link event
  * @pf: PF that the link event is associated with
@@ -899,6 +931,8 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 			pi->lport, ice_stat_str(status),
 			ice_aq_str(pi->hw->adminq.sq_last_status));
 
+	ice_check_module_power(pf, pi->phy.link_info.link_cfg_err);
+
 	/* Check if the link state is up after updating link info, and treat
 	 * this event as an UP event since the link is actually UP now.
 	 */
@@ -2013,6 +2047,8 @@ static void ice_check_media_subtask(struct ice_pf *pf)
 	if (err)
 		return;
 
+	ice_check_module_power(pf, pi->phy.link_info.link_cfg_err);
+
 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
 		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
 			ice_init_phy_user_cfg(pi);
@@ -4269,6 +4305,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	ice_init_link_dflt_override(pf->hw.port_info);
 
+	ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err);
+
 	/* if media available, initialize PHY settings */
 	if (pf->hw.port_info->phy.link_info.link_info &
 	    ICE_AQ_MEDIA_AVAILABLE) {
@@ -6929,6 +6967,8 @@ int ice_open_internal(struct net_device *netdev)
 		return -EIO;
 	}
 
+	ice_check_module_power(pf, pi->phy.link_info.link_cfg_err);
+
 	/* Set PHY if there is media, otherwise, turn off PHY */
 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
 		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 2e235646ede2..61ea46dd80b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -147,6 +147,7 @@ struct ice_link_status {
 	u16 max_frame_size;
 	u16 link_speed;
 	u16 req_speeds;
+	u8 link_cfg_err;
 	u8 lse_ena;	/* Link Status Event notification */
 	u8 link_info;
 	u8 an_info;

From a69606cde176a29f9261d96528b9a50fee8efadb Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 6 May 2021 08:40:06 -0700
Subject: [PATCH 1097/3292] ice: downgrade error print to debug print

Failing to add or remove LLDP filter doesn't seem to be a fatal
error, so downgrade the dev_err message to a dev_dbg message.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 1c636f4bb4fc..9322c09ac36c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2239,7 +2239,7 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
 	}
 
 	if (status)
-		dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
+		dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
 			create ? "adding" : "removing", tx ? "TX" : "RX",
 			vsi->vsi_num, ice_stat_str(status));
 }

From 7e94090ae13e1ae5fe8bd3a9cd08136260bb7039 Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Wed, 31 Mar 2021 14:17:06 -0700
Subject: [PATCH 1098/3292] ice: fix clang warning regarding
 deadcode.DeadStores

clang generates deadcode.DeadStores warnings when a variable
is used to read a value, but then that value isn't used later
in the code. Fix this warning.

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index d9ddd0bcf65f..f60bf2f5af13 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3501,13 +3501,9 @@ static int
 ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
 		    struct ice_ring_container *rc)
 {
-	struct ice_pf *pf;
-
 	if (!rc->ring)
 		return -EINVAL;
 
-	pf = rc->ring->vsi->back;
-
 	switch (c_type) {
 	case ICE_RX_CONTAINER:
 		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc);
@@ -3519,7 +3515,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
 		ec->tx_coalesce_usecs = rc->itr_setting;
 		break;
 	default:
-		dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type);
+		dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type);
 		return -EINVAL;
 	}
 

From c858d436be8b949c368de0e079084acaff3d4aaf Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 4 Jun 2021 17:01:48 +0300
Subject: [PATCH 1099/3292] net: phy: introduce PHY_INTERFACE_MODE_REVRMII

The "reverse RMII" protocol name is a personal invention, derived from
"reverse MII".

Just like MII, RMII is an asymmetric protocol in that a PHY behaves
differently than a MAC. In the case of RMII, for example:
- the 50 MHz clock signals are either driven by the MAC or by an
  external oscillator (but never by the PHY).
- the PHY can transmit extra in-band control symbols via RXD[1:0] which
  the MAC is supposed to understand, but a PHY isn't.

The "reverse MII" protocol is not standardized either, except for this
web document:
https://www.eetimes.com/reverse-media-independent-interface-revmii-block-architecture/#

In short, it means that the Ethernet controller speaks the 4-bit data
parallel protocol from the perspective of a PHY (it acts like a PHY).
This might mean that it implements clause 22 compatible registers,
although that is optional - the important bit is that its pins can be
connected to an MII MAC and it will 'just work'.

In this discussion thread:
https://lore.kernel.org/netdev/20210201214515.cx6ivvme2tlquge2@skbuf/

we agreed that it would be an abuse of terms to use the "RevMII" name
for anything than the 4-bit parallel MII protocol. But since all the
same concepts can be applied to the 2-bit Reduced MII protocol as well,
here we are introducing a "Reverse RMII" protocol. This means: "behave
like an RMII PHY".

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ethernet-controller.yaml          | 1 +
 include/linux/phy.h                                           | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index e8f04687a3e0..d97b561003ed 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -68,6 +68,7 @@ properties:
       - tbi
       - rev-mii
       - rmii
+      - rev-rmii
 
       # RX and TX delays are added by the MAC when required
       - rgmii
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 852743f07e3e..ed332ac92e25 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -93,6 +93,7 @@ extern const int phy_10gbit_features_array[1];
  * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface
  * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface
  * @PHY_INTERFACE_MODE_RMII: Reduced Media Independent Interface
+ * @PHY_INTERFACE_MODE_REVRMII: Reduced Media Independent Interface in PHY role
  * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface
  * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay
  * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay
@@ -126,6 +127,7 @@ typedef enum {
 	PHY_INTERFACE_MODE_TBI,
 	PHY_INTERFACE_MODE_REVMII,
 	PHY_INTERFACE_MODE_RMII,
+	PHY_INTERFACE_MODE_REVRMII,
 	PHY_INTERFACE_MODE_RGMII,
 	PHY_INTERFACE_MODE_RGMII_ID,
 	PHY_INTERFACE_MODE_RGMII_RXID,
@@ -185,6 +187,8 @@ static inline const char *phy_modes(phy_interface_t interface)
 		return "rev-mii";
 	case PHY_INTERFACE_MODE_RMII:
 		return "rmii";
+	case PHY_INTERFACE_MODE_REVRMII:
+		return "rev-rmii";
 	case PHY_INTERFACE_MODE_RGMII:
 		return "rgmii";
 	case PHY_INTERFACE_MODE_RGMII_ID:

From 29afb83ac98e1c1cd9c9bb6e82e7d15354b3ae0b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 4 Jun 2021 17:01:49 +0300
Subject: [PATCH 1100/3292] net: dsa: sja1105: apply RGMII delays based on the
 fixed-link property

The sja1105 driver has an intermediate way of determining whether the
RGMII delays should be applied by the PHY or by itself: by looking at
the port role (PHY or MAC). The port can be put in the PHY role either
explicitly (sja1105,role-phy) or implicitly (fixed-link).

We want to deprecate the sja1105,role-phy property, so all that remains
is the fixed-link property. Introduce a "fixed_link" array of booleans
in the driver, and use that to determine whether RGMII delays must be
applied or not.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c | 28 +++++++++++++-------------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 867cda832e77..3c66e5945cca 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -226,6 +226,7 @@ struct sja1105_private {
 	bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS];
 	bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS];
 	phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
+	bool fixed_link[SJA1105_MAX_NUM_PORTS];
 	bool best_effort_vlan_filtering;
 	unsigned long learn_ena;
 	unsigned long ucast_egress_floods;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 84edd054781b..5839c1e0475a 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -799,26 +799,25 @@ static int sja1105_static_config_load(struct sja1105_private *priv,
 	return sja1105_static_config_upload(priv);
 }
 
-static int sja1105_parse_rgmii_delays(struct sja1105_private *priv,
-				      const struct sja1105_dt_port *ports)
+static int sja1105_parse_rgmii_delays(struct sja1105_private *priv)
 {
 	struct dsa_switch *ds = priv->ds;
-	int i;
+	int port;
 
-	for (i = 0; i < ds->num_ports; i++) {
-		if (ports[i].role == XMII_MAC)
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!priv->fixed_link[port])
 			continue;
 
-		if (ports[i].phy_mode == PHY_INTERFACE_MODE_RGMII_RXID ||
-		    ports[i].phy_mode == PHY_INTERFACE_MODE_RGMII_ID)
-			priv->rgmii_rx_delay[i] = true;
+		if (priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_RXID ||
+		    priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_ID)
+			priv->rgmii_rx_delay[port] = true;
 
-		if (ports[i].phy_mode == PHY_INTERFACE_MODE_RGMII_TXID ||
-		    ports[i].phy_mode == PHY_INTERFACE_MODE_RGMII_ID)
-			priv->rgmii_tx_delay[i] = true;
+		if (priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_TXID ||
+		    priv->phy_mode[port] == PHY_INTERFACE_MODE_RGMII_ID)
+			priv->rgmii_tx_delay[port] = true;
 
-		if ((priv->rgmii_rx_delay[i] || priv->rgmii_tx_delay[i]) &&
-		     !priv->info->setup_rgmii_delay)
+		if ((priv->rgmii_rx_delay[port] || priv->rgmii_tx_delay[port]) &&
+		    !priv->info->setup_rgmii_delay)
 			return -EINVAL;
 	}
 	return 0;
@@ -867,6 +866,7 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv,
 			/* phy-handle is missing, but fixed-link isn't.
 			 * So it's a fixed link. Default to PHY role.
 			 */
+			priv->fixed_link[index] = true;
 			ports[index].role = XMII_PHY;
 		} else {
 			/* phy-handle present => put port in MAC role */
@@ -3021,7 +3021,7 @@ static int sja1105_setup(struct dsa_switch *ds)
 	/* Error out early if internal delays are required through DT
 	 * and we can't apply them.
 	 */
-	rc = sja1105_parse_rgmii_delays(priv, ports);
+	rc = sja1105_parse_rgmii_delays(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "RGMII delay not supported\n");
 		return rc;

From 5d645df99ac60fab5368e01f1ddf4a57fa4f719f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 4 Jun 2021 17:01:50 +0300
Subject: [PATCH 1101/3292] net: dsa: sja1105: determine PHY/MAC role from PHY
 interface type

Now that both RevMII as well as RevRMII exist, we can deprecate the
sja1105,role-mac and sja1105,role-phy properties and simply let the user
select that a port operates in MII PHY role by using
	phy-mode = "rev-mii";
or in RMII PHY role by using
	phy-mode = "rev-rmii";

There are no fixed-link MII or RMII properties in mainline device trees,
and the setup itself is fairly uncommon, so there shouldn't be risks of
breaking compatibility.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/dsa/sja1105.txt   | 37 +----------
 drivers/net/dsa/sja1105/sja1105_main.c        | 64 ++++++-------------
 2 files changed, 19 insertions(+), 82 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/sja1105.txt b/Documentation/devicetree/bindings/net/dsa/sja1105.txt
index 13fd21074d48..dcf3b2c1d26b 100644
--- a/Documentation/devicetree/bindings/net/dsa/sja1105.txt
+++ b/Documentation/devicetree/bindings/net/dsa/sja1105.txt
@@ -19,37 +19,6 @@ Required properties:
 	of support for RGMII internal delays (supported on P/Q/R/S, but not on
 	E/T).
 
-Optional properties:
-
-- sja1105,role-mac:
-- sja1105,role-phy:
-	Boolean properties that can be assigned under each port node. By
-	default (unless otherwise specified) a port is configured as MAC if it
-	is driving a PHY (phy-handle is present) or as PHY if it is PHY-less
-	(fixed-link specified, presumably because it is connected to a MAC).
-	The effect of this property (in either its implicit or explicit form)
-	is:
-	- In the case of MII or RMII it specifies whether the SJA1105 port is a
-	  clock source or sink for this interface (not applicable for RGMII
-	  where there is a Tx and an Rx clock).
-	- In the case of RGMII it affects the behavior regarding internal
-	  delays:
-	  1. If sja1105,role-mac is specified, and the phy-mode property is one
-	     of "rgmii-id", "rgmii-txid" or "rgmii-rxid", then the entity
-	     designated to apply the delay/clock skew necessary for RGMII
-	     is the PHY. The SJA1105 MAC does not apply any internal delays.
-	  2. If sja1105,role-phy is specified, and the phy-mode property is one
-	     of the above, the designated entity to apply the internal delays
-	     is the SJA1105 MAC (if hardware-supported). This is only supported
-	     by the second-generation (P/Q/R/S) hardware. On a first-generation
-	     E or T device, it is an error to specify an RGMII phy-mode other
-	     than "rgmii" for a port that is in fixed-link mode. In that case,
-	     the clock skew must either be added by the MAC at the other end of
-	     the fixed-link, or by PCB serpentine traces on the board.
-	These properties are required, for example, in the case where SJA1105
-	ports are at both ends of a MII/RMII PHY-less setup. One end would need
-	to have sja1105,role-mac, while the other sja1105,role-phy.
-
 See Documentation/devicetree/bindings/net/dsa/dsa.txt for the list of standard
 DSA required and optional properties.
 
@@ -87,7 +56,6 @@ arch/arm/boot/dts/ls1021a-tsn.dts:
 				phy-handle = <&rgmii_phy6>;
 				phy-mode = "rgmii-id";
 				reg = <0>;
-				/* Implicit "sja1105,role-mac;" */
 			};
 			port@1 {
 				/* ETH2 written on chassis */
@@ -95,7 +63,6 @@ arch/arm/boot/dts/ls1021a-tsn.dts:
 				phy-handle = <&rgmii_phy3>;
 				phy-mode = "rgmii-id";
 				reg = <1>;
-				/* Implicit "sja1105,role-mac;" */
 			};
 			port@2 {
 				/* ETH3 written on chassis */
@@ -103,7 +70,6 @@ arch/arm/boot/dts/ls1021a-tsn.dts:
 				phy-handle = <&rgmii_phy4>;
 				phy-mode = "rgmii-id";
 				reg = <2>;
-				/* Implicit "sja1105,role-mac;" */
 			};
 			port@3 {
 				/* ETH4 written on chassis */
@@ -111,14 +77,13 @@ arch/arm/boot/dts/ls1021a-tsn.dts:
 				label = "swp4";
 				phy-mode = "rgmii-id";
 				reg = <3>;
-				/* Implicit "sja1105,role-mac;" */
 			};
 			port@4 {
 				/* Internal port connected to eth2 */
 				ethernet = <&enet2>;
 				phy-mode = "rgmii";
 				reg = <4>;
-				/* Implicit "sja1105,role-phy;" */
+
 				fixed-link {
 					speed = <1000>;
 					full-duplex;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 5839c1e0475a..cbce6e90dc63 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -57,14 +57,6 @@ static bool sja1105_can_forward(struct sja1105_l2_forwarding_entry *l2_fwd,
 	return !!(l2_fwd[from].reach_port & BIT(to));
 }
 
-/* Structure used to temporarily transport device tree
- * settings into sja1105_setup
- */
-struct sja1105_dt_port {
-	phy_interface_t phy_mode;
-	sja1105_mii_role_t role;
-};
-
 static int sja1105_init_mac_settings(struct sja1105_private *priv)
 {
 	struct sja1105_mac_config_entry default_mac = {
@@ -143,8 +135,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 	return 0;
 }
 
-static int sja1105_init_mii_settings(struct sja1105_private *priv,
-				     struct sja1105_dt_port *ports)
+static int sja1105_init_mii_settings(struct sja1105_private *priv)
 {
 	struct device *dev = &priv->spidev->dev;
 	struct sja1105_xmii_params_entry *mii;
@@ -171,16 +162,24 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 	mii = table->entries;
 
 	for (i = 0; i < ds->num_ports; i++) {
+		sja1105_mii_role_t role = XMII_MAC;
+
 		if (dsa_is_unused_port(priv->ds, i))
 			continue;
 
-		switch (ports[i].phy_mode) {
+		switch (priv->phy_mode[i]) {
+		case PHY_INTERFACE_MODE_REVMII:
+			role = XMII_PHY;
+			fallthrough;
 		case PHY_INTERFACE_MODE_MII:
 			if (!priv->info->supports_mii[i])
 				goto unsupported;
 
 			mii->xmii_mode[i] = XMII_MODE_MII;
 			break;
+		case PHY_INTERFACE_MODE_REVRMII:
+			role = XMII_PHY;
+			fallthrough;
 		case PHY_INTERFACE_MODE_RMII:
 			if (!priv->info->supports_rmii[i])
 				goto unsupported;
@@ -211,24 +210,11 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 unsupported:
 		default:
 			dev_err(dev, "Unsupported PHY mode %s on port %d!\n",
-				phy_modes(ports[i].phy_mode), i);
+				phy_modes(priv->phy_mode[i]), i);
 			return -EINVAL;
 		}
 
-		/* Even though the SerDes port is able to drive SGMII autoneg
-		 * like a PHY would, from the perspective of the XMII tables,
-		 * the SGMII port should always be put in MAC mode.
-		 * Similarly, RGMII is a symmetric protocol electrically
-		 * speaking, and the 'RGMII PHY' role does not mean anything to
-		 * hardware. Just keep the 'PHY role' notation relevant to the
-		 * driver to mean 'the switch port should apply RGMII delays',
-		 * but unconditionally put the port in the MAC role.
-		 */
-		if (ports[i].phy_mode == PHY_INTERFACE_MODE_SGMII ||
-		    phy_interface_mode_is_rgmii(ports[i].phy_mode))
-			mii->phy_mac[i] = XMII_MAC;
-		else
-			mii->phy_mac[i] = ports[i].role;
+		mii->phy_mac[i] = role;
 	}
 	return 0;
 }
@@ -751,8 +737,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
 	return 0;
 }
 
-static int sja1105_static_config_load(struct sja1105_private *priv,
-				      struct sja1105_dt_port *ports)
+static int sja1105_static_config_load(struct sja1105_private *priv)
 {
 	int rc;
 
@@ -767,7 +752,7 @@ static int sja1105_static_config_load(struct sja1105_private *priv,
 	rc = sja1105_init_mac_settings(priv);
 	if (rc < 0)
 		return rc;
-	rc = sja1105_init_mii_settings(priv, ports);
+	rc = sja1105_init_mii_settings(priv);
 	if (rc < 0)
 		return rc;
 	rc = sja1105_init_static_fdb(priv);
@@ -824,7 +809,6 @@ static int sja1105_parse_rgmii_delays(struct sja1105_private *priv)
 }
 
 static int sja1105_parse_ports_node(struct sja1105_private *priv,
-				    struct sja1105_dt_port *ports,
 				    struct device_node *ports_node)
 {
 	struct device *dev = &priv->spidev->dev;
@@ -853,7 +837,6 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv,
 			of_node_put(child);
 			return -ENODEV;
 		}
-		ports[index].phy_mode = phy_mode;
 
 		phy_node = of_parse_phandle(child, "phy-handle", 0);
 		if (!phy_node) {
@@ -867,27 +850,17 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv,
 			 * So it's a fixed link. Default to PHY role.
 			 */
 			priv->fixed_link[index] = true;
-			ports[index].role = XMII_PHY;
 		} else {
-			/* phy-handle present => put port in MAC role */
-			ports[index].role = XMII_MAC;
 			of_node_put(phy_node);
 		}
 
-		/* The MAC/PHY role can be overridden with explicit bindings */
-		if (of_property_read_bool(child, "sja1105,role-mac"))
-			ports[index].role = XMII_MAC;
-		else if (of_property_read_bool(child, "sja1105,role-phy"))
-			ports[index].role = XMII_PHY;
-
 		priv->phy_mode[index] = phy_mode;
 	}
 
 	return 0;
 }
 
-static int sja1105_parse_dt(struct sja1105_private *priv,
-			    struct sja1105_dt_port *ports)
+static int sja1105_parse_dt(struct sja1105_private *priv)
 {
 	struct device *dev = &priv->spidev->dev;
 	struct device_node *switch_node = dev->of_node;
@@ -902,7 +875,7 @@ static int sja1105_parse_dt(struct sja1105_private *priv,
 		return -ENODEV;
 	}
 
-	rc = sja1105_parse_ports_node(priv, ports, ports_node);
+	rc = sja1105_parse_ports_node(priv, ports_node);
 	of_node_put(ports_node);
 
 	return rc;
@@ -3008,11 +2981,10 @@ static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = {
  */
 static int sja1105_setup(struct dsa_switch *ds)
 {
-	struct sja1105_dt_port ports[SJA1105_MAX_NUM_PORTS];
 	struct sja1105_private *priv = ds->priv;
 	int rc;
 
-	rc = sja1105_parse_dt(priv, ports);
+	rc = sja1105_parse_dt(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to parse DT: %d\n", rc);
 		return rc;
@@ -3033,7 +3005,7 @@ static int sja1105_setup(struct dsa_switch *ds)
 		return rc;
 	}
 	/* Create and send configuration down to device */
-	rc = sja1105_static_config_load(priv, ports);
+	rc = sja1105_static_config_load(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to load static config: %d\n", rc);
 		goto out_ptp_clock_unregister;

From 62568bdbe6f6293c955fbd98db15adf7ee6aca1c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 4 Jun 2021 17:01:51 +0300
Subject: [PATCH 1102/3292] dt-bindings: net: dsa: sja1105: convert to YAML
 schema

Since the sja1105 driver no longer has any custom device tree
properties, the conversion is trivial.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/dsa/nxp,sja1105.yaml         |  89 +++++++++++++
 .../devicetree/bindings/net/dsa/sja1105.txt   | 121 ------------------
 2 files changed, 89 insertions(+), 121 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
 delete mode 100644 Documentation/devicetree/bindings/net/dsa/sja1105.txt

diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
new file mode 100644
index 000000000000..d6ac9a0c1b04
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/dsa/nxp,sja1105.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP SJA1105 Automotive Ethernet Switch Family Device Tree Bindings
+
+description:
+  The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944.pdf) of at
+  least one half of t_CLK. At an SPI frequency of 1MHz, this means a minimum
+  cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed
+  depends on the SPI bus master driver.
+
+allOf:
+  - $ref: "dsa.yaml#"
+
+maintainers:
+  - Vladimir Oltean <vladimir.oltean@nxp.com>
+
+properties:
+  compatible:
+    enum:
+      - nxp,sja1105e
+      - nxp,sja1105t
+      - nxp,sja1105p
+      - nxp,sja1105q
+      - nxp,sja1105r
+      - nxp,sja1105s
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    spi {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            ethernet-switch@1 {
+                    reg = <0x1>;
+                    compatible = "nxp,sja1105t";
+
+                    ethernet-ports {
+                            #address-cells = <1>;
+                            #size-cells = <0>;
+
+                            port@0 {
+                                    phy-handle = <&rgmii_phy6>;
+                                    phy-mode = "rgmii-id";
+                                    reg = <0>;
+                            };
+
+                            port@1 {
+                                    phy-handle = <&rgmii_phy3>;
+                                    phy-mode = "rgmii-id";
+                                    reg = <1>;
+                            };
+
+                            port@2 {
+                                    phy-handle = <&rgmii_phy4>;
+                                    phy-mode = "rgmii-id";
+                                    reg = <2>;
+                            };
+
+                            port@3 {
+                                    phy-mode = "rgmii-id";
+                                    reg = <3>;
+                            };
+
+                            port@4 {
+                                    ethernet = <&enet2>;
+                                    phy-mode = "rgmii";
+                                    reg = <4>;
+
+                                    fixed-link {
+                                            speed = <1000>;
+                                            full-duplex;
+                                    };
+                            };
+                    };
+            };
+    };
diff --git a/Documentation/devicetree/bindings/net/dsa/sja1105.txt b/Documentation/devicetree/bindings/net/dsa/sja1105.txt
deleted file mode 100644
index dcf3b2c1d26b..000000000000
--- a/Documentation/devicetree/bindings/net/dsa/sja1105.txt
+++ /dev/null
@@ -1,121 +0,0 @@
-NXP SJA1105 switch driver
-=========================
-
-Required properties:
-
-- compatible:
-	Must be one of:
-	- "nxp,sja1105e"
-	- "nxp,sja1105t"
-	- "nxp,sja1105p"
-	- "nxp,sja1105q"
-	- "nxp,sja1105r"
-	- "nxp,sja1105s"
-
-	Although the device ID could be detected at runtime, explicit bindings
-	are required in order to be able to statically check their validity.
-	For example, SGMII can only be specified on port 4 of R and S devices,
-	and the non-SGMII devices, while pin-compatible, are not equal in terms
-	of support for RGMII internal delays (supported on P/Q/R/S, but not on
-	E/T).
-
-See Documentation/devicetree/bindings/net/dsa/dsa.txt for the list of standard
-DSA required and optional properties.
-
-Other observations
-------------------
-
-The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944) of at least
-one half of t_CLK. At an SPI frequency of 1MHz, this means a minimum
-cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed
-depends on the SPI bus master driver.
-
-Example
--------
-
-Ethernet switch connected via SPI to the host, CPU port wired to enet2:
-
-arch/arm/boot/dts/ls1021a-tsn.dts:
-
-/* SPI controller of the LS1021 */
-&dspi0 {
-	sja1105@1 {
-		reg = <0x1>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		compatible = "nxp,sja1105t";
-		spi-max-frequency = <4000000>;
-		fsl,spi-cs-sck-delay = <1000>;
-		fsl,spi-sck-cs-delay = <1000>;
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			port@0 {
-				/* ETH5 written on chassis */
-				label = "swp5";
-				phy-handle = <&rgmii_phy6>;
-				phy-mode = "rgmii-id";
-				reg = <0>;
-			};
-			port@1 {
-				/* ETH2 written on chassis */
-				label = "swp2";
-				phy-handle = <&rgmii_phy3>;
-				phy-mode = "rgmii-id";
-				reg = <1>;
-			};
-			port@2 {
-				/* ETH3 written on chassis */
-				label = "swp3";
-				phy-handle = <&rgmii_phy4>;
-				phy-mode = "rgmii-id";
-				reg = <2>;
-			};
-			port@3 {
-				/* ETH4 written on chassis */
-				phy-handle = <&rgmii_phy5>;
-				label = "swp4";
-				phy-mode = "rgmii-id";
-				reg = <3>;
-			};
-			port@4 {
-				/* Internal port connected to eth2 */
-				ethernet = <&enet2>;
-				phy-mode = "rgmii";
-				reg = <4>;
-
-				fixed-link {
-					speed = <1000>;
-					full-duplex;
-				};
-			};
-		};
-	};
-};
-
-/* MDIO controller of the LS1021 */
-&mdio0 {
-	/* BCM5464 */
-	rgmii_phy3: ethernet-phy@3 {
-		reg = <0x3>;
-	};
-	rgmii_phy4: ethernet-phy@4 {
-		reg = <0x4>;
-	};
-	rgmii_phy5: ethernet-phy@5 {
-		reg = <0x5>;
-	};
-	rgmii_phy6: ethernet-phy@6 {
-		reg = <0x6>;
-	};
-};
-
-/* Ethernet master port of the LS1021 */
-&enet2 {
-	phy-connection-type = "rgmii";
-	status = "ok";
-	fixed-link {
-		speed = <1000>;
-		full-duplex;
-	};
-};

From 725637a802c58ada9f64d586d4b548e04e7e3f32 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:22 +0800
Subject: [PATCH 1103/3292] net: hd64570: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index 058e48182838..e0266c687e19 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -47,7 +47,6 @@
 #define SCA_INTR_DMAC_RX(node) (node ? 0x20 : 0x02)
 #define SCA_INTR_DMAC_TX(node) (node ? 0x40 : 0x04)
 
-
 static inline struct net_device *port_to_dev(port_t *port)
 {
 	return port->dev;
@@ -87,7 +86,6 @@ static inline u16 next_desc(port_t *port, u16 desc, int transmit)
 			     : port_to_card(port)->rx_ring_buffers);
 }
 
-
 static inline u16 desc_abs_number(port_t *port, u16 desc, int transmit)
 {
 	u16 rx_buffs = port_to_card(port)->rx_ring_buffers;
@@ -98,14 +96,12 @@ static inline u16 desc_abs_number(port_t *port, u16 desc, int transmit)
 		transmit * rx_buffs + desc;
 }
 
-
 static inline u16 desc_offset(port_t *port, u16 desc, int transmit)
 {
 	/* Descriptor offset always fits in 16 bits */
 	return desc_abs_number(port, desc, transmit) * sizeof(pkt_desc);
 }
 
-
 static inline pkt_desc __iomem *desc_address(port_t *port, u16 desc,
 					     int transmit)
 {
@@ -118,14 +114,12 @@ static inline pkt_desc __iomem *desc_address(port_t *port, u16 desc,
 #endif
 }
 
-
 static inline u32 buffer_offset(port_t *port, u16 desc, int transmit)
 {
 	return port_to_card(port)->buff_offset +
 		desc_abs_number(port, desc, transmit) * (u32)HDLC_MAX_MRU;
 }
 
-
 static inline void sca_set_carrier(port_t *port)
 {
 	if (!(sca_in(get_msci(port) + ST3, port_to_card(port)) & ST3_DCD)) {
@@ -143,7 +137,6 @@ static inline void sca_set_carrier(port_t *port)
 	}
 }
 
-
 static void sca_init_port(port_t *port)
 {
 	card_t *card = port_to_card(port);
@@ -213,7 +206,6 @@ static void sca_init_port(port_t *port)
 	sca_set_carrier(port);
 }
 
-
 #ifdef NEED_SCA_MSCI_INTR
 /* MSCI interrupt service */
 static inline void sca_msci_intr(port_t *port)
@@ -236,7 +228,6 @@ static inline void sca_msci_intr(port_t *port)
 }
 #endif
 
-
 static inline void sca_rx(card_t *card, port_t *port, pkt_desc __iomem *desc,
 			  u16 rxin)
 {
@@ -282,7 +273,6 @@ static inline void sca_rx(card_t *card, port_t *port, pkt_desc __iomem *desc,
 	netif_rx(skb);
 }
 
-
 /* Receive DMA interrupt service */
 static inline void sca_rx_intr(port_t *port)
 {
@@ -334,7 +324,6 @@ static inline void sca_rx_intr(port_t *port)
 	sca_out(DSR_DE, DSR_RX(phy_node(port)), card);
 }
 
-
 /* Transmit DMA interrupt service */
 static inline void sca_tx_intr(port_t *port)
 {
@@ -370,7 +359,6 @@ static inline void sca_tx_intr(port_t *port)
 	spin_unlock(&port->lock);
 }
 
-
 static irqreturn_t sca_intr(int irq, void* dev_id)
 {
 	card_t *card = dev_id;
@@ -400,7 +388,6 @@ static irqreturn_t sca_intr(int irq, void* dev_id)
 	return IRQ_RETVAL(handled);
 }
 
-
 static void sca_set_port(port_t *port)
 {
 	card_t* card = port_to_card(port);
@@ -408,7 +395,6 @@ static void sca_set_port(port_t *port)
 	u8 md2 = sca_in(msci + MD2, card);
 	unsigned int tmc, br = 10, brv = 1024;
 
-
 	if (port->settings.clock_rate > 0) {
 		/* Try lower br for better accuracy*/
 		do {
@@ -450,10 +436,8 @@ static void sca_set_port(port_t *port)
 		md2 &= ~MD2_LOOPBACK;
 
 	sca_out(md2, msci + MD2, card);
-
 }
 
-
 static void sca_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -517,7 +501,6 @@ static void sca_open(struct net_device *dev)
 	netif_start_queue(dev);
 }
 
-
 static void sca_close(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -535,7 +518,6 @@ static void sca_close(struct net_device *dev)
 	netif_stop_queue(dev);
 }
 
-
 static int sca_attach(struct net_device *dev, unsigned short encoding,
 		      unsigned short parity)
 {
@@ -558,7 +540,6 @@ static int sca_attach(struct net_device *dev, unsigned short encoding,
 	return 0;
 }
 
-
 #ifdef DEBUG_RINGS
 static void sca_dump_rings(struct net_device *dev)
 {
@@ -613,7 +594,6 @@ static void sca_dump_rings(struct net_device *dev)
 }
 #endif /* DEBUG_RINGS */
 
-
 static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -670,7 +650,6 @@ static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-
 #ifdef NEED_DETECT_RAM
 static u32 sca_detect_ram(card_t *card, u8 __iomem *rambase, u32 ramsize)
 {
@@ -699,7 +678,6 @@ static u32 sca_detect_ram(card_t *card, u8 __iomem *rambase, u32 ramsize)
 }
 #endif /* NEED_DETECT_RAM */
 
-
 static void sca_init(card_t *card, int wait_states)
 {
 	sca_out(wait_states, WCRL, card); /* Wait Control */

From d364c0a93ac66325ad841b6be49e277d7c15af0c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:23 +0800
Subject: [PATCH 1104/3292] net: hd64570: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index e0266c687e19..cca6101d68d3 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -345,6 +345,7 @@ static inline void sca_tx_intr(port_t *port)
 
 		u32 desc_off = desc_offset(port, port->txlast, 1);
 		u32 cda = sca_inw(dmac + CDAL, card);
+
 		if ((cda >= desc_off) && (cda < desc_off + sizeof(pkt_desc)))
 			break;	/* Transmitter is/will_be sending this frame */
 
@@ -371,6 +372,7 @@ static irqreturn_t sca_intr(int irq, void* dev_id)
 		handled = 1;
 		for (i = 0; i < 2; i++) {
 			port_t *port = get_port(card, i);
+
 			if (port) {
 				if (stat & SCA_INTR_MSCI(i))
 					sca_msci_intr(port);

From 1d1fa598ac198e6f40ffa67c99dbff1cc86a581f Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:24 +0800
Subject: [PATCH 1105/3292] net: hd64570: fix the code style issue about "foo*
 bar"

Fix the checkpatch error as "foo* bar" and should be "foo *bar",
and "(foo*)" should be "(foo *)".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index cca6101d68d3..6237da6df2b1 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -75,7 +75,7 @@ static inline int sca_intr_status(card_t *card)
 	return result;
 }
 
-static inline port_t* dev_to_port(struct net_device *dev)
+static inline port_t *dev_to_port(struct net_device *dev)
 {
 	return dev_to_hdlc(dev)->priv;
 }
@@ -211,7 +211,7 @@ static void sca_init_port(port_t *port)
 static inline void sca_msci_intr(port_t *port)
 {
 	u16 msci = get_msci(port);
-	card_t* card = port_to_card(port);
+	card_t *card = port_to_card(port);
 	u8 stat = sca_in(msci + ST1, card); /* read MSCI ST1 status */
 
 	/* Reset MSCI TX underrun and CDCD status bit */
@@ -329,7 +329,7 @@ static inline void sca_tx_intr(port_t *port)
 {
 	struct net_device *dev = port_to_dev(port);
 	u16 dmac = get_dmac_tx(port);
-	card_t* card = port_to_card(port);
+	card_t *card = port_to_card(port);
 	u8 stat;
 
 	spin_lock(&port->lock);
@@ -360,7 +360,7 @@ static inline void sca_tx_intr(port_t *port)
 	spin_unlock(&port->lock);
 }
 
-static irqreturn_t sca_intr(int irq, void* dev_id)
+static irqreturn_t sca_intr(int irq, void *dev_id)
 {
 	card_t *card = dev_id;
 	int i;
@@ -392,7 +392,7 @@ static irqreturn_t sca_intr(int irq, void* dev_id)
 
 static void sca_set_port(port_t *port)
 {
-	card_t* card = port_to_card(port);
+	card_t *card = port_to_card(port);
 	u16 msci = get_msci(port);
 	u8 md2 = sca_in(msci + MD2, card);
 	unsigned int tmc, br = 10, brv = 1024;
@@ -443,7 +443,7 @@ static void sca_set_port(port_t *port)
 static void sca_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
-	card_t* card = port_to_card(port);
+	card_t *card = port_to_card(port);
 	u16 msci = get_msci(port);
 	u8 md0, md2;
 
@@ -506,7 +506,7 @@ static void sca_open(struct net_device *dev)
 static void sca_close(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
-	card_t* card = port_to_card(port);
+	card_t *card = port_to_card(port);
 
 	/* reset channel */
 	sca_out(CMD_RESET, get_msci(port) + CMD, port_to_card(port));

From bc94e642e4bd37fa84405d7893e09b2cbf6b9af0 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:25 +0800
Subject: [PATCH 1106/3292] net: hd64570: fix the code style issue about
 trailing statements

Trailing statements should be on next line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 60 +++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index 6237da6df2b1..f02cce05736a 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -58,12 +58,18 @@ static inline int sca_intr_status(card_t *card)
 	u8 isr0 = sca_in(ISR0, card);
 	u8 isr1 = sca_in(ISR1, card);
 
-	if (isr1 & 0x03) result |= SCA_INTR_DMAC_RX(0);
-	if (isr1 & 0x0C) result |= SCA_INTR_DMAC_TX(0);
-	if (isr1 & 0x30) result |= SCA_INTR_DMAC_RX(1);
-	if (isr1 & 0xC0) result |= SCA_INTR_DMAC_TX(1);
-	if (isr0 & 0x0F) result |= SCA_INTR_MSCI(0);
-	if (isr0 & 0xF0) result |= SCA_INTR_MSCI(1);
+	if (isr1 & 0x03)
+		result |= SCA_INTR_DMAC_RX(0);
+	if (isr1 & 0x0C)
+		result |= SCA_INTR_DMAC_TX(0);
+	if (isr1 & 0x30)
+		result |= SCA_INTR_DMAC_RX(1);
+	if (isr1 & 0xC0)
+		result |= SCA_INTR_DMAC_TX(1);
+	if (isr0 & 0x0F)
+		result |= SCA_INTR_MSCI(0);
+	if (isr0 & 0xF0)
+		result |= SCA_INTR_MSCI(1);
 
 	if (!(result & SCA_INTR_DMAC_TX(0)))
 		if (sca_in(DSR_TX(0), card) & DSR_EOM)
@@ -447,23 +453,41 @@ static void sca_open(struct net_device *dev)
 	u16 msci = get_msci(port);
 	u8 md0, md2;
 
-	switch(port->encoding) {
-	case ENCODING_NRZ:	md2 = MD2_NRZ;		break;
-	case ENCODING_NRZI:	md2 = MD2_NRZI;		break;
-	case ENCODING_FM_MARK:	md2 = MD2_FM_MARK;	break;
-	case ENCODING_FM_SPACE:	md2 = MD2_FM_SPACE;	break;
-	default:		md2 = MD2_MANCHESTER;
+	switch (port->encoding) {
+	case ENCODING_NRZ:
+		md2 = MD2_NRZ;
+		break;
+	case ENCODING_NRZI:
+		md2 = MD2_NRZI;
+		break;
+	case ENCODING_FM_MARK:
+		md2 = MD2_FM_MARK;
+		break;
+	case ENCODING_FM_SPACE:
+		md2 = MD2_FM_SPACE;
+		break;
+	default:
+		md2 = MD2_MANCHESTER;
 	}
 
 	if (port->settings.loopback)
 		md2 |= MD2_LOOPBACK;
 
-	switch(port->parity) {
-	case PARITY_CRC16_PR0:	     md0 = MD0_HDLC | MD0_CRC_16_0;  break;
-	case PARITY_CRC16_PR1:	     md0 = MD0_HDLC | MD0_CRC_16;    break;
-	case PARITY_CRC16_PR0_CCITT: md0 = MD0_HDLC | MD0_CRC_ITU_0; break;
-	case PARITY_CRC16_PR1_CCITT: md0 = MD0_HDLC | MD0_CRC_ITU;   break;
-	default:		     md0 = MD0_HDLC | MD0_CRC_NONE;
+	switch (port->parity) {
+	case PARITY_CRC16_PR0:
+		md0 = MD0_HDLC | MD0_CRC_16_0;
+		break;
+	case PARITY_CRC16_PR1:
+		md0 = MD0_HDLC | MD0_CRC_16;
+		break;
+	case PARITY_CRC16_PR0_CCITT:
+		md0 = MD0_HDLC | MD0_CRC_ITU_0;
+		break;
+	case PARITY_CRC16_PR1_CCITT:
+		md0 = MD0_HDLC | MD0_CRC_ITU;
+		break;
+	default:
+		md0 = MD0_HDLC | MD0_CRC_NONE;
 	}
 
 	sca_out(CMD_RESET, msci + CMD, card);

From 3f8b8db695fe79aa0ab007fac1f414e74ccd7146 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:26 +0800
Subject: [PATCH 1107/3292] net: hd64570: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index f02cce05736a..0297fbe8b938 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -262,8 +262,9 @@ static inline void sca_rx(card_t *card, port_t *port, pkt_desc __iomem *desc,
 		memcpy_fromio(skb->data, winbase(card) + buff, maxlen);
 		openwin(card, page + 1);
 		memcpy_fromio(skb->data + maxlen, winbase(card), len - maxlen);
-	} else
+	} else {
 		memcpy_fromio(skb->data, winbase(card) + buff, len);
+	}
 
 #ifndef PAGE0_ALWAYS_MAPPED
 	openwin(card, 0);	/* select pkt_desc table page back */
@@ -318,8 +319,9 @@ static inline void sca_rx_intr(port_t *port)
 				dev->stats.rx_crc_errors++;
 			if (stat & ST_RX_EOM)
 				port->rxpart = 0; /* received last fragment */
-		} else
+		} else {
 			sca_rx(card, port, desc, port->rxin);
+		}
 
 		/* Set new error descriptor address */
 		sca_outw(desc_off, dmac + EDAL, card);
@@ -417,8 +419,9 @@ static void sca_set_port(port_t *port)
 			tmc = 1;
 			br = 0;	/* For baud=CLOCK_BASE we use tmc=1 br=0 */
 			brv = 1;
-		} else if (tmc > 255)
+		} else if (tmc > 255) {
 			tmc = 256; /* tmc=0 means 256 - low baud rates */
+		}
 
 		port->settings.clock_rate = CLOCK_BASE / brv / tmc;
 	} else {
@@ -651,8 +654,9 @@ static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
 		memcpy_toio(winbase(card) + buff, skb->data, maxlen);
 		openwin(card, page + 1);
 		memcpy_toio(winbase(card), skb->data + maxlen, len - maxlen);
-	} else
+	} else {
 		memcpy_toio(winbase(card) + buff, skb->data, len);
+	}
 
 #ifndef PAGE0_ALWAYS_MAPPED
 	openwin(card, 0);	/* select pkt_desc table page back */

From 53da5342c51a6cfe10e760629cf94ae382a0d02b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:27 +0800
Subject: [PATCH 1108/3292] net: hd64570: fix the comments style issue

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index 0297fbe8b938..c6605ea6c50c 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -507,9 +507,9 @@ static void sca_open(struct net_device *dev)
 	sca_out(0x14, msci + TRC1, card); /* +1=TXRDY/DMA deactiv condition */
 
 /* We're using the following interrupts:
-   - TXINT (DMAC completed all transmisions, underrun or DCD change)
-   - all DMA interrupts
-*/
+ * - TXINT (DMAC completed all transmisions, underrun or DCD change)
+ * - all DMA interrupts
+ */
 	sca_set_carrier(port);
 
 	/* MSCI TX INT and RX INT A IRQ enable */

From cb625e9c5d48e4a717653febaac6e32bda8a63f5 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:28 +0800
Subject: [PATCH 1109/3292] net: hd64570: remove redundant parentheses

Remove redundant parentheses around 'cda >= desc_off'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index c6605ea6c50c..8af647c7b0c5 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -301,7 +301,7 @@ static inline void sca_rx_intr(port_t *port)
 		pkt_desc __iomem *desc;
 		u32 cda = sca_inw(dmac + CDAL, card);
 
-		if ((cda >= desc_off) && (cda < desc_off + sizeof(pkt_desc)))
+		if (cda >= desc_off && (cda < desc_off + sizeof(pkt_desc)))
 			break;	/* No frame received */
 
 		desc = desc_address(port, port->rxin, 0);
@@ -354,7 +354,7 @@ static inline void sca_tx_intr(port_t *port)
 		u32 desc_off = desc_offset(port, port->txlast, 1);
 		u32 cda = sca_inw(dmac + CDAL, card);
 
-		if ((cda >= desc_off) && (cda < desc_off + sizeof(pkt_desc)))
+		if (cda >= desc_off && (cda < desc_off + sizeof(pkt_desc)))
 			break;	/* Transmitter is/will_be sending this frame */
 
 		desc = desc_address(port, port->txlast, 1);

From 0f1e7a34c053fd7b9a5ffa1b45cedf40a855c26e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 5 Jun 2021 15:00:29 +0800
Subject: [PATCH 1110/3292] net: hd64570: add some required spaces

Add space required before the open parenthesis '('.
Add space required after that close brace '}'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hd64570.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index 8af647c7b0c5..0d19e39fec86 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -376,7 +376,7 @@ static irqreturn_t sca_intr(int irq, void *dev_id)
 	int handled = 0;
 	u8 page = sca_get_page(card);
 
-	while((stat = sca_intr_status(card)) != 0) {
+	while ((stat = sca_intr_status(card)) != 0) {
 		handled = 1;
 		for (i = 0; i < 2; i++) {
 			port_t *port = get_port(card, i);
@@ -413,7 +413,7 @@ static void sca_set_port(port_t *port)
 
 			/* Baud Rate = CLOCK_BASE / TMC / 2^BR */
 			tmc = CLOCK_BASE / brv / port->settings.clock_rate;
-		}while (br > 1 && tmc <= 128);
+		} while (br > 1 && tmc <= 128);
 
 		if (tmc < 1) {
 			tmc = 1;

From 4d7efa73fa268bba3b309988dd2d4c3787a17ddf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:33 +0800
Subject: [PATCH 1111/3292] sch_htb: fix doc warning in htb_add_to_wait_tree()

Add description for parameters of htb_add_to_wait_tree() to fix
gcc W=1 warnings:

net/sched/sch_htb.c:308: warning: Function parameter or member 'q' not described in 'htb_add_to_wait_tree'
net/sched/sch_htb.c:308: warning: Function parameter or member 'cl' not described in 'htb_add_to_wait_tree'
net/sched/sch_htb.c:308: warning: Function parameter or member 'delay' not described in 'htb_add_to_wait_tree'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4f9304567dcc..4eeef342c3c6 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -301,6 +301,9 @@ static void htb_add_to_id_tree(struct rb_root *root,
 
 /**
  * htb_add_to_wait_tree - adds class to the event queue with delay
+ * @q: the priority event queue
+ * @cl: the class to add
+ * @delay: delay in microseconds
  *
  * The class is added to priority event queue to indicate that class will
  * change its mode in cl->pq_key microseconds. Make sure that class is not

From 274e5d0e55aa37f2bcee42f618b1923cab0ceabf Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:34 +0800
Subject: [PATCH 1112/3292] sch_htb: fix doc warning in htb_next_rb_node()

Add description for parameters of htb_next_rb_node() to fix
gcc W=1 warnings:

net/sched/sch_htb.c:339: warning: Function parameter or member 'n' not described in 'htb_next_rb_node'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4eeef342c3c6..5ad28df6b18c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -337,6 +337,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
 
 /**
  * htb_next_rb_node - finds next node in binary tree
+ * @n: the current node in binary tree
  *
  * When we are past last key we return NULL.
  * Average complexity is 2 steps per call.

From 996bccc39afba3db1309b8cd845f1f463516050e Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:35 +0800
Subject: [PATCH 1113/3292] sch_htb: fix doc warning in htb_add_class_to_row()

Add description for parameters of htb_add_class_to_row() to fix
gcc W=1 warnings:

net/sched/sch_htb.c:351: warning: Function parameter or member 'q' not described in 'htb_add_class_to_row'
net/sched/sch_htb.c:351: warning: Function parameter or member 'cl' not described in 'htb_add_class_to_row'
net/sched/sch_htb.c:351: warning: Function parameter or member 'mask' not described in 'htb_add_class_to_row'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5ad28df6b18c..97a9df42849e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -349,6 +349,9 @@ static inline void htb_next_rb_node(struct rb_node **n)
 
 /**
  * htb_add_class_to_row - add class to its row
+ * @q: the priority event queue
+ * @cl: the class to add
+ * @mask: the given priorities in class in bitmap
  *
  * The class is added to row at priorities marked in mask.
  * It does nothing if mask == 0.

From 5f8c6d05f3900a586fc3e7947423cd3b8aafcc33 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:36 +0800
Subject: [PATCH 1114/3292] sch_htb: fix doc warning in
 htb_remove_class_from_row()

Add description for parameters of htb_remove_class_from_row()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:380: warning: Function parameter or member 'q' not described in 'htb_remove_class_from_row'
net/sched/sch_htb.c:380: warning: Function parameter or member 'cl' not described in 'htb_remove_class_from_row'
net/sched/sch_htb.c:380: warning: Function parameter or member 'mask' not described in 'htb_remove_class_from_row'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 97a9df42849e..30a53db7eeb6 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -381,6 +381,9 @@ static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 
 /**
  * htb_remove_class_from_row - removes class from its row
+ * @q: the priority event queue
+ * @cl: the class to add
+ * @mask: the given priorities in class in bitmap
  *
  * The class is removed from row at priorities marked in mask.
  * It does nothing if mask == 0.

From 876b5fc0c0fb879d42736a6903af23a9ef6b985a Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:37 +0800
Subject: [PATCH 1115/3292] sch_htb: fix doc warning in htb_activate_prios()

Add description for parameters of htb_activate_prios()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:407: warning: Function parameter or member 'q' not described in 'htb_activate_prios'
net/sched/sch_htb.c:407: warning: Function parameter or member 'cl' not described in 'htb_activate_prios'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 30a53db7eeb6..06f1b4ee88e2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -411,6 +411,8 @@ static inline void htb_remove_class_from_row(struct htb_sched *q,
 
 /**
  * htb_activate_prios - creates active classe's feed chain
+ * @q: the priority event queue
+ * @cl: the class to activate
  *
  * The class is connected to ancestors and/or appropriate rows
  * for priorities it is participating on. cl->cmode must be new

From 4113be2020a82d503a63917d12860766048182eb Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:38 +0800
Subject: [PATCH 1116/3292] sch_htb: fix doc warning in htb_deactivate_prios()

Add description for parameters of htb_deactivate_prios()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:442: warning: Function parameter or member 'q' not described in 'htb_deactivate_prios'
net/sched/sch_htb.c:442: warning: Function parameter or member 'cl' not described in 'htb_deactivate_prios'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 06f1b4ee88e2..869e59be0993 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -448,6 +448,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 
 /**
  * htb_deactivate_prios - remove class from feed chain
+ * @q: the priority event queue
+ * @cl: the class to deactivate
  *
  * cl->cmode must represent old mode (before deactivation). It does
  * nothing if cl->prio_activity == 0. Class is removed from all feed

From 1e9559527a9d8cc061e884f6b237754d06711335 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:39 +0800
Subject: [PATCH 1117/3292] sch_htb: fix doc warning in htb_class_mode()

Add description for parameters of htb_class_mode()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:507: warning: Function parameter or member 'cl' not described in 'htb_class_mode'
net/sched/sch_htb.c:507: warning: Function parameter or member 'diff' not described in 'htb_class_mode'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 869e59be0993..062ddf88ce22 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -510,6 +510,8 @@ static inline s64 htb_hiwater(const struct htb_class *cl)
 
 /**
  * htb_class_mode - computes and returns current class mode
+ * @cl: the target class
+ * @diff: diff time in microseconds
  *
  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference

From 4b479e9883ce4d99ffd4eeffc61b6ef70e06ee4f Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:40 +0800
Subject: [PATCH 1118/3292] sch_htb: fix doc warning in htb_change_class_mode()

Add description for parameters of htb_change_class_mode()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:533: warning: Function parameter or member 'q' not described in 'htb_change_class_mode'
net/sched/sch_htb.c:533: warning: Function parameter or member 'cl' not described in 'htb_change_class_mode'
net/sched/sch_htb.c:533: warning: Function parameter or member 'diff' not described in 'htb_change_class_mode'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 062ddf88ce22..875ef6cd2ce0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -540,6 +540,9 @@ htb_class_mode(struct htb_class *cl, s64 *diff)
 
 /**
  * htb_change_class_mode - changes classe's mode
+ * @q: the priority event queue
+ * @cl: the target class
+ * @diff: diff time in microseconds
  *
  * This should be the only way how to change classe's mode under normal
  * circumstances. Routine will update feed lists linkage, change mode

From 8df7e8fff8da0feb3d7d686f7992e323f0cc464a Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:41 +0800
Subject: [PATCH 1119/3292] sch_htb: fix doc warning in htb_activate()

Add description for parameters of htb_activate()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:562: warning: Function parameter or member 'q' not described in 'htb_activate'
net/sched/sch_htb.c:562: warning: Function parameter or member 'cl' not described in 'htb_activate'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 875ef6cd2ce0..1ee47de6f72c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -575,6 +575,8 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 
 /**
  * htb_activate - inserts leaf cl into appropriate active feeds
+ * @q: the priority event queue
+ * @cl: the target class
  *
  * Routine learns (new) priority of leaf and activates feed chain
  * for the prio. It can be called on already active leaf safely.

From 9a034f25e4721c0f021c33ca0788c7dc13bed290 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:42 +0800
Subject: [PATCH 1120/3292] sch_htb: fix doc warning in htb_deactivate()

Add description for parameters of htb_deactivate()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:578: warning: Function parameter or member 'q' not described in 'htb_deactivate'
net/sched/sch_htb.c:578: warning: Function parameter or member 'cl' not described in 'htb_deactivate'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 1ee47de6f72c..9d4c5370257d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -594,6 +594,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 
 /**
  * htb_deactivate - remove leaf cl from active feeds
+ * @q: the priority event queue
+ * @cl: the target class
  *
  * Make sure that leaf is active. In the other words it can't be called
  * with non-active leaf. It also removes class from the drop list.

From 0e5c90848a28edc726c6badf6875790830c9428c Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:43 +0800
Subject: [PATCH 1121/3292] sch_htb: fix doc warning in htb_charge_class()

Add description for parameters of htb_charge_class()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:663: warning: Function parameter or member 'q' not described in 'htb_charge_class'
net/sched/sch_htb.c:663: warning: Function parameter or member 'cl' not described in 'htb_charge_class'
net/sched/sch_htb.c:663: warning: Function parameter or member 'level' not described in 'htb_charge_class'
net/sched/sch_htb.c:663: warning: Function parameter or member 'skb' not described in 'htb_charge_class'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 9d4c5370257d..a6cd3f18ff87 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -675,6 +675,10 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
+ * @q: the priority event queue
+ * @cl: the class to start iterate
+ * @level: the minimum level to account
+ * @skb: the socket buffer
  *
  * Routine assumes that packet "bytes" long was dequeued from leaf cl
  * borrowing from "level". It accounts bytes to ceil leaky bucket for

From 2c3ee53ea663a7aff97271278efb19d543e0fbe9 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:44 +0800
Subject: [PATCH 1122/3292] sch_htb: fix doc warning in htb_do_events()

Add description for parameters of htb_do_events()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:708: warning: Function parameter or member 'q' not described in 'htb_do_events'
net/sched/sch_htb.c:708: warning: Function parameter or member 'level' not described in 'htb_do_events'
net/sched/sch_htb.c:708: warning: Function parameter or member 'start' not described in 'htb_do_events'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index a6cd3f18ff87..66c330244b9d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -728,6 +728,9 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 
 /**
  * htb_do_events - make mode changes to classes at the level
+ * @q: the priority event queue
+ * @level: which wait_pq in 'q->hlevel'
+ * @start: start jiffies
  *
  * Scans event queue for pending events and applies them. Returns time of
  * next pending event (0 for no event in pq, q->now for too many events).

From 9977d6f56bacc9784654be4d0f4d27b368f57f5b Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Sat, 5 Jun 2021 18:18:45 +0800
Subject: [PATCH 1123/3292] sch_htb: fix doc warning in htb_lookup_leaf()

Add description for parameters of htb_lookup_leaf()
to fix gcc W=1 warnings:

net/sched/sch_htb.c:773: warning: Function parameter or member 'hprio' not described in 'htb_lookup_leaf'
net/sched/sch_htb.c:773: warning: Function parameter or member 'prio' not described in 'htb_lookup_leaf'

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 66c330244b9d..7a69e4e608c3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -799,6 +799,8 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 
 /**
  * htb_lookup_leaf - returns next leaf class in DRR order
+ * @hprio: the current one
+ * @prio: which prio in class
  *
  * Find leaf where current feed pointers points to.
  */

From d402af20315c99d85c9310d6f7a00e5aca53e192 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 5 Jun 2021 20:21:27 +0800
Subject: [PATCH 1124/3292] net: lantiq: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 36dc3e5f6218..27df06ed355e 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -436,7 +436,6 @@ static int xrx200_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	struct resource *res;
 	struct xrx200_priv *priv;
 	struct net_device *net_dev;
 	int err;
@@ -456,13 +455,7 @@ static int xrx200_probe(struct platform_device *pdev)
 	net_dev->max_mtu = XRX200_DMA_DATA_LEN;
 
 	/* load the memory ranges */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "failed to get resources\n");
-		return -ENOENT;
-	}
-
-	priv->pmac_reg = devm_ioremap_resource(dev, res);
+	priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(priv->pmac_reg))
 		return PTR_ERR(priv->pmac_reg);
 

From ec89c2b55dc798096c5c16af4ee0094ff6c8cb3b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 5 Jun 2021 20:25:15 +0800
Subject: [PATCH 1125/3292] net: ethernet: ixp4xx_eth: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xscale/ixp4xx_eth.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 1ecceeb9700d..85c66af9e56d 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1425,7 +1425,6 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	struct eth_plat_info *plat;
 	struct net_device *ndev;
-	struct resource *res;
 	struct port *port;
 	int err;
 
@@ -1482,10 +1481,7 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	port->id = plat->npe;
 
 	/* Get the port resource and remap */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-	port->regs = devm_ioremap_resource(dev, res);
+	port->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(port->regs))
 		return PTR_ERR(port->regs);
 

From 85eb1389458d134bdb75dad502cc026c3753a619 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Sat, 5 Jun 2021 11:31:00 -0400
Subject: [PATCH 1126/3292] virtio_net: Remove BUG() to avoid machine dead

We should not directly BUG() when there is hdr error, it is
better to output a print when such error happens. Currently,
the caller of xmit_skb() already did it.

Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f8f5b8646a43..0416a7e00914 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1636,7 +1636,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
 				    virtio_is_little_endian(vi->vdev), false,
 				    0))
-		BUG();
+		return -EPROTO;
 
 	if (vi->mergeable_rx_bufs)
 		hdr->num_buffers = 0;

From cda9de0b8daf2ebfc07d385ef0039fd7860ddf25 Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Mon, 7 Jun 2021 10:37:41 +0800
Subject: [PATCH 1127/3292] pktgen: add pktgen_handle_all_threads() for the
 same code

The pktgen_{run, reset, stop}_all_threads() has the same code,
so add pktgen_handle_all_threads() for it.

Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3fba429f1f57..7e258d255e90 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -467,7 +467,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
 static void pktgen_run_all_threads(struct pktgen_net *pn);
 static void pktgen_reset_all_threads(struct pktgen_net *pn);
-static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
+static void pktgen_stop_all_threads(struct pktgen_net *pn);
 
 static void pktgen_stop(struct pktgen_thread *t);
 static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
@@ -516,14 +516,11 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
 	data[count - 1] = 0;	/* Strip trailing '\n' and terminate string */
 
 	if (!strcmp(data, "stop"))
-		pktgen_stop_all_threads_ifs(pn);
-
+		pktgen_stop_all_threads(pn);
 	else if (!strcmp(data, "start"))
 		pktgen_run_all_threads(pn);
-
 	else if (!strcmp(data, "reset"))
 		pktgen_reset_all_threads(pn);
-
 	else
 		return -EINVAL;
 
@@ -3027,20 +3024,25 @@ static void pktgen_run(struct pktgen_thread *t)
 		t->control &= ~(T_STOP);
 }
 
-static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
+static void pktgen_handle_all_threads(struct pktgen_net *pn, u32 flags)
 {
 	struct pktgen_thread *t;
 
-	func_enter();
-
 	mutex_lock(&pktgen_thread_lock);
 
 	list_for_each_entry(t, &pn->pktgen_threads, th_list)
-		t->control |= T_STOP;
+		t->control |= (flags);
 
 	mutex_unlock(&pktgen_thread_lock);
 }
 
+static void pktgen_stop_all_threads(struct pktgen_net *pn)
+{
+	func_enter();
+
+	pktgen_handle_all_threads(pn, T_STOP);
+}
+
 static int thread_is_running(const struct pktgen_thread *t)
 {
 	const struct pktgen_dev *pkt_dev;
@@ -3103,16 +3105,9 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
 
 static void pktgen_run_all_threads(struct pktgen_net *pn)
 {
-	struct pktgen_thread *t;
-
 	func_enter();
 
-	mutex_lock(&pktgen_thread_lock);
-
-	list_for_each_entry(t, &pn->pktgen_threads, th_list)
-		t->control |= (T_RUN);
-
-	mutex_unlock(&pktgen_thread_lock);
+	pktgen_handle_all_threads(pn, T_RUN);
 
 	/* Propagate thread->control  */
 	schedule_timeout_interruptible(msecs_to_jiffies(125));
@@ -3122,16 +3117,9 @@ static void pktgen_run_all_threads(struct pktgen_net *pn)
 
 static void pktgen_reset_all_threads(struct pktgen_net *pn)
 {
-	struct pktgen_thread *t;
-
 	func_enter();
 
-	mutex_lock(&pktgen_thread_lock);
-
-	list_for_each_entry(t, &pn->pktgen_threads, th_list)
-		t->control |= (T_REMDEVALL);
-
-	mutex_unlock(&pktgen_thread_lock);
+	pktgen_handle_all_threads(pn, T_REMDEVALL);
 
 	/* Propagate thread->control  */
 	schedule_timeout_interruptible(msecs_to_jiffies(125));

From 3f07ce8e528746d477cfb301f4dc7b197ad1e2a3 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Mon, 7 Jun 2021 10:57:09 +0800
Subject: [PATCH 1128/3292] net: dsa: hellcreek: Use is_zero_ether_addr()
 instead of memcmp()

Using is_zero_ether_addr() instead of directly use
memcmp() to determine if the ethernet address is all
zeros.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 4d78219da253..9fdcc4bde480 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -927,7 +927,6 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
 
 	/* Read table */
 	for (i = 0; i < hellcreek->fdb_entries; ++i) {
-		unsigned char null_addr[ETH_ALEN] = { 0 };
 		struct hellcreek_fdb_entry entry = { 0 };
 
 		/* Read entry */
@@ -937,7 +936,7 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
 		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
 
 		/* Check valid */
-		if (!memcmp(entry.mac, null_addr, ETH_ALEN))
+		if (is_zero_ether_addr(entry.mac))
 			continue;
 
 		/* Check port mask */

From 4fb473fe7325181f87d586685d21f27a9b9e25f8 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Sun, 6 Jun 2021 23:33:07 -0700
Subject: [PATCH 1129/3292] atm: [br2864] fix spelling mistakes

interrupt should be changed to interrupting.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 3e17a5ecaa94..dd2a8dabed84 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -93,8 +93,8 @@ struct br2684_dev {
  * This lock should be held for writing any time the list of devices or
  * their attached vcc's could be altered.  It should be held for reading
  * any time these are being queried.  Note that we sometimes need to
- * do read-locking under interrupt context, so write locking must block
- * the current CPU's interrupts
+ * do read-locking under interrupting context, so write locking must block
+ * the current CPU's interrupts.
  */
 static DEFINE_RWLOCK(devs_lock);
 

From ef91f7981036a293ee1fa1cd2f670702a3889f4b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 16:11:45 +0800
Subject: [PATCH 1130/3292] net: gemini: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cortina/gemini.c | 34 +++++++--------------------
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 8df6f081f244..c2ebb3388789 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2356,8 +2356,6 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct gemini_ethernet *geth;
 	struct net_device *netdev;
-	struct resource *gmacres;
-	struct resource *dmares;
 	struct device *parent;
 	unsigned int id;
 	int irq;
@@ -2390,24 +2388,18 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 	port->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
 	/* DMA memory */
-	dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!dmares) {
-		dev_err(dev, "no DMA resource\n");
-		return -ENODEV;
-	}
-	port->dma_base = devm_ioremap_resource(dev, dmares);
-	if (IS_ERR(port->dma_base))
+	port->dma_base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+	if (IS_ERR(port->dma_base)) {
+		dev_err(dev, "get DMA address failed\n");
 		return PTR_ERR(port->dma_base);
+	}
 
 	/* GMAC config memory */
-	gmacres = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!gmacres) {
-		dev_err(dev, "no GMAC resource\n");
-		return -ENODEV;
-	}
-	port->gmac_base = devm_ioremap_resource(dev, gmacres);
-	if (IS_ERR(port->gmac_base))
+	port->gmac_base = devm_platform_get_and_ioremap_resource(pdev, 1, NULL);
+	if (IS_ERR(port->gmac_base)) {
+		dev_err(dev, "get GMAC address failed\n");
 		return PTR_ERR(port->gmac_base);
+	}
 
 	/* Interrupt */
 	irq = platform_get_irq(pdev, 0);
@@ -2502,10 +2494,6 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 	if (ret)
 		goto unprepare;
 
-	netdev_info(netdev,
-		    "irq %d, DMA @ 0x%pap, GMAC @ 0x%pap\n",
-		    port->irq, &dmares->start,
-		    &gmacres->start);
 	return 0;
 
 unprepare:
@@ -2544,17 +2532,13 @@ static int gemini_ethernet_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct gemini_ethernet *geth;
 	unsigned int retry = 5;
-	struct resource *res;
 	u32 val;
 
 	/* Global registers */
 	geth = devm_kzalloc(dev, sizeof(*geth), GFP_KERNEL);
 	if (!geth)
 		return -ENOMEM;
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-	geth->base = devm_ioremap_resource(dev, res);
+	geth->base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(geth->base))
 		return PTR_ERR(geth->base);
 	geth->dev = dev;

From 218d154f540a58b82394e128e425560181c1662e Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:20 +0200
Subject: [PATCH 1131/3292] net: usb: asix: ax88772_bind: use devm_kzalloc()
 instead of kzalloc()

Make resource management easier, use devm_kzalloc().

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 19a8fafb8f04..5f767a33264e 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -746,11 +746,11 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 		dev->rx_urb_size = 2048;
 	}
 
-	dev->driver_priv = kzalloc(sizeof(struct asix_common_private), GFP_KERNEL);
-	if (!dev->driver_priv)
+	priv = devm_kzalloc(&dev->udev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
-	priv = dev->driver_priv;
+	dev->driver_priv = priv;
 
 	priv->presvd_phy_bmcr = 0;
 	priv->presvd_phy_advertise = 0;
@@ -768,7 +768,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
 	asix_rx_fixup_common_free(dev->driver_priv);
-	kfree(dev->driver_priv);
 }
 
 static const struct ethtool_ops ax88178_ethtool_ops = {

From 7e88b11a862afe59ee0c365123ea5fb96a26cb3b Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:21 +0200
Subject: [PATCH 1132/3292] net: usb: asix: refactor asix_read_phy_addr() and
 handle errors on return

Refactor asix_read_phy_addr() to return usable error value directly and
make sure all callers handle this error.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix.h         |  3 +--
 drivers/net/usb/asix_common.c  | 33 +++++++++++++++++----------------
 drivers/net/usb/asix_devices.c | 15 ++++++++++++---
 drivers/net/usb/ax88172a.c     |  5 +++++
 4 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index 3b53685301de..edb94efd265e 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -205,8 +205,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 int asix_set_sw_mii(struct usbnet *dev, int in_pm);
 int asix_set_hw_mii(struct usbnet *dev, int in_pm);
 
-int asix_read_phy_addr(struct usbnet *dev, int internal);
-int asix_get_phy_addr(struct usbnet *dev);
+int asix_read_phy_addr(struct usbnet *dev, bool internal);
 
 int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm);
 
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 7bc6e8f856fe..e1109f1a8dd5 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -288,33 +288,34 @@ int asix_set_hw_mii(struct usbnet *dev, int in_pm)
 	return ret;
 }
 
-int asix_read_phy_addr(struct usbnet *dev, int internal)
+int asix_read_phy_addr(struct usbnet *dev, bool internal)
 {
-	int offset = (internal ? 1 : 0);
+	int ret, offset;
 	u8 buf[2];
-	int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0);
 
-	netdev_dbg(dev->net, "asix_get_phy_addr()\n");
+	ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0);
+	if (ret < 0)
+		goto error;
 
 	if (ret < 2) {
-		netdev_err(dev->net, "Error reading PHYID register: %02x\n", ret);
-		goto out;
+		ret = -EIO;
+		goto error;
 	}
-	netdev_dbg(dev->net, "asix_get_phy_addr() returning 0x%04x\n",
-		   *((__le16 *)buf));
+
+	offset = (internal ? 1 : 0);
 	ret = buf[offset];
 
-out:
+	netdev_dbg(dev->net, "%s PHY address 0x%x\n",
+		   internal ? "internal" : "external", ret);
+
+	return ret;
+
+error:
+	netdev_err(dev->net, "Error reading PHY_ID register: %02x\n", ret);
+
 	return ret;
 }
 
-int asix_get_phy_addr(struct usbnet *dev)
-{
-	/* return the address of the internal phy */
-	return asix_read_phy_addr(dev, 1);
-}
-
-
 int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm)
 {
 	int ret;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 5f767a33264e..00b6ac0570eb 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -262,7 +262,10 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->mii.mdio_write = asix_mdio_write;
 	dev->mii.phy_id_mask = 0x3f;
 	dev->mii.reg_num_mask = 0x1f;
-	dev->mii.phy_id = asix_get_phy_addr(dev);
+
+	dev->mii.phy_id = asix_read_phy_addr(dev, true);
+	if (dev->mii.phy_id < 0)
+		return dev->mii.phy_id;
 
 	dev->net->netdev_ops = &ax88172_netdev_ops;
 	dev->net->ethtool_ops = &ax88172_ethtool_ops;
@@ -717,7 +720,10 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->mii.mdio_write = asix_mdio_write;
 	dev->mii.phy_id_mask = 0x1f;
 	dev->mii.reg_num_mask = 0x1f;
-	dev->mii.phy_id = asix_get_phy_addr(dev);
+
+	dev->mii.phy_id = asix_read_phy_addr(dev, true);
+	if (dev->mii.phy_id < 0)
+		return dev->mii.phy_id;
 
 	dev->net->netdev_ops = &ax88772_netdev_ops;
 	dev->net->ethtool_ops = &ax88772_ethtool_ops;
@@ -1080,7 +1086,10 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->mii.phy_id_mask = 0x1f;
 	dev->mii.reg_num_mask = 0xff;
 	dev->mii.supports_gmii = 1;
-	dev->mii.phy_id = asix_get_phy_addr(dev);
+
+	dev->mii.phy_id = asix_read_phy_addr(dev, true);
+	if (dev->mii.phy_id < 0)
+		return dev->mii.phy_id;
 
 	dev->net->netdev_ops = &ax88178_netdev_ops;
 	dev->net->ethtool_ops = &ax88178_ethtool_ops;
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index b404c9462dce..c8ca5187eece 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -220,6 +220,11 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
 	}
 
 	priv->phy_addr = asix_read_phy_addr(dev, priv->use_embdphy);
+	if (priv->phy_addr < 0) {
+		ret = priv->phy_addr;
+		goto free;
+	}
+
 	ax88172a_reset_phy(dev, priv->use_embdphy);
 
 	/* Asix framing packs multiple eth frames into a 2K usb bulk transfer */

From dde25846925765a88df8964080098174495c1f10 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:22 +0200
Subject: [PATCH 1133/3292] net: usb/phy: asix: add support for ax88772A/C PHYs

Add support for build-in x88772A/C PHYs

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/ax88796b.c | 74 +++++++++++++++++++++++++++++++++++++-
 drivers/net/usb/Kconfig    |  1 +
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c
index 79bf7ef1fcfd..457896337505 100644
--- a/drivers/net/phy/ax88796b.c
+++ b/drivers/net/phy/ax88796b.c
@@ -10,6 +10,8 @@
 #include <linux/mii.h>
 #include <linux/phy.h>
 
+#define PHY_ID_ASIX_AX88772A		0x003b1861
+#define PHY_ID_ASIX_AX88772C		0x003b1881
 #define PHY_ID_ASIX_AX88796B		0x003b1841
 
 MODULE_DESCRIPTION("Asix PHY driver");
@@ -39,7 +41,75 @@ static int asix_soft_reset(struct phy_device *phydev)
 	return genphy_soft_reset(phydev);
 }
 
-static struct phy_driver asix_driver[] = { {
+/* AX88772A is not working properly with some old switches (NETGEAR EN 108TP):
+ * after autoneg is done and the link status is reported as active, the MII_LPA
+ * register is 0. This issue is not reproducible on AX88772C.
+ */
+static int asix_ax88772a_read_status(struct phy_device *phydev)
+{
+	int ret, val;
+
+	ret = genphy_update_link(phydev);
+	if (ret)
+		return ret;
+
+	if (!phydev->link)
+		return 0;
+
+	/* If MII_LPA is 0, phy_resolve_aneg_linkmode() will fail to resolve
+	 * linkmode so use MII_BMCR as default values.
+	 */
+	val = phy_read(phydev, MII_BMCR);
+	if (val < 0)
+		return val;
+
+	if (val & BMCR_SPEED100)
+		phydev->speed = SPEED_100;
+	else
+		phydev->speed = SPEED_10;
+
+	if (val & BMCR_FULLDPLX)
+		phydev->duplex = DUPLEX_FULL;
+	else
+		phydev->duplex = DUPLEX_HALF;
+
+	ret = genphy_read_lpa(phydev);
+	if (ret < 0)
+		return ret;
+
+	if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete)
+		phy_resolve_aneg_linkmode(phydev);
+
+	return 0;
+}
+
+static void asix_ax88772a_link_change_notify(struct phy_device *phydev)
+{
+	/* Reset PHY, otherwise MII_LPA will provide outdated information.
+	 * This issue is reproducible only with some link partner PHYs
+	 */
+	if (phydev->state == PHY_NOLINK && phydev->drv->soft_reset)
+		phydev->drv->soft_reset(phydev);
+}
+
+static struct phy_driver asix_driver[] = {
+{
+	PHY_ID_MATCH_EXACT(PHY_ID_ASIX_AX88772A),
+	.name		= "Asix Electronics AX88772A",
+	.flags		= PHY_IS_INTERNAL,
+	.read_status	= asix_ax88772a_read_status,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
+	.soft_reset	= asix_soft_reset,
+	.link_change_notify	= asix_ax88772a_link_change_notify,
+}, {
+	PHY_ID_MATCH_EXACT(PHY_ID_ASIX_AX88772C),
+	.name		= "Asix Electronics AX88772C",
+	.flags		= PHY_IS_INTERNAL,
+	.suspend	= genphy_suspend,
+	.resume		= genphy_resume,
+	.soft_reset	= asix_soft_reset,
+}, {
 	.phy_id		= PHY_ID_ASIX_AX88796B,
 	.name		= "Asix Electronics AX88796B",
 	.phy_id_mask	= 0xfffffff0,
@@ -50,6 +120,8 @@ static struct phy_driver asix_driver[] = { {
 module_phy_driver(asix_driver);
 
 static struct mdio_device_id __maybe_unused asix_tbl[] = {
+	{ PHY_ID_MATCH_EXACT(PHY_ID_ASIX_AX88772A) },
+	{ PHY_ID_MATCH_EXACT(PHY_ID_ASIX_AX88772C) },
 	{ PHY_ID_ASIX_AX88796B, 0xfffffff0 },
 	{ }
 };
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 179308782888..6f7be47974f6 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -164,6 +164,7 @@ config USB_NET_AX8817X
 	depends on USB_USBNET
 	select CRC32
 	select PHYLIB
+	select AX88796B_PHY
 	default y
 	help
 	  This option adds support for ASIX AX88xxx based USB 2.0

From e532a096be0e5e570b383e71d4560e7f04384e0f Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:23 +0200
Subject: [PATCH 1134/3292] net: usb: asix: ax88772: add phylib support

To be able to use ax88772 with external PHYs and use advantage of
existing PHY drivers, we need to port at least ax88772 part of asix
driver to the phylib framework.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix.h         |   9 +++
 drivers/net/usb/asix_common.c  |  37 ++++++++++
 drivers/net/usb/asix_devices.c | 120 +++++++++++++++++++++------------
 drivers/net/usb/ax88172a.c     |  14 ----
 4 files changed, 122 insertions(+), 58 deletions(-)

diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index edb94efd265e..2122d302e643 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -25,6 +25,7 @@
 #include <linux/usb/usbnet.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
+#include <linux/phy.h>
 
 #define DRIVER_VERSION "22-Dec-2011"
 #define DRIVER_NAME "asix"
@@ -178,6 +179,10 @@ struct asix_common_private {
 	u16 presvd_phy_advertise;
 	u16 presvd_phy_bmcr;
 	struct asix_rx_fixup_info rx_fixup_info;
+	struct mii_bus *mdio;
+	struct phy_device *phydev;
+	u16 phy_addr;
+	char phy_name[20];
 };
 
 extern const struct driver_info ax88172a_info;
@@ -214,6 +219,7 @@ int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm);
 
 u16 asix_read_medium_status(struct usbnet *dev, int in_pm);
 int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm);
+void asix_adjust_link(struct net_device *netdev);
 
 int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm);
 
@@ -222,6 +228,9 @@ void asix_set_multicast(struct net_device *net);
 int asix_mdio_read(struct net_device *netdev, int phy_id, int loc);
 void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val);
 
+int asix_mdio_bus_read(struct mii_bus *bus, int phy_id, int regnum);
+int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum, u16 val);
+
 int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc);
 void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc,
 			  int val);
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index e1109f1a8dd5..085bc8281082 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -384,6 +384,27 @@ int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm)
 	return ret;
 }
 
+/* set MAC link settings according to information from phylib */
+void asix_adjust_link(struct net_device *netdev)
+{
+	struct phy_device *phydev = netdev->phydev;
+	struct usbnet *dev = netdev_priv(netdev);
+	u16 mode = 0;
+
+	if (phydev->link) {
+		mode = AX88772_MEDIUM_DEFAULT;
+
+		if (phydev->duplex == DUPLEX_HALF)
+			mode &= ~AX_MEDIUM_FD;
+
+		if (phydev->speed != SPEED_100)
+			mode &= ~AX_MEDIUM_PS;
+	}
+
+	asix_write_medium_mode(dev, mode, 0);
+	phy_print_status(phydev);
+}
+
 int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm)
 {
 	int ret;
@@ -506,6 +527,22 @@ void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val)
 	mutex_unlock(&dev->phy_mutex);
 }
 
+/* MDIO read and write wrappers for phylib */
+int asix_mdio_bus_read(struct mii_bus *bus, int phy_id, int regnum)
+{
+	struct usbnet *priv = bus->priv;
+
+	return asix_mdio_read(priv->net, phy_id, regnum);
+}
+
+int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum, u16 val)
+{
+	struct usbnet *priv = bus->priv;
+
+	asix_mdio_write(priv->net, phy_id, regnum, val);
+	return 0;
+}
+
 int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
 {
 	struct usbnet *dev = netdev_priv(netdev);
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 00b6ac0570eb..e4cd85e38edd 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -285,7 +285,7 @@ out:
 
 static const struct ethtool_ops ax88772_ethtool_ops = {
 	.get_drvinfo		= asix_get_drvinfo,
-	.get_link		= asix_get_link,
+	.get_link		= usbnet_get_link,
 	.get_msglevel		= usbnet_get_msglevel,
 	.set_msglevel		= usbnet_set_msglevel,
 	.get_wol		= asix_get_wol,
@@ -293,37 +293,15 @@ static const struct ethtool_ops ax88772_ethtool_ops = {
 	.get_eeprom_len		= asix_get_eeprom_len,
 	.get_eeprom		= asix_get_eeprom,
 	.set_eeprom		= asix_set_eeprom,
-	.nway_reset		= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
-	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
+	.nway_reset		= phy_ethtool_nway_reset,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
 
-static int ax88772_link_reset(struct usbnet *dev)
-{
-	u16 mode;
-	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
-
-	mii_check_media(&dev->mii, 1, 1);
-	mii_ethtool_gset(&dev->mii, &ecmd);
-	mode = AX88772_MEDIUM_DEFAULT;
-
-	if (ethtool_cmd_speed(&ecmd) != SPEED_100)
-		mode &= ~AX_MEDIUM_PS;
-
-	if (ecmd.duplex != DUPLEX_FULL)
-		mode &= ~AX_MEDIUM_FD;
-
-	netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
-		   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
-
-	asix_write_medium_mode(dev, mode, 0);
-
-	return 0;
-}
-
 static int ax88772_reset(struct usbnet *dev)
 {
 	struct asix_data *data = (struct asix_data *)&dev->data;
+	struct asix_common_private *priv = dev->driver_priv;
 	int ret;
 
 	/* Rewrite MAC address */
@@ -342,6 +320,8 @@ static int ax88772_reset(struct usbnet *dev)
 	if (ret < 0)
 		goto out;
 
+	phy_start(priv->phydev);
+
 	return 0;
 
 out:
@@ -586,7 +566,7 @@ static const struct net_device_ops ax88772_netdev_ops = {
 	.ndo_get_stats64	= dev_get_tstats64,
 	.ndo_set_mac_address 	= asix_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_do_ioctl		= asix_ioctl,
+	.ndo_do_ioctl		= phy_do_ioctl_running,
 	.ndo_set_rx_mode        = asix_set_multicast,
 };
 
@@ -677,12 +657,57 @@ static int asix_resume(struct usb_interface *intf)
 	return usbnet_resume(intf);
 }
 
+static int ax88772_init_mdio(struct usbnet *dev)
+{
+	struct asix_common_private *priv = dev->driver_priv;
+
+	priv->mdio = devm_mdiobus_alloc(&dev->udev->dev);
+	if (!priv->mdio)
+		return -ENOMEM;
+
+	priv->mdio->priv = dev;
+	priv->mdio->read = &asix_mdio_bus_read;
+	priv->mdio->write = &asix_mdio_bus_write;
+	priv->mdio->name = "Asix MDIO Bus";
+	/* mii bus name is usb-<usb bus number>-<usb device number> */
+	snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d",
+		 dev->udev->bus->busnum, dev->udev->devnum);
+
+	return devm_mdiobus_register(&dev->udev->dev, priv->mdio);
+}
+
+static int ax88772_init_phy(struct usbnet *dev)
+{
+	struct asix_common_private *priv = dev->driver_priv;
+	int ret;
+
+	priv->phy_addr = asix_read_phy_addr(dev, true);
+	if (priv->phy_addr < 0)
+		return priv->phy_addr;
+
+	snprintf(priv->phy_name, sizeof(priv->phy_name), PHY_ID_FMT,
+		 priv->mdio->id, priv->phy_addr);
+
+	priv->phydev = phy_connect(dev->net, priv->phy_name, &asix_adjust_link,
+				   PHY_INTERFACE_MODE_INTERNAL);
+	if (IS_ERR(priv->phydev)) {
+		netdev_err(dev->net, "Could not connect to PHY device %s\n",
+			   priv->phy_name);
+		ret = PTR_ERR(priv->phydev);
+		return ret;
+	}
+
+	phy_attached_info(priv->phydev);
+
+	return 0;
+}
+
 static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-	int ret, i;
 	u8 buf[ETH_ALEN] = {0}, chipcode = 0;
-	u32 phyid;
 	struct asix_common_private *priv;
+	int ret, i;
+	u32 phyid;
 
 	usbnet_get_endpoints(dev, intf);
 
@@ -714,17 +739,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	asix_set_netdev_dev_addr(dev, buf);
 
-	/* Initialize MII structure */
-	dev->mii.dev = dev->net;
-	dev->mii.mdio_read = asix_mdio_read;
-	dev->mii.mdio_write = asix_mdio_write;
-	dev->mii.phy_id_mask = 0x1f;
-	dev->mii.reg_num_mask = 0x1f;
-
-	dev->mii.phy_id = asix_read_phy_addr(dev, true);
-	if (dev->mii.phy_id < 0)
-		return dev->mii.phy_id;
-
 	dev->net->netdev_ops = &ax88772_netdev_ops;
 	dev->net->ethtool_ops = &ax88772_ethtool_ops;
 	dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
@@ -768,11 +782,31 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 		priv->suspend = ax88772_suspend;
 	}
 
+	ret = ax88772_init_mdio(dev);
+	if (ret)
+		return ret;
+
+	return ax88772_init_phy(dev);
+}
+
+static int ax88772_stop(struct usbnet *dev)
+{
+	struct asix_common_private *priv = dev->driver_priv;
+
+	/* On unplugged USB, we will get MDIO communication errors and the
+	 * PHY will be set in to PHY_HALTED state.
+	 */
+	if (priv->phydev->state != PHY_HALTED)
+		phy_stop(priv->phydev);
+
 	return 0;
 }
 
 static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
+	struct asix_common_private *priv = dev->driver_priv;
+
+	phy_disconnect(priv->phydev);
 	asix_rx_fixup_common_free(dev->driver_priv);
 }
 
@@ -1161,8 +1195,8 @@ static const struct driver_info ax88772_info = {
 	.bind = ax88772_bind,
 	.unbind = ax88772_unbind,
 	.status = asix_status,
-	.link_reset = ax88772_link_reset,
 	.reset = ax88772_reset,
+	.stop = ax88772_stop,
 	.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET,
 	.rx_fixup = asix_rx_fixup_common,
 	.tx_fixup = asix_tx_fixup,
@@ -1173,7 +1207,6 @@ static const struct driver_info ax88772b_info = {
 	.bind = ax88772_bind,
 	.unbind = ax88772_unbind,
 	.status = asix_status,
-	.link_reset = ax88772_link_reset,
 	.reset = ax88772_reset,
 	.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
 	         FLAG_MULTI_PACKET,
@@ -1209,7 +1242,6 @@ static const struct driver_info hg20f9_info = {
 	.bind = ax88772_bind,
 	.unbind = ax88772_unbind,
 	.status = asix_status,
-	.link_reset = ax88772_link_reset,
 	.reset = ax88772_reset,
 	.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
 	         FLAG_MULTI_PACKET,
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index c8ca5187eece..2e2081346740 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -25,20 +25,6 @@ struct ax88172a_private {
 	struct asix_rx_fixup_info rx_fixup_info;
 };
 
-/* MDIO read and write wrappers for phylib */
-static int asix_mdio_bus_read(struct mii_bus *bus, int phy_id, int regnum)
-{
-	return asix_mdio_read(((struct usbnet *)bus->priv)->net, phy_id,
-			      regnum);
-}
-
-static int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum,
-			       u16 val)
-{
-	asix_mdio_write(((struct usbnet *)bus->priv)->net, phy_id, regnum, val);
-	return 0;
-}
-
 /* set MAC link settings according to information from phylib */
 static void ax88172a_adjust_link(struct net_device *netdev)
 {

From 34a1dee6bc4458d9e059af510f2addc7a74b4c83 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:24 +0200
Subject: [PATCH 1135/3292] net: usb: asix: ax88772: add generic selftest
 support

With working phylib support we are able now to use generic selftests.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/Kconfig        |  1 +
 drivers/net/usb/asix.h         |  1 +
 drivers/net/usb/asix_devices.c | 23 +++++++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 6f7be47974f6..4c5d69732a7e 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -165,6 +165,7 @@ config USB_NET_AX8817X
 	select CRC32
 	select PHYLIB
 	select AX88796B_PHY
+	imply NET_SELFTESTS
 	default y
 	help
 	  This option adds support for ASIX AX88xxx based USB 2.0
diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index 2122d302e643..e1994a246122 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
 #include <linux/phy.h>
+#include <net/selftests.h>
 
 #define DRIVER_VERSION "22-Dec-2011"
 #define DRIVER_NAME "asix"
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index e4cd85e38edd..57dafb3262d9 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -283,6 +283,26 @@ out:
 	return ret;
 }
 
+static void ax88772_ethtool_get_strings(struct net_device *netdev, u32 sset,
+					u8 *data)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		net_selftest_get_strings(data);
+		break;
+	}
+}
+
+static int ax88772_ethtool_get_sset_count(struct net_device *ndev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return net_selftest_get_count();
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct ethtool_ops ax88772_ethtool_ops = {
 	.get_drvinfo		= asix_get_drvinfo,
 	.get_link		= usbnet_get_link,
@@ -296,6 +316,9 @@ static const struct ethtool_ops ax88772_ethtool_ops = {
 	.nway_reset		= phy_ethtool_nway_reset,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.self_test		= net_selftest,
+	.get_strings		= ax88772_ethtool_get_strings,
+	.get_sset_count		= ax88772_ethtool_get_sset_count,
 };
 
 static int ax88772_reset(struct usbnet *dev)

From d275afb663717db99c4749f0ec5e11463642fee6 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:25 +0200
Subject: [PATCH 1136/3292] net: usb: asix: add error handling for asix_mdio_*
 functions

This usb devices can be removed at any time, so we need to forward
correct error value if device was detached.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_common.c | 42 +++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 085bc8281082..ac92bc52a85e 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -485,18 +485,23 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc)
 		return ret;
 	}
 
-	asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
-				(__u16)loc, 2, &res, 0);
-	asix_set_hw_mii(dev, 0);
+	ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, (__u16)loc, 2,
+			    &res, 0);
+	if (ret < 0)
+		goto out;
+
+	ret = asix_set_hw_mii(dev, 0);
+out:
 	mutex_unlock(&dev->phy_mutex);
 
 	netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
 			phy_id, loc, le16_to_cpu(res));
 
-	return le16_to_cpu(res);
+	return ret < 0 ? ret : le16_to_cpu(res);
 }
 
-void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val)
+static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc,
+			     int val)
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	__le16 res = cpu_to_le16(val);
@@ -516,15 +521,25 @@ void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val)
 		ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
 				    0, 0, 1, &smsr, 0);
 	} while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
-	if (ret == -ENODEV) {
-		mutex_unlock(&dev->phy_mutex);
-		return;
-	}
 
-	asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id,
-		       (__u16)loc, 2, &res, 0);
-	asix_set_hw_mii(dev, 0);
+	if (ret == -ENODEV)
+		goto out;
+
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2,
+			     &res, 0);
+	if (ret < 0)
+		goto out;
+
+	ret = asix_set_hw_mii(dev, 0);
+out:
 	mutex_unlock(&dev->phy_mutex);
+
+	return ret < 0 ? ret : 0;
+}
+
+void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val)
+{
+	__asix_mdio_write(netdev, phy_id, loc, val);
 }
 
 /* MDIO read and write wrappers for phylib */
@@ -539,8 +554,7 @@ int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum, u16 val)
 {
 	struct usbnet *priv = bus->priv;
 
-	asix_mdio_write(priv->net, phy_id, regnum, val);
-	return 0;
+	return __asix_mdio_write(priv->net, phy_id, regnum, val);
 }
 
 int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)

From 06edf1a940be0633499e2feea31d380375a22bd9 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:26 +0200
Subject: [PATCH 1137/3292] net: phy: do not print dump stack if device was
 removed

In case phy_state_machine() works on top of USB device, we can get -ENODEV
at any point. So, be less noisy if device was removed.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 1f0512e39c65..1089a93d12f6 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1136,6 +1136,9 @@ void phy_state_machine(struct work_struct *work)
 	else if (do_suspend)
 		phy_suspend(phydev);
 
+	if (err == -ENODEV)
+		return;
+
 	if (err < 0)
 		phy_error(phydev);
 

From 2c9d6c2b871d5841ce26ede3e81fd37e2e33c42c Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 7 Jun 2021 10:27:27 +0200
Subject: [PATCH 1138/3292] usbnet: run unbind() before unregister_netdev()

unbind() is the proper place to disconnect PHY, but it will fail if
netdev is already unregistered.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index ecf62849f4c1..57a5a025255c 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1597,6 +1597,9 @@ void usbnet_disconnect (struct usb_interface *intf)
 		   xdev->bus->bus_name, xdev->devpath,
 		   dev->driver_info->description);
 
+	if (dev->driver_info->unbind)
+		dev->driver_info->unbind(dev, intf);
+
 	net = dev->net;
 	unregister_netdev (net);
 
@@ -1604,9 +1607,6 @@ void usbnet_disconnect (struct usb_interface *intf)
 
 	usb_scuttle_anchored_urbs(&dev->deferred);
 
-	if (dev->driver_info->unbind)
-		dev->driver_info->unbind (dev, intf);
-
 	usb_kill_urb(dev->interrupt);
 	usb_free_urb(dev->interrupt);
 	kfree(dev->padding_pkt);

From ca4e2b94eb98db8472a6cb5b47147e079659dc9c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 7 Jun 2021 18:31:14 +0800
Subject: [PATCH 1139/3292] qed: Fix duplicate included linux/kernel.h

Clean up the following includecheck warning:

./drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h: linux/kernel.h
is included more than once.

No functional change.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h
index 4c7ac2bd2ea5..1d5ddc217bdb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.h
@@ -7,7 +7,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/types.h>

From d991452dd7900cf152ffb43db3b1d385e1a01579 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Mon, 7 Jun 2021 19:18:10 +0800
Subject: [PATCH 1140/3292] net: hns3: add a separate error handling task

Error handling and recovery logic are intertwined. Error handling (i.e.
error identification, clearing error sources and initiation of recovery)
is done in context of reset task. If certain hardware errors get
delivered during driver init time, which can cause driver init/loading
to fail.

Introduce a separate error handling task to ensure below:

1. Reset logic remains independent of the error handling logic.
2. Add the hclge_errhand_task_schedule to schedule error recovery
tasks, This will ensure that common misellaneous MSI-X interrupt are
re-enabled quickly.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_err.c         |  4 +-
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 38 +++++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  1 +
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 8223d699cd94..f125aa425872 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1940,8 +1940,8 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
 
 	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
 		dev_err(dev,
-			"Can't handle - MSIx error reported during dev init\n");
-		return 0;
+			"failed to handle msix error during dev init\n");
+		return -EAGAIN;
 	}
 
 	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6ecc106af334..8a431e124adb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2843,6 +2843,14 @@ static void hclge_reset_task_schedule(struct hclge_dev *hdev)
 				    hclge_wq, &hdev->service_task, 0);
 }
 
+static void hclge_errhand_task_schedule(struct hclge_dev *hdev)
+{
+	if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+	    !test_and_set_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
+		mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
+				    hclge_wq, &hdev->service_task, 0);
+}
+
 void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time)
 {
 	if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
@@ -4264,6 +4272,36 @@ static void hclge_reset_subtask(struct hclge_dev *hdev)
 	hdev->reset_type = HNAE3_NONE_RESET;
 }
 
+static void hclge_misc_err_recovery(struct hclge_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	struct device *dev = &hdev->pdev->dev;
+	u32 msix_sts_reg;
+
+	msix_sts_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
+
+	if (msix_sts_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
+		if (hclge_handle_hw_msix_error(hdev,
+					       &hdev->default_reset_request))
+			dev_info(dev, "received msix interrupt 0x%x\n",
+				 msix_sts_reg);
+
+		if (hdev->default_reset_request)
+			if (ae_dev->ops->reset_event)
+				ae_dev->ops->reset_event(hdev->pdev, NULL);
+	}
+
+	hclge_enable_vector(&hdev->misc_vector, true);
+}
+
+static void hclge_errhand_service_task(struct hclge_dev *hdev)
+{
+	if (!test_and_clear_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
+		return;
+
+	hclge_misc_err_recovery(hdev);
+}
+
 static void hclge_reset_service_task(struct hclge_dev *hdev)
 {
 	if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7595f841aaac..9b8abb5d7a8e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -221,6 +221,7 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_RST_HANDLING,
 	HCLGE_STATE_MBX_SERVICE_SCHED,
 	HCLGE_STATE_MBX_HANDLING,
+	HCLGE_STATE_ERR_SERVICE_SCHED,
 	HCLGE_STATE_STATISTICS_UPDATING,
 	HCLGE_STATE_CMD_DISABLE,
 	HCLGE_STATE_LINK_UPDATING,

From aff399a638da7e56680cdf6fa7544b67e0373a4e Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Mon, 7 Jun 2021 19:18:11 +0800
Subject: [PATCH 1141/3292] net: hns3: add scheduling logic for error handling
 task

Error handling & recovery is done in context of reset task which
gets scheduled from misc interrupt handler in existing code. But
since error handling has been moved to new task, it should get
scheduled instead of the reset task from the interrupt handler.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 20 ++++++-------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 8a431e124adb..4b1aa5c45852 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3402,18 +3402,8 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 	/* vector 0 interrupt is shared with reset and mailbox source events.*/
 	switch (event_cause) {
 	case HCLGE_VECTOR0_EVENT_ERR:
-		/* we do not know what type of reset is required now. This could
-		 * only be decided after we fetch the type of errors which
-		 * caused this event. Therefore, we will do below for now:
-		 * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we
-		 *    have defered type of reset to be used.
-		 * 2. Schedule the reset service task.
-		 * 3. When service task receives  HNAE3_UNKNOWN_RESET type it
-		 *    will fetch the correct type of reset.  This would be done
-		 *    by first decoding the types of errors.
-		 */
-		set_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request);
-		fallthrough;
+		hclge_errhand_task_schedule(hdev);
+		break;
 	case HCLGE_VECTOR0_EVENT_RST:
 		hclge_reset_task_schedule(hdev);
 		break;
@@ -4385,14 +4375,16 @@ static void hclge_service_task(struct work_struct *work)
 	struct hclge_dev *hdev =
 		container_of(work, struct hclge_dev, service_task.work);
 
+	hclge_errhand_service_task(hdev);
 	hclge_reset_service_task(hdev);
 	hclge_mailbox_service_task(hdev);
 	hclge_periodic_service_task(hdev);
 
-	/* Handle reset and mbx again in case periodical task delays the
-	 * handling by calling hclge_task_schedule() in
+	/* Handle error recovery, reset and mbx again in case periodical task
+	 * delays the handling by calling hclge_task_schedule() in
 	 * hclge_periodic_service_task().
 	 */
+	hclge_errhand_service_task(hdev);
 	hclge_reset_service_task(hdev);
 	hclge_mailbox_service_task(hdev);
 }

From e0fe0a38371b6d2d669e231c1fd68ce620dfa6b2 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Mon, 7 Jun 2021 19:18:12 +0800
Subject: [PATCH 1142/3292] net: hns3: remove now redundant logic related to
 HNAE3_UNKNOWN_RESET

Earlier patches have decoupled the MSI-X conveyed error handling
and recovery logic. This earlier concept code is no longer required.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 -
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 22 -------------------
 2 files changed, 23 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 89b2b7fa7b8b..dc9b5bc3431b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -243,7 +243,6 @@ enum hnae3_reset_type {
 	HNAE3_FUNC_RESET,
 	HNAE3_GLOBAL_RESET,
 	HNAE3_IMP_RESET,
-	HNAE3_UNKNOWN_RESET,
 	HNAE3_NONE_RESET,
 	HNAE3_MAX_RESET,
 };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4b1aa5c45852..45102681bd2a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3792,28 +3792,6 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 	enum hnae3_reset_type rst_level = HNAE3_NONE_RESET;
 	struct hclge_dev *hdev = ae_dev->priv;
 
-	/* first, resolve any unknown reset type to the known type(s) */
-	if (test_bit(HNAE3_UNKNOWN_RESET, addr)) {
-		u32 msix_sts_reg = hclge_read_dev(&hdev->hw,
-					HCLGE_MISC_VECTOR_INT_STS);
-		/* we will intentionally ignore any errors from this function
-		 *  as we will end up in *some* reset request in any case
-		 */
-		if (hclge_handle_hw_msix_error(hdev, addr))
-			dev_info(&hdev->pdev->dev, "received msix interrupt 0x%x\n",
-				 msix_sts_reg);
-
-		clear_bit(HNAE3_UNKNOWN_RESET, addr);
-		/* We defered the clearing of the error event which caused
-		 * interrupt since it was not posssible to do that in
-		 * interrupt context (and this is the reason we introduced
-		 * new UNKNOWN reset type). Now, the errors have been
-		 * handled and cleared in hardware we can safely enable
-		 * interrupts. This is an exception to the norm.
-		 */
-		hclge_enable_vector(&hdev->misc_vector, true);
-	}
-
 	/* return the highest priority reset level amongst all */
 	if (test_bit(HNAE3_IMP_RESET, addr)) {
 		rst_level = HNAE3_IMP_RESET;

From f1fe19c2cb3fdc92a614cf330ced1613f8f1a681 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 5 Jun 2021 10:31:48 +0800
Subject: [PATCH 1143/3292] net: mscc: ocelot: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/seville_vsc9953.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 84f93a874d50..deae923c8b7a 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -1206,6 +1206,11 @@ static int seville_probe(struct platform_device *pdev)
 	felix->info = &seville_info_vsc9953;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -EINVAL;
+		dev_err(&pdev->dev, "Invalid resource\n");
+		goto err_alloc_felix;
+	}
 	felix->switch_base = res->start;
 
 	ds = kzalloc(sizeof(struct dsa_switch), GFP_KERNEL);

From 90fdd89f6cf99213073dd9623f98519c767630d6 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Sat, 5 Jun 2021 13:42:56 +0800
Subject: [PATCH 1144/3292] net: tulip: Remove the repeated declaration

Function 'pnic2_lnk_change' is declared twice, so remove the
repeated declaration.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/tulip.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h
index 815907259048..0ed598dc7569 100644
--- a/drivers/net/ethernet/dec/tulip/tulip.h
+++ b/drivers/net/ethernet/dec/tulip/tulip.h
@@ -478,7 +478,6 @@ void t21142_lnk_change(struct net_device *dev, int csr5);
 void pnic2_lnk_change(struct net_device *dev, int csr5);
 void pnic2_timer(struct timer_list *t);
 void pnic2_start_nway(struct net_device *dev);
-void pnic2_lnk_change(struct net_device *dev, int csr5);
 
 /* eeprom.c */
 void tulip_parse_eeprom(struct net_device *dev);

From 74325bf0104573c6dfce42837139aeef3f34be76 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 21:38:37 +0800
Subject: [PATCH 1145/3292] net: bcmgenet: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmmii.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 5335244e4577..89d16c587bb7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -423,6 +423,10 @@ static int bcmgenet_mii_register(struct bcmgenet_priv *priv)
 	int id, ret;
 
 	pres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!pres) {
+		dev_err(&pdev->dev, "Invalid resource\n");
+		return -EINVAL;
+	}
 	memset(&res, 0, sizeof(res));
 	memset(&ppd, 0, sizeof(ppd));
 

From 809660cbc82d1bef9a2da1839d5c26a53760252c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 21:43:54 +0800
Subject: [PATCH 1146/3292] net: macb: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index a0c7b1167dbb..7d2fe13a52f8 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4655,8 +4655,7 @@ static int macb_probe(struct platform_device *pdev)
 	struct macb *bp;
 	int err, val;
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mem = devm_ioremap_resource(&pdev->dev, regs);
+	mem = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
 	if (IS_ERR(mem))
 		return PTR_ERR(mem);
 

From b5d64b43f8ccc25428009e5263619e34a6f3d787 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 21:57:14 +0800
Subject: [PATCH 1147/3292] net: enetc: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_ierb.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
index 8b356c485507..ee1468e3eaa3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
@@ -99,15 +99,13 @@ EXPORT_SYMBOL(enetc_ierb_register_pf);
 static int enetc_ierb_probe(struct platform_device *pdev)
 {
 	struct enetc_ierb *ierb;
-	struct resource *res;
 	void __iomem *regs;
 
 	ierb = devm_kzalloc(&pdev->dev, sizeof(*ierb), GFP_KERNEL);
 	if (!ierb)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(&pdev->dev, res);
+	regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 

From 3710e80952cf2dc48257ac9f145b117b5f74e0a5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 22:21:09 +0800
Subject: [PATCH 1148/3292] net: ethernet: bgmac: Use
 devm_platform_ioremap_resource_byname

Use the devm_platform_ioremap_resource_byname() helper instead of
calling platform_get_resource_byname() and devm_ioremap_resource()
separately.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/bgmac-platform.c    | 21 +++++++------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 9834b77cf4b6..4ab5bf64d353 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -172,7 +172,6 @@ static int bgmac_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct bgmac *bgmac;
-	struct resource *regs;
 	int ret;
 
 	bgmac = bgmac_alloc(&pdev->dev);
@@ -206,21 +205,15 @@ static int bgmac_probe(struct platform_device *pdev)
 	if (IS_ERR(bgmac->plat.base))
 		return PTR_ERR(bgmac->plat.base);
 
-	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
-	if (regs) {
-		bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
-		if (IS_ERR(bgmac->plat.idm_base))
-			return PTR_ERR(bgmac->plat.idm_base);
+	bgmac->plat.idm_base = devm_platform_ioremap_resource_byname(pdev, "idm_base");
+	if (IS_ERR(bgmac->plat.idm_base))
+		return PTR_ERR(bgmac->plat.idm_base);
+	else
 		bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK;
-	}
 
-	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
-	if (regs) {
-		bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
-							       regs);
-		if (IS_ERR(bgmac->plat.nicpm_base))
-			return PTR_ERR(bgmac->plat.nicpm_base);
-	}
+	bgmac->plat.nicpm_base = devm_platform_ioremap_resource_byname(pdev, "nicpm_base");
+	if (IS_ERR(bgmac->plat.nicpm_base))
+		return PTR_ERR(bgmac->plat.nicpm_base);
 
 	bgmac->read = platform_bgmac_read;
 	bgmac->write = platform_bgmac_write;

From 0bb51a3a385790a4be20085494cf78f70dadf646 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 22:36:02 +0800
Subject: [PATCH 1149/3292] net: mvpp2: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index d4fb620f53f3..b0066f64be98 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -7377,6 +7377,10 @@ static int mvpp2_probe(struct platform_device *pdev)
 			return PTR_ERR(priv->lms_base);
 	} else {
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+		if (!res) {
+			dev_err(&pdev->dev, "Invalid resource\n");
+			return -EINVAL;
+		}
 		if (has_acpi_companion(&pdev->dev)) {
 			/* In case the MDIO memory region is declared in
 			 * the ACPI, it can already appear as 'in-use'

From 20f1932e2282c58cb5ac59517585206cf5b385ae Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 22:55:21 +0800
Subject: [PATCH 1150/3292] net: micrel: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8842.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index caa251d0e381..b27713906d3a 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -1135,6 +1135,10 @@ static int ks8842_probe(struct platform_device *pdev)
 	unsigned i;
 
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem) {
+		dev_err(&pdev->dev, "Invalid resource\n");
+		return -EINVAL;
+	}
 	if (!request_mem_region(iomem->start, resource_size(iomem), DRV_NAME))
 		goto err_mem_region;
 

From 84a57ae96b299eaceacc4301db222ee12563cc96 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 7 Jun 2021 23:01:00 +0800
Subject: [PATCH 1151/3292] netlabel: Fix spelling mistakes

Fix some spelling mistakes in comments:
Interate  ==> Iterate
sucess  ==> success

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_domainhash.c | 2 +-
 net/netlabel/netlabel_kapi.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index dc8c39f51f7d..8158a25972b4 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -929,7 +929,7 @@ struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain,
  * @cb_arg: argument for the callback function
  *
  * Description:
- * Interate over the domain mapping hash table, skipping the first @skip_bkt
+ * Iterate over the domain mapping hash table, skipping the first @skip_bkt
  * buckets and @skip_chain entries.  For each entry in the table call
  * @callback, if @callback returns a negative value stop 'walking' through the
  * table and return.  Updates the values in @skip_bkt and @skip_chain on
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 5e1239cef000..beb0e573266d 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -719,7 +719,7 @@ int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset)
  * it in @bitmap.  The @offset must be aligned to an unsigned long and will be
  * updated on return if different from what was requested; if the catmap is
  * empty at the requested offset and beyond, the @offset is set to (u32)-1.
- * Returns zero on sucess, negative values on failure.
+ * Returns zero on success, negative values on failure.
  *
  */
 int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap,

From 974d8f86cd60d85f107f86182fb071cea0345387 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 7 Jun 2021 23:01:09 +0800
Subject: [PATCH 1152/3292] ipv4: Fix spelling mistakes

Fix some spelling mistakes in comments:
Dont  ==> Don't
timout  ==> timeout
incomming  ==> incoming
necesarry  ==> necessary
substract  ==> subtract

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_lookup.h   | 2 +-
 net/ipv4/ipmr.c         | 4 ++--
 net/ipv4/tcp_fastopen.c | 2 +-
 net/ipv4/tcp_timer.c    | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index b58db1ca4bfb..e184bcb19943 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -25,7 +25,7 @@ struct fib_alias {
 
 #define FA_S_ACCESSED	0x01
 
-/* Dont write on fa_state unless needed, to keep it shared on all cpus */
+/* Don't write on fa_state unless needed, to keep it shared on all cpus */
 static inline void fib_alias_accessed(struct fib_alias *fa)
 {
 	if (!(fa->fa_state & FA_S_ACCESSED))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 12b564b1ecb4..7b12a40dd465 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1317,7 +1317,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
 }
 
 /* called from ip_ra_control(), before an RCU grace period,
- * we dont need to call synchronize_rcu() here
+ * we don't need to call synchronize_rcu() here
  */
 static void mrtsock_destruct(struct sock *sk)
 {
@@ -1938,7 +1938,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
 		struct mfc_cache *cache_proxy;
 
-		/* For an (*,G) entry, we only check that the incomming
+		/* For an (*,G) entry, we only check that the incoming
 		 * interface is part of the static tree.
 		 */
 		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index af2814c9342a..47c32604d38f 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -526,7 +526,7 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
 	if (!tfo_da_times)
 		return false;
 
-	/* Limit timout to max: 2^6 * initial timeout */
+	/* Limit timeout to max: 2^6 * initial timeout */
 	multiplier = 1 << min(tfo_da_times - 1, 6);
 	timeout = multiplier * tfo_bh_timeout * HZ;
 	if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4ef08079ccfa..56b9d648f054 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -441,7 +441,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
  *  This function gets called when the kernel timer for a TCP packet
  *  of this socket expires.
  *
- *  It handles retransmission, timer adjustment and other necesarry measures.
+ *  It handles retransmission, timer adjustment and other necessary measures.
  *
  *  Returns: Nothing (void)
  */
@@ -766,7 +766,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 	if (!sock_owned_by_user(sk)) {
 		if (tp->compressed_ack) {
 			/* Since we have to send one ack finally,
-			 * substract one from tp->compressed_ack to keep
+			 * subtract one from tp->compressed_ack to keep
 			 * LINUX_MIB_TCPACKCOMPRESSED accurate.
 			 */
 			tp->compressed_ack--;

From 4fb3ebbf7e086a02afb0aecad0d21cf536b5fa05 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 7 Jun 2021 23:01:18 +0800
Subject: [PATCH 1153/3292] net/ncsi: Fix spelling mistakes

Fix some spelling mistakes in comments:
constuct  ==> construct
chanels  ==> channels
Detination  ==> Destination

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ncsi/internal.h    | 4 ++--
 net/ncsi/ncsi-manage.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 49031f804276..cbbb0de4750a 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -238,7 +238,7 @@ struct ncsi_package {
 	struct ncsi_dev_priv *ndp;        /* NCSI device            */
 	spinlock_t           lock;        /* Protect the package    */
 	unsigned int         channel_num; /* Number of channels     */
-	struct list_head     channels;    /* List of chanels        */
+	struct list_head     channels;    /* List of channels        */
 	struct list_head     node;        /* Form list of packages  */
 
 	bool                 multi_channel; /* Enable multiple channels  */
@@ -339,7 +339,7 @@ struct ncsi_cmd_arg {
 	unsigned char        type;        /* Command in the NCSI packet    */
 	unsigned char        id;          /* Request ID (sequence number)  */
 	unsigned char        package;     /* Destination package ID        */
-	unsigned char        channel;     /* Detination channel ID or 0x1f */
+	unsigned char        channel;     /* Destination channel ID or 0x1f */
 	unsigned short       payload;     /* Command packet payload length */
 	unsigned int         req_flags;   /* NCSI request properties       */
 	union {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index ffff8da707b8..ca04b6df1341 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -627,7 +627,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
 	return 0;
 }
 
-/* Find an outstanding VLAN tag and constuct a "Set VLAN Filter - Enable"
+/* Find an outstanding VLAN tag and construct a "Set VLAN Filter - Enable"
  * packet.
  */
 static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,

From 7f553ff214105f49e973187488ff93ff9c56b0c8 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 7 Jun 2021 23:01:37 +0800
Subject: [PATCH 1154/3292] l2tp: Fix spelling mistakes

Fix some spelling mistakes in comments:
negociated  ==> negotiated
dont  ==> don't

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 2 +-
 net/l2tp/l2tp_ppp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 536c30d4dd7d..b3edafa5fba4 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -488,7 +488,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		}
 	}
 
-	/* We dont need to clone dst here, it is guaranteed to not disappear.
+	/* We don't need to clone dst here, it is guaranteed to not disappear.
 	 *  __dev_xmit_skb() might force a refcount if needed.
 	 */
 	skb_dst_set_noref(skb, &rt->dst);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index aea85f91f059..bf35710127dd 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -226,7 +226,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 	/* If the first two bytes are 0xFF03, consider that it is the PPP's
 	 * Address and Control fields and skip them. The L2TP module has always
 	 * worked this way, although, in theory, the use of these fields should
-	 * be negociated and handled at the PPP layer. These fields are
+	 * be negotiated and handled at the PPP layer. These fields are
 	 * constant: 0xFF is the All-Stations Address and 0x03 the Unnumbered
 	 * Information command with Poll/Final bit set to zero (RFC 1662).
 	 */

From 35cba15a504bf4f585bb9d78f47b22b28a1a06b2 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 7 Jun 2021 23:02:59 +0800
Subject: [PATCH 1155/3292] net: moxa: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code and avoid a null-ptr-deref by checking 'res' in it.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/moxa/moxart_ether.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index b85733942053..5249b64f4fc5 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -481,13 +481,12 @@ static int moxart_mac_probe(struct platform_device *pdev)
 	priv->ndev = ndev;
 	priv->pdev = pdev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ndev->base_addr = res->start;
-	priv->base = devm_ioremap_resource(p_dev, res);
+	priv->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto init_fail;
 	}
+	ndev->base_addr = res->start;
 
 	spin_lock_init(&priv->txlock);
 

From c07aea3ef4d4076f18f567b98ed01e082e02ed51 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 7 Jun 2021 21:02:36 +0200
Subject: [PATCH 1156/3292] mm: add a signature in struct page

This is needed by the page_pool to avoid recycling a page not allocated
via page_pool.

The page->signature field is aliased to page->lru.next and
page->compound_head, but it can't be set by mistake because the
signature value is a bad pointer, and can't trigger a false positive
in PageTail() because the last bit is 0.

Co-developed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mm.h       | 11 ++++++-----
 include/linux/mm_types.h |  7 +++++++
 include/linux/poison.h   |  3 +++
 net/core/page_pool.c     |  6 ++++++
 4 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c274f75efcf9..a0434e8c2617 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1668,10 +1668,11 @@ struct address_space *page_mapping(struct page *page);
 static inline bool page_is_pfmemalloc(const struct page *page)
 {
 	/*
-	 * Page index cannot be this large so this must be
-	 * a pfmemalloc page.
+	 * lru.next has bit 1 set if the page is allocated from the
+	 * pfmemalloc reserves.  Callers may simply overwrite it if
+	 * they do not need to preserve that information.
 	 */
-	return page->index == -1UL;
+	return (uintptr_t)page->lru.next & BIT(1);
 }
 
 /*
@@ -1680,12 +1681,12 @@ static inline bool page_is_pfmemalloc(const struct page *page)
  */
 static inline void set_page_pfmemalloc(struct page *page)
 {
-	page->index = -1UL;
+	page->lru.next = (void *)BIT(1);
 }
 
 static inline void clear_page_pfmemalloc(struct page *page)
 {
-	page->index = 0;
+	page->lru.next = NULL;
 }
 
 /*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5aacc1c10a45..ed6862eacb52 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -96,6 +96,13 @@ struct page {
 			unsigned long private;
 		};
 		struct {	/* page_pool used by netstack */
+			/**
+			 * @pp_magic: magic value to avoid recycling non
+			 * page_pool allocated pages.
+			 */
+			unsigned long pp_magic;
+			struct page_pool *pp;
+			unsigned long _pp_mapping_pad;
 			/**
 			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
diff --git a/include/linux/poison.h b/include/linux/poison.h
index aff1c9250c82..d62ef5a6b4e9 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -78,4 +78,7 @@
 /********** security/ **********/
 #define KEY_DESTROY		0xbd
 
+/********** net/core/page_pool.c **********/
+#define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)
+
 #endif
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3c4c4c7a0402..e1321bc9d316 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -17,6 +17,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/page-flags.h>
 #include <linux/mm.h> /* for __put_page() */
+#include <linux/poison.h>
 
 #include <trace/events/page_pool.h>
 
@@ -221,6 +222,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 		return NULL;
 	}
 
+	page->pp_magic |= PP_SIGNATURE;
+
 	/* Track how many pages are held 'in-flight' */
 	pool->pages_state_hold_cnt++;
 	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
@@ -263,6 +266,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 			put_page(page);
 			continue;
 		}
+		page->pp_magic |= PP_SIGNATURE;
 		pool->alloc.cache[pool->alloc.count++] = page;
 		/* Track how many pages are held 'in-flight' */
 		pool->pages_state_hold_cnt++;
@@ -341,6 +345,8 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
 			     DMA_ATTR_SKIP_CPU_SYNC);
 	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
+	page->pp_magic = 0;
+
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
 	 */

From c420c98982fa9e749c99e022845d5f323d098b72 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 7 Jun 2021 21:02:37 +0200
Subject: [PATCH 1157/3292] skbuff: add a parameter to __skb_frag_unref

This is a prerequisite patch, the next one is enabling recycling of
skbs and fragments. Add an extra argument on __skb_frag_unref() to
handle recycling, and update the current users of the function with that.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/sky2.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +-
 include/linux/skbuff.h                     | 8 +++++---
 net/core/skbuff.c                          | 4 ++--
 net/tls/tls_device.c                       | 2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 324c280cc22c..8b8bff59c8fe 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -2503,7 +2503,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 
 		if (length == 0) {
 			/* don't need this page */
-			__skb_frag_unref(frag);
+			__skb_frag_unref(frag, false);
 			--skb_shinfo(skb)->nr_frags;
 		} else {
 			size = min(length, (unsigned) PAGE_SIZE);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index e35e4d7ef4d1..cea62b8f554c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -526,7 +526,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 fail:
 	while (nr > 0) {
 		nr--;
-		__skb_frag_unref(skb_shinfo(skb)->frags + nr);
+		__skb_frag_unref(skb_shinfo(skb)->frags + nr, false);
 	}
 	return 0;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dbf820a50a39..7fcfea7e7b21 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3081,10 +3081,12 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
 /**
  * __skb_frag_unref - release a reference on a paged fragment.
  * @frag: the paged fragment
+ * @recycle: recycle the page if allocated via page_pool
  *
- * Releases a reference on the paged fragment @frag.
+ * Releases a reference on the paged fragment @frag
+ * or recycles the page via the page_pool API.
  */
-static inline void __skb_frag_unref(skb_frag_t *frag)
+static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
 	put_page(skb_frag_page(frag));
 }
@@ -3098,7 +3100,7 @@ static inline void __skb_frag_unref(skb_frag_t *frag)
  */
 static inline void skb_frag_unref(struct sk_buff *skb, int f)
 {
-	__skb_frag_unref(&skb_shinfo(skb)->frags[f]);
+	__skb_frag_unref(&skb_shinfo(skb)->frags[f], false);
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ad22870298c..12b7e90dd2b5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -664,7 +664,7 @@ static void skb_release_data(struct sk_buff *skb)
 	skb_zcopy_clear(skb, true);
 
 	for (i = 0; i < shinfo->nr_frags; i++)
-		__skb_frag_unref(&shinfo->frags[i]);
+		__skb_frag_unref(&shinfo->frags[i], false);
 
 	if (shinfo->frag_list)
 		kfree_skb_list(shinfo->frag_list);
@@ -3495,7 +3495,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 		fragto = &skb_shinfo(tgt)->frags[merge];
 
 		skb_frag_size_add(fragto, skb_frag_size(fragfrom));
-		__skb_frag_unref(fragfrom);
+		__skb_frag_unref(fragfrom, false);
 	}
 
 	/* Reposition in the original skb */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bd9f1567aa39..b932469ee69c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -128,7 +128,7 @@ static void destroy_record(struct tls_record_info *record)
 	int i;
 
 	for (i = 0; i < record->num_frags; i++)
-		__skb_frag_unref(&record->frags[i]);
+		__skb_frag_unref(&record->frags[i], false);
 	kfree(record);
 }
 

From 6a5bcd84e886a9a91982e515c539529c28acdcc2 Mon Sep 17 00:00:00 2001
From: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Date: Mon, 7 Jun 2021 21:02:38 +0200
Subject: [PATCH 1158/3292] page_pool: Allow drivers to hint on SKB recycling

Up to now several high speed NICs have custom mechanisms of recycling
the allocated memory they use for their payloads.
Our page_pool API already has recycling capabilities that are always
used when we are running in 'XDP mode'. So let's tweak the API and the
kernel network stack slightly and allow the recycling to happen even
during the standard operation.
The API doesn't take into account 'split page' policies used by those
drivers currently, but can be extended once we have users for that.

The idea is to be able to intercept the packet on skb_release_data().
If it's a buffer coming from our page_pool API recycle it back to the
pool for further usage or just release the packet entirely.

To achieve that we introduce a bit in struct sk_buff (pp_recycle:1) and
a field in struct page (page->pp) to store the page_pool pointer.
Storing the information in page->pp allows us to recycle both SKBs and
their fragments.
We could have skipped the skb bit entirely, since identical information
can bederived from struct page. However, in an effort to affect the free path
as less as possible, reading a single bit in the skb which is already
in cache, is better that trying to derive identical information for the
page stored data.

The driver or page_pool has to take care of the sync operations on it's own
during the buffer recycling since the buffer is, after opting-in to the
recycling, never unmapped.

Since the gain on the drivers depends on the architecture, we are not
enabling recycling by default if the page_pool API is used on a driver.
In order to enable recycling the driver must call skb_mark_for_recycle()
to store the information we need for recycling in page->pp and
enabling the recycling bit, or page_pool_store_mem_info() for a fragment.

Co-developed-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Co-developed-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h  | 33 ++++++++++++++++++++++++++++++---
 include/net/page_pool.h |  9 +++++++++
 net/core/page_pool.c    | 22 ++++++++++++++++++++++
 net/core/skbuff.c       | 20 ++++++++++++++++----
 4 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7fcfea7e7b21..b2db9cd9a73f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <net/flow.h>
+#include <net/page_pool.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_common.h>
 #endif
@@ -667,6 +668,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@head_frag: skb was allocated from page fragments,
  *		not allocated by kmalloc() or vmalloc().
  *	@pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
+ *	@pp_recycle: mark the packet for recycling instead of freeing (implies
+ *		page_pool support on driver)
  *	@active_extensions: active extensions (skb_ext_id types)
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
@@ -791,10 +794,12 @@ struct sk_buff {
 				fclone:2,
 				peeked:1,
 				head_frag:1,
-				pfmemalloc:1;
+				pfmemalloc:1,
+				pp_recycle:1; /* page_pool recycle indicator */
 #ifdef CONFIG_SKB_EXTENSIONS
 	__u8			active_extensions;
 #endif
+
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
 	 */
@@ -3088,7 +3093,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
  */
 static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
-	put_page(skb_frag_page(frag));
+	struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+	if (recycle && page_pool_return_skb_page(page))
+		return;
+#endif
+	put_page(page);
 }
 
 /**
@@ -3100,7 +3111,7 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
  */
 static inline void skb_frag_unref(struct sk_buff *skb, int f)
 {
-	__skb_frag_unref(&skb_shinfo(skb)->frags[f], false);
+	__skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
 }
 
 /**
@@ -4699,5 +4710,21 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
 #endif
 }
 
+#ifdef CONFIG_PAGE_POOL
+static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
+					struct page_pool *pp)
+{
+	skb->pp_recycle = 1;
+	page_pool_store_mem_info(page, pp);
+}
+#endif
+
+static inline bool skb_pp_recycle(struct sk_buff *skb, void *data)
+{
+	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
+		return false;
+	return page_pool_return_skb_page(virt_to_page(data));
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index b4b6de909c93..3dd62dd73027 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -146,6 +146,8 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
 	return pool->p.dma_dir;
 }
 
+bool page_pool_return_skb_page(struct page *page);
+
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
 #ifdef CONFIG_PAGE_POOL
@@ -251,4 +253,11 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
 		spin_unlock_bh(&pool->ring.producer_lock);
 }
 
+/* Store mem_info on struct page and use it while recycling skb frags */
+static inline
+void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
+{
+	page->pp = pp;
+}
+
 #endif /* _NET_PAGE_POOL_H */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e1321bc9d316..5e4eb45b139c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -628,3 +628,25 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
 	}
 }
 EXPORT_SYMBOL(page_pool_update_nid);
+
+bool page_pool_return_skb_page(struct page *page)
+{
+	struct page_pool *pp;
+
+	page = compound_head(page);
+	if (unlikely(page->pp_magic != PP_SIGNATURE))
+		return false;
+
+	pp = page->pp;
+
+	/* Driver set this to memory recycling info. Reset it on recycle.
+	 * This will *not* work for NIC using a split-page memory model.
+	 * The page will be returned to the pool here regardless of the
+	 * 'flipped' fragment being in use or not.
+	 */
+	page->pp = NULL;
+	page_pool_put_full_page(pp, page, false);
+
+	return true;
+}
+EXPORT_SYMBOL(page_pool_return_skb_page);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 12b7e90dd2b5..a0b1d4847efe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
 #include <net/xfrm.h>
 #include <net/mpls.h>
 #include <net/mptcp.h>
+#include <net/page_pool.h>
 
 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
@@ -645,10 +646,13 @@ static void skb_free_head(struct sk_buff *skb)
 {
 	unsigned char *head = skb->head;
 
-	if (skb->head_frag)
+	if (skb->head_frag) {
+		if (skb_pp_recycle(skb, head))
+			return;
 		skb_free_frag(head);
-	else
+	} else {
 		kfree(head);
+	}
 }
 
 static void skb_release_data(struct sk_buff *skb)
@@ -664,7 +668,7 @@ static void skb_release_data(struct sk_buff *skb)
 	skb_zcopy_clear(skb, true);
 
 	for (i = 0; i < shinfo->nr_frags; i++)
-		__skb_frag_unref(&shinfo->frags[i], false);
+		__skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
 
 	if (shinfo->frag_list)
 		kfree_skb_list(shinfo->frag_list);
@@ -1046,6 +1050,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->nohdr = 0;
 	n->peeked = 0;
 	C(pfmemalloc);
+	C(pp_recycle);
 	n->destructor = NULL;
 	C(tail);
 	C(end);
@@ -3495,7 +3500,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 		fragto = &skb_shinfo(tgt)->frags[merge];
 
 		skb_frag_size_add(fragto, skb_frag_size(fragfrom));
-		__skb_frag_unref(fragfrom, false);
+		__skb_frag_unref(fragfrom, skb->pp_recycle);
 	}
 
 	/* Reposition in the original skb */
@@ -5285,6 +5290,13 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	if (skb_cloned(to))
 		return false;
 
+	/* The page pool signature of struct page will eventually figure out
+	 * which pages can be recycled or not but for now let's prohibit slab
+	 * allocated and page_pool allocated SKBs from being coalesced.
+	 */
+	if (to->pp_recycle != from->pp_recycle)
+		return false;
+
 	if (len <= skb_tailroom(to)) {
 		if (len)
 			BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));

From 133637fcfab24e831239c5f1d7042996efd8d828 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 7 Jun 2021 21:02:39 +0200
Subject: [PATCH 1159/3292] mvpp2: recycle buffers

Use the new recycling API for page_pool.
In a drop rate test, the packet rate is almost doubled,
from 1110 Kpps to 2128 Kpps.

perf top on a stock system shows:

Overhead  Shared Object     Symbol
  34.88%  [kernel]          [k] page_pool_release_page
   8.06%  [kernel]          [k] free_unref_page
   6.42%  [mvpp2]           [k] mvpp2_rx
   6.07%  [kernel]          [k] eth_type_trans
   5.18%  [kernel]          [k] __netif_receive_skb_core
   4.95%  [kernel]          [k] build_skb
   4.88%  [kernel]          [k] kmem_cache_free
   3.97%  [kernel]          [k] kmem_cache_alloc
   3.45%  [kernel]          [k] dev_gro_receive
   2.73%  [kernel]          [k] page_frag_free
   2.07%  [kernel]          [k] __alloc_pages_bulk
   1.99%  [kernel]          [k] arch_local_irq_save
   1.84%  [kernel]          [k] skb_release_data
   1.20%  [kernel]          [k] netif_receive_skb_list_internal

With packet rate stable at 1100 Kpps:

tx: 0 bps 0 pps rx: 532.7 Mbps 1110 Kpps
tx: 0 bps 0 pps rx: 532.6 Mbps 1110 Kpps
tx: 0 bps 0 pps rx: 532.4 Mbps 1109 Kpps
tx: 0 bps 0 pps rx: 532.1 Mbps 1109 Kpps
tx: 0 bps 0 pps rx: 531.9 Mbps 1108 Kpps
tx: 0 bps 0 pps rx: 531.9 Mbps 1108 Kpps

And this is the same output with recycling enabled:

Overhead  Shared Object     Symbol
  12.91%  [kernel]          [k] eth_type_trans
  12.54%  [mvpp2]           [k] mvpp2_rx
   9.67%  [kernel]          [k] build_skb
   9.63%  [kernel]          [k] __netif_receive_skb_core
   8.44%  [kernel]          [k] page_pool_put_page
   8.07%  [kernel]          [k] kmem_cache_free
   7.79%  [kernel]          [k] kmem_cache_alloc
   6.86%  [kernel]          [k] dev_gro_receive
   3.19%  [kernel]          [k] skb_release_data
   2.41%  [kernel]          [k] netif_receive_skb_list_internal
   2.18%  [kernel]          [k] page_pool_refill_alloc_cache
   1.76%  [kernel]          [k] napi_gro_receive
   1.61%  [kernel]          [k] kfree_skb
   1.20%  [kernel]          [k] dma_sync_single_for_device
   1.16%  [mvpp2]           [k] mvpp2_poll
   1.12%  [mvpp2]           [k] mvpp2_read

With packet rate above 2100 Kpps:

tx: 0 bps 0 pps rx: 1021 Mbps 2128 Kpps
tx: 0 bps 0 pps rx: 1021 Mbps 2127 Kpps
tx: 0 bps 0 pps rx: 1021 Mbps 2128 Kpps
tx: 0 bps 0 pps rx: 1021 Mbps 2128 Kpps
tx: 0 bps 0 pps rx: 1022 Mbps 2128 Kpps
tx: 0 bps 0 pps rx: 1022 Mbps 2129 Kpps

The major performance increase is explained by the fact that the most CPU
consuming functions (page_pool_release_page, page_frag_free and
free_unref_page) are no longer called on a per packet basis.

The test was done by sending to the macchiatobin 64 byte ethernet frames
with an invalid ethertype, so the packets are dropped early in the RX path.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index b0066f64be98..5663c1b21870 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3997,7 +3997,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 		}
 
 		if (pp)
-			page_pool_release_page(pp, virt_to_page(data));
+			skb_mark_for_recycle(skb, virt_to_page(data), pp);
 		else
 			dma_unmap_single_attrs(dev->dev.parent, dma_addr,
 					       bm_pool->buf_size, DMA_FROM_DEVICE,

From e4017570daee8ce39f7101f4d00e96e5a1b8ea97 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 7 Jun 2021 21:02:40 +0200
Subject: [PATCH 1160/3292] mvneta: recycle buffers

Use the new recycling API for page_pool.
In a drop rate test, the packet rate increased by 10%,
from 296 Kpps to 326 Kpps.

perf top on a stock system shows:

Overhead  Shared Object     Symbol
  23.66%  [kernel]          [k] __pi___inval_dcache_area
  22.85%  [mvneta]          [k] mvneta_rx_swbm
   7.54%  [kernel]          [k] kmem_cache_alloc
   6.49%  [kernel]          [k] eth_type_trans
   3.94%  [kernel]          [k] dev_gro_receive
   3.91%  [kernel]          [k] __netif_receive_skb_core
   3.91%  [kernel]          [k] kmem_cache_free
   3.76%  [kernel]          [k] page_pool_release_page
   3.56%  [kernel]          [k] free_unref_page
   2.40%  [kernel]          [k] build_skb
   1.49%  [kernel]          [k] skb_release_data
   1.45%  [kernel]          [k] __alloc_pages_bulk
   1.30%  [kernel]          [k] page_frag_free

And this is the same output with recycling enabled:

Overhead  Shared Object     Symbol
  26.41%  [kernel]          [k] __pi___inval_dcache_area
  25.00%  [mvneta]          [k] mvneta_rx_swbm
   8.14%  [kernel]          [k] kmem_cache_alloc
   6.84%  [kernel]          [k] eth_type_trans
   4.44%  [kernel]          [k] __netif_receive_skb_core
   4.38%  [kernel]          [k] kmem_cache_free
   4.16%  [kernel]          [k] dev_gro_receive
   3.21%  [kernel]          [k] page_pool_put_page
   2.41%  [kernel]          [k] build_skb
   1.82%  [kernel]          [k] skb_release_data
   1.61%  [kernel]          [k] napi_gro_receive
   1.25%  [kernel]          [k] page_pool_refill_alloc_cache
   1.16%  [kernel]          [k] __netif_receive_skb_list_core

We can see that page_pool_release_page(), free_unref_page() and
__alloc_pages_bulk() are no longer on top of the list when receiving
traffic.

The test was done with mausezahn on the TX side with 64 byte raw
ethernet frames.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 7d5cd9bc6c99..c15ce06427d0 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2320,7 +2320,7 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
 }
 
 static struct sk_buff *
-mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
 		      struct xdp_buff *xdp, u32 desc_status)
 {
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -2331,7 +2331,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	page_pool_release_page(rxq->page_pool, virt_to_page(xdp->data));
+	skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
 
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	skb_put(skb, xdp->data_end - xdp->data);
@@ -2343,7 +2343,10 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 				skb_frag_page(frag), skb_frag_off(frag),
 				skb_frag_size(frag), PAGE_SIZE);
-		page_pool_release_page(rxq->page_pool, skb_frag_page(frag));
+		/* We don't need to reset pp_recycle here. It's already set, so
+		 * just mark fragments for recycling.
+		 */
+		page_pool_store_mem_info(skb_frag_page(frag), pool);
 	}
 
 	return skb;
@@ -2425,7 +2428,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 		    mvneta_run_xdp(pp, rxq, xdp_prog, &xdp_buf, frame_sz, &ps))
 			goto next;
 
-		skb = mvneta_swbm_build_skb(pp, rxq, &xdp_buf, desc_status);
+		skb = mvneta_swbm_build_skb(pp, rxq->page_pool, &xdp_buf, desc_status);
 		if (IS_ERR(skb)) {
 			struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 

From 763663c9715f5f1cc0d065d2b020f12cd37417d2 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 12 May 2021 15:25:15 +0800
Subject: [PATCH 1161/3292] PM: domains: fix some kernel-doc issues

Fix the following make W=1 kernel build warnings:

  drivers/base/power/domain_governor.c:259: warning: Function parameter or member 'now' not described in '_default_power_down_ok'
  drivers/base/power/domain.c:581: warning: Function parameter or member 'depth' not described in 'genpd_power_off'
  drivers/base/power/domain.c:2520: warning: Function parameter or member 'np' not described in 'of_genpd_remove_last'
  drivers/base/power/domain.c:2520: warning: Excess function parameter 'provider' description in 'of_genpd_remove_last'

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c          | 3 ++-
 drivers/base/power/domain_governor.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b6a782c31613..5695a641efd3 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -572,6 +572,7 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
  * RPM status of the releated device is in an intermediate state, not yet turned
  * into RPM_SUSPENDED. This means genpd_power_off() must allow one device to not
  * be RPM_SUSPENDED, while it tries to power off the PM domain.
+ * @depth: nesting count for lockdep.
  *
  * If all of the @genpd's devices have been suspended and all of its subdomains
  * have been powered down, remove power from @genpd.
@@ -2505,7 +2506,7 @@ EXPORT_SYMBOL_GPL(of_genpd_remove_subdomain);
 
 /**
  * of_genpd_remove_last - Remove the last PM domain registered for a provider
- * @provider: Pointer to device structure associated with provider
+ * @np: Pointer to device node associated with provider
  *
  * Find the last PM domain that was added by a particular provider and
  * remove this PM domain from the list of PM domains. The provider is
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index c6c218758f0b..cd08c5885190 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -252,6 +252,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
 /**
  * _default_power_down_ok - Default generic PM domain power off governor routine.
  * @pd: PM domain to check.
+ * @now: current ktime.
  *
  * This routine must be executed under the PM domain's lock.
  */

From fe6bde732be8c4711a878b11491d9a2749b03909 Mon Sep 17 00:00:00 2001
From: ChenXiaoSong <chenxiaosong2@huawei.com>
Date: Mon, 7 Jun 2021 11:52:01 +0800
Subject: [PATCH 1162/3292] Smack: fix doc warning

Fix gcc W=1 warning:

security/smack/smack_access.c:342: warning: Function parameter or member 'ad' not described in 'smack_log'
security/smack/smack_access.c:403: warning: Function parameter or member 'skp' not described in 'smk_insert_entry'
security/smack/smack_access.c:487: warning: Function parameter or member 'level' not described in 'smk_netlbl_mls'
security/smack/smack_access.c:487: warning: Function parameter or member 'len' not described in 'smk_netlbl_mls'

Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smack_access.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index efe2406a3960..1f391f6a3d47 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -332,7 +332,7 @@ static void smack_log_callback(struct audit_buffer *ab, void *a)
  *  @object_label  : smack label of the object being accessed
  *  @request: requested permissions
  *  @result: result from smk_access
- *  @a:  auxiliary audit data
+ *  @ad:  auxiliary audit data
  *
  * Audit the granting or denial of permissions in accordance
  * with the policy.
@@ -396,6 +396,7 @@ struct hlist_head smack_known_hash[SMACK_HASH_SLOTS];
 
 /**
  * smk_insert_entry - insert a smack label into a hash map,
+ * @skp: smack label
  *
  * this function must be called under smack_known_lock
  */
@@ -476,8 +477,10 @@ char *smk_parse_smack(const char *string, int len)
 
 /**
  * smk_netlbl_mls - convert a catset to netlabel mls categories
+ * @level: MLS sensitivity level
  * @catset: the Smack categories
  * @sap: where to put the netlabel categories
+ * @len: number of bytes for the levels in a CIPSO IP option
  *
  * Allocates and fills attr.mls
  * Returns 0 on success, error code on failure.

From d6dd33ffa33b7a6a05e98f8f0cb2f256640fa5d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
Date: Tue, 8 Jun 2021 03:26:48 +0200
Subject: [PATCH 1163/3292] net: Kconfig: indent with tabs instead of spaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The BAREUDP config option uses spaces instead of tabs for indentation.
The rest of this file uses tabs. Fix this.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 74dc8e249faa..4da68ba8448f 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -262,17 +262,17 @@ config GENEVE
 	  will be called geneve.
 
 config BAREUDP
-       tristate "Bare UDP Encapsulation"
-       depends on INET
-       depends on IPV6 || !IPV6
-       select NET_UDP_TUNNEL
-       select GRO_CELLS
-       help
-          This adds a bare UDP tunnel module for tunnelling different
-          kinds of traffic like MPLS, IP, etc. inside a UDP tunnel.
+	tristate "Bare UDP Encapsulation"
+	depends on INET
+	depends on IPV6 || !IPV6
+	select NET_UDP_TUNNEL
+	select GRO_CELLS
+	help
+	  This adds a bare UDP tunnel module for tunnelling different
+	  kinds of traffic like MPLS, IP, etc. inside a UDP tunnel.
 
-          To compile this driver as a module, choose M here: the module
-          will be called bareudp.
+	  To compile this driver as a module, choose M here: the module
+	  will be called bareudp.
 
 config GTP
 	tristate "GPRS Tunneling Protocol datapath (GTP-U)"

From a9f15dc2b9733cb5870e655e6b77a4ec2cc51b8b Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Tue, 8 Jun 2021 11:15:32 +0800
Subject: [PATCH 1164/3292] dt-bindings: net: add dt binding for realtek
 rtl82xx phy

Add binding for realtek rtl82xx phy.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/realtek,rtl82xx.yaml         | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml

diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
new file mode 100644
index 000000000000..bb94a2388520
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/realtek,rtl82xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Realtek RTL82xx PHY
+
+maintainers:
+  - Andrew Lunn <andrew@lunn.ch>
+  - Florian Fainelli <f.fainelli@gmail.com>
+  - Heiner Kallweit <hkallweit1@gmail.com>
+
+description:
+  Bindings for Realtek RTL82xx PHYs
+
+allOf:
+  - $ref: ethernet-phy.yaml#
+
+properties:
+  realtek,clkout-disable:
+    type: boolean
+    description:
+      Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset.
+
+
+  realtek,aldps-enable:
+    type: boolean
+    description:
+      Enable ALDPS mode, ALDPS mode default is disabled after hardware reset.
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethphy1: ethernet-phy@1 {
+                reg = <1>;
+                realtek,clkout-disable;
+                realtek,aldps-enable;
+        };
+    };

From 0a4355c2b7f8ecd5e61cc262ecdbd4a2cce1ea7e Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Tue, 8 Jun 2021 11:15:33 +0800
Subject: [PATCH 1165/3292] net: phy: realtek: add dt property to disable
 CLKOUT clock

CLKOUT is enabled by default after PHY hardware reset, this patch adds
"realtek,clkout-disable" property for user to disable CLKOUT clock
to save PHY power.

Per RTL8211F guide, a PHY reset should be issued after setting these
bits in PHYCR2 register. After this patch, CLKOUT clock output to be
disabled.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 42 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 821e85a97367..ca258f2a9613 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -8,6 +8,7 @@
  * Copyright (c) 2004 Freescale Semiconductor, Inc.
  */
 #include <linux/bitops.h>
+#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/module.h>
 #include <linux/delay.h>
@@ -27,6 +28,7 @@
 #define RTL821x_PAGE_SELECT			0x1f
 
 #define RTL8211F_PHYCR1				0x18
+#define RTL8211F_PHYCR2				0x19
 #define RTL8211F_INSR				0x1d
 
 #define RTL8211F_TX_DELAY			BIT(8)
@@ -40,6 +42,8 @@
 #define RTL8211E_TX_DELAY			BIT(12)
 #define RTL8211E_RX_DELAY			BIT(11)
 
+#define RTL8211F_CLKOUT_EN			BIT(0)
+
 #define RTL8201F_ISR				0x1e
 #define RTL8201F_ISR_ANERR			BIT(15)
 #define RTL8201F_ISR_DUPLEX			BIT(13)
@@ -71,6 +75,10 @@ MODULE_DESCRIPTION("Realtek PHY driver");
 MODULE_AUTHOR("Johnson Leung");
 MODULE_LICENSE("GPL");
 
+struct rtl821x_priv {
+	u16 phycr2;
+};
+
 static int rtl821x_read_page(struct phy_device *phydev)
 {
 	return __phy_read(phydev, RTL821x_PAGE_SELECT);
@@ -81,6 +89,28 @@ static int rtl821x_write_page(struct phy_device *phydev, int page)
 	return __phy_write(phydev, RTL821x_PAGE_SELECT, page);
 }
 
+static int rtl821x_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct rtl821x_priv *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->phycr2 = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2);
+	if (priv->phycr2 < 0)
+		return priv->phycr2;
+
+	priv->phycr2 &= RTL8211F_CLKOUT_EN;
+	if (of_property_read_bool(dev->of_node, "realtek,clkout-disable"))
+		priv->phycr2 &= ~RTL8211F_CLKOUT_EN;
+
+	phydev->priv = priv;
+
+	return 0;
+}
+
 static int rtl8201_ack_interrupt(struct phy_device *phydev)
 {
 	int err;
@@ -291,6 +321,7 @@ static int rtl8211c_config_init(struct phy_device *phydev)
 
 static int rtl8211f_config_init(struct phy_device *phydev)
 {
+	struct rtl821x_priv *priv = phydev->priv;
 	struct device *dev = &phydev->mdio.dev;
 	u16 val_txdly, val_rxdly;
 	u16 val;
@@ -354,7 +385,15 @@ static int rtl8211f_config_init(struct phy_device *phydev)
 			val_rxdly ? "enabled" : "disabled");
 	}
 
-	return 0;
+	ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2,
+			       RTL8211F_CLKOUT_EN, priv->phycr2);
+	if (ret < 0) {
+		dev_err(dev, "clkout configuration failed: %pe\n",
+			ERR_PTR(ret));
+		return ret;
+	}
+
+	return genphy_soft_reset(phydev);
 }
 
 static int rtl8211e_config_init(struct phy_device *phydev)
@@ -847,6 +886,7 @@ static struct phy_driver realtek_drvs[] = {
 	}, {
 		PHY_ID_MATCH_EXACT(0x001cc916),
 		.name		= "RTL8211F Gigabit Ethernet",
+		.probe		= rtl821x_probe,
 		.config_init	= &rtl8211f_config_init,
 		.read_status	= rtlgen_read_status,
 		.config_intr	= &rtl8211f_config_intr,

From d90db36a9e748c9d886df15f5e17b341f0e5bcd5 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Tue, 8 Jun 2021 11:15:34 +0800
Subject: [PATCH 1166/3292] net: phy: realtek: add dt property to enable ALDPS
 mode

If enable Advance Link Down Power Saving (ALDPS) mode, it will change
crystal/clock behavior, which cause RXC clock stop for dozens to hundreds
of miliseconds. This is comfirmed by Realtek engineer. For some MACs, it
needs RXC clock to support RX logic, after this patch, PHY can generate
continuous RXC clock during auto-negotiation.

ALDPS default is disabled after hardware reset, it's more reasonable to
add a property to enable this feature, since ALDPS would introduce side effect.
This patch adds dt property "realtek,aldps-enable" to enable ALDPS mode
per users' requirement.

Jisheng Zhang enables this feature, changes the default behavior. Since
mine patch breaks the rule that new implementation should not break
existing design, so Cc'ed let him know to see if it can be accepted.

Cc: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index ca258f2a9613..79dc55bb4091 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -76,6 +76,7 @@ MODULE_AUTHOR("Johnson Leung");
 MODULE_LICENSE("GPL");
 
 struct rtl821x_priv {
+	u16 phycr1;
 	u16 phycr2;
 };
 
@@ -98,6 +99,14 @@ static int rtl821x_probe(struct phy_device *phydev)
 	if (!priv)
 		return -ENOMEM;
 
+	priv->phycr1 = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR1);
+	if (priv->phycr1 < 0)
+		return priv->phycr1;
+
+	priv->phycr1 &= (RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF);
+	if (of_property_read_bool(dev->of_node, "realtek,aldps-enable"))
+		priv->phycr1 |= RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF;
+
 	priv->phycr2 = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2);
 	if (priv->phycr2 < 0)
 		return priv->phycr2;
@@ -324,11 +333,16 @@ static int rtl8211f_config_init(struct phy_device *phydev)
 	struct rtl821x_priv *priv = phydev->priv;
 	struct device *dev = &phydev->mdio.dev;
 	u16 val_txdly, val_rxdly;
-	u16 val;
 	int ret;
 
-	val = RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_XTAL_OFF;
-	phy_modify_paged_changed(phydev, 0xa43, RTL8211F_PHYCR1, val, val);
+	ret = phy_modify_paged_changed(phydev, 0xa43, RTL8211F_PHYCR1,
+				       RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF,
+				       priv->phycr1);
+	if (ret < 0) {
+		dev_err(dev, "aldps mode  configuration failed: %pe\n",
+			ERR_PTR(ret));
+		return ret;
+	}
 
 	switch (phydev->interface) {
 	case PHY_INTERFACE_MODE_RGMII:

From 6813cc8cfdaf401476e1a007cec8ae338cefa573 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Tue, 8 Jun 2021 11:15:35 +0800
Subject: [PATCH 1167/3292] net: phy: realtek: add delay to fix RXC generation
 issue

PHY will delay about 11.5ms to generate RXC clock when switching from
power down to normal operation. Read/write registers would also cause RXC
become unstable and stop for a while during this process. Realtek engineer
suggests 15ms or more delay can workaround this issue.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 79dc55bb4091..1b844a06fe72 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -410,6 +410,19 @@ static int rtl8211f_config_init(struct phy_device *phydev)
 	return genphy_soft_reset(phydev);
 }
 
+static int rtl821x_resume(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_resume(phydev);
+	if (ret < 0)
+		return ret;
+
+	msleep(20);
+
+	return 0;
+}
+
 static int rtl8211e_config_init(struct phy_device *phydev)
 {
 	int ret = 0, oldpage;
@@ -906,7 +919,7 @@ static struct phy_driver realtek_drvs[] = {
 		.config_intr	= &rtl8211f_config_intr,
 		.handle_interrupt = rtl8211f_handle_interrupt,
 		.suspend	= genphy_suspend,
-		.resume		= genphy_resume,
+		.resume		= rtl821x_resume,
 		.read_page	= rtl821x_read_page,
 		.write_page	= rtl821x_write_page,
 	}, {

From 34de4c85f3936a1d806a81e66f3a19dc758a8c4d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:27 +0800
Subject: [PATCH 1168/3292] net: farsync: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 5de71e44fc5a..0081ec813b9a 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -479,7 +479,6 @@ struct fst_card_info {
 #define dev_to_port(D)  (dev_to_hdlc(D)->priv)
 #define port_to_dev(P)  ((P)->dev)
 
-
 /*
  *      Shared memory window access macros
  *
@@ -1194,7 +1193,6 @@ fst_recover_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 	}
 	port->rxpos = rxp;
 	return;
-
 }
 
 /*      Rx complete interrupt
@@ -2159,7 +2157,6 @@ fst_openport(struct fst_port_info *port)
 		port->txqe = 0;
 		port->txqs = 0;
 	}
-
 }
 
 static void

From 50d4c363366a09706073595f8e2d3b1683c3bbcc Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:28 +0800
Subject: [PATCH 1169/3292] net: farsync: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 0081ec813b9a..9a69aa76ccaf 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -2484,6 +2484,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         for ( i = 0 ; i < card->nports ; i++ ) {
 		struct net_device *dev = alloc_hdlcdev(&card->ports[i]);
 		hdlc_device *hdlc;
+
 		if (!dev) {
 			while (i--)
 				free_netdev(card->ports[i].dev);
@@ -2608,6 +2609,7 @@ fst_remove_one(struct pci_dev *pdev)
 
 	for (i = 0; i < card->nports; i++) {
 		struct net_device *dev = port_to_dev(&card->ports[i]);
+
 		unregister_hdlc_device(dev);
 	}
 

From 8ea4bfb30abc5ef2688f014ffab70b5e704f0c83 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:29 +0800
Subject: [PATCH 1170/3292] net: farsync: fix the code style issue about "foo*
 bar"

Fix the checkpatch error as "foo * bar" should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 9a69aa76ccaf..a5fe605d2c38 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -577,7 +577,7 @@ static u64 fst_work_txq;
 static u64 fst_work_intq;
 
 static void
-fst_q_work_item(u64 * queue, int card_index)
+fst_q_work_item(u64 *queue, int card_index)
 {
 	unsigned long flags;
 	u64 mask;

From 40996bcfe965f70a4f8304cfdd0fb9cd495b9385 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:30 +0800
Subject: [PATCH 1171/3292] net: farsync: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index a5fe605d2c38..8db9c842ce19 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -1252,7 +1252,8 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 	}
 
 	/* Allocate SKB */
-	if ((skb = dev_alloc_skb(len)) == NULL) {
+	skb = dev_alloc_skb(len);
+	if (!skb) {
 		dbg(DBG_RX, "intr_rx: can't allocate buffer\n");
 
 		dev->stats.rx_dropped++;
@@ -1344,7 +1345,8 @@ do_bottom_half_tx(struct fst_card_info *card)
 			 * bit on the next buffer we think we can use
 			 */
 			spin_lock_irqsave(&card->card_lock, flags);
-			if ((txq_length = port->txqe - port->txqs) < 0) {
+			txq_length = port->txqe - port->txqs;
+			if (txq_length < 0) {
 				/*
 				 * This is the case where one has wrapped and the
 				 * maths gives us a negative number
@@ -1633,7 +1635,8 @@ check_started_ok(struct fst_card_info *card)
 		return;
 	}
 	/* Firmware status flag, 0x00 = initialising, 0x01 = OK, 0xFF = fail */
-	if ((i = FST_RDB(card, taskStatus)) == 0x01) {
+	i = FST_RDB(card, taskStatus);
+	if (i == 0x01) {
 		card->state = FST_RUNNING;
 	} else if (i == 0xFF) {
 		pr_err("Firmware initialisation failed. Card halted\n");
@@ -2292,7 +2295,8 @@ fst_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * Check there is room in the port txq
 	 */
 	spin_lock_irqsave(&card->card_lock, flags);
-	if ((txq_length = port->txqe - port->txqs) < 0) {
+	txq_length = port->txqe - port->txqs;
+	if (txq_length < 0) {
 		/*
 		 * This is the case where the next free has wrapped but the
 		 * last used hasn't
@@ -2432,12 +2436,14 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -ENOMEM;
 
 	/* Try to enable the device */
-	if ((err = pci_enable_device(pdev)) != 0) {
+	err = pci_enable_device(pdev);
+	if (err) {
 		pr_err("Failed to enable card. Err %d\n", -err);
 		goto enable_fail;
 	}
 
-	if ((err = pci_request_regions(pdev, "FarSync")) !=0) {
+	err = pci_request_regions(pdev, "FarSync");
+	if (err) {
 		pr_err("Failed to allocate regions. Err %d\n", -err);
 		goto regions_fail;
 	}
@@ -2446,12 +2452,14 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	card->pci_conf = pci_resource_start(pdev, 1);
 	card->phys_mem = pci_resource_start(pdev, 2);
 	card->phys_ctlmem = pci_resource_start(pdev, 3);
-	if ((card->mem = ioremap(card->phys_mem, FST_MEMSIZE)) == NULL) {
+	card->mem = ioremap(card->phys_mem, FST_MEMSIZE);
+	if (!card->mem) {
 		pr_err("Physical memory remap failed\n");
 		err = -ENODEV;
 		goto ioremap_physmem_fail;
 	}
-	if ((card->ctlmem = ioremap(card->phys_ctlmem, 0x10)) == NULL) {
+	card->ctlmem = ioremap(card->phys_ctlmem, 0x10);
+	if (!card->ctlmem) {
 		pr_err("Control memory remap failed\n");
 		err = -ENODEV;
 		goto ioremap_ctlmem_fail;

From 8ccac4a58aa8cd2fc14cee2671996cb1ec78c71f Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:31 +0800
Subject: [PATCH 1172/3292] net: farsync: remove redundant initialization for
 statics

Should not initialise statics to 0.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 8db9c842ce19..7e408d5311d3 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -76,7 +76,7 @@ MODULE_LICENSE("GPL");
 static int fst_txq_low = FST_LOW_WATER_MARK;
 static int fst_txq_high = FST_HIGH_WATER_MARK;
 static int fst_max_reads = 7;
-static int fst_excluded_cards = 0;
+static int fst_excluded_cards;
 static int fst_excluded_list[FST_MAX_CARDS];
 
 module_param(fst_txq_low, int, 0);
@@ -2401,7 +2401,7 @@ static const struct net_device_ops fst_ops = {
 static int
 fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	static int no_of_cards_added = 0;
+	static int no_of_cards_added;
 	struct fst_card_info *card;
 	int err = 0;
 	int i;

From 14b9764ccfeb3ca59ae74b17a521cdf54929831d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:32 +0800
Subject: [PATCH 1173/3292] net: farsync: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 235 ++++++++++++++------------------------
 1 file changed, 83 insertions(+), 152 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 7e408d5311d3..f8c755845ae0 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *      FarSync WAN driver for Linux (2.6.x kernel version)
+/*      FarSync WAN driver for Linux (2.6.x kernel version)
  *
  *      Actually sync driver for X.21, V.35 and V.24 on FarSync T-series cards
  *
@@ -30,8 +29,7 @@
 
 #include "farsync.h"
 
-/*
- *      Module info
+/*      Module info
  */
 MODULE_AUTHOR("R.J.Dunlop <bob.dunlop@farsite.co.uk>");
 MODULE_DESCRIPTION("FarSync T-Series WAN driver. FarSite Communications Ltd.");
@@ -49,16 +47,19 @@ MODULE_LICENSE("GPL");
 /*      Default parameters for the link
  */
 #define FST_TX_QUEUE_LEN        100	/* At 8Mbps a longer queue length is
-					 * useful */
+					 * useful
+					 */
 #define FST_TXQ_DEPTH           16	/* This one is for the buffering
 					 * of frames on the way down to the card
 					 * so that we can keep the card busy
 					 * and maximise throughput
 					 */
 #define FST_HIGH_WATER_MARK     12	/* Point at which we flow control
-					 * network layer */
+					 * network layer
+					 */
 #define FST_LOW_WATER_MARK      8	/* Point at which we remove flow
-					 * control from network layer */
+					 * control from network layer
+					 */
 #define FST_MAX_MTU             8000	/* Huge but possible */
 #define FST_DEF_MTU             1500	/* Common sane value */
 
@@ -70,8 +71,7 @@ MODULE_LICENSE("GPL");
 #define ARPHRD_MYTYPE   ARPHRD_HDLC	/* Cisco-HDLC (keepalives etc) */
 #endif
 
-/*
- * Modules parameters and associated variables
+/* Modules parameters and associated variables
  */
 static int fst_txq_low = FST_LOW_WATER_MARK;
 static int fst_txq_high = FST_HIGH_WATER_MARK;
@@ -105,9 +105,11 @@ module_param_array(fst_excluded_list, int, NULL, 0);
 #define FST_MEMSIZE 0x100000	/* Size of card memory (1Mb) */
 
 #define SMC_BASE 0x00002000L	/* Base offset of the shared memory window main
-				 * configuration structure */
+				 * configuration structure
+				 */
 #define BFM_BASE 0x00010000L	/* Base offset of the shared memory window DMA
-				 * buffers */
+				 * buffers
+				 */
 
 #define LEN_TX_BUFFER 8192	/* Size of packet buffers */
 #define LEN_RX_BUFFER 8192
@@ -377,8 +379,7 @@ struct fst_shared {
 #define INTCSR_9054     0x68	/* Interrupt control/status register */
 
 /* 9054 DMA Registers */
-/*
- * Note that we will be using DMA Channel 0 for copying rx data
+/* Note that we will be using DMA Channel 0 for copying rx data
  * and Channel 1 for copying tx data
  */
 #define DMAMODE0        0x80
@@ -431,8 +432,7 @@ struct fst_port_info {
 	int txpos;		/* Next Tx buffer to use */
 	int txipos;		/* Next Tx buffer to check for free */
 	int start;		/* Indication of start/stop to network */
-	/*
-	 * A sixteen entry transmit queue
+	/* A sixteen entry transmit queue
 	 */
 	int txqs;		/* index to get next buffer to tx */
 	int txqe;		/* index to queue next packet */
@@ -479,8 +479,7 @@ struct fst_card_info {
 #define dev_to_port(D)  (dev_to_hdlc(D)->priv)
 #define port_to_dev(P)  ((P)->dev)
 
-/*
- *      Shared memory window access macros
+/*      Shared memory window access macros
  *
  *      We have a nice memory based structure above, which could be directly
  *      mapped on i386 but might not work on other architectures unless we use
@@ -498,8 +497,7 @@ struct fst_card_info {
 #define FST_WRW(C,E,W)  writew ((W), (C)->mem + WIN_OFFSET(E))
 #define FST_WRL(C,E,L)  writel ((L), (C)->mem + WIN_OFFSET(E))
 
-/*
- *      Debug support
+/*      Debug support
  */
 #if FST_DEBUG
 
@@ -523,8 +521,7 @@ do {								\
 } while (0)
 #endif
 
-/*
- *      PCI ID lookup table
+/*      PCI ID lookup table
  */
 static const struct pci_device_id fst_pci_dev_id[] = {
 	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T2P, PCI_ANY_ID, 
@@ -552,8 +549,7 @@ static const struct pci_device_id fst_pci_dev_id[] = {
 
 MODULE_DEVICE_TABLE(pci, fst_pci_dev_id);
 
-/*
- *      Device Driver Work Queues
+/*      Device Driver Work Queues
  *
  *      So that we don't spend too much time processing events in the 
  *      Interrupt Service routine, we will declare a work queue per Card 
@@ -582,13 +578,11 @@ fst_q_work_item(u64 *queue, int card_index)
 	unsigned long flags;
 	u64 mask;
 
-	/*
-	 * Grab the queue exclusively
+	/* Grab the queue exclusively
 	 */
 	spin_lock_irqsave(&fst_work_q_lock, flags);
 
-	/*
-	 * Making an entry in the queue is simply a matter of setting
+	/* Making an entry in the queue is simply a matter of setting
 	 * a bit for the card indicating that there is work to do in the
 	 * bottom half for the card.  Note the limitation of 64 cards.
 	 * That ought to be enough
@@ -605,8 +599,7 @@ fst_process_tx_work_q(struct tasklet_struct *unused)
 	u64 work_txq;
 	int i;
 
-	/*
-	 * Grab the queue exclusively
+	/* Grab the queue exclusively
 	 */
 	dbg(DBG_TX, "fst_process_tx_work_q\n");
 	spin_lock_irqsave(&fst_work_q_lock, flags);
@@ -614,8 +607,7 @@ fst_process_tx_work_q(struct tasklet_struct *unused)
 	fst_work_txq = 0;
 	spin_unlock_irqrestore(&fst_work_q_lock, flags);
 
-	/*
-	 * Call the bottom half for each card with work waiting
+	/* Call the bottom half for each card with work waiting
 	 */
 	for (i = 0; i < FST_MAX_CARDS; i++) {
 		if (work_txq & 0x01) {
@@ -635,8 +627,7 @@ fst_process_int_work_q(struct tasklet_struct *unused)
 	u64 work_intq;
 	int i;
 
-	/*
-	 * Grab the queue exclusively
+	/* Grab the queue exclusively
 	 */
 	dbg(DBG_INTR, "fst_process_int_work_q\n");
 	spin_lock_irqsave(&fst_work_q_lock, flags);
@@ -644,8 +635,7 @@ fst_process_int_work_q(struct tasklet_struct *unused)
 	fst_work_intq = 0;
 	spin_unlock_irqrestore(&fst_work_q_lock, flags);
 
-	/*
-	 * Call the bottom half for each card with work waiting
+	/* Call the bottom half for each card with work waiting
 	 */
 	for (i = 0; i < FST_MAX_CARDS; i++) {
 		if (work_intq & 0x01) {
@@ -682,19 +672,16 @@ fst_cpureset(struct fst_card_info *card)
 			dbg(DBG_ASS,
 			    "Error in reading interrupt line register\n");
 		}
-		/*
-		 * Assert PLX software reset and Am186 hardware reset
+		/* Assert PLX software reset and Am186 hardware reset
 		 * and then deassert the PLX software reset but 186 still in reset
 		 */
 		outw(0x440f, card->pci_conf + CNTRL_9054 + 2);
 		outw(0x040f, card->pci_conf + CNTRL_9054 + 2);
-		/*
-		 * We are delaying here to allow the 9054 to reset itself
+		/* We are delaying here to allow the 9054 to reset itself
 		 */
 		usleep_range(10, 20);
 		outw(0x240f, card->pci_conf + CNTRL_9054 + 2);
-		/*
-		 * We are delaying here to allow the 9054 to reload its eeprom
+		/* We are delaying here to allow the 9054 to reload its eeprom
 		 */
 		usleep_range(10, 20);
 		outw(0x040f, card->pci_conf + CNTRL_9054 + 2);
@@ -719,13 +706,11 @@ static inline void
 fst_cpurelease(struct fst_card_info *card)
 {
 	if (card->family == FST_FAMILY_TXU) {
-		/*
-		 * Force posted writes to complete
+		/* Force posted writes to complete
 		 */
 		(void) readb(card->mem);
 
-		/*
-		 * Release LRESET DO = 1
+		/* Release LRESET DO = 1
 		 * Then release Local Hold, DO = 1
 		 */
 		outw(0x040e, card->pci_conf + CNTRL_9054 + 2);
@@ -781,8 +766,7 @@ fst_process_rx_status(int rx_status, char *name)
 	switch (rx_status) {
 	case NET_RX_SUCCESS:
 		{
-			/*
-			 * Nothing to do here
+			/* Nothing to do here
 			 */
 			break;
 		}
@@ -799,8 +783,7 @@ fst_process_rx_status(int rx_status, char *name)
 static inline void
 fst_init_dma(struct fst_card_info *card)
 {
-	/*
-	 * This is only required for the PLX 9054
+	/* This is only required for the PLX 9054
 	 */
 	if (card->family == FST_FAMILY_TXU) {
 	        pci_set_master(card->device);
@@ -818,8 +801,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 {
 	struct net_device *dev = port_to_dev(port);
 
-	/*
-	 * Everything is now set, just tell the card to go
+	/* Everything is now set, just tell the card to go
 	 */
 	dbg(DBG_TX, "fst_tx_dma_complete\n");
 	FST_WRB(card, txDescrRing[port->index][txpos].bits,
@@ -829,8 +811,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 	netif_trans_update(dev);
 }
 
-/*
- * Mark it for our own raw sockets interface
+/* Mark it for our own raw sockets interface
  */
 static __be16 farsync_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
@@ -873,14 +854,12 @@ fst_rx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 		dev->stats.rx_dropped++;
 }
 
-/*
- *      Receive a frame through the DMA
+/*      Receive a frame through the DMA
  */
 static inline void
 fst_rx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 {
-	/*
-	 * This routine will setup the DMA and start it
+	/* This routine will setup the DMA and start it
 	 */
 
 	dbg(DBG_RX, "In fst_rx_dma %x %x %d\n", (u32)dma, mem, len);
@@ -893,21 +872,18 @@ fst_rx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 	outl(len, card->pci_conf + DMASIZ0);	/* for this length */
 	outl(0x00000000c, card->pci_conf + DMADPR0);	/* In this direction */
 
-	/*
-	 * We use the dmarx_in_progress flag to flag the channel as busy
+	/* We use the dmarx_in_progress flag to flag the channel as busy
 	 */
 	card->dmarx_in_progress = 1;
 	outb(0x03, card->pci_conf + DMACSR0);	/* Start the transfer */
 }
 
-/*
- *      Send a frame through the DMA
+/*      Send a frame through the DMA
  */
 static inline void
 fst_tx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 {
-	/*
-	 * This routine will setup the DMA and start it.
+	/* This routine will setup the DMA and start it.
 	 */
 
 	dbg(DBG_TX, "In fst_tx_dma %x %x %d\n", (u32)dma, mem, len);
@@ -920,8 +896,7 @@ fst_tx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 	outl(len, card->pci_conf + DMASIZ1);	/* for this length */
 	outl(0x000000004, card->pci_conf + DMADPR1);	/* In this direction */
 
-	/*
-	 * We use the dmatx_in_progress to flag the channel as busy
+	/* We use the dmatx_in_progress to flag the channel as busy
 	 */
 	card->dmatx_in_progress = 1;
 	outb(0x03, card->pci_conf + DMACSR1);	/* Start the transfer */
@@ -997,8 +972,7 @@ fst_op_lower(struct fst_port_info *port, unsigned int outputs)
 		fst_issue_cmd(port, SETV24O);
 }
 
-/*
- *      Setup port Rx buffers
+/*      Setup port Rx buffers
  */
 static void
 fst_rx_config(struct fst_port_info *port)
@@ -1025,8 +999,7 @@ fst_rx_config(struct fst_port_info *port)
 	spin_unlock_irqrestore(&card->card_lock, flags);
 }
 
-/*
- *      Setup port Tx buffers
+/*      Setup port Tx buffers
  */
 static void
 fst_tx_config(struct fst_port_info *port)
@@ -1068,16 +1041,14 @@ fst_intr_te1_alarm(struct fst_card_info *card, struct fst_port_info *port)
 	ais = FST_RDB(card, suStatus.alarmIndicationSignal);
 
 	if (los) {
-		/*
-		 * Lost the link
+		/* Lost the link
 		 */
 		if (netif_carrier_ok(port_to_dev(port))) {
 			dbg(DBG_INTR, "Net carrier off\n");
 			netif_carrier_off(port_to_dev(port));
 		}
 	} else {
-		/*
-		 * Link available
+		/* Link available
 		 */
 		if (!netif_carrier_ok(port_to_dev(port))) {
 			dbg(DBG_INTR, "Net carrier on\n");
@@ -1131,8 +1102,7 @@ fst_log_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 {
 	struct net_device *dev = port_to_dev(port);
 
-	/*
-	 * Increment the appropriate error counter
+	/* Increment the appropriate error counter
 	 */
 	dev->stats.rx_errors++;
 	if (dmabits & RX_OFLO) {
@@ -1167,8 +1137,7 @@ fst_recover_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 	int pi;
 
 	pi = port->index;
-	/* 
-	 * Discard buffer descriptors until we see the start of the
+	/* Discard buffer descriptors until we see the start of the
 	 * next frame.  Note that for long frames this could be in
 	 * a subsequent interrupt. 
 	 */
@@ -1226,8 +1195,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 	/* Discard the CRC */
 	len -= 2;
 	if (len == 0) {
-		/*
-		 * This seems to happen on the TE1 interface sometimes
+		/* This seems to happen on the TE1 interface sometimes
 		 * so throw the frame away and log the event.
 		 */
 		pr_err("Frame received with 0 length. Card %d Port %d\n",
@@ -1266,8 +1234,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 		return;
 	}
 
-	/*
-	 * We know the length we need to receive, len.
+	/* We know the length we need to receive, len.
 	 * It's not worth using the DMA for reads of less than
 	 * FST_MIN_DMA_LEN
 	 */
@@ -1310,8 +1277,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 	port->rxpos = rxp;
 }
 
-/*
- *      The bottom halfs to the ISR
+/*      The bottom half to the ISR
  *
  */
 
@@ -1325,8 +1291,7 @@ do_bottom_half_tx(struct fst_card_info *card)
 	unsigned long flags;
 	struct net_device *dev;
 
-	/*
-	 *  Find a free buffer for the transmit
+	/*  Find a free buffer for the transmit
 	 *  Step through each port on this card
 	 */
 
@@ -1339,24 +1304,21 @@ do_bottom_half_tx(struct fst_card_info *card)
 		while (!(FST_RDB(card, txDescrRing[pi][port->txpos].bits) &
 			 DMA_OWN) &&
 		       !(card->dmatx_in_progress)) {
-			/*
-			 * There doesn't seem to be a txdone event per-se
+			/* There doesn't seem to be a txdone event per-se
 			 * We seem to have to deduce it, by checking the DMA_OWN
 			 * bit on the next buffer we think we can use
 			 */
 			spin_lock_irqsave(&card->card_lock, flags);
 			txq_length = port->txqe - port->txqs;
 			if (txq_length < 0) {
-				/*
-				 * This is the case where one has wrapped and the
+				/* This is the case where one has wrapped and the
 				 * maths gives us a negative number
 				 */
 				txq_length = txq_length + FST_TXQ_DEPTH;
 			}
 			spin_unlock_irqrestore(&card->card_lock, flags);
 			if (txq_length > 0) {
-				/*
-				 * There is something to send
+				/* There is something to send
 				 */
 				spin_lock_irqsave(&card->card_lock, flags);
 				skb = port->txq[port->txqs];
@@ -1365,8 +1327,7 @@ do_bottom_half_tx(struct fst_card_info *card)
 					port->txqs = 0;
 				}
 				spin_unlock_irqrestore(&card->card_lock, flags);
-				/*
-				 * copy the data and set the required indicators on the
+				/* copy the data and set the required indicators on the
 				 * card.
 				 */
 				FST_WRW(card, txDescrRing[pi][port->txpos].bcnt,
@@ -1401,8 +1362,7 @@ do_bottom_half_tx(struct fst_card_info *card)
 				}
 				if (++port->txpos >= NUM_TX_BUFFER)
 					port->txpos = 0;
-				/*
-				 * If we have flow control on, can we now release it?
+				/* If we have flow control on, can we now release it?
 				 */
 				if (port->start) {
 					if (txq_length < fst_txq_low) {
@@ -1413,8 +1373,7 @@ do_bottom_half_tx(struct fst_card_info *card)
 				}
 				dev_kfree_skb(skb);
 			} else {
-				/*
-				 * Nothing to send so break out of the while loop
+				/* Nothing to send so break out of the while loop
 				 */
 				break;
 			}
@@ -1438,8 +1397,7 @@ do_bottom_half_rx(struct fst_card_info *card)
 		while (!(FST_RDB(card, rxDescrRing[pi][port->rxpos].bits)
 			 & DMA_OWN) && !(card->dmarx_in_progress)) {
 			if (rx_count > fst_max_reads) {
-				/*
-				 * Don't spend forever in receive processing
+				/* Don't spend forever in receive processing
 				 * Schedule another event
 				 */
 				fst_q_work_item(&fst_work_intq, card->card_no);
@@ -1452,8 +1410,7 @@ do_bottom_half_rx(struct fst_card_info *card)
 	}
 }
 
-/*
- *      The interrupt service routine
+/*      The interrupt service routine
  *      Dev_id is our fst_card_info pointer
  */
 static irqreturn_t
@@ -1468,8 +1425,7 @@ fst_intr(int dummy, void *dev_id)
 	unsigned int do_card_interrupt;
 	unsigned int int_retry_count;
 
-	/*
-	 * Check to see if the interrupt was for this card
+	/* Check to see if the interrupt was for this card
 	 * return if not
 	 * Note that the call to clear the interrupt is important
 	 */
@@ -1478,8 +1434,7 @@ fst_intr(int dummy, void *dev_id)
 		pr_err("Interrupt received for card %d in a non running state (%d)\n",
 		       card->card_no, card->state);
 
-		/* 
-		 * It is possible to really be running, i.e. we have re-loaded
+		/* It is possible to really be running, i.e. we have re-loaded
 		 * a running card
 		 * Clear and reprime the interrupt source 
 		 */
@@ -1490,8 +1445,7 @@ fst_intr(int dummy, void *dev_id)
 	/* Clear and reprime the interrupt source */
 	fst_clear_intr(card);
 
-	/*
-	 * Is the interrupt for this card (handshake == 1)
+	/* Is the interrupt for this card (handshake == 1)
 	 */
 	do_card_interrupt = 0;
 	if (FST_RDB(card, interruptHandshake) == 1) {
@@ -1500,13 +1454,11 @@ fst_intr(int dummy, void *dev_id)
 		FST_WRB(card, interruptHandshake, 0xEE);
 	}
 	if (card->family == FST_FAMILY_TXU) {
-		/*
-		 * Is it a DMA Interrupt
+		/* Is it a DMA Interrupt
 		 */
 		dma_intcsr = inl(card->pci_conf + INTCSR_9054);
 		if (dma_intcsr & 0x00200000) {
-			/*
-			 * DMA Channel 0 (Rx transfer complete)
+			/* DMA Channel 0 (Rx transfer complete)
 			 */
 			dbg(DBG_RX, "DMA Rx xfer complete\n");
 			outb(0x8, card->pci_conf + DMACSR0);
@@ -1517,8 +1469,7 @@ fst_intr(int dummy, void *dev_id)
 			do_card_interrupt += FST_RX_DMA_INT;
 		}
 		if (dma_intcsr & 0x00400000) {
-			/*
-			 * DMA Channel 1 (Tx transfer complete)
+			/* DMA Channel 1 (Tx transfer complete)
 			 */
 			dbg(DBG_TX, "DMA Tx xfer complete\n");
 			outb(0x8, card->pci_conf + DMACSR1);
@@ -1529,8 +1480,7 @@ fst_intr(int dummy, void *dev_id)
 		}
 	}
 
-	/*
-	 * Have we been missing Interrupts
+	/* Have we been missing Interrupts
 	 */
 	int_retry_count = FST_RDL(card, interruptRetryCount);
 	if (int_retry_count) {
@@ -1788,27 +1738,23 @@ gather_conf_info(struct fst_card_info *card, struct fst_port_info *port,
 	info->cardMode = FST_RDW(card, cardMode);
 	info->smcFirmwareVersion = FST_RDL(card, smcFirmwareVersion);
 
-	/*
-	 * The T2U can report cable presence for both A or B
+	/* The T2U can report cable presence for both A or B
 	 * in bits 0 and 1 of cableStatus.  See which port we are and 
 	 * do the mapping.
 	 */
 	if (card->family == FST_FAMILY_TXU) {
 		if (port->index == 0) {
-			/*
-			 * Port A
+			/* Port A
 			 */
 			info->cableStatus = info->cableStatus & 1;
 		} else {
-			/*
-			 * Port B
+			/* Port B
 			 */
 			info->cableStatus = info->cableStatus >> 1;
 			info->cableStatus = info->cableStatus & 1;
 		}
 	}
-	/*
-	 * Some additional bits if we are TE1
+	/* Some additional bits if we are TE1
 	 */
 	if (card->type == FST_TYPE_TE1) {
 		info->lineSpeed = FST_RDL(card, suConfig.dataRate);
@@ -2072,9 +2018,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return 0;
 
 	case FSTSETCONF:
-
-		/*
-		 * Most of the settings have been moved to the generic ioctls
+		/* Most of the settings have been moved to the generic ioctls
 		 * this just covers debug and board ident now
 		 */
 
@@ -2230,8 +2174,7 @@ fst_close(struct net_device *dev)
 static int
 fst_attach(struct net_device *dev, unsigned short encoding, unsigned short parity)
 {
-	/*
-	 * Setting currently fixed in FarSync card so we check and forget
+	/* Setting currently fixed in FarSync card so we check and forget
 	 */
 	if (encoding != ENCODING_NRZ || parity != PARITY_CRC16_PR1_CCITT)
 		return -EINVAL;
@@ -2289,24 +2232,21 @@ fst_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
-	/*
-	 * We are always going to queue the packet
+	/* We are always going to queue the packet
 	 * so that the bottom half is the only place we tx from
 	 * Check there is room in the port txq
 	 */
 	spin_lock_irqsave(&card->card_lock, flags);
 	txq_length = port->txqe - port->txqs;
 	if (txq_length < 0) {
-		/*
-		 * This is the case where the next free has wrapped but the
+		/* This is the case where the next free has wrapped but the
 		 * last used hasn't
 		 */
 		txq_length = txq_length + FST_TXQ_DEPTH;
 	}
 	spin_unlock_irqrestore(&card->card_lock, flags);
 	if (txq_length > fst_txq_high) {
-		/*
-		 * We have got enough buffers in the pipeline.  Ask the network
+		/* We have got enough buffers in the pipeline.  Ask the network
 		 * layer to stop sending frames down
 		 */
 		netif_stop_queue(dev);
@@ -2314,8 +2254,7 @@ fst_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (txq_length == FST_TXQ_DEPTH - 1) {
-		/*
-		 * This shouldn't have happened but such is life
+		/* This shouldn't have happened but such is life
 		 */
 		dev_kfree_skb(skb);
 		dev->stats.tx_errors++;
@@ -2324,8 +2263,7 @@ fst_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
-	/*
-	 * queue the buffer
+	/* queue the buffer
 	 */
 	spin_lock_irqsave(&card->card_lock, flags);
 	port->txq[port->txqe] = skb;
@@ -2341,8 +2279,7 @@ fst_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-/*
- *      Card setup having checked hardware resources.
+/*      Card setup having checked hardware resources.
  *      Should be pretty bizarre if we get an error here (kernel memory
  *      exhaustion is one possibility). If we do see a problem we report it
  *      via a printk and leave the corresponding interface and all that follow
@@ -2394,8 +2331,7 @@ static const struct net_device_ops fst_ops = {
 	.ndo_tx_timeout = fst_tx_timeout,
 };
 
-/*
- *      Initialise card when detected.
+/*      Initialise card when detected.
  *      Returns 0 to indicate success, or errno otherwise.
  */
 static int
@@ -2412,13 +2348,11 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 #if FST_DEBUG
 	dbg(DBG_ASS, "The value of debug mask is %x\n", fst_debug_mask);
 #endif
-	/*
-	 * We are going to be clever and allow certain cards not to be
+	/* We are going to be clever and allow certain cards not to be
 	 * configured.  An exclude list can be provided in /etc/modules.conf
 	 */
 	if (fst_excluded_cards != 0) {
-		/*
-		 * There are cards to exclude
+		/* There are cards to exclude
 		 *
 		 */
 		for (i = 0; i < fst_excluded_cards; i++) {
@@ -2555,8 +2489,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto init_card_fail;
 	if (card->family == FST_FAMILY_TXU) {
-		/*
-		 * Allocate a dma buffer for transmit and receives
+		/* Allocate a dma buffer for transmit and receives
 		 */
 		card->rx_dma_handle_host =
 		    dma_alloc_coherent(&card->device->dev, FST_MAX_MTU,
@@ -2604,8 +2537,7 @@ enable_fail:
 	return err;
 }
 
-/*
- *      Cleanup and close down a card
+/*      Cleanup and close down a card
  */
 static void
 fst_remove_one(struct pci_dev *pdev)
@@ -2628,8 +2560,7 @@ fst_remove_one(struct pci_dev *pdev)
 	iounmap(card->mem);
 	pci_release_regions(pdev);
 	if (card->family == FST_FAMILY_TXU) {
-		/*
-		 * Free dma buffers
+		/* Free dma buffers
 		 */
 		dma_free_coherent(&card->device->dev, FST_MAX_MTU,
 				  card->rx_dma_handle_host,

From d70711da30f0bec1286aab6f01f4a0ac6da1db79 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:33 +0800
Subject: [PATCH 1174/3292] net: farsync: remove trailing whitespaces

This patch removes trailing whitespaces.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index f8c755845ae0..7653ff078c9c 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -524,25 +524,25 @@ do {								\
 /*      PCI ID lookup table
  */
 static const struct pci_device_id fst_pci_dev_id[] = {
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T2P, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T2P, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_T2P},
 
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T4P, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T4P, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_T4P},
 
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T1U, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T1U, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_T1U},
 
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T2U, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T2U, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_T2U},
 
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T4U, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_T4U, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_T4U},
 
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_TE1, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_TE1, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_TE1},
 
-	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_TE1C, PCI_ANY_ID, 
+	{PCI_VENDOR_ID_FARSITE, PCI_DEVICE_ID_FARSITE_TE1C, PCI_ANY_ID,
 	 PCI_ANY_ID, 0, 0, FST_TYPE_TE1},
 	{0,}			/* End */
 };
@@ -551,11 +551,11 @@ MODULE_DEVICE_TABLE(pci, fst_pci_dev_id);
 
 /*      Device Driver Work Queues
  *
- *      So that we don't spend too much time processing events in the 
- *      Interrupt Service routine, we will declare a work queue per Card 
+ *      So that we don't spend too much time processing events in the
+ *      Interrupt Service routine, we will declare a work queue per Card
  *      and make the ISR schedule a task in the queue for later execution.
  *      In the 2.4 Kernel we used to use the immediate queue for BH's
- *      Now that they are gone, tasklets seem to be much better than work 
+ *      Now that they are gone, tasklets seem to be much better than work
  *      queues.
  */
 
@@ -1139,7 +1139,7 @@ fst_recover_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 	pi = port->index;
 	/* Discard buffer descriptors until we see the start of the
 	 * next frame.  Note that for long frames this could be in
-	 * a subsequent interrupt. 
+	 * a subsequent interrupt.
 	 */
 	i = 0;
 	while ((dmabits & (DMA_OWN | RX_STP)) == 0) {
@@ -1436,7 +1436,7 @@ fst_intr(int dummy, void *dev_id)
 
 		/* It is possible to really be running, i.e. we have re-loaded
 		 * a running card
-		 * Clear and reprime the interrupt source 
+		 * Clear and reprime the interrupt source
 		 */
 		fst_clear_intr(card);
 		return IRQ_HANDLED;
@@ -1616,8 +1616,8 @@ set_conf_from_info(struct fst_card_info *card, struct fst_port_info *port,
 	int err;
 	unsigned char my_framing;
 
-	/* Set things according to the user set valid flags 
-	 * Several of the old options have been invalidated/replaced by the 
+	/* Set things according to the user set valid flags
+	 * Several of the old options have been invalidated/replaced by the
 	 * generic hdlc package.
 	 */
 	err = 0;
@@ -1739,7 +1739,7 @@ gather_conf_info(struct fst_card_info *card, struct fst_port_info *port,
 	info->smcFirmwareVersion = FST_RDL(card, smcFirmwareVersion);
 
 	/* The T2U can report cable presence for both A or B
-	 * in bits 0 and 1 of cableStatus.  See which port we are and 
+	 * in bits 0 and 1 of cableStatus.  See which port we are and
 	 * do the mapping.
 	 */
 	if (card->family == FST_FAMILY_TXU) {

From 3a950181f6f58b13ee4475cafe45170ed738c2af Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:34 +0800
Subject: [PATCH 1175/3292] net: farsync: code indent use tabs where possible

Code indent should use tabs where possible.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 7653ff078c9c..075f50d4ea99 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -422,7 +422,7 @@ struct buf_window {
 /*      Per port (line or channel) information
  */
 struct fst_port_info {
-        struct net_device *dev; /* Device struct - must be first */
+	struct net_device *dev; /* Device struct - must be first */
 	struct fst_card_info *card;	/* Card we're associated with */
 	int index;		/* Port index on the card */
 	int hwif;		/* Line hardware (lineInterface copy) */
@@ -786,7 +786,7 @@ fst_init_dma(struct fst_card_info *card)
 	/* This is only required for the PLX 9054
 	 */
 	if (card->family == FST_FAMILY_TXU) {
-	        pci_set_master(card->device);
+		pci_set_master(card->device);
 		outl(0x00020441, card->pci_conf + DMAMODE0);
 		outl(0x00020441, card->pci_conf + DMAMODE1);
 		outl(0x0, card->pci_conf + DMATHR);
@@ -1561,7 +1561,7 @@ fst_intr(int dummy, void *dev_id)
 			rdidx = 0;
 	}
 	FST_WRB(card, interruptEvent.rdindex, rdidx);
-        return IRQ_HANDLED;
+	return IRQ_HANDLED;
 }
 
 /*      Check that the shared memory configuration is one that we can handle
@@ -2129,7 +2129,7 @@ fst_open(struct net_device *dev)
 
 	port = dev_to_port(dev);
 	if (!try_module_get(THIS_MODULE))
-          return -EBUSY;
+		return -EBUSY;
 
 	if (port->mode != FST_RAW) {
 		err = hdlc_open(dev);
@@ -2421,9 +2421,9 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 				(ent->driver_data == FST_TYPE_T2U)) ? 2 : 4;
 
 	card->state = FST_UNINIT;
-        spin_lock_init ( &card->card_lock );
+	spin_lock_init(&card->card_lock);
 
-        for ( i = 0 ; i < card->nports ; i++ ) {
+	for (i = 0; i < card->nports; i++) {
 		struct net_device *dev = alloc_hdlcdev(&card->ports[i]);
 		hdlc_device *hdlc;
 
@@ -2435,29 +2435,29 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			goto hdlcdev_fail;
 		}
 		card->ports[i].dev    = dev;
-                card->ports[i].card   = card;
-                card->ports[i].index  = i;
-                card->ports[i].run    = 0;
+		card->ports[i].card   = card;
+		card->ports[i].index  = i;
+		card->ports[i].run    = 0;
 
 		hdlc = dev_to_hdlc(dev);
 
-                /* Fill in the net device info */
+		/* Fill in the net device info */
 		/* Since this is a PCI setup this is purely
 		 * informational. Give them the buffer addresses
 		 * and basic card I/O.
 		 */
-                dev->mem_start   = card->phys_mem
-                                 + BUF_OFFSET ( txBuffer[i][0][0]);
-                dev->mem_end     = card->phys_mem
-                                 + BUF_OFFSET ( txBuffer[i][NUM_TX_BUFFER - 1][LEN_RX_BUFFER - 1]);
-                dev->base_addr   = card->pci_conf;
-                dev->irq         = card->irq;
+		dev->mem_start   = card->phys_mem
+				+ BUF_OFFSET(txBuffer[i][0][0]);
+		dev->mem_end     = card->phys_mem
+				+ BUF_OFFSET(txBuffer[i][NUM_TX_BUFFER - 1][LEN_RX_BUFFER - 1]);
+		dev->base_addr   = card->pci_conf;
+		dev->irq         = card->irq;
 
 		dev->netdev_ops = &fst_ops;
 		dev->tx_queue_len = FST_TX_QUEUE_LEN;
 		dev->watchdog_timeo = FST_TX_TIMEOUT;
-                hdlc->attach = fst_attach;
-                hdlc->xmit   = fst_start_xmit;
+		hdlc->attach = fst_attach;
+		hdlc->xmit   = fst_start_xmit;
 	}
 
 	card->device = pdev;

From 7619ab161892edb6826504f8549ee78e568cf793 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:35 +0800
Subject: [PATCH 1176/3292] net: farsync: fix the code style issue about macros

Macros with complex values should be enclosed in parentheses.
space required after that ',' .

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 075f50d4ea99..f2cd832a4657 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -489,13 +489,13 @@ struct fst_card_info {
  */
 #define WIN_OFFSET(X)   ((long)&(((struct fst_shared *)SMC_BASE)->X))
 
-#define FST_RDB(C,E)    readb ((C)->mem + WIN_OFFSET(E))
-#define FST_RDW(C,E)    readw ((C)->mem + WIN_OFFSET(E))
-#define FST_RDL(C,E)    readl ((C)->mem + WIN_OFFSET(E))
+#define FST_RDB(C, E)    (readb((C)->mem + WIN_OFFSET(E)))
+#define FST_RDW(C, E)    (readw((C)->mem + WIN_OFFSET(E)))
+#define FST_RDL(C, E)    (readl((C)->mem + WIN_OFFSET(E)))
 
-#define FST_WRB(C,E,B)  writeb ((B), (C)->mem + WIN_OFFSET(E))
-#define FST_WRW(C,E,W)  writew ((W), (C)->mem + WIN_OFFSET(E))
-#define FST_WRL(C,E,L)  writel ((L), (C)->mem + WIN_OFFSET(E))
+#define FST_WRB(C, E, B)  (writeb((B), (C)->mem + WIN_OFFSET(E)))
+#define FST_WRW(C, E, W)  (writew((W), (C)->mem + WIN_OFFSET(E)))
+#define FST_WRL(C, E, L)  (writel((L), (C)->mem + WIN_OFFSET(E)))
 
 /*      Debug support
  */

From 37947a9be3d1a5d551e34f56653c5ccf86814cc3 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:36 +0800
Subject: [PATCH 1177/3292] net: farsync: add some required spaces

Add spaces required around that '=' and '*'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index f2cd832a4657..8f39be4f8ceb 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -63,7 +63,7 @@ MODULE_LICENSE("GPL");
 #define FST_MAX_MTU             8000	/* Huge but possible */
 #define FST_DEF_MTU             1500	/* Common sane value */
 
-#define FST_TX_TIMEOUT          (2*HZ)
+#define FST_TX_TIMEOUT          (2 * HZ)
 
 #ifdef ARPHRD_RAWHDLC
 #define ARPHRD_MYTYPE   ARPHRD_RAWHDLC	/* Raw frames */
@@ -1144,7 +1144,7 @@ fst_recover_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 	i = 0;
 	while ((dmabits & (DMA_OWN | RX_STP)) == 0) {
 		FST_WRB(card, rxDescrRing[pi][rxp].bits, DMA_OWN);
-		rxp = (rxp+1) % NUM_RX_BUFFER;
+		rxp = (rxp + 1) % NUM_RX_BUFFER;
 		if (++i > NUM_RX_BUFFER) {
 			dbg(DBG_ASS, "intr_rx: Discarding more bufs"
 			    " than we have\n");
@@ -1158,7 +1158,7 @@ fst_recover_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 	/* Discard the terminal buffer */
 	if (!(dmabits & DMA_OWN)) {
 		FST_WRB(card, rxDescrRing[pi][rxp].bits, DMA_OWN);
-		rxp = (rxp+1) % NUM_RX_BUFFER;
+		rxp = (rxp + 1) % NUM_RX_BUFFER;
 	}
 	port->rxpos = rxp;
 	return;
@@ -1203,7 +1203,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 		/* Return descriptor to card */
 		FST_WRB(card, rxDescrRing[pi][rxp].bits, DMA_OWN);
 
-		rxp = (rxp+1) % NUM_RX_BUFFER;
+		rxp = (rxp + 1) % NUM_RX_BUFFER;
 		port->rxpos = rxp;
 		return;
 	}
@@ -1229,7 +1229,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 		/* Return descriptor to card */
 		FST_WRB(card, rxDescrRing[pi][rxp].bits, DMA_OWN);
 
-		rxp = (rxp+1) % NUM_RX_BUFFER;
+		rxp = (rxp + 1) % NUM_RX_BUFFER;
 		port->rxpos = rxp;
 		return;
 	}
@@ -1273,7 +1273,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 		dbg(DBG_ASS, "About to increment rxpos by more than 1\n");
 		dbg(DBG_ASS, "rxp = %d rxpos = %d\n", rxp, port->rxpos);
 	}
-	rxp = (rxp+1) % NUM_RX_BUFFER;
+	rxp = (rxp + 1) % NUM_RX_BUFFER;
 	port->rxpos = rxp;
 }
 

From fa8d10b54760c4869e7d3d6b57e4e5a694f0d112 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:37 +0800
Subject: [PATCH 1178/3292] net: farsync: remove redundant braces {}

This patch removes redundant braces {}, to fix the
checkpatch.pl warning:
"braces {} are not necessary for single statement blocks".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 86 ++++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 51 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 8f39be4f8ceb..8b96f35a2cc7 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -739,11 +739,10 @@ fst_clear_intr(struct fst_card_info *card)
 static inline void
 fst_enable_intr(struct fst_card_info *card)
 {
-	if (card->family == FST_FAMILY_TXU) {
+	if (card->family == FST_FAMILY_TXU)
 		outl(0x0f0c0900, card->pci_conf + INTCSR_9054);
-	} else {
+	else
 		outw(0x0543, card->pci_conf + INTCSR_9052);
-	}
 }
 
 /*      Disable card interrupts
@@ -751,11 +750,10 @@ fst_enable_intr(struct fst_card_info *card)
 static inline void
 fst_disable_intr(struct fst_card_info *card)
 {
-	if (card->family == FST_FAMILY_TXU) {
+	if (card->family == FST_FAMILY_TXU)
 		outl(0x00000000, card->pci_conf + INTCSR_9054);
-	} else {
+	else
 		outw(0x0000, card->pci_conf + INTCSR_9052);
-	}
 }
 
 /*      Process the result of trying to pass a received frame up the stack
@@ -863,9 +861,8 @@ fst_rx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 	 */
 
 	dbg(DBG_RX, "In fst_rx_dma %x %x %d\n", (u32)dma, mem, len);
-	if (card->dmarx_in_progress) {
+	if (card->dmarx_in_progress)
 		dbg(DBG_ASS, "In fst_rx_dma while dma in progress\n");
-	}
 
 	outl(dma, card->pci_conf + DMAPADR0);	/* Copy to here */
 	outl(mem, card->pci_conf + DMALADR0);	/* from here */
@@ -887,9 +884,8 @@ fst_tx_dma(struct fst_card_info *card, dma_addr_t dma, u32 mem, int len)
 	 */
 
 	dbg(DBG_TX, "In fst_tx_dma %x %x %d\n", (u32)dma, mem, len);
-	if (card->dmatx_in_progress) {
+	if (card->dmatx_in_progress)
 		dbg(DBG_ASS, "In fst_tx_dma while dma in progress\n");
-	}
 
 	outl(dma, card->pci_conf + DMAPADR1);	/* Copy from here */
 	outl(mem, card->pci_conf + DMALADR1);	/* to here */
@@ -932,12 +928,11 @@ fst_issue_cmd(struct fst_port_info *port, unsigned short cmd)
 
 		mbval = FST_RDW(card, portMailbox[port->index][0]);
 	}
-	if (safety > 0) {
+	if (safety > 0)
 		dbg(DBG_CMD, "Mailbox clear after %d jiffies\n", safety);
-	}
-	if (mbval == NAK) {
+
+	if (mbval == NAK)
 		dbg(DBG_CMD, "issue_cmd: previous command was NAK'd\n");
-	}
 
 	FST_WRW(card, portMailbox[port->index][0], cmd);
 
@@ -1186,9 +1181,8 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 		    pi, rxp);
 		return;
 	}
-	if (card->dmarx_in_progress) {
+	if (card->dmarx_in_progress)
 		return;
-	}
 
 	/* Get buffer length */
 	len = FST_RDW(card, rxDescrRing[pi][rxp].mcnt);
@@ -1323,9 +1317,9 @@ do_bottom_half_tx(struct fst_card_info *card)
 				spin_lock_irqsave(&card->card_lock, flags);
 				skb = port->txq[port->txqs];
 				port->txqs++;
-				if (port->txqs == FST_TXQ_DEPTH) {
+				if (port->txqs == FST_TXQ_DEPTH)
 					port->txqs = 0;
-				}
+
 				spin_unlock_irqrestore(&card->card_lock, flags);
 				/* copy the data and set the required indicators on the
 				 * card.
@@ -1489,9 +1483,8 @@ fst_intr(int dummy, void *dev_id)
 		FST_WRL(card, interruptRetryCount, 0);
 	}
 
-	if (!do_card_interrupt) {
+	if (!do_card_interrupt)
 		return IRQ_HANDLED;
-	}
 
 	/* Scehdule the bottom half of the ISR */
 	fst_q_work_item(&fst_work_intq, card->card_no);
@@ -1691,9 +1684,8 @@ set_conf_from_info(struct fst_card_info *card, struct fst_port_info *port,
 #endif
 	}
 #if FST_DEBUG
-	if (info->valid & FSTVAL_DEBUG) {
+	if (info->valid & FSTVAL_DEBUG)
 		fst_debug_mask = info->debug;
-	}
 #endif
 
 	return err;
@@ -1798,14 +1790,12 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
 	sync_serial_settings sync;
 	int i;
 
-	if (ifr->ifr_settings.size != sizeof (sync)) {
+	if (ifr->ifr_settings.size != sizeof(sync))
 		return -ENOMEM;
-	}
 
 	if (copy_from_user
-	    (&sync, ifr->ifr_settings.ifs_ifsu.sync, sizeof (sync))) {
+	    (&sync, ifr->ifr_settings.ifs_ifsu.sync, sizeof(sync)))
 		return -EFAULT;
-	}
 
 	if (sync.loopback)
 		return -EINVAL;
@@ -1898,12 +1888,11 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
 		ifr->ifr_settings.type = IF_IFACE_X21;
 		break;
 	}
-	if (ifr->ifr_settings.size == 0) {
+	if (ifr->ifr_settings.size == 0)
 		return 0;	/* only type requested */
-	}
-	if (ifr->ifr_settings.size < sizeof (sync)) {
+
+	if (ifr->ifr_settings.size < sizeof(sync))
 		return -ENOMEM;
-	}
 
 	i = port->index;
 	memset(&sync, 0, sizeof(sync));
@@ -1913,9 +1902,8 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
 	    INTCLK ? CLOCK_INT : CLOCK_EXT;
 	sync.loopback = 0;
 
-	if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sync, sizeof (sync))) {
+	if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sync, sizeof(sync)))
 		return -EFAULT;
-	}
 
 	ifr->ifr_settings.size = sizeof (sync);
 	return 0;
@@ -1955,21 +1943,19 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		/* First copy in the header with the length and offset of data
 		 * to write
 		 */
-		if (ifr->ifr_data == NULL) {
+		if (!ifr->ifr_data)
 			return -EINVAL;
-		}
+
 		if (copy_from_user(&wrthdr, ifr->ifr_data,
-				   sizeof (struct fstioc_write))) {
+				   sizeof(struct fstioc_write)))
 			return -EFAULT;
-		}
 
 		/* Sanity check the parameters. We don't support partial writes
 		 * when going over the top
 		 */
 		if (wrthdr.size > FST_MEMSIZE || wrthdr.offset > FST_MEMSIZE ||
-		    wrthdr.size + wrthdr.offset > FST_MEMSIZE) {
+		    wrthdr.size + wrthdr.offset > FST_MEMSIZE)
 			return -ENXIO;
-		}
 
 		/* Now copy the data to the card. */
 
@@ -1984,9 +1970,9 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		/* Writes to the memory of a card in the reset state constitute
 		 * a download
 		 */
-		if (card->state == FST_RESET) {
+		if (card->state == FST_RESET)
 			card->state = FST_DOWNLOAD;
-		}
+
 		return 0;
 
 	case FSTGETCONF:
@@ -2006,15 +1992,14 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			}
 		}
 
-		if (ifr->ifr_data == NULL) {
+		if (!ifr->ifr_data)
 			return -EINVAL;
-		}
 
 		gather_conf_info(card, port, &info);
 
-		if (copy_to_user(ifr->ifr_data, &info, sizeof (info))) {
+		if (copy_to_user(ifr->ifr_data, &info, sizeof(info)))
 			return -EFAULT;
-		}
+
 		return 0;
 
 	case FSTSETCONF:
@@ -2027,9 +2012,8 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			       card->card_no, card->state);
 			return -EIO;
 		}
-		if (copy_from_user(&info, ifr->ifr_data, sizeof (info))) {
+		if (copy_from_user(&info, ifr->ifr_data, sizeof(info)))
 			return -EFAULT;
-		}
 
 		return set_conf_from_info(card, port, &info);
 
@@ -2164,9 +2148,9 @@ fst_close(struct net_device *dev)
 
 	netif_stop_queue(dev);
 	fst_closeport(dev_to_port(dev));
-	if (port->mode != FST_RAW) {
+	if (port->mode != FST_RAW)
 		hdlc_close(dev);
-	}
+
 	module_put(THIS_MODULE);
 	return 0;
 }
@@ -2366,7 +2350,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* Allocate driver private data */
 	card = kzalloc(sizeof(struct fst_card_info), GFP_KERNEL);
-	if (card == NULL)
+	if (!card)
 		return -ENOMEM;
 
 	/* Try to enable the device */
@@ -2494,7 +2478,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		card->rx_dma_handle_host =
 		    dma_alloc_coherent(&card->device->dev, FST_MAX_MTU,
 				       &card->rx_dma_handle_card, GFP_KERNEL);
-		if (card->rx_dma_handle_host == NULL) {
+		if (!card->rx_dma_handle_host) {
 			pr_err("Could not allocate rx dma buffer\n");
 			err = -ENOMEM;
 			goto rx_dma_fail;
@@ -2502,7 +2486,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		card->tx_dma_handle_host =
 		    dma_alloc_coherent(&card->device->dev, FST_MAX_MTU,
 				       &card->tx_dma_handle_card, GFP_KERNEL);
-		if (card->tx_dma_handle_host == NULL) {
+		if (!card->tx_dma_handle_host) {
 			pr_err("Could not allocate tx dma buffer\n");
 			err = -ENOMEM;
 			goto tx_dma_fail;

From b64b5aee73580ec223a95d28b359117b52436ff9 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:38 +0800
Subject: [PATCH 1179/3292] net: farsync: remove redundant spaces

According to the chackpatch.pl,
space prohibited between function name and open parenthesis '(',
no space is necessary after a cast.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 8b96f35a2cc7..bbe87d929609 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -708,7 +708,7 @@ fst_cpurelease(struct fst_card_info *card)
 	if (card->family == FST_FAMILY_TXU) {
 		/* Force posted writes to complete
 		 */
-		(void) readb(card->mem);
+		(void)readb(card->mem);
 
 		/* Release LRESET DO = 1
 		 * Then release Local Hold, DO = 1
@@ -716,7 +716,7 @@ fst_cpurelease(struct fst_card_info *card)
 		outw(0x040e, card->pci_conf + CNTRL_9054 + 2);
 		outw(0x040f, card->pci_conf + CNTRL_9054 + 2);
 	} else {
-		(void) readb(card->ctlmem);
+		(void)readb(card->ctlmem);
 	}
 }
 
@@ -726,7 +726,7 @@ static inline void
 fst_clear_intr(struct fst_card_info *card)
 {
 	if (card->family == FST_FAMILY_TXU) {
-		(void) readb(card->ctlmem);
+		(void)readb(card->ctlmem);
 	} else {
 		/* Poke the appropriate PLX chip register (same as enabling interrupts)
 		 */
@@ -984,8 +984,8 @@ fst_rx_config(struct fst_port_info *port)
 	for (i = 0; i < NUM_RX_BUFFER; i++) {
 		offset = BUF_OFFSET(rxBuffer[pi][i][0]);
 
-		FST_WRW(card, rxDescrRing[pi][i].ladr, (u16) offset);
-		FST_WRB(card, rxDescrRing[pi][i].hadr, (u8) (offset >> 16));
+		FST_WRW(card, rxDescrRing[pi][i].ladr, (u16)offset);
+		FST_WRB(card, rxDescrRing[pi][i].hadr, (u8)(offset >> 16));
 		FST_WRW(card, rxDescrRing[pi][i].bcnt, cnv_bcnt(LEN_RX_BUFFER));
 		FST_WRW(card, rxDescrRing[pi][i].mcnt, LEN_RX_BUFFER);
 		FST_WRB(card, rxDescrRing[pi][i].bits, DMA_OWN);
@@ -1011,8 +1011,8 @@ fst_tx_config(struct fst_port_info *port)
 	for (i = 0; i < NUM_TX_BUFFER; i++) {
 		offset = BUF_OFFSET(txBuffer[pi][i][0]);
 
-		FST_WRW(card, txDescrRing[pi][i].ladr, (u16) offset);
-		FST_WRB(card, txDescrRing[pi][i].hadr, (u8) (offset >> 16));
+		FST_WRW(card, txDescrRing[pi][i].ladr, (u16)offset);
+		FST_WRB(card, txDescrRing[pi][i].hadr, (u8)(offset >> 16));
 		FST_WRW(card, txDescrRing[pi][i].bcnt, 0);
 		FST_WRB(card, txDescrRing[pi][i].bits, 0);
 	}
@@ -1697,7 +1697,7 @@ gather_conf_info(struct fst_card_info *card, struct fst_port_info *port,
 {
 	int i;
 
-	memset(info, 0, sizeof (struct fstioc_info));
+	memset(info, 0, sizeof(struct fstioc_info));
 
 	i = port->index;
 	info->kernelVersion = LINUX_VERSION_CODE;
@@ -1905,7 +1905,7 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
 	if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sync, sizeof(sync)))
 		return -EFAULT;
 
-	ifr->ifr_settings.size = sizeof (sync);
+	ifr->ifr_settings.size = sizeof(sync);
 	return 0;
 }
 

From ae1be3fad5695fcb5ad1053b4847f89431f7b922 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:39 +0800
Subject: [PATCH 1180/3292] net: farsync: remove redundant parentheses

Unnecessary parentheses around 'port->hwif == X21'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index bbe87d929609..f6919cf050d8 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -1075,7 +1075,7 @@ fst_intr_ctlchg(struct fst_card_info *card, struct fst_port_info *port)
 
 	signals = FST_RDL(card, v24DebouncedSts[port->index]);
 
-	if (signals & (((port->hwif == X21) || (port->hwif == X21D))
+	if (signals & ((port->hwif == X21 || port->hwif == X21D)
 		       ? IPSTS_INDICATE : IPSTS_DCD)) {
 		if (!netif_carrier_ok(port_to_dev(port))) {
 			dbg(DBG_INTR, "DCD active\n");
@@ -1233,7 +1233,7 @@ fst_intr_rx(struct fst_card_info *card, struct fst_port_info *port)
 	 * FST_MIN_DMA_LEN
 	 */
 
-	if ((len < FST_MIN_DMA_LEN) || (card->family == FST_FAMILY_TXP)) {
+	if (len < FST_MIN_DMA_LEN || card->family == FST_FAMILY_TXP) {
 		memcpy_fromio(skb_put(skb, len),
 			      card->mem + BUF_OFFSET(rxBuffer[pi][rxp][0]),
 			      len);
@@ -1326,8 +1326,8 @@ do_bottom_half_tx(struct fst_card_info *card)
 				 */
 				FST_WRW(card, txDescrRing[pi][port->txpos].bcnt,
 					cnv_bcnt(skb->len));
-				if ((skb->len < FST_MIN_DMA_LEN) ||
-				    (card->family == FST_FAMILY_TXP)) {
+				if (skb->len < FST_MIN_DMA_LEN ||
+				    card->family == FST_FAMILY_TXP) {
 					/* Enqueue the packet with normal io */
 					memcpy_toio(card->mem +
 						    BUF_OFFSET(txBuffer[pi]
@@ -2079,7 +2079,7 @@ fst_openport(struct fst_port_info *port)
 		port->run = 1;
 
 		signals = FST_RDL(port->card, v24DebouncedSts[port->index]);
-		if (signals & (((port->hwif == X21) || (port->hwif == X21D))
+		if (signals & ((port->hwif == X21 || port->hwif == X21D)
 			       ? IPSTS_INDICATE : IPSTS_DCD))
 			netif_carrier_on(port_to_dev(port));
 		else
@@ -2340,7 +2340,7 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		 *
 		 */
 		for (i = 0; i < fst_excluded_cards; i++) {
-			if ((pdev->devfn) >> 3 == fst_excluded_list[i]) {
+			if (pdev->devfn >> 3 == fst_excluded_list[i]) {
 				pr_info("FarSync PCI device %d not assigned\n",
 					(pdev->devfn) >> 3);
 				return -EBUSY;
@@ -2397,8 +2397,8 @@ fst_add_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	card->family = ((ent->driver_data == FST_TYPE_T2P) ||
 			(ent->driver_data == FST_TYPE_T4P))
 	    ? FST_FAMILY_TXP : FST_FAMILY_TXU;
-	if ((ent->driver_data == FST_TYPE_T1U) ||
-	    (ent->driver_data == FST_TYPE_TE1))
+	if (ent->driver_data == FST_TYPE_T1U ||
+	    ent->driver_data == FST_TYPE_TE1)
 		card->nports = 1;
 	else
 		card->nports = ((ent->driver_data == FST_TYPE_T2P) ||

From d2a1054b8b02afdaae8119fdd7bf1171fd2fce2b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:40 +0800
Subject: [PATCH 1181/3292] net: farsync: fix the alignment issue

Alignment should match open parenthesis.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index f6919cf050d8..3aea3d36b3c3 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -2293,7 +2293,7 @@ fst_init_card(struct fst_card_info *card)
 		err = register_hdlc_device(card->ports[i].dev);
 		if (err < 0) {
 			pr_err("Cannot register HDLC device for port %d (errno %d)\n",
-				i, -err);
+			       i, -err);
 			while (i--)
 				unregister_hdlc_device(card->ports[i].dev);
 			return err;

From f01f906ffefc71e5d370e7542884820d1318358b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:41 +0800
Subject: [PATCH 1182/3292] net: farsync: remove redundant return

Void function return statements are not generally useful.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 3aea3d36b3c3..10208f058a52 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -1156,7 +1156,6 @@ fst_recover_rx_error(struct fst_card_info *card, struct fst_port_info *port,
 		rxp = (rxp + 1) % NUM_RX_BUFFER;
 	}
 	port->rxpos = rxp;
-	return;
 }
 
 /*      Rx complete interrupt

From f23a3da78a31d2ec7029c9637bf57ec892ada40c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 8 Jun 2021 16:12:42 +0800
Subject: [PATCH 1183/3292] net: farsync: replace comparison to NULL with
 "fst_card_array[i]"

According to the chackpatch.pl, comparison to NULL could
be written "fst_card_array[i]".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 10208f058a52..b3466e084e84 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -611,7 +611,7 @@ fst_process_tx_work_q(struct tasklet_struct *unused)
 	 */
 	for (i = 0; i < FST_MAX_CARDS; i++) {
 		if (work_txq & 0x01) {
-			if (fst_card_array[i] != NULL) {
+			if (fst_card_array[i]) {
 				dbg(DBG_TX, "Calling tx bh for card %d\n", i);
 				do_bottom_half_tx(fst_card_array[i]);
 			}
@@ -639,7 +639,7 @@ fst_process_int_work_q(struct tasklet_struct *unused)
 	 */
 	for (i = 0; i < FST_MAX_CARDS; i++) {
 		if (work_intq & 0x01) {
-			if (fst_card_array[i] != NULL) {
+			if (fst_card_array[i]) {
 				dbg(DBG_INTR,
 				    "Calling rx & tx bh for card %d\n", i);
 				do_bottom_half_rx(fst_card_array[i]);

From 762411542050dbe27c7c96f13c57f93da5d9b89a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 8 Jun 2021 12:56:09 +0200
Subject: [PATCH 1184/3292] nvme: NVME_TCP_OFFLOAD should not default to m

The help text for the symbol controlling support for the NVM Express
over Fabrics TCP offload common layer suggests to not enable this
support when unsure.

Hence drop the "default m", which actually means "default y" if
CONFIG_MODULES is not enabled.

Fixes: f0e8cb6106da2703 ("nvme-tcp-offload: Add nvme-tcp-offload - NVMeTCP HW offload ULP")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index caedc35e1f0d..a01e3f380e74 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -87,7 +87,6 @@ config NVME_TCP
 
 config NVME_TCP_OFFLOAD
 	tristate "NVM Express over Fabrics TCP offload common layer"
-	default m
 	depends on BLOCK
 	depends on INET
 	select NVME_CORE

From 0779890fed7817725f399d6ec85730e08ebfdeee Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 3 Jun 2021 19:05:16 +0200
Subject: [PATCH 1185/3292] tools/bpftool: Fix cross-build

When the bootstrap and final bpftool have different architectures, we
need to build two distinct disasm.o objects. Add a recipe for the
bootstrap disasm.o.

After commit d510296d331a ("bpftool: Use syscall/loader program in
"prog load" and "gen skeleton" command.") cross-building bpftool didn't
work anymore, because the bootstrap bpftool was linked using objects
from different architectures:

  $ make O=/tmp/bpftool ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/bpf/bpftool/ V=1
  [...]
  aarch64-linux-gnu-gcc ... -c -MMD -o /tmp/bpftool/disasm.o /home/z/src/linux/kernel/bpf/disasm.c
  gcc ... -c -MMD -o /tmp/bpftool//bootstrap/main.o main.c
  gcc ... -o /tmp/bpftool//bootstrap/bpftool /tmp/bpftool//bootstrap/main.o ... /tmp/bpftool/disasm.o
  /usr/bin/ld: /tmp/bpftool/disasm.o: Relocations in generic ELF (EM: 183)
  /usr/bin/ld: /tmp/bpftool/disasm.o: Relocations in generic ELF (EM: 183)
  /usr/bin/ld: /tmp/bpftool/disasm.o: Relocations in generic ELF (EM: 183)
  /usr/bin/ld: /tmp/bpftool/disasm.o: error adding symbols: file in wrong format
  collect2: error: ld returned 1 exit status
  [...]

The final bpftool was built for e.g. arm64, while the bootstrap bpftool,
executed on the host, was built for x86. The problem here was that disasm.o
linked into the bootstrap bpftool was arm64 rather than x86. With the fix
we build two disasm.o, one for the target bpftool in arm64, and one for
the bootstrap bpftool in x86.

Fixes: d510296d331a ("bpftool: Use syscall/loader program in "prog load" and "gen skeleton" command.")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210603170515.1854642-1-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index d16d289ade7a..d73232be1e99 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -136,7 +136,7 @@ endif
 
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o) $(OUTPUT)disasm.o
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
@@ -180,6 +180,9 @@ endif
 
 CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
 
+$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+	$(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 

From edc0571c5f67c7e24958149a8ec6a904ca84840b Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Fri, 4 Jun 2021 13:24:48 +0200
Subject: [PATCH 1186/3292] libbpf: Fix pr_warn type warnings on 32bit

The printed value is ptrdiff_t and is formatted wiht %ld. This works on
64bit but produces a warning on 32bit. Fix the format specifier to %td.

Fixes: 67234743736a ("libbpf: Generate loader program out of BPF ELF file.")
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210604112448.32297-1-msuchanek@suse.de
---
 tools/lib/bpf/libbpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1c4e20e75237..65f87cc1220c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4581,7 +4581,7 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
 		targ_map = map->init_slots[i];
 		fd = bpf_map__fd(targ_map);
 		if (obj->gen_loader) {
-			pr_warn("// TODO map_update_elem: idx %ld key %d value==map_idx %ld\n",
+			pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
 				map - obj->maps, i, targ_map - obj->maps);
 			return -ENOTSUP;
 		} else {
@@ -6205,7 +6205,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 		return -EINVAL;
 
 	if (prog->obj->gen_loader) {
-		pr_warn("// TODO core_relo: prog %ld insn[%d] %s %s kind %d\n",
+		pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
 			prog - prog->obj->programs, relo->insn_off / 8,
 			local_name, spec_str, relo->kind);
 		return -ENOTSUP;

From 380afe7208966ab59e5215c9daab3f6b06193d8c Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Mon, 7 Jun 2021 18:57:56 -0700
Subject: [PATCH 1187/3292] selftests, bpf: Make docs tests fail more reliably

Previously, if rst2man caught errors, then these would be ignored and
the output file would be written anyway. This would allow developers to
introduce regressions in the docs comments in the BPF headers.

Additionally, even if you instruct rst2man to fail out, it will still
write out to the destination target file, so if you ran the tests twice
in a row it would always pass. Use a temporary file for the initial run
to ensure that if rst2man fails out under "--strict" mode, subsequent
runs will not automatically pass.

Tested via ./tools/testing/selftests/bpf/test_doc_build.sh

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20210608015756.340385-1-joe@cilium.io
---
 tools/testing/selftests/bpf/.gitignore        | 1 +
 tools/testing/selftests/bpf/Makefile.docs     | 3 ++-
 tools/testing/selftests/bpf/test_doc_build.sh | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a18f57044014..eae6fc7d3ed8 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -37,5 +37,6 @@ test_cpp
 /runqslower
 /bench
 *.ko
+*.tmp
 xdpxceiver
 xdp_redirect_multi
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index ccf260021e83..eb6a4fea8c79 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst
 ifndef RST2MAN_DEP
 	$$(error "rst2man not found, but required to generate man pages")
 endif
-	$$(QUIET_GEN)rst2man $$< > $$@
+	$$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+	$$(QUIET_GEN)mv $$@.tmp $$@
 
 docs-clean-$1:
 	$$(call QUIET_CLEAN, eBPF_$1-manpage)
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index 7eb940a7b2eb..ed12111cd2f0 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
 
 # Assume script is located under tools/testing/selftests/bpf/. We want to start
 # build attempts from the top of kernel repository.

From c9c9762d4d44dcb1b2ba90cfb4122dc11ceebf31 Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Mon, 7 Jun 2021 12:34:05 -0700
Subject: [PATCH 1188/3292] block: return the correct bvec when checking for
 gaps

After commit 07173c3ec276 ("block: enable multipage bvecs"), a bvec can
have multiple pages. But bio_will_gap() still assumes one page bvec while
checking for merging. If the pages in the bvec go across the
seg_boundary_mask, this check for merging can potentially succeed if only
the 1st page is tested, and can fail if all the pages are tested.

Later, when SCSI builds the SG list the same check for merging is done in
__blk_segment_map_sg_merge() with all the pages in the bvec tested. This
time the check may fail if the pages in bvec go across the
seg_boundary_mask (but tested okay in bio_will_gap() earlier, so those
BIOs were merged). If this check fails, we end up with a broken SG list
for drivers assuming the SG list not having offsets in intermediate pages.
This results in incorrect pages written to the disk.

Fix this by returning the multi-page bvec when testing gaps for merging.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Jeffle Xu <jefflexu@linux.alibaba.com>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Fixes: 07173c3ec276 ("block: enable multipage bvecs")
Signed-off-by: Long Li <longli@microsoft.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/1623094445-22332-1-git-send-email-longli@linuxonhyperv.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/linux/bio.h b/include/linux/bio.h
index a0b4cfdf62a4..d2b98efb5cc5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -44,9 +44,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
 #define bio_offset(bio)		bio_iter_offset((bio), (bio)->bi_iter)
 #define bio_iovec(bio)		bio_iter_iovec((bio), (bio)->bi_iter)
 
-#define bio_multiple_segments(bio)				\
-	((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
-
 #define bvec_iter_sectors(iter)	((iter).bi_size >> 9)
 #define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))
 
@@ -271,7 +268,7 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
 
 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
 {
-	*bv = bio_iovec(bio);
+	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
 }
 
 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
@@ -279,10 +276,9 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
 	struct bvec_iter iter = bio->bi_iter;
 	int idx;
 
-	if (unlikely(!bio_multiple_segments(bio))) {
-		*bv = bio_iovec(bio);
-		return;
-	}
+	bio_get_first_bvec(bio, bv);
+	if (bv->bv_len == bio->bi_iter.bi_size)
+		return;		/* this bio only has a single bvec */
 
 	bio_advance_iter(bio, &iter, iter.bi_size);
 

From 11c7aa0ddea8611007768d3e6b58d45dc60a19e1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 7 Jun 2021 13:26:13 +0200
Subject: [PATCH 1189/3292] rq-qos: fix missed wake-ups in rq_qos_throttle try
 two

Commit 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle")
tried to fix a problem that a process could be sleeping in rq_qos_wait()
without anyone to wake it up. However the fix is not complete and the
following can still happen:

CPU1 (waiter1)		CPU2 (waiter2)		CPU3 (waker)
rq_qos_wait()		rq_qos_wait()
  acquire_inflight_cb() -> fails
			  acquire_inflight_cb() -> fails

						completes IOs, inflight
						  decreased
  prepare_to_wait_exclusive()
			  prepare_to_wait_exclusive()
  has_sleeper = !wq_has_single_sleeper() -> true as there are two sleepers
			  has_sleeper = !wq_has_single_sleeper() -> true
  io_schedule()		  io_schedule()

Deadlock as now there's nobody to wakeup the two waiters. The logic
automatically blocking when there are already sleepers is really subtle
and the only way to make it work reliably is that we check whether there
are some waiters in the queue when adding ourselves there. That way, we
are guaranteed that at least the first process to enter the wait queue
will recheck the waiting condition before going to sleep and thus
guarantee forward progress.

Fixes: 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle")
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210607112613.25344-1-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-rq-qos.c   | 4 ++--
 include/linux/wait.h | 2 +-
 kernel/sched/wait.c  | 9 +++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 656460636ad3..e83af7bc7591 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -266,8 +266,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
 		return;
 
-	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
-	has_sleeper = !wq_has_single_sleeper(&rqw->wait);
+	has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
+						 TASK_UNINTERRUPTIBLE);
 	do {
 		/* The memory barrier in set_task_state saves us here. */
 		if (data.got_token)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index fe10e8570a52..6598ae35e1b5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1136,7 +1136,7 @@ do {										\
  * Waitqueues which are removed from the waitqueue_head at wakeup time
  */
 void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
-void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 183cc6ae68a6..76577d1642a5 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -264,17 +264,22 @@ prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_ent
 }
 EXPORT_SYMBOL(prepare_to_wait);
 
-void
+/* Returns true if we are the first waiter in the queue, false otherwise. */
+bool
 prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 {
 	unsigned long flags;
+	bool was_empty = false;
 
 	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
 	spin_lock_irqsave(&wq_head->lock, flags);
-	if (list_empty(&wq_entry->entry))
+	if (list_empty(&wq_entry->entry)) {
+		was_empty = list_empty(&wq_head->head);
 		__add_wait_queue_entry_tail(wq_head, wq_entry);
+	}
 	set_current_state(state);
 	spin_unlock_irqrestore(&wq_head->lock, flags);
+	return was_empty;
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 

From 597a68ce32167e7d07bf40648e1501f786f60f99 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Tue, 8 Jun 2021 11:51:56 +0800
Subject: [PATCH 1190/3292] net: stmmac: split xPCS setup from mdio register

This patch is a preparation patch for the enabling of Intel mGbE 2.5Gbps
link speed. The Intel mGbR link speed configuration (1G/2.5G) is depends on
a mdio ADHOC register which can be configured in the bios menu.
As PHY interface might be different for 1G and 2.5G, the mdio bus need be
ready to check the link speed and select the PHY interface before probing
the xPCS.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |  1 +
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  9 +++
 .../net/ethernet/stmicro/stmmac/stmmac_mdio.c | 64 ++++++++++---------
 3 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index b6cd43eda7ac..fd7212afc543 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -311,6 +311,7 @@ enum stmmac_state {
 int stmmac_mdio_unregister(struct net_device *ndev);
 int stmmac_mdio_register(struct net_device *ndev);
 int stmmac_mdio_reset(struct mii_bus *mii);
+int stmmac_xpcs_setup(struct mii_bus *mii);
 void stmmac_set_ethtool_ops(struct net_device *netdev);
 
 void stmmac_ptp_register(struct stmmac_priv *priv);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0a266fa0af7e..af406ea3dd46 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6993,6 +6993,14 @@ int stmmac_dvr_probe(struct device *device,
 		}
 	}
 
+	if (priv->plat->mdio_bus_data) {
+		if (priv->plat->mdio_bus_data->has_xpcs) {
+			ret = stmmac_xpcs_setup(priv->mii);
+			if (ret)
+				goto error_xpcs_setup;
+		}
+	}
+
 	ret = stmmac_phy_setup(priv);
 	if (ret) {
 		netdev_err(ndev, "failed to setup phy (%d)\n", ret);
@@ -7029,6 +7037,7 @@ error_serdes_powerup:
 	unregister_netdev(ndev);
 error_netdev_register:
 	phylink_destroy(priv->phylink);
+error_xpcs_setup:
 error_phy_setup:
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 6312a152c8ad..bc900e240da2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -397,6 +397,41 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 	return 0;
 }
 
+int stmmac_xpcs_setup(struct mii_bus *bus)
+{
+	int mode, addr;
+	struct net_device *ndev = bus->priv;
+	struct mdio_xpcs_args *xpcs;
+	struct stmmac_priv *priv;
+	struct mdio_device *mdiodev;
+
+	priv = netdev_priv(ndev);
+	mode = priv->plat->phy_interface;
+
+	/* Try to probe the XPCS by scanning all addresses. */
+	for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+		mdiodev = mdio_device_create(bus, addr);
+		if (IS_ERR(mdiodev))
+			continue;
+
+		xpcs = xpcs_create(mdiodev, mode);
+		if (IS_ERR_OR_NULL(xpcs)) {
+			mdio_device_free(mdiodev);
+			continue;
+		}
+
+		priv->hw->xpcs = xpcs;
+		break;
+	}
+
+	if (!priv->hw->xpcs) {
+		dev_warn(priv->device, "No xPCS found\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 /**
  * stmmac_mdio_register
  * @ndev: net device structure
@@ -501,40 +536,11 @@ int stmmac_mdio_register(struct net_device *ndev)
 		goto no_phy_found;
 	}
 
-	/* Try to probe the XPCS by scanning all addresses. */
-	if (mdio_bus_data->has_xpcs) {
-		int mode = priv->plat->phy_interface;
-		struct mdio_device *mdiodev;
-		struct mdio_xpcs_args *xpcs;
-
-		for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
-			mdiodev = mdio_device_create(new_bus, addr);
-			if (IS_ERR(mdiodev))
-				continue;
-
-			xpcs = xpcs_create(mdiodev, mode);
-			if (IS_ERR_OR_NULL(xpcs)) {
-				mdio_device_free(mdiodev);
-				continue;
-			}
-
-			priv->hw->xpcs = xpcs;
-			break;
-		}
-
-		if (!priv->hw->xpcs) {
-			dev_warn(dev, "No XPCS found\n");
-			err = -ENODEV;
-			goto no_xpcs_found;
-		}
-	}
-
 bus_register_done:
 	priv->mii = new_bus;
 
 	return 0;
 
-no_xpcs_found:
 no_phy_found:
 	mdiobus_unregister(new_bus);
 bus_register_fail:

From f27abde3042ab4d30d0003eaf5e6641baef94a56 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Tue, 8 Jun 2021 11:51:57 +0800
Subject: [PATCH 1191/3292] net: pcs: add 2500BASEX support for Intel mGbE
 controller

XPCS IP supports 2500BASEX as PHY interface. It is configured as
autonegotiation disable to cater for PHYs that does not supports 2500BASEX
autonegotiation.

v2: Add supported link speed masking.
v3: Restructure to introduce xpcs_config_2500basex() used to configure the
    xpcs for 2.5G speeds. Added 2500BASEX specific information for
    configuration.
v4: Fix indentation error

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c   | 56 ++++++++++++++++++++++++++++++++++++
 include/linux/pcs/pcs-xpcs.h |  1 +
 2 files changed, 57 insertions(+)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 34164437c135..98c4a3973402 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -57,9 +57,12 @@
 
 /* Clause 37 Defines */
 /* VR MII MMD registers offsets */
+#define DW_VR_MII_MMD_CTRL		0x0000
 #define DW_VR_MII_DIG_CTRL1		0x8000
 #define DW_VR_MII_AN_CTRL		0x8001
 #define DW_VR_MII_AN_INTR_STS		0x8002
+/* Enable 2.5G Mode */
+#define DW_VR_MII_DIG_CTRL1_2G5_EN	BIT(2)
 /* EEE Mode Control Register */
 #define DW_VR_MII_EEE_MCTRL0		0x8006
 #define DW_VR_MII_EEE_MCTRL1		0x800b
@@ -86,6 +89,11 @@
 #define DW_VR_MII_C37_ANSGM_SP_1000		0x2
 #define DW_VR_MII_C37_ANSGM_SP_LNKSTS		BIT(4)
 
+/* SR MII MMD Control defines */
+#define AN_CL37_EN		BIT(12)	/* Enable Clause 37 auto-nego */
+#define SGMII_SPEED_SS13	BIT(13)	/* SGMII speed along with SS6 */
+#define SGMII_SPEED_SS6		BIT(6)	/* SGMII speed along with SS13 */
+
 /* VR MII EEE Control 0 defines */
 #define DW_VR_MII_EEE_LTX_EN		BIT(0)  /* LPI Tx Enable */
 #define DW_VR_MII_EEE_LRX_EN		BIT(1)  /* LPI Rx Enable */
@@ -161,6 +169,14 @@ static const int xpcs_sgmii_features[] = {
 	__ETHTOOL_LINK_MODE_MASK_NBITS,
 };
 
+static const int xpcs_2500basex_features[] = {
+	ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+	ETHTOOL_LINK_MODE_Autoneg_BIT,
+	ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
+	ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+	__ETHTOOL_LINK_MODE_MASK_NBITS,
+};
+
 static const phy_interface_t xpcs_usxgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_USXGMII,
 };
@@ -177,11 +193,17 @@ static const phy_interface_t xpcs_sgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_SGMII,
 };
 
+static const phy_interface_t xpcs_2500basex_interfaces[] = {
+	PHY_INTERFACE_MODE_2500BASEX,
+	PHY_INTERFACE_MODE_MAX,
+};
+
 enum {
 	DW_XPCS_USXGMII,
 	DW_XPCS_10GKR,
 	DW_XPCS_XLGMII,
 	DW_XPCS_SGMII,
+	DW_XPCS_2500BASEX,
 	DW_XPCS_INTERFACE_MAX,
 };
 
@@ -306,6 +328,7 @@ static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs,
 		dev = MDIO_MMD_PCS;
 		break;
 	case DW_AN_C37_SGMII:
+	case DW_2500BASEX:
 		dev = MDIO_MMD_VEND2;
 		break;
 	default:
@@ -804,6 +827,28 @@ static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
 }
 
+static int xpcs_config_2500basex(struct mdio_xpcs_args *xpcs)
+{
+	int ret;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1);
+	if (ret < 0)
+		return ret;
+	ret |= DW_VR_MII_DIG_CTRL1_2G5_EN;
+	ret &= ~DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
+	if (ret < 0)
+		return ret;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL);
+	if (ret < 0)
+		return ret;
+	ret &= ~AN_CL37_EN;
+	ret |= SGMII_SPEED_SS6;
+	ret &= ~SGMII_SPEED_SS13;
+	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, ret);
+}
+
 static int xpcs_do_config(struct mdio_xpcs_args *xpcs,
 			  phy_interface_t interface, unsigned int mode)
 {
@@ -827,6 +872,11 @@ static int xpcs_do_config(struct mdio_xpcs_args *xpcs,
 		if (ret)
 			return ret;
 		break;
+	case DW_2500BASEX:
+		ret = xpcs_config_2500basex(xpcs);
+		if (ret)
+			return ret;
+		break;
 	default:
 		return -1;
 	}
@@ -1023,6 +1073,12 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
 		.num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces),
 		.an_mode = DW_AN_C37_SGMII,
 	},
+	[DW_XPCS_2500BASEX] = {
+		.supported = xpcs_2500basex_features,
+		.interface = xpcs_2500basex_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_2500basex_features),
+		.an_mode = DW_2500BASEX,
+	},
 };
 
 static const struct xpcs_id xpcs_id_list[] = {
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 0860a5b59f10..4d815f03b4b2 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -13,6 +13,7 @@
 /* AN mode */
 #define DW_AN_C73			1
 #define DW_AN_C37_SGMII			2
+#define DW_2500BASEX			3
 
 struct xpcs_id;
 

From 46682cb86a37da435e5668db98555a1de0f0448b Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Tue, 8 Jun 2021 11:51:58 +0800
Subject: [PATCH 1192/3292] net: stmmac: enable Intel mGbE 2.5Gbps link speed

The Intel mGbE supports 2.5Gbps link speed by increasing the clock rate by
2.5 times of the original rate. In this mode, the serdes/PHY operates at a
serial baud rate of 3.125 Gbps and the PCS data path and GMII interface of
the MAC operate at 312.5 MHz instead of 125 MHz.

For Intel mGbE, the overclocking of 2.5 times clock rate to support 2.5G is
only able to be configured in the BIOS during boot time. Kernel driver has
no access to modify the clock rate for 1Gbps/2.5G mode. The way to
determined the current 1G/2.5G mode is by reading a dedicated adhoc
register through mdio bus. In short, after the system boot up, it is either
in 1G mode or 2.5G mode which not able to be changed on the fly.

Compared to 1G mode, the 2.5G mode selects the 2500BASEX as PHY interface and
disables the xpcs_an_inband. This is to cater for some PHYs that only
supports 2500BASEX PHY interface with no autonegotiation.

v2: remove MAC supported link speed masking
v3: Restructure  to introduce intel_speed_mode_2500() to read serdes registers
    for max speed supported and select the appropritate configuration.
    Use max_speed to determine the supported link speed mask.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 48 ++++++++++++++++++-
 .../net/ethernet/stmicro/stmmac/dwmac-intel.h | 13 +++++
 .../net/ethernet/stmicro/stmmac/dwmac4_core.c |  1 +
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  7 +++
 include/linux/stmmac.h                        |  1 +
 5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 2ecf93c84b9d..6a9a19b0844c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -102,6 +102,22 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data)
 
 	serdes_phy_addr = intel_priv->mdio_adhoc_addr;
 
+	/* Set the serdes rate and the PCLK rate */
+	data = mdiobus_read(priv->mii, serdes_phy_addr,
+			    SERDES_GCR0);
+
+	data &= ~SERDES_RATE_MASK;
+	data &= ~SERDES_PCLK_MASK;
+
+	if (priv->plat->max_speed == 2500)
+		data |= SERDES_RATE_PCIE_GEN2 << SERDES_RATE_PCIE_SHIFT |
+			SERDES_PCLK_37p5MHZ << SERDES_PCLK_SHIFT;
+	else
+		data |= SERDES_RATE_PCIE_GEN1 << SERDES_RATE_PCIE_SHIFT |
+			SERDES_PCLK_70MHZ << SERDES_PCLK_SHIFT;
+
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
+
 	/* assert clk_req */
 	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 	data |= SERDES_PLL_CLK;
@@ -230,6 +246,32 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 	}
 }
 
+static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data)
+{
+	struct intel_priv_data *intel_priv = intel_data;
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	int serdes_phy_addr = 0;
+	u32 data = 0;
+
+	serdes_phy_addr = intel_priv->mdio_adhoc_addr;
+
+	/* Determine the link speed mode: 2.5Gbps/1Gbps */
+	data = mdiobus_read(priv->mii, serdes_phy_addr,
+			    SERDES_GCR);
+
+	if (((data & SERDES_LINK_MODE_MASK) >> SERDES_LINK_MODE_SHIFT) ==
+	    SERDES_LINK_MODE_2G5) {
+		dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n");
+		priv->plat->max_speed = 2500;
+		priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX;
+		priv->plat->mdio_bus_data->xpcs_an_inband = false;
+	} else {
+		priv->plat->max_speed = 1000;
+		priv->plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+		priv->plat->mdio_bus_data->xpcs_an_inband = true;
+	}
+}
+
 /* Program PTP Clock Frequency for different variant of
  * Intel mGBE that has slightly different GPO mapping
  */
@@ -586,7 +628,7 @@ static int ehl_sgmii_data(struct pci_dev *pdev,
 {
 	plat->bus_id = 1;
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
-
+	plat->speed_mode_2500 = intel_speed_mode_2500;
 	plat->serdes_powerup = intel_serdes_powerup;
 	plat->serdes_powerdown = intel_serdes_powerdown;
 
@@ -639,6 +681,7 @@ static int ehl_pse0_sgmii1g_data(struct pci_dev *pdev,
 				 struct plat_stmmacenet_data *plat)
 {
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+	plat->speed_mode_2500 = intel_speed_mode_2500;
 	plat->serdes_powerup = intel_serdes_powerup;
 	plat->serdes_powerdown = intel_serdes_powerdown;
 	return ehl_pse0_common_data(pdev, plat);
@@ -677,6 +720,7 @@ static int ehl_pse1_sgmii1g_data(struct pci_dev *pdev,
 				 struct plat_stmmacenet_data *plat)
 {
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+	plat->speed_mode_2500 = intel_speed_mode_2500;
 	plat->serdes_powerup = intel_serdes_powerup;
 	plat->serdes_powerdown = intel_serdes_powerdown;
 	return ehl_pse1_common_data(pdev, plat);
@@ -711,6 +755,7 @@ static int tgl_sgmii_phy0_data(struct pci_dev *pdev,
 {
 	plat->bus_id = 1;
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+	plat->speed_mode_2500 = intel_speed_mode_2500;
 	plat->serdes_powerup = intel_serdes_powerup;
 	plat->serdes_powerdown = intel_serdes_powerdown;
 	return tgl_common_data(pdev, plat);
@@ -725,6 +770,7 @@ static int tgl_sgmii_phy1_data(struct pci_dev *pdev,
 {
 	plat->bus_id = 2;
 	plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+	plat->speed_mode_2500 = intel_speed_mode_2500;
 	plat->serdes_powerup = intel_serdes_powerup;
 	plat->serdes_powerdown = intel_serdes_powerdown;
 	return tgl_common_data(pdev, plat);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
index 542acb8ce467..20d14e588044 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
@@ -9,6 +9,7 @@
 #define POLL_DELAY_US 8
 
 /* SERDES Register */
+#define SERDES_GCR	0x0	/* Global Conguration */
 #define SERDES_GSR0	0x5	/* Global Status Reg0 */
 #define SERDES_GCR0	0xb	/* Global Configuration Reg0 */
 
@@ -17,8 +18,20 @@
 #define SERDES_PHY_RX_CLK	BIT(1)		/* PSE SGMII PHY rx clk */
 #define SERDES_RST		BIT(2)		/* Serdes Reset */
 #define SERDES_PWR_ST_MASK	GENMASK(6, 4)	/* Serdes Power state*/
+#define SERDES_RATE_MASK	GENMASK(9, 8)
+#define SERDES_PCLK_MASK	GENMASK(14, 12)	/* PCLK rate to PHY */
+#define SERDES_LINK_MODE_MASK	GENMASK(2, 1)
+#define SERDES_LINK_MODE_SHIFT	1
 #define SERDES_PWR_ST_SHIFT	4
 #define SERDES_PWR_ST_P0	0x0
 #define SERDES_PWR_ST_P3	0x3
+#define SERDES_LINK_MODE_2G5	0x3
+#define SERSED_LINK_MODE_1G	0x2
+#define SERDES_PCLK_37p5MHZ	0x0
+#define SERDES_PCLK_70MHZ	0x1
+#define SERDES_RATE_PCIE_GEN1	0x0
+#define SERDES_RATE_PCIE_GEN2	0x1
+#define SERDES_RATE_PCIE_SHIFT	8
+#define SERDES_PCLK_SHIFT	12
 
 #endif /* __DWMAC_INTEL_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index f35c03c9f91e..67ba083eb90c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -1358,6 +1358,7 @@ int dwmac4_setup(struct stmmac_priv *priv)
 	mac->link.speed10 = GMAC_CONFIG_PS;
 	mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
 	mac->link.speed1000 = 0;
+	mac->link.speed2500 = GMAC_CONFIG_FES;
 	mac->link.speed_mask = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
 	mac->mii.addr = GMAC_MDIO_ADDR;
 	mac->mii.data = GMAC_MDIO_DATA;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index af406ea3dd46..1b12a2f8bfb5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -931,6 +931,10 @@ static void stmmac_validate(struct phylink_config *config,
 	if ((max_speed > 0) && (max_speed < 1000)) {
 		phylink_set(mask, 1000baseT_Full);
 		phylink_set(mask, 1000baseX_Full);
+	} else if (priv->plat->has_gmac4) {
+		if (!max_speed || max_speed >= 2500)
+			phylink_set(mac_supported, 2500baseT_Full);
+			phylink_set(mac_supported, 2500baseX_Full);
 	} else if (priv->plat->has_xgmac) {
 		if (!max_speed || (max_speed >= 2500)) {
 			phylink_set(mac_supported, 2500baseT_Full);
@@ -6993,6 +6997,9 @@ int stmmac_dvr_probe(struct device *device,
 		}
 	}
 
+	if (priv->plat->speed_mode_2500)
+		priv->plat->speed_mode_2500(ndev, priv->plat->bsp_priv);
+
 	if (priv->plat->mdio_bus_data) {
 		if (priv->plat->mdio_bus_data->has_xpcs) {
 			ret = stmmac_xpcs_setup(priv->mii);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index e55a4807e3ea..b10be3385a30 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -223,6 +223,7 @@ struct plat_stmmacenet_data {
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	int (*serdes_powerup)(struct net_device *ndev, void *priv);
 	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
+	void (*speed_mode_2500)(struct net_device *ndev, void *priv);
 	void (*ptp_clk_freq_config)(void *priv);
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);

From f36a111a74e71edbba27d4c0cf3d7bbccc172108 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:32 +0300
Subject: [PATCH 1193/3292] wwan_hwsim: WWAN device simulator

This driver simulates a set of WWAN device with a set of AT control
ports. It can be used to test WWAN kernel framework as well as user
space tools.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/Kconfig      |  10 ++
 drivers/net/wwan/Makefile     |   2 +
 drivers/net/wwan/wwan_hwsim.c | 318 ++++++++++++++++++++++++++++++++++
 3 files changed, 330 insertions(+)
 create mode 100644 drivers/net/wwan/wwan_hwsim.c

diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index 7ad1920120bc..ec0b194a373c 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -20,6 +20,16 @@ config WWAN_CORE
 	  To compile this driver as a module, choose M here: the module will be
 	  called wwan.
 
+config WWAN_HWSIM
+	tristate "Simulated WWAN device"
+	depends on WWAN_CORE
+	help
+	  This driver is a developer testing tool that can be used to test WWAN
+	  framework.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called wwan_hwsim.  If unsure, say N.
+
 config MHI_WWAN_CTRL
 	tristate "MHI WWAN control driver for QCOM-based PCIe modems"
 	select WWAN_CORE
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index 556cd90958ca..f33f77ca1021 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -6,4 +6,6 @@
 obj-$(CONFIG_WWAN_CORE) += wwan.o
 wwan-objs += wwan_core.o
 
+obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
+
 obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
new file mode 100644
index 000000000000..96d25d7e5bb8
--- /dev/null
+++ b/drivers/net/wwan/wwan_hwsim.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * WWAN device simulator for WWAN framework testing.
+ *
+ * Copyright (c) 2021, Sergey Ryazanov <ryazanov.s.a@gmail.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/wwan.h>
+
+static int wwan_hwsim_devsnum = 2;
+module_param_named(devices, wwan_hwsim_devsnum, int, 0444);
+MODULE_PARM_DESC(devices, "Number of simulated devices");
+
+static struct class *wwan_hwsim_class;
+
+static DEFINE_SPINLOCK(wwan_hwsim_devs_lock);
+static LIST_HEAD(wwan_hwsim_devs);
+static unsigned int wwan_hwsim_dev_idx;
+
+struct wwan_hwsim_dev {
+	struct list_head list;
+	unsigned int id;
+	struct device dev;
+	spinlock_t ports_lock;	/* Serialize ports creation/deletion */
+	unsigned int port_idx;
+	struct list_head ports;
+};
+
+struct wwan_hwsim_port {
+	struct list_head list;
+	unsigned int id;
+	struct wwan_hwsim_dev *dev;
+	struct wwan_port *wwan;
+	enum {			/* AT command parser state */
+		AT_PARSER_WAIT_A,
+		AT_PARSER_WAIT_T,
+		AT_PARSER_WAIT_TERM,
+		AT_PARSER_SKIP_LINE,
+	} pstate;
+};
+
+static int wwan_hwsim_port_start(struct wwan_port *wport)
+{
+	struct wwan_hwsim_port *port = wwan_port_get_drvdata(wport);
+
+	port->pstate = AT_PARSER_WAIT_A;
+
+	return 0;
+}
+
+static void wwan_hwsim_port_stop(struct wwan_port *wport)
+{
+}
+
+/* Implements a minimalistic AT commands parser that echo input back and
+ * reply with 'OK' to each input command. See AT command protocol details in the
+ * ITU-T V.250 recomendations document.
+ *
+ * Be aware that this processor is not fully V.250 compliant.
+ */
+static int wwan_hwsim_port_tx(struct wwan_port *wport, struct sk_buff *in)
+{
+	struct wwan_hwsim_port *port = wwan_port_get_drvdata(wport);
+	struct sk_buff *out;
+	int i, n, s;
+
+	/* Estimate a max possible number of commands by counting the number of
+	 * termination chars (S3 param, CR by default). And then allocate the
+	 * output buffer that will be enough to fit the echo and result codes of
+	 * all commands.
+	 */
+	for (i = 0, n = 0; i < in->len; ++i)
+		if (in->data[i] == '\r')
+			n++;
+	n = in->len + n * (2 + 2 + 2);	/* Output buffer size */
+	out = alloc_skb(n, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	for (i = 0, s = 0; i < in->len; ++i) {
+		char c = in->data[i];
+
+		if (port->pstate == AT_PARSER_WAIT_A) {
+			if (c == 'A' || c == 'a')
+				port->pstate = AT_PARSER_WAIT_T;
+			else if (c != '\n')	/* Ignore formating char */
+				port->pstate = AT_PARSER_SKIP_LINE;
+		} else if (port->pstate == AT_PARSER_WAIT_T) {
+			if (c == 'T' || c == 't')
+				port->pstate = AT_PARSER_WAIT_TERM;
+			else
+				port->pstate = AT_PARSER_SKIP_LINE;
+		} else if (port->pstate == AT_PARSER_WAIT_TERM) {
+			if (c != '\r')
+				continue;
+			/* Consume the trailing formatting char as well */
+			if ((i + 1) < in->len && in->data[i + 1] == '\n')
+				i++;
+			n = i - s + 1;
+			memcpy(skb_put(out, n), &in->data[s], n);/* Echo */
+			memcpy(skb_put(out, 6), "\r\nOK\r\n", 6);
+			s = i + 1;
+			port->pstate = AT_PARSER_WAIT_A;
+		} else if (port->pstate == AT_PARSER_SKIP_LINE) {
+			if (c != '\r')
+				continue;
+			port->pstate = AT_PARSER_WAIT_A;
+		}
+	}
+
+	if (i > s) {
+		/* Echo the processed portion of a not yet completed command */
+		n = i - s;
+		memcpy(skb_put(out, n), &in->data[s], n);
+	}
+
+	consume_skb(in);
+
+	wwan_port_rx(wport, out);
+
+	return 0;
+}
+
+static const struct wwan_port_ops wwan_hwsim_port_ops = {
+	.start = wwan_hwsim_port_start,
+	.stop = wwan_hwsim_port_stop,
+	.tx = wwan_hwsim_port_tx,
+};
+
+static struct wwan_hwsim_port *wwan_hwsim_port_new(struct wwan_hwsim_dev *dev)
+{
+	struct wwan_hwsim_port *port;
+	int err;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	port->dev = dev;
+
+	spin_lock(&dev->ports_lock);
+	port->id = dev->port_idx++;
+	spin_unlock(&dev->ports_lock);
+
+	port->wwan = wwan_create_port(&dev->dev, WWAN_PORT_AT,
+				      &wwan_hwsim_port_ops,
+				      port);
+	if (IS_ERR(port->wwan)) {
+		err = PTR_ERR(port->wwan);
+		goto err_free_port;
+	}
+
+	return port;
+
+err_free_port:
+	kfree(port);
+
+	return ERR_PTR(err);
+}
+
+static void wwan_hwsim_port_del(struct wwan_hwsim_port *port)
+{
+	wwan_remove_port(port->wwan);
+	kfree(port);
+}
+
+static void wwan_hwsim_dev_release(struct device *sysdev)
+{
+	struct wwan_hwsim_dev *dev = container_of(sysdev, typeof(*dev), dev);
+
+	kfree(dev);
+}
+
+static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void)
+{
+	struct wwan_hwsim_dev *dev;
+	int err;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&wwan_hwsim_devs_lock);
+	dev->id = wwan_hwsim_dev_idx++;
+	spin_unlock(&wwan_hwsim_devs_lock);
+
+	dev->dev.release = wwan_hwsim_dev_release;
+	dev->dev.class = wwan_hwsim_class;
+	dev_set_name(&dev->dev, "hwsim%u", dev->id);
+
+	spin_lock_init(&dev->ports_lock);
+	INIT_LIST_HEAD(&dev->ports);
+
+	err = device_register(&dev->dev);
+	if (err)
+		goto err_free_dev;
+
+	return dev;
+
+err_free_dev:
+	kfree(dev);
+
+	return ERR_PTR(err);
+}
+
+static void wwan_hwsim_dev_del(struct wwan_hwsim_dev *dev)
+{
+	spin_lock(&dev->ports_lock);
+	while (!list_empty(&dev->ports)) {
+		struct wwan_hwsim_port *port;
+
+		port = list_first_entry(&dev->ports, struct wwan_hwsim_port,
+					list);
+		list_del(&port->list);
+		spin_unlock(&dev->ports_lock);
+		wwan_hwsim_port_del(port);
+		spin_lock(&dev->ports_lock);
+	}
+	spin_unlock(&dev->ports_lock);
+
+	device_unregister(&dev->dev);
+	/* Memory will be freed in the device release callback */
+}
+
+static int __init wwan_hwsim_init_devs(void)
+{
+	struct wwan_hwsim_dev *dev;
+	int i, j;
+
+	for (i = 0; i < wwan_hwsim_devsnum; ++i) {
+		dev = wwan_hwsim_dev_new();
+		if (IS_ERR(dev))
+			return PTR_ERR(dev);
+
+		spin_lock(&wwan_hwsim_devs_lock);
+		list_add_tail(&dev->list, &wwan_hwsim_devs);
+		spin_unlock(&wwan_hwsim_devs_lock);
+
+		/* Create a couple of ports per each device to accelerate
+		 * the simulator readiness time.
+		 */
+		for (j = 0; j < 2; ++j) {
+			struct wwan_hwsim_port *port;
+
+			port = wwan_hwsim_port_new(dev);
+			if (IS_ERR(port))
+				return PTR_ERR(port);
+
+			spin_lock(&dev->ports_lock);
+			list_add_tail(&port->list, &dev->ports);
+			spin_unlock(&dev->ports_lock);
+		}
+	}
+
+	return 0;
+}
+
+static void wwan_hwsim_free_devs(void)
+{
+	struct wwan_hwsim_dev *dev;
+
+	spin_lock(&wwan_hwsim_devs_lock);
+	while (!list_empty(&wwan_hwsim_devs)) {
+		dev = list_first_entry(&wwan_hwsim_devs, struct wwan_hwsim_dev,
+				       list);
+		list_del(&dev->list);
+		spin_unlock(&wwan_hwsim_devs_lock);
+		wwan_hwsim_dev_del(dev);
+		spin_lock(&wwan_hwsim_devs_lock);
+	}
+	spin_unlock(&wwan_hwsim_devs_lock);
+}
+
+static int __init wwan_hwsim_init(void)
+{
+	int err;
+
+	if (wwan_hwsim_devsnum < 0 || wwan_hwsim_devsnum > 128)
+		return -EINVAL;
+
+	wwan_hwsim_class = class_create(THIS_MODULE, "wwan_hwsim");
+	if (IS_ERR(wwan_hwsim_class))
+		return PTR_ERR(wwan_hwsim_class);
+
+	err = wwan_hwsim_init_devs();
+	if (err)
+		goto err_clean_devs;
+
+	return 0;
+
+err_clean_devs:
+	wwan_hwsim_free_devs();
+	class_destroy(wwan_hwsim_class);
+
+	return err;
+}
+
+static void __exit wwan_hwsim_exit(void)
+{
+	wwan_hwsim_free_devs();
+	class_destroy(wwan_hwsim_class);
+}
+
+module_init(wwan_hwsim_init);
+module_exit(wwan_hwsim_exit);
+
+MODULE_AUTHOR("Sergey Ryazanov");
+MODULE_DESCRIPTION("Device simulator for WWAN framework");
+MODULE_LICENSE("GPL");

From 9ee23f48f6705fff6c23e02c4ab1e6d99369cd05 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:33 +0300
Subject: [PATCH 1194/3292] wwan_hwsim: add debugfs management interface

wwan_hwsim creates and removes simulated control ports on module loading
and unloading. It would be helpful to be able to create/remove devices
and ports at run-time to trigger wwan port (un-)register actions without
module reloading.

Some simulator objects (e.g. ports) do not have the underling device and
it is not possible to fully manage the simulator via sysfs. wwan_hsim
intend for developers, so implement it as a self-contained debugfs based
management interface.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_hwsim.c | 186 +++++++++++++++++++++++++++++++++-
 1 file changed, 184 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
index 96d25d7e5bb8..472cae544a2b 100644
--- a/drivers/net/wwan/wwan_hwsim.c
+++ b/drivers/net/wwan/wwan_hwsim.c
@@ -15,6 +15,8 @@
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/wwan.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
 
 static int wwan_hwsim_devsnum = 2;
 module_param_named(devices, wwan_hwsim_devsnum, int, 0444);
@@ -22,6 +24,9 @@ MODULE_PARM_DESC(devices, "Number of simulated devices");
 
 static struct class *wwan_hwsim_class;
 
+static struct dentry *wwan_hwsim_debugfs_topdir;
+static struct dentry *wwan_hwsim_debugfs_devcreate;
+
 static DEFINE_SPINLOCK(wwan_hwsim_devs_lock);
 static LIST_HEAD(wwan_hwsim_devs);
 static unsigned int wwan_hwsim_dev_idx;
@@ -30,6 +35,9 @@ struct wwan_hwsim_dev {
 	struct list_head list;
 	unsigned int id;
 	struct device dev;
+	struct work_struct del_work;
+	struct dentry *debugfs_topdir;
+	struct dentry *debugfs_portcreate;
 	spinlock_t ports_lock;	/* Serialize ports creation/deletion */
 	unsigned int port_idx;
 	struct list_head ports;
@@ -40,6 +48,8 @@ struct wwan_hwsim_port {
 	unsigned int id;
 	struct wwan_hwsim_dev *dev;
 	struct wwan_port *wwan;
+	struct work_struct del_work;
+	struct dentry *debugfs_topdir;
 	enum {			/* AT command parser state */
 		AT_PARSER_WAIT_A,
 		AT_PARSER_WAIT_T,
@@ -48,6 +58,12 @@ struct wwan_hwsim_port {
 	} pstate;
 };
 
+static const struct file_operations wwan_hwsim_debugfs_portdestroy_fops;
+static const struct file_operations wwan_hwsim_debugfs_portcreate_fops;
+static const struct file_operations wwan_hwsim_debugfs_devdestroy_fops;
+static void wwan_hwsim_port_del_work(struct work_struct *work);
+static void wwan_hwsim_dev_del_work(struct work_struct *work);
+
 static int wwan_hwsim_port_start(struct wwan_port *wport)
 {
 	struct wwan_hwsim_port *port = wwan_port_get_drvdata(wport);
@@ -139,6 +155,7 @@ static const struct wwan_port_ops wwan_hwsim_port_ops = {
 static struct wwan_hwsim_port *wwan_hwsim_port_new(struct wwan_hwsim_dev *dev)
 {
 	struct wwan_hwsim_port *port;
+	char name[0x10];
 	int err;
 
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
@@ -159,6 +176,13 @@ static struct wwan_hwsim_port *wwan_hwsim_port_new(struct wwan_hwsim_dev *dev)
 		goto err_free_port;
 	}
 
+	INIT_WORK(&port->del_work, wwan_hwsim_port_del_work);
+
+	snprintf(name, sizeof(name), "port%u", port->id);
+	port->debugfs_topdir = debugfs_create_dir(name, dev->debugfs_topdir);
+	debugfs_create_file("destroy", 0200, port->debugfs_topdir, port,
+			    &wwan_hwsim_debugfs_portdestroy_fops);
+
 	return port;
 
 err_free_port:
@@ -169,10 +193,34 @@ err_free_port:
 
 static void wwan_hwsim_port_del(struct wwan_hwsim_port *port)
 {
+	debugfs_remove(port->debugfs_topdir);
+
+	/* Make sure that there is no pending deletion work */
+	if (current_work() != &port->del_work)
+		cancel_work_sync(&port->del_work);
+
 	wwan_remove_port(port->wwan);
 	kfree(port);
 }
 
+static void wwan_hwsim_port_del_work(struct work_struct *work)
+{
+	struct wwan_hwsim_port *port =
+				container_of(work, typeof(*port), del_work);
+	struct wwan_hwsim_dev *dev = port->dev;
+
+	spin_lock(&dev->ports_lock);
+	if (list_empty(&port->list)) {
+		/* Someone else deleting port at the moment */
+		spin_unlock(&dev->ports_lock);
+		return;
+	}
+	list_del_init(&port->list);
+	spin_unlock(&dev->ports_lock);
+
+	wwan_hwsim_port_del(port);
+}
+
 static void wwan_hwsim_dev_release(struct device *sysdev)
 {
 	struct wwan_hwsim_dev *dev = container_of(sysdev, typeof(*dev), dev);
@@ -204,6 +252,17 @@ static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void)
 	if (err)
 		goto err_free_dev;
 
+	INIT_WORK(&dev->del_work, wwan_hwsim_dev_del_work);
+
+	dev->debugfs_topdir = debugfs_create_dir(dev_name(&dev->dev),
+						 wwan_hwsim_debugfs_topdir);
+	debugfs_create_file("destroy", 0200, dev->debugfs_topdir, dev,
+			    &wwan_hwsim_debugfs_devdestroy_fops);
+	dev->debugfs_portcreate =
+		debugfs_create_file("portcreate", 0200,
+				    dev->debugfs_topdir, dev,
+				    &wwan_hwsim_debugfs_portcreate_fops);
+
 	return dev;
 
 err_free_dev:
@@ -214,23 +273,136 @@ err_free_dev:
 
 static void wwan_hwsim_dev_del(struct wwan_hwsim_dev *dev)
 {
+	debugfs_remove(dev->debugfs_portcreate);	/* Avoid new ports */
+
 	spin_lock(&dev->ports_lock);
 	while (!list_empty(&dev->ports)) {
 		struct wwan_hwsim_port *port;
 
 		port = list_first_entry(&dev->ports, struct wwan_hwsim_port,
 					list);
-		list_del(&port->list);
+		list_del_init(&port->list);
 		spin_unlock(&dev->ports_lock);
 		wwan_hwsim_port_del(port);
 		spin_lock(&dev->ports_lock);
 	}
 	spin_unlock(&dev->ports_lock);
 
+	debugfs_remove(dev->debugfs_topdir);
+
+	/* Make sure that there is no pending deletion work */
+	if (current_work() != &dev->del_work)
+		cancel_work_sync(&dev->del_work);
+
 	device_unregister(&dev->dev);
 	/* Memory will be freed in the device release callback */
 }
 
+static void wwan_hwsim_dev_del_work(struct work_struct *work)
+{
+	struct wwan_hwsim_dev *dev = container_of(work, typeof(*dev), del_work);
+
+	spin_lock(&wwan_hwsim_devs_lock);
+	if (list_empty(&dev->list)) {
+		/* Someone else deleting device at the moment */
+		spin_unlock(&wwan_hwsim_devs_lock);
+		return;
+	}
+	list_del_init(&dev->list);
+	spin_unlock(&wwan_hwsim_devs_lock);
+
+	wwan_hwsim_dev_del(dev);
+}
+
+static ssize_t wwan_hwsim_debugfs_portdestroy_write(struct file *file,
+						    const char __user *usrbuf,
+						    size_t count, loff_t *ppos)
+{
+	struct wwan_hwsim_port *port = file->private_data;
+
+	/* We can not delete port here since it will cause a deadlock due to
+	 * waiting this callback to finish in the debugfs_remove() call. So,
+	 * use workqueue.
+	 */
+	schedule_work(&port->del_work);
+
+	return count;
+}
+
+static const struct file_operations wwan_hwsim_debugfs_portdestroy_fops = {
+	.write = wwan_hwsim_debugfs_portdestroy_write,
+	.open = simple_open,
+	.llseek = noop_llseek,
+};
+
+static ssize_t wwan_hwsim_debugfs_portcreate_write(struct file *file,
+						   const char __user *usrbuf,
+						   size_t count, loff_t *ppos)
+{
+	struct wwan_hwsim_dev *dev = file->private_data;
+	struct wwan_hwsim_port *port;
+
+	port = wwan_hwsim_port_new(dev);
+	if (IS_ERR(port))
+		return PTR_ERR(port);
+
+	spin_lock(&dev->ports_lock);
+	list_add_tail(&port->list, &dev->ports);
+	spin_unlock(&dev->ports_lock);
+
+	return count;
+}
+
+static const struct file_operations wwan_hwsim_debugfs_portcreate_fops = {
+	.write = wwan_hwsim_debugfs_portcreate_write,
+	.open = simple_open,
+	.llseek = noop_llseek,
+};
+
+static ssize_t wwan_hwsim_debugfs_devdestroy_write(struct file *file,
+						   const char __user *usrbuf,
+						   size_t count, loff_t *ppos)
+{
+	struct wwan_hwsim_dev *dev = file->private_data;
+
+	/* We can not delete device here since it will cause a deadlock due to
+	 * waiting this callback to finish in the debugfs_remove() call. So,
+	 * use workqueue.
+	 */
+	schedule_work(&dev->del_work);
+
+	return count;
+}
+
+static const struct file_operations wwan_hwsim_debugfs_devdestroy_fops = {
+	.write = wwan_hwsim_debugfs_devdestroy_write,
+	.open = simple_open,
+	.llseek = noop_llseek,
+};
+
+static ssize_t wwan_hwsim_debugfs_devcreate_write(struct file *file,
+						  const char __user *usrbuf,
+						  size_t count, loff_t *ppos)
+{
+	struct wwan_hwsim_dev *dev;
+
+	dev = wwan_hwsim_dev_new();
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	spin_lock(&wwan_hwsim_devs_lock);
+	list_add_tail(&dev->list, &wwan_hwsim_devs);
+	spin_unlock(&wwan_hwsim_devs_lock);
+
+	return count;
+}
+
+static const struct file_operations wwan_hwsim_debugfs_devcreate_fops = {
+	.write = wwan_hwsim_debugfs_devcreate_write,
+	.open = simple_open,
+	.llseek = noop_llseek,
+};
+
 static int __init wwan_hwsim_init_devs(void)
 {
 	struct wwan_hwsim_dev *dev;
@@ -272,7 +444,7 @@ static void wwan_hwsim_free_devs(void)
 	while (!list_empty(&wwan_hwsim_devs)) {
 		dev = list_first_entry(&wwan_hwsim_devs, struct wwan_hwsim_dev,
 				       list);
-		list_del(&dev->list);
+		list_del_init(&dev->list);
 		spin_unlock(&wwan_hwsim_devs_lock);
 		wwan_hwsim_dev_del(dev);
 		spin_lock(&wwan_hwsim_devs_lock);
@@ -291,6 +463,12 @@ static int __init wwan_hwsim_init(void)
 	if (IS_ERR(wwan_hwsim_class))
 		return PTR_ERR(wwan_hwsim_class);
 
+	wwan_hwsim_debugfs_topdir = debugfs_create_dir("wwan_hwsim", NULL);
+	wwan_hwsim_debugfs_devcreate =
+			debugfs_create_file("devcreate", 0200,
+					    wwan_hwsim_debugfs_topdir, NULL,
+					    &wwan_hwsim_debugfs_devcreate_fops);
+
 	err = wwan_hwsim_init_devs();
 	if (err)
 		goto err_clean_devs;
@@ -299,6 +477,7 @@ static int __init wwan_hwsim_init(void)
 
 err_clean_devs:
 	wwan_hwsim_free_devs();
+	debugfs_remove(wwan_hwsim_debugfs_topdir);
 	class_destroy(wwan_hwsim_class);
 
 	return err;
@@ -306,7 +485,10 @@ err_clean_devs:
 
 static void __exit wwan_hwsim_exit(void)
 {
+	debugfs_remove(wwan_hwsim_debugfs_devcreate);	/* Avoid new devs */
 	wwan_hwsim_free_devs();
+	flush_scheduled_work();		/* Wait deletion works completion */
+	debugfs_remove(wwan_hwsim_debugfs_topdir);
 	class_destroy(wwan_hwsim_class);
 }
 

From b64d76b782264aa91c236c11c72646459b04c301 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:34 +0300
Subject: [PATCH 1195/3292] net: wwan: make WWAN_PORT_MAX meaning less
 surprised

It is quite unusual when some value can not be equal to a defined range
max value. Also most subsystems defines FOO_TYPE_MAX as a maximum valid
value. So turn the WAN_PORT_MAX meaning from the number of supported
port types to the maximum valid port type.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c |  2 +-
 include/linux/wwan.h         | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 6e8f19c71a9e..632ff86398ac 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -250,7 +250,7 @@ struct wwan_port *wwan_create_port(struct device *parent,
 	struct wwan_port *port;
 	int minor, err = -ENOMEM;
 
-	if (type >= WWAN_PORT_MAX || !ops)
+	if (type > WWAN_PORT_MAX || !ops)
 		return ERR_PTR(-EINVAL);
 
 	/* A port is always a child of a WWAN device, retrieve (allocate or
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 7216c114d758..fa33cc16d931 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -15,8 +15,10 @@
  * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control
  * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface
  * @WWAN_PORT_FIREHOSE: XML based command protocol
- * @WWAN_PORT_UNKNOWN: Unknown port type
- * @WWAN_PORT_MAX: Number of supported port types
+ *
+ * @WWAN_PORT_MAX: Highest supported port types
+ * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type
+ * @__WWAN_PORT_MAX: Internal use
  */
 enum wwan_port_type {
 	WWAN_PORT_AT,
@@ -24,8 +26,12 @@ enum wwan_port_type {
 	WWAN_PORT_QMI,
 	WWAN_PORT_QCDM,
 	WWAN_PORT_FIREHOSE,
+
+	/* Add new port types above this line */
+
+	__WWAN_PORT_MAX,
+	WWAN_PORT_MAX = __WWAN_PORT_MAX - 1,
 	WWAN_PORT_UNKNOWN,
-	WWAN_PORT_MAX = WWAN_PORT_UNKNOWN,
 };
 
 struct wwan_port;

From 64cc80c0ff2eca6a99232fd57d33c8095dfdd878 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:35 +0300
Subject: [PATCH 1196/3292] net: wwan: core: init port type string array using
 enum values

This array is indexed by port type. Make it self-descriptive by using
the port type enum values as indices in the array initializer.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 632ff86398ac..97d77b06d222 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -184,13 +184,12 @@ static void wwan_remove_dev(struct wwan_device *wwandev)
 
 /* ------- WWAN port management ------- */
 
-/* Keep aligned with wwan_port_type enum */
-static const char * const wwan_port_type_str[] = {
-	"AT",
-	"MBIM",
-	"QMI",
-	"QCDM",
-	"FIREHOSE"
+static const char * const wwan_port_type_str[WWAN_PORT_MAX + 1] = {
+	[WWAN_PORT_AT] = "AT",
+	[WWAN_PORT_MBIM] = "MBIM",
+	[WWAN_PORT_QMI] = "QMI",
+	[WWAN_PORT_QCDM] = "QCDM",
+	[WWAN_PORT_FIREHOSE] = "FIREHOSE",
 };
 
 static ssize_t type_show(struct device *dev, struct device_attribute *attr,

From 392c26f7f133b9f09e5f58db1ce6ef4b3b4df49f Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:36 +0300
Subject: [PATCH 1197/3292] net: wwan: core: spell port device name in
 lowercase

Usually a device name is spelled in lowercase, let us follow this
practice in the WWAN subsystem as well. The bottom line is that such
name is easier to type.

To keep the device type attribute contents more natural (i.e., spell
abbreviations in uppercase), while making the device name lowercase,
turn the port type strings array to an array of structure that contains
both the port type name and the device name suffix.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 97d77b06d222..ba4392d71b80 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -184,12 +184,30 @@ static void wwan_remove_dev(struct wwan_device *wwandev)
 
 /* ------- WWAN port management ------- */
 
-static const char * const wwan_port_type_str[WWAN_PORT_MAX + 1] = {
-	[WWAN_PORT_AT] = "AT",
-	[WWAN_PORT_MBIM] = "MBIM",
-	[WWAN_PORT_QMI] = "QMI",
-	[WWAN_PORT_QCDM] = "QCDM",
-	[WWAN_PORT_FIREHOSE] = "FIREHOSE",
+static const struct {
+	const char * const name;	/* Port type name */
+	const char * const devsuf;	/* Port devce name suffix */
+} wwan_port_types[WWAN_PORT_MAX + 1] = {
+	[WWAN_PORT_AT] = {
+		.name = "AT",
+		.devsuf = "at",
+	},
+	[WWAN_PORT_MBIM] = {
+		.name = "MBIM",
+		.devsuf = "mbim",
+	},
+	[WWAN_PORT_QMI] = {
+		.name = "QMI",
+		.devsuf = "qmi",
+	},
+	[WWAN_PORT_QCDM] = {
+		.name = "QCDM",
+		.devsuf = "qcdm",
+	},
+	[WWAN_PORT_FIREHOSE] = {
+		.name = "FIREHOSE",
+		.devsuf = "firehose",
+	},
 };
 
 static ssize_t type_show(struct device *dev, struct device_attribute *attr,
@@ -197,7 +215,7 @@ static ssize_t type_show(struct device *dev, struct device_attribute *attr,
 {
 	struct wwan_port *port = to_wwan_port(dev);
 
-	return sprintf(buf, "%s\n", wwan_port_type_str[port->type]);
+	return sprintf(buf, "%s\n", wwan_port_types[port->type].name);
 }
 static DEVICE_ATTR_RO(type);
 
@@ -285,7 +303,7 @@ struct wwan_port *wwan_create_port(struct device *parent,
 	/* create unique name based on wwan device id, port index and type */
 	dev_set_name(&port->dev, "wwan%up%u%s", wwandev->id,
 		     atomic_inc_return(&wwandev->port_id),
-		     wwan_port_type_str[port->type]);
+		     wwan_port_types[port->type].devsuf);
 
 	err = device_register(&port->dev);
 	if (err)

From f458709ff40b0d992fec496952f79c7820dd3fde Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:37 +0300
Subject: [PATCH 1198/3292] net: wwan: core: make port names more user-friendly

At the moment, the port name is allocated based on the parent device
name, port id and the port type. Where the port id specifies nothing but
the ports registration order and is only used to make the port name
unique.

Most likely, to configure a WWAN device, the user will look for a port
of a specific type (e.g. AT port or MBIM port, etc.). The current naming
scheme can make it difficult to find a port of a specific type.

Consider a WWAN device that has 3 ports: AT port, MBIM port, and another
one AT port. With the global port index, the port names will be:
* wwan0p1at
* wwan0p2mbim
* wwan0p3at

To find the MBIM port, user should know in advance the device ports
composition (i.e. the user should know that the MBIM port is the 2nd
one) or carefully examine the whole ports list. It is not unusual for
USB modems to have a different composition, even if they are build on a
same chipset. Moreover, some modems able to change the ports composition
based on the user's configuration. All this makes port names fully
unpredictable.

To make naming more user-friendly, remove the global port id and
enumerate ports by its type. E.g.:
* wwan0p1at   -> wwan0at0
* wwan0p2mbim -> wwan0mbim0
* wwan0p3at   -> wwan0at1

With this naming scheme, the first AT port name will always be wwanXat0,
the first MBIM port name will always be wwanXmbim0, etc.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 67 ++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index ba4392d71b80..2844b17a724c 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -33,12 +33,10 @@ static int wwan_major;
  *
  * @id: WWAN device unique ID.
  * @dev: Underlying device.
- * @port_id: Current available port ID to pick.
  */
 struct wwan_device {
 	unsigned int id;
 	struct device dev;
-	atomic_t port_id;
 };
 
 /**
@@ -258,6 +256,56 @@ static struct wwan_port *wwan_port_get_by_minor(unsigned int minor)
 	return to_wwan_port(dev);
 }
 
+/* Allocate and set unique name based on passed format
+ *
+ * Name allocation approach is highly inspired by the __dev_alloc_name()
+ * function.
+ *
+ * To avoid names collision, the caller must prevent the new port device
+ * registration as well as concurrent invocation of this function.
+ */
+static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt)
+{
+	struct wwan_device *wwandev = to_wwan_dev(port->dev.parent);
+	const unsigned int max_ports = PAGE_SIZE * 8;
+	struct class_dev_iter iter;
+	unsigned long *idmap;
+	struct device *dev;
+	char buf[0x20];
+	int id;
+
+	idmap = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+	if (!idmap)
+		return -ENOMEM;
+
+	/* Collect ids of same name format ports */
+	class_dev_iter_init(&iter, wwan_class, NULL, &wwan_port_dev_type);
+	while ((dev = class_dev_iter_next(&iter))) {
+		if (dev->parent != &wwandev->dev)
+			continue;
+		if (sscanf(dev_name(dev), fmt, &id) != 1)
+			continue;
+		if (id < 0 || id >= max_ports)
+			continue;
+		set_bit(id, idmap);
+	}
+	class_dev_iter_exit(&iter);
+
+	/* Allocate unique id */
+	id = find_first_zero_bit(idmap, max_ports);
+	free_page((unsigned long)idmap);
+
+	snprintf(buf, sizeof(buf), fmt, id);	/* Name generation */
+
+	dev = device_find_child_by_name(&wwandev->dev, buf);
+	if (dev) {
+		put_device(dev);
+		return -ENFILE;
+	}
+
+	return dev_set_name(&port->dev, buf);
+}
+
 struct wwan_port *wwan_create_port(struct device *parent,
 				   enum wwan_port_type type,
 				   const struct wwan_port_ops *ops,
@@ -266,6 +314,7 @@ struct wwan_port *wwan_create_port(struct device *parent,
 	struct wwan_device *wwandev;
 	struct wwan_port *port;
 	int minor, err = -ENOMEM;
+	char namefmt[0x20];
 
 	if (type > WWAN_PORT_MAX || !ops)
 		return ERR_PTR(-EINVAL);
@@ -300,12 +349,18 @@ struct wwan_port *wwan_create_port(struct device *parent,
 	port->dev.devt = MKDEV(wwan_major, minor);
 	dev_set_drvdata(&port->dev, drvdata);
 
-	/* create unique name based on wwan device id, port index and type */
-	dev_set_name(&port->dev, "wwan%up%u%s", wwandev->id,
-		     atomic_inc_return(&wwandev->port_id),
-		     wwan_port_types[port->type].devsuf);
+	/* allocate unique name based on wwan device id, port type and number */
+	snprintf(namefmt, sizeof(namefmt), "wwan%u%s%%d", wwandev->id,
+		 wwan_port_types[port->type].devsuf);
 
+	/* Serialize ports registration */
+	mutex_lock(&wwan_register_lock);
+
+	__wwan_port_dev_assign_name(port, namefmt);
 	err = device_register(&port->dev);
+
+	mutex_unlock(&wwan_register_lock);
+
 	if (err)
 		goto error_put_device;
 

From 72eedfc4bbc7480ea8fb38d5aebb57eafc03c8d5 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:38 +0300
Subject: [PATCH 1199/3292] net: wwan: core: expand ports number limit

Currently, we limit the total ports number to 256. It is quite common
for PBX or SMS gateway to be equipped with a lot of modems. In now days,
a modem could have 2-4 control ports or even more, what only accelerates
the ports exhausing rate.

To avoid facing the port number limitation issue reports, increase the
limit up the maximum number of minors (i.e. up to 1 << MINORBITS).

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 2844b17a724c..9346b2661eb3 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -14,7 +14,8 @@
 #include <linux/types.h>
 #include <linux/wwan.h>
 
-#define WWAN_MAX_MINORS 256 /* 256 minors allowed with register_chrdev() */
+/* Maximum number of minors in use */
+#define WWAN_MAX_MINORS		(1 << MINORBITS)
 
 static DEFINE_MUTEX(wwan_register_lock); /* WWAN device create|remove lock */
 static DEFINE_IDA(minors); /* minors for WWAN port chardevs */
@@ -634,7 +635,8 @@ static int __init wwan_init(void)
 		return PTR_ERR(wwan_class);
 
 	/* chrdev used for wwan ports */
-	wwan_major = register_chrdev(0, "wwan_port", &wwan_port_fops);
+	wwan_major = __register_chrdev(0, 0, WWAN_MAX_MINORS, "wwan_port",
+				       &wwan_port_fops);
 	if (wwan_major < 0) {
 		class_destroy(wwan_class);
 		return wwan_major;
@@ -645,7 +647,7 @@ static int __init wwan_init(void)
 
 static void __exit wwan_exit(void)
 {
-	unregister_chrdev(wwan_major, "wwan_port");
+	__unregister_chrdev(wwan_major, 0, WWAN_MAX_MINORS, "wwan_port");
 	class_destroy(wwan_class);
 }
 

From e263c5b2e8912149b49d757511d85a16c5fb432f Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:39 +0300
Subject: [PATCH 1200/3292] net: wwan: core: implement TIOCINQ ioctl

It is quite common for a userpace program to fetch the buffered amount
of data in the rx queue to avoid the read block. Implement the TIOCINQ
ioctl to make the migration to the WWAN port usage smooth.

Despite the fact that the read call will return no more data than the
size of a first skb in the queue, TIOCINQ returns the entire amount of
buffered data (sum of all queued skbs). This is done to prevent the
breaking of programs that optimize reading, avoiding it if the buffered
amount of data is too small.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 9346b2661eb3..d5a197da4a41 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -12,6 +12,7 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/termios.h>
 #include <linux/wwan.h>
 
 /* Maximum number of minors in use */
@@ -618,6 +619,30 @@ static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait)
 	return mask;
 }
 
+static long wwan_port_fops_ioctl(struct file *filp, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct wwan_port *port = filp->private_data;
+
+	switch (cmd) {
+	case TIOCINQ: {	/* aka SIOCINQ aka FIONREAD */
+		unsigned long flags;
+		struct sk_buff *skb;
+		int amount = 0;
+
+		spin_lock_irqsave(&port->rxq.lock, flags);
+		skb_queue_walk(&port->rxq, skb)
+			amount += skb->len;
+		spin_unlock_irqrestore(&port->rxq.lock, flags);
+
+		return put_user(amount, (int __user *)arg);
+	}
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
 static const struct file_operations wwan_port_fops = {
 	.owner = THIS_MODULE,
 	.open = wwan_port_fops_open,
@@ -625,6 +650,10 @@ static const struct file_operations wwan_port_fops = {
 	.read = wwan_port_fops_read,
 	.write = wwan_port_fops_write,
 	.poll = wwan_port_fops_poll,
+	.unlocked_ioctl = wwan_port_fops_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = compat_ptr_ioctl,
+#endif
 	.llseek = noop_llseek,
 };
 

From c230035c2f2f6371739f29e56eeb2611172225c8 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:40 +0300
Subject: [PATCH 1201/3292] net: wwan: core: implement terminal ioctls for AT
 port

It is not unreasonable to assume that users will use terminal emulation
software to communicate directly with a WWAN device over the AT port.
But terminal emulators  will refuse to work with a device that does not
support terminal IOCTLs (e.g. TCGETS, TCSETS, TIOCMSET, etc.). To make
it possible to interact with the WWAN AT port using a terminal emulator,
implement a minimal set of terminal IOCTLs.

The implementation is rather stub, no passed data are actually used to
control a port behaviour. An obtained configuration is kept inside the
port structure and returned back by a request. The latter is done to
fool a program that will test the configuration status by comparing the
readed back data from the device with earlier configured ones.

Tested with fresh versions of minicom and picocom terminal apps.

MBIM, QMI and other ports for binary protocols can hardly be considered
a terminal device, so terminal IOCTLs are only implemented for the AT
port.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 91 ++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index d5a197da4a41..38da3124d81e 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -51,6 +51,8 @@ struct wwan_device {
  * @dev: Underlying device
  * @rxq: Buffer inbound queue
  * @waitqueue: The waitqueue for port fops (read/write/poll)
+ * @data_lock: Port specific data access serialization
+ * @at_data: AT port specific data
  */
 struct wwan_port {
 	enum wwan_port_type type;
@@ -61,6 +63,13 @@ struct wwan_port {
 	struct device dev;
 	struct sk_buff_head rxq;
 	wait_queue_head_t waitqueue;
+	struct mutex data_lock;	/* Port specific data access serialization */
+	union {
+		struct {
+			struct ktermios termios;
+			int mdmbits;
+		} at_data;
+	};
 };
 
 static ssize_t index_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -230,6 +239,7 @@ static void wwan_port_destroy(struct device *dev)
 	struct wwan_port *port = to_wwan_port(dev);
 
 	ida_free(&minors, MINOR(port->dev.devt));
+	mutex_destroy(&port->data_lock);
 	skb_queue_purge(&port->rxq);
 	mutex_destroy(&port->ops_lock);
 	kfree(port);
@@ -344,6 +354,7 @@ struct wwan_port *wwan_create_port(struct device *parent,
 	mutex_init(&port->ops_lock);
 	skb_queue_head_init(&port->rxq);
 	init_waitqueue_head(&port->waitqueue);
+	mutex_init(&port->data_lock);
 
 	port->dev.parent = &wwandev->dev;
 	port->dev.class = wwan_class;
@@ -619,10 +630,90 @@ static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait)
 	return mask;
 }
 
+/* Implements minimalistic stub terminal IOCTLs support */
+static long wwan_port_fops_at_ioctl(struct wwan_port *port, unsigned int cmd,
+				    unsigned long arg)
+{
+	int ret = 0;
+
+	mutex_lock(&port->data_lock);
+
+	switch (cmd) {
+	case TCFLSH:
+		break;
+
+	case TCGETS:
+		if (copy_to_user((void __user *)arg, &port->at_data.termios,
+				 sizeof(struct termios)))
+			ret = -EFAULT;
+		break;
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF:
+		if (copy_from_user(&port->at_data.termios, (void __user *)arg,
+				   sizeof(struct termios)))
+			ret = -EFAULT;
+		break;
+
+#ifdef TCGETS2
+	case TCGETS2:
+		if (copy_to_user((void __user *)arg, &port->at_data.termios,
+				 sizeof(struct termios2)))
+			ret = -EFAULT;
+		break;
+
+	case TCSETS2:
+	case TCSETSW2:
+	case TCSETSF2:
+		if (copy_from_user(&port->at_data.termios, (void __user *)arg,
+				   sizeof(struct termios2)))
+			ret = -EFAULT;
+		break;
+#endif
+
+	case TIOCMGET:
+		ret = put_user(port->at_data.mdmbits, (int __user *)arg);
+		break;
+
+	case TIOCMSET:
+	case TIOCMBIC:
+	case TIOCMBIS: {
+		int mdmbits;
+
+		if (copy_from_user(&mdmbits, (int __user *)arg, sizeof(int))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (cmd == TIOCMBIC)
+			port->at_data.mdmbits &= ~mdmbits;
+		else if (cmd == TIOCMBIS)
+			port->at_data.mdmbits |= mdmbits;
+		else
+			port->at_data.mdmbits = mdmbits;
+		break;
+	}
+
+	default:
+		ret = -ENOIOCTLCMD;
+	}
+
+	mutex_unlock(&port->data_lock);
+
+	return ret;
+}
+
 static long wwan_port_fops_ioctl(struct file *filp, unsigned int cmd,
 				 unsigned long arg)
 {
 	struct wwan_port *port = filp->private_data;
+	int res;
+
+	if (port->type == WWAN_PORT_AT) {	/* AT port specific IOCTLs */
+		res = wwan_port_fops_at_ioctl(port, cmd, arg);
+		if (res != -ENOIOCTLCMD)
+			return res;
+	}
 
 	switch (cmd) {
 	case TIOCINQ: {	/* aka SIOCINQ aka FIONREAD */

From 504672038b17b76466724dda017618b0c072a922 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 8 Jun 2021 07:02:41 +0300
Subject: [PATCH 1202/3292] net: wwan: core: purge rx queue on port close

Purge the rx queue as soon as a user closes the port, just after the
port stop callback invocation. This is to prevent feeding a user that
will open the port next time with outdated and possibly unrelated
data.

While at it also remove the odd skb_queue_purge() call in the port
device destroy callback. The queue will be purged just before the
callback is ivoncated in the wwan_remove_port() function.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 38da3124d81e..45a41aee8958 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -240,7 +240,6 @@ static void wwan_port_destroy(struct device *dev)
 
 	ida_free(&minors, MINOR(port->dev.devt));
 	mutex_destroy(&port->data_lock);
-	skb_queue_purge(&port->rxq);
 	mutex_destroy(&port->ops_lock);
 	kfree(port);
 }
@@ -462,8 +461,11 @@ static void wwan_port_op_stop(struct wwan_port *port)
 {
 	mutex_lock(&port->ops_lock);
 	port->start_count--;
-	if (port->ops && !port->start_count)
-		port->ops->stop(port);
+	if (!port->start_count) {
+		if (port->ops)
+			port->ops->stop(port);
+		skb_queue_purge(&port->rxq);
+	}
 	mutex_unlock(&port->ops_lock);
 }
 

From 070f5b701d559ae139b348fb19145269b58b68c3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 12:25:35 +0300
Subject: [PATCH 1203/3292] dt-bindings: net: dsa: sja1105: add SJA1110
 bindings

There are 4 variations of the SJA1110 switch which have a different set
of MII protocols supported per port. Document the compatible strings.

Also, the SJA1110 optionally supports 2 internal MDIO buses for 2
different types of Ethernet PHYs. Document a container node called
"mdios" which has 2 subnodes "mdio@0" and "mdio@1", identifiable via
compatible string, under which the driver finds the internal PHYs.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: devicetree@vger.kernel.org
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/dsa/nxp,sja1105.yaml         | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
index d6ac9a0c1b04..0b8a05dd52e6 100644
--- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
@@ -27,10 +27,53 @@ properties:
       - nxp,sja1105q
       - nxp,sja1105r
       - nxp,sja1105s
+      - nxp,sja1110a
+      - nxp,sja1110b
+      - nxp,sja1110c
+      - nxp,sja1110d
 
   reg:
     maxItems: 1
 
+  # Optional container node for the 2 internal MDIO buses of the SJA1110
+  # (one for the internal 100base-T1 PHYs and the other for the single
+  # 100base-TX PHY). The "reg" property does not have physical significance.
+  # The PHY addresses to port correspondence is as follows: for 100base-T1,
+  # port 5 has PHY 1, port 6 has PHY 2 etc, while for 100base-TX, port 1 has
+  # PHY 1.
+  mdios:
+    type: object
+
+    properties:
+      '#address-cells':
+        const: 1
+      '#size-cells':
+        const: 0
+
+    patternProperties:
+      "^mdio@[0-1]$":
+        type: object
+
+        allOf:
+          - $ref: "http://devicetree.org/schemas/net/mdio.yaml#"
+
+        properties:
+          compatible:
+            oneOf:
+              - enum:
+                  - nxp,sja1110-base-t1-mdio
+                  - nxp,sja1110-base-tx-mdio
+
+          reg:
+            oneOf:
+              - enum:
+                - 0
+                - 1
+
+        required:
+          - compatible
+          - reg
+
 required:
   - compatible
   - reg

From 3e77e59bf8cf105d64f70133e41c38daf482acc3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 12:25:36 +0300
Subject: [PATCH 1204/3292] net: dsa: sja1105: add support for the SJA1110
 switch family

The SJA1110 is basically an SJA1105 with more ports, some integrated
PHYs (100base-T1 and 100base-TX) and an embedded microcontroller which
can be disabled, and the switch core can be controlled by a host running
Linux, over SPI.

This patch contains:
- the static and dynamic config packing functions, for the tables that
  are common with SJA1105
- one more static config tables which is "unique" to the SJA1110
  (actually it is a rehash of stuff that was placed somewhere else in
  SJA1105): the PCP Remapping Table
- a reset and clock configuration procedure for the SJA1110 switch.
  This resets just the switch subsystem, and gates off the clock which
  powers on the embedded microcontroller.
- an RGMII delay configuration procedure for SJA1110, which is very
  similar to SJA1105, but different enough for us to be unable to reuse
  it (this is a pattern that repeats itself)
- some adaptations to dynamic config table entries which are no longer
  programmed in the same way. For example, to delete a VLAN, you used to
  write an entry through the dynamic reconfiguration interface with the
  desired VLAN ID, and with the VALIDENT bit set to false. Now, the VLAN
  table entries contain a TYPE_ENTRY field, which must be set to zero
  (in a backwards-incompatible way) in order for the entry to be deleted,
  or to some other entry for the VLAN to match "inner tagged" or "outer
  tagged" packets.
- a similar thing for the static config: the xMII Mode Parameters Table
  encoding for SGMII and MII (the latter just when attached to a
  100base-TX PHY) just isn't what it used to be in SJA1105. They are
  identical, except there is an extra "special" bit which needs to be
  set. Set it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h             |  24 +-
 drivers/net/dsa/sja1105/sja1105_clocking.c    |  91 ++++
 .../net/dsa/sja1105/sja1105_dynamic_config.c  | 321 +++++++++++-
 .../net/dsa/sja1105/sja1105_dynamic_config.h  |   1 +
 drivers/net/dsa/sja1105/sja1105_main.c        |  55 +-
 drivers/net/dsa/sja1105/sja1105_spi.c         | 252 +++++++++
 .../net/dsa/sja1105/sja1105_static_config.c   | 482 ++++++++++++++++++
 .../net/dsa/sja1105/sja1105_static_config.h   |  98 +++-
 8 files changed, 1312 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 3c66e5945cca..ef6e22c78373 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -13,15 +13,12 @@
 #include <linux/mutex.h>
 #include "sja1105_static_config.h"
 
-#define SJA1105_NUM_PORTS		5
-#define SJA1105_MAX_NUM_PORTS		SJA1105_NUM_PORTS
-#define SJA1105_NUM_TC			8
 #define SJA1105ET_FDB_BIN_SIZE		4
 /* The hardware value is in multiples of 10 ms.
  * The passed parameter is in multiples of 1 ms.
  */
 #define SJA1105_AGEING_TIME_MS(ms)	((ms) / 10)
-#define SJA1105_NUM_L2_POLICERS		45
+#define SJA1105_NUM_L2_POLICERS		SJA1110_MAX_L2_POLICING_COUNT
 
 typedef enum {
 	SPI_READ = 0,
@@ -99,6 +96,7 @@ struct sja1105_info {
 	int ptpegr_ts_bytes;
 	int num_cbs_shapers;
 	int max_frame_mem;
+	int num_ports;
 	const struct sja1105_dynamic_table_ops *dyn_ops;
 	const struct sja1105_table_ops *static_ops;
 	const struct sja1105_regs *regs;
@@ -310,6 +308,10 @@ extern const struct sja1105_info sja1105p_info;
 extern const struct sja1105_info sja1105q_info;
 extern const struct sja1105_info sja1105r_info;
 extern const struct sja1105_info sja1105s_info;
+extern const struct sja1105_info sja1110a_info;
+extern const struct sja1105_info sja1110b_info;
+extern const struct sja1105_info sja1110c_info;
+extern const struct sja1105_info sja1110d_info;
 
 /* From sja1105_clocking.c */
 
@@ -326,8 +328,10 @@ typedef enum {
 } sja1105_phy_interface_t;
 
 int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port);
+int sja1110_setup_rgmii_delay(const void *ctx, int port);
 int sja1105_clocking_setup_port(struct sja1105_private *priv, int port);
 int sja1105_clocking_setup(struct sja1105_private *priv);
+int sja1110_clocking_setup(struct sja1105_private *priv);
 
 /* From sja1105_ethtool.c */
 void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data);
@@ -348,6 +352,18 @@ enum sja1105_iotag {
 	SJA1105_S_TAG = 1, /* Outer VLAN header */
 };
 
+enum sja1110_vlan_type {
+	SJA1110_VLAN_INVALID = 0,
+	SJA1110_VLAN_C_TAG = 1, /* Single inner VLAN tag */
+	SJA1110_VLAN_S_TAG = 2, /* Single outer VLAN tag */
+	SJA1110_VLAN_D_TAG = 3, /* Double tagged, use outer tag for lookup */
+};
+
+enum sja1110_shaper_type {
+	SJA1110_LEAKY_BUCKET_SHAPER = 0,
+	SJA1110_CBS_SHAPER = 1,
+};
+
 u8 sja1105et_fdb_hash(struct sja1105_private *priv, const u8 *addr, u16 vid);
 int sja1105et_fdb_add(struct dsa_switch *ds, int port,
 		      const unsigned char *addr, u16 vid);
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index ae297648611f..645edea5a81f 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -6,6 +6,7 @@
 #include "sja1105.h"
 
 #define SJA1105_SIZE_CGU_CMD	4
+#define SJA1110_BASE_TIMER_CLK	SJA1110_CGU_ADDR(0x74)
 
 /* Common structure for CFG_PAD_MIIx_RX and CFG_PAD_MIIx_TX */
 struct sja1105_cfg_pad_mii {
@@ -61,6 +62,12 @@ struct sja1105_cgu_pll_ctrl {
 	u64 pd;
 };
 
+struct sja1110_cgu_outclk {
+	u64 clksrc;
+	u64 autoblock;
+	u64 pd;
+};
+
 enum {
 	CLKSRC_MII0_TX_CLK	= 0x00,
 	CLKSRC_MII0_RX_CLK	= 0x01,
@@ -461,6 +468,35 @@ sja1105_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd,
 	sja1105_packing(buf, &cmd->txc_pd,          0,  0, size, op);
 }
 
+static void
+sja1110_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd,
+			       enum packing_op op)
+{
+	const int size = SJA1105_SIZE_CGU_CMD;
+	u64 range = 4;
+
+	/* Fields RXC_RANGE and TXC_RANGE select the input frequency range:
+	 * 0 = 2.5MHz
+	 * 1 = 25MHz
+	 * 2 = 50MHz
+	 * 3 = 125MHz
+	 * 4 = Automatically determined by port speed.
+	 * There's no point in defining a structure different than the one for
+	 * SJA1105, so just hardcode the frequency range to automatic, just as
+	 * before.
+	 */
+	sja1105_packing(buf, &cmd->rxc_stable_ovr, 26, 26, size, op);
+	sja1105_packing(buf, &cmd->rxc_delay,      25, 21, size, op);
+	sja1105_packing(buf, &range,               20, 18, size, op);
+	sja1105_packing(buf, &cmd->rxc_bypass,     17, 17, size, op);
+	sja1105_packing(buf, &cmd->rxc_pd,         16, 16, size, op);
+	sja1105_packing(buf, &cmd->txc_stable_ovr, 10, 10, size, op);
+	sja1105_packing(buf, &cmd->txc_delay,       9,  5, size, op);
+	sja1105_packing(buf, &range,                4,  2, size, op);
+	sja1105_packing(buf, &cmd->txc_bypass,      1,  1, size, op);
+	sja1105_packing(buf, &cmd->txc_pd,          0,  0, size, op);
+}
+
 /* Valid range in degrees is an integer between 73.8 and 101.7 */
 static u64 sja1105_rgmii_delay(u64 phase)
 {
@@ -519,6 +555,35 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port)
 				packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
+int sja1110_setup_rgmii_delay(const void *ctx, int port)
+{
+	const struct sja1105_private *priv = ctx;
+	const struct sja1105_regs *regs = priv->info->regs;
+	struct sja1105_cfg_pad_mii_id pad_mii_id = {0};
+	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
+
+	pad_mii_id.rxc_pd = 1;
+	pad_mii_id.txc_pd = 1;
+
+	if (priv->rgmii_rx_delay[port]) {
+		pad_mii_id.rxc_delay = sja1105_rgmii_delay(90);
+		/* The "BYPASS" bit in SJA1110 is actually a "don't bypass" */
+		pad_mii_id.rxc_bypass = 1;
+		pad_mii_id.rxc_pd = 0;
+	}
+
+	if (priv->rgmii_tx_delay[port]) {
+		pad_mii_id.txc_delay = sja1105_rgmii_delay(90);
+		pad_mii_id.txc_bypass = 1;
+		pad_mii_id.txc_pd = 0;
+	}
+
+	sja1110_cfg_pad_mii_id_packing(packed_buf, &pad_mii_id, PACK);
+
+	return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_id[port],
+				packed_buf, SJA1105_SIZE_CGU_CMD);
+}
+
 static int sja1105_rgmii_clocking_setup(struct sja1105_private *priv, int port,
 					sja1105_mii_role_t role)
 {
@@ -755,3 +820,29 @@ int sja1105_clocking_setup(struct sja1105_private *priv)
 	}
 	return 0;
 }
+
+static void
+sja1110_cgu_outclk_packing(void *buf, struct sja1110_cgu_outclk *outclk,
+			   enum packing_op op)
+{
+	const int size = 4;
+
+	sja1105_packing(buf, &outclk->clksrc,    27, 24, size, op);
+	sja1105_packing(buf, &outclk->autoblock, 11, 11, size, op);
+	sja1105_packing(buf, &outclk->pd,         0,  0, size, op);
+}
+
+/* Power down the BASE_TIMER_CLK in order to disable the watchdog */
+int sja1110_clocking_setup(struct sja1105_private *priv)
+{
+	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
+	struct sja1110_cgu_outclk outclk_7_c = {
+		.clksrc = 0x5,
+		.pd = true,
+	};
+
+	sja1110_cgu_outclk_packing(packed_buf, &outclk_7_c, PACK);
+
+	return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK,
+				packed_buf, SJA1105_SIZE_CGU_CMD);
+}
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index ff2742f53de3..4c4c04f04269 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -106,6 +106,9 @@
 #define SJA1105PQRS_SIZE_VL_LOOKUP_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_VL_LOOKUP_ENTRY)
 
+#define SJA1110_SIZE_VL_POLICING_DYN_CMD			\
+	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_VL_POLICING_ENTRY)
+
 #define SJA1105ET_SIZE_MAC_CONFIG_DYN_ENTRY			\
 	SJA1105_SIZE_DYN_CMD
 
@@ -115,9 +118,15 @@
 #define SJA1105PQRS_SIZE_L2_LOOKUP_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY)
 
+#define SJA1110_SIZE_L2_LOOKUP_DYN_CMD				\
+	(SJA1105_SIZE_DYN_CMD + SJA1110_SIZE_L2_LOOKUP_ENTRY)
+
 #define SJA1105_SIZE_VLAN_LOOKUP_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + 4 + SJA1105_SIZE_VLAN_LOOKUP_ENTRY)
 
+#define SJA1110_SIZE_VLAN_LOOKUP_DYN_CMD			\
+	(SJA1105_SIZE_DYN_CMD + SJA1110_SIZE_VLAN_LOOKUP_ENTRY)
+
 #define SJA1105_SIZE_L2_FORWARDING_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_L2_FORWARDING_ENTRY)
 
@@ -133,12 +142,18 @@
 #define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_DYN_CMD		\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY)
 
+#define SJA1110_SIZE_L2_LOOKUP_PARAMS_DYN_CMD		\
+	(SJA1105_SIZE_DYN_CMD + SJA1110_SIZE_L2_LOOKUP_PARAMS_ENTRY)
+
 #define SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD			\
 	SJA1105_SIZE_DYN_CMD
 
 #define SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY)
 
+#define SJA1110_SIZE_GENERAL_PARAMS_DYN_CMD			\
+	(SJA1105_SIZE_DYN_CMD + SJA1110_SIZE_GENERAL_PARAMS_ENTRY)
+
 #define SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY)
 
@@ -151,8 +166,17 @@
 #define SJA1105PQRS_SIZE_CBS_DYN_CMD				\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_CBS_ENTRY)
 
+#define SJA1110_SIZE_XMII_PARAMS_DYN_CMD			\
+	SJA1110_SIZE_XMII_PARAMS_ENTRY
+
+#define SJA1110_SIZE_L2_POLICING_DYN_CMD			\
+	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_L2_POLICING_ENTRY)
+
+#define SJA1110_SIZE_L2_FORWARDING_PARAMS_DYN_CMD		\
+	SJA1105_SIZE_L2_FORWARDING_PARAMS_ENTRY
+
 #define SJA1105_MAX_DYN_CMD_SIZE				\
-	SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD
+	SJA1110_SIZE_GENERAL_PARAMS_DYN_CMD
 
 struct sja1105_dyn_cmd {
 	bool search;
@@ -197,6 +221,19 @@ sja1105pqrs_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->index,    9,  0, size, op);
 }
 
+static void
+sja1110_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+			      enum packing_op op)
+{
+	u8 *p = buf + SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	sja1105_packing(p, &cmd->index,   11,  0, size, op);
+}
+
 static size_t sja1105et_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op)
 {
@@ -208,6 +245,18 @@ static size_t sja1105et_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static void
+sja1110_vl_policing_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_VL_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->index,   11,  0, size, op);
+}
+
 static void
 sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				  enum packing_op op)
@@ -326,6 +375,18 @@ sja1105pqrs_dyn_l2_lookup_entry_packing(void *buf, void *entry_ptr,
 	return sja1105pqrs_l2_lookup_entry_packing(buf, entry_ptr, op);
 }
 
+static size_t sja1110_dyn_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op)
+{
+	struct sja1105_l2_lookup_entry *entry = entry_ptr;
+	u8 *cmd = buf + SJA1110_SIZE_L2_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(cmd, &entry->lockeds, 28, 28, size, op);
+
+	return sja1110_l2_lookup_entry_packing(buf, entry_ptr, op);
+}
+
 static void
 sja1105et_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				enum packing_op op)
@@ -437,6 +498,39 @@ sja1105_vlan_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 			SJA1105_SIZE_VLAN_LOOKUP_ENTRY, op);
 }
 
+/* In SJA1110 there is no gap between the command and the data, yay... */
+static void
+sja1110_vlan_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				enum packing_op op)
+{
+	u8 *p = buf + SJA1110_SIZE_VLAN_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+	u64 type_entry = 0;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	/* Hack: treat 'vlanid' field of struct sja1105_vlan_lookup_entry as
+	 * cmd->index.
+	 */
+	sja1105_packing(buf, &cmd->index, 38, 27,
+			SJA1110_SIZE_VLAN_LOOKUP_ENTRY, op);
+
+	/* But the VALIDENT bit has disappeared, now we are supposed to
+	 * invalidate an entry through the TYPE_ENTRY field of the entry..
+	 * This is a hack to transform the non-zero quality of the TYPE_ENTRY
+	 * field into a VALIDENT bit.
+	 */
+	if (op == PACK && !cmd->valident) {
+		sja1105_packing(buf, &type_entry, 40, 39,
+				SJA1110_SIZE_VLAN_LOOKUP_ENTRY, PACK);
+	} else if (op == UNPACK) {
+		sja1105_packing(buf, &type_entry, 40, 39,
+				SJA1110_SIZE_VLAN_LOOKUP_ENTRY, UNPACK);
+		cmd->valident = !!type_entry;
+	}
+}
+
 static void
 sja1105_l2_forwarding_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				  enum packing_op op)
@@ -450,6 +544,19 @@ sja1105_l2_forwarding_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->index,    4,  0, size, op);
 }
 
+static void
+sja1110_l2_forwarding_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				  enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_L2_FORWARDING_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    4,  0, size, op);
+}
+
 static void
 sja1105et_mac_config_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				 enum packing_op op)
@@ -504,6 +611,19 @@ sja1105pqrs_mac_config_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->index,    2,  0, size, op);
 }
 
+static void
+sja1110_mac_config_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+			       enum packing_op op)
+{
+	u8 *p = buf + SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    3,  0, size, op);
+}
+
 static void
 sja1105et_l2_lookup_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				       enum packing_op op)
@@ -536,6 +656,18 @@ sja1105pqrs_l2_lookup_params_cmd_packing(void *buf,
 	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
 }
 
+static void
+sja1110_l2_lookup_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				     enum packing_op op)
+{
+	u8 *p = buf + SJA1110_SIZE_L2_LOOKUP_PARAMS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+}
+
 static void
 sja1105et_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				     enum packing_op op)
@@ -570,6 +702,18 @@ sja1105pqrs_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->rdwrset, 28, 28, size, op);
 }
 
+static void
+sja1110_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				   enum packing_op op)
+{
+	u8 *p = buf + SJA1110_SIZE_GENERAL_PARAMS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+}
+
 static void
 sja1105pqrs_avb_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				   enum packing_op op)
@@ -596,6 +740,20 @@ sja1105_retagging_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->index,     5,  0, size, op);
 }
 
+static void
+sja1110_retagging_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+			      enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_RETAGGING_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,    31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset,  30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,   29, 29, size, op);
+	sja1105_packing(p, &cmd->valident, 28, 28, size, op);
+	sja1105_packing(p, &cmd->index,     4,  0, size, op);
+}
+
 static void sja1105et_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				      enum packing_op op)
 {
@@ -635,6 +793,18 @@ static void sja1105pqrs_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->index,    3,  0, size, op);
 }
 
+static void sja1110_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				    enum packing_op op)
+{
+	u8 *p = buf + SJA1105PQRS_SIZE_CBS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    7,  0, size, op);
+}
+
 static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
 					    enum packing_op op)
 {
@@ -650,6 +820,39 @@ static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t sja1110_cbs_entry_packing(void *buf, void *entry_ptr,
+					enum packing_op op)
+{
+	const size_t size = SJA1105PQRS_SIZE_CBS_ENTRY;
+	struct sja1105_cbs_entry *entry = entry_ptr;
+	u64 entry_type = SJA1110_CBS_SHAPER;
+
+	sja1105_packing(buf, &entry_type,       159, 159, size, op);
+	sja1105_packing(buf, &entry->credit_lo, 151, 120, size, op);
+	sja1105_packing(buf, &entry->credit_hi, 119,  88, size, op);
+	sja1105_packing(buf, &entry->send_slope, 87,  56, size, op);
+	sja1105_packing(buf, &entry->idle_slope, 55,  24, size, op);
+	return size;
+}
+
+static void sja1110_dummy_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				      enum packing_op op)
+{
+}
+
+static void
+sja1110_l2_policing_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_L2_POLICING_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    6,  0, size, op);
+}
+
 #define OP_READ		BIT(0)
 #define OP_WRITE	BIT(1)
 #define OP_DEL		BIT(2)
@@ -832,6 +1035,122 @@ const struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	},
 };
 
+/* SJA1110: Third generation */
+const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN] = {
+	[BLK_IDX_VL_LOOKUP] = {
+		.entry_packing = sja1110_vl_lookup_entry_packing,
+		.cmd_packing = sja1110_vl_lookup_cmd_packing,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.max_entry_count = SJA1110_MAX_VL_LOOKUP_COUNT,
+		.packed_size = SJA1105PQRS_SIZE_VL_LOOKUP_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x124),
+	},
+	[BLK_IDX_VL_POLICING] = {
+		.entry_packing = sja1110_vl_policing_entry_packing,
+		.cmd_packing = sja1110_vl_policing_cmd_packing,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.max_entry_count = SJA1110_MAX_VL_POLICING_COUNT,
+		.packed_size = SJA1110_SIZE_VL_POLICING_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x310),
+	},
+	[BLK_IDX_L2_LOOKUP] = {
+		.entry_packing = sja1110_dyn_l2_lookup_entry_packing,
+		.cmd_packing = sja1105pqrs_l2_lookup_cmd_packing,
+		.access = (OP_READ | OP_WRITE | OP_DEL | OP_SEARCH),
+		.max_entry_count = SJA1105_MAX_L2_LOOKUP_COUNT,
+		.packed_size = SJA1110_SIZE_L2_LOOKUP_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x8c),
+	},
+	[BLK_IDX_VLAN_LOOKUP] = {
+		.entry_packing = sja1110_vlan_lookup_entry_packing,
+		.cmd_packing = sja1110_vlan_lookup_cmd_packing,
+		.access = (OP_READ | OP_WRITE | OP_DEL),
+		.max_entry_count = SJA1105_MAX_VLAN_LOOKUP_COUNT,
+		.packed_size = SJA1110_SIZE_VLAN_LOOKUP_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0xb4),
+	},
+	[BLK_IDX_L2_FORWARDING] = {
+		.entry_packing = sja1110_l2_forwarding_entry_packing,
+		.cmd_packing = sja1110_l2_forwarding_cmd_packing,
+		.max_entry_count = SJA1110_MAX_L2_FORWARDING_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1105_SIZE_L2_FORWARDING_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0xa8),
+	},
+	[BLK_IDX_MAC_CONFIG] = {
+		.entry_packing = sja1110_mac_config_entry_packing,
+		.cmd_packing = sja1110_mac_config_cmd_packing,
+		.max_entry_count = SJA1110_MAX_MAC_CONFIG_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x134),
+	},
+	[BLK_IDX_L2_LOOKUP_PARAMS] = {
+		.entry_packing = sja1110_l2_lookup_params_entry_packing,
+		.cmd_packing = sja1110_l2_lookup_params_cmd_packing,
+		.max_entry_count = SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1110_SIZE_L2_LOOKUP_PARAMS_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x158),
+	},
+	[BLK_IDX_AVB_PARAMS] = {
+		.entry_packing = sja1105pqrs_avb_params_entry_packing,
+		.cmd_packing = sja1105pqrs_avb_params_cmd_packing,
+		.max_entry_count = SJA1105_MAX_AVB_PARAMS_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x2000C),
+	},
+	[BLK_IDX_GENERAL_PARAMS] = {
+		.entry_packing = sja1110_general_params_entry_packing,
+		.cmd_packing = sja1110_general_params_cmd_packing,
+		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1110_SIZE_GENERAL_PARAMS_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0xe8),
+	},
+	[BLK_IDX_RETAGGING] = {
+		.entry_packing = sja1110_retagging_entry_packing,
+		.cmd_packing = sja1110_retagging_cmd_packing,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_DEL),
+		.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0xdc),
+	},
+	[BLK_IDX_CBS] = {
+		.entry_packing = sja1110_cbs_entry_packing,
+		.cmd_packing = sja1110_cbs_cmd_packing,
+		.max_entry_count = SJA1110_MAX_CBS_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1105PQRS_SIZE_CBS_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0xc4),
+	},
+	[BLK_IDX_XMII_PARAMS] = {
+		.entry_packing = sja1110_xmii_params_entry_packing,
+		.cmd_packing = sja1110_dummy_cmd_packing,
+		.max_entry_count = SJA1105_MAX_XMII_PARAMS_COUNT,
+		.access = (OP_READ | OP_VALID_ANYWAY),
+		.packed_size = SJA1110_SIZE_XMII_PARAMS_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x3c),
+	},
+	[BLK_IDX_L2_POLICING] = {
+		.entry_packing = sja1110_l2_policing_entry_packing,
+		.cmd_packing = sja1110_l2_policing_cmd_packing,
+		.max_entry_count = SJA1110_MAX_L2_POLICING_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_VALID_ANYWAY),
+		.packed_size = SJA1110_SIZE_L2_POLICING_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x2fc),
+	},
+	[BLK_IDX_L2_FORWARDING_PARAMS] = {
+		.entry_packing = sja1110_l2_forwarding_params_entry_packing,
+		.cmd_packing = sja1110_dummy_cmd_packing,
+		.max_entry_count = SJA1105_MAX_L2_FORWARDING_PARAMS_COUNT,
+		.access = (OP_READ | OP_VALID_ANYWAY),
+		.packed_size = SJA1110_SIZE_L2_FORWARDING_PARAMS_DYN_CMD,
+		.addr = SJA1110_SPI_ADDR(0x20000),
+	},
+};
+
 /* Provides read access to the settings through the dynamic interface
  * of the switch.
  * @blk_idx	is used as key to select from the sja1105_dynamic_table_ops.
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h
index 28d4eb5efb8b..a1472f80a059 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h
@@ -36,5 +36,6 @@ struct sja1105_mgmt_entry {
 
 extern const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN];
 extern const struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN];
+extern const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN];
 
 #endif
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index cbce6e90dc63..2b3b6c402b34 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -343,6 +343,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 {
 	struct sja1105_table *table;
 	struct sja1105_vlan_lookup_entry pvid = {
+		.type_entry = SJA1110_VLAN_D_TAG,
 		.ving_mirr = 0,
 		.vegr_mirr = 0,
 		.vmemb_port = 0,
@@ -455,6 +456,47 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 
 			l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
 		}
+
+		l2fwd[ds->num_ports + i].type_egrpcp2outputq = true;
+	}
+
+	return 0;
+}
+
+static int sja1110_init_pcp_remapping(struct sja1105_private *priv)
+{
+	struct sja1110_pcp_remapping_entry *pcp_remap;
+	struct dsa_switch *ds = priv->ds;
+	struct sja1105_table *table;
+	int port, tc;
+
+	table = &priv->static_config.tables[BLK_IDX_PCP_REMAPPING];
+
+	/* Nothing to do for SJA1105 */
+	if (!table->ops->max_entry_count)
+		return 0;
+
+	if (table->entry_count) {
+		kfree(table->entries);
+		table->entry_count = 0;
+	}
+
+	table->entries = kcalloc(table->ops->max_entry_count,
+				 table->ops->unpacked_entry_size, GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+
+	table->entry_count = table->ops->max_entry_count;
+
+	pcp_remap = table->entries;
+
+	/* Repeat the configuration done for vlan_pmap */
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
+		for (tc = 0; tc < SJA1105_NUM_TC; tc++)
+			pcp_remap[port].egrpcp[tc] = tc;
 	}
 
 	return 0;
@@ -777,6 +819,9 @@ static int sja1105_static_config_load(struct sja1105_private *priv)
 	if (rc < 0)
 		return rc;
 	rc = sja1105_init_avb_params(priv);
+	if (rc < 0)
+		return rc;
+	rc = sja1110_init_pcp_remapping(priv);
 	if (rc < 0)
 		return rc;
 
@@ -2295,6 +2340,7 @@ sja1105_build_bridge_vlans(struct sja1105_private *priv,
 		new_vlan[match].vlan_bc |= BIT(v->port);
 		if (!v->untagged)
 			new_vlan[match].tag_port |= BIT(v->port);
+		new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
 	}
 
 	return 0;
@@ -2317,6 +2363,7 @@ sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv,
 		new_vlan[match].vlan_bc |= BIT(v->port);
 		if (!v->untagged)
 			new_vlan[match].tag_port |= BIT(v->port);
+		new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
 	}
 
 	return 0;
@@ -2377,6 +2424,7 @@ static int sja1105_build_subvlans(struct sja1105_private *priv,
 			new_vlan[match].tag_port |= BIT(v->port);
 		/* But it's always tagged towards the CPU */
 		new_vlan[match].tag_port |= BIT(upstream);
+		new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
 
 		/* The Retagging Table generates packet *clones* with
 		 * the new VLAN. This is a very odd hardware quirk
@@ -2544,6 +2592,7 @@ sja1105_build_crosschip_subvlans(struct sja1105_private *priv,
 		if (!tmp->untagged)
 			new_vlan[match].tag_port |= BIT(tmp->port);
 		new_vlan[match].tag_port |= BIT(upstream);
+		new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
 		/* Deny egress of @rx_vid towards our front-panel port.
 		 * This will force the switch to drop it, and we'll see
 		 * only the re-retagged packets (having the original,
@@ -3684,7 +3733,7 @@ static int sja1105_probe(struct spi_device *spi)
 		return -ENOMEM;
 
 	ds->dev = dev;
-	ds->num_ports = SJA1105_MAX_NUM_PORTS;
+	ds->num_ports = priv->info->num_ports;
 	ds->ops = &sja1105_switch_ops;
 	ds->priv = priv;
 	priv->ds = ds;
@@ -3788,6 +3837,10 @@ static const struct of_device_id sja1105_dt_ids[] = {
 	{ .compatible = "nxp,sja1105q", .data = &sja1105q_info },
 	{ .compatible = "nxp,sja1105r", .data = &sja1105r_info },
 	{ .compatible = "nxp,sja1105s", .data = &sja1105s_info },
+	{ .compatible = "nxp,sja1110a", .data = &sja1110a_info },
+	{ .compatible = "nxp,sja1110b", .data = &sja1110b_info },
+	{ .compatible = "nxp,sja1110c", .data = &sja1110c_info },
+	{ .compatible = "nxp,sja1110d", .data = &sja1110d_info },
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 786c16a77e46..187c9fbbd397 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -193,6 +193,16 @@ static int sja1105pqrs_reset_cmd(struct dsa_switch *ds)
 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &cold_reset, NULL);
 }
 
+static int sja1110_reset_cmd(struct dsa_switch *ds)
+{
+	struct sja1105_private *priv = ds->priv;
+	const struct sja1105_regs *regs = priv->info->regs;
+	u32 switch_reset = BIT(20);
+
+	/* Switch core reset */
+	return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &switch_reset, NULL);
+}
+
 int sja1105_inhibit_tx(const struct sja1105_private *priv,
 		       unsigned long port_bitmap, bool tx_inhibited)
 {
@@ -465,6 +475,88 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.ptpsyncts = 0x1F,
 };
 
+static struct sja1105_regs sja1110_regs = {
+	.device_id = SJA1110_SPI_ADDR(0x0),
+	.prod_id = SJA1110_ACU_ADDR(0xf00),
+	.status = SJA1110_SPI_ADDR(0x4),
+	.port_control = SJA1110_SPI_ADDR(0x50), /* actually INHIB_TX */
+	.vl_status = 0x10000,
+	.config = 0x020000,
+	.rgu = SJA1110_RGU_ADDR(0x100), /* Reset Control Register 0 */
+	/* Ports 2 and 3 are capable of xMII, but there isn't anything to
+	 * configure in the CGU/ACU for them.
+	 */
+	.pad_mii_tx = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR},
+	.pad_mii_rx = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR},
+	.pad_mii_id = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1110_ACU_ADDR(0x18), SJA1110_ACU_ADDR(0x28),
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR},
+	.rmii_pll1 = SJA1105_RSV_ADDR,
+	.cgu_idiv = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		     SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		     SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		     SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.stats[MAC] = {0x200, 0x202, 0x204, 0x206, 0x208, 0x20a,
+		       0x20c, 0x20e, 0x210, 0x212, 0x214},
+	.stats[HL1] = {0x400, 0x410, 0x420, 0x430, 0x440, 0x450,
+		       0x460, 0x470, 0x480, 0x490, 0x4a0},
+	.stats[HL2] = {0x600, 0x610, 0x620, 0x630, 0x640, 0x650,
+		       0x660, 0x670, 0x680, 0x690, 0x6a0},
+	.stats[ETHER] = {0x1400, 0x1418, 0x1430, 0x1448, 0x1460, 0x1478,
+			 0x1490, 0x14a8, 0x14c0, 0x14d8, 0x14f0},
+	.mii_tx_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.mii_rx_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		       SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.mii_ext_tx_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			   SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			   SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			   SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.mii_ext_rx_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			   SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			   SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			   SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.rgmii_tx_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			 SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			 SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			 SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.rmii_ref_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			 SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			 SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			 SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
+	.rmii_ext_tx_clk = {SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			    SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			    SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			    SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			    SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+			    SJA1105_RSV_ADDR},
+	.ptpschtm = SJA1110_SPI_ADDR(0x54),
+	.ptppinst = SJA1110_SPI_ADDR(0x5c),
+	.ptppindur = SJA1110_SPI_ADDR(0x64),
+	.ptp_control = SJA1110_SPI_ADDR(0x68),
+	.ptpclkval = SJA1110_SPI_ADDR(0x6c),
+	.ptpclkrate = SJA1110_SPI_ADDR(0x74),
+	.ptpclkcorp = SJA1110_SPI_ADDR(0x80),
+	.ptpsyncts = SJA1110_SPI_ADDR(0x84),
+};
+
 const struct sja1105_info sja1105e_info = {
 	.device_id		= SJA1105E_DEVICE_ID,
 	.part_no		= SJA1105ET_PART_NO,
@@ -475,6 +567,7 @@ const struct sja1105_info sja1105e_info = {
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
 	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1105_NUM_PORTS,
 	.num_cbs_shapers	= SJA1105ET_MAX_CBS_COUNT,
 	.reset_cmd		= sja1105et_reset_cmd,
 	.fdb_add_cmd		= sja1105et_fdb_add,
@@ -505,6 +598,7 @@ const struct sja1105_info sja1105t_info = {
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
 	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1105_NUM_PORTS,
 	.num_cbs_shapers	= SJA1105ET_MAX_CBS_COUNT,
 	.reset_cmd		= sja1105et_reset_cmd,
 	.fdb_add_cmd		= sja1105et_fdb_add,
@@ -535,6 +629,7 @@ const struct sja1105_info sja1105p_info = {
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1105_NUM_PORTS,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -566,6 +661,7 @@ const struct sja1105_info sja1105q_info = {
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1105_NUM_PORTS,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -597,6 +693,7 @@ const struct sja1105_info sja1105r_info = {
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1105_NUM_PORTS,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -630,6 +727,7 @@ const struct sja1105_info sja1105s_info = {
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1105_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1105_NUM_PORTS,
 	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
@@ -650,3 +748,157 @@ const struct sja1105_info sja1105s_info = {
 	.supports_sgmii		= {false, false, false, false, true},
 	.name			= "SJA1105S",
 };
+
+const struct sja1105_info sja1110a_info = {
+	.device_id		= SJA1110_DEVICE_ID,
+	.part_no		= SJA1110A_PART_NO,
+	.static_ops		= sja1110_table_ops,
+	.dyn_ops		= sja1110_dyn_ops,
+	.regs			= &sja1110_regs,
+	.qinq_tpid		= ETH_P_8021AD,
+	.can_limit_mcast_flood	= true,
+	.ptp_ts_bits		= 32,
+	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1110_NUM_PORTS,
+	.num_cbs_shapers	= SJA1110_MAX_CBS_COUNT,
+	.setup_rgmii_delay	= sja1110_setup_rgmii_delay,
+	.reset_cmd		= sja1110_reset_cmd,
+	.fdb_add_cmd		= sja1105pqrs_fdb_add,
+	.fdb_del_cmd		= sja1105pqrs_fdb_del,
+	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1110_clocking_setup,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 4,
+		[SJA1105_SPEED_100MBPS] = 3,
+		[SJA1105_SPEED_1000MBPS] = 2,
+		[SJA1105_SPEED_2500MBPS] = 1,
+	},
+	.supports_mii		= {true, true, true, true, false,
+				   true, true, true, true, true, true},
+	.supports_rmii		= {false, false, true, true, false,
+				   false, false, false, false, false, false},
+	.supports_rgmii		= {false, false, true, true, false,
+				   false, false, false, false, false, false},
+	.supports_sgmii		= {false, true, true, true, true,
+				   false, false, false, false, false, false},
+	.supports_2500basex	= {false, false, false, true, true,
+				   false, false, false, false, false, false},
+	.name			= "SJA1110A",
+};
+
+const struct sja1105_info sja1110b_info = {
+	.device_id		= SJA1110_DEVICE_ID,
+	.part_no		= SJA1110B_PART_NO,
+	.static_ops		= sja1110_table_ops,
+	.dyn_ops		= sja1110_dyn_ops,
+	.regs			= &sja1110_regs,
+	.qinq_tpid		= ETH_P_8021AD,
+	.can_limit_mcast_flood	= true,
+	.ptp_ts_bits		= 32,
+	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1110_NUM_PORTS,
+	.num_cbs_shapers	= SJA1110_MAX_CBS_COUNT,
+	.setup_rgmii_delay	= sja1110_setup_rgmii_delay,
+	.reset_cmd		= sja1110_reset_cmd,
+	.fdb_add_cmd		= sja1105pqrs_fdb_add,
+	.fdb_del_cmd		= sja1105pqrs_fdb_del,
+	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1110_clocking_setup,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 4,
+		[SJA1105_SPEED_100MBPS] = 3,
+		[SJA1105_SPEED_1000MBPS] = 2,
+		[SJA1105_SPEED_2500MBPS] = 1,
+	},
+	.supports_mii		= {true, true, true, true, false,
+				   true, true, true, true, true, false},
+	.supports_rmii		= {false, false, true, true, false,
+				   false, false, false, false, false, false},
+	.supports_rgmii		= {false, false, true, true, false,
+				   false, false, false, false, false, false},
+	.supports_sgmii		= {false, false, false, true, true,
+				   false, false, false, false, false, false},
+	.supports_2500basex	= {false, false, false, true, true,
+				   false, false, false, false, false, false},
+	.name			= "SJA1110B",
+};
+
+const struct sja1105_info sja1110c_info = {
+	.device_id		= SJA1110_DEVICE_ID,
+	.part_no		= SJA1110C_PART_NO,
+	.static_ops		= sja1110_table_ops,
+	.dyn_ops		= sja1110_dyn_ops,
+	.regs			= &sja1110_regs,
+	.qinq_tpid		= ETH_P_8021AD,
+	.can_limit_mcast_flood	= true,
+	.ptp_ts_bits		= 32,
+	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1110_NUM_PORTS,
+	.num_cbs_shapers	= SJA1110_MAX_CBS_COUNT,
+	.setup_rgmii_delay	= sja1110_setup_rgmii_delay,
+	.reset_cmd		= sja1110_reset_cmd,
+	.fdb_add_cmd		= sja1105pqrs_fdb_add,
+	.fdb_del_cmd		= sja1105pqrs_fdb_del,
+	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1110_clocking_setup,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 4,
+		[SJA1105_SPEED_100MBPS] = 3,
+		[SJA1105_SPEED_1000MBPS] = 2,
+		[SJA1105_SPEED_2500MBPS] = 1,
+	},
+	.supports_mii		= {true, true, true, true, false,
+				   true, true, true, false, false, false},
+	.supports_rmii		= {false, false, true, true, false,
+				   false, false, false, false, false, false},
+	.supports_rgmii		= {false, false, true, true, false,
+				   false, false, false, false, false, false},
+	.supports_sgmii		= {false, false, false, false, true,
+				   false, false, false, false, false, false},
+	.supports_2500basex	= {false, false, false, false, true,
+				   false, false, false, false, false, false},
+	.name			= "SJA1110C",
+};
+
+const struct sja1105_info sja1110d_info = {
+	.device_id		= SJA1110_DEVICE_ID,
+	.part_no		= SJA1110D_PART_NO,
+	.static_ops		= sja1110_table_ops,
+	.dyn_ops		= sja1110_dyn_ops,
+	.regs			= &sja1110_regs,
+	.qinq_tpid		= ETH_P_8021AD,
+	.can_limit_mcast_flood	= true,
+	.ptp_ts_bits		= 32,
+	.ptpegr_ts_bytes	= 8,
+	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
+	.num_ports		= SJA1110_NUM_PORTS,
+	.num_cbs_shapers	= SJA1110_MAX_CBS_COUNT,
+	.setup_rgmii_delay	= sja1110_setup_rgmii_delay,
+	.reset_cmd		= sja1110_reset_cmd,
+	.fdb_add_cmd		= sja1105pqrs_fdb_add,
+	.fdb_del_cmd		= sja1105pqrs_fdb_del,
+	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.clocking_setup		= sja1110_clocking_setup,
+	.port_speed		= {
+		[SJA1105_SPEED_AUTO] = 0,
+		[SJA1105_SPEED_10MBPS] = 4,
+		[SJA1105_SPEED_100MBPS] = 3,
+		[SJA1105_SPEED_1000MBPS] = 2,
+		[SJA1105_SPEED_2500MBPS] = 1,
+	},
+	.supports_mii		= {true, false, true, false, false,
+				   true, true, true, false, false, false},
+	.supports_rmii		= {false, false, true, false, false,
+				   false, false, false, false, false, false},
+	.supports_rgmii		= {false, false, true, false, false,
+				   false, false, false, false, false, false},
+	.supports_sgmii		= {false, true, true, true, true,
+				   false, false, false, false, false, false},
+	.name			= "SJA1110D",
+};
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 33f91ecbe07b..4eba79bdedbf 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -180,6 +180,41 @@ size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_general_params_entry_packing(void *buf, void *entry_ptr,
+					    enum packing_op op)
+{
+	struct sja1105_general_params_entry *entry = entry_ptr;
+	const size_t size = SJA1110_SIZE_GENERAL_PARAMS_ENTRY;
+
+	sja1105_packing(buf, &entry->vllupformat,  447, 447, size, op);
+	sja1105_packing(buf, &entry->mirr_ptacu,   446, 446, size, op);
+	sja1105_packing(buf, &entry->switchid,     445, 442, size, op);
+	sja1105_packing(buf, &entry->hostprio,     441, 439, size, op);
+	sja1105_packing(buf, &entry->mac_fltres1,  438, 391, size, op);
+	sja1105_packing(buf, &entry->mac_fltres0,  390, 343, size, op);
+	sja1105_packing(buf, &entry->mac_flt1,     342, 295, size, op);
+	sja1105_packing(buf, &entry->mac_flt0,     294, 247, size, op);
+	sja1105_packing(buf, &entry->incl_srcpt1,  246, 246, size, op);
+	sja1105_packing(buf, &entry->incl_srcpt0,  245, 245, size, op);
+	sja1105_packing(buf, &entry->send_meta1,   244, 244, size, op);
+	sja1105_packing(buf, &entry->send_meta0,   243, 243, size, op);
+	sja1105_packing(buf, &entry->casc_port,    242, 232, size, op);
+	sja1105_packing(buf, &entry->host_port,    231, 228, size, op);
+	sja1105_packing(buf, &entry->mirr_port,    227, 224, size, op);
+	sja1105_packing(buf, &entry->vlmarker,     223, 192, size, op);
+	sja1105_packing(buf, &entry->vlmask,       191, 160, size, op);
+	sja1105_packing(buf, &entry->tpid2,        159, 144, size, op);
+	sja1105_packing(buf, &entry->ignore2stf,   143, 143, size, op);
+	sja1105_packing(buf, &entry->tpid,         142, 127, size, op);
+	sja1105_packing(buf, &entry->queue_ts,     126, 126, size, op);
+	sja1105_packing(buf, &entry->egrmirrvid,   125, 114, size, op);
+	sja1105_packing(buf, &entry->egrmirrpcp,   113, 111, size, op);
+	sja1105_packing(buf, &entry->egrmirrdei,   110, 110, size, op);
+	sja1105_packing(buf, &entry->replay_port,  109, 106, size, op);
+	sja1105_packing(buf, &entry->tte_en,        16,  16, size, op);
+	return size;
+}
+
 static size_t
 sja1105_l2_forwarding_params_entry_packing(void *buf, void *entry_ptr,
 					   enum packing_op op)
@@ -195,6 +230,20 @@ sja1105_l2_forwarding_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_l2_forwarding_params_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op)
+{
+	struct sja1105_l2_forwarding_params_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_L2_FORWARDING_PARAMS_ENTRY;
+	int offset, i;
+
+	sja1105_packing(buf, &entry->max_dynp, 95, 93, size, op);
+	for (i = 0, offset = 5; i < 8; i++, offset += 11)
+		sja1105_packing(buf, &entry->part_spc[i],
+				offset + 10, offset + 0, size, op);
+	return size;
+}
+
 size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
 					   enum packing_op op)
 {
@@ -211,6 +260,27 @@ size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
+					   enum packing_op op)
+{
+	struct sja1105_l2_forwarding_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_L2_FORWARDING_ENTRY;
+	int offset, i;
+
+	if (entry->type_egrpcp2outputq) {
+		for (i = 0, offset = 31; i < SJA1110_NUM_PORTS;
+		     i++, offset += 3) {
+			sja1105_packing(buf, &entry->vlan_pmap[i],
+					offset + 2, offset + 0, size, op);
+		}
+	} else {
+		sja1105_packing(buf, &entry->bc_domain,  63, 53, size, op);
+		sja1105_packing(buf, &entry->reach_port, 52, 42, size, op);
+		sja1105_packing(buf, &entry->fl_domain,  41, 31, size, op);
+	}
+	return size;
+}
+
 static size_t
 sja1105et_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op)
@@ -249,6 +319,28 @@ size_t sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
+					      enum packing_op op)
+{
+	struct sja1105_l2_lookup_params_entry *entry = entry_ptr;
+	const size_t size = SJA1110_SIZE_L2_LOOKUP_PARAMS_ENTRY;
+	int offset, i;
+
+	for (i = 0, offset = 70; i < SJA1110_NUM_PORTS; i++, offset += 11)
+		sja1105_packing(buf, &entry->maxaddrp[i],
+				offset + 10, offset + 0, size, op);
+	sja1105_packing(buf, &entry->maxage,         69,  55, size, op);
+	sja1105_packing(buf, &entry->start_dynspc,   54,  45, size, op);
+	sja1105_packing(buf, &entry->drpnolearn,     44,  34, size, op);
+	sja1105_packing(buf, &entry->shared_learn,   33,  33, size, op);
+	sja1105_packing(buf, &entry->no_enf_hostprt, 32,  32, size, op);
+	sja1105_packing(buf, &entry->no_mgmt_learn,  31,  31, size, op);
+	sja1105_packing(buf, &entry->use_static,     30,  30, size, op);
+	sja1105_packing(buf, &entry->owr_dyn,        29,  29, size, op);
+	sja1105_packing(buf, &entry->learn_once,     28,  28, size, op);
+	return size;
+}
+
 size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op)
 {
@@ -291,6 +383,36 @@ size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op)
+{
+	const size_t size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+	struct sja1105_l2_lookup_entry *entry = entry_ptr;
+
+	if (entry->lockeds) {
+		sja1105_packing(buf, &entry->trap,     168, 168, size, op);
+		sja1105_packing(buf, &entry->mirrvlan, 167, 156, size, op);
+		sja1105_packing(buf, &entry->takets,   155, 155, size, op);
+		sja1105_packing(buf, &entry->mirr,     154, 154, size, op);
+		sja1105_packing(buf, &entry->retag,    153, 153, size, op);
+	} else {
+		sja1105_packing(buf, &entry->touched,  168, 168, size, op);
+		sja1105_packing(buf, &entry->age,      167, 153, size, op);
+	}
+	sja1105_packing(buf, &entry->mask_iotag,   152, 152, size, op);
+	sja1105_packing(buf, &entry->mask_vlanid,  151, 140, size, op);
+	sja1105_packing(buf, &entry->mask_macaddr, 139,  92, size, op);
+	sja1105_packing(buf, &entry->mask_srcport,  91,  88, size, op);
+	sja1105_packing(buf, &entry->iotag,         87,  87, size, op);
+	sja1105_packing(buf, &entry->vlanid,        86,  75, size, op);
+	sja1105_packing(buf, &entry->macaddr,       74,  27, size, op);
+	sja1105_packing(buf, &entry->srcport,       26,  23, size, op);
+	sja1105_packing(buf, &entry->destports,     22,  12, size, op);
+	sja1105_packing(buf, &entry->enfport,       11,  11, size, op);
+	sja1105_packing(buf, &entry->index,         10,   1, size, op);
+	return size;
+}
+
 static size_t sja1105_l2_policing_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op)
 {
@@ -305,6 +427,20 @@ static size_t sja1105_l2_policing_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_l2_policing_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op)
+{
+	struct sja1105_l2_policing_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_L2_POLICING_ENTRY;
+
+	sja1105_packing(buf, &entry->sharindx, 63, 57, size, op);
+	sja1105_packing(buf, &entry->smax,     56, 39, size, op);
+	sja1105_packing(buf, &entry->rate,     38, 21, size, op);
+	sja1105_packing(buf, &entry->maxlen,   20, 10, size, op);
+	sja1105_packing(buf, &entry->partition, 9,  7, size, op);
+	return size;
+}
+
 static size_t sja1105et_mac_config_entry_packing(void *buf, void *entry_ptr,
 						 enum packing_op op)
 {
@@ -373,6 +509,40 @@ size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_mac_config_entry_packing(void *buf, void *entry_ptr,
+					enum packing_op op)
+{
+	const size_t size = SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY;
+	struct sja1105_mac_config_entry *entry = entry_ptr;
+	int offset, i;
+
+	for (i = 0, offset = 104; i < 8; i++, offset += 19) {
+		sja1105_packing(buf, &entry->enabled[i],
+				offset +  0, offset +  0, size, op);
+		sja1105_packing(buf, &entry->base[i],
+				offset +  9, offset +  1, size, op);
+		sja1105_packing(buf, &entry->top[i],
+				offset + 18, offset + 10, size, op);
+	}
+	sja1105_packing(buf, &entry->speed,      98, 96, size, op);
+	sja1105_packing(buf, &entry->tp_delin,   95, 80, size, op);
+	sja1105_packing(buf, &entry->tp_delout,  79, 64, size, op);
+	sja1105_packing(buf, &entry->maxage,     63, 56, size, op);
+	sja1105_packing(buf, &entry->vlanprio,   55, 53, size, op);
+	sja1105_packing(buf, &entry->vlanid,     52, 41, size, op);
+	sja1105_packing(buf, &entry->ing_mirr,   40, 40, size, op);
+	sja1105_packing(buf, &entry->egr_mirr,   39, 39, size, op);
+	sja1105_packing(buf, &entry->drpnona664, 38, 38, size, op);
+	sja1105_packing(buf, &entry->drpdtag,    37, 37, size, op);
+	sja1105_packing(buf, &entry->drpuntag,   34, 34, size, op);
+	sja1105_packing(buf, &entry->retag,      33, 33, size, op);
+	sja1105_packing(buf, &entry->dyn_learn,  32, 32, size, op);
+	sja1105_packing(buf, &entry->egress,     31, 31, size, op);
+	sja1105_packing(buf, &entry->ingress,    30, 30, size, op);
+	sja1105_packing(buf, &entry->ifg,        10,  5, size, op);
+	return size;
+}
+
 static size_t
 sja1105_schedule_entry_points_params_entry_packing(void *buf, void *entry_ptr,
 						   enum packing_op op)
@@ -398,6 +568,19 @@ sja1105_schedule_entry_points_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t
+sja1110_schedule_entry_points_entry_packing(void *buf, void *entry_ptr,
+					    enum packing_op op)
+{
+	struct sja1105_schedule_entry_points_entry *entry = entry_ptr;
+	const size_t size = SJA1110_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY;
+
+	sja1105_packing(buf, &entry->subschindx, 63, 61, size, op);
+	sja1105_packing(buf, &entry->delta,      60, 43, size, op);
+	sja1105_packing(buf, &entry->address,    42, 31, size, op);
+	return size;
+}
+
 static size_t sja1105_schedule_params_entry_packing(void *buf, void *entry_ptr,
 						    enum packing_op op)
 {
@@ -411,6 +594,19 @@ static size_t sja1105_schedule_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t sja1110_schedule_params_entry_packing(void *buf, void *entry_ptr,
+						    enum packing_op op)
+{
+	struct sja1105_schedule_params_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_SCHEDULE_PARAMS_ENTRY;
+	int offset, i;
+
+	for (i = 0, offset = 0; i < 8; i++, offset += 12)
+		sja1105_packing(buf, &entry->subscheind[i],
+				offset + 11, offset + 0, size, op);
+	return size;
+}
+
 static size_t sja1105_schedule_entry_packing(void *buf, void *entry_ptr,
 					     enum packing_op op)
 {
@@ -430,6 +626,25 @@ static size_t sja1105_schedule_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t sja1110_schedule_entry_packing(void *buf, void *entry_ptr,
+					     enum packing_op op)
+{
+	const size_t size = SJA1110_SIZE_SCHEDULE_ENTRY;
+	struct sja1105_schedule_entry *entry = entry_ptr;
+
+	sja1105_packing(buf, &entry->winstindex,  95, 84, size, op);
+	sja1105_packing(buf, &entry->winend,      83, 83, size, op);
+	sja1105_packing(buf, &entry->winst,       82, 82, size, op);
+	sja1105_packing(buf, &entry->destports,   81, 71, size, op);
+	sja1105_packing(buf, &entry->setvalid,    70, 70, size, op);
+	sja1105_packing(buf, &entry->txen,        69, 69, size, op);
+	sja1105_packing(buf, &entry->resmedia_en, 68, 68, size, op);
+	sja1105_packing(buf, &entry->resmedia,    67, 60, size, op);
+	sja1105_packing(buf, &entry->vlindex,     59, 48, size, op);
+	sja1105_packing(buf, &entry->delta,       47, 30, size, op);
+	return size;
+}
+
 static size_t
 sja1105_vl_forwarding_params_entry_packing(void *buf, void *entry_ptr,
 					   enum packing_op op)
@@ -445,6 +660,21 @@ sja1105_vl_forwarding_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t
+sja1110_vl_forwarding_params_entry_packing(void *buf, void *entry_ptr,
+					   enum packing_op op)
+{
+	struct sja1105_vl_forwarding_params_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY;
+	int offset, i;
+
+	for (i = 0, offset = 8; i < 8; i++, offset += 11)
+		sja1105_packing(buf, &entry->partspc[i],
+				offset + 10, offset + 0, size, op);
+	sja1105_packing(buf, &entry->debugen, 7, 7, size, op);
+	return size;
+}
+
 static size_t sja1105_vl_forwarding_entry_packing(void *buf, void *entry_ptr,
 						  enum packing_op op)
 {
@@ -458,6 +688,19 @@ static size_t sja1105_vl_forwarding_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t sja1110_vl_forwarding_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op)
+{
+	struct sja1105_vl_forwarding_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_FORWARDING_ENTRY;
+
+	sja1105_packing(buf, &entry->type,      31, 31, size, op);
+	sja1105_packing(buf, &entry->priority,  30, 28, size, op);
+	sja1105_packing(buf, &entry->partition, 27, 25, size, op);
+	sja1105_packing(buf, &entry->destports, 24, 14, size, op);
+	return size;
+}
+
 size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 				       enum packing_op op)
 {
@@ -492,6 +735,40 @@ size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_vl_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op)
+{
+	struct sja1105_vl_lookup_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_LOOKUP_ENTRY;
+
+	if (entry->format == SJA1105_VL_FORMAT_PSFP) {
+		/* Interpreting vllupformat as 0 */
+		sja1105_packing(buf, &entry->destports,
+				94, 84, size, op);
+		sja1105_packing(buf, &entry->iscritical,
+				83, 83, size, op);
+		sja1105_packing(buf, &entry->macaddr,
+				82, 35, size, op);
+		sja1105_packing(buf, &entry->vlanid,
+				34, 23, size, op);
+		sja1105_packing(buf, &entry->port,
+				22, 19, size, op);
+		sja1105_packing(buf, &entry->vlanprior,
+				18, 16, size, op);
+	} else {
+		/* Interpreting vllupformat as 1 */
+		sja1105_packing(buf, &entry->egrmirr,
+				94, 84, size, op);
+		sja1105_packing(buf, &entry->ingrmirr,
+				83, 83, size, op);
+		sja1105_packing(buf, &entry->vlid,
+				50, 35, size, op);
+		sja1105_packing(buf, &entry->port,
+				22, 19, size, op);
+	}
+	return size;
+}
+
 static size_t sja1105_vl_policing_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op)
 {
@@ -508,6 +785,22 @@ static size_t sja1105_vl_policing_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_vl_policing_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op)
+{
+	struct sja1105_vl_policing_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_POLICING_ENTRY;
+
+	sja1105_packing(buf, &entry->type,      63, 63, size, op);
+	sja1105_packing(buf, &entry->maxlen,    62, 52, size, op);
+	sja1105_packing(buf, &entry->sharindx,  51, 40, size, op);
+	if (entry->type == 0) {
+		sja1105_packing(buf, &entry->bag,    41, 28, size, op);
+		sja1105_packing(buf, &entry->jitter, 27, 18, size, op);
+	}
+	return size;
+}
+
 size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op)
 {
@@ -523,6 +816,22 @@ size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op)
+{
+	struct sja1105_vlan_lookup_entry *entry = entry_ptr;
+	const size_t size = SJA1110_SIZE_VLAN_LOOKUP_ENTRY;
+
+	sja1105_packing(buf, &entry->ving_mirr,  95, 85, size, op);
+	sja1105_packing(buf, &entry->vegr_mirr,  84, 74, size, op);
+	sja1105_packing(buf, &entry->vmemb_port, 73, 63, size, op);
+	sja1105_packing(buf, &entry->vlan_bc,    62, 52, size, op);
+	sja1105_packing(buf, &entry->tag_port,   51, 41, size, op);
+	sja1105_packing(buf, &entry->type_entry, 40, 39, size, op);
+	sja1105_packing(buf, &entry->vlanid,     38, 27, size, op);
+	return size;
+}
+
 static size_t sja1105_xmii_params_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op)
 {
@@ -539,6 +848,24 @@ static size_t sja1105_xmii_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_xmii_params_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op)
+{
+	const size_t size = SJA1110_SIZE_XMII_PARAMS_ENTRY;
+	struct sja1105_xmii_params_entry *entry = entry_ptr;
+	int offset, i;
+
+	for (i = 0, offset = 20; i < SJA1110_NUM_PORTS; i++, offset += 4) {
+		sja1105_packing(buf, &entry->xmii_mode[i],
+				offset + 1, offset + 0, size, op);
+		sja1105_packing(buf, &entry->phy_mac[i],
+				offset + 2, offset + 2, size, op);
+		sja1105_packing(buf, &entry->special[i],
+				offset + 3, offset + 3, size, op);
+	}
+	return size;
+}
+
 size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
 				       enum packing_op op)
 {
@@ -555,6 +882,36 @@ size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1110_retagging_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op)
+{
+	struct sja1105_retagging_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_RETAGGING_ENTRY;
+
+	sja1105_packing(buf, &entry->egr_port,       63, 53, size, op);
+	sja1105_packing(buf, &entry->ing_port,       52, 42, size, op);
+	sja1105_packing(buf, &entry->vlan_ing,       41, 30, size, op);
+	sja1105_packing(buf, &entry->vlan_egr,       29, 18, size, op);
+	sja1105_packing(buf, &entry->do_not_learn,   17, 17, size, op);
+	sja1105_packing(buf, &entry->use_dest_ports, 16, 16, size, op);
+	sja1105_packing(buf, &entry->destports,      15, 5, size, op);
+	return size;
+}
+
+static size_t sja1110_pcp_remapping_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op)
+{
+	struct sja1110_pcp_remapping_entry *entry = entry_ptr;
+	const size_t size = SJA1110_SIZE_PCP_REMAPPING_ENTRY;
+	int offset, i;
+
+	for (i = 0, offset = 8; i < SJA1105_NUM_TC; i++, offset += 3)
+		sja1105_packing(buf, &entry->egrpcp[i],
+				offset + 2, offset + 0, size, op);
+
+	return size;
+}
+
 size_t sja1105_table_header_packing(void *buf, void *entry_ptr,
 				    enum packing_op op)
 {
@@ -619,6 +976,7 @@ static u64 blk_id_map[BLK_IDX_MAX] = {
 	[BLK_IDX_GENERAL_PARAMS] = BLKID_GENERAL_PARAMS,
 	[BLK_IDX_RETAGGING] = BLKID_RETAGGING,
 	[BLK_IDX_XMII_PARAMS] = BLKID_XMII_PARAMS,
+	[BLK_IDX_PCP_REMAPPING] = BLKID_PCP_REMAPPING,
 };
 
 const char *sja1105_static_config_error_msg[] = {
@@ -1400,6 +1758,130 @@ const struct sja1105_table_ops sja1105s_table_ops[BLK_IDX_MAX] = {
 	},
 };
 
+/* SJA1110A: Third generation */
+const struct sja1105_table_ops sja1110_table_ops[BLK_IDX_MAX] = {
+	[BLK_IDX_SCHEDULE] = {
+		.packing = sja1110_schedule_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_schedule_entry),
+		.packed_entry_size = SJA1110_SIZE_SCHEDULE_ENTRY,
+		.max_entry_count = SJA1110_MAX_SCHEDULE_COUNT,
+	},
+	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = {
+		.packing = sja1110_schedule_entry_points_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_schedule_entry_points_entry),
+		.packed_entry_size = SJA1110_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY,
+		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_COUNT,
+	},
+	[BLK_IDX_VL_LOOKUP] = {
+		.packing = sja1110_vl_lookup_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_lookup_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_LOOKUP_ENTRY,
+		.max_entry_count = SJA1110_MAX_VL_LOOKUP_COUNT,
+	},
+	[BLK_IDX_VL_POLICING] = {
+		.packing = sja1110_vl_policing_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_policing_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_POLICING_ENTRY,
+		.max_entry_count = SJA1110_MAX_VL_POLICING_COUNT,
+	},
+	[BLK_IDX_VL_FORWARDING] = {
+		.packing = sja1110_vl_forwarding_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_ENTRY,
+		.max_entry_count = SJA1110_MAX_VL_FORWARDING_COUNT,
+	},
+	[BLK_IDX_L2_LOOKUP] = {
+		.packing = sja1110_l2_lookup_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
+		.packed_entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY,
+		.max_entry_count = SJA1105_MAX_L2_LOOKUP_COUNT,
+	},
+	[BLK_IDX_L2_POLICING] = {
+		.packing = sja1110_l2_policing_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_l2_policing_entry),
+		.packed_entry_size = SJA1105_SIZE_L2_POLICING_ENTRY,
+		.max_entry_count = SJA1110_MAX_L2_POLICING_COUNT,
+	},
+	[BLK_IDX_VLAN_LOOKUP] = {
+		.packing = sja1110_vlan_lookup_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vlan_lookup_entry),
+		.packed_entry_size = SJA1110_SIZE_VLAN_LOOKUP_ENTRY,
+		.max_entry_count = SJA1105_MAX_VLAN_LOOKUP_COUNT,
+	},
+	[BLK_IDX_L2_FORWARDING] = {
+		.packing = sja1110_l2_forwarding_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_l2_forwarding_entry),
+		.packed_entry_size = SJA1105_SIZE_L2_FORWARDING_ENTRY,
+		.max_entry_count = SJA1110_MAX_L2_FORWARDING_COUNT,
+	},
+	[BLK_IDX_MAC_CONFIG] = {
+		.packing = sja1110_mac_config_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_mac_config_entry),
+		.packed_entry_size = SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY,
+		.max_entry_count = SJA1110_MAX_MAC_CONFIG_COUNT,
+	},
+	[BLK_IDX_SCHEDULE_PARAMS] = {
+		.packing = sja1110_schedule_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_schedule_params_entry),
+		.packed_entry_size = SJA1105_SIZE_SCHEDULE_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
+	},
+	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {
+		.packing = sja1105_schedule_entry_points_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_schedule_entry_points_params_entry),
+		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
+	},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {
+		.packing = sja1110_vl_forwarding_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_params_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_PARAMS_COUNT,
+	},
+	[BLK_IDX_L2_LOOKUP_PARAMS] = {
+		.packing = sja1110_l2_lookup_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
+		.packed_entry_size = SJA1110_SIZE_L2_LOOKUP_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT,
+	},
+	[BLK_IDX_L2_FORWARDING_PARAMS] = {
+		.packing = sja1110_l2_forwarding_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_l2_forwarding_params_entry),
+		.packed_entry_size = SJA1105_SIZE_L2_FORWARDING_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_L2_FORWARDING_PARAMS_COUNT,
+	},
+	[BLK_IDX_AVB_PARAMS] = {
+		.packing = sja1105pqrs_avb_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_avb_params_entry),
+		.packed_entry_size = SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_AVB_PARAMS_COUNT,
+	},
+	[BLK_IDX_GENERAL_PARAMS] = {
+		.packing = sja1110_general_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_general_params_entry),
+		.packed_entry_size = SJA1110_SIZE_GENERAL_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
+	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1110_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
+	[BLK_IDX_XMII_PARAMS] = {
+		.packing = sja1110_xmii_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
+		.packed_entry_size = SJA1110_SIZE_XMII_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_XMII_PARAMS_COUNT,
+	},
+	[BLK_IDX_PCP_REMAPPING] = {
+		.packing = sja1110_pcp_remapping_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1110_pcp_remapping_entry),
+		.packed_entry_size = SJA1110_SIZE_PCP_REMAPPING_ENTRY,
+		.max_entry_count = SJA1110_MAX_PCP_REMAPPING_COUNT,
+	},
+};
+
 int sja1105_static_config_init(struct sja1105_static_config *config,
 			       const struct sja1105_table_ops *static_ops,
 			       u64 device_id)
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 4ddb06bd8e92..d24227f78a72 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -9,21 +9,30 @@
 #include <linux/types.h>
 #include <asm/types.h>
 
+#define SJA1105_NUM_PORTS				5
+#define SJA1110_NUM_PORTS				11
+#define SJA1105_MAX_NUM_PORTS				SJA1110_NUM_PORTS
+#define SJA1105_NUM_TC					8
+
 #define SJA1105_SIZE_SPI_MSG_HEADER			4
 #define SJA1105_SIZE_SPI_MSG_MAXLEN			(64 * 4)
 #define SJA1105_SIZE_DEVICE_ID				4
 #define SJA1105_SIZE_TABLE_HEADER			12
 #define SJA1105_SIZE_SCHEDULE_ENTRY			8
+#define SJA1110_SIZE_SCHEDULE_ENTRY			12
 #define SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY	4
+#define SJA1110_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY	8
 #define SJA1105_SIZE_VL_LOOKUP_ENTRY			12
 #define SJA1105_SIZE_VL_POLICING_ENTRY			8
 #define SJA1105_SIZE_VL_FORWARDING_ENTRY		4
 #define SJA1105_SIZE_L2_POLICING_ENTRY			8
 #define SJA1105_SIZE_VLAN_LOOKUP_ENTRY			8
+#define SJA1110_SIZE_VLAN_LOOKUP_ENTRY			12
 #define SJA1105_SIZE_L2_FORWARDING_ENTRY		8
 #define SJA1105_SIZE_L2_FORWARDING_PARAMS_ENTRY		12
 #define SJA1105_SIZE_RETAGGING_ENTRY			8
 #define SJA1105_SIZE_XMII_PARAMS_ENTRY			4
+#define SJA1110_SIZE_XMII_PARAMS_ENTRY			8
 #define SJA1105_SIZE_SCHEDULE_PARAMS_ENTRY		12
 #define SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY	4
 #define SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY         12
@@ -34,11 +43,15 @@
 #define SJA1105ET_SIZE_AVB_PARAMS_ENTRY			12
 #define SJA1105ET_SIZE_CBS_ENTRY			16
 #define SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY		20
+#define SJA1110_SIZE_L2_LOOKUP_ENTRY			24
 #define SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY		32
 #define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY		16
+#define SJA1110_SIZE_L2_LOOKUP_PARAMS_ENTRY		28
 #define SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY		44
+#define SJA1110_SIZE_GENERAL_PARAMS_ENTRY		56
 #define SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY		16
 #define SJA1105PQRS_SIZE_CBS_ENTRY			20
+#define SJA1110_SIZE_PCP_REMAPPING_ENTRY		4
 
 /* UM10944.pdf Page 11, Table 2. Configuration Blocks */
 enum {
@@ -61,6 +74,7 @@ enum {
 	BLKID_GENERAL_PARAMS				= 0x11,
 	BLKID_RETAGGING					= 0x12,
 	BLKID_CBS					= 0x13,
+	BLKID_PCP_REMAPPING				= 0x1C,
 	BLKID_XMII_PARAMS				= 0x4E,
 };
 
@@ -85,6 +99,7 @@ enum sja1105_blk_idx {
 	BLK_IDX_RETAGGING,
 	BLK_IDX_CBS,
 	BLK_IDX_XMII_PARAMS,
+	BLK_IDX_PCP_REMAPPING,
 	BLK_IDX_MAX,
 	/* Fake block indices that are only valid for dynamic access */
 	BLK_IDX_MGMT_ROUTE,
@@ -93,15 +108,22 @@ enum sja1105_blk_idx {
 };
 
 #define SJA1105_MAX_SCHEDULE_COUNT			1024
+#define SJA1110_MAX_SCHEDULE_COUNT			4096
 #define SJA1105_MAX_SCHEDULE_ENTRY_POINTS_COUNT		2048
 #define SJA1105_MAX_VL_LOOKUP_COUNT			1024
+#define SJA1110_MAX_VL_LOOKUP_COUNT			4096
 #define SJA1105_MAX_VL_POLICING_COUNT			1024
+#define SJA1110_MAX_VL_POLICING_COUNT			4096
 #define SJA1105_MAX_VL_FORWARDING_COUNT			1024
+#define SJA1110_MAX_VL_FORWARDING_COUNT			4096
 #define SJA1105_MAX_L2_LOOKUP_COUNT			1024
 #define SJA1105_MAX_L2_POLICING_COUNT			45
+#define SJA1110_MAX_L2_POLICING_COUNT			110
 #define SJA1105_MAX_VLAN_LOOKUP_COUNT			4096
 #define SJA1105_MAX_L2_FORWARDING_COUNT			13
+#define SJA1110_MAX_L2_FORWARDING_COUNT			19
 #define SJA1105_MAX_MAC_CONFIG_COUNT			5
+#define SJA1110_MAX_MAC_CONFIG_COUNT			11
 #define SJA1105_MAX_SCHEDULE_PARAMS_COUNT		1
 #define SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT	1
 #define SJA1105_MAX_VL_FORWARDING_PARAMS_COUNT		1
@@ -113,8 +135,11 @@ enum sja1105_blk_idx {
 #define SJA1105_MAX_AVB_PARAMS_COUNT			1
 #define SJA1105ET_MAX_CBS_COUNT				10
 #define SJA1105PQRS_MAX_CBS_COUNT			16
+#define SJA1110_MAX_CBS_COUNT				80
+#define SJA1110_MAX_PCP_REMAPPING_COUNT			11
 
 #define SJA1105_MAX_FRAME_MEMORY			929
+#define SJA1110_MAX_FRAME_MEMORY			1820
 #define SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD		19
 #define SJA1105_VL_FRAME_MEMORY				100
 
@@ -122,12 +147,26 @@ enum sja1105_blk_idx {
 #define SJA1105T_DEVICE_ID				0x9E00030Eull
 #define SJA1105PR_DEVICE_ID				0xAF00030Eull
 #define SJA1105QS_DEVICE_ID				0xAE00030Eull
+#define SJA1110_DEVICE_ID				0xB700030Full
 
 #define SJA1105ET_PART_NO				0x9A83
 #define SJA1105P_PART_NO				0x9A84
 #define SJA1105Q_PART_NO				0x9A85
 #define SJA1105R_PART_NO				0x9A86
 #define SJA1105S_PART_NO				0x9A87
+#define SJA1110A_PART_NO				0x1110
+#define SJA1110B_PART_NO				0x1111
+#define SJA1110C_PART_NO				0x1112
+#define SJA1110D_PART_NO				0x1113
+
+#define SJA1110_ACU			0x1c4400
+#define SJA1110_RGU			0x1c6000
+#define SJA1110_CGU			0x1c6400
+
+#define SJA1110_SPI_ADDR(x)		((x) / 4)
+#define SJA1110_ACU_ADDR(x)		(SJA1110_ACU + SJA1110_SPI_ADDR(x))
+#define SJA1110_CGU_ADDR(x)		(SJA1110_CGU + SJA1110_SPI_ADDR(x))
+#define SJA1110_RGU_ADDR(x)		(SJA1110_RGU + SJA1110_SPI_ADDR(x))
 
 #define SJA1105_RSV_ADDR		0xffffffffffffffffull
 
@@ -175,6 +214,8 @@ struct sja1105_general_params_entry {
 	u64 egrmirrpcp;
 	u64 egrmirrdei;
 	u64 replay_port;
+	/* SJA1110 only */
+	u64 tte_en;
 };
 
 struct sja1105_schedule_entry_points_entry {
@@ -195,6 +236,7 @@ struct sja1105_vlan_lookup_entry {
 	u64 vlan_bc;
 	u64 tag_port;
 	u64 vlanid;
+	u64 type_entry; /* SJA1110 only */
 };
 
 struct sja1105_l2_lookup_entry {
@@ -207,11 +249,17 @@ struct sja1105_l2_lookup_entry {
 	u64 mask_iotag;
 	u64 mask_vlanid;
 	u64 mask_macaddr;
+	u64 mask_srcport;
 	u64 iotag;
+	u64 srcport;
 	u64 lockeds;
 	union {
 		/* LOCKEDS=1: Static FDB entries */
 		struct {
+			/* TSREG is deprecated in SJA1110, TRAP is supported only
+			 * in SJA1110.
+			 */
+			u64 trap;
 			u64 tsreg;
 			u64 mirrvlan;
 			u64 takets;
@@ -227,7 +275,7 @@ struct sja1105_l2_lookup_entry {
 };
 
 struct sja1105_l2_lookup_params_entry {
-	u64 maxaddrp[5];     /* P/Q/R/S only */
+	u64 maxaddrp[SJA1105_MAX_NUM_PORTS]; /* P/Q/R/S only */
 	u64 start_dynspc;    /* P/Q/R/S only */
 	u64 drpnolearn;      /* P/Q/R/S only */
 	u64 use_static;      /* P/Q/R/S only */
@@ -245,7 +293,9 @@ struct sja1105_l2_forwarding_entry {
 	u64 bc_domain;
 	u64 reach_port;
 	u64 fl_domain;
-	u64 vlan_pmap[8];
+	/* This is actually max(SJA1105_NUM_TC, SJA1105_MAX_NUM_PORTS) */
+	u64 vlan_pmap[SJA1105_MAX_NUM_PORTS];
+	bool type_egrpcp2outputq;
 };
 
 struct sja1105_l2_forwarding_params_entry {
@@ -300,8 +350,8 @@ struct sja1105_retagging_entry {
 };
 
 struct sja1105_cbs_entry {
-	u64 port;
-	u64 prio;
+	u64 port; /* Not used for SJA1110 */
+	u64 prio; /* Not used for SJA1110 */
 	u64 credit_hi;
 	u64 credit_lo;
 	u64 send_slope;
@@ -309,8 +359,19 @@ struct sja1105_cbs_entry {
 };
 
 struct sja1105_xmii_params_entry {
-	u64 phy_mac[5];
-	u64 xmii_mode[5];
+	u64 phy_mac[SJA1105_MAX_NUM_PORTS];
+	u64 xmii_mode[SJA1105_MAX_NUM_PORTS];
+	/* The SJA1110 insists being a snowflake, and requires SGMII,
+	 * 2500base-x and internal MII ports connected to the 100base-TX PHY to
+	 * set this bit. We set it unconditionally from the high-level logic,
+	 * and only sja1110_xmii_params_entry_packing writes it to the static
+	 * config. I have no better name for it than "special".
+	 */
+	u64 special[SJA1105_MAX_NUM_PORTS];
+};
+
+struct sja1110_pcp_remapping_entry {
+	u64 egrpcp[SJA1105_NUM_TC];
 };
 
 enum {
@@ -391,6 +452,7 @@ extern const struct sja1105_table_ops sja1105p_table_ops[BLK_IDX_MAX];
 extern const struct sja1105_table_ops sja1105q_table_ops[BLK_IDX_MAX];
 extern const struct sja1105_table_ops sja1105r_table_ops[BLK_IDX_MAX];
 extern const struct sja1105_table_ops sja1105s_table_ops[BLK_IDX_MAX];
+extern const struct sja1105_table_ops sja1110_table_ops[BLK_IDX_MAX];
 
 size_t sja1105_table_header_packing(void *buf, void *hdr, enum packing_op op);
 void
@@ -438,23 +500,47 @@ void sja1105_packing(void *buf, u64 *val, int start, int end,
 /* Common implementations for the static and dynamic configs */
 size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op);
+size_t sja1110_general_params_entry_packing(void *buf, void *entry_ptr,
+					    enum packing_op op);
 size_t sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
 						  enum packing_op op);
+size_t sja1110_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
+					      enum packing_op op);
 size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
 					   enum packing_op op);
+size_t sja1110_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
+					   enum packing_op op);
 size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr,
 					   enum packing_op op);
 size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op);
+size_t sja1110_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
 size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op);
+size_t sja1110_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op);
 size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
 				       enum packing_op op);
+size_t sja1110_retagging_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
 size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr,
 					    enum packing_op op);
+size_t sja1110_mac_config_entry_packing(void *buf, void *entry_ptr,
+					enum packing_op op);
 size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr,
 					    enum packing_op op);
 size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 				       enum packing_op op);
+size_t sja1110_vl_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
+size_t sja1110_vl_policing_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op);
+size_t sja1110_xmii_params_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op);
+size_t sja1110_l2_policing_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op);
+size_t sja1110_l2_forwarding_params_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op);
 
 #endif

From ceec8bc0988dca7bdbf7421f01f8d46949bdbdeb Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 12:25:37 +0300
Subject: [PATCH 1205/3292] net: dsa: sja1105: make sure the retagging port is
 enabled for SJA1110

The SJA1110 has an extra configuration in the General Parameters Table
through which the user can select the buffer reservation config.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c        | 56 +++++++++++++++++++
 .../net/dsa/sja1105/sja1105_static_config.c   |  1 +
 .../net/dsa/sja1105/sja1105_static_config.h   |  1 +
 3 files changed, 58 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2b3b6c402b34..801cf47d9572 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -565,6 +565,60 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv)
 	vl_fwd_params->partspc[0] = SJA1105_VL_FRAME_MEMORY;
 }
 
+/* SJA1110 TDMACONFIGIDX values:
+ *
+ *      | 100 Mbps ports |  1Gbps ports  | 2.5Gbps ports | Disabled ports
+ * -----+----------------+---------------+---------------+---------------
+ *   0  |   0, [5:10]    |     [1:2]     |     [3:4]     |     retag
+ *   1  |0, [5:10], retag|     [1:2]     |     [3:4]     |       -
+ *   2  |   0, [5:10]    |  [1:3], retag |       4       |       -
+ *   3  |   0, [5:10]    |[1:2], 4, retag|       3       |       -
+ *   4  |  0, 2, [5:10]  |    1, retag   |     [3:4]     |       -
+ *   5  |  0, 1, [5:10]  |    2, retag   |     [3:4]     |       -
+ *  14  |   0, [5:10]    | [1:4], retag  |       -       |       -
+ *  15  |     [5:10]     | [0:4], retag  |       -       |       -
+ */
+static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
+{
+	struct sja1105_general_params_entry *general_params;
+	struct sja1105_table *table;
+	bool port_1_is_base_tx;
+	bool port_3_is_2500;
+	bool port_4_is_2500;
+	u64 tdmaconfigidx;
+
+	if (priv->info->device_id != SJA1110_DEVICE_ID)
+		return;
+
+	table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
+	general_params = table->entries;
+
+	/* All the settings below are "as opposed to SGMII", which is the
+	 * other pinmuxing option.
+	 */
+	port_1_is_base_tx = priv->phy_mode[1] == PHY_INTERFACE_MODE_INTERNAL;
+	port_3_is_2500 = priv->phy_mode[3] == PHY_INTERFACE_MODE_2500BASEX;
+	port_4_is_2500 = priv->phy_mode[4] == PHY_INTERFACE_MODE_2500BASEX;
+
+	if (port_1_is_base_tx)
+		/* Retagging port will operate at 1 Gbps */
+		tdmaconfigidx = 5;
+	else if (port_3_is_2500 && port_4_is_2500)
+		/* Retagging port will operate at 100 Mbps */
+		tdmaconfigidx = 1;
+	else if (port_3_is_2500)
+		/* Retagging port will operate at 1 Gbps */
+		tdmaconfigidx = 3;
+	else if (port_4_is_2500)
+		/* Retagging port will operate at 1 Gbps */
+		tdmaconfigidx = 2;
+	else
+		/* Retagging port will operate at 1 Gbps */
+		tdmaconfigidx = 14;
+
+	general_params->tdmaconfigidx = tdmaconfigidx;
+}
+
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
 	struct sja1105_general_params_entry default_general_params = {
@@ -640,6 +694,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 	((struct sja1105_general_params_entry *)table->entries)[0] =
 				default_general_params;
 
+	sja1110_select_tdmaconfigidx(priv);
+
 	return 0;
 }
 
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 4eba79bdedbf..eda571819d45 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -211,6 +211,7 @@ size_t sja1110_general_params_entry_packing(void *buf, void *entry_ptr,
 	sja1105_packing(buf, &entry->egrmirrpcp,   113, 111, size, op);
 	sja1105_packing(buf, &entry->egrmirrdei,   110, 110, size, op);
 	sja1105_packing(buf, &entry->replay_port,  109, 106, size, op);
+	sja1105_packing(buf, &entry->tdmaconfigidx, 70,  67, size, op);
 	sja1105_packing(buf, &entry->tte_en,        16,  16, size, op);
 	return size;
 }
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index d24227f78a72..9bef51791bff 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -216,6 +216,7 @@ struct sja1105_general_params_entry {
 	u64 replay_port;
 	/* SJA1110 only */
 	u64 tte_en;
+	u64 tdmaconfigidx;
 };
 
 struct sja1105_schedule_entry_points_entry {

From 5a8f09748ee79f2ef28e560bd095587a0e204b3d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 12:25:38 +0300
Subject: [PATCH 1206/3292] net: dsa: sja1105: register the MDIO buses for
 100base-T1 and 100base-TX

The SJA1110 contains two types of integrated PHYs: one 100base-TX PHY
and multiple 100base-T1 PHYs.

The access procedure for the 100base-T1 PHYs is also different than it
is for the 100base-TX one. So we register 2 MDIO buses, one for the
base-TX and the other for the base-T1. Each bus has an OF node which is
a child of the "mdio" subnode of the switch, and they are recognized by
compatible string.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: devicetree@vger.kernel.org
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/Makefile       |   1 +
 drivers/net/dsa/sja1105/sja1105.h      |  19 ++
 drivers/net/dsa/sja1105/sja1105_main.c |  21 +-
 drivers/net/dsa/sja1105/sja1105_mdio.c | 288 +++++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_spi.c  |  30 +++
 5 files changed, 358 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/dsa/sja1105/sja1105_mdio.c

diff --git a/drivers/net/dsa/sja1105/Makefile b/drivers/net/dsa/sja1105/Makefile
index a860e3a910be..40d69e6c0bae 100644
--- a/drivers/net/dsa/sja1105/Makefile
+++ b/drivers/net/dsa/sja1105/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_NET_DSA_SJA1105) += sja1105.o
 sja1105-objs := \
     sja1105_spi.o \
     sja1105_main.o \
+    sja1105_mdio.o \
     sja1105_flower.o \
     sja1105_ethtool.o \
     sja1105_devlink.o \
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index ef6e22c78373..f762f5488a76 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -67,6 +67,12 @@ struct sja1105_regs {
 	u64 rmii_ref_clk[SJA1105_MAX_NUM_PORTS];
 	u64 rmii_ext_tx_clk[SJA1105_MAX_NUM_PORTS];
 	u64 stats[__MAX_SJA1105_STATS_AREA][SJA1105_MAX_NUM_PORTS];
+	u64 mdio_100base_tx;
+	u64 mdio_100base_t1;
+};
+
+struct sja1105_mdio_private {
+	struct sja1105_private *priv;
 };
 
 enum {
@@ -78,6 +84,12 @@ enum {
 	SJA1105_SPEED_MAX,
 };
 
+enum sja1105_internal_phy_t {
+	SJA1105_NO_PHY		= 0,
+	SJA1105_PHY_BASE_TX,
+	SJA1105_PHY_BASE_T1,
+};
+
 struct sja1105_info {
 	u64 device_id;
 	/* Needed for distinction between P and R, and between Q and S
@@ -123,6 +135,7 @@ struct sja1105_info {
 	bool supports_rgmii[SJA1105_MAX_NUM_PORTS];
 	bool supports_sgmii[SJA1105_MAX_NUM_PORTS];
 	bool supports_2500basex[SJA1105_MAX_NUM_PORTS];
+	enum sja1105_internal_phy_t internal_phy[SJA1105_MAX_NUM_PORTS];
 	const u64 port_speed[SJA1105_SPEED_MAX];
 };
 
@@ -246,6 +259,8 @@ struct sja1105_private {
 	enum sja1105_vlan_state vlan_state;
 	struct devlink_region **regions;
 	struct sja1105_cbs_entry *cbs;
+	struct mii_bus *mdio_base_t1;
+	struct mii_bus *mdio_base_tx;
 	struct sja1105_tagger_data tagger_data;
 	struct sja1105_ptp_data ptp_data;
 	struct sja1105_tas_data tas_data;
@@ -275,6 +290,10 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
 			   struct netlink_ext_ack *extack);
 void sja1105_frame_memory_partitioning(struct sja1105_private *priv);
 
+/* From sja1105_mdio.c */
+int sja1105_mdiobus_register(struct dsa_switch *ds);
+void sja1105_mdiobus_unregister(struct dsa_switch *ds);
+
 /* From sja1105_devlink.c */
 int sja1105_devlink_setup(struct dsa_switch *ds);
 void sja1105_devlink_teardown(struct dsa_switch *ds);
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 801cf47d9572..3b031864ad74 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -168,6 +168,15 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv)
 			continue;
 
 		switch (priv->phy_mode[i]) {
+		case PHY_INTERFACE_MODE_INTERNAL:
+			if (priv->info->internal_phy[i] == SJA1105_NO_PHY)
+				goto unsupported;
+
+			mii->xmii_mode[i] = XMII_MODE_MII;
+			if (priv->info->internal_phy[i] == SJA1105_PHY_BASE_TX)
+				mii->special[i] = true;
+
+			break;
 		case PHY_INTERFACE_MODE_REVMII:
 			role = XMII_PHY;
 			fallthrough;
@@ -3109,11 +3118,19 @@ static int sja1105_setup(struct dsa_switch *ds)
 		dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
 		return rc;
 	}
+
+	rc = sja1105_mdiobus_register(ds);
+	if (rc < 0) {
+		dev_err(ds->dev, "Failed to register MDIO bus: %pe\n",
+			ERR_PTR(rc));
+		goto out_ptp_clock_unregister;
+	}
+
 	/* Create and send configuration down to device */
 	rc = sja1105_static_config_load(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to load static config: %d\n", rc);
-		goto out_ptp_clock_unregister;
+		goto out_mdiobus_unregister;
 	}
 	/* Configure the CGU (PHY link modes and speeds) */
 	rc = priv->info->clocking_setup(priv);
@@ -3156,6 +3173,8 @@ static int sja1105_setup(struct dsa_switch *ds)
 
 out_devlink_teardown:
 	sja1105_devlink_teardown(ds);
+out_mdiobus_unregister:
+	sja1105_mdiobus_unregister(ds);
 out_ptp_clock_unregister:
 	sja1105_ptp_clock_unregister(ds);
 out_static_config_free:
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
new file mode 100644
index 000000000000..8dfd06318b23
--- /dev/null
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2021, NXP Semiconductors
+ */
+#include <linux/of_mdio.h>
+#include "sja1105.h"
+
+enum sja1105_mdio_opcode {
+	SJA1105_C45_ADDR = 0,
+	SJA1105_C22 = 1,
+	SJA1105_C45_DATA = 2,
+	SJA1105_C45_DATA_AUTOINC = 3,
+};
+
+static u64 sja1105_base_t1_encode_addr(struct sja1105_private *priv,
+				       int phy, enum sja1105_mdio_opcode op,
+				       int xad)
+{
+	const struct sja1105_regs *regs = priv->info->regs;
+
+	return regs->mdio_100base_t1 | (phy << 7) | (op << 5) | (xad << 0);
+}
+
+static int sja1105_base_t1_mdio_read(struct mii_bus *bus, int phy, int reg)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	u64 addr;
+	u32 tmp;
+	int rc;
+
+	if (reg & MII_ADDR_C45) {
+		u16 mmd = (reg >> MII_DEVADDR_C45_SHIFT) & 0x1f;
+
+		addr = sja1105_base_t1_encode_addr(priv, phy, SJA1105_C45_ADDR,
+						   mmd);
+
+		tmp = reg & MII_REGADDR_C45_MASK;
+
+		rc = sja1105_xfer_u32(priv, SPI_WRITE, addr, &tmp, NULL);
+		if (rc < 0)
+			return rc;
+
+		addr = sja1105_base_t1_encode_addr(priv, phy, SJA1105_C45_DATA,
+						   mmd);
+
+		rc = sja1105_xfer_u32(priv, SPI_READ, addr, &tmp, NULL);
+		if (rc < 0)
+			return rc;
+
+		return tmp & 0xffff;
+	}
+
+	/* Clause 22 read */
+	addr = sja1105_base_t1_encode_addr(priv, phy, SJA1105_C22, reg & 0x1f);
+
+	rc = sja1105_xfer_u32(priv, SPI_READ, addr, &tmp, NULL);
+	if (rc < 0)
+		return rc;
+
+	return tmp & 0xffff;
+}
+
+static int sja1105_base_t1_mdio_write(struct mii_bus *bus, int phy, int reg,
+				      u16 val)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	u64 addr;
+	u32 tmp;
+	int rc;
+
+	if (reg & MII_ADDR_C45) {
+		u16 mmd = (reg >> MII_DEVADDR_C45_SHIFT) & 0x1f;
+
+		addr = sja1105_base_t1_encode_addr(priv, phy, SJA1105_C45_ADDR,
+						   mmd);
+
+		tmp = reg & MII_REGADDR_C45_MASK;
+
+		rc = sja1105_xfer_u32(priv, SPI_WRITE, addr, &tmp, NULL);
+		if (rc < 0)
+			return rc;
+
+		addr = sja1105_base_t1_encode_addr(priv, phy, SJA1105_C45_DATA,
+						   mmd);
+
+		tmp = val & 0xffff;
+
+		rc = sja1105_xfer_u32(priv, SPI_WRITE, addr, &tmp, NULL);
+		if (rc < 0)
+			return rc;
+
+		return 0;
+	}
+
+	/* Clause 22 write */
+	addr = sja1105_base_t1_encode_addr(priv, phy, SJA1105_C22, reg & 0x1f);
+
+	tmp = val & 0xffff;
+
+	return sja1105_xfer_u32(priv, SPI_WRITE, addr, &tmp, NULL);
+}
+
+static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	const struct sja1105_regs *regs = priv->info->regs;
+	u32 tmp;
+	int rc;
+
+	rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg,
+			      &tmp, NULL);
+	if (rc < 0)
+		return rc;
+
+	return tmp & 0xffff;
+}
+
+static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg,
+				      u16 val)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	const struct sja1105_regs *regs = priv->info->regs;
+	u32 tmp = val;
+
+	return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg,
+				&tmp, NULL);
+}
+
+static int sja1105_mdiobus_base_tx_register(struct sja1105_private *priv,
+					    struct device_node *mdio_node)
+{
+	struct sja1105_mdio_private *mdio_priv;
+	struct device_node *np;
+	struct mii_bus *bus;
+	int rc = 0;
+
+	np = of_find_compatible_node(mdio_node, NULL,
+				     "nxp,sja1110-base-tx-mdio");
+	if (!np)
+		return 0;
+
+	if (!of_device_is_available(np))
+		goto out_put_np;
+
+	bus = mdiobus_alloc_size(sizeof(*mdio_priv));
+	if (!bus) {
+		rc = -ENOMEM;
+		goto out_put_np;
+	}
+
+	bus->name = "SJA1110 100base-TX MDIO bus";
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-base-tx",
+		 dev_name(priv->ds->dev));
+	bus->read = sja1105_base_tx_mdio_read;
+	bus->write = sja1105_base_tx_mdio_write;
+	bus->parent = priv->ds->dev;
+	mdio_priv = bus->priv;
+	mdio_priv->priv = priv;
+
+	rc = of_mdiobus_register(bus, np);
+	if (rc) {
+		mdiobus_free(bus);
+		goto out_put_np;
+	}
+
+	priv->mdio_base_tx = bus;
+
+out_put_np:
+	of_node_put(np);
+
+	return 0;
+}
+
+static void sja1105_mdiobus_base_tx_unregister(struct sja1105_private *priv)
+{
+	if (!priv->mdio_base_tx)
+		return;
+
+	mdiobus_unregister(priv->mdio_base_tx);
+	mdiobus_free(priv->mdio_base_tx);
+	priv->mdio_base_tx = NULL;
+}
+
+static int sja1105_mdiobus_base_t1_register(struct sja1105_private *priv,
+					    struct device_node *mdio_node)
+{
+	struct sja1105_mdio_private *mdio_priv;
+	struct device_node *np;
+	struct mii_bus *bus;
+	int rc = 0;
+
+	np = of_find_compatible_node(mdio_node, NULL,
+				     "nxp,sja1110-base-t1-mdio");
+	if (!np)
+		return 0;
+
+	if (!of_device_is_available(np))
+		goto out_put_np;
+
+	bus = mdiobus_alloc_size(sizeof(*mdio_priv));
+	if (!bus) {
+		rc = -ENOMEM;
+		goto out_put_np;
+	}
+
+	bus->name = "SJA1110 100base-T1 MDIO bus";
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-base-t1",
+		 dev_name(priv->ds->dev));
+	bus->read = sja1105_base_t1_mdio_read;
+	bus->write = sja1105_base_t1_mdio_write;
+	bus->parent = priv->ds->dev;
+	mdio_priv = bus->priv;
+	mdio_priv->priv = priv;
+
+	rc = of_mdiobus_register(bus, np);
+	if (rc) {
+		mdiobus_free(bus);
+		goto out_put_np;
+	}
+
+	priv->mdio_base_t1 = bus;
+
+out_put_np:
+	of_node_put(np);
+
+	return rc;
+}
+
+static void sja1105_mdiobus_base_t1_unregister(struct sja1105_private *priv)
+{
+	if (!priv->mdio_base_t1)
+		return;
+
+	mdiobus_unregister(priv->mdio_base_t1);
+	mdiobus_free(priv->mdio_base_t1);
+	priv->mdio_base_t1 = NULL;
+}
+
+int sja1105_mdiobus_register(struct dsa_switch *ds)
+{
+	struct sja1105_private *priv = ds->priv;
+	const struct sja1105_regs *regs = priv->info->regs;
+	struct device_node *switch_node = ds->dev->of_node;
+	struct device_node *mdio_node;
+	int rc;
+
+	mdio_node = of_get_child_by_name(switch_node, "mdios");
+	if (!mdio_node)
+		return 0;
+
+	if (!of_device_is_available(mdio_node))
+		goto out_put_mdio_node;
+
+	if (regs->mdio_100base_tx != SJA1105_RSV_ADDR) {
+		rc = sja1105_mdiobus_base_tx_register(priv, mdio_node);
+		if (rc)
+			goto err_put_mdio_node;
+	}
+
+	if (regs->mdio_100base_t1 != SJA1105_RSV_ADDR) {
+		rc = sja1105_mdiobus_base_t1_register(priv, mdio_node);
+		if (rc)
+			goto err_free_base_tx_mdiobus;
+	}
+
+out_put_mdio_node:
+	of_node_put(mdio_node);
+
+	return 0;
+
+err_free_base_tx_mdiobus:
+	sja1105_mdiobus_base_tx_unregister(priv);
+err_put_mdio_node:
+	of_node_put(mdio_node);
+
+	return rc;
+}
+
+void sja1105_mdiobus_unregister(struct dsa_switch *ds)
+{
+	struct sja1105_private *priv = ds->priv;
+
+	sja1105_mdiobus_base_t1_unregister(priv);
+	sja1105_mdiobus_base_tx_unregister(priv);
+}
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 187c9fbbd397..54ecb5565761 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -436,6 +436,8 @@ static struct sja1105_regs sja1105et_regs = {
 	.ptpclkval = 0x18, /* Spans 0x18 to 0x19 */
 	.ptpclkrate = 0x1A,
 	.ptpclkcorp = 0x1D,
+	.mdio_100base_tx = SJA1105_RSV_ADDR,
+	.mdio_100base_t1 = SJA1105_RSV_ADDR,
 };
 
 static struct sja1105_regs sja1105pqrs_regs = {
@@ -473,6 +475,8 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.ptpclkrate = 0x1B,
 	.ptpclkcorp = 0x1E,
 	.ptpsyncts = 0x1F,
+	.mdio_100base_tx = SJA1105_RSV_ADDR,
+	.mdio_100base_t1 = SJA1105_RSV_ADDR,
 };
 
 static struct sja1105_regs sja1110_regs = {
@@ -555,6 +559,8 @@ static struct sja1105_regs sja1110_regs = {
 	.ptpclkrate = SJA1110_SPI_ADDR(0x74),
 	.ptpclkcorp = SJA1110_SPI_ADDR(0x80),
 	.ptpsyncts = SJA1110_SPI_ADDR(0x84),
+	.mdio_100base_tx = 0x1c2400,
+	.mdio_100base_t1 = 0x1c1000,
 };
 
 const struct sja1105_info sja1105e_info = {
@@ -785,6 +791,12 @@ const struct sja1105_info sja1110a_info = {
 				   false, false, false, false, false, false},
 	.supports_2500basex	= {false, false, false, true, true,
 				   false, false, false, false, false, false},
+	.internal_phy		= {SJA1105_NO_PHY, SJA1105_PHY_BASE_TX,
+				   SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1},
 	.name			= "SJA1110A",
 };
 
@@ -824,6 +836,12 @@ const struct sja1105_info sja1110b_info = {
 				   false, false, false, false, false, false},
 	.supports_2500basex	= {false, false, false, true, true,
 				   false, false, false, false, false, false},
+	.internal_phy		= {SJA1105_NO_PHY, SJA1105_PHY_BASE_TX,
+				   SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1, SJA1105_PHY_BASE_T1,
+				   SJA1105_NO_PHY},
 	.name			= "SJA1110B",
 };
 
@@ -863,6 +881,12 @@ const struct sja1105_info sja1110c_info = {
 				   false, false, false, false, false, false},
 	.supports_2500basex	= {false, false, false, false, true,
 				   false, false, false, false, false, false},
+	.internal_phy		= {SJA1105_NO_PHY, SJA1105_PHY_BASE_TX,
+				   SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1, SJA1105_PHY_BASE_T1,
+				   SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY},
 	.name			= "SJA1110C",
 };
 
@@ -900,5 +924,11 @@ const struct sja1105_info sja1110d_info = {
 				   false, false, false, false, false, false},
 	.supports_sgmii		= {false, true, true, true, true,
 				   false, false, false, false, false, false},
+	.internal_phy		= {SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY, SJA1105_PHY_BASE_T1,
+				   SJA1105_PHY_BASE_T1, SJA1105_PHY_BASE_T1,
+				   SJA1105_NO_PHY, SJA1105_NO_PHY,
+				   SJA1105_NO_PHY},
 	.name			= "SJA1110D",
 };

From a08a61934cfad0506f8ed39d605ee7cd77c2381f Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:07 +0300
Subject: [PATCH 1207/3292] mlxsw: spectrum_router: Remove abort mechanism

The abort mechanism was introduced in commit 8e05fd7166c6 ("fib: hook
IPv4 fib for hardware offload") with the purpose of falling back to
software-based routing in case of a route programming error in hardware.
The process is irreversible and requires users to reload the offloading
driver or reboot the machine.

While this approach might make sense in theory, it makes very little
sense in practice. In the case of high speed ASICs such as the Spectrum
ASIC, the abort mechanism effectively kills the machine upon a non-fatal
error such as a route programming error.

Such an extreme policy does not belong in the kernel, especially when
user space can simply try to reprogram the route following the
RTM_NEWROUTE failure notification.

Therefore, remove the abort mechanism.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 129 +-----------------
 .../ethernet/mellanox/mlxsw/spectrum_router.h |   1 -
 2 files changed, 5 insertions(+), 125 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 6decc5a43f98..bc47ed766878 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4312,9 +4312,6 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_nexthop_key key;
 	struct mlxsw_sp_nexthop *nh;
 
-	if (mlxsw_sp->router->aborted)
-		return;
-
 	key.fib_nh = fib_nh;
 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
 	if (!nh)
@@ -6422,9 +6419,6 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_fib_node *fib_node;
 	int err;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	if (fen_info->fi->nh &&
 	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
 		return 0;
@@ -6485,9 +6479,6 @@ static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_fib_node *fib_node;
 	int err;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
 	if (!fib4_entry)
 		return 0;
@@ -7070,9 +7061,6 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
 	struct fib6_info *rt = rt_arr[0];
 	int err;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	if (rt->fib6_src.plen)
 		return -EINVAL;
 
@@ -7136,9 +7124,6 @@ static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
 	struct fib6_info *rt = rt_arr[0];
 	int err;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	if (rt->fib6_src.plen)
 		return -EINVAL;
 
@@ -7180,9 +7165,6 @@ static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
 	struct fib6_info *rt = rt_arr[0];
 	int err;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
 		return 0;
 
@@ -7211,55 +7193,6 @@ static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
 	return err;
 }
 
-static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
-					    enum mlxsw_sp_l3proto proto,
-					    u8 tree_id)
-{
-	const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto];
-	enum mlxsw_reg_ralxx_protocol ralxx_proto =
-				(enum mlxsw_reg_ralxx_protocol) proto;
-	struct mlxsw_sp_fib_entry_priv *priv;
-	char xralta_pl[MLXSW_REG_XRALTA_LEN];
-	char xralst_pl[MLXSW_REG_XRALST_LEN];
-	int i, err;
-
-	mlxsw_reg_xralta_pack(xralta_pl, true, ralxx_proto, tree_id);
-	err = ll_ops->ralta_write(mlxsw_sp, xralta_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_xralst_pack(xralst_pl, 0xff, tree_id);
-	err = ll_ops->ralst_write(mlxsw_sp, xralst_pl);
-	if (err)
-		return err;
-
-	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
-		struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx;
-		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
-		char xraltb_pl[MLXSW_REG_XRALTB_LEN];
-
-		mlxsw_sp_fib_entry_op_ctx_clear(op_ctx);
-		mlxsw_reg_xraltb_pack(xraltb_pl, vr->id, ralxx_proto, tree_id);
-		err = ll_ops->raltb_write(mlxsw_sp, xraltb_pl);
-		if (err)
-			return err;
-
-		priv = mlxsw_sp_fib_entry_priv_create(ll_ops);
-		if (IS_ERR(priv))
-			return PTR_ERR(priv);
-
-		ll_ops->fib_entry_pack(op_ctx, proto, MLXSW_SP_FIB_ENTRY_OP_WRITE,
-				       vr->id, 0, NULL, priv);
-		ll_ops->fib_entry_act_ip2me_pack(op_ctx);
-		err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, NULL);
-		mlxsw_sp_fib_entry_priv_put(priv);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 static struct mlxsw_sp_mr_table *
 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
 {
@@ -7276,9 +7209,6 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_vr *vr;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
 	if (IS_ERR(vr))
 		return PTR_ERR(vr);
@@ -7293,9 +7223,6 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_vr *vr;
 
-	if (mlxsw_sp->router->aborted)
-		return;
-
 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
 	if (WARN_ON(!vr))
 		return;
@@ -7313,9 +7240,6 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_rif *rif;
 	struct mlxsw_sp_vr *vr;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
 	if (IS_ERR(vr))
 		return PTR_ERR(vr);
@@ -7334,9 +7258,6 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_vr *vr;
 
-	if (mlxsw_sp->router->aborted)
-		return;
-
 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
 	if (WARN_ON(!vr))
 		return;
@@ -7346,25 +7267,6 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
 }
 
-static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
-{
-	enum mlxsw_sp_l3proto proto = MLXSW_SP_L3_PROTO_IPV4;
-	int err;
-
-	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
-					       MLXSW_SP_LPM_TREE_MIN);
-	if (err)
-		return err;
-
-	/* The multicast router code does not need an abort trap as by default,
-	 * packets that don't match any routes are trapped to the CPU.
-	 */
-
-	proto = MLXSW_SP_L3_PROTO_IPV6;
-	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
-						MLXSW_SP_LPM_TREE_MIN + 1);
-}
-
 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_fib_node *fib_node)
 {
@@ -7451,20 +7353,6 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp->router->adj_discard_index_valid = false;
 }
 
-static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
-{
-	int err;
-
-	if (mlxsw_sp->router->aborted)
-		return;
-	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
-	mlxsw_sp_router_fib_flush(mlxsw_sp);
-	mlxsw_sp->router->aborted = true;
-	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
-	if (err)
-		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
-}
-
 struct mlxsw_sp_fib6_event {
 	struct fib6_info **rt_arr;
 	unsigned int nrt6;
@@ -7546,7 +7434,7 @@ static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp,
 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info);
 		if (err) {
 			mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
-			mlxsw_sp_router_fib_abort(mlxsw_sp);
+			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
 			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
 							      &fib_event->fen_info);
 		}
@@ -7581,7 +7469,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
 						   fib_event->fib6_event.nrt6);
 		if (err) {
 			mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
-			mlxsw_sp_router_fib_abort(mlxsw_sp);
+			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
 							      fib6_event->rt_arr,
 							      fib6_event->nrt6);
@@ -7593,7 +7481,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp,
 						  fib_event->fib6_event.nrt6);
 		if (err) {
 			mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx);
-			mlxsw_sp_router_fib_abort(mlxsw_sp);
+			dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
 							      fib6_event->rt_arr,
 							      fib6_event->nrt6);
@@ -7625,7 +7513,7 @@ static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp,
 
 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace);
 		if (err)
-			mlxsw_sp_router_fib_abort(mlxsw_sp);
+			dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
 		mr_cache_put(fib_event->men_info.mfc);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
@@ -7636,7 +7524,7 @@ static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp,
 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
 						    &fib_event->ven_info);
 		if (err)
-			mlxsw_sp_router_fib_abort(mlxsw_sp);
+			dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
 		dev_put(fib_event->ven_info.dev);
 		break;
 	case FIB_EVENT_VIF_DEL:
@@ -7800,9 +7688,6 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
 	if (event == FIB_EVENT_RULE_DEL)
 		return 0;
 
-	if (mlxsw_sp->router->aborted)
-		return 0;
-
 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
 	rule = fr_info->rule;
 
@@ -7860,10 +7745,6 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	case FIB_EVENT_ENTRY_ADD:
 	case FIB_EVENT_ENTRY_REPLACE:
 	case FIB_EVENT_ENTRY_APPEND:
-		if (router->aborted) {
-			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
-			return notifier_from_errno(-EINVAL);
-		}
 		if (info->family == AF_INET) {
 			struct fib_entry_notifier_info *fen_info = ptr;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index be7708a375e1..c5d7007f9173 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -58,7 +58,6 @@ struct mlxsw_sp_router {
 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
 	struct list_head nexthop_neighs_list;
 	struct list_head ipip_list;
-	bool aborted;
 	struct notifier_block nexthop_nb;
 	struct notifier_block fib_nb;
 	struct notifier_block netevent_nb;

From 00190c2b19eb33969befb68bbbc6d00edc11bda5 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:08 +0300
Subject: [PATCH 1208/3292] selftests: router_scale: Do not count failed routes

To check how many routes are installed in hardware, the test runs "ip
route" and greps for "offload", which includes routes with state
"offload_failed".

Till now, this wrong check was not found because after one failure in
route insertion, the driver moved to "abort" mode, which means that user
cannot try to add more routes.

The previous patch removed the abort mechanism and now failed routes are
counted as offloaded.

Fix this by not considering routes with "offload_failed" flag as
offloaded.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/router_scale.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index e93878d42596..683759d29199 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -68,7 +68,7 @@ wait_for_routes()
 	local t0=$1; shift
 	local route_count=$1; shift
 
-	local t1=$(ip route | grep -o 'offload' | wc -l)
+	local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
 	local delta=$((t1 - t0))
 	echo $delta
 	[[ $delta -ge $route_count ]]

From e67dfb8d15deb33c425d0b0ee22f2e5eef54c162 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:09 +0300
Subject: [PATCH 1209/3292] selftests: Clean forgotten resources as part of
 cleanup()

Several tests do not set some ports down as part of their cleanup(),
resulting in IPv6 link-local addresses and associated routes not being
deleted.

These leaks were found using a BPF tool that monitors ASIC resources.

Solve this by setting the ports down at the end of the tests.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh       | 3 +++
 .../selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh  | 3 +++
 tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh   | 2 ++
 tools/testing/selftests/net/forwarding/pedit_dsfield.sh        | 2 ++
 tools/testing/selftests/net/forwarding/pedit_l4port.sh         | 2 ++
 tools/testing/selftests/net/forwarding/skbedit_priority.sh     | 2 ++
 6 files changed, 14 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 4029833f7e27..160891dcb4bc 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -109,6 +109,9 @@ router_destroy()
 	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
 
 	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
 }
 
 setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 42d44e27802c..190c1b6b5365 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -111,6 +111,9 @@ router_destroy()
 	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
 
 	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
 }
 
 setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 5cbff8038f84..28a570006d4d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -93,7 +93,9 @@ switch_destroy()
 	lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
 	lldpad_app_wait_del
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 55eeacf59241..64fbd211d907 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -75,7 +75,9 @@ switch_destroy()
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
index 5f20d289ee43..10e594c55117 100755
--- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -71,7 +71,9 @@ switch_destroy()
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index e3bd8a6bb8b4..bde11dc27873 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -72,7 +72,9 @@ switch_destroy()
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }

From 0521a262f043ea521790ed2976141086c75d2f74 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:10 +0300
Subject: [PATCH 1210/3292] selftests: devlink_lib: Fix bouncing of netdevsim
 DEVLINK_DEV

In the commit referenced below, a check was added to devlink_lib that
asserts the existence of a devlink device referenced by $DEVLINK_DEV.
Unfortunately, several netdevsim tests point DEVLINK_DEV at a device that
does not exist at the time that devlink_lib is sourced. Thus these tests
spuriously fail.

Fix this by introducing an override. By setting DEVLINK_DEV to an empty
string, the user declares their intention to handle DEVLINK_DEV management
on their own.

In all netdevsim tests that use devlink_lib and set DEVLINK_DEV, set
instead an empty DEVLINK_DEV just before sourcing devlink_lib, and set it
to the correct value right afterwards.

Fixes: 557c4d2f780c ("selftests: devlink_lib: add check for devlink device existence")
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/drivers/net/netdevsim/devlink_trap.sh | 4 +++-
 tools/testing/selftests/drivers/net/netdevsim/fib.sh        | 6 ++++--
 tools/testing/selftests/drivers/net/netdevsim/nexthop.sh    | 4 +++-
 tools/testing/selftests/drivers/net/netdevsim/psample.sh    | 4 +++-
 tools/testing/selftests/net/forwarding/devlink_lib.sh       | 2 +-
 5 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index da49ad2761b5..6165901a1cf3 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -24,13 +24,15 @@ ALL_TESTS="
 NETDEVSIM_PATH=/sys/bus/netdevsim/
 DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
 DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
 SLEEP_TIME=1
 NETDEV=""
 NUM_NETIFS=0
 source $lib_dir/lib.sh
+
+DEVLINK_DEV=
 source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
 
 require_command udevadm
 
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 251f228ce63e..fc794cd30389 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -33,13 +33,15 @@ ALL_TESTS="
 NETDEVSIM_PATH=/sys/bus/netdevsim/
 DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
 SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
 NUM_NETIFS=0
 source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
 source $lib_dir/fib_offload_lib.sh
 
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
 ipv4_identical_routes()
 {
 	fib_ipv4_identical_routes_test "testns1"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index ba75c81cda91..e8e0dc088d6a 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -44,12 +44,14 @@ ALL_TESTS="
 NETDEVSIM_PATH=/sys/bus/netdevsim/
 DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
 SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
 DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
 NUM_NETIFS=0
 source $lib_dir/lib.sh
+
+DEVLINK_DEV=
 source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
 
 nexthop_check()
 {
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
index ee10b1a8933c..e689ff7a0b12 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -14,13 +14,15 @@ ALL_TESTS="
 NETDEVSIM_PATH=/sys/bus/netdevsim/
 DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
 SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
 PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
 CAPTURE_FILE=$(mktemp)
 NUM_NETIFS=0
 source $lib_dir/lib.sh
+
+DEVLINK_DEV=
 source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
 
 # Available at https://github.com/Mellanox/libpsample
 require_command psample
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index c19e001f138b..39fb9b8e7b58 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -18,7 +18,7 @@ if [[ ! -v DEVLINK_DEV ]]; then
 
 	DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
 			 -n | cut -d" " -f3)
-else
+elif [[ ! -z "$DEVLINK_DEV" ]]; then
 	devlink dev show $DEVLINK_DEV &> /dev/null
 	if [ $? -ne 0 ]; then
 		echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"

From 314dbb19f95b67456cb042e4a7a36b777a029bea Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:11 +0300
Subject: [PATCH 1211/3292] mlxsw: reg: Extend MTMP register with new threshold
 field

Extend Management Temperature (MTMP) register with new field specifying
the maximum temperature threshold.

Extend mlxsw_reg_mtmp_unpack() function with two extra arguments,
providing high and maximum temperature thresholds. For modules, these
thresholds correspond to critical and emergency thresholds that are read
from the module's EEPROM.

Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/core_env.c    |  2 +-
 .../net/ethernet/mellanox/mlxsw/core_hwmon.c  |  6 +++---
 .../ethernet/mellanox/mlxsw/core_thermal.c    |  6 +++---
 drivers/net/ethernet/mellanox/mlxsw/reg.h     | 20 ++++++++++++++++++-
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index dd26865bd587..bcad1327d861 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -142,7 +142,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err)
 		return err;
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL, NULL, NULL);
 	if (!module_temp) {
 		*temp = 0;
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 2196c946698a..d41afdfbd085 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -72,7 +72,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
 		return err;
 	}
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
 	return sprintf(buf, "%d\n", temp);
 }
 
@@ -95,7 +95,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
 		dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
 		return err;
 	}
-	mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL, NULL, NULL);
 	return sprintf(buf, "%d\n", temp_max);
 }
 
@@ -239,7 +239,7 @@ static int mlxsw_hwmon_module_temp_get(struct device *dev,
 		dev_err(dev, "Failed to query module temperature\n");
 		return err;
 	}
-	mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL, NULL, NULL);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index dfea14399607..cb1b68b6bf47 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -281,7 +281,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
 		dev_err(dev, "Failed to query temp sensor\n");
 		return err;
 	}
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
 	if (temp > 0)
 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
 					      temp);
@@ -442,7 +442,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
 		*p_temp = (int) temp;
 		return 0;
 	}
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
 	*p_temp = temp;
 
 	if (!temp)
@@ -560,7 +560,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
 	if (err)
 		return err;
 
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
 	if (temp > 0)
 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index f9419cc53480..5304309ecb9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9463,6 +9463,14 @@ MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12);
 					  ((s16)((GENMASK(15, 0) + (v_) + 1) \
 					   * 125)); })
 
+/* reg_mtmp_max_operational_temperature
+ * The highest temperature in the nominal operational range. Reading is in
+ * 0.125 Celsius degrees units.
+ * In case of module this is SFF critical temperature threshold.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtmp, max_operational_temperature, 0x04, 16, 16);
+
 /* reg_mtmp_temperature
  * Temperature reading from the sensor. Reading is in 0.125 Celsius
  * degrees units.
@@ -9541,7 +9549,9 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index,
 }
 
 static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp,
-					 int *p_max_temp, char *sensor_name)
+					 int *p_max_temp, int *p_temp_hi,
+					 int *p_max_oper_temp,
+					 char *sensor_name)
 {
 	s16 temp;
 
@@ -9553,6 +9563,14 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp,
 		temp = mlxsw_reg_mtmp_max_temperature_get(payload);
 		*p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
 	}
+	if (p_temp_hi) {
+		temp = mlxsw_reg_mtmp_temperature_threshold_hi_get(payload);
+		*p_temp_hi = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+	}
+	if (p_max_oper_temp) {
+		temp = mlxsw_reg_mtmp_max_operational_temperature_get(payload);
+		*p_max_oper_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+	}
 	if (sensor_name)
 		mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
 }

From befc2048088aefbcd88b18225ba33231887137dc Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:12 +0300
Subject: [PATCH 1212/3292] mlxsw: core_env: Read module temperature thresholds
 using MTMP register

Currently, module temperature thresholds are obtained from Management
Cable Info Access (MCIA) register by specifying the thresholds offsets
within module EEPROM layout. This data does not pass validation and in
some cases can be unreliable. For example, due to some problem with the
module.

Add support for a new feature provided by Management Temperature (MTMP)
register for sanitization of temperature thresholds values.

Extend mlxsw_env_module_temp_thresholds_get() to get temperature
thresholds through MTMP field 'max_operational_temperature' - if it is
not zero, feature is supported. Otherwise fallback to old method and get
the thresholds through MCIA.

Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core_env.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index bcad1327d861..b3ca5bd33a7f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -125,6 +125,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
 int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 					 int off, int *temp)
 {
+	unsigned int module_temp, module_crit, module_emerg;
 	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 	union {
 		u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE];
@@ -132,7 +133,6 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 	} temp_thresh;
 	char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
-	unsigned int module_temp;
 	bool qsfp, cmis;
 	int page;
 	int err;
@@ -142,12 +142,21 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
 	if (err)
 		return err;
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL, NULL, NULL);
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, &module_crit,
+			      &module_emerg, NULL);
 	if (!module_temp) {
 		*temp = 0;
 		return 0;
 	}
 
+	/* Validate if threshold reading is available through MTMP register,
+	 * otherwise fallback to read through MCIA.
+	 */
+	if (module_emerg) {
+		*temp = off == SFP_TEMP_HIGH_WARN ? module_crit : module_emerg;
+		return 0;
+	}
+
 	/* Read Free Side Device Temperature Thresholds from page 03h
 	 * (MSB at lower byte address).
 	 * Bytes:

From e57977b34ab5d52d73bc0b8b2ff941ac21d7166f Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:13 +0300
Subject: [PATCH 1213/3292] mlxsw: thermal: Add function for reading module
 temperature and thresholds

Provide new function mlxsw_thermal_module_temp_and_thresholds_get() for
reading temperature and temperature thresholds by a single operation.
The motivation is to reduce the number of transactions with the device
which is important when operating over a slow bus such as I2C.

Currently, the sole caller of the function is only using it to read the
module's temperature. The next patch will also use it to query the
module's temperature thresholds.

Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/core_thermal.c    | 50 +++++++++++++------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index cb1b68b6bf47..0983e4d4f888 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -420,29 +420,49 @@ static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
 	return err;
 }
 
+static void
+mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
+					     u16 sensor_index, int *p_temp,
+					     int *p_crit_temp,
+					     int *p_emerg_temp)
+{
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	int err;
+
+	/* Read module temperature and thresholds. */
+	mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false);
+	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		/* Set temperature and thresholds to zero to avoid passing
+		 * uninitialized data back to the caller.
+		 */
+		*p_temp = 0;
+		*p_crit_temp = 0;
+		*p_emerg_temp = 0;
+
+		return;
+	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp,
+			      NULL);
+}
+
 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
 					 int *p_temp)
 {
 	struct mlxsw_thermal_module *tz = tzdev->devdata;
 	struct mlxsw_thermal *thermal = tz->parent;
-	struct device *dev = thermal->bus_info->dev;
-	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	struct device *dev;
+	u16 sensor_index;
 	int temp;
 	int err;
 
-	/* Read module temperature. */
-	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
-			    tz->module, false, false);
-	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
-	if (err) {
-		/* Do not return error - in case of broken module's sensor
-		 * it will cause error message flooding.
-		 */
-		temp = 0;
-		*p_temp = (int) temp;
-		return 0;
-	}
-	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
+	dev = thermal->bus_info->dev;
+	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
+
+	/* Read module temperature and thresholds. */
+	mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
+						     sensor_index, &temp, NULL,
+						     NULL);
 	*p_temp = temp;
 
 	if (!temp)

From 72a64c2fe9d8a08c9c57fc22adc1b44d13f97cac Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Tue, 8 Jun 2021 15:44:14 +0300
Subject: [PATCH 1214/3292] mlxsw: thermal: Read module temperature thresholds
 using MTMP register

mlxsw_thermal_module_trips_update() is used to update the trip points of
the module's thermal zone. Currently, this is done by querying the
thresholds from the module's EEPROM via MCIA register. This data does
not pass validation and in some cases can be unreliable. For example,
due to some problem with transceiver module.

Previous patch made it possible to read module's temperature and
thresholds via MTMP register. Therefore, extend
mlxsw_thermal_module_trips_update() to use the thresholds queried from
MTMP, if valid.

This is both more reliable and more efficient than current method, as
temperature and thresholds are queried in one transaction instead of
three. This is significant when working over a slow bus such as I2C.

Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/core_thermal.c    | 47 ++++++++++++-------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 0983e4d4f888..b96fb88aac0a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -149,22 +149,27 @@ mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
 
 static int
 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
-				  struct mlxsw_thermal_module *tz)
+				  struct mlxsw_thermal_module *tz,
+				  int crit_temp, int emerg_temp)
 {
-	int crit_temp, emerg_temp;
 	int err;
 
-	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
-						   SFP_TEMP_HIGH_WARN,
-						   &crit_temp);
-	if (err)
-		return err;
+	/* Do not try to query temperature thresholds directly from the module's
+	 * EEPROM if we got valid thresholds from MTMP.
+	 */
+	if (!emerg_temp || !crit_temp) {
+		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
+							   SFP_TEMP_HIGH_WARN,
+							   &crit_temp);
+		if (err)
+			return err;
 
-	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
-						   SFP_TEMP_HIGH_ALARM,
-						   &emerg_temp);
-	if (err)
-		return err;
+		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
+							   SFP_TEMP_HIGH_ALARM,
+							   &emerg_temp);
+		if (err)
+			return err;
+	}
 
 	if (crit_temp > emerg_temp) {
 		dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
@@ -451,9 +456,9 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
 {
 	struct mlxsw_thermal_module *tz = tzdev->devdata;
 	struct mlxsw_thermal *thermal = tz->parent;
+	int temp, crit_temp, emerg_temp;
 	struct device *dev;
 	u16 sensor_index;
-	int temp;
 	int err;
 
 	dev = thermal->bus_info->dev;
@@ -461,15 +466,16 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
 
 	/* Read module temperature and thresholds. */
 	mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
-						     sensor_index, &temp, NULL,
-						     NULL);
+						     sensor_index, &temp,
+						     &crit_temp, &emerg_temp);
 	*p_temp = temp;
 
 	if (!temp)
 		return 0;
 
 	/* Update trip points. */
-	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
+	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
+						crit_temp, emerg_temp);
 	if (!err && temp > 0)
 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
 
@@ -736,7 +742,10 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
 			  struct mlxsw_thermal *thermal, u8 module)
 {
 	struct mlxsw_thermal_module *module_tz;
+	int crit_temp, emerg_temp;
+	u16 sensor_index;
 
+	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
 	module_tz = &thermal->tz_module_arr[module];
 	/* Skip if parent is already set (case of port split). */
 	if (module_tz->parent)
@@ -747,8 +756,12 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
 	       sizeof(thermal->trips));
 	/* Initialize all trip point. */
 	mlxsw_thermal_module_trips_reset(module_tz);
+	/* Read module temperature and thresholds. */
+	mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, NULL,
+						     &crit_temp, &emerg_temp);
 	/* Update trip point according to the module data. */
-	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
+	return mlxsw_thermal_module_trips_update(dev, core, module_tz,
+						 crit_temp, emerg_temp);
 }
 
 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)

From e4ac382ebfb4e40dbf01db9ab4a42b10b298946a Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:09 +0300
Subject: [PATCH 1215/3292] net: ena: optimize data access in fast-path code

This tweaks several small places to improve the data access in fast
path:

* Remove duplicates of first_interrupt flag and surround it with
  WRITE/READ_ONCE macros:

  The flag is used to detect HW disorders in its
  interrupt communication with the driver. The flag is set when an
  interrupt is received and used in the health check function
  (ena_timer_service()) to help it find irregularities.

* Reorder some fields in ena_napi struct to take better advantage of
  cache access pattern.

* Move XDP TX queue number to a variable to save its calculation for
  every packet.

* Use likely in a condition to improve branch prediction

The 'first_interrupt' and 'interrupt_masked' flags were moved to reside
in the same cache line as the first fields of 'napi' struct. This
placement ensures that all memory accessed during upper-half handler
reside in the same cacheline (napi_schedule_irqoff() only accesses
'state' and 'poll_list' fields which are at the beginning of napi
struct).

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_eth_com.c |  2 +-
 drivers/net/ethernet/amazon/ena/ena_netdev.c  | 23 +++++++++----------
 drivers/net/ethernet/amazon/ena/ena_netdev.h  | 11 +++++----
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index c3be751e7379..2aecd4c3de59 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -151,7 +151,7 @@ static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
 		return 0;
 
 	/* bounce buffer was used, so write it and get a new one */
-	if (pkt_ctrl->idx) {
+	if (likely(pkt_ctrl->idx)) {
 		rc = ena_com_write_bounce_buffer_to_dev(io_sq,
 							pkt_ctrl->curr_bounce_buf);
 		if (unlikely(rc))
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 881f88754bf6..b613067a06d8 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -197,7 +197,6 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
 	int ret;
 
 	xdp_ring = ena_napi->xdp_ring;
-	xdp_ring->first_interrupt = ena_napi->first_interrupt;
 
 	xdp_budget = budget;
 
@@ -383,7 +382,6 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 	u32 verdict = XDP_PASS;
 	struct xdp_frame *xdpf;
 	u64 *xdp_stat;
-	int qid;
 
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
@@ -404,8 +402,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 		}
 
 		/* Find xmit queue */
-		qid = rx_ring->qid + rx_ring->adapter->num_io_queues;
-		xdp_ring = &rx_ring->adapter->tx_ring[qid];
+		xdp_ring = rx_ring->xdp_ring;
 
 		/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
 		spin_lock(&xdp_ring->xdp_tx_lock);
@@ -681,7 +678,6 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
 	ring->ena_dev = adapter->ena_dev;
 	ring->per_napi_packets = 0;
 	ring->cpu = 0;
-	ring->first_interrupt = false;
 	ring->no_interrupt_event_cnt = 0;
 	u64_stats_init(&ring->syncp);
 }
@@ -725,6 +721,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter,
 				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 			rxr->empty_rx_queue = 0;
 			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+			rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
 		}
 	}
 }
@@ -1922,9 +1919,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 	tx_ring = ena_napi->tx_ring;
 	rx_ring = ena_napi->rx_ring;
 
-	tx_ring->first_interrupt = ena_napi->first_interrupt;
-	rx_ring->first_interrupt = ena_napi->first_interrupt;
-
 	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
 
 	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
@@ -2003,7 +1997,8 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
 {
 	struct ena_napi *ena_napi = data;
 
-	ena_napi->first_interrupt = true;
+	/* Used to check HW health */
+	WRITE_ONCE(ena_napi->first_interrupt, true);
 
 	WRITE_ONCE(ena_napi->interrupts_masked, true);
 	smp_wmb(); /* write interrupts_masked before calling napi */
@@ -3657,7 +3652,9 @@ static void ena_fw_reset_device(struct work_struct *work)
 static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
 					struct ena_ring *rx_ring)
 {
-	if (likely(rx_ring->first_interrupt))
+	struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
+
+	if (likely(READ_ONCE(ena_napi->first_interrupt)))
 		return 0;
 
 	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
@@ -3681,6 +3678,7 @@ static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
 static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 					  struct ena_ring *tx_ring)
 {
+	struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
 	struct ena_tx_buffer *tx_buf;
 	unsigned long last_jiffies;
 	u32 missed_tx = 0;
@@ -3694,8 +3692,9 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 			/* no pending Tx at this location */
 			continue;
 
-		if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
-			     2 * adapter->missing_tx_completion_to))) {
+		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) &&
+			time_is_before_jiffies(last_jiffies + 2 *
+				adapter->missing_tx_completion_to))) {
 			/* If after graceful period interrupt is still not
 			 * received, we schedule a reset
 			 */
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 74af15d62ee1..21758707a929 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -135,12 +135,12 @@ struct ena_irq {
 };
 
 struct ena_napi {
-	struct napi_struct napi ____cacheline_aligned;
+	u8 first_interrupt ____cacheline_aligned;
+	u8 interrupts_masked;
+	struct napi_struct napi;
 	struct ena_ring *tx_ring;
 	struct ena_ring *rx_ring;
 	struct ena_ring *xdp_ring;
-	bool first_interrupt;
-	bool interrupts_masked;
 	u32 qid;
 	struct dim dim;
 };
@@ -259,6 +259,10 @@ struct ena_ring {
 	struct bpf_prog *xdp_bpf_prog;
 	struct xdp_rxq_info xdp_rxq;
 	spinlock_t xdp_tx_lock;	/* synchronize XDP TX/Redirect traffic */
+	/* Used for rx queues only to point to the xdp tx ring, to
+	 * which traffic should be redirected from this rx ring.
+	 */
+	struct ena_ring *xdp_ring;
 
 	u16 next_to_use;
 	u16 next_to_clean;
@@ -271,7 +275,6 @@ struct ena_ring {
 	/* The maximum header length the device can handle */
 	u8 tx_max_header_size;
 
-	bool first_interrupt;
 	bool disable_meta_caching;
 	u16 no_interrupt_event_cnt;
 

From 9912c72edd8c1e1d5432ed4ad350125833c42f6c Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:10 +0300
Subject: [PATCH 1216/3292] net: ena: Remove unused code

The ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE macro,
ena_xdp_queues_present() function and SUSPEND_RESUME enums aren't used
in the driver, and so not needed.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Gal Pressman <galpress@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h |  2 --
 drivers/net/ethernet/amazon/ena/ena_netdev.h     | 11 -----------
 2 files changed, 13 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 4164eacc5c28..f5ec35fa4c63 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -1042,8 +1042,6 @@ enum ena_admin_aenq_group {
 };
 
 enum ena_admin_aenq_notification_syndrome {
-	ENA_ADMIN_SUSPEND                           = 0,
-	ENA_ADMIN_RESUME                            = 1,
 	ENA_ADMIN_UPDATE_HINTS                      = 2,
 };
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 21758707a929..834348fcdf3c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -55,12 +55,6 @@
 #define ENA_TX_WAKEUP_THRESH		(MAX_SKB_FRAGS + 2)
 #define ENA_DEFAULT_RX_COPYBREAK	(256 - NET_IP_ALIGN)
 
-/* limit the buffer size to 600 bytes to handle MTU changes from very
- * small to very large, in which case the number of buffers per packet
- * could exceed ENA_PKT_MAX_BUFS
- */
-#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600
-
 #define ENA_MIN_MTU		128
 
 #define ENA_NAME_MAX_LEN	20
@@ -417,11 +411,6 @@ enum ena_xdp_errors_t {
 	ENA_XDP_NO_ENOUGH_QUEUES,
 };
 
-static inline bool ena_xdp_queues_present(struct ena_adapter *adapter)
-{
-	return adapter->xdp_first_ring != 0;
-}
-
 static inline bool ena_xdp_present(struct ena_adapter *adapter)
 {
 	return !!adapter->xdp_bpf_prog;

From 091d0e85a0d4051b286767b05d3a18c87b6c4a14 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:11 +0300
Subject: [PATCH 1217/3292] net: ena: Improve error logging in driver

Add prints to improve logging of driver's errors.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_eth_com.c | 28 +++++++++++++++----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index 2aecd4c3de59..3d6f0a466a9e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -154,8 +154,11 @@ static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
 	if (likely(pkt_ctrl->idx)) {
 		rc = ena_com_write_bounce_buffer_to_dev(io_sq,
 							pkt_ctrl->curr_bounce_buf);
-		if (unlikely(rc))
+		if (unlikely(rc)) {
+			netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+				   "Failed to write bounce buffer to device\n");
 			return rc;
+		}
 
 		pkt_ctrl->curr_bounce_buf =
 			ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
@@ -185,8 +188,11 @@ static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
 	if (!pkt_ctrl->descs_left_in_line) {
 		rc = ena_com_write_bounce_buffer_to_dev(io_sq,
 							pkt_ctrl->curr_bounce_buf);
-		if (unlikely(rc))
+		if (unlikely(rc)) {
+			netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+				   "Failed to write bounce buffer to device\n");
 			return rc;
+		}
 
 		pkt_ctrl->curr_bounce_buf =
 			ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
@@ -406,8 +412,11 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
 	}
 
 	if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV &&
-		     !buffer_to_push))
+		     !buffer_to_push)) {
+		netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+			   "Push header wasn't provided in LLQ mode\n");
 		return -EINVAL;
+	}
 
 	rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len);
 	if (unlikely(rc))
@@ -423,6 +432,9 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
 	/* If the caller doesn't want to send packets */
 	if (unlikely(!num_bufs && !header_len)) {
 		rc = ena_com_close_bounce_buffer(io_sq);
+		if (rc)
+			netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+				   "Failed to write buffers to LLQ\n");
 		*nb_hw_desc = io_sq->tail - start_tail;
 		return rc;
 	}
@@ -482,8 +494,11 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
 		/* The first desc share the same desc as the header */
 		if (likely(i != 0)) {
 			rc = ena_com_sq_update_tail(io_sq);
-			if (unlikely(rc))
+			if (unlikely(rc)) {
+				netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+					   "Failed to update sq tail\n");
 				return rc;
+			}
 
 			desc = get_sq_desc(io_sq);
 			if (unlikely(!desc))
@@ -512,8 +527,11 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
 	desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
 
 	rc = ena_com_sq_update_tail(io_sq);
-	if (unlikely(rc))
+	if (unlikely(rc)) {
+		netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device,
+			   "Failed to update sq tail of the last descriptor\n");
 		return rc;
+	}
 
 	rc = ena_com_close_bounce_buffer(io_sq);
 

From 9e5269a915a8e3d6f4bae5641451737e9cca70c0 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:12 +0300
Subject: [PATCH 1218/3292] net: ena: use build_skb() in RX path

This patch converts the RX path to use build_skb() for packets larger
than copybreak (set to 256 by default). This function makes the first
descriptor's page to be the linear part of the sk_buff struct buffer.

Also remove the SKB description from the README since most of it no
longer relevant and the parts that are left don't add information.

Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 66 ++++++++++++--------
 1 file changed, 41 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index b613067a06d8..d7bc4f45e5df 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -529,7 +529,7 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
 			rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
 		} else {
 			ena_xdp_unregister_rxq_info(rx_ring);
-			rx_ring->rx_headroom = 0;
+			rx_ring->rx_headroom = NET_SKB_PAD;
 		}
 	}
 }
@@ -720,6 +720,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter,
 			rxr->smoothed_interval =
 				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 			rxr->empty_rx_queue = 0;
+			rxr->rx_headroom = NET_SKB_PAD;
 			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 			rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
 		}
@@ -982,6 +983,7 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring,
 	struct ena_com_buf *ena_buf;
 	struct page *page;
 	dma_addr_t dma;
+	int tailroom;
 
 	/* restore page offset value in case it has been changed by device */
 	rx_info->page_offset = headroom;
@@ -1012,10 +1014,12 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring,
 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 		  "Allocate page %p, rx_info %p\n", page, rx_info);
 
+	tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 	rx_info->page = page;
 	ena_buf = &rx_info->ena_buf;
 	ena_buf->paddr = dma + headroom;
-	ena_buf->len = ENA_PAGE_SIZE - headroom;
+	ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
 
 	return 0;
 }
@@ -1381,21 +1385,23 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
 	return tx_pkts;
 }
 
-static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
+static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag)
 {
 	struct sk_buff *skb;
 
-	if (frags)
-		skb = napi_get_frags(rx_ring->napi);
-	else
+	if (!first_frag)
 		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
 						rx_ring->rx_copybreak);
+	else
+		skb = build_skb(first_frag, ENA_PAGE_SIZE);
 
 	if (unlikely(!skb)) {
 		ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
 				  &rx_ring->syncp);
+
 		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
-			  "Failed to allocate skb. frags: %d\n", frags);
+			  "Failed to allocate skb. first_frag %s\n",
+			  first_frag ? "provided" : "not provided");
 		return NULL;
 	}
 
@@ -1410,7 +1416,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 	struct sk_buff *skb;
 	struct ena_rx_buffer *rx_info;
 	u16 len, req_id, buf = 0;
-	void *va;
+	void *page_addr;
+	u32 page_offset;
+	void *data_addr;
 
 	len = ena_bufs[buf].len;
 	req_id = ena_bufs[buf].req_id;
@@ -1428,12 +1436,14 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 		  rx_info, rx_info->page);
 
 	/* save virt address of first buffer */
-	va = page_address(rx_info->page) + rx_info->page_offset;
+	page_addr = page_address(rx_info->page);
+	page_offset = rx_info->page_offset;
+	data_addr = page_addr + page_offset;
 
-	prefetch(va);
+	prefetch(data_addr);
 
 	if (len <= rx_ring->rx_copybreak) {
-		skb = ena_alloc_skb(rx_ring, false);
+		skb = ena_alloc_skb(rx_ring, NULL);
 		if (unlikely(!skb))
 			return NULL;
 
@@ -1446,7 +1456,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 					dma_unmap_addr(&rx_info->ena_buf, paddr),
 					len,
 					DMA_FROM_DEVICE);
-		skb_copy_to_linear_data(skb, va, len);
+		skb_copy_to_linear_data(skb, data_addr, len);
 		dma_sync_single_for_device(rx_ring->dev,
 					   dma_unmap_addr(&rx_info->ena_buf, paddr),
 					   len,
@@ -1460,16 +1470,18 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 		return skb;
 	}
 
-	skb = ena_alloc_skb(rx_ring, true);
+	ena_unmap_rx_buff(rx_ring, rx_info);
+
+	skb = ena_alloc_skb(rx_ring, page_addr);
 	if (unlikely(!skb))
 		return NULL;
 
+	/* Populate skb's linear part */
+	skb_reserve(skb, page_offset);
+	skb_put(skb, len);
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
 	do {
-		ena_unmap_rx_buff(rx_ring, rx_info);
-
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
-				rx_info->page_offset, len, ENA_PAGE_SIZE);
-
 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 			  "RX skb updated. len %d. data_len %d\n",
 			  skb->len, skb->data_len);
@@ -1488,6 +1500,12 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 		req_id = ena_bufs[buf].req_id;
 
 		rx_info = &rx_ring->rx_buffer_info[req_id];
+
+		ena_unmap_rx_buff(rx_ring, rx_info);
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
+				rx_info->page_offset, len, ENA_PAGE_SIZE);
+
 	} while (1);
 
 	return skb;
@@ -1700,14 +1718,12 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 
 		skb_record_rx_queue(skb, rx_ring->qid);
 
-		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
-			total_len += rx_ring->ena_bufs[0].len;
+		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
 			rx_copybreak_pkt++;
-			napi_gro_receive(napi, skb);
-		} else {
-			total_len += skb->len;
-			napi_gro_frags(napi);
-		}
+
+		total_len += skb->len;
+
+		napi_gro_receive(napi, skb);
 
 		res_budget--;
 	} while (likely(res_budget));

From 0ee251cd9a6398a07cc8cfc849c0efa3f28ece98 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:13 +0300
Subject: [PATCH 1219/3292] net: ena: add jiffies of last napi call to stats

There are instances when we want to know when the last napi was
called for debugging.

On stuck / heavy loaded CPUs, the ena napi handler might not be
called for a long period of time. This stat can help us to
determine how much time passed since the last execution of napi.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 28 ++++++++++++++------
 drivers/net/ethernet/amazon/ena/ena_netdev.h |  1 +
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index d7bc4f45e5df..f013fa312937 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -228,6 +228,7 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
 	xdp_ring->tx_stats.napi_comp += napi_comp_call;
 	xdp_ring->tx_stats.tx_poll++;
 	u64_stats_update_end(&xdp_ring->syncp);
+	xdp_ring->tx_stats.last_napi_jiffies = jiffies;
 
 	return ret;
 }
@@ -1989,6 +1990,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 	tx_ring->tx_stats.tx_poll++;
 	u64_stats_update_end(&tx_ring->syncp);
 
+	tx_ring->tx_stats.last_napi_jiffies = jiffies;
+
 	return ret;
 }
 
@@ -3695,6 +3698,9 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 					  struct ena_ring *tx_ring)
 {
 	struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
+	unsigned int time_since_last_napi;
+	unsigned int missing_tx_comp_to;
+	bool is_tx_comp_time_expired;
 	struct ena_tx_buffer *tx_buf;
 	unsigned long last_jiffies;
 	u32 missed_tx = 0;
@@ -3708,9 +3714,10 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 			/* no pending Tx at this location */
 			continue;
 
-		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) &&
-			time_is_before_jiffies(last_jiffies + 2 *
-				adapter->missing_tx_completion_to))) {
+		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
+			 2 * adapter->missing_tx_completion_to);
+
+		if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
 			/* If after graceful period interrupt is still not
 			 * received, we schedule a reset
 			 */
@@ -3723,12 +3730,17 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
 			return -EIO;
 		}
 
-		if (unlikely(time_is_before_jiffies(last_jiffies +
-				adapter->missing_tx_completion_to))) {
-			if (!tx_buf->print_once)
+		is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
+			adapter->missing_tx_completion_to);
+
+		if (unlikely(is_tx_comp_time_expired)) {
+			if (!tx_buf->print_once) {
+				time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
+				missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
 				netif_notice(adapter, tx_err, adapter->netdev,
-					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
-					     tx_ring->qid, i);
+					     "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n",
+					     tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to);
+			}
 
 			tx_buf->print_once = 1;
 			missed_tx++;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 834348fcdf3c..0c39fc2fa345 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -206,6 +206,7 @@ struct ena_stats_tx {
 	u64 llq_buffer_copy;
 	u64 missed_tx;
 	u64 unmask_interrupt;
+	u64 last_napi_jiffies;
 };
 
 struct ena_stats_rx {

From 15efff76491edd31f57eb3358d80868747d1397e Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:14 +0300
Subject: [PATCH 1220/3292] net: ena: Remove module param and change message
 severity

Remove the module param 'debug' which allows to specify the message
level of the driver. This value can be specified using ethtool command.
Also reduce the message level of LLQ support to be a warning since it is
not an indication of an error.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index f013fa312937..6e648b6882b7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -35,9 +35,6 @@ MODULE_LICENSE("GPL");
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
 		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
-static int debug = -1;
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
 static struct ena_aenq_handlers aenq_handlers;
 
@@ -3360,7 +3357,7 @@ static int ena_set_queues_placement_policy(struct pci_dev *pdev,
 
 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
 	if (!(ena_dev->supported_features & llq_feature_mask)) {
-		dev_err(&pdev->dev,
+		dev_warn(&pdev->dev,
 			"LLQ is not supported Fallback to host mode policy.\n");
 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
 		return 0;
@@ -4271,7 +4268,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->ena_dev = ena_dev;
 	adapter->netdev = netdev;
 	adapter->pdev = pdev;
-	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+	adapter->msg_enable = DEFAULT_MSG_ENABLE;
 
 	ena_dev->net_device = netdev;
 

From 511c537bb5647662ff7df7a41180a1721c078720 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:15 +0300
Subject: [PATCH 1221/3292] net: ena: fix RST format in ENA documentation file

The documentation file used to be written in markdown format but was
converted to reStructuredText (rst).

The converted file doesn't keep up with rst format requirements which
results in hard-to-read text.

This patch fixes the formatting of the file. The patch also
* Highlights and emphasizes some lines to improve readability
* Rephrases some hard-to-understand text
* Updates outdated function descriptions.
* Removes TSO description which falsely claims the driver supports it

Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/amazon/ena.rst    | 164 +++++++++---------
 1 file changed, 78 insertions(+), 86 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
index f8c6469f2bd2..01b2a69b0cb0 100644
--- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -11,12 +11,12 @@ ENA is a networking interface designed to make good use of modern CPU
 features and system architectures.
 
 The ENA device exposes a lightweight management interface with a
-minimal set of memory mapped registers and extendable command set
+minimal set of memory mapped registers and extendible command set
 through an Admin Queue.
 
 The driver supports a range of ENA devices, is link-speed independent
-(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
-a negotiated and extendable feature set.
+(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc), and has
+a negotiated and extendible feature set.
 
 Some ENA devices support SR-IOV. This driver is used for both the
 SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
@@ -27,9 +27,9 @@ is advertised by the device via the Admin Queue), a dedicated MSI-X
 interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
 and CPU cacheline optimized data placement.
 
-The ENA driver supports industry standard TCP/IP offload features such
-as checksum offload and TCP transmit segmentation offload (TSO).
-Receive-side scaling (RSS) is supported for multi-core scaling.
+The ENA driver supports industry standard TCP/IP offload features such as
+checksum offload. Receive-side scaling (RSS) is supported for multi-core
+scaling.
 
 The ENA driver and its corresponding devices implement health
 monitoring mechanisms such as watchdog, enabling the device and driver
@@ -38,22 +38,20 @@ debug logs.
 
 Some of the ENA devices support a working mode called Low-latency
 Queue (LLQ), which saves several more microseconds.
-
 ENA Source Code Directory Structure
 ===================================
 
 =================   ======================================================
 ena_com.[ch]        Management communication layer. This layer is
-		    responsible for the handling all the management
-		    (admin) communication between the device and the
-		    driver.
+                    responsible for the handling all the management
+                    (admin) communication between the device and the
+                    driver.
 ena_eth_com.[ch]    Tx/Rx data path.
 ena_admin_defs.h    Definition of ENA management interface.
 ena_eth_io_defs.h   Definition of ENA data path interface.
 ena_common_defs.h   Common definitions for ena_com layer.
 ena_regs_defs.h     Definition of ENA PCI memory-mapped (MMIO) registers.
 ena_netdev.[ch]     Main Linux kernel driver.
-ena_syfsfs.[ch]     Sysfs files.
 ena_ethtool.c       ethtool callbacks.
 ena_pci_id_tbl.h    Supported device IDs.
 =================   ======================================================
@@ -69,7 +67,7 @@ ENA management interface is exposed by means of:
 - Asynchronous Event Notification Queue (AENQ)
 
 ENA device MMIO Registers are accessed only during driver
-initialization and are not involved in further normal device
+initialization and are not used during further normal device
 operation.
 
 AQ is used for submitting management commands, and the
@@ -100,28 +98,27 @@ group may have multiple syndromes, as shown below
 
 The events are:
 
-	====================	===============
-	Group			Syndrome
-	====================	===============
-	Link state change	**X**
-	Fatal error		**X**
-	Notification		Suspend traffic
-	Notification		Resume traffic
-	Keep-Alive		**X**
-	====================	===============
+====================    ===============
+Group                   Syndrome
+====================    ===============
+Link state change       **X**
+Fatal error             **X**
+Notification            Suspend traffic
+Notification            Resume traffic
+Keep-Alive              **X**
+====================    ===============
 
 ACQ and AENQ share the same MSI-X vector.
 
-Keep-Alive is a special mechanism that allows monitoring of the
-device's health. The driver maintains a watchdog (WD) handler which,
-if fired, logs the current state and statistics then resets and
-restarts the ENA device and driver. A Keep-Alive event is delivered by
-the device every second. The driver re-arms the WD upon reception of a
-Keep-Alive event. A missed Keep-Alive event causes the WD handler to
-fire.
+Keep-Alive is a special mechanism that allows monitoring the device's health.
+A Keep-Alive event is delivered by the device every second.
+The driver maintains a watchdog (WD) handler which logs the current state and
+statistics. If the keep-alive events aren't delivered as expected the WD resets
+the device and the driver.
 
 Data Path Interface
 ===================
+
 I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
 SQ correspondingly). Each SQ has a completion queue (CQ) associated
 with it.
@@ -131,26 +128,24 @@ physical memory.
 
 The ENA driver supports two Queue Operation modes for Tx SQs:
 
-- Regular mode
+- **Regular mode:**
+  In this mode the Tx SQs reside in the host's memory. The ENA
+  device fetches the ENA Tx descriptors and packet data from host
+  memory.
 
-  * In this mode the Tx SQs reside in the host's memory. The ENA
-    device fetches the ENA Tx descriptors and packet data from host
-    memory.
+- **Low Latency Queue (LLQ) mode or "push-mode":**
+  In this mode the driver pushes the transmit descriptors and the
+  first 128 bytes of the packet directly to the ENA device memory
+  space. The rest of the packet payload is fetched by the
+  device. For this operation mode, the driver uses a dedicated PCI
+  device memory BAR, which is mapped with write-combine capability.
 
-- Low Latency Queue (LLQ) mode or "push-mode".
-
-  * In this mode the driver pushes the transmit descriptors and the
-    first 128 bytes of the packet directly to the ENA device memory
-    space. The rest of the packet payload is fetched by the
-    device. For this operation mode, the driver uses a dedicated PCI
-    device memory BAR, which is mapped with write-combine capability.
+  **Note that** not all ENA devices support LLQ, and this feature is negotiated
+  with the device upon initialization. If the ENA device does not
+  support LLQ mode, the driver falls back to the regular mode.
 
 The Rx SQs support only the regular mode.
 
-Note: Not all ENA devices support LLQ, and this feature is negotiated
-      with the device upon initialization. If the ENA device does not
-      support LLQ mode, the driver falls back to the regular mode.
-
 The driver supports multi-queue for both Tx and Rx. This has various
 benefits:
 
@@ -165,6 +160,7 @@ benefits:
 
 Interrupt Modes
 ===============
+
 The driver assigns a single MSI-X vector per queue pair (for both Tx
 and Rx directions). The driver assigns an additional dedicated MSI-X vector
 for management (for ACQ and AENQ).
@@ -190,20 +186,21 @@ unmasked by the driver after NAPI processing is complete.
 
 Interrupt Moderation
 ====================
+
 ENA driver and device can operate in conventional or adaptive interrupt
 moderation mode.
 
-In conventional mode the driver instructs device to postpone interrupt
+**In conventional mode** the driver instructs device to postpone interrupt
 posting according to static interrupt delay value. The interrupt delay
-value can be configured through ethtool(8). The following ethtool
-parameters are supported by the driver: tx-usecs, rx-usecs
+value can be configured through `ethtool(8)`. The following `ethtool`
+parameters are supported by the driver: ``tx-usecs``, ``rx-usecs``
 
-In adaptive interrupt moderation mode the interrupt delay value is
+**In adaptive interrupt** moderation mode the interrupt delay value is
 updated by the driver dynamically and adjusted every NAPI cycle
 according to the traffic nature.
 
-Adaptive coalescing can be switched on/off through ethtool(8)
-adaptive_rx on|off parameter.
+Adaptive coalescing can be switched on/off through `ethtool(8)`'s
+:code:`adaptive_rx on|off` parameter.
 
 More information about Adaptive Interrupt Moderation (DIM) can be found in
 Documentation/networking/net_dim.rst
@@ -214,17 +211,10 @@ The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
 and can be configured by the ETHTOOL_STUNABLE command of the
 SIOCETHTOOL ioctl.
 
-SKB
-===
-The driver-allocated SKB for frames received from Rx handling using
-NAPI context. The allocation method depends on the size of the packet.
-If the frame length is larger than rx_copybreak, napi_get_frags()
-is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer
-content is copied (by CPU) to the SKB, and the buffer is recycled.
-
 Statistics
 ==========
-The user can obtain ENA device and driver statistics using ethtool.
+
+The user can obtain ENA device and driver statistics using `ethtool`.
 The driver can collect regular or extended statistics (including
 per-queue stats) from the device.
 
@@ -232,22 +222,23 @@ In addition the driver logs the stats to syslog upon device reset.
 
 MTU
 ===
+
 The driver supports an arbitrarily large MTU with a maximum that is
 negotiated with the device. The driver configures MTU using the
 SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
-via ip(8) and similar legacy tools.
+via `ip(8)` and similar legacy tools.
 
 Stateless Offloads
 ==================
+
 The ENA driver supports:
 
-- TSO over IPv4/IPv6
-- TSO with ECN
 - IPv4 header checksum offload
 - TCP/UDP over IPv4/IPv6 checksum offloads
 
 RSS
 ===
+
 - The ENA device supports RSS that allows flexible Rx traffic
   steering.
 - Toeplitz and CRC32 hash functions are supported.
@@ -260,41 +251,42 @@ RSS
   function delivered in the Rx CQ descriptor is set in the received
   SKB.
 - The user can provide a hash key, hash function, and configure the
-  indirection table through ethtool(8).
+  indirection table through `ethtool(8)`.
 
 DATA PATH
 =========
+
 Tx
 --
 
-ena_start_xmit() is called by the stack. This function does the following:
+:code:`ena_start_xmit()` is called by the stack. This function does the following:
 
-- Maps data buffers (skb->data and frags).
-- Populates ena_buf for the push buffer (if the driver and device are
-  in push mode.)
+- Maps data buffers (``skb->data`` and frags).
+- Populates ``ena_buf`` for the push buffer (if the driver and device are
+  in push mode).
 - Prepares ENA bufs for the remaining frags.
-- Allocates a new request ID from the empty req_id ring. The request
+- Allocates a new request ID from the empty ``req_id`` ring. The request
   ID is the index of the packet in the Tx info. This is used for
-  out-of-order TX completions.
+  out-of-order Tx completions.
 - Adds the packet to the proper place in the Tx ring.
-- Calls ena_com_prepare_tx(), an ENA communication layer that converts
-  the ena_bufs to ENA descriptors (and adds meta ENA descriptors as
-  needed.)
+- Calls :code:`ena_com_prepare_tx()`, an ENA communication layer that converts
+  the ``ena_bufs`` to ENA descriptors (and adds meta ENA descriptors as
+  needed).
 
   * This function also copies the ENA descriptors and the push buffer
-    to the Device memory space (if in push mode.)
+    to the Device memory space (if in push mode).
 
-- Writes doorbell to the ENA device.
+- Writes a doorbell to the ENA device.
 - When the ENA device finishes sending the packet, a completion
   interrupt is raised.
 - The interrupt handler schedules NAPI.
-- The ena_clean_tx_irq() function is called. This function handles the
+- The :code:`ena_clean_tx_irq()` function is called. This function handles the
   completion descriptors generated by the ENA, with a single
   completion descriptor per completed packet.
 
-  * req_id is retrieved from the completion descriptor. The tx_info of
-    the packet is retrieved via the req_id. The data buffers are
-    unmapped and req_id is returned to the empty req_id ring.
+  * ``req_id`` is retrieved from the completion descriptor. The ``tx_info`` of
+    the packet is retrieved via the ``req_id``. The data buffers are
+    unmapped and ``req_id`` is returned to the empty ``req_id`` ring.
   * The function stops when the completion descriptors are completed or
     the budget is reached.
 
@@ -303,12 +295,11 @@ Rx
 
 - When a packet is received from the ENA device.
 - The interrupt handler schedules NAPI.
-- The ena_clean_rx_irq() function is called. This function calls
-  ena_rx_pkt(), an ENA communication layer function, which returns the
-  number of descriptors used for a new unhandled packet, and zero if
+- The :code:`ena_clean_rx_irq()` function is called. This function calls
+  :code:`ena_com_rx_pkt()`, an ENA communication layer function, which returns the
+  number of descriptors used for a new packet, and zero if
   no new packet is found.
-- Then it calls the ena_clean_rx_irq() function.
-- ena_eth_rx_skb() checks packet length:
+- :code:`ena_rx_skb()` checks packet length:
 
   * If the packet is small (len < rx_copybreak), the driver allocates
     a SKB for the new packet, and copies the packet payload into the
@@ -317,9 +308,10 @@ Rx
     - In this way the original data buffer is not passed to the stack
       and is reused for future Rx packets.
 
-  * Otherwise the function unmaps the Rx buffer, then allocates the
-    new SKB structure and hooks the Rx buffer to the SKB frags.
+  * Otherwise the function unmaps the Rx buffer, sets the first
+    descriptor as `skb`'s linear part and the other descriptors as the
+    `skb`'s frags.
 
 - The new SKB is updated with the necessary information (protocol,
-  checksum hw verify result, etc.), and then passed to the network
-  stack, using the NAPI interface function napi_gro_receive().
+  checksum hw verify result, etc), and then passed to the network
+  stack, using the NAPI interface function :code:`napi_gro_receive()`.

From 9e8afb05961147509181baf4de5cbd644757e850 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:16 +0300
Subject: [PATCH 1222/3292] net: ena: aggregate doorbell common operations into
 a function

The ena_ring_tx_doorbell() is introduced to call the doorbell and
increase the driver's corresponding stat.

Signed-off-by: Ido Segev <idose@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 38 ++++++++++----------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6e648b6882b7..37c839401c6c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -86,6 +86,12 @@ static void ena_increase_stat(u64 *statp, u64 cnt,
 	u64_stats_update_end(syncp);
 }
 
+static void ena_ring_tx_doorbell(struct ena_ring *tx_ring)
+{
+	ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+	ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp);
+}
+
 static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	struct ena_adapter *adapter = netdev_priv(dev);
@@ -144,7 +150,7 @@ static int ena_xmit_common(struct net_device *dev,
 		netif_dbg(adapter, tx_queued, dev,
 			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
 			  ring->qid);
-		ena_com_write_sq_doorbell(ring->ena_com_io_sq);
+		ena_ring_tx_doorbell(ring);
 	}
 
 	/* prepare the packet's descriptors to dma engine */
@@ -313,14 +319,12 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
 			     xdpf->len);
 	if (rc)
 		goto error_unmap_dma;
-	/* trigger the dma engine. ena_com_write_sq_doorbell()
-	 * has a mb
+
+	/* trigger the dma engine. ena_ring_tx_doorbell()
+	 * calls a memory barrier inside it.
 	 */
-	if (flags & XDP_XMIT_FLUSH) {
-		ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
-		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1,
-				  &xdp_ring->syncp);
-	}
+	if (flags & XDP_XMIT_FLUSH)
+		ena_ring_tx_doorbell(xdp_ring);
 
 	return rc;
 
@@ -361,11 +365,8 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
 	}
 
 	/* Ring doorbell to make device aware of the packets */
-	if (flags & XDP_XMIT_FLUSH) {
-		ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
-		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1,
-				  &xdp_ring->syncp);
-	}
+	if (flags & XDP_XMIT_FLUSH)
+		ena_ring_tx_doorbell(xdp_ring);
 
 	spin_unlock(&xdp_ring->xdp_tx_lock);
 
@@ -3100,14 +3101,11 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
-		/* trigger the dma engine. ena_com_write_sq_doorbell()
-		 * has a mb
+	if (netif_xmit_stopped(txq) || !netdev_xmit_more())
+		/* trigger the dma engine. ena_ring_tx_doorbell()
+		 * calls a memory barrier inside it.
 		 */
-		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
-		ena_increase_stat(&tx_ring->tx_stats.doorbells, 1,
-				  &tx_ring->syncp);
-	}
+		ena_ring_tx_doorbell(tx_ring);
 
 	return NETDEV_TX_OK;
 

From 947c54c395cb8368abebf3bec1413123d5dd4339 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:17 +0300
Subject: [PATCH 1223/3292] net: ena: Use dev_alloc() in RX buffer allocation

Use dev_alloc() when allocating RX buffers instead of specifying the
allocation flags explicitly. This result in same behaviour with less
code.

Also move the page allocation and its DMA mapping into a function. This
creates a logical block, which may help understanding the code.

Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 58 ++++++++++++--------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 37c839401c6c..261680aba33c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -975,8 +975,37 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 		ena_free_rx_resources(adapter, i);
 }
 
-static int ena_alloc_rx_page(struct ena_ring *rx_ring,
-				    struct ena_rx_buffer *rx_info, gfp_t gfp)
+struct page *ena_alloc_map_page(struct ena_ring *rx_ring, dma_addr_t *dma)
+{
+	struct page *page;
+
+	/* This would allocate the page on the same NUMA node the executing code
+	 * is running on.
+	 */
+	page = dev_alloc_page();
+	if (!page) {
+		ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1,
+				  &rx_ring->syncp);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	/* To enable NIC-side port-mirroring, AKA SPAN port,
+	 * we make the buffer readable from the nic as well
+	 */
+	*dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
+			    DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
+		ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
+				  &rx_ring->syncp);
+		__free_page(page);
+		return ERR_PTR(-EIO);
+	}
+
+	return page;
+}
+
+static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
+			       struct ena_rx_buffer *rx_info)
 {
 	int headroom = rx_ring->rx_headroom;
 	struct ena_com_buf *ena_buf;
@@ -991,25 +1020,11 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring,
 	if (unlikely(rx_info->page))
 		return 0;
 
-	page = alloc_page(gfp);
-	if (unlikely(!page)) {
-		ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1,
-				  &rx_ring->syncp);
-		return -ENOMEM;
-	}
+	/* We handle DMA here */
+	page = ena_alloc_map_page(rx_ring, &dma);
+	if (unlikely(IS_ERR(page)))
+		return PTR_ERR(page);
 
-	/* To enable NIC-side port-mirroring, AKA SPAN port,
-	 * we make the buffer readable from the nic as well
-	 */
-	dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
-			   DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
-		ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
-				  &rx_ring->syncp);
-
-		__free_page(page);
-		return -EIO;
-	}
 	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 		  "Allocate page %p, rx_info %p\n", page, rx_info);
 
@@ -1065,8 +1080,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 
 		rx_info = &rx_ring->rx_buffer_info[req_id];
 
-		rc = ena_alloc_rx_page(rx_ring, rx_info,
-				       GFP_ATOMIC | __GFP_COMP);
+		rc = ena_alloc_rx_buffer(rx_ring, rx_info);
 		if (unlikely(rc < 0)) {
 			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 				   "Failed to allocate buffer for rx queue %d\n",

From a01f2cd0ccf473f7af32afc9b74ac5f2caff3c18 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:01:18 +0300
Subject: [PATCH 1224/3292] net: ena: re-organize code to improve readability

Restructure some ethtool to a switch-case blocks to make it more uniform
with other similar functions.
Also restructure variable declaration to create reversed x-mas tree.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c     |  3 ++-
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 18 +++++++++++-------
 drivers/net/ethernet/amazon/ena/ena_netdev.c  |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 764852ead1d6..ab413fc1f68e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -1979,7 +1979,8 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
 		if (rc)
 			return rc;
 
-		if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER)
+		if (get_resp.u.max_queue_ext.version !=
+		    ENA_FEATURE_MAX_QUEUE_EXT_VER)
 			return -EINVAL;
 
 		memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext,
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 2fe7ccee55b2..27dae632efcb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -233,10 +233,13 @@ int ena_get_sset_count(struct net_device *netdev, int sset)
 {
 	struct ena_adapter *adapter = netdev_priv(netdev);
 
-	if (sset != ETH_SS_STATS)
-		return -EOPNOTSUPP;
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ena_get_sw_stats_count(adapter) +
+		       ena_get_hw_stats_count(adapter);
+	}
 
-	return ena_get_sw_stats_count(adapter) + ena_get_hw_stats_count(adapter);
+	return -EOPNOTSUPP;
 }
 
 static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
@@ -314,10 +317,11 @@ static void ena_get_ethtool_strings(struct net_device *netdev,
 {
 	struct ena_adapter *adapter = netdev_priv(netdev);
 
-	if (sset != ETH_SS_STATS)
-		return;
-
-	ena_get_strings(adapter, data, adapter->eni_stats_supported);
+	switch (sset) {
+	case ETH_SS_STATS:
+		ena_get_strings(adapter, data, adapter->eni_stats_supported);
+		break;
+	}
 }
 
 static int ena_get_link_ksettings(struct net_device *netdev,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 261680aba33c..cd6ea59c543c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1426,9 +1426,9 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 				  u32 descs,
 				  u16 *next_to_clean)
 {
-	struct sk_buff *skb;
 	struct ena_rx_buffer *rx_info;
 	u16 len, req_id, buf = 0;
+	struct sk_buff *skb;
 	void *page_addr;
 	u32 page_offset;
 	void *data_addr;

From 17f59244029bf9c0673725efdd0386ed95e127a7 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Tue, 8 Jun 2021 21:08:27 +0800
Subject: [PATCH 1225/3292] net: hns3: add support for handling all errors
 through MSI-X

Currently, hardware errors can be reported through AER or MSI-X mode.
However, the AER mode is intended to handle only bus errors, but not
hardware errors. On the other hand, virtual machines cannot handle
AER errors. When an AER error is reported, virtual machines will be
suspended. So add support for handling all these hardware errors
through MSI-X mode which depends on a newer version of firmware,
and reserve the handler of the AER mode for compatibility.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 16 +++++++
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 47 ++++++++++---------
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  1 +
 3 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index f125aa425872..540dd15d7771 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1611,11 +1611,27 @@ static const struct hclge_hw_blk hw_blk[] = {
 	{ /* sentinel */ }
 };
 
+static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
+{
+	u32 reg_val;
+
+	reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
+
+	if (enable)
+		reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
+	else
+		reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
+
+	hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
+}
+
 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
 {
 	const struct hclge_hw_blk *module = hw_blk;
 	int ret = 0;
 
+	hclge_config_all_msix_error(hdev, state);
+
 	while (module->name) {
 		if (module->config_err_int) {
 			ret = module->config_err_int(hdev, state);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 45102681bd2a..d5be3bc50b5c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3307,11 +3307,13 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
 
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 {
-	u32 cmdq_src_reg, msix_src_reg;
+	u32 cmdq_src_reg, msix_src_reg, hw_err_src_reg;
 
 	/* fetch the events from their corresponding regs */
 	cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
 	msix_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
+	hw_err_src_reg = hclge_read_dev(&hdev->hw,
+					HCLGE_RAS_PF_OTHER_INT_STS_REG);
 
 	/* Assumption: If by any chance reset and mailbox events are reported
 	 * together then we will only process reset event in this go and will
@@ -3339,11 +3341,11 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 		return HCLGE_VECTOR0_EVENT_RST;
 	}
 
-	/* check for vector0 msix event source */
-	if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
-		*clearval = msix_src_reg;
+	/* check for vector0 msix event and hardware error event source */
+	if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK ||
+	    hw_err_src_reg & HCLGE_RAS_REG_NFE_MASK ||
+	    hw_err_src_reg & HCLGE_RAS_REG_ROCEE_ERR_MASK)
 		return HCLGE_VECTOR0_EVENT_ERR;
-	}
 
 	/* check for vector0 mailbox(=CMDQ RX) event source */
 	if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
@@ -3354,9 +3356,8 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 
 	/* print other vector0 event source */
 	dev_info(&hdev->pdev->dev,
-		 "CMDQ INT status:0x%x, other INT status:0x%x\n",
-		 cmdq_src_reg, msix_src_reg);
-	*clearval = msix_src_reg;
+		 "INT status: CMDQ(%#x) HW errors(%#x) other(%#x)\n",
+		 cmdq_src_reg, hw_err_src_reg, msix_src_reg);
 
 	return HCLGE_VECTOR0_EVENT_OTHER;
 }
@@ -3427,15 +3428,10 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 
 	hclge_clear_event_cause(hdev, event_cause, clearval);
 
-	/* Enable interrupt if it is not cause by reset. And when
-	 * clearval equal to 0, it means interrupt status may be
-	 * cleared by hardware before driver reads status register.
-	 * For this case, vector0 interrupt also should be enabled.
-	 */
-	if (!clearval ||
-	    event_cause == HCLGE_VECTOR0_EVENT_MBX) {
+	/* Enable interrupt if it is not caused by reset event or error event */
+	if (event_cause == HCLGE_VECTOR0_EVENT_MBX ||
+	    event_cause == HCLGE_VECTOR0_EVENT_OTHER)
 		hclge_enable_vector(&hdev->misc_vector, true);
-	}
 
 	return IRQ_HANDLED;
 }
@@ -4244,22 +4240,27 @@ static void hclge_misc_err_recovery(struct hclge_dev *hdev)
 {
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	struct device *dev = &hdev->pdev->dev;
+	enum hnae3_reset_type reset_type;
 	u32 msix_sts_reg;
 
 	msix_sts_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
-
 	if (msix_sts_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
-		if (hclge_handle_hw_msix_error(hdev,
-					       &hdev->default_reset_request))
+		if (hclge_handle_hw_msix_error
+				(hdev, &hdev->default_reset_request))
 			dev_info(dev, "received msix interrupt 0x%x\n",
 				 msix_sts_reg);
+	}
+	hclge_enable_vector(&hdev->misc_vector, true);
 
-		if (hdev->default_reset_request)
-			if (ae_dev->ops->reset_event)
-				ae_dev->ops->reset_event(hdev->pdev, NULL);
+	hclge_handle_hw_ras_error(ae_dev);
+	if (ae_dev->hw_err_reset_req) {
+		reset_type = hclge_get_reset_level(ae_dev,
+						   &ae_dev->hw_err_reset_req);
+		hclge_set_def_reset_request(ae_dev, reset_type);
 	}
 
-	hclge_enable_vector(&hdev->misc_vector, true);
+	if (hdev->default_reset_request && ae_dev->ops->reset_event)
+		ae_dev->ops->reset_event(hdev->pdev, NULL);
 }
 
 static void hclge_errhand_service_task(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9b8abb5d7a8e..582972a6f60e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -190,6 +190,7 @@ enum HLCGE_PORT_TYPE {
 #define HCLGE_VECTOR0_IMP_RESET_INT_B	1
 #define HCLGE_VECTOR0_IMP_CMDQ_ERR_B	4U
 #define HCLGE_VECTOR0_IMP_RD_POISON_B	5U
+#define HCLGE_VECTOR0_ALL_MSIX_ERR_B	6U
 
 #define HCLGE_MAC_DEFAULT_FRAME \
 	(ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN)

From 2e2deee7618b062efe3aba9fcb017dadcf148819 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Tue, 8 Jun 2021 21:08:28 +0800
Subject: [PATCH 1226/3292] net: hns3: add the RAS compatibility adaptation
 solution

To adapt to hardware modification and ensure that the driver is
compatible with the original error handling content, we need to add the
RAS compatibility adaptation solution.

Add a processing branch to the driver during error handling. In the new
processing branch, NIC fault information is integrated by the IMP. An
interaction command is added between the driver and IMP to query
and clear the fault source and interrupt source. The IMP integrates
error information and reports the highest reset level to the driver.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_cmd.c         |   3 +-
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         |   2 +
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 320 ++++++++++++++++--
 .../hisilicon/hns3/hns3pf/hclge_err.h         |  69 ++++
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  54 ++-
 5 files changed, 409 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 8f6ed8577aea..614763f5e877 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -178,7 +178,8 @@ static bool hclge_is_special_opcode(u16 opcode)
 			     HCLGE_QUERY_CLEAR_MPF_RAS_INT,
 			     HCLGE_QUERY_CLEAR_PF_RAS_INT,
 			     HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
-			     HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT};
+			     HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
+			     HCLGE_QUERY_ALL_ERR_INFO};
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index da78a6477e46..234f0a3beec1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -293,6 +293,8 @@ enum hclge_opcode_type {
 	HCLGE_QUERY_MSIX_INT_STS_BD_NUM	= 0x1513,
 	HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT	= 0x1514,
 	HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT	= 0x1515,
+	HCLGE_QUERY_ALL_ERR_BD_NUM		= 0x1516,
+	HCLGE_QUERY_ALL_ERR_INFO		= 0x1517,
 	HCLGE_CONFIG_ROCEE_RAS_INT_EN	= 0x1580,
 	HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
 	HCLGE_ROCEE_PF_RAS_INT_CMD	= 0x1584,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 540dd15d7771..36f8055bd859 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -631,6 +631,98 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
 	{ /* sentinel */ }
 };
 
+static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
+	{
+		.module_id = MODULE_NONE,
+		.msg = "MODULE_NONE"
+	}, {
+		.module_id = MODULE_BIOS_COMMON,
+		.msg = "MODULE_BIOS_COMMON"
+	}, {
+		.module_id = MODULE_GE,
+		.msg = "MODULE_GE"
+	}, {
+		.module_id = MODULE_IGU_EGU,
+		.msg = "MODULE_IGU_EGU"
+	}, {
+		.module_id = MODULE_LGE,
+		.msg = "MODULE_LGE"
+	}, {
+		.module_id = MODULE_NCSI,
+		.msg = "MODULE_NCSI"
+	}, {
+		.module_id = MODULE_PPP,
+		.msg = "MODULE_PPP"
+	}, {
+		.module_id = MODULE_QCN,
+		.msg = "MODULE_QCN"
+	}, {
+		.module_id = MODULE_RCB_RX,
+		.msg = "MODULE_RCB_RX"
+	}, {
+		.module_id = MODULE_RTC,
+		.msg = "MODULE_RTC"
+	}, {
+		.module_id = MODULE_SSU,
+		.msg = "MODULE_SSU"
+	}, {
+		.module_id = MODULE_TM,
+		.msg = "MODULE_TM"
+	}, {
+		.module_id = MODULE_RCB_TX,
+		.msg = "MODULE_RCB_TX"
+	}, {
+		.module_id = MODULE_TXDMA,
+		.msg = "MODULE_TXDMA"
+	}, {
+		.module_id = MODULE_MASTER,
+		.msg = "MODULE_MASTER"
+	}
+};
+
+static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
+	{
+		.type_id = NONE_ERROR,
+		.msg = "none_error"
+	}, {
+		.type_id = FIFO_ERROR,
+		.msg = "fifo_error"
+	}, {
+		.type_id = MEMORY_ERROR,
+		.msg = "memory_error"
+	}, {
+		.type_id = POISON_ERROR,
+		.msg = "poison_error"
+	}, {
+		.type_id = MSIX_ECC_ERROR,
+		.msg = "msix_ecc_error"
+	}, {
+		.type_id = TQP_INT_ECC_ERROR,
+		.msg = "tqp_int_ecc_error"
+	}, {
+		.type_id = PF_ABNORMAL_INT_ERROR,
+		.msg = "pf_abnormal_int_error"
+	}, {
+		.type_id = MPF_ABNORMAL_INT_ERROR,
+		.msg = "mpf_abnormal_int_error"
+	}, {
+		.type_id = COMMON_ERROR,
+		.msg = "common_error"
+	}, {
+		.type_id = PORT_ERROR,
+		.msg = "port_error"
+	}, {
+		.type_id = ETS_ERROR,
+		.msg = "ets_error"
+	}, {
+		.type_id = NCSI_ERROR,
+		.msg = "ncsi_error"
+	}, {
+		.type_id = GLB_ERROR,
+		.msg = "glb_error"
+	}
+};
+
 static void hclge_log_error(struct device *dev, char *reg,
 			    const struct hclge_hw_error *err,
 			    u32 err_sts, unsigned long *reset_requests)
@@ -1892,11 +1984,8 @@ static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
 					  unsigned long *reset_requests)
 {
-	struct hclge_mac_tnl_stats mac_tnl_stats;
-	struct device *dev = &hdev->pdev->dev;
 	u32 mpf_bd_num, pf_bd_num, bd_num;
 	struct hclge_desc *desc;
-	u32 status;
 	int ret;
 
 	/* query the number of bds for the MSIx int status */
@@ -1919,29 +2008,7 @@ static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
 	if (ret)
 		goto msi_error;
 
-	/* query and clear mac tnl interruptions */
-	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
-				   true);
-	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
-	if (ret) {
-		dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
-		goto msi_error;
-	}
-
-	status = le32_to_cpu(desc->data[0]);
-	if (status) {
-		/* When mac tnl interrupt occurs, we record current time and
-		 * register status here in a fifo, then clear the status. So
-		 * that if link status changes suddenly at some time, we can
-		 * query them by debugfs.
-		 */
-		mac_tnl_stats.time = local_clock();
-		mac_tnl_stats.status = status;
-		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
-		ret = hclge_clear_mac_tnl_int(hdev);
-		if (ret)
-			dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
-	}
+	ret = hclge_handle_mac_tnl(hdev);
 
 msi_error:
 	kfree(desc);
@@ -1963,10 +2030,43 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
 	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
 }
 
+int hclge_handle_mac_tnl(struct hclge_dev *hdev)
+{
+	struct hclge_mac_tnl_stats mac_tnl_stats;
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc;
+	u32 status;
+	int ret;
+
+	/* query and clear mac tnl interruptions */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
+		return ret;
+	}
+
+	status = le32_to_cpu(desc.data[0]);
+	if (status) {
+		/* When mac tnl interrupt occurs, we record current time and
+		 * register status here in a fifo, then clear the status. So
+		 * that if link status changes suddenly at some time, we can
+		 * query them by debugfs.
+		 */
+		mac_tnl_stats.time = local_clock();
+		mac_tnl_stats.status = status;
+		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
+		ret = hclge_clear_mac_tnl_int(hdev);
+		if (ret)
+			dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
+				ret);
+	}
+
+	return ret;
+}
+
 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
 {
-#define HCLGE_DESC_NO_DATA_LEN 8
-
 	struct hclge_dev *hdev = ae_dev->priv;
 	struct device *dev = &hdev->pdev->dev;
 	u32 mpf_bd_num, pf_bd_num, bd_num;
@@ -2015,3 +2115,167 @@ void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
 msi_error:
 	kfree(desc);
 }
+
+static void
+hclge_handle_error_type_reg_log(struct device *dev,
+				struct hclge_mod_err_info *mod_info,
+				struct hclge_type_reg_err_info *type_reg_info)
+{
+#define HCLGE_ERR_TYPE_MASK 0x7F
+#define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
+
+	u8 mod_id, total_module, type_id, total_type, i, is_ras;
+
+	mod_id = mod_info->mod_id;
+	type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
+	is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;
+
+	total_module = ARRAY_SIZE(hclge_hw_module_id_st);
+	total_type = ARRAY_SIZE(hclge_hw_type_id_st);
+
+	if (mod_id < total_module && type_id < total_type)
+		dev_err(dev,
+			"found %s %s, is %s error.\n",
+			hclge_hw_module_id_st[mod_id].msg,
+			hclge_hw_type_id_st[type_id].msg,
+			is_ras ? "ras" : "msix");
+	else
+		dev_err(dev,
+			"unknown module[%u] or type[%u].\n", mod_id, type_id);
+
+	dev_err(dev, "reg_value:\n");
+	for (i = 0; i < type_reg_info->reg_num; i++)
+		dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
+}
+
+static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
+					  const u32 *buf, u32 buf_size)
+{
+	struct hclge_type_reg_err_info *type_reg_info;
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_mod_err_info *mod_info;
+	struct hclge_sum_err_info *sum_info;
+	u8 mod_num, err_num, i;
+	u32 offset = 0;
+
+	sum_info = (struct hclge_sum_err_info *)&buf[offset++];
+	if (sum_info->reset_type &&
+	    sum_info->reset_type != HNAE3_NONE_RESET)
+		set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
+	mod_num = sum_info->mod_num;
+
+	while (mod_num--) {
+		if (offset >= buf_size) {
+			dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
+				offset, buf_size);
+			return;
+		}
+		mod_info = (struct hclge_mod_err_info *)&buf[offset++];
+		err_num = mod_info->err_num;
+
+		for (i = 0; i < err_num; i++) {
+			if (offset >= buf_size) {
+				dev_err(dev,
+					"The offset(%u) exceeds buf size(%u).\n",
+					offset, buf_size);
+				return;
+			}
+
+			type_reg_info = (struct hclge_type_reg_err_info *)
+					    &buf[offset++];
+			hclge_handle_error_type_reg_log(dev, mod_info,
+							type_reg_info);
+
+			offset += type_reg_info->reg_num;
+		}
+	}
+}
+
+static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc_bd;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+	if (ret) {
+		dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
+		return ret;
+	}
+
+	*bd_num = le32_to_cpu(desc_bd.data[0]);
+	if (!(*bd_num)) {
+		dev_err(dev, "The value of bd_num is 0!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hclge_query_all_err_info(struct hclge_dev *hdev,
+				    struct hclge_desc *desc, u32 bd_num)
+{
+	struct device *dev = &hdev->pdev->dev;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
+	ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
+	if (ret)
+		dev_err(dev, "failed to query error info, ret = %d.\n", ret);
+
+	return ret;
+}
+
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
+{
+	u32 bd_num, desc_len, buf_len, buf_size, i;
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct hclge_desc *desc;
+	__le32 *desc_data;
+	u32 *buf;
+	int ret;
+
+	ret = hclge_query_all_err_bd_num(hdev, &bd_num);
+	if (ret)
+		goto out;
+
+	desc_len = bd_num * sizeof(struct hclge_desc);
+	desc = kzalloc(desc_len, GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = hclge_query_all_err_info(hdev, desc, bd_num);
+	if (ret)
+		goto err_desc;
+
+	buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
+	buf_size = buf_len / sizeof(u32);
+
+	desc_data = kzalloc(buf_len, GFP_KERNEL);
+	if (!desc_data)
+		return -ENOMEM;
+
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto err_buf_alloc;
+	}
+
+	memcpy(desc_data, &desc[0].data[0], buf_len);
+	for (i = 0; i < buf_size; i++)
+		buf[i] = le32_to_cpu(desc_data[i]);
+
+	hclge_handle_error_module_log(ae_dev, buf, buf_size);
+	kfree(buf);
+
+err_buf_alloc:
+	kfree(desc_data);
+err_desc:
+	kfree(desc);
+out:
+	return ret;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index d647f3c84134..27ab772c665e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -107,6 +107,10 @@
 #define HCLGE_ROCEE_OVF_ERR_INT_MASK		0x10000
 #define HCLGE_ROCEE_OVF_ERR_TYPE_MASK		0x3F
 
+#define HCLGE_DESC_DATA_MAX			8
+#define HCLGE_REG_NUM_MAX			256
+#define HCLGE_DESC_NO_DATA_LEN			8
+
 enum hclge_err_int_type {
 	HCLGE_ERR_INT_MSIX = 0,
 	HCLGE_ERR_INT_RAS_CE = 1,
@@ -114,6 +118,40 @@ enum hclge_err_int_type {
 	HCLGE_ERR_INT_RAS_FE = 3,
 };
 
+enum hclge_mod_name_list {
+	MODULE_NONE		= 0,
+	MODULE_BIOS_COMMON	= 1,
+	MODULE_GE		= 2,
+	MODULE_IGU_EGU		= 3,
+	MODULE_LGE		= 4,
+	MODULE_NCSI		= 5,
+	MODULE_PPP		= 6,
+	MODULE_QCN		= 7,
+	MODULE_RCB_RX		= 8,
+	MODULE_RTC		= 9,
+	MODULE_SSU		= 10,
+	MODULE_TM		= 11,
+	MODULE_RCB_TX		= 12,
+	MODULE_TXDMA		= 13,
+	MODULE_MASTER		= 14,
+};
+
+enum hclge_err_type_list {
+	NONE_ERROR		= 0,
+	FIFO_ERROR		= 1,
+	MEMORY_ERROR		= 2,
+	POISON_ERROR		= 3,
+	MSIX_ECC_ERROR		= 4,
+	TQP_INT_ECC_ERROR	= 5,
+	PF_ABNORMAL_INT_ERROR	= 6,
+	MPF_ABNORMAL_INT_ERROR	= 7,
+	COMMON_ERROR		= 8,
+	PORT_ERROR		= 9,
+	ETS_ERROR		= 10,
+	NCSI_ERROR		= 11,
+	GLB_ERROR		= 12,
+};
+
 struct hclge_hw_blk {
 	u32 msk;
 	const char *name;
@@ -126,6 +164,35 @@ struct hclge_hw_error {
 	enum hnae3_reset_type reset_level;
 };
 
+struct hclge_hw_module_id {
+	enum hclge_mod_name_list module_id;
+	const char *msg;
+};
+
+struct hclge_hw_type_id {
+	enum hclge_err_type_list type_id;
+	const char *msg;
+};
+
+struct hclge_sum_err_info {
+	u8 reset_type;
+	u8 mod_num;
+	u8 rsv[2];
+};
+
+struct hclge_mod_err_info {
+	u8 mod_id;
+	u8 err_num;
+	u8 rsv[2];
+};
+
+struct hclge_type_reg_err_info {
+	u8 type_id;
+	u8 reg_num;
+	u8 rsv[2];
+	u32 hclge_reg[HCLGE_REG_NUM_MAX];
+};
+
 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en);
@@ -133,4 +200,6 @@ void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev);
 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
 int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
 			       unsigned long *reset_requests);
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev);
+int hclge_handle_mac_tnl(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d5be3bc50b5c..3c08fc71b951 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4236,11 +4236,49 @@ static void hclge_reset_subtask(struct hclge_dev *hdev)
 	hdev->reset_type = HNAE3_NONE_RESET;
 }
 
+static void hclge_handle_err_reset_request(struct hclge_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	enum hnae3_reset_type reset_type;
+
+	if (ae_dev->hw_err_reset_req) {
+		reset_type = hclge_get_reset_level(ae_dev,
+						   &ae_dev->hw_err_reset_req);
+		hclge_set_def_reset_request(ae_dev, reset_type);
+	}
+
+	if (hdev->default_reset_request && ae_dev->ops->reset_event)
+		ae_dev->ops->reset_event(hdev->pdev, NULL);
+
+	/* enable interrupt after error handling complete */
+	hclge_enable_vector(&hdev->misc_vector, true);
+}
+
+static void hclge_handle_err_recovery(struct hclge_dev *hdev)
+{
+	u32 mask_val = HCLGE_RAS_REG_NFE_MASK | HCLGE_RAS_REG_ROCEE_ERR_MASK;
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	u32 msix_src_flag, hw_err_src_flag;
+
+	msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
+			HCLGE_VECTOR0_REG_MSIX_MASK;
+
+	hw_err_src_flag = hclge_read_dev(&hdev->hw,
+					 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
+			  mask_val;
+
+	if (msix_src_flag || hw_err_src_flag) {
+		hclge_handle_error_info_log(ae_dev);
+		hclge_handle_mac_tnl(hdev);
+	}
+
+	hclge_handle_err_reset_request(hdev);
+}
+
 static void hclge_misc_err_recovery(struct hclge_dev *hdev)
 {
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	struct device *dev = &hdev->pdev->dev;
-	enum hnae3_reset_type reset_type;
 	u32 msix_sts_reg;
 
 	msix_sts_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
@@ -4250,17 +4288,10 @@ static void hclge_misc_err_recovery(struct hclge_dev *hdev)
 			dev_info(dev, "received msix interrupt 0x%x\n",
 				 msix_sts_reg);
 	}
-	hclge_enable_vector(&hdev->misc_vector, true);
 
 	hclge_handle_hw_ras_error(ae_dev);
-	if (ae_dev->hw_err_reset_req) {
-		reset_type = hclge_get_reset_level(ae_dev,
-						   &ae_dev->hw_err_reset_req);
-		hclge_set_def_reset_request(ae_dev, reset_type);
-	}
 
-	if (hdev->default_reset_request && ae_dev->ops->reset_event)
-		ae_dev->ops->reset_event(hdev->pdev, NULL);
+	hclge_handle_err_reset_request(hdev);
 }
 
 static void hclge_errhand_service_task(struct hclge_dev *hdev)
@@ -4268,7 +4299,10 @@ static void hclge_errhand_service_task(struct hclge_dev *hdev)
 	if (!test_and_clear_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
 		return;
 
-	hclge_misc_err_recovery(hdev);
+	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
+		hclge_handle_err_recovery(hdev);
+	else
+		hclge_misc_err_recovery(hdev);
 }
 
 static void hclge_reset_service_task(struct hclge_dev *hdev)

From e65e9f5c2e4efc17657d016d767eb7010d9dd598 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Tue, 8 Jun 2021 21:08:29 +0800
Subject: [PATCH 1227/3292] net: hns3: add support for imp-handle ras
 capability

IMP(Intelligent Management Processor) firmware add a new feature to
handle and consolidate RAS information for new devices, NIC driver
only needs to query the reported RAS information. NIC driver adds
support for this feature.

Driver queries device capability to check whether IMP support this
feature, If yes, execute the new RAS processing branch.

In order to add a method to check whether PF supports imp-handle RAS
feature, add dumping this info in debugfs.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             | 4 ++++
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c      | 3 +++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c  | 2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 5 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index dc9b5bc3431b..e564aa32a414 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -91,6 +91,7 @@ enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_STASH_B,
 	HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
 	HNAE3_DEV_SUPPORT_PAUSE_B,
+	HNAE3_DEV_SUPPORT_RAS_IMP_B,
 	HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
 	HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B,
 	HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B,
@@ -129,6 +130,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_dev_phy_imp_supported(hdev) \
 	test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, (hdev)->ae_dev->caps)
 
+#define hnae3_dev_ras_imp_supported(hdev) \
+	test_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, (hdev)->ae_dev->caps)
+
 #define hnae3_dev_tqp_txrx_indep_supported(hdev) \
 	test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (hdev)->ae_dev->caps)
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index cf1efd2f4a0f..a0edca848392 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -349,6 +349,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
 	}, {
 		.name = "support imp-controlled PHY",
 		.cap_bit = HNAE3_DEV_SUPPORT_PHY_IMP_B,
+	}, {
+		.name = "support imp-controlled RAS",
+		.cap_bit = HNAE3_DEV_SUPPORT_RAS_IMP_B,
 	}, {
 		.name = "support rxd advanced layout",
 		.cap_bit = HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 614763f5e877..887297e37cf3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -387,6 +387,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
 		set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_RAS_IMP_B))
+		set_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
 		set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_PORT_VLAN_BYPASS_B)) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 234f0a3beec1..221811af9473 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -392,6 +392,7 @@ enum HCLGE_CAP_BITS {
 	HCLGE_CAP_HW_PAD_B,
 	HCLGE_CAP_STASH_B,
 	HCLGE_CAP_UDP_TUNNEL_CSUM_B,
+	HCLGE_CAP_RAS_IMP_B = 12,
 	HCLGE_CAP_FEC_B = 13,
 	HCLGE_CAP_PAUSE_B = 14,
 	HCLGE_CAP_RXD_ADV_LAYOUT_B = 15,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 3c08fc71b951..cf34216df171 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4299,7 +4299,7 @@ static void hclge_errhand_service_task(struct hclge_dev *hdev)
 	if (!test_and_clear_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
 		return;
 
-	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
+	if (hnae3_dev_ras_imp_supported(hdev))
 		hclge_handle_err_recovery(hdev);
 	else
 		hclge_misc_err_recovery(hdev);

From 8a95e360fd512f1cb55239645879b15d26bc7e21 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Tue, 8 Jun 2021 21:08:30 +0800
Subject: [PATCH 1228/3292] net: hns3: update error recovery module and type

Update error recovery module and type for RoCE.

The enumeration values of module names and error types are not sorted
in sequence. If use the current printing mode, they cannot be correctly
printed.

Use the index mode, If mod_id and type_id match the enumerated value,
display the corresponding information.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 58 ++++++++++++++++++-
 .../hisilicon/hns3/hns3pf/hclge_err.h         | 18 ++++++
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  3 +-
 3 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 36f8055bd859..0e942d11dbf3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -677,6 +677,36 @@ static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
 	}, {
 		.module_id = MODULE_MASTER,
 		.msg = "MODULE_MASTER"
+	}, {
+		.module_id = MODULE_ROCEE_TOP,
+		.msg = "MODULE_ROCEE_TOP"
+	}, {
+		.module_id = MODULE_ROCEE_TIMER,
+		.msg = "MODULE_ROCEE_TIMER"
+	}, {
+		.module_id = MODULE_ROCEE_MDB,
+		.msg = "MODULE_ROCEE_MDB"
+	}, {
+		.module_id = MODULE_ROCEE_TSP,
+		.msg = "MODULE_ROCEE_TSP"
+	}, {
+		.module_id = MODULE_ROCEE_TRP,
+		.msg = "MODULE_ROCEE_TRP"
+	}, {
+		.module_id = MODULE_ROCEE_SCC,
+		.msg = "MODULE_ROCEE_SCC"
+	}, {
+		.module_id = MODULE_ROCEE_CAEP,
+		.msg = "MODULE_ROCEE_CAEP"
+	}, {
+		.module_id = MODULE_ROCEE_GEN_AC,
+		.msg = "MODULE_ROCEE_GEN_AC"
+	}, {
+		.module_id = MODULE_ROCEE_QMM,
+		.msg = "MODULE_ROCEE_QMM"
+	}, {
+		.module_id = MODULE_ROCEE_LSAN,
+		.msg = "MODULE_ROCEE_LSAN"
 	}
 };
 
@@ -720,6 +750,12 @@ static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
 	}, {
 		.type_id = GLB_ERROR,
 		.msg = "glb_error"
+	}, {
+		.type_id = ROCEE_NORMAL_ERR,
+		.msg = "rocee_normal_error"
+	}, {
+		.type_id = ROCEE_OVF_ERR,
+		.msg = "rocee_ovf_error"
 	}
 };
 
@@ -2125,6 +2161,8 @@ hclge_handle_error_type_reg_log(struct device *dev,
 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
 
 	u8 mod_id, total_module, type_id, total_type, i, is_ras;
+	u8 index_module = MODULE_NONE;
+	u8 index_type = NONE_ERROR;
 
 	mod_id = mod_info->mod_id;
 	type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
@@ -2133,11 +2171,25 @@ hclge_handle_error_type_reg_log(struct device *dev,
 	total_module = ARRAY_SIZE(hclge_hw_module_id_st);
 	total_type = ARRAY_SIZE(hclge_hw_type_id_st);
 
-	if (mod_id < total_module && type_id < total_type)
+	for (i = 0; i < total_module; i++) {
+		if (mod_id == hclge_hw_module_id_st[i].module_id) {
+			index_module = i;
+			break;
+		}
+	}
+
+	for (i = 0; i < total_type; i++) {
+		if (type_id == hclge_hw_type_id_st[i].type_id) {
+			index_type = i;
+			break;
+		}
+	}
+
+	if (index_module != MODULE_NONE && index_type != NONE_ERROR)
 		dev_err(dev,
 			"found %s %s, is %s error.\n",
-			hclge_hw_module_id_st[mod_id].msg,
-			hclge_hw_type_id_st[type_id].msg,
+			hclge_hw_module_id_st[index_module].msg,
+			hclge_hw_type_id_st[index_type].msg,
 			is_ras ? "ras" : "msix");
 	else
 		dev_err(dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 27ab772c665e..ce4c96bbef8e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -15,6 +15,8 @@
 #define HCLGE_RAS_PF_OTHER_INT_STS_REG   0x20B00
 #define HCLGE_RAS_REG_NFE_MASK   0xFF00
 #define HCLGE_RAS_REG_ROCEE_ERR_MASK   0x3000000
+#define HCLGE_RAS_REG_ERR_MASK \
+	(HCLGE_RAS_REG_NFE_MASK | HCLGE_RAS_REG_ROCEE_ERR_MASK)
 
 #define HCLGE_VECTOR0_REG_MSIX_MASK   0x1FF00
 
@@ -134,6 +136,18 @@ enum hclge_mod_name_list {
 	MODULE_RCB_TX		= 12,
 	MODULE_TXDMA		= 13,
 	MODULE_MASTER		= 14,
+	/* add new MODULE NAME for NIC here in order */
+	MODULE_ROCEE_TOP	= 40,
+	MODULE_ROCEE_TIMER	= 41,
+	MODULE_ROCEE_MDB	= 42,
+	MODULE_ROCEE_TSP	= 43,
+	MODULE_ROCEE_TRP	= 44,
+	MODULE_ROCEE_SCC	= 45,
+	MODULE_ROCEE_CAEP	= 46,
+	MODULE_ROCEE_GEN_AC	= 47,
+	MODULE_ROCEE_QMM	= 48,
+	MODULE_ROCEE_LSAN	= 49,
+	/* add new MODULE NAME for RoCEE here in order */
 };
 
 enum hclge_err_type_list {
@@ -150,6 +164,10 @@ enum hclge_err_type_list {
 	ETS_ERROR		= 10,
 	NCSI_ERROR		= 11,
 	GLB_ERROR		= 12,
+	/* add new ERROR TYPE for NIC here in order */
+	ROCEE_NORMAL_ERR	= 40,
+	ROCEE_OVF_ERR		= 41,
+	/* add new ERROR TYPE for ROCEE here in order */
 };
 
 struct hclge_hw_blk {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index cf34216df171..9ff4210f6477 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3343,8 +3343,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 
 	/* check for vector0 msix event and hardware error event source */
 	if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK ||
-	    hw_err_src_reg & HCLGE_RAS_REG_NFE_MASK ||
-	    hw_err_src_reg & HCLGE_RAS_REG_ROCEE_ERR_MASK)
+	    hw_err_src_reg & HCLGE_RAS_REG_ERR_MASK)
 		return HCLGE_VECTOR0_EVENT_ERR;
 
 	/* check for vector0 mailbox(=CMDQ RX) event source */

From 1c360a4a077fc0f74a350fe2ef267cbe8a9388e3 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Tue, 8 Jun 2021 21:08:31 +0800
Subject: [PATCH 1229/3292] net: hns3: add error handling compatibility during
 initialization

During initialization, the driver logs and clears the hw errors that
already occurred. For device supports imp-handle ras capability, it
needs handle different error status, otherwise it may cause wrong reset.

So fix it by adding a new processing branch.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 22 +++++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_err.h         |  2 ++
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 21 +++++++++---------
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 0e942d11dbf3..bad9fda19398 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -2152,6 +2152,28 @@ msi_error:
 	kfree(desc);
 }
 
+bool hclge_find_error_source(struct hclge_dev *hdev)
+{
+	u32 msix_src_flag, hw_err_src_flag;
+
+	msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
+			HCLGE_VECTOR0_REG_MSIX_MASK;
+
+	hw_err_src_flag = hclge_read_dev(&hdev->hw,
+					 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
+			  HCLGE_RAS_REG_ERR_MASK;
+
+	return msix_src_flag || hw_err_src_flag;
+}
+
+void hclge_handle_occurred_error(struct hclge_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+
+	if (hclge_find_error_source(hdev))
+		hclge_handle_error_info_log(ae_dev);
+}
+
 static void
 hclge_handle_error_type_reg_log(struct device *dev,
 				struct hclge_mod_err_info *mod_info,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index ce4c96bbef8e..07987fb8332e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -215,6 +215,8 @@ int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en);
 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev);
+bool hclge_find_error_source(struct hclge_dev *hdev);
+void hclge_handle_occurred_error(struct hclge_dev *hdev);
 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
 int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
 			       unsigned long *reset_requests);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 9ff4210f6477..d960e08850ae 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4255,18 +4255,11 @@ static void hclge_handle_err_reset_request(struct hclge_dev *hdev)
 
 static void hclge_handle_err_recovery(struct hclge_dev *hdev)
 {
-	u32 mask_val = HCLGE_RAS_REG_NFE_MASK | HCLGE_RAS_REG_ROCEE_ERR_MASK;
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
-	u32 msix_src_flag, hw_err_src_flag;
 
-	msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
-			HCLGE_VECTOR0_REG_MSIX_MASK;
+	ae_dev->hw_err_reset_req = 0;
 
-	hw_err_src_flag = hclge_read_dev(&hdev->hw,
-					 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
-			  mask_val;
-
-	if (msix_src_flag || hw_err_src_flag) {
+	if (hclge_find_error_source(hdev)) {
 		hclge_handle_error_info_log(ae_dev);
 		hclge_handle_mac_tnl(hdev);
 	}
@@ -11558,7 +11551,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_clear_resetting_state(hdev);
 
 	/* Log and clear the hw errors those already occurred */
-	hclge_handle_all_hns_hw_errors(ae_dev);
+	if (hnae3_dev_ras_imp_supported(hdev))
+		hclge_handle_occurred_error(hdev);
+	else
+		hclge_handle_all_hns_hw_errors(ae_dev);
 
 	/* request delayed reset for the error recovery because an immediate
 	 * global reset on a PF affecting pending initialization of other PFs
@@ -11911,7 +11907,10 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 	}
 
 	/* Log and clear the hw errors those already occurred */
-	hclge_handle_all_hns_hw_errors(ae_dev);
+	if (hnae3_dev_ras_imp_supported(hdev))
+		hclge_handle_occurred_error(hdev);
+	else
+		hclge_handle_all_hns_hw_errors(ae_dev);
 
 	/* Re-enable the hw error interrupts because
 	 * the interrupts get disabled on global reset.

From 36861d1f0408a431ede4184d90f7bf1598d639ca Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 8 Jun 2021 07:57:37 +0000
Subject: [PATCH 1230/3292] net: qede: Use list_for_each_entry() to simplify
 code

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_rdma.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 2f6598086d9b..6304514a6f2c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -247,12 +247,10 @@ static struct qede_rdma_event_work *
 qede_rdma_get_free_event_node(struct qede_dev *edev)
 {
 	struct qede_rdma_event_work *event_node = NULL;
-	struct list_head *list_node = NULL;
 	bool found = false;
 
-	list_for_each(list_node, &edev->rdma_info.rdma_event_list) {
-		event_node = list_entry(list_node, struct qede_rdma_event_work,
-					list);
+	list_for_each_entry(event_node, &edev->rdma_info.rdma_event_list,
+			    list) {
 		if (!work_pending(&event_node->work)) {
 			found = true;
 			break;

From 3835a6614ae7ee4840459bf47528a97b1dfc5439 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 8 Jun 2021 08:05:05 +0000
Subject: [PATCH 1231/3292] net: x25: Use list_for_each_entry() to simplify
 code in x25_link.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_link.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 57a81100c5da..5460b9146dd8 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -332,12 +332,9 @@ void x25_link_device_down(struct net_device *dev)
 struct x25_neigh *x25_get_neigh(struct net_device *dev)
 {
 	struct x25_neigh *nb, *use = NULL;
-	struct list_head *entry;
 
 	read_lock_bh(&x25_neigh_list_lock);
-	list_for_each(entry, &x25_neigh_list) {
-		nb = list_entry(entry, struct x25_neigh, node);
-
+	list_for_each_entry(nb, &x25_neigh_list, node) {
 		if (nb->dev == dev) {
 			use = nb;
 			break;

From e83332842a46c091992ad06145b5c1b65a08ab05 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 8 Jun 2021 08:13:01 +0000
Subject: [PATCH 1232/3292] net: lapb: Use list_for_each_entry() to simplify
 code in lapb_iface.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/lapb/lapb_iface.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 1078e14f1acf..0971ca48ba15 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -80,11 +80,9 @@ static void __lapb_insert_cb(struct lapb_cb *lapb)
 
 static struct lapb_cb *__lapb_devtostruct(struct net_device *dev)
 {
-	struct list_head *entry;
 	struct lapb_cb *lapb, *use = NULL;
 
-	list_for_each(entry, &lapb_list) {
-		lapb = list_entry(entry, struct lapb_cb, node);
+	list_for_each_entry(lapb, &lapb_list, node) {
 		if (lapb->dev == dev) {
 			use = lapb;
 			break;

From b55b1d50b08ce3b79329f62da8104b25c607bf38 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 16:10:37 +0300
Subject: [PATCH 1233/3292] net: stmmac: fix NPD with phylink_set_pcs if there
 is no MDIO bus

priv->plat->mdio_bus_data is optional, some platforms may not set it,
however we proceed to look straight at priv->plat->mdio_bus_data->has_xpcs.

Since the xpcs is instantiated based on the has_xpcs property, we can
avoid looking at the priv->plat->mdio_bus_data structure altogether and
just check for the presence of the xpcs pointer.

Fixes: 11059740e616 ("net: pcs: xpcs: convert to phylink_pcs_ops")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1b12a2f8bfb5..f2adff59b07d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1238,11 +1238,8 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 	if (IS_ERR(phylink))
 		return PTR_ERR(phylink);
 
-	if (mdio_bus_data->has_xpcs) {
-		struct mdio_xpcs_args *xpcs = priv->hw->xpcs;
-
-		phylink_set_pcs(phylink, &xpcs->pcs);
-	}
+	if (priv->hw->xpcs)
+		phylink_set_pcs(phylink, &priv->hw->xpcs->pcs);
 
 	priv->phylink = phylink;
 	return 0;

From 78595dfcb29b7426410b93c1400dca507e6e899e Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 8 Jun 2021 13:29:08 +0000
Subject: [PATCH 1234/3292] ethernet/qlogic: Use list_for_each_entry() to
 simplify code in qlcnic_hw.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index e1b8490bed0a..4b8bc46f55c2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -460,12 +460,10 @@ int qlcnic_82xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr,
 int qlcnic_nic_del_mac(struct qlcnic_adapter *adapter, const u8 *addr)
 {
 	struct qlcnic_mac_vlan_list *cur;
-	struct list_head *head;
 	int err = -EINVAL;
 
 	/* Delete MAC from the existing list */
-	list_for_each(head, &adapter->mac_list) {
-		cur = list_entry(head, struct qlcnic_mac_vlan_list, list);
+	list_for_each_entry(cur, &adapter->mac_list, list) {
 		if (ether_addr_equal(addr, cur->mac_addr)) {
 			err = qlcnic_sre_macaddr_change(adapter, cur->mac_addr,
 							0, QLCNIC_MAC_DEL);
@@ -483,11 +481,9 @@ int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, const u8 *addr, u16 vlan,
 		       enum qlcnic_mac_type mac_type)
 {
 	struct qlcnic_mac_vlan_list *cur;
-	struct list_head *head;
 
 	/* look up if already exists */
-	list_for_each(head, &adapter->mac_list) {
-		cur = list_entry(head, struct qlcnic_mac_vlan_list, list);
+	list_for_each_entry(cur, &adapter->mac_list, list) {
 		if (ether_addr_equal(addr, cur->mac_addr) &&
 		    cur->vlan_id == vlan)
 			return 0;

From 96bffe70231c871d1b39ecc44288c96bed66422b Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 8 Jun 2021 13:30:07 +0000
Subject: [PATCH 1235/3292] net: x25: Use list_for_each_entry() to simplify
 code in x25_forward.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_forward.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index d48ad6d29197..21b30b56e889 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -19,7 +19,6 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
 {
 	struct x25_route *rt;
 	struct x25_neigh *neigh_new = NULL;
-	struct list_head *entry;
 	struct x25_forward *x25_frwd, *new_frwd;
 	struct sk_buff *skbn;
 	short same_lci = 0;
@@ -46,8 +45,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
 	 * established LCI? It shouldn't happen, just in case..
 	 */
 	read_lock_bh(&x25_forward_list_lock);
-	list_for_each(entry, &x25_forward_list) {
-		x25_frwd = list_entry(entry, struct x25_forward, node);
+	list_for_each_entry(x25_frwd, &x25_forward_list, node) {
 		if (x25_frwd->lci == lci) {
 			pr_warn("call request for lci which is already registered!, transmitting but not registering new pair\n");
 			same_lci = 1;
@@ -92,15 +90,13 @@ out_no_route:
 int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) {
 
 	struct x25_forward *frwd;
-	struct list_head *entry;
 	struct net_device *peer = NULL;
 	struct x25_neigh *nb;
 	struct sk_buff *skbn;
 	int rc = 0;
 
 	read_lock_bh(&x25_forward_list_lock);
-	list_for_each(entry, &x25_forward_list) {
-		frwd = list_entry(entry, struct x25_forward, node);
+	list_for_each_entry(frwd, &x25_forward_list, node) {
 		if (frwd->lci == lci) {
 			/* The call is established, either side can send */
 			if (from->dev == frwd->dev1) {

From de274be32cb288d96b91494aeaafccc34cf4e00f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 14:16:51 +0300
Subject: [PATCH 1236/3292] net: dsa: felix: set TX flow control according to
 the phylink_mac_link_up resolution

Instead of relying on the static initialization done by ocelot_init_port()
which enables flow control unconditionally, set SYS_PAUSE_CFG_PAUSE_ENA
according to the parameters negotiated by the PHY.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index ce607fbaaa3a..a2a15919b960 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -940,6 +940,8 @@ static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
 
 	ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
 
+	ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
+
 	/* Undo the effects of felix_phylink_mac_link_down:
 	 * enable MAC module
 	 */

From 5b38b97f40a7bd8295260c59b997bf47b79c3675 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 8 Jun 2021 21:56:22 +0800
Subject: [PATCH 1237/3292] net: nixge: simplify code with devm platform
 functions

Use devm_platform_get_and_ioremap_resource() and
devm_platform_ioremap_resource_byname to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ni/nixge.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index a6861df9904f..2d097dcb7bda 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1224,7 +1224,6 @@ static int nixge_of_get_resources(struct platform_device *pdev)
 	const struct of_device_id *of_id;
 	enum nixge_version version;
 	struct resource *ctrlres;
-	struct resource *dmares;
 	struct net_device *ndev;
 	struct nixge_priv *priv;
 
@@ -1236,12 +1235,9 @@ static int nixge_of_get_resources(struct platform_device *pdev)
 
 	version = (enum nixge_version)of_id->data;
 	if (version <= NIXGE_V2)
-		dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		priv->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	else
-		dmares = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						      "dma");
-
-	priv->dma_regs = devm_ioremap_resource(&pdev->dev, dmares);
+		priv->dma_regs = devm_platform_ioremap_resource_byname(pdev, "dma");
 	if (IS_ERR(priv->dma_regs)) {
 		netdev_err(ndev, "failed to map dma regs\n");
 		return PTR_ERR(priv->dma_regs);

From 52481e585951f4a199678cd6e61f85db52548a01 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 8 Jun 2021 21:57:18 +0800
Subject: [PATCH 1238/3292] sh_eth: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index c5b154868c1f..177523be4fb6 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3225,9 +3225,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	int ret;
 
-	/* get base addr */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
 	ndev = alloc_etherdev(sizeof(struct sh_eth_private));
 	if (!ndev)
 		return -ENOMEM;
@@ -3245,7 +3242,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 	mdp = netdev_priv(ndev);
 	mdp->num_tx_ring = TX_RING_SIZE;
 	mdp->num_rx_ring = RX_RING_SIZE;
-	mdp->addr = devm_ioremap_resource(&pdev->dev, res);
+	mdp->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(mdp->addr)) {
 		ret = PTR_ERR(mdp->addr);
 		goto out_release;

From e67f325e9cd67562b761e884680c0fec03a6f404 Mon Sep 17 00:00:00 2001
From: Matthew Hagan <mnhagan88@gmail.com>
Date: Tue, 8 Jun 2021 19:59:06 +0100
Subject: [PATCH 1239/3292] net: stmmac: explicitly deassert GMAC_AHB_RESET

We are currently assuming that GMAC_AHB_RESET will already be deasserted
by the bootloader. However if this has not been done, probing of the GMAC
will fail. To remedy this we must ensure GMAC_AHB_RESET has been deasserted
prior to probing.

v2 changes:
 - remove NULL condition check for stmmac_ahb_rst in stmmac_main.c
 - unwrap dev_err() message in stmmac_main.c
 - add PTR_ERR() around plat->stmmac_ahb_rst in stmmac_platform.c

v3 changes:
 - add error pointer to dev_err() output
 - add reset_control_assert(stmmac_ahb_rst) in stmmac_dvr_remove
 - revert PTR_ERR() around plat->stmmac_ahb_rst since this is performed
   on the returned value of ret by the calling function

Signed-off-by: Matthew Hagan <mnhagan88@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c     | 6 ++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 7 +++++++
 include/linux/stmmac.h                                | 1 +
 3 files changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f2adff59b07d..1c881ec8cd04 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6843,6 +6843,11 @@ int stmmac_dvr_probe(struct device *device,
 			reset_control_reset(priv->plat->stmmac_rst);
 	}
 
+	ret = reset_control_deassert(priv->plat->stmmac_ahb_rst);
+	if (ret == -ENOTSUPP)
+		dev_err(priv->device, "unable to bring out of ahb reset: %pe\n",
+			ERR_PTR(ret));
+
 	/* Init MAC and get the capabilities */
 	ret = stmmac_hw_init(priv);
 	if (ret)
@@ -7086,6 +7091,7 @@ int stmmac_dvr_remove(struct device *dev)
 	phylink_destroy(priv->phylink);
 	if (priv->plat->stmmac_rst)
 		reset_control_assert(priv->plat->stmmac_rst);
+	reset_control_assert(priv->plat->stmmac_ahb_rst);
 	pm_runtime_put(dev);
 	pm_runtime_disable(dev);
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 97a1fedcc9ac..d8ae58bdbbe3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -600,6 +600,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 		goto error_hw_init;
 	}
 
+	plat->stmmac_ahb_rst = devm_reset_control_get_optional_shared(
+							&pdev->dev, "ahb");
+	if (IS_ERR(plat->stmmac_ahb_rst)) {
+		ret = plat->stmmac_ahb_rst;
+		goto error_hw_init;
+	}
+
 	return plat;
 
 error_hw_init:
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index b10be3385a30..3867980d1447 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -240,6 +240,7 @@ struct plat_stmmacenet_data {
 	unsigned int mult_fact_100ns;
 	s32 ptp_max_adj;
 	struct reset_control *stmmac_rst;
+	struct reset_control *stmmac_ahb_rst;
 	struct stmmac_axi *axi;
 	int has_gmac4;
 	bool has_sun8i;

From cf6b5ffdce5a78b2fcb0e53b3a2487c490bcbf7f Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Tue, 8 Jun 2021 11:40:57 +0200
Subject: [PATCH 1240/3292] netfilter: nft_exthdr: Fix for unsafe packet data
 read

While iterating through an SCTP packet's chunks, skb_header_pointer() is
called for the minimum expected chunk header size. If (that part of) the
skbuff is non-linear, the following memcpy() may read data past
temporary buffer '_sch'. Use skb_copy_bits() instead which does the
right thing in this situation.

Fixes: 133dc203d77df ("netfilter: nft_exthdr: Support SCTP chunks")
Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Phil Sutter <phil@nwl.cc>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_exthdr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1b0579cb62d0..7f705b5c09de 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -327,7 +327,9 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
 				break;
 
 			dest[priv->len / NFT_REG32_SIZE] = 0;
-			memcpy(dest, (char *)sch + priv->offset, priv->len);
+			if (skb_copy_bits(pkt->skb, offset + priv->offset,
+					  dest, priv->len) < 0)
+				break;
 			return;
 		}
 		offset += SCTP_PAD4(ntohs(sch->length));

From 619ed58ac4e2fbed71ac05f664a4a17e32537176 Mon Sep 17 00:00:00 2001
From: Sergey Nazarov <s-nazarov@yandex.ru>
Date: Tue, 8 Jun 2021 09:32:06 +0300
Subject: [PATCH 1241/3292] audit: Rename enum audit_state constants to avoid
 AUDIT_DISABLED redefinition

AUDIT_DISABLED defined in kernel/audit.h as element of enum audit_state
and redefined in kernel/audit.c. This produces a warning when kernel builds
with syscalls audit disabled and brokes kernel build if -Werror used.
enum audit_state used in syscall audit code only. This patch changes
enum audit_state constants prefix AUDIT to AUDIT_STATE to avoid
AUDIT_DISABLED redefinition.

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.h   |  8 ++++----
 kernel/auditsc.c | 34 +++++++++++++++++-----------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/kernel/audit.h b/kernel/audit.h
index 1522e100fd17..e518ad9374fc 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -21,16 +21,16 @@
    a per-task filter.  At syscall entry, the audit_state is augmented by
    the syscall filter. */
 enum audit_state {
-	AUDIT_DISABLED,		/* Do not create per-task audit_context.
+	AUDIT_STATE_DISABLED,	/* Do not create per-task audit_context.
 				 * No syscall-specific audit records can
 				 * be generated. */
-	AUDIT_BUILD_CONTEXT,	/* Create the per-task audit_context,
+	AUDIT_STATE_BUILD,	/* Create the per-task audit_context,
 				 * and fill it in at syscall
 				 * entry time.  This makes a full
 				 * syscall record available if some
 				 * other part of the kernel decides it
 				 * should be recorded. */
-	AUDIT_RECORD_CONTEXT	/* Create the per-task audit_context,
+	AUDIT_STATE_RECORD	/* Create the per-task audit_context,
 				 * always fill it in at syscall entry
 				 * time, and always write out the audit
 				 * record at syscall exit time.  */
@@ -322,7 +322,7 @@ static inline int audit_signal_info_syscall(struct task_struct *t)
 	return 0;
 }
 
-#define audit_filter_inodes(t, c) AUDIT_DISABLED
+#define audit_filter_inodes(t, c) AUDIT_STATE_DISABLED
 #endif /* CONFIG_AUDITSYSCALL */
 
 extern char *audit_unpack_string(void **bufp, size_t *remain, size_t len);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 0a9a1569f1ea..123f9dc12665 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -232,7 +232,7 @@ static void audit_set_auditable(struct audit_context *ctx)
 {
 	if (!ctx->prio) {
 		ctx->prio = 1;
-		ctx->current_state = AUDIT_RECORD_CONTEXT;
+		ctx->current_state = AUDIT_STATE_RECORD;
 	}
 }
 
@@ -757,10 +757,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 	}
 	switch (rule->action) {
 	case AUDIT_NEVER:
-		*state = AUDIT_DISABLED;
+		*state = AUDIT_STATE_DISABLED;
 		break;
 	case AUDIT_ALWAYS:
-		*state = AUDIT_RECORD_CONTEXT;
+		*state = AUDIT_STATE_RECORD;
 		break;
 	}
 	return 1;
@@ -779,14 +779,14 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
 		if (audit_filter_rules(tsk, &e->rule, NULL, NULL,
 				       &state, true)) {
-			if (state == AUDIT_RECORD_CONTEXT)
+			if (state == AUDIT_STATE_RECORD)
 				*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
 			rcu_read_unlock();
 			return state;
 		}
 	}
 	rcu_read_unlock();
-	return AUDIT_BUILD_CONTEXT;
+	return AUDIT_STATE_BUILD;
 }
 
 static int audit_in_mask(const struct audit_krule *rule, unsigned long val)
@@ -808,7 +808,7 @@ static int audit_in_mask(const struct audit_krule *rule, unsigned long val)
 /* At syscall exit time, this filter is called if the audit_state is
  * not low enough that auditing cannot take place, but is also not
  * high enough that we already know we have to write an audit record
- * (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT).
+ * (i.e., the state is AUDIT_STATE_BUILD).
  */
 static void audit_filter_syscall(struct task_struct *tsk,
 				 struct audit_context *ctx)
@@ -929,7 +929,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
 	if (!context)
 		return NULL;
 	context->state = state;
-	context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
+	context->prio = state == AUDIT_STATE_RECORD ? ~0ULL : 0;
 	INIT_LIST_HEAD(&context->killed_trees);
 	INIT_LIST_HEAD(&context->names_list);
 	context->fds[0] = -1;
@@ -956,7 +956,7 @@ int audit_alloc(struct task_struct *tsk)
 		return 0; /* Return if not auditing. */
 
 	state = audit_filter_task(tsk, &key);
-	if (state == AUDIT_DISABLED) {
+	if (state == AUDIT_STATE_DISABLED) {
 		clear_task_syscall_work(tsk, SYSCALL_AUDIT);
 		return 0;
 	}
@@ -1639,7 +1639,7 @@ void __audit_free(struct task_struct *tsk)
 
 		audit_filter_syscall(tsk, context);
 		audit_filter_inodes(tsk, context);
-		if (context->current_state == AUDIT_RECORD_CONTEXT)
+		if (context->current_state == AUDIT_STATE_RECORD)
 			audit_log_exit();
 	}
 
@@ -1658,7 +1658,7 @@ void __audit_free(struct task_struct *tsk)
  * Fill in audit context at syscall entry.  This only happens if the
  * audit context was created when the task was created and the state or
  * filters demand the audit context be built.  If the state from the
- * per-task filter or from the per-syscall filter is AUDIT_RECORD_CONTEXT,
+ * per-task filter or from the per-syscall filter is AUDIT_STATE_RECORD,
  * then the record will be written at syscall exit time (otherwise, it
  * will only be written if another part of the kernel requests that it
  * be written).
@@ -1675,11 +1675,11 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
 	BUG_ON(context->in_syscall || context->name_count);
 
 	state = context->state;
-	if (state == AUDIT_DISABLED)
+	if (state == AUDIT_STATE_DISABLED)
 		return;
 
 	context->dummy = !audit_n_rules;
-	if (!context->dummy && state == AUDIT_BUILD_CONTEXT) {
+	if (!context->dummy && state == AUDIT_STATE_BUILD) {
 		context->prio = 0;
 		if (auditd_test_task(current))
 			return;
@@ -1704,7 +1704,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
  * @return_code: return value of the syscall
  *
  * Tear down after system call.  If the audit context has been marked as
- * auditable (either because of the AUDIT_RECORD_CONTEXT state from
+ * auditable (either because of the AUDIT_STATE_RECORD state from
  * filtering, or because some other part of the kernel wrote an audit
  * message), then write out the syscall information.  In call cases,
  * free the names stored from getname().
@@ -1746,12 +1746,12 @@ void __audit_syscall_exit(int success, long return_code)
 
 		audit_filter_syscall(current, context);
 		audit_filter_inodes(current, context);
-		if (context->current_state == AUDIT_RECORD_CONTEXT)
+		if (context->current_state == AUDIT_STATE_RECORD)
 			audit_log_exit();
 	}
 
 	context->in_syscall = 0;
-	context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
+	context->prio = context->state == AUDIT_STATE_RECORD ? ~0ULL : 0;
 
 	audit_free_module(context);
 	audit_free_names(context);
@@ -1764,7 +1764,7 @@ void __audit_syscall_exit(int success, long return_code)
 	context->sockaddr_len = 0;
 	context->type = 0;
 	context->fds[0] = -1;
-	if (context->state != AUDIT_RECORD_CONTEXT) {
+	if (context->state != AUDIT_STATE_RECORD) {
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 	}
@@ -2217,7 +2217,7 @@ int auditsc_get_stamp(struct audit_context *ctx,
 	*serial    = ctx->serial;
 	if (!ctx->prio) {
 		ctx->prio = 1;
-		ctx->current_state = AUDIT_RECORD_CONTEXT;
+		ctx->current_state = AUDIT_STATE_RECORD;
 	}
 	return 1;
 }

From dd8b865cc40832d32bbf912a65c657483533fdd4 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Tue, 8 Jun 2021 11:11:50 +0800
Subject: [PATCH 1242/3292] audit: Use list_move instead of list_del/list_add

Using list_move() instead of list_del() + list_add().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit_tree.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 6c91902f4f45..b2be4e978ba3 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -689,8 +689,7 @@ void audit_trim_trees(void)
 
 		tree = container_of(cursor.next, struct audit_tree, list);
 		get_tree(tree);
-		list_del(&cursor);
-		list_add(&cursor, &tree->list);
+		list_move(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
 		err = kern_path(tree->pathname, 0, &path);
@@ -899,8 +898,7 @@ int audit_tag_tree(char *old, char *new)
 
 		tree = container_of(cursor.next, struct audit_tree, list);
 		get_tree(tree);
-		list_del(&cursor);
-		list_add(&cursor, &tree->list);
+		list_move(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
 		err = kern_path(tree->pathname, 0, &path2);
@@ -925,8 +923,7 @@ int audit_tag_tree(char *old, char *new)
 		mutex_lock(&audit_filter_mutex);
 		spin_lock(&hash_lock);
 		if (!tree->goner) {
-			list_del(&tree->list);
-			list_add(&tree->list, &tree_list);
+			list_move(&tree->list, &tree_list);
 		}
 		spin_unlock(&hash_lock);
 		put_tree(tree);
@@ -937,8 +934,7 @@ int audit_tag_tree(char *old, char *new)
 
 		tree = container_of(barrier.prev, struct audit_tree, list);
 		get_tree(tree);
-		list_del(&tree->list);
-		list_add(&tree->list, &barrier);
+		list_move(&tree->list, &barrier);
 		mutex_unlock(&audit_filter_mutex);
 
 		if (!failed) {

From 2aa8eca6cbb5912aa0c07ebecb846b3d6182415c Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Tue, 8 Jun 2021 18:52:57 -0700
Subject: [PATCH 1243/3292] net: appletalk: fix some mistakes in grammar

Fix some mistakes in grammar.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index ebda397fa95a..8ade5a4ceaf5 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -707,7 +707,7 @@ static int atif_ioctl(int cmd, void __user *arg)
 
 		/*
 		 * Phase 1 is fine on LocalTalk but we don't do
-		 * EtherTalk phase 1. Anyone wanting to add it go ahead.
+		 * EtherTalk phase 1. Anyone wanting to add it, go ahead.
 		 */
 		if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
 			return -EPROTONOSUPPORT;
@@ -828,7 +828,7 @@ static int atif_ioctl(int cmd, void __user *arg)
 		nr = (struct atalk_netrange *)&(atif->nets);
 		/*
 		 * Phase 1 is fine on Localtalk but we don't do
-		 * Ethertalk phase 1. Anyone wanting to add it go ahead.
+		 * Ethertalk phase 1. Anyone wanting to add it, go ahead.
 		 */
 		if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
 			return -EPROTONOSUPPORT;
@@ -2018,7 +2018,7 @@ module_init(atalk_init);
  * by the network device layer.
  *
  * Ergo, before the AppleTalk module can be removed, all AppleTalk
- * sockets be closed from user space.
+ * sockets should be closed from user space.
  */
 static void __exit atalk_exit(void)
 {

From 152bca090243f2aebbf4c0a2aa723ab610e6f3c4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 5 Jun 2021 12:54:43 +0200
Subject: [PATCH 1244/3292] xfrm: remove description from xfrm_type struct

Its set but never read. Reduces size of xfrm_type to 64 bytes on 64bit.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      | 2 --
 net/ipv4/ah4.c          | 1 -
 net/ipv4/esp4.c         | 1 -
 net/ipv4/esp4_offload.c | 1 -
 net/ipv4/ipcomp.c       | 1 -
 net/ipv4/xfrm4_tunnel.c | 1 -
 net/ipv6/ah6.c          | 1 -
 net/ipv6/esp6.c         | 1 -
 net/ipv6/esp6_offload.c | 1 -
 net/ipv6/ipcomp6.c      | 1 -
 net/ipv6/mip6.c         | 2 --
 net/ipv6/xfrm6_tunnel.c | 1 -
 12 files changed, 14 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 6e11db6fa0ab..1aad78c5f2d5 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -388,7 +388,6 @@ void xfrm_flush_gc(void);
 void xfrm_state_delete_tunnel(struct xfrm_state *x);
 
 struct xfrm_type {
-	char			*description;
 	struct module		*owner;
 	u8			proto;
 	u8			flags;
@@ -410,7 +409,6 @@ int xfrm_register_type(const struct xfrm_type *type, unsigned short family);
 void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family);
 
 struct xfrm_type_offload {
-	char		*description;
 	struct module	*owner;
 	u8		proto;
 	void		(*encap)(struct xfrm_state *, struct sk_buff *pskb);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 36ed85bf2ad5..2d2d08aa787d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -554,7 +554,6 @@ static int ah4_rcv_cb(struct sk_buff *skb, int err)
 
 static const struct xfrm_type ah_type =
 {
-	.description	= "AH4",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_AH,
 	.flags		= XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 35803ab7ac80..f5362b9d75eb 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1198,7 +1198,6 @@ static int esp4_rcv_cb(struct sk_buff *skb, int err)
 
 static const struct xfrm_type esp_type =
 {
-	.description	= "ESP4",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_ESP,
 	.flags		= XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index be019a1fe3af..8e4e9aa12130 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -342,7 +342,6 @@ static const struct net_offload esp4_offload = {
 };
 
 static const struct xfrm_type_offload esp_type_offload = {
-	.description	= "ESP4 OFFLOAD",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_ESP,
 	.input_tail	= esp_input_tail,
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index b42683212c65..2e69e81e1f5d 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -152,7 +152,6 @@ static int ipcomp4_rcv_cb(struct sk_buff *skb, int err)
 }
 
 static const struct xfrm_type ipcomp_type = {
-	.description	= "IPCOMP4",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_COMP,
 	.init_state	= ipcomp4_init_state,
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index fb0648e7fb32..f4555a88f86b 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -42,7 +42,6 @@ static void ipip_destroy(struct xfrm_state *x)
 }
 
 static const struct xfrm_type ipip_type = {
-	.description	= "IPIP",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_IPIP,
 	.init_state	= ipip_init_state,
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 20d492da725a..e9705c256068 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -755,7 +755,6 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err)
 }
 
 static const struct xfrm_type ah6_type = {
-	.description	= "AH6",
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_AH,
 	.flags		= XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 393ae2b78e7d..be2c0ac76eaa 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1243,7 +1243,6 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err)
 }
 
 static const struct xfrm_type esp6_type = {
-	.description	= "ESP6",
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_ESP,
 	.flags		= XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 40ed4fcf1cf4..a349d4798077 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -377,7 +377,6 @@ static const struct net_offload esp6_offload = {
 };
 
 static const struct xfrm_type_offload esp6_type_offload = {
-	.description	= "ESP6 OFFLOAD",
 	.owner		= THIS_MODULE,
 	.proto	     	= IPPROTO_ESP,
 	.input_tail	= esp6_input_tail,
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index daef890460b7..491aba66b7ae 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -172,7 +172,6 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
 }
 
 static const struct xfrm_type ipcomp6_type = {
-	.description	= "IPCOMP6",
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_COMP,
 	.init_state	= ipcomp6_init_state,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 878fcec14949..bc560e1664aa 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -324,7 +324,6 @@ static void mip6_destopt_destroy(struct xfrm_state *x)
 }
 
 static const struct xfrm_type mip6_destopt_type = {
-	.description	= "MIP6DESTOPT",
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_DSTOPTS,
 	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
@@ -456,7 +455,6 @@ static void mip6_rthdr_destroy(struct xfrm_state *x)
 }
 
 static const struct xfrm_type mip6_rthdr_type = {
-	.description	= "MIP6RT",
 	.owner		= THIS_MODULE,
 	.proto		= IPPROTO_ROUTING,
 	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f696d46e6910..2b31112c0856 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -291,7 +291,6 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x)
 }
 
 static const struct xfrm_type xfrm6_tunnel_type = {
-	.description	= "IP6IP6",
 	.owner          = THIS_MODULE,
 	.proto		= IPPROTO_IPV6,
 	.init_state	= xfrm6_tunnel_init_state,

From 0ec4e55e9f571f08970ed115ec0addc691eda613 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 9 Jun 2021 10:14:42 +0800
Subject: [PATCH 1245/3292] ACPI: resources: Add checks for ACPI IRQ override

The laptop keyboard doesn't work on many MEDION notebooks, but the
keyboard works well under Windows and Unix.

Through debugging, we found this log in the dmesg:

 ACPI: IRQ 1 override to edge, high
 pnp 00:03: Plug and Play ACPI device, IDs PNP0303 (active)

 And we checked the IRQ definition in the DSDT, it is:

    IRQ (Level, ActiveLow, Exclusive, )
        {1}

So the BIOS defines the keyboard IRQ to Level_Low, but the Linux
kernel override it to Edge_High. If the Linux kernel is modified
to skip the IRQ override, the keyboard will work normally.

From the existing comment in acpi_dev_get_irqresource(), the override
function only needs to be called when IRQ() or IRQNoFlags() is used
to populate the resource descriptor, and according to Section 6.4.2.1
of ACPI 6.4 [1], if IRQ() is empty or IRQNoFlags() is used, the IRQ
is High true, edge sensitive and non-shareable. ACPICA also assumes
that to be the case (see acpi_rs_set_irq[] in rsirq.c).

In accordance with the above, check 3 additional conditions
(EdgeSensitive, ActiveHigh and Exclusive) when deciding whether or
not to treat an ACPI_RESOURCE_TYPE_IRQ resource as "legacy", in which
case the IRQ override is applicable to it.

Link: https://uefi.org/specs/ACPI/6.4/06_Device_Configuration/Device_Configuration.html#irq-descriptor # [1]
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213031
BugLink: http://bugs.launchpad.net/bugs/1909814
Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-by: Manuel Krause <manuelkrause@netscape.net>
Tested-by: Manuel Krause <manuelkrause@netscape.net>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
[ rjw: Subject rewrite, changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/resource.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index ee78a210c606..dc01fb550b28 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -423,6 +423,13 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
 	}
 }
 
+static bool irq_is_legacy(struct acpi_resource_irq *irq)
+{
+	return irq->triggering == ACPI_EDGE_SENSITIVE &&
+		irq->polarity == ACPI_ACTIVE_HIGH &&
+		irq->shareable == ACPI_EXCLUSIVE;
+}
+
 /**
  * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
  * @ares: Input ACPI resource object.
@@ -461,7 +468,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
 		}
 		acpi_dev_get_irqresource(res, irq->interrupts[index],
 					 irq->triggering, irq->polarity,
-					 irq->shareable, true);
+					 irq->shareable, irq_is_legacy(irq));
 		break;
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		ext_irq = &ares->data.extended_irq;

From 17aa26c96fb240de92db90ec1bfd616f28b6dc16 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 8 Jun 2021 20:35:32 +0800
Subject: [PATCH 1246/3292] PNP: pnpbios: Use list_for_each_entry() instead of
 list_for_each()

Use list_for_each_entry() instead of list_for_each() where
applicable.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
[ rjw: Subject and changelog rewrite ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/pnpbios/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 9b760e73ee8f..669ef4700c1a 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -298,14 +298,12 @@ struct pnp_protocol pnpbios_protocol = {
 
 static int __init insert_device(struct pnp_bios_node *node)
 {
-	struct list_head *pos;
 	struct pnp_dev *dev;
 	char id[8];
 	int error;
 
 	/* check if the device is already added */
-	list_for_each(pos, &pnpbios_protocol.devices) {
-		dev = list_entry(pos, struct pnp_dev, protocol_list);
+	list_for_each_entry(dev, &pnpbios_protocol.devices, protocol_list) {
 		if (dev->number == node->handle)
 			return -EEXIST;
 	}

From a624eb520390cecf644a8906c982fd53b2afcc49 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 7 Jun 2021 16:52:43 -0700
Subject: [PATCH 1247/3292] libnvdimm/pmem: Fix blk_cleanup_disk() usage

The queue_to_disk() helper can not be used after del_gendisk()
communicate @disk via the pgmap->owner.

Otherwise, queue_to_disk() returns NULL resulting in the splat below.

 Kernel attempted to read user page (330) - exploit attempt? (uid: 0)
 BUG: Kernel NULL pointer dereference on read at 0x00000330
 Faulting instruction address: 0xc000000000906344
 Oops: Kernel access of bad area, sig: 11 [#1]
 [..]
 NIP [c000000000906344] pmem_pagemap_cleanup+0x24/0x40
 LR [c0000000004701d4] memunmap_pages+0x1b4/0x4b0
 Call Trace:
 [c000000022cbb9c0] [c0000000009063c8] pmem_pagemap_kill+0x28/0x40 (unreliable)
 [c000000022cbb9e0] [c0000000004701d4] memunmap_pages+0x1b4/0x4b0
 [c000000022cbba90] [c0000000008b28a0] devm_action_release+0x30/0x50
 [c000000022cbbab0] [c0000000008b39c8] release_nodes+0x2f8/0x3e0
 [c000000022cbbb60] [c0000000008ac440] device_release_driver_internal+0x190/0x2b0
 [c000000022cbbba0] [c0000000008a8450] unbind_store+0x130/0x170

Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Fixes: 87eb73b2ca7c ("nvdimm-pmem: convert to blk_alloc_disk/blk_cleanup_disk")
Link: http://lore.kernel.org/r/DFB75BA8-603F-4A35-880B-C5B23EF8FA7D@linux.vnet.ibm.com
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Link: https://lore.kernel.org/r/162310994435.1571616.334551212901820961.stgit@dwillia2-desk3.amr.corp.intel.com
[axboe: fold in compile warning fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvdimm/pmem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 31f3c4bd6f72..1e0615b8565e 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -335,10 +335,9 @@ static const struct attribute_group *pmem_attribute_groups[] = {
 
 static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
 {
-	struct request_queue *q =
-		container_of(pgmap->ref, struct request_queue, q_usage_counter);
+	struct pmem_device *pmem = pgmap->owner;
 
-	blk_cleanup_disk(queue_to_disk(q));
+	blk_cleanup_disk(pmem->disk);
 }
 
 static void pmem_release_queue(void *pgmap)
@@ -427,6 +426,7 @@ static int pmem_attach_disk(struct device *dev,
 	q = disk->queue;
 
 	pmem->disk = disk;
+	pmem->pgmap.owner = pmem;
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {

From 76cdb09b38afb4ffb031b56ebc41cb33ddcd85fb Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 20:11:25 +0800
Subject: [PATCH 1248/3292] aoe: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/aoe/aoechr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index ab41be625a53..8eea2529da20 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -140,10 +140,8 @@ bail:		spin_unlock_irqrestore(&emsgs_lock, flags);
 	}
 
 	mp = kmemdup(msg, n, GFP_ATOMIC);
-	if (mp == NULL) {
-		printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n);
+	if (!mp)
 		goto bail;
-	}
 
 	em->msg = mp;
 	em->flags |= EMFL_VALID;

From 8404e19194813d850e89fb3504223c09aa9776f3 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 20:14:26 +0800
Subject: [PATCH 1249/3292] drbd: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_receiver.c | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 69284ebba786..1f740e42e457 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3770,10 +3770,8 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
 	}
 
 	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
-	if (!new_net_conf) {
-		drbd_err(connection, "Allocation of new net_conf failed\n");
+	if (!new_net_conf)
 		goto disconnect;
-	}
 
 	mutex_lock(&connection->data.mutex);
 	mutex_lock(&connection->resource->conf_update);
@@ -4020,10 +4018,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
 
 		if (verify_tfm || csums_tfm) {
 			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
-			if (!new_net_conf) {
-				drbd_err(device, "Allocation of new net_conf failed\n");
+			if (!new_net_conf)
 				goto disconnect;
-			}
 
 			*new_net_conf = *old_net_conf;
 
@@ -4161,7 +4157,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 
 			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
 			if (!new_disk_conf) {
-				drbd_err(device, "Allocation of new disk_conf failed\n");
 				put_ldev(device);
 				return -ENOMEM;
 			}
@@ -4288,10 +4283,8 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
 	device = peer_device->device;
 
 	p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
-	if (!p_uuid) {
-		drbd_err(device, "kmalloc of p_uuid failed\n");
+	if (!p_uuid)
 		return false;
-	}
 
 	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
 		p_uuid[i] = be64_to_cpu(p->uuid[i]);
@@ -5484,8 +5477,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
 	}
 
 	peers_ch = kmalloc(pi.size, GFP_NOIO);
-	if (peers_ch == NULL) {
-		drbd_err(connection, "kmalloc of peers_ch failed\n");
+	if (!peers_ch) {
 		rv = -1;
 		goto fail;
 	}
@@ -5504,8 +5496,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
 
 	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
 	response = kmalloc(resp_size, GFP_NOIO);
-	if (response == NULL) {
-		drbd_err(connection, "kmalloc of response failed\n");
+	if (!response) {
 		rv = -1;
 		goto fail;
 	}
@@ -5552,8 +5543,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
 	}
 
 	right_response = kmalloc(resp_size, GFP_NOIO);
-	if (right_response == NULL) {
-		drbd_err(connection, "kmalloc of right_response failed\n");
+	if (!right_response) {
 		rv = -1;
 		goto fail;
 	}

From ce9a8ca68aec3fe2b817e38d169b792214e5fda0 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 20:19:58 +0800
Subject: [PATCH 1250/3292] mtip32xx: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/mtip32xx/mtip32xx.c | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 589cb0f1e030..ff3e7b3f5ad8 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2238,7 +2238,6 @@ static ssize_t show_device_status(struct device_driver *drv, char *buf)
 static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
 						size_t len, loff_t *offset)
 {
-	struct driver_data *dd =  (struct driver_data *)f->private_data;
 	int size = *offset;
 	char *buf;
 	int rv = 0;
@@ -2247,11 +2246,8 @@ static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
 		return 0;
 
 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
-	if (!buf) {
-		dev_err(&dd->pdev->dev,
-			"Memory allocation: status buffer\n");
+	if (!buf)
 		return -ENOMEM;
-	}
 
 	size += show_device_status(NULL, buf);
 
@@ -2277,11 +2273,8 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
 		return 0;
 
 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
-	if (!buf) {
-		dev_err(&dd->pdev->dev,
-			"Memory allocation: register buffer\n");
+	if (!buf)
 		return -ENOMEM;
-	}
 
 	size += sprintf(&buf[size], "H/ S ACTive      : [ 0x");
 
@@ -2343,11 +2336,8 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
 		return 0;
 
 	buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
-	if (!buf) {
-		dev_err(&dd->pdev->dev,
-			"Memory allocation: flag buffer\n");
+	if (!buf)
 		return -ENOMEM;
-	}
 
 	size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
 							dd->port->flags);
@@ -2884,11 +2874,8 @@ static int mtip_hw_init(struct driver_data *dd)
 
 	dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
 				dd->numa_node);
-	if (!dd->port) {
-		dev_err(&dd->pdev->dev,
-			"Memory allocation: port structure\n");
+	if (!dd->port)
 		return -ENOMEM;
-	}
 
 	/* Continue workqueue setup */
 	for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
@@ -4002,11 +3989,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 		cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id());
 
 	dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
-	if (dd == NULL) {
-		dev_err(&pdev->dev,
-			"Unable to allocate memory for driver data\n");
+	if (!dd)
 		return -ENOMEM;
-	}
 
 	/* Attach the private data to this PCI device.  */
 	pci_set_drvdata(pdev, dd);

From 6597efa6c58fa9f02f624e3e99bb00e73c32bcb2 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 20:23:27 +0800
Subject: [PATCH 1251/3292] sunvdc: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/sunvdc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 39aeebc6837d..448970491bf8 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -1001,9 +1001,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	}
 
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
-	err = -ENOMEM;
 	if (!port) {
-		printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
+		err = -ENOMEM;
 		goto err_out_release_mdesc;
 	}
 

From c744b06254a3a163c6bcf70bb21f0241107271fc Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 20:24:50 +0800
Subject: [PATCH 1252/3292] sx8: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/sx8.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 2cdf2771f8e8..71dcfde042ab 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1429,8 +1429,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	host = kzalloc(sizeof(*host), GFP_KERNEL);
 	if (!host) {
-		printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n",
-		       pci_name(pdev));
 		rc = -ENOMEM;
 		goto err_out_regions;
 	}

From ec1e7e8853b62cb971828d66a1f298a280182831 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 20:27:39 +0800
Subject: [PATCH 1253/3292] z2ram: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/z2ram.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index c1d20818e649..dce119f697a7 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -236,11 +236,8 @@ static int z2_open(struct block_device *bdev, fmode_t mode)
 
 			case Z2MINOR_Z2ONLY:
 				z2ram_map = kmalloc(max_z2_map, GFP_KERNEL);
-				if (z2ram_map == NULL) {
-					printk(KERN_ERR DEVICE_NAME
-					       ": cannot get mem for z2ram_map\n");
+				if (!z2ram_map)
 					goto err_out;
-				}
 
 				get_z2ram();
 
@@ -253,11 +250,8 @@ static int z2_open(struct block_device *bdev, fmode_t mode)
 
 			case Z2MINOR_CHIPONLY:
 				z2ram_map = kmalloc(max_chip_map, GFP_KERNEL);
-				if (z2ram_map == NULL) {
-					printk(KERN_ERR DEVICE_NAME
-					       ": cannot get mem for z2ram_map\n");
+				if (!z2ram_map)
 					goto err_out;
-				}
 
 				get_chipram();
 

From 64233338499126c5c31e07165735ab5441c7e45a Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 28 May 2021 11:20:54 +0800
Subject: [PATCH 1254/3292] intel_idle: Adjust the SKX C6 parameters if PC6 is
 disabled

Because cpuidle assumes worst-case C-state parameters, PC6 parameters
are used for describing C6, which is worst-case for requesting CC6.
When PC6 is enabled, this is appropriate. But if PC6 is disabled
in the BIOS, the exit latency and target residency should be adjusted
accordingly.

Exit latency:
Previously the C6 exit latency was measured as the PC6 exit latency.
With PC6 disabled, the C6 exit latency should be the one of CC6.

Target residency:
With PC6 disabled, the idle duration within [CC6, PC6) would make the
idle governor choose C1E over C6. This would cause low energy-efficiency.
We should lower the bar to request C6 when PC6 is disabled.

To fill this gap, check if PC6 is disabled in the BIOS in the
MSR_PKG_CST_CONFIG_CONTROL(0xe2) register. If so, use the CC6 exit latency
for C6 and set target_residency to 3 times of the new exit latency. [This
is consistent with how intel_idle driver uses _CST to calculate the
target_residency.] As a result, the OS would be more likely to choose C6
over C1E when PC6 is disabled, which is reasonable, because if C6 is
enabled, it implies that the user cares about energy, so choosing C6 more
frequently makes sense.

The new CC6 exit latency of 92us was measured with wult[1] on SKX via NIC
wakeup as the 99.99th percentile. Also CLX and CPX both have the same CPU
model number as SkX, but their CC6 exit latencies are similar to the SKX
one, 96us and 89us respectively, so reuse the SKX value for them.

There is a concern that it might be better to use a more generic approach
instead of optimizing every platform. However, if the required code
complexity and different PC6 bit interpretation on different platforms
are taken into account, tuning the code per platform seems to be an
acceptable tradeoff.

Link: https://intel.github.io/wult/ # [1]
Suggested-by: Len Brown <len.brown@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reviewed-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index ec1b9d306ba6..e6c543b5ee1d 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1484,6 +1484,36 @@ static void __init sklh_idle_state_table_update(void)
 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
 }
 
+/**
+ * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
+ * idle states table.
+ */
+static void __init skx_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+	/*
+	 * 000b: C0/C1 (no package C-state support)
+	 * 001b: C2
+	 * 010b: C6 (non-retention)
+	 * 011b: C6 (retention)
+	 * 111b: No Package C state limits.
+	 */
+	if ((msr & 0x7) < 2) {
+		/*
+		 * Uses the CC6 + PC0 latency and 3 times of
+		 * latency for target_residency if the PC6
+		 * is disabled in BIOS. This is consistent
+		 * with how intel_idle driver uses _CST
+		 * to set the target_residency.
+		 */
+		skx_cstates[2].exit_latency = 92;
+		skx_cstates[2].target_residency = 276;
+	}
+}
+
 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
 {
 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1515,6 +1545,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	case INTEL_FAM6_SKYLAKE:
 		sklh_idle_state_table_update();
 		break;
+	case INTEL_FAM6_SKYLAKE_X:
+		skx_idle_state_table_update();
+		break;
 	}
 
 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {

From 5302560bb49d38bf6e62a47c44e19ef04bd5344d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 8 Jun 2021 16:34:08 +0100
Subject: [PATCH 1255/3292] netfilter: nfnetlink_hook: fix array index
 out-of-bounds error

Currently the array net->nf.hooks_ipv6 is accessed by index hook
before hook is sanity checked. Fix this by moving the sanity check
to before the array access.

Addresses-Coverity: ("Out-of-bounds access")
Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_hook.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 04586dfa2acd..58fda6ac663b 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -181,9 +181,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
 		hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
 		break;
 	case NFPROTO_IPV6:
-		hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
 		if (hook >= ARRAY_SIZE(net->nf.hooks_ipv6))
 			return ERR_PTR(-EINVAL);
+		hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
 		break;
 	case NFPROTO_ARP:
 #ifdef CONFIG_NETFILTER_FAMILY_ARP

From d4fb1f954fc7e2044b64b7d690400b99a6d5775c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jun 2021 22:53:22 +0200
Subject: [PATCH 1256/3292] netfilter: nfnetlink_hook: add depends-on nftables

nfnetlink_hook.c: In function 'nfnl_hook_put_nft_chain_info':
nfnetlink_hook.c:76:7: error: implicit declaration of 'nft_is_active'

This macro is only defined when NF_TABLES is enabled.
While its possible to also add an ifdef-guard, the infrastructure
is currently not useful without nf_tables.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 252956528caa ("netfilter: add new hook nfnl subsystem")
Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c81321372198..54395266339d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -22,6 +22,7 @@ config NETFILTER_FAMILY_ARP
 config NETFILTER_NETLINK_HOOK
 	tristate "Netfilter base hook dump support"
 	depends on NETFILTER_ADVANCED
+	depends on NF_TABLES
 	select NETFILTER_NETLINK
 	help
 	  If this option is enabled, the kernel will include support

From c5c6accd7b7e10434d6afda4f6a5107c480bb4fb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jun 2021 23:06:07 +0200
Subject: [PATCH 1257/3292] netfilter: nf_tables: move base hook annotation to
 init helper

coverity scanner says:
2187  if (nft_is_base_chain(chain)) {
vvv   CID 1505166:  Memory - corruptions  (UNINIT)
vvv   Using uninitialized value "basechain".
2188  basechain->ops.hook_ops_type = NF_HOOK_OP_NF_TABLES;

... I don't see how nft_is_base_chain() can evaluate to true
while basechain pointer is garbage.

However, it seems better to place the NF_HOOK_OP_NF_TABLES annotation
in nft_basechain_hook_init() instead.

Reported-by: coverity-bot <keescook+coverity-bot@chromium.org>
Addresses-Coverity-ID: 1505166 ("Memory - corruptions")
Fixes: 65b8b7bfc5284f ("netfilter: annotate nf_tables base hook ops")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c9308241b688..caaff7ab9e73 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1997,11 +1997,12 @@ static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
 				    const struct nft_chain_hook *hook,
 				    struct nft_chain *chain)
 {
-	ops->pf		= family;
-	ops->hooknum	= hook->num;
-	ops->priority	= hook->priority;
-	ops->priv	= chain;
-	ops->hook	= hook->type->hooks[ops->hooknum];
+	ops->pf			= family;
+	ops->hooknum		= hook->num;
+	ops->priority		= hook->priority;
+	ops->priv		= chain;
+	ops->hook		= hook->type->hooks[ops->hooknum];
+	ops->hook_ops_type	= NF_HOOK_OP_NF_TABLES;
 }
 
 static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
@@ -2168,10 +2169,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	}
 
 	nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
-	if (nft_is_base_chain(chain)) {
-		basechain->ops.hook_ops_type = NF_HOOK_OP_NF_TABLES;
+	if (nft_is_base_chain(chain))
 		nft_trans_chain_policy(trans) = policy;
-	}
 
 	err = nft_chain_add(table, chain);
 	if (err < 0) {

From 2c32a3d3c233b855943677609fe388f82b1f0975 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 8 Jun 2021 14:22:04 -0700
Subject: [PATCH 1258/3292] net: dsa: b53: Do not force CPU to be always tagged

Commit ca8931948344 ("net: dsa: b53: Keep CPU port as tagged in all
VLANs") forced the CPU port to be always tagged in any VLAN membership.
This was necessary back then because we did not support Broadcom tags
for all configurations so the only way to differentiate tagged and
untagged traffic while DSA_TAG_PROTO_NONE was used was to force the CPU
port into being always tagged.

With most configurations enabling Broadcom tags, especially after
8fab459e69ab ("net: dsa: b53: Enable Broadcom tags for 531x5/539x
families") we do not need to apply this unconditional force tagging of
the CPU port in all VLANs.

A helper function is introduced to faciliate the encapsulation of the
specific condition requiring the CPU port to be tagged in all VLANs and
the dsa_switch_ops::untag_bridge_pvid boolean is moved to when
dsa_switch_ops::setup is called when we have already determined the
tagging protocol we will be using.

Reported-by: Matthew Hagan <mnhagan88@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Tested-by: Matthew Hagan <mnhagan88@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 3ca6b394dd5f..6e199454e41d 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1084,6 +1084,11 @@ static int b53_setup(struct dsa_switch *ds)
 	unsigned int port;
 	int ret;
 
+	/* Request bridge PVID untagged when DSA_TAG_PROTO_NONE is set
+	 * which forces the CPU port to be tagged in all VLANs.
+	 */
+	ds->untag_bridge_pvid = dev->tag_protocol == DSA_TAG_PROTO_NONE;
+
 	ret = b53_reset_switch(dev);
 	if (ret) {
 		dev_err(ds->dev, "failed to reset switch\n");
@@ -1455,6 +1460,13 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	return 0;
 }
 
+static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port)
+{
+	struct b53_device *dev = ds->priv;
+
+	return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port);
+}
+
 int b53_vlan_add(struct dsa_switch *ds, int port,
 		 const struct switchdev_obj_port_vlan *vlan,
 		 struct netlink_ext_ack *extack)
@@ -1477,7 +1489,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port,
 		untagged = true;
 
 	vl->members |= BIT(port);
-	if (untagged && !dsa_is_cpu_port(ds, port))
+	if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port))
 		vl->untag |= BIT(port);
 	else
 		vl->untag &= ~BIT(port);
@@ -1514,7 +1526,7 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
 	if (pvid == vlan->vid)
 		pvid = b53_default_pvid(dev);
 
-	if (untagged && !dsa_is_cpu_port(ds, port))
+	if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port))
 		vl->untag &= ~(BIT(port));
 
 	b53_set_vlan_entry(dev, vlan->vid, vl);
@@ -2660,7 +2672,6 @@ struct b53_device *b53_switch_alloc(struct device *base,
 	dev->priv = priv;
 	dev->ops = ops;
 	ds->ops = &b53_switch_ops;
-	ds->untag_bridge_pvid = true;
 	dev->vlan_enabled = true;
 	/* Let DSA handle the case were multiple bridges span the same switch
 	 * device and different VLAN awareness settings are requested, which

From e89a2cdb1cca513a3f431c9f404fe220dfbf949c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 09:24:44 +0800
Subject: [PATCH 1259/3292] net: ethernet: ravb: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 4afff320dfd0..69c50f81e1cb 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2047,13 +2047,6 @@ static int ravb_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	/* Get base address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "invalid resource\n");
-		return -EINVAL;
-	}
-
 	ndev = alloc_etherdev_mqs(sizeof(struct ravb_private),
 				  NUM_TX_QUEUE, NUM_RX_QUEUE);
 	if (!ndev)
@@ -2065,9 +2058,6 @@ static int ravb_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
-	/* The Ether-specific entries in the device structure. */
-	ndev->base_addr = res->start;
-
 	chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev);
 
 	if (chip_id == RCAR_GEN3)
@@ -2089,12 +2079,15 @@ static int ravb_probe(struct platform_device *pdev)
 	priv->num_rx_ring[RAVB_BE] = BE_RX_RING_SIZE;
 	priv->num_tx_ring[RAVB_NC] = NC_TX_RING_SIZE;
 	priv->num_rx_ring[RAVB_NC] = NC_RX_RING_SIZE;
-	priv->addr = devm_ioremap_resource(&pdev->dev, res);
+	priv->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(priv->addr)) {
 		error = PTR_ERR(priv->addr);
 		goto out_release;
 	}
 
+	/* The Ether-specific entries in the device structure. */
+	ndev->base_addr = res->start;
+
 	spin_lock_init(&priv->lock);
 	INIT_WORK(&priv->work, ravb_tx_timeout_work);
 

From db67f2493431c32935e5b239175df4b0b9cf0171 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Tue, 8 Jun 2021 20:03:17 -0700
Subject: [PATCH 1260/3292] net/x25: fix a mistake in grammar

Fix a mistake in grammar.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 1816899499ce..3583354a7d7f 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -366,7 +366,7 @@ static void x25_destroy_timer(struct timer_list *t)
 
 /*
  *	This is called from user mode and the timers. Thus it protects itself
- *	against interrupt users but doesn't worry about being called during
+ *	against interrupting users but doesn't worry about being called during
  *	work. Once it is removed from the queue no interrupt or bottom half
  *	will touch it and we are (fairly 8-) ) safe.
  *	Not static as it's used by the timer

From 39c3783ec062231b6befc193742161af33e17d88 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:09:21 +0800
Subject: [PATCH 1261/3292] nfp: use list_move instead of list_del/list_add in
 nfp_cppcore.c

Using list_move() instead of list_del() + list_add() in nfp_cppcore.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 94994a939277..d7ac0307797f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -905,8 +905,7 @@ area_cache_put(struct nfp_cpp *cpp, struct nfp_cpp_area_cache *cache)
 		return;
 
 	/* Move to front of LRU */
-	list_del(&cache->entry);
-	list_add(&cache->entry, &cpp->area_cache_list);
+	list_move(&cache->entry, &cpp->area_cache_list);
 
 	mutex_unlock(&cpp->area_cache_mutex);
 }

From 49768ce98c2c1766619dfd6d157dd87826738fa7 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:17:20 +0800
Subject: [PATCH 1262/3292] net: hns3: use list_move_tail instead of
 list_del/list_add_tail in hclgevf_main.c

Using list_move_tail() instead of list_del() + list_add_tail() in hclgevf_main.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c    | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index f84b3a135c06..52eaf82b7cd7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1536,8 +1536,7 @@ static void hclgevf_sync_from_add_list(struct list_head *add_list,
 			kfree(mac_node);
 		} else if (mac_node->state == HCLGEVF_MAC_ACTIVE) {
 			mac_node->state = HCLGEVF_MAC_TO_DEL;
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, mac_list);
+			list_move_tail(&mac_node->node, mac_list);
 		} else {
 			list_del(&mac_node->node);
 			kfree(mac_node);
@@ -1562,8 +1561,7 @@ static void hclgevf_sync_from_del_list(struct list_head *del_list,
 			list_del(&mac_node->node);
 			kfree(mac_node);
 		} else {
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, mac_list);
+			list_move_tail(&mac_node->node, mac_list);
 		}
 	}
 }
@@ -1599,8 +1597,7 @@ static void hclgevf_sync_mac_list(struct hclgevf_dev *hdev,
 	list_for_each_entry_safe(mac_node, tmp, list, node) {
 		switch (mac_node->state) {
 		case HCLGEVF_MAC_TO_DEL:
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, &tmp_del_list);
+			list_move_tail(&mac_node->node, &tmp_del_list);
 			break;
 		case HCLGEVF_MAC_TO_ADD:
 			new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);

From 4724acc47c9441ec1e131299853d1a9e8c3fb2cd Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:20:56 +0800
Subject: [PATCH 1263/3292] net: hns3: use list_move_tail instead of
 list_del/list_add_tail in hclge_main.c

Using list_move_tail() instead of list_del() + list_add_tail() in hclge_main.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c   | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d960e08850ae..c90d7c1550c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -8834,8 +8834,7 @@ static bool hclge_sync_from_add_list(struct list_head *add_list,
 			kfree(mac_node);
 		} else if (mac_node->state == HCLGE_MAC_ACTIVE) {
 			mac_node->state = HCLGE_MAC_TO_DEL;
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, mac_list);
+			list_move_tail(&mac_node->node, mac_list);
 		} else {
 			list_del(&mac_node->node);
 			kfree(mac_node);
@@ -8864,8 +8863,7 @@ static void hclge_sync_from_del_list(struct list_head *del_list,
 			list_del(&mac_node->node);
 			kfree(mac_node);
 		} else {
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, mac_list);
+			list_move_tail(&mac_node->node, mac_list);
 		}
 	}
 }
@@ -8909,8 +8907,7 @@ static void hclge_sync_vport_mac_table(struct hclge_vport *vport,
 	list_for_each_entry_safe(mac_node, tmp, list, node) {
 		switch (mac_node->state) {
 		case HCLGE_MAC_TO_DEL:
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, &tmp_del_list);
+			list_move_tail(&mac_node->node, &tmp_del_list);
 			break;
 		case HCLGE_MAC_TO_ADD:
 			new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
@@ -8992,8 +8989,7 @@ static void hclge_build_del_list(struct list_head *list,
 		switch (mac_cfg->state) {
 		case HCLGE_MAC_TO_DEL:
 		case HCLGE_MAC_ACTIVE:
-			list_del(&mac_cfg->node);
-			list_add_tail(&mac_cfg->node, tmp_del_list);
+			list_move_tail(&mac_cfg->node, tmp_del_list);
 			break;
 		case HCLGE_MAC_TO_ADD:
 			if (is_del_list) {
@@ -9088,8 +9084,7 @@ static void hclge_uninit_vport_mac_list(struct hclge_vport *vport,
 		switch (mac_node->state) {
 		case HCLGE_MAC_TO_DEL:
 		case HCLGE_MAC_ACTIVE:
-			list_del(&mac_node->node);
-			list_add_tail(&mac_node->node, &tmp_del_list);
+			list_move_tail(&mac_node->node, &tmp_del_list);
 			break;
 		case HCLGE_MAC_TO_ADD:
 			list_del(&mac_node->node);

From eff57ab52cc4bcdca095a5a129bc0912a402af11 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:47 +0800
Subject: [PATCH 1264/3292] net: lapbether: remove redundant blank line

This patch the redundant blank line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 59646865a3a4..bb529effad1a 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -307,7 +307,6 @@ static int lapbeth_set_mac_address(struct net_device *dev, void *addr)
 	return 0;
 }
 
-
 static const struct lapb_register_struct lapbeth_callbacks = {
 	.connect_confirmation    = lapbeth_connected,
 	.connect_indication      = lapbeth_connected,

From 5bc5f5f27b89fd029515a305caaaf40b77c63c76 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:48 +0800
Subject: [PATCH 1265/3292] net: lapbether: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index bb529effad1a..b6aef7b53eca 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -303,6 +303,7 @@ static void lapbeth_disconnected(struct net_device *dev, int reason)
 static int lapbeth_set_mac_address(struct net_device *dev, void *addr)
 {
 	struct sockaddr *sa = addr;
+
 	memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
 	return 0;
 }

From a61bebc774cbc6595ca32a8ef69e6fe3289ebb33 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:49 +0800
Subject: [PATCH 1266/3292] net: lapbether: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index b6aef7b53eca..e5ae04338258 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -116,7 +116,8 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	if (dev_net(dev) != &init_net)
 		goto drop;
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
 		return NET_RX_DROP;
 
 	if (!pskb_may_pull(skb, 2))
@@ -137,7 +138,8 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	skb_pull(skb, 2);	/* Remove the length bytes */
 	skb_trim(skb, len);	/* Set the length of the data */
 
-	if ((err = lapb_data_received(lapbeth->axdev, skb)) != LAPB_OK) {
+	err = lapb_data_received(lapbeth->axdev, skb);
+	if (err != LAPB_OK) {
 		printk(KERN_DEBUG "lapbether: lapb_data_received err - %d\n", err);
 		goto drop_unlock;
 	}
@@ -219,7 +221,8 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
 
 	skb_pull(skb, 1);
 
-	if ((err = lapb_data_request(dev, skb)) != LAPB_OK) {
+	err = lapb_data_request(dev, skb);
+	if (err != LAPB_OK) {
 		pr_err("lapb_data_request error - %d\n", err);
 		goto drop;
 	}
@@ -327,7 +330,8 @@ static int lapbeth_open(struct net_device *dev)
 
 	napi_enable(&lapbeth->napi);
 
-	if ((err = lapb_register(dev, &lapbeth_callbacks)) != LAPB_OK) {
+	err = lapb_register(dev, &lapbeth_callbacks);
+	if (err != LAPB_OK) {
 		pr_err("lapb_register error: %d\n", err);
 		return -ENODEV;
 	}
@@ -348,7 +352,8 @@ static int lapbeth_close(struct net_device *dev)
 	lapbeth->up = false;
 	spin_unlock_bh(&lapbeth->up_lock);
 
-	if ((err = lapb_unregister(dev)) != LAPB_OK)
+	err = lapb_unregister(dev);
+	if (err != LAPB_OK)
 		pr_err("lapb_unregister error: %d\n", err);
 
 	napi_disable(&lapbeth->napi);

From 2e350780ae4f2be8a2525929b6c69c2dd9591a20 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:50 +0800
Subject: [PATCH 1267/3292] net: lapbether: remove trailing whitespaces

This patch removes trailing whitespaces.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index e5ae04338258..169b323733ac 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -6,7 +6,7 @@
  *
  *	This is a "pseudo" network driver to allow LAPB over Ethernet.
  *
- *	This driver can use any ethernet destination address, and can be 
+ *	This driver can use any ethernet destination address, and can be
  *	limited to accept frames from one dedicated ethernet card only.
  *
  *	History
@@ -72,7 +72,7 @@ static struct lapbethdev *lapbeth_get_x25_dev(struct net_device *dev)
 	struct lapbethdev *lapbeth;
 
 	list_for_each_entry_rcu(lapbeth, &lapbeth_devices, node, lockdep_rtnl_is_held()) {
-		if (lapbeth->ethdev == dev) 
+		if (lapbeth->ethdev == dev)
 			return lapbeth;
 	}
 	return NULL;
@@ -468,7 +468,7 @@ static int lapbeth_device_event(struct notifier_block *this,
 	case NETDEV_GOING_DOWN:
 		/* ethernet device closes -> close LAPB interface */
 		lapbeth = lapbeth_get_x25_dev(dev);
-		if (lapbeth) 
+		if (lapbeth)
 			dev_close(lapbeth->axdev);
 		break;
 	case NETDEV_UNREGISTER:

From d5e686e8b66d513833e350ab776a8052b0f2ba9e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:51 +0800
Subject: [PATCH 1268/3292] net: lapbether: remove unnecessary out of memory
 message

This patch removes unnecessary out of memory message,
to fix the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 169b323733ac..705a8984e4d9 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -266,10 +266,8 @@ static void lapbeth_connected(struct net_device *dev, int reason)
 	unsigned char *ptr;
 	struct sk_buff *skb = __dev_alloc_skb(1, GFP_ATOMIC | __GFP_NOMEMALLOC);
 
-	if (!skb) {
-		pr_err("out of memory\n");
+	if (!skb)
 		return;
-	}
 
 	ptr  = skb_put(skb, 1);
 	*ptr = X25_IFACE_CONNECT;
@@ -286,10 +284,8 @@ static void lapbeth_disconnected(struct net_device *dev, int reason)
 	unsigned char *ptr;
 	struct sk_buff *skb = __dev_alloc_skb(1, GFP_ATOMIC | __GFP_NOMEMALLOC);
 
-	if (!skb) {
-		pr_err("out of memory\n");
+	if (!skb)
 		return;
-	}
 
 	ptr  = skb_put(skb, 1);
 	*ptr = X25_IFACE_DISCONNECT;

From 4f9893c762f8a22df77ed2e4c149ad943c1eaf08 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:52 +0800
Subject: [PATCH 1269/3292] net: lapbether: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 705a8984e4d9..60628aa40d10 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -44,7 +44,8 @@
 static const u8 bcast_addr[6] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
 
 /* If this number is made larger, check that the temporary string buffer
- * in lapbeth_new_device is large enough to store the probe device name.*/
+ * in lapbeth_new_device is large enough to store the probe device name.
+ */
 #define MAXLAPBDEV 100
 
 struct lapbethdev {
@@ -64,8 +65,7 @@ static void lapbeth_disconnected(struct net_device *dev, int reason);
 
 /* ------------------------------------------------------------------------ */
 
-/*
- *	Get the LAPB device for the ethernet device
+/*	Get the LAPB device for the ethernet device
  */
 static struct lapbethdev *lapbeth_get_x25_dev(struct net_device *dev)
 {
@@ -105,8 +105,7 @@ static int lapbeth_napi_poll(struct napi_struct *napi, int budget)
 	return processed;
 }
 
-/*
- *	Receive a LAPB frame via an ethernet interface.
+/*	Receive a LAPB frame via an ethernet interface.
  */
 static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev)
 {
@@ -179,8 +178,7 @@ static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-/*
- *	Send a LAPB frame via an ethernet interface
+/*	Send a LAPB frame via an ethernet interface
  */
 static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
@@ -296,8 +294,7 @@ static void lapbeth_disconnected(struct net_device *dev, int reason)
 	napi_schedule(&lapbeth->napi);
 }
 
-/*
- *	Set AX.25 callsign
+/*	Set AX.25 callsign
  */
 static int lapbeth_set_mac_address(struct net_device *dev, void *addr)
 {
@@ -316,8 +313,7 @@ static const struct lapb_register_struct lapbeth_callbacks = {
 	.data_transmit           = lapbeth_data_transmit,
 };
 
-/*
- * open/close a device
+/* open/close a device
  */
 static int lapbeth_open(struct net_device *dev)
 {
@@ -376,8 +372,7 @@ static void lapbeth_setup(struct net_device *dev)
 	dev->addr_len        = 0;
 }
 
-/*
- *	Setup a new device.
+/*	Setup a new device.
  */
 static int lapbeth_new_device(struct net_device *dev)
 {
@@ -428,8 +423,7 @@ fail:
 	goto out;
 }
 
-/*
- *	Free a lapb network device.
+/*	Free a lapb network device.
  */
 static void lapbeth_free_device(struct lapbethdev *lapbeth)
 {
@@ -438,8 +432,7 @@ static void lapbeth_free_device(struct lapbethdev *lapbeth)
 	unregister_netdevice(lapbeth->axdev);
 }
 
-/*
- *	Handle device status changes.
+/*	Handle device status changes.
  *
  * Called from notifier with RTNL held.
  */

From d49859601d72baef143703c6944a4e41921f7e6e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:53 +0800
Subject: [PATCH 1270/3292] net: lapbether: replace comparison to NULL with
 "lapbeth_get_x25_dev"

According to the chackpatch.pl, comparison to NULL could
be written "lapbeth_get_x25_dev".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 60628aa40d10..74694af7eb38 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -451,7 +451,7 @@ static int lapbeth_device_event(struct notifier_block *this,
 	switch (event) {
 	case NETDEV_UP:
 		/* New ethernet device -> new LAPB interface	 */
-		if (lapbeth_get_x25_dev(dev) == NULL)
+		if (!lapbeth_get_x25_dev(dev))
 			lapbeth_new_device(dev);
 		break;
 	case NETDEV_GOING_DOWN:

From c564c049a34f94b1a4dbe02332b529a6bcb25496 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:54 +0800
Subject: [PATCH 1271/3292] net: lapbether: fix the alignment issue

Alignment should match open parenthesis.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 74694af7eb38..47ffb3ce6d69 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -181,7 +181,7 @@ static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb)
 /*	Send a LAPB frame via an ethernet interface
  */
 static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
-				      struct net_device *dev)
+				struct net_device *dev)
 {
 	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;

From 63a2bb15fe594f328ee1535f3f13e10f863c4c8e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 9 Jun 2021 17:39:55 +0800
Subject: [PATCH 1272/3292] net: lapbether: fix the code style issue about line
 length

According to the chackpatch.pl,
line length of 123 exceeds 100 columns, so fix it.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 47ffb3ce6d69..89d31adc3809 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -107,7 +107,8 @@ static int lapbeth_napi_poll(struct napi_struct *napi, int budget)
 
 /*	Receive a LAPB frame via an ethernet interface.
  */
-static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev)
+static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct packet_type *ptype, struct net_device *orig_dev)
 {
 	int len, err;
 	struct lapbethdev *lapbeth;

From aa3d020b22cb844ab7bdbb9e5d861a64666e2b74 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 9 Jun 2021 12:52:12 +0300
Subject: [PATCH 1273/3292] net: dsa: qca8k: fix an endian bug in
 qca8k_get_ethtool_stats()

The "hi" variable is a u64 but the qca8k_read() writes to the top 32
bits of it.  That will work on little endian systems but it's a bit
subtle.  It's cleaner to make declare "hi" as a u32.  We will still need
to cast it when we shift it later on in the function but that's fine.

Fixes: 7c9896e37807 ("net: dsa: qca8k: check return value of read functions correctly")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 6fe963ba23e8..9df3514d1ff2 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1412,7 +1412,7 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 	const struct qca8k_mib_desc *mib;
 	u32 reg, i, val;
-	u64 hi = 0;
+	u32 hi = 0;
 	int ret;
 
 	for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) {
@@ -1424,14 +1424,14 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 			continue;
 
 		if (mib->size == 2) {
-			ret = qca8k_read(priv, reg + 4, (u32 *)&hi);
+			ret = qca8k_read(priv, reg + 4, &hi);
 			if (ret < 0)
 				continue;
 		}
 
 		data[i] = val;
 		if (mib->size == 2)
-			data[i] |= hi << 32;
+			data[i] |= (u64)hi << 32;
 	}
 }
 

From 3d0167f2a627528032821cdeb78b4eab0510460f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 9 Jun 2021 12:53:03 +0300
Subject: [PATCH 1274/3292] net: dsa: qca8k: check the correct variable in
 qca8k_set_mac_eee()

This code check "reg" but "ret" was intended so the error handling will
never trigger.

Fixes: 7c9896e37807 ("net: dsa: qca8k: check return value of read functions correctly")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/qca8k.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 9df3514d1ff2..1f63f50f73f1 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1454,10 +1454,8 @@ qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
 
 	mutex_lock(&priv->reg_mutex);
 	ret = qca8k_read(priv, QCA8K_REG_EEE_CTRL, &reg);
-	if (reg < 0) {
-		ret = reg;
+	if (ret < 0)
 		goto exit;
-	}
 
 	if (eee->eee_enabled)
 		reg |= lpi_en;

From 711d1dee1c86294b43e33202fb1eabd7e524ed9a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 9 Jun 2021 12:54:31 +0300
Subject: [PATCH 1275/3292] devlink: Fix error message in
 devlink_rate_set_ops_supported()

The WARN_ON() macro takes a condition, it doesn't take a message.  Use
WARN() instead.

Fixes: 1897db2ec310 ("devlink: Allow setting tx rate for devlink rate leaf objects")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5260bdfb2403..3bdb7eac730a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1732,7 +1732,7 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
 			return false;
 		}
 	} else {
-		WARN_ON("Unknown type of rate object");
+		WARN(1, "Unknown type of rate object");
 		return false;
 	}
 

From 4e744cb8126deac52257219fad754614a61989da Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 9 Jun 2021 12:56:45 +0300
Subject: [PATCH 1276/3292] netdevsim: delete unnecessary debugfs checking

In normal situations where the driver doesn't dereference
"nsim_node->ddir" or "nsim_node->rate_parent" itself then we are not
supposed to check the return from debugfs functions.  In the case of
debugfs_create_dir() the check was wrong as well because it doesn't
return NULL, it returns error pointers.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 527b019ae0b2..6f4bc70049d2 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -1141,7 +1141,6 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv,
 {
 	struct nsim_dev *nsim_dev = devlink_priv(node->devlink);
 	struct nsim_rate_node *nsim_node;
-	int err;
 
 	if (!nsim_esw_mode_is_switchdev(nsim_dev)) {
 		NL_SET_ERR_MSG_MOD(extack, "Node creation allowed only in switchdev mode.");
@@ -1153,29 +1152,16 @@ static int nsim_rate_node_new(struct devlink_rate *node, void **priv,
 		return -ENOMEM;
 
 	nsim_node->ddir = debugfs_create_dir(node->name, nsim_dev->nodes_ddir);
-	if (!nsim_node->ddir) {
-		err = -ENOMEM;
-		goto err_node;
-	}
+
 	debugfs_create_u16("tx_share", 0400, nsim_node->ddir, &nsim_node->tx_share);
 	debugfs_create_u16("tx_max", 0400, nsim_node->ddir, &nsim_node->tx_max);
 	nsim_node->rate_parent = debugfs_create_file("rate_parent", 0400,
 						     nsim_node->ddir,
 						     &nsim_node->parent_name,
 						     &nsim_dev_rate_parent_fops);
-	if (IS_ERR(nsim_node->rate_parent)) {
-		err = PTR_ERR(nsim_node->rate_parent);
-		goto err_ddir;
-	}
 
 	*priv = nsim_node;
 	return 0;
-
-err_ddir:
-	debugfs_remove_recursive(nsim_node->ddir);
-err_node:
-	kfree(nsim_node);
-	return err;
 }
 
 static int nsim_rate_node_del(struct devlink_rate *node, void *priv,

From e67665946599abe0ae8d454ceb6bad4b1d1d6189 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 11:24:47 +0100
Subject: [PATCH 1277/3292] net: usb: asix: Fix less than zero comparison of a
 u16

The comparison of the u16 priv->phy_addr < 0 is always false because
phy_addr is unsigned. Fix this by assigning the return from the call
to function asix_read_phy_addr to int ret and using this for the
less than zero error check comparison.

Addresses-Coverity: ("Unsigned compared against 0")
Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 57dafb3262d9..8a477171e8f5 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -704,9 +704,11 @@ static int ax88772_init_phy(struct usbnet *dev)
 	struct asix_common_private *priv = dev->driver_priv;
 	int ret;
 
-	priv->phy_addr = asix_read_phy_addr(dev, true);
-	if (priv->phy_addr < 0)
-		return priv->phy_addr;
+	ret = asix_read_phy_addr(dev, true);
+	if (ret < 0)
+		return ret;
+
+	priv->phy_addr = ret;
 
 	snprintf(priv->phy_name, sizeof(priv->phy_name), PHY_ID_FMT,
 		 priv->mdio->id, priv->phy_addr);

From c6be5a22fde5f5cbcef3e1473efbeb312aed1f0e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 11:24:48 +0100
Subject: [PATCH 1278/3292] net: usb: asix: ax88772: Fix less than zero
 comparison of a u16

The comparison of the u16 priv->phy_addr < 0 is always false because
phy_addr is unsigned. Fix this by assigning the return from the call
to function asix_read_phy_addr to int ret and using this for the
less than zero error check comparison.

Fixes: 7e88b11a862a ("net: usb: asix: refactor asix_read_phy_addr() and handle errors on return")
Addresses-Coverity: ("Unsigned compared against 0")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88172a.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 2e2081346740..530947d7477b 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -205,11 +205,11 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
 		goto free;
 	}
 
-	priv->phy_addr = asix_read_phy_addr(dev, priv->use_embdphy);
-	if (priv->phy_addr < 0) {
-		ret = priv->phy_addr;
+	ret = asix_read_phy_addr(dev, priv->use_embdphy);
+	if (ret < 0)
 		goto free;
-	}
+
+	priv->phy_addr = ret;
 
 	ax88172a_reset_phy(dev, priv->use_embdphy);
 

From daf6e8c9caa0955e8d190a606b1bacf9a903d3c1 Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Wed, 9 Jun 2021 13:49:18 +0300
Subject: [PATCH 1279/3292] Revert "nvme-tcp-offload: ULP Series"

This reverts commits:
- 762411542050dbe27c7c96f13c57f93da5d9b89a
     nvme: NVME_TCP_OFFLOAD should not default to m
- 5ff5622ea1f16d535f1be4e478e712ef48fe183b:
     Merge branch 'NVMeTCP-Offload-ULP'

As requested on the mailing-list: https://lore.kernel.org/netdev/SJ0PR18MB3882C20793EA35A3E8DAE300CC379@SJ0PR18MB3882.namprd18.prod.outlook.com/
This patch will revert the nvme-tcp-offload ULP from net-next.

The nvme-tcp-offload ULP series will continue to be considered only on
linux-nvme@lists.infradead.org.

Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                     |    8 -
 drivers/nvme/host/Kconfig       |   16 -
 drivers/nvme/host/Makefile      |    3 -
 drivers/nvme/host/fabrics.c     |   12 +-
 drivers/nvme/host/fabrics.h     |    9 -
 drivers/nvme/host/tcp-offload.c | 1318 -------------------------------
 drivers/nvme/host/tcp-offload.h |  206 -----
 7 files changed, 9 insertions(+), 1563 deletions(-)
 delete mode 100644 drivers/nvme/host/tcp-offload.c
 delete mode 100644 drivers/nvme/host/tcp-offload.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 85a87a93e194..e69c1991ec3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13107,14 +13107,6 @@ F:	drivers/nvme/host/
 F:	include/linux/nvme.h
 F:	include/uapi/linux/nvme_ioctl.h
 
-NVM EXPRESS TCP OFFLOAD TRANSPORT DRIVERS
-M:	Shai Malin <smalin@marvell.com>
-M:	Ariel Elior <aelior@marvell.com>
-L:	linux-nvme@lists.infradead.org
-S:	Supported
-F:	drivers/nvme/host/tcp-offload.c
-F:	drivers/nvme/host/tcp-offload.h
-
 NVM EXPRESS FC TRANSPORT DRIVERS
 M:	James Smart <james.smart@broadcom.com>
 L:	linux-nvme@lists.infradead.org
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index a01e3f380e74..a44d49d63968 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -84,19 +84,3 @@ config NVME_TCP
 	  from https://github.com/linux-nvme/nvme-cli.
 
 	  If unsure, say N.
-
-config NVME_TCP_OFFLOAD
-	tristate "NVM Express over Fabrics TCP offload common layer"
-	depends on BLOCK
-	depends on INET
-	select NVME_CORE
-	select NVME_FABRICS
-	help
-	  This provides support for the NVMe over Fabrics protocol using
-	  the TCP offload transport. This allows you to use remote block devices
-	  exported using the NVMe protocol set.
-
-	  To configure a NVMe over Fabrics controller use the nvme-cli tool
-	  from https://github.com/linux-nvme/nvme-cli.
-
-	  If unsure, say N.
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 3c3fdf83ce38..cbc509784b2e 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_NVME_FABRICS)		+= nvme-fabrics.o
 obj-$(CONFIG_NVME_RDMA)			+= nvme-rdma.o
 obj-$(CONFIG_NVME_FC)			+= nvme-fc.o
 obj-$(CONFIG_NVME_TCP)			+= nvme-tcp.o
-obj-$(CONFIG_NVME_TCP_OFFLOAD)	+= nvme-tcp-offload.o
 
 nvme-core-y				:= core.o ioctl.o
 nvme-core-$(CONFIG_TRACING)		+= trace.o
@@ -27,5 +26,3 @@ nvme-rdma-y				+= rdma.o
 nvme-fc-y				+= fc.o
 
 nvme-tcp-y				+= tcp.o
-
-nvme-tcp-offload-y		+= tcp-offload.o
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index ceb263eb50fb..a2bb7fc63a73 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -860,8 +860,8 @@ out:
 	return ret;
 }
 
-int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
-			     unsigned int required_opts)
+static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
+		unsigned int required_opts)
 {
 	if ((opts->mask & required_opts) != required_opts) {
 		int i;
@@ -879,7 +879,6 @@ int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nvmf_check_required_opts);
 
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 		struct nvmf_ctrl_options *opts)
@@ -943,6 +942,13 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 }
 EXPORT_SYMBOL_GPL(nvmf_free_options);
 
+#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
+#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
+				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
+				 NVMF_OPT_DISABLE_SQFLOW |\
+				 NVMF_OPT_FAIL_FAST_TMO)
+
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
 {
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 8399fcc063ef..d7f7974dc208 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -68,13 +68,6 @@ enum {
 	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };
 
-#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
-#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
-				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW |\
-				 NVMF_OPT_FAIL_FAST_TMO)
-
 /**
  * struct nvmf_ctrl_options - Used to hold the options specified
  *			      with the parsing opts enum.
@@ -193,7 +186,5 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 		struct nvmf_ctrl_options *opts);
-int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
-			     unsigned int required_opts);
 
 #endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
deleted file mode 100644
index c76822e5ada7..000000000000
--- a/drivers/nvme/host/tcp-offload.c
+++ /dev/null
@@ -1,1318 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2021 Marvell. All rights reserved.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-/* Kernel includes */
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* Driver includes */
-#include "tcp-offload.h"
-
-static LIST_HEAD(nvme_tcp_ofld_devices);
-static DEFINE_MUTEX(nvme_tcp_ofld_devices_mutex);
-static LIST_HEAD(nvme_tcp_ofld_ctrl_list);
-static DEFINE_MUTEX(nvme_tcp_ofld_ctrl_mutex);
-static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops;
-static struct blk_mq_ops nvme_tcp_ofld_mq_ops;
-
-static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)
-{
-	return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);
-}
-
-static inline int nvme_tcp_ofld_qid(struct nvme_tcp_ofld_queue *queue)
-{
-	return queue - queue->ctrl->queues;
-}
-
-/**
- * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration
- * function.
- * @dev:	NVMeTCP offload device instance to be registered to the
- *		common tcp offload instance.
- *
- * API function that registers the type of vendor specific driver
- * being implemented to the common NVMe over TCP offload library. Part of
- * the overall init sequence of starting up an offload driver.
- */
-int nvme_tcp_ofld_register_dev(struct nvme_tcp_ofld_dev *dev)
-{
-	struct nvme_tcp_ofld_ops *ops = dev->ops;
-
-	if (!ops->claim_dev ||
-	    !ops->setup_ctrl ||
-	    !ops->release_ctrl ||
-	    !ops->create_queue ||
-	    !ops->drain_queue ||
-	    !ops->destroy_queue ||
-	    !ops->poll_queue ||
-	    !ops->send_req)
-		return -EINVAL;
-
-	mutex_lock(&nvme_tcp_ofld_devices_mutex);
-	list_add_tail(&dev->entry, &nvme_tcp_ofld_devices);
-	mutex_unlock(&nvme_tcp_ofld_devices_mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nvme_tcp_ofld_register_dev);
-
-/**
- * nvme_tcp_ofld_unregister_dev() - NVMeTCP Offload Library unregistration
- * function.
- * @dev:	NVMeTCP offload device instance to be unregistered from the
- *		common tcp offload instance.
- *
- * API function that unregisters the type of vendor specific driver being
- * implemented from the common NVMe over TCP offload library.
- * Part of the overall exit sequence of unloading the implemented driver.
- */
-void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev)
-{
-	mutex_lock(&nvme_tcp_ofld_devices_mutex);
-	list_del(&dev->entry);
-	mutex_unlock(&nvme_tcp_ofld_devices_mutex);
-}
-EXPORT_SYMBOL_GPL(nvme_tcp_ofld_unregister_dev);
-
-/**
- * nvme_tcp_ofld_error_recovery() - NVMeTCP Offload library error recovery.
- * function.
- * @nctrl:	NVMe controller instance to change to resetting.
- *
- * API function that change the controller state to resseting.
- * Part of the overall controller reset sequence.
- */
-void nvme_tcp_ofld_error_recovery(struct nvme_ctrl *nctrl)
-{
-	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_RESETTING))
-		return;
-
-	queue_work(nvme_reset_wq, &to_tcp_ofld_ctrl(nctrl)->err_work);
-}
-EXPORT_SYMBOL_GPL(nvme_tcp_ofld_error_recovery);
-
-/**
- * nvme_tcp_ofld_report_queue_err() - NVMeTCP Offload report error event
- * callback function. Pointed to by nvme_tcp_ofld_queue->report_err.
- * @queue:	NVMeTCP offload queue instance on which the error has occurred.
- *
- * API function that allows the vendor specific offload driver to reports errors
- * to the common offload layer, to invoke error recovery.
- */
-int nvme_tcp_ofld_report_queue_err(struct nvme_tcp_ofld_queue *queue)
-{
-	pr_err("nvme-tcp-offload queue error\n");
-	nvme_tcp_ofld_error_recovery(&queue->ctrl->nctrl);
-
-	return 0;
-}
-
-/**
- * nvme_tcp_ofld_req_done() - NVMeTCP Offload request done callback
- * function. Pointed to by nvme_tcp_ofld_req->done.
- * Handles both NVME_TCP_F_DATA_SUCCESS flag and NVMe CQ.
- * @req:	NVMeTCP offload request to complete.
- * @result:     The nvme_result.
- * @status:     The completion status.
- *
- * API function that allows the vendor specific offload driver to report request
- * completions to the common offload layer.
- */
-void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
-			    union nvme_result *result,
-			    __le16 status)
-{
-	struct request *rq = blk_mq_rq_from_pdu(req);
-
-	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), *result))
-		nvme_complete_rq(rq);
-}
-
-/**
- * nvme_tcp_ofld_async_req_done() - NVMeTCP Offload request done callback
- * function for async request. Pointed to by nvme_tcp_ofld_req->done.
- * Handles both NVME_TCP_F_DATA_SUCCESS flag and NVMe CQ.
- * @req:	NVMeTCP offload request to complete.
- * @result:     The nvme_result.
- * @status:     The completion status.
- *
- * API function that allows the vendor specific offload driver to report request
- * completions to the common offload layer.
- */
-void nvme_tcp_ofld_async_req_done(struct nvme_tcp_ofld_req *req,
-				  union nvme_result *result, __le16 status)
-{
-	struct nvme_tcp_ofld_queue *queue = req->queue;
-	struct nvme_tcp_ofld_ctrl *ctrl = queue->ctrl;
-
-	nvme_complete_async_event(&ctrl->nctrl, status, result);
-}
-
-static struct nvme_tcp_ofld_dev *
-nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)
-{
-	struct nvme_tcp_ofld_dev *dev;
-
-	mutex_lock(&nvme_tcp_ofld_devices_mutex);
-	list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) {
-		if (dev->ops->claim_dev(dev, ctrl))
-			goto out;
-	}
-
-	dev = NULL;
-out:
-	mutex_unlock(&nvme_tcp_ofld_devices_mutex);
-
-	return dev;
-}
-
-static struct blk_mq_tag_set *
-nvme_tcp_ofld_alloc_tagset(struct nvme_ctrl *nctrl, bool admin)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct blk_mq_tag_set *set;
-	int rc;
-
-	if (admin) {
-		set = &ctrl->admin_tag_set;
-		memset(set, 0, sizeof(*set));
-		set->ops = &nvme_tcp_ofld_admin_mq_ops;
-		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-		set->reserved_tags = NVMF_RESERVED_TAGS;
-		set->numa_node = nctrl->numa_node;
-		set->flags = BLK_MQ_F_BLOCKING;
-		set->cmd_size = sizeof(struct nvme_tcp_ofld_req);
-		set->driver_data = ctrl;
-		set->nr_hw_queues = 1;
-		set->timeout = NVME_ADMIN_TIMEOUT;
-	} else {
-		set = &ctrl->tag_set;
-		memset(set, 0, sizeof(*set));
-		set->ops = &nvme_tcp_ofld_mq_ops;
-		set->queue_depth = nctrl->sqsize + 1;
-		set->reserved_tags = NVMF_RESERVED_TAGS;
-		set->numa_node = nctrl->numa_node;
-		set->flags = BLK_MQ_F_SHOULD_MERGE;
-		set->cmd_size = sizeof(struct nvme_tcp_ofld_req);
-		set->driver_data = ctrl;
-		set->nr_hw_queues = nctrl->queue_count - 1;
-		set->timeout = NVME_IO_TIMEOUT;
-		set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
-	}
-
-	rc = blk_mq_alloc_tag_set(set);
-	if (rc)
-		return ERR_PTR(rc);
-
-	return set;
-}
-
-static void __nvme_tcp_ofld_stop_queue(struct nvme_tcp_ofld_queue *queue)
-{
-	queue->dev->ops->drain_queue(queue);
-}
-
-static void nvme_tcp_ofld_stop_queue(struct nvme_ctrl *nctrl, int qid)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[qid];
-
-	mutex_lock(&queue->queue_lock);
-	if (test_and_clear_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags))
-		__nvme_tcp_ofld_stop_queue(queue);
-	mutex_unlock(&queue->queue_lock);
-}
-
-static void nvme_tcp_ofld_stop_io_queues(struct nvme_ctrl *ctrl)
-{
-	int i;
-
-	for (i = 1; i < ctrl->queue_count; i++)
-		nvme_tcp_ofld_stop_queue(ctrl, i);
-}
-
-static void __nvme_tcp_ofld_free_queue(struct nvme_tcp_ofld_queue *queue)
-{
-	queue->dev->ops->destroy_queue(queue);
-}
-
-static void nvme_tcp_ofld_free_queue(struct nvme_ctrl *nctrl, int qid)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[qid];
-
-	if (test_and_clear_bit(NVME_TCP_OFLD_Q_ALLOCATED, &queue->flags)) {
-		__nvme_tcp_ofld_free_queue(queue);
-		mutex_destroy(&queue->queue_lock);
-	}
-}
-
-static void
-nvme_tcp_ofld_free_io_queues(struct nvme_ctrl *nctrl)
-{
-	int i;
-
-	for (i = 1; i < nctrl->queue_count; i++)
-		nvme_tcp_ofld_free_queue(nctrl, i);
-}
-
-static void nvme_tcp_ofld_destroy_io_queues(struct nvme_ctrl *nctrl, bool remove)
-{
-	nvme_tcp_ofld_stop_io_queues(nctrl);
-	if (remove) {
-		blk_cleanup_queue(nctrl->connect_q);
-		blk_mq_free_tag_set(nctrl->tagset);
-	}
-	nvme_tcp_ofld_free_io_queues(nctrl);
-}
-
-static void nvme_tcp_ofld_destroy_admin_queue(struct nvme_ctrl *nctrl, bool remove)
-{
-	nvme_tcp_ofld_stop_queue(nctrl, 0);
-	if (remove) {
-		blk_cleanup_queue(nctrl->admin_q);
-		blk_cleanup_queue(nctrl->fabrics_q);
-		blk_mq_free_tag_set(nctrl->admin_tagset);
-	}
-	nvme_tcp_ofld_free_queue(nctrl, 0);
-}
-
-static int nvme_tcp_ofld_start_queue(struct nvme_ctrl *nctrl, int qid)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[qid];
-	int rc;
-
-	queue = &ctrl->queues[qid];
-	if (qid) {
-		queue->cmnd_capsule_len = nctrl->ioccsz * 16;
-		rc = nvmf_connect_io_queue(nctrl, qid, false);
-	} else {
-		queue->cmnd_capsule_len = sizeof(struct nvme_command) + NVME_TCP_ADMIN_CCSZ;
-		rc = nvmf_connect_admin_queue(nctrl);
-	}
-
-	if (!rc) {
-		set_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags);
-	} else {
-		if (test_bit(NVME_TCP_OFLD_Q_ALLOCATED, &queue->flags))
-			__nvme_tcp_ofld_stop_queue(queue);
-		dev_err(nctrl->device,
-			"failed to connect queue: %d ret=%d\n", qid, rc);
-	}
-
-	return rc;
-}
-
-static int nvme_tcp_ofld_configure_admin_queue(struct nvme_ctrl *nctrl,
-					       bool new)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[0];
-	int rc;
-
-	mutex_init(&queue->queue_lock);
-
-	rc = ctrl->dev->ops->create_queue(queue, 0, NVME_AQ_DEPTH);
-	if (rc)
-		return rc;
-
-	set_bit(NVME_TCP_OFLD_Q_ALLOCATED, &queue->flags);
-	if (new) {
-		nctrl->admin_tagset =
-				nvme_tcp_ofld_alloc_tagset(nctrl, true);
-		if (IS_ERR(nctrl->admin_tagset)) {
-			rc = PTR_ERR(nctrl->admin_tagset);
-			nctrl->admin_tagset = NULL;
-			goto out_free_queue;
-		}
-
-		nctrl->fabrics_q = blk_mq_init_queue(nctrl->admin_tagset);
-		if (IS_ERR(nctrl->fabrics_q)) {
-			rc = PTR_ERR(nctrl->fabrics_q);
-			nctrl->fabrics_q = NULL;
-			goto out_free_tagset;
-		}
-
-		nctrl->admin_q = blk_mq_init_queue(nctrl->admin_tagset);
-		if (IS_ERR(nctrl->admin_q)) {
-			rc = PTR_ERR(nctrl->admin_q);
-			nctrl->admin_q = NULL;
-			goto out_cleanup_fabrics_q;
-		}
-	}
-
-	rc = nvme_tcp_ofld_start_queue(nctrl, 0);
-	if (rc)
-		goto out_cleanup_queue;
-
-	rc = nvme_enable_ctrl(nctrl);
-	if (rc)
-		goto out_stop_queue;
-
-	blk_mq_unquiesce_queue(nctrl->admin_q);
-
-	rc = nvme_init_ctrl_finish(nctrl);
-	if (rc)
-		goto out_quiesce_queue;
-
-	return 0;
-
-out_quiesce_queue:
-	blk_mq_quiesce_queue(nctrl->admin_q);
-	blk_sync_queue(nctrl->admin_q);
-out_stop_queue:
-	nvme_tcp_ofld_stop_queue(nctrl, 0);
-	nvme_cancel_admin_tagset(nctrl);
-out_cleanup_queue:
-	if (new)
-		blk_cleanup_queue(nctrl->admin_q);
-out_cleanup_fabrics_q:
-	if (new)
-		blk_cleanup_queue(nctrl->fabrics_q);
-out_free_tagset:
-	if (new)
-		blk_mq_free_tag_set(nctrl->admin_tagset);
-out_free_queue:
-	nvme_tcp_ofld_free_queue(nctrl, 0);
-
-	return rc;
-}
-
-static unsigned int nvme_tcp_ofld_nr_io_queues(struct nvme_ctrl *nctrl)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvme_tcp_ofld_dev *dev = ctrl->dev;
-	u32 hw_vectors = dev->num_hw_vectors;
-	u32 nr_write_queues, nr_poll_queues;
-	u32 nr_io_queues, nr_total_queues;
-
-	nr_io_queues = min3(nctrl->opts->nr_io_queues, num_online_cpus(),
-			    hw_vectors);
-	nr_write_queues = min3(nctrl->opts->nr_write_queues, num_online_cpus(),
-			       hw_vectors);
-	nr_poll_queues = min3(nctrl->opts->nr_poll_queues, num_online_cpus(),
-			      hw_vectors);
-
-	nr_total_queues = nr_io_queues + nr_write_queues + nr_poll_queues;
-
-	return nr_total_queues;
-}
-
-static void
-nvme_tcp_ofld_set_io_queues(struct nvme_ctrl *nctrl, unsigned int nr_io_queues)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvmf_ctrl_options *opts = nctrl->opts;
-
-	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
-		/*
-		 * separate read/write queues
-		 * hand out dedicated default queues only after we have
-		 * sufficient read queues.
-		 */
-		ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
-		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
-		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
-			min(opts->nr_write_queues, nr_io_queues);
-		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	} else {
-		/*
-		 * shared read/write queues
-		 * either no write queues were requested, or we don't have
-		 * sufficient queue count to have dedicated default queues.
-		 */
-		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
-			min(opts->nr_io_queues, nr_io_queues);
-		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	}
-
-	if (opts->nr_poll_queues && nr_io_queues) {
-		/* map dedicated poll queues only if we have queues left */
-		ctrl->io_queues[HCTX_TYPE_POLL] =
-			min(opts->nr_poll_queues, nr_io_queues);
-	}
-}
-
-static int nvme_tcp_ofld_create_io_queues(struct nvme_ctrl *nctrl)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	int i, rc;
-
-	for (i = 1; i < nctrl->queue_count; i++) {
-		mutex_init(&ctrl->queues[i].queue_lock);
-
-		rc = ctrl->dev->ops->create_queue(&ctrl->queues[i],
-						  i, nctrl->sqsize + 1);
-		if (rc)
-			goto out_free_queues;
-
-		set_bit(NVME_TCP_OFLD_Q_ALLOCATED, &ctrl->queues[i].flags);
-	}
-
-	return 0;
-
-out_free_queues:
-	for (i--; i >= 1; i--)
-		nvme_tcp_ofld_free_queue(nctrl, i);
-
-	return rc;
-}
-
-static int nvme_tcp_ofld_alloc_io_queues(struct nvme_ctrl *nctrl)
-{
-	unsigned int nr_io_queues;
-	int rc;
-
-	nr_io_queues = nvme_tcp_ofld_nr_io_queues(nctrl);
-	rc = nvme_set_queue_count(nctrl, &nr_io_queues);
-	if (rc)
-		return rc;
-
-	nctrl->queue_count = nr_io_queues + 1;
-	if (nctrl->queue_count < 2) {
-		dev_err(nctrl->device,
-			"unable to set any I/O queues\n");
-
-		return -ENOMEM;
-	}
-
-	dev_info(nctrl->device, "creating %d I/O queues.\n", nr_io_queues);
-	nvme_tcp_ofld_set_io_queues(nctrl, nr_io_queues);
-
-	return nvme_tcp_ofld_create_io_queues(nctrl);
-}
-
-static int nvme_tcp_ofld_start_io_queues(struct nvme_ctrl *nctrl)
-{
-	int i, rc = 0;
-
-	for (i = 1; i < nctrl->queue_count; i++) {
-		rc = nvme_tcp_ofld_start_queue(nctrl, i);
-		if (rc)
-			goto out_stop_queues;
-	}
-
-	return 0;
-
-out_stop_queues:
-	for (i--; i >= 1; i--)
-		nvme_tcp_ofld_stop_queue(nctrl, i);
-
-	return rc;
-}
-
-static int
-nvme_tcp_ofld_configure_io_queues(struct nvme_ctrl *nctrl, bool new)
-{
-	int rc = nvme_tcp_ofld_alloc_io_queues(nctrl);
-
-	if (rc)
-		return rc;
-
-	if (new) {
-		nctrl->tagset = nvme_tcp_ofld_alloc_tagset(nctrl, false);
-		if (IS_ERR(nctrl->tagset)) {
-			rc = PTR_ERR(nctrl->tagset);
-			nctrl->tagset = NULL;
-			goto out_free_io_queues;
-		}
-
-		nctrl->connect_q = blk_mq_init_queue(nctrl->tagset);
-		if (IS_ERR(nctrl->connect_q)) {
-			rc = PTR_ERR(nctrl->connect_q);
-			nctrl->connect_q = NULL;
-			goto out_free_tag_set;
-		}
-	}
-
-	rc = nvme_tcp_ofld_start_io_queues(nctrl);
-	if (rc)
-		goto out_cleanup_connect_q;
-
-	if (!new) {
-		nvme_start_queues(nctrl);
-		if (!nvme_wait_freeze_timeout(nctrl, NVME_IO_TIMEOUT)) {
-			/*
-			 * If we timed out waiting for freeze we are likely to
-			 * be stuck.  Fail the controller initialization just
-			 * to be safe.
-			 */
-			rc = -ENODEV;
-			goto out_wait_freeze_timed_out;
-		}
-		blk_mq_update_nr_hw_queues(nctrl->tagset, nctrl->queue_count - 1);
-		nvme_unfreeze(nctrl);
-	}
-
-	return 0;
-
-out_wait_freeze_timed_out:
-	nvme_stop_queues(nctrl);
-	nvme_sync_io_queues(nctrl);
-	nvme_tcp_ofld_stop_io_queues(nctrl);
-out_cleanup_connect_q:
-	nvme_cancel_tagset(nctrl);
-	if (new)
-		blk_cleanup_queue(nctrl->connect_q);
-out_free_tag_set:
-	if (new)
-		blk_mq_free_tag_set(nctrl->tagset);
-out_free_io_queues:
-	nvme_tcp_ofld_free_io_queues(nctrl);
-
-	return rc;
-}
-
-static void nvme_tcp_ofld_reconnect_or_remove(struct nvme_ctrl *nctrl)
-{
-	/* If we are resetting/deleting then do nothing */
-	if (nctrl->state != NVME_CTRL_CONNECTING) {
-		WARN_ON_ONCE(nctrl->state == NVME_CTRL_NEW ||
-			     nctrl->state == NVME_CTRL_LIVE);
-
-		return;
-	}
-
-	if (nvmf_should_reconnect(nctrl)) {
-		dev_info(nctrl->device, "Reconnecting in %d seconds...\n",
-			 nctrl->opts->reconnect_delay);
-		queue_delayed_work(nvme_wq,
-				   &to_tcp_ofld_ctrl(nctrl)->connect_work,
-				   nctrl->opts->reconnect_delay * HZ);
-	} else {
-		dev_info(nctrl->device, "Removing controller...\n");
-		nvme_delete_ctrl(nctrl);
-	}
-}
-
-static int
-nvme_tcp_ofld_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			      unsigned int hctx_idx)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = data;
-
-	hctx->driver_data = &ctrl->queues[0];
-
-	return 0;
-}
-
-static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvmf_ctrl_options *opts = nctrl->opts;
-	int rc = 0;
-
-	rc = ctrl->dev->ops->setup_ctrl(ctrl);
-	if (rc)
-		return rc;
-
-	rc = nvme_tcp_ofld_configure_admin_queue(nctrl, new);
-	if (rc)
-		goto out_release_ctrl;
-
-	if (nctrl->icdoff) {
-		dev_err(nctrl->device, "icdoff is not supported!\n");
-		rc = -EINVAL;
-		goto destroy_admin;
-	}
-
-	if (!(nctrl->sgls & ((1 << 0) | (1 << 1)))) {
-		dev_err(nctrl->device, "Mandatory sgls are not supported!\n");
-		goto destroy_admin;
-	}
-
-	if (opts->queue_size > nctrl->sqsize + 1)
-		dev_warn(nctrl->device,
-			 "queue_size %zu > ctrl sqsize %u, clamping down\n",
-			 opts->queue_size, nctrl->sqsize + 1);
-
-	if (nctrl->sqsize + 1 > nctrl->maxcmd) {
-		dev_warn(nctrl->device,
-			 "sqsize %u > ctrl maxcmd %u, clamping down\n",
-			 nctrl->sqsize + 1, nctrl->maxcmd);
-		nctrl->sqsize = nctrl->maxcmd - 1;
-	}
-
-	if (nctrl->queue_count > 1) {
-		rc = nvme_tcp_ofld_configure_io_queues(nctrl, new);
-		if (rc)
-			goto destroy_admin;
-	}
-
-	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_LIVE)) {
-		/*
-		 * state change failure is ok if we started ctrl delete,
-		 * unless we're during creation of a new controller to
-		 * avoid races with teardown flow.
-		 */
-		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
-			     nctrl->state != NVME_CTRL_DELETING_NOIO);
-		WARN_ON_ONCE(new);
-		rc = -EINVAL;
-		goto destroy_io;
-	}
-
-	nvme_start_ctrl(nctrl);
-
-	return 0;
-
-destroy_io:
-	if (nctrl->queue_count > 1) {
-		nvme_stop_queues(nctrl);
-		nvme_sync_io_queues(nctrl);
-		nvme_tcp_ofld_stop_io_queues(nctrl);
-		nvme_cancel_tagset(nctrl);
-		nvme_tcp_ofld_destroy_io_queues(nctrl, new);
-	}
-destroy_admin:
-	blk_mq_quiesce_queue(nctrl->admin_q);
-	blk_sync_queue(nctrl->admin_q);
-	nvme_tcp_ofld_stop_queue(nctrl, 0);
-	nvme_cancel_admin_tagset(nctrl);
-	nvme_tcp_ofld_destroy_admin_queue(nctrl, new);
-out_release_ctrl:
-	ctrl->dev->ops->release_ctrl(ctrl);
-
-	return rc;
-}
-
-static int
-nvme_tcp_ofld_check_dev_opts(struct nvmf_ctrl_options *opts,
-			     struct nvme_tcp_ofld_ops *ofld_ops)
-{
-	unsigned int nvme_tcp_ofld_opt_mask = NVMF_ALLOWED_OPTS |
-			ofld_ops->allowed_opts | ofld_ops->required_opts;
-	struct nvmf_ctrl_options dev_opts_mask;
-
-	if (opts->mask & ~nvme_tcp_ofld_opt_mask) {
-		pr_warn("One or more nvmf options missing from ofld drvr %s.\n",
-			ofld_ops->name);
-
-		dev_opts_mask.mask = nvme_tcp_ofld_opt_mask;
-
-		return nvmf_check_required_opts(&dev_opts_mask, opts->mask);
-	}
-
-	return 0;
-}
-
-static void nvme_tcp_ofld_free_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-	struct nvme_tcp_ofld_dev *dev = ctrl->dev;
-
-	if (list_empty(&ctrl->list))
-		goto free_ctrl;
-
-	ctrl->dev->ops->release_ctrl(ctrl);
-
-	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
-	list_del(&ctrl->list);
-	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
-
-	nvmf_free_options(nctrl->opts);
-free_ctrl:
-	module_put(dev->ops->module);
-	kfree(ctrl->queues);
-	kfree(ctrl);
-}
-
-static void nvme_tcp_ofld_set_sg_null(struct nvme_command *c)
-{
-	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
-
-	sg->addr = 0;
-	sg->length = 0;
-	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | NVME_SGL_FMT_TRANSPORT_A;
-}
-
-inline void nvme_tcp_ofld_set_sg_inline(struct nvme_tcp_ofld_queue *queue,
-					struct nvme_command *c, u32 data_len)
-{
-	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
-
-	sg->addr = cpu_to_le64(queue->ctrl->nctrl.icdoff);
-	sg->length = cpu_to_le32(data_len);
-	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
-}
-
-static void nvme_tcp_ofld_map_data(struct nvme_command *c, u32 data_len)
-{
-	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
-
-	sg->addr = 0;
-	sg->length = cpu_to_le32(data_len);
-	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | NVME_SGL_FMT_TRANSPORT_A;
-}
-
-static void nvme_tcp_ofld_submit_async_event(struct nvme_ctrl *arg)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(arg);
-	struct nvme_tcp_ofld_queue *queue = &ctrl->queues[0];
-	struct nvme_tcp_ofld_dev *dev = queue->dev;
-	struct nvme_tcp_ofld_ops *ops = dev->ops;
-
-	ctrl->async_req.nvme_cmd.common.opcode = nvme_admin_async_event;
-	ctrl->async_req.nvme_cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
-	ctrl->async_req.nvme_cmd.common.flags |= NVME_CMD_SGL_METABUF;
-
-	nvme_tcp_ofld_set_sg_null(&ctrl->async_req.nvme_cmd);
-
-	ctrl->async_req.async = true;
-	ctrl->async_req.queue = queue;
-	ctrl->async_req.done = nvme_tcp_ofld_async_req_done;
-
-	ops->send_req(&ctrl->async_req);
-}
-
-static void
-nvme_tcp_ofld_teardown_admin_queue(struct nvme_ctrl *nctrl, bool remove)
-{
-	blk_mq_quiesce_queue(nctrl->admin_q);
-	blk_sync_queue(nctrl->admin_q);
-
-	nvme_tcp_ofld_stop_queue(nctrl, 0);
-	nvme_cancel_admin_tagset(nctrl);
-
-	if (remove)
-		blk_mq_unquiesce_queue(nctrl->admin_q);
-
-	nvme_tcp_ofld_destroy_admin_queue(nctrl, remove);
-}
-
-static void
-nvme_tcp_ofld_teardown_io_queues(struct nvme_ctrl *nctrl, bool remove)
-{
-	if (nctrl->queue_count <= 1)
-		return;
-
-	blk_mq_quiesce_queue(nctrl->admin_q);
-	nvme_start_freeze(nctrl);
-	nvme_stop_queues(nctrl);
-	nvme_sync_io_queues(nctrl);
-	nvme_tcp_ofld_stop_io_queues(nctrl);
-	nvme_cancel_tagset(nctrl);
-
-	if (remove)
-		nvme_start_queues(nctrl);
-
-	nvme_tcp_ofld_destroy_io_queues(nctrl, remove);
-}
-
-static void nvme_tcp_ofld_reconnect_ctrl_work(struct work_struct *work)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl =
-				container_of(to_delayed_work(work),
-					     struct nvme_tcp_ofld_ctrl,
-					     connect_work);
-	struct nvme_ctrl *nctrl = &ctrl->nctrl;
-
-	++nctrl->nr_reconnects;
-
-	if (nvme_tcp_ofld_setup_ctrl(nctrl, false))
-		goto requeue;
-
-	dev_info(nctrl->device, "Successfully reconnected (%d attempt)\n",
-		 nctrl->nr_reconnects);
-
-	nctrl->nr_reconnects = 0;
-
-	return;
-
-requeue:
-	dev_info(nctrl->device, "Failed reconnect attempt %d\n",
-		 nctrl->nr_reconnects);
-	nvme_tcp_ofld_reconnect_or_remove(nctrl);
-}
-
-static void nvme_tcp_ofld_error_recovery_work(struct work_struct *work)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl =
-		container_of(work, struct nvme_tcp_ofld_ctrl, err_work);
-	struct nvme_ctrl *nctrl = &ctrl->nctrl;
-
-	nvme_stop_keep_alive(nctrl);
-	nvme_tcp_ofld_teardown_io_queues(nctrl, false);
-	/* unquiesce to fail fast pending requests */
-	nvme_start_queues(nctrl);
-	nvme_tcp_ofld_teardown_admin_queue(nctrl, false);
-	blk_mq_unquiesce_queue(nctrl->admin_q);
-
-	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
-		/* state change failure is ok if we started nctrl delete */
-		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
-			     nctrl->state != NVME_CTRL_DELETING_NOIO);
-
-		return;
-	}
-
-	nvme_tcp_ofld_reconnect_or_remove(nctrl);
-}
-
-static void
-nvme_tcp_ofld_teardown_ctrl(struct nvme_ctrl *nctrl, bool shutdown)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = to_tcp_ofld_ctrl(nctrl);
-
-	cancel_work_sync(&ctrl->err_work);
-	cancel_delayed_work_sync(&ctrl->connect_work);
-	nvme_tcp_ofld_teardown_io_queues(nctrl, shutdown);
-	blk_mq_quiesce_queue(nctrl->admin_q);
-	if (shutdown)
-		nvme_shutdown_ctrl(nctrl);
-	else
-		nvme_disable_ctrl(nctrl);
-	nvme_tcp_ofld_teardown_admin_queue(nctrl, shutdown);
-}
-
-static void nvme_tcp_ofld_delete_ctrl(struct nvme_ctrl *nctrl)
-{
-	nvme_tcp_ofld_teardown_ctrl(nctrl, true);
-}
-
-static void nvme_tcp_ofld_reset_ctrl_work(struct work_struct *work)
-{
-	struct nvme_ctrl *nctrl =
-		container_of(work, struct nvme_ctrl, reset_work);
-
-	nvme_stop_ctrl(nctrl);
-	nvme_tcp_ofld_teardown_ctrl(nctrl, false);
-
-	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
-		/* state change failure is ok if we started ctrl delete */
-		WARN_ON_ONCE(nctrl->state != NVME_CTRL_DELETING &&
-			     nctrl->state != NVME_CTRL_DELETING_NOIO);
-
-		return;
-	}
-
-	if (nvme_tcp_ofld_setup_ctrl(nctrl, false))
-		goto out_fail;
-
-	return;
-
-out_fail:
-	++nctrl->nr_reconnects;
-	nvme_tcp_ofld_reconnect_or_remove(nctrl);
-}
-
-static int
-nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
-			   struct request *rq,
-			   unsigned int hctx_idx,
-			   unsigned int numa_node)
-{
-	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_tcp_ofld_ctrl *ctrl = set->driver_data;
-	int qid;
-
-	qid = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
-	req->queue = &ctrl->queues[qid];
-	nvme_req(rq)->ctrl = &ctrl->nctrl;
-	nvme_req(rq)->cmd = &req->nvme_cmd;
-	req->done = nvme_tcp_ofld_req_done;
-
-	return 0;
-}
-
-inline size_t nvme_tcp_ofld_inline_data_size(struct nvme_tcp_ofld_queue *queue)
-{
-	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
-}
-EXPORT_SYMBOL_GPL(nvme_tcp_ofld_inline_data_size);
-
-static blk_status_t
-nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
-		       const struct blk_mq_queue_data *bd)
-{
-	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(bd->rq);
-	struct nvme_tcp_ofld_queue *queue = hctx->driver_data;
-	struct nvme_tcp_ofld_ctrl *ctrl = queue->ctrl;
-	struct nvme_ns *ns = hctx->queue->queuedata;
-	struct nvme_tcp_ofld_dev *dev = queue->dev;
-	struct nvme_tcp_ofld_ops *ops = dev->ops;
-	struct nvme_command *nvme_cmd;
-	struct request *rq = bd->rq;
-	bool queue_ready;
-	u32 data_len;
-	int rc;
-
-	queue_ready = test_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags);
-
-	req->async = false;
-
-	if (!nvme_check_ready(&ctrl->nctrl, rq, queue_ready))
-		return nvme_fail_nonready_command(&ctrl->nctrl, rq);
-
-	rc = nvme_setup_cmd(ns, rq);
-	if (unlikely(rc))
-		return rc;
-
-	blk_mq_start_request(rq);
-
-	nvme_cmd = &req->nvme_cmd;
-	nvme_cmd->common.flags |= NVME_CMD_SGL_METABUF;
-
-	data_len = blk_rq_nr_phys_segments(rq) ? blk_rq_payload_bytes(rq) : 0;
-	if (!data_len)
-		nvme_tcp_ofld_set_sg_null(&req->nvme_cmd);
-	else if ((rq_data_dir(rq) == WRITE) &&
-		 data_len <= nvme_tcp_ofld_inline_data_size(queue))
-		nvme_tcp_ofld_set_sg_inline(queue, nvme_cmd, data_len);
-	else
-		nvme_tcp_ofld_map_data(nvme_cmd, data_len);
-
-	rc = ops->send_req(req);
-	if (unlikely(rc))
-		return rc;
-
-	return BLK_STS_OK;
-}
-
-static void
-nvme_tcp_ofld_exit_request(struct blk_mq_tag_set *set,
-			   struct request *rq, unsigned int hctx_idx)
-{
-	/*
-	 * Nothing is allocated in nvme_tcp_ofld_init_request,
-	 * hence empty.
-	 */
-}
-
-static int
-nvme_tcp_ofld_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			unsigned int hctx_idx)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = data;
-
-	hctx->driver_data = &ctrl->queues[hctx_idx + 1];
-
-	return 0;
-}
-
-static int nvme_tcp_ofld_map_queues(struct blk_mq_tag_set *set)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl = set->driver_data;
-	struct nvmf_ctrl_options *opts = ctrl->nctrl.opts;
-
-	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
-		/* separate read/write queues */
-		set->map[HCTX_TYPE_DEFAULT].nr_queues =
-			ctrl->io_queues[HCTX_TYPE_DEFAULT];
-		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-		set->map[HCTX_TYPE_READ].nr_queues =
-			ctrl->io_queues[HCTX_TYPE_READ];
-		set->map[HCTX_TYPE_READ].queue_offset =
-			ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	} else {
-		/* shared read/write queues */
-		set->map[HCTX_TYPE_DEFAULT].nr_queues =
-			ctrl->io_queues[HCTX_TYPE_DEFAULT];
-		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-		set->map[HCTX_TYPE_READ].nr_queues =
-			ctrl->io_queues[HCTX_TYPE_DEFAULT];
-		set->map[HCTX_TYPE_READ].queue_offset = 0;
-	}
-	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
-	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
-
-	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
-		/* map dedicated poll queues only if we have queues left */
-		set->map[HCTX_TYPE_POLL].nr_queues =
-				ctrl->io_queues[HCTX_TYPE_POLL];
-		set->map[HCTX_TYPE_POLL].queue_offset =
-			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
-			ctrl->io_queues[HCTX_TYPE_READ];
-		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
-	}
-
-	dev_info(ctrl->nctrl.device,
-		 "mapped %d/%d/%d default/read/poll queues.\n",
-		 ctrl->io_queues[HCTX_TYPE_DEFAULT],
-		 ctrl->io_queues[HCTX_TYPE_READ],
-		 ctrl->io_queues[HCTX_TYPE_POLL]);
-
-	return 0;
-}
-
-static int nvme_tcp_ofld_poll(struct blk_mq_hw_ctx *hctx)
-{
-	struct nvme_tcp_ofld_queue *queue = hctx->driver_data;
-	struct nvme_tcp_ofld_dev *dev = queue->dev;
-	struct nvme_tcp_ofld_ops *ops = dev->ops;
-
-	return ops->poll_queue(queue);
-}
-
-static void nvme_tcp_ofld_complete_timed_out(struct request *rq)
-{
-	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_ctrl *nctrl = &req->queue->ctrl->nctrl;
-
-	nvme_tcp_ofld_stop_queue(nctrl, nvme_tcp_ofld_qid(req->queue));
-	if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
-		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
-		blk_mq_complete_request(rq);
-	}
-}
-
-static enum blk_eh_timer_return nvme_tcp_ofld_timeout(struct request *rq, bool reserved)
-{
-	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_tcp_ofld_ctrl *ctrl = req->queue->ctrl;
-
-	dev_warn(ctrl->nctrl.device,
-		 "queue %d: timeout request %#x type %d\n",
-		 nvme_tcp_ofld_qid(req->queue), rq->tag, req->nvme_cmd.common.opcode);
-
-	if (ctrl->nctrl.state != NVME_CTRL_LIVE) {
-		/*
-		 * If we are resetting, connecting or deleting we should
-		 * complete immediately because we may block controller
-		 * teardown or setup sequence
-		 * - ctrl disable/shutdown fabrics requests
-		 * - connect requests
-		 * - initialization admin requests
-		 * - I/O requests that entered after unquiescing and
-		 *   the controller stopped responding
-		 *
-		 * All other requests should be cancelled by the error
-		 * recovery work, so it's fine that we fail it here.
-		 */
-		nvme_tcp_ofld_complete_timed_out(rq);
-
-		return BLK_EH_DONE;
-	}
-
-	nvme_tcp_ofld_error_recovery(&ctrl->nctrl);
-
-	return BLK_EH_RESET_TIMER;
-}
-
-static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
-	.queue_rq	= nvme_tcp_ofld_queue_rq,
-	.complete	= nvme_complete_rq,
-	.init_request	= nvme_tcp_ofld_init_request,
-	.exit_request	= nvme_tcp_ofld_exit_request,
-	.init_hctx	= nvme_tcp_ofld_init_hctx,
-	.timeout	= nvme_tcp_ofld_timeout,
-	.map_queues	= nvme_tcp_ofld_map_queues,
-	.poll		= nvme_tcp_ofld_poll,
-};
-
-static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops = {
-	.queue_rq	= nvme_tcp_ofld_queue_rq,
-	.complete	= nvme_complete_rq,
-	.init_request	= nvme_tcp_ofld_init_request,
-	.exit_request	= nvme_tcp_ofld_exit_request,
-	.init_hctx	= nvme_tcp_ofld_init_admin_hctx,
-	.timeout	= nvme_tcp_ofld_timeout,
-};
-
-static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
-	.name			= "tcp_offload",
-	.module			= THIS_MODULE,
-	.flags			= NVME_F_FABRICS,
-	.reg_read32		= nvmf_reg_read32,
-	.reg_read64		= nvmf_reg_read64,
-	.reg_write32		= nvmf_reg_write32,
-	.free_ctrl		= nvme_tcp_ofld_free_ctrl,
-	.submit_async_event     = nvme_tcp_ofld_submit_async_event,
-	.delete_ctrl		= nvme_tcp_ofld_delete_ctrl,
-	.get_address		= nvmf_get_address,
-};
-
-static bool
-nvme_tcp_ofld_existing_controller(struct nvmf_ctrl_options *opts)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl;
-	bool found = false;
-
-	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
-	list_for_each_entry(ctrl, &nvme_tcp_ofld_ctrl_list, list) {
-		found = nvmf_ip_options_match(&ctrl->nctrl, opts);
-		if (found)
-			break;
-	}
-	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
-
-	return found;
-}
-
-static struct nvme_ctrl *
-nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
-{
-	struct nvme_tcp_ofld_queue *queue;
-	struct nvme_tcp_ofld_ctrl *ctrl;
-	struct nvme_tcp_ofld_dev *dev;
-	struct nvme_ctrl *nctrl;
-	int i, rc = 0;
-
-	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
-	if (!ctrl)
-		return ERR_PTR(-ENOMEM);
-
-	INIT_LIST_HEAD(&ctrl->list);
-	nctrl = &ctrl->nctrl;
-	nctrl->opts = opts;
-	nctrl->queue_count = opts->nr_io_queues + opts->nr_write_queues +
-			     opts->nr_poll_queues + 1;
-	nctrl->sqsize = opts->queue_size - 1;
-	nctrl->kato = opts->kato;
-	INIT_DELAYED_WORK(&ctrl->connect_work,
-			  nvme_tcp_ofld_reconnect_ctrl_work);
-	INIT_WORK(&ctrl->err_work, nvme_tcp_ofld_error_recovery_work);
-	INIT_WORK(&nctrl->reset_work, nvme_tcp_ofld_reset_ctrl_work);
-	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
-		opts->trsvcid =
-			kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
-		if (!opts->trsvcid) {
-			rc = -ENOMEM;
-			goto out_free_ctrl;
-		}
-		opts->mask |= NVMF_OPT_TRSVCID;
-	}
-
-	rc = inet_pton_with_scope(&init_net, AF_UNSPEC, opts->traddr,
-				  opts->trsvcid,
-				  &ctrl->conn_params.remote_ip_addr);
-	if (rc) {
-		pr_err("malformed address passed: %s:%s\n",
-		       opts->traddr, opts->trsvcid);
-		goto out_free_ctrl;
-	}
-
-	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
-		rc = inet_pton_with_scope(&init_net, AF_UNSPEC,
-					  opts->host_traddr, NULL,
-					  &ctrl->conn_params.local_ip_addr);
-		if (rc) {
-			pr_err("malformed src address passed: %s\n",
-			       opts->host_traddr);
-			goto out_free_ctrl;
-		}
-	}
-
-	if (!opts->duplicate_connect &&
-	    nvme_tcp_ofld_existing_controller(opts)) {
-		rc = -EALREADY;
-		goto out_free_ctrl;
-	}
-
-	/* Find device that can reach the dest addr */
-	dev = nvme_tcp_ofld_lookup_dev(ctrl);
-	if (!dev) {
-		pr_info("no device found for addr %s:%s.\n",
-			opts->traddr, opts->trsvcid);
-		rc = -EINVAL;
-		goto out_free_ctrl;
-	}
-
-	/* Increase driver refcnt */
-	if (!try_module_get(dev->ops->module)) {
-		pr_err("try_module_get failed\n");
-		dev = NULL;
-		goto out_free_ctrl;
-	}
-
-	rc = nvme_tcp_ofld_check_dev_opts(opts, dev->ops);
-	if (rc)
-		goto out_module_put;
-
-	ctrl->dev = dev;
-
-	if (ctrl->dev->ops->max_hw_sectors)
-		nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors;
-	if (ctrl->dev->ops->max_segments)
-		nctrl->max_segments = ctrl->dev->ops->max_segments;
-
-	ctrl->queues = kcalloc(nctrl->queue_count,
-			       sizeof(struct nvme_tcp_ofld_queue),
-			       GFP_KERNEL);
-	if (!ctrl->queues) {
-		rc = -ENOMEM;
-		goto out_module_put;
-	}
-
-	for (i = 0; i < nctrl->queue_count; ++i) {
-		queue = &ctrl->queues[i];
-		queue->ctrl = ctrl;
-		queue->dev = dev;
-		queue->report_err = nvme_tcp_ofld_report_queue_err;
-	}
-
-	rc = nvme_init_ctrl(nctrl, ndev, &nvme_tcp_ofld_ctrl_ops, 0);
-	if (rc)
-		goto out_free_queues;
-
-	if (!nvme_change_ctrl_state(nctrl, NVME_CTRL_CONNECTING)) {
-		WARN_ON_ONCE(1);
-		rc = -EINTR;
-		goto out_uninit_ctrl;
-	}
-
-	rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);
-	if (rc)
-		goto out_uninit_ctrl;
-
-	dev_info(nctrl->device, "new ctrl: NQN \"%s\", addr %pISp\n",
-		 opts->subsysnqn, &ctrl->conn_params.remote_ip_addr);
-
-	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
-	list_add_tail(&ctrl->list, &nvme_tcp_ofld_ctrl_list);
-	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
-
-	return nctrl;
-
-out_uninit_ctrl:
-	nvme_uninit_ctrl(nctrl);
-	nvme_put_ctrl(nctrl);
-out_free_queues:
-	kfree(ctrl->queues);
-out_module_put:
-	module_put(dev->ops->module);
-out_free_ctrl:
-	kfree(ctrl);
-
-	return ERR_PTR(rc);
-}
-
-static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
-	.name		= "tcp_offload",
-	.module		= THIS_MODULE,
-	.required_opts	= NVMF_OPT_TRADDR,
-	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_NR_WRITE_QUEUES  |
-			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
-			  NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |
-			  NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |
-			  NVMF_OPT_TOS,
-	.create_ctrl	= nvme_tcp_ofld_create_ctrl,
-};
-
-static int __init nvme_tcp_ofld_init_module(void)
-{
-	nvmf_register_transport(&nvme_tcp_ofld_transport);
-
-	return 0;
-}
-
-static void __exit nvme_tcp_ofld_cleanup_module(void)
-{
-	struct nvme_tcp_ofld_ctrl *ctrl;
-
-	nvmf_unregister_transport(&nvme_tcp_ofld_transport);
-
-	mutex_lock(&nvme_tcp_ofld_ctrl_mutex);
-	list_for_each_entry(ctrl, &nvme_tcp_ofld_ctrl_list, list)
-		nvme_delete_ctrl(&ctrl->nctrl);
-	mutex_unlock(&nvme_tcp_ofld_ctrl_mutex);
-	flush_workqueue(nvme_delete_wq);
-}
-
-module_init(nvme_tcp_ofld_init_module);
-module_exit(nvme_tcp_ofld_cleanup_module);
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvme/host/tcp-offload.h b/drivers/nvme/host/tcp-offload.h
deleted file mode 100644
index 2ac5b2428612..000000000000
--- a/drivers/nvme/host/tcp-offload.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2021 Marvell. All rights reserved.
- */
-
-/* Linux includes */
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-#include <linux/types.h>
-#include <linux/nvme-tcp.h>
-
-/* Driver includes */
-#include "nvme.h"
-#include "fabrics.h"
-
-/* Forward declarations */
-struct nvme_tcp_ofld_ops;
-
-/* Representation of a vendor-specific device. This is the struct used to
- * register to the offload layer by the vendor-specific driver during its probe
- * function.
- * Allocated by vendor-specific driver.
- */
-struct nvme_tcp_ofld_dev {
-	struct list_head entry;
-	struct net_device *ndev;
-	struct nvme_tcp_ofld_ops *ops;
-
-	/* Vendor specific driver context */
-	int num_hw_vectors;
-};
-
-/* Per IO struct holding the nvme_request and command
- * Allocated by blk-mq.
- */
-struct nvme_tcp_ofld_req {
-	struct nvme_request req;
-	struct nvme_command nvme_cmd;
-	struct list_head queue_entry;
-	struct nvme_tcp_ofld_queue *queue;
-
-	/* Vendor specific driver context */
-	void *private_data;
-
-	/* async flag is used to distinguish between async and IO flow
-	 * in common send_req() of nvme_tcp_ofld_ops.
-	 */
-	bool async;
-
-	void (*done)(struct nvme_tcp_ofld_req *req,
-		     union nvme_result *result,
-		     __le16 status);
-};
-
-enum nvme_tcp_ofld_queue_flags {
-	NVME_TCP_OFLD_Q_ALLOCATED = 0,
-	NVME_TCP_OFLD_Q_LIVE = 1,
-};
-
-/* Allocated by nvme_tcp_ofld */
-struct nvme_tcp_ofld_queue {
-	/* Offload device associated to this queue */
-	struct nvme_tcp_ofld_dev *dev;
-	struct nvme_tcp_ofld_ctrl *ctrl;
-	unsigned long flags;
-	size_t cmnd_capsule_len;
-
-	/* mutex used during stop_queue */
-	struct mutex queue_lock;
-
-	u8 hdr_digest;
-	u8 data_digest;
-	u8 tos;
-
-	/* Vendor specific driver context */
-	void *private_data;
-
-	/* Error callback function */
-	int (*report_err)(struct nvme_tcp_ofld_queue *queue);
-};
-
-/* Connectivity (routing) params used for establishing a connection */
-struct nvme_tcp_ofld_ctrl_con_params {
-	struct sockaddr_storage remote_ip_addr;
-
-	/* If NVMF_OPT_HOST_TRADDR is provided it will be set in local_ip_addr
-	 * in nvme_tcp_ofld_create_ctrl().
-	 * If NVMF_OPT_HOST_TRADDR is not provided the local_ip_addr will be
-	 * initialized by claim_dev().
-	 */
-	struct sockaddr_storage local_ip_addr;
-};
-
-/* Allocated by nvme_tcp_ofld */
-struct nvme_tcp_ofld_ctrl {
-	struct nvme_ctrl nctrl;
-	struct list_head list;
-	struct nvme_tcp_ofld_dev *dev;
-
-	/* admin and IO queues */
-	struct blk_mq_tag_set tag_set;
-	struct blk_mq_tag_set admin_tag_set;
-	struct nvme_tcp_ofld_queue *queues;
-
-	struct work_struct err_work;
-	struct delayed_work connect_work;
-
-	/*
-	 * Each entry in the array indicates the number of queues of
-	 * corresponding type.
-	 */
-	u32 io_queues[HCTX_MAX_TYPES];
-
-	/* Connectivity params */
-	struct nvme_tcp_ofld_ctrl_con_params conn_params;
-
-	struct nvme_tcp_ofld_req async_req;
-
-	/* Vendor specific driver context */
-	void *private_data;
-};
-
-struct nvme_tcp_ofld_ops {
-	const char *name;
-	struct module *module;
-
-	/* For vendor-specific driver to report what opts it supports.
-	 * It could be different than the ULP supported opts due to hardware
-	 * limitations. Also it could be different among different vendor
-	 * drivers.
-	 */
-	int required_opts; /* bitmap using enum nvmf_parsing_opts */
-	int allowed_opts; /* bitmap using enum nvmf_parsing_opts */
-
-	/* For vendor-specific max num of segments and IO sizes */
-	u32 max_hw_sectors;
-	u32 max_segments;
-
-	/**
-	 * claim_dev: Return True if addr is reachable via offload device.
-	 * @dev: The offload device to check.
-	 * @ctrl: The offload ctrl have the conn_params field. The
-	 * conn_params is to be filled with routing params by the lower
-	 * driver.
-	 */
-	int (*claim_dev)(struct nvme_tcp_ofld_dev *dev,
-			 struct nvme_tcp_ofld_ctrl *ctrl);
-
-	/**
-	 * setup_ctrl: Setup device specific controller structures.
-	 * @ctrl: The offload ctrl.
-	 */
-	int (*setup_ctrl)(struct nvme_tcp_ofld_ctrl *ctrl);
-
-	/**
-	 * release_ctrl: Release/Free device specific controller structures.
-	 * @ctrl: The offload ctrl.
-	 */
-	int (*release_ctrl)(struct nvme_tcp_ofld_ctrl *ctrl);
-
-	/**
-	 * create_queue: Create offload queue and establish TCP + NVMeTCP
-	 * (icreq+icresp) connection. Return true on successful connection.
-	 * Based on nvme_tcp_alloc_queue.
-	 * @queue: The queue itself - used as input and output.
-	 * @qid: The queue ID associated with the requested queue.
-	 * @q_size: The queue depth.
-	 */
-	int (*create_queue)(struct nvme_tcp_ofld_queue *queue, int qid,
-			    size_t queue_size);
-
-	/**
-	 * drain_queue: Drain a given queue - blocking function call.
-	 * Return from this function ensures that no additional
-	 * completions will arrive on this queue and that the HW will
-	 * not access host memory.
-	 * @queue: The queue to drain.
-	 */
-	void (*drain_queue)(struct nvme_tcp_ofld_queue *queue);
-
-	/**
-	 * destroy_queue: Close the TCP + NVMeTCP connection of a given queue
-	 * and make sure its no longer active (no completions will arrive on the
-	 * queue).
-	 * @queue: The queue to destroy.
-	 */
-	void (*destroy_queue)(struct nvme_tcp_ofld_queue *queue);
-
-	/**
-	 * poll_queue: Poll a given queue for completions.
-	 * @queue: The queue to poll.
-	 */
-	int (*poll_queue)(struct nvme_tcp_ofld_queue *queue);
-
-	/**
-	 * send_req: Dispatch a request. Returns the execution status.
-	 * @req: Ptr to request to be sent.
-	 */
-	int (*send_req)(struct nvme_tcp_ofld_req *req);
-};
-
-/* Exported functions for lower vendor specific offload drivers */
-int nvme_tcp_ofld_register_dev(struct nvme_tcp_ofld_dev *dev);
-void nvme_tcp_ofld_unregister_dev(struct nvme_tcp_ofld_dev *dev);
-void nvme_tcp_ofld_error_recovery(struct nvme_ctrl *nctrl);
-inline size_t nvme_tcp_ofld_inline_data_size(struct nvme_tcp_ofld_queue *queue);

From db8f7be1e1d64fbf113a456ef94534fbf5e9a9af Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 21:25:15 +0800
Subject: [PATCH 1280/3292] net: sgi: ioc3-eth: check return value after
 calling platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sgi/ioc3-eth.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 6eef0f45b133..2b29fd4cbdf4 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -835,6 +835,10 @@ static int ioc3eth_probe(struct platform_device *pdev)
 	int err;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs) {
+		dev_err(&pdev->dev, "Invalid resource\n");
+		return -EINVAL;
+	}
 	/* get mac addr from one wire prom */
 	if (ioc3eth_get_mac_addr(regs, mac_addr))
 		return -EPROBE_DEFER; /* not available yet */

From 3a5a32b5f2c1c7a1657a44bb9bc571f0df1d5a81 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 21:36:55 +0800
Subject: [PATCH 1281/3292] net: stmmac: Use
 devm_platform_ioremap_resource_byname()

Use the devm_platform_ioremap_resource_byname() helper instead of
calling platform_get_resource_byname() and devm_ioremap_resource()
separately.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 84382fc5cc4d..5c74b6279d69 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -454,7 +454,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
 	struct stmmac_resources stmmac_res;
 	const struct ethqos_emac_driver_data *data;
 	struct qcom_ethqos *ethqos;
-	struct resource *res;
 	int ret;
 
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
@@ -474,8 +473,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
 	}
 
 	ethqos->pdev = pdev;
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rgmii");
-	ethqos->rgmii_base = devm_ioremap_resource(&pdev->dev, res);
+	ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii");
 	if (IS_ERR(ethqos->rgmii_base)) {
 		ret = PTR_ERR(ethqos->rgmii_base);
 		goto err_mem;

From e77e2cf4a198a3eb0fc25c13bbbbe07afb74079f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 21:45:37 +0800
Subject: [PATCH 1282/3292] net: ethernet: ti: am65-cpts: Use
 devm_platform_ioremap_resource_byname()

Use the devm_platform_ioremap_resource_byname() helper instead of
calling platform_get_resource_byname() and devm_ioremap_resource()
separately.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpts.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
index 9caaae79fc95..c30a6e510aa3 100644
--- a/drivers/net/ethernet/ti/am65-cpts.c
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -1037,11 +1037,9 @@ static int am65_cpts_probe(struct platform_device *pdev)
 	struct device_node *node = pdev->dev.of_node;
 	struct device *dev = &pdev->dev;
 	struct am65_cpts *cpts;
-	struct resource *res;
 	void __iomem *base;
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cpts");
-	base = devm_ioremap_resource(dev, res);
+	base = devm_platform_ioremap_resource_byname(pdev, "cpts");
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 

From d8ea89fe8a49bfa18b009b16b66c137dba263f87 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Wed, 9 Jun 2021 15:47:13 +0200
Subject: [PATCH 1283/3292] mvpp2: prefetch right address

In the RX buffer, the received data starts after a headroom used to
align the IP header and to allow prepending headers efficiently.
The prefetch() should take this into account, and prefetch from
the very start of the received data.

We can see that ether_addr_equal_64bits(), which is the first function
to access the data, drops from the top of the perf top output.

prefetch(data):

Overhead  Shared Object     Symbol
  11.64%  [kernel]          [k] eth_type_trans

prefetch(data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM):

Overhead  Shared Object     Symbol
  13.42%  [kernel]          [k] build_skb
  10.35%  [mvpp2]           [k] mvpp2_rx
   9.35%  [kernel]          [k] __netif_receive_skb_core
   8.24%  [kernel]          [k] kmem_cache_free
   7.97%  [kernel]          [k] dev_gro_receive
   7.68%  [kernel]          [k] page_pool_put_page
   7.32%  [kernel]          [k] kmem_cache_alloc
   7.09%  [mvpp2]           [k] mvpp2_bm_pool_put
   3.36%  [kernel]          [k] eth_type_trans

Also, move the eth_type_trans() call a bit down, to give the RAM more
time to prefetch the data.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 5663c1b21870..07d8f3e31b52 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3938,7 +3938,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 			goto err_drop_frame;
 
 		/* Prefetch header */
-		prefetch(data);
+		prefetch(data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM);
 
 		if (bm_pool->frag_size > PAGE_SIZE)
 			frag_size = 0;
@@ -4008,8 +4008,8 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 
 		skb_reserve(skb, MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM);
 		skb_put(skb, rx_bytes);
-		skb->protocol = eth_type_trans(skb, dev);
 		mvpp2_rx_csum(port, rx_status, skb);
+		skb->protocol = eth_type_trans(skb, dev);
 
 		napi_gro_receive(napi, skb);
 		continue;

From 2f128eb3308a74ef478286b75e26aa6d0ed3c6a6 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Wed, 9 Jun 2021 15:47:14 +0200
Subject: [PATCH 1284/3292] mvpp2: prefetch page
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most of the time during the RX is caused by the compound_head() call
done at the end of the RX loop:

       │     build_skb():
       [...]
       │     static inline struct page *compound_head(struct page *page)
       │     {
       │     unsigned long head = READ_ONCE(page->compound_head);
 65.23 │       ldr  x2, [x1, #8]

Prefetch the page struct as soon as possible, to speedup the RX path
noticeabily by a ~3-4% packet rate in a drop test.

       │     build_skb():
       [...]
       │     static inline struct page *compound_head(struct page *page)
       │     {
       │     unsigned long head = READ_ONCE(page->compound_head);
 17.92 │       ldr  x2, [x1, #8]

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 07d8f3e31b52..9bca8c8f9f8d 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3900,15 +3900,19 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 		phys_addr_t phys_addr;
 		u32 rx_status, timestamp;
 		int pool, rx_bytes, err, ret;
+		struct page *page;
 		void *data;
 
+		phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
+		data = (void *)phys_to_virt(phys_addr);
+		page = virt_to_page(data);
+		prefetch(page);
+
 		rx_done++;
 		rx_status = mvpp2_rxdesc_status_get(port, rx_desc);
 		rx_bytes = mvpp2_rxdesc_size_get(port, rx_desc);
 		rx_bytes -= MVPP2_MH_SIZE;
 		dma_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc);
-		phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
-		data = (void *)phys_to_virt(phys_addr);
 
 		pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
 			MVPP2_RXD_BM_POOL_ID_OFFS;
@@ -3997,7 +4001,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 		}
 
 		if (pp)
-			skb_mark_for_recycle(skb, virt_to_page(data), pp);
+			skb_mark_for_recycle(skb, page, pp);
 		else
 			dma_unmap_single_attrs(dev->dev.parent, dma_addr,
 					       bm_pool->buf_size, DMA_FROM_DEVICE,

From ba539319cce68e670bfc16571da89066046719e2 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 21:51:38 +0800
Subject: [PATCH 1285/3292] net: ethernet: ti: cpsw-phy-sel: Use
 devm_platform_ioremap_resource_byname()

Use the devm_platform_ioremap_resource_byname() helper instead of
calling platform_get_resource_byname() and devm_ioremap_resource()
separately.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw-phy-sel.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 6e72ecbe5cf7..e8f38e3f7706 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -206,7 +206,6 @@ static const struct of_device_id cpsw_phy_sel_id_table[] = {
 
 static int cpsw_phy_sel_probe(struct platform_device *pdev)
 {
-	struct resource	*res;
 	const struct of_device_id *of_id;
 	struct cpsw_phy_sel_priv *priv;
 
@@ -223,8 +222,7 @@ static int cpsw_phy_sel_probe(struct platform_device *pdev)
 	priv->dev = &pdev->dev;
 	priv->cpsw_phy_sel = of_id->data;
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gmii-sel");
-	priv->gmii_sel = devm_ioremap_resource(&pdev->dev, res);
+	priv->gmii_sel = devm_platform_ioremap_resource_byname(pdev, "gmii-sel");
 	if (IS_ERR(priv->gmii_sel))
 		return PTR_ERR(priv->gmii_sel);
 

From 345502af4e42cef57782118520c3c326b55f1071 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 18:05:12 +0100
Subject: [PATCH 1286/3292] net: stmmac: Fix missing { } around two statements
 in an if statement

There are missing { } around a block of code on an if statement. Fix this
by adding them in.

Addresses-Coverity: ("Nesting level does not match indentation")
Fixes: 46682cb86a37 ("net: stmmac: enable Intel mGbE 2.5Gbps link speed")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1c881ec8cd04..1f817b1b890c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -932,9 +932,10 @@ static void stmmac_validate(struct phylink_config *config,
 		phylink_set(mask, 1000baseT_Full);
 		phylink_set(mask, 1000baseX_Full);
 	} else if (priv->plat->has_gmac4) {
-		if (!max_speed || max_speed >= 2500)
+		if (!max_speed || max_speed >= 2500) {
 			phylink_set(mac_supported, 2500baseT_Full);
 			phylink_set(mac_supported, 2500baseX_Full);
+		}
 	} else if (priv->plat->has_xgmac) {
 		if (!max_speed || (max_speed >= 2500)) {
 			phylink_set(mac_supported, 2500baseT_Full);

From f25247d88708ff0666573541923a7339845403de Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 18:17:48 +0100
Subject: [PATCH 1287/3292] net: phy: realtek: net: Fix less than zero
 comparison of a u16

The comparisons of the u16 values priv->phycr1 and priv->phycr2 to less
than zero always false because they are unsigned. Fix this by using an
int for the assignment and less than zero check.

Addresses-Coverity: ("Unsigned compared against 0")
Fixes: 0a4355c2b7f8 ("net: phy: realtek: add dt property to disable CLKOUT clock")
Fixes: d90db36a9e74 ("net: phy: realtek: add dt property to enable ALDPS mode")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 1b844a06fe72..11be60333fa8 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -94,24 +94,25 @@ static int rtl821x_probe(struct phy_device *phydev)
 {
 	struct device *dev = &phydev->mdio.dev;
 	struct rtl821x_priv *priv;
+	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
-	priv->phycr1 = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR1);
-	if (priv->phycr1 < 0)
-		return priv->phycr1;
+	ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR1);
+	if (ret < 0)
+		return ret;
 
-	priv->phycr1 &= (RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF);
+	priv->phycr1 = ret & (RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF);
 	if (of_property_read_bool(dev->of_node, "realtek,aldps-enable"))
 		priv->phycr1 |= RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF;
 
-	priv->phycr2 = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2);
-	if (priv->phycr2 < 0)
-		return priv->phycr2;
+	ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2);
+	if (ret < 0)
+		return ret;
 
-	priv->phycr2 &= RTL8211F_CLKOUT_EN;
+	priv->phycr2 = ret & RTL8211F_CLKOUT_EN;
 	if (of_property_read_bool(dev->of_node, "realtek,clkout-disable"))
 		priv->phycr2 &= ~RTL8211F_CLKOUT_EN;
 

From 6fb566c9278a2ea65a12fdff665bead392f4c0d2 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 9 Jun 2021 14:25:06 +0000
Subject: [PATCH 1288/3292] net: ena: make symbol 'ena_alloc_map_page' static

The sparse tool complains as follows:

drivers/net/ethernet/amazon/ena/ena_netdev.c:978:13: warning:
 symbol 'ena_alloc_map_page' was not declared. Should it be static?

This symbol is not used outside of ena_netdev.c, so marks it static.

Fixes: 947c54c395cb ("net: ena: Use dev_alloc() in RX buffer allocation")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index cd6ea59c543c..3bb0e66b2c7e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -975,7 +975,8 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 		ena_free_rx_resources(adapter, i);
 }
 
-struct page *ena_alloc_map_page(struct ena_ring *rx_ring, dma_addr_t *dma)
+static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
+				       dma_addr_t *dma)
 {
 	struct page *page;
 

From 8b8701d0b4925807a6d4dc4699cb80a1e16218ad Mon Sep 17 00:00:00 2001
From: Kristian Evensen <kristian.evensen@gmail.com>
Date: Wed, 9 Jun 2021 16:32:49 +0200
Subject: [PATCH 1289/3292] net: ethernet: rmnet: Always subtract MAP header

Commit e1d9a90a9bfd ("net: ethernet: rmnet: Support for ingress MAPv5
checksum offload") broke ingress handling for devices where
RMNET_FLAGS_INGRESS_MAP_CKSUMV5 or RMNET_FLAGS_INGRESS_MAP_CKSUMV4 are
not set. Unless either of these flags are set, the MAP header is not
removed. This commit restores the original logic by ensuring that the
MAP header is removed for all MAP packets.

Fixes: e1d9a90a9bfd ("net: ethernet: rmnet: Support for ingress MAPv5 checksum offload")
Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 2504d0363b6b..bfbd7847f946 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -88,11 +88,12 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 			goto free_skb;
 		skb_pull(skb, sizeof(*map_header));
 		rmnet_set_skb_proto(skb);
-	} else if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
+	} else {
 		/* Subtract MAP header */
 		skb_pull(skb, sizeof(*map_header));
 		rmnet_set_skb_proto(skb);
-		if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
+		if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4 &&
+		    !rmnet_map_checksum_downlink_packet(skb, len + pad))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 

From f3b5a8907543e2c539d09d01a1732826e070d351 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 18:56:57 +0100
Subject: [PATCH 1290/3292] mlxsw: thermal: Fix null dereference of NULL
 temperature parameter

The call to mlxsw_thermal_module_temp_and_thresholds_get passes a NULL
pointer for the temperature and this can be dereferenced in this function
if the mlxsw_reg_query call fails.  The simplist fix is to pass the
address of dummy temperature variable instead of a NULL pointer.

Addresses-Coverity: ("Explicit null dereferenced")
Fixes: 72a64c2fe9d8 ("mlxsw: thermal: Read module temperature thresholds using MTMP register")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index b96fb88aac0a..677a53f65008 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -742,7 +742,7 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
 			  struct mlxsw_thermal *thermal, u8 module)
 {
 	struct mlxsw_thermal_module *module_tz;
-	int crit_temp, emerg_temp;
+	int dummy_temp, crit_temp, emerg_temp;
 	u16 sensor_index;
 
 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
@@ -757,7 +757,7 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
 	/* Initialize all trip point. */
 	mlxsw_thermal_module_trips_reset(module_tz);
 	/* Read module temperature and thresholds. */
-	mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, NULL,
+	mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, &dummy_temp,
 						     &crit_temp, &emerg_temp);
 	/* Update trip point according to the module data. */
 	return mlxsw_thermal_module_trips_update(dev, core, module_tz,

From 4744bf072b4640c5e2ea65c2361ad6c832f28fa8 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Wed, 9 Jun 2021 19:23:03 +0200
Subject: [PATCH 1291/3292] stmmac: prefetch right address

To support XDP, a headroom is prepended to the packet data.
Consider this offset when doing a prefetch.

Fixes: da5ec7f22a0f ("net: stmmac: refactor stmmac_init_rx_buffers for stmmac_reinit_rx_buffers")
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1f817b1b890c..180f347b4c8e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5131,7 +5131,7 @@ read_again:
 
 		/* Buffer is good. Go on. */
 
-		prefetch(page_address(buf->page));
+		prefetch(page_address(buf->page) + buf->page_offset);
 		if (buf->sec_page)
 			prefetch(page_address(buf->sec_page));
 

From ab324d8dfddad04bec0e8421242716504e31e204 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 18:43:53 +0100
Subject: [PATCH 1292/3292] net: dsa: sja1105: Fix assigned yet unused return
 code rc

The return code variable rc is being set to return error values in two
places in sja1105_mdiobus_base_tx_register and yet it is not being
returned, the function always returns 0 instead. Fix this by replacing
the return 0 with the return code rc.

Addresses-Coverity: ("Unused value")
Fixes: 5a8f09748ee7 ("net: dsa: sja1105: register the MDIO buses for 100base-T1 and 100base-TX")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 8dfd06318b23..08517c70cb48 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -171,7 +171,7 @@ static int sja1105_mdiobus_base_tx_register(struct sja1105_private *priv,
 out_put_np:
 	of_node_put(np);
 
-	return 0;
+	return rc;
 }
 
 static void sja1105_mdiobus_base_tx_unregister(struct sja1105_private *priv)

From f636a83662ffdc3e05526770e73628485f4a53de Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:53 -0500
Subject: [PATCH 1293/3292] net: ipa: define IPA_MEM_END_MARKER

Define a new pseudo memory region identifer that specifies the
offset at the end of IPA resident memory.  Use it instead of
IPA_MEM_UC_EVENT_RING in places where the size of that region was
defined to be 0.

The size of the IPA_MEM_END_MARKER pseudo region must be zero.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-v4.11.c | 2 +-
 drivers/net/ipa/ipa_data-v4.2.c  | 2 +-
 drivers/net/ipa/ipa_mem.c        | 2 ++
 drivers/net/ipa/ipa_mem.h        | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c
index 05806ceae8b5..e7bdb8b4400e 100644
--- a/drivers/net/ipa/ipa_data-v4.11.c
+++ b/drivers/net/ipa/ipa_data-v4.11.c
@@ -325,7 +325,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x100c,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_UC_EVENT_RING] = {
+	[IPA_MEM_END_MARKER] = {
 		.offset		= 0x3000,
 		.size		= 0x0000,
 		.canary_count	= 1,
diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c
index 8744f19c6401..95f75dbc3c3b 100644
--- a/drivers/net/ipa/ipa_data-v4.2.c
+++ b/drivers/net/ipa/ipa_data-v4.2.c
@@ -304,7 +304,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x140c,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_EVENT_RING] = {
+	[IPA_MEM_END_MARKER] = {
 		.offset		= 0x2000,
 		.size		= 0,
 		.canary_count	= 1,
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 1624125e7459..e3c43cf6e441 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -120,6 +120,8 @@ static bool ipa_mem_valid(struct ipa *ipa, enum ipa_mem_id mem_id)
 	else if (mem->offset + mem->size > ipa->mem_size)
 		dev_err(dev, "region %u ends beyond memory limit (0x%08x)\n",
 			mem_id, ipa->mem_size);
+	else if (mem_id == IPA_MEM_END_MARKER && mem->size)
+		dev_err(dev, "non-zero end marker region size\n");
 	else
 		return true;
 
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index a422aec69e5d..5a4f865a45af 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -70,6 +70,7 @@ enum ipa_mem_id {
 	IPA_MEM_STATS_DROP,		/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_MODEM,			/* 0/2 canaries */
 	IPA_MEM_UC_EVENT_RING,		/* 1 canary */
+	IPA_MEM_END_MARKER,		/* 1 canary (not a real region) */
 	IPA_MEM_COUNT,			/* Number of regions (not an index) */
 };
 

From 14ab6a208c11dcb7b91fda3e0866c7e6188dc553 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:54 -0500
Subject: [PATCH 1294/3292] net: ipa: store memory region id in descriptor

Store the memory region ID in the memory descriptor structure.  This
is a move toward *not* indexing the array by the ID, but for now we
must still specify those index values.  Define an explicitly
undefined region ID, value 0, so uninitialized entries in the array
won't use an otherwise valid ID.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-v3.5.1.c | 15 +++++++++++++++
 drivers/net/ipa/ipa_data-v4.11.c  | 22 ++++++++++++++++++++++
 drivers/net/ipa/ipa_data-v4.2.c   | 18 ++++++++++++++++++
 drivers/net/ipa/ipa_data-v4.5.c   | 23 +++++++++++++++++++++++
 drivers/net/ipa/ipa_data-v4.9.c   | 26 +++++++++++++++++++++++++-
 drivers/net/ipa/ipa_mem.h         |  3 +++
 6 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c
index ead1a82f32f5..945d45b72b24 100644
--- a/drivers/net/ipa/ipa_data-v3.5.1.c
+++ b/drivers/net/ipa/ipa_data-v3.5.1.c
@@ -272,76 +272,91 @@ static const struct ipa_resource_data ipa_resource_data = {
 /* IPA-resident memory region data for an SoC having IPA v3.5.1 */
 static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
+		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_UC_INFO] = {
+		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_V4_FILTER_HASHED] = {
+		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_FILTER] = {
+		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER_HASHED] = {
+		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER] = {
+		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE] = {
+		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE] = {
+		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_HEADER] = {
+		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
+		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x07d0,
 		.size		= 0x0200,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_PROC_CTX] = {
+		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x09d0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM] = {
+		.id		= IPA_MEM_MODEM,
 		.offset		= 0x0bd8,
 		.size		= 0x1024,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_UC_EVENT_RING] = {
+		.id		= IPA_MEM_UC_EVENT_RING,
 		.offset		= 0x1c00,
 		.size		= 0x0400,
 		.canary_count	= 1,
diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c
index e7bdb8b4400e..2ff3fcf4e21f 100644
--- a/drivers/net/ipa/ipa_data-v4.11.c
+++ b/drivers/net/ipa/ipa_data-v4.11.c
@@ -221,111 +221,133 @@ static const struct ipa_resource_data ipa_resource_data = {
 /* IPA-resident memory region data for an SoC having IPA v4.11 */
 static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
+		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_UC_INFO] = {
+		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_V4_FILTER_HASHED] = {
+		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_FILTER] = {
+		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER_HASHED] = {
+		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER] = {
+		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE] = {
+		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE] = {
+		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_HEADER] = {
+		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0240,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_HEADER] = {
+		.id		= IPA_MEM_AP_HEADER,
 		.offset		= 0x08c8,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
+		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x0ad0,
 		.size		= 0x0200,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_PROC_CTX] = {
+		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x0cd0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_NAT_TABLE] = {
+		.id		= IPA_MEM_NAT_TABLE,
 		.offset		= 0x0ee0,
 		.size		= 0x0d00,
 		.canary_count	= 4,
 	},
 	[IPA_MEM_PDN_CONFIG] = {
+		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x1be8,
 		.size		= 0x0050,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x1c40,
 		.size		= 0x0030,
 		.canary_count	= 4,
 	},
 	[IPA_MEM_STATS_QUOTA_AP] = {
+		.id		= IPA_MEM_STATS_QUOTA_AP,
 		.offset		= 0x1c70,
 		.size		= 0x0048,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_TETHERING] = {
+		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x1cb8,
 		.size		= 0x0238,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_DROP] = {
+		.id		= IPA_MEM_STATS_DROP,
 		.offset		= 0x1ef0,
 		.size		= 0x0020,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM] = {
+		.id		= IPA_MEM_MODEM,
 		.offset		= 0x1f18,
 		.size		= 0x100c,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_END_MARKER] = {
+		.id		= IPA_MEM_END_MARKER,
 		.offset		= 0x3000,
 		.size		= 0x0000,
 		.canary_count	= 1,
diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c
index 95f75dbc3c3b..f06eb07a7895 100644
--- a/drivers/net/ipa/ipa_data-v4.2.c
+++ b/drivers/net/ipa/ipa_data-v4.2.c
@@ -220,91 +220,109 @@ static const struct ipa_resource_data ipa_resource_data = {
 /* IPA-resident memory region data for an SoC having IPA v4.2 */
 static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
+		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_UC_INFO] = {
+		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_V4_FILTER_HASHED] = {
+		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_FILTER] = {
+		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0290,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER_HASHED] = {
+		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0310,
 		.size		= 0,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER] = {
+		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0318,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0398,
 		.size		= 0,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE] = {
+		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x03a0,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0420,
 		.size		= 0,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE] = {
+		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0428,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_HEADER] = {
+		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x04a8,
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
+		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x05f0,
 		.size		= 0x0200,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_PROC_CTX] = {
+		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x07f0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_PDN_CONFIG] = {
+		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x09f8,
 		.size		= 0x0050,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x0a50,
 		.size		= 0x0060,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_STATS_TETHERING] = {
+		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x0ab0,
 		.size		= 0x0140,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM] = {
+		.id		= IPA_MEM_MODEM,
 		.offset		= 0x0bf0,
 		.size		= 0x140c,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_END_MARKER] = {
+		.id		= IPA_MEM_END_MARKER,
 		.offset		= 0x2000,
 		.size		= 0,
 		.canary_count	= 1,
diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c
index 5f67a3a909ee..1c8a9099639a 100644
--- a/drivers/net/ipa/ipa_data-v4.5.c
+++ b/drivers/net/ipa/ipa_data-v4.5.c
@@ -266,116 +266,139 @@ static const struct ipa_resource_data ipa_resource_data = {
 /* IPA-resident memory region data for an SoC having IPA v4.5 */
 static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
+		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_UC_INFO] = {
+		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_V4_FILTER_HASHED] = {
+		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_FILTER] = {
+		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER_HASHED] = {
+		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER] = {
+		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE] = {
+		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE] = {
+		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_HEADER] = {
+		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0240,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_HEADER] = {
+		.id		= IPA_MEM_AP_HEADER,
 		.offset		= 0x08c8,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
+		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x0ad0,
 		.size		= 0x0b20,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_PROC_CTX] = {
+		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x15f0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_NAT_TABLE] = {
+		.id		= IPA_MEM_NAT_TABLE,
 		.offset		= 0x1800,
 		.size		= 0x0d00,
 		.canary_count	= 4,
 	},
 	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x2510,
 		.size		= 0x0030,
 		.canary_count	= 4,
 	},
 	[IPA_MEM_STATS_QUOTA_AP] = {
+		.id		= IPA_MEM_STATS_QUOTA_AP,
 		.offset		= 0x2540,
 		.size		= 0x0048,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_TETHERING] = {
+		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x2588,
 		.size		= 0x0238,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_FILTER_ROUTE] = {
+		.id		= IPA_MEM_STATS_FILTER_ROUTE,
 		.offset		= 0x27c0,
 		.size		= 0x0800,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_DROP] = {
+		.id		= IPA_MEM_STATS_DROP,
 		.offset		= 0x2fc0,
 		.size		= 0x0020,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM] = {
+		.id		= IPA_MEM_MODEM,
 		.offset		= 0x2fe8,
 		.size		= 0x0800,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_UC_EVENT_RING] = {
+		.id		= IPA_MEM_UC_EVENT_RING,
 		.offset		= 0x3800,
 		.size		= 0x1000,
 		.canary_count	= 1,
 	},
 	[IPA_MEM_PDN_CONFIG] = {
+		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x4800,
 		.size		= 0x0050,
 		.canary_count	= 0,
diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c
index e41be790f45e..f77169709eb2 100644
--- a/drivers/net/ipa/ipa_data-v4.9.c
+++ b/drivers/net/ipa/ipa_data-v4.9.c
@@ -264,115 +264,139 @@ static const struct ipa_resource_data ipa_resource_data = {
 /* IPA-resident memory region data for an SoC having IPA v4.9 */
 static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
+		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_UC_INFO] = {
+		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_V4_FILTER_HASHED] = { .offset		= 0x0288,
+	[IPA_MEM_V4_FILTER_HASHED] = {
+		.id		= IPA_MEM_V4_FILTER_HASHED,
+		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_FILTER] = {
+		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER_HASHED] = {
+		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_FILTER] = {
+		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V4_ROUTE] = {
+		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_V6_ROUTE] = {
+		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_MODEM_HEADER] = {
+		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0240,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_HEADER] = {
+		.id		= IPA_MEM_AP_HEADER,
 		.offset		= 0x08c8,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
+		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x0ad0,
 		.size		= 0x0b20,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_AP_PROC_CTX] = {
+		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x15f0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_NAT_TABLE] = {
+		.id		= IPA_MEM_NAT_TABLE,
 		.offset		= 0x1800,
 		.size		= 0x0d00,
 		.canary_count	= 4,
 	},
 	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x2510,
 		.size		= 0x0030,
 		.canary_count	= 4,
 	},
 	[IPA_MEM_STATS_QUOTA_AP] = {
+		.id		= IPA_MEM_STATS_QUOTA_AP,
 		.offset		= 0x2540,
 		.size		= 0x0048,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_TETHERING] = {
+		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x2588,
 		.size		= 0x0238,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_FILTER_ROUTE] = {
+		.id		= IPA_MEM_STATS_FILTER_ROUTE,
 		.offset		= 0x27c0,
 		.size		= 0x0800,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_STATS_DROP] = {
+		.id		= IPA_MEM_STATS_DROP,
 		.offset		= 0x2fc0,
 		.size		= 0x0020,
 		.canary_count	= 0,
 	},
 	[IPA_MEM_MODEM] = {
+		.id		= IPA_MEM_MODEM,
 		.offset		= 0x2fe8,
 		.size		= 0x0800,
 		.canary_count	= 2,
 	},
 	[IPA_MEM_UC_EVENT_RING] = {
+		.id		= IPA_MEM_UC_EVENT_RING,
 		.offset		= 0x3800,
 		.size		= 0x1000,
 		.canary_count	= 1,
 	},
 	[IPA_MEM_PDN_CONFIG] = {
+		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x4800,
 		.size		= 0x0050,
 		.canary_count	= 0,
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index 5a4f865a45af..ce692f948d59 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -43,6 +43,7 @@ struct ipa_mem_data;
 
 /* IPA-resident memory region ids */
 enum ipa_mem_id {
+	IPA_MEM_UNDEFINED = 0,		/* undefined region */
 	IPA_MEM_UC_SHARED,		/* 0 canaries */
 	IPA_MEM_UC_INFO,		/* 0 canaries */
 	IPA_MEM_V4_FILTER_HASHED,	/* 2 canaries */
@@ -76,11 +77,13 @@ enum ipa_mem_id {
 
 /**
  * struct ipa_mem - IPA local memory region description
+ * @id:			memory region identifier
  * @offset:		offset in IPA memory space to base of the region
  * @size:		size in bytes base of the region
  * @canary_count:	Number of 32-bit "canary" values that precede region
  */
 struct ipa_mem {
+	enum ipa_mem_id id;
 	u32 offset;
 	u16 size;
 	u16 canary_count;

From 0300df2d9d249c3dd8e0ad1b7dd5b7da1b8d0e37 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:55 -0500
Subject: [PATCH 1295/3292] net: ipa: validate memory regions unconditionally

Do memory region descriptor validation unconditionally, rather than
having it depend on IPA_VALIDATION being defined.

Pass the address of a memory region descriptor rather than a memory
ID to ipa_mem_valid().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index e3c43cf6e441..effaa745a406 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -99,12 +99,10 @@ int ipa_mem_setup(struct ipa *ipa)
 	return 0;
 }
 
-#ifdef IPA_VALIDATE
-
-static bool ipa_mem_valid(struct ipa *ipa, enum ipa_mem_id mem_id)
+static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem *mem)
 {
-	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	struct device *dev = &ipa->pdev->dev;
+	enum ipa_mem_id mem_id = mem->id;
 	u16 size_multiple;
 
 	/* Other than modem memory, sizes must be a multiple of 8 */
@@ -128,15 +126,6 @@ static bool ipa_mem_valid(struct ipa *ipa, enum ipa_mem_id mem_id)
 	return false;
 }
 
-#else /* !IPA_VALIDATE */
-
-static bool ipa_mem_valid(struct ipa *ipa, enum ipa_mem_id mem_id)
-{
-	return true;
-}
-
-#endif /*! IPA_VALIDATE */
-
 /**
  * ipa_mem_config() - Configure IPA shared memory
  * @ipa:	IPA pointer
@@ -188,7 +177,7 @@ int ipa_mem_config(struct ipa *ipa)
 		__le32 *canary;
 
 		/* Validate all regions (even undefined ones) */
-		if (!ipa_mem_valid(ipa, mem_id))
+		if (!ipa_mem_valid(ipa, mem))
 			goto err_dma_free;
 
 		/* Skip over undefined regions */

From 98334d2a3ba4c79947650710ac06434e25824a35 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:56 -0500
Subject: [PATCH 1296/3292] net: ipa: separate memory validation from
 initialization

Currently, memory regions are validated in the loop that initializes
them.  Instead, validate them separately.

Rename ipa_mem_valid() to be ipa_mem_valid_one().  Define a *new*
function named ipa_mem_valid() that performs validation of the array
of memory regions provided.  This function calls ipa_mem_valid_one()
for each region in turn.

Skip validation for any "empty" region descriptors, which have zero
size and are not preceded by any canary values.  Issue a warning for
such descriptors if the offset is non-zero.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 40 +++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index effaa745a406..62e1b8280d98 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -99,7 +99,7 @@ int ipa_mem_setup(struct ipa *ipa)
 	return 0;
 }
 
-static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem *mem)
+static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 {
 	struct device *dev = &ipa->pdev->dev;
 	enum ipa_mem_id mem_id = mem->id;
@@ -126,6 +126,31 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem *mem)
 	return false;
 }
 
+/* Verify each defined memory region is valid. */
+static bool ipa_mem_valid(struct ipa *ipa)
+{
+	struct device *dev = &ipa->pdev->dev;
+	enum ipa_mem_id mem_id;
+
+	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
+		const struct ipa_mem *mem = &ipa->mem[mem_id];
+
+		/* Defined regions have non-zero size and/or canary count */
+		if (mem->size || mem->canary_count) {
+			if (ipa_mem_valid_one(ipa, mem))
+				continue;
+			return false;
+		}
+
+		/* It's harmless, but warn if an offset is provided */
+		if (mem->offset)
+			dev_warn(dev, "empty region %u has non-zero offset\n",
+				 mem_id);
+	}
+
+	return true;
+}
+
 /**
  * ipa_mem_config() - Configure IPA shared memory
  * @ipa:	IPA pointer
@@ -167,19 +192,18 @@ int ipa_mem_config(struct ipa *ipa)
 	ipa->zero_virt = virt;
 	ipa->zero_size = IPA_MEM_MAX;
 
-	/* Verify each defined memory region is valid, and if indicated
-	 * for the region, write "canary" values in the space prior to
-	 * the region's base address.
+	/* Make sure all defined memory regions are valid */
+	if (!ipa_mem_valid(ipa))
+		goto err_dma_free;
+
+	/* For each region, write "canary" values in the space prior to
+	 * the region's base address if indicated.
 	 */
 	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
 		const struct ipa_mem *mem = &ipa->mem[mem_id];
 		u16 canary_count;
 		__le32 *canary;
 
-		/* Validate all regions (even undefined ones) */
-		if (!ipa_mem_valid(ipa, mem))
-			goto err_dma_free;
-
 		/* Skip over undefined regions */
 		if (!mem->offset && !mem->size)
 			continue;

From 5e57c6c5a349ed8026e63c9ef70e4655fe171b09 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:57 -0500
Subject: [PATCH 1297/3292] net: ipa: separate region range check from other
 validation

The only thing done by ipa_mem_valid_one() that requires hardware
access is the check for whether all regions fit within the size of
IPA local memory specified by an IPA register.

Introduce ipa_mem_size_valid() to implement this verification and
stop doing so in ipa_mem_valid_one().  Call the new function from
ipa_mem_config() (which is also the caller of ipa_mem_valid()).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 62e1b8280d98..f245e1a60a44 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -115,9 +115,6 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 	else if (mem->offset < mem->canary_count * sizeof(__le32))
 		dev_err(dev, "region %u offset too small for %hu canaries\n",
 			mem_id, mem->canary_count);
-	else if (mem->offset + mem->size > ipa->mem_size)
-		dev_err(dev, "region %u ends beyond memory limit (0x%08x)\n",
-			mem_id, ipa->mem_size);
 	else if (mem_id == IPA_MEM_END_MARKER && mem->size)
 		dev_err(dev, "non-zero end marker region size\n");
 	else
@@ -151,6 +148,28 @@ static bool ipa_mem_valid(struct ipa *ipa)
 	return true;
 }
 
+/* Do all memory regions fit within the IPA local memory? */
+static bool ipa_mem_size_valid(struct ipa *ipa)
+{
+	struct device *dev = &ipa->pdev->dev;
+	u32 limit = ipa->mem_size;
+	enum ipa_mem_id mem_id;
+
+	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
+		const struct ipa_mem *mem = &ipa->mem[mem_id];
+
+		if (mem->offset + mem->size <= limit)
+			continue;
+
+		dev_err(dev, "region %u ends beyond memory limit (0x%08x)\n",
+			mem_id, limit);
+
+		return false;
+	}
+
+	return true;
+}
+
 /**
  * ipa_mem_config() - Configure IPA shared memory
  * @ipa:	IPA pointer
@@ -184,6 +203,10 @@ int ipa_mem_config(struct ipa *ipa)
 			mem_size);
 	}
 
+	/* We know our memory size; make sure regions are all in range */
+	if (!ipa_mem_size_valid(ipa))
+		return -EINVAL;
+
 	/* Prealloc DMA memory for zeroing regions */
 	virt = dma_alloc_coherent(dev, IPA_MEM_MAX, &addr, GFP_KERNEL);
 	if (!virt)

From 2f9be1e90860e8acb43dc164e25c0d0be60f6a7b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:58 -0500
Subject: [PATCH 1298/3292] net: ipa: validate memory regions at init time

Move the memory region validation check so it happens earlier when
initializing the driver, at init time rather than config time (i.e.,
before access to hardware is required).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index f245e1a60a44..b2d149e7c5f0 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -215,10 +215,6 @@ int ipa_mem_config(struct ipa *ipa)
 	ipa->zero_virt = virt;
 	ipa->zero_size = IPA_MEM_MAX;
 
-	/* Make sure all defined memory regions are valid */
-	if (!ipa_mem_valid(ipa))
-		goto err_dma_free;
-
 	/* For each region, write "canary" values in the space prior to
 	 * the region's base address if indicated.
 	 */
@@ -528,6 +524,10 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	ipa->mem_count = mem_data->local_count;
 	ipa->mem = mem_data->local;
 
+	/* Make sure all defined memory regions are valid */
+	if (!ipa_mem_valid(ipa))
+		goto err_unmap;
+
 	ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size);
 	if (ret)
 		goto err_unmap;

From 1eec767746e5fe4e4376ad511558de3c77f49d82 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:34:59 -0500
Subject: [PATCH 1299/3292] net: ipa: pass memory configuration data to
 ipa_mem_valid()

Pass the memory configuration data array to ipa_mem_valid() for
validation, and use that rather than assuming it's already been
recorded in the IPA structure.  Move the memory data array size
check into ipa_mem_valid().

Call ipa_mem_valid() early in ipa_mem_init(), and only proceed with
assigning the memory array pointer and size if it is found to be
valid.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index b2d149e7c5f0..cb70f063320c 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -124,13 +124,19 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 }
 
 /* Verify each defined memory region is valid. */
-static bool ipa_mem_valid(struct ipa *ipa)
+static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 {
 	struct device *dev = &ipa->pdev->dev;
 	enum ipa_mem_id mem_id;
 
-	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
-		const struct ipa_mem *mem = &ipa->mem[mem_id];
+	if (mem_data->local_count > IPA_MEM_COUNT) {
+		dev_err(dev, "too many memory regions (%u > %u)\n",
+			mem_data->local_count, IPA_MEM_COUNT);
+		return false;
+	}
+
+	for (mem_id = 0; mem_id < mem_data->local_count; mem_id++) {
+		const struct ipa_mem *mem = &mem_data->local[mem_id];
 
 		/* Defined regions have non-zero size and/or canary count */
 		if (mem->size || mem->canary_count) {
@@ -491,11 +497,12 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	struct resource *res;
 	int ret;
 
-	if (mem_data->local_count > IPA_MEM_COUNT) {
-		dev_err(dev, "to many memory regions (%u > %u)\n",
-			mem_data->local_count, IPA_MEM_COUNT);
+	/* Make sure the set of defined memory regions is valid */
+	if (!ipa_mem_valid(ipa, mem_data))
 		return -EINVAL;
-	}
+
+	ipa->mem_count = mem_data->local_count;
+	ipa->mem = mem_data->local;
 
 	ret = dma_set_mask_and_coherent(&ipa->pdev->dev, DMA_BIT_MASK(64));
 	if (ret) {
@@ -520,14 +527,6 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	ipa->mem_addr = res->start;
 	ipa->mem_size = resource_size(res);
 
-	/* The ipa->mem[] array is indexed by enum ipa_mem_id values */
-	ipa->mem_count = mem_data->local_count;
-	ipa->mem = mem_data->local;
-
-	/* Make sure all defined memory regions are valid */
-	if (!ipa_mem_valid(ipa))
-		goto err_unmap;
-
 	ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size);
 	if (ret)
 		goto err_unmap;

From d39ffb97079b3f97f4f9d7cc633c78a1f69d3264 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:35:00 -0500
Subject: [PATCH 1300/3292] net: ipa: introduce ipa_mem_id_optional()

Introduce a new function that indicates whether a given memory
region is required for a given version of IPA hardware.  Use it to
verify that all required regions are present during initialization.

Reorder the definitions of the memory region IDs to be based on
the version in which they're first defined.  Use "+" rather than
"and above" where defining the IPA versions in which memory IDs are
used, and indicate which regions are optional (many are not).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 42 +++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_mem.h | 21 ++++++++++----------
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index cb70f063320c..29c626c46abf 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -99,6 +99,36 @@ int ipa_mem_setup(struct ipa *ipa)
 	return 0;
 }
 
+/* Must the given memory region be present in the configuration? */
+static bool ipa_mem_id_required(struct ipa *ipa, enum ipa_mem_id mem_id)
+{
+	switch (mem_id) {
+	case IPA_MEM_UC_SHARED:
+	case IPA_MEM_UC_INFO:
+	case IPA_MEM_V4_FILTER_HASHED:
+	case IPA_MEM_V4_FILTER:
+	case IPA_MEM_V6_FILTER_HASHED:
+	case IPA_MEM_V6_FILTER:
+	case IPA_MEM_V4_ROUTE_HASHED:
+	case IPA_MEM_V4_ROUTE:
+	case IPA_MEM_V6_ROUTE_HASHED:
+	case IPA_MEM_V6_ROUTE:
+	case IPA_MEM_MODEM_HEADER:
+	case IPA_MEM_MODEM_PROC_CTX:
+	case IPA_MEM_AP_PROC_CTX:
+	case IPA_MEM_MODEM:
+		return true;
+
+	case IPA_MEM_PDN_CONFIG:
+	case IPA_MEM_STATS_QUOTA_MODEM:
+	case IPA_MEM_STATS_TETHERING:
+		return ipa->version >= IPA_VERSION_4_0;
+
+	default:
+		return false;		/* Anything else is optional */
+	}
+}
+
 static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 {
 	struct device *dev = &ipa->pdev->dev;
@@ -149,8 +179,20 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 		if (mem->offset)
 			dev_warn(dev, "empty region %u has non-zero offset\n",
 				 mem_id);
+
+		if (ipa_mem_id_required(ipa, mem_id)) {
+			dev_err(dev, "required memory region %u missing\n",
+				mem_id);
+			return false;
+		}
 	}
 
+	/* Now see if any required regions are not defined */
+	while (mem_id < IPA_MEM_COUNT)
+		if (ipa_mem_id_required(ipa, mem_id++))
+			dev_err(dev, "required memory region %u missing\n",
+				mem_id);
+
 	return true;
 }
 
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index ce692f948d59..effe01f7310a 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -55,22 +55,23 @@ enum ipa_mem_id {
 	IPA_MEM_V6_ROUTE_HASHED,	/* 2 canaries */
 	IPA_MEM_V6_ROUTE,		/* 2 canaries */
 	IPA_MEM_MODEM_HEADER,		/* 2 canaries */
-	IPA_MEM_AP_HEADER,		/* 0 canaries */
+	IPA_MEM_AP_HEADER,		/* 0 canaries, optional */
 	IPA_MEM_MODEM_PROC_CTX,		/* 2 canaries */
 	IPA_MEM_AP_PROC_CTX,		/* 0 canaries */
-	IPA_MEM_NAT_TABLE,		/* 4 canaries (IPA v4.5 and above) */
-	IPA_MEM_PDN_CONFIG,		/* 0/2 canaries (IPA v4.0 and above) */
-	IPA_MEM_STATS_QUOTA_MODEM,	/* 2/4 canaries (IPA v4.0 and above) */
-	IPA_MEM_STATS_QUOTA_AP,		/* 0 canaries (IPA v4.0 and above) */
-	IPA_MEM_STATS_TETHERING,	/* 0 canaries (IPA v4.0 and above) */
+	IPA_MEM_MODEM,			/* 0/2 canaries */
+	IPA_MEM_UC_EVENT_RING,		/* 1 canary, optional */
+	IPA_MEM_PDN_CONFIG,		/* 0/2 canaries (IPA v4.0+) */
+	IPA_MEM_STATS_QUOTA_MODEM,	/* 2/4 canaries (IPA v4.0+) */
+	IPA_MEM_STATS_QUOTA_AP,		/* 0 canaries, optional (IPA v4.0+) */
+	IPA_MEM_STATS_TETHERING,	/* 0 canaries (IPA v4.0+) */
+	IPA_MEM_STATS_DROP,		/* 0 canaries, optional (IPA v4.0+) */
+	/* The next 5 filter and route statistics regions are optional */
 	IPA_MEM_STATS_V4_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
 	IPA_MEM_STATS_V6_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
 	IPA_MEM_STATS_V4_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
 	IPA_MEM_STATS_V6_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
-	IPA_MEM_STATS_FILTER_ROUTE,	/* 0 canaries (IPA v4.5 and above) */
-	IPA_MEM_STATS_DROP,		/* 0 canaries (IPA v4.0 and above) */
-	IPA_MEM_MODEM,			/* 0/2 canaries */
-	IPA_MEM_UC_EVENT_RING,		/* 1 canary */
+	IPA_MEM_STATS_FILTER_ROUTE,	/* 0 canaries (IPA v4.5+) */
+	IPA_MEM_NAT_TABLE,		/* 4 canaries, optional (IPA v4.5+) */
 	IPA_MEM_END_MARKER,		/* 1 canary (not a real region) */
 	IPA_MEM_COUNT,			/* Number of regions (not an index) */
 };

From 75bcfde6c1131371adc2388b9226db7b2465e4c1 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:35:01 -0500
Subject: [PATCH 1301/3292] net: ipa: validate memory regions based on version

Introduce ipa_mem_id_valid(), and use it to check defined memory
regions to ensure they are valid for a given version of IPA.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 61 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 29c626c46abf..e5ca6a2ac626 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -99,6 +99,61 @@ int ipa_mem_setup(struct ipa *ipa)
 	return 0;
 }
 
+/* Is the given memory region ID is valid for the current IPA version? */
+static bool ipa_mem_id_valid(struct ipa *ipa, enum ipa_mem_id mem_id)
+{
+	enum ipa_version version = ipa->version;
+
+	switch (mem_id) {
+	case IPA_MEM_UC_SHARED:
+	case IPA_MEM_UC_INFO:
+	case IPA_MEM_V4_FILTER_HASHED:
+	case IPA_MEM_V4_FILTER:
+	case IPA_MEM_V6_FILTER_HASHED:
+	case IPA_MEM_V6_FILTER:
+	case IPA_MEM_V4_ROUTE_HASHED:
+	case IPA_MEM_V4_ROUTE:
+	case IPA_MEM_V6_ROUTE_HASHED:
+	case IPA_MEM_V6_ROUTE:
+	case IPA_MEM_MODEM_HEADER:
+	case IPA_MEM_AP_HEADER:
+	case IPA_MEM_MODEM_PROC_CTX:
+	case IPA_MEM_AP_PROC_CTX:
+	case IPA_MEM_MODEM:
+	case IPA_MEM_UC_EVENT_RING:
+	case IPA_MEM_PDN_CONFIG:
+	case IPA_MEM_STATS_QUOTA_MODEM:
+	case IPA_MEM_STATS_QUOTA_AP:
+	case IPA_MEM_END_MARKER:	/* pseudo region */
+		break;
+
+	case IPA_MEM_STATS_TETHERING:
+	case IPA_MEM_STATS_DROP:
+		if (version < IPA_VERSION_4_0)
+			return false;
+		break;
+
+	case IPA_MEM_STATS_V4_FILTER:
+	case IPA_MEM_STATS_V6_FILTER:
+	case IPA_MEM_STATS_V4_ROUTE:
+	case IPA_MEM_STATS_V6_ROUTE:
+		if (version < IPA_VERSION_4_0 || version > IPA_VERSION_4_2)
+			return false;
+		break;
+
+	case IPA_MEM_NAT_TABLE:
+	case IPA_MEM_STATS_FILTER_ROUTE:
+		if (version < IPA_VERSION_4_5)
+			return false;
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 /* Must the given memory region be present in the configuration? */
 static bool ipa_mem_id_required(struct ipa *ipa, enum ipa_mem_id mem_id)
 {
@@ -135,6 +190,12 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 	enum ipa_mem_id mem_id = mem->id;
 	u16 size_multiple;
 
+	/* Make sure the memory region is valid for this version of IPA */
+	if (!ipa_mem_id_valid(ipa, mem_id)) {
+		dev_err(dev, "region id %u not valid\n", mem_id);
+		return false;
+	}
+
 	/* Other than modem memory, sizes must be a multiple of 8 */
 	size_multiple = mem_id == IPA_MEM_MODEM ? 4 : 8;
 	if (mem->size % size_multiple)

From eadf7f9376145a2827010f1775570cfe009afc70 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:35:02 -0500
Subject: [PATCH 1302/3292] net: ipa: flag duplicate memory regions

Add a test in ipa_mem_valid() to ensure no memory region is defined
more than once, using a bitmap to record each defined memory region.
Skip over undefined regions when checking (we can have any number of
those).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index e5ca6a2ac626..7b79aeb5f68f 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -217,6 +217,7 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 /* Verify each defined memory region is valid. */
 static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 {
+	DECLARE_BITMAP(regions, IPA_MEM_COUNT) = { };
 	struct device *dev = &ipa->pdev->dev;
 	enum ipa_mem_id mem_id;
 
@@ -229,6 +230,14 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	for (mem_id = 0; mem_id < mem_data->local_count; mem_id++) {
 		const struct ipa_mem *mem = &mem_data->local[mem_id];
 
+		if (mem_id == IPA_MEM_UNDEFINED)
+			continue;
+
+		if (__test_and_set_bit(mem->id, regions)) {
+			dev_err(dev, "duplicate memory region %u\n", mem->id);
+			return false;
+		}
+
 		/* Defined regions have non-zero size and/or canary count */
 		if (mem->size || mem->canary_count) {
 			if (ipa_mem_valid_one(ipa, mem))

From 6857b02392ab89ffc4235de991b544ef22375e6e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 9 Jun 2021 17:35:03 -0500
Subject: [PATCH 1303/3292] net: ipa: use bitmap to check for missing regions

In ipa_mem_valid(), wait until regions have been marked in the memory
region bitmap, and check all that are not found there to ensure they
are not required.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 7b79aeb5f68f..ef9fdd3b8875 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -249,19 +249,16 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 		if (mem->offset)
 			dev_warn(dev, "empty region %u has non-zero offset\n",
 				 mem_id);
-
-		if (ipa_mem_id_required(ipa, mem_id)) {
-			dev_err(dev, "required memory region %u missing\n",
-				mem_id);
-			return false;
-		}
 	}
 
 	/* Now see if any required regions are not defined */
-	while (mem_id < IPA_MEM_COUNT)
-		if (ipa_mem_id_required(ipa, mem_id++))
+	for (mem_id = find_first_zero_bit(regions, IPA_MEM_COUNT);
+	     mem_id < IPA_MEM_COUNT;
+	     mem_id = find_next_zero_bit(regions, IPA_MEM_COUNT, mem_id + 1)) {
+		if (ipa_mem_id_required(ipa, mem_id))
 			dev_err(dev, "required memory region %u missing\n",
 				mem_id);
+	}
 
 	return true;
 }

From 67133eaa93e810f5c510cd0ec6e2e7ca76fc1340 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Tue, 9 Mar 2021 03:29:16 +0200
Subject: [PATCH 1304/3292] net/mlx5: mlx5_ifc support for header insert/remove

Add support for HCA caps 2 that contains capabilities for the new
insert/remove header actions.

Added the required definitions for supporting the new reformat type:
added packet reformat parameters, reformat anchors and definitions
to allow copy/set into the inserted EMD (Embedded MetaData) tag.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c |  6 +++
 include/linux/mlx5/device.h                  | 10 +++++
 include/linux/mlx5/mlx5_ifc.h                | 40 +++++++++++++++++---
 3 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 02558ac2ace6..016d26f809a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -148,6 +148,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	if (err)
 		return err;
 
+	if (MLX5_CAP_GEN(dev, hca_cap_2)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
+		if (err)
+			return err;
+	}
+
 	if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
 		if (err)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 578c4ccae91c..0025913505ab 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1179,6 +1179,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_VDPA_EMULATION = 0x13,
 	MLX5_CAP_DEV_EVENT = 0x14,
 	MLX5_CAP_IPSEC,
+	MLX5_CAP_GENERAL_2 = 0x20,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1220,6 +1221,15 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_GEN_MAX(mdev, cap) \
 	MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
 
+#define MLX5_CAP_GEN_2(mdev, cap) \
+	MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+
+#define MLX5_CAP_GEN_2_64(mdev, cap) \
+	MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+
+#define MLX5_CAP_GEN_2_MAX(mdev, cap) \
+	MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_max[MLX5_CAP_GENERAL_2], cap)
+
 #define MLX5_CAP_ETH(mdev, cap) \
 	MLX5_GET(per_protocol_networking_offload_caps,\
 		 mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index eb86e80e4643..057db0eaf195 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -435,7 +435,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 
 	u8         reserved_at_40[0x20];
 
-	u8         reserved_at_60[0x18];
+	u8         reserved_at_60[0x2];
+	u8         reformat_insert[0x1];
+	u8         reformat_remove[0x1];
+	u8         reserver_at_64[0x14];
 	u8         log_max_ft_num[0x8];
 
 	u8         reserved_at_80[0x10];
@@ -1312,7 +1315,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x1f];
 	u8         vhca_resource_manager[0x1];
 
-	u8         reserved_at_20[0x3];
+	u8         hca_cap_2[0x1];
+	u8         reserved_at_21[0x2];
 	u8         event_on_vhca_state_teardown_request[0x1];
 	u8         event_on_vhca_state_in_use[0x1];
 	u8         event_on_vhca_state_active[0x1];
@@ -1732,6 +1736,17 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   reserved_at_7c0[0x40];
 };
 
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+	u8	   reserved_at_0[0xa0];
+
+	u8	   max_reformat_insert_size[0x8];
+	u8	   max_reformat_insert_offset[0x8];
+	u8	   max_reformat_remove_size[0x8];
+	u8	   max_reformat_remove_offset[0x8];
+
+	u8	   reserved_at_c0[0x740];
+};
+
 enum mlx5_flow_destination_type {
 	MLX5_FLOW_DESTINATION_TYPE_VPORT        = 0x0,
 	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE   = 0x1,
@@ -3105,6 +3120,7 @@ struct mlx5_ifc_roce_addr_layout_bits {
 
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
+	struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2;
 	struct mlx5_ifc_odp_cap_bits odp_cap;
 	struct mlx5_ifc_atomic_caps_bits atomic_caps;
 	struct mlx5_ifc_roce_cap_bits roce_cap;
@@ -5785,12 +5801,14 @@ struct mlx5_ifc_query_eq_in_bits {
 };
 
 struct mlx5_ifc_packet_reformat_context_in_bits {
-	u8         reserved_at_0[0x5];
-	u8         reformat_type[0x3];
-	u8         reserved_at_8[0xe];
+	u8         reformat_type[0x8];
+	u8         reserved_at_8[0x4];
+	u8         reformat_param_0[0x4];
+	u8         reserved_at_10[0x6];
 	u8         reformat_data_size[0xa];
 
-	u8         reserved_at_20[0x10];
+	u8         reformat_param_1[0x8];
+	u8         reserved_at_28[0x8];
 	u8         reformat_data[2][0x8];
 
 	u8         more_reformat_data[][0x8];
@@ -5830,12 +5848,20 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
 	u8         reserved_at_60[0x20];
 };
 
+enum {
+	MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START = 0x1,
+	MLX5_REFORMAT_CONTEXT_ANCHOR_IP_START = 0x7,
+	MLX5_REFORMAT_CONTEXT_ANCHOR_TCP_UDP_START = 0x9,
+};
+
 enum mlx5_reformat_ctx_type {
 	MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
 	MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
 	MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
 	MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
 	MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
+	MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
+	MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
 };
 
 struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
@@ -5956,6 +5982,8 @@ enum {
 	MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM   = 0x59,
 	MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM   = 0x5B,
 	MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME    = 0x5D,
+	MLX5_ACTION_IN_FIELD_OUT_EMD_47_32     = 0x6F,
+	MLX5_ACTION_IN_FIELD_OUT_EMD_31_0      = 0x70,
 };
 
 struct mlx5_ifc_alloc_modify_header_context_out_bits {

From 28de41a4ba7b5937b2338d8d8b58f5fda3641188 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Wed, 10 Mar 2021 04:12:28 +0200
Subject: [PATCH 1305/3292] net/mlx5: DR, Split reformat state to Encap and
 Decap

Split single reformat state into two separate states for encap and decap.
This will allow adding actions to the specific domain, such as encap on RX.

Signed-off-by: Erez Shitrit <erezsh@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_action.c   | 69 ++++++++++---------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 949879cf2092..467f2eac6503 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -14,7 +14,8 @@ enum dr_action_domain {
 enum dr_action_valid_state {
 	DR_ACTION_STATE_ERR,
 	DR_ACTION_STATE_NO_ACTION,
-	DR_ACTION_STATE_REFORMAT,
+	DR_ACTION_STATE_ENCAP,
+	DR_ACTION_STATE_DECAP,
 	DR_ACTION_STATE_MODIFY_HDR,
 	DR_ACTION_STATE_MODIFY_VLAN,
 	DR_ACTION_STATE_NON_TERM,
@@ -31,17 +32,17 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
-			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
-		[DR_ACTION_STATE_REFORMAT] = {
+		[DR_ACTION_STATE_DECAP] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
-			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -67,8 +68,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
-			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -81,22 +82,22 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
-		[DR_ACTION_STATE_REFORMAT] = {
+		[DR_ACTION_STATE_ENCAP] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
-			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
@@ -104,15 +105,15 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -125,16 +126,16 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
-			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
-		[DR_ACTION_STATE_REFORMAT] = {
+		[DR_ACTION_STATE_DECAP] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
-			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -157,8 +158,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
-			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -173,23 +174,23 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
-		[DR_ACTION_STATE_REFORMAT] = {
+		[DR_ACTION_STATE_ENCAP] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
-			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
@@ -198,8 +199,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
@@ -207,8 +208,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
-			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_REFORMAT,
-			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_REFORMAT,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},

From d7418b4efa3bb1aff70d20f02455685c832c8ccb Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Wed, 10 Mar 2021 04:38:06 +0200
Subject: [PATCH 1306/3292] net/mlx5: DR, Allow encap action for RX for
 supporting devices

Encap actions on RX flow were not supported on older devices.
However, this is no longer the case in devices that support STEv1.
This patch adds support for encap l3/l2 on RX flow for supported
devices: update actions state machine by adding the newely supported
transitions and add the required support in STEv0/1 files.
The new transitions that are supported are:
 - from decap/modify-header/pop-vlan to encap
 - from encap to termination table

Signed-off-by: Erez Shitrit <erezsh@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_action.c   | 40 +++++++++++++
 .../mellanox/mlx5/core/steering/dr_ste.h      |  1 +
 .../mellanox/mlx5/core/steering/dr_ste_v0.c   |  1 +
 .../mellanox/mlx5/core/steering/dr_ste_v1.c   | 60 ++++++++++++++-----
 .../mellanox/mlx5/core/steering/dr_types.h    |  5 ++
 5 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 467f2eac6503..1b7a0e94d432 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 Mellanox Technologies. */
 
 #include "dr_types.h"
+#include "dr_ste.h"
 
 enum dr_action_domain {
 	DR_ACTION_DOMAIN_NIC_INGRESS,
@@ -34,6 +35,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -43,15 +46,26 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
+		[DR_ACTION_STATE_ENCAP] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
+		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -61,6 +75,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -70,6 +86,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -128,6 +146,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -139,12 +159,23 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+		},
+		[DR_ACTION_STATE_ENCAP] = {
+			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -153,6 +184,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -160,6 +193,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -455,6 +490,11 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 			break;
 		case DR_ACTION_TYP_L2_TO_TNL_L2:
 		case DR_ACTION_TYP_L2_TO_TNL_L3:
+			if (rx_rule &&
+			    !(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) {
+				mlx5dr_info(dmn, "Device doesn't support Encap on RX\n");
+				goto out_invalid_arg;
+			}
 			attr.reformat_size = action->reformat->reformat_size;
 			attr.reformat_id = action->reformat->reformat_id;
 			break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 992b591bf0c5..12a8bbbf944b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -156,6 +156,7 @@ struct mlx5dr_ste_ctx {
 	u16  (*get_byte_mask)(u8 *hw_ste_p);
 
 	/* Actions */
+	u32 actions_caps;
 	void (*set_actions_rx)(struct mlx5dr_domain *dmn,
 			       u8 *action_type_set,
 			       u8 *hw_ste_arr,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 0757a4e8540e..7e26a9e3afc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1893,6 +1893,7 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	.get_byte_mask			= &dr_ste_v0_get_byte_mask,
 
 	/* Actions */
+	.actions_caps			= DR_STE_CTX_ACTION_CAP_NONE,
 	.set_actions_rx			= &dr_ste_v0_set_actions_rx,
 	.set_actions_tx			= &dr_ste_v0_set_actions_tx,
 	.modify_field_arr_sz		= ARRAY_SIZE(dr_ste_v0_action_modify_field_arr),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 054c2e2b6554..a5807d190698 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -361,8 +361,8 @@ static void dr_ste_v1_set_reparse(u8 *hw_ste_p)
 	MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1);
 }
 
-static void dr_ste_v1_set_tx_encap(u8 *hw_ste_p, u8 *d_action,
-				   u32 reformat_id, int size)
+static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action,
+				u32 reformat_id, int size)
 {
 	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id,
 		 DR_STE_V1_ACTION_ID_INSERT_POINTER);
@@ -401,11 +401,11 @@ static void dr_ste_v1_set_rx_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
 	dr_ste_v1_set_reparse(hw_ste_p);
 }
 
-static void dr_ste_v1_set_tx_encap_l3(u8 *hw_ste_p,
-				      u8 *frst_s_action,
-				      u8 *scnd_d_action,
-				      u32 reformat_id,
-				      int size)
+static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p,
+				   u8 *frst_s_action,
+				   u8 *scnd_d_action,
+				   u32 reformat_id,
+				   int size)
 {
 	/* Remove L2 headers */
 	MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id,
@@ -519,9 +519,9 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
 			action_sz = DR_STE_ACTION_TRIPLE_SZ;
 			allow_encap = true;
 		}
-		dr_ste_v1_set_tx_encap(last_ste, action,
-				       attr->reformat_id,
-				       attr->reformat_size);
+		dr_ste_v1_set_encap(last_ste, action,
+				    attr->reformat_id,
+				    attr->reformat_size);
 		action_sz -= DR_STE_ACTION_DOUBLE_SZ;
 		action += DR_STE_ACTION_DOUBLE_SZ;
 	} else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
@@ -532,10 +532,10 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
 		action_sz = DR_STE_ACTION_TRIPLE_SZ;
 		d_action = action + DR_STE_ACTION_SINGLE_SZ;
 
-		dr_ste_v1_set_tx_encap_l3(last_ste,
-					  action, d_action,
-					  attr->reformat_id,
-					  attr->reformat_size);
+		dr_ste_v1_set_encap_l3(last_ste,
+				       action, d_action,
+				       attr->reformat_id,
+				       attr->reformat_size);
 		action_sz -= DR_STE_ACTION_TRIPLE_SZ;
 		action += DR_STE_ACTION_TRIPLE_SZ;
 	}
@@ -627,6 +627,37 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
 		dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
 	}
 
+	if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) {
+		if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+			dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+			action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+			action_sz = DR_STE_ACTION_TRIPLE_SZ;
+		}
+		dr_ste_v1_set_encap(last_ste, action,
+				    attr->reformat_id,
+				    attr->reformat_size);
+		action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+		action += DR_STE_ACTION_DOUBLE_SZ;
+		allow_modify_hdr = false;
+	} else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
+		u8 *d_action;
+
+		if (action_sz < DR_STE_ACTION_TRIPLE_SZ) {
+			dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+			action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+			action_sz = DR_STE_ACTION_TRIPLE_SZ;
+		}
+
+		d_action = action + DR_STE_ACTION_SINGLE_SZ;
+
+		dr_ste_v1_set_encap_l3(last_ste,
+				       action, d_action,
+				       attr->reformat_id,
+				       attr->reformat_size);
+		action_sz -= DR_STE_ACTION_TRIPLE_SZ;
+		allow_modify_hdr = false;
+	}
+
 	dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
 	dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1);
 }
@@ -1865,6 +1896,7 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	.set_byte_mask			= &dr_ste_v1_set_byte_mask,
 	.get_byte_mask			= &dr_ste_v1_get_byte_mask,
 	/* Actions */
+	.actions_caps			= DR_STE_CTX_ACTION_CAP_RX_ENCAP,
 	.set_actions_rx			= &dr_ste_v1_set_actions_rx,
 	.set_actions_tx			= &dr_ste_v1_set_actions_tx,
 	.modify_field_arr_sz		= ARRAY_SIZE(dr_ste_v1_action_modify_field_arr),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 7600004d79a8..b34018d49326 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -89,6 +89,11 @@ enum {
 	DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
 };
 
+enum mlx5dr_ste_ctx_action_cap {
+	DR_STE_CTX_ACTION_CAP_NONE = 0,
+	DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 0,
+};
+
 enum {
 	DR_MODIFY_ACTION_SIZE = 8,
 };

From 3f3f05ab88722224fef5b0b78a0969f6b54f2cba Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Tue, 9 Mar 2021 03:30:44 +0200
Subject: [PATCH 1307/3292] net/mlx5: Added new parameters to reformat context

Adding new reformat context type (INSERT_HEADER) requires adding two new
parameters to reformat context - reformat_param_0 and reformat_param_1.
As defined by HW spec, these parameters have different meaning for
different reformat context type.

The first parameter (reformat_param_0) is not new to HW spec, but it
wasn't used by any of the supported reformats. The second parameter
(reformat_param_1) is new to the HW spec - it was added to allow
supporting INSERT_HEADER.

For NSERT_HEADER, reformat_param_0 indicates the header used to
reference the location of the inserted header, and reformat_param_1
indicates the offset of the inserted header from the reference point
defined by reformat_param_0.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c               |  9 ++++-
 .../ethernet/mellanox/mlx5/core/en/tc_tun.c   | 38 +++++++++++++------
 .../mellanox/mlx5/core/en/tc_tun_encap.c      | 17 ++++++---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 29 +++++++-------
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  4 +-
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |  9 ++---
 .../mellanox/mlx5/core/steering/dr_action.c   |  2 +
 .../mellanox/mlx5/core/steering/fs_dr.c       | 17 +++++----
 .../mellanox/mlx5/core/steering/mlx5dr.h      |  2 +
 include/linux/mlx5/fs.h                       | 12 ++++--
 10 files changed, 86 insertions(+), 53 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 2fc6a60c4e77..941adf5cf3d0 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -2280,6 +2280,7 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
 	u8 ft_type, u8 dv_prt,
 	void *in, size_t len)
 {
+	struct mlx5_pkt_reformat_params reformat_params;
 	enum mlx5_flow_namespace_type namespace;
 	u8 prm_prt;
 	int ret;
@@ -2292,9 +2293,13 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
 	if (ret)
 		return ret;
 
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = prm_prt;
+	reformat_params.size = len;
+	reformat_params.data = in;
 	maction->flow_action_raw.pkt_reformat =
-		mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
-					   in, namespace);
+		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
+					   namespace);
 	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
 		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
 		return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 172e0474f2e6..8f79f04eccd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -212,6 +212,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	struct mlx5_pkt_reformat_params reformat_params;
 	struct mlx5e_neigh m_neigh = {};
 	TC_TUN_ROUTE_ATTR_INIT(attr);
 	int ipv4_encap_size;
@@ -295,9 +296,12 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
 		 */
 		goto release_neigh;
 	}
-	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     e->reformat_type,
-						     ipv4_encap_size, encap_header,
+
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = e->reformat_type;
+	reformat_params.size = ipv4_encap_size;
+	reformat_params.data = encap_header;
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 						     MLX5_FLOW_NAMESPACE_FDB);
 	if (IS_ERR(e->pkt_reformat)) {
 		err = PTR_ERR(e->pkt_reformat);
@@ -324,6 +328,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	struct mlx5_pkt_reformat_params reformat_params;
 	TC_TUN_ROUTE_ATTR_INIT(attr);
 	int ipv4_encap_size;
 	char *encap_header;
@@ -396,9 +401,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
 		 */
 		goto release_neigh;
 	}
-	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     e->reformat_type,
-						     ipv4_encap_size, encap_header,
+
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = e->reformat_type;
+	reformat_params.size = ipv4_encap_size;
+	reformat_params.data = encap_header;
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 						     MLX5_FLOW_NAMESPACE_FDB);
 	if (IS_ERR(e->pkt_reformat)) {
 		err = PTR_ERR(e->pkt_reformat);
@@ -471,6 +479,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	struct mlx5_pkt_reformat_params reformat_params;
 	struct mlx5e_neigh m_neigh = {};
 	TC_TUN_ROUTE_ATTR_INIT(attr);
 	struct ipv6hdr *ip6h;
@@ -553,9 +562,11 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 		goto release_neigh;
 	}
 
-	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     e->reformat_type,
-						     ipv6_encap_size, encap_header,
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = e->reformat_type;
+	reformat_params.size = ipv6_encap_size;
+	reformat_params.data = encap_header;
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 						     MLX5_FLOW_NAMESPACE_FDB);
 	if (IS_ERR(e->pkt_reformat)) {
 		err = PTR_ERR(e->pkt_reformat);
@@ -582,6 +593,7 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
 	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	struct mlx5_pkt_reformat_params reformat_params;
 	TC_TUN_ROUTE_ATTR_INIT(attr);
 	struct ipv6hdr *ip6h;
 	int ipv6_encap_size;
@@ -654,9 +666,11 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 		goto release_neigh;
 	}
 
-	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     e->reformat_type,
-						     ipv6_encap_size, encap_header,
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = e->reformat_type;
+	reformat_params.size = ipv6_encap_size;
+	reformat_params.data = encap_header;
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
 						     MLX5_FLOW_NAMESPACE_FDB);
 	if (IS_ERR(e->pkt_reformat)) {
 		err = PTR_ERR(e->pkt_reformat);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index f1fb11680d20..0dfd51d2d178 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -120,6 +120,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 			      struct list_head *flow_list)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_pkt_reformat_params reformat_params;
 	struct mlx5_esw_flow_attr *esw_attr;
 	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_attr *attr;
@@ -130,9 +131,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
 		return;
 
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = e->reformat_type;
+	reformat_params.size = e->encap_size;
+	reformat_params.data = e->encap_header;
 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     e->reformat_type,
-						     e->encap_size, e->encap_header,
+						     &reformat_params,
 						     MLX5_FLOW_NAMESPACE_FDB);
 	if (IS_ERR(e->pkt_reformat)) {
 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
@@ -812,6 +816,7 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+	struct mlx5_pkt_reformat_params reformat_params;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 	struct mlx5e_decap_entry *d;
 	struct mlx5e_decap_key key;
@@ -853,10 +858,12 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
 	mutex_unlock(&esw->offloads.decap_tbl_lock);
 
+	memset(&reformat_params, 0, sizeof(reformat_params));
+	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+	reformat_params.size = sizeof(parse_attr->eth);
+	reformat_params.data = &parse_attr->eth;
 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-						     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
-						     sizeof(parse_attr->eth),
-						     &parse_attr->eth,
+						     &reformat_params,
 						     MLX5_FLOW_NAMESPACE_FDB);
 	if (IS_ERR(d->pkt_reformat)) {
 		err = PTR_ERR(d->pkt_reformat);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index b7aae8b75760..896a6c3dbdb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -111,9 +111,7 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns,
 }
 
 static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
-					       int reformat_type,
-					       size_t size,
-					       void *reformat_data,
+					       struct mlx5_pkt_reformat_params *params,
 					       enum mlx5_flow_namespace_type namespace,
 					       struct mlx5_pkt_reformat *pkt_reformat)
 {
@@ -701,9 +699,7 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
 }
 
 static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
-					  int reformat_type,
-					  size_t size,
-					  void *reformat_data,
+					  struct mlx5_pkt_reformat_params *params,
 					  enum mlx5_flow_namespace_type namespace,
 					  struct mlx5_pkt_reformat *pkt_reformat)
 {
@@ -721,14 +717,14 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
 	else
 		max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
 
-	if (size > max_encap_size) {
+	if (params->size > max_encap_size) {
 		mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
-			       size, max_encap_size);
+			       params->size, max_encap_size);
 		return -EINVAL;
 	}
 
-	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size,
-		     GFP_KERNEL);
+	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) +
+		     params->size, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -737,15 +733,20 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
 	reformat = MLX5_ADDR_OF(packet_reformat_context_in,
 				packet_reformat_context_in,
 				reformat_data);
-	inlen = reformat - (void *)in  + size;
+	inlen = reformat - (void *)in + params->size;
 
 	MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
 	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
-		 reformat_data_size, size);
+		 reformat_data_size, params->size);
 	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
-		 reformat_type, reformat_type);
-	memcpy(reformat, reformat_data, size);
+		 reformat_type, params->type);
+	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+		 reformat_param_0, params->param_0);
+	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+		 reformat_param_1, params->param_1);
+	if (params->data && params->size)
+		memcpy(reformat, params->data, params->size);
 
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index c2e102ed82ad..5ecd33cdc087 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -77,9 +77,7 @@ struct mlx5_flow_cmds {
 			      bool disconnect);
 
 	int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns,
-				     int reformat_type,
-				     size_t size,
-				     void *reformat_data,
+				     struct mlx5_pkt_reformat_params *params,
 				     enum mlx5_flow_namespace_type namespace,
 				     struct mlx5_pkt_reformat *pkt_reformat);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 1b7a1cde097c..c0936b4e53a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -3165,9 +3165,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
 EXPORT_SYMBOL(mlx5_modify_header_dealloc);
 
 struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
-						     int reformat_type,
-						     size_t size,
-						     void *reformat_data,
+						     struct mlx5_pkt_reformat_params *params,
 						     enum mlx5_flow_namespace_type ns_type)
 {
 	struct mlx5_pkt_reformat *pkt_reformat;
@@ -3183,9 +3181,8 @@ struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
 		return ERR_PTR(-ENOMEM);
 
 	pkt_reformat->ns_type = ns_type;
-	pkt_reformat->reformat_type = reformat_type;
-	err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
-						reformat_data, ns_type,
+	pkt_reformat->reformat_type = params->type;
+	err = root->cmds->packet_reformat_alloc(root, params, ns_type,
 						pkt_reformat);
 	if (err) {
 		kfree(pkt_reformat);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 1b7a0e94d432..13fceba11d3f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -937,6 +937,8 @@ struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
 struct mlx5dr_action *
 mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
 				     enum mlx5dr_action_reformat_type reformat_type,
+				     u8 reformat_param_0,
+				     u8 reformat_param_1,
 				     size_t data_sz,
 				     void *data)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index ee0e9d79aaec..d866cd609d0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -289,7 +289,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
 			DR_ACTION_REFORMAT_TYP_TNL_L2_TO_L2;
 
 		tmp_action = mlx5dr_action_create_packet_reformat(domain,
-								  decap_type, 0,
+								  decap_type,
+								  0, 0, 0,
 								  NULL);
 		if (!tmp_action) {
 			err = -ENOMEM;
@@ -522,9 +523,7 @@ out_err:
 }
 
 static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
-					     int reformat_type,
-					     size_t size,
-					     void *reformat_data,
+					     struct mlx5_pkt_reformat_params *params,
 					     enum mlx5_flow_namespace_type namespace,
 					     struct mlx5_pkt_reformat *pkt_reformat)
 {
@@ -532,7 +531,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
 	struct mlx5dr_action *action;
 	int dr_reformat;
 
-	switch (reformat_type) {
+	switch (params->type) {
 	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
 	case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
 	case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
@@ -546,14 +545,16 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
 		break;
 	default:
 		mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
-			      reformat_type);
+			      params->type);
 		return -EOPNOTSUPP;
 	}
 
 	action = mlx5dr_action_create_packet_reformat(dr_domain,
 						      dr_reformat,
-						      size,
-						      reformat_data);
+						      params->param_0,
+						      params->param_1,
+						      params->size,
+						      params->data);
 	if (!action) {
 		mlx5_core_err(ns->dev, "Failed allocating packet-reformat action\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 612b0ac31db2..8d821bbe3309 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -105,6 +105,8 @@ mlx5dr_action_create_flow_counter(u32 counter_id);
 struct mlx5dr_action *
 mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
 				     enum mlx5dr_action_reformat_type reformat_type,
+				     u8 reformat_param_0,
+				     u8 reformat_param_1,
 				     size_t data_sz,
 				     void *data);
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 1f51f4c3b1af..f69f68fba946 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -254,10 +254,16 @@ struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
 				struct mlx5_modify_hdr *modify_hdr);
 
+struct mlx5_pkt_reformat_params {
+	int type;
+	u8 param_0;
+	u8 param_1;
+	size_t size;
+	void *data;
+};
+
 struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
-						     int reformat_type,
-						     size_t size,
-						     void *reformat_data,
+						     struct mlx5_pkt_reformat_params *params,
 						     enum mlx5_flow_namespace_type ns_type);
 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
 				  struct mlx5_pkt_reformat *reformat);

From 7ea9b39852fa5990fd8d0a981ca8d4457a14cdd3 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Thu, 28 Jan 2021 22:12:07 +0200
Subject: [PATCH 1308/3292] net/mlx5: DR, Added support for INSERT_HEADER
 reformat type

Add support for INSERT_HEADER packet reformat context type

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_action.c   | 76 ++++++++++++++++---
 .../mellanox/mlx5/core/steering/dr_cmd.c      |  7 +-
 .../mellanox/mlx5/core/steering/dr_ste_v0.c   |  4 +-
 .../mellanox/mlx5/core/steering/dr_ste_v1.c   | 68 ++++++++++++++---
 .../mellanox/mlx5/core/steering/dr_types.h    | 17 ++++-
 .../mellanox/mlx5/core/steering/fs_dr.c       |  3 +
 .../mellanox/mlx5/core/steering/mlx5dr.h      |  1 +
 7 files changed, 150 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 13fceba11d3f..de68c0ec2143 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -37,6 +37,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -48,6 +49,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -66,6 +68,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -77,6 +80,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -88,6 +92,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -102,6 +107,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -116,6 +122,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
@@ -125,6 +132,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -132,6 +140,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 		},
@@ -148,6 +157,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -161,6 +171,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_ENCAP] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -176,6 +187,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -186,6 +198,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
@@ -195,6 +208,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -211,6 +225,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
@@ -226,6 +241,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
@@ -236,6 +252,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
 		[DR_ACTION_STATE_NON_TERM] = {
@@ -245,6 +262,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_INSERT_HDR]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
@@ -271,6 +289,9 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
 	case DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3:
 		*action_type = DR_ACTION_TYP_L2_TO_TNL_L3;
 		break;
+	case DR_ACTION_REFORMAT_TYP_INSERT_HDR:
+		*action_type = DR_ACTION_TYP_INSERT_HDR;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -495,8 +516,8 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 				mlx5dr_info(dmn, "Device doesn't support Encap on RX\n");
 				goto out_invalid_arg;
 			}
-			attr.reformat_size = action->reformat->reformat_size;
-			attr.reformat_id = action->reformat->reformat_id;
+			attr.reformat.size = action->reformat->size;
+			attr.reformat.id = action->reformat->id;
 			break;
 		case DR_ACTION_TYP_VPORT:
 			attr.hit_gvmi = action->vport->caps->vhca_gvmi;
@@ -522,6 +543,12 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 
 			attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr;
 			break;
+		case DR_ACTION_TYP_INSERT_HDR:
+			attr.reformat.size = action->reformat->size;
+			attr.reformat.id = action->reformat->id;
+			attr.reformat.param_0 = action->reformat->param_0;
+			attr.reformat.param_1 = action->reformat->param_1;
+			break;
 		default:
 			goto out_invalid_arg;
 		}
@@ -584,6 +611,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = {
 	[DR_ACTION_TYP_MODIFY_HDR]   = sizeof(struct mlx5dr_action_rewrite),
 	[DR_ACTION_TYP_VPORT]        = sizeof(struct mlx5dr_action_vport),
 	[DR_ACTION_TYP_PUSH_VLAN]    = sizeof(struct mlx5dr_action_push_vlan),
+	[DR_ACTION_TYP_INSERT_HDR]   = sizeof(struct mlx5dr_action_reformat),
 };
 
 static struct mlx5dr_action *
@@ -692,7 +720,7 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
 			if (reformat_action) {
 				reformat_req = true;
 				hw_dests[i].vport.reformat_id =
-					reformat_action->reformat->reformat_id;
+					reformat_action->reformat->id;
 				ref_actions[num_of_ref++] = reformat_action;
 				hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
 			}
@@ -799,11 +827,15 @@ struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
 static int
 dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
 				 struct mlx5dr_domain *dmn,
+				 u8 reformat_param_0,
+				 u8 reformat_param_1,
 				 size_t data_sz,
 				 void *data)
 {
-	if ((!data && data_sz) || (data && !data_sz) || reformat_type >
-		DR_ACTION_TYP_L2_TO_TNL_L3) {
+	if ((!data && data_sz) || (data && !data_sz) ||
+	    ((reformat_param_0 || reformat_param_1) &&
+	     reformat_type != DR_ACTION_TYP_INSERT_HDR) ||
+	    reformat_type > DR_ACTION_TYP_INSERT_HDR) {
 		mlx5dr_dbg(dmn, "Invalid reformat parameter!\n");
 		goto out_err;
 	}
@@ -835,6 +867,7 @@ out_err:
 
 static int
 dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
+				 u8 reformat_param_0, u8 reformat_param_1,
 				 size_t data_sz, void *data,
 				 struct mlx5dr_action *action)
 {
@@ -852,13 +885,14 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
 		else
 			rt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
 
-		ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, data_sz, data,
+		ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev, rt, 0, 0,
+						     data_sz, data,
 						     &reformat_id);
 		if (ret)
 			return ret;
 
-		action->reformat->reformat_id = reformat_id;
-		action->reformat->reformat_size = data_sz;
+		action->reformat->id = reformat_id;
+		action->reformat->size = data_sz;
 		return 0;
 	}
 	case DR_ACTION_TYP_TNL_L2_TO_L2:
@@ -900,6 +934,23 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
 		}
 		return 0;
 	}
+	case DR_ACTION_TYP_INSERT_HDR:
+	{
+		ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev,
+						     MLX5_REFORMAT_TYPE_INSERT_HDR,
+						     reformat_param_0,
+						     reformat_param_1,
+						     data_sz, data,
+						     &reformat_id);
+		if (ret)
+			return ret;
+
+		action->reformat->id = reformat_id;
+		action->reformat->size = data_sz;
+		action->reformat->param_0 = reformat_param_0;
+		action->reformat->param_1 = reformat_param_1;
+		return 0;
+	}
 	default:
 		mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
 		return -EINVAL;
@@ -955,7 +1006,9 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
 		goto dec_ref;
 	}
 
-	ret = dr_action_verify_reformat_params(action_type, dmn, data_sz, data);
+	ret = dr_action_verify_reformat_params(action_type, dmn,
+					       reformat_param_0, reformat_param_1,
+					       data_sz, data);
 	if (ret)
 		goto dec_ref;
 
@@ -966,6 +1019,8 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
 	action->reformat->dmn = dmn;
 
 	ret = dr_action_create_reformat_action(dmn,
+					       reformat_param_0,
+					       reformat_param_1,
 					       data_sz,
 					       data,
 					       action);
@@ -1559,8 +1614,9 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
 		break;
 	case DR_ACTION_TYP_L2_TO_TNL_L2:
 	case DR_ACTION_TYP_L2_TO_TNL_L3:
+	case DR_ACTION_TYP_INSERT_HDR:
 		mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev,
-						action->reformat->reformat_id);
+						action->reformat->id);
 		refcount_dec(&action->reformat->dmn->refcount);
 		break;
 	case DR_ACTION_TYP_MODIFY_HDR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 5970cb8fc0c0..6314f50efbd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -460,6 +460,8 @@ int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
 
 int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
 				   enum mlx5_reformat_ctx_type rt,
+				   u8 reformat_param_0,
+				   u8 reformat_param_1,
 				   size_t reformat_size,
 				   void *reformat_data,
 				   u32 *reformat_id)
@@ -486,8 +488,11 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
 	pdata = MLX5_ADDR_OF(packet_reformat_context_in, prctx, reformat_data);
 
 	MLX5_SET(packet_reformat_context_in, prctx, reformat_type, rt);
+	MLX5_SET(packet_reformat_context_in, prctx, reformat_param_0, reformat_param_0);
+	MLX5_SET(packet_reformat_context_in, prctx, reformat_param_1, reformat_param_1);
 	MLX5_SET(packet_reformat_context_in, prctx, reformat_data_size, reformat_size);
-	memcpy(pdata, reformat_data, reformat_size);
+	if (reformat_data && reformat_size)
+		memcpy(pdata, reformat_data, reformat_size);
 
 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 7e26a9e3afc7..f1950e4968da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -437,8 +437,8 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
 						attr->gvmi);
 
 		dr_ste_v0_set_tx_encap(last_ste,
-				       attr->reformat_id,
-				       attr->reformat_size,
+				       attr->reformat.id,
+				       attr->reformat.size,
 				       action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]);
 		/* Whenever prio_tag_required enabled, we can be sure that the
 		 * previous table (ACL) already push vlan to our packet,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index a5807d190698..b4dae628e716 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -374,6 +374,26 @@ static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action,
 	dr_ste_v1_set_reparse(hw_ste_p);
 }
 
+static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
+				     u32 reformat_id,
+				     u8 anchor, u8 offset,
+				     int size)
+{
+	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action,
+		 action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER);
+	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor);
+
+	/* The hardware expects here size and offset in words (2 byte) */
+	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2);
+	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2);
+
+	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id);
+	MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes,
+		 DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE);
+
+	dr_ste_v1_set_reparse(hw_ste_p);
+}
+
 static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action,
 				       u32 vlan_hdr)
 {
@@ -520,8 +540,8 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
 			allow_encap = true;
 		}
 		dr_ste_v1_set_encap(last_ste, action,
-				    attr->reformat_id,
-				    attr->reformat_size);
+				    attr->reformat.id,
+				    attr->reformat.size);
 		action_sz -= DR_STE_ACTION_DOUBLE_SZ;
 		action += DR_STE_ACTION_DOUBLE_SZ;
 	} else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) {
@@ -534,10 +554,23 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
 
 		dr_ste_v1_set_encap_l3(last_ste,
 				       action, d_action,
-				       attr->reformat_id,
-				       attr->reformat_size);
+				       attr->reformat.id,
+				       attr->reformat.size);
 		action_sz -= DR_STE_ACTION_TRIPLE_SZ;
 		action += DR_STE_ACTION_TRIPLE_SZ;
+	} else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) {
+		if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+			dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+			action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+			action_sz = DR_STE_ACTION_TRIPLE_SZ;
+		}
+		dr_ste_v1_set_insert_hdr(last_ste, action,
+					 attr->reformat.id,
+					 attr->reformat.param_0,
+					 attr->reformat.param_1,
+					 attr->reformat.size);
+		action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+		action += DR_STE_ACTION_DOUBLE_SZ;
 	}
 
 	dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
@@ -616,7 +649,9 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
 	}
 
 	if (action_type_set[DR_ACTION_TYP_CTR]) {
-		/* Counter action set after decap to exclude decaped header */
+		/* Counter action set after decap and before insert_hdr
+		 * to exclude decaped / encaped header respectively.
+		 */
 		if (!allow_ctr) {
 			dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
 			action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
@@ -634,8 +669,8 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
 			action_sz = DR_STE_ACTION_TRIPLE_SZ;
 		}
 		dr_ste_v1_set_encap(last_ste, action,
-				    attr->reformat_id,
-				    attr->reformat_size);
+				    attr->reformat.id,
+				    attr->reformat.size);
 		action_sz -= DR_STE_ACTION_DOUBLE_SZ;
 		action += DR_STE_ACTION_DOUBLE_SZ;
 		allow_modify_hdr = false;
@@ -652,10 +687,25 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
 
 		dr_ste_v1_set_encap_l3(last_ste,
 				       action, d_action,
-				       attr->reformat_id,
-				       attr->reformat_size);
+				       attr->reformat.id,
+				       attr->reformat.size);
 		action_sz -= DR_STE_ACTION_TRIPLE_SZ;
 		allow_modify_hdr = false;
+	} else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) {
+		/* Modify header, decap, and encap must use different STEs */
+		if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+			dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+			action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+			action_sz = DR_STE_ACTION_TRIPLE_SZ;
+		}
+		dr_ste_v1_set_insert_hdr(last_ste, action,
+					 attr->reformat.id,
+					 attr->reformat.param_0,
+					 attr->reformat.param_1,
+					 attr->reformat.size);
+		action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+		action += DR_STE_ACTION_DOUBLE_SZ;
+		allow_modify_hdr = false;
 	}
 
 	dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index b34018d49326..60b8c04e165e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -123,6 +123,7 @@ enum mlx5dr_action_type {
 	DR_ACTION_TYP_VPORT,
 	DR_ACTION_TYP_POP_VLAN,
 	DR_ACTION_TYP_PUSH_VLAN,
+	DR_ACTION_TYP_INSERT_HDR,
 	DR_ACTION_TYP_MAX,
 };
 
@@ -266,8 +267,12 @@ struct mlx5dr_ste_actions_attr {
 	u32	ctr_id;
 	u16	gvmi;
 	u16	hit_gvmi;
-	u32	reformat_id;
-	u32	reformat_size;
+	struct {
+		u32	id;
+		u32	size;
+		u8	param_0;
+		u8	param_1;
+	} reformat;
 	struct {
 		int	count;
 		u32	headers[MLX5DR_MAX_VLANS];
@@ -908,8 +913,10 @@ struct mlx5dr_action_rewrite {
 
 struct mlx5dr_action_reformat {
 	struct mlx5dr_domain *dmn;
-	u32 reformat_id;
-	u32 reformat_size;
+	u32 id;
+	u32 size;
+	u8 param_0;
+	u8 param_1;
 };
 
 struct mlx5dr_action_dest_tbl {
@@ -1147,6 +1154,8 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
 				struct mlx5dr_cmd_query_flow_table_details *output);
 int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
 				   enum mlx5_reformat_ctx_type rt,
+				   u8 reformat_param_0,
+				   u8 reformat_param_1,
 				   size_t reformat_size,
 				   void *reformat_data,
 				   u32 *reformat_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index d866cd609d0b..00b4c753cae2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -543,6 +543,9 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
 	case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
 		dr_reformat = DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3;
 		break;
+	case MLX5_REFORMAT_TYPE_INSERT_HDR:
+		dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR;
+		break;
 	default:
 		mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
 			      params->type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 8d821bbe3309..0e2b73731117 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -26,6 +26,7 @@ enum mlx5dr_action_reformat_type {
 	DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2,
 	DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
 	DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
+	DR_ACTION_REFORMAT_TYP_INSERT_HDR,
 };
 
 struct mlx5dr_match_parameters {

From ded6a877a3fcbafcbe32c69cfec13f2d86a8576b Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sun, 14 Mar 2021 03:08:28 +0200
Subject: [PATCH 1309/3292] net/mlx5: DR, Support EMD tag in modify header for
 STEv1

Add support for EMD tag in modify header set/copy actions
on device that supports STEv1.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c  | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index b4dae628e716..42668de01abc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -116,6 +116,8 @@ enum {
 	DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3	= 0x4f,
 	DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0		= 0x5e,
 	DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1		= 0x5f,
+	DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0		= 0x6f,
+	DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1		= 0x70,
 	DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE	= 0x7b,
 	DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE		= 0x7c,
 	DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2		= 0x8c,
@@ -246,6 +248,12 @@ static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field
 	[MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = {
 		.hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15,
 	},
+	[MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = {
+		.hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31,
+	},
+	[MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = {
+		.hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15,
+	},
 };
 
 static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type)

From ec3be8873df3bf467ead27f7cedc896cbb2bd819 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Thu, 4 Mar 2021 13:09:53 +0200
Subject: [PATCH 1310/3292] net/mlx5: Create TC-miss priority and table

In order to adhere to kernel software datapath model bridge offloads must
come after TC and NF FDBs. Following patches in this series add new FDB
priority for bridge after FDB_FT_OFFLOAD. However, since netfilter offload
is implemented with unmanaged tables, its miss path is not automatically
connected to next priority and requires the code to manually connect with
slow table. To keep bridge offloads encapsulated and not mix it with
eswitch offloads, create a new FDB_TC_MISS priority between FDB_FT_OFFLOAD
and FDB_SLOW_PATH:

          +
          |
+---------v----------+
|                    |
|   FDB_TC_OFFLOAD   |
|                    |
+---------+----------+
          |
          |
          |
+---------v----------+
|                    |
|   FDB_FT_OFFLOAD   |
|                    |
+---------+----------+
          |
          |
          |
+---------v----------+
|                    |
|    FDB_TC_MISS     |
|                    |
+---------+----------+
          |
          |
          |
+---------v----------+
|                    |
|   FDB_SLOW_PATH    |
|                    |
+---------+----------+
          |
          v

Initialize the new priority with single default empty managed table and use
the table as TC/NF miss patch instead of slow table. This approach allows
bridge offloads to be created as new FDB namespace priority between
FDB_TC_MISS and FDB_SLOW_PATH without exposing its internal tables to any
other modules since miss path of managed TC-miss table is automatically
wired to next priority.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  1 +
 .../mellanox/mlx5/core/eswitch_offloads.c     | 19 ++++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |  6 ++++++
 include/linux/mlx5/fs.h                       |  1 +
 4 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 64ccb2bc0b58..55404eabff39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -196,6 +196,7 @@ struct mlx5_eswitch_fdb {
 
 		struct offloads_fdb {
 			struct mlx5_flow_namespace *ns;
+			struct mlx5_flow_table *tc_miss_table;
 			struct mlx5_flow_table *slow_fdb;
 			struct mlx5_flow_group *send_to_vport_grp;
 			struct mlx5_flow_group *send_to_vport_meta_grp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d18a28a6e9a6..7579f3402776 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1634,7 +1634,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
 	}
 	esw->fdb_table.offloads.slow_fdb = fdb;
 
-	err = esw_chains_create(esw, fdb);
+	/* Create empty TC-miss managed table. This allows plugging in following
+	 * priorities without directly exposing their level 0 table to
+	 * eswitch_offloads and passing it as miss_fdb to following call to
+	 * esw_chains_create().
+	 */
+	memset(&ft_attr, 0, sizeof(ft_attr));
+	ft_attr.prio = FDB_TC_MISS;
+	esw->fdb_table.offloads.tc_miss_table = mlx5_create_flow_table(root_ns, &ft_attr);
+	if (IS_ERR(esw->fdb_table.offloads.tc_miss_table)) {
+		err = PTR_ERR(esw->fdb_table.offloads.tc_miss_table);
+		esw_warn(dev, "Failed to create TC miss FDB Table err %d\n", err);
+		goto tc_miss_table_err;
+	}
+
+	err = esw_chains_create(esw, esw->fdb_table.offloads.tc_miss_table);
 	if (err) {
 		esw_warn(dev, "Failed to open fdb chains err(%d)\n", err);
 		goto fdb_chains_err;
@@ -1779,6 +1793,8 @@ send_vport_meta_err:
 send_vport_err:
 	esw_chains_destroy(esw, esw_chains(esw));
 fdb_chains_err:
+	mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
+tc_miss_table_err:
 	mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
 slow_fdb_err:
 	/* Holds true only as long as DMFS is the default */
@@ -1806,6 +1822,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 
 	esw_chains_destroy(esw, esw_chains(esw));
 
+	mlx5_destroy_flow_table(esw->fdb_table.offloads.tc_miss_table);
 	mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
 	/* Holds true only as long as DMFS is the default */
 	mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c0936b4e53a9..fc70c4ed8469 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2780,6 +2780,12 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 	if (err)
 		goto out_err;
 
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_TC_MISS, 1);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
+		goto out_err;
+	}
+
 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
 	if (IS_ERR(maj_prio)) {
 		err = PTR_ERR(maj_prio);
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index f69f68fba946..271f2f4d6b60 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -87,6 +87,7 @@ enum {
 	FDB_BYPASS_PATH,
 	FDB_TC_OFFLOAD,
 	FDB_FT_OFFLOAD,
+	FDB_TC_MISS,
 	FDB_SLOW_PATH,
 	FDB_PER_VPORT,
 };

From 0781015288ec7c7b734f3b69bbf1816729481f13 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Wed, 21 Apr 2021 21:52:43 +0300
Subject: [PATCH 1311/3292] net/mlx5e: Refactor mlx5e_eswitch_{*}rep() helpers

Change the helper to functions to accept constant pointer to struct
net_device. This is necessary for following patches in series that pass
mlx5e_eswitch_rep() as a callback to kernel bridge infrastructure code.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 34eb1118670f..40db54412041 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -536,13 +536,13 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_change_carrier      = mlx5e_rep_change_carrier,
 };
 
-bool mlx5e_eswitch_uplink_rep(struct net_device *netdev)
+bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev)
 {
 	return netdev->netdev_ops == &mlx5e_netdev_ops &&
 	       mlx5e_is_uplink_rep(netdev_priv(netdev));
 }
 
-bool mlx5e_eswitch_vf_rep(struct net_device *netdev)
+bool mlx5e_eswitch_vf_rep(const struct net_device *netdev)
 {
 	return netdev->netdev_ops == &mlx5e_netdev_ops_rep;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 22585015c7a7..47a2dfb7792a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -231,9 +231,9 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 
 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
 
-bool mlx5e_eswitch_vf_rep(struct net_device *netdev);
-bool mlx5e_eswitch_uplink_rep(struct net_device *netdev);
-static inline bool mlx5e_eswitch_rep(struct net_device *netdev)
+bool mlx5e_eswitch_vf_rep(const struct net_device *netdev);
+bool mlx5e_eswitch_uplink_rep(const struct net_device *netdev);
+static inline bool mlx5e_eswitch_rep(const struct net_device *netdev)
 {
 	return mlx5e_eswitch_vf_rep(netdev) ||
 	       mlx5e_eswitch_uplink_rep(netdev);

From 19e9bfa044f32655f1c14e95784be93da34e103e Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Fri, 2 Apr 2021 15:57:02 +0300
Subject: [PATCH 1312/3292] net/mlx5: Bridge, add offload infrastructure

Create new files bridge.{c|h} in en/rep directory that implement bridge
interaction with representor netdevices and handle required
events/notifications, bridge.{c|h} in esw directory that implement all
necessary eswitch offloading infrastructure and works on vport/eswitch
level. Provide new kconfig MLX5_BRIDGE which is automatically selected when
both kernel bridge and mlx5 eswitch configs are enabled.

Provide basic infrastructure for bridge offloads:

- struct mlx5_esw_bridge_offloads - per-eswitch bridge offload structure
that encapsulates generic bridge-offloads data (notifier blocks, ingress
flow table/group, etc.) that is created/deleted on enable/disable eswitch
offloads.

- struct mlx5_esw_bridge - per-bridge structure that encapsulates
per-bridge data (reference counter, FDB, egress flow table/group, etc.)
that is created when first eswitch represetor is attached to new bridge and
deleted when last representor is removed from the bridge as a result of
NETDEV_CHANGEUPPER event.

The bridge tables are created with new priority FDB_BR_OFFLOAD in FDB
namespace. The new priority is between tc-miss and slow path priorities.
Priority consist of two levels: the ingress table that is global per
eswitch and matches incoming packets by src_mac/vid and redirects them to
next level (egress table) that is chosen according to ingress port bridge
membership and matches on dst_mac/vid in order to redirect packet to vport
according to the following diagram:

                +
                |
      +---------v----------+
      |                    |
      |   FDB_TC_OFFLOAD   |
      |                    |
      +---------+----------+
                |
                |
      +---------v----------+
      |                    |
      |   FDB_FT_OFFLOAD   |
      |                    |
      +---------+----------+
                |
                |
      +---------v----------+
      |                    |
      |    FDB_TC_MISS     |
      |                    |
      +---------+----------+
                |
+--------------------------------------+
|               |                      |
|        +------+                      |
|        |                             |
| +------v--------+   FDB_BR_OFFLOAD   |
| | INGRESS_TABLE |                    |
| +------+---+----+                    |
|        |   |      match              |
|        |   +---------+               |
|        |             |               |    +-------+
|        |     +-------v-------+ match |    |       |
|        |     | EGRESS_TABLE  +------------> vport |
|        |     +-------+-------+       |    |       |
|        |             |               |    +-------+
|        |    miss     |               |
|        +------+------+               |
|               |                      |
+--------------------------------------+
                |
                |
      +---------v----------+
      |                    |
      |   FDB_SLOW_PATH    |
      |                    |
      +---------+----------+
                |
                v

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/Kconfig   |  10 +
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   1 +
 .../mellanox/mlx5/core/en/rep/bridge.c        | 108 ++++++
 .../mellanox/mlx5/core/en/rep/bridge.h        |  21 ++
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |   3 +
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 354 ++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |  30 ++
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |   6 +
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |   6 +
 include/linux/mlx5/fs.h                       |   1 +
 10 files changed, 540 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 461a43f338e6..d62f90aedade 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -79,6 +79,16 @@ config MLX5_ESWITCH
 	        Legacy SRIOV mode (L2 mac vlan steering based).
 	        Switchdev mode (eswitch offloads).
 
+config MLX5_BRIDGE
+	bool
+	depends on MLX5_ESWITCH && BRIDGE
+	default y
+	help
+	  mlx5 ConnectX offloads support for Ethernet Bridging (BRIDGE).
+	  Enable adding representors of mlx5 uplink and VF ports to Bridge and
+	  offloading rules for traffic between such ports. Supports VLANs (trunk and
+	  access modes).
+
 config MLX5_CLS_ACT
 	bool "MLX5 TC classifier action support"
 	depends on MLX5_ESWITCH && NET_CLS_ACT
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8dbdf1aef00f..b5072a3a2585 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -56,6 +56,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
 				      esw/devlink_port.o esw/vporttbl.o
 mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o
+mlx5_core-$(CONFIG_MLX5_BRIDGE)    += esw/bridge.o en/rep/bridge.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
new file mode 100644
index 000000000000..de7a68488a9d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <net/netevent.h>
+#include <net/switchdev.h>
+#include "bridge.h"
+#include "esw/bridge.h"
+#include "en_rep.h"
+
+static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+								    struct mlx5_esw_bridge_offloads,
+								    netdev_nb);
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	struct net_device *upper;
+	struct mlx5e_priv *priv;
+	u16 vport_num;
+
+	if (!mlx5e_eswitch_rep(dev))
+		return 0;
+
+	upper = info->upper_dev;
+	if (!netif_is_bridge_master(upper))
+		return 0;
+
+	esw = br_offloads->esw;
+	priv = netdev_priv(dev);
+	if (esw != priv->mdev->priv.eswitch)
+		return 0;
+
+	rpriv = priv->ppriv;
+	vport_num = rpriv->rep->vport;
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	return info->linking ?
+		mlx5_esw_bridge_vport_link(upper->ifindex, br_offloads, vport, extack) :
+		mlx5_esw_bridge_vport_unlink(upper->ifindex, br_offloads, vport, extack);
+}
+
+static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
+						unsigned long event, void *ptr)
+{
+	int err = 0;
+
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+		break;
+
+	case NETDEV_CHANGEUPPER:
+		err = mlx5_esw_bridge_port_changeupper(nb, ptr);
+		break;
+	}
+
+	return notifier_from_errno(err);
+}
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_eswitch *esw =
+		mdev->priv.eswitch;
+	int err;
+
+	rtnl_lock();
+	br_offloads = mlx5_esw_bridge_init(esw);
+	rtnl_unlock();
+	if (IS_ERR(br_offloads)) {
+		esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads));
+		return;
+	}
+
+	br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
+	err = register_netdevice_notifier(&br_offloads->netdev_nb);
+	if (err) {
+		esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
+			 err);
+		mlx5_esw_bridge_cleanup(esw);
+	}
+}
+
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_eswitch *esw =
+		mdev->priv.eswitch;
+
+	br_offloads = esw->br_offloads;
+	if (!br_offloads)
+		return;
+
+	unregister_netdevice_notifier(&br_offloads->netdev_nb);
+	rtnl_lock();
+	mlx5_esw_bridge_cleanup(esw);
+	rtnl_unlock();
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
new file mode 100644
index 000000000000..fbeb64242831
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_BRIDGE__
+#define __MLX5_EN_REP_BRIDGE__
+
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_MLX5_BRIDGE)
+
+void mlx5e_rep_bridge_init(struct mlx5e_priv *priv);
+void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_BRIDGE */
+
+static inline void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) {}
+static inline void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_BRIDGE */
+
+#endif /* __MLX5_EN_REP_BRIDGE__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 40db54412041..8290e0086178 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -45,6 +45,7 @@
 #include "en_tc.h"
 #include "en/rep/tc.h"
 #include "en/rep/neigh.h"
+#include "en/rep/bridge.h"
 #include "en/devlink.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
@@ -981,6 +982,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_initialize(priv);
 	mlx5e_dcbnl_init_app(priv);
 	mlx5e_rep_neigh_init(rpriv);
+	mlx5e_rep_bridge_init(priv);
 
 	netdev->wanted_features |= NETIF_F_HW_TC;
 
@@ -1002,6 +1004,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	netif_device_detach(priv->netdev);
 	rtnl_unlock();
 
+	mlx5e_rep_bridge_cleanup(priv);
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
new file mode 100644
index 000000000000..b503562f97d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <net/switchdev.h>
+#include "bridge.h"
+#include "eswitch.h"
+#include "fs_core.h"
+
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE - 1)
+
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE 64000
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1)
+
+enum {
+	MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+	MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+};
+
+struct mlx5_esw_bridge {
+	int ifindex;
+	int refcnt;
+	struct list_head list;
+
+	struct mlx5_flow_table *egress_ft;
+	struct mlx5_flow_group *egress_mac_fg;
+};
+
+static struct mlx5_flow_table *
+mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *fdb;
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!ns) {
+		esw_warn(dev, "Failed to get FDB namespace\n");
+		return ERR_PTR(-ENOENT);
+	}
+
+	ft_attr.max_fte = max_fte;
+	ft_attr.level = level;
+	ft_attr.prio = FDB_BR_OFFLOAD;
+	fdb = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(fdb))
+		esw_warn(dev, "Failed to create bridge FDB Table (err=%ld)\n", PTR_ERR(fdb));
+
+	return fdb;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fg;
+	u32 *in, *match;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+
+	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_mask());
+
+	MLX5_SET(create_flow_group_in, in, start_flow_index,
+		 MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM);
+	MLX5_SET(create_flow_group_in, in, end_flow_index,
+		 MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO);
+
+	fg = mlx5_create_flow_group(ingress_ft, in);
+	if (IS_ERR(fg))
+		esw_warn(esw->dev,
+			 "Failed to create bridge ingress table MAC flow group (err=%ld)\n",
+			 PTR_ERR(fg));
+
+	kvfree(in);
+	return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fg;
+	u32 *in, *match;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
+
+	MLX5_SET(create_flow_group_in, in, start_flow_index,
+		 MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM);
+	MLX5_SET(create_flow_group_in, in, end_flow_index,
+		 MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO);
+
+	fg = mlx5_create_flow_group(egress_ft, in);
+	if (IS_ERR(fg))
+		esw_warn(esw->dev,
+			 "Failed to create bridge egress table MAC flow group (err=%ld)\n",
+			 PTR_ERR(fg));
+	kvfree(in);
+	return fg;
+}
+
+static int
+mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+	struct mlx5_flow_table *ingress_ft;
+	struct mlx5_flow_group *mac_fg;
+	int err;
+
+	ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE,
+						  MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
+						  br_offloads->esw);
+	if (IS_ERR(ingress_ft))
+		return PTR_ERR(ingress_ft);
+
+	mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(br_offloads->esw, ingress_ft);
+	if (IS_ERR(mac_fg)) {
+		err = PTR_ERR(mac_fg);
+		goto err_mac_fg;
+	}
+
+	br_offloads->ingress_ft = ingress_ft;
+	br_offloads->ingress_mac_fg = mac_fg;
+	return 0;
+
+err_mac_fg:
+	mlx5_destroy_flow_table(ingress_ft);
+	return err;
+}
+
+static void
+mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+	mlx5_destroy_flow_group(br_offloads->ingress_mac_fg);
+	br_offloads->ingress_mac_fg = NULL;
+	mlx5_destroy_flow_table(br_offloads->ingress_ft);
+	br_offloads->ingress_ft = NULL;
+}
+
+static int
+mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
+				  struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_flow_table *egress_ft;
+	struct mlx5_flow_group *mac_fg;
+	int err;
+
+	egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE,
+						 MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+						 br_offloads->esw);
+	if (IS_ERR(egress_ft))
+		return PTR_ERR(egress_ft);
+
+	mac_fg = mlx5_esw_bridge_egress_mac_fg_create(br_offloads->esw, egress_ft);
+	if (IS_ERR(mac_fg)) {
+		err = PTR_ERR(mac_fg);
+		goto err_mac_fg;
+	}
+
+	bridge->egress_ft = egress_ft;
+	bridge->egress_mac_fg = mac_fg;
+	return 0;
+
+err_mac_fg:
+	mlx5_destroy_flow_table(egress_ft);
+	return err;
+}
+
+static void
+mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
+{
+	mlx5_destroy_flow_group(bridge->egress_mac_fg);
+	mlx5_destroy_flow_table(bridge->egress_ft);
+}
+
+static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
+						      struct mlx5_esw_bridge_offloads *br_offloads)
+{
+	struct mlx5_esw_bridge *bridge;
+	int err;
+
+	bridge = kvzalloc(sizeof(*bridge), GFP_KERNEL);
+	if (!bridge)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge);
+	if (err)
+		goto err_egress_tbl;
+
+	bridge->ifindex = ifindex;
+	bridge->refcnt = 1;
+	list_add(&bridge->list, &br_offloads->bridges);
+
+	return bridge;
+
+err_egress_tbl:
+	kvfree(bridge);
+	return ERR_PTR(err);
+}
+
+static void mlx5_esw_bridge_get(struct mlx5_esw_bridge *bridge)
+{
+	bridge->refcnt++;
+}
+
+static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
+				struct mlx5_esw_bridge *bridge)
+{
+	if (--bridge->refcnt)
+		return;
+
+	mlx5_esw_bridge_egress_table_cleanup(bridge);
+	list_del(&bridge->list);
+	kvfree(bridge);
+
+	if (list_empty(&br_offloads->bridges))
+		mlx5_esw_bridge_ingress_table_cleanup(br_offloads);
+}
+
+static struct mlx5_esw_bridge *
+mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads)
+{
+	struct mlx5_esw_bridge *bridge;
+
+	ASSERT_RTNL();
+
+	list_for_each_entry(bridge, &br_offloads->bridges, list) {
+		if (bridge->ifindex == ifindex) {
+			mlx5_esw_bridge_get(bridge);
+			return bridge;
+		}
+	}
+
+	if (!br_offloads->ingress_ft) {
+		int err = mlx5_esw_bridge_ingress_table_init(br_offloads);
+
+		if (err)
+			return ERR_PTR(err);
+	}
+
+	bridge = mlx5_esw_bridge_create(ifindex, br_offloads);
+	if (IS_ERR(bridge) && list_empty(&br_offloads->bridges))
+		mlx5_esw_bridge_ingress_table_cleanup(br_offloads);
+	return bridge;
+}
+
+static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge,
+				      struct mlx5_vport *vport)
+{
+	vport->bridge = bridge;
+	return 0;
+}
+
+static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
+					 struct mlx5_vport *vport)
+{
+	mlx5_esw_bridge_put(br_offloads, vport->bridge);
+	vport->bridge = NULL;
+	return 0;
+}
+
+int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
+			       struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_bridge *bridge;
+
+	WARN_ON(vport->bridge);
+
+	bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads);
+	if (IS_ERR(bridge)) {
+		NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex");
+		return PTR_ERR(bridge);
+	}
+
+	return mlx5_esw_bridge_vport_init(bridge, vport);
+}
+
+int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
+				 struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+{
+	if (!vport->bridge) {
+		NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
+		return -EINVAL;
+	}
+	if (vport->bridge->ifindex != ifindex) {
+		NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge");
+		return -EINVAL;
+	}
+
+	return mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+}
+
+static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+	struct mlx5_eswitch *esw = br_offloads->esw;
+	struct mlx5_vport *vport;
+	unsigned long i;
+
+	mlx5_esw_for_each_vport(esw, i, vport)
+		if (vport->bridge)
+			mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+
+	WARN_ONCE(!list_empty(&br_offloads->bridges),
+		  "Cleaning up bridge offloads while still having bridges attached\n");
+}
+
+struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads;
+
+	br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL);
+	if (!br_offloads)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&br_offloads->bridges);
+	br_offloads->esw = esw;
+	esw->br_offloads = br_offloads;
+
+	return br_offloads;
+}
+
+void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads;
+
+	if (!br_offloads)
+		return;
+
+	mlx5_esw_bridge_flush(br_offloads);
+
+	esw->br_offloads = NULL;
+	kvfree(br_offloads);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
new file mode 100644
index 000000000000..319b6f1db0ba
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_ESW_BRIDGE_H__
+#define __MLX5_ESW_BRIDGE_H__
+
+#include <linux/notifier.h>
+#include <linux/list.h>
+#include "eswitch.h"
+
+struct mlx5_flow_table;
+struct mlx5_flow_group;
+
+struct mlx5_esw_bridge_offloads {
+	struct mlx5_eswitch *esw;
+	struct list_head bridges;
+	struct notifier_block netdev_nb;
+
+	struct mlx5_flow_table *ingress_ft;
+	struct mlx5_flow_group *ingress_mac_fg;
+};
+
+struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw);
+void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw);
+int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
+			       struct mlx5_vport *vport, struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
+				 struct mlx5_vport *vport, struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_ESW_BRIDGE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 55404eabff39..48cac5bf606d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -150,6 +150,8 @@ enum mlx5_eswitch_vport_event {
 	MLX5_VPORT_PROMISC_CHANGE = BIT(3),
 };
 
+struct mlx5_esw_bridge;
+
 struct mlx5_vport {
 	struct mlx5_core_dev    *dev;
 	struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE];
@@ -178,6 +180,7 @@ struct mlx5_vport {
 	enum mlx5_eswitch_vport_event enabled_events;
 	int index;
 	struct devlink_port *dl_port;
+	struct mlx5_esw_bridge *bridge;
 };
 
 struct mlx5_esw_indir_table;
@@ -271,6 +274,8 @@ enum {
 	MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED = BIT(1),
 };
 
+struct mlx5_esw_bridge_offloads;
+
 struct mlx5_eswitch {
 	struct mlx5_core_dev    *dev;
 	struct mlx5_nb          nb;
@@ -300,6 +305,7 @@ struct mlx5_eswitch {
 		u32             root_tsar_id;
 	} qos;
 
+	struct mlx5_esw_bridge_offloads *br_offloads;
 	struct mlx5_esw_offload offloads;
 	int                     mode;
 	u16                     manager_vport;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fc70c4ed8469..fc37ac9eab12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2786,6 +2786,12 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 		goto out_err;
 	}
 
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 2);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
+		goto out_err;
+	}
+
 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
 	if (IS_ERR(maj_prio)) {
 		err = PTR_ERR(maj_prio);
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 271f2f4d6b60..77746f7e35b8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -88,6 +88,7 @@ enum {
 	FDB_TC_OFFLOAD,
 	FDB_FT_OFFLOAD,
 	FDB_TC_MISS,
+	FDB_BR_OFFLOAD,
 	FDB_SLOW_PATH,
 	FDB_PER_VPORT,
 };

From 7cd6a54a828558c02ee2117eeba43593de54c448 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Mon, 7 Jun 2021 17:42:49 +0300
Subject: [PATCH 1313/3292] net/mlx5: Bridge, handle FDB events

Hardware supported by mlx5 driver doesn't provide learning and requires the
driver to emulate all switch-like behavior in software. As such, all
packets by default go through miss path, appear on representor and get to
software bridge, if it is the upper device of the representor. This causes
bridge to process packet in software, learn the MAC address to FDB and send
SWITCHDEV_FDB_ADD_TO_DEVICE event to all subscribers.

In order to offload FDB entries in mlx5, register switchdev notifier
callback and implement support for both 'added_by_user' and dynamic FDB
entry SWITCHDEV_FDB_ADD_TO_DEVICE events asynchronously using new
mlx5_esw_bridge_offloads->wq ordered workqueue. In workqueue callback
offload the ingress rule (matching FDB entry MAC as packet source MAC) and
egress table rule (matching FDB entry MAC as destination MAC). For ingress
table rule also match source vport to ensure that only traffic coming from
expected bridge port is matched by offloaded rule. Save all the relevant
FDB entry data in struct mlx5_esw_bridge_fdb_entry instance and insert the
instance in new mlx5_esw_bridge->fdb_list list (for traversing all entries
by software ageing implementation in following patch) and in new
mlx5_esw_bridge->fdb_ht hash table for fast retrieval. Notify the bridge
that FDB entry has been offloaded by sending SWITCHDEV_FDB_OFFLOADED
notification.

Delete FDB entry on reception of SWITCHDEV_FDB_DEL_TO_DEVICE event.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../device_drivers/ethernet/mellanox/mlx5.rst |  15 ++
 .../mellanox/mlx5/core/en/rep/bridge.c        | 150 ++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 254 +++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |   9 +
 4 files changed, 424 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index 936a10f1942c..ea32136b30e7 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -12,6 +12,7 @@ Contents
 - `Enabling the driver and kconfig options`_
 - `Devlink info`_
 - `Devlink parameters`_
+- `Bridge offload`_
 - `mlx5 subfunction`_
 - `mlx5 function attributes`_
 - `Devlink health reporters`_
@@ -217,6 +218,20 @@ users try to enable them.
 
     $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
 
+Bridge offload
+==============
+The mlx5 driver implements support for offloading bridge rules when in switchdev
+mode. Linux bridge FDBs are automatically offloaded when mlx5 switchdev
+representor is attached to bridge.
+
+- Change device to switchdev mode::
+
+    $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
+
+- Attach mlx5 switchdev representor 'enp8s0f0' to bridge netdev 'bridge1'::
+
+    $ ip link set enp8s0f0 master bridge1
+
 mlx5 subfunction
 ================
 mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index de7a68488a9d..b34e9cb686e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -8,6 +8,13 @@
 #include "esw/bridge.h"
 #include "en_rep.h"
 
+struct mlx5_bridge_switchdev_fdb_work {
+	struct work_struct work;
+	struct switchdev_notifier_fdb_info fdb_info;
+	struct net_device *dev;
+	bool add;
+};
+
 static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
 {
 	struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
@@ -65,6 +72,124 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
 	return notifier_from_errno(err);
 }
 
+static void
+mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work)
+{
+	dev_put(fdb_work->dev);
+	kfree(fdb_work->fdb_info.addr);
+	kfree(fdb_work);
+}
+
+static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work)
+{
+	struct mlx5_bridge_switchdev_fdb_work *fdb_work =
+		container_of(work, struct mlx5_bridge_switchdev_fdb_work, work);
+	struct switchdev_notifier_fdb_info *fdb_info =
+		&fdb_work->fdb_info;
+	struct net_device *dev = fdb_work->dev;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	struct mlx5e_priv *priv;
+	u16 vport_num;
+
+	rtnl_lock();
+
+	priv = netdev_priv(dev);
+	rpriv = priv->ppriv;
+	vport_num = rpriv->rep->vport;
+	esw = priv->mdev->priv.eswitch;
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		goto out;
+
+	if (fdb_work->add)
+		mlx5_esw_bridge_fdb_create(dev, esw, vport, fdb_info);
+	else
+		mlx5_esw_bridge_fdb_remove(dev, esw, vport, fdb_info);
+
+out:
+	rtnl_unlock();
+	mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work);
+}
+
+static struct mlx5_bridge_switchdev_fdb_work *
+mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
+					struct switchdev_notifier_fdb_info *fdb_info)
+{
+	struct mlx5_bridge_switchdev_fdb_work *work;
+	u8 *addr;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work);
+	memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info));
+
+	addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+	if (!addr) {
+		kfree(work);
+		return ERR_PTR(-ENOMEM);
+	}
+	ether_addr_copy(addr, fdb_info->addr);
+	work->fdb_info.addr = addr;
+
+	dev_hold(dev);
+	work->dev = dev;
+	work->add = add;
+	return work;
+}
+
+static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
+					   unsigned long event, void *ptr)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+								    struct mlx5_esw_bridge_offloads,
+								    nb);
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct mlx5_bridge_switchdev_fdb_work *work;
+	struct switchdev_notifier_info *info = ptr;
+	struct net_device *upper;
+	struct mlx5e_priv *priv;
+
+	if (!mlx5e_eswitch_rep(dev))
+		return NOTIFY_DONE;
+	priv = netdev_priv(dev);
+	if (priv->mdev->priv.eswitch != br_offloads->esw)
+		return NOTIFY_DONE;
+
+	upper = netdev_master_upper_dev_get_rcu(dev);
+	if (!upper)
+		return NOTIFY_DONE;
+	if (!netif_is_bridge_master(upper))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		fdb_info = container_of(info,
+					struct switchdev_notifier_fdb_info,
+					info);
+
+		work = mlx5_esw_bridge_init_switchdev_fdb_work(dev,
+							       event == SWITCHDEV_FDB_ADD_TO_DEVICE,
+							       fdb_info);
+		if (IS_ERR(work)) {
+			WARN_ONCE(1, "Failed to init switchdev work, err=%ld",
+				  PTR_ERR(work));
+			return notifier_from_errno(PTR_ERR(work));
+		}
+
+		queue_work(br_offloads->wq, &work->work);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
 {
 	struct mlx5_esw_bridge_offloads *br_offloads;
@@ -81,13 +206,34 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
 		return;
 	}
 
+	br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0);
+	if (!br_offloads->wq) {
+		esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n");
+		goto err_alloc_wq;
+	}
+
+	br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event;
+	err = register_switchdev_notifier(&br_offloads->nb);
+	if (err) {
+		esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err);
+		goto err_register_swdev;
+	}
+
 	br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
 	err = register_netdevice_notifier(&br_offloads->netdev_nb);
 	if (err) {
 		esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
 			 err);
-		mlx5_esw_bridge_cleanup(esw);
+		goto err_register_netdev;
 	}
+	return;
+
+err_register_netdev:
+	unregister_switchdev_notifier(&br_offloads->nb);
+err_register_swdev:
+	destroy_workqueue(br_offloads->wq);
+err_alloc_wq:
+	mlx5_esw_bridge_cleanup(esw);
 }
 
 void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
@@ -102,6 +248,8 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
 		return;
 
 	unregister_netdevice_notifier(&br_offloads->netdev_nb);
+	unregister_switchdev_notifier(&br_offloads->nb);
+	destroy_workqueue(br_offloads->wq);
 	rtnl_lock();
 	mlx5_esw_bridge_cleanup(esw);
 	rtnl_unlock();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index b503562f97d0..6dd47891189c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -3,6 +3,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/list.h>
+#include <linux/rhashtable.h>
 #include <net/switchdev.h>
 #include "bridge.h"
 #include "eswitch.h"
@@ -21,15 +22,53 @@ enum {
 	MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
 };
 
+struct mlx5_esw_bridge_fdb_key {
+	unsigned char addr[ETH_ALEN];
+	u16 vid;
+};
+
+struct mlx5_esw_bridge_fdb_entry {
+	struct mlx5_esw_bridge_fdb_key key;
+	struct rhash_head ht_node;
+	struct list_head list;
+	u16 vport_num;
+
+	struct mlx5_flow_handle *ingress_handle;
+	struct mlx5_flow_handle *egress_handle;
+};
+
+static const struct rhashtable_params fdb_ht_params = {
+	.key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key),
+	.key_len = sizeof(struct mlx5_esw_bridge_fdb_key),
+	.head_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, ht_node),
+	.automatic_shrinking = true,
+};
+
 struct mlx5_esw_bridge {
 	int ifindex;
 	int refcnt;
 	struct list_head list;
+	struct mlx5_esw_bridge_offloads *br_offloads;
+
+	struct list_head fdb_list;
+	struct rhashtable fdb_ht;
 
 	struct mlx5_flow_table *egress_ft;
 	struct mlx5_flow_group *egress_mac_fg;
 };
 
+static void
+mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid,
+				   unsigned long val)
+{
+	struct switchdev_notifier_fdb_info send_info;
+
+	send_info.addr = addr;
+	send_info.vid = vid;
+	send_info.offloaded = true;
+	call_switchdev_notifiers(val, dev, &send_info.info, NULL);
+}
+
 static struct mlx5_flow_table *
 mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
 {
@@ -128,6 +167,9 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 	struct mlx5_flow_group *mac_fg;
 	int err;
 
+	if (!mlx5_eswitch_vport_match_metadata_enabled(br_offloads->esw))
+		return -EOPNOTSUPP;
+
 	ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE,
 						  MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
 						  br_offloads->esw);
@@ -194,6 +236,82 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
 	mlx5_destroy_flow_table(bridge->egress_ft);
 }
 
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid,
+				    struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+	struct mlx5_flow_destination dest = {
+		.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+		.ft = bridge->egress_ft,
+	};
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flags = FLOW_ACT_NO_APPEND,
+	};
+	struct mlx5_flow_spec *rule_spec;
+	struct mlx5_flow_handle *handle;
+	u8 *smac_v, *smac_c;
+
+	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+	if (!rule_spec)
+		return ERR_PTR(-ENOMEM);
+
+	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2;
+
+	smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+			      outer_headers.smac_47_16);
+	ether_addr_copy(smac_v, addr);
+	smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+			      outer_headers.smac_47_16);
+	eth_broadcast_addr(smac_c);
+
+	MLX5_SET(fte_match_param, rule_spec->match_criteria,
+		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
+
+	handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+	kvfree(rule_spec);
+	return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid,
+				   struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_flow_destination dest = {
+		.type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+		.vport.num = vport_num,
+	};
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flags = FLOW_ACT_NO_APPEND,
+	};
+	struct mlx5_flow_spec *rule_spec;
+	struct mlx5_flow_handle *handle;
+	u8 *dmac_v, *dmac_c;
+
+	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+	if (!rule_spec)
+		return ERR_PTR(-ENOMEM);
+
+	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+	dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+			      outer_headers.dmac_47_16);
+	ether_addr_copy(dmac_v, addr);
+	dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+			      outer_headers.dmac_47_16);
+	eth_broadcast_addr(dmac_c);
+
+	handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1);
+
+	kvfree(rule_spec);
+	return handle;
+}
+
 static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
 						      struct mlx5_esw_bridge_offloads *br_offloads)
 {
@@ -204,16 +322,24 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
 	if (!bridge)
 		return ERR_PTR(-ENOMEM);
 
+	bridge->br_offloads = br_offloads;
 	err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge);
 	if (err)
 		goto err_egress_tbl;
 
+	err = rhashtable_init(&bridge->fdb_ht, &fdb_ht_params);
+	if (err)
+		goto err_fdb_ht;
+
+	INIT_LIST_HEAD(&bridge->fdb_list);
 	bridge->ifindex = ifindex;
 	bridge->refcnt = 1;
 	list_add(&bridge->list, &br_offloads->bridges);
 
 	return bridge;
 
+err_fdb_ht:
+	mlx5_esw_bridge_egress_table_cleanup(bridge);
 err_egress_tbl:
 	kvfree(bridge);
 	return ERR_PTR(err);
@@ -232,6 +358,7 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
 
 	mlx5_esw_bridge_egress_table_cleanup(bridge);
 	list_del(&bridge->list);
+	rhashtable_destroy(&bridge->fdb_ht);
 	kvfree(bridge);
 
 	if (list_empty(&br_offloads->bridges))
@@ -265,6 +392,69 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads
 	return bridge;
 }
 
+static void
+mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
+				  struct mlx5_esw_bridge *bridge)
+{
+	rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
+	mlx5_del_flow_rules(entry->egress_handle);
+	mlx5_del_flow_rules(entry->ingress_handle);
+	list_del(&entry->list);
+	kvfree(entry);
+}
+
+static struct mlx5_esw_bridge_fdb_entry *
+mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr,
+			       u16 vid, struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_esw_bridge_fdb_entry *entry;
+	struct mlx5_flow_handle *handle;
+	int err;
+
+	entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	ether_addr_copy(entry->key.addr, addr);
+	entry->key.vid = vid;
+	entry->vport_num = vport_num;
+
+	handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vid, bridge);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n",
+			 vport_num, err);
+		goto err_ingress_flow_create;
+	}
+	entry->ingress_handle = handle;
+
+	handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vid, bridge);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n",
+			 vport_num, err);
+		goto err_egress_flow_create;
+	}
+	entry->egress_handle = handle;
+
+	err = rhashtable_insert_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
+	if (err) {
+		esw_warn(esw->dev, "Failed to insert FDB flow(vport=%u,err=%d)\n", vport_num, err);
+		goto err_ht_init;
+	}
+
+	list_add(&entry->list, &bridge->fdb_list);
+	return entry;
+
+err_ht_init:
+	mlx5_del_flow_rules(entry->egress_handle);
+err_egress_flow_create:
+	mlx5_del_flow_rules(entry->ingress_handle);
+err_ingress_flow_create:
+	kvfree(entry);
+	return ERR_PTR(err);
+}
+
 static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge,
 				      struct mlx5_vport *vport)
 {
@@ -275,7 +465,14 @@ static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge,
 static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
 					 struct mlx5_vport *vport)
 {
-	mlx5_esw_bridge_put(br_offloads, vport->bridge);
+	struct mlx5_esw_bridge *bridge = vport->bridge;
+	struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+		if (entry->vport_num == vport->vport)
+			mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+
+	mlx5_esw_bridge_put(br_offloads, bridge);
 	vport->bridge = NULL;
 	return 0;
 }
@@ -299,11 +496,13 @@ int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_
 int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
 				 struct mlx5_vport *vport, struct netlink_ext_ack *extack)
 {
-	if (!vport->bridge) {
+	struct mlx5_esw_bridge *bridge = vport->bridge;
+
+	if (!bridge) {
 		NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
 		return -EINVAL;
 	}
-	if (vport->bridge->ifindex != ifindex) {
+	if (bridge->ifindex != ifindex) {
 		NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge");
 		return -EINVAL;
 	}
@@ -311,6 +510,55 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *b
 	return mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
 }
 
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport,
+				struct switchdev_notifier_fdb_info *fdb_info)
+{
+	struct mlx5_esw_bridge *bridge = vport->bridge;
+	struct mlx5_esw_bridge_fdb_entry *entry;
+	u16 vport_num = vport->vport;
+
+	if (!bridge) {
+		esw_info(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num);
+		return;
+	}
+
+	entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid,
+					       esw, bridge);
+	if (IS_ERR(entry))
+		return;
+
+	mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+					   SWITCHDEV_FDB_OFFLOADED);
+}
+
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport,
+				struct switchdev_notifier_fdb_info *fdb_info)
+{
+	struct mlx5_esw_bridge *bridge = vport->bridge;
+	struct mlx5_esw_bridge_fdb_entry *entry;
+	struct mlx5_esw_bridge_fdb_key key;
+	u16 vport_num = vport->vport;
+
+	if (!bridge) {
+		esw_warn(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num);
+		return;
+	}
+
+	ether_addr_copy(key.addr, fdb_info->addr);
+	key.vid = fdb_info->vid;
+	entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
+	if (!entry) {
+		esw_warn(esw->dev,
+			 "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+			 key.addr, key.vid, vport_num);
+		return;
+	}
+
+	mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+}
+
 static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
 {
 	struct mlx5_eswitch *esw = br_offloads->esw;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index 319b6f1db0ba..cec118c0b733 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -10,11 +10,14 @@
 
 struct mlx5_flow_table;
 struct mlx5_flow_group;
+struct workqueue_struct;
 
 struct mlx5_esw_bridge_offloads {
 	struct mlx5_eswitch *esw;
 	struct list_head bridges;
 	struct notifier_block netdev_nb;
+	struct notifier_block nb;
+	struct workqueue_struct *wq;
 
 	struct mlx5_flow_table *ingress_ft;
 	struct mlx5_flow_group *ingress_mac_fg;
@@ -26,5 +29,11 @@ int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_
 			       struct mlx5_vport *vport, struct netlink_ext_ack *extack);
 int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
 				 struct mlx5_vport *vport, struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport,
+				struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
+				struct mlx5_vport *vport,
+				struct switchdev_notifier_fdb_info *fdb_info);
 
 #endif /* __MLX5_ESW_BRIDGE_H__ */

From c636a0f0f3f0c6ef715d86118273aa6d62ccc69a Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 30 Mar 2021 20:08:57 +0300
Subject: [PATCH 1314/3292] net/mlx5: Bridge, dynamic entry ageing

Dynamic FDB entries require capability to age out unused entries. Such
entries are either aged out by kernel software bridge implementation or by
hardware switch that offloaded them (and notified the kernel to mark them
as SWITCHDEV_FDB_ADD_TO_BRIDGE). Leaving ageing to kernel bridge would
result it deleting offloaded dynamic FDB entries every ageing_time period
due to packets being processed by hardware and, consecutively, 'used'
timestamp for FDB entry not being updated. However, since hardware doesn't
support ageing, software solution inside the driver is required.

In order to emulate hardware ageing in driver, extend bridge FDB ingress
flows with counter and create delayed br_offloads->update_work task on
bridge offloads workqueue. Run the task every second, update 'used'
timestamp in software bridge dynamic entry by sending
SWITCHDEV_FDB_ADD_TO_BRIDGE for the entry, if it flow hardware counter
lastuse field was changed since last update. If lastuse wasn't changed for
ageing_time period, then delete the FDB entry and notify kernel bridge by
sending SWITCHDEV_FDB_DEL_TO_BRIDGE notification.

Register blocking switchdev notifier callback and handle attribute set
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME event to allow user to dynamically
configure bridge FDB entry ageing timeout. Save the value per-bridge in
struct mlx5_esw_bridge. Silently ignore
SWITCHDEV_ATTR_ID_PORT_{PRE_}BRIDGE_FLAGS switchdev event since mlx5 bridge
implementation relies on software bridge for implementing necessary
behavior for all of these flags.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en/rep/bridge.c        |  95 ++++++++++++++++
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 104 ++++++++++++++++--
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |   7 +-
 3 files changed, 193 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index b34e9cb686e3..14645f24671f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -2,12 +2,15 @@
 /* Copyright (c) 2021 Mellanox Technologies. */
 
 #include <linux/netdevice.h>
+#include <linux/if_bridge.h>
 #include <net/netevent.h>
 #include <net/switchdev.h>
 #include "bridge.h"
 #include "esw/bridge.h"
 #include "en_rep.h"
 
+#define MLX5_ESW_BRIDGE_UPDATE_INTERVAL 1000
+
 struct mlx5_bridge_switchdev_fdb_work {
 	struct work_struct work;
 	struct switchdev_notifier_fdb_info fdb_info;
@@ -72,6 +75,63 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
 	return notifier_from_errno(err);
 }
 
+static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+					     const struct switchdev_attr *attr,
+					     struct netlink_ext_ack *extack)
+{
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	struct mlx5e_priv *priv;
+	u16 vport_num;
+	int err = 0;
+
+	priv = netdev_priv(dev);
+	rpriv = priv->ppriv;
+	vport_num = rpriv->rep->vport;
+	esw = priv->mdev->priv.eswitch;
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		if (attr->u.brport_flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) {
+			NL_SET_ERR_MSG_MOD(extack, "Flag is not supported");
+			err = -EINVAL;
+		}
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+		err = mlx5_esw_bridge_ageing_time_set(attr->u.ageing_time, esw, vport);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int mlx5_esw_bridge_event_blocking(struct notifier_block *unused,
+					  unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err;
+
+	switch (event) {
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     mlx5e_eswitch_rep,
+						     mlx5_esw_bridge_port_obj_attr_set);
+		break;
+	default:
+		err = 0;
+	}
+
+	return notifier_from_errno(err);
+}
+
 static void
 mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work)
 {
@@ -160,6 +220,13 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
 	if (priv->mdev->priv.eswitch != br_offloads->esw)
 		return NOTIFY_DONE;
 
+	if (event == SWITCHDEV_PORT_ATTR_SET) {
+		int err = switchdev_handle_port_attr_set(dev, ptr,
+							 mlx5e_eswitch_rep,
+							 mlx5_esw_bridge_port_obj_attr_set);
+		return notifier_from_errno(err);
+	}
+
 	upper = netdev_master_upper_dev_get_rcu(dev);
 	if (!upper)
 		return NOTIFY_DONE;
@@ -190,6 +257,20 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static void mlx5_esw_bridge_update_work(struct work_struct *work)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads = container_of(work,
+								    struct mlx5_esw_bridge_offloads,
+								    update_work.work);
+
+	rtnl_lock();
+	mlx5_esw_bridge_update(br_offloads);
+	rtnl_unlock();
+
+	queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+			   msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
+}
+
 void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
 {
 	struct mlx5_esw_bridge_offloads *br_offloads;
@@ -211,6 +292,9 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
 		esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n");
 		goto err_alloc_wq;
 	}
+	INIT_DELAYED_WORK(&br_offloads->update_work, mlx5_esw_bridge_update_work);
+	queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+			   msecs_to_jiffies(MLX5_ESW_BRIDGE_UPDATE_INTERVAL));
 
 	br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event;
 	err = register_switchdev_notifier(&br_offloads->nb);
@@ -219,6 +303,13 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
 		goto err_register_swdev;
 	}
 
+	br_offloads->nb_blk.notifier_call = mlx5_esw_bridge_event_blocking;
+	err = register_switchdev_blocking_notifier(&br_offloads->nb_blk);
+	if (err) {
+		esw_warn(mdev, "Failed to register blocking switchdev notifier (err=%d)\n", err);
+		goto err_register_swdev_blk;
+	}
+
 	br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
 	err = register_netdevice_notifier(&br_offloads->netdev_nb);
 	if (err) {
@@ -229,6 +320,8 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
 	return;
 
 err_register_netdev:
+	unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
+err_register_swdev_blk:
 	unregister_switchdev_notifier(&br_offloads->nb);
 err_register_swdev:
 	destroy_workqueue(br_offloads->wq);
@@ -248,7 +341,9 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
 		return;
 
 	unregister_netdevice_notifier(&br_offloads->netdev_nb);
+	unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
 	unregister_switchdev_notifier(&br_offloads->nb);
+	cancel_delayed_work(&br_offloads->update_work);
 	destroy_workqueue(br_offloads->wq);
 	rtnl_lock();
 	mlx5_esw_bridge_cleanup(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 6dd47891189c..557dac5e9745 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -4,6 +4,7 @@
 #include <linux/netdevice.h>
 #include <linux/list.h>
 #include <linux/rhashtable.h>
+#include <linux/if_bridge.h>
 #include <net/switchdev.h>
 #include "bridge.h"
 #include "eswitch.h"
@@ -27,13 +28,21 @@ struct mlx5_esw_bridge_fdb_key {
 	u16 vid;
 };
 
+enum {
+	MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
+};
+
 struct mlx5_esw_bridge_fdb_entry {
 	struct mlx5_esw_bridge_fdb_key key;
 	struct rhash_head ht_node;
+	struct net_device *dev;
 	struct list_head list;
 	u16 vport_num;
+	u16 flags;
 
 	struct mlx5_flow_handle *ingress_handle;
+	struct mlx5_fc *ingress_counter;
+	unsigned long lastuse;
 	struct mlx5_flow_handle *egress_handle;
 };
 
@@ -55,6 +64,7 @@ struct mlx5_esw_bridge {
 
 	struct mlx5_flow_table *egress_ft;
 	struct mlx5_flow_group *egress_mac_fg;
+	unsigned long ageing_time;
 };
 
 static void
@@ -238,17 +248,14 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
 
 static struct mlx5_flow_handle *
 mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid,
-				    struct mlx5_esw_bridge *bridge)
+				    u32 counter_id, struct mlx5_esw_bridge *bridge)
 {
 	struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
-	struct mlx5_flow_destination dest = {
-		.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
-		.ft = bridge->egress_ft,
-	};
 	struct mlx5_flow_act flow_act = {
-		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT,
 		.flags = FLOW_ACT_NO_APPEND,
 	};
+	struct mlx5_flow_destination dests[2] = {};
 	struct mlx5_flow_spec *rule_spec;
 	struct mlx5_flow_handle *handle;
 	u8 *smac_v, *smac_c;
@@ -271,7 +278,13 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u1
 	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
 		 mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
 
-	handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1);
+	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dests[0].ft = bridge->egress_ft;
+	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dests[1].counter_id = counter_id;
+
+	handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests,
+				     ARRAY_SIZE(dests));
 
 	kvfree(rule_spec);
 	return handle;
@@ -334,6 +347,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
 	INIT_LIST_HEAD(&bridge->fdb_list);
 	bridge->ifindex = ifindex;
 	bridge->refcnt = 1;
+	bridge->ageing_time = BR_DEFAULT_AGEING_TIME;
 	list_add(&bridge->list, &br_offloads->bridges);
 
 	return bridge;
@@ -399,27 +413,44 @@ mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
 	rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
 	mlx5_del_flow_rules(entry->egress_handle);
 	mlx5_del_flow_rules(entry->ingress_handle);
+	mlx5_fc_destroy(bridge->br_offloads->esw->dev, entry->ingress_counter);
 	list_del(&entry->list);
 	kvfree(entry);
 }
 
 static struct mlx5_esw_bridge_fdb_entry *
 mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr,
-			       u16 vid, struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge)
+			       u16 vid, bool added_by_user, struct mlx5_eswitch *esw,
+			       struct mlx5_esw_bridge *bridge)
 {
 	struct mlx5_esw_bridge_fdb_entry *entry;
 	struct mlx5_flow_handle *handle;
+	struct mlx5_fc *counter;
+	struct mlx5e_priv *priv;
 	int err;
 
+	priv = netdev_priv(dev);
 	entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return ERR_PTR(-ENOMEM);
 
 	ether_addr_copy(entry->key.addr, addr);
 	entry->key.vid = vid;
+	entry->dev = dev;
 	entry->vport_num = vport_num;
+	entry->lastuse = jiffies;
+	if (added_by_user)
+		entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER;
 
-	handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vid, bridge);
+	counter = mlx5_fc_create(priv->mdev, true);
+	if (IS_ERR(counter)) {
+		err = PTR_ERR(counter);
+		goto err_ingress_fc_create;
+	}
+	entry->ingress_counter = counter;
+
+	handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vid, mlx5_fc_id(counter),
+						     bridge);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
 		esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n",
@@ -451,10 +482,22 @@ err_ht_init:
 err_egress_flow_create:
 	mlx5_del_flow_rules(entry->ingress_handle);
 err_ingress_flow_create:
+	mlx5_fc_destroy(priv->mdev, entry->ingress_counter);
+err_ingress_fc_create:
 	kvfree(entry);
 	return ERR_PTR(err);
 }
 
+int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
+				    struct mlx5_vport *vport)
+{
+	if (!vport->bridge)
+		return -EINVAL;
+
+	vport->bridge->ageing_time = ageing_time;
+	return 0;
+}
+
 static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge,
 				      struct mlx5_vport *vport)
 {
@@ -524,12 +567,17 @@ void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw
 	}
 
 	entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid,
-					       esw, bridge);
+					       fdb_info->added_by_user, esw, bridge);
 	if (IS_ERR(entry))
 		return;
 
-	mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
-					   SWITCHDEV_FDB_OFFLOADED);
+	if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+		mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+						   SWITCHDEV_FDB_OFFLOADED);
+	else
+		/* Take over dynamic entries to prevent kernel bridge from aging them out. */
+		mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+						   SWITCHDEV_FDB_ADD_TO_BRIDGE);
 }
 
 void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
@@ -556,9 +604,41 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw
 		return;
 	}
 
+	if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
+		mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
+						   SWITCHDEV_FDB_DEL_TO_BRIDGE);
 	mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
 }
 
+void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
+{
+	struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+	struct mlx5_esw_bridge *bridge;
+
+	list_for_each_entry(bridge, &br_offloads->bridges, list) {
+		list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
+			unsigned long lastuse =
+				(unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter);
+
+			if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
+				continue;
+
+			if (time_after(lastuse, entry->lastuse)) {
+				entry->lastuse = lastuse;
+				/* refresh existing bridge entry */
+				mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+								   entry->key.vid,
+								   SWITCHDEV_FDB_ADD_TO_BRIDGE);
+			} else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) {
+				mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+								   entry->key.vid,
+								   SWITCHDEV_FDB_DEL_TO_BRIDGE);
+				mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+			}
+		}
+	}
+}
+
 static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
 {
 	struct mlx5_eswitch *esw = br_offloads->esw;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index cec118c0b733..07726ae55b2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -6,18 +6,20 @@
 
 #include <linux/notifier.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 #include "eswitch.h"
 
 struct mlx5_flow_table;
 struct mlx5_flow_group;
-struct workqueue_struct;
 
 struct mlx5_esw_bridge_offloads {
 	struct mlx5_eswitch *esw;
 	struct list_head bridges;
 	struct notifier_block netdev_nb;
+	struct notifier_block nb_blk;
 	struct notifier_block nb;
 	struct workqueue_struct *wq;
+	struct delayed_work update_work;
 
 	struct mlx5_flow_table *ingress_ft;
 	struct mlx5_flow_group *ingress_mac_fg;
@@ -35,5 +37,8 @@ void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw
 void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
 				struct mlx5_vport *vport,
 				struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
+				    struct mlx5_vport *vport);
 
 #endif /* __MLX5_ESW_BRIDGE_H__ */

From d75b9e804858c8eee5549b821fd48e780d3bb871 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 30 Mar 2021 18:46:31 +0300
Subject: [PATCH 1315/3292] net/mlx5: Bridge, implement infrastructure for
 vlans

Establish all the necessary infrastructure for implementing vlan matching
and vlan push/pop in following patches:

- Add new per-vport struct mlx5_esw_bridge_port that is used to store
metadata for all port vlans. Initialize and cleanup the instance of the
structure when port representor is linked/unliked to bridge. Use xarray to
allow quick vport metadata lookup by vport number.

- Add new per-port-vlan struct mlx5_esw_bridge_vlan that is used to store
vlan-specific data (vid, flags). Handle SWITCHDEV_PORT_OBJ_{ADD|DEL}
switchdev blocking event for SWITCHDEV_OBJ_ID_PORT_VLAN object by
creating/deleting the vlan structure and saving it in per-vport xarray for
quick lookup.

- Implement support for SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING object
attribute that is used to toggle vlan filtering. Remove all FDB entries
from hardware when vlan filtering state is changed.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en/rep/bridge.c        |  73 ++++++
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 211 +++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |   5 +
 3 files changed, 286 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 14645f24671f..7f5efc1b4392 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -75,6 +75,66 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
 	return notifier_from_errno(err);
 }
 
+static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+					const struct switchdev_obj *obj,
+					struct netlink_ext_ack *extack)
+{
+	const struct switchdev_obj_port_vlan *vlan;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	struct mlx5e_priv *priv;
+	u16 vport_num;
+	int err = 0;
+
+	priv = netdev_priv(dev);
+	rpriv = priv->ppriv;
+	vport_num = rpriv->rep->vport;
+	esw = priv->mdev->priv.eswitch;
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+		err = mlx5_esw_bridge_port_vlan_add(vlan->vid, vlan->flags, esw, vport, extack);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return err;
+}
+
+static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+					const struct switchdev_obj *obj)
+{
+	const struct switchdev_obj_port_vlan *vlan;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5_vport *vport;
+	struct mlx5e_priv *priv;
+	u16 vport_num;
+
+	priv = netdev_priv(dev);
+	rpriv = priv->ppriv;
+	vport_num = rpriv->rep->vport;
+	esw = priv->mdev->priv.eswitch;
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+		mlx5_esw_bridge_port_vlan_del(vlan->vid, esw, vport);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
 static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
 					     const struct switchdev_attr *attr,
 					     struct netlink_ext_ack *extack)
@@ -106,6 +166,9 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
 	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
 		err = mlx5_esw_bridge_ageing_time_set(attr->u.ageing_time, esw, vport);
 		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		err = mlx5_esw_bridge_vlan_filtering_set(attr->u.vlan_filtering, esw, vport);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 	}
@@ -120,6 +183,16 @@ static int mlx5_esw_bridge_event_blocking(struct notifier_block *unused,
 	int err;
 
 	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = switchdev_handle_port_obj_add(dev, ptr,
+						    mlx5e_eswitch_rep,
+						    mlx5_esw_bridge_port_obj_add);
+		break;
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = switchdev_handle_port_obj_del(dev, ptr,
+						    mlx5e_eswitch_rep,
+						    mlx5_esw_bridge_port_obj_del);
+		break;
 	case SWITCHDEV_PORT_ATTR_SET:
 		err = switchdev_handle_port_attr_set(dev, ptr,
 						     mlx5e_eswitch_rep,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 557dac5e9745..eec5897c6b79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -4,6 +4,7 @@
 #include <linux/netdevice.h>
 #include <linux/list.h>
 #include <linux/rhashtable.h>
+#include <linux/xarray.h>
 #include <linux/if_bridge.h>
 #include <net/switchdev.h>
 #include "bridge.h"
@@ -53,6 +54,20 @@ static const struct rhashtable_params fdb_ht_params = {
 	.automatic_shrinking = true,
 };
 
+struct mlx5_esw_bridge_vlan {
+	u16 vid;
+	u16 flags;
+};
+
+struct mlx5_esw_bridge_port {
+	u16 vport_num;
+	struct xarray vlans;
+};
+
+enum {
+	MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
+};
+
 struct mlx5_esw_bridge {
 	int ifindex;
 	int refcnt;
@@ -61,10 +76,12 @@ struct mlx5_esw_bridge {
 
 	struct list_head fdb_list;
 	struct rhashtable fdb_ht;
+	struct xarray vports;
 
 	struct mlx5_flow_table *egress_ft;
 	struct mlx5_flow_group *egress_mac_fg;
 	unsigned long ageing_time;
+	u32 flags;
 };
 
 static void
@@ -345,6 +362,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
 		goto err_fdb_ht;
 
 	INIT_LIST_HEAD(&bridge->fdb_list);
+	xa_init(&bridge->vports);
 	bridge->ifindex = ifindex;
 	bridge->refcnt = 1;
 	bridge->ageing_time = BR_DEFAULT_AGEING_TIME;
@@ -371,6 +389,7 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
 		return;
 
 	mlx5_esw_bridge_egress_table_cleanup(bridge);
+	WARN_ON(!xa_empty(&bridge->vports));
 	list_del(&bridge->list);
 	rhashtable_destroy(&bridge->fdb_ht);
 	kvfree(bridge);
@@ -406,6 +425,24 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads
 	return bridge;
 }
 
+static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port,
+				       struct mlx5_esw_bridge *bridge)
+{
+	return xa_insert(&bridge->vports, port->vport_num, port, GFP_KERNEL);
+}
+
+static struct mlx5_esw_bridge_port *
+mlx5_esw_bridge_port_lookup(u16 vport_num, struct mlx5_esw_bridge *bridge)
+{
+	return xa_load(&bridge->vports, vport_num);
+}
+
+static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
+				       struct mlx5_esw_bridge *bridge)
+{
+	xa_erase(&bridge->vports, port->vport_num);
+}
+
 static void
 mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
 				  struct mlx5_esw_bridge *bridge)
@@ -418,6 +455,68 @@ mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
 	kvfree(entry);
 }
 
+static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
+		if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
+			mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+							   entry->key.vid,
+							   SWITCHDEV_FDB_DEL_TO_BRIDGE);
+		mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+	}
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port)
+{
+	return xa_load(&port->vlans, vid);
+}
+
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port)
+{
+	struct mlx5_esw_bridge_vlan *vlan;
+	int err;
+
+	vlan = kvzalloc(sizeof(*vlan), GFP_KERNEL);
+	if (!vlan)
+		return ERR_PTR(-ENOMEM);
+
+	vlan->vid = vid;
+	vlan->flags = flags;
+	err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL);
+	if (err) {
+		kvfree(vlan);
+		return ERR_PTR(err);
+	}
+
+	return vlan;
+}
+
+static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
+				       struct mlx5_esw_bridge_vlan *vlan)
+{
+	xa_erase(&port->vlans, vlan->vid);
+}
+
+static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
+					 struct mlx5_esw_bridge_vlan *vlan)
+{
+	mlx5_esw_bridge_vlan_erase(port, vlan);
+	kvfree(vlan);
+}
+
+static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port)
+{
+	struct mlx5_esw_bridge_vlan *vlan;
+	unsigned long index;
+
+	xa_for_each(&port->vlans, index, vlan)
+		mlx5_esw_bridge_vlan_cleanup(port, vlan);
+}
+
 static struct mlx5_esw_bridge_fdb_entry *
 mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr,
 			       u16 vid, bool added_by_user, struct mlx5_eswitch *esw,
@@ -498,11 +597,60 @@ int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswit
 	return 0;
 }
 
-static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge,
+int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
+				       struct mlx5_vport *vport)
+{
+	struct mlx5_esw_bridge *bridge;
+	bool filtering;
+
+	if (!vport->bridge)
+		return -EINVAL;
+
+	bridge = vport->bridge;
+	filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+	if (filtering == enable)
+		return 0;
+
+	mlx5_esw_bridge_fdb_flush(bridge);
+	if (enable)
+		bridge->flags |= MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+	else
+		bridge->flags &= ~MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
+
+	return 0;
+}
+
+static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloads,
+				      struct mlx5_esw_bridge *bridge,
 				      struct mlx5_vport *vport)
 {
+	struct mlx5_eswitch *esw = br_offloads->esw;
+	struct mlx5_esw_bridge_port *port;
+	int err;
+
+	port = kvzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port) {
+		err = -ENOMEM;
+		goto err_port_alloc;
+	}
+
+	port->vport_num = vport->vport;
+	xa_init(&port->vlans);
+	err = mlx5_esw_bridge_port_insert(port, bridge);
+	if (err) {
+		esw_warn(esw->dev, "Failed to insert port metadata (vport=%u,err=%d)\n",
+			 vport->vport, err);
+		goto err_port_insert;
+	}
+
 	vport->bridge = bridge;
 	return 0;
+
+err_port_insert:
+	kvfree(port);
+err_port_alloc:
+	mlx5_esw_bridge_put(br_offloads, bridge);
+	return err;
 }
 
 static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
@@ -510,11 +658,21 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off
 {
 	struct mlx5_esw_bridge *bridge = vport->bridge;
 	struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+	struct mlx5_esw_bridge_port *port;
 
 	list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
 		if (entry->vport_num == vport->vport)
 			mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
 
+	port = mlx5_esw_bridge_port_lookup(vport->vport, bridge);
+	if (!port) {
+		WARN(1, "Vport %u metadata not found on bridge", vport->vport);
+		return -EINVAL;
+	}
+
+	mlx5_esw_bridge_port_vlans_flush(port);
+	mlx5_esw_bridge_port_erase(port, bridge);
+	kvfree(port);
 	mlx5_esw_bridge_put(br_offloads, bridge);
 	vport->bridge = NULL;
 	return 0;
@@ -524,6 +682,7 @@ int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_
 			       struct mlx5_vport *vport, struct netlink_ext_ack *extack)
 {
 	struct mlx5_esw_bridge *bridge;
+	int err;
 
 	WARN_ON(vport->bridge);
 
@@ -533,13 +692,17 @@ int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_
 		return PTR_ERR(bridge);
 	}
 
-	return mlx5_esw_bridge_vport_init(bridge, vport);
+	err = mlx5_esw_bridge_vport_init(br_offloads, bridge, vport);
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "Error initializing port");
+	return err;
 }
 
 int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
 				 struct mlx5_vport *vport, struct netlink_ext_ack *extack)
 {
 	struct mlx5_esw_bridge *bridge = vport->bridge;
+	int err;
 
 	if (!bridge) {
 		NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
@@ -550,7 +713,49 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *b
 		return -EINVAL;
 	}
 
-	return mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+	err = mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed");
+	return err;
+}
+
+int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+{
+	struct mlx5_esw_bridge_port *port;
+	struct mlx5_esw_bridge_vlan *vlan;
+
+	port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge);
+	if (!port)
+		return -EINVAL;
+
+	vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+	if (vlan) {
+		vlan->flags = flags;
+		return 0;
+	}
+
+	vlan = mlx5_esw_bridge_vlan_create(vid, flags, port);
+	if (IS_ERR(vlan)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
+		return PTR_ERR(vlan);
+	}
+	return 0;
+}
+
+void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	struct mlx5_esw_bridge_port *port;
+	struct mlx5_esw_bridge_vlan *vlan;
+
+	port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge);
+	if (!port)
+		return;
+
+	vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+	if (!vlan)
+		return;
+	mlx5_esw_bridge_vlan_cleanup(port, vlan);
 }
 
 void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index 07726ae55b2b..276ed0392607 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -40,5 +40,10 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw
 void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads);
 int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
 				    struct mlx5_vport *vport);
+int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
+				       struct mlx5_vport *vport);
+int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport, struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 
 #endif /* __MLX5_ESW_BRIDGE_H__ */

From ffc89ee5e5e88aa5924034c28d5e5aae75229e0f Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Fri, 12 Mar 2021 13:37:46 +0200
Subject: [PATCH 1316/3292] net/mlx5: Bridge, match FDB entry vlan tag

Add support for FDB vlan-tagged entries. Extend ingress and egress flow
tables with flow groups to match packet vlan tag. Modify the flow creation
code to include vlan tag, if vlan is configured on port and vlan
configuration is supported for offload.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../device_drivers/ethernet/mellanox/mlx5.rst |   9 +
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 181 +++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |   1 +
 3 files changed, 181 insertions(+), 10 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index ea32136b30e7..a0c91fe5574d 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -232,6 +232,15 @@ representor is attached to bridge.
 
     $ ip link set enp8s0f0 master bridge1
 
+VLANs
+-----
+Following bridge VLAN functions are supported by mlx5:
+
+- VLAN filtering (including multiple VLANs per port)::
+
+    $ ip link set bridge1 type bridge vlan_filtering 1
+    $ bridge vlan add dev enp8s0f0 vid 2-3
+
 mlx5 subfunction
 ================
 mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index eec5897c6b79..e1467dbe80dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -12,11 +12,17 @@
 #include "fs_core.h"
 
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 2 - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+	(MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE - 1)
 
 #define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE 64000
-#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE / 2 - 1)
+#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \
+	(MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
 #define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1)
 
 enum {
@@ -79,6 +85,7 @@ struct mlx5_esw_bridge {
 	struct xarray vports;
 
 	struct mlx5_flow_table *egress_ft;
+	struct mlx5_flow_group *egress_vlan_fg;
 	struct mlx5_flow_group *egress_mac_fg;
 	unsigned long ageing_time;
 	u32 flags;
@@ -120,6 +127,44 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
 	return fdb;
 }
 
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fg;
+	u32 *in, *match;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_mask());
+
+	MLX5_SET(create_flow_group_in, in, start_flow_index,
+		 MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM);
+	MLX5_SET(create_flow_group_in, in, end_flow_index,
+		 MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO);
+
+	fg = mlx5_create_flow_group(ingress_ft, in);
+	kvfree(in);
+	if (IS_ERR(fg))
+		esw_warn(esw->dev,
+			 "Failed to create VLAN flow group for bridge ingress table (err=%ld)\n",
+			 PTR_ERR(fg));
+
+	return fg;
+}
+
 static struct mlx5_flow_group *
 mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
 {
@@ -149,13 +194,46 @@ mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow
 	fg = mlx5_create_flow_group(ingress_ft, in);
 	if (IS_ERR(fg))
 		esw_warn(esw->dev,
-			 "Failed to create bridge ingress table MAC flow group (err=%ld)\n",
+			 "Failed to create MAC flow group for bridge ingress table (err=%ld)\n",
 			 PTR_ERR(fg));
 
 	kvfree(in);
 	return fg;
 }
 
+static struct mlx5_flow_group *
+mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fg;
+	u32 *in, *match;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid);
+
+	MLX5_SET(create_flow_group_in, in, start_flow_index,
+		 MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM);
+	MLX5_SET(create_flow_group_in, in, end_flow_index,
+		 MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO);
+
+	fg = mlx5_create_flow_group(egress_ft, in);
+	if (IS_ERR(fg))
+		esw_warn(esw->dev,
+			 "Failed to create VLAN flow group for bridge egress table (err=%ld)\n",
+			 PTR_ERR(fg));
+	kvfree(in);
+	return fg;
+}
+
 static struct mlx5_flow_group *
 mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft)
 {
@@ -190,8 +268,8 @@ mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_
 static int
 mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 {
+	struct mlx5_flow_group *mac_fg, *vlan_fg;
 	struct mlx5_flow_table *ingress_ft;
-	struct mlx5_flow_group *mac_fg;
 	int err;
 
 	if (!mlx5_eswitch_vport_match_metadata_enabled(br_offloads->esw))
@@ -203,6 +281,12 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 	if (IS_ERR(ingress_ft))
 		return PTR_ERR(ingress_ft);
 
+	vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(br_offloads->esw, ingress_ft);
+	if (IS_ERR(vlan_fg)) {
+		err = PTR_ERR(vlan_fg);
+		goto err_vlan_fg;
+	}
+
 	mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(br_offloads->esw, ingress_ft);
 	if (IS_ERR(mac_fg)) {
 		err = PTR_ERR(mac_fg);
@@ -210,10 +294,13 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 	}
 
 	br_offloads->ingress_ft = ingress_ft;
+	br_offloads->ingress_vlan_fg = vlan_fg;
 	br_offloads->ingress_mac_fg = mac_fg;
 	return 0;
 
 err_mac_fg:
+	mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
 	mlx5_destroy_flow_table(ingress_ft);
 	return err;
 }
@@ -223,6 +310,8 @@ mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloa
 {
 	mlx5_destroy_flow_group(br_offloads->ingress_mac_fg);
 	br_offloads->ingress_mac_fg = NULL;
+	mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg);
+	br_offloads->ingress_vlan_fg = NULL;
 	mlx5_destroy_flow_table(br_offloads->ingress_ft);
 	br_offloads->ingress_ft = NULL;
 }
@@ -231,8 +320,8 @@ static int
 mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
 				  struct mlx5_esw_bridge *bridge)
 {
+	struct mlx5_flow_group *mac_fg, *vlan_fg;
 	struct mlx5_flow_table *egress_ft;
-	struct mlx5_flow_group *mac_fg;
 	int err;
 
 	egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE,
@@ -241,6 +330,12 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
 	if (IS_ERR(egress_ft))
 		return PTR_ERR(egress_ft);
 
+	vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(br_offloads->esw, egress_ft);
+	if (IS_ERR(vlan_fg)) {
+		err = PTR_ERR(vlan_fg);
+		goto err_vlan_fg;
+	}
+
 	mac_fg = mlx5_esw_bridge_egress_mac_fg_create(br_offloads->esw, egress_ft);
 	if (IS_ERR(mac_fg)) {
 		err = PTR_ERR(mac_fg);
@@ -248,10 +343,13 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads,
 	}
 
 	bridge->egress_ft = egress_ft;
+	bridge->egress_vlan_fg = vlan_fg;
 	bridge->egress_mac_fg = mac_fg;
 	return 0;
 
 err_mac_fg:
+	mlx5_destroy_flow_group(vlan_fg);
+err_vlan_fg:
 	mlx5_destroy_flow_table(egress_ft);
 	return err;
 }
@@ -260,12 +358,14 @@ static void
 mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
 {
 	mlx5_destroy_flow_group(bridge->egress_mac_fg);
+	mlx5_destroy_flow_group(bridge->egress_vlan_fg);
 	mlx5_destroy_flow_table(bridge->egress_ft);
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid,
-				    u32 counter_id, struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
+				    struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+				    struct mlx5_esw_bridge *bridge)
 {
 	struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
 	struct mlx5_flow_act flow_act = {
@@ -295,6 +395,17 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u1
 	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
 		 mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
 
+	if (vlan) {
+		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+				 outer_headers.cvlan_tag);
+		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+				 outer_headers.cvlan_tag);
+		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+				 outer_headers.first_vid);
+		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+			 vlan->vid);
+	}
+
 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dests[0].ft = bridge->egress_ft;
 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
@@ -308,7 +419,8 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u1
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid,
+mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
+				   struct mlx5_esw_bridge_vlan *vlan,
 				   struct mlx5_esw_bridge *bridge)
 {
 	struct mlx5_flow_destination dest = {
@@ -336,6 +448,17 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, u16
 			      outer_headers.dmac_47_16);
 	eth_broadcast_addr(dmac_c);
 
+	if (vlan) {
+		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+				 outer_headers.cvlan_tag);
+		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+				 outer_headers.cvlan_tag);
+		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+				 outer_headers.first_vid);
+		MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid,
+			 vlan->vid);
+	}
+
 	handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1);
 
 	kvfree(rule_spec);
@@ -517,17 +640,55 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port)
 		mlx5_esw_bridge_vlan_cleanup(port, vlan);
 }
 
+static struct mlx5_esw_bridge_vlan *
+mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge *bridge,
+				 struct mlx5_eswitch *esw)
+{
+	struct mlx5_esw_bridge_port *port;
+	struct mlx5_esw_bridge_vlan *vlan;
+
+	port = mlx5_esw_bridge_port_lookup(vport_num, bridge);
+	if (!port) {
+		/* FDB is added asynchronously on wq while port might have been deleted
+		 * concurrently. Report on 'info' logging level and skip the FDB offload.
+		 */
+		esw_info(esw->dev, "Failed to lookup bridge port (vport=%u)\n", vport_num);
+		return ERR_PTR(-EINVAL);
+	}
+
+	vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
+	if (!vlan) {
+		/* FDB is added asynchronously on wq while vlan might have been deleted
+		 * concurrently. Report on 'info' logging level and skip the FDB offload.
+		 */
+		esw_info(esw->dev, "Failed to lookup bridge port vlan metadata (vport=%u)\n",
+			 vport_num);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return vlan;
+}
+
 static struct mlx5_esw_bridge_fdb_entry *
 mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr,
 			       u16 vid, bool added_by_user, struct mlx5_eswitch *esw,
 			       struct mlx5_esw_bridge *bridge)
 {
+	struct mlx5_esw_bridge_vlan *vlan = NULL;
 	struct mlx5_esw_bridge_fdb_entry *entry;
 	struct mlx5_flow_handle *handle;
 	struct mlx5_fc *counter;
 	struct mlx5e_priv *priv;
 	int err;
 
+	if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
+		vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw);
+		if (IS_ERR(vlan))
+			return ERR_CAST(vlan);
+		if (vlan->flags & (BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED))
+			return ERR_PTR(-EOPNOTSUPP); /* can't offload vlan push/pop */
+	}
+
 	priv = netdev_priv(dev);
 	entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
@@ -548,7 +709,7 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 	}
 	entry->ingress_counter = counter;
 
-	handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vid, mlx5_fc_id(counter),
+	handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter),
 						     bridge);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
@@ -558,7 +719,7 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 	}
 	entry->ingress_handle = handle;
 
-	handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vid, bridge);
+	handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
 		esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index 276ed0392607..bedbda57cdb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -22,6 +22,7 @@ struct mlx5_esw_bridge_offloads {
 	struct delayed_work update_work;
 
 	struct mlx5_flow_table *ingress_ft;
+	struct mlx5_flow_group *ingress_vlan_fg;
 	struct mlx5_flow_group *ingress_mac_fg;
 };
 

From 36e55079e54955a70b2c340eedd6125f794a911d Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Fri, 2 Apr 2021 15:09:06 +0300
Subject: [PATCH 1317/3292] net/mlx5: Bridge, support pvid and untagged vlan
 configurations

Implement support for pushing vlan header into untagged packet on ingress
of port that has pvid configured and support for popping vlan on egress of
port that has the matching vlan configured as untagged. To support such
configurations packet reformat contexts of {INSERT|REMOVE}_HEADER types are
created per such vlan and saved to struct mlx5_esw_bridge_vlan which allows
all FDB entries on particular vlan to share single packet reformat
instance. When initializing FDB entries with pvid or untagged vlan type set
its mlx5_flow_act->pkt_reformat action accordingly.

Flush all flows when removing vlan from port. This is necessary because
even though software bridge removes all FDB entries before removing their
vlan, mlx5 bridge implementation deletes their corresponding flow entries
from hardware in asynchronous workqueue task, which will cause firmware
error if vlan packet reformat context is deleted before all flows that
point to it.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../device_drivers/ethernet/mellanox/mlx5.rst |   8 +
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 177 ++++++++++++++++--
 2 files changed, 168 insertions(+), 17 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index a0c91fe5574d..058882dca17b 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -241,6 +241,14 @@ Following bridge VLAN functions are supported by mlx5:
     $ ip link set bridge1 type bridge vlan_filtering 1
     $ bridge vlan add dev enp8s0f0 vid 2-3
 
+- VLAN push on bridge ingress::
+
+    $ bridge vlan add dev enp8s0f0 vid 3 pvid
+
+- VLAN pop on bridge egress::
+
+    $ bridge vlan add dev enp8s0f0 vid 3 untagged
+
 mlx5 subfunction
 ================
 mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index e1467dbe80dc..442a62ff7b43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -6,6 +6,8 @@
 #include <linux/rhashtable.h>
 #include <linux/xarray.h>
 #include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
 #include <net/switchdev.h>
 #include "bridge.h"
 #include "eswitch.h"
@@ -44,6 +46,7 @@ struct mlx5_esw_bridge_fdb_entry {
 	struct rhash_head ht_node;
 	struct net_device *dev;
 	struct list_head list;
+	struct list_head vlan_list;
 	u16 vport_num;
 	u16 flags;
 
@@ -63,6 +66,9 @@ static const struct rhashtable_params fdb_ht_params = {
 struct mlx5_esw_bridge_vlan {
 	u16 vid;
 	u16 flags;
+	struct list_head fdb_list;
+	struct mlx5_pkt_reformat *pkt_reformat_push;
+	struct mlx5_pkt_reformat *pkt_reformat_pop;
 };
 
 struct mlx5_esw_bridge_port {
@@ -117,6 +123,7 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
 		return ERR_PTR(-ENOENT);
 	}
 
+	ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 	ft_attr.max_fte = max_fte;
 	ft_attr.level = level;
 	ft_attr.prio = FDB_BR_OFFLOAD;
@@ -395,7 +402,10 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
 	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
 		 mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
 
-	if (vlan) {
+	if (vlan && vlan->pkt_reformat_push) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+		flow_act.pkt_reformat = vlan->pkt_reformat_push;
+	} else if (vlan) {
 		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
 				 outer_headers.cvlan_tag);
 		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
@@ -449,6 +459,11 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
 	eth_broadcast_addr(dmac_c);
 
 	if (vlan) {
+		if (vlan->pkt_reformat_pop) {
+			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			flow_act.pkt_reformat = vlan->pkt_reformat_pop;
+		}
+
 		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
 				 outer_headers.cvlan_tag);
 		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
@@ -597,8 +612,90 @@ mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port)
 	return xa_load(&port->vlans, vid);
 }
 
+static int
+mlx5_esw_bridge_vlan_push_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+	struct {
+		__be16	h_vlan_proto;
+		__be16	h_vlan_TCI;
+	} vlan_hdr = { htons(ETH_P_8021Q), htons(vlan->vid) };
+	struct mlx5_pkt_reformat_params reformat_params = {};
+	struct mlx5_pkt_reformat *pkt_reformat;
+
+	if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_insert)) ||
+	    MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_size) < sizeof(vlan_hdr) ||
+	    MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_offset) <
+	    offsetof(struct vlan_ethhdr, h_vlan_proto)) {
+		esw_warn(esw->dev, "Packet reformat INSERT_HEADER is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	reformat_params.type = MLX5_REFORMAT_TYPE_INSERT_HDR;
+	reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START;
+	reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto);
+	reformat_params.size = sizeof(vlan_hdr);
+	reformat_params.data = &vlan_hdr;
+	pkt_reformat = mlx5_packet_reformat_alloc(esw->dev,
+						  &reformat_params,
+						  MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(pkt_reformat)) {
+		esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n",
+			 PTR_ERR(pkt_reformat));
+		return PTR_ERR(pkt_reformat);
+	}
+
+	vlan->pkt_reformat_push = pkt_reformat;
+	return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+	mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_push);
+	vlan->pkt_reformat_push = NULL;
+}
+
+static int
+mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+	struct mlx5_pkt_reformat_params reformat_params = {};
+	struct mlx5_pkt_reformat *pkt_reformat;
+
+	if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) ||
+	    MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) < sizeof(struct vlan_hdr) ||
+	    MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) <
+	    offsetof(struct vlan_ethhdr, h_vlan_proto)) {
+		esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR;
+	reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START;
+	reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto);
+	reformat_params.size = sizeof(struct vlan_hdr);
+	pkt_reformat = mlx5_packet_reformat_alloc(esw->dev,
+						  &reformat_params,
+						  MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(pkt_reformat)) {
+		esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n",
+			 PTR_ERR(pkt_reformat));
+		return PTR_ERR(pkt_reformat);
+	}
+
+	vlan->pkt_reformat_pop = pkt_reformat;
+	return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+	mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_pop);
+	vlan->pkt_reformat_pop = NULL;
+}
+
 static struct mlx5_esw_bridge_vlan *
-mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port)
+mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port,
+			    struct mlx5_eswitch *esw)
 {
 	struct mlx5_esw_bridge_vlan *vlan;
 	int err;
@@ -609,13 +706,34 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por
 
 	vlan->vid = vid;
 	vlan->flags = flags;
-	err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL);
-	if (err) {
-		kvfree(vlan);
-		return ERR_PTR(err);
+	INIT_LIST_HEAD(&vlan->fdb_list);
+
+	if (flags & BRIDGE_VLAN_INFO_PVID) {
+		err = mlx5_esw_bridge_vlan_push_create(vlan, esw);
+		if (err)
+			goto err_vlan_push;
+	}
+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
+		err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
+		if (err)
+			goto err_vlan_pop;
 	}
 
+	err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL);
+	if (err)
+		goto err_xa_insert;
+
 	return vlan;
+
+err_xa_insert:
+	if (vlan->pkt_reformat_pop)
+		mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
+err_vlan_pop:
+	if (vlan->pkt_reformat_push)
+		mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
+err_vlan_push:
+	kvfree(vlan);
+	return ERR_PTR(err);
 }
 
 static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
@@ -624,20 +742,42 @@ static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port,
 	xa_erase(&port->vlans, vlan->vid);
 }
 
-static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
-					 struct mlx5_esw_bridge_vlan *vlan)
+static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
+				       struct mlx5_esw_bridge *bridge)
 {
+	struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) {
+		if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
+			mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+							   entry->key.vid,
+							   SWITCHDEV_FDB_DEL_TO_BRIDGE);
+		mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
+	}
+
+	if (vlan->pkt_reformat_pop)
+		mlx5_esw_bridge_vlan_pop_cleanup(vlan, bridge->br_offloads->esw);
+	if (vlan->pkt_reformat_push)
+		mlx5_esw_bridge_vlan_push_cleanup(vlan, bridge->br_offloads->esw);
+}
+
+static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
+					 struct mlx5_esw_bridge_vlan *vlan,
+					 struct mlx5_esw_bridge *bridge)
+{
+	mlx5_esw_bridge_vlan_flush(vlan, bridge);
 	mlx5_esw_bridge_vlan_erase(port, vlan);
 	kvfree(vlan);
 }
 
-static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port)
+static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port,
+					     struct mlx5_esw_bridge *bridge)
 {
 	struct mlx5_esw_bridge_vlan *vlan;
 	unsigned long index;
 
 	xa_for_each(&port->vlans, index, vlan)
-		mlx5_esw_bridge_vlan_cleanup(port, vlan);
+		mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge);
 }
 
 static struct mlx5_esw_bridge_vlan *
@@ -685,8 +825,6 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 		vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw);
 		if (IS_ERR(vlan))
 			return ERR_CAST(vlan);
-		if (vlan->flags & (BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED))
-			return ERR_PTR(-EOPNOTSUPP); /* can't offload vlan push/pop */
 	}
 
 	priv = netdev_priv(dev);
@@ -734,6 +872,10 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 		goto err_ht_init;
 	}
 
+	if (vlan)
+		list_add(&entry->vlan_list, &vlan->fdb_list);
+	else
+		INIT_LIST_HEAD(&entry->vlan_list);
 	list_add(&entry->list, &bridge->fdb_list);
 	return entry;
 
@@ -831,7 +973,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off
 		return -EINVAL;
 	}
 
-	mlx5_esw_bridge_port_vlans_flush(port);
+	mlx5_esw_bridge_port_vlans_flush(port, bridge);
 	mlx5_esw_bridge_port_erase(port, bridge);
 	kvfree(port);
 	mlx5_esw_bridge_put(br_offloads, bridge);
@@ -892,11 +1034,12 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
 
 	vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
 	if (vlan) {
-		vlan->flags = flags;
-		return 0;
+		if (vlan->flags == flags)
+			return 0;
+		mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge);
 	}
 
-	vlan = mlx5_esw_bridge_vlan_create(vid, flags, port);
+	vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, esw);
 	if (IS_ERR(vlan)) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
 		return PTR_ERR(vlan);
@@ -916,7 +1059,7 @@ void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx
 	vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
 	if (!vlan)
 		return;
-	mlx5_esw_bridge_vlan_cleanup(port, vlan);
+	mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge);
 }
 
 void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,

From cc2987c44be5d188b0fdf5c07b65a5c952457ef9 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Thu, 18 Mar 2021 21:05:20 +0200
Subject: [PATCH 1318/3292] net/mlx5: Bridge, filter tagged packets that didn't
 match tagged fg

With support for pvid vlans in mlx5 bridge it is possible to have rules in
untagged flow group when vlan filtering is enabled. However, such rules can
also match tagged packets that didn't match anything in tagged flow group.
Filter such packets by introducing additional flow group between tagged and
untagged groups. When filtering is enabled on the bridge create additional
flow in vlan filtering flow group and matches tagged packets with specified
source MAC address and redirects them to new "skip" table. The skip table
is new lowest-level empty table that is used to skip all further processing
on packet in bridge priority.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  | 141 +++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge.h  |   3 +
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |   2 +-
 3 files changed, 141 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 442a62ff7b43..b6345619cbfe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -15,9 +15,13 @@
 
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 2 - 1)
-#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 4 - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_FROM \
 	(MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO \
+	(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 2 - 1)
+#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \
+	(MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO + 1)
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE - 1)
 
 #define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE 64000
@@ -27,9 +31,12 @@
 	(MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1)
 #define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1)
 
+#define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
+
 enum {
 	MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
 	MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+	MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
 };
 
 struct mlx5_esw_bridge_fdb_key {
@@ -54,6 +61,7 @@ struct mlx5_esw_bridge_fdb_entry {
 	struct mlx5_fc *ingress_counter;
 	unsigned long lastuse;
 	struct mlx5_flow_handle *egress_handle;
+	struct mlx5_flow_handle *filter_handle;
 };
 
 static const struct rhashtable_params fdb_ht_params = {
@@ -172,6 +180,44 @@ mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flo
 	return fg;
 }
 
+static struct mlx5_flow_group *
+mlx5_esw_bridge_ingress_filter_fg_create(struct mlx5_eswitch *esw,
+					 struct mlx5_flow_table *ingress_ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fg;
+	u32 *in, *match;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return ERR_PTR(-ENOMEM);
+
+	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2);
+	match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0);
+	MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag);
+
+	MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_mask());
+
+	MLX5_SET(create_flow_group_in, in, start_flow_index,
+		 MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_FROM);
+	MLX5_SET(create_flow_group_in, in, end_flow_index,
+		 MLX5_ESW_BRIDGE_INGRESS_TABLE_FILTER_GRP_IDX_TO);
+
+	fg = mlx5_create_flow_group(ingress_ft, in);
+	if (IS_ERR(fg))
+		esw_warn(esw->dev,
+			 "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n",
+			 PTR_ERR(fg));
+
+	kvfree(in);
+	return fg;
+}
+
 static struct mlx5_flow_group *
 mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft)
 {
@@ -275,8 +321,8 @@ mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_
 static int
 mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 {
-	struct mlx5_flow_group *mac_fg, *vlan_fg;
-	struct mlx5_flow_table *ingress_ft;
+	struct mlx5_flow_group *mac_fg, *filter_fg, *vlan_fg;
+	struct mlx5_flow_table *ingress_ft, *skip_ft;
 	int err;
 
 	if (!mlx5_eswitch_vport_match_metadata_enabled(br_offloads->esw))
@@ -288,12 +334,26 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 	if (IS_ERR(ingress_ft))
 		return PTR_ERR(ingress_ft);
 
+	skip_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE,
+					       MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
+					       br_offloads->esw);
+	if (IS_ERR(skip_ft)) {
+		err = PTR_ERR(skip_ft);
+		goto err_skip_tbl;
+	}
+
 	vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(br_offloads->esw, ingress_ft);
 	if (IS_ERR(vlan_fg)) {
 		err = PTR_ERR(vlan_fg);
 		goto err_vlan_fg;
 	}
 
+	filter_fg = mlx5_esw_bridge_ingress_filter_fg_create(br_offloads->esw, ingress_ft);
+	if (IS_ERR(filter_fg)) {
+		err = PTR_ERR(filter_fg);
+		goto err_filter_fg;
+	}
+
 	mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(br_offloads->esw, ingress_ft);
 	if (IS_ERR(mac_fg)) {
 		err = PTR_ERR(mac_fg);
@@ -301,13 +361,19 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads)
 	}
 
 	br_offloads->ingress_ft = ingress_ft;
+	br_offloads->skip_ft = skip_ft;
 	br_offloads->ingress_vlan_fg = vlan_fg;
+	br_offloads->ingress_filter_fg = filter_fg;
 	br_offloads->ingress_mac_fg = mac_fg;
 	return 0;
 
 err_mac_fg:
+	mlx5_destroy_flow_group(filter_fg);
+err_filter_fg:
 	mlx5_destroy_flow_group(vlan_fg);
 err_vlan_fg:
+	mlx5_destroy_flow_table(skip_ft);
+err_skip_tbl:
 	mlx5_destroy_flow_table(ingress_ft);
 	return err;
 }
@@ -317,8 +383,12 @@ mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloa
 {
 	mlx5_destroy_flow_group(br_offloads->ingress_mac_fg);
 	br_offloads->ingress_mac_fg = NULL;
+	mlx5_destroy_flow_group(br_offloads->ingress_filter_fg);
+	br_offloads->ingress_filter_fg = NULL;
 	mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg);
 	br_offloads->ingress_vlan_fg = NULL;
+	mlx5_destroy_flow_table(br_offloads->skip_ft);
+	br_offloads->skip_ft = NULL;
 	mlx5_destroy_flow_table(br_offloads->ingress_ft);
 	br_offloads->ingress_ft = NULL;
 }
@@ -428,6 +498,52 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
 	return handle;
 }
 
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr,
+					   struct mlx5_esw_bridge *bridge)
+{
+	struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+	struct mlx5_flow_destination dest = {
+		.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+		.ft = br_offloads->skip_ft,
+	};
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flags = FLOW_ACT_NO_APPEND,
+	};
+	struct mlx5_flow_spec *rule_spec;
+	struct mlx5_flow_handle *handle;
+	u8 *smac_v, *smac_c;
+
+	rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+	if (!rule_spec)
+		return ERR_PTR(-ENOMEM);
+
+	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2;
+
+	smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value,
+			      outer_headers.smac_47_16);
+	ether_addr_copy(smac_v, addr);
+	smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria,
+			      outer_headers.smac_47_16);
+	eth_broadcast_addr(smac_c);
+
+	MLX5_SET(fte_match_param, rule_spec->match_criteria,
+		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+	MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
+
+	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
+			 outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value,
+			 outer_headers.cvlan_tag);
+
+	handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1);
+
+	kvfree(rule_spec);
+	return handle;
+}
+
 static struct mlx5_flow_handle *
 mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
 				   struct mlx5_esw_bridge_vlan *vlan,
@@ -587,8 +703,11 @@ mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
 {
 	rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
 	mlx5_del_flow_rules(entry->egress_handle);
+	if (entry->filter_handle)
+		mlx5_del_flow_rules(entry->filter_handle);
 	mlx5_del_flow_rules(entry->ingress_handle);
 	mlx5_fc_destroy(bridge->br_offloads->esw->dev, entry->ingress_counter);
+	list_del(&entry->vlan_list);
 	list_del(&entry->list);
 	kvfree(entry);
 }
@@ -857,6 +976,17 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 	}
 	entry->ingress_handle = handle;
 
+	if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG) {
+		handle = mlx5_esw_bridge_ingress_filter_flow_create(vport_num, addr, bridge);
+		if (IS_ERR(handle)) {
+			err = PTR_ERR(handle);
+			esw_warn(esw->dev, "Failed to create ingress filter(vport=%u,err=%d)\n",
+				 vport_num, err);
+			goto err_ingress_filter_flow_create;
+		}
+		entry->filter_handle = handle;
+	}
+
 	handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
@@ -882,6 +1012,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 err_ht_init:
 	mlx5_del_flow_rules(entry->egress_handle);
 err_egress_flow_create:
+	if (entry->filter_handle)
+		mlx5_del_flow_rules(entry->filter_handle);
+err_ingress_filter_flow_create:
 	mlx5_del_flow_rules(entry->ingress_handle);
 err_ingress_flow_create:
 	mlx5_fc_destroy(priv->mdev, entry->ingress_counter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
index bedbda57cdb3..d826942b27fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
@@ -23,7 +23,10 @@ struct mlx5_esw_bridge_offloads {
 
 	struct mlx5_flow_table *ingress_ft;
 	struct mlx5_flow_group *ingress_vlan_fg;
+	struct mlx5_flow_group *ingress_filter_fg;
 	struct mlx5_flow_group *ingress_mac_fg;
+
+	struct mlx5_flow_table *skip_ft;
 };
 
 struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fc37ac9eab12..2cd7aea5d329 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2786,7 +2786,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 		goto out_err;
 	}
 
-	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 2);
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
 	if (IS_ERR(maj_prio)) {
 		err = PTR_ERR(maj_prio);
 		goto out_err;

From 9724fd5d9c2a0d3686b799ed5ca90cb9378ca4f2 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Fri, 2 Apr 2021 21:16:13 +0300
Subject: [PATCH 1319/3292] net/mlx5: Bridge, add tracepoints

Move private bridge structures to dedicated headers that is accessible to
bridge tracepoint header. Implemented following tracepoints:

- Initialize FDB entry.
- Refresh FDB entry.
- Cleanup FDB entry.
- Create VLAN.
- Cleanup VLAN.
- Attach port to bridge.
- Detach port from bridge.

Usage example:

># cd /sys/kernel/debug/tracing
># echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event
># cat trace
...
   kworker/u20:1-96      [001] ....   231.892503: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 lastuse=4294895695

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../device_drivers/ethernet/mellanox/mlx5.rst |  56 +++++++++
 .../ethernet/mellanox/mlx5/core/esw/bridge.c  |  75 ++++--------
 .../mellanox/mlx5/core/esw/bridge_priv.h      |  53 ++++++++
 .../mlx5/core/esw/diag/bridge_tracepoint.h    | 113 ++++++++++++++++++
 4 files changed, 247 insertions(+), 50 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index 058882dca17b..ef8cb62e82a1 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -600,3 +600,59 @@ tc and eswitch offloads tracepoints:
     $ cat /sys/kernel/debug/tracing/trace
     ...
     kworker/u48:7-2221  [009] ...1  1475.387435: mlx5e_rep_neigh_update: netdev: ens1f0 MAC: 24:8a:07:9a:17:9a IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_connected=1
+
+Bridge offloads tracepoints:
+
+- mlx5_esw_bridge_fdb_entry_init: trace bridge FDB entry offloaded to mlx5::
+
+    $ echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    kworker/u20:9-2217    [003] ...1   318.582243: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=0 flags=0 used=0
+
+- mlx5_esw_bridge_fdb_entry_cleanup: trace bridge FDB entry deleted from mlx5::
+
+    $ echo mlx5:mlx5_esw_bridge_fdb_entry_cleanup >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    ip-2581    [005] ...1   318.629871: mlx5_esw_bridge_fdb_entry_cleanup: net_device=enp8s0f0_1 addr=e4:fd:05:08:00:03 vid=0 flags=0 used=16
+
+- mlx5_esw_bridge_fdb_entry_refresh: trace bridge FDB entry offload refreshed in
+  mlx5::
+
+    $ echo mlx5:mlx5_esw_bridge_fdb_entry_refresh >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    kworker/u20:8-3849    [003] ...1       466716: mlx5_esw_bridge_fdb_entry_refresh: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 used=0
+
+- mlx5_esw_bridge_vlan_create: trace bridge VLAN object add on mlx5
+  representor::
+
+    $ echo mlx5:mlx5_esw_bridge_vlan_create >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    ip-2560    [007] ...1   318.460258: mlx5_esw_bridge_vlan_create: vid=1 flags=6
+
+- mlx5_esw_bridge_vlan_cleanup: trace bridge VLAN object delete from mlx5
+  representor::
+
+    $ echo mlx5:mlx5_esw_bridge_vlan_cleanup >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    bridge-2582    [007] ...1   318.653496: mlx5_esw_bridge_vlan_cleanup: vid=2 flags=8
+
+- mlx5_esw_bridge_vport_init: trace mlx5 vport assigned with bridge upper
+  device::
+
+    $ echo mlx5:mlx5_esw_bridge_vport_init >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    ip-2560    [007] ...1   318.458915: mlx5_esw_bridge_vport_init: vport_num=1
+
+- mlx5_esw_bridge_vport_cleanup: trace mlx5 vport removed from bridge upper
+  device::
+
+    $ echo mlx5:mlx5_esw_bridge_vport_cleanup >> set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    ip-5387    [000] ...1       573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index b6345619cbfe..a6e1d4f78268 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -1,17 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2021 Mellanox Technologies. */
 
-#include <linux/netdevice.h>
 #include <linux/list.h>
-#include <linux/rhashtable.h>
-#include <linux/xarray.h>
-#include <linux/if_bridge.h>
-#include <linux/if_vlan.h>
-#include <linux/if_ether.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
 #include <net/switchdev.h>
 #include "bridge.h"
 #include "eswitch.h"
-#include "fs_core.h"
+#include "bridge_priv.h"
+#define CREATE_TRACE_POINTS
+#include "diag/bridge_tracepoint.h"
 
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000
 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0
@@ -39,31 +37,6 @@ enum {
 	MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
 };
 
-struct mlx5_esw_bridge_fdb_key {
-	unsigned char addr[ETH_ALEN];
-	u16 vid;
-};
-
-enum {
-	MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
-};
-
-struct mlx5_esw_bridge_fdb_entry {
-	struct mlx5_esw_bridge_fdb_key key;
-	struct rhash_head ht_node;
-	struct net_device *dev;
-	struct list_head list;
-	struct list_head vlan_list;
-	u16 vport_num;
-	u16 flags;
-
-	struct mlx5_flow_handle *ingress_handle;
-	struct mlx5_fc *ingress_counter;
-	unsigned long lastuse;
-	struct mlx5_flow_handle *egress_handle;
-	struct mlx5_flow_handle *filter_handle;
-};
-
 static const struct rhashtable_params fdb_ht_params = {
 	.key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key),
 	.key_len = sizeof(struct mlx5_esw_bridge_fdb_key),
@@ -71,19 +44,6 @@ static const struct rhashtable_params fdb_ht_params = {
 	.automatic_shrinking = true,
 };
 
-struct mlx5_esw_bridge_vlan {
-	u16 vid;
-	u16 flags;
-	struct list_head fdb_list;
-	struct mlx5_pkt_reformat *pkt_reformat_push;
-	struct mlx5_pkt_reformat *pkt_reformat_pop;
-};
-
-struct mlx5_esw_bridge_port {
-	u16 vport_num;
-	struct xarray vlans;
-};
-
 enum {
 	MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0),
 };
@@ -697,10 +657,23 @@ static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
 	xa_erase(&bridge->vports, port->vport_num);
 }
 
+static void mlx5_esw_bridge_fdb_entry_refresh(unsigned long lastuse,
+					      struct mlx5_esw_bridge_fdb_entry *entry)
+{
+	trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
+
+	entry->lastuse = lastuse;
+	mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+					   entry->key.vid,
+					   SWITCHDEV_FDB_ADD_TO_BRIDGE);
+}
+
 static void
 mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry,
 				  struct mlx5_esw_bridge *bridge)
 {
+	trace_mlx5_esw_bridge_fdb_entry_cleanup(entry);
+
 	rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params);
 	mlx5_del_flow_rules(entry->egress_handle);
 	if (entry->filter_handle)
@@ -842,6 +815,7 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por
 	if (err)
 		goto err_xa_insert;
 
+	trace_mlx5_esw_bridge_vlan_create(vlan);
 	return vlan;
 
 err_xa_insert:
@@ -884,6 +858,7 @@ static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port,
 					 struct mlx5_esw_bridge_vlan *vlan,
 					 struct mlx5_esw_bridge *bridge)
 {
+	trace_mlx5_esw_bridge_vlan_cleanup(vlan);
 	mlx5_esw_bridge_vlan_flush(vlan, bridge);
 	mlx5_esw_bridge_vlan_erase(port, vlan);
 	kvfree(vlan);
@@ -1007,6 +982,8 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
 	else
 		INIT_LIST_HEAD(&entry->vlan_list);
 	list_add(&entry->list, &bridge->fdb_list);
+
+	trace_mlx5_esw_bridge_fdb_entry_init(entry);
 	return entry;
 
 err_ht_init:
@@ -1078,6 +1055,7 @@ static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloa
 			 vport->vport, err);
 		goto err_port_insert;
 	}
+	trace_mlx5_esw_bridge_vport_init(port);
 
 	vport->bridge = bridge;
 	return 0;
@@ -1106,6 +1084,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off
 		return -EINVAL;
 	}
 
+	trace_mlx5_esw_bridge_vport_cleanup(port);
 	mlx5_esw_bridge_port_vlans_flush(port, bridge);
 	mlx5_esw_bridge_port_erase(port, bridge);
 	kvfree(port);
@@ -1266,11 +1245,7 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
 				continue;
 
 			if (time_after(lastuse, entry->lastuse)) {
-				entry->lastuse = lastuse;
-				/* refresh existing bridge entry */
-				mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
-								   entry->key.vid,
-								   SWITCHDEV_FDB_ADD_TO_BRIDGE);
+				mlx5_esw_bridge_fdb_entry_refresh(lastuse, entry);
 			} else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) {
 				mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
 								   entry->key.vid,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
new file mode 100644
index 000000000000..d9ab2e8bc2cb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef _MLX5_ESW_BRIDGE_PRIVATE_
+#define _MLX5_ESW_BRIDGE_PRIVATE_
+
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/xarray.h>
+#include "fs_core.h"
+
+struct mlx5_esw_bridge_fdb_key {
+	unsigned char addr[ETH_ALEN];
+	u16 vid;
+};
+
+enum {
+	MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
+};
+
+struct mlx5_esw_bridge_fdb_entry {
+	struct mlx5_esw_bridge_fdb_key key;
+	struct rhash_head ht_node;
+	struct net_device *dev;
+	struct list_head list;
+	struct list_head vlan_list;
+	u16 vport_num;
+	u16 flags;
+
+	struct mlx5_flow_handle *ingress_handle;
+	struct mlx5_fc *ingress_counter;
+	unsigned long lastuse;
+	struct mlx5_flow_handle *egress_handle;
+	struct mlx5_flow_handle *filter_handle;
+};
+
+struct mlx5_esw_bridge_vlan {
+	u16 vid;
+	u16 flags;
+	struct list_head fdb_list;
+	struct mlx5_pkt_reformat *pkt_reformat_push;
+	struct mlx5_pkt_reformat *pkt_reformat_pop;
+};
+
+struct mlx5_esw_bridge_port {
+	u16 vport_num;
+	struct xarray vlans;
+};
+
+#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
new file mode 100644
index 000000000000..227964b7d3b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_BRIDGE_TRACEPOINT_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_BRIDGE_TRACEPOINT_
+
+#include <linux/tracepoint.h>
+#include "../bridge_priv.h"
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template,
+		    TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+		    TP_ARGS(fdb),
+		    TP_STRUCT__entry(
+			    __array(char, dev_name, IFNAMSIZ)
+			    __array(unsigned char, addr, ETH_ALEN)
+			    __field(u16, vid)
+			    __field(u16, flags)
+			    __field(unsigned int, used)
+			    ),
+		    TP_fast_assign(
+			    strncpy(__entry->dev_name,
+				    netdev_name(fdb->dev),
+				    IFNAMSIZ);
+			    memcpy(__entry->addr, fdb->key.addr, ETH_ALEN);
+			    __entry->vid = fdb->key.vid;
+			    __entry->flags = fdb->flags;
+			    __entry->used = jiffies_to_msecs(jiffies - fdb->lastuse)
+			    ),
+		    TP_printk("net_device=%s addr=%pM vid=%hu flags=%hx used=%u",
+			      __entry->dev_name,
+			      __entry->addr,
+			      __entry->vid,
+			      __entry->flags,
+			      __entry->used / 1000)
+	);
+
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+	     mlx5_esw_bridge_fdb_entry_init,
+	     TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+	     TP_ARGS(fdb)
+	);
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+	     mlx5_esw_bridge_fdb_entry_refresh,
+	     TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+	     TP_ARGS(fdb)
+	);
+DEFINE_EVENT(mlx5_esw_bridge_fdb_template,
+	     mlx5_esw_bridge_fdb_entry_cleanup,
+	     TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb),
+	     TP_ARGS(fdb)
+	);
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_vlan_template,
+		    TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+		    TP_ARGS(vlan),
+		    TP_STRUCT__entry(
+			    __field(u16, vid)
+			    __field(u16, flags)
+			    ),
+		    TP_fast_assign(
+			    __entry->vid = vlan->vid;
+			    __entry->flags = vlan->flags;
+			    ),
+		    TP_printk("vid=%hu flags=%hx",
+			      __entry->vid,
+			      __entry->flags)
+	);
+
+DEFINE_EVENT(mlx5_esw_bridge_vlan_template,
+	     mlx5_esw_bridge_vlan_create,
+	     TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+	     TP_ARGS(vlan)
+	);
+DEFINE_EVENT(mlx5_esw_bridge_vlan_template,
+	     mlx5_esw_bridge_vlan_cleanup,
+	     TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan),
+	     TP_ARGS(vlan)
+	);
+
+DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template,
+		    TP_PROTO(const struct mlx5_esw_bridge_port *port),
+		    TP_ARGS(port),
+		    TP_STRUCT__entry(
+			    __field(u16, vport_num)
+			    ),
+		    TP_fast_assign(
+			    __entry->vport_num = port->vport_num;
+			    ),
+		    TP_printk("vport_num=%hu", __entry->vport_num)
+	);
+
+DEFINE_EVENT(mlx5_esw_bridge_port_template,
+	     mlx5_esw_bridge_vport_init,
+	     TP_PROTO(const struct mlx5_esw_bridge_port *port),
+	     TP_ARGS(port)
+	);
+DEFINE_EVENT(mlx5_esw_bridge_port_template,
+	     mlx5_esw_bridge_vport_cleanup,
+	     TP_PROTO(const struct mlx5_esw_bridge_port *port),
+	     TP_ARGS(port)
+	);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE bridge_tracepoint
+#include <trace/define_trace.h>

From 9bd1cc4148cbea44ca7d8254b50edb6cb660957a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 10:59:03 +0100
Subject: [PATCH 1320/3292] nios2: Do not include linux/irqdomain.h from
 asm/irq.h

Including linux/irqdomain.h from asm/irq.h is going to break
as soon as linux/irqdomain.h will include linux/irq.h, so
let's fix this. Code relying on linux/irqomain.h should include
it directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/nios2/include/asm/irq.h | 1 -
 arch/nios2/kernel/irq.c      | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/nios2/include/asm/irq.h b/arch/nios2/include/asm/irq.h
index 13ce37272279..c52c94884e93 100644
--- a/arch/nios2/include/asm/irq.h
+++ b/arch/nios2/include/asm/irq.h
@@ -10,6 +10,5 @@
 #define NIOS2_CPU_NR_IRQS	32
 
 #include <asm-generic/irq.h>
-#include <linux/irqdomain.h>
 
 #endif
diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c
index 5f3555ce4865..c6a1a9f6ac42 100644
--- a/arch/nios2/kernel/irq.c
+++ b/arch/nios2/kernel/irq.c
@@ -11,6 +11,7 @@
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 
 static u32 ienable;

From aa5f6a89700700fe6fe7e8727581a21a7d679630 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:31:09 +0100
Subject: [PATCH 1321/3292] staging: octeon-hcd: Directly include linux/of.h

This drivers currently obtains linux/of.h by luck and a chain of
bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/staging/octeon-usb/octeon-hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/octeon-usb/octeon-hcd.c b/drivers/staging/octeon-usb/octeon-hcd.c
index f27f20a4aa2d..a1cd81d4a114 100644
--- a/drivers/staging/octeon-usb/octeon-hcd.c
+++ b/drivers/staging/octeon-usb/octeon-hcd.c
@@ -50,8 +50,10 @@
 #include <linux/module.h>
 #include <linux/usb/hcd.h>
 #include <linux/prefetch.h>
+#include <linux/irqdomain.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 
 #include <asm/octeon/octeon.h>
 

From c7d49545997eab111aec14be152842f56a0cabc4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 16 May 2021 17:57:15 +0100
Subject: [PATCH 1322/3292] mfd: ioc3: Directly include linux/irqdomain.h

This driver include linux/irqdomain.h via a bizarre set of
indirection, which we are about to break.

Directly include the required file.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/mfd/ioc3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/ioc3.c b/drivers/mfd/ioc3.c
index c73ec78f255b..99b9c113f964 100644
--- a/drivers/mfd/ioc3.c
+++ b/drivers/mfd/ioc3.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/pci.h>

From bc9a454a9440e2872ecf71256fb962e4bb35e937 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:30:34 +0100
Subject: [PATCH 1323/3292] watchdog/octeon-wdt: Directly include
 linux/irqdomain.h

This drivers currently obtains linux/irqdomain.h by luck and
a chain of bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/watchdog/octeon-wdt-main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/watchdog/octeon-wdt-main.c b/drivers/watchdog/octeon-wdt-main.c
index fde9e739b436..391c774a1f67 100644
--- a/drivers/watchdog/octeon-wdt-main.c
+++ b/drivers/watchdog/octeon-wdt-main.c
@@ -54,6 +54,7 @@
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 
 #include <asm/mipsregs.h>
 #include <asm/uasm.h>

From 1982752f6ba6a9d74a214b008ae9e336339276e8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:30:34 +0100
Subject: [PATCH 1324/3292] irqchip/mips-gic: Directly include
 linux/irqdomain.h

This drivers currently obtains linux/irqdomain.h by luck and
a chain of bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-mips-gic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 215885962bb0..a2cbf0acff1c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqdomain.h>
 #include <linux/of_address.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>

From 95af1df6f4e2b121ce33166d61c99250143073b5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:31:09 +0100
Subject: [PATCH 1325/3292] MIPS: lantiq: Directly include linux/of.h in
 xway/dma.c

This drivers currently obtains linux/of.h by luck and a chain of
bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/mips/lantiq/xway/dma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
index aeb1b989cd4e..63dccb2ed08b 100644
--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -12,6 +12,7 @@
 #include <linux/spinlock.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/of.h>
 
 #include <lantiq_soc.h>
 #include <xway_dma.h>

From 18ca45f5ba1e31704bcca038b8b612e9b1f52b4f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 19 May 2021 09:01:06 +0100
Subject: [PATCH 1326/3292] MIPS: Add missing linux/irqdomain.h includes

A number of MIPS platforms are failing to directly include
irqdomain.h. Fix this so that we can drop unnecessary dependencies

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/mips/pci/pci-rt3883.c       | 1 +
 arch/mips/pci/pci-xtalk-bridge.c | 1 +
 arch/mips/sgi-ip27/ip27-irq.c    | 1 +
 arch/mips/sgi-ip30/ip30-irq.c    | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index aebd4964ea34..c48e23cf5b5e 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index d2216942af18..ab9bedb82b28 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -13,6 +13,7 @@
 #include <linux/platform_data/xtalk-bridge.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/crc16.h>
+#include <linux/irqdomain.h>
 
 #include <asm/pci/bridge.h>
 #include <asm/paccess.h>
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 42df9fafa943..95c1bff1ab9f 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -9,6 +9,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
index e8374e4c705b..ba87704073c8 100644
--- a/arch/mips/sgi-ip30/ip30-irq.c
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/percpu.h>
 #include <linux/spinlock.h>
 #include <linux/tick.h>

From a12a9c5c03072ec6b1f4f9bd7a554a718ecf234a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 Sep 2018 16:11:20 +0100
Subject: [PATCH 1327/3292] MIPS: Do not include linux/irqdomain.h from
 asm/irq.h

Including linux/irqdomain.h from asm/irq.h is going to break
as soon as linux/irqdomain.h will include linux/irq.h, so
let's fix this. Code relying on linux/irqomain.h should include
it directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/mips/include/asm/irq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index f021de661c3a..d1477ecb1af9 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -11,7 +11,6 @@
 
 #include <linux/linkage.h>
 #include <linux/smp.h>
-#include <linux/irqdomain.h>
 
 #include <asm/mipsmtregs.h>
 

From 13a9a5d17d07cec8181ea0843674ce48c191628e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 17:09:06 +0100
Subject: [PATCH 1328/3292] powerpc: Add missing linux/{of.h,irqdomain.h}
 include directives

A bunch of PPC files are missing the inclusion of linux/of.h and
linux/irqdomain.h, relying on transitive inclusion from another
file.

As we are about to break this dependency, make sure these dependencies
are explicit.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/kernel/mce.c                        | 1 +
 arch/powerpc/kvm/book3s_hv_uvmem.c               | 1 +
 arch/powerpc/kvm/book3s_xive.c                   | 1 +
 arch/powerpc/kvm/book3s_xive_native.c            | 1 +
 arch/powerpc/mm/book3s64/radix_pgtable.c         | 1 +
 arch/powerpc/platforms/cell/pmu.c                | 1 +
 arch/powerpc/platforms/embedded6xx/flipper-pic.c | 1 +
 arch/powerpc/platforms/ps3/interrupt.c           | 1 +
 arch/powerpc/platforms/pseries/ibmebus.c         | 1 +
 arch/powerpc/sysdev/ehv_pic.c                    | 1 +
 arch/powerpc/sysdev/fsl_mpic_err.c               | 1 +
 arch/powerpc/sysdev/xics/icp-hv.c                | 1 +
 arch/powerpc/sysdev/xics/icp-opal.c              | 1 +
 13 files changed, 13 insertions(+)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9a3c2a84a2ac..15e7b4900689 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -18,6 +18,7 @@
 #include <linux/extable.h>
 #include <linux/ftrace.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 
 #include <asm/interrupt.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be5..b898a596db42 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -90,6 +90,7 @@
 #include <linux/migrate.h>
 #include <linux/kvm_host.h>
 #include <linux/ksm.h>
+#include <linux/of.h>
 #include <asm/ultravisor.h>
 #include <asm/mman.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e7219b6f5f9a..08910d44d2cb 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/uaccess.h>
+#include <linux/irqdomain.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 76800c84f2a3..30bacf6dd53d 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -12,6 +12,7 @@
 #include <linux/spinlock.h>
 #include <linux/delay.h>
 #include <linux/file.h>
+#include <linux/irqdomain.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b463..edadb9e9c9c0 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index 35bbd15582af..b207a7f99be5 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/types.h>
 #include <linux/export.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index d39a9213a3e6..609bda2ad5dd 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 78f2339ed5cb..e68f4fb1c1f4 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 
 #include <asm/machdep.h>
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a15ab33646b3..c6c79ef55e13 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -42,6 +42,7 @@
 #include <linux/kobject.h>
 #include <linux/dma-map-ops.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index 48866e6c1efb..00705258ecf9 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 13583bbc3e8e..5fa5fa215541 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -8,6 +8,7 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 21b9d1bf39ff..6765d9e264a3 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 68fd2540b093..675d708863d5 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 

From 5951be4c9c361242c9f0d7c9b9ef03fe82e45c7b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 14 Sep 2018 09:49:11 +0100
Subject: [PATCH 1329/3292] scsi/ibmvscsi: Directly include
 linux/{of.h,irqdomain.h}

A couple of ibmvscsi files are missing the inclusion of linux/of.h
and linux/irqdomain.h, relying on transitive inclusion from another
file.

As we are about to break this dependency, make sure these dependencies
are explicit.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvfc.c           | 1 +
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 6540d48eb0e8..715c34904e3e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -13,6 +13,7 @@
 #include <linux/dmapool.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <linux/of.h>
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 41ac9477df7a..10b6c6daaacd 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/string.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 
 #include <target/target_core_base.h>
 #include <target/target_core_fabric.h>

From 7c576f4d3ce43fa0fc1ac258dc4768d0f3b3b992 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 10:42:25 +0100
Subject: [PATCH 1330/3292] powerpc: Convert irq_domain_add_legacy_isa use to
 irq_domain_add_legacy

irq_domain_add_legacy_isa is a pain. It only exists for the benefit of
two PPC-specific drivers, and creates an ugly dependency between asm/irq.h
and linux/irqdomain.h

Instead, let's convert these two drivers to irq_domain_add_legacy(),
stop using NUM_ISA_INTERRUPTS by directly setting NR_IRQS_LEGACY.

The dependency cannot be broken yet as there is a lot of PPC-related
code that depends on it, but that's the first step towards it.

A followup patch will remove irq_domain_add_legacy_isa.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/include/asm/irq.h         | 4 ++--
 arch/powerpc/platforms/ps3/interrupt.c | 4 ++--
 arch/powerpc/sysdev/i8259.c            | 3 ++-
 arch/powerpc/sysdev/mpic.c             | 2 +-
 arch/powerpc/sysdev/tsi108_pci.c       | 3 ++-
 arch/powerpc/sysdev/xics/xics-common.c | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index b2bd58830430..c1eda9199214 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -23,8 +23,8 @@ extern atomic_t ppc_n_lost_interrupts;
 /* Total number of virq in the platform */
 #define NR_IRQS		CONFIG_NR_IRQS
 
-/* Same thing, used by the generic IRQ code */
-#define NR_IRQS_LEGACY		NUM_ISA_INTERRUPTS
+/* Number of irqs reserved for a legacy isa controller */
+#define NR_IRQS_LEGACY		16
 
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
 
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index e68f4fb1c1f4..49871427f599 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -46,7 +46,7 @@
  * implementation equates HV plug value to Linux virq value, constrains each
  * interrupt to have a system wide unique plug number, and limits the range
  * of the plug values to map into the first dword of the bitmaps.  This
- * gives a usable range of plug values of  {NUM_ISA_INTERRUPTS..63}.  Note
+ * gives a usable range of plug values of  {NR_IRQS_LEGACY..63}.  Note
  * that there is no constraint on how many in this set an individual thread
  * can acquire.
  *
@@ -722,7 +722,7 @@ static unsigned int ps3_get_irq(void)
 	}
 
 #if defined(DEBUG)
-	if (unlikely(plug < NUM_ISA_INTERRUPTS || plug > PS3_PLUG_MAX)) {
+	if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
 		dump_bmp(&per_cpu(ps3_private, 0));
 		dump_bmp(&per_cpu(ps3_private, 1));
 		BUG();
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index c1d76c344351..dc1a151c63d7 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -260,7 +260,8 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
 	raw_spin_unlock_irqrestore(&i8259_lock, flags);
 
 	/* create a legacy host */
-	i8259_host = irq_domain_add_legacy_isa(node, &i8259_host_ops, NULL);
+	i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					   &i8259_host_ops, NULL);
 	if (i8259_host == NULL) {
 		printk(KERN_ERR "i8259: failed to allocate irq host !\n");
 		return;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b0426f28946a..995fb2ada507 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -602,7 +602,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
 /* Find an mpic associated with a given linux interrupt */
 static struct mpic *mpic_find(unsigned int irq)
 {
-	if (irq < NUM_ISA_INTERRUPTS)
+	if (irq < NR_IRQS_LEGACY)
 		return NULL;
 
 	return irq_get_chip_data(irq);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 49f9541954f8..042bb38fa5c2 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -404,7 +404,8 @@ void __init tsi108_pci_int_init(struct device_node *node)
 {
 	DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
 
-	pci_irq_host = irq_domain_add_legacy_isa(node, &pci_irq_domain_ops, NULL);
+	pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					     &pci_irq_domain_ops, NULL);
 	if (pci_irq_host == NULL) {
 		printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
 		return;
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 7e4305c01bac..fdf8db4444b6 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -201,7 +201,7 @@ void xics_migrate_irqs_away(void)
 		struct ics *ics;
 
 		/* We can't set affinity on ISA interrupts */
-		if (virq < NUM_ISA_INTERRUPTS)
+		if (virq < NR_IRQS_LEGACY)
 			continue;
 		/* We only need to migrate enabled IRQS */
 		if (!desc->action)

From 582f5aa1dbb3bd7bd3dd12de7e87f6dafb3f8258 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 11:15:27 +0100
Subject: [PATCH 1331/3292] powerpc: Drop dependency between asm/irq.h and
 linux/irqdomain.h

Directly including linux/irqdomain.h was hiding all sort of sins,
which have now been fixed. Drop the spurious include.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/include/asm/irq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index c1eda9199214..4982f3711fc3 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -6,7 +6,6 @@
 /*
  */
 
-#include <linux/irqdomain.h>
 #include <linux/threads.h>
 #include <linux/list.h>
 #include <linux/radix-tree.h>

From 405e94e9aed2a38bdcd22efe53c36c6cd53185a6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 10:42:25 +0100
Subject: [PATCH 1332/3292] irqdomain: Kill irq_domain_add_legacy_isa

This helper doesn't have a user anymore, let's remove it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 Documentation/core-api/irq/irq-domain.rst |  1 -
 include/linux/irqdomain.h                 | 11 -----------
 2 files changed, 12 deletions(-)

diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
index 8214e215a8bf..53283b3729a1 100644
--- a/Documentation/core-api/irq/irq-domain.rst
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -146,7 +146,6 @@ Legacy
 
 	irq_domain_add_simple()
 	irq_domain_add_legacy()
-	irq_domain_add_legacy_isa()
 	irq_domain_create_simple()
 	irq_domain_create_legacy()
 
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 62a8e3d23829..9f884c948739 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -45,9 +45,6 @@ struct cpumask;
 struct seq_file;
 struct irq_affinity_desc;
 
-/* Number of irqs reserved for a legacy isa controller */
-#define NUM_ISA_INTERRUPTS	16
-
 #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16
 
 /**
@@ -355,14 +352,6 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
 {
 	return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
 }
-static inline struct irq_domain *irq_domain_add_legacy_isa(
-				struct device_node *of_node,
-				const struct irq_domain_ops *ops,
-				void *host_data)
-{
-	return irq_domain_add_legacy(of_node, NUM_ISA_INTERRUPTS, 0, 0, ops,
-				     host_data);
-}
 static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)

From 1da027362a7db422243601e895e6f8288389f435 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 2 Apr 2021 12:50:14 +0100
Subject: [PATCH 1333/3292] irqdomain: Reimplement irq_linear_revmap() with
 irq_find_mapping()

irq_linear_revmap() is supposed to be a fast path for domain
lookups, but it only exposes low-level details of the irqdomain
implementation, details which are better kept private.

The *overhead* between the two is only a function call and
a couple of tests, so it is likely that noone can show any
meaningful difference compared to the cost of taking an
interrupt.

Reimplement irq_linear_revmap() with irq_find_mapping()
in order to preserve source code compatibility, and
rename the internal field for a measure.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h | 22 +++++++++-------------
 kernel/irq/irqdomain.c    |  6 +++---
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 9f884c948739..42b3f7d03a32 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,9 +151,9 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by irq_domain
  * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
  *                         support direct mapping
- * @revmap_size: Size of the linear map table @linear_revmap[]
+ * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
- * @linear_revmap: Linear table of hwirq->virq reverse mappings
+ * @revmap: Linear table of hwirq->virq reverse mappings
  */
 struct irq_domain {
 	struct list_head link;
@@ -177,7 +177,7 @@ struct irq_domain {
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
 	struct mutex revmap_tree_mutex;
-	unsigned int linear_revmap[];
+	unsigned int revmap[];
 };
 
 /* Irq domain flags */
@@ -394,24 +394,20 @@ static inline unsigned int irq_create_mapping(struct irq_domain *host,
 	return irq_create_mapping_affinity(host, hwirq, NULL);
 }
 
-
 /**
- * irq_linear_revmap() - Find a linux irq from a hw irq number.
+ * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
- *
- * This is a fast path alternative to irq_find_mapping() that can be
- * called directly by irq controller code to save a handful of
- * instructions. It is always safe to call, but won't find irqs mapped
- * using the radix tree.
  */
+extern unsigned int irq_find_mapping(struct irq_domain *host,
+				     irq_hw_number_t hwirq);
+
 static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 					     irq_hw_number_t hwirq)
 {
-	return hwirq < domain->revmap_size ? domain->linear_revmap[hwirq] : 0;
+	return irq_find_mapping(domain, hwirq);
 }
-extern unsigned int irq_find_mapping(struct irq_domain *host,
-				     irq_hw_number_t hwirq);
+
 extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
 
 extern const struct irq_domain_ops irq_domain_simple_ops;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 6284443b87ec..8bd012253989 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -486,7 +486,7 @@ static void irq_domain_clear_mapping(struct irq_domain *domain,
 				     irq_hw_number_t hwirq)
 {
 	if (hwirq < domain->revmap_size) {
-		domain->linear_revmap[hwirq] = 0;
+		domain->revmap[hwirq] = 0;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_delete(&domain->revmap_tree, hwirq);
@@ -499,7 +499,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 				   struct irq_data *irq_data)
 {
 	if (hwirq < domain->revmap_size) {
-		domain->linear_revmap[hwirq] = irq_data->irq;
+		domain->revmap[hwirq] = irq_data->irq;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
@@ -885,7 +885,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 
 	/* Check if the hwirq is in the linear revmap. */
 	if (hwirq < domain->revmap_size)
-		return domain->linear_revmap[hwirq];
+		return domain->revmap[hwirq];
 
 	rcu_read_lock();
 	data = radix_tree_lookup(&domain->revmap_tree, hwirq);

From e37af8011a9631996e6cd32dd81a152708eee7d4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 4 Apr 2021 13:06:39 +0100
Subject: [PATCH 1334/3292] powerpc: Move the use of irq_domain_add_nomap()
 behind a config option

Only a handful of old PPC systems are still using the old 'nomap'
variant of the irqdomain library. Move the associated definitions
behind a configuration option, which will allow us to make some
more radical changes.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/platforms/cell/Kconfig     | 1 +
 arch/powerpc/platforms/powermac/Kconfig | 1 +
 arch/powerpc/platforms/ps3/Kconfig      | 1 +
 arch/powerpc/sysdev/xive/Kconfig        | 1 +
 include/linux/irqdomain.h               | 8 ++++++--
 kernel/irq/Kconfig                      | 5 +++++
 kernel/irq/irqdomain.c                  | 2 ++
 7 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index e7c976bcadff..cb70c5f25bc6 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,7 @@ config PPC_IBM_CELL_BLADE
 config AXON_MSI
 	bool
 	depends on PPC_IBM_CELL_BLADE && PCI_MSI
+	select IRQ_DOMAIN_NOMAP
 	default y
 
 menu "Cell Broadband Engine options"
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index c02d8c503b29..b97bf12801eb 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -24,6 +24,7 @@ config PPC_PMAC32_PSURGE
 	bool "Support for powersurge upgrade cards" if EXPERT
 	depends on SMP && PPC32 && PPC_PMAC
 	select PPC_SMP_MUXED_IPI
+	select IRQ_DOMAIN_NOMAP
 	default y
 	help
 	  The powersurge cpu boards can be used in the generation
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index e32406e918d0..4d0535cc7946 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -7,6 +7,7 @@ config PPC_PS3
 	select USB_OHCI_BIG_ENDIAN_MMIO
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select HAVE_PCI
+	select IRQ_DOMAIN_NOMAP
 	help
 	  This option enables support for the Sony PS3 game console
 	  and other platforms using the PS3 hypervisor.  Enabling this
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 785c292d104b..97796c6b63f0 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -3,6 +3,7 @@ config PPC_XIVE
 	bool
 	select PPC_SMP_MUXED_IPI
 	select HARDIRQS_SW_RESEND
+	select IRQ_DOMAIN_NOMAP
 
 config PPC_XIVE_NATIVE
 	bool
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 42b3f7d03a32..723495ec5a2f 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -345,6 +345,8 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
 {
 	return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
 }
+
+#ifdef CONFIG_IRQ_DOMAIN_NOMAP
 static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
 					 unsigned int max_irq,
 					 const struct irq_domain_ops *ops,
@@ -352,6 +354,10 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
 {
 	return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
 }
+
+extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
+#endif
+
 static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
@@ -408,8 +414,6 @@ static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 	return irq_find_mapping(domain, hwirq);
 }
 
-extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
-
 extern const struct irq_domain_ops irq_domain_simple_ops;
 
 /* stock xlate functions */
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index d79ef2493a28..fbc54c2a7f23 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -70,6 +70,11 @@ config IRQ_DOMAIN_HIERARCHY
 	bool
 	select IRQ_DOMAIN
 
+# Support for obsolete non-mapping irq domains
+config IRQ_DOMAIN_NOMAP
+	bool
+	select IRQ_DOMAIN
+
 # Support for hierarchical fasteoi+edge and fasteoi+level handlers
 config IRQ_FASTEOI_HIERARCHY_HANDLERS
 	bool
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8bd012253989..e0143e640683 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -604,6 +604,7 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
 }
 EXPORT_SYMBOL_GPL(irq_domain_associate_many);
 
+#ifdef CONFIG_IRQ_DOMAIN_NOMAP
 /**
  * irq_create_direct_mapping() - Allocate an irq for direct mapping
  * @domain: domain to allocate the irq for or NULL for default domain
@@ -644,6 +645,7 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 	return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+#endif
 
 /**
  * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space

From 4f86a06e2d6ece5316e4c42fbf946ee22acb30f3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 10 Sep 2018 18:33:46 +0100
Subject: [PATCH 1335/3292] irqdomain: Make normal and nomap irqdomains
 exclusive

Direct mappings are completely exclusive of normal mappings, meaning
that we can refactor the code slightly so that we can get rid of
the revmap_direct_max_irq field and use the revmap_size field
instead, reducing the size of the irqdomain structure.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h |  6 +++---
 kernel/irq/irqdomain.c    | 45 ++++++++++++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 723495ec5a2f..0916cf9c6e20 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -149,8 +149,6 @@ struct irq_domain_chip_generic;
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
  *
  * Revmap data, used internally by irq_domain
- * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
- *                         support direct mapping
  * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
  * @revmap: Linear table of hwirq->virq reverse mappings
@@ -173,7 +171,6 @@ struct irq_domain {
 
 	/* reverse map data. The linear map gets appended to the irq_domain */
 	irq_hw_number_t hwirq_max;
-	unsigned int revmap_direct_max_irq;
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
 	struct mutex revmap_tree_mutex;
@@ -207,6 +204,9 @@ enum {
 	 */
 	IRQ_DOMAIN_MSI_NOMASK_QUIRK	= (1 << 6),
 
+	/* Irq domain doesn't translate anything */
+	IRQ_DOMAIN_FLAG_NO_MAP		= (1 << 7),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e0143e640683..fa94c86e47d4 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -146,6 +146,10 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 
 	static atomic_t unknown_domains;
 
+	if (WARN_ON((size && direct_max) ||
+		    (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max)))
+		return NULL;
+
 	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
 			      GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
 	if (!domain)
@@ -213,8 +217,14 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 	domain->ops = ops;
 	domain->host_data = host_data;
 	domain->hwirq_max = hwirq_max;
+
+	if (direct_max) {
+		size = direct_max;
+		domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
+	}
+
 	domain->revmap_size = size;
-	domain->revmap_direct_max_irq = direct_max;
+
 	irq_domain_check_hierarchy(domain);
 
 	mutex_lock(&irq_domain_mutex);
@@ -482,9 +492,18 @@ struct irq_domain *irq_get_default_host(void)
 	return irq_default_domain;
 }
 
+static bool irq_domain_is_nomap(struct irq_domain *domain)
+{
+	return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) &&
+	       (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP);
+}
+
 static void irq_domain_clear_mapping(struct irq_domain *domain,
 				     irq_hw_number_t hwirq)
 {
+	if (irq_domain_is_nomap(domain))
+		return;
+
 	if (hwirq < domain->revmap_size) {
 		domain->revmap[hwirq] = 0;
 	} else {
@@ -498,6 +517,9 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 				   irq_hw_number_t hwirq,
 				   struct irq_data *irq_data)
 {
+	if (irq_domain_is_nomap(domain))
+		return;
+
 	if (hwirq < domain->revmap_size) {
 		domain->revmap[hwirq] = irq_data->irq;
 	} else {
@@ -629,9 +651,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 		pr_debug("create_direct virq allocation failed\n");
 		return 0;
 	}
-	if (virq >= domain->revmap_direct_max_irq) {
+	if (virq >= domain->revmap_size) {
 		pr_err("ERROR: no free irqs available below %i maximum\n",
-			domain->revmap_direct_max_irq);
+			domain->revmap_size);
 		irq_free_desc(virq);
 		return 0;
 	}
@@ -879,10 +901,14 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 	if (domain == NULL)
 		return 0;
 
-	if (hwirq < domain->revmap_direct_max_irq) {
-		data = irq_domain_get_irq_data(domain, hwirq);
-		if (data && data->hwirq == hwirq)
-			return hwirq;
+	if (irq_domain_is_nomap(domain)) {
+		if (hwirq < domain->revmap_size) {
+			data = irq_domain_get_irq_data(domain, hwirq);
+			if (data && data->hwirq == hwirq)
+				return hwirq;
+		}
+
+		return 0;
 	}
 
 	/* Check if the hwirq is in the linear revmap. */
@@ -1470,7 +1496,7 @@ static void irq_domain_fix_revmap(struct irq_data *d)
 {
 	void __rcu **slot;
 
-	if (d->hwirq < d->domain->revmap_size)
+	if (irq_domain_is_nomap(d->domain) || d->hwirq < d->domain->revmap_size)
 		return; /* Not using radix tree. */
 
 	/* Fix up the revmap. */
@@ -1830,8 +1856,7 @@ static void
 irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
 {
 	seq_printf(m, "%*sname:   %s\n", ind, "", d->name);
-	seq_printf(m, "%*ssize:   %u\n", ind + 1, "",
-		   d->revmap_size + d->revmap_direct_max_irq);
+	seq_printf(m, "%*ssize:   %u\n", ind + 1, "", d->revmap_size);
 	seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
 	seq_printf(m, "%*sflags:  0x%08x\n", ind +1 , "", d->flags);
 	if (d->ops && d->ops->debug_show)

From 426fa316148bccabf48f9c91a13c387ee911eadc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 11:30:51 +0100
Subject: [PATCH 1336/3292] irqdomain: Use struct_size() helper when allocating
 irqdomain

Instead of open-coding the size computation of struct irqdomain,
use the struct_size() helper instead.

This is going to be handy as we change the type of the revmap
array.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 kernel/irq/irqdomain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index fa94c86e47d4..cdcb1989cd20 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -150,7 +150,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 		    (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max)))
 		return NULL;
 
-	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
+	domain = kzalloc_node(struct_size(domain, revmap, size),
 			      GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
 	if (!domain)
 		return NULL;

From 48b15a7921d60680babe59f64e127816585a585c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 11:46:53 +0100
Subject: [PATCH 1337/3292] irqdomain: Cache irq_data instead of a virq number
 in the revmap

Caching a virq number in the revmap is pretty inefficient, as
it means we will need to convert it back to either an irq_data
or irq_desc to do anything with it.

It is also a bit odd, as the radix tree does cache irq_data
pointers.

Change the revmap type to be an irq_data pointer instead of
an unsigned int, and preserve the current API for now.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h |  4 ++--
 kernel/irq/irqdomain.c    | 16 +++++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 0916cf9c6e20..340cc04611dd 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,7 +151,7 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by irq_domain
  * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
- * @revmap: Linear table of hwirq->virq reverse mappings
+ * @revmap: Linear table of irq_data pointers
  */
 struct irq_domain {
 	struct list_head link;
@@ -174,7 +174,7 @@ struct irq_domain {
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
 	struct mutex revmap_tree_mutex;
-	unsigned int revmap[];
+	struct irq_data *revmap[];
 };
 
 /* Irq domain flags */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cdcb1989cd20..7a4e38804487 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -505,7 +505,7 @@ static void irq_domain_clear_mapping(struct irq_domain *domain,
 		return;
 
 	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = 0;
+		domain->revmap[hwirq] = NULL;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_delete(&domain->revmap_tree, hwirq);
@@ -521,7 +521,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 		return;
 
 	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = irq_data->irq;
+		domain->revmap[hwirq] = irq_data;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
@@ -913,7 +913,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 
 	/* Check if the hwirq is in the linear revmap. */
 	if (hwirq < domain->revmap_size)
-		return domain->revmap[hwirq];
+		return domain->revmap[hwirq]->irq;
 
 	rcu_read_lock();
 	data = radix_tree_lookup(&domain->revmap_tree, hwirq);
@@ -1496,8 +1496,14 @@ static void irq_domain_fix_revmap(struct irq_data *d)
 {
 	void __rcu **slot;
 
-	if (irq_domain_is_nomap(d->domain) || d->hwirq < d->domain->revmap_size)
-		return; /* Not using radix tree. */
+	if (irq_domain_is_nomap(d->domain))
+		return;
+
+	if (d->hwirq < d->domain->revmap_size) {
+		/* Not using radix tree */
+		d->domain->revmap[d->hwirq] = d;
+		return;
+	}
 
 	/* Fix up the revmap. */
 	mutex_lock(&d->domain->revmap_tree_mutex);

From d4a45c68dc81f9117ceaff9f058d5fae674181b9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 12:57:27 +0100
Subject: [PATCH 1338/3292] irqdomain: Protect the linear revmap with RCU

It is pretty odd that the radix tree uses RCU while the linear
portion doesn't, leading to potential surprises for the users,
depending on how the irqdomain has been created.

Fix this by moving the update of the linear revmap under
the mutex, and the lookup under the RCU read-side lock.

The mutex name is updated to reflect that it doesn't only
cover the radix-tree anymore.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h |  5 ++--
 kernel/irq/irqdomain.c    | 49 ++++++++++++++++++---------------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 340cc04611dd..2b696c9bcaaf 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,6 +151,7 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by irq_domain
  * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
+ * @revmap_mutex: Lock for the revmap
  * @revmap: Linear table of irq_data pointers
  */
 struct irq_domain {
@@ -173,8 +174,8 @@ struct irq_domain {
 	irq_hw_number_t hwirq_max;
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
-	struct mutex revmap_tree_mutex;
-	struct irq_data *revmap[];
+	struct mutex revmap_mutex;
+	struct irq_data __rcu *revmap[];
 };
 
 /* Irq domain flags */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 7a4e38804487..8fbadeefc814 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -213,7 +213,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 
 	/* Fill structure */
 	INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
-	mutex_init(&domain->revmap_tree_mutex);
+	mutex_init(&domain->revmap_mutex);
 	domain->ops = ops;
 	domain->host_data = host_data;
 	domain->hwirq_max = hwirq_max;
@@ -504,13 +504,12 @@ static void irq_domain_clear_mapping(struct irq_domain *domain,
 	if (irq_domain_is_nomap(domain))
 		return;
 
-	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = NULL;
-	} else {
-		mutex_lock(&domain->revmap_tree_mutex);
+	mutex_lock(&domain->revmap_mutex);
+	if (hwirq < domain->revmap_size)
+		rcu_assign_pointer(domain->revmap[hwirq], NULL);
+	else
 		radix_tree_delete(&domain->revmap_tree, hwirq);
-		mutex_unlock(&domain->revmap_tree_mutex);
-	}
+	mutex_unlock(&domain->revmap_mutex);
 }
 
 static void irq_domain_set_mapping(struct irq_domain *domain,
@@ -520,13 +519,12 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 	if (irq_domain_is_nomap(domain))
 		return;
 
-	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = irq_data;
-	} else {
-		mutex_lock(&domain->revmap_tree_mutex);
+	mutex_lock(&domain->revmap_mutex);
+	if (hwirq < domain->revmap_size)
+		rcu_assign_pointer(domain->revmap[hwirq], irq_data);
+	else
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
-		mutex_unlock(&domain->revmap_tree_mutex);
-	}
+	mutex_unlock(&domain->revmap_mutex);
 }
 
 static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
@@ -911,12 +909,12 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 		return 0;
 	}
 
+	rcu_read_lock();
 	/* Check if the hwirq is in the linear revmap. */
 	if (hwirq < domain->revmap_size)
-		return domain->revmap[hwirq]->irq;
-
-	rcu_read_lock();
-	data = radix_tree_lookup(&domain->revmap_tree, hwirq);
+		data = rcu_dereference(domain->revmap[hwirq]);
+	else
+		data = radix_tree_lookup(&domain->revmap_tree, hwirq);
 	rcu_read_unlock();
 	return data ? data->irq : 0;
 }
@@ -1499,18 +1497,17 @@ static void irq_domain_fix_revmap(struct irq_data *d)
 	if (irq_domain_is_nomap(d->domain))
 		return;
 
+	/* Fix up the revmap. */
+	mutex_lock(&d->domain->revmap_mutex);
 	if (d->hwirq < d->domain->revmap_size) {
 		/* Not using radix tree */
-		d->domain->revmap[d->hwirq] = d;
-		return;
+		rcu_assign_pointer(d->domain->revmap[d->hwirq], d);
+	} else {
+		slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq);
+		if (slot)
+			radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
 	}
-
-	/* Fix up the revmap. */
-	mutex_lock(&d->domain->revmap_tree_mutex);
-	slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq);
-	if (slot)
-		radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
-	mutex_unlock(&d->domain->revmap_tree_mutex);
+	mutex_unlock(&d->domain->revmap_mutex);
 }
 
 /**

From d22558dd0a6c888b1829f9d3a0a627e330e27585 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 14:00:13 +0100
Subject: [PATCH 1339/3292] irqdomain: Introduce irq_resolve_mapping()

Rework irq_find_mapping() to return an both an irq_desc pointer,
optionally the virtual irq number, and rename the result to
__irq_resolve_mapping(). a new helper called irq_resolve_mapping()
is provided for code that doesn't need the virtual irq number.

irq_find_mapping() is also rewritten in terms of __irq_resolve_mapping().

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h | 23 +++++++++++++++++++++--
 kernel/irq/irqdomain.c    | 28 ++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 2b696c9bcaaf..23e4ee523576 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -41,6 +41,7 @@ struct fwnode_handle;
 struct irq_domain;
 struct irq_chip;
 struct irq_data;
+struct irq_desc;
 struct cpumask;
 struct seq_file;
 struct irq_affinity_desc;
@@ -401,13 +402,31 @@ static inline unsigned int irq_create_mapping(struct irq_domain *host,
 	return irq_create_mapping_affinity(host, hwirq, NULL);
 }
 
+extern struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+					      irq_hw_number_t hwirq,
+					      unsigned int *irq);
+
+static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
+						   irq_hw_number_t hwirq)
+{
+	return __irq_resolve_mapping(domain, hwirq, NULL);
+}
+
 /**
  * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
  */
-extern unsigned int irq_find_mapping(struct irq_domain *host,
-				     irq_hw_number_t hwirq);
+static inline unsigned int irq_find_mapping(struct irq_domain *domain,
+					    irq_hw_number_t hwirq)
+{
+	unsigned int irq;
+
+	if (__irq_resolve_mapping(domain, hwirq, &irq))
+		return irq;
+
+	return 0;
+}
 
 static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 					     irq_hw_number_t hwirq)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fbadeefc814..51c483ce2447 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -884,29 +884,34 @@ void irq_dispose_mapping(unsigned int virq)
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
 /**
- * irq_find_mapping() - Find a linux irq from a hw irq number.
+ * __irq_resolve_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
+ * @irq: optional pointer to return the Linux irq if required
+ *
+ * Returns the interrupt descriptor.
  */
-unsigned int irq_find_mapping(struct irq_domain *domain,
-			      irq_hw_number_t hwirq)
+struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+				       irq_hw_number_t hwirq,
+				       unsigned int *irq)
 {
+	struct irq_desc *desc = NULL;
 	struct irq_data *data;
 
 	/* Look for default domain if necessary */
 	if (domain == NULL)
 		domain = irq_default_domain;
 	if (domain == NULL)
-		return 0;
+		return desc;
 
 	if (irq_domain_is_nomap(domain)) {
 		if (hwirq < domain->revmap_size) {
 			data = irq_domain_get_irq_data(domain, hwirq);
 			if (data && data->hwirq == hwirq)
-				return hwirq;
+				desc = irq_data_to_desc(data);
 		}
 
-		return 0;
+		return desc;
 	}
 
 	rcu_read_lock();
@@ -915,10 +920,17 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 		data = rcu_dereference(domain->revmap[hwirq]);
 	else
 		data = radix_tree_lookup(&domain->revmap_tree, hwirq);
+
+	if (likely(data)) {
+		desc = irq_data_to_desc(data);
+		if (irq)
+			*irq = data->irq;
+	}
+
 	rcu_read_unlock();
-	return data ? data->irq : 0;
+	return desc;
 }
-EXPORT_SYMBOL_GPL(irq_find_mapping);
+EXPORT_SYMBOL_GPL(__irq_resolve_mapping);
 
 /**
  * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings

From a3016b26ee6ee13d5647d701404a7912d4eaea9e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 14:24:37 +0100
Subject: [PATCH 1340/3292] genirq: Use irq_resolve_mapping() to implement
 __handle_domain_irq() and co

In order to start reaping the benefits of irq_resolve_mapping(),
start using it in __handle_domain_irq() and handle_domain_nmi().

This involves splitting generic_handle_irq() to be able to directly
provide the irq_desc.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdesc.h |  1 +
 kernel/irq/irqdesc.c    | 60 ++++++++++++++++++++++++-----------------
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index df4651250785..cdd1cf8207f6 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -158,6 +158,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
 	desc->handle_irq(desc);
 }
 
+int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
 
 #ifdef CONFIG_HANDLE_DOMAIN_IRQ
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4a617d7312a4..684c5b7b7832 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -632,14 +632,8 @@ void irq_init_desc(unsigned int irq)
 
 #endif /* !CONFIG_SPARSE_IRQ */
 
-/**
- * generic_handle_irq - Invoke the handler for a particular irq
- * @irq:	The irq number to handle
- *
- */
-int generic_handle_irq(unsigned int irq)
+int handle_irq_desc(struct irq_desc *desc)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_data *data;
 
 	if (!desc)
@@ -652,6 +646,17 @@ int generic_handle_irq(unsigned int irq)
 	generic_handle_irq_desc(desc);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(handle_irq_desc);
+
+/**
+ * generic_handle_irq - Invoke the handler for a particular irq
+ * @irq:	The irq number to handle
+ *
+ */
+int generic_handle_irq(unsigned int irq)
+{
+	return handle_irq_desc(irq_to_desc(irq));
+}
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
 #ifdef CONFIG_HANDLE_DOMAIN_IRQ
@@ -668,27 +673,32 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 			bool lookup, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	unsigned int irq = hwirq;
+	struct irq_desc *desc;
 	int ret = 0;
 
 	irq_enter();
 
-#ifdef CONFIG_IRQ_DOMAIN
-	if (lookup)
-		irq = irq_find_mapping(domain, hwirq);
-#endif
-
-	/*
-	 * Some hardware gives randomly wrong interrupts.  Rather
-	 * than crashing, do something sensible.
-	 */
-	if (unlikely(!irq || irq >= nr_irqs)) {
-		ack_bad_irq(irq);
-		ret = -EINVAL;
+	if (likely(IS_ENABLED(CONFIG_IRQ_DOMAIN) && lookup)) {
+		/* The irqdomain code provides boundary checks */
+		desc = irq_resolve_mapping(domain, hwirq);
 	} else {
-		generic_handle_irq(irq);
+		/*
+		 * Some hardware gives randomly wrong interrupts.  Rather
+		 * than crashing, do something sensible.
+		 */
+		if (unlikely(!hwirq || hwirq >= nr_irqs)) {
+			ack_bad_irq(hwirq);
+			desc = NULL;
+		} else {
+			desc = irq_to_desc(hwirq);
+		}
 	}
 
+	if (likely(desc))
+		handle_irq_desc(desc);
+	else
+		ret = -EINVAL;
+
 	irq_exit();
 	set_irq_regs(old_regs);
 	return ret;
@@ -709,7 +719,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 		      struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	unsigned int irq;
+	struct irq_desc *desc;
 	int ret = 0;
 
 	/*
@@ -717,14 +727,14 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 	 */
 	WARN_ON(!in_nmi());
 
-	irq = irq_find_mapping(domain, hwirq);
+	desc = irq_resolve_mapping(domain, hwirq);
 
 	/*
 	 * ack_bad_irq is not NMI-safe, just report
 	 * an invalid interrupt.
 	 */
-	if (likely(irq))
-		generic_handle_irq(irq);
+	if (likely(desc))
+		handle_irq_desc(desc);
 	else
 		ret = -EINVAL;
 

From 9626d18a20e166a864e8d1f6ed6bbb84a0fa4989 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 14:33:24 +0100
Subject: [PATCH 1341/3292] irqdesc: Fix __handle_domain_irq() comment

It appears that the comment about a NULL domain meaning anything
has always been wrong. Fix it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdesc.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index cdd1cf8207f6..2971eb7e65f1 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -165,8 +165,7 @@ int generic_handle_irq(unsigned int irq);
 /*
  * Convert a HW interrupt number to a logical one using a IRQ domain,
  * and handle the result interrupt number. Return -EINVAL if
- * conversion failed. Providing a NULL domain indicates that the
- * conversion has already been done.
+ * conversion failed.
  */
 int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 			bool lookup, struct pt_regs *regs);

From 9e027dd979beca41cd85f4e971d184fe0ffcff3c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 May 2021 13:46:05 +0100
Subject: [PATCH 1342/3292] irqchip/nvic: Convert from handle_IRQ() to
 handle_domain_irq()

Given that the nvic driver is fully irqdomain aware, there is no
reason for it to use the arch-specific handle_IRQ(), and it can
be moved over to handle_domain_irq().

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-nvic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index f747e2209ea9..b31c4cff4d3a 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -40,9 +40,7 @@ static struct irq_domain *nvic_irq_domain;
 asmlinkage void __exception_irq_entry
 nvic_handle_irq(irq_hw_number_t hwirq, struct pt_regs *regs)
 {
-	unsigned int irq = irq_linear_revmap(nvic_irq_domain, hwirq);
-
-	handle_IRQ(irq, regs);
+	handle_domain_irq(nvic_irq_domain, hwirq, regs);
 }
 
 static int nvic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,

From 8240ef50d4864325b346e40bb9d30cda9f22102d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 May 2021 13:45:52 +0100
Subject: [PATCH 1343/3292] genirq: Add generic_handle_domain_irq() helper

Provide generic_handle_domain_irq() as a pendent to handle_domain_irq()
for non-root interrupt controllers

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/irqdesc.c    | 19 ++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 2971eb7e65f1..0f226c6b0c70 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -170,6 +170,8 @@ int generic_handle_irq(unsigned int irq);
 int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 			bool lookup, struct pt_regs *regs);
 
+int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
+
 static inline int handle_domain_irq(struct irq_domain *domain,
 				    unsigned int hwirq, struct pt_regs *regs)
 {
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 684c5b7b7832..6179d5bde88e 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -661,7 +661,24 @@ EXPORT_SYMBOL_GPL(generic_handle_irq);
 
 #ifdef CONFIG_HANDLE_DOMAIN_IRQ
 /**
- * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain
+ * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
+ *                             to a domain, usually for a non-root interrupt
+ *                             controller
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The HW irq number to convert to a logical one
+ *
+ * Returns:	0 on success, or -EINVAL if conversion has failed
+ *
+ */
+int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
+{
+	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
+}
+EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
+
+/**
+ * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
+ *                       usually for a root interrupt controller
  * @domain:	The domain where to perform the lookup
  * @hwirq:	The HW irq number to convert to a logical one
  * @lookup:	Whether to perform the domain lookup or not

From e1c054918c6c7a30a35d2c183ed86600a071cdab Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 May 2021 16:18:15 +0100
Subject: [PATCH 1344/3292] genirq: Move non-irqdomain handle_domain_irq()
 handling into ARM's handle_IRQ()

Despite the name, handle_domain_irq() deals with non-irqdomain
handling for the sake of a handful of legacy ARM platforms.

Move such handling into ARM's handle_IRQ(), allowing for better
code generation for everyone else. This allows us get rid of
some complexity, and to rearrange the guards on the various helpers
in a more logical way.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm/kernel/irq.c   | 22 +++++++++++++++++++++-
 include/linux/irqdesc.h | 14 ++++----------
 kernel/irq/irqdesc.c    | 30 ++++++++----------------------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 698b6f636156..20ab1e607522 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -63,7 +63,27 @@ int arch_show_interrupts(struct seq_file *p, int prec)
  */
 void handle_IRQ(unsigned int irq, struct pt_regs *regs)
 {
-	__handle_domain_irq(NULL, irq, false, regs);
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct irq_desc *desc;
+
+	irq_enter();
+
+	/*
+	 * Some hardware gives randomly wrong interrupts.  Rather
+	 * than crashing, do something sensible.
+	 */
+	if (unlikely(!irq || irq >= nr_irqs))
+		desc = NULL;
+	else
+		desc = irq_to_desc(irq);
+
+	if (likely(desc))
+		handle_irq_desc(desc);
+	else
+		ack_bad_irq(irq);
+
+	irq_exit();
+	set_irq_regs(old_regs);
 }
 
 /*
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 0f226c6b0c70..59aea39785bf 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -161,24 +161,18 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
 int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
 
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+#ifdef CONFIG_IRQ_DOMAIN
 /*
  * Convert a HW interrupt number to a logical one using a IRQ domain,
  * and handle the result interrupt number. Return -EINVAL if
  * conversion failed.
  */
-int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
-			bool lookup, struct pt_regs *regs);
-
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
 
-static inline int handle_domain_irq(struct irq_domain *domain,
-				    unsigned int hwirq, struct pt_regs *regs)
-{
-	return __handle_domain_irq(domain, hwirq, true, regs);
-}
+#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+int handle_domain_irq(struct irq_domain *domain,
+		      unsigned int hwirq, struct pt_regs *regs);
 
-#ifdef CONFIG_IRQ_DOMAIN
 int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 		      struct pt_regs *regs);
 #endif
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 6179d5bde88e..f4dd5186858a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -659,7 +659,7 @@ int generic_handle_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+#ifdef CONFIG_IRQ_DOMAIN
 /**
  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
  *                             to a domain, usually for a non-root interrupt
@@ -676,9 +676,10 @@ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
 
+#ifdef CONFIG_HANDLE_DOMAIN_IRQ
 /**
- * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
- *                       usually for a root interrupt controller
+ * handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
+ *                     usually for a root interrupt controller
  * @domain:	The domain where to perform the lookup
  * @hwirq:	The HW irq number to convert to a logical one
  * @lookup:	Whether to perform the domain lookup or not
@@ -686,8 +687,8 @@ EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
  *
  * Returns:	0 on success, or -EINVAL if conversion has failed
  */
-int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
-			bool lookup, struct pt_regs *regs)
+int handle_domain_irq(struct irq_domain *domain,
+		      unsigned int hwirq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	struct irq_desc *desc;
@@ -695,22 +696,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 
 	irq_enter();
 
-	if (likely(IS_ENABLED(CONFIG_IRQ_DOMAIN) && lookup)) {
-		/* The irqdomain code provides boundary checks */
-		desc = irq_resolve_mapping(domain, hwirq);
-	} else {
-		/*
-		 * Some hardware gives randomly wrong interrupts.  Rather
-		 * than crashing, do something sensible.
-		 */
-		if (unlikely(!hwirq || hwirq >= nr_irqs)) {
-			ack_bad_irq(hwirq);
-			desc = NULL;
-		} else {
-			desc = irq_to_desc(hwirq);
-		}
-	}
-
+	/* The irqdomain code provides boundary checks */
+	desc = irq_resolve_mapping(domain, hwirq);
 	if (likely(desc))
 		handle_irq_desc(desc);
 	else
@@ -721,7 +708,6 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 	return ret;
 }
 
-#ifdef CONFIG_IRQ_DOMAIN
 /**
  * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain
  * @domain:	The domain where to perform the lookup

From 046a6ee2343bb26d85a9973a39ccdb9764236fa4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 17:42:18 +0100
Subject: [PATCH 1345/3292] irqchip: Bulk conversion to
 generic_handle_domain_irq()

Wherever possible, replace constructs that match either
generic_handle_irq(irq_find_mapping()) or
generic_handle_irq(irq_linear_revmap()) to a single call to
generic_handle_domain_irq().

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/exynos-combiner.c      | 10 ++++------
 drivers/irqchip/irq-al-fic.c           |  7 ++-----
 drivers/irqchip/irq-armada-370-xp.c    | 19 ++++++++-----------
 drivers/irqchip/irq-aspeed-i2c-ic.c    |  8 +++-----
 drivers/irqchip/irq-aspeed-scu-ic.c    |  6 ++----
 drivers/irqchip/irq-ath79-misc.c       |  2 +-
 drivers/irqchip/irq-bcm2835.c          |  2 +-
 drivers/irqchip/irq-bcm2836.c          |  2 +-
 drivers/irqchip/irq-bcm7038-l1.c       |  6 ++----
 drivers/irqchip/irq-bcm7120-l2.c       |  6 ++----
 drivers/irqchip/irq-brcmstb-l2.c       |  2 +-
 drivers/irqchip/irq-dw-apb-ictl.c      |  3 +--
 drivers/irqchip/irq-gic.c              | 13 +++++--------
 drivers/irqchip/irq-goldfish-pic.c     |  5 ++---
 drivers/irqchip/irq-i8259.c            |  4 +---
 drivers/irqchip/irq-idt3243x.c         |  6 ++----
 drivers/irqchip/irq-imgpdc.c           | 11 ++++-------
 drivers/irqchip/irq-imx-intmux.c       |  9 +++------
 drivers/irqchip/irq-imx-irqsteer.c     |  9 +++------
 drivers/irqchip/irq-ingenic-tcu.c      |  2 +-
 drivers/irqchip/irq-ingenic.c          |  3 +--
 drivers/irqchip/irq-keystone.c         | 14 ++++++--------
 drivers/irqchip/irq-loongson-htpic.c   |  2 +-
 drivers/irqchip/irq-loongson-htvec.c   |  4 ++--
 drivers/irqchip/irq-loongson-liointc.c |  2 +-
 drivers/irqchip/irq-lpc32xx.c          |  2 +-
 drivers/irqchip/irq-ls-scfg-msi.c      |  6 ++----
 drivers/irqchip/irq-ls1x.c             |  2 +-
 drivers/irqchip/irq-mips-gic.c         | 20 ++++++++++----------
 drivers/irqchip/irq-mscc-ocelot.c      |  2 +-
 drivers/irqchip/irq-mvebu-pic.c        |  7 ++-----
 drivers/irqchip/irq-mvebu-sei.c        | 13 ++++---------
 drivers/irqchip/irq-orion.c            |  2 +-
 drivers/irqchip/irq-partition-percpu.c |  9 +++------
 drivers/irqchip/irq-pruss-intc.c       |  9 +++------
 drivers/irqchip/irq-realtek-rtl.c      |  2 +-
 drivers/irqchip/irq-renesas-irqc.c     |  2 +-
 drivers/irqchip/irq-sifive-plic.c      |  8 +++-----
 drivers/irqchip/irq-stm32-exti.c       | 10 ++++------
 drivers/irqchip/irq-sunxi-nmi.c        |  3 +--
 drivers/irqchip/irq-tb10x.c            |  2 +-
 drivers/irqchip/irq-ti-sci-inta.c      |  9 +++------
 drivers/irqchip/irq-ts4800.c           |  3 +--
 drivers/irqchip/irq-versatile-fpga.c   |  2 +-
 drivers/irqchip/irq-vic.c              |  2 +-
 drivers/irqchip/irq-xilinx-intc.c      | 23 +++++------------------
 drivers/irqchip/qcom-irq-combiner.c    |  6 +-----
 47 files changed, 111 insertions(+), 190 deletions(-)

diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c
index 0b85d9a3fbff..14106126cbf3 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -66,8 +66,9 @@ static void combiner_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int cascade_irq, combiner_irq;
+	unsigned int combiner_irq;
 	unsigned long status;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 
@@ -80,12 +81,9 @@ static void combiner_handle_cascade_irq(struct irq_desc *desc)
 		goto out;
 
 	combiner_irq = chip_data->hwirq_offset + __ffs(status);
-	cascade_irq = irq_find_mapping(combiner_irq_domain, combiner_irq);
-
-	if (unlikely(!cascade_irq))
+	ret = generic_handle_domain_irq(combiner_irq_domain, combiner_irq);
+	if (unlikely(ret))
 		handle_bad_irq(desc);
-	else
-		generic_handle_irq(cascade_irq);
 
  out:
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c
index 0b0a73739756..886de028a901 100644
--- a/drivers/irqchip/irq-al-fic.c
+++ b/drivers/irqchip/irq-al-fic.c
@@ -111,7 +111,6 @@ static void al_fic_irq_handler(struct irq_desc *desc)
 	struct irq_chip *irqchip = irq_desc_get_chip(desc);
 	struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
 	unsigned long pending;
-	unsigned int irq;
 	u32 hwirq;
 
 	chained_irq_enter(irqchip, desc);
@@ -119,10 +118,8 @@ static void al_fic_irq_handler(struct irq_desc *desc)
 	pending = readl_relaxed(fic->base + AL_FIC_CAUSE);
 	pending &= ~gc->mask_cache;
 
-	for_each_set_bit(hwirq, &pending, NR_FIC_IRQS) {
-		irq = irq_find_mapping(domain, hwirq);
-		generic_handle_irq(irq);
-	}
+	for_each_set_bit(hwirq, &pending, NR_FIC_IRQS)
+		generic_handle_domain_irq(domain, hwirq);
 
 	chained_irq_exit(irqchip, desc);
 }
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 32938dfc0e46..7557ab551295 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -582,20 +582,19 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained)
 
 	for (msinr = PCI_MSI_DOORBELL_START;
 	     msinr < PCI_MSI_DOORBELL_END; msinr++) {
-		int irq;
+		unsigned int irq;
 
 		if (!(msimask & BIT(msinr)))
 			continue;
 
-		if (is_chained) {
-			irq = irq_find_mapping(armada_370_xp_msi_inner_domain,
-					       msinr - PCI_MSI_DOORBELL_START);
-			generic_handle_irq(irq);
-		} else {
-			irq = msinr - PCI_MSI_DOORBELL_START;
+		irq = msinr - PCI_MSI_DOORBELL_START;
+
+		if (is_chained)
+			generic_handle_domain_irq(armada_370_xp_msi_inner_domain,
+						  irq);
+		else
 			handle_domain_irq(armada_370_xp_msi_inner_domain,
 					  irq, regs);
-		}
 	}
 }
 #else
@@ -606,7 +605,6 @@ static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long irqmap, irqn, irqsrc, cpuid;
-	unsigned int cascade_irq;
 
 	chained_irq_enter(chip, desc);
 
@@ -628,8 +626,7 @@ static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc)
 			continue;
 		}
 
-		cascade_irq = irq_find_mapping(armada_370_xp_mpic_domain, irqn);
-		generic_handle_irq(cascade_irq);
+		generic_handle_domain_irq(armada_370_xp_mpic_domain, irqn);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c
index 8d591c179f81..a47db16ff960 100644
--- a/drivers/irqchip/irq-aspeed-i2c-ic.c
+++ b/drivers/irqchip/irq-aspeed-i2c-ic.c
@@ -34,14 +34,12 @@ static void aspeed_i2c_ic_irq_handler(struct irq_desc *desc)
 	struct aspeed_i2c_ic *i2c_ic = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long bit, status;
-	unsigned int bus_irq;
 
 	chained_irq_enter(chip, desc);
 	status = readl(i2c_ic->base);
-	for_each_set_bit(bit, &status, ASPEED_I2C_IC_NUM_BUS) {
-		bus_irq = irq_find_mapping(i2c_ic->irq_domain, bit);
-		generic_handle_irq(bus_irq);
-	}
+	for_each_set_bit(bit, &status, ASPEED_I2C_IC_NUM_BUS)
+		generic_handle_domain_irq(i2c_ic->irq_domain, bit);
+
 	chained_irq_exit(chip, desc);
 }
 
diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c
index c90a3346b985..f3c6855a4cef 100644
--- a/drivers/irqchip/irq-aspeed-scu-ic.c
+++ b/drivers/irqchip/irq-aspeed-scu-ic.c
@@ -44,7 +44,6 @@ struct aspeed_scu_ic {
 
 static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq;
 	unsigned int sts;
 	unsigned long bit;
 	unsigned long enabled;
@@ -74,9 +73,8 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
 	max = scu_ic->num_irqs + bit;
 
 	for_each_set_bit_from(bit, &status, max) {
-		irq = irq_find_mapping(scu_ic->irq_domain,
-				       bit - scu_ic->irq_shift);
-		generic_handle_irq(irq);
+		generic_handle_domain_irq(scu_ic->irq_domain,
+					  bit - scu_ic->irq_shift);
 
 		regmap_update_bits(scu_ic->scu, scu_ic->reg, mask,
 				   BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT));
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
index 3d641bb6f3f1..92f001a5ff8d 100644
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -50,7 +50,7 @@ static void ath79_misc_irq_handler(struct irq_desc *desc)
 	while (pending) {
 		int bit = __ffs(pending);
 
-		generic_handle_irq(irq_linear_revmap(domain, bit));
+		generic_handle_domain_irq(domain, bit);
 		pending &= ~BIT(bit);
 	}
 
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index a1e004af23e7..adc1556ed332 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -254,7 +254,7 @@ static void bcm2836_chained_handle_irq(struct irq_desc *desc)
 	u32 hwirq;
 
 	while ((hwirq = get_next_armctrl_hwirq()) != ~0)
-		generic_handle_irq(irq_linear_revmap(intc.domain, hwirq));
+		generic_handle_domain_irq(intc.domain, hwirq);
 }
 
 IRQCHIP_DECLARE(bcm2835_armctrl_ic, "brcm,bcm2835-armctrl-ic",
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index 25c9a9c06e41..501facdb4570 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -161,7 +161,7 @@ static void bcm2836_arm_irqchip_handle_ipi(struct irq_desc *desc)
 	mbox_val = readl_relaxed(intc.base + LOCAL_MAILBOX0_CLR0 + 16 * cpu);
 	if (mbox_val) {
 		int hwirq = ffs(mbox_val) - 1;
-		generic_handle_irq(irq_find_mapping(ipi_domain, hwirq));
+		generic_handle_domain_irq(ipi_domain, hwirq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 9dc9bf8cdcc4..a035c385ca7a 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -145,10 +145,8 @@ static void bcm7038_l1_irq_handle(struct irq_desc *desc)
 			  ~cpu->mask_cache[idx];
 		raw_spin_unlock_irqrestore(&intc->lock, flags);
 
-		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
-			generic_handle_irq(irq_find_mapping(intc->domain,
-							    base + hwirq));
-		}
+		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD)
+			generic_handle_domain_irq(intc->domain, base + hwirq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index ad59656ccc28..f23d7651ea84 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -74,10 +74,8 @@ static void bcm7120_l2_intc_irq_handle(struct irq_desc *desc)
 					    data->irq_map_mask[idx];
 		irq_gc_unlock(gc);
 
-		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
-			generic_handle_irq(irq_find_mapping(b->domain,
-					   base + hwirq));
-		}
+		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD)
+			generic_handle_domain_irq(b->domain, base + hwirq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index cdd6a42d4efa..8e0911561f2d 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -110,7 +110,7 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
 	do {
 		irq = ffs(status) - 1;
 		status &= ~(1 << irq);
-		generic_handle_irq(irq_linear_revmap(b->domain, irq));
+		generic_handle_domain_irq(b->domain, irq);
 	} while (status);
 out:
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c
index 54b09d6c407c..a67266e44491 100644
--- a/drivers/irqchip/irq-dw-apb-ictl.c
+++ b/drivers/irqchip/irq-dw-apb-ictl.c
@@ -62,9 +62,8 @@ static void dw_apb_ictl_handle_irq_cascaded(struct irq_desc *desc)
 
 		while (stat) {
 			u32 hwirq = ffs(stat) - 1;
-			u32 virq = irq_find_mapping(d, gc->irq_base + hwirq);
+			generic_handle_domain_irq(d, gc->irq_base + hwirq);
 
-			generic_handle_irq(virq);
 			stat &= ~BIT(hwirq);
 		}
 	}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b1d9c22caf2e..46c9c5fafdbc 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -375,8 +375,9 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int cascade_irq, gic_irq;
+	unsigned int gic_irq;
 	unsigned long status;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 
@@ -386,14 +387,10 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
 	if (gic_irq == GICC_INT_SPURIOUS)
 		goto out;
 
-	cascade_irq = irq_find_mapping(chip_data->domain, gic_irq);
-	if (unlikely(gic_irq < 32 || gic_irq > 1020)) {
+	isb();
+	ret = generic_handle_domain_irq(chip_data->domain, gic_irq);
+	if (unlikely(ret))
 		handle_bad_irq(desc);
-	} else {
-		isb();
-		generic_handle_irq(cascade_irq);
-	}
-
  out:
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/irq-goldfish-pic.c b/drivers/irqchip/irq-goldfish-pic.c
index 4f021530e7f3..513f6edbbe95 100644
--- a/drivers/irqchip/irq-goldfish-pic.c
+++ b/drivers/irqchip/irq-goldfish-pic.c
@@ -34,15 +34,14 @@ static void goldfish_pic_cascade(struct irq_desc *desc)
 {
 	struct goldfish_pic_data *gfpic = irq_desc_get_handler_data(desc);
 	struct irq_chip *host_chip = irq_desc_get_chip(desc);
-	u32 pending, hwirq, virq;
+	u32 pending, hwirq;
 
 	chained_irq_enter(host_chip, desc);
 
 	pending = readl(gfpic->base + GFPIC_REG_IRQ_PENDING);
 	while (pending) {
 		hwirq = __fls(pending);
-		virq = irq_linear_revmap(gfpic->irq_domain, hwirq);
-		generic_handle_irq(virq);
+		generic_handle_domain_irq(gfpic->irq_domain, hwirq);
 		pending &= ~(1 << hwirq);
 	}
 
diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c
index b6f6aa7b2862..b70ce0d3c092 100644
--- a/drivers/irqchip/irq-i8259.c
+++ b/drivers/irqchip/irq-i8259.c
@@ -333,13 +333,11 @@ static void i8259_irq_dispatch(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	int hwirq = i8259_poll();
-	unsigned int irq;
 
 	if (hwirq < 0)
 		return;
 
-	irq = irq_linear_revmap(domain, hwirq);
-	generic_handle_irq(irq);
+	generic_handle_domain_irq(domain, hwirq);
 }
 
 int __init i8259_of_init(struct device_node *node, struct device_node *parent)
diff --git a/drivers/irqchip/irq-idt3243x.c b/drivers/irqchip/irq-idt3243x.c
index f0996820077a..0732a0e9af62 100644
--- a/drivers/irqchip/irq-idt3243x.c
+++ b/drivers/irqchip/irq-idt3243x.c
@@ -28,7 +28,7 @@ static void idt_irq_dispatch(struct irq_desc *desc)
 {
 	struct idt_pic_data *idtpic = irq_desc_get_handler_data(desc);
 	struct irq_chip *host_chip = irq_desc_get_chip(desc);
-	u32 pending, hwirq, virq;
+	u32 pending, hwirq;
 
 	chained_irq_enter(host_chip, desc);
 
@@ -36,9 +36,7 @@ static void idt_irq_dispatch(struct irq_desc *desc)
 	pending &= ~idtpic->gc->mask_cache;
 	while (pending) {
 		hwirq = __fls(pending);
-		virq = irq_linear_revmap(idtpic->irq_domain, hwirq);
-		if (virq)
-			generic_handle_irq(virq);
+		generic_handle_domain_irq(idtpic->irq_domain, hwirq);
 		pending &= ~(1 << hwirq);
 	}
 
diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 698d07f48fed..646dfbf4890b 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -223,7 +223,7 @@ static void pdc_intc_perip_isr(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct pdc_intc_priv *priv;
-	unsigned int i, irq_no;
+	unsigned int i;
 
 	priv = (struct pdc_intc_priv *)irq_desc_get_handler_data(desc);
 
@@ -237,14 +237,13 @@ static void pdc_intc_perip_isr(struct irq_desc *desc)
 found:
 
 	/* pass on the interrupt */
-	irq_no = irq_linear_revmap(priv->domain, i);
-	generic_handle_irq(irq_no);
+	generic_handle_domain_irq(priv->domain, i);
 }
 
 static void pdc_intc_syswake_isr(struct irq_desc *desc)
 {
 	struct pdc_intc_priv *priv;
-	unsigned int syswake, irq_no;
+	unsigned int syswake;
 	unsigned int status;
 
 	priv = (struct pdc_intc_priv *)irq_desc_get_handler_data(desc);
@@ -258,9 +257,7 @@ static void pdc_intc_syswake_isr(struct irq_desc *desc)
 		if (!(status & 1))
 			continue;
 
-		irq_no = irq_linear_revmap(priv->domain,
-					   syswake_to_hwirq(syswake));
-		generic_handle_irq(irq_no);
+		generic_handle_domain_irq(priv->domain, syswake_to_hwirq(syswake));
 	}
 }
 
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index 7709f9712cb3..e86ff743e98c 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -182,18 +182,15 @@ static void imx_intmux_irq_handler(struct irq_desc *desc)
 	struct intmux_data *data = container_of(irqchip_data, struct intmux_data,
 						irqchip_data[idx]);
 	unsigned long irqstat;
-	int pos, virq;
+	int pos;
 
 	chained_irq_enter(irq_desc_get_chip(desc), desc);
 
 	/* read the interrupt source pending status of this channel */
 	irqstat = readl_relaxed(data->regs + CHANIPR(idx));
 
-	for_each_set_bit(pos, &irqstat, 32) {
-		virq = irq_find_mapping(irqchip_data->domain, pos);
-		if (virq)
-			generic_handle_irq(virq);
-	}
+	for_each_set_bit(pos, &irqstat, 32)
+		generic_handle_domain_irq(irqchip_data->domain, pos);
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
 }
diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
index 1edf7692a790..8d91a02593fc 100644
--- a/drivers/irqchip/irq-imx-irqsteer.c
+++ b/drivers/irqchip/irq-imx-irqsteer.c
@@ -122,7 +122,7 @@ static void imx_irqsteer_irq_handler(struct irq_desc *desc)
 	for (i = 0; i < 2; i++, hwirq += 32) {
 		int idx = imx_irqsteer_get_reg_index(data, hwirq);
 		unsigned long irqmap;
-		int pos, virq;
+		int pos;
 
 		if (hwirq >= data->reg_num * 32)
 			break;
@@ -130,11 +130,8 @@ static void imx_irqsteer_irq_handler(struct irq_desc *desc)
 		irqmap = readl_relaxed(data->regs +
 				       CHANSTATUS(idx, data->reg_num));
 
-		for_each_set_bit(pos, &irqmap, 32) {
-			virq = irq_find_mapping(data->domain, pos + hwirq);
-			if (virq)
-				generic_handle_irq(virq);
-		}
+		for_each_set_bit(pos, &irqmap, 32)
+			generic_handle_domain_irq(data->domain, pos + hwirq);
 	}
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c
index b938d1d04d96..34a7d261b710 100644
--- a/drivers/irqchip/irq-ingenic-tcu.c
+++ b/drivers/irqchip/irq-ingenic-tcu.c
@@ -38,7 +38,7 @@ static void ingenic_tcu_intc_cascade(struct irq_desc *desc)
 	irq_reg &= ~irq_mask;
 
 	for_each_set_bit(i, (unsigned long *)&irq_reg, 32)
-		generic_handle_irq(irq_linear_revmap(domain, i));
+		generic_handle_domain_irq(domain, i);
 
 	chained_irq_exit(irq_chip, desc);
 }
diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c
index ea36bb00be80..cee839ca627e 100644
--- a/drivers/irqchip/irq-ingenic.c
+++ b/drivers/irqchip/irq-ingenic.c
@@ -49,8 +49,7 @@ static irqreturn_t intc_cascade(int irq, void *data)
 		while (pending) {
 			int bit = __fls(pending);
 
-			irq = irq_linear_revmap(domain, bit + (i * 32));
-			generic_handle_irq(irq);
+			generic_handle_domain_irq(domain, bit + (i * 32));
 			pending &= ~BIT(bit);
 		}
 	}
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index 8118ebe80b09..d47c8041e5bc 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -89,7 +89,7 @@ static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq)
 	struct keystone_irq_device *kirq = keystone_irq;
 	unsigned long wa_lock_flags;
 	unsigned long pending;
-	int src, virq;
+	int src, err;
 
 	dev_dbg(kirq->dev, "start irq %d\n", irq);
 
@@ -104,16 +104,14 @@ static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq)
 
 	for (src = 0; src < KEYSTONE_N_IRQ; src++) {
 		if (BIT(src) & pending) {
-			virq = irq_find_mapping(kirq->irqd, src);
-			dev_dbg(kirq->dev, "dispatch bit %d, virq %d\n",
-				src, virq);
-			if (!virq)
-				dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n",
-					 src, virq);
 			raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags);
-			generic_handle_irq(virq);
+			err = generic_handle_domain_irq(kirq->irqd, src);
 			raw_spin_unlock_irqrestore(&kirq->wa_lock,
 						   wa_lock_flags);
+
+			if (err)
+				dev_warn_ratelimited(kirq->dev, "spurious irq detected hwirq %d\n",
+						     src);
 		}
 	}
 
diff --git a/drivers/irqchip/irq-loongson-htpic.c b/drivers/irqchip/irq-loongson-htpic.c
index 1b801c4fb026..f4abdf156de7 100644
--- a/drivers/irqchip/irq-loongson-htpic.c
+++ b/drivers/irqchip/irq-loongson-htpic.c
@@ -48,7 +48,7 @@ static void htpic_irq_dispatch(struct irq_desc *desc)
 			break;
 		}
 
-		generic_handle_irq(irq_linear_revmap(priv->domain, bit));
+		generic_handle_domain_irq(priv->domain, bit);
 		pending &= ~BIT(bit);
 	}
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c
index 6392aafb9a63..60a335d7e64e 100644
--- a/drivers/irqchip/irq-loongson-htvec.c
+++ b/drivers/irqchip/irq-loongson-htvec.c
@@ -47,8 +47,8 @@ static void htvec_irq_dispatch(struct irq_desc *desc)
 		while (pending) {
 			int bit = __ffs(pending);
 
-			generic_handle_irq(irq_linear_revmap(priv->htvec_domain, bit +
-							     VEC_COUNT_PER_REG * i));
+			generic_handle_domain_irq(priv->htvec_domain,
+						  bit + VEC_COUNT_PER_REG * i);
 			pending &= ~BIT(bit);
 			handled = true;
 		}
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index 8ccb30421806..649c58391618 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -73,7 +73,7 @@ static void liointc_chained_handle_irq(struct irq_desc *desc)
 	while (pending) {
 		int bit = __ffs(pending);
 
-		generic_handle_irq(irq_find_mapping(gc->domain, bit));
+		generic_handle_domain_irq(gc->domain, bit);
 		pending &= ~BIT(bit);
 	}
 
diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index 7d9b388afe64..5e6f6e25f2ae 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -141,7 +141,7 @@ static void lpc32xx_sic_handler(struct irq_desc *desc)
 	while (hwirq) {
 		irq = __ffs(hwirq);
 		hwirq &= ~BIT(irq);
-		generic_handle_irq(irq_find_mapping(ic->domain, irq));
+		generic_handle_domain_irq(ic->domain, irq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index 61dbfda08527..55322da51c56 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -194,7 +194,7 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc)
 	struct ls_scfg_msir *msir = irq_desc_get_handler_data(desc);
 	struct ls_scfg_msi *msi_data = msir->msi_data;
 	unsigned long val;
-	int pos, size, virq, hwirq;
+	int pos, size, hwirq;
 
 	chained_irq_enter(irq_desc_get_chip(desc), desc);
 
@@ -206,9 +206,7 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc)
 	for_each_set_bit_from(pos, &val, size) {
 		hwirq = ((msir->bit_end - pos) << msi_data->cfg->ibs_shift) |
 			msir->srs;
-		virq = irq_find_mapping(msi_data->parent, hwirq);
-		if (virq)
-			generic_handle_irq(virq);
+		generic_handle_domain_irq(msi_data->parent, hwirq);
 	}
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
diff --git a/drivers/irqchip/irq-ls1x.c b/drivers/irqchip/irq-ls1x.c
index 353111a10413..77a3f7dfaaf0 100644
--- a/drivers/irqchip/irq-ls1x.c
+++ b/drivers/irqchip/irq-ls1x.c
@@ -50,7 +50,7 @@ static void ls1x_chained_handle_irq(struct irq_desc *desc)
 	while (pending) {
 		int bit = __ffs(pending);
 
-		generic_handle_irq(irq_find_mapping(priv->domain, bit));
+		generic_handle_domain_irq(priv->domain, bit);
 		pending &= ~BIT(bit);
 	}
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index a2cbf0acff1c..b146e069bf5b 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -148,7 +148,7 @@ int gic_get_c0_fdc_int(void)
 
 static void gic_handle_shared_int(bool chained)
 {
-	unsigned int intr, virq;
+	unsigned int intr;
 	unsigned long *pcpu_mask;
 	DECLARE_BITMAP(pending, GIC_MAX_INTRS);
 
@@ -165,12 +165,12 @@ static void gic_handle_shared_int(bool chained)
 	bitmap_and(pending, pending, pcpu_mask, gic_shared_intrs);
 
 	for_each_set_bit(intr, pending, gic_shared_intrs) {
-		virq = irq_linear_revmap(gic_irq_domain,
-					 GIC_SHARED_TO_HWIRQ(intr));
 		if (chained)
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(gic_irq_domain,
+						  GIC_SHARED_TO_HWIRQ(intr));
 		else
-			do_IRQ(virq);
+			do_IRQ(irq_find_mapping(gic_irq_domain,
+						GIC_SHARED_TO_HWIRQ(intr)));
 	}
 }
 
@@ -308,7 +308,7 @@ static struct irq_chip gic_edge_irq_controller = {
 static void gic_handle_local_int(bool chained)
 {
 	unsigned long pending, masked;
-	unsigned int intr, virq;
+	unsigned int intr;
 
 	pending = read_gic_vl_pend();
 	masked = read_gic_vl_mask();
@@ -316,12 +316,12 @@ static void gic_handle_local_int(bool chained)
 	bitmap_and(&pending, &pending, &masked, GIC_NUM_LOCAL_INTRS);
 
 	for_each_set_bit(intr, &pending, GIC_NUM_LOCAL_INTRS) {
-		virq = irq_linear_revmap(gic_irq_domain,
-					 GIC_LOCAL_TO_HWIRQ(intr));
 		if (chained)
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(gic_irq_domain,
+						  GIC_LOCAL_TO_HWIRQ(intr));
 		else
-			do_IRQ(virq);
+			do_IRQ(irq_find_mapping(gic_irq_domain,
+						GIC_LOCAL_TO_HWIRQ(intr)));
 	}
 }
 
diff --git a/drivers/irqchip/irq-mscc-ocelot.c b/drivers/irqchip/irq-mscc-ocelot.c
index 8235d98650c1..4d0c3532dbe7 100644
--- a/drivers/irqchip/irq-mscc-ocelot.c
+++ b/drivers/irqchip/irq-mscc-ocelot.c
@@ -107,7 +107,7 @@ static void ocelot_irq_handler(struct irq_desc *desc)
 	while (reg) {
 		u32 hwirq = __fls(reg);
 
-		generic_handle_irq(irq_find_mapping(d, hwirq));
+		generic_handle_domain_irq(d, hwirq);
 		reg &= ~(BIT(hwirq));
 	}
 
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index eec63951129a..dc1cee4b0fe1 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -91,15 +91,12 @@ static void mvebu_pic_handle_cascade_irq(struct irq_desc *desc)
 	struct mvebu_pic *pic = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long irqmap, irqn;
-	unsigned int cascade_irq;
 
 	irqmap = readl_relaxed(pic->base + PIC_CAUSE);
 	chained_irq_enter(chip, desc);
 
-	for_each_set_bit(irqn, &irqmap, BITS_PER_LONG) {
-		cascade_irq = irq_find_mapping(pic->domain, irqn);
-		generic_handle_irq(cascade_irq);
-	}
+	for_each_set_bit(irqn, &irqmap, BITS_PER_LONG)
+		generic_handle_domain_irq(pic->domain, irqn);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 3a7b7a7f20ca..4ecef6d83777 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -337,17 +337,12 @@ static void mvebu_sei_handle_cascade_irq(struct irq_desc *desc)
 		irqmap = readl_relaxed(sei->base + GICP_SECR(idx));
 		for_each_set_bit(bit, &irqmap, SEI_IRQ_COUNT_PER_REG) {
 			unsigned long hwirq;
-			unsigned int virq;
+			int err;
 
 			hwirq = idx * SEI_IRQ_COUNT_PER_REG + bit;
-			virq = irq_find_mapping(sei->sei_domain, hwirq);
-			if (likely(virq)) {
-				generic_handle_irq(virq);
-				continue;
-			}
-
-			dev_warn(sei->dev,
-				 "Spurious IRQ detected (hwirq %lu)\n", hwirq);
+			err = generic_handle_domain_irq(sei->sei_domain, hwirq);
+			if (unlikely(err))
+				dev_warn(sei->dev, "Spurious IRQ detected (hwirq %lu)\n", hwirq);
 		}
 	}
 
diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index c4b5ffb61954..b6868f7b805a 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -117,7 +117,7 @@ static void orion_bridge_irq_handler(struct irq_desc *desc)
 	while (stat) {
 		u32 hwirq = __fls(stat);
 
-		generic_handle_irq(irq_find_mapping(d, gc->irq_base + hwirq));
+		generic_handle_domain_irq(d, gc->irq_base + hwirq);
 		stat &= ~(1 << hwirq);
 	}
 }
diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c
index 0c4c8ed7064e..89c23a1566dc 100644
--- a/drivers/irqchip/irq-partition-percpu.c
+++ b/drivers/irqchip/irq-partition-percpu.c
@@ -124,13 +124,10 @@ static void partition_handle_irq(struct irq_desc *desc)
 			break;
 	}
 
-	if (unlikely(hwirq == part->nr_parts)) {
+	if (unlikely(hwirq == part->nr_parts))
 		handle_bad_irq(desc);
-	} else {
-		unsigned int irq;
-		irq = irq_find_mapping(part->domain, hwirq);
-		generic_handle_irq(irq);
-	}
+	else
+		generic_handle_domain_irq(part->domain, hwirq);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c
index 92fb5780dc10..fa8d89b02ec0 100644
--- a/drivers/irqchip/irq-pruss-intc.c
+++ b/drivers/irqchip/irq-pruss-intc.c
@@ -488,8 +488,7 @@ static void pruss_intc_irq_handler(struct irq_desc *desc)
 
 	while (true) {
 		u32 hipir;
-		unsigned int virq;
-		int hwirq;
+		int hwirq, err;
 
 		/* get highest priority pending PRUSS system event */
 		hipir = pruss_intc_read_reg(intc, PRU_INTC_HIPIR(host_irq));
@@ -497,16 +496,14 @@ static void pruss_intc_irq_handler(struct irq_desc *desc)
 			break;
 
 		hwirq = hipir & GENMASK(9, 0);
-		virq = irq_find_mapping(intc->domain, hwirq);
+		err = generic_handle_domain_irq(intc->domain, hwirq);
 
 		/*
 		 * NOTE: manually ACK any system events that do not have a
 		 * handler mapped yet
 		 */
-		if (WARN_ON_ONCE(!virq))
+		if (WARN_ON_ONCE(err))
 			pruss_intc_write_reg(intc, PRU_INTC_SICR, hwirq);
-		else
-			generic_handle_irq(virq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
index b57c67dfab5b..fd9f275592d2 100644
--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -85,7 +85,7 @@ static void realtek_irq_dispatch(struct irq_desc *desc)
 		goto out;
 	}
 	domain = irq_desc_get_handler_data(desc);
-	generic_handle_irq(irq_find_mapping(domain, __ffs(pending)));
+	generic_handle_domain_irq(domain, __ffs(pending));
 
 out:
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 11abc09ef76c..07a6d8b42b63 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -115,7 +115,7 @@ static irqreturn_t irqc_irq_handler(int irq, void *dev_id)
 	if (ioread32(p->iomem + DETECT_STATUS) & bit) {
 		iowrite32(bit, p->iomem + DETECT_STATUS);
 		irqc_dbg(i, "demux2");
-		generic_handle_irq(irq_find_mapping(p->irq_domain, i->hw_irq));
+		generic_handle_domain_irq(p->irq_domain, i->hw_irq);
 		return IRQ_HANDLED;
 	}
 	return IRQ_NONE;
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 97d4d04b0a80..cf74cfa82045 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -233,13 +233,11 @@ static void plic_handle_irq(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 
 	while ((hwirq = readl(claim))) {
-		int irq = irq_find_mapping(handler->priv->irqdomain, hwirq);
-
-		if (unlikely(irq <= 0))
+		int err = generic_handle_domain_irq(handler->priv->irqdomain,
+						    hwirq);
+		if (unlikely(err))
 			pr_warn_ratelimited("can't find mapping for hwirq %lu\n",
 					hwirq);
-		else
-			generic_handle_irq(irq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 4704f2ee5797..33c76710f845 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -257,7 +257,7 @@ static void stm32_irq_handler(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int virq, nbanks = domain->gc->num_chips;
+	unsigned int nbanks = domain->gc->num_chips;
 	struct irq_chip_generic *gc;
 	unsigned long pending;
 	int n, i, irq_base = 0;
@@ -268,11 +268,9 @@ static void stm32_irq_handler(struct irq_desc *desc)
 		gc = irq_get_domain_generic_chip(domain, irq_base);
 
 		while ((pending = stm32_exti_pending(gc))) {
-			for_each_set_bit(n, &pending, IRQS_PER_BANK) {
-				virq = irq_find_mapping(domain, irq_base + n);
-				generic_handle_irq(virq);
-			}
-		}
+			for_each_set_bit(n, &pending, IRQS_PER_BANK)
+				generic_handle_domain_irq(domain, irq_base + n);
+ 		}
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 9f2bd0c5d289..21d49791f855 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -88,10 +88,9 @@ static void sunxi_sc_nmi_handle_irq(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int virq = irq_find_mapping(domain, 0);
 
 	chained_irq_enter(chip, desc);
-	generic_handle_irq(virq);
+	generic_handle_domain_irq(domain, 0);
 	chained_irq_exit(chip, desc);
 }
 
diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c
index 9a63b02b8176..8a0e69298e83 100644
--- a/drivers/irqchip/irq-tb10x.c
+++ b/drivers/irqchip/irq-tb10x.c
@@ -91,7 +91,7 @@ static void tb10x_irq_cascade(struct irq_desc *desc)
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	unsigned int irq = irq_desc_get_irq(desc);
 
-	generic_handle_irq(irq_find_mapping(domain, irq));
+	generic_handle_domain_irq(domain, irq);
 }
 
 static int __init of_tb10x_init_irq(struct device_node *ictl,
diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
index ca1f593f4d13..97f454ec376b 100644
--- a/drivers/irqchip/irq-ti-sci-inta.c
+++ b/drivers/irqchip/irq-ti-sci-inta.c
@@ -147,7 +147,7 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc)
 	struct ti_sci_inta_vint_desc *vint_desc;
 	struct ti_sci_inta_irq_domain *inta;
 	struct irq_domain *domain;
-	unsigned int virq, bit;
+	unsigned int bit;
 	unsigned long val;
 
 	vint_desc = irq_desc_get_handler_data(desc);
@@ -159,11 +159,8 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc)
 	val = readq_relaxed(inta->base + vint_desc->vint_id * 0x1000 +
 			    VINT_STATUS_MASKED_OFFSET);
 
-	for_each_set_bit(bit, &val, MAX_EVENTS_PER_VINT) {
-		virq = irq_find_mapping(domain, vint_desc->events[bit].hwirq);
-		if (virq)
-			generic_handle_irq(virq);
-	}
+	for_each_set_bit(bit, &val, MAX_EVENTS_PER_VINT)
+		generic_handle_domain_irq(domain, vint_desc->events[bit].hwirq);
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
 }
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index 2325fb3c482b..34337a61b1ef 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -79,10 +79,9 @@ static void ts4800_ic_chained_handle_irq(struct irq_desc *desc)
 
 	do {
 		unsigned int bit = __ffs(status);
-		int irq = irq_find_mapping(data->domain, bit);
 
+		generic_handle_domain_irq(data->domain, bit);
 		status &= ~(1 << bit);
-		generic_handle_irq(irq);
 	} while (status);
 
 out:
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index f1386733d3bc..75be350cf82f 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -85,7 +85,7 @@ static void fpga_irq_handle(struct irq_desc *desc)
 		unsigned int irq = ffs(status) - 1;
 
 		status &= ~(1 << irq);
-		generic_handle_irq(irq_find_mapping(f->domain, irq));
+		generic_handle_domain_irq(f->domain, irq);
 	} while (status);
 
 out:
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index 62f3d29f9042..1e1f2d115257 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -225,7 +225,7 @@ static void vic_handle_irq_cascaded(struct irq_desc *desc)
 
 	while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) {
 		hwirq = ffs(stat) - 1;
-		generic_handle_irq(irq_find_mapping(vic->domain, hwirq));
+		generic_handle_domain_irq(vic->domain, hwirq);
 	}
 
 	chained_irq_exit(host_chip, desc);
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
index 8cd1bfc73057..356a59755d63 100644
--- a/drivers/irqchip/irq-xilinx-intc.c
+++ b/drivers/irqchip/irq-xilinx-intc.c
@@ -110,20 +110,6 @@ static struct irq_chip intc_dev = {
 	.irq_mask_ack = intc_mask_ack,
 };
 
-static unsigned int xintc_get_irq_local(struct xintc_irq_chip *irqc)
-{
-	unsigned int irq = 0;
-	u32 hwirq;
-
-	hwirq = xintc_read(irqc, IVR);
-	if (hwirq != -1U)
-		irq = irq_find_mapping(irqc->root_domain, hwirq);
-
-	pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq);
-
-	return irq;
-}
-
 unsigned int xintc_get_irq(void)
 {
 	unsigned int irq = -1;
@@ -164,15 +150,16 @@ static void xil_intc_irq_handler(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct xintc_irq_chip *irqc;
-	u32 pending;
 
 	irqc = irq_data_get_irq_handler_data(&desc->irq_data);
 	chained_irq_enter(chip, desc);
 	do {
-		pending = xintc_get_irq_local(irqc);
-		if (pending == 0)
+		u32 hwirq = xintc_read(irqc, IVR);
+
+		if (hwirq == -1U)
 			break;
-		generic_handle_irq(pending);
+
+		generic_handle_domain_irq(irqc->root_domain, hwirq);
 	} while (true);
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/qcom-irq-combiner.c b/drivers/irqchip/qcom-irq-combiner.c
index aa54bfcb0433..18e696dc7f4d 100644
--- a/drivers/irqchip/qcom-irq-combiner.c
+++ b/drivers/irqchip/qcom-irq-combiner.c
@@ -53,7 +53,6 @@ static void combiner_handle_irq(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 
 	for (reg = 0; reg < combiner->nregs; reg++) {
-		int virq;
 		int hwirq;
 		u32 bit;
 		u32 status;
@@ -70,10 +69,7 @@ static void combiner_handle_irq(struct irq_desc *desc)
 			bit = __ffs(status);
 			status &= ~(1 << bit);
 			hwirq = irq_nr(reg, bit);
-			virq = irq_find_mapping(combiner->domain, hwirq);
-			if (virq > 0)
-				generic_handle_irq(virq);
-
+			generic_handle_domain_irq(combiner->domain, hwirq);
 		}
 	}
 

From 9f8c7baedabc9693fbd7890f8fda40578bde4f73 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Wed, 9 Jun 2021 20:41:51 -0700
Subject: [PATCH 1346/3292] ACPICA: Add PRMT module header to facilitate
 parsing

ACPICA commit bd46cb07e614fd85ea69e54c1f6f0ae0a5fb20ab

This structure is used in to parse PRMT in other Operating Systems
that relies on using subtable headers in order to parse ACPI tables.
Although the PRMT doesn't have "subtables" it has a list of module
information structures that act as subtables.

Link: https://github.com/acpica/acpica/commit/bd46cb07
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 05b086d4d94a..2069ac38a4e2 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1728,6 +1728,11 @@ struct acpi_table_prmt_header {
 	u32 module_info_count;
 };
 
+struct acpi_prmt_module_header {
+	u16 revision;
+	u16 length;
+};
+
 struct acpi_prmt_module_info {
 	u16 revision;
 	u16 length;

From cefc7ca46235f01d5233e3abd4b79452af01d9e9 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Wed, 9 Jun 2021 20:41:52 -0700
Subject: [PATCH 1347/3292] ACPI: PRM: implement OperationRegion handler for
 the PlatformRtMechanism subtype

Platform Runtime Mechanism (PRM) is a firmware interface that exposes
a set of binary executables that can either be called from the AML
interpreter or device drivers by bypassing the AML interpreter.
This change implements the AML interpreter path.

According to the specification [1], PRM services are listed in an
ACPI table called the PRMT. This patch parses module and handler
information listed in the PRMT and registers the PlatformRtMechanism
OpRegion handler before ACPI tables are loaded.

Each service is defined by a 16-byte GUID and called from writing a
26-byte ASL buffer containing the identifier to a FieldUnit object
defined inside a PlatformRtMechanism OperationRegion.

    OperationRegion (PRMR, PlatformRtMechanism, 0, 26)
    Field (PRMR, BufferAcc, NoLock, Preserve)
    {
        PRMF, 208 // Write to this field to invoke the OperationRegion Handler
    }

The 26-byte ASL buffer is defined as the following:

Byte Offset   Byte Length    Description
=============================================================
     0             1         PRM OperationRegion handler status
     1             8         PRM service status
     9             1         PRM command
    10            16         PRM handler GUID

The ASL caller fills out a 26-byte buffer containing the PRM command
and the PRM handler GUID like so:

    /* Local0 is the PRM data buffer */
    Local0 = buffer (26){}

    /* Create byte fields over the buffer */
    CreateByteField (Local0, 0x9, CMD)
    CreateField (Local0, 0x50, 0x80, GUID)

    /* Fill in the command and data fields of the data buffer */
    CMD = 0 // run command
    GUID = ToUUID("xxxx-xx-xxx-xxxx")

    /*
     * Invoke PRM service with an ID that matches GUID and save the
     * result.
     */
    Local0 = (\_SB.PRMT.PRMF = Local0)

Byte offset 0 - 8 are written by the handler as a status passed back to AML
and used by ASL like so:

    /* Create byte fields over the buffer */
    CreateByteField (Local0, 0x0, PSTA)
    CreateQWordField (Local0, 0x1, USTA)

In this ASL code, PSTA contains a status from the OperationRegion and
USTA contains a status from the PRM service.

The 26-byte buffer is recieved by acpi_platformrt_space_handler. This
handler will look at the command value and the handler guid and take
the approperiate actions.

Command value    Action
=====================================================================
    0            Run the PRM service indicated by the PRM handler
                 GUID (bytes 10-26)

    1            Prevent PRM runtime updates from happening to the
                 service's parent module

    2            Allow PRM updates from happening to the service's parent module

This patch enables command value 0.

Link: https://uefi.org/sites/default/files/resources/Platform%20Runtime%20Mechanism%20-%20with%20legal%20notice.pdf # [1]
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Kconfig  |   5 +
 drivers/acpi/Makefile |   1 +
 drivers/acpi/bus.c    |   2 +
 drivers/acpi/prmt.c   | 303 ++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/tables.c |   9 ++
 include/linux/acpi.h  |   1 +
 include/linux/prmt.h  |   7 +
 7 files changed, 328 insertions(+)
 create mode 100644 drivers/acpi/prmt.c
 create mode 100644 include/linux/prmt.h

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index eedec61e3476..3972de7b7565 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -543,3 +543,8 @@ config X86_PM_TIMER
 
 	  You should nearly always say Y here because many modern
 	  systems require this timer.
+
+config ACPI_PRMT
+	bool "Platform Runtime Mechanism Support"
+	depends on EFI && X86_64
+	default y
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 700b41adf2db..efb0d1f64019 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -61,6 +61,7 @@ acpi-$(CONFIG_ACPI_FPDT)	+= acpi_fpdt.o
 acpi-$(CONFIG_ACPI_LPIT)	+= acpi_lpit.o
 acpi-$(CONFIG_ACPI_GENERIC_GSI) += irq.o
 acpi-$(CONFIG_ACPI_WATCHDOG)	+= acpi_watchdog.o
+acpi-$(CONFIG_ACPI_PRMT)	+= prmt.o
 
 # Address translation
 acpi-$(CONFIG_ACPI_ADXL)	+= acpi_adxl.o
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76..3484497923d5 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -30,6 +30,7 @@
 #include <linux/pci.h>
 #include <acpi/apei.h>
 #include <linux/suspend.h>
+#include <linux/prmt.h>
 
 #include "internal.h"
 
@@ -1330,6 +1331,7 @@ static int __init acpi_init(void)
 		acpi_kobj = NULL;
 	}
 
+	init_prmt();
 	result = acpi_bus_init();
 	if (result) {
 		disable_acpi();
diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
new file mode 100644
index 000000000000..33c274698d07
--- /dev/null
+++ b/drivers/acpi/prmt.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Author: Erik Kaneda <erik.kaneda@intel.com>
+ * Copyright 2020 Intel Corporation
+ *
+ * prmt.c
+ *
+ * Each PRM service is an executable that is run in a restricted environment
+ * that is invoked by writing to the PlatformRtMechanism OperationRegion from
+ * AML bytecode.
+ *
+ * init_prmt initializes the Platform Runtime Mechanism (PRM) services by
+ * processing data in the PRMT as well as registering an ACPI OperationRegion
+ * handler for the PlatformRtMechanism subtype.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/efi.h>
+#include <linux/acpi.h>
+#include <linux/prmt.h>
+#include <asm/efi.h>
+
+#pragma pack(1)
+struct prm_mmio_addr_range {
+	u64 phys_addr;
+	u64 virt_addr;
+	u32 length;
+};
+
+struct prm_mmio_info {
+	u64 mmio_count;
+	struct prm_mmio_addr_range addr_ranges[];
+};
+
+struct prm_buffer {
+	u8 prm_status;
+	u64 efi_status;
+	u8 prm_cmd;
+	guid_t handler_guid;
+};
+
+struct prm_context_buffer {
+	char signature[ACPI_NAMESEG_SIZE];
+	u16 revision;
+	u16 reserved;
+	guid_t identifier;
+	u64 static_data_buffer;
+	struct prm_mmio_info *mmio_ranges;
+};
+#pragma pack()
+
+
+LIST_HEAD(prm_module_list);
+
+struct prm_handler_info {
+	guid_t guid;
+	u64 handler_addr;
+	u64 static_data_buffer_addr;
+	u64 acpi_param_buffer_addr;
+
+	struct list_head handler_list;
+};
+
+struct prm_module_info {
+	guid_t guid;
+	u16 major_rev;
+	u16 minor_rev;
+	u16 handler_count;
+	struct prm_mmio_info *mmio_info;
+	bool updatable;
+
+	struct list_head module_list;
+	struct prm_handler_info handlers[];
+};
+
+
+static u64 efi_pa_va_lookup(u64 pa)
+{
+	efi_memory_desc_t *md;
+	u64 pa_offset = pa & ~PAGE_MASK;
+	u64 page = pa & PAGE_MASK;
+
+	for_each_efi_memory_desc(md) {
+		if (md->phys_addr < pa && pa < md->phys_addr + PAGE_SIZE * md->num_pages)
+			return pa_offset + md->virt_addr + page - md->phys_addr;
+	}
+
+	return 0;
+}
+
+
+#define get_first_handler(a) ((struct acpi_prmt_handler_info *) ((char *) (a) + a->handler_info_offset))
+#define get_next_handler(a) ((struct acpi_prmt_handler_info *) (sizeof(struct acpi_prmt_handler_info) + (char *) a))
+
+static int __init
+acpi_parse_prmt(union acpi_subtable_headers *header, const unsigned long end)
+{
+	struct acpi_prmt_module_info *module_info;
+	struct acpi_prmt_handler_info *handler_info;
+	struct prm_handler_info *th;
+	struct prm_module_info *tm;
+	u64 mmio_count = 0;
+	u64 cur_handler = 0;
+	u32 module_info_size = 0;
+	u64 mmio_range_size = 0;
+	void *temp_mmio;
+
+	module_info = (struct acpi_prmt_module_info *) header;
+	module_info_size = struct_size(tm, handlers, module_info->handler_info_count);
+	tm = kmalloc(module_info_size, GFP_KERNEL);
+
+	guid_copy(&tm->guid, (guid_t *) module_info->module_guid);
+	tm->major_rev = module_info->major_rev;
+	tm->minor_rev = module_info->minor_rev;
+	tm->handler_count = module_info->handler_info_count;
+	tm->updatable = true;
+
+	if (module_info->mmio_list_pointer) {
+		/*
+		 * Each module is associated with a list of addr
+		 * ranges that it can use during the service
+		 */
+		mmio_count = *(u64 *) memremap(module_info->mmio_list_pointer, 8, MEMREMAP_WB);
+		mmio_range_size = struct_size(tm->mmio_info, addr_ranges, mmio_count);
+		tm->mmio_info = kmalloc(mmio_range_size, GFP_KERNEL);
+		temp_mmio = memremap(module_info->mmio_list_pointer, mmio_range_size, MEMREMAP_WB);
+		memmove(tm->mmio_info, temp_mmio, mmio_range_size);
+	} else {
+		mmio_range_size = struct_size(tm->mmio_info, addr_ranges, mmio_count);
+		tm->mmio_info = kmalloc(mmio_range_size, GFP_KERNEL);
+		tm->mmio_info->mmio_count = 0;
+	}
+
+	INIT_LIST_HEAD(&tm->module_list);
+	list_add(&tm->module_list, &prm_module_list);
+
+	handler_info = get_first_handler(module_info);
+	do {
+		th = &tm->handlers[cur_handler];
+
+		guid_copy(&th->guid, (guid_t *)handler_info->handler_guid);
+		th->handler_addr = efi_pa_va_lookup(handler_info->handler_address);
+		th->static_data_buffer_addr = efi_pa_va_lookup(handler_info->static_data_buffer_address);
+		th->acpi_param_buffer_addr = efi_pa_va_lookup(handler_info->acpi_param_buffer_address);
+	} while (++cur_handler < tm->handler_count && (handler_info = get_next_handler(handler_info)));
+
+	return 0;
+}
+
+#define GET_MODULE	0
+#define GET_HANDLER	1
+
+static void *find_guid_info(const guid_t *guid, u8 mode)
+{
+	struct prm_handler_info *cur_handler;
+	struct prm_module_info *cur_module;
+	int i = 0;
+
+	list_for_each_entry(cur_module, &prm_module_list, module_list) {
+		for (i = 0; i < cur_module->handler_count; ++i) {
+			cur_handler = &cur_module->handlers[i];
+			if (guid_equal(guid, &cur_handler->guid)) {
+				if (mode == GET_MODULE)
+					return (void *)cur_module;
+				else
+					return (void *)cur_handler;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+
+static struct prm_module_info *find_prm_module(const guid_t *guid)
+{
+	return (struct prm_module_info *)find_guid_info(guid, GET_MODULE);
+}
+
+static struct prm_handler_info *find_prm_handler(const guid_t *guid)
+{
+	return (struct prm_handler_info *) find_guid_info(guid, GET_HANDLER);
+}
+
+/* In-coming PRM commands */
+
+#define PRM_CMD_RUN_SERVICE		0
+#define PRM_CMD_START_TRANSACTION	1
+#define PRM_CMD_END_TRANSACTION		2
+
+/* statuses that can be passed back to ASL */
+
+#define PRM_HANDLER_SUCCESS 		0
+#define PRM_HANDLER_ERROR 		1
+#define INVALID_PRM_COMMAND 		2
+#define PRM_HANDLER_GUID_NOT_FOUND 	3
+#define UPDATE_LOCK_ALREADY_HELD 	4
+#define UPDATE_UNLOCK_WITHOUT_LOCK 	5
+
+/*
+ * This is the PlatformRtMechanism opregion space handler.
+ * @function: indicates the read/write. In fact as the PlatformRtMechanism
+ * message is driven by command, only write is meaningful.
+ *
+ * @addr   : not used
+ * @bits   : not used.
+ * @value  : it is an in/out parameter. It points to the PRM message buffer.
+ * @handler_context: not used
+ */
+static acpi_status acpi_platformrt_space_handler(u32 function,
+						 acpi_physical_address addr,
+						 u32 bits, acpi_integer *value,
+						 void *handler_context,
+						 void *region_context)
+{
+	struct prm_buffer *buffer = ACPI_CAST_PTR(struct prm_buffer, value);
+	struct prm_handler_info *handler;
+	struct prm_module_info *module;
+	efi_status_t status;
+	struct prm_context_buffer context;
+
+	/*
+	 * The returned acpi_status will always be AE_OK. Error values will be
+	 * saved in the first byte of the PRM message buffer to be used by ASL.
+	 */
+	switch (buffer->prm_cmd) {
+	case PRM_CMD_RUN_SERVICE:
+
+		handler = find_prm_handler(&buffer->handler_guid);
+		module = find_prm_module(&buffer->handler_guid);
+		if (!handler || !module)
+			goto invalid_guid;
+
+		ACPI_COPY_NAMESEG(context.signature, "PRMC");
+		context.revision = 0x0;
+		context.reserved = 0x0;
+		context.identifier = handler->guid;
+		context.static_data_buffer = handler->static_data_buffer_addr;
+		context.mmio_ranges = module->mmio_info;
+
+		status = efi_call_virt_pointer(handler, handler_addr,
+					       handler->acpi_param_buffer_addr,
+					       &context);
+		if (status == EFI_SUCCESS) {
+			buffer->prm_status = PRM_HANDLER_SUCCESS;
+		} else {
+			buffer->prm_status = PRM_HANDLER_ERROR;
+			buffer->efi_status = status;
+		}
+		break;
+
+	case PRM_CMD_START_TRANSACTION:
+
+		module = find_prm_module(&buffer->handler_guid);
+		if (!module)
+			goto invalid_guid;
+
+		if (module->updatable)
+			module->updatable = false;
+		else
+			buffer->prm_status = UPDATE_LOCK_ALREADY_HELD;
+		break;
+
+	case PRM_CMD_END_TRANSACTION:
+
+		module = find_prm_module(&buffer->handler_guid);
+		if (!module)
+			goto invalid_guid;
+
+		if (module->updatable)
+			buffer->prm_status = UPDATE_UNLOCK_WITHOUT_LOCK;
+		else
+			module->updatable = true;
+		break;
+
+	default:
+
+		buffer->prm_status = INVALID_PRM_COMMAND;
+		break;
+	}
+
+	return AE_OK;
+
+invalid_guid:
+	buffer->prm_status = PRM_HANDLER_GUID_NOT_FOUND;
+	return AE_OK;
+}
+
+void __init init_prmt(void)
+{
+	acpi_status status;
+	int mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) +
+					  sizeof (struct acpi_table_prmt_header),
+					  0, acpi_parse_prmt, 0);
+	pr_info("PRM: found %u modules\n", mc);
+
+	status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
+						    ACPI_ADR_SPACE_PLATFORM_RT,
+						    &acpi_platformrt_space_handler,
+						    NULL, NULL);
+	if (ACPI_FAILURE(status))
+		pr_alert("PRM: OperationRegion handler could not be installed\n");
+}
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 9d581045acff..a37a1532a575 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -39,6 +39,7 @@ static int acpi_apic_instance __initdata;
 enum acpi_subtable_type {
 	ACPI_SUBTABLE_COMMON,
 	ACPI_SUBTABLE_HMAT,
+	ACPI_SUBTABLE_PRMT,
 };
 
 struct acpi_subtable_entry {
@@ -222,6 +223,8 @@ acpi_get_entry_type(struct acpi_subtable_entry *entry)
 		return entry->hdr->common.type;
 	case ACPI_SUBTABLE_HMAT:
 		return entry->hdr->hmat.type;
+	case ACPI_SUBTABLE_PRMT:
+		return 0;
 	}
 	return 0;
 }
@@ -234,6 +237,8 @@ acpi_get_entry_length(struct acpi_subtable_entry *entry)
 		return entry->hdr->common.length;
 	case ACPI_SUBTABLE_HMAT:
 		return entry->hdr->hmat.length;
+	case ACPI_SUBTABLE_PRMT:
+		return entry->hdr->prmt.length;
 	}
 	return 0;
 }
@@ -246,6 +251,8 @@ acpi_get_subtable_header_length(struct acpi_subtable_entry *entry)
 		return sizeof(entry->hdr->common);
 	case ACPI_SUBTABLE_HMAT:
 		return sizeof(entry->hdr->hmat);
+	case ACPI_SUBTABLE_PRMT:
+		return sizeof(entry->hdr->prmt);
 	}
 	return 0;
 }
@@ -255,6 +262,8 @@ acpi_get_subtable_type(char *id)
 {
 	if (strncmp(id, ACPI_SIG_HMAT, 4) == 0)
 		return ACPI_SUBTABLE_HMAT;
+	if (strncmp(id, ACPI_SIG_PRMT, 4) == 0)
+		return ACPI_SUBTABLE_PRMT;
 	return ACPI_SUBTABLE_COMMON;
 }
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..4c07ac22c6ba 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -132,6 +132,7 @@ enum acpi_address_range_id {
 union acpi_subtable_headers {
 	struct acpi_subtable_header common;
 	struct acpi_hmat_structure hmat;
+	struct acpi_prmt_module_header prmt;
 };
 
 typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
diff --git a/include/linux/prmt.h b/include/linux/prmt.h
new file mode 100644
index 000000000000..24da8364b919
--- /dev/null
+++ b/include/linux/prmt.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifdef CONFIG_ACPI_PRMT
+void init_prmt(void);
+#else
+static inline void init_prmt(void) { }
+#endif

From 60faa8f1ac6e0588d53eb9a345adcdbcc96a8f47 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Wed, 9 Jun 2021 20:41:53 -0700
Subject: [PATCH 1348/3292] ACPI: Add \_SB._OSC bit for PRM

Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c   | 1 +
 include/linux/acpi.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3484497923d5..e8119a9eca28 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -305,6 +305,7 @@ static void acpi_bus_osc_negotiate_platform_control(void)
 
 	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT;
 	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PCLPI_SUPPORT;
+	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PRM_SUPPORT;
 
 #ifdef CONFIG_ARM64
 	capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4c07ac22c6ba..a618ba698a5c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -551,6 +551,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_OSLPI_SUPPORT			0x00000100
 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT		0x00001000
 #define OSC_SB_GENERIC_INITIATOR_SUPPORT	0x00002000
+#define OSC_SB_PRM_SUPPORT			0x00020000
 #define OSC_SB_NATIVE_USB4_SUPPORT		0x00040000
 
 extern bool osc_sb_apei_support_acked;

From f39de44fbb478ed476f001ca505b2b58d3345a30 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Jun 2021 19:51:30 +0200
Subject: [PATCH 1349/3292] ACPI: Remove redundant clearing of
 context->ret.pointer from acpi_run_osc()

context->ret.pointer already gets set to NULL at the beginning of
acpi_run_osc() and it only gets assigned a new value in the success
path near the end of acpi_run_osc(), so the clearing of
context->ret.pointer (when status != AE_OK) at the end of
acpi_run_osc() is redundant since it will always already be NULL when
status != AE_OK.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76..1a4cff4bec52 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -262,8 +262,6 @@ out_success:
 
 out_kfree:
 	kfree(output.pointer);
-	if (status != AE_OK)
-		context->ret.pointer = NULL;
 	return status;
 }
 EXPORT_SYMBOL(acpi_run_osc);

From 23db673d7e5194c8fbbb8c307e23960767305c09 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 Jun 2021 18:33:12 +0100
Subject: [PATCH 1350/3292] ACPI: scan: initialize local variable to avoid
 garbage being returned

In the unlikely event that there are no callback calls made in
acpi_walk_dep_device_list(), local variable ret will be returned as
an uninitialized value.

Clean up static analysis warnings by ensuring ret is initialized.

Addresses-Coverity: ("Uninitialized scalar variable")
Fixes: a9e10e587304 ("ACPI: scan: Extend acpi_walk_dep_device_list()")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 708c1d84e7bf..f64119a17382 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2150,7 +2150,7 @@ int acpi_walk_dep_device_list(acpi_handle handle,
 			      void *data)
 {
 	struct acpi_dep_data *dep, *tmp;
-	int ret;
+	int ret = 0;
 
 	mutex_lock(&acpi_dep_list_lock);
 	list_for_each_entry_safe(dep, tmp, &acpi_dep_list, node) {

From 1b8b719229197b7afa1b1191e083fb41ace095c5 Mon Sep 17 00:00:00 2001
From: Austin Kim <austindh.kim@gmail.com>
Date: Wed, 9 Jun 2021 00:09:29 +0100
Subject: [PATCH 1351/3292] LSM: SafeSetID: Mark safesetid_initialized as
 __initdata

Mark safesetid_initialized as __initdata since it is only used
in initialization routine.

Signed-off-by: Austin Kim <austindh.kim@gmail.com>
Signed-off-by: Micah Morton <mortonm@chromium.org>
---
 security/safesetid/lsm.c | 2 +-
 security/safesetid/lsm.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/safesetid/lsm.c b/security/safesetid/lsm.c
index 1079c6d54784..963f4ad9cb66 100644
--- a/security/safesetid/lsm.c
+++ b/security/safesetid/lsm.c
@@ -22,7 +22,7 @@
 #include "lsm.h"
 
 /* Flag indicating whether initialization completed */
-int safesetid_initialized;
+int safesetid_initialized __initdata;
 
 struct setid_ruleset __rcu *safesetid_setuid_rules;
 struct setid_ruleset __rcu *safesetid_setgid_rules;
diff --git a/security/safesetid/lsm.h b/security/safesetid/lsm.h
index bde8c43a3767..d346f4849cea 100644
--- a/security/safesetid/lsm.h
+++ b/security/safesetid/lsm.h
@@ -19,7 +19,7 @@
 #include <linux/hashtable.h>
 
 /* Flag indicating whether initialization completed */
-extern int safesetid_initialized;
+extern int safesetid_initialized __initdata;
 
 enum sid_policy_type {
 	SIDPOL_DEFAULT, /* source ID is unaffected by policy */

From d409989b59ad0b8d108706db25e17c320a9664eb Mon Sep 17 00:00:00 2001
From: Chen Li <chenli@uniontech.com>
Date: Mon, 7 Jun 2021 09:44:35 +0800
Subject: [PATCH 1352/3292] netlink: simplify NLMSG_DATA with NLMSG_HDRLEN

The NLMSG_LENGTH(0) may confuse the API users,
NLMSG_HDRLEN is much more clear.

Besides, some code style problems are also fixed.
Signed-off-by: Chen Li <chenli@uniontech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 3d94269bbfa8..4c0cde075c27 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -91,9 +91,10 @@ struct nlmsghdr {
 #define NLMSG_HDRLEN	 ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
 #define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
 #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
-#define NLMSG_DATA(nlh)  ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_DATA(nlh)  ((void *)(((char *)nlh) + NLMSG_HDRLEN))
 #define NLMSG_NEXT(nlh,len)	 ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
-				  (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+				  (struct nlmsghdr *)(((char *)(nlh)) + \
+				  NLMSG_ALIGN((nlh)->nlmsg_len)))
 #define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
 			   (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
 			   (nlh)->nlmsg_len <= (len))

From b040aab763236568e198ea193cb8b3e930fd0a37 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Mon, 7 Jun 2021 10:36:45 +0800
Subject: [PATCH 1353/3292] net: phy: probe for C45 PHYs that return PHY ID of
 zero in C22 space

PHY devices such as the Marvell Alaska 88E2110 does not return a valid
PHY ID when probed using Clause-22. The current implementation treats
PHY ID of zero as a non-error and valid PHY ID, and causing the PHY
device failed to bind to the Marvell driver.

For such devices, we do an additional probe in the Clause-45 space,
if a valid PHY ID is returned, we then proceed to attach the PHY
device to the matching PHY ID driver.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 1539ea021ac0..495d86b4af7c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -870,6 +870,18 @@ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
 	if (r)
 		return ERR_PTR(r);
 
+	/* PHY device such as the Marvell Alaska 88E2110 will return a PHY ID
+	 * of 0 when probed using get_phy_c22_id() with no error. Proceed to
+	 * probe with C45 to see if we're able to get a valid PHY ID in the C45
+	 * space, if successful, create the C45 PHY device.
+	 */
+	if (!is_c45 && phy_id == 0 && bus->probe_capabilities >= MDIOBUS_C45) {
+		r = get_phy_c45_ids(bus, addr, &c45_ids);
+		if (!r)
+			return phy_device_create(bus, addr, phy_id,
+						 true, &c45_ids);
+	}
+
 	return phy_device_create(bus, addr, phy_id, is_c45, &c45_ids);
 }
 EXPORT_SYMBOL(get_phy_device);

From aced6d37df797db46fa4d3540f657e8e46f2667c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 22:01:52 +0800
Subject: [PATCH 1354/3292] net: ethernet: ti: cpsw: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c     | 3 +--
 drivers/net/ethernet/ti/cpsw_new.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c0cd7de88316..b1e80cc96f56 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1532,8 +1532,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	}
 	cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
 
-	ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ss_regs = devm_ioremap_resource(dev, ss_res);
+	ss_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &ss_res);
 	if (IS_ERR(ss_regs))
 		return PTR_ERR(ss_regs);
 	cpsw->regs = ss_regs;
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 69b7a4e0220a..8d4f3c53385d 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1883,8 +1883,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	}
 	cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
 
-	ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ss_regs = devm_ioremap_resource(dev, ss_res);
+	ss_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &ss_res);
 	if (IS_ERR(ss_regs)) {
 		ret = PTR_ERR(ss_regs);
 		return ret;

From 0699073951e354069b4cfec28dbc4c35cef46e97 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Jun 2021 22:17:44 +0800
Subject: [PATCH 1355/3292] net: davinci_emac: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code and avoid a null-ptr-deref by checking 'res' in it.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/davinci_emac.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index f9417b44cae8..c674e34b6839 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1814,13 +1814,12 @@ static int davinci_emac_probe(struct platform_device *pdev)
 	priv->bus_freq_mhz = (u32)(emac_bus_frequency / 1000000);
 
 	/* Get EMAC platform data */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->emac_base_phys = res->start + pdata->ctrl_reg_offset;
-	priv->remap_addr = devm_ioremap_resource(&pdev->dev, res);
+	priv->remap_addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(priv->remap_addr)) {
 		rc = PTR_ERR(priv->remap_addr);
 		goto no_pdata;
 	}
+	priv->emac_base_phys = res->start + pdata->ctrl_reg_offset;
 
 	res_ctrl = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (res_ctrl) {

From 268551503d66dc0a266fe6034c84a31ab4f3edf7 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Wed, 9 Jun 2021 18:11:59 -0700
Subject: [PATCH 1356/3292] vsock/vmci: remove the repeated word "be"

Remove the repeated word "be".

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/vmci_transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c99bc4ce78e2..e617ed93f06b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1248,7 +1248,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
 	vsock_remove_pending(listener, pending);
 	vsock_enqueue_accept(listener, pending);
 
-	/* Callers of accept() will be be waiting on the listening socket, not
+	/* Callers of accept() will be waiting on the listening socket, not
 	 * the pending socket.
 	 */
 	listener->sk_data_ready(listener);

From 4e03d073afc4f6e5b1f34e58cce7d9942d703488 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Wed, 9 Jun 2021 20:09:35 -0700
Subject: [PATCH 1357/3292] af_unix: remove the repeated word "and"

Remove the repeated word "and".

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5a31307ceb76..4d4f24cbd86b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1392,7 +1392,7 @@ restart:
 
 	unix_state_unlock(sk);
 
-	/* take ten and and send info to listening sock */
+	/* take ten and send info to listening sock */
 	spin_lock(&other->sk_receive_queue.lock);
 	__skb_queue_tail(&other->sk_receive_queue, skb);
 	spin_unlock(&other->sk_receive_queue.lock);

From 15139bcbb610f54f4362f099ae6bf9b824b97c82 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Wed, 9 Jun 2021 22:50:46 -0700
Subject: [PATCH 1358/3292] node.c: fix the use of indefinite article

Fix the use of indefinite article.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 81af92954c6c..9947b7dfe1d2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1214,7 +1214,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
 		/* Peer has changed i/f address without rebooting.
 		 * If so, the link will reset soon, and the next
 		 * discovery will be accepted. So we can ignore it.
-		 * It may also be an cloned or malicious peer having
+		 * It may also be a cloned or malicious peer having
 		 * chosen the same node address and signature as an
 		 * existing one.
 		 * Ignore requests until the link goes down, if ever.

From 326af505ca1fbad6b9b7ba9f36399ceba0b6aba2 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Wed, 9 Jun 2021 23:18:53 -0700
Subject: [PATCH 1359/3292] tipc: socket.c: fix the use of copular verb

Fix the use of copular verb.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 575a0238deb2..34a97ea36cc8 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -662,7 +662,7 @@ static int tipc_release(struct socket *sock)
  * @skaddr: socket address describing name(s) and desired operation
  * @alen: size of socket address data structure
  *
- * Name and name sequence binding is indicated using a positive scope value;
+ * Name and name sequence binding are indicated using a positive scope value;
  * a negative scope value unbinds the specified name.  Specifying no name
  * (i.e. a socket address length of 0) unbinds all names from the socket.
  *

From f1dcdc075617a2a8a866f4f928a780287a553ed0 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Wed, 9 Jun 2021 23:29:58 -0700
Subject: [PATCH 1360/3292] tipc:subscr.c: fix a spelling mistake

Fix a spelling mistake.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 8e00d739f03a..05d49ad81290 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -66,7 +66,7 @@ static void tipc_sub_send_event(struct tipc_subscription *sub,
 /**
  * tipc_sub_check_overlap - test for subscription overlap with the given values
  * @subscribed: the service range subscribed for
- * @found: the service range we are checning for match
+ * @found: the service range we are checking for match
  *
  * Returns true if there is overlap, otherwise false.
  */

From 5c32fdbb899707ffa61a3887f12f57277287d643 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:19:58 +0800
Subject: [PATCH 1361/3292] net: ixp4xx_hss: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index ecea09fd21cb..2c135a9ac7cc 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -83,7 +83,6 @@
 #define PKT_HDLC_CRC_32			0x2 /* default = CRC-16 */
 #define PKT_HDLC_MSB_ENDIAN		0x4 /* default = LE */
 
-
 /* hss_config, PCRs */
 /* Frame sync sampling, default = active low */
 #define PCR_FRM_SYNC_ACTIVE_HIGH	0x40000000
@@ -150,7 +149,6 @@
 /* HSS number, default = 0 (first) */
 #define CCR_SECOND_HSS			0x01000000
 
-
 /* hss_config, clkCR: main:10, num:10, denom:12 */
 #define CLK42X_SPEED_EXP	((0x3FF << 22) | (  2 << 12) |   15) /*65 KHz*/
 
@@ -208,7 +206,6 @@
 #define HSS_CONFIG_TX_LUT	0x18 /* channel look-up tables */
 #define HSS_CONFIG_RX_LUT	0x38
 
-
 /* NPE command codes */
 /* writes the ConfigWord value to the location specified by offset */
 #define PORT_CONFIG_WRITE		0x40
@@ -240,7 +237,6 @@
 #define ERR_HDLC_ABORT		6 /* abort sequence received */
 #define ERR_DISCONNECTING	7 /* disconnect is in progress */
 
-
 #ifdef __ARMEB__
 typedef struct sk_buff buffer_t;
 #define free_buffer dev_kfree_skb
@@ -308,7 +304,6 @@ struct desc {
 	u32 __reserved1[4];
 };
 
-
 #define rx_desc_phys(port, n)	((port)->desc_tab_phys +		\
 				 (n) * sizeof(struct desc))
 #define rx_desc_ptr(port, n)	(&(port)->desc_tab[n])
@@ -567,7 +562,6 @@ static inline void debug_pkt(struct net_device *dev, const char *func,
 #endif
 }
 
-
 static inline void debug_desc(u32 phys, struct desc *desc)
 {
 #if DEBUG_DESC
@@ -606,7 +600,6 @@ static inline void queue_put_desc(unsigned int queue, u32 phys,
 	   length and queues >= 32 don't support this check anyway. */
 }
 
-
 static inline void dma_unmap_tx(struct port *port, struct desc *desc)
 {
 #ifdef __ARMEB__
@@ -619,7 +612,6 @@ static inline void dma_unmap_tx(struct port *port, struct desc *desc)
 #endif
 }
 
-
 static void hss_hdlc_set_carrier(void *pdev, int carrier)
 {
 	struct net_device *netdev = pdev;
@@ -784,7 +776,6 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget)
 	return received;	/* not all work done */
 }
 
-
 static void hss_hdlc_txdone_irq(void *pdev)
 {
 	struct net_device *dev = pdev;
@@ -910,7 +901,6 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-
 static int request_hdlc_queues(struct port *port)
 {
 	int err;
@@ -1160,7 +1150,6 @@ static int hss_hdlc_close(struct net_device *dev)
 	return 0;
 }
 
-
 static int hss_hdlc_attach(struct net_device *dev, unsigned short encoding,
 			   unsigned short parity)
 {

From 6f2016ed65385223ba8ace9c8897d04e3c2e1f16 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:19:59 +0800
Subject: [PATCH 1362/3292] net: ixp4xx_hss: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 2c135a9ac7cc..6aa01d548008 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -341,6 +341,7 @@ static inline struct port* dev_to_port(struct net_device *dev)
 static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt)
 {
 	int i;
+
 	for (i = 0; i < cnt; i++)
 		dest[i] = swab32(src[i]);
 }
@@ -353,6 +354,7 @@ static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt)
 static void hss_npe_send(struct port *port, struct msg *msg, const char* what)
 {
 	u32 *val = (u32*)msg;
+
 	if (npe_send_message(port->npe, msg, what)) {
 		pr_crit("HSS-%i: unable to send command [%08X:%08X] to %s\n",
 			port->id, val[0], val[1], npe_name(port->npe));
@@ -1006,6 +1008,7 @@ static void destroy_hdlc_queues(struct port *port)
 		for (i = 0; i < RX_DESCS; i++) {
 			struct desc *desc = rx_desc_ptr(port, i);
 			buffer_t *buff = port->rx_buff_tab[i];
+
 			if (buff) {
 				dma_unmap_single(&port->netdev->dev,
 						 desc->data, RX_SIZE,
@@ -1016,6 +1019,7 @@ static void destroy_hdlc_queues(struct port *port)
 		for (i = 0; i < TX_DESCS; i++) {
 			struct desc *desc = tx_desc_ptr(port, i);
 			buffer_t *buff = port->tx_buff_tab[i];
+
 			if (buff) {
 				dma_unmap_tx(port, desc);
 				free_buffer(buff);
@@ -1213,6 +1217,7 @@ static void find_best_clock(u32 timer_freq, u32 rate, u32 *best, u32 *reg)
 
 	for (b = 0; b < 0x400; b++) {
 		u64 c = (b + 1) * (u64)rate;
+
 		do_div(c, timer_freq - rate * a);
 		c--;
 		if (c >= 0xFFF) { /* 12-bit - no need to check more 'b's */

From 6487fab04f2734eefaae1c3c32eec364d5e26bf3 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:20:00 +0800
Subject: [PATCH 1363/3292] net: ixp4xx_hss: fix the code style issue about
 "foo* bar"

Fix the checkpatch error as "foo* bar" and should be "foo *bar",
and "(foo*)" should be "(foo *)".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 6aa01d548008..48bc91462593 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -332,7 +332,7 @@ static const struct {
  * utility functions
  ****************************************************************************/
 
-static inline struct port* dev_to_port(struct net_device *dev)
+static inline struct port *dev_to_port(struct net_device *dev)
 {
 	return dev_to_hdlc(dev)->priv;
 }
@@ -351,9 +351,9 @@ static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt)
  * HSS access
  ****************************************************************************/
 
-static void hss_npe_send(struct port *port, struct msg *msg, const char* what)
+static void hss_npe_send(struct port *port, struct msg *msg, const char *what)
 {
-	u32 *val = (u32*)msg;
+	u32 *val = (u32 *)msg;
 
 	if (npe_send_message(port->npe, msg, what)) {
 		pr_crit("HSS-%i: unable to send command [%08X:%08X] to %s\n",

From 99ebe65eb9c0ada015931d239d9f2d1dc8897fee Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:20:01 +0800
Subject: [PATCH 1364/3292] net: ixp4xx_hss: move out assignment in if
 condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 66 +++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 48bc91462593..d657bca1750b 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -510,10 +510,12 @@ static int hss_load_firmware(struct port *port)
 	if (port->initialized)
 		return 0;
 
-	if (!npe_running(port->npe) &&
-	    (err = npe_load_firmware(port->npe, npe_name(port->npe),
-				     port->dev)))
-		return err;
+	if (!npe_running(port->npe)) {
+		err = npe_load_firmware(port->npe, npe_name(port->npe),
+					port->dev);
+		if (err)
+			return err;
+	}
 
 	/* HDLC mode configuration */
 	memset(&msg, 0, sizeof(msg));
@@ -579,7 +581,8 @@ static inline int queue_get_desc(unsigned int queue, struct port *port,
 	u32 phys, tab_phys, n_desc;
 	struct desc *tab;
 
-	if (!(phys = qmgr_get_entry(queue)))
+	phys = qmgr_get_entry(queue);
+	if (!phys)
 		return -1;
 
 	BUG_ON(phys & 0x1F);
@@ -664,7 +667,8 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget)
 		u32 phys;
 #endif
 
-		if ((n = queue_get_desc(rxq, port, 0)) < 0) {
+		n = queue_get_desc(rxq, port, 0);
+		if (n < 0) {
 #if DEBUG_RX
 			printk(KERN_DEBUG "%s: hss_hdlc_poll"
 			       " napi_complete\n", dev->name);
@@ -699,7 +703,8 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget)
 		switch (desc->status) {
 		case 0:
 #ifdef __ARMEB__
-			if ((skb = netdev_alloc_skb(dev, RX_SIZE)) != NULL) {
+			skb = netdev_alloc_skb(dev, RX_SIZE);
+			if (skb) {
 				phys = dma_map_single(&dev->dev, skb->data,
 						      RX_SIZE,
 						      DMA_FROM_DEVICE);
@@ -847,7 +852,8 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
 #else
 	offset = (int)skb->data & 3; /* keep 32-bit alignment */
 	bytes = ALIGN(offset + len, 4);
-	if (!(mem = kmalloc(bytes, GFP_ATOMIC))) {
+	mem = kmalloc(bytes, GFP_ATOMIC);
+	if (!mem) {
 		dev_kfree_skb(skb);
 		dev->stats.tx_dropped++;
 		return NETDEV_TX_OK;
@@ -966,8 +972,9 @@ static int init_hdlc_queues(struct port *port)
 			return -ENOMEM;
 	}
 
-	if (!(port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL,
-					      &port->desc_tab_phys)))
+	port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL,
+					&port->desc_tab_phys);
+	if (!port->desc_tab)
 		return -ENOMEM;
 	memset(port->desc_tab, 0, POOL_ALLOC_SIZE);
 	memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */
@@ -979,11 +986,13 @@ static int init_hdlc_queues(struct port *port)
 		buffer_t *buff;
 		void *data;
 #ifdef __ARMEB__
-		if (!(buff = netdev_alloc_skb(port->netdev, RX_SIZE)))
+		buff = netdev_alloc_skb(port->netdev, RX_SIZE);
+		if (!buff)
 			return -ENOMEM;
 		data = buff->data;
 #else
-		if (!(buff = kmalloc(RX_SIZE, GFP_KERNEL)))
+		buff = kmalloc(RX_SIZE, GFP_KERNEL);
+		if (!buff)
 			return -ENOMEM;
 		data = buff;
 #endif
@@ -1041,23 +1050,29 @@ static int hss_hdlc_open(struct net_device *dev)
 	unsigned long flags;
 	int i, err = 0;
 
-	if ((err = hdlc_open(dev)))
+	err = hdlc_open(dev);
+	if (err)
 		return err;
 
-	if ((err = hss_load_firmware(port)))
+	err = hss_load_firmware(port);
+	if (err)
 		goto err_hdlc_close;
 
-	if ((err = request_hdlc_queues(port)))
+	err = request_hdlc_queues(port);
+	if (err)
 		goto err_hdlc_close;
 
-	if ((err = init_hdlc_queues(port)))
+	err = init_hdlc_queues(port);
+	if (err)
 		goto err_destroy_queues;
 
 	spin_lock_irqsave(&npe_lock, flags);
-	if (port->plat->open)
-		if ((err = port->plat->open(port->id, dev,
-					    hss_hdlc_set_carrier)))
+	if (port->plat->open) {
+		err = port->plat->open(port->id, dev, hss_hdlc_set_carrier);
+		if (err)
 			goto err_unlock;
+	}
+
 	spin_unlock_irqrestore(&npe_lock, flags);
 
 	/* Populate queues with buffers, no failure after this point */
@@ -1328,15 +1343,19 @@ static int hss_init_one(struct platform_device *pdev)
 	hdlc_device *hdlc;
 	int err;
 
-	if ((port = kzalloc(sizeof(*port), GFP_KERNEL)) == NULL)
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
 		return -ENOMEM;
 
-	if ((port->npe = npe_request(0)) == NULL) {
+	port->npe = npe_request(0);
+	if (!port->npe) {
 		err = -ENODEV;
 		goto err_free;
 	}
 
-	if ((port->netdev = dev = alloc_hdlcdev(port)) == NULL) {
+	dev = alloc_hdlcdev(port);
+	port->netdev = alloc_hdlcdev(port);
+	if (!port->netdev) {
 		err = -ENOMEM;
 		goto err_plat;
 	}
@@ -1355,7 +1374,8 @@ static int hss_init_one(struct platform_device *pdev)
 	port->plat = pdev->dev.platform_data;
 	netif_napi_add(dev, &port->napi, hss_hdlc_poll, NAPI_WEIGHT);
 
-	if ((err = register_hdlc_device(dev)))
+	err = register_hdlc_device(dev);
+	if (err)
 		goto err_free_netdev;
 
 	platform_set_drvdata(pdev, port);

From dee014567732c7d16c395777e32952d7c59f01ca Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:20:02 +0800
Subject: [PATCH 1365/3292] net: ixp4xx_hss: add some required spaces

Add space required before the open parenthesis '('.
Add space required after that close brace '}'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index d657bca1750b..d8f1df9be0cf 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -322,7 +322,7 @@ static DEFINE_SPINLOCK(npe_lock);
 
 static const struct {
 	int tx, txdone, rx, rxfree;
-}queue_ids[2] = {{HSS0_PKT_TX0_QUEUE, HSS0_PKT_TXDONE_QUEUE, HSS0_PKT_RX_QUEUE,
+} queue_ids[2] = {{HSS0_PKT_TX0_QUEUE, HSS0_PKT_TXDONE_QUEUE, HSS0_PKT_RX_QUEUE,
 		  HSS0_PKT_RXFREE0_QUEUE},
 		 {HSS1_PKT_TX0_QUEUE, HSS1_PKT_TXDONE_QUEUE, HSS1_PKT_RX_QUEUE,
 		  HSS1_PKT_RXFREE0_QUEUE},
@@ -1177,7 +1177,7 @@ static int hss_hdlc_attach(struct net_device *dev, unsigned short encoding,
 	if (encoding != ENCODING_NRZ)
 		return -EINVAL;
 
-	switch(parity) {
+	switch (parity) {
 	case PARITY_CRC16_PR1_CCITT:
 		port->hdlc_cfg = 0;
 		return 0;
@@ -1264,7 +1264,7 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (cmd != SIOCWANDEV)
 		return hdlc_ioctl(dev, ifr, cmd);
 
-	switch(ifr->ifr_settings.type) {
+	switch (ifr->ifr_settings.type) {
 	case IF_GET_IFACE:
 		ifr->ifr_settings.type = IF_IFACE_V35;
 		if (ifr->ifr_settings.size < size) {
@@ -1281,7 +1281,7 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 	case IF_IFACE_SYNC_SERIAL:
 	case IF_IFACE_V35:
-		if(!capable(CAP_NET_ADMIN))
+		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 		if (copy_from_user(&new_line, line, size))
 			return -EFAULT;

From 137d5672f80f8f08612659d6787a1fd196849c76 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:20:03 +0800
Subject: [PATCH 1366/3292] net: ixp4xx_hss: remove redundant spaces

According to the chackpatch.pl,
space prohibited after that open parenthesis '('.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index d8f1df9be0cf..30a6df4b2eae 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -150,21 +150,21 @@
 #define CCR_SECOND_HSS			0x01000000
 
 /* hss_config, clkCR: main:10, num:10, denom:12 */
-#define CLK42X_SPEED_EXP	((0x3FF << 22) | (  2 << 12) |   15) /*65 KHz*/
+#define CLK42X_SPEED_EXP	((0x3FF << 22) | (2 << 12) |   15) /*65 KHz*/
 
-#define CLK42X_SPEED_512KHZ	((  130 << 22) | (  2 << 12) |   15)
-#define CLK42X_SPEED_1536KHZ	((   43 << 22) | ( 18 << 12) |   47)
-#define CLK42X_SPEED_1544KHZ	((   43 << 22) | ( 33 << 12) |  192)
-#define CLK42X_SPEED_2048KHZ	((   32 << 22) | ( 34 << 12) |   63)
-#define CLK42X_SPEED_4096KHZ	((   16 << 22) | ( 34 << 12) |  127)
-#define CLK42X_SPEED_8192KHZ	((    8 << 22) | ( 34 << 12) |  255)
+#define CLK42X_SPEED_512KHZ	((130 << 22) | (2 << 12) |   15)
+#define CLK42X_SPEED_1536KHZ	((43 << 22) | (18 << 12) |   47)
+#define CLK42X_SPEED_1544KHZ	((43 << 22) | (33 << 12) |  192)
+#define CLK42X_SPEED_2048KHZ	((32 << 22) | (34 << 12) |   63)
+#define CLK42X_SPEED_4096KHZ	((16 << 22) | (34 << 12) |  127)
+#define CLK42X_SPEED_8192KHZ	((8 << 22) | (34 << 12) |  255)
 
-#define CLK46X_SPEED_512KHZ	((  130 << 22) | ( 24 << 12) |  127)
-#define CLK46X_SPEED_1536KHZ	((   43 << 22) | (152 << 12) |  383)
-#define CLK46X_SPEED_1544KHZ	((   43 << 22) | ( 66 << 12) |  385)
-#define CLK46X_SPEED_2048KHZ	((   32 << 22) | (280 << 12) |  511)
-#define CLK46X_SPEED_4096KHZ	((   16 << 22) | (280 << 12) | 1023)
-#define CLK46X_SPEED_8192KHZ	((    8 << 22) | (280 << 12) | 2047)
+#define CLK46X_SPEED_512KHZ	((130 << 22) | (24 << 12) |  127)
+#define CLK46X_SPEED_1536KHZ	((43 << 22) | (152 << 12) |  383)
+#define CLK46X_SPEED_1544KHZ	((43 << 22) | (66 << 12) |  385)
+#define CLK46X_SPEED_2048KHZ	((32 << 22) | (280 << 12) |  511)
+#define CLK46X_SPEED_4096KHZ	((16 << 22) | (280 << 12) | 1023)
+#define CLK46X_SPEED_8192KHZ	((8 << 22) | (280 << 12) | 2047)
 
 /*
  * HSS_CONFIG_CLOCK_CR register consists of 3 parts:

From 17ce9764bb26f43b40de904f0d4a06be71abb979 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:20:04 +0800
Subject: [PATCH 1367/3292] net: ixp4xx_hss: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 30a6df4b2eae..319ae509d1f4 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -166,8 +166,7 @@
 #define CLK46X_SPEED_4096KHZ	((16 << 22) | (280 << 12) | 1023)
 #define CLK46X_SPEED_8192KHZ	((8 << 22) | (280 << 12) | 2047)
 
-/*
- * HSS_CONFIG_CLOCK_CR register consists of 3 parts:
+/* HSS_CONFIG_CLOCK_CR register consists of 3 parts:
  *     A (10 bits), B (10 bits) and C (12 bits).
  * IXP42x HSS clock generator operation (verified with an oscilloscope):
  * Each clock bit takes 7.5 ns (1 / 133.xx MHz).
@@ -217,7 +216,8 @@
 #define PORT_ERROR_READ			0x42
 
 /* triggers the NPE to reset internal status and enable the HssPacketized
-   operation for the flow specified by pPipe */
+ * operation for the flow specified by pPipe
+ */
 #define PKT_PIPE_FLOW_ENABLE		0x50
 #define PKT_PIPE_FLOW_DISABLE		0x51
 #define PKT_NUM_PIPES_WRITE		0x52
@@ -232,7 +232,8 @@
 #define ERR_HDLC_ALIGN		2 /* HDLC alignment error */
 #define ERR_HDLC_FCS		3 /* HDLC Frame Check Sum error */
 #define ERR_RXFREE_Q_EMPTY	4 /* RX-free queue became empty while receiving
-				     this packet (if buf_len < pkt_len) */
+				   * this packet (if buf_len < pkt_len)
+				   */
 #define ERR_HDLC_TOO_LONG	5 /* HDLC frame size too long */
 #define ERR_HDLC_ABORT		6 /* abort sequence received */
 #define ERR_DISCONNECTING	7 /* disconnect is in progress */
@@ -602,7 +603,8 @@ static inline void queue_put_desc(unsigned int queue, u32 phys,
 	BUG_ON(phys & 0x1F);
 	qmgr_put_entry(queue, phys);
 	/* Don't check for queue overflow here, we've allocated sufficient
-	   length and queues >= 32 don't support this check anyway. */
+	 * length and queues >= 32 don't support this check anyway.
+	 */
 }
 
 static inline void dma_unmap_tx(struct port *port, struct desc *desc)

From e0bd276463e874dfa572a9557c62f9a3d5bfcfd4 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 10 Jun 2021 15:20:05 +0800
Subject: [PATCH 1368/3292] net: ixp4xx_hss: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/ixp4xx_hss.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index 319ae509d1f4..e97521138f7e 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -1299,11 +1299,11 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			return -EINVAL;
 
 		port->clock_type = clk; /* Update settings */
-		if (clk == CLOCK_INT)
+		if (clk == CLOCK_INT) {
 			find_best_clock(port->plat->timer_freq,
 					new_line.clock_rate,
 					&port->clock_rate, &port->clock_reg);
-		else {
+		} else {
 			port->clock_rate = 0;
 			port->clock_reg = CLK42X_SPEED_2048KHZ;
 		}

From 0b462d017caff780f4922872c7098b193feee8b6 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 10 Jun 2021 15:29:33 +0800
Subject: [PATCH 1369/3292] net: w5100: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/wiznet/w5100.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index ec5db481c9cd..811815f8cd3b 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -263,19 +263,14 @@ static int w5100_writebulk_direct(struct net_device *ndev, u32 addr,
 static int w5100_mmio_init(struct net_device *ndev)
 {
 	struct platform_device *pdev = to_platform_device(ndev->dev.parent);
-	struct w5100_priv *priv = netdev_priv(ndev);
 	struct w5100_mmio_priv *mmio_priv = w5100_mmio_priv(ndev);
-	struct resource *mem;
 
 	spin_lock_init(&mmio_priv->reg_lock);
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mmio_priv->base = devm_ioremap_resource(&pdev->dev, mem);
+	mmio_priv->base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(mmio_priv->base))
 		return PTR_ERR(mmio_priv->base);
 
-	netdev_info(ndev, "at 0x%llx irq %d\n", (u64)mem->start, priv->irq);
-
 	return 0;
 }
 

From 47651c51c02fc4937b39b2d2207aa0d9d26a4b58 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 10 Jun 2021 15:36:22 +0800
Subject: [PATCH 1370/3292] net: axienet: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index e29ad9a86a3c..13cd799541aa 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1894,8 +1894,7 @@ static int axienet_probe(struct platform_device *pdev)
 		goto cleanup_clk;
 
 	/* Map device registers */
-	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
+	lp->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &ethres);
 	if (IS_ERR(lp->regs)) {
 		ret = PTR_ERR(lp->regs);
 		goto cleanup_clk;
@@ -2010,9 +2009,7 @@ static int axienet_probe(struct platform_device *pdev)
 		lp->eth_irq = platform_get_irq_optional(pdev, 0);
 	} else {
 		/* Check for these resources directly on the Ethernet node. */
-		struct resource *res = platform_get_resource(pdev,
-							     IORESOURCE_MEM, 1);
-		lp->dma_regs = devm_ioremap_resource(&pdev->dev, res);
+		lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL);
 		lp->rx_irq = platform_get_irq(pdev, 1);
 		lp->tx_irq = platform_get_irq(pdev, 0);
 		lp->eth_irq = platform_get_irq_optional(pdev, 2);

From f18c11812c949553d2b2481ecaa274dd51bed1e7 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 10 Jun 2021 16:02:43 +0800
Subject: [PATCH 1371/3292] fjes: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fjes/fjes_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index d098b1fcf006..185c8a398681 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1254,6 +1254,10 @@ static int fjes_probe(struct platform_device *plat_dev)
 	adapter->interrupt_watch_enable = false;
 
 	res = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
+	if (!res) {
+		err = -EINVAL;
+		goto err_free_control_wq;
+	}
 	hw->hw_res.start = res->start;
 	hw->hw_res.size = resource_size(res);
 	hw->hw_res.irq = platform_get_irq(plat_dev, 0);

From 1f7096f0fdb2ac5ae6f1e290dfdd2fb7bbb074d3 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Thu, 10 Jun 2021 16:53:54 +0800
Subject: [PATCH 1372/3292] net: stmmac: Fix mixed enum type warning

The commit 5a5586112b92 ("net: stmmac: support FPE link partner
hand-shaking procedure") introduced the following coverity warning:

  "Parse warning (PW.MIXED_ENUM_TYPE)"
  "1. mixed_enum_type: enumerated type mixed with another type"

This is due to both "lo_state" and "lp_sate" which their datatype are
enum stmmac_fpe_state type, and being assigned with "FPE_EVENT_UNKNOWN"
which is a macro-defined of 0. Fixed this by assigned both these
variables with the correct enum value.

Fixes: 5a5586112b92 ("net: stmmac: support FPE link partner hand-shaking procedure")
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 180f347b4c8e..db97cd4b871d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1021,8 +1021,8 @@ static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up)
 	if (is_up && *hs_enable) {
 		stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY);
 	} else {
-		*lo_state = FPE_EVENT_UNKNOWN;
-		*lp_state = FPE_EVENT_UNKNOWN;
+		*lo_state = FPE_STATE_OFF;
+		*lp_state = FPE_STATE_OFF;
 	}
 }
 

From 8a55a73433e763c8aec4a3e8df5c28c821fc44b9 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 10 Jun 2021 17:17:12 +0800
Subject: [PATCH 1373/3292] net: mido: mdio-mux-bcm-iproc: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code and avoid a null-ptr-deref by checking 'res' in it.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-mux-bcm-iproc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 239e88c7a272..014c0baedbd2 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -187,7 +187,9 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	md->dev = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	md->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(md->base))
+		return PTR_ERR(md->base);
 	if (res->start & 0xfff) {
 		/* For backward compatibility in case the
 		 * base address is specified with an offset.
@@ -196,9 +198,6 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
 		res->start &= ~0xfff;
 		res->end = res->start + MDIO_REG_ADDR_SPACE_SIZE - 1;
 	}
-	md->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(md->base))
-		return PTR_ERR(md->base);
 
 	md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
 	if (!md->mii_bus) {

From 9e2b7b0450cfc6a99ceaa37843cb5d0179e1c2ae Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 10 Jun 2021 17:25:35 +0800
Subject: [PATCH 1374/3292] mt76: mt7615: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/mediatek/mt76/mt7615/soc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
index be9a69fe1b38..f13d1b418742 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
@@ -31,7 +31,6 @@ int mt7622_wmac_init(struct mt7615_dev *dev)
 
 static int mt7622_wmac_probe(struct platform_device *pdev)
 {
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	void __iomem *mem_base;
 	int irq;
 
@@ -39,7 +38,7 @@ static int mt7622_wmac_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	mem_base = devm_ioremap_resource(&pdev->dev, res);
+	mem_base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(mem_base))
 		return PTR_ERR(mem_base);
 

From bc831facf8a11e4e615dc67ae790325710bc1979 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Thu, 10 Jun 2021 20:48:26 +0800
Subject: [PATCH 1375/3292] net: x25: Use list_for_each_entry() to simplify
 code in x25_route.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_route.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 9fbe4bb38d94..647f325ed867 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -27,14 +27,11 @@ static int x25_add_route(struct x25_address *address, unsigned int sigdigits,
 			 struct net_device *dev)
 {
 	struct x25_route *rt;
-	struct list_head *entry;
 	int rc = -EINVAL;
 
 	write_lock_bh(&x25_route_list_lock);
 
-	list_for_each(entry, &x25_route_list) {
-		rt = list_entry(entry, struct x25_route, node);
-
+	list_for_each_entry(rt, &x25_route_list, node) {
 		if (!memcmp(&rt->address, address, sigdigits) &&
 		    rt->sigdigits == sigdigits)
 			goto out;
@@ -78,14 +75,11 @@ static int x25_del_route(struct x25_address *address, unsigned int sigdigits,
 			 struct net_device *dev)
 {
 	struct x25_route *rt;
-	struct list_head *entry;
 	int rc = -EINVAL;
 
 	write_lock_bh(&x25_route_list_lock);
 
-	list_for_each(entry, &x25_route_list) {
-		rt = list_entry(entry, struct x25_route, node);
-
+	list_for_each_entry(rt, &x25_route_list, node) {
 		if (!memcmp(&rt->address, address, sigdigits) &&
 		    rt->sigdigits == sigdigits && rt->dev == dev) {
 			__x25_remove_route(rt);
@@ -141,13 +135,10 @@ struct net_device *x25_dev_get(char *devname)
 struct x25_route *x25_get_route(struct x25_address *addr)
 {
 	struct x25_route *rt, *use = NULL;
-	struct list_head *entry;
 
 	read_lock_bh(&x25_route_list_lock);
 
-	list_for_each(entry, &x25_route_list) {
-		rt = list_entry(entry, struct x25_route, node);
-
+	list_for_each_entry(rt, &x25_route_list, node) {
 		if (!memcmp(&rt->address, addr, rt->sigdigits)) {
 			if (!use)
 				use = rt;

From 3e98ae0014cb882a2989cf4465e2b26688d3608d Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Thu, 10 Jun 2021 20:54:17 +0800
Subject: [PATCH 1376/3292] ibmvnic: Use list_for_each_entry() to simplify code
 in ibmvnic.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Acked-by: Lijun Pan <lijunp213@gmail.com>
Reviewed-by: Dany Madden <drt@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4d439413f6d9..ffd26cdc7bcd 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2400,8 +2400,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 		goto err;
 	}
 
-	list_for_each(entry, &adapter->rwi_list) {
-		tmp = list_entry(entry, struct ibmvnic_rwi, list);
+	list_for_each_entry(tmp, &adapter->rwi_list, list) {
 		if (tmp->reset_reason == reason) {
 			netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
 				   reset_reason_to_string(reason));

From 73e42909ef2d1fa554b39bf6ae3eb1546dfc97a5 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Thu, 10 Jun 2021 21:03:55 +0800
Subject: [PATCH 1377/3292] atm: Use list_for_each_entry() to simplify code in
 resources.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/resources.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/atm/resources.c b/net/atm/resources.c
index 53236986dfe0..2b2d33eeaf20 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -52,10 +52,8 @@ static struct atm_dev *__alloc_atm_dev(const char *type)
 static struct atm_dev *__atm_dev_lookup(int number)
 {
 	struct atm_dev *dev;
-	struct list_head *p;
 
-	list_for_each(p, &atm_devs) {
-		dev = list_entry(p, struct atm_dev, dev_list);
+	list_for_each_entry(dev, &atm_devs, dev_list) {
 		if (dev->number == number) {
 			atm_dev_hold(dev);
 			return dev;
@@ -215,8 +213,7 @@ int atm_getnames(void __user *buf, int __user *iobuf_len)
 		return -ENOMEM;
 	}
 	tmp_p = tmp_buf;
-	list_for_each(p, &atm_devs) {
-		dev = list_entry(p, struct atm_dev, dev_list);
+	list_for_each_entry(dev, &atm_devs, dev_list) {
 		*tmp_p++ = dev->number;
 	}
 	mutex_unlock(&atm_dev_mutex);

From cb8e2e4300fc17e1028cce554ecf72a9e6161742 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Thu, 10 Jun 2021 21:26:03 +0800
Subject: [PATCH 1378/3292] dccp: tfrc: fix doc warnings in tfrc_equation.c

Add description for `tfrc_invert_loss_event_rate` to fix the W=1 warnings:

 net/dccp/ccids/lib/tfrc_equation.c:695: warning: Function parameter or
  member 'loss_event_rate' not described in 'tfrc_invert_loss_event_rate'

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Richard Sailer <richard_siegfried@systemli.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/tfrc_equation.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index e2a337fa9ff7..92a8c6bea316 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -688,6 +688,7 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 
 /**
  * tfrc_invert_loss_event_rate  -  Compute p so that 10^6 corresponds to 100%
+ * @loss_event_rate: loss event rate to invert
  * When @loss_event_rate is large, there is a chance that p is truncated to 0.
  * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
  */

From 950fd045d76c96ada8c7a6e80f1f4e40de4beb17 Mon Sep 17 00:00:00 2001
From: Tan Zhongjun <tanzhongjun@yulong.com>
Date: Thu, 10 Jun 2021 22:01:18 +0800
Subject: [PATCH 1379/3292] soc: qcom: ipa: Remove superfluous error message
 around platform_get_irq()

The platform_get_irq() prints error message telling that interrupt is
missing,hence there is no need to duplicated that message in the
drivers.

Signed-off-by: Tan Zhongjun <tanzhongjun@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_smp2p.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c
index a5f7a79a1923..cf709df70d28 100644
--- a/drivers/net/ipa/ipa_smp2p.c
+++ b/drivers/net/ipa/ipa_smp2p.c
@@ -176,11 +176,8 @@ static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p, const char *name,
 	int ret;
 
 	ret = platform_get_irq_byname(smp2p->ipa->pdev, name);
-	if (ret <= 0) {
-		dev_err(dev, "DT error %d getting \"%s\" IRQ property\n",
-			ret, name);
+	if (ret <= 0)
 		return ret ? : -EINVAL;
-	}
 	irq = ret;
 
 	ret = request_threaded_irq(irq, NULL, handler, 0, name, smp2p);

From 3d5048cc54bd250cfbb358c37fcc011135977887 Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Thu, 10 Jun 2021 18:43:09 +0300
Subject: [PATCH 1380/3292] net: marvell: prestera: move netdev topology
 validation to prestera_main

Move handling of PRECHANGEUPPER event from prestera_switchdev to
prestera_main which is responsible for basic netdev events handling
and routing them to related module.

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/prestera/prestera_main.c | 29 +++++++++++++++++--
 .../marvell/prestera/prestera_switchdev.c     | 20 -------------
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 2768c78528a5..767a06862662 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -508,13 +508,36 @@ struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev)
 static int prestera_netdev_port_event(struct net_device *dev,
 				      unsigned long event, void *ptr)
 {
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+	upper = info->upper_dev;
+
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
+		if (!netif_is_bridge_master(upper)) {
+			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+			return -EINVAL;
+		}
+
+		if (!info->linking)
+			break;
+
+		if (netdev_has_any_upper_dev(upper)) {
+			NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
+			return -EINVAL;
+		}
+		break;
+
 	case NETDEV_CHANGEUPPER:
-		return prestera_bridge_port_event(dev, event, ptr);
-	default:
-		return 0;
+		if (netif_is_bridge_master(upper))
+			return prestera_bridge_port_event(dev, event, ptr);
+		break;
 	}
+
+	return 0;
 }
 
 static int prestera_netdev_event_handler(struct notifier_block *nb,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 6442dc411285..8e29cbb3d10e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -537,35 +537,15 @@ int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
 			       void *ptr)
 {
 	struct netdev_notifier_changeupper_info *info = ptr;
-	struct netlink_ext_ack *extack;
 	struct prestera_port *port;
 	struct net_device *upper;
 	int err;
 
-	extack = netdev_notifier_info_to_extack(&info->info);
 	port = netdev_priv(dev);
 	upper = info->upper_dev;
 
 	switch (event) {
-	case NETDEV_PRECHANGEUPPER:
-		if (!netif_is_bridge_master(upper)) {
-			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
-			return -EINVAL;
-		}
-
-		if (!info->linking)
-			break;
-
-		if (netdev_has_any_upper_dev(upper)) {
-			NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
-			return -EINVAL;
-		}
-		break;
-
 	case NETDEV_CHANGEUPPER:
-		if (!netif_is_bridge_master(upper))
-			break;
-
 		if (info->linking) {
 			err = prestera_port_bridge_join(port, upper);
 			if (err)

From 82bbaa05bf9062d085b236d4799c494b62c1c7ef Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Thu, 10 Jun 2021 18:43:10 +0300
Subject: [PATCH 1381/3292] net: marvell: prestera: do not propagate netdev
 events to prestera_switchdev.c

Replace prestera_bridge_port_event(...) by
prestera_bridge_port_join(...) and prestera_bridge_port_leave().

It simplifies the code by reading netdev event specific handling only
once in prestera_main.c

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
CC: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/prestera/prestera_main.c |  9 ++++-
 .../marvell/prestera/prestera_switchdev.c     | 40 ++++---------------
 .../marvell/prestera/prestera_switchdev.h     |  7 +++-
 3 files changed, 19 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 767a06862662..bee477f44e06 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -509,6 +509,7 @@ static int prestera_netdev_port_event(struct net_device *dev,
 				      unsigned long event, void *ptr)
 {
 	struct netdev_notifier_changeupper_info *info = ptr;
+	struct prestera_port *port = netdev_priv(dev);
 	struct netlink_ext_ack *extack;
 	struct net_device *upper;
 
@@ -532,8 +533,12 @@ static int prestera_netdev_port_event(struct net_device *dev,
 		break;
 
 	case NETDEV_CHANGEUPPER:
-		if (netif_is_bridge_master(upper))
-			return prestera_bridge_port_event(dev, event, ptr);
+		if (netif_is_bridge_master(upper)) {
+			if (info->linking)
+				return prestera_bridge_port_join(upper, port);
+			else
+				prestera_bridge_port_leave(upper, port);
+		}
 		break;
 	}
 
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 8e29cbb3d10e..0afbd485a3a2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -422,17 +422,17 @@ err_port_flood_set:
 	return err;
 }
 
-static int prestera_port_bridge_join(struct prestera_port *port,
-				     struct net_device *upper)
+int prestera_bridge_port_join(struct net_device *br_dev,
+			      struct prestera_port *port)
 {
 	struct prestera_switchdev *swdev = port->sw->swdev;
 	struct prestera_bridge_port *br_port;
 	struct prestera_bridge *bridge;
 	int err;
 
-	bridge = prestera_bridge_by_dev(swdev, upper);
+	bridge = prestera_bridge_by_dev(swdev, br_dev);
 	if (!bridge) {
-		bridge = prestera_bridge_create(swdev, upper);
+		bridge = prestera_bridge_create(swdev, br_dev);
 		if (IS_ERR(bridge))
 			return PTR_ERR(bridge);
 	}
@@ -505,14 +505,14 @@ static int prestera_port_vid_stp_set(struct prestera_port *port, u16 vid,
 	return prestera_hw_vlan_port_stp_set(port, vid, hw_state);
 }
 
-static void prestera_port_bridge_leave(struct prestera_port *port,
-				       struct net_device *upper)
+void prestera_bridge_port_leave(struct net_device *br_dev,
+				struct prestera_port *port)
 {
 	struct prestera_switchdev *swdev = port->sw->swdev;
 	struct prestera_bridge_port *br_port;
 	struct prestera_bridge *bridge;
 
-	bridge = prestera_bridge_by_dev(swdev, upper);
+	bridge = prestera_bridge_by_dev(swdev, br_dev);
 	if (!bridge)
 		return;
 
@@ -533,32 +533,6 @@ static void prestera_port_bridge_leave(struct prestera_port *port,
 	prestera_bridge_port_put(br_port);
 }
 
-int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
-			       void *ptr)
-{
-	struct netdev_notifier_changeupper_info *info = ptr;
-	struct prestera_port *port;
-	struct net_device *upper;
-	int err;
-
-	port = netdev_priv(dev);
-	upper = info->upper_dev;
-
-	switch (event) {
-	case NETDEV_CHANGEUPPER:
-		if (info->linking) {
-			err = prestera_port_bridge_join(port, upper);
-			if (err)
-				return err;
-		} else {
-			prestera_port_bridge_leave(port, upper);
-		}
-		break;
-	}
-
-	return 0;
-}
-
 static int prestera_port_attr_br_flags_set(struct prestera_port *port,
 					   struct net_device *dev,
 					   struct switchdev_brport_flags flags)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
index 606e21d2355b..a91bc35d235f 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
@@ -7,7 +7,10 @@
 int prestera_switchdev_init(struct prestera_switch *sw);
 void prestera_switchdev_fini(struct prestera_switch *sw);
 
-int prestera_bridge_port_event(struct net_device *dev, unsigned long event,
-			       void *ptr);
+int prestera_bridge_port_join(struct net_device *br_dev,
+			      struct prestera_port *port);
+
+void prestera_bridge_port_leave(struct net_device *br_dev,
+				struct prestera_port *port);
 
 #endif /* _PRESTERA_SWITCHDEV_H_ */

From 255213ca688767662a23d14f3fbf099c0e0b755d Mon Sep 17 00:00:00 2001
From: Serhiy Boiko <serhiy.boiko@plvision.eu>
Date: Thu, 10 Jun 2021 18:43:11 +0300
Subject: [PATCH 1382/3292] net: marvell: prestera: add LAG support

The following features are supported:

    - LAG basic operations
        - create/delete LAG
        - add/remove a member to LAG
        - enable/disable member in LAG
    - LAG Bridge support
    - LAG VLAN support
    - LAG FDB support

Limitations:

    - Only HASH lag tx type is supported
    - The Hash parameters are not configurable. They are applied
      during the LAG creation stage.
    - Enslaving a port to the LAG device that already has an
      upper device is not supported.

Co-developed-by: Andrii Savka <andrii.savka@plvision.eu>
Signed-off-by: Andrii Savka <andrii.savka@plvision.eu>
Signed-off-by: Serhiy Boiko <serhiy.boiko@plvision.eu>
Co-developed-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/prestera/prestera.h  |  30 ++-
 .../ethernet/marvell/prestera/prestera_hw.c   | 180 +++++++++++++-
 .../ethernet/marvell/prestera/prestera_hw.h   |  14 ++
 .../ethernet/marvell/prestera/prestera_main.c | 235 +++++++++++++++++-
 .../marvell/prestera/prestera_switchdev.c     | 103 ++++++--
 5 files changed, 531 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index 55aa4bf8a27c..ad0f33a7e517 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -60,10 +60,19 @@ struct prestera_port_caps {
 	u8 transceiver;
 };
 
+struct prestera_lag {
+	struct net_device *dev;
+	struct list_head members;
+	u16 member_count;
+	u16 lag_id;
+};
+
 struct prestera_port {
 	struct net_device *dev;
 	struct prestera_switch *sw;
 	struct devlink_port dl_port;
+	struct list_head lag_member;
+	struct prestera_lag *lag;
 	u32 id;
 	u32 hw_id;
 	u32 dev_id;
@@ -127,6 +136,12 @@ struct prestera_port_event {
 	} data;
 };
 
+enum prestera_fdb_entry_type {
+	PRESTERA_FDB_ENTRY_TYPE_REG_PORT,
+	PRESTERA_FDB_ENTRY_TYPE_LAG,
+	PRESTERA_FDB_ENTRY_TYPE_MAX
+};
+
 enum prestera_fdb_event_id {
 	PRESTERA_FDB_EVENT_UNSPEC,
 	PRESTERA_FDB_EVENT_LEARNED,
@@ -134,7 +149,11 @@ enum prestera_fdb_event_id {
 };
 
 struct prestera_fdb_event {
-	u32 port_id;
+	enum prestera_fdb_entry_type type;
+	union {
+		u32 port_id;
+		u16 lag_id;
+	} dest;
 	u32 vid;
 	union {
 		u8 mac[ETH_ALEN];
@@ -165,6 +184,9 @@ struct prestera_switch {
 	u32 mtu_min;
 	u32 mtu_max;
 	u8 id;
+	struct prestera_lag *lags;
+	u8 lag_member_max;
+	u8 lag_max;
 };
 
 struct prestera_rxtx_params {
@@ -203,4 +225,10 @@ int prestera_port_pvid_set(struct prestera_port *port, u16 vid);
 
 bool prestera_netdev_check(const struct net_device *dev);
 
+bool prestera_port_is_lag_member(const struct prestera_port *port);
+
+struct prestera_lag *prestera_lag_by_id(struct prestera_switch *sw, u16 id);
+
+u16 prestera_port_lag_id(const struct prestera_port *port);
+
 #endif /* _PRESTERA_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 96ce73b50fec..886ce251330e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -40,6 +40,11 @@ enum prestera_cmd_type_t {
 	PRESTERA_CMD_TYPE_RXTX_INIT = 0x800,
 	PRESTERA_CMD_TYPE_RXTX_PORT_INIT = 0x801,
 
+	PRESTERA_CMD_TYPE_LAG_MEMBER_ADD = 0x900,
+	PRESTERA_CMD_TYPE_LAG_MEMBER_DELETE = 0x901,
+	PRESTERA_CMD_TYPE_LAG_MEMBER_ENABLE = 0x902,
+	PRESTERA_CMD_TYPE_LAG_MEMBER_DISABLE = 0x903,
+
 	PRESTERA_CMD_TYPE_STP_PORT_SET = 0x1000,
 
 	PRESTERA_CMD_TYPE_ACK = 0x10000,
@@ -133,6 +138,12 @@ enum {
 	PRESTERA_FC_SYMM_ASYMM,
 };
 
+enum {
+	PRESTERA_HW_FDB_ENTRY_TYPE_REG_PORT = 0,
+	PRESTERA_HW_FDB_ENTRY_TYPE_LAG = 1,
+	PRESTERA_HW_FDB_ENTRY_TYPE_MAX = 2,
+};
+
 struct prestera_fw_event_handler {
 	struct list_head list;
 	struct rcu_head rcu;
@@ -174,6 +185,8 @@ struct prestera_msg_switch_init_resp {
 	u32 port_count;
 	u32 mtu_max;
 	u8  switch_id;
+	u8  lag_max;
+	u8  lag_member_max;
 };
 
 struct prestera_msg_port_autoneg_param {
@@ -261,8 +274,13 @@ struct prestera_msg_vlan_req {
 struct prestera_msg_fdb_req {
 	struct prestera_msg_cmd cmd;
 	u8 dest_type;
-	u32 port;
-	u32 dev;
+	union {
+		struct {
+			u32 port;
+			u32 dev;
+		};
+		u16 lag_id;
+	} dest;
 	u8  mac[ETH_ALEN];
 	u16 vid;
 	u8  dynamic;
@@ -305,6 +323,13 @@ struct prestera_msg_rxtx_port_req {
 	u32 dev;
 };
 
+struct prestera_msg_lag_req {
+	struct prestera_msg_cmd cmd;
+	u32 port;
+	u32 dev;
+	u16 lag_id;
+};
+
 struct prestera_msg_event {
 	u16 type;
 	u16 id;
@@ -327,7 +352,10 @@ union prestera_msg_event_fdb_param {
 struct prestera_msg_event_fdb {
 	struct prestera_msg_event id;
 	u8 dest_type;
-	u32 port_id;
+	union {
+		u32 port_id;
+		u16 lag_id;
+	} dest;
 	u32 vid;
 	union prestera_msg_event_fdb_param param;
 };
@@ -398,7 +426,19 @@ static int prestera_fw_parse_fdb_evt(void *msg, struct prestera_event *evt)
 {
 	struct prestera_msg_event_fdb *hw_evt = msg;
 
-	evt->fdb_evt.port_id = hw_evt->port_id;
+	switch (hw_evt->dest_type) {
+	case PRESTERA_HW_FDB_ENTRY_TYPE_REG_PORT:
+		evt->fdb_evt.type = PRESTERA_FDB_ENTRY_TYPE_REG_PORT;
+		evt->fdb_evt.dest.port_id = hw_evt->dest.port_id;
+		break;
+	case PRESTERA_HW_FDB_ENTRY_TYPE_LAG:
+		evt->fdb_evt.type = PRESTERA_FDB_ENTRY_TYPE_LAG;
+		evt->fdb_evt.dest.lag_id = hw_evt->dest.lag_id;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	evt->fdb_evt.vid = hw_evt->vid;
 
 	ether_addr_copy(evt->fdb_evt.data.mac, hw_evt->param.mac);
@@ -543,6 +583,8 @@ int prestera_hw_switch_init(struct prestera_switch *sw)
 	sw->mtu_min = PRESTERA_MIN_MTU;
 	sw->mtu_max = resp.mtu_max;
 	sw->id = resp.switch_id;
+	sw->lag_member_max = resp.lag_member_max;
+	sw->lag_max = resp.lag_max;
 
 	return 0;
 }
@@ -1150,8 +1192,10 @@ int prestera_hw_fdb_add(struct prestera_port *port, const unsigned char *mac,
 			u16 vid, bool dynamic)
 {
 	struct prestera_msg_fdb_req req = {
-		.port = port->hw_id,
-		.dev = port->dev_id,
+		.dest = {
+			.dev = port->dev_id,
+			.port = port->hw_id,
+		},
 		.vid = vid,
 		.dynamic = dynamic,
 	};
@@ -1166,8 +1210,10 @@ int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac,
 			u16 vid)
 {
 	struct prestera_msg_fdb_req req = {
-		.port = port->hw_id,
-		.dev = port->dev_id,
+		.dest = {
+			.dev = port->dev_id,
+			.port = port->hw_id,
+		},
 		.vid = vid,
 	};
 
@@ -1177,11 +1223,48 @@ int prestera_hw_fdb_del(struct prestera_port *port, const unsigned char *mac,
 			    &req.cmd, sizeof(req));
 }
 
+int prestera_hw_lag_fdb_add(struct prestera_switch *sw, u16 lag_id,
+			    const unsigned char *mac, u16 vid, bool dynamic)
+{
+	struct prestera_msg_fdb_req req = {
+		.dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+		.dest = {
+			.lag_id = lag_id,
+		},
+		.vid = vid,
+		.dynamic = dynamic,
+	};
+
+	ether_addr_copy(req.mac, mac);
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_ADD,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_lag_fdb_del(struct prestera_switch *sw, u16 lag_id,
+			    const unsigned char *mac, u16 vid)
+{
+	struct prestera_msg_fdb_req req = {
+		.dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+		.dest = {
+			.lag_id = lag_id,
+		},
+		.vid = vid,
+	};
+
+	ether_addr_copy(req.mac, mac);
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_DELETE,
+			    &req.cmd, sizeof(req));
+}
+
 int prestera_hw_fdb_flush_port(struct prestera_port *port, u32 mode)
 {
 	struct prestera_msg_fdb_req req = {
-		.port = port->hw_id,
-		.dev = port->dev_id,
+		.dest = {
+			.dev = port->dev_id,
+			.port = port->hw_id,
+		},
 		.flush_mode = mode,
 	};
 
@@ -1204,8 +1287,10 @@ int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
 				    u32 mode)
 {
 	struct prestera_msg_fdb_req req = {
-		.port = port->hw_id,
-		.dev = port->dev_id,
+		.dest = {
+			.dev = port->dev_id,
+			.port = port->hw_id,
+		},
 		.vid = vid,
 		.flush_mode = mode,
 	};
@@ -1214,6 +1299,37 @@ int prestera_hw_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
 			    &req.cmd, sizeof(req));
 }
 
+int prestera_hw_fdb_flush_lag(struct prestera_switch *sw, u16 lag_id,
+			      u32 mode)
+{
+	struct prestera_msg_fdb_req req = {
+		.dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+		.dest = {
+			.lag_id = lag_id,
+		},
+		.flush_mode = mode,
+	};
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_fdb_flush_lag_vlan(struct prestera_switch *sw,
+				   u16 lag_id, u16 vid, u32 mode)
+{
+	struct prestera_msg_fdb_req req = {
+		.dest_type = PRESTERA_HW_FDB_ENTRY_TYPE_LAG,
+		.dest = {
+			.lag_id = lag_id,
+		},
+		.vid = vid,
+		.flush_mode = mode,
+	};
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_FDB_FLUSH_PORT_VLAN,
+			    &req.cmd, sizeof(req));
+}
+
 int prestera_hw_bridge_create(struct prestera_switch *sw, u16 *bridge_id)
 {
 	struct prestera_msg_bridge_resp resp;
@@ -1295,6 +1411,46 @@ int prestera_hw_rxtx_port_init(struct prestera_port *port)
 			    &req.cmd, sizeof(req));
 }
 
+int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id)
+{
+	struct prestera_msg_lag_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.lag_id = lag_id,
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_LAG_MEMBER_ADD,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_lag_member_del(struct prestera_port *port, u16 lag_id)
+{
+	struct prestera_msg_lag_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.lag_id = lag_id,
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_LAG_MEMBER_DELETE,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_lag_member_enable(struct prestera_port *port, u16 lag_id,
+				  bool enable)
+{
+	struct prestera_msg_lag_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.lag_id = lag_id,
+	};
+	u32 cmd;
+
+	cmd = enable ? PRESTERA_CMD_TYPE_LAG_MEMBER_ENABLE :
+			PRESTERA_CMD_TYPE_LAG_MEMBER_DISABLE;
+
+	return prestera_cmd(port->sw, cmd, &req.cmd, sizeof(req));
+}
+
 int prestera_hw_event_handler_register(struct prestera_switch *sw,
 				       enum prestera_event_type type,
 				       prestera_event_cb_t fn,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index e8dd0e2b81d2..846bdc04e278 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -180,4 +180,18 @@ int prestera_hw_rxtx_init(struct prestera_switch *sw,
 			  struct prestera_rxtx_params *params);
 int prestera_hw_rxtx_port_init(struct prestera_port *port);
 
+/* LAG API */
+int prestera_hw_lag_member_add(struct prestera_port *port, u16 lag_id);
+int prestera_hw_lag_member_del(struct prestera_port *port, u16 lag_id);
+int prestera_hw_lag_member_enable(struct prestera_port *port, u16 lag_id,
+				  bool enable);
+int prestera_hw_lag_fdb_add(struct prestera_switch *sw, u16 lag_id,
+			    const unsigned char *mac, u16 vid, bool dynamic);
+int prestera_hw_lag_fdb_del(struct prestera_switch *sw, u16 lag_id,
+			    const unsigned char *mac, u16 vid);
+int prestera_hw_fdb_flush_lag(struct prestera_switch *sw, u16 lag_id,
+			      u32 mode);
+int prestera_hw_fdb_flush_lag_vlan(struct prestera_switch *sw,
+				   u16 lag_id, u16 vid, u32 mode);
+
 #endif /* _PRESTERA_HW_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index bee477f44e06..d825fbdfa86f 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -8,6 +8,7 @@
 #include <linux/netdev_features.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
+#include <linux/if_vlan.h>
 
 #include "prestera.h"
 #include "prestera_hw.h"
@@ -281,6 +282,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id)
 
 	INIT_LIST_HEAD(&port->vlans_list);
 	port->pvid = PRESTERA_DEFAULT_VID;
+	port->lag = NULL;
 	port->dev = dev;
 	port->id = id;
 	port->sw = sw;
@@ -472,6 +474,149 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
 	return prestera_hw_switch_mac_set(sw, sw->base_mac);
 }
 
+struct prestera_lag *prestera_lag_by_id(struct prestera_switch *sw, u16 id)
+{
+	return id < sw->lag_max ? &sw->lags[id] : NULL;
+}
+
+static struct prestera_lag *prestera_lag_by_dev(struct prestera_switch *sw,
+						struct net_device *dev)
+{
+	struct prestera_lag *lag;
+	u16 id;
+
+	for (id = 0; id < sw->lag_max; id++) {
+		lag = &sw->lags[id];
+		if (lag->dev == dev)
+			return lag;
+	}
+
+	return NULL;
+}
+
+static struct prestera_lag *prestera_lag_create(struct prestera_switch *sw,
+						struct net_device *lag_dev)
+{
+	struct prestera_lag *lag = NULL;
+	u16 id;
+
+	for (id = 0; id < sw->lag_max; id++) {
+		lag = &sw->lags[id];
+		if (!lag->dev)
+			break;
+	}
+	if (lag) {
+		INIT_LIST_HEAD(&lag->members);
+		lag->dev = lag_dev;
+	}
+
+	return lag;
+}
+
+static void prestera_lag_destroy(struct prestera_switch *sw,
+				 struct prestera_lag *lag)
+{
+	WARN_ON(!list_empty(&lag->members));
+	lag->member_count = 0;
+	lag->dev = NULL;
+}
+
+static int prestera_lag_port_add(struct prestera_port *port,
+				 struct net_device *lag_dev)
+{
+	struct prestera_switch *sw = port->sw;
+	struct prestera_lag *lag;
+	int err;
+
+	lag = prestera_lag_by_dev(sw, lag_dev);
+	if (!lag) {
+		lag = prestera_lag_create(sw, lag_dev);
+		if (!lag)
+			return -ENOSPC;
+	}
+
+	if (lag->member_count >= sw->lag_member_max)
+		return -ENOSPC;
+
+	err = prestera_hw_lag_member_add(port, lag->lag_id);
+	if (err) {
+		if (!lag->member_count)
+			prestera_lag_destroy(sw, lag);
+		return err;
+	}
+
+	list_add(&port->lag_member, &lag->members);
+	lag->member_count++;
+	port->lag = lag;
+
+	return 0;
+}
+
+static int prestera_lag_port_del(struct prestera_port *port)
+{
+	struct prestera_switch *sw = port->sw;
+	struct prestera_lag *lag = port->lag;
+	int err;
+
+	if (!lag || !lag->member_count)
+		return -EINVAL;
+
+	err = prestera_hw_lag_member_del(port, lag->lag_id);
+	if (err)
+		return err;
+
+	list_del(&port->lag_member);
+	lag->member_count--;
+	port->lag = NULL;
+
+	if (netif_is_bridge_port(lag->dev)) {
+		struct net_device *br_dev;
+
+		br_dev = netdev_master_upper_dev_get(lag->dev);
+
+		prestera_bridge_port_leave(br_dev, port);
+	}
+
+	if (!lag->member_count)
+		prestera_lag_destroy(sw, lag);
+
+	return 0;
+}
+
+bool prestera_port_is_lag_member(const struct prestera_port *port)
+{
+	return !!port->lag;
+}
+
+u16 prestera_port_lag_id(const struct prestera_port *port)
+{
+	return port->lag->lag_id;
+}
+
+static int prestera_lag_init(struct prestera_switch *sw)
+{
+	u16 id;
+
+	sw->lags = kcalloc(sw->lag_max, sizeof(*sw->lags), GFP_KERNEL);
+	if (!sw->lags)
+		return -ENOMEM;
+
+	for (id = 0; id < sw->lag_max; id++)
+		sw->lags[id].lag_id = id;
+
+	return 0;
+}
+
+static void prestera_lag_fini(struct prestera_switch *sw)
+{
+	u8 idx;
+
+	for (idx = 0; idx < sw->lag_max; idx++)
+		WARN_ON(sw->lags[idx].member_count);
+
+	kfree(sw->lags);
+}
+
 bool prestera_netdev_check(const struct net_device *dev)
 {
 	return dev->netdev_ops == &prestera_netdev_ops;
@@ -505,7 +650,39 @@ struct prestera_port *prestera_port_dev_lower_find(struct net_device *dev)
 	return port;
 }
 
-static int prestera_netdev_port_event(struct net_device *dev,
+static int prestera_netdev_port_lower_event(struct net_device *dev,
+					    unsigned long event, void *ptr)
+{
+	struct netdev_notifier_changelowerstate_info *info = ptr;
+	struct netdev_lag_lower_state_info *lower_state_info;
+	struct prestera_port *port = netdev_priv(dev);
+	bool enabled;
+
+	if (!netif_is_lag_port(dev))
+		return 0;
+	if (!prestera_port_is_lag_member(port))
+		return 0;
+
+	lower_state_info = info->lower_state_info;
+	enabled = lower_state_info->link_up && lower_state_info->tx_enabled;
+
+	return prestera_hw_lag_member_enable(port, port->lag->lag_id, enabled);
+}
+
+static bool prestera_lag_master_check(struct net_device *lag_dev,
+				      struct netdev_lag_upper_info *info,
+				      struct netlink_ext_ack *ext_ack)
+{
+	if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+		NL_SET_ERR_MSG_MOD(ext_ack, "Unsupported LAG Tx type");
+		return false;
+	}
+
+	return true;
+}
+
+static int prestera_netdev_port_event(struct net_device *lower,
+				      struct net_device *dev,
 				      unsigned long event, void *ptr)
 {
 	struct netdev_notifier_changeupper_info *info = ptr;
@@ -518,7 +695,8 @@ static int prestera_netdev_port_event(struct net_device *dev,
 
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
-		if (!netif_is_bridge_master(upper)) {
+		if (!netif_is_bridge_master(upper) &&
+		    !netif_is_lag_master(upper)) {
 			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
 			return -EINVAL;
 		}
@@ -530,6 +708,21 @@ static int prestera_netdev_port_event(struct net_device *dev,
 			NL_SET_ERR_MSG_MOD(extack, "Upper device is already enslaved");
 			return -EINVAL;
 		}
+
+		if (netif_is_lag_master(upper) &&
+		    !prestera_lag_master_check(upper, info->upper_info, extack))
+			return -EOPNOTSUPP;
+		if (netif_is_lag_master(upper) && vlan_uses_dev(dev)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Master device is a LAG master and port has a VLAN");
+			return -EINVAL;
+		}
+		if (netif_is_lag_port(dev) && is_vlan_dev(upper) &&
+		    !netif_is_lag_master(vlan_dev_real_dev(upper))) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Can not put a VLAN on a LAG port");
+			return -EINVAL;
+		}
 		break;
 
 	case NETDEV_CHANGEUPPER:
@@ -538,8 +731,35 @@ static int prestera_netdev_port_event(struct net_device *dev,
 				return prestera_bridge_port_join(upper, port);
 			else
 				prestera_bridge_port_leave(upper, port);
+		} else if (netif_is_lag_master(upper)) {
+			if (info->linking)
+				return prestera_lag_port_add(port, upper);
+			else
+				prestera_lag_port_del(port);
 		}
 		break;
+
+	case NETDEV_CHANGELOWERSTATE:
+		return prestera_netdev_port_lower_event(dev, event, ptr);
+	}
+
+	return 0;
+}
+
+static int prestera_netdevice_lag_event(struct net_device *lag_dev,
+					unsigned long event, void *ptr)
+{
+	struct net_device *dev;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_lower_dev(lag_dev, dev, iter) {
+		if (prestera_netdev_check(dev)) {
+			err = prestera_netdev_port_event(lag_dev, dev, event,
+							 ptr);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
@@ -552,7 +772,9 @@ static int prestera_netdev_event_handler(struct notifier_block *nb,
 	int err = 0;
 
 	if (prestera_netdev_check(dev))
-		err = prestera_netdev_port_event(dev, event, ptr);
+		err = prestera_netdev_port_event(dev, dev, event, ptr);
+	else if (netif_is_lag_master(dev))
+		err = prestera_netdevice_lag_event(dev, event, ptr);
 
 	return notifier_from_errno(err);
 }
@@ -606,6 +828,10 @@ static int prestera_switch_init(struct prestera_switch *sw)
 	if (err)
 		goto err_dl_register;
 
+	err = prestera_lag_init(sw);
+	if (err)
+		goto err_lag_init;
+
 	err = prestera_create_ports(sw);
 	if (err)
 		goto err_ports_create;
@@ -613,6 +839,8 @@ static int prestera_switch_init(struct prestera_switch *sw)
 	return 0;
 
 err_ports_create:
+	prestera_lag_fini(sw);
+err_lag_init:
 	prestera_devlink_unregister(sw);
 err_dl_register:
 	prestera_event_handlers_unregister(sw);
@@ -630,6 +858,7 @@ err_swdev_register:
 static void prestera_switch_fini(struct prestera_switch *sw)
 {
 	prestera_destroy_ports(sw);
+	prestera_lag_fini(sw);
 	prestera_devlink_unregister(sw);
 	prestera_event_handlers_unregister(sw);
 	prestera_rxtx_switch_fini(sw);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 0afbd485a3a2..74b81b4fbb97 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -180,6 +180,45 @@ err_port_vlan_alloc:
 	return ERR_PTR(err);
 }
 
+static int prestera_fdb_add(struct prestera_port *port,
+			    const unsigned char *mac, u16 vid, bool dynamic)
+{
+	if (prestera_port_is_lag_member(port))
+		return prestera_hw_lag_fdb_add(port->sw, prestera_port_lag_id(port),
+					      mac, vid, dynamic);
+
+	return prestera_hw_fdb_add(port, mac, vid, dynamic);
+}
+
+static int prestera_fdb_del(struct prestera_port *port,
+			    const unsigned char *mac, u16 vid)
+{
+	if (prestera_port_is_lag_member(port))
+		return prestera_hw_lag_fdb_del(port->sw, prestera_port_lag_id(port),
+					      mac, vid);
+	else
+		return prestera_hw_fdb_del(port, mac, vid);
+}
+
+static int prestera_fdb_flush_port_vlan(struct prestera_port *port, u16 vid,
+					u32 mode)
+{
+	if (prestera_port_is_lag_member(port))
+		return prestera_hw_fdb_flush_lag_vlan(port->sw, prestera_port_lag_id(port),
+						      vid, mode);
+	else
+		return prestera_hw_fdb_flush_port_vlan(port, vid, mode);
+}
+
+static int prestera_fdb_flush_port(struct prestera_port *port, u32 mode)
+{
+	if (prestera_port_is_lag_member(port))
+		return prestera_hw_fdb_flush_lag(port->sw, prestera_port_lag_id(port),
+						 mode);
+	else
+		return prestera_hw_fdb_flush_port(port, mode);
+}
+
 static void
 prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan)
 {
@@ -199,11 +238,11 @@ prestera_port_vlan_bridge_leave(struct prestera_port_vlan *port_vlan)
 	last_port = port_count == 1;
 
 	if (last_vlan)
-		prestera_hw_fdb_flush_port(port, fdb_flush_mode);
+		prestera_fdb_flush_port(port, fdb_flush_mode);
 	else if (last_port)
 		prestera_hw_fdb_flush_vlan(port->sw, vid, fdb_flush_mode);
 	else
-		prestera_hw_fdb_flush_port_vlan(port, vid, fdb_flush_mode);
+		prestera_fdb_flush_port_vlan(port, vid, fdb_flush_mode);
 
 	list_del(&port_vlan->br_vlan_head);
 	prestera_bridge_vlan_put(br_vlan);
@@ -312,11 +351,29 @@ __prestera_bridge_port_by_dev(struct prestera_bridge *bridge,
 	return NULL;
 }
 
+static int prestera_match_upper_bridge_dev(struct net_device *dev,
+					   struct netdev_nested_priv *priv)
+{
+	if (netif_is_bridge_master(dev))
+		priv->data = dev;
+
+	return 0;
+}
+
+static struct net_device *prestera_get_upper_bridge_dev(struct net_device *dev)
+{
+	struct netdev_nested_priv priv = { };
+
+	netdev_walk_all_upper_dev_rcu(dev, prestera_match_upper_bridge_dev,
+				      &priv);
+	return priv.data;
+}
+
 static struct prestera_bridge_port *
 prestera_bridge_port_by_dev(struct prestera_switchdev *swdev,
 			    struct net_device *dev)
 {
-	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+	struct net_device *br_dev = prestera_get_upper_bridge_dev(dev);
 	struct prestera_bridge *bridge;
 
 	if (!br_dev)
@@ -723,9 +780,9 @@ static int prestera_port_fdb_set(struct prestera_port *port,
 		vid = bridge->bridge_id;
 
 	if (adding)
-		err = prestera_hw_fdb_add(port, fdb_info->addr, vid, false);
+		err = prestera_fdb_add(port, fdb_info->addr, vid, false);
 	else
-		err = prestera_hw_fdb_del(port, fdb_info->addr, vid);
+		err = prestera_fdb_del(port, fdb_info->addr, vid);
 
 	return err;
 }
@@ -962,15 +1019,15 @@ static int prestera_port_vlans_add(struct prestera_port *port,
 {
 	bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
-	struct net_device *dev = vlan->obj.orig_dev;
+	struct net_device *orig_dev = vlan->obj.orig_dev;
 	struct prestera_bridge_port *br_port;
 	struct prestera_switch *sw = port->sw;
 	struct prestera_bridge *bridge;
 
-	if (netif_is_bridge_master(dev))
+	if (netif_is_bridge_master(orig_dev))
 		return 0;
 
-	br_port = prestera_bridge_port_by_dev(sw->swdev, dev);
+	br_port = prestera_bridge_port_by_dev(sw->swdev, port->dev);
 	if (WARN_ON(!br_port))
 		return -EINVAL;
 
@@ -1002,14 +1059,14 @@ static int prestera_port_obj_add(struct net_device *dev,
 static int prestera_port_vlans_del(struct prestera_port *port,
 				   const struct switchdev_obj_port_vlan *vlan)
 {
-	struct net_device *dev = vlan->obj.orig_dev;
+	struct net_device *orig_dev = vlan->obj.orig_dev;
 	struct prestera_bridge_port *br_port;
 	struct prestera_switch *sw = port->sw;
 
-	if (netif_is_bridge_master(dev))
+	if (netif_is_bridge_master(orig_dev))
 		return -EOPNOTSUPP;
 
-	br_port = prestera_bridge_port_by_dev(sw->swdev, dev);
+	br_port = prestera_bridge_port_by_dev(sw->swdev, port->dev);
 	if (WARN_ON(!br_port))
 		return -EINVAL;
 
@@ -1067,10 +1124,26 @@ static void prestera_fdb_event(struct prestera_switch *sw,
 			       struct prestera_event *evt, void *arg)
 {
 	struct switchdev_notifier_fdb_info info;
+	struct net_device *dev = NULL;
 	struct prestera_port *port;
+	struct prestera_lag *lag;
 
-	port = prestera_find_port(sw, evt->fdb_evt.port_id);
-	if (!port)
+	switch (evt->fdb_evt.type) {
+	case PRESTERA_FDB_ENTRY_TYPE_REG_PORT:
+		port = prestera_find_port(sw, evt->fdb_evt.dest.port_id);
+		if (port)
+			dev = port->dev;
+		break;
+	case PRESTERA_FDB_ENTRY_TYPE_LAG:
+		lag = prestera_lag_by_id(sw, evt->fdb_evt.dest.lag_id);
+		if (lag)
+			dev = lag->dev;
+		break;
+	default:
+		return;
+	}
+
+	if (!dev)
 		return;
 
 	info.addr = evt->fdb_evt.data.mac;
@@ -1082,11 +1155,11 @@ static void prestera_fdb_event(struct prestera_switch *sw,
 	switch (evt->id) {
 	case PRESTERA_FDB_EVENT_LEARNED:
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
-					 port->dev, &info.info, NULL);
+					 dev, &info.info, NULL);
 		break;
 	case PRESTERA_FDB_EVENT_AGED:
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
-					 port->dev, &info.info, NULL);
+					 dev, &info.info, NULL);
 		break;
 	}
 

From 53f8b1b25419a14b784feb6706bfe5bac03c5a75 Mon Sep 17 00:00:00 2001
From: Cristobal Forno <cforno12@linux.ibm.com>
Date: Thu, 10 Jun 2021 11:08:35 -0600
Subject: [PATCH 1383/3292] ibmvnic: Allow device probe if the device is not
 ready at boot

Allow the device to be initialized at a later time if
it is not available at boot. The device will be allowed to probe but
will be given a "down" state. After completing device probe and
registering the net device, the driver will await an interrupt signal
from its partner device, indicating that it is ready for boot. The
driver will schedule a work event to perform the necessary procedure
and begin operation.

Co-developed-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: Cristobal Forno <cforno12@linux.ibm.com>
Acked-by: Lijun Pan <lijunp213@gmail.com>
Reviewed-by: Dany Madden <drt@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 153 ++++++++++++++++++++++++-----
 drivers/net/ethernet/ibm/ibmvnic.h |   6 +-
 2 files changed, 132 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ffd26cdc7bcd..d66e15866315 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -141,6 +141,29 @@ static const struct ibmvnic_stat ibmvnic_stats[] = {
 	{"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)},
 };
 
+static int send_crq_init_complete(struct ibmvnic_adapter *adapter)
+{
+	union ibmvnic_crq crq;
+
+	memset(&crq, 0, sizeof(crq));
+	crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
+	crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE;
+
+	return ibmvnic_send_crq(adapter, &crq);
+}
+
+static int send_version_xchg(struct ibmvnic_adapter *adapter)
+{
+	union ibmvnic_crq crq;
+
+	memset(&crq, 0, sizeof(crq));
+	crq.version_exchange.first = IBMVNIC_CRQ_CMD;
+	crq.version_exchange.cmd = VERSION_EXCHANGE;
+	crq.version_exchange.version = cpu_to_be16(ibmvnic_version);
+
+	return ibmvnic_send_crq(adapter, &crq);
+}
+
 static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
 			  unsigned long length, unsigned long *number,
 			  unsigned long *irq)
@@ -2083,10 +2106,10 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			goto out;
 		}
 
-		/* If the adapter was in PROBE state prior to the reset,
+		/* If the adapter was in PROBE or DOWN state prior to the reset,
 		 * exit here.
 		 */
-		if (reset_state == VNIC_PROBED) {
+		if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) {
 			rc = 0;
 			goto out;
 		}
@@ -2212,10 +2235,10 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
 	if (rc)
 		goto out;
 
-	/* If the adapter was in PROBE state prior to the reset,
+	/* If the adapter was in PROBE or DOWN state prior to the reset,
 	 * exit here.
 	 */
-	if (reset_state == VNIC_PROBED)
+	if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN)
 		goto out;
 
 	rc = ibmvnic_login(netdev);
@@ -2268,6 +2291,76 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
 	return rwi;
 }
 
+/**
+ * do_passive_init - complete probing when partner device is detected.
+ * @adapter: ibmvnic_adapter struct
+ *
+ * If the ibmvnic device does not have a partner device to communicate with at boot
+ * and that partner device comes online at a later time, this function is called
+ * to complete the initialization process of ibmvnic device.
+ * Caller is expected to hold rtnl_lock().
+ *
+ * Returns non-zero if sub-CRQs are not initialized properly leaving the device
+ * in the down state.
+ * Returns 0 upon success and the device is in PROBED state.
+ */
+
+static int do_passive_init(struct ibmvnic_adapter *adapter)
+{
+	unsigned long timeout = msecs_to_jiffies(30000);
+	struct net_device *netdev = adapter->netdev;
+	struct device *dev = &adapter->vdev->dev;
+	int rc;
+
+	netdev_dbg(netdev, "Partner device found, probing.\n");
+
+	adapter->state = VNIC_PROBING;
+	reinit_completion(&adapter->init_done);
+	adapter->init_done_rc = 0;
+	adapter->crq.active = true;
+
+	rc = send_crq_init_complete(adapter);
+	if (rc)
+		goto out;
+
+	rc = send_version_xchg(adapter);
+	if (rc)
+		netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc);
+
+	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+		dev_err(dev, "Initialization sequence timed out\n");
+		rc = -ETIMEDOUT;
+		goto out;
+	}
+
+	rc = init_sub_crqs(adapter);
+	if (rc) {
+		dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc);
+		goto out;
+	}
+
+	rc = init_sub_crq_irqs(adapter);
+	if (rc) {
+		dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc);
+		goto init_failed;
+	}
+
+	netdev->mtu = adapter->req_mtu - ETH_HLEN;
+	netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
+	netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
+
+	adapter->state = VNIC_PROBED;
+	netdev_dbg(netdev, "Probed successfully. Waiting for signal from partner device.\n");
+
+	return 0;
+
+init_failed:
+	release_sub_crqs(adapter, 1);
+out:
+	adapter->state = VNIC_DOWN;
+	return rc;
+}
+
 static void __ibmvnic_reset(struct work_struct *work)
 {
 	struct ibmvnic_rwi *rwi;
@@ -2304,7 +2397,13 @@ static void __ibmvnic_reset(struct work_struct *work)
 		}
 		spin_unlock_irqrestore(&adapter->state_lock, flags);
 
-		if (adapter->force_reset_recovery) {
+		if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) {
+			rtnl_lock();
+			rc = do_passive_init(adapter);
+			rtnl_unlock();
+			if (!rc)
+				netif_carrier_on(adapter->netdev);
+		} else if (adapter->force_reset_recovery) {
 			/* Since we are doing a hard reset now, clear the
 			 * failover_pending flag so we don't ignore any
 			 * future MOBILITY or other resets.
@@ -3773,18 +3872,6 @@ static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter)
 	return 0;
 }
 
-static int send_version_xchg(struct ibmvnic_adapter *adapter)
-{
-	union ibmvnic_crq crq;
-
-	memset(&crq, 0, sizeof(crq));
-	crq.version_exchange.first = IBMVNIC_CRQ_CMD;
-	crq.version_exchange.cmd = VERSION_EXCHANGE;
-	crq.version_exchange.version = cpu_to_be16(ibmvnic_version);
-
-	return ibmvnic_send_crq(adapter, &crq);
-}
-
 struct vnic_login_client_data {
 	u8	type;
 	__be16	len;
@@ -4904,7 +4991,12 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 				complete(&adapter->init_done);
 				adapter->init_done_rc = -EIO;
 			}
-			rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+
+			if (adapter->state == VNIC_DOWN)
+				rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT);
+			else
+				rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+
 			if (rc && rc != -EBUSY) {
 				/* We were unable to schedule the failover
 				 * reset either because the adapter was still
@@ -5327,6 +5419,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	struct ibmvnic_adapter *adapter;
 	struct net_device *netdev;
 	unsigned char *mac_addr_p;
+	bool init_success;
 	int rc;
 
 	dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
@@ -5373,6 +5466,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	init_completion(&adapter->stats_done);
 	clear_bit(0, &adapter->resetting);
 
+	init_success = false;
 	do {
 		rc = init_crq_queue(adapter);
 		if (rc) {
@@ -5382,10 +5476,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		}
 
 		rc = ibmvnic_reset_init(adapter, false);
-		if (rc && rc != EAGAIN)
-			goto ibmvnic_init_fail;
 	} while (rc == EAGAIN);
 
+	/* We are ignoring the error from ibmvnic_reset_init() assuming that the
+	 * partner is not ready. CRQ is not active. When the partner becomes
+	 * ready, we will do the passive init reset.
+	 */
+
+	if (!rc)
+		init_success = true;
+
 	rc = init_stats_buffers(adapter);
 	if (rc)
 		goto ibmvnic_init_fail;
@@ -5394,10 +5494,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	if (rc)
 		goto ibmvnic_stats_fail;
 
-	netdev->mtu = adapter->req_mtu - ETH_HLEN;
-	netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
-	netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
-
 	rc = device_create_file(&dev->dev, &dev_attr_failover);
 	if (rc)
 		goto ibmvnic_dev_file_err;
@@ -5410,7 +5506,14 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 	dev_info(&dev->dev, "ibmvnic registered\n");
 
-	adapter->state = VNIC_PROBED;
+	if (init_success) {
+		adapter->state = VNIC_PROBED;
+		netdev->mtu = adapter->req_mtu - ETH_HLEN;
+		netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
+		netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
+	} else {
+		adapter->state = VNIC_DOWN;
+	}
 
 	adapter->wait_for_reset = false;
 	adapter->last_reset_time = jiffies;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index c1d39a748546..22df602323bc 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -851,14 +851,16 @@ enum vnic_state {VNIC_PROBING = 1,
 		 VNIC_CLOSING,
 		 VNIC_CLOSED,
 		 VNIC_REMOVING,
-		 VNIC_REMOVED};
+		 VNIC_REMOVED,
+		 VNIC_DOWN};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
 			   VNIC_RESET_MOBILITY,
 			   VNIC_RESET_FATAL,
 			   VNIC_RESET_NON_FATAL,
 			   VNIC_RESET_TIMEOUT,
-			   VNIC_RESET_CHANGE_PARAM};
+			   VNIC_RESET_CHANGE_PARAM,
+			   VNIC_RESET_PASSIVE_INIT};
 
 struct ibmvnic_rwi {
 	enum ibmvnic_reset_reason reset_reason;

From 8cc7ebbf5f6e8ca825dba4d0180329857c997c40 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:01 -0500
Subject: [PATCH 1384/3292] net: ipa: don't assume mem array indexed by ID

Change ipa_mem_valid() to iterate over the entries using a u32 index
variable rather than using a memory region ID.  Use the ID found
inside the memory descriptor rather than the loop index.

Change ipa_mem_size_valid() to iterate over the entries but without
assuming the array index is the memory region ID.  "Empty" entries
will have zero size; and we'll temporarily assume such entries have
zero offset as well (they all do, currently).

Similarly, don't assume the mem[] array is indexed by ID in
ipa_mem_config().  There, "empty" entries will have a zero canary
count, so no special assumptions are needed to handle them correctly.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index ef9fdd3b8875..9e504ec27817 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -220,6 +220,7 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	DECLARE_BITMAP(regions, IPA_MEM_COUNT) = { };
 	struct device *dev = &ipa->pdev->dev;
 	enum ipa_mem_id mem_id;
+	u32 i;
 
 	if (mem_data->local_count > IPA_MEM_COUNT) {
 		dev_err(dev, "too many memory regions (%u > %u)\n",
@@ -227,10 +228,10 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 		return false;
 	}
 
-	for (mem_id = 0; mem_id < mem_data->local_count; mem_id++) {
-		const struct ipa_mem *mem = &mem_data->local[mem_id];
+	for (i = 0; i < mem_data->local_count; i++) {
+		const struct ipa_mem *mem = &mem_data->local[i];
 
-		if (mem_id == IPA_MEM_UNDEFINED)
+		if (mem->id == IPA_MEM_UNDEFINED)
 			continue;
 
 		if (__test_and_set_bit(mem->id, regions)) {
@@ -248,7 +249,7 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 		/* It's harmless, but warn if an offset is provided */
 		if (mem->offset)
 			dev_warn(dev, "empty region %u has non-zero offset\n",
-				 mem_id);
+				 mem->id);
 	}
 
 	/* Now see if any required regions are not defined */
@@ -268,16 +269,16 @@ static bool ipa_mem_size_valid(struct ipa *ipa)
 {
 	struct device *dev = &ipa->pdev->dev;
 	u32 limit = ipa->mem_size;
-	enum ipa_mem_id mem_id;
+	u32 i;
 
-	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
-		const struct ipa_mem *mem = &ipa->mem[mem_id];
+	for (i = 0; i < ipa->mem_count; i++) {
+		const struct ipa_mem *mem = &ipa->mem[i];
 
 		if (mem->offset + mem->size <= limit)
 			continue;
 
 		dev_err(dev, "region %u ends beyond memory limit (0x%08x)\n",
-			mem_id, limit);
+			mem->id, limit);
 
 		return false;
 	}
@@ -294,11 +295,11 @@ static bool ipa_mem_size_valid(struct ipa *ipa)
 int ipa_mem_config(struct ipa *ipa)
 {
 	struct device *dev = &ipa->pdev->dev;
-	enum ipa_mem_id mem_id;
 	dma_addr_t addr;
 	u32 mem_size;
 	void *virt;
 	u32 val;
+	u32 i;
 
 	/* Check the advertised location and size of the shared memory area */
 	val = ioread32(ipa->reg_virt + IPA_REG_SHARED_MEM_SIZE_OFFSET);
@@ -330,11 +331,11 @@ int ipa_mem_config(struct ipa *ipa)
 	ipa->zero_virt = virt;
 	ipa->zero_size = IPA_MEM_MAX;
 
-	/* For each region, write "canary" values in the space prior to
-	 * the region's base address if indicated.
+	/* For each defined region, write "canary" values in the
+	 * space prior to the region's base address if indicated.
 	 */
-	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
-		const struct ipa_mem *mem = &ipa->mem[mem_id];
+	for (i = 0; i < ipa->mem_count; i++) {
+		const struct ipa_mem *mem = &ipa->mem[i];
 		u16 canary_count;
 		__le32 *canary;
 

From ce05a9f39607623362aea9147c3dfab7a9f94ccb Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:02 -0500
Subject: [PATCH 1385/3292] net: ipa: clean up header memory validation

Do some general cleanup in ipa_cmd_header_valid():
  - Delay assigning the mem variable until just before it's used.
  - Assign the maximum offset and size values together.
  - Improve comments explaining the single range of memory being
    made up of a modem portion and an AP portion.
  - Record the offset of the combined range in a local variable.
  - Do the initial size assignment right after assigning the offset.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_cmd.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index 525cdf28d9ea..3e5f10d3c131 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -200,41 +200,53 @@ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
 /* Validate the memory region that holds headers */
 static bool ipa_cmd_header_valid(struct ipa *ipa)
 {
-	const struct ipa_mem *mem = &ipa->mem[IPA_MEM_MODEM_HEADER];
 	struct device *dev = &ipa->pdev->dev;
+	const struct ipa_mem *mem;
 	u32 offset_max;
 	u32 size_max;
+	u32 offset;
 	u32 size;
 
-	/* In ipa_cmd_hdr_init_local_add() we record the offset and size
-	 * of the header table memory area.  Make sure the offset and size
-	 * fit in the fields that need to hold them, and that the entire
-	 * range is within the overall IPA memory range.
+	/* In ipa_cmd_hdr_init_local_add() we record the offset and size of
+	 * the header table memory area in an immediate command.  Make sure
+	 * the offset and size fit in the fields that need to hold them, and
+	 * that the entire range is within the overall IPA memory range.
 	 */
 	offset_max = field_max(HDR_INIT_LOCAL_FLAGS_HDR_ADDR_FMASK);
-	if (mem->offset > offset_max ||
-	    ipa->mem_offset > offset_max - mem->offset) {
+	size_max = field_max(HDR_INIT_LOCAL_FLAGS_TABLE_SIZE_FMASK);
+
+	/* The header memory area contains both the modem and AP header
+	 * regions.  The modem portion defines the address of the region.
+	 */
+	mem = &ipa->mem[IPA_MEM_MODEM_HEADER];
+	offset = mem->offset;
+	size = mem->size;
+
+	/* Make sure the offset fits in the IPA command */
+	if (offset > offset_max || ipa->mem_offset > offset_max - offset) {
 		dev_err(dev, "header table region offset too large\n");
 		dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
-			ipa->mem_offset, mem->offset, offset_max);
+			ipa->mem_offset, offset, offset_max);
 
 		return false;
 	}
 
-	size_max = field_max(HDR_INIT_LOCAL_FLAGS_TABLE_SIZE_FMASK);
-	size = ipa->mem[IPA_MEM_MODEM_HEADER].size;
+	/* Add the size of the AP portion to the combined size */
 	size += ipa->mem[IPA_MEM_AP_HEADER].size;
 
+	/* Make sure the combined size fits in the IPA command */
 	if (size > size_max) {
 		dev_err(dev, "header table region size too large\n");
 		dev_err(dev, "    (0x%04x > 0x%08x)\n", size, size_max);
 
 		return false;
 	}
-	if (size > ipa->mem_size || mem->offset > ipa->mem_size - size) {
+
+	/* Make sure the entire combined area fits in IPA memory */
+	if (size > ipa->mem_size || offset > ipa->mem_size - size) {
 		dev_err(dev, "header table region out of range\n");
 		dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
-			mem->offset, size, ipa->mem_size);
+			offset, size, ipa->mem_size);
 
 		return false;
 	}

From 07c525a62a4db207e298064b026b8f3f8da192a6 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:03 -0500
Subject: [PATCH 1386/3292] net: ipa: pass mem_id to ipa_filter_reset_table()

Pass a memory region ID rather than the address of a memory region
descriptor to ipa_filter_reset_table(), to simplify callers.

We can eliminate the check for a zero region size in this function
because ipa_table_reset_add() checks that before adding anything to
the transaction.

Note that here and in subsequent commits there is no need to check
whether a memory region exists, because we will have already
verified that during initialization.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_table.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 3168d72f4245..95a4c2aceb01 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -282,16 +282,14 @@ static void ipa_table_reset_add(struct gsi_trans *trans, bool filter,
  * for the IPv4 and IPv6 non-hashed and hashed filter tables.
  */
 static int
-ipa_filter_reset_table(struct ipa *ipa, const struct ipa_mem *mem, bool modem)
+ipa_filter_reset_table(struct ipa *ipa, enum ipa_mem_id mem_id, bool modem)
 {
+	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	u32 ep_mask = ipa->filter_map;
 	u32 count = hweight32(ep_mask);
 	struct gsi_trans *trans;
 	enum gsi_ee_id ee_id;
 
-	if (!mem->size)
-		return 0;
-
 	trans = ipa_cmd_trans_alloc(ipa, count);
 	if (!trans) {
 		dev_err(&ipa->pdev->dev,
@@ -327,20 +325,18 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
 {
 	int ret;
 
-	ret = ipa_filter_reset_table(ipa, &ipa->mem[IPA_MEM_V4_FILTER], modem);
+	ret = ipa_filter_reset_table(ipa, IPA_MEM_V4_FILTER, modem);
 	if (ret)
 		return ret;
 
-	ret = ipa_filter_reset_table(ipa, &ipa->mem[IPA_MEM_V4_FILTER_HASHED],
-				     modem);
+	ret = ipa_filter_reset_table(ipa, IPA_MEM_V4_FILTER_HASHED, modem);
 	if (ret)
 		return ret;
 
-	ret = ipa_filter_reset_table(ipa, &ipa->mem[IPA_MEM_V6_FILTER], modem);
+	ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER, modem);
 	if (ret)
 		return ret;
-	ret = ipa_filter_reset_table(ipa, &ipa->mem[IPA_MEM_V6_FILTER_HASHED],
-				     modem);
+	ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER_HASHED, modem);
 
 	return ret;
 }

From ce928bf8fec070b2239ede568687bd878032b325 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:04 -0500
Subject: [PATCH 1387/3292] net: ipa: pass mem ID to ipa_mem_zero_region_add()

Pass a memory region ID rather than the address of a memory region
descriptor to ipa_mem_zero_region_add() to simplify callers.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 9e504ec27817..7df5496bdc2e 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -28,9 +28,10 @@
 
 /* Add an immediate command to a transaction that zeroes a memory region */
 static void
-ipa_mem_zero_region_add(struct gsi_trans *trans, const struct ipa_mem *mem)
+ipa_mem_zero_region_add(struct gsi_trans *trans, enum ipa_mem_id mem_id)
 {
 	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
+	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	dma_addr_t addr = ipa->zero_addr;
 
 	if (!mem->size)
@@ -83,11 +84,9 @@ int ipa_mem_setup(struct ipa *ipa)
 
 	ipa_cmd_hdr_init_local_add(trans, offset, size, addr);
 
-	ipa_mem_zero_region_add(trans, &ipa->mem[IPA_MEM_MODEM_PROC_CTX]);
-
-	ipa_mem_zero_region_add(trans, &ipa->mem[IPA_MEM_AP_PROC_CTX]);
-
-	ipa_mem_zero_region_add(trans, &ipa->mem[IPA_MEM_MODEM]);
+	ipa_mem_zero_region_add(trans, IPA_MEM_MODEM_PROC_CTX);
+	ipa_mem_zero_region_add(trans, IPA_MEM_AP_PROC_CTX);
+	ipa_mem_zero_region_add(trans, IPA_MEM_MODEM);
 
 	gsi_trans_commit_wait(trans);
 
@@ -411,11 +410,9 @@ int ipa_mem_zero_modem(struct ipa *ipa)
 		return -EBUSY;
 	}
 
-	ipa_mem_zero_region_add(trans, &ipa->mem[IPA_MEM_MODEM_HEADER]);
-
-	ipa_mem_zero_region_add(trans, &ipa->mem[IPA_MEM_MODEM_PROC_CTX]);
-
-	ipa_mem_zero_region_add(trans, &ipa->mem[IPA_MEM_MODEM]);
+	ipa_mem_zero_region_add(trans, IPA_MEM_MODEM_HEADER);
+	ipa_mem_zero_region_add(trans, IPA_MEM_MODEM_PROC_CTX);
+	ipa_mem_zero_region_add(trans, IPA_MEM_MODEM);
 
 	gsi_trans_commit_wait(trans);
 

From 25116645dbcbd67dc7f1535d395aa3611e31ba88 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:05 -0500
Subject: [PATCH 1388/3292] net: ipa: pass mem_id to ipa_table_reset_add()

Pass a memory region ID rather than the address of a memory region
descriptor to ipa_table_reset_add() to simplify callers.  Similarly,
pass memory region IDs to ipa_table_init_add().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_table.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 95a4c2aceb01..f7ee75bfba74 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -256,14 +256,15 @@ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
 }
 
 static void ipa_table_reset_add(struct gsi_trans *trans, bool filter,
-				u16 first, u16 count, const struct ipa_mem *mem)
+				u16 first, u16 count, enum ipa_mem_id mem_id)
 {
 	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
+	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	dma_addr_t addr;
 	u32 offset;
 	u16 size;
 
-	/* Nothing to do if the table memory regions is empty */
+	/* Nothing to do if the table memory region is empty */
 	if (!mem->size)
 		return;
 
@@ -284,7 +285,6 @@ static void ipa_table_reset_add(struct gsi_trans *trans, bool filter,
 static int
 ipa_filter_reset_table(struct ipa *ipa, enum ipa_mem_id mem_id, bool modem)
 {
-	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	u32 ep_mask = ipa->filter_map;
 	u32 count = hweight32(ep_mask);
 	struct gsi_trans *trans;
@@ -309,7 +309,7 @@ ipa_filter_reset_table(struct ipa *ipa, enum ipa_mem_id mem_id, bool modem)
 		if (endpoint->ee_id != ee_id)
 			continue;
 
-		ipa_table_reset_add(trans, true, endpoint_id, 1, mem);
+		ipa_table_reset_add(trans, true, endpoint_id, 1, mem_id);
 	}
 
 	gsi_trans_commit_wait(trans);
@@ -367,15 +367,13 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
 		count = IPA_ROUTE_AP_COUNT;
 	}
 
+	ipa_table_reset_add(trans, false, first, count, IPA_MEM_V4_ROUTE);
 	ipa_table_reset_add(trans, false, first, count,
-			    &ipa->mem[IPA_MEM_V4_ROUTE]);
-	ipa_table_reset_add(trans, false, first, count,
-			    &ipa->mem[IPA_MEM_V4_ROUTE_HASHED]);
+			    IPA_MEM_V4_ROUTE_HASHED);
 
+	ipa_table_reset_add(trans, false, first, count, IPA_MEM_V6_ROUTE);
 	ipa_table_reset_add(trans, false, first, count,
-			    &ipa->mem[IPA_MEM_V6_ROUTE]);
-	ipa_table_reset_add(trans, false, first, count,
-			    &ipa->mem[IPA_MEM_V6_ROUTE_HASHED]);
+			    IPA_MEM_V6_ROUTE_HASHED);
 
 	gsi_trans_commit_wait(trans);
 
@@ -429,10 +427,12 @@ int ipa_table_hash_flush(struct ipa *ipa)
 
 static void ipa_table_init_add(struct gsi_trans *trans, bool filter,
 			       enum ipa_cmd_opcode opcode,
-			       const struct ipa_mem *mem,
-			       const struct ipa_mem *hash_mem)
+			       enum ipa_mem_id mem_id,
+			       enum ipa_mem_id hash_mem_id)
 {
 	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
+	const struct ipa_mem *hash_mem = &ipa->mem[hash_mem_id];
+	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	dma_addr_t hash_addr;
 	dma_addr_t addr;
 	u16 hash_count;
@@ -473,20 +473,16 @@ int ipa_table_setup(struct ipa *ipa)
 	}
 
 	ipa_table_init_add(trans, false, IPA_CMD_IP_V4_ROUTING_INIT,
-			   &ipa->mem[IPA_MEM_V4_ROUTE],
-			   &ipa->mem[IPA_MEM_V4_ROUTE_HASHED]);
+			   IPA_MEM_V4_ROUTE, IPA_MEM_V4_ROUTE_HASHED);
 
 	ipa_table_init_add(trans, false, IPA_CMD_IP_V6_ROUTING_INIT,
-			   &ipa->mem[IPA_MEM_V6_ROUTE],
-			   &ipa->mem[IPA_MEM_V6_ROUTE_HASHED]);
+			   IPA_MEM_V6_ROUTE, IPA_MEM_V6_ROUTE_HASHED);
 
 	ipa_table_init_add(trans, true, IPA_CMD_IP_V4_FILTER_INIT,
-			   &ipa->mem[IPA_MEM_V4_FILTER],
-			   &ipa->mem[IPA_MEM_V4_FILTER_HASHED]);
+			   IPA_MEM_V4_FILTER, IPA_MEM_V4_FILTER_HASHED);
 
 	ipa_table_init_add(trans, true, IPA_CMD_IP_V6_FILTER_INIT,
-			   &ipa->mem[IPA_MEM_V6_FILTER],
-			   &ipa->mem[IPA_MEM_V6_FILTER_HASHED]);
+			   IPA_MEM_V6_FILTER, IPA_MEM_V6_FILTER_HASHED);
 
 	gsi_trans_commit_wait(trans);
 

From e9f5b2766e706f3020b3d975fee3b42d056b0849 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:06 -0500
Subject: [PATCH 1389/3292] net: ipa: pass memory id to ipa_table_valid_one()

Stop passing most of the Boolean flags to ipa_table_valid_one(), and
just pass a memory region ID to it instead.  We still need to
indicate whether we're operating on a routing or filter table.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_table.c | 44 +++++++++++++------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index f7ee75bfba74..679855b1d549 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -150,29 +150,16 @@ static void ipa_table_validate_build(void)
 }
 
 static bool
-ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed)
+ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route)
 {
+	const struct ipa_mem *mem = &ipa->mem[mem_id];
 	struct device *dev = &ipa->pdev->dev;
-	const struct ipa_mem *mem;
 	u32 size;
 
-	if (route) {
-		if (ipv6)
-			mem = hashed ? &ipa->mem[IPA_MEM_V6_ROUTE_HASHED]
-				     : &ipa->mem[IPA_MEM_V6_ROUTE];
-		else
-			mem = hashed ? &ipa->mem[IPA_MEM_V4_ROUTE_HASHED]
-				     : &ipa->mem[IPA_MEM_V4_ROUTE];
+	if (route)
 		size = IPA_ROUTE_COUNT_MAX * sizeof(__le64);
-	} else {
-		if (ipv6)
-			mem = hashed ? &ipa->mem[IPA_MEM_V6_FILTER_HASHED]
-				     : &ipa->mem[IPA_MEM_V6_FILTER];
-		else
-			mem = hashed ? &ipa->mem[IPA_MEM_V4_FILTER_HASHED]
-				     : &ipa->mem[IPA_MEM_V4_FILTER];
+	else
 		size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64);
-	}
 
 	if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed))
 		return false;
@@ -185,9 +172,8 @@ ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed)
 	if (hashed && !mem->size)
 		return true;
 
-	dev_err(dev, "IPv%c %s%s table region size 0x%02x, expected 0x%02x\n",
-		ipv6 ? '6' : '4', hashed ? "hashed " : "",
-		route ? "route" : "filter", mem->size, size);
+	dev_err(dev, "%s table region %u size 0x%02x, expected 0x%02x\n",
+		route ? "route" : "filter", mem_id, mem->size, size);
 
 	return false;
 }
@@ -195,16 +181,16 @@ ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed)
 /* Verify the filter and route table memory regions are the expected size */
 bool ipa_table_valid(struct ipa *ipa)
 {
-	bool valid = true;
+	bool valid;
 
-	valid = valid && ipa_table_valid_one(ipa, false, false, false);
-	valid = valid && ipa_table_valid_one(ipa, false, false, true);
-	valid = valid && ipa_table_valid_one(ipa, false, true, false);
-	valid = valid && ipa_table_valid_one(ipa, false, true, true);
-	valid = valid && ipa_table_valid_one(ipa, true, false, false);
-	valid = valid && ipa_table_valid_one(ipa, true, false, true);
-	valid = valid && ipa_table_valid_one(ipa, true, true, false);
-	valid = valid && ipa_table_valid_one(ipa, true, true, true);
+	valid = ipa_table_valid_one(IPA_MEM_V4_FILTER, false);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V4_FILTER_HASHED, false);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER, false);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER_HASHED, false);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE, true);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE_HASHED, true);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE, true);
+	valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE_HASHED, true);
 
 	return valid;
 }

From 5e3bc1e5d0021c2efcbc8ba7da7b96c6a502d8bf Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:07 -0500
Subject: [PATCH 1390/3292] net: ipa: introduce ipa_mem_find()

Introduce a new function that abstracts finding information about a
region in IPA-local memory, given its memory region ID.  For now it
simply uses the region ID as an index into the IPA memory array.
If the region is not defined, ipa_mem_find() returns a null pointer.

Update all code that accesses the ipa->mem[] array directly to use
ipa_mem_find() instead.  The return value must be checked for null
when optional memory regions are sought.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_cmd.c   |  8 +++++---
 drivers/net/ipa/ipa_mem.c   | 38 ++++++++++++++++++++++++++-----------
 drivers/net/ipa/ipa_mem.h   |  2 ++
 drivers/net/ipa/ipa_qmi.c   | 32 +++++++++++++++----------------
 drivers/net/ipa/ipa_table.c |  8 ++++----
 drivers/net/ipa/ipa_uc.c    |  3 ++-
 6 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index 3e5f10d3c131..af44ca41189e 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -218,7 +218,7 @@ static bool ipa_cmd_header_valid(struct ipa *ipa)
 	/* The header memory area contains both the modem and AP header
 	 * regions.  The modem portion defines the address of the region.
 	 */
-	mem = &ipa->mem[IPA_MEM_MODEM_HEADER];
+	mem = ipa_mem_find(ipa, IPA_MEM_MODEM_HEADER);
 	offset = mem->offset;
 	size = mem->size;
 
@@ -231,8 +231,10 @@ static bool ipa_cmd_header_valid(struct ipa *ipa)
 		return false;
 	}
 
-	/* Add the size of the AP portion to the combined size */
-	size += ipa->mem[IPA_MEM_AP_HEADER].size;
+	/* Add the size of the AP portion (if defined) to the combined size */
+	mem = ipa_mem_find(ipa, IPA_MEM_AP_HEADER);
+	if (mem)
+		size += mem->size;
 
 	/* Make sure the combined size fits in the IPA command */
 	if (size > size_max) {
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 7df5496bdc2e..633895fc67b6 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -26,12 +26,20 @@
 /* SMEM host id representing the modem. */
 #define QCOM_SMEM_HOST_MODEM	1
 
+const struct ipa_mem *ipa_mem_find(struct ipa *ipa, enum ipa_mem_id mem_id)
+{
+	if (mem_id < IPA_MEM_COUNT)
+		return &ipa->mem[mem_id];
+
+	return NULL;
+}
+
 /* Add an immediate command to a transaction that zeroes a memory region */
 static void
 ipa_mem_zero_region_add(struct gsi_trans *trans, enum ipa_mem_id mem_id)
 {
 	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
-	const struct ipa_mem *mem = &ipa->mem[mem_id];
+	const struct ipa_mem *mem = ipa_mem_find(ipa, mem_id);
 	dma_addr_t addr = ipa->zero_addr;
 
 	if (!mem->size)
@@ -61,6 +69,7 @@ ipa_mem_zero_region_add(struct gsi_trans *trans, enum ipa_mem_id mem_id)
 int ipa_mem_setup(struct ipa *ipa)
 {
 	dma_addr_t addr = ipa->zero_addr;
+	const struct ipa_mem *mem;
 	struct gsi_trans *trans;
 	u32 offset;
 	u16 size;
@@ -75,12 +84,16 @@ int ipa_mem_setup(struct ipa *ipa)
 		return -EBUSY;
 	}
 
-	/* Initialize IPA-local header memory.  The modem and AP header
-	 * regions are contiguous, and initialized together.
+	/* Initialize IPA-local header memory.  The AP header region, if
+	 * present, is contiguous with and follows the modem header region,
+	 * and they are initialized together.
 	 */
-	offset = ipa->mem[IPA_MEM_MODEM_HEADER].offset;
-	size = ipa->mem[IPA_MEM_MODEM_HEADER].size;
-	size += ipa->mem[IPA_MEM_AP_HEADER].size;
+	mem = ipa_mem_find(ipa, IPA_MEM_MODEM_HEADER);
+	offset = mem->offset;
+	size = mem->size;
+	mem = ipa_mem_find(ipa, IPA_MEM_AP_HEADER);
+	if (mem)
+		size += mem->size;
 
 	ipa_cmd_hdr_init_local_add(trans, offset, size, addr);
 
@@ -91,7 +104,8 @@ int ipa_mem_setup(struct ipa *ipa)
 	gsi_trans_commit_wait(trans);
 
 	/* Tell the hardware where the processing context area is located */
-	offset = ipa->mem_offset + ipa->mem[IPA_MEM_MODEM_PROC_CTX].offset;
+	mem = ipa_mem_find(ipa, IPA_MEM_MODEM_PROC_CTX);
+	offset = ipa->mem_offset + mem->offset;
 	val = proc_cntxt_base_addr_encoded(ipa->version, offset);
 	iowrite32(val, ipa->reg_virt + IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET);
 
@@ -294,6 +308,7 @@ static bool ipa_mem_size_valid(struct ipa *ipa)
 int ipa_mem_config(struct ipa *ipa)
 {
 	struct device *dev = &ipa->pdev->dev;
+	const struct ipa_mem *mem;
 	dma_addr_t addr;
 	u32 mem_size;
 	void *virt;
@@ -334,11 +349,11 @@ int ipa_mem_config(struct ipa *ipa)
 	 * space prior to the region's base address if indicated.
 	 */
 	for (i = 0; i < ipa->mem_count; i++) {
-		const struct ipa_mem *mem = &ipa->mem[i];
 		u16 canary_count;
 		__le32 *canary;
 
 		/* Skip over undefined regions */
+		mem = &ipa->mem[i];
 		if (!mem->offset && !mem->size)
 			continue;
 
@@ -361,8 +376,9 @@ int ipa_mem_config(struct ipa *ipa)
 	if (!ipa_cmd_data_valid(ipa))
 		goto err_dma_free;
 
-	/* Verify the microcontroller ring alignment (0 is OK too) */
-	if (ipa->mem[IPA_MEM_UC_EVENT_RING].offset % 1024) {
+	/* Verify the microcontroller ring alignment (if defined) */
+	mem = ipa_mem_find(ipa, IPA_MEM_UC_EVENT_RING);
+	if (mem && mem->offset % 1024) {
 		dev_err(dev, "microcontroller ring not 1024-byte aligned\n");
 		goto err_dma_free;
 	}
@@ -527,7 +543,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
 	 * (in this case, the modem).  An allocation from SMEM is persistent
 	 * until the AP reboots; there is no way to free an allocated SMEM
 	 * region.  Allocation only reserves the space; to use it you need
-	 * to "get" a pointer it (this implies no reference counting).
+	 * to "get" a pointer it (this does not imply reference counting).
 	 * The item might have already been allocated, in which case we
 	 * use it unless the size isn't what we expect.
 	 */
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index effe01f7310a..712b2881be0c 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -90,6 +90,8 @@ struct ipa_mem {
 	u16 canary_count;
 };
 
+const struct ipa_mem *ipa_mem_find(struct ipa *ipa, enum ipa_mem_id mem_id);
+
 int ipa_mem_config(struct ipa *ipa);
 void ipa_mem_deconfig(struct ipa *ipa);
 
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index 593665efbcf9..4661105ce7ab 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -298,32 +298,32 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 	req.platform_type_valid = 1;
 	req.platform_type = IPA_QMI_PLATFORM_TYPE_MSM_ANDROID;
 
-	mem = &ipa->mem[IPA_MEM_MODEM_HEADER];
+	mem = ipa_mem_find(ipa, IPA_MEM_MODEM_HEADER);
 	if (mem->size) {
 		req.hdr_tbl_info_valid = 1;
 		req.hdr_tbl_info.start = ipa->mem_offset + mem->offset;
 		req.hdr_tbl_info.end = req.hdr_tbl_info.start + mem->size - 1;
 	}
 
-	mem = &ipa->mem[IPA_MEM_V4_ROUTE];
+	mem = ipa_mem_find(ipa, IPA_MEM_V4_ROUTE);
 	req.v4_route_tbl_info_valid = 1;
 	req.v4_route_tbl_info.start = ipa->mem_offset + mem->offset;
 	req.v4_route_tbl_info.count = mem->size / sizeof(__le64);
 
-	mem = &ipa->mem[IPA_MEM_V6_ROUTE];
+	mem = ipa_mem_find(ipa, IPA_MEM_V6_ROUTE);
 	req.v6_route_tbl_info_valid = 1;
 	req.v6_route_tbl_info.start = ipa->mem_offset + mem->offset;
 	req.v6_route_tbl_info.count = mem->size / sizeof(__le64);
 
-	mem = &ipa->mem[IPA_MEM_V4_FILTER];
+	mem = ipa_mem_find(ipa, IPA_MEM_V4_FILTER);
 	req.v4_filter_tbl_start_valid = 1;
 	req.v4_filter_tbl_start = ipa->mem_offset + mem->offset;
 
-	mem = &ipa->mem[IPA_MEM_V6_FILTER];
+	mem = ipa_mem_find(ipa, IPA_MEM_V6_FILTER);
 	req.v6_filter_tbl_start_valid = 1;
 	req.v6_filter_tbl_start = ipa->mem_offset + mem->offset;
 
-	mem = &ipa->mem[IPA_MEM_MODEM];
+	mem = ipa_mem_find(ipa, IPA_MEM_MODEM);
 	if (mem->size) {
 		req.modem_mem_info_valid = 1;
 		req.modem_mem_info.start = ipa->mem_offset + mem->offset;
@@ -336,7 +336,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 
 	/* skip_uc_load_valid and skip_uc_load are set above */
 
-	mem = &ipa->mem[IPA_MEM_MODEM_PROC_CTX];
+	mem = ipa_mem_find(ipa, IPA_MEM_MODEM_PROC_CTX);
 	if (mem->size) {
 		req.hdr_proc_ctx_tbl_info_valid = 1;
 		req.hdr_proc_ctx_tbl_info.start =
@@ -347,7 +347,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 
 	/* Nothing to report for the compression table (zip_tbl_info) */
 
-	mem = &ipa->mem[IPA_MEM_V4_ROUTE_HASHED];
+	mem = ipa_mem_find(ipa, IPA_MEM_V4_ROUTE_HASHED);
 	if (mem->size) {
 		req.v4_hash_route_tbl_info_valid = 1;
 		req.v4_hash_route_tbl_info.start =
@@ -355,7 +355,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 		req.v4_hash_route_tbl_info.count = mem->size / sizeof(__le64);
 	}
 
-	mem = &ipa->mem[IPA_MEM_V6_ROUTE_HASHED];
+	mem = ipa_mem_find(ipa, IPA_MEM_V6_ROUTE_HASHED);
 	if (mem->size) {
 		req.v6_hash_route_tbl_info_valid = 1;
 		req.v6_hash_route_tbl_info.start =
@@ -363,22 +363,21 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 		req.v6_hash_route_tbl_info.count = mem->size / sizeof(__le64);
 	}
 
-	mem = &ipa->mem[IPA_MEM_V4_FILTER_HASHED];
+	mem = ipa_mem_find(ipa, IPA_MEM_V4_FILTER_HASHED);
 	if (mem->size) {
 		req.v4_hash_filter_tbl_start_valid = 1;
 		req.v4_hash_filter_tbl_start = ipa->mem_offset + mem->offset;
 	}
 
-	mem = &ipa->mem[IPA_MEM_V6_FILTER_HASHED];
+	mem = ipa_mem_find(ipa, IPA_MEM_V6_FILTER_HASHED);
 	if (mem->size) {
 		req.v6_hash_filter_tbl_start_valid = 1;
 		req.v6_hash_filter_tbl_start = ipa->mem_offset + mem->offset;
 	}
 
-	/* None of the stats fields are valid (IPA v4.0 and above) */
-
+	/* The stats fields are only valid for IPA v4.0+ */
 	if (ipa->version >= IPA_VERSION_4_0) {
-		mem = &ipa->mem[IPA_MEM_STATS_QUOTA_MODEM];
+		mem = ipa_mem_find(ipa, IPA_MEM_STATS_QUOTA_MODEM);
 		if (mem->size) {
 			req.hw_stats_quota_base_addr_valid = 1;
 			req.hw_stats_quota_base_addr =
@@ -387,8 +386,9 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 			req.hw_stats_quota_size = ipa->mem_offset + mem->size;
 		}
 
-		mem = &ipa->mem[IPA_MEM_STATS_DROP];
-		if (mem->size) {
+		/* If the DROP stats region is defined, include it */
+		mem = ipa_mem_find(ipa, IPA_MEM_STATS_DROP);
+		if (mem && mem->size) {
 			req.hw_stats_drop_base_addr_valid = 1;
 			req.hw_stats_drop_base_addr =
 				ipa->mem_offset + mem->offset;
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 679855b1d549..c617a9156f26 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -152,7 +152,7 @@ static void ipa_table_validate_build(void)
 static bool
 ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route)
 {
-	const struct ipa_mem *mem = &ipa->mem[mem_id];
+	const struct ipa_mem *mem = ipa_mem_find(ipa, mem_id);
 	struct device *dev = &ipa->pdev->dev;
 	u32 size;
 
@@ -245,7 +245,7 @@ static void ipa_table_reset_add(struct gsi_trans *trans, bool filter,
 				u16 first, u16 count, enum ipa_mem_id mem_id)
 {
 	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
-	const struct ipa_mem *mem = &ipa->mem[mem_id];
+	const struct ipa_mem *mem = ipa_mem_find(ipa, mem_id);
 	dma_addr_t addr;
 	u32 offset;
 	u16 size;
@@ -417,8 +417,8 @@ static void ipa_table_init_add(struct gsi_trans *trans, bool filter,
 			       enum ipa_mem_id hash_mem_id)
 {
 	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
-	const struct ipa_mem *hash_mem = &ipa->mem[hash_mem_id];
-	const struct ipa_mem *mem = &ipa->mem[mem_id];
+	const struct ipa_mem *hash_mem = ipa_mem_find(ipa, hash_mem_id);
+	const struct ipa_mem *mem = ipa_mem_find(ipa, mem_id);
 	dma_addr_t hash_addr;
 	dma_addr_t addr;
 	u16 hash_count;
diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c
index 2756363e6938..fd9219863234 100644
--- a/drivers/net/ipa/ipa_uc.c
+++ b/drivers/net/ipa/ipa_uc.c
@@ -116,7 +116,8 @@ enum ipa_uc_event {
 
 static struct ipa_uc_mem_area *ipa_uc_shared(struct ipa *ipa)
 {
-	u32 offset = ipa->mem_offset + ipa->mem[IPA_MEM_UC_SHARED].offset;
+	const struct ipa_mem *mem = ipa_mem_find(ipa, IPA_MEM_UC_SHARED);
+	u32 offset = ipa->mem_offset + mem->offset;
 
 	return ipa->mem_virt + offset;
 }

From c61cfb941dcd8c0529a0c0be31bb1722feaa6082 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 10 Jun 2021 14:23:08 -0500
Subject: [PATCH 1391/3292] net: ipa: don't index mem data array by ID

Finally the code handles the IPA memory region array in the
configuration data without assuming it is indexed by region ID.
Get rid of the array index designators where these arrays are
initialized.  As a result, there's no more need to define an
explicitly undefined memory region ID, so get rid of that.

Change ipa_mem_find() so it no longer assumes the ipa->mem[] array
is indexed by memory region ID.  Instead, have it search the array
for the entry having the requested memory ID, and return the address
of the descriptor if found.  Otherwise return NULL.

Stop allowing memory regions to be defined with zero size and zero
canary value.  Check for this condition in ipa_mem_valid_one().
As a result, it is not necessary to check for this case in
ipa_mem_config().

Finally, there is no need for IPA_MEM_UNDEFINED to be defined any
more, so get rid of it.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-v3.5.1.c | 30 ++++++++++----------
 drivers/net/ipa/ipa_data-v4.11.c  | 44 ++++++++++++++---------------
 drivers/net/ipa/ipa_data-v4.2.c   | 36 ++++++++++++------------
 drivers/net/ipa/ipa_data-v4.5.c   | 46 +++++++++++++++----------------
 drivers/net/ipa/ipa_data-v4.9.c   | 46 +++++++++++++++----------------
 drivers/net/ipa/ipa_mem.c         | 38 +++++++++++--------------
 drivers/net/ipa/ipa_mem.h         |  1 -
 7 files changed, 117 insertions(+), 124 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c
index 945d45b72b24..af536ef8c120 100644
--- a/drivers/net/ipa/ipa_data-v3.5.1.c
+++ b/drivers/net/ipa/ipa_data-v3.5.1.c
@@ -271,91 +271,91 @@ static const struct ipa_resource_data ipa_resource_data = {
 
 /* IPA-resident memory region data for an SoC having IPA v3.5.1 */
 static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
+	{
 		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_INFO] = {
+	{
 		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_FILTER] = {
+	{
 		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER] = {
+	{
 		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_HEADER] = {
+	{
 		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x07d0,
 		.size		= 0x0200,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x09d0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM] = {
+	{
 		.id		= IPA_MEM_MODEM,
 		.offset		= 0x0bd8,
 		.size		= 0x1024,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_EVENT_RING] = {
+	{
 		.id		= IPA_MEM_UC_EVENT_RING,
 		.offset		= 0x1c00,
 		.size		= 0x0400,
diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c
index 2ff3fcf4e21f..9353efbd504f 100644
--- a/drivers/net/ipa/ipa_data-v4.11.c
+++ b/drivers/net/ipa/ipa_data-v4.11.c
@@ -220,133 +220,133 @@ static const struct ipa_resource_data ipa_resource_data = {
 
 /* IPA-resident memory region data for an SoC having IPA v4.11 */
 static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
+	{
 		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_INFO] = {
+	{
 		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_FILTER] = {
+	{
 		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER] = {
+	{
 		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_HEADER] = {
+	{
 		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0240,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
+	{
 		.id		= IPA_MEM_AP_HEADER,
 		.offset		= 0x08c8,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x0ad0,
 		.size		= 0x0200,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x0cd0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_NAT_TABLE] = {
+	{
 		.id		= IPA_MEM_NAT_TABLE,
 		.offset		= 0x0ee0,
 		.size		= 0x0d00,
 		.canary_count	= 4,
 	},
-	[IPA_MEM_PDN_CONFIG] = {
+	{
 		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x1be8,
 		.size		= 0x0050,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_QUOTA_MODEM] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x1c40,
 		.size		= 0x0030,
 		.canary_count	= 4,
 	},
-	[IPA_MEM_STATS_QUOTA_AP] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_AP,
 		.offset		= 0x1c70,
 		.size		= 0x0048,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_TETHERING] = {
+	{
 		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x1cb8,
 		.size		= 0x0238,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_DROP] = {
+	{
 		.id		= IPA_MEM_STATS_DROP,
 		.offset		= 0x1ef0,
 		.size		= 0x0020,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM] = {
+	{
 		.id		= IPA_MEM_MODEM,
 		.offset		= 0x1f18,
 		.size		= 0x100c,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_END_MARKER] = {
+	{
 		.id		= IPA_MEM_END_MARKER,
 		.offset		= 0x3000,
 		.size		= 0x0000,
diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c
index f06eb07a7895..3b09b7baa95f 100644
--- a/drivers/net/ipa/ipa_data-v4.2.c
+++ b/drivers/net/ipa/ipa_data-v4.2.c
@@ -219,109 +219,109 @@ static const struct ipa_resource_data ipa_resource_data = {
 
 /* IPA-resident memory region data for an SoC having IPA v4.2 */
 static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
+	{
 		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_INFO] = {
+	{
 		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_FILTER] = {
+	{
 		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0290,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0310,
 		.size		= 0,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER] = {
+	{
 		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0318,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0398,
 		.size		= 0,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x03a0,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0420,
 		.size		= 0,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0428,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_HEADER] = {
+	{
 		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x04a8,
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x05f0,
 		.size		= 0x0200,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x07f0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_PDN_CONFIG] = {
+	{
 		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x09f8,
 		.size		= 0x0050,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_STATS_QUOTA_MODEM] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x0a50,
 		.size		= 0x0060,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_STATS_TETHERING] = {
+	{
 		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x0ab0,
 		.size		= 0x0140,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM] = {
+	{
 		.id		= IPA_MEM_MODEM,
 		.offset		= 0x0bf0,
 		.size		= 0x140c,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_END_MARKER] = {
+	{
 		.id		= IPA_MEM_END_MARKER,
 		.offset		= 0x2000,
 		.size		= 0,
diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c
index 1c8a9099639a..a99b6478fa3a 100644
--- a/drivers/net/ipa/ipa_data-v4.5.c
+++ b/drivers/net/ipa/ipa_data-v4.5.c
@@ -265,139 +265,139 @@ static const struct ipa_resource_data ipa_resource_data = {
 
 /* IPA-resident memory region data for an SoC having IPA v4.5 */
 static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
+	{
 		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_INFO] = {
+	{
 		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_FILTER] = {
+	{
 		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER] = {
+	{
 		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_HEADER] = {
+	{
 		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0240,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
+	{
 		.id		= IPA_MEM_AP_HEADER,
 		.offset		= 0x08c8,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x0ad0,
 		.size		= 0x0b20,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x15f0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_NAT_TABLE] = {
+	{
 		.id		= IPA_MEM_NAT_TABLE,
 		.offset		= 0x1800,
 		.size		= 0x0d00,
 		.canary_count	= 4,
 	},
-	[IPA_MEM_STATS_QUOTA_MODEM] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x2510,
 		.size		= 0x0030,
 		.canary_count	= 4,
 	},
-	[IPA_MEM_STATS_QUOTA_AP] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_AP,
 		.offset		= 0x2540,
 		.size		= 0x0048,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_TETHERING] = {
+	{
 		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x2588,
 		.size		= 0x0238,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_FILTER_ROUTE] = {
+	{
 		.id		= IPA_MEM_STATS_FILTER_ROUTE,
 		.offset		= 0x27c0,
 		.size		= 0x0800,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_DROP] = {
+	{
 		.id		= IPA_MEM_STATS_DROP,
 		.offset		= 0x2fc0,
 		.size		= 0x0020,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM] = {
+	{
 		.id		= IPA_MEM_MODEM,
 		.offset		= 0x2fe8,
 		.size		= 0x0800,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_UC_EVENT_RING] = {
+	{
 		.id		= IPA_MEM_UC_EVENT_RING,
 		.offset		= 0x3800,
 		.size		= 0x1000,
 		.canary_count	= 1,
 	},
-	[IPA_MEM_PDN_CONFIG] = {
+	{
 		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x4800,
 		.size		= 0x0050,
diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c
index f77169709eb2..798d43e1eb13 100644
--- a/drivers/net/ipa/ipa_data-v4.9.c
+++ b/drivers/net/ipa/ipa_data-v4.9.c
@@ -263,139 +263,139 @@ static const struct ipa_resource_data ipa_resource_data = {
 
 /* IPA-resident memory region data for an SoC having IPA v4.9 */
 static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
+	{
 		.id		= IPA_MEM_UC_SHARED,
 		.offset		= 0x0000,
 		.size		= 0x0080,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_UC_INFO] = {
+	{
 		.id		= IPA_MEM_UC_INFO,
 		.offset		= 0x0080,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_FILTER_HASHED,
 		.offset		= 0x0288,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_FILTER] = {
+	{
 		.id		= IPA_MEM_V4_FILTER,
 		.offset		= 0x0308,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_FILTER_HASHED,
 		.offset		= 0x0388,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_FILTER] = {
+	{
 		.id		= IPA_MEM_V6_FILTER,
 		.offset		= 0x0408,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE_HASHED,
 		.offset		= 0x0488,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V4_ROUTE] = {
+	{
 		.id		= IPA_MEM_V4_ROUTE,
 		.offset		= 0x0508,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE_HASHED,
 		.offset		= 0x0588,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_V6_ROUTE] = {
+	{
 		.id		= IPA_MEM_V6_ROUTE,
 		.offset		= 0x0608,
 		.size		= 0x0078,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_MODEM_HEADER] = {
+	{
 		.id		= IPA_MEM_MODEM_HEADER,
 		.offset		= 0x0688,
 		.size		= 0x0240,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
+	{
 		.id		= IPA_MEM_AP_HEADER,
 		.offset		= 0x08c8,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_MODEM_PROC_CTX,
 		.offset		= 0x0ad0,
 		.size		= 0x0b20,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_PROC_CTX] = {
+	{
 		.id		= IPA_MEM_AP_PROC_CTX,
 		.offset		= 0x15f0,
 		.size		= 0x0200,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_NAT_TABLE] = {
+	{
 		.id		= IPA_MEM_NAT_TABLE,
 		.offset		= 0x1800,
 		.size		= 0x0d00,
 		.canary_count	= 4,
 	},
-	[IPA_MEM_STATS_QUOTA_MODEM] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_MODEM,
 		.offset		= 0x2510,
 		.size		= 0x0030,
 		.canary_count	= 4,
 	},
-	[IPA_MEM_STATS_QUOTA_AP] = {
+	{
 		.id		= IPA_MEM_STATS_QUOTA_AP,
 		.offset		= 0x2540,
 		.size		= 0x0048,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_TETHERING] = {
+	{
 		.id		= IPA_MEM_STATS_TETHERING,
 		.offset		= 0x2588,
 		.size		= 0x0238,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_FILTER_ROUTE] = {
+	{
 		.id		= IPA_MEM_STATS_FILTER_ROUTE,
 		.offset		= 0x27c0,
 		.size		= 0x0800,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_DROP] = {
+	{
 		.id		= IPA_MEM_STATS_DROP,
 		.offset		= 0x2fc0,
 		.size		= 0x0020,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_MODEM] = {
+	{
 		.id		= IPA_MEM_MODEM,
 		.offset		= 0x2fe8,
 		.size		= 0x0800,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_UC_EVENT_RING] = {
+	{
 		.id		= IPA_MEM_UC_EVENT_RING,
 		.offset		= 0x3800,
 		.size		= 0x1000,
 		.canary_count	= 1,
 	},
-	[IPA_MEM_PDN_CONFIG] = {
+	{
 		.id		= IPA_MEM_PDN_CONFIG,
 		.offset		= 0x4800,
 		.size		= 0x0050,
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 633895fc67b6..4337b0920d3d 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -28,8 +28,14 @@
 
 const struct ipa_mem *ipa_mem_find(struct ipa *ipa, enum ipa_mem_id mem_id)
 {
-	if (mem_id < IPA_MEM_COUNT)
-		return &ipa->mem[mem_id];
+	u32 i;
+
+	for (i = 0; i < ipa->mem_count; i++) {
+		const struct ipa_mem *mem = &ipa->mem[i];
+
+		if (mem->id == mem_id)
+			return mem;
+	}
 
 	return NULL;
 }
@@ -209,6 +215,11 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem)
 		return false;
 	}
 
+	if (!mem->size && !mem->canary_count) {
+		dev_err(dev, "empty memory region %u\n", mem_id);
+		return false;
+	}
+
 	/* Other than modem memory, sizes must be a multiple of 8 */
 	size_multiple = mem_id == IPA_MEM_MODEM ? 4 : 8;
 	if (mem->size % size_multiple)
@@ -244,25 +255,14 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	for (i = 0; i < mem_data->local_count; i++) {
 		const struct ipa_mem *mem = &mem_data->local[i];
 
-		if (mem->id == IPA_MEM_UNDEFINED)
-			continue;
-
 		if (__test_and_set_bit(mem->id, regions)) {
 			dev_err(dev, "duplicate memory region %u\n", mem->id);
 			return false;
 		}
 
 		/* Defined regions have non-zero size and/or canary count */
-		if (mem->size || mem->canary_count) {
-			if (ipa_mem_valid_one(ipa, mem))
-				continue;
+		if (!ipa_mem_valid_one(ipa, mem))
 			return false;
-		}
-
-		/* It's harmless, but warn if an offset is provided */
-		if (mem->offset)
-			dev_warn(dev, "empty region %u has non-zero offset\n",
-				 mem->id);
 	}
 
 	/* Now see if any required regions are not defined */
@@ -349,20 +349,14 @@ int ipa_mem_config(struct ipa *ipa)
 	 * space prior to the region's base address if indicated.
 	 */
 	for (i = 0; i < ipa->mem_count; i++) {
-		u16 canary_count;
+		u16 canary_count = ipa->mem[i].canary_count;
 		__le32 *canary;
 
-		/* Skip over undefined regions */
-		mem = &ipa->mem[i];
-		if (!mem->offset && !mem->size)
-			continue;
-
-		canary_count = mem->canary_count;
 		if (!canary_count)
 			continue;
 
 		/* Write canary values in the space before the region */
-		canary = ipa->mem_virt + ipa->mem_offset + mem->offset;
+		canary = ipa->mem_virt + ipa->mem_offset + ipa->mem[i].offset;
 		do
 			*--canary = IPA_MEM_CANARY_VAL;
 		while (--canary_count);
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index 712b2881be0c..570bfdd99bff 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -43,7 +43,6 @@ struct ipa_mem_data;
 
 /* IPA-resident memory region ids */
 enum ipa_mem_id {
-	IPA_MEM_UNDEFINED = 0,		/* undefined region */
 	IPA_MEM_UC_SHARED,		/* 0 canaries */
 	IPA_MEM_UC_INFO,		/* 0 canaries */
 	IPA_MEM_V4_FILTER_HASHED,	/* 2 canaries */

From 6ddb5680085a3eefe0c6267e3514060045a13c95 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 10:27:01 +0800
Subject: [PATCH 1392/3292] audit: remove trailing spaces and tabs

Run the following command to find and remove the trailing spaces and tabs:

sed -r -i 's/[ \t]+$//' <audit_files>

The files to be checked are as follows:
kernel/audit*
include/linux/audit.h
include/uapi/linux/audit.h

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h | 4 ++--
 kernel/audit.h             | 2 +-
 kernel/auditsc.c           | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index cd2d8279a5e4..daa481729e9b 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -48,7 +48,7 @@
  * 2500 - 2999 future user space (maybe integrity labels and related events)
  *
  * Messages from 1000-1199 are bi-directional. 1200-1299 & 2100 - 2999 are
- * exclusively user space. 1300-2099 is kernel --> user space 
+ * exclusively user space. 1300-2099 is kernel --> user space
  * communication.
  */
 #define AUDIT_GET		1000	/* Get status */
@@ -78,7 +78,7 @@
 #define AUDIT_LAST_USER_MSG	1199
 #define AUDIT_FIRST_USER_MSG2	2100	/* More user space messages */
 #define AUDIT_LAST_USER_MSG2	2999
- 
+
 #define AUDIT_DAEMON_START      1200    /* Daemon startup record */
 #define AUDIT_DAEMON_END        1201    /* Daemon normal stop record */
 #define AUDIT_DAEMON_ABORT      1202    /* Daemon error stop record */
diff --git a/kernel/audit.h b/kernel/audit.h
index e518ad9374fc..b565ea16c0a5 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
-/* audit -- definition of audit_context structure and supporting types 
+/* audit -- definition of audit_context structure and supporting types
  *
  * Copyright 2003-2004 Red Hat, Inc.
  * Copyright 2005 Hewlett-Packard Development Company, L.P.
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 123f9dc12665..8dd73a64f921 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -343,13 +343,13 @@ static int audit_compare_uid(kuid_t uid,
 {
 	struct audit_names *n;
 	int rc;
- 
+
 	if (name) {
 		rc = audit_uid_comparator(uid, f->op, name->uid);
 		if (rc)
 			return rc;
 	}
- 
+
 	if (ctx) {
 		list_for_each_entry(n, &ctx->names_list, list) {
 			rc = audit_uid_comparator(uid, f->op, n->uid);
@@ -367,13 +367,13 @@ static int audit_compare_gid(kgid_t gid,
 {
 	struct audit_names *n;
 	int rc;
- 
+
 	if (name) {
 		rc = audit_gid_comparator(gid, f->op, name->gid);
 		if (rc)
 			return rc;
 	}
- 
+
 	if (ctx) {
 		list_for_each_entry(n, &ctx->names_list, list) {
 			rc = audit_gid_comparator(gid, f->op, n->gid);

From 648f2c6100cfa18e7dfe43bc0b9c3b73560d623c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Wed, 9 Jun 2021 09:37:17 -0700
Subject: [PATCH 1393/3292] selinux: use __GFP_NOWARN with GFP_NOWAIT in the
 AVC

In the field, we have seen lots of allocation failure from the call
path below.

06-03 13:29:12.999 1010315 31557 31557 W Binder  : 31542_2: page allocation failure: order:0, mode:0x800(GFP_NOWAIT), nodemask=(null),cpuset=background,mems_allowed=0
...
...
06-03 13:29:12.999 1010315 31557 31557 W Call trace:
06-03 13:29:12.999 1010315 31557 31557 W         : dump_backtrace.cfi_jt+0x0/0x8
06-03 13:29:12.999 1010315 31557 31557 W         : dump_stack+0xc8/0x14c
06-03 13:29:12.999 1010315 31557 31557 W         : warn_alloc+0x158/0x1c8
06-03 13:29:12.999 1010315 31557 31557 W         : __alloc_pages_slowpath+0x9d8/0xb80
06-03 13:29:12.999 1010315 31557 31557 W         : __alloc_pages_nodemask+0x1c4/0x430
06-03 13:29:12.999 1010315 31557 31557 W         : allocate_slab+0xb4/0x390
06-03 13:29:12.999 1010315 31557 31557 W         : ___slab_alloc+0x12c/0x3a4
06-03 13:29:12.999 1010315 31557 31557 W         : kmem_cache_alloc+0x358/0x5e4
06-03 13:29:12.999 1010315 31557 31557 W         : avc_alloc_node+0x30/0x184
06-03 13:29:12.999 1010315 31557 31557 W         : avc_update_node+0x54/0x4f0
06-03 13:29:12.999 1010315 31557 31557 W         : avc_has_extended_perms+0x1a4/0x460
06-03 13:29:12.999 1010315 31557 31557 W         : selinux_file_ioctl+0x320/0x3d0
06-03 13:29:12.999 1010315 31557 31557 W         : __arm64_sys_ioctl+0xec/0x1fc
06-03 13:29:12.999 1010315 31557 31557 W         : el0_svc_common+0xc0/0x24c
06-03 13:29:12.999 1010315 31557 31557 W         : el0_svc+0x28/0x88
06-03 13:29:12.999 1010315 31557 31557 W         : el0_sync_handler+0x8c/0xf0
06-03 13:29:12.999 1010315 31557 31557 W         : el0_sync+0x1a4/0x1c0
..
..
06-03 13:29:12.999 1010315 31557 31557 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:12.999 1010315 31557 31557 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:12.999 1010315 31557 31557 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:12.999 1010161 10686 10686 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:12.999 1010161 10686 10686 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:12.999 1010161 10686 10686 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:12.999 1010161 10686 10686 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:12.999 1010161 10686 10686 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:12.999 1010161 10686 10686 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:12.999 1010161 10686 10686 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:12.999 1010161 10686 10686 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:12.999 1010161 10686 10686 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:13.000 1010161 10686 10686 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:13.000 1010161 10686 10686 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:13.000 1010161 10686 10686 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:13.000 1010161 10686 10686 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:13.000 1010161 10686 10686 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:13.000 1010161 10686 10686 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:13.000 1010161 10686 10686 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:13.000 1010161 10686 10686 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:13.000 1010161 10686 10686 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:13.000 10230 30892 30892 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:13.000 10230 30892 30892 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0
06-03 13:29:13.000 10230 30892 30892 W node 0  : slabs: 57, objs: 2907, free: 0
06-03 13:29:13.000 10230 30892 30892 W SLUB    : Unable to allocate memory on node -1, gfp=0x900(GFP_NOWAIT|__GFP_ZERO)
06-03 13:29:13.000 10230 30892 30892 W cache   : avc_node, object size: 72, buffer size: 80, default order: 0, min order: 0

Based on [1], selinux is tolerate for failure of memory allocation.
Then, use __GFP_NOWARN together.

[1] 476accbe2f6e ("selinux: use GFP_NOWAIT in the AVC kmem_caches")

Signed-off-by: Minchan Kim <minchan@kernel.org>
[PM: subj fix, line wraps, normalized commit refs]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/avc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index c90f45587a0c..6417ed49661f 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -297,26 +297,27 @@ static struct avc_xperms_decision_node
 	struct avc_xperms_decision_node *xpd_node;
 	struct extended_perms_decision *xpd;
 
-	xpd_node = kmem_cache_zalloc(avc_xperms_decision_cachep, GFP_NOWAIT);
+	xpd_node = kmem_cache_zalloc(avc_xperms_decision_cachep,
+				     GFP_NOWAIT | __GFP_NOWARN);
 	if (!xpd_node)
 		return NULL;
 
 	xpd = &xpd_node->xpd;
 	if (which & XPERMS_ALLOWED) {
 		xpd->allowed = kmem_cache_zalloc(avc_xperms_data_cachep,
-						GFP_NOWAIT);
+						GFP_NOWAIT | __GFP_NOWARN);
 		if (!xpd->allowed)
 			goto error;
 	}
 	if (which & XPERMS_AUDITALLOW) {
 		xpd->auditallow = kmem_cache_zalloc(avc_xperms_data_cachep,
-						GFP_NOWAIT);
+						GFP_NOWAIT | __GFP_NOWARN);
 		if (!xpd->auditallow)
 			goto error;
 	}
 	if (which & XPERMS_DONTAUDIT) {
 		xpd->dontaudit = kmem_cache_zalloc(avc_xperms_data_cachep,
-						GFP_NOWAIT);
+						GFP_NOWAIT | __GFP_NOWARN);
 		if (!xpd->dontaudit)
 			goto error;
 	}
@@ -344,7 +345,7 @@ static struct avc_xperms_node *avc_xperms_alloc(void)
 {
 	struct avc_xperms_node *xp_node;
 
-	xp_node = kmem_cache_zalloc(avc_xperms_cachep, GFP_NOWAIT);
+	xp_node = kmem_cache_zalloc(avc_xperms_cachep, GFP_NOWAIT | __GFP_NOWARN);
 	if (!xp_node)
 		return xp_node;
 	INIT_LIST_HEAD(&xp_node->xpd_head);
@@ -500,7 +501,7 @@ static struct avc_node *avc_alloc_node(struct selinux_avc *avc)
 {
 	struct avc_node *node;
 
-	node = kmem_cache_zalloc(avc_node_cachep, GFP_NOWAIT);
+	node = kmem_cache_zalloc(avc_node_cachep, GFP_NOWAIT | __GFP_NOWARN);
 	if (!node)
 		goto out;
 

From 7a7ae1eba24a04fdaf84ef6a11760b5b8db3f723 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Tue, 8 Jun 2021 19:27:46 -0700
Subject: [PATCH 1394/3292] xfrm: policy: fix a spelling mistake

Fix a spelling mistake.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ce500f847b99..1e24b21457f7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3247,7 +3247,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
 
 /*
  * 0 or more than 0 is returned when validation is succeeded (either bypass
- * because of optional transport mode, or next index of the mathced secpath
+ * because of optional transport mode, or next index of the matched secpath
  * state with the template.
  * -1 is returned when no matching template is found.
  * Otherwise "-2 - errored_index" is returned.

From 1f9482aa8d412b4ba06ce6ab8e333fb8ca29a06e Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 14 May 2021 19:42:27 -0700
Subject: [PATCH 1395/3292] mwifiex: bring down link before deleting interface

We can deadlock when rmmod'ing the driver or going through firmware
reset, because the cfg80211_unregister_wdev() has to bring down the link
for us, ... which then grab the same wiphy lock.

nl80211_del_interface() already handles a very similar case, with a nice
description:

        /*
         * We hold RTNL, so this is safe, without RTNL opencount cannot
         * reach 0, and thus the rdev cannot be deleted.
         *
         * We need to do it for the dev_close(), since that will call
         * the netdev notifiers, and we need to acquire the mutex there
         * but don't know if we get there from here or from some other
         * place (e.g. "ip link set ... down").
         */
        mutex_unlock(&rdev->wiphy.mtx);
...

Do similarly for mwifiex teardown, by ensuring we bring the link down
first.

Sample deadlock trace:

[  247.103516] INFO: task rmmod:2119 blocked for more than 123 seconds.
[  247.110630]       Not tainted 5.12.4 #5
[  247.115796] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  247.124557] task:rmmod           state:D stack:    0 pid: 2119 ppid:  2114 flags:0x00400208
[  247.133905] Call trace:
[  247.136644]  __switch_to+0x130/0x170
[  247.140643]  __schedule+0x714/0xa0c
[  247.144548]  schedule_preempt_disabled+0x88/0xf4
[  247.149714]  __mutex_lock_common+0x43c/0x750
[  247.154496]  mutex_lock_nested+0x5c/0x68
[  247.158884]  cfg80211_netdev_notifier_call+0x280/0x4e0 [cfg80211]
[  247.165769]  raw_notifier_call_chain+0x4c/0x78
[  247.170742]  call_netdevice_notifiers_info+0x68/0xa4
[  247.176305]  __dev_close_many+0x7c/0x138
[  247.180693]  dev_close_many+0x7c/0x10c
[  247.184893]  unregister_netdevice_many+0xfc/0x654
[  247.190158]  unregister_netdevice_queue+0xb4/0xe0
[  247.195424]  _cfg80211_unregister_wdev+0xa4/0x204 [cfg80211]
[  247.201816]  cfg80211_unregister_wdev+0x20/0x2c [cfg80211]
[  247.208016]  mwifiex_del_virtual_intf+0xc8/0x188 [mwifiex]
[  247.214174]  mwifiex_uninit_sw+0x158/0x1b0 [mwifiex]
[  247.219747]  mwifiex_remove_card+0x38/0xa0 [mwifiex]
[  247.225316]  mwifiex_pcie_remove+0xd0/0xe0 [mwifiex_pcie]
[  247.231451]  pci_device_remove+0x50/0xe0
[  247.235849]  device_release_driver_internal+0x110/0x1b0
[  247.241701]  driver_detach+0x5c/0x9c
[  247.245704]  bus_remove_driver+0x84/0xb8
[  247.250095]  driver_unregister+0x3c/0x60
[  247.254486]  pci_unregister_driver+0x2c/0x90
[  247.259267]  cleanup_module+0x18/0xcdc [mwifiex_pcie]

Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/linux-wireless/98392296-40ee-6300-369c-32e16cff3725@gmail.com/
Link: https://lore.kernel.org/linux-wireless/ab4d00ce52f32bd8e45ad0448a44737e@bewaar.me/
Reported-by: Maximilian Luz <luzmaximilian@gmail.com>
Reported-by: dave@bewaar.me
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Tested-by: Maximilian Luz <luzmaximilian@gmail.com>
Tested-by: Dave Olsthoorn <dave@bewaar.me>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210515024227.2159311-1-briannorris@chromium.org
---
 drivers/net/wireless/marvell/mwifiex/main.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 529dfd8b7ae8..17399d4aa129 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1445,11 +1445,18 @@ static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter)
 		if (!priv)
 			continue;
 		rtnl_lock();
-		wiphy_lock(adapter->wiphy);
 		if (priv->netdev &&
-		    priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED)
+		    priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) {
+			/*
+			 * Close the netdev now, because if we do it later, the
+			 * netdev notifiers will need to acquire the wiphy lock
+			 * again --> deadlock.
+			 */
+			dev_close(priv->wdev.netdev);
+			wiphy_lock(adapter->wiphy);
 			mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev);
-		wiphy_unlock(adapter->wiphy);
+			wiphy_unlock(adapter->wiphy);
+		}
 		rtnl_unlock();
 	}
 

From 9acf4d3b9ec15f27a7d027c4ae4736c2fb967391 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 11 Jun 2021 12:50:10 +0200
Subject: [PATCH 1396/3292] xfrm: ipv6: add xfrm6_hdr_offset helper

This moves the ->hdr_offset indirect call to a new helper.

A followup patch can then modify the new function to replace
the indirect call by direct calls to the required hdr_offset helper.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_output.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4cb0ff4dcf4..6b44b6e738f7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -77,6 +77,11 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
+static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
+{
+	return x->type->hdr_offset(x, skb, prevhdr);
+}
+
 /* Add encapsulation header.
  *
  * The IP header and mutable extension headers will be moved forward to make
@@ -92,7 +97,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ipv6_hdr(skb);
 	skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 
-	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
 	if (hdr_len < 0)
 		return hdr_len;
 	skb_set_mac_header(skb,
@@ -122,7 +127,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	iph = ipv6_hdr(skb);
 
-	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+	hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
 	if (hdr_len < 0)
 		return hdr_len;
 	skb_set_mac_header(skb,

From 37b9e7eb55659b270f0e8aebd98308716d935586 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 11 Jun 2021 12:50:11 +0200
Subject: [PATCH 1397/3292] xfrm: ipv6: move mip6_destopt_offset into xfrm core

This helper is relatively small, just move this to the xfrm core
and call it directly.

Next patch does the same for the ROUTING type.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/mip6.c        | 49 ------------------------------------
 net/xfrm/xfrm_output.c | 57 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 49 deletions(-)

diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index bc560e1664aa..fba3b56a7dd2 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -247,54 +247,6 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
 	return err;
 }
 
-static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
-			       u8 **nexthdr)
-{
-	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr =
-				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
-	const unsigned char *nh = skb_network_header(skb);
-	unsigned int packet_len = skb_tail_pointer(skb) -
-		skb_network_header(skb);
-	int found_rhdr = 0;
-
-	*nexthdr = &ipv6_hdr(skb)->nexthdr;
-
-	while (offset + 1 <= packet_len) {
-
-		switch (**nexthdr) {
-		case NEXTHDR_HOP:
-			break;
-		case NEXTHDR_ROUTING:
-			found_rhdr = 1;
-			break;
-		case NEXTHDR_DEST:
-			/*
-			 * HAO MUST NOT appear more than once.
-			 * XXX: It is better to try to find by the end of
-			 * XXX: packet if HAO exists.
-			 */
-			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
-				net_dbg_ratelimited("mip6: hao exists already, override\n");
-				return offset;
-			}
-
-			if (found_rhdr)
-				return offset;
-
-			break;
-		default:
-			return offset;
-		}
-
-		offset += ipv6_optlen(exthdr);
-		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-	}
-
-	return offset;
-}
-
 static int mip6_destopt_init_state(struct xfrm_state *x)
 {
 	if (x->id.spi) {
@@ -332,7 +284,6 @@ static const struct xfrm_type mip6_destopt_type = {
 	.input		= mip6_destopt_input,
 	.output		= mip6_destopt_output,
 	.reject		= mip6_destopt_reject,
-	.hdr_offset	= mip6_destopt_offset,
 };
 
 static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 6b44b6e738f7..29959054a535 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -77,8 +77,65 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
+			       u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb_tail_pointer(skb) -
+		skb_network_header(skb);
+	int found_rhdr = 0;
+
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			/* HAO MUST NOT appear more than once.
+			 * XXX: It is better to try to find by the end of
+			 * XXX: packet if HAO exists.
+			 */
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+				net_dbg_ratelimited("mip6: hao exists already, override\n");
+				return offset;
+			}
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+	}
+
+	return offset;
+}
+#endif
+
 static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
 {
+	switch (x->type->proto) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+	case IPPROTO_DSTOPTS:
+		return mip6_destopt_offset(x, skb, prevhdr);
+#endif
+	default:
+		break;
+	}
+
 	return x->type->hdr_offset(x, skb, prevhdr);
 }
 

From 848b18fb7fbd2fa5bc4fc2263bb69956fb86120d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 11 Jun 2021 12:50:12 +0200
Subject: [PATCH 1398/3292] xfrm: ipv6: move mip6_rthdr_offset into xfrm core

Place the call into the xfrm core.  After this all remaining users
set the hdr_offset function pointer to the same function which opens
the possiblity to remove the indirection.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/mip6.c        | 48 -----------------------------------------
 net/xfrm/xfrm_output.c | 49 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index fba3b56a7dd2..aeb35d26e474 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -333,53 +333,6 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
-static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
-			     u8 **nexthdr)
-{
-	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr =
-				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
-	const unsigned char *nh = skb_network_header(skb);
-	unsigned int packet_len = skb_tail_pointer(skb) -
-		skb_network_header(skb);
-	int found_rhdr = 0;
-
-	*nexthdr = &ipv6_hdr(skb)->nexthdr;
-
-	while (offset + 1 <= packet_len) {
-
-		switch (**nexthdr) {
-		case NEXTHDR_HOP:
-			break;
-		case NEXTHDR_ROUTING:
-			if (offset + 3 <= packet_len) {
-				struct ipv6_rt_hdr *rt;
-				rt = (struct ipv6_rt_hdr *)(nh + offset);
-				if (rt->type != 0)
-					return offset;
-			}
-			found_rhdr = 1;
-			break;
-		case NEXTHDR_DEST:
-			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
-				return offset;
-
-			if (found_rhdr)
-				return offset;
-
-			break;
-		default:
-			return offset;
-		}
-
-		offset += ipv6_optlen(exthdr);
-		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-	}
-
-	return offset;
-}
-
 static int mip6_rthdr_init_state(struct xfrm_state *x)
 {
 	if (x->id.spi) {
@@ -413,7 +366,6 @@ static const struct xfrm_type mip6_rthdr_type = {
 	.destructor	= mip6_rthdr_destroy,
 	.input		= mip6_rthdr_input,
 	.output		= mip6_rthdr_output,
-	.hdr_offset	= mip6_rthdr_offset,
 };
 
 static int __init mip6_init(void)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 29959054a535..1734339b6dd0 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -123,6 +123,53 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 	return offset;
 }
+
+static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
+			     u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb_tail_pointer(skb) -
+		skb_network_header(skb);
+	int found_rhdr = 0;
+
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+		switch (**nexthdr) {
+		case NEXTHDR_HOP:
+			break;
+		case NEXTHDR_ROUTING:
+			if (offset + 3 <= packet_len) {
+				struct ipv6_rt_hdr *rt;
+
+				rt = (struct ipv6_rt_hdr *)(nh + offset);
+				if (rt->type != 0)
+					return offset;
+			}
+			found_rhdr = 1;
+			break;
+		case NEXTHDR_DEST:
+			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+				return offset;
+
+			if (found_rhdr)
+				return offset;
+
+			break;
+		default:
+			return offset;
+		}
+
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+	}
+
+	return offset;
+}
 #endif
 
 static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
@@ -131,6 +178,8 @@ static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prev
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 	case IPPROTO_DSTOPTS:
 		return mip6_destopt_offset(x, skb, prevhdr);
+	case IPPROTO_ROUTING:
+		return mip6_rthdr_offset(x, skb, prevhdr);
 #endif
 	default:
 		break;

From d1002d2490e3ebc30dd3ba747656cfa90c87e984 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 11 Jun 2021 12:50:13 +0200
Subject: [PATCH 1399/3292] xfrm: remove hdr_offset indirection

After previous patches all remaining users set the function pointer to
the same function: xfrm6_find_1stfragopt.

So remove this function pointer and call ip6_find_1stfragopt directly.

Reduces size of xfrm_type to 64 bytes on 64bit platforms.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      | 3 ---
 net/ipv6/ah6.c          | 1 -
 net/ipv6/esp6.c         | 1 -
 net/ipv6/ipcomp6.c      | 1 -
 net/ipv6/xfrm6_output.c | 7 -------
 net/xfrm/xfrm_output.c  | 2 +-
 6 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1aad78c5f2d5..c8890da00b8a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -402,7 +402,6 @@ struct xfrm_type {
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
 	int			(*reject)(struct xfrm_state *, struct sk_buff *,
 					  const struct flowi *);
-	int			(*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **);
 };
 
 int xfrm_register_type(const struct xfrm_type *type, unsigned short family);
@@ -1605,8 +1604,6 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
 __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
 int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb);
-int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
-			  u8 **prevhdr);
 
 #ifdef CONFIG_XFRM
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index e9705c256068..828e62514260 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -762,7 +762,6 @@ static const struct xfrm_type ah6_type = {
 	.destructor	= ah6_destroy,
 	.input		= ah6_input,
 	.output		= ah6_output,
-	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct xfrm6_protocol ah6_protocol = {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index be2c0ac76eaa..37c4b1726c5e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1250,7 +1250,6 @@ static const struct xfrm_type esp6_type = {
 	.destructor	= esp6_destroy,
 	.input		= esp6_input,
 	.output		= esp6_output,
-	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct xfrm6_protocol esp6_protocol = {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 491aba66b7ae..15f984be3570 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -178,7 +178,6 @@ static const struct xfrm_type ipcomp6_type = {
 	.destructor	= ipcomp_destroy,
 	.input		= ipcomp_input,
 	.output		= ipcomp_output,
-	.hdr_offset	= xfrm6_find_1stfragopt,
 };
 
 static struct xfrm6_protocol ipcomp6_protocol = {
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8b84d534b19d..57fa27c1cdf9 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -16,13 +16,6 @@
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
 
-int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
-			  u8 **prevhdr)
-{
-	return ip6_find_1stfragopt(skb, prevhdr);
-}
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
 {
 	struct flowi6 fl6;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 1734339b6dd0..10842d5cf6e1 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -185,7 +185,7 @@ static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prev
 		break;
 	}
 
-	return x->type->hdr_offset(x, skb, prevhdr);
+	return ip6_find_1stfragopt(skb, prevhdr);
 }
 
 /* Add encapsulation header.

From 3ca5ca83e206eab566830e08664eda415f428374 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 11 Jun 2021 12:50:14 +0200
Subject: [PATCH 1400/3292] xfrm: merge dstopt and routing hdroff functions

Both functions are very similar, so merge them into one.

The nexthdr is passed as argument to break the loop in the
ROUTING case, this is the only header type where slightly different
rules apply.

While at it, the merged function is realigned with
ip6_find_1stfragopt().  That function received bug fixes for an infinite
loop, but neither dstopt nor rh parsing functions (copy-pasted from
ip6_find_1stfragopt) were changed.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_output.c | 82 ++++++++++++------------------------------
 1 file changed, 23 insertions(+), 59 deletions(-)

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 10842d5cf6e1..e14fca1fb003 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -78,24 +78,30 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 }
 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
-static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
-			       u8 **nexthdr)
+static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
 {
-	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr =
-				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	const unsigned char *nh = skb_network_header(skb);
-	unsigned int packet_len = skb_tail_pointer(skb) -
-		skb_network_header(skb);
+	unsigned int offset = sizeof(struct ipv6hdr);
+	unsigned int packet_len;
 	int found_rhdr = 0;
 
+	packet_len = skb_tail_pointer(skb) - nh;
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
-	while (offset + 1 <= packet_len) {
+	while (offset <= packet_len) {
+		struct ipv6_opt_hdr *exthdr;
+
 		switch (**nexthdr) {
 		case NEXTHDR_HOP:
 			break;
 		case NEXTHDR_ROUTING:
+			if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
+				struct ipv6_rt_hdr *rt;
+
+				rt = (struct ipv6_rt_hdr *)(nh + offset);
+				if (rt->type != 0)
+					return offset;
+			}
 			found_rhdr = 1;
 			break;
 		case NEXTHDR_DEST:
@@ -116,59 +122,18 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 			return offset;
 		}
 
+		if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+			return -EINVAL;
+
+		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+						 offset);
 		offset += ipv6_optlen(exthdr);
+		if (offset > IPV6_MAXPLEN)
+			return -EINVAL;
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
-	return offset;
-}
-
-static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
-			     u8 **nexthdr)
-{
-	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr =
-				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
-	const unsigned char *nh = skb_network_header(skb);
-	unsigned int packet_len = skb_tail_pointer(skb) -
-		skb_network_header(skb);
-	int found_rhdr = 0;
-
-	*nexthdr = &ipv6_hdr(skb)->nexthdr;
-
-	while (offset + 1 <= packet_len) {
-		switch (**nexthdr) {
-		case NEXTHDR_HOP:
-			break;
-		case NEXTHDR_ROUTING:
-			if (offset + 3 <= packet_len) {
-				struct ipv6_rt_hdr *rt;
-
-				rt = (struct ipv6_rt_hdr *)(nh + offset);
-				if (rt->type != 0)
-					return offset;
-			}
-			found_rhdr = 1;
-			break;
-		case NEXTHDR_DEST:
-			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
-				return offset;
-
-			if (found_rhdr)
-				return offset;
-
-			break;
-		default:
-			return offset;
-		}
-
-		offset += ipv6_optlen(exthdr);
-		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-	}
-
-	return offset;
+	return -EINVAL;
 }
 #endif
 
@@ -177,9 +142,8 @@ static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prev
 	switch (x->type->proto) {
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 	case IPPROTO_DSTOPTS:
-		return mip6_destopt_offset(x, skb, prevhdr);
 	case IPPROTO_ROUTING:
-		return mip6_rthdr_offset(x, skb, prevhdr);
+		return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
 #endif
 	default:
 		break;

From da30e6688dd64fabc3746e00e4a9b6f926efd5ca Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:03:35 +0800
Subject: [PATCH 1401/3292] irqchip/exynos-combiner: Remove unnecessary oom
 message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140335.14425-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/exynos-combiner.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c
index 0b85d9a3fbff..256ed73ddfef 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -179,10 +179,8 @@ static void __init combiner_init(void __iomem *combiner_base,
 	nr_irq = max_nr * IRQ_IN_COMBINER;
 
 	combiner_data = kcalloc(max_nr, sizeof (*combiner_data), GFP_KERNEL);
-	if (!combiner_data) {
-		pr_warn("%s: could not allocate combiner data\n", __func__);
+	if (!combiner_data)
 		return;
-	}
 
 	combiner_irq_domain = irq_domain_add_linear(np, nr_irq,
 				&combiner_irq_domain_ops, combiner_data);

From 98ae089e1e6e5bab6f8c89412da5fc447e3580cb Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:05:34 +0800
Subject: [PATCH 1402/3292] irqchip/gic-v2m: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140534.14478-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-gic-v2m.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 4116b48e60af..be9ea6fd6f8b 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -323,10 +323,8 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
 	struct v2m_data *v2m;
 
 	v2m = kzalloc(sizeof(struct v2m_data), GFP_KERNEL);
-	if (!v2m) {
-		pr_err("Failed to allocate struct v2m_data.\n");
+	if (!v2m)
 		return -ENOMEM;
-	}
 
 	INIT_LIST_HEAD(&v2m->entry);
 	v2m->fwnode = fwnode;

From 944a1a17d399b33410af6dfcf2b5a0f74b42b3d0 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:06:42 +0800
Subject: [PATCH 1403/3292] irqchip/gic-v3-its: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140643.14531-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-gic-v3-its.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 2e6923c2c8a8..ba39668c3e08 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4895,10 +4895,8 @@ static int its_init_vpe_domain(void)
 	entries = roundup_pow_of_two(nr_cpu_ids);
 	vpe_proxy.vpes = kcalloc(entries, sizeof(*vpe_proxy.vpes),
 				 GFP_KERNEL);
-	if (!vpe_proxy.vpes) {
-		pr_err("ITS: Can't allocate GICv4 proxy device array\n");
+	if (!vpe_proxy.vpes)
 		return -ENOMEM;
-	}
 
 	/* Use the last possible DevID */
 	devid = GENMASK(device_ids(its) - 1, 0);
@@ -5314,10 +5312,8 @@ static void __init acpi_table_parse_srat_its(void)
 
 	its_srat_maps = kmalloc_array(count, sizeof(struct its_srat_map),
 				      GFP_KERNEL);
-	if (!its_srat_maps) {
-		pr_warn("SRAT: Failed to allocate memory for its_srat_maps!\n");
+	if (!its_srat_maps)
 		return;
-	}
 
 	acpi_table_parse_entries(ACPI_SIG_SRAT,
 			sizeof(struct acpi_table_srat),

From e3f389ed3a421f45b46e774b543648ebcab9020a Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:08:28 +0800
Subject: [PATCH 1404/3292] irqchip/imgpdc: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140828.14584-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-imgpdc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 698d07f48fed..04efa8e79863 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -316,10 +316,8 @@ static int pdc_intc_probe(struct platform_device *pdev)
 
 	/* Allocate driver data */
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		dev_err(&pdev->dev, "cannot allocate device data\n");
+	if (!priv)
 		return -ENOMEM;
-	}
 	raw_spin_lock_init(&priv->lock);
 	platform_set_drvdata(pdev, priv);
 
@@ -356,10 +354,8 @@ static int pdc_intc_probe(struct platform_device *pdev)
 	/* Get peripheral IRQ numbers */
 	priv->perip_irqs = devm_kcalloc(&pdev->dev, 4, priv->nr_perips,
 					GFP_KERNEL);
-	if (!priv->perip_irqs) {
-		dev_err(&pdev->dev, "cannot allocate perip IRQ list\n");
+	if (!priv->perip_irqs)
 		return -ENOMEM;
-	}
 	for (i = 0; i < priv->nr_perips; ++i) {
 		irq = platform_get_irq(pdev, 1 + i);
 		if (irq < 0)

From 76fc40ec22b9947351f6f9d37a86d47e72af4e50 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:11:50 +0800
Subject: [PATCH 1405/3292] irqchip/irq-imx-gpcv2: Remove unnecessary oom
 message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609141150.14637-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-imx-gpcv2.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c
index 7031ef44de4f..5b5a365dbd5e 100644
--- a/drivers/irqchip/irq-imx-gpcv2.c
+++ b/drivers/irqchip/irq-imx-gpcv2.c
@@ -228,10 +228,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node,
 	}
 
 	cd = kzalloc(sizeof(struct gpcv2_irqchip_data), GFP_KERNEL);
-	if (!cd) {
-		pr_err("%pOF: kzalloc failed!\n", node);
+	if (!cd)
 		return -ENOMEM;
-	}
 
 	raw_spin_lock_init(&cd->rlock);
 

From 75768e391f8947ea8b2e7997af68dbd68814f00c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:14:28 +0800
Subject: [PATCH 1406/3292] irqchip/sun4i: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609141428.14737-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-sun4i.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
index 9ea94456b178..8a315d6a3399 100644
--- a/drivers/irqchip/irq-sun4i.c
+++ b/drivers/irqchip/irq-sun4i.c
@@ -147,10 +147,8 @@ static int __init sun4i_ic_of_init(struct device_node *node,
 				   struct device_node *parent)
 {
 	irq_ic_data = kzalloc(sizeof(struct sun4i_irq_chip_data), GFP_KERNEL);
-	if (!irq_ic_data) {
-		pr_err("kzalloc failed!\n");
+	if (!irq_ic_data)
 		return -ENOMEM;
-	}
 
 	irq_ic_data->enable_reg_offset = SUN4I_IRQ_ENABLE_REG_OFFSET;
 	irq_ic_data->mask_reg_offset = SUN4I_IRQ_MASK_REG_OFFSET;
@@ -164,10 +162,8 @@ static int __init suniv_ic_of_init(struct device_node *node,
 				   struct device_node *parent)
 {
 	irq_ic_data = kzalloc(sizeof(struct sun4i_irq_chip_data), GFP_KERNEL);
-	if (!irq_ic_data) {
-		pr_err("kzalloc failed!\n");
+	if (!irq_ic_data)
 		return -ENOMEM;
-	}
 
 	irq_ic_data->enable_reg_offset = SUNIV_IRQ_ENABLE_REG_OFFSET;
 	irq_ic_data->mask_reg_offset = SUNIV_IRQ_MASK_REG_OFFSET;

From 21a496179c6e3a9fc03d1296b36afd14046db88f Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Fri, 9 Apr 2021 11:04:34 +0800
Subject: [PATCH 1407/3292] irqchip: gic-pm: Remove redundant error log of
 clock bulk

There is error log in clk_bulk_prepare/enable()

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/1617937474-24630-1-git-send-email-chunfeng.yun@mediatek.com
---
 drivers/irqchip/irq-gic-pm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c
index 1337ceceb59b..b60e1853593f 100644
--- a/drivers/irqchip/irq-gic-pm.c
+++ b/drivers/irqchip/irq-gic-pm.c
@@ -30,10 +30,8 @@ static int gic_runtime_resume(struct device *dev)
 	int ret;
 
 	ret = clk_bulk_prepare_enable(data->num_clocks, chip_pm->clks);
-	if (ret) {
-		dev_err(dev, "clk_enable failed: %d\n", ret);
+	if (ret)
 		return ret;
-	}
 
 	/*
 	 * On the very first resume, the pointer to chip_pm->chip_data

From 4e08a559a18c1b6424e56859c74adb4b29c17318 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Wed, 9 Jun 2021 16:51:08 +0100
Subject: [PATCH 1408/3292] dt-bindings: interrupt-controller: arm,gic-v3:
 Describe GICv3 optional properties

Describe the optional GICv3 properties:
- clocks
- clock-names
- power-domains
- resets

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609155108.16590-1-prabhakar.mahadev-lad.rj@bp.renesas.com
---
 .../bindings/interrupt-controller/arm,gic-v3.yaml   | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
index 1ecd1831cf02..c84f9fe7f254 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@@ -145,6 +145,19 @@ properties:
         required:
           - affinity
 
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: aclk
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
 dependencies:
   mbi-ranges: [ msi-controller ]
   msi-controller: [ mbi-ranges ]

From 8f5ee3c477a8e416e30ec75caed53a80fdca3462 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:46 -0700
Subject: [PATCH 1409/3292] ice: add support for sideband messages

In order to support certain device features, including enabling the PTP
hardware clock, the ice driver needs to control some registers on the
device PHY.

These registers are accessed by sending sideband messages. For some
hardware, these messages must be sent over the device admin queue, while
other hardware has a dedicated control queue for the sideband messages.

Add the neighbor device message structure for sending a message to the
neighboring device. Where supported, initialize the sideband control
queue and handle cleanup.

Add a wrapper function for sending sideband control queue messages that
read or write a neighboring device register.

Because some devices send sideband messages over the AdminQ, also
increase the length of the admin queue to allow more messages to be
queued up. This is important because the sideband messages add
additional pressure on the AQ usage.

This support will be used in following patches to enable support for
CONFIG_1588_PTP_CLOCK.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  4 +-
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   | 13 +++
 drivers/net/ethernet/intel/ice/ice_common.c   | 58 ++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |  3 +
 drivers/net/ethernet/intel/ice/ice_controlq.c | 62 +++++++++++++
 drivers/net/ethernet/intel/ice/ice_controlq.h |  2 +
 .../net/ethernet/intel/ice/ice_hw_autogen.h   | 51 ++++++++++
 drivers/net/ethernet/intel/ice/ice_lib.c      | 11 ++-
 drivers/net/ethernet/intel/ice/ice_main.c     | 47 ++++++++++
 drivers/net/ethernet/intel/ice/ice_sbq_cmd.h  | 92 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_type.h     |  2 +
 11 files changed, 343 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_sbq_cmd.h

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 317b19901053..b745e250ced9 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -74,8 +74,9 @@
 
 #define ICE_DFLT_TRAFFIC_CLASS	BIT(0)
 #define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
-#define ICE_AQ_LEN		64
+#define ICE_AQ_LEN		192
 #define ICE_MBXSQ_LEN		64
+#define ICE_SBQ_LEN		64
 #define ICE_MIN_LAN_TXRX_MSIX	1
 #define ICE_MIN_LAN_OICR_MSIX	1
 #define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
@@ -227,6 +228,7 @@ enum ice_pf_state {
 	ICE_STATE_NOMINAL_CHECK_BITS,
 	ICE_ADMINQ_EVENT_PENDING,
 	ICE_MAILBOXQ_EVENT_PENDING,
+	ICE_SIDEBANDQ_EVENT_PENDING,
 	ICE_MDD_EVENT_PENDING,
 	ICE_VFLR_EVENT_PENDING,
 	ICE_FLTR_OVERFLOW_PROMISC,
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index a9a7d2d1aca7..272d1600268e 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1611,6 +1611,15 @@ struct ice_aqc_get_set_rss_lut {
 	__le32 addr_low;
 };
 
+/* Sideband Control Interface Commands */
+/* Neighbor Device Request (indirect 0x0C00); also used for the response. */
+struct ice_aqc_neigh_dev_req {
+	__le16 sb_data_len;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* Add Tx LAN Queues (indirect 0x0C30) */
 struct ice_aqc_add_txqs {
 	u8 num_qgrps;
@@ -1911,6 +1920,7 @@ struct ice_aq_desc {
 		struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl;
 		struct ice_aqc_get_set_rss_lut get_set_rss_lut;
 		struct ice_aqc_get_set_rss_key get_set_rss_key;
+		struct ice_aqc_neigh_dev_req neigh_dev;
 		struct ice_aqc_add_txqs add_txqs;
 		struct ice_aqc_dis_txqs dis_txqs;
 		struct ice_aqc_add_rdma_qset add_rdma_qset;
@@ -2059,6 +2069,9 @@ enum ice_adminq_opc {
 	ice_aqc_opc_get_rss_key				= 0x0B04,
 	ice_aqc_opc_get_rss_lut				= 0x0B05,
 
+	/* Sideband Control Interface commands */
+	ice_aqc_opc_neighbour_device_request		= 0x0C00,
+
 	/* Tx queue handling commands/events */
 	ice_aqc_opc_add_txqs				= 0x0C30,
 	ice_aqc_opc_dis_txqs				= 0x0C31,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index f687d1f6b765..298e654583bd 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1293,6 +1293,64 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
 	{ 0 }
 };
 
+/* Sideband Queue command wrappers */
+
+/**
+ * ice_sbq_send_cmd - send Sideband Queue command to Sideband Queue
+ * @hw: pointer to the HW struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ */
+static int
+ice_sbq_send_cmd(struct ice_hw *hw, struct ice_sbq_cmd_desc *desc,
+		 void *buf, u16 buf_size, struct ice_sq_cd *cd)
+{
+	return ice_status_to_errno(ice_sq_send_cmd(hw, ice_get_sbq(hw),
+						   (struct ice_aq_desc *)desc,
+						   buf, buf_size, cd));
+}
+
+/**
+ * ice_sbq_rw_reg - Fill Sideband Queue command
+ * @hw: pointer to the HW struct
+ * @in: message info to be filled in descriptor
+ */
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in)
+{
+	struct ice_sbq_cmd_desc desc = {0};
+	struct ice_sbq_msg_req msg = {0};
+	u16 msg_len;
+	int status;
+
+	msg_len = sizeof(msg);
+
+	msg.dest_dev = in->dest_dev;
+	msg.opcode = in->opcode;
+	msg.flags = ICE_SBQ_MSG_FLAGS;
+	msg.sbe_fbe = ICE_SBQ_MSG_SBE_FBE;
+	msg.msg_addr_low = cpu_to_le16(in->msg_addr_low);
+	msg.msg_addr_high = cpu_to_le32(in->msg_addr_high);
+
+	if (in->opcode)
+		msg.data = cpu_to_le32(in->data);
+	else
+		/* data read comes back in completion, so shorten the struct by
+		 * sizeof(msg.data)
+		 */
+		msg_len -= sizeof(msg.data);
+
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
+	desc.opcode = cpu_to_le16(ice_sbq_opc_neigh_dev_req);
+	desc.param0.cmd_len = cpu_to_le16(msg_len);
+	status = ice_sbq_send_cmd(hw, &desc, &msg, msg_len, NULL);
+	if (!status && !in->opcode)
+		in->data = le32_to_cpu
+			(((struct ice_sbq_msg_cmpl *)&msg)->data);
+	return status;
+}
+
 /* FW Admin Queue command wrappers */
 
 /* Software lock/mutex that is meant to be held while the Global Config Lock
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 0fdda597fbc8..86bc261177d6 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -40,6 +40,8 @@ enum ice_status
 ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
 		      struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
 		      enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+bool ice_is_sbq_supported(struct ice_hw *hw);
+struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw);
 enum ice_status
 ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		struct ice_aq_desc *desc, void *buf, u16 buf_size,
@@ -173,6 +175,7 @@ void ice_replay_post(struct ice_hw *hw);
 void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
 struct ice_q_ctx *
 ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in);
 void
 ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
 		  u64 *prev_stat, u64 *cur_stat);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 87b33bdd4960..03bdb125be36 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -51,6 +51,19 @@ static void ice_mailbox_init_regs(struct ice_hw *hw)
 	ICE_CQ_INIT_REGS(cq, PF_MBX);
 }
 
+/**
+ * ice_sb_init_regs - Initialize Sideband registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_sb_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->sbq;
+
+	ICE_CQ_INIT_REGS(cq, PF_SB);
+}
+
 /**
  * ice_check_sq_alive
  * @hw: pointer to the HW struct
@@ -609,6 +622,10 @@ static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
 		ice_adminq_init_regs(hw);
 		cq = &hw->adminq;
 		break;
+	case ICE_CTL_Q_SB:
+		ice_sb_init_regs(hw);
+		cq = &hw->sbq;
+		break;
 	case ICE_CTL_Q_MAILBOX:
 		ice_mailbox_init_regs(hw);
 		cq = &hw->mailboxq;
@@ -645,6 +662,32 @@ init_ctrlq_free_sq:
 	return ret_code;
 }
 
+/**
+ * ice_is_sbq_supported - is the sideband queue supported
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if the sideband control queue interface is
+ * supported for the device, false otherwise
+ */
+bool ice_is_sbq_supported(struct ice_hw *hw)
+{
+	/* The device sideband queue is only supported on devices with the
+	 * generic MAC type.
+	 */
+	return hw->mac_type == ICE_MAC_GENERIC;
+}
+
+/**
+ * ice_get_sbq - returns the right control queue to use for sideband
+ * @hw: pointer to the hardware structure
+ */
+struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw)
+{
+	if (ice_is_sbq_supported(hw))
+		return &hw->sbq;
+	return &hw->adminq;
+}
+
 /**
  * ice_shutdown_ctrlq - shutdown routine for any control queue
  * @hw: pointer to the hardware structure
@@ -662,6 +705,9 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
 		if (ice_check_sq_alive(hw, cq))
 			ice_aq_q_shutdown(hw, true);
 		break;
+	case ICE_CTL_Q_SB:
+		cq = &hw->sbq;
+		break;
 	case ICE_CTL_Q_MAILBOX:
 		cq = &hw->mailboxq;
 		break;
@@ -685,6 +731,9 @@ void ice_shutdown_all_ctrlq(struct ice_hw *hw)
 {
 	/* Shutdown FW admin queue */
 	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	/* Shutdown PHY Sideband */
+	if (ice_is_sbq_supported(hw))
+		ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB);
 	/* Shutdown PF-VF Mailbox */
 	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
 }
@@ -724,6 +773,15 @@ enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
 
 	if (status)
 		return status;
+	/* sideband control queue (SBQ) interface is not supported on some
+	 * devices. Initialize if supported, else fallback to the admin queue
+	 * interface
+	 */
+	if (ice_is_sbq_supported(hw)) {
+		status = ice_init_ctrlq(hw, ICE_CTL_Q_SB);
+		if (status)
+			return status;
+	}
 	/* Init Mailbox queue */
 	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
 }
@@ -759,6 +817,8 @@ static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq)
 enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
 {
 	ice_init_ctrlq_locks(&hw->adminq);
+	if (ice_is_sbq_supported(hw))
+		ice_init_ctrlq_locks(&hw->sbq);
 	ice_init_ctrlq_locks(&hw->mailboxq);
 
 	return ice_init_all_ctrlq(hw);
@@ -791,6 +851,8 @@ void ice_destroy_all_ctrlq(struct ice_hw *hw)
 	ice_shutdown_all_ctrlq(hw);
 
 	ice_destroy_ctrlq_locks(&hw->adminq);
+	if (ice_is_sbq_supported(hw))
+		ice_destroy_ctrlq_locks(&hw->sbq);
 	ice_destroy_ctrlq_locks(&hw->mailboxq);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index fe75871e48ca..c07e9cc9fc6e 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -9,6 +9,7 @@
 /* Maximum buffer lengths for all control queue types */
 #define ICE_AQ_MAX_BUF_LEN 4096
 #define ICE_MBXQ_MAX_BUF_LEN 4096
+#define ICE_SBQ_MAX_BUF_LEN 512
 
 #define ICE_CTL_Q_DESC(R, i) \
 	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
@@ -29,6 +30,7 @@ enum ice_ctl_q {
 	ICE_CTL_Q_UNKNOWN = 0,
 	ICE_CTL_Q_ADMIN,
 	ICE_CTL_Q_MAILBOX,
+	ICE_CTL_Q_SB,
 };
 
 /* Control Queue timeout settings - max delay 1s */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 5386285c39e7..84d5d43fe029 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -52,6 +52,54 @@
 #define PF_MBX_ATQLEN_ATQCRIT_M			BIT(30)
 #define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
 #define PF_MBX_ATQT				0x0022E300
+#define PF_SB_ARQBAH				0x0022FF00
+#define PF_SB_ARQBAH_ARQBAH_S			0
+#define PF_SB_ARQBAH_ARQBAH_M			ICE_M(0xFFFFFFFF, 0)
+#define PF_SB_ARQBAL				0x0022FE80
+#define PF_SB_ARQBAL_ARQBAL_LSB_S		0
+#define PF_SB_ARQBAL_ARQBAL_LSB_M		ICE_M(0x3F, 0)
+#define PF_SB_ARQBAL_ARQBAL_S			6
+#define PF_SB_ARQBAL_ARQBAL_M			ICE_M(0x3FFFFFF, 6)
+#define PF_SB_ARQH				0x00230000
+#define PF_SB_ARQH_ARQH_S			0
+#define PF_SB_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_SB_ARQLEN				0x0022FF80
+#define PF_SB_ARQLEN_ARQLEN_S			0
+#define PF_SB_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_SB_ARQLEN_ARQVFE_S			28
+#define PF_SB_ARQLEN_ARQVFE_M			BIT(28)
+#define PF_SB_ARQLEN_ARQOVFL_S			29
+#define PF_SB_ARQLEN_ARQOVFL_M			BIT(29)
+#define PF_SB_ARQLEN_ARQCRIT_S			30
+#define PF_SB_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_SB_ARQLEN_ARQENABLE_S		31
+#define PF_SB_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_SB_ARQT				0x00230080
+#define PF_SB_ARQT_ARQT_S			0
+#define PF_SB_ARQT_ARQT_M			ICE_M(0x3FF, 0)
+#define PF_SB_ATQBAH				0x0022FC80
+#define PF_SB_ATQBAH_ATQBAH_S			0
+#define PF_SB_ATQBAH_ATQBAH_M			ICE_M(0xFFFFFFFF, 0)
+#define PF_SB_ATQBAL				0x0022FC00
+#define PF_SB_ATQBAL_ATQBAL_S			6
+#define PF_SB_ATQBAL_ATQBAL_M			ICE_M(0x3FFFFFF, 6)
+#define PF_SB_ATQH				0x0022FD80
+#define PF_SB_ATQH_ATQH_S			0
+#define PF_SB_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_SB_ATQLEN				0x0022FD00
+#define PF_SB_ATQLEN_ATQLEN_S			0
+#define PF_SB_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_SB_ATQLEN_ATQVFE_S			28
+#define PF_SB_ATQLEN_ATQVFE_M			BIT(28)
+#define PF_SB_ATQLEN_ATQOVFL_S			29
+#define PF_SB_ATQLEN_ATQOVFL_M			BIT(29)
+#define PF_SB_ATQLEN_ATQCRIT_S			30
+#define PF_SB_ATQLEN_ATQCRIT_M			BIT(30)
+#define PF_SB_ATQLEN_ATQENABLE_S		31
+#define PF_SB_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_SB_ATQT				0x0022FE00
+#define PF_SB_ATQT_ATQT_S			0
+#define PF_SB_ATQT_ATQT_M			ICE_M(0x3FF, 0)
 #define PRTDCB_GENC				0x00083000
 #define PRTDCB_GENC_PFCLDA_S			16
 #define PRTDCB_GENC_PFCLDA_M			ICE_M(0xFFFF, 16)
@@ -169,6 +217,9 @@
 #define PFINT_OICR_CTL_ITR_INDX_M		ICE_M(0x3, 11)
 #define PFINT_OICR_CTL_CAUSE_ENA_M		BIT(30)
 #define PFINT_OICR_ENA				0x0016C900
+#define PFINT_SB_CTL				0x0016B600
+#define PFINT_SB_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_SB_CTL_CAUSE_ENA_M		BIT(30)
 #define QINT_RQCTL(_QRX)			(0x00150000 + ((_QRX) * 4))
 #define QINT_RQCTL_MSIX_INDX_S			0
 #define QINT_RQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index c14be5c00d2d..db36ce9c0b1c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3392,13 +3392,22 @@ int ice_status_to_errno(enum ice_status err)
 	case ICE_ERR_DOES_NOT_EXIST:
 		return -ENOENT;
 	case ICE_ERR_OUT_OF_RANGE:
-		return -ENOTTY;
+	case ICE_ERR_AQ_ERROR:
+	case ICE_ERR_AQ_TIMEOUT:
+	case ICE_ERR_AQ_EMPTY:
+	case ICE_ERR_AQ_FW_CRITICAL:
+		return -EIO;
 	case ICE_ERR_PARAM:
+	case ICE_ERR_INVAL_SIZE:
 		return -EINVAL;
 	case ICE_ERR_NO_MEMORY:
 		return -ENOMEM;
 	case ICE_ERR_MAX_LIMIT:
 		return -EAGAIN;
+	case ICE_ERR_RESET_ONGOING:
+		return -EBUSY;
+	case ICE_ERR_AQ_FULL:
+		return -ENOSPC;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4c0412d87b1a..4b6911955f7c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1231,6 +1231,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		cq = &hw->adminq;
 		qtype = "Admin";
 		break;
+	case ICE_CTL_Q_SB:
+		cq = &hw->sbq;
+		qtype = "Sideband";
+		break;
 	case ICE_CTL_Q_MAILBOX:
 		cq = &hw->mailboxq;
 		qtype = "Mailbox";
@@ -1404,6 +1408,34 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
 	ice_flush(hw);
 }
 
+/**
+ * ice_clean_sbq_subtask - clean the Sideband Queue rings
+ * @pf: board private structure
+ */
+static void ice_clean_sbq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	/* Nothing to do here if sideband queue is not supported */
+	if (!ice_is_sbq_supported(hw)) {
+		clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+		return;
+	}
+
+	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
+		return;
+
+	clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+
+	if (ice_ctrlq_pending(hw, &hw->sbq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_SB);
+
+	ice_flush(hw);
+}
+
 /**
  * ice_service_task_schedule - schedule the service task to wake up
  * @pf: board private structure
@@ -2106,6 +2138,7 @@ static void ice_service_task(struct work_struct *work)
 
 	ice_process_vflr_event(pf);
 	ice_clean_mailboxq_subtask(pf);
+	ice_clean_sbq_subtask(pf);
 	ice_sync_arfs_fltrs(pf);
 	ice_flush_fdir_ctx(pf);
 
@@ -2121,6 +2154,7 @@ static void ice_service_task(struct work_struct *work)
 	    test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
 	    test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
 	    test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
+	    test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) ||
 	    test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
 		mod_timer(&pf->serv_tmr, jiffies);
 }
@@ -2139,6 +2173,10 @@ static void ice_set_ctrlq_len(struct ice_hw *hw)
 	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
 	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
 	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
+	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
+	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
+	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
 }
 
 /**
@@ -2679,6 +2717,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	dev = ice_pf_to_dev(pf);
 	set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
 	set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+	set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
 
 	oicr = rd32(hw, PFINT_OICR);
 	ena_mask = rd32(hw, PFINT_OICR_ENA);
@@ -2800,6 +2839,9 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
 	wr32(hw, PFINT_MBX_CTL,
 	     rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
 
+	wr32(hw, PFINT_SB_CTL,
+	     rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
+
 	/* disable Control queue Interrupt causes */
 	wr32(hw, PFINT_OICR_CTL,
 	     rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
@@ -2854,6 +2896,11 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
 	       PFINT_MBX_CTL_CAUSE_ENA_M);
 	wr32(hw, PFINT_MBX_CTL, val);
 
+	/* This enables Sideband queue Interrupt causes */
+	val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) |
+	       PFINT_SB_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_SB_CTL, val);
+
 	ice_flush(hw);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
new file mode 100644
index 000000000000..ead75fe2bcda
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_SBQ_CMD_H_
+#define _ICE_SBQ_CMD_H_
+
+/* This header file defines the Sideband Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+/* Sideband Queue command structure and opcodes */
+enum ice_sbq_opc {
+	/* Sideband Queue commands */
+	ice_sbq_opc_neigh_dev_req			= 0x0C00,
+	ice_sbq_opc_neigh_dev_ev			= 0x0C01
+};
+
+/* Sideband Queue descriptor. Indirect command
+ * and non posted
+ */
+struct ice_sbq_cmd_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 cmd_retval;
+
+	/* Opaque message data */
+	__le32 cookie_high;
+	__le32 cookie_low;
+
+	union {
+		__le16 cmd_len;
+		__le16 cmpl_len;
+	} param0;
+
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_sbq_evt_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 cmd_retval;
+	u8 data[24];
+};
+
+enum ice_sbq_msg_dev {
+	rmn_0	= 0x02,
+	rmn_1	= 0x03,
+	rmn_2	= 0x04,
+	cgu	= 0x06
+};
+
+enum ice_sbq_msg_opcode {
+	ice_sbq_msg_rd	= 0x00,
+	ice_sbq_msg_wr	= 0x01
+};
+
+#define ICE_SBQ_MSG_FLAGS	0x40
+#define ICE_SBQ_MSG_SBE_FBE	0x0F
+
+struct ice_sbq_msg_req {
+	u8 dest_dev;
+	u8 src_dev;
+	u8 opcode;
+	u8 flags;
+	u8 sbe_fbe;
+	u8 func_id;
+	__le16 msg_addr_low;
+	__le32 msg_addr_high;
+	__le32 data;
+};
+
+struct ice_sbq_msg_cmpl {
+	u8 dest_dev;
+	u8 src_dev;
+	u8 opcode;
+	u8 flags;
+	__le32 data;
+};
+
+/* Internal struct */
+struct ice_sbq_msg_input {
+	u8 dest_dev;
+	u8 opcode;
+	u16 msg_addr_low;
+	u32 msg_addr_high;
+	u32 data;
+};
+#endif /* _ICE_SBQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 61ea46dd80b7..4a40e2b3732a 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -14,6 +14,7 @@
 #include "ice_lan_tx_rx.h"
 #include "ice_flex_type.h"
 #include "ice_protocol_type.h"
+#include "ice_sbq_cmd.h"
 
 static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 {
@@ -754,6 +755,7 @@ struct ice_hw {
 
 	/* Control Queue info */
 	struct ice_ctl_q_info adminq;
+	struct ice_ctl_q_info sbq;
 	struct ice_ctl_q_info mailboxq;
 
 	u8 api_branch;		/* API branch version */

From 9733cc94c52320a13bf0357d4937e7c9ed759ac9 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:47 -0700
Subject: [PATCH 1410/3292] ice: process 1588 PTP capabilities during
 initialization

The device firmware reports PTP clock capabilities to each PF during
initialization. This includes various information for both the overall
device and the individual function, including

For functions:
* whether this function has timesync enabled
* whether this function owns one of the 2 possible clock timers, and
  which one
* which timer the function is associated with
* the clock frequency, if the device supports multiple clock frequencies
* The GPIO pin association for the timer owned by this PF, if any

For the device:
* Which PF owns timer 0, if any
* Which PF owns timer 1, if any
* whether timer 0 is enabled
* whether timer 1 is enabled

Extract the bits from the capabilities information reported by firmware
and store them in the device and function capability structures.o

This information will be used in a future change to have the function
driver enable PTP hardware clock support.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  1 +
 drivers/net/ethernet/intel/ice/ice_common.c   | 99 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_type.h     | 51 ++++++++++
 3 files changed, 151 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 272d1600268e..f10c1b8555a4 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -108,6 +108,7 @@ struct ice_aqc_list_caps_elem {
 #define ICE_AQC_CAPS_TXQS				0x0042
 #define ICE_AQC_CAPS_MSIX				0x0043
 #define ICE_AQC_CAPS_FD					0x0045
+#define ICE_AQC_CAPS_1588				0x0046
 #define ICE_AQC_CAPS_MAX_MTU				0x0047
 #define ICE_AQC_CAPS_NVM_VER				0x0048
 #define ICE_AQC_CAPS_PENDING_NVM_VER			0x0049
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 298e654583bd..e9eb48bd4b1e 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2092,6 +2092,48 @@ ice_parse_vsi_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
 		  func_p->guar_num_vsi);
 }
 
+/**
+ * ice_parse_1588_func_caps - Parse ICE_AQC_CAPS_1588 function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_1588.
+ */
+static void
+ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+			 struct ice_aqc_list_caps_elem *cap)
+{
+	struct ice_ts_func_info *info = &func_p->ts_func_info;
+	u32 number = le32_to_cpu(cap->number);
+
+	info->ena = ((number & ICE_TS_FUNC_ENA_M) != 0);
+	func_p->common_cap.ieee_1588 = info->ena;
+
+	info->src_tmr_owned = ((number & ICE_TS_SRC_TMR_OWND_M) != 0);
+	info->tmr_ena = ((number & ICE_TS_TMR_ENA_M) != 0);
+	info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0);
+	info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0);
+
+	info->clk_freq = (number & ICE_TS_CLK_FREQ_M) >> ICE_TS_CLK_FREQ_S;
+	info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0);
+
+	ice_debug(hw, ICE_DBG_INIT, "func caps: ieee_1588 = %u\n",
+		  func_p->common_cap.ieee_1588);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: src_tmr_owned = %u\n",
+		  info->src_tmr_owned);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_ena = %u\n",
+		  info->tmr_ena);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_owned = %u\n",
+		  info->tmr_index_owned);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_assoc = %u\n",
+		  info->tmr_index_assoc);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: clk_freq = %u\n",
+		  info->clk_freq);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: clk_src = %u\n",
+		  info->clk_src);
+}
+
 /**
  * ice_parse_fdir_func_caps - Parse ICE_AQC_CAPS_FD function caps
  * @hw: pointer to the HW struct
@@ -2158,6 +2200,9 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
 		case ICE_AQC_CAPS_VSI:
 			ice_parse_vsi_func_caps(hw, func_p, &cap_resp[i]);
 			break;
+		case ICE_AQC_CAPS_1588:
+			ice_parse_1588_func_caps(hw, func_p, &cap_resp[i]);
+			break;
 		case ICE_AQC_CAPS_FD:
 			ice_parse_fdir_func_caps(hw, func_p);
 			break;
@@ -2230,6 +2275,57 @@ ice_parse_vsi_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
 		  dev_p->num_vsi_allocd_to_host);
 }
 
+/**
+ * ice_parse_1588_dev_caps - Parse ICE_AQC_CAPS_1588 device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_1588 for device capabilities.
+ */
+static void
+ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+			struct ice_aqc_list_caps_elem *cap)
+{
+	struct ice_ts_dev_info *info = &dev_p->ts_dev_info;
+	u32 logical_id = le32_to_cpu(cap->logical_id);
+	u32 phys_id = le32_to_cpu(cap->phys_id);
+	u32 number = le32_to_cpu(cap->number);
+
+	info->ena = ((number & ICE_TS_DEV_ENA_M) != 0);
+	dev_p->common_cap.ieee_1588 = info->ena;
+
+	info->tmr0_owner = number & ICE_TS_TMR0_OWNR_M;
+	info->tmr0_owned = ((number & ICE_TS_TMR0_OWND_M) != 0);
+	info->tmr0_ena = ((number & ICE_TS_TMR0_ENA_M) != 0);
+
+	info->tmr1_owner = (number & ICE_TS_TMR1_OWNR_M) >> ICE_TS_TMR1_OWNR_S;
+	info->tmr1_owned = ((number & ICE_TS_TMR1_OWND_M) != 0);
+	info->tmr1_ena = ((number & ICE_TS_TMR1_ENA_M) != 0);
+
+	info->ena_ports = logical_id;
+	info->tmr_own_map = phys_id;
+
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 = %u\n",
+		  dev_p->common_cap.ieee_1588);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owner = %u\n",
+		  info->tmr0_owner);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owned = %u\n",
+		  info->tmr0_owned);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_ena = %u\n",
+		  info->tmr0_ena);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owner = %u\n",
+		  info->tmr1_owner);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owned = %u\n",
+		  info->tmr1_owned);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_ena = %u\n",
+		  info->tmr1_ena);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 ena_ports = %u\n",
+		  info->ena_ports);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr_own_map = %u\n",
+		  info->tmr_own_map);
+}
+
 /**
  * ice_parse_fdir_dev_caps - Parse ICE_AQC_CAPS_FD device caps
  * @hw: pointer to the HW struct
@@ -2291,6 +2387,9 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
 		case ICE_AQC_CAPS_VSI:
 			ice_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]);
 			break;
+		case ICE_AQC_CAPS_1588:
+			ice_parse_1588_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
 		case  ICE_AQC_CAPS_FD:
 			ice_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]);
 			break;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 4a40e2b3732a..0e5d8d52728b 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -265,6 +265,7 @@ struct ice_hw_common_caps {
 	u8 rss_table_entry_width;	/* RSS Entry width in bits */
 
 	u8 dcb;
+	u8 ieee_1588;
 	u8 rdma;
 
 	bool nvm_update_pending_nvm;
@@ -277,6 +278,54 @@ struct ice_hw_common_caps {
 #define ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT	BIT(3)
 };
 
+/* IEEE 1588 TIME_SYNC specific info */
+/* Function specific definitions */
+#define ICE_TS_FUNC_ENA_M		BIT(0)
+#define ICE_TS_SRC_TMR_OWND_M		BIT(1)
+#define ICE_TS_TMR_ENA_M		BIT(2)
+#define ICE_TS_TMR_IDX_OWND_S		4
+#define ICE_TS_TMR_IDX_OWND_M		BIT(4)
+#define ICE_TS_CLK_FREQ_S		16
+#define ICE_TS_CLK_FREQ_M		ICE_M(0x7, ICE_TS_CLK_FREQ_S)
+#define ICE_TS_CLK_SRC_S		20
+#define ICE_TS_CLK_SRC_M		BIT(20)
+#define ICE_TS_TMR_IDX_ASSOC_S		24
+#define ICE_TS_TMR_IDX_ASSOC_M		BIT(24)
+
+struct ice_ts_func_info {
+	/* Function specific info */
+	u32 clk_freq;
+	u8 clk_src;
+	u8 tmr_index_assoc;
+	u8 ena;
+	u8 tmr_index_owned;
+	u8 src_tmr_owned;
+	u8 tmr_ena;
+};
+
+/* Device specific definitions */
+#define ICE_TS_TMR0_OWNR_M		0x7
+#define ICE_TS_TMR0_OWND_M		BIT(3)
+#define ICE_TS_TMR1_OWNR_S		4
+#define ICE_TS_TMR1_OWNR_M		ICE_M(0x7, ICE_TS_TMR1_OWNR_S)
+#define ICE_TS_TMR1_OWND_M		BIT(7)
+#define ICE_TS_DEV_ENA_M		BIT(24)
+#define ICE_TS_TMR0_ENA_M		BIT(25)
+#define ICE_TS_TMR1_ENA_M		BIT(26)
+
+struct ice_ts_dev_info {
+	/* Device specific info */
+	u32 ena_ports;
+	u32 tmr_own_map;
+	u32 tmr0_owner;
+	u32 tmr1_owner;
+	u8 tmr0_owned;
+	u8 tmr1_owned;
+	u8 ena;
+	u8 tmr0_ena;
+	u8 tmr1_ena;
+};
+
 /* Function specific capabilities */
 struct ice_hw_func_caps {
 	struct ice_hw_common_caps common_cap;
@@ -285,6 +334,7 @@ struct ice_hw_func_caps {
 	u32 guar_num_vsi;
 	u32 fd_fltr_guar;		/* Number of filters guaranteed */
 	u32 fd_fltr_best_effort;	/* Number of best effort filters */
+	struct ice_ts_func_info ts_func_info;
 };
 
 /* Device wide capabilities */
@@ -293,6 +343,7 @@ struct ice_hw_dev_caps {
 	u32 num_vfs_exposed;		/* Total number of VFs exposed */
 	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
 	u32 num_flow_director_fltr;	/* Number of FD filters available */
+	struct ice_ts_dev_info ts_dev_info;
 	u32 num_funcs;
 };
 

From 7f9ab54d314456884209f088aeaaf24e14d9ddf4 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:48 -0700
Subject: [PATCH 1411/3292] ice: add support for set/get of driver-stored
 firmware parameters

Depending on the device configuration, the ice hardware may share the
PTP hardware clock timer between multiple PFs. Each PF is informed by
firmware during initialization of the PTP timer association.

When bringing up PTP, only the PFs which own the timer shall allocate
a PTP hardware clock. Other PFs associated with that timer must report
the correct PTP clock index in order to allow userspace software the
ability to know which ports are connected to the same clock.

To support this, the firmware has driver shared parameters. These
parameters enable one PF to write the clock index into firmware, and
have other PFs read the associated value out. This enables the driver to
have only a single PF allocate and control the device timer registers,
while other PFs associated with that timer can report the correct clock
in the ETHTOOL_GET_TS_INFO report.

Add support for the necessary admin queue commands to enable reading and
writing of the driver shared parameters. This will be used in a future
change to enable sharing the PTP clock index between PF drivers.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   | 27 +++++++
 drivers/net/ethernet/intel/ice/ice_common.c   | 75 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |  6 ++
 3 files changed, 108 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index f10c1b8555a4..21b4c7cd6f05 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1853,6 +1853,30 @@ struct ice_aqc_get_pkg_info_resp {
 	struct ice_aqc_get_pkg_info pkg_info[];
 };
 
+/* Driver Shared Parameters (direct, 0x0C90) */
+struct ice_aqc_driver_shared_params {
+	u8 set_or_get_op;
+#define ICE_AQC_DRIVER_PARAM_OP_MASK		BIT(0)
+#define ICE_AQC_DRIVER_PARAM_SET		0
+#define ICE_AQC_DRIVER_PARAM_GET		1
+	u8 param_indx;
+#define ICE_AQC_DRIVER_PARAM_MAX_IDX		15
+	u8 rsvd[2];
+	__le32 param_val;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+enum ice_aqc_driver_params {
+	/* OS clock index for PTP timer Domain 0 */
+	ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0 = 0,
+	/* OS clock index for PTP timer Domain 1 */
+	ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1,
+
+	/* Add new parameters above */
+	ICE_AQC_DRIVER_PARAM_MAX = 16,
+};
+
 /* Lan Queue Overflow Event (direct, 0x1001) */
 struct ice_aqc_event_lan_overflow {
 	__le32 prtdcb_ruptq;
@@ -1930,6 +1954,7 @@ struct ice_aq_desc {
 		struct ice_aqc_fw_logging fw_logging;
 		struct ice_aqc_get_clear_fw_log get_clear_fw_log;
 		struct ice_aqc_download_pkg download_pkg;
+		struct ice_aqc_driver_shared_params drv_shared_params;
 		struct ice_aqc_set_mac_lb set_mac_lb;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
 		struct ice_aqc_set_mac_cfg set_mac_cfg;
@@ -2083,6 +2108,8 @@ enum ice_adminq_opc {
 	ice_aqc_opc_update_pkg				= 0x0C42,
 	ice_aqc_opc_get_pkg_info_list			= 0x0C43,
 
+	ice_aqc_opc_driver_shared_params		= 0x0C90,
+
 	/* Standalone Commands/Events */
 	ice_aqc_opc_event_lan_overflow			= 0x1001,
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index e9eb48bd4b1e..39c1ed628be7 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4680,6 +4680,81 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
 	return status;
 }
 
+/**
+ * ice_aq_set_driver_param - Set driver parameter to share via firmware
+ * @hw: pointer to the HW struct
+ * @idx: parameter index to set
+ * @value: the value to set the parameter to
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the value of one of the software defined parameters. All PFs connected
+ * to this device can read the value using ice_aq_get_driver_param.
+ *
+ * Note that firmware provides no synchronization or locking, and will not
+ * save the parameter value during a device reset. It is expected that
+ * a single PF will write the parameter value, while all other PFs will only
+ * read it.
+ */
+int
+ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 value, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_shared_params *cmd;
+	struct ice_aq_desc desc;
+
+	if (idx >= ICE_AQC_DRIVER_PARAM_MAX)
+		return -EIO;
+
+	cmd = &desc.params.drv_shared_params;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params);
+
+	cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_SET;
+	cmd->param_indx = idx;
+	cmd->param_val = cpu_to_le32(value);
+
+	return ice_status_to_errno(ice_aq_send_cmd(hw, &desc, NULL, 0, cd));
+}
+
+/**
+ * ice_aq_get_driver_param - Get driver parameter shared via firmware
+ * @hw: pointer to the HW struct
+ * @idx: parameter index to set
+ * @value: storage to return the shared parameter
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the value of one of the software defined parameters.
+ *
+ * Note that firmware provides no synchronization or locking. It is expected
+ * that only a single PF will write a given parameter.
+ */
+int
+ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 *value, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_shared_params *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (idx >= ICE_AQC_DRIVER_PARAM_MAX)
+		return -EIO;
+
+	cmd = &desc.params.drv_shared_params;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params);
+
+	cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_GET;
+	cmd->param_indx = idx;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (status)
+		return ice_status_to_errno(status);
+
+	*value = le32_to_cpu(cmd->param_val);
+
+	return 0;
+}
+
 /**
  * ice_fw_supports_link_override
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 86bc261177d6..8cc0a639c208 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -185,6 +185,12 @@ ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
 enum ice_status
 ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
 		     struct ice_aqc_txsched_elem_data *buf);
+int
+ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 value, struct ice_sq_cd *cd);
+int
+ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 *value, struct ice_sq_cd *cd);
 enum ice_status
 ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
 		    struct ice_sq_cd *cd);

From 03cb4473be92a4207a3d1df25186dafd1a5add4d Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:49 -0700
Subject: [PATCH 1412/3292] ice: add low level PTP clock access functions

Add the ice_ptp_hw.c file and some associated definitions to the ice
driver folder. This file contains basic low level definitions for
functions that interact with the device hardware.

For now, only E810-based devices are supported. The ice hardware
supports 2 major variants which have different PHYs with different
procedures necessary for interacting with the device clock.

Because the device captures timestamps in the PHY, each PHY has its own
internal timer. The timers are synchronized in hardware by first
preparing the source timer and the PHY timer shadow registers, and then
issuing a synchronization command. This ensures that both the source
timer and PHY timers are programmed simultaneously. The timers
themselves are all driven from the same oscillator source.

The functions in ice_ptp_hw.c abstract over the differences between how
the PHYs in E810 are programmed vs how the PHYs in E822 devices are
programmed. This series only implements E810 support, but E822 support
will be added in a future change.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  17 +
 drivers/net/ethernet/intel/ice/ice_ptp_hw.c   | 653 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_ptp_hw.h   |  79 +++
 drivers/net/ethernet/intel/ice/ice_type.h     |   9 +
 include/linux/kernel.h                        |  12 +
 5 files changed, 770 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_ptp_hw.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_ptp_hw.h

diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 84d5d43fe029..f6f5ced50be2 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -433,6 +433,23 @@
 #define GLV_UPRCL(_i)				(0x003B2000 + ((_i) * 8))
 #define GLV_UPTCL(_i)				(0x0030A000 + ((_i) * 8))
 #define PRTRPB_RDPC				0x000AC260
+#define GLTSYN_CMD				0x00088810
+#define GLTSYN_CMD_SYNC				0x00088814
+#define GLTSYN_ENA(_i)				(0x00088808 + ((_i) * 4))
+#define GLTSYN_ENA_TSYN_ENA_M			BIT(0)
+#define GLTSYN_INCVAL_H(_i)			(0x00088920 + ((_i) * 4))
+#define GLTSYN_INCVAL_L(_i)			(0x00088918 + ((_i) * 4))
+#define GLTSYN_SHADJ_H(_i)			(0x00088910 + ((_i) * 4))
+#define GLTSYN_SHADJ_L(_i)			(0x00088908 + ((_i) * 4))
+#define GLTSYN_SHTIME_0(_i)			(0x000888E0 + ((_i) * 4))
+#define GLTSYN_SHTIME_H(_i)			(0x000888F0 + ((_i) * 4))
+#define GLTSYN_SHTIME_L(_i)			(0x000888E8 + ((_i) * 4))
+#define GLTSYN_STAT(_i)				(0x000888C0 + ((_i) * 4))
+#define GLTSYN_SYNC_DLAY			0x00088818
+#define GLTSYN_TIME_H(_i)			(0x000888D8 + ((_i) * 4))
+#define GLTSYN_TIME_L(_i)			(0x000888D0 + ((_i) * 4))
+#define PFTSYN_SEM				0x00088880
+#define PFTSYN_SEM_BUSY_M			BIT(0)
 #define VSIQF_FD_CNT(_VSI)			(0x00464000 + ((_VSI) * 4))
 #define VSIQF_FD_CNT_FD_GCNT_S			0
 #define VSIQF_FD_CNT_FD_GCNT_M			ICE_M(0x3FFF, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
new file mode 100644
index 000000000000..267312fad59a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -0,0 +1,653 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_ptp_hw.h"
+
+/* Low level functions for interacting with and managing the device clock used
+ * for the Precision Time Protocol.
+ *
+ * The ice hardware represents the current time using three registers:
+ *
+ *    GLTSYN_TIME_H     GLTSYN_TIME_L     GLTSYN_TIME_R
+ *  +---------------+ +---------------+ +---------------+
+ *  |    32 bits    | |    32 bits    | |    32 bits    |
+ *  +---------------+ +---------------+ +---------------+
+ *
+ * The registers are incremented every clock tick using a 40bit increment
+ * value defined over two registers:
+ *
+ *                     GLTSYN_INCVAL_H   GLTSYN_INCVAL_L
+ *                    +---------------+ +---------------+
+ *                    |    8 bit s    | |    32 bits    |
+ *                    +---------------+ +---------------+
+ *
+ * The increment value is added to the GLSTYN_TIME_R and GLSTYN_TIME_L
+ * registers every clock source tick. Depending on the specific device
+ * configuration, the clock source frequency could be one of a number of
+ * values.
+ *
+ * For E810 devices, the increment frequency is 812.5 MHz
+ *
+ * The hardware captures timestamps in the PHY for incoming packets, and for
+ * outgoing packets on request. To support this, the PHY maintains a timer
+ * that matches the lower 64 bits of the global source timer.
+ *
+ * In order to ensure that the PHY timers and the source timer are equivalent,
+ * shadow registers are used to prepare the desired initial values. A special
+ * sync command is issued to trigger copying from the shadow registers into
+ * the appropriate source and PHY registers simultaneously.
+ */
+
+/**
+ * ice_get_ptp_src_clock_index - determine source clock index
+ * @hw: pointer to HW struct
+ *
+ * Determine the source clock index currently in use, based on device
+ * capabilities reported during initialization.
+ */
+u8 ice_get_ptp_src_clock_index(struct ice_hw *hw)
+{
+	return hw->func_caps.ts_func_info.tmr_index_assoc;
+}
+
+/* E810 functions
+ *
+ * The following functions operate on the E810 series devices which use
+ * a separate external PHY.
+ */
+
+/**
+ * ice_read_phy_reg_e810 - Read register from external PHY on E810
+ * @hw: pointer to the HW struct
+ * @addr: the address to read from
+ * @val: On return, the value read from the PHY
+ *
+ * Read a register from the external PHY on the E810 device.
+ */
+static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int status;
+
+	msg.msg_addr_low = lower_16_bits(addr);
+	msg.msg_addr_high = upper_16_bits(addr);
+	msg.opcode = ice_sbq_msg_rd;
+	msg.dest_dev = rmn_0;
+
+	status = ice_sbq_rw_reg(hw, &msg);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, status %d\n",
+			  status);
+		return status;
+	}
+
+	*val = msg.data;
+
+	return 0;
+}
+
+/**
+ * ice_write_phy_reg_e810 - Write register on external PHY on E810
+ * @hw: pointer to the HW struct
+ * @addr: the address to writem to
+ * @val: the value to write to the PHY
+ *
+ * Write a value to a register of the external PHY on the E810 device.
+ */
+static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int status;
+
+	msg.msg_addr_low = lower_16_bits(addr);
+	msg.msg_addr_high = upper_16_bits(addr);
+	msg.opcode = ice_sbq_msg_wr;
+	msg.dest_dev = rmn_0;
+	msg.data = val;
+
+	status = ice_sbq_rw_reg(hw, &msg);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, status %d\n",
+			  status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_e810 - Read a PHY timestamp out of the external PHY
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block of the external PHY
+ * on the E810 device.
+ */
+static int
+ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp)
+{
+	u32 lo_addr, hi_addr, lo, hi;
+	int status;
+
+	lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
+	hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
+
+	status = ice_read_phy_reg_e810(hw, lo_addr, &lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_read_phy_reg_e810(hw, hi_addr, &hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, status %d\n",
+			  status);
+		return status;
+	}
+
+	/* For E810 devices, the timestamp is reported with the lower 32 bits
+	 * in the low register, and the upper 8 bits in the high register.
+	 */
+	*tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M);
+
+	return 0;
+}
+
+/**
+ * ice_clear_phy_tstamp_e810 - Clear a timestamp from the external PHY
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the timestamp block of the
+ * external PHY on the E810 device.
+ */
+static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx)
+{
+	u32 lo_addr, hi_addr;
+	int status;
+
+	lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
+	hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
+
+	status = ice_write_phy_reg_e810(hw, lo_addr, 0);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_write_phy_reg_e810(hw, hi_addr, 0);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, status %d\n",
+			  status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_phy_e810 - Enable PTP function on the external PHY
+ * @hw: pointer to HW struct
+ *
+ * Enable the timesync PTP functionality for the external PHY connected to
+ * this function.
+ */
+int ice_ptp_init_phy_e810(struct ice_hw *hw)
+{
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx),
+					GLTSYN_ENA_TSYN_ENA_M);
+	if (status)
+		ice_debug(hw, ICE_DBG_PTP, "PTP failed in ena_phy_time_syn %d\n",
+			  status);
+
+	return status;
+}
+
+/**
+ * ice_ptp_prep_phy_time_e810 - Prepare PHY port with initial time
+ * @hw: Board private structure
+ * @time: Time to initialize the PHY port clock to
+ *
+ * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the
+ * initial clock time. The time will not actually be programmed until the
+ * driver issues an INIT_TIME command.
+ *
+ * The time value is the upper 32 bits of the PHY timer, usually in units of
+ * nominal nanoseconds.
+ */
+static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time)
+{
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_0(tmr_idx), 0);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_0, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_L(tmr_idx), time);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_L, status %d\n",
+			  status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_adj_e810 - Prep PHY port for a time adjustment
+ * @hw: pointer to HW struct
+ * @adj: adjustment value to program
+ *
+ * Prepare the PHY port for an atomic adjustment by programming the PHY
+ * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment
+ * is completed by issuing an ADJ_TIME sync command.
+ *
+ * The adjustment value only contains the portion used for the upper 32bits of
+ * the PHY timer, usually in units of nominal nanoseconds. Negative
+ * adjustments are supported using 2s complement arithmetic.
+ */
+static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj)
+{
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Adjustments are represented as signed 2's complement values in
+	 * nanoseconds. Sub-nanosecond adjustment is not supported.
+	 */
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), 0);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_L, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), adj);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_H, status %d\n",
+			  status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_incval_e810 - Prep PHY port increment value change
+ * @hw: pointer to HW struct
+ * @incval: The new 40bit increment value to prepare
+ *
+ * Prepare the PHY port for a new increment value by programming the PHY
+ * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is
+ * completed by issuing an INIT_INCVAL command.
+ */
+static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval)
+{
+	u32 high, low;
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	low = lower_32_bits(incval);
+	high = upper_32_bits(incval);
+
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), low);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write incval to PHY SHADJ_L, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), high);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write incval PHY SHADJ_H, status %d\n",
+			  status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_port_cmd_e810 - Prepare all external PHYs for a timer command
+ * @hw: pointer to HW struct
+ * @cmd: Command to be sent to the port
+ *
+ * Prepare the external PHYs connected to this device for a timer sync
+ * command.
+ */
+static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	u32 cmd_val, val;
+	int status;
+
+	switch (cmd) {
+	case INIT_TIME:
+		cmd_val = GLTSYN_CMD_INIT_TIME;
+		break;
+	case INIT_INCVAL:
+		cmd_val = GLTSYN_CMD_INIT_INCVAL;
+		break;
+	case ADJ_TIME:
+		cmd_val = GLTSYN_CMD_ADJ_TIME;
+		break;
+	case READ_TIME:
+		cmd_val = GLTSYN_CMD_READ_TIME;
+		break;
+	case ADJ_TIME_AT_TIME:
+		cmd_val = GLTSYN_CMD_ADJ_INIT_TIME;
+		break;
+	}
+
+	/* Read, modify, write */
+	status = ice_read_phy_reg_e810(hw, ETH_GLTSYN_CMD, &val);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read GLTSYN_CMD, status %d\n", status);
+		return status;
+	}
+
+	/* Modify necessary bits only and perform write */
+	val &= ~TS_CMD_MASK_E810;
+	val |= cmd_val;
+
+	status = ice_write_phy_reg_e810(hw, ETH_GLTSYN_CMD, val);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write back GLTSYN_CMD, status %d\n", status);
+		return status;
+	}
+
+	return 0;
+}
+
+/* Device agnostic functions
+ *
+ * The following functions implement useful behavior to hide the differences
+ * between E810 and other devices. They call the device-specific
+ * implementations where necessary.
+ *
+ * Currently, the driver only supports E810, but future work will enable
+ * support for E822-based devices.
+ */
+
+/**
+ * ice_ptp_lock - Acquire PTP global semaphore register lock
+ * @hw: pointer to the HW struct
+ *
+ * Acquire the global PTP hardware semaphore lock. Returns true if the lock
+ * was acquired, false otherwise.
+ *
+ * The PFTSYN_SEM register sets the busy bit on read, returning the previous
+ * value. If software sees the busy bit cleared, this means that this function
+ * acquired the lock (and the busy bit is now set). If software sees the busy
+ * bit set, it means that another function acquired the lock.
+ *
+ * Software must clear the busy bit with a write to release the lock for other
+ * functions when done.
+ */
+bool ice_ptp_lock(struct ice_hw *hw)
+{
+	u32 hw_lock;
+	int i;
+
+#define MAX_TRIES 5
+
+	for (i = 0; i < MAX_TRIES; i++) {
+		hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+		hw_lock = hw_lock & PFTSYN_SEM_BUSY_M;
+		if (hw_lock) {
+			/* Somebody is holding the lock */
+			usleep_range(10000, 20000);
+			continue;
+		} else {
+			break;
+		}
+	}
+
+	return !hw_lock;
+}
+
+/**
+ * ice_ptp_unlock - Release PTP global semaphore register lock
+ * @hw: pointer to the HW struct
+ *
+ * Release the global PTP hardware semaphore lock. This is done by writing to
+ * the PFTSYN_SEM register.
+ */
+void ice_ptp_unlock(struct ice_hw *hw)
+{
+	wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0);
+}
+
+/**
+ * ice_ptp_src_cmd - Prepare source timer for a timer command
+ * @hw: pointer to HW structure
+ * @cmd: Timer command
+ *
+ * Prepare the source timer for an upcoming timer sync command.
+ */
+static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	u32 cmd_val;
+	u8 tmr_idx;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+	cmd_val = tmr_idx << SEL_CPK_SRC;
+
+	switch (cmd) {
+	case INIT_TIME:
+		cmd_val |= GLTSYN_CMD_INIT_TIME;
+		break;
+	case INIT_INCVAL:
+		cmd_val |= GLTSYN_CMD_INIT_INCVAL;
+		break;
+	case ADJ_TIME:
+		cmd_val |= GLTSYN_CMD_ADJ_TIME;
+		break;
+	case ADJ_TIME_AT_TIME:
+		cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME;
+		break;
+	case READ_TIME:
+		cmd_val |= GLTSYN_CMD_READ_TIME;
+		break;
+	}
+
+	wr32(hw, GLTSYN_CMD, cmd_val);
+}
+
+/**
+ * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command
+ * @hw: pointer to HW struct
+ * @cmd: the command to issue
+ *
+ * Prepare the source timer and PHY timers and then trigger the requested
+ * command. This causes the shadow registers previously written in preparation
+ * for the command to be synchronously applied to both the source and PHY
+ * timers.
+ */
+static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	int status;
+
+	/* First, prepare the source timer */
+	ice_ptp_src_cmd(hw, cmd);
+
+	/* Next, prepare the ports */
+	status = ice_ptp_port_cmd_e810(hw, cmd);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, status %d\n",
+			  cmd, status);
+		return status;
+	}
+
+	/* Write the sync command register to drive both source and PHY timer commands
+	 * synchronously
+	 */
+	wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_time - Initialize device time to provided value
+ * @hw: pointer to HW struct
+ * @time: 64bits of time (GLTSYN_TIME_L and GLTSYN_TIME_H)
+ *
+ * Initialize the device to the specified time provided. This requires a three
+ * step process:
+ *
+ * 1) write the new init time to the source timer shadow registers
+ * 2) write the new init time to the PHY timer shadow registers
+ * 3) issue an init_time timer command to synchronously switch both the source
+ *    and port timers to the new init time value at the next clock cycle.
+ */
+int ice_ptp_init_time(struct ice_hw *hw, u64 time)
+{
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Source timers */
+	wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time));
+	wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time));
+	wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0);
+
+	/* PHY timers */
+	/* Fill Rx and Tx ports and send msg to PHY */
+	status = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF);
+	if (status)
+		return status;
+
+	return ice_ptp_tmr_cmd(hw, INIT_TIME);
+}
+
+/**
+ * ice_ptp_write_incval - Program PHC with new increment value
+ * @hw: pointer to HW struct
+ * @incval: Source timer increment value per clock cycle
+ *
+ * Program the PHC with a new increment value. This requires a three-step
+ * process:
+ *
+ * 1) Write the increment value to the source timer shadow registers
+ * 2) Write the increment value to the PHY timer shadow registers
+ * 3) Issue an INIT_INCVAL timer command to synchronously switch both the
+ *    source and port timers to the new increment value at the next clock
+ *    cycle.
+ */
+int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
+{
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Shadow Adjust */
+	wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval));
+	wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval));
+
+	status = ice_ptp_prep_phy_incval_e810(hw, incval);
+	if (status)
+		return status;
+
+	return ice_ptp_tmr_cmd(hw, INIT_INCVAL);
+}
+
+/**
+ * ice_ptp_write_incval_locked - Program new incval while holding semaphore
+ * @hw: pointer to HW struct
+ * @incval: Source timer increment value per clock cycle
+ *
+ * Program a new PHC incval while holding the PTP semaphore.
+ */
+int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval)
+{
+	int status;
+
+	if (!ice_ptp_lock(hw))
+		return -EBUSY;
+
+	status = ice_ptp_write_incval(hw, incval);
+
+	ice_ptp_unlock(hw);
+
+	return status;
+}
+
+/**
+ * ice_ptp_adj_clock - Adjust PHC clock time atomically
+ * @hw: pointer to HW struct
+ * @adj: Adjustment in nanoseconds
+ *
+ * Perform an atomic adjustment of the PHC time by the specified number of
+ * nanoseconds. This requires a three-step process:
+ *
+ * 1) Write the adjustment to the source timer shadow registers
+ * 2) Write the adjustment to the PHY timer shadow registers
+ * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to
+ *    both the source and port timers at the next clock cycle.
+ */
+int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
+{
+	int status;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Write the desired clock adjustment into the GLTSYN_SHADJ register.
+	 * For an ADJ_TIME command, this set of registers represents the value
+	 * to add to the clock time. It supports subtraction by interpreting
+	 * the value as a 2's complement integer.
+	 */
+	wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0);
+	wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj);
+
+	status = ice_ptp_prep_phy_adj_e810(hw, adj);
+	if (status)
+		return status;
+
+	return ice_ptp_tmr_cmd(hw, ADJ_TIME);
+}
+
+/**
+ * ice_read_phy_tstamp - Read a PHY timestamp from the timestamo block
+ * @hw: pointer to the HW struct
+ * @block: the block to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block.
+ */
+int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
+{
+	return ice_read_phy_tstamp_e810(hw, block, idx, tstamp);
+}
+
+/**
+ * ice_clear_phy_tstamp - Clear a timestamp from the timestamp block
+ * @hw: pointer to the HW struct
+ * @block: the block to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the timestamp block.
+ */
+int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
+{
+	return ice_clear_phy_tstamp_e810(hw, block, idx);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
new file mode 100644
index 000000000000..55a414e87018
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_HW_H_
+#define _ICE_PTP_HW_H_
+
+enum ice_ptp_tmr_cmd {
+	INIT_TIME,
+	INIT_INCVAL,
+	ADJ_TIME,
+	ADJ_TIME_AT_TIME,
+	READ_TIME
+};
+
+/* Increment value to generate nanoseconds in the GLTSYN_TIME_L register for
+ * the E810 devices. Based off of a PLL with an 812.5 MHz frequency.
+ */
+#define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL
+
+/* Device agnostic functions */
+u8 ice_get_ptp_src_clock_index(struct ice_hw *hw);
+bool ice_ptp_lock(struct ice_hw *hw);
+void ice_ptp_unlock(struct ice_hw *hw);
+int ice_ptp_init_time(struct ice_hw *hw, u64 time);
+int ice_ptp_write_incval(struct ice_hw *hw, u64 incval);
+int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
+int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj);
+int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp);
+int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx);
+
+/* E810 family functions */
+int ice_ptp_init_phy_e810(struct ice_hw *hw);
+
+#define PFTSYN_SEM_BYTES	4
+
+/* PHY timer commands */
+#define SEL_CPK_SRC	8
+
+/* Time Sync command Definitions */
+#define GLTSYN_CMD_INIT_TIME		BIT(0)
+#define GLTSYN_CMD_INIT_INCVAL		BIT(1)
+#define GLTSYN_CMD_ADJ_TIME		BIT(2)
+#define GLTSYN_CMD_ADJ_INIT_TIME	(BIT(2) | BIT(3))
+#define GLTSYN_CMD_READ_TIME		BIT(7)
+
+#define TS_CMD_MASK_E810		0xFF
+#define SYNC_EXEC_CMD			0x3
+
+/* E810 timesync enable register */
+#define ETH_GLTSYN_ENA(_i)		(0x03000348 + ((_i) * 4))
+
+/* E810 shadow init time registers */
+#define ETH_GLTSYN_SHTIME_0(i)		(0x03000368 + ((i) * 32))
+#define ETH_GLTSYN_SHTIME_L(i)		(0x0300036C + ((i) * 32))
+
+/* E810 shadow time adjust registers */
+#define ETH_GLTSYN_SHADJ_L(_i)		(0x03000378 + ((_i) * 32))
+#define ETH_GLTSYN_SHADJ_H(_i)		(0x0300037C + ((_i) * 32))
+
+/* E810 timer command register */
+#define ETH_GLTSYN_CMD			0x03000344
+
+/* Source timer incval macros */
+#define INCVAL_HIGH_M			0xFF
+
+/* Timestamp block macros */
+#define TS_LOW_M			0xFFFFFFFF
+#define TS_HIGH_S			32
+
+#define BYTES_PER_IDX_ADDR_L_U		8
+
+/* External PHY timestamp address */
+#define TS_EXT(a, port, idx) ((a) + (0x1000 * (port)) +			\
+				 ((idx) * BYTES_PER_IDX_ADDR_L_U))
+
+#define LOW_TX_MEMORY_BANK_START	0x03090000
+#define HIGH_TX_MEMORY_BANK_START	0x03090004
+
+#endif /* _ICE_PTP_HW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 0e5d8d52728b..d33d1906103c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -49,6 +49,7 @@ static inline u32 ice_round_to_num(u32 N, u32 R)
 #define ICE_DBG_RDMA		BIT_ULL(15)
 #define ICE_DBG_PKG		BIT_ULL(16)
 #define ICE_DBG_RES		BIT_ULL(17)
+#define ICE_DBG_PTP		BIT_ULL(19)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
 #define ICE_DBG_AQ_DESC		BIT_ULL(25)
 #define ICE_DBG_AQ_DESC_BUF	BIT_ULL(26)
@@ -842,6 +843,14 @@ struct ice_hw {
 
 	u8 ucast_shared;	/* true if VSIs can share unicast addr */
 
+#define ICE_PHY_PER_NAC		1
+#define ICE_MAX_QUAD		2
+#define ICE_NUM_QUAD_TYPE	2
+#define ICE_PORTS_PER_QUAD	4
+#define ICE_PHY_0_LAST_QUAD	1
+#define ICE_PORTS_PER_PHY	8
+#define ICE_NUM_EXTERNAL_PORTS		ICE_PORTS_PER_PHY
+
 	/* Active package version (currently active) */
 	struct ice_pkg_ver active_pkg_ver;
 	u32 active_track_id;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 15d8bad3d2f2..e73f3bc3dba5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -71,6 +71,18 @@
  */
 #define lower_32_bits(n) ((u32)((n) & 0xffffffff))
 
+/**
+ * upper_16_bits - return bits 16-31 of a number
+ * @n: the number we're accessing
+ */
+#define upper_16_bits(n) ((u16)((n) >> 16))
+
+/**
+ * lower_16_bits - return bits 0-15 of a number
+ * @n: the number we're accessing
+ */
+#define lower_16_bits(n) ((u16)((n) & 0xffff))
+
 struct completion;
 struct pt_regs;
 struct user;

From 06c16d89d2cbe284c2792caa7648c052c1b91042 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:50 -0700
Subject: [PATCH 1413/3292] ice: register 1588 PTP clock device object for E810
 devices

Add a new ice_ptp.c file for holding the basic PTP clock interface
functions. If the device supports PTP, call the new ice_ptp_init and
ice_ptp_release functions where appropriate.

If the function owns the hardware resource associated with the PTP
hardware clock, register with the PTP_1588_CLOCK infrastructure to
allocate a new clock object that represents the device hardware clock.

Implement basic functionality for reading and setting the clock time,
performing clock adjustments, and adjusting the clock frequency.

Future changes will introduce functionality for handling related
features including Tx and Rx timestamps.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/Kconfig          |   1 +
 drivers/net/ethernet/intel/ice/Makefile     |   1 +
 drivers/net/ethernet/intel/ice/ice.h        |   4 +
 drivers/net/ethernet/intel/ice/ice_common.c |  11 +
 drivers/net/ethernet/intel/ice/ice_common.h |   1 +
 drivers/net/ethernet/intel/ice/ice_main.c   |  21 +
 drivers/net/ethernet/intel/ice/ice_ptp.c    | 438 ++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_ptp.h    |  36 ++
 8 files changed, 513 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_ptp.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_ptp.h

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index eae1b42e48db..82744a7501c7 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -299,6 +299,7 @@ config ICE
 	select DIMLIB
 	select NET_DEVLINK
 	select PLDMFW
+	imply PTP_1588_CLOCK
 	help
 	  This driver supports Intel(R) Ethernet Connection E800 Series of
 	  devices.  For more information on how to identify your adapter, go
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index dfb64fb504a2..4f538cdf42c1 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -29,6 +29,7 @@ ice-y := ice_main.o	\
 	 ice_ethtool.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o
+ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o
 ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
 ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
 ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index b745e250ced9..a450343fbb92 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -59,6 +59,7 @@
 #include "ice_idc_int.h"
 #include "ice_virtchnl_pf.h"
 #include "ice_sriov.h"
+#include "ice_ptp.h"
 #include "ice_fdir.h"
 #include "ice_xsk.h"
 #include "ice_arfs.h"
@@ -389,6 +390,8 @@ enum ice_pf_flags {
 	ICE_FLAG_DCB_CAPABLE,
 	ICE_FLAG_DCB_ENA,
 	ICE_FLAG_FD_ENA,
+	ICE_FLAG_PTP_SUPPORTED,		/* PTP is supported by NVM */
+	ICE_FLAG_PTP,			/* PTP is enabled by software */
 	ICE_FLAG_AUX_ENA,
 	ICE_FLAG_ADV_FEATURES,
 	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
@@ -451,6 +454,7 @@ struct ice_pf {
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
 	struct mutex tc_mutex;		/* lock to protect TC changes */
 	u32 msg_enable;
+	struct ice_ptp ptp;
 	u16 num_rdma_msix;		/* Total MSIX vectors for RDMA driver */
 	u16 rdma_base_vector;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 39c1ed628be7..2fb81e359cdf 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -58,6 +58,17 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
 	return 0;
 }
 
+/**
+ * ice_is_e810
+ * @hw: pointer to the hardware structure
+ *
+ * returns true if the device is E810 based, false if not.
+ */
+bool ice_is_e810(struct ice_hw *hw)
+{
+	return hw->mac_type == ICE_MAC_E810;
+}
+
 /**
  * ice_clear_pf_cfg - Clear PF configuration
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 8cc0a639c208..fb16070f02e2 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -99,6 +99,7 @@ ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
 enum ice_status
 ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
 			struct ice_sq_cd *cd);
+bool ice_is_e810(struct ice_hw *hw);
 enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
 enum ice_status
 ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4b6911955f7c..611f111daa8f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -471,6 +471,9 @@ ice_prepare_for_reset(struct ice_pf *pf)
 	/* disable the VSIs and their queues that are not already DOWN */
 	ice_pf_dis_all_vsi(pf, false);
 
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_release(pf);
+
 	if (hw->port_info)
 		ice_sched_clear_port(hw->port_info);
 
@@ -3364,6 +3367,9 @@ static void ice_deinit_pf(struct ice_pf *pf)
 		bitmap_free(pf->avail_rxqs);
 		pf->avail_rxqs = NULL;
 	}
+
+	if (pf->ptp.clock)
+		ptp_clock_unregister(pf->ptp.clock);
 }
 
 /**
@@ -3410,6 +3416,10 @@ static void ice_set_pf_caps(struct ice_pf *pf)
 				       func_caps->fd_fltr_best_effort);
 	}
 
+	clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
+	if (func_caps->common_cap.ieee_1588)
+		set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
+
 	pf->max_pf_txqs = func_caps->common_cap.num_txq;
 	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
 }
@@ -4392,6 +4402,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	}
 
 	/* initialize DDP driven features */
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_init(pf);
 
 	/* Note: Flow director init failure is non-fatal to load */
 	if (ice_init_fdir(pf))
@@ -4559,6 +4571,8 @@ static void ice_remove(struct pci_dev *pdev)
 
 	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_deinit_lag(pf);
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_release(pf);
 	if (!ice_is_safe_mode(pf))
 		ice_remove_arfs(pf);
 	ice_setup_mc_magic_wake(pf);
@@ -6350,6 +6364,13 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
 		ice_dcb_rebuild(pf);
 
+	/* If the PF previously had enabled PTP, PTP init needs to happen before
+	 * the VSI rebuild. If not, this causes the PTP link status events to
+	 * fail.
+	 */
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_init(pf);
+
 	/* rebuild PF VSI */
 	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
 	if (err) {
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
new file mode 100644
index 000000000000..4ec4b2352234
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+
+/**
+ * ice_ptp_read_src_clk_reg - Read the source clock register
+ * @pf: Board private structure
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ *       the system clock. Will be ignored if NULL is given.
+ */
+static u64
+ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 hi, lo, lo2;
+	u8 tmr_idx;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+	/* Read the system timestamp pre PHC read */
+	if (sts)
+		ptp_read_system_prets(sts);
+
+	lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+
+	/* Read the system timestamp post PHC read */
+	if (sts)
+		ptp_read_system_postts(sts);
+
+	hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+	lo2 = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+
+	if (lo2 < lo) {
+		/* if TIME_L rolled over read TIME_L again and update
+		 * system timestamps
+		 */
+		if (sts)
+			ptp_read_system_prets(sts);
+		lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+		if (sts)
+			ptp_read_system_postts(sts);
+		hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+	}
+
+	return ((u64)hi << 32) | lo;
+}
+
+/**
+ * ice_ptp_read_time - Read the time from the device
+ * @pf: Board private structure
+ * @ts: timespec structure to hold the current time value
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ *       the system clock. Will be ignored if NULL is given.
+ *
+ * This function reads the source clock registers and stores them in a timespec.
+ * However, since the registers are 64 bits of nanoseconds, we must convert the
+ * result to a timespec before we can return.
+ */
+static void
+ice_ptp_read_time(struct ice_pf *pf, struct timespec64 *ts,
+		  struct ptp_system_timestamp *sts)
+{
+	u64 time_ns = ice_ptp_read_src_clk_reg(pf, sts);
+
+	*ts = ns_to_timespec64(time_ns);
+}
+
+/**
+ * ice_ptp_write_init - Set PHC time to provided value
+ * @pf: Board private structure
+ * @ts: timespec structure that holds the new time value
+ *
+ * Set the PHC time to the specified time provided in the timespec.
+ */
+static int ice_ptp_write_init(struct ice_pf *pf, struct timespec64 *ts)
+{
+	u64 ns = timespec64_to_ns(ts);
+	struct ice_hw *hw = &pf->hw;
+
+	return ice_ptp_init_time(hw, ns);
+}
+
+/**
+ * ice_ptp_write_adj - Adjust PHC clock time atomically
+ * @pf: Board private structure
+ * @adj: Adjustment in nanoseconds
+ *
+ * Perform an atomic adjustment of the PHC time by the specified number of
+ * nanoseconds.
+ */
+static int ice_ptp_write_adj(struct ice_pf *pf, s32 adj)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	return ice_ptp_adj_clock(hw, adj);
+}
+
+/**
+ * ice_ptp_adjfine - Adjust clock increment rate
+ * @info: the driver's PTP info structure
+ * @scaled_ppm: Parts per million with 16-bit fractional field
+ *
+ * Adjust the frequency of the clock by the indicated scaled ppm from the
+ * base frequency.
+ */
+static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	u64 freq, divisor = 1000000ULL;
+	struct ice_hw *hw = &pf->hw;
+	s64 incval, diff;
+	int neg_adj = 0;
+	int err;
+
+	incval = ICE_PTP_NOMINAL_INCVAL_E810;
+
+	if (scaled_ppm < 0) {
+		neg_adj = 1;
+		scaled_ppm = -scaled_ppm;
+	}
+
+	while ((u64)scaled_ppm > div_u64(U64_MAX, incval)) {
+		/* handle overflow by scaling down the scaled_ppm and
+		 * the divisor, losing some precision
+		 */
+		scaled_ppm >>= 2;
+		divisor >>= 2;
+	}
+
+	freq = (incval * (u64)scaled_ppm) >> 16;
+	diff = div_u64(freq, divisor);
+
+	if (neg_adj)
+		incval -= diff;
+	else
+		incval += diff;
+
+	err = ice_ptp_write_incval_locked(hw, incval);
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to set incval, err %d\n",
+			err);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_gettimex64 - Get the time of the clock
+ * @info: the driver's PTP info structure
+ * @ts: timespec64 structure to hold the current time value
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ *       the system clock. Will be ignored if NULL is given.
+ *
+ * Read the device clock and return the correct value on ns, after converting it
+ * into a timespec struct.
+ */
+static int
+ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
+		   struct ptp_system_timestamp *sts)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_hw *hw = &pf->hw;
+
+	if (!ice_ptp_lock(hw)) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
+		return -EBUSY;
+	}
+
+	ice_ptp_read_time(pf, ts, sts);
+	ice_ptp_unlock(hw);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_settime64 - Set the time of the clock
+ * @info: the driver's PTP info structure
+ * @ts: timespec64 structure that holds the new time value
+ *
+ * Set the device clock to the user input value. The conversion from timespec
+ * to ns happens in the write function.
+ */
+static int
+ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct timespec64 ts64 = *ts;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	if (!ice_ptp_lock(hw)) {
+		err = -EBUSY;
+		goto exit;
+	}
+
+	err = ice_ptp_write_init(pf, &ts64);
+	ice_ptp_unlock(hw);
+
+exit:
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_adjtime_nonatomic - Do a non-atomic clock adjustment
+ * @info: the driver's PTP info structure
+ * @delta: Offset in nanoseconds to adjust the time by
+ */
+static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta)
+{
+	struct timespec64 now, then;
+
+	then = ns_to_timespec64(delta);
+	ice_ptp_gettimex64(info, &now, NULL);
+	now = timespec64_add(now, then);
+
+	return ice_ptp_settime64(info, (const struct timespec64 *)&now);
+}
+
+/**
+ * ice_ptp_adjtime - Adjust the time of the clock by the indicated delta
+ * @info: the driver's PTP info structure
+ * @delta: Offset in nanoseconds to adjust the time by
+ */
+static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_hw *hw = &pf->hw;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+
+	/* Hardware only supports atomic adjustments using signed 32-bit
+	 * integers. For any adjustment outside this range, perform
+	 * a non-atomic get->adjust->set flow.
+	 */
+	if (delta > S32_MAX || delta < S32_MIN) {
+		dev_dbg(dev, "delta = %lld, adjtime non-atomic\n", delta);
+		return ice_ptp_adjtime_nonatomic(info, delta);
+	}
+
+	if (!ice_ptp_lock(hw)) {
+		dev_err(dev, "PTP failed to acquire semaphore in adjtime\n");
+		return -EBUSY;
+	}
+
+	err = ice_ptp_write_adj(pf, delta);
+
+	ice_ptp_unlock(hw);
+
+	if (err) {
+		dev_err(dev, "PTP failed to adjust time, err %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_set_caps - Set PTP capabilities
+ * @pf: Board private structure
+ */
+static void ice_ptp_set_caps(struct ice_pf *pf)
+{
+	struct ptp_clock_info *info = &pf->ptp.info;
+	struct device *dev = ice_pf_to_dev(pf);
+
+	snprintf(info->name, sizeof(info->name) - 1, "%s-%s-clk",
+		 dev_driver_string(dev), dev_name(dev));
+	info->owner = THIS_MODULE;
+	info->max_adj = 999999999;
+	info->adjtime = ice_ptp_adjtime;
+	info->adjfine = ice_ptp_adjfine;
+	info->gettimex64 = ice_ptp_gettimex64;
+	info->settime64 = ice_ptp_settime64;
+}
+
+/**
+ * ice_ptp_create_clock - Create PTP clock device for userspace
+ * @pf: Board private structure
+ *
+ * This function creates a new PTP clock device. It only creates one if we
+ * don't already have one. Will return error if it can't create one, but success
+ * if we already have a device. Should be used by ice_ptp_init to create clock
+ * initially, and prevent global resets from creating new clock devices.
+ */
+static long ice_ptp_create_clock(struct ice_pf *pf)
+{
+	struct ptp_clock_info *info;
+	struct ptp_clock *clock;
+	struct device *dev;
+
+	/* No need to create a clock device if we already have one */
+	if (pf->ptp.clock)
+		return 0;
+
+	ice_ptp_set_caps(pf);
+
+	info = &pf->ptp.info;
+	dev = ice_pf_to_dev(pf);
+
+	/* Attempt to register the clock before enabling the hardware. */
+	clock = ptp_clock_register(info, dev);
+	if (IS_ERR(clock))
+		return PTR_ERR(clock);
+
+	pf->ptp.clock = clock;
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_owner - Initialize PTP_1588_CLOCK device
+ * @pf: Board private structure
+ *
+ * Setup and initialize a PTP clock device that represents the device hardware
+ * clock. Save the clock index for other functions connected to the same
+ * hardware resource.
+ */
+static int ice_ptp_init_owner(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct timespec64 ts;
+	u8 src_idx;
+	int err;
+
+	wr32(hw, GLTSYN_SYNC_DLAY, 0);
+
+	/* Clear some HW residue and enable source clock */
+	src_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Enable source clocks */
+	wr32(hw, GLTSYN_ENA(src_idx), GLTSYN_ENA_TSYN_ENA_M);
+
+	/* Enable PHY time sync */
+	err = ice_ptp_init_phy_e810(hw);
+	if (err)
+		goto err_exit;
+
+	/* Clear event status indications for auxiliary pins */
+	(void)rd32(hw, GLTSYN_STAT(src_idx));
+
+	/* Acquire the global hardware lock */
+	if (!ice_ptp_lock(hw)) {
+		err = -EBUSY;
+		goto err_exit;
+	}
+
+	/* Write the increment time value to PHY and LAN */
+	err = ice_ptp_write_incval(hw, ICE_PTP_NOMINAL_INCVAL_E810);
+	if (err) {
+		ice_ptp_unlock(hw);
+		goto err_exit;
+	}
+
+	ts = ktime_to_timespec64(ktime_get_real());
+	/* Write the initial Time value to PHY and LAN */
+	err = ice_ptp_write_init(pf, &ts);
+	if (err) {
+		ice_ptp_unlock(hw);
+		goto err_exit;
+	}
+
+	/* Release the global hardware lock */
+	ice_ptp_unlock(hw);
+
+	/* Ensure we have a clock device */
+	err = ice_ptp_create_clock(pf);
+	if (err)
+		goto err_clk;
+
+	return 0;
+
+err_clk:
+	pf->ptp.clock = NULL;
+err_exit:
+	dev_err(dev, "PTP failed to register clock, err %d\n", err);
+
+	return err;
+}
+
+/**
+ * ice_ptp_init - Initialize the PTP support after device probe or reset
+ * @pf: Board private structure
+ *
+ * This function sets device up for PTP support. The first time it is run, it
+ * will create a clock device. It does not create a clock device if one
+ * already exists. It also reconfigures the device after a reset.
+ */
+void ice_ptp_init(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	/* PTP is currently only supported on E810 devices */
+	if (!ice_is_e810(hw))
+		return;
+
+	/* Check if this PF owns the source timer */
+	if (hw->func_caps.ts_func_info.src_tmr_owned) {
+		err = ice_ptp_init_owner(pf);
+		if (err)
+			return;
+	}
+
+	set_bit(ICE_FLAG_PTP, pf->flags);
+
+	dev_info(dev, "PTP init successful\n");
+}
+
+/**
+ * ice_ptp_release - Disable the driver/HW support and unregister the clock
+ * @pf: Board private structure
+ *
+ * This function handles the cleanup work required from the initialization by
+ * clearing out the important information and unregistering the clock
+ */
+void ice_ptp_release(struct ice_pf *pf)
+{
+	clear_bit(ICE_FLAG_PTP, pf->flags);
+
+	if (!pf->ptp.clock)
+		return;
+
+	ptp_clock_unregister(pf->ptp.clock);
+	pf->ptp.clock = NULL;
+
+	dev_info(ice_pf_to_dev(pf), "Removed PTP clock\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
new file mode 100644
index 000000000000..15f2e325bd68
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_H_
+#define _ICE_PTP_H_
+
+#include <linux/ptp_clock_kernel.h>
+
+#include "ice_ptp_hw.h"
+
+/**
+ * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
+ * @info: structure defining PTP hardware capabilities
+ * @clock: pointer to registered PTP clock device
+ */
+struct ice_ptp {
+	struct ptp_clock_info info;
+	struct ptp_clock *clock;
+};
+
+#define __ptp_info_to_ptp(i) \
+	container_of((i), struct ice_ptp, info)
+#define ptp_info_to_pf(i) \
+	container_of(__ptp_info_to_ptp((i)), struct ice_pf, ptp)
+
+#define PTP_SHARED_CLK_IDX_VALID	BIT(31)
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+struct ice_pf;
+void ice_ptp_init(struct ice_pf *pf);
+void ice_ptp_release(struct ice_pf *pf);
+#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+static inline void ice_ptp_init(struct ice_pf *pf) { }
+static inline void ice_ptp_release(struct ice_pf *pf) { }
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+#endif /* _ICE_PTP_H_ */

From 67569a7f940130fcfe9041e07a614a5263a9944d Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:51 -0700
Subject: [PATCH 1414/3292] ice: report the PTP clock index in ethtool
 .get_ts_info

Now that the driver registers a PTP clock device that represents the
clock hardware, it is important that the clock index is reported via the
ethtool .get_ts_info callback.

The underlying hardware resource is shared between multiple PF
functions. Only one function owns the hardware resources associated with
a timer, but multiple functions may be associated with it for the
purposes of timestamping.

To support this, the owning PF will store the clock index into the
driver shared parameters buffer in firmware. Other PFs will look up the
clock index by reading the driver shared parameter on demand when
requested via the .get_ts_info ethtool function.

In this way, all functions which are tied to the same timer are able to
report the clock index. Userspace software such as ptp4l performs
a look up on the netdev to determine the associated clock, and all
commands to control or configure the clock will be handled through the
controlling PF.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c |  22 +++-
 drivers/net/ethernet/intel/ice/ice_ptp.c     | 129 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_ptp.h     |   5 +
 3 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 1f30f24648d8..01466b9f29b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3195,6 +3195,26 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 	return 0;
 }
 
+static int
+ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+	/* only report timestamping if PTP is enabled */
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return ethtool_op_get_ts_info(dev, info);
+
+	info->so_timestamping = SOF_TIMESTAMPING_SOFTWARE;
+
+	info->phc_index = ice_get_ptp_clock_index(pf);
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF);
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+
+	return 0;
+}
+
 /**
  * ice_get_max_txq - return the maximum number of Tx queues for in a PF
  * @pf: PF structure
@@ -3986,7 +4006,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
 	.set_rxfh		= ice_set_rxfh,
 	.get_channels		= ice_get_channels,
 	.set_channels		= ice_set_channels,
-	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_ts_info		= ice_get_ts_info,
 	.get_per_queue_coalesce	= ice_get_per_q_coalesce,
 	.set_per_queue_coalesce	= ice_set_per_q_coalesce,
 	.get_fecparam		= ice_get_fecparam,
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 4ec4b2352234..82be5846b42f 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -4,6 +4,131 @@
 #include "ice.h"
 #include "ice_lib.h"
 
+/**
+ * ice_get_ptp_clock_index - Get the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Determine the clock index of the PTP clock associated with this device. If
+ * this is the PF controlling the clock, just use the local access to the
+ * clock device pointer.
+ *
+ * Otherwise, read from the driver shared parameters to determine the clock
+ * index value.
+ *
+ * Returns: the index of the PTP clock associated with this device, or -1 if
+ * there is no associated clock.
+ */
+int ice_get_ptp_clock_index(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_aqc_driver_params param_idx;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+	u32 value;
+	int err;
+
+	/* Use the ptp_clock structure if we're the main PF */
+	if (pf->ptp.clock)
+		return ptp_clock_index(pf->ptp.clock);
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+	if (!tmr_idx)
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+	else
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+	err = ice_aq_get_driver_param(hw, param_idx, &value, NULL);
+	if (err) {
+		dev_err(dev, "Failed to read PTP clock index parameter, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		return -1;
+	}
+
+	/* The PTP clock index is an integer, and will be between 0 and
+	 * INT_MAX. The highest bit of the driver shared parameter is used to
+	 * indicate whether or not the currently stored clock index is valid.
+	 */
+	if (!(value & PTP_SHARED_CLK_IDX_VALID))
+		return -1;
+
+	return value & ~PTP_SHARED_CLK_IDX_VALID;
+}
+
+/**
+ * ice_set_ptp_clock_index - Set the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Set the PTP clock index for this device into the shared driver parameters,
+ * so that other PFs associated with this device can read it.
+ *
+ * If the PF is unable to store the clock index, it will log an error, but
+ * will continue operating PTP.
+ */
+static void ice_set_ptp_clock_index(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_aqc_driver_params param_idx;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+	u32 value;
+	int err;
+
+	if (!pf->ptp.clock)
+		return;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+	if (!tmr_idx)
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+	else
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+	value = (u32)ptp_clock_index(pf->ptp.clock);
+	if (value > INT_MAX) {
+		dev_err(dev, "PTP Clock index is too large to store\n");
+		return;
+	}
+	value |= PTP_SHARED_CLK_IDX_VALID;
+
+	err = ice_aq_set_driver_param(hw, param_idx, value, NULL);
+	if (err) {
+		dev_err(dev, "Failed to set PTP clock index parameter, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	}
+}
+
+/**
+ * ice_clear_ptp_clock_index - Clear the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Clear the PTP clock index for this device. Must be called when
+ * unregistering the PTP clock, in order to ensure other PFs stop reporting
+ * a clock object that no longer exists.
+ */
+static void ice_clear_ptp_clock_index(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_aqc_driver_params param_idx;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+	int err;
+
+	/* Do not clear the index if we don't own the timer */
+	if (!hw->func_caps.ts_func_info.src_tmr_owned)
+		return;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+	if (!tmr_idx)
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+	else
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+	err = ice_aq_set_driver_param(hw, param_idx, 0, NULL);
+	if (err) {
+		dev_dbg(dev, "Failed to clear PTP clock index parameter, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	}
+}
+
 /**
  * ice_ptp_read_src_clk_reg - Read the source clock register
  * @pf: Board private structure
@@ -377,6 +502,9 @@ static int ice_ptp_init_owner(struct ice_pf *pf)
 	if (err)
 		goto err_clk;
 
+	/* Store the PTP clock index for other PFs */
+	ice_set_ptp_clock_index(pf);
+
 	return 0;
 
 err_clk:
@@ -431,6 +559,7 @@ void ice_ptp_release(struct ice_pf *pf)
 	if (!pf->ptp.clock)
 		return;
 
+	ice_clear_ptp_clock_index(pf);
 	ptp_clock_unregister(pf->ptp.clock);
 	pf->ptp.clock = NULL;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 15f2e325bd68..01f7db05ef7d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -27,9 +27,14 @@ struct ice_ptp {
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 struct ice_pf;
+int ice_get_ptp_clock_index(struct ice_pf *pf);
 void ice_ptp_init(struct ice_pf *pf);
 void ice_ptp_release(struct ice_pf *pf);
 #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+static inline int ice_get_ptp_clock_index(struct ice_pf *pf)
+{
+	return -1;
+}
 static inline void ice_ptp_init(struct ice_pf *pf) { }
 static inline void ice_ptp_release(struct ice_pf *pf) { }
 #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */

From 77a781155a659053f3b7e81a0ab115d27ff151cd Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:52 -0700
Subject: [PATCH 1415/3292] ice: enable receive hardware timestamping

Add SIOCGHWTSTAMP and SIOCSHWTSTAMP ioctl handlers to respond to
requests to enable timestamping support. If the request is for enabling
Rx timestamps, set a bit in the Rx descriptors to indicate that receive
timestamps should be reported.

Hardware captures receive timestamps in the PHY which only captures part
of the timer, and reports only 40 bits into the Rx descriptor. The upper
32 bits represent the contents of GLTSYN_TIME_L at the point of packet
reception, while the lower 8 bits represent the upper 8 bits of
GLTSYN_TIME_0.

The networking and PTP stack expect 64 bit timestamps in nanoseconds. To
support this, implement some logic to extend the timestamps by using the
full PHC time.

If the Rx timestamp was captured prior to the PHC time, then the real
timestamp is

  PHC - (lower_32_bits(PHC) - timestamp)

If the Rx timestamp was captured after the PHC time, then the real
timestamp is

  PHC + (timestamp - lower_32_bits(PHC))

These calculations are correct as long as neither the PHC timestamp nor
the Rx timestamps are more than 2^32-1 nanseconds old. Further, we can
detect when the Rx timestamp is before or after the PHC as long as the
PHC timestamp is no more than 2^31-1 nanoseconds old.

In that case, we calculate the delta between the lower 32 bits of the
PHC and the Rx timestamp. If it's larger than 2^31-1 then the Rx
timestamp must have been captured in the past. If it's smaller, then the
Rx timestamp must have been captured after PHC time.

Add an ice_ptp_extend_32b_ts function that relies on a cached copy of
the PHC time and implements this algorithm to calculate the proper upper
32bits of the Rx timestamps.

Cache the PHC time periodically in all of the Rx rings. This enables
each Rx ring to simply call the extension function with a recent copy of
the PHC time. By ensuring that the PHC time is kept up to date
periodically, we ensure this algorithm doesn't use stale data and
produce incorrect results.

To cache the time, introduce a kworker and a kwork item to periodically
store the Rx time. It might seem like we should use the .do_aux_work
interface of the PTP clock. This doesn't work because all PFs must cache
this time, but only one PF owns the PTP clock device.

Thus, the ice driver will manage its own kthread instead of relying on
the PTP do_aux_work handler.

With this change, the driver can now report Rx timestamps on all
incoming packets.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c     |   5 +-
 drivers/net/ethernet/intel/ice/ice_ethtool.c  |   7 +-
 drivers/net/ethernet/intel/ice/ice_lib.c      |   8 +-
 drivers/net/ethernet/intel/ice/ice_lib.h      |   3 +-
 drivers/net/ethernet/intel/ice/ice_main.c     |  22 ++
 drivers/net/ethernet/intel/ice/ice_ptp.c      | 337 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_ptp.h      |  29 ++
 drivers/net/ethernet/intel/ice/ice_txrx.h     |   2 +
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c |   3 +
 9 files changed, 410 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 142d660010c6..0b44baf0dcff 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -393,9 +393,10 @@ static int ice_setup_rx_ctx(struct ice_ring *ring)
 	 * of same priority
 	 */
 	if (vsi->type != ICE_VSI_VF)
-		ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3);
+		ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true);
 	else
-		ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3);
+		ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3,
+					false);
 
 	/* Absolute queue number out of 2K needs to be passed */
 	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 01466b9f29b7..38d784742bf3 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3204,13 +3204,16 @@ ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
 	if (!test_bit(ICE_FLAG_PTP, pf->flags))
 		return ethtool_op_get_ts_info(dev, info);
 
-	info->so_timestamping = SOF_TIMESTAMPING_SOFTWARE;
+	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
 
 	info->phc_index = ice_get_ptp_clock_index(pf);
 
 	info->tx_types = BIT(HWTSTAMP_TX_OFF);
 
-	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index db36ce9c0b1c..7bb10fa032e1 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1675,9 +1675,11 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
  * @pf_q: index of the Rx queue in the PF's queue space
  * @rxdid: flexible descriptor RXDID
  * @prio: priority for the RXDID for this queue
+ * @ena_ts: true to enable timestamp and false to disable timestamp
  */
 void
-ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio)
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+			bool ena_ts)
 {
 	int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
 
@@ -1692,6 +1694,10 @@ ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio)
 	regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) &
 		QRXFLXP_CNTXT_RXDID_PRIO_M;
 
+	if (ena_ts)
+		/* Enable TimeSync on this queue */
+		regval |= QRXFLXP_CNTXT_TS_M;
+
 	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 6e2b8c2c8aa0..d5a28bf0fc2c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -80,7 +80,8 @@ bool ice_is_reset_in_progress(unsigned long *state);
 int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout);
 
 void
-ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio);
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+			bool ena_ts);
 
 void ice_vsi_dis_irq(struct ice_vsi *vsi);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 611f111daa8f..082e704472be 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6519,6 +6519,27 @@ event_after:
 	return err;
 }
 
+/**
+ * ice_do_ioctl - Access the hwtstamp interface
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ * @cmd: ioctl command
+ */
+static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		return ice_ptp_get_ts_config(pf, ifr);
+	case SIOCSHWTSTAMP:
+		return ice_ptp_set_ts_config(pf, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 /**
  * ice_aq_str - convert AQ err code to a string
  * @aq_err: the AQ error code to convert
@@ -7169,6 +7190,7 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_change_mtu = ice_change_mtu,
 	.ndo_get_stats64 = ice_get_stats64,
 	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
+	.ndo_do_ioctl = ice_do_ioctl,
 	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
 	.ndo_set_vf_mac = ice_set_vf_mac,
 	.ndo_get_vf_config = ice_get_vf_cfg,
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 82be5846b42f..b22b7a93f6ca 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -4,6 +4,46 @@
 #include "ice.h"
 #include "ice_lib.h"
 
+/**
+ * ice_set_rx_tstamp - Enable or disable Rx timestamping
+ * @pf: The PF pointer to search in
+ * @on: bool value for whether timestamps are enabled or disabled
+ */
+static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
+{
+	struct ice_vsi *vsi;
+	u16 i;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Set the timestamp flag for all the Rx rings */
+	ice_for_each_rxq(vsi, i) {
+		if (!vsi->rx_rings[i])
+			continue;
+		vsi->rx_rings[i]->ptp_rx = on;
+	}
+}
+
+/**
+ * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit
+ * @pf: Board private structure
+ * @ena: bool value to enable or disable time stamp
+ *
+ * This function will configure timestamping during PTP initialization
+ * and deinitialization
+ */
+static void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena)
+{
+	ice_set_rx_tstamp(pf, ena);
+
+	if (ena)
+		pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL;
+	else
+		pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
 /**
  * ice_get_ptp_clock_index - Get the PTP clock index
  * @pf: the PF pointer
@@ -171,6 +211,113 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
 	return ((u64)hi << 32) | lo;
 }
 
+/**
+ * ice_ptp_update_cached_phctime - Update the cached PHC time values
+ * @pf: Board specific private structure
+ *
+ * This function updates the system time values which are cached in the PF
+ * structure and the Rx rings.
+ *
+ * This function must be called periodically to ensure that the cached value
+ * is never more than 2 seconds old. It must also be called whenever the PHC
+ * time has been changed.
+ */
+static void ice_ptp_update_cached_phctime(struct ice_pf *pf)
+{
+	u64 systime;
+	int i;
+
+	/* Read the current PHC time */
+	systime = ice_ptp_read_src_clk_reg(pf, NULL);
+
+	/* Update the cached PHC time stored in the PF structure */
+	WRITE_ONCE(pf->ptp.cached_phc_time, systime);
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+		int j;
+
+		if (!vsi)
+			continue;
+
+		if (vsi->type != ICE_VSI_PF)
+			continue;
+
+		ice_for_each_rxq(vsi, j) {
+			if (!vsi->rx_rings[j])
+				continue;
+			WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
+		}
+	}
+}
+
+/**
+ * ice_ptp_extend_32b_ts - Convert a 32b nanoseconds timestamp to 64b
+ * @cached_phc_time: recently cached copy of PHC time
+ * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
+ *
+ * Hardware captures timestamps which contain only 32 bits of nominal
+ * nanoseconds, as opposed to the 64bit timestamps that the stack expects.
+ * Note that the captured timestamp values may be 40 bits, but the lower
+ * 8 bits are sub-nanoseconds and generally discarded.
+ *
+ * Extend the 32bit nanosecond timestamp using the following algorithm and
+ * assumptions:
+ *
+ * 1) have a recently cached copy of the PHC time
+ * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1
+ *    seconds) before or after the PHC time was captured.
+ * 3) calculate the delta between the cached time and the timestamp
+ * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was
+ *    captured after the PHC time. In this case, the full timestamp is just
+ *    the cached PHC time plus the delta.
+ * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the
+ *    timestamp was captured *before* the PHC time, i.e. because the PHC
+ *    cache was updated after the timestamp was captured by hardware. In this
+ *    case, the full timestamp is the cached time minus the inverse delta.
+ *
+ * This algorithm works even if the PHC time was updated after a Tx timestamp
+ * was requested, but before the Tx timestamp event was reported from
+ * hardware.
+ *
+ * This calculation primarily relies on keeping the cached PHC time up to
+ * date. If the timestamp was captured more than 2^31 nanoseconds after the
+ * PHC time, it is possible that the lower 32bits of PHC time have
+ * overflowed more than once, and we might generate an incorrect timestamp.
+ *
+ * This is prevented by (a) periodically updating the cached PHC time once
+ * a second, and (b) discarding any Tx timestamp packet if it has waited for
+ * a timestamp for more than one second.
+ */
+static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp)
+{
+	u32 delta, phc_time_lo;
+	u64 ns;
+
+	/* Extract the lower 32 bits of the PHC time */
+	phc_time_lo = (u32)cached_phc_time;
+
+	/* Calculate the delta between the lower 32bits of the cached PHC
+	 * time and the in_tstamp value
+	 */
+	delta = (in_tstamp - phc_time_lo);
+
+	/* Do not assume that the in_tstamp is always more recent than the
+	 * cached PHC time. If the delta is large, it indicates that the
+	 * in_tstamp was taken in the past, and should be converted
+	 * forward.
+	 */
+	if (delta > (U32_MAX / 2)) {
+		/* reverse the delta calculation here */
+		delta = (phc_time_lo - in_tstamp);
+		ns = cached_phc_time - delta;
+	} else {
+		ns = cached_phc_time + delta;
+	}
+
+	return ns;
+}
+
 /**
  * ice_ptp_read_time - Read the time from the device
  * @pf: Board private structure
@@ -323,6 +470,9 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
 	err = ice_ptp_write_init(pf, &ts64);
 	ice_ptp_unlock(hw);
 
+	if (!err)
+		ice_ptp_update_cached_phctime(pf);
+
 exit:
 	if (err) {
 		dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err);
@@ -385,9 +535,142 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
 		return err;
 	}
 
+	ice_ptp_update_cached_phctime(pf);
+
 	return 0;
 }
 
+/**
+ * ice_ptp_get_ts_config - ioctl interface to read the timestamping config
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Copy the timestamping config to user buffer
+ */
+int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	struct hwtstamp_config *config;
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return -EIO;
+
+	config = &pf->ptp.tstamp_config;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * ice_ptp_set_timestamp_mode - Setup driver for requested timestamp mode
+ * @pf: Board private structure
+ * @config: hwtstamp settings requested or saved
+ */
+static int
+ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
+{
+	/* Reserved for future extensions. */
+	if (config->flags)
+		return -EINVAL;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		ice_set_rx_tstamp(pf, false);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
+		ice_set_rx_tstamp(pf, true);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_set_ts_config - ioctl interface to control the timestamping
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Get the user config and store it
+ */
+int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return -EAGAIN;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = ice_ptp_set_timestamp_mode(pf, &config);
+	if (err)
+		return err;
+
+	/* Save these settings for future reference */
+	pf->ptp.tstamp_config = config;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * ice_ptp_rx_hwtstamp - Check for an Rx timestamp
+ * @rx_ring: Ring to get the VSI info
+ * @rx_desc: Receive descriptor
+ * @skb: Particular skb to send timestamp with
+ *
+ * The driver receives a notification in the receive descriptor with timestamp.
+ * The timestamp is in ns, so we must convert the result first.
+ */
+void
+ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring,
+		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
+{
+	u32 ts_high;
+	u64 ts_ns;
+
+	/* Populate timesync data into skb */
+	if (rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID) {
+		struct skb_shared_hwtstamps *hwtstamps;
+
+		/* Use ice_ptp_extend_32b_ts directly, using the ring-specific
+		 * cached PHC value, rather than accessing the PF. This also
+		 * allows us to simply pass the upper 32bits of nanoseconds
+		 * directly. Calling ice_ptp_extend_40b_ts is unnecessary as
+		 * it would just discard these bits itself.
+		 */
+		ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
+		ts_ns = ice_ptp_extend_32b_ts(rx_ring->cached_phctime, ts_high);
+
+		hwtstamps = skb_hwtstamps(skb);
+		memset(hwtstamps, 0, sizeof(*hwtstamps));
+		hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
+	}
+}
+
 /**
  * ice_ptp_set_caps - Set PTP capabilities
  * @pf: Board private structure
@@ -441,6 +724,21 @@ static long ice_ptp_create_clock(struct ice_pf *pf)
 	return 0;
 }
 
+static void ice_ptp_periodic_work(struct kthread_work *work)
+{
+	struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
+	struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return;
+
+	ice_ptp_update_cached_phctime(pf);
+
+	/* Run twice a second */
+	kthread_queue_delayed_work(ptp->kworker, &ptp->work,
+				   msecs_to_jiffies(500));
+}
+
 /**
  * ice_ptp_init_owner - Initialize PTP_1588_CLOCK device
  * @pf: Board private structure
@@ -526,6 +824,7 @@ err_exit:
 void ice_ptp_init(struct ice_pf *pf)
 {
 	struct device *dev = ice_pf_to_dev(pf);
+	struct kthread_worker *kworker;
 	struct ice_hw *hw = &pf->hw;
 	int err;
 
@@ -540,9 +839,37 @@ void ice_ptp_init(struct ice_pf *pf)
 			return;
 	}
 
+	/* Disable timestamping for both Tx and Rx */
+	ice_ptp_cfg_timestamp(pf, false);
+
+	/* Initialize work functions */
+	kthread_init_delayed_work(&pf->ptp.work, ice_ptp_periodic_work);
+
+	/* Allocate a kworker for handling work required for the ports
+	 * connected to the PTP hardware clock.
+	 */
+	kworker = kthread_create_worker(0, "ice-ptp-%s", dev_name(dev));
+	if (IS_ERR(kworker)) {
+		err = PTR_ERR(kworker);
+		goto err_kworker;
+	}
+	pf->ptp.kworker = kworker;
+
 	set_bit(ICE_FLAG_PTP, pf->flags);
 
+	/* Start periodic work going */
+	kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work, 0);
+
 	dev_info(dev, "PTP init successful\n");
+	return;
+
+err_kworker:
+	/* If we registered a PTP clock, release it */
+	if (pf->ptp.clock) {
+		ptp_clock_unregister(pf->ptp.clock);
+		pf->ptp.clock = NULL;
+	}
+	dev_err(dev, "PTP failed %d\n", err);
 }
 
 /**
@@ -554,8 +881,18 @@ void ice_ptp_init(struct ice_pf *pf)
  */
 void ice_ptp_release(struct ice_pf *pf)
 {
+	/* Disable timestamping for both Tx and Rx */
+	ice_ptp_cfg_timestamp(pf, false);
+
 	clear_bit(ICE_FLAG_PTP, pf->flags);
 
+	kthread_cancel_delayed_work_sync(&pf->ptp.work);
+
+	if (pf->ptp.kworker) {
+		kthread_destroy_worker(pf->ptp.kworker);
+		pf->ptp.kworker = NULL;
+	}
+
 	if (!pf->ptp.clock)
 		return;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 01f7db05ef7d..48850391ab28 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -5,17 +5,26 @@
 #define _ICE_PTP_H_
 
 #include <linux/ptp_clock_kernel.h>
+#include <linux/kthread.h>
 
 #include "ice_ptp_hw.h"
 
 /**
  * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
+ * @work: delayed work function for periodic tasks
+ * @cached_phc_time: a cached copy of the PHC time for timestamp extension
+ * @kworker: kwork thread for handling periodic work
  * @info: structure defining PTP hardware capabilities
  * @clock: pointer to registered PTP clock device
+ * @tstamp_config: hardware timestamping configuration
  */
 struct ice_ptp {
+	struct kthread_delayed_work work;
+	u64 cached_phc_time;
+	struct kthread_worker *kworker;
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
+	struct hwtstamp_config tstamp_config;
 };
 
 #define __ptp_info_to_ptp(i) \
@@ -24,17 +33,37 @@ struct ice_ptp {
 	container_of(__ptp_info_to_ptp((i)), struct ice_pf, ptp)
 
 #define PTP_SHARED_CLK_IDX_VALID	BIT(31)
+#define ICE_PTP_TS_VALID		BIT(0)
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 struct ice_pf;
+int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
+int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 int ice_get_ptp_clock_index(struct ice_pf *pf);
+void
+ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring,
+		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
 void ice_ptp_init(struct ice_pf *pf);
 void ice_ptp_release(struct ice_pf *pf);
 #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int ice_get_ptp_clock_index(struct ice_pf *pf)
 {
 	return -1;
 }
+
+static inline void
+ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring,
+		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
 static inline void ice_ptp_init(struct ice_pf *pf) { }
 static inline void ice_ptp_release(struct ice_pf *pf) { }
 #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index c5a92ac787d6..1069f3a9b6cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -311,6 +311,8 @@ struct ice_ring {
 	u32 txq_teid;			/* Added Tx queue TEID */
 	u16 rx_buf_len;
 	u8 dcb_tc;			/* Traffic class of ring */
+	u64 cached_phctime;
+	u8 ptp_rx:1;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 207f6ee3a7f6..166cf25d1139 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -175,6 +175,9 @@ ice_process_skb_fields(struct ice_ring *rx_ring,
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 
 	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+
+	if (rx_ring->ptp_rx)
+		ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
 }
 
 /**

From ea9b847cda647b9849b0b9fa0447e876a1ac62e1 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 9 Jun 2021 09:39:53 -0700
Subject: [PATCH 1416/3292] ice: enable transmit timestamps for E810 devices

Add support for enabling Tx timestamp requests for outgoing packets on
E810 devices.

The ice hardware can support multiple outstanding Tx timestamp requests.
When sending a descriptor to hardware, a Tx timestamp request is made by
setting a request bit, and assigning an index that represents which Tx
timestamp index to store the timestamp in.

Hardware makes no effort to synchronize the index use, so it is up to
software to ensure that Tx timestamp indexes are not re-used before the
timestamp is reported back.

To do this, introduce a Tx timestamp tracker which will keep track of
currently in-use indexes.

In the hot path, if a packet has a timestamp request, an index will be
requested from the tracker. Unfortunately, this does require a lock as
the indexes are shared across all queues on a PHY. There are not enough
indexes to reliably assign only 1 to each queue.

For the E810 devices, the timestamp indexes are not shared across PHYs,
so each port can have its own tracking.

Once hardware captures a timestamp, an interrupt is fired. In this
interrupt, trigger a new work item that will figure out which timestamp
was completed, and report the timestamp back to the stack.

This function loops through the Tx timestamp indexes and checks whether
there is now a valid timestamp. If so, it clears the PHY timestamp
indication in the PHY memory, locks and removes the SKB and bit in the
tracker, then reports the timestamp to the stack.

It is possible in some cases that a timestamp request will be initiated
but never completed. This might occur if the packet is dropped by
software or hardware before it reaches the PHY.

Add a task to the periodic work function that will check whether
a timestamp request is more than a few seconds old. If so, the timestamp
index is cleared in the PHY, and the SKB is released.

Just as with Rx timestamps, the Tx timestamps are only 40 bits wide, and
use the same overall logic for extending to 64 bits of nanoseconds.

With this change, E810 devices should be able to perform basic PTP
functionality.

Future changes will extend the support to cover the E822-based devices.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c     |   9 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c  |   6 +-
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |   1 +
 drivers/net/ethernet/intel/ice/ice_lib.c      |   1 +
 drivers/net/ethernet/intel/ice/ice_main.c     |   5 +
 drivers/net/ethernet/intel/ice/ice_ptp.c      | 369 +++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_ptp.h      |  91 +++++
 drivers/net/ethernet/intel/ice/ice_txrx.c     |  37 ++
 drivers/net/ethernet/intel/ice/ice_txrx.h     |   3 +
 9 files changed, 518 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 0b44baf0dcff..c36057efc7ae 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -287,6 +287,15 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
 	/* make sure the context is associated with the right VSI */
 	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
 
+	/* Restrict Tx timestamps to the PF VSI */
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		tlan_ctx->tsyn_ena = 1;
+		break;
+	default:
+		break;
+	}
+
 	tlan_ctx->tso_ena = ICE_TX_LEGACY;
 	tlan_ctx->tso_qnum = pf_q;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 38d784742bf3..d95a5daca114 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3204,14 +3204,16 @@ ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
 	if (!test_bit(ICE_FLAG_PTP, pf->flags))
 		return ethtool_op_get_ts_info(dev, info);
 
-	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
 				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_TX_HARDWARE |
 				SOF_TIMESTAMPING_RX_HARDWARE |
 				SOF_TIMESTAMPING_RAW_HARDWARE;
 
 	info->phc_index = ice_get_ptp_clock_index(pf);
 
-	info->tx_types = BIT(HWTSTAMP_TX_OFF);
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
 
 	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index f6f5ced50be2..6989a76c42a7 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -202,6 +202,7 @@
 #define PFINT_MBX_CTL_ITR_INDX_M		ICE_M(0x3, 11)
 #define PFINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
 #define PFINT_OICR				0x0016CA00
+#define PFINT_OICR_TSYN_TX_M			BIT(11)
 #define PFINT_OICR_ECC_ERR_M			BIT(16)
 #define PFINT_OICR_MAL_DETECT_M			BIT(19)
 #define PFINT_OICR_GRST_M			BIT(20)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 7bb10fa032e1..a46aba5e9c12 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1298,6 +1298,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 		ring->reg_idx = vsi->txq_map[i];
 		ring->ring_active = false;
 		ring->vsi = vsi;
+		ring->tx_tstamps = &pf->ptp.port.tx;
 		ring->dev = dev;
 		ring->count = vsi->num_tx_desc;
 		WRITE_ONCE(vsi->tx_rings[i], ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 082e704472be..96276533822e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2792,6 +2792,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		}
 	}
 
+	if (oicr & PFINT_OICR_TSYN_TX_M) {
+		ena_mask &= ~PFINT_OICR_TSYN_TX_M;
+		ice_ptp_process_ts(pf);
+	}
+
 #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
 	if (oicr & ICE_AUX_CRIT_ERR) {
 		struct iidc_event *event;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index b22b7a93f6ca..e14f81321768 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -4,6 +4,37 @@
 #include "ice.h"
 #include "ice_lib.h"
 
+/**
+ * ice_set_tx_tstamp - Enable or disable Tx timestamping
+ * @pf: The PF pointer to search in
+ * @on: bool value for whether timestamps are enabled or disabled
+ */
+static void ice_set_tx_tstamp(struct ice_pf *pf, bool on)
+{
+	struct ice_vsi *vsi;
+	u32 val;
+	u16 i;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Set the timestamp enable flag for all the Tx rings */
+	ice_for_each_rxq(vsi, i) {
+		if (!vsi->tx_rings[i])
+			continue;
+		vsi->tx_rings[i]->ptp_tx = on;
+	}
+
+	/* Configure the Tx timestamp interrupt */
+	val = rd32(&pf->hw, PFINT_OICR_ENA);
+	if (on)
+		val |= PFINT_OICR_TSYN_TX_M;
+	else
+		val &= ~PFINT_OICR_TSYN_TX_M;
+	wr32(&pf->hw, PFINT_OICR_ENA, val);
+}
+
 /**
  * ice_set_rx_tstamp - Enable or disable Rx timestamping
  * @pf: The PF pointer to search in
@@ -36,12 +67,16 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
  */
 static void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena)
 {
+	ice_set_tx_tstamp(pf, ena);
 	ice_set_rx_tstamp(pf, ena);
 
-	if (ena)
+	if (ena) {
 		pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL;
-	else
+		pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON;
+	} else {
 		pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+		pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+	}
 }
 
 /**
@@ -318,6 +353,40 @@ static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp)
 	return ns;
 }
 
+/**
+ * ice_ptp_extend_40b_ts - Convert a 40b timestamp to 64b nanoseconds
+ * @pf: Board private structure
+ * @in_tstamp: Ingress/egress 40b timestamp value
+ *
+ * The Tx and Rx timestamps are 40 bits wide, including 32 bits of nominal
+ * nanoseconds, 7 bits of sub-nanoseconds, and a valid bit.
+ *
+ *  *--------------------------------------------------------------*
+ *  | 32 bits of nanoseconds | 7 high bits of sub ns underflow | v |
+ *  *--------------------------------------------------------------*
+ *
+ * The low bit is an indicator of whether the timestamp is valid. The next
+ * 7 bits are a capture of the upper 7 bits of the sub-nanosecond underflow,
+ * and the remaining 32 bits are the lower 32 bits of the PHC timer.
+ *
+ * It is assumed that the caller verifies the timestamp is valid prior to
+ * calling this function.
+ *
+ * Extract the 32bit nominal nanoseconds and extend them. Use the cached PHC
+ * time stored in the device private PTP structure as the basis for timestamp
+ * extension.
+ *
+ * See ice_ptp_extend_32b_ts for a detailed explanation of the extension
+ * algorithm.
+ */
+static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
+{
+	const u64 mask = GENMASK_ULL(31, 0);
+
+	return ice_ptp_extend_32b_ts(pf->ptp.cached_phc_time,
+				     (in_tstamp >> 8) & mask);
+}
+
 /**
  * ice_ptp_read_time - Read the time from the device
  * @pf: Board private structure
@@ -574,6 +643,10 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
 
 	switch (config->tx_type) {
 	case HWTSTAMP_TX_OFF:
+		ice_set_tx_tstamp(pf, false);
+		break;
+	case HWTSTAMP_TX_ON:
+		ice_set_tx_tstamp(pf, true);
 		break;
 	default:
 		return -ERANGE;
@@ -724,6 +797,291 @@ static long ice_ptp_create_clock(struct ice_pf *pf)
 	return 0;
 }
 
+/**
+ * ice_ptp_tx_tstamp_work - Process Tx timestamps for a port
+ * @work: pointer to the kthread_work struct
+ *
+ * Process timestamps captured by the PHY associated with this port. To do
+ * this, loop over each index with a waiting skb.
+ *
+ * If a given index has a valid timestamp, perform the following steps:
+ *
+ * 1) copy the timestamp out of the PHY register
+ * 4) clear the timestamp valid bit in the PHY register
+ * 5) unlock the index by clearing the associated in_use bit.
+ * 2) extend the 40b timestamp value to get a 64bit timestamp
+ * 3) send that timestamp to the stack
+ *
+ * After looping, if we still have waiting SKBs, then re-queue the work. This
+ * may cause us effectively poll even when not strictly necessary. We do this
+ * because it's possible a new timestamp was requested around the same time as
+ * the interrupt. In some cases hardware might not interrupt us again when the
+ * timestamp is captured.
+ *
+ * Note that we only take the tracking lock when clearing the bit and when
+ * checking if we need to re-queue this task. The only place where bits can be
+ * set is the hard xmit routine where an SKB has a request flag set. The only
+ * places where we clear bits are this work function, or the periodic cleanup
+ * thread. If the cleanup thread clears a bit we're processing we catch it
+ * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread
+ * starts a new timestamp, we might not begin processing it right away but we
+ * will notice it at the end when we re-queue the work item. If a Tx thread
+ * starts a new timestamp just after this function exits without re-queuing,
+ * the interrupt when the timestamp finishes should trigger. Avoiding holding
+ * the lock for the entire function is important in order to ensure that Tx
+ * threads do not get blocked while waiting for the lock.
+ */
+static void ice_ptp_tx_tstamp_work(struct kthread_work *work)
+{
+	struct ice_ptp_port *ptp_port;
+	struct ice_ptp_tx *tx;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u8 idx;
+
+	tx = container_of(work, struct ice_ptp_tx, work);
+	if (!tx->init)
+		return;
+
+	ptp_port = container_of(tx, struct ice_ptp_port, tx);
+	pf = ptp_port_to_pf(ptp_port);
+	hw = &pf->hw;
+
+	for_each_set_bit(idx, tx->in_use, tx->len) {
+		struct skb_shared_hwtstamps shhwtstamps = {};
+		u8 phy_idx = idx + tx->quad_offset;
+		u64 raw_tstamp, tstamp;
+		struct sk_buff *skb;
+		int err;
+
+		err = ice_read_phy_tstamp(hw, tx->quad, phy_idx,
+					  &raw_tstamp);
+		if (err)
+			continue;
+
+		/* Check if the timestamp is valid */
+		if (!(raw_tstamp & ICE_PTP_TS_VALID))
+			continue;
+
+		/* clear the timestamp register, so that it won't show valid
+		 * again when re-used.
+		 */
+		ice_clear_phy_tstamp(hw, tx->quad, phy_idx);
+
+		/* The timestamp is valid, so we'll go ahead and clear this
+		 * index and then send the timestamp up to the stack.
+		 */
+		spin_lock(&tx->lock);
+		clear_bit(idx, tx->in_use);
+		skb = tx->tstamps[idx].skb;
+		tx->tstamps[idx].skb = NULL;
+		spin_unlock(&tx->lock);
+
+		/* it's (unlikely but) possible we raced with the cleanup
+		 * thread for discarding old timestamp requests.
+		 */
+		if (!skb)
+			continue;
+
+		/* Extend the timestamp using cached PHC time */
+		tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp);
+		shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+
+		skb_tstamp_tx(skb, &shhwtstamps);
+		dev_kfree_skb_any(skb);
+	}
+
+	/* Check if we still have work to do. If so, re-queue this task to
+	 * poll for remaining timestamps.
+	 */
+	spin_lock(&tx->lock);
+	if (!bitmap_empty(tx->in_use, tx->len))
+		kthread_queue_work(pf->ptp.kworker, &tx->work);
+	spin_unlock(&tx->lock);
+}
+
+/**
+ * ice_ptp_request_ts - Request an available Tx timestamp index
+ * @tx: the PTP Tx timestamp tracker to request from
+ * @skb: the SKB to associate with this timestamp request
+ */
+s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
+{
+	u8 idx;
+
+	/* Check if this tracker is initialized */
+	if (!tx->init)
+		return -1;
+
+	spin_lock(&tx->lock);
+	/* Find and set the first available index */
+	idx = find_first_zero_bit(tx->in_use, tx->len);
+	if (idx < tx->len) {
+		/* We got a valid index that no other thread could have set. Store
+		 * a reference to the skb and the start time to allow discarding old
+		 * requests.
+		 */
+		set_bit(idx, tx->in_use);
+		tx->tstamps[idx].start = jiffies;
+		tx->tstamps[idx].skb = skb_get(skb);
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+	}
+
+	spin_unlock(&tx->lock);
+
+	/* return the appropriate PHY timestamp register index, -1 if no
+	 * indexes were available.
+	 */
+	if (idx >= tx->len)
+		return -1;
+	else
+		return idx + tx->quad_offset;
+}
+
+/**
+ * ice_ptp_process_ts - Spawn kthread work to handle timestamps
+ * @pf: Board private structure
+ *
+ * Queue work required to process the PTP Tx timestamps outside of interrupt
+ * context.
+ */
+void ice_ptp_process_ts(struct ice_pf *pf)
+{
+	if (pf->ptp.port.tx.init)
+		kthread_queue_work(pf->ptp.kworker, &pf->ptp.port.tx.work);
+}
+
+/**
+ * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps
+ * @tx: Tx tracking structure to initialize
+ *
+ * Assumes that the length has already been initialized. Do not call directly,
+ * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead.
+ */
+static int
+ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
+{
+	tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL);
+	if (!tx->tstamps)
+		return -ENOMEM;
+
+	tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
+	if (!tx->in_use) {
+		kfree(tx->tstamps);
+		tx->tstamps = NULL;
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&tx->lock);
+	kthread_init_work(&tx->work, ice_ptp_tx_tstamp_work);
+
+	tx->init = 1;
+
+	return 0;
+}
+
+/**
+ * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
+ * @pf: Board private structure
+ * @tx: the tracker to flush
+ */
+static void
+ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+	u8 idx;
+
+	for (idx = 0; idx < tx->len; idx++) {
+		u8 phy_idx = idx + tx->quad_offset;
+
+		/* Clear any potential residual timestamp in the PHY block */
+		if (!pf->hw.reset_ongoing)
+			ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx);
+
+		if (tx->tstamps[idx].skb) {
+			dev_kfree_skb_any(tx->tstamps[idx].skb);
+			tx->tstamps[idx].skb = NULL;
+		}
+	}
+}
+
+/**
+ * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
+ * @pf: Board private structure
+ * @tx: Tx tracking structure to release
+ *
+ * Free memory associated with the Tx timestamp tracker.
+ */
+static void
+ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+	tx->init = 0;
+
+	kthread_cancel_work_sync(&tx->work);
+
+	ice_ptp_flush_tx_tracker(pf, tx);
+
+	kfree(tx->tstamps);
+	tx->tstamps = NULL;
+
+	kfree(tx->in_use);
+	tx->in_use = NULL;
+
+	tx->len = 0;
+}
+
+/**
+ * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ *
+ * Initialize the Tx timestamp tracker for this PF. For E810 devices, each
+ * port has its own block of timestamps, independent of the other ports.
+ */
+static int
+ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+	tx->quad = pf->hw.port_info->lport;
+	tx->quad_offset = 0;
+	tx->len = INDEX_PER_QUAD;
+
+	return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped
+ * @tx: PTP Tx tracker to clean up
+ *
+ * Loop through the Tx timestamp requests and see if any of them have been
+ * waiting for a long time. Discard any SKBs that have been waiting for more
+ * than 2 seconds. This is long enough to be reasonably sure that the
+ * timestamp will never be captured. This might happen if the packet gets
+ * discarded before it reaches the PHY timestamping block.
+ */
+static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx)
+{
+	u8 idx;
+
+	if (!tx->init)
+		return;
+
+	for_each_set_bit(idx, tx->in_use, tx->len) {
+		struct sk_buff *skb;
+
+		/* Check if this SKB has been waiting for too long */
+		if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ))
+			continue;
+
+		spin_lock(&tx->lock);
+		skb = tx->tstamps[idx].skb;
+		tx->tstamps[idx].skb = NULL;
+		clear_bit(idx, tx->in_use);
+		spin_unlock(&tx->lock);
+
+		/* Free the SKB after we've cleared the bit */
+		dev_kfree_skb_any(skb);
+	}
+}
+
 static void ice_ptp_periodic_work(struct kthread_work *work)
 {
 	struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
@@ -734,6 +1092,8 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
 
 	ice_ptp_update_cached_phctime(pf);
 
+	ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx);
+
 	/* Run twice a second */
 	kthread_queue_delayed_work(ptp->kworker, &ptp->work,
 				   msecs_to_jiffies(500));
@@ -842,6 +1202,9 @@ void ice_ptp_init(struct ice_pf *pf)
 	/* Disable timestamping for both Tx and Rx */
 	ice_ptp_cfg_timestamp(pf, false);
 
+	/* Initialize the PTP port Tx timestamp tracker */
+	ice_ptp_init_tx_e810(pf, &pf->ptp.port.tx);
+
 	/* Initialize work functions */
 	kthread_init_delayed_work(&pf->ptp.work, ice_ptp_periodic_work);
 
@@ -884,6 +1247,8 @@ void ice_ptp_release(struct ice_pf *pf)
 	/* Disable timestamping for both Tx and Rx */
 	ice_ptp_cfg_timestamp(pf, false);
 
+	ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+
 	clear_bit(ICE_FLAG_PTP, pf->flags);
 
 	kthread_cancel_delayed_work_sync(&pf->ptp.work);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 48850391ab28..41e14f98f0e6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -9,8 +9,82 @@
 
 #include "ice_ptp_hw.h"
 
+/* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp
+ * is stored in a buffer of registers. Depending on the specific hardware,
+ * this buffer might be shared across multiple PHY ports.
+ *
+ * On transmit of a packet to be timestamped, software is responsible for
+ * selecting an open index. Hardware makes no attempt to lock or prevent
+ * re-use of an index for multiple packets.
+ *
+ * To handle this, timestamp indexes must be tracked by software to ensure
+ * that an index is not re-used for multiple transmitted packets. The
+ * structures and functions declared in this file track the available Tx
+ * register indexes, as well as provide storage for the SKB pointers.
+ *
+ * To allow multiple ports to access the shared register block independently,
+ * the blocks are split up so that indexes are assigned to each port based on
+ * hardware logical port number.
+ */
+
+/**
+ * struct ice_tx_tstamp - Tracking for a single Tx timestamp
+ * @skb: pointer to the SKB for this timestamp request
+ * @start: jiffies when the timestamp was first requested
+ *
+ * This structure tracks a single timestamp request. The SKB pointer is
+ * provided when initiating a request. The start time is used to ensure that
+ * we discard old requests that were not fulfilled within a 2 second time
+ * window.
+ */
+struct ice_tx_tstamp {
+	struct sk_buff *skb;
+	unsigned long start;
+};
+
+/**
+ * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port
+ * @work: work function to handle processing of Tx timestamps
+ * @lock: lock to prevent concurrent write to in_use bitmap
+ * @tstamps: array of len to store outstanding requests
+ * @in_use: bitmap of len to indicate which slots are in use
+ * @quad: which quad the timestamps are captured in
+ * @quad_offset: offset into timestamp block of the quad to get the real index
+ * @len: length of the tstamps and in_use fields.
+ * @init: if true, the tracker is initialized;
+ */
+struct ice_ptp_tx {
+	struct kthread_work work;
+	spinlock_t lock; /* lock protecting in_use bitmap */
+	struct ice_tx_tstamp *tstamps;
+	unsigned long *in_use;
+	u8 quad;
+	u8 quad_offset;
+	u8 len;
+	u8 init;
+};
+
+/* Quad and port information for initializing timestamp blocks */
+#define INDEX_PER_QUAD			64
+#define INDEX_PER_PORT			(INDEX_PER_QUAD / ICE_PORTS_PER_QUAD)
+
+/**
+ * struct ice_ptp_port - data used to initialize an external port for PTP
+ *
+ * This structure contains PTP data related to the external ports. Currently
+ * it is used for tracking the Tx timestamps of a port. In the future this
+ * structure will also hold information for the E822 port initialization
+ * logic.
+ *
+ * @tx: Tx timestamp tracking for this port
+ */
+struct ice_ptp_port {
+	struct ice_ptp_tx tx;
+};
+
 /**
  * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
+ * @port: data for the PHY port initialization procedure
  * @work: delayed work function for periodic tasks
  * @cached_phc_time: a cached copy of the PHC time for timestamp extension
  * @kworker: kwork thread for handling periodic work
@@ -19,6 +93,7 @@
  * @tstamp_config: hardware timestamping configuration
  */
 struct ice_ptp {
+	struct ice_ptp_port port;
 	struct kthread_delayed_work work;
 	u64 cached_phc_time;
 	struct kthread_worker *kworker;
@@ -27,6 +102,11 @@ struct ice_ptp {
 	struct hwtstamp_config tstamp_config;
 };
 
+#define __ptp_port_to_ptp(p) \
+	container_of((p), struct ice_ptp, port)
+#define ptp_port_to_pf(p) \
+	container_of(__ptp_port_to_ptp((p)), struct ice_pf, ptp)
+
 #define __ptp_info_to_ptp(i) \
 	container_of((i), struct ice_ptp, info)
 #define ptp_info_to_pf(i) \
@@ -40,6 +120,10 @@ struct ice_pf;
 int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 int ice_get_ptp_clock_index(struct ice_pf *pf);
+
+s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
+void ice_ptp_process_ts(struct ice_pf *pf);
+
 void
 ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring,
 		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
@@ -61,6 +145,13 @@ static inline int ice_get_ptp_clock_index(struct ice_pf *pf)
 	return -1;
 }
 
+static inline
+ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
+{
+	return -1;
+}
+
+static inline void ice_ptp_process_ts(struct ice_pf *pf) { }
 static inline void
 ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring,
 		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 04748aa4c7c8..917eba7fdd0c 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -2136,6 +2136,41 @@ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
 	return count != ICE_MAX_BUF_TXD;
 }
 
+/**
+ * ice_tstamp - set up context descriptor for hardware timestamp
+ * @tx_ring: pointer to the Tx ring to send buffer on
+ * @skb: pointer to the SKB we're sending
+ * @first: Tx buffer
+ * @off: Tx offload parameters
+ */
+static void
+ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb,
+	   struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	s8 idx;
+
+	/* only timestamp the outbound packet if the user has requested it */
+	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
+		return;
+
+	if (!tx_ring->ptp_tx)
+		return;
+
+	/* Tx timestamps cannot be sampled when doing TSO */
+	if (first->tx_flags & ICE_TX_FLAGS_TSO)
+		return;
+
+	/* Grab an open timestamp slot */
+	idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb);
+	if (idx < 0)
+		return;
+
+	off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+			     (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) |
+			     ((u64)idx << ICE_TXD_CTX_QW1_TSO_LEN_S));
+	first->tx_flags |= ICE_TX_FLAGS_TSYN;
+}
+
 /**
  * ice_xmit_frame_ring - Sends buffer on Tx ring
  * @skb: send buffer
@@ -2205,6 +2240,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 					ICE_TX_CTX_DESC_SWTCH_UPLINK <<
 					ICE_TXD_CTX_QW1_CMD_S);
 
+	ice_tstamp(tx_ring, skb, first, &offload);
+
 	if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
 		struct ice_tx_ctx_desc *cdesc;
 		u16 i = tx_ring->next_to_use;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 1069f3a9b6cb..1e46e80f3d6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -118,6 +118,7 @@ static inline int ice_skb_pad(void)
  * freed instead of returned like skb packets.
  */
 #define ICE_TX_FLAGS_DUMMY_PKT	BIT(3)
+#define ICE_TX_FLAGS_TSYN	BIT(4)
 #define ICE_TX_FLAGS_IPV4	BIT(5)
 #define ICE_TX_FLAGS_IPV6	BIT(6)
 #define ICE_TX_FLAGS_TUNNEL	BIT(7)
@@ -311,8 +312,10 @@ struct ice_ring {
 	u32 txq_teid;			/* Added Tx queue TEID */
 	u16 rx_buf_len;
 	u8 dcb_tc;			/* Traffic class of ring */
+	struct ice_ptp_tx *tx_tstamps;
 	u64 cached_phctime;
 	u8 ptp_rx:1;
+	u8 ptp_tx:1;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)

From f53cbdab011b200c67c7e5f476046828014501eb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:15:10 +0200
Subject: [PATCH 1417/3292] cpuidle: teo: Cosmetic modifications of
 teo_update()

Rename a local variable in teo_update() so that its purpose is better
reflected by its name and use one more local variable in the loop
over the CPU idle states in that function to make the code somewhat
easier to read.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index ac4bb27d69b0..e97ae84fa5a8 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -117,7 +117,7 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int i, idx_hit = 0, idx_timer = 0;
+	int i, idx_timer = 0, idx_duration = 0;
 	unsigned int hits, misses;
 	u64 measured_ns;
 
@@ -156,14 +156,15 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * states matching the sleep length and the measured idle duration.
 	 */
 	for (i = 0; i < drv->state_count; i++) {
+		s64 target_residency_ns = drv->states[i].target_residency_ns;
 		unsigned int early_hits = cpu_data->states[i].early_hits;
 
 		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
 
-		if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
+		if (target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
-			if (drv->states[i].target_residency_ns <= measured_ns)
-				idx_hit = i;
+			if (target_residency_ns <= measured_ns)
+				idx_duration = i;
 		}
 	}
 
@@ -181,11 +182,11 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	misses = cpu_data->states[idx_timer].misses;
 	misses -= misses >> DECAY_SHIFT;
 
-	if (idx_timer == idx_hit) {
+	if (idx_timer == idx_duration) {
 		hits += PULSE;
 	} else {
 		misses += PULSE;
-		cpu_data->states[idx_hit].early_hits += PULSE;
+		cpu_data->states[idx_duration].early_hits += PULSE;
 	}
 
 	cpu_data->states[idx_timer].misses = misses;

From b18e0de1cf85eed6e9ced086d6323e867d4b57aa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:15:52 +0200
Subject: [PATCH 1418/3292] cpuidle: teo: Cosmetic modification of teo_select()

Initialize local variables in teo_select() where they are declared.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index e97ae84fa5a8..173ab30b9a06 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -241,10 +241,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int max_early_idx, prev_max_early_idx, constraint_idx, idx0, idx, i;
-	unsigned int hits, misses, early_hits;
+	int constraint_idx = drv->state_count;
+	unsigned int hits = 0, misses = 0;
+	unsigned int early_hits = 0;
+	int prev_max_early_idx = -1;
+	int max_early_idx = -1;
+	int idx0 = -1, idx = -1;
 	ktime_t delta_tick;
 	s64 duration_ns;
+	int i;
 
 	if (dev->last_state_idx >= 0) {
 		teo_update(drv, dev);
@@ -256,15 +261,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 
-	hits = 0;
-	misses = 0;
-	early_hits = 0;
-	max_early_idx = -1;
-	prev_max_early_idx = -1;
-	constraint_idx = drv->state_count;
-	idx = -1;
-	idx0 = idx;
-
 	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
 

From c410a9a142f152006c21a858d734a9f868bc90a6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:16:32 +0200
Subject: [PATCH 1419/3292] cpuidle: teo: Change the main idle state selection
 logic

Two aspects of the current main idle state selection logic in the
TEO (Timer Events Oriented) cpuidle governor are quite questionable.

First of all, the "hits" and "misses" metrics used by it are only
updated for a given idle state if the time till the next timer event
("sleep length") is between the target residency of that state and
the target residency of the next one.  Consequently, they are likely
to become stale if the sleep length tends to fall outside that
interval which increases the likelihood of subomtimal idle state
selection.

Second, the decision on whether or not to select the idle state
"matching" the sleep length is based on the metrics collected for
that state alone, whereas in principle the metrics collected for
the other idle states should be taken into consideration when that
decision is made.  For example, if the measured idle duration is less
than the target residency of the idle state "matching" the sleep
length, then it is also less than the target residency of any deeper
idle state and that should be taken into account when considering
whether or not to select any of those states, but currently it is
not.

In order to address the above shortcomings, modify the main idle
state selection logic in the TEO governor to take the metrics
collected for all of the idle states into account when deciding
whether or not to select the one "matching" the sleep length.

Moreover, drop the "misses" metric that becomes redundant after the
above change and rename the "early_hits" metric to "intercepts" so
that its role is better reflected by its name (the idea being that
if a CPU wakes up earlier than indicated by the sleep length, then
it must be a result of a non-timer interrupt that "intercepts" the
CPU).

Also rename the states[] array in struct struct teo_cpu to
state_bins[] to avoid confusing it with the states[] array in
struct cpuidle_driver and update the documentation to match the
new code (and make it more comprehensive while at it).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 366 +++++++++++++++++---------------
 1 file changed, 199 insertions(+), 167 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 173ab30b9a06..5bcd45f1d610 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -2,47 +2,90 @@
 /*
  * Timer events oriented CPU idle governor
  *
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  *
  * The idea of this governor is based on the observation that on many systems
  * timer events are two or more orders of magnitude more frequent than any
- * other interrupts, so they are likely to be the most significant source of CPU
+ * other interrupts, so they are likely to be the most significant cause of CPU
  * wakeups from idle states.  Moreover, information about what happened in the
  * (relatively recent) past can be used to estimate whether or not the deepest
- * idle state with target residency within the time to the closest timer is
- * likely to be suitable for the upcoming idle time of the CPU and, if not, then
- * which of the shallower idle states to choose.
+ * idle state with target residency within the (known) time till the closest
+ * timer event, referred to as the sleep length, is likely to be suitable for
+ * the upcoming CPU idle period and, if not, then which of the shallower idle
+ * states to choose instead of it.
  *
- * Of course, non-timer wakeup sources are more important in some use cases and
- * they can be covered by taking a few most recent idle time intervals of the
- * CPU into account.  However, even in that case it is not necessary to consider
- * idle duration values greater than the time till the closest timer, as the
- * patterns that they may belong to produce average values close enough to
- * the time till the closest timer (sleep length) anyway.
+ * Of course, non-timer wakeup sources are more important in some use cases
+ * which can be covered by taking a few most recent idle time intervals of the
+ * CPU into account.  However, even in that context it is not necessary to
+ * consider idle duration values greater than the sleep length, because the
+ * closest timer will ultimately wake up the CPU anyway unless it is woken up
+ * earlier.
  *
- * Thus this governor estimates whether or not the upcoming idle time of the CPU
- * is likely to be significantly shorter than the sleep length and selects an
- * idle state for it in accordance with that, as follows:
+ * Thus this governor estimates whether or not the prospective idle duration of
+ * a CPU is likely to be significantly shorter than the sleep length and selects
+ * an idle state for it accordingly.
  *
- * - Find an idle state on the basis of the sleep length and state statistics
- *   collected over time:
+ * The computations carried out by this governor are based on using bins whose
+ * boundaries are aligned with the target residency parameter values of the CPU
+ * idle states provided by the cpuidle driver in the ascending order.  That is,
+ * the first bin spans from 0 up to, but not including, the target residency of
+ * the second idle state (idle state 1), the second bin spans from the target
+ * residency of idle state 1 up to, but not including, the target residency of
+ * idle state 2, the third bin spans from the target residency of idle state 2
+ * up to, but not including, the target residency of idle state 3 and so on.
+ * The last bin spans from the target residency of the deepest idle state
+ * supplied by the driver to infinity.
  *
- *   o Find the deepest idle state whose target residency is less than or equal
- *     to the sleep length.
+ * Two metrics called "hits" and "intercepts" are associated with each bin.
+ * They are updated every time before selecting an idle state for the given CPU
+ * in accordance with what happened last time.
  *
- *   o Select it if it matched both the sleep length and the observed idle
- *     duration in the past more often than it matched the sleep length alone
- *     (i.e. the observed idle duration was significantly shorter than the sleep
- *     length matched by it).
+ * The "hits" metric reflects the relative frequency of situations in which the
+ * sleep length and the idle duration measured after CPU wakeup fall into the
+ * same bin (that is, the CPU appears to wake up "on time" relative to the sleep
+ * length).  In turn, the "intercepts" metric reflects the relative frequency of
+ * situations in which the measured idle duration is so much shorter than the
+ * sleep length that the bin it falls into corresponds to an idle state
+ * shallower than the one whose bin is fallen into by the sleep length.
  *
- *   o Otherwise, select the shallower state with the greatest matched "early"
- *     wakeups metric.
+ * In order to select an idle state for a CPU, the governor takes the following
+ * steps (modulo the possible latency constraint that must be taken into account
+ * too):
  *
- * - If the majority of the most recent idle duration values are below the
- *   target residency of the idle state selected so far, use those values to
- *   compute the new expected idle duration and find an idle state matching it
- *   (which has to be shallower than the one selected so far).
+ * 1. Find the deepest CPU idle state whose target residency does not exceed
+ *    the current sleep length (the candidate idle state) and compute two sums
+ *    as follows:
+ *
+ *    - The sum of the "hits" and "intercepts" metrics for the candidate state
+ *      and all of the deeper idle states (it represents the cases in which the
+ *      CPU was idle long enough to avoid being intercepted if the sleep length
+ *      had been equal to the current one).
+ *
+ *    - The sum of the "intercepts" metrics for all of the idle states shallower
+ *      than the candidate one (it represents the cases in which the CPU was not
+ *      idle long enough to avoid being intercepted if the sleep length had been
+ *      equal to the current one).
+ *
+ * 2. If the second sum is greater than the first one, look for an alternative
+ *    idle state to select.
+ *
+ *    - Traverse the idle states shallower than the candidate one in the
+ *      descending order.
+ *
+ *    - For each of them compute the sum of the "intercepts" metrics over all of
+ *      the idle  states between it and the candidate one (including the former
+ *      and excluding the latter).
+ *
+ *    - If that sum is greater than a half of the second sum computed in step 1
+ *      (which means that the target residency of the state in question had not
+ *      exceeded the idle duration in over a half of the relevant cases), select
+ *      the given idle state instead of the candidate one.
+ *
+ * 3. If the majority of the most recent idle duration values are below the
+ *    current anticipated idle duration, use those values to compute the new
+ *    expected idle duration and find an idle state matching it (which has to
+ *    be shallower than the current candidate one).
  */
 
 #include <linux/cpuidle.h>
@@ -65,44 +108,29 @@
 #define INTERVALS	8
 
 /**
- * struct teo_idle_state - Idle state data used by the TEO cpuidle governor.
- * @early_hits: "Early" CPU wakeups "matching" this state.
- * @hits: "On time" CPU wakeups "matching" this state.
- * @misses: CPU wakeups "missing" this state.
- *
- * A CPU wakeup is "matched" by a given idle state if the idle duration measured
- * after the wakeup is between the target residency of that state and the target
- * residency of the next one (or if this is the deepest available idle state, it
- * "matches" a CPU wakeup when the measured idle duration is at least equal to
- * its target residency).
- *
- * Also, from the TEO governor perspective, a CPU wakeup from idle is "early" if
- * it occurs significantly earlier than the closest expected timer event (that
- * is, early enough to match an idle state shallower than the one matching the
- * time till the closest timer event).  Otherwise, the wakeup is "on time", or
- * it is a "hit".
- *
- * A "miss" occurs when the given state doesn't match the wakeup, but it matches
- * the time till the closest timer event used for idle state selection.
+ * struct teo_bin - Metrics used by the TEO cpuidle governor.
+ * @intercepts: The "intercepts" metric.
+ * @hits: The "hits" metric.
  */
-struct teo_idle_state {
-	unsigned int early_hits;
+struct teo_bin {
+	unsigned int intercepts;
 	unsigned int hits;
-	unsigned int misses;
 };
 
 /**
  * struct teo_cpu - CPU data used by the TEO cpuidle governor.
  * @time_span_ns: Time between idle state selection and post-wakeup update.
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
- * @states: Idle states data corresponding to this CPU.
+ * @state_bins: Idle state data bins for this CPU.
+ * @total: Grand total of the "intercepts" and "hits" mertics for all bins.
  * @interval_idx: Index of the most recent saved idle interval.
  * @intervals: Saved idle duration values.
  */
 struct teo_cpu {
 	s64 time_span_ns;
 	s64 sleep_length_ns;
-	struct teo_idle_state states[CPUIDLE_STATE_MAX];
+	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
+	unsigned int total;
 	int interval_idx;
 	u64 intervals[INTERVALS];
 };
@@ -110,7 +138,7 @@ struct teo_cpu {
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 
 /**
- * teo_update - Update CPU data after wakeup.
+ * teo_update - Update CPU metrics after wakeup.
  * @drv: cpuidle driver containing state data.
  * @dev: Target CPU.
  */
@@ -118,7 +146,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	int i, idx_timer = 0, idx_duration = 0;
-	unsigned int hits, misses;
 	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -151,15 +178,21 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 			measured_ns /= 2;
 	}
 
+	cpu_data->total = 0;
+
 	/*
-	 * Decay the "early hits" metric for all of the states and find the
-	 * states matching the sleep length and the measured idle duration.
+	 * Decay the "hits" and "intercepts" metrics for all of the bins and
+	 * find the bins that the sleep length and the measured idle duration
+	 * fall into.
 	 */
 	for (i = 0; i < drv->state_count; i++) {
 		s64 target_residency_ns = drv->states[i].target_residency_ns;
-		unsigned int early_hits = cpu_data->states[i].early_hits;
+		struct teo_bin *bin = &cpu_data->state_bins[i];
 
-		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
+		bin->hits -= bin->hits >> DECAY_SHIFT;
+		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
+
+		cpu_data->total += bin->hits + bin->intercepts;
 
 		if (target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
@@ -169,28 +202,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	}
 
 	/*
-	 * Update the "hits" and "misses" data for the state matching the sleep
-	 * length.  If it matches the measured idle duration too, this is a hit,
-	 * so increase the "hits" metric for it then.  Otherwise, this is a
-	 * miss, so increase the "misses" metric for it.  In the latter case
-	 * also increase the "early hits" metric for the state that actually
-	 * matches the measured idle duration.
+	 * If the measured idle duration falls into the same bin as the sleep
+	 * length, this is a "hit", so update the "hits" metric for that bin.
+	 * Otherwise, update the "intercepts" metric for the bin fallen into by
+	 * the measured idle duration.
 	 */
-	hits = cpu_data->states[idx_timer].hits;
-	hits -= hits >> DECAY_SHIFT;
+	if (idx_timer == idx_duration)
+		cpu_data->state_bins[idx_timer].hits += PULSE;
+	else
+		cpu_data->state_bins[idx_duration].intercepts += PULSE;
 
-	misses = cpu_data->states[idx_timer].misses;
-	misses -= misses >> DECAY_SHIFT;
-
-	if (idx_timer == idx_duration) {
-		hits += PULSE;
-	} else {
-		misses += PULSE;
-		cpu_data->states[idx_duration].early_hits += PULSE;
-	}
-
-	cpu_data->states[idx_timer].misses = misses;
-	cpu_data->states[idx_timer].hits = hits;
+	cpu_data->total += PULSE;
 
 	/*
 	 * Save idle duration values corresponding to non-timer wakeups for
@@ -206,6 +228,12 @@ static bool teo_time_ok(u64 interval_ns)
 	return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
 }
 
+static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
+{
+	return (drv->states[idx].target_residency_ns +
+		drv->states[idx+1].target_residency_ns) / 2;
+}
+
 /**
  * teo_find_shallower_state - Find shallower idle state matching given duration.
  * @drv: cpuidle driver containing state data.
@@ -241,12 +269,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int constraint_idx = drv->state_count;
-	unsigned int hits = 0, misses = 0;
-	unsigned int early_hits = 0;
-	int prev_max_early_idx = -1;
-	int max_early_idx = -1;
-	int idx0 = -1, idx = -1;
+	unsigned int idx_intercept_sum = 0;
+	unsigned int intercept_sum = 0;
+	unsigned int idx_hit_sum = 0;
+	unsigned int hit_sum = 0;
+	int constraint_idx = 0;
+	int idx0 = 0, idx = -1;
 	ktime_t delta_tick;
 	s64 duration_ns;
 	int i;
@@ -261,119 +289,122 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 
-	for (i = 0; i < drv->state_count; i++) {
+	/* Check if there is any choice in the first place. */
+	if (drv->state_count < 2) {
+		idx = 0;;
+		goto end;
+	}
+	if (!dev->states_usage[0].disable) {
+		idx = 0;
+		if (drv->states[1].target_residency_ns > duration_ns)
+			goto end;
+	}
+
+	/*
+	 * Find the deepest idle state whose target residency does not exceed
+	 * the current sleep length and the deepest idle state not deeper than
+	 * the former whose exit latency does not exceed the current latency
+	 * constraint.  Compute the sums of metrics for early wakeup pattern
+	 * detection.
+	 */
+	for (i = 1; i < drv->state_count; i++) {
+		struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
 		struct cpuidle_state *s = &drv->states[i];
 
-		if (dev->states_usage[i].disable) {
-			/*
-			 * Ignore disabled states with target residencies beyond
-			 * the anticipated idle duration.
-			 */
-			if (s->target_residency_ns > duration_ns)
-				continue;
-
-			/*
-			 * This state is disabled, so the range of idle duration
-			 * values corresponding to it is covered by the current
-			 * candidate state, but still the "hits" and "misses"
-			 * metrics of the disabled state need to be used to
-			 * decide whether or not the state covering the range in
-			 * question is good enough.
-			 */
-			hits = cpu_data->states[i].hits;
-			misses = cpu_data->states[i].misses;
-
-			if (early_hits >= cpu_data->states[i].early_hits ||
-			    idx < 0)
-				continue;
-
-			/*
-			 * If the current candidate state has been the one with
-			 * the maximum "early hits" metric so far, the "early
-			 * hits" metric of the disabled state replaces the
-			 * current "early hits" count to avoid selecting a
-			 * deeper state with lower "early hits" metric.
-			 */
-			if (max_early_idx == idx) {
-				early_hits = cpu_data->states[i].early_hits;
-				continue;
-			}
-
-			/*
-			 * The current candidate state is closer to the disabled
-			 * one than the current maximum "early hits" state, so
-			 * replace the latter with it, but in case the maximum
-			 * "early hits" state index has not been set so far,
-			 * check if the current candidate state is not too
-			 * shallow for that role.
-			 */
-			if (teo_time_ok(drv->states[idx].target_residency_ns)) {
-				prev_max_early_idx = max_early_idx;
-				early_hits = cpu_data->states[i].early_hits;
-				max_early_idx = idx;
-			}
+		/*
+		 * Update the sums of idle state mertics for all of the states
+		 * shallower than the current one.
+		 */
+		intercept_sum += prev_bin->intercepts;
+		hit_sum += prev_bin->hits;
 
+		if (dev->states_usage[i].disable)
 			continue;
-		}
 
 		if (idx < 0) {
 			idx = i; /* first enabled state */
-			hits = cpu_data->states[i].hits;
-			misses = cpu_data->states[i].misses;
 			idx0 = i;
 		}
 
 		if (s->target_residency_ns > duration_ns)
 			break;
 
-		if (s->exit_latency_ns > latency_req && constraint_idx > i)
+		idx = i;
+
+		if (s->exit_latency_ns <= latency_req)
 			constraint_idx = i;
 
-		idx = i;
-		hits = cpu_data->states[i].hits;
-		misses = cpu_data->states[i].misses;
+		idx_intercept_sum = intercept_sum;
+		idx_hit_sum = hit_sum;
+	}
 
-		if (early_hits < cpu_data->states[i].early_hits &&
-		    teo_time_ok(drv->states[i].target_residency_ns)) {
-			prev_max_early_idx = max_early_idx;
-			early_hits = cpu_data->states[i].early_hits;
-			max_early_idx = i;
-		}
+	/* Avoid unnecessary overhead. */
+	if (idx < 0) {
+		idx = 0; /* No states enabled, must use 0. */
+		goto end;
+	} else if (idx == idx0) {
+		goto end;
 	}
 
 	/*
-	 * If the "hits" metric of the idle state matching the sleep length is
-	 * greater than its "misses" metric, that is the one to use.  Otherwise,
-	 * it is more likely that one of the shallower states will match the
-	 * idle duration observed after wakeup, so take the one with the maximum
-	 * "early hits" metric, but if that cannot be determined, just use the
-	 * state selected so far.
+	 * If the sum of the intercepts metric for all of the idle states
+	 * shallower than the current candidate one (idx) is greater than the
+	 * sum of the intercepts and hits metrics for the candidate state and
+	 * all of the deeper states, the CPU is likely to wake up early, so find
+	 * an alternative idle state to select.
 	 */
-	if (hits <= misses) {
+	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
+		s64 last_enabled_span_ns = duration_ns;
+		int last_enabled_idx = idx;
+
 		/*
-		 * The current candidate state is not suitable, so take the one
-		 * whose "early hits" metric is the maximum for the range of
-		 * shallower states.
+		 * Look for the deepest idle state whose target residency had
+		 * not exceeded the idle duration in over a half of the relevant
+		 * cases in the past.
+		 *
+		 * Take the possible latency constraint and duration limitation
+		 * present if the tick has been stopped already into account.
 		 */
-		if (idx == max_early_idx)
-			max_early_idx = prev_max_early_idx;
+		intercept_sum = 0;
 
-		if (max_early_idx >= 0) {
-			idx = max_early_idx;
-			duration_ns = drv->states[idx].target_residency_ns;
+		for (i = idx - 1; i >= idx0; i--) {
+			s64 span_ns;
+
+			intercept_sum += cpu_data->state_bins[i].intercepts;
+
+			if (dev->states_usage[i].disable)
+				continue;
+
+			span_ns = teo_middle_of_bin(i, drv);
+			if (!teo_time_ok(span_ns)) {
+				/*
+				 * The current state is too shallow, so select
+				 * the first enabled deeper state.
+				 */
+				duration_ns = last_enabled_span_ns;
+				idx = last_enabled_idx;
+				break;
+			}
+
+			if (2 * intercept_sum > idx_intercept_sum) {
+				idx = i;
+				duration_ns = span_ns;
+				break;
+			}
+
+			last_enabled_span_ns = span_ns;
+			last_enabled_idx = i;
 		}
 	}
 
 	/*
-	 * If there is a latency constraint, it may be necessary to use a
-	 * shallower idle state than the one selected so far.
+	 * If there is a latency constraint, it may be necessary to select an
+	 * idle state shallower than the current candidate one.
 	 */
-	if (constraint_idx < idx)
+	if (idx > constraint_idx)
 		idx = constraint_idx;
 
-	if (idx < 0) {
-		idx = 0; /* No states enabled. Must use 0. */
-	} else if (idx > idx0) {
+	if (idx > idx0) {
 		unsigned int count = 0;
 		u64 sum = 0;
 
@@ -416,6 +447,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		}
 	}
 
+end:
 	/*
 	 * Don't stop the tick if the selected state is a polling one or if the
 	 * expected idle duration is shorter than the tick period length.

From 77577558f25d40b82fba98673cf31ca16ba41d34 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:17:18 +0200
Subject: [PATCH 1420/3292] cpuidle: teo: Rework most recent idle duration
 values treatment

The TEO (Timer Events Oriented) cpuidle governor uses several most
recent idle duration values for a given CPU to refine the idle state
selection in case the previous long-term trends have not been
followed recently and a new trend appears to be forming.  That is
done by computing the average of the most recent idle duration
values falling below the time till the next timer event ("sleep
length"), provided that they are the majority of the most recent
idle duration values taken into account, and using it as the new
expected idle duration value.

However, idle state selection based on that value may not be optimal,
because the average does not really indicate which of the idle states
with target residencies less than or equal to it is likely to be the
best fit.

Thus, instead of computing the average, make the governor carry out
computations based on the distribution of the most recent idle
duration values among the bins corresponding to different idle
states.  Namely, if the majority of the most recent idle duration
values taken into consideration are less than the current sleep
length (which means that the CPU is likely to wake up early), find
the idle state closest to the "candidate" one "matching" the sleep
length whose target residency is less than or equal to the majority
of the most recent idle duration values that have fallen below the
current sleep length (which means that it is likely to be "shallow
enough" this time).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 153 +++++++++++++++-----------------
 1 file changed, 70 insertions(+), 83 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 5bcd45f1d610..7c2024f91fd7 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -47,15 +47,20 @@
  * length).  In turn, the "intercepts" metric reflects the relative frequency of
  * situations in which the measured idle duration is so much shorter than the
  * sleep length that the bin it falls into corresponds to an idle state
- * shallower than the one whose bin is fallen into by the sleep length.
+ * shallower than the one whose bin is fallen into by the sleep length (these
+ * situations are referred to as "intercepts" below).
+ *
+ * In addition to the metrics described above, the governor counts recent
+ * intercepts (that is, intercepts that have occurred during the last NR_RECENT
+ * invocations of it for the given CPU) for each bin.
  *
  * In order to select an idle state for a CPU, the governor takes the following
  * steps (modulo the possible latency constraint that must be taken into account
  * too):
  *
  * 1. Find the deepest CPU idle state whose target residency does not exceed
- *    the current sleep length (the candidate idle state) and compute two sums
- *    as follows:
+ *    the current sleep length (the candidate idle state) and compute 3 sums as
+ *    follows:
  *
  *    - The sum of the "hits" and "intercepts" metrics for the candidate state
  *      and all of the deeper idle states (it represents the cases in which the
@@ -67,25 +72,29 @@
  *      idle long enough to avoid being intercepted if the sleep length had been
  *      equal to the current one).
  *
- * 2. If the second sum is greater than the first one, look for an alternative
- *    idle state to select.
+ *    - The sum of the numbers of recent intercepts for all of the idle states
+ *      shallower than the candidate one.
+ *
+ * 2. If the second sum is greater than the first one or the third sum is
+ *    greater than NR_RECENT / 2, the CPU is likely to wake up early, so look
+ *    for an alternative idle state to select.
  *
  *    - Traverse the idle states shallower than the candidate one in the
  *      descending order.
  *
- *    - For each of them compute the sum of the "intercepts" metrics over all of
- *      the idle  states between it and the candidate one (including the former
- *      and excluding the latter).
+ *    - For each of them compute the sum of the "intercepts" metrics and the sum
+ *      of the numbers of recent intercepts over all of the idle states between
+ *      it and the candidate one (including the former and excluding the
+ *      latter).
  *
- *    - If that sum is greater than a half of the second sum computed in step 1
- *      (which means that the target residency of the state in question had not
- *      exceeded the idle duration in over a half of the relevant cases), select
- *      the given idle state instead of the candidate one.
+ *    - If each of these sums that needs to be taken into account (because the
+ *      check related to it has indicated that the CPU is likely to wake up
+ *      early) is greater than a half of the corresponding sum computed in step
+ *      1 (which means that the target residency of the state in question had
+ *      not exceeded the idle duration in over a half of the relevant cases),
+ *      select the given idle state instead of the candidate one.
  *
- * 3. If the majority of the most recent idle duration values are below the
- *    current anticipated idle duration, use those values to compute the new
- *    expected idle duration and find an idle state matching it (which has to
- *    be shallower than the current candidate one).
+ * 3. By default, select the candidate state.
  */
 
 #include <linux/cpuidle.h>
@@ -103,18 +112,20 @@
 
 /*
  * Number of the most recent idle duration values to take into consideration for
- * the detection of wakeup patterns.
+ * the detection of recent early wakeup patterns.
  */
-#define INTERVALS	8
+#define NR_RECENT	9
 
 /**
  * struct teo_bin - Metrics used by the TEO cpuidle governor.
  * @intercepts: The "intercepts" metric.
  * @hits: The "hits" metric.
+ * @recent: The number of recent "intercepts".
  */
 struct teo_bin {
 	unsigned int intercepts;
 	unsigned int hits;
+	unsigned int recent;
 };
 
 /**
@@ -123,16 +134,16 @@ struct teo_bin {
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @state_bins: Idle state data bins for this CPU.
  * @total: Grand total of the "intercepts" and "hits" mertics for all bins.
- * @interval_idx: Index of the most recent saved idle interval.
- * @intervals: Saved idle duration values.
+ * @next_recent_idx: Index of the next @recent_idx entry to update.
+ * @recent_idx: Indices of bins corresponding to recent "intercepts".
  */
 struct teo_cpu {
 	s64 time_span_ns;
 	s64 sleep_length_ns;
 	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
 	unsigned int total;
-	int interval_idx;
-	u64 intervals[INTERVALS];
+	int next_recent_idx;
+	int recent_idx[NR_RECENT];
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -201,26 +212,29 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		}
 	}
 
+	i = cpu_data->next_recent_idx++;
+	if (cpu_data->next_recent_idx >= NR_RECENT)
+		cpu_data->next_recent_idx = 0;
+
+	if (cpu_data->recent_idx[i] >= 0)
+		cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
+
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
 	 * Otherwise, update the "intercepts" metric for the bin fallen into by
 	 * the measured idle duration.
 	 */
-	if (idx_timer == idx_duration)
+	if (idx_timer == idx_duration) {
 		cpu_data->state_bins[idx_timer].hits += PULSE;
-	else
+		cpu_data->recent_idx[i] = -1;
+	} else {
 		cpu_data->state_bins[idx_duration].intercepts += PULSE;
+		cpu_data->state_bins[idx_duration].recent++;
+		cpu_data->recent_idx[i] = idx_duration;
+	}
 
 	cpu_data->total += PULSE;
-
-	/*
-	 * Save idle duration values corresponding to non-timer wakeups for
-	 * pattern detection.
-	 */
-	cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
-	if (cpu_data->interval_idx >= INTERVALS)
-		cpu_data->interval_idx = 0;
 }
 
 static bool teo_time_ok(u64 interval_ns)
@@ -271,10 +285,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
+	unsigned int idx_recent_sum = 0;
+	unsigned int recent_sum = 0;
 	unsigned int idx_hit_sum = 0;
 	unsigned int hit_sum = 0;
 	int constraint_idx = 0;
 	int idx0 = 0, idx = -1;
+	bool alt_intercepts, alt_recent;
 	ktime_t delta_tick;
 	s64 duration_ns;
 	int i;
@@ -317,6 +334,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 */
 		intercept_sum += prev_bin->intercepts;
 		hit_sum += prev_bin->hits;
+		recent_sum += prev_bin->recent;
 
 		if (dev->states_usage[i].disable)
 			continue;
@@ -336,6 +354,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 		idx_intercept_sum = intercept_sum;
 		idx_hit_sum = hit_sum;
+		idx_recent_sum = recent_sum;
 	}
 
 	/* Avoid unnecessary overhead. */
@@ -350,27 +369,36 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * If the sum of the intercepts metric for all of the idle states
 	 * shallower than the current candidate one (idx) is greater than the
 	 * sum of the intercepts and hits metrics for the candidate state and
-	 * all of the deeper states, the CPU is likely to wake up early, so find
-	 * an alternative idle state to select.
+	 * all of the deeper states, or the sum of the numbers of recent
+	 * intercepts over all of the states shallower than the candidate one
+	 * is greater than a half of the number of recent events taken into
+	 * account, the CPU is likely to wake up early, so find an alternative
+	 * idle state to select.
 	 */
-	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
+	alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
+	alt_recent = idx_recent_sum > NR_RECENT / 2;
+	if (alt_recent || alt_intercepts) {
 		s64 last_enabled_span_ns = duration_ns;
 		int last_enabled_idx = idx;
 
 		/*
 		 * Look for the deepest idle state whose target residency had
 		 * not exceeded the idle duration in over a half of the relevant
-		 * cases in the past.
+		 * cases (both with respect to intercepts overall and with
+		 * respect to the recent intercepts only) in the past.
 		 *
 		 * Take the possible latency constraint and duration limitation
 		 * present if the tick has been stopped already into account.
 		 */
 		intercept_sum = 0;
+		recent_sum = 0;
 
 		for (i = idx - 1; i >= idx0; i--) {
+			struct teo_bin *bin = &cpu_data->state_bins[i];
 			s64 span_ns;
 
-			intercept_sum += cpu_data->state_bins[i].intercepts;
+			intercept_sum += bin->intercepts;
+			recent_sum += bin->recent;
 
 			if (dev->states_usage[i].disable)
 				continue;
@@ -386,7 +414,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				break;
 			}
 
-			if (2 * intercept_sum > idx_intercept_sum) {
+			if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
+			    (!alt_intercepts ||
+			     2 * intercept_sum > idx_intercept_sum)) {
 				idx = i;
 				duration_ns = span_ns;
 				break;
@@ -404,49 +434,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (idx > constraint_idx)
 		idx = constraint_idx;
 
-	if (idx > idx0) {
-		unsigned int count = 0;
-		u64 sum = 0;
-
-		/*
-		 * The target residencies of at least two different enabled idle
-		 * states are less than or equal to the current expected idle
-		 * duration.  Try to refine the selection using the most recent
-		 * measured idle duration values.
-		 *
-		 * Count and sum the most recent idle duration values less than
-		 * the current expected idle duration value.
-		 */
-		for (i = 0; i < INTERVALS; i++) {
-			u64 val = cpu_data->intervals[i];
-
-			if (val >= duration_ns)
-				continue;
-
-			count++;
-			sum += val;
-		}
-
-		/*
-		 * Give up unless the majority of the most recent idle duration
-		 * values are in the interesting range.
-		 */
-		if (count > INTERVALS / 2) {
-			u64 avg_ns = div64_u64(sum, count);
-
-			/*
-			 * Avoid spending too much time in an idle state that
-			 * would be too shallow.
-			 */
-			if (teo_time_ok(avg_ns)) {
-				duration_ns = avg_ns;
-				if (drv->states[idx].target_residency_ns > avg_ns)
-					idx = teo_find_shallower_state(drv, dev,
-								       idx, avg_ns);
-			}
-		}
-	}
-
 end:
 	/*
 	 * Don't stop the tick if the selected state is a polling one or if the
@@ -507,8 +494,8 @@ static int teo_enable_device(struct cpuidle_driver *drv,
 
 	memset(cpu_data, 0, sizeof(*cpu_data));
 
-	for (i = 0; i < INTERVALS; i++)
-		cpu_data->intervals[i] = U64_MAX;
+	for (i = 0; i < NR_RECENT; i++)
+		cpu_data->recent_idx[i] = -1;
 
 	return 0;
 }

From 154ae8bb3c830f0a568a5194ce7e631aa6bcfe8b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:18:02 +0200
Subject: [PATCH 1421/3292] cpuidle: teo: Use kerneldoc documentation in
 admin-guide

There are two descriptions of the TEO (Timer Events Oriented) cpuidle
governor in the kernel source tree, one in the C file containing its
code and one in cpuidle.rst which is part of admin-guide.

Instead of trying to keep them both in sync and in order to reduce
text duplication, include the governor description from the C file
directly into cpuidle.rst.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/cpuidle.rst | 77 +-----------------------
 drivers/cpuidle/governors/teo.c          | 12 ++--
 2 files changed, 10 insertions(+), 79 deletions(-)

diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 10fde58d0869..aec2cd2aaea7 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -347,81 +347,8 @@ for tickless systems.  It follows the same basic strategy as the ``menu`` `one
 <menu-gov_>`_: it always tries to find the deepest idle state suitable for the
 given conditions.  However, it applies a different approach to that problem.
 
-First, it does not use sleep length correction factors, but instead it attempts
-to correlate the observed idle duration values with the available idle states
-and use that information to pick up the idle state that is most likely to
-"match" the upcoming CPU idle interval.   Second, it does not take the tasks
-that were running on the given CPU in the past and are waiting on some I/O
-operations to complete now at all (there is no guarantee that they will run on
-the same CPU when they become runnable again) and the pattern detection code in
-it avoids taking timer wakeups into account.  It also only uses idle duration
-values less than the current time till the closest timer (with the scheduler
-tick excluded) for that purpose.
-
-Like in the ``menu`` governor `case <menu-gov_>`_, the first step is to obtain
-the *sleep length*, which is the time until the closest timer event with the
-assumption that the scheduler tick will be stopped (that also is the upper bound
-on the time until the next CPU wakeup).  That value is then used to preselect an
-idle state on the basis of three metrics maintained for each idle state provided
-by the ``CPUIdle`` driver: ``hits``, ``misses`` and ``early_hits``.
-
-The ``hits`` and ``misses`` metrics measure the likelihood that a given idle
-state will "match" the observed (post-wakeup) idle duration if it "matches" the
-sleep length.  They both are subject to decay (after a CPU wakeup) every time
-the target residency of the idle state corresponding to them is less than or
-equal to the sleep length and the target residency of the next idle state is
-greater than the sleep length (that is, when the idle state corresponding to
-them "matches" the sleep length).  The ``hits`` metric is increased if the
-former condition is satisfied and the target residency of the given idle state
-is less than or equal to the observed idle duration and the target residency of
-the next idle state is greater than the observed idle duration at the same time
-(that is, it is increased when the given idle state "matches" both the sleep
-length and the observed idle duration).  In turn, the ``misses`` metric is
-increased when the given idle state "matches" the sleep length only and the
-observed idle duration is too short for its target residency.
-
-The ``early_hits`` metric measures the likelihood that a given idle state will
-"match" the observed (post-wakeup) idle duration if it does not "match" the
-sleep length.  It is subject to decay on every CPU wakeup and it is increased
-when the idle state corresponding to it "matches" the observed (post-wakeup)
-idle duration and the target residency of the next idle state is less than or
-equal to the sleep length (i.e. the idle state "matching" the sleep length is
-deeper than the given one).
-
-The governor walks the list of idle states provided by the ``CPUIdle`` driver
-and finds the last (deepest) one with the target residency less than or equal
-to the sleep length.  Then, the ``hits`` and ``misses`` metrics of that idle
-state are compared with each other and it is preselected if the ``hits`` one is
-greater (which means that that idle state is likely to "match" the observed idle
-duration after CPU wakeup).  If the ``misses`` one is greater, the governor
-preselects the shallower idle state with the maximum ``early_hits`` metric
-(or if there are multiple shallower idle states with equal ``early_hits``
-metric which also is the maximum, the shallowest of them will be preselected).
-[If there is a wakeup latency constraint coming from the `PM QoS framework
-<cpu-pm-qos_>`_ which is hit before reaching the deepest idle state with the
-target residency within the sleep length, the deepest idle state with the exit
-latency within the constraint is preselected without consulting the ``hits``,
-``misses`` and ``early_hits`` metrics.]
-
-Next, the governor takes several idle duration values observed most recently
-into consideration and if at least a half of them are greater than or equal to
-the target residency of the preselected idle state, that idle state becomes the
-final candidate to ask for.  Otherwise, the average of the most recent idle
-duration values below the target residency of the preselected idle state is
-computed and the governor walks the idle states shallower than the preselected
-one and finds the deepest of them with the target residency within that average.
-That idle state is then taken as the final candidate to ask for.
-
-Still, at this point the governor may need to refine the idle state selection if
-it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_.  That
-generally happens if the target residency of the idle state selected so far is
-less than the tick period and the tick has not been stopped already (in a
-previous iteration of the idle loop).  Then, like in the ``menu`` governor
-`case <menu-gov_>`_, the sleep length used in the previous computations may not
-reflect the real time until the closest timer event and if it really is greater
-than that time, a shallower state with a suitable target residency may need to
-be selected.
-
+.. kernel-doc:: drivers/cpuidle/governors/teo.c
+   :doc: teo-description
 
 .. _idle-states-representation:
 
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 7c2024f91fd7..1e0b2f828abb 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -4,6 +4,10 @@
  *
  * Copyright (C) 2018 - 2021 Intel Corporation
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ */
+
+/**
+ * DOC: teo-description
  *
  * The idea of this governor is based on the observation that on many systems
  * timer events are two or more orders of magnitude more frequent than any
@@ -28,7 +32,7 @@
  *
  * The computations carried out by this governor are based on using bins whose
  * boundaries are aligned with the target residency parameter values of the CPU
- * idle states provided by the cpuidle driver in the ascending order.  That is,
+ * idle states provided by the %CPUIdle driver in the ascending order.  That is,
  * the first bin spans from 0 up to, but not including, the target residency of
  * the second idle state (idle state 1), the second bin spans from the target
  * residency of idle state 1 up to, but not including, the target residency of
@@ -51,8 +55,8 @@
  * situations are referred to as "intercepts" below).
  *
  * In addition to the metrics described above, the governor counts recent
- * intercepts (that is, intercepts that have occurred during the last NR_RECENT
- * invocations of it for the given CPU) for each bin.
+ * intercepts (that is, intercepts that have occurred during the last
+ * %NR_RECENT invocations of it for the given CPU) for each bin.
  *
  * In order to select an idle state for a CPU, the governor takes the following
  * steps (modulo the possible latency constraint that must be taken into account
@@ -76,7 +80,7 @@
  *      shallower than the candidate one.
  *
  * 2. If the second sum is greater than the first one or the third sum is
- *    greater than NR_RECENT / 2, the CPU is likely to wake up early, so look
+ *    greater than %NR_RECENT / 2, the CPU is likely to wake up early, so look
  *    for an alternative idle state to select.
  *
  *    - Traverse the idle states shallower than the candidate one in the

From 0eef091d2dc447e10607f6dafa173c311ada972b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 3 Jun 2021 11:34:35 +0200
Subject: [PATCH 1422/3292] PM: domains: Split code in
 dev_pm_genpd_set_performance_state()

To prepare some of the code in dev_pm_genpd_set_performance_state() to be
re-used from subsequent changes, let's split it up into two functions.

Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5695a641efd3..ede0f576efe6 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -379,6 +379,24 @@ err:
 	return ret;
 }
 
+static int genpd_set_performance_state(struct device *dev, unsigned int state)
+{
+	struct generic_pm_domain *genpd = dev_to_genpd(dev);
+	struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
+	unsigned int prev_state;
+	int ret;
+
+	prev_state = gpd_data->performance_state;
+	gpd_data->performance_state = state;
+	state = _genpd_reeval_performance_state(genpd, state);
+
+	ret = _genpd_set_performance_state(genpd, state, 0);
+	if (ret)
+		gpd_data->performance_state = prev_state;
+
+	return ret;
+}
+
 /**
  * dev_pm_genpd_set_performance_state- Set performance state of device's power
  * domain.
@@ -397,8 +415,6 @@ err:
 int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
 {
 	struct generic_pm_domain *genpd;
-	struct generic_pm_domain_data *gpd_data;
-	unsigned int prev;
 	int ret;
 
 	genpd = dev_to_genpd_safe(dev);
@@ -410,16 +426,7 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
 		return -EINVAL;
 
 	genpd_lock(genpd);
-
-	gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
-	prev = gpd_data->performance_state;
-	gpd_data->performance_state = state;
-
-	state = _genpd_reeval_performance_state(genpd, state);
-	ret = _genpd_set_performance_state(genpd, state, 0);
-	if (ret)
-		gpd_data->performance_state = prev;
-
+	ret = genpd_set_performance_state(dev, state);
 	genpd_unlock(genpd);
 
 	return ret;

From d97fe100ee0b36c5dd8013ffd70fe8fcdcabff2b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 3 Jun 2021 11:34:36 +0200
Subject: [PATCH 1423/3292] PM: domains: Return early if perf state is already
 set for the device

When dev_pm_genpd_set_performance_state() gets called to set a new
performance state for the device, let's take a quicker path by doing an
early return, if it turns out that the new state is already set for the
device.

Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index ede0f576efe6..90a9828fcb2f 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -387,6 +387,9 @@ static int genpd_set_performance_state(struct device *dev, unsigned int state)
 	int ret;
 
 	prev_state = gpd_data->performance_state;
+	if (prev_state == state)
+		return 0;
+
 	gpd_data->performance_state = state;
 	state = _genpd_reeval_performance_state(genpd, state);
 

From 5937c3ce21228d33d2eb3287baa7e4cf6978dba9 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 3 Jun 2021 11:34:37 +0200
Subject: [PATCH 1424/3292] PM: domains: Drop/restore performance state votes
 for devices at runtime PM

A subsystem/driver that need to manage OPPs for its device, should
typically drop its vote for the OPP when the device becomes runtime
suspended. In this way, the corresponding aggregation of the performance
state votes that is managed in genpd for the attached PM domain, may find
that the aggregated vote can be decreased. Hence, it may allow genpd to set
the lower performance state for the PM domain, thus avoiding to waste
energy.

To accomplish this, typically a subsystem/driver would need to call
dev_pm_opp_set_rate|opp() for its device from its ->runtime_suspend()
callback, to drop the vote for the OPP. Accordingly, it needs another call
to dev_pm_opp_set_rate|opp() to restore the vote for the OPP from its
->runtime_resume() callback.

To avoid boilerplate code in subsystems/driver to deal with these things,
let's instead manage this internally in genpd.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 27 +++++++++++++++++++++++++--
 include/linux/pm_domain.h   |  1 +
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 90a9828fcb2f..ab0b740cc0f1 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -400,6 +400,23 @@ static int genpd_set_performance_state(struct device *dev, unsigned int state)
 	return ret;
 }
 
+static int genpd_drop_performance_state(struct device *dev)
+{
+	unsigned int prev_state = dev_gpd_data(dev)->performance_state;
+
+	if (!genpd_set_performance_state(dev, 0))
+		return prev_state;
+
+	return 0;
+}
+
+static void genpd_restore_performance_state(struct device *dev,
+					    unsigned int state)
+{
+	if (state)
+		genpd_set_performance_state(dev, state);
+}
+
 /**
  * dev_pm_genpd_set_performance_state- Set performance state of device's power
  * domain.
@@ -843,7 +860,8 @@ static int genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 	bool (*suspend_ok)(struct device *__dev);
-	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
+	struct gpd_timing_data *td = &gpd_data->td;
 	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
 	s64 elapsed_ns;
@@ -900,6 +918,7 @@ static int genpd_runtime_suspend(struct device *dev)
 		return 0;
 
 	genpd_lock(genpd);
+	gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
 	genpd_power_off(genpd, true, 0);
 	genpd_unlock(genpd);
 
@@ -917,7 +936,8 @@ static int genpd_runtime_suspend(struct device *dev)
 static int genpd_runtime_resume(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
+	struct gpd_timing_data *td = &gpd_data->td;
 	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
 	s64 elapsed_ns;
@@ -941,6 +961,8 @@ static int genpd_runtime_resume(struct device *dev)
 
 	genpd_lock(genpd);
 	ret = genpd_power_on(genpd, 0);
+	if (!ret)
+		genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
 	genpd_unlock(genpd);
 
 	if (ret)
@@ -979,6 +1001,7 @@ err_stop:
 err_poweroff:
 	if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) {
 		genpd_lock(genpd);
+		gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
 		genpd_power_off(genpd, true, 0);
 		genpd_unlock(genpd);
 	}
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index dfcfbcecc34b..21a0577305ef 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -198,6 +198,7 @@ struct generic_pm_domain_data {
 	struct notifier_block *power_nb;
 	int cpu;
 	unsigned int performance_state;
+	unsigned int rpm_pstate;
 	ktime_t	next_wakeup;
 	void *data;
 };

From 03466883a0fdb5c38f2907b027565b9f253688a8 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 8 Jun 2021 15:44:37 +0800
Subject: [PATCH 1425/3292] PM: sleep: remove trailing spaces and tabs

Run the following command to find and remove the trailing spaces and tabs:

$ find kernel/power/ -type f | xargs sed -r -i 's/[ \t]+$//'

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/Kconfig   | 12 ++++++------
 kernel/power/process.c |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6bfe3ead10ad..a12779650f15 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -98,20 +98,20 @@ config PM_STD_PARTITION
 	default ""
 	help
 	  The default resume partition is the partition that the suspend-
-	  to-disk implementation will look for a suspended disk image. 
+	  to-disk implementation will look for a suspended disk image.
 
-	  The partition specified here will be different for almost every user. 
+	  The partition specified here will be different for almost every user.
 	  It should be a valid swap partition (at least for now) that is turned
-	  on before suspending. 
+	  on before suspending.
 
 	  The partition specified can be overridden by specifying:
 
-		resume=/dev/<other device> 
+		resume=/dev/<other device>
 
-	  which will set the resume partition to the device specified. 
+	  which will set the resume partition to the device specified.
 
 	  Note there is currently not a way to specify which device to save the
-	  suspended image to. It will simply pick the first available swap 
+	  suspended image to. It will simply pick the first available swap
 	  device.
 
 config PM_SLEEP
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 50cc63534486..37401c99b7d7 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * drivers/power/process.c - Functions for starting/stopping processes on 
+ * drivers/power/process.c - Functions for starting/stopping processes on
  *                           suspend transitions.
  *
  * Originally from swsusp.

From 480f0de68caddfe336b8cc0c74a40328779940d3 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 8 Jun 2021 16:13:14 +0800
Subject: [PATCH 1426/3292] PM: hibernate: remove leading spaces before tabs

 1) Run the following command to find and remove the leading spaces
    before tabs:
    $ find kernel/power/ -type f | xargs sed -r -i 's/^[ ]+\t/\t/'
 2) Manually check and correct if necessary

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index af507c8c895b..f7a986078213 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1146,7 +1146,7 @@ int create_basic_memory_bitmaps(void)
  Free_second_object:
 	kfree(bm2);
  Free_first_bitmap:
- 	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
  Free_first_object:
 	kfree(bm1);
 	return -ENOMEM;

From d0a83314db291fdf2e1c36102212c80e01022d6d Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 11 Jun 2021 18:16:07 +0800
Subject: [PATCH 1427/3292] selinux: Fix kernel-doc

Fix function name and add comment for parameter state in ss/services.c
kernel-doc to remove some warnings found by running make W=1 LLVM=1.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/services.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 3658488efdcb..d84c77f370dc 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -859,6 +859,7 @@ int security_validate_transition(struct selinux_state *state,
  * It returns 0, if @newsid is bounded by @oldsid.
  * Otherwise, it returns error code.
  *
+ * @state: SELinux state
  * @oldsid : current security identifier
  * @newsid : destinated security identifier
  */
@@ -1098,6 +1099,7 @@ allow:
 
 /**
  * security_compute_av - Compute access vector decisions.
+ * @state: SELinux state
  * @ssid: source security identifier
  * @tsid: target security identifier
  * @tclass: target security class
@@ -1386,6 +1388,7 @@ out_unlock:
 
 /**
  * security_sid_to_context - Obtain a context for a given SID.
+ * @state: SELinux state
  * @sid: security identifier, SID
  * @scontext: security context
  * @scontext_len: length in bytes
@@ -1411,6 +1414,7 @@ int security_sid_to_context_force(struct selinux_state *state, u32 sid,
 /**
  * security_sid_to_context_inval - Obtain a context for a given SID if it
  *                                 is invalid.
+ * @state: SELinux state
  * @sid: security identifier, SID
  * @scontext: security context
  * @scontext_len: length in bytes
@@ -1587,6 +1591,7 @@ out:
 
 /**
  * security_context_to_sid - Obtain a SID for a given security context.
+ * @state: SELinux state
  * @scontext: security context
  * @scontext_len: length in bytes
  * @sid: security identifier, SID
@@ -1616,6 +1621,7 @@ int security_context_str_to_sid(struct selinux_state *state,
  * security_context_to_sid_default - Obtain a SID for a given security context,
  * falling back to specified default if needed.
  *
+ * @state: SELinux state
  * @scontext: security context
  * @scontext_len: length in bytes
  * @sid: security identifier, SID
@@ -1907,6 +1913,7 @@ out:
 
 /**
  * security_transition_sid - Compute the SID for a new subject/object.
+ * @state: SELinux state
  * @ssid: source security identifier
  * @tsid: target security identifier
  * @tclass: target security class
@@ -1962,6 +1969,7 @@ int security_member_sid(struct selinux_state *state,
 
 /**
  * security_change_sid - Compute the SID for object relabeling.
+ * @state: SELinux state
  * @ssid: source security identifier
  * @tsid: target security identifier
  * @tclass: target security class
@@ -2260,6 +2268,7 @@ void selinux_policy_commit(struct selinux_state *state,
 
 /**
  * security_load_policy - Load a security policy configuration.
+ * @state: SELinux state
  * @data: binary policy data
  * @len: length of data in bytes
  *
@@ -2367,6 +2376,7 @@ err_policy:
 
 /**
  * security_port_sid - Obtain the SID for a port.
+ * @state: SELinux state
  * @protocol: protocol number
  * @port: port number
  * @out_sid: security identifier
@@ -2423,7 +2433,8 @@ out:
 }
 
 /**
- * security_pkey_sid - Obtain the SID for a pkey.
+ * security_ib_pkey_sid - Obtain the SID for a pkey.
+ * @state: SELinux state
  * @subnet_prefix: Subnet Prefix
  * @pkey_num: pkey number
  * @out_sid: security identifier
@@ -2482,6 +2493,7 @@ out:
 
 /**
  * security_ib_endport_sid - Obtain the SID for a subnet management interface.
+ * @state: SELinux state
  * @dev_name: device name
  * @port: port number
  * @out_sid: security identifier
@@ -2540,6 +2552,7 @@ out:
 
 /**
  * security_netif_sid - Obtain the SID for a network interface.
+ * @state: SELinux state
  * @name: interface name
  * @if_sid: interface SID
  */
@@ -2614,6 +2627,7 @@ static int match_ipv6_addrmask(u32 *input, u32 *addr, u32 *mask)
 
 /**
  * security_node_sid - Obtain the SID for a node (host).
+ * @state: SELinux state
  * @domain: communication domain aka address family
  * @addrp: address
  * @addrlen: address length in bytes
@@ -2707,6 +2721,7 @@ out:
 
 /**
  * security_get_user_sids - Obtain reachable SIDs for a user.
+ * @state: SELinux state
  * @fromsid: starting SID
  * @username: username
  * @sids: array of reachable SIDs for user
@@ -2899,6 +2914,7 @@ out:
 
 /**
  * security_genfs_sid - Obtain a SID for a file in a filesystem
+ * @state: SELinux state
  * @fstype: filesystem type
  * @path: path from root of mount
  * @sclass: file security class
@@ -2943,6 +2959,7 @@ int selinux_policy_genfs_sid(struct selinux_policy *policy,
 
 /**
  * security_fs_use - Determine how to handle labeling for a filesystem.
+ * @state: SELinux state
  * @sb: superblock in question
  */
 int security_fs_use(struct selinux_state *state, struct super_block *sb)
@@ -3282,6 +3299,7 @@ out_unlock:
 
 /**
  * security_net_peersid_resolve - Compare and resolve two network peer SIDs
+ * @state: SELinux state
  * @nlbl_sid: NetLabel SID
  * @nlbl_type: NetLabel labeling protocol type
  * @xfrm_sid: XFRM SID
@@ -3506,6 +3524,7 @@ int security_get_allow_unknown(struct selinux_state *state)
 
 /**
  * security_policycap_supported - Check for a specific policy capability
+ * @state: SELinux state
  * @req_cap: capability
  *
  * Description:
@@ -3840,6 +3859,7 @@ static void security_netlbl_cache_add(struct netlbl_lsm_secattr *secattr,
 
 /**
  * security_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID
+ * @state: SELinux state
  * @secattr: the NetLabel packet security attributes
  * @sid: the SELinux SID
  *
@@ -3922,6 +3942,7 @@ out:
 
 /**
  * security_netlbl_sid_to_secattr - Convert a SELinux SID to a NetLabel secattr
+ * @state: SELinux state
  * @sid: the SELinux SID
  * @secattr: the NetLabel packet security attributes
  *

From 5a2bd1b1c64e1ac5627db3767ac465f18606315c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 8 Jun 2021 11:02:48 +0200
Subject: [PATCH 1428/3292] PM: runtime: Improve path in rpm_idle() when no
 callback

When pm_runtime_no_callbacks() has been called for a struct device to set
the dev->power.no_callbacks flag for it, it enables rpm_idle() to take a
slightly quicker path by assuming that a ->runtime_idle() callback would
have returned 0 to indicate success.

A device that does not have the dev->power.no_callbacks flag set for it,
may still be missing a corresponding ->runtime_idle() callback, in which
case the slower path in rpm_idle() is taken. Let's improve the behaviour
for this case, by aligning code to the quicker path.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b570848d23e0..68bebbf81347 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -446,7 +446,10 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	/* Pending requests need to be canceled. */
 	dev->power.request = RPM_REQ_NONE;
 
-	if (dev->power.no_callbacks)
+	callback = RPM_GET_CALLBACK(dev, runtime_idle);
+
+	/* If no callback assume success. */
+	if (!callback || dev->power.no_callbacks)
 		goto out;
 
 	/* Carry out an asynchronous or a synchronous idle notification. */
@@ -462,10 +465,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
 
 	dev->power.idle_notification = true;
 
-	callback = RPM_GET_CALLBACK(dev, runtime_idle);
-
-	if (callback)
-		retval = __rpm_callback(callback, dev);
+	retval = __rpm_callback(callback, dev);
 
 	dev->power.idle_notification = false;
 	wake_up_all(&dev->power.wait_queue);

From 63d00be69348fda431ae59aba6af268a5cf5058e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 8 Jun 2021 11:02:49 +0200
Subject: [PATCH 1429/3292] PM: runtime: Allow unassigned
 ->runtime_suspend|resume callbacks

We are currently allowing ->runtime_idle() callbacks to be unassigned
without returning an error code from rpm_idle(). This has been useful to
avoid boilerplate code in drivers. Let's take this approach a step further,
by allowing also unassigned ->runtime_suspend|resume() callbacks.

In this way, a consumer/supplier device link can be used to let a consumer
device be power managed through its supplier device, without requiring
assigned ->runtime_suspend|resume() callbacks for the consumer device, for
example.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 68bebbf81347..8a66eaf731e4 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -345,7 +345,7 @@ static void rpm_suspend_suppliers(struct device *dev)
 static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
-	int retval, idx;
+	int retval = 0, idx;
 	bool use_links = dev->power.links_count > 0;
 
 	if (dev->power.irq_safe) {
@@ -373,7 +373,8 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 		}
 	}
 
-	retval = cb(dev);
+	if (cb)
+		retval = cb(dev);
 
 	if (dev->power.irq_safe) {
 		spin_lock(&dev->power.lock);
@@ -484,9 +485,6 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev)
 {
 	int retval;
 
-	if (!cb)
-		return -ENOSYS;
-
 	if (dev->power.memalloc_noio) {
 		unsigned int noio_flag;
 

From 4ec4f059088b48585c337328e05fa930c64d1ba8 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 9 Jun 2021 12:06:10 +0200
Subject: [PATCH 1430/3292] PM: runtime: Clarify documentation when callbacks
 are unassigned

Recent changes to the PM core allows ->runtime_suspend|resume callbacks to
be unassigned.

In the earlier behaviour the PM core would return -ENOSYS, when trying to
runtime resume a device, for example. Let's update the documentation to
clarify this.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index b48cac5f9048..d6bf84f061f4 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -831,6 +831,15 @@ or driver about runtime power changes.  Instead, the driver for the device's
 parent must take responsibility for telling the device's driver when the
 parent's power state changes.
 
+Note that, in some cases it may not be desirable for subsystems/drivers to call
+pm_runtime_no_callbacks() for their devices. This could be because a subset of
+the runtime PM callbacks needs to be implemented, a platform dependent PM
+domain could get attached to the device or that the device is power managed
+through a supplier device link. For these reasons and to avoid boilerplate code
+in subsystems/drivers, the PM core allows runtime PM callbacks to be
+unassigned. More precisely, if a callback pointer is NULL, the PM core will act
+as though there was a callback and it returned 0.
+
 9. Autosuspend, or automatically-delayed suspends
 =================================================
 

From b17ec22fb339bfcb1eadd5f5e196dc705dcf3949 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Jan 2021 15:40:54 -0500
Subject: [PATCH 1431/3292] selinux: slow_avc_audit has become non-blocking

dump_common_audit_data() is safe to use under rcu_read_lock() now;
no need for AVC_NONBLOCKING and games around it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/avc.c         | 28 ++++++++--------------------
 security/selinux/hooks.c       | 13 ++-----------
 security/selinux/include/avc.h |  4 ----
 3 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 6417ed49661f..9322687cb030 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -759,7 +759,11 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
 	}
 }
 
-/* This is the slow part of avc audit with big stack footprint */
+/*
+ * This is the slow part of avc audit with big stack footprint.
+ * Note that it is non-blocking and can be called from under
+ * rcu_read_lock().
+ */
 noinline int slow_avc_audit(struct selinux_state *state,
 			    u32 ssid, u32 tsid, u16 tclass,
 			    u32 requested, u32 audited, u32 denied, int result,
@@ -826,7 +830,7 @@ out:
  * @ssid,@tsid,@tclass : identifier of an AVC entry
  * @seqno : sequence number when decision was made
  * @xpd: extended_perms_decision to be added to the node
- * @flags: the AVC_* flags, e.g. AVC_NONBLOCKING, AVC_EXTENDED_PERMS, or 0.
+ * @flags: the AVC_* flags, e.g. AVC_EXTENDED_PERMS, or 0.
  *
  * if a valid AVC entry doesn't exist,this function returns -ENOENT.
  * if kmalloc() called internal returns NULL, this function returns -ENOMEM.
@@ -845,21 +849,6 @@ static int avc_update_node(struct selinux_avc *avc,
 	struct hlist_head *head;
 	spinlock_t *lock;
 
-	/*
-	 * If we are in a non-blocking code path, e.g. VFS RCU walk,
-	 * then we must not add permissions to a cache entry
-	 * because we will not audit the denial.  Otherwise,
-	 * during the subsequent blocking retry (e.g. VFS ref walk), we
-	 * will find the permissions already granted in the cache entry
-	 * and won't audit anything at all, leading to silent denials in
-	 * permissive mode that only appear when in enforcing mode.
-	 *
-	 * See the corresponding handling of MAY_NOT_BLOCK in avc_audit()
-	 * and selinux_inode_permission().
-	 */
-	if (flags & AVC_NONBLOCKING)
-		return 0;
-
 	node = avc_alloc_node(avc);
 	if (!node) {
 		rc = -ENOMEM;
@@ -1120,7 +1109,7 @@ decision:
  * @tsid: target security identifier
  * @tclass: target security class
  * @requested: requested permissions, interpreted based on @tclass
- * @flags:  AVC_STRICT, AVC_NONBLOCKING, or 0
+ * @flags:  AVC_STRICT or 0
  * @avd: access vector decisions
  *
  * Check the AVC to determine whether the @requested permissions are granted
@@ -1205,8 +1194,7 @@ int avc_has_perm_flags(struct selinux_state *state,
 	struct av_decision avd;
 	int rc, rc2;
 
-	rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested,
-				  (flags & MAY_NOT_BLOCK) ? AVC_NONBLOCKING : 0,
+	rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0,
 				  &avd);
 
 	rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index fc6a3ab7e179..4e64f681e5d1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3164,17 +3164,13 @@ static noinline int audit_inode_permission(struct inode *inode,
 {
 	struct common_audit_data ad;
 	struct inode_security_struct *isec = selinux_inode(inode);
-	int rc;
 
 	ad.type = LSM_AUDIT_DATA_INODE;
 	ad.u.inode = inode;
 
-	rc = slow_avc_audit(&selinux_state,
+	return slow_avc_audit(&selinux_state,
 			    current_sid(), isec->sid, isec->sclass, perms,
 			    audited, denied, result, &ad);
-	if (rc)
-		return rc;
-	return 0;
 }
 
 static int selinux_inode_permission(struct inode *inode, int mask)
@@ -3209,8 +3205,7 @@ static int selinux_inode_permission(struct inode *inode, int mask)
 		return PTR_ERR(isec);
 
 	rc = avc_has_perm_noaudit(&selinux_state,
-				  sid, isec->sid, isec->sclass, perms,
-				  no_block ? AVC_NONBLOCKING : 0,
+				  sid, isec->sid, isec->sclass, perms, 0,
 				  &avd);
 	audited = avc_audit_required(perms, &avd, rc,
 				     from_access ? FILE__AUDIT_ACCESS : 0,
@@ -3218,10 +3213,6 @@ static int selinux_inode_permission(struct inode *inode, int mask)
 	if (likely(!audited))
 		return rc;
 
-	/* fall back to ref-walk if we have to generate audit */
-	if (no_block)
-		return -ECHILD;
-
 	rc2 = audit_inode_permission(inode, perms, audited, denied, rc);
 	if (rc2)
 		return rc2;
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index cf4cc3ef959b..c3bbfc8e8b46 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -134,9 +134,6 @@ static inline int avc_audit(struct selinux_state *state,
 	audited = avc_audit_required(requested, avd, result, 0, &denied);
 	if (likely(!audited))
 		return 0;
-	/* fall back to ref-walk if we have to generate audit */
-	if (flags & MAY_NOT_BLOCK)
-		return -ECHILD;
 	return slow_avc_audit(state, ssid, tsid, tclass,
 			      requested, audited, denied, result,
 			      a);
@@ -144,7 +141,6 @@ static inline int avc_audit(struct selinux_state *state,
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
 #define AVC_EXTENDED_PERMS 2	/* update extended permissions */
-#define AVC_NONBLOCKING    4	/* non blocking */
 int avc_has_perm_noaudit(struct selinux_state *state,
 			 u32 ssid, u32 tsid,
 			 u16 tclass, u32 requested,

From d99cf13f14200cdb5cbb704345774c9c0698612d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Jan 2021 15:57:49 -0500
Subject: [PATCH 1432/3292] selinux: kill 'flags' argument in
 avc_has_perm_flags() and avc_audit()

... along with avc_has_perm_flags() itself, since now it's identical
to avc_has_perm() (as pointed out by Paul Moore)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
[PM: add "selinux:" prefix to subj and tweak for length]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/avc.c         | 20 +-------------------
 security/selinux/hooks.c       |  7 +++----
 security/selinux/include/avc.h |  9 +--------
 3 files changed, 5 insertions(+), 31 deletions(-)

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 9322687cb030..97f4c944a20f 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -1180,25 +1180,7 @@ int avc_has_perm(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass,
 				  &avd);
 
 	rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc,
-			auditdata, 0);
-	if (rc2)
-		return rc2;
-	return rc;
-}
-
-int avc_has_perm_flags(struct selinux_state *state,
-		       u32 ssid, u32 tsid, u16 tclass, u32 requested,
-		       struct common_audit_data *auditdata,
-		       int flags)
-{
-	struct av_decision avd;
-	int rc, rc2;
-
-	rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0,
-				  &avd);
-
-	rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc,
-			auditdata, flags);
+			auditdata);
 	if (rc2)
 		return rc2;
 	return rc;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4e64f681e5d1..b0032c42333e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1676,7 +1676,7 @@ static int cred_has_capability(const struct cred *cred,
 				  sid, sid, sclass, av, 0, &avd);
 	if (!(opts & CAP_OPT_NOAUDIT)) {
 		int rc2 = avc_audit(&selinux_state,
-				    sid, sid, sclass, av, &avd, rc, &ad, 0);
+				    sid, sid, sclass, av, &avd, rc, &ad);
 		if (rc2)
 			return rc2;
 	}
@@ -3153,9 +3153,8 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode,
 	if (IS_ERR(isec))
 		return PTR_ERR(isec);
 
-	return avc_has_perm_flags(&selinux_state,
-				  sid, isec->sid, isec->sclass, FILE__READ, &ad,
-				  rcu ? MAY_NOT_BLOCK : 0);
+	return avc_has_perm(&selinux_state,
+				  sid, isec->sid, isec->sclass, FILE__READ, &ad);
 }
 
 static noinline int audit_inode_permission(struct inode *inode,
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index c3bbfc8e8b46..00f78be48283 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -111,7 +111,6 @@ int slow_avc_audit(struct selinux_state *state,
  * @avd: access vector decisions
  * @result: result from avc_has_perm_noaudit
  * @a:  auxiliary audit data
- * @flags: VFS walk flags
  *
  * Audit the granting or denial of permissions in accordance
  * with the policy.  This function is typically called by
@@ -127,8 +126,7 @@ static inline int avc_audit(struct selinux_state *state,
 			    u16 tclass, u32 requested,
 			    struct av_decision *avd,
 			    int result,
-			    struct common_audit_data *a,
-			    int flags)
+			    struct common_audit_data *a)
 {
 	u32 audited, denied;
 	audited = avc_audit_required(requested, avd, result, 0, &denied);
@@ -151,11 +149,6 @@ int avc_has_perm(struct selinux_state *state,
 		 u32 ssid, u32 tsid,
 		 u16 tclass, u32 requested,
 		 struct common_audit_data *auditdata);
-int avc_has_perm_flags(struct selinux_state *state,
-		       u32 ssid, u32 tsid,
-		       u16 tclass, u32 requested,
-		       struct common_audit_data *auditdata,
-		       int flags);
 
 int avc_has_extended_perms(struct selinux_state *state,
 			   u32 ssid, u32 tsid, u16 tclass, u32 requested,

From 0ecc617858fce305bedcf735eb18a46cb7e9921f Mon Sep 17 00:00:00 2001
From: Austin Kim <austin.kim@lge.com>
Date: Fri, 11 Jun 2021 03:28:35 +0100
Subject: [PATCH 1433/3292] audit: remove unnecessary 'ret' initialization

The variable 'ret' is set to 0 when declared.
The 'ret' is unused until it is set to 0 again.

So it had better remove unnecessary initialization.

Signed-off-by: Austin Kim <austin.kim@lge.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/lsm_audit.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 82ce14933513..5a5016ef43b0 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -119,7 +119,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
 		return -EINVAL;
 	ad->u.net->v6info.saddr = ip6->saddr;
 	ad->u.net->v6info.daddr = ip6->daddr;
-	ret = 0;
 	/* IPv6 can have several extension header before the Transport header
 	 * skip them */
 	offset = skb_network_offset(skb);

From f5fe8d5107ad68279528f39ceae64ab0d68deb3c Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 11 Jun 2021 12:55:40 -0400
Subject: [PATCH 1434/3292] fs: dlm: fix race in mhandle deletion

This patch fixes a race between mhandle deletion in case of receiving an
acknowledge and flush of all pending mhandle in cases of an timeout or
resetting node states.

Fixes: 489d8e559c65 ("fs: dlm: add reliable connection if reconnect")
Reported-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 7d217234b697..92f95ee7003a 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -287,6 +287,14 @@ static void dlm_mhandle_release(struct rcu_head *rcu)
 	kfree(mh);
 }
 
+static void dlm_mhandle_delete(struct midcomms_node *node,
+			       struct dlm_mhandle *mh)
+{
+	list_del_rcu(&mh->list);
+	atomic_dec(&node->send_queue_cnt);
+	call_rcu(&mh->rcu, dlm_mhandle_release);
+}
+
 static void dlm_send_queue_flush(struct midcomms_node *node)
 {
 	struct dlm_mhandle *mh;
@@ -294,15 +302,11 @@ static void dlm_send_queue_flush(struct midcomms_node *node)
 	pr_debug("flush midcomms send queue of node %d\n", node->nodeid);
 
 	rcu_read_lock();
+	spin_lock(&node->send_queue_lock);
 	list_for_each_entry_rcu(mh, &node->send_queue, list) {
-		spin_lock(&node->send_queue_lock);
-		list_del_rcu(&mh->list);
-		spin_unlock(&node->send_queue_lock);
-
-		atomic_dec(&node->send_queue_cnt);
-
-		call_rcu(&mh->rcu, dlm_mhandle_release);
+		dlm_mhandle_delete(node, mh);
 	}
+	spin_unlock(&node->send_queue_lock);
 	rcu_read_unlock();
 }
 
@@ -424,21 +428,24 @@ static void dlm_receive_ack(struct midcomms_node *node, uint32_t seq)
 	rcu_read_lock();
 	list_for_each_entry_rcu(mh, &node->send_queue, list) {
 		if (before(mh->seq, seq)) {
-			spin_lock(&node->send_queue_lock);
-			list_del_rcu(&mh->list);
-			spin_unlock(&node->send_queue_lock);
-
-			atomic_dec(&node->send_queue_cnt);
-
 			if (mh->ack_rcv)
 				mh->ack_rcv(node);
-
-			call_rcu(&mh->rcu, dlm_mhandle_release);
 		} else {
 			/* send queue should be ordered */
 			break;
 		}
 	}
+
+	spin_lock(&node->send_queue_lock);
+	list_for_each_entry_rcu(mh, &node->send_queue, list) {
+		if (before(mh->seq, seq)) {
+			dlm_mhandle_delete(node, mh);
+		} else {
+			/* send queue should be ordered */
+			break;
+		}
+	}
+	spin_unlock(&node->send_queue_lock);
 	rcu_read_unlock();
 }
 

From 957adb68b3f7df8421a05f1647d3027f2acad310 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 11 Jun 2021 12:55:41 -0400
Subject: [PATCH 1435/3292] fs: dlm: invalid buffer access in lookup error

This patch will evaluate the message length if a dlm opts header can fit
in before accessing it if a node lookup fails. The invalid sequence
error means that the version detection failed and an unexpected message
arrived. For debugging such situation the type of arrived message is
important to know.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 92f95ee7003a..e3de268898ed 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -621,8 +621,23 @@ dlm_midcomms_recv_node_lookup(int nodeid, const union dlm_packet *p,
 
 	node = nodeid2node(nodeid, allocation);
 	if (!node) {
-		log_print_ratelimited("received dlm message cmd %d nextcmd %d from node %d in an invalid sequence",
-				      p->header.h_cmd, p->opts.o_nextcmd, nodeid);
+		switch (p->header.h_cmd) {
+		case DLM_OPTS:
+			if (msglen < sizeof(struct dlm_opts)) {
+				log_print("opts msg too small: %u, will skip this message from node %d",
+					  msglen, nodeid);
+				return NULL;
+			}
+
+			log_print_ratelimited("received dlm opts message nextcmd %d from node %d in an invalid sequence",
+					      p->opts.o_nextcmd, nodeid);
+			break;
+		default:
+			log_print_ratelimited("received dlm message cmd %d from node %d in an invalid sequence",
+					      p->header.h_cmd, nodeid);
+			break;
+		}
+
 		return NULL;
 	}
 

From cdb14e0f7775e767484843e8ecd736bb21754c58 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:16 +0300
Subject: [PATCH 1436/3292] blk-mq: factor out a blk_mq_alloc_sq_tag_set helper

Factour out a helper to initialize a simple single hw queue tag_set from
blk_mq_init_sq_queue.  This will allow to phase out blk_mq_init_sq_queue
in favor of a more symmetric and general API.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 32 ++++++++++++++++++--------------
 include/linux/blk-mq.h |  3 +++
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4261adee9964..867e5faf4f5b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3152,24 +3152,12 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
 	struct request_queue *q;
 	int ret;
 
-	memset(set, 0, sizeof(*set));
-	set->ops = ops;
-	set->nr_hw_queues = 1;
-	set->nr_maps = 1;
-	set->queue_depth = queue_depth;
-	set->numa_node = NUMA_NO_NODE;
-	set->flags = set_flags;
-
-	ret = blk_mq_alloc_tag_set(set);
+	ret = blk_mq_alloc_sq_tag_set(set, ops, queue_depth, set_flags);
 	if (ret)
 		return ERR_PTR(ret);
-
 	q = blk_mq_init_queue(set);
-	if (IS_ERR(q)) {
+	if (IS_ERR(q))
 		blk_mq_free_tag_set(set);
-		return q;
-	}
-
 	return q;
 }
 EXPORT_SYMBOL(blk_mq_init_sq_queue);
@@ -3589,6 +3577,22 @@ out_free_mq_map:
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
 
+/* allocate and initialize a tagset for a simple single-queue device */
+int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
+		const struct blk_mq_ops *ops, unsigned int queue_depth,
+		unsigned int set_flags)
+{
+	memset(set, 0, sizeof(*set));
+	set->ops = ops;
+	set->nr_hw_queues = 1;
+	set->nr_maps = 1;
+	set->queue_depth = queue_depth;
+	set->numa_node = NUMA_NO_NODE;
+	set->flags = set_flags;
+	return blk_mq_alloc_tag_set(set);
+}
+EXPORT_SYMBOL_GPL(blk_mq_alloc_sq_tag_set);
+
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
 	int i, j;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 359486940fa0..bb950fc669ef 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -439,6 +439,9 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
+int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
+		const struct blk_mq_ops *ops, unsigned int queue_depth,
+		unsigned int set_flags);
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

From 26a9750aa875126e4b7fc5ee6de652a529c5b7ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:17 +0300
Subject: [PATCH 1437/3292] blk-mq: improve the blk_mq_init_allocated_queue
 interface

Don't return the passed in request_queue but a normal error code, and
drop the elevator_init argument in favor of just calling elevator_init_mq
directly from dm-rq.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c           | 36 ++++++++++++++----------------------
 block/blk.h              |  1 -
 block/elevator.c         |  2 +-
 drivers/md/dm-rq.c       |  9 +++------
 include/linux/blk-mq.h   |  5 ++---
 include/linux/elevator.h |  1 +
 6 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 867e5faf4f5b..8550ad64982f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3115,21 +3115,18 @@ void blk_mq_release(struct request_queue *q)
 struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 		void *queuedata)
 {
-	struct request_queue *uninit_q, *q;
+	struct request_queue *q;
+	int ret;
 
-	uninit_q = blk_alloc_queue(set->numa_node);
-	if (!uninit_q)
+	q = blk_alloc_queue(set->numa_node);
+	if (!q)
 		return ERR_PTR(-ENOMEM);
-	uninit_q->queuedata = queuedata;
-
-	/*
-	 * Initialize the queue without an elevator. device_add_disk() will do
-	 * the initialization.
-	 */
-	q = blk_mq_init_allocated_queue(set, uninit_q, false);
-	if (IS_ERR(q))
-		blk_cleanup_queue(uninit_q);
-
+	q->queuedata = queuedata;
+	ret = blk_mq_init_allocated_queue(set, q);
+	if (ret) {
+		blk_cleanup_queue(q);
+		return ERR_PTR(ret);
+	}
 	return q;
 }
 EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
@@ -3273,9 +3270,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	mutex_unlock(&q->sysfs_lock);
 }
 
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-						  struct request_queue *q,
-						  bool elevator_init)
+int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+		struct request_queue *q)
 {
 	/* mark the queue as mq asap */
 	q->mq_ops = set->ops;
@@ -3325,11 +3321,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 	blk_mq_add_queue_tag_set(set, q);
 	blk_mq_map_swqueue(q);
-
-	if (elevator_init)
-		elevator_init_mq(q);
-
-	return q;
+	return 0;
 
 err_hctxs:
 	kfree(q->queue_hw_ctx);
@@ -3340,7 +3332,7 @@ err_poll:
 	q->poll_cb = NULL;
 err_exit:
 	q->mq_ops = NULL;
-	return ERR_PTR(-ENOMEM);
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
diff --git a/block/blk.h b/block/blk.h
index 3440142f029b..d3fa47af3607 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -192,7 +192,6 @@ void blk_account_io_done(struct request *req, u64 now);
 
 void blk_insert_flush(struct request *rq);
 
-void elevator_init_mq(struct request_queue *q);
 int elevator_switch_mq(struct request_queue *q,
 			      struct elevator_type *new_e);
 void __elevator_exit(struct request_queue *, struct elevator_queue *);
diff --git a/block/elevator.c b/block/elevator.c
index 440699c28119..06e203426410 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -693,7 +693,7 @@ void elevator_init_mq(struct request_queue *q)
 		elevator_put(e);
 	}
 }
-
+EXPORT_SYMBOL_GPL(elevator_init_mq); /* only for dm-rq */
 
 /*
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 9c3bc3711b33..0dbd48cbdff9 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -530,7 +530,6 @@ static const struct blk_mq_ops dm_mq_ops = {
 
 int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
 {
-	struct request_queue *q;
 	struct dm_target *immutable_tgt;
 	int err;
 
@@ -557,12 +556,10 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
 	if (err)
 		goto out_kfree_tag_set;
 
-	q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
-	if (IS_ERR(q)) {
-		err = PTR_ERR(q);
+	err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
+	if (err)
 		goto out_tag_set;
-	}
-
+	elevator_init_mq(md->queue);
 	return 0;
 
 out_tag_set:
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index bb950fc669ef..73750b2838d2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -429,9 +429,8 @@ enum {
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 		void *queuedata);
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-						  struct request_queue *q,
-						  bool elevator_init);
+int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+		struct request_queue *q);
 struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
 						const struct blk_mq_ops *ops,
 						unsigned int queue_depth,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index dcb2f9022c1d..783ecb3cb77a 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -120,6 +120,7 @@ extern void elv_merged_request(struct request_queue *, struct request *,
 extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
+void elevator_init_mq(struct request_queue *q);
 
 /*
  * io scheduler registration

From b461dfc49eb6fbabc60b9dad476e787ada56b7b4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:18 +0300
Subject: [PATCH 1438/3292] blk-mq: add the blk_mq_alloc_disk APIs

Add a new API to allocate a gendisk including the request_queue for use
with blk-mq based drivers.  This is to avoid boilerplate code in drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 19 +++++++++++++++++++
 include/linux/blk-mq.h | 12 ++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8550ad64982f..b123077a0dc4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3137,6 +3137,25 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
+{
+	struct request_queue *q;
+	struct gendisk *disk;
+
+	q = blk_mq_init_queue_data(set, queuedata);
+	if (IS_ERR(q))
+		return ERR_CAST(q);
+
+	disk = __alloc_disk_node(0, set->numa_node);
+	if (!disk) {
+		blk_cleanup_queue(q);
+		return ERR_PTR(-ENOMEM);
+	}
+	disk->queue = q;
+	return disk;
+}
+EXPORT_SYMBOL(__blk_mq_alloc_disk);
+
 /*
  * Helper for setting up a queue with mq ops, given queue depth, and
  * the passed in mq ops flags.
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 73750b2838d2..f496c6c5b5d2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -426,6 +426,18 @@ enum {
 	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
 		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
+#define blk_mq_alloc_disk(set, queuedata)				\
+({									\
+	static struct lock_class_key __key;				\
+	struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata);	\
+									\
+	if (__disk)							\
+		lockdep_init_map(&__disk->lockdep_map,			\
+			"(bio completion)", &__key, 0);			\
+	__disk;								\
+})
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
+		void *queuedata);
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 		void *queuedata);

From 89a5f065653bcf7d8c3e4101e025e6c7b03339e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:19 +0300
Subject: [PATCH 1439/3292] virtio-blk: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/virtio_blk.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index b9fa3ef5b57c..e4bd3b1fc3c2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -749,13 +749,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_free_vblk;
 
-	/* FIXME: How many partitions?  How long is a piece of string? */
-	vblk->disk = alloc_disk(1 << PART_BITS);
-	if (!vblk->disk) {
-		err = -ENOMEM;
-		goto out_free_vq;
-	}
-
 	/* Default queue sizing is to fill the ring. */
 	if (likely(!virtblk_queue_depth)) {
 		queue_depth = vblk->vqs[0].vq->num_free;
@@ -779,21 +772,20 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	err = blk_mq_alloc_tag_set(&vblk->tag_set);
 	if (err)
-		goto out_put_disk;
+		goto out_free_vq;
 
-	q = blk_mq_init_queue(&vblk->tag_set);
-	if (IS_ERR(q)) {
-		err = -ENOMEM;
+	vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
+	if (IS_ERR(vblk->disk)) {
+		err = PTR_ERR(vblk->disk);
 		goto out_free_tags;
 	}
-	vblk->disk->queue = q;
-
-	q->queuedata = vblk;
+	q = vblk->disk->queue;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
 
 	vblk->disk->major = major;
 	vblk->disk->first_minor = index_to_minor(index);
+	vblk->disk->minors = 1 << PART_BITS;
 	vblk->disk->private_data = vblk;
 	vblk->disk->fops = &virtblk_fops;
 	vblk->disk->flags |= GENHD_FL_EXT_DEVT;
@@ -892,8 +884,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 out_free_tags:
 	blk_mq_free_tag_set(&vblk->tag_set);
-out_put_disk:
-	put_disk(vblk->disk);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 	kfree(vblk->vqs);
@@ -913,8 +903,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 	flush_work(&vblk->config_work);
 
 	del_gendisk(vblk->disk);
-	blk_cleanup_queue(vblk->disk->queue);
-
+	blk_cleanup_disk(vblk->disk);
 	blk_mq_free_tag_set(&vblk->tag_set);
 
 	mutex_lock(&vblk->vdev_mutex);
@@ -925,7 +914,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 	/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
 	vblk->vdev = NULL;
 
-	put_disk(vblk->disk);
 	vdev->config->del_vqs(vdev);
 	kfree(vblk->vqs);
 

From 9c4f8971cc26cd485a39d02706533717eb3430dc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:20 +0300
Subject: [PATCH 1440/3292] pcd: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/paride/pcd.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 70da8b86ce58..f9cdd11f02f5 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -309,21 +309,19 @@ static void pcd_init_units(void)
 
 	pcd_drive_count = 0;
 	for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
-		struct gendisk *disk = alloc_disk(1);
+		struct gendisk *disk;
 
-		if (!disk)
+		if (blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1,
+				BLK_MQ_F_SHOULD_MERGE))
 			continue;
 
-		disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
-						   1, BLK_MQ_F_SHOULD_MERGE);
-		if (IS_ERR(disk->queue)) {
-			disk->queue = NULL;
-			put_disk(disk);
+		disk = blk_mq_alloc_disk(&cd->tag_set, cd);
+		if (IS_ERR(disk)) {
+			blk_mq_free_tag_set(&cd->tag_set);
 			continue;
 		}
 
 		INIT_LIST_HEAD(&cd->rq_list);
-		disk->queue->queuedata = cd;
 		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		cd->disk = disk;
 		cd->pi = &cd->pia;
@@ -343,6 +341,7 @@ static void pcd_init_units(void)
 		cd->info.mask = 0;
 		disk->major = major;
 		disk->first_minor = unit;
+		disk->minors = 1;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
 		disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
@@ -759,10 +758,8 @@ static int pcd_detect(void)
 	for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
 		if (!cd->disk)
 			continue;
-		blk_cleanup_queue(cd->disk->queue);
-		cd->disk->queue = NULL;
+		blk_cleanup_disk(cd->disk);
 		blk_mq_free_tag_set(&cd->tag_set);
-		put_disk(cd->disk);
 	}
 	pi_unregister_driver(par_drv);
 	return -1;

From c684b577968abeef96bf3c75d76d2dc19a1eb080 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:21 +0300
Subject: [PATCH 1441/3292] pf: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/paride/pf.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index bb09f21ce21a..d5b9c88ba76f 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -294,20 +294,17 @@ static void __init pf_init_units(void)
 	for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) {
 		struct gendisk *disk;
 
-		disk = alloc_disk(1);
-		if (!disk)
+		if (blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1,
+				BLK_MQ_F_SHOULD_MERGE))
 			continue;
 
-		disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops,
-							1, BLK_MQ_F_SHOULD_MERGE);
-		if (IS_ERR(disk->queue)) {
-			disk->queue = NULL;
-			put_disk(disk);
+		disk = blk_mq_alloc_disk(&pf->tag_set, pf);
+		if (IS_ERR(disk)) {
+			blk_mq_free_tag_set(&pf->tag_set);
 			continue;
 		}
 
 		INIT_LIST_HEAD(&pf->rq_list);
-		disk->queue->queuedata = pf;
 		blk_queue_max_segments(disk->queue, cluster);
 		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
 		pf->disk = disk;
@@ -318,6 +315,7 @@ static void __init pf_init_units(void)
 		snprintf(pf->name, PF_NAMELEN, "%s%d", name, unit);
 		disk->major = major;
 		disk->first_minor = unit;
+		disk->minors = 1;
 		strcpy(disk->disk_name, pf->name);
 		disk->fops = &pf_fops;
 		disk->events = DISK_EVENT_MEDIA_CHANGE;
@@ -766,10 +764,8 @@ static int pf_detect(void)
 	for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
 		if (!pf->disk)
 			continue;
-		blk_cleanup_queue(pf->disk->queue);
-		pf->disk->queue = NULL;
+		blk_cleanup_disk(pf->disk);
 		blk_mq_free_tag_set(&pf->tag_set);
-		put_disk(pf->disk);
 	}
 	pi_unregister_driver(par_drv);
 	return -1;

From f368b7d7fa77768026d439ec9c32fe16a2d5eb52 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:22 +0300
Subject: [PATCH 1442/3292] ms_block: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/memstick/core/ms_block.c | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 0bacf4268f83..dac258d12aca 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2110,21 +2110,17 @@ static int msb_init_disk(struct memstick_dev *card)
 	if (msb->disk_id  < 0)
 		return msb->disk_id;
 
-	msb->disk = alloc_disk(0);
-	if (!msb->disk) {
-		rc = -ENOMEM;
+	rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &msb_mq_ops, 2,
+				     BLK_MQ_F_SHOULD_MERGE);
+	if (rc)
 		goto out_release_id;
-	}
 
-	msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &msb_mq_ops, 2,
-						BLK_MQ_F_SHOULD_MERGE);
-	if (IS_ERR(msb->queue)) {
-		rc = PTR_ERR(msb->queue);
-		msb->queue = NULL;
-		goto out_put_disk;
+	msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
+	if (IS_ERR(msb->disk)) {
+		rc = PTR_ERR(msb->disk);
+		goto out_free_tag_set;
 	}
-
-	msb->queue->queuedata = card;
+	msb->queue = msb->disk->queue;
 
 	blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
 	blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
@@ -2135,7 +2131,6 @@ static int msb_init_disk(struct memstick_dev *card)
 	sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id);
 	msb->disk->fops = &msb_bdops;
 	msb->disk->private_data = msb;
-	msb->disk->queue = msb->queue;
 
 	capacity = msb->pages_in_block * msb->logical_block_count;
 	capacity *= (msb->page_size / 512);
@@ -2155,8 +2150,8 @@ static int msb_init_disk(struct memstick_dev *card)
 	dbg("Disk added");
 	return 0;
 
-out_put_disk:
-	put_disk(msb->disk);
+out_free_tag_set:
+	blk_mq_free_tag_set(&msb->tag_set);
 out_release_id:
 	mutex_lock(&msb_disk_lock);
 	idr_remove(&msb_disk_idr, msb->disk_id);

From 51ed5bd55eb602fd8b3531bb919bcb59849fa569 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:23 +0300
Subject: [PATCH 1443/3292] mspro: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/memstick/core/mspro_block.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index cf7fe0d58ee7..22778d0e24f5 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1205,21 +1205,17 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	if (disk_id < 0)
 		return disk_id;
 
-	msb->disk = alloc_disk(1 << MSPRO_BLOCK_PART_SHIFT);
-	if (!msb->disk) {
-		rc = -ENOMEM;
+	rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &mspro_mq_ops, 2,
+				     BLK_MQ_F_SHOULD_MERGE);
+	if (rc)
 		goto out_release_id;
-	}
 
-	msb->queue = blk_mq_init_sq_queue(&msb->tag_set, &mspro_mq_ops, 2,
-						BLK_MQ_F_SHOULD_MERGE);
-	if (IS_ERR(msb->queue)) {
-		rc = PTR_ERR(msb->queue);
-		msb->queue = NULL;
-		goto out_put_disk;
+	msb->disk = blk_mq_alloc_disk(&msb->tag_set, card);
+	if (IS_ERR(msb->disk)) {
+		rc = PTR_ERR(msb->disk);
+		goto out_free_tag_set;
 	}
-
-	msb->queue->queuedata = card;
+	msb->queue = msb->disk->queue;
 
 	blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
 	blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
@@ -1228,10 +1224,10 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 
 	msb->disk->major = major;
 	msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
+	msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
 	msb->disk->fops = &ms_block_bdops;
 	msb->usage_count = 1;
 	msb->disk->private_data = msb;
-	msb->disk->queue = msb->queue;
 
 	sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
 
@@ -1247,8 +1243,8 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	msb->active = 1;
 	return 0;
 
-out_put_disk:
-	put_disk(msb->disk);
+out_free_tag_set:
+	blk_mq_free_tag_set(&msb->tag_set);
 out_release_id:
 	mutex_lock(&mspro_block_disk_lock);
 	idr_remove(&mspro_block_disk_idr, disk_id);

From 6966bb921def0a4ef1e069f806c086efae6782ea Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:24 +0300
Subject: [PATCH 1444/3292] mtd_blkdevs: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/mtd/mtd_blkdevs.c | 48 ++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index fb8e12d590a1..5dc4c966ea73 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -30,11 +30,9 @@ static void blktrans_dev_release(struct kref *kref)
 	struct mtd_blktrans_dev *dev =
 		container_of(kref, struct mtd_blktrans_dev, ref);
 
-	dev->disk->private_data = NULL;
-	blk_cleanup_queue(dev->rq);
+	blk_cleanup_disk(dev->disk);
 	blk_mq_free_tag_set(dev->tag_set);
 	kfree(dev->tag_set);
-	put_disk(dev->disk);
 	list_del(&dev->list);
 	kfree(dev);
 }
@@ -354,7 +352,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	if (new->devnum > (MINORMASK >> tr->part_bits) ||
 	    (tr->part_bits && new->devnum >= 27 * 26)) {
 		mutex_unlock(&blktrans_ref_mutex);
-		goto error1;
+		return ret;
 	}
 
 	list_add_tail(&new->list, &tr->devs);
@@ -366,17 +364,28 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	if (!tr->writesect)
 		new->readonly = 1;
 
-	/* Create gendisk */
 	ret = -ENOMEM;
-	gd = alloc_disk(1 << tr->part_bits);
+	new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL);
+	if (!new->tag_set)
+		goto out_list_del;
 
-	if (!gd)
-		goto error2;
+	ret = blk_mq_alloc_sq_tag_set(new->tag_set, &mtd_mq_ops, 2,
+			BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
+	if (ret)
+		goto out_kfree_tag_set;
+
+	/* Create gendisk */
+	gd = blk_mq_alloc_disk(new->tag_set, new);
+	if (IS_ERR(gd)) {
+		ret = PTR_ERR(gd);
+		goto out_free_tag_set;
+	}
 
 	new->disk = gd;
 	gd->private_data = new;
 	gd->major = tr->major;
 	gd->first_minor = (new->devnum) << tr->part_bits;
+	gd->minors = 1 << tr->part_bits;
 	gd->fops = &mtd_block_ops;
 
 	if (tr->part_bits)
@@ -398,22 +407,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	spin_lock_init(&new->queue_lock);
 	INIT_LIST_HEAD(&new->rq_list);
 
-	new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL);
-	if (!new->tag_set)
-		goto error3;
-
-	new->rq = blk_mq_init_sq_queue(new->tag_set, &mtd_mq_ops, 2,
-				BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
-	if (IS_ERR(new->rq)) {
-		ret = PTR_ERR(new->rq);
-		new->rq = NULL;
-		goto error4;
-	}
-
 	if (tr->flush)
 		blk_queue_write_cache(new->rq, true, false);
 
-	new->rq->queuedata = new;
 	blk_queue_logical_block_size(new->rq, tr->blksize);
 
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
@@ -437,13 +433,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 		WARN_ON(ret);
 	}
 	return 0;
-error4:
+
+out_free_tag_set:
+	blk_mq_free_tag_set(new->tag_set);
+out_kfree_tag_set:
 	kfree(new->tag_set);
-error3:
-	put_disk(new->disk);
-error2:
+out_list_del:
 	list_del(&new->list);
-error1:
 	return ret;
 }
 

From 89662ac55a204d82f9b0c1497e060d18b51fadc2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:25 +0300
Subject: [PATCH 1445/3292] ps3disk: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Geoff Levand <geoff@infradead.org>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/ps3disk.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index ba3ece56cbb3..f374ea2c67ce 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -29,7 +29,6 @@
 
 struct ps3disk_private {
 	spinlock_t lock;		/* Request queue spinlock */
-	struct request_queue *queue;
 	struct blk_mq_tag_set tag_set;
 	struct gendisk *gendisk;
 	unsigned int blocking_factor;
@@ -267,7 +266,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
 	blk_mq_end_request(req, error);
 	spin_unlock(&priv->lock);
 
-	blk_mq_run_hw_queues(priv->queue, true);
+	blk_mq_run_hw_queues(priv->gendisk->queue, true);
 	return IRQ_HANDLED;
 }
 
@@ -441,17 +440,20 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 
 	ps3disk_identify(dev);
 
-	queue = blk_mq_init_sq_queue(&priv->tag_set, &ps3disk_mq_ops, 1,
+	error = blk_mq_alloc_sq_tag_set(&priv->tag_set, &ps3disk_mq_ops, 1,
 					BLK_MQ_F_SHOULD_MERGE);
-	if (IS_ERR(queue)) {
-		dev_err(&dev->sbd.core, "%s:%u: blk_mq_init_queue failed\n",
-			__func__, __LINE__);
-		error = PTR_ERR(queue);
+	if (error)
 		goto fail_teardown;
+
+	gendisk = blk_mq_alloc_disk(&priv->tag_set, dev);
+	if (IS_ERR(gendisk)) {
+		dev_err(&dev->sbd.core, "%s:%u: blk_mq_alloc_disk failed\n",
+			__func__, __LINE__);
+		error = PTR_ERR(gendisk);
+		goto fail_free_tag_set;
 	}
 
-	priv->queue = queue;
-	queue->queuedata = dev;
+	queue = gendisk->queue;
 
 	blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
@@ -462,19 +464,11 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 	blk_queue_max_segments(queue, -1);
 	blk_queue_max_segment_size(queue, dev->bounce_size);
 
-	gendisk = alloc_disk(PS3DISK_MINORS);
-	if (!gendisk) {
-		dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__,
-			__LINE__);
-		error = -ENOMEM;
-		goto fail_cleanup_queue;
-	}
-
 	priv->gendisk = gendisk;
 	gendisk->major = ps3disk_major;
 	gendisk->first_minor = devidx * PS3DISK_MINORS;
+	gendisk->minors = PS3DISK_MINORS;
 	gendisk->fops = &ps3disk_fops;
-	gendisk->queue = queue;
 	gendisk->private_data = dev;
 	snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME,
 		 devidx+'a');
@@ -490,8 +484,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 	device_add_disk(&dev->sbd.core, gendisk, NULL);
 	return 0;
 
-fail_cleanup_queue:
-	blk_cleanup_queue(queue);
+fail_free_tag_set:
 	blk_mq_free_tag_set(&priv->tag_set);
 fail_teardown:
 	ps3stor_teardown(dev);
@@ -517,9 +510,8 @@ static void ps3disk_remove(struct ps3_system_bus_device *_dev)
 		    &ps3disk_mask);
 	mutex_unlock(&ps3disk_mask_mutex);
 	del_gendisk(priv->gendisk);
-	blk_cleanup_queue(priv->queue);
+	blk_cleanup_disk(priv->gendisk);
 	blk_mq_free_tag_set(&priv->tag_set);
-	put_disk(priv->gendisk);
 	dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
 	ps3disk_sync_cache(dev);
 	ps3stor_teardown(dev);

From 9c8463e8e19c442aaf896468ce72c1ed82655781 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:26 +0300
Subject: [PATCH 1446/3292] swim3: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/swim3.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index a515d0c1d2cb..965af0a3e95b 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1202,30 +1202,27 @@ static int swim3_attach(struct macio_dev *mdev,
 			return rc;
 	}
 
-	disk = alloc_disk(1);
-	if (disk == NULL) {
-		rc = -ENOMEM;
-		goto out_unregister;
-	}
-
 	fs = &floppy_states[floppy_count];
 	memset(fs, 0, sizeof(*fs));
 
-	disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2,
-						BLK_MQ_F_SHOULD_MERGE);
-	if (IS_ERR(disk->queue)) {
-		rc = PTR_ERR(disk->queue);
-		disk->queue = NULL;
-		goto out_put_disk;
+	rc = blk_mq_alloc_sq_tag_set(&fs->tag_set, &swim3_mq_ops, 2,
+			BLK_MQ_F_SHOULD_MERGE);
+	if (rc)
+		goto out_unregister;
+
+	disk = blk_mq_alloc_disk(&fs->tag_set, fs);
+	if (IS_ERR(disk)) {
+		rc = PTR_ERR(disk);
+		goto out_free_tag_set;
 	}
-	disk->queue->queuedata = fs;
 
 	rc = swim3_add_device(mdev, floppy_count);
 	if (rc)
-		goto out_cleanup_queue;
+		goto out_cleanup_disk;
 
 	disk->major = FLOPPY_MAJOR;
 	disk->first_minor = floppy_count;
+	disk->minors = 1;
 	disk->fops = &floppy_fops;
 	disk->private_data = fs;
 	disk->events = DISK_EVENT_MEDIA_CHANGE;
@@ -1237,12 +1234,10 @@ static int swim3_attach(struct macio_dev *mdev,
 	disks[floppy_count++] = disk;
 	return 0;
 
-out_cleanup_queue:
-	blk_cleanup_queue(disk->queue);
-	disk->queue = NULL;
+out_cleanup_disk:
+	blk_cleanup_disk(disk);
+out_free_tag_set:
 	blk_mq_free_tag_set(&fs->tag_set);
-out_put_disk:
-	put_disk(disk);
 out_unregister:
 	if (floppy_count == 0)
 		unregister_blkdev(FLOPPY_MAJOR, "fd");

From 51fbfedfcc86273eded52f05a2c1aa75e91df8ff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:27 +0300
Subject: [PATCH 1447/3292] swim: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/swim.c | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 2917b21f48ff..7ccc8d2a41bc 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -800,23 +800,20 @@ static int swim_floppy_init(struct swim_priv *swd)
 	spin_lock_init(&swd->lock);
 
 	for (drive = 0; drive < swd->floppy_count; drive++) {
-		struct request_queue *q;
+		err = blk_mq_alloc_sq_tag_set(&swd->unit[drive].tag_set,
+				&swim_mq_ops, 2, BLK_MQ_F_SHOULD_MERGE);
+		if (err)
+			goto exit_put_disks;
 
-		swd->unit[drive].disk = alloc_disk(1);
-		if (swd->unit[drive].disk == NULL) {
-			err = -ENOMEM;
+		swd->unit[drive].disk =
+			blk_mq_alloc_disk(&swd->unit[drive].tag_set,
+					  &swd->unit[drive]);
+		if (IS_ERR(swd->unit[drive].disk)) {
+			blk_mq_free_tag_set(&swd->unit[drive].tag_set);
+			err = PTR_ERR(swd->unit[drive].disk);
 			goto exit_put_disks;
 		}
 
-		q = blk_mq_init_sq_queue(&swd->unit[drive].tag_set, &swim_mq_ops,
-						2, BLK_MQ_F_SHOULD_MERGE);
-		if (IS_ERR(q)) {
-			err = PTR_ERR(q);
-			goto exit_put_disks;
-		}
-
-		swd->unit[drive].disk->queue = q;
-		swd->unit[drive].disk->queue->queuedata = &swd->unit[drive];
 		swd->unit[drive].swd = swd;
 	}
 
@@ -824,6 +821,7 @@ static int swim_floppy_init(struct swim_priv *swd)
 		swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE;
 		swd->unit[drive].disk->major = FLOPPY_MAJOR;
 		swd->unit[drive].disk->first_minor = drive;
+		swd->unit[drive].disk->minors = 1;
 		sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
 		swd->unit[drive].disk->fops = &floppy_fops;
 		swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE;
@@ -839,14 +837,10 @@ exit_put_disks:
 	do {
 		struct gendisk *disk = swd->unit[drive].disk;
 
-		if (disk) {
-			if (disk->queue) {
-				blk_cleanup_queue(disk->queue);
-				disk->queue = NULL;
-			}
-			blk_mq_free_tag_set(&swd->unit[drive].tag_set);
-			put_disk(disk);
-		}
+		if (!disk)
+			continue;
+		blk_cleanup_disk(disk);
+		blk_mq_free_tag_set(&swd->unit[drive].tag_set);
 	} while (drive--);
 	return err;
 }

From afea05a18d18673750bad33f7aa710ff71a78e91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:28 +0300
Subject: [PATCH 1448/3292] sunvdc: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/sunvdc.c | 47 ++++++++++++------------------------------
 1 file changed, 13 insertions(+), 34 deletions(-)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 39aeebc6837d..c53b38578bb7 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -780,27 +780,6 @@ static const struct blk_mq_ops vdc_mq_ops = {
 	.queue_rq	= vdc_queue_rq,
 };
 
-static void cleanup_queue(struct request_queue *q)
-{
-	struct vdc_port *port = q->queuedata;
-
-	blk_cleanup_queue(q);
-	blk_mq_free_tag_set(&port->tag_set);
-}
-
-static struct request_queue *init_queue(struct vdc_port *port)
-{
-	struct request_queue *q;
-
-	q = blk_mq_init_sq_queue(&port->tag_set, &vdc_mq_ops, VDC_TX_RING_SIZE,
-					BLK_MQ_F_SHOULD_MERGE);
-	if (IS_ERR(q))
-		return q;
-
-	q->queuedata = port;
-	return q;
-}
-
 static int probe_disk(struct vdc_port *port)
 {
 	struct request_queue *q;
@@ -838,21 +817,21 @@ static int probe_disk(struct vdc_port *port)
 				    (u64)geom.num_sec);
 	}
 
-	q = init_queue(port);
-	if (IS_ERR(q)) {
-		printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
-		       port->vio.name);
-		return PTR_ERR(q);
-	}
-	g = alloc_disk(1 << PARTITION_SHIFT);
-	if (!g) {
+	err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops,
+			VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE);
+	if (err)
+		return err;
+
+	g = blk_mq_alloc_disk(&port->tag_set, port);
+	if (IS_ERR(g)) {
 		printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
 		       port->vio.name);
-		cleanup_queue(q);
-		return -ENOMEM;
+		blk_mq_free_tag_set(&port->tag_set);
+		return PTR_ERR(g);
 	}
 
 	port->disk = g;
+	q = g->queue;
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
@@ -862,6 +841,7 @@ static int probe_disk(struct vdc_port *port)
 	blk_queue_max_hw_sectors(q, port->max_xfer_size);
 	g->major = vdc_major;
 	g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
+	g->minors = 1 << PARTITION_SHIFT;
 	strcpy(g->disk_name, port->disk_name);
 
 	g->fops = &vdc_fops;
@@ -1083,9 +1063,8 @@ static int vdc_port_remove(struct vio_dev *vdev)
 		del_timer_sync(&port->vio.timer);
 
 		del_gendisk(port->disk);
-		cleanup_queue(port->disk->queue);
-		put_disk(port->disk);
-		port->disk = NULL;
+		blk_cleanup_disk(port->disk);
+		blk_mq_free_tag_set(&port->tag_set);
 
 		vdc_free_tx_ring(port);
 		vio_ldc_free(&port->vio);

From 0592c3d166c967056faa03b944c6c9c9b4e8a0cc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:29 +0300
Subject: [PATCH 1449/3292] gdrom: use blk_mq_alloc_disk

Use the blk_mq_alloc_disk API to simplify the gendisk and request_queue
allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/cdrom/gdrom.c | 45 ++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index c6d8c0f59722..8e1fe75af93f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -772,53 +772,50 @@ static int probe_gdrom(struct platform_device *devptr)
 		goto probe_fail_no_mem;
 	}
 	probe_gdrom_setupcd();
-	gd.disk = alloc_disk(1);
-	if (!gd.disk) {
-		err = -ENODEV;
-		goto probe_fail_no_disk;
+
+	err = blk_mq_alloc_sq_tag_set(&gd.tag_set, &gdrom_mq_ops, 1,
+				BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
+	if (err)
+		goto probe_fail_free_cd_info;
+
+	gd.disk = blk_mq_alloc_disk(&gd.tag_set, NULL);
+	if (IS_ERR(gd.disk)) {
+		err = PTR_ERR(gd.disk);
+		goto probe_fail_free_tag_set;
 	}
+	gd.gdrom_rq = gd.disk->queue;
 	probe_gdrom_setupdisk();
 	if (register_cdrom(gd.disk, gd.cd_info)) {
 		err = -ENODEV;
-		goto probe_fail_cdrom_register;
+		goto probe_fail_cleanup_disk;
 	}
 	gd.disk->fops = &gdrom_bdops;
 	gd.disk->events = DISK_EVENT_MEDIA_CHANGE;
 	/* latch on to the interrupt */
 	err = gdrom_set_interrupt_handlers();
 	if (err)
-		goto probe_fail_cmdirq_register;
-
-	gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1,
-				BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
-	if (IS_ERR(gd.gdrom_rq)) {
-		err = PTR_ERR(gd.gdrom_rq);
-		gd.gdrom_rq = NULL;
-		goto probe_fail_requestq;
-	}
+		goto probe_fail_cleanup_disk;
 
 	err = probe_gdrom_setupqueue();
 	if (err)
-		goto probe_fail_toc;
+		goto probe_fail_free_irqs;
 
 	gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL);
 	if (!gd.toc) {
 		err = -ENOMEM;
-		goto probe_fail_toc;
+		goto probe_fail_free_irqs;
 	}
 	add_disk(gd.disk);
 	return 0;
 
-probe_fail_toc:
-	blk_cleanup_queue(gd.gdrom_rq);
-	blk_mq_free_tag_set(&gd.tag_set);
-probe_fail_requestq:
+probe_fail_free_irqs:
 	free_irq(HW_EVENT_GDROM_DMA, &gd);
 	free_irq(HW_EVENT_GDROM_CMD, &gd);
-probe_fail_cmdirq_register:
-probe_fail_cdrom_register:
-	del_gendisk(gd.disk);
-probe_fail_no_disk:
+probe_fail_cleanup_disk:
+	blk_cleanup_disk(gd.disk);
+probe_fail_free_tag_set:
+	blk_mq_free_tag_set(&gd.tag_set);
+probe_fail_free_cd_info:
 	kfree(gd.cd_info);
 probe_fail_no_mem:
 	unregister_blkdev(gdrom_major, GDROM_DEV_NAME);

From 08c1d480ed38995690a7d83f2c6a505f6cbbed9f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:30 +0300
Subject: [PATCH 1450/3292] blk-mq: remove blk_mq_init_sq_queue

All users are gone now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-16-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 22 ----------------------
 include/linux/blk-mq.h |  4 ----
 2 files changed, 26 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b123077a0dc4..3115ea2d0990 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3156,28 +3156,6 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
 }
 EXPORT_SYMBOL(__blk_mq_alloc_disk);
 
-/*
- * Helper for setting up a queue with mq ops, given queue depth, and
- * the passed in mq ops flags.
- */
-struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
-					   const struct blk_mq_ops *ops,
-					   unsigned int queue_depth,
-					   unsigned int set_flags)
-{
-	struct request_queue *q;
-	int ret;
-
-	ret = blk_mq_alloc_sq_tag_set(set, ops, queue_depth, set_flags);
-	if (ret)
-		return ERR_PTR(ret);
-	q = blk_mq_init_queue(set);
-	if (IS_ERR(q))
-		blk_mq_free_tag_set(set);
-	return q;
-}
-EXPORT_SYMBOL(blk_mq_init_sq_queue);
-
 static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 		struct blk_mq_tag_set *set, struct request_queue *q,
 		int hctx_idx, int node)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f496c6c5b5d2..02a4aab0aeac 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -443,10 +443,6 @@ struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
 		void *queuedata);
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		struct request_queue *q);
-struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
-						const struct blk_mq_ops *ops,
-						unsigned int queue_depth,
-						unsigned int set_flags);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);

From 6560ec961a080944f8d5e1fef17b771bfaf189cb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:31 +0300
Subject: [PATCH 1451/3292] aoe: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-17-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/aoe/aoeblk.c | 33 ++++++++++++---------------------
 drivers/block/aoe/aoedev.c |  3 +--
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index c34e71b0c4a9..06b360f7123a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -338,14 +338,13 @@ static const struct blk_mq_ops aoeblk_mq_ops = {
 	.queue_rq	= aoeblk_queue_rq,
 };
 
-/* alloc_disk and add_disk can sleep */
+/* blk_mq_alloc_disk and add_disk can sleep */
 void
 aoeblk_gdalloc(void *vp)
 {
 	struct aoedev *d = vp;
 	struct gendisk *gd;
 	mempool_t *mp;
-	struct request_queue *q;
 	struct blk_mq_tag_set *set;
 	ulong flags;
 	int late = 0;
@@ -362,19 +361,12 @@ aoeblk_gdalloc(void *vp)
 	if (late)
 		return;
 
-	gd = alloc_disk(AOE_PARTITIONS);
-	if (gd == NULL) {
-		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
-			d->aoemajor, d->aoeminor);
-		goto err;
-	}
-
 	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
 		buf_pool_cache);
 	if (mp == NULL) {
 		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
-		goto err_disk;
+		goto err;
 	}
 
 	set = &d->tag_set;
@@ -391,12 +383,11 @@ aoeblk_gdalloc(void *vp)
 		goto err_mempool;
 	}
 
-	q = blk_mq_init_queue(set);
-	if (IS_ERR(q)) {
+	gd = blk_mq_alloc_disk(set, d);
+	if (IS_ERR(gd)) {
 		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
-		blk_mq_free_tag_set(set);
-		goto err_mempool;
+		goto err_tagset;
 	}
 
 	spin_lock_irqsave(&d->lock, flags);
@@ -405,16 +396,16 @@ aoeblk_gdalloc(void *vp)
 	WARN_ON(d->flags & DEVFL_TKILL);
 	WARN_ON(d->gd);
 	WARN_ON(d->flags & DEVFL_UP);
-	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
-	blk_queue_io_opt(q, SZ_2M);
+	blk_queue_max_hw_sectors(gd->queue, BLK_DEF_MAX_SECTORS);
+	blk_queue_io_opt(gd->queue, SZ_2M);
 	d->bufpool = mp;
-	d->blkq = gd->queue = q;
-	q->queuedata = d;
+	d->blkq = gd->queue;
 	d->gd = gd;
 	if (aoe_maxsectors)
-		blk_queue_max_hw_sectors(q, aoe_maxsectors);
+		blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors);
 	gd->major = AOE_MAJOR;
 	gd->first_minor = d->sysminor;
+	gd->minors = AOE_PARTITIONS;
 	gd->fops = &aoe_bdops;
 	gd->private_data = d;
 	set_capacity(gd, d->ssize);
@@ -435,10 +426,10 @@ aoeblk_gdalloc(void *vp)
 	spin_unlock_irqrestore(&d->lock, flags);
 	return;
 
+err_tagset:
+	blk_mq_free_tag_set(set);
 err_mempool:
 	mempool_destroy(mp);
-err_disk:
-	put_disk(gd);
 err:
 	spin_lock_irqsave(&d->lock, flags);
 	d->flags &= ~DEVFL_GD_NOW;
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index e2ea2356da06..c5753c6bfe80 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -277,9 +277,8 @@ freedev(struct aoedev *d)
 	if (d->gd) {
 		aoedisk_rm_debugfs(d);
 		del_gendisk(d->gd);
-		put_disk(d->gd);
+		blk_cleanup_disk(d->gd);
 		blk_mq_free_tag_set(&d->tag_set);
-		blk_cleanup_queue(d->blkq);
 	}
 	t = d->targets;
 	e = t + d->ntargets;

From 34f84aefe2ba0a77431cc0c0808c5c0239b6ba43 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:32 +0300
Subject: [PATCH 1452/3292] floppy: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-18-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/floppy.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8a9d22207c59..cbed9776f285 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4491,23 +4491,15 @@ static bool floppy_available(int drive)
 static int floppy_alloc_disk(unsigned int drive, unsigned int type)
 {
 	struct gendisk *disk;
-	int err;
 
-	disk = alloc_disk(1);
-	if (!disk)
-		return -ENOMEM;
-
-	disk->queue = blk_mq_init_queue(&tag_sets[drive]);
-	if (IS_ERR(disk->queue)) {
-		err = PTR_ERR(disk->queue);
-		disk->queue = NULL;
-		put_disk(disk);
-		return err;
-	}
+	disk = blk_mq_alloc_disk(&tag_sets[drive], NULL);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
 
 	blk_queue_max_hw_sectors(disk->queue, 64);
 	disk->major = FLOPPY_MAJOR;
 	disk->first_minor = TOMINOR(drive) | (type << 2);
+	disk->minors = 1;
 	disk->fops = &floppy_fops;
 	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	if (type)
@@ -4727,10 +4719,8 @@ out_put_disk:
 		if (!disks[drive][0])
 			break;
 		del_timer_sync(&motor_off_timer[drive]);
-		blk_cleanup_queue(disks[drive][0]->queue);
-		disks[drive][0]->queue = NULL;
+		blk_cleanup_disk(disks[drive][0]);
 		blk_mq_free_tag_set(&tag_sets[drive]);
-		put_disk(disks[drive][0]);
 	}
 	return err;
 }

From 1c99502fae359182a93a1c9cf7406edc0e28b6b0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:33 +0300
Subject: [PATCH 1453/3292] loop: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-19-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 95c570f5923f..3f40e673a101 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2117,12 +2117,12 @@ static int loop_add(struct loop_device **l, int i)
 	if (err)
 		goto out_free_idr;
 
-	lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
-	if (IS_ERR(lo->lo_queue)) {
-		err = PTR_ERR(lo->lo_queue);
+	disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, lo);
+	if (IS_ERR(disk)) {
+		err = PTR_ERR(disk);
 		goto out_cleanup_tags;
 	}
-	lo->lo_queue->queuedata = lo;
+	lo->lo_queue = lo->lo_disk->queue;
 
 	blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
 
@@ -2134,11 +2134,6 @@ static int loop_add(struct loop_device **l, int i)
 	 */
 	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
 
-	err = -ENOMEM;
-	disk = lo->lo_disk = alloc_disk(1 << part_shift);
-	if (!disk)
-		goto out_free_queue;
-
 	/*
 	 * Disable partition scanning by default. The in-kernel partition
 	 * scanning can be requested individually per-device during its
@@ -2166,6 +2161,7 @@ static int loop_add(struct loop_device **l, int i)
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
+	disk->minors		= 1 << part_shift;
 	disk->fops		= &lo_fops;
 	disk->private_data	= lo;
 	disk->queue		= lo->lo_queue;
@@ -2174,8 +2170,6 @@ static int loop_add(struct loop_device **l, int i)
 	*l = lo;
 	return lo->lo_number;
 
-out_free_queue:
-	blk_cleanup_queue(lo->lo_queue);
 out_cleanup_tags:
 	blk_mq_free_tag_set(&lo->tag_set);
 out_free_idr:
@@ -2189,9 +2183,8 @@ out:
 static void loop_remove(struct loop_device *lo)
 {
 	del_gendisk(lo->lo_disk);
-	blk_cleanup_queue(lo->lo_queue);
 	blk_mq_free_tag_set(&lo->tag_set);
-	put_disk(lo->lo_disk);
+	blk_cleanup_disk(lo->lo_disk);
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }

From 4af5f2e0301311f88c420fcfc5f3c8611ade20ac Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:34 +0300
Subject: [PATCH 1454/3292] nbd: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-20-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 57 ++++++++++++++++++---------------------------
 1 file changed, 23 insertions(+), 34 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 45d2c28c8fc8..614d82e7fae4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -219,15 +219,11 @@ static const struct device_attribute pid_attr = {
 static void nbd_dev_remove(struct nbd_device *nbd)
 {
 	struct gendisk *disk = nbd->disk;
-	struct request_queue *q;
 
 	if (disk) {
-		q = disk->queue;
 		del_gendisk(disk);
-		blk_cleanup_queue(q);
 		blk_mq_free_tag_set(&nbd->tag_set);
-		disk->private_data = NULL;
-		put_disk(disk);
+		blk_cleanup_disk(disk);
 	}
 
 	/*
@@ -1646,15 +1642,24 @@ static int nbd_dev_add(int index)
 {
 	struct nbd_device *nbd;
 	struct gendisk *disk;
-	struct request_queue *q;
 	int err = -ENOMEM;
 
 	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
 	if (!nbd)
 		goto out;
 
-	disk = alloc_disk(1 << part_shift);
-	if (!disk)
+	nbd->tag_set.ops = &nbd_mq_ops;
+	nbd->tag_set.nr_hw_queues = 1;
+	nbd->tag_set.queue_depth = 128;
+	nbd->tag_set.numa_node = NUMA_NO_NODE;
+	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
+	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+		BLK_MQ_F_BLOCKING;
+	nbd->tag_set.driver_data = nbd;
+	nbd->destroy_complete = NULL;
+
+	err = blk_mq_alloc_tag_set(&nbd->tag_set);
+	if (err)
 		goto out_free_nbd;
 
 	if (index >= 0) {
@@ -1668,30 +1673,15 @@ static int nbd_dev_add(int index)
 			index = err;
 	}
 	if (err < 0)
-		goto out_free_disk;
-
-	nbd->index = index;
-	nbd->disk = disk;
-	nbd->tag_set.ops = &nbd_mq_ops;
-	nbd->tag_set.nr_hw_queues = 1;
-	nbd->tag_set.queue_depth = 128;
-	nbd->tag_set.numa_node = NUMA_NO_NODE;
-	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
-	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
-		BLK_MQ_F_BLOCKING;
-	nbd->tag_set.driver_data = nbd;
-	nbd->destroy_complete = NULL;
-
-	err = blk_mq_alloc_tag_set(&nbd->tag_set);
-	if (err)
-		goto out_free_idr;
-
-	q = blk_mq_init_queue(&nbd->tag_set);
-	if (IS_ERR(q)) {
-		err = PTR_ERR(q);
 		goto out_free_tags;
+	nbd->index = index;
+
+	disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
+	if (IS_ERR(disk)) {
+		err = PTR_ERR(disk);
+		goto out_free_idr;
 	}
-	disk->queue = q;
+	nbd->disk = disk;
 
 	/*
 	 * Tell the block layer that we are not a rotational device
@@ -1712,6 +1702,7 @@ static int nbd_dev_add(int index)
 	INIT_LIST_HEAD(&nbd->list);
 	disk->major = NBD_MAJOR;
 	disk->first_minor = index << part_shift;
+	disk->minors = 1 << part_shift;
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
@@ -1719,12 +1710,10 @@ static int nbd_dev_add(int index)
 	nbd_total_devices++;
 	return index;
 
-out_free_tags:
-	blk_mq_free_tag_set(&nbd->tag_set);
 out_free_idr:
 	idr_remove(&nbd_index_idr, index);
-out_free_disk:
-	put_disk(disk);
+out_free_tags:
+	blk_mq_free_tag_set(&nbd->tag_set);
 out_free_nbd:
 	kfree(nbd);
 out:

From 6759b1a2013ba6f65e97f0b9444fa1cf69654f7c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:35 +0300
Subject: [PATCH 1455/3292] nullb: use blk_mq_alloc_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-21-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 83d803cb57c8..3b320b005aa8 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1851,13 +1851,12 @@ static int null_add_dev(struct nullb_device *dev)
 
 		rv = -ENOMEM;
 		nullb->tag_set->timeout = 5 * HZ;
-		nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb);
-		if (IS_ERR(nullb->q))
+		nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
+		if (IS_ERR(nullb->disk)) {
+			rv = PTR_ERR(nullb->disk);
 			goto out_cleanup_tags;
-		nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
-		if (!nullb->disk)
-			goto out_cleanup_tags;
-		nullb->disk->queue = nullb->q;
+		}
+		nullb->q = nullb->disk->queue;
 	} else if (dev->queue_mode == NULL_Q_BIO) {
 		rv = -ENOMEM;
 		nullb->disk = blk_alloc_disk(nullb->dev->home_node);

From 262d431f90003b1a7d9585ef5465252317eb6bd7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:36 +0300
Subject: [PATCH 1456/3292] pd: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-22-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/paride/pd.c | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 828a45ffe0e7..3b2b8e872beb 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -879,18 +879,6 @@ static void pd_probe_drive(struct pd_unit *disk)
 {
 	struct gendisk *p;
 
-	p = alloc_disk(1 << PD_BITS);
-	if (!p)
-		return;
-
-	strcpy(p->disk_name, disk->name);
-	p->fops = &pd_fops;
-	p->major = major;
-	p->first_minor = (disk - pd) << PD_BITS;
-	p->events = DISK_EVENT_MEDIA_CHANGE;
-	disk->gd = p;
-	p->private_data = disk;
-
 	memset(&disk->tag_set, 0, sizeof(disk->tag_set));
 	disk->tag_set.ops = &pd_mq_ops;
 	disk->tag_set.cmd_size = sizeof(struct pd_req);
@@ -903,14 +891,21 @@ static void pd_probe_drive(struct pd_unit *disk)
 	if (blk_mq_alloc_tag_set(&disk->tag_set))
 		return;
 
-	p->queue = blk_mq_init_queue(&disk->tag_set);
-	if (IS_ERR(p->queue)) {
+	p = blk_mq_alloc_disk(&disk->tag_set, disk);
+	if (!p) {
 		blk_mq_free_tag_set(&disk->tag_set);
-		p->queue = NULL;
 		return;
 	}
+	disk->gd = p;
+
+	strcpy(p->disk_name, disk->name);
+	p->fops = &pd_fops;
+	p->major = major;
+	p->first_minor = (disk - pd) << PD_BITS;
+	p->minors = 1 << PD_BITS;
+	p->events = DISK_EVENT_MEDIA_CHANGE;
+	p->private_data = disk;
 
-	p->queue->queuedata = disk;
 	blk_queue_max_hw_sectors(p->queue, cluster);
 	blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
 
@@ -1019,9 +1014,8 @@ static void __exit pd_exit(void)
 		if (p) {
 			disk->gd = NULL;
 			del_gendisk(p);
-			blk_cleanup_queue(p->queue);
 			blk_mq_free_tag_set(&disk->tag_set);
-			put_disk(p);
+			blk_cleanup_disk(p);
 			pi_release(disk->pi);
 		}
 	}

From 195b1956b85baefc5049883fdcff249a8ff1911c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:37 +0300
Subject: [PATCH 1457/3292] rbd: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-23-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rbd.c | 52 ++++++++++++++++-----------------------------
 1 file changed, 18 insertions(+), 34 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bbb88eb009e0..531d390902dd 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4750,9 +4750,8 @@ static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
 {
-	blk_cleanup_queue(rbd_dev->disk->queue);
+	blk_cleanup_disk(rbd_dev->disk);
 	blk_mq_free_tag_set(&rbd_dev->tag_set);
-	put_disk(rbd_dev->disk);
 	rbd_dev->disk = NULL;
 }
 
@@ -4922,22 +4921,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	    rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
 	int err;
 
-	/* create gendisk info */
-	disk = alloc_disk(single_major ?
-			  (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
-			  RBD_MINORS_PER_MAJOR);
-	if (!disk)
-		return -ENOMEM;
-
-	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
-		 rbd_dev->dev_id);
-	disk->major = rbd_dev->major;
-	disk->first_minor = rbd_dev->minor;
-	if (single_major)
-		disk->flags |= GENHD_FL_EXT_DEVT;
-	disk->fops = &rbd_bd_ops;
-	disk->private_data = rbd_dev;
-
 	memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
 	rbd_dev->tag_set.ops = &rbd_mq_ops;
 	rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
@@ -4948,13 +4931,26 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 
 	err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
 	if (err)
-		goto out_disk;
+		return err;
 
-	q = blk_mq_init_queue(&rbd_dev->tag_set);
-	if (IS_ERR(q)) {
-		err = PTR_ERR(q);
+	disk = blk_mq_alloc_disk(&rbd_dev->tag_set, rbd_dev);
+	if (IS_ERR(disk)) {
+		err = PTR_ERR(disk);
 		goto out_tag_set;
 	}
+	q = disk->queue;
+
+	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
+		 rbd_dev->dev_id);
+	disk->major = rbd_dev->major;
+	disk->first_minor = rbd_dev->minor;
+	if (single_major) {
+		disk->minors = (1 << RBD_SINGLE_MAJOR_PART_SHIFT);
+		disk->flags |= GENHD_FL_EXT_DEVT;
+	} else {
+		disk->minors = RBD_MINORS_PER_MAJOR;
+	}
+	disk->fops = &rbd_bd_ops;
 
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 	/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
@@ -4976,21 +4972,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
 		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
 
-	/*
-	 * disk_release() expects a queue ref from add_disk() and will
-	 * put it.  Hold an extra ref until add_disk() is called.
-	 */
-	WARN_ON(!blk_get_queue(q));
-	disk->queue = q;
-	q->queuedata = rbd_dev;
-
 	rbd_dev->disk = disk;
 
 	return 0;
 out_tag_set:
 	blk_mq_free_tag_set(&rbd_dev->tag_set);
-out_disk:
-	put_disk(disk);
 	return err;
 }
 
@@ -7088,8 +7074,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 		goto err_out_image_lock;
 
 	device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
-	/* see rbd_init_disk() */
-	blk_put_queue(rbd_dev->disk->queue);
 
 	spin_lock(&rbd_dev_list_lock);
 	list_add_tail(&rbd_dev->node, &rbd_dev_list);

From 2c6ee0ae5fc7aed7b3309e725104ea60da2cc9c9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:38 +0300
Subject: [PATCH 1458/3292] rnbd: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-24-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 35 ++++++++---------------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index c604a402cd5c..f4fa45d24c0b 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1353,18 +1353,6 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
 	}
 }
 
-static int setup_mq_dev(struct rnbd_clt_dev *dev)
-{
-	dev->queue = blk_mq_init_queue(&dev->sess->tag_set);
-	if (IS_ERR(dev->queue)) {
-		rnbd_clt_err(dev, "Initializing multiqueue queue failed, err: %ld\n",
-			      PTR_ERR(dev->queue));
-		return PTR_ERR(dev->queue);
-	}
-	rnbd_init_mq_hw_queues(dev);
-	return 0;
-}
-
 static void setup_request_queue(struct rnbd_clt_dev *dev)
 {
 	blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
@@ -1393,13 +1381,13 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
 	blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
 	blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
 	blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
-	dev->queue->queuedata = dev;
 }
 
 static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
 {
 	dev->gd->major		= rnbd_client_major;
 	dev->gd->first_minor	= idx << RNBD_PART_BITS;
+	dev->gd->minors		= 1 << RNBD_PART_BITS;
 	dev->gd->fops		= &rnbd_client_ops;
 	dev->gd->queue		= dev->queue;
 	dev->gd->private_data	= dev;
@@ -1426,24 +1414,18 @@ static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
 
 static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
 {
-	int err, idx = dev->clt_device_id;
+	int idx = dev->clt_device_id;
 
 	dev->size = dev->nsectors * dev->logical_block_size;
 
-	err = setup_mq_dev(dev);
-	if (err)
-		return err;
+	dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
+	if (IS_ERR(dev->gd))
+		return PTR_ERR(dev->gd);
+	dev->queue = dev->gd->queue;
+	rnbd_init_mq_hw_queues(dev);
 
 	setup_request_queue(dev);
-
-	dev->gd = alloc_disk_node(1 << RNBD_PART_BITS,	NUMA_NO_NODE);
-	if (!dev->gd) {
-		blk_cleanup_queue(dev->queue);
-		return -ENOMEM;
-	}
-
 	rnbd_clt_setup_gen_disk(dev, idx);
-
 	return 0;
 }
 
@@ -1650,8 +1632,7 @@ put_sess:
 static void destroy_gen_disk(struct rnbd_clt_dev *dev)
 {
 	del_gendisk(dev->gd);
-	blk_cleanup_queue(dev->queue);
-	put_disk(dev->gd);
+	blk_cleanup_disk(dev->gd);
 }
 
 static void destroy_sysfs(struct rnbd_clt_dev *dev,

From 693874035e6e54981771eb5f19e6eb0da2437175 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:39 +0300
Subject: [PATCH 1459/3292] sx8: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-25-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/sx8.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 2cdf2771f8e8..f01f860b0e62 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1343,32 +1343,25 @@ static int carm_init_disk(struct carm_host *host, unsigned int port_no)
 {
 	struct carm_port *port = &host->port[port_no];
 	struct gendisk *disk;
-	struct request_queue *q;
 
 	port->host = host;
 	port->port_no = port_no;
 
-	disk = alloc_disk(CARM_MINORS_PER_MAJOR);
-	if (!disk)
-		return -ENOMEM;
+	disk = blk_mq_alloc_disk(&host->tag_set, port);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
 
 	port->disk = disk;
 	sprintf(disk->disk_name, DRV_NAME "/%u",
 		(unsigned int)host->id * CARM_MAX_PORTS + port_no);
 	disk->major = host->major;
 	disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
+	disk->minors = CARM_MINORS_PER_MAJOR;
 	disk->fops = &carm_bd_ops;
 	disk->private_data = port;
 
-	q = blk_mq_init_queue(&host->tag_set);
-	if (IS_ERR(q))
-		return PTR_ERR(q);
-
-	blk_queue_max_segments(q, CARM_MAX_REQ_SG);
-	blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
-
-	q->queuedata = port;
-	disk->queue = q;
+	blk_queue_max_segments(disk->queue, CARM_MAX_REQ_SG);
+	blk_queue_segment_boundary(disk->queue, CARM_SG_BOUNDARY);
 	return 0;
 }
 
@@ -1382,9 +1375,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no)
 
 	if (disk->flags & GENHD_FL_UP)
 		del_gendisk(disk);
-	if (disk->queue)
-		blk_cleanup_queue(disk->queue);
-	put_disk(disk);
+	blk_cleanup_disk(disk);
 }
 
 static int carm_init_shm(struct carm_host *host)

From 3b62c140e93d32c825ed028faca45dee58dbe37f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:40 +0300
Subject: [PATCH 1460/3292] xen-blkfront: use blk_mq_alloc_disk and
 blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-26-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/xen-blkfront.c | 96 +++++++++++++++---------------------
 1 file changed, 39 insertions(+), 57 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f2c1aedcdf5a..8d49f8fa98bb 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -968,48 +968,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
 	blk_queue_dma_alignment(rq, 511);
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
-				unsigned int physical_sector_size)
-{
-	struct request_queue *rq;
-	struct blkfront_info *info = gd->private_data;
-
-	memset(&info->tag_set, 0, sizeof(info->tag_set));
-	info->tag_set.ops = &blkfront_mq_ops;
-	info->tag_set.nr_hw_queues = info->nr_rings;
-	if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
-		/*
-		 * When indirect descriptior is not supported, the I/O request
-		 * will be split between multiple request in the ring.
-		 * To avoid problems when sending the request, divide by
-		 * 2 the depth of the queue.
-		 */
-		info->tag_set.queue_depth =  BLK_RING_SIZE(info) / 2;
-	} else
-		info->tag_set.queue_depth = BLK_RING_SIZE(info);
-	info->tag_set.numa_node = NUMA_NO_NODE;
-	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-	info->tag_set.cmd_size = sizeof(struct blkif_req);
-	info->tag_set.driver_data = info;
-
-	if (blk_mq_alloc_tag_set(&info->tag_set))
-		return -EINVAL;
-	rq = blk_mq_init_queue(&info->tag_set);
-	if (IS_ERR(rq)) {
-		blk_mq_free_tag_set(&info->tag_set);
-		return PTR_ERR(rq);
-	}
-
-	rq->queuedata = info;
-	info->rq = gd->queue = rq;
-	info->gd = gd;
-	info->sector_size = sector_size;
-	info->physical_sector_size = physical_sector_size;
-	blkif_set_queue_limits(info);
-
-	return 0;
-}
-
 static const char *flush_info(struct blkfront_info *info)
 {
 	if (info->feature_flush && info->feature_fua)
@@ -1146,12 +1104,36 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
 	err = xlbd_reserve_minors(minor, nr_minors);
 	if (err)
-		goto out;
+		return err;
 	err = -ENODEV;
 
-	gd = alloc_disk(nr_minors);
-	if (gd == NULL)
-		goto release;
+	memset(&info->tag_set, 0, sizeof(info->tag_set));
+	info->tag_set.ops = &blkfront_mq_ops;
+	info->tag_set.nr_hw_queues = info->nr_rings;
+	if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
+		/*
+		 * When indirect descriptior is not supported, the I/O request
+		 * will be split between multiple request in the ring.
+		 * To avoid problems when sending the request, divide by
+		 * 2 the depth of the queue.
+		 */
+		info->tag_set.queue_depth =  BLK_RING_SIZE(info) / 2;
+	} else
+		info->tag_set.queue_depth = BLK_RING_SIZE(info);
+	info->tag_set.numa_node = NUMA_NO_NODE;
+	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+	info->tag_set.cmd_size = sizeof(struct blkif_req);
+	info->tag_set.driver_data = info;
+
+	err = blk_mq_alloc_tag_set(&info->tag_set);
+	if (err)
+		goto out_release_minors;
+
+	gd = blk_mq_alloc_disk(&info->tag_set, info);
+	if (IS_ERR(gd)) {
+		err = PTR_ERR(gd);
+		goto out_free_tag_set;
+	}
 
 	strcpy(gd->disk_name, DEV_NAME);
 	ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
@@ -1164,14 +1146,16 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
 	gd->major = XENVBD_MAJOR;
 	gd->first_minor = minor;
+	gd->minors = nr_minors;
 	gd->fops = &xlvbd_block_fops;
 	gd->private_data = info;
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
-		del_gendisk(gd);
-		goto release;
-	}
+	info->rq = gd->queue;
+	info->gd = gd;
+	info->sector_size = sector_size;
+	info->physical_sector_size = physical_sector_size;
+	blkif_set_queue_limits(info);
 
 	xlvbd_flush(info);
 
@@ -1186,9 +1170,10 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
 	return 0;
 
- release:
+out_free_tag_set:
+	blk_mq_free_tag_set(&info->tag_set);
+out_release_minors:
 	xlbd_release_minors(minor, nr_minors);
- out:
 	return err;
 }
 
@@ -1217,12 +1202,9 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 	nr_minors = info->gd->minors;
 	xlbd_release_minors(minor, nr_minors);
 
-	blk_cleanup_queue(info->rq);
-	blk_mq_free_tag_set(&info->tag_set);
-	info->rq = NULL;
-
-	put_disk(info->gd);
+	blk_cleanup_disk(info->gd);
 	info->gd = NULL;
+	blk_mq_free_tag_set(&info->tag_set);
 }
 
 /* Already hold rinfo->ring_lock. */

From 77567b25ab9f06c6477a153e58ace6897c6794d1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:41 +0300
Subject: [PATCH 1461/3292] ubi: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-27-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/mtd/ubi/block.c | 70 ++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index e1a2ae21dfd3..e003b4b44ffa 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -394,28 +394,6 @@ int ubiblock_create(struct ubi_volume_info *vi)
 	dev->vol_id = vi->vol_id;
 	dev->leb_size = vi->usable_leb_size;
 
-	/* Initialize the gendisk of this ubiblock device */
-	gd = alloc_disk(1);
-	if (!gd) {
-		pr_err("UBI: block: alloc_disk failed\n");
-		ret = -ENODEV;
-		goto out_free_dev;
-	}
-
-	gd->fops = &ubiblock_ops;
-	gd->major = ubiblock_major;
-	gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL);
-	if (gd->first_minor < 0) {
-		dev_err(disk_to_dev(gd),
-			"block: dynamic minor allocation failed");
-		ret = -ENODEV;
-		goto out_put_disk;
-	}
-	gd->private_data = dev;
-	sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
-	set_capacity(gd, disk_capacity);
-	dev->gd = gd;
-
 	dev->tag_set.ops = &ubiblock_mq_ops;
 	dev->tag_set.queue_depth = 64;
 	dev->tag_set.numa_node = NUMA_NO_NODE;
@@ -427,19 +405,34 @@ int ubiblock_create(struct ubi_volume_info *vi)
 	ret = blk_mq_alloc_tag_set(&dev->tag_set);
 	if (ret) {
 		dev_err(disk_to_dev(dev->gd), "blk_mq_alloc_tag_set failed");
-		goto out_remove_minor;
+		goto out_free_dev;;
 	}
 
-	dev->rq = blk_mq_init_queue(&dev->tag_set);
-	if (IS_ERR(dev->rq)) {
-		dev_err(disk_to_dev(gd), "blk_mq_init_queue failed");
-		ret = PTR_ERR(dev->rq);
+
+	/* Initialize the gendisk of this ubiblock device */
+	gd = blk_mq_alloc_disk(&dev->tag_set, dev);
+	if (IS_ERR(gd)) {
+		ret = PTR_ERR(gd);
 		goto out_free_tags;
 	}
-	blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
 
-	dev->rq->queuedata = dev;
-	dev->gd->queue = dev->rq;
+	gd->fops = &ubiblock_ops;
+	gd->major = ubiblock_major;
+	gd->minors = 1;
+	gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL);
+	if (gd->first_minor < 0) {
+		dev_err(disk_to_dev(gd),
+			"block: dynamic minor allocation failed");
+		ret = -ENODEV;
+		goto out_cleanup_disk;
+	}
+	gd->private_data = dev;
+	sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
+	set_capacity(gd, disk_capacity);
+	dev->gd = gd;
+
+	dev->rq = gd->queue;
+	blk_queue_max_segments(dev->rq, UBI_MAX_SG_COUNT);
 
 	/*
 	 * Create one workqueue per volume (per registered block device).
@@ -448,7 +441,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
 	dev->wq = alloc_workqueue("%s", 0, 0, gd->disk_name);
 	if (!dev->wq) {
 		ret = -ENOMEM;
-		goto out_free_queue;
+		goto out_remove_minor;
 	}
 
 	list_add_tail(&dev->list, &ubiblock_devices);
@@ -460,14 +453,12 @@ int ubiblock_create(struct ubi_volume_info *vi)
 	mutex_unlock(&devices_mutex);
 	return 0;
 
-out_free_queue:
-	blk_cleanup_queue(dev->rq);
-out_free_tags:
-	blk_mq_free_tag_set(&dev->tag_set);
 out_remove_minor:
 	idr_remove(&ubiblock_minor_idr, gd->first_minor);
-out_put_disk:
-	put_disk(dev->gd);
+out_cleanup_disk:
+	blk_cleanup_disk(dev->gd);
+out_free_tags:
+	blk_mq_free_tag_set(&dev->tag_set);
 out_free_dev:
 	kfree(dev);
 out_unlock:
@@ -483,11 +474,10 @@ static void ubiblock_cleanup(struct ubiblock *dev)
 	/* Flush pending work */
 	destroy_workqueue(dev->wq);
 	/* Finally destroy the blk queue */
-	blk_cleanup_queue(dev->rq);
-	blk_mq_free_tag_set(&dev->tag_set);
 	dev_info(disk_to_dev(dev->gd), "released");
+	blk_cleanup_disk(dev->gd);
+	blk_mq_free_tag_set(&dev->tag_set);
 	idr_remove(&ubiblock_minor_idr, dev->gd->first_minor);
-	put_disk(dev->gd);
 }
 
 int ubiblock_remove(struct ubi_volume_info *vi)

From c06cf063b3e5d590781fec6e88ccc259384dc157 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:42 +0300
Subject: [PATCH 1462/3292] scm_blk: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-28-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/s390/block/scm_blk.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index a4f6f2e62b1d..88cba6212ee2 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -462,12 +462,12 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	if (ret)
 		goto out;
 
-	rq = blk_mq_init_queue(&bdev->tag_set);
-	if (IS_ERR(rq)) {
-		ret = PTR_ERR(rq);
+	bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, scmdev);
+	if (IS_ERR(bdev->gendisk)) {
+		ret = PTR_ERR(bdev->gendisk);
 		goto out_tag;
 	}
-	bdev->rq = rq;
+	rq = bdev->rq = bdev->gendisk->queue;
 	nr_max_blk = min(scmdev->nr_max_block,
 			 (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));
 
@@ -477,17 +477,11 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);
 
-	bdev->gendisk = alloc_disk(SCM_NR_PARTS);
-	if (!bdev->gendisk) {
-		ret = -ENOMEM;
-		goto out_queue;
-	}
-	rq->queuedata = scmdev;
 	bdev->gendisk->private_data = scmdev;
 	bdev->gendisk->fops = &scm_blk_devops;
-	bdev->gendisk->queue = rq;
 	bdev->gendisk->major = scm_major;
 	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;
+	bdev->gendisk->minors = SCM_NR_PARTS;
 
 	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
 	if (devindex > 25) {
@@ -504,8 +498,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
 	return 0;
 
-out_queue:
-	blk_cleanup_queue(rq);
 out_tag:
 	blk_mq_free_tag_set(&bdev->tag_set);
 out:
@@ -516,9 +508,8 @@ out:
 void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
 {
 	del_gendisk(bdev->gendisk);
-	blk_cleanup_queue(bdev->gendisk->queue);
+	blk_cleanup_disk(bdev->gendisk);
 	blk_mq_free_tag_set(&bdev->tag_set);
-	put_disk(bdev->gendisk);
 }
 
 void scm_blk_set_available(struct scm_blk_dev *bdev)

From f6d8297412f882a2eabbf026f0d98449ae14e0fe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:43 +0300
Subject: [PATCH 1463/3292] amiflop: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-29-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/amiflop.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 9e2d0c6a3877..8b1714021498 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1781,15 +1781,13 @@ static int fd_alloc_disk(int drive, int system)
 {
 	struct gendisk *disk;
 
-	disk = alloc_disk(1);
-	if (!disk)
-		goto out;
-	disk->queue = blk_mq_init_queue(&unit[drive].tag_set);
-	if (IS_ERR(disk->queue))
-		goto out_put_disk;
+	disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
 
 	disk->major = FLOPPY_MAJOR;
 	disk->first_minor = drive + system;
+	disk->minors = 1;
 	disk->fops = &floppy_fops;
 	disk->events = DISK_EVENT_MEDIA_CHANGE;
 	if (system)
@@ -1802,12 +1800,6 @@ static int fd_alloc_disk(int drive, int system)
 	unit[drive].gendisk[system] = disk;
 	add_disk(disk);
 	return 0;
-
-out_put_disk:
-	disk->queue = NULL;
-	put_disk(disk);
-out:
-	return -ENOMEM;
 }
 
 static int fd_alloc_drive(int drive)

From fd71c8a8ac77242661fff4af39593cd606a90a41 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:44 +0300
Subject: [PATCH 1464/3292] ataflop: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-30-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/ataflop.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index d601e49f80e0..a093644ac39f 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1968,22 +1968,14 @@ static const struct blk_mq_ops ataflop_mq_ops = {
 static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
 {
 	struct gendisk *disk;
-	int ret;
 
-	disk = alloc_disk(1);
-	if (!disk)
-		return -ENOMEM;
-
-	disk->queue = blk_mq_init_queue(&unit[drive].tag_set);
-	if (IS_ERR(disk->queue)) {
-		ret = PTR_ERR(disk->queue);
-		disk->queue = NULL;
-		put_disk(disk);
-		return ret;
-	}
+	disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
 
 	disk->major = FLOPPY_MAJOR;
 	disk->first_minor = drive + (type << 2);
+	disk->minors = 1;
 	sprintf(disk->disk_name, "fd%d", drive);
 	disk->fops = &floppy_fops;
 	disk->events = DISK_EVENT_MEDIA_CHANGE;

From ec06c989bb45acc28c7633703685dd684b1b5d9c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 2 Jun 2021 09:53:45 +0300
Subject: [PATCH 1465/3292] z2ram: use blk_mq_alloc_disk and blk_cleanup_disk

Use blk_mq_alloc_disk and blk_cleanup_disk to simplify the gendisk and
request_queue allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Link: https://lore.kernel.org/r/20210602065345.355274-31-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/z2ram.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index c1d20818e649..a8968d9e759b 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -323,27 +323,20 @@ static const struct blk_mq_ops z2_mq_ops = {
 
 static int z2ram_register_disk(int minor)
 {
-	struct request_queue *q;
 	struct gendisk *disk;
 
-	disk = alloc_disk(1);
-	if (!disk)
-		return -ENOMEM;
-
-	q = blk_mq_init_queue(&tag_set);
-	if (IS_ERR(q)) {
-		put_disk(disk);
-		return PTR_ERR(q);
-	}
+	disk = blk_mq_alloc_disk(&tag_set, NULL);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
 
 	disk->major = Z2RAM_MAJOR;
 	disk->first_minor = minor;
+	disk->minors = 1;
 	disk->fops = &z2_fops;
 	if (minor)
 		sprintf(disk->disk_name, "z2ram%d", minor);
 	else
 		sprintf(disk->disk_name, "z2ram");
-	disk->queue = q;
 
 	z2ram_gendisk[minor] = disk;
 	add_disk(disk);

From 0bf5eb788512187b744ef7f79de835e6cbe85b9c Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 10 Jun 2021 21:38:56 +0800
Subject: [PATCH 1466/3292] net: hns3: add support for PTP

Adds PTP support for HNS3 ethernet driver.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/Kconfig        |   1 +
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  12 +
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   |  27 +
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |   9 +-
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    |  12 +
 .../ethernet/hisilicon/hns3/hns3pf/Makefile   |   2 +-
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         |   4 +
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  61 +-
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   6 +
 .../hisilicon/hns3/hns3pf/hclge_ptp.c         | 544 ++++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_ptp.h         | 134 +++++
 11 files changed, 806 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
 create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h

diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index fa6025dc4cdb..bb062b02fb85 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -102,6 +102,7 @@ config HNS3_HCLGE
 	tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
 	default m
 	depends on PCI_MSI
+	imply PTP_1588_CLOCK
 	help
 	  This selects the HNS3_HCLGE network acceleration engine & its hardware
 	  compatibility layer. The engine would be used in Hisilicon hip08 family of
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e564aa32a414..f6f524c2ab7b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -525,6 +525,12 @@ struct hnae3_ae_dev {
  *   Check if any cls flower rule exist
  * dbg_read_cmd
  *   Execute debugfs read command.
+ * set_tx_hwts_info
+ *   Save information for 1588 tx packet
+ * get_rx_hwts
+ *   Get 1588 rx hwstamp
+ * get_ts_info
+ *   Get phc info
  */
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -710,6 +716,12 @@ struct hnae3_ae_ops {
 				      struct ethtool_link_ksettings *cmd);
 	int (*set_phy_link_ksettings)(struct hnae3_handle *handle,
 				      const struct ethtool_link_ksettings *cmd);
+	bool (*set_tx_hwts_info)(struct hnae3_handle *handle,
+				 struct sk_buff *skb);
+	void (*get_rx_hwts)(struct hnae3_handle *handle, struct sk_buff *skb,
+			    u32 nsec, u32 sec);
+	int (*get_ts_info)(struct hnae3_handle *handle,
+			   struct ethtool_ts_info *info);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 393979bec170..9a45f3cde6a2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1799,6 +1799,18 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
 	WRITE_ONCE(ring->last_to_use, ring->next_to_use);
 }
 
+static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
+		      struct hns3_desc *desc)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (!(h->ae_algo->ops->set_tx_hwts_info &&
+	      h->ae_algo->ops->set_tx_hwts_info(h, skb)))
+		return;
+
+	desc->tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_TSYN_B));
+}
+
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -1851,10 +1863,16 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
 					(ring->desc_num - 1);
+
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		hns3_tsyn(netdev, skb, &ring->desc[pre_ntu]);
+
 	ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
 				cpu_to_le16(BIT(HNS3_TXD_FE_B));
 	trace_hns3_tx_desc(ring, pre_ntu);
 
+	skb_tx_timestamp(skb);
+
 	/* Complete translate all packets */
 	dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
 	doorbell = __netdev_tx_sent_queue(dev_queue, desc_cb->send_bytes,
@@ -3585,6 +3603,15 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
 	ol_info = le32_to_cpu(desc->rx.ol_info);
 	csum = le16_to_cpu(desc->csum);
 
+	if (unlikely(bd_base_info & BIT(HNS3_RXD_TS_VLD_B))) {
+		struct hnae3_handle *h = hns3_get_handle(netdev);
+		u32 nsec = le32_to_cpu(desc->ts_nsec);
+		u32 sec = le32_to_cpu(desc->ts_sec);
+
+		if (h->ae_algo->ops->get_rx_hwts)
+			h->ae_algo->ops->get_rx_hwts(h, skb, nsec, sec);
+	}
+
 	/* Based on hw strategy, the tag offloaded will be stored at
 	 * ot_vlan_tag in two layer tag case, and stored at vlan_tag
 	 * in one layer tag case.
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 5698a14a804e..79821c7bdc16 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -122,8 +122,9 @@ enum hns3_nic_state {
 #define HNS3_RXD_LUM_B				9
 #define HNS3_RXD_CRCP_B				10
 #define HNS3_RXD_L3L4P_B			11
-#define HNS3_RXD_TSIND_S			12
-#define HNS3_RXD_TSIND_M			(0x7 << HNS3_RXD_TSIND_S)
+#define HNS3_RXD_TSIDX_S			12
+#define HNS3_RXD_TSIDX_M			(0x3 << HNS3_RXD_TSIDX_S)
+#define HNS3_RXD_TS_VLD_B			14
 #define HNS3_RXD_LKBK_B				15
 #define HNS3_RXD_GRO_SIZE_S			16
 #define HNS3_RXD_GRO_SIZE_M			(0x3fff << HNS3_RXD_GRO_SIZE_S)
@@ -240,6 +241,10 @@ struct __packed hns3_desc {
 	union {
 		__le64 addr;
 		__le16 csum;
+		struct {
+			__le32 ts_nsec;
+			__le32 ts_sec;
+		};
 	};
 	union {
 		struct {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index bb7c2ec7ed6f..acef5435d7b7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1598,6 +1598,17 @@ static int hns3_set_priv_flags(struct net_device *netdev, u32 pflags)
 				 ETHTOOL_COALESCE_TX_USECS_HIGH |	\
 				 ETHTOOL_COALESCE_MAX_FRAMES)
 
+static int hns3_get_ts_info(struct net_device *netdev,
+			    struct ethtool_ts_info *info)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+
+	if (handle->ae_algo->ops->get_ts_info)
+		return handle->ae_algo->ops->get_ts_info(handle, info);
+
+	return ethtool_op_get_ts_info(netdev, info);
+}
+
 static const struct ethtool_ops hns3vf_ethtool_ops = {
 	.supported_coalesce_params = HNS3_ETHTOOL_COALESCE,
 	.get_drvinfo = hns3_get_drvinfo,
@@ -1662,6 +1673,7 @@ static const struct ethtool_ops hns3_ethtool_ops = {
 	.get_module_eeprom = hns3_get_module_eeprom,
 	.get_priv_flags = hns3_get_priv_flags,
 	.set_priv_flags = hns3_set_priv_flags,
+	.get_ts_info = hns3_get_ts_info,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 6c28c8f6292c..a685392dbfe9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -7,6 +7,6 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
 ccflags-y += -I $(srctree)/$(src)
 
 obj-$(CONFIG_HNS3_HCLGE) += hclge.o
-hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o  hclge_debugfs.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o  hclge_debugfs.o hclge_ptp.o
 
 hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 221811af9473..51be76f1795e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -130,6 +130,10 @@ enum hclge_opcode_type {
 	HCLGE_OPC_COMMON_LOOPBACK       = 0x0315,
 	HCLGE_OPC_CONFIG_FEC_MODE	= 0x031A,
 
+	/* PTP commands */
+	HCLGE_OPC_PTP_INT_EN		= 0x0501,
+	HCLGE_OPC_PTP_MODE_CFG		= 0x0507,
+
 	/* PFC/Pause commands */
 	HCLGE_OPC_CFG_MAC_PAUSE_EN      = 0x0701,
 	HCLGE_OPC_CFG_PFC_PAUSE_EN      = 0x0702,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c90d7c1550c5..f6fdf93c8cad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3346,6 +3346,12 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 	    hw_err_src_reg & HCLGE_RAS_REG_ERR_MASK)
 		return HCLGE_VECTOR0_EVENT_ERR;
 
+	/* check for vector0 ptp event source */
+	if (BIT(HCLGE_VECTOR0_REG_PTP_INT_B) & msix_src_reg) {
+		*clearval = msix_src_reg;
+		return HCLGE_VECTOR0_EVENT_PTP;
+	}
+
 	/* check for vector0 mailbox(=CMDQ RX) event source */
 	if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
 		cmdq_src_reg &= ~BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B);
@@ -3365,6 +3371,7 @@ static void hclge_clear_event_cause(struct hclge_dev *hdev, u32 event_type,
 				    u32 regclr)
 {
 	switch (event_type) {
+	case HCLGE_VECTOR0_EVENT_PTP:
 	case HCLGE_VECTOR0_EVENT_RST:
 		hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, regclr);
 		break;
@@ -3393,6 +3400,7 @@ static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable)
 static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 {
 	struct hclge_dev *hdev = data;
+	unsigned long flags;
 	u32 clearval = 0;
 	u32 event_cause;
 
@@ -3407,6 +3415,11 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 	case HCLGE_VECTOR0_EVENT_RST:
 		hclge_reset_task_schedule(hdev);
 		break;
+	case HCLGE_VECTOR0_EVENT_PTP:
+		spin_lock_irqsave(&hdev->ptp->lock, flags);
+		hclge_ptp_clean_tx_hwts(hdev);
+		spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+		break;
 	case HCLGE_VECTOR0_EVENT_MBX:
 		/* If we are here then,
 		 * 1. Either we are not handling any mbx task and we are not
@@ -3428,7 +3441,8 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 	hclge_clear_event_cause(hdev, event_cause, clearval);
 
 	/* Enable interrupt if it is not caused by reset event or error event */
-	if (event_cause == HCLGE_VECTOR0_EVENT_MBX ||
+	if (event_cause == HCLGE_VECTOR0_EVENT_PTP ||
+	    event_cause == HCLGE_VECTOR0_EVENT_MBX ||
 	    event_cause == HCLGE_VECTOR0_EVENT_OTHER)
 		hclge_enable_vector(&hdev->misc_vector, true);
 
@@ -4375,6 +4389,27 @@ out:
 	hclge_task_schedule(hdev, delta);
 }
 
+static void hclge_ptp_service_task(struct hclge_dev *hdev)
+{
+	unsigned long flags;
+
+	if (!test_bit(HCLGE_STATE_PTP_EN, &hdev->state) ||
+	    !test_bit(HCLGE_STATE_PTP_TX_HANDLING, &hdev->state) ||
+	    !time_is_before_jiffies(hdev->ptp->tx_start + HZ))
+		return;
+
+	/* to prevent concurrence with the irq handler */
+	spin_lock_irqsave(&hdev->ptp->lock, flags);
+
+	/* check HCLGE_STATE_PTP_TX_HANDLING here again, since the irq
+	 * handler may handle it just before spin_lock_irqsave().
+	 */
+	if (test_bit(HCLGE_STATE_PTP_TX_HANDLING, &hdev->state))
+		hclge_ptp_clean_tx_hwts(hdev);
+
+	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+}
+
 static void hclge_service_task(struct work_struct *work)
 {
 	struct hclge_dev *hdev =
@@ -4382,6 +4417,7 @@ static void hclge_service_task(struct work_struct *work)
 
 	hclge_errhand_service_task(hdev);
 	hclge_reset_service_task(hdev);
+	hclge_ptp_service_task(hdev);
 	hclge_mailbox_service_task(hdev);
 	hclge_periodic_service_task(hdev);
 
@@ -9413,8 +9449,15 @@ static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	if (!hdev->hw.mac.phydev)
-		return hclge_mii_ioctl(hdev, ifr, cmd);
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		return hclge_ptp_get_cfg(hdev, ifr);
+	case SIOCSHWTSTAMP:
+		return hclge_ptp_set_cfg(hdev, ifr);
+	default:
+		if (!hdev->hw.mac.phydev)
+			return hclge_mii_ioctl(hdev, ifr, cmd);
+	}
 
 	return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd);
 }
@@ -11530,6 +11573,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		goto err_mdiobus_unreg;
 	}
 
+	ret = hclge_ptp_init(hdev);
+	if (ret)
+		goto err_mdiobus_unreg;
+
 	INIT_KFIFO(hdev->mac_tnl_log);
 
 	hclge_dcb_ops_set(hdev);
@@ -11901,6 +11948,10 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	ret = hclge_ptp_init(hdev);
+	if (ret)
+		return ret;
+
 	/* Log and clear the hw errors those already occurred */
 	if (hnae3_dev_ras_imp_supported(hdev))
 		hclge_handle_occurred_error(hdev);
@@ -11954,6 +12005,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_clear_vf_vlan(hdev);
 	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
+	hclge_ptp_uninit(hdev);
 	hclge_uninit_rxd_adv_layout(hdev);
 	hclge_uninit_mac_table(hdev);
 	hclge_del_all_fd_entries(hdev);
@@ -12850,6 +12902,9 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.cls_flower_active = hclge_is_cls_flower_active,
 	.get_phy_link_ksettings = hclge_get_phy_link_ksettings,
 	.set_phy_link_ksettings = hclge_set_phy_link_ksettings,
+	.set_tx_hwts_info = hclge_ptp_set_tx_info,
+	.get_rx_hwts = hclge_ptp_get_rx_hwts,
+	.get_ts_info = hclge_ptp_get_ts_info,
 };
 
 static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 582972a6f60e..02852738ce21 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -10,6 +10,7 @@
 #include <linux/kfifo.h>
 
 #include "hclge_cmd.h"
+#include "hclge_ptp.h"
 #include "hnae3.h"
 
 #define HCLGE_MOD_VERSION "1.0"
@@ -178,6 +179,7 @@ enum HLCGE_PORT_TYPE {
 #define HCLGE_FUN_RST_ING_B		0
 
 /* Vector0 register bits define */
+#define HCLGE_VECTOR0_REG_PTP_INT_B	0
 #define HCLGE_VECTOR0_GLOBALRESET_INT_B	5
 #define HCLGE_VECTOR0_CORERESET_INT_B	6
 #define HCLGE_VECTOR0_IMPRESET_INT_B	7
@@ -230,6 +232,8 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_FD_TBL_CHANGED,
 	HCLGE_STATE_FD_CLEAR_ALL,
 	HCLGE_STATE_FD_USER_DEF_CHANGED,
+	HCLGE_STATE_PTP_EN,
+	HCLGE_STATE_PTP_TX_HANDLING,
 	HCLGE_STATE_MAX
 };
 
@@ -237,6 +241,7 @@ enum hclge_evt_cause {
 	HCLGE_VECTOR0_EVENT_RST,
 	HCLGE_VECTOR0_EVENT_MBX,
 	HCLGE_VECTOR0_EVENT_ERR,
+	HCLGE_VECTOR0_EVENT_PTP,
 	HCLGE_VECTOR0_EVENT_OTHER,
 };
 
@@ -935,6 +940,7 @@ struct hclge_dev {
 	/* affinity mask and notify for misc interrupt */
 	cpumask_t affinity_mask;
 	struct irq_affinity_notify affinity_notify;
+	struct hclge_ptp *ptp;
 };
 
 /* VPort level vlan tag configuration for TX direction */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
new file mode 100644
index 000000000000..b3eb8f109dbb
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2021 Hisilicon Limited.
+
+#include <linux/skbuff.h>
+#include "hclge_main.h"
+#include "hnae3.h"
+
+static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+	u64 adj_val, adj_base, diff;
+	unsigned long flags;
+	bool is_neg = false;
+	u32 quo, numerator;
+
+	if (ppb < 0) {
+		ppb = -ppb;
+		is_neg = true;
+	}
+
+	adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT;
+	adj_val = adj_base * ppb;
+	diff = div_u64(adj_val, 1000000000ULL);
+
+	if (is_neg)
+		adj_val = adj_base - diff;
+	else
+		adj_val = adj_base + diff;
+
+	/* This clock cycle is defined by three part: quotient, numerator
+	 * and denominator. For example, 2.5ns, the quotient is 2,
+	 * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator
+	 * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT.
+	 */
+	quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator);
+
+	spin_lock_irqsave(&hdev->ptp->lock, flags);
+	writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
+	writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
+	writel(HCLGE_PTP_CYCLE_ADJ_UNIT,
+	       hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+	writel(HCLGE_PTP_CYCLE_ADJ_EN,
+	       hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG);
+	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+
+	return 0;
+}
+
+bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_ptp *ptp = hdev->ptp;
+
+	if (!test_bit(HCLGE_PTP_FLAG_TX_EN, &ptp->flags) ||
+	    test_and_set_bit(HCLGE_STATE_PTP_TX_HANDLING, &hdev->state)) {
+		ptp->tx_skipped++;
+		return false;
+	}
+
+	ptp->tx_start = jiffies;
+	ptp->tx_skb = skb_get(skb);
+	ptp->tx_cnt++;
+
+	return true;
+}
+
+void hclge_ptp_clean_tx_hwts(struct hclge_dev *hdev)
+{
+	struct sk_buff *skb = hdev->ptp->tx_skb;
+	struct skb_shared_hwtstamps hwts;
+	u32 hi, lo;
+	u64 ns;
+
+	ns = readl(hdev->ptp->io_base + HCLGE_PTP_TX_TS_NSEC_REG) &
+	     HCLGE_PTP_TX_TS_NSEC_MASK;
+	lo = readl(hdev->ptp->io_base + HCLGE_PTP_TX_TS_SEC_L_REG);
+	hi = readl(hdev->ptp->io_base + HCLGE_PTP_TX_TS_SEC_H_REG) &
+	     HCLGE_PTP_TX_TS_SEC_H_MASK;
+	hdev->ptp->last_tx_seqid = readl(hdev->ptp->io_base +
+		HCLGE_PTP_TX_TS_SEQID_REG);
+
+	if (skb) {
+		hdev->ptp->tx_skb = NULL;
+		hdev->ptp->tx_cleaned++;
+
+		ns += (((u64)hi) << 32 | lo) * NSEC_PER_SEC;
+		hwts.hwtstamp = ns_to_ktime(ns);
+		skb_tstamp_tx(skb, &hwts);
+		dev_kfree_skb_any(skb);
+	}
+
+	clear_bit(HCLGE_STATE_PTP_TX_HANDLING, &hdev->state);
+}
+
+void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb,
+			   u32 nsec, u32 sec)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	unsigned long flags;
+	u64 ns = nsec;
+	u32 sec_h;
+
+	if (!test_bit(HCLGE_PTP_FLAG_RX_EN, &hdev->ptp->flags))
+		return;
+
+	/* Since the BD does not have enough space for the higher 16 bits of
+	 * second, and this part will not change frequently, so read it
+	 * from register.
+	 */
+	spin_lock_irqsave(&hdev->ptp->lock, flags);
+	sec_h = readl(hdev->ptp->io_base + HCLGE_PTP_CUR_TIME_SEC_H_REG);
+	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+
+	ns += (((u64)sec_h) << HCLGE_PTP_SEC_H_OFFSET | sec) * NSEC_PER_SEC;
+	skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);
+	hdev->ptp->last_rx = jiffies;
+	hdev->ptp->rx_cnt++;
+}
+
+static int hclge_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			      struct ptp_system_timestamp *sts)
+{
+	struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+	unsigned long flags;
+	u32 hi, lo;
+	u64 ns;
+
+	spin_lock_irqsave(&hdev->ptp->lock, flags);
+	ns = readl(hdev->ptp->io_base + HCLGE_PTP_CUR_TIME_NSEC_REG);
+	hi = readl(hdev->ptp->io_base + HCLGE_PTP_CUR_TIME_SEC_H_REG);
+	lo = readl(hdev->ptp->io_base + HCLGE_PTP_CUR_TIME_SEC_L_REG);
+	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+
+	ns += (((u64)hi) << HCLGE_PTP_SEC_H_OFFSET | lo) * NSEC_PER_SEC;
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int hclge_ptp_settime(struct ptp_clock_info *ptp,
+			     const struct timespec64 *ts)
+{
+	struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hdev->ptp->lock, flags);
+	writel(ts->tv_nsec, hdev->ptp->io_base + HCLGE_PTP_TIME_NSEC_REG);
+	writel(ts->tv_sec >> HCLGE_PTP_SEC_H_OFFSET,
+	       hdev->ptp->io_base + HCLGE_PTP_TIME_SEC_H_REG);
+	writel(ts->tv_sec & HCLGE_PTP_SEC_L_MASK,
+	       hdev->ptp->io_base + HCLGE_PTP_TIME_SEC_L_REG);
+	/* synchronize the time of phc */
+	writel(HCLGE_PTP_TIME_SYNC_EN,
+	       hdev->ptp->io_base + HCLGE_PTP_TIME_SYNC_REG);
+	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+
+	return 0;
+}
+
+static int hclge_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+	unsigned long flags;
+	bool is_neg = false;
+	u32 adj_val = 0;
+
+	if (delta < 0) {
+		adj_val |= HCLGE_PTP_TIME_NSEC_NEG;
+		delta = -delta;
+		is_neg = true;
+	}
+
+	if (delta > HCLGE_PTP_TIME_NSEC_MASK) {
+		struct timespec64 ts;
+		s64 ns;
+
+		hclge_ptp_gettimex(ptp, &ts, NULL);
+		ns = timespec64_to_ns(&ts);
+		ns = is_neg ? ns - delta : ns + delta;
+		ts = ns_to_timespec64(ns);
+		return hclge_ptp_settime(ptp, &ts);
+	}
+
+	adj_val |= delta & HCLGE_PTP_TIME_NSEC_MASK;
+
+	spin_lock_irqsave(&hdev->ptp->lock, flags);
+	writel(adj_val, hdev->ptp->io_base + HCLGE_PTP_TIME_NSEC_REG);
+	writel(HCLGE_PTP_TIME_ADJ_EN,
+	       hdev->ptp->io_base + HCLGE_PTP_TIME_ADJ_REG);
+	spin_unlock_irqrestore(&hdev->ptp->lock, flags);
+
+	return 0;
+}
+
+int hclge_ptp_get_cfg(struct hclge_dev *hdev, struct ifreq *ifr)
+{
+	if (!test_bit(HCLGE_STATE_PTP_EN, &hdev->state))
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, &hdev->ptp->ts_cfg,
+		sizeof(struct hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int hclge_ptp_int_en(struct hclge_dev *hdev, bool en)
+{
+	struct hclge_ptp_int_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_ptp_int_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PTP_INT_EN, false);
+	req->int_en = en ? 1 : 0;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to %s ptp interrupt, ret = %d\n",
+			en ? "enable" : "disable", ret);
+
+	return ret;
+}
+
+int hclge_ptp_cfg_qry(struct hclge_dev *hdev, u32 *cfg)
+{
+	struct hclge_ptp_cfg_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_ptp_cfg_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PTP_MODE_CFG, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to query ptp config, ret = %d\n", ret);
+		return ret;
+	}
+
+	*cfg = le32_to_cpu(req->cfg);
+
+	return 0;
+}
+
+static int hclge_ptp_cfg(struct hclge_dev *hdev, u32 cfg)
+{
+	struct hclge_ptp_cfg_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	req = (struct hclge_ptp_cfg_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PTP_MODE_CFG, false);
+	req->cfg = cpu_to_le32(cfg);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to config ptp, ret = %d\n", ret);
+
+	return ret;
+}
+
+static int hclge_ptp_set_tx_mode(struct hwtstamp_config *cfg,
+				 unsigned long *flags, u32 *ptp_cfg)
+{
+	switch (cfg->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		clear_bit(HCLGE_PTP_FLAG_TX_EN, flags);
+		break;
+	case HWTSTAMP_TX_ON:
+		set_bit(HCLGE_PTP_FLAG_TX_EN, flags);
+		*ptp_cfg |= HCLGE_PTP_TX_EN_B;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static int hclge_ptp_set_rx_mode(struct hwtstamp_config *cfg,
+				 unsigned long *flags, u32 *ptp_cfg)
+{
+	int rx_filter = cfg->rx_filter;
+
+	switch (cfg->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		clear_bit(HCLGE_PTP_FLAG_RX_EN, flags);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		set_bit(HCLGE_PTP_FLAG_RX_EN, flags);
+		*ptp_cfg |= HCLGE_PTP_RX_EN_B;
+		*ptp_cfg |= HCLGE_PTP_UDP_FULL_TYPE << HCLGE_PTP_UDP_EN_SHIFT;
+		rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		set_bit(HCLGE_PTP_FLAG_RX_EN, flags);
+		*ptp_cfg |= HCLGE_PTP_RX_EN_B;
+		*ptp_cfg |= HCLGE_PTP_UDP_FULL_TYPE << HCLGE_PTP_UDP_EN_SHIFT;
+		*ptp_cfg |= HCLGE_PTP_MSG1_V2_DEFAULT << HCLGE_PTP_MSG1_SHIFT;
+		*ptp_cfg |= HCLGE_PTP_MSG0_V2_EVENT << HCLGE_PTP_MSG0_SHIFT;
+		*ptp_cfg |= HCLGE_PTP_MSG_TYPE_V2 << HCLGE_PTP_MSG_TYPE_SHIFT;
+		rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	default:
+		return -ERANGE;
+	}
+
+	cfg->rx_filter = rx_filter;
+
+	return 0;
+}
+
+static int hclge_ptp_set_ts_mode(struct hclge_dev *hdev,
+				 struct hwtstamp_config *cfg)
+{
+	unsigned long flags = hdev->ptp->flags;
+	u32 ptp_cfg = 0;
+	int ret;
+
+	if (test_bit(HCLGE_PTP_FLAG_EN, &hdev->ptp->flags))
+		ptp_cfg |= HCLGE_PTP_EN_B;
+
+	ret = hclge_ptp_set_tx_mode(cfg, &flags, &ptp_cfg);
+	if (ret)
+		return ret;
+
+	ret = hclge_ptp_set_rx_mode(cfg, &flags, &ptp_cfg);
+	if (ret)
+		return ret;
+
+	ret = hclge_ptp_cfg(hdev, ptp_cfg);
+	if (ret)
+		return ret;
+
+	hdev->ptp->flags = flags;
+	hdev->ptp->ptp_cfg = ptp_cfg;
+
+	return 0;
+}
+
+int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr)
+{
+	struct hwtstamp_config cfg;
+	int ret;
+
+	if (!test_bit(HCLGE_STATE_PTP_EN, &hdev->state)) {
+		dev_err(&hdev->pdev->dev, "phc is unsupported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	ret = hclge_ptp_set_ts_mode(hdev, &cfg);
+	if (ret)
+		return ret;
+
+	hdev->ptp->ts_cfg = cfg;
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+int hclge_ptp_get_ts_info(struct hnae3_handle *handle,
+			  struct ethtool_ts_info *info)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	if (!test_bit(HCLGE_STATE_PTP_EN, &hdev->state)) {
+		dev_err(&hdev->pdev->dev, "phc is unsupported\n");
+		return -EOPNOTSUPP;
+	}
+
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	if (hdev->ptp->clock)
+		info->phc_index = ptp_clock_index(hdev->ptp->clock);
+	else
+		info->phc_index = -1;
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ);
+
+	info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
+
+	return 0;
+}
+
+static int hclge_ptp_create_clock(struct hclge_dev *hdev)
+{
+#define HCLGE_PTP_NAME_LEN	32
+
+	struct hclge_ptp *ptp;
+
+	ptp = devm_kzalloc(&hdev->pdev->dev, sizeof(*ptp), GFP_KERNEL);
+	if (!ptp)
+		return -ENOMEM;
+
+	ptp->hdev = hdev;
+	snprintf(ptp->info.name, HCLGE_PTP_NAME_LEN, "%s",
+		 HCLGE_DRIVER_NAME);
+	ptp->info.owner = THIS_MODULE;
+	ptp->info.max_adj = HCLGE_PTP_CYCLE_ADJ_MAX;
+	ptp->info.n_ext_ts = 0;
+	ptp->info.pps = 0;
+	ptp->info.adjfreq = hclge_ptp_adjfreq;
+	ptp->info.adjtime = hclge_ptp_adjtime;
+	ptp->info.gettimex64 = hclge_ptp_gettimex;
+	ptp->info.settime64 = hclge_ptp_settime;
+
+	ptp->info.n_alarm = 0;
+	ptp->clock = ptp_clock_register(&ptp->info, &hdev->pdev->dev);
+	if (IS_ERR(ptp->clock)) {
+		dev_err(&hdev->pdev->dev,
+			"%d failed to register ptp clock, ret = %ld\n",
+			ptp->info.n_alarm, PTR_ERR(ptp->clock));
+		return -ENODEV;
+	} else if (!ptp->clock) {
+		dev_err(&hdev->pdev->dev, "failed to register ptp clock\n");
+		return -ENODEV;
+	}
+
+	spin_lock_init(&ptp->lock);
+	ptp->io_base = hdev->hw.io_base + HCLGE_PTP_REG_OFFSET;
+	ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE;
+	ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF;
+	hdev->ptp = ptp;
+
+	return 0;
+}
+
+static void hclge_ptp_destroy_clock(struct hclge_dev *hdev)
+{
+	ptp_clock_unregister(hdev->ptp->clock);
+	hdev->ptp->clock = NULL;
+	devm_kfree(&hdev->pdev->dev, hdev->ptp);
+	hdev->ptp = NULL;
+}
+
+int hclge_ptp_init(struct hclge_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	struct timespec64 ts;
+	int ret;
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_PTP_B, ae_dev->caps))
+		return 0;
+
+	if (!hdev->ptp) {
+		ret = hclge_ptp_create_clock(hdev);
+		if (ret)
+			return ret;
+	}
+
+	ret = hclge_ptp_int_en(hdev, true);
+	if (ret)
+		goto out;
+
+	set_bit(HCLGE_PTP_FLAG_EN, &hdev->ptp->flags);
+	ret = hclge_ptp_adjfreq(&hdev->ptp->info, 0);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to init freq, ret = %d\n", ret);
+		goto out;
+	}
+
+	ret = hclge_ptp_set_ts_mode(hdev, &hdev->ptp->ts_cfg);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to init ts mode, ret = %d\n", ret);
+		goto out;
+	}
+
+	ktime_get_real_ts64(&ts);
+	ret = hclge_ptp_settime(&hdev->ptp->info, &ts);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to init ts time, ret = %d\n", ret);
+		goto out;
+	}
+
+	set_bit(HCLGE_STATE_PTP_EN, &hdev->state);
+	dev_info(&hdev->pdev->dev, "phc initializes ok!\n");
+
+	return 0;
+
+out:
+	hclge_ptp_destroy_clock(hdev);
+
+	return ret;
+}
+
+void hclge_ptp_uninit(struct hclge_dev *hdev)
+{
+	struct hclge_ptp *ptp = hdev->ptp;
+
+	if (!ptp)
+		return;
+
+	hclge_ptp_int_en(hdev, false);
+	clear_bit(HCLGE_STATE_PTP_EN, &hdev->state);
+	clear_bit(HCLGE_PTP_FLAG_EN, &ptp->flags);
+	ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE;
+	ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF;
+
+	if (hclge_ptp_set_ts_mode(hdev, &ptp->ts_cfg))
+		dev_err(&hdev->pdev->dev, "failed to disable phc\n");
+
+	if (ptp->tx_skb) {
+		struct sk_buff *skb = ptp->tx_skb;
+
+		ptp->tx_skb = NULL;
+		dev_kfree_skb_any(skb);
+	}
+
+	hclge_ptp_destroy_clock(hdev);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
new file mode 100644
index 000000000000..b3ca7afdaaa6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+// Copyright (c) 2021 Hisilicon Limited.
+
+#ifndef __HCLGE_PTP_H
+#define __HCLGE_PTP_H
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+#include <linux/types.h>
+
+#define HCLGE_PTP_REG_OFFSET	0x29000
+
+#define HCLGE_PTP_TX_TS_SEQID_REG	0x0
+#define HCLGE_PTP_TX_TS_NSEC_REG	0x4
+#define HCLGE_PTP_TX_TS_NSEC_MASK	GENMASK(29, 0)
+#define HCLGE_PTP_TX_TS_SEC_L_REG	0x8
+#define HCLGE_PTP_TX_TS_SEC_H_REG	0xC
+#define HCLGE_PTP_TX_TS_SEC_H_MASK	GENMASK(15, 0)
+#define HCLGE_PTP_TX_TS_CNT_REG		0x30
+
+#define HCLGE_PTP_TIME_SEC_H_REG	0x50
+#define HCLGE_PTP_TIME_SEC_H_MASK	GENMASK(15, 0)
+#define HCLGE_PTP_TIME_SEC_L_REG	0x54
+#define HCLGE_PTP_TIME_NSEC_REG		0x58
+#define HCLGE_PTP_TIME_NSEC_MASK	GENMASK(29, 0)
+#define HCLGE_PTP_TIME_NSEC_NEG		BIT(31)
+#define HCLGE_PTP_TIME_SYNC_REG		0x5C
+#define HCLGE_PTP_TIME_SYNC_EN		BIT(0)
+#define HCLGE_PTP_TIME_ADJ_REG		0x60
+#define HCLGE_PTP_TIME_ADJ_EN		BIT(0)
+#define HCLGE_PTP_CYCLE_QUO_REG		0x64
+#define HCLGE_PTP_CYCLE_DEN_REG		0x68
+#define HCLGE_PTP_CYCLE_NUM_REG		0x6C
+#define HCLGE_PTP_CYCLE_CFG_REG		0x70
+#define HCLGE_PTP_CYCLE_ADJ_EN		BIT(0)
+#define HCLGE_PTP_CUR_TIME_SEC_H_REG	0x74
+#define HCLGE_PTP_CUR_TIME_SEC_L_REG	0x78
+#define HCLGE_PTP_CUR_TIME_NSEC_REG	0x7C
+
+#define HCLGE_PTP_CYCLE_ADJ_BASE	2
+#define HCLGE_PTP_CYCLE_ADJ_MAX		500000000
+#define HCLGE_PTP_CYCLE_ADJ_UNIT	100000000
+#define HCLGE_PTP_SEC_H_OFFSET		32u
+#define HCLGE_PTP_SEC_L_MASK		GENMASK(31, 0)
+
+#define HCLGE_PTP_FLAG_EN		BIT(0)
+#define HCLGE_PTP_FLAG_TX_EN		BIT(1)
+#define HCLGE_PTP_FLAG_RX_EN		BIT(2)
+
+struct hclge_ptp {
+	struct hclge_dev *hdev;
+	struct ptp_clock *clock;
+	struct sk_buff *tx_skb;
+	unsigned long flags;
+	void __iomem *io_base;
+	struct ptp_clock_info info;
+	struct hwtstamp_config ts_cfg;
+	spinlock_t lock;	/* protects ptp registers */
+	u32 ptp_cfg;
+	u32 last_tx_seqid;
+	unsigned long tx_start;
+	unsigned long tx_cnt;
+	unsigned long tx_skipped;
+	unsigned long tx_cleaned;
+	unsigned long last_rx;
+	unsigned long rx_cnt;
+	unsigned long tx_timeout;
+};
+
+struct hclge_ptp_int_cmd {
+#define HCLGE_PTP_INT_EN_B	BIT(0)
+
+	u8 int_en;
+	u8 rsvd[23];
+};
+
+enum hclge_ptp_udp_type {
+	HCLGE_PTP_UDP_NOT_TYPE,
+	HCLGE_PTP_UDP_P13F_TYPE,
+	HCLGE_PTP_UDP_P140_TYPE,
+	HCLGE_PTP_UDP_FULL_TYPE,
+};
+
+enum hclge_ptp_msg_type {
+	HCLGE_PTP_MSG_TYPE_V2_L2,
+	HCLGE_PTP_MSG_TYPE_V2,
+	HCLGE_PTP_MSG_TYPE_V2_EVENT,
+};
+
+enum hclge_ptp_msg0_type {
+	HCLGE_PTP_MSG0_V2_DELAY_REQ = 1,
+	HCLGE_PTP_MSG0_V2_PDELAY_REQ,
+	HCLGE_PTP_MSG0_V2_DELAY_RESP,
+	HCLGE_PTP_MSG0_V2_EVENT = 0xF,
+};
+
+#define HCLGE_PTP_MSG1_V2_DEFAULT	1
+
+struct hclge_ptp_cfg_cmd {
+#define HCLGE_PTP_EN_B			BIT(0)
+#define HCLGE_PTP_TX_EN_B		BIT(1)
+#define HCLGE_PTP_RX_EN_B		BIT(2)
+#define HCLGE_PTP_UDP_EN_SHIFT		3
+#define HCLGE_PTP_UDP_EN_MASK		GENMASK(4, 3)
+#define HCLGE_PTP_MSG_TYPE_SHIFT	8
+#define HCLGE_PTP_MSG_TYPE_MASK		GENMASK(9, 8)
+#define HCLGE_PTP_MSG1_SHIFT		16
+#define HCLGE_PTP_MSG1_MASK		GENMASK(19, 16)
+#define HCLGE_PTP_MSG0_SHIFT		24
+#define HCLGE_PTP_MSG0_MASK		GENMASK(27, 24)
+
+	__le32 cfg;
+	u8 rsvd[20];
+};
+
+static inline struct hclge_dev *hclge_ptp_get_hdev(struct ptp_clock_info *info)
+{
+	struct hclge_ptp *ptp = container_of(info, struct hclge_ptp, info);
+
+	return ptp->hdev;
+}
+
+bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb);
+void hclge_ptp_clean_tx_hwts(struct hclge_dev *dev);
+void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb,
+			   u32 nsec, u32 sec);
+int hclge_ptp_get_cfg(struct hclge_dev *hdev, struct ifreq *ifr);
+int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr);
+int hclge_ptp_init(struct hclge_dev *hdev);
+void hclge_ptp_uninit(struct hclge_dev *hdev);
+int hclge_ptp_get_ts_info(struct hnae3_handle *handle,
+			  struct ethtool_ts_info *info);
+int hclge_ptp_cfg_qry(struct hclge_dev *hdev, u32 *cfg);
+#endif

From b34c157f0cdd0b9e52c002288ff77b1a553dd438 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 10 Jun 2021 21:38:57 +0800
Subject: [PATCH 1467/3292] net: hns3: add debugfs support for ptp info

Add a debugfs interface for dumping ptp information, which
is helpful for debugging.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    | 13 ++++-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 55 +++++++++++++++++++
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index f6f524c2ab7b..ba883b0a19f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -276,6 +276,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_MAC_MC,
 	HNAE3_DBG_CMD_MNG_TBL,
 	HNAE3_DBG_CMD_LOOPBACK,
+	HNAE3_DBG_CMD_PTP_INFO,
 	HNAE3_DBG_CMD_INTERRUPT_INFO,
 	HNAE3_DBG_CMD_RESET_INFO,
 	HNAE3_DBG_CMD_IMP_INFO,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index a0edca848392..c512a63c423b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -316,6 +316,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "ptp_info",
+		.cmd = HNAE3_DBG_CMD_PTP_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
@@ -1059,8 +1066,10 @@ int hns3_dbg_init(struct hnae3_handle *handle)
 					   handle->hnae3_dbgfs);
 
 	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
-		if (hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES &&
-		    ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2)
+		if ((hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES &&
+		     ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) ||
+		    (hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_PTP_INFO &&
+		     !test_bit(HNAE3_DEV_SUPPORT_PTP_B, ae_dev->caps)))
 			continue;
 
 		if (!hns3_dbg_cmd[i].init) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 0d433a5ff807..6fc50d09b9db 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -2173,6 +2173,57 @@ static int hclge_dbg_dump_vlan_config(struct hclge_dev *hdev, char *buf,
 	return hclge_dbg_dump_vlan_offload_config(hdev, buf, len, &pos);
 }
 
+static int hclge_dbg_dump_ptp_info(struct hclge_dev *hdev, char *buf, int len)
+{
+	struct hclge_ptp *ptp = hdev->ptp;
+	u32 sw_cfg = ptp->ptp_cfg;
+	unsigned int tx_start;
+	unsigned int last_rx;
+	int pos = 0;
+	u32 hw_cfg;
+	int ret;
+
+	pos += scnprintf(buf + pos, len - pos, "phc %s's debug info:\n",
+			 ptp->info.name);
+	pos += scnprintf(buf + pos, len - pos, "ptp enable: %s\n",
+			 test_bit(HCLGE_PTP_FLAG_EN, &ptp->flags) ?
+			 "yes" : "no");
+	pos += scnprintf(buf + pos, len - pos, "ptp tx enable: %s\n",
+			 test_bit(HCLGE_PTP_FLAG_TX_EN, &ptp->flags) ?
+			 "yes" : "no");
+	pos += scnprintf(buf + pos, len - pos, "ptp rx enable: %s\n",
+			 test_bit(HCLGE_PTP_FLAG_RX_EN, &ptp->flags) ?
+			 "yes" : "no");
+
+	last_rx = jiffies_to_msecs(ptp->last_rx);
+	pos += scnprintf(buf + pos, len - pos, "last rx time: %lu.%lu\n",
+			 last_rx / MSEC_PER_SEC, last_rx % MSEC_PER_SEC);
+	pos += scnprintf(buf + pos, len - pos, "rx count: %lu\n", ptp->rx_cnt);
+
+	tx_start = jiffies_to_msecs(ptp->tx_start);
+	pos += scnprintf(buf + pos, len - pos, "last tx start time: %lu.%lu\n",
+			 tx_start / MSEC_PER_SEC, tx_start % MSEC_PER_SEC);
+	pos += scnprintf(buf + pos, len - pos, "tx count: %lu\n", ptp->tx_cnt);
+	pos += scnprintf(buf + pos, len - pos, "tx skipped count: %lu\n",
+			 ptp->tx_skipped);
+	pos += scnprintf(buf + pos, len - pos, "tx timeout count: %lu\n",
+			 ptp->tx_timeout);
+	pos += scnprintf(buf + pos, len - pos, "last tx seqid: %u\n",
+			 ptp->last_tx_seqid);
+
+	ret = hclge_ptp_cfg_qry(hdev, &hw_cfg);
+	if (ret)
+		return ret;
+
+	pos += scnprintf(buf + pos, len - pos, "sw_cfg: %#x, hw_cfg: %#x\n",
+			 sw_cfg, hw_cfg);
+
+	pos += scnprintf(buf + pos, len - pos, "tx type: %d, rx filter: %d\n",
+			 ptp->ts_cfg.tx_type, ptp->ts_cfg.rx_filter);
+
+	return 0;
+}
+
 static int hclge_dbg_dump_mac_uc(struct hclge_dev *hdev, char *buf, int len)
 {
 	hclge_dbg_dump_mac_list(hdev, buf, len, true);
@@ -2244,6 +2295,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_LOOPBACK,
 		.dbg_dump = hclge_dbg_dump_loopback,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_PTP_INFO,
+		.dbg_dump = hclge_dbg_dump_ptp_info,
+	},
 	{
 		.cmd = HNAE3_DBG_CMD_INTERRUPT_INFO,
 		.dbg_dump = hclge_dbg_dump_interrupt,

From 29305260d29ec4ed9a657af818f2744a6ee09913 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:22 +0300
Subject: [PATCH 1468/3292] net: dsa: sja1105: enable the TTEthernet engine on
 SJA1110

As opposed to SJA1105 where there are parts with TTEthernet and parts
without, in SJA1110 all parts support it, but it must be enabled in the
static config. So enable it unconditionally. We use it for the tc-taprio
and tc-gate offload.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 3b031864ad74..de132a7a4a7a 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -673,6 +673,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		 */
 		.tpid = ETH_P_SJA1105,
 		.tpid2 = ETH_P_SJA1105,
+		/* Enable the TTEthernet engine on SJA1110 */
+		.tte_en = true,
 	};
 	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;

From 6c0de59b3d735f4c8c704dae30db540204b496ec Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:23 +0300
Subject: [PATCH 1469/3292] net: dsa: sja1105: allow RX timestamps to be taken
 on all ports for SJA1110

On SJA1105, there is support for a cascade port which is presumably
connected to a downstream SJA1105 switch. The upstream one does not take
PTP timestamps for packets received on this port, presumably because the
downstream switch already did (and for PTP, it only makes sense for the
leaf nodes in a DSA switch tree to do that).

I haven't been able to validate that feature in a fully assembled setup,
so I am disabling the feature by setting the cascade port to an unused
port value (ds->num_ports).

In SJA1110, multiple cascade ports are supported, and CASC_PORT became
a bit mask from a port number. So when CASC_PORT is set to ds->num_ports
(which is 11 on SJA1110), it is actually set to 0b1011, so ports 3, 1
and 0 are configured as cascade ports and we cannot take RX timestamps
on them.

So we need to introduce a check for SJA1110 and set things differently
(to zero there), so that the cascading feature is properly disabled and
RX timestamps can be taken on all ports.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c | 27 ++++++++++++++++----------
 drivers/net/dsa/sja1105/sja1105_spi.c  |  4 ++++
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index f762f5488a76..4d192331754c 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -109,6 +109,7 @@ struct sja1105_info {
 	int num_cbs_shapers;
 	int max_frame_mem;
 	int num_ports;
+	bool multiple_cascade_ports;
 	const struct sja1105_dynamic_table_ops *dyn_ops;
 	const struct sja1105_table_ops *static_ops;
 	const struct sja1105_regs *regs;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index de132a7a4a7a..850bbc793369 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -654,14 +654,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		.host_port = priv->ds->num_ports,
 		/* Default to an invalid value */
 		.mirr_port = priv->ds->num_ports,
-		/* Link-local traffic received on casc_port will be forwarded
-		 * to host_port without embedding the source port and device ID
-		 * info in the destination MAC address (presumably because it
-		 * is a cascaded port and a downstream SJA switch already did
-		 * that). Default to an invalid port (to disable the feature)
-		 * and overwrite this if we find any DSA (cascaded) ports.
-		 */
-		.casc_port = priv->ds->num_ports,
 		/* No TTEthernet */
 		.vllupformat = SJA1105_VL_FORMAT_PSFP,
 		.vlmarker = 0,
@@ -676,6 +668,7 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		/* Enable the TTEthernet engine on SJA1110 */
 		.tte_en = true,
 	};
+	struct sja1105_general_params_entry *general_params;
 	struct dsa_switch *ds = priv->ds;
 	struct sja1105_table *table;
 	int port;
@@ -701,12 +694,26 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 
 	table->entry_count = table->ops->max_entry_count;
 
+	general_params = table->entries;
+
 	/* This table only has a single entry */
-	((struct sja1105_general_params_entry *)table->entries)[0] =
-				default_general_params;
+	general_params[0] = default_general_params;
 
 	sja1110_select_tdmaconfigidx(priv);
 
+	/* Link-local traffic received on casc_port will be forwarded
+	 * to host_port without embedding the source port and device ID
+	 * info in the destination MAC address, and no RX timestamps will be
+	 * taken either (presumably because it is a cascaded port and a
+	 * downstream SJA switch already did that).
+	 * To disable the feature, we need to do different things depending on
+	 * switch generation. On SJA1105 we need to set an invalid port, while
+	 * on SJA1110 which support multiple cascaded ports, this field is a
+	 * bitmask so it must be left zero.
+	 */
+	if (!priv->info->multiple_cascade_ports)
+		general_params->casc_port = ds->num_ports;
+
 	return 0;
 }
 
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 54ecb5565761..e6c2a37aa617 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -763,6 +763,7 @@ const struct sja1105_info sja1110a_info = {
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
 	.can_limit_mcast_flood	= true,
+	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
@@ -808,6 +809,7 @@ const struct sja1105_info sja1110b_info = {
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
 	.can_limit_mcast_flood	= true,
+	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
@@ -853,6 +855,7 @@ const struct sja1105_info sja1110c_info = {
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
 	.can_limit_mcast_flood	= true,
+	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,
@@ -898,6 +901,7 @@ const struct sja1105_info sja1110d_info = {
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
 	.can_limit_mcast_flood	= true,
+	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.max_frame_mem		= SJA1110_MAX_FRAME_MEMORY,

From 4e50025129efabb07714c1f27a80526897da374b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:24 +0300
Subject: [PATCH 1470/3292] net: dsa: generalize overhead for taggers that use
 both headers and trailers

Some really really weird switches just couldn't decide whether to use a
normal or a tail tagger, so they just did both.

This creates problems for DSA, because we only have the concept of an
'overhead' which can be applied to the headroom or to the tailroom of
the skb (like for example during the central TX reallocation procedure),
depending on the value of bool tail_tag, but not to both.

We need to generalize DSA to cater for these odd switches by
transforming the 'overhead / tail_tag' pair into 'needed_headroom /
needed_tailroom'.

The DSA master's MTU is increased to account for both.

The flow dissector code is modified such that it only calls the DSA
adjustment callback if the tagger has a non-zero header length.

Taggers are trivially modified to declare either needed_headroom or
needed_tailroom, based on the tail_tag value that they currently
declare.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 21 +++++++++++----------
 include/net/dsa.h                    |  6 +++---
 net/core/flow_dissector.c            |  2 +-
 net/dsa/dsa_priv.h                   |  5 +++++
 net/dsa/master.c                     |  6 ++++--
 net/dsa/slave.c                      | 10 ++++------
 net/dsa/tag_ar9331.c                 |  2 +-
 net/dsa/tag_brcm.c                   |  6 +++---
 net/dsa/tag_dsa.c                    |  4 ++--
 net/dsa/tag_gswip.c                  |  2 +-
 net/dsa/tag_hellcreek.c              |  3 +--
 net/dsa/tag_ksz.c                    |  9 +++------
 net/dsa/tag_lan9303.c                |  2 +-
 net/dsa/tag_mtk.c                    |  2 +-
 net/dsa/tag_ocelot.c                 |  4 ++--
 net/dsa/tag_ocelot_8021q.c           |  2 +-
 net/dsa/tag_qca.c                    |  2 +-
 net/dsa/tag_rtl4_a.c                 |  2 +-
 net/dsa/tag_sja1105.c                |  2 +-
 net/dsa/tag_trailer.c                |  3 +--
 net/dsa/tag_xrs700x.c                |  3 +--
 21 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 8688009514cc..20baacf2bc5c 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -93,14 +93,15 @@ A tagging protocol may tag all packets with switch tags of the same length, or
 the tag length might vary (for example packets with PTP timestamps might
 require an extended switch tag, or there might be one tag length on TX and a
 different one on RX). Either way, the tagging protocol driver must populate the
-``struct dsa_device_ops::overhead`` with the length in octets of the longest
-switch frame header. The DSA framework will automatically adjust the MTU of the
-master interface to accomodate for this extra size in order for DSA user ports
-to support the standard MTU (L2 payload length) of 1500 octets. The ``overhead``
-is also used to request from the network stack, on a best-effort basis, the
-allocation of packets with a ``needed_headroom`` or ``needed_tailroom``
-sufficient such that the act of pushing the switch tag on transmission of a
-packet does not cause it to reallocate due to lack of memory.
+``struct dsa_device_ops::needed_headroom`` and/or ``struct dsa_device_ops::needed_tailroom``
+with the length in octets of the longest switch frame header/trailer. The DSA
+framework will automatically adjust the MTU of the master interface to
+accommodate for this extra size in order for DSA user ports to support the
+standard MTU (L2 payload length) of 1500 octets. The ``needed_headroom`` and
+``needed_tailroom`` properties are also used to request from the network stack,
+on a best-effort basis, the allocation of packets with enough extra space such
+that the act of pushing the switch tag on transmission of a packet does not
+cause it to reallocate due to lack of memory.
 
 Even though applications are not expected to parse DSA-specific frame headers,
 the format on the wire of the tagging protocol represents an Application Binary
@@ -169,8 +170,8 @@ The job of this method is to prepare the skb in a way that the switch will
 understand what egress port the packet is for (and not deliver it towards other
 ports). Typically this is fulfilled by pushing a frame header. Checking for
 insufficient size in the skb headroom or tailroom is unnecessary provided that
-the ``overhead`` and ``tail_tag`` properties were filled out properly, because
-DSA ensures there is enough space before calling this method.
+the ``needed_headroom`` and ``needed_tailroom`` properties were filled out
+properly, because DSA ensures there is enough space before calling this method.
 
 The reception of a packet goes through the tagger's ``rcv`` function. The
 passed ``struct sk_buff *skb`` has ``skb->data`` pointing at
diff --git a/include/net/dsa.h b/include/net/dsa.h
index e1a2610a0e06..0a10f6fffc3d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -91,7 +91,8 @@ struct dsa_device_ops {
 	 * as regular on the master net device.
 	 */
 	bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
-	unsigned int overhead;
+	unsigned int needed_headroom;
+	unsigned int needed_tailroom;
 	const char *name;
 	enum dsa_tag_protocol proto;
 	/* Some tagging protocols either mangle or shift the destination MAC
@@ -100,7 +101,6 @@ struct dsa_device_ops {
 	 * its RX filter.
 	 */
 	bool promisc_on_master;
-	bool tail_tag;
 };
 
 /* This structure defines the control interfaces that are overlayed by the
@@ -926,7 +926,7 @@ static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb,
 {
 #if IS_ENABLED(CONFIG_NET_DSA)
 	const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops;
-	int tag_len = ops->overhead;
+	int tag_len = ops->needed_headroom;
 
 	*offset = tag_len;
 	*proto = ((__be16 *)skb->data)[(tag_len / 2) - 1];
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3ed7c98a98e1..c04455981c1e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -944,7 +944,7 @@ bool __skb_flow_dissect(const struct net *net,
 
 			ops = skb->dev->dsa_ptr->tag_ops;
 			/* Tail taggers don't break flow dissection */
-			if (!ops->tail_tag) {
+			if (!ops->needed_headroom) {
 				if (ops->flow_dissect)
 					ops->flow_dissect(skb, &proto, &offset);
 				else
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 92282de54230..b8b17474b72b 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -154,6 +154,11 @@ const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
 bool dsa_schedule_work(struct work_struct *work);
 const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
 
+static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
+{
+	return ops->needed_headroom + ops->needed_tailroom;
+}
+
 /* master.c */
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
 void dsa_master_teardown(struct net_device *dev);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 63adbc21a735..3fc90e36772d 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -346,10 +346,12 @@ static struct lock_class_key dsa_master_addr_list_lock_key;
 
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
-	int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead;
+	const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops;
 	struct dsa_switch *ds = cpu_dp->ds;
 	struct device_link *consumer_link;
-	int ret;
+	int mtu, ret;
+
+	mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
 
 	/* The DSA master must use SET_NETDEV_DEV for this to work. */
 	consumer_link = device_link_add(ds->dev, dev->dev.parent,
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d4756b920108..3ca509eb284d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1569,7 +1569,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 
 	mtu_limit = min_t(int, master->max_mtu, dev->max_mtu);
 	old_master_mtu = master->mtu;
-	new_master_mtu = largest_mtu + cpu_dp->tag_ops->overhead;
+	new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops);
 	if (new_master_mtu > mtu_limit)
 		return -ERANGE;
 
@@ -1605,7 +1605,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 out_port_failed:
 	if (new_master_mtu != old_master_mtu)
 		dsa_port_mtu_change(cpu_dp, old_master_mtu -
-				    cpu_dp->tag_ops->overhead,
+				    dsa_tag_protocol_overhead(cpu_dp->tag_ops),
 				    true);
 out_cpu_failed:
 	if (new_master_mtu != old_master_mtu)
@@ -1824,10 +1824,8 @@ void dsa_slave_setup_tagger(struct net_device *slave)
 	const struct dsa_port *cpu_dp = dp->cpu_dp;
 	struct net_device *master = cpu_dp->master;
 
-	if (cpu_dp->tag_ops->tail_tag)
-		slave->needed_tailroom = cpu_dp->tag_ops->overhead;
-	else
-		slave->needed_headroom = cpu_dp->tag_ops->overhead;
+	slave->needed_headroom = cpu_dp->tag_ops->needed_headroom;
+	slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom;
 	/* Try to save one extra realloc later in the TX path (in the master)
 	 * by also inheriting the master's needed headroom and tailroom.
 	 * The 8021q driver also does this.
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 002cf7f952e2..0efae1a372b3 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -85,7 +85,7 @@ static const struct dsa_device_ops ar9331_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_AR9331,
 	.xmit	= ar9331_tag_xmit,
 	.rcv	= ar9331_tag_rcv,
-	.overhead = AR9331_HDR_LEN,
+	.needed_headroom = AR9331_HDR_LEN,
 };
 
 MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 40e9f3098c8d..0750af951fc9 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -205,7 +205,7 @@ static const struct dsa_device_ops brcm_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_BRCM,
 	.xmit	= brcm_tag_xmit,
 	.rcv	= brcm_tag_rcv,
-	.overhead = BRCM_TAG_LEN,
+	.needed_headroom = BRCM_TAG_LEN,
 };
 
 DSA_TAG_DRIVER(brcm_netdev_ops);
@@ -286,7 +286,7 @@ static const struct dsa_device_ops brcm_legacy_netdev_ops = {
 	.proto = DSA_TAG_PROTO_BRCM_LEGACY,
 	.xmit = brcm_leg_tag_xmit,
 	.rcv = brcm_leg_tag_rcv,
-	.overhead = BRCM_LEG_TAG_LEN,
+	.needed_headroom = BRCM_LEG_TAG_LEN,
 };
 
 DSA_TAG_DRIVER(brcm_legacy_netdev_ops);
@@ -314,7 +314,7 @@ static const struct dsa_device_ops brcm_prepend_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_BRCM_PREPEND,
 	.xmit	= brcm_tag_xmit_prepend,
 	.rcv	= brcm_tag_rcv_prepend,
-	.overhead = BRCM_TAG_LEN,
+	.needed_headroom = BRCM_TAG_LEN,
 };
 
 DSA_TAG_DRIVER(brcm_prepend_netdev_ops);
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 7e7b7decdf39..a822355afc90 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -303,7 +303,7 @@ static const struct dsa_device_ops dsa_netdev_ops = {
 	.proto	  = DSA_TAG_PROTO_DSA,
 	.xmit	  = dsa_xmit,
 	.rcv	  = dsa_rcv,
-	.overhead = DSA_HLEN,
+	.needed_headroom = DSA_HLEN,
 };
 
 DSA_TAG_DRIVER(dsa_netdev_ops);
@@ -346,7 +346,7 @@ static const struct dsa_device_ops edsa_netdev_ops = {
 	.proto	  = DSA_TAG_PROTO_EDSA,
 	.xmit	  = edsa_xmit,
 	.rcv	  = edsa_rcv,
-	.overhead = EDSA_HLEN,
+	.needed_headroom = EDSA_HLEN,
 };
 
 DSA_TAG_DRIVER(edsa_netdev_ops);
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 2f5bd5e338ab..5985dab06ab8 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -103,7 +103,7 @@ static const struct dsa_device_ops gswip_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_GSWIP,
 	.xmit = gswip_tag_xmit,
 	.rcv = gswip_tag_rcv,
-	.overhead = GSWIP_RX_HEADER_LEN,
+	.needed_headroom = GSWIP_RX_HEADER_LEN,
 };
 
 MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index a09805c8e1ab..424130f85f59 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -54,8 +54,7 @@ static const struct dsa_device_ops hellcreek_netdev_ops = {
 	.proto	  = DSA_TAG_PROTO_HELLCREEK,
 	.xmit	  = hellcreek_xmit,
 	.rcv	  = hellcreek_rcv,
-	.overhead = HELLCREEK_TAG_LEN,
-	.tail_tag = true,
+	.needed_tailroom = HELLCREEK_TAG_LEN,
 };
 
 MODULE_LICENSE("Dual MIT/GPL");
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 4820dbcedfa2..53565f48934c 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -77,8 +77,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_KSZ8795,
 	.xmit	= ksz8795_xmit,
 	.rcv	= ksz8795_rcv,
-	.overhead = KSZ_INGRESS_TAG_LEN,
-	.tail_tag = true,
+	.needed_tailroom = KSZ_INGRESS_TAG_LEN,
 };
 
 DSA_TAG_DRIVER(ksz8795_netdev_ops);
@@ -149,8 +148,7 @@ static const struct dsa_device_ops ksz9477_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_KSZ9477,
 	.xmit	= ksz9477_xmit,
 	.rcv	= ksz9477_rcv,
-	.overhead = KSZ9477_INGRESS_TAG_LEN,
-	.tail_tag = true,
+	.needed_tailroom = KSZ9477_INGRESS_TAG_LEN,
 };
 
 DSA_TAG_DRIVER(ksz9477_netdev_ops);
@@ -183,8 +181,7 @@ static const struct dsa_device_ops ksz9893_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_KSZ9893,
 	.xmit	= ksz9893_xmit,
 	.rcv	= ksz9477_rcv,
-	.overhead = KSZ_INGRESS_TAG_LEN,
-	.tail_tag = true,
+	.needed_tailroom = KSZ_INGRESS_TAG_LEN,
 };
 
 DSA_TAG_DRIVER(ksz9893_netdev_ops);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index aa1318dccaf0..26207ef39ebc 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -125,7 +125,7 @@ static const struct dsa_device_ops lan9303_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_LAN9303,
 	.xmit = lan9303_xmit,
 	.rcv = lan9303_rcv,
-	.overhead = LAN9303_TAG_LEN,
+	.needed_headroom = LAN9303_TAG_LEN,
 };
 
 MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index f9b2966d1936..cc3ba864ad5b 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -102,7 +102,7 @@ static const struct dsa_device_ops mtk_netdev_ops = {
 	.proto		= DSA_TAG_PROTO_MTK,
 	.xmit		= mtk_tag_xmit,
 	.rcv		= mtk_tag_rcv,
-	.overhead	= MTK_HDR_LEN,
+	.needed_headroom = MTK_HDR_LEN,
 };
 
 MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 91f0fd1242cd..190f4bfd3bef 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -143,7 +143,7 @@ static const struct dsa_device_ops ocelot_netdev_ops = {
 	.proto			= DSA_TAG_PROTO_OCELOT,
 	.xmit			= ocelot_xmit,
 	.rcv			= ocelot_rcv,
-	.overhead		= OCELOT_TOTAL_TAG_LEN,
+	.needed_headroom	= OCELOT_TOTAL_TAG_LEN,
 	.promisc_on_master	= true,
 };
 
@@ -155,7 +155,7 @@ static const struct dsa_device_ops seville_netdev_ops = {
 	.proto			= DSA_TAG_PROTO_SEVILLE,
 	.xmit			= seville_xmit,
 	.rcv			= ocelot_rcv,
-	.overhead		= OCELOT_TOTAL_TAG_LEN,
+	.needed_headroom	= OCELOT_TOTAL_TAG_LEN,
 	.promisc_on_master	= true,
 };
 
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 62a93303bd63..663b74793cfc 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -73,7 +73,7 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = {
 	.proto			= DSA_TAG_PROTO_OCELOT_8021Q,
 	.xmit			= ocelot_xmit,
 	.rcv			= ocelot_rcv,
-	.overhead		= VLAN_HLEN,
+	.needed_headroom	= VLAN_HLEN,
 	.promisc_on_master	= true,
 };
 
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 88181b52f480..693bda013065 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -91,7 +91,7 @@ static const struct dsa_device_ops qca_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_QCA,
 	.xmit	= qca_tag_xmit,
 	.rcv	= qca_tag_rcv,
-	.overhead = QCA_HDR_LEN,
+	.needed_headroom = QCA_HDR_LEN,
 };
 
 MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index cf8ac316f4c7..57c46b4ab2b3 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -124,7 +124,7 @@ static const struct dsa_device_ops rtl4a_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_RTL4_A,
 	.xmit	= rtl4a_tag_xmit,
 	.rcv	= rtl4a_tag_rcv,
-	.overhead = RTL4_A_HDR_LEN,
+	.needed_headroom = RTL4_A_HDR_LEN,
 };
 module_dsa_tag_driver(rtl4a_netdev_ops);
 
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 50496013cdb7..ff4a81eae16f 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -362,7 +362,7 @@ static const struct dsa_device_ops sja1105_netdev_ops = {
 	.xmit = sja1105_xmit,
 	.rcv = sja1105_rcv,
 	.filter = sja1105_filter,
-	.overhead = VLAN_HLEN,
+	.needed_headroom = VLAN_HLEN,
 	.flow_dissect = sja1105_flow_dissect,
 	.promisc_on_master = true,
 };
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 5b97ede56a0f..ba73804340a5 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -55,8 +55,7 @@ static const struct dsa_device_ops trailer_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_TRAILER,
 	.xmit	= trailer_xmit,
 	.rcv	= trailer_rcv,
-	.overhead = 4,
-	.tail_tag = true,
+	.needed_tailroom = 4,
 };
 
 MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index 858cdf9d2913..a31ff7fcb45f 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -56,8 +56,7 @@ static const struct dsa_device_ops xrs700x_netdev_ops = {
 	.proto	= DSA_TAG_PROTO_XRS700X,
 	.xmit	= xrs700x_xmit,
 	.rcv	= xrs700x_rcv,
-	.overhead = 1,
-	.tail_tag = true,
+	.needed_tailroom = 1,
 };
 
 MODULE_LICENSE("GPL");

From baa3ad08de6d44a40b94ef1a65640b5076755f9d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:25 +0300
Subject: [PATCH 1471/3292] net: dsa: tag_sja1105: stop resetting network and
 transport headers

This makes no sense and is not needed, it is probably a debugging
leftover.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_sja1105.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index ff4a81eae16f..92e147293acf 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -307,8 +307,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 			__skb_vlan_pop(skb, &tci);
 		}
 		skb_pull_rcsum(skb, ETH_HLEN);
-		skb_reset_network_header(skb);
-		skb_reset_transport_header(skb);
 
 		vid = tci & VLAN_VID_MASK;
 		source_port = dsa_8021q_rx_source_port(vid);

From ab6a303c5440156dd475b5884cff26a7245630f8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:26 +0300
Subject: [PATCH 1472/3292] net: dsa: tag_8021q: remove shim declarations

All users of tag_8021q select it in Kconfig, so shim functions are not
needed because it is not possible for it to be disabled and its callers
enabled.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 76 ---------------------------------------
 1 file changed, 76 deletions(-)

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index b12b05f1c8b4..cbf2c9b1ee4f 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -37,8 +37,6 @@ struct dsa_8021q_context {
 
 #define DSA_8021Q_N_SUBVLAN			8
 
-#if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q)
-
 int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled);
 
 int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
@@ -70,78 +68,4 @@ bool vid_is_dsa_8021q_txvlan(u16 vid);
 
 bool vid_is_dsa_8021q(u16 vid);
 
-#else
-
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled)
-{
-	return 0;
-}
-
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
-				    struct dsa_8021q_context *other_ctx,
-				    int other_port)
-{
-	return 0;
-}
-
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
-				     struct dsa_8021q_context *other_ctx,
-				     int other_port)
-{
-	return 0;
-}
-
-struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
-			       u16 tpid, u16 tci)
-{
-	return NULL;
-}
-
-u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port)
-{
-	return 0;
-}
-
-u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
-{
-	return 0;
-}
-
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
-{
-	return 0;
-}
-
-int dsa_8021q_rx_switch_id(u16 vid)
-{
-	return 0;
-}
-
-int dsa_8021q_rx_source_port(u16 vid)
-{
-	return 0;
-}
-
-u16 dsa_8021q_rx_subvlan(u16 vid)
-{
-	return 0;
-}
-
-bool vid_is_dsa_8021q_rxvlan(u16 vid)
-{
-	return false;
-}
-
-bool vid_is_dsa_8021q_txvlan(u16 vid)
-{
-	return false;
-}
-
-bool vid_is_dsa_8021q(u16 vid)
-{
-	return false;
-}
-
-#endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */
-
 #endif /* _NET_DSA_8021Q_H */

From 233697b3b3f60b17d02ca2a35230aee0ac6f1759 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:27 +0300
Subject: [PATCH 1473/3292] net: dsa: tag_8021q: refactor RX VLAN parsing into
 a dedicated function

The added value of this function is that it can deal with both the case
where the VLAN header is in the skb head, as well as in the offload field.
This is something I was not able to do using other functions in the
network stack.

Since both ocelot-8021q and sja1105 need to do the same stuff, let's
make it a common service provided by tag_8021q.

This is done as refactoring for the new SJA1110 tagger, which partly
uses tag_8021q as well (just like SJA1105), and will be the third caller.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h  |  3 +++
 net/dsa/tag_8021q.c        | 23 +++++++++++++++++++++++
 net/dsa/tag_ocelot_8021q.c | 18 ++----------------
 net/dsa/tag_sja1105.c      | 33 +++++++++++----------------------
 4 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index cbf2c9b1ee4f..1587961f1a7b 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -50,6 +50,9 @@ int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 			       u16 tpid, u16 tci);
 
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
+		   int *subvlan);
+
 u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port);
 
 u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port);
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 122ad5833fb1..4aa29f90ecea 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -471,4 +471,27 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
 
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
+		   int *subvlan)
+{
+	u16 vid, tci;
+
+	skb_push_rcsum(skb, ETH_HLEN);
+	if (skb_vlan_tag_present(skb)) {
+		tci = skb_vlan_tag_get(skb);
+		__vlan_hwaccel_clear_tag(skb);
+	} else {
+		__skb_vlan_pop(skb, &tci);
+	}
+	skb_pull_rcsum(skb, ETH_HLEN);
+
+	vid = tci & VLAN_VID_MASK;
+
+	*source_port = dsa_8021q_rx_source_port(vid);
+	*switch_id = dsa_8021q_rx_switch_id(vid);
+	*subvlan = dsa_8021q_rx_subvlan(vid);
+	skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
+
 MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 663b74793cfc..85ac85c3af8c 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -41,29 +41,15 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 				  struct net_device *netdev,
 				  struct packet_type *pt)
 {
-	int src_port, switch_id, qos_class;
-	u16 vid, tci;
+	int src_port, switch_id, subvlan;
 
-	skb_push_rcsum(skb, ETH_HLEN);
-	if (skb_vlan_tag_present(skb)) {
-		tci = skb_vlan_tag_get(skb);
-		__vlan_hwaccel_clear_tag(skb);
-	} else {
-		__skb_vlan_pop(skb, &tci);
-	}
-	skb_pull_rcsum(skb, ETH_HLEN);
-
-	vid = tci & VLAN_VID_MASK;
-	src_port = dsa_8021q_rx_source_port(vid);
-	switch_id = dsa_8021q_rx_switch_id(vid);
-	qos_class = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+	dsa_8021q_rcv(skb, &src_port, &switch_id, &subvlan);
 
 	skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
 	if (!skb->dev)
 		return NULL;
 
 	skb->offload_fwd_mark = 1;
-	skb->priority = qos_class;
 
 	return skb;
 }
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 92e147293acf..a70625fe64f7 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -275,44 +275,33 @@ static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan)
 	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
 }
 
+static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
+{
+	u16 tpid = ntohs(eth_hdr(skb)->h_proto);
+
+	return tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q ||
+	       skb_vlan_tag_present(skb);
+}
+
 static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 				   struct net_device *netdev,
 				   struct packet_type *pt)
 {
+	int source_port, switch_id, subvlan = 0;
 	struct sja1105_meta meta = {0};
-	int source_port, switch_id;
 	struct ethhdr *hdr;
-	u16 tpid, vid, tci;
 	bool is_link_local;
-	u16 subvlan = 0;
-	bool is_tagged;
 	bool is_meta;
 
 	hdr = eth_hdr(skb);
-	tpid = ntohs(hdr->h_proto);
-	is_tagged = (tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q ||
-		     skb_vlan_tag_present(skb));
 	is_link_local = sja1105_is_link_local(skb);
 	is_meta = sja1105_is_meta_frame(skb);
 
 	skb->offload_fwd_mark = 1;
 
-	if (is_tagged) {
+	if (sja1105_skb_has_tag_8021q(skb)) {
 		/* Normal traffic path. */
-		skb_push_rcsum(skb, ETH_HLEN);
-		if (skb_vlan_tag_present(skb)) {
-			tci = skb_vlan_tag_get(skb);
-			__vlan_hwaccel_clear_tag(skb);
-		} else {
-			__skb_vlan_pop(skb, &tci);
-		}
-		skb_pull_rcsum(skb, ETH_HLEN);
-
-		vid = tci & VLAN_VID_MASK;
-		source_port = dsa_8021q_rx_source_port(vid);
-		switch_id = dsa_8021q_rx_switch_id(vid);
-		skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-		subvlan = dsa_8021q_rx_subvlan(vid);
+		dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
 	} else if (is_link_local) {
 		/* Management traffic path. Switch embeds the switch ID and
 		 * port ID into bytes of the destination MAC, courtesy of

From 617ef8d9377b9aac381c023cd0823da264c2f463 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:28 +0300
Subject: [PATCH 1474/3292] net: dsa: sja1105: make SJA1105_SKB_CB fit a full
 timestamp

In SJA1105, RX timestamps for packets sent to the CPU are transmitted in
separate follow-up packets (metadata frames). These contain partial
timestamps (24 or 32 bits) which are kept in SJA1105_SKB_CB(skb)->meta_tstamp.

Thankfully, SJA1110 improved that, and the RX timestamps are now
transmitted in-band with the actual packet, in the timestamp trailer.
The RX timestamps are now full-width 64 bits.

Because we process the RX DSA tags in the rcv() method in the tagger,
but we would like to preserve the DSA code structure in that we populate
the skb timestamp in the port_rxtstamp() call which only happens later,
the implication is that we must somehow pass the 64-bit timestamp from
the rcv() method all the way to port_rxtstamp(). We can use the skb->cb
for that.

Rename the meta_tstamp from struct sja1105_skb_cb from "meta_tstamp" to
"tstamp", and increase its size to 64 bits.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_ptp.c | 2 +-
 include/linux/dsa/sja1105.h           | 2 +-
 net/dsa/tag_sja1105.c                 | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 0bc566b9e958..dea82f8a40c4 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -397,7 +397,7 @@ static long sja1105_rxtstamp_work(struct ptp_clock_info *ptp)
 
 		*shwt = (struct skb_shared_hwtstamps) {0};
 
-		ts = SJA1105_SKB_CB(skb)->meta_tstamp;
+		ts = SJA1105_SKB_CB(skb)->tstamp;
 		ts = sja1105_tstamp_reconstruct(ds, ticks, ts);
 
 		shwt->hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts));
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 1eb84562b311..865a548a6ef2 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -48,7 +48,7 @@ struct sja1105_tagger_data {
 
 struct sja1105_skb_cb {
 	struct sk_buff *clone;
-	u32 meta_tstamp;
+	u64 tstamp;
 };
 
 #define SJA1105_SKB_CB(skb) \
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index a70625fe64f7..11f555dd9566 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -147,7 +147,7 @@ static void sja1105_transfer_meta(struct sk_buff *skb,
 
 	hdr->h_dest[3] = meta->dmac_byte_3;
 	hdr->h_dest[4] = meta->dmac_byte_4;
-	SJA1105_SKB_CB(skb)->meta_tstamp = meta->tstamp;
+	SJA1105_SKB_CB(skb)->tstamp = meta->tstamp;
 }
 
 /* This is a simple state machine which follows the hardware mechanism of

From 4913b8ebf8a9c56ce66466b4daa07d7d4678cdd8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:29 +0300
Subject: [PATCH 1475/3292] net: dsa: add support for the SJA1110 native
 tagging protocol

The SJA1110 has improved a few things compared to SJA1105:

- To send a control packet from the host port with SJA1105, one needed
  to program a one-shot "management route" over SPI. This is no longer
  true with SJA1110, you can actually send "in-band control extensions"
  in the packets sent by DSA, these are in fact DSA tags which contain
  the destination port and switch ID.

- When receiving a control packet from the switch with SJA1105, the
  source port and switch ID were written in bytes 3 and 4 of the
  destination MAC address of the frame (which was a very poor shot at a
  DSA header). If the control packet also had an RX timestamp, that
  timestamp was sent in an actual follow-up packet, so there were
  reordering concerns on multi-core/multi-queue DSA masters, where the
  metadata frame with the RX timestamp might get processed before the
  actual packet to which that timestamp belonged (there is no way to
  pair a packet to its timestamp other than the order in which they were
  received). On SJA1110, this is no longer true, control packets have
  the source port, switch ID and timestamp all in the DSA tags.

- Timestamps from the switch were partial: to get a 64-bit timestamp as
  required by PTP stacks, one would need to take the partial 24-bit or
  32-bit timestamp from the packet, then read the current PTP time very
  quickly, and then patch in the high bits of the current PTP time into
  the captured partial timestamp, to reconstruct what the full 64-bit
  timestamp must have been. That is awful because packet processing is
  done in NAPI context, but reading the current PTP time is done over
  SPI and therefore needs sleepable context.

But it also aggravated a few things:

- Not only is there a DSA header in SJA1110, but there is a DSA trailer
  in fact, too. So DSA needs to be extended to support taggers which
  have both a header and a trailer. Very unconventional - my understanding
  is that the trailer exists because the timestamps couldn't be prepared
  in time for putting them in the header area.

- Like SJA1105, not all packets sent to the CPU have the DSA tag added
  to them, only control packets do:

  * the ones which match the destination MAC filters/traps in
    MAC_FLTRES1 and MAC_FLTRES0
  * the ones which match FDB entries which have TRAP or TAKETS bits set

  So we could in theory hack something up to request the switch to take
  timestamps for all packets that reach the CPU, and those would be
  DSA-tagged and contain the source port / switch ID by virtue of the
  fact that there needs to be a timestamp trailer provided. BUT:

- The SJA1110 does not parse its own DSA tags in a way that is useful
  for routing in cross-chip topologies, a la Marvell. And the sja1105
  driver already supports cross-chip bridging from the SJA1105 days.
  It does that by automatically setting up the DSA links as VLAN trunks
  which contain all the necessary tag_8021q RX VLANs that must be
  communicated between the switches that span the same bridge. So when
  using tag_8021q on sja1105, it is possible to have 2 switches with
  ports sw0p0, sw0p1, sw1p0, sw1p1, and 2 VLAN-unaware bridges br0 and
  br1, and br0 can take sw0p0 and sw1p0, and br1 can take sw0p1 and
  sw1p1, and forwarding will happen according to the expected rules of
  the Linux bridge.
  We like that, and we don't want that to go away, so as a matter of
  fact, the SJA1110 tagger still needs to support tag_8021q.

So the sja1110 tagger is a hybrid between tag_8021q for data packets,
and the native hardware support for control packets.

On RX, packets have a 13-byte trailer if they contain an RX timestamp.
That trailer is padded in such a way that its byte 8 (the start of the
"residence time" field - not parsed by Linux because we don't care) is
aligned on a 16 byte boundary. So the padding has a variable length
between 0 and 15 bytes. The DSA header contains the offset of the
beginning of the padding relative to the beginning of the frame (and the
end of the padding is obviously the end of the packet minus 13 bytes,
the length of the trailer). So we discard it.

Packets which don't have a trailer contain the source port and switch ID
information in the header (they are "trap-to-host" packets). Packets
which have a trailer contain the source port and switch ID in the trailer.

On TX, the destination port mask and switch ID is always in the trailer,
so we always need to say in the header that a trailer is present.

The header needs a custom EtherType and this was chosen as 0xdadc, after
0xdada which is for Marvell and 0xdadb which is for VLANs in
VLAN-unaware mode on SJA1105 (and SJA1110 in fact too).

Because we use tag_8021q in concert with the native tagging protocol,
control packets will have 2 DSA tags.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h             |   1 +
 drivers/net/dsa/sja1105/sja1105_main.c        |   6 +-
 drivers/net/dsa/sja1105/sja1105_spi.c         |  10 +
 .../net/dsa/sja1105/sja1105_static_config.c   |   1 +
 .../net/dsa/sja1105/sja1105_static_config.h   |   1 +
 include/linux/dsa/sja1105.h                   |   1 +
 include/net/dsa.h                             |   2 +
 net/dsa/tag_sja1105.c                         | 221 +++++++++++++++++-
 8 files changed, 240 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 4d192331754c..a6d64b27e6a9 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -110,6 +110,7 @@ struct sja1105_info {
 	int max_frame_mem;
 	int num_ports;
 	bool multiple_cascade_ports;
+	enum dsa_tag_protocol tag_proto;
 	const struct sja1105_dynamic_table_ops *dyn_ops;
 	const struct sja1105_table_ops *static_ops;
 	const struct sja1105_regs *regs;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 850bbc793369..6e2cfbf605ef 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -667,6 +667,8 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		.tpid2 = ETH_P_SJA1105,
 		/* Enable the TTEthernet engine on SJA1110 */
 		.tte_en = true,
+		/* Set up the EtherType for control packets on SJA1110 */
+		.header_type = ETH_P_SJA1110,
 	};
 	struct sja1105_general_params_entry *general_params;
 	struct dsa_switch *ds = priv->ds;
@@ -2174,7 +2176,9 @@ static enum dsa_tag_protocol
 sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
 			 enum dsa_tag_protocol mp)
 {
-	return DSA_TAG_PROTO_SJA1105;
+	struct sja1105_private *priv = ds->priv;
+
+	return priv->info->tag_proto;
 }
 
 static int sja1105_find_free_subvlan(u16 *subvlan_map, bool pvid)
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index e6c2a37aa617..9156f4cc11f2 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -569,6 +569,7 @@ const struct sja1105_info sja1105e_info = {
 	.static_ops		= sja1105e_table_ops,
 	.dyn_ops		= sja1105et_dyn_ops,
 	.qinq_tpid		= ETH_P_8021Q,
+	.tag_proto		= DSA_TAG_PROTO_SJA1105,
 	.can_limit_mcast_flood	= false,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
@@ -600,6 +601,7 @@ const struct sja1105_info sja1105t_info = {
 	.static_ops		= sja1105t_table_ops,
 	.dyn_ops		= sja1105et_dyn_ops,
 	.qinq_tpid		= ETH_P_8021Q,
+	.tag_proto		= DSA_TAG_PROTO_SJA1105,
 	.can_limit_mcast_flood	= false,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
@@ -631,6 +633,7 @@ const struct sja1105_info sja1105p_info = {
 	.static_ops		= sja1105p_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1105,
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
@@ -663,6 +666,7 @@ const struct sja1105_info sja1105q_info = {
 	.static_ops		= sja1105q_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1105,
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
@@ -695,6 +699,7 @@ const struct sja1105_info sja1105r_info = {
 	.static_ops		= sja1105r_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1105,
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
@@ -729,6 +734,7 @@ const struct sja1105_info sja1105s_info = {
 	.dyn_ops		= sja1105pqrs_dyn_ops,
 	.regs			= &sja1105pqrs_regs,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1105,
 	.can_limit_mcast_flood	= true,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
@@ -762,6 +768,7 @@ const struct sja1105_info sja1110a_info = {
 	.dyn_ops		= sja1110_dyn_ops,
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1110,
 	.can_limit_mcast_flood	= true,
 	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
@@ -808,6 +815,7 @@ const struct sja1105_info sja1110b_info = {
 	.dyn_ops		= sja1110_dyn_ops,
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1110,
 	.can_limit_mcast_flood	= true,
 	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
@@ -854,6 +862,7 @@ const struct sja1105_info sja1110c_info = {
 	.dyn_ops		= sja1110_dyn_ops,
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1110,
 	.can_limit_mcast_flood	= true,
 	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
@@ -900,6 +909,7 @@ const struct sja1105_info sja1110d_info = {
 	.dyn_ops		= sja1110_dyn_ops,
 	.regs			= &sja1110_regs,
 	.qinq_tpid		= ETH_P_8021AD,
+	.tag_proto		= DSA_TAG_PROTO_SJA1110,
 	.can_limit_mcast_flood	= true,
 	.multiple_cascade_ports	= true,
 	.ptp_ts_bits		= 32,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index eda571819d45..1491b72008f3 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -212,6 +212,7 @@ size_t sja1110_general_params_entry_packing(void *buf, void *entry_ptr,
 	sja1105_packing(buf, &entry->egrmirrdei,   110, 110, size, op);
 	sja1105_packing(buf, &entry->replay_port,  109, 106, size, op);
 	sja1105_packing(buf, &entry->tdmaconfigidx, 70,  67, size, op);
+	sja1105_packing(buf, &entry->header_type,   64,  49, size, op);
 	sja1105_packing(buf, &entry->tte_en,        16,  16, size, op);
 	return size;
 }
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 9bef51791bff..bce0f5c03d0b 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -217,6 +217,7 @@ struct sja1105_general_params_entry {
 	/* SJA1110 only */
 	u64 tte_en;
 	u64 tdmaconfigidx;
+	u64 header_type;
 };
 
 struct sja1105_schedule_entry_points_entry {
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 865a548a6ef2..b02cf7b515ae 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -14,6 +14,7 @@
 
 #define ETH_P_SJA1105				ETH_P_DSA_8021Q
 #define ETH_P_SJA1105_META			0x0008
+#define ETH_P_SJA1110				0xdadc
 
 /* IEEE 802.3 Annex 57A: Slow Protocols PDUs (01:80:C2:xx:xx:xx) */
 #define SJA1105_LINKLOCAL_FILTER_A		0x0180C2000000ull
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0a10f6fffc3d..289d68e82da0 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -50,6 +50,7 @@ struct phylink_link_state;
 #define DSA_TAG_PROTO_OCELOT_8021Q_VALUE	20
 #define DSA_TAG_PROTO_SEVILLE_VALUE		21
 #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE		22
+#define DSA_TAG_PROTO_SJA1110_VALUE		23
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
@@ -75,6 +76,7 @@ enum dsa_tag_protocol {
 	DSA_TAG_PROTO_XRS700X		= DSA_TAG_PROTO_XRS700X_VALUE,
 	DSA_TAG_PROTO_OCELOT_8021Q	= DSA_TAG_PROTO_OCELOT_8021Q_VALUE,
 	DSA_TAG_PROTO_SEVILLE		= DSA_TAG_PROTO_SEVILLE_VALUE,
+	DSA_TAG_PROTO_SJA1110		= DSA_TAG_PROTO_SJA1110_VALUE,
 };
 
 struct packet_type;
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 11f555dd9566..37e1d64e07c6 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -7,6 +7,47 @@
 #include <linux/packing.h>
 #include "dsa_priv.h"
 
+/* Is this a TX or an RX header? */
+#define SJA1110_HEADER_HOST_TO_SWITCH		BIT(15)
+
+/* RX header */
+#define SJA1110_RX_HEADER_IS_METADATA		BIT(14)
+#define SJA1110_RX_HEADER_HOST_ONLY		BIT(13)
+#define SJA1110_RX_HEADER_HAS_TRAILER		BIT(12)
+
+/* Trap-to-host format (no trailer present) */
+#define SJA1110_RX_HEADER_SRC_PORT(x)		(((x) & GENMASK(7, 4)) >> 4)
+#define SJA1110_RX_HEADER_SWITCH_ID(x)		((x) & GENMASK(3, 0))
+
+/* Timestamp format (trailer present) */
+#define SJA1110_RX_HEADER_TRAILER_POS(x)	((x) & GENMASK(11, 0))
+
+#define SJA1110_RX_TRAILER_SWITCH_ID(x)		(((x) & GENMASK(7, 4)) >> 4)
+#define SJA1110_RX_TRAILER_SRC_PORT(x)		((x) & GENMASK(3, 0))
+
+/* TX header */
+#define SJA1110_TX_HEADER_UPDATE_TC		BIT(14)
+#define SJA1110_TX_HEADER_TAKE_TS		BIT(13)
+#define SJA1110_TX_HEADER_TAKE_TS_CASC		BIT(12)
+#define SJA1110_TX_HEADER_HAS_TRAILER		BIT(11)
+
+/* Only valid if SJA1110_TX_HEADER_HAS_TRAILER is false */
+#define SJA1110_TX_HEADER_PRIO(x)		(((x) << 7) & GENMASK(10, 7))
+#define SJA1110_TX_HEADER_TSTAMP_ID(x)		((x) & GENMASK(7, 0))
+
+/* Only valid if SJA1110_TX_HEADER_HAS_TRAILER is true */
+#define SJA1110_TX_HEADER_TRAILER_POS(x)	((x) & GENMASK(10, 0))
+
+#define SJA1110_TX_TRAILER_TSTAMP_ID(x)		(((x) << 24) & GENMASK(31, 24))
+#define SJA1110_TX_TRAILER_PRIO(x)		(((x) << 21) & GENMASK(23, 21))
+#define SJA1110_TX_TRAILER_SWITCHID(x)		(((x) << 12) & GENMASK(15, 12))
+#define SJA1110_TX_TRAILER_DESTPORTS(x)		(((x) << 1) & GENMASK(11, 1))
+
+#define SJA1110_HEADER_LEN			4
+#define SJA1110_RX_TRAILER_LEN			13
+#define SJA1110_TX_TRAILER_LEN			4
+#define SJA1110_MAX_PADDING_LEN			15
+
 /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
 static inline bool sja1105_is_link_local(const struct sk_buff *skb)
 {
@@ -140,6 +181,50 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
 			     ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
 }
 
+static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(netdev);
+	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
+	u16 queue_mapping = skb_get_queue_mapping(skb);
+	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+	struct ethhdr *eth_hdr;
+	__be32 *tx_trailer;
+	__be16 *tx_header;
+	int trailer_pos;
+
+	/* Transmitting control packets is done using in-band control
+	 * extensions, while data packets are transmitted using
+	 * tag_8021q TX VLANs.
+	 */
+	if (likely(!sja1105_is_link_local(skb)))
+		return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
+				     ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+
+	skb_push(skb, SJA1110_HEADER_LEN);
+
+	/* Move Ethernet header to the left, making space for DSA tag */
+	memmove(skb->data, skb->data + SJA1110_HEADER_LEN, 2 * ETH_ALEN);
+
+	trailer_pos = skb->len;
+
+	/* On TX, skb->data points to skb_mac_header(skb) */
+	eth_hdr = (struct ethhdr *)skb->data;
+	tx_header = (__be16 *)(eth_hdr + 1);
+	tx_trailer = skb_put(skb, SJA1110_TX_TRAILER_LEN);
+
+	eth_hdr->h_proto = htons(ETH_P_SJA1110);
+
+	*tx_header = htons(SJA1110_HEADER_HOST_TO_SWITCH |
+			   SJA1110_TX_HEADER_HAS_TRAILER |
+			   SJA1110_TX_HEADER_TRAILER_POS(trailer_pos));
+	*tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) |
+				  SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) |
+				  SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index)));
+
+	return skb;
+}
+
 static void sja1105_transfer_meta(struct sk_buff *skb,
 				  const struct sja1105_meta *meta)
 {
@@ -283,6 +368,11 @@ static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
 	       skb_vlan_tag_present(skb);
 }
 
+static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
+{
+	return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110;
+}
+
 static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 				   struct net_device *netdev,
 				   struct packet_type *pt)
@@ -333,6 +423,98 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 					      is_meta);
 }
 
+static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
+							    int *source_port,
+							    int *switch_id)
+{
+	u16 rx_header;
+
+	if (unlikely(!pskb_may_pull(skb, SJA1110_HEADER_LEN)))
+		return NULL;
+
+	/* skb->data points to skb_mac_header(skb) + ETH_HLEN, which is exactly
+	 * what we need because the caller has checked the EtherType (which is
+	 * located 2 bytes back) and we just need a pointer to the header that
+	 * comes afterwards.
+	 */
+	rx_header = ntohs(*(__be16 *)skb->data);
+
+	/* Timestamp frame, we have a trailer */
+	if (rx_header & SJA1110_RX_HEADER_HAS_TRAILER) {
+		int start_of_padding = SJA1110_RX_HEADER_TRAILER_POS(rx_header);
+		u8 *rx_trailer = skb_tail_pointer(skb) - SJA1110_RX_TRAILER_LEN;
+		u64 *tstamp = &SJA1105_SKB_CB(skb)->tstamp;
+		u8 last_byte = rx_trailer[12];
+
+		/* The timestamp is unaligned, so we need to use packing()
+		 * to get it
+		 */
+		packing(rx_trailer, tstamp, 63, 0, 8, UNPACK, 0);
+
+		*source_port = SJA1110_RX_TRAILER_SRC_PORT(last_byte);
+		*switch_id = SJA1110_RX_TRAILER_SWITCH_ID(last_byte);
+
+		/* skb->len counts from skb->data, while start_of_padding
+		 * counts from the destination MAC address. Right now skb->data
+		 * is still as set by the DSA master, so to trim away the
+		 * padding and trailer we need to account for the fact that
+		 * skb->data points to skb_mac_header(skb) + ETH_HLEN.
+		 */
+		pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN);
+	/* Trap-to-host frame, no timestamp trailer */
+	} else {
+		*source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header);
+		*switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
+	}
+
+	/* Advance skb->data past the DSA header */
+	skb_pull_rcsum(skb, SJA1110_HEADER_LEN);
+
+	/* Remove the DSA header */
+	memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - SJA1110_HEADER_LEN,
+		2 * ETH_ALEN);
+
+	/* With skb->data in its final place, update the MAC header
+	 * so that eth_hdr() continues to works properly.
+	 */
+	skb_set_mac_header(skb, -ETH_HLEN);
+
+	return skb;
+}
+
+static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
+				   struct net_device *netdev,
+				   struct packet_type *pt)
+{
+	int source_port = -1, switch_id = -1, subvlan = 0;
+
+	skb->offload_fwd_mark = 1;
+
+	if (sja1110_skb_has_inband_control_extension(skb)) {
+		skb = sja1110_rcv_inband_control_extension(skb, &source_port,
+							   &switch_id);
+		if (!skb)
+			return NULL;
+	}
+
+	/* Packets with in-band control extensions might still have RX VLANs */
+	if (likely(sja1105_skb_has_tag_8021q(skb)))
+		dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+
+	skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+	if (!skb->dev) {
+		netdev_warn(netdev,
+			    "Couldn't decode source port %d and switch id %d\n",
+			    source_port, switch_id);
+		return NULL;
+	}
+
+	if (subvlan)
+		sja1105_decode_subvlan(skb, subvlan);
+
+	return skb;
+}
+
 static void sja1105_flow_dissect(const struct sk_buff *skb, __be16 *proto,
 				 int *offset)
 {
@@ -343,6 +525,20 @@ static void sja1105_flow_dissect(const struct sk_buff *skb, __be16 *proto,
 	dsa_tag_generic_flow_dissect(skb, proto, offset);
 }
 
+static void sja1110_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+				 int *offset)
+{
+	/* Management frames have 2 DSA tags on RX, so the needed_headroom we
+	 * declared is fine for the generic dissector adjustment procedure.
+	 */
+	if (unlikely(sja1105_is_link_local(skb)))
+		return dsa_tag_generic_flow_dissect(skb, proto, offset);
+
+	/* For the rest, there is a single DSA tag, the tag_8021q one */
+	*offset = VLAN_HLEN;
+	*proto = ((__be16 *)skb->data)[(VLAN_HLEN / 2) - 1];
+}
+
 static const struct dsa_device_ops sja1105_netdev_ops = {
 	.name = "sja1105",
 	.proto = DSA_TAG_PROTO_SJA1105,
@@ -354,7 +550,28 @@ static const struct dsa_device_ops sja1105_netdev_ops = {
 	.promisc_on_master = true,
 };
 
-MODULE_LICENSE("GPL v2");
+DSA_TAG_DRIVER(sja1105_netdev_ops);
 MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105);
 
-module_dsa_tag_driver(sja1105_netdev_ops);
+static const struct dsa_device_ops sja1110_netdev_ops = {
+	.name = "sja1110",
+	.proto = DSA_TAG_PROTO_SJA1110,
+	.xmit = sja1110_xmit,
+	.rcv = sja1110_rcv,
+	.filter = sja1105_filter,
+	.flow_dissect = sja1110_flow_dissect,
+	.needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN,
+	.needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN,
+};
+
+DSA_TAG_DRIVER(sja1110_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1110);
+
+static struct dsa_tag_driver *sja1105_tag_driver_array[] = {
+	&DSA_TAG_DRIVER_NAME(sja1105_netdev_ops),
+	&DSA_TAG_DRIVER_NAME(sja1110_netdev_ops),
+};
+
+module_dsa_tag_drivers(sja1105_tag_driver_array);
+
+MODULE_LICENSE("GPL v2");

From 30b73242e679fb6cdee9f00aac6e05278fef48ca Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:30 +0300
Subject: [PATCH 1476/3292] net: dsa: sja1105: add the RX timestamping
 procedure for SJA1110

This is really easy, since the full RX timestamp is in the DSA trailer
and the tagger code transfers it to SJA1105_SKB_CB(skb)->tstamp, we just
need to move it to the skb shared info region. This is as opposed to
SJA1105, where the RX timestamp was received in a meta frame (so there
needed to be a state machine to pair the 2 packets) and the timestamp
was partial (so the packet, once matched with its timestamp, needed to
be added to an RX timestamping queue where the PTP aux worker would
reconstruct that timestamp).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h     |  1 +
 drivers/net/dsa/sja1105/sja1105_ptp.c | 26 +++++++++++++++++++++++---
 drivers/net/dsa/sja1105/sja1105_ptp.h |  6 ++++++
 drivers/net/dsa/sja1105/sja1105_spi.c | 10 ++++++++++
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index a6d64b27e6a9..201bca282884 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -130,6 +130,7 @@ struct sja1105_info {
 			   const unsigned char *addr, u16 vid);
 	void (*ptp_cmd_packing)(u8 *buf, struct sja1105_ptp_cmd *cmd,
 				enum packing_op op);
+	bool (*rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb);
 	int (*clocking_setup)(struct sja1105_private *priv);
 	const char *name;
 	bool supports_mii[SJA1105_MAX_NUM_PORTS];
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index dea82f8a40c4..62fe05b4cb60 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -413,9 +413,7 @@ static long sja1105_rxtstamp_work(struct ptp_clock_info *ptp)
 	return -1;
 }
 
-/* Called from dsa_skb_defer_rx_timestamp */
-bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
-			   struct sk_buff *skb, unsigned int type)
+bool sja1105_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 {
 	struct sja1105_private *priv = ds->priv;
 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
@@ -431,6 +429,28 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 	return true;
 }
 
+bool sja1110_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
+	u64 ts = SJA1105_SKB_CB(skb)->tstamp;
+
+	*shwt = (struct skb_shared_hwtstamps) {0};
+
+	shwt->hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(ts));
+
+	/* Don't defer */
+	return false;
+}
+
+/* Called from dsa_skb_defer_rx_timestamp */
+bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
+			   struct sk_buff *skb, unsigned int type)
+{
+	struct sja1105_private *priv = ds->priv;
+
+	return priv->info->rxtstamp(ds, port, skb);
+}
+
 /* Called from dsa_skb_tx_timestamp. This callback is just to clone
  * the skb and have it available in SJA1105_SKB_CB in the .port_deferred_xmit
  * callback, where we will timestamp it synchronously.
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 34f97f58a355..bf0c4f1dfed7 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -122,6 +122,9 @@ int __sja1105_ptp_adjtime(struct dsa_switch *ds, s64 delta);
 int sja1105_ptp_commit(struct dsa_switch *ds, struct sja1105_ptp_cmd *cmd,
 		       sja1105_spi_rw_mode_t rw);
 
+bool sja1105_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb);
+bool sja1110_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb);
+
 #else
 
 struct sja1105_ptp_cmd;
@@ -184,6 +187,9 @@ static inline int sja1105_ptp_commit(struct dsa_switch *ds,
 
 #define sja1105_hwtstamp_set NULL
 
+#define sja1105_rxtstamp NULL
+#define sja1110_rxtstamp NULL
+
 #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
 
 #endif /* _SJA1105_PTP_H */
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 9156f4cc11f2..f7dd86271891 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -580,6 +580,7 @@ const struct sja1105_info sja1105e_info = {
 	.fdb_add_cmd		= sja1105et_fdb_add,
 	.fdb_del_cmd		= sja1105et_fdb_del,
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
+	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
 	.port_speed		= {
@@ -612,6 +613,7 @@ const struct sja1105_info sja1105t_info = {
 	.fdb_add_cmd		= sja1105et_fdb_add,
 	.fdb_del_cmd		= sja1105et_fdb_del,
 	.ptp_cmd_packing	= sja1105et_ptp_cmd_packing,
+	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105et_regs,
 	.port_speed		= {
@@ -645,6 +647,7 @@ const struct sja1105_info sja1105p_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
 	.port_speed		= {
@@ -678,6 +681,7 @@ const struct sja1105_info sja1105q_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
 	.port_speed		= {
@@ -711,6 +715,7 @@ const struct sja1105_info sja1105r_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
 	.regs			= &sja1105pqrs_regs,
 	.port_speed		= {
@@ -746,6 +751,7 @@ const struct sja1105_info sja1105s_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -781,6 +787,7 @@ const struct sja1105_info sja1110a_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1110_rxtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -828,6 +835,7 @@ const struct sja1105_info sja1110b_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1110_rxtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -875,6 +883,7 @@ const struct sja1105_info sja1110c_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1110_rxtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -922,6 +931,7 @@ const struct sja1105_info sja1110d_info = {
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
+	.rxtstamp		= sja1110_rxtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,

From 566b18c8b752f67c4e82f0eb4563dd71f84a8799 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 22:01:31 +0300
Subject: [PATCH 1477/3292] net: dsa: sja1105: implement TX timestamping for
 SJA1110

The TX timestamping procedure for SJA1105 is a bit unconventional
because the transmit procedure itself is unconventional.

Control packets (and therefore PTP as well) are transmitted to a
specific port in SJA1105 using "management routes" which must be written
over SPI to the switch. These are one-shot rules that match by
destination MAC address on traffic coming from the CPU port, and select
the precise destination port for that packet. So to transmit a packet
from NET_TX softirq context, we actually need to defer to a process
context so that we can perform that SPI write before we send the packet.
The DSA master dev_queue_xmit() runs in process context, and we poll
until the switch confirms it took the TX timestamp, then we annotate the
skb clone with that TX timestamp. This is why the sja1105 driver does
not need an skb queue for TX timestamping.

But the SJA1110 is a bit (not much!) more conventional, and you can
request 2-step TX timestamping through the DSA header, as well as give
the switch a cookie (timestamp ID) which it will give back to you when
it has the timestamp. So now we do need a queue for keeping the skb
clones until their TX timestamps become available.

The interesting part is that the metadata frames from SJA1105 haven't
disappeared completely. On SJA1105 they were used as follow-ups which
contained RX timestamps, but on SJA1110 they are actually TX completion
packets, which contain a variable (up to 32) array of timestamps.
Why an array? Because:
- not only is the TX timestamp on the egress port being communicated,
  but also the RX timestamp on the CPU port. Nice, but we don't care
  about that, so we ignore it.
- because a packet could be multicast to multiple egress ports, each
  port takes its own timestamp, and the TX completion packet contains
  the individual timestamps on each port.

This is unconventional because switches typically have a timestamping
FIFO and raise an interrupt, but this one doesn't. So the tagger needs
to detect and parse meta frames, and call into the main switch driver,
which pairs the timestamps with the skbs in the TX timestamping queue
which are waiting for one.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h     |  1 +
 drivers/net/dsa/sja1105/sja1105_ptp.c | 69 +++++++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_ptp.h |  7 +++
 drivers/net/dsa/sja1105/sja1105_spi.c |  4 ++
 include/linux/dsa/sja1105.h           | 23 +++++++++
 net/dsa/tag_sja1105.c                 | 52 ++++++++++++++++++++
 6 files changed, 156 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 201bca282884..5f3449351668 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -131,6 +131,7 @@ struct sja1105_info {
 	void (*ptp_cmd_packing)(u8 *buf, struct sja1105_ptp_cmd *cmd,
 				enum packing_op op);
 	bool (*rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb);
+	void (*txtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb);
 	int (*clocking_setup)(struct sja1105_private *priv);
 	const char *name;
 	bool supports_mii[SJA1105_MAX_NUM_PORTS];
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 62fe05b4cb60..691f6dd7e669 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -79,6 +79,7 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv,
 		priv->tagger_data.stampable_skb = NULL;
 	}
 	ptp_cancel_worker_sync(ptp_data->clock);
+	skb_queue_purge(&ptp_data->skb_txtstamp_queue);
 	skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
 
 	return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING);
@@ -451,6 +452,67 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 	return priv->info->rxtstamp(ds, port, skb);
 }
 
+void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
+				 enum sja1110_meta_tstamp dir, u64 tstamp)
+{
+	struct sja1105_private *priv = ds->priv;
+	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+	struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+	struct skb_shared_hwtstamps shwt = {0};
+
+	/* We don't care about RX timestamps on the CPU port */
+	if (dir == SJA1110_META_TSTAMP_RX)
+		return;
+
+	spin_lock(&ptp_data->skb_txtstamp_queue.lock);
+
+	skb_queue_walk_safe(&ptp_data->skb_txtstamp_queue, skb, skb_tmp) {
+		if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
+			continue;
+
+		__skb_unlink(skb, &ptp_data->skb_txtstamp_queue);
+		skb_match = skb;
+
+		break;
+	}
+
+	spin_unlock(&ptp_data->skb_txtstamp_queue.lock);
+
+	if (WARN_ON(!skb_match))
+		return;
+
+	shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
+	skb_complete_tx_timestamp(skb_match, &shwt);
+}
+EXPORT_SYMBOL_GPL(sja1110_process_meta_tstamp);
+
+/* In addition to cloning the skb which is done by the common
+ * sja1105_port_txtstamp, we need to generate a timestamp ID and save the
+ * packet to the TX timestamping queue.
+ */
+void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
+{
+	struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
+	struct sja1105_private *priv = ds->priv;
+	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+	struct sja1105_port *sp = &priv->ports[port];
+	u8 ts_id;
+
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+	spin_lock(&sp->data->meta_lock);
+
+	ts_id = sp->data->ts_id;
+	/* Deal automatically with 8-bit wraparound */
+	sp->data->ts_id++;
+
+	SJA1105_SKB_CB(clone)->ts_id = ts_id;
+
+	spin_unlock(&sp->data->meta_lock);
+
+	skb_queue_tail(&ptp_data->skb_txtstamp_queue, clone);
+}
+
 /* Called from dsa_skb_tx_timestamp. This callback is just to clone
  * the skb and have it available in SJA1105_SKB_CB in the .port_deferred_xmit
  * callback, where we will timestamp it synchronously.
@@ -469,6 +531,9 @@ void sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 		return;
 
 	SJA1105_SKB_CB(skb)->clone = clone;
+
+	if (priv->info->txtstamp)
+		priv->info->txtstamp(ds, port, skb);
 }
 
 static int sja1105_ptp_reset(struct dsa_switch *ds)
@@ -885,7 +950,10 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
 		.n_per_out	= 1,
 	};
 
+	/* Only used on SJA1105 */
 	skb_queue_head_init(&ptp_data->skb_rxtstamp_queue);
+	/* Only used on SJA1110 */
+	skb_queue_head_init(&ptp_data->skb_txtstamp_queue);
 	spin_lock_init(&tagger_data->meta_lock);
 
 	ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev);
@@ -910,6 +978,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 
 	del_timer_sync(&ptp_data->extts_timer);
 	ptp_cancel_worker_sync(ptp_data->clock);
+	skb_queue_purge(&ptp_data->skb_txtstamp_queue);
 	skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
 	ptp_clock_unregister(ptp_data->clock);
 	ptp_data->clock = NULL;
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index bf0c4f1dfed7..3c874bb4c17b 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -75,7 +75,12 @@ struct sja1105_ptp_cmd {
 
 struct sja1105_ptp_data {
 	struct timer_list extts_timer;
+	/* Used only on SJA1105 to reconstruct partial timestamps */
 	struct sk_buff_head skb_rxtstamp_queue;
+	/* Used on SJA1110 where meta frames are generated only for
+	 * 2-step TX timestamps
+	 */
+	struct sk_buff_head skb_txtstamp_queue;
 	struct ptp_clock_info caps;
 	struct ptp_clock *clock;
 	struct sja1105_ptp_cmd cmd;
@@ -124,6 +129,7 @@ int sja1105_ptp_commit(struct dsa_switch *ds, struct sja1105_ptp_cmd *cmd,
 
 bool sja1105_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb);
 bool sja1110_rxtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb);
+void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb);
 
 #else
 
@@ -189,6 +195,7 @@ static inline int sja1105_ptp_commit(struct dsa_switch *ds,
 
 #define sja1105_rxtstamp NULL
 #define sja1110_rxtstamp NULL
+#define sja1110_txtstamp NULL
 
 #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
 
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index f7dd86271891..32d00212423c 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -788,6 +788,7 @@ const struct sja1105_info sja1110a_info = {
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
+	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -836,6 +837,7 @@ const struct sja1105_info sja1110b_info = {
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
+	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -884,6 +886,7 @@ const struct sja1105_info sja1110c_info = {
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
+	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -932,6 +935,7 @@ const struct sja1105_info sja1110d_info = {
 	.fdb_del_cmd		= sja1105pqrs_fdb_del,
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
+	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index b02cf7b515ae..b6089b88314c 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -45,11 +45,14 @@ struct sja1105_tagger_data {
 	 */
 	spinlock_t meta_lock;
 	unsigned long state;
+	u8 ts_id;
 };
 
 struct sja1105_skb_cb {
 	struct sk_buff *clone;
 	u64 tstamp;
+	/* Only valid for packets cloned for 2-step TX timestamping */
+	u8 ts_id;
 };
 
 #define SJA1105_SKB_CB(skb) \
@@ -66,4 +69,24 @@ struct sja1105_port {
 	u16 xmit_tpid;
 };
 
+enum sja1110_meta_tstamp {
+	SJA1110_META_TSTAMP_TX = 0,
+	SJA1110_META_TSTAMP_RX = 1,
+};
+
+#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
+
+void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
+				 enum sja1110_meta_tstamp dir, u64 tstamp);
+
+#else
+
+static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
+					       u8 ts_id, enum sja1110_meta_tstamp dir,
+					       u64 tstamp)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
+
 #endif /* _NET_DSA_SJA1105_H */
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 37e1d64e07c6..9c2df9ece01b 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -25,6 +25,9 @@
 #define SJA1110_RX_TRAILER_SWITCH_ID(x)		(((x) & GENMASK(7, 4)) >> 4)
 #define SJA1110_RX_TRAILER_SRC_PORT(x)		((x) & GENMASK(3, 0))
 
+/* Meta frame format (for 2-step TX timestamps) */
+#define SJA1110_RX_HEADER_N_TS(x)		(((x) & GENMASK(8, 4)) >> 4)
+
 /* TX header */
 #define SJA1110_TX_HEADER_UPDATE_TC		BIT(14)
 #define SJA1110_TX_HEADER_TAKE_TS		BIT(13)
@@ -43,6 +46,8 @@
 #define SJA1110_TX_TRAILER_SWITCHID(x)		(((x) << 12) & GENMASK(15, 12))
 #define SJA1110_TX_TRAILER_DESTPORTS(x)		(((x) << 1) & GENMASK(11, 1))
 
+#define SJA1110_META_TSTAMP_SIZE		10
+
 #define SJA1110_HEADER_LEN			4
 #define SJA1110_RX_TRAILER_LEN			13
 #define SJA1110_TX_TRAILER_LEN			4
@@ -184,6 +189,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
 static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
 				    struct net_device *netdev)
 {
+	struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
 	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
@@ -221,6 +227,12 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
 	*tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) |
 				  SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) |
 				  SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index)));
+	if (clone) {
+		u8 ts_id = SJA1105_SKB_CB(clone)->ts_id;
+
+		*tx_header |= htons(SJA1110_TX_HEADER_TAKE_TS);
+		*tx_trailer |= cpu_to_be32(SJA1110_TX_TRAILER_TSTAMP_ID(ts_id));
+	}
 
 	return skb;
 }
@@ -423,6 +435,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 					      is_meta);
 }
 
+static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
+{
+	int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
+	int n_ts = SJA1110_RX_HEADER_N_TS(rx_header);
+	struct net_device *master = skb->dev;
+	struct dsa_port *cpu_dp;
+	u8 *buf = skb->data + 2;
+	struct dsa_switch *ds;
+	int i;
+
+	cpu_dp = master->dsa_ptr;
+	ds = dsa_switch_find(cpu_dp->dst->index, switch_id);
+	if (!ds) {
+		net_err_ratelimited("%s: cannot find switch id %d\n",
+				    master->name, switch_id);
+		return NULL;
+	}
+
+	for (i = 0; i <= n_ts; i++) {
+		u8 ts_id, source_port, dir;
+		u64 tstamp;
+
+		ts_id = buf[0];
+		source_port = (buf[1] & GENMASK(7, 4)) >> 4;
+		dir = (buf[1] & BIT(3)) >> 3;
+		tstamp = be64_to_cpu(*(__be64 *)(buf + 2));
+
+		sja1110_process_meta_tstamp(ds, source_port, ts_id, dir,
+					    tstamp);
+
+		buf += SJA1110_META_TSTAMP_SIZE;
+	}
+
+	/* Discard the meta frame, we've consumed the timestamps it contained */
+	return NULL;
+}
+
 static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 							    int *source_port,
 							    int *switch_id)
@@ -439,6 +488,9 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 	 */
 	rx_header = ntohs(*(__be16 *)skb->data);
 
+	if (rx_header & SJA1110_RX_HEADER_IS_METADATA)
+		return sja1110_rcv_meta(skb, rx_header);
+
 	/* Timestamp frame, we have a trailer */
 	if (rx_header & SJA1110_RX_HEADER_HAS_TRAILER) {
 		int start_of_padding = SJA1110_RX_HEADER_TRAILER_POS(rx_header);

From e872d0c1249be74a65723664a329ef5a9113cc40 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:33 +0200
Subject: [PATCH 1478/3292] s390/qeth: count TX completion interrupts

While the qdio layer already tracks the number of HW interrupts for a
device, there's value in understanding how many of them have been
raised due to our TX completion logic.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      | 1 +
 drivers/s390/net/qeth_core_main.c | 4 +++-
 drivers/s390/net/qeth_ethtool.c   | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index fd9b869d278e..3a49ef8dd906 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -483,6 +483,7 @@ struct qeth_out_q_stats {
 	u64 stopped;
 	u64 doorbell;
 	u64 coal_frames;
+	u64 completion_irq;
 	u64 completion_yield;
 	u64 completion_timer;
 
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a1f08e9aa064..9085f22ca34c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1400,8 +1400,10 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 	int i;
 
 	/* is PCI flag set on buffer? */
-	if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ)
+	if (buf->buffer->element[0].sflags & SBAL_SFLAGS0_PCI_REQ) {
 		atomic_dec(&queue->set_pci_flags_count);
+		QETH_TXQ_STAT_INC(queue, completion_irq);
+	}
 
 	qeth_tx_complete_buf(buf, error, budget);
 
diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
index 3a51bbff0ffe..190dac2065df 100644
--- a/drivers/s390/net/qeth_ethtool.c
+++ b/drivers/s390/net/qeth_ethtool.c
@@ -41,6 +41,7 @@ static const struct qeth_stats txq_stats[] = {
 	QETH_TXQ_STAT("Queue stopped", stopped),
 	QETH_TXQ_STAT("Doorbell", doorbell),
 	QETH_TXQ_STAT("IRQ for frames", coal_frames),
+	QETH_TXQ_STAT("Completion IRQ", completion_irq),
 	QETH_TXQ_STAT("Completion yield", completion_yield),
 	QETH_TXQ_STAT("Completion timer", completion_timer),
 };

From 7a4b92e8e0de9cbbb623310af76b1d60cd344b1d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:34 +0200
Subject: [PATCH 1479/3292] s390/qeth: also use TX NAPI for non-IQD devices

Set scan_threshold = 0 to opt out from the qdio layer's internal tasklet
& timer mechanism for TX completions, and replace it with the TX NAPI
infrastructure that qeth already uses for IQD devices. This avoids the
fragile logic in qdio_check_output_queue(), enables tighter integration
and gives us more tuning options via ethtool in the future.

For now we continue to apply the same policy as the qdio layer:
scan for completions if 32 TX buffers are in use, or after 1 sec.
A re-scan is done after 10 sec, but only if no TX interrupt is pending.

With scan_threshold = 0 we no longer get TX completion scans from
within qdio_get_next_buffers(). So trigger these manually in qeth_poll()
and in the RX path switch to the equivalent qdio_inspect_queue().

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |   6 ++
 drivers/s390/net/qeth_core_main.c | 148 +++++++++++++++---------------
 2 files changed, 79 insertions(+), 75 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 3a49ef8dd906..4d29801bcf41 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -527,6 +527,7 @@ struct qeth_qdio_out_q {
 
 	unsigned int coalesce_usecs;
 	unsigned int max_coalesced_frames;
+	unsigned int rescan_usecs;
 };
 
 #define qeth_for_each_output_queue(card, q, i)		\
@@ -887,6 +888,11 @@ static inline bool qeth_card_hw_is_reachable(struct qeth_card *card)
 	return card->state == CARD_STATE_SOFTSETUP;
 }
 
+static inline bool qeth_use_tx_irqs(struct qeth_card *card)
+{
+	return !IS_IQD(card);
+}
+
 static inline void qeth_unlock_channel(struct qeth_card *card,
 				       struct qeth_channel *channel)
 {
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9085f22ca34c..f22f223a4a6c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2665,8 +2665,15 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
 		INIT_LIST_HEAD(&queue->pending_bufs);
 		spin_lock_init(&queue->lock);
 		timer_setup(&queue->timer, qeth_tx_completion_timer, 0);
-		queue->coalesce_usecs = QETH_TX_COALESCE_USECS;
-		queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES;
+		if (IS_IQD(card)) {
+			queue->coalesce_usecs = QETH_TX_COALESCE_USECS;
+			queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES;
+			queue->rescan_usecs = QETH_TX_TIMER_USECS;
+		} else {
+			queue->coalesce_usecs = USEC_PER_SEC;
+			queue->max_coalesced_frames = 0;
+			queue->rescan_usecs = 10 * USEC_PER_SEC;
+		}
 		queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT;
 	}
 
@@ -3603,8 +3610,8 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 			       int count)
 {
 	struct qeth_qdio_out_buffer *buf = queue->bufs[index];
-	unsigned int qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
 	struct qeth_card *card = queue->card;
+	unsigned int frames, usecs;
 	struct qaob *aob = NULL;
 	int rc;
 	int i;
@@ -3660,14 +3667,11 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 				buf->buffer->element[0].sflags |= SBAL_SFLAGS0_PCI_REQ;
 			}
 		}
-
-		if (atomic_read(&queue->set_pci_flags_count))
-			qdio_flags |= QDIO_FLAG_PCI_OUT;
 	}
 
 	QETH_TXQ_STAT_INC(queue, doorbell);
-	rc = do_QDIO(CARD_DDEV(card), qdio_flags, queue->queue_no, index, count,
-		     aob);
+	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_OUTPUT, queue->queue_no,
+		     index, count, aob);
 
 	switch (rc) {
 	case 0:
@@ -3675,17 +3679,20 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 		/* ignore temporary SIGA errors without busy condition */
 
 		/* Fake the TX completion interrupt: */
-		if (IS_IQD(card)) {
-			unsigned int frames = READ_ONCE(queue->max_coalesced_frames);
-			unsigned int usecs = READ_ONCE(queue->coalesce_usecs);
+		frames = READ_ONCE(queue->max_coalesced_frames);
+		usecs = READ_ONCE(queue->coalesce_usecs);
 
-			if (frames && queue->coalesced_frames >= frames) {
-				napi_schedule(&queue->napi);
-				queue->coalesced_frames = 0;
-				QETH_TXQ_STAT_INC(queue, coal_frames);
-			} else if (usecs) {
-				qeth_tx_arm_timer(queue, usecs);
-			}
+		if (frames && queue->coalesced_frames >= frames) {
+			napi_schedule(&queue->napi);
+			queue->coalesced_frames = 0;
+			QETH_TXQ_STAT_INC(queue, coal_frames);
+		} else if (qeth_use_tx_irqs(card) &&
+			   atomic_read(&queue->used_buffers) >= 32) {
+			/* Old behaviour carried over from the qdio layer: */
+			napi_schedule(&queue->napi);
+			QETH_TXQ_STAT_INC(queue, coal_frames);
+		} else if (usecs) {
+			qeth_tx_arm_timer(queue, usecs);
 		}
 
 		break;
@@ -3833,36 +3840,14 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
 				     unsigned long card_ptr)
 {
 	struct qeth_card *card        = (struct qeth_card *) card_ptr;
-	struct qeth_qdio_out_q *queue = card->qdio.out_qs[__queue];
 	struct net_device *dev = card->dev;
-	struct netdev_queue *txq;
-	int i;
 
 	QETH_CARD_TEXT(card, 6, "qdouhdl");
 	if (qdio_error & QDIO_ERROR_FATAL) {
 		QETH_CARD_TEXT(card, 2, "achkcond");
 		netif_tx_stop_all_queues(dev);
 		qeth_schedule_recovery(card);
-		return;
 	}
-
-	for (i = first_element; i < (first_element + count); ++i) {
-		struct qeth_qdio_out_buffer *buf = queue->bufs[QDIO_BUFNR(i)];
-
-		qeth_handle_send_error(card, buf, qdio_error);
-		qeth_clear_output_buffer(queue, buf, qdio_error, 0);
-	}
-
-	atomic_sub(count, &queue->used_buffers);
-	qeth_check_outbound_queue(queue);
-
-	txq = netdev_get_tx_queue(dev, __queue);
-	/* xmit may have observed the full-condition, but not yet stopped the
-	 * txq. In which case the code below won't trigger. So before returning,
-	 * xmit will re-check the txq's fill level and wake it up if needed.
-	 */
-	if (netif_tx_queue_stopped(txq) && !qeth_out_queue_is_full(queue))
-		netif_tx_wake_queue(txq);
 }
 
 /**
@@ -5258,7 +5243,6 @@ static int qeth_qdio_establish(struct qeth_card *card)
 	init_data.int_parm               = (unsigned long) card;
 	init_data.input_sbal_addr_array  = in_sbal_ptrs;
 	init_data.output_sbal_addr_array = out_sbal_ptrs;
-	init_data.scan_threshold	 = IS_IQD(card) ? 0 : 32;
 
 	if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED,
 		QETH_QDIO_ESTABLISHED) == QETH_QDIO_ALLOCATED) {
@@ -5958,9 +5942,10 @@ static unsigned int qeth_rx_poll(struct qeth_card *card, int budget)
 		/* Fetch completed RX buffers: */
 		if (!card->rx.b_count) {
 			card->rx.qdio_err = 0;
-			card->rx.b_count = qdio_get_next_buffers(
-				card->data.ccwdev, 0, &card->rx.b_index,
-				&card->rx.qdio_err);
+			card->rx.b_count = qdio_inspect_queue(CARD_DDEV(card),
+							      0, true,
+							      &card->rx.b_index,
+							      &card->rx.qdio_err);
 			if (card->rx.b_count <= 0) {
 				card->rx.b_count = 0;
 				break;
@@ -6024,6 +6009,16 @@ int qeth_poll(struct napi_struct *napi, int budget)
 
 	work_done = qeth_rx_poll(card, budget);
 
+	if (qeth_use_tx_irqs(card)) {
+		struct qeth_qdio_out_q *queue;
+		unsigned int i;
+
+		qeth_for_each_output_queue(card, queue, i) {
+			if (!qeth_out_queue_is_empty(queue))
+				napi_schedule(&queue->napi);
+		}
+	}
+
 	if (card->options.cq == QETH_CQ_ENABLED)
 		qeth_cq_poll(card);
 
@@ -6140,7 +6135,10 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 	unsigned int work_done = 0;
 	struct netdev_queue *txq;
 
-	txq = netdev_get_tx_queue(dev, qeth_iqd_translate_txq(dev, queue_no));
+	if (IS_IQD(card))
+		txq = netdev_get_tx_queue(dev, qeth_iqd_translate_txq(dev, queue_no));
+	else
+		txq = netdev_get_tx_queue(dev, queue_no);
 
 	while (1) {
 		unsigned int start, error, i;
@@ -6167,8 +6165,9 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 					       &start, &error);
 		if (completed <= 0) {
 			/* Ensure we see TX completion for pending work: */
-			if (napi_complete_done(napi, 0))
-				qeth_tx_arm_timer(queue, QETH_TX_TIMER_USECS);
+			if (napi_complete_done(napi, 0) &&
+			    !atomic_read(&queue->set_pci_flags_count))
+				qeth_tx_arm_timer(queue, queue->rescan_usecs);
 			return 0;
 		}
 
@@ -6181,12 +6180,19 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 			bytes += buffer->bytes;
 
 			qeth_handle_send_error(card, buffer, error);
-			qeth_iqd_tx_complete(queue, bidx, error, budget);
+			if (IS_IQD(card))
+				qeth_iqd_tx_complete(queue, bidx, error, budget);
+			else
+				qeth_clear_output_buffer(queue, buffer, error,
+							 budget);
 		}
 
-		netdev_tx_completed_queue(txq, packets, bytes);
 		atomic_sub(completed, &queue->used_buffers);
 		work_done += completed;
+		if (IS_IQD(card))
+			netdev_tx_completed_queue(txq, packets, bytes);
+		else
+			qeth_check_outbound_queue(queue);
 
 		/* xmit may have observed the full-condition, but not yet
 		 * stopped the txq. In which case the code below won't trigger.
@@ -7230,6 +7236,8 @@ EXPORT_SYMBOL_GPL(qeth_iqd_select_queue);
 int qeth_open(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
+	struct qeth_qdio_out_q *queue;
+	unsigned int i;
 
 	QETH_CARD_TEXT(card, 4, "qethopen");
 
@@ -7237,16 +7245,11 @@ int qeth_open(struct net_device *dev)
 	netif_tx_start_all_queues(dev);
 
 	local_bh_disable();
-	if (IS_IQD(card)) {
-		struct qeth_qdio_out_q *queue;
-		unsigned int i;
-
-		qeth_for_each_output_queue(card, queue, i) {
-			netif_tx_napi_add(dev, &queue->napi, qeth_tx_poll,
-					  QETH_NAPI_WEIGHT);
-			napi_enable(&queue->napi);
-			napi_schedule(&queue->napi);
-		}
+	qeth_for_each_output_queue(card, queue, i) {
+		netif_tx_napi_add(dev, &queue->napi, qeth_tx_poll,
+				  QETH_NAPI_WEIGHT);
+		napi_enable(&queue->napi);
+		napi_schedule(&queue->napi);
 	}
 
 	napi_enable(&card->napi);
@@ -7261,6 +7264,8 @@ EXPORT_SYMBOL_GPL(qeth_open);
 int qeth_stop(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
+	struct qeth_qdio_out_q *queue;
+	unsigned int i;
 
 	QETH_CARD_TEXT(card, 4, "qethstop");
 
@@ -7268,24 +7273,17 @@ int qeth_stop(struct net_device *dev)
 	cancel_delayed_work_sync(&card->buffer_reclaim_work);
 	qdio_stop_irq(CARD_DDEV(card));
 
-	if (IS_IQD(card)) {
-		struct qeth_qdio_out_q *queue;
-		unsigned int i;
+	/* Quiesce the NAPI instances: */
+	qeth_for_each_output_queue(card, queue, i)
+		napi_disable(&queue->napi);
 
-		/* Quiesce the NAPI instances: */
-		qeth_for_each_output_queue(card, queue, i)
-			napi_disable(&queue->napi);
+	/* Stop .ndo_start_xmit, might still access queue->napi. */
+	netif_tx_disable(dev);
 
-		/* Stop .ndo_start_xmit, might still access queue->napi. */
-		netif_tx_disable(dev);
-
-		qeth_for_each_output_queue(card, queue, i) {
-			del_timer_sync(&queue->timer);
-			/* Queues may get re-allocated, so remove the NAPIs. */
-			netif_napi_del(&queue->napi);
-		}
-	} else {
-		netif_tx_disable(dev);
+	qeth_for_each_output_queue(card, queue, i) {
+		del_timer_sync(&queue->timer);
+		/* Queues may get re-allocated, so remove the NAPIs. */
+		netif_napi_del(&queue->napi);
 	}
 
 	return 0;

From 3518ae76f2bbc465f7a0d7075fe71815e37b21c6 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:35 +0200
Subject: [PATCH 1480/3292] s390/qeth: unify the tracking of active cmds on ccw
 device

We have one field to track _whether_ a cmd is active on a ccw device
('irq_pending'), and one to track _which_ cmd it is ('active_cmd').

Get rid of the irq_pending field, by testing active_cmd for NULL.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      | 14 +++++++-------
 drivers/s390/net/qeth_core_main.c | 12 ++++--------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 4d29801bcf41..ad0e86aa99b2 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -614,7 +614,6 @@ struct qeth_channel {
 	struct ccw_device *ccwdev;
 	struct qeth_cmd_buffer *active_cmd;
 	enum qeth_channel_states state;
-	atomic_t irq_pending;
 };
 
 struct qeth_reply {
@@ -664,11 +663,6 @@ static inline struct ccw1 *__ccw_from_cmd(struct qeth_cmd_buffer *iob)
 	return (struct ccw1 *)(iob->data + ALIGN(iob->length, 8));
 }
 
-static inline bool qeth_trylock_channel(struct qeth_channel *channel)
-{
-	return atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0;
-}
-
 /**
  *  OSA card related definitions
  */
@@ -896,10 +890,16 @@ static inline bool qeth_use_tx_irqs(struct qeth_card *card)
 static inline void qeth_unlock_channel(struct qeth_card *card,
 				       struct qeth_channel *channel)
 {
-	atomic_set(&channel->irq_pending, 0);
+	xchg(&channel->active_cmd, NULL);
 	wake_up(&card->wait_q);
 }
 
+static inline bool qeth_trylock_channel(struct qeth_channel *channel,
+					struct qeth_cmd_buffer *cmd)
+{
+	return cmpxchg(&channel->active_cmd, NULL, cmd) == NULL;
+}
+
 struct qeth_trap_id {
 	__u16 lparnr;
 	char vmname[8];
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index f22f223a4a6c..83d540f8b527 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1268,7 +1268,6 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
 		iob = (struct qeth_cmd_buffer *) (addr_t)intparm;
 	}
 
-	channel->active_cmd = NULL;
 	qeth_unlock_channel(card, channel);
 
 	rc = qeth_check_irb_error(card, cdev, irb);
@@ -1715,11 +1714,10 @@ static int qeth_stop_channel(struct qeth_channel *channel)
 	rc = ccw_device_set_offline(cdev);
 
 	spin_lock_irq(get_ccwdev_lock(cdev));
-	if (channel->active_cmd) {
+	if (channel->active_cmd)
 		dev_err(&cdev->dev, "Stopped channel while cmd %px was still active\n",
 			channel->active_cmd);
-		channel->active_cmd = NULL;
-	}
+
 	cdev->handler = NULL;
 	spin_unlock_irq(get_ccwdev_lock(cdev));
 
@@ -1732,7 +1730,7 @@ static int qeth_start_channel(struct qeth_channel *channel)
 	int rc;
 
 	channel->state = CH_STATE_DOWN;
-	atomic_set(&channel->irq_pending, 0);
+	xchg(&channel->active_cmd, NULL);
 
 	spin_lock_irq(get_ccwdev_lock(cdev));
 	cdev->handler = qeth_irq;
@@ -2039,7 +2037,7 @@ static int qeth_send_control_data(struct qeth_card *card,
 	reply->param = reply_param;
 
 	timeout = wait_event_interruptible_timeout(card->wait_q,
-						   qeth_trylock_channel(channel),
+						   qeth_trylock_channel(channel, iob),
 						   timeout);
 	if (timeout <= 0) {
 		qeth_put_cmd(iob);
@@ -2059,8 +2057,6 @@ static int qeth_send_control_data(struct qeth_card *card,
 	spin_lock_irq(get_ccwdev_lock(channel->ccwdev));
 	rc = ccw_device_start_timeout(channel->ccwdev, __ccw_from_cmd(iob),
 				      (addr_t) iob, 0, 0, timeout);
-	if (!rc)
-		channel->active_cmd = iob;
 	spin_unlock_irq(get_ccwdev_lock(channel->ccwdev));
 	if (rc) {
 		QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n",

From c0a0186630fb0e9880aa4c3d6ea136146c48db56 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:36 +0200
Subject: [PATCH 1481/3292] s390/qeth: use ethtool_sprintf()

Use a recently introduced helper to fill our ethtool stats strings.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_ethtool.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c
index 190dac2065df..2c4cb300a8fc 100644
--- a/drivers/s390/net/qeth_ethtool.c
+++ b/drivers/s390/net/qeth_ethtool.c
@@ -80,10 +80,8 @@ static void qeth_add_stat_strings(u8 **data, const char *prefix,
 {
 	unsigned int i;
 
-	for (i = 0; i < size; i++) {
-		snprintf(*data, ETH_GSTRING_LEN, "%s%s", prefix, stats[i].name);
-		*data += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < size; i++)
+		ethtool_sprintf(data, "%s%s", prefix, stats[i].name);
 }
 
 static int qeth_get_sset_count(struct net_device *dev, int stringset)

From f875d880f04970e86039c670cabfc52b0412a1b3 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:37 +0200
Subject: [PATCH 1482/3292] s390/qeth: consolidate completion of pending TX
 buffers

With commit 396c100472dd ("s390/qdio: let driver manage the QAOB")
a pending TX buffer now has access to its associated QAOB during
TX completion processing. We can thus reduce the amount of work & state
propagation that needs to be done by qeth_qdio_handle_aob().

Move all this logic into the respective TX completion paths. Doing so
even allows us to determine more precise TX_NOTIFY_* values via
qeth_compute_cq_notification(aob->aorc, ...).

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  3 +-
 drivers/s390/net/qeth_core_main.c | 73 ++++++++++++-------------------
 2 files changed, 29 insertions(+), 47 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index ad0e86aa99b2..5de5b419a761 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -422,8 +422,7 @@ enum qeth_qdio_out_buffer_state {
 	/* Finished by the TX completion code: */
 	QETH_QDIO_BUF_NEED_QAOB,
 	/* Received QAOB notification on CQ: */
-	QETH_QDIO_BUF_QAOB_OK,
-	QETH_QDIO_BUF_QAOB_ERROR,
+	QETH_QDIO_BUF_QAOB_DONE,
 };
 
 struct qeth_qdio_out_buffer {
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 83d540f8b527..99e3b0b75cc3 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -70,9 +70,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card,
 				    unsigned int data_length);
 static int qeth_qdio_establish(struct qeth_card *);
 static void qeth_free_qdio_queues(struct qeth_card *card);
-static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
-		struct qeth_qdio_out_buffer *buf,
-		enum iucv_tx_notify notification);
 
 static void qeth_close_dev_handler(struct work_struct *work)
 {
@@ -437,12 +434,9 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
 static void qeth_qdio_handle_aob(struct qeth_card *card,
 				 unsigned long phys_aob_addr)
 {
-	enum qeth_qdio_out_buffer_state new_state = QETH_QDIO_BUF_QAOB_OK;
 	struct qaob *aob;
 	struct qeth_qdio_out_buffer *buffer;
-	enum iucv_tx_notify notification;
 	struct qeth_qdio_out_q *queue;
-	unsigned int i;
 
 	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
 	QETH_CARD_TEXT(card, 5, "haob");
@@ -450,12 +444,10 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 	buffer = (struct qeth_qdio_out_buffer *) aob->user1;
 	QETH_CARD_TEXT_(card, 5, "%lx", aob->user1);
 
-	if (aob->aorc) {
+	if (aob->aorc)
 		QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc);
-		new_state = QETH_QDIO_BUF_QAOB_ERROR;
-	}
 
-	switch (atomic_xchg(&buffer->state, new_state)) {
+	switch (atomic_xchg(&buffer->state, QETH_QDIO_BUF_QAOB_DONE)) {
 	case QETH_QDIO_BUF_PRIMED:
 		/* Faster than TX completion code, let it handle the async
 		 * completion for us. It will also recycle the QAOB.
@@ -468,21 +460,6 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 		break;
 	case QETH_QDIO_BUF_NEED_QAOB:
 		/* TX completion code is already finished. */
-		notification = qeth_compute_cq_notification(aob->aorc, 1);
-		qeth_notify_skbs(buffer->q, buffer, notification);
-
-		/* Free dangling allocations. The attached skbs are handled by
-		 * qeth_tx_complete_pending_bufs(), and so is the QAOB.
-		 */
-		for (i = 0;
-		     i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card);
-		     i++) {
-			void *data = phys_to_virt(aob->sba[i]);
-
-			if (data && buffer->is_header[i])
-				kmem_cache_free(qeth_core_header_cache, data);
-			buffer->is_header[i] = 0;
-		}
 
 		queue = buffer->q;
 		atomic_set(&buffer->state, QETH_QDIO_BUF_EMPTY);
@@ -1435,15 +1412,29 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 	struct qeth_qdio_out_buffer *buf, *tmp;
 
 	list_for_each_entry_safe(buf, tmp, &queue->pending_bufs, list_entry) {
+		struct qaob *aob = buf->aob;
+		enum iucv_tx_notify notify;
+		unsigned int i;
+
 		if (drain || atomic_read(&buf->state) == QETH_QDIO_BUF_EMPTY) {
 			QETH_CARD_TEXT(card, 5, "fp");
 			QETH_CARD_TEXT_(card, 5, "%lx", (long) buf);
 
-			if (drain)
-				qeth_notify_skbs(queue, buf,
-						 TX_NOTIFY_GENERALERROR);
+			notify = drain ? TX_NOTIFY_GENERALERROR :
+					 qeth_compute_cq_notification(aob->aorc, 1);
+			qeth_notify_skbs(queue, buf, notify);
 			qeth_tx_complete_buf(buf, drain, budget);
 
+			for (i = 0;
+			     i < aob->sb_count && i < queue->max_elements;
+			     i++) {
+				void *data = phys_to_virt(aob->sba[i]);
+
+				if (data && buf->is_header[i])
+					kmem_cache_free(qeth_core_header_cache,
+							data);
+			}
+
 			list_del(&buf->list_entry);
 			qeth_free_out_buf(buf);
 		}
@@ -6048,6 +6039,7 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 
 	if (qdio_error == QDIO_ERROR_SLSB_PENDING) {
 		struct qaob *aob = buffer->aob;
+		enum iucv_tx_notify notify;
 
 		if (!aob) {
 			netdev_WARN_ONCE(card->dev,
@@ -6084,30 +6076,21 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 					 &queue->pending_bufs);
 				/* Skip clearing the buffer: */
 				return;
-			case QETH_QDIO_BUF_QAOB_OK:
-				qeth_notify_skbs(queue, buffer,
-						 TX_NOTIFY_DELAYED_OK);
-				error = false;
-				break;
-			case QETH_QDIO_BUF_QAOB_ERROR:
-				qeth_notify_skbs(queue, buffer,
-						 TX_NOTIFY_DELAYED_GENERALERROR);
-				error = true;
+			case QETH_QDIO_BUF_QAOB_DONE:
+				notify = qeth_compute_cq_notification(aob->aorc, 1);
+				qeth_notify_skbs(queue, buffer, notify);
+				error = !!aob->aorc;
 				break;
 			default:
 				WARN_ON_ONCE(1);
 			}
 
 			break;
-		case QETH_QDIO_BUF_QAOB_OK:
+		case QETH_QDIO_BUF_QAOB_DONE:
 			/* qeth_qdio_handle_aob() already received a QAOB: */
-			qeth_notify_skbs(queue, buffer, TX_NOTIFY_OK);
-			error = false;
-			break;
-		case QETH_QDIO_BUF_QAOB_ERROR:
-			/* qeth_qdio_handle_aob() already received a QAOB: */
-			qeth_notify_skbs(queue, buffer, TX_NOTIFY_GENERALERROR);
-			error = true;
+			notify = qeth_compute_cq_notification(aob->aorc, 0);
+			qeth_notify_skbs(queue, buffer, notify);
+			error = !!aob->aorc;
 			break;
 		default:
 			WARN_ON_ONCE(1);

From 838e4cc80814aad973fbfdd836b2b25eb27681f1 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:38 +0200
Subject: [PATCH 1483/3292] s390/qeth: remove QAOB's pointer to its TX buffer

Maintaining a pointer inside the aob's user-definable area is fragile
and unnecessary. At this stage we only need it to overload the buffer's
state field, and to access the buffer's TX queue.

The first part is easily solved by tracking the aob's state within the
aob itself. This also feels much cleaner and self-contained.
For enabling the access to the associated TX queue, we can store the
queue's index in the aob.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/include/asm/qdio.h      |   4 +-
 drivers/s390/net/qeth_core.h      |  17 +++--
 drivers/s390/net/qeth_core_main.c | 118 ++++++++++--------------------
 3 files changed, 49 insertions(+), 90 deletions(-)

diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 8fc52679543d..cb4f73c7228d 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -137,7 +137,6 @@ struct slibe {
  * @user0: user defineable value
  * @res4: reserved paramater
  * @user1: user defineable value
- * @user2: user defineable value
  */
 struct qaob {
 	u64 res0[6];
@@ -152,8 +151,7 @@ struct qaob {
 	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u64 user0;
 	u64 res4[2];
-	u64 user1;
-	u64 user2;
+	u8 user1[16];
 } __attribute__ ((packed, aligned(256)));
 
 /**
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 5de5b419a761..457224b7b97f 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -417,12 +417,17 @@ enum qeth_qdio_out_buffer_state {
 	QETH_QDIO_BUF_EMPTY,
 	/* Filled by driver; owned by hardware in order to be sent. */
 	QETH_QDIO_BUF_PRIMED,
-	/* Discovered by the TX completion code: */
-	QETH_QDIO_BUF_PENDING,
-	/* Finished by the TX completion code: */
-	QETH_QDIO_BUF_NEED_QAOB,
-	/* Received QAOB notification on CQ: */
-	QETH_QDIO_BUF_QAOB_DONE,
+};
+
+enum qeth_qaob_state {
+	QETH_QAOB_ISSUED,
+	QETH_QAOB_PENDING,
+	QETH_QAOB_DONE,
+};
+
+struct qeth_qaob_priv1 {
+	unsigned int state;
+	u8 queue_no;
 };
 
 struct qeth_qdio_out_buffer {
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 99e3b0b75cc3..5ddb2939d4fc 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -431,45 +431,6 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
 	return n;
 }
 
-static void qeth_qdio_handle_aob(struct qeth_card *card,
-				 unsigned long phys_aob_addr)
-{
-	struct qaob *aob;
-	struct qeth_qdio_out_buffer *buffer;
-	struct qeth_qdio_out_q *queue;
-
-	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
-	QETH_CARD_TEXT(card, 5, "haob");
-	QETH_CARD_TEXT_(card, 5, "%lx", phys_aob_addr);
-	buffer = (struct qeth_qdio_out_buffer *) aob->user1;
-	QETH_CARD_TEXT_(card, 5, "%lx", aob->user1);
-
-	if (aob->aorc)
-		QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc);
-
-	switch (atomic_xchg(&buffer->state, QETH_QDIO_BUF_QAOB_DONE)) {
-	case QETH_QDIO_BUF_PRIMED:
-		/* Faster than TX completion code, let it handle the async
-		 * completion for us. It will also recycle the QAOB.
-		 */
-		break;
-	case QETH_QDIO_BUF_PENDING:
-		/* TX completion code is active and will handle the async
-		 * completion for us. It will also recycle the QAOB.
-		 */
-		break;
-	case QETH_QDIO_BUF_NEED_QAOB:
-		/* TX completion code is already finished. */
-
-		queue = buffer->q;
-		atomic_set(&buffer->state, QETH_QDIO_BUF_EMPTY);
-		napi_schedule(&queue->napi);
-		break;
-	default:
-		WARN_ON_ONCE(1);
-	}
-}
-
 static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len,
 			   void *data)
 {
@@ -1412,11 +1373,13 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 	struct qeth_qdio_out_buffer *buf, *tmp;
 
 	list_for_each_entry_safe(buf, tmp, &queue->pending_bufs, list_entry) {
+		struct qeth_qaob_priv1 *priv;
 		struct qaob *aob = buf->aob;
 		enum iucv_tx_notify notify;
 		unsigned int i;
 
-		if (drain || atomic_read(&buf->state) == QETH_QDIO_BUF_EMPTY) {
+		priv = (struct qeth_qaob_priv1 *)&aob->user1;
+		if (drain || READ_ONCE(priv->state) == QETH_QAOB_DONE) {
 			QETH_CARD_TEXT(card, 5, "fp");
 			QETH_CARD_TEXT_(card, 5, "%lx", (long) buf);
 
@@ -3625,8 +3588,12 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 			if (!buf->aob)
 				buf->aob = qdio_allocate_aob();
 			if (buf->aob) {
+				struct qeth_qaob_priv1 *priv;
+
 				aob = buf->aob;
-				aob->user1 = (u64) buf;
+				priv = (struct qeth_qaob_priv1 *)&aob->user1;
+				priv->state = QETH_QAOB_ISSUED;
+				priv->queue_no = queue->queue_no;
 			}
 		}
 	} else {
@@ -3765,6 +3732,18 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_configure_cq);
 
+static void qeth_qdio_handle_aob(struct qeth_card *card, struct qaob *aob)
+{
+	struct qeth_qaob_priv1 *priv = (struct qeth_qaob_priv1 *)&aob->user1;
+	unsigned int queue_no = priv->queue_no;
+
+	BUILD_BUG_ON(sizeof(*priv) > ARRAY_SIZE(aob->user1));
+
+	if (xchg(&priv->state, QETH_QAOB_DONE) == QETH_QAOB_PENDING &&
+	    queue_no < card->qdio.no_out_queues)
+		napi_schedule(&card->qdio.out_qs[queue_no]->napi);
+}
+
 static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
 				 unsigned int queue, int first_element,
 				 int count)
@@ -3791,7 +3770,7 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err,
 		       buffer->element[e].addr) {
 			unsigned long phys_aob_addr = buffer->element[e].addr;
 
-			qeth_qdio_handle_aob(card, phys_aob_addr);
+			qeth_qdio_handle_aob(card, phys_to_virt(phys_aob_addr));
 			++e;
 		}
 		qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER);
@@ -6039,6 +6018,7 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 
 	if (qdio_error == QDIO_ERROR_SLSB_PENDING) {
 		struct qaob *aob = buffer->aob;
+		struct qeth_qaob_priv1 *priv;
 		enum iucv_tx_notify notify;
 
 		if (!aob) {
@@ -6051,51 +6031,27 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 
 		QETH_CARD_TEXT_(card, 5, "pel%u", bidx);
 
-		switch (atomic_cmpxchg(&buffer->state,
-				       QETH_QDIO_BUF_PRIMED,
-				       QETH_QDIO_BUF_PENDING)) {
-		case QETH_QDIO_BUF_PRIMED:
-			/* We have initial ownership, no QAOB (yet): */
+		priv = (struct qeth_qaob_priv1 *)&aob->user1;
+		/* QAOB hasn't completed yet: */
+		if (xchg(&priv->state, QETH_QAOB_PENDING) != QETH_QAOB_DONE) {
 			qeth_notify_skbs(queue, buffer, TX_NOTIFY_PENDING);
 
-			/* Handle race with qeth_qdio_handle_aob(): */
-			switch (atomic_xchg(&buffer->state,
-					    QETH_QDIO_BUF_NEED_QAOB)) {
-			case QETH_QDIO_BUF_PENDING:
-				/* No concurrent QAOB notification. */
-
-				/* Prepare the queue slot for immediate re-use: */
-				qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements);
-				if (qeth_alloc_out_buf(queue, bidx,
-						       GFP_ATOMIC)) {
-					QETH_CARD_TEXT(card, 2, "outofbuf");
-					qeth_schedule_recovery(card);
-				}
-
-				list_add(&buffer->list_entry,
-					 &queue->pending_bufs);
-				/* Skip clearing the buffer: */
-				return;
-			case QETH_QDIO_BUF_QAOB_DONE:
-				notify = qeth_compute_cq_notification(aob->aorc, 1);
-				qeth_notify_skbs(queue, buffer, notify);
-				error = !!aob->aorc;
-				break;
-			default:
-				WARN_ON_ONCE(1);
+			/* Prepare the queue slot for immediate re-use: */
+			qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements);
+			if (qeth_alloc_out_buf(queue, bidx, GFP_ATOMIC)) {
+				QETH_CARD_TEXT(card, 2, "outofbuf");
+				qeth_schedule_recovery(card);
 			}
 
-			break;
-		case QETH_QDIO_BUF_QAOB_DONE:
-			/* qeth_qdio_handle_aob() already received a QAOB: */
-			notify = qeth_compute_cq_notification(aob->aorc, 0);
-			qeth_notify_skbs(queue, buffer, notify);
-			error = !!aob->aorc;
-			break;
-		default:
-			WARN_ON_ONCE(1);
+			list_add(&buffer->list_entry, &queue->pending_bufs);
+			/* Skip clearing the buffer: */
+			return;
 		}
 
+		/* QAOB already completed: */
+		notify = qeth_compute_cq_notification(aob->aorc, 0);
+		qeth_notify_skbs(queue, buffer, notify);
+		error = !!aob->aorc;
 		memset(aob, 0, sizeof(*aob));
 	} else if (card->options.cq == QETH_CQ_ENABLED) {
 		qeth_notify_skbs(queue, buffer,

From 6b7ec41e574a399ed2165ae13975c531b00e1eb8 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:39 +0200
Subject: [PATCH 1484/3292] s390/qeth: remove TX buffer's pointer to its queue

qeth_tx_complete_buf() is the only remaining user of buf->q, and the
callers can easily provide this as a parameter instead.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      | 1 -
 drivers/s390/net/qeth_core_main.c | 9 ++++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 457224b7b97f..ff1064f871e5 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -439,7 +439,6 @@ struct qeth_qdio_out_buffer {
 	struct sk_buff_head skb_list;
 	int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER];
 
-	struct qeth_qdio_out_q *q;
 	struct list_head list_entry;
 	struct qaob *aob;
 };
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 5ddb2939d4fc..0ad175d54c13 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1290,10 +1290,10 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q,
 	}
 }
 
-static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error,
+static void qeth_tx_complete_buf(struct qeth_qdio_out_q *queue,
+				 struct qeth_qdio_out_buffer *buf, bool error,
 				 int budget)
 {
-	struct qeth_qdio_out_q *queue = buf->q;
 	struct sk_buff *skb;
 
 	/* Empty buffer? */
@@ -1342,7 +1342,7 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 		QETH_TXQ_STAT_INC(queue, completion_irq);
 	}
 
-	qeth_tx_complete_buf(buf, error, budget);
+	qeth_tx_complete_buf(queue, buf, error, budget);
 
 	for (i = 0; i < queue->max_elements; ++i) {
 		void *data = phys_to_virt(buf->buffer->element[i].addr);
@@ -1386,7 +1386,7 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 			notify = drain ? TX_NOTIFY_GENERALERROR :
 					 qeth_compute_cq_notification(aob->aorc, 1);
 			qeth_notify_skbs(queue, buf, notify);
-			qeth_tx_complete_buf(buf, drain, budget);
+			qeth_tx_complete_buf(queue, buf, drain, budget);
 
 			for (i = 0;
 			     i < aob->sb_count && i < queue->max_elements;
@@ -2530,7 +2530,6 @@ static int qeth_alloc_out_buf(struct qeth_qdio_out_q *q, unsigned int bidx,
 	newbuf->buffer = q->qdio_bufs[bidx];
 	skb_queue_head_init(&newbuf->skb_list);
 	lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
-	newbuf->q = q;
 	atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
 	q->bufs[bidx] = newbuf;
 	return 0;

From bb7032ddc947ddde42cb695b5602e040167bce18 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:40 +0200
Subject: [PATCH 1485/3292] s390/qeth: shrink TX buffer struct

Convert the large boolean array into a bitmap, this substantially
reduces the struct's size. While at it also clarify the naming.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      | 2 +-
 drivers/s390/net/qeth_core_main.c | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index ff1064f871e5..f4d554ea0c93 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -437,7 +437,7 @@ struct qeth_qdio_out_buffer {
 	unsigned int frames;
 	unsigned int bytes;
 	struct sk_buff_head skb_list;
-	int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	DECLARE_BITMAP(from_kmem_cache, QDIO_MAX_ELEMENTS_PER_BUFFER);
 
 	struct list_head list_entry;
 	struct qaob *aob;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 0ad175d54c13..62f88ccbd03f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1347,9 +1347,8 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 	for (i = 0; i < queue->max_elements; ++i) {
 		void *data = phys_to_virt(buf->buffer->element[i].addr);
 
-		if (data && buf->is_header[i])
+		if (__test_and_clear_bit(i, buf->from_kmem_cache) && data)
 			kmem_cache_free(qeth_core_header_cache, data);
-		buf->is_header[i] = 0;
 	}
 
 	qeth_scrub_qdio_buffer(buf->buffer, queue->max_elements);
@@ -1393,7 +1392,7 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 			     i++) {
 				void *data = phys_to_virt(aob->sba[i]);
 
-				if (data && buf->is_header[i])
+				if (test_bit(i, buf->from_kmem_cache) && data)
 					kmem_cache_free(qeth_core_header_cache,
 							data);
 			}
@@ -4053,7 +4052,7 @@ static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf,
 
 		/* HW header is allocated from cache: */
 		if ((void *)hdr != skb->data)
-			buf->is_header[element] = 1;
+			__set_bit(element, buf->from_kmem_cache);
 		/* HW header was pushed and is contiguous with linear part: */
 		else if (length > 0 && !PAGE_ALIGNED(data) &&
 			 (data == (char *)hdr + hd_len))

From 953fb4dc4f4af63d9283d2cb386403fc24b15118 Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:33:41 +0200
Subject: [PATCH 1486/3292] s390/qeth: Consider dependency on SWITCHDEV module

Without the SWITCHDEV module, the bridgeport attribute LEARNING_SYNC
of the physical device (self) does not provide any functionality.
Instead of calling the no-op stub version of the switchdev functions,
fail the setting of the attribute with an appropriate message.

While at it, also add an error message for the 'not supported by HW'
case.

Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Reviewed-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index ca44421a6d6e..2abf86c104d5 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -805,8 +805,6 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
 
 	if (!netif_device_present(dev))
 		return -ENODEV;
-	if (!(priv->brport_hw_features))
-		return -EOPNOTSUPP;
 
 	nlmsg_for_each_attr(attr, nlh, sizeof(struct ifinfomsg), rem1) {
 		if (nla_type(attr) == IFLA_PROTINFO) {
@@ -832,6 +830,16 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
 		return 0;
 	if (!bp_tb[IFLA_BRPORT_LEARNING_SYNC])
 		return -EINVAL;
+	if (!(priv->brport_hw_features & BR_LEARNING_SYNC)) {
+		NL_SET_ERR_MSG_ATTR(extack, bp_tb[IFLA_BRPORT_LEARNING_SYNC],
+				    "Operation not supported by HW");
+		return -EOPNOTSUPP;
+	}
+	if (!IS_ENABLED(CONFIG_NET_SWITCHDEV)) {
+		NL_SET_ERR_MSG_ATTR(extack, bp_tb[IFLA_BRPORT_LEARNING_SYNC],
+				    "Requires NET_SWITCHDEV");
+		return -EOPNOTSUPP;
+	}
 	enable = !!nla_get_u8(bp_tb[IFLA_BRPORT_LEARNING_SYNC]);
 
 	if (enable == !!(priv->brport_features & BR_LEARNING_SYNC))

From 43fa32d1cc1b967858ba5786b1b913527f1b10ed Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Thu, 10 Jun 2021 10:46:16 +0800
Subject: [PATCH 1487/3292] nfc: fdp: remove unnecessary labels

Some labels are meaningless, so we delete them and use the
return statement instead of the goto statement.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/fdp/fdp.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 7863b2536999..528745862738 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -266,7 +266,7 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
 	r = request_firmware(&info->ram_patch, FDP_RAM_PATCH_NAME, dev);
 	if (r < 0) {
 		nfc_err(dev, "RAM patch request error\n");
-		goto error;
+		return r;
 	}
 
 	data = (u8 *) info->ram_patch->data;
@@ -283,7 +283,7 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
 	r = request_firmware(&info->otp_patch, FDP_OTP_PATCH_NAME, dev);
 	if (r < 0) {
 		nfc_err(dev, "OTP patch request error\n");
-		goto out;
+		return 0;
 	}
 
 	data = (u8 *) info->otp_patch->data;
@@ -295,10 +295,7 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
 
 	dev_dbg(dev, "OTP patch version: %d, size: %d\n",
 		 info->otp_patch_version, (int) info->otp_patch->size);
-out:
 	return 0;
-error:
-	return r;
 }
 
 static void fdp_nci_release_firmware(struct nci_dev *ndev)

From 1ee8856de82faec9bc8bd0f2308a7f27e30ba207 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 10 Jun 2021 22:56:59 +0200
Subject: [PATCH 1488/3292] r8169: avoid link-up interrupt issue on RTL8106e if
 user enables ASPM

It has been reported that on RTL8106e the link-up interrupt may be
significantly delayed if the user enables ASPM L1. Per default ASPM
is disabled. The change leaves L1 enabled on the PCIe link (thus still
allowing to reach higher package power saving states), but the
NIC won't actively trigger it.

Reported-by: Koba Ko <koba.ko@canonical.com>
Tested-by: Koba Ko <koba.ko@canonical.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 64f94a3fe646..6a9fe9f7e0be 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3508,7 +3508,6 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 	rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
 
 	rtl_pcie_state_l2l3_disable(tp);
-	rtl_hw_aspm_clkreq_enable(tp, true);
 }
 
 DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond)

From 51a1ebc35b46dc322071cfa7fcd4cdcfde0c1aa4 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Fri, 11 Jun 2021 09:33:33 +0800
Subject: [PATCH 1489/3292] net: devres: Correct a grammatical error

Correct a grammatical error.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/devres.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/devres.c b/net/devres.c
index 1f9be2133787..5ccf6ca311dc 100644
--- a/net/devres.c
+++ b/net/devres.c
@@ -60,7 +60,7 @@ static int netdev_devres_match(struct device *dev, void *this, void *match_data)
  *	@ndev: device to register
  *
  *	This is a devres variant of register_netdev() for which the unregister
- *	function will be call automatically when the managing device is
+ *	function will be called automatically when the managing device is
  *	detached. Note: the net_device used must also be resource managed by
  *	the same struct device.
  */

From 93f764371c45bc3f1d859026f12ef6255c388a85 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:15 +0800
Subject: [PATCH 1490/3292] net: pc300too: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 001fd378d417..5ccaec97e3e9 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -52,7 +52,6 @@ static unsigned int CLOCK_BASE;
 #define PC300_CHMEDIA_MASK(port) (0x00000020UL << ((port) * 3))
 #define PC300_CTYPE_MASK	 (0x00000800UL)
 
-
 enum { PC300_RSV = 1, PC300_X21, PC300_TE }; /* card types */
 
 /*
@@ -71,8 +70,6 @@ typedef struct {
 	u32 init_ctrl;		/* 50h : EEPROM ctrl, Init Ctrl, etc */
 }plx9050;
 
-
-
 typedef struct port_s {
 	struct napi_struct napi;
 	struct net_device *netdev;
@@ -90,8 +87,6 @@ typedef struct port_s {
 	u8 chan;		/* physical port # - 0 or 1 */
 }port_t;
 
-
-
 typedef struct card_s {
 	int type;		/* RSV, X21, etc. */
 	int n_ports;		/* 1 or 2 ports */
@@ -107,13 +102,11 @@ typedef struct card_s {
 	port_t ports[2];
 }card_t;
 
-
 #define get_port(card, port)	     ((port) < (card)->n_ports ? \
 					 (&(card)->ports[port]) : (NULL))
 
 #include "hd64572.c"
 
-
 static void pc300_set_iface(port_t *port)
 {
 	card_t *card = port->card;
@@ -162,8 +155,6 @@ static void pc300_set_iface(port_t *port)
 	}
 }
 
-
-
 static int pc300_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -177,8 +168,6 @@ static int pc300_open(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int pc300_close(struct net_device *dev)
 {
 	sca_close(dev);
@@ -186,8 +175,6 @@ static int pc300_close(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	const size_t size = sizeof(sync_serial_settings);
@@ -214,7 +201,6 @@ static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (copy_to_user(line, &port->settings, size))
 			return -EFAULT;
 		return 0;
-
 	}
 
 	if (port->card->type == PC300_X21 &&
@@ -255,8 +241,6 @@ static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return 0;
 }
 
-
-
 static void pc300_pci_remove_one(struct pci_dev *pdev)
 {
 	int i;
@@ -472,8 +456,6 @@ static int pc300_pci_init_one(struct pci_dev *pdev,
 	return 0;
 }
 
-
-
 static const struct pci_device_id pc300_pci_tbl[] = {
 	{ PCI_VENDOR_ID_CYCLADES, PCI_DEVICE_ID_PC300_RX_1, PCI_ANY_ID,
 	  PCI_ANY_ID, 0, 0, 0 },
@@ -486,7 +468,6 @@ static const struct pci_device_id pc300_pci_tbl[] = {
 	{ 0, }
 };
 
-
 static struct pci_driver pc300_pci_driver = {
 	.name =          "PC300",
 	.id_table =      pc300_pci_tbl,
@@ -494,7 +475,6 @@ static struct pci_driver pc300_pci_driver = {
 	.remove =        pc300_pci_remove_one,
 };
 
-
 static int __init pc300_init_module(void)
 {
 	if (pci_clock_freq < 1000000 || pci_clock_freq > 80000000) {
@@ -511,8 +491,6 @@ static int __init pc300_init_module(void)
 	return pci_register_driver(&pc300_pci_driver);
 }
 
-
-
 static void __exit pc300_cleanup_module(void)
 {
 	pci_unregister_driver(&pc300_pci_driver);

From a657c8b4d50d33954a3f766c8876f31d323d32b9 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:16 +0800
Subject: [PATCH 1491/3292] net: pc300too: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 5ccaec97e3e9..8f592ff798fb 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -158,8 +158,8 @@ static void pc300_set_iface(port_t *port)
 static int pc300_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
-
 	int result = hdlc_open(dev);
+
 	if (result)
 		return result;
 
@@ -426,6 +426,7 @@ static int pc300_pci_init_one(struct pci_dev *pdev,
 		port_t *port = &card->ports[i];
 		struct net_device *dev = port->netdev;
 		hdlc_device *hdlc = dev_to_hdlc(dev);
+
 		port->chan = i;
 
 		spin_lock_init(&port->lock);

From f8864e26d3118ccf91d6a1ae5cdd18f4b56b7704 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:17 +0800
Subject: [PATCH 1492/3292] net: pc300too: fix the code style issue about "foo
 * bar"

Fix the checkpatch error as "foo * bar" and should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 8f592ff798fb..17d5cb8287d4 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -110,7 +110,7 @@ typedef struct card_s {
 static void pc300_set_iface(port_t *port)
 {
 	card_t *card = port->card;
-	u32 __iomem * init_ctrl = &card->plxbase->init_ctrl;
+	u32 __iomem *init_ctrl = &card->plxbase->init_ctrl;
 	u16 msci = get_msci(port);
 	u8 rxs = port->rxs & CLK_BRG_MASK;
 	u8 txs = port->txs & CLK_BRG_MASK;

From d72f78db55d6daa4542668de1b2cb4f974708a99 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:18 +0800
Subject: [PATCH 1493/3292] net: pc300too: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 17d5cb8287d4..7d8eae5ea140 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -349,12 +349,14 @@ static int pc300_pci_init_one(struct pci_dev *pdev,
 	else
 		card->n_ports = 2;
 
-	for (i = 0; i < card->n_ports; i++)
-		if (!(card->ports[i].netdev = alloc_hdlcdev(&card->ports[i]))) {
+	for (i = 0; i < card->n_ports; i++) {
+		card->ports[i].netdev = alloc_hdlcdev(&card->ports[i]);
+		if (!card->ports[i].netdev) {
 			pr_err("unable to allocate memory\n");
 			pc300_pci_remove_one(pdev);
 			return -ENOMEM;
 		}
+	}
 
 	/* Reset PLX */
 	p = &card->plxbase->init_ctrl;

From ae6440483b545176b888bfe74971aa40a5749d46 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:19 +0800
Subject: [PATCH 1494/3292] net: pc300too: remove redundant initialization for
 statics

Should not initialise statics to 0.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 7d8eae5ea140..56f7d96298a3 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -44,7 +44,7 @@
 #define MAX_TX_BUFFERS		10
 
 static int pci_clock_freq = 33000000;
-static int use_crystal_clock = 0;
+static int use_crystal_clock;
 static unsigned int CLOCK_BASE;
 
 /* Masks to access the init_ctrl PLX register */

From 0cd2135cf83dd183d3fe05658e17b67b5f6cba86 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:20 +0800
Subject: [PATCH 1495/3292] net: pc300too: replace comparison to NULL with
 "!card->plxbase"

According to the chackpatch.pl, comparison to NULL could
be written "!card->plxbase".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 56f7d96298a3..ecce9992493d 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -298,7 +298,7 @@ static int pc300_pci_init_one(struct pci_dev *pdev,
 	}
 
 	card = kzalloc(sizeof(card_t), GFP_KERNEL);
-	if (card == NULL) {
+	if (!card) {
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
 		return -ENOBUFS;
@@ -322,9 +322,7 @@ static int pc300_pci_init_one(struct pci_dev *pdev,
 	ramphys = pci_resource_start(pdev, 3) & PCI_BASE_ADDRESS_MEM_MASK;
 	card->rambase = pci_ioremap_bar(pdev, 3);
 
-	if (card->plxbase == NULL ||
-	    card->scabase == NULL ||
-	    card->rambase == NULL) {
+	if (!card->plxbase || !card->scabase || !card->rambase) {
 		pr_err("ioremap() failed\n");
 		pc300_pci_remove_one(pdev);
 		return -ENOMEM;

From eed00311659fc5aaae7689b46c7740350199477d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:21 +0800
Subject: [PATCH 1496/3292] net: pc300too: add some required spaces

Add spaces required before the open parenthesis '('.
Add spaces required after that close brace '}'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index ecce9992493d..885dcc5e597e 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -68,7 +68,7 @@ typedef struct {
 	u32 cs_base[4];		/* 3C-48h : Chip Select Base Addrs */
 	u32 intr_ctrl_stat;	/* 4Ch : Interrupt Control/Status */
 	u32 init_ctrl;		/* 50h : EEPROM ctrl, Init Ctrl, etc */
-}plx9050;
+} plx9050;
 
 typedef struct port_s {
 	struct napi_struct napi;
@@ -85,7 +85,7 @@ typedef struct port_s {
 	u16 txlast;
 	u8 rxs, txs, tmc;	/* SCA registers */
 	u8 chan;		/* physical port # - 0 or 1 */
-}port_t;
+} port_t;
 
 typedef struct card_s {
 	int type;		/* RSV, X21, etc. */
@@ -100,7 +100,7 @@ typedef struct card_s {
 	u8 irq;			/* interrupt request level */
 
 	port_t ports[2];
-}card_t;
+} card_t;
 
 #define get_port(card, port)	     ((port) < (card)->n_ports ? \
 					 (&(card)->ports[port]) : (NULL))
@@ -117,7 +117,7 @@ static void pc300_set_iface(port_t *port)
 
 	sca_out(EXS_TES1, (port->chan ? MSCI1_OFFSET : MSCI0_OFFSET) + EXS,
 		port->card);
-	switch(port->settings.clock_type) {
+	switch (port->settings.clock_type) {
 	case CLOCK_INT:
 		rxs |= CLK_BRG; /* BRG output */
 		txs |= CLK_PIN_OUT | CLK_TX_RXCLK; /* RX clock */

From ef1806a8b9615923c0719548b5fc96a9daa037b1 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 11 Jun 2021 11:36:22 +0800
Subject: [PATCH 1497/3292] net: pc300too: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...
This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pc300too.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index 885dcc5e597e..7b123a771aa6 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -54,8 +54,7 @@ static unsigned int CLOCK_BASE;
 
 enum { PC300_RSV = 1, PC300_X21, PC300_TE }; /* card types */
 
-/*
- *      PLX PCI9050-1 local configuration and shared runtime registers.
+/*      PLX PCI9050-1 local configuration and shared runtime registers.
  *      This structure can be used to access 9050 registers (memory mapped).
  */
 typedef struct {

From 4a2c7217cd5a87e85ceb761e307b030fe6db4805 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 11 Jun 2021 05:55:59 +0200
Subject: [PATCH 1498/3292] net: usb: asix: ax88772: manage PHY PM from MAC

Take over PHY power management, otherwise PHY framework will try to
access ASIX MDIO bus before MAC resume was completed.

Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Suggested-by: Heiner Kallweit <hkallweit1@gmail.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 43 ++++++++++------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 8a477171e8f5..aec97b021a73 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -598,6 +598,9 @@ static void ax88772_suspend(struct usbnet *dev)
 	struct asix_common_private *priv = dev->driver_priv;
 	u16 medium;
 
+	if (netif_running(dev->net))
+		phy_stop(priv->phydev);
+
 	/* Stop MAC operation */
 	medium = asix_read_medium_status(dev, 1);
 	medium &= ~AX_MEDIUM_RE;
@@ -605,14 +608,6 @@ static void ax88772_suspend(struct usbnet *dev)
 
 	netdev_dbg(dev->net, "ax88772_suspend: medium=0x%04x\n",
 		   asix_read_medium_status(dev, 1));
-
-	/* Preserve BMCR for restoring */
-	priv->presvd_phy_bmcr =
-		asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_BMCR);
-
-	/* Preserve ANAR for restoring */
-	priv->presvd_phy_advertise =
-		asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE);
 }
 
 static int asix_suspend(struct usb_interface *intf, pm_message_t message)
@@ -626,39 +621,22 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message)
 	return usbnet_suspend(intf, message);
 }
 
-static void ax88772_restore_phy(struct usbnet *dev)
-{
-	struct asix_common_private *priv = dev->driver_priv;
-
-	if (priv->presvd_phy_advertise) {
-		/* Restore Advertisement control reg */
-		asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE,
-				     priv->presvd_phy_advertise);
-
-		/* Restore BMCR */
-		if (priv->presvd_phy_bmcr & BMCR_ANENABLE)
-			priv->presvd_phy_bmcr |= BMCR_ANRESTART;
-
-		asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR,
-				     priv->presvd_phy_bmcr);
-
-		priv->presvd_phy_advertise = 0;
-		priv->presvd_phy_bmcr = 0;
-	}
-}
-
 static void ax88772_resume(struct usbnet *dev)
 {
+	struct asix_common_private *priv = dev->driver_priv;
 	int i;
 
 	for (i = 0; i < 3; i++)
 		if (!ax88772_hw_reset(dev, 1))
 			break;
-	ax88772_restore_phy(dev);
+
+	if (netif_running(dev->net))
+		phy_start(priv->phydev);
 }
 
 static void ax88772a_resume(struct usbnet *dev)
 {
+	struct asix_common_private *priv = dev->driver_priv;
 	int i;
 
 	for (i = 0; i < 3; i++) {
@@ -666,7 +644,8 @@ static void ax88772a_resume(struct usbnet *dev)
 			break;
 	}
 
-	ax88772_restore_phy(dev);
+	if (netif_running(dev->net))
+		phy_start(priv->phydev);
 }
 
 static int asix_resume(struct usb_interface *intf)
@@ -722,6 +701,8 @@ static int ax88772_init_phy(struct usbnet *dev)
 		return ret;
 	}
 
+	priv->phydev->mac_managed_pm = 1;
+
 	phy_attached_info(priv->phydev);
 
 	return 0;

From 3e6dc7b650250f88b8f2a62ed0edac8df951e952 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Fri, 11 Jun 2021 15:11:43 +0800
Subject: [PATCH 1499/3292] net: stmmac: Fix unused values warnings

The commit 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines
for mac, safety, RX & TX") introduced the converity warnings:-

  1. Unused value (UNUSED_VALUE)
     assigned_value: Assigning value REQ_IRQ_ERR_MAC to irq_err here,
     but that stored value is not used.

  2. Unused value (UNUSED_VALUE)
     assigned_value: Assigning value REQ_IRQ_ERR_NO to irq_err here,
     but that stored value is overwritten before it can used.

  3. Unused value (UNUSED_VALUE)
     assigned_value: Assigning value REQ_IRQ_ERR_WOL to irq_err here,
     but that stored value is not used.

Fixed these by removing the unnecessary value assignments.

Fixes: 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX")
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index db97cd4b871d..4177fd6a9db5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3406,8 +3406,8 @@ static void stmmac_free_irq(struct net_device *dev,
 
 static int stmmac_request_irq_multi_msi(struct net_device *dev)
 {
-	enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
 	struct stmmac_priv *priv = netdev_priv(dev);
+	enum request_irq_err irq_err;
 	cpumask_t cpu_mask;
 	int irq_idx = 0;
 	char *int_name;
@@ -3554,8 +3554,8 @@ irq_error:
 
 static int stmmac_request_irq_single(struct net_device *dev)
 {
-	enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
 	struct stmmac_priv *priv = netdev_priv(dev);
+	enum request_irq_err irq_err;
 	int ret;
 
 	ret = request_irq(dev->irq, stmmac_interrupt,
@@ -3565,7 +3565,7 @@ static int stmmac_request_irq_single(struct net_device *dev)
 			   "%s: ERROR: allocating the IRQ %d (error: %d)\n",
 			   __func__, dev->irq, ret);
 		irq_err = REQ_IRQ_ERR_MAC;
-		return ret;
+		goto irq_error;
 	}
 
 	/* Request the Wake IRQ in case of another line
@@ -3579,7 +3579,7 @@ static int stmmac_request_irq_single(struct net_device *dev)
 				   "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
 				   __func__, priv->wol_irq, ret);
 			irq_err = REQ_IRQ_ERR_WOL;
-			return ret;
+			goto irq_error;
 		}
 	}
 

From e71305acd81cac222c41849e538c5c661b12c584 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:47 +0300
Subject: [PATCH 1500/3292] Documentation: ACPI: DSD: Document MDIO PHY

Introduce a mechanism based on generic ACPI _DSD device properties
definition [1] to get PHYs registered on a MDIO bus and provide them to
be connected to MAC.

[1] http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf

Describe properties "phy-handle" and "phy-mode".

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/firmware-guide/acpi/dsd/phy.rst | 133 ++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 Documentation/firmware-guide/acpi/dsd/phy.rst

diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst
new file mode 100644
index 000000000000..7d01ae8b3cc6
--- /dev/null
+++ b/Documentation/firmware-guide/acpi/dsd/phy.rst
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+MDIO bus and PHYs in ACPI
+=========================
+
+The PHYs on an MDIO bus [1] are probed and registered using
+fwnode_mdiobus_register_phy().
+
+Later, for connecting these PHYs to their respective MACs, the PHYs registered
+on the MDIO bus have to be referenced.
+
+This document introduces two _DSD properties that are to be used
+for connecting PHYs on the MDIO bus [3] to the MAC layer.
+
+These properties are defined in accordance with the "Device
+Properties UUID For _DSD" [2] document and the
+daffd814-6eba-4d8c-8a91-bc9bbf4aa301 UUID must be used in the Device
+Data Descriptors containing them.
+
+phy-handle
+----------
+For each MAC node, a device property "phy-handle" is used to reference
+the PHY that is registered on an MDIO bus. This is mandatory for
+network interfaces that have PHYs connected to MAC via MDIO bus.
+
+During the MDIO bus driver initialization, PHYs on this bus are probed
+using the _ADR object as shown below and are registered on the MDIO bus.
+
+::
+      Scope(\_SB.MDI0)
+      {
+        Device(PHY1) {
+          Name (_ADR, 0x1)
+        } // end of PHY1
+
+        Device(PHY2) {
+          Name (_ADR, 0x2)
+        } // end of PHY2
+      }
+
+Later, during the MAC driver initialization, the registered PHY devices
+have to be retrieved from the MDIO bus. For this, the MAC driver needs
+references to the previously registered PHYs which are provided
+as device object references (e.g. \_SB.MDI0.PHY1).
+
+phy-mode
+--------
+The "phy-mode" _DSD property is used to describe the connection to
+the PHY. The valid values for "phy-mode" are defined in [4].
+
+The following ASL example illustrates the usage of these properties.
+
+DSDT entry for MDIO node
+------------------------
+
+The MDIO bus has an SoC component (MDIO controller) and a platform
+component (PHYs on the MDIO bus).
+
+a) Silicon Component
+This node describes the MDIO controller, MDI0
+---------------------------------------------
+::
+	Scope(_SB)
+	{
+	  Device(MDI0) {
+	    Name(_HID, "NXP0006")
+	    Name(_CCA, 1)
+	    Name(_UID, 0)
+	    Name(_CRS, ResourceTemplate() {
+	      Memory32Fixed(ReadWrite, MDI0_BASE, MDI_LEN)
+	      Interrupt(ResourceConsumer, Level, ActiveHigh, Shared)
+	       {
+		 MDI0_IT
+	       }
+	    }) // end of _CRS for MDI0
+	  } // end of MDI0
+	}
+
+b) Platform Component
+The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0
+---------------------------------------------------------------------
+::
+	Scope(\_SB.MDI0)
+	{
+	  Device(PHY1) {
+	    Name (_ADR, 0x1)
+	  } // end of PHY1
+
+	  Device(PHY2) {
+	    Name (_ADR, 0x2)
+	  } // end of PHY2
+	}
+
+DSDT entries representing MAC nodes
+-----------------------------------
+
+Below are the MAC nodes where PHY nodes are referenced.
+phy-mode and phy-handle are used as explained earlier.
+------------------------------------------------------
+::
+	Scope(\_SB.MCE0.PR17)
+	{
+	  Name (_DSD, Package () {
+	     ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+		 Package () {
+		     Package (2) {"phy-mode", "rgmii-id"},
+		     Package (2) {"phy-handle", \_SB.MDI0.PHY1}
+	      }
+	   })
+	}
+
+	Scope(\_SB.MCE0.PR18)
+	{
+	  Name (_DSD, Package () {
+	    ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+		Package () {
+		    Package (2) {"phy-mode", "rgmii-id"},
+		    Package (2) {"phy-handle", \_SB.MDI0.PHY2}}
+	    }
+	  })
+	}
+
+References
+==========
+
+[1] Documentation/networking/phy.rst
+
+[2] https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
+
+[3] Documentation/firmware-guide/acpi/DSD-properties-rules.rst
+
+[4] Documentation/devicetree/bindings/net/ethernet-controller.yaml

From 0fb16976765143cf0d7d0dd78b3f406ab135c494 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:48 +0300
Subject: [PATCH 1501/3292] net: phy: Introduce fwnode_mdio_find_device()

Define fwnode_mdio_find_device() to get a pointer to the
mdio_device from fwnode passed to the function.

Refactor of_mdio_find_device() to use fwnode_mdio_find_device().

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/of_mdio.c   | 11 +----------
 drivers/net/phy/phy_device.c | 23 +++++++++++++++++++++++
 include/linux/phy.h          |  7 +++++++
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 8e97d5b825f5..6ef8b6e40189 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -347,16 +347,7 @@ EXPORT_SYMBOL(of_mdiobus_register);
  */
 struct mdio_device *of_mdio_find_device(struct device_node *np)
 {
-	struct device *d;
-
-	if (!np)
-		return NULL;
-
-	d = bus_find_device_by_of_node(&mdio_bus_type, np);
-	if (!d)
-		return NULL;
-
-	return to_mdio_device(d);
+	return fwnode_mdio_find_device(of_fwnode_handle(np));
 }
 EXPORT_SYMBOL(of_mdio_find_device);
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 495d86b4af7c..dca454b5c209 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2875,6 +2875,29 @@ static bool phy_drv_supports_irq(struct phy_driver *phydrv)
 	return phydrv->config_intr && phydrv->handle_interrupt;
 }
 
+/**
+ * fwnode_mdio_find_device - Given a fwnode, find the mdio_device
+ * @fwnode: pointer to the mdio_device's fwnode
+ *
+ * If successful, returns a pointer to the mdio_device with the embedded
+ * struct device refcount incremented by one, or NULL on failure.
+ * The caller should call put_device() on the mdio_device after its use.
+ */
+struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
+{
+	struct device *d;
+
+	if (!fwnode)
+		return NULL;
+
+	d = bus_find_device_by_fwnode(&mdio_bus_type, fwnode);
+	if (!d)
+		return NULL;
+
+	return to_mdio_device(d);
+}
+EXPORT_SYMBOL(fwnode_mdio_find_device);
+
 /**
  * phy_probe - probe and init a PHY device
  * @dev: device to probe and init
diff --git a/include/linux/phy.h b/include/linux/phy.h
index ed332ac92e25..7aa97f4e5387 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1377,10 +1377,17 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 				     bool is_c45,
 				     struct phy_c45_device_ids *c45_ids);
 #if IS_ENABLED(CONFIG_PHYLIB)
+struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 void phy_device_free(struct phy_device *phydev);
 #else
+static inline
+struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
+{
+	return 0;
+}
+
 static inline
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
 {

From 425775ed31a6fac8b66ab077f7936fafad895ef6 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:49 +0300
Subject: [PATCH 1502/3292] net: phy: Introduce phy related fwnode functions

Define fwnode_phy_find_device() to iterate an mdiobus and find the
phy device of the provided phy fwnode. Additionally define
device_phy_find_device() to find phy device of provided device.

Define fwnode_get_phy_node() to get phy_node using named reference.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 62 ++++++++++++++++++++++++++++++++++++
 include/linux/phy.h          | 20 ++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index dca454b5c209..786f464216dd 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -9,6 +9,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -2898,6 +2899,67 @@ struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL(fwnode_mdio_find_device);
 
+/**
+ * fwnode_phy_find_device - For provided phy_fwnode, find phy_device.
+ *
+ * @phy_fwnode: Pointer to the phy's fwnode.
+ *
+ * If successful, returns a pointer to the phy_device with the embedded
+ * struct device refcount incremented by one, or NULL on failure.
+ */
+struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
+{
+	struct mdio_device *mdiodev;
+
+	mdiodev = fwnode_mdio_find_device(phy_fwnode);
+	if (!mdiodev)
+		return NULL;
+
+	if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+		return to_phy_device(&mdiodev->dev);
+
+	put_device(&mdiodev->dev);
+
+	return NULL;
+}
+EXPORT_SYMBOL(fwnode_phy_find_device);
+
+/**
+ * device_phy_find_device - For the given device, get the phy_device
+ * @dev: Pointer to the given device
+ *
+ * Refer return conditions of fwnode_phy_find_device().
+ */
+struct phy_device *device_phy_find_device(struct device *dev)
+{
+	return fwnode_phy_find_device(dev_fwnode(dev));
+}
+EXPORT_SYMBOL_GPL(device_phy_find_device);
+
+/**
+ * fwnode_get_phy_node - Get the phy_node using the named reference.
+ * @fwnode: Pointer to fwnode from which phy_node has to be obtained.
+ *
+ * Refer return conditions of fwnode_find_reference().
+ * For ACPI, only "phy-handle" is supported. Legacy DT properties "phy"
+ * and "phy-device" are not supported in ACPI. DT supports all the three
+ * named references to the phy node.
+ */
+struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *phy_node;
+
+	/* Only phy-handle is used for ACPI */
+	phy_node = fwnode_find_reference(fwnode, "phy-handle", 0);
+	if (is_acpi_node(fwnode) || !IS_ERR(phy_node))
+		return phy_node;
+	phy_node = fwnode_find_reference(fwnode, "phy", 0);
+	if (IS_ERR(phy_node))
+		phy_node = fwnode_find_reference(fwnode, "phy-device", 0);
+	return phy_node;
+}
+EXPORT_SYMBOL_GPL(fwnode_get_phy_node);
+
 /**
  * phy_probe - probe and init a PHY device
  * @dev: device to probe and init
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7aa97f4e5387..f9b5fb099fa6 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1378,6 +1378,9 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 				     struct phy_c45_device_ids *c45_ids);
 #if IS_ENABLED(CONFIG_PHYLIB)
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
+struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
+struct phy_device *device_phy_find_device(struct device *dev);
+struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode);
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 void phy_device_free(struct phy_device *phydev);
@@ -1388,6 +1391,23 @@ struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
 	return 0;
 }
 
+static inline
+struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
+{
+	return NULL;
+}
+
+static inline struct phy_device *device_phy_find_device(struct device *dev)
+{
+	return NULL;
+}
+
+static inline
+struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
+{
+	return NULL;
+}
+
 static inline
 struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
 {

From 2d7b8bf1fa7afab77f106b67ec6e3d524e3745ca Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:50 +0300
Subject: [PATCH 1503/3292] of: mdio: Refactor of_phy_find_device()

Refactor of_phy_find_device() to use fwnode_phy_find_device().

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/of_mdio.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 6ef8b6e40189..0ba1158796d9 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -360,18 +360,7 @@ EXPORT_SYMBOL(of_mdio_find_device);
  */
 struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
-	struct mdio_device *mdiodev;
-
-	mdiodev = of_mdio_find_device(phy_np);
-	if (!mdiodev)
-		return NULL;
-
-	if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
-		return to_phy_device(&mdiodev->dev);
-
-	put_device(&mdiodev->dev);
-
-	return NULL;
+	return fwnode_phy_find_device(of_fwnode_handle(phy_np));
 }
 EXPORT_SYMBOL(of_phy_find_device);
 

From 114dea60043b8f0c82c67dd281719ef8919c2416 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:51 +0300
Subject: [PATCH 1504/3292] net: phy: Introduce fwnode_get_phy_id()

Extract phy_id from compatible string. This will be used by
fwnode_mdiobus_register_phy() to create phy device using the
phy_id.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 21 +++++++++++++++++++++
 include/linux/phy.h          |  5 +++++
 2 files changed, 26 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 786f464216dd..f7472a0cf771 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -834,6 +834,27 @@ static int get_phy_c22_id(struct mii_bus *bus, int addr, u32 *phy_id)
 	return 0;
 }
 
+/* Extract the phy ID from the compatible string of the form
+ * ethernet-phy-idAAAA.BBBB.
+ */
+int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id)
+{
+	unsigned int upper, lower;
+	const char *cp;
+	int ret;
+
+	ret = fwnode_property_read_string(fwnode, "compatible", &cp);
+	if (ret)
+		return ret;
+
+	if (sscanf(cp, "ethernet-phy-id%4x.%4x", &upper, &lower) != 2)
+		return -EINVAL;
+
+	*phy_id = ((upper & GENMASK(15, 0)) << 16) | (lower & GENMASK(15, 0));
+	return 0;
+}
+EXPORT_SYMBOL(fwnode_get_phy_id);
+
 /**
  * get_phy_device - reads the specified PHY device and returns its @phy_device
  *		    struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index f9b5fb099fa6..b60694734b07 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1377,6 +1377,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
 				     bool is_c45,
 				     struct phy_c45_device_ids *c45_ids);
 #if IS_ENABLED(CONFIG_PHYLIB)
+int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id);
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
 struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
 struct phy_device *device_phy_find_device(struct device *dev);
@@ -1385,6 +1386,10 @@ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 void phy_device_free(struct phy_device *phydev);
 #else
+static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id)
+{
+	return 0;
+}
 static inline
 struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode)
 {

From cf99686072a1b7037a1d782b66037b2b722bf2c9 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:52 +0300
Subject: [PATCH 1505/3292] of: mdio: Refactor of_get_phy_id()

With the introduction of fwnode_get_phy_id(), refactor of_get_phy_id()
to use fwnode equivalent.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/of_mdio.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 0ba1158796d9..29f121cba314 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -29,17 +29,7 @@ MODULE_LICENSE("GPL");
  * ethernet-phy-idAAAA.BBBB */
 static int of_get_phy_id(struct device_node *device, u32 *phy_id)
 {
-	struct property *prop;
-	const char *cp;
-	unsigned int upper, lower;
-
-	of_property_for_each_string(device, "compatible", prop, cp) {
-		if (sscanf(cp, "ethernet-phy-id%4x.%4x", &upper, &lower) == 2) {
-			*phy_id = ((upper & 0xFFFF) << 16) | (lower & 0xFFFF);
-			return 0;
-		}
-	}
-	return -EINVAL;
+	return fwnode_get_phy_id(of_fwnode_handle(device), phy_id);
 }
 
 static struct mii_timestamper *of_find_mii_timestamper(struct device_node *node)

From b9926da003cab58594803a2bc5a1d5bd7c670eba Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:53 +0300
Subject: [PATCH 1506/3292] net: mii_timestamper: check NULL in
 unregister_mii_timestamper()

Callers of unregister_mii_timestamper() currently check for NULL
value of mii_ts before calling it.

Place the NULL check inside unregister_mii_timestamper() and update
the callers accordingly.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/of_mdio.c        | 6 ++----
 drivers/net/phy/mii_timestamper.c | 3 +++
 drivers/net/phy/phy_device.c      | 3 +--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 29f121cba314..d73c0570f19c 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -115,15 +115,13 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
 	else
 		phy = get_phy_device(mdio, addr, is_c45);
 	if (IS_ERR(phy)) {
-		if (mii_ts)
-			unregister_mii_timestamper(mii_ts);
+		unregister_mii_timestamper(mii_ts);
 		return PTR_ERR(phy);
 	}
 
 	rc = of_mdiobus_phy_device_register(mdio, phy, child, addr);
 	if (rc) {
-		if (mii_ts)
-			unregister_mii_timestamper(mii_ts);
+		unregister_mii_timestamper(mii_ts);
 		phy_device_free(phy);
 		return rc;
 	}
diff --git a/drivers/net/phy/mii_timestamper.c b/drivers/net/phy/mii_timestamper.c
index b71b7456462d..51ae0593a04f 100644
--- a/drivers/net/phy/mii_timestamper.c
+++ b/drivers/net/phy/mii_timestamper.c
@@ -111,6 +111,9 @@ void unregister_mii_timestamper(struct mii_timestamper *mii_ts)
 	struct mii_timestamping_desc *desc;
 	struct list_head *this;
 
+	if (!mii_ts)
+		return;
+
 	/* mii_timestamper statically registered by the PHY driver won't use the
 	 * register_mii_timestamper() and thus don't have ->device set. Don't
 	 * try to unregister these.
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index f7472a0cf771..85734309b580 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -957,8 +957,7 @@ EXPORT_SYMBOL(phy_device_register);
  */
 void phy_device_remove(struct phy_device *phydev)
 {
-	if (phydev->mii_ts)
-		unregister_mii_timestamper(phydev->mii_ts);
+	unregister_mii_timestamper(phydev->mii_ts);
 
 	device_del(&phydev->mdio.dev);
 

From bc1bee3b87ee48bd97ef7fd306445132ba2041b0 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:54 +0300
Subject: [PATCH 1507/3292] net: mdiobus: Introduce
 fwnode_mdiobus_register_phy()

Introduce fwnode_mdiobus_register_phy() to register PHYs on the
mdiobus. From the compatible string, identify whether the PHY is
c45 and based on this create a PHY device instance which is
registered on the mdiobus.

Along with fwnode_mdiobus_register_phy() also introduce
fwnode_find_mii_timestamper() and fwnode_mdiobus_phy_device_register()
since they are needed.
While at it, also use the newly introduced fwnode operation in
of_mdiobus_phy_device_register().

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                    |   1 +
 drivers/net/mdio/Kconfig       |   7 ++
 drivers/net/mdio/Makefile      |   3 +-
 drivers/net/mdio/fwnode_mdio.c | 144 +++++++++++++++++++++++++++++++++
 drivers/net/mdio/of_mdio.c     |  44 ++--------
 include/linux/fwnode_mdio.h    |  35 ++++++++
 6 files changed, 194 insertions(+), 40 deletions(-)
 create mode 100644 drivers/net/mdio/fwnode_mdio.c
 create mode 100644 include/linux/fwnode_mdio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e69c1991ec3b..e8f8b6c33a51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6811,6 +6811,7 @@ F:	Documentation/devicetree/bindings/net/mdio*
 F:	Documentation/devicetree/bindings/net/qca,ar803x.yaml
 F:	Documentation/networking/phy.rst
 F:	drivers/net/mdio/
+F:	drivers/net/mdio/fwnode_mdio.c
 F:	drivers/net/mdio/of_mdio.c
 F:	drivers/net/pcs/
 F:	drivers/net/phy/
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index d06e06f5e31a..422e9e042a3c 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -19,6 +19,13 @@ config MDIO_BUS
 	  reflects whether the mdio_bus/mdio_device code is built as a
 	  loadable module or built-in.
 
+config FWNODE_MDIO
+	def_tristate PHYLIB
+	depends on (ACPI || OF) || COMPILE_TEST
+	select FIXED_PHY
+	help
+	  FWNODE MDIO bus (Ethernet PHY) accessors
+
 config OF_MDIO
 	def_tristate PHYLIB
 	depends on OF
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
index c3ec0ef989df..2e6813c709eb 100644
--- a/drivers/net/mdio/Makefile
+++ b/drivers/net/mdio/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Linux MDIO bus drivers
 
-obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
+obj-$(CONFIG_FWNODE_MDIO)	+= fwnode_mdio.o
+obj-$(CONFIG_OF_MDIO)		+= of_mdio.o
 
 obj-$(CONFIG_MDIO_ASPEED)		+= mdio-aspeed.o
 obj-$(CONFIG_MDIO_BCM_IPROC)		+= mdio-bcm-iproc.o
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
new file mode 100644
index 000000000000..e96766da8de4
--- /dev/null
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * fwnode helpers for the MDIO (Ethernet PHY) API
+ *
+ * This file provides helper functions for extracting PHY device information
+ * out of the fwnode and using it to populate an mii_bus.
+ */
+
+#include <linux/acpi.h>
+#include <linux/fwnode_mdio.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+
+MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>");
+MODULE_LICENSE("GPL");
+
+static struct mii_timestamper *
+fwnode_find_mii_timestamper(struct fwnode_handle *fwnode)
+{
+	struct of_phandle_args arg;
+	int err;
+
+	if (is_acpi_node(fwnode))
+		return NULL;
+
+	err = of_parse_phandle_with_fixed_args(to_of_node(fwnode),
+					       "timestamper", 1, 0, &arg);
+	if (err == -ENOENT)
+		return NULL;
+	else if (err)
+		return ERR_PTR(err);
+
+	if (arg.args_count != 1)
+		return ERR_PTR(-EINVAL);
+
+	return register_mii_timestamper(arg.np, arg.args[0]);
+}
+
+int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
+				       struct phy_device *phy,
+				       struct fwnode_handle *child, u32 addr)
+{
+	int rc;
+
+	rc = fwnode_irq_get(child, 0);
+	if (rc == -EPROBE_DEFER)
+		return rc;
+
+	if (rc > 0) {
+		phy->irq = rc;
+		mdio->irq[addr] = rc;
+	} else {
+		phy->irq = mdio->irq[addr];
+	}
+
+	if (fwnode_property_read_bool(child, "broken-turn-around"))
+		mdio->phy_ignore_ta_mask |= 1 << addr;
+
+	fwnode_property_read_u32(child, "reset-assert-us",
+				 &phy->mdio.reset_assert_delay);
+	fwnode_property_read_u32(child, "reset-deassert-us",
+				 &phy->mdio.reset_deassert_delay);
+
+	/* Associate the fwnode with the device structure so it
+	 * can be looked up later
+	 */
+	fwnode_handle_get(child);
+	phy->mdio.dev.fwnode = child;
+
+	/* All data is now stored in the phy struct;
+	 * register it
+	 */
+	rc = phy_device_register(phy);
+	if (rc) {
+		fwnode_handle_put(child);
+		return rc;
+	}
+
+	dev_dbg(&mdio->dev, "registered phy %p fwnode at address %i\n",
+		child, addr);
+	return 0;
+}
+EXPORT_SYMBOL(fwnode_mdiobus_phy_device_register);
+
+int fwnode_mdiobus_register_phy(struct mii_bus *bus,
+				struct fwnode_handle *child, u32 addr)
+{
+	struct mii_timestamper *mii_ts = NULL;
+	struct phy_device *phy;
+	bool is_c45 = false;
+	u32 phy_id;
+	int rc;
+
+	mii_ts = fwnode_find_mii_timestamper(child);
+	if (IS_ERR(mii_ts))
+		return PTR_ERR(mii_ts);
+
+	rc = fwnode_property_match_string(child, "compatible",
+					  "ethernet-phy-ieee802.3-c45");
+	if (rc >= 0)
+		is_c45 = true;
+
+	if (is_c45 || fwnode_get_phy_id(child, &phy_id))
+		phy = get_phy_device(bus, addr, is_c45);
+	else
+		phy = phy_device_create(bus, addr, phy_id, 0, NULL);
+	if (IS_ERR(phy)) {
+		unregister_mii_timestamper(mii_ts);
+		return PTR_ERR(phy);
+	}
+
+	if (is_acpi_node(child)) {
+		phy->irq = bus->irq[addr];
+
+		/* Associate the fwnode with the device structure so it
+		 * can be looked up later.
+		 */
+		phy->mdio.dev.fwnode = child;
+
+		/* All data is now stored in the phy struct, so register it */
+		rc = phy_device_register(phy);
+		if (rc) {
+			phy_device_free(phy);
+			fwnode_handle_put(phy->mdio.dev.fwnode);
+			return rc;
+		}
+	} else if (is_of_node(child)) {
+		rc = fwnode_mdiobus_phy_device_register(bus, phy, child, addr);
+		if (rc) {
+			unregister_mii_timestamper(mii_ts);
+			phy_device_free(phy);
+			return rc;
+		}
+	}
+
+	/* phy->mii_ts may already be defined by the PHY driver. A
+	 * mii_timestamper probed via the device tree will still have
+	 * precedence.
+	 */
+	if (mii_ts)
+		phy->mii_ts = mii_ts;
+	return 0;
+}
+EXPORT_SYMBOL(fwnode_mdiobus_register_phy);
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index d73c0570f19c..17327bbc1de4 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -10,6 +10,7 @@
 
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/fwnode_mdio.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
@@ -51,46 +52,11 @@ static struct mii_timestamper *of_find_mii_timestamper(struct device_node *node)
 }
 
 int of_mdiobus_phy_device_register(struct mii_bus *mdio, struct phy_device *phy,
-			      struct device_node *child, u32 addr)
+				   struct device_node *child, u32 addr)
 {
-	int rc;
-
-	rc = of_irq_get(child, 0);
-	if (rc == -EPROBE_DEFER)
-		return rc;
-
-	if (rc > 0) {
-		phy->irq = rc;
-		mdio->irq[addr] = rc;
-	} else {
-		phy->irq = mdio->irq[addr];
-	}
-
-	if (of_property_read_bool(child, "broken-turn-around"))
-		mdio->phy_ignore_ta_mask |= 1 << addr;
-
-	of_property_read_u32(child, "reset-assert-us",
-			     &phy->mdio.reset_assert_delay);
-	of_property_read_u32(child, "reset-deassert-us",
-			     &phy->mdio.reset_deassert_delay);
-
-	/* Associate the OF node with the device structure so it
-	 * can be looked up later */
-	of_node_get(child);
-	phy->mdio.dev.of_node = child;
-	phy->mdio.dev.fwnode = of_fwnode_handle(child);
-
-	/* All data is now stored in the phy struct;
-	 * register it */
-	rc = phy_device_register(phy);
-	if (rc) {
-		of_node_put(child);
-		return rc;
-	}
-
-	dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
-		child, addr);
-	return 0;
+	return fwnode_mdiobus_phy_device_register(mdio, phy,
+						  of_fwnode_handle(child),
+						  addr);
 }
 EXPORT_SYMBOL(of_mdiobus_phy_device_register);
 
diff --git a/include/linux/fwnode_mdio.h b/include/linux/fwnode_mdio.h
new file mode 100644
index 000000000000..faf603c48c86
--- /dev/null
+++ b/include/linux/fwnode_mdio.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * FWNODE helper for the MDIO (Ethernet PHY) API
+ */
+
+#ifndef __LINUX_FWNODE_MDIO_H
+#define __LINUX_FWNODE_MDIO_H
+
+#include <linux/phy.h>
+
+#if IS_ENABLED(CONFIG_FWNODE_MDIO)
+int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
+				       struct phy_device *phy,
+				       struct fwnode_handle *child, u32 addr);
+
+int fwnode_mdiobus_register_phy(struct mii_bus *bus,
+				struct fwnode_handle *child, u32 addr);
+
+#else /* CONFIG_FWNODE_MDIO */
+int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
+				       struct phy_device *phy,
+				       struct fwnode_handle *child, u32 addr)
+{
+	return -EINVAL;
+}
+
+static inline int fwnode_mdiobus_register_phy(struct mii_bus *bus,
+					      struct fwnode_handle *child,
+					      u32 addr)
+{
+	return -EINVAL;
+}
+#endif
+
+#endif /* __LINUX_FWNODE_MDIO_H */

From 8d2cb3ad31181f050af4d46d6854cf332d1207a9 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:55 +0300
Subject: [PATCH 1508/3292] of: mdio: Refactor of_mdiobus_register_phy()

Refactor of_mdiobus_register_phy() to use fwnode_mdiobus_register_phy().
Also, remove the of_find_mii_timestamper() since the fwnode variant is
used instead.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/of_mdio.c | 56 +-------------------------------------
 1 file changed, 1 insertion(+), 55 deletions(-)

diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 17327bbc1de4..8744b1e1c2b1 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -33,24 +33,6 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id)
 	return fwnode_get_phy_id(of_fwnode_handle(device), phy_id);
 }
 
-static struct mii_timestamper *of_find_mii_timestamper(struct device_node *node)
-{
-	struct of_phandle_args arg;
-	int err;
-
-	err = of_parse_phandle_with_fixed_args(node, "timestamper", 1, 0, &arg);
-
-	if (err == -ENOENT)
-		return NULL;
-	else if (err)
-		return ERR_PTR(err);
-
-	if (arg.args_count != 1)
-		return ERR_PTR(-EINVAL);
-
-	return register_mii_timestamper(arg.np, arg.args[0]);
-}
-
 int of_mdiobus_phy_device_register(struct mii_bus *mdio, struct phy_device *phy,
 				   struct device_node *child, u32 addr)
 {
@@ -63,43 +45,7 @@ EXPORT_SYMBOL(of_mdiobus_phy_device_register);
 static int of_mdiobus_register_phy(struct mii_bus *mdio,
 				    struct device_node *child, u32 addr)
 {
-	struct mii_timestamper *mii_ts;
-	struct phy_device *phy;
-	bool is_c45;
-	int rc;
-	u32 phy_id;
-
-	mii_ts = of_find_mii_timestamper(child);
-	if (IS_ERR(mii_ts))
-		return PTR_ERR(mii_ts);
-
-	is_c45 = of_device_is_compatible(child,
-					 "ethernet-phy-ieee802.3-c45");
-
-	if (!is_c45 && !of_get_phy_id(child, &phy_id))
-		phy = phy_device_create(mdio, addr, phy_id, 0, NULL);
-	else
-		phy = get_phy_device(mdio, addr, is_c45);
-	if (IS_ERR(phy)) {
-		unregister_mii_timestamper(mii_ts);
-		return PTR_ERR(phy);
-	}
-
-	rc = of_mdiobus_phy_device_register(mdio, phy, child, addr);
-	if (rc) {
-		unregister_mii_timestamper(mii_ts);
-		phy_device_free(phy);
-		return rc;
-	}
-
-	/* phy->mii_ts may already be defined by the PHY driver. A
-	 * mii_timestamper probed via the device tree will still have
-	 * precedence.
-	 */
-	if (mii_ts)
-		phy->mii_ts = mii_ts;
-
-	return 0;
+	return fwnode_mdiobus_register_phy(mdio, of_fwnode_handle(child), addr);
 }
 
 static int of_mdiobus_register_device(struct mii_bus *mdio,

From 7ec16433cf1e97cfc823e50e9ee4e2fd3abfc4ee Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:56 +0300
Subject: [PATCH 1509/3292] ACPI: utils: Introduce acpi_get_local_address()

Introduce a wrapper around the _ADR evaluation.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/acpi/utils.c | 14 ++++++++++++++
 include/linux/acpi.h |  7 +++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 3b54b8fd7396..e7ddd281afff 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -277,6 +277,20 @@ acpi_evaluate_integer(acpi_handle handle,
 
 EXPORT_SYMBOL(acpi_evaluate_integer);
 
+int acpi_get_local_address(acpi_handle handle, u32 *addr)
+{
+	unsigned long long adr;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
+	if (ACPI_FAILURE(status))
+		return -ENODATA;
+
+	*addr = (u32)adr;
+	return 0;
+}
+EXPORT_SYMBOL(acpi_get_local_address);
+
 acpi_status
 acpi_evaluate_reference(acpi_handle handle,
 			acpi_string pathname,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..6ace3a0f1415 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -710,6 +710,8 @@ static inline u64 acpi_arch_get_root_pointer(void)
 }
 #endif
 
+int acpi_get_local_address(acpi_handle handle, u32 *addr);
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -965,6 +967,11 @@ static inline struct acpi_device *acpi_resource_consumer(struct resource *res)
 	return NULL;
 }
 
+static inline int acpi_get_local_address(acpi_handle handle, u32 *addr)
+{
+	return -ENODEV;
+}
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC

From 803ca24d2f92e2cf393df4705423f7b09a5eabd9 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:57 +0300
Subject: [PATCH 1510/3292] net: mdio: Add ACPI support code for mdio

Define acpi_mdiobus_register() to Register mii_bus and create PHYs for
each ACPI child node.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                  |  1 +
 drivers/net/mdio/Kconfig     |  7 +++++
 drivers/net/mdio/Makefile    |  1 +
 drivers/net/mdio/acpi_mdio.c | 58 ++++++++++++++++++++++++++++++++++++
 include/linux/acpi_mdio.h    | 26 ++++++++++++++++
 5 files changed, 93 insertions(+)
 create mode 100644 drivers/net/mdio/acpi_mdio.c
 create mode 100644 include/linux/acpi_mdio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e8f8b6c33a51..2172f594be8f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6811,6 +6811,7 @@ F:	Documentation/devicetree/bindings/net/mdio*
 F:	Documentation/devicetree/bindings/net/qca,ar803x.yaml
 F:	Documentation/networking/phy.rst
 F:	drivers/net/mdio/
+F:	drivers/net/mdio/acpi_mdio.c
 F:	drivers/net/mdio/fwnode_mdio.c
 F:	drivers/net/mdio/of_mdio.c
 F:	drivers/net/pcs/
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index 422e9e042a3c..99a6c13a11af 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -34,6 +34,13 @@ config OF_MDIO
 	help
 	  OpenFirmware MDIO bus (Ethernet PHY) accessors
 
+config ACPI_MDIO
+	def_tristate PHYLIB
+	depends on ACPI
+	depends on PHYLIB
+	help
+	  ACPI MDIO bus (Ethernet PHY) accessors
+
 if MDIO_BUS
 
 config MDIO_DEVRES
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
index 2e6813c709eb..15f8dc4042ce 100644
--- a/drivers/net/mdio/Makefile
+++ b/drivers/net/mdio/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Linux MDIO bus drivers
 
+obj-$(CONFIG_ACPI_MDIO)		+= acpi_mdio.o
 obj-$(CONFIG_FWNODE_MDIO)	+= fwnode_mdio.o
 obj-$(CONFIG_OF_MDIO)		+= of_mdio.o
 
diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c
new file mode 100644
index 000000000000..d77c987fda9c
--- /dev/null
+++ b/drivers/net/mdio/acpi_mdio.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ACPI helpers for the MDIO (Ethernet PHY) API
+ *
+ * This file provides helper functions for extracting PHY device information
+ * out of the ACPI ASL and using it to populate an mii_bus.
+ */
+
+#include <linux/acpi.h>
+#include <linux/acpi_mdio.h>
+#include <linux/bits.h>
+#include <linux/dev_printk.h>
+#include <linux/fwnode_mdio.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>");
+MODULE_LICENSE("GPL");
+
+/**
+ * acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL.
+ * @mdio: pointer to mii_bus structure
+ * @fwnode: pointer to fwnode of MDIO bus. This fwnode is expected to represent
+ * an ACPI device object corresponding to the MDIO bus and its children are
+ * expected to correspond to the PHY devices on that bus.
+ *
+ * This function registers the mii_bus structure and registers a phy_device
+ * for each child node of @fwnode.
+ */
+int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *child;
+	u32 addr;
+	int ret;
+
+	/* Mask out all PHYs from auto probing. */
+	mdio->phy_mask = GENMASK(31, 0);
+	ret = mdiobus_register(mdio);
+	if (ret)
+		return ret;
+
+	ACPI_COMPANION_SET(&mdio->dev, to_acpi_device_node(fwnode));
+
+	/* Loop over the child nodes and register a phy_device for each PHY */
+	fwnode_for_each_child_node(fwnode, child) {
+		ret = acpi_get_local_address(ACPI_HANDLE_FWNODE(child), &addr);
+		if (ret || addr >= PHY_MAX_ADDR)
+			continue;
+
+		ret = fwnode_mdiobus_register_phy(mdio, child, addr);
+		if (ret == -ENODEV)
+			dev_err(&mdio->dev,
+				"MDIO device at address %d is missing.\n",
+				addr);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(acpi_mdiobus_register);
diff --git a/include/linux/acpi_mdio.h b/include/linux/acpi_mdio.h
new file mode 100644
index 000000000000..0a24ab7cb66f
--- /dev/null
+++ b/include/linux/acpi_mdio.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * ACPI helper for the MDIO (Ethernet PHY) API
+ */
+
+#ifndef __LINUX_ACPI_MDIO_H
+#define __LINUX_ACPI_MDIO_H
+
+#include <linux/phy.h>
+
+#if IS_ENABLED(CONFIG_ACPI_MDIO)
+int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode);
+#else /* CONFIG_ACPI_MDIO */
+static inline int
+acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode)
+{
+	/*
+	 * Fall back to mdiobus_register() function to register a bus.
+	 * This way, we don't have to keep compat bits around in drivers.
+	 */
+
+	return mdiobus_register(mdio);
+}
+#endif
+
+#endif /* __LINUX_ACPI_MDIO_H */

From 15e7064e8793352a44f65f3c18a4d84a625d95c2 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:58 +0300
Subject: [PATCH 1511/3292] net/fsl: Use [acpi|of]_mdiobus_register

Depending on the device node type, call the specific OF or ACPI
mdiobus_register function.

Note: For both ACPI and DT cases, endianness of MDIO controllers
need to be specified using the "little-endian" property.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/xgmac_mdio.c | 30 ++++++++++++++-------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index bfa2826c5545..0b68852379da 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -2,6 +2,7 @@
  * QorIQ 10G MDIO Controller
  *
  * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2021 NXP
  *
  * Authors: Andy Fleming <afleming@freescale.com>
  *          Timur Tabi <timur@freescale.com>
@@ -11,15 +12,17 @@
  * kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/acpi_mdio.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/phy.h>
+#include <linux/kernel.h>
 #include <linux/mdio.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/of_mdio.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/slab.h>
 
 /* Number of microseconds to wait for a register to respond */
 #define TIMEOUT	1000
@@ -243,10 +246,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 
 static int xgmac_mdio_probe(struct platform_device *pdev)
 {
-	struct device_node *np = pdev->dev.of_node;
-	struct mii_bus *bus;
-	struct resource *res;
+	struct fwnode_handle *fwnode;
 	struct mdio_fsl_priv *priv;
+	struct resource *res;
+	struct mii_bus *bus;
 	int ret;
 
 	/* In DPAA-1, MDIO is one of the many FMan sub-devices. The FMan
@@ -279,13 +282,22 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 		goto err_ioremap;
 	}
 
+	/* For both ACPI and DT cases, endianness of MDIO controller
+	 * needs to be specified using "little-endian" property.
+	 */
 	priv->is_little_endian = device_property_read_bool(&pdev->dev,
 							   "little-endian");
 
 	priv->has_a011043 = device_property_read_bool(&pdev->dev,
 						      "fsl,erratum-a011043");
 
-	ret = of_mdiobus_register(bus, np);
+	fwnode = pdev->dev.fwnode;
+	if (is_of_node(fwnode))
+		ret = of_mdiobus_register(bus, to_of_node(fwnode));
+	else if (is_acpi_node(fwnode))
+		ret = acpi_mdiobus_register(bus, fwnode);
+	else
+		ret = -EINVAL;
 	if (ret) {
 		dev_err(&pdev->dev, "cannot register MDIO bus\n");
 		goto err_registration;

From 25396f680dd6257096c5dc6ceb90ce57caba8de1 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:53:59 +0300
Subject: [PATCH 1512/3292] net: phylink: introduce
 phylink_fwnode_phy_connect()

Define phylink_fwnode_phy_connect() to connect phy specified by
a fwnode to a phylink instance.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 54 +++++++++++++++++++++++++++++++++++++++
 include/linux/phylink.h   |  3 +++
 2 files changed, 57 insertions(+)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 96d8e88b4e46..9cc0f69faafe 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2015 Russell King
  */
+#include <linux/acpi.h>
 #include <linux/ethtool.h>
 #include <linux/export.h>
 #include <linux/gpio/consumer.h>
@@ -1125,6 +1126,59 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn,
 }
 EXPORT_SYMBOL_GPL(phylink_of_phy_connect);
 
+/**
+ * phylink_fwnode_phy_connect() - connect the PHY specified in the fwnode.
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @fwnode: a pointer to a &struct fwnode_handle.
+ * @flags: PHY-specific flags to communicate to the PHY device driver
+ *
+ * Connect the phy specified @fwnode to the phylink instance specified
+ * by @pl.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int phylink_fwnode_phy_connect(struct phylink *pl,
+			       struct fwnode_handle *fwnode,
+			       u32 flags)
+{
+	struct fwnode_handle *phy_fwnode;
+	struct phy_device *phy_dev;
+	int ret;
+
+	/* Fixed links and 802.3z are handled without needing a PHY */
+	if (pl->cfg_link_an_mode == MLO_AN_FIXED ||
+	    (pl->cfg_link_an_mode == MLO_AN_INBAND &&
+	     phy_interface_mode_is_8023z(pl->link_interface)))
+		return 0;
+
+	phy_fwnode = fwnode_get_phy_node(fwnode);
+	if (IS_ERR(phy_fwnode)) {
+		if (pl->cfg_link_an_mode == MLO_AN_PHY)
+			return -ENODEV;
+		return 0;
+	}
+
+	phy_dev = fwnode_phy_find_device(phy_fwnode);
+	/* We're done with the phy_node handle */
+	fwnode_handle_put(phy_fwnode);
+	if (!phy_dev)
+		return -ENODEV;
+
+	ret = phy_attach_direct(pl->netdev, phy_dev, flags,
+				pl->link_interface);
+	if (ret) {
+		phy_device_free(phy_dev);
+		return ret;
+	}
+
+	ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface);
+	if (ret)
+		phy_detach(phy_dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_fwnode_phy_connect);
+
 /**
  * phylink_disconnect_phy() - disconnect any PHY attached to the phylink
  *   instance.
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index fd2acfd9b597..afb3ded0b691 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -441,6 +441,9 @@ void phylink_destroy(struct phylink *);
 
 int phylink_connect_phy(struct phylink *, struct phy_device *);
 int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags);
+int phylink_fwnode_phy_connect(struct phylink *pl,
+			       struct fwnode_handle *fwnode,
+			       u32 flags);
 void phylink_disconnect_phy(struct phylink *);
 
 void phylink_mac_change(struct phylink *, bool up);

From 423e6e8946f5bb1f7ec3c0b562ab89becad82629 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:54:00 +0300
Subject: [PATCH 1513/3292] net: phylink: Refactor phylink_of_phy_connect()

Refactor phylink_of_phy_connect() to use phylink_fwnode_phy_connect().

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Grant Likely <grant.likely@arm.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 39 +--------------------------------------
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 9cc0f69faafe..bb9eeb74f70a 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1085,44 +1085,7 @@ EXPORT_SYMBOL_GPL(phylink_connect_phy);
 int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn,
 			   u32 flags)
 {
-	struct device_node *phy_node;
-	struct phy_device *phy_dev;
-	int ret;
-
-	/* Fixed links and 802.3z are handled without needing a PHY */
-	if (pl->cfg_link_an_mode == MLO_AN_FIXED ||
-	    (pl->cfg_link_an_mode == MLO_AN_INBAND &&
-	     phy_interface_mode_is_8023z(pl->link_interface)))
-		return 0;
-
-	phy_node = of_parse_phandle(dn, "phy-handle", 0);
-	if (!phy_node)
-		phy_node = of_parse_phandle(dn, "phy", 0);
-	if (!phy_node)
-		phy_node = of_parse_phandle(dn, "phy-device", 0);
-
-	if (!phy_node) {
-		if (pl->cfg_link_an_mode == MLO_AN_PHY)
-			return -ENODEV;
-		return 0;
-	}
-
-	phy_dev = of_phy_find_device(phy_node);
-	/* We're done with the phy_node handle */
-	of_node_put(phy_node);
-	if (!phy_dev)
-		return -ENODEV;
-
-	ret = phy_attach_direct(pl->netdev, phy_dev, flags,
-				pl->link_interface);
-	if (ret)
-		return ret;
-
-	ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface);
-	if (ret)
-		phy_detach(phy_dev);
-
-	return ret;
+	return phylink_fwnode_phy_connect(pl, of_fwnode_handle(dn), flags);
 }
 EXPORT_SYMBOL_GPL(phylink_of_phy_connect);
 

From 3264f599c1a83a08a172031a647ca5c1f30411b3 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Fri, 11 Jun 2021 13:54:01 +0300
Subject: [PATCH 1514/3292] net: dpaa2-mac: Add ACPI support for DPAA2 MAC
 driver

Modify dpaa2_mac_get_node() to get the dpmac fwnode from either
DT or ACPI.

Modify dpaa2_mac_get_if_mode() to get interface mode from dpmac_node
which is a fwnode.

Modify dpaa2_pcs_create() to create pcs from dpmac_node fwnode.

Modify dpaa2_mac_connect() to support ACPI along with DT.

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org> # from the ACPI side
Acked-by: Grant Likely <grant.likely@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-mac.c  | 92 +++++++++++--------
 .../net/ethernet/freescale/dpaa2/dpaa2-mac.h  |  2 +-
 2 files changed, 55 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index 4dfadf2b70d6..ae6d382d8735 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -1,6 +1,9 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2019 NXP */
 
+#include <linux/acpi.h>
+#include <linux/property.h>
+
 #include "dpaa2-eth.h"
 #include "dpaa2-mac.h"
 
@@ -34,39 +37,51 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode)
 	return 0;
 }
 
-/* Caller must call of_node_put on the returned value */
-static struct device_node *dpaa2_mac_get_node(u16 dpmac_id)
+static struct fwnode_handle *dpaa2_mac_get_node(struct device *dev,
+						u16 dpmac_id)
 {
-	struct device_node *dpmacs, *dpmac = NULL;
-	u32 id;
+	struct fwnode_handle *fwnode, *parent, *child  = NULL;
+	struct device_node *dpmacs = NULL;
 	int err;
+	u32 id;
 
-	dpmacs = of_find_node_by_name(NULL, "dpmacs");
-	if (!dpmacs)
-		return NULL;
-
-	while ((dpmac = of_get_next_child(dpmacs, dpmac)) != NULL) {
-		err = of_property_read_u32(dpmac, "reg", &id);
-		if (err)
-			continue;
-		if (id == dpmac_id)
-			break;
+	fwnode = dev_fwnode(dev->parent);
+	if (is_of_node(fwnode)) {
+		dpmacs = of_find_node_by_name(NULL, "dpmacs");
+		if (!dpmacs)
+			return NULL;
+		parent = of_fwnode_handle(dpmacs);
+	} else if (is_acpi_node(fwnode)) {
+		parent = fwnode;
 	}
 
-	of_node_put(dpmacs);
+	fwnode_for_each_child_node(parent, child) {
+		err = -EINVAL;
+		if (is_acpi_device_node(child))
+			err = acpi_get_local_address(ACPI_HANDLE_FWNODE(child), &id);
+		else if (is_of_node(child))
+			err = of_property_read_u32(to_of_node(child), "reg", &id);
+		if (err)
+			continue;
 
-	return dpmac;
+		if (id == dpmac_id) {
+			of_node_put(dpmacs);
+			return child;
+		}
+	}
+	of_node_put(dpmacs);
+	return NULL;
 }
 
-static int dpaa2_mac_get_if_mode(struct device_node *node,
+static int dpaa2_mac_get_if_mode(struct fwnode_handle *dpmac_node,
 				 struct dpmac_attr attr)
 {
 	phy_interface_t if_mode;
 	int err;
 
-	err = of_get_phy_mode(node, &if_mode);
-	if (!err)
-		return if_mode;
+	err = fwnode_get_phy_mode(dpmac_node);
+	if (err > 0)
+		return err;
 
 	err = phy_mode(attr.eth_if, &if_mode);
 	if (!err)
@@ -235,26 +250,27 @@ static const struct phylink_mac_ops dpaa2_mac_phylink_ops = {
 };
 
 static int dpaa2_pcs_create(struct dpaa2_mac *mac,
-			    struct device_node *dpmac_node, int id)
+			    struct fwnode_handle *dpmac_node,
+			    int id)
 {
 	struct mdio_device *mdiodev;
-	struct device_node *node;
+	struct fwnode_handle *node;
 
-	node = of_parse_phandle(dpmac_node, "pcs-handle", 0);
-	if (!node) {
+	node = fwnode_find_reference(dpmac_node, "pcs-handle", 0);
+	if (IS_ERR(node)) {
 		/* do not error out on old DTS files */
 		netdev_warn(mac->net_dev, "pcs-handle node not found\n");
 		return 0;
 	}
 
-	if (!of_device_is_available(node)) {
+	if (!fwnode_device_is_available(node)) {
 		netdev_err(mac->net_dev, "pcs-handle node not available\n");
-		of_node_put(node);
+		fwnode_handle_put(node);
 		return -ENODEV;
 	}
 
-	mdiodev = of_mdio_find_device(node);
-	of_node_put(node);
+	mdiodev = fwnode_mdio_find_device(node);
+	fwnode_handle_put(node);
 	if (!mdiodev)
 		return -EPROBE_DEFER;
 
@@ -283,13 +299,13 @@ static void dpaa2_pcs_destroy(struct dpaa2_mac *mac)
 int dpaa2_mac_connect(struct dpaa2_mac *mac)
 {
 	struct net_device *net_dev = mac->net_dev;
-	struct device_node *dpmac_node;
+	struct fwnode_handle *dpmac_node;
 	struct phylink *phylink;
 	int err;
 
 	mac->if_link_type = mac->attr.link_type;
 
-	dpmac_node = mac->of_node;
+	dpmac_node = mac->fw_node;
 	if (!dpmac_node) {
 		netdev_err(net_dev, "No dpmac@%d node found.\n", mac->attr.id);
 		return -ENODEV;
@@ -304,7 +320,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 	 * error out if the interface mode requests them and there is no PHY
 	 * to act upon them
 	 */
-	if (of_phy_is_fixed_link(dpmac_node) &&
+	if (of_phy_is_fixed_link(to_of_node(dpmac_node)) &&
 	    (mac->if_mode == PHY_INTERFACE_MODE_RGMII_ID ||
 	     mac->if_mode == PHY_INTERFACE_MODE_RGMII_RXID ||
 	     mac->if_mode == PHY_INTERFACE_MODE_RGMII_TXID)) {
@@ -324,7 +340,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 	mac->phylink_config.type = PHYLINK_NETDEV;
 
 	phylink = phylink_create(&mac->phylink_config,
-				 of_fwnode_handle(dpmac_node), mac->if_mode,
+				 dpmac_node, mac->if_mode,
 				 &dpaa2_mac_phylink_ops);
 	if (IS_ERR(phylink)) {
 		err = PTR_ERR(phylink);
@@ -335,9 +351,9 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
 	if (mac->pcs)
 		phylink_set_pcs(mac->phylink, &mac->pcs->pcs);
 
-	err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0);
+	err = phylink_fwnode_phy_connect(mac->phylink, dpmac_node, 0);
 	if (err) {
-		netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err);
+		netdev_err(net_dev, "phylink_fwnode_phy_connect() = %d\n", err);
 		goto err_phylink_destroy;
 	}
 
@@ -384,8 +400,8 @@ int dpaa2_mac_open(struct dpaa2_mac *mac)
 	/* Find the device node representing the MAC device and link the device
 	 * behind the associated netdev to it.
 	 */
-	mac->of_node = dpaa2_mac_get_node(mac->attr.id);
-	net_dev->dev.of_node = mac->of_node;
+	mac->fw_node = dpaa2_mac_get_node(&mac->mc_dev->dev, mac->attr.id);
+	net_dev->dev.of_node = to_of_node(mac->fw_node);
 
 	return 0;
 
@@ -399,8 +415,8 @@ void dpaa2_mac_close(struct dpaa2_mac *mac)
 	struct fsl_mc_device *dpmac_dev = mac->mc_dev;
 
 	dpmac_close(mac->mc_io, 0, dpmac_dev->mc_handle);
-	if (mac->of_node)
-		of_node_put(mac->of_node);
+	if (mac->fw_node)
+		fwnode_handle_put(mac->fw_node);
 }
 
 static char dpaa2_mac_ethtool_stats[][ETH_GSTRING_LEN] = {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
index 8ebcb3420d02..7842cbb2207a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
@@ -24,7 +24,7 @@ struct dpaa2_mac {
 	phy_interface_t if_mode;
 	enum dpmac_link_type if_link_type;
 	struct lynx_pcs *pcs;
-	struct device_node *of_node;
+	struct fwnode_handle *fw_node;
 };
 
 bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,

From 8ee1a0eed16a221c7078848ac165d4d57dad8784 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Jun 2021 16:04:09 +0800
Subject: [PATCH 1515/3292] net: mdio: mscc-miim: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-mscc-miim.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index b36e5ea04ddf..2d67e12c8262 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -139,10 +139,6 @@ static int mscc_miim_probe(struct platform_device *pdev)
 	struct mscc_miim_dev *dev;
 	int ret;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
 	bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*dev));
 	if (!bus)
 		return -ENOMEM;
@@ -155,7 +151,7 @@ static int mscc_miim_probe(struct platform_device *pdev)
 	bus->parent = &pdev->dev;
 
 	dev = bus->priv;
-	dev->regs = devm_ioremap_resource(&pdev->dev, res);
+	dev->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(dev->regs)) {
 		dev_err(&pdev->dev, "Unable to map MIIM registers\n");
 		return PTR_ERR(dev->regs);

From 52e597d3e2e6e5bfce47559eb22b955ac17b3826 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Fri, 11 Jun 2021 17:02:38 +0800
Subject: [PATCH 1516/3292] net: stmmac: Fix potential integer overflow

The commit d96febedfde2 ("net: stmmac: arrange Tx tail pointer update
to stmmac_flush_tx_descriptors") introduced the following coverity
warning:-

  1. Unintentional integer overflow (OVERFLOW_BEFORE_WIDEN)
     overflow_before_widen: Potentially overflowing expression
     'tx_q->cur_tx * desc_size' with type 'unsigned int' (32 bits,
     unsigned) is evaluated using 32-bit arithmetic, and then used in a
     context that expects an expression of type dma_addr_t (64 bits,
     unsigned).

Fixed this by assigning tx_tail_addr to dma_addr_t type, as dma_addr_t
datatype is decided by CONFIG_ARCH_DMA_ADDR_T_64_BIT.

Fixes: d96febedfde2 ("net: stmmac: arrange Tx tail pointer update to stmmac_flush_tx_descriptors")
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index fd7212afc543..6655cb8e24cf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -75,7 +75,7 @@ struct stmmac_tx_queue {
 	unsigned int cur_tx;
 	unsigned int dirty_tx;
 	dma_addr_t dma_tx_phy;
-	u32 tx_tail_addr;
+	dma_addr_t tx_tail_addr;
 	u32 mss;
 };
 

From 967db3529ecac305d230aa4e60abddf6ab63543a Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Fri, 11 Jun 2021 15:12:02 +0530
Subject: [PATCH 1517/3292] octeontx2-af: add support for multicast/promisc
 packet replication feature

Currently, multicast packet filtering is accomplished by installing
MCAM rule that matches all-multicast MAC address and has its
NPC_RX_ACTION set to unicast to PF. Similarly promisc feature is
achieved by installing MCAM rule that matches all the traffic received
by the channel and unicast the packets to PF. This approach only applies
to PF and is not scalable across VFs.

This patch adds support for PF/VF multicast and promisc feature by
reserving NIX_RX_MCE_S entries from the global MCE list allocated
during NIX block initialization. The NIX_RX_MCE_S entries create a
linked list with a flag indicating the end of the list, and each entry
points to a PF_FUNC (either PF or VF). When a packet NPC_RX_ACTION is
set to MCAST, the corresponding NIX_RX_MCE_S list is traversed and the
packet is queued to each PF_FUNC available on the list.

The PF or VF driver adds the multicast/promisc packet match entry and
updates the MCE list with correspondng PF_FUNC. When a PF or VF interface
is disabled, the corresponding NIX_RX_MCE_S entry is removed from the
MCE list and the MCAM entry will be disabled if the list is empty.

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/af/common.h    |   5 +
 .../net/ethernet/marvell/octeontx2/af/mbox.h  |   5 +-
 .../net/ethernet/marvell/octeontx2/af/npc.h   |   3 +-
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |  49 ++-
 .../marvell/octeontx2/af/rvu_debugfs.c        |   5 +-
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 254 ++++++++++++---
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 308 ++++++++++++------
 .../marvell/octeontx2/af/rvu_npc_fs.c         |   7 +-
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  |   4 +-
 9 files changed, 482 insertions(+), 158 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
index e66109367487..47f5ed006a93 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -197,6 +197,11 @@ enum nix_scheduler {
 
 #define SDP_CHANNELS			256
 
+/* The mask is to extract lower 10-bits of channel number
+ * which CPT will pass to X2P.
+ */
+#define NIX_CHAN_CPT_X2P_MASK          (0x3ffull)
+
 /* NIX LSO format indices.
  * As of now TSO is the only one using, so statically assigning indices.
  */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index cedb2616c509..ed0bc9d3d5dd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -611,7 +611,9 @@ enum nix_af_status {
 	NIX_AF_INVAL_SSO_PF_FUNC    = -420,
 	NIX_AF_ERR_TX_VTAG_NOSPC    = -421,
 	NIX_AF_ERR_RX_VTAG_INUSE    = -422,
-	NIX_AF_ERR_NPC_KEY_NOT_SUPP = -423,
+	NIX_AF_ERR_PTP_CONFIG_FAIL  = -423,
+	NIX_AF_ERR_NPC_KEY_NOT_SUPP = -424,
+	NIX_AF_ERR_INVALID_NIXBLK   = -425,
 };
 
 /* For NIX RX vtag action  */
@@ -913,6 +915,7 @@ struct nix_rx_mode {
 #define NIX_RX_MODE_UCAST	BIT(0)
 #define NIX_RX_MODE_PROMISC	BIT(1)
 #define NIX_RX_MODE_ALLMULTI	BIT(2)
+#define NIX_RX_MODE_USE_MCE	BIT(3)
 	u16	mode;
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index fe19704173a1..19bad9a59c8f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -438,7 +438,8 @@ struct nix_tx_action {
 /* NPC MCAM reserved entry index per nixlf */
 #define NIXLF_UCAST_ENTRY	0
 #define NIXLF_BCAST_ENTRY	1
-#define NIXLF_PROMISC_ENTRY	2
+#define NIXLF_ALLMULTI_ENTRY	2
+#define NIXLF_PROMISC_ENTRY	3
 
 struct npc_coalesced_kpu_prfl {
 #define NPC_SIGN	0x00666f727063706e
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 74ed929f101b..29bc9a6792d3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -227,9 +227,14 @@ struct rvu_pfvf {
 	u8		mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */
 	u8		default_mac[ETH_ALEN]; /* MAC address from FWdata */
 
-	/* Broadcast pkt replication info */
+	/* Broadcast/Multicast/Promisc pkt replication info */
 	u16			bcast_mce_idx;
+	u16			mcast_mce_idx;
+	u16			promisc_mce_idx;
 	struct nix_mce_list	bcast_mce_list;
+	struct nix_mce_list	mcast_mce_list;
+	struct nix_mce_list	promisc_mce_list;
+	bool			use_mce_list;
 
 	struct rvu_npc_mcam_rule *def_ucast_rule;
 
@@ -239,6 +244,11 @@ struct rvu_pfvf {
 	u8	nix_blkaddr; /* BLKADDR_NIX0/1 assigned to this PF */
 	u8	nix_rx_intf; /* NIX0_RX/NIX1_RX interface to NPC */
 	u8	nix_tx_intf; /* NIX0_TX/NIX1_TX interface to NPC */
+	unsigned long flags;
+};
+
+enum rvu_pfvf_flags {
+	NIXLF_INITIALIZED = 0,
 };
 
 struct nix_txsch {
@@ -548,11 +558,16 @@ static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan)
 /* Function Prototypes
  * RVU
  */
-static inline int is_afvf(u16 pcifunc)
+static inline bool is_afvf(u16 pcifunc)
 {
 	return !(pcifunc & ~RVU_PFVF_FUNC_MASK);
 }
 
+static inline bool is_vf(u16 pcifunc)
+{
+	return !!(pcifunc & RVU_PFVF_FUNC_MASK);
+}
+
 /* check if PF_FUNC is AF */
 static inline bool is_pffunc_af(u16 pcifunc)
 {
@@ -608,6 +623,12 @@ static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id)
 	*lmac_id = (map & 0xF);
 }
 
+static inline bool is_cgx_vf(struct rvu *rvu, u16 pcifunc)
+{
+	return ((pcifunc & RVU_PFVF_FUNC_MASK) &&
+		is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc)));
+}
+
 #define M(_name, _id, fn_name, req, rsp)				\
 int rvu_mbox_handler_ ## fn_name(struct rvu *, struct req *, struct rsp *);
 MBOX_MESSAGES
@@ -637,10 +658,16 @@ void rvu_nix_freemem(struct rvu *rvu);
 int rvu_get_nixlf_count(struct rvu *rvu);
 void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf);
 int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr);
-int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add);
+int nix_update_mce_list(struct rvu *rvu, u16 pcifunc,
+			struct nix_mce_list *mce_list,
+			int mce_idx, int mcam_index, bool add);
+void nix_get_mce_list(struct rvu *rvu, u16 pcifunc, int type,
+		      struct nix_mce_list **mce_list, int *mce_idx);
 struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr);
 int rvu_get_next_nix_blkaddr(struct rvu *rvu, int blkaddr);
 void rvu_nix_reset_mac(struct rvu_pfvf *pfvf, int pcifunc);
+int nix_get_struct_ptrs(struct rvu *rvu, u16 pcifunc,
+			struct nix_hw **nix_hw, int *blkaddr);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
@@ -651,13 +678,19 @@ int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool en);
 void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
 				 int nixlf, u64 chan, u8 *mac_addr);
 void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				   int nixlf, u64 chan, u8 chan_cnt,
-				   bool allmulti);
-void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
-void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
+				   int nixlf, u64 chan, u8 chan_cnt);
+void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				  bool enable);
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, u64 chan);
-void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable);
+void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				bool enable);
+void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				    u64 chan);
+void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				   bool enable);
+void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc,
+				  int nixlf, int type, bool enable);
 void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_free_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index 9bf8eaabf9ab..7103f8216ad1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2132,6 +2132,7 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused)
 	struct rvu *rvu = s->private;
 	struct npc_mcam *mcam;
 	int pf, vf = -1;
+	bool enabled;
 	int blkaddr;
 	u16 target;
 	u64 hits;
@@ -2173,7 +2174,9 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused)
 		}
 
 		rvu_dbg_npc_mcam_show_action(s, iter);
-		seq_printf(s, "\tenabled: %s\n", iter->enable ? "yes" : "no");
+
+		enabled = is_mcam_entry_enabled(rvu, mcam, blkaddr, iter->entry);
+		seq_printf(s, "\tenabled: %s\n", enabled ? "yes" : "no");
 
 		if (!iter->has_cntr)
 			continue;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 174ef09f9069..8c8d739755cd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -21,6 +21,8 @@
 static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc);
 static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
 			    int type, int chan_id);
+static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
+			       int type, bool add);
 
 enum mc_tbl_sz {
 	MC_TBL_SZ_256,
@@ -132,6 +134,22 @@ int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr)
 	return 0;
 }
 
+int nix_get_struct_ptrs(struct rvu *rvu, u16 pcifunc,
+			struct nix_hw **nix_hw, int *blkaddr)
+{
+	struct rvu_pfvf *pfvf;
+
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	*blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (!pfvf->nixlf || *blkaddr < 0)
+		return NIX_AF_ERR_AF_LF_INVALID;
+
+	*nix_hw = get_nix_hw(rvu->hw, *blkaddr);
+	if (!*nix_hw)
+		return NIX_AF_ERR_INVALID_NIXBLK;
+	return 0;
+}
+
 static void nix_mce_list_init(struct nix_mce_list *list, int max)
 {
 	INIT_HLIST_HEAD(&list->head);
@@ -274,7 +292,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
 		pfvf->tx_chan_cnt = 1;
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
 					      pfvf->rx_chan_base,
-					      pfvf->rx_chan_cnt, false);
+					      pfvf->rx_chan_cnt);
 		break;
 	}
 
@@ -285,16 +303,17 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
 				    pfvf->rx_chan_base, pfvf->mac_addr);
 
 	/* Add this PF_FUNC to bcast pkt replication list */
-	err = nix_update_bcast_mce_list(rvu, pcifunc, true);
+	err = nix_update_mce_rule(rvu, pcifunc, NIXLF_BCAST_ENTRY, true);
 	if (err) {
 		dev_err(rvu->dev,
 			"Bcast list, failed to enable PF_FUNC 0x%x\n",
 			pcifunc);
 		return err;
 	}
-
+	/* Install MCAM rule matching Ethernet broadcast mac address */
 	rvu_npc_install_bcast_match_entry(rvu, pcifunc,
 					  nixlf, pfvf->rx_chan_base);
+
 	pfvf->maxlen = NIC_HW_MIN_FRS;
 	pfvf->minlen = NIC_HW_MIN_FRS;
 
@@ -310,7 +329,7 @@ static void nix_interface_deinit(struct rvu *rvu, u16 pcifunc, u8 nixlf)
 	pfvf->minlen = 0;
 
 	/* Remove this PF_FUNC from bcast pkt replication list */
-	err = nix_update_bcast_mce_list(rvu, pcifunc, false);
+	err = nix_update_mce_rule(rvu, pcifunc, NIXLF_BCAST_ENTRY, false);
 	if (err) {
 		dev_err(rvu->dev,
 			"Bcast list, failed to disable PF_FUNC 0x%x\n",
@@ -2203,8 +2222,8 @@ static int nix_blk_setup_mce(struct rvu *rvu, struct nix_hw *nix_hw,
 	aq_req.op = op;
 	aq_req.qidx = mce;
 
-	/* Forward bcast pkts to RQ0, RSS not needed */
-	aq_req.mce.op = 0;
+	/* Use RSS with RSS index 0 */
+	aq_req.mce.op = 1;
 	aq_req.mce.index = 0;
 	aq_req.mce.eol = eol;
 	aq_req.mce.pf_func = pcifunc;
@@ -2222,8 +2241,8 @@ static int nix_blk_setup_mce(struct rvu *rvu, struct nix_hw *nix_hw,
 	return 0;
 }
 
-static int nix_update_mce_list(struct nix_mce_list *mce_list,
-			       u16 pcifunc, bool add)
+static int nix_update_mce_list_entry(struct nix_mce_list *mce_list,
+				     u16 pcifunc, bool add)
 {
 	struct mce *mce, *tail = NULL;
 	bool delete = false;
@@ -2234,6 +2253,9 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list,
 		if (mce->pcifunc == pcifunc && !add) {
 			delete = true;
 			break;
+		} else if (mce->pcifunc == pcifunc && add) {
+			/* entry already exists */
+			return 0;
 		}
 		tail = mce;
 	}
@@ -2261,36 +2283,23 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list,
 	return 0;
 }
 
-int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
+int nix_update_mce_list(struct rvu *rvu, u16 pcifunc,
+			struct nix_mce_list *mce_list,
+			int mce_idx, int mcam_index, bool add)
 {
-	int err = 0, idx, next_idx, last_idx;
-	struct nix_mce_list *mce_list;
+	int err = 0, idx, next_idx, last_idx, blkaddr, npc_blkaddr;
+	struct npc_mcam *mcam = &rvu->hw->mcam;
 	struct nix_mcast *mcast;
 	struct nix_hw *nix_hw;
-	struct rvu_pfvf *pfvf;
 	struct mce *mce;
-	int blkaddr;
 
-	/* Broadcast pkt replication is not needed for AF's VFs, hence skip */
-	if (is_afvf(pcifunc))
-		return 0;
-
-	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
-	if (blkaddr < 0)
-		return 0;
-
-	nix_hw = get_nix_hw(rvu->hw, blkaddr);
-	if (!nix_hw)
-		return 0;
-
-	mcast = &nix_hw->mcast;
+	if (!mce_list)
+		return -EINVAL;
 
 	/* Get this PF/VF func's MCE index */
-	pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
-	idx = pfvf->bcast_mce_idx + (pcifunc & RVU_PFVF_FUNC_MASK);
+	idx = mce_idx + (pcifunc & RVU_PFVF_FUNC_MASK);
 
-	mce_list = &pfvf->bcast_mce_list;
-	if (idx > (pfvf->bcast_mce_idx + mce_list->max)) {
+	if (idx > (mce_idx + mce_list->max)) {
 		dev_err(rvu->dev,
 			"%s: Idx %d > max MCE idx %d, for PF%d bcast list\n",
 			__func__, idx, mce_list->max,
@@ -2298,20 +2307,26 @@ int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
 		return -EINVAL;
 	}
 
+	err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+	if (err)
+		return err;
+
+	mcast = &nix_hw->mcast;
 	mutex_lock(&mcast->mce_lock);
 
-	err = nix_update_mce_list(mce_list, pcifunc, add);
+	err = nix_update_mce_list_entry(mce_list, pcifunc, add);
 	if (err)
 		goto end;
 
 	/* Disable MCAM entry in NPC */
 	if (!mce_list->count) {
-		rvu_npc_enable_bcast_entry(rvu, pcifunc, false);
+		npc_blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+		npc_enable_mcam_entry(rvu, mcam, npc_blkaddr, mcam_index, false);
 		goto end;
 	}
 
 	/* Dump the updated list to HW */
-	idx = pfvf->bcast_mce_idx;
+	idx = mce_idx;
 	last_idx = idx + mce_list->count - 1;
 	hlist_for_each_entry(mce, &mce_list->head, node) {
 		if (idx > last_idx)
@@ -2332,7 +2347,71 @@ end:
 	return err;
 }
 
-static int nix_setup_bcast_tables(struct rvu *rvu, struct nix_hw *nix_hw)
+void nix_get_mce_list(struct rvu *rvu, u16 pcifunc, int type,
+		      struct nix_mce_list **mce_list, int *mce_idx)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct rvu_pfvf *pfvf;
+
+	if (!hw->cap.nix_rx_multicast ||
+	    !is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc & ~RVU_PFVF_FUNC_MASK))) {
+		*mce_list = NULL;
+		*mce_idx = 0;
+		return;
+	}
+
+	/* Get this PF/VF func's MCE index */
+	pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+
+	if (type == NIXLF_BCAST_ENTRY) {
+		*mce_list = &pfvf->bcast_mce_list;
+		*mce_idx = pfvf->bcast_mce_idx;
+	} else if (type == NIXLF_ALLMULTI_ENTRY) {
+		*mce_list = &pfvf->mcast_mce_list;
+		*mce_idx = pfvf->mcast_mce_idx;
+	} else if (type == NIXLF_PROMISC_ENTRY) {
+		*mce_list = &pfvf->promisc_mce_list;
+		*mce_idx = pfvf->promisc_mce_idx;
+	}  else {
+		*mce_list = NULL;
+		*mce_idx = 0;
+	}
+}
+
+static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
+			       int type, bool add)
+{
+	int err = 0, nixlf, blkaddr, mcam_index, mce_idx;
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct nix_mce_list *mce_list;
+
+	/* skip multicast pkt replication for AF's VFs */
+	if (is_afvf(pcifunc))
+		return 0;
+
+	if (!hw->cap.nix_rx_multicast)
+		return 0;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
+	if (blkaddr < 0)
+		return -EINVAL;
+
+	nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
+	if (nixlf < 0)
+		return -EINVAL;
+
+	nix_get_mce_list(rvu, pcifunc, type, &mce_list, &mce_idx);
+
+	mcam_index = npc_get_nixlf_mcam_index(mcam,
+					      pcifunc & ~RVU_PFVF_FUNC_MASK,
+					      nixlf, type);
+	err = nix_update_mce_list(rvu, pcifunc, mce_list,
+				  mce_idx, mcam_index, add);
+	return err;
+}
+
+static int nix_setup_mce_tables(struct rvu *rvu, struct nix_hw *nix_hw)
 {
 	struct nix_mcast *mcast = &nix_hw->mcast;
 	int err, pf, numvfs, idx;
@@ -2355,11 +2434,18 @@ static int nix_setup_bcast_tables(struct rvu *rvu, struct nix_hw *nix_hw)
 		if (pfvf->nix_blkaddr != nix_hw->blkaddr)
 			continue;
 
-		/* Save the start MCE */
+		/* save start idx of broadcast mce list */
 		pfvf->bcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1);
-
 		nix_mce_list_init(&pfvf->bcast_mce_list, numvfs + 1);
 
+		/* save start idx of multicast mce list */
+		pfvf->mcast_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1);
+		nix_mce_list_init(&pfvf->mcast_mce_list, numvfs + 1);
+
+		/* save the start idx of promisc mce list */
+		pfvf->promisc_mce_idx = nix_alloc_mce_list(mcast, numvfs + 1);
+		nix_mce_list_init(&pfvf->promisc_mce_list, numvfs + 1);
+
 		for (idx = 0; idx < (numvfs + 1); idx++) {
 			/* idx-0 is for PF, followed by VFs */
 			pcifunc = (pf << RVU_PFVF_PF_SHIFT);
@@ -2375,6 +2461,22 @@ static int nix_setup_bcast_tables(struct rvu *rvu, struct nix_hw *nix_hw)
 						pcifunc, 0, true);
 			if (err)
 				return err;
+
+			/* add dummy entries to multicast mce list */
+			err = nix_blk_setup_mce(rvu, nix_hw,
+						pfvf->mcast_mce_idx + idx,
+						NIX_AQ_INSTOP_INIT,
+						pcifunc, 0, true);
+			if (err)
+				return err;
+
+			/* add dummy entries to promisc mce list */
+			err = nix_blk_setup_mce(rvu, nix_hw,
+						pfvf->promisc_mce_idx + idx,
+						NIX_AQ_INSTOP_INIT,
+						pcifunc, 0, true);
+			if (err)
+				return err;
 		}
 	}
 	return 0;
@@ -2421,7 +2523,7 @@ static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
 
 	mutex_init(&mcast->mce_lock);
 
-	return nix_setup_bcast_tables(rvu, nix_hw);
+	return nix_setup_mce_tables(rvu, nix_hw);
 }
 
 static int nix_setup_txvlan(struct rvu *rvu, struct nix_hw *nix_hw)
@@ -3067,30 +3169,70 @@ int rvu_mbox_handler_nix_get_mac_addr(struct rvu *rvu,
 int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
 				     struct msg_rsp *rsp)
 {
-	bool allmulti = false, disable_promisc = false;
+	bool allmulti, promisc, nix_rx_multicast;
 	u16 pcifunc = req->hdr.pcifunc;
-	int blkaddr, nixlf, err;
 	struct rvu_pfvf *pfvf;
+	int nixlf, err;
 
-	err = nix_get_nixlf(rvu, pcifunc, &nixlf, &blkaddr);
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	promisc = req->mode & NIX_RX_MODE_PROMISC ? true : false;
+	allmulti = req->mode & NIX_RX_MODE_ALLMULTI ? true : false;
+	pfvf->use_mce_list = req->mode & NIX_RX_MODE_USE_MCE ? true : false;
+
+	nix_rx_multicast = rvu->hw->cap.nix_rx_multicast & pfvf->use_mce_list;
+
+	if (is_vf(pcifunc) && !nix_rx_multicast &&
+	    (promisc || allmulti)) {
+		dev_warn_ratelimited(rvu->dev,
+				     "VF promisc/multicast not supported\n");
+		return 0;
+	}
+
+	err = nix_get_nixlf(rvu, pcifunc, &nixlf, NULL);
 	if (err)
 		return err;
 
-	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	if (nix_rx_multicast) {
+		/* add/del this PF_FUNC to/from mcast pkt replication list */
+		err = nix_update_mce_rule(rvu, pcifunc, NIXLF_ALLMULTI_ENTRY,
+					  allmulti);
+		if (err) {
+			dev_err(rvu->dev,
+				"Failed to update pcifunc 0x%x to multicast list\n",
+				pcifunc);
+			return err;
+		}
 
-	if (req->mode & NIX_RX_MODE_PROMISC)
-		allmulti = false;
-	else if (req->mode & NIX_RX_MODE_ALLMULTI)
-		allmulti = true;
-	else
-		disable_promisc = true;
+		/* add/del this PF_FUNC to/from promisc pkt replication list */
+		err = nix_update_mce_rule(rvu, pcifunc, NIXLF_PROMISC_ENTRY,
+					  promisc);
+		if (err) {
+			dev_err(rvu->dev,
+				"Failed to update pcifunc 0x%x to promisc list\n",
+				pcifunc);
+			return err;
+		}
+	}
 
-	if (disable_promisc)
-		rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
-	else
+	/* install/uninstall allmulti entry */
+	if (allmulti) {
+		rvu_npc_install_allmulti_entry(rvu, pcifunc, nixlf,
+					       pfvf->rx_chan_base);
+	} else {
+		if (!nix_rx_multicast)
+			rvu_npc_enable_allmulti_entry(rvu, pcifunc, nixlf, false);
+	}
+
+	/* install/uninstall promisc entry */
+	if (promisc) {
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
 					      pfvf->rx_chan_base,
-					      pfvf->rx_chan_cnt, allmulti);
+					      pfvf->rx_chan_cnt);
+	} else {
+		if (!nix_rx_multicast)
+			rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false);
+	}
+
 	return 0;
 }
 
@@ -3648,6 +3790,7 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req,
 				     struct msg_rsp *rsp)
 {
 	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_pfvf *pfvf;
 	int nixlf, err;
 
 	err = nix_get_nixlf(rvu, pcifunc, &nixlf, NULL);
@@ -3658,6 +3801,9 @@ int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req,
 
 	npc_mcam_enable_flows(rvu, pcifunc);
 
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	set_bit(NIXLF_INITIALIZED, &pfvf->flags);
+
 	return rvu_cgx_start_stop_io(rvu, pcifunc, true);
 }
 
@@ -3665,6 +3811,7 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
 				    struct msg_rsp *rsp)
 {
 	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_pfvf *pfvf;
 	int nixlf, err;
 
 	err = nix_get_nixlf(rvu, pcifunc, &nixlf, NULL);
@@ -3673,6 +3820,9 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
 
 	rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf);
 
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	clear_bit(NIXLF_INITIALIZED, &pfvf->flags);
+
 	return rvu_cgx_start_stop_io(rvu, pcifunc, false);
 }
 
@@ -3691,6 +3841,8 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
 	nix_rx_sync(rvu, blkaddr);
 	nix_txschq_free(rvu, pcifunc);
 
+	clear_bit(NIXLF_INITIALIZED, &pfvf->flags);
+
 	rvu_cgx_start_stop_io(rvu, pcifunc, false);
 
 	if (pfvf->sq_ctx) {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 053cc872d0cc..5c2bd4337170 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -19,7 +19,7 @@
 #include "cgx.h"
 #include "npc_profile.h"
 
-#define RSVD_MCAM_ENTRIES_PER_PF	2 /* Bcast & Promisc */
+#define RSVD_MCAM_ENTRIES_PER_PF	3 /* Broadcast, Promisc and AllMulticast */
 #define RSVD_MCAM_ENTRIES_PER_NIXLF	1 /* Ucast for LFs */
 
 #define NPC_PARSE_RESULT_DMAC_OFFSET	8
@@ -214,8 +214,10 @@ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
 		 */
 		if (type == NIXLF_BCAST_ENTRY)
 			return index;
-		else if (type == NIXLF_PROMISC_ENTRY)
+		else if (type == NIXLF_ALLMULTI_ENTRY)
 			return index + 1;
+		else if (type == NIXLF_PROMISC_ENTRY)
+			return index + 2;
 	}
 
 	return npc_get_ucast_mcam_index(mcam, pcifunc, nixlf);
@@ -413,37 +415,49 @@ static void npc_fill_entryword(struct mcam_entry *entry, int idx,
 	}
 }
 
-static void npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam,
-					 int blkaddr, int index,
-					 struct mcam_entry *entry)
+static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam,
+					int blkaddr, u16 pf_func)
+{
+	int bank, nixlf, index;
+
+	/* get ucast entry rule entry index */
+	nix_get_nixlf(rvu, pf_func, &nixlf, NULL);
+	index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf,
+					 NIXLF_UCAST_ENTRY);
+	bank = npc_get_bank(mcam, index);
+	index &= (mcam->banksize - 1);
+
+	return rvu_read64(rvu, blkaddr,
+			  NPC_AF_MCAMEX_BANKX_ACTION(index, bank));
+}
+
+static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+			      int blkaddr, int index, struct mcam_entry *entry,
+			      bool *enable)
 {
 	u16 owner, target_func;
 	struct rvu_pfvf *pfvf;
-	int bank, nixlf;
 	u64 rx_action;
 
 	owner = mcam->entry2pfvf_map[index];
 	target_func = (entry->action >> 4) & 0xffff;
-	/* return incase target is PF or LBK or rule owner is not PF */
+	/* do nothing when target is LBK/PF or owner is not PF */
 	if (is_afvf(target_func) || (owner & RVU_PFVF_FUNC_MASK) ||
 	    !(target_func & RVU_PFVF_FUNC_MASK))
 		return;
 
+	/* save entry2target_pffunc */
 	pfvf = rvu_get_pfvf(rvu, target_func);
 	mcam->entry2target_pffunc[index] = target_func;
-	/* return if nixlf is not attached or initialized */
-	if (!is_nixlf_attached(rvu, target_func) || !pfvf->def_ucast_rule)
-		return;
 
-	/* get VF ucast entry rule */
-	nix_get_nixlf(rvu, target_func, &nixlf, NULL);
-	index = npc_get_nixlf_mcam_index(mcam, target_func,
-					 nixlf, NIXLF_UCAST_ENTRY);
-	bank = npc_get_bank(mcam, index);
-	index &= (mcam->banksize - 1);
+	/* don't enable rule when nixlf not attached or initialized */
+	if (!(is_nixlf_attached(rvu, target_func) &&
+	      test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
+		*enable = false;
 
-	rx_action = rvu_read64(rvu, blkaddr,
-			       NPC_AF_MCAMEX_BANKX_ACTION(index, bank));
+	/* copy VF default entry action to the VF mcam entry */
+	rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
+						 target_func);
 	if (rx_action)
 		entry->action = rx_action;
 }
@@ -495,10 +509,9 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
 			    NPC_AF_MCAMEX_BANKX_CAMX_W1(index, bank, 0), cam0);
 	}
 
-	/* copy VF default entry action to the VF mcam entry */
+	/* PF installing VF rule */
 	if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries)
-		npc_get_default_entry_action(rvu, mcam, blkaddr, actindex,
-					     entry);
+		npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable);
 
 	/* Set 'action' */
 	rvu_write64(rvu, blkaddr,
@@ -649,30 +662,32 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
 }
 
 void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				   int nixlf, u64 chan, u8 chan_cnt,
-				   bool allmulti)
+				   int nixlf, u64 chan, u8 chan_cnt)
 {
 	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
 	struct npc_install_flow_req req = { 0 };
 	struct npc_install_flow_rsp rsp = { 0 };
 	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct rvu_hwinfo *hw = rvu->hw;
 	int blkaddr, ucast_idx, index;
-	u8 mac_addr[ETH_ALEN] = { 0 };
 	struct nix_rx_action action;
 	u64 relaxed_mask;
 
-	/* Only PF or AF VF can add a promiscuous entry */
-	if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc))
+	if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc))
 		return;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
 	if (blkaddr < 0)
 		return;
 
-	*(u64 *)&action = 0x00;
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
 					 nixlf, NIXLF_PROMISC_ENTRY);
 
+	if (is_cgx_vf(rvu, pcifunc))
+		index = npc_get_nixlf_mcam_index(mcam,
+						 pcifunc & ~RVU_PFVF_FUNC_MASK,
+						 nixlf, NIXLF_PROMISC_ENTRY);
+
 	/* If the corresponding PF's ucast action is RSS,
 	 * use the same action for promisc also
 	 */
@@ -680,19 +695,20 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 					     nixlf, NIXLF_UCAST_ENTRY);
 	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
 		*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
-							blkaddr, ucast_idx);
+						      blkaddr, ucast_idx);
 
 	if (action.op != NIX_RX_ACTIONOP_RSS) {
 		*(u64 *)&action = 0x00;
 		action.op = NIX_RX_ACTIONOP_UCAST;
-		action.pf_func = pcifunc;
 	}
 
-	if (allmulti) {
-		mac_addr[0] = 0x01;	/* LSB bit of 1st byte in DMAC */
-		ether_addr_copy(req.packet.dmac, mac_addr);
-		ether_addr_copy(req.mask.dmac, mac_addr);
-		req.features = BIT_ULL(NPC_DMAC);
+	/* RX_ACTION set to MCAST for CGX PF's */
+	if (hw->cap.nix_rx_multicast && pfvf->use_mce_list &&
+	    is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
+		*(u64 *)&action = 0x00;
+		action.op = NIX_RX_ACTIONOP_MCAST;
+		pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+		action.index = pfvf->promisc_mce_idx;
 	}
 
 	req.chan_mask = 0xFFFU;
@@ -720,8 +736,8 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
 }
 
-static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				     int nixlf, bool enable)
+void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc,
+				  int nixlf, bool enable)
 {
 	struct npc_mcam *mcam = &rvu->hw->mcam;
 	int blkaddr, index;
@@ -730,25 +746,14 @@ static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc,
 	if (blkaddr < 0)
 		return;
 
-	/* Only PF's have a promiscuous entry */
-	if (pcifunc & RVU_PFVF_FUNC_MASK)
-		return;
+	/* Get 'pcifunc' of PF device */
+	pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK;
 
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
 					 nixlf, NIXLF_PROMISC_ENTRY);
 	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 }
 
-void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
-{
-	npc_enadis_promisc_entry(rvu, pcifunc, nixlf, false);
-}
-
-void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
-{
-	npc_enadis_promisc_entry(rvu, pcifunc, nixlf, true);
-}
-
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, u64 chan)
 {
@@ -758,8 +763,6 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 	struct npc_mcam *mcam = &rvu->hw->mcam;
 	struct rvu_hwinfo *hw = rvu->hw;
 	int blkaddr, index;
-	u32 req_index = 0;
-	u8 op;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
 	if (blkaddr < 0)
@@ -772,7 +775,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 	/* If pkt replication is not supported,
 	 * then only PF is allowed to add a bcast match entry.
 	 */
-	if (!hw->cap.nix_rx_multicast && pcifunc & RVU_PFVF_FUNC_MASK)
+	if (!hw->cap.nix_rx_multicast && is_vf(pcifunc))
 		return;
 
 	/* Get 'pcifunc' of PF device */
@@ -786,10 +789,10 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 		 * so install entry with UCAST action, so that PF
 		 * receives all broadcast packets.
 		 */
-		op = NIX_RX_ACTIONOP_UCAST;
+		req.op = NIX_RX_ACTIONOP_UCAST;
 	} else {
-		op = NIX_RX_ACTIONOP_MCAST;
-		req_index = pfvf->bcast_mce_idx;
+		req.op = NIX_RX_ACTIONOP_MCAST;
+		req.index = pfvf->bcast_mce_idx;
 	}
 
 	eth_broadcast_addr((u8 *)&req.packet.dmac);
@@ -798,15 +801,14 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 	req.channel = chan;
 	req.intf = pfvf->nix_rx_intf;
 	req.entry = index;
-	req.op = op;
 	req.hdr.pcifunc = 0; /* AF is requester */
 	req.vf = pcifunc;
-	req.index = req_index;
 
 	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
 }
 
-void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable)
+void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				bool enable)
 {
 	struct npc_mcam *mcam = &rvu->hw->mcam;
 	int blkaddr, index;
@@ -818,7 +820,104 @@ void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable)
 	/* Get 'pcifunc' of PF device */
 	pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK;
 
-	index = npc_get_nixlf_mcam_index(mcam, pcifunc, 0, NIXLF_BCAST_ENTRY);
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf,
+					 NIXLF_BCAST_ENTRY);
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
+}
+
+void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				    u64 chan)
+{
+	struct npc_install_flow_req req = { 0 };
+	struct npc_install_flow_rsp rsp = { 0 };
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct rvu_hwinfo *hw = rvu->hw;
+	int blkaddr, ucast_idx, index;
+	u8 mac_addr[ETH_ALEN] = { 0 };
+	struct nix_rx_action action;
+	struct rvu_pfvf *pfvf;
+	u16 vf_func;
+
+	/* Only CGX PF/VF can add allmulticast entry */
+	if (is_afvf(pcifunc))
+		return;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Get 'pcifunc' of PF device */
+	vf_func = pcifunc & RVU_PFVF_FUNC_MASK;
+	pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK;
+	pfvf = rvu_get_pfvf(rvu, pcifunc);
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					 nixlf, NIXLF_ALLMULTI_ENTRY);
+
+	/* If the corresponding PF's ucast action is RSS,
+	 * use the same action for multicast entry also
+	 */
+	ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					     nixlf, NIXLF_UCAST_ENTRY);
+	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
+		*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
+							blkaddr, ucast_idx);
+
+	if (action.op != NIX_RX_ACTIONOP_RSS) {
+		*(u64 *)&action = 0x00;
+		action.op = NIX_RX_ACTIONOP_UCAST;
+		action.pf_func = pcifunc;
+	}
+
+	/* RX_ACTION set to MCAST for CGX PF's */
+	if (hw->cap.nix_rx_multicast && pfvf->use_mce_list) {
+		*(u64 *)&action = 0x00;
+		action.op = NIX_RX_ACTIONOP_MCAST;
+		action.index = pfvf->mcast_mce_idx;
+	}
+
+	mac_addr[0] = 0x01;	/* LSB bit of 1st byte in DMAC */
+	ether_addr_copy(req.packet.dmac, mac_addr);
+	ether_addr_copy(req.mask.dmac, mac_addr);
+	req.features = BIT_ULL(NPC_DMAC);
+
+	/* For cn10k the upper two bits of the channel number are
+	 * cpt channel number. with masking out these bits in the
+	 * mcam entry, same entry used for NIX will allow packets
+	 * received from cpt for parsing.
+	 */
+	if (!is_rvu_otx2(rvu))
+		req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
+	else
+		req.chan_mask = 0xFFFU;
+
+	req.channel = chan;
+	req.intf = pfvf->nix_rx_intf;
+	req.entry = index;
+	req.op = action.op;
+	req.hdr.pcifunc = 0; /* AF is requester */
+	req.vf = pcifunc | vf_func;
+	req.index = action.index;
+	req.match_id = action.match_id;
+	req.flow_key_alg = action.flow_key_alg;
+
+	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
+}
+
+void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
+				   bool enable)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	int blkaddr, index;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	/* Get 'pcifunc' of PF device */
+	pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK;
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf,
+					 NIXLF_ALLMULTI_ENTRY);
 	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 }
 
@@ -860,6 +959,7 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
 				    int group, int alg_idx, int mcam_index)
 {
 	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct rvu_hwinfo *hw = rvu->hw;
 	struct nix_rx_action action;
 	int blkaddr, index, bank;
 	struct rvu_pfvf *pfvf;
@@ -915,7 +1015,8 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
 	/* If PF's promiscuous entry is enabled,
 	 * Set RSS action for that entry as well
 	 */
-	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) {
+	if ((!hw->cap.nix_rx_multicast || !pfvf->use_mce_list) &&
+	    is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) {
 		bank = npc_get_bank(mcam, index);
 		index &= (mcam->banksize - 1);
 
@@ -925,12 +1026,47 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
 	}
 }
 
+void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc,
+				  int nixlf, int type, bool enable)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct nix_mce_list *mce_list;
+	int index, blkaddr, mce_idx;
+	struct rvu_pfvf *pfvf;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return;
+
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK,
+					 nixlf, type);
+
+	/* disable MCAM entry when packet replication is not supported by hw */
+	if (!hw->cap.nix_rx_multicast && !is_vf(pcifunc)) {
+		npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
+		return;
+	}
+
+	/* return incase mce list is not enabled */
+	pfvf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+	if (hw->cap.nix_rx_multicast && is_vf(pcifunc) &&
+	    type != NIXLF_BCAST_ENTRY && !pfvf->use_mce_list)
+		return;
+
+	nix_get_mce_list(rvu, pcifunc, type, &mce_list, &mce_idx);
+
+	nix_update_mce_list(rvu, pcifunc, mce_list,
+			    mce_idx, index, enable);
+	if (enable)
+		npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
+}
+
 static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, bool enable)
 {
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	struct nix_rx_action action;
-	int index, bank, blkaddr;
+	int index, blkaddr;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
 	if (blkaddr < 0)
@@ -941,48 +1077,33 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
 					 nixlf, NIXLF_UCAST_ENTRY);
 	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 
-	/* For PF, ena/dis promisc and bcast MCAM match entries.
-	 * For VFs add/delete from bcast list when RX multicast
-	 * feature is present.
+	/* Nothing to do for VFs, on platforms where pkt replication
+	 * is not supported
 	 */
-	if (pcifunc & RVU_PFVF_FUNC_MASK && !rvu->hw->cap.nix_rx_multicast)
+	if ((pcifunc & RVU_PFVF_FUNC_MASK) && !rvu->hw->cap.nix_rx_multicast)
 		return;
 
-	/* For bcast, enable/disable only if it's action is not
-	 * packet replication, incase if action is replication
-	 * then this PF/VF's nixlf is removed from bcast replication
-	 * list.
-	 */
-	index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK,
-					 nixlf, NIXLF_BCAST_ENTRY);
-	bank = npc_get_bank(mcam, index);
-	*(u64 *)&action = rvu_read64(rvu, blkaddr,
-	     NPC_AF_MCAMEX_BANKX_ACTION(index & (mcam->banksize - 1), bank));
-
-	/* VFs will not have BCAST entry */
-	if (action.op != NIX_RX_ACTIONOP_MCAST &&
-	    !(pcifunc & RVU_PFVF_FUNC_MASK)) {
-		npc_enable_mcam_entry(rvu, mcam,
-				      blkaddr, index, enable);
-	} else {
-		nix_update_bcast_mce_list(rvu, pcifunc, enable);
-		/* Enable PF's BCAST entry for packet replication */
-		rvu_npc_enable_bcast_entry(rvu, pcifunc, enable);
-	}
-
-	if (enable)
-		rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf);
-	else
-		rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
+	/* add/delete pf_func to broadcast MCE list */
+	npc_enadis_default_mce_entry(rvu, pcifunc, nixlf,
+				     NIXLF_BCAST_ENTRY, enable);
 }
 
 void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 {
 	npc_enadis_default_entries(rvu, pcifunc, nixlf, false);
+
+	/* Delete multicast and promisc MCAM entries */
+	npc_enadis_default_mce_entry(rvu, pcifunc, nixlf,
+				     NIXLF_ALLMULTI_ENTRY, false);
+	npc_enadis_default_mce_entry(rvu, pcifunc, nixlf,
+				     NIXLF_PROMISC_ENTRY, false);
 }
 
 void rvu_npc_enable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 {
+	/* Enables only broadcast match entry. Promisc/Allmulti are enabled
+	 * in set_rx_mode mbox handler.
+	 */
 	npc_enadis_default_entries(rvu, pcifunc, nixlf, true);
 }
 
@@ -1002,7 +1123,8 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 	/* Disable MCAM entries directing traffic to this 'pcifunc' */
 	list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) {
 		if (is_npc_intf_rx(rule->intf) &&
-		    rule->rx_action.pf_func == pcifunc) {
+		    rule->rx_action.pf_func == pcifunc &&
+		    rule->rx_action.op != NIX_RX_ACTIONOP_MCAST) {
 			npc_enable_mcam_entry(rvu, mcam, blkaddr,
 					      rule->entry, false);
 			rule->enable = false;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 7f35b62eea13..bc37858c6a14 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1177,9 +1177,12 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 	}
 
 	err = nix_get_nixlf(rvu, target, &nixlf, NULL);
+	if (err)
+		return -EINVAL;
 
-	/* If interface is uninitialized then do not enable entry */
-	if (err || (!req->default_rule && !pfvf->def_ucast_rule))
+	/* don't enable rule when nixlf not attached or initialized */
+	if (!(is_nixlf_attached(rvu, target) &&
+	      test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
 		enable = false;
 
 	/* Packets reaching NPC in Tx path implies that a
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 03004fdac0c6..dcc6b74471e3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1820,9 +1820,11 @@ static void otx2_do_set_rx_mode(struct work_struct *work)
 
 	if (promisc)
 		req->mode |= NIX_RX_MODE_PROMISC;
-	else if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
+	if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST))
 		req->mode |= NIX_RX_MODE_ALLMULTI;
 
+	req->mode |= NIX_RX_MODE_USE_MCE;
+
 	otx2_sync_mbox_msg(&pf->mbox);
 	mutex_unlock(&pf->mbox.lock);
 }

From cbc100aa220556d056272f07dc735c9758089da9 Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Fri, 11 Jun 2021 15:12:03 +0530
Subject: [PATCH 1518/3292] octeontx2-nicvf: add ndo_set_rx_mode support for
 multicast & promisc

Add ndo_set_rx_mode callback handler to configure promisc, multicast and
allmulti options for VF driver. Also, modified PF driver ndo_set_rx_mode
handler to support multicast and promisc mode independently.

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/nic/otx2_vf.c  | 58 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 085be90a03eb..13a908f75ba0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -395,6 +395,42 @@ static netdev_tx_t otx2vf_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
+static void otx2vf_set_rx_mode(struct net_device *netdev)
+{
+	struct otx2_nic *vf = netdev_priv(netdev);
+
+	queue_work(vf->otx2_wq, &vf->rx_mode_work);
+}
+
+static void otx2vf_do_set_rx_mode(struct work_struct *work)
+{
+	struct otx2_nic *vf = container_of(work, struct otx2_nic, rx_mode_work);
+	struct net_device *netdev = vf->netdev;
+	unsigned int flags = netdev->flags;
+	struct nix_rx_mode *req;
+
+	mutex_lock(&vf->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_nix_set_rx_mode(&vf->mbox);
+	if (!req) {
+		mutex_unlock(&vf->mbox.lock);
+		return;
+	}
+
+	req->mode = NIX_RX_MODE_UCAST;
+
+	if (flags & IFF_PROMISC)
+		req->mode |= NIX_RX_MODE_PROMISC;
+	if (flags & (IFF_ALLMULTI | IFF_MULTICAST))
+		req->mode |= NIX_RX_MODE_ALLMULTI;
+
+	req->mode |= NIX_RX_MODE_USE_MCE;
+
+	otx2_sync_mbox_msg(&vf->mbox);
+
+	mutex_unlock(&vf->mbox.lock);
+}
+
 static int otx2vf_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	bool if_up = netif_running(netdev);
@@ -432,12 +468,24 @@ static const struct net_device_ops otx2vf_netdev_ops = {
 	.ndo_open = otx2vf_open,
 	.ndo_stop = otx2vf_stop,
 	.ndo_start_xmit = otx2vf_xmit,
+	.ndo_set_rx_mode = otx2vf_set_rx_mode,
 	.ndo_set_mac_address = otx2_set_mac_address,
 	.ndo_change_mtu = otx2vf_change_mtu,
 	.ndo_get_stats64 = otx2_get_stats64,
 	.ndo_tx_timeout = otx2_tx_timeout,
 };
 
+static int otx2_wq_init(struct otx2_nic *vf)
+{
+	vf->otx2_wq = create_singlethread_workqueue("otx2vf_wq");
+	if (!vf->otx2_wq)
+		return -ENOMEM;
+
+	INIT_WORK(&vf->rx_mode_work, otx2vf_do_set_rx_mode);
+	INIT_WORK(&vf->reset_task, otx2vf_reset_task);
+	return 0;
+}
+
 static int otx2vf_realloc_msix_vectors(struct otx2_nic *vf)
 {
 	struct otx2_hw *hw = &vf->hw;
@@ -588,8 +636,6 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	netdev->min_mtu = OTX2_MIN_MTU;
 	netdev->max_mtu = otx2_get_max_mtu(vf);
 
-	INIT_WORK(&vf->reset_task, otx2vf_reset_task);
-
 	/* To distinguish, for LBK VFs set netdev name explicitly */
 	if (is_otx2_lbkvf(vf->pdev)) {
 		int n;
@@ -606,6 +652,10 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_detach_rsrc;
 	}
 
+	err = otx2_wq_init(vf);
+	if (err)
+		goto err_unreg_netdev;
+
 	otx2vf_set_ethtool_ops(netdev);
 
 	/* Enable pause frames by default */
@@ -614,6 +664,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
+err_unreg_netdev:
+	unregister_netdev(netdev);
 err_detach_rsrc:
 	if (hw->lmt_base)
 		iounmap(hw->lmt_base);
@@ -644,6 +696,8 @@ static void otx2vf_remove(struct pci_dev *pdev)
 
 	cancel_work_sync(&vf->reset_task);
 	unregister_netdev(netdev);
+	if (vf->otx2_wq)
+		destroy_workqueue(vf->otx2_wq);
 	otx2vf_disable_mbox_intr(vf);
 	otx2_detach_resources(&vf->mbox);
 

From bd4302b8fd166a8424af9ac2435aeb4514d811f5 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Fri, 11 Jun 2021 15:12:04 +0530
Subject: [PATCH 1519/3292] octeontx2-af: add new mailbox to configure VF trust
 mode

Add new mailbox to enable PF to configure VF as trusted VF.
Trusted VF feature allows VFs to perform priviliged operations
such as enabling VF promiscuous mode, all-multicast mode and
changing the VF MAC address configured by PF. Refactored the
VF interface flags maintained by the AF driver such that the
flags do not overlap for various configurations.

Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/octeontx2/af/mbox.h  |  9 ++++
 .../net/ethernet/marvell/octeontx2/af/rvu.c   | 42 +++++++++++++++++++
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |  6 ++-
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 16 ++++++-
 .../marvell/octeontx2/af/rvu_npc_fs.c         | 16 +++++--
 5 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index ed0bc9d3d5dd..aee6a6f31b0d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -134,6 +134,7 @@ M(MSIX_OFFSET,		0x005, msix_offset, msg_req, msix_offset_rsp)	\
 M(VF_FLR,		0x006, vf_flr, msg_req, msg_rsp)		\
 M(PTP_OP,		0x007, ptp_op, ptp_req, ptp_rsp)		\
 M(GET_HW_CAP,		0x008, get_hw_cap, msg_req, get_hw_cap_rsp)	\
+M(SET_VF_PERM,		0x00b, set_vf_perm, set_vf_perm, msg_rsp)	\
 /* CGX mbox IDs (range 0x200 - 0x3FF) */				\
 M(CGX_START_RXTX,	0x200, cgx_start_rxtx, msg_req, msg_rsp)	\
 M(CGX_STOP_RXTX,	0x201, cgx_stop_rxtx, msg_req, msg_rsp)		\
@@ -1231,6 +1232,14 @@ struct ptp_rsp {
 	u64 clk;
 };
 
+struct set_vf_perm  {
+	struct  mbox_msghdr hdr;
+	u16	vf;
+#define RESET_VF_PERM		BIT_ULL(0)
+#define	VF_TRUSTED		BIT_ULL(1)
+	u64	flags;
+};
+
 /* CPT mailbox error codes
  * Range 901 - 1000.
  */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index bc71a9c462de..f11a02d6b6ef 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -1758,6 +1758,48 @@ int rvu_mbox_handler_get_hw_cap(struct rvu *rvu, struct msg_req *req,
 	return 0;
 }
 
+int rvu_mbox_handler_set_vf_perm(struct rvu *rvu, struct set_vf_perm *req,
+				 struct msg_rsp *rsp)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct rvu_pfvf *pfvf;
+	int blkaddr, nixlf;
+	u16 target;
+
+	/* Only PF can add VF permissions */
+	if ((pcifunc & RVU_PFVF_FUNC_MASK) || is_afvf(pcifunc))
+		return -EOPNOTSUPP;
+
+	target = (pcifunc & ~RVU_PFVF_FUNC_MASK) | (req->vf + 1);
+	pfvf = rvu_get_pfvf(rvu, target);
+
+	if (req->flags & RESET_VF_PERM) {
+		pfvf->flags &= RVU_CLEAR_VF_PERM;
+	} else if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) ^
+		 (req->flags & VF_TRUSTED)) {
+		change_bit(PF_SET_VF_TRUSTED, &pfvf->flags);
+		/* disable multicast and promisc entries */
+		if (!test_bit(PF_SET_VF_TRUSTED, &pfvf->flags)) {
+			blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, target);
+			if (blkaddr < 0)
+				return 0;
+			nixlf = rvu_get_lf(rvu, &hw->block[blkaddr],
+					   target, 0);
+			if (nixlf < 0)
+				return 0;
+			npc_enadis_default_mce_entry(rvu, target, nixlf,
+						     NIXLF_ALLMULTI_ENTRY,
+						     false);
+			npc_enadis_default_mce_entry(rvu, target, nixlf,
+						     NIXLF_PROMISC_ENTRY,
+						     false);
+		}
+	}
+
+	return 0;
+}
+
 static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid,
 				struct mbox_msghdr *req)
 {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 29bc9a6792d3..c88dab7747ef 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -223,7 +223,6 @@ struct rvu_pfvf {
 	u16		maxlen;
 	u16		minlen;
 
-	u8		pf_set_vf_cfg;
 	u8		mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */
 	u8		default_mac[ETH_ALEN]; /* MAC address from FWdata */
 
@@ -249,8 +248,13 @@ struct rvu_pfvf {
 
 enum rvu_pfvf_flags {
 	NIXLF_INITIALIZED = 0,
+	PF_SET_VF_MAC,
+	PF_SET_VF_CFG,
+	PF_SET_VF_TRUSTED,
 };
 
+#define RVU_CLEAR_VF_PERM  ~GENMASK(PF_SET_VF_TRUSTED, PF_SET_VF_MAC)
+
 struct nix_txsch {
 	struct rsrc_bmap schq;
 	u8   lvl;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 8c8d739755cd..d8cb665b7d8a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3137,15 +3137,22 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu,
 
 	pfvf = rvu_get_pfvf(rvu, pcifunc);
 
-	/* VF can't overwrite admin(PF) changes */
-	if (from_vf && pfvf->pf_set_vf_cfg)
+	/* untrusted VF can't overwrite admin(PF) changes */
+	if (!test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) &&
+	    (from_vf && test_bit(PF_SET_VF_MAC, &pfvf->flags))) {
+		dev_warn(rvu->dev,
+			 "MAC address set by admin(PF) cannot be overwritten by untrusted VF");
 		return -EPERM;
+	}
 
 	ether_addr_copy(pfvf->mac_addr, req->mac_addr);
 
 	rvu_npc_install_ucast_entry(rvu, pcifunc, nixlf,
 				    pfvf->rx_chan_base, req->mac_addr);
 
+	if (test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) && from_vf)
+		ether_addr_copy(pfvf->default_mac, req->mac_addr);
+
 	return 0;
 }
 
@@ -3188,6 +3195,11 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
 		return 0;
 	}
 
+	/* untrusted VF can't configure promisc/allmulti */
+	if (is_vf(pcifunc) && !test_bit(PF_SET_VF_TRUSTED, &pfvf->flags) &&
+	    (promisc || allmulti))
+		return 0;
+
 	err = nix_get_nixlf(rvu, pcifunc, &nixlf, NULL);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index bc37858c6a14..6ba6a835e2fa 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1103,9 +1103,11 @@ find_rule:
 	if (pf_set_vfs_mac) {
 		ether_addr_copy(pfvf->default_mac, req->packet.dmac);
 		ether_addr_copy(pfvf->mac_addr, req->packet.dmac);
+		set_bit(PF_SET_VF_MAC, &pfvf->flags);
 	}
 
-	if (pfvf->pf_set_vf_cfg && req->vtag0_type == NIX_AF_LFX_RX_VTAG_TYPE7)
+	if (test_bit(PF_SET_VF_CFG, &pfvf->flags) &&
+	    req->vtag0_type == NIX_AF_LFX_RX_VTAG_TYPE7)
 		rule->vfvlan_cfg = true;
 
 	return 0;
@@ -1167,7 +1169,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 
 	/* PF installing for its VF */
 	if (req->hdr.pcifunc && !from_vf && req->vf)
-		pfvf->pf_set_vf_cfg = 1;
+		set_bit(PF_SET_VF_CFG, &pfvf->flags);
 
 	/* update req destination mac addr */
 	if ((req->features & BIT_ULL(NPC_DMAC)) && is_npc_intf_rx(req->intf) &&
@@ -1177,7 +1179,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 	}
 
 	err = nix_get_nixlf(rvu, target, &nixlf, NULL);
-	if (err)
+	if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac)
 		return -EINVAL;
 
 	/* don't enable rule when nixlf not attached or initialized */
@@ -1196,6 +1198,14 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 	if (from_vf && !enable)
 		return -EINVAL;
 
+	/* PF sets VF mac & VF NIXLF is not attached, update the mac addr */
+	if (pf_set_vfs_mac && !enable) {
+		ether_addr_copy(pfvf->default_mac, req->packet.dmac);
+		ether_addr_copy(pfvf->mac_addr, req->packet.dmac);
+		set_bit(PF_SET_VF_MAC, &pfvf->flags);
+		return 0;
+	}
+
 	/* If message is from VF then its flow should not overlap with
 	 * reserved unicast flow.
 	 */

From b1dc20407b5920d9058c2d1b021a44c32acbf8fa Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Fri, 11 Jun 2021 15:12:05 +0530
Subject: [PATCH 1520/3292] octeontx2-pf: add support for ndo_set_vf_trust

Add support for setting a VF as a trusted VF by PF admin. Trusted VF
feature allows VFs to perform priviliged operations such as enabling
VF promiscuous mode, all-multicast mode and changing the VF MAC address
even if it was assigned by PF.

Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <Sunil.Goutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../marvell/octeontx2/nic/otx2_common.h       |   6 +
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  | 128 ++++++++++++++----
 2 files changed, 109 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 45730d0d92f2..543aee726fbe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -223,6 +223,11 @@ struct otx2_hw {
 	u64			*nix_lmt_base;
 };
 
+enum vfperm {
+	OTX2_RESET_VF_PERM,
+	OTX2_TRUSTED_VF,
+};
+
 struct otx2_vf_config {
 	struct otx2_nic *pf;
 	struct delayed_work link_event_work;
@@ -230,6 +235,7 @@ struct otx2_vf_config {
 	u8 mac[ETH_ALEN];
 	u16 vlan;
 	int tx_vtag_idx;
+	bool trusted;
 };
 
 struct flr_work {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index dcc6b74471e3..82b53e72268f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -39,6 +39,8 @@ MODULE_DESCRIPTION(DRV_STRING);
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(pci, otx2_pf_id_table);
 
+static void otx2_vf_link_event_task(struct work_struct *work);
+
 enum {
 	TYPE_PFAF,
 	TYPE_PFVF,
@@ -2046,7 +2048,7 @@ static int otx2_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	if (!netif_running(netdev))
 		return -EAGAIN;
 
-	if (vf >= pci_num_vf(pdev))
+	if (vf >= pf->total_vfs)
 		return -EINVAL;
 
 	if (!is_valid_ether_addr(mac))
@@ -2057,7 +2059,8 @@ static int otx2_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 
 	ret = otx2_do_set_vf_mac(pf, vf, mac);
 	if (ret == 0)
-		dev_info(&pdev->dev, "Reload VF driver to apply the changes\n");
+		dev_info(&pdev->dev,
+			 "Load/Reload VF driver\n");
 
 	return ret;
 }
@@ -2243,10 +2246,63 @@ static int otx2_get_vf_config(struct net_device *netdev, int vf,
 	ivi->vf = vf;
 	ether_addr_copy(ivi->mac, config->mac);
 	ivi->vlan = config->vlan;
+	ivi->trusted = config->trusted;
 
 	return 0;
 }
 
+static int otx2_set_vf_permissions(struct otx2_nic *pf, int vf,
+				   int req_perm)
+{
+	struct set_vf_perm *req;
+	int rc;
+
+	mutex_lock(&pf->mbox.lock);
+	req = otx2_mbox_alloc_msg_set_vf_perm(&pf->mbox);
+	if (!req) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Let AF reset VF permissions as sriov is disabled */
+	if (req_perm == OTX2_RESET_VF_PERM) {
+		req->flags |= RESET_VF_PERM;
+	} else if (req_perm == OTX2_TRUSTED_VF) {
+		if (pf->vf_configs[vf].trusted)
+			req->flags |= VF_TRUSTED;
+	}
+
+	req->vf = vf;
+	rc = otx2_sync_mbox_msg(&pf->mbox);
+out:
+	mutex_unlock(&pf->mbox.lock);
+	return rc;
+}
+
+static int otx2_ndo_set_vf_trust(struct net_device *netdev, int vf,
+				 bool enable)
+{
+	struct otx2_nic *pf = netdev_priv(netdev);
+	struct pci_dev *pdev = pf->pdev;
+	int rc;
+
+	if (vf >= pci_num_vf(pdev))
+		return -EINVAL;
+
+	if (pf->vf_configs[vf].trusted == enable)
+		return 0;
+
+	pf->vf_configs[vf].trusted = enable;
+	rc = otx2_set_vf_permissions(pf, vf, OTX2_TRUSTED_VF);
+
+	if (rc)
+		pf->vf_configs[vf].trusted = !enable;
+	else
+		netdev_info(pf->netdev, "VF %d is %strusted\n",
+			    vf, enable ? "" : "not ");
+	return rc;
+}
+
 static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_open		= otx2_open,
 	.ndo_stop		= otx2_stop,
@@ -2263,6 +2319,7 @@ static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_set_vf_vlan	= otx2_set_vf_vlan,
 	.ndo_get_vf_config	= otx2_get_vf_config,
 	.ndo_setup_tc		= otx2_setup_tc,
+	.ndo_set_vf_trust	= otx2_ndo_set_vf_trust,
 };
 
 static int otx2_wq_init(struct otx2_nic *pf)
@@ -2317,6 +2374,40 @@ static int otx2_realloc_msix_vectors(struct otx2_nic *pf)
 	return otx2_register_mbox_intr(pf, false);
 }
 
+static int otx2_sriov_vfcfg_init(struct otx2_nic *pf)
+{
+	int i;
+
+	pf->vf_configs = devm_kcalloc(pf->dev, pf->total_vfs,
+				      sizeof(struct otx2_vf_config),
+				      GFP_KERNEL);
+	if (!pf->vf_configs)
+		return -ENOMEM;
+
+	for (i = 0; i < pf->total_vfs; i++) {
+		pf->vf_configs[i].pf = pf;
+		pf->vf_configs[i].intf_down = true;
+		pf->vf_configs[i].trusted = false;
+		INIT_DELAYED_WORK(&pf->vf_configs[i].link_event_work,
+				  otx2_vf_link_event_task);
+	}
+
+	return 0;
+}
+
+static void otx2_sriov_vfcfg_cleanup(struct otx2_nic *pf)
+{
+	int i;
+
+	if (!pf->vf_configs)
+		return;
+
+	for (i = 0; i < pf->total_vfs; i++) {
+		cancel_delayed_work_sync(&pf->vf_configs[i].link_event_work);
+		otx2_set_vf_permissions(pf, i, OTX2_RESET_VF_PERM);
+	}
+}
+
 static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct device *dev = &pdev->dev;
@@ -2511,6 +2602,11 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		goto err_mcam_flow_del;
 
+	/* Initialize SR-IOV resources */
+	err = otx2_sriov_vfcfg_init(pf);
+	if (err)
+		goto err_pf_sriov_init;
+
 	/* Enable link notifications */
 	otx2_cgx_config_linkevents(pf, true);
 
@@ -2520,6 +2616,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
+err_pf_sriov_init:
+	otx2_shutdown_tc(pf);
 err_mcam_flow_del:
 	otx2_mcam_flow_del(pf);
 err_unreg_netdev:
@@ -2578,7 +2676,7 @@ static int otx2_sriov_enable(struct pci_dev *pdev, int numvfs)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct otx2_nic *pf = netdev_priv(netdev);
-	int ret, i;
+	int ret;
 
 	/* Init PF <=> VF mailbox stuff */
 	ret = otx2_pfvf_mbox_init(pf, numvfs);
@@ -2589,23 +2687,9 @@ static int otx2_sriov_enable(struct pci_dev *pdev, int numvfs)
 	if (ret)
 		goto free_mbox;
 
-	pf->vf_configs = kcalloc(numvfs, sizeof(struct otx2_vf_config),
-				 GFP_KERNEL);
-	if (!pf->vf_configs) {
-		ret = -ENOMEM;
-		goto free_intr;
-	}
-
-	for (i = 0; i < numvfs; i++) {
-		pf->vf_configs[i].pf = pf;
-		pf->vf_configs[i].intf_down = true;
-		INIT_DELAYED_WORK(&pf->vf_configs[i].link_event_work,
-				  otx2_vf_link_event_task);
-	}
-
 	ret = otx2_pf_flr_init(pf, numvfs);
 	if (ret)
-		goto free_configs;
+		goto free_intr;
 
 	ret = otx2_register_flr_me_intr(pf, numvfs);
 	if (ret)
@@ -2620,8 +2704,6 @@ free_flr_intr:
 	otx2_disable_flr_me_intr(pf);
 free_flr:
 	otx2_flr_wq_destroy(pf);
-free_configs:
-	kfree(pf->vf_configs);
 free_intr:
 	otx2_disable_pfvf_mbox_intr(pf, numvfs);
 free_mbox:
@@ -2634,17 +2716,12 @@ static int otx2_sriov_disable(struct pci_dev *pdev)
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct otx2_nic *pf = netdev_priv(netdev);
 	int numvfs = pci_num_vf(pdev);
-	int i;
 
 	if (!numvfs)
 		return 0;
 
 	pci_disable_sriov(pdev);
 
-	for (i = 0; i < pci_num_vf(pdev); i++)
-		cancel_delayed_work_sync(&pf->vf_configs[i].link_event_work);
-	kfree(pf->vf_configs);
-
 	otx2_disable_flr_me_intr(pf);
 	otx2_flr_wq_destroy(pf);
 	otx2_disable_pfvf_mbox_intr(pf, numvfs);
@@ -2684,6 +2761,7 @@ static void otx2_remove(struct pci_dev *pdev)
 
 	unregister_netdev(netdev);
 	otx2_sriov_disable(pf->pdev);
+	otx2_sriov_vfcfg_cleanup(pf);
 	if (pf->otx2_wq)
 		destroy_workqueue(pf->otx2_wq);
 

From a9e29e5511b9e68b64e9031edb7b7f8920ad3de1 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:09:47 +0300
Subject: [PATCH 1521/3292] af_vsock: update functions for connectible socket

Prepare af_vsock.c for SEQPACKET support: rename some functions such
as setsockopt(), getsockopt(), connect(), recvmsg(), sendmsg() in general
manner, because they are shared with stream sockets.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 64 +++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 92a72f0e0d94..7dd8e70d78cd 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -604,8 +604,8 @@ out:
 
 /**** SOCKET OPERATIONS ****/
 
-static int __vsock_bind_stream(struct vsock_sock *vsk,
-			       struct sockaddr_vm *addr)
+static int __vsock_bind_connectible(struct vsock_sock *vsk,
+				    struct sockaddr_vm *addr)
 {
 	static u32 port;
 	struct sockaddr_vm new_addr;
@@ -685,7 +685,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 	switch (sk->sk_socket->type) {
 	case SOCK_STREAM:
 		spin_lock_bh(&vsock_table_lock);
-		retval = __vsock_bind_stream(vsk, addr);
+		retval = __vsock_bind_connectible(vsk, addr);
 		spin_unlock_bh(&vsock_table_lock);
 		break;
 
@@ -768,6 +768,11 @@ static struct sock *__vsock_create(struct net *net,
 	return sk;
 }
 
+static bool sock_type_connectible(u16 type)
+{
+	return type == SOCK_STREAM;
+}
+
 static void __vsock_release(struct sock *sk, int level)
 {
 	if (sk) {
@@ -786,7 +791,7 @@ static void __vsock_release(struct sock *sk, int level)
 
 		if (vsk->transport)
 			vsk->transport->release(vsk);
-		else if (sk->sk_type == SOCK_STREAM)
+		else if (sock_type_connectible(sk->sk_type))
 			vsock_remove_sock(vsk);
 
 		sock_orphan(sk);
@@ -948,7 +953,7 @@ static int vsock_shutdown(struct socket *sock, int mode)
 	lock_sock(sk);
 	if (sock->state == SS_UNCONNECTED) {
 		err = -ENOTCONN;
-		if (sk->sk_type == SOCK_STREAM)
+		if (sock_type_connectible(sk->sk_type))
 			goto out;
 	} else {
 		sock->state = SS_DISCONNECTING;
@@ -961,7 +966,7 @@ static int vsock_shutdown(struct socket *sock, int mode)
 		sk->sk_shutdown |= mode;
 		sk->sk_state_change(sk);
 
-		if (sk->sk_type == SOCK_STREAM) {
+		if (sock_type_connectible(sk->sk_type)) {
 			sock_reset_flag(sk, SOCK_DONE);
 			vsock_send_shutdown(sk, mode);
 		}
@@ -1016,7 +1021,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
 		if (!(sk->sk_shutdown & SEND_SHUTDOWN))
 			mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 
-	} else if (sock->type == SOCK_STREAM) {
+	} else if (sock_type_connectible(sk->sk_type)) {
 		const struct vsock_transport *transport;
 
 		lock_sock(sk);
@@ -1263,8 +1268,8 @@ static void vsock_connect_timeout(struct work_struct *work)
 	sock_put(sk);
 }
 
-static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
-				int addr_len, int flags)
+static int vsock_connect(struct socket *sock, struct sockaddr *addr,
+			 int addr_len, int flags)
 {
 	int err;
 	struct sock *sk;
@@ -1414,7 +1419,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
 
 	lock_sock(listener);
 
-	if (sock->type != SOCK_STREAM) {
+	if (!sock_type_connectible(sock->type)) {
 		err = -EOPNOTSUPP;
 		goto out;
 	}
@@ -1491,7 +1496,7 @@ static int vsock_listen(struct socket *sock, int backlog)
 
 	lock_sock(sk);
 
-	if (sock->type != SOCK_STREAM) {
+	if (!sock_type_connectible(sk->sk_type)) {
 		err = -EOPNOTSUPP;
 		goto out;
 	}
@@ -1535,11 +1540,11 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk,
 	vsk->buffer_size = val;
 }
 
-static int vsock_stream_setsockopt(struct socket *sock,
-				   int level,
-				   int optname,
-				   sockptr_t optval,
-				   unsigned int optlen)
+static int vsock_connectible_setsockopt(struct socket *sock,
+					int level,
+					int optname,
+					sockptr_t optval,
+					unsigned int optlen)
 {
 	int err;
 	struct sock *sk;
@@ -1617,10 +1622,10 @@ exit:
 	return err;
 }
 
-static int vsock_stream_getsockopt(struct socket *sock,
-				   int level, int optname,
-				   char __user *optval,
-				   int __user *optlen)
+static int vsock_connectible_getsockopt(struct socket *sock,
+					int level, int optname,
+					char __user *optval,
+					int __user *optlen)
 {
 	int err;
 	int len;
@@ -1688,8 +1693,8 @@ static int vsock_stream_getsockopt(struct socket *sock,
 	return 0;
 }
 
-static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
-				size_t len)
+static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
+				     size_t len)
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
@@ -1828,10 +1833,9 @@ out:
 	return err;
 }
 
-
 static int
-vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
-		     int flags)
+vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			  int flags)
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
@@ -2007,7 +2011,7 @@ static const struct proto_ops vsock_stream_ops = {
 	.owner = THIS_MODULE,
 	.release = vsock_release,
 	.bind = vsock_bind,
-	.connect = vsock_stream_connect,
+	.connect = vsock_connect,
 	.socketpair = sock_no_socketpair,
 	.accept = vsock_accept,
 	.getname = vsock_getname,
@@ -2015,10 +2019,10 @@ static const struct proto_ops vsock_stream_ops = {
 	.ioctl = sock_no_ioctl,
 	.listen = vsock_listen,
 	.shutdown = vsock_shutdown,
-	.setsockopt = vsock_stream_setsockopt,
-	.getsockopt = vsock_stream_getsockopt,
-	.sendmsg = vsock_stream_sendmsg,
-	.recvmsg = vsock_stream_recvmsg,
+	.setsockopt = vsock_connectible_setsockopt,
+	.getsockopt = vsock_connectible_getsockopt,
+	.sendmsg = vsock_connectible_sendmsg,
+	.recvmsg = vsock_connectible_recvmsg,
 	.mmap = sock_no_mmap,
 	.sendpage = sock_no_sendpage,
 };

From b3f7fd54881bcba5dc529935f38df649167803b1 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:10:07 +0300
Subject: [PATCH 1522/3292] af_vsock: separate wait data loop

Wait loop for data could be shared between SEQPACKET and STREAM
sockets, so move it to dedicated function. While moving the code
around, let's update an old comment.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 162 +++++++++++++++++++++------------------
 1 file changed, 87 insertions(+), 75 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7dd8e70d78cd..4269e80b02cd 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1833,6 +1833,69 @@ out:
 	return err;
 }
 
+static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
+			   long timeout,
+			   struct vsock_transport_recv_notify_data *recv_data,
+			   size_t target)
+{
+	const struct vsock_transport *transport;
+	struct vsock_sock *vsk;
+	s64 data;
+	int err;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+	transport = vsk->transport;
+
+	while ((data = vsock_stream_has_data(vsk)) == 0) {
+		prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
+
+		if (sk->sk_err != 0 ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
+		    (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+			break;
+		}
+
+		/* Don't wait for non-blocking sockets. */
+		if (timeout == 0) {
+			err = -EAGAIN;
+			break;
+		}
+
+		if (recv_data) {
+			err = transport->notify_recv_pre_block(vsk, target, recv_data);
+			if (err < 0)
+				break;
+		}
+
+		release_sock(sk);
+		timeout = schedule_timeout(timeout);
+		lock_sock(sk);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			break;
+		} else if (timeout == 0) {
+			err = -EAGAIN;
+			break;
+		}
+	}
+
+	finish_wait(sk_sleep(sk), wait);
+
+	if (err)
+		return err;
+
+	/* Internal transport error when checking for available
+	 * data. XXX This should be changed to a connection
+	 * reset in a later change.
+	 */
+	if (data < 0)
+		return -ENOMEM;
+
+	return data;
+}
+
 static int
 vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			  int flags)
@@ -1912,85 +1975,34 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 
 	while (1) {
-		s64 ready;
+		ssize_t read;
 
-		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		ready = vsock_stream_has_data(vsk);
+		err = vsock_wait_data(sk, &wait, timeout, &recv_data, target);
+		if (err <= 0)
+			break;
 
-		if (ready == 0) {
-			if (sk->sk_err != 0 ||
-			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
-			    (vsk->peer_shutdown & SEND_SHUTDOWN)) {
-				finish_wait(sk_sleep(sk), &wait);
-				break;
-			}
-			/* Don't wait for non-blocking sockets. */
-			if (timeout == 0) {
-				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
-				break;
-			}
+		err = transport->notify_recv_pre_dequeue(vsk, target,
+							 &recv_data);
+		if (err < 0)
+			break;
 
-			err = transport->notify_recv_pre_block(
-					vsk, target, &recv_data);
-			if (err < 0) {
-				finish_wait(sk_sleep(sk), &wait);
-				break;
-			}
-			release_sock(sk);
-			timeout = schedule_timeout(timeout);
-			lock_sock(sk);
-
-			if (signal_pending(current)) {
-				err = sock_intr_errno(timeout);
-				finish_wait(sk_sleep(sk), &wait);
-				break;
-			} else if (timeout == 0) {
-				err = -EAGAIN;
-				finish_wait(sk_sleep(sk), &wait);
-				break;
-			}
-		} else {
-			ssize_t read;
-
-			finish_wait(sk_sleep(sk), &wait);
-
-			if (ready < 0) {
-				/* Invalid queue pair content. XXX This should
-				* be changed to a connection reset in a later
-				* change.
-				*/
-
-				err = -ENOMEM;
-				goto out;
-			}
-
-			err = transport->notify_recv_pre_dequeue(
-					vsk, target, &recv_data);
-			if (err < 0)
-				break;
-
-			read = transport->stream_dequeue(
-					vsk, msg,
-					len - copied, flags);
-			if (read < 0) {
-				err = -ENOMEM;
-				break;
-			}
-
-			copied += read;
-
-			err = transport->notify_recv_post_dequeue(
-					vsk, target, read,
-					!(flags & MSG_PEEK), &recv_data);
-			if (err < 0)
-				goto out;
-
-			if (read >= target || flags & MSG_PEEK)
-				break;
-
-			target -= read;
+		read = transport->stream_dequeue(vsk, msg, len - copied, flags);
+		if (read < 0) {
+			err = -ENOMEM;
+			break;
 		}
+
+		copied += read;
+
+		err = transport->notify_recv_post_dequeue(vsk, target, read,
+						!(flags & MSG_PEEK), &recv_data);
+		if (err < 0)
+			goto out;
+
+		if (read >= target || flags & MSG_PEEK)
+			break;
+
+		target -= read;
 	}
 
 	if (sk->sk_err)

From 19c1b90e1979c3974cd6a3ec0cbb886a84278d84 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:10:21 +0300
Subject: [PATCH 1523/3292] af_vsock: separate receive data loop

Some code in receive data loop could be shared between SEQPACKET
and STREAM sockets, while another part is type specific, so move STREAM
specific data receive logic to '__vsock_stream_recvmsg()' dedicated
function, while checks, that will be same for both STREAM and SEQPACKET
sockets, stays in 'vsock_connectible_recvmsg()'.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 120 ++++++++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 51 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 4269e80b02cd..c4f6bfa1e381 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1896,65 +1896,22 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
 	return data;
 }
 
-static int
-vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
-			  int flags)
+static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+				  size_t len, int flags)
 {
-	struct sock *sk;
-	struct vsock_sock *vsk;
-	const struct vsock_transport *transport;
-	int err;
-	size_t target;
-	ssize_t copied;
-	long timeout;
 	struct vsock_transport_recv_notify_data recv_data;
+	const struct vsock_transport *transport;
+	struct vsock_sock *vsk;
+	ssize_t copied;
+	size_t target;
+	long timeout;
+	int err;
 
 	DEFINE_WAIT(wait);
 
-	sk = sock->sk;
 	vsk = vsock_sk(sk);
-	err = 0;
-
-	lock_sock(sk);
-
 	transport = vsk->transport;
 
-	if (!transport || sk->sk_state != TCP_ESTABLISHED) {
-		/* Recvmsg is supposed to return 0 if a peer performs an
-		 * orderly shutdown. Differentiate between that case and when a
-		 * peer has not connected or a local shutdown occurred with the
-		 * SOCK_DONE flag.
-		 */
-		if (sock_flag(sk, SOCK_DONE))
-			err = 0;
-		else
-			err = -ENOTCONN;
-
-		goto out;
-	}
-
-	if (flags & MSG_OOB) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	/* We don't check peer_shutdown flag here since peer may actually shut
-	 * down, but there can be data in the queue that a local socket can
-	 * receive.
-	 */
-	if (sk->sk_shutdown & RCV_SHUTDOWN) {
-		err = 0;
-		goto out;
-	}
-
-	/* It is valid on Linux to pass in a zero-length receive buffer.  This
-	 * is not an error.  We may as well bail out now.
-	 */
-	if (!len) {
-		err = 0;
-		goto out;
-	}
-
 	/* We must not copy less than target bytes into the user's buffer
 	 * before returning successfully, so we wait for the consume queue to
 	 * have that much data to consume before dequeueing.  Note that this
@@ -2013,6 +1970,67 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	if (copied > 0)
 		err = copied;
 
+out:
+	return err;
+}
+
+static int
+vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+			  int flags)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	const struct vsock_transport *transport;
+	int err;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	transport = vsk->transport;
+
+	if (!transport || sk->sk_state != TCP_ESTABLISHED) {
+		/* Recvmsg is supposed to return 0 if a peer performs an
+		 * orderly shutdown. Differentiate between that case and when a
+		 * peer has not connected or a local shutdown occurred with the
+		 * SOCK_DONE flag.
+		 */
+		if (sock_flag(sk, SOCK_DONE))
+			err = 0;
+		else
+			err = -ENOTCONN;
+
+		goto out;
+	}
+
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* We don't check peer_shutdown flag here since peer may actually shut
+	 * down, but there can be data in the queue that a local socket can
+	 * receive.
+	 */
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		err = 0;
+		goto out;
+	}
+
+	/* It is valid on Linux to pass in a zero-length receive buffer.  This
+	 * is not an error.  We may as well bail out now.
+	 */
+	if (!len) {
+		err = 0;
+		goto out;
+	}
+
+	err = __vsock_stream_recvmsg(sk, msg, len, flags);
+
 out:
 	release_sock(sk);
 	return err;

From 9942c192b256bc11cc903f89f4057bc97434dee9 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:10:34 +0300
Subject: [PATCH 1524/3292] af_vsock: implement SEQPACKET receive loop

Add receive loop for SEQPACKET. It looks like receive loop for
STREAM, but there are differences:
1) It doesn't call notify callbacks.
2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
   there is no sense for these values in SEQPACKET case.
3) It waits until whole record is received.
4) It processes and sets 'MSG_TRUNC' flag.

So to avoid extra conditions for two types of socket inside one loop, two
independent functions were created.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_vsock.h   |  4 +++
 net/vmw_vsock/af_vsock.c | 55 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index b1c717286993..4d7cf6b2aca2 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -135,6 +135,10 @@ struct vsock_transport {
 	bool (*stream_is_active)(struct vsock_sock *);
 	bool (*stream_allow)(u32 cid, u32 port);
 
+	/* SEQ_PACKET. */
+	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
+				     int flags);
+
 	/* Notification. */
 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c4f6bfa1e381..87ae26b2e3e1 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1974,6 +1974,56 @@ out:
 	return err;
 }
 
+static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
+				     size_t len, int flags)
+{
+	const struct vsock_transport *transport;
+	struct vsock_sock *vsk;
+	ssize_t record_len;
+	long timeout;
+	int err = 0;
+	DEFINE_WAIT(wait);
+
+	vsk = vsock_sk(sk);
+	transport = vsk->transport;
+
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	err = vsock_wait_data(sk, &wait, timeout, NULL, 0);
+	if (err <= 0)
+		goto out;
+
+	record_len = transport->seqpacket_dequeue(vsk, msg, flags);
+
+	if (record_len < 0) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (sk->sk_err) {
+		err = -sk->sk_err;
+	} else if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		err = 0;
+	} else {
+		/* User sets MSG_TRUNC, so return real length of
+		 * packet.
+		 */
+		if (flags & MSG_TRUNC)
+			err = record_len;
+		else
+			err = len - msg_data_left(msg);
+
+		/* Always set MSG_TRUNC if real length of packet is
+		 * bigger than user's buffer.
+		 */
+		if (record_len > len)
+			msg->msg_flags |= MSG_TRUNC;
+	}
+
+out:
+	return err;
+}
+
 static int
 vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			  int flags)
@@ -2029,7 +2079,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		goto out;
 	}
 
-	err = __vsock_stream_recvmsg(sk, msg, len, flags);
+	if (sk->sk_type == SOCK_STREAM)
+		err = __vsock_stream_recvmsg(sk, msg, len, flags);
+	else
+		err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);
 
 out:
 	release_sock(sk);

From fbe70c480796d9052fcc786c76e6b029acb1c7bc Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:10:49 +0300
Subject: [PATCH 1525/3292] af_vsock: implement send logic for SEQPACKET

Update current stream enqueue function for SEQPACKET
support:
1) Call transport's seqpacket enqueue callback.
2) Return value from enqueue function is whole record length or error
   for SOCK_SEQPACKET.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_vsock.h   |  2 ++
 net/vmw_vsock/af_vsock.c | 20 +++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 4d7cf6b2aca2..d6745d8b8f3e 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -138,6 +138,8 @@ struct vsock_transport {
 	/* SEQ_PACKET. */
 	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
 				     int flags);
+	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
+				 size_t len);
 
 	/* Notification. */
 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 87ae26b2e3e1..9e0cc07e3caf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1808,9 +1808,13 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
 		 * responsibility to check how many bytes we were able to send.
 		 */
 
-		written = transport->stream_enqueue(
-				vsk, msg,
-				len - total_written);
+		if (sk->sk_type == SOCK_SEQPACKET) {
+			written = transport->seqpacket_enqueue(vsk,
+						msg, len - total_written);
+		} else {
+			written = transport->stream_enqueue(vsk,
+					msg, len - total_written);
+		}
 		if (written < 0) {
 			err = -ENOMEM;
 			goto out_err;
@@ -1826,8 +1830,14 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 out_err:
-	if (total_written > 0)
-		err = total_written;
+	if (total_written > 0) {
+		/* Return number of written bytes only if:
+		 * 1) SOCK_STREAM socket.
+		 * 2) SOCK_SEQPACKET socket when whole buffer is sent.
+		 */
+		if (sk->sk_type == SOCK_STREAM || total_written == len)
+			err = total_written;
+	}
 out:
 	release_sock(sk);
 	return err;

From 0798e78b102b79ed9fe4b2beeb18cf0db117c79b Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:11:04 +0300
Subject: [PATCH 1526/3292] af_vsock: rest of SEQPACKET support

Add socket ops for SEQPACKET type and .seqpacket_allow() callback
to query transports if they support SEQPACKET. Also split path
for data check for STREAM and SEQPACKET branches.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_vsock.h   |  2 ++
 net/vmw_vsock/af_vsock.c | 48 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index d6745d8b8f3e..ab207677e0a8 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -140,6 +140,8 @@ struct vsock_transport {
 				     int flags);
 	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
 				 size_t len);
+	bool (*seqpacket_allow)(u32 remote_cid);
+	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);
 
 	/* Notification. */
 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9e0cc07e3caf..21a56f52d683 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -452,6 +452,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 		new_transport = transport_dgram;
 		break;
 	case SOCK_STREAM:
+	case SOCK_SEQPACKET:
 		if (vsock_use_local_transport(remote_cid))
 			new_transport = transport_local;
 		else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
@@ -484,6 +485,14 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 	if (!new_transport || !try_module_get(new_transport->module))
 		return -ENODEV;
 
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		if (!new_transport->seqpacket_allow ||
+		    !new_transport->seqpacket_allow(remote_cid)) {
+			module_put(new_transport->module);
+			return -ESOCKTNOSUPPORT;
+		}
+	}
+
 	ret = new_transport->init(vsk, psk);
 	if (ret) {
 		module_put(new_transport->module);
@@ -684,6 +693,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 
 	switch (sk->sk_socket->type) {
 	case SOCK_STREAM:
+	case SOCK_SEQPACKET:
 		spin_lock_bh(&vsock_table_lock);
 		retval = __vsock_bind_connectible(vsk, addr);
 		spin_unlock_bh(&vsock_table_lock);
@@ -770,7 +780,7 @@ static struct sock *__vsock_create(struct net *net,
 
 static bool sock_type_connectible(u16 type)
 {
-	return type == SOCK_STREAM;
+	return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET);
 }
 
 static void __vsock_release(struct sock *sk, int level)
@@ -849,6 +859,16 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_stream_has_data);
 
+static s64 vsock_has_data(struct vsock_sock *vsk)
+{
+	struct sock *sk = sk_vsock(vsk);
+
+	if (sk->sk_type == SOCK_SEQPACKET)
+		return vsk->transport->seqpacket_has_data(vsk);
+	else
+		return vsock_stream_has_data(vsk);
+}
+
 s64 vsock_stream_has_space(struct vsock_sock *vsk)
 {
 	return vsk->transport->stream_has_space(vsk);
@@ -1857,7 +1877,7 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
 	err = 0;
 	transport = vsk->transport;
 
-	while ((data = vsock_stream_has_data(vsk)) == 0) {
+	while ((data = vsock_has_data(vsk)) == 0) {
 		prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
 
 		if (sk->sk_err != 0 ||
@@ -2120,6 +2140,27 @@ static const struct proto_ops vsock_stream_ops = {
 	.sendpage = sock_no_sendpage,
 };
 
+static const struct proto_ops vsock_seqpacket_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = vsock_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = vsock_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = vsock_connectible_setsockopt,
+	.getsockopt = vsock_connectible_getsockopt,
+	.sendmsg = vsock_connectible_sendmsg,
+	.recvmsg = vsock_connectible_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
 static int vsock_create(struct net *net, struct socket *sock,
 			int protocol, int kern)
 {
@@ -2140,6 +2181,9 @@ static int vsock_create(struct net *net, struct socket *sock,
 	case SOCK_STREAM:
 		sock->ops = &vsock_stream_ops;
 		break;
+	case SOCK_SEQPACKET:
+		sock->ops = &vsock_seqpacket_ops;
+		break;
 	default:
 		return -ESOCKTNOSUPPORT;
 	}

From 8cb48554ad822fb8553380b4781ea65f1e3ca7bb Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:11:18 +0300
Subject: [PATCH 1527/3292] af_vsock: update comments for stream sockets

Replace 'stream' to 'connection oriented' in comments as
SEQPACKET is also connection oriented.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 21a56f52d683..67954afef4e1 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -415,8 +415,8 @@ static void vsock_deassign_transport(struct vsock_sock *vsk)
 
 /* Assign a transport to a socket and call the .init transport callback.
  *
- * Note: for stream socket this must be called when vsk->remote_addr is set
- * (e.g. during the connect() or when a connection request on a listener
+ * Note: for connection oriented socket this must be called when vsk->remote_addr
+ * is set (e.g. during the connect() or when a connection request on a listener
  * socket is received).
  * The vsk->remote_addr is used to decide which transport to use:
  *  - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
@@ -470,10 +470,10 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 			return 0;
 
 		/* transport->release() must be called with sock lock acquired.
-		 * This path can only be taken during vsock_stream_connect(),
-		 * where we have already held the sock lock.
-		 * In the other cases, this function is called on a new socket
-		 * which is not assigned to any transport.
+		 * This path can only be taken during vsock_connect(), where we
+		 * have already held the sock lock. In the other cases, this
+		 * function is called on a new socket which is not assigned to
+		 * any transport.
 		 */
 		vsk->transport->release(vsk);
 		vsock_deassign_transport(vsk);
@@ -658,9 +658,10 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
 
 	vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
 
-	/* Remove stream sockets from the unbound list and add them to the hash
-	 * table for easy lookup by its address.  The unbound list is simply an
-	 * extra entry at the end of the hash table, a trick used by AF_UNIX.
+	/* Remove connection oriented sockets from the unbound list and add them
+	 * to the hash table for easy lookup by its address.  The unbound list
+	 * is simply an extra entry at the end of the hash table, a trick used
+	 * by AF_UNIX.
 	 */
 	__vsock_remove_bound(vsk);
 	__vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
@@ -962,10 +963,10 @@ static int vsock_shutdown(struct socket *sock, int mode)
 	if ((mode & ~SHUTDOWN_MASK) || !mode)
 		return -EINVAL;
 
-	/* If this is a STREAM socket and it is not connected then bail out
-	 * immediately.  If it is a DGRAM socket then we must first kick the
-	 * socket so that it wakes up from any sleeping calls, for example
-	 * recv(), and then afterwards return the error.
+	/* If this is a connection oriented socket and it is not connected then
+	 * bail out immediately.  If it is a DGRAM socket then we must first
+	 * kick the socket so that it wakes up from any sleeping calls, for
+	 * example recv(), and then afterwards return the error.
 	 */
 
 	sk = sock->sk;
@@ -1737,7 +1738,9 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
 
 	transport = vsk->transport;
 
-	/* Callers should not provide a destination with stream sockets. */
+	/* Callers should not provide a destination with connection oriented
+	 * sockets.
+	 */
 	if (msg->msg_namelen) {
 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
 		goto out;

From b93f8877c1f2e3d3dcdec7759c5de3d67777f45d Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:11:31 +0300
Subject: [PATCH 1528/3292] virtio/vsock: set packet's type in
 virtio_transport_send_pkt_info()

There is no need to set type of packet which differs from type
of socket, so move passing type of packet from 'info' structure
to  'virtio_transport_send_pkt_info()' function. Since at current
time only stream type is supported, set it directly in 'virtio_
transport_send_pkt_info()', so callers don't need to set it.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 902cb6dd710b..6503a8370130 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -179,6 +179,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 	struct virtio_vsock_pkt *pkt;
 	u32 pkt_len = info->pkt_len;
 
+	info->type = VIRTIO_VSOCK_TYPE_STREAM;
+
 	t_ops = virtio_transport_get_ops(vsk);
 	if (unlikely(!t_ops))
 		return -EFAULT;
@@ -270,12 +272,10 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
 EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
 
 static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
-					       int type,
 					       struct virtio_vsock_hdr *hdr)
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
-		.type = type,
 		.vsk = vsk,
 	};
 
@@ -383,11 +383,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 	 * messages, we set the limit to a high value. TODO: experiment
 	 * with different values.
 	 */
-	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
-		virtio_transport_send_credit_update(vsk,
-						    VIRTIO_VSOCK_TYPE_STREAM,
-						    NULL);
-	}
+	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+		virtio_transport_send_credit_update(vsk, NULL);
 
 	return total;
 
@@ -496,8 +493,7 @@ void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
 
 	vvs->buf_alloc = *val;
 
-	virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
-					    NULL);
+	virtio_transport_send_credit_update(vsk, NULL);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
 
@@ -624,7 +620,6 @@ int virtio_transport_connect(struct vsock_sock *vsk)
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_REQUEST,
-		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.vsk = vsk,
 	};
 
@@ -636,7 +631,6 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
-		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.flags = (mode & RCV_SHUTDOWN ?
 			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
 			 (mode & SEND_SHUTDOWN ?
@@ -665,7 +659,6 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_RW,
-		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.msg = msg,
 		.pkt_len = len,
 		.vsk = vsk,
@@ -688,7 +681,6 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_RST,
-		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.reply = !!pkt,
 		.vsk = vsk,
 	};
@@ -1000,7 +992,6 @@ virtio_transport_send_response(struct vsock_sock *vsk,
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_RESPONSE,
-		.type = VIRTIO_VSOCK_TYPE_STREAM,
 		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
 		.remote_port = le32_to_cpu(pkt->hdr.src_port),
 		.reply = true,

From c10844c5979992fde734f566357059e4a7c815bc Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:12:08 +0300
Subject: [PATCH 1529/3292] virtio/vsock: simplify credit update function API

This function is static and 'hdr' arg was always NULL.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 6503a8370130..ad0d34d41444 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -271,8 +271,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
 }
 EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
 
-static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
-					       struct virtio_vsock_hdr *hdr)
+static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
 {
 	struct virtio_vsock_pkt_info info = {
 		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
@@ -384,7 +383,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 	 * with different values.
 	 */
 	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
-		virtio_transport_send_credit_update(vsk, NULL);
+		virtio_transport_send_credit_update(vsk);
 
 	return total;
 
@@ -493,7 +492,7 @@ void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
 
 	vvs->buf_alloc = *val;
 
-	virtio_transport_send_credit_update(vsk, NULL);
+	virtio_transport_send_credit_update(vsk);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
 

From f07b2a5b04d4a50d931a0afe4e3e114ce09a2e4b Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:12:22 +0300
Subject: [PATCH 1530/3292] virtio/vsock: defines and constants for SEQPACKET

Add set of defines and constants for SOCK_SEQPACKET support
in vsock.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/virtio_vsock.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 1d57ed3d84d2..3dd3555b2740 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -38,6 +38,9 @@
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
+/* The feature bitmap for virtio vsock */
+#define VIRTIO_VSOCK_F_SEQPACKET	1	/* SOCK_SEQPACKET supported */
+
 struct virtio_vsock_config {
 	__le64 guest_cid;
 } __attribute__((packed));
@@ -65,6 +68,7 @@ struct virtio_vsock_hdr {
 
 enum virtio_vsock_type {
 	VIRTIO_VSOCK_TYPE_STREAM = 1,
+	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
 };
 
 enum virtio_vsock_op {
@@ -91,4 +95,9 @@ enum virtio_vsock_shutdown {
 	VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
 };
 
+/* VIRTIO_VSOCK_OP_RW flags values */
+enum virtio_vsock_rw {
+	VIRTIO_VSOCK_SEQ_EOR = 1,
+};
+
 #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */

From 44931195a5412a97c46d299227fbabad4e09010d Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:12:38 +0300
Subject: [PATCH 1531/3292] virtio/vsock: dequeue callback for SOCK_SEQPACKET

Callback fetches RW packets from rx queue of socket until whole record
is copied(if user's buffer is full, user is not woken up). This is done
to not stall sender, because if we wake up user and it leaves syscall,
nobody will send credit update for rest of record, and sender will wait
for next enter of read syscall at receiver's side. So if user buffer is
full, we just send credit update and drop data.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_vsock.h            |  5 ++
 net/vmw_vsock/virtio_transport_common.c | 84 +++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index dc636b727179..1d9a302cb91d 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -36,6 +36,7 @@ struct virtio_vsock_sock {
 	u32 rx_bytes;
 	u32 buf_alloc;
 	struct list_head rx_queue;
+	u32 msg_count;
 };
 
 struct virtio_vsock_pkt {
@@ -80,6 +81,10 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
 			       size_t len, int flags);
 
+ssize_t
+virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
+				   struct msghdr *msg,
+				   int flags);
 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index ad0d34d41444..1e1df19ec164 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -393,6 +393,78 @@ out:
 	return err;
 }
 
+static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
+						 struct msghdr *msg,
+						 int flags)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	struct virtio_vsock_pkt *pkt;
+	int dequeued_len = 0;
+	size_t user_buf_len = msg_data_left(msg);
+	bool copy_failed = false;
+	bool msg_ready = false;
+
+	spin_lock_bh(&vvs->rx_lock);
+
+	if (vvs->msg_count == 0) {
+		spin_unlock_bh(&vvs->rx_lock);
+		return 0;
+	}
+
+	while (!msg_ready) {
+		pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
+
+		if (!copy_failed) {
+			size_t pkt_len;
+			size_t bytes_to_copy;
+
+			pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
+			bytes_to_copy = min(user_buf_len, pkt_len);
+
+			if (bytes_to_copy) {
+				int err;
+
+				/* sk_lock is held by caller so no one else can dequeue.
+				 * Unlock rx_lock since memcpy_to_msg() may sleep.
+				 */
+				spin_unlock_bh(&vvs->rx_lock);
+
+				err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
+				if (err) {
+					/* Copy of message failed, set flag to skip
+					 * copy path for rest of fragments. Rest of
+					 * fragments will be freed without copy.
+					 */
+					copy_failed = true;
+					dequeued_len = err;
+				} else {
+					user_buf_len -= bytes_to_copy;
+				}
+
+				spin_lock_bh(&vvs->rx_lock);
+			}
+
+			if (dequeued_len >= 0)
+				dequeued_len += pkt_len;
+		}
+
+		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+			msg_ready = true;
+			vvs->msg_count--;
+		}
+
+		virtio_transport_dec_rx_pkt(vvs, pkt);
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
+
+	spin_unlock_bh(&vvs->rx_lock);
+
+	virtio_transport_send_credit_update(vsk);
+
+	return dequeued_len;
+}
+
 ssize_t
 virtio_transport_stream_dequeue(struct vsock_sock *vsk,
 				struct msghdr *msg,
@@ -405,6 +477,18 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk,
 }
 EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
 
+ssize_t
+virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
+				   struct msghdr *msg,
+				   int flags)
+{
+	if (flags & MSG_PEEK)
+		return -EOPNOTSUPP;
+
+	return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
+
 int
 virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
 			       struct msghdr *msg,

From e4b1ef152f53d5ea0cae89f12f241f7293657718 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:12:53 +0300
Subject: [PATCH 1532/3292] virtio/vsock: add SEQPACKET receive logic

Update current receive logic for SEQPACKET support: performs
check for packet and socket types on receive(if mismatch, then
reset connection). Increment EOR counter on receive. Also if
buffer of new packet was appended to buffer of last packet in
rx queue, update flags of last packet with flags of new packet.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 34 ++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 1e1df19ec164..3a658ff8fccb 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -165,6 +165,14 @@ void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
 }
 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
 
+static u16 virtio_transport_get_type(struct sock *sk)
+{
+	if (sk->sk_type == SOCK_STREAM)
+		return VIRTIO_VSOCK_TYPE_STREAM;
+	else
+		return VIRTIO_VSOCK_TYPE_SEQPACKET;
+}
+
 /* This function can only be used on connecting/connected sockets,
  * since a socket assigned to a transport is required.
  *
@@ -987,6 +995,9 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 		goto out;
 	}
 
+	if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+		vvs->msg_count++;
+
 	/* Try to copy small packets into the buffer of last packet queued,
 	 * to avoid wasting memory queueing the entire buffer with a small
 	 * payload.
@@ -998,13 +1009,18 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 					   struct virtio_vsock_pkt, list);
 
 		/* If there is space in the last packet queued, we copy the
-		 * new packet in its buffer.
+		 * new packet in its buffer. We avoid this if the last packet
+		 * queued has VIRTIO_VSOCK_SEQ_EOR set, because this is
+		 * delimiter of SEQPACKET record, so 'pkt' is the first packet
+		 * of a new record.
 		 */
-		if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+		if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
+		    !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) {
 			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
 			       pkt->len);
 			last_pkt->len += pkt->len;
 			free_pkt = true;
+			last_pkt->hdr.flags |= pkt->hdr.flags;
 			goto out;
 		}
 	}
@@ -1170,6 +1186,12 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
 	return 0;
 }
 
+static bool virtio_transport_valid_type(u16 type)
+{
+	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
+	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+}
+
 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
  * lock.
  */
@@ -1195,7 +1217,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 					le32_to_cpu(pkt->hdr.buf_alloc),
 					le32_to_cpu(pkt->hdr.fwd_cnt));
 
-	if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
+	if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
 		(void)virtio_transport_reset_no_sock(t, pkt);
 		goto free_pkt;
 	}
@@ -1212,6 +1234,12 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 		}
 	}
 
+	if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
+		(void)virtio_transport_reset_no_sock(t, pkt);
+		sock_put(sk);
+		goto free_pkt;
+	}
+
 	vsk = vsock_sk(sk);
 
 	lock_sock(sk);

From 9ac841f5e9f261245d9d2841ad123566bd160a6e Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:13:06 +0300
Subject: [PATCH 1533/3292] virtio/vsock: rest of SOCK_SEQPACKET support

Small updates to make SOCK_SEQPACKET work:
1) Send SHUTDOWN on socket close for SEQPACKET type.
2) Set SEQPACKET packet type during send.
3) Set 'VIRTIO_VSOCK_SEQ_EOR' bit in flags for last
   packet of message.
4) Implement data check function for SEQPACKET.
5) Check for max datagram size.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_vsock.h            |  5 +++
 net/vmw_vsock/virtio_transport_common.c | 41 +++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 1d9a302cb91d..35d7eedb5e8e 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -81,12 +81,17 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
 			       size_t len, int flags);
 
+int
+virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
+				   struct msghdr *msg,
+				   size_t len);
 ssize_t
 virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
 				   struct msghdr *msg,
 				   int flags);
 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
+u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);
 
 int virtio_transport_do_socket_init(struct vsock_sock *vsk,
 				 struct vsock_sock *psk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 3a658ff8fccb..23704a6bc437 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -74,6 +74,10 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
 		err = memcpy_from_msg(pkt->buf, info->msg, len);
 		if (err)
 			goto out;
+
+		if (msg_data_left(info->msg) == 0 &&
+		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET)
+			pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
 	}
 
 	trace_virtio_transport_alloc_pkt(src_cid, src_port,
@@ -187,7 +191,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 	struct virtio_vsock_pkt *pkt;
 	u32 pkt_len = info->pkt_len;
 
-	info->type = VIRTIO_VSOCK_TYPE_STREAM;
+	info->type = virtio_transport_get_type(sk_vsock(vsk));
 
 	t_ops = virtio_transport_get_ops(vsk);
 	if (unlikely(!t_ops))
@@ -497,6 +501,26 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
 
+int
+virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
+				   struct msghdr *msg,
+				   size_t len)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+
+	spin_lock_bh(&vvs->tx_lock);
+
+	if (len > vvs->peer_buf_alloc) {
+		spin_unlock_bh(&vvs->tx_lock);
+		return -EMSGSIZE;
+	}
+
+	spin_unlock_bh(&vvs->tx_lock);
+
+	return virtio_transport_stream_enqueue(vsk, msg, len);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
+
 int
 virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
 			       struct msghdr *msg,
@@ -519,6 +543,19 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
 
+u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	u32 msg_count;
+
+	spin_lock_bh(&vvs->rx_lock);
+	msg_count = vvs->msg_count;
+	spin_unlock_bh(&vvs->rx_lock);
+
+	return msg_count;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
+
 static s64 virtio_transport_has_space(struct vsock_sock *vsk)
 {
 	struct virtio_vsock_sock *vvs = vsk->trans;
@@ -931,7 +968,7 @@ void virtio_transport_release(struct vsock_sock *vsk)
 	struct sock *sk = &vsk->sk;
 	bool remove_sock = true;
 
-	if (sk->sk_type == SOCK_STREAM)
+	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
 		remove_sock = virtio_transport_close(vsk);
 
 	if (remove_sock) {

From 53efbba12cc7ea2aa47d888532fdc1b3b43afef0 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:13:22 +0300
Subject: [PATCH 1534/3292] virtio/vsock: enable SEQPACKET for transport

To make transport work with SOCK_SEQPACKET add two things:
1) SOCK_SEQPACKET ops for virtio transport and 'seqpacket_allow()'
   callback.
2) Handling of SEQPACKET bit: guest tries to negotiate it with vhost,
   so feature will be enabled only if bit is negotiated with device.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 2700a63ab095..e73ce652bf3c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -62,6 +62,7 @@ struct virtio_vsock {
 	struct virtio_vsock_event event_list[8];
 
 	u32 guest_cid;
+	bool seqpacket_allow;
 };
 
 static u32 virtio_transport_get_local_cid(void)
@@ -443,6 +444,8 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
 	queue_work(virtio_vsock_workqueue, &vsock->rx_work);
 }
 
+static bool virtio_transport_seqpacket_allow(u32 remote_cid);
+
 static struct virtio_transport virtio_transport = {
 	.transport = {
 		.module                   = THIS_MODULE,
@@ -469,6 +472,11 @@ static struct virtio_transport virtio_transport = {
 		.stream_is_active         = virtio_transport_stream_is_active,
 		.stream_allow             = virtio_transport_stream_allow,
 
+		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
+		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
+		.seqpacket_allow          = virtio_transport_seqpacket_allow,
+		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,
+
 		.notify_poll_in           = virtio_transport_notify_poll_in,
 		.notify_poll_out          = virtio_transport_notify_poll_out,
 		.notify_recv_init         = virtio_transport_notify_recv_init,
@@ -485,6 +493,19 @@ static struct virtio_transport virtio_transport = {
 	.send_pkt = virtio_transport_send_pkt,
 };
 
+static bool virtio_transport_seqpacket_allow(u32 remote_cid)
+{
+	struct virtio_vsock *vsock;
+	bool seqpacket_allow;
+
+	rcu_read_lock();
+	vsock = rcu_dereference(the_virtio_vsock);
+	seqpacket_allow = vsock->seqpacket_allow;
+	rcu_read_unlock();
+
+	return seqpacket_allow;
+}
+
 static void virtio_transport_rx_work(struct work_struct *work)
 {
 	struct virtio_vsock *vsock =
@@ -608,10 +629,14 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 	vsock->event_run = true;
 	mutex_unlock(&vsock->event_lock);
 
+	if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+		vsock->seqpacket_allow = true;
+
 	vdev->priv = vsock;
 	rcu_assign_pointer(the_virtio_vsock, vsock);
 
 	mutex_unlock(&the_virtio_vsock_mutex);
+
 	return 0;
 
 out:
@@ -695,6 +720,7 @@ static struct virtio_device_id id_table[] = {
 };
 
 static unsigned int features[] = {
+	VIRTIO_VSOCK_F_SEQPACKET
 };
 
 static struct virtio_driver virtio_vsock_driver = {

From ced7b713711fdd8f99d8d04dc53451441d194c60 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:13:37 +0300
Subject: [PATCH 1535/3292] vhost/vsock: support SEQPACKET for transport

When received packet is copied to guests's rx queue, data buffers
of rx queue could be smaller that data buffer of input packet, so
data of input packet is copied to each rx buffer, thus each rx
buffer will be a packet with dynamically created header. Fields
of such header are initialized from header of input packet(except
length field which value is depends on number of bytes copied to
rx buffer). But in SEQPACKET case, we also need to take care of
record delimeter bit: if input packet has this bit set, we don't
copy it to header of packet in rx buffer, except case when such
rx buffer is last part of input packet. Otherwise, we will get
sequence of packets with delimeter bit set, thus braking record
bounds.
Also remove ignore of non-stream type of packets, handle SEQPACKET
feature bit.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vsock.c | 56 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 5e78fb719602..119f08491d3c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -31,7 +31,8 @@
 
 enum {
 	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
-			       (1ULL << VIRTIO_F_ACCESS_PLATFORM)
+			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
+			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
 };
 
 enum {
@@ -56,6 +57,7 @@ struct vhost_vsock {
 	atomic_t queued_replies;
 
 	u32 guest_cid;
+	bool seqpacket_allow;
 };
 
 static u32 vhost_transport_get_local_cid(void)
@@ -112,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		size_t nbytes;
 		size_t iov_len, payload_len;
 		int head;
+		bool restore_flag = false;
 
 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
@@ -168,9 +171,26 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		/* If the packet is greater than the space available in the
 		 * buffer, we split it using multiple buffers.
 		 */
-		if (payload_len > iov_len - sizeof(pkt->hdr))
+		if (payload_len > iov_len - sizeof(pkt->hdr)) {
 			payload_len = iov_len - sizeof(pkt->hdr);
 
+			/* As we are copying pieces of large packet's buffer to
+			 * small rx buffers, headers of packets in rx queue are
+			 * created dynamically and are initialized with header
+			 * of current packet(except length). But in case of
+			 * SOCK_SEQPACKET, we also must clear record delimeter
+			 * bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
+			 * packet with delimeter(which marks end of record),
+			 * there will be sequence of packets with delimeter
+			 * bit set. After initialized header will be copied to
+			 * rx buffer, this bit will be restored.
+			 */
+			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+				pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+				restore_flag = true;
+			}
+		}
+
 		/* Set the correct length in the header */
 		pkt->hdr.len = cpu_to_le32(payload_len);
 
@@ -204,6 +224,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 		 * to send it with the next available buffer.
 		 */
 		if (pkt->off < pkt->len) {
+			if (restore_flag)
+				pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+
 			/* We are queueing the same virtio_vsock_pkt to handle
 			 * the remaining bytes, and we want to deliver it
 			 * to monitoring devices in the next iteration.
@@ -354,8 +377,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
-	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
-		pkt->len = le32_to_cpu(pkt->hdr.len);
+	pkt->len = le32_to_cpu(pkt->hdr.len);
 
 	/* No payload */
 	if (!pkt->len)
@@ -398,6 +420,8 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
 	return val < vq->num;
 }
 
+static bool vhost_transport_seqpacket_allow(u32 remote_cid);
+
 static struct virtio_transport vhost_transport = {
 	.transport = {
 		.module                   = THIS_MODULE,
@@ -424,6 +448,11 @@ static struct virtio_transport vhost_transport = {
 		.stream_is_active         = virtio_transport_stream_is_active,
 		.stream_allow             = virtio_transport_stream_allow,
 
+		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
+		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
+		.seqpacket_allow          = vhost_transport_seqpacket_allow,
+		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,
+
 		.notify_poll_in           = virtio_transport_notify_poll_in,
 		.notify_poll_out          = virtio_transport_notify_poll_out,
 		.notify_recv_init         = virtio_transport_notify_recv_init,
@@ -441,6 +470,22 @@ static struct virtio_transport vhost_transport = {
 	.send_pkt = vhost_transport_send_pkt,
 };
 
+static bool vhost_transport_seqpacket_allow(u32 remote_cid)
+{
+	struct vhost_vsock *vsock;
+	bool seqpacket_allow = false;
+
+	rcu_read_lock();
+	vsock = vhost_vsock_get(remote_cid);
+
+	if (vsock)
+		seqpacket_allow = vsock->seqpacket_allow;
+
+	rcu_read_unlock();
+
+	return seqpacket_allow;
+}
+
 static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 {
 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -785,6 +830,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
 			goto err;
 	}
 
+	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
+		vsock->seqpacket_allow = true;
+
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
 		vq = &vsock->vqs[i];
 		mutex_lock(&vq->mutex);

From 6e90a57795aa6ab2ab65fd6ac76ee0b245e5988a Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:13:50 +0300
Subject: [PATCH 1536/3292] vsock/loopback: enable SEQPACKET for transport

Add SEQPACKET ops for loopback transport and 'seqpacket_allow()'
callback.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/vsock_loopback.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index a45f7ffca8c5..169a8cf65b39 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -63,6 +63,8 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
 	return 0;
 }
 
+static bool vsock_loopback_seqpacket_allow(u32 remote_cid);
+
 static struct virtio_transport loopback_transport = {
 	.transport = {
 		.module                   = THIS_MODULE,
@@ -89,6 +91,11 @@ static struct virtio_transport loopback_transport = {
 		.stream_is_active         = virtio_transport_stream_is_active,
 		.stream_allow             = virtio_transport_stream_allow,
 
+		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
+		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
+		.seqpacket_allow          = vsock_loopback_seqpacket_allow,
+		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,
+
 		.notify_poll_in           = virtio_transport_notify_poll_in,
 		.notify_poll_out          = virtio_transport_notify_poll_out,
 		.notify_recv_init         = virtio_transport_notify_recv_init,
@@ -105,6 +112,11 @@ static struct virtio_transport loopback_transport = {
 	.send_pkt = vsock_loopback_send_pkt,
 };
 
+static bool vsock_loopback_seqpacket_allow(u32 remote_cid)
+{
+	return true;
+}
+
 static void vsock_loopback_work(struct work_struct *work)
 {
 	struct vsock_loopback *vsock =

From 41b792d7a86dd7fc77d5877e814d322e9f7efa75 Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:14:04 +0300
Subject: [PATCH 1537/3292] vsock_test: add SOCK_SEQPACKET tests

Implement two tests of SOCK_SEQPACKET socket: first sends data by
several 'write()'s and checks that number of 'read()' were same.
Second test checks MSG_TRUNC flag. Cases for connect(), bind(),
etc. are not tested, because it is same as for stream socket.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/vsock/util.c       |  32 +++++++--
 tools/testing/vsock/util.h       |   3 +
 tools/testing/vsock/vsock_test.c | 116 +++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 93cbd6f603f9..2acbb7703c6a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -84,7 +84,7 @@ void vsock_wait_remote_close(int fd)
 }
 
 /* Connect to <cid, port> and return the file descriptor. */
-int vsock_stream_connect(unsigned int cid, unsigned int port)
+static int vsock_connect(unsigned int cid, unsigned int port, int type)
 {
 	union {
 		struct sockaddr sa;
@@ -101,7 +101,7 @@ int vsock_stream_connect(unsigned int cid, unsigned int port)
 
 	control_expectln("LISTENING");
 
-	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	fd = socket(AF_VSOCK, type, 0);
 
 	timeout_begin(TIMEOUT);
 	do {
@@ -120,11 +120,21 @@ int vsock_stream_connect(unsigned int cid, unsigned int port)
 	return fd;
 }
 
+int vsock_stream_connect(unsigned int cid, unsigned int port)
+{
+	return vsock_connect(cid, port, SOCK_STREAM);
+}
+
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
+{
+	return vsock_connect(cid, port, SOCK_SEQPACKET);
+}
+
 /* Listen on <cid, port> and return the first incoming connection.  The remote
  * address is stored to clientaddrp.  clientaddrp may be NULL.
  */
-int vsock_stream_accept(unsigned int cid, unsigned int port,
-			struct sockaddr_vm *clientaddrp)
+static int vsock_accept(unsigned int cid, unsigned int port,
+			struct sockaddr_vm *clientaddrp, int type)
 {
 	union {
 		struct sockaddr sa;
@@ -145,7 +155,7 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
 	int client_fd;
 	int old_errno;
 
-	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	fd = socket(AF_VSOCK, type, 0);
 
 	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
 		perror("bind");
@@ -189,6 +199,18 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
 	return client_fd;
 }
 
+int vsock_stream_accept(unsigned int cid, unsigned int port,
+			struct sockaddr_vm *clientaddrp)
+{
+	return vsock_accept(cid, port, clientaddrp, SOCK_STREAM);
+}
+
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+			   struct sockaddr_vm *clientaddrp)
+{
+	return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET);
+}
+
 /* Transmit one byte and check the return value.
  *
  * expected_ret:
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e53dd09d26d9..a3375ad2fb7f 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -36,8 +36,11 @@ struct test_case {
 void init_signals(void);
 unsigned int parse_cid(const char *str);
 int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
 int vsock_stream_accept(unsigned int cid, unsigned int port,
 			struct sockaddr_vm *clientaddrp);
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+			   struct sockaddr_vm *clientaddrp);
 void vsock_wait_remote_close(int fd);
 void send_byte(int fd, int expected_ret, int flags);
 void recv_byte(int fd, int expected_ret, int flags);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 5a4fb80fa832..67766bfe176f 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -14,6 +14,8 @@
 #include <errno.h>
 #include <unistd.h>
 #include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/socket.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -279,6 +281,110 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
 	close(fd);
 }
 
+#define MESSAGES_CNT 7
+static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Send several messages, one with MSG_EOR flag */
+	for (int i = 0; i < MESSAGES_CNT; i++)
+		send_byte(fd, 1, 0);
+
+	control_writeln("SENDDONE");
+	close(fd);
+}
+
+static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
+{
+	int fd;
+	char buf[16];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+
+	fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SENDDONE");
+	iov.iov_base = buf;
+	iov.iov_len = sizeof(buf);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	for (int i = 0; i < MESSAGES_CNT; i++) {
+		if (recvmsg(fd, &msg, 0) != 1) {
+			perror("message bound violated");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	close(fd);
+}
+
+#define MESSAGE_TRUNC_SZ 32
+static void test_seqpacket_msg_trunc_client(const struct test_opts *opts)
+{
+	int fd;
+	char buf[MESSAGE_TRUNC_SZ];
+
+	fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) {
+		perror("send failed");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("SENDDONE");
+	close(fd);
+}
+
+static void test_seqpacket_msg_trunc_server(const struct test_opts *opts)
+{
+	int fd;
+	char buf[MESSAGE_TRUNC_SZ / 2];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+
+	fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SENDDONE");
+	iov.iov_base = buf;
+	iov.iov_len = sizeof(buf);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	ssize_t ret = recvmsg(fd, &msg, MSG_TRUNC);
+
+	if (ret != MESSAGE_TRUNC_SZ) {
+		printf("%zi\n", ret);
+		perror("MSG_TRUNC doesn't work");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!(msg.msg_flags & MSG_TRUNC)) {
+		fprintf(stderr, "MSG_TRUNC expected\n");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -309,6 +415,16 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_msg_peek_client,
 		.run_server = test_stream_msg_peek_server,
 	},
+	{
+		.name = "SOCK_SEQPACKET msg bounds",
+		.run_client = test_seqpacket_msg_bounds_client,
+		.run_server = test_seqpacket_msg_bounds_server,
+	},
+	{
+		.name = "SOCK_SEQPACKET MSG_TRUNC flag",
+		.run_client = test_seqpacket_msg_trunc_client,
+		.run_server = test_seqpacket_msg_trunc_server,
+	},
 	{},
 };
 

From 184039eefeaeab02abf7552504d2950dccf8785b Mon Sep 17 00:00:00 2001
From: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Date: Fri, 11 Jun 2021 14:14:20 +0300
Subject: [PATCH 1538/3292] virtio/vsock: update trace event for SEQPACKET

Add SEQPACKET socket type to vsock trace event.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/vsock_virtio_transport_common.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h
index 6782213778be..d0b3f0ea9ba1 100644
--- a/include/trace/events/vsock_virtio_transport_common.h
+++ b/include/trace/events/vsock_virtio_transport_common.h
@@ -9,9 +9,12 @@
 #include <linux/tracepoint.h>
 
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM);
+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_SEQPACKET);
 
 #define show_type(val) \
-	__print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" })
+	__print_symbolic(val, \
+			 { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }, \
+			 { VIRTIO_VSOCK_TYPE_SEQPACKET, "SEQPACKET" })
 
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID);
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST);

From e4517d8a7f07b18f4f0e68940e2822a1b92b471f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:22 -0500
Subject: [PATCH 1539/3292] net: qualcomm: rmnet: use ip_is_fragment()

In rmnet_map_ipv4_dl_csum_trailer() use ip_is_fragment() to
determine whether a socket buffer contains a packet fragment.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index cecf72be5102..34bd1a98a101 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -50,8 +50,9 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	__be16 addend;
 
 	ip4h = (struct iphdr *)(skb->data);
-	if ((ntohs(ip4h->frag_off) & IP_MF) ||
-	    ((ntohs(ip4h->frag_off) & IP_OFFSET) > 0)) {
+
+	/* We don't support checksum offload on IPv4 fragments */
+	if (ip_is_fragment(ip4h)) {
 		priv->stats.csum_fragmented_pkt++;
 		return -EOPNOTSUPP;
 	}

From 75db5b07f8c393c216fd20f7adc9a167fc684c23 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:23 -0500
Subject: [PATCH 1540/3292] net: qualcomm: rmnet: eliminate some ifdefs

If IPV6 is not enabled in the kernel configuration, the RMNet
checksum code indicates a buffer containing an IPv6 packet is not
supported.  The same thing happens if a buffer contains something
other than an IPv4 or IPv6 packet.

We can rearrange things a bit in two functions so that some #ifdef
calls can simply be eliminated.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 56 ++++++++-----------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 34bd1a98a101..b8e504ac7fb1 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -431,21 +431,15 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 		return -EINVAL;
 	}
 
-	if (skb->protocol == htons(ETH_P_IP)) {
+	if (skb->protocol == htons(ETH_P_IP))
 		return rmnet_map_ipv4_dl_csum_trailer(skb, csum_trailer, priv);
-	} else if (skb->protocol == htons(ETH_P_IPV6)) {
-#if IS_ENABLED(CONFIG_IPV6)
-		return rmnet_map_ipv6_dl_csum_trailer(skb, csum_trailer, priv);
-#else
-		priv->stats.csum_err_invalid_ip_version++;
-		return -EPROTONOSUPPORT;
-#endif
-	} else {
-		priv->stats.csum_err_invalid_ip_version++;
-		return -EPROTONOSUPPORT;
-	}
 
-	return 0;
+	if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6))
+		return rmnet_map_ipv6_dl_csum_trailer(skb, csum_trailer, priv);
+
+	priv->stats.csum_err_invalid_ip_version++;
+
+	return -EPROTONOSUPPORT;
 }
 
 static void rmnet_map_v4_checksum_uplink_packet(struct sk_buff *skb,
@@ -462,28 +456,26 @@ static void rmnet_map_v4_checksum_uplink_packet(struct sk_buff *skb,
 		     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))))
 		goto sw_csum;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		iphdr = (char *)ul_header +
-			sizeof(struct rmnet_map_ul_csum_header);
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		goto sw_csum;
 
-		if (skb->protocol == htons(ETH_P_IP)) {
-			rmnet_map_ipv4_ul_csum_header(iphdr, ul_header, skb);
-			priv->stats.csum_hw++;
-			return;
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-#if IS_ENABLED(CONFIG_IPV6)
-			rmnet_map_ipv6_ul_csum_header(iphdr, ul_header, skb);
-			priv->stats.csum_hw++;
-			return;
-#else
-			priv->stats.csum_err_invalid_ip_version++;
-			goto sw_csum;
-#endif
-		} else {
-			priv->stats.csum_err_invalid_ip_version++;
-		}
+	iphdr = (char *)ul_header +
+		sizeof(struct rmnet_map_ul_csum_header);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		rmnet_map_ipv4_ul_csum_header(iphdr, ul_header, skb);
+		priv->stats.csum_hw++;
+		return;
 	}
 
+	if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) {
+		rmnet_map_ipv6_ul_csum_header(iphdr, ul_header, skb);
+		priv->stats.csum_hw++;
+		return;
+	}
+
+	priv->stats.csum_err_invalid_ip_version++;
+
 sw_csum:
 	memset(ul_header, 0, sizeof(*ul_header));
 

From 1d257f45ef66796526425afc7d0a9d4dbf57fbb9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:24 -0500
Subject: [PATCH 1541/3292] net: qualcomm: rmnet: get rid of some local
 variables

The value passed as an argument to rmnet_map_ipv4_ul_csum_header()
is always an IPv4 header.  Rather than using a local variable, just
have the type of the argument reflect the proper type.

In rmnet_map_ipv6_ul_csum_header() things are defined a little
differently, but make the same basic change there.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index b8e504ac7fb1..ca07b87d7ed7 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -195,15 +195,14 @@ static void rmnet_map_complement_ipv4_txporthdr_csum_field(void *iphdr)
 }
 
 static void
-rmnet_map_ipv4_ul_csum_header(void *iphdr,
+rmnet_map_ipv4_ul_csum_header(struct iphdr *iphdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	struct iphdr *ip4h = iphdr;
 	u16 val;
 
 	val = MAP_CSUM_UL_ENABLED_FLAG;
-	if (ip4h->protocol == IPPROTO_UDP)
+	if (iphdr->protocol == IPPROTO_UDP)
 		val |= MAP_CSUM_UL_UDP_FLAG;
 	val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK;
 
@@ -231,15 +230,14 @@ static void rmnet_map_complement_ipv6_txporthdr_csum_field(void *ip6hdr)
 }
 
 static void
-rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
+rmnet_map_ipv6_ul_csum_header(struct ipv6hdr *ipv6hdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	struct ipv6hdr *ip6h = ip6hdr;
 	u16 val;
 
 	val = MAP_CSUM_UL_ENABLED_FLAG;
-	if (ip6h->nexthdr == IPPROTO_UDP)
+	if (ipv6hdr->nexthdr == IPPROTO_UDP)
 		val |= MAP_CSUM_UL_UDP_FLAG;
 	val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK;
 
@@ -248,7 +246,7 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	rmnet_map_complement_ipv6_txporthdr_csum_field(ip6hdr);
+	rmnet_map_complement_ipv6_txporthdr_csum_field(ipv6hdr);
 }
 #endif
 

From 874a333f7472b2cb57d8528cb26089858ca91005 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:25 -0500
Subject: [PATCH 1542/3292] net: qualcomm: rmnet: simplify
 rmnet_map_get_csum_field()

The checksum fields of the TCP and UDP header structures already
have type __sum16.  We don't support any other protocol headers, so
we can simplify rmnet_map_get_csum_field(), getting rid of the local
variable entirely and just returning the appropriate address.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 20 +++++--------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index ca07b87d7ed7..79f1d516b5cc 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -19,23 +19,13 @@
 static __sum16 *rmnet_map_get_csum_field(unsigned char protocol,
 					 const void *txporthdr)
 {
-	__sum16 *check = NULL;
+	if (protocol == IPPROTO_TCP)
+		return &((struct tcphdr *)txporthdr)->check;
 
-	switch (protocol) {
-	case IPPROTO_TCP:
-		check = &(((struct tcphdr *)txporthdr)->check);
-		break;
+	if (protocol == IPPROTO_UDP)
+		return &((struct udphdr *)txporthdr)->check;
 
-	case IPPROTO_UDP:
-		check = &(((struct udphdr *)txporthdr)->check);
-		break;
-
-	default:
-		check = NULL;
-		break;
-	}
-
-	return check;
+	return NULL;
 }
 
 static int

From 16bf3d33c6b042c894747b96580db392b7e6c0c0 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:26 -0500
Subject: [PATCH 1543/3292] net: qualcomm: rmnet: IPv4 header has zero checksum

In rmnet_map_ipv4_dl_csum_trailer(), an illegal checksum subtraction
is done, subtracting hdr_csum (in host byte order) from csum_value (in
network byte order).  Despite being illegal, it generally works,
because it turns out the value subtracted is (or should be) always 0,
which has the same representation in either byte order.

Doing illegal operations is not good form though, so fix this by
verifying the IP header checksum early in that function.  If its
checksum is non-zero, the packet will be bad, so just return an
error.  This will cause the packet to passed to the IP layer where
it can be dropped.

Thereafter, there is no need subtract the IP header checksum from
the checksum value in the trailer because we know it is zero.
Add a comment explaining this.

This type of packet error is different from other types, so add a
new statistics counter to track this condition.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_config.h    |  1 +
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 41 ++++++++++++-------
 .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c   |  1 +
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 8e64ca98068d..3d3cba56c516 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -49,6 +49,7 @@ struct rmnet_pcpu_stats {
 
 struct rmnet_priv_stats {
 	u64 csum_ok;
+	u64 csum_ip4_header_bad;
 	u64 csum_valid_unset;
 	u64 csum_validation_failed;
 	u64 csum_err_bad_buffer;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 79f1d516b5cc..40d7e0c615f9 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -33,13 +33,21 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 			       struct rmnet_map_dl_csum_trailer *csum_trailer,
 			       struct rmnet_priv *priv)
 {
-	__sum16 *csum_field, csum_temp, pseudo_csum, hdr_csum, ip_payload_csum;
-	u16 csum_value, csum_value_final;
-	struct iphdr *ip4h;
-	void *txporthdr;
+	struct iphdr *ip4h = (struct iphdr *)skb->data;
+	void *txporthdr = skb->data + ip4h->ihl * 4;
+	__sum16 *csum_field, csum_temp, pseudo_csum;
+	__sum16 ip_payload_csum;
+	u16 csum_value_final;
 	__be16 addend;
 
-	ip4h = (struct iphdr *)(skb->data);
+	/* Computing the checksum over just the IPv4 header--including its
+	 * checksum field--should yield 0.  If it doesn't, the IP header
+	 * is bad, so return an error and let the IP layer drop it.
+	 */
+	if (ip_fast_csum(ip4h, ip4h->ihl)) {
+		priv->stats.csum_ip4_header_bad++;
+		return -EINVAL;
+	}
 
 	/* We don't support checksum offload on IPv4 fragments */
 	if (ip_is_fragment(ip4h)) {
@@ -47,25 +55,30 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 		return -EOPNOTSUPP;
 	}
 
-	txporthdr = skb->data + ip4h->ihl * 4;
-
+	/* Checksum offload is only supported for UDP and TCP protocols */
 	csum_field = rmnet_map_get_csum_field(ip4h->protocol, txporthdr);
-
 	if (!csum_field) {
 		priv->stats.csum_err_invalid_transport++;
 		return -EPROTONOSUPPORT;
 	}
 
-	/* RFC 768 - Skip IPv4 UDP packets where sender checksum field is 0 */
-	if (*csum_field == 0 && ip4h->protocol == IPPROTO_UDP) {
+	/* RFC 768: UDP checksum is optional for IPv4, and is 0 if unused */
+	if (!*csum_field && ip4h->protocol == IPPROTO_UDP) {
 		priv->stats.csum_skipped++;
 		return 0;
 	}
 
-	csum_value = ~ntohs(csum_trailer->csum_value);
-	hdr_csum = ~ip_fast_csum(ip4h, (int)ip4h->ihl);
-	ip_payload_csum = csum16_sub((__force __sum16)csum_value,
-				     (__force __be16)hdr_csum);
+	/* The checksum value in the trailer is computed over the entire
+	 * IP packet, including the IP header and payload.  To derive the
+	 * transport checksum from this, we first subract the contribution
+	 * of the IP header from the trailer checksum.  We then add the
+	 * checksum computed over the pseudo header.
+	 *
+	 * We verified above that the IP header contributes zero to the
+	 * trailer checksum.  Therefore the checksum in the trailer is
+	 * just the checksum computed over the IP payload.
+	 */
+	ip_payload_csum = (__force __sum16)~ntohs(csum_trailer->csum_value);
 
 	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
 					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index fe13017e9a41..6556b5381ce8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -166,6 +166,7 @@ static const struct net_device_ops rmnet_vnd_ops = {
 
 static const char rmnet_gstrings_stats[][ETH_GSTRING_LEN] = {
 	"Checksum ok",
+	"Bad IPv4 header checksum",
 	"Checksum valid bit not set",
 	"Checksum validation failed",
 	"Checksum error bad buffer",

From e5adbbdfa2fb17e3d266011cef816ee492235581 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:27 -0500
Subject: [PATCH 1544/3292] net: qualcomm: rmnet: clarify a bit of code

In rmnet_map_ipv6_dl_csum_trailer() there is an especially involved
line of code that determines the ones' complement sum of the IPv6
packet header (in host byte order).  Simplify that by storing the
result of computing just the header checksum in a local variable,
then using that in the original assignment.

Use the size of the IPv6 header structure as the number of bytes to
checksum, rather than computing the offset to the transport header.
And use ip_fast_csum() rather than ipa_compute_csum(), knowing that
the size of an IPv6 header (40 bytes) is a multiple of 4 bytes
greater than 16.

Add some comments to match rmnet_map_ipv4_dl_csum_trailer().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 40d7e0c615f9..4f93355e9a93 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -120,27 +120,33 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 			       struct rmnet_map_dl_csum_trailer *csum_trailer,
 			       struct rmnet_priv *priv)
 {
-	__sum16 *csum_field, ip6_payload_csum, pseudo_csum, csum_temp;
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
+	void *txporthdr = skb->data + sizeof(*ip6h);
+	__sum16 *csum_field, pseudo_csum, csum_temp;
 	u16 csum_value, csum_value_final;
 	__be16 ip6_hdr_csum, addend;
-	struct ipv6hdr *ip6h;
-	void *txporthdr;
+	__sum16 ip6_payload_csum;
+	__be16 ip_header_csum;
 	u32 length;
 
-	ip6h = (struct ipv6hdr *)(skb->data);
-
-	txporthdr = skb->data + sizeof(struct ipv6hdr);
+	/* Checksum offload is only supported for UDP and TCP protocols;
+	 * the packet cannot include any IPv6 extension headers
+	 */
 	csum_field = rmnet_map_get_csum_field(ip6h->nexthdr, txporthdr);
-
 	if (!csum_field) {
 		priv->stats.csum_err_invalid_transport++;
 		return -EPROTONOSUPPORT;
 	}
 
+	/* The checksum value in the trailer is computed over the entire
+	 * IP packet, including the IP header and payload.  To derive the
+	 * transport checksum from this, we first subract the contribution
+	 * of the IP header from the trailer checksum.  We then add the
+	 * checksum computed over the pseudo header.
+	 */
 	csum_value = ~ntohs(csum_trailer->csum_value);
-	ip6_hdr_csum = (__force __be16)
-			~ntohs((__force __be16)ip_compute_csum(ip6h,
-			       (int)(txporthdr - (void *)(skb->data))));
+	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
+	ip6_hdr_csum = (__force __be16)~ntohs(ip_header_csum);
 	ip6_payload_csum = csum16_sub((__force __sum16)csum_value,
 				      ip6_hdr_csum);
 

From a2918a169f57f965e4e5949822c2602c90e388ab Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:28 -0500
Subject: [PATCH 1545/3292] net: qualcomm: rmnet: avoid unnecessary
 byte-swapping

Internet checksums are used for IPv4 header checksum, as well as TCP
segment and UDP datagram checksums.  Such a checksum represents the
negated sum of adjacent pairs of bytes, using ones' complement
arithmetic.

One property of the Internet checkum is byte order independence [1].
Specifically, the sum of byte-swapped pairs is equal to the result
of byte swapping the sum of those same pairs when not byte-swapped.

So for example if a, b, c, d, y, and z are hexadecimal digits, and
PLUS represents ones' complement addition:
    If:		ab PLUS cd = yz
    Then:	ba PLUS dc = zy

For this reason, there is no need to swap the order of bytes in the
checksum value held in a message header, nor the one in the QMAPv4
trailer, in order to operate on them.

In other words, we can determine whether the hardware-computed
checksum matches the one in the message header without any byte
swaps.

(This patch leaves in place all existing type casts.)

[1] https://tools.ietf.org/html/rfc1071

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 4f93355e9a93..39f198d7595b 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -78,15 +78,15 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	 * trailer checksum.  Therefore the checksum in the trailer is
 	 * just the checksum computed over the IP payload.
 	 */
-	ip_payload_csum = (__force __sum16)~ntohs(csum_trailer->csum_value);
+	ip_payload_csum = (__force __sum16)~csum_trailer->csum_value;
 
 	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
 					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
 					 ip4h->protocol, 0);
-	addend = (__force __be16)ntohs((__force __be16)pseudo_csum);
+	addend = (__force __be16)pseudo_csum;
 	pseudo_csum = csum16_add(ip_payload_csum, addend);
 
-	addend = (__force __be16)ntohs((__force __be16)*csum_field);
+	addend = (__force __be16)*csum_field;
 	csum_temp = ~csum16_sub(pseudo_csum, addend);
 	csum_value_final = (__force u16)csum_temp;
 
@@ -105,7 +105,7 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 		}
 	}
 
-	if (csum_value_final == ntohs((__force __be16)*csum_field)) {
+	if (csum_value_final == (__force u16)*csum_field) {
 		priv->stats.csum_ok++;
 		return 0;
 	} else {

From 23a5708d4e78a97a8ee0b3bcbf93c81b43504b84 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 14:05:29 -0500
Subject: [PATCH 1546/3292] net: qualcomm: rmnet: avoid unnecessary IPv6
 byte-swapping

In the previous patch IPv4 download checksum offload code was
updated to avoid unnecessary byte swapping, based on properties of
the Internet checksum algorithm.  This patch makes comparable
changes to the IPv6 download checksum offload handling.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c    | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 39f198d7595b..d4d23ab446ef 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -123,10 +123,11 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
 	void *txporthdr = skb->data + sizeof(*ip6h);
 	__sum16 *csum_field, pseudo_csum, csum_temp;
-	u16 csum_value, csum_value_final;
 	__be16 ip6_hdr_csum, addend;
 	__sum16 ip6_payload_csum;
 	__be16 ip_header_csum;
+	u16 csum_value_final;
+	__be16 csum_value;
 	u32 length;
 
 	/* Checksum offload is only supported for UDP and TCP protocols;
@@ -144,21 +145,21 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * of the IP header from the trailer checksum.  We then add the
 	 * checksum computed over the pseudo header.
 	 */
-	csum_value = ~ntohs(csum_trailer->csum_value);
+	csum_value = ~csum_trailer->csum_value;
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
-	ip6_hdr_csum = (__force __be16)~ntohs(ip_header_csum);
+	ip6_hdr_csum = (__force __be16)~ip_header_csum;
 	ip6_payload_csum = csum16_sub((__force __sum16)csum_value,
 				      ip6_hdr_csum);
 
 	length = (ip6h->nexthdr == IPPROTO_UDP) ?
 		 ntohs(((struct udphdr *)txporthdr)->len) :
 		 ntohs(ip6h->payload_len);
-	pseudo_csum = ~(csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-			     length, ip6h->nexthdr, 0));
-	addend = (__force __be16)ntohs((__force __be16)pseudo_csum);
+	pseudo_csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				       length, ip6h->nexthdr, 0);
+	addend = (__force __be16)pseudo_csum;
 	pseudo_csum = csum16_add(ip6_payload_csum, addend);
 
-	addend = (__force __be16)ntohs((__force __be16)*csum_field);
+	addend = (__force __be16)*csum_field;
 	csum_temp = ~csum16_sub(pseudo_csum, addend);
 	csum_value_final = (__force u16)csum_temp;
 
@@ -179,7 +180,7 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 		}
 	}
 
-	if (csum_value_final == ntohs((__force __be16)*csum_field)) {
+	if (csum_value_final == (__force u16)*csum_field) {
 		priv->stats.csum_ok++;
 		return 0;
 	} else {

From 5673ef86380414be1702ba2f1ef92526a14dd1e0 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:19 +0300
Subject: [PATCH 1547/3292] net: pcs: xpcs: rename mdio_xpcs_args to dw_xpcs

The struct mdio_xpcs_args is reminiscent of when a similarly named
struct mdio_xpcs_ops existed. Now that that is removed, we can shorten
the name to dw_xpcs (dw for DesignWare).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h  |  2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_mdio.c |  2 +-
 drivers/net/pcs/pcs-xpcs.c                    | 73 +++++++++----------
 include/linux/pcs/pcs-xpcs.h                  | 14 ++--
 4 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 8a83f9e1e95b..5fecc83f175b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -503,7 +503,7 @@ struct mac_device_info {
 	const struct stmmac_hwtimestamp *ptp;
 	const struct stmmac_tc_ops *tc;
 	const struct stmmac_mmc_ops *mmc;
-	struct mdio_xpcs_args *xpcs;
+	struct dw_xpcs *xpcs;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	void __iomem *pcsr;     /* vpointer to device CSRs */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index bc900e240da2..3b3033b20b1d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -401,7 +401,7 @@ int stmmac_xpcs_setup(struct mii_bus *bus)
 {
 	int mode, addr;
 	struct net_device *ndev = bus->priv;
-	struct mdio_xpcs_args *xpcs;
+	struct dw_xpcs *xpcs;
 	struct stmmac_priv *priv;
 	struct mdio_device *mdiodev;
 
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 98c4a3973402..a2cbb2d926b7 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -109,7 +109,7 @@
 #define DW_VR_MII_EEE_TRN_LPI		BIT(0)	/* Transparent Mode Enable */
 
 #define phylink_pcs_to_xpcs(pl_pcs) \
-	container_of((pl_pcs), struct mdio_xpcs_args, pcs)
+	container_of((pl_pcs), struct dw_xpcs, pcs)
 
 static const int xpcs_usxgmii_features[] = {
 	ETHTOOL_LINK_MODE_Pause_BIT,
@@ -236,7 +236,7 @@ static const struct xpcs_compat *xpcs_find_compat(const struct xpcs_id *id,
 	return NULL;
 }
 
-int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
+int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface)
 {
 	const struct xpcs_compat *compat;
 
@@ -263,7 +263,7 @@ static bool __xpcs_linkmode_supported(const struct xpcs_compat *compat,
 #define xpcs_linkmode_supported(compat, mode) \
 	__xpcs_linkmode_supported(compat, ETHTOOL_LINK_MODE_ ## mode ## _BIT)
 
-static int xpcs_read(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
+static int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg)
 {
 	u32 reg_addr = mdiobus_c45_addr(dev, reg);
 	struct mii_bus *bus = xpcs->mdiodev->bus;
@@ -272,7 +272,7 @@ static int xpcs_read(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
 	return mdiobus_read(bus, addr, reg_addr);
 }
 
-static int xpcs_write(struct mdio_xpcs_args *xpcs, int dev, u32 reg, u16 val)
+static int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val)
 {
 	u32 reg_addr = mdiobus_c45_addr(dev, reg);
 	struct mii_bus *bus = xpcs->mdiodev->bus;
@@ -281,28 +281,28 @@ static int xpcs_write(struct mdio_xpcs_args *xpcs, int dev, u32 reg, u16 val)
 	return mdiobus_write(bus, addr, reg_addr, val);
 }
 
-static int xpcs_read_vendor(struct mdio_xpcs_args *xpcs, int dev, u32 reg)
+static int xpcs_read_vendor(struct dw_xpcs *xpcs, int dev, u32 reg)
 {
 	return xpcs_read(xpcs, dev, DW_VENDOR | reg);
 }
 
-static int xpcs_write_vendor(struct mdio_xpcs_args *xpcs, int dev, int reg,
+static int xpcs_write_vendor(struct dw_xpcs *xpcs, int dev, int reg,
 			     u16 val)
 {
 	return xpcs_write(xpcs, dev, DW_VENDOR | reg, val);
 }
 
-static int xpcs_read_vpcs(struct mdio_xpcs_args *xpcs, int reg)
+static int xpcs_read_vpcs(struct dw_xpcs *xpcs, int reg)
 {
 	return xpcs_read_vendor(xpcs, MDIO_MMD_PCS, reg);
 }
 
-static int xpcs_write_vpcs(struct mdio_xpcs_args *xpcs, int reg, u16 val)
+static int xpcs_write_vpcs(struct dw_xpcs *xpcs, int reg, u16 val)
 {
 	return xpcs_write_vendor(xpcs, MDIO_MMD_PCS, reg, val);
 }
 
-static int xpcs_poll_reset(struct mdio_xpcs_args *xpcs, int dev)
+static int xpcs_poll_reset(struct dw_xpcs *xpcs, int dev)
 {
 	/* Poll until the reset bit clears (50ms per retry == 0.6 sec) */
 	unsigned int retries = 12;
@@ -318,7 +318,7 @@ static int xpcs_poll_reset(struct mdio_xpcs_args *xpcs, int dev)
 	return (ret & MDIO_CTRL1_RESET) ? -ETIMEDOUT : 0;
 }
 
-static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs,
+static int xpcs_soft_reset(struct dw_xpcs *xpcs,
 			   const struct xpcs_compat *compat)
 {
 	int ret, dev;
@@ -348,7 +348,7 @@ static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs,
 		dev_warn(&(__xpcs)->mdiodev->dev, ##__args); \
 })
 
-static int xpcs_read_fault_c73(struct mdio_xpcs_args *xpcs,
+static int xpcs_read_fault_c73(struct dw_xpcs *xpcs,
 			       struct phylink_link_state *state)
 {
 	int ret;
@@ -399,7 +399,7 @@ static int xpcs_read_fault_c73(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_read_link_c73(struct mdio_xpcs_args *xpcs, bool an)
+static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an)
 {
 	bool link = true;
 	int ret;
@@ -439,7 +439,7 @@ static int xpcs_get_max_usxgmii_speed(const unsigned long *supported)
 	return max;
 }
 
-static void xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
+static void xpcs_config_usxgmii(struct dw_xpcs *xpcs, int speed)
 {
 	int ret, speed_sel;
 
@@ -500,7 +500,7 @@ out:
 	pr_err("%s: XPCS access returned %pe\n", __func__, ERR_PTR(ret));
 }
 
-static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
+static int _xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
 				 const struct xpcs_compat *compat)
 {
 	int ret, adv;
@@ -545,7 +545,7 @@ static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
 	return xpcs_write(xpcs, MDIO_MMD_AN, DW_SR_AN_ADV1, adv);
 }
 
-static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
+static int xpcs_config_aneg_c73(struct dw_xpcs *xpcs,
 				const struct xpcs_compat *compat)
 {
 	int ret;
@@ -563,7 +563,7 @@ static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs,
 	return xpcs_write(xpcs, MDIO_MMD_AN, MDIO_CTRL1, ret);
 }
 
-static int xpcs_aneg_done_c73(struct mdio_xpcs_args *xpcs,
+static int xpcs_aneg_done_c73(struct dw_xpcs *xpcs,
 			      struct phylink_link_state *state,
 			      const struct xpcs_compat *compat)
 {
@@ -590,7 +590,7 @@ static int xpcs_aneg_done_c73(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_read_lpa_c73(struct mdio_xpcs_args *xpcs,
+static int xpcs_read_lpa_c73(struct dw_xpcs *xpcs,
 			     struct phylink_link_state *state)
 {
 	int ret;
@@ -639,7 +639,7 @@ static int xpcs_read_lpa_c73(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static void xpcs_resolve_lpa_c73(struct mdio_xpcs_args *xpcs,
+static void xpcs_resolve_lpa_c73(struct dw_xpcs *xpcs,
 				 struct phylink_link_state *state)
 {
 	int max_speed = xpcs_get_max_usxgmii_speed(state->lp_advertising);
@@ -649,7 +649,7 @@ static void xpcs_resolve_lpa_c73(struct mdio_xpcs_args *xpcs,
 	state->duplex = DUPLEX_FULL;
 }
 
-static int xpcs_get_max_xlgmii_speed(struct mdio_xpcs_args *xpcs,
+static int xpcs_get_max_xlgmii_speed(struct dw_xpcs *xpcs,
 				     struct phylink_link_state *state)
 {
 	unsigned long *adv = state->advertising;
@@ -703,7 +703,7 @@ static int xpcs_get_max_xlgmii_speed(struct mdio_xpcs_args *xpcs,
 	return speed;
 }
 
-static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs,
+static void xpcs_resolve_pma(struct dw_xpcs *xpcs,
 			     struct phylink_link_state *state)
 {
 	state->pause = MLO_PAUSE_TX | MLO_PAUSE_RX;
@@ -722,7 +722,7 @@ static void xpcs_resolve_pma(struct mdio_xpcs_args *xpcs,
 	}
 }
 
-void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
+void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(xpcs_supported);
@@ -752,8 +752,7 @@ void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
 }
 EXPORT_SYMBOL_GPL(xpcs_validate);
 
-int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
-		    int enable)
+int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
 {
 	int ret;
 
@@ -786,7 +785,7 @@ int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
 }
 EXPORT_SYMBOL_GPL(xpcs_config_eee);
 
-static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
+static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs)
 {
 	int ret;
 
@@ -827,7 +826,7 @@ static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
 }
 
-static int xpcs_config_2500basex(struct mdio_xpcs_args *xpcs)
+static int xpcs_config_2500basex(struct dw_xpcs *xpcs)
 {
 	int ret;
 
@@ -849,8 +848,8 @@ static int xpcs_config_2500basex(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, ret);
 }
 
-static int xpcs_do_config(struct mdio_xpcs_args *xpcs,
-			  phy_interface_t interface, unsigned int mode)
+static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
+			  unsigned int mode)
 {
 	const struct xpcs_compat *compat;
 	int ret;
@@ -889,12 +888,12 @@ static int xpcs_config(struct phylink_pcs *pcs, unsigned int mode,
 		       const unsigned long *advertising,
 		       bool permit_pause_to_mac)
 {
-	struct mdio_xpcs_args *xpcs = phylink_pcs_to_xpcs(pcs);
+	struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
 
 	return xpcs_do_config(xpcs, interface, mode);
 }
 
-static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
+static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
 			      struct phylink_link_state *state,
 			      const struct xpcs_compat *compat)
 {
@@ -928,7 +927,7 @@ static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_get_state_c37_sgmii(struct mdio_xpcs_args *xpcs,
+static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
 				    struct phylink_link_state *state)
 {
 	int ret;
@@ -972,7 +971,7 @@ static int xpcs_get_state_c37_sgmii(struct mdio_xpcs_args *xpcs,
 static void xpcs_get_state(struct phylink_pcs *pcs,
 			   struct phylink_link_state *state)
 {
-	struct mdio_xpcs_args *xpcs = phylink_pcs_to_xpcs(pcs);
+	struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
 	const struct xpcs_compat *compat;
 	int ret;
 
@@ -1004,13 +1003,13 @@ static void xpcs_get_state(struct phylink_pcs *pcs,
 static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 			 phy_interface_t interface, int speed, int duplex)
 {
-	struct mdio_xpcs_args *xpcs = phylink_pcs_to_xpcs(pcs);
+	struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
 
 	if (interface == PHY_INTERFACE_MODE_USXGMII)
 		return xpcs_config_usxgmii(xpcs, speed);
 }
 
-static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
+static u32 xpcs_get_id(struct dw_xpcs *xpcs)
 {
 	int ret;
 	u32 id;
@@ -1095,10 +1094,10 @@ static const struct phylink_pcs_ops xpcs_phylink_ops = {
 	.pcs_link_up = xpcs_link_up,
 };
 
-struct mdio_xpcs_args *xpcs_create(struct mdio_device *mdiodev,
-				   phy_interface_t interface)
+struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
+			    phy_interface_t interface)
 {
-	struct mdio_xpcs_args *xpcs;
+	struct dw_xpcs *xpcs;
 	u32 xpcs_id;
 	int i, ret;
 
@@ -1144,7 +1143,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(xpcs_create);
 
-void xpcs_destroy(struct mdio_xpcs_args *xpcs)
+void xpcs_destroy(struct dw_xpcs *xpcs)
 {
 	kfree(xpcs);
 }
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 4d815f03b4b2..4f1cdf6f3d4c 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -17,19 +17,19 @@
 
 struct xpcs_id;
 
-struct mdio_xpcs_args {
+struct dw_xpcs {
 	struct mdio_device *mdiodev;
 	const struct xpcs_id *id;
 	struct phylink_pcs pcs;
 };
 
-int xpcs_get_an_mode(struct mdio_xpcs_args *xpcs, phy_interface_t interface);
-void xpcs_validate(struct mdio_xpcs_args *xpcs, unsigned long *supported,
+int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
+void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state);
-int xpcs_config_eee(struct mdio_xpcs_args *xpcs, int mult_fact_100ns,
+int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
 		    int enable);
-struct mdio_xpcs_args *xpcs_create(struct mdio_device *mdiodev,
-				   phy_interface_t interface);
-void xpcs_destroy(struct mdio_xpcs_args *xpcs);
+struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev,
+			    phy_interface_t interface);
+void xpcs_destroy(struct dw_xpcs *xpcs);
 
 #endif /* __LINUX_PCS_XPCS_H */

From 47538dbeb70198c6036cfd4a60b292f1398f8f5e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:20 +0300
Subject: [PATCH 1548/3292] net: stmmac: reverse Christmas tree notation in
 stmmac_xpcs_setup

Reorder the variable declarations in descending line length order,
according to the networking coding style.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 3b3033b20b1d..a5d150c5f3d8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -399,11 +399,11 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 
 int stmmac_xpcs_setup(struct mii_bus *bus)
 {
-	int mode, addr;
 	struct net_device *ndev = bus->priv;
-	struct dw_xpcs *xpcs;
-	struct stmmac_priv *priv;
 	struct mdio_device *mdiodev;
+	struct stmmac_priv *priv;
+	struct dw_xpcs *xpcs;
+	int mode, addr;
 
 	priv = netdev_priv(ndev);
 	mode = priv->plat->phy_interface;

From 7413f9a6af00bb4dc0ef01944aceb8e2ee17a8d5 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:21 +0300
Subject: [PATCH 1549/3292] net: stmmac: reduce indentation when calling
 stmmac_xpcs_setup

There is no reason to embed an if within an if, we can just logically
AND the two conditions.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4177fd6a9db5..16820873b01d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7003,12 +7003,10 @@ int stmmac_dvr_probe(struct device *device,
 	if (priv->plat->speed_mode_2500)
 		priv->plat->speed_mode_2500(ndev, priv->plat->bsp_priv);
 
-	if (priv->plat->mdio_bus_data) {
-		if (priv->plat->mdio_bus_data->has_xpcs) {
-			ret = stmmac_xpcs_setup(priv->mii);
-			if (ret)
-				goto error_xpcs_setup;
-		}
+	if (priv->plat->mdio_bus_data && priv->plat->mdio_bus_data->has_xpcs) {
+		ret = stmmac_xpcs_setup(priv->mii);
+		if (ret)
+			goto error_xpcs_setup;
 	}
 
 	ret = stmmac_phy_setup(priv);

From d4433d5b7b34fa316c473769d51c79b2755953e4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:22 +0300
Subject: [PATCH 1550/3292] net: pcs: xpcs: move register bit descriptions to a
 header file

Vendors which integrate the Designware XPCS might modify a few things
here and there, and to support those, it's best to create separate C
files in order to not clutter up the main pcs-xpcs.c.

Because the vendor files might want to access the common xpcs registers
too, let's move them in a header file which is local to this driver and
can be included by vendor files as appropriate.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                |   1 +
 drivers/net/pcs/pcs-xpcs.c |  97 +---------------------------------
 drivers/net/pcs/pcs-xpcs.h | 103 +++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 96 deletions(-)
 create mode 100644 drivers/net/pcs/pcs-xpcs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 2172f594be8f..c8214235380e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17676,6 +17676,7 @@ M:	Jose Abreu <Jose.Abreu@synopsys.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/pcs/pcs-xpcs.c
+F:	drivers/net/pcs/pcs-xpcs.h
 F:	include/linux/pcs/pcs-xpcs.h
 
 SYNOPSYS DESIGNWARE I2C DRIVER
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index a2cbb2d926b7..8ca7592b02ec 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -11,102 +11,7 @@
 #include <linux/mdio.h>
 #include <linux/phylink.h>
 #include <linux/workqueue.h>
-
-#define SYNOPSYS_XPCS_ID		0x7996ced0
-#define SYNOPSYS_XPCS_MASK		0xffffffff
-
-/* Vendor regs access */
-#define DW_VENDOR			BIT(15)
-
-/* VR_XS_PCS */
-#define DW_USXGMII_RST			BIT(10)
-#define DW_USXGMII_EN			BIT(9)
-#define DW_VR_XS_PCS_DIG_STS		0x0010
-#define DW_RXFIFO_ERR			GENMASK(6, 5)
-
-/* SR_MII */
-#define DW_USXGMII_FULL			BIT(8)
-#define DW_USXGMII_SS_MASK		(BIT(13) | BIT(6) | BIT(5))
-#define DW_USXGMII_10000		(BIT(13) | BIT(6))
-#define DW_USXGMII_5000			(BIT(13) | BIT(5))
-#define DW_USXGMII_2500			(BIT(5))
-#define DW_USXGMII_1000			(BIT(6))
-#define DW_USXGMII_100			(BIT(13))
-#define DW_USXGMII_10			(0)
-
-/* SR_AN */
-#define DW_SR_AN_ADV1			0x10
-#define DW_SR_AN_ADV2			0x11
-#define DW_SR_AN_ADV3			0x12
-#define DW_SR_AN_LP_ABL1		0x13
-#define DW_SR_AN_LP_ABL2		0x14
-#define DW_SR_AN_LP_ABL3		0x15
-
-/* Clause 73 Defines */
-/* AN_LP_ABL1 */
-#define DW_C73_PAUSE			BIT(10)
-#define DW_C73_ASYM_PAUSE		BIT(11)
-#define DW_C73_AN_ADV_SF		0x1
-/* AN_LP_ABL2 */
-#define DW_C73_1000KX			BIT(5)
-#define DW_C73_10000KX4			BIT(6)
-#define DW_C73_10000KR			BIT(7)
-/* AN_LP_ABL3 */
-#define DW_C73_2500KX			BIT(0)
-#define DW_C73_5000KR			BIT(1)
-
-/* Clause 37 Defines */
-/* VR MII MMD registers offsets */
-#define DW_VR_MII_MMD_CTRL		0x0000
-#define DW_VR_MII_DIG_CTRL1		0x8000
-#define DW_VR_MII_AN_CTRL		0x8001
-#define DW_VR_MII_AN_INTR_STS		0x8002
-/* Enable 2.5G Mode */
-#define DW_VR_MII_DIG_CTRL1_2G5_EN	BIT(2)
-/* EEE Mode Control Register */
-#define DW_VR_MII_EEE_MCTRL0		0x8006
-#define DW_VR_MII_EEE_MCTRL1		0x800b
-
-/* VR_MII_DIG_CTRL1 */
-#define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW		BIT(9)
-
-/* VR_MII_AN_CTRL */
-#define DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT	3
-#define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
-#define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII	0x1
-#define DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII	0x0
-#define DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT	1
-#define DW_VR_MII_PCS_MODE_MASK			GENMASK(2, 1)
-#define DW_VR_MII_PCS_MODE_C37_1000BASEX	0x0
-#define DW_VR_MII_PCS_MODE_C37_SGMII		0x2
-
-/* VR_MII_AN_INTR_STS */
-#define DW_VR_MII_AN_STS_C37_ANSGM_FD		BIT(1)
-#define DW_VR_MII_AN_STS_C37_ANSGM_SP_SHIFT	2
-#define DW_VR_MII_AN_STS_C37_ANSGM_SP		GENMASK(3, 2)
-#define DW_VR_MII_C37_ANSGM_SP_10		0x0
-#define DW_VR_MII_C37_ANSGM_SP_100		0x1
-#define DW_VR_MII_C37_ANSGM_SP_1000		0x2
-#define DW_VR_MII_C37_ANSGM_SP_LNKSTS		BIT(4)
-
-/* SR MII MMD Control defines */
-#define AN_CL37_EN		BIT(12)	/* Enable Clause 37 auto-nego */
-#define SGMII_SPEED_SS13	BIT(13)	/* SGMII speed along with SS6 */
-#define SGMII_SPEED_SS6		BIT(6)	/* SGMII speed along with SS13 */
-
-/* VR MII EEE Control 0 defines */
-#define DW_VR_MII_EEE_LTX_EN		BIT(0)  /* LPI Tx Enable */
-#define DW_VR_MII_EEE_LRX_EN		BIT(1)  /* LPI Rx Enable */
-#define DW_VR_MII_EEE_TX_QUIET_EN		BIT(2)  /* Tx Quiet Enable */
-#define DW_VR_MII_EEE_RX_QUIET_EN		BIT(3)  /* Rx Quiet Enable */
-#define DW_VR_MII_EEE_TX_EN_CTRL		BIT(4)  /* Tx Control Enable */
-#define DW_VR_MII_EEE_RX_EN_CTRL		BIT(7)  /* Rx Control Enable */
-
-#define DW_VR_MII_EEE_MULT_FACT_100NS_SHIFT	8
-#define DW_VR_MII_EEE_MULT_FACT_100NS		GENMASK(11, 8)
-
-/* VR MII EEE Control 1 defines */
-#define DW_VR_MII_EEE_TRN_LPI		BIT(0)	/* Transparent Mode Enable */
+#include "pcs-xpcs.h"
 
 #define phylink_pcs_to_xpcs(pl_pcs) \
 	container_of((pl_pcs), struct dw_xpcs, pcs)
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
new file mode 100644
index 000000000000..867537a68c63
--- /dev/null
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare XPCS helpers
+ *
+ * Author: Jose Abreu <Jose.Abreu@synopsys.com>
+ */
+
+#define SYNOPSYS_XPCS_ID		0x7996ced0
+#define SYNOPSYS_XPCS_MASK		0xffffffff
+
+/* Vendor regs access */
+#define DW_VENDOR			BIT(15)
+
+/* VR_XS_PCS */
+#define DW_USXGMII_RST			BIT(10)
+#define DW_USXGMII_EN			BIT(9)
+#define DW_VR_XS_PCS_DIG_STS		0x0010
+#define DW_RXFIFO_ERR			GENMASK(6, 5)
+
+/* SR_MII */
+#define DW_USXGMII_FULL			BIT(8)
+#define DW_USXGMII_SS_MASK		(BIT(13) | BIT(6) | BIT(5))
+#define DW_USXGMII_10000		(BIT(13) | BIT(6))
+#define DW_USXGMII_5000			(BIT(13) | BIT(5))
+#define DW_USXGMII_2500			(BIT(5))
+#define DW_USXGMII_1000			(BIT(6))
+#define DW_USXGMII_100			(BIT(13))
+#define DW_USXGMII_10			(0)
+
+/* SR_AN */
+#define DW_SR_AN_ADV1			0x10
+#define DW_SR_AN_ADV2			0x11
+#define DW_SR_AN_ADV3			0x12
+#define DW_SR_AN_LP_ABL1		0x13
+#define DW_SR_AN_LP_ABL2		0x14
+#define DW_SR_AN_LP_ABL3		0x15
+
+/* Clause 73 Defines */
+/* AN_LP_ABL1 */
+#define DW_C73_PAUSE			BIT(10)
+#define DW_C73_ASYM_PAUSE		BIT(11)
+#define DW_C73_AN_ADV_SF		0x1
+/* AN_LP_ABL2 */
+#define DW_C73_1000KX			BIT(5)
+#define DW_C73_10000KX4			BIT(6)
+#define DW_C73_10000KR			BIT(7)
+/* AN_LP_ABL3 */
+#define DW_C73_2500KX			BIT(0)
+#define DW_C73_5000KR			BIT(1)
+
+/* Clause 37 Defines */
+/* VR MII MMD registers offsets */
+#define DW_VR_MII_MMD_CTRL		0x0000
+#define DW_VR_MII_DIG_CTRL1		0x8000
+#define DW_VR_MII_AN_CTRL		0x8001
+#define DW_VR_MII_AN_INTR_STS		0x8002
+/* Enable 2.5G Mode */
+#define DW_VR_MII_DIG_CTRL1_2G5_EN	BIT(2)
+/* EEE Mode Control Register */
+#define DW_VR_MII_EEE_MCTRL0		0x8006
+#define DW_VR_MII_EEE_MCTRL1		0x800b
+
+/* VR_MII_DIG_CTRL1 */
+#define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW		BIT(9)
+
+/* VR_MII_AN_CTRL */
+#define DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT	3
+#define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
+#define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII	0x1
+#define DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII	0x0
+#define DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT	1
+#define DW_VR_MII_PCS_MODE_MASK			GENMASK(2, 1)
+#define DW_VR_MII_PCS_MODE_C37_1000BASEX	0x0
+#define DW_VR_MII_PCS_MODE_C37_SGMII		0x2
+
+/* VR_MII_AN_INTR_STS */
+#define DW_VR_MII_AN_STS_C37_ANSGM_FD		BIT(1)
+#define DW_VR_MII_AN_STS_C37_ANSGM_SP_SHIFT	2
+#define DW_VR_MII_AN_STS_C37_ANSGM_SP		GENMASK(3, 2)
+#define DW_VR_MII_C37_ANSGM_SP_10		0x0
+#define DW_VR_MII_C37_ANSGM_SP_100		0x1
+#define DW_VR_MII_C37_ANSGM_SP_1000		0x2
+#define DW_VR_MII_C37_ANSGM_SP_LNKSTS		BIT(4)
+
+/* SR MII MMD Control defines */
+#define AN_CL37_EN			BIT(12)	/* Enable Clause 37 auto-nego */
+#define SGMII_SPEED_SS13		BIT(13)	/* SGMII speed along with SS6 */
+#define SGMII_SPEED_SS6			BIT(6)	/* SGMII speed along with SS13 */
+
+/* VR MII EEE Control 0 defines */
+#define DW_VR_MII_EEE_LTX_EN			BIT(0)  /* LPI Tx Enable */
+#define DW_VR_MII_EEE_LRX_EN			BIT(1)  /* LPI Rx Enable */
+#define DW_VR_MII_EEE_TX_QUIET_EN		BIT(2)  /* Tx Quiet Enable */
+#define DW_VR_MII_EEE_RX_QUIET_EN		BIT(3)  /* Rx Quiet Enable */
+#define DW_VR_MII_EEE_TX_EN_CTRL		BIT(4)  /* Tx Control Enable */
+#define DW_VR_MII_EEE_RX_EN_CTRL		BIT(7)  /* Rx Control Enable */
+
+#define DW_VR_MII_EEE_MULT_FACT_100NS_SHIFT	8
+#define DW_VR_MII_EEE_MULT_FACT_100NS		GENMASK(11, 8)
+
+/* VR MII EEE Control 1 defines */
+#define DW_VR_MII_EEE_TRN_LPI		BIT(0)	/* Transparent Mode Enable */

From 2031c09e6d5f50d4c337da11efd1deb8279687d6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:23 +0300
Subject: [PATCH 1551/3292] net: pcs: xpcs: add support for sgmii with no
 inband AN

In fixed-link use cases, the XPCS can disable the clause 37 in-band
autoneg process, disable the "Automatic Speed Mode Change after CL37 AN"
setting, and force operation in a speed dictated by management.

Add support for this operating mode.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c | 41 +++++++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 8ca7592b02ec..743b53734eeb 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -690,7 +690,7 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
 }
 EXPORT_SYMBOL_GPL(xpcs_config_eee);
 
-static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs)
+static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, unsigned int mode)
 {
 	int ret;
 
@@ -726,7 +726,10 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs)
 	if (ret < 0)
 		return ret;
 
-	ret |= DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+	if (phylink_autoneg_inband(mode))
+		ret |= DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+	else
+		ret &= ~DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
 
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
 }
@@ -772,7 +775,7 @@ static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
 		}
 		break;
 	case DW_AN_C37_SGMII:
-		ret = xpcs_config_aneg_c37_sgmii(xpcs);
+		ret = xpcs_config_aneg_c37_sgmii(xpcs, mode);
 		if (ret)
 			return ret;
 		break;
@@ -905,6 +908,36 @@ static void xpcs_get_state(struct phylink_pcs *pcs,
 	}
 }
 
+static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int mode,
+			       int speed, int duplex)
+{
+	int val, ret;
+
+	if (phylink_autoneg_inband(mode))
+		return;
+
+	switch (speed) {
+	case SPEED_1000:
+		val = BMCR_SPEED1000;
+		break;
+	case SPEED_100:
+		val = BMCR_SPEED100;
+		break;
+	case SPEED_10:
+		val = BMCR_SPEED10;
+		break;
+	default:
+		return;
+	}
+
+	if (duplex == DUPLEX_FULL)
+		val |= BMCR_FULLDPLX;
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val);
+	if (ret)
+		pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret));
+}
+
 static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 			 phy_interface_t interface, int speed, int duplex)
 {
@@ -912,6 +945,8 @@ static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 
 	if (interface == PHY_INTERFACE_MODE_USXGMII)
 		return xpcs_config_usxgmii(xpcs, speed);
+	if (interface == PHY_INTERFACE_MODE_SGMII)
+		return xpcs_link_up_sgmii(xpcs, mode, speed, duplex);
 }
 
 static u32 xpcs_get_id(struct dw_xpcs *xpcs)

From 36641b045c839797739f9863e86e4dae2370e24f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:24 +0300
Subject: [PATCH 1552/3292] net: pcs: xpcs: also ignore phy id if it's all ones

xpcs_get_id() searches multiple MMDs for a known PHY ID, starting with
MDIO_MMD_PCS (3). However not all integrators might have implemented
that MMD on their MDIO bus. For example, the NXP SJA1105 and SJA1110
switches only implement vendor-specific MMD 1 and 2.

When there is nothing on an MDIO bus at a certain address, traditionally
the bus returns 0xffff, which means that the bus remained in its default
pull-up state for the duration of the MDIO transaction. The 0xffff value
is widely used in drivers/net/phy/phy_device.c (see get_phy_c22_id for
example) to denote a missing device.

So it makes sense for the xpcs to ignore this value as well, and
continue its search, eventually finding the proper PHY ID in the
vendor-specific MMDs.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 743b53734eeb..ecf5011977d3 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -965,8 +965,10 @@ static u32 xpcs_get_id(struct dw_xpcs *xpcs)
 	if (ret < 0)
 		return 0xffffffff;
 
-	/* If Device IDs are not all zeros, we found C73 AN-type device */
-	if (id | ret)
+	/* If Device IDs are not all zeros or all ones,
+	 * we found C73 AN-type device
+	 */
+	if ((id | ret) && (id | ret) != 0xffffffff)
 		return id | ret;
 
 	/* Next, search C37 PCS using Vendor-Specific MII MMD */

From dd0721ea4c7a6c2ec8b309ff57d74d88f08d4c23 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:25 +0300
Subject: [PATCH 1553/3292] net: pcs: xpcs: add support for NXP SJA1105

The NXP SJA1105 DSA switch integrates a Synopsys SGMII XPCS on port 4.
The generic code works fine, except there is an integration issue which
needs to be dealt with: in this switch, the XPCS is integrated with a
PMA that has the TX lane polarity inverted by default (PLUS is MINUS,
MINUS is PLUS).

To obtain normal non-inverted behavior, the TX lane polarity must be
inverted in the PCS, via the DIGITAL_CONTROL_2 register.

We introduce a pma_config() method in xpcs_compat which is called by the
phylink_pcs_config() implementation.

Also, the NXP SJA1105 returns all zeroes in the PHY ID registers 2 and 3.
We need to hack up an ad-hoc PHY ID (OUI is zero, device ID is 1) in
order for the XPCS driver to recognize it. This PHY ID is added to the
public include/linux/pcs/pcs-xpcs.h for that reason (for the sja1105
driver to be able to use it in a later patch).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                    |  1 +
 drivers/net/pcs/Makefile       |  4 +++-
 drivers/net/pcs/pcs-xpcs-nxp.c | 16 ++++++++++++++++
 drivers/net/pcs/pcs-xpcs.c     | 25 +++++++++++++++++++++++--
 drivers/net/pcs/pcs-xpcs.h     | 10 ++++++++++
 include/linux/pcs/pcs-xpcs.h   |  2 ++
 6 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/pcs/pcs-xpcs-nxp.c

diff --git a/MAINTAINERS b/MAINTAINERS
index c8214235380e..349a87b42d3c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13203,6 +13203,7 @@ M:	Vladimir Oltean <olteanv@gmail.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	drivers/net/dsa/sja1105
+F:	drivers/net/pcs/pcs-xpcs-nxp.c
 
 NXP TDA998X DRM DRIVER
 M:	Russell King <linux@armlinux.org.uk>
diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile
index c23146755972..0603d469bd57 100644
--- a/drivers/net/pcs/Makefile
+++ b/drivers/net/pcs/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Linux PCS drivers
 
-obj-$(CONFIG_PCS_XPCS)		+= pcs-xpcs.o
+pcs_xpcs-$(CONFIG_PCS_XPCS)	:= pcs-xpcs.o pcs-xpcs-nxp.o
+
+obj-$(CONFIG_PCS_XPCS)		+= pcs_xpcs.o
 obj-$(CONFIG_PCS_LYNX)		+= pcs-lynx.o
diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c
new file mode 100644
index 000000000000..51b2fc7d36a9
--- /dev/null
+++ b/drivers/net/pcs/pcs-xpcs-nxp.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2021 NXP Semiconductors
+ */
+#include <linux/pcs/pcs-xpcs.h>
+#include "pcs-xpcs.h"
+
+/* In NXP SJA1105, the PCS is integrated with a PMA that has the TX lane
+ * polarity inverted by default (PLUS is MINUS, MINUS is PLUS). To obtain
+ * normal non-inverted behavior, the TX lane polarity must be inverted in the
+ * PCS, via the DIGITAL_CONTROL_2 register.
+ */
+int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs)
+{
+	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL2,
+			  DW_VR_MII_DIG_CTRL2_TX_POL_INV);
+}
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index ecf5011977d3..3b1baacfaf8f 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -117,6 +117,7 @@ struct xpcs_compat {
 	const phy_interface_t *interface;
 	int num_interfaces;
 	int an_mode;
+	int (*pma_config)(struct dw_xpcs *xpcs);
 };
 
 struct xpcs_id {
@@ -168,7 +169,7 @@ static bool __xpcs_linkmode_supported(const struct xpcs_compat *compat,
 #define xpcs_linkmode_supported(compat, mode) \
 	__xpcs_linkmode_supported(compat, ETHTOOL_LINK_MODE_ ## mode ## _BIT)
 
-static int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg)
+int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg)
 {
 	u32 reg_addr = mdiobus_c45_addr(dev, reg);
 	struct mii_bus *bus = xpcs->mdiodev->bus;
@@ -177,7 +178,7 @@ static int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg)
 	return mdiobus_read(bus, addr, reg_addr);
 }
 
-static int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val)
+int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val)
 {
 	u32 reg_addr = mdiobus_c45_addr(dev, reg);
 	struct mii_bus *bus = xpcs->mdiodev->bus;
@@ -788,6 +789,12 @@ static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
 		return -1;
 	}
 
+	if (compat->pma_config) {
+		ret = compat->pma_config(xpcs);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -1022,11 +1029,25 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
 	},
 };
 
+static const struct xpcs_compat nxp_sja1105_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
+	[DW_XPCS_SGMII] = {
+		.supported = xpcs_sgmii_features,
+		.interface = xpcs_sgmii_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces),
+		.an_mode = DW_AN_C37_SGMII,
+		.pma_config = nxp_sja1105_sgmii_pma_config,
+	},
+};
+
 static const struct xpcs_id xpcs_id_list[] = {
 	{
 		.id = SYNOPSYS_XPCS_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.compat = synopsys_xpcs_compat,
+	}, {
+		.id = NXP_SJA1105_XPCS_ID,
+		.mask = SYNOPSYS_XPCS_MASK,
+		.compat = nxp_sja1105_xpcs_compat,
 	},
 };
 
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index 867537a68c63..3daf4276a158 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -60,10 +60,15 @@
 /* EEE Mode Control Register */
 #define DW_VR_MII_EEE_MCTRL0		0x8006
 #define DW_VR_MII_EEE_MCTRL1		0x800b
+#define DW_VR_MII_DIG_CTRL2		0x80e1
 
 /* VR_MII_DIG_CTRL1 */
 #define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW		BIT(9)
 
+/* VR_MII_DIG_CTRL2 */
+#define DW_VR_MII_DIG_CTRL2_TX_POL_INV		BIT(4)
+#define DW_VR_MII_DIG_CTRL2_RX_POL_INV		BIT(0)
+
 /* VR_MII_AN_CTRL */
 #define DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT	3
 #define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
@@ -101,3 +106,8 @@
 
 /* VR MII EEE Control 1 defines */
 #define DW_VR_MII_EEE_TRN_LPI		BIT(0)	/* Transparent Mode Enable */
+
+int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
+int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val);
+
+int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs);
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 4f1cdf6f3d4c..c594f7cdc304 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -10,6 +10,8 @@
 #include <linux/phy.h>
 #include <linux/phylink.h>
 
+#define NXP_SJA1105_XPCS_ID		0x00000010
+
 /* AN mode */
 #define DW_AN_C73			1
 #define DW_AN_C37_SGMII			2

From f7380bba42fd0654bf8195fb741d5f92b0f46df9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:26 +0300
Subject: [PATCH 1554/3292] net: pcs: xpcs: add support for NXP SJA1110

The NXP SJA1110 switch integrates its own, non-Synopsys PMA, but it
manages it through the register space of the XPCS itself, in a small
register window inside MDIO_MMD_VEND2 from address 0x8030 to 0x806e.

This coincides with where the registers for the default Synopsys PMA
are, but the register definitions are of course not the same.

This situation is an odd hardware quirk, but the simplest way to manage
it is to drive the SJA1110's PMA from within the XPCS driver.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs-nxp.c | 169 +++++++++++++++++++++++++++++++++
 drivers/net/pcs/pcs-xpcs.c     |  21 ++++
 drivers/net/pcs/pcs-xpcs.h     |   2 +
 include/linux/pcs/pcs-xpcs.h   |   1 +
 4 files changed, 193 insertions(+)

diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c
index 51b2fc7d36a9..de99c37cf2ae 100644
--- a/drivers/net/pcs/pcs-xpcs-nxp.c
+++ b/drivers/net/pcs/pcs-xpcs-nxp.c
@@ -4,6 +4,66 @@
 #include <linux/pcs/pcs-xpcs.h>
 #include "pcs-xpcs.h"
 
+/* LANE_DRIVER1_0 register */
+#define SJA1110_LANE_DRIVER1_0		0x8038
+#define SJA1110_TXDRV(x)		(((x) << 12) & GENMASK(14, 12))
+
+/* LANE_DRIVER2_0 register */
+#define SJA1110_LANE_DRIVER2_0		0x803a
+#define SJA1110_TXDRVTRIM_LSB(x)	((x) & GENMASK_ULL(15, 0))
+
+/* LANE_DRIVER2_1 register */
+#define SJA1110_LANE_DRIVER2_1		0x803b
+#define SJA1110_LANE_DRIVER2_1_RSV	BIT(9)
+#define SJA1110_TXDRVTRIM_MSB(x)	(((x) & GENMASK_ULL(23, 16)) >> 16)
+
+/* LANE_TRIM register */
+#define SJA1110_LANE_TRIM		0x8040
+#define SJA1110_TXTEN			BIT(11)
+#define SJA1110_TXRTRIM(x)		(((x) << 8) & GENMASK(10, 8))
+#define SJA1110_TXPLL_BWSEL		BIT(7)
+#define SJA1110_RXTEN			BIT(6)
+#define SJA1110_RXRTRIM(x)		(((x) << 3) & GENMASK(5, 3))
+#define SJA1110_CDR_GAIN		BIT(2)
+#define SJA1110_ACCOUPLE_RXVCM_EN	BIT(0)
+
+/* LANE_DATAPATH_1 register */
+#define SJA1110_LANE_DATAPATH_1		0x8037
+
+/* POWERDOWN_ENABLE register */
+#define SJA1110_POWERDOWN_ENABLE	0x8041
+#define SJA1110_TXPLL_PD		BIT(12)
+#define SJA1110_TXPD			BIT(11)
+#define SJA1110_RXPKDETEN		BIT(10)
+#define SJA1110_RXCH_PD			BIT(9)
+#define SJA1110_RXBIAS_PD		BIT(8)
+#define SJA1110_RESET_SER_EN		BIT(7)
+#define SJA1110_RESET_SER		BIT(6)
+#define SJA1110_RESET_DES		BIT(5)
+#define SJA1110_RCVEN			BIT(4)
+
+/* RXPLL_CTRL0 register */
+#define SJA1110_RXPLL_CTRL0		0x8065
+#define SJA1110_RXPLL_FBDIV(x)		(((x) << 2) & GENMASK(9, 2))
+
+/* RXPLL_CTRL1 register */
+#define SJA1110_RXPLL_CTRL1		0x8066
+#define SJA1110_RXPLL_REFDIV(x)		((x) & GENMASK(4, 0))
+
+/* TXPLL_CTRL0 register */
+#define SJA1110_TXPLL_CTRL0		0x806d
+#define SJA1110_TXPLL_FBDIV(x)		((x) & GENMASK(11, 0))
+
+/* TXPLL_CTRL1 register */
+#define SJA1110_TXPLL_CTRL1		0x806e
+#define SJA1110_TXPLL_REFDIV(x)		((x) & GENMASK(5, 0))
+
+/* RX_DATA_DETECT register */
+#define SJA1110_RX_DATA_DETECT		0x8045
+
+/* RX_CDR_CTLE register */
+#define SJA1110_RX_CDR_CTLE		0x8042
+
 /* In NXP SJA1105, the PCS is integrated with a PMA that has the TX lane
  * polarity inverted by default (PLUS is MINUS, MINUS is PLUS). To obtain
  * normal non-inverted behavior, the TX lane polarity must be inverted in the
@@ -14,3 +74,112 @@ int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL2,
 			  DW_VR_MII_DIG_CTRL2_TX_POL_INV);
 }
+
+static int nxp_sja1110_pma_config(struct dw_xpcs *xpcs,
+				  u16 txpll_fbdiv, u16 txpll_refdiv,
+				  u16 rxpll_fbdiv, u16 rxpll_refdiv,
+				  u16 rx_cdr_ctle)
+{
+	u16 val;
+	int ret;
+
+	/* Program TX PLL feedback divider and reference divider settings for
+	 * correct oscillation frequency.
+	 */
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_TXPLL_CTRL0,
+			 SJA1110_TXPLL_FBDIV(txpll_fbdiv));
+	if (ret < 0)
+		return ret;
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_TXPLL_CTRL1,
+			 SJA1110_TXPLL_REFDIV(txpll_refdiv));
+	if (ret < 0)
+		return ret;
+
+	/* Program transmitter amplitude and disable amplitude trimming */
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_LANE_DRIVER1_0,
+			 SJA1110_TXDRV(0x5));
+	if (ret < 0)
+		return ret;
+
+	val = SJA1110_TXDRVTRIM_LSB(0xffffffull);
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_LANE_DRIVER2_0, val);
+	if (ret < 0)
+		return ret;
+
+	val = SJA1110_TXDRVTRIM_MSB(0xffffffull) | SJA1110_LANE_DRIVER2_1_RSV;
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_LANE_DRIVER2_1, val);
+	if (ret < 0)
+		return ret;
+
+	/* Enable input and output resistor terminations for low BER. */
+	val = SJA1110_ACCOUPLE_RXVCM_EN | SJA1110_CDR_GAIN |
+	      SJA1110_RXRTRIM(4) | SJA1110_RXTEN | SJA1110_TXPLL_BWSEL |
+	      SJA1110_TXRTRIM(3) | SJA1110_TXTEN;
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_LANE_TRIM, val);
+	if (ret < 0)
+		return ret;
+
+	/* Select PCS as transmitter data source. */
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_LANE_DATAPATH_1, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Program RX PLL feedback divider and reference divider for correct
+	 * oscillation frequency.
+	 */
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_RXPLL_CTRL0,
+			 SJA1110_RXPLL_FBDIV(rxpll_fbdiv));
+	if (ret < 0)
+		return ret;
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_RXPLL_CTRL1,
+			 SJA1110_RXPLL_REFDIV(rxpll_refdiv));
+	if (ret < 0)
+		return ret;
+
+	/* Program threshold for receiver signal detector.
+	 * Enable control of RXPLL by receiver signal detector to disable RXPLL
+	 * when an input signal is not present.
+	 */
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_RX_DATA_DETECT, 0x0005);
+	if (ret < 0)
+		return ret;
+
+	/* Enable TX and RX PLLs and circuits.
+	 * Release reset of PMA to enable data flow to/from PCS.
+	 */
+	val = xpcs_read(xpcs, MDIO_MMD_VEND2, SJA1110_POWERDOWN_ENABLE);
+	if (val < 0)
+		return val;
+
+	val &= ~(SJA1110_TXPLL_PD | SJA1110_TXPD | SJA1110_RXCH_PD |
+		 SJA1110_RXBIAS_PD | SJA1110_RESET_SER_EN |
+		 SJA1110_RESET_SER | SJA1110_RESET_DES);
+	val |= SJA1110_RXPKDETEN | SJA1110_RCVEN;
+
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_POWERDOWN_ENABLE, val);
+	if (ret < 0)
+		return ret;
+
+	/* Program continuous-time linear equalizer (CTLE) settings. */
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_RX_CDR_CTLE,
+			 rx_cdr_ctle);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int nxp_sja1110_sgmii_pma_config(struct dw_xpcs *xpcs)
+{
+	return nxp_sja1110_pma_config(xpcs, 0x19, 0x1, 0x19, 0x1, 0x212a);
+}
+
+int nxp_sja1110_2500basex_pma_config(struct dw_xpcs *xpcs)
+{
+	return nxp_sja1110_pma_config(xpcs, 0x7d, 0x2, 0x7d, 0x2, 0x732a);
+}
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 3b1baacfaf8f..b66e46fc88dc 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -1039,6 +1039,23 @@ static const struct xpcs_compat nxp_sja1105_xpcs_compat[DW_XPCS_INTERFACE_MAX] =
 	},
 };
 
+static const struct xpcs_compat nxp_sja1110_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
+	[DW_XPCS_SGMII] = {
+		.supported = xpcs_sgmii_features,
+		.interface = xpcs_sgmii_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_sgmii_interfaces),
+		.an_mode = DW_AN_C37_SGMII,
+		.pma_config = nxp_sja1110_sgmii_pma_config,
+	},
+	[DW_XPCS_2500BASEX] = {
+		.supported = xpcs_2500basex_features,
+		.interface = xpcs_2500basex_interfaces,
+		.num_interfaces = ARRAY_SIZE(xpcs_2500basex_interfaces),
+		.an_mode = DW_2500BASEX,
+		.pma_config = nxp_sja1110_2500basex_pma_config,
+	},
+};
+
 static const struct xpcs_id xpcs_id_list[] = {
 	{
 		.id = SYNOPSYS_XPCS_ID,
@@ -1048,6 +1065,10 @@ static const struct xpcs_id xpcs_id_list[] = {
 		.id = NXP_SJA1105_XPCS_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.compat = nxp_sja1105_xpcs_compat,
+	}, {
+		.id = NXP_SJA1110_XPCS_ID,
+		.mask = SYNOPSYS_XPCS_MASK,
+		.compat = nxp_sja1110_xpcs_compat,
 	},
 };
 
diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h
index 3daf4276a158..35651d32a224 100644
--- a/drivers/net/pcs/pcs-xpcs.h
+++ b/drivers/net/pcs/pcs-xpcs.h
@@ -111,3 +111,5 @@ int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg);
 int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val);
 
 int nxp_sja1105_sgmii_pma_config(struct dw_xpcs *xpcs);
+int nxp_sja1110_sgmii_pma_config(struct dw_xpcs *xpcs);
+int nxp_sja1110_2500basex_pma_config(struct dw_xpcs *xpcs);
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index c594f7cdc304..dae7dd8ac683 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -11,6 +11,7 @@
 #include <linux/phylink.h>
 
 #define NXP_SJA1105_XPCS_ID		0x00000010
+#define NXP_SJA1110_XPCS_ID		0x00000020
 
 /* AN mode */
 #define DW_AN_C73			1

From a853c68e29bb974ca0cc0a8eaf88c333217556aa Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:27 +0300
Subject: [PATCH 1555/3292] net: pcs: xpcs: export xpcs_do_config and
 xpcs_link_up

The sja1105 hardware has a quirk in that some changes require a switch
reset, which loses all configuration. When the reset is initiated,
everything needs to be reprogrammed, including the MACs and the PCS.
This is currently done in sja1105_static_config_reload() - we manually
call sja1105_adjust_port_config(), sja1105_sgmii_pcs_config() and
sja1105_sgmii_pcs_force_speed() which are all internal functions.

There is a desire for sja1105 to use the common xpcs driver, and that
means that the equivalents of those functions, xpcs_do_config() and
xpcs_link_up() respectively, will no longer be local functions.

Forcing phylink to retrigger a resolve somehow, say by doing dev_close()
followed by dev_open() is not really an option, because the CPU port
might have a PCS as well, and there is no net device which we can close
and reopen for that. Additionally, the dev_close/dev_open sequence might
force a renegotiation of the copper-side link for SGMII ports connected
to a PHY, and this is undesirable as well, because the switch reset is
much quicker than a PHY autoneg, so we would have a lot more downtime.

The only solution I see is for the sja1105 driver to keep doing what
it's doing, and that means we need to export the equivalents from xpcs
for sja1105_sgmii_pcs_config and sja1105_sgmii_pcs_force_speed, and call
them directly in sja1105_static_config_reload(). This will be done
during the conversion patch.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c   | 10 ++++++----
 include/linux/pcs/pcs-xpcs.h |  4 ++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index b66e46fc88dc..63fda3fc40aa 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -757,8 +757,8 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, ret);
 }
 
-static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
-			  unsigned int mode)
+int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
+		   unsigned int mode)
 {
 	const struct xpcs_compat *compat;
 	int ret;
@@ -797,6 +797,7 @@ static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xpcs_do_config);
 
 static int xpcs_config(struct phylink_pcs *pcs, unsigned int mode,
 		       phy_interface_t interface,
@@ -945,8 +946,8 @@ static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int mode,
 		pr_err("%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret));
 }
 
-static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
-			 phy_interface_t interface, int speed, int duplex)
+void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+		  phy_interface_t interface, int speed, int duplex)
 {
 	struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs);
 
@@ -955,6 +956,7 @@ static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 	if (interface == PHY_INTERFACE_MODE_SGMII)
 		return xpcs_link_up_sgmii(xpcs, mode, speed, duplex);
 }
+EXPORT_SYMBOL_GPL(xpcs_link_up);
 
 static u32 xpcs_get_id(struct dw_xpcs *xpcs)
 {
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index dae7dd8ac683..add077a81b21 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -27,6 +27,10 @@ struct dw_xpcs {
 };
 
 int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
+void xpcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+		  phy_interface_t interface, int speed, int duplex);
+int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
+		   unsigned int mode);
 void xpcs_validate(struct dw_xpcs *xpcs, unsigned long *supported,
 		   struct phylink_link_state *state);
 int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,

From 3ad1d171548e85fd582c8de8c0946875579aebe8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:28 +0300
Subject: [PATCH 1556/3292] net: dsa: sja1105: migrate to xpcs for SGMII

There is a desire to use the generic driver for the Synopsys XPCS
located in drivers/net/pcs, and to achieve that, the sja1105 driver must
expose an MDIO bus for the SGMII PCS, because the XPCS probes as an
mdio_device.

In preparation of the SJA1110 which in fact has a different access
procedure for the SJA1105, we register this PCS MDIO bus once in the
common code, but we implement function pointers for the read and write
methods. In this patch there is a single implementation for them.

There is exactly one MDIO bus for the PCS, this will contain all PCSes
at MDIO addresses equal to the port number.

We delete a bunch of hardware support code because the xpcs driver
already does what we need.

We need to hack up the MDIO reads for the PHY ID, since our XPCS
instantiation returns zeroes and there are some specific fixups which
need to be applied by the xpcs driver.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/Kconfig         |   1 +
 drivers/net/dsa/sja1105/sja1105.h       |   6 +
 drivers/net/dsa/sja1105/sja1105_main.c  | 171 ++++--------------------
 drivers/net/dsa/sja1105/sja1105_mdio.c  | 159 ++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_sgmii.h |  53 --------
 drivers/net/dsa/sja1105/sja1105_spi.c   |   4 +
 6 files changed, 195 insertions(+), 199 deletions(-)
 delete mode 100644 drivers/net/dsa/sja1105/sja1105_sgmii.h

diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig
index 5e83b365f17a..8383cd6d2178 100644
--- a/drivers/net/dsa/sja1105/Kconfig
+++ b/drivers/net/dsa/sja1105/Kconfig
@@ -3,6 +3,7 @@ config NET_DSA_SJA1105
 tristate "NXP SJA1105 Ethernet switch family support"
 	depends on NET_DSA && SPI
 	select NET_DSA_TAG_SJA1105
+	select PCS_XPCS
 	select PACKING
 	select CRC32
 	help
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 5f3449351668..82450921059a 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -133,6 +133,8 @@ struct sja1105_info {
 	bool (*rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb);
 	void (*txtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb);
 	int (*clocking_setup)(struct sja1105_private *priv);
+	int (*pcs_mdio_read)(struct mii_bus *bus, int phy, int reg);
+	int (*pcs_mdio_write)(struct mii_bus *bus, int phy, int reg, u16 val);
 	const char *name;
 	bool supports_mii[SJA1105_MAX_NUM_PORTS];
 	bool supports_rmii[SJA1105_MAX_NUM_PORTS];
@@ -265,6 +267,8 @@ struct sja1105_private {
 	struct sja1105_cbs_entry *cbs;
 	struct mii_bus *mdio_base_t1;
 	struct mii_bus *mdio_base_tx;
+	struct mii_bus *mdio_pcs;
+	struct dw_xpcs *xpcs[SJA1105_MAX_NUM_PORTS];
 	struct sja1105_tagger_data tagger_data;
 	struct sja1105_ptp_data ptp_data;
 	struct sja1105_tas_data tas_data;
@@ -297,6 +301,8 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv);
 /* From sja1105_mdio.c */
 int sja1105_mdiobus_register(struct dsa_switch *ds);
 void sja1105_mdiobus_unregister(struct dsa_switch *ds);
+int sja1105_pcs_mdio_read(struct mii_bus *bus, int phy, int reg);
+int sja1105_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val);
 
 /* From sja1105_devlink.c */
 int sja1105_devlink_setup(struct dsa_switch *ds);
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 6e2cfbf605ef..bd1f2686e37d 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -16,13 +16,13 @@
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/of_device.h>
+#include <linux/pcs/pcs-xpcs.h>
 #include <linux/netdev_features.h>
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
 #include <linux/if_ether.h>
 #include <linux/dsa/8021q.h>
 #include "sja1105.h"
-#include "sja1105_sgmii.h"
 #include "sja1105_tas.h"
 
 #define SJA1105_UNKNOWN_MULTICAST	0x010000000000ull
@@ -1002,93 +1002,6 @@ static int sja1105_parse_dt(struct sja1105_private *priv)
 	return rc;
 }
 
-static int sja1105_sgmii_read(struct sja1105_private *priv, int port, int mmd,
-			      int pcs_reg)
-{
-	u64 addr = (mmd << 16) | pcs_reg;
-	u32 val;
-	int rc;
-
-	if (port != SJA1105_SGMII_PORT)
-		return -ENODEV;
-
-	rc = sja1105_xfer_u32(priv, SPI_READ, addr, &val, NULL);
-	if (rc < 0)
-		return rc;
-
-	return val;
-}
-
-static int sja1105_sgmii_write(struct sja1105_private *priv, int port, int mmd,
-			       int pcs_reg, u16 pcs_val)
-{
-	u64 addr = (mmd << 16) | pcs_reg;
-	u32 val = pcs_val;
-	int rc;
-
-	if (port != SJA1105_SGMII_PORT)
-		return -ENODEV;
-
-	rc = sja1105_xfer_u32(priv, SPI_WRITE, addr, &val, NULL);
-	if (rc < 0)
-		return rc;
-
-	return val;
-}
-
-static void sja1105_sgmii_pcs_config(struct sja1105_private *priv, int port,
-				     bool an_enabled, bool an_master)
-{
-	u16 ac = SJA1105_AC_AUTONEG_MODE_SGMII;
-
-	/* DIGITAL_CONTROL_1: Enable vendor-specific MMD1, allow the PHY to
-	 * stop the clock during LPI mode, make the MAC reconfigure
-	 * autonomously after PCS autoneg is done, flush the internal FIFOs.
-	 */
-	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, SJA1105_DC1,
-			    SJA1105_DC1_EN_VSMMD1 |
-			    SJA1105_DC1_CLOCK_STOP_EN |
-			    SJA1105_DC1_MAC_AUTO_SW |
-			    SJA1105_DC1_INIT);
-	/* DIGITAL_CONTROL_2: No polarity inversion for TX and RX lanes */
-	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, SJA1105_DC2,
-			    SJA1105_DC2_TX_POL_INV_DISABLE);
-	/* AUTONEG_CONTROL: Use SGMII autoneg */
-	if (an_master)
-		ac |= SJA1105_AC_PHY_MODE | SJA1105_AC_SGMII_LINK;
-	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, SJA1105_AC, ac);
-	/* BASIC_CONTROL: enable in-band AN now, if requested. Otherwise,
-	 * sja1105_sgmii_pcs_force_speed must be called later for the link
-	 * to become operational.
-	 */
-	if (an_enabled)
-		sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, MDIO_CTRL1,
-				    BMCR_ANENABLE | BMCR_ANRESTART);
-}
-
-static void sja1105_sgmii_pcs_force_speed(struct sja1105_private *priv,
-					  int port, int speed)
-{
-	int pcs_speed;
-
-	switch (speed) {
-	case SPEED_1000:
-		pcs_speed = BMCR_SPEED1000;
-		break;
-	case SPEED_100:
-		pcs_speed = BMCR_SPEED100;
-		break;
-	case SPEED_10:
-		pcs_speed = BMCR_SPEED10;
-		break;
-	default:
-		dev_err(priv->ds->dev, "Invalid speed %d\n", speed);
-		return;
-	}
-	sja1105_sgmii_write(priv, port, MDIO_MMD_VEND2, MDIO_CTRL1,
-			    pcs_speed | BMCR_FULLDPLX);
-}
-
 /* Convert link speed from SJA1105 to ethtool encoding */
 static int sja1105_port_speed_to_ethtool(struct sja1105_private *priv,
 					 u64 speed)
@@ -1195,10 +1108,9 @@ static void sja1105_mac_config(struct dsa_switch *ds, int port,
 			       unsigned int mode,
 			       const struct phylink_link_state *state)
 {
+	struct dsa_port *dp = dsa_to_port(ds, port);
 	struct sja1105_private *priv = ds->priv;
-	bool is_sgmii;
-
-	is_sgmii = (state->interface == PHY_INTERFACE_MODE_SGMII);
+	struct dw_xpcs *xpcs;
 
 	if (sja1105_phy_mode_mismatch(priv, port, state->interface)) {
 		dev_err(ds->dev, "Changing PHY mode to %s not supported!\n",
@@ -1206,15 +1118,10 @@ static void sja1105_mac_config(struct dsa_switch *ds, int port,
 		return;
 	}
 
-	if (phylink_autoneg_inband(mode) && !is_sgmii) {
-		dev_err(ds->dev, "In-band AN not supported!\n");
-		return;
-	}
+	xpcs = priv->xpcs[port];
 
-	if (is_sgmii)
-		sja1105_sgmii_pcs_config(priv, port,
-					 phylink_autoneg_inband(mode),
-					 false);
+	if (xpcs)
+		phylink_set_pcs(dp->pl, &xpcs->pcs);
 }
 
 static void sja1105_mac_link_down(struct dsa_switch *ds, int port,
@@ -1235,10 +1142,6 @@ static void sja1105_mac_link_up(struct dsa_switch *ds, int port,
 
 	sja1105_adjust_port_config(priv, port, speed);
 
-	if (priv->phy_mode[port] == PHY_INTERFACE_MODE_SGMII &&
-	    !phylink_autoneg_inband(mode))
-		sja1105_sgmii_pcs_force_speed(priv, port, speed);
-
 	sja1105_inhibit_tx(priv, BIT(port), false);
 }
 
@@ -1283,38 +1186,6 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port,
 		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
-static int sja1105_mac_pcs_get_state(struct dsa_switch *ds, int port,
-				     struct phylink_link_state *state)
-{
-	struct sja1105_private *priv = ds->priv;
-	int ais;
-
-	/* Read the vendor-specific AUTONEG_INTR_STATUS register */
-	ais = sja1105_sgmii_read(priv, port, MDIO_MMD_VEND2, SJA1105_AIS);
-	if (ais < 0)
-		return ais;
-
-	switch (SJA1105_AIS_SPEED(ais)) {
-	case 0:
-		state->speed = SPEED_10;
-		break;
-	case 1:
-		state->speed = SPEED_100;
-		break;
-	case 2:
-		state->speed = SPEED_1000;
-		break;
-	default:
-		dev_err(ds->dev, "Invalid SGMII PCS speed %lu\n",
-			SJA1105_AIS_SPEED(ais));
-	}
-	state->duplex = SJA1105_AIS_DUPLEX_MODE(ais);
-	state->an_complete = SJA1105_AIS_COMPLETE(ais);
-	state->link = SJA1105_AIS_LINK_STATUS(ais);
-
-	return 0;
-}
-
 static int
 sja1105_find_static_fdb_entry(struct sja1105_private *priv, int port,
 			      const struct sja1105_l2_lookup_entry *requested)
@@ -1990,14 +1861,14 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	 * change it through the dynamic interface later.
 	 */
 	for (i = 0; i < ds->num_ports; i++) {
+		u32 reg_addr = mdiobus_c45_addr(MDIO_MMD_VEND2, MDIO_CTRL1);
+
 		speed_mbps[i] = sja1105_port_speed_to_ethtool(priv,
 							      mac[i].speed);
 		mac[i].speed = priv->info->port_speed[SJA1105_SPEED_AUTO];
 
-		if (priv->phy_mode[i] == PHY_INTERFACE_MODE_SGMII)
-			bmcr[i] = sja1105_sgmii_read(priv, i,
-						     MDIO_MMD_VEND2,
-						     MDIO_CTRL1);
+		if (priv->xpcs[i])
+			bmcr[i] = mdiobus_read(priv->mdio_pcs, i, reg_addr);
 	}
 
 	/* No PTP operations can run right now */
@@ -2045,20 +1916,28 @@ out_unlock_ptp:
 		goto out;
 
 	for (i = 0; i < ds->num_ports; i++) {
-		bool an_enabled;
+		struct dw_xpcs *xpcs = priv->xpcs[i];
+		unsigned int mode;
 
 		rc = sja1105_adjust_port_config(priv, i, speed_mbps[i]);
 		if (rc < 0)
 			goto out;
 
-		if (priv->phy_mode[i] != PHY_INTERFACE_MODE_SGMII)
+		if (!xpcs)
 			continue;
 
-		an_enabled = !!(bmcr[i] & BMCR_ANENABLE);
+		if (bmcr[i] & BMCR_ANENABLE)
+			mode = MLO_AN_INBAND;
+		else if (priv->fixed_link[i])
+			mode = MLO_AN_FIXED;
+		else
+			mode = MLO_AN_PHY;
 
-		sja1105_sgmii_pcs_config(priv, i, an_enabled, false);
+		rc = xpcs_do_config(xpcs, priv->phy_mode[i], mode);
+		if (rc < 0)
+			goto out;
 
-		if (!an_enabled) {
+		if (!phylink_autoneg_inband(mode)) {
 			int speed = SPEED_UNKNOWN;
 
 			if (bmcr[i] & BMCR_SPEED1000)
@@ -2068,7 +1947,8 @@ out_unlock_ptp:
 			else
 				speed = SPEED_10;
 
-			sja1105_sgmii_pcs_force_speed(priv, i, speed);
+			xpcs_link_up(&xpcs->pcs, mode, priv->phy_mode[i],
+				     speed, DUPLEX_FULL);
 		}
 	}
 
@@ -3649,7 +3529,6 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
 	.port_change_mtu	= sja1105_change_mtu,
 	.port_max_mtu		= sja1105_get_max_mtu,
 	.phylink_validate	= sja1105_phylink_validate,
-	.phylink_mac_link_state	= sja1105_mac_pcs_get_state,
 	.phylink_mac_config	= sja1105_mac_config,
 	.phylink_mac_link_up	= sja1105_mac_link_up,
 	.phylink_mac_link_down	= sja1105_mac_link_down,
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 08517c70cb48..5185471e9b7c 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -1,9 +1,61 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright 2021, NXP Semiconductors
  */
+#include <linux/pcs/pcs-xpcs.h>
 #include <linux/of_mdio.h>
 #include "sja1105.h"
 
+int sja1105_pcs_mdio_read(struct mii_bus *bus, int phy, int reg)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	u64 addr;
+	u32 tmp;
+	u16 mmd;
+	int rc;
+
+	if (!(reg & MII_ADDR_C45))
+		return -EINVAL;
+
+	mmd = (reg >> MII_DEVADDR_C45_SHIFT) & 0x1f;
+	addr = (mmd << 16) | (reg & GENMASK(15, 0));
+
+	if (mmd != MDIO_MMD_VEND1 && mmd != MDIO_MMD_VEND2)
+		return 0xffff;
+
+	if (mmd == MDIO_MMD_VEND2 && (reg & GENMASK(15, 0)) == MII_PHYSID1)
+		return NXP_SJA1105_XPCS_ID >> 16;
+	if (mmd == MDIO_MMD_VEND2 && (reg & GENMASK(15, 0)) == MII_PHYSID2)
+		return NXP_SJA1105_XPCS_ID & GENMASK(15, 0);
+
+	rc = sja1105_xfer_u32(priv, SPI_READ, addr, &tmp, NULL);
+	if (rc < 0)
+		return rc;
+
+	return tmp & 0xffff;
+}
+
+int sja1105_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	u64 addr;
+	u32 tmp;
+	u16 mmd;
+
+	if (!(reg & MII_ADDR_C45))
+		return -EINVAL;
+
+	mmd = (reg >> MII_DEVADDR_C45_SHIFT) & 0x1f;
+	addr = (mmd << 16) | (reg & GENMASK(15, 0));
+	tmp = val;
+
+	if (mmd != MDIO_MMD_VEND1 && mmd != MDIO_MMD_VEND2)
+		return -EINVAL;
+
+	return sja1105_xfer_u32(priv, SPI_WRITE, addr, &tmp, NULL);
+}
+
 enum sja1105_mdio_opcode {
 	SJA1105_C45_ADDR = 0,
 	SJA1105_C22 = 1,
@@ -239,6 +291,107 @@ static void sja1105_mdiobus_base_t1_unregister(struct sja1105_private *priv)
 	priv->mdio_base_t1 = NULL;
 }
 
+static int sja1105_mdiobus_pcs_register(struct sja1105_private *priv)
+{
+	struct sja1105_mdio_private *mdio_priv;
+	struct dsa_switch *ds = priv->ds;
+	struct mii_bus *bus;
+	int rc = 0;
+	int port;
+
+	if (!priv->info->pcs_mdio_read || !priv->info->pcs_mdio_write)
+		return 0;
+
+	bus = mdiobus_alloc_size(sizeof(*mdio_priv));
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = "SJA1105 PCS MDIO bus";
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-pcs",
+		 dev_name(ds->dev));
+	bus->read = priv->info->pcs_mdio_read;
+	bus->write = priv->info->pcs_mdio_write;
+	bus->parent = ds->dev;
+	/* There is no PHY on this MDIO bus => mask out all PHY addresses
+	 * from auto probing.
+	 */
+	bus->phy_mask = ~0;
+	mdio_priv = bus->priv;
+	mdio_priv->priv = priv;
+
+	rc = mdiobus_register(bus);
+	if (rc) {
+		mdiobus_free(bus);
+		return rc;
+	}
+
+	for (port = 0; port < ds->num_ports; port++) {
+		struct mdio_device *mdiodev;
+		struct dw_xpcs *xpcs;
+
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
+		if (priv->phy_mode[port] != PHY_INTERFACE_MODE_SGMII)
+			continue;
+
+		mdiodev = mdio_device_create(bus, port);
+		if (IS_ERR(mdiodev)) {
+			rc = PTR_ERR(mdiodev);
+			goto out_pcs_free;
+		}
+
+		xpcs = xpcs_create(mdiodev, priv->phy_mode[port]);
+		if (IS_ERR(xpcs)) {
+			rc = PTR_ERR(xpcs);
+			goto out_pcs_free;
+		}
+
+		priv->xpcs[port] = xpcs;
+	}
+
+	priv->mdio_pcs = bus;
+
+	return 0;
+
+out_pcs_free:
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!priv->xpcs[port])
+			continue;
+
+		mdio_device_free(priv->xpcs[port]->mdiodev);
+		xpcs_destroy(priv->xpcs[port]);
+		priv->xpcs[port] = NULL;
+	}
+
+	mdiobus_unregister(bus);
+	mdiobus_free(bus);
+
+	return rc;
+}
+
+static void sja1105_mdiobus_pcs_unregister(struct sja1105_private *priv)
+{
+	struct dsa_switch *ds = priv->ds;
+	int port;
+
+	if (!priv->mdio_pcs)
+		return;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!priv->xpcs[port])
+			continue;
+
+		mdio_device_free(priv->xpcs[port]->mdiodev);
+		xpcs_destroy(priv->xpcs[port]);
+		priv->xpcs[port] = NULL;
+	}
+
+	mdiobus_unregister(priv->mdio_pcs);
+	mdiobus_free(priv->mdio_pcs);
+	priv->mdio_pcs = NULL;
+}
+
 int sja1105_mdiobus_register(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
@@ -247,6 +400,10 @@ int sja1105_mdiobus_register(struct dsa_switch *ds)
 	struct device_node *mdio_node;
 	int rc;
 
+	rc = sja1105_mdiobus_pcs_register(priv);
+	if (rc)
+		return rc;
+
 	mdio_node = of_get_child_by_name(switch_node, "mdios");
 	if (!mdio_node)
 		return 0;
@@ -275,6 +432,7 @@ err_free_base_tx_mdiobus:
 	sja1105_mdiobus_base_tx_unregister(priv);
 err_put_mdio_node:
 	of_node_put(mdio_node);
+	sja1105_mdiobus_pcs_unregister(priv);
 
 	return rc;
 }
@@ -285,4 +443,5 @@ void sja1105_mdiobus_unregister(struct dsa_switch *ds)
 
 	sja1105_mdiobus_base_t1_unregister(priv);
 	sja1105_mdiobus_base_tx_unregister(priv);
+	sja1105_mdiobus_pcs_unregister(priv);
 }
diff --git a/drivers/net/dsa/sja1105/sja1105_sgmii.h b/drivers/net/dsa/sja1105/sja1105_sgmii.h
deleted file mode 100644
index 24d9bc046e70..000000000000
--- a/drivers/net/dsa/sja1105/sja1105_sgmii.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright 2020, NXP Semiconductors
- */
-#ifndef _SJA1105_SGMII_H
-#define _SJA1105_SGMII_H
-
-#define SJA1105_SGMII_PORT		4
-
-/* DIGITAL_CONTROL_1 (address 1f8000h) */
-#define SJA1105_DC1			0x8000
-#define SJA1105_DC1_VS_RESET		BIT(15)
-#define SJA1105_DC1_REMOTE_LOOPBACK	BIT(14)
-#define SJA1105_DC1_EN_VSMMD1		BIT(13)
-#define SJA1105_DC1_POWER_SAVE		BIT(11)
-#define SJA1105_DC1_CLOCK_STOP_EN	BIT(10)
-#define SJA1105_DC1_MAC_AUTO_SW		BIT(9)
-#define SJA1105_DC1_INIT		BIT(8)
-#define SJA1105_DC1_TX_DISABLE		BIT(4)
-#define SJA1105_DC1_AUTONEG_TIMER_OVRR	BIT(3)
-#define SJA1105_DC1_BYP_POWERUP		BIT(1)
-#define SJA1105_DC1_PHY_MODE_CONTROL	BIT(0)
-
-/* DIGITAL_CONTROL_2 register (address 1f80E1h) */
-#define SJA1105_DC2			0x80e1
-#define SJA1105_DC2_TX_POL_INV_DISABLE	BIT(4)
-#define SJA1105_DC2_RX_POL_INV		BIT(0)
-
-/* DIGITAL_ERROR_CNT register (address 1f80E2h) */
-#define SJA1105_DEC			0x80e2
-#define SJA1105_DEC_ICG_EC_ENA		BIT(4)
-#define SJA1105_DEC_CLEAR_ON_READ	BIT(0)
-
-/* AUTONEG_CONTROL register (address 1f8001h) */
-#define SJA1105_AC			0x8001
-#define SJA1105_AC_MII_CONTROL		BIT(8)
-#define SJA1105_AC_SGMII_LINK		BIT(4)
-#define SJA1105_AC_PHY_MODE		BIT(3)
-#define SJA1105_AC_AUTONEG_MODE(x)	(((x) << 1) & GENMASK(2, 1))
-#define SJA1105_AC_AUTONEG_MODE_SGMII	SJA1105_AC_AUTONEG_MODE(2)
-
-/* AUTONEG_INTR_STATUS register (address 1f8002h) */
-#define SJA1105_AIS			0x8002
-#define SJA1105_AIS_LINK_STATUS(x)	(!!((x) & BIT(4)))
-#define SJA1105_AIS_SPEED(x)		(((x) & GENMASK(3, 2)) >> 2)
-#define SJA1105_AIS_DUPLEX_MODE(x)	(!!((x) & BIT(1)))
-#define SJA1105_AIS_COMPLETE(x)		(!!((x) & BIT(0)))
-
-/* DEBUG_CONTROL register (address 1f8005h) */
-#define SJA1105_DC			0x8005
-#define SJA1105_DC_SUPPRESS_LOS		BIT(4)
-#define SJA1105_DC_RESTART_SYNC		BIT(0)
-
-#endif
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 32d00212423c..c1c54b7ff0e4 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -717,6 +717,8 @@ const struct sja1105_info sja1105r_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
+	.pcs_mdio_read		= sja1105_pcs_mdio_read,
+	.pcs_mdio_write		= sja1105_pcs_mdio_write,
 	.regs			= &sja1105pqrs_regs,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
@@ -753,6 +755,8 @@ const struct sja1105_info sja1105s_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1105_rxtstamp,
 	.clocking_setup		= sja1105_clocking_setup,
+	.pcs_mdio_read		= sja1105_pcs_mdio_read,
+	.pcs_mdio_write		= sja1105_pcs_mdio_write,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
 		[SJA1105_SPEED_10MBPS] = 3,

From 27871359bdf82677c0a854d17eb93c34402321c9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:29 +0300
Subject: [PATCH 1557/3292] net: dsa: sja1105: register the PCS MDIO bus for
 SJA1110

On the SJA1110, the PCS of each SERDES-capable port is accessed through
a different memory window which is 0x100 bytes in size, denoted by
"pcs_base".

In each PCS register access window, the XPCS MMDs are accessed in an
indirect way: in pages/banks of up to 0x100 addresses each. Changing the
page/bank is done by writing to a special register at the end of the
access window.

The MDIO register map accessed indirectly through the indirect banked
method described above is similar to what SJA1105 has: upper 5 bits are
the MMD, lower 16 bits are the MDIO address within that MMD.

Since the PHY ID reported by the XPCS inside SJA1110 is also all zeroes
(like SJA1105), we need to trap those reads and return a fake PHY ID so
that the xpcs driver can apply some specific fixups for our integration.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  3 +
 drivers/net/dsa/sja1105/sja1105_mdio.c | 95 ++++++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_spi.c  | 11 +++
 3 files changed, 109 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 82450921059a..39124726bdd9 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -69,6 +69,7 @@ struct sja1105_regs {
 	u64 stats[__MAX_SJA1105_STATS_AREA][SJA1105_MAX_NUM_PORTS];
 	u64 mdio_100base_tx;
 	u64 mdio_100base_t1;
+	u64 pcs_base[SJA1105_MAX_NUM_PORTS];
 };
 
 struct sja1105_mdio_private {
@@ -303,6 +304,8 @@ int sja1105_mdiobus_register(struct dsa_switch *ds);
 void sja1105_mdiobus_unregister(struct dsa_switch *ds);
 int sja1105_pcs_mdio_read(struct mii_bus *bus, int phy, int reg);
 int sja1105_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val);
+int sja1110_pcs_mdio_read(struct mii_bus *bus, int phy, int reg);
+int sja1110_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val);
 
 /* From sja1105_devlink.c */
 int sja1105_devlink_setup(struct dsa_switch *ds);
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 5185471e9b7c..41468e51a38e 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -5,6 +5,8 @@
 #include <linux/of_mdio.h>
 #include "sja1105.h"
 
+#define SJA1110_PCS_BANK_REG		SJA1110_SPI_ADDR(0x3fc)
+
 int sja1105_pcs_mdio_read(struct mii_bus *bus, int phy, int reg)
 {
 	struct sja1105_mdio_private *mdio_priv = bus->priv;
@@ -56,6 +58,99 @@ int sja1105_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 	return sja1105_xfer_u32(priv, SPI_WRITE, addr, &tmp, NULL);
 }
 
+int sja1110_pcs_mdio_read(struct mii_bus *bus, int phy, int reg)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	const struct sja1105_regs *regs = priv->info->regs;
+	int offset, bank;
+	u64 addr;
+	u32 tmp;
+	u16 mmd;
+	int rc;
+
+	if (!(reg & MII_ADDR_C45))
+		return -EINVAL;
+
+	if (regs->pcs_base[phy] == SJA1105_RSV_ADDR)
+		return -ENODEV;
+
+	mmd = (reg >> MII_DEVADDR_C45_SHIFT) & 0x1f;
+	addr = (mmd << 16) | (reg & GENMASK(15, 0));
+
+	if (mmd == MDIO_MMD_VEND2 && (reg & GENMASK(15, 0)) == MII_PHYSID1)
+		return NXP_SJA1110_XPCS_ID >> 16;
+	if (mmd == MDIO_MMD_VEND2 && (reg & GENMASK(15, 0)) == MII_PHYSID2)
+		return NXP_SJA1110_XPCS_ID & GENMASK(15, 0);
+
+	bank = addr >> 8;
+	offset = addr & GENMASK(7, 0);
+
+	/* This addressing scheme reserves register 0xff for the bank address
+	 * register, so that can never be addressed.
+	 */
+	if (WARN_ON(offset == 0xff))
+		return -ENODEV;
+
+	tmp = bank;
+
+	rc = sja1105_xfer_u32(priv, SPI_WRITE,
+			      regs->pcs_base[phy] + SJA1110_PCS_BANK_REG,
+			      &tmp, NULL);
+	if (rc < 0)
+		return rc;
+
+	rc = sja1105_xfer_u32(priv, SPI_READ, regs->pcs_base[phy] + offset,
+			      &tmp, NULL);
+	if (rc < 0)
+		return rc;
+
+	return tmp & 0xffff;
+}
+
+int sja1110_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
+{
+	struct sja1105_mdio_private *mdio_priv = bus->priv;
+	struct sja1105_private *priv = mdio_priv->priv;
+	const struct sja1105_regs *regs = priv->info->regs;
+	int offset, bank;
+	u64 addr;
+	u32 tmp;
+	u16 mmd;
+	int rc;
+
+	if (!(reg & MII_ADDR_C45))
+		return -EINVAL;
+
+	if (regs->pcs_base[phy] == SJA1105_RSV_ADDR)
+		return -ENODEV;
+
+	mmd = (reg >> MII_DEVADDR_C45_SHIFT) & 0x1f;
+	addr = (mmd << 16) | (reg & GENMASK(15, 0));
+
+	bank = addr >> 8;
+	offset = addr & GENMASK(7, 0);
+
+	/* This addressing scheme reserves register 0xff for the bank address
+	 * register, so that can never be addressed.
+	 */
+	if (WARN_ON(offset == 0xff))
+		return -ENODEV;
+
+	tmp = bank;
+
+	rc = sja1105_xfer_u32(priv, SPI_WRITE,
+			      regs->pcs_base[phy] + SJA1110_PCS_BANK_REG,
+			      &tmp, NULL);
+	if (rc < 0)
+		return rc;
+
+	tmp = val;
+
+	return sja1105_xfer_u32(priv, SPI_WRITE, regs->pcs_base[phy] + offset,
+				&tmp, NULL);
+}
+
 enum sja1105_mdio_opcode {
 	SJA1105_C45_ADDR = 0,
 	SJA1105_C22 = 1,
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index c1c54b7ff0e4..96768af4c6a8 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -561,6 +561,9 @@ static struct sja1105_regs sja1110_regs = {
 	.ptpsyncts = SJA1110_SPI_ADDR(0x84),
 	.mdio_100base_tx = 0x1c2400,
 	.mdio_100base_t1 = 0x1c1000,
+	.pcs_base = {SJA1105_RSV_ADDR, 0x1c1400, 0x1c1800, 0x1c1c00, 0x1c2000,
+		     SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR,
+		     SJA1105_RSV_ADDR, SJA1105_RSV_ADDR, SJA1105_RSV_ADDR},
 };
 
 const struct sja1105_info sja1105e_info = {
@@ -794,6 +797,8 @@ const struct sja1105_info sja1110a_info = {
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
+	.pcs_mdio_read		= sja1110_pcs_mdio_read,
+	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
 		[SJA1105_SPEED_10MBPS] = 4,
@@ -843,6 +848,8 @@ const struct sja1105_info sja1110b_info = {
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
+	.pcs_mdio_read		= sja1110_pcs_mdio_read,
+	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
 		[SJA1105_SPEED_10MBPS] = 4,
@@ -892,6 +899,8 @@ const struct sja1105_info sja1110c_info = {
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
+	.pcs_mdio_read		= sja1110_pcs_mdio_read,
+	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
 		[SJA1105_SPEED_10MBPS] = 4,
@@ -941,6 +950,8 @@ const struct sja1105_info sja1110d_info = {
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
 	.clocking_setup		= sja1110_clocking_setup,
+	.pcs_mdio_read		= sja1110_pcs_mdio_read,
+	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
 		[SJA1105_SPEED_AUTO] = 0,
 		[SJA1105_SPEED_10MBPS] = 4,

From ece578bc3ea44a39efdb5299ce60c1a54cd2e184 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:30 +0300
Subject: [PATCH 1558/3292] net: dsa: sja1105: SGMII and 2500base-x on the
 SJA1110 are 'special'

For the xMII Mode Parameters Table to be properly configured for SGMII
mode on SJA1110, we need to set the "special" bit, since SGMII is
officially bitwise coded as 0b0011 in SJA1105 (decimal 3, equal to
XMII_MODE_SGMII), and as 0b1011 in SJA1110 (decimal 11).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index bd1f2686e37d..3e32b8676fa7 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -209,12 +209,14 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv)
 				goto unsupported;
 
 			mii->xmii_mode[i] = XMII_MODE_SGMII;
+			mii->special[i] = true;
 			break;
 		case PHY_INTERFACE_MODE_2500BASEX:
 			if (!priv->info->supports_2500basex[i])
 				goto unsupported;
 
 			mii->xmii_mode[i] = XMII_MODE_SGMII;
+			mii->special[i] = true;
 			break;
 unsupported:
 		default:

From 56b63466333b25f4d6482516070251cb0a757a6c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 11 Jun 2021 23:05:31 +0300
Subject: [PATCH 1559/3292] net: dsa: sja1105: plug in support for 2500base-x

The MAC treats 2500base-x same as SGMII (yay for that) except that it
must be set to a different speed.

Extend all places that check for SGMII to also check for 2500base-x.

Also add the missing 2500base-x compatibility matrix entry for SJA1110D.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 13 ++++++++++++-
 drivers/net/dsa/sja1105/sja1105_mdio.c |  3 ++-
 drivers/net/dsa/sja1105/sja1105_spi.c  |  2 ++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 3e32b8676fa7..8e5cdf93c23b 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1056,6 +1056,9 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	case SPEED_1000:
 		speed = priv->info->port_speed[SJA1105_SPEED_1000MBPS];
 		break;
+	case SPEED_2500:
+		speed = priv->info->port_speed[SJA1105_SPEED_2500MBPS];
+		break;
 	default:
 		dev_err(dev, "Invalid speed %iMbps\n", speed_mbps);
 		return -EINVAL;
@@ -1070,6 +1073,8 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
 	 */
 	if (priv->phy_mode[port] == PHY_INTERFACE_MODE_SGMII)
 		mac[port].speed = priv->info->port_speed[SJA1105_SPEED_1000MBPS];
+	else if (priv->phy_mode[port] == PHY_INTERFACE_MODE_2500BASEX)
+		mac[port].speed = priv->info->port_speed[SJA1105_SPEED_2500MBPS];
 	else
 		mac[port].speed = speed;
 
@@ -1182,6 +1187,10 @@ static void sja1105_phylink_validate(struct dsa_switch *ds, int port,
 	if (mii->xmii_mode[port] == XMII_MODE_RGMII ||
 	    mii->xmii_mode[port] == XMII_MODE_SGMII)
 		phylink_set(mask, 1000baseT_Full);
+	if (priv->info->supports_2500basex[port]) {
+		phylink_set(mask, 2500baseT_Full);
+		phylink_set(mask, 2500baseX_Full);
+	}
 
 	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
 	bitmap_and(state->advertising, state->advertising, mask,
@@ -1942,7 +1951,9 @@ out_unlock_ptp:
 		if (!phylink_autoneg_inband(mode)) {
 			int speed = SPEED_UNKNOWN;
 
-			if (bmcr[i] & BMCR_SPEED1000)
+			if (priv->phy_mode[i] == PHY_INTERFACE_MODE_2500BASEX)
+				speed = SPEED_2500;
+			else if (bmcr[i] & BMCR_SPEED1000)
 				speed = SPEED_1000;
 			else if (bmcr[i] & BMCR_SPEED100)
 				speed = SPEED_100;
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 41468e51a38e..19aea8fb76f6 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -427,7 +427,8 @@ static int sja1105_mdiobus_pcs_register(struct sja1105_private *priv)
 		if (dsa_is_unused_port(ds, port))
 			continue;
 
-		if (priv->phy_mode[port] != PHY_INTERFACE_MODE_SGMII)
+		if (priv->phy_mode[port] != PHY_INTERFACE_MODE_SGMII &&
+		    priv->phy_mode[port] != PHY_INTERFACE_MODE_2500BASEX)
 			continue;
 
 		mdiodev = mdio_device_create(bus, port);
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 96768af4c6a8..7c493c6a839d 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -967,6 +967,8 @@ const struct sja1105_info sja1110d_info = {
 				   false, false, false, false, false, false},
 	.supports_sgmii		= {false, true, true, true, true,
 				   false, false, false, false, false, false},
+	.supports_2500basex     = {false, false, false, true, true,
+				   false, false, false, false, false, false},
 	.internal_phy		= {SJA1105_NO_PHY, SJA1105_NO_PHY,
 				   SJA1105_NO_PHY, SJA1105_NO_PHY,
 				   SJA1105_NO_PHY, SJA1105_PHY_BASE_T1,

From 0b217d3d746233ace52b5dcb26974e929abf62c5 Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Fri, 11 Jun 2021 13:33:53 -0500
Subject: [PATCH 1560/3292] ibmvnic: fix kernel build warning in strncpy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/net/ethernet/ibm/ibmvnic.c: In function ‘handle_vpd_rsp’:
drivers/net/ethernet/ibm/ibmvnic.c:4393:3: warning: ‘strncpy’ output truncated before terminating nul copying 3 bytes from a string of the same length [-Wstringop-truncation]
 4393 |   strncpy((char *)adapter->fw_version, "N/A", 3 * sizeof(char));
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index d66e15866315..e2630b60c001 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4385,7 +4385,7 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq,
 
 complete:
 	if (adapter->fw_version[0] == '\0')
-		strncpy((char *)adapter->fw_version, "N/A", 3 * sizeof(char));
+		strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version));
 	complete(&adapter->fw_done);
 }
 

From 9e8fb7bf9c8033e0617515d9a06c2ae9f58b812f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 15:39:38 -0500
Subject: [PATCH 1561/3292] net: ipa: make endpoint data validation
 unconditional

The cost of validating the endpoint configuration data is not all
that high, so just do it unconditionally, rather than doing so only
when IPA_VALIDATAION is defined.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 07a81b73306f..3520852936ed 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -75,8 +75,6 @@ struct ipa_status {
 #define IPA_STATUS_FLAGS1_RT_RULE_ID_FMASK	GENMASK(31, 22)
 #define IPA_STATUS_FLAGS2_TAG_FMASK		GENMASK_ULL(63, 16)
 
-#ifdef IPA_VALIDATE
-
 static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
 			    const struct ipa_gsi_endpoint_data *all_data,
 			    const struct ipa_gsi_endpoint_data *data)
@@ -225,16 +223,6 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
 	return true;
 }
 
-#else /* !IPA_VALIDATE */
-
-static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
-				    const struct ipa_gsi_endpoint_data *data)
-{
-	return true;
-}
-
-#endif /* !IPA_VALIDATE */
-
 /* Allocate a transaction to use on a non-command endpoint */
 static struct gsi_trans *ipa_endpoint_trans_alloc(struct ipa_endpoint *endpoint,
 						  u32 tre_count)

From e22e8e2fae61de990e3a815a66f2ffa166669b8e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 15:39:39 -0500
Subject: [PATCH 1562/3292] net: ipa: introduce ipa_version_valid()

Define and use a new function that just validates the version
defined in configuration data.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 9915603ed10b..cbd39e4667a3 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -639,6 +639,27 @@ static void ipa_validate_build(void)
 #endif /* IPA_VALIDATE */
 }
 
+static bool ipa_version_valid(enum ipa_version version)
+{
+	switch (version) {
+	case IPA_VERSION_3_0:
+	case IPA_VERSION_3_1:
+	case IPA_VERSION_3_5:
+	case IPA_VERSION_3_5_1:
+	case IPA_VERSION_4_0:
+	case IPA_VERSION_4_1:
+	case IPA_VERSION_4_2:
+	case IPA_VERSION_4_5:
+	case IPA_VERSION_4_7:
+	case IPA_VERSION_4_9:
+	case IPA_VERSION_4_11:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
 /**
  * ipa_probe() - IPA platform driver probe function
  * @pdev:	Platform device pointer
@@ -676,11 +697,15 @@ static int ipa_probe(struct platform_device *pdev)
 	/* Get configuration data early; needed for clock initialization */
 	data = of_device_get_match_data(dev);
 	if (!data) {
-		/* This is really IPA_VALIDATE (should never happen) */
 		dev_err(dev, "matched hardware not supported\n");
 		return -ENODEV;
 	}
 
+	if (!ipa_version_valid(data->version)) {
+		dev_err(dev, "invalid IPA version\n");
+		return -EINVAL;
+	}
+
 	/* If we need Trust Zone, make sure it's available */
 	modem_init = of_property_read_bool(dev->of_node, "modem-init");
 	if (!modem_init)

From 2e3cf97f4741b320e8f4639fcca732b17614a55f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 15:39:40 -0500
Subject: [PATCH 1563/3292] net: ipa: introduce sysfs code

Add IPA device attributes to expose information known by the IPA
driver about the hardware and its configuration.

All pointers used to display these attribute values (i.e., IPA
pointer and endpoint pointers) will have been initialized by the
time IPA probe has completed, so they may be safely dereferenced.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/sysfs-devices-platform-soc-ipa    |  78 ++++++++++
 drivers/net/ipa/Makefile                      |   3 +-
 drivers/net/ipa/ipa_main.c                    |   9 ++
 drivers/net/ipa/ipa_sysfs.c                   | 136 ++++++++++++++++++
 drivers/net/ipa/ipa_sysfs.h                   |  15 ++
 drivers/net/ipa/ipa_version.h                 |   2 +
 6 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
 create mode 100644 drivers/net/ipa/ipa_sysfs.c
 create mode 100644 drivers/net/ipa/ipa_sysfs.h

diff --git a/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa b/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
new file mode 100644
index 000000000000..c56dcf15bf29
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
@@ -0,0 +1,78 @@
+What:		/sys/devices/platform/soc@X/XXXXXXX.ipa/
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The /sys/devices/platform/soc@X/XXXXXXX.ipa/ directory
+		contains read-only attributes exposing information about
+		an IPA device.  The X values could vary, but are typically
+		"soc@0/1e40000.ipa".
+
+What:		.../XXXXXXX.ipa/version
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/version file contains the IPA hardware
+		version, as a period-separated set of two or three integers
+		(e.g., "3.5.1" or "4.2").
+
+What:		.../XXXXXXX.ipa/feature/
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/feature/ directory contains a set of
+		attributes describing features implemented by the IPA
+		hardware.
+
+What:		.../XXXXXXX.ipa/feature/rx_offload
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/feature/rx_offload file contains a
+		string indicating the type of receive checksum offload
+		that is supported by the hardware.  The possible values
+		are "MAPv4" or "MAPv5".
+
+What:		.../XXXXXXX.ipa/feature/tx_offload
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/feature/tx_offload file contains a
+		string indicating the type of transmit checksum offload
+		that is supported by the hardware.  The possible values
+		are "MAPv4" or "MAPv5".
+
+What:		.../XXXXXXX.ipa/modem/
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/modem/ directory contains a set of
+		attributes describing properties of the modem execution
+		environment reachable by the IPA hardware.
+
+What:		.../XXXXXXX.ipa/modem/rx_endpoint_id
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/feature/rx_endpoint_id file contains
+		the AP endpoint ID that receives packets originating from
+		the modem execution environment.  The "rx" is from the
+		perspective of the AP; this endpoint is considered an "IPA
+		producer".  An endpoint ID is a small unsigned integer.
+
+What:		.../XXXXXXX.ipa/modem/tx_endpoint_id
+Date:		June 2021
+KernelVersion:	v5.14
+Contact:	Alex Elder <elder@kernel.org>
+Description:
+		The .../XXXXXXX.ipa/feature/tx_endpoint_id file contains
+		the AP endpoint ID used to transmit packets destined for
+		the modem execution environment.  The "tx" is from the
+		perspective of the AP; this endpoint is considered an "IPA
+		consumer".  An endpoint ID is a small unsigned integer.
diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index 1efe1a88104b..bd34fce8f6e6 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -7,7 +7,8 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_table.o ipa_interrupt.o gsi.o gsi_trans.o \
 				ipa_gsi.o ipa_smp2p.o ipa_uc.o \
 				ipa_endpoint.o ipa_cmd.o ipa_modem.o \
-				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
+				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o \
+				ipa_sysfs.o
 
 ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o \
 				ipa_data-v4.5.o ipa_data-v4.9.o \
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index cbd39e4667a3..2243e3e5b7ea 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -31,6 +31,7 @@
 #include "ipa_uc.h"
 #include "ipa_interrupt.h"
 #include "gsi_trans.h"
+#include "ipa_sysfs.h"
 
 /**
  * DOC: The IP Accelerator
@@ -906,6 +907,13 @@ static const struct dev_pm_ops ipa_pm_ops = {
 	.resume		= ipa_resume,
 };
 
+static const struct attribute_group *ipa_attribute_groups[] = {
+	&ipa_attribute_group,
+	&ipa_feature_attribute_group,
+	&ipa_modem_attribute_group,
+	NULL,
+};
+
 static struct platform_driver ipa_driver = {
 	.probe		= ipa_probe,
 	.remove		= ipa_remove,
@@ -914,6 +922,7 @@ static struct platform_driver ipa_driver = {
 		.name		= "ipa",
 		.pm		= &ipa_pm_ops,
 		.of_match_table	= ipa_match,
+		.dev_groups	= ipa_attribute_groups,
 	},
 };
 
diff --git a/drivers/net/ipa/ipa_sysfs.c b/drivers/net/ipa/ipa_sysfs.c
new file mode 100644
index 000000000000..ff61dbdd70d8
--- /dev/null
+++ b/drivers/net/ipa/ipa_sysfs.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2021 Linaro Ltd. */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+
+#include "ipa.h"
+#include "ipa_version.h"
+#include "ipa_sysfs.h"
+
+static const char *ipa_version_string(struct ipa *ipa)
+{
+	switch (ipa->version) {
+	case IPA_VERSION_3_0:
+		return "3.0";
+	case IPA_VERSION_3_1:
+		return "3.1";
+	case IPA_VERSION_3_5:
+		return "3.5";
+	case IPA_VERSION_3_5_1:
+		return "3.5.1";
+	case IPA_VERSION_4_0:
+		return "4.0";
+	case IPA_VERSION_4_1:
+		return "4.1";
+	case IPA_VERSION_4_2:
+		return "4.2";
+	case IPA_VERSION_4_5:
+		return "4.5";
+	case IPA_VERSION_4_7:
+		return "4.7";
+	case IPA_VERSION_4_9:
+		return "4.9";
+	case IPA_VERSION_4_11:
+		return "4.11";
+	default:
+		return "0.0";	/* Won't happen (checked at probe time) */
+	}
+}
+
+static ssize_t
+version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct ipa *ipa = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_version_string(ipa));
+}
+
+static DEVICE_ATTR_RO(version);
+
+static struct attribute *ipa_attrs[] = {
+	&dev_attr_version.attr,
+	NULL
+};
+
+const struct attribute_group ipa_attribute_group = {
+	.attrs		= ipa_attrs,
+};
+
+static const char *ipa_offload_string(struct ipa *ipa)
+{
+	return ipa->version < IPA_VERSION_4_5 ? "MAPv4" : "MAPv5";
+}
+
+static ssize_t rx_offload_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct ipa *ipa = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa));
+}
+
+static DEVICE_ATTR_RO(rx_offload);
+
+static ssize_t tx_offload_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct ipa *ipa = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa));
+}
+
+static DEVICE_ATTR_RO(tx_offload);
+
+static struct attribute *ipa_feature_attrs[] = {
+	&dev_attr_rx_offload.attr,
+	&dev_attr_tx_offload.attr,
+	NULL
+};
+
+const struct attribute_group ipa_feature_attribute_group = {
+	.name		= "feature",
+	.attrs		= ipa_feature_attrs,
+};
+
+static ssize_t
+ipa_endpoint_id_show(struct ipa *ipa, char *buf, enum ipa_endpoint_name name)
+{
+	u32 endpoint_id = ipa->name_map[name]->endpoint_id;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", endpoint_id);
+}
+
+static ssize_t rx_endpoint_id_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct ipa *ipa = dev_get_drvdata(dev);
+
+	return ipa_endpoint_id_show(ipa, buf, IPA_ENDPOINT_AP_MODEM_RX);
+}
+
+static DEVICE_ATTR_RO(rx_endpoint_id);
+
+static ssize_t tx_endpoint_id_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct ipa *ipa = dev_get_drvdata(dev);
+
+	return ipa_endpoint_id_show(ipa, buf, IPA_ENDPOINT_AP_MODEM_TX);
+}
+
+static DEVICE_ATTR_RO(tx_endpoint_id);
+
+static struct attribute *ipa_modem_attrs[] = {
+	&dev_attr_rx_endpoint_id.attr,
+	&dev_attr_tx_endpoint_id.attr,
+	NULL
+};
+
+const struct attribute_group ipa_modem_attribute_group = {
+	.name		= "modem",
+	.attrs		= ipa_modem_attrs,
+};
diff --git a/drivers/net/ipa/ipa_sysfs.h b/drivers/net/ipa/ipa_sysfs.h
new file mode 100644
index 000000000000..b34e5650bf8c
--- /dev/null
+++ b/drivers/net/ipa/ipa_sysfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2019-2021 Linaro Ltd.
+ */
+#ifndef _IPA_SYSFS_H_
+#define _IPA_SYSFS_H_
+
+struct attribute_group;
+
+extern const struct attribute_group ipa_attribute_group;
+extern const struct attribute_group ipa_feature_attribute_group;
+extern const struct attribute_group ipa_modem_attribute_group;
+
+#endif /* _IPA_SYSFS_H_ */
diff --git a/drivers/net/ipa/ipa_version.h b/drivers/net/ipa/ipa_version.h
index ee2b3d02f3cd..6c16c895d842 100644
--- a/drivers/net/ipa/ipa_version.h
+++ b/drivers/net/ipa/ipa_version.h
@@ -21,6 +21,8 @@
  * @IPA_VERSION_4_11:	IPA version 4.11/GSI version 2.11 (2.1.1)
  *
  * Defines the version of IPA (and GSI) hardware present on the platform.
+ * Please update ipa_version_valid() and ipa_version_string() whenever a
+ * new version is added.
  */
 enum ipa_version {
 	IPA_VERSION_3_0,

From 3b3af91cb6893967bbec30f5c14562d0f7f00c2a Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 9 Jun 2021 19:56:51 +0800
Subject: [PATCH 1564/3292] libbpf: Simplify the return expression of
 bpf_object__init_maps function

There is no need for special treatment of the 'ret == 0' case.
This patch simplifies the return expression.

Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210609115651.3392580-1-wanghai38@huawei.com
---
 tools/lib/bpf/libbpf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 65f87cc1220c..128715b8794b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2465,10 +2465,8 @@ static int bpf_object__init_maps(struct bpf_object *obj,
 	err = err ?: bpf_object__init_global_data_maps(obj);
 	err = err ?: bpf_object__init_kconfig_map(obj);
 	err = err ?: bpf_object__init_struct_ops_maps(obj);
-	if (err)
-		return err;
 
-	return 0;
+	return err;
 }
 
 static bool section_have_execinstr(struct bpf_object *obj, int idx)

From ca16b429f39b4ce013bfa7e197f25681e65a2a42 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Wed, 9 Jun 2021 19:59:16 +0800
Subject: [PATCH 1565/3292] tools/bpftool: Fix error return code in do_batch()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 668da745af3c2 ("tools: bpftool: add support for quotations ...")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20210609115916.2186872-1-chengzhihao1@huawei.com
---
 tools/bpf/bpftool/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 7f2817d97079..3ddfd4843738 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -341,8 +341,10 @@ static int do_batch(int argc, char **argv)
 		n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
 		if (!n_argc)
 			continue;
-		if (n_argc < 0)
+		if (n_argc < 0) {
+			err = n_argc;
 			goto err_close;
+		}
 
 		if (json_output) {
 			jsonw_start_object(json_wtr);

From fb312ac5ccb007e843f982b38d4d6886ba4b32f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Mon, 31 May 2021 17:41:27 +0300
Subject: [PATCH 1566/3292] ath9k: Fix kernel NULL pointer dereference during
 ath_reset_internal()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I got this crash more times during debugging of PCIe controller and crash
happens somehow at the time when PCIe kernel code started link retraining (as
part of ASPM code) when at the same time PCIe link went down and ath9k probably
executed hw reset procedure.

Currently I'm not able to reproduce this issue as it looks like to be
some race condition between link training, ASPM, link down and reset
path. And as always, race conditions which depends on more input
parameters are hard to reproduce as it depends on precise timings.

But it is clear that pointers are zero in this case and should be
properly filled as same code pattern is used in ath9k_stop() function.
Anyway I was able to reproduce this crash by manually triggering ath
reset worker prior putting card up. I created simple patch to export
reset functionality via debugfs and use it to "simulate" of triggering
reset.    s proved that NULL-pointer dereference issue is there.

Function ath9k_hw_reset() is dereferencing chan structure pointer, so it
needs to be non-NULL pointer.

Function ath9k_stop() already contains code which sets ah->curchan to valid
non-NULL pointer prior calling ath9k_hw_reset() function.

Add same code pattern also into ath_reset_internal() function to prevent
kernel NULL pointer dereference in ath9k_hw_reset() function.

This change fixes kernel NULL pointer dereference in ath9k_hw_reset() which
is caused by calling ath9k_hw_reset() from ath_reset_internal() with NULL
chan structure.

    [   45.334305] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
    [   45.344417] Mem abort info:
    [   45.347301]   ESR = 0x96000005
    [   45.350448]   EC = 0x25: DABT (current EL), IL = 32 bits
    [   45.356166]   SET = 0, FnV = 0
    [   45.359350]   EA = 0, S1PTW = 0
    [   45.362596] Data abort info:
    [   45.365756]   ISV = 0, ISS = 0x00000005
    [   45.369735]   CM = 0, WnR = 0
    [   45.372814] user pgtable: 4k pages, 39-bit VAs, pgdp=000000000685d000
    [   45.379663] [0000000000000008] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
    [   45.388856] Internal error: Oops: 96000005 [#1] SMP
    [   45.393897] Modules linked in: ath9k ath9k_common ath9k_hw
    [   45.399574] CPU: 1 PID: 309 Comm: kworker/u4:2 Not tainted 5.12.0-rc2-dirty #785
    [   45.414746] Workqueue: phy0 ath_reset_work [ath9k]
    [   45.419713] pstate: 40000005 (nZcv daif -PAN -UAO -TCO BTYPE=--)
    [   45.425910] pc : ath9k_hw_reset+0xc4/0x1c48 [ath9k_hw]
    [   45.431234] lr : ath9k_hw_reset+0xc0/0x1c48 [ath9k_hw]
    [   45.436548] sp : ffffffc0118dbca0
    [   45.439961] x29: ffffffc0118dbca0 x28: 0000000000000000
    [   45.445442] x27: ffffff800dee4080 x26: 0000000000000000
    [   45.450923] x25: ffffff800df9b9d8 x24: 0000000000000000
    [   45.456404] x23: ffffffc0115f6000 x22: ffffffc008d0d408
    [   45.461885] x21: ffffff800dee5080 x20: ffffff800df9b9d8
    [   45.467366] x19: 0000000000000000 x18: 0000000000000000
    [   45.472846] x17: 0000000000000000 x16: 0000000000000000
    [   45.478326] x15: 0000000000000010 x14: ffffffffffffffff
    [   45.483807] x13: ffffffc0918db94f x12: ffffffc011498720
    [   45.489289] x11: 0000000000000003 x10: ffffffc0114806e0
    [   45.494770] x9 : ffffffc01014b2ec x8 : 0000000000017fe8
    [   45.500251] x7 : c0000000ffffefff x6 : 0000000000000001
    [   45.505733] x5 : 0000000000000000 x4 : 0000000000000000
    [   45.511213] x3 : 0000000000000000 x2 : ffffff801fece870
    [   45.516693] x1 : ffffffc00eded000 x0 : 000000000000003f
    [   45.522174] Call trace:
    [   45.524695]  ath9k_hw_reset+0xc4/0x1c48 [ath9k_hw]
    [   45.529653]  ath_reset_internal+0x1a8/0x2b8 [ath9k]
    [   45.534696]  ath_reset_work+0x2c/0x40 [ath9k]
    [   45.539198]  process_one_work+0x210/0x480
    [   45.543339]  worker_thread+0x5c/0x510
    [   45.547115]  kthread+0x12c/0x130
    [   45.550445]  ret_from_fork+0x10/0x1c
    [   45.554138] Code: 910922c2 9117e021 95ff0398 b4000294 (b9400a61)
    [   45.560430] ---[ end trace 566410ba90b50e8b ]---
    [   45.565193] Kernel panic - not syncing: Oops: Fatal exception in interrupt
    [   45.572282] SMP: stopping secondary CPUs
    [   45.576331] Kernel Offset: disabled
    [   45.579924] CPU features: 0x00040002,0000200c
    [   45.584416] Memory Limit: none
    [   45.587564] Rebooting in 3 seconds..

Signed-off-by: Pali Rohár <pali@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210402122653.24014-1-pali@kernel.org
---
 drivers/net/wireless/ath/ath9k/main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 45f6402478b5..97c3a53f9cef 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -307,6 +307,11 @@ static int ath_reset_internal(struct ath_softc *sc, struct ath9k_channel *hchan)
 		hchan = ah->curchan;
 	}
 
+	if (!hchan) {
+		fastcc = false;
+		hchan = ath9k_cmn_get_channel(sc->hw, ah, &sc->cur_chan->chandef);
+	}
+
 	if (!ath_prepare_reset(sc))
 		fastcc = false;
 

From 755b1f73173e004e8c89a17fa4e8b329481495d4 Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:27 +0300
Subject: [PATCH 1567/3292] ath11k: add hw reg support for WCN6855

Reg address of WCN6855 is different from other devices,
so add separate reg definition for this target.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-2-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/hw.c | 71 ++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h |  1 +
 2 files changed, 72 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 377ae8d5b58f..d9d7c4608052 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -1688,3 +1688,74 @@ const struct ath11k_hw_regs qcn9074_regs = {
 	.pcie_qserdes_sysclk_en_sel = 0x01e0e0a8,
 	.pcie_pcs_osc_dtct_config_base = 0x01e0f45c,
 };
+
+const struct ath11k_hw_regs wcn6855_regs = {
+	/* SW2TCL(x) R0 ring configuration address */
+	.hal_tcl1_ring_base_lsb = 0x00000690,
+	.hal_tcl1_ring_base_msb = 0x00000694,
+	.hal_tcl1_ring_id = 0x00000698,
+	.hal_tcl1_ring_misc = 0x000006a0,
+	.hal_tcl1_ring_tp_addr_lsb = 0x000006ac,
+	.hal_tcl1_ring_tp_addr_msb = 0x000006b0,
+	.hal_tcl1_ring_consumer_int_setup_ix0 = 0x000006c0,
+	.hal_tcl1_ring_consumer_int_setup_ix1 = 0x000006c4,
+	.hal_tcl1_ring_msi1_base_lsb = 0x000006d8,
+	.hal_tcl1_ring_msi1_base_msb = 0x000006dc,
+	.hal_tcl1_ring_msi1_data = 0x000006e0,
+	.hal_tcl2_ring_base_lsb = 0x000006e8,
+	.hal_tcl_ring_base_lsb = 0x00000798,
+
+	/* TCL STATUS ring address */
+	.hal_tcl_status_ring_base_lsb = 0x000008a0,
+
+	/* REO2SW(x) R0 ring configuration address */
+	.hal_reo1_ring_base_lsb = 0x00000244,
+	.hal_reo1_ring_base_msb = 0x00000248,
+	.hal_reo1_ring_id = 0x0000024c,
+	.hal_reo1_ring_misc = 0x00000254,
+	.hal_reo1_ring_hp_addr_lsb = 0x00000258,
+	.hal_reo1_ring_hp_addr_msb = 0x0000025c,
+	.hal_reo1_ring_producer_int_setup = 0x00000268,
+	.hal_reo1_ring_msi1_base_lsb = 0x0000028c,
+	.hal_reo1_ring_msi1_base_msb = 0x00000290,
+	.hal_reo1_ring_msi1_data = 0x00000294,
+	.hal_reo2_ring_base_lsb = 0x0000029c,
+	.hal_reo1_aging_thresh_ix_0 = 0x000005bc,
+	.hal_reo1_aging_thresh_ix_1 = 0x000005c0,
+	.hal_reo1_aging_thresh_ix_2 = 0x000005c4,
+	.hal_reo1_aging_thresh_ix_3 = 0x000005c8,
+
+	/* REO2SW(x) R2 ring pointers (head/tail) address */
+	.hal_reo1_ring_hp = 0x00003030,
+	.hal_reo1_ring_tp = 0x00003034,
+	.hal_reo2_ring_hp = 0x00003038,
+
+	/* REO2TCL R0 ring configuration address */
+	.hal_reo_tcl_ring_base_lsb = 0x00000454,
+	.hal_reo_tcl_ring_hp = 0x00003060,
+
+	/* REO status address */
+	.hal_reo_status_ring_base_lsb = 0x0000055c,
+	.hal_reo_status_hp = 0x00003078,
+
+	/* WCSS relative address */
+	.hal_seq_wcss_umac_ce0_src_reg = 0x1b80000,
+	.hal_seq_wcss_umac_ce0_dst_reg = 0x1b81000,
+	.hal_seq_wcss_umac_ce1_src_reg = 0x1b82000,
+	.hal_seq_wcss_umac_ce1_dst_reg = 0x1b83000,
+
+	/* WBM Idle address */
+	.hal_wbm_idle_link_ring_base_lsb = 0x00000870,
+	.hal_wbm_idle_link_ring_misc = 0x00000880,
+
+	/* SW2WBM release address */
+	.hal_wbm_release_ring_base_lsb = 0x000001e8,
+
+	/* WBM2SW release address */
+	.hal_wbm0_release_ring_base_lsb = 0x00000920,
+	.hal_wbm1_release_ring_base_lsb = 0x00000978,
+
+	/* PCIe base address */
+	.pcie_qserdes_sysclk_en_sel = 0x01e0c0ac,
+	.pcie_pcs_osc_dtct_config_base = 0x01e0c628,
+};
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index c81a6328361d..4e7261c0dca1 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -318,5 +318,6 @@ struct ath11k_hw_regs {
 extern const struct ath11k_hw_regs ipq8074_regs;
 extern const struct ath11k_hw_regs qca6390_regs;
 extern const struct ath11k_hw_regs qcn9074_regs;
+extern const struct ath11k_hw_regs wcn6855_regs;
 
 #endif

From e4073430ee1dec5402a6158755ac8b84eade83c6 Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:27 +0300
Subject: [PATCH 1568/3292] ath11k: add dp support for WCN6855

hal rx descriptor is different for WCN6855 and there are such a lot
of handlers processing this descriptor in data path. So add separate
handling for this target.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1

Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-3-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/hw.c      | 201 ++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h      |   1 +
 drivers/net/wireless/ath/ath11k/rx_desc.h |  87 ++++++++++
 3 files changed, 289 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index d9d7c4608052..023047df954c 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -45,6 +45,13 @@ static void ath11k_hw_qcn9074_tx_mesh_enable(struct ath11k_base *ab,
 				     true);
 }
 
+static void ath11k_hw_wcn6855_tx_mesh_enable(struct ath11k_base *ab,
+					     struct hal_tcl_data_cmd *tcl_cmd)
+{
+	tcl_cmd->info3 |= FIELD_PREP(HAL_QCN9074_TCL_DATA_CMD_INFO3_MESH_ENABLE,
+				     true);
+}
+
 static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab,
 					   struct target_resource_config *config)
 {
@@ -489,6 +496,166 @@ static u8 *ath11k_hw_qcn9074_rx_desc_get_msdu_payload(struct hal_rx_desc *desc)
 	return &desc->u.qcn9074.msdu_payload[0];
 }
 
+static bool ath11k_hw_wcn6855_rx_desc_get_first_msdu(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MSDU_END_INFO2_FIRST_MSDU_WCN6855,
+			   __le32_to_cpu(desc->u.wcn6855.msdu_end.info2));
+}
+
+static bool ath11k_hw_wcn6855_rx_desc_get_last_msdu(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MSDU_END_INFO2_LAST_MSDU_WCN6855,
+			   __le32_to_cpu(desc->u.wcn6855.msdu_end.info2));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_l3_pad_bytes(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_END_INFO2_L3_HDR_PADDING,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_end.info2));
+}
+
+static u8 *ath11k_hw_wcn6855_rx_desc_get_hdr_status(struct hal_rx_desc *desc)
+{
+	return desc->u.wcn6855.hdr_status;
+}
+
+static bool ath11k_hw_wcn6855_rx_desc_encrypt_valid(struct hal_rx_desc *desc)
+{
+	return __le32_to_cpu(desc->u.wcn6855.mpdu_start.info1) &
+	       RX_MPDU_START_INFO1_ENCRYPT_INFO_VALID;
+}
+
+static u32 ath11k_hw_wcn6855_rx_desc_get_encrypt_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO2_ENC_TYPE,
+			 __le32_to_cpu(desc->u.wcn6855.mpdu_start.info2));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_decap_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO2_DECAP_FORMAT,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info2));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_mesh_ctl(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO2_MESH_CTRL_PRESENT,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info2));
+}
+
+static bool ath11k_hw_wcn6855_rx_desc_get_mpdu_seq_ctl_vld(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MPDU_START_INFO1_MPDU_SEQ_CTRL_VALID,
+			   __le32_to_cpu(desc->u.wcn6855.mpdu_start.info1));
+}
+
+static bool ath11k_hw_wcn6855_rx_desc_get_mpdu_fc_valid(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MPDU_START_INFO1_MPDU_FCTRL_VALID,
+			   __le32_to_cpu(desc->u.wcn6855.mpdu_start.info1));
+}
+
+static u16 ath11k_hw_wcn6855_rx_desc_get_mpdu_start_seq_no(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO1_MPDU_SEQ_NUM,
+			 __le32_to_cpu(desc->u.wcn6855.mpdu_start.info1));
+}
+
+static u16 ath11k_hw_wcn6855_rx_desc_get_msdu_len(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO1_MSDU_LENGTH,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info1));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_msdu_sgi(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_SGI,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info3));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_msdu_rate_mcs(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_RATE_MCS,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info3));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_msdu_rx_bw(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_RECV_BW,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info3));
+}
+
+static u32 ath11k_hw_wcn6855_rx_desc_get_msdu_freq(struct hal_rx_desc *desc)
+{
+	return __le32_to_cpu(desc->u.wcn6855.msdu_start.phy_meta_data);
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_msdu_pkt_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_PKT_TYPE,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info3));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_msdu_nss(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_MIMO_SS_BITMAP,
+			 __le32_to_cpu(desc->u.wcn6855.msdu_start.info3));
+}
+
+static u8 ath11k_hw_wcn6855_rx_desc_get_mpdu_tid(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO2_TID_WCN6855,
+			 __le32_to_cpu(desc->u.wcn6855.mpdu_start.info2));
+}
+
+static u16 ath11k_hw_wcn6855_rx_desc_get_mpdu_peer_id(struct hal_rx_desc *desc)
+{
+	return __le16_to_cpu(desc->u.wcn6855.mpdu_start.sw_peer_id);
+}
+
+static void ath11k_hw_wcn6855_rx_desc_copy_attn_end(struct hal_rx_desc *fdesc,
+						    struct hal_rx_desc *ldesc)
+{
+	memcpy((u8 *)&fdesc->u.wcn6855.msdu_end, (u8 *)&ldesc->u.wcn6855.msdu_end,
+	       sizeof(struct rx_msdu_end_wcn6855));
+	memcpy((u8 *)&fdesc->u.wcn6855.attention, (u8 *)&ldesc->u.wcn6855.attention,
+	       sizeof(struct rx_attention));
+	memcpy((u8 *)&fdesc->u.wcn6855.mpdu_end, (u8 *)&ldesc->u.wcn6855.mpdu_end,
+	       sizeof(struct rx_mpdu_end));
+}
+
+static u32 ath11k_hw_wcn6855_rx_desc_get_mpdu_start_tag(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(HAL_TLV_HDR_TAG,
+			 __le32_to_cpu(desc->u.wcn6855.mpdu_start_tag));
+}
+
+static u32 ath11k_hw_wcn6855_rx_desc_get_mpdu_ppdu_id(struct hal_rx_desc *desc)
+{
+	return __le16_to_cpu(desc->u.wcn6855.mpdu_start.phy_ppdu_id);
+}
+
+static void ath11k_hw_wcn6855_rx_desc_set_msdu_len(struct hal_rx_desc *desc, u16 len)
+{
+	u32 info = __le32_to_cpu(desc->u.wcn6855.msdu_start.info1);
+
+	info &= ~RX_MSDU_START_INFO1_MSDU_LENGTH;
+	info |= FIELD_PREP(RX_MSDU_START_INFO1_MSDU_LENGTH, len);
+
+	desc->u.wcn6855.msdu_start.info1 = __cpu_to_le32(info);
+}
+
+static
+struct rx_attention *ath11k_hw_wcn6855_rx_desc_get_attention(struct hal_rx_desc *desc)
+{
+	return &desc->u.wcn6855.attention;
+}
+
+static u8 *ath11k_hw_wcn6855_rx_desc_get_msdu_payload(struct hal_rx_desc *desc)
+{
+	return &desc->u.wcn6855.msdu_payload[0];
+}
+
 const struct ath11k_hw_ops ipq8074_ops = {
 	.get_hw_mac_from_pdev_id = ath11k_hw_ipq8074_mac_from_pdev_id,
 	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
@@ -625,6 +792,40 @@ const struct ath11k_hw_ops qcn9074_ops = {
 	.rx_desc_get_msdu_payload = ath11k_hw_qcn9074_rx_desc_get_msdu_payload,
 };
 
+const struct ath11k_hw_ops wcn6855_ops = {
+	.get_hw_mac_from_pdev_id = ath11k_hw_ipq8074_mac_from_pdev_id,
+	.wmi_init_config = ath11k_init_wmi_config_qca6390,
+	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_qca6390,
+	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_qca6390,
+	.tx_mesh_enable = ath11k_hw_wcn6855_tx_mesh_enable,
+	.rx_desc_get_first_msdu = ath11k_hw_wcn6855_rx_desc_get_first_msdu,
+	.rx_desc_get_last_msdu = ath11k_hw_wcn6855_rx_desc_get_last_msdu,
+	.rx_desc_get_l3_pad_bytes = ath11k_hw_wcn6855_rx_desc_get_l3_pad_bytes,
+	.rx_desc_get_hdr_status = ath11k_hw_wcn6855_rx_desc_get_hdr_status,
+	.rx_desc_encrypt_valid = ath11k_hw_wcn6855_rx_desc_encrypt_valid,
+	.rx_desc_get_encrypt_type = ath11k_hw_wcn6855_rx_desc_get_encrypt_type,
+	.rx_desc_get_decap_type = ath11k_hw_wcn6855_rx_desc_get_decap_type,
+	.rx_desc_get_mesh_ctl = ath11k_hw_wcn6855_rx_desc_get_mesh_ctl,
+	.rx_desc_get_mpdu_seq_ctl_vld = ath11k_hw_wcn6855_rx_desc_get_mpdu_seq_ctl_vld,
+	.rx_desc_get_mpdu_fc_valid = ath11k_hw_wcn6855_rx_desc_get_mpdu_fc_valid,
+	.rx_desc_get_mpdu_start_seq_no = ath11k_hw_wcn6855_rx_desc_get_mpdu_start_seq_no,
+	.rx_desc_get_msdu_len = ath11k_hw_wcn6855_rx_desc_get_msdu_len,
+	.rx_desc_get_msdu_sgi = ath11k_hw_wcn6855_rx_desc_get_msdu_sgi,
+	.rx_desc_get_msdu_rate_mcs = ath11k_hw_wcn6855_rx_desc_get_msdu_rate_mcs,
+	.rx_desc_get_msdu_rx_bw = ath11k_hw_wcn6855_rx_desc_get_msdu_rx_bw,
+	.rx_desc_get_msdu_freq = ath11k_hw_wcn6855_rx_desc_get_msdu_freq,
+	.rx_desc_get_msdu_pkt_type = ath11k_hw_wcn6855_rx_desc_get_msdu_pkt_type,
+	.rx_desc_get_msdu_nss = ath11k_hw_wcn6855_rx_desc_get_msdu_nss,
+	.rx_desc_get_mpdu_tid = ath11k_hw_wcn6855_rx_desc_get_mpdu_tid,
+	.rx_desc_get_mpdu_peer_id = ath11k_hw_wcn6855_rx_desc_get_mpdu_peer_id,
+	.rx_desc_copy_attn_end_tlv = ath11k_hw_wcn6855_rx_desc_copy_attn_end,
+	.rx_desc_get_mpdu_start_tag = ath11k_hw_wcn6855_rx_desc_get_mpdu_start_tag,
+	.rx_desc_get_mpdu_ppdu_id = ath11k_hw_wcn6855_rx_desc_get_mpdu_ppdu_id,
+	.rx_desc_set_msdu_len = ath11k_hw_wcn6855_rx_desc_set_msdu_len,
+	.rx_desc_get_attention = ath11k_hw_wcn6855_rx_desc_get_attention,
+	.rx_desc_get_msdu_payload = ath11k_hw_wcn6855_rx_desc_get_msdu_payload,
+};
+
 #define ATH11K_TX_RING_MASK_0 0x1
 #define ATH11K_TX_RING_MASK_1 0x2
 #define ATH11K_TX_RING_MASK_2 0x4
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 4e7261c0dca1..6e924f628f22 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -205,6 +205,7 @@ extern const struct ath11k_hw_ops ipq8074_ops;
 extern const struct ath11k_hw_ops ipq6018_ops;
 extern const struct ath11k_hw_ops qca6390_ops;
 extern const struct ath11k_hw_ops qcn9074_ops;
+extern const struct ath11k_hw_ops wcn6855_ops;
 
 extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_ipq8074;
 extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qca6390;
diff --git a/drivers/net/wireless/ath/ath11k/rx_desc.h b/drivers/net/wireless/ath/ath11k/rx_desc.h
index 0cdb4a1f816e..79c50804d7dc 100644
--- a/drivers/net/wireless/ath/ath11k/rx_desc.h
+++ b/drivers/net/wireless/ath/ath11k/rx_desc.h
@@ -368,6 +368,7 @@ struct rx_attention {
 #define RX_MPDU_START_INFO2_BSSID_HIT		BIT(9)
 #define RX_MPDU_START_INFO2_BSSID_NUM		GENMASK(13, 10)
 #define RX_MPDU_START_INFO2_TID			GENMASK(17, 14)
+#define RX_MPDU_START_INFO2_TID_WCN6855		GENMASK(18, 15)
 
 #define RX_MPDU_START_INFO3_REO_DEST_IND		GENMASK(4, 0)
 #define RX_MPDU_START_INFO3_FLOW_ID_TOEPLITZ		BIT(7)
@@ -546,6 +547,31 @@ struct rx_mpdu_start_qcn9074 {
 	__le32 ht_ctrl;
 } __packed;
 
+struct rx_mpdu_start_wcn6855 {
+	__le32 info3;
+	__le32 reo_queue_desc_lo;
+	__le32 info4;
+	__le32 pn[4];
+	__le32 info2;
+	__le32 peer_meta_data;
+	__le16 info0;
+	__le16 phy_ppdu_id;
+	__le16 ast_index;
+	__le16 sw_peer_id;
+	__le32 info1;
+	__le32 info5;
+	__le32 info6;
+	__le16 frame_ctrl;
+	__le16 duration;
+	u8 addr1[ETH_ALEN];
+	u8 addr2[ETH_ALEN];
+	u8 addr3[ETH_ALEN];
+	__le16 seq_ctrl;
+	u8 addr4[ETH_ALEN];
+	__le16 qos_ctrl;
+	__le32 ht_ctrl;
+} __packed;
+
 /* rx_mpdu_start
  *
  * rxpcu_mpdu_filter_in_category
@@ -804,6 +830,20 @@ struct rx_msdu_start_qcn9074 {
 	__le16 vlan_stag_c1;
 } __packed;
 
+struct rx_msdu_start_wcn6855 {
+	__le16 info0;
+	__le16 phy_ppdu_id;
+	__le32 info1;
+	__le32 info2;
+	__le32 toeplitz_hash;
+	__le32 flow_id_toeplitz;
+	__le32 info3;
+	__le32 ppdu_start_timestamp;
+	__le32 phy_meta_data;
+	__le16 vlan_ctag_ci;
+	__le16 vlan_stag_ci;
+} __packed;
+
 /* rx_msdu_start
  *
  * rxpcu_mpdu_filter_in_category
@@ -988,7 +1028,9 @@ struct rx_msdu_start_qcn9074 {
 
 #define RX_MSDU_END_INFO2_REPORTED_MPDU_LEN	GENMASK(13, 0)
 #define RX_MSDU_END_INFO2_FIRST_MSDU		BIT(14)
+#define RX_MSDU_END_INFO2_FIRST_MSDU_WCN6855	BIT(28)
 #define RX_MSDU_END_INFO2_LAST_MSDU		BIT(15)
+#define RX_MSDU_END_INFO2_LAST_MSDU_WCN6855	BIT(29)
 #define RX_MSDU_END_INFO2_SA_IDX_TIMEOUT	BIT(16)
 #define RX_MSDU_END_INFO2_DA_IDX_TIMEOUT	BIT(17)
 #define RX_MSDU_END_INFO2_MSDU_LIMIT_ERR	BIT(18)
@@ -1037,6 +1079,31 @@ struct rx_msdu_end_ipq8074 {
 	__le16 sa_sw_peer_id;
 } __packed;
 
+struct rx_msdu_end_wcn6855 {
+	__le16 info0;
+	__le16 phy_ppdu_id;
+	__le16 ip_hdr_cksum;
+	__le16 reported_mpdu_len;
+	__le32 info1;
+	__le32 ext_wapi_pn[2];
+	__le32 info4;
+	__le32 ipv6_options_crc;
+	__le32 tcp_seq_num;
+	__le32 tcp_ack_num;
+	__le16 info3;
+	__le16 window_size;
+	__le32 info2;
+	__le16 sa_idx;
+	__le16 da_idx;
+	__le32 info5;
+	__le32 fse_metadata;
+	__le16 cce_metadata;
+	__le16 sa_sw_peer_id;
+	__le32 rule_indication[2];
+	__le32 info6;
+	__le32 info7;
+} __packed;
+
 #define RX_MSDU_END_MPDU_LENGTH_INFO		GENMASK(13, 0)
 
 #define RX_MSDU_END_INFO2_DA_OFFSET		GENMASK(5, 0)
@@ -1400,10 +1467,30 @@ struct hal_rx_desc_qcn9074 {
 	u8 msdu_payload[0];
 } __packed;
 
+struct hal_rx_desc_wcn6855 {
+	__le32 msdu_end_tag;
+	struct rx_msdu_end_wcn6855 msdu_end;
+	__le32 rx_attn_tag;
+	struct rx_attention attention;
+	__le32 msdu_start_tag;
+	struct rx_msdu_start_wcn6855 msdu_start;
+	u8 rx_padding0[HAL_RX_DESC_PADDING0_BYTES];
+	__le32 mpdu_start_tag;
+	struct rx_mpdu_start_wcn6855 mpdu_start;
+	__le32 mpdu_end_tag;
+	struct rx_mpdu_end mpdu_end;
+	u8 rx_padding1[HAL_RX_DESC_PADDING1_BYTES];
+	__le32 hdr_status_tag;
+	__le32 phy_ppdu_id;
+	u8 hdr_status[HAL_RX_DESC_HDR_STATUS_LEN];
+	u8 msdu_payload[0];
+} __packed;
+
 struct hal_rx_desc {
 	union {
 		struct hal_rx_desc_ipq8074 ipq8074;
 		struct hal_rx_desc_qcn9074 qcn9074;
+		struct hal_rx_desc_wcn6855 wcn6855;
 	} u;
 } __packed;
 

From 0d55b76fd815f4d685a62afe44e623501186ceb4 Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:27 +0300
Subject: [PATCH 1569/3292] ath11k: setup REO for WCN6855

WCN6855 needs a different reo configuration, so add separate handling
for this target in ath11k_hw_ops.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-4-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp.c     | 16 +----
 drivers/net/wireless/ath/ath11k/hal.h    |  3 +-
 drivers/net/wireless/ath/ath11k/hal_rx.c | 37 ----------
 drivers/net/wireless/ath/ath11k/hw.c     | 91 ++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h     |  1 +
 5 files changed, 96 insertions(+), 52 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 04f6c4e0658b..b0c8f6290099 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -342,7 +342,6 @@ static int ath11k_dp_srng_common_setup(struct ath11k_base *ab)
 	struct ath11k_dp *dp = &ab->dp;
 	struct hal_srng *srng;
 	int i, ret;
-	u32 ring_hash_map;
 
 	ret = ath11k_dp_srng_setup(ab, &dp->wbm_desc_rel_ring,
 				   HAL_SW2WBM_RELEASE, 0, 0,
@@ -439,20 +438,9 @@ static int ath11k_dp_srng_common_setup(struct ath11k_base *ab)
 	}
 
 	/* When hash based routing of rx packet is enabled, 32 entries to map
-	 * the hash values to the ring will be configured. Each hash entry uses
-	 * three bits to map to a particular ring. The ring mapping will be
-	 * 0:TCL, 1:SW1, 2:SW2, 3:SW3, 4:SW4, 5:Release, 6:FW and 7:Not used.
+	 * the hash values to the ring will be configured.
 	 */
-	ring_hash_map = HAL_HASH_ROUTING_RING_SW1 << 0 |
-			HAL_HASH_ROUTING_RING_SW2 << 3 |
-			HAL_HASH_ROUTING_RING_SW3 << 6 |
-			HAL_HASH_ROUTING_RING_SW4 << 9 |
-			HAL_HASH_ROUTING_RING_SW1 << 12 |
-			HAL_HASH_ROUTING_RING_SW2 << 15 |
-			HAL_HASH_ROUTING_RING_SW3 << 18 |
-			HAL_HASH_ROUTING_RING_SW4 << 21;
-
-	ath11k_hal_reo_hw_setup(ab, ring_hash_map);
+	ab->hw_params.hw_ops->reo_setup(ab);
 
 	return 0;
 
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 91d1428b8b94..35ed3a14e200 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -120,6 +120,7 @@ struct ath11k_base;
 #define HAL_REO1_DEST_RING_CTRL_IX_1		0x00000008
 #define HAL_REO1_DEST_RING_CTRL_IX_2		0x0000000c
 #define HAL_REO1_DEST_RING_CTRL_IX_3		0x00000010
+#define HAL_REO1_MISC_CTL			0x00000630
 #define HAL_REO1_RING_BASE_LSB(ab)		ab->hw_params.regs->hal_reo1_ring_base_lsb
 #define HAL_REO1_RING_BASE_MSB(ab)		ab->hw_params.regs->hal_reo1_ring_base_msb
 #define HAL_REO1_RING_ID(ab)			ab->hw_params.regs->hal_reo1_ring_id
@@ -280,6 +281,7 @@ struct ath11k_base;
 #define HAL_REO1_GEN_ENABLE_FRAG_DST_RING		GENMASK(25, 23)
 #define HAL_REO1_GEN_ENABLE_AGING_LIST_ENABLE		BIT(2)
 #define HAL_REO1_GEN_ENABLE_AGING_FLUSH_ENABLE		BIT(3)
+#define HAL_REO1_MISC_CTL_FRAGMENT_DST_RING		GENMASK(20, 17)
 
 /* CE ring bit field mask and shift */
 #define HAL_CE_DST_R0_DEST_CTRL_MAX_LEN			GENMASK(15, 0)
@@ -906,7 +908,6 @@ void ath11k_hal_reo_qdesc_setup(void *vaddr, int tid, u32 ba_window_size,
 				u32 start_seq, enum hal_pn_type type);
 void ath11k_hal_reo_init_cmd_ring(struct ath11k_base *ab,
 				  struct hal_srng *srng);
-void ath11k_hal_reo_hw_setup(struct ath11k_base *ab, u32 ring_hash_map);
 void ath11k_hal_setup_link_idle_list(struct ath11k_base *ab,
 				     struct hal_wbm_idle_scatter_list *sbuf,
 				     u32 nsbufs, u32 tot_link_desc,
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c
index fac2396edf32..80999bae0958 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.c
@@ -801,43 +801,6 @@ void ath11k_hal_reo_init_cmd_ring(struct ath11k_base *ab,
 	}
 }
 
-void ath11k_hal_reo_hw_setup(struct ath11k_base *ab, u32 ring_hash_map)
-{
-	u32 reo_base = HAL_SEQ_WCSS_UMAC_REO_REG;
-	u32 val;
-
-	val = ath11k_hif_read32(ab, reo_base + HAL_REO1_GEN_ENABLE);
-
-	val &= ~HAL_REO1_GEN_ENABLE_FRAG_DST_RING;
-	val |= FIELD_PREP(HAL_REO1_GEN_ENABLE_FRAG_DST_RING,
-			  HAL_SRNG_RING_ID_REO2SW1) |
-	       FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_LIST_ENABLE, 1) |
-	       FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_FLUSH_ENABLE, 1);
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_GEN_ENABLE, val);
-
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_0(ab),
-			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_1(ab),
-			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_2(ab),
-			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_3(ab),
-			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_0,
-			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
-				      ring_hash_map));
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_1,
-			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
-				      ring_hash_map));
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_2,
-			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
-				      ring_hash_map));
-	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_3,
-			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
-				      ring_hash_map));
-}
-
 static enum hal_rx_mon_status
 ath11k_hal_rx_parse_mon_status_tlv(struct ath11k_base *ab,
 				   struct hal_rx_mon_ppdu_info *ppdu_info,
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 023047df954c..01207b50a454 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -10,6 +10,7 @@
 #include "hw.h"
 #include "core.h"
 #include "ce.h"
+#include "hif.h"
 
 /* Map from pdev index to hw mac index */
 static u8 ath11k_hw_ipq8074_mac_from_pdev_id(int pdev_idx)
@@ -98,6 +99,52 @@ static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab,
 	config->num_keep_alive_pattern = 0;
 }
 
+static void ath11k_hw_ipq8074_reo_setup(struct ath11k_base *ab)
+{
+	u32 reo_base = HAL_SEQ_WCSS_UMAC_REO_REG;
+	u32 val;
+	/* Each hash entry uses three bits to map to a particular ring. */
+	u32 ring_hash_map = HAL_HASH_ROUTING_RING_SW1 << 0 |
+		HAL_HASH_ROUTING_RING_SW2 << 3 |
+		HAL_HASH_ROUTING_RING_SW3 << 6 |
+		HAL_HASH_ROUTING_RING_SW4 << 9 |
+		HAL_HASH_ROUTING_RING_SW1 << 12 |
+		HAL_HASH_ROUTING_RING_SW2 << 15 |
+		HAL_HASH_ROUTING_RING_SW3 << 18 |
+		HAL_HASH_ROUTING_RING_SW4 << 21;
+
+	val = ath11k_hif_read32(ab, reo_base + HAL_REO1_GEN_ENABLE);
+
+	val &= ~HAL_REO1_GEN_ENABLE_FRAG_DST_RING;
+	val |= FIELD_PREP(HAL_REO1_GEN_ENABLE_FRAG_DST_RING,
+			HAL_SRNG_RING_ID_REO2SW1) |
+		FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_LIST_ENABLE, 1) |
+		FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_FLUSH_ENABLE, 1);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_GEN_ENABLE, val);
+
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_0(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_1(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_2(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_3(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_0,
+			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
+				      ring_hash_map));
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_1,
+			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
+				      ring_hash_map));
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_2,
+			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
+				      ring_hash_map));
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_3,
+			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
+				      ring_hash_map));
+}
+
 static void ath11k_init_wmi_config_ipq8074(struct ath11k_base *ab,
 					   struct target_resource_config *config)
 {
@@ -656,6 +703,45 @@ static u8 *ath11k_hw_wcn6855_rx_desc_get_msdu_payload(struct hal_rx_desc *desc)
 	return &desc->u.wcn6855.msdu_payload[0];
 }
 
+static void ath11k_hw_wcn6855_reo_setup(struct ath11k_base *ab)
+{
+	u32 reo_base = HAL_SEQ_WCSS_UMAC_REO_REG;
+	u32 val;
+	/* Each hash entry uses four bits to map to a particular ring. */
+	u32 ring_hash_map = HAL_HASH_ROUTING_RING_SW1 << 0 |
+		HAL_HASH_ROUTING_RING_SW2 << 4 |
+		HAL_HASH_ROUTING_RING_SW3 << 8 |
+		HAL_HASH_ROUTING_RING_SW4 << 12 |
+		HAL_HASH_ROUTING_RING_SW1 << 16 |
+		HAL_HASH_ROUTING_RING_SW2 << 20 |
+		HAL_HASH_ROUTING_RING_SW3 << 24 |
+		HAL_HASH_ROUTING_RING_SW4 << 28;
+
+	val = ath11k_hif_read32(ab, reo_base + HAL_REO1_GEN_ENABLE);
+	val |= FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_LIST_ENABLE, 1) |
+		FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_FLUSH_ENABLE, 1);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_GEN_ENABLE, val);
+
+	val = ath11k_hif_read32(ab, reo_base + HAL_REO1_MISC_CTL);
+	val &= ~HAL_REO1_MISC_CTL_FRAGMENT_DST_RING;
+	val |= FIELD_PREP(HAL_REO1_MISC_CTL_FRAGMENT_DST_RING, HAL_SRNG_RING_ID_REO2SW1);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_MISC_CTL, val);
+
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_0(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_1(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_2(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_3(ab),
+			   HAL_DEFAULT_REO_TIMEOUT_USEC);
+
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_2,
+			   ring_hash_map);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_3,
+			   ring_hash_map);
+}
+
 const struct ath11k_hw_ops ipq8074_ops = {
 	.get_hw_mac_from_pdev_id = ath11k_hw_ipq8074_mac_from_pdev_id,
 	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
@@ -688,6 +774,7 @@ const struct ath11k_hw_ops ipq8074_ops = {
 	.rx_desc_set_msdu_len = ath11k_hw_ipq8074_rx_desc_set_msdu_len,
 	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
+	.reo_setup = ath11k_hw_ipq8074_reo_setup,
 };
 
 const struct ath11k_hw_ops ipq6018_ops = {
@@ -722,6 +809,7 @@ const struct ath11k_hw_ops ipq6018_ops = {
 	.rx_desc_set_msdu_len = ath11k_hw_ipq8074_rx_desc_set_msdu_len,
 	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
+	.reo_setup = ath11k_hw_ipq8074_reo_setup,
 };
 
 const struct ath11k_hw_ops qca6390_ops = {
@@ -756,6 +844,7 @@ const struct ath11k_hw_ops qca6390_ops = {
 	.rx_desc_set_msdu_len = ath11k_hw_ipq8074_rx_desc_set_msdu_len,
 	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
+	.reo_setup = ath11k_hw_ipq8074_reo_setup,
 };
 
 const struct ath11k_hw_ops qcn9074_ops = {
@@ -790,6 +879,7 @@ const struct ath11k_hw_ops qcn9074_ops = {
 	.rx_desc_set_msdu_len = ath11k_hw_qcn9074_rx_desc_set_msdu_len,
 	.rx_desc_get_attention = ath11k_hw_qcn9074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_qcn9074_rx_desc_get_msdu_payload,
+	.reo_setup = ath11k_hw_ipq8074_reo_setup,
 };
 
 const struct ath11k_hw_ops wcn6855_ops = {
@@ -824,6 +914,7 @@ const struct ath11k_hw_ops wcn6855_ops = {
 	.rx_desc_set_msdu_len = ath11k_hw_wcn6855_rx_desc_set_msdu_len,
 	.rx_desc_get_attention = ath11k_hw_wcn6855_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_wcn6855_rx_desc_get_msdu_payload,
+	.reo_setup = ath11k_hw_wcn6855_reo_setup,
 };
 
 #define ATH11K_TX_RING_MASK_0 0x1
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 6e924f628f22..afe3b3c71695 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -199,6 +199,7 @@ struct ath11k_hw_ops {
 	void (*rx_desc_set_msdu_len)(struct hal_rx_desc *desc, u16 len);
 	struct rx_attention *(*rx_desc_get_attention)(struct hal_rx_desc *desc);
 	u8 *(*rx_desc_get_msdu_payload)(struct hal_rx_desc *desc);
+	void (*reo_setup)(struct ath11k_base *ab);
 };
 
 extern const struct ath11k_hw_ops ipq8074_ops;

From ed66849e159ba92a91ccde13ce3aebd90c644e05 Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:27 +0300
Subject: [PATCH 1570/3292] ath11k: setup WBM_IDLE_LINK ring once again

For WCN6855, WBM idle link ring needs a reinit.
Without this reinit, firmware crash might happen occasionally.
This is requested by the hw team.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-5-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/hal.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index 08e3c72d9237..eaa0edca5576 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -382,6 +382,16 @@ static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
 	val = FIELD_PREP(HAL_REO1_RING_ID_ENTRY_SIZE, srng->entry_size);
 	ath11k_hif_write32(ab, reg_base + HAL_TCL1_RING_ID_OFFSET(ab), val);
 
+	if (srng->ring_id == HAL_SRNG_RING_ID_WBM_IDLE_LINK) {
+		ath11k_hif_write32(ab, reg_base, (u32)srng->ring_base_paddr);
+		val = FIELD_PREP(HAL_TCL1_RING_BASE_MSB_RING_BASE_ADDR_MSB,
+				 ((u64)srng->ring_base_paddr >>
+				 HAL_ADDR_MSB_REG_SHIFT)) |
+			FIELD_PREP(HAL_TCL1_RING_BASE_MSB_RING_SIZE,
+				   (srng->entry_size * srng->num_entries));
+		ath11k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET(ab), val);
+	}
+
 	/* interrupt setup */
 	/* NOTE: IPQ8074 v2 requires the interrupt timer threshold in the
 	 * unit of 8 usecs instead of 1 usec (as required by v1).

From 8845fed1ad7b2fcd4dde82737c197805255bed0f Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:27 +0300
Subject: [PATCH 1571/3292] ath11k: add support to get peer id for WCN6855

For WCN6855, the layout of hal rx mpdu info is different, so need to
handle this target differently when getting peer id.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-6-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/hal_rx.c |  5 +----
 drivers/net/wireless/ath/ath11k/hal_rx.h |  8 +++++++
 drivers/net/wireless/ath/ath11k/hw.c     | 28 ++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h     |  1 +
 4 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c
index 80999bae0958..325055ca41ab 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.c
@@ -1091,12 +1091,9 @@ ath11k_hal_rx_parse_mon_status_tlv(struct ath11k_base *ab,
 		break;
 	}
 	case HAL_RX_MPDU_START: {
-		struct hal_rx_mpdu_info *mpdu_info =
-			(struct hal_rx_mpdu_info *)tlv_data;
 		u16 peer_id;
 
-		peer_id = FIELD_GET(HAL_RX_MPDU_INFO_INFO0_PEERID,
-				    __le32_to_cpu(mpdu_info->info0));
+		peer_id = ab->hw_params.hw_ops->mpdu_info_get_peerid(tlv_data);
 		if (peer_id)
 			ppdu_info->peer_id = peer_id;
 		break;
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.h b/drivers/net/wireless/ath/ath11k/hal_rx.h
index d464a270c049..0f1f04b812b9 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.h
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.h
@@ -254,12 +254,20 @@ struct hal_rx_phyrx_rssi_legacy_info {
 } __packed;
 
 #define HAL_RX_MPDU_INFO_INFO0_PEERID	GENMASK(31, 16)
+#define HAL_RX_MPDU_INFO_INFO0_PEERID_WCN6855	GENMASK(15, 0)
+
 struct hal_rx_mpdu_info {
 	__le32 rsvd0;
 	__le32 info0;
 	__le32 rsvd1[21];
 } __packed;
 
+struct hal_rx_mpdu_info_wcn6855 {
+	__le32 rsvd0[8];
+	__le32 info0;
+	__le32 rsvd1[14];
+} __packed;
+
 #define HAL_RX_PPDU_END_DURATION	GENMASK(23, 0)
 struct hal_rx_ppdu_end_duration {
 	__le32 rsvd0[9];
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 01207b50a454..d9596903b0a5 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -742,6 +742,29 @@ static void ath11k_hw_wcn6855_reo_setup(struct ath11k_base *ab)
 			   ring_hash_map);
 }
 
+static u16 ath11k_hw_ipq8074_mpdu_info_get_peerid(u8 *tlv_data)
+{
+	u16 peer_id = 0;
+	struct hal_rx_mpdu_info *mpdu_info =
+		(struct hal_rx_mpdu_info *)tlv_data;
+
+	peer_id = FIELD_GET(HAL_RX_MPDU_INFO_INFO0_PEERID,
+			    __le32_to_cpu(mpdu_info->info0));
+
+	return peer_id;
+}
+
+static u16 ath11k_hw_wcn6855_mpdu_info_get_peerid(u8 *tlv_data)
+{
+	u16 peer_id = 0;
+	struct hal_rx_mpdu_info_wcn6855 *mpdu_info =
+		(struct hal_rx_mpdu_info_wcn6855 *)tlv_data;
+
+	peer_id = FIELD_GET(HAL_RX_MPDU_INFO_INFO0_PEERID_WCN6855,
+			    __le32_to_cpu(mpdu_info->info0));
+	return peer_id;
+}
+
 const struct ath11k_hw_ops ipq8074_ops = {
 	.get_hw_mac_from_pdev_id = ath11k_hw_ipq8074_mac_from_pdev_id,
 	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
@@ -775,6 +798,7 @@ const struct ath11k_hw_ops ipq8074_ops = {
 	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
 	.reo_setup = ath11k_hw_ipq8074_reo_setup,
+	.mpdu_info_get_peerid = ath11k_hw_ipq8074_mpdu_info_get_peerid,
 };
 
 const struct ath11k_hw_ops ipq6018_ops = {
@@ -810,6 +834,7 @@ const struct ath11k_hw_ops ipq6018_ops = {
 	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
 	.reo_setup = ath11k_hw_ipq8074_reo_setup,
+	.mpdu_info_get_peerid = ath11k_hw_ipq8074_mpdu_info_get_peerid,
 };
 
 const struct ath11k_hw_ops qca6390_ops = {
@@ -845,6 +870,7 @@ const struct ath11k_hw_ops qca6390_ops = {
 	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
 	.reo_setup = ath11k_hw_ipq8074_reo_setup,
+	.mpdu_info_get_peerid = ath11k_hw_ipq8074_mpdu_info_get_peerid,
 };
 
 const struct ath11k_hw_ops qcn9074_ops = {
@@ -880,6 +906,7 @@ const struct ath11k_hw_ops qcn9074_ops = {
 	.rx_desc_get_attention = ath11k_hw_qcn9074_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_qcn9074_rx_desc_get_msdu_payload,
 	.reo_setup = ath11k_hw_ipq8074_reo_setup,
+	.mpdu_info_get_peerid = ath11k_hw_ipq8074_mpdu_info_get_peerid,
 };
 
 const struct ath11k_hw_ops wcn6855_ops = {
@@ -915,6 +942,7 @@ const struct ath11k_hw_ops wcn6855_ops = {
 	.rx_desc_get_attention = ath11k_hw_wcn6855_rx_desc_get_attention,
 	.rx_desc_get_msdu_payload = ath11k_hw_wcn6855_rx_desc_get_msdu_payload,
 	.reo_setup = ath11k_hw_wcn6855_reo_setup,
+	.mpdu_info_get_peerid = ath11k_hw_wcn6855_mpdu_info_get_peerid,
 };
 
 #define ATH11K_TX_RING_MASK_0 0x1
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index afe3b3c71695..be62f0c2e25e 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -200,6 +200,7 @@ struct ath11k_hw_ops {
 	struct rx_attention *(*rx_desc_get_attention)(struct hal_rx_desc *desc);
 	u8 *(*rx_desc_get_msdu_payload)(struct hal_rx_desc *desc);
 	void (*reo_setup)(struct ath11k_base *ab);
+	u16 (*mpdu_info_get_peerid)(u8 *tlv_data);
 };
 
 extern const struct ath11k_hw_ops ipq8074_ops;

From 0fbf19570099cf1c41e86b3b14a392d46131ed0d Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:28 +0300
Subject: [PATCH 1572/3292] ath11k: add support for WCN6855

This patch is to add support for WCN6855. For station mode,
WCN6855 is able to connect to an AP, and ping works well.
For AP mode, hostapd is able to bringup an SAP interface with
WCN6855, a normal station can connect to this AP and
ping works well.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-7-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c | 39 +++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/core.h |  1 +
 drivers/net/wireless/ath/ath11k/mhi.c  |  1 +
 drivers/net/wireless/ath/ath11k/pci.c  | 44 ++++++++++++++++++++------
 4 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 77ce3347ab86..fd8941905fdb 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -187,6 +187,45 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.supports_suspend = false,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074),
 	},
+	{
+		.name = "wcn6855 hw2.0",
+		.hw_rev = ATH11K_HW_WCN6855_HW20,
+		.fw = {
+			.dir = "WCN6855/hw2.0",
+			.board_size = 256 * 1024,
+			.cal_size = 256 * 1024,
+		},
+		.max_radios = 3,
+		.bdf_addr = 0x4B0C0000,
+		.hw_ops = &wcn6855_ops,
+		.ring_mask = &ath11k_hw_ring_mask_qca6390,
+		.internal_sleep_clock = true,
+		.regs = &wcn6855_regs,
+		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390,
+		.host_ce_config = ath11k_host_ce_config_qca6390,
+		.ce_count = 9,
+		.target_ce_config = ath11k_target_ce_config_wlan_qca6390,
+		.target_ce_count = 9,
+		.svc_to_ce_map = ath11k_target_service_to_ce_map_wlan_qca6390,
+		.svc_to_ce_map_len = 14,
+		.single_pdev_only = true,
+		.rxdma1_enable = false,
+		.num_rxmda_per_pdev = 2,
+		.rx_mac_buf_ring = true,
+		.vdev_start_delay = true,
+		.htt_peer_map_v2 = false,
+		.tcl_0_only = true,
+		.spectral_fft_sz = 0,
+
+		.interface_modes = BIT(NL80211_IFTYPE_STATION) |
+					BIT(NL80211_IFTYPE_AP),
+		.supports_monitor = false,
+		.supports_shadow_regs = true,
+		.idle_ps = true,
+		.cold_boot_calib = false,
+		.supports_suspend = true,
+		.hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855),
+	},
 };
 
 int ath11k_core_suspend(struct ath11k_base *ab)
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 382df5318b61..018fb2385f2a 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -107,6 +107,7 @@ enum ath11k_hw_rev {
 	ATH11K_HW_QCA6390_HW20,
 	ATH11K_HW_IPQ6018_HW10,
 	ATH11K_HW_QCN9074_HW10,
+	ATH11K_HW_WCN6855_HW20,
 };
 
 enum ath11k_firmware_mode {
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 27b394d115e2..75cc2d80fde8 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -354,6 +354,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
 		ath11k_mhi_config = &ath11k_mhi_config_qcn9074;
 		break;
 	case ATH11K_HW_QCA6390_HW20:
+	case ATH11K_HW_WCN6855_HW20:
 		ath11k_mhi_config = &ath11k_mhi_config_qca6390;
 		break;
 	default:
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 0f31eb566fb6..a4688b6beeb1 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -36,9 +36,11 @@
 
 #define QCA6390_DEVICE_ID		0x1101
 #define QCN9074_DEVICE_ID		0x1104
+#define WCN6855_DEVICE_ID		0x1103
 
 static const struct pci_device_id ath11k_pci_id_table[] = {
 	{ PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) },
+	{ PCI_VDEVICE(QCOM, WCN6855_DEVICE_ID) },
 	/* TODO: add QCN9074_DEVICE_ID) once firmware issues are resolved */
 	{0}
 };
@@ -1176,12 +1178,26 @@ static const struct ath11k_hif_ops ath11k_pci_hif_ops = {
 	.get_ce_msi_idx = ath11k_pci_get_ce_msi_idx,
 };
 
+static void ath11k_pci_read_hw_version(struct ath11k_base *ab, u32 *major, u32 *minor)
+{
+	u32 soc_hw_version;
+
+	soc_hw_version = ath11k_pci_read32(ab, TCSR_SOC_HW_VERSION);
+	*major = FIELD_GET(TCSR_SOC_HW_VERSION_MAJOR_MASK,
+			   soc_hw_version);
+	*minor = FIELD_GET(TCSR_SOC_HW_VERSION_MINOR_MASK,
+			   soc_hw_version);
+
+	ath11k_dbg(ab, ATH11K_DBG_PCI, "pci tcsr_soc_hw_version major %d minor %d\n",
+		   *major, *minor);
+}
+
 static int ath11k_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *pci_dev)
 {
 	struct ath11k_base *ab;
 	struct ath11k_pci *ab_pci;
-	u32 soc_hw_version, soc_hw_version_major, soc_hw_version_minor;
+	u32 soc_hw_version_major, soc_hw_version_minor;
 	int ret;
 
 	ab = ath11k_core_alloc(&pdev->dev, sizeof(*ab_pci), ATH11K_BUS_PCI,
@@ -1209,15 +1225,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
 
 	switch (pci_dev->device) {
 	case QCA6390_DEVICE_ID:
-		soc_hw_version = ath11k_pci_read32(ab, TCSR_SOC_HW_VERSION);
-		soc_hw_version_major = FIELD_GET(TCSR_SOC_HW_VERSION_MAJOR_MASK,
-						 soc_hw_version);
-		soc_hw_version_minor = FIELD_GET(TCSR_SOC_HW_VERSION_MINOR_MASK,
-						 soc_hw_version);
-
-		ath11k_dbg(ab, ATH11K_DBG_PCI, "pci tcsr_soc_hw_version major %d minor %d\n",
-			   soc_hw_version_major, soc_hw_version_minor);
-
+		ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
+					   &soc_hw_version_minor);
 		switch (soc_hw_version_major) {
 		case 2:
 			ab->hw_rev = ATH11K_HW_QCA6390_HW20;
@@ -1235,6 +1244,21 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
 		ab->bus_params.static_window_map = true;
 		ab->hw_rev = ATH11K_HW_QCN9074_HW10;
 		break;
+	case WCN6855_DEVICE_ID:
+		ath11k_pci_read_hw_version(ab, &soc_hw_version_major,
+					   &soc_hw_version_minor);
+		switch (soc_hw_version_major) {
+		case 2:
+			ab->hw_rev = ATH11K_HW_WCN6855_HW20;
+			break;
+		default:
+			dev_err(&pdev->dev, "Unsupported WCN6855 SOC hardware version: %d %d\n",
+				soc_hw_version_major, soc_hw_version_minor);
+			ret = -EOPNOTSUPP;
+			goto err_pci_free_region;
+		}
+		ab_pci->msi_config = &ath11k_msi_config[0];
+		break;
 	default:
 		dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n",
 			pci_dev->device);

From 5088df0504fe7d9623bf5789950327bc9e594fed Mon Sep 17 00:00:00 2001
From: Baochen Qiang <bqiang@codeaurora.org>
Date: Mon, 31 May 2021 17:41:28 +0300
Subject: [PATCH 1573/3292] ath11k: don't call ath11k_pci_set_l1ss for WCN6855

For QCA6390, one PCI related clock drifts sometimes, and it makes
PCI link difficult to quit L1ss. Current implementation fixed this
by configuring some related regs using ath11k_pci_fix_l1ss.

WCN6855 does not have this clock drift problem, so no need to set
these regs.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-01720.1-QCAHSPSWPL_V1_V2_SILICONZ_LITE-1
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Baochen Qiang <bqiang@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511162214.29475-8-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c | 5 +++++
 drivers/net/wireless/ath/ath11k/hw.h   | 1 +
 drivers/net/wireless/ath/ath11k/pci.c  | 3 ++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index fd8941905fdb..4a1051418f33 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -70,6 +70,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.cold_boot_calib = true,
 		.supports_suspend = false,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
+		.fix_l1ss = true,
 	},
 	{
 		.hw_rev = ATH11K_HW_IPQ6018_HW10,
@@ -110,6 +111,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.cold_boot_calib = true,
 		.supports_suspend = false,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
+		.fix_l1ss = true,
 	},
 	{
 		.name = "qca6390 hw2.0",
@@ -149,6 +151,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.cold_boot_calib = false,
 		.supports_suspend = true,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
+		.fix_l1ss = true,
 	},
 	{
 		.name = "qcn9074 hw1.0",
@@ -186,6 +189,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.cold_boot_calib = false,
 		.supports_suspend = false,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074),
+		.fix_l1ss = true,
 	},
 	{
 		.name = "wcn6855 hw2.0",
@@ -225,6 +229,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.cold_boot_calib = false,
 		.supports_suspend = true,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855),
+		.fix_l1ss = false,
 	},
 };
 
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index be62f0c2e25e..62f5978b3005 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -162,6 +162,7 @@ struct ath11k_hw_params {
 	bool cold_boot_calib;
 	bool supports_suspend;
 	u32 hal_desc_sz;
+	bool fix_l1ss;
 };
 
 struct ath11k_hw_ops {
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index a4688b6beeb1..f8f6b2090dad 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -434,7 +434,8 @@ static void ath11k_pci_sw_reset(struct ath11k_base *ab, bool power_on)
 		ath11k_pci_enable_ltssm(ab);
 		ath11k_pci_clear_all_intrs(ab);
 		ath11k_pci_set_wlaon_pwr_ctrl(ab);
-		ath11k_pci_fix_l1ss(ab);
+		if (ab->hw_params.fix_l1ss)
+			ath11k_pci_fix_l1ss(ab);
 	}
 
 	ath11k_mhi_clear_vector(ab);

From 9e88dd431d2345acdb7a549f3e88aaf4c2a307a1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 31 May 2021 17:41:28 +0300
Subject: [PATCH 1574/3292] ath10k: go to path err_unsupported when chip id is
 not supported

When chip id is not supported, it go to path err_unsupported
to print the error message.

Fixes: f8914a14623a ("ath10k: restore QCA9880-AR1A (v1) detection")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210522105822.1091848-2-yangyingliang@huawei.com
---
 drivers/net/wireless/ath/ath10k/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index e7fde635e0ee..463cf3f8f8a5 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3701,7 +3701,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
 		goto err_unsupported;
 
 	if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id))
-		goto err_free_irq;
+		goto err_unsupported;
 
 	ret = ath10k_core_register(ar, &bus_params);
 	if (ret) {

From e2783e2f39ba99178dedfc1646d5cc0979d1bab3 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 31 May 2021 17:41:28 +0300
Subject: [PATCH 1575/3292] ath10k: add missing error return code in
 ath10k_pci_probe()

When chip_id is not supported, the resources will be freed
on path err_unsupported, these resources will also be freed
when calling ath10k_pci_remove(), it will cause double free,
so return -ENODEV when it doesn't support the device with wrong
chip_id.

Fixes: c0c378f9907c ("ath10k: remove target soc ps code")
Fixes: 7505f7c3ec1d ("ath10k: create a chip revision whitelist")
Fixes: f8914a14623a ("ath10k: restore QCA9880-AR1A (v1) detection")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210522105822.1091848-3-yangyingliang@huawei.com
---
 drivers/net/wireless/ath/ath10k/pci.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 463cf3f8f8a5..71878ab35b93 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3685,8 +3685,10 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
 			ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
 		if (bus_params.chip_id != 0xffffffff) {
 			if (!ath10k_pci_chip_is_supported(pdev->device,
-							  bus_params.chip_id))
+							  bus_params.chip_id)) {
+				ret = -ENODEV;
 				goto err_unsupported;
+			}
 		}
 	}
 
@@ -3697,11 +3699,15 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
 	}
 
 	bus_params.chip_id = ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
-	if (bus_params.chip_id == 0xffffffff)
+	if (bus_params.chip_id == 0xffffffff) {
+		ret = -ENODEV;
 		goto err_unsupported;
+	}
 
-	if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id))
+	if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id)) {
+		ret = -ENODEV;
 		goto err_unsupported;
+	}
 
 	ret = ath10k_core_register(ar, &bus_params);
 	if (ret) {

From 272fdc0c4542fad173b44965be02a16d6db95499 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 31 May 2021 17:41:28 +0300
Subject: [PATCH 1576/3292] wireless: carl9170: fix LEDS build errors &
 warnings

kernel test robot reports over 200 build errors and warnings
that are due to this Kconfig problem when CARL9170=m,
MAC80211=y, and LEDS_CLASS=m.

WARNING: unmet direct dependencies detected for MAC80211_LEDS
  Depends on [n]: NET [=y] && WIRELESS [=y] && MAC80211 [=y] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=MAC80211 [=y])
  Selected by [m]:
  - CARL9170_LEDS [=y] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_ATH [=y] && CARL9170 [=m]

CARL9170_LEDS selects MAC80211_LEDS even though its kconfig
dependencies are not met. This happens because 'select' does not follow
any Kconfig dependency chains.

Fix this by making CARL9170_LEDS depend on MAC80211_LEDS, where
the latter supplies any needed dependencies on LEDS_CLASS.

Fixes: 1d7e1e6b1b8ed ("carl9170: Makefile, Kconfig files and MAINTAINERS")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Christian Lamparter <chunkeey@googlemail.com>
Cc: linux-wireless@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Christian Lamparter <chunkeey@googlemail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210530031134.23274-1-rdunlap@infradead.org
---
 drivers/net/wireless/ath/carl9170/Kconfig | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/carl9170/Kconfig b/drivers/net/wireless/ath/carl9170/Kconfig
index b2d760873992..ba9bea79381c 100644
--- a/drivers/net/wireless/ath/carl9170/Kconfig
+++ b/drivers/net/wireless/ath/carl9170/Kconfig
@@ -16,13 +16,11 @@ config CARL9170
 
 config CARL9170_LEDS
 	bool "SoftLED Support"
-	depends on CARL9170
-	select MAC80211_LEDS
-	select LEDS_CLASS
-	select NEW_LEDS
 	default y
+	depends on CARL9170
+	depends on MAC80211_LEDS
 	help
-	  This option is necessary, if you want your device' LEDs to blink
+	  This option is necessary, if you want your device's LEDs to blink.
 
 	  Say Y, unless you need the LEDs for firmware debugging.
 

From e0a6120f6816ddd366530ce7ae5cb001a5e819dd Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 26 May 2021 22:22:19 +0800
Subject: [PATCH 1577/3292] ath10k: remove unused more_frags variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following W=1 build warning:

  drivers/net/wireless/ath/ath10k/htt_rx.c:1790:7: warning: variable ‘more_frags’ set but not used [-Wunused-but-set-variable]
   1790 |  bool more_frags;
        |       ^~~~~~~~~~

Fixes: a1166b2653db ("ath10k: add CCMP PN replay protection for fragmented frames for PCIe")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210526142219.2542528-1-yangyingliang@huawei.com
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 7ffb5d5b2a70..adbaeb67eedf 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1787,7 +1787,6 @@ static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar,
 	struct ath10k_peer *peer;
 	union htt_rx_pn_t *last_pn, new_pn = {0};
 	struct ieee80211_hdr *hdr;
-	bool more_frags;
 	u8 tid, frag_number;
 	u32 seq;
 
@@ -1805,7 +1804,6 @@ static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar,
 
 	last_pn = &peer->frag_tids_last_pn[tid];
 	new_pn.pn48 = ath10k_htt_rx_h_get_pn(ar, skb, offset, enctype);
-	more_frags = ieee80211_has_morefrags(hdr->frame_control);
 	frag_number = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
 	seq = (__le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4;
 

From 87c272c618c7197b24fd3acf2d337315bd93b4fa Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:45:01 +0200
Subject: [PATCH 1578/3292] net/af_iucv: clean up some forward declarations

The forward declarations for the iucv_handler callbacks are causing
various compile warnings with gcc-11. Reshuffle the code to get rid
of these prototypes.

Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 0fdb389c3390..44453b35c7b7 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -44,6 +44,7 @@ static struct proto iucv_proto = {
 };
 
 static struct iucv_interface *pr_iucv;
+static struct iucv_handler af_iucv_handler;
 
 /* special AF_IUCV IPRM messages */
 static const u8 iprm_shutdown[8] =
@@ -91,28 +92,11 @@ static void iucv_sock_close(struct sock *sk);
 
 static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify);
 
-/* Call Back functions */
-static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
-static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
-static void iucv_callback_connack(struct iucv_path *, u8 *);
-static int iucv_callback_connreq(struct iucv_path *, u8 *, u8 *);
-static void iucv_callback_connrej(struct iucv_path *, u8 *);
-static void iucv_callback_shutdown(struct iucv_path *, u8 *);
-
 static struct iucv_sock_list iucv_sk_list = {
 	.lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock),
 	.autobind_name = ATOMIC_INIT(0)
 };
 
-static struct iucv_handler af_iucv_handler = {
-	.path_pending	  = iucv_callback_connreq,
-	.path_complete	  = iucv_callback_connack,
-	.path_severed	  = iucv_callback_connrej,
-	.message_pending  = iucv_callback_rx,
-	.message_complete = iucv_callback_txdone,
-	.path_quiesced	  = iucv_callback_shutdown,
-};
-
 static inline void high_nmcpy(unsigned char *dst, char *src)
 {
        memcpy(dst, src, 8);
@@ -1817,6 +1801,15 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16])
 	bh_unlock_sock(sk);
 }
 
+static struct iucv_handler af_iucv_handler = {
+	.path_pending		= iucv_callback_connreq,
+	.path_complete		= iucv_callback_connack,
+	.path_severed		= iucv_callback_connrej,
+	.message_pending	= iucv_callback_rx,
+	.message_complete	= iucv_callback_txdone,
+	.path_quiesced		= iucv_callback_shutdown,
+};
+
 /***************** HiperSockets transport callbacks ********************/
 static void afiucv_swap_src_dest(struct sk_buff *skb)
 {

From fbf179683655ca83b442d5f86e17ad25a462560e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Fri, 11 Jun 2021 09:45:02 +0200
Subject: [PATCH 1579/3292] s390/netiuvc: get rid of forward declarations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move netiucv_handler to get rid of forward declarations and gcc11
compile warnings:

drivers/s390/net/netiucv.c:518:65: warning: argument 2 of type ‘u8[16]’ {aka ‘unsigned char[16]’} with mismatched bound [-Warray-parameter=]
  518 | static void netiucv_callback_connack(struct iucv_path *path, u8 ipuser[16])
      |                                                              ~~~^~~~~~~~~~
drivers/s390/net/netiucv.c:122:58: note: previously declared as ‘u8 *’ {aka ‘unsigned char *’}
  122 | static void netiucv_callback_connack(struct iucv_path *, u8 *);

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/netiucv.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 260860cf3aa1..5a0c2f07a3a2 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -118,24 +118,6 @@ static struct device_driver netiucv_driver = {
 	.bus  = &iucv_bus,
 };
 
-static int netiucv_callback_connreq(struct iucv_path *, u8 *, u8 *);
-static void netiucv_callback_connack(struct iucv_path *, u8 *);
-static void netiucv_callback_connrej(struct iucv_path *, u8 *);
-static void netiucv_callback_connsusp(struct iucv_path *, u8 *);
-static void netiucv_callback_connres(struct iucv_path *, u8 *);
-static void netiucv_callback_rx(struct iucv_path *, struct iucv_message *);
-static void netiucv_callback_txdone(struct iucv_path *, struct iucv_message *);
-
-static struct iucv_handler netiucv_handler = {
-	.path_pending	  = netiucv_callback_connreq,
-	.path_complete	  = netiucv_callback_connack,
-	.path_severed	  = netiucv_callback_connrej,
-	.path_quiesced	  = netiucv_callback_connsusp,
-	.path_resumed	  = netiucv_callback_connres,
-	.message_pending  = netiucv_callback_rx,
-	.message_complete = netiucv_callback_txdone
-};
-
 /**
  * Per connection profiling data
  */
@@ -774,6 +756,16 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 	}
 }
 
+static struct iucv_handler netiucv_handler = {
+	.path_pending	  = netiucv_callback_connreq,
+	.path_complete	  = netiucv_callback_connack,
+	.path_severed	  = netiucv_callback_connrej,
+	.path_quiesced	  = netiucv_callback_connsusp,
+	.path_resumed	  = netiucv_callback_connres,
+	.message_pending  = netiucv_callback_rx,
+	.message_complete = netiucv_callback_txdone,
+};
+
 static void conn_action_connaccept(fsm_instance *fi, int event, void *arg)
 {
 	struct iucv_event *ev = arg;

From 858252c9c3463abc3f7b13e42aae3b8845f0479d Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Fri, 11 Jun 2021 14:54:50 +0200
Subject: [PATCH 1580/3292] dt-bindings: net: Add 25G BASE-R phy interface

Add 25gbase-r PHY interface mode.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ethernet-controller.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index d97b561003ed..b0933a8c295a 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -98,6 +98,7 @@ properties:
       - 10gbase-kr
       - usxgmii
       - 10gbase-r
+      - 25gbase-r
 
   phy-mode:
     $ref: "#/properties/phy-connection-type"

From a56c286865692ac12291afe4c66198915c6b08f9 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Fri, 11 Jun 2021 14:54:51 +0200
Subject: [PATCH 1581/3292] net: phy: Add 25G BASE-R interface mode

Add 25gbase-r phy interface mode

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.rst | 6 ++++++
 include/linux/phy.h              | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 3f05d50ecd6e..571ba08386e7 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -292,6 +292,12 @@ Some of the interface modes are described below:
     Note: due to legacy usage, some 10GBASE-R usage incorrectly makes
     use of this definition.
 
+``PHY_INTERFACE_MODE_25GBASER``
+    This is the IEEE 802.3 PCS Clause 107 defined 25GBASE-R protocol.
+    The PCS is identical to 10GBASE-R, i.e. 64B/66B encoded
+    running 2.5 as fast, giving a fixed bit rate of 25.78125 Gbaud.
+    Please refer to the IEEE standard for further information.
+
 ``PHY_INTERFACE_MODE_100BASEX``
     This defines IEEE 802.3 Clause 24.  The link operates at a fixed data
     rate of 125Mpbs using a 4B/5B encoding scheme, resulting in an underlying
diff --git a/include/linux/phy.h b/include/linux/phy.h
index b60694734b07..3b80dc3ed68b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -112,6 +112,7 @@ extern const int phy_10gbit_features_array[1];
  * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI
  * @PHY_INTERFACE_MODE_XAUI: 10 Gigabit Attachment Unit Interface
  * @PHY_INTERFACE_MODE_10GBASER: 10G BaseR
+ * @PHY_INTERFACE_MODE_25GBASER: 25G BaseR
  * @PHY_INTERFACE_MODE_USXGMII:  Universal Serial 10GE MII
  * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN
  * @PHY_INTERFACE_MODE_MAX: Book keeping
@@ -147,6 +148,7 @@ typedef enum {
 	PHY_INTERFACE_MODE_XAUI,
 	/* 10GBASE-R, XFI, SFI - single lane 10G Serdes */
 	PHY_INTERFACE_MODE_10GBASER,
+	PHY_INTERFACE_MODE_25GBASER,
 	PHY_INTERFACE_MODE_USXGMII,
 	/* 10GBASE-KR - with Clause 73 AN */
 	PHY_INTERFACE_MODE_10GKR,
@@ -223,6 +225,8 @@ static inline const char *phy_modes(phy_interface_t interface)
 		return "xaui";
 	case PHY_INTERFACE_MODE_10GBASER:
 		return "10gbase-r";
+	case PHY_INTERFACE_MODE_25GBASER:
+		return "25gbase-r";
 	case PHY_INTERFACE_MODE_USXGMII:
 		return "usxgmii";
 	case PHY_INTERFACE_MODE_10GKR:

From 452d2c6fbae2c11e3b0c17a3afe7b145db2196e7 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Fri, 11 Jun 2021 14:54:52 +0200
Subject: [PATCH 1582/3292] net: sfp: add support for 25G BASE-R SFPs

Add support for 25gbase-r modules. This is needed for the Sparx5 switch.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp-bus.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index e61de66e973b..1db9cea13690 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -392,6 +392,11 @@ EXPORT_SYMBOL_GPL(sfp_parse_support);
 phy_interface_t sfp_select_interface(struct sfp_bus *bus,
 				     unsigned long *link_modes)
 {
+	if (phylink_test(link_modes, 25000baseCR_Full) ||
+	    phylink_test(link_modes, 25000baseKR_Full) ||
+	    phylink_test(link_modes, 25000baseSR_Full))
+		return PHY_INTERFACE_MODE_25GBASER;
+
 	if (phylink_test(link_modes, 10000baseCR_Full) ||
 	    phylink_test(link_modes, 10000baseSR_Full) ||
 	    phylink_test(link_modes, 10000baseLR_Full) ||

From 21e0c59edc09ff8d50722071ded66574b1cc4e99 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Fri, 11 Jun 2021 14:54:53 +0200
Subject: [PATCH 1583/3292] net: phylink: Add 25G BASE-R support

Add 25gbase-r interface type and speed to phylink.
This is needed for the Sparx5 switch.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index bb9eeb74f70a..8ce8db487596 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -312,6 +312,11 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
 			phylink_set(pl->supported, 5000baseT_Full);
 			break;
 
+		case PHY_INTERFACE_MODE_25GBASER:
+			phylink_set(pl->supported, 25000baseCR_Full);
+			phylink_set(pl->supported, 25000baseKR_Full);
+			phylink_set(pl->supported, 25000baseSR_Full);
+			fallthrough;
 		case PHY_INTERFACE_MODE_USXGMII:
 		case PHY_INTERFACE_MODE_10GKR:
 		case PHY_INTERFACE_MODE_10GBASER:

From fb9349c4163e387db3750a4f2c507c9111ec1ed2 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Fri, 11 Jun 2021 21:16:08 +0800
Subject: [PATCH 1584/3292] stmmac: intel: move definitions to dwmac-intel
 header file

Currently some of the dwmac-intel definitions are in the header file,
while some are in the driver source file. Cleaning this by moving all
the definitions to the header file.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-intel.c    | 16 ----------------
 .../net/ethernet/stmicro/stmmac/dwmac-intel.h    | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 6a9a19b0844c..a38e47e6d470 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -10,22 +10,6 @@
 #include "stmmac.h"
 #include "stmmac_ptp.h"
 
-#define INTEL_MGBE_ADHOC_ADDR	0x15
-#define INTEL_MGBE_XPCS_ADDR	0x16
-
-/* Selection for PTP Clock Freq belongs to PSE & PCH GbE */
-#define PSE_PTP_CLK_FREQ_MASK		(GMAC_GPO0 | GMAC_GPO3)
-#define PSE_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
-#define PSE_PTP_CLK_FREQ_200MHZ		(GMAC_GPO0 | GMAC_GPO3)
-#define PSE_PTP_CLK_FREQ_256MHZ		(0)
-#define PCH_PTP_CLK_FREQ_MASK		(GMAC_GPO0)
-#define PCH_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
-#define PCH_PTP_CLK_FREQ_200MHZ		(0)
-
-/* Cross-timestamping defines */
-#define ART_CPUID_LEAF		0x15
-#define EHL_PSE_ART_MHZ		19200000
-
 struct intel_priv_data {
 	int mdio_adhoc_addr;	/* mdio address for serdes & etc */
 	unsigned long crossts_adj;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
index 20d14e588044..0a37987478c1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
@@ -34,4 +34,20 @@
 #define SERDES_RATE_PCIE_SHIFT	8
 #define SERDES_PCLK_SHIFT	12
 
+#define INTEL_MGBE_ADHOC_ADDR	0x15
+#define INTEL_MGBE_XPCS_ADDR	0x16
+
+/* Cross-timestamping defines */
+#define ART_CPUID_LEAF		0x15
+#define EHL_PSE_ART_MHZ		19200000
+
+/* Selection for PTP Clock Freq belongs to PSE & PCH GbE */
+#define PSE_PTP_CLK_FREQ_MASK		(GMAC_GPO0 | GMAC_GPO3)
+#define PSE_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
+#define PSE_PTP_CLK_FREQ_200MHZ		(GMAC_GPO0 | GMAC_GPO3)
+#define PSE_PTP_CLK_FREQ_256MHZ		(0)
+#define PCH_PTP_CLK_FREQ_MASK		(GMAC_GPO0)
+#define PCH_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
+#define PCH_PTP_CLK_FREQ_200MHZ		(0)
+
 #endif /* __DWMAC_INTEL_H__ */

From 3c3ea630e87c3ab9b91d1800b408dae6fc8ee1aa Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Fri, 11 Jun 2021 21:16:09 +0800
Subject: [PATCH 1585/3292] stmmac: intel: fix wrong kernel-doc

Kernel-doc for intel_eth_pci_remove is incorrect, pdev datatype is
struct pci_dev. Changed it to the 'pci device pointer'.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index a38e47e6d470..e0a7d2b17921 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -1087,7 +1087,7 @@ err_alloc_irq:
 /**
  * intel_eth_pci_remove
  *
- * @pdev: platform device pointer
+ * @pdev: pci device pointer
  * Description: this function calls the main to free the net resources
  * and releases the PCI resources.
  */

From 822ebc2cf50c4f223e859c35393b5cf0d96c56e1 Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Fri, 11 Jun 2021 10:35:37 -0500
Subject: [PATCH 1586/3292] ibmvnic: fix kernel build warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/net/ethernet/ibm/ibmvnic.c: In function ‘adapter_state_to_string’:
drivers/net/ethernet/ibm/ibmvnic.c:855:2: warning: enumeration value ‘VNIC_DOWN’ not handled in switch [-Wswitch]
  855 |  switch (state) {
      |  ^~~~~~
drivers/net/ethernet/ibm/ibmvnic.c: In function ‘reset_reason_to_string’:
drivers/net/ethernet/ibm/ibmvnic.c:1958:2: warning: enumeration value ‘VNIC_RESET_PASSIVE_INIT’ not handled in switch [-Wswitch]
 1958 |  switch (reason) {
      |  ^~~~~~

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e2630b60c001..b52aa092a74b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -869,6 +869,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
 		return "REMOVING";
 	case VNIC_REMOVED:
 		return "REMOVED";
+	case VNIC_DOWN:
+		return "DOWN";
 	}
 	return "UNKNOWN";
 }
@@ -1968,6 +1970,8 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
 		return "TIMEOUT";
 	case VNIC_RESET_CHANGE_PARAM:
 		return "CHANGE_PARAM";
+	case VNIC_RESET_PASSIVE_INIT:
+		return "PASSIVE_INIT";
 	}
 	return "UNKNOWN";
 }

From 73214a690c50a134bd364e1a4430e0e7ac81a8d8 Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Fri, 11 Jun 2021 10:43:39 -0500
Subject: [PATCH 1587/3292] ibmvnic: fix kernel build warnings in
 build_hdr_descs_arr

Fix the following kernel build warnings:
drivers/net/ethernet/ibm/ibmvnic.c:1516: warning: Function parameter or member 'skb' not described in 'build_hdr_descs_arr'
drivers/net/ethernet/ibm/ibmvnic.c:1516: warning: Function parameter or member 'indir_arr' not described in 'build_hdr_descs_arr'
drivers/net/ethernet/ibm/ibmvnic.c:1516: warning: Excess function parameter 'txbuff' description in 'build_hdr_descs_arr'

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b52aa092a74b..99eddb2c8e36 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1526,7 +1526,8 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
 
 /**
  * build_hdr_descs_arr - build a header descriptor array
- * @txbuff: tx buffer
+ * @skb: tx socket buffer
+ * @indir_arr: indirect array
  * @num_entries: number of descriptors to be sent
  * @hdr_field: bit field determining which headers will be sent
  *

From 8c713dc93ca9a423d6af8849c9254742a1070c37 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 10:20:54 +0200
Subject: [PATCH 1588/3292] rtnetlink: add alloc() method to rtnl_link_ops

In order to make rtnetlink ops that can create different
kinds of devices, like what we want to add to the WWAN
framework, the priv_size and setup parameters aren't quite
sufficient. Make this easier to manage by allowing ops to
allocate their own netdev via an @alloc method that gets
the tb netlink data.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h |  8 ++++++++
 net/core/rtnetlink.c    | 19 ++++++++++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 479f60ef54c0..384e800665f2 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -37,6 +37,9 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
  *	@maxtype: Highest device specific netlink attribute number
  *	@policy: Netlink policy for device specific attribute validation
  *	@validate: Optional validation function for netlink/changelink parameters
+ *	@alloc: netdev allocation function, can be %NULL and is then used
+ *		in place of alloc_netdev_mqs(), in this case @priv_size
+ *		and @setup are unused. Returns a netdev or ERR_PTR().
  *	@priv_size: sizeof net_device private space
  *	@setup: net_device setup function
  *	@newlink: Function for configuring and registering a new device
@@ -63,6 +66,11 @@ struct rtnl_link_ops {
 	const char		*kind;
 
 	size_t			priv_size;
+	struct net_device	*(*alloc)(struct nlattr *tb[],
+					  const char *ifname,
+					  unsigned char name_assign_type,
+					  unsigned int num_tx_queues,
+					  unsigned int num_rx_queues);
 	void			(*setup)(struct net_device *dev);
 
 	bool			netns_refund;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cd87c7661c72..92c3e43db812 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -376,12 +376,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
 	if (rtnl_link_ops_get(ops->kind))
 		return -EEXIST;
 
-	/* The check for setup is here because if ops
+	/* The check for alloc/setup is here because if ops
 	 * does not have that filled up, it is not possible
 	 * to use the ops for creating device. So do not
 	 * fill up dellink as well. That disables rtnl_dellink.
 	 */
-	if (ops->setup && !ops->dellink)
+	if ((ops->alloc || ops->setup) && !ops->dellink)
 		ops->dellink = unregister_netdevice_queue;
 
 	list_add_tail(&ops->list, &link_ops);
@@ -3165,8 +3165,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 		return ERR_PTR(-EINVAL);
 	}
 
-	dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
-			       ops->setup, num_tx_queues, num_rx_queues);
+	if (ops->alloc) {
+		dev = ops->alloc(tb, ifname, name_assign_type,
+				 num_tx_queues, num_rx_queues);
+		if (IS_ERR(dev))
+			return dev;
+	} else {
+		dev = alloc_netdev_mqs(ops->priv_size, ifname,
+				       name_assign_type, ops->setup,
+				       num_tx_queues, num_rx_queues);
+	}
+
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
@@ -3399,7 +3408,7 @@ replay:
 		return -EOPNOTSUPP;
 	}
 
-	if (!ops->setup)
+	if (!ops->alloc && !ops->setup)
 		return -EOPNOTSUPP;
 
 	if (!ifname[0]) {

From 00e77ed8e64d5f271c1f015c7153545980d48a76 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 10:20:55 +0200
Subject: [PATCH 1589/3292] rtnetlink: add IFLA_PARENT_[DEV|DEV_BUS]_NAME

In some cases, for example in the upcoming WWAN framework changes,
there's no natural "parent netdev", so sometimes dummy netdevs are
created or similar. IFLA_PARENT_DEV_NAME is a new attribute intended to
contain a device (sysfs, struct device) name that can be used instead
when creating a new netdev, if the rtnetlink family implements it.

As suggested by Parav Pandit, we also introduce IFLA_PARENT_DEV_BUS_NAME
attribute in order to uniquely identify a device on the system (with
bus/name pair).

ip-link(8) support for the generic parent device attributes will help
us avoid code duplication, so no other link type will require a custom
code to handle the parent name attribute. E.g. the WWAN interface
creation command will looks like this:

$ ip link add wwan0-1 parent-dev wwan0 type wwan channel-id 1

So, some future subsystem (or driver) FOO will have an interface
creation command that looks like this:

$ ip link add foo1-3 parent-dev foo1 type foo bar-id 3 baz-type Y

Below is an example of dumping link info of a random device with these
new attributes:

$ ip --details link show wlp0s20f3
  4: wlp0s20f3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue
     state UP mode DORMANT group default qlen 1000
     ...
     parent_bus pci parent_dev 0000:00:14.3

Co-developed-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Co-developed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Suggested-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |  7 +++++++
 net/core/rtnetlink.c         | 10 ++++++++++
 2 files changed, 17 insertions(+)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index a5a7f0e64865..4882e81514b6 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -341,6 +341,13 @@ enum {
 	IFLA_ALT_IFNAME, /* Alternative ifname */
 	IFLA_PERM_ADDRESS,
 	IFLA_PROTO_DOWN_REASON,
+
+	/* device (sysfs) name as parent, used instead
+	 * of IFLA_LINK where there's no parent netdev
+	 */
+	IFLA_PARENT_DEV_NAME,
+	IFLA_PARENT_DEV_BUS_NAME,
+
 	__IFLA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 92c3e43db812..170e97f3b3c6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1821,6 +1821,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	if (rtnl_fill_prop_list(skb, dev))
 		goto nla_put_failure;
 
+	if (dev->dev.parent &&
+	    nla_put_string(skb, IFLA_PARENT_DEV_NAME,
+			   dev_name(dev->dev.parent)))
+		goto nla_put_failure;
+
+	if (dev->dev.parent && dev->dev.parent->bus &&
+	    nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME,
+			   dev->dev.parent->bus->name))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 

From 88b710532e53de2466d1033fb1d5125aabf3215a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 10:20:56 +0200
Subject: [PATCH 1590/3292] wwan: add interface creation support

Add support to create (and destroy) interfaces via a new
rtnetlink kind "wwan". The responsible driver has to use
the new wwan_register_ops() to make this possible.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 245 ++++++++++++++++++++++++++++++++++-
 include/linux/wwan.h         |  24 ++++
 include/uapi/linux/wwan.h    |  16 +++
 net/core/rtnetlink.c         |   1 +
 4 files changed, 279 insertions(+), 7 deletions(-)
 create mode 100644 include/uapi/linux/wwan.h

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 45a41aee8958..7e728042fc41 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -14,6 +14,8 @@
 #include <linux/types.h>
 #include <linux/termios.h>
 #include <linux/wwan.h>
+#include <net/rtnetlink.h>
+#include <uapi/linux/wwan.h>
 
 /* Maximum number of minors in use */
 #define WWAN_MAX_MINORS		(1 << MINORBITS)
@@ -35,10 +37,16 @@ static int wwan_major;
  *
  * @id: WWAN device unique ID.
  * @dev: Underlying device.
+ * @port_id: Current available port ID to pick.
+ * @ops: wwan device ops
+ * @ops_ctxt: context to pass to ops
  */
 struct wwan_device {
 	unsigned int id;
 	struct device dev;
+	atomic_t port_id;
+	const struct wwan_ops *ops;
+	void *ops_ctxt;
 };
 
 /**
@@ -102,7 +110,8 @@ static const struct device_type wwan_dev_type = {
 
 static int wwan_dev_parent_match(struct device *dev, const void *parent)
 {
-	return (dev->type == &wwan_dev_type && dev->parent == parent);
+	return (dev->type == &wwan_dev_type &&
+		(dev->parent == parent || dev == parent));
 }
 
 static struct wwan_device *wwan_dev_get_by_parent(struct device *parent)
@@ -116,6 +125,23 @@ static struct wwan_device *wwan_dev_get_by_parent(struct device *parent)
 	return to_wwan_dev(dev);
 }
 
+static int wwan_dev_name_match(struct device *dev, const void *name)
+{
+	return dev->type == &wwan_dev_type &&
+	       strcmp(dev_name(dev), name) == 0;
+}
+
+static struct wwan_device *wwan_dev_get_by_name(const char *name)
+{
+	struct device *dev;
+
+	dev = class_find_device(wwan_class, NULL, name, wwan_dev_name_match);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	return to_wwan_dev(dev);
+}
+
 /* This function allocates and registers a new WWAN device OR if a WWAN device
  * already exist for the given parent, it gets a reference and return it.
  * This function is not exported (for now), it is called indirectly via
@@ -180,9 +206,14 @@ static void wwan_remove_dev(struct wwan_device *wwandev)
 	/* WWAN device is created and registered (get+add) along with its first
 	 * child port, and subsequent port registrations only grab a reference
 	 * (get). The WWAN device must then be unregistered (del+put) along with
-	 * its latest port, and reference simply dropped (put) otherwise.
+	 * its last port, and reference simply dropped (put) otherwise. In the
+	 * same fashion, we must not unregister it when the ops are still there.
 	 */
-	ret = device_for_each_child(&wwandev->dev, NULL, is_wwan_child);
+	if (wwandev->ops)
+		ret = 1;
+	else
+		ret = device_for_each_child(&wwandev->dev, NULL, is_wwan_child);
+
 	if (!ret)
 		device_unregister(&wwandev->dev);
 	else
@@ -750,26 +781,226 @@ static const struct file_operations wwan_port_fops = {
 	.llseek = noop_llseek,
 };
 
+/**
+ * wwan_register_ops - register WWAN device ops
+ * @parent: Device to use as parent and shared by all WWAN ports and
+ *	created netdevs
+ * @ops: operations to register
+ * @ctxt: context to pass to operations
+ *
+ * Returns: 0 on success, a negative error code on failure
+ */
+int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
+		      void *ctxt)
+{
+	struct wwan_device *wwandev;
+
+	if (WARN_ON(!parent || !ops))
+		return -EINVAL;
+
+	wwandev = wwan_create_dev(parent);
+	if (!wwandev)
+		return -ENOMEM;
+
+	if (WARN_ON(wwandev->ops)) {
+		wwan_remove_dev(wwandev);
+		return -EBUSY;
+	}
+
+	if (!try_module_get(ops->owner)) {
+		wwan_remove_dev(wwandev);
+		return -ENODEV;
+	}
+
+	wwandev->ops = ops;
+	wwandev->ops_ctxt = ctxt;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wwan_register_ops);
+
+/**
+ * wwan_unregister_ops - remove WWAN device ops
+ * @parent: Device to use as parent and shared by all WWAN ports and
+ *	created netdevs
+ */
+void wwan_unregister_ops(struct device *parent)
+{
+	struct wwan_device *wwandev = wwan_dev_get_by_parent(parent);
+	bool has_ops;
+
+	if (WARN_ON(IS_ERR(wwandev)))
+		return;
+
+	has_ops = wwandev->ops;
+
+	/* put the reference obtained by wwan_dev_get_by_parent(),
+	 * we should still have one (that the owner is giving back
+	 * now) due to the ops being assigned, check that below
+	 * and return if not.
+	 */
+	put_device(&wwandev->dev);
+
+	if (WARN_ON(!has_ops))
+		return;
+
+	module_put(wwandev->ops->owner);
+
+	wwandev->ops = NULL;
+	wwandev->ops_ctxt = NULL;
+	wwan_remove_dev(wwandev);
+}
+EXPORT_SYMBOL_GPL(wwan_unregister_ops);
+
+static int wwan_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
+			      struct netlink_ext_ack *extack)
+{
+	if (!data)
+		return -EINVAL;
+
+	if (!tb[IFLA_PARENT_DEV_NAME])
+		return -EINVAL;
+
+	if (!data[IFLA_WWAN_LINK_ID])
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct device_type wwan_type = { .name = "wwan" };
+
+static struct net_device *wwan_rtnl_alloc(struct nlattr *tb[],
+					  const char *ifname,
+					  unsigned char name_assign_type,
+					  unsigned int num_tx_queues,
+					  unsigned int num_rx_queues)
+{
+	const char *devname = nla_data(tb[IFLA_PARENT_DEV_NAME]);
+	struct wwan_device *wwandev = wwan_dev_get_by_name(devname);
+	struct net_device *dev;
+
+	if (IS_ERR(wwandev))
+		return ERR_CAST(wwandev);
+
+	/* only supported if ops were registered (not just ports) */
+	if (!wwandev->ops) {
+		dev = ERR_PTR(-EOPNOTSUPP);
+		goto out;
+	}
+
+	dev = alloc_netdev_mqs(wwandev->ops->priv_size, ifname, name_assign_type,
+			       wwandev->ops->setup, num_tx_queues, num_rx_queues);
+
+	if (dev) {
+		SET_NETDEV_DEV(dev, &wwandev->dev);
+		SET_NETDEV_DEVTYPE(dev, &wwan_type);
+	}
+
+out:
+	/* release the reference */
+	put_device(&wwandev->dev);
+	return dev;
+}
+
+static int wwan_rtnl_newlink(struct net *src_net, struct net_device *dev,
+			     struct nlattr *tb[], struct nlattr *data[],
+			     struct netlink_ext_ack *extack)
+{
+	struct wwan_device *wwandev = wwan_dev_get_by_parent(dev->dev.parent);
+	u32 link_id = nla_get_u32(data[IFLA_WWAN_LINK_ID]);
+	int ret;
+
+	if (IS_ERR(wwandev))
+		return PTR_ERR(wwandev);
+
+	/* shouldn't have a netdev (left) with us as parent so WARN */
+	if (WARN_ON(!wwandev->ops)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (wwandev->ops->newlink)
+		ret = wwandev->ops->newlink(wwandev->ops_ctxt, dev,
+					    link_id, extack);
+	else
+		ret = register_netdevice(dev);
+
+out:
+	/* release the reference */
+	put_device(&wwandev->dev);
+	return ret;
+}
+
+static void wwan_rtnl_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct wwan_device *wwandev = wwan_dev_get_by_parent(dev->dev.parent);
+
+	if (IS_ERR(wwandev))
+		return;
+
+	/* shouldn't have a netdev (left) with us as parent so WARN */
+	if (WARN_ON(!wwandev->ops))
+		goto out;
+
+	if (wwandev->ops->dellink)
+		wwandev->ops->dellink(wwandev->ops_ctxt, dev, head);
+	else
+		unregister_netdevice(dev);
+
+out:
+	/* release the reference */
+	put_device(&wwandev->dev);
+}
+
+static const struct nla_policy wwan_rtnl_policy[IFLA_WWAN_MAX + 1] = {
+	[IFLA_WWAN_LINK_ID] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = {
+	.kind = "wwan",
+	.maxtype = __IFLA_WWAN_MAX,
+	.alloc = wwan_rtnl_alloc,
+	.validate = wwan_rtnl_validate,
+	.newlink = wwan_rtnl_newlink,
+	.dellink = wwan_rtnl_dellink,
+	.policy = wwan_rtnl_policy,
+};
+
 static int __init wwan_init(void)
 {
+	int err;
+
+	err = rtnl_link_register(&wwan_rtnl_link_ops);
+	if (err)
+		return err;
+
 	wwan_class = class_create(THIS_MODULE, "wwan");
-	if (IS_ERR(wwan_class))
-		return PTR_ERR(wwan_class);
+	if (IS_ERR(wwan_class)) {
+		err = PTR_ERR(wwan_class);
+		goto unregister;
+	}
 
 	/* chrdev used for wwan ports */
 	wwan_major = __register_chrdev(0, 0, WWAN_MAX_MINORS, "wwan_port",
 				       &wwan_port_fops);
 	if (wwan_major < 0) {
-		class_destroy(wwan_class);
-		return wwan_major;
+		err = wwan_major;
+		goto destroy;
 	}
 
 	return 0;
+
+destroy:
+	class_destroy(wwan_class);
+unregister:
+	rtnl_link_unregister(&wwan_rtnl_link_ops);
+	return err;
 }
 
 static void __exit wwan_exit(void)
 {
 	__unregister_chrdev(wwan_major, 0, WWAN_MAX_MINORS, "wwan_port");
+	rtnl_link_unregister(&wwan_rtnl_link_ops);
 	class_destroy(wwan_class);
 }
 
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index fa33cc16d931..430a3a0817de 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <linux/netlink.h>
 
 /**
  * enum wwan_port_type - WWAN port types
@@ -116,4 +117,27 @@ void wwan_port_txon(struct wwan_port *port);
  */
 void *wwan_port_get_drvdata(struct wwan_port *port);
 
+/**
+ * struct wwan_ops - WWAN device ops
+ * @owner: module owner of the WWAN ops
+ * @priv_size: size of private netdev data area
+ * @setup: set up a new netdev
+ * @newlink: register the new netdev
+ * @dellink: remove the given netdev
+ */
+struct wwan_ops {
+	struct module *owner;
+	unsigned int priv_size;
+	void (*setup)(struct net_device *dev);
+	int (*newlink)(void *ctxt, struct net_device *dev,
+		       u32 if_id, struct netlink_ext_ack *extack);
+	void (*dellink)(void *ctxt, struct net_device *dev,
+			struct list_head *head);
+};
+
+int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
+		      void *ctxt);
+
+void wwan_unregister_ops(struct device *parent);
+
 #endif /* __WWAN_H */
diff --git a/include/uapi/linux/wwan.h b/include/uapi/linux/wwan.h
new file mode 100644
index 000000000000..32a2720b4d11
--- /dev/null
+++ b/include/uapi/linux/wwan.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2021 Intel Corporation.
+ */
+#ifndef _UAPI_WWAN_H_
+#define _UAPI_WWAN_H_
+
+enum {
+	IFLA_WWAN_UNSPEC,
+	IFLA_WWAN_LINK_ID, /* u32 */
+
+	__IFLA_WWAN_MAX
+};
+#define IFLA_WWAN_MAX (__IFLA_WWAN_MAX - 1)
+
+#endif /* _UAPI_WWAN_H_ */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 170e97f3b3c6..5baa86bca876 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1890,6 +1890,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_PERM_ADDRESS]	= { .type = NLA_REJECT },
 	[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
 	[IFLA_NEW_IFINDEX]	= NLA_POLICY_MIN(NLA_S32, 1),
+	[IFLA_PARENT_DEV_NAME]	= { .type = NLA_NUL_STRING },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {

From 13adac032982c61bb590669e8e87e51558917ca1 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Sat, 12 Jun 2021 10:20:57 +0200
Subject: [PATCH 1591/3292] net: mhi_net: Register wwan_ops for link creation

Register wwan_ops for link management via wwan rtnetlink. This is
only basic support for now, since we only support creating one
single link (link-0), but is useful to validate new wwan rtnetlink
interface.

For backward compatibity support, we still register a default netdev
at probe time, except if 'create_default_iface' module parameter is
set to false.

This has been tested with iproute2 and mbimcli:
$ ip link add dev wwan0-0 parentdev-name wwan0 type wwan linkid 0
$ mbimcli -p -d /dev/wwan0p2MBIM --connect apn=free
$ ip link set dev wwan0-0 up
$ ip addr add dev wwan0 ${IP}
$ ip route replace default via ${IP}
$ ping 8.8.8.8
...

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig   |   1 +
 drivers/net/mhi/net.c | 125 ++++++++++++++++++++++++++++++++++--------
 2 files changed, 102 insertions(+), 24 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 4da68ba8448f..30d6e2f7686e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -431,6 +431,7 @@ config VSOCKMON
 config MHI_NET
 	tristate "MHI network driver"
 	depends on MHI_BUS
+	select WWAN_CORE
 	help
 	  This is the network driver for MHI bus.  It can be used with
 	  QCOM based WWAN modems (like SDX55).  Say Y or M.
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index 0d8293a47a56..64af1e518484 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/u64_stats_sync.h>
+#include <linux/wwan.h>
 
 #include "mhi.h"
 
@@ -18,6 +19,12 @@
 #define MHI_NET_MAX_MTU		0xffff
 #define MHI_NET_DEFAULT_MTU	0x4000
 
+/* When set to false, the default netdev (link 0) is not created, and it's up
+ * to user to create the link (via wwan rtnetlink).
+ */
+static bool create_default_iface = true;
+module_param(create_default_iface, bool, 0);
+
 struct mhi_device_info {
 	const char *netname;
 	const struct mhi_net_proto *proto;
@@ -295,32 +302,33 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
 		schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
 }
 
-static struct device_type wwan_type = {
-	.name = "wwan",
-};
-
-static int mhi_net_probe(struct mhi_device *mhi_dev,
-			 const struct mhi_device_id *id)
+static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
+			   struct netlink_ext_ack *extack)
 {
-	const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
-	struct device *dev = &mhi_dev->dev;
+	const struct mhi_device_info *info;
+	struct mhi_device *mhi_dev = ctxt;
 	struct mhi_net_dev *mhi_netdev;
-	struct net_device *ndev;
 	int err;
 
-	ndev = alloc_netdev(sizeof(*mhi_netdev), info->netname,
-			    NET_NAME_PREDICTABLE, mhi_net_setup);
-	if (!ndev)
-		return -ENOMEM;
+	info = (struct mhi_device_info *)mhi_dev->id->driver_data;
+
+	/* For now we only support one link (link context 0), driver must be
+	 * reworked to break 1:1 relationship for net MBIM and to forward setup
+	 * call to rmnet(QMAP) otherwise.
+	 */
+	if (if_id != 0)
+		return -EINVAL;
+
+	if (dev_get_drvdata(&mhi_dev->dev))
+		return -EBUSY;
 
 	mhi_netdev = netdev_priv(ndev);
-	dev_set_drvdata(dev, mhi_netdev);
+
+	dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
 	mhi_netdev->ndev = ndev;
 	mhi_netdev->mdev = mhi_dev;
 	mhi_netdev->skbagg_head = NULL;
 	mhi_netdev->proto = info->proto;
-	SET_NETDEV_DEV(ndev, &mhi_dev->dev);
-	SET_NETDEV_DEVTYPE(ndev, &wwan_type);
 
 	INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
 	u64_stats_init(&mhi_netdev->stats.rx_syncp);
@@ -334,7 +342,10 @@ static int mhi_net_probe(struct mhi_device *mhi_dev,
 	/* Number of transfer descriptors determines size of the queue */
 	mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
 
-	err = register_netdev(ndev);
+	if (extack)
+		err = register_netdevice(ndev);
+	else
+		err = register_netdev(ndev);
 	if (err)
 		goto out_err;
 
@@ -347,23 +358,89 @@ static int mhi_net_probe(struct mhi_device *mhi_dev,
 	return 0;
 
 out_err_proto:
-	unregister_netdev(ndev);
+	unregister_netdevice(ndev);
 out_err:
 	free_netdev(ndev);
 	return err;
 }
 
+static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
+			    struct list_head *head)
+{
+	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+	struct mhi_device *mhi_dev = ctxt;
+
+	if (head)
+		unregister_netdevice_queue(ndev, head);
+	else
+		unregister_netdev(ndev);
+
+	mhi_unprepare_from_transfer(mhi_dev);
+
+	kfree_skb(mhi_netdev->skbagg_head);
+
+	dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+const struct wwan_ops mhi_wwan_ops = {
+	.owner = THIS_MODULE,
+	.priv_size = sizeof(struct mhi_net_dev),
+	.setup = mhi_net_setup,
+	.newlink = mhi_net_newlink,
+	.dellink = mhi_net_dellink,
+};
+
+static int mhi_net_probe(struct mhi_device *mhi_dev,
+			 const struct mhi_device_id *id)
+{
+	const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
+	struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+	struct net_device *ndev;
+	int err;
+
+	err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev);
+	if (err)
+		return err;
+
+	if (!create_default_iface)
+		return 0;
+
+	/* Create a default interface which is used as either RMNET real-dev,
+	 * MBIM link 0 or ip link 0)
+	 */
+	ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
+			    NET_NAME_PREDICTABLE, mhi_net_setup);
+	if (!ndev) {
+		err = -ENOMEM;
+		goto err_unregister;
+	}
+
+	SET_NETDEV_DEV(ndev, &mhi_dev->dev);
+
+	err = mhi_net_newlink(mhi_dev, ndev, 0, NULL);
+	if (err)
+		goto err_release;
+
+	return 0;
+
+err_release:
+	free_netdev(ndev);
+err_unregister:
+	wwan_unregister_ops(&cntrl->mhi_dev->dev);
+
+	return err;
+}
+
 static void mhi_net_remove(struct mhi_device *mhi_dev)
 {
 	struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+	struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
 
-	unregister_netdev(mhi_netdev->ndev);
+	/* rtnetlink takes care of removing remaining links */
+	wwan_unregister_ops(&cntrl->mhi_dev->dev);
 
-	mhi_unprepare_from_transfer(mhi_netdev->mdev);
-
-	kfree_skb(mhi_netdev->skbagg_head);
-
-	free_netdev(mhi_netdev->ndev);
+	if (create_default_iface)
+		mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL);
 }
 
 static const struct mhi_device_info mhi_hwip0 = {

From 0e6af897fcd9c154c06f239669401c64da52d84e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:29 -0500
Subject: [PATCH 1592/3292] net: qualcomm: rmnet: remove some local variables

In rmnet_map_ipv4_dl_csum_trailer(), remove the "csum_temp" and
"addend" local variables, and simplify a few lines of code.

Remove the "csum_temp", "csum_value", "ip6_hdr_csum", and "addend"
local variables in rmnet_map_ipv6_dl_csum_trailer(), and simplify a
few lines of code there as well.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 37 +++++++------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index d4d23ab446ef..3e6feef0fd25 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -35,10 +35,9 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 {
 	struct iphdr *ip4h = (struct iphdr *)skb->data;
 	void *txporthdr = skb->data + ip4h->ihl * 4;
-	__sum16 *csum_field, csum_temp, pseudo_csum;
+	__sum16 *csum_field, pseudo_csum;
 	__sum16 ip_payload_csum;
-	u16 csum_value_final;
-	__be16 addend;
+	__sum16 csum_value_final;
 
 	/* Computing the checksum over just the IPv4 header--including its
 	 * checksum field--should yield 0.  If it doesn't, the IP header
@@ -83,14 +82,11 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
 					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
 					 ip4h->protocol, 0);
-	addend = (__force __be16)pseudo_csum;
-	pseudo_csum = csum16_add(ip_payload_csum, addend);
+	pseudo_csum = csum16_add(ip_payload_csum, (__force __be16)pseudo_csum);
 
-	addend = (__force __be16)*csum_field;
-	csum_temp = ~csum16_sub(pseudo_csum, addend);
-	csum_value_final = (__force u16)csum_temp;
+	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
-	if (unlikely(csum_value_final == 0)) {
+	if (unlikely(!csum_value_final)) {
 		switch (ip4h->protocol) {
 		case IPPROTO_UDP:
 			/* RFC 768 - DL4 1's complement rule for UDP csum 0 */
@@ -105,7 +101,7 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 		}
 	}
 
-	if (csum_value_final == (__force u16)*csum_field) {
+	if (csum_value_final == *csum_field) {
 		priv->stats.csum_ok++;
 		return 0;
 	} else {
@@ -122,12 +118,10 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 {
 	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
 	void *txporthdr = skb->data + sizeof(*ip6h);
-	__sum16 *csum_field, pseudo_csum, csum_temp;
-	__be16 ip6_hdr_csum, addend;
+	__sum16 *csum_field, pseudo_csum;
 	__sum16 ip6_payload_csum;
 	__be16 ip_header_csum;
-	u16 csum_value_final;
-	__be16 csum_value;
+	__sum16 csum_value_final;
 	u32 length;
 
 	/* Checksum offload is only supported for UDP and TCP protocols;
@@ -145,23 +139,18 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * of the IP header from the trailer checksum.  We then add the
 	 * checksum computed over the pseudo header.
 	 */
-	csum_value = ~csum_trailer->csum_value;
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
-	ip6_hdr_csum = (__force __be16)~ip_header_csum;
-	ip6_payload_csum = csum16_sub((__force __sum16)csum_value,
-				      ip6_hdr_csum);
+	ip6_payload_csum = csum16_sub((__force __sum16)~csum_trailer->csum_value,
+				      ~ip_header_csum);
 
 	length = (ip6h->nexthdr == IPPROTO_UDP) ?
 		 ntohs(((struct udphdr *)txporthdr)->len) :
 		 ntohs(ip6h->payload_len);
 	pseudo_csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
 				       length, ip6h->nexthdr, 0);
-	addend = (__force __be16)pseudo_csum;
-	pseudo_csum = csum16_add(ip6_payload_csum, addend);
+	pseudo_csum = csum16_add(ip6_payload_csum, (__force __be16)pseudo_csum);
 
-	addend = (__force __be16)*csum_field;
-	csum_temp = ~csum16_sub(pseudo_csum, addend);
-	csum_value_final = (__force u16)csum_temp;
+	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
 	if (unlikely(csum_value_final == 0)) {
 		switch (ip6h->nexthdr) {
@@ -180,7 +169,7 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 		}
 	}
 
-	if (csum_value_final == (__force u16)*csum_field) {
+	if (csum_value_final == *csum_field) {
 		priv->stats.csum_ok++;
 		return 0;
 	} else {

From bbd45f10ed0e032b599973d56d5c221266cf2ccf Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:30 -0500
Subject: [PATCH 1593/3292] net: qualcomm: rmnet: rearrange some NOTs

With the ones' complement arithmetic, the sum of two negated values
is equal to the negation of the sum of the two original values [1].
Rearrange the calculation ip6_payload_sum using this property.

[1] https://tools.ietf.org/html/rfc1071

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 3e6feef0fd25..1b170e9189d8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -140,8 +140,8 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * checksum computed over the pseudo header.
 	 */
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
-	ip6_payload_csum = csum16_sub((__force __sum16)~csum_trailer->csum_value,
-				      ~ip_header_csum);
+	ip6_payload_csum = ~csum16_sub((__force __sum16)csum_trailer->csum_value,
+				       ip_header_csum);
 
 	length = (ip6h->nexthdr == IPPROTO_UDP) ?
 		 ntohs(((struct udphdr *)txporthdr)->len) :

From 9d0407bc4c9ca7a2378230dd86f8ce90a2b6ad09 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:31 -0500
Subject: [PATCH 1594/3292] net: qualcomm: rmnet: show that an intermediate sum
 is zero

This patch simply demonstrates that a checksum value computed when
verifying an offloaded transport checksum value for both IPv4 and
IPv6 is (normally) 0.  It can be squashed into the next patch.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 1b170e9189d8..51909b8fa8a8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -84,6 +84,11 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 					 ip4h->protocol, 0);
 	pseudo_csum = csum16_add(ip_payload_csum, (__force __be16)pseudo_csum);
 
+	/* The trailer checksum *includes* the checksum in the transport
+	 * header.  Adding that to the pseudo checksum will yield 0xffff
+	 * ("negative 0") if the message arrived intact.
+	 */
+	WARN_ON((__sum16)~pseudo_csum);
 	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
 	if (unlikely(!csum_value_final)) {
@@ -150,6 +155,10 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 				       length, ip6h->nexthdr, 0);
 	pseudo_csum = csum16_add(ip6_payload_csum, (__force __be16)pseudo_csum);
 
+	/* Adding the payload checksum to the pseudo checksum yields 0xffff
+	 * ("negative 0") if the message arrived intact.
+	 */
+	WARN_ON((__sum16)~pseudo_csum);
 	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
 	if (unlikely(csum_value_final == 0)) {

From fab01a6f3a61748b9c2e038c84498c6624b06236 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:32 -0500
Subject: [PATCH 1595/3292] net: qualcomm: rmnet: return earlier for bad
 checksum

In rmnet_map_ipv4_dl_csum_trailer(), if the sum of the trailer
checksum and the pseudo checksum is non-zero, checksum validation
has failed.  We can return an error as soon as we know that.

We can do the same thing in rmnet_map_ipv6_dl_csum_trailer().

Add some comments that explain where we're headed.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 36 ++++++++++++++-----
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 51909b8fa8a8..a05124eb8602 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -76,6 +76,17 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	 * We verified above that the IP header contributes zero to the
 	 * trailer checksum.  Therefore the checksum in the trailer is
 	 * just the checksum computed over the IP payload.
+
+	 * If the IP payload arrives intact, adding the pseudo header
+	 * checksum to the IP payload checksum will yield 0xffff (negative
+	 * zero).  This means the trailer checksum and the pseudo checksum
+	 * are additive inverses of each other.  Put another way, the
+	 * message passes the checksum test if the trailer checksum value
+	 * is the negated pseudo header checksum.
+	 *
+	 * Knowing this, we don't even need to examine the transport
+	 * header checksum value; it is already accounted for in the
+	 * checksum value found in the trailer.
 	 */
 	ip_payload_csum = (__force __sum16)~csum_trailer->csum_value;
 
@@ -84,11 +95,11 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 					 ip4h->protocol, 0);
 	pseudo_csum = csum16_add(ip_payload_csum, (__force __be16)pseudo_csum);
 
-	/* The trailer checksum *includes* the checksum in the transport
-	 * header.  Adding that to the pseudo checksum will yield 0xffff
-	 * ("negative 0") if the message arrived intact.
-	 */
-	WARN_ON((__sum16)~pseudo_csum);
+	/* The cast is required to ensure only the low 16 bits are examined */
+	if ((__sum16)~pseudo_csum) {
+		priv->stats.csum_validation_failed++;
+		return -EINVAL;
+	}
 	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
 	if (unlikely(!csum_value_final)) {
@@ -143,6 +154,11 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * transport checksum from this, we first subract the contribution
 	 * of the IP header from the trailer checksum.  We then add the
 	 * checksum computed over the pseudo header.
+	 *
+	 * It's sufficient to compare the IP payload checksum with the
+	 * negated pseudo checksum to determine whether the packet
+	 * checksum was good.  (See further explanation in comments
+	 * in rmnet_map_ipv4_dl_csum_trailer()).
 	 */
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
 	ip6_payload_csum = ~csum16_sub((__force __sum16)csum_trailer->csum_value,
@@ -155,10 +171,12 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 				       length, ip6h->nexthdr, 0);
 	pseudo_csum = csum16_add(ip6_payload_csum, (__force __be16)pseudo_csum);
 
-	/* Adding the payload checksum to the pseudo checksum yields 0xffff
-	 * ("negative 0") if the message arrived intact.
-	 */
-	WARN_ON((__sum16)~pseudo_csum);
+	/* The cast is required to ensure only the low 16 bits are examined */
+	if ((__sum16)~pseudo_csum) {
+		priv->stats.csum_validation_failed++;
+		return -EINVAL;
+	}
+
 	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
 	if (unlikely(csum_value_final == 0)) {

From 698aa6c46bf09070310cd2c8893ea2de5a796644 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:33 -0500
Subject: [PATCH 1596/3292] net: qualcomm: rmnet: remove unneeded code

The previous patch makes rmnet_map_ipv4_dl_csum_trailer() return
early with an error if it is determined that the computed checksum
for the IP payload does not match what was expected.

If the computed checksum *does* match the expected value, the IP
payload (i.e., the transport message), can be considered good.
There is no need to do any further processing of the message.

This means a big block of code is unnecessary for validating the
transport checksum value, and can be removed.

Make comparable changes in rmnet_map_ipv6_dl_csum_trailer().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  | 75 ++++---------------
 1 file changed, 14 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index a05124eb8602..033b8ad3d735 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -37,7 +37,6 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	void *txporthdr = skb->data + ip4h->ihl * 4;
 	__sum16 *csum_field, pseudo_csum;
 	__sum16 ip_payload_csum;
-	__sum16 csum_value_final;
 
 	/* Computing the checksum over just the IPv4 header--including its
 	 * checksum field--should yield 0.  If it doesn't, the IP header
@@ -93,37 +92,15 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
 					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
 					 ip4h->protocol, 0);
-	pseudo_csum = csum16_add(ip_payload_csum, (__force __be16)pseudo_csum);
 
 	/* The cast is required to ensure only the low 16 bits are examined */
-	if ((__sum16)~pseudo_csum) {
+	if (ip_payload_csum != (__sum16)~pseudo_csum) {
 		priv->stats.csum_validation_failed++;
 		return -EINVAL;
 	}
-	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
 
-	if (unlikely(!csum_value_final)) {
-		switch (ip4h->protocol) {
-		case IPPROTO_UDP:
-			/* RFC 768 - DL4 1's complement rule for UDP csum 0 */
-			csum_value_final = ~csum_value_final;
-			break;
-
-		case IPPROTO_TCP:
-			/* DL4 Non-RFC compliant TCP checksum found */
-			if (*csum_field == (__force __sum16)0xFFFF)
-				csum_value_final = ~csum_value_final;
-			break;
-		}
-	}
-
-	if (csum_value_final == *csum_field) {
-		priv->stats.csum_ok++;
-		return 0;
-	} else {
-		priv->stats.csum_validation_failed++;
-		return -EINVAL;
-	}
+	priv->stats.csum_ok++;
+	return 0;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -137,7 +114,6 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	__sum16 *csum_field, pseudo_csum;
 	__sum16 ip6_payload_csum;
 	__be16 ip_header_csum;
-	__sum16 csum_value_final;
 	u32 length;
 
 	/* Checksum offload is only supported for UDP and TCP protocols;
@@ -154,11 +130,6 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * transport checksum from this, we first subract the contribution
 	 * of the IP header from the trailer checksum.  We then add the
 	 * checksum computed over the pseudo header.
-	 *
-	 * It's sufficient to compare the IP payload checksum with the
-	 * negated pseudo checksum to determine whether the packet
-	 * checksum was good.  (See further explanation in comments
-	 * in rmnet_map_ipv4_dl_csum_trailer()).
 	 */
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
 	ip6_payload_csum = ~csum16_sub((__force __sum16)csum_trailer->csum_value,
@@ -169,40 +140,22 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 		 ntohs(ip6h->payload_len);
 	pseudo_csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
 				       length, ip6h->nexthdr, 0);
-	pseudo_csum = csum16_add(ip6_payload_csum, (__force __be16)pseudo_csum);
 
-	/* The cast is required to ensure only the low 16 bits are examined */
-	if ((__sum16)~pseudo_csum) {
+	/* It's sufficient to compare the IP payload checksum with the
+	 * negated pseudo checksum to determine whether the packet
+	 * checksum was good.  (See further explanation in comments
+	 * in rmnet_map_ipv4_dl_csum_trailer()).
+	 *
+	 * The cast is required to ensure only the low 16 bits are
+	 * examined.
+	 */
+	if (ip6_payload_csum != (__sum16)~pseudo_csum) {
 		priv->stats.csum_validation_failed++;
 		return -EINVAL;
 	}
 
-	csum_value_final = ~csum16_sub(pseudo_csum, (__force __be16)*csum_field);
-
-	if (unlikely(csum_value_final == 0)) {
-		switch (ip6h->nexthdr) {
-		case IPPROTO_UDP:
-			/* RFC 2460 section 8.1
-			 * DL6 One's complement rule for UDP checksum 0
-			 */
-			csum_value_final = ~csum_value_final;
-			break;
-
-		case IPPROTO_TCP:
-			/* DL6 Non-RFC compliant TCP checksum found */
-			if (*csum_field == (__force __sum16)0xFFFF)
-				csum_value_final = ~csum_value_final;
-			break;
-		}
-	}
-
-	if (csum_value_final == *csum_field) {
-		priv->stats.csum_ok++;
-		return 0;
-	} else {
-		priv->stats.csum_validation_failed++;
-		return -EINVAL;
-	}
+	priv->stats.csum_ok++;
+	return 0;
 }
 #endif
 

From be754f6435936e78dafe0ebb9d1e9d52c3bde842 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:34 -0500
Subject: [PATCH 1597/3292] net: qualcomm: rmnet: trailer value is a checksum

The csum_value field in the rmnet_map_dl_csum_trailer structure is a
"real" Internet checksum.  It is a 16 bit value, in big endian format,
which represents an inverted ones' complement sum over pairs of bytes.

Make that clear by changing its type to __sum16.

This makes a typecast in rmnet_map_ipv4_dl_csum_trailer() and
another in rmnet_map_ipv6_dl_csum_trailer() unnecessary.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 5 ++---
 include/linux/if_rmnet.h                             | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 033b8ad3d735..610c8b5a8f46 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -87,7 +87,7 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	 * header checksum value; it is already accounted for in the
 	 * checksum value found in the trailer.
 	 */
-	ip_payload_csum = (__force __sum16)~csum_trailer->csum_value;
+	ip_payload_csum = ~csum_trailer->csum_value;
 
 	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
 					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
@@ -132,8 +132,7 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * checksum computed over the pseudo header.
 	 */
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
-	ip6_payload_csum = ~csum16_sub((__force __sum16)csum_trailer->csum_value,
-				       ip_header_csum);
+	ip6_payload_csum = ~csum16_sub(csum_trailer->csum_value, ip_header_csum);
 
 	length = (ip6h->nexthdr == IPPROTO_UDP) ?
 		 ntohs(((struct udphdr *)txporthdr)->len) :
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index be17610a981e..10e7521ecb6c 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -25,7 +25,7 @@ struct rmnet_map_dl_csum_trailer {
 	u8 flags;			/* MAP_CSUM_DL_VALID_FLAG */
 	__be16 csum_start_offset;
 	__be16 csum_length;
-	__be16 csum_value;
+	__sum16 csum_value;
 } __aligned(1);
 
 /* rmnet_map_dl_csum_trailer flags field:

From 411a795e14b1fcbf64bc9ef6869d2bf9a5bf3c9a Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:35 -0500
Subject: [PATCH 1598/3292] net: qualcomm: rmnet: drop some unary NOTs

We compare a payload checksum with a pseudo checksum value for
equality in rmnet_map_ipv4_dl_csum_trailer().  Both of those values
are computed with a unary NOT (~) operation.  The result of the
comparison is the same if we omit that NOT for both values.

Remove these operations in rmnet_map_ipv6_dl_csum_trailer() also.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 610c8b5a8f46..ed4737d0043d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -87,11 +87,11 @@ rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
 	 * header checksum value; it is already accounted for in the
 	 * checksum value found in the trailer.
 	 */
-	ip_payload_csum = ~csum_trailer->csum_value;
+	ip_payload_csum = csum_trailer->csum_value;
 
-	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
-					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
-					 ip4h->protocol, 0);
+	pseudo_csum = csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
+					ntohs(ip4h->tot_len) - ip4h->ihl * 4,
+					ip4h->protocol, 0);
 
 	/* The cast is required to ensure only the low 16 bits are examined */
 	if (ip_payload_csum != (__sum16)~pseudo_csum) {
@@ -132,13 +132,13 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	 * checksum computed over the pseudo header.
 	 */
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
-	ip6_payload_csum = ~csum16_sub(csum_trailer->csum_value, ip_header_csum);
+	ip6_payload_csum = csum16_sub(csum_trailer->csum_value, ip_header_csum);
 
 	length = (ip6h->nexthdr == IPPROTO_UDP) ?
 		 ntohs(((struct udphdr *)txporthdr)->len) :
 		 ntohs(ip6h->payload_len);
-	pseudo_csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-				       length, ip6h->nexthdr, 0);
+	pseudo_csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				      length, ip6h->nexthdr, 0);
 
 	/* It's sufficient to compare the IP payload checksum with the
 	 * negated pseudo checksum to determine whether the packet

From 185a108fe0429ddde6388d5a85d701a39beadfec Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 12 Jun 2021 09:37:36 -0500
Subject: [PATCH 1599/3292] net: qualcomm: rmnet: IPv6 payload length is simple

We don't support any extension headers for IPv6 packets.  Extension
headers therefore contribute 0 bytes to the payload length.  As a
result we can just use the IPv6 payload length as the length used to
compute the pseudo header checksum for both UDP and TCP messages.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index ed4737d0043d..a6ce22f60a00 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -114,7 +114,6 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	__sum16 *csum_field, pseudo_csum;
 	__sum16 ip6_payload_csum;
 	__be16 ip_header_csum;
-	u32 length;
 
 	/* Checksum offload is only supported for UDP and TCP protocols;
 	 * the packet cannot include any IPv6 extension headers
@@ -134,11 +133,9 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	ip_header_csum = (__force __be16)ip_fast_csum(ip6h, sizeof(*ip6h) / 4);
 	ip6_payload_csum = csum16_sub(csum_trailer->csum_value, ip_header_csum);
 
-	length = (ip6h->nexthdr == IPPROTO_UDP) ?
-		 ntohs(((struct udphdr *)txporthdr)->len) :
-		 ntohs(ip6h->payload_len);
 	pseudo_csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-				      length, ip6h->nexthdr, 0);
+				      ntohs(ip6h->payload_len),
+				      ip6h->nexthdr, 0);
 
 	/* It's sufficient to compare the IP payload checksum with the
 	 * negated pseudo checksum to determine whether the packet

From 7e98d785ae6184c7580a33619dae8b651769ff08 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:08 +0530
Subject: [PATCH 1600/3292] net: iosm: entry point

1) Register IOSM driver with kernel to manage Intel WWAN PCIe
   device(PCI_VENDOR_ID_INTEL, INTEL_CP_DEVICE_7560_ID).
2) Exposes the EP PCIe device capability to Host PCIe core.
3) Initializes PCIe EP configuration and defines PCIe driver probe, remove
   and power management OPS.
4) Allocate and map(dma) skb memory for data communication from device to
   kernel and vice versa.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_pcie.c | 579 ++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_pcie.h | 209 ++++++++++
 2 files changed, 788 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_pcie.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_pcie.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
new file mode 100644
index 000000000000..ac6baddfde61
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/module.h>
+#include <net/rtnetlink.h>
+
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_pcie.h"
+#include "iosm_ipc_protocol.h"
+
+MODULE_DESCRIPTION("IOSM Driver");
+MODULE_LICENSE("GPL v2");
+
+/* WWAN GUID */
+static guid_t wwan_acpi_guid = GUID_INIT(0xbad01b75, 0x22a8, 0x4f48, 0x87, 0x92,
+				       0xbd, 0xde, 0x94, 0x67, 0x74, 0x7d);
+
+static void ipc_pcie_resources_release(struct iosm_pcie *ipc_pcie)
+{
+	/* Free the MSI resources. */
+	ipc_release_irq(ipc_pcie);
+
+	/* Free mapped doorbell scratchpad bus memory into CPU space. */
+	iounmap(ipc_pcie->scratchpad);
+
+	/* Free mapped IPC_REGS bus memory into CPU space. */
+	iounmap(ipc_pcie->ipc_regs);
+
+	/* Releases all PCI I/O and memory resources previously reserved by a
+	 * successful call to pci_request_regions.  Call this function only
+	 * after all use of the PCI regions has ceased.
+	 */
+	pci_release_regions(ipc_pcie->pci);
+}
+
+static void ipc_pcie_cleanup(struct iosm_pcie *ipc_pcie)
+{
+	/* Free the shared memory resources. */
+	ipc_imem_cleanup(ipc_pcie->imem);
+
+	ipc_pcie_resources_release(ipc_pcie);
+
+	/* Signal to the system that the PCI device is not in use. */
+	pci_disable_device(ipc_pcie->pci);
+}
+
+static void ipc_pcie_deinit(struct iosm_pcie *ipc_pcie)
+{
+	kfree(ipc_pcie->imem);
+	kfree(ipc_pcie);
+}
+
+static void ipc_pcie_remove(struct pci_dev *pci)
+{
+	struct iosm_pcie *ipc_pcie = pci_get_drvdata(pci);
+
+	ipc_pcie_cleanup(ipc_pcie);
+
+	ipc_pcie_deinit(ipc_pcie);
+}
+
+static int ipc_pcie_resources_request(struct iosm_pcie *ipc_pcie)
+{
+	struct pci_dev *pci = ipc_pcie->pci;
+	u32 cap = 0;
+	u32 ret;
+
+	/* Reserved PCI I/O and memory resources.
+	 * Mark all PCI regions associated with PCI device pci as
+	 * being reserved by owner IOSM_IPC.
+	 */
+	ret = pci_request_regions(pci, "IOSM_IPC");
+	if (ret) {
+		dev_err(ipc_pcie->dev, "failed pci request regions");
+		goto pci_request_region_fail;
+	}
+
+	/* Reserve the doorbell IPC REGS memory resources.
+	 * Remap the memory into CPU space. Arrange for the physical address
+	 * (BAR) to be visible from this driver.
+	 * pci_ioremap_bar() ensures that the memory is marked uncachable.
+	 */
+	ipc_pcie->ipc_regs = pci_ioremap_bar(pci, ipc_pcie->ipc_regs_bar_nr);
+
+	if (!ipc_pcie->ipc_regs) {
+		dev_err(ipc_pcie->dev, "IPC REGS ioremap error");
+		ret = -EBUSY;
+		goto ipc_regs_remap_fail;
+	}
+
+	/* Reserve the MMIO scratchpad memory resources.
+	 * Remap the memory into CPU space. Arrange for the physical address
+	 * (BAR) to be visible from this driver.
+	 * pci_ioremap_bar() ensures that the memory is marked uncachable.
+	 */
+	ipc_pcie->scratchpad =
+		pci_ioremap_bar(pci, ipc_pcie->scratchpad_bar_nr);
+
+	if (!ipc_pcie->scratchpad) {
+		dev_err(ipc_pcie->dev, "doorbell scratchpad ioremap error");
+		ret = -EBUSY;
+		goto scratch_remap_fail;
+	}
+
+	/* Install the irq handler triggered by CP. */
+	ret = ipc_acquire_irq(ipc_pcie);
+	if (ret) {
+		dev_err(ipc_pcie->dev, "acquiring MSI irq failed!");
+		goto irq_acquire_fail;
+	}
+
+	/* Enable bus-mastering for the IOSM IPC device. */
+	pci_set_master(pci);
+
+	/* Enable LTR if possible
+	 * This is needed for L1.2!
+	 */
+	pcie_capability_read_dword(ipc_pcie->pci, PCI_EXP_DEVCAP2, &cap);
+	if (cap & PCI_EXP_DEVCAP2_LTR)
+		pcie_capability_set_word(ipc_pcie->pci, PCI_EXP_DEVCTL2,
+					 PCI_EXP_DEVCTL2_LTR_EN);
+
+	dev_dbg(ipc_pcie->dev, "link between AP and CP is fully on");
+
+	return ret;
+
+irq_acquire_fail:
+	iounmap(ipc_pcie->scratchpad);
+scratch_remap_fail:
+	iounmap(ipc_pcie->ipc_regs);
+ipc_regs_remap_fail:
+	pci_release_regions(pci);
+pci_request_region_fail:
+	return ret;
+}
+
+bool ipc_pcie_check_aspm_enabled(struct iosm_pcie *ipc_pcie,
+				 bool parent)
+{
+	struct pci_dev *pdev;
+	u16 value = 0;
+	u32 enabled;
+
+	if (parent)
+		pdev = ipc_pcie->pci->bus->self;
+	else
+		pdev = ipc_pcie->pci;
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &value);
+	enabled = value & PCI_EXP_LNKCTL_ASPMC;
+	dev_dbg(ipc_pcie->dev, "ASPM L1: 0x%04X 0x%03X", pdev->device, value);
+
+	return (enabled == PCI_EXP_LNKCTL_ASPM_L1 ||
+		enabled == PCI_EXP_LNKCTL_ASPMC);
+}
+
+bool ipc_pcie_check_data_link_active(struct iosm_pcie *ipc_pcie)
+{
+	struct pci_dev *parent;
+	u16 link_status = 0;
+
+	if (!ipc_pcie->pci->bus || !ipc_pcie->pci->bus->self) {
+		dev_err(ipc_pcie->dev, "root port not found");
+		return false;
+	}
+
+	parent = ipc_pcie->pci->bus->self;
+
+	pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &link_status);
+	dev_dbg(ipc_pcie->dev, "Link status: 0x%04X", link_status);
+
+	return link_status & PCI_EXP_LNKSTA_DLLLA;
+}
+
+static bool ipc_pcie_check_aspm_supported(struct iosm_pcie *ipc_pcie,
+					  bool parent)
+{
+	struct pci_dev *pdev;
+	u32 support;
+	u32 cap = 0;
+
+	if (parent)
+		pdev = ipc_pcie->pci->bus->self;
+	else
+		pdev = ipc_pcie->pci;
+	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &cap);
+	support = u32_get_bits(cap, PCI_EXP_LNKCAP_ASPMS);
+	if (support < PCI_EXP_LNKCTL_ASPM_L1) {
+		dev_dbg(ipc_pcie->dev, "ASPM L1 not supported: 0x%04X",
+			pdev->device);
+		return false;
+	}
+	return true;
+}
+
+void ipc_pcie_config_aspm(struct iosm_pcie *ipc_pcie)
+{
+	bool parent_aspm_enabled, dev_aspm_enabled;
+
+	/* check if both root port and child supports ASPM L1 */
+	if (!ipc_pcie_check_aspm_supported(ipc_pcie, true) ||
+	    !ipc_pcie_check_aspm_supported(ipc_pcie, false))
+		return;
+
+	parent_aspm_enabled = ipc_pcie_check_aspm_enabled(ipc_pcie, true);
+	dev_aspm_enabled = ipc_pcie_check_aspm_enabled(ipc_pcie, false);
+
+	dev_dbg(ipc_pcie->dev, "ASPM parent: %s device: %s",
+		parent_aspm_enabled ? "Enabled" : "Disabled",
+		dev_aspm_enabled ? "Enabled" : "Disabled");
+}
+
+/* Initializes PCIe endpoint configuration */
+static void ipc_pcie_config_init(struct iosm_pcie *ipc_pcie)
+{
+	/* BAR0 is used for doorbell */
+	ipc_pcie->ipc_regs_bar_nr = IPC_DOORBELL_BAR0;
+
+	/* update HW configuration */
+	ipc_pcie->scratchpad_bar_nr = IPC_SCRATCHPAD_BAR2;
+	ipc_pcie->doorbell_reg_offset = IPC_DOORBELL_CH_OFFSET;
+	ipc_pcie->doorbell_write = IPC_WRITE_PTR_REG_0;
+	ipc_pcie->doorbell_capture = IPC_CAPTURE_PTR_REG_0;
+}
+
+/* This will read the BIOS WWAN RTD3 settings:
+ * D0L1.2/D3L2/Disabled
+ */
+static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev)
+{
+	union acpi_object *object;
+	acpi_handle handle_acpi;
+
+	handle_acpi = ACPI_HANDLE(dev);
+	if (!handle_acpi) {
+		pr_debug("pci device is NOT ACPI supporting device\n");
+		goto default_ret;
+	}
+
+	object = acpi_evaluate_dsm(handle_acpi, &wwan_acpi_guid, 0, 3, NULL);
+
+	if (object && object->integer.value == 3)
+		return IPC_PCIE_D3L2;
+
+default_ret:
+	return IPC_PCIE_D0L12;
+}
+
+static int ipc_pcie_probe(struct pci_dev *pci,
+			  const struct pci_device_id *pci_id)
+{
+	struct iosm_pcie *ipc_pcie = kzalloc(sizeof(*ipc_pcie), GFP_KERNEL);
+
+	pr_debug("Probing device 0x%X from the vendor 0x%X", pci_id->device,
+		 pci_id->vendor);
+
+	if (!ipc_pcie)
+		goto ret_fail;
+
+	/* Initialize ipc dbg component for the PCIe device */
+	ipc_pcie->dev = &pci->dev;
+
+	/* Set the driver specific data. */
+	pci_set_drvdata(pci, ipc_pcie);
+
+	/* Save the address of the PCI device configuration. */
+	ipc_pcie->pci = pci;
+
+	/* Update platform configuration */
+	ipc_pcie_config_init(ipc_pcie);
+
+	/* Initialize the device before it is used. Ask low-level code
+	 * to enable I/O and memory. Wake up the device if it was suspended.
+	 */
+	if (pci_enable_device(pci)) {
+		dev_err(ipc_pcie->dev, "failed to enable the AP PCIe device");
+		/* If enable of PCIe device has failed then calling
+		 * ipc_pcie_cleanup will panic the system. More over
+		 * ipc_pcie_cleanup() is required to be called after
+		 * ipc_imem_mount()
+		 */
+		goto pci_enable_fail;
+	}
+
+	ipc_pcie_config_aspm(ipc_pcie);
+	dev_dbg(ipc_pcie->dev, "PCIe device enabled.");
+
+	/* Read WWAN RTD3 BIOS Setting
+	 */
+	ipc_pcie->d3l2_support = ipc_pcie_read_bios_cfg(&pci->dev);
+
+	ipc_pcie->suspend = 0;
+
+	if (ipc_pcie_resources_request(ipc_pcie))
+		goto resources_req_fail;
+
+	/* Establish the link to the imem layer. */
+	ipc_pcie->imem = ipc_imem_init(ipc_pcie, pci->device,
+				       ipc_pcie->scratchpad, ipc_pcie->dev);
+	if (!ipc_pcie->imem) {
+		dev_err(ipc_pcie->dev, "failed to init imem");
+		goto imem_init_fail;
+	}
+
+	return 0;
+
+imem_init_fail:
+	ipc_pcie_resources_release(ipc_pcie);
+resources_req_fail:
+	pci_disable_device(pci);
+pci_enable_fail:
+	kfree(ipc_pcie);
+ret_fail:
+	return -EIO;
+}
+
+static const struct pci_device_id iosm_ipc_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, INTEL_CP_DEVICE_7560_ID) },
+	{}
+};
+
+/* Enter sleep in s2idle case
+ */
+static int __maybe_unused ipc_pcie_suspend_s2idle(struct iosm_pcie *ipc_pcie)
+{
+	ipc_cp_irq_sleep_control(ipc_pcie, IPC_MEM_DEV_PM_FORCE_SLEEP);
+
+	/* Complete all memory stores before setting bit */
+	smp_mb__before_atomic();
+
+	set_bit(0, &ipc_pcie->suspend);
+
+	/* Complete all memory stores after setting bit */
+	smp_mb__after_atomic();
+
+	ipc_imem_pm_s2idle_sleep(ipc_pcie->imem, true);
+
+	return 0;
+}
+
+/* Resume from sleep in s2idle case
+ */
+static int __maybe_unused ipc_pcie_resume_s2idle(struct iosm_pcie *ipc_pcie)
+{
+	ipc_cp_irq_sleep_control(ipc_pcie, IPC_MEM_DEV_PM_FORCE_ACTIVE);
+
+	ipc_imem_pm_s2idle_sleep(ipc_pcie->imem, false);
+
+	/* Complete all memory stores before clearing bit. */
+	smp_mb__before_atomic();
+
+	clear_bit(0, &ipc_pcie->suspend);
+
+	/* Complete all memory stores after clearing bit. */
+	smp_mb__after_atomic();
+	return 0;
+}
+
+int __maybe_unused ipc_pcie_suspend(struct iosm_pcie *ipc_pcie)
+{
+	struct pci_dev *pdev;
+	int ret;
+
+	pdev = ipc_pcie->pci;
+
+	/* Execute D3 one time. */
+	if (pdev->current_state != PCI_D0) {
+		dev_dbg(ipc_pcie->dev, "done for PM=%d", pdev->current_state);
+		return 0;
+	}
+
+	/* The HAL shall ask the shared memory layer whether D3 is allowed. */
+	ipc_imem_pm_suspend(ipc_pcie->imem);
+
+	/* Save the PCI configuration space of a device before suspending. */
+	ret = pci_save_state(pdev);
+
+	if (ret) {
+		dev_err(ipc_pcie->dev, "pci_save_state error=%d", ret);
+		return ret;
+	}
+
+	/* Set the power state of a PCI device.
+	 * Transition a device to a new power state, using the device's PCI PM
+	 * registers.
+	 */
+	ret = pci_set_power_state(pdev, PCI_D3cold);
+
+	if (ret) {
+		dev_err(ipc_pcie->dev, "pci_set_power_state error=%d", ret);
+		return ret;
+	}
+
+	dev_dbg(ipc_pcie->dev, "SUSPEND done");
+	return ret;
+}
+
+int __maybe_unused ipc_pcie_resume(struct iosm_pcie *ipc_pcie)
+{
+	int ret;
+
+	/* Set the power state of a PCI device.
+	 * Transition a device to a new power state, using the device's PCI PM
+	 * registers.
+	 */
+	ret = pci_set_power_state(ipc_pcie->pci, PCI_D0);
+
+	if (ret) {
+		dev_err(ipc_pcie->dev, "pci_set_power_state error=%d", ret);
+		return ret;
+	}
+
+	pci_restore_state(ipc_pcie->pci);
+
+	/* The HAL shall inform the shared memory layer that the device is
+	 * active.
+	 */
+	ipc_imem_pm_resume(ipc_pcie->imem);
+
+	dev_dbg(ipc_pcie->dev, "RESUME done");
+	return ret;
+}
+
+static int __maybe_unused ipc_pcie_suspend_cb(struct device *dev)
+{
+	struct iosm_pcie *ipc_pcie;
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(dev);
+
+	ipc_pcie = pci_get_drvdata(pdev);
+
+	switch (ipc_pcie->d3l2_support) {
+	case IPC_PCIE_D0L12:
+		ipc_pcie_suspend_s2idle(ipc_pcie);
+		break;
+	case IPC_PCIE_D3L2:
+		ipc_pcie_suspend(ipc_pcie);
+		break;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused ipc_pcie_resume_cb(struct device *dev)
+{
+	struct iosm_pcie *ipc_pcie;
+	struct pci_dev *pdev;
+
+	pdev = to_pci_dev(dev);
+
+	ipc_pcie = pci_get_drvdata(pdev);
+
+	switch (ipc_pcie->d3l2_support) {
+	case IPC_PCIE_D0L12:
+		ipc_pcie_resume_s2idle(ipc_pcie);
+		break;
+	case IPC_PCIE_D3L2:
+		ipc_pcie_resume(ipc_pcie);
+		break;
+	}
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb, ipc_pcie_resume_cb);
+
+static struct pci_driver iosm_ipc_driver = {
+	.name = KBUILD_MODNAME,
+	.probe = ipc_pcie_probe,
+	.remove = ipc_pcie_remove,
+	.driver = {
+		.pm = &iosm_ipc_pm,
+	},
+	.id_table = iosm_ipc_ids,
+};
+
+int ipc_pcie_addr_map(struct iosm_pcie *ipc_pcie, unsigned char *data,
+		      size_t size, dma_addr_t *mapping, int direction)
+{
+	if (ipc_pcie->pci) {
+		*mapping = dma_map_single(&ipc_pcie->pci->dev, data, size,
+					  direction);
+		if (dma_mapping_error(&ipc_pcie->pci->dev, *mapping)) {
+			dev_err(ipc_pcie->dev, "dma mapping failed");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+void ipc_pcie_addr_unmap(struct iosm_pcie *ipc_pcie, size_t size,
+			 dma_addr_t mapping, int direction)
+{
+	if (!mapping)
+		return;
+	if (ipc_pcie->pci)
+		dma_unmap_single(&ipc_pcie->pci->dev, mapping, size, direction);
+}
+
+struct sk_buff *ipc_pcie_alloc_local_skb(struct iosm_pcie *ipc_pcie,
+					 gfp_t flags, size_t size)
+{
+	struct sk_buff *skb;
+
+	if (!ipc_pcie || !size) {
+		pr_err("invalid pcie object or size");
+		return NULL;
+	}
+
+	skb = __netdev_alloc_skb(NULL, size, flags);
+	if (!skb)
+		return NULL;
+
+	IPC_CB(skb)->op_type = (u8)UL_DEFAULT;
+	IPC_CB(skb)->mapping = 0;
+
+	return skb;
+}
+
+struct sk_buff *ipc_pcie_alloc_skb(struct iosm_pcie *ipc_pcie, size_t size,
+				   gfp_t flags, dma_addr_t *mapping,
+				   int direction, size_t headroom)
+{
+	struct sk_buff *skb = ipc_pcie_alloc_local_skb(ipc_pcie, flags,
+						       size + headroom);
+	if (!skb)
+		return NULL;
+
+	if (headroom)
+		skb_reserve(skb, headroom);
+
+	if (ipc_pcie_addr_map(ipc_pcie, skb->data, size, mapping, direction)) {
+		dev_kfree_skb(skb);
+		return NULL;
+	}
+
+	BUILD_BUG_ON(sizeof(*IPC_CB(skb)) > sizeof(skb->cb));
+
+	/* Store the mapping address in skb scratch pad for later usage */
+	IPC_CB(skb)->mapping = *mapping;
+	IPC_CB(skb)->direction = direction;
+	IPC_CB(skb)->len = size;
+
+	return skb;
+}
+
+void ipc_pcie_kfree_skb(struct iosm_pcie *ipc_pcie, struct sk_buff *skb)
+{
+	if (!skb)
+		return;
+
+	ipc_pcie_addr_unmap(ipc_pcie, IPC_CB(skb)->len, IPC_CB(skb)->mapping,
+			    IPC_CB(skb)->direction);
+	IPC_CB(skb)->mapping = 0;
+	dev_kfree_skb(skb);
+}
+
+static int __init iosm_ipc_driver_init(void)
+{
+	if (pci_register_driver(&iosm_ipc_driver)) {
+		pr_err("registering of IOSM PCIe driver failed");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void __exit iosm_ipc_driver_exit(void)
+{
+	pci_unregister_driver(&iosm_ipc_driver);
+}
+
+module_init(iosm_ipc_driver_init);
+module_exit(iosm_ipc_driver_exit);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.h b/drivers/net/wwan/iosm/iosm_ipc_pcie.h
new file mode 100644
index 000000000000..7d1f0cd7364c
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PCIE_H
+#define IOSM_IPC_PCIE_H
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+
+#include "iosm_ipc_irq.h"
+
+/* Device ID */
+#define INTEL_CP_DEVICE_7560_ID 0x7560
+
+/* Define for BAR area usage */
+#define IPC_DOORBELL_BAR0 0
+#define IPC_SCRATCHPAD_BAR2 2
+
+/* Defines for DOORBELL registers information */
+#define IPC_DOORBELL_CH_OFFSET BIT(5)
+#define IPC_WRITE_PTR_REG_0 BIT(4)
+#define IPC_CAPTURE_PTR_REG_0 BIT(3)
+
+/* Number of MSI used for IPC */
+#define IPC_MSI_VECTORS 1
+
+/* Total number of Maximum IPC IRQ vectors used for IPC */
+#define IPC_IRQ_VECTORS IPC_MSI_VECTORS
+
+/**
+ * enum ipc_pcie_sleep_state - Enum type to different sleep state transitions
+ * @IPC_PCIE_D0L12:	Put the sleep state in D0L12
+ * @IPC_PCIE_D3L2:	Put the sleep state in D3L2
+ */
+enum ipc_pcie_sleep_state {
+	IPC_PCIE_D0L12,
+	IPC_PCIE_D3L2,
+};
+
+/**
+ * struct iosm_pcie - IPC_PCIE struct.
+ * @pci:			Address of the device description
+ * @dev:			Pointer to generic device structure
+ * @ipc_regs:			Remapped CP doorbell address of the irq register
+ *				set, to fire the doorbell irq.
+ * @scratchpad:			Remapped CP scratchpad address, to send the
+ *				configuration. tuple and the IPC descriptors
+ *				to CP in the ROM phase. The config tuple
+ *				information are saved on the MSI scratchpad.
+ * @imem:			Pointer to imem data struct
+ * @ipc_regs_bar_nr:		BAR number to be used for IPC doorbell
+ * @scratchpad_bar_nr:		BAR number to be used for Scratchpad
+ * @nvec:			number of requested irq vectors
+ * @doorbell_reg_offset:	doorbell_reg_offset
+ * @doorbell_write:		doorbell write register
+ * @doorbell_capture:		doorbell capture resgister
+ * @suspend:			S2IDLE sleep/active
+ * @d3l2_support:		Read WWAN RTD3 BIOS setting for D3L2 support
+ */
+struct iosm_pcie {
+	struct pci_dev *pci;
+	struct device *dev;
+	void __iomem *ipc_regs;
+	void __iomem *scratchpad;
+	struct iosm_imem *imem;
+	int ipc_regs_bar_nr;
+	int scratchpad_bar_nr;
+	int nvec;
+	u32 doorbell_reg_offset;
+	u32 doorbell_write;
+	u32 doorbell_capture;
+	unsigned long suspend;
+	enum ipc_pcie_sleep_state d3l2_support;
+};
+
+/**
+ * struct ipc_skb_cb - Struct definition of the socket buffer which is mapped to
+ *		       the cb field of sbk
+ * @mapping:	Store physical or IOVA mapped address of skb virtual add.
+ * @direction:	DMA direction
+ * @len:	Length of the DMA mapped region
+ * @op_type:    Expected values are defined about enum ipc_ul_usr_op.
+ */
+struct ipc_skb_cb {
+	dma_addr_t mapping;
+	int direction;
+	int len;
+	u8 op_type;
+};
+
+/**
+ * enum ipc_ul_usr_op - Control operation to execute the right action on
+ *			the user interface.
+ * @UL_USR_OP_BLOCKED:	The uplink app was blocked until CP confirms that the
+ *			uplink buffer was consumed triggered by the IRQ.
+ * @UL_MUX_OP_ADB:	In MUX mode the UL ADB shall be addedd to the free list.
+ * @UL_DEFAULT:		SKB in non muxing mode
+ */
+enum ipc_ul_usr_op {
+	UL_USR_OP_BLOCKED,
+	UL_MUX_OP_ADB,
+	UL_DEFAULT,
+};
+
+/**
+ * ipc_pcie_addr_map - Maps the kernel's virtual address to either IOVA
+ *		       address space or Physical address space, the mapping is
+ *		       stored in the skb's cb.
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ * @data:	Skb mem containing data
+ * @size:	Data size
+ * @mapping:	Dma mapping address
+ * @direction:	Data direction
+ *
+ * Returns: 0 on success and failure value on error
+ */
+int ipc_pcie_addr_map(struct iosm_pcie *ipc_pcie, unsigned char *data,
+		      size_t size, dma_addr_t *mapping, int direction);
+
+/**
+ * ipc_pcie_addr_unmap - Unmaps the skb memory region from IOVA address space
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ * @size:	Data size
+ * @mapping:	Dma mapping address
+ * @direction:	Data direction
+ */
+void ipc_pcie_addr_unmap(struct iosm_pcie *ipc_pcie, size_t size,
+			 dma_addr_t mapping, int direction);
+
+/**
+ * ipc_pcie_alloc_skb - Allocate an uplink SKB for the given size.
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ * @size:	Size of the SKB required.
+ * @flags:	Allocation flags
+ * @mapping:	Copies either mapped IOVA add. or converted Phy address
+ * @direction:	DMA data direction
+ * @headroom:	Header data offset
+ *
+ * Returns: Pointer to ipc_skb on Success, NULL on failure.
+ */
+struct sk_buff *ipc_pcie_alloc_skb(struct iosm_pcie *ipc_pcie, size_t size,
+				   gfp_t flags, dma_addr_t *mapping,
+				   int direction, size_t headroom);
+
+/**
+ * ipc_pcie_alloc_local_skb - Allocate a local SKB for the given size.
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ * @flags:	Allocation flags
+ * @size:	Size of the SKB required.
+ *
+ * Returns: Pointer to ipc_skb on Success, NULL on failure.
+ */
+struct sk_buff *ipc_pcie_alloc_local_skb(struct iosm_pcie *ipc_pcie,
+					 gfp_t flags, size_t size);
+
+/**
+ * ipc_pcie_kfree_skb - Free skb allocated by ipc_pcie_alloc_*_skb().
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ * @skb:	Pointer to the skb
+ */
+void ipc_pcie_kfree_skb(struct iosm_pcie *ipc_pcie, struct sk_buff *skb);
+
+/**
+ * ipc_pcie_check_data_link_active - Check Data Link Layer Active
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ *
+ * Returns: true if active, otherwise false
+ */
+bool ipc_pcie_check_data_link_active(struct iosm_pcie *ipc_pcie);
+
+/**
+ * ipc_pcie_suspend - Callback invoked by pm_runtime_suspend. It decrements
+ *		     the device's usage count then, carry out a suspend,
+ *		     either synchronous or asynchronous.
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ *
+ * Returns: 0 on success and failure value on error
+ */
+int ipc_pcie_suspend(struct iosm_pcie *ipc_pcie);
+
+/**
+ * ipc_pcie_resume - Callback invoked by pm_runtime_resume. It increments
+ *		    the device's usage count then, carry out a resume,
+ *		    either synchronous or asynchronous.
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ *
+ * Returns: 0 on success and failure value on error
+ */
+int ipc_pcie_resume(struct iosm_pcie *ipc_pcie);
+
+/**
+ * ipc_pcie_check_aspm_enabled - Check if ASPM L1 is already enabled
+ * @ipc_pcie:			 Pointer to struct iosm_pcie
+ * @parent:			 True if checking ASPM L1 for parent else false
+ *
+ * Returns: true if ASPM is already enabled else false
+ */
+bool ipc_pcie_check_aspm_enabled(struct iosm_pcie *ipc_pcie,
+				 bool parent);
+/**
+ * ipc_pcie_config_aspm - Configure ASPM L1
+ * @ipc_pcie:	Pointer to struct iosm_pcie
+ */
+void ipc_pcie_config_aspm(struct iosm_pcie *ipc_pcie);
+
+#endif

From 7f41ce085de0bada1e8c974cb3edd906ee49cb4c Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:09 +0530
Subject: [PATCH 1601/3292] net: iosm: irq handling

1) Request interrupt vector, frees allocated resource.
2) Registers IRQ handler.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_irq.c | 90 ++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_irq.h | 33 ++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_irq.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_irq.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_irq.c b/drivers/net/wwan/iosm/iosm_ipc_irq.c
new file mode 100644
index 000000000000..702f50a48151
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_irq.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_pcie.h"
+#include "iosm_ipc_protocol.h"
+
+static void ipc_write_dbell_reg(struct iosm_pcie *ipc_pcie, int irq_n, u32 data)
+{
+	void __iomem *write_reg;
+
+	/* Select the first doorbell register, which is only currently needed
+	 * by CP.
+	 */
+	write_reg = (void __iomem *)((u8 __iomem *)ipc_pcie->ipc_regs +
+				     ipc_pcie->doorbell_write +
+				     (irq_n * ipc_pcie->doorbell_reg_offset));
+
+	/* Fire the doorbell irq by writing data on the doorbell write pointer
+	 * register.
+	 */
+	iowrite32(data, write_reg);
+}
+
+void ipc_doorbell_fire(struct iosm_pcie *ipc_pcie, int irq_n, u32 data)
+{
+	ipc_write_dbell_reg(ipc_pcie, irq_n, data);
+}
+
+/* Threaded Interrupt handler for MSI interrupts */
+static irqreturn_t ipc_msi_interrupt(int irq, void *dev_id)
+{
+	struct iosm_pcie *ipc_pcie = dev_id;
+	int instance = irq - ipc_pcie->pci->irq;
+
+	/* Shift the MSI irq actions to the IPC tasklet. IRQ_NONE means the
+	 * irq was not from the IPC device or could not be served.
+	 */
+	if (instance >= ipc_pcie->nvec)
+		return IRQ_NONE;
+
+	if (!test_bit(0, &ipc_pcie->suspend))
+		ipc_imem_irq_process(ipc_pcie->imem, instance);
+
+	return IRQ_HANDLED;
+}
+
+void ipc_release_irq(struct iosm_pcie *ipc_pcie)
+{
+	struct pci_dev *pdev = ipc_pcie->pci;
+
+	if (pdev->msi_enabled) {
+		while (--ipc_pcie->nvec >= 0)
+			free_irq(pdev->irq + ipc_pcie->nvec, ipc_pcie);
+	}
+	pci_free_irq_vectors(pdev);
+}
+
+int ipc_acquire_irq(struct iosm_pcie *ipc_pcie)
+{
+	struct pci_dev *pdev = ipc_pcie->pci;
+	int i, rc = -EINVAL;
+
+	ipc_pcie->nvec = pci_alloc_irq_vectors(pdev, IPC_MSI_VECTORS,
+					       IPC_MSI_VECTORS, PCI_IRQ_MSI);
+
+	if (ipc_pcie->nvec < 0) {
+		rc = ipc_pcie->nvec;
+		goto error;
+	}
+
+	if (!pdev->msi_enabled)
+		goto error;
+
+	for (i = 0; i < ipc_pcie->nvec; ++i) {
+		rc = request_threaded_irq(pdev->irq + i, NULL,
+					  ipc_msi_interrupt, IRQF_ONESHOT,
+					  KBUILD_MODNAME, ipc_pcie);
+		if (rc) {
+			dev_err(ipc_pcie->dev, "unable to grab IRQ, rc=%d", rc);
+			ipc_pcie->nvec = i;
+			ipc_release_irq(ipc_pcie);
+			goto error;
+		}
+	}
+
+error:
+	return rc;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_irq.h b/drivers/net/wwan/iosm/iosm_ipc_irq.h
new file mode 100644
index 000000000000..a8ed596cb6a5
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_irq.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_IRQ_H
+#define IOSM_IPC_IRQ_H
+
+struct iosm_pcie;
+
+/**
+ * ipc_doorbell_fire - fire doorbell to CP
+ * @ipc_pcie:	Pointer to iosm_pcie
+ * @irq_n:	Doorbell type
+ * @data:	ipc state
+ */
+void ipc_doorbell_fire(struct iosm_pcie *ipc_pcie, int irq_n, u32 data);
+
+/**
+ * ipc_release_irq - Release the IRQ handler.
+ * @ipc_pcie:	Pointer to iosm_pcie struct
+ */
+void ipc_release_irq(struct iosm_pcie *ipc_pcie);
+
+/**
+ * ipc_acquire_irq - acquire IRQ & register IRQ handler.
+ * @ipc_pcie:	Pointer to iosm_pcie struct
+ *
+ * Return: 0 on success and failure value on error
+ */
+int ipc_acquire_irq(struct iosm_pcie *ipc_pcie);
+
+#endif

From dc0514f5d828e8358fdab722cfa9c263bb583fea Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:10 +0530
Subject: [PATCH 1602/3292] net: iosm: mmio scratchpad

1) Initializes the Scratchpad region for Host-Device communication.
2) Exposes device capabilities like chip info and device execution
   stages.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mmio.c | 223 ++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_mmio.h | 193 ++++++++++++++++++++++
 2 files changed, 416 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mmio.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mmio.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.c b/drivers/net/wwan/iosm/iosm_ipc_mmio.c
new file mode 100644
index 000000000000..06c94b1720b6
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/slab.h>
+
+#include "iosm_ipc_mmio.h"
+
+/* Definition of MMIO offsets
+ * note that MMIO_CI offsets are relative to end of chip info structure
+ */
+
+/* MMIO chip info size in bytes */
+#define MMIO_CHIP_INFO_SIZE 60
+
+/* CP execution stage */
+#define MMIO_OFFSET_EXECUTION_STAGE 0x00
+
+/* Boot ROM Chip Info struct */
+#define MMIO_OFFSET_CHIP_INFO 0x04
+
+#define MMIO_OFFSET_ROM_EXIT_CODE 0x40
+
+#define MMIO_OFFSET_PSI_ADDRESS 0x54
+
+#define MMIO_OFFSET_PSI_SIZE 0x5C
+
+#define MMIO_OFFSET_IPC_STATUS 0x60
+
+#define MMIO_OFFSET_CONTEXT_INFO 0x64
+
+#define MMIO_OFFSET_BASE_ADDR 0x6C
+
+#define MMIO_OFFSET_END_ADDR 0x74
+
+#define MMIO_OFFSET_CP_VERSION 0xF0
+
+#define MMIO_OFFSET_CP_CAPABILITIES 0xF4
+
+/* Timeout in 50 msec to wait for the modem boot code to write a valid
+ * execution stage into mmio area
+ */
+#define IPC_MMIO_EXEC_STAGE_TIMEOUT 50
+
+/* check if exec stage has one of the valid values */
+static bool ipc_mmio_is_valid_exec_stage(enum ipc_mem_exec_stage stage)
+{
+	switch (stage) {
+	case IPC_MEM_EXEC_STAGE_BOOT:
+	case IPC_MEM_EXEC_STAGE_PSI:
+	case IPC_MEM_EXEC_STAGE_EBL:
+	case IPC_MEM_EXEC_STAGE_RUN:
+	case IPC_MEM_EXEC_STAGE_CRASH:
+	case IPC_MEM_EXEC_STAGE_CD_READY:
+		return true;
+	default:
+		return false;
+	}
+}
+
+void ipc_mmio_update_cp_capability(struct iosm_mmio *ipc_mmio)
+{
+	u32 cp_cap;
+	unsigned int ver;
+
+	ver = ipc_mmio_get_cp_version(ipc_mmio);
+	cp_cap = readl(ipc_mmio->base + ipc_mmio->offset.cp_capability);
+
+	ipc_mmio->has_mux_lite = (ver >= IOSM_CP_VERSION) &&
+				 !(cp_cap & DL_AGGR) && !(cp_cap & UL_AGGR);
+
+	ipc_mmio->has_ul_flow_credit =
+		(ver >= IOSM_CP_VERSION) && (cp_cap & UL_FLOW_CREDIT);
+}
+
+struct iosm_mmio *ipc_mmio_init(void __iomem *mmio, struct device *dev)
+{
+	struct iosm_mmio *ipc_mmio = kzalloc(sizeof(*ipc_mmio), GFP_KERNEL);
+	int retries = IPC_MMIO_EXEC_STAGE_TIMEOUT;
+	enum ipc_mem_exec_stage stage;
+
+	if (!ipc_mmio)
+		return NULL;
+
+	ipc_mmio->dev = dev;
+
+	ipc_mmio->base = mmio;
+
+	ipc_mmio->offset.exec_stage = MMIO_OFFSET_EXECUTION_STAGE;
+
+	/* Check for a valid execution stage to make sure that the boot code
+	 * has correctly initialized the MMIO area.
+	 */
+	do {
+		stage = ipc_mmio_get_exec_stage(ipc_mmio);
+		if (ipc_mmio_is_valid_exec_stage(stage))
+			break;
+
+		msleep(20);
+	} while (retries-- > 0);
+
+	if (!retries) {
+		dev_err(ipc_mmio->dev, "invalid exec stage %X", stage);
+		goto init_fail;
+	}
+
+	ipc_mmio->offset.chip_info = MMIO_OFFSET_CHIP_INFO;
+
+	/* read chip info size and version from chip info structure */
+	ipc_mmio->chip_info_version =
+		ioread8(ipc_mmio->base + ipc_mmio->offset.chip_info);
+
+	/* Increment of 2 is needed as the size value in the chip info
+	 * excludes the version and size field, which are always present
+	 */
+	ipc_mmio->chip_info_size =
+		ioread8(ipc_mmio->base + ipc_mmio->offset.chip_info + 1) + 2;
+
+	if (ipc_mmio->chip_info_size != MMIO_CHIP_INFO_SIZE) {
+		dev_err(ipc_mmio->dev, "Unexpected Chip Info");
+		goto init_fail;
+	}
+
+	ipc_mmio->offset.rom_exit_code = MMIO_OFFSET_ROM_EXIT_CODE;
+
+	ipc_mmio->offset.psi_address = MMIO_OFFSET_PSI_ADDRESS;
+	ipc_mmio->offset.psi_size = MMIO_OFFSET_PSI_SIZE;
+	ipc_mmio->offset.ipc_status = MMIO_OFFSET_IPC_STATUS;
+	ipc_mmio->offset.context_info = MMIO_OFFSET_CONTEXT_INFO;
+	ipc_mmio->offset.ap_win_base = MMIO_OFFSET_BASE_ADDR;
+	ipc_mmio->offset.ap_win_end = MMIO_OFFSET_END_ADDR;
+
+	ipc_mmio->offset.cp_version = MMIO_OFFSET_CP_VERSION;
+	ipc_mmio->offset.cp_capability = MMIO_OFFSET_CP_CAPABILITIES;
+
+	return ipc_mmio;
+
+init_fail:
+	kfree(ipc_mmio);
+	return NULL;
+}
+
+enum ipc_mem_exec_stage ipc_mmio_get_exec_stage(struct iosm_mmio *ipc_mmio)
+{
+	if (!ipc_mmio)
+		return IPC_MEM_EXEC_STAGE_INVALID;
+
+	return (enum ipc_mem_exec_stage)readl(ipc_mmio->base +
+					      ipc_mmio->offset.exec_stage);
+}
+
+void ipc_mmio_copy_chip_info(struct iosm_mmio *ipc_mmio, void *dest,
+			     size_t size)
+{
+	if (ipc_mmio && dest)
+		memcpy_fromio(dest, ipc_mmio->base + ipc_mmio->offset.chip_info,
+			      size);
+}
+
+enum ipc_mem_device_ipc_state ipc_mmio_get_ipc_state(struct iosm_mmio *ipc_mmio)
+{
+	if (!ipc_mmio)
+		return IPC_MEM_DEVICE_IPC_INVALID;
+
+	return (enum ipc_mem_device_ipc_state)
+		readl(ipc_mmio->base + ipc_mmio->offset.ipc_status);
+}
+
+enum rom_exit_code ipc_mmio_get_rom_exit_code(struct iosm_mmio *ipc_mmio)
+{
+	if (!ipc_mmio)
+		return IMEM_ROM_EXIT_FAIL;
+
+	return (enum rom_exit_code)readl(ipc_mmio->base +
+					 ipc_mmio->offset.rom_exit_code);
+}
+
+void ipc_mmio_config(struct iosm_mmio *ipc_mmio)
+{
+	if (!ipc_mmio)
+		return;
+
+	/* AP memory window (full window is open and active so that modem checks
+	 * each AP address) 0 means don't check on modem side.
+	 */
+	iowrite64_lo_hi(0, ipc_mmio->base + ipc_mmio->offset.ap_win_base);
+	iowrite64_lo_hi(0, ipc_mmio->base + ipc_mmio->offset.ap_win_end);
+
+	iowrite64_lo_hi(ipc_mmio->context_info_addr,
+			ipc_mmio->base + ipc_mmio->offset.context_info);
+}
+
+void ipc_mmio_set_psi_addr_and_size(struct iosm_mmio *ipc_mmio, dma_addr_t addr,
+				    u32 size)
+{
+	if (!ipc_mmio)
+		return;
+
+	iowrite64_lo_hi(addr, ipc_mmio->base + ipc_mmio->offset.psi_address);
+	writel(size, ipc_mmio->base + ipc_mmio->offset.psi_size);
+}
+
+void ipc_mmio_set_contex_info_addr(struct iosm_mmio *ipc_mmio, phys_addr_t addr)
+{
+	if (!ipc_mmio)
+		return;
+
+	/* store context_info address. This will be stored in the mmio area
+	 * during IPC_MEM_DEVICE_IPC_INIT state via ipc_mmio_config()
+	 */
+	ipc_mmio->context_info_addr = addr;
+}
+
+int ipc_mmio_get_cp_version(struct iosm_mmio *ipc_mmio)
+{
+	return ipc_mmio ? readl(ipc_mmio->base + ipc_mmio->offset.cp_version) :
+			  -EFAULT;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h
new file mode 100644
index 000000000000..bcf77aea06e7
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_MMIO_H
+#define IOSM_IPC_MMIO_H
+
+/* Minimal IOSM CP VERSION which has valid CP_CAPABILITIES field */
+#define IOSM_CP_VERSION 0x0100UL
+
+/* DL dir Aggregation support mask */
+#define DL_AGGR BIT(23)
+
+/* UL dir Aggregation support mask */
+#define UL_AGGR BIT(22)
+
+/* UL flow credit support mask */
+#define UL_FLOW_CREDIT BIT(21)
+
+/* Possible states of the IPC finite state machine. */
+enum ipc_mem_device_ipc_state {
+	IPC_MEM_DEVICE_IPC_UNINIT,
+	IPC_MEM_DEVICE_IPC_INIT,
+	IPC_MEM_DEVICE_IPC_RUNNING,
+	IPC_MEM_DEVICE_IPC_RECOVERY,
+	IPC_MEM_DEVICE_IPC_ERROR,
+	IPC_MEM_DEVICE_IPC_DONT_CARE,
+	IPC_MEM_DEVICE_IPC_INVALID = -1
+};
+
+/* Boot ROM exit status. */
+enum rom_exit_code {
+	IMEM_ROM_EXIT_OPEN_EXT = 0x01,
+	IMEM_ROM_EXIT_OPEN_MEM = 0x02,
+	IMEM_ROM_EXIT_CERT_EXT = 0x10,
+	IMEM_ROM_EXIT_CERT_MEM = 0x20,
+	IMEM_ROM_EXIT_FAIL = 0xFF
+};
+
+/* Boot stages */
+enum ipc_mem_exec_stage {
+	IPC_MEM_EXEC_STAGE_RUN = 0x600DF00D,
+	IPC_MEM_EXEC_STAGE_CRASH = 0x8BADF00D,
+	IPC_MEM_EXEC_STAGE_CD_READY = 0xBADC0DED,
+	IPC_MEM_EXEC_STAGE_BOOT = 0xFEEDB007,
+	IPC_MEM_EXEC_STAGE_PSI = 0xFEEDBEEF,
+	IPC_MEM_EXEC_STAGE_EBL = 0xFEEDCAFE,
+	IPC_MEM_EXEC_STAGE_INVALID = 0xFFFFFFFF
+};
+
+/* mmio scratchpad info */
+struct mmio_offset {
+	int exec_stage;
+	int chip_info;
+	int rom_exit_code;
+	int psi_address;
+	int psi_size;
+	int ipc_status;
+	int context_info;
+	int ap_win_base;
+	int ap_win_end;
+	int cp_version;
+	int cp_capability;
+};
+
+/**
+ * struct iosm_mmio - MMIO region mapped to the doorbell scratchpad.
+ * @base:		Base address of MMIO region
+ * @dev:		Pointer to device structure
+ * @offset:		Start offset
+ * @context_info_addr:	Physical base address of context info structure
+ * @chip_info_version:	Version of chip info structure
+ * @chip_info_size:	Size of chip info structure
+ * @has_mux_lite:	It doesn't support mux aggergation
+ * @has_ul_flow_credit:	Ul flow credit support
+ * @has_slp_no_prot:	Device sleep no protocol support
+ * @has_mcr_support:	Usage of mcr support
+ */
+struct iosm_mmio {
+	unsigned char __iomem *base;
+	struct device *dev;
+	struct mmio_offset offset;
+	phys_addr_t context_info_addr;
+	unsigned int chip_info_version;
+	unsigned int chip_info_size;
+	u8 has_mux_lite:1,
+	   has_ul_flow_credit:1,
+	   has_slp_no_prot:1,
+	   has_mcr_support:1;
+};
+
+/**
+ * ipc_mmio_init - Allocate mmio instance data
+ * @mmio_addr:	Mapped AP base address of the MMIO area.
+ * @dev:	Pointer to device structure
+ *
+ * Returns: address of mmio instance data or NULL if fails.
+ */
+struct iosm_mmio *ipc_mmio_init(void __iomem *mmio_addr, struct device *dev);
+
+/**
+ * ipc_mmio_set_psi_addr_and_size - Set start address and size of the
+ *				    primary system image (PSI) for the
+ *				    FW dowload.
+ * @ipc_mmio:	Pointer to mmio instance
+ * @addr:	PSI address
+ * @size:	PSI immage size
+ */
+void ipc_mmio_set_psi_addr_and_size(struct iosm_mmio *ipc_mmio, dma_addr_t addr,
+				    u32 size);
+
+/**
+ * ipc_mmio_set_contex_info_addr - Stores the Context Info Address in
+ *				   MMIO instance to share it with CP during
+ *				   mmio_init.
+ * @ipc_mmio:	Pointer to mmio instance
+ * @addr:	64-bit address of AP context information.
+ */
+void ipc_mmio_set_contex_info_addr(struct iosm_mmio *ipc_mmio,
+				   phys_addr_t addr);
+
+/**
+ * ipc_mmio_get_cp_version - Write context info and AP memory range addresses.
+ *			     This needs to be called when CP is in
+ *			     IPC_MEM_DEVICE_IPC_INIT state
+ * @ipc_mmio:	Pointer to mmio instance
+ *
+ * Returns: cp version else failure value on error
+ */
+int ipc_mmio_get_cp_version(struct iosm_mmio *ipc_mmio);
+
+/**
+ * ipc_mmio_get_cp_version - Get the CP IPC version
+ * @ipc_mmio:	Pointer to mmio instance
+ *
+ * Returns: version number on success and failure value on error.
+ */
+int ipc_mmio_get_cp_version(struct iosm_mmio *ipc_mmio);
+
+/**
+ * ipc_mmio_get_rom_exit_code - Get exit code from CP boot rom download app
+ * @ipc_mmio:	Pointer to mmio instance
+ *
+ * Returns: exit code from CP boot rom download APP
+ */
+enum rom_exit_code ipc_mmio_get_rom_exit_code(struct iosm_mmio *ipc_mmio);
+
+/**
+ * ipc_mmio_get_exec_stage - Query CP execution stage
+ * @ipc_mmio:	Pointer to mmio instance
+ *
+ * Returns: CP execution stage
+ */
+enum ipc_mem_exec_stage ipc_mmio_get_exec_stage(struct iosm_mmio *ipc_mmio);
+
+/**
+ * ipc_mmio_get_ipc_state - Query CP IPC state
+ * @ipc_mmio:	Pointer to mmio instance
+ *
+ * Returns: CP IPC state
+ */
+enum ipc_mem_device_ipc_state
+ipc_mmio_get_ipc_state(struct iosm_mmio *ipc_mmio);
+
+/**
+ * ipc_mmio_copy_chip_info - Copy size bytes of CP chip info structure
+ *			     into caller provided buffer
+ * @ipc_mmio:	Pointer to mmio instance
+ * @dest:	Pointer to caller provided buff
+ * @size:	Number of bytes to copy
+ */
+void ipc_mmio_copy_chip_info(struct iosm_mmio *ipc_mmio, void *dest,
+			     size_t size);
+
+/**
+ * ipc_mmio_config - Write context info and AP memory range addresses.
+ *		     This needs to be called when CP is in
+ *		     IPC_MEM_DEVICE_IPC_INIT state
+ *
+ * @ipc_mmio:	Pointer to mmio instance
+ */
+void ipc_mmio_config(struct iosm_mmio *ipc_mmio);
+
+/**
+ * ipc_mmio_update_cp_capability - Read and update modem capability, from mmio
+ *				   capability offset
+ *
+ * @ipc_mmio:	Pointer to mmio instance
+ */
+void ipc_mmio_update_cp_capability(struct iosm_mmio *ipc_mmio);
+
+#endif

From 3670970dd8c661c10c10c300d726f59428eaad32 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:11 +0530
Subject: [PATCH 1603/3292] net: iosm: shared memory IPC interface

1) Initializes shared memory for host-device communication.
2) Allocate resources required for control & data operations.
3) Transfers the Device IRQ to IPC execution thread.
4) Defines the timer cbs for async events.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_imem.c | 1363 +++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_imem.h |  579 +++++++++++
 2 files changed, 1942 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_imem.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_imem.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c
new file mode 100644
index 000000000000..9f00e36b7f79
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c
@@ -0,0 +1,1363 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/delay.h>
+
+#include "iosm_ipc_chnl_cfg.h"
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_port.h"
+
+/* Check the wwan ips if it is valid with Channel as input. */
+static int ipc_imem_check_wwan_ips(struct ipc_mem_channel *chnl)
+{
+	if (chnl)
+		return chnl->ctype == IPC_CTYPE_WWAN &&
+		       chnl->if_id == IPC_MEM_MUX_IP_CH_IF_ID;
+	return false;
+}
+
+static int ipc_imem_msg_send_device_sleep(struct iosm_imem *ipc_imem, u32 state)
+{
+	union ipc_msg_prep_args prep_args = {
+		.sleep.target = 1,
+		.sleep.state = state,
+	};
+
+	ipc_imem->device_sleep = state;
+
+	return ipc_protocol_tq_msg_send(ipc_imem->ipc_protocol,
+					IPC_MSG_PREP_SLEEP, &prep_args, NULL);
+}
+
+static bool ipc_imem_dl_skb_alloc(struct iosm_imem *ipc_imem,
+				  struct ipc_pipe *pipe)
+{
+	/* limit max. nr of entries */
+	if (pipe->nr_of_queued_entries >= pipe->max_nr_of_queued_entries)
+		return false;
+
+	return ipc_protocol_dl_td_prepare(ipc_imem->ipc_protocol, pipe);
+}
+
+/* This timer handler will retry DL buff allocation if a pipe has no free buf
+ * and gives doorbell if TD is available
+ */
+static int ipc_imem_tq_td_alloc_timer(struct iosm_imem *ipc_imem, int arg,
+				      void *msg, size_t size)
+{
+	bool new_buffers_available = false;
+	bool retry_allocation = false;
+	int i;
+
+	for (i = 0; i < IPC_MEM_MAX_CHANNELS; i++) {
+		struct ipc_pipe *pipe = &ipc_imem->channels[i].dl_pipe;
+
+		if (!pipe->is_open || pipe->nr_of_queued_entries > 0)
+			continue;
+
+		while (ipc_imem_dl_skb_alloc(ipc_imem, pipe))
+			new_buffers_available = true;
+
+		if (pipe->nr_of_queued_entries == 0)
+			retry_allocation = true;
+	}
+
+	if (new_buffers_available)
+		ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol,
+					      IPC_HP_DL_PROCESS);
+
+	if (retry_allocation) {
+		ipc_imem->hrtimer_period =
+		ktime_set(0, IPC_TD_ALLOC_TIMER_PERIOD_MS * 1000 * 1000ULL);
+		if (!hrtimer_active(&ipc_imem->td_alloc_timer))
+			hrtimer_start(&ipc_imem->td_alloc_timer,
+				      ipc_imem->hrtimer_period,
+				      HRTIMER_MODE_REL);
+	}
+	return 0;
+}
+
+static enum hrtimer_restart ipc_imem_td_alloc_timer_cb(struct hrtimer *hr_timer)
+{
+	struct iosm_imem *ipc_imem =
+		container_of(hr_timer, struct iosm_imem, td_alloc_timer);
+	/* Post an async tasklet event to trigger HP update Doorbell */
+	ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_td_alloc_timer, 0, NULL,
+				 0, false);
+	return HRTIMER_NORESTART;
+}
+
+/* Fast update timer tasklet handler to trigger HP update */
+static int ipc_imem_tq_fast_update_timer_cb(struct iosm_imem *ipc_imem, int arg,
+					    void *msg, size_t size)
+{
+	ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol,
+				      IPC_HP_FAST_TD_UPD_TMR);
+
+	return 0;
+}
+
+static enum hrtimer_restart
+ipc_imem_fast_update_timer_cb(struct hrtimer *hr_timer)
+{
+	struct iosm_imem *ipc_imem =
+		container_of(hr_timer, struct iosm_imem, fast_update_timer);
+	/* Post an async tasklet event to trigger HP update Doorbell */
+	ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_fast_update_timer_cb, 0,
+				 NULL, 0, false);
+	return HRTIMER_NORESTART;
+}
+
+static int ipc_imem_setup_cp_mux_cap_init(struct iosm_imem *ipc_imem,
+					  struct ipc_mux_config *cfg)
+{
+	ipc_mmio_update_cp_capability(ipc_imem->mmio);
+
+	if (!ipc_imem->mmio->has_mux_lite) {
+		dev_err(ipc_imem->dev, "Failed to get Mux capability.");
+		return -EINVAL;
+	}
+
+	cfg->protocol = MUX_LITE;
+
+	cfg->ul_flow = (ipc_imem->mmio->has_ul_flow_credit == 1) ?
+			       MUX_UL_ON_CREDITS :
+			       MUX_UL;
+
+	/* The instance ID is same as channel ID because this is been reused
+	 * for channel alloc function.
+	 */
+	cfg->instance_id = IPC_MEM_MUX_IP_CH_IF_ID;
+	cfg->nr_sessions = IPC_MEM_MUX_IP_SESSION_ENTRIES;
+
+	return 0;
+}
+
+void ipc_imem_msg_send_feature_set(struct iosm_imem *ipc_imem,
+				   unsigned int reset_enable, bool atomic_ctx)
+{
+	union ipc_msg_prep_args prep_args = { .feature_set.reset_enable =
+						      reset_enable };
+
+	if (atomic_ctx)
+		ipc_protocol_tq_msg_send(ipc_imem->ipc_protocol,
+					 IPC_MSG_PREP_FEATURE_SET, &prep_args,
+					 NULL);
+	else
+		ipc_protocol_msg_send(ipc_imem->ipc_protocol,
+				      IPC_MSG_PREP_FEATURE_SET, &prep_args);
+}
+
+void ipc_imem_td_update_timer_start(struct iosm_imem *ipc_imem)
+{
+	/* Use the TD update timer only in the runtime phase */
+	if (!ipc_imem->enter_runtime || ipc_imem->td_update_timer_suspended) {
+		/* trigger the doorbell irq on CP directly. */
+		ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol,
+					      IPC_HP_TD_UPD_TMR_START);
+		return;
+	}
+
+	if (!hrtimer_active(&ipc_imem->tdupdate_timer)) {
+		ipc_imem->hrtimer_period =
+		ktime_set(0, TD_UPDATE_DEFAULT_TIMEOUT_USEC * 1000ULL);
+		if (!hrtimer_active(&ipc_imem->tdupdate_timer))
+			hrtimer_start(&ipc_imem->tdupdate_timer,
+				      ipc_imem->hrtimer_period,
+				      HRTIMER_MODE_REL);
+	}
+}
+
+void ipc_imem_hrtimer_stop(struct hrtimer *hr_timer)
+{
+	if (hrtimer_active(hr_timer))
+		hrtimer_cancel(hr_timer);
+}
+
+bool ipc_imem_ul_write_td(struct iosm_imem *ipc_imem)
+{
+	struct ipc_mem_channel *channel;
+	struct sk_buff_head *ul_list;
+	bool hpda_pending = false;
+	bool forced_hpdu = false;
+	struct ipc_pipe *pipe;
+	int i;
+
+	/* Analyze the uplink pipe of all active channels. */
+	for (i = 0; i < ipc_imem->nr_of_channels; i++) {
+		channel = &ipc_imem->channels[i];
+
+		if (channel->state != IMEM_CHANNEL_ACTIVE)
+			continue;
+
+		pipe = &channel->ul_pipe;
+
+		/* Get the reference to the skbuf accumulator list. */
+		ul_list = &channel->ul_list;
+
+		/* Fill the transfer descriptor with the uplink buffer info. */
+		hpda_pending |= ipc_protocol_ul_td_send(ipc_imem->ipc_protocol,
+							pipe, ul_list);
+
+		/* forced HP update needed for non data channels */
+		if (hpda_pending && !ipc_imem_check_wwan_ips(channel))
+			forced_hpdu = true;
+	}
+
+	if (forced_hpdu) {
+		hpda_pending = false;
+		ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol,
+					      IPC_HP_UL_WRITE_TD);
+	}
+
+	return hpda_pending;
+}
+
+void ipc_imem_ipc_init_check(struct iosm_imem *ipc_imem)
+{
+	int timeout = IPC_MODEM_BOOT_TIMEOUT;
+
+	ipc_imem->ipc_requested_state = IPC_MEM_DEVICE_IPC_INIT;
+
+	/* Trigger the CP interrupt to enter the init state. */
+	ipc_doorbell_fire(ipc_imem->pcie, IPC_DOORBELL_IRQ_IPC,
+			  IPC_MEM_DEVICE_IPC_INIT);
+	/* Wait for the CP update. */
+	do {
+		if (ipc_mmio_get_ipc_state(ipc_imem->mmio) ==
+		    ipc_imem->ipc_requested_state) {
+			/* Prepare the MMIO space */
+			ipc_mmio_config(ipc_imem->mmio);
+
+			/* Trigger the CP irq to enter the running state. */
+			ipc_imem->ipc_requested_state =
+				IPC_MEM_DEVICE_IPC_RUNNING;
+			ipc_doorbell_fire(ipc_imem->pcie, IPC_DOORBELL_IRQ_IPC,
+					  IPC_MEM_DEVICE_IPC_RUNNING);
+
+			return;
+		}
+		msleep(20);
+	} while (--timeout);
+
+	/* timeout */
+	dev_err(ipc_imem->dev, "%s: ipc_status(%d) ne. IPC_MEM_DEVICE_IPC_INIT",
+		ipc_imem_phase_get_string(ipc_imem->phase),
+		ipc_mmio_get_ipc_state(ipc_imem->mmio));
+
+	ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_TIMEOUT);
+}
+
+/* Analyze the packet type and distribute it. */
+static void ipc_imem_dl_skb_process(struct iosm_imem *ipc_imem,
+				    struct ipc_pipe *pipe, struct sk_buff *skb)
+{
+	u16 port_id;
+
+	if (!skb)
+		return;
+
+	/* An AT/control or IP packet is expected. */
+	switch (pipe->channel->ctype) {
+	case IPC_CTYPE_CTRL:
+		port_id = pipe->channel->channel_id;
+
+		/* Pass the packet to the wwan layer. */
+		wwan_port_rx(ipc_imem->ipc_port[port_id]->iosm_port, skb);
+		break;
+
+	case IPC_CTYPE_WWAN:
+		if (pipe->channel->if_id == IPC_MEM_MUX_IP_CH_IF_ID)
+			ipc_mux_dl_decode(ipc_imem->mux, skb);
+		break;
+	default:
+		dev_err(ipc_imem->dev, "Invalid channel type");
+		break;
+	}
+}
+
+/* Process the downlink data and pass them to the char or net layer. */
+static void ipc_imem_dl_pipe_process(struct iosm_imem *ipc_imem,
+				     struct ipc_pipe *pipe)
+{
+	s32 cnt = 0, processed_td_cnt = 0;
+	struct ipc_mem_channel *channel;
+	u32 head = 0, tail = 0;
+	bool processed = false;
+	struct sk_buff *skb;
+
+	channel = pipe->channel;
+
+	ipc_protocol_get_head_tail_index(ipc_imem->ipc_protocol, pipe, &head,
+					 &tail);
+	if (pipe->old_tail != tail) {
+		if (pipe->old_tail < tail)
+			cnt = tail - pipe->old_tail;
+		else
+			cnt = pipe->nr_of_entries - pipe->old_tail + tail;
+	}
+
+	processed_td_cnt = cnt;
+
+	/* Seek for pipes with pending DL data. */
+	while (cnt--) {
+		skb = ipc_protocol_dl_td_process(ipc_imem->ipc_protocol, pipe);
+
+		/* Analyze the packet type and distribute it. */
+		ipc_imem_dl_skb_process(ipc_imem, pipe, skb);
+	}
+
+	/* try to allocate new empty DL SKbs from head..tail - 1*/
+	while (ipc_imem_dl_skb_alloc(ipc_imem, pipe))
+		processed = true;
+
+	if (processed && !ipc_imem_check_wwan_ips(channel)) {
+		/* Force HP update for non IP channels */
+		ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol,
+					      IPC_HP_DL_PROCESS);
+		processed = false;
+
+		/* If Fast Update timer is already running then stop */
+		ipc_imem_hrtimer_stop(&ipc_imem->fast_update_timer);
+	}
+
+	/* Any control channel process will get immediate HP update.
+	 * Start Fast update timer only for IP channel if all the TDs were
+	 * used in last process.
+	 */
+	if (processed && (processed_td_cnt == pipe->nr_of_entries - 1)) {
+		ipc_imem->hrtimer_period =
+		ktime_set(0, FORCE_UPDATE_DEFAULT_TIMEOUT_USEC * 1000ULL);
+		hrtimer_start(&ipc_imem->fast_update_timer,
+			      ipc_imem->hrtimer_period, HRTIMER_MODE_REL);
+	}
+
+	if (ipc_imem->app_notify_dl_pend)
+		complete(&ipc_imem->dl_pend_sem);
+}
+
+/* process open uplink pipe */
+static void ipc_imem_ul_pipe_process(struct iosm_imem *ipc_imem,
+				     struct ipc_pipe *pipe)
+{
+	struct ipc_mem_channel *channel;
+	u32 tail = 0, head = 0;
+	struct sk_buff *skb;
+	s32 cnt = 0;
+
+	channel = pipe->channel;
+
+	/* Get the internal phase. */
+	ipc_protocol_get_head_tail_index(ipc_imem->ipc_protocol, pipe, &head,
+					 &tail);
+
+	if (pipe->old_tail != tail) {
+		if (pipe->old_tail < tail)
+			cnt = tail - pipe->old_tail;
+		else
+			cnt = pipe->nr_of_entries - pipe->old_tail + tail;
+	}
+
+	/* Free UL buffers. */
+	while (cnt--) {
+		skb = ipc_protocol_ul_td_process(ipc_imem->ipc_protocol, pipe);
+
+		if (!skb)
+			continue;
+
+		/* If the user app was suspended in uplink direction - blocking
+		 * write, resume it.
+		 */
+		if (IPC_CB(skb)->op_type == UL_USR_OP_BLOCKED)
+			complete(&channel->ul_sem);
+
+		/* Free the skbuf element. */
+		if (IPC_CB(skb)->op_type == UL_MUX_OP_ADB) {
+			if (channel->if_id == IPC_MEM_MUX_IP_CH_IF_ID)
+				ipc_mux_ul_encoded_process(ipc_imem->mux, skb);
+			else
+				dev_err(ipc_imem->dev,
+					"OP Type is UL_MUX, unknown if_id %d",
+					channel->if_id);
+		} else {
+			ipc_pcie_kfree_skb(ipc_imem->pcie, skb);
+		}
+	}
+
+	/* Trace channel stats for IP UL pipe. */
+	if (ipc_imem_check_wwan_ips(pipe->channel))
+		ipc_mux_check_n_restart_tx(ipc_imem->mux);
+
+	if (ipc_imem->app_notify_ul_pend)
+		complete(&ipc_imem->ul_pend_sem);
+}
+
+/* Executes the irq. */
+static void ipc_imem_rom_irq_exec(struct iosm_imem *ipc_imem)
+{
+	struct ipc_mem_channel *channel;
+
+	if (ipc_imem->flash_channel_id < 0) {
+		ipc_imem->rom_exit_code = IMEM_ROM_EXIT_FAIL;
+		dev_err(ipc_imem->dev, "Missing flash app:%d",
+			ipc_imem->flash_channel_id);
+		return;
+	}
+
+	ipc_imem->rom_exit_code = ipc_mmio_get_rom_exit_code(ipc_imem->mmio);
+
+	/* Wake up the flash app to continue or to terminate depending
+	 * on the CP ROM exit code.
+	 */
+	channel = &ipc_imem->channels[ipc_imem->flash_channel_id];
+	complete(&channel->ul_sem);
+}
+
+/* Execute the UL bundle timer actions, generating the doorbell irq. */
+static int ipc_imem_tq_td_update_timer_cb(struct iosm_imem *ipc_imem, int arg,
+					  void *msg, size_t size)
+{
+	ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol,
+				      IPC_HP_TD_UPD_TMR);
+	return 0;
+}
+
+/* Consider link power management in the runtime phase. */
+static void ipc_imem_slp_control_exec(struct iosm_imem *ipc_imem)
+{
+	    /* link will go down, Test pending UL packets.*/
+	if (ipc_protocol_pm_dev_sleep_handle(ipc_imem->ipc_protocol) &&
+	    hrtimer_active(&ipc_imem->tdupdate_timer)) {
+		/* Generate the doorbell irq. */
+		ipc_imem_tq_td_update_timer_cb(ipc_imem, 0, NULL, 0);
+		/* Stop the TD update timer. */
+		ipc_imem_hrtimer_stop(&ipc_imem->tdupdate_timer);
+		/* Stop the fast update timer. */
+		ipc_imem_hrtimer_stop(&ipc_imem->fast_update_timer);
+	}
+}
+
+/* Execute startup timer and wait for delayed start (e.g. NAND) */
+static int ipc_imem_tq_startup_timer_cb(struct iosm_imem *ipc_imem, int arg,
+					void *msg, size_t size)
+{
+	/* Update & check the current operation phase. */
+	if (ipc_imem_phase_update(ipc_imem) != IPC_P_RUN)
+		return -EIO;
+
+	if (ipc_mmio_get_ipc_state(ipc_imem->mmio) ==
+	    IPC_MEM_DEVICE_IPC_UNINIT) {
+		ipc_imem->ipc_requested_state = IPC_MEM_DEVICE_IPC_INIT;
+
+		ipc_doorbell_fire(ipc_imem->pcie, IPC_DOORBELL_IRQ_IPC,
+				  IPC_MEM_DEVICE_IPC_INIT);
+
+		ipc_imem->hrtimer_period = ktime_set(0, 100 * 1000UL * 1000ULL);
+		/* reduce period to 100 ms to check for mmio init state */
+		if (!hrtimer_active(&ipc_imem->startup_timer))
+			hrtimer_start(&ipc_imem->startup_timer,
+				      ipc_imem->hrtimer_period,
+				      HRTIMER_MODE_REL);
+	} else if (ipc_mmio_get_ipc_state(ipc_imem->mmio) ==
+		   IPC_MEM_DEVICE_IPC_INIT) {
+		/* Startup complete  - disable timer */
+		ipc_imem_hrtimer_stop(&ipc_imem->startup_timer);
+
+		/* Prepare the MMIO space */
+		ipc_mmio_config(ipc_imem->mmio);
+		ipc_imem->ipc_requested_state = IPC_MEM_DEVICE_IPC_RUNNING;
+		ipc_doorbell_fire(ipc_imem->pcie, IPC_DOORBELL_IRQ_IPC,
+				  IPC_MEM_DEVICE_IPC_RUNNING);
+	}
+
+	return 0;
+}
+
+static enum hrtimer_restart ipc_imem_startup_timer_cb(struct hrtimer *hr_timer)
+{
+	enum hrtimer_restart result = HRTIMER_NORESTART;
+	struct iosm_imem *ipc_imem =
+		container_of(hr_timer, struct iosm_imem, startup_timer);
+
+	if (ktime_to_ns(ipc_imem->hrtimer_period)) {
+		hrtimer_forward(&ipc_imem->startup_timer, ktime_get(),
+				ipc_imem->hrtimer_period);
+		result = HRTIMER_RESTART;
+	}
+
+	ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_startup_timer_cb, 0,
+				 NULL, 0, false);
+	return result;
+}
+
+/* Get the CP execution stage */
+static enum ipc_mem_exec_stage
+ipc_imem_get_exec_stage_buffered(struct iosm_imem *ipc_imem)
+{
+	return (ipc_imem->phase == IPC_P_RUN &&
+		ipc_imem->ipc_status == IPC_MEM_DEVICE_IPC_RUNNING) ?
+		       ipc_protocol_get_ap_exec_stage(ipc_imem->ipc_protocol) :
+		       ipc_mmio_get_exec_stage(ipc_imem->mmio);
+}
+
+/* Callback to send the modem ready uevent */
+static int ipc_imem_send_mdm_rdy_cb(struct iosm_imem *ipc_imem, int arg,
+				    void *msg, size_t size)
+{
+	enum ipc_mem_exec_stage exec_stage =
+		ipc_imem_get_exec_stage_buffered(ipc_imem);
+
+	if (exec_stage == IPC_MEM_EXEC_STAGE_RUN)
+		ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_READY);
+
+	return 0;
+}
+
+/* This function is executed in a task context via an ipc_worker object,
+ * as the creation or removal of device can't be done from tasklet.
+ */
+static void ipc_imem_run_state_worker(struct work_struct *instance)
+{
+	struct ipc_chnl_cfg chnl_cfg_port = { 0 };
+	struct ipc_mux_config mux_cfg;
+	struct iosm_imem *ipc_imem;
+	u8 ctrl_chl_idx = 0;
+
+	ipc_imem = container_of(instance, struct iosm_imem, run_state_worker);
+
+	if (ipc_imem->phase != IPC_P_RUN) {
+		dev_err(ipc_imem->dev,
+			"Modem link down. Exit run state worker.");
+		return;
+	}
+
+	if (!ipc_imem_setup_cp_mux_cap_init(ipc_imem, &mux_cfg))
+		ipc_imem->mux = ipc_mux_init(&mux_cfg, ipc_imem);
+
+	ipc_imem_wwan_channel_init(ipc_imem, mux_cfg.protocol);
+	if (ipc_imem->mux)
+		ipc_imem->mux->wwan = ipc_imem->wwan;
+
+	while (ctrl_chl_idx < IPC_MEM_MAX_CHANNELS) {
+		if (!ipc_chnl_cfg_get(&chnl_cfg_port, ctrl_chl_idx)) {
+			ipc_imem->ipc_port[ctrl_chl_idx] = NULL;
+			if (chnl_cfg_port.wwan_port_type != WWAN_PORT_UNKNOWN) {
+				ipc_imem_channel_init(ipc_imem, IPC_CTYPE_CTRL,
+						      chnl_cfg_port,
+						      IRQ_MOD_OFF);
+				ipc_imem->ipc_port[ctrl_chl_idx] =
+					ipc_port_init(ipc_imem, chnl_cfg_port);
+			}
+		}
+		ctrl_chl_idx++;
+	}
+
+	ipc_task_queue_send_task(ipc_imem, ipc_imem_send_mdm_rdy_cb, 0, NULL, 0,
+				 false);
+
+	/* Complete all memory stores before setting bit */
+	smp_mb__before_atomic();
+
+	set_bit(FULLY_FUNCTIONAL, &ipc_imem->flag);
+
+	/* Complete all memory stores after setting bit */
+	smp_mb__after_atomic();
+}
+
+static void ipc_imem_handle_irq(struct iosm_imem *ipc_imem, int irq)
+{
+	enum ipc_mem_device_ipc_state curr_ipc_status;
+	enum ipc_phase old_phase, phase;
+	bool retry_allocation = false;
+	bool ul_pending = false;
+	int ch_id, i;
+
+	if (irq != IMEM_IRQ_DONT_CARE)
+		ipc_imem->ev_irq_pending[irq] = false;
+
+	/* Get the internal phase. */
+	old_phase = ipc_imem->phase;
+
+	if (old_phase == IPC_P_OFF_REQ) {
+		dev_dbg(ipc_imem->dev,
+			"[%s]: Ignoring MSI. Deinit sequence in progress!",
+			ipc_imem_phase_get_string(old_phase));
+		return;
+	}
+
+	/* Update the phase controlled by CP. */
+	phase = ipc_imem_phase_update(ipc_imem);
+
+	switch (phase) {
+	case IPC_P_RUN:
+		if (!ipc_imem->enter_runtime) {
+			/* Excute the transition from flash/boot to runtime. */
+			ipc_imem->enter_runtime = 1;
+
+			/* allow device to sleep, default value is
+			 * IPC_HOST_SLEEP_ENTER_SLEEP
+			 */
+			ipc_imem_msg_send_device_sleep(ipc_imem,
+						       ipc_imem->device_sleep);
+
+			ipc_imem_msg_send_feature_set(ipc_imem,
+						      IPC_MEM_INBAND_CRASH_SIG,
+						  true);
+		}
+
+		curr_ipc_status =
+			ipc_protocol_get_ipc_status(ipc_imem->ipc_protocol);
+
+		/* check ipc_status change */
+		if (ipc_imem->ipc_status != curr_ipc_status) {
+			ipc_imem->ipc_status = curr_ipc_status;
+
+			if (ipc_imem->ipc_status ==
+			    IPC_MEM_DEVICE_IPC_RUNNING) {
+				schedule_work(&ipc_imem->run_state_worker);
+			}
+		}
+
+		/* Consider power management in the runtime phase. */
+		ipc_imem_slp_control_exec(ipc_imem);
+		break; /* Continue with skbuf processing. */
+
+		/* Unexpected phases. */
+	case IPC_P_OFF:
+	case IPC_P_OFF_REQ:
+		dev_err(ipc_imem->dev, "confused phase %s",
+			ipc_imem_phase_get_string(phase));
+		return;
+
+	case IPC_P_PSI:
+		if (old_phase != IPC_P_ROM)
+			break;
+
+		fallthrough;
+		/* On CP the PSI phase is already active. */
+
+	case IPC_P_ROM:
+		/* Before CP ROM driver starts the PSI image, it sets
+		 * the exit_code field on the doorbell scratchpad and
+		 * triggers the irq.
+		 */
+		ipc_imem_rom_irq_exec(ipc_imem);
+		return;
+
+	default:
+		break;
+	}
+
+	/* process message ring */
+	ipc_protocol_msg_process(ipc_imem, irq);
+
+	/* process all open pipes */
+	for (i = 0; i < IPC_MEM_MAX_CHANNELS; i++) {
+		struct ipc_pipe *ul_pipe = &ipc_imem->channels[i].ul_pipe;
+		struct ipc_pipe *dl_pipe = &ipc_imem->channels[i].dl_pipe;
+
+		if (dl_pipe->is_open &&
+		    (irq == IMEM_IRQ_DONT_CARE || irq == dl_pipe->irq)) {
+			ipc_imem_dl_pipe_process(ipc_imem, dl_pipe);
+
+			if (dl_pipe->nr_of_queued_entries == 0)
+				retry_allocation = true;
+		}
+
+		if (ul_pipe->is_open)
+			ipc_imem_ul_pipe_process(ipc_imem, ul_pipe);
+	}
+
+	/* Try to generate new ADB or ADGH. */
+	if (ipc_mux_ul_data_encode(ipc_imem->mux))
+		ipc_imem_td_update_timer_start(ipc_imem);
+
+	/* Continue the send procedure with accumulated SIO or NETIF packets.
+	 * Reset the debounce flags.
+	 */
+	ul_pending |= ipc_imem_ul_write_td(ipc_imem);
+
+	/* if UL data is pending restart TD update timer */
+	if (ul_pending) {
+		ipc_imem->hrtimer_period =
+		ktime_set(0, TD_UPDATE_DEFAULT_TIMEOUT_USEC * 1000ULL);
+		if (!hrtimer_active(&ipc_imem->tdupdate_timer))
+			hrtimer_start(&ipc_imem->tdupdate_timer,
+				      ipc_imem->hrtimer_period,
+				      HRTIMER_MODE_REL);
+	}
+
+	/* If CP has executed the transition
+	 * from IPC_INIT to IPC_RUNNING in the PSI
+	 * phase, wake up the flash app to open the pipes.
+	 */
+	if ((phase == IPC_P_PSI || phase == IPC_P_EBL) &&
+	    ipc_imem->ipc_requested_state == IPC_MEM_DEVICE_IPC_RUNNING &&
+	    ipc_mmio_get_ipc_state(ipc_imem->mmio) ==
+		    IPC_MEM_DEVICE_IPC_RUNNING &&
+	    ipc_imem->flash_channel_id >= 0) {
+		/* Wake up the flash app to open the pipes. */
+		ch_id = ipc_imem->flash_channel_id;
+		complete(&ipc_imem->channels[ch_id].ul_sem);
+	}
+
+	/* Reset the expected CP state. */
+	ipc_imem->ipc_requested_state = IPC_MEM_DEVICE_IPC_DONT_CARE;
+
+	if (retry_allocation) {
+		ipc_imem->hrtimer_period =
+		ktime_set(0, IPC_TD_ALLOC_TIMER_PERIOD_MS * 1000 * 1000ULL);
+		if (!hrtimer_active(&ipc_imem->td_alloc_timer))
+			hrtimer_start(&ipc_imem->td_alloc_timer,
+				      ipc_imem->hrtimer_period,
+				      HRTIMER_MODE_REL);
+	}
+}
+
+/* Callback by tasklet for handling interrupt events. */
+static int ipc_imem_tq_irq_cb(struct iosm_imem *ipc_imem, int arg, void *msg,
+			      size_t size)
+{
+	ipc_imem_handle_irq(ipc_imem, arg);
+
+	return 0;
+}
+
+void ipc_imem_ul_send(struct iosm_imem *ipc_imem)
+{
+	/* start doorbell irq delay timer if UL is pending */
+	if (ipc_imem_ul_write_td(ipc_imem))
+		ipc_imem_td_update_timer_start(ipc_imem);
+}
+
+/* Check the execution stage and update the AP phase */
+static enum ipc_phase ipc_imem_phase_update_check(struct iosm_imem *ipc_imem,
+						  enum ipc_mem_exec_stage stage)
+{
+	switch (stage) {
+	case IPC_MEM_EXEC_STAGE_BOOT:
+		if (ipc_imem->phase != IPC_P_ROM) {
+			/* Send this event only once */
+			ipc_uevent_send(ipc_imem->dev, UEVENT_ROM_READY);
+		}
+
+		ipc_imem->phase = IPC_P_ROM;
+		break;
+
+	case IPC_MEM_EXEC_STAGE_PSI:
+		ipc_imem->phase = IPC_P_PSI;
+		break;
+
+	case IPC_MEM_EXEC_STAGE_EBL:
+		ipc_imem->phase = IPC_P_EBL;
+		break;
+
+	case IPC_MEM_EXEC_STAGE_RUN:
+		if (ipc_imem->phase != IPC_P_RUN &&
+		    ipc_imem->ipc_status == IPC_MEM_DEVICE_IPC_RUNNING) {
+			ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_READY);
+		}
+		ipc_imem->phase = IPC_P_RUN;
+		break;
+
+	case IPC_MEM_EXEC_STAGE_CRASH:
+		if (ipc_imem->phase != IPC_P_CRASH)
+			ipc_uevent_send(ipc_imem->dev, UEVENT_CRASH);
+
+		ipc_imem->phase = IPC_P_CRASH;
+		break;
+
+	case IPC_MEM_EXEC_STAGE_CD_READY:
+		if (ipc_imem->phase != IPC_P_CD_READY)
+			ipc_uevent_send(ipc_imem->dev, UEVENT_CD_READY);
+		ipc_imem->phase = IPC_P_CD_READY;
+		break;
+
+	default:
+		/* unknown exec stage:
+		 * assume that link is down and send info to listeners
+		 */
+		ipc_uevent_send(ipc_imem->dev, UEVENT_CD_READY_LINK_DOWN);
+		break;
+	}
+
+	return ipc_imem->phase;
+}
+
+/* Send msg to device to open pipe */
+static bool ipc_imem_pipe_open(struct iosm_imem *ipc_imem,
+			       struct ipc_pipe *pipe)
+{
+	union ipc_msg_prep_args prep_args = {
+		.pipe_open.pipe = pipe,
+	};
+
+	if (ipc_protocol_msg_send(ipc_imem->ipc_protocol,
+				  IPC_MSG_PREP_PIPE_OPEN, &prep_args) == 0)
+		pipe->is_open = true;
+
+	return pipe->is_open;
+}
+
+/* Allocates the TDs for the given pipe along with firing HP update DB. */
+static int ipc_imem_tq_pipe_td_alloc(struct iosm_imem *ipc_imem, int arg,
+				     void *msg, size_t size)
+{
+	struct ipc_pipe *dl_pipe = msg;
+	bool processed = false;
+	int i;
+
+	for (i = 0; i < dl_pipe->nr_of_entries - 1; i++)
+		processed |= ipc_imem_dl_skb_alloc(ipc_imem, dl_pipe);
+
+	/* Trigger the doorbell irq to inform CP that new downlink buffers are
+	 * available.
+	 */
+	if (processed)
+		ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol, arg);
+
+	return 0;
+}
+
+static enum hrtimer_restart
+ipc_imem_td_update_timer_cb(struct hrtimer *hr_timer)
+{
+	struct iosm_imem *ipc_imem =
+		container_of(hr_timer, struct iosm_imem, tdupdate_timer);
+
+	ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_td_update_timer_cb, 0,
+				 NULL, 0, false);
+	return HRTIMER_NORESTART;
+}
+
+/* Get the CP execution state and map it to the AP phase. */
+enum ipc_phase ipc_imem_phase_update(struct iosm_imem *ipc_imem)
+{
+	enum ipc_mem_exec_stage exec_stage =
+				ipc_imem_get_exec_stage_buffered(ipc_imem);
+	/* If the CP stage is undef, return the internal precalculated phase. */
+	return ipc_imem->phase == IPC_P_OFF_REQ ?
+		       ipc_imem->phase :
+		       ipc_imem_phase_update_check(ipc_imem, exec_stage);
+}
+
+const char *ipc_imem_phase_get_string(enum ipc_phase phase)
+{
+	switch (phase) {
+	case IPC_P_RUN:
+		return "A-RUN";
+
+	case IPC_P_OFF:
+		return "A-OFF";
+
+	case IPC_P_ROM:
+		return "A-ROM";
+
+	case IPC_P_PSI:
+		return "A-PSI";
+
+	case IPC_P_EBL:
+		return "A-EBL";
+
+	case IPC_P_CRASH:
+		return "A-CRASH";
+
+	case IPC_P_CD_READY:
+		return "A-CD_READY";
+
+	case IPC_P_OFF_REQ:
+		return "A-OFF_REQ";
+
+	default:
+		return "A-???";
+	}
+}
+
+void ipc_imem_pipe_close(struct iosm_imem *ipc_imem, struct ipc_pipe *pipe)
+{
+	union ipc_msg_prep_args prep_args = { .pipe_close.pipe = pipe };
+
+	pipe->is_open = false;
+	ipc_protocol_msg_send(ipc_imem->ipc_protocol, IPC_MSG_PREP_PIPE_CLOSE,
+			      &prep_args);
+
+	ipc_imem_pipe_cleanup(ipc_imem, pipe);
+}
+
+void ipc_imem_channel_close(struct iosm_imem *ipc_imem, int channel_id)
+{
+	struct ipc_mem_channel *channel;
+
+	if (channel_id < 0 || channel_id >= ipc_imem->nr_of_channels) {
+		dev_err(ipc_imem->dev, "invalid channel id %d", channel_id);
+		return;
+	}
+
+	channel = &ipc_imem->channels[channel_id];
+
+	if (channel->state == IMEM_CHANNEL_FREE) {
+		dev_err(ipc_imem->dev, "ch[%d]: invalid channel state %d",
+			channel_id, channel->state);
+		return;
+	}
+
+	/* Free only the channel id in the CP power off mode. */
+	if (channel->state == IMEM_CHANNEL_RESERVED)
+		/* Release only the channel id. */
+		goto channel_free;
+
+	if (ipc_imem->phase == IPC_P_RUN) {
+		ipc_imem_pipe_close(ipc_imem, &channel->ul_pipe);
+		ipc_imem_pipe_close(ipc_imem, &channel->dl_pipe);
+	}
+
+	ipc_imem_pipe_cleanup(ipc_imem, &channel->ul_pipe);
+	ipc_imem_pipe_cleanup(ipc_imem, &channel->dl_pipe);
+
+channel_free:
+	ipc_imem_channel_free(channel);
+}
+
+struct ipc_mem_channel *ipc_imem_channel_open(struct iosm_imem *ipc_imem,
+					      int channel_id, u32 db_id)
+{
+	struct ipc_mem_channel *channel;
+
+	if (channel_id < 0 || channel_id >= IPC_MEM_MAX_CHANNELS) {
+		dev_err(ipc_imem->dev, "invalid channel ID: %d", channel_id);
+		return NULL;
+	}
+
+	channel = &ipc_imem->channels[channel_id];
+
+	channel->state = IMEM_CHANNEL_ACTIVE;
+
+	if (!ipc_imem_pipe_open(ipc_imem, &channel->ul_pipe))
+		goto ul_pipe_err;
+
+	if (!ipc_imem_pipe_open(ipc_imem, &channel->dl_pipe))
+		goto dl_pipe_err;
+
+	/* Allocate the downlink buffers in tasklet context. */
+	if (ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_pipe_td_alloc, db_id,
+				     &channel->dl_pipe, 0, false)) {
+		dev_err(ipc_imem->dev, "td allocation failed : %d", channel_id);
+		goto task_failed;
+	}
+
+	/* Active channel. */
+	return channel;
+task_failed:
+	ipc_imem_pipe_close(ipc_imem, &channel->dl_pipe);
+dl_pipe_err:
+	ipc_imem_pipe_close(ipc_imem, &channel->ul_pipe);
+ul_pipe_err:
+	ipc_imem_channel_free(channel);
+	return NULL;
+}
+
+void ipc_imem_pm_suspend(struct iosm_imem *ipc_imem)
+{
+	ipc_protocol_suspend(ipc_imem->ipc_protocol);
+}
+
+void ipc_imem_pm_s2idle_sleep(struct iosm_imem *ipc_imem, bool sleep)
+{
+	ipc_protocol_s2idle_sleep(ipc_imem->ipc_protocol, sleep);
+}
+
+void ipc_imem_pm_resume(struct iosm_imem *ipc_imem)
+{
+	enum ipc_mem_exec_stage stage;
+
+	if (ipc_protocol_resume(ipc_imem->ipc_protocol)) {
+		stage = ipc_mmio_get_exec_stage(ipc_imem->mmio);
+		ipc_imem_phase_update_check(ipc_imem, stage);
+	}
+}
+
+void ipc_imem_channel_free(struct ipc_mem_channel *channel)
+{
+	/* Reset dynamic channel elements. */
+	channel->state = IMEM_CHANNEL_FREE;
+}
+
+int ipc_imem_channel_alloc(struct iosm_imem *ipc_imem, int index,
+			   enum ipc_ctype ctype)
+{
+	struct ipc_mem_channel *channel;
+	int i;
+
+	/* Find channel of given type/index */
+	for (i = 0; i < ipc_imem->nr_of_channels; i++) {
+		channel = &ipc_imem->channels[i];
+		if (channel->ctype == ctype && channel->index == index)
+			break;
+	}
+
+	if (i >= ipc_imem->nr_of_channels) {
+		dev_dbg(ipc_imem->dev,
+			"no channel definition for index=%d ctype=%d", index,
+			ctype);
+		return -ECHRNG;
+	}
+
+	if (ipc_imem->channels[i].state != IMEM_CHANNEL_FREE) {
+		dev_dbg(ipc_imem->dev, "channel is in use");
+		return -EBUSY;
+	}
+
+	if (channel->ctype == IPC_CTYPE_WWAN &&
+	    index == IPC_MEM_MUX_IP_CH_IF_ID)
+		channel->if_id = index;
+
+	channel->channel_id = index;
+	channel->state = IMEM_CHANNEL_RESERVED;
+
+	return i;
+}
+
+void ipc_imem_channel_init(struct iosm_imem *ipc_imem, enum ipc_ctype ctype,
+			   struct ipc_chnl_cfg chnl_cfg, u32 irq_moderation)
+{
+	struct ipc_mem_channel *channel;
+
+	if (chnl_cfg.ul_pipe >= IPC_MEM_MAX_PIPES ||
+	    chnl_cfg.dl_pipe >= IPC_MEM_MAX_PIPES) {
+		dev_err(ipc_imem->dev, "invalid pipe: ul_pipe=%d, dl_pipe=%d",
+			chnl_cfg.ul_pipe, chnl_cfg.dl_pipe);
+		return;
+	}
+
+	if (ipc_imem->nr_of_channels >= IPC_MEM_MAX_CHANNELS) {
+		dev_err(ipc_imem->dev, "too many channels");
+		return;
+	}
+
+	channel = &ipc_imem->channels[ipc_imem->nr_of_channels];
+	channel->channel_id = ipc_imem->nr_of_channels;
+	channel->ctype = ctype;
+	channel->index = chnl_cfg.id;
+	channel->net_err_count = 0;
+	channel->state = IMEM_CHANNEL_FREE;
+	ipc_imem->nr_of_channels++;
+
+	ipc_imem_channel_update(ipc_imem, channel->channel_id, chnl_cfg,
+				IRQ_MOD_OFF);
+
+	skb_queue_head_init(&channel->ul_list);
+
+	init_completion(&channel->ul_sem);
+}
+
+void ipc_imem_channel_update(struct iosm_imem *ipc_imem, int id,
+			     struct ipc_chnl_cfg chnl_cfg, u32 irq_moderation)
+{
+	struct ipc_mem_channel *channel;
+
+	if (id < 0 || id >= ipc_imem->nr_of_channels) {
+		dev_err(ipc_imem->dev, "invalid channel id %d", id);
+		return;
+	}
+
+	channel = &ipc_imem->channels[id];
+
+	if (channel->state != IMEM_CHANNEL_FREE &&
+	    channel->state != IMEM_CHANNEL_RESERVED) {
+		dev_err(ipc_imem->dev, "invalid channel state %d",
+			channel->state);
+		return;
+	}
+
+	channel->ul_pipe.nr_of_entries = chnl_cfg.ul_nr_of_entries;
+	channel->ul_pipe.pipe_nr = chnl_cfg.ul_pipe;
+	channel->ul_pipe.is_open = false;
+	channel->ul_pipe.irq = IPC_UL_PIPE_IRQ_VECTOR;
+	channel->ul_pipe.channel = channel;
+	channel->ul_pipe.dir = IPC_MEM_DIR_UL;
+	channel->ul_pipe.accumulation_backoff = chnl_cfg.accumulation_backoff;
+	channel->ul_pipe.irq_moderation = irq_moderation;
+	channel->ul_pipe.buf_size = 0;
+
+	channel->dl_pipe.nr_of_entries = chnl_cfg.dl_nr_of_entries;
+	channel->dl_pipe.pipe_nr = chnl_cfg.dl_pipe;
+	channel->dl_pipe.is_open = false;
+	channel->dl_pipe.irq = IPC_DL_PIPE_IRQ_VECTOR;
+	channel->dl_pipe.channel = channel;
+	channel->dl_pipe.dir = IPC_MEM_DIR_DL;
+	channel->dl_pipe.accumulation_backoff = chnl_cfg.accumulation_backoff;
+	channel->dl_pipe.irq_moderation = irq_moderation;
+	channel->dl_pipe.buf_size = chnl_cfg.dl_buf_size;
+}
+
+static void ipc_imem_channel_reset(struct iosm_imem *ipc_imem)
+{
+	int i;
+
+	for (i = 0; i < ipc_imem->nr_of_channels; i++) {
+		struct ipc_mem_channel *channel;
+
+		channel = &ipc_imem->channels[i];
+
+		ipc_imem_pipe_cleanup(ipc_imem, &channel->dl_pipe);
+		ipc_imem_pipe_cleanup(ipc_imem, &channel->ul_pipe);
+
+		ipc_imem_channel_free(channel);
+	}
+}
+
+void ipc_imem_pipe_cleanup(struct iosm_imem *ipc_imem, struct ipc_pipe *pipe)
+{
+	struct sk_buff *skb;
+
+	/* Force pipe to closed state also when not explicitly closed through
+	 * ipc_imem_pipe_close()
+	 */
+	pipe->is_open = false;
+
+	/* Empty the uplink skb accumulator. */
+	while ((skb = skb_dequeue(&pipe->channel->ul_list)))
+		ipc_pcie_kfree_skb(ipc_imem->pcie, skb);
+
+	ipc_protocol_pipe_cleanup(ipc_imem->ipc_protocol, pipe);
+}
+
+/* Send IPC protocol uninit to the modem when Link is active. */
+static void ipc_imem_device_ipc_uninit(struct iosm_imem *ipc_imem)
+{
+	int timeout = IPC_MODEM_UNINIT_TIMEOUT_MS;
+	enum ipc_mem_device_ipc_state ipc_state;
+
+	/* When PCIe link is up set IPC_UNINIT
+	 * of the modem otherwise ignore it when PCIe link down happens.
+	 */
+	if (ipc_pcie_check_data_link_active(ipc_imem->pcie)) {
+		/* set modem to UNINIT
+		 * (in case we want to reload the AP driver without resetting
+		 * the modem)
+		 */
+		ipc_doorbell_fire(ipc_imem->pcie, IPC_DOORBELL_IRQ_IPC,
+				  IPC_MEM_DEVICE_IPC_UNINIT);
+		ipc_state = ipc_mmio_get_ipc_state(ipc_imem->mmio);
+
+		/* Wait for maximum 30ms to allow the Modem to uninitialize the
+		 * protocol.
+		 */
+		while ((ipc_state <= IPC_MEM_DEVICE_IPC_DONT_CARE) &&
+		       (ipc_state != IPC_MEM_DEVICE_IPC_UNINIT) &&
+		       (timeout > 0)) {
+			usleep_range(1000, 1250);
+			timeout--;
+			ipc_state = ipc_mmio_get_ipc_state(ipc_imem->mmio);
+		}
+	}
+}
+
+void ipc_imem_cleanup(struct iosm_imem *ipc_imem)
+{
+	ipc_imem->phase = IPC_P_OFF_REQ;
+
+	/* forward MDM_NOT_READY to listeners */
+	ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_NOT_READY);
+
+	hrtimer_cancel(&ipc_imem->td_alloc_timer);
+	hrtimer_cancel(&ipc_imem->tdupdate_timer);
+	hrtimer_cancel(&ipc_imem->fast_update_timer);
+	hrtimer_cancel(&ipc_imem->startup_timer);
+
+	/* cancel the workqueue */
+	cancel_work_sync(&ipc_imem->run_state_worker);
+
+	if (test_and_clear_bit(FULLY_FUNCTIONAL, &ipc_imem->flag)) {
+		ipc_mux_deinit(ipc_imem->mux);
+		ipc_wwan_deinit(ipc_imem->wwan);
+		ipc_port_deinit(ipc_imem->ipc_port);
+	}
+
+	ipc_imem_device_ipc_uninit(ipc_imem);
+	ipc_imem_channel_reset(ipc_imem);
+
+	ipc_protocol_deinit(ipc_imem->ipc_protocol);
+	ipc_task_deinit(ipc_imem->ipc_task);
+
+	kfree(ipc_imem->ipc_task);
+	kfree(ipc_imem->mmio);
+
+	ipc_imem->phase = IPC_P_OFF;
+}
+
+/* After CP has unblocked the PCIe link, save the start address of the doorbell
+ * scratchpad and prepare the shared memory region. If the flashing to RAM
+ * procedure shall be executed, copy the chip information from the doorbell
+ * scratchtpad to the application buffer and wake up the flash app.
+ */
+static int ipc_imem_config(struct iosm_imem *ipc_imem)
+{
+	enum ipc_phase phase;
+
+	/* Initialize the semaphore for the blocking read UL/DL transfer. */
+	init_completion(&ipc_imem->ul_pend_sem);
+
+	init_completion(&ipc_imem->dl_pend_sem);
+
+	/* clear internal flags */
+	ipc_imem->ipc_status = IPC_MEM_DEVICE_IPC_UNINIT;
+	ipc_imem->enter_runtime = 0;
+
+	phase = ipc_imem_phase_update(ipc_imem);
+
+	/* Either CP shall be in the power off or power on phase. */
+	switch (phase) {
+	case IPC_P_ROM:
+		ipc_imem->hrtimer_period = ktime_set(0, 1000 * 1000 * 1000ULL);
+		/* poll execution stage (for delayed start, e.g. NAND) */
+		if (!hrtimer_active(&ipc_imem->startup_timer))
+			hrtimer_start(&ipc_imem->startup_timer,
+				      ipc_imem->hrtimer_period,
+				      HRTIMER_MODE_REL);
+		return 0;
+
+	case IPC_P_PSI:
+	case IPC_P_EBL:
+	case IPC_P_RUN:
+		/* The initial IPC state is IPC_MEM_DEVICE_IPC_UNINIT. */
+		ipc_imem->ipc_requested_state = IPC_MEM_DEVICE_IPC_UNINIT;
+
+		/* Verify the exepected initial state. */
+		if (ipc_imem->ipc_requested_state ==
+		    ipc_mmio_get_ipc_state(ipc_imem->mmio)) {
+			ipc_imem_ipc_init_check(ipc_imem);
+
+			return 0;
+		}
+		dev_err(ipc_imem->dev,
+			"ipc_status(%d) != IPC_MEM_DEVICE_IPC_UNINIT",
+			ipc_mmio_get_ipc_state(ipc_imem->mmio));
+		break;
+	case IPC_P_CRASH:
+	case IPC_P_CD_READY:
+		dev_dbg(ipc_imem->dev,
+			"Modem is in phase %d, reset Modem to collect CD",
+			phase);
+		return 0;
+	default:
+		dev_err(ipc_imem->dev, "unexpected operation phase %d", phase);
+		break;
+	}
+
+	complete(&ipc_imem->dl_pend_sem);
+	complete(&ipc_imem->ul_pend_sem);
+	ipc_imem->phase = IPC_P_OFF;
+	return -EIO;
+}
+
+/* Pass the dev ptr to the shared memory driver and request the entry points */
+struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id,
+				void __iomem *mmio, struct device *dev)
+{
+	struct iosm_imem *ipc_imem = kzalloc(sizeof(*pcie->imem), GFP_KERNEL);
+
+	if (!ipc_imem)
+		return NULL;
+
+	/* Save the device address. */
+	ipc_imem->pcie = pcie;
+	ipc_imem->dev = dev;
+
+	ipc_imem->pci_device_id = device_id;
+
+	ipc_imem->ev_cdev_write_pending = false;
+	ipc_imem->cp_version = 0;
+	ipc_imem->device_sleep = IPC_HOST_SLEEP_ENTER_SLEEP;
+
+	/* Reset the flash channel id. */
+	ipc_imem->flash_channel_id = -1;
+
+	/* Reset the max number of configured channels */
+	ipc_imem->nr_of_channels = 0;
+
+	/* allocate IPC MMIO */
+	ipc_imem->mmio = ipc_mmio_init(mmio, ipc_imem->dev);
+	if (!ipc_imem->mmio) {
+		dev_err(ipc_imem->dev, "failed to initialize mmio region");
+		goto mmio_init_fail;
+	}
+
+	ipc_imem->ipc_task = kzalloc(sizeof(*ipc_imem->ipc_task),
+				     GFP_KERNEL);
+
+	/* Create tasklet for event handling*/
+	if (!ipc_imem->ipc_task)
+		goto ipc_task_fail;
+
+	if (ipc_task_init(ipc_imem->ipc_task))
+		goto ipc_task_init_fail;
+
+	ipc_imem->ipc_task->dev = ipc_imem->dev;
+
+	INIT_WORK(&ipc_imem->run_state_worker, ipc_imem_run_state_worker);
+
+	ipc_imem->ipc_protocol = ipc_protocol_init(ipc_imem);
+
+	if (!ipc_imem->ipc_protocol)
+		goto protocol_init_fail;
+
+	/* The phase is set to power off. */
+	ipc_imem->phase = IPC_P_OFF;
+
+	hrtimer_init(&ipc_imem->startup_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	ipc_imem->startup_timer.function = ipc_imem_startup_timer_cb;
+
+	hrtimer_init(&ipc_imem->tdupdate_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	ipc_imem->tdupdate_timer.function = ipc_imem_td_update_timer_cb;
+
+	hrtimer_init(&ipc_imem->fast_update_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	ipc_imem->fast_update_timer.function = ipc_imem_fast_update_timer_cb;
+
+	hrtimer_init(&ipc_imem->td_alloc_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	ipc_imem->td_alloc_timer.function = ipc_imem_td_alloc_timer_cb;
+
+	if (ipc_imem_config(ipc_imem)) {
+		dev_err(ipc_imem->dev, "failed to initialize the imem");
+		goto imem_config_fail;
+	}
+
+	return ipc_imem;
+
+imem_config_fail:
+	hrtimer_cancel(&ipc_imem->td_alloc_timer);
+	hrtimer_cancel(&ipc_imem->fast_update_timer);
+	hrtimer_cancel(&ipc_imem->tdupdate_timer);
+	hrtimer_cancel(&ipc_imem->startup_timer);
+protocol_init_fail:
+	cancel_work_sync(&ipc_imem->run_state_worker);
+	ipc_task_deinit(ipc_imem->ipc_task);
+ipc_task_init_fail:
+	kfree(ipc_imem->ipc_task);
+ipc_task_fail:
+	kfree(ipc_imem->mmio);
+mmio_init_fail:
+	kfree(ipc_imem);
+	return NULL;
+}
+
+void ipc_imem_irq_process(struct iosm_imem *ipc_imem, int irq)
+{
+	/* Debounce IPC_EV_IRQ. */
+	if (ipc_imem && !ipc_imem->ev_irq_pending[irq]) {
+		ipc_imem->ev_irq_pending[irq] = true;
+		ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_irq_cb, irq,
+					 NULL, 0, false);
+	}
+}
+
+void ipc_imem_td_update_timer_suspend(struct iosm_imem *ipc_imem, bool suspend)
+{
+	ipc_imem->td_update_timer_suspended = suspend;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h
new file mode 100644
index 000000000000..0d2f10e4cbc8
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h
@@ -0,0 +1,579 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_IMEM_H
+#define IOSM_IPC_IMEM_H
+
+#include <linux/skbuff.h>
+#include <stdbool.h>
+
+#include "iosm_ipc_mmio.h"
+#include "iosm_ipc_pcie.h"
+#include "iosm_ipc_uevent.h"
+#include "iosm_ipc_wwan.h"
+#include "iosm_ipc_task_queue.h"
+
+struct ipc_chnl_cfg;
+
+/* IRQ moderation in usec */
+#define IRQ_MOD_OFF 0
+#define IRQ_MOD_NET 1000
+#define IRQ_MOD_TRC 4000
+
+/* Either the PSI image is accepted by CP or the suspended flash tool is waken,
+ * informed that the CP ROM driver is not ready to process the PSI image.
+ * unit : milliseconds
+ */
+#define IPC_PSI_TRANSFER_TIMEOUT 3000
+
+/* Timeout in 20 msec to wait for the modem to boot up to
+ * IPC_MEM_DEVICE_IPC_INIT state.
+ * unit : milliseconds (500 * ipc_util_msleep(20))
+ */
+#define IPC_MODEM_BOOT_TIMEOUT 500
+
+/* Wait timeout for ipc status reflects IPC_MEM_DEVICE_IPC_UNINIT
+ * unit : milliseconds
+ */
+#define IPC_MODEM_UNINIT_TIMEOUT_MS 30
+
+/* Pending time for processing data.
+ * unit : milliseconds
+ */
+#define IPC_PEND_DATA_TIMEOUT 500
+
+/* The timeout in milliseconds for application to wait for remote time. */
+#define IPC_REMOTE_TS_TIMEOUT_MS 10
+
+/* Timeout for TD allocation retry.
+ * unit : milliseconds
+ */
+#define IPC_TD_ALLOC_TIMER_PERIOD_MS 100
+
+/* Host sleep target is host */
+#define IPC_HOST_SLEEP_HOST 0
+
+/* Host sleep target is device */
+#define IPC_HOST_SLEEP_DEVICE 1
+
+/* Sleep message, target host: AP enters sleep / target device: CP is
+ * allowed to enter sleep and shall use the host sleep protocol
+ */
+#define IPC_HOST_SLEEP_ENTER_SLEEP 0
+
+/* Sleep_message, target host: AP exits  sleep / target device: CP is
+ * NOT allowed to enter sleep
+ */
+#define IPC_HOST_SLEEP_EXIT_SLEEP 1
+
+#define IMEM_IRQ_DONT_CARE (-1)
+
+#define IPC_MEM_MAX_CHANNELS 7
+
+#define IPC_MEM_MUX_IP_SESSION_ENTRIES 8
+
+#define IPC_MEM_MUX_IP_CH_IF_ID 0
+
+#define TD_UPDATE_DEFAULT_TIMEOUT_USEC 1900
+
+#define FORCE_UPDATE_DEFAULT_TIMEOUT_USEC 500
+
+/* Sleep_message, target host: not applicable  / target device: CP is
+ * allowed to enter sleep and shall NOT use the device sleep protocol
+ */
+#define IPC_HOST_SLEEP_ENTER_SLEEP_NO_PROTOCOL 2
+
+/* in_band_crash_signal IPC_MEM_INBAND_CRASH_SIG
+ * Modem crash notification configuration. If this value is non-zero then
+ * FEATURE_SET message will be sent to the Modem as a result the Modem will
+ * signal Crash via Execution Stage register. If this value is zero then Modem
+ * will use out-of-band method to notify about it's Crash.
+ */
+#define IPC_MEM_INBAND_CRASH_SIG 1
+
+/* Extra headroom to be allocated for DL SKBs to allow addition of Ethernet
+ * header
+ */
+#define IPC_MEM_DL_ETH_OFFSET 16
+
+#define IPC_CB(skb) ((struct ipc_skb_cb *)((skb)->cb))
+
+#define FULLY_FUNCTIONAL 0
+
+/* List of the supported UL/DL pipes. */
+enum ipc_mem_pipes {
+	IPC_MEM_PIPE_0 = 0,
+	IPC_MEM_PIPE_1,
+	IPC_MEM_PIPE_2,
+	IPC_MEM_PIPE_3,
+	IPC_MEM_PIPE_4,
+	IPC_MEM_PIPE_5,
+	IPC_MEM_PIPE_6,
+	IPC_MEM_PIPE_7,
+	IPC_MEM_PIPE_8,
+	IPC_MEM_PIPE_9,
+	IPC_MEM_PIPE_10,
+	IPC_MEM_PIPE_11,
+	IPC_MEM_PIPE_12,
+	IPC_MEM_PIPE_13,
+	IPC_MEM_PIPE_14,
+	IPC_MEM_PIPE_15,
+	IPC_MEM_PIPE_16,
+	IPC_MEM_PIPE_17,
+	IPC_MEM_PIPE_18,
+	IPC_MEM_PIPE_19,
+	IPC_MEM_PIPE_20,
+	IPC_MEM_PIPE_21,
+	IPC_MEM_PIPE_22,
+	IPC_MEM_PIPE_23,
+	IPC_MEM_MAX_PIPES
+};
+
+/* Enum defining channel states. */
+enum ipc_channel_state {
+	IMEM_CHANNEL_FREE,
+	IMEM_CHANNEL_RESERVED,
+	IMEM_CHANNEL_ACTIVE,
+	IMEM_CHANNEL_CLOSING,
+};
+
+/* Time Unit */
+enum ipc_time_unit {
+	IPC_SEC = 0,
+	IPC_MILLI_SEC = 1,
+	IPC_MICRO_SEC = 2,
+	IPC_NANO_SEC = 3,
+	IPC_PICO_SEC = 4,
+	IPC_FEMTO_SEC = 5,
+	IPC_ATTO_SEC = 6,
+};
+
+/**
+ * enum ipc_ctype - Enum defining supported channel type needed for control
+ *		    /IP traffic.
+ * @IPC_CTYPE_WWAN:		Used for IP traffic
+ * @IPC_CTYPE_CTRL:		Used for Control Communication
+ */
+enum ipc_ctype {
+	IPC_CTYPE_WWAN,
+	IPC_CTYPE_CTRL,
+};
+
+/* Pipe direction. */
+enum ipc_mem_pipe_dir {
+	IPC_MEM_DIR_UL,
+	IPC_MEM_DIR_DL,
+};
+
+/* HP update identifier. To be used as data for ipc_cp_irq_hpda_update() */
+enum ipc_hp_identifier {
+	IPC_HP_MR = 0,
+	IPC_HP_PM_TRIGGER,
+	IPC_HP_WAKEUP_SPEC_TMR,
+	IPC_HP_TD_UPD_TMR_START,
+	IPC_HP_TD_UPD_TMR,
+	IPC_HP_FAST_TD_UPD_TMR,
+	IPC_HP_UL_WRITE_TD,
+	IPC_HP_DL_PROCESS,
+	IPC_HP_NET_CHANNEL_INIT,
+	IPC_HP_CDEV_OPEN,
+};
+
+/**
+ * struct ipc_pipe - Structure for Pipe.
+ * @tdr_start:			Ipc private protocol Transfer Descriptor Ring
+ * @channel:			Id of the sio device, set by imem_sio_open,
+ *				needed to pass DL char to the user terminal
+ * @skbr_start:			Circular buffer for skbuf and the buffer
+ *				reference in a tdr_start entry.
+ * @phy_tdr_start:		Transfer descriptor start address
+ * @old_head:			last head pointer reported to CP.
+ * @old_tail:			AP read position before CP moves the read
+ *				position to write/head. If CP has consumed the
+ *				buffers, AP has to freed the skbuf starting at
+ *				tdr_start[old_tail].
+ * @nr_of_entries:		Number of elements of skb_start and tdr_start.
+ * @max_nr_of_queued_entries:	Maximum number of queued entries in TDR
+ * @accumulation_backoff:	Accumulation in usec for accumulation
+ *				backoff (0 = no acc backoff)
+ * @irq_moderation:		timer in usec for irq_moderation
+ *				(0=no irq moderation)
+ * @pipe_nr:			Pipe identification number
+ * @irq:			Interrupt vector
+ * @dir:			Direction of data stream in pipe
+ * @td_tag:			Unique tag of the buffer queued
+ * @buf_size:			Buffer size (in bytes) for preallocated
+ *				buffers (for DL pipes)
+ * @nr_of_queued_entries:	Aueued number of entries
+ * @is_open:			Check for open pipe status
+ */
+struct ipc_pipe {
+	struct ipc_protocol_td *tdr_start;
+	struct ipc_mem_channel *channel;
+	struct sk_buff **skbr_start;
+	dma_addr_t phy_tdr_start;
+	u32 old_head;
+	u32 old_tail;
+	u32 nr_of_entries;
+	u32 max_nr_of_queued_entries;
+	u32 accumulation_backoff;
+	u32 irq_moderation;
+	u32 pipe_nr;
+	u32 irq;
+	enum ipc_mem_pipe_dir dir;
+	u32 td_tag;
+	u32 buf_size;
+	u16 nr_of_queued_entries;
+	u8 is_open:1;
+};
+
+/**
+ * struct ipc_mem_channel - Structure for Channel.
+ * @channel_id:		Instance of the channel list and is return to the user
+ *			at the end of the open operation.
+ * @ctype:		Control or netif channel.
+ * @index:		unique index per ctype
+ * @ul_pipe:		pipe objects
+ * @dl_pipe:		pipe objects
+ * @if_id:		Interface ID
+ * @net_err_count:	Number of downlink errors returned by ipc_wwan_receive
+ *			interface at the entry point of the IP stack.
+ * @state:		Free, reserved or busy (in use).
+ * @ul_sem:		Needed for the blocking write or uplink transfer.
+ * @ul_list:		Uplink accumulator which is filled by the uplink
+ *			char app or IP stack. The socket buffer pointer are
+ *			added to the descriptor list in the kthread context.
+ */
+struct ipc_mem_channel {
+	int channel_id;
+	enum ipc_ctype ctype;
+	int index;
+	struct ipc_pipe ul_pipe;
+	struct ipc_pipe dl_pipe;
+	int if_id;
+	u32 net_err_count;
+	enum ipc_channel_state state;
+	struct completion ul_sem;
+	struct sk_buff_head ul_list;
+};
+
+/**
+ * enum ipc_phase - Different AP and CP phases.
+ *		    The enums defined after "IPC_P_ROM" and before
+ *		    "IPC_P_RUN" indicates the operating state where CP can
+ *		    respond to any requests. So while introducing new phase
+ *		    this shall be taken into consideration.
+ * @IPC_P_OFF:		On host PC, the PCIe device link settings are known
+ *			about the combined power on. PC is running, the driver
+ *			is loaded and CP is in power off mode. The PCIe bus
+ *			driver call the device power mode D3hot. In this phase
+ *			the driver the polls the device, until the device is in
+ *			the power on state and signals the power mode D0.
+ * @IPC_P_OFF_REQ:	The intermediate phase between cleanup activity starts
+ *			and ends.
+ * @IPC_P_CRASH:	The phase indicating CP crash
+ * @IPC_P_CD_READY:	The phase indicating CP core dump is ready
+ * @IPC_P_ROM:		After power on, CP starts in ROM mode and the IPC ROM
+ *			driver is waiting 150 ms for the AP active notification
+ *			saved in the PCI link status register.
+ * @IPC_P_PSI:		Primary signed image download phase
+ * @IPC_P_EBL:		Extended bootloader pahse
+ * @IPC_P_RUN:		The phase after flashing to RAM is the RUNTIME phase.
+ */
+enum ipc_phase {
+	IPC_P_OFF,
+	IPC_P_OFF_REQ,
+	IPC_P_CRASH,
+	IPC_P_CD_READY,
+	IPC_P_ROM,
+	IPC_P_PSI,
+	IPC_P_EBL,
+	IPC_P_RUN,
+};
+
+/**
+ * struct iosm_imem - Current state of the IPC shared memory.
+ * @mmio:			mmio instance to access CP MMIO area /
+ *				doorbell scratchpad.
+ * @ipc_protocol:		IPC Protocol instance
+ * @ipc_task:			Task for entry into ipc task queue
+ * @wwan:			WWAN device pointer
+ * @mux:			IP Data multiplexing state.
+ * @sio:			IPC SIO data structure pointer
+ * @ipc_port:			IPC PORT data structure pointer
+ * @pcie:			IPC PCIe
+ * @dev:			Pointer to device structure
+ * @flash_channel_id:		Reserved channel id for flashing to RAM.
+ * @ipc_requested_state:	Expected IPC state on CP.
+ * @channels:			Channel list with UL/DL pipe pairs.
+ * @ipc_status:			local ipc_status
+ * @nr_of_channels:		number of configured channels
+ * @startup_timer:		startup timer for NAND support.
+ * @hrtimer_period:		Hr timer period
+ * @tdupdate_timer:		Delay the TD update doorbell.
+ * @fast_update_timer:		forced head pointer update delay timer.
+ * @td_alloc_timer:		Timer for DL pipe TD allocation retry
+ * @rom_exit_code:		Mapped boot rom exit code.
+ * @enter_runtime:		1 means the transition to runtime phase was
+ *				executed.
+ * @ul_pend_sem:		Semaphore to wait/complete of UL TDs
+ *				before closing pipe.
+ * @app_notify_ul_pend:		Signal app if UL TD is pending
+ * @dl_pend_sem:		Semaphore to wait/complete of DL TDs
+ *				before closing pipe.
+ * @app_notify_dl_pend:		Signal app if DL TD is pending
+ * @phase:			Operating phase like runtime.
+ * @pci_device_id:		Device ID
+ * @cp_version:			CP version
+ * @device_sleep:		Device sleep state
+ * @run_state_worker:		Pointer to worker component for device
+ *				setup operations to be called when modem
+ *				reaches RUN state
+ * @ev_irq_pending:		0 means inform the IPC tasklet to
+ *				process the irq actions.
+ * @flag:			Flag to monitor the state of driver
+ * @td_update_timer_suspended:	if true then td update timer suspend
+ * @ev_cdev_write_pending:	0 means inform the IPC tasklet to pass
+ *				the accumulated uplink buffers to CP.
+ * @ev_mux_net_transmit_pending:0 means inform the IPC tasklet to pass
+ * @reset_det_n:		Reset detect flag
+ * @pcie_wake_n:		Pcie wake flag
+ */
+struct iosm_imem {
+	struct iosm_mmio *mmio;
+	struct iosm_protocol *ipc_protocol;
+	struct ipc_task *ipc_task;
+	struct iosm_wwan *wwan;
+	struct iosm_mux *mux;
+	struct iosm_cdev *ipc_port[IPC_MEM_MAX_CHANNELS];
+	struct iosm_pcie *pcie;
+	struct device *dev;
+	int flash_channel_id;
+	enum ipc_mem_device_ipc_state ipc_requested_state;
+	struct ipc_mem_channel channels[IPC_MEM_MAX_CHANNELS];
+	u32 ipc_status;
+	u32 nr_of_channels;
+	struct hrtimer startup_timer;
+	ktime_t hrtimer_period;
+	struct hrtimer tdupdate_timer;
+	struct hrtimer fast_update_timer;
+	struct hrtimer td_alloc_timer;
+	enum rom_exit_code rom_exit_code;
+	u32 enter_runtime;
+	struct completion ul_pend_sem;
+	u32 app_notify_ul_pend;
+	struct completion dl_pend_sem;
+	u32 app_notify_dl_pend;
+	enum ipc_phase phase;
+	u16 pci_device_id;
+	int cp_version;
+	int device_sleep;
+	struct work_struct run_state_worker;
+	u8 ev_irq_pending[IPC_IRQ_VECTORS];
+	unsigned long flag;
+	u8 td_update_timer_suspended:1,
+	   ev_cdev_write_pending:1,
+	   ev_mux_net_transmit_pending:1,
+	   reset_det_n:1,
+	   pcie_wake_n:1;
+};
+
+/**
+ * ipc_imem_init - Initialize the shared memory region
+ * @pcie:	Pointer to core driver data-struct
+ * @device_id:	PCI device ID
+ * @mmio:	Pointer to the mmio area
+ * @dev:	Pointer to device structure
+ *
+ * Returns:  Initialized imem pointer on success else NULL
+ */
+struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id,
+				void __iomem *mmio, struct device *dev);
+
+/**
+ * ipc_imem_pm_s2idle_sleep - Set PM variables to sleep/active for
+ *			      s2idle sleep/active
+ * @ipc_imem:	Pointer to imem data-struct
+ * @sleep:	Set PM Variable to sleep/active
+ */
+void ipc_imem_pm_s2idle_sleep(struct iosm_imem *ipc_imem, bool sleep);
+
+/**
+ * ipc_imem_pm_suspend - The HAL shall ask the shared memory layer
+ *			 whether D3 is allowed.
+ * @ipc_imem:	Pointer to imem data-struct
+ */
+void ipc_imem_pm_suspend(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_pm_resume - The HAL shall inform the shared memory layer
+ *			that the device is active.
+ * @ipc_imem:	Pointer to imem data-struct
+ */
+void ipc_imem_pm_resume(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_cleanup -	Inform CP and free the shared memory resources.
+ * @ipc_imem:	Pointer to imem data-struct
+ */
+void ipc_imem_cleanup(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_irq_process - Shift the IRQ actions to the IPC thread.
+ * @ipc_imem:	Pointer to imem data-struct
+ * @irq:	Irq number
+ */
+void ipc_imem_irq_process(struct iosm_imem *ipc_imem, int irq);
+
+/**
+ * imem_get_device_sleep_state - Get the device sleep state value.
+ * @ipc_imem:	Pointer to imem instance
+ *
+ * Returns: device sleep state
+ */
+int imem_get_device_sleep_state(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_td_update_timer_suspend - Updates the TD Update Timer suspend flag.
+ * @ipc_imem:	Pointer to imem data-struct
+ * @suspend:	Flag to update. If TRUE then HP update doorbell is triggered to
+ *		device without any wait. If FALSE then HP update doorbell is
+ *		delayed until timeout.
+ */
+void ipc_imem_td_update_timer_suspend(struct iosm_imem *ipc_imem, bool suspend);
+
+/**
+ * ipc_imem_channel_close - Release the channel resources.
+ * @ipc_imem:		Pointer to imem data-struct
+ * @channel_id:		Channel ID to be cleaned up.
+ */
+void ipc_imem_channel_close(struct iosm_imem *ipc_imem, int channel_id);
+
+/**
+ * ipc_imem_channel_alloc - Reserves a channel
+ * @ipc_imem:	Pointer to imem data-struct
+ * @index:	ID to lookup from the preallocated list.
+ * @ctype:	Channel type.
+ *
+ * Returns: Index on success and failure value on error
+ */
+int ipc_imem_channel_alloc(struct iosm_imem *ipc_imem, int index,
+			   enum ipc_ctype ctype);
+
+/**
+ * ipc_imem_channel_open - Establish the pipes.
+ * @ipc_imem:		Pointer to imem data-struct
+ * @channel_id:		Channel ID returned during alloc.
+ * @db_id:		Doorbell ID for trigger identifier.
+ *
+ * Returns: Pointer of ipc_mem_channel on success and NULL on failure.
+ */
+struct ipc_mem_channel *ipc_imem_channel_open(struct iosm_imem *ipc_imem,
+					      int channel_id, u32 db_id);
+
+/**
+ * ipc_imem_td_update_timer_start - Starts the TD Update Timer if not running.
+ * @ipc_imem:	Pointer to imem data-struct
+ */
+void ipc_imem_td_update_timer_start(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_ul_write_td - Pass the channel UL list to protocol layer for TD
+ *		      preparation and sending them to the device.
+ * @ipc_imem:	Pointer to imem data-struct
+ *
+ * Returns: TRUE of HP Doorbell trigger is pending. FALSE otherwise.
+ */
+bool ipc_imem_ul_write_td(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_ul_send - Dequeue SKB from channel list and start with
+ *		  the uplink transfer.If HP Doorbell is pending to be
+ *		  triggered then starts the TD Update Timer.
+ * @ipc_imem:	Pointer to imem data-struct
+ */
+void ipc_imem_ul_send(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_channel_update - Set or modify pipe config of an existing channel
+ * @ipc_imem:		Pointer to imem data-struct
+ * @id:			Channel config index
+ * @chnl_cfg:		Channel config struct
+ * @irq_moderation:	Timer in usec for irq_moderation
+ */
+void ipc_imem_channel_update(struct iosm_imem *ipc_imem, int id,
+			     struct ipc_chnl_cfg chnl_cfg, u32 irq_moderation);
+
+/**
+ * ipc_imem_channel_free -Free an IPC channel.
+ * @channel:	Channel to be freed
+ */
+void ipc_imem_channel_free(struct ipc_mem_channel *channel);
+
+/**
+ * ipc_imem_hrtimer_stop - Stop the hrtimer
+ * @hr_timer:	Pointer to hrtimer instance
+ */
+void ipc_imem_hrtimer_stop(struct hrtimer *hr_timer);
+
+/**
+ * ipc_imem_pipe_cleanup - Reset volatile pipe content for all channels
+ * @ipc_imem:	Pointer to imem data-struct
+ * @pipe:	Pipe to cleaned up
+ */
+void ipc_imem_pipe_cleanup(struct iosm_imem *ipc_imem, struct ipc_pipe *pipe);
+
+/**
+ * ipc_imem_pipe_close - Send msg to device to close pipe
+ * @ipc_imem:	Pointer to imem data-struct
+ * @pipe:	Pipe to be closed
+ */
+void ipc_imem_pipe_close(struct iosm_imem *ipc_imem, struct ipc_pipe *pipe);
+
+/**
+ * ipc_imem_phase_update - Get the CP execution state
+ *			  and map it to the AP phase.
+ * @ipc_imem:	Pointer to imem data-struct
+ *
+ * Returns: Current ap updated phase
+ */
+enum ipc_phase ipc_imem_phase_update(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_phase_get_string - Return the current operation
+ *			     phase as string.
+ * @phase:	AP phase
+ *
+ * Returns: AP phase string
+ */
+const char *ipc_imem_phase_get_string(enum ipc_phase phase);
+
+/**
+ * ipc_imem_msg_send_feature_set - Send feature set message to modem
+ * @ipc_imem:		Pointer to imem data-struct
+ * @reset_enable:	0 = out-of-band, 1 = in-band-crash notification
+ * @atomic_ctx:		if disabled call in tasklet context
+ *
+ */
+void ipc_imem_msg_send_feature_set(struct iosm_imem *ipc_imem,
+				   unsigned int reset_enable, bool atomic_ctx);
+
+/**
+ * ipc_imem_ipc_init_check - Send the init event to CP, wait a certain time and
+ *			     set CP to runtime with the context information
+ * @ipc_imem:	Pointer to imem data-struct
+ */
+void ipc_imem_ipc_init_check(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_imem_channel_init - Initialize the channel list with UL/DL pipe pairs.
+ * @ipc_imem:		Pointer to imem data-struct
+ * @ctype:		Channel type
+ * @chnl_cfg:		Channel configuration struct
+ * @irq_moderation:	Timer in usec for irq_moderation
+ */
+void ipc_imem_channel_init(struct iosm_imem *ipc_imem, enum ipc_ctype ctype,
+			   struct ipc_chnl_cfg chnl_cfg, u32 irq_moderation);
+#endif

From edf6423c04037040c7e0549fdebc903d68979515 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:12 +0530
Subject: [PATCH 1604/3292] net: iosm: shared memory I/O operations

1) Binds logical channel between host-device for communication.
2) Implements device specific(Char/Net) IO operations.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 346 ++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_imem_ops.h |  98 ++++++
 2 files changed, 444 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_imem_ops.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_imem_ops.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c
new file mode 100644
index 000000000000..46f76e8aae92
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/delay.h>
+
+#include "iosm_ipc_chnl_cfg.h"
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_imem_ops.h"
+#include "iosm_ipc_port.h"
+#include "iosm_ipc_task_queue.h"
+
+/* Open a packet data online channel between the network layer and CP. */
+int ipc_imem_sys_wwan_open(struct iosm_imem *ipc_imem, int if_id)
+{
+	dev_dbg(ipc_imem->dev, "%s if id: %d",
+		ipc_imem_phase_get_string(ipc_imem->phase), if_id);
+
+	/* The network interface is only supported in the runtime phase. */
+	if (ipc_imem_phase_update(ipc_imem) != IPC_P_RUN) {
+		dev_err(ipc_imem->dev, "net:%d : refused phase %s", if_id,
+			ipc_imem_phase_get_string(ipc_imem->phase));
+		return -EIO;
+	}
+
+	/* check for the interafce id
+	 * if if_id 1 to 8 then create IP MUX channel sessions.
+	 * To start MUX session from 0 as network interface id would start
+	 * from 1 so map it to if_id = if_id - 1
+	 */
+	if (if_id >= IP_MUX_SESSION_START && if_id <= IP_MUX_SESSION_END)
+		return ipc_mux_open_session(ipc_imem->mux, if_id - 1);
+
+	return -EINVAL;
+}
+
+/* Release a net link to CP. */
+void ipc_imem_sys_wwan_close(struct iosm_imem *ipc_imem, int if_id,
+			     int channel_id)
+{
+	if (ipc_imem->mux && if_id >= IP_MUX_SESSION_START &&
+	    if_id <= IP_MUX_SESSION_END)
+		ipc_mux_close_session(ipc_imem->mux, if_id - 1);
+}
+
+/* Tasklet call to do uplink transfer. */
+static int ipc_imem_tq_cdev_write(struct iosm_imem *ipc_imem, int arg,
+				  void *msg, size_t size)
+{
+	ipc_imem->ev_cdev_write_pending = false;
+	ipc_imem_ul_send(ipc_imem);
+
+	return 0;
+}
+
+/* Through tasklet to do sio write. */
+static int ipc_imem_call_cdev_write(struct iosm_imem *ipc_imem)
+{
+	if (ipc_imem->ev_cdev_write_pending)
+		return -1;
+
+	ipc_imem->ev_cdev_write_pending = true;
+
+	return ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_cdev_write, 0,
+					NULL, 0, false);
+}
+
+/* Function for transfer UL data */
+int ipc_imem_sys_wwan_transmit(struct iosm_imem *ipc_imem,
+			       int if_id, int channel_id, struct sk_buff *skb)
+{
+	int ret = -EINVAL;
+
+	if (!ipc_imem || channel_id < 0)
+		goto out;
+
+	/* Is CP Running? */
+	if (ipc_imem->phase != IPC_P_RUN) {
+		dev_dbg(ipc_imem->dev, "phase %s transmit",
+			ipc_imem_phase_get_string(ipc_imem->phase));
+		ret = -EIO;
+		goto out;
+	}
+
+	if (if_id >= IP_MUX_SESSION_START && if_id <= IP_MUX_SESSION_END)
+		/* Route the UL packet through IP MUX Layer */
+		ret = ipc_mux_ul_trigger_encode(ipc_imem->mux,
+						if_id - 1, skb);
+	else
+		dev_err(ipc_imem->dev,
+			"invalid if_id %d: ", if_id);
+out:
+	return ret;
+}
+
+/* Initialize wwan channel */
+void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
+				enum ipc_mux_protocol mux_type)
+{
+	struct ipc_chnl_cfg chnl_cfg = { 0 };
+
+	ipc_imem->cp_version = ipc_mmio_get_cp_version(ipc_imem->mmio);
+
+	/* If modem version is invalid (0xffffffff), do not initialize WWAN. */
+	if (ipc_imem->cp_version == -1) {
+		dev_err(ipc_imem->dev, "invalid CP version");
+		return;
+	}
+
+	ipc_chnl_cfg_get(&chnl_cfg, ipc_imem->nr_of_channels);
+	ipc_imem_channel_init(ipc_imem, IPC_CTYPE_WWAN, chnl_cfg,
+			      IRQ_MOD_OFF);
+
+	/* WWAN registration. */
+	ipc_imem->wwan = ipc_wwan_init(ipc_imem, ipc_imem->dev);
+	if (!ipc_imem->wwan)
+		dev_err(ipc_imem->dev,
+			"failed to register the ipc_wwan interfaces");
+}
+
+/* Map SKB to DMA for transfer */
+static int ipc_imem_map_skb_to_dma(struct iosm_imem *ipc_imem,
+				   struct sk_buff *skb)
+{
+	struct iosm_pcie *ipc_pcie = ipc_imem->pcie;
+	char *buf = skb->data;
+	int len = skb->len;
+	dma_addr_t mapping;
+	int ret;
+
+	ret = ipc_pcie_addr_map(ipc_pcie, buf, len, &mapping, DMA_TO_DEVICE);
+
+	if (ret)
+		goto err;
+
+	BUILD_BUG_ON(sizeof(*IPC_CB(skb)) > sizeof(skb->cb));
+
+	IPC_CB(skb)->mapping = mapping;
+	IPC_CB(skb)->direction = DMA_TO_DEVICE;
+	IPC_CB(skb)->len = len;
+	IPC_CB(skb)->op_type = (u8)UL_DEFAULT;
+
+err:
+	return ret;
+}
+
+/* return true if channel is ready for use */
+static bool ipc_imem_is_channel_active(struct iosm_imem *ipc_imem,
+				       struct ipc_mem_channel *channel)
+{
+	enum ipc_phase phase;
+
+	/* Update the current operation phase. */
+	phase = ipc_imem->phase;
+
+	/* Select the operation depending on the execution stage. */
+	switch (phase) {
+	case IPC_P_RUN:
+	case IPC_P_PSI:
+	case IPC_P_EBL:
+		break;
+
+	case IPC_P_ROM:
+		/* Prepare the PSI image for the CP ROM driver and
+		 * suspend the flash app.
+		 */
+		if (channel->state != IMEM_CHANNEL_RESERVED) {
+			dev_err(ipc_imem->dev,
+				"ch[%d]:invalid channel state %d,expected %d",
+				channel->channel_id, channel->state,
+				IMEM_CHANNEL_RESERVED);
+			goto channel_unavailable;
+		}
+		goto channel_available;
+
+	default:
+		/* Ignore uplink actions in all other phases. */
+		dev_err(ipc_imem->dev, "ch[%d]: confused phase %d",
+			channel->channel_id, phase);
+		goto channel_unavailable;
+	}
+	/* Check the full availability of the channel. */
+	if (channel->state != IMEM_CHANNEL_ACTIVE) {
+		dev_err(ipc_imem->dev, "ch[%d]: confused channel state %d",
+			channel->channel_id, channel->state);
+		goto channel_unavailable;
+	}
+
+channel_available:
+	return true;
+
+channel_unavailable:
+	return false;
+}
+
+/* Release a sio link to CP. */
+void ipc_imem_sys_cdev_close(struct iosm_cdev *ipc_cdev)
+{
+	struct iosm_imem *ipc_imem = ipc_cdev->ipc_imem;
+	struct ipc_mem_channel *channel = ipc_cdev->channel;
+	enum ipc_phase curr_phase;
+	int status = 0;
+	u32 tail = 0;
+
+	curr_phase = ipc_imem->phase;
+
+	/* If current phase is IPC_P_OFF or SIO ID is -ve then
+	 * channel is already freed. Nothing to do.
+	 */
+	if (curr_phase == IPC_P_OFF) {
+		dev_err(ipc_imem->dev,
+			"nothing to do. Current Phase: %s",
+			ipc_imem_phase_get_string(curr_phase));
+		return;
+	}
+
+	if (channel->state == IMEM_CHANNEL_FREE) {
+		dev_err(ipc_imem->dev, "ch[%d]: invalid channel state %d",
+			channel->channel_id, channel->state);
+		return;
+	}
+
+	/* If there are any pending TDs then wait for Timeout/Completion before
+	 * closing pipe.
+	 */
+	if (channel->ul_pipe.old_tail != channel->ul_pipe.old_head) {
+		ipc_imem->app_notify_ul_pend = 1;
+
+		/* Suspend the user app and wait a certain time for processing
+		 * UL Data.
+		 */
+		status = wait_for_completion_interruptible_timeout
+			 (&ipc_imem->ul_pend_sem,
+			  msecs_to_jiffies(IPC_PEND_DATA_TIMEOUT));
+		if (status == 0) {
+			dev_dbg(ipc_imem->dev,
+				"Pend data Timeout UL-Pipe:%d Head:%d Tail:%d",
+				channel->ul_pipe.pipe_nr,
+				channel->ul_pipe.old_head,
+				channel->ul_pipe.old_tail);
+		}
+
+		ipc_imem->app_notify_ul_pend = 0;
+	}
+
+	/* If there are any pending TDs then wait for Timeout/Completion before
+	 * closing pipe.
+	 */
+	ipc_protocol_get_head_tail_index(ipc_imem->ipc_protocol,
+					 &channel->dl_pipe, NULL, &tail);
+
+	if (tail != channel->dl_pipe.old_tail) {
+		ipc_imem->app_notify_dl_pend = 1;
+
+		/* Suspend the user app and wait a certain time for processing
+		 * DL Data.
+		 */
+		status = wait_for_completion_interruptible_timeout
+			 (&ipc_imem->dl_pend_sem,
+			  msecs_to_jiffies(IPC_PEND_DATA_TIMEOUT));
+		if (status == 0) {
+			dev_dbg(ipc_imem->dev,
+				"Pend data Timeout DL-Pipe:%d Head:%d Tail:%d",
+				channel->dl_pipe.pipe_nr,
+				channel->dl_pipe.old_head,
+				channel->dl_pipe.old_tail);
+		}
+
+		ipc_imem->app_notify_dl_pend = 0;
+	}
+
+	/* Due to wait for completion in messages, there is a small window
+	 * between closing the pipe and updating the channel is closed. In this
+	 * small window there could be HP update from Host Driver. Hence update
+	 * the channel state as CLOSING to aviod unnecessary interrupt
+	 * towards CP.
+	 */
+	channel->state = IMEM_CHANNEL_CLOSING;
+
+	ipc_imem_pipe_close(ipc_imem, &channel->ul_pipe);
+	ipc_imem_pipe_close(ipc_imem, &channel->dl_pipe);
+
+	ipc_imem_channel_free(channel);
+}
+
+/* Open a PORT link to CP and return the channel */
+struct ipc_mem_channel *ipc_imem_sys_port_open(struct iosm_imem *ipc_imem,
+					       int chl_id, int hp_id)
+{
+	struct ipc_mem_channel *channel;
+	int ch_id;
+
+	/* The PORT interface is only supported in the runtime phase. */
+	if (ipc_imem_phase_update(ipc_imem) != IPC_P_RUN) {
+		dev_err(ipc_imem->dev, "PORT open refused, phase %s",
+			ipc_imem_phase_get_string(ipc_imem->phase));
+		return NULL;
+	}
+
+	ch_id = ipc_imem_channel_alloc(ipc_imem, chl_id, IPC_CTYPE_CTRL);
+
+	if (ch_id < 0) {
+		dev_err(ipc_imem->dev, "reservation of an PORT chnl id failed");
+		return NULL;
+	}
+
+	channel = ipc_imem_channel_open(ipc_imem, ch_id, hp_id);
+
+	if (!channel) {
+		dev_err(ipc_imem->dev, "PORT channel id open failed");
+		return NULL;
+	}
+
+	return channel;
+}
+
+/* transfer skb to modem */
+int ipc_imem_sys_cdev_write(struct iosm_cdev *ipc_cdev, struct sk_buff *skb)
+{
+	struct ipc_mem_channel *channel = ipc_cdev->channel;
+	struct iosm_imem *ipc_imem = ipc_cdev->ipc_imem;
+	int ret = -EIO;
+
+	if (!ipc_imem_is_channel_active(ipc_imem, channel) ||
+	    ipc_imem->phase == IPC_P_OFF_REQ)
+		goto out;
+
+	ret = ipc_imem_map_skb_to_dma(ipc_imem, skb);
+
+	if (ret)
+		goto out;
+
+	/* Add skb to the uplink skbuf accumulator. */
+	skb_queue_tail(&channel->ul_list, skb);
+
+	ret = ipc_imem_call_cdev_write(ipc_imem);
+
+	if (ret) {
+		skb_dequeue_tail(&channel->ul_list);
+		dev_err(ipc_cdev->dev, "channel id[%d] write failed\n",
+			ipc_cdev->channel->channel_id);
+	}
+out:
+	return ret;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
new file mode 100644
index 000000000000..84087cf33329
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_IMEM_OPS_H
+#define IOSM_IPC_IMEM_OPS_H
+
+#include "iosm_ipc_mux_codec.h"
+
+/* Maximum wait time for blocking read */
+#define IPC_READ_TIMEOUT 500
+
+/* The delay in ms for defering the unregister */
+#define SIO_UNREGISTER_DEFER_DELAY_MS 1
+
+/* Default delay till CP PSI image is running and modem updates the
+ * execution stage.
+ * unit : milliseconds
+ */
+#define PSI_START_DEFAULT_TIMEOUT 3000
+
+/* Default time out when closing SIO, till the modem is in
+ * running state.
+ * unit : milliseconds
+ */
+#define BOOT_CHECK_DEFAULT_TIMEOUT 400
+
+/* IP MUX channel range */
+#define IP_MUX_SESSION_START 1
+#define IP_MUX_SESSION_END 8
+
+/**
+ * ipc_imem_sys_port_open - Open a port link to CP.
+ * @ipc_imem:	Imem instance.
+ * @chl_id:	Channel Indentifier.
+ * @hp_id:	HP Indentifier.
+ *
+ * Return: channel instance on success, NULL for failure
+ */
+struct ipc_mem_channel *ipc_imem_sys_port_open(struct iosm_imem *ipc_imem,
+					       int chl_id, int hp_id);
+
+/**
+ * ipc_imem_sys_cdev_close - Release a sio link to CP.
+ * @ipc_cdev:		iosm sio instance.
+ */
+void ipc_imem_sys_cdev_close(struct iosm_cdev *ipc_cdev);
+
+/**
+ * ipc_imem_sys_cdev_write - Route the uplink buffer to CP.
+ * @ipc_cdev:		iosm_cdev instance.
+ * @skb:		Pointer to skb.
+ *
+ * Return: 0 on success and failure value on error
+ */
+int ipc_imem_sys_cdev_write(struct iosm_cdev *ipc_cdev, struct sk_buff *skb);
+
+/**
+ * ipc_imem_sys_wwan_open - Open packet data online channel between network
+ *			layer and CP.
+ * @ipc_imem:		Imem instance.
+ * @if_id:		ip link tag of the net device.
+ *
+ * Return: Channel ID on success and failure value on error
+ */
+int ipc_imem_sys_wwan_open(struct iosm_imem *ipc_imem, int if_id);
+
+/**
+ * ipc_imem_sys_wwan_close - Close packet data online channel between network
+ *			 layer and CP.
+ * @ipc_imem:		Imem instance.
+ * @if_id:		IP link id net device.
+ * @channel_id:		Channel ID to be closed.
+ */
+void ipc_imem_sys_wwan_close(struct iosm_imem *ipc_imem, int if_id,
+			     int channel_id);
+
+/**
+ * ipc_imem_sys_wwan_transmit - Function for transfer UL data
+ * @ipc_imem:		Imem instance.
+ * @if_id:		link ID of the device.
+ * @channel_id:		Channel ID used
+ * @skb:		Pointer to sk buffer
+ *
+ * Return: 0 on success and failure value on error
+ */
+int ipc_imem_sys_wwan_transmit(struct iosm_imem *ipc_imem, int if_id,
+			       int channel_id, struct sk_buff *skb);
+/**
+ * ipc_imem_wwan_channel_init - Initializes WWAN channels and the channel for
+ *				MUX.
+ * @ipc_imem:		Pointer to iosm_imem struct.
+ * @mux_type:		Type of mux protocol.
+ */
+void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
+				enum ipc_mux_protocol mux_type);
+#endif

From 30ebda7a313d1b45ea64311d8dbb12ff3961bb80 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:13 +0530
Subject: [PATCH 1605/3292] net: iosm: channel configuration

Defines pipes & channel configurations like channel type,
pipe mappings, No. of transfer descriptors and transfer
buffer size etc.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c | 88 +++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.h | 59 +++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c
new file mode 100644
index 000000000000..804e6c4f2c78
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/wwan.h>
+
+#include "iosm_ipc_chnl_cfg.h"
+
+/* Max. sizes of a downlink buffers */
+#define IPC_MEM_MAX_DL_FLASH_BUF_SIZE (16 * 1024)
+#define IPC_MEM_MAX_DL_LOOPBACK_SIZE (1 * 1024 * 1024)
+#define IPC_MEM_MAX_DL_AT_BUF_SIZE 2048
+#define IPC_MEM_MAX_DL_RPC_BUF_SIZE (32 * 1024)
+#define IPC_MEM_MAX_DL_MBIM_BUF_SIZE IPC_MEM_MAX_DL_RPC_BUF_SIZE
+
+/* Max. transfer descriptors for a pipe. */
+#define IPC_MEM_MAX_TDS_FLASH_DL 3
+#define IPC_MEM_MAX_TDS_FLASH_UL 6
+#define IPC_MEM_MAX_TDS_AT 4
+#define IPC_MEM_MAX_TDS_RPC 4
+#define IPC_MEM_MAX_TDS_MBIM IPC_MEM_MAX_TDS_RPC
+#define IPC_MEM_MAX_TDS_LOOPBACK 11
+
+/* Accumulation backoff usec */
+#define IRQ_ACC_BACKOFF_OFF 0
+
+/* MUX acc backoff 1ms */
+#define IRQ_ACC_BACKOFF_MUX 1000
+
+/* Modem channel configuration table
+ * Always reserve element zero for flash channel.
+ */
+static struct ipc_chnl_cfg modem_cfg[] = {
+	/* IP Mux */
+	{ IPC_MEM_IP_CHL_ID_0, IPC_MEM_PIPE_0, IPC_MEM_PIPE_1,
+	  IPC_MEM_MAX_TDS_MUX_LITE_UL, IPC_MEM_MAX_TDS_MUX_LITE_DL,
+	  IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE, WWAN_PORT_UNKNOWN },
+	/* RPC - 0 */
+	{ IPC_MEM_CTRL_CHL_ID_1, IPC_MEM_PIPE_2, IPC_MEM_PIPE_3,
+	  IPC_MEM_MAX_TDS_RPC, IPC_MEM_MAX_TDS_RPC,
+	  IPC_MEM_MAX_DL_RPC_BUF_SIZE, WWAN_PORT_UNKNOWN },
+	/* IAT0 */
+	{ IPC_MEM_CTRL_CHL_ID_2, IPC_MEM_PIPE_4, IPC_MEM_PIPE_5,
+	  IPC_MEM_MAX_TDS_AT, IPC_MEM_MAX_TDS_AT, IPC_MEM_MAX_DL_AT_BUF_SIZE,
+	  WWAN_PORT_AT },
+	/* Trace */
+	{ IPC_MEM_CTRL_CHL_ID_3, IPC_MEM_PIPE_6, IPC_MEM_PIPE_7,
+	  IPC_MEM_TDS_TRC, IPC_MEM_TDS_TRC, IPC_MEM_MAX_DL_TRC_BUF_SIZE,
+	  WWAN_PORT_UNKNOWN },
+	/* IAT1 */
+	{ IPC_MEM_CTRL_CHL_ID_4, IPC_MEM_PIPE_8, IPC_MEM_PIPE_9,
+	  IPC_MEM_MAX_TDS_AT, IPC_MEM_MAX_TDS_AT, IPC_MEM_MAX_DL_AT_BUF_SIZE,
+	  WWAN_PORT_AT },
+	/* Loopback */
+	{ IPC_MEM_CTRL_CHL_ID_5, IPC_MEM_PIPE_10, IPC_MEM_PIPE_11,
+	  IPC_MEM_MAX_TDS_LOOPBACK, IPC_MEM_MAX_TDS_LOOPBACK,
+	  IPC_MEM_MAX_DL_LOOPBACK_SIZE, WWAN_PORT_UNKNOWN },
+	/* MBIM Channel */
+	{ IPC_MEM_CTRL_CHL_ID_6, IPC_MEM_PIPE_12, IPC_MEM_PIPE_13,
+	  IPC_MEM_MAX_TDS_MBIM, IPC_MEM_MAX_TDS_MBIM,
+	  IPC_MEM_MAX_DL_MBIM_BUF_SIZE, WWAN_PORT_MBIM },
+};
+
+int ipc_chnl_cfg_get(struct ipc_chnl_cfg *chnl_cfg, int index)
+{
+	int array_size = ARRAY_SIZE(modem_cfg);
+
+	if (index >= array_size) {
+		pr_err("index: %d and array_size %d", index, array_size);
+		return -ECHRNG;
+	}
+
+	if (index == IPC_MEM_MUX_IP_CH_IF_ID)
+		chnl_cfg->accumulation_backoff = IRQ_ACC_BACKOFF_MUX;
+	else
+		chnl_cfg->accumulation_backoff = IRQ_ACC_BACKOFF_OFF;
+
+	chnl_cfg->ul_nr_of_entries = modem_cfg[index].ul_nr_of_entries;
+	chnl_cfg->dl_nr_of_entries = modem_cfg[index].dl_nr_of_entries;
+	chnl_cfg->dl_buf_size = modem_cfg[index].dl_buf_size;
+	chnl_cfg->id = modem_cfg[index].id;
+	chnl_cfg->ul_pipe = modem_cfg[index].ul_pipe;
+	chnl_cfg->dl_pipe = modem_cfg[index].dl_pipe;
+	chnl_cfg->wwan_port_type = modem_cfg[index].wwan_port_type;
+
+	return 0;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.h b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.h
new file mode 100644
index 000000000000..422471367f78
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation
+ */
+
+#ifndef IOSM_IPC_CHNL_CFG_H
+#define IOSM_IPC_CHNL_CFG_H
+
+#include "iosm_ipc_mux.h"
+
+/* Number of TDs on the trace channel */
+#define IPC_MEM_TDS_TRC 32
+
+/* Trace channel TD buffer size. */
+#define IPC_MEM_MAX_DL_TRC_BUF_SIZE 8192
+
+/* Channel ID */
+enum ipc_channel_id {
+	IPC_MEM_IP_CHL_ID_0 = 0,
+	IPC_MEM_CTRL_CHL_ID_1,
+	IPC_MEM_CTRL_CHL_ID_2,
+	IPC_MEM_CTRL_CHL_ID_3,
+	IPC_MEM_CTRL_CHL_ID_4,
+	IPC_MEM_CTRL_CHL_ID_5,
+	IPC_MEM_CTRL_CHL_ID_6,
+};
+
+/**
+ * struct ipc_chnl_cfg - IPC channel configuration structure
+ * @id:				Interface ID
+ * @ul_pipe:			Uplink datastream
+ * @dl_pipe:			Downlink datastream
+ * @ul_nr_of_entries:		Number of Transfer descriptor uplink pipe
+ * @dl_nr_of_entries:		Number of Transfer descriptor downlink pipe
+ * @dl_buf_size:		Downlink buffer size
+ * @wwan_port_type:		Wwan subsystem port type
+ * @accumulation_backoff:	Time in usec for data accumalation
+ */
+struct ipc_chnl_cfg {
+	u32 id;
+	u32 ul_pipe;
+	u32 dl_pipe;
+	u32 ul_nr_of_entries;
+	u32 dl_nr_of_entries;
+	u32 dl_buf_size;
+	u32 wwan_port_type;
+	u32 accumulation_backoff;
+};
+
+/**
+ * ipc_chnl_cfg_get - Get pipe configuration.
+ * @chnl_cfg:		Array of ipc_chnl_cfg struct
+ * @index:		Channel index (upto MAX_CHANNELS)
+ *
+ * Return: 0 on success and failure value on error
+ */
+int ipc_chnl_cfg_get(struct ipc_chnl_cfg *chnl_cfg, int index);
+
+#endif

From 10685b6e9868cdee3c747a6b6fce53332875ed2f Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:14 +0530
Subject: [PATCH 1606/3292] net: iosm: wwan port control device

Implements wwan port for MBIM & AT protocol communication

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_port.c | 85 +++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_port.h | 50 ++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_port.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_port.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_port.c b/drivers/net/wwan/iosm/iosm_ipc_port.c
new file mode 100644
index 000000000000..beb944847398
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_port.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_chnl_cfg.h"
+#include "iosm_ipc_imem_ops.h"
+#include "iosm_ipc_port.h"
+
+/* open logical channel for control communication */
+static int ipc_port_ctrl_start(struct wwan_port *port)
+{
+	struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port);
+	int ret = 0;
+
+	ipc_port->channel = ipc_imem_sys_port_open(ipc_port->ipc_imem,
+						   ipc_port->chl_id,
+						   IPC_HP_CDEV_OPEN);
+	if (!ipc_port->channel)
+		ret = -EIO;
+
+	return ret;
+}
+
+/* close logical channel */
+static void ipc_port_ctrl_stop(struct wwan_port *port)
+{
+	struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port);
+
+	ipc_imem_sys_cdev_close(ipc_port);
+}
+
+/* transfer control data to modem */
+static int ipc_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
+{
+	struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port);
+
+	return ipc_imem_sys_cdev_write(ipc_port, skb);
+}
+
+static const struct wwan_port_ops ipc_wwan_ctrl_ops = {
+	.start = ipc_port_ctrl_start,
+	.stop = ipc_port_ctrl_stop,
+	.tx = ipc_port_ctrl_tx,
+};
+
+/* Port init func */
+struct iosm_cdev *ipc_port_init(struct iosm_imem *ipc_imem,
+				struct ipc_chnl_cfg ipc_port_cfg)
+{
+	struct iosm_cdev *ipc_port = kzalloc(sizeof(*ipc_port), GFP_KERNEL);
+	enum wwan_port_type port_type = ipc_port_cfg.wwan_port_type;
+	enum ipc_channel_id chl_id = ipc_port_cfg.id;
+
+	if (!ipc_port)
+		return NULL;
+
+	ipc_port->dev = ipc_imem->dev;
+	ipc_port->pcie = ipc_imem->pcie;
+
+	ipc_port->port_type = port_type;
+	ipc_port->chl_id = chl_id;
+	ipc_port->ipc_imem = ipc_imem;
+
+	ipc_port->iosm_port = wwan_create_port(ipc_port->dev, port_type,
+					       &ipc_wwan_ctrl_ops, ipc_port);
+
+	return ipc_port;
+}
+
+/* Port deinit func */
+void ipc_port_deinit(struct iosm_cdev *port[])
+{
+	struct iosm_cdev *ipc_port;
+	u8 ctrl_chl_nr;
+
+	for (ctrl_chl_nr = 0; ctrl_chl_nr < IPC_MEM_MAX_CHANNELS;
+	     ctrl_chl_nr++) {
+		if (port[ctrl_chl_nr]) {
+			ipc_port = port[ctrl_chl_nr];
+			wwan_remove_port(ipc_port->iosm_port);
+			kfree(ipc_port);
+		}
+	}
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_port.h b/drivers/net/wwan/iosm/iosm_ipc_port.h
new file mode 100644
index 000000000000..11bc8ed21616
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_port.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PORT_H
+#define IOSM_IPC_PORT_H
+
+#include <linux/wwan.h>
+
+#include "iosm_ipc_imem_ops.h"
+
+/**
+ * struct iosm_cdev - State of the char driver layer.
+ * @iosm_port:		Pointer of type wwan_port
+ * @ipc_imem:		imem instance
+ * @dev:		Pointer to device struct
+ * @pcie:		PCIe component
+ * @port_type:		WWAN port type
+ * @channel:		Channel instance
+ * @chl_id:		Channel Indentifier
+ */
+struct iosm_cdev {
+	struct wwan_port *iosm_port;
+	struct iosm_imem *ipc_imem;
+	struct device *dev;
+	struct iosm_pcie *pcie;
+	enum wwan_port_type port_type;
+	struct ipc_mem_channel *channel;
+	enum ipc_channel_id chl_id;
+};
+
+/**
+ * ipc_port_init - Allocate IPC port & register to wwan subsystem for AT/MBIM
+ *		   communication.
+ * @ipc_imem:		Pointer to iosm_imem structure
+ * @ipc_port_cfg:	IPC Port Config
+ *
+ * Returns: 0 on success & NULL on failure
+ */
+struct iosm_cdev *ipc_port_init(struct iosm_imem *ipc_imem,
+				struct ipc_chnl_cfg ipc_port_cfg);
+
+/**
+ * ipc_port_deinit - Free IPC port & unregister port with wwan subsystem.
+ * @ipc_port:	Array of pointer to the ipc port data-struct
+ */
+void ipc_port_deinit(struct iosm_cdev *ipc_port[]);
+
+#endif

From 3b575260cb863d063669f382458e94ebdae6843d Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:15 +0530
Subject: [PATCH 1607/3292] net: iosm: bottom half

1) Bottom half(tasklet) for IRQ and task processing.
2) Tasks are processed asynchronous and synchronously.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_task_queue.c | 202 ++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_task_queue.h |  97 ++++++++++
 2 files changed, 299 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_task_queue.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_task_queue.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_task_queue.c b/drivers/net/wwan/iosm/iosm_ipc_task_queue.c
new file mode 100644
index 000000000000..852a99166144
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_task_queue.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_task_queue.h"
+
+/* Actual tasklet function, will be called whenever tasklet is scheduled.
+ * Calls event handler involves callback for each element in the message queue
+ */
+static void ipc_task_queue_handler(unsigned long data)
+{
+	struct ipc_task_queue *ipc_task = (struct ipc_task_queue *)data;
+	unsigned int q_rpos = ipc_task->q_rpos;
+
+	/* Loop over the input queue contents. */
+	while (q_rpos != ipc_task->q_wpos) {
+		/* Get the current first queue element. */
+		struct ipc_task_queue_args *args = &ipc_task->args[q_rpos];
+
+		/* Process the input message. */
+		if (args->func)
+			args->response = args->func(args->ipc_imem, args->arg,
+						    args->msg, args->size);
+
+		/* Signal completion for synchronous calls */
+		if (args->completion)
+			complete(args->completion);
+
+		/* Free message if copy was allocated. */
+		if (args->is_copy)
+			kfree(args->msg);
+
+		/* Set invalid queue element. Technically
+		 * spin_lock_irqsave is not required here as
+		 * the array element has been processed already
+		 * so we can assume that immediately after processing
+		 * ipc_task element, queue will not rotate again to
+		 * ipc_task same element within such short time.
+		 */
+		args->completion = NULL;
+		args->func = NULL;
+		args->msg = NULL;
+		args->size = 0;
+		args->is_copy = false;
+
+		/* calculate the new read ptr and update the volatile read
+		 * ptr
+		 */
+		q_rpos = (q_rpos + 1) % IPC_THREAD_QUEUE_SIZE;
+		ipc_task->q_rpos = q_rpos;
+	}
+}
+
+/* Free memory alloc and trigger completions left in the queue during dealloc */
+static void ipc_task_queue_cleanup(struct ipc_task_queue *ipc_task)
+{
+	unsigned int q_rpos = ipc_task->q_rpos;
+
+	while (q_rpos != ipc_task->q_wpos) {
+		struct ipc_task_queue_args *args = &ipc_task->args[q_rpos];
+
+		if (args->completion)
+			complete(args->completion);
+
+		if (args->is_copy)
+			kfree(args->msg);
+
+		q_rpos = (q_rpos + 1) % IPC_THREAD_QUEUE_SIZE;
+		ipc_task->q_rpos = q_rpos;
+	}
+}
+
+/* Add a message to the queue and trigger the ipc_task. */
+static int
+ipc_task_queue_add_task(struct iosm_imem *ipc_imem,
+			int arg, void *msg,
+			int (*func)(struct iosm_imem *ipc_imem, int arg,
+				    void *msg, size_t size),
+			size_t size, bool is_copy, bool wait)
+{
+	struct tasklet_struct *ipc_tasklet = ipc_imem->ipc_task->ipc_tasklet;
+	struct ipc_task_queue *ipc_task = &ipc_imem->ipc_task->ipc_queue;
+	struct completion completion;
+	unsigned int pos, nextpos;
+	unsigned long flags;
+	int result = -EIO;
+
+	init_completion(&completion);
+
+	/* tasklet send may be called from both interrupt or thread
+	 * context, therefore protect queue operation by spinlock
+	 */
+	spin_lock_irqsave(&ipc_task->q_lock, flags);
+
+	pos = ipc_task->q_wpos;
+	nextpos = (pos + 1) % IPC_THREAD_QUEUE_SIZE;
+
+	/* Get next queue position. */
+	if (nextpos != ipc_task->q_rpos) {
+		/* Get the reference to the queue element and save the passed
+		 * values.
+		 */
+		ipc_task->args[pos].arg = arg;
+		ipc_task->args[pos].msg = msg;
+		ipc_task->args[pos].func = func;
+		ipc_task->args[pos].ipc_imem = ipc_imem;
+		ipc_task->args[pos].size = size;
+		ipc_task->args[pos].is_copy = is_copy;
+		ipc_task->args[pos].completion = wait ? &completion : NULL;
+		ipc_task->args[pos].response = -1;
+
+		/* apply write barrier so that ipc_task->q_rpos elements
+		 * are updated before ipc_task->q_wpos is being updated.
+		 */
+		smp_wmb();
+
+		/* Update the status of the free queue space. */
+		ipc_task->q_wpos = nextpos;
+		result = 0;
+	}
+
+	spin_unlock_irqrestore(&ipc_task->q_lock, flags);
+
+	if (result == 0) {
+		tasklet_schedule(ipc_tasklet);
+
+		if (wait) {
+			wait_for_completion(&completion);
+			result = ipc_task->args[pos].response;
+		}
+	} else {
+		dev_err(ipc_imem->ipc_task->dev, "queue is full");
+	}
+
+	return result;
+}
+
+int ipc_task_queue_send_task(struct iosm_imem *imem,
+			     int (*func)(struct iosm_imem *ipc_imem, int arg,
+					 void *msg, size_t size),
+			     int arg, void *msg, size_t size, bool wait)
+{
+	bool is_copy = false;
+	void *copy = msg;
+	int ret = -ENOMEM;
+
+	if (size > 0) {
+		copy = kmemdup(msg, size, GFP_ATOMIC);
+		if (!copy)
+			goto out;
+
+		is_copy = true;
+	}
+
+	ret = ipc_task_queue_add_task(imem, arg, copy, func,
+				      size, is_copy, wait);
+	if (ret < 0) {
+		dev_err(imem->ipc_task->dev,
+			"add task failed for %ps %d, %p, %zu, %d", func, arg,
+			copy, size, is_copy);
+		if (is_copy)
+			kfree(copy);
+		goto out;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+int ipc_task_init(struct ipc_task *ipc_task)
+{
+	struct ipc_task_queue *ipc_queue = &ipc_task->ipc_queue;
+
+	ipc_task->ipc_tasklet = kzalloc(sizeof(*ipc_task->ipc_tasklet),
+					GFP_KERNEL);
+
+	if (!ipc_task->ipc_tasklet)
+		return -ENOMEM;
+
+	/* Initialize the spinlock needed to protect the message queue of the
+	 * ipc_task
+	 */
+	spin_lock_init(&ipc_queue->q_lock);
+
+	tasklet_init(ipc_task->ipc_tasklet, ipc_task_queue_handler,
+		     (unsigned long)ipc_queue);
+	return 0;
+}
+
+void ipc_task_deinit(struct ipc_task *ipc_task)
+{
+	tasklet_kill(ipc_task->ipc_tasklet);
+
+	kfree(ipc_task->ipc_tasklet);
+	/* This will free/complete any outstanding messages,
+	 * without calling the actual handler
+	 */
+	ipc_task_queue_cleanup(&ipc_task->ipc_queue);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_task_queue.h b/drivers/net/wwan/iosm/iosm_ipc_task_queue.h
new file mode 100644
index 000000000000..df6e9cd925a9
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_task_queue.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_TASK_QUEUE_H
+#define IOSM_IPC_TASK_QUEUE_H
+
+/* Number of available element for the input message queue of the IPC
+ * ipc_task
+ */
+#define IPC_THREAD_QUEUE_SIZE 256
+
+/**
+ * struct ipc_task_queue_args - Struct for Task queue elements
+ * @ipc_imem:   Pointer to struct iosm_imem
+ * @msg:        Message argument for tasklet function. (optional, can be NULL)
+ * @completion: OS object used to wait for the tasklet function to finish for
+ *              synchronous calls
+ * @func:       Function to be called in tasklet (tl) context
+ * @arg:        Generic integer argument for tasklet function (optional)
+ * @size:       Message size argument for tasklet function (optional)
+ * @response:   Return code of tasklet function for synchronous calls
+ * @is_copy:    Is true if msg contains a pointer to a copy of the original msg
+ *              for async. calls that needs to be freed once the tasklet returns
+ */
+struct ipc_task_queue_args {
+	struct iosm_imem *ipc_imem;
+	void *msg;
+	struct completion *completion;
+	int (*func)(struct iosm_imem *ipc_imem, int arg, void *msg,
+		    size_t size);
+	int arg;
+	size_t size;
+	int response;
+	u8 is_copy:1;
+};
+
+/**
+ * struct ipc_task_queue - Struct for Task queue
+ * @q_lock:     Protect the message queue of the ipc ipc_task
+ * @args:       Message queue of the IPC ipc_task
+ * @q_rpos:     First queue element to process.
+ * @q_wpos:     First free element of the input queue.
+ */
+struct ipc_task_queue {
+	spinlock_t q_lock; /* for atomic operation on queue */
+	struct ipc_task_queue_args args[IPC_THREAD_QUEUE_SIZE];
+	unsigned int q_rpos;
+	unsigned int q_wpos;
+};
+
+/**
+ * struct ipc_task - Struct for Task
+ * @dev:	 Pointer to device structure
+ * @ipc_tasklet: Tasklet for serialized work offload
+ *		 from interrupts and OS callbacks
+ * @ipc_queue:	 Task for entry into ipc task queue
+ */
+struct ipc_task {
+	struct device *dev;
+	struct tasklet_struct *ipc_tasklet;
+	struct ipc_task_queue ipc_queue;
+};
+
+/**
+ * ipc_task_init - Allocate a tasklet
+ * @ipc_task:	Pointer to ipc_task structure
+ * Returns: 0 on success and failure value on error.
+ */
+int ipc_task_init(struct ipc_task *ipc_task);
+
+/**
+ * ipc_task_deinit - Free a tasklet, invalidating its pointer.
+ * @ipc_task:	Pointer to ipc_task structure
+ */
+void ipc_task_deinit(struct ipc_task *ipc_task);
+
+/**
+ * ipc_task_queue_send_task - Synchronously/Asynchronously call a function in
+ *			      tasklet context.
+ * @imem:		Pointer to iosm_imem struct
+ * @func:		Function to be called in tasklet context
+ * @arg:		Integer argument for func
+ * @msg:		Message pointer argument for func
+ * @size:		Size argument for func
+ * @wait:		if true wait for result
+ *
+ * Returns: Result value returned by func or failure value if func could not
+ *	    be called.
+ */
+int ipc_task_queue_send_task(struct iosm_imem *imem,
+			     int (*func)(struct iosm_imem *ipc_imem, int arg,
+					 void *msg, size_t size),
+			     int arg, void *msg, size_t size, bool wait);
+
+#endif

From 51c45fa95435c55f2ae161fb9634671ab0411ead Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:16 +0530
Subject: [PATCH 1608/3292] net: iosm: multiplex IP sessions

Establish IP session between host-device & session management.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mux.c | 455 +++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_mux.h | 343 ++++++++++++++++++++
 2 files changed, 798 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mux.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mux.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c
new file mode 100644
index 000000000000..c1c77ce699da
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_mux_codec.h"
+
+/* At the begin of the runtime phase the IP MUX channel shall created. */
+static int ipc_mux_channel_create(struct iosm_mux *ipc_mux)
+{
+	int channel_id;
+
+	channel_id = ipc_imem_channel_alloc(ipc_mux->imem, ipc_mux->instance_id,
+					    IPC_CTYPE_WWAN);
+
+	if (channel_id < 0) {
+		dev_err(ipc_mux->dev,
+			"allocation of the MUX channel id failed");
+		ipc_mux->state = MUX_S_ERROR;
+		ipc_mux->event = MUX_E_NOT_APPLICABLE;
+		goto no_channel;
+	}
+
+	/* Establish the MUX channel in blocking mode. */
+	ipc_mux->channel = ipc_imem_channel_open(ipc_mux->imem, channel_id,
+						 IPC_HP_NET_CHANNEL_INIT);
+
+	if (!ipc_mux->channel) {
+		dev_err(ipc_mux->dev, "ipc_imem_channel_open failed");
+		ipc_mux->state = MUX_S_ERROR;
+		ipc_mux->event = MUX_E_NOT_APPLICABLE;
+		return -ENODEV; /* MUX channel is not available. */
+	}
+
+	/* Define the MUX active state properties. */
+	ipc_mux->state = MUX_S_ACTIVE;
+	ipc_mux->event = MUX_E_NO_ORDERS;
+
+no_channel:
+	return channel_id;
+}
+
+/* Reset the session/if id state. */
+static void ipc_mux_session_free(struct iosm_mux *ipc_mux, int if_id)
+{
+	struct mux_session *if_entry;
+
+	if_entry = &ipc_mux->session[if_id];
+	/* Reset the session state. */
+	if_entry->wwan = NULL;
+}
+
+/* Create and send the session open command. */
+static struct mux_cmd_open_session_resp *
+ipc_mux_session_open_send(struct iosm_mux *ipc_mux, int if_id)
+{
+	struct mux_cmd_open_session_resp *open_session_resp;
+	struct mux_acb *acb = &ipc_mux->acb;
+	union mux_cmd_param param;
+
+	/* open_session commands to one ACB and start transmission. */
+	param.open_session.flow_ctrl = 0;
+	param.open_session.ipv4v6_hints = 0;
+	param.open_session.reserved2 = 0;
+	param.open_session.dl_head_pad_len = cpu_to_le32(IPC_MEM_DL_ETH_OFFSET);
+
+	/* Finish and transfer ACB. The user thread is suspended.
+	 * It is a blocking function call, until CP responds or timeout.
+	 */
+	acb->wanted_response = MUX_CMD_OPEN_SESSION_RESP;
+	if (ipc_mux_dl_acb_send_cmds(ipc_mux, MUX_CMD_OPEN_SESSION, if_id, 0,
+				     &param, sizeof(param.open_session), true,
+				 false) ||
+	    acb->got_response != MUX_CMD_OPEN_SESSION_RESP) {
+		dev_err(ipc_mux->dev, "if_id %d: OPEN_SESSION send failed",
+			if_id);
+		return NULL;
+	}
+
+	open_session_resp = &ipc_mux->acb.got_param.open_session_resp;
+	if (open_session_resp->response != cpu_to_le32(MUX_CMD_RESP_SUCCESS)) {
+		dev_err(ipc_mux->dev,
+			"if_id %d,session open failed,response=%d", if_id,
+			open_session_resp->response);
+		return NULL;
+	}
+
+	return open_session_resp;
+}
+
+/* Open the first IP session. */
+static bool ipc_mux_session_open(struct iosm_mux *ipc_mux,
+				 struct mux_session_open *session_open)
+{
+	struct mux_cmd_open_session_resp *open_session_resp;
+	int if_id;
+
+	/* Search for a free session interface id. */
+	if_id = le32_to_cpu(session_open->if_id);
+	if (if_id < 0 || if_id >= ipc_mux->nr_sessions) {
+		dev_err(ipc_mux->dev, "invalid interface id=%d", if_id);
+		return false;
+	}
+
+	/* Create and send the session open command.
+	 * It is a blocking function call, until CP responds or timeout.
+	 */
+	open_session_resp = ipc_mux_session_open_send(ipc_mux, if_id);
+	if (!open_session_resp) {
+		ipc_mux_session_free(ipc_mux, if_id);
+		session_open->if_id = cpu_to_le32(-1);
+		return false;
+	}
+
+	/* Initialize the uplink skb accumulator. */
+	skb_queue_head_init(&ipc_mux->session[if_id].ul_list);
+
+	ipc_mux->session[if_id].dl_head_pad_len = IPC_MEM_DL_ETH_OFFSET;
+	ipc_mux->session[if_id].ul_head_pad_len =
+		le32_to_cpu(open_session_resp->ul_head_pad_len);
+	ipc_mux->session[if_id].wwan = ipc_mux->wwan;
+
+	/* Reset the flow ctrl stats of the session */
+	ipc_mux->session[if_id].flow_ctl_en_cnt = 0;
+	ipc_mux->session[if_id].flow_ctl_dis_cnt = 0;
+	ipc_mux->session[if_id].ul_flow_credits = 0;
+	ipc_mux->session[if_id].net_tx_stop = false;
+	ipc_mux->session[if_id].flow_ctl_mask = 0;
+
+	/* Save and return the assigned if id. */
+	session_open->if_id = cpu_to_le32(if_id);
+
+	return true;
+}
+
+/* Free pending session UL packet. */
+static void ipc_mux_session_reset(struct iosm_mux *ipc_mux, int if_id)
+{
+	/* Reset the session/if id state. */
+	ipc_mux_session_free(ipc_mux, if_id);
+
+	/* Empty the uplink skb accumulator. */
+	skb_queue_purge(&ipc_mux->session[if_id].ul_list);
+}
+
+static void ipc_mux_session_close(struct iosm_mux *ipc_mux,
+				  struct mux_session_close *msg)
+{
+	int if_id;
+
+	/* Copy the session interface id. */
+	if_id = le32_to_cpu(msg->if_id);
+
+	if (if_id < 0 || if_id >= ipc_mux->nr_sessions) {
+		dev_err(ipc_mux->dev, "invalid session id %d", if_id);
+		return;
+	}
+
+	/* Create and send the session close command.
+	 * It is a blocking function call, until CP responds or timeout.
+	 */
+	if (ipc_mux_dl_acb_send_cmds(ipc_mux, MUX_CMD_CLOSE_SESSION, if_id, 0,
+				     NULL, 0, true, false))
+		dev_err(ipc_mux->dev, "if_id %d: CLOSE_SESSION send failed",
+			if_id);
+
+	/* Reset the flow ctrl stats of the session */
+	ipc_mux->session[if_id].flow_ctl_en_cnt = 0;
+	ipc_mux->session[if_id].flow_ctl_dis_cnt = 0;
+	ipc_mux->session[if_id].flow_ctl_mask = 0;
+
+	ipc_mux_session_reset(ipc_mux, if_id);
+}
+
+static void ipc_mux_channel_close(struct iosm_mux *ipc_mux,
+				  struct mux_channel_close *channel_close_p)
+{
+	int i;
+
+	/* Free pending session UL packet. */
+	for (i = 0; i < ipc_mux->nr_sessions; i++)
+		if (ipc_mux->session[i].wwan)
+			ipc_mux_session_reset(ipc_mux, i);
+
+	ipc_imem_channel_close(ipc_mux->imem, ipc_mux->channel_id);
+
+	/* Reset the MUX object. */
+	ipc_mux->state = MUX_S_INACTIVE;
+	ipc_mux->event = MUX_E_INACTIVE;
+}
+
+/* CP has interrupted AP. If AP is in IP MUX mode, execute the pending ops. */
+static int ipc_mux_schedule(struct iosm_mux *ipc_mux, union mux_msg *msg)
+{
+	enum mux_event order;
+	bool success;
+	int ret = -EIO;
+
+	if (!ipc_mux->initialized) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	order = msg->common.event;
+
+	switch (ipc_mux->state) {
+	case MUX_S_INACTIVE:
+		if (order != MUX_E_MUX_SESSION_OPEN)
+			goto out; /* Wait for the request to open a session */
+
+		if (ipc_mux->event == MUX_E_INACTIVE)
+			/* Establish the MUX channel and the new state. */
+			ipc_mux->channel_id = ipc_mux_channel_create(ipc_mux);
+
+		if (ipc_mux->state != MUX_S_ACTIVE) {
+			ret = ipc_mux->channel_id; /* Missing the MUX channel */
+			goto out;
+		}
+
+		/* Disable the TD update timer and open the first IP session. */
+		ipc_imem_td_update_timer_suspend(ipc_mux->imem, true);
+		ipc_mux->event = MUX_E_MUX_SESSION_OPEN;
+		success = ipc_mux_session_open(ipc_mux, &msg->session_open);
+
+		ipc_imem_td_update_timer_suspend(ipc_mux->imem, false);
+		if (success)
+			ret = ipc_mux->channel_id;
+		goto out;
+
+	case MUX_S_ACTIVE:
+		switch (order) {
+		case MUX_E_MUX_SESSION_OPEN:
+			/* Disable the TD update timer and open a session */
+			ipc_imem_td_update_timer_suspend(ipc_mux->imem, true);
+			ipc_mux->event = MUX_E_MUX_SESSION_OPEN;
+			success = ipc_mux_session_open(ipc_mux,
+						       &msg->session_open);
+			ipc_imem_td_update_timer_suspend(ipc_mux->imem, false);
+			if (success)
+				ret = ipc_mux->channel_id;
+			goto out;
+
+		case MUX_E_MUX_SESSION_CLOSE:
+			/* Release an IP session. */
+			ipc_mux->event = MUX_E_MUX_SESSION_CLOSE;
+			ipc_mux_session_close(ipc_mux, &msg->session_close);
+			ret = ipc_mux->channel_id;
+			goto out;
+
+		case MUX_E_MUX_CHANNEL_CLOSE:
+			/* Close the MUX channel pipes. */
+			ipc_mux->event = MUX_E_MUX_CHANNEL_CLOSE;
+			ipc_mux_channel_close(ipc_mux, &msg->channel_close);
+			ret = ipc_mux->channel_id;
+			goto out;
+
+		default:
+			/* Invalid order. */
+			goto out;
+		}
+
+	default:
+		dev_err(ipc_mux->dev,
+			"unexpected MUX transition: state=%d, event=%d",
+			ipc_mux->state, ipc_mux->event);
+	}
+out:
+	return ret;
+}
+
+struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg,
+			      struct iosm_imem *imem)
+{
+	struct iosm_mux *ipc_mux = kzalloc(sizeof(*ipc_mux), GFP_KERNEL);
+	int i, ul_tds, ul_td_size;
+	struct sk_buff_head *free_list;
+	struct sk_buff *skb;
+
+	if (!ipc_mux)
+		return NULL;
+
+	ipc_mux->protocol = mux_cfg->protocol;
+	ipc_mux->ul_flow = mux_cfg->ul_flow;
+	ipc_mux->nr_sessions = mux_cfg->nr_sessions;
+	ipc_mux->instance_id = mux_cfg->instance_id;
+	ipc_mux->wwan_q_offset = 0;
+
+	ipc_mux->pcie = imem->pcie;
+	ipc_mux->imem = imem;
+	ipc_mux->ipc_protocol = imem->ipc_protocol;
+	ipc_mux->dev = imem->dev;
+	ipc_mux->wwan = imem->wwan;
+
+	/* Get the reference to the UL ADB list. */
+	free_list = &ipc_mux->ul_adb.free_list;
+
+	/* Initialize the list with free ADB. */
+	skb_queue_head_init(free_list);
+
+	ul_td_size = IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE;
+
+	ul_tds = IPC_MEM_MAX_TDS_MUX_LITE_UL;
+
+	ipc_mux->ul_adb.dest_skb = NULL;
+
+	ipc_mux->initialized = true;
+	ipc_mux->adb_prep_ongoing = false;
+	ipc_mux->size_needed = 0;
+	ipc_mux->ul_data_pend_bytes = 0;
+	ipc_mux->state = MUX_S_INACTIVE;
+	ipc_mux->ev_mux_net_transmit_pending = false;
+	ipc_mux->tx_transaction_id = 0;
+	ipc_mux->rr_next_session = 0;
+	ipc_mux->event = MUX_E_INACTIVE;
+	ipc_mux->channel_id = -1;
+	ipc_mux->channel = NULL;
+
+	/* Allocate the list of UL ADB. */
+	for (i = 0; i < ul_tds; i++) {
+		dma_addr_t mapping;
+
+		skb = ipc_pcie_alloc_skb(ipc_mux->pcie, ul_td_size, GFP_ATOMIC,
+					 &mapping, DMA_TO_DEVICE, 0);
+		if (!skb) {
+			ipc_mux_deinit(ipc_mux);
+			return NULL;
+		}
+		/* Extend the UL ADB list. */
+		skb_queue_tail(free_list, skb);
+	}
+
+	return ipc_mux;
+}
+
+/* Informs the network stack to restart transmission for all opened session if
+ * Flow Control is not ON for that session.
+ */
+static void ipc_mux_restart_tx_for_all_sessions(struct iosm_mux *ipc_mux)
+{
+	struct mux_session *session;
+	int idx;
+
+	for (idx = 0; idx < ipc_mux->nr_sessions; idx++) {
+		session = &ipc_mux->session[idx];
+
+		if (!session->wwan)
+			continue;
+
+		/* If flow control of the session is OFF and if there was tx
+		 * stop then restart. Inform the network interface to restart
+		 * sending data.
+		 */
+		if (session->flow_ctl_mask == 0) {
+			session->net_tx_stop = false;
+			ipc_mux_netif_tx_flowctrl(session, idx, false);
+		}
+	}
+}
+
+/* Informs the network stack to stop sending further pkt for all opened
+ * sessions
+ */
+static void ipc_mux_stop_netif_for_all_sessions(struct iosm_mux *ipc_mux)
+{
+	struct mux_session *session;
+	int idx;
+
+	for (idx = 0; idx < ipc_mux->nr_sessions; idx++) {
+		session = &ipc_mux->session[idx];
+
+		if (!session->wwan)
+			continue;
+
+		ipc_mux_netif_tx_flowctrl(session, session->if_id, true);
+	}
+}
+
+void ipc_mux_check_n_restart_tx(struct iosm_mux *ipc_mux)
+{
+	if (ipc_mux->ul_flow == MUX_UL) {
+		int low_thresh = IPC_MEM_MUX_UL_FLOWCTRL_LOW_B;
+
+		if (ipc_mux->ul_data_pend_bytes < low_thresh)
+			ipc_mux_restart_tx_for_all_sessions(ipc_mux);
+	}
+}
+
+int ipc_mux_get_max_sessions(struct iosm_mux *ipc_mux)
+{
+	return ipc_mux ? ipc_mux->nr_sessions : -EFAULT;
+}
+
+enum ipc_mux_protocol ipc_mux_get_active_protocol(struct iosm_mux *ipc_mux)
+{
+	return ipc_mux ? ipc_mux->protocol : MUX_UNKNOWN;
+}
+
+int ipc_mux_open_session(struct iosm_mux *ipc_mux, int session_nr)
+{
+	struct mux_session_open *session_open;
+	union mux_msg mux_msg;
+
+	session_open = &mux_msg.session_open;
+	session_open->event = MUX_E_MUX_SESSION_OPEN;
+
+	session_open->if_id = cpu_to_le32(session_nr);
+	ipc_mux->session[session_nr].flags |= IPC_MEM_WWAN_MUX;
+	return ipc_mux_schedule(ipc_mux, &mux_msg);
+}
+
+int ipc_mux_close_session(struct iosm_mux *ipc_mux, int session_nr)
+{
+	struct mux_session_close *session_close;
+	union mux_msg mux_msg;
+	int ret_val;
+
+	session_close = &mux_msg.session_close;
+	session_close->event = MUX_E_MUX_SESSION_CLOSE;
+
+	session_close->if_id = cpu_to_le32(session_nr);
+	ret_val = ipc_mux_schedule(ipc_mux, &mux_msg);
+	ipc_mux->session[session_nr].flags &= ~IPC_MEM_WWAN_MUX;
+
+	return ret_val;
+}
+
+void ipc_mux_deinit(struct iosm_mux *ipc_mux)
+{
+	struct mux_channel_close *channel_close;
+	struct sk_buff_head *free_list;
+	union mux_msg mux_msg;
+	struct sk_buff *skb;
+
+	if (!ipc_mux->initialized)
+		return;
+	ipc_mux_stop_netif_for_all_sessions(ipc_mux);
+
+	channel_close = &mux_msg.channel_close;
+	channel_close->event = MUX_E_MUX_CHANNEL_CLOSE;
+	ipc_mux_schedule(ipc_mux, &mux_msg);
+
+	/* Empty the ADB free list. */
+	free_list = &ipc_mux->ul_adb.free_list;
+
+	/* Remove from the head of the downlink queue. */
+	while ((skb = skb_dequeue(free_list)))
+		ipc_pcie_kfree_skb(ipc_mux->pcie, skb);
+
+	if (ipc_mux->channel) {
+		ipc_mux->channel->ul_pipe.is_open = false;
+		ipc_mux->channel->dl_pipe.is_open = false;
+	}
+
+	kfree(ipc_mux);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.h b/drivers/net/wwan/iosm/iosm_ipc_mux.h
new file mode 100644
index 000000000000..ddd2cd0bd911
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux.h
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_MUX_H
+#define IOSM_IPC_MUX_H
+
+#include "iosm_ipc_protocol.h"
+
+/* Size of the buffer for the IP MUX data buffer. */
+#define IPC_MEM_MAX_DL_MUX_BUF_SIZE (16 * 1024)
+#define IPC_MEM_MAX_UL_ADB_BUF_SIZE IPC_MEM_MAX_DL_MUX_BUF_SIZE
+
+/* Size of the buffer for the IP MUX Lite data buffer. */
+#define IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE (2 * 1024)
+
+/* TD counts for IP MUX Lite */
+#define IPC_MEM_MAX_TDS_MUX_LITE_UL 800
+#define IPC_MEM_MAX_TDS_MUX_LITE_DL 1200
+
+/* open session request (AP->CP) */
+#define MUX_CMD_OPEN_SESSION 1
+
+/* response to open session request (CP->AP) */
+#define MUX_CMD_OPEN_SESSION_RESP 2
+
+/* close session request (AP->CP) */
+#define MUX_CMD_CLOSE_SESSION 3
+
+/* response to close session request (CP->AP) */
+#define MUX_CMD_CLOSE_SESSION_RESP 4
+
+/* Flow control command with mask of the flow per queue/flow. */
+#define MUX_LITE_CMD_FLOW_CTL 5
+
+/* ACK the flow control command. Shall have the same Transaction ID as the
+ * matching FLOW_CTL command.
+ */
+#define MUX_LITE_CMD_FLOW_CTL_ACK 6
+
+/* Command for report packet indicating link quality metrics. */
+#define MUX_LITE_CMD_LINK_STATUS_REPORT 7
+
+/* Response to a report packet */
+#define MUX_LITE_CMD_LINK_STATUS_REPORT_RESP 8
+
+/* Used to reset a command/response state. */
+#define MUX_CMD_INVALID 255
+
+/* command response : command processed successfully */
+#define MUX_CMD_RESP_SUCCESS 0
+
+/* MUX for route link devices */
+#define IPC_MEM_WWAN_MUX BIT(0)
+
+/* Initiated actions to change the state of the MUX object. */
+enum mux_event {
+	MUX_E_INACTIVE, /* No initiated actions. */
+	MUX_E_MUX_SESSION_OPEN, /* Create the MUX channel and a session. */
+	MUX_E_MUX_SESSION_CLOSE, /* Release a session. */
+	MUX_E_MUX_CHANNEL_CLOSE, /* Release the MUX channel. */
+	MUX_E_NO_ORDERS, /* No MUX order. */
+	MUX_E_NOT_APPLICABLE, /* Defect IP MUX. */
+};
+
+/* MUX session open command. */
+struct mux_session_open {
+	enum mux_event event;
+	__le32 if_id;
+};
+
+/* MUX session close command. */
+struct mux_session_close {
+	enum mux_event event;
+	__le32 if_id;
+};
+
+/* MUX channel close command. */
+struct mux_channel_close {
+	enum mux_event event;
+};
+
+/* Default message type to find out the right message type. */
+struct mux_common {
+	enum mux_event event;
+};
+
+/* List of ops in MUX mode. */
+union mux_msg {
+	struct mux_session_open session_open;
+	struct mux_session_close session_close;
+	struct mux_channel_close channel_close;
+	struct mux_common common;
+};
+
+/* Parameter definition of the open session command. */
+struct mux_cmd_open_session {
+	u8 flow_ctrl; /* 0: Flow control disabled (flow allowed). */
+	/* 1: Flow control enabled (flow not allowed)*/
+	u8 ipv4v6_hints; /* 0: IPv4/IPv6 hints not supported.*/
+	/* 1: IPv4/IPv6 hints supported*/
+	__le16 reserved2; /* Reserved. Set to zero. */
+	__le32 dl_head_pad_len; /* Maximum length supported */
+	/* for DL head padding on a datagram. */
+};
+
+/* Parameter definition of the open session response. */
+struct mux_cmd_open_session_resp {
+	__le32 response; /* Response code */
+	u8 flow_ctrl; /* 0: Flow control disabled (flow allowed). */
+	/* 1: Flow control enabled (flow not allowed) */
+	u8 ipv4v6_hints; /* 0: IPv4/IPv6 hints not supported */
+	/* 1: IPv4/IPv6 hints supported */
+	__le16 reserved2; /* Reserved. Set to zero. */
+	__le32 ul_head_pad_len; /* Actual length supported for */
+	/* UL head padding on adatagram.*/
+};
+
+/* Parameter definition of the close session response code */
+struct mux_cmd_close_session_resp {
+	__le32 response;
+};
+
+/* Parameter definition of the flow control command. */
+struct mux_cmd_flow_ctl {
+	__le32 mask; /* indicating the desired flow control */
+	/* state for various flows/queues */
+};
+
+/* Parameter definition of the link status report code*/
+struct mux_cmd_link_status_report {
+	u8 payload;
+};
+
+/* Parameter definition of the link status report response code. */
+struct mux_cmd_link_status_report_resp {
+	__le32 response;
+};
+
+/**
+ * union mux_cmd_param - Union-definition of the command parameters.
+ * @open_session:	Inband command for open session
+ * @open_session_resp:	Inband command for open session response
+ * @close_session_resp:	Inband command for close session response
+ * @flow_ctl:		In-band flow control on the opened interfaces
+ * @link_status:	In-band Link Status Report
+ * @link_status_resp:	In-band command for link status report response
+ */
+union mux_cmd_param {
+	struct mux_cmd_open_session open_session;
+	struct mux_cmd_open_session_resp open_session_resp;
+	struct mux_cmd_close_session_resp close_session_resp;
+	struct mux_cmd_flow_ctl flow_ctl;
+	struct mux_cmd_link_status_report link_status;
+	struct mux_cmd_link_status_report_resp link_status_resp;
+};
+
+/* States of the MUX object.. */
+enum mux_state {
+	MUX_S_INACTIVE, /* IP MUX is unused. */
+	MUX_S_ACTIVE, /* IP MUX channel is available. */
+	MUX_S_ERROR, /* Defect IP MUX. */
+};
+
+/* Supported MUX protocols. */
+enum ipc_mux_protocol {
+	MUX_UNKNOWN,
+	MUX_LITE,
+};
+
+/* Supported UL data transfer methods. */
+enum ipc_mux_ul_flow {
+	MUX_UL_UNKNOWN,
+	MUX_UL, /* Normal UL data transfer */
+	MUX_UL_ON_CREDITS, /* UL data transfer will be based on credits */
+};
+
+/* List of the MUX session. */
+struct mux_session {
+	struct iosm_wwan *wwan; /*Network i/f used for communication*/
+	int if_id; /* i/f id for session open message.*/
+	u32 flags;
+	u32 ul_head_pad_len; /* Nr of bytes for UL head padding. */
+	u32 dl_head_pad_len; /* Nr of bytes for DL head padding. */
+	struct sk_buff_head ul_list; /* skb entries for an ADT. */
+	u32 flow_ctl_mask; /* UL flow control */
+	u32 flow_ctl_en_cnt; /* Flow control Enable cmd count */
+	u32 flow_ctl_dis_cnt; /* Flow Control Disable cmd count */
+	int ul_flow_credits; /* UL flow credits */
+	u8 net_tx_stop:1,
+	   flush:1; /* flush net interface ? */
+};
+
+/* State of a single UL data block. */
+struct mux_adb {
+	struct sk_buff *dest_skb; /* Current UL skb for the data block. */
+	u8 *buf; /* ADB memory. */
+	struct mux_adgh *adgh; /* ADGH pointer */
+	struct sk_buff *qlth_skb; /* QLTH pointer */
+	u32 *next_table_index; /* Pointer to next table index. */
+	struct sk_buff_head free_list; /* List of alloc. ADB for the UL sess.*/
+	int size; /* Size of the ADB memory. */
+	u32 if_cnt; /* Statistic counter */
+	u32 dg_cnt_total;
+	u32 payload_size;
+};
+
+/* Temporary ACB state. */
+struct mux_acb {
+	struct sk_buff *skb; /* Used UL skb. */
+	int if_id; /* Session id. */
+	u32 wanted_response;
+	u32 got_response;
+	u32 cmd;
+	union mux_cmd_param got_param; /* Received command/response parameter */
+};
+
+/**
+ * struct iosm_mux - Structure of the data multiplexing over an IP channel.
+ * @dev:		Pointer to device structure
+ * @session:		Array of the MUX sessions.
+ * @channel:		Reference to the IP MUX channel
+ * @pcie:		Pointer to iosm_pcie struct
+ * @imem:		Pointer to iosm_imem
+ * @wwan:		Poinetr to iosm_wwan
+ * @ipc_protocol:	Pointer to iosm_protocol
+ * @channel_id:		Channel ID for MUX
+ * @protocol:		Type of the MUX protocol
+ * @ul_flow:		UL Flow type
+ * @nr_sessions:	Number of sessions
+ * @instance_id:	Instance ID
+ * @state:		States of the MUX object
+ * @event:		Initiated actions to change the state of the MUX object
+ * @tx_transaction_id:	Transaction id for the ACB command.
+ * @rr_next_session:	Next session number for round robin.
+ * @ul_adb:		State of the UL ADB/ADGH.
+ * @size_needed:	Variable to store the size needed during ADB preparation
+ * @ul_data_pend_bytes:	Pending UL data to be processed in bytes
+ * @acb:		Temporary ACB state
+ * @wwan_q_offset:	This will hold the offset of the given instance
+ *			Useful while passing or receiving packets from
+ *			wwan/imem layer.
+ * @initialized:	MUX object is initialized
+ * @ev_mux_net_transmit_pending:
+ *			0 means inform the IPC tasklet to pass the
+ *			accumulated uplink ADB to CP.
+ * @adb_prep_ongoing:	Flag for ADB preparation status
+ */
+struct iosm_mux {
+	struct device *dev;
+	struct mux_session session[IPC_MEM_MUX_IP_SESSION_ENTRIES];
+	struct ipc_mem_channel *channel;
+	struct iosm_pcie *pcie;
+	struct iosm_imem *imem;
+	struct iosm_wwan *wwan;
+	struct iosm_protocol *ipc_protocol;
+	int channel_id;
+	enum ipc_mux_protocol protocol;
+	enum ipc_mux_ul_flow ul_flow;
+	int nr_sessions;
+	int instance_id;
+	enum mux_state state;
+	enum mux_event event;
+	u32 tx_transaction_id;
+	int rr_next_session;
+	struct mux_adb ul_adb;
+	int size_needed;
+	long long ul_data_pend_bytes;
+	struct mux_acb acb;
+	int wwan_q_offset;
+	u8 initialized:1,
+	   ev_mux_net_transmit_pending:1,
+	   adb_prep_ongoing:1;
+};
+
+/* MUX configuration structure */
+struct ipc_mux_config {
+	enum ipc_mux_protocol protocol;
+	enum ipc_mux_ul_flow ul_flow;
+	int nr_sessions;
+	int instance_id;
+};
+
+/**
+ * ipc_mux_init - Allocates and Init MUX instance
+ * @mux_cfg:	Pointer to MUX configuration structure
+ * @ipc_imem:	Pointer to imem data-struct
+ *
+ * Returns: Initialized mux pointer on success else NULL
+ */
+struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg,
+			      struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_mux_deinit - Deallocates MUX instance
+ * @ipc_mux:	Pointer to the MUX instance.
+ */
+void ipc_mux_deinit(struct iosm_mux *ipc_mux);
+
+/**
+ * ipc_mux_check_n_restart_tx - Checks for pending UL date bytes and then
+ *				it restarts the net interface tx queue if
+ *				device has set flow control as off.
+ * @ipc_mux:	Pointer to MUX data-struct
+ */
+void ipc_mux_check_n_restart_tx(struct iosm_mux *ipc_mux);
+
+/**
+ * ipc_mux_get_active_protocol - Returns the active MUX protocol type.
+ * @ipc_mux:	Pointer to MUX data-struct
+ *
+ * Returns: enum of type ipc_mux_protocol
+ */
+enum ipc_mux_protocol ipc_mux_get_active_protocol(struct iosm_mux *ipc_mux);
+
+/**
+ * ipc_mux_open_session - Opens a MUX session for IP traffic.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @session_nr:	Interface ID or session number
+ *
+ * Returns: channel id on success, failure value on error
+ */
+int ipc_mux_open_session(struct iosm_mux *ipc_mux, int session_nr);
+
+/**
+ * ipc_mux_close_session - Closes a MUX session.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @session_nr:	Interface ID or session number
+ *
+ * Returns: channel id on success, failure value on error
+ */
+int ipc_mux_close_session(struct iosm_mux *ipc_mux, int session_nr);
+
+/**
+ * ipc_mux_get_max_sessions - Retuns the maximum sessions supported on the
+ *			      provided MUX instance..
+ * @ipc_mux:	Pointer to MUX data-struct
+ *
+ * Returns: Number of sessions supported on Success and failure value on error
+ */
+int ipc_mux_get_max_sessions(struct iosm_mux *ipc_mux);
+#endif

From 9413491e20e1aba6e471d90c19cc43e523216a4d Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:17 +0530
Subject: [PATCH 1609/3292] net: iosm: encode or decode datagram

1) Encode UL packet into datagram.
2) Decode DL datagram and route it to network layer.
3) Supports credit based flow control.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 910 +++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.h | 193 +++++
 2 files changed, 1103 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_mux_codec.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
new file mode 100644
index 000000000000..fbf3cab3394c
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/nospec.h>
+
+#include "iosm_ipc_imem_ops.h"
+#include "iosm_ipc_mux_codec.h"
+#include "iosm_ipc_task_queue.h"
+
+/* Test the link power state and send a MUX command in blocking mode. */
+static int ipc_mux_tq_cmd_send(struct iosm_imem *ipc_imem, int arg, void *msg,
+			       size_t size)
+{
+	struct iosm_mux *ipc_mux = ipc_imem->mux;
+	const struct mux_acb *acb = msg;
+
+	skb_queue_tail(&ipc_mux->channel->ul_list, acb->skb);
+	ipc_imem_ul_send(ipc_mux->imem);
+
+	return 0;
+}
+
+static int ipc_mux_acb_send(struct iosm_mux *ipc_mux, bool blocking)
+{
+	struct completion *completion = &ipc_mux->channel->ul_sem;
+	int ret = ipc_task_queue_send_task(ipc_mux->imem, ipc_mux_tq_cmd_send,
+					   0, &ipc_mux->acb,
+					   sizeof(ipc_mux->acb), false);
+	if (ret) {
+		dev_err(ipc_mux->dev, "unable to send mux command");
+		return ret;
+	}
+
+	/* if blocking, suspend the app and wait for irq in the flash or
+	 * crash phase. return false on timeout to indicate failure.
+	 */
+	if (blocking) {
+		u32 wait_time_milliseconds = IPC_MUX_CMD_RUN_DEFAULT_TIMEOUT;
+
+		reinit_completion(completion);
+
+		if (wait_for_completion_interruptible_timeout
+		   (completion, msecs_to_jiffies(wait_time_milliseconds)) ==
+		   0) {
+			dev_err(ipc_mux->dev, "ch[%d] timeout",
+				ipc_mux->channel_id);
+			ipc_uevent_send(ipc_mux->imem->dev, UEVENT_MDM_TIMEOUT);
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+/* Prepare mux Command */
+static struct mux_lite_cmdh *ipc_mux_lite_add_cmd(struct iosm_mux *ipc_mux,
+						  u32 cmd, struct mux_acb *acb,
+						  void *param, u32 param_size)
+{
+	struct mux_lite_cmdh *cmdh = (struct mux_lite_cmdh *)acb->skb->data;
+
+	cmdh->signature = cpu_to_le32(MUX_SIG_CMDH);
+	cmdh->command_type = cpu_to_le32(cmd);
+	cmdh->if_id = acb->if_id;
+
+	acb->cmd = cmd;
+
+	cmdh->cmd_len = cpu_to_le16(offsetof(struct mux_lite_cmdh, param) +
+				    param_size);
+	cmdh->transaction_id = cpu_to_le32(ipc_mux->tx_transaction_id++);
+
+	if (param)
+		memcpy(&cmdh->param, param, param_size);
+
+	skb_put(acb->skb, le16_to_cpu(cmdh->cmd_len));
+
+	return cmdh;
+}
+
+static int ipc_mux_acb_alloc(struct iosm_mux *ipc_mux)
+{
+	struct mux_acb *acb = &ipc_mux->acb;
+	struct sk_buff *skb;
+	dma_addr_t mapping;
+
+	/* Allocate skb memory for the uplink buffer. */
+	skb = ipc_pcie_alloc_skb(ipc_mux->pcie, MUX_MAX_UL_ACB_BUF_SIZE,
+				 GFP_ATOMIC, &mapping, DMA_TO_DEVICE, 0);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Save the skb address. */
+	acb->skb = skb;
+
+	memset(skb->data, 0, MUX_MAX_UL_ACB_BUF_SIZE);
+
+	return 0;
+}
+
+int ipc_mux_dl_acb_send_cmds(struct iosm_mux *ipc_mux, u32 cmd_type, u8 if_id,
+			     u32 transaction_id, union mux_cmd_param *param,
+			     size_t res_size, bool blocking, bool respond)
+{
+	struct mux_acb *acb = &ipc_mux->acb;
+	struct mux_lite_cmdh *ack_lite;
+	int ret = 0;
+
+	acb->if_id = if_id;
+	ret = ipc_mux_acb_alloc(ipc_mux);
+	if (ret)
+		return ret;
+
+	ack_lite = ipc_mux_lite_add_cmd(ipc_mux, cmd_type, acb, param,
+					res_size);
+	if (respond)
+		ack_lite->transaction_id = cpu_to_le32(transaction_id);
+
+	ret = ipc_mux_acb_send(ipc_mux, blocking);
+
+	return ret;
+}
+
+void ipc_mux_netif_tx_flowctrl(struct mux_session *session, int idx, bool on)
+{
+	/* Inform the network interface to start/stop flow ctrl */
+	ipc_wwan_tx_flowctrl(session->wwan, idx, on);
+}
+
+static int ipc_mux_dl_cmdresps_decode_process(struct iosm_mux *ipc_mux,
+					      struct mux_lite_cmdh *cmdh)
+{
+	struct mux_acb *acb = &ipc_mux->acb;
+
+	switch (le32_to_cpu(cmdh->command_type)) {
+	case MUX_CMD_OPEN_SESSION_RESP:
+	case MUX_CMD_CLOSE_SESSION_RESP:
+		/* Resume the control application. */
+		acb->got_param = cmdh->param;
+		break;
+
+	case MUX_LITE_CMD_FLOW_CTL_ACK:
+		/* This command type is not expected as response for
+		 * Aggregation version of the protocol. So return non-zero.
+		 */
+		if (ipc_mux->protocol != MUX_LITE)
+			return -EINVAL;
+
+		dev_dbg(ipc_mux->dev, "if %u FLOW_CTL_ACK %u received",
+			cmdh->if_id, le32_to_cpu(cmdh->transaction_id));
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	acb->wanted_response = MUX_CMD_INVALID;
+	acb->got_response = le32_to_cpu(cmdh->command_type);
+	complete(&ipc_mux->channel->ul_sem);
+
+	return 0;
+}
+
+static int ipc_mux_dl_dlcmds_decode_process(struct iosm_mux *ipc_mux,
+					    struct mux_lite_cmdh *cmdh)
+{
+	union mux_cmd_param *param = &cmdh->param;
+	struct mux_session *session;
+	int new_size;
+
+	dev_dbg(ipc_mux->dev, "if_id[%d]: dlcmds decode process %d",
+		cmdh->if_id, le32_to_cpu(cmdh->command_type));
+
+	switch (le32_to_cpu(cmdh->command_type)) {
+	case MUX_LITE_CMD_FLOW_CTL:
+
+		if (cmdh->if_id >= ipc_mux->nr_sessions) {
+			dev_err(ipc_mux->dev, "if_id [%d] not valid",
+				cmdh->if_id);
+			return -EINVAL; /* No session interface id. */
+		}
+
+		session = &ipc_mux->session[cmdh->if_id];
+
+		new_size = offsetof(struct mux_lite_cmdh, param) +
+			   sizeof(param->flow_ctl);
+		if (param->flow_ctl.mask == cpu_to_le32(0xFFFFFFFF)) {
+			/* Backward Compatibility */
+			if (cmdh->cmd_len == cpu_to_le16(new_size))
+				session->flow_ctl_mask =
+					le32_to_cpu(param->flow_ctl.mask);
+			else
+				session->flow_ctl_mask = ~0;
+			/* if CP asks for FLOW CTRL Enable
+			 * then set our internal flow control Tx flag
+			 * to limit uplink session queueing
+			 */
+			session->net_tx_stop = true;
+			/* Update the stats */
+			session->flow_ctl_en_cnt++;
+		} else if (param->flow_ctl.mask == 0) {
+			/* Just reset the Flow control mask and let
+			 * mux_flow_ctrl_low_thre_b take control on
+			 * our internal Tx flag and enabling kernel
+			 * flow control
+			 */
+			/* Backward Compatibility */
+			if (cmdh->cmd_len == cpu_to_le16(new_size))
+				session->flow_ctl_mask =
+					le32_to_cpu(param->flow_ctl.mask);
+			else
+				session->flow_ctl_mask = 0;
+			/* Update the stats */
+			session->flow_ctl_dis_cnt++;
+		} else {
+			break;
+		}
+
+		dev_dbg(ipc_mux->dev, "if[%u] FLOW CTRL 0x%08X", cmdh->if_id,
+			le32_to_cpu(param->flow_ctl.mask));
+		break;
+
+	case MUX_LITE_CMD_LINK_STATUS_REPORT:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Decode and Send appropriate response to a command block. */
+static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	struct mux_lite_cmdh *cmdh = (struct mux_lite_cmdh *)skb->data;
+	__le32 trans_id = cmdh->transaction_id;
+
+	if (ipc_mux_dl_cmdresps_decode_process(ipc_mux, cmdh)) {
+		/* Unable to decode command response indicates the cmd_type
+		 * may be a command instead of response. So try to decoding it.
+		 */
+		if (!ipc_mux_dl_dlcmds_decode_process(ipc_mux, cmdh)) {
+			/* Decoded command may need a response. Give the
+			 * response according to the command type.
+			 */
+			union mux_cmd_param *mux_cmd = NULL;
+			size_t size = 0;
+			u32 cmd = MUX_LITE_CMD_LINK_STATUS_REPORT_RESP;
+
+			if (cmdh->command_type ==
+			    cpu_to_le32(MUX_LITE_CMD_LINK_STATUS_REPORT)) {
+				mux_cmd = &cmdh->param;
+				mux_cmd->link_status_resp.response =
+					cpu_to_le32(MUX_CMD_RESP_SUCCESS);
+				/* response field is u32 */
+				size = sizeof(u32);
+			} else if (cmdh->command_type ==
+				   cpu_to_le32(MUX_LITE_CMD_FLOW_CTL)) {
+				cmd = MUX_LITE_CMD_FLOW_CTL_ACK;
+			} else {
+				return;
+			}
+
+			if (ipc_mux_dl_acb_send_cmds(ipc_mux, cmd, cmdh->if_id,
+						     le32_to_cpu(trans_id),
+						     mux_cmd, size, false,
+						     true))
+				dev_err(ipc_mux->dev,
+					"if_id %d: cmd send failed",
+					cmdh->if_id);
+		}
+	}
+}
+
+/* Pass the DL packet to the netif layer. */
+static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id,
+			       struct iosm_wwan *wwan, u32 offset,
+			       u8 service_class, struct sk_buff *skb)
+{
+	struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC);
+
+	if (!dest_skb)
+		return -ENOMEM;
+
+	skb_pull(dest_skb, offset);
+	skb_set_tail_pointer(dest_skb, dest_skb->len);
+	/* Pass the packet to the netif layer. */
+	dest_skb->priority = service_class;
+
+	return ipc_wwan_receive(wwan, dest_skb, false, if_id + 1);
+}
+
+/* Decode Flow Credit Table in the block */
+static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux,
+				   unsigned char *block)
+{
+	struct ipc_mem_lite_gen_tbl *fct = (struct ipc_mem_lite_gen_tbl *)block;
+	struct iosm_wwan *wwan;
+	int ul_credits;
+	int if_id;
+
+	if (fct->vfl_length != sizeof(fct->vfl.nr_of_bytes)) {
+		dev_err(ipc_mux->dev, "unexpected FCT length: %d",
+			fct->vfl_length);
+		return;
+	}
+
+	if_id = fct->if_id;
+	if (if_id >= ipc_mux->nr_sessions) {
+		dev_err(ipc_mux->dev, "not supported if_id: %d", if_id);
+		return;
+	}
+
+	/* Is the session active ? */
+	if_id = array_index_nospec(if_id, ipc_mux->nr_sessions);
+	wwan = ipc_mux->session[if_id].wwan;
+	if (!wwan) {
+		dev_err(ipc_mux->dev, "session Net ID is NULL");
+		return;
+	}
+
+	ul_credits = fct->vfl.nr_of_bytes;
+
+	dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d",
+		if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits);
+
+	/* Update the Flow Credit information from ADB */
+	ipc_mux->session[if_id].ul_flow_credits += ul_credits;
+
+	/* Check whether the TX can be started */
+	if (ipc_mux->session[if_id].ul_flow_credits > 0) {
+		ipc_mux->session[if_id].net_tx_stop = false;
+		ipc_mux_netif_tx_flowctrl(&ipc_mux->session[if_id],
+					  ipc_mux->session[if_id].if_id, false);
+	}
+}
+
+/* Decode non-aggregated datagram */
+static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux,
+				   struct sk_buff *skb)
+{
+	u32 pad_len, packet_offset;
+	struct iosm_wwan *wwan;
+	struct mux_adgh *adgh;
+	u8 *block = skb->data;
+	int rc = 0;
+	u8 if_id;
+
+	adgh = (struct mux_adgh *)block;
+
+	if (adgh->signature != cpu_to_le32(MUX_SIG_ADGH)) {
+		dev_err(ipc_mux->dev, "invalid ADGH signature received");
+		return;
+	}
+
+	if_id = adgh->if_id;
+	if (if_id >= ipc_mux->nr_sessions) {
+		dev_err(ipc_mux->dev, "invalid if_id while decoding %d", if_id);
+		return;
+	}
+
+	/* Is the session active ? */
+	if_id = array_index_nospec(if_id, ipc_mux->nr_sessions);
+	wwan = ipc_mux->session[if_id].wwan;
+	if (!wwan) {
+		dev_err(ipc_mux->dev, "session Net ID is NULL");
+		return;
+	}
+
+	/* Store the pad len for the corresponding session
+	 * Pad bytes as negotiated in the open session less the header size
+	 * (see session management chapter for details).
+	 * If resulting padding is zero or less, the additional head padding is
+	 * omitted. For e.g., if HEAD_PAD_LEN = 16 or less, this field is
+	 * omitted if HEAD_PAD_LEN = 20, then this field will have 4 bytes
+	 * set to zero
+	 */
+	pad_len =
+		ipc_mux->session[if_id].dl_head_pad_len - IPC_MEM_DL_ETH_OFFSET;
+	packet_offset = sizeof(*adgh) + pad_len;
+
+	if_id += ipc_mux->wwan_q_offset;
+
+	/* Pass the packet to the netif layer */
+	rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset,
+				 adgh->service_class, skb);
+	if (rc) {
+		dev_err(ipc_mux->dev, "mux adgh decoding error");
+		return;
+	}
+	ipc_mux->session[if_id].flush = 1;
+}
+
+void ipc_mux_dl_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	u32 signature;
+
+	if (!skb->data)
+		return;
+
+	/* Decode the MUX header type. */
+	signature = le32_to_cpup((__le32 *)skb->data);
+
+	switch (signature) {
+	case MUX_SIG_ADGH:
+		ipc_mux_dl_adgh_decode(ipc_mux, skb);
+		break;
+
+	case MUX_SIG_FCTH:
+		ipc_mux_dl_fcth_decode(ipc_mux, skb->data);
+		break;
+
+	case MUX_SIG_CMDH:
+		ipc_mux_dl_cmd_decode(ipc_mux, skb);
+		break;
+
+	default:
+		dev_err(ipc_mux->dev, "invalid ABH signature");
+	}
+
+	ipc_pcie_kfree_skb(ipc_mux->pcie, skb);
+}
+
+static int ipc_mux_ul_skb_alloc(struct iosm_mux *ipc_mux,
+				struct mux_adb *ul_adb, u32 type)
+{
+	/* Take the first element of the free list. */
+	struct sk_buff *skb = skb_dequeue(&ul_adb->free_list);
+	int qlt_size;
+
+	if (!skb)
+		return -EBUSY; /* Wait for a free ADB skb. */
+
+	/* Mark it as UL ADB to select the right free operation. */
+	IPC_CB(skb)->op_type = (u8)UL_MUX_OP_ADB;
+
+	switch (type) {
+	case MUX_SIG_ADGH:
+		/* Save the ADB memory settings. */
+		ul_adb->dest_skb = skb;
+		ul_adb->buf = skb->data;
+		ul_adb->size = IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE;
+		/* reset statistic counter */
+		ul_adb->if_cnt = 0;
+		ul_adb->payload_size = 0;
+		ul_adb->dg_cnt_total = 0;
+
+		ul_adb->adgh = (struct mux_adgh *)skb->data;
+		memset(ul_adb->adgh, 0, sizeof(struct mux_adgh));
+		break;
+
+	case MUX_SIG_QLTH:
+		qlt_size = offsetof(struct ipc_mem_lite_gen_tbl, vfl) +
+			   (MUX_QUEUE_LEVEL * sizeof(struct mux_lite_vfl));
+
+		if (qlt_size > IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE) {
+			dev_err(ipc_mux->dev,
+				"can't support. QLT size:%d SKB size: %d",
+				qlt_size, IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE);
+			return -ERANGE;
+		}
+
+		ul_adb->qlth_skb = skb;
+		memset((ul_adb->qlth_skb)->data, 0, qlt_size);
+		skb_put(skb, qlt_size);
+		break;
+	}
+
+	return 0;
+}
+
+static void ipc_mux_ul_adgh_finish(struct iosm_mux *ipc_mux)
+{
+	struct mux_adb *ul_adb = &ipc_mux->ul_adb;
+	u16 adgh_len;
+	long long bytes;
+	char *str;
+
+	if (!ul_adb || !ul_adb->dest_skb) {
+		dev_err(ipc_mux->dev, "no dest skb");
+		return;
+	}
+
+	adgh_len = le16_to_cpu(ul_adb->adgh->length);
+	skb_put(ul_adb->dest_skb, adgh_len);
+	skb_queue_tail(&ipc_mux->channel->ul_list, ul_adb->dest_skb);
+	ul_adb->dest_skb = NULL;
+
+	if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS) {
+		struct mux_session *session;
+
+		session = &ipc_mux->session[ul_adb->adgh->if_id];
+		str = "available_credits";
+		bytes = (long long)session->ul_flow_credits;
+
+	} else {
+		str = "pend_bytes";
+		bytes = ipc_mux->ul_data_pend_bytes;
+		ipc_mux->ul_data_pend_bytes = ipc_mux->ul_data_pend_bytes +
+					      adgh_len;
+	}
+
+	dev_dbg(ipc_mux->dev, "UL ADGH: size=%u, if_id=%d, payload=%d, %s=%lld",
+		adgh_len, ul_adb->adgh->if_id, ul_adb->payload_size,
+		str, bytes);
+}
+
+/* Allocates an ADB from the free list and initializes it with ADBH  */
+static bool ipc_mux_ul_adb_allocate(struct iosm_mux *ipc_mux,
+				    struct mux_adb *adb, int *size_needed,
+				    u32 type)
+{
+	bool ret_val = false;
+	int status;
+
+	if (!adb->dest_skb) {
+		/* Allocate memory for the ADB including of the
+		 * datagram table header.
+		 */
+		status = ipc_mux_ul_skb_alloc(ipc_mux, adb, type);
+		if (status)
+			/* Is a pending ADB available ? */
+			ret_val = true; /* None. */
+
+		/* Update size need to zero only for new ADB memory */
+		*size_needed = 0;
+	}
+
+	return ret_val;
+}
+
+/* Informs the network stack to stop sending further packets for all opened
+ * sessions
+ */
+static void ipc_mux_stop_tx_for_all_sessions(struct iosm_mux *ipc_mux)
+{
+	struct mux_session *session;
+	int idx;
+
+	for (idx = 0; idx < ipc_mux->nr_sessions; idx++) {
+		session = &ipc_mux->session[idx];
+
+		if (!session->wwan)
+			continue;
+
+		session->net_tx_stop = true;
+	}
+}
+
+/* Sends Queue Level Table of all opened sessions */
+static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux)
+{
+	struct ipc_mem_lite_gen_tbl *qlt;
+	struct mux_session *session;
+	bool qlt_updated = false;
+	int i;
+	int qlt_size;
+
+	if (!ipc_mux->initialized || ipc_mux->state != MUX_S_ACTIVE)
+		return qlt_updated;
+
+	qlt_size = offsetof(struct ipc_mem_lite_gen_tbl, vfl) +
+		   MUX_QUEUE_LEVEL * sizeof(struct mux_lite_vfl);
+
+	for (i = 0; i < ipc_mux->nr_sessions; i++) {
+		session = &ipc_mux->session[i];
+
+		if (!session->wwan || session->flow_ctl_mask)
+			continue;
+
+		if (ipc_mux_ul_skb_alloc(ipc_mux, &ipc_mux->ul_adb,
+					 MUX_SIG_QLTH)) {
+			dev_err(ipc_mux->dev,
+				"no reserved mem to send QLT of if_id: %d", i);
+			break;
+		}
+
+		/* Prepare QLT */
+		qlt = (struct ipc_mem_lite_gen_tbl *)(ipc_mux->ul_adb.qlth_skb)
+			      ->data;
+		qlt->signature = cpu_to_le32(MUX_SIG_QLTH);
+		qlt->length = cpu_to_le16(qlt_size);
+		qlt->if_id = i;
+		qlt->vfl_length = MUX_QUEUE_LEVEL * sizeof(struct mux_lite_vfl);
+		qlt->reserved[0] = 0;
+		qlt->reserved[1] = 0;
+
+		qlt->vfl.nr_of_bytes = session->ul_list.qlen;
+
+		/* Add QLT to the transfer list. */
+		skb_queue_tail(&ipc_mux->channel->ul_list,
+			       ipc_mux->ul_adb.qlth_skb);
+
+		qlt_updated = true;
+		ipc_mux->ul_adb.qlth_skb = NULL;
+	}
+
+	if (qlt_updated)
+		/* Updates the TDs with ul_list */
+		(void)ipc_imem_ul_write_td(ipc_mux->imem);
+
+	return qlt_updated;
+}
+
+/* Checks the available credits for the specified session and returns
+ * number of packets for which credits are available.
+ */
+static int ipc_mux_ul_bytes_credits_check(struct iosm_mux *ipc_mux,
+					  struct mux_session *session,
+					  struct sk_buff_head *ul_list,
+					  int max_nr_of_pkts)
+{
+	int pkts_to_send = 0;
+	struct sk_buff *skb;
+	int credits = 0;
+
+	if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS) {
+		credits = session->ul_flow_credits;
+		if (credits <= 0) {
+			dev_dbg(ipc_mux->dev,
+				"FC::if_id[%d] Insuff.Credits/Qlen:%d/%u",
+				session->if_id, session->ul_flow_credits,
+				session->ul_list.qlen); /* nr_of_bytes */
+			return 0;
+		}
+	} else {
+		credits = IPC_MEM_MUX_UL_FLOWCTRL_HIGH_B -
+			  ipc_mux->ul_data_pend_bytes;
+		if (credits <= 0) {
+			ipc_mux_stop_tx_for_all_sessions(ipc_mux);
+
+			dev_dbg(ipc_mux->dev,
+				"if_id[%d] encod. fail Bytes: %llu, thresh: %d",
+				session->if_id, ipc_mux->ul_data_pend_bytes,
+				IPC_MEM_MUX_UL_FLOWCTRL_HIGH_B);
+			return 0;
+		}
+	}
+
+	/* Check if there are enough credits/bytes available to send the
+	 * requested max_nr_of_pkts. Otherwise restrict the nr_of_pkts
+	 * depending on available credits.
+	 */
+	skb_queue_walk(ul_list, skb)
+	{
+		if (!(credits >= skb->len && pkts_to_send < max_nr_of_pkts))
+			break;
+		credits -= skb->len;
+		pkts_to_send++;
+	}
+
+	return pkts_to_send;
+}
+
+/* Encode the UL IP packet according to Lite spec. */
+static int ipc_mux_ul_adgh_encode(struct iosm_mux *ipc_mux, int session_id,
+				  struct mux_session *session,
+				  struct sk_buff_head *ul_list,
+				  struct mux_adb *adb, int nr_of_pkts)
+{
+	int offset = sizeof(struct mux_adgh);
+	int adb_updated = -EINVAL;
+	struct sk_buff *src_skb;
+	int aligned_size = 0;
+	int nr_of_skb = 0;
+	u32 pad_len = 0;
+
+	/* Re-calculate the number of packets depending on number of bytes to be
+	 * processed/available credits.
+	 */
+	nr_of_pkts = ipc_mux_ul_bytes_credits_check(ipc_mux, session, ul_list,
+						    nr_of_pkts);
+
+	/* If calculated nr_of_pkts from available credits is <= 0
+	 * then nothing to do.
+	 */
+	if (nr_of_pkts <= 0)
+		return 0;
+
+	/* Read configured UL head_pad_length for session.*/
+	if (session->ul_head_pad_len > IPC_MEM_DL_ETH_OFFSET)
+		pad_len = session->ul_head_pad_len - IPC_MEM_DL_ETH_OFFSET;
+
+	/* Process all pending UL packets for this session
+	 * depending on the allocated datagram table size.
+	 */
+	while (nr_of_pkts > 0) {
+		/* get destination skb allocated */
+		if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed,
+					    MUX_SIG_ADGH)) {
+			dev_err(ipc_mux->dev, "no reserved memory for ADGH");
+			return -ENOMEM;
+		}
+
+		/* Peek at the head of the list. */
+		src_skb = skb_peek(ul_list);
+		if (!src_skb) {
+			dev_err(ipc_mux->dev,
+				"skb peek return NULL with count : %d",
+				nr_of_pkts);
+			break;
+		}
+
+		/* Calculate the memory value. */
+		aligned_size = ALIGN((pad_len + src_skb->len), 4);
+
+		ipc_mux->size_needed = sizeof(struct mux_adgh) + aligned_size;
+
+		if (ipc_mux->size_needed > adb->size) {
+			dev_dbg(ipc_mux->dev, "size needed %d, adgh size %d",
+				ipc_mux->size_needed, adb->size);
+			/* Return 1 if any IP packet is added to the transfer
+			 * list.
+			 */
+			return nr_of_skb ? 1 : 0;
+		}
+
+		/* Add buffer (without head padding to next pending transfer) */
+		memcpy(adb->buf + offset + pad_len, src_skb->data,
+		       src_skb->len);
+
+		adb->adgh->signature = cpu_to_le32(MUX_SIG_ADGH);
+		adb->adgh->if_id = session_id;
+		adb->adgh->length =
+			cpu_to_le16(sizeof(struct mux_adgh) + pad_len +
+				    src_skb->len);
+		adb->adgh->service_class = src_skb->priority;
+		adb->adgh->next_count = --nr_of_pkts;
+		adb->dg_cnt_total++;
+		adb->payload_size += src_skb->len;
+
+		if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS)
+			/* Decrement the credit value as we are processing the
+			 * datagram from the UL list.
+			 */
+			session->ul_flow_credits -= src_skb->len;
+
+		/* Remove the processed elements and free it. */
+		src_skb = skb_dequeue(ul_list);
+		dev_kfree_skb(src_skb);
+		nr_of_skb++;
+
+		ipc_mux_ul_adgh_finish(ipc_mux);
+	}
+
+	if (nr_of_skb) {
+		/* Send QLT info to modem if pending bytes > high watermark
+		 * in case of mux lite
+		 */
+		if (ipc_mux->ul_flow == MUX_UL_ON_CREDITS ||
+		    ipc_mux->ul_data_pend_bytes >=
+			    IPC_MEM_MUX_UL_FLOWCTRL_LOW_B)
+			adb_updated = ipc_mux_lite_send_qlt(ipc_mux);
+		else
+			adb_updated = 1;
+
+		/* Updates the TDs with ul_list */
+		(void)ipc_imem_ul_write_td(ipc_mux->imem);
+	}
+
+	return adb_updated;
+}
+
+bool ipc_mux_ul_data_encode(struct iosm_mux *ipc_mux)
+{
+	struct sk_buff_head *ul_list;
+	struct mux_session *session;
+	int updated = 0;
+	int session_id;
+	int dg_n;
+	int i;
+
+	if (!ipc_mux || ipc_mux->state != MUX_S_ACTIVE ||
+	    ipc_mux->adb_prep_ongoing)
+		return false;
+
+	ipc_mux->adb_prep_ongoing = true;
+
+	for (i = 0; i < ipc_mux->nr_sessions; i++) {
+		session_id = ipc_mux->rr_next_session;
+		session = &ipc_mux->session[session_id];
+
+		/* Go to next handle rr_next_session overflow */
+		ipc_mux->rr_next_session++;
+		if (ipc_mux->rr_next_session >= ipc_mux->nr_sessions)
+			ipc_mux->rr_next_session = 0;
+
+		if (!session->wwan || session->flow_ctl_mask ||
+		    session->net_tx_stop)
+			continue;
+
+		ul_list = &session->ul_list;
+
+		/* Is something pending in UL and flow ctrl off */
+		dg_n = skb_queue_len(ul_list);
+		if (dg_n > MUX_MAX_UL_DG_ENTRIES)
+			dg_n = MUX_MAX_UL_DG_ENTRIES;
+
+		if (dg_n == 0)
+			/* Nothing to do for ipc_mux session
+			 * -> try next session id.
+			 */
+			continue;
+
+		updated = ipc_mux_ul_adgh_encode(ipc_mux, session_id, session,
+						 ul_list, &ipc_mux->ul_adb,
+						 dg_n);
+	}
+
+	ipc_mux->adb_prep_ongoing = false;
+	return updated == 1;
+}
+
+void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb)
+{
+	struct mux_adgh *adgh;
+	u16 adgh_len;
+
+	adgh = (struct mux_adgh *)skb->data;
+	adgh_len = le16_to_cpu(adgh->length);
+
+	if (adgh->signature == cpu_to_le32(MUX_SIG_ADGH) &&
+	    ipc_mux->ul_flow == MUX_UL)
+		ipc_mux->ul_data_pend_bytes = ipc_mux->ul_data_pend_bytes -
+					      adgh_len;
+
+	if (ipc_mux->ul_flow == MUX_UL)
+		dev_dbg(ipc_mux->dev, "ul_data_pend_bytes: %lld",
+			ipc_mux->ul_data_pend_bytes);
+
+	/* Reset the skb settings. */
+	skb->tail = 0;
+	skb->len = 0;
+
+	/* Add the consumed ADB to the free list. */
+	skb_queue_tail((&ipc_mux->ul_adb.free_list), skb);
+}
+
+/* Start the NETIF uplink send transfer in MUX mode. */
+static int ipc_mux_tq_ul_trigger_encode(struct iosm_imem *ipc_imem, int arg,
+					void *msg, size_t size)
+{
+	struct iosm_mux *ipc_mux = ipc_imem->mux;
+	bool ul_data_pend = false;
+
+	/* Add session UL data to a ADB and ADGH */
+	ul_data_pend = ipc_mux_ul_data_encode(ipc_mux);
+	if (ul_data_pend)
+		/* Delay the doorbell irq */
+		ipc_imem_td_update_timer_start(ipc_mux->imem);
+
+	/* reset the debounce flag */
+	ipc_mux->ev_mux_net_transmit_pending = false;
+
+	return 0;
+}
+
+int ipc_mux_ul_trigger_encode(struct iosm_mux *ipc_mux, int if_id,
+			      struct sk_buff *skb)
+{
+	struct mux_session *session = &ipc_mux->session[if_id];
+	int ret = -EINVAL;
+
+	if (ipc_mux->channel &&
+	    ipc_mux->channel->state != IMEM_CHANNEL_ACTIVE) {
+		dev_err(ipc_mux->dev,
+			"channel state is not IMEM_CHANNEL_ACTIVE");
+		goto out;
+	}
+
+	if (!session->wwan) {
+		dev_err(ipc_mux->dev, "session net ID is NULL");
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/* Session is under flow control.
+	 * Check if packet can be queued in session list, if not
+	 * suspend net tx
+	 */
+	if (skb_queue_len(&session->ul_list) >=
+	    (session->net_tx_stop ?
+		     IPC_MEM_MUX_UL_SESS_FCON_THRESHOLD :
+		     (IPC_MEM_MUX_UL_SESS_FCON_THRESHOLD *
+		      IPC_MEM_MUX_UL_SESS_FCOFF_THRESHOLD_FACTOR))) {
+		ipc_mux_netif_tx_flowctrl(session, session->if_id, true);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Add skb to the uplink skb accumulator. */
+	skb_queue_tail(&session->ul_list, skb);
+
+	/* Inform the IPC kthread to pass uplink IP packets to CP. */
+	if (!ipc_mux->ev_mux_net_transmit_pending) {
+		ipc_mux->ev_mux_net_transmit_pending = true;
+		ret = ipc_task_queue_send_task(ipc_mux->imem,
+					       ipc_mux_tq_ul_trigger_encode, 0,
+					       NULL, 0, false);
+		if (ret)
+			goto out;
+	}
+	dev_dbg(ipc_mux->dev, "mux ul if[%d] qlen=%d/%u, len=%d/%d, prio=%d",
+		if_id, skb_queue_len(&session->ul_list), session->ul_list.qlen,
+		skb->len, skb->truesize, skb->priority);
+	ret = 0;
+out:
+	return ret;
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
new file mode 100644
index 000000000000..4a74e3c9457f
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_MUX_CODEC_H
+#define IOSM_IPC_MUX_CODEC_H
+
+#include "iosm_ipc_mux.h"
+
+/* Queue level size and reporting
+ * >1 is enable, 0 is disable
+ */
+#define MUX_QUEUE_LEVEL 1
+
+/* Size of the buffer for the IP MUX commands. */
+#define MUX_MAX_UL_ACB_BUF_SIZE 256
+
+/* Maximum number of packets in a go per session */
+#define MUX_MAX_UL_DG_ENTRIES 100
+
+/* ADGH: Signature of the Datagram Header. */
+#define MUX_SIG_ADGH 0x48474441
+
+/* CMDH: Signature of the Command Header. */
+#define MUX_SIG_CMDH 0x48444D43
+
+/* QLTH: Signature of the Queue Level Table */
+#define MUX_SIG_QLTH 0x48544C51
+
+/* FCTH: Signature of the Flow Credit Table */
+#define MUX_SIG_FCTH 0x48544346
+
+/* MUX UL session threshold factor */
+#define IPC_MEM_MUX_UL_SESS_FCOFF_THRESHOLD_FACTOR (4)
+
+/* Size of the buffer for the IP MUX Lite data buffer. */
+#define IPC_MEM_MAX_DL_MUX_LITE_BUF_SIZE (2 * 1024)
+
+/* MUX UL session threshold in number of packets */
+#define IPC_MEM_MUX_UL_SESS_FCON_THRESHOLD (64)
+
+/* Default time out for sending IPC session commands like
+ * open session, close session etc
+ * unit : milliseconds
+ */
+#define IPC_MUX_CMD_RUN_DEFAULT_TIMEOUT 1000 /* 1 second */
+
+/* MUX UL flow control lower threshold in bytes */
+#define IPC_MEM_MUX_UL_FLOWCTRL_LOW_B 10240 /* 10KB */
+
+/* MUX UL flow control higher threshold in bytes (5ms worth of data)*/
+#define IPC_MEM_MUX_UL_FLOWCTRL_HIGH_B (110 * 1024)
+
+/**
+ * struct mux_adgh - Aggregated Datagram Header.
+ * @signature:		Signature of the Aggregated Datagram Header(0x48474441)
+ * @length:		Length (in bytes) of the datagram header. This length
+ *			shall include the header size. Min value: 0x10
+ * @if_id:		ID of the interface the datagrams belong to
+ * @opt_ipv4v6:		Indicates IPv4(=0)/IPv6(=1), It is optional if not
+ *			used set it to zero.
+ * @reserved:		Reserved bits. Set to zero.
+ * @service_class:	Service class identifier for the datagram.
+ * @next_count:		Count of the datagrams that shall be following this
+ *			datagrams for this interface. A count of zero means
+ *			the next datagram may not belong to this interface.
+ * @reserved1:		Reserved bytes, Set to zero
+ */
+struct mux_adgh {
+	__le32 signature;
+	__le16 length;
+	u8 if_id;
+	u8 opt_ipv4v6;
+	u8 service_class;
+	u8 next_count;
+	u8 reserved1[6];
+};
+
+/**
+ * struct mux_lite_cmdh - MUX Lite Command Header
+ * @signature:		Signature of the Command Header(0x48444D43)
+ * @cmd_len:		Length (in bytes) of the command. This length shall
+ *			include the header size. Minimum value: 0x10
+ * @if_id:		ID of the interface the commands in the table belong to.
+ * @reserved:		Reserved Set to zero.
+ * @command_type:	Command Enum.
+ * @transaction_id:	4 byte value shall be generated and sent along with a
+ *			command Responses and ACKs shall have the same
+ *			Transaction ID as their commands. It shall be unique to
+ *			the command transaction on the given interface.
+ * @param:		Optional parameters used with the command.
+ */
+struct mux_lite_cmdh {
+	__le32 signature;
+	__le16 cmd_len;
+	u8 if_id;
+	u8 reserved;
+	__le32 command_type;
+	__le32 transaction_id;
+	union mux_cmd_param param;
+};
+
+/**
+ * struct mux_lite_vfl - value field in generic table
+ * @nr_of_bytes:	Number of bytes available to transmit in the queue.
+ */
+struct mux_lite_vfl {
+	u32 nr_of_bytes;
+};
+
+/**
+ * struct ipc_mem_lite_gen_tbl - Generic table format for Queue Level
+ *				 and Flow Credit
+ * @signature:	Signature of the table
+ * @length:	Length of the table
+ * @if_id:	ID of the interface the table belongs to
+ * @vfl_length:	Value field length
+ * @reserved:	Reserved
+ * @vfl:	Value field of variable length
+ */
+struct ipc_mem_lite_gen_tbl {
+	__le32 signature;
+	__le16 length;
+	u8 if_id;
+	u8 vfl_length;
+	u32 reserved[2];
+	struct mux_lite_vfl vfl;
+};
+
+/**
+ * ipc_mux_dl_decode -Route the DL packet through the IP MUX layer
+ *		      depending on Header.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @skb:	Pointer to ipc_skb.
+ */
+void ipc_mux_dl_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb);
+
+/**
+ * ipc_mux_dl_acb_send_cmds - Respond to the Command blocks.
+ * @ipc_mux:		Pointer to MUX data-struct
+ * @cmd_type:		Command
+ * @if_id:		Session interface id.
+ * @transaction_id:	Command transaction id.
+ * @param:		Pointer to command params.
+ * @res_size:		Response size
+ * @blocking:		True for blocking send
+ * @respond:		If true return transaction ID
+ *
+ * Returns: 0 in success and failure value on error
+ */
+int ipc_mux_dl_acb_send_cmds(struct iosm_mux *ipc_mux, u32 cmd_type, u8 if_id,
+			     u32 transaction_id, union mux_cmd_param *param,
+			     size_t res_size, bool blocking, bool respond);
+
+/**
+ * ipc_mux_netif_tx_flowctrl - Enable/Disable TX flow control on MUX sessions.
+ * @session:	Pointer to mux_session struct
+ * @idx:	Session ID
+ * @on:		true for Enable and false for disable flow control
+ */
+void ipc_mux_netif_tx_flowctrl(struct mux_session *session, int idx, bool on);
+
+/**
+ * ipc_mux_ul_trigger_encode - Route the UL packet through the IP MUX layer
+ *			       for encoding.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @if_id:	Session ID.
+ * @skb:	Pointer to ipc_skb.
+ *
+ * Returns: 0 if successfully encoded
+ *	    failure value on error
+ *	    -EBUSY if packet has to be retransmitted.
+ */
+int ipc_mux_ul_trigger_encode(struct iosm_mux *ipc_mux, int if_id,
+			      struct sk_buff *skb);
+/**
+ * ipc_mux_ul_data_encode - UL encode function for calling from Tasklet context.
+ * @ipc_mux:	Pointer to MUX data-struct
+ *
+ * Returns: TRUE if any packet of any session is encoded FALSE otherwise.
+ */
+bool ipc_mux_ul_data_encode(struct iosm_mux *ipc_mux);
+
+/**
+ * ipc_mux_ul_encoded_process - Handles the Modem processed UL data by adding
+ *				the SKB to the UL free list.
+ * @ipc_mux:	Pointer to MUX data-struct
+ * @skb:	Pointer to ipc_skb.
+ */
+void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb);
+
+#endif

From be8c936e540fe6e60d03fa1578205c936e71335b Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:18 +0530
Subject: [PATCH 1610/3292] net: iosm: power management

Implements state machine to handle host & device sleep.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_pm.c | 333 ++++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_pm.h | 207 +++++++++++++++++
 2 files changed, 540 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_pm.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_pm.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_pm.c b/drivers/net/wwan/iosm/iosm_ipc_pm.c
new file mode 100644
index 000000000000..413601c72dcd
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_pm.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_protocol.h"
+
+/* Timeout value in MS for the PM to wait for device to reach active state */
+#define IPC_PM_ACTIVE_TIMEOUT_MS (500)
+
+/* Note that here "active" has the value 1, as compared to the enums
+ * ipc_mem_host_pm_state or ipc_mem_dev_pm_state, where "active" is 0
+ */
+#define IPC_PM_SLEEP (0)
+#define CONSUME_STATE (0)
+#define IPC_PM_ACTIVE (1)
+
+void ipc_pm_signal_hpda_doorbell(struct iosm_pm *ipc_pm, u32 identifier,
+				 bool host_slp_check)
+{
+	if (host_slp_check && ipc_pm->host_pm_state != IPC_MEM_HOST_PM_ACTIVE &&
+	    ipc_pm->host_pm_state != IPC_MEM_HOST_PM_ACTIVE_WAIT) {
+		ipc_pm->pending_hpda_update = true;
+		dev_dbg(ipc_pm->dev,
+			"Pend HPDA update set. Host PM_State: %d identifier:%d",
+			ipc_pm->host_pm_state, identifier);
+		return;
+	}
+
+	if (!ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_IRQ, true)) {
+		ipc_pm->pending_hpda_update = true;
+		dev_dbg(ipc_pm->dev, "Pending HPDA update set. identifier:%d",
+			identifier);
+		return;
+	}
+	ipc_pm->pending_hpda_update = false;
+
+	/* Trigger the irq towards CP */
+	ipc_cp_irq_hpda_update(ipc_pm->pcie, identifier);
+
+	ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_IRQ, false);
+}
+
+/* Wake up the device if it is in low power mode. */
+static bool ipc_pm_link_activate(struct iosm_pm *ipc_pm)
+{
+	if (ipc_pm->cp_state == IPC_MEM_DEV_PM_ACTIVE)
+		return true;
+
+	if (ipc_pm->cp_state == IPC_MEM_DEV_PM_SLEEP) {
+		if (ipc_pm->ap_state == IPC_MEM_DEV_PM_SLEEP) {
+			/* Wake up the device. */
+			ipc_cp_irq_sleep_control(ipc_pm->pcie,
+						 IPC_MEM_DEV_PM_WAKEUP);
+			ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE_WAIT;
+
+			goto not_active;
+		}
+
+		if (ipc_pm->ap_state == IPC_MEM_DEV_PM_ACTIVE_WAIT)
+			goto not_active;
+
+		return true;
+	}
+
+not_active:
+	/* link is not ready */
+	return false;
+}
+
+bool ipc_pm_wait_for_device_active(struct iosm_pm *ipc_pm)
+{
+	bool ret_val = false;
+
+	if (ipc_pm->ap_state != IPC_MEM_DEV_PM_ACTIVE) {
+		/* Complete all memory stores before setting bit */
+		smp_mb__before_atomic();
+
+		/* Wait for IPC_PM_ACTIVE_TIMEOUT_MS for Device sleep state
+		 * machine to enter ACTIVE state.
+		 */
+		set_bit(0, &ipc_pm->host_sleep_pend);
+
+		/* Complete all memory stores after setting bit */
+		smp_mb__after_atomic();
+
+		if (!wait_for_completion_interruptible_timeout
+		   (&ipc_pm->host_sleep_complete,
+		    msecs_to_jiffies(IPC_PM_ACTIVE_TIMEOUT_MS))) {
+			dev_err(ipc_pm->dev,
+				"PM timeout. Expected State:%d. Actual: %d",
+				IPC_MEM_DEV_PM_ACTIVE, ipc_pm->ap_state);
+			goto  active_timeout;
+		}
+	}
+
+	ret_val = true;
+active_timeout:
+	/* Complete all memory stores before clearing bit */
+	smp_mb__before_atomic();
+
+	/* Reset the atomic variable in any case as device sleep
+	 * state machine change is no longer of interest.
+	 */
+	clear_bit(0, &ipc_pm->host_sleep_pend);
+
+	/* Complete all memory stores after clearing bit */
+	smp_mb__after_atomic();
+
+	return ret_val;
+}
+
+static void ipc_pm_on_link_sleep(struct iosm_pm *ipc_pm)
+{
+	/* pending sleep ack and all conditions are cleared
+	 * -> signal SLEEP__ACK to CP
+	 */
+	ipc_pm->cp_state = IPC_MEM_DEV_PM_SLEEP;
+	ipc_pm->ap_state = IPC_MEM_DEV_PM_SLEEP;
+
+	ipc_cp_irq_sleep_control(ipc_pm->pcie, IPC_MEM_DEV_PM_SLEEP);
+}
+
+static void ipc_pm_on_link_wake(struct iosm_pm *ipc_pm, bool ack)
+{
+	ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE;
+
+	if (ack) {
+		ipc_pm->cp_state = IPC_MEM_DEV_PM_ACTIVE;
+
+		ipc_cp_irq_sleep_control(ipc_pm->pcie, IPC_MEM_DEV_PM_ACTIVE);
+
+		/* check the consume state !!! */
+		if (test_bit(CONSUME_STATE, &ipc_pm->host_sleep_pend))
+			complete(&ipc_pm->host_sleep_complete);
+	}
+
+	/* Check for pending HPDA update.
+	 * Pending HP update could be because of sending message was
+	 * put on hold due to Device sleep state or due to TD update
+	 * which could be because of Device Sleep and Host Sleep
+	 * states.
+	 */
+	if (ipc_pm->pending_hpda_update &&
+	    ipc_pm->host_pm_state == IPC_MEM_HOST_PM_ACTIVE)
+		ipc_pm_signal_hpda_doorbell(ipc_pm, IPC_HP_PM_TRIGGER, true);
+}
+
+bool ipc_pm_trigger(struct iosm_pm *ipc_pm, enum ipc_pm_unit unit, bool active)
+{
+	union ipc_pm_cond old_cond;
+	union ipc_pm_cond new_cond;
+	bool link_active;
+
+	/* Save the current D3 state. */
+	new_cond = ipc_pm->pm_cond;
+	old_cond = ipc_pm->pm_cond;
+
+	/* Calculate the power state only in the runtime phase. */
+	switch (unit) {
+	case IPC_PM_UNIT_IRQ: /* CP irq */
+		new_cond.irq = active;
+		break;
+
+	case IPC_PM_UNIT_LINK: /* Device link state. */
+		new_cond.link = active;
+		break;
+
+	case IPC_PM_UNIT_HS: /* Host sleep trigger requires Link. */
+		new_cond.hs = active;
+		break;
+
+	default:
+		break;
+	}
+
+	/* Something changed ? */
+	if (old_cond.raw == new_cond.raw) {
+		/* Stay in the current PM state. */
+		link_active = old_cond.link == IPC_PM_ACTIVE;
+		goto ret;
+	}
+
+	ipc_pm->pm_cond = new_cond;
+
+	if (new_cond.link)
+		ipc_pm_on_link_wake(ipc_pm, unit == IPC_PM_UNIT_LINK);
+	else if (unit == IPC_PM_UNIT_LINK)
+		ipc_pm_on_link_sleep(ipc_pm);
+
+	if (old_cond.link == IPC_PM_SLEEP && new_cond.raw) {
+		link_active = ipc_pm_link_activate(ipc_pm);
+		goto ret;
+	}
+
+	link_active = old_cond.link == IPC_PM_ACTIVE;
+
+ret:
+	return link_active;
+}
+
+bool ipc_pm_prepare_host_sleep(struct iosm_pm *ipc_pm)
+{
+	/* suspend not allowed if host_pm_state is not IPC_MEM_HOST_PM_ACTIVE */
+	if (ipc_pm->host_pm_state != IPC_MEM_HOST_PM_ACTIVE) {
+		dev_err(ipc_pm->dev, "host_pm_state=%d\tExpected to be: %d",
+			ipc_pm->host_pm_state, IPC_MEM_HOST_PM_ACTIVE);
+		return false;
+	}
+
+	ipc_pm->host_pm_state = IPC_MEM_HOST_PM_SLEEP_WAIT_D3;
+
+	return true;
+}
+
+bool ipc_pm_prepare_host_active(struct iosm_pm *ipc_pm)
+{
+	if (ipc_pm->host_pm_state != IPC_MEM_HOST_PM_SLEEP) {
+		dev_err(ipc_pm->dev, "host_pm_state=%d\tExpected to be: %d",
+			ipc_pm->host_pm_state, IPC_MEM_HOST_PM_SLEEP);
+		return false;
+	}
+
+	/* Sending Sleep Exit message to CP. Update the state */
+	ipc_pm->host_pm_state = IPC_MEM_HOST_PM_ACTIVE_WAIT;
+
+	return true;
+}
+
+void ipc_pm_set_s2idle_sleep(struct iosm_pm *ipc_pm, bool sleep)
+{
+	if (sleep) {
+		ipc_pm->ap_state = IPC_MEM_DEV_PM_SLEEP;
+		ipc_pm->cp_state = IPC_MEM_DEV_PM_SLEEP;
+		ipc_pm->device_sleep_notification = IPC_MEM_DEV_PM_SLEEP;
+	} else {
+		ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE;
+		ipc_pm->cp_state = IPC_MEM_DEV_PM_ACTIVE;
+		ipc_pm->device_sleep_notification = IPC_MEM_DEV_PM_ACTIVE;
+		ipc_pm->pm_cond.link = IPC_PM_ACTIVE;
+	}
+}
+
+bool ipc_pm_dev_slp_notification(struct iosm_pm *ipc_pm, u32 cp_pm_req)
+{
+	if (cp_pm_req == ipc_pm->device_sleep_notification)
+		return false;
+
+	ipc_pm->device_sleep_notification = cp_pm_req;
+
+	/* Evaluate the PM request. */
+	switch (ipc_pm->cp_state) {
+	case IPC_MEM_DEV_PM_ACTIVE:
+		switch (cp_pm_req) {
+		case IPC_MEM_DEV_PM_ACTIVE:
+			break;
+
+		case IPC_MEM_DEV_PM_SLEEP:
+			/* Inform the PM that the device link can go down. */
+			ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_LINK, false);
+			return true;
+
+		default:
+			dev_err(ipc_pm->dev,
+				"loc-pm=%d active: confused req-pm=%d",
+				ipc_pm->cp_state, cp_pm_req);
+			break;
+		}
+		break;
+
+	case IPC_MEM_DEV_PM_SLEEP:
+		switch (cp_pm_req) {
+		case IPC_MEM_DEV_PM_ACTIVE:
+			/* Inform the PM that the device link is active. */
+			ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_LINK, true);
+			break;
+
+		case IPC_MEM_DEV_PM_SLEEP:
+			break;
+
+		default:
+			dev_err(ipc_pm->dev,
+				"loc-pm=%d sleep: confused req-pm=%d",
+				ipc_pm->cp_state, cp_pm_req);
+			break;
+		}
+		break;
+
+	default:
+		dev_err(ipc_pm->dev, "confused loc-pm=%d, req-pm=%d",
+			ipc_pm->cp_state, cp_pm_req);
+		break;
+	}
+
+	return false;
+}
+
+void ipc_pm_init(struct iosm_protocol *ipc_protocol)
+{
+	struct iosm_imem *ipc_imem = ipc_protocol->imem;
+	struct iosm_pm *ipc_pm = &ipc_protocol->pm;
+
+	ipc_pm->pcie = ipc_imem->pcie;
+	ipc_pm->dev = ipc_imem->dev;
+
+	ipc_pm->pm_cond.irq = IPC_PM_SLEEP;
+	ipc_pm->pm_cond.hs = IPC_PM_SLEEP;
+	ipc_pm->pm_cond.link = IPC_PM_ACTIVE;
+
+	ipc_pm->cp_state = IPC_MEM_DEV_PM_ACTIVE;
+	ipc_pm->ap_state = IPC_MEM_DEV_PM_ACTIVE;
+	ipc_pm->host_pm_state = IPC_MEM_HOST_PM_ACTIVE;
+
+	/* Create generic wait-for-completion handler for Host Sleep
+	 * and device sleep coordination.
+	 */
+	init_completion(&ipc_pm->host_sleep_complete);
+
+	/* Complete all memory stores before clearing bit */
+	smp_mb__before_atomic();
+
+	clear_bit(0, &ipc_pm->host_sleep_pend);
+
+	/* Complete all memory stores after clearing bit */
+	smp_mb__after_atomic();
+}
+
+void ipc_pm_deinit(struct iosm_protocol *proto)
+{
+	struct iosm_pm *ipc_pm = &proto->pm;
+
+	complete(&ipc_pm->host_sleep_complete);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_pm.h b/drivers/net/wwan/iosm/iosm_ipc_pm.h
new file mode 100644
index 000000000000..e7c00f388cb0
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_pm.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PM_H
+#define IOSM_IPC_PM_H
+
+/* Trigger the doorbell interrupt on cp to change the PM sleep/active status */
+#define ipc_cp_irq_sleep_control(ipc_pcie, data)                               \
+	ipc_doorbell_fire(ipc_pcie, IPC_DOORBELL_IRQ_SLEEP, data)
+
+/* Trigger the doorbell interrupt on CP to do hpda update */
+#define ipc_cp_irq_hpda_update(ipc_pcie, data)                                 \
+	ipc_doorbell_fire(ipc_pcie, IPC_DOORBELL_IRQ_HPDA, 0xFF & (data))
+
+/**
+ * union ipc_pm_cond - Conditions for D3 and the sleep message to CP.
+ * @raw:	raw/combined value for faster check
+ * @irq:	IRQ towards CP
+ * @hs:		Host Sleep
+ * @link:	Device link state.
+ */
+union ipc_pm_cond {
+	unsigned int raw;
+
+	struct {
+		unsigned int irq:1,
+			     hs:1,
+			     link:1;
+	};
+};
+
+/**
+ * enum ipc_mem_host_pm_state - Possible states of the HOST SLEEP finite state
+ *				machine.
+ * @IPC_MEM_HOST_PM_ACTIVE:		   Host is active
+ * @IPC_MEM_HOST_PM_ACTIVE_WAIT:	   Intermediate state before going to
+ *					   active
+ * @IPC_MEM_HOST_PM_SLEEP_WAIT_IDLE:	   Intermediate state to wait for idle
+ *					   before going into sleep
+ * @IPC_MEM_HOST_PM_SLEEP_WAIT_D3:	   Intermediate state to wait for D3
+ *					   before going to sleep
+ * @IPC_MEM_HOST_PM_SLEEP:		   after this state the interface is not
+ *					   accessible host is in suspend to RAM
+ * @IPC_MEM_HOST_PM_SLEEP_WAIT_EXIT_SLEEP: Intermediate state before exiting
+ *					   sleep
+ */
+enum ipc_mem_host_pm_state {
+	IPC_MEM_HOST_PM_ACTIVE,
+	IPC_MEM_HOST_PM_ACTIVE_WAIT,
+	IPC_MEM_HOST_PM_SLEEP_WAIT_IDLE,
+	IPC_MEM_HOST_PM_SLEEP_WAIT_D3,
+	IPC_MEM_HOST_PM_SLEEP,
+	IPC_MEM_HOST_PM_SLEEP_WAIT_EXIT_SLEEP,
+};
+
+/**
+ * enum ipc_mem_dev_pm_state - Possible states of the DEVICE SLEEP finite state
+ *			       machine.
+ * @IPC_MEM_DEV_PM_ACTIVE:		IPC_MEM_DEV_PM_ACTIVE is the initial
+ *					power management state.
+ *					IRQ(struct ipc_mem_device_info:
+ *					device_sleep_notification)
+ *					and DOORBELL-IRQ-HPDA(data) values.
+ * @IPC_MEM_DEV_PM_SLEEP:		IPC_MEM_DEV_PM_SLEEP is PM state for
+ *					sleep.
+ * @IPC_MEM_DEV_PM_WAKEUP:		DOORBELL-IRQ-DEVICE_WAKE(data).
+ * @IPC_MEM_DEV_PM_HOST_SLEEP:		DOORBELL-IRQ-HOST_SLEEP(data).
+ * @IPC_MEM_DEV_PM_ACTIVE_WAIT:		Local intermediate states.
+ * @IPC_MEM_DEV_PM_FORCE_SLEEP:		DOORBELL-IRQ-FORCE_SLEEP.
+ * @IPC_MEM_DEV_PM_FORCE_ACTIVE:	DOORBELL-IRQ-FORCE_ACTIVE.
+ */
+enum ipc_mem_dev_pm_state {
+	IPC_MEM_DEV_PM_ACTIVE,
+	IPC_MEM_DEV_PM_SLEEP,
+	IPC_MEM_DEV_PM_WAKEUP,
+	IPC_MEM_DEV_PM_HOST_SLEEP,
+	IPC_MEM_DEV_PM_ACTIVE_WAIT,
+	IPC_MEM_DEV_PM_FORCE_SLEEP = 7,
+	IPC_MEM_DEV_PM_FORCE_ACTIVE,
+};
+
+/**
+ * struct iosm_pm - Power management instance
+ * @pcie:			Pointer to iosm_pcie structure
+ * @dev:			Pointer to device structure
+ * @host_pm_state:		PM states for host
+ * @host_sleep_pend:		Variable to indicate Host Sleep Pending
+ * @host_sleep_complete:	Generic wait-for-completion used in
+ *				case of Host Sleep
+ * @pm_cond:			Conditions for power management
+ * @ap_state:			Current power management state, the
+ *				initial state is IPC_MEM_DEV_PM_ACTIVE eq. 0.
+ * @cp_state:			PM State of CP
+ * @device_sleep_notification:	last handled device_sleep_notfication
+ * @pending_hpda_update:	is a HPDA update pending?
+ */
+struct iosm_pm {
+	struct iosm_pcie *pcie;
+	struct device *dev;
+	enum ipc_mem_host_pm_state host_pm_state;
+	unsigned long host_sleep_pend;
+	struct completion host_sleep_complete;
+	union ipc_pm_cond pm_cond;
+	enum ipc_mem_dev_pm_state ap_state;
+	enum ipc_mem_dev_pm_state cp_state;
+	u32 device_sleep_notification;
+	u8 pending_hpda_update:1;
+};
+
+/**
+ * enum ipc_pm_unit - Power management units.
+ * @IPC_PM_UNIT_IRQ:	IRQ towards CP
+ * @IPC_PM_UNIT_HS:	Host Sleep for converged protocol
+ * @IPC_PM_UNIT_LINK:	Link state controlled by CP.
+ */
+enum ipc_pm_unit {
+	IPC_PM_UNIT_IRQ,
+	IPC_PM_UNIT_HS,
+	IPC_PM_UNIT_LINK,
+};
+
+/**
+ * ipc_pm_init - Allocate power management component
+ * @ipc_protocol:	Pointer to iosm_protocol structure
+ */
+void ipc_pm_init(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_pm_deinit - Free power management component, invalidating its pointer.
+ * @ipc_protocol:	Pointer to iosm_protocol structure
+ */
+void ipc_pm_deinit(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_pm_dev_slp_notification - Handle a sleep notification message from the
+ *				 device. This can be called from interrupt state
+ *				 This function handles Host Sleep requests too
+ *				 if the Host Sleep protocol is register based.
+ * @ipc_pm:			Pointer to power management component
+ * @sleep_notification:		Actual notification from device
+ *
+ * Returns: true if dev sleep state has to be checked, false otherwise.
+ */
+bool ipc_pm_dev_slp_notification(struct iosm_pm *ipc_pm,
+				 u32 sleep_notification);
+
+/**
+ * ipc_pm_set_s2idle_sleep - Set PM variables to sleep/active
+ * @ipc_pm:	Pointer to power management component
+ * @sleep:	true to enter sleep/false to exit sleep
+ */
+void ipc_pm_set_s2idle_sleep(struct iosm_pm *ipc_pm, bool sleep);
+
+/**
+ * ipc_pm_prepare_host_sleep - Prepare the PM for sleep by entering
+ *			       IPC_MEM_HOST_PM_SLEEP_WAIT_D3 state.
+ * @ipc_pm:	Pointer to power management component
+ *
+ * Returns: true on success, false if the host was not active.
+ */
+bool ipc_pm_prepare_host_sleep(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_prepare_host_active - Prepare the PM for wakeup by entering
+ *				IPC_MEM_HOST_PM_ACTIVE_WAIT state.
+ * @ipc_pm:	Pointer to power management component
+ *
+ * Returns: true on success, false if the host was not sleeping.
+ */
+bool ipc_pm_prepare_host_active(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_wait_for_device_active - Wait upto IPC_PM_ACTIVE_TIMEOUT_MS ms
+ *				   for the device to reach active state
+ * @ipc_pm:	Pointer to power management component
+ *
+ * Returns: true if device is active, false on timeout
+ */
+bool ipc_pm_wait_for_device_active(struct iosm_pm *ipc_pm);
+
+/**
+ * ipc_pm_signal_hpda_doorbell - Wake up the device if it is in low power mode
+ *				 and trigger a head pointer update interrupt.
+ * @ipc_pm:		Pointer to power management component
+ * @identifier:		specifies what component triggered hpda update irq
+ * @host_slp_check:	if set to true then Host Sleep state machine check will
+ *			be performed. If Host Sleep state machine allows HP
+ *			update then only doorbell is triggered otherwise pending
+ *			flag will be set. If set to false then Host Sleep check
+ *			will not be performed. This is helpful for Host Sleep
+ *			negotiation through message ring.
+ */
+void ipc_pm_signal_hpda_doorbell(struct iosm_pm *ipc_pm, u32 identifier,
+				 bool host_slp_check);
+/**
+ * ipc_pm_trigger - Update power manager and wake up the link if needed
+ * @ipc_pm:	Pointer to power management component
+ * @unit:	Power management units
+ * @active:	Device link state
+ *
+ * Returns: true if link is unchanged or active, false otherwise
+ */
+bool ipc_pm_trigger(struct iosm_pm *ipc_pm, enum ipc_pm_unit unit, bool active);
+
+#endif

From faed4c6f6f486fbd0bde233dd46beb26ebdb1ab2 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:19 +0530
Subject: [PATCH 1611/3292] net: iosm: shared memory protocol

1) Defines messaging protocol for handling Transfer Descriptor
   in both UL/DL direction.
2) Ring buffer management.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_protocol.c | 283 ++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_protocol.h | 237 ++++++++++++++++++
 2 files changed, 520 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.c b/drivers/net/wwan/iosm/iosm_ipc_protocol.c
new file mode 100644
index 000000000000..834d8b146a94
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_protocol.h"
+#include "iosm_ipc_protocol_ops.h"
+#include "iosm_ipc_pm.h"
+#include "iosm_ipc_task_queue.h"
+
+int ipc_protocol_tq_msg_send(struct iosm_protocol *ipc_protocol,
+			     enum ipc_msg_prep_type msg_type,
+			     union ipc_msg_prep_args *prep_args,
+			     struct ipc_rsp *response)
+{
+	int index = ipc_protocol_msg_prep(ipc_protocol->imem, msg_type,
+					  prep_args);
+
+	/* Store reference towards caller specified response in response ring
+	 * and signal CP
+	 */
+	if (index >= 0 && index < IPC_MEM_MSG_ENTRIES) {
+		ipc_protocol->rsp_ring[index] = response;
+		ipc_protocol_msg_hp_update(ipc_protocol->imem);
+	}
+
+	return index;
+}
+
+/* Callback for message send */
+static int ipc_protocol_tq_msg_send_cb(struct iosm_imem *ipc_imem, int arg,
+				       void *msg, size_t size)
+{
+	struct ipc_call_msg_send_args *send_args = msg;
+	struct iosm_protocol *ipc_protocol = ipc_imem->ipc_protocol;
+
+	return ipc_protocol_tq_msg_send(ipc_protocol, send_args->msg_type,
+					send_args->prep_args,
+					send_args->response);
+}
+
+/* Remove reference to a response. This is typically used when a requestor timed
+ * out and is no longer interested in the response.
+ */
+static int ipc_protocol_tq_msg_remove(struct iosm_imem *ipc_imem, int arg,
+				      void *msg, size_t size)
+{
+	struct iosm_protocol *ipc_protocol = ipc_imem->ipc_protocol;
+
+	ipc_protocol->rsp_ring[arg] = NULL;
+	return 0;
+}
+
+int ipc_protocol_msg_send(struct iosm_protocol *ipc_protocol,
+			  enum ipc_msg_prep_type prep,
+			  union ipc_msg_prep_args *prep_args)
+{
+	struct ipc_call_msg_send_args send_args;
+	unsigned int exec_timeout;
+	struct ipc_rsp response;
+	int index;
+
+	exec_timeout = (ipc_protocol_get_ap_exec_stage(ipc_protocol) ==
+					IPC_MEM_EXEC_STAGE_RUN ?
+				IPC_MSG_COMPLETE_RUN_DEFAULT_TIMEOUT :
+				IPC_MSG_COMPLETE_BOOT_DEFAULT_TIMEOUT);
+
+	/* Trap if called from non-preemptible context */
+	might_sleep();
+
+	response.status = IPC_MEM_MSG_CS_INVALID;
+	init_completion(&response.completion);
+
+	send_args.msg_type = prep;
+	send_args.prep_args = prep_args;
+	send_args.response = &response;
+
+	/* Allocate and prepare message to be sent in tasklet context.
+	 * A positive index returned form tasklet_call references the message
+	 * in case it needs to be cancelled when there is a timeout.
+	 */
+	index = ipc_task_queue_send_task(ipc_protocol->imem,
+					 ipc_protocol_tq_msg_send_cb, 0,
+					 &send_args, 0, true);
+
+	if (index < 0) {
+		dev_err(ipc_protocol->dev, "msg %d failed", prep);
+		return index;
+	}
+
+	/* Wait for the device to respond to the message */
+	switch (wait_for_completion_timeout(&response.completion,
+					    msecs_to_jiffies(exec_timeout))) {
+	case 0:
+		/* Timeout, there was no response from the device.
+		 * Remove the reference to the local response completion
+		 * object as we are no longer interested in the response.
+		 */
+		ipc_task_queue_send_task(ipc_protocol->imem,
+					 ipc_protocol_tq_msg_remove, index,
+					 NULL, 0, true);
+		dev_err(ipc_protocol->dev, "msg timeout");
+		ipc_uevent_send(ipc_protocol->pcie->dev, UEVENT_MDM_TIMEOUT);
+		break;
+	default:
+		/* We got a response in time; check completion status: */
+		if (response.status != IPC_MEM_MSG_CS_SUCCESS) {
+			dev_err(ipc_protocol->dev,
+				"msg completion status error %d",
+				response.status);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int ipc_protocol_msg_send_host_sleep(struct iosm_protocol *ipc_protocol,
+					    u32 state)
+{
+	union ipc_msg_prep_args prep_args = {
+		.sleep.target = 0,
+		.sleep.state = state,
+	};
+
+	return ipc_protocol_msg_send(ipc_protocol, IPC_MSG_PREP_SLEEP,
+				     &prep_args);
+}
+
+void ipc_protocol_doorbell_trigger(struct iosm_protocol *ipc_protocol,
+				   u32 identifier)
+{
+	ipc_pm_signal_hpda_doorbell(&ipc_protocol->pm, identifier, true);
+}
+
+bool ipc_protocol_pm_dev_sleep_handle(struct iosm_protocol *ipc_protocol)
+{
+	u32 ipc_status = ipc_protocol_get_ipc_status(ipc_protocol);
+	u32 requested;
+
+	if (ipc_status != IPC_MEM_DEVICE_IPC_RUNNING) {
+		dev_err(ipc_protocol->dev,
+			"irq ignored, CP IPC state is %d, should be RUNNING",
+			ipc_status);
+
+		/* Stop further processing. */
+		return false;
+	}
+
+	/* Get a copy of the requested PM state by the device and the local
+	 * device PM state.
+	 */
+	requested = ipc_protocol_pm_dev_get_sleep_notification(ipc_protocol);
+
+	return ipc_pm_dev_slp_notification(&ipc_protocol->pm, requested);
+}
+
+static int ipc_protocol_tq_wakeup_dev_slp(struct iosm_imem *ipc_imem, int arg,
+					  void *msg, size_t size)
+{
+	struct iosm_pm *ipc_pm = &ipc_imem->ipc_protocol->pm;
+
+	/* Wakeup from device sleep if it is not ACTIVE */
+	ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_HS, true);
+
+	ipc_pm_trigger(ipc_pm, IPC_PM_UNIT_HS, false);
+
+	return 0;
+}
+
+void ipc_protocol_s2idle_sleep(struct iosm_protocol *ipc_protocol, bool sleep)
+{
+	ipc_pm_set_s2idle_sleep(&ipc_protocol->pm, sleep);
+}
+
+bool ipc_protocol_suspend(struct iosm_protocol *ipc_protocol)
+{
+	if (!ipc_pm_prepare_host_sleep(&ipc_protocol->pm))
+		goto err;
+
+	ipc_task_queue_send_task(ipc_protocol->imem,
+				 ipc_protocol_tq_wakeup_dev_slp, 0, NULL, 0,
+				 true);
+
+	if (!ipc_pm_wait_for_device_active(&ipc_protocol->pm)) {
+		ipc_uevent_send(ipc_protocol->pcie->dev, UEVENT_MDM_TIMEOUT);
+		goto err;
+	}
+
+	/* Send the sleep message for sync sys calls. */
+	dev_dbg(ipc_protocol->dev, "send TARGET_HOST, ENTER_SLEEP");
+	if (ipc_protocol_msg_send_host_sleep(ipc_protocol,
+					     IPC_HOST_SLEEP_ENTER_SLEEP)) {
+		/* Sending ENTER_SLEEP message failed, we are still active */
+		ipc_protocol->pm.host_pm_state = IPC_MEM_HOST_PM_ACTIVE;
+		goto err;
+	}
+
+	ipc_protocol->pm.host_pm_state = IPC_MEM_HOST_PM_SLEEP;
+	return true;
+err:
+	return false;
+}
+
+bool ipc_protocol_resume(struct iosm_protocol *ipc_protocol)
+{
+	if (!ipc_pm_prepare_host_active(&ipc_protocol->pm))
+		return false;
+
+	dev_dbg(ipc_protocol->dev, "send TARGET_HOST, EXIT_SLEEP");
+	if (ipc_protocol_msg_send_host_sleep(ipc_protocol,
+					     IPC_HOST_SLEEP_EXIT_SLEEP)) {
+		ipc_protocol->pm.host_pm_state = IPC_MEM_HOST_PM_SLEEP;
+		return false;
+	}
+
+	ipc_protocol->pm.host_pm_state = IPC_MEM_HOST_PM_ACTIVE;
+
+	return true;
+}
+
+struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem)
+{
+	struct iosm_protocol *ipc_protocol =
+		kzalloc(sizeof(*ipc_protocol), GFP_KERNEL);
+	struct ipc_protocol_context_info *p_ci;
+	u64 addr;
+
+	if (!ipc_protocol)
+		return NULL;
+
+	ipc_protocol->dev = ipc_imem->dev;
+	ipc_protocol->pcie = ipc_imem->pcie;
+	ipc_protocol->imem = ipc_imem;
+	ipc_protocol->p_ap_shm = NULL;
+	ipc_protocol->phy_ap_shm = 0;
+
+	ipc_protocol->old_msg_tail = 0;
+
+	ipc_protocol->p_ap_shm =
+		pci_alloc_consistent(ipc_protocol->pcie->pci,
+				     sizeof(*ipc_protocol->p_ap_shm),
+				     &ipc_protocol->phy_ap_shm);
+
+	if (!ipc_protocol->p_ap_shm) {
+		dev_err(ipc_protocol->dev, "pci shm alloc error");
+		kfree(ipc_protocol);
+		return NULL;
+	}
+
+	/* Prepare the context info for CP. */
+	addr = ipc_protocol->phy_ap_shm;
+	p_ci = &ipc_protocol->p_ap_shm->ci;
+	p_ci->device_info_addr =
+		addr + offsetof(struct ipc_protocol_ap_shm, device_info);
+	p_ci->head_array =
+		addr + offsetof(struct ipc_protocol_ap_shm, head_array);
+	p_ci->tail_array =
+		addr + offsetof(struct ipc_protocol_ap_shm, tail_array);
+	p_ci->msg_head = addr + offsetof(struct ipc_protocol_ap_shm, msg_head);
+	p_ci->msg_tail = addr + offsetof(struct ipc_protocol_ap_shm, msg_tail);
+	p_ci->msg_ring_addr =
+		addr + offsetof(struct ipc_protocol_ap_shm, msg_ring);
+	p_ci->msg_ring_entries = cpu_to_le16(IPC_MEM_MSG_ENTRIES);
+	p_ci->msg_irq_vector = IPC_MSG_IRQ_VECTOR;
+	p_ci->device_info_irq_vector = IPC_DEVICE_IRQ_VECTOR;
+
+	ipc_mmio_set_contex_info_addr(ipc_imem->mmio, addr);
+
+	ipc_pm_init(ipc_protocol);
+
+	return ipc_protocol;
+}
+
+void ipc_protocol_deinit(struct iosm_protocol *proto)
+{
+	pci_free_consistent(proto->pcie->pci, sizeof(*proto->p_ap_shm),
+			    proto->p_ap_shm, proto->phy_ap_shm);
+
+	ipc_pm_deinit(proto);
+	kfree(proto);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
new file mode 100644
index 000000000000..9b3a6d86ece7
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PROTOCOL_H
+#define IOSM_IPC_PROTOCOL_H
+
+#include "iosm_ipc_imem.h"
+#include "iosm_ipc_pm.h"
+#include "iosm_ipc_protocol_ops.h"
+
+/* Trigger the doorbell interrupt on CP. */
+#define IPC_DOORBELL_IRQ_HPDA 0
+#define IPC_DOORBELL_IRQ_IPC 1
+#define IPC_DOORBELL_IRQ_SLEEP 2
+
+/* IRQ vector number. */
+#define IPC_DEVICE_IRQ_VECTOR 0
+#define IPC_MSG_IRQ_VECTOR 0
+#define IPC_UL_PIPE_IRQ_VECTOR 0
+#define IPC_DL_PIPE_IRQ_VECTOR 0
+
+#define IPC_MEM_MSG_ENTRIES 128
+
+/* Default time out for sending IPC messages like open pipe, close pipe etc.
+ * during run mode.
+ *
+ * If the message interface lock to CP times out, the link to CP is broken.
+ * mode : run mode (IPC_MEM_EXEC_STAGE_RUN)
+ * unit : milliseconds
+ */
+#define IPC_MSG_COMPLETE_RUN_DEFAULT_TIMEOUT 500 /* 0.5 seconds */
+
+/* Default time out for sending IPC messages like open pipe, close pipe etc.
+ * during boot mode.
+ *
+ * If the message interface lock to CP times out, the link to CP is broken.
+ * mode : boot mode
+ * (IPC_MEM_EXEC_STAGE_BOOT | IPC_MEM_EXEC_STAGE_PSI | IPC_MEM_EXEC_STAGE_EBL)
+ * unit : milliseconds
+ */
+#define IPC_MSG_COMPLETE_BOOT_DEFAULT_TIMEOUT 500 /* 0.5 seconds */
+
+/**
+ * struct ipc_protocol_context_info - Structure of the context info
+ * @device_info_addr:		64 bit address to device info
+ * @head_array:			64 bit address to head pointer arr for the pipes
+ * @tail_array:			64 bit address to tail pointer arr for the pipes
+ * @msg_head:			64 bit address to message head pointer
+ * @msg_tail:			64 bit address to message tail pointer
+ * @msg_ring_addr:		64 bit pointer to the message ring buffer
+ * @msg_ring_entries:		This field provides the number of entries which
+ *				the MR can hold
+ * @msg_irq_vector:		This field provides the IRQ which shall be
+ *				generated by the EP device when generating
+ *				completion for Messages.
+ * @device_info_irq_vector:	This field provides the IRQ which shall be
+ *				generated by the EP dev after updating Dev. Info
+ */
+struct ipc_protocol_context_info {
+	phys_addr_t device_info_addr;
+	phys_addr_t head_array;
+	phys_addr_t tail_array;
+	phys_addr_t msg_head;
+	phys_addr_t msg_tail;
+	phys_addr_t msg_ring_addr;
+	__le16 msg_ring_entries;
+	u8 msg_irq_vector;
+	u8 device_info_irq_vector;
+};
+
+/**
+ * struct ipc_protocol_device_info - Structure for the device information
+ * @execution_stage:		CP execution stage
+ * @ipc_status:			IPC states
+ * @device_sleep_notification:	Requested device pm states
+ */
+struct ipc_protocol_device_info {
+	__le32 execution_stage;
+	__le32 ipc_status;
+	__le32 device_sleep_notification;
+};
+
+/**
+ * struct ipc_protocol_ap_shm - Protocol Shared Memory Structure
+ * @ci:			Context information struct
+ * @device_info:	Device information struct
+ * @msg_head:		Point to msg head
+ * @head_array:		Array of head pointer
+ * @msg_tail:		Point to msg tail
+ * @tail_array:		Array of tail pointer
+ * @msg_ring:		Circular buffers for the read/tail and write/head
+ *			indeces.
+ */
+struct ipc_protocol_ap_shm {
+	struct ipc_protocol_context_info ci;
+	struct ipc_protocol_device_info device_info;
+	__le32 msg_head;
+	__le32 head_array[IPC_MEM_MAX_PIPES];
+	__le32 msg_tail;
+	__le32 tail_array[IPC_MEM_MAX_PIPES];
+	union ipc_mem_msg_entry msg_ring[IPC_MEM_MSG_ENTRIES];
+};
+
+/**
+ * struct iosm_protocol - Structure for IPC protocol.
+ * @p_ap_shm:		Pointer to Protocol Shared Memory Structure
+ * @pm:			Instance to struct iosm_pm
+ * @pcie:		Pointer to struct iosm_pcie
+ * @imem:		Pointer to struct iosm_imem
+ * @rsp_ring:		Array of OS completion objects to be triggered once CP
+ *			acknowledges a request in the message ring
+ * @dev:		Pointer to device structure
+ * @phy_ap_shm:		Physical/Mapped representation of the shared memory info
+ * @old_msg_tail:	Old msg tail ptr, until AP has handled ACK's from CP
+ */
+struct iosm_protocol {
+	struct ipc_protocol_ap_shm *p_ap_shm;
+	struct iosm_pm pm;
+	struct iosm_pcie *pcie;
+	struct iosm_imem *imem;
+	struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES];
+	struct device *dev;
+	phys_addr_t phy_ap_shm;
+	u32 old_msg_tail;
+};
+
+/**
+ * struct ipc_call_msg_send_args - Structure for message argument for
+ *				   tasklet function.
+ * @prep_args:		Arguments for message preparation function
+ * @response:		Can be NULL if result can be ignored
+ * @msg_type:		Message Type
+ */
+struct ipc_call_msg_send_args {
+	union ipc_msg_prep_args *prep_args;
+	struct ipc_rsp *response;
+	enum ipc_msg_prep_type msg_type;
+};
+
+/**
+ * ipc_protocol_tq_msg_send - prepare the msg and send to CP
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ * @msg_type:		Message type
+ * @prep_args:		Message arguments
+ * @response:		Pointer to a response object which has a
+ *			completion object and return code.
+ *
+ * Returns: 0 on success and failure value on error
+ */
+int ipc_protocol_tq_msg_send(struct iosm_protocol *ipc_protocol,
+			     enum ipc_msg_prep_type msg_type,
+			     union ipc_msg_prep_args *prep_args,
+			     struct ipc_rsp *response);
+
+/**
+ * ipc_protocol_msg_send - Send ipc control message to CP and wait for response
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ * @prep:		Message type
+ * @prep_args:		Message arguments
+ *
+ * Returns: 0 on success and failure value on error
+ */
+int ipc_protocol_msg_send(struct iosm_protocol *ipc_protocol,
+			  enum ipc_msg_prep_type prep,
+			  union ipc_msg_prep_args *prep_args);
+
+/**
+ * ipc_protocol_suspend - Signal to CP that host wants to go to sleep (suspend).
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ *
+ * Returns: true if host can suspend, false if suspend must be aborted.
+ */
+bool ipc_protocol_suspend(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_s2idle_sleep - Call PM function to set PM variables in s2idle
+ *			       sleep/active case
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ * @sleep:		True for sleep/False for active
+ */
+void ipc_protocol_s2idle_sleep(struct iosm_protocol *ipc_protocol, bool sleep);
+
+/**
+ * ipc_protocol_resume - Signal to CP that host wants to resume operation.
+ * @ipc_protocol:	Pointer to ipc_protocol instance
+ *
+ * Returns: true if host can resume, false if there is a problem.
+ */
+bool ipc_protocol_resume(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_pm_dev_sleep_handle - Handles the Device Sleep state change
+ *				      notification.
+ * @ipc_protocol:	Pointer to ipc_protocol instance.
+ *
+ * Returns: true if sleep notification handled, false otherwise.
+ */
+bool ipc_protocol_pm_dev_sleep_handle(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_doorbell_trigger - Wrapper for PM function which wake up the
+ *				   device if it is in low power mode
+ *				   and trigger a head pointer update interrupt.
+ * @ipc_protocol:	Pointer to ipc_protocol instance.
+ * @identifier:		Specifies what component triggered hpda
+ *			update irq
+ */
+void ipc_protocol_doorbell_trigger(struct iosm_protocol *ipc_protocol,
+				   u32 identifier);
+
+/**
+ * ipc_protocol_sleep_notification_string - Returns last Sleep Notification as
+ *					    string.
+ * @ipc_protocol:	Instance pointer of Protocol module.
+ *
+ * Returns: Pointer to string.
+ */
+const char *
+ipc_protocol_sleep_notification_string(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_init - Allocates IPC protocol instance
+ * @ipc_imem:		Pointer to iosm_imem structure
+ *
+ * Returns: Address of IPC  protocol instance on success & NULL on failure.
+ */
+struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_protocol_deinit - Deallocates IPC protocol instance
+ * @ipc_protocol:	pointer to the IPC protocol instance
+ */
+void ipc_protocol_deinit(struct iosm_protocol *ipc_protocol);
+
+#endif

From 64516f633bfd2f576f3a18fe72184595367d11bf Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:20 +0530
Subject: [PATCH 1612/3292] net: iosm: protocol operations

1) Update UL/DL transfer descriptors in message ring.
2) Define message set for pipe/sleep protocol.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c | 552 ++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h | 444 ++++++++++++++
 2 files changed, 996 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
new file mode 100644
index 000000000000..91109e27efd3
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include "iosm_ipc_protocol.h"
+#include "iosm_ipc_protocol_ops.h"
+
+/* Get the next free message element.*/
+static union ipc_mem_msg_entry *
+ipc_protocol_free_msg_get(struct iosm_protocol *ipc_protocol, int *index)
+{
+	u32 head = le32_to_cpu(ipc_protocol->p_ap_shm->msg_head);
+	u32 new_head = (head + 1) % IPC_MEM_MSG_ENTRIES;
+	union ipc_mem_msg_entry *msg;
+
+	if (new_head == le32_to_cpu(ipc_protocol->p_ap_shm->msg_tail)) {
+		dev_err(ipc_protocol->dev, "message ring is full");
+		return NULL;
+	}
+
+	/* Get the pointer to the next free message element,
+	 * reset the fields and mark is as invalid.
+	 */
+	msg = &ipc_protocol->p_ap_shm->msg_ring[head];
+	memset(msg, 0, sizeof(*msg));
+
+	/* return index in message ring */
+	*index = head;
+
+	return msg;
+}
+
+/* Updates the message ring Head pointer */
+void ipc_protocol_msg_hp_update(struct iosm_imem *ipc_imem)
+{
+	struct iosm_protocol *ipc_protocol = ipc_imem->ipc_protocol;
+	u32 head = le32_to_cpu(ipc_protocol->p_ap_shm->msg_head);
+	u32 new_head = (head + 1) % IPC_MEM_MSG_ENTRIES;
+
+	/* Update head pointer and fire doorbell. */
+	ipc_protocol->p_ap_shm->msg_head = cpu_to_le32(new_head);
+	ipc_protocol->old_msg_tail =
+		le32_to_cpu(ipc_protocol->p_ap_shm->msg_tail);
+
+	ipc_pm_signal_hpda_doorbell(&ipc_protocol->pm, IPC_HP_MR, false);
+}
+
+/* Allocate and prepare a OPEN_PIPE message.
+ * This also allocates the memory for the new TDR structure and
+ * updates the pipe structure referenced in the preparation arguments.
+ */
+static int ipc_protocol_msg_prepipe_open(struct iosm_protocol *ipc_protocol,
+					 union ipc_msg_prep_args *args)
+{
+	int index;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+	struct ipc_pipe *pipe = args->pipe_open.pipe;
+	struct ipc_protocol_td *tdr;
+	struct sk_buff **skbr;
+
+	if (!msg) {
+		dev_err(ipc_protocol->dev, "failed to get free message");
+		return -EIO;
+	}
+
+	/* Allocate the skbuf elements for the skbuf which are on the way.
+	 * SKB ring is internal memory allocation for driver. No need to
+	 * re-calculate the start and end addresses.
+	 */
+	skbr = kcalloc(pipe->nr_of_entries, sizeof(*skbr), GFP_ATOMIC);
+	if (!skbr)
+		return -ENOMEM;
+
+	/* Allocate the transfer descriptors for the pipe. */
+	tdr = pci_alloc_consistent(ipc_protocol->pcie->pci,
+				   pipe->nr_of_entries * sizeof(*tdr),
+				   &pipe->phy_tdr_start);
+	if (!tdr) {
+		kfree(skbr);
+		dev_err(ipc_protocol->dev, "tdr alloc error");
+		return -ENOMEM;
+	}
+
+	pipe->max_nr_of_queued_entries = pipe->nr_of_entries - 1;
+	pipe->nr_of_queued_entries = 0;
+	pipe->tdr_start = tdr;
+	pipe->skbr_start = skbr;
+	pipe->old_tail = 0;
+
+	ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] = 0;
+
+	msg->open_pipe.type_of_message = IPC_MEM_MSG_OPEN_PIPE;
+	msg->open_pipe.pipe_nr = pipe->pipe_nr;
+	msg->open_pipe.tdr_addr = cpu_to_le64(pipe->phy_tdr_start);
+	msg->open_pipe.tdr_entries = cpu_to_le16(pipe->nr_of_entries);
+	msg->open_pipe.accumulation_backoff =
+				cpu_to_le32(pipe->accumulation_backoff);
+	msg->open_pipe.irq_vector = cpu_to_le32(pipe->irq);
+
+	return index;
+}
+
+static int ipc_protocol_msg_prepipe_close(struct iosm_protocol *ipc_protocol,
+					  union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+	struct ipc_pipe *pipe = args->pipe_close.pipe;
+
+	if (!msg)
+		return -EIO;
+
+	msg->close_pipe.type_of_message = IPC_MEM_MSG_CLOSE_PIPE;
+	msg->close_pipe.pipe_nr = pipe->pipe_nr;
+
+	dev_dbg(ipc_protocol->dev, "IPC_MEM_MSG_CLOSE_PIPE(pipe_nr=%d)",
+		msg->close_pipe.pipe_nr);
+
+	return index;
+}
+
+static int ipc_protocol_msg_prep_sleep(struct iosm_protocol *ipc_protocol,
+				       union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+
+	if (!msg) {
+		dev_err(ipc_protocol->dev, "failed to get free message");
+		return -EIO;
+	}
+
+	/* Prepare and send the host sleep message to CP to enter or exit D3. */
+	msg->host_sleep.type_of_message = IPC_MEM_MSG_SLEEP;
+	msg->host_sleep.target = args->sleep.target; /* 0=host, 1=device */
+
+	/* state; 0=enter, 1=exit 2=enter w/o protocol */
+	msg->host_sleep.state = args->sleep.state;
+
+	dev_dbg(ipc_protocol->dev, "IPC_MEM_MSG_SLEEP(target=%d; state=%d)",
+		msg->host_sleep.target, msg->host_sleep.state);
+
+	return index;
+}
+
+static int ipc_protocol_msg_prep_feature_set(struct iosm_protocol *ipc_protocol,
+					     union ipc_msg_prep_args *args)
+{
+	int index = -1;
+	union ipc_mem_msg_entry *msg =
+		ipc_protocol_free_msg_get(ipc_protocol, &index);
+
+	if (!msg) {
+		dev_err(ipc_protocol->dev, "failed to get free message");
+		return -EIO;
+	}
+
+	msg->feature_set.type_of_message = IPC_MEM_MSG_FEATURE_SET;
+	msg->feature_set.reset_enable = args->feature_set.reset_enable <<
+					RESET_BIT;
+
+	dev_dbg(ipc_protocol->dev, "IPC_MEM_MSG_FEATURE_SET(reset_enable=%d)",
+		msg->feature_set.reset_enable >> RESET_BIT);
+
+	return index;
+}
+
+/* Processes the message consumed by CP. */
+bool ipc_protocol_msg_process(struct iosm_imem *ipc_imem, int irq)
+{
+	struct iosm_protocol *ipc_protocol = ipc_imem->ipc_protocol;
+	struct ipc_rsp **rsp_ring = ipc_protocol->rsp_ring;
+	bool msg_processed = false;
+	u32 i;
+
+	if (le32_to_cpu(ipc_protocol->p_ap_shm->msg_tail) >=
+			IPC_MEM_MSG_ENTRIES) {
+		dev_err(ipc_protocol->dev, "msg_tail out of range: %d",
+			le32_to_cpu(ipc_protocol->p_ap_shm->msg_tail));
+		return msg_processed;
+	}
+
+	if (irq != IMEM_IRQ_DONT_CARE &&
+	    irq != ipc_protocol->p_ap_shm->ci.msg_irq_vector)
+		return msg_processed;
+
+	for (i = ipc_protocol->old_msg_tail;
+	     i != le32_to_cpu(ipc_protocol->p_ap_shm->msg_tail);
+	     i = (i + 1) % IPC_MEM_MSG_ENTRIES) {
+		union ipc_mem_msg_entry *msg =
+			&ipc_protocol->p_ap_shm->msg_ring[i];
+
+		dev_dbg(ipc_protocol->dev, "msg[%d]: type=%u status=%d", i,
+			msg->common.type_of_message,
+			msg->common.completion_status);
+
+		/* Update response with status and wake up waiting requestor */
+		if (rsp_ring[i]) {
+			rsp_ring[i]->status =
+				le32_to_cpu(msg->common.completion_status);
+			complete(&rsp_ring[i]->completion);
+			rsp_ring[i] = NULL;
+		}
+		msg_processed = true;
+	}
+
+	ipc_protocol->old_msg_tail = i;
+	return msg_processed;
+}
+
+/* Sends data from UL list to CP for the provided pipe by updating the Head
+ * pointer of given pipe.
+ */
+bool ipc_protocol_ul_td_send(struct iosm_protocol *ipc_protocol,
+			     struct ipc_pipe *pipe,
+			     struct sk_buff_head *p_ul_list)
+{
+	struct ipc_protocol_td *td;
+	bool hpda_pending = false;
+	struct sk_buff *skb;
+	s32 free_elements;
+	u32 head;
+	u32 tail;
+
+	if (!ipc_protocol->p_ap_shm) {
+		dev_err(ipc_protocol->dev, "driver is not initialized");
+		return false;
+	}
+
+	/* Get head and tail of the td list and calculate
+	 * the number of free elements.
+	 */
+	head = le32_to_cpu(ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr]);
+	tail = pipe->old_tail;
+
+	while (!skb_queue_empty(p_ul_list)) {
+		if (head < tail)
+			free_elements = tail - head - 1;
+		else
+			free_elements =
+				pipe->nr_of_entries - head + ((s32)tail - 1);
+
+		if (free_elements <= 0) {
+			dev_dbg(ipc_protocol->dev,
+				"no free td elements for UL pipe %d",
+				pipe->pipe_nr);
+			break;
+		}
+
+		/* Get the td address. */
+		td = &pipe->tdr_start[head];
+
+		/* Take the first element of the uplink list and add it
+		 * to the td list.
+		 */
+		skb = skb_dequeue(p_ul_list);
+		if (WARN_ON(!skb))
+			break;
+
+		/* Save the reference to the uplink skbuf. */
+		pipe->skbr_start[head] = skb;
+
+		td->buffer.address = IPC_CB(skb)->mapping;
+		td->scs = cpu_to_le32(skb->len) & cpu_to_le32(SIZE_MASK);
+		td->next = 0;
+
+		pipe->nr_of_queued_entries++;
+
+		/* Calculate the new head and save it. */
+		head++;
+		if (head >= pipe->nr_of_entries)
+			head = 0;
+
+		ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] =
+			cpu_to_le32(head);
+	}
+
+	if (pipe->old_head != head) {
+		dev_dbg(ipc_protocol->dev, "New UL TDs Pipe:%d", pipe->pipe_nr);
+
+		pipe->old_head = head;
+		/* Trigger doorbell because of pending UL packets. */
+		hpda_pending = true;
+	}
+
+	return hpda_pending;
+}
+
+/* Checks for Tail pointer update from CP and returns the data as SKB. */
+struct sk_buff *ipc_protocol_ul_td_process(struct iosm_protocol *ipc_protocol,
+					   struct ipc_pipe *pipe)
+{
+	struct ipc_protocol_td *p_td = &pipe->tdr_start[pipe->old_tail];
+	struct sk_buff *skb = pipe->skbr_start[pipe->old_tail];
+
+	pipe->nr_of_queued_entries--;
+	pipe->old_tail++;
+	if (pipe->old_tail >= pipe->nr_of_entries)
+		pipe->old_tail = 0;
+
+	if (!p_td->buffer.address) {
+		dev_err(ipc_protocol->dev, "Td buffer address is NULL");
+		return NULL;
+	}
+
+	if (p_td->buffer.address != IPC_CB(skb)->mapping) {
+		dev_err(ipc_protocol->dev,
+			"pipe %d: invalid buf_addr or skb_data",
+			pipe->pipe_nr);
+		return NULL;
+	}
+
+	return skb;
+}
+
+/* Allocates an SKB for CP to send data and updates the Head Pointer
+ * of the given Pipe#.
+ */
+bool ipc_protocol_dl_td_prepare(struct iosm_protocol *ipc_protocol,
+				struct ipc_pipe *pipe)
+{
+	struct ipc_protocol_td *td;
+	dma_addr_t mapping = 0;
+	u32 head, new_head;
+	struct sk_buff *skb;
+	u32 tail;
+
+	/* Get head and tail of the td list and calculate
+	 * the number of free elements.
+	 */
+	head = le32_to_cpu(ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr]);
+	tail = le32_to_cpu(ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr]);
+
+	new_head = head + 1;
+	if (new_head >= pipe->nr_of_entries)
+		new_head = 0;
+
+	if (new_head == tail)
+		return false;
+
+	/* Get the td address. */
+	td = &pipe->tdr_start[head];
+
+	/* Allocate the skbuf for the descriptor. */
+	skb = ipc_pcie_alloc_skb(ipc_protocol->pcie, pipe->buf_size, GFP_ATOMIC,
+				 &mapping, DMA_FROM_DEVICE,
+				 IPC_MEM_DL_ETH_OFFSET);
+	if (!skb)
+		return false;
+
+	td->buffer.address = mapping;
+	td->scs = cpu_to_le32(pipe->buf_size) & cpu_to_le32(SIZE_MASK);
+	td->next = 0;
+
+	/* store the new head value. */
+	ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] =
+		cpu_to_le32(new_head);
+
+	/* Save the reference to the skbuf. */
+	pipe->skbr_start[head] = skb;
+
+	pipe->nr_of_queued_entries++;
+
+	return true;
+}
+
+/* Processes DL TD's */
+struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
+					   struct ipc_pipe *pipe)
+{
+	u32 tail =
+		le32_to_cpu(ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr]);
+	struct ipc_protocol_td *p_td;
+	struct sk_buff *skb;
+
+	if (!pipe->tdr_start)
+		return NULL;
+
+	/* Copy the reference to the downlink buffer. */
+	p_td = &pipe->tdr_start[pipe->old_tail];
+	skb = pipe->skbr_start[pipe->old_tail];
+
+	/* Reset the ring elements. */
+	pipe->skbr_start[pipe->old_tail] = NULL;
+
+	pipe->nr_of_queued_entries--;
+
+	pipe->old_tail++;
+	if (pipe->old_tail >= pipe->nr_of_entries)
+		pipe->old_tail = 0;
+
+	if (!skb) {
+		dev_err(ipc_protocol->dev, "skb is null");
+		goto ret;
+	} else if (!p_td->buffer.address) {
+		dev_err(ipc_protocol->dev, "td/buffer address is null");
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	}
+
+	if (!IPC_CB(skb)) {
+		dev_err(ipc_protocol->dev, "pipe# %d, tail: %d skb_cb is NULL",
+			pipe->pipe_nr, tail);
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	}
+
+	if (p_td->buffer.address != IPC_CB(skb)->mapping) {
+		dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p",
+			(void *)p_td->buffer.address, skb->data);
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	} else if ((le32_to_cpu(p_td->scs) & SIZE_MASK) > pipe->buf_size) {
+		dev_err(ipc_protocol->dev, "invalid buffer size %d > %d",
+			le32_to_cpu(p_td->scs) & SIZE_MASK,
+			pipe->buf_size);
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	} else if (le32_to_cpu(p_td->scs) >> COMPLETION_STATUS ==
+		  IPC_MEM_TD_CS_ABORT) {
+		/* Discard aborted buffers. */
+		dev_dbg(ipc_protocol->dev, "discard 'aborted' buffers");
+		ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+		skb = NULL;
+		goto ret;
+	}
+
+	/* Set the length field in skbuf. */
+	skb_put(skb, le32_to_cpu(p_td->scs) & SIZE_MASK);
+
+ret:
+	return skb;
+}
+
+void ipc_protocol_get_head_tail_index(struct iosm_protocol *ipc_protocol,
+				      struct ipc_pipe *pipe, u32 *head,
+				      u32 *tail)
+{
+	struct ipc_protocol_ap_shm *ipc_ap_shm = ipc_protocol->p_ap_shm;
+
+	if (head)
+		*head = le32_to_cpu(ipc_ap_shm->head_array[pipe->pipe_nr]);
+
+	if (tail)
+		*tail = le32_to_cpu(ipc_ap_shm->tail_array[pipe->pipe_nr]);
+}
+
+/* Frees the TDs given to CP.  */
+void ipc_protocol_pipe_cleanup(struct iosm_protocol *ipc_protocol,
+			       struct ipc_pipe *pipe)
+{
+	struct sk_buff *skb;
+	u32 head;
+	u32 tail;
+
+	/* Get the start and the end of the buffer list. */
+	head = le32_to_cpu(ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr]);
+	tail = pipe->old_tail;
+
+	/* Reset tail and head to 0. */
+	ipc_protocol->p_ap_shm->tail_array[pipe->pipe_nr] = 0;
+	ipc_protocol->p_ap_shm->head_array[pipe->pipe_nr] = 0;
+
+	/* Free pending uplink and downlink buffers. */
+	if (pipe->skbr_start) {
+		while (head != tail) {
+			/* Get the reference to the skbuf,
+			 * which is on the way and free it.
+			 */
+			skb = pipe->skbr_start[tail];
+			if (skb)
+				ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
+
+			tail++;
+			if (tail >= pipe->nr_of_entries)
+				tail = 0;
+		}
+
+		kfree(pipe->skbr_start);
+		pipe->skbr_start = NULL;
+	}
+
+	pipe->old_tail = 0;
+
+	/* Free and reset the td and skbuf circular buffers. kfree is save! */
+	if (pipe->tdr_start) {
+		pci_free_consistent(ipc_protocol->pcie->pci,
+				    sizeof(*pipe->tdr_start) *
+					    pipe->nr_of_entries,
+				    pipe->tdr_start, pipe->phy_tdr_start);
+
+		pipe->tdr_start = NULL;
+	}
+}
+
+enum ipc_mem_device_ipc_state ipc_protocol_get_ipc_status(struct iosm_protocol
+							  *ipc_protocol)
+{
+	return (enum ipc_mem_device_ipc_state)
+		le32_to_cpu(ipc_protocol->p_ap_shm->device_info.ipc_status);
+}
+
+enum ipc_mem_exec_stage
+ipc_protocol_get_ap_exec_stage(struct iosm_protocol *ipc_protocol)
+{
+	return le32_to_cpu(ipc_protocol->p_ap_shm->device_info.execution_stage);
+}
+
+int ipc_protocol_msg_prep(struct iosm_imem *ipc_imem,
+			  enum ipc_msg_prep_type msg_type,
+			  union ipc_msg_prep_args *args)
+{
+	struct iosm_protocol *ipc_protocol = ipc_imem->ipc_protocol;
+
+	switch (msg_type) {
+	case IPC_MSG_PREP_SLEEP:
+		return ipc_protocol_msg_prep_sleep(ipc_protocol, args);
+
+	case IPC_MSG_PREP_PIPE_OPEN:
+		return ipc_protocol_msg_prepipe_open(ipc_protocol, args);
+
+	case IPC_MSG_PREP_PIPE_CLOSE:
+		return ipc_protocol_msg_prepipe_close(ipc_protocol, args);
+
+	case IPC_MSG_PREP_FEATURE_SET:
+		return ipc_protocol_msg_prep_feature_set(ipc_protocol, args);
+
+		/* Unsupported messages in protocol */
+	case IPC_MSG_PREP_MAP:
+	case IPC_MSG_PREP_UNMAP:
+	default:
+		dev_err(ipc_protocol->dev,
+			"unsupported message type: %d in protocol", msg_type);
+		return -EINVAL;
+	}
+}
+
+u32
+ipc_protocol_pm_dev_get_sleep_notification(struct iosm_protocol *ipc_protocol)
+{
+	struct ipc_protocol_ap_shm *ipc_ap_shm = ipc_protocol->p_ap_shm;
+
+	return le32_to_cpu(ipc_ap_shm->device_info.device_sleep_notification);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h
new file mode 100644
index 000000000000..35aa1387306e
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.h
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_PROTOCOL_OPS_H
+#define IOSM_IPC_PROTOCOL_OPS_H
+
+#define SIZE_MASK 0x00FFFFFF
+#define COMPLETION_STATUS 24
+#define RESET_BIT 7
+
+/**
+ * enum ipc_mem_td_cs - Completion status of a TD
+ * @IPC_MEM_TD_CS_INVALID:	      Initial status - td not yet used.
+ * @IPC_MEM_TD_CS_PARTIAL_TRANSFER:   More data pending -> next TD used for this
+ * @IPC_MEM_TD_CS_END_TRANSFER:	      IO transfer is complete.
+ * @IPC_MEM_TD_CS_OVERFLOW:	      IO transfer to small for the buff to write
+ * @IPC_MEM_TD_CS_ABORT:	      TD marked as abort and shall be discarded
+ *				      by AP.
+ * @IPC_MEM_TD_CS_ERROR:	      General error.
+ */
+enum ipc_mem_td_cs {
+	IPC_MEM_TD_CS_INVALID,
+	IPC_MEM_TD_CS_PARTIAL_TRANSFER,
+	IPC_MEM_TD_CS_END_TRANSFER,
+	IPC_MEM_TD_CS_OVERFLOW,
+	IPC_MEM_TD_CS_ABORT,
+	IPC_MEM_TD_CS_ERROR,
+};
+
+/**
+ * enum ipc_mem_msg_cs - Completion status of IPC Message
+ * @IPC_MEM_MSG_CS_INVALID:	Initial status.
+ * @IPC_MEM_MSG_CS_SUCCESS:	IPC Message completion success.
+ * @IPC_MEM_MSG_CS_ERROR:	Message send error.
+ */
+enum ipc_mem_msg_cs {
+	IPC_MEM_MSG_CS_INVALID,
+	IPC_MEM_MSG_CS_SUCCESS,
+	IPC_MEM_MSG_CS_ERROR,
+};
+
+/**
+ * struct ipc_msg_prep_args_pipe - struct for pipe args for message preparation
+ * @pipe:	Pipe to open/close
+ */
+struct ipc_msg_prep_args_pipe {
+	struct ipc_pipe *pipe;
+};
+
+/**
+ * struct ipc_msg_prep_args_sleep - struct for sleep args for message
+ *				    preparation
+ * @target:	0=host, 1=device
+ * @state:	0=enter sleep, 1=exit sleep
+ */
+struct ipc_msg_prep_args_sleep {
+	unsigned int target;
+	unsigned int state;
+};
+
+/**
+ * struct ipc_msg_prep_feature_set - struct for feature set argument for
+ *				     message preparation
+ * @reset_enable:	0=out-of-band, 1=in-band-crash notification
+ */
+struct ipc_msg_prep_feature_set {
+	u8 reset_enable;
+};
+
+/**
+ * struct ipc_msg_prep_map - struct for map argument for message preparation
+ * @region_id:	Region to map
+ * @addr:	Pcie addr of region to map
+ * @size:	Size of the region to map
+ */
+struct ipc_msg_prep_map {
+	unsigned int region_id;
+	unsigned long addr;
+	size_t size;
+};
+
+/**
+ * struct ipc_msg_prep_unmap - struct for unmap argument for message preparation
+ * @region_id:	Region to unmap
+ */
+struct ipc_msg_prep_unmap {
+	unsigned int region_id;
+};
+
+/**
+ * struct ipc_msg_prep_args - Union to handle different message types
+ * @pipe_open:		Pipe open message preparation struct
+ * @pipe_close:		Pipe close message preparation struct
+ * @sleep:		Sleep message preparation struct
+ * @feature_set:	Feature set message preparation struct
+ * @map:		Memory map message preparation struct
+ * @unmap:		Memory unmap message preparation struct
+ */
+union ipc_msg_prep_args {
+	struct ipc_msg_prep_args_pipe pipe_open;
+	struct ipc_msg_prep_args_pipe pipe_close;
+	struct ipc_msg_prep_args_sleep sleep;
+	struct ipc_msg_prep_feature_set feature_set;
+	struct ipc_msg_prep_map map;
+	struct ipc_msg_prep_unmap unmap;
+};
+
+/**
+ * enum ipc_msg_prep_type - Enum for message prepare actions
+ * @IPC_MSG_PREP_SLEEP:		Sleep message preparation type
+ * @IPC_MSG_PREP_PIPE_OPEN:	Pipe open message preparation type
+ * @IPC_MSG_PREP_PIPE_CLOSE:	Pipe close message preparation type
+ * @IPC_MSG_PREP_FEATURE_SET:	Feature set message preparation type
+ * @IPC_MSG_PREP_MAP:		Memory map message preparation type
+ * @IPC_MSG_PREP_UNMAP:		Memory unmap message preparation type
+ */
+enum ipc_msg_prep_type {
+	IPC_MSG_PREP_SLEEP,
+	IPC_MSG_PREP_PIPE_OPEN,
+	IPC_MSG_PREP_PIPE_CLOSE,
+	IPC_MSG_PREP_FEATURE_SET,
+	IPC_MSG_PREP_MAP,
+	IPC_MSG_PREP_UNMAP,
+};
+
+/**
+ * struct ipc_rsp - Response to sent message
+ * @completion:	For waking up requestor
+ * @status:	Completion status
+ */
+struct ipc_rsp {
+	struct completion completion;
+	enum ipc_mem_msg_cs status;
+};
+
+/**
+ * enum ipc_mem_msg - Type-definition of the messages.
+ * @IPC_MEM_MSG_OPEN_PIPE:	AP ->CP: Open a pipe
+ * @IPC_MEM_MSG_CLOSE_PIPE:	AP ->CP: Close a pipe
+ * @IPC_MEM_MSG_ABORT_PIPE:	AP ->CP: wait for completion of the
+ *				running transfer and abort all pending
+ *				IO-transfers for the pipe
+ * @IPC_MEM_MSG_SLEEP:		AP ->CP: host enter or exit sleep
+ * @IPC_MEM_MSG_FEATURE_SET:	AP ->CP: Intel feature configuration
+ */
+enum ipc_mem_msg {
+	IPC_MEM_MSG_OPEN_PIPE = 0x01,
+	IPC_MEM_MSG_CLOSE_PIPE = 0x02,
+	IPC_MEM_MSG_ABORT_PIPE = 0x03,
+	IPC_MEM_MSG_SLEEP = 0x04,
+	IPC_MEM_MSG_FEATURE_SET = 0xF0,
+};
+
+/**
+ * struct ipc_mem_msg_open_pipe - Message structure for open pipe
+ * @tdr_addr:			Tdr address
+ * @tdr_entries:		Tdr entries
+ * @pipe_nr:			Pipe number
+ * @type_of_message:		Message type
+ * @irq_vector:			MSI vector number
+ * @accumulation_backoff:	Time in usec for data accumalation
+ * @completion_status:		Message Completion Status
+ */
+struct ipc_mem_msg_open_pipe {
+	__le64 tdr_addr;
+	__le16 tdr_entries;
+	u8 pipe_nr;
+	u8 type_of_message;
+	__le32 irq_vector;
+	__le32 accumulation_backoff;
+	__le32 completion_status;
+};
+
+/**
+ * struct ipc_mem_msg_close_pipe - Message structure for close pipe
+ * @reserved1:			Reserved
+ * @reserved2:			Reserved
+ * @pipe_nr:			Pipe number
+ * @type_of_message:		Message type
+ * @reserved3:			Reserved
+ * @reserved4:			Reserved
+ * @completion_status:		Message Completion Status
+ */
+struct ipc_mem_msg_close_pipe {
+	__le32 reserved1[2];
+	__le16 reserved2;
+	u8 pipe_nr;
+	u8 type_of_message;
+	__le32  reserved3;
+	__le32 reserved4;
+	__le32 completion_status;
+};
+
+/**
+ * struct ipc_mem_msg_abort_pipe - Message structure for abort pipe
+ * @reserved1:			Reserved
+ * @reserved2:			Reserved
+ * @pipe_nr:			Pipe number
+ * @type_of_message:		Message type
+ * @reserved3:			Reserved
+ * @reserved4:			Reserved
+ * @completion_status:		Message Completion Status
+ */
+struct ipc_mem_msg_abort_pipe {
+	__le32  reserved1[2];
+	__le16 reserved2;
+	u8 pipe_nr;
+	u8 type_of_message;
+	__le32 reserved3;
+	__le32 reserved4;
+	__le32 completion_status;
+};
+
+/**
+ * struct ipc_mem_msg_host_sleep - Message structure for sleep message.
+ * @reserved1:		Reserved
+ * @target:		0=host, 1=device, host or EP devie
+ *			is the message target
+ * @state:		0=enter sleep, 1=exit sleep,
+ *			2=enter sleep no protocol
+ * @reserved2:		Reserved
+ * @type_of_message:	Message type
+ * @reserved3:		Reserved
+ * @reserved4:		Reserved
+ * @completion_status:	Message Completion Status
+ */
+struct ipc_mem_msg_host_sleep {
+	__le32 reserved1[2];
+	u8 target;
+	u8 state;
+	u8 reserved2;
+	u8 type_of_message;
+	__le32 reserved3;
+	__le32 reserved4;
+	__le32 completion_status;
+};
+
+/**
+ * struct ipc_mem_msg_feature_set - Message structure for feature_set message
+ * @reserved1:			Reserved
+ * @reserved2:			Reserved
+ * @reset_enable:		0=out-of-band, 1=in-band-crash notification
+ * @type_of_message:		Message type
+ * @reserved3:			Reserved
+ * @reserved4:			Reserved
+ * @completion_status:		Message Completion Status
+ */
+struct ipc_mem_msg_feature_set {
+	__le32 reserved1[2];
+	__le16 reserved2;
+	u8 reset_enable;
+	u8 type_of_message;
+	__le32 reserved3;
+	__le32 reserved4;
+	__le32 completion_status;
+};
+
+/**
+ * struct ipc_mem_msg_common - Message structure for completion status update.
+ * @reserved1:			Reserved
+ * @reserved2:			Reserved
+ * @type_of_message:		Message type
+ * @reserved3:			Reserved
+ * @reserved4:			Reserved
+ * @completion_status:		Message Completion Status
+ */
+struct ipc_mem_msg_common {
+	__le32 reserved1[2];
+	u8 reserved2[3];
+	u8 type_of_message;
+	__le32 reserved3;
+	__le32 reserved4;
+	__le32 completion_status;
+};
+
+/**
+ * union ipc_mem_msg_entry - Union with all possible messages.
+ * @open_pipe:		Open pipe message struct
+ * @close_pipe:		Close pipe message struct
+ * @abort_pipe:		Abort pipe message struct
+ * @host_sleep:		Host sleep message struct
+ * @feature_set:	Featuer set message struct
+ * @common:		Used to access msg_type and to set the completion status
+ */
+union ipc_mem_msg_entry {
+	struct ipc_mem_msg_open_pipe open_pipe;
+	struct ipc_mem_msg_close_pipe close_pipe;
+	struct ipc_mem_msg_abort_pipe abort_pipe;
+	struct ipc_mem_msg_host_sleep host_sleep;
+	struct ipc_mem_msg_feature_set feature_set;
+	struct ipc_mem_msg_common common;
+};
+
+/* Transfer descriptor definition. */
+struct ipc_protocol_td {
+	union {
+		/*   0 :  63 - 64-bit address of a buffer in host memory. */
+		dma_addr_t address;
+		struct {
+			/*   0 :  31 - 32 bit address */
+			__le32 address;
+			/*  32 :  63 - corresponding descriptor */
+			__le32 desc;
+		} __packed shm;
+	} buffer;
+
+	/*	0 - 2nd byte - Size of the buffer.
+	 *	The host provides the size of the buffer queued.
+	 *	The EP device reads this value and shall update
+	 *	it for downlink transfers to indicate the
+	 *	amount of data written in buffer.
+	 *	3rd byte - This field provides the completion status
+	 *	of the TD. When queuing the TD, the host sets
+	 *	the status to 0. The EP device updates this
+	 *	field when completing the TD.
+	 */
+	__le32 scs;
+
+	/*	0th - nr of following descriptors
+	 *	1 - 3rd byte - reserved
+	 */
+	__le32 next;
+} __packed;
+
+/**
+ * ipc_protocol_msg_prep - Prepare message based upon message type
+ * @ipc_imem:	iosm_protocol instance
+ * @msg_type:	message prepare type
+ * @args:	message arguments
+ *
+ * Return: 0 on success and failure value on error
+ */
+int ipc_protocol_msg_prep(struct iosm_imem *ipc_imem,
+			  enum ipc_msg_prep_type msg_type,
+			  union ipc_msg_prep_args *args);
+
+/**
+ * ipc_protocol_msg_hp_update - Function for head pointer update
+ *				of message ring
+ * @ipc_imem:	iosm_protocol instance
+ */
+void ipc_protocol_msg_hp_update(struct iosm_imem *ipc_imem);
+
+/**
+ * ipc_protocol_msg_process - Function for processing responses
+ *			      to IPC messages
+ * @ipc_imem:	iosm_protocol instance
+ * @irq:	IRQ vector
+ *
+ * Return:	True on success, false if error
+ */
+bool ipc_protocol_msg_process(struct iosm_imem *ipc_imem, int irq);
+
+/**
+ * ipc_protocol_ul_td_send - Function for sending the data to CP
+ * @ipc_protocol:	iosm_protocol instance
+ * @pipe:		Pipe instance
+ * @p_ul_list:		uplink sk_buff list
+ *
+ * Return: true in success, false in case of error
+ */
+bool ipc_protocol_ul_td_send(struct iosm_protocol *ipc_protocol,
+			     struct ipc_pipe *pipe,
+			     struct sk_buff_head *p_ul_list);
+
+/**
+ * ipc_protocol_ul_td_process - Function for processing the sent data
+ * @ipc_protocol:	iosm_protocol instance
+ * @pipe:		Pipe instance
+ *
+ * Return: sk_buff instance
+ */
+struct sk_buff *ipc_protocol_ul_td_process(struct iosm_protocol *ipc_protocol,
+					   struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_dl_td_prepare - Function for providing DL TDs to CP
+ * @ipc_protocol:	iosm_protocol instance
+ * @pipe:		Pipe instance
+ *
+ * Return: true in success, false in case of error
+ */
+bool ipc_protocol_dl_td_prepare(struct iosm_protocol *ipc_protocol,
+				struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_dl_td_process - Function for processing the DL data
+ * @ipc_protocol:	iosm_protocol instance
+ * @pipe:		Pipe instance
+ *
+ * Return: sk_buff instance
+ */
+struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
+					   struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_get_head_tail_index - Function for getting Head and Tail
+ *				      pointer index of given pipe
+ * @ipc_protocol:	iosm_protocol instance
+ * @pipe:		Pipe Instance
+ * @head:		head pointer index of the given pipe
+ * @tail:		tail pointer index of the given pipe
+ */
+void ipc_protocol_get_head_tail_index(struct iosm_protocol *ipc_protocol,
+				      struct ipc_pipe *pipe, u32 *head,
+				      u32 *tail);
+/**
+ * ipc_protocol_get_ipc_status - Function for getting the IPC Status
+ * @ipc_protocol:	iosm_protocol instance
+ *
+ * Return: Returns IPC State
+ */
+enum ipc_mem_device_ipc_state ipc_protocol_get_ipc_status(struct iosm_protocol
+							  *ipc_protocol);
+
+/**
+ * ipc_protocol_pipe_cleanup - Function to cleanup pipe resources
+ * @ipc_protocol:	iosm_protocol instance
+ * @pipe:		Pipe instance
+ */
+void ipc_protocol_pipe_cleanup(struct iosm_protocol *ipc_protocol,
+			       struct ipc_pipe *pipe);
+
+/**
+ * ipc_protocol_get_ap_exec_stage - Function for getting AP Exec Stage
+ * @ipc_protocol:	pointer to struct iosm protocol
+ *
+ * Return: returns BOOT Stages
+ */
+enum ipc_mem_exec_stage
+ipc_protocol_get_ap_exec_stage(struct iosm_protocol *ipc_protocol);
+
+/**
+ * ipc_protocol_pm_dev_get_sleep_notification - Function for getting Dev Sleep
+ *						notification
+ * @ipc_protocol:	iosm_protocol instance
+ *
+ * Return: Returns dev PM State
+ */
+u32 ipc_protocol_pm_dev_get_sleep_notification(struct iosm_protocol
+					       *ipc_protocol);
+#endif

From 110e6e02eb190ee4a799502d6cfa0f28d4efc294 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:21 +0530
Subject: [PATCH 1613/3292] net: iosm: uevent support

Report modem status via uevent.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_uevent.c | 44 +++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_uevent.h | 41 +++++++++++++++++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_uevent.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_uevent.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_uevent.c b/drivers/net/wwan/iosm/iosm_ipc_uevent.c
new file mode 100644
index 000000000000..2229d752926c
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_uevent.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include "iosm_ipc_uevent.h"
+
+/* Update the uevent in work queue context */
+static void ipc_uevent_work(struct work_struct *data)
+{
+	struct ipc_uevent_info *info;
+	char *envp[2] = { NULL, NULL };
+
+	info = container_of(data, struct ipc_uevent_info, work);
+
+	envp[0] = info->uevent;
+
+	if (kobject_uevent_env(&info->dev->kobj, KOBJ_CHANGE, envp))
+		pr_err("uevent %s failed to sent", info->uevent);
+
+	kfree(info);
+}
+
+void ipc_uevent_send(struct device *dev, char *uevent)
+{
+	struct ipc_uevent_info *info = kzalloc(sizeof(*info), GFP_ATOMIC);
+
+	if (!info)
+		return;
+
+	/* Initialize the kernel work queue */
+	INIT_WORK(&info->work, ipc_uevent_work);
+
+	/* Store the device and event information */
+	info->dev = dev;
+	snprintf(info->uevent, MAX_UEVENT_LEN, "%s: %s", dev_name(dev), uevent);
+
+	/* Schedule uevent in process context using work queue */
+	schedule_work(&info->work);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_uevent.h b/drivers/net/wwan/iosm/iosm_ipc_uevent.h
new file mode 100644
index 000000000000..2e45c051b5f4
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_uevent.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_UEVENT_H
+#define IOSM_IPC_UEVENT_H
+
+/* Baseband event strings */
+#define UEVENT_MDM_NOT_READY "MDM_NOT_READY"
+#define UEVENT_ROM_READY "ROM_READY"
+#define UEVENT_MDM_READY "MDM_READY"
+#define UEVENT_CRASH "CRASH"
+#define UEVENT_CD_READY "CD_READY"
+#define UEVENT_CD_READY_LINK_DOWN "CD_READY_LINK_DOWN"
+#define UEVENT_MDM_TIMEOUT "MDM_TIMEOUT"
+
+/* Maximum length of user events */
+#define MAX_UEVENT_LEN 64
+
+/**
+ * struct ipc_uevent_info - Uevent information structure.
+ * @dev:	Pointer to device structure
+ * @uevent:	Uevent information
+ * @work:	Uevent work struct
+ */
+struct ipc_uevent_info {
+	struct device *dev;
+	char uevent[MAX_UEVENT_LEN];
+	struct work_struct work;
+};
+
+/**
+ * ipc_uevent_send - Send modem event to user space.
+ * @dev:	Generic device pointer
+ * @uevent:	Uevent information
+ *
+ */
+void ipc_uevent_send(struct device *dev, char *uevent);
+
+#endif

From 2a54f2c7793409736f2e5ea101e050b3f1997088 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:22 +0530
Subject: [PATCH 1614/3292] net: iosm: net driver

1) Create net device & implement net operations for data/IP communication.
2) Bind IP Link to mux IP session for simultaneous IP traffic.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_wwan.c | 351 ++++++++++++++++++++++++++
 drivers/net/wwan/iosm/iosm_ipc_wwan.h |  55 ++++
 2 files changed, 406 insertions(+)
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_wwan.c
 create mode 100644 drivers/net/wwan/iosm/iosm_ipc_wwan.h

diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
new file mode 100644
index 000000000000..1711b79fc616
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_link.h>
+#include <linux/rtnetlink.h>
+#include <linux/wwan.h>
+
+#include "iosm_ipc_chnl_cfg.h"
+#include "iosm_ipc_imem_ops.h"
+#include "iosm_ipc_wwan.h"
+
+#define IOSM_IP_TYPE_MASK 0xF0
+#define IOSM_IP_TYPE_IPV4 0x40
+#define IOSM_IP_TYPE_IPV6 0x60
+
+#define IOSM_IF_ID_PAYLOAD 2
+
+/**
+ * struct iosm_netdev_priv - netdev private data
+ * @ipc_wwan:	Pointer to iosm_wwan struct
+ * @netdev:	Pointer to network interface device structure
+ * @if_id:	Interface id for device.
+ * @ch_id:	IPC channel number for which interface device is created.
+ */
+struct iosm_netdev_priv {
+	struct iosm_wwan *ipc_wwan;
+	struct net_device *netdev;
+	int if_id;
+	int ch_id;
+};
+
+/**
+ * struct iosm_wwan - This structure contains information about WWAN root device
+ *		      and interface to the IPC layer.
+ * @ipc_imem:		Pointer to imem data-struct
+ * @sub_netlist:	List of active netdevs
+ * @dev:		Pointer device structure
+ * @if_mutex:		Mutex used for add and remove interface id
+ */
+struct iosm_wwan {
+	struct iosm_imem *ipc_imem;
+	struct iosm_netdev_priv __rcu *sub_netlist[IP_MUX_SESSION_END + 1];
+	struct device *dev;
+	struct mutex if_mutex; /* Mutex used for add and remove interface id */
+};
+
+/* Bring-up the wwan net link */
+static int ipc_wwan_link_open(struct net_device *netdev)
+{
+	struct iosm_netdev_priv *priv = netdev_priv(netdev);
+	struct iosm_wwan *ipc_wwan = priv->ipc_wwan;
+	int if_id = priv->if_id;
+	int ret;
+
+	if (if_id < IP_MUX_SESSION_START ||
+	    if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))
+		return -EINVAL;
+
+	mutex_lock(&ipc_wwan->if_mutex);
+
+	/* get channel id */
+	priv->ch_id = ipc_imem_sys_wwan_open(ipc_wwan->ipc_imem, if_id);
+
+	if (priv->ch_id < 0) {
+		dev_err(ipc_wwan->dev,
+			"cannot connect wwan0 & id %d to the IPC mem layer",
+			if_id);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* enable tx path, DL data may follow */
+	netif_start_queue(netdev);
+
+	dev_dbg(ipc_wwan->dev, "Channel id %d allocated to if_id %d",
+		priv->ch_id, priv->if_id);
+
+	ret = 0;
+out:
+	mutex_unlock(&ipc_wwan->if_mutex);
+	return ret;
+}
+
+/* Bring-down the wwan net link */
+static int ipc_wwan_link_stop(struct net_device *netdev)
+{
+	struct iosm_netdev_priv *priv = netdev_priv(netdev);
+
+	netif_stop_queue(netdev);
+
+	mutex_lock(&priv->ipc_wwan->if_mutex);
+	ipc_imem_sys_wwan_close(priv->ipc_wwan->ipc_imem, priv->if_id,
+				priv->ch_id);
+	priv->ch_id = -1;
+	mutex_unlock(&priv->ipc_wwan->if_mutex);
+
+	return 0;
+}
+
+/* Transmit a packet */
+static int ipc_wwan_link_transmit(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct iosm_netdev_priv *priv = netdev_priv(netdev);
+	struct iosm_wwan *ipc_wwan = priv->ipc_wwan;
+	int if_id = priv->if_id;
+	int ret;
+
+	/* Interface IDs from 1 to 8 are for IP data
+	 * & from 257 to 261 are for non-IP data
+	 */
+	if (if_id < IP_MUX_SESSION_START ||
+	    if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))
+		return -EINVAL;
+
+	/* Send the SKB to device for transmission */
+	ret = ipc_imem_sys_wwan_transmit(ipc_wwan->ipc_imem,
+					 if_id, priv->ch_id, skb);
+
+	/* Return code of zero is success */
+	if (ret == 0) {
+		ret = NETDEV_TX_OK;
+	} else if (ret == -EBUSY) {
+		ret = NETDEV_TX_BUSY;
+		dev_err(ipc_wwan->dev, "unable to push packets");
+	} else {
+		goto exit;
+	}
+
+	return ret;
+
+exit:
+	/* Log any skb drop */
+	if (if_id)
+		dev_dbg(ipc_wwan->dev, "skb dropped. IF_ID: %d, ret: %d", if_id,
+			ret);
+
+	dev_kfree_skb_any(skb);
+	return ret;
+}
+
+/* Ops structure for wwan net link */
+static const struct net_device_ops ipc_inm_ops = {
+	.ndo_open = ipc_wwan_link_open,
+	.ndo_stop = ipc_wwan_link_stop,
+	.ndo_start_xmit = ipc_wwan_link_transmit,
+};
+
+/* Setup function for creating new net link */
+static void ipc_wwan_setup(struct net_device *iosm_dev)
+{
+	iosm_dev->header_ops = NULL;
+	iosm_dev->hard_header_len = 0;
+	iosm_dev->priv_flags |= IFF_NO_QUEUE;
+
+	iosm_dev->type = ARPHRD_NONE;
+	iosm_dev->min_mtu = ETH_MIN_MTU;
+	iosm_dev->max_mtu = ETH_MAX_MTU;
+
+	iosm_dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+
+	iosm_dev->netdev_ops = &ipc_inm_ops;
+}
+
+/* Create new wwan net link */
+static int ipc_wwan_newlink(void *ctxt, struct net_device *dev,
+			    u32 if_id, struct netlink_ext_ack *extack)
+{
+	struct iosm_wwan *ipc_wwan = ctxt;
+	struct iosm_netdev_priv *priv;
+	int err;
+
+	if (if_id < IP_MUX_SESSION_START ||
+	    if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))
+		return -EINVAL;
+
+	priv = netdev_priv(dev);
+	priv->if_id = if_id;
+	priv->netdev = dev;
+	priv->ipc_wwan = ipc_wwan;
+
+	mutex_lock(&ipc_wwan->if_mutex);
+	if (rcu_access_pointer(ipc_wwan->sub_netlist[if_id])) {
+		err = -EBUSY;
+		goto out_unlock;
+	}
+
+	err = register_netdevice(dev);
+	if (err)
+		goto out_unlock;
+
+	rcu_assign_pointer(ipc_wwan->sub_netlist[if_id], priv);
+	mutex_unlock(&ipc_wwan->if_mutex);
+
+	netif_device_attach(dev);
+
+	return 0;
+
+out_unlock:
+	mutex_unlock(&ipc_wwan->if_mutex);
+	return err;
+}
+
+static void ipc_wwan_dellink(void *ctxt, struct net_device *dev,
+			     struct list_head *head)
+{
+	struct iosm_wwan *ipc_wwan = ctxt;
+	struct iosm_netdev_priv *priv = netdev_priv(dev);
+	int if_id = priv->if_id;
+
+	if (WARN_ON(if_id < IP_MUX_SESSION_START ||
+		    if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist)))
+		return;
+
+	mutex_lock(&ipc_wwan->if_mutex);
+
+	if (WARN_ON(rcu_access_pointer(ipc_wwan->sub_netlist[if_id]) != priv))
+		goto unlock;
+
+	RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL);
+	/* unregistering includes synchronize_net() */
+	unregister_netdevice(dev);
+
+unlock:
+	mutex_unlock(&ipc_wwan->if_mutex);
+}
+
+static const struct wwan_ops iosm_wwan_ops = {
+	.priv_size = sizeof(struct iosm_netdev_priv),
+	.setup = ipc_wwan_setup,
+	.newlink = ipc_wwan_newlink,
+	.dellink = ipc_wwan_dellink,
+};
+
+int ipc_wwan_receive(struct iosm_wwan *ipc_wwan, struct sk_buff *skb_arg,
+		     bool dss, int if_id)
+{
+	struct sk_buff *skb = skb_arg;
+	struct net_device_stats *stats;
+	struct iosm_netdev_priv *priv;
+	int ret;
+
+	if ((skb->data[0] & IOSM_IP_TYPE_MASK) == IOSM_IP_TYPE_IPV4)
+		skb->protocol = htons(ETH_P_IP);
+	else if ((skb->data[0] & IOSM_IP_TYPE_MASK) ==
+		 IOSM_IP_TYPE_IPV6)
+		skb->protocol = htons(ETH_P_IPV6);
+
+	skb->pkt_type = PACKET_HOST;
+
+	if (if_id < (IP_MUX_SESSION_START - 1) ||
+	    if_id > (IP_MUX_SESSION_END - 1)) {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	rcu_read_lock();
+	priv = rcu_dereference(ipc_wwan->sub_netlist[if_id]);
+	if (!priv) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+	skb->dev = priv->netdev;
+	stats = &priv->netdev->stats;
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+
+	ret = netif_rx(skb);
+	skb = NULL;
+unlock:
+	rcu_read_unlock();
+free:
+	dev_kfree_skb(skb);
+	return ret;
+}
+
+void ipc_wwan_tx_flowctrl(struct iosm_wwan *ipc_wwan, int if_id, bool on)
+{
+	struct net_device *netdev;
+	struct iosm_netdev_priv *priv;
+	bool is_tx_blk;
+
+	rcu_read_lock();
+	priv = rcu_dereference(ipc_wwan->sub_netlist[if_id]);
+	if (!priv) {
+		rcu_read_unlock();
+		return;
+	}
+
+	netdev = priv->netdev;
+
+	is_tx_blk = netif_queue_stopped(netdev);
+
+	if (on)
+		dev_dbg(ipc_wwan->dev, "session id[%d]: flowctrl enable",
+			if_id);
+
+	if (on && !is_tx_blk)
+		netif_stop_queue(netdev);
+	else if (!on && is_tx_blk)
+		netif_wake_queue(netdev);
+	rcu_read_unlock();
+}
+
+struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev)
+{
+	struct iosm_wwan *ipc_wwan;
+
+	ipc_wwan = kzalloc(sizeof(*ipc_wwan), GFP_KERNEL);
+	if (!ipc_wwan)
+		return NULL;
+
+	ipc_wwan->dev = dev;
+	ipc_wwan->ipc_imem = ipc_imem;
+
+	if (wwan_register_ops(ipc_wwan->dev, &iosm_wwan_ops, ipc_wwan)) {
+		kfree(ipc_wwan);
+		return NULL;
+	}
+
+	mutex_init(&ipc_wwan->if_mutex);
+
+	return ipc_wwan;
+}
+
+void ipc_wwan_deinit(struct iosm_wwan *ipc_wwan)
+{
+	int if_id;
+
+	wwan_unregister_ops(ipc_wwan->dev);
+
+	for (if_id = 0; if_id < ARRAY_SIZE(ipc_wwan->sub_netlist); if_id++) {
+		struct iosm_netdev_priv *priv;
+
+		priv = rcu_access_pointer(ipc_wwan->sub_netlist[if_id]);
+		if (!priv)
+			continue;
+
+		rtnl_lock();
+		ipc_wwan_dellink(ipc_wwan, priv->netdev, NULL);
+		rtnl_unlock();
+	}
+
+	mutex_destroy(&ipc_wwan->if_mutex);
+
+	kfree(ipc_wwan);
+}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.h b/drivers/net/wwan/iosm/iosm_ipc_wwan.h
new file mode 100644
index 000000000000..4925f22dff0a
--- /dev/null
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (C) 2020-21 Intel Corporation.
+ */
+
+#ifndef IOSM_IPC_WWAN_H
+#define IOSM_IPC_WWAN_H
+
+/**
+ * ipc_wwan_init - Allocate, Init and register WWAN device
+ * @ipc_imem:		Pointer to imem data-struct
+ * @dev:		Pointer to device structure
+ *
+ * Returns: Pointer to instance on success else NULL
+ */
+struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev);
+
+/**
+ * ipc_wwan_deinit - Unregister and free WWAN device, clear pointer
+ * @ipc_wwan:	Pointer to wwan instance data
+ */
+void ipc_wwan_deinit(struct iosm_wwan *ipc_wwan);
+
+/**
+ * ipc_wwan_receive - Receive a downlink packet from CP.
+ * @ipc_wwan:	Pointer to wwan instance
+ * @skb_arg:	Pointer to struct sk_buff
+ * @dss:	Set to true if interafce id is from 257 to 261,
+ *		else false
+ * @if_id:	Interface ID
+ *
+ * Return: 0 on success and failure value on error
+ */
+int ipc_wwan_receive(struct iosm_wwan *ipc_wwan, struct sk_buff *skb_arg,
+		     bool dss, int if_id);
+
+/**
+ * ipc_wwan_tx_flowctrl - Enable/Disable TX flow control
+ * @ipc_wwan:	Pointer to wwan instance
+ * @id:		Ipc mux channel session id
+ * @on:		if true then flow ctrl would be enabled else disable
+ *
+ */
+void ipc_wwan_tx_flowctrl(struct iosm_wwan *ipc_wwan, int id, bool on);
+
+/**
+ * ipc_wwan_is_tx_stopped - Checks if Tx stopped for a Interface id.
+ * @ipc_wwan:	Pointer to wwan instance
+ * @id:		Ipc mux channel session id
+ *
+ * Return: true if stopped, false otherwise
+ */
+bool ipc_wwan_is_tx_stopped(struct iosm_wwan *ipc_wwan, int id);
+
+#endif

From f7af616c632ee2ac3af0876fe33bf9e0232e665a Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Sun, 13 Jun 2021 18:20:23 +0530
Subject: [PATCH 1615/3292] net: iosm: infrastructure

1) Kconfig & Makefile changes for IOSM Driver compilation.
2) Add IOSM Driver documentation.
3) Modified MAINTAINER file for IOSM Driver addition.

Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/index.rst       |  1 +
 .../networking/device_drivers/wwan/index.rst  | 18 ++++
 .../networking/device_drivers/wwan/iosm.rst   | 96 +++++++++++++++++++
 MAINTAINERS                                   |  7 ++
 drivers/net/wwan/Kconfig                      | 12 +++
 drivers/net/wwan/Makefile                     |  1 +
 drivers/net/wwan/iosm/Makefile                | 26 +++++
 7 files changed, 161 insertions(+)
 create mode 100644 Documentation/networking/device_drivers/wwan/index.rst
 create mode 100644 Documentation/networking/device_drivers/wwan/iosm.rst
 create mode 100644 drivers/net/wwan/iosm/Makefile

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index d8279de7bf25..3a5a1d46e77e 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -18,6 +18,7 @@ Contents:
    qlogic/index
    wan/index
    wifi/index
+   wwan/index
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/wwan/index.rst b/Documentation/networking/device_drivers/wwan/index.rst
new file mode 100644
index 000000000000..1cb8c7371401
--- /dev/null
+++ b/Documentation/networking/device_drivers/wwan/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+WWAN Device Drivers
+===================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   iosm
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/networking/device_drivers/wwan/iosm.rst b/Documentation/networking/device_drivers/wwan/iosm.rst
new file mode 100644
index 000000000000..cd12f57d980a
--- /dev/null
+++ b/Documentation/networking/device_drivers/wwan/iosm.rst
@@ -0,0 +1,96 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+.. Copyright (C) 2020-21 Intel Corporation
+
+.. _iosm_driver_doc:
+
+===========================================
+IOSM Driver for Intel M.2 PCIe based Modems
+===========================================
+The IOSM (IPC over Shared Memory) driver is a WWAN PCIe host driver developed
+for linux or chrome platform for data exchange over PCIe interface between
+Host platform & Intel M.2 Modem. The driver exposes interface conforming to the
+MBIM protocol [1]. Any front end application ( eg: Modem Manager) could easily
+manage the MBIM interface to enable data communication towards WWAN.
+
+Basic usage
+===========
+MBIM functions are inactive when unmanaged. The IOSM driver only provides a
+userspace interface MBIM "WWAN PORT" representing MBIM control channel and does
+not play any role in managing the functionality. It is the job of a userspace
+application to detect port enumeration and enable MBIM functionality.
+
+Examples of few such userspace application are:
+- mbimcli (included with the libmbim [2] library), and
+- Modem Manager [3]
+
+Management Applications to carry out below required actions for establishing
+MBIM IP session:
+- open the MBIM control channel
+- configure network connection settings
+- connect to network
+- configure IP network interface
+
+Management application development
+==================================
+The driver and userspace interfaces are described below. The MBIM protocol is
+described in [1] Mobile Broadband Interface Model v1.0 Errata-1.
+
+MBIM control channel userspace ABI
+----------------------------------
+
+/dev/wwan0mbim0 character device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The driver exposes an MBIM interface to the MBIM function by implementing
+MBIM WWAN Port. The userspace end of the control channel pipe is a
+/dev/wwan0mbim0 character device. Application shall use this interface for
+MBIM protocol communication.
+
+Fragmentation
+~~~~~~~~~~~~~
+The userspace application is responsible for all control message fragmentation
+and defragmentation as per MBIM specification.
+
+/dev/wwan0mbim0 write()
+~~~~~~~~~~~~~~~~~~~~~
+The MBIM control messages from the management application must not exceed the
+negotiated control message size.
+
+/dev/wwan0mbim0 read()
+~~~~~~~~~~~~~~~~~~~~
+The management application must accept control messages of up the negotiated
+control message size.
+
+MBIM data channel userspace ABI
+-------------------------------
+
+wwan0-X network device
+~~~~~~~~~~~~~~~~~~~~
+The IOSM driver exposes IP link interface "wwan0-X" of type "wwan" for IP
+traffic. Iproute network utility is used for creating "wwan0-X" network
+interface and for associating it with MBIM IP session. The Driver supports
+upto 8 IP sessions for simultaneous IP communication.
+
+The userspace management application is responsible for creating new IP link
+prior to establishing MBIM IP session where the SessionId is greater than 0.
+
+For example, creating new IP link for a MBIM IP session with SessionId 1:
+
+  ip link add dev wwan0-1 parentdev-name wwan0 type wwan linkid 1
+
+The driver will automatically map the "wwan0-1" network device to MBIM IP
+session 1.
+
+References
+==========
+[1] "MBIM (Mobile Broadband Interface Model) Errata-1"
+      - https://www.usb.org/document-library/
+
+[2] libmbim - "a glib-based library for talking to WWAN modems and
+      devices which speak the Mobile Interface Broadband Model (MBIM)
+      protocol"
+      - http://www.freedesktop.org/wiki/Software/libmbim/
+
+[3] Modem Manager - "a DBus-activated daemon which controls mobile
+      broadband (2G/3G/4G) devices and connections"
+      - http://www.freedesktop.org/wiki/Software/ModemManager/
diff --git a/MAINTAINERS b/MAINTAINERS
index 349a87b42d3c..183cc61e2dc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9453,6 +9453,13 @@ L:	Dell.Client.Kernel@dell.com
 S:	Maintained
 F:	drivers/platform/x86/intel-wmi-thunderbolt.c
 
+INTEL WWAN IOSM DRIVER
+M:	M Chetan Kumar <m.chetan.kumar@intel.com>
+M:	Intel Corporation <linuxwwan@intel.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/wwan/iosm/
+
 INTEL(R) TRACE HUB
 M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 S:	Supported
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index ec0b194a373c..13613a4f53d8 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -44,4 +44,16 @@ config MHI_WWAN_CTRL
 	  To compile this driver as a module, choose M here: the module will be
 	  called mhi_wwan_ctrl.
 
+config IOSM
+	tristate "IOSM Driver for Intel M.2 WWAN Device"
+	select WWAN_CORE
+	depends on INTEL_IOMMU
+	help
+	  This driver enables Intel M.2 WWAN Device communication.
+
+	  If you have one of those Intel M.2 WWAN Modules and wish to use it in
+	  Linux say Y/M here.
+
+	  If unsure, say N.
+
 endif # WWAN
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index f33f77ca1021..3e565d3f984f 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -9,3 +9,4 @@ wwan-objs += wwan_core.o
 obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
 
 obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_IOSM) += iosm/
diff --git a/drivers/net/wwan/iosm/Makefile b/drivers/net/wwan/iosm/Makefile
new file mode 100644
index 000000000000..cdeeb9357af6
--- /dev/null
+++ b/drivers/net/wwan/iosm/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: (GPL-2.0-only)
+#
+# Copyright (C) 2020-21 Intel Corporation.
+#
+
+iosm-y = \
+	iosm_ipc_task_queue.o	\
+	iosm_ipc_imem.o			\
+	iosm_ipc_imem_ops.o		\
+	iosm_ipc_mmio.o			\
+	iosm_ipc_port.o			\
+	iosm_ipc_wwan.o			\
+	iosm_ipc_uevent.o		\
+	iosm_ipc_pm.o			\
+	iosm_ipc_pcie.o			\
+	iosm_ipc_irq.o			\
+	iosm_ipc_chnl_cfg.o		\
+	iosm_ipc_protocol.o		\
+	iosm_ipc_protocol_ops.o	\
+	iosm_ipc_mux.o			\
+	iosm_ipc_mux_codec.o
+
+obj-$(CONFIG_IOSM) := iosm.o
+
+# compilation flags
+ccflags-y += -DDEBUG

From 6fd06963fa74197103cdbb4b494763127b3f2f34 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 7 Jun 2021 15:21:49 +0200
Subject: [PATCH 1616/3292] xfrm: Fix error reporting in xfrm_state_construct.

When memory allocation for XFRMA_ENCAP or XFRMA_COADDR fails,
the error will not be reported because the -ENOMEM assignment
to the err variable is overwritten before. Fix this by moving
these two in front of the function so that memory allocation
failures will be reported.

Reported-by: Tobias Brunner <tobias@strongswan.org>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5a0ef4361e43..817e714dedea 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -580,6 +580,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	copy_from_user_state(x, p);
 
+	if (attrs[XFRMA_ENCAP]) {
+		x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
+				   sizeof(*x->encap), GFP_KERNEL);
+		if (x->encap == NULL)
+			goto error;
+	}
+
+	if (attrs[XFRMA_COADDR]) {
+		x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
+				    sizeof(*x->coaddr), GFP_KERNEL);
+		if (x->coaddr == NULL)
+			goto error;
+	}
+
 	if (attrs[XFRMA_SA_EXTRA_FLAGS])
 		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
 
@@ -600,23 +614,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 				   attrs[XFRMA_ALG_COMP])))
 		goto error;
 
-	if (attrs[XFRMA_ENCAP]) {
-		x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
-				   sizeof(*x->encap), GFP_KERNEL);
-		if (x->encap == NULL)
-			goto error;
-	}
-
 	if (attrs[XFRMA_TFCPAD])
 		x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]);
 
-	if (attrs[XFRMA_COADDR]) {
-		x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
-				    sizeof(*x->coaddr), GFP_KERNEL);
-		if (x->coaddr == NULL)
-			goto error;
-	}
-
 	xfrm_mark_get(attrs, &x->mark);
 
 	xfrm_smark_init(attrs, &x->props.smark);

From ab372c2293f5d0b279f31c8d768566ea37602dc9 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Fri, 11 Jun 2021 09:58:12 +0800
Subject: [PATCH 1617/3292] ieee802154: hwsim: Fix possible memory leak in
 hwsim_subscribe_all_others

In hwsim_subscribe_all_others, the error handling code performs
incorrectly if the second hwsim_alloc_edge fails. When this issue occurs,
it goes to sub_fail, without cleaning the edges allocated before.

Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20210611015812.1626999-1-mudongliangabcd@gmail.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 drivers/net/ieee802154/mac802154_hwsim.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index da9135231c07..366eaae3550a 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -715,6 +715,8 @@ static int hwsim_subscribe_all_others(struct hwsim_phy *phy)
 
 	return 0;
 
+sub_fail:
+	hwsim_edge_unsubscribe_me(phy);
 me_fail:
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &phy->edges, list) {
@@ -722,8 +724,6 @@ me_fail:
 		hwsim_free_edge(e);
 	}
 	rcu_read_unlock();
-sub_fail:
-	hwsim_edge_unsubscribe_me(phy);
 	return -ENOMEM;
 }
 

From f8e5388a9da05708fb6d5a0137b287dbfd32d29c Mon Sep 17 00:00:00 2001
From: Austin Kim <austindh.kim@gmail.com>
Date: Thu, 27 May 2021 08:16:37 +0100
Subject: [PATCH 1618/3292] HID: intel-ish-hid: Fix minor typos in comments

Change "poiner" to "pointer" in comments.

Signed-off-by: Austin Kim <austindh.kim@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index 6cf59fd26ad7..a8fe893aed62 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -455,7 +455,7 @@ static void loader_cl_event_cb(struct ishtp_cl_device *cl_device)
 /**
  * ish_query_loader_prop() -  Query ISH Shim firmware loader
  * @client_data:	Client data instance
- * @fw:			Poiner to firmware data struct in host memory
+ * @fw:			Pointer to firmware data struct in host memory
  * @fw_info:		Loader firmware properties
  *
  * This function queries the ISH Shim firmware loader for capabilities.

From b5539722eb832441f309642fe5102cc3536f92b8 Mon Sep 17 00:00:00 2001
From: Zoltan Tamas Vajda <zoltan.tamas.vajda@gmail.com>
Date: Thu, 3 Jun 2021 20:58:14 +0200
Subject: [PATCH 1619/3292] HID: hid-input: add Surface Go battery quirk

The Elantech touchscreen/digitizer in the Surface Go mistakenly reports
having a battery. This results in a low battery message every time you
try to use the pen.

This patch adds a quirk to ignore the non-existent battery and
gets rid of the false low battery messages.

Signed-off-by: Zoltan Tamas Vajda <zoltan.tamas.vajda@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h   | 1 +
 drivers/hid/hid-input.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index b84a0a11e05b..63ca5959dc67 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -396,6 +396,7 @@
 #define USB_DEVICE_ID_HP_X2_10_COVER	0x0755
 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15	0x2817
 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN	0x2706
+#define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN	0x261A
 
 #define USB_VENDOR_ID_ELECOM		0x056e
 #define USB_DEVICE_ID_ELECOM_BM084	0x0061
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index abbfa91e73e4..68c8644234a4 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -326,6 +326,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
 	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
 	  HID_BATTERY_QUIRK_IGNORE },
+	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
+	  HID_BATTERY_QUIRK_IGNORE },
 	{}
 };
 

From 65e4122d2ea758a1834a5ddf9c555a4d2c1dd66a Mon Sep 17 00:00:00 2001
From: Even Xu <even.xu@intel.com>
Date: Thu, 10 Jun 2021 14:21:52 +0800
Subject: [PATCH 1620/3292] HID: intel-ish-hid: Set ISH driver depends on x86

During ISH DMA enabling, some platforms (such as EHL) don't
support cache snooping, driver needs involve clflush_cache_range
API which isn't supported by all archs (such as ARM).

Considering ISH only exists on Intel platforms, add the dependence
in Kconfig to avoid build warnings or errors on other archs.

Acked-by: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>
Signed-off-by: Even Xu <even.xu@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/intel-ish-hid/Kconfig b/drivers/hid/intel-ish-hid/Kconfig
index c6c9cfe2475e..689da84a520d 100644
--- a/drivers/hid/intel-ish-hid/Kconfig
+++ b/drivers/hid/intel-ish-hid/Kconfig
@@ -5,6 +5,7 @@ menu "Intel ISH HID support"
 config INTEL_ISH_HID
 	tristate "Intel Integrated Sensor Hub"
 	default n
+	depends on X86
 	select HID
 	help
 	  The Integrated Sensor Hub (ISH) enables the ability to offload

From 4aae88b9a9749f5b1c74f004ed8bd8efbaa96440 Mon Sep 17 00:00:00 2001
From: Even Xu <even.xu@intel.com>
Date: Thu, 10 Jun 2021 14:21:53 +0800
Subject: [PATCH 1621/3292] HID: intel-ish-hid: ishtp: Add
 dma_no_cache_snooping() callback

Different platforms have different DMA capability, on most of
platforms, DMA support cache snooping. But few platforms,
such as ElkhartLake (EHL), don't support cache snooping
which requires cache flush from driver.

So add a hardware level callback to let ishtp driver know if cache
flush is needed.

As most of platform support cache snooping, so driver will not
do cache flush by default, until platform implements this callback
and return true explicitly.

Acked-by: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>
Signed-off-by: Even Xu <even.xu@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp/client.c    | 18 ++++++++++++++++++
 drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c
index 585a5c4066cb..405e0d5212cc 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client.c
@@ -10,6 +10,7 @@
 #include <linux/wait.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <asm/cacheflush.h>
 #include "hbm.h"
 #include "client.h"
 
@@ -772,6 +773,14 @@ static void ishtp_cl_send_msg_dma(struct ishtp_device *dev,
 	/* write msg to dma buf */
 	memcpy(msg_addr, cl_msg->send_buf.data, cl_msg->send_buf.size);
 
+	/*
+	 * if current fw don't support cache snooping, driver have to
+	 * flush the cache manually.
+	 */
+	if (dev->ops->dma_no_cache_snooping &&
+		dev->ops->dma_no_cache_snooping(dev))
+		clflush_cache_range(msg_addr, cl_msg->send_buf.size);
+
 	/* send dma_xfer hbm msg */
 	off = msg_addr - (unsigned char *)dev->ishtp_host_dma_tx_buf;
 	ishtp_hbm_hdr(&hdr, sizeof(struct dma_xfer_hbm));
@@ -996,6 +1005,15 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 		}
 
 		buffer = rb->buffer.data;
+
+		/*
+		 * if current fw don't support cache snooping, driver have to
+		 * flush the cache manually.
+		 */
+		if (dev->ops->dma_no_cache_snooping &&
+			dev->ops->dma_no_cache_snooping(dev))
+			clflush_cache_range(msg, hbm->msg_length);
+
 		memcpy(buffer, msg, hbm->msg_length);
 		rb->buf_idx = hbm->msg_length;
 
diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
index f579b16e6d7a..32142c7d9a04 100644
--- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
+++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h
@@ -119,6 +119,7 @@ struct ishtp_hw_ops {
 			unsigned long buffer_length);
 	uint32_t	(*get_fw_status)(struct ishtp_device *dev);
 	void	(*sync_fw_clock)(struct ishtp_device *dev);
+	bool	(*dma_no_cache_snooping)(struct ishtp_device *dev);
 };
 
 /**

From aa59d6bb5ec88e30802174f9accc5d2dc50209ad Mon Sep 17 00:00:00 2001
From: Even Xu <even.xu@intel.com>
Date: Thu, 10 Jun 2021 14:21:54 +0800
Subject: [PATCH 1622/3292] HID: intel-ish-hid: ipc: Specify that EHL no cache
 snooping

Specify that EHL doesn't support DMA cache snooping.

Acked-by: Pandruvada, Srinivas <srinivas.pandruvada@intel.com>
Signed-off-by: Even Xu <even.xu@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ipc/ipc.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index 9037816e2bf7..45e0c7b1c9ec 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -889,6 +889,29 @@ static uint32_t ish_ipc_get_header(struct ishtp_device *dev, int length,
 	return drbl_val;
 }
 
+/**
+ * _dma_no_cache_snooping()
+ *
+ * Check on current platform, DMA supports cache snooping or not.
+ * This callback is used to notify uplayer driver if manully cache
+ * flush is needed when do DMA operation.
+ *
+ * Please pay attention to this callback implementation, if declare
+ * having cache snooping on a cache snooping not supported platform
+ * will cause uplayer driver receiving mismatched data; and if
+ * declare no cache snooping on a cache snooping supported platform
+ * will cause cache be flushed twice and performance hit.
+ *
+ * @dev: ishtp device pointer
+ *
+ * Return: false - has cache snooping capability
+ *         true - no cache snooping, need manually cache flush
+ */
+static bool _dma_no_cache_snooping(struct ishtp_device *dev)
+{
+	return dev->pdev->device == EHL_Ax_DEVICE_ID;
+}
+
 static const struct ishtp_hw_ops ish_hw_ops = {
 	.hw_reset = _ish_hw_reset,
 	.ipc_reset = _ish_ipc_reset,
@@ -897,7 +920,8 @@ static const struct ishtp_hw_ops ish_hw_ops = {
 	.write = write_ipc_to_queue,
 	.get_fw_status = _ish_read_fw_sts_reg,
 	.sync_fw_clock = _ish_sync_fw_clock,
-	.ishtp_read_hdr = _ishtp_read_hdr
+	.ishtp_read_hdr = _ishtp_read_hdr,
+	.dma_no_cache_snooping = _dma_no_cache_snooping
 };
 
 /**

From e4b6d902a9e38f424ce118106ea4d1665b7951b5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:00 +0100
Subject: [PATCH 1623/3292] io_uring: improve sqpoll event/state handling

As sqd->state changes rarely, don't check every event one by one but
look them all at once. Add a helper function. Also don't go into event
waiting sleeping with STOP flag set.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/645025f95c7eeec97f88ff497785f4f1d6f3966f.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fa8794c61af7..24c0042b0de7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6757,6 +6757,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	return submitted;
 }
 
+static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
+{
+	return READ_ONCE(sqd->state);
+}
+
 static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
 {
 	/* Tell userspace we may need a wakeup call */
@@ -6815,6 +6820,24 @@ static void io_sqd_update_thread_idle(struct io_sq_data *sqd)
 	sqd->sq_thread_idle = sq_thread_idle;
 }
 
+static bool io_sqd_handle_event(struct io_sq_data *sqd)
+{
+	bool did_sig = false;
+	struct ksignal ksig;
+
+	if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+	    signal_pending(current)) {
+		mutex_unlock(&sqd->lock);
+		if (signal_pending(current))
+			did_sig = get_signal(&ksig);
+		cond_resched();
+		mutex_lock(&sqd->lock);
+	}
+	io_run_task_work();
+	io_run_task_work_head(&sqd->park_task_work);
+	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+}
+
 static int io_sq_thread(void *data)
 {
 	struct io_sq_data *sqd = data;
@@ -6836,29 +6859,17 @@ static int io_sq_thread(void *data)
 	/* a user may had exited before the thread started */
 	io_run_task_work_head(&sqd->park_task_work);
 
-	while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) {
+	while (1) {
 		int ret;
 		bool cap_entries, sqt_spin, needs_sched;
 
-		if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
-		    signal_pending(current)) {
-			bool did_sig = false;
-
-			mutex_unlock(&sqd->lock);
-			if (signal_pending(current)) {
-				struct ksignal ksig;
-
-				did_sig = get_signal(&ksig);
-			}
-			cond_resched();
-			mutex_lock(&sqd->lock);
-			io_run_task_work();
-			io_run_task_work_head(&sqd->park_task_work);
-			if (did_sig)
+		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
+			if (io_sqd_handle_event(sqd))
 				break;
 			timeout = jiffies + sqd->sq_thread_idle;
 			continue;
 		}
+
 		sqt_spin = false;
 		cap_entries = !list_is_singular(&sqd->ctx_list);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -6882,7 +6893,7 @@ static int io_sq_thread(void *data)
 		}
 
 		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
-		if (!test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
+		if (!io_sqd_events_pending(sqd)) {
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 				io_ring_set_wakeup_flag(ctx);
 

From aaa9f0f48172b190a835792abe63f8859372eeec Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:01 +0100
Subject: [PATCH 1624/3292] io_uring: improve sq_thread waiting check

If SQPOLL task finds a ring requesting it to continue running, no need
to set wake flag to rest of the rings as it will be cleared in a moment
anyway, so hide it in a single sqd->ctx_list loop.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1ee5a696d9fd08645994c58ee147d149a8957d94.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 24c0042b0de7..85d0184c585d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6894,11 +6894,10 @@ static int io_sq_thread(void *data)
 
 		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
 		if (!io_sqd_events_pending(sqd)) {
-			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-				io_ring_set_wakeup_flag(ctx);
-
 			needs_sched = true;
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+				io_ring_set_wakeup_flag(ctx);
+
 				if ((ctx->flags & IORING_SETUP_IOPOLL) &&
 				    !list_empty_careful(&ctx->iopoll_list)) {
 					needs_sched = false;

From 21f2fc080f8654ce60b3e9192ba3b596c6a2ead6 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:02 +0100
Subject: [PATCH 1625/3292] io_uring: remove unused park_task_work

As sqpoll cancel via task_work is killed, remove everything related to
park_task_work as it's not used anymore.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/310d8b76a2fbbf3e139373500e04ad9af7ee3dbb.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 85d0184c585d..d18be5afc403 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -288,7 +288,6 @@ struct io_sq_data {
 
 	unsigned long		state;
 	struct completion	exited;
-	struct callback_head	*park_task_work;
 };
 
 #define IO_IOPOLL_BATCH			8
@@ -6834,7 +6833,6 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
 		mutex_lock(&sqd->lock);
 	}
 	io_run_task_work();
-	io_run_task_work_head(&sqd->park_task_work);
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
 
@@ -6856,9 +6854,6 @@ static int io_sq_thread(void *data)
 	current->flags |= PF_NO_SETAFFINITY;
 
 	mutex_lock(&sqd->lock);
-	/* a user may had exited before the thread started */
-	io_run_task_work_head(&sqd->park_task_work);
-
 	while (1) {
 		int ret;
 		bool cap_entries, sqt_spin, needs_sched;
@@ -6919,7 +6914,6 @@ static int io_sq_thread(void *data)
 		}
 
 		finish_wait(&sqd->wait, &wait);
-		io_run_task_work_head(&sqd->park_task_work);
 		timeout = jiffies + sqd->sq_thread_idle;
 	}
 
@@ -6928,7 +6922,6 @@ static int io_sq_thread(void *data)
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 		io_ring_set_wakeup_flag(ctx);
 	io_run_task_work();
-	io_run_task_work_head(&sqd->park_task_work);
 	mutex_unlock(&sqd->lock);
 
 	complete(&sqd->exited);

From acfb381d9d714c657ff540099fa5a6fa98e71f07 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:03 +0100
Subject: [PATCH 1626/3292] io_uring: simplify waking sqo_sq_wait

Going through submission in __io_sq_thread() and still having a full SQ
is rather unexpected, so remove a check for SQ fullness and just wake up
whoever wait on sqo_sq_wait. Also skip if it doesn't do submission in
the first place, likely may to happen for SQPOLL sharing and/or IOPOLL.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e2e91751e87b1a39f8d63ef884aaff578123f61e.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d18be5afc403..3a7889939455 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6801,10 +6801,10 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 		    !(ctx->flags & IORING_SETUP_R_DISABLED))
 			ret = io_submit_sqes(ctx, to_submit);
 		mutex_unlock(&ctx->uring_lock);
-	}
 
-	if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait))
-		wake_up(&ctx->sqo_sq_wait);
+		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
+			wake_up(&ctx->sqo_sq_wait);
+	}
 
 	return ret;
 }

From 3dd0c97a9e011b11ce6bd245bacf58c57f6f7875 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:04 +0100
Subject: [PATCH 1627/3292] io_uring: get rid of files in exit cancel

We don't match against files on cancellation anymore, so no need to drag
around files_struct anymore, just pass a flag telling whether only
inflight or all requests should be killed.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7bfc5409a78f8e2d6b27dec3293ec2d248677348.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 63 +++++++++++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3a7889939455..8b8d25216662 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1036,7 +1036,7 @@ static bool io_disarm_next(struct io_kiocb *req);
 static void io_uring_del_task_file(unsigned long index);
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
-					 struct files_struct *files);
+					 bool cancel_all);
 static void io_uring_cancel_sqpoll(struct io_sq_data *sqd);
 static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
 
@@ -1105,15 +1105,14 @@ static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
 		percpu_ref_put(ref);
 }
 
-static bool io_match_task(struct io_kiocb *head,
-			  struct task_struct *task,
-			  struct files_struct *files)
+static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
+			  bool cancel_all)
 {
 	struct io_kiocb *req;
 
 	if (task && head->task != task)
 		return false;
-	if (!files)
+	if (cancel_all)
 		return true;
 
 	io_for_each_link(req, head) {
@@ -5256,7 +5255,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
  * Returns true if we found and killed one or more poll requests
  */
 static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
-			       struct files_struct *files)
+			       bool cancel_all)
 {
 	struct hlist_node *tmp;
 	struct io_kiocb *req;
@@ -5268,7 +5267,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
 
 		list = &ctx->cancel_hash[i];
 		hlist_for_each_entry_safe(req, tmp, list, hash_node) {
-			if (io_match_task(req, tsk, files))
+			if (io_match_task(req, tsk, cancel_all))
 				posted += io_poll_remove_one(req);
 		}
 	}
@@ -8742,7 +8741,7 @@ static void io_ring_exit_work(struct work_struct *work)
 	 * as nobody else will be looking for them.
 	 */
 	do {
-		io_uring_try_cancel_requests(ctx, NULL, NULL);
+		io_uring_try_cancel_requests(ctx, NULL, true);
 		if (ctx->sq_data) {
 			struct io_sq_data *sqd = ctx->sq_data;
 			struct task_struct *tsk;
@@ -8793,14 +8792,14 @@ static void io_ring_exit_work(struct work_struct *work)
 
 /* Returns true if we found and killed one or more timeouts */
 static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
-			     struct files_struct *files)
+			     bool cancel_all)
 {
 	struct io_kiocb *req, *tmp;
 	int canceled = 0;
 
 	spin_lock_irq(&ctx->completion_lock);
 	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
-		if (io_match_task(req, tsk, files)) {
+		if (io_match_task(req, tsk, cancel_all)) {
 			io_kill_timeout(req, -ECANCELED);
 			canceled++;
 		}
@@ -8826,8 +8825,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 		io_unregister_personality(ctx, index);
 	mutex_unlock(&ctx->uring_lock);
 
-	io_kill_timeouts(ctx, NULL, NULL);
-	io_poll_remove_all(ctx, NULL, NULL);
+	io_kill_timeouts(ctx, NULL, true);
+	io_poll_remove_all(ctx, NULL, true);
 
 	/* if we failed setting up the ctx, we might not have any rings */
 	io_iopoll_try_reap_events(ctx);
@@ -8853,7 +8852,7 @@ static int io_uring_release(struct inode *inode, struct file *file)
 
 struct io_task_cancel {
 	struct task_struct *task;
-	struct files_struct *files;
+	bool all;
 };
 
 static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
@@ -8862,30 +8861,29 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
 	struct io_task_cancel *cancel = data;
 	bool ret;
 
-	if (cancel->files && (req->flags & REQ_F_LINK_TIMEOUT)) {
+	if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
 		unsigned long flags;
 		struct io_ring_ctx *ctx = req->ctx;
 
 		/* protect against races with linked timeouts */
 		spin_lock_irqsave(&ctx->completion_lock, flags);
-		ret = io_match_task(req, cancel->task, cancel->files);
+		ret = io_match_task(req, cancel->task, cancel->all);
 		spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	} else {
-		ret = io_match_task(req, cancel->task, cancel->files);
+		ret = io_match_task(req, cancel->task, cancel->all);
 	}
 	return ret;
 }
 
 static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
-				  struct task_struct *task,
-				  struct files_struct *files)
+				  struct task_struct *task, bool cancel_all)
 {
 	struct io_defer_entry *de;
 	LIST_HEAD(list);
 
 	spin_lock_irq(&ctx->completion_lock);
 	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
-		if (io_match_task(de->req, task, files)) {
+		if (io_match_task(de->req, task, cancel_all)) {
 			list_cut_position(&list, &ctx->defer_list, &de->list);
 			break;
 		}
@@ -8929,9 +8927,9 @@ static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
 
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
-					 struct files_struct *files)
+					 bool cancel_all)
 {
-	struct io_task_cancel cancel = { .task = task, .files = files, };
+	struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
 	struct io_uring_task *tctx = task ? task->io_uring : NULL;
 
 	while (1) {
@@ -8951,7 +8949,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 		}
 
 		/* SQPOLL thread does its own polling */
-		if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) ||
+		if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
 		    (ctx->sq_data && ctx->sq_data->thread == current)) {
 			while (!list_empty_careful(&ctx->iopoll_list)) {
 				io_iopoll_try_reap_events(ctx);
@@ -8959,9 +8957,9 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 			}
 		}
 
-		ret |= io_cancel_defer_files(ctx, task, files);
-		ret |= io_poll_remove_all(ctx, task, files);
-		ret |= io_kill_timeouts(ctx, task, files);
+		ret |= io_cancel_defer_files(ctx, task, cancel_all);
+		ret |= io_poll_remove_all(ctx, task, cancel_all);
+		ret |= io_kill_timeouts(ctx, task, cancel_all);
 		ret |= io_run_task_work();
 		ret |= io_run_ctx_fallback(ctx);
 		if (!ret)
@@ -9067,7 +9065,7 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
 	return percpu_counter_sum(&tctx->inflight);
 }
 
-static void io_uring_try_cancel(struct files_struct *files)
+static void io_uring_try_cancel(bool cancel_all)
 {
 	struct io_uring_task *tctx = current->io_uring;
 	struct io_tctx_node *node;
@@ -9078,7 +9076,7 @@ static void io_uring_try_cancel(struct files_struct *files)
 
 		/* sqpoll task will cancel all its requests */
 		if (!ctx->sq_data)
-			io_uring_try_cancel_requests(ctx, current, files);
+			io_uring_try_cancel_requests(ctx, current, cancel_all);
 	}
 }
 
@@ -9104,7 +9102,7 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
 		if (!inflight)
 			break;
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-			io_uring_try_cancel_requests(ctx, current, NULL);
+			io_uring_try_cancel_requests(ctx, current, true);
 
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 		/*
@@ -9128,6 +9126,7 @@ void __io_uring_cancel(struct files_struct *files)
 	struct io_uring_task *tctx = current->io_uring;
 	DEFINE_WAIT(wait);
 	s64 inflight;
+	bool cancel_all = !files;
 
 	if (tctx->io_wq)
 		io_wq_exit_start(tctx->io_wq);
@@ -9136,10 +9135,10 @@ void __io_uring_cancel(struct files_struct *files)
 	atomic_inc(&tctx->in_idle);
 	do {
 		/* read completions before cancelations */
-		inflight = tctx_inflight(tctx, !!files);
+		inflight = tctx_inflight(tctx, !cancel_all);
 		if (!inflight)
 			break;
-		io_uring_try_cancel(files);
+		io_uring_try_cancel(cancel_all);
 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 
 		/*
@@ -9147,14 +9146,14 @@ void __io_uring_cancel(struct files_struct *files)
 		 * avoids a race where a completion comes in before we did
 		 * prepare_to_wait().
 		 */
-		if (inflight == tctx_inflight(tctx, !!files))
+		if (inflight == tctx_inflight(tctx, !cancel_all))
 			schedule();
 		finish_wait(&tctx->wait, &wait);
 	} while (1);
 	atomic_dec(&tctx->in_idle);
 
 	io_uring_clean_tctx(tctx);
-	if (!files) {
+	if (cancel_all) {
 		/* for exec all current's requests should be gone, kill tctx */
 		__io_uring_free(current);
 	}

From 93d2bcd2cbfed2c714341f7a7ecd511aaedabd83 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:05 +0100
Subject: [PATCH 1628/3292] io_uring: make fail flag not link specific

The main difference is in req_set_fail_links() renamed into
req_set_fail(), which now sets REQ_F_FAIL_LINK/REQ_F_FAIL flag
unconditional on whether it has been a link or not. It only matters in
io_disarm_next(), which already handles it well, and all calls to it
have a fast path checking REQ_F_LINK/HARDLINK.

It looks cleaner, and sheds binary size
   text    data     bss     dec     hex filename
  84235   12390       8   96633   17979 ./fs/io_uring.o
  84151   12414       8   96573   1793d ./fs/io_uring.o

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e2224154dd6e53b665ac835d29436b177872fa10.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 87 +++++++++++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8b8d25216662..8a5fda76f7a0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -705,7 +705,7 @@ enum {
 	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,
 
 	/* first byte is taken by user flags, shift it to not overlap */
-	REQ_F_FAIL_LINK_BIT	= 8,
+	REQ_F_FAIL_BIT		= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
 	REQ_F_NOWAIT_BIT,
@@ -741,7 +741,7 @@ enum {
 	REQ_F_BUFFER_SELECT	= BIT(REQ_F_BUFFER_SELECT_BIT),
 
 	/* fail rest of links */
-	REQ_F_FAIL_LINK		= BIT(REQ_F_FAIL_LINK_BIT),
+	REQ_F_FAIL		= BIT(REQ_F_FAIL_BIT),
 	/* on inflight list, should be cancelled and waited on exit reliably */
 	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
 	/* read/write uses file position */
@@ -1122,10 +1122,9 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 	return false;
 }
 
-static inline void req_set_fail_links(struct io_kiocb *req)
+static inline void req_set_fail(struct io_kiocb *req)
 {
-	if (req->flags & REQ_F_LINK)
-		req->flags |= REQ_F_FAIL_LINK;
+	req->flags |= REQ_F_FAIL;
 }
 
 static void io_ring_ctx_ref_free(struct percpu_ref *ref)
@@ -1594,7 +1593,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 		struct io_comp_state *cs = &ctx->submit_state.comp;
 
 		if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
-			if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL_LINK))
+			if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL))
 				io_disarm_next(req);
 			if (req->link) {
 				io_req_task_queue(req->link);
@@ -1650,7 +1649,7 @@ static inline void io_req_complete(struct io_kiocb *req, long res)
 
 static void io_req_complete_failed(struct io_kiocb *req, long res)
 {
-	req_set_fail_links(req);
+	req_set_fail(req);
 	io_put_req(req);
 	io_req_complete_post(req, res, 0);
 }
@@ -1829,7 +1828,7 @@ static bool io_disarm_next(struct io_kiocb *req)
 
 	if (likely(req->flags & REQ_F_LINK_TIMEOUT))
 		posted = io_kill_linked_timeout(req);
-	if (unlikely((req->flags & REQ_F_FAIL_LINK) &&
+	if (unlikely((req->flags & REQ_F_FAIL) &&
 		     !(req->flags & REQ_F_HARDLINK))) {
 		posted |= (req->link != NULL);
 		io_fail_links(req);
@@ -1847,7 +1846,7 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
 	 * dependencies to the next request. In case of failure, fail the rest
 	 * of the chain.
 	 */
-	if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL_LINK)) {
+	if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL)) {
 		struct io_ring_ctx *ctx = req->ctx;
 		unsigned long flags;
 		bool posted;
@@ -2486,7 +2485,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 			req->flags |= REQ_F_REISSUE;
 			return;
 		}
-		req_set_fail_links(req);
+		req_set_fail(req);
 	}
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		cflags = io_put_rw_kbuf(req);
@@ -2509,7 +2508,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 	if (unlikely(res != req->result)) {
 		if (!(res == -EAGAIN && io_rw_should_reissue(req) &&
 		    io_resubmit_prep(req))) {
-			req_set_fail_links(req);
+			req_set_fail(req);
 			req->flags |= REQ_F_DONT_REISSUE;
 		}
 	}
@@ -2765,7 +2764,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		} else {
 			int cflags = 0;
 
-			req_set_fail_links(req);
+			req_set_fail(req);
 			if (req->flags & REQ_F_BUFFER_SELECTED)
 				cflags = io_put_rw_kbuf(req);
 			__io_req_complete(req, issue_flags, ret, cflags);
@@ -3487,7 +3486,7 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
 
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -3531,7 +3530,7 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
 
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -3568,7 +3567,7 @@ static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
 
 	ret = __sys_shutdown_sock(sock, req->shutdown.how);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 #else
@@ -3626,7 +3625,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 
 	if (ret != sp->len)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -3663,7 +3662,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 
 	if (ret != sp->len)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -3716,7 +3715,7 @@ static int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
 				end > 0 ? end : LLONG_MAX,
 				req->sync.flags & IORING_FSYNC_DATASYNC);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -3745,7 +3744,7 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
 	ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
 				req->sync.len);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -3864,7 +3863,7 @@ err:
 	putname(req->open.filename);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
@@ -3936,7 +3935,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	if (head)
 		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 
 	/* complete before unlock, IOPOLL may need the lock */
 	__io_req_complete(req, issue_flags, ret, 0);
@@ -4027,7 +4026,7 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 			__io_remove_buffers(ctx, head, p->bgid, -1U);
 	}
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	/* complete before unlock, IOPOLL may need the lock */
 	__io_req_complete(req, issue_flags, ret, 0);
 	io_ring_submit_unlock(ctx, !force_nonblock);
@@ -4073,7 +4072,7 @@ static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
 		return -EAGAIN;
 
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 #else
@@ -4109,7 +4108,7 @@ static int io_madvise(struct io_kiocb *req, unsigned int issue_flags)
 
 	ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 #else
@@ -4148,7 +4147,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
 
 	ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
@@ -4183,7 +4182,7 @@ static int io_statx(struct io_kiocb *req, unsigned int issue_flags)
 		       ctx->buffer);
 
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -4241,7 +4240,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
 	ret = filp_close(file, current->files);
 err:
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	if (file)
 		fput(file);
 	__io_req_complete(req, issue_flags, ret, 0);
@@ -4274,7 +4273,7 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
 	ret = sync_file_range(req->file, req->sync.off, req->sync.len,
 				req->sync.flags);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_req_complete(req, ret);
 	return 0;
 }
@@ -4378,7 +4377,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 		kfree(kmsg->free_iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < min_ret)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
@@ -4420,7 +4419,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
 		ret = -EINTR;
 
 	if (ret < min_ret)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
@@ -4615,7 +4614,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 		kfree(kmsg->free_iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, cflags);
 	return 0;
 }
@@ -4670,7 +4669,7 @@ out_free:
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		cflags = io_put_recv_kbuf(req);
 	if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, cflags);
 	return 0;
 }
@@ -4709,7 +4708,7 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 	if (ret < 0) {
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
-		req_set_fail_links(req);
+		req_set_fail(req);
 	}
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
@@ -4773,7 +4772,7 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
 		ret = -EINTR;
 out:
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
@@ -5244,7 +5243,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 	if (do_complete) {
 		io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
 		io_commit_cqring(req->ctx);
-		req_set_fail_links(req);
+		req_set_fail(req);
 		io_put_req_deferred(req, 1);
 	}
 
@@ -5454,7 +5453,7 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
 err:
 	if (ret < 0) {
 		spin_unlock_irq(&ctx->completion_lock);
-		req_set_fail_links(req);
+		req_set_fail(req);
 		io_req_complete(req, ret);
 		return 0;
 	}
@@ -5474,7 +5473,7 @@ err:
 	if (!completing) {
 		ret = io_poll_add(preq, issue_flags);
 		if (ret < 0) {
-			req_set_fail_links(preq);
+			req_set_fail(preq);
 			io_req_complete(preq, ret);
 		}
 	}
@@ -5499,7 +5498,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	io_cqring_ev_posted(ctx);
-	req_set_fail_links(req);
+	req_set_fail(req);
 	io_put_req(req);
 	return HRTIMER_NORESTART;
 }
@@ -5535,7 +5534,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req_set_fail_links(req);
+	req_set_fail(req);
 	io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
 	io_put_req_deferred(req, 1);
 	return 0;
@@ -5614,7 +5613,7 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
 	spin_unlock_irq(&ctx->completion_lock);
 	io_cqring_ev_posted(ctx);
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_put_req(req);
 	return 0;
 }
@@ -5767,7 +5766,7 @@ done:
 	io_cqring_ev_posted(ctx);
 
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 }
 
 static int io_async_cancel_prep(struct io_kiocb *req,
@@ -5824,7 +5823,7 @@ done:
 	io_cqring_ev_posted(ctx);
 
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	io_put_req(req);
 	return 0;
 }
@@ -5866,7 +5865,7 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
 	mutex_unlock(&ctx->uring_lock);
 
 	if (ret < 0)
-		req_set_fail_links(req);
+		req_set_fail(req);
 	__io_req_complete(req, issue_flags, ret, 0);
 	return 0;
 }
@@ -6569,7 +6568,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 fail_req:
 		if (link->head) {
 			/* fail even hard links since we don't submit */
-			link->head->flags |= REQ_F_FAIL_LINK;
+			req_set_fail(link->head);
 			io_req_complete_failed(link->head, -ECANCELED);
 			link->head = NULL;
 		}

From b986af7e2df4f0871367c397ba61a542f37c0ab3 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:06 +0100
Subject: [PATCH 1629/3292] io_uring: shuffle rarely used ctx fields

There is a bunch of scattered around ctx fields that are almost never
used, e.g. only on ring exit, plunge them to the end, better locality,
better aesthetically.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/782ff94b00355923eae757d58b1a47821b5b46d4.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8a5fda76f7a0..6e19fe04a5d1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -368,9 +368,6 @@ struct io_ring_ctx {
 		unsigned		cached_cq_overflow;
 		unsigned long		sq_check_overflow;
 
-		/* hashed buffered write serialization */
-		struct io_wq_hash	*hash_map;
-
 		struct list_head	defer_list;
 		struct list_head	timeout_list;
 		struct list_head	cq_overflow_list;
@@ -387,9 +384,6 @@ struct io_ring_ctx {
 
 	struct io_rings	*rings;
 
-	/* Only used for accounting purposes */
-	struct mm_struct	*mm_account;
-
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
@@ -410,14 +404,6 @@ struct io_ring_ctx {
 	unsigned		nr_user_bufs;
 	struct io_mapped_ubuf	**user_bufs;
 
-	struct user_struct	*user;
-
-	struct completion	ref_comp;
-
-#if defined(CONFIG_UNIX)
-	struct socket		*ring_sock;
-#endif
-
 	struct xarray		io_buffers;
 
 	struct xarray		personalities;
@@ -461,12 +447,24 @@ struct io_ring_ctx {
 
 	struct io_restriction		restrictions;
 
-	/* exit task_work */
-	struct callback_head		*exit_task_work;
-
 	/* Keep this last, we don't need it for the fast path */
-	struct work_struct		exit_work;
-	struct list_head		tctx_list;
+	struct {
+		#if defined(CONFIG_UNIX)
+			struct socket		*ring_sock;
+		#endif
+		/* hashed buffered write serialization */
+		struct io_wq_hash		*hash_map;
+
+		/* Only used for accounting purposes */
+		struct user_struct		*user;
+		struct mm_struct		*mm_account;
+
+		/* ctx exit and cancelation */
+		struct callback_head		*exit_task_work;
+		struct work_struct		exit_work;
+		struct list_head		tctx_list;
+		struct completion		ref_comp;
+	};
 };
 
 struct io_uring_task {

From b13a8918d395554ff9a8cee17d03ed45d805df24 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:07 +0100
Subject: [PATCH 1630/3292] io_uring: better locality for rsrc fields

ring has two types of resource-related fields: used for request
submission, and field needed for update/registration. Reshuffle them
into these two groups for better locality and readability. The second
group is not in the hot path, so it's natural to place them somewhere in
the end. Also update an outdated comment.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/05b34795bb4440f4ec4510f08abd5a31830f8ca0.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6e19fe04a5d1..f628af3a3368 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -391,21 +391,17 @@ struct io_ring_ctx {
 	struct list_head	sqd_list;
 
 	/*
-	 * If used, fixed file set. Writers must ensure that ->refs is dead,
-	 * readers must ensure that ->refs is alive as long as the file* is
-	 * used. Only updated through io_uring_register(2).
+	 * Fixed resources fast path, should be accessed only under uring_lock,
+	 * and updated through io_uring_register(2)
 	 */
-	struct io_rsrc_data	*file_data;
+	struct io_rsrc_node	*rsrc_node;
+
 	struct io_file_table	file_table;
 	unsigned		nr_user_files;
-
-	/* if used, fixed mapped user buffers */
-	struct io_rsrc_data	*buf_data;
 	unsigned		nr_user_bufs;
 	struct io_mapped_ubuf	**user_bufs;
 
 	struct xarray		io_buffers;
-
 	struct xarray		personalities;
 	u32			pers_next;
 
@@ -437,16 +433,21 @@ struct io_ring_ctx {
 		bool			poll_multi_file;
 	} ____cacheline_aligned_in_smp;
 
-	struct delayed_work		rsrc_put_work;
-	struct llist_head		rsrc_put_llist;
-	struct list_head		rsrc_ref_list;
-	spinlock_t			rsrc_ref_lock;
-	struct io_rsrc_node		*rsrc_node;
-	struct io_rsrc_node		*rsrc_backup_node;
-	struct io_mapped_ubuf		*dummy_ubuf;
-
 	struct io_restriction		restrictions;
 
+	/* slow path rsrc auxilary data, used by update/register */
+	struct {
+		struct io_rsrc_node		*rsrc_backup_node;
+		struct io_mapped_ubuf		*dummy_ubuf;
+		struct io_rsrc_data		*file_data;
+		struct io_rsrc_data		*buf_data;
+
+		struct delayed_work		rsrc_put_work;
+		struct llist_head		rsrc_put_llist;
+		struct list_head		rsrc_ref_list;
+		spinlock_t			rsrc_ref_lock;
+	};
+
 	/* Keep this last, we don't need it for the fast path */
 	struct {
 		#if defined(CONFIG_UNIX)

From a566c5562d41b99f11c8224b2a3010e60ad93acf Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:08 +0100
Subject: [PATCH 1631/3292] io_uring: remove dependency on ring->sq/cq_entries

We have numbers of {sq,cq} entries cached in ctx, don't look up them in
user-shared rings as 1) it may fetch additional cacheline 2) user may
change it and so it's always error prone.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/745d31bc2da41283ddd0489ef784af5c8d6310e9.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f628af3a3368..169e95126acf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1356,7 +1356,7 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 {
 	struct io_rings *r = ctx->rings;
 
-	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
+	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries;
 }
 
 static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
@@ -1374,7 +1374,7 @@ static inline struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 	 * control dependency is enough as we're using WRITE_ONCE to
 	 * fill the cq entry
 	 */
-	if (__io_cqring_events(ctx) == rings->cq_ring_entries)
+	if (__io_cqring_events(ctx) == ctx->cq_entries)
 		return NULL;
 
 	tail = ctx->cached_cq_tail++;
@@ -1427,11 +1427,10 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 /* Returns true if there are no backlogged entries after the flush */
 static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
-	struct io_rings *rings = ctx->rings;
 	unsigned long flags;
 	bool all_flushed, posted;
 
-	if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries)
+	if (!force && __io_cqring_events(ctx) == ctx->cq_entries)
 		return false;
 
 	posted = false;

From ea5ab3b579836d784357ae9cb5bf9d7242a645b9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:09 +0100
Subject: [PATCH 1632/3292] io_uring: deduce cq_mask from cq_entries

No need to cache cq_mask, it's exactly cq_entries - 1, so just deduce
it to not carry it around.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d439efad0503c8398451dae075e68a04362fbc8d.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 169e95126acf..a35a8dc5c930 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -362,7 +362,6 @@ struct io_ring_ctx {
 		u32			*sq_array;
 		unsigned		cached_sq_head;
 		unsigned		sq_entries;
-		unsigned		sq_mask;
 		unsigned		sq_thread_idle;
 		unsigned		cached_sq_dropped;
 		unsigned		cached_cq_overflow;
@@ -408,7 +407,6 @@ struct io_ring_ctx {
 	struct {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;
-		unsigned		cq_mask;
 		atomic_t		cq_timeouts;
 		unsigned		cq_last_tm_flush;
 		unsigned		cq_extra;
@@ -1367,7 +1365,7 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 static inline struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
-	unsigned tail;
+	unsigned tail, mask = ctx->cq_entries - 1;
 
 	/*
 	 * writes to the cq entry need to come after reading head; the
@@ -1378,7 +1376,7 @@ static inline struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 		return NULL;
 
 	tail = ctx->cached_cq_tail++;
-	return &rings->cqes[tail & ctx->cq_mask];
+	return &rings->cqes[tail & mask];
 }
 
 static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
@@ -6680,7 +6678,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
 static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 {
 	u32 *sq_array = ctx->sq_array;
-	unsigned head;
+	unsigned head, mask = ctx->sq_entries - 1;
 
 	/*
 	 * The cached sq head (or cq tail) serves two purposes:
@@ -6690,7 +6688,7 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 	 * 2) allows the kernel side to track the head on its own, even
 	 *    though the application is the one updating it.
 	 */
-	head = READ_ONCE(sq_array[ctx->cached_sq_head++ & ctx->sq_mask]);
+	head = READ_ONCE(sq_array[ctx->cached_sq_head++ & mask]);
 	if (likely(head < ctx->sq_entries))
 		return &ctx->sq_sqes[head];
 
@@ -9512,8 +9510,6 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	rings->cq_ring_mask = p->cq_entries - 1;
 	rings->sq_ring_entries = p->sq_entries;
 	rings->cq_ring_entries = p->cq_entries;
-	ctx->sq_mask = rings->sq_ring_mask;
-	ctx->cq_mask = rings->cq_ring_mask;
 
 	size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
 	if (size == SIZE_MAX) {

From 8f6ed49a4443be35a11807695dbae2680f7ca6fc Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:10 +0100
Subject: [PATCH 1633/3292] io_uring: kill cached_cq_overflow

There are two copies of cq_overflow, shared with userspace and internal
cached one. It was needed for DRAIN accounting, but now we have yet
another knob to tune the accounting, i.e. cq_extra, and we can throw
away the internal counter and just increment the one in the shared ring.

If user modifies it as so never gets the right overflow value ever
again, it's its problem, even though before we would have restored it
back by next overflow.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/8427965f5175dd051febc63804909861109ce859.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a35a8dc5c930..623978fd2883 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -364,7 +364,6 @@ struct io_ring_ctx {
 		unsigned		sq_entries;
 		unsigned		sq_thread_idle;
 		unsigned		cached_sq_dropped;
-		unsigned		cached_cq_overflow;
 		unsigned long		sq_check_overflow;
 
 		struct list_head	defer_list;
@@ -1199,13 +1198,20 @@ err:
 	return NULL;
 }
 
+static void io_account_cq_overflow(struct io_ring_ctx *ctx)
+{
+	struct io_rings *r = ctx->rings;
+
+	WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
+	ctx->cq_extra--;
+}
+
 static bool req_need_defer(struct io_kiocb *req, u32 seq)
 {
 	if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
 		struct io_ring_ctx *ctx = req->ctx;
 
-		return seq + ctx->cq_extra != ctx->cached_cq_tail
-				+ READ_ONCE(ctx->cached_cq_overflow);
+		return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
 	}
 
 	return false;
@@ -1444,8 +1450,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		if (cqe)
 			memcpy(cqe, &ocqe->cqe, sizeof(*cqe));
 		else
-			WRITE_ONCE(ctx->rings->cq_overflow,
-				   ++ctx->cached_cq_overflow);
+			io_account_cq_overflow(ctx);
+
 		posted = true;
 		list_del(&ocqe->list);
 		kfree(ocqe);
@@ -1529,7 +1535,7 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 		 * or cannot allocate an overflow entry, then we need to drop it
 		 * on the floor.
 		 */
-		WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
+		io_account_cq_overflow(ctx);
 		return false;
 	}
 	if (list_empty(&ctx->cq_overflow_list)) {

From d068b5068d43353a352b3ec92865f7045fdb213e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:11 +0100
Subject: [PATCH 1634/3292] io_uring: rename io_get_cqring

Rename io_get_cqring() into io_get_cqe() for consistency with SQ, and
just because the old name is not as clear.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a46a53e3f781de372f5632c184e61546b86515ce.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 623978fd2883..60230e7b75fa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -11,7 +11,7 @@
  * before writing the tail (using smp_load_acquire to read the tail will
  * do). It also needs a smp_mb() before updating CQ head (ordering the
  * entry load(s) with the head store), pairing with an implicit barrier
- * through a control-dependency in io_get_cqring (smp_store_release to
+ * through a control-dependency in io_get_cqe (smp_store_release to
  * store head will do). Failure to do so could lead to reading invalid
  * CQ entries.
  *
@@ -1368,7 +1368,7 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
-static inline struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
+static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
 	unsigned tail, mask = ctx->cq_entries - 1;
@@ -1440,7 +1440,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	posted = false;
 	spin_lock_irqsave(&ctx->completion_lock, flags);
 	while (!list_empty(&ctx->cq_overflow_list)) {
-		struct io_uring_cqe *cqe = io_get_cqring(ctx);
+		struct io_uring_cqe *cqe = io_get_cqe(ctx);
 		struct io_overflow_cqe *ocqe;
 
 		if (!cqe && !force)
@@ -1562,7 +1562,7 @@ static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data
 	 * submission (by quite a lot). Increment the overflow count in
 	 * the ring.
 	 */
-	cqe = io_get_cqring(ctx);
+	cqe = io_get_cqe(ctx);
 	if (likely(cqe)) {
 		WRITE_ONCE(cqe->user_data, user_data);
 		WRITE_ONCE(cqe->res, res);

From d0acdee296d42e700c16271d9f95085a9c897a53 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 May 2021 22:58:12 +0100
Subject: [PATCH 1635/3292] io_uring: don't bounce submit_state cachelines

struct io_submit_state contains struct io_comp_state and so
locked_free_*, that renders cachelines around ->locked_free* being
invalidated on most non-inline completions, that may terrorise caches if
submissions and completions are done by different tasks.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/290cb5412b76892e8631978ee8ab9db0c6290dd5.1621201931.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 60230e7b75fa..c42a2f3b7259 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -298,11 +298,8 @@ struct io_sq_data {
 struct io_comp_state {
 	struct io_kiocb		*reqs[IO_COMPL_BATCH];
 	unsigned int		nr;
-	unsigned int		locked_free_nr;
 	/* inline/task_work completion list, under ->uring_lock */
 	struct list_head	free_list;
-	/* IRQ completion list, under ->completion_lock */
-	struct list_head	locked_free_list;
 };
 
 struct io_submit_link {
@@ -379,6 +376,9 @@ struct io_ring_ctx {
 	} ____cacheline_aligned_in_smp;
 
 	struct io_submit_state		submit_state;
+	/* IRQ completion list, under ->completion_lock */
+	struct list_head	locked_free_list;
+	unsigned int		locked_free_nr;
 
 	struct io_rings	*rings;
 
@@ -1189,7 +1189,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_llist_head(&ctx->rsrc_put_llist);
 	INIT_LIST_HEAD(&ctx->tctx_list);
 	INIT_LIST_HEAD(&ctx->submit_state.comp.free_list);
-	INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list);
+	INIT_LIST_HEAD(&ctx->locked_free_list);
 	return ctx;
 err:
 	kfree(ctx->dummy_ubuf);
@@ -1592,8 +1592,6 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 	 * free_list cache.
 	 */
 	if (req_ref_put_and_test(req)) {
-		struct io_comp_state *cs = &ctx->submit_state.comp;
-
 		if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
 			if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL))
 				io_disarm_next(req);
@@ -1604,8 +1602,8 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 		}
 		io_dismantle_req(req);
 		io_put_task(req->task, 1);
-		list_add(&req->compl.list, &cs->locked_free_list);
-		cs->locked_free_nr++;
+		list_add(&req->compl.list, &ctx->locked_free_list);
+		ctx->locked_free_nr++;
 	} else {
 		if (!percpu_ref_tryget(&ctx->refs))
 			req = NULL;
@@ -1660,8 +1658,8 @@ static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx,
 					struct io_comp_state *cs)
 {
 	spin_lock_irq(&ctx->completion_lock);
-	list_splice_init(&cs->locked_free_list, &cs->free_list);
-	cs->locked_free_nr = 0;
+	list_splice_init(&ctx->locked_free_list, &cs->free_list);
+	ctx->locked_free_nr = 0;
 	spin_unlock_irq(&ctx->completion_lock);
 }
 
@@ -1677,7 +1675,7 @@ static bool io_flush_cached_reqs(struct io_ring_ctx *ctx)
 	 * locked cache, grab the lock and move them over to our submission
 	 * side cache.
 	 */
-	if (READ_ONCE(cs->locked_free_nr) > IO_COMPL_BATCH)
+	if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH)
 		io_flush_cached_locked_reqs(ctx, cs);
 
 	nr = state->free_reqs;

From 40dad765c045ab6dbd481cc4f00d04953e77510c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 9 Jun 2021 15:26:54 +0100
Subject: [PATCH 1636/3292] io_uring: enable shmem/memfd memory registration

Relax buffer registration restictions, which filters out file backed
memory, and allow shmem/memfd as they have normal anonymous pages
underneath.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c42a2f3b7259..b93fa32172af 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8306,6 +8306,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 		for (i = 0; i < nr_pages; i++) {
 			struct vm_area_struct *vma = vmas[i];
 
+			if (vma_is_shmem(vma))
+				continue;
 			if (vma->vm_file &&
 			    !is_file_hugepages(vma->vm_file)) {
 				ret = -EOPNOTSUPP;

From 976517f162a05f4315b2373fd11585c395506259 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 9 Jun 2021 12:07:25 +0100
Subject: [PATCH 1637/3292] io_uring: fix blocking inline submission

There is a complaint against sys_io_uring_enter() blocking if it submits
stdin reads. The problem is in __io_file_supports_async(), which
sees that it's a cdev and allows it to be processed inline.

Punt char devices using generic rules of io_file_supports_async(),
including checking for presence of *_iter() versions of rw callbacks.
Apparently, it will affect most of cdevs with some exceptions like
null and zero devices.

Cc: stable@vger.kernel.org
Reported-by: Birk Hirdman <lonjil@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d60270856b8a4560a639ef5f76e55eb563633599.1623236455.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b93fa32172af..cdd9b53abbb2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2618,7 +2618,7 @@ static bool __io_file_supports_async(struct file *file, int rw)
 			return true;
 		return false;
 	}
-	if (S_ISCHR(mode) || S_ISSOCK(mode))
+	if (S_ISSOCK(mode))
 		return true;
 	if (S_ISREG(mode)) {
 		if (IS_ENABLED(CONFIG_BLOCK) &&

From c7f405d6fa36f778931881bfb1e12dd401d0bc62 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:12 +0100
Subject: [PATCH 1638/3292] io-wq: embed wqe ptr array into struct io_wq

io-wq keeps an array of pointers to struct io_wqe, allocate this array
as a part of struct io-wq, it's easier to code and saves an extra
indirection for nearly each io-wq call.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1482c6a001923bbed662dc38a8a580fb08b1ed8c.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index b3e8624a37d0..1ca98fc7d52b 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -102,7 +102,6 @@ struct io_wqe {
  * Per io_wq state
   */
 struct io_wq {
-	struct io_wqe **wqes;
 	unsigned long state;
 
 	free_work_fn *free_work;
@@ -118,6 +117,8 @@ struct io_wq {
 	struct hlist_node cpuhp_node;
 
 	struct task_struct *task;
+
+	struct io_wqe *wqes[];
 };
 
 static enum cpuhp_state io_wq_online;
@@ -907,17 +908,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))
 		return ERR_PTR(-EINVAL);
 
-	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+	wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL);
 	if (!wq)
 		return ERR_PTR(-ENOMEM);
-
-	wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
-	if (!wq->wqes)
-		goto err_wq;
-
 	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 	if (ret)
-		goto err_wqes;
+		goto err_wq;
 
 	refcount_inc(&data->hash->refs);
 	wq->hash = data->hash;
@@ -962,8 +958,6 @@ err:
 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 	for_each_node(node)
 		kfree(wq->wqes[node]);
-err_wqes:
-	kfree(wq->wqes);
 err_wq:
 	kfree(wq);
 	return ERR_PTR(ret);
@@ -1036,7 +1030,6 @@ static void io_wq_destroy(struct io_wq *wq)
 		kfree(wqe);
 	}
 	io_wq_put_hash(wq->hash);
-	kfree(wq->wqes);
 	kfree(wq);
 }
 

From 382cb030469db3d428ada09e7925f684ba9d61cf Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:13 +0100
Subject: [PATCH 1639/3292] io-wq: remove unused io-wq refcounting

iowq->refs is initialised to one and killed on exit, so it's not used
and we can kill it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/401007393528ea7c102360e69a29b64498e15db2.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 1ca98fc7d52b..f058ea0bcae8 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -109,8 +109,6 @@ struct io_wq {
 
 	struct io_wq_hash *hash;
 
-	refcount_t refs;
-
 	atomic_t worker_refs;
 	struct completion worker_done;
 
@@ -949,7 +947,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	}
 
 	wq->task = get_task_struct(data->task);
-	refcount_set(&wq->refs, 1);
 	atomic_set(&wq->worker_refs, 1);
 	init_completion(&wq->worker_done);
 	return wq;
@@ -1038,8 +1035,7 @@ void io_wq_put_and_exit(struct io_wq *wq)
 	WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
 	io_wq_exit_workers(wq);
-	if (refcount_dec_and_test(&wq->refs))
-		io_wq_destroy(wq);
+	io_wq_destroy(wq);
 }
 
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)

From cb3d8972c78ab0cdb55a30d6db927a3e0442b3f9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:14 +0100
Subject: [PATCH 1640/3292] io_uring: refactor io_iopoll_req_issued

A simple refactoring of io_iopoll_req_issued(), move in_async inside so
we don't pass it around and save on double checking it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1513bfde4f0c835be25ac69a82737ab0668d7665.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cdd9b53abbb2..6c0b3f91e1ad 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2525,9 +2525,14 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
  * find it from a io_do_iopoll() thread before the issuer is done
  * accessing the kiocb cookie.
  */
-static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
+static void io_iopoll_req_issued(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	const bool in_async = io_wq_current_is_worker();
+
+	/* workqueue context doesn't hold uring_lock, grab it now */
+	if (unlikely(in_async))
+		mutex_lock(&ctx->uring_lock);
 
 	/*
 	 * Track whether we have multiple files in our lists. This will impact
@@ -2554,14 +2559,19 @@ static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
 	else
 		list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
 
-	/*
-	 * If IORING_SETUP_SQPOLL is enabled, sqes are either handled in sq thread
-	 * task context or in io worker task context. If current task context is
-	 * sq thread, we don't need to check whether should wake up sq thread.
-	 */
-	if (in_async && (ctx->flags & IORING_SETUP_SQPOLL) &&
-	    wq_has_sleeper(&ctx->sq_data->wait))
-		wake_up(&ctx->sq_data->wait);
+	if (unlikely(in_async)) {
+		/*
+		 * If IORING_SETUP_SQPOLL is enabled, sqes are either handle
+		 * in sq thread task context or in io worker task context. If
+		 * current task context is sq thread, we don't need to check
+		 * whether should wake up sq thread.
+		 */
+		if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+		    wq_has_sleeper(&ctx->sq_data->wait))
+			wake_up(&ctx->sq_data->wait);
+
+		mutex_unlock(&ctx->uring_lock);
+	}
 }
 
 static inline void io_state_file_put(struct io_submit_state *state)
@@ -6215,23 +6225,11 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (creds)
 		revert_creds(creds);
-
 	if (ret)
 		return ret;
-
 	/* If the op doesn't have a file, we're not polling for it */
-	if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) {
-		const bool in_async = io_wq_current_is_worker();
-
-		/* workqueue context doesn't hold uring_lock, grab it now */
-		if (in_async)
-			mutex_lock(&ctx->uring_lock);
-
-		io_iopoll_req_issued(req, in_async);
-
-		if (in_async)
-			mutex_unlock(&ctx->uring_lock);
-	}
+	if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file)
+		io_iopoll_req_issued(req);
 
 	return 0;
 }

From eef51daa72f745b6e771d18f6f37c7e5cd4ccdf1 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:15 +0100
Subject: [PATCH 1641/3292] io_uring: rename function *task_file

What at some moment was references to struct file used to control
lifetimes of task/ctx is now just internal tctx structures/nodes,
so rename outdated *task_file() routines into something more sensible.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e2fbce42932154c2631ce58ffbffaa232afe18d5.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6c0b3f91e1ad..41483ed8b8aa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1029,7 +1029,7 @@ static const struct io_op_def io_op_defs[] = {
 };
 
 static bool io_disarm_next(struct io_kiocb *req);
-static void io_uring_del_task_file(unsigned long index);
+static void io_uring_del_tctx_node(unsigned long index);
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
 					 bool cancel_all);
@@ -8714,7 +8714,7 @@ static void io_tctx_exit_cb(struct callback_head *cb)
 	 * node. It'll be removed by the end of cancellation, just ignore it.
 	 */
 	if (!atomic_read(&tctx->in_idle))
-		io_uring_del_task_file((unsigned long)work->ctx);
+		io_uring_del_tctx_node((unsigned long)work->ctx);
 	complete(&work->completion);
 }
 
@@ -8967,7 +8967,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	}
 }
 
-static int __io_uring_add_task_file(struct io_ring_ctx *ctx)
+static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
 {
 	struct io_uring_task *tctx = current->io_uring;
 	struct io_tctx_node *node;
@@ -9004,19 +9004,19 @@ static int __io_uring_add_task_file(struct io_ring_ctx *ctx)
 /*
  * Note that this task has used io_uring. We use it for cancelation purposes.
  */
-static inline int io_uring_add_task_file(struct io_ring_ctx *ctx)
+static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx)
 {
 	struct io_uring_task *tctx = current->io_uring;
 
 	if (likely(tctx && tctx->last == ctx))
 		return 0;
-	return __io_uring_add_task_file(ctx);
+	return __io_uring_add_tctx_node(ctx);
 }
 
 /*
  * Remove this io_uring_file -> task mapping.
  */
-static void io_uring_del_task_file(unsigned long index)
+static void io_uring_del_tctx_node(unsigned long index)
 {
 	struct io_uring_task *tctx = current->io_uring;
 	struct io_tctx_node *node;
@@ -9046,7 +9046,7 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
 	unsigned long index;
 
 	xa_for_each(&tctx->xa, index, node)
-		io_uring_del_task_file(index);
+		io_uring_del_tctx_node(index);
 	if (wq) {
 		/*
 		 * Must be after io_uring_del_task_file() (removes nodes under
@@ -9330,7 +9330,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		}
 		submitted = to_submit;
 	} else if (to_submit) {
-		ret = io_uring_add_task_file(ctx);
+		ret = io_uring_add_tctx_node(ctx);
 		if (unlikely(ret))
 			goto out;
 		mutex_lock(&ctx->uring_lock);
@@ -9540,7 +9540,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
 	if (fd < 0)
 		return fd;
 
-	ret = io_uring_add_task_file(ctx);
+	ret = io_uring_add_tctx_node(ctx);
 	if (ret) {
 		put_unused_fd(fd);
 		return ret;

From 769e683715211ad3cbed5908a86b97dd54d60970 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:16 +0100
Subject: [PATCH 1642/3292] io-wq: don't repeat IO_WQ_BIT_EXIT check by worker

io_wqe_worker()'s main loop does check IO_WQ_BIT_EXIT flag, so no need
for a second test_bit at the end as it will immediately jump to the
first check afterwards.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d6af4a51c86523a527fb5417c9fbc775c4b26497.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index f058ea0bcae8..8c13e23d4a8a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -559,8 +559,7 @@ loop:
 		if (ret)
 			continue;
 		/* timed out, exit unless we're the fixed worker */
-		if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
-		    !(worker->flags & IO_WORKER_F_FIXED))
+		if (!(worker->flags & IO_WORKER_F_FIXED))
 			break;
 	}
 

From e587227b680f798dd74644d047dd52ddb36cb82c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:17 +0100
Subject: [PATCH 1643/3292] io-wq: simplify worker exiting

io_worker_handle_work() already takes care of the empty list case and
releases spinlock, so get rid of ugly conditional unlocking and
unconditionally call handle_work()

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7521e485677f381036676943e876a0afecc23017.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 8c13e23d4a8a..2c37776c0280 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -565,10 +565,7 @@ loop:
 
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		raw_spin_lock_irq(&wqe->lock);
-		if (!wq_list_empty(&wqe->work_list))
-			io_worker_handle_work(worker);
-		else
-			raw_spin_unlock_irq(&wqe->lock);
+		io_worker_handle_work(worker);
 	}
 
 	io_worker_exit(worker);

From d878c81610e187becff1454f36b63c59ec165566 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:18 +0100
Subject: [PATCH 1644/3292] io_uring: hide rsrc tag copy into generic helpers

Make io_rsrc_data_alloc() taking care of rsrc tags loading on
registration, so we don't need to repeat it for each new rsrc type.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/5609680697bd09735de10561b75edb95283459da.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 55 +++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 41483ed8b8aa..6fda9fdee582 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7161,27 +7161,38 @@ static void io_rsrc_data_free(struct io_rsrc_data *data)
 	kfree(data);
 }
 
-static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
-					       rsrc_put_fn *do_put,
-					       unsigned nr)
+static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
+			      u64 __user *utags, unsigned nr,
+			      struct io_rsrc_data **pdata)
 {
 	struct io_rsrc_data *data;
+	unsigned i;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
-		return NULL;
+		return -ENOMEM;
 
 	data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL);
 	if (!data->tags) {
 		kfree(data);
-		return NULL;
+		return -ENOMEM;
+	}
+	if (utags) {
+		for (i = 0; i < nr; i++) {
+			if (copy_from_user(&data->tags[i], &utags[i],
+					   sizeof(data->tags[i]))) {
+				io_rsrc_data_free(data);
+				return -EFAULT;
+			}
+		}
 	}
 
 	atomic_set(&data->refs, 1);
 	data->ctx = ctx;
 	data->do_put = do_put;
 	init_completion(&data->done);
-	return data;
+	*pdata = data;
+	return 0;
 }
 
 static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -7633,7 +7644,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	struct file *file;
 	int fd, ret;
 	unsigned i;
-	struct io_rsrc_data *file_data;
 
 	if (ctx->file_data)
 		return -EBUSY;
@@ -7644,27 +7654,24 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	ret = io_rsrc_node_switch_start(ctx);
 	if (ret)
 		return ret;
+	ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args,
+				 &ctx->file_data);
+	if (ret)
+		return ret;
 
-	file_data = io_rsrc_data_alloc(ctx, io_rsrc_file_put, nr_args);
-	if (!file_data)
-		return -ENOMEM;
-	ctx->file_data = file_data;
 	ret = -ENOMEM;
 	if (!io_alloc_file_tables(&ctx->file_table, nr_args))
 		goto out_free;
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
-		u64 tag = 0;
-
-		if ((tags && copy_from_user(&tag, &tags[i], sizeof(tag))) ||
-		    copy_from_user(&fd, &fds[i], sizeof(fd))) {
+		if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
 			ret = -EFAULT;
 			goto out_fput;
 		}
 		/* allow sparse sets */
 		if (fd == -1) {
 			ret = -EINVAL;
-			if (unlikely(tag))
+			if (unlikely(ctx->file_data->tags[i]))
 				goto out_fput;
 			continue;
 		}
@@ -7685,7 +7692,6 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			fput(file);
 			goto out_fput;
 		}
-		ctx->file_data->tags[i] = tag;
 		io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
 	}
 
@@ -8403,9 +8409,9 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	ret = io_rsrc_node_switch_start(ctx);
 	if (ret)
 		return ret;
-	data = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, nr_args);
-	if (!data)
-		return -ENOMEM;
+	ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data);
+	if (ret)
+		return ret;
 	ret = io_buffers_map_alloc(ctx, nr_args);
 	if (ret) {
 		io_rsrc_data_free(data);
@@ -8413,19 +8419,13 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 	}
 
 	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
-		u64 tag = 0;
-
-		if (tags && copy_from_user(&tag, &tags[i], sizeof(tag))) {
-			ret = -EFAULT;
-			break;
-		}
 		ret = io_copy_iov(ctx, &iov, arg, i);
 		if (ret)
 			break;
 		ret = io_buffer_validate(&iov);
 		if (ret)
 			break;
-		if (!iov.iov_base && tag) {
+		if (!iov.iov_base && data->tags[i]) {
 			ret = -EINVAL;
 			break;
 		}
@@ -8434,7 +8434,6 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 					     &last_hpage);
 		if (ret)
 			break;
-		data->tags[i] = tag;
 	}
 
 	WARN_ON_ONCE(ctx->buf_data);

From 157d257f99c15c43668a98f804e3e3e6eb956464 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:19 +0100
Subject: [PATCH 1645/3292] io_uring: remove rsrc put work irq save/restore

io_rsrc_put_work() is executed by workqueue in non-irq context, so no
need for irqsave/restore variants of spinlocking.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/2a7f77220735f4ad404ac885b4d73bdf42d2f836.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6fda9fdee582..55e449d84b3e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7555,14 +7555,13 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 
 		if (prsrc->tag) {
 			bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
-			unsigned long flags;
 
 			io_ring_submit_lock(ctx, lock_ring);
-			spin_lock_irqsave(&ctx->completion_lock, flags);
+			spin_lock_irq(&ctx->completion_lock);
 			io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
 			ctx->cq_extra++;
 			io_commit_cqring(ctx);
-			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+			spin_unlock_irq(&ctx->completion_lock);
 			io_cqring_ev_posted(ctx);
 			io_ring_submit_unlock(ctx, lock_ring);
 		}

From 9123c8ffce1610323ec9c0874fa0262353f41fc3 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:20 +0100
Subject: [PATCH 1646/3292] io_uring: add helpers for 2 level table alloc

Some parts like fixed file table use 2 level tables, factor out helpers
for allocating/deallocating them as more users are to come.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1709212359cd82eb416d395f86fc78431ccfc0aa.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 73 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 43 insertions(+), 30 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 55e449d84b3e..0fbf1946ac42 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7059,14 +7059,36 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
-static void io_free_file_tables(struct io_file_table *table, unsigned nr_files)
+static void io_free_page_table(void **table, size_t size)
 {
-	unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
+	unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
 
 	for (i = 0; i < nr_tables; i++)
-		kfree(table->files[i]);
-	kfree(table->files);
-	table->files = NULL;
+		kfree(table[i]);
+	kfree(table);
+}
+
+static void **io_alloc_page_table(size_t size)
+{
+	unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
+	size_t init_size = size;
+	void **table;
+
+	table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	for (i = 0; i < nr_tables; i++) {
+		unsigned int this_size = min(size, PAGE_SIZE);
+
+		table[i] = kzalloc(this_size, GFP_KERNEL);
+		if (!table[i]) {
+			io_free_page_table(table, init_size);
+			return NULL;
+		}
+		size -= this_size;
+	}
+	return table;
 }
 
 static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
@@ -7195,6 +7217,22 @@ static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
 	return 0;
 }
 
+static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
+{
+	size_t size = nr_files * sizeof(struct io_fixed_file);
+
+	table->files = (struct io_fixed_file **)io_alloc_page_table(size);
+	return !!table->files;
+}
+
+static void io_free_file_tables(struct io_file_table *table, unsigned nr_files)
+{
+	size_t size = nr_files * sizeof(struct io_fixed_file);
+
+	io_free_page_table((void **)table->files, size);
+	table->files = NULL;
+}
+
 static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 #if defined(CONFIG_UNIX)
@@ -7456,31 +7494,6 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
 }
 #endif
 
-static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
-{
-	unsigned i, nr_tables = DIV_ROUND_UP(nr_files, IORING_MAX_FILES_TABLE);
-
-	table->files = kcalloc(nr_tables, sizeof(*table->files), GFP_KERNEL);
-	if (!table->files)
-		return false;
-
-	for (i = 0; i < nr_tables; i++) {
-		unsigned int this_files = min(nr_files, IORING_MAX_FILES_TABLE);
-
-		table->files[i] = kcalloc(this_files, sizeof(*table->files[i]),
-					GFP_KERNEL);
-		if (!table->files[i])
-			break;
-		nr_files -= this_files;
-	}
-
-	if (i == nr_tables)
-		return true;
-
-	io_free_file_tables(table, nr_tables * IORING_MAX_FILES_TABLE);
-	return false;
-}
-
 static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
 {
 	struct file *file = prsrc->file;

From 2d091d62b1106e90f195599c67bf385ddedfc915 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:21 +0100
Subject: [PATCH 1647/3292] io_uring: don't vmalloc rsrc tags

We don't really need vmalloc for keeping tags, it's not a hot path and
is there out of convenience, so replace it with two level tables to not
litter kernel virtual memory mappings.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/241a3422747113a8909e7e1030eb585d4a349e0d.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 52 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0fbf1946ac42..bc4d03b15aa6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -100,6 +100,10 @@
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
 				 IORING_REGISTER_LAST + IORING_OP_LAST)
 
+#define IO_RSRC_TAG_TABLE_SHIFT	9
+#define IO_RSRC_TAG_TABLE_MAX	(1U << IO_RSRC_TAG_TABLE_SHIFT)
+#define IO_RSRC_TAG_TABLE_MASK	(IO_RSRC_TAG_TABLE_MAX - 1)
+
 #define IORING_MAX_REG_BUFFERS	(1U << 14)
 
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
@@ -243,7 +247,8 @@ typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
 struct io_rsrc_data {
 	struct io_ring_ctx		*ctx;
 
-	u64				*tags;
+	u64				**tags;
+	unsigned int			nr;
 	rsrc_put_fn			*do_put;
 	atomic_t			refs;
 	struct completion		done;
@@ -7177,9 +7182,20 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct
 	return ret;
 }
 
+static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
+{
+	unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK;
+	unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT;
+
+	return &data->tags[table_idx][off];
+}
+
 static void io_rsrc_data_free(struct io_rsrc_data *data)
 {
-	kvfree(data->tags);
+	size_t size = data->nr * sizeof(data->tags[0][0]);
+
+	if (data->tags)
+		io_free_page_table((void **)data->tags, size);
 	kfree(data);
 }
 
@@ -7188,33 +7204,37 @@ static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
 			      struct io_rsrc_data **pdata)
 {
 	struct io_rsrc_data *data;
+	int ret = -ENOMEM;
 	unsigned i;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
-
-	data->tags = kvcalloc(nr, sizeof(*data->tags), GFP_KERNEL);
+	data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
 	if (!data->tags) {
 		kfree(data);
 		return -ENOMEM;
 	}
+
+	data->nr = nr;
+	data->ctx = ctx;
+	data->do_put = do_put;
 	if (utags) {
+		ret = -EFAULT;
 		for (i = 0; i < nr; i++) {
-			if (copy_from_user(&data->tags[i], &utags[i],
-					   sizeof(data->tags[i]))) {
-				io_rsrc_data_free(data);
-				return -EFAULT;
-			}
+			if (copy_from_user(io_get_tag_slot(data, i), &utags[i],
+					   sizeof(data->tags[i])))
+				goto fail;
 		}
 	}
 
 	atomic_set(&data->refs, 1);
-	data->ctx = ctx;
-	data->do_put = do_put;
 	init_completion(&data->done);
 	*pdata = data;
 	return 0;
+fail:
+	io_rsrc_data_free(data);
+	return ret;
 }
 
 static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
@@ -7683,7 +7703,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		/* allow sparse sets */
 		if (fd == -1) {
 			ret = -EINVAL;
-			if (unlikely(ctx->file_data->tags[i]))
+			if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
 				goto out_fput;
 			continue;
 		}
@@ -7781,7 +7801,7 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
 	if (!prsrc)
 		return -ENOMEM;
 
-	prsrc->tag = data->tags[idx];
+	prsrc->tag = *io_get_tag_slot(data, idx);
 	prsrc->rsrc = rsrc;
 	list_add(&prsrc->list, &node->rsrc_list);
 	return 0;
@@ -7851,7 +7871,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				err = -EBADF;
 				break;
 			}
-			data->tags[up->offset + done] = tag;
+			*io_get_tag_slot(data, up->offset + done) = tag;
 			io_fixed_file_set(file_slot, file);
 			err = io_sqe_file_register(ctx, file, i);
 			if (err) {
@@ -8437,7 +8457,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		ret = io_buffer_validate(&iov);
 		if (ret)
 			break;
-		if (!iov.iov_base && data->tags[i]) {
+		if (!iov.iov_base && *io_get_tag_slot(data, i)) {
 			ret = -EINVAL;
 			break;
 		}
@@ -8510,7 +8530,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 		}
 
 		ctx->user_bufs[i] = imu;
-		ctx->buf_data->tags[offset] = tag;
+		*io_get_tag_slot(ctx->buf_data, offset) = tag;
 	}
 
 	if (needs_switch)

From 09899b19155a152f3ff4eb5c203232175d630fbc Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:22 +0100
Subject: [PATCH 1648/3292] io_uring: cache task struct refs

tctx in submission part is always synchronised because is executed from
the task's context, so we can batch allocate tctx/task references and
store them across syscall boundaries. It avoids enough of operations,
including an atomic for getting task ref and a percpu_counter_add()
function call, which still fallback to spinlock for large batching
cases (around >=32). Should be good for SQPOLL submitting in small
portions and coming at some moment bpf submissions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/14b327b973410a3eec1f702ecf650e100513aca9.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bc4d03b15aa6..0ca0282e96b0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -110,6 +110,8 @@
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
 				IOSQE_BUFFER_SELECT)
 
+#define IO_TCTX_REFS_CACHE_NR	(1U << 10)
+
 struct io_uring {
 	u32 head ____cacheline_aligned_in_smp;
 	u32 tail ____cacheline_aligned_in_smp;
@@ -472,6 +474,7 @@ struct io_ring_ctx {
 
 struct io_uring_task {
 	/* submission side */
+	int			cached_refs;
 	struct xarray		xa;
 	struct wait_queue_head	wait;
 	const struct io_ring_ctx *last;
@@ -6707,16 +6710,23 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 {
+	struct io_uring_task *tctx;
 	int submitted = 0;
 
 	/* make sure SQ entry isn't read before tail */
 	nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
-
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	percpu_counter_add(&current->io_uring->inflight, nr);
-	refcount_add(nr, &current->usage);
+	tctx = current->io_uring;
+	tctx->cached_refs -= nr;
+	if (unlikely(tctx->cached_refs < 0)) {
+		unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR;
+
+		percpu_counter_add(&tctx->inflight, refill);
+		refcount_add(refill, &current->usage);
+		tctx->cached_refs += refill;
+	}
 	io_submit_state_start(&ctx->submit_state, nr);
 
 	while (submitted < nr) {
@@ -6742,12 +6752,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 
 	if (unlikely(submitted != nr)) {
 		int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
-		struct io_uring_task *tctx = current->io_uring;
 		int unused = nr - ref_used;
 
+		current->io_uring->cached_refs += unused;
 		percpu_ref_put_many(&ctx->refs, unused);
-		percpu_counter_sub(&tctx->inflight, unused);
-		put_task_struct_many(current, unused);
 	}
 
 	io_submit_state_end(&ctx->submit_state, ctx);
@@ -7929,7 +7937,7 @@ static int io_uring_alloc_task_context(struct task_struct *task,
 	struct io_uring_task *tctx;
 	int ret;
 
-	tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
+	tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
 	if (unlikely(!tctx))
 		return -ENOMEM;
 
@@ -7949,13 +7957,11 @@ static int io_uring_alloc_task_context(struct task_struct *task,
 
 	xa_init(&tctx->xa);
 	init_waitqueue_head(&tctx->wait);
-	tctx->last = NULL;
 	atomic_set(&tctx->in_idle, 0);
 	atomic_set(&tctx->inflight_tracked, 0);
 	task->io_uring = tctx;
 	spin_lock_init(&tctx->task_lock);
 	INIT_WQ_LIST(&tctx->task_list);
-	tctx->task_state = 0;
 	init_task_work(&tctx->task_work, tctx_task_work);
 	return 0;
 }
@@ -7966,6 +7972,7 @@ void __io_uring_free(struct task_struct *tsk)
 
 	WARN_ON_ONCE(!xa_empty(&tctx->xa));
 	WARN_ON_ONCE(tctx->io_wq);
+	WARN_ON_ONCE(tctx->cached_refs);
 
 	percpu_counter_destroy(&tctx->inflight);
 	kfree(tctx);
@@ -9110,6 +9117,16 @@ static void io_uring_try_cancel(bool cancel_all)
 	}
 }
 
+static void io_uring_drop_tctx_refs(struct task_struct *task)
+{
+	struct io_uring_task *tctx = task->io_uring;
+	unsigned int refs = tctx->cached_refs;
+
+	tctx->cached_refs = 0;
+	percpu_counter_sub(&tctx->inflight, refs);
+	put_task_struct_many(task, refs);
+}
+
 /* should only be called by SQPOLL task */
 static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
 {
@@ -9125,6 +9142,7 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
 
 	WARN_ON_ONCE(!sqd || sqd->thread != current);
 
+	io_uring_drop_tctx_refs(current);
 	atomic_inc(&tctx->in_idle);
 	do {
 		/* read completions before cancelations */
@@ -9162,6 +9180,7 @@ void __io_uring_cancel(struct files_struct *files)
 		io_wq_exit_start(tctx->io_wq);
 
 	/* make sure overflow events are dropped */
+	io_uring_drop_tctx_refs(current);
 	atomic_inc(&tctx->in_idle);
 	do {
 		/* read completions before cancelations */

From 78cc687be9c5420d743346f78bb8af9d59a903f9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:23 +0100
Subject: [PATCH 1649/3292] io_uring: unify SQPOLL and user task cancellations

Merge io_uring_cancel_sqpoll() and __io_uring_cancel() as it's easier to
have a conditional ctx traverse inside than keeping them in sync.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/adfe24d6dad4a3883a40eee54352b8b65ac851bb.1623634181.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 93 ++++++++++++++++++---------------------------------
 1 file changed, 32 insertions(+), 61 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0ca0282e96b0..b49dc2d74e84 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1041,7 +1041,7 @@ static void io_uring_del_tctx_node(unsigned long index);
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
 					 bool cancel_all);
-static void io_uring_cancel_sqpoll(struct io_sq_data *sqd);
+static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
 static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
 
 static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
@@ -6926,7 +6926,7 @@ static int io_sq_thread(void *data)
 		timeout = jiffies + sqd->sq_thread_idle;
 	}
 
-	io_uring_cancel_sqpoll(sqd);
+	io_uring_cancel_generic(true, sqd);
 	sqd->thread = NULL;
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 		io_ring_set_wakeup_flag(ctx);
@@ -9102,21 +9102,6 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
 	return percpu_counter_sum(&tctx->inflight);
 }
 
-static void io_uring_try_cancel(bool cancel_all)
-{
-	struct io_uring_task *tctx = current->io_uring;
-	struct io_tctx_node *node;
-	unsigned long index;
-
-	xa_for_each(&tctx->xa, index, node) {
-		struct io_ring_ctx *ctx = node->ctx;
-
-		/* sqpoll task will cancel all its requests */
-		if (!ctx->sq_data)
-			io_uring_try_cancel_requests(ctx, current, cancel_all);
-	}
-}
-
 static void io_uring_drop_tctx_refs(struct task_struct *task)
 {
 	struct io_uring_task *tctx = task->io_uring;
@@ -9127,59 +9112,24 @@ static void io_uring_drop_tctx_refs(struct task_struct *task)
 	put_task_struct_many(task, refs);
 }
 
-/* should only be called by SQPOLL task */
-static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
+/*
+ * Find any io_uring ctx that this task has registered or done IO on, and cancel
+ * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation.
+ */
+static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 {
 	struct io_uring_task *tctx = current->io_uring;
 	struct io_ring_ctx *ctx;
 	s64 inflight;
 	DEFINE_WAIT(wait);
 
+	WARN_ON_ONCE(sqd && sqd->thread != current);
+
 	if (!current->io_uring)
 		return;
 	if (tctx->io_wq)
 		io_wq_exit_start(tctx->io_wq);
 
-	WARN_ON_ONCE(!sqd || sqd->thread != current);
-
-	io_uring_drop_tctx_refs(current);
-	atomic_inc(&tctx->in_idle);
-	do {
-		/* read completions before cancelations */
-		inflight = tctx_inflight(tctx, false);
-		if (!inflight)
-			break;
-		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-			io_uring_try_cancel_requests(ctx, current, true);
-
-		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-		/*
-		 * If we've seen completions, retry without waiting. This
-		 * avoids a race where a completion comes in before we did
-		 * prepare_to_wait().
-		 */
-		if (inflight == tctx_inflight(tctx, false))
-			schedule();
-		finish_wait(&tctx->wait, &wait);
-	} while (1);
-	atomic_dec(&tctx->in_idle);
-}
-
-/*
- * Find any io_uring fd that this task has registered or done IO on, and cancel
- * requests.
- */
-void __io_uring_cancel(struct files_struct *files)
-{
-	struct io_uring_task *tctx = current->io_uring;
-	DEFINE_WAIT(wait);
-	s64 inflight;
-	bool cancel_all = !files;
-
-	if (tctx->io_wq)
-		io_wq_exit_start(tctx->io_wq);
-
-	/* make sure overflow events are dropped */
 	io_uring_drop_tctx_refs(current);
 	atomic_inc(&tctx->in_idle);
 	do {
@@ -9187,9 +9137,25 @@ void __io_uring_cancel(struct files_struct *files)
 		inflight = tctx_inflight(tctx, !cancel_all);
 		if (!inflight)
 			break;
-		io_uring_try_cancel(cancel_all);
-		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 
+		if (!sqd) {
+			struct io_tctx_node *node;
+			unsigned long index;
+
+			xa_for_each(&tctx->xa, index, node) {
+				/* sqpoll task will cancel all its requests */
+				if (node->ctx->sq_data)
+					continue;
+				io_uring_try_cancel_requests(node->ctx, current,
+							     cancel_all);
+			}
+		} else {
+			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+				io_uring_try_cancel_requests(ctx, current,
+							     cancel_all);
+		}
+
+		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 		/*
 		 * If we've seen completions, retry without waiting. This
 		 * avoids a race where a completion comes in before we did
@@ -9208,6 +9174,11 @@ void __io_uring_cancel(struct files_struct *files)
 	}
 }
 
+void __io_uring_cancel(struct files_struct *files)
+{
+	io_uring_cancel_generic(!files, NULL);
+}
+
 static void *io_uring_validate_mmap_request(struct file *file,
 					    loff_t pgoff, size_t sz)
 {

From aeab9506ef50d23b350d1822c324023c9e1cb783 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 02:36:24 +0100
Subject: [PATCH 1650/3292] io_uring: inline io_iter_do_read()

There are only two calls in source code of io_iter_do_read(), the
function is small and pretty hot though is failed to get inlined.
Makr it as inline.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/25a26dae7660da73fbc2244b361b397ef43d3caf.1623634182.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b49dc2d74e84..d665c9419ad3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3253,7 +3253,7 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	return true;
 }
 
-static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
+static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
 {
 	if (req->file->f_op->read_iter)
 		return call_read_iter(req->file, &req->rw.kiocb, iter);

From d07f3b081ee632268786601f55e1334d1f68b997 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 8 Jun 2021 18:13:27 +0200
Subject: [PATCH 1651/3292] mark pstore-blk as broken

pstore-blk just pokes directly into the pagecache for the block
device without going through the file operations for that by faking
up it's own file operations that do not match the block device ones.

As this breaks the control of the block layer of it's page cache,
and even now just works by accident only the best thing is to just
disable this driver.

Fixes: 17639f67c1d6 ("pstore/blk: Introduce backend for block devices")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210608161327.1537919-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/pstore/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 8adabde685f1..328da35da390 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -173,6 +173,7 @@ config PSTORE_BLK
 	tristate "Log panic/oops to a block device"
 	depends on PSTORE
 	depends on BLOCK
+	depends on BROKEN
 	select PSTORE_ZONE
 	default n
 	help

From ba4e967379f0a2c2a72fb572c69761218a2468a6 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:29 +0100
Subject: [PATCH 1652/3292] wcn36xx: Return result of set_power_params in
 suspend

wcn36xx_smd_set_power_params() can return an error. For the purposes of
entering into suspend we need the suspend() function to trap and report
errors up the stack.

First step in this process is reporting the existing result code for
wcn36xx_smd_set_power_params().

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-2-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index afb4877eaad8..b361e40697a6 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1091,12 +1091,14 @@ static int wcn36xx_sta_remove(struct ieee80211_hw *hw,
 static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 {
 	struct wcn36xx *wcn = hw->priv;
+	int ret;
 
 	wcn36xx_dbg(WCN36XX_DBG_MAC, "mac suspend\n");
 
 	flush_workqueue(wcn->hal_ind_wq);
-	wcn36xx_smd_set_power_params(wcn, true);
-	return 0;
+	ret = wcn36xx_smd_set_power_params(wcn, true);
+
+	return ret;
 }
 
 static int wcn36xx_resume(struct ieee80211_hw *hw)

From f2f49601067bd7b7f3392a50a3738335fe9d0cb8 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:30 +0100
Subject: [PATCH 1653/3292] wcn36xx: Run suspend for the first ieee80211_vif

A subsequent set of patches will extend out suspend/resume support in this
driver, we cannot set the firmware up for multiple ipv4/ipv6 addresses and
as such we can't iterate through a list of ieee80211_vif.

Constrain the interaction with the firmware to the first ieee80211_vif on
the suspend/resume/wowlan path.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-3-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c | 31 ++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index b361e40697a6..9a97a288a96f 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1088,15 +1088,34 @@ static int wcn36xx_sta_remove(struct ieee80211_hw *hw,
 
 #ifdef CONFIG_PM
 
+static struct ieee80211_vif *wcn36xx_get_first_assoc_vif(struct wcn36xx *wcn)
+{
+	struct wcn36xx_vif *vif_priv = NULL;
+	struct ieee80211_vif *vif = NULL;
+
+	list_for_each_entry(vif_priv, &wcn->vif_list, list) {
+		if (vif_priv->sta_assoc) {
+			vif = wcn36xx_priv_to_vif(vif_priv);
+			break;
+		}
+	}
+	return vif;
+}
+
 static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 {
 	struct wcn36xx *wcn = hw->priv;
-	int ret;
+	struct ieee80211_vif *vif = NULL;
+	int ret = 0;
 
 	wcn36xx_dbg(WCN36XX_DBG_MAC, "mac suspend\n");
 
 	flush_workqueue(wcn->hal_ind_wq);
-	ret = wcn36xx_smd_set_power_params(wcn, true);
+	mutex_lock(&wcn->conf_mutex);
+	vif = wcn36xx_get_first_assoc_vif(wcn);
+	if (vif)
+		ret = wcn36xx_smd_set_power_params(wcn, true);
+	mutex_unlock(&wcn->conf_mutex);
 
 	return ret;
 }
@@ -1104,11 +1123,17 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 static int wcn36xx_resume(struct ieee80211_hw *hw)
 {
 	struct wcn36xx *wcn = hw->priv;
+	struct ieee80211_vif *vif = NULL;
 
 	wcn36xx_dbg(WCN36XX_DBG_MAC, "mac resume\n");
 
 	flush_workqueue(wcn->hal_ind_wq);
-	wcn36xx_smd_set_power_params(wcn, false);
+	mutex_lock(&wcn->conf_mutex);
+	vif = wcn36xx_get_first_assoc_vif(wcn);
+	if (vif)
+		wcn36xx_smd_set_power_params(wcn, false);
+	mutex_unlock(&wcn->conf_mutex);
+
 	return 0;
 }
 

From 5478c41fce5dd6b751075856666a0f9a101c3dd2 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:31 +0100
Subject: [PATCH 1654/3292] wcn36xx: Add ipv4 ARP offload support in suspend

Add ARP offload support. Firmware is capable of responding to ARP requests
for a single ipv4 address only.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-4-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/hal.h  | 10 +++----
 drivers/net/wireless/ath/wcn36xx/main.c | 17 +++++++----
 drivers/net/wireless/ath/wcn36xx/smd.c  | 38 +++++++++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/smd.h  |  4 +++
 4 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
index 65ef893f2736..b56c8292fa62 100644
--- a/drivers/net/wireless/ath/wcn36xx/hal.h
+++ b/drivers/net/wireless/ath/wcn36xx/hal.h
@@ -3465,7 +3465,7 @@ struct wcn36xx_hal_rem_bcn_filter_req {
 #define WCN36XX_HAL_OFFLOAD_ENABLE                          1
 #define WCN36XX_HAL_OFFLOAD_BCAST_FILTER_ENABLE             0x2
 #define WCN36XX_HAL_OFFLOAD_ARP_AND_BCAST_FILTER_ENABLE	\
-	(HAL_OFFLOAD_ENABLE|HAL_OFFLOAD_BCAST_FILTER_ENABLE)
+	(WCN36XX_HAL_OFFLOAD_ENABLE | WCN36XX_HAL_OFFLOAD_BCAST_FILTER_ENABLE)
 
 struct wcn36xx_hal_ns_offload_params {
 	u8 src_ipv6_addr[WCN36XX_HAL_IPV6_ADDR_LEN];
@@ -3487,10 +3487,10 @@ struct wcn36xx_hal_ns_offload_params {
 	/* slot index for this offload */
 	u32 slot_index;
 	u8 bss_index;
-};
+} __packed;
 
 struct wcn36xx_hal_host_offload_req {
-	u8 offload_Type;
+	u8 offload_type;
 
 	/* enable or disable */
 	u8 enable;
@@ -3499,13 +3499,13 @@ struct wcn36xx_hal_host_offload_req {
 		u8 host_ipv4_addr[4];
 		u8 host_ipv6_addr[WCN36XX_HAL_IPV6_ADDR_LEN];
 	} u;
-};
+} __packed;
 
 struct wcn36xx_hal_host_offload_req_msg {
 	struct wcn36xx_hal_msg_header header;
 	struct wcn36xx_hal_host_offload_req host_offload_params;
 	struct wcn36xx_hal_ns_offload_params ns_offload_params;
-};
+} __packed;
 
 /* Packet Types. */
 #define WCN36XX_HAL_KEEP_ALIVE_NULL_PKT              1
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 9a97a288a96f..0da123660fa1 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1112,11 +1112,16 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 
 	flush_workqueue(wcn->hal_ind_wq);
 	mutex_lock(&wcn->conf_mutex);
-	vif = wcn36xx_get_first_assoc_vif(wcn);
-	if (vif)
-		ret = wcn36xx_smd_set_power_params(wcn, true);
-	mutex_unlock(&wcn->conf_mutex);
 
+	vif = wcn36xx_get_first_assoc_vif(wcn);
+	if (vif) {
+		ret = wcn36xx_smd_arp_offload(wcn, vif, true);
+		if (ret)
+			goto out;
+		ret = wcn36xx_smd_set_power_params(wcn, true);
+	}
+out:
+	mutex_unlock(&wcn->conf_mutex);
 	return ret;
 }
 
@@ -1130,8 +1135,10 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 	flush_workqueue(wcn->hal_ind_wq);
 	mutex_lock(&wcn->conf_mutex);
 	vif = wcn36xx_get_first_assoc_vif(wcn);
-	if (vif)
+	if (vif) {
 		wcn36xx_smd_set_power_params(wcn, false);
+		wcn36xx_smd_arp_offload(wcn, vif, false);
+	}
 	mutex_unlock(&wcn->conf_mutex);
 
 	return 0;
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index d0c3a1557e8d..478e363610e1 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2756,6 +2756,43 @@ out:
 	return ret;
 }
 
+int wcn36xx_smd_arp_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+			    bool enable)
+{
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+	struct wcn36xx_hal_host_offload_req_msg msg_body;
+	int ret;
+
+	mutex_lock(&wcn->hal_mutex);
+
+	INIT_HAL_MSG(msg_body, WCN36XX_HAL_HOST_OFFLOAD_REQ);
+	msg_body.host_offload_params.offload_type =
+		WCN36XX_HAL_IPV4_ARP_REPLY_OFFLOAD;
+	if (enable) {
+		msg_body.host_offload_params.enable =
+			WCN36XX_HAL_OFFLOAD_ARP_AND_BCAST_FILTER_ENABLE;
+		memcpy(&msg_body.host_offload_params.u,
+		       &vif->bss_conf.arp_addr_list[0], sizeof(__be32));
+	}
+	msg_body.ns_offload_params.bss_index = vif_priv->bss_index;
+
+	PREPARE_HAL_BUF(wcn->hal_buf, msg_body);
+
+	ret = wcn36xx_smd_send_and_wait(wcn, msg_body.header.len);
+	if (ret) {
+		wcn36xx_err("Sending host_offload_arp failed\n");
+		goto out;
+	}
+	ret = wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len);
+	if (ret) {
+		wcn36xx_err("host_offload_arp failed err=%d\n", ret);
+		goto out;
+	}
+out:
+	mutex_unlock(&wcn->hal_mutex);
+	return ret;
+}
+
 int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 			    void *buf, int len, void *priv, u32 addr)
 {
@@ -2804,6 +2841,7 @@ int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 	case WCN36XX_HAL_8023_MULTICAST_LIST_RSP:
 	case WCN36XX_HAL_START_SCAN_OFFLOAD_RSP:
 	case WCN36XX_HAL_STOP_SCAN_OFFLOAD_RSP:
+	case WCN36XX_HAL_HOST_OFFLOAD_RSP:
 		memcpy(wcn->hal_buf, buf, len);
 		wcn->hal_rsp_len = len;
 		complete(&wcn->hal_rsp_compl);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
index 462860572e1f..6492a628ea6a 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.h
+++ b/drivers/net/wireless/ath/wcn36xx/smd.h
@@ -146,4 +146,8 @@ int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 int wcn36xx_smd_set_mc_list(struct wcn36xx *wcn,
 			    struct ieee80211_vif *vif,
 			    struct wcn36xx_hal_rcv_flt_mc_addr_list_type *fp);
+
+int wcn36xx_smd_arp_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+			    bool enable);
+
 #endif	/* _SMD_H_ */

From 6feb634f4d9fceb0241612ad46ad55b54fd78df5 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:32 +0100
Subject: [PATCH 1655/3292] wcn36xx: Do not flush indication queue on
 suspend/resume

Testing on Android reveals that the flush on both suspend and resume of the
firmware indication work-queue can stall indefinitely.

Given this code path doesn't appear to have been exercised up until now,
removing this flush to unblock this situation.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-5-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 0da123660fa1..9731fcbe2e7f 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1110,7 +1110,6 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 
 	wcn36xx_dbg(WCN36XX_DBG_MAC, "mac suspend\n");
 
-	flush_workqueue(wcn->hal_ind_wq);
 	mutex_lock(&wcn->conf_mutex);
 
 	vif = wcn36xx_get_first_assoc_vif(wcn);
@@ -1132,7 +1131,6 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 
 	wcn36xx_dbg(WCN36XX_DBG_MAC, "mac resume\n");
 
-	flush_workqueue(wcn->hal_ind_wq);
 	mutex_lock(&wcn->conf_mutex);
 	vif = wcn36xx_get_first_assoc_vif(wcn);
 	if (vif) {

From c7a61af55976dbd11b176d2badda869a7537dca4 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:33 +0100
Subject: [PATCH 1656/3292] wcn36xx: Add ipv6 address tracking

Taking code from iwlwifi this commit adds a standard callback for
ipv6_addr_change().

This callback allows wcn36xx to know the set of ipv6 addresses. Something
we need to know in order to get wowlan working with ipv6.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-6-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/hal.h     |  1 +
 drivers/net/wireless/ath/wcn36xx/main.c    | 32 ++++++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/wcn36xx.h |  8 ++++++
 3 files changed, 41 insertions(+)

diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
index b56c8292fa62..90333daed845 100644
--- a/drivers/net/wireless/ath/wcn36xx/hal.h
+++ b/drivers/net/wireless/ath/wcn36xx/hal.h
@@ -3466,6 +3466,7 @@ struct wcn36xx_hal_rem_bcn_filter_req {
 #define WCN36XX_HAL_OFFLOAD_BCAST_FILTER_ENABLE             0x2
 #define WCN36XX_HAL_OFFLOAD_ARP_AND_BCAST_FILTER_ENABLE	\
 	(WCN36XX_HAL_OFFLOAD_ENABLE | WCN36XX_HAL_OFFLOAD_BCAST_FILTER_ENABLE)
+#define WCN36XX_HAL_IPV6_OFFLOAD_ADDR_MAX		0x02
 
 struct wcn36xx_hal_ns_offload_params {
 	u8 src_ipv6_addr[WCN36XX_HAL_IPV6_ADDR_LEN];
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 9731fcbe2e7f..240ecdd52f81 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -25,6 +25,7 @@
 #include <linux/rpmsg.h>
 #include <linux/soc/qcom/smem_state.h>
 #include <linux/soc/qcom/wcnss_ctrl.h>
+#include <net/ipv6.h>
 #include "wcn36xx.h"
 #include "testmode.h"
 
@@ -1208,6 +1209,34 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static void wcn36xx_ipv6_addr_change(struct ieee80211_hw *hw,
+				     struct ieee80211_vif *vif,
+				     struct inet6_dev *idev)
+{
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+	struct inet6_ifaddr *ifa;
+	int idx = 0;
+
+	memset(vif_priv->tentative_addrs, 0, sizeof(vif_priv->tentative_addrs));
+
+	read_lock_bh(&idev->lock);
+	list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		vif_priv->target_ipv6_addrs[idx] = ifa->addr;
+		if (ifa->flags & IFA_F_TENTATIVE)
+			__set_bit(idx, vif_priv->tentative_addrs);
+		idx++;
+		if (idx >= WCN36XX_HAL_IPV6_OFFLOAD_ADDR_MAX)
+			break;
+		wcn36xx_dbg(WCN36XX_DBG_MAC, "%pI6 %s\n", &ifa->addr,
+			    (ifa->flags & IFA_F_TENTATIVE) ? "tentative" : NULL);
+	}
+	read_unlock_bh(&idev->lock);
+
+	vif_priv->num_target_ipv6_addrs = idx;
+}
+#endif
+
 static const struct ieee80211_ops wcn36xx_ops = {
 	.start			= wcn36xx_start,
 	.stop			= wcn36xx_stop,
@@ -1231,6 +1260,9 @@ static const struct ieee80211_ops wcn36xx_ops = {
 	.sta_add		= wcn36xx_sta_add,
 	.sta_remove		= wcn36xx_sta_remove,
 	.ampdu_action		= wcn36xx_ampdu_action,
+#if IS_ENABLED(CONFIG_IPV6)
+	.ipv6_addr_change	= wcn36xx_ipv6_addr_change,
+#endif
 
 	CFG80211_TESTMODE_CMD(wcn36xx_tm_cmd)
 };
diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
index 71fa9992b118..5a5114660b18 100644
--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
@@ -18,6 +18,7 @@
 #define _WCN36XX_H_
 
 #include <linux/completion.h>
+#include <linux/in6.h>
 #include <linux/printk.h>
 #include <linux/spinlock.h>
 #include <net/mac80211.h>
@@ -136,6 +137,13 @@ struct wcn36xx_vif {
 	u8 self_dpu_desc_index;
 	u8 self_ucast_dpu_sign;
 
+#if IS_ENABLED(CONFIG_IPV6)
+	/* IPv6 addresses for WoWLAN */
+	struct in6_addr target_ipv6_addrs[WCN36XX_HAL_IPV6_OFFLOAD_ADDR_MAX];
+	unsigned long tentative_addrs[BITS_TO_LONGS(WCN36XX_HAL_IPV6_OFFLOAD_ADDR_MAX)];
+	int num_target_ipv6_addrs;
+#endif
+
 	struct list_head sta_list;
 };
 

From 1456223c468447b1c5d2c1d8748748eea379a501 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:34 +0100
Subject: [PATCH 1657/3292] wcn36xx: Add ipv6 namespace offload in suspend

We need to respond to ipv6 namespace lookups when in suspend. This patch
adds the necessary changes to issue the appropriate firmware command on
suspend and resume to enter/exit firmware offloaded ns lookup.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-7-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/hal.h  |  3 ++
 drivers/net/wireless/ath/wcn36xx/main.c |  4 ++
 drivers/net/wireless/ath/wcn36xx/smd.c  | 63 +++++++++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/smd.h  |  3 ++
 4 files changed, 73 insertions(+)

diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
index 90333daed845..3b949b0b3792 100644
--- a/drivers/net/wireless/ath/wcn36xx/hal.h
+++ b/drivers/net/wireless/ath/wcn36xx/hal.h
@@ -3464,6 +3464,9 @@ struct wcn36xx_hal_rem_bcn_filter_req {
 #define WCN36XX_HAL_OFFLOAD_DISABLE                         0
 #define WCN36XX_HAL_OFFLOAD_ENABLE                          1
 #define WCN36XX_HAL_OFFLOAD_BCAST_FILTER_ENABLE             0x2
+#define WCN36XX_HAL_OFFLOAD_MCAST_FILTER_ENABLE             0x4
+#define WCN36XX_HAL_OFFLOAD_NS_AND_MCAST_FILTER_ENABLE	\
+	(WCN36XX_HAL_OFFLOAD_ENABLE | WCN36XX_HAL_OFFLOAD_MCAST_FILTER_ENABLE)
 #define WCN36XX_HAL_OFFLOAD_ARP_AND_BCAST_FILTER_ENABLE	\
 	(WCN36XX_HAL_OFFLOAD_ENABLE | WCN36XX_HAL_OFFLOAD_BCAST_FILTER_ENABLE)
 #define WCN36XX_HAL_IPV6_OFFLOAD_ADDR_MAX		0x02
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 240ecdd52f81..09e22f829682 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1116,6 +1116,9 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 	vif = wcn36xx_get_first_assoc_vif(wcn);
 	if (vif) {
 		ret = wcn36xx_smd_arp_offload(wcn, vif, true);
+		if (ret)
+			goto out;
+		ret = wcn36xx_smd_ipv6_ns_offload(wcn, vif, true);
 		if (ret)
 			goto out;
 		ret = wcn36xx_smd_set_power_params(wcn, true);
@@ -1136,6 +1139,7 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 	vif = wcn36xx_get_first_assoc_vif(wcn);
 	if (vif) {
 		wcn36xx_smd_set_power_params(wcn, false);
+		wcn36xx_smd_ipv6_ns_offload(wcn, vif, false);
 		wcn36xx_smd_arp_offload(wcn, vif, false);
 	}
 	mutex_unlock(&wcn->conf_mutex);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 478e363610e1..4a50e5f8456a 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2793,6 +2793,69 @@ out:
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+int wcn36xx_smd_ipv6_ns_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+				bool enable)
+{
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+	struct wcn36xx_hal_host_offload_req_msg msg_body;
+	struct wcn36xx_hal_ns_offload_params *ns_params;
+	struct wcn36xx_hal_host_offload_req *ho_params;
+	int ret;
+
+	mutex_lock(&wcn->hal_mutex);
+
+	INIT_HAL_MSG(msg_body, WCN36XX_HAL_HOST_OFFLOAD_REQ);
+	ho_params = &msg_body.host_offload_params;
+	ns_params = &msg_body.ns_offload_params;
+
+	ho_params->offload_type = WCN36XX_HAL_IPV6_NS_OFFLOAD;
+	if (enable) {
+		ho_params->enable =
+			WCN36XX_HAL_OFFLOAD_NS_AND_MCAST_FILTER_ENABLE;
+		if (vif_priv->num_target_ipv6_addrs) {
+			memcpy(&ho_params->u,
+			       &vif_priv->target_ipv6_addrs[0].in6_u,
+			       sizeof(struct in6_addr));
+			memcpy(&ns_params->target_ipv6_addr1,
+			       &vif_priv->target_ipv6_addrs[0].in6_u,
+			       sizeof(struct in6_addr));
+			ns_params->target_ipv6_addr1_valid = 1;
+		}
+		if (vif_priv->num_target_ipv6_addrs > 1) {
+			memcpy(&ns_params->target_ipv6_addr2,
+			       &vif_priv->target_ipv6_addrs[1].in6_u,
+			       sizeof(struct in6_addr));
+			ns_params->target_ipv6_addr2_valid = 1;
+		}
+	}
+	memcpy(&ns_params->self_addr, vif->addr, ETH_ALEN);
+	ns_params->bss_index = vif_priv->bss_index;
+
+	PREPARE_HAL_BUF(wcn->hal_buf, msg_body);
+
+	ret = wcn36xx_smd_send_and_wait(wcn, msg_body.header.len);
+	if (ret) {
+		wcn36xx_err("Sending host_offload_arp failed\n");
+		goto out;
+	}
+	ret = wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len);
+	if (ret) {
+		wcn36xx_err("host_offload_arp failed err=%d\n", ret);
+		goto out;
+	}
+out:
+	mutex_unlock(&wcn->hal_mutex);
+	return ret;
+}
+#else
+int wcn36xx_smd_ipv6_ns_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+				bool enable)
+{
+	return 0;
+}
+#endif
+
 int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 			    void *buf, int len, void *priv, u32 addr)
 {
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
index 6492a628ea6a..e03ab7878432 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.h
+++ b/drivers/net/wireless/ath/wcn36xx/smd.h
@@ -150,4 +150,7 @@ int wcn36xx_smd_set_mc_list(struct wcn36xx *wcn,
 int wcn36xx_smd_arp_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 			    bool enable);
 
+int wcn36xx_smd_ipv6_ns_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+				bool enable);
+
 #endif	/* _SMD_H_ */

From 8974e5917b31c30ab30af1e992cfb35eec839a5f Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:35 +0100
Subject: [PATCH 1658/3292] wcn36xx: Add set_rekey_data callback

Add a callback for Group Temporal Key tracking as provided by the standard
WiFi ops structure.

We track the key to integrate GTK offloading into the WoWLAN suspend path
later on. Code comes from the Intel iwlwifi driver with minimal name
changes.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-8-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c    | 19 +++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/wcn36xx.h |  6 ++++++
 2 files changed, 25 insertions(+)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 09e22f829682..ec32b8b0067d 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1147,6 +1147,24 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 	return 0;
 }
 
+static void wcn36xx_set_rekey_data(struct ieee80211_hw *hw,
+				   struct ieee80211_vif *vif,
+				   struct cfg80211_gtk_rekey_data *data)
+{
+	struct wcn36xx *wcn = hw->priv;
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+
+	mutex_lock(&wcn->conf_mutex);
+
+	memcpy(vif_priv->rekey_data.kek, data->kek, NL80211_KEK_LEN);
+	memcpy(vif_priv->rekey_data.kck, data->kck, NL80211_KCK_LEN);
+	vif_priv->rekey_data.replay_ctr =
+		cpu_to_le64(be64_to_cpup((__be64 *)data->replay_ctr));
+	vif_priv->rekey_data.valid = true;
+
+	mutex_unlock(&wcn->conf_mutex);
+}
+
 #endif
 
 static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
@@ -1249,6 +1267,7 @@ static const struct ieee80211_ops wcn36xx_ops = {
 #ifdef CONFIG_PM
 	.suspend		= wcn36xx_suspend,
 	.resume			= wcn36xx_resume,
+	.set_rekey_data		= wcn36xx_set_rekey_data,
 #endif
 	.config			= wcn36xx_config,
 	.prepare_multicast	= wcn36xx_prepare_multicast,
diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
index 5a5114660b18..6121d8a5641a 100644
--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
@@ -143,6 +143,12 @@ struct wcn36xx_vif {
 	unsigned long tentative_addrs[BITS_TO_LONGS(WCN36XX_HAL_IPV6_OFFLOAD_ADDR_MAX)];
 	int num_target_ipv6_addrs;
 #endif
+	/* WoWLAN GTK rekey data */
+	struct {
+		u8 kck[NL80211_KCK_LEN], kek[NL80211_KEK_LEN];
+		__le64 replay_ctr;
+		bool valid;
+	} rekey_data;
 
 	struct list_head sta_list;
 };

From 6693f7675c9b055ee3428844ee8999bfe22d3cd2 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:36 +0100
Subject: [PATCH 1659/3292] wcn36xx: Add GTK offload to WoWLAN path

Using previously set GTK KCK and KEK material this commit adds GTK rekeying
to the WoWLAN suspend/resume path. A small error in the packing of the
up to now unused command structure is fixed as we go.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-9-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/hal.h  |  2 +-
 drivers/net/wireless/ath/wcn36xx/main.c |  4 +++
 drivers/net/wireless/ath/wcn36xx/smd.c  | 39 +++++++++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/smd.h  |  3 ++
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
index 3b949b0b3792..1f3c2e840232 100644
--- a/drivers/net/wireless/ath/wcn36xx/hal.h
+++ b/drivers/net/wireless/ath/wcn36xx/hal.h
@@ -4905,7 +4905,7 @@ struct wcn36xx_hal_gtk_offload_req_msg {
 	u64 key_replay_counter;
 
 	u8 bss_index;
-};
+} __packed;
 
 struct wcn36xx_hal_gtk_offload_rsp_msg {
 	struct wcn36xx_hal_msg_header header;
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index ec32b8b0067d..db1528a14757 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1119,6 +1119,9 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 		if (ret)
 			goto out;
 		ret = wcn36xx_smd_ipv6_ns_offload(wcn, vif, true);
+		if (ret)
+			goto out;
+		ret = wcn36xx_smd_gtk_offload(wcn, vif, true);
 		if (ret)
 			goto out;
 		ret = wcn36xx_smd_set_power_params(wcn, true);
@@ -1139,6 +1142,7 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 	vif = wcn36xx_get_first_assoc_vif(wcn);
 	if (vif) {
 		wcn36xx_smd_set_power_params(wcn, false);
+		wcn36xx_smd_gtk_offload(wcn, vif, false);
 		wcn36xx_smd_ipv6_ns_offload(wcn, vif, false);
 		wcn36xx_smd_arp_offload(wcn, vif, false);
 	}
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 4a50e5f8456a..4063888e3f03 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2856,6 +2856,44 @@ int wcn36xx_smd_ipv6_ns_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 }
 #endif
 
+int wcn36xx_smd_gtk_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+			    bool enable)
+{
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+	struct wcn36xx_hal_gtk_offload_req_msg msg_body;
+	int ret;
+
+	mutex_lock(&wcn->hal_mutex);
+
+	INIT_HAL_MSG(msg_body, WCN36XX_HAL_GTK_OFFLOAD_REQ);
+
+	if (enable) {
+		memcpy(&msg_body.kek, vif_priv->rekey_data.kek, NL80211_KEK_LEN);
+		memcpy(&msg_body.kck, vif_priv->rekey_data.kck, NL80211_KCK_LEN);
+		msg_body.key_replay_counter =
+			le64_to_cpu(vif_priv->rekey_data.replay_ctr);
+		msg_body.bss_index = vif_priv->bss_index;
+	} else {
+		msg_body.flags = WCN36XX_HAL_GTK_OFFLOAD_FLAGS_DISABLE;
+	}
+
+	PREPARE_HAL_BUF(wcn->hal_buf, msg_body);
+
+	ret = wcn36xx_smd_send_and_wait(wcn, msg_body.header.len);
+	if (ret) {
+		wcn36xx_err("Sending host_offload_arp failed\n");
+		goto out;
+	}
+	ret = wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len);
+	if (ret) {
+		wcn36xx_err("host_offload_arp failed err=%d\n", ret);
+		goto out;
+	}
+out:
+	mutex_unlock(&wcn->hal_mutex);
+	return ret;
+}
+
 int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 			    void *buf, int len, void *priv, u32 addr)
 {
@@ -2905,6 +2943,7 @@ int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 	case WCN36XX_HAL_START_SCAN_OFFLOAD_RSP:
 	case WCN36XX_HAL_STOP_SCAN_OFFLOAD_RSP:
 	case WCN36XX_HAL_HOST_OFFLOAD_RSP:
+	case WCN36XX_HAL_GTK_OFFLOAD_RSP:
 		memcpy(wcn->hal_buf, buf, len);
 		wcn->hal_rsp_len = len;
 		complete(&wcn->hal_rsp_compl);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
index e03ab7878432..cdf4231efe26 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.h
+++ b/drivers/net/wireless/ath/wcn36xx/smd.h
@@ -153,4 +153,7 @@ int wcn36xx_smd_arp_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 int wcn36xx_smd_ipv6_ns_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 				bool enable);
 
+int wcn36xx_smd_gtk_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
+			    bool enable);
+
 #endif	/* _SMD_H_ */

From bedf1169bcae2f762b37d40dc9db648fe7ad1952 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:37 +0100
Subject: [PATCH 1660/3292] wcn36xx: Add GTK offload info to WoWLAN resume

Having enabled GTK rekey in suspend, we need to extract the replay counter
from the firmware on resume and perform a ieee80211_gtk_rekey_notify() so
that the STA remains verified from the perspective of the AP.

In order to enable the SMD command and response we need to pack the
existing command/response structures. Given these structures are currently
unused, there's no need to backport this as a fix.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-10-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/hal.h  |  4 +-
 drivers/net/wireless/ath/wcn36xx/main.c |  1 +
 drivers/net/wireless/ath/wcn36xx/smd.c  | 73 +++++++++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/smd.h  |  3 +
 4 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
index 1f3c2e840232..455143c4164e 100644
--- a/drivers/net/wireless/ath/wcn36xx/hal.h
+++ b/drivers/net/wireless/ath/wcn36xx/hal.h
@@ -4919,7 +4919,7 @@ struct wcn36xx_hal_gtk_offload_rsp_msg {
 struct wcn36xx_hal_gtk_offload_get_info_req_msg {
 	struct wcn36xx_hal_msg_header header;
 	u8 bss_index;
-};
+} __packed;
 
 struct wcn36xx_hal_gtk_offload_get_info_rsp_msg {
 	struct wcn36xx_hal_msg_header header;
@@ -4943,7 +4943,7 @@ struct wcn36xx_hal_gtk_offload_get_info_rsp_msg {
 	u32 igtk_rekey_count;
 
 	u8 bss_index;
-};
+} __packed;
 
 struct dhcp_info {
 	/* Indicates the device mode which indicates about the DHCP activity */
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index db1528a14757..8e5d8326d551 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1142,6 +1142,7 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 	vif = wcn36xx_get_first_assoc_vif(wcn);
 	if (vif) {
 		wcn36xx_smd_set_power_params(wcn, false);
+		wcn36xx_smd_gtk_offload_get_info(wcn, vif);
 		wcn36xx_smd_gtk_offload(wcn, vif, false);
 		wcn36xx_smd_ipv6_ns_offload(wcn, vif, false);
 		wcn36xx_smd_arp_offload(wcn, vif, false);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 4063888e3f03..13200a079bd0 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2894,6 +2894,78 @@ out:
 	return ret;
 }
 
+static int wcn36xx_smd_gtk_offload_get_info_rsp(struct wcn36xx *wcn,
+						struct ieee80211_vif *vif)
+{
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+	struct wcn36xx_hal_gtk_offload_get_info_rsp_msg *rsp;
+	__be64 replay_ctr;
+
+	if (wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len))
+		return -EIO;
+
+	rsp = (struct wcn36xx_hal_gtk_offload_get_info_rsp_msg *)wcn->hal_buf;
+
+	if (rsp->bss_index != vif_priv->bss_index) {
+		wcn36xx_err("gtk_offload_info invalid response bss index %d\n",
+			    rsp->bss_index);
+		return -ENOENT;
+	}
+
+	if (vif_priv->rekey_data.replay_ctr != cpu_to_le64(rsp->key_replay_counter)) {
+		replay_ctr = cpu_to_be64(rsp->key_replay_counter);
+		vif_priv->rekey_data.replay_ctr =
+			cpu_to_le64(rsp->key_replay_counter);
+		ieee80211_gtk_rekey_notify(vif, vif->bss_conf.bssid,
+					   (void *)&replay_ctr, GFP_KERNEL);
+		 wcn36xx_dbg(WCN36XX_DBG_HAL,
+			     "GTK replay counter increment %llu\n",
+			     rsp->key_replay_counter);
+	}
+
+	wcn36xx_dbg(WCN36XX_DBG_HAL,
+		    "gtk offload info status %d last_rekey_status %d "
+		    "replay_counter %llu total_rekey_count %d gtk_rekey_count %d "
+		    "igtk_rekey_count %d bss_index %d\n",
+		    rsp->status, rsp->last_rekey_status,
+		    rsp->key_replay_counter, rsp->total_rekey_count,
+		    rsp->gtk_rekey_count, rsp->igtk_rekey_count,
+		    rsp->bss_index);
+
+	return 0;
+}
+
+int wcn36xx_smd_gtk_offload_get_info(struct wcn36xx *wcn,
+				     struct ieee80211_vif *vif)
+{
+	struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
+	struct wcn36xx_hal_gtk_offload_get_info_req_msg msg_body;
+	int ret;
+
+	mutex_lock(&wcn->hal_mutex);
+
+	INIT_HAL_MSG(msg_body, WCN36XX_HAL_GTK_OFFLOAD_GETINFO_REQ);
+
+	msg_body.bss_index = vif_priv->bss_index;
+
+	PREPARE_HAL_BUF(wcn->hal_buf, msg_body);
+
+	ret = wcn36xx_smd_send_and_wait(wcn, msg_body.header.len);
+	if (ret) {
+		wcn36xx_err("Sending gtk_offload_get_info failed\n");
+		goto out;
+	}
+	ret = wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len);
+	if (ret) {
+		wcn36xx_err("gtk_offload_get_info failed err=%d\n", ret);
+		goto out;
+	}
+	ret = wcn36xx_smd_gtk_offload_get_info_rsp(wcn, vif);
+out:
+	mutex_unlock(&wcn->hal_mutex);
+	return ret;
+}
+
 int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 			    void *buf, int len, void *priv, u32 addr)
 {
@@ -2944,6 +3016,7 @@ int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 	case WCN36XX_HAL_STOP_SCAN_OFFLOAD_RSP:
 	case WCN36XX_HAL_HOST_OFFLOAD_RSP:
 	case WCN36XX_HAL_GTK_OFFLOAD_RSP:
+	case WCN36XX_HAL_GTK_OFFLOAD_GETINFO_RSP:
 		memcpy(wcn->hal_buf, buf, len);
 		wcn->hal_rsp_len = len;
 		complete(&wcn->hal_rsp_compl);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
index cdf4231efe26..90c7faea0ef6 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.h
+++ b/drivers/net/wireless/ath/wcn36xx/smd.h
@@ -156,4 +156,7 @@ int wcn36xx_smd_ipv6_ns_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 int wcn36xx_smd_gtk_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 			    bool enable);
 
+int wcn36xx_smd_gtk_offload_get_info(struct wcn36xx *wcn,
+				     struct ieee80211_vif *vif);
+
 #endif	/* _SMD_H_ */

From 60f0078b1ebd51b5cde01f0001c8402a9ef0cec5 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:38 +0100
Subject: [PATCH 1661/3292] wcn36xx: Add Host suspend indication support

In order to activate ipv4 ARP offload, ipv6 NS offload and firmware GTK
offload we need to send a unidirectional indication from host to wcn
indicating a transition to suspend.

Once done, firmware will respond to ARP broadcasts, ipv6 NS lookups and
perform GTK rekeys without waking the host.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-11-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c |  3 +++
 drivers/net/wireless/ath/wcn36xx/smd.c  | 19 +++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/smd.h  |  2 ++
 3 files changed, 24 insertions(+)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 8e5d8326d551..e4d5e4d597f9 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1125,6 +1125,9 @@ static int wcn36xx_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wow)
 		if (ret)
 			goto out;
 		ret = wcn36xx_smd_set_power_params(wcn, true);
+		if (ret)
+			goto out;
+		ret = wcn36xx_smd_wlan_host_suspend_ind(wcn);
 	}
 out:
 	mutex_unlock(&wcn->conf_mutex);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 13200a079bd0..3b4ba6edd17a 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2966,6 +2966,25 @@ out:
 	return ret;
 }
 
+int wcn36xx_smd_wlan_host_suspend_ind(struct wcn36xx *wcn)
+{
+	struct wcn36xx_hal_wlan_host_suspend_ind_msg msg_body;
+	int ret;
+
+	mutex_lock(&wcn->hal_mutex);
+
+	INIT_HAL_MSG(msg_body, WCN36XX_HAL_HOST_SUSPEND_IND);
+	msg_body.configured_mcst_bcst_filter_setting = 0;
+	msg_body.active_session_count = 1;
+	PREPARE_HAL_BUF(wcn->hal_buf, msg_body);
+
+	ret = rpmsg_send(wcn->smd_channel, wcn->hal_buf, msg_body.header.len);
+
+	mutex_unlock(&wcn->hal_mutex);
+
+	return ret;
+}
+
 int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 			    void *buf, int len, void *priv, u32 addr)
 {
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
index 90c7faea0ef6..2909facdb100 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.h
+++ b/drivers/net/wireless/ath/wcn36xx/smd.h
@@ -159,4 +159,6 @@ int wcn36xx_smd_gtk_offload(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 int wcn36xx_smd_gtk_offload_get_info(struct wcn36xx *wcn,
 				     struct ieee80211_vif *vif);
 
+int wcn36xx_smd_wlan_host_suspend_ind(struct wcn36xx *wcn);
+
 #endif	/* _SMD_H_ */

From ebe7c1a6635f19c61c14b78ba05a287e3022f4f8 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:39 +0100
Subject: [PATCH 1662/3292] wcn36xx: Add host resume request support

This commit is the corresponding resume() path request to the firmware when
resuming. Unlike the suspend() version which is a unidirectional
indication, the resume version is a standard request/response.

Once the resume() request completes ipv4 ARP, ipv6 NS and GTK rekey offload
stop working and can subsequently be rolled back.

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-12-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c |  1 +
 drivers/net/wireless/ath/wcn36xx/smd.c  | 35 +++++++++++++++++++++++++
 drivers/net/wireless/ath/wcn36xx/smd.h  |  2 ++
 3 files changed, 38 insertions(+)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index e4d5e4d597f9..c697b9f3633a 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1144,6 +1144,7 @@ static int wcn36xx_resume(struct ieee80211_hw *hw)
 	mutex_lock(&wcn->conf_mutex);
 	vif = wcn36xx_get_first_assoc_vif(wcn);
 	if (vif) {
+		wcn36xx_smd_host_resume(wcn);
 		wcn36xx_smd_set_power_params(wcn, false);
 		wcn36xx_smd_gtk_offload_get_info(wcn, vif);
 		wcn36xx_smd_gtk_offload(wcn, vif, false);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 3b4ba6edd17a..cf8e52cbdd9b 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2985,6 +2985,40 @@ int wcn36xx_smd_wlan_host_suspend_ind(struct wcn36xx *wcn)
 	return ret;
 }
 
+int wcn36xx_smd_host_resume(struct wcn36xx *wcn)
+{
+	struct wcn36xx_hal_wlan_host_resume_req_msg msg_body;
+	struct wcn36xx_hal_host_resume_rsp_msg *rsp;
+	int ret;
+
+	mutex_lock(&wcn->hal_mutex);
+
+	INIT_HAL_MSG(msg_body, WCN36XX_HAL_HOST_RESUME_REQ);
+	msg_body.configured_mcst_bcst_filter_setting = 0;
+
+	PREPARE_HAL_BUF(wcn->hal_buf, msg_body);
+
+	ret = wcn36xx_smd_send_and_wait(wcn, msg_body.header.len);
+	if (ret) {
+		wcn36xx_err("Sending wlan_host_resume failed\n");
+		goto out;
+	}
+	ret = wcn36xx_smd_rsp_status_check(wcn->hal_buf, wcn->hal_rsp_len);
+	if (ret) {
+		wcn36xx_err("wlan_host_resume err=%d\n", ret);
+		goto out;
+	}
+
+	rsp = (struct wcn36xx_hal_host_resume_rsp_msg *)wcn->hal_buf;
+	if (rsp->status)
+		wcn36xx_warn("wlan_host_resume status=%d\n", rsp->status);
+
+out:
+	mutex_unlock(&wcn->hal_mutex);
+
+	return ret;
+}
+
 int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 			    void *buf, int len, void *priv, u32 addr)
 {
@@ -3036,6 +3070,7 @@ int wcn36xx_smd_rsp_process(struct rpmsg_device *rpdev,
 	case WCN36XX_HAL_HOST_OFFLOAD_RSP:
 	case WCN36XX_HAL_GTK_OFFLOAD_RSP:
 	case WCN36XX_HAL_GTK_OFFLOAD_GETINFO_RSP:
+	case WCN36XX_HAL_HOST_RESUME_RSP:
 		memcpy(wcn->hal_buf, buf, len);
 		wcn->hal_rsp_len = len;
 		complete(&wcn->hal_rsp_compl);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h
index 2909facdb100..d8bded03945d 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.h
+++ b/drivers/net/wireless/ath/wcn36xx/smd.h
@@ -161,4 +161,6 @@ int wcn36xx_smd_gtk_offload_get_info(struct wcn36xx *wcn,
 
 int wcn36xx_smd_wlan_host_suspend_ind(struct wcn36xx *wcn);
 
+int wcn36xx_smd_host_resume(struct wcn36xx *wcn);
+
 #endif	/* _SMD_H_ */

From 1e2e8ee957343575be7b370367b8312f7c489ac0 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 02:11:40 +0100
Subject: [PATCH 1663/3292] wcn36xx: Enable WOWLAN flags

Enable flags for

- Magic packet
- GTK rekey

Previous patches implemented the necessary code to switch these two on.
Standalone magic packet absent GTK rekey is pretty useless, so it makes
sense to flag both at once.

Once done it is possible for wcn36xx firmware to

1. Respond to ipv4 and ipv6 ARP/NS lookup requests
2. Bring the system out of suspend when a magic packet is received.
   Magic in our case is a simple ipv4 or ipv6 unicast.
3. GTK rekey whilst in suspend
   Once we wake from suspend the GTK will be updated as necessary

Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Tested-by: Benjamin Li <benl@squareup.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605011140.2004643-13-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index c697b9f3633a..3a4e383b3a6c 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -173,7 +173,9 @@ static struct ieee80211_supported_band wcn_band_5ghz = {
 #ifdef CONFIG_PM
 
 static const struct wiphy_wowlan_support wowlan_support = {
-	.flags = WIPHY_WOWLAN_ANY
+	.flags = WIPHY_WOWLAN_ANY	|
+		 WIPHY_WOWLAN_MAGIC_PKT	|
+		 WIPHY_WOWLAN_SUPPORTS_GTK_REKEY
 };
 
 #endif

From 69530b434780217053a5a98462d76129ac776451 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Thu, 10 Jun 2021 17:40:38 +0800
Subject: [PATCH 1664/3292] ACPI: processor_throttling: Remove redundant
 initialization of 'obj'

'obj' is being initialized, however this value is never read as
'obj' is assigned an updated value later. Remove the redundant
initialization.

Clean up clang warning:
drivers/acpi/processor_throttling.c:409:20: warning: Value stored to
'obj' during its initialization is never read
[clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_throttling.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index e61b8f038364..fb6e0145e534 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -408,7 +408,7 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
 	acpi_status status = 0;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *ptc = NULL;
-	union acpi_object obj = { 0 };
+	union acpi_object obj;
 	struct acpi_processor_throttling *throttling;
 
 	status = acpi_evaluate_object(pr->handle, "_PTC", NULL, &buffer);

From 743b575af18ddfdb45bd7d1c338d3d9e730728b3 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 26 May 2021 18:22:48 +0800
Subject: [PATCH 1665/3292] wcn36xx: Fix inconsistent indenting

Eliminate the follow smatch warning:

drivers/net/wireless/ath/wcn36xx/dxe.c:803 wcn36xx_dxe_tx_frame() warn:
inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1622024568-32130-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 drivers/net/wireless/ath/wcn36xx/dxe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c
index 63079231e48e..8e1dbfda6538 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.c
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.c
@@ -800,7 +800,7 @@ int wcn36xx_dxe_tx_frame(struct wcn36xx *wcn,
 			 (char *)ctl_skb->skb->data, ctl_skb->skb->len);
 
 	/* Move the head of the ring to the next empty descriptor */
-	 ch->head_blk_ctl = ctl_skb->next;
+	ch->head_blk_ctl = ctl_skb->next;
 
 	/* Commit all previous writes and set descriptors to VALID */
 	wmb();

From aa3a522c4f41537909b2ab4da660cb0deee136e8 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Thu, 10 Jun 2021 17:40:39 +0800
Subject: [PATCH 1666/3292] ACPI: sleep: Fix acpi_pm_pre_suspend() kernel-doc

Fix function name in sleep.c kernel-doc comment
to remove a warning found by running make W=1 LLVM=1.

drivers/acpi/sleep.c:413: warning: expecting prototype for
acpi_pre_suspend(). Prototype was for acpi_pm_pre_suspend() instead.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sleep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 4d2118c8dd6c..1c118c86cd44 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -406,7 +406,7 @@ static int acpi_pm_freeze(void)
 }
 
 /**
- * acpi_pre_suspend - Enable wakeup devices, "freeze" EC and save NVS.
+ * acpi_pm_pre_suspend - Enable wakeup devices, "freeze" EC and save NVS.
  */
 static int acpi_pm_pre_suspend(void)
 {

From ef48667557c53d4b51a1ee3090eab7699324c9de Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sat, 5 Jun 2021 18:33:47 +0100
Subject: [PATCH 1667/3292] wcn36xx: Move hal_buf allocation to devm_kmalloc in
 probe

Right now wcn->hal_buf is allocated in wcn36xx_start(). This is a problem
since we should have setup all of the buffers we required by the time
ieee80211_register_hw() is called.

struct ieee80211_ops callbacks may run prior to mac_start() and therefore
wcn->hal_buf must be initialized.

This is easily remediated by moving the allocation to probe() taking the
opportunity to tidy up freeing memory by using devm_kmalloc().

Fixes: 8e84c2582169 ("wcn36xx: mac80211 driver for Qualcomm WCN3660/WCN3680 hardware")
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605173347.2266003-1-bryan.odonoghue@linaro.org
---
 drivers/net/wireless/ath/wcn36xx/main.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 3a4e383b3a6c..d202f2128df2 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -296,23 +296,16 @@ static int wcn36xx_start(struct ieee80211_hw *hw)
 		goto out_free_dxe_pool;
 	}
 
-	wcn->hal_buf = kmalloc(WCN36XX_HAL_BUF_SIZE, GFP_KERNEL);
-	if (!wcn->hal_buf) {
-		wcn36xx_err("Failed to allocate smd buf\n");
-		ret = -ENOMEM;
-		goto out_free_dxe_ctl;
-	}
-
 	ret = wcn36xx_smd_load_nv(wcn);
 	if (ret) {
 		wcn36xx_err("Failed to push NV to chip\n");
-		goto out_free_smd_buf;
+		goto out_free_dxe_ctl;
 	}
 
 	ret = wcn36xx_smd_start(wcn);
 	if (ret) {
 		wcn36xx_err("Failed to start chip\n");
-		goto out_free_smd_buf;
+		goto out_free_dxe_ctl;
 	}
 
 	if (!wcn36xx_is_fw_version(wcn, 1, 2, 2, 24)) {
@@ -339,8 +332,6 @@ static int wcn36xx_start(struct ieee80211_hw *hw)
 
 out_smd_stop:
 	wcn36xx_smd_stop(wcn);
-out_free_smd_buf:
-	kfree(wcn->hal_buf);
 out_free_dxe_ctl:
 	wcn36xx_dxe_free_ctl_blks(wcn);
 out_free_dxe_pool:
@@ -375,8 +366,6 @@ static void wcn36xx_stop(struct ieee80211_hw *hw)
 
 	wcn36xx_dxe_free_mem_pools(wcn);
 	wcn36xx_dxe_free_ctl_blks(wcn);
-
-	kfree(wcn->hal_buf);
 }
 
 static void wcn36xx_change_ps(struct wcn36xx *wcn, bool enable)
@@ -1499,6 +1488,12 @@ static int wcn36xx_probe(struct platform_device *pdev)
 	mutex_init(&wcn->hal_mutex);
 	mutex_init(&wcn->scan_lock);
 
+	wcn->hal_buf = devm_kmalloc(wcn->dev, WCN36XX_HAL_BUF_SIZE, GFP_KERNEL);
+	if (!wcn->hal_buf) {
+		ret = -ENOMEM;
+		goto out_wq;
+	}
+
 	ret = dma_set_mask_and_coherent(wcn->dev, DMA_BIT_MASK(32));
 	if (ret < 0) {
 		wcn36xx_err("failed to set DMA mask: %d\n", ret);

From 49b9441a258175a6941a29ed23dfc39f1b632723 Mon Sep 17 00:00:00 2001
From: Clayton Casciato <majortomtosourcecontrol@gmail.com>
Date: Sat, 12 Jun 2021 13:38:01 -0600
Subject: [PATCH 1668/3292] ACPI: video: Drop three redundant return statements

Signed-off-by: Clayton Casciato <majortomtosourcecontrol@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_video.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 0c884020f74b..ffb4afc5aad9 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -1619,8 +1619,6 @@ static void acpi_video_bus_notify(struct acpi_device *device, u32 event)
 		input_report_key(input, keycode, 0);
 		input_sync(input);
 	}
-
-	return;
 }
 
 static void brightness_switch_event(struct acpi_video_device *video_device,
@@ -1690,8 +1688,6 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data)
 		input_report_key(input, keycode, 0);
 		input_sync(input);
 	}
-
-	return;
 }
 
 static int acpi_video_resume(struct notifier_block *nb,
@@ -2308,8 +2304,6 @@ static void __exit acpi_video_exit(void)
 {
 	acpi_video_detect_exit();
 	acpi_video_unregister();
-
-	return;
 }
 
 module_init(acpi_video_init);

From 85c1ad47d2c7b9df335511b72b0fb6fdb230811d Mon Sep 17 00:00:00 2001
From: Clayton Casciato <majortomtosourcecontrol@gmail.com>
Date: Sat, 12 Jun 2021 14:09:11 -0600
Subject: [PATCH 1669/3292] ACPI: sysfs: Drop four redundant return statements

Signed-off-by: Clayton Casciato <majortomtosourcecontrol@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index d25927195d6d..d6626aba4a6a 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -582,8 +582,6 @@ static void delete_gpe_attr_array(void)
 		kfree(counter_attrs);
 	}
 	kfree(all_attrs);
-
-	return;
 }
 
 static void gpe_count(u32 gpe_number)
@@ -598,8 +596,6 @@ static void gpe_count(u32 gpe_number)
 	else
 		all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS +
 			     COUNT_ERROR].count++;
-
-	return;
 }
 
 static void fixed_event_count(u32 event_number)
@@ -612,8 +608,6 @@ static void fixed_event_count(u32 event_number)
 	else
 		all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS +
 			     COUNT_ERROR].count++;
-
-	return;
 }
 
 static void acpi_global_event_handler(u32 event_type, acpi_handle device,
@@ -914,8 +908,6 @@ static void __exit interrupt_stats_exit(void)
 	sysfs_remove_group(acpi_kobj, &interrupt_stats_attr_group);
 
 	delete_gpe_attr_array();
-
-	return;
 }
 
 static ssize_t

From 2ef53bf7147778e5784d14bcfedb6d83ba20b9b0 Mon Sep 17 00:00:00 2001
From: Clayton Casciato <majortomtosourcecontrol@gmail.com>
Date: Sat, 12 Jun 2021 19:53:53 -0600
Subject: [PATCH 1670/3292] ACPI: processor_throttling: Fix several coding
 style issues

Drop one redundant return statement and fix a few white space
issues.

Signed-off-by: Clayton Casciato <majortomtosourcecontrol@gmail.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_throttling.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index fb6e0145e534..dbccc92e81e7 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -6,7 +6,7 @@
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2004       Dominik Brodowski <linux@brodo.de>
  *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- *  			- Added processor hotplug support
+ *                      - Added processor hotplug support
  */
 
 #include <linux/kernel.h>
@@ -195,15 +195,13 @@ void acpi_processor_throttling_init(void)
 {
 	if (acpi_processor_update_tsd_coord())
 		pr_debug("Assume no T-state coordination\n");
-
-	return;
 }
 
 static int acpi_processor_throttling_notifier(unsigned long event, void *data)
 {
 	struct throttling_tstate *p_tstate = data;
 	struct acpi_processor *pr;
-	unsigned int cpu ;
+	unsigned int cpu;
 	int target_state;
 	struct acpi_processor_limit *p_limit;
 	struct acpi_processor_throttling *p_throttling;
@@ -477,7 +475,7 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
 		goto end;
 	}
 
-      end:
+end:
 	kfree(buffer.pointer);
 
 	return result;
@@ -554,7 +552,7 @@ static int acpi_processor_get_throttling_states(struct acpi_processor *pr)
 		}
 	}
 
-      end:
+end:
 	kfree(buffer.pointer);
 
 	return result;
@@ -639,7 +637,7 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr)
 		pthrottling->shared_type = DOMAIN_COORD_TYPE_SW_ALL;
 	}
 
-      end:
+end:
 	kfree(buffer.pointer);
 	return result;
 }
@@ -717,7 +715,7 @@ static int acpi_throttling_rdmsr(u64 *value)
 		msr_low = 0;
 		msr_high = 0;
 		rdmsr_safe(MSR_IA32_THERM_CONTROL,
-			(u32 *)&msr_low , (u32 *) &msr_high);
+			(u32 *)&msr_low, (u32 *) &msr_high);
 		msr = (msr_high << 32) | msr_low;
 		*value = (u64) msr;
 		ret = 0;
@@ -1185,8 +1183,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
 	 */
 	if (acpi_processor_get_throttling_control(pr) ||
 		acpi_processor_get_throttling_states(pr) ||
-		acpi_processor_get_platform_limit(pr))
-	{
+		acpi_processor_get_platform_limit(pr)) {
 		pr->throttling.acpi_processor_get_throttling =
 		    &acpi_processor_get_throttling_fadt;
 		pr->throttling.acpi_processor_set_throttling =
@@ -1246,7 +1243,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
 			goto end;
 	}
 
-      end:
+end:
 	if (result)
 		pr->flags.throttling = 0;
 

From 620cbe40ed10aebf596767e934ab42057c34ab04 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 6 Jun 2021 16:09:10 -0400
Subject: [PATCH 1671/3292] dm writecache: remove unused gfp_t argument from
 wc_add_block()

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index ea9f0d8fff1d..01c3f51f4270 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1664,7 +1664,7 @@ pop_from_list:
 	return 0;
 }
 
-static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp)
+static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e)
 {
 	struct dm_writecache *wc = wb->wc;
 	unsigned block_size = wc->block_size;
@@ -1725,7 +1725,7 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
 			max_pages = WB_LIST_INLINE;
 		}
 
-		BUG_ON(!wc_add_block(wb, e, GFP_NOIO));
+		BUG_ON(!wc_add_block(wb, e));
 
 		wb->wc_list[0] = e;
 		wb->wc_list_n = 1;
@@ -1735,7 +1735,7 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
 			if (read_original_sector(wc, f) !=
 			    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
 				break;
-			if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN))
+			if (!wc_add_block(wb, f))
 				break;
 			wbl->size--;
 			list_del(&f->lru);

From 991bd8d7bc78966b4dc427b53a144f276bffcd52 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 6 Jun 2021 16:13:16 -0400
Subject: [PATCH 1672/3292] dm writecache: commit just one block, not a full
 page

Some architectures have pages larger than 4k and committing a full
page causes needless overhead.

Fix this by writing a single block when committing the superblock.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 01c3f51f4270..e18bbfd3289b 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -532,11 +532,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
 
 	region.bdev = wc->ssd_dev->bdev;
 	region.sector = 0;
-	region.count = PAGE_SIZE >> SECTOR_SHIFT;
-
-	if (unlikely(region.sector + region.count > wc->metadata_sectors))
-		region.count = wc->metadata_sectors - region.sector;
-
+	region.count = wc->block_size >> SECTOR_SHIFT;
 	region.sector += wc->start_sector;
 
 	req.bi_op = REQ_OP_WRITE;

From 86f1ea9d645edb16358e4a3e1602e1cec81e5048 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 11:35:38 +0800
Subject: [PATCH 1673/3292] wlcore: use DEVICE_ATTR_<RW|RO> macro

Use DEVICE_ATTR_<RW|RO> helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210523033538.25568-1-yuehaibing@huawei.com
---
 drivers/net/wireless/ti/wlcore/sysfs.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/sysfs.c b/drivers/net/wireless/ti/wlcore/sysfs.c
index 5cf0379b88b6..35b535c125b6 100644
--- a/drivers/net/wireless/ti/wlcore/sysfs.c
+++ b/drivers/net/wireless/ti/wlcore/sysfs.c
@@ -12,9 +12,9 @@
 #include "debug.h"
 #include "sysfs.h"
 
-static ssize_t wl1271_sysfs_show_bt_coex_state(struct device *dev,
-					       struct device_attribute *attr,
-					       char *buf)
+static ssize_t bt_coex_state_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
 {
 	struct wl1271 *wl = dev_get_drvdata(dev);
 	ssize_t len;
@@ -30,9 +30,9 @@ static ssize_t wl1271_sysfs_show_bt_coex_state(struct device *dev,
 
 }
 
-static ssize_t wl1271_sysfs_store_bt_coex_state(struct device *dev,
-						struct device_attribute *attr,
-						const char *buf, size_t count)
+static ssize_t bt_coex_state_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
 {
 	struct wl1271 *wl = dev_get_drvdata(dev);
 	unsigned long res;
@@ -71,13 +71,11 @@ static ssize_t wl1271_sysfs_store_bt_coex_state(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(bt_coex_state, 0644,
-		   wl1271_sysfs_show_bt_coex_state,
-		   wl1271_sysfs_store_bt_coex_state);
+static DEVICE_ATTR_RW(bt_coex_state);
 
-static ssize_t wl1271_sysfs_show_hw_pg_ver(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
+static ssize_t hw_pg_ver_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
 {
 	struct wl1271 *wl = dev_get_drvdata(dev);
 	ssize_t len;
@@ -94,7 +92,7 @@ static ssize_t wl1271_sysfs_show_hw_pg_ver(struct device *dev,
 	return len;
 }
 
-static DEVICE_ATTR(hw_pg_ver, 0444, wl1271_sysfs_show_hw_pg_ver, NULL);
+static DEVICE_ATTR_RO(hw_pg_ver);
 
 static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj,
 				       struct bin_attribute *bin_attr,

From 11ef6bc846dcdce838f0b00c5f6a562c57e5d43b Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 3 Jun 2021 09:28:14 +0300
Subject: [PATCH 1674/3292] wlcore/wl12xx: Fix wl12xx get_mac error if device
 is in ELP

At least on wl12xx, reading the MAC after boot can fail with a warning
at drivers/net/wireless/ti/wlcore/sdio.c:78 wl12xx_sdio_raw_read.
The failed call comes from wl12xx_get_mac() that wlcore_nvs_cb() calls
after request_firmware_work_func().

After the error, no wireless interface is created. Reloading the wl12xx
module makes the interface work.

Turns out the wlan controller can be in a low-power ELP state after the
boot from the bootloader or kexec, and needs to be woken up first.

Let's wake the hardware and add a sleep after that similar to
wl12xx_pre_boot() is already doing.

Note that a similar issue could exist for wl18xx, but I have not seen it
so far. And a search for wl18xx_get_mac and wl12xx_sdio_raw_read did not
produce similar errors.

Cc: Carl Philipp Klemm <philipp@uvos.xyz>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210603062814.19464-1-tony@atomide.com
---
 drivers/net/wireless/ti/wl12xx/main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c
index 9d7dbfe7fe0c..c6da0cfb4afb 100644
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -1503,6 +1503,13 @@ static int wl12xx_get_fuse_mac(struct wl1271 *wl)
 	u32 mac1, mac2;
 	int ret;
 
+	/* Device may be in ELP from the bootloader or kexec */
+	ret = wlcore_write32(wl, WL12XX_WELP_ARM_COMMAND, WELP_ARM_COMMAND_VAL);
+	if (ret < 0)
+		goto out;
+
+	usleep_range(500000, 700000);
+
 	ret = wlcore_set_partition(wl, &wl->ptable[PART_DRPW]);
 	if (ret < 0)
 		goto out;

From 8c77f1cb84585efba108df5e67ecc5cbbceef0d9 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:04:33 +0800
Subject: [PATCH 1675/3292] dm writecache: use list_move instead of
 list_del/list_add in writecache_writeback()

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index e18bbfd3289b..56179a21db0e 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1864,15 +1864,13 @@ restart:
 			if (unlikely(read_original_sector(wc, f) ==
 				     read_original_sector(wc, e))) {
 				BUG_ON(!f->write_in_progress);
-				list_del(&e->lru);
-				list_add(&e->lru, &skipped);
+				list_move(&e->lru, &skipped);
 				cond_resched();
 				continue;
 			}
 		}
 		wc->writeback_size++;
-		list_del(&e->lru);
-		list_add(&e->lru, &wbl.list);
+		list_move(&e->lru, &wbl.list);
 		wbl.size++;
 		e->write_in_progress = true;
 		e->wc_list_contiguous = 1;
@@ -1907,8 +1905,7 @@ restart:
 			//	break;
 
 			wc->writeback_size++;
-			list_del(&g->lru);
-			list_add(&g->lru, &wbl.list);
+			list_move(&g->lru, &wbl.list);
 			wbl.size++;
 			g->write_in_progress = true;
 			g->wc_list_contiguous = BIO_MAX_VECS;

From 913112398d5e3e64eb3a45b8a0f1c196daed8f0b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 3 Jun 2021 12:54:14 +0100
Subject: [PATCH 1676/3292] wlcore: tidy up use of fw_log.actual_buff_size

Tidy up the use of fw_log.actual_buff_size - rather than reading it
multiple times and applying the endian conversion, read it once into
actual_len and use that instead.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/E1lolvS-0003Ql-NJ@rmk-PC.armlinux.org.uk
---
 drivers/net/wireless/ti/wlcore/event.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c
index a68bbadae043..a5c261affdc7 100644
--- a/drivers/net/wireless/ti/wlcore/event.c
+++ b/drivers/net/wireless/ti/wlcore/event.c
@@ -40,7 +40,7 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 	buffer = kzalloc(WL18XX_LOGGER_SDIO_BUFF_MAX, GFP_KERNEL);
 	if (!buffer) {
 		wl1271_error("Fail to allocate fw logger memory");
-		fw_log.actual_buff_size = cpu_to_le32(0);
+		actual_len = 0;
 		goto out;
 	}
 
@@ -49,30 +49,30 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 	if (ret < 0) {
 		wl1271_error("Fail to read logger buffer, error_id = %d",
 			     ret);
-		fw_log.actual_buff_size = cpu_to_le32(0);
+		actual_len = 0;
 		goto free_out;
 	}
 
 	memcpy(&fw_log, buffer, sizeof(fw_log));
 
-	if (le32_to_cpu(fw_log.actual_buff_size) == 0)
+	actual_len = le32_to_cpu(fw_log.actual_buff_size);
+	if (actual_len == 0)
 		goto free_out;
 
-	actual_len = le32_to_cpu(fw_log.actual_buff_size);
 	start_loc = (le32_to_cpu(fw_log.buff_read_ptr) -
 			internal_fw_addrbase) - addr;
 	end_buff_addr += le32_to_cpu(fw_log.max_buff_size);
 	available_len = end_buff_addr -
 			(le32_to_cpu(fw_log.buff_read_ptr) -
 				 internal_fw_addrbase);
-	actual_len = min(actual_len, available_len);
-	len = actual_len;
 
+	/* Copy initial part from end of ring buffer */
+	len = min(actual_len, available_len);
 	wl12xx_copy_fwlog(wl, &buffer[start_loc], len);
-	clear_addr = addr + start_loc + le32_to_cpu(fw_log.actual_buff_size) +
-			internal_fw_addrbase;
+	clear_addr = addr + start_loc + actual_len + internal_fw_addrbase;
 
-	len = le32_to_cpu(fw_log.actual_buff_size) - len;
+	/* Copy any remaining part from beginning of ring buffer */
+	len = actual_len - len;
 	if (len) {
 		wl12xx_copy_fwlog(wl,
 				  &buffer[WL18XX_LOGGER_BUFF_OFFSET],
@@ -93,7 +93,7 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 free_out:
 	kfree(buffer);
 out:
-	return le32_to_cpu(fw_log.actual_buff_size);
+	return actual_len;
 }
 EXPORT_SYMBOL_GPL(wlcore_event_fw_logger);
 

From 98e94771cadcef2952d3aa64e72b2b8fecef465b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 3 Jun 2021 12:54:19 +0100
Subject: [PATCH 1677/3292] wlcore: make some of the fwlog calculations more
 obvious

Make some of the fwlog calculations more obvious by calculating bits
that get used and documenting what they are. Validate the read pointer
while we're at it to ensure we do not overflow the data block we have
allocated and read.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/E1lolvX-0003R3-RE@rmk-PC.armlinux.org.uk
---
 drivers/net/wireless/ti/wlcore/event.c | 43 +++++++++++++++++---------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c
index a5c261affdc7..875198fb1480 100644
--- a/drivers/net/wireless/ti/wlcore/event.c
+++ b/drivers/net/wireless/ti/wlcore/event.c
@@ -29,11 +29,13 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 	u8  *buffer;
 	u32 internal_fw_addrbase = WL18XX_DATA_RAM_BASE_ADDRESS;
 	u32 addr = WL18XX_LOGGER_SDIO_BUFF_ADDR;
-	u32 end_buff_addr = WL18XX_LOGGER_SDIO_BUFF_ADDR +
-				WL18XX_LOGGER_BUFF_OFFSET;
+	u32 addr_ptr;
+	u32 buff_start_ptr;
+	u32 buff_read_ptr;
+	u32 buff_end_ptr;
 	u32 available_len;
 	u32 actual_len;
-	u32 clear_addr;
+	u32 clear_ptr;
 	size_t len;
 	u32 start_loc;
 
@@ -59,17 +61,29 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 	if (actual_len == 0)
 		goto free_out;
 
-	start_loc = (le32_to_cpu(fw_log.buff_read_ptr) -
-			internal_fw_addrbase) - addr;
-	end_buff_addr += le32_to_cpu(fw_log.max_buff_size);
-	available_len = end_buff_addr -
-			(le32_to_cpu(fw_log.buff_read_ptr) -
-				 internal_fw_addrbase);
+	/* Calculate the internal pointer to the fwlog structure */
+	addr_ptr = internal_fw_addrbase + addr;
 
-	/* Copy initial part from end of ring buffer */
+	/* Calculate the internal pointers to the start and end of log buffer */
+	buff_start_ptr = addr_ptr + WL18XX_LOGGER_BUFF_OFFSET;
+	buff_end_ptr = buff_start_ptr + le32_to_cpu(fw_log.max_buff_size);
+
+	/* Read the read pointer and validate it */
+	buff_read_ptr = le32_to_cpu(fw_log.buff_read_ptr);
+	if (buff_read_ptr < buff_start_ptr ||
+	    buff_read_ptr >= buff_end_ptr) {
+		wl1271_error("buffer read pointer out of bounds: %x not in (%x-%x)\n",
+			     buff_read_ptr, buff_start_ptr, buff_end_ptr);
+		goto free_out;
+	}
+
+	start_loc = buff_read_ptr - addr_ptr;
+	available_len = buff_end_ptr - buff_read_ptr;
+
+	/* Copy initial part up to the end of ring buffer */
 	len = min(actual_len, available_len);
 	wl12xx_copy_fwlog(wl, &buffer[start_loc], len);
-	clear_addr = addr + start_loc + actual_len + internal_fw_addrbase;
+	clear_ptr = addr_ptr + start_loc + actual_len;
 
 	/* Copy any remaining part from beginning of ring buffer */
 	len = actual_len - len;
@@ -77,14 +91,13 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 		wl12xx_copy_fwlog(wl,
 				  &buffer[WL18XX_LOGGER_BUFF_OFFSET],
 				  len);
-		clear_addr = addr + WL18XX_LOGGER_BUFF_OFFSET + len +
-				internal_fw_addrbase;
+		clear_ptr = addr_ptr + WL18XX_LOGGER_BUFF_OFFSET + len;
 	}
 
 	/* double check that clear address and write pointer are the same */
-	if (clear_addr != le32_to_cpu(fw_log.buff_write_ptr)) {
+	if (clear_ptr != le32_to_cpu(fw_log.buff_write_ptr)) {
 		wl1271_error("Calculate of clear addr Clear = %x, write = %x",
-			     clear_addr, le32_to_cpu(fw_log.buff_write_ptr));
+			     clear_ptr, le32_to_cpu(fw_log.buff_write_ptr));
 	}
 
 	/* indicate FW about Clear buffer */

From 87ab9cbaee7c11dd12587d60fb16f3ec22c1a5b3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 3 Jun 2021 12:54:24 +0100
Subject: [PATCH 1678/3292] wlcore: fix bug reading fwlog

With logging enabled, it has been observed that the driver spews
messages such as:

wlcore: ERROR Calculate of clear addr Clear = 204025b0, write = 204015b0

The problem occurs because 204025b0 is the end of the buffer, and
204015b0 is the beginning, and the calculation for "clear"ing the
buffer does not take into account that if we read to the very end
of the ring buffer, we are actually at the beginning of the buffer.

Fix this.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/E1lolvc-0003RM-VE@rmk-PC.armlinux.org.uk
---
 drivers/net/wireless/ti/wlcore/event.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c
index 875198fb1480..8a67a708c96e 100644
--- a/drivers/net/wireless/ti/wlcore/event.c
+++ b/drivers/net/wireless/ti/wlcore/event.c
@@ -84,6 +84,8 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 	len = min(actual_len, available_len);
 	wl12xx_copy_fwlog(wl, &buffer[start_loc], len);
 	clear_ptr = addr_ptr + start_loc + actual_len;
+	if (clear_ptr == buff_end_ptr)
+		clear_ptr = buff_start_ptr;
 
 	/* Copy any remaining part from beginning of ring buffer */
 	len = actual_len - len;

From 01de6fe49ca406d4e44c6e4327a7f49d240113c1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 3 Jun 2021 12:54:30 +0100
Subject: [PATCH 1679/3292] wlcore: fix read pointer update

When reading the fw_log structure from the device's memory, we could
race with the firmware updating the actual_buff_size and buff_write_ptr
members of this structure. This would lead to bytes being dropped from
the log.

Fix this by writing back the actual - now fixed - clear_ptr which
reflects where we read up to in the buffer.

This also means that we must not check that the clear_ptr matches the
current write pointer, so remove that check.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/E1lolvi-0003Ri-39@rmk-PC.armlinux.org.uk
---
 drivers/net/wireless/ti/wlcore/event.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c
index 8a67a708c96e..46ab69eab26a 100644
--- a/drivers/net/wireless/ti/wlcore/event.c
+++ b/drivers/net/wireless/ti/wlcore/event.c
@@ -96,15 +96,9 @@ int wlcore_event_fw_logger(struct wl1271 *wl)
 		clear_ptr = addr_ptr + WL18XX_LOGGER_BUFF_OFFSET + len;
 	}
 
-	/* double check that clear address and write pointer are the same */
-	if (clear_ptr != le32_to_cpu(fw_log.buff_write_ptr)) {
-		wl1271_error("Calculate of clear addr Clear = %x, write = %x",
-			     clear_ptr, le32_to_cpu(fw_log.buff_write_ptr));
-	}
-
-	/* indicate FW about Clear buffer */
+	/* Update the read pointer */
 	ret = wlcore_write32(wl, addr + WL18XX_LOGGER_READ_POINT_OFFSET,
-			     fw_log.buff_write_ptr);
+			     clear_ptr);
 free_out:
 	kfree(buffer);
 out:

From 3f1639f8f96c4c60ccf737b02c61eb7c59355a5b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 31 May 2021 14:06:30 -0700
Subject: [PATCH 1680/3292] MAINTAINERS: Add Clang CFI section

Sami is the primary developer and Kees has been chauffeuring the patches
to Linus so ensure they are always kept in the loop about proposed
changes to these files. Add Nick and I as reviewers so we are CC'd as
well.

Fixes: cf68fffb66d6 ("add support for Clang CFI")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210531210629.864888-1-nathan@kernel.org
---
 MAINTAINERS | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 008fcad7ac00..4ddf370572d3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4436,6 +4436,18 @@ F:	include/linux/compiler-clang.h
 F:	scripts/clang-tools/
 K:	\b(?i:clang|llvm)\b
 
+CLANG CONTROL FLOW INTEGRITY SUPPORT
+M:	Sami Tolvanen <samitolvanen@google.com>
+M:	Kees Cook <keescook@chromium.org>
+R:	Nathan Chancellor <nathan@kernel.org>
+R:	Nick Desaulniers <ndesaulniers@google.com>
+L:	clang-built-linux@googlegroups.com
+S:	Supported
+B:	https://github.com/ClangBuiltLinux/linux/issues
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/clang/features
+F:	include/linux/cfi.h
+F:	kernel/cfi.c
+
 CLEANCACHE API
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 L:	linux-kernel@vger.kernel.org

From 590e8a082a5772071d7bcfea2b8e5a2453cecad2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 2 Jun 2021 16:37:01 +0100
Subject: [PATCH 1681/3292] CFI: Move function_nocfi() into compiler.h

Currently the common definition of function_nocfi() is provided by
<linux/mm.h>, and architectures are expected to provide a definition in
<asm/memory.h>. Due to header dependencies, this can make it hard to use
function_nocfi() in low-level headers.

As function_nocfi() has no dependency on any mm code, nor on any memory
definitions, it doesn't need to live in <linux/mm.h> or <asm/memory.h>.
Generally, it would make more sense for it to live in
<linux/compiler.h>, where an architecture can override it in
<asm/compiler.h>.

Move the definitions accordingly.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210602153701.35957-1-mark.rutland@arm.com
---
 arch/arm64/include/asm/compiler.h | 16 ++++++++++++++++
 arch/arm64/include/asm/memory.h   | 16 ----------------
 include/linux/compiler.h          | 10 ++++++++++
 include/linux/mm.h                | 10 ----------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index 6fb2e6bcc392..dc3ea4080e2e 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -23,4 +23,20 @@
 #define __builtin_return_address(val)					\
 	(void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val)))
 
+#ifdef CONFIG_CFI_CLANG
+/*
+ * With CONFIG_CFI_CLANG, the compiler replaces function address
+ * references with the address of the function's CFI jump table
+ * entry. The function_nocfi macro always returns the address of the
+ * actual function instead.
+ */
+#define function_nocfi(x) ({						\
+	void *addr;							\
+	asm("adrp %0, " __stringify(x) "\n\t"				\
+	    "add  %0, %0, :lo12:" __stringify(x)			\
+	    : "=r" (addr));						\
+	addr;								\
+})
+#endif
+
 #endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 87b90dc27a43..ced44ca3e175 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -323,22 +323,6 @@ static inline void *phys_to_virt(phys_addr_t x)
 #define virt_to_pfn(x)		__phys_to_pfn(__virt_to_phys((unsigned long)(x)))
 #define sym_to_pfn(x)		__phys_to_pfn(__pa_symbol(x))
 
-#ifdef CONFIG_CFI_CLANG
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function address
- * references with the address of the function's CFI jump table
- * entry. The function_nocfi macro always returns the address of the
- * actual function instead.
- */
-#define function_nocfi(x) ({						\
-	void *addr;							\
-	asm("adrp %0, " __stringify(x) "\n\t"				\
-	    "add  %0, %0, :lo12:" __stringify(x)			\
-	    : "=r" (addr));						\
-	addr;								\
-})
-#endif
-
 /*
  *  virt_to_page(x)	convert a _valid_ virtual address to struct page *
  *  virt_addr_valid(x)	indicates whether a virtual address is valid
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index df5b405e6305..099e529a5d25 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -213,6 +213,16 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	__v;								\
 })
 
+/*
+ * With CONFIG_CFI_CLANG, the compiler replaces function addresses in
+ * instrumented C code with jump table addresses. Architectures that
+ * support CFI can define this macro to return the actual function address
+ * when needed.
+ */
+#ifndef function_nocfi
+#define function_nocfi(x) (x)
+#endif
+
 #endif /* __KERNEL__ */
 
 /*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c274f75efcf9..b8c28b10f25d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -124,16 +124,6 @@ extern int mmap_rnd_compat_bits __read_mostly;
 #define lm_alias(x)	__va(__pa_symbol(x))
 #endif
 
-/*
- * With CONFIG_CFI_CLANG, the compiler replaces function addresses in
- * instrumented C code with jump table addresses. Architectures that
- * support CFI can define this macro to return the actual function address
- * when needed.
- */
-#ifndef function_nocfi
-#define function_nocfi(x) (x)
-#endif
-
 /*
  * To prevent common memory management code establishing
  * a zero page mapping on a read fault.

From 583bfd484bcc85e9371e7205fa9e827c18ae34fb Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 29 Apr 2021 16:26:12 -0700
Subject: [PATCH 1682/3292] x86, lto: Enable Clang LTO for 32-bit as well

Commit b33fff07e3e3 ("x86, build: allow LTO to be selected") enabled
support for LTO for x86_64 but 32-bit works fine as well.

I tested the following config combinations:

* i386_defconfig + CONFIG_LTO_CLANG_FULL=y

* i386_defconfig + CONFIG_LTO_CLANG_THIN=y

* ARCH=i386 allmodconfig + CONFIG_LTO_CLANG_THIN=y

with LLVM 11.1.0, 12.0.0, and 13.0.0 from git without any build
failures. The defconfigs boot in QEMU with no new warnings.

Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210429232611.3966964-1-nathan@kernel.org
---
 arch/x86/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0045e1b44190..12fa0d7bfa64 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -103,8 +103,8 @@ config X86
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
-	select ARCH_SUPPORTS_LTO_CLANG		if X86_64
-	select ARCH_SUPPORTS_LTO_CLANG_THIN	if X86_64
+	select ARCH_SUPPORTS_LTO_CLANG
+	select ARCH_SUPPORTS_LTO_CLANG_THIN
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS

From 2d7ff2d83cac1ca8742aa02cac0516e58e8c65c8 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Sat, 12 Jun 2021 10:32:15 -0600
Subject: [PATCH 1683/3292] nexthops: Add selftests for cleanup of known bad
 route add

Test cleanup path for routes usinig nexthop objects before the
reference is taken on the nexthop. Specifically, bad metric for
ipv4 and ipv6 and source routing for ipv6.

Selftests that correspond to the recent bug fix:
    821bbf79fe46 ("ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions")

Signed-off-by: David Ahern <dsahern@kernel.org>
Cc: Coco Li <lixiaoyan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 49774a8a7736..0d293391e9a4 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -925,6 +925,14 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
 	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
 
+	# route can not use prefsrc with nexthops
+	run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1"
+	log_test $? 2 "IPv6 route can not use src routing with external nexthop"
+
+	# check cleanup path on invalid metric
+	run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo"
+	log_test $? 2 "IPv6 route with invalid metric"
+
 	# rpfilter and default route
 	$IP nexthop flush >/dev/null 2>&1
 	run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
@@ -1366,6 +1374,10 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
 	log_test $? 2 "Nexthop replace with invalid scope for existing route"
 
+	# check cleanup path on invalid metric
+	run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo"
+	log_test $? 2 "IPv4 route with invalid metric"
+
 	#
 	# add route with nexthop and check traffic
 	#

From 9917060fc30a40f1ef899dc8c61ae964a9a4407c Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Sun, 13 Jun 2021 16:23:05 +0530
Subject: [PATCH 1684/3292] octeontx2-pf: Cleanup flow rule management

Current MCAM allocation scheme allocates a single lot of
MCAM entries for ntuple filters, unicast filters and VF VLAN
rules. This patch attempts to cleanup this logic by segregating
MCAM rule allocation and management for Ntuple rules and unicast,
VF VLAN rules. This segregation will result in reusing most of
the logic for supporting ntuple filters for VF devices.

Also added debug messages for MCAM entry allocation failures.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   |  11 +-
 .../marvell/octeontx2/nic/otx2_common.h       |  30 +--
 .../marvell/octeontx2/nic/otx2_flows.c        | 187 ++++++++++++++----
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  |   8 +-
 .../ethernet/marvell/octeontx2/nic/otx2_tc.c  |   4 +-
 5 files changed, 179 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 5c2bd4337170..ef833fe39114 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -2537,8 +2537,11 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu,
 	rsp->free_count = 0;
 
 	/* Check if ref_entry is within range */
-	if (req->priority && req->ref_entry >= mcam->bmap_entries)
+	if (req->priority && req->ref_entry >= mcam->bmap_entries) {
+		dev_err(rvu->dev, "%s: reference entry %d is out of range\n",
+			__func__, req->ref_entry);
 		return NPC_MCAM_INVALID_REQ;
+	}
 
 	/* ref_entry can't be '0' if requested priority is high.
 	 * Can't be last entry if requested priority is low.
@@ -2551,8 +2554,12 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu,
 	/* Since list of allocated indices needs to be sent to requester,
 	 * max number of non-contiguous entries per mbox msg is limited.
 	 */
-	if (!req->contig && req->count > NPC_MAX_NONCONTIG_ENTRIES)
+	if (!req->contig && req->count > NPC_MAX_NONCONTIG_ENTRIES) {
+		dev_err(rvu->dev,
+			"%s: %d Non-contiguous MCAM entries requested is morethan max (%d) allowed\n",
+			__func__, req->count, NPC_MAX_NONCONTIG_ENTRIES);
 		return NPC_MCAM_INVALID_REQ;
+	}
 
 	/* Alloc request from PFFUNC with no NIXLF attached should be denied */
 	if (!is_nixlf_attached(rvu, pcifunc))
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 543aee726fbe..e5616d466236 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -267,24 +267,26 @@ struct otx2_mac_table {
 
 struct otx2_flow_config {
 	u16			entry[NPC_MAX_NONCONTIG_ENTRIES];
-	u32			nr_flows;
-#define OTX2_MAX_NTUPLE_FLOWS	32
-#define OTX2_MAX_UNICAST_FLOWS	8
-#define OTX2_MAX_VLAN_FLOWS	1
-#define OTX2_MAX_TC_FLOWS	OTX2_MAX_NTUPLE_FLOWS
-#define OTX2_MCAM_COUNT		(OTX2_MAX_NTUPLE_FLOWS + \
+	u16			*flow_ent;
+	u16			*def_ent;
+	u16			nr_flows;
+#define OTX2_DEFAULT_FLOWCOUNT		16
+#define OTX2_MAX_UNICAST_FLOWS		8
+#define OTX2_MAX_VLAN_FLOWS		1
+#define OTX2_MAX_TC_FLOWS	OTX2_DEFAULT_FLOWCOUNT
+#define OTX2_MCAM_COUNT		(OTX2_DEFAULT_FLOWCOUNT + \
 				 OTX2_MAX_UNICAST_FLOWS + \
 				 OTX2_MAX_VLAN_FLOWS)
-	u32			ntuple_offset;
-	u32			unicast_offset;
-	u32			rx_vlan_offset;
-	u32			vf_vlan_offset;
-#define OTX2_PER_VF_VLAN_FLOWS	2 /* rx+tx per VF */
+	u16			ntuple_offset;
+	u16			unicast_offset;
+	u16			rx_vlan_offset;
+	u16			vf_vlan_offset;
+#define OTX2_PER_VF_VLAN_FLOWS	2 /* Rx + Tx per VF */
 #define OTX2_VF_VLAN_RX_INDEX	0
 #define OTX2_VF_VLAN_TX_INDEX	1
-	u32			tc_flower_offset;
-	u32                     ntuple_max_flows;
-	u32			tc_max_flows;
+	u16			tc_flower_offset;
+	u16                     ntuple_max_flows;
+	u16			tc_max_flows;
 	struct list_head	flow_list;
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 80b769079d51..8c97106bdd1c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -20,13 +20,125 @@ struct otx2_flow {
 	int vf;
 };
 
+static void otx2_clear_ntuple_flow_info(struct otx2_nic *pfvf, struct otx2_flow_config *flow_cfg)
+{
+	devm_kfree(pfvf->dev, flow_cfg->flow_ent);
+	flow_cfg->flow_ent = NULL;
+	flow_cfg->ntuple_max_flows = 0;
+	flow_cfg->tc_max_flows = 0;
+}
+
+static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
+{
+	struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
+	struct npc_mcam_free_entry_req *req;
+	int ent, err;
+
+	if (!flow_cfg->ntuple_max_flows)
+		return 0;
+
+	mutex_lock(&pfvf->mbox.lock);
+	for (ent = 0; ent < flow_cfg->ntuple_max_flows; ent++) {
+		req = otx2_mbox_alloc_msg_npc_mcam_free_entry(&pfvf->mbox);
+		if (!req)
+			break;
+
+		req->entry = flow_cfg->flow_ent[ent];
+
+		/* Send message to AF to free MCAM entries */
+		err = otx2_sync_mbox_msg(&pfvf->mbox);
+		if (err)
+			break;
+	}
+	mutex_unlock(&pfvf->mbox.lock);
+	otx2_clear_ntuple_flow_info(pfvf, flow_cfg);
+	return 0;
+}
+
+static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
+{
+	struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
+	struct npc_mcam_alloc_entry_req *req;
+	struct npc_mcam_alloc_entry_rsp *rsp;
+	int ent, allocated = 0;
+
+	/* Free current ones and allocate new ones with requested count */
+	otx2_free_ntuple_mcam_entries(pfvf);
+
+	if (!count)
+		return 0;
+
+	flow_cfg->flow_ent = devm_kmalloc_array(pfvf->dev, count,
+						sizeof(u16), GFP_KERNEL);
+	if (!flow_cfg->flow_ent)
+		return -ENOMEM;
+
+	mutex_lock(&pfvf->mbox.lock);
+
+	/* In a single request a max of NPC_MAX_NONCONTIG_ENTRIES MCAM entries
+	 * can only be allocated.
+	 */
+	while (allocated < count) {
+		req = otx2_mbox_alloc_msg_npc_mcam_alloc_entry(&pfvf->mbox);
+		if (!req)
+			goto exit;
+
+		req->contig = false;
+		req->count = (count - allocated) > NPC_MAX_NONCONTIG_ENTRIES ?
+				NPC_MAX_NONCONTIG_ENTRIES : count - allocated;
+		req->priority = NPC_MCAM_HIGHER_PRIO;
+		req->ref_entry = flow_cfg->def_ent[0];
+
+		/* Send message to AF */
+		if (otx2_sync_mbox_msg(&pfvf->mbox))
+			goto exit;
+
+		rsp = (struct npc_mcam_alloc_entry_rsp *)otx2_mbox_get_rsp
+			(&pfvf->mbox.mbox, 0, &req->hdr);
+
+		for (ent = 0; ent < rsp->count; ent++)
+			flow_cfg->flow_ent[ent + allocated] = rsp->entry_list[ent];
+
+		allocated += rsp->count;
+
+		/* If this request is not fulfilled, no need to send
+		 * further requests.
+		 */
+		if (rsp->count != req->count)
+			break;
+	}
+
+exit:
+	mutex_unlock(&pfvf->mbox.lock);
+
+	flow_cfg->ntuple_offset = 0;
+	flow_cfg->ntuple_max_flows = allocated;
+	flow_cfg->tc_max_flows = allocated;
+
+	if (allocated != count)
+		netdev_info(pfvf->netdev,
+			    "Unable to allocate %d MCAM entries for ntuple, got %d\n",
+			    count, allocated);
+
+	return allocated;
+}
+
 int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 {
 	struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
 	struct npc_mcam_alloc_entry_req *req;
 	struct npc_mcam_alloc_entry_rsp *rsp;
 	int vf_vlan_max_flows;
-	int i;
+	int ent, count;
+
+	vf_vlan_max_flows = pfvf->total_vfs * OTX2_PER_VF_VLAN_FLOWS;
+	count = OTX2_MAX_UNICAST_FLOWS +
+			OTX2_MAX_VLAN_FLOWS + vf_vlan_max_flows;
+
+	flow_cfg->def_ent = devm_kmalloc_array(pfvf->dev, count,
+					       sizeof(u16), GFP_KERNEL);
+	if (!flow_cfg->def_ent)
+		return -ENOMEM;
 
 	mutex_lock(&pfvf->mbox.lock);
 
@@ -36,9 +148,8 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 		return -ENOMEM;
 	}
 
-	vf_vlan_max_flows = pfvf->total_vfs * OTX2_PER_VF_VLAN_FLOWS;
 	req->contig = false;
-	req->count = OTX2_MCAM_COUNT + vf_vlan_max_flows;
+	req->count = count;
 
 	/* Send message to AF */
 	if (otx2_sync_mbox_msg(&pfvf->mbox)) {
@@ -51,37 +162,36 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 
 	if (rsp->count != req->count) {
 		netdev_info(pfvf->netdev,
-			    "Unable to allocate %d MCAM entries, got %d\n",
-			    req->count, rsp->count);
-		/* support only ntuples here */
-		flow_cfg->ntuple_max_flows = rsp->count;
-		flow_cfg->ntuple_offset = 0;
-		pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
-		flow_cfg->tc_max_flows = flow_cfg->ntuple_max_flows;
-		pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
-	} else {
-		flow_cfg->vf_vlan_offset = 0;
-		flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset +
-						vf_vlan_max_flows;
-		flow_cfg->tc_flower_offset = flow_cfg->ntuple_offset;
-		flow_cfg->unicast_offset = flow_cfg->ntuple_offset +
-						OTX2_MAX_NTUPLE_FLOWS;
-		flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset +
-						OTX2_MAX_UNICAST_FLOWS;
-		pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
-		pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT;
-		pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT;
-		pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT;
-		pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
+			    "Unable to allocate MCAM entries for ucast, vlan and vf_vlan\n");
+		mutex_unlock(&pfvf->mbox.lock);
+		devm_kfree(pfvf->dev, flow_cfg->def_ent);
+		return 0;
 	}
 
-	for (i = 0; i < rsp->count; i++)
-		flow_cfg->entry[i] = rsp->entry_list[i];
+	for (ent = 0; ent < rsp->count; ent++)
+		flow_cfg->def_ent[ent] = rsp->entry_list[ent];
+
+	flow_cfg->vf_vlan_offset = 0;
+	flow_cfg->unicast_offset = vf_vlan_max_flows;
+	flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset +
+					OTX2_MAX_UNICAST_FLOWS;
+	pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT;
+	pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT;
+	pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT;
 
 	pfvf->flags |= OTX2_FLAG_MCAM_ENTRIES_ALLOC;
-
 	mutex_unlock(&pfvf->mbox.lock);
 
+	/* Allocate entries for Ntuple filters */
+	count = otx2_alloc_ntuple_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT);
+	if (count <= 0) {
+		otx2_clear_ntuple_flow_info(pfvf, flow_cfg);
+		return 0;
+	}
+
+	pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
+	pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
+
 	return 0;
 }
 
@@ -96,13 +206,14 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
 
 	INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
 
-	pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS;
-	pf->flow_cfg->tc_max_flows = pf->flow_cfg->ntuple_max_flows;
-
 	err = otx2_alloc_mcam_entries(pf);
 	if (err)
 		return err;
 
+	/* Check if MCAM entries are allocate or not */
+	if (!(pf->flags & OTX2_FLAG_UCAST_FLTR_SUPPORT))
+		return 0;
+
 	pf->mac_table = devm_kzalloc(pf->dev, sizeof(struct otx2_mac_table)
 					* OTX2_MAX_UNICAST_FLOWS, GFP_KERNEL);
 	if (!pf->mac_table)
@@ -146,7 +257,7 @@ static int otx2_do_add_macfilter(struct otx2_nic *pf, const u8 *mac)
 		ether_addr_copy(pf->mac_table[i].addr, mac);
 		pf->mac_table[i].inuse = true;
 		pf->mac_table[i].mcam_entry =
-			flow_cfg->entry[i + flow_cfg->unicast_offset];
+			flow_cfg->def_ent[i + flow_cfg->unicast_offset];
 		req->entry =  pf->mac_table[i].mcam_entry;
 		break;
 	}
@@ -732,8 +843,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
 		if (!flow)
 			return -ENOMEM;
 		flow->location = fsp->location;
-		flow->entry = flow_cfg->entry[flow_cfg->ntuple_offset +
-						flow->location];
+		flow->entry = flow_cfg->flow_ent[flow->location];
 		new = true;
 	}
 	/* struct copy */
@@ -837,9 +947,8 @@ int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf)
 		return -ENOMEM;
 	}
 
-	req->start = flow_cfg->entry[flow_cfg->ntuple_offset];
-	req->end   = flow_cfg->entry[flow_cfg->ntuple_offset +
-				      flow_cfg->ntuple_max_flows - 1];
+	req->start = flow_cfg->flow_ent[0];
+	req->end   = flow_cfg->flow_ent[flow_cfg->ntuple_max_flows - 1];
 	err = otx2_sync_mbox_msg(&pfvf->mbox);
 	mutex_unlock(&pfvf->mbox.lock);
 
@@ -906,7 +1015,7 @@ int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf)
 		return -ENOMEM;
 	}
 
-	req->entry = flow_cfg->entry[flow_cfg->rx_vlan_offset];
+	req->entry = flow_cfg->def_ent[flow_cfg->rx_vlan_offset];
 	req->intf = NIX_INTF_RX;
 	ether_addr_copy(req->packet.dmac, pfvf->netdev->dev_addr);
 	eth_broadcast_addr((u8 *)&req->mask.dmac);
@@ -935,7 +1044,7 @@ static int otx2_delete_rxvlan_offload_flow(struct otx2_nic *pfvf)
 		return -ENOMEM;
 	}
 
-	req->entry = flow_cfg->entry[flow_cfg->rx_vlan_offset];
+	req->entry = flow_cfg->def_ent[flow_cfg->rx_vlan_offset];
 	/* Send message to AF */
 	err = otx2_sync_mbox_msg(&pfvf->mbox);
 	mutex_unlock(&pfvf->mbox.lock);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 82b53e72268f..65f505b07b5d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -2109,7 +2109,7 @@ static int otx2_do_set_vf_vlan(struct otx2_nic *pf, int vf, u16 vlan, u8 qos,
 		}
 		idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_RX_INDEX);
 		del_req->entry =
-			flow_cfg->entry[flow_cfg->vf_vlan_offset + idx];
+			flow_cfg->def_ent[flow_cfg->vf_vlan_offset + idx];
 		err = otx2_sync_mbox_msg(&pf->mbox);
 		if (err)
 			goto out;
@@ -2122,7 +2122,7 @@ static int otx2_do_set_vf_vlan(struct otx2_nic *pf, int vf, u16 vlan, u8 qos,
 		}
 		idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_TX_INDEX);
 		del_req->entry =
-			flow_cfg->entry[flow_cfg->vf_vlan_offset + idx];
+			flow_cfg->def_ent[flow_cfg->vf_vlan_offset + idx];
 		err = otx2_sync_mbox_msg(&pf->mbox);
 
 		goto out;
@@ -2136,7 +2136,7 @@ static int otx2_do_set_vf_vlan(struct otx2_nic *pf, int vf, u16 vlan, u8 qos,
 	}
 
 	idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_RX_INDEX);
-	req->entry = flow_cfg->entry[flow_cfg->vf_vlan_offset + idx];
+	req->entry = flow_cfg->def_ent[flow_cfg->vf_vlan_offset + idx];
 	req->packet.vlan_tci = htons(vlan);
 	req->mask.vlan_tci = htons(VLAN_VID_MASK);
 	/* af fills the destination mac addr */
@@ -2187,7 +2187,7 @@ static int otx2_do_set_vf_vlan(struct otx2_nic *pf, int vf, u16 vlan, u8 qos,
 
 	eth_zero_addr((u8 *)&req->mask.dmac);
 	idx = ((vf * OTX2_PER_VF_VLAN_FLOWS) + OTX2_VF_VLAN_TX_INDEX);
-	req->entry = flow_cfg->entry[flow_cfg->vf_vlan_offset + idx];
+	req->entry = flow_cfg->def_ent[flow_cfg->vf_vlan_offset + idx];
 	req->features = BIT_ULL(NPC_DMAC);
 	req->channel = pf->hw.tx_chan_base;
 	req->intf = NIX_INTF_TX;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 51157b283f6f..26712c091c63 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -570,8 +570,8 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
 					       nic->flow_cfg->tc_max_flows);
 	req->channel = nic->hw.rx_chan_base;
-	req->entry = nic->flow_cfg->entry[nic->flow_cfg->tc_flower_offset +
-					  nic->flow_cfg->tc_max_flows - new_node->bitpos];
+	req->entry = nic->flow_cfg->flow_ent[nic->flow_cfg->tc_flower_offset +
+				nic->flow_cfg->tc_max_flows - new_node->bitpos];
 	req->intf = NIX_INTF_RX;
 	req->set_cntr = 1;
 	new_node->entry = req->entry;

From ce4f8afd85d6871e3ad76ce737fbbfc0521fa3e4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 13 Jun 2021 14:27:40 +0100
Subject: [PATCH 1685/3292] net: phy: micrel: remove redundant assignment to
 pointer of_node

The pointer of_node is being initialized with a value that is never
read and it is being updated later with a new value inside a do-while
loop. The initialization is redundant and can be removed and the
pointer dev is no longer required and can be removed too.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index a14a00328fa3..93cf9500728f 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -488,8 +488,7 @@ static int ksz9021_load_values_from_of(struct phy_device *phydev,
 
 static int ksz9021_config_init(struct phy_device *phydev)
 {
-	const struct device *dev = &phydev->mdio.dev;
-	const struct device_node *of_node = dev->of_node;
+	const struct device_node *of_node;
 	const struct device *dev_walker;
 
 	/* The Micrel driver has a deprecated option to place phy OF
@@ -711,8 +710,7 @@ static int ksz9031_config_rgmii_delay(struct phy_device *phydev)
 
 static int ksz9031_config_init(struct phy_device *phydev)
 {
-	const struct device *dev = &phydev->mdio.dev;
-	const struct device_node *of_node = dev->of_node;
+	const struct device_node *of_node;
 	static const char *clk_skews[2] = {"rxc-skew-ps", "txc-skew-ps"};
 	static const char *rx_data_skews[4] = {
 		"rxd0-skew-ps", "rxd1-skew-ps",
@@ -907,8 +905,7 @@ static int ksz9131_config_rgmii_delay(struct phy_device *phydev)
 
 static int ksz9131_config_init(struct phy_device *phydev)
 {
-	const struct device *dev = &phydev->mdio.dev;
-	struct device_node *of_node = dev->of_node;
+	struct device_node *of_node;
 	char *clk_skews[2] = {"rxc-skew-psec", "txc-skew-psec"};
 	char *rx_data_skews[4] = {
 		"rxd0-skew-psec", "rxd1-skew-psec",

From b5ec0705ffe891910e8e615b2efb1c2b292e917c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 13 Jun 2021 14:46:36 +0100
Subject: [PATCH 1686/3292] ipv6: fib6: remove redundant initialization of
 variable err

The variable err is being initialized with a value that is never read, the
assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8f9a83314de7..40f3e4f9f33a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -467,7 +467,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
 static int __net_init fib6_rules_net_init(struct net *net)
 {
 	struct fib_rules_ops *ops;
-	int err = -ENOMEM;
+	int err;
 
 	ops = fib_rules_register(&fib6_rules_ops_template, net);
 	if (IS_ERR(ops))

From b84b53ee8337ca69512d25295961571fa08a219d Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 13 Jun 2021 09:25:22 -0500
Subject: [PATCH 1687/3292] net: qualcomm: rmnet: always expose a few functions

A recent change tidied up some conditional code, avoiding the use of
some #ifdefs.  Unfortunately, if CONFIG_IPV6 was not enabled, it
meant that two functions were referenced but never defined.

The easiest fix is to just define stubs for these functions if
CONFIG_IPV6 is not defined.  This will soon be simplified further
by some other development in the works...

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 75db5b07f8c39 ("net: qualcomm: rmnet: eliminate some ifdefs")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index a6ce22f60a00..39fba3a347fa 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -153,6 +153,14 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 	priv->stats.csum_ok++;
 	return 0;
 }
+#else
+static int
+rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
+			       struct rmnet_map_dl_csum_trailer *csum_trailer,
+			       struct rmnet_priv *priv)
+{
+	return 0;
+}
 #endif
 
 static void rmnet_map_complement_ipv4_txporthdr_csum_field(void *iphdr)
@@ -223,6 +231,13 @@ rmnet_map_ipv6_ul_csum_header(struct ipv6hdr *ipv6hdr,
 
 	rmnet_map_complement_ipv6_txporthdr_csum_field(ipv6hdr);
 }
+#else
+static void
+rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
+			      struct rmnet_map_ul_csum_header *ul_header,
+			      struct sk_buff *skb)
+{
+}
 #endif
 
 static void rmnet_map_v5_checksum_uplink_packet(struct sk_buff *skb,

From ffa85b73c3c4143a8e8087c0930f6c5a6ead8e9f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 13 Jun 2021 14:43:44 +0000
Subject: [PATCH 1688/3292] mld: avoid unnecessary high order page allocation
 in mld_newpack()

If link mtu is too big, mld_newpack() allocates high-order page.
But most mld packets don't need high-order page.
So, it might waste unnecessary pages.
To avoid this, it makes mld_newpack() try to allocate order-0 page.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d36ef9d25e73..54ec163fbafa 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1729,22 +1729,25 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
 
 static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
 {
+	u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT,
+		     2, 0, 0, IPV6_TLV_PADN, 0 };
 	struct net_device *dev = idev->dev;
-	struct net *net = dev_net(dev);
-	struct sock *sk = net->ipv6.igmp_sk;
-	struct sk_buff *skb;
-	struct mld2_report *pmr;
-	struct in6_addr addr_buf;
-	const struct in6_addr *saddr;
 	int hlen = LL_RESERVED_SPACE(dev);
 	int tlen = dev->needed_tailroom;
-	unsigned int size = mtu + hlen + tlen;
+	struct net *net = dev_net(dev);
+	const struct in6_addr *saddr;
+	struct in6_addr addr_buf;
+	struct mld2_report *pmr;
+	struct sk_buff *skb;
+	unsigned int size;
+	struct sock *sk;
 	int err;
-	u8 ra[8] = { IPPROTO_ICMPV6, 0,
-		     IPV6_TLV_ROUTERALERT, 2, 0, 0,
-		     IPV6_TLV_PADN, 0 };
 
-	/* we assume size > sizeof(ra) here */
+	sk = net->ipv6.igmp_sk;
+	/* we assume size > sizeof(ra) here
+	 * Also try to not allocate high-order pages for big MTU
+	 */
+	size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
 	skb = sock_alloc_send_skb(sk, size, 1, &err);
 	if (!skb)
 		return NULL;

From ec4b94f9b37bf028cb9b9c39cd1c1cb5dd1ab40c Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:18 +0200
Subject: [PATCH 1689/3292] net: phy: micrel: move phy reg offsets to common
 header

Some micrel devices share the same PHY register defines. This patch
moves them to one common header so other drivers can reuse them.
And reuse generic MII_* defines where possible.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c     | 119 ++++++++++++------------
 drivers/net/dsa/microchip/ksz8795_reg.h |  62 ------------
 drivers/net/ethernet/micrel/ksz884x.c   | 105 +++------------------
 include/linux/micrel_phy.h              |  13 +++
 4 files changed, 88 insertions(+), 211 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index ad509a57a945..ba065003623f 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -15,6 +15,7 @@
 #include <linux/phy.h>
 #include <linux/etherdevice.h>
 #include <linux/if_bridge.h>
+#include <linux/micrel_phy.h>
 #include <net/dsa.h>
 #include <net/switchdev.h>
 
@@ -731,88 +732,88 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 	u8 p = phy;
 
 	switch (reg) {
-	case PHY_REG_CTRL:
+	case MII_BMCR:
 		ksz_pread8(dev, p, regs[P_NEG_RESTART_CTRL], &restart);
 		ksz_pread8(dev, p, regs[P_SPEED_STATUS], &speed);
 		ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
 		if (restart & PORT_PHY_LOOPBACK)
-			data |= PHY_LOOPBACK;
+			data |= BMCR_LOOPBACK;
 		if (ctrl & PORT_FORCE_100_MBIT)
-			data |= PHY_SPEED_100MBIT;
+			data |= BMCR_SPEED100;
 		if (ksz_is_ksz88x3(dev)) {
 			if ((ctrl & PORT_AUTO_NEG_ENABLE))
-				data |= PHY_AUTO_NEG_ENABLE;
+				data |= BMCR_ANENABLE;
 		} else {
 			if (!(ctrl & PORT_AUTO_NEG_DISABLE))
-				data |= PHY_AUTO_NEG_ENABLE;
+				data |= BMCR_ANENABLE;
 		}
 		if (restart & PORT_POWER_DOWN)
-			data |= PHY_POWER_DOWN;
+			data |= BMCR_PDOWN;
 		if (restart & PORT_AUTO_NEG_RESTART)
-			data |= PHY_AUTO_NEG_RESTART;
+			data |= BMCR_ANRESTART;
 		if (ctrl & PORT_FORCE_FULL_DUPLEX)
-			data |= PHY_FULL_DUPLEX;
+			data |= BMCR_FULLDPLX;
 		if (speed & PORT_HP_MDIX)
-			data |= PHY_HP_MDIX;
+			data |= KSZ886X_BMCR_HP_MDIX;
 		if (restart & PORT_FORCE_MDIX)
-			data |= PHY_FORCE_MDIX;
+			data |= KSZ886X_BMCR_FORCE_MDI;
 		if (restart & PORT_AUTO_MDIX_DISABLE)
-			data |= PHY_AUTO_MDIX_DISABLE;
+			data |= KSZ886X_BMCR_DISABLE_AUTO_MDIX;
 		if (restart & PORT_TX_DISABLE)
-			data |= PHY_TRANSMIT_DISABLE;
+			data |= KSZ886X_BMCR_DISABLE_TRANSMIT;
 		if (restart & PORT_LED_OFF)
-			data |= PHY_LED_DISABLE;
+			data |= KSZ886X_BMCR_DISABLE_LED;
 		break;
-	case PHY_REG_STATUS:
+	case MII_BMSR:
 		ksz_pread8(dev, p, regs[P_LINK_STATUS], &link);
-		data = PHY_100BTX_FD_CAPABLE |
-		       PHY_100BTX_CAPABLE |
-		       PHY_10BT_FD_CAPABLE |
-		       PHY_10BT_CAPABLE |
-		       PHY_AUTO_NEG_CAPABLE;
+		data = BMSR_100FULL |
+		       BMSR_100HALF |
+		       BMSR_10FULL |
+		       BMSR_10HALF |
+		       BMSR_ANEGCAPABLE;
 		if (link & PORT_AUTO_NEG_COMPLETE)
-			data |= PHY_AUTO_NEG_ACKNOWLEDGE;
+			data |= BMSR_ANEGCOMPLETE;
 		if (link & PORT_STAT_LINK_GOOD)
-			data |= PHY_LINK_STATUS;
+			data |= BMSR_LSTATUS;
 		break;
-	case PHY_REG_ID_1:
+	case MII_PHYSID1:
 		data = KSZ8795_ID_HI;
 		break;
-	case PHY_REG_ID_2:
+	case MII_PHYSID2:
 		if (ksz_is_ksz88x3(dev))
 			data = KSZ8863_ID_LO;
 		else
 			data = KSZ8795_ID_LO;
 		break;
-	case PHY_REG_AUTO_NEGOTIATION:
+	case MII_ADVERTISE:
 		ksz_pread8(dev, p, regs[P_LOCAL_CTRL], &ctrl);
-		data = PHY_AUTO_NEG_802_3;
+		data = ADVERTISE_CSMA;
 		if (ctrl & PORT_AUTO_NEG_SYM_PAUSE)
-			data |= PHY_AUTO_NEG_SYM_PAUSE;
+			data |= ADVERTISE_PAUSE_CAP;
 		if (ctrl & PORT_AUTO_NEG_100BTX_FD)
-			data |= PHY_AUTO_NEG_100BTX_FD;
+			data |= ADVERTISE_100FULL;
 		if (ctrl & PORT_AUTO_NEG_100BTX)
-			data |= PHY_AUTO_NEG_100BTX;
+			data |= ADVERTISE_100HALF;
 		if (ctrl & PORT_AUTO_NEG_10BT_FD)
-			data |= PHY_AUTO_NEG_10BT_FD;
+			data |= ADVERTISE_10FULL;
 		if (ctrl & PORT_AUTO_NEG_10BT)
-			data |= PHY_AUTO_NEG_10BT;
+			data |= ADVERTISE_10HALF;
 		break;
-	case PHY_REG_REMOTE_CAPABILITY:
+	case MII_LPA:
 		ksz_pread8(dev, p, regs[P_REMOTE_STATUS], &link);
-		data = PHY_AUTO_NEG_802_3;
+		data = LPA_SLCT;
 		if (link & PORT_REMOTE_SYM_PAUSE)
-			data |= PHY_AUTO_NEG_SYM_PAUSE;
+			data |= LPA_PAUSE_CAP;
 		if (link & PORT_REMOTE_100BTX_FD)
-			data |= PHY_AUTO_NEG_100BTX_FD;
+			data |= LPA_100FULL;
 		if (link & PORT_REMOTE_100BTX)
-			data |= PHY_AUTO_NEG_100BTX;
+			data |= LPA_100HALF;
 		if (link & PORT_REMOTE_10BT_FD)
-			data |= PHY_AUTO_NEG_10BT_FD;
+			data |= LPA_10FULL;
 		if (link & PORT_REMOTE_10BT)
-			data |= PHY_AUTO_NEG_10BT;
-		if (data & ~PHY_AUTO_NEG_802_3)
-			data |= PHY_REMOTE_ACKNOWLEDGE_NOT;
+			data |= LPA_10HALF;
+		if (data & ~LPA_SLCT)
+			data |= LPA_LPACK;
 		break;
 	default:
 		processed = false;
@@ -830,14 +831,14 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 	u8 p = phy;
 
 	switch (reg) {
-	case PHY_REG_CTRL:
+	case MII_BMCR:
 
 		/* Do not support PHY reset function. */
-		if (val & PHY_RESET)
+		if (val & BMCR_RESET)
 			break;
 		ksz_pread8(dev, p, regs[P_SPEED_STATUS], &speed);
 		data = speed;
-		if (val & PHY_HP_MDIX)
+		if (val & KSZ886X_BMCR_HP_MDIX)
 			data |= PORT_HP_MDIX;
 		else
 			data &= ~PORT_HP_MDIX;
@@ -846,12 +847,12 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 		ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
 		data = ctrl;
 		if (ksz_is_ksz88x3(dev)) {
-			if ((val & PHY_AUTO_NEG_ENABLE))
+			if ((val & BMCR_ANENABLE))
 				data |= PORT_AUTO_NEG_ENABLE;
 			else
 				data &= ~PORT_AUTO_NEG_ENABLE;
 		} else {
-			if (!(val & PHY_AUTO_NEG_ENABLE))
+			if (!(val & BMCR_ANENABLE))
 				data |= PORT_AUTO_NEG_DISABLE;
 			else
 				data &= ~PORT_AUTO_NEG_DISABLE;
@@ -861,11 +862,11 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 				data |= PORT_AUTO_NEG_DISABLE;
 		}
 
-		if (val & PHY_SPEED_100MBIT)
+		if (val & BMCR_SPEED100)
 			data |= PORT_FORCE_100_MBIT;
 		else
 			data &= ~PORT_FORCE_100_MBIT;
-		if (val & PHY_FULL_DUPLEX)
+		if (val & BMCR_FULLDPLX)
 			data |= PORT_FORCE_FULL_DUPLEX;
 		else
 			data &= ~PORT_FORCE_FULL_DUPLEX;
@@ -873,38 +874,38 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 			ksz_pwrite8(dev, p, regs[P_FORCE_CTRL], data);
 		ksz_pread8(dev, p, regs[P_NEG_RESTART_CTRL], &restart);
 		data = restart;
-		if (val & PHY_LED_DISABLE)
+		if (val & KSZ886X_BMCR_DISABLE_LED)
 			data |= PORT_LED_OFF;
 		else
 			data &= ~PORT_LED_OFF;
-		if (val & PHY_TRANSMIT_DISABLE)
+		if (val & KSZ886X_BMCR_DISABLE_TRANSMIT)
 			data |= PORT_TX_DISABLE;
 		else
 			data &= ~PORT_TX_DISABLE;
-		if (val & PHY_AUTO_NEG_RESTART)
+		if (val & BMCR_ANRESTART)
 			data |= PORT_AUTO_NEG_RESTART;
 		else
 			data &= ~(PORT_AUTO_NEG_RESTART);
-		if (val & PHY_POWER_DOWN)
+		if (val & BMCR_PDOWN)
 			data |= PORT_POWER_DOWN;
 		else
 			data &= ~PORT_POWER_DOWN;
-		if (val & PHY_AUTO_MDIX_DISABLE)
+		if (val & KSZ886X_BMCR_DISABLE_AUTO_MDIX)
 			data |= PORT_AUTO_MDIX_DISABLE;
 		else
 			data &= ~PORT_AUTO_MDIX_DISABLE;
-		if (val & PHY_FORCE_MDIX)
+		if (val & KSZ886X_BMCR_FORCE_MDI)
 			data |= PORT_FORCE_MDIX;
 		else
 			data &= ~PORT_FORCE_MDIX;
-		if (val & PHY_LOOPBACK)
+		if (val & BMCR_LOOPBACK)
 			data |= PORT_PHY_LOOPBACK;
 		else
 			data &= ~PORT_PHY_LOOPBACK;
 		if (data != restart)
 			ksz_pwrite8(dev, p, regs[P_NEG_RESTART_CTRL], data);
 		break;
-	case PHY_REG_AUTO_NEGOTIATION:
+	case MII_ADVERTISE:
 		ksz_pread8(dev, p, regs[P_LOCAL_CTRL], &ctrl);
 		data = ctrl;
 		data &= ~(PORT_AUTO_NEG_SYM_PAUSE |
@@ -912,15 +913,15 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 			  PORT_AUTO_NEG_100BTX |
 			  PORT_AUTO_NEG_10BT_FD |
 			  PORT_AUTO_NEG_10BT);
-		if (val & PHY_AUTO_NEG_SYM_PAUSE)
+		if (val & ADVERTISE_PAUSE_CAP)
 			data |= PORT_AUTO_NEG_SYM_PAUSE;
-		if (val & PHY_AUTO_NEG_100BTX_FD)
+		if (val & ADVERTISE_100FULL)
 			data |= PORT_AUTO_NEG_100BTX_FD;
-		if (val & PHY_AUTO_NEG_100BTX)
+		if (val & ADVERTISE_100HALF)
 			data |= PORT_AUTO_NEG_100BTX;
-		if (val & PHY_AUTO_NEG_10BT_FD)
+		if (val & ADVERTISE_10FULL)
 			data |= PORT_AUTO_NEG_10BT_FD;
-		if (val & PHY_AUTO_NEG_10BT)
+		if (val & ADVERTISE_10HALF)
 			data |= PORT_AUTO_NEG_10BT;
 		if (data != ctrl)
 			ksz_pwrite8(dev, p, regs[P_LOCAL_CTRL], data);
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index c2e52c40a54c..f925ddee5238 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -744,68 +744,6 @@
 
 #define PORT_ACL_FORCE_DLR_MISS		BIT(0)
 
-#ifndef PHY_REG_CTRL
-#define PHY_REG_CTRL			0
-
-#define PHY_RESET			BIT(15)
-#define PHY_LOOPBACK			BIT(14)
-#define PHY_SPEED_100MBIT		BIT(13)
-#define PHY_AUTO_NEG_ENABLE		BIT(12)
-#define PHY_POWER_DOWN			BIT(11)
-#define PHY_MII_DISABLE			BIT(10)
-#define PHY_AUTO_NEG_RESTART		BIT(9)
-#define PHY_FULL_DUPLEX			BIT(8)
-#define PHY_COLLISION_TEST_NOT		BIT(7)
-#define PHY_HP_MDIX			BIT(5)
-#define PHY_FORCE_MDIX			BIT(4)
-#define PHY_AUTO_MDIX_DISABLE		BIT(3)
-#define PHY_REMOTE_FAULT_DISABLE	BIT(2)
-#define PHY_TRANSMIT_DISABLE		BIT(1)
-#define PHY_LED_DISABLE			BIT(0)
-
-#define PHY_REG_STATUS			1
-
-#define PHY_100BT4_CAPABLE		BIT(15)
-#define PHY_100BTX_FD_CAPABLE		BIT(14)
-#define PHY_100BTX_CAPABLE		BIT(13)
-#define PHY_10BT_FD_CAPABLE		BIT(12)
-#define PHY_10BT_CAPABLE		BIT(11)
-#define PHY_MII_SUPPRESS_CAPABLE_NOT	BIT(6)
-#define PHY_AUTO_NEG_ACKNOWLEDGE	BIT(5)
-#define PHY_REMOTE_FAULT		BIT(4)
-#define PHY_AUTO_NEG_CAPABLE		BIT(3)
-#define PHY_LINK_STATUS			BIT(2)
-#define PHY_JABBER_DETECT_NOT		BIT(1)
-#define PHY_EXTENDED_CAPABILITY		BIT(0)
-
-#define PHY_REG_ID_1			2
-#define PHY_REG_ID_2			3
-
-#define PHY_REG_AUTO_NEGOTIATION	4
-
-#define PHY_AUTO_NEG_NEXT_PAGE_NOT	BIT(15)
-#define PHY_AUTO_NEG_REMOTE_FAULT_NOT	BIT(13)
-#define PHY_AUTO_NEG_SYM_PAUSE		BIT(10)
-#define PHY_AUTO_NEG_100BT4		BIT(9)
-#define PHY_AUTO_NEG_100BTX_FD		BIT(8)
-#define PHY_AUTO_NEG_100BTX		BIT(7)
-#define PHY_AUTO_NEG_10BT_FD		BIT(6)
-#define PHY_AUTO_NEG_10BT		BIT(5)
-#define PHY_AUTO_NEG_SELECTOR		0x001F
-#define PHY_AUTO_NEG_802_3		0x0001
-
-#define PHY_REG_REMOTE_CAPABILITY	5
-
-#define PHY_REMOTE_NEXT_PAGE_NOT	BIT(15)
-#define PHY_REMOTE_ACKNOWLEDGE_NOT	BIT(14)
-#define PHY_REMOTE_REMOTE_FAULT_NOT	BIT(13)
-#define PHY_REMOTE_SYM_PAUSE		BIT(10)
-#define PHY_REMOTE_100BTX_FD		BIT(8)
-#define PHY_REMOTE_100BTX		BIT(7)
-#define PHY_REMOTE_10BT_FD		BIT(6)
-#define PHY_REMOTE_10BT			BIT(5)
-#endif
-
 #define KSZ8795_ID_HI			0x0022
 #define KSZ8795_ID_LO			0x1550
 #define KSZ8863_ID_LO			0x1430
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 3532bfe936f6..7945eb5e2fe8 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -25,6 +25,7 @@
 #include <linux/crc32.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/micrel_phy.h>
 
 
 /* DMA Registers */
@@ -271,84 +272,15 @@
 
 #define KS884X_PHY_CTRL_OFFSET		0x00
 
-/* Mode Control Register */
-#define PHY_REG_CTRL			0
-
-#define PHY_RESET			0x8000
-#define PHY_LOOPBACK			0x4000
-#define PHY_SPEED_100MBIT		0x2000
-#define PHY_AUTO_NEG_ENABLE		0x1000
-#define PHY_POWER_DOWN			0x0800
-#define PHY_MII_DISABLE			0x0400
-#define PHY_AUTO_NEG_RESTART		0x0200
-#define PHY_FULL_DUPLEX			0x0100
-#define PHY_COLLISION_TEST		0x0080
-#define PHY_HP_MDIX			0x0020
-#define PHY_FORCE_MDIX			0x0010
-#define PHY_AUTO_MDIX_DISABLE		0x0008
-#define PHY_REMOTE_FAULT_DISABLE	0x0004
-#define PHY_TRANSMIT_DISABLE		0x0002
-#define PHY_LED_DISABLE			0x0001
-
 #define KS884X_PHY_STATUS_OFFSET	0x02
 
-/* Mode Status Register */
-#define PHY_REG_STATUS			1
-
-#define PHY_100BT4_CAPABLE		0x8000
-#define PHY_100BTX_FD_CAPABLE		0x4000
-#define PHY_100BTX_CAPABLE		0x2000
-#define PHY_10BT_FD_CAPABLE		0x1000
-#define PHY_10BT_CAPABLE		0x0800
-#define PHY_MII_SUPPRESS_CAPABLE	0x0040
-#define PHY_AUTO_NEG_ACKNOWLEDGE	0x0020
-#define PHY_REMOTE_FAULT		0x0010
-#define PHY_AUTO_NEG_CAPABLE		0x0008
-#define PHY_LINK_STATUS			0x0004
-#define PHY_JABBER_DETECT		0x0002
-#define PHY_EXTENDED_CAPABILITY		0x0001
-
 #define KS884X_PHY_ID_1_OFFSET		0x04
 #define KS884X_PHY_ID_2_OFFSET		0x06
 
-/* PHY Identifier Registers */
-#define PHY_REG_ID_1			2
-#define PHY_REG_ID_2			3
-
 #define KS884X_PHY_AUTO_NEG_OFFSET	0x08
 
-/* Auto-Negotiation Advertisement Register */
-#define PHY_REG_AUTO_NEGOTIATION	4
-
-#define PHY_AUTO_NEG_NEXT_PAGE		0x8000
-#define PHY_AUTO_NEG_REMOTE_FAULT	0x2000
-/* Not supported. */
-#define PHY_AUTO_NEG_ASYM_PAUSE		0x0800
-#define PHY_AUTO_NEG_SYM_PAUSE		0x0400
-#define PHY_AUTO_NEG_100BT4		0x0200
-#define PHY_AUTO_NEG_100BTX_FD		0x0100
-#define PHY_AUTO_NEG_100BTX		0x0080
-#define PHY_AUTO_NEG_10BT_FD		0x0040
-#define PHY_AUTO_NEG_10BT		0x0020
-#define PHY_AUTO_NEG_SELECTOR		0x001F
-#define PHY_AUTO_NEG_802_3		0x0001
-
-#define PHY_AUTO_NEG_PAUSE  (PHY_AUTO_NEG_SYM_PAUSE | PHY_AUTO_NEG_ASYM_PAUSE)
-
 #define KS884X_PHY_REMOTE_CAP_OFFSET	0x0A
 
-/* Auto-Negotiation Link Partner Ability Register */
-#define PHY_REG_REMOTE_CAPABILITY	5
-
-#define PHY_REMOTE_NEXT_PAGE		0x8000
-#define PHY_REMOTE_ACKNOWLEDGE		0x4000
-#define PHY_REMOTE_REMOTE_FAULT		0x2000
-#define PHY_REMOTE_SYM_PAUSE		0x0400
-#define PHY_REMOTE_100BTX_FD		0x0100
-#define PHY_REMOTE_100BTX		0x0080
-#define PHY_REMOTE_10BT_FD		0x0040
-#define PHY_REMOTE_10BT			0x0020
-
 /* P1VCT */
 #define KS884X_P1VCT_P			0x04F0
 #define KS884X_P1PHYCTRL_P		0x04F2
@@ -2886,15 +2818,6 @@ static void sw_block_addr(struct ksz_hw *hw)
 	}
 }
 
-#define PHY_LINK_SUPPORT		\
-	(PHY_AUTO_NEG_ASYM_PAUSE |	\
-	PHY_AUTO_NEG_SYM_PAUSE |	\
-	PHY_AUTO_NEG_100BT4 |		\
-	PHY_AUTO_NEG_100BTX_FD |	\
-	PHY_AUTO_NEG_100BTX |		\
-	PHY_AUTO_NEG_10BT_FD |		\
-	PHY_AUTO_NEG_10BT)
-
 static inline void hw_r_phy_ctrl(struct ksz_hw *hw, int phy, u16 *data)
 {
 	*data = readw(hw->io + phy + KS884X_PHY_CTRL_OFFSET);
@@ -3238,16 +3161,18 @@ static void determine_flow_ctrl(struct ksz_hw *hw, struct ksz_port *port,
 	rx = tx = 0;
 	if (port->force_link)
 		rx = tx = 1;
-	if (remote & PHY_AUTO_NEG_SYM_PAUSE) {
-		if (local & PHY_AUTO_NEG_SYM_PAUSE) {
+	if (remote & LPA_PAUSE_CAP) {
+		if (local & ADVERTISE_PAUSE_CAP) {
 			rx = tx = 1;
-		} else if ((remote & PHY_AUTO_NEG_ASYM_PAUSE) &&
-				(local & PHY_AUTO_NEG_PAUSE) ==
-				PHY_AUTO_NEG_ASYM_PAUSE) {
+		} else if ((remote & LPA_PAUSE_ASYM) &&
+			   (local &
+			    (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)) ==
+			   ADVERTISE_PAUSE_ASYM) {
 			tx = 1;
 		}
-	} else if (remote & PHY_AUTO_NEG_ASYM_PAUSE) {
-		if ((local & PHY_AUTO_NEG_PAUSE) == PHY_AUTO_NEG_PAUSE)
+	} else if (remote & LPA_PAUSE_ASYM) {
+		if ((local & (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM))
+		    == (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM))
 			rx = 1;
 	}
 	if (!hw->ksz_switch)
@@ -3428,16 +3353,16 @@ static void port_force_link_speed(struct ksz_port *port)
 		phy = KS884X_PHY_1_CTRL_OFFSET + p * PHY_CTRL_INTERVAL;
 		hw_r_phy_ctrl(hw, phy, &data);
 
-		data &= ~PHY_AUTO_NEG_ENABLE;
+		data &= ~BMCR_ANENABLE;
 
 		if (10 == port->speed)
-			data &= ~PHY_SPEED_100MBIT;
+			data &= ~BMCR_SPEED100;
 		else if (100 == port->speed)
-			data |= PHY_SPEED_100MBIT;
+			data |= BMCR_SPEED100;
 		if (1 == port->duplex)
-			data &= ~PHY_FULL_DUPLEX;
+			data &= ~BMCR_FULLDPLX;
 		else if (2 == port->duplex)
-			data |= PHY_FULL_DUPLEX;
+			data |= BMCR_FULLDPLX;
 		hw_w_phy_ctrl(hw, phy, data);
 	}
 }
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 416ee6dd2574..b03e2afcb53f 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -45,4 +45,17 @@
 #define MICREL_KSZ9021_RGMII_CLK_CTRL_PAD_SCEW	0x104
 #define MICREL_KSZ9021_RGMII_RX_DATA_PAD_SCEW	0x105
 
+/* Device specific MII_BMCR (Reg 0) bits */
+/* 1 = HP Auto MDI/MDI-X mode, 0 = Microchip Auto MDI/MDI-X mode */
+#define KSZ886X_BMCR_HP_MDIX			BIT(5)
+/* 1 = Force MDI (transmit on RXP/RXM pins), 0 = Normal operation
+ * (transmit on TXP/TXM pins)
+ */
+#define KSZ886X_BMCR_FORCE_MDI			BIT(4)
+/* 1 = Disable auto MDI-X */
+#define KSZ886X_BMCR_DISABLE_AUTO_MDIX		BIT(3)
+#define KSZ886X_BMCR_DISABLE_FAR_END_FAULT	BIT(2)
+#define KSZ886X_BMCR_DISABLE_TRANSMIT		BIT(1)
+#define KSZ886X_BMCR_DISABLE_LED		BIT(0)
+
 #endif /* _MICREL_PHY_H */

From 2c709e0bdad4d996ec8925b9ee6d5b97458708f1 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:19 +0200
Subject: [PATCH 1690/3292] net: dsa: microchip: ksz8795: add phylink support

This patch adds the phylink support to the ksz8795 driver to provide
configuration exceptions on quirky KSZ8863 and KSZ8873 ports.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c | 55 +++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index ba065003623f..cfa2a5000cd3 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -18,6 +18,7 @@
 #include <linux/micrel_phy.h>
 #include <net/dsa.h>
 #include <net/switchdev.h>
+#include <linux/phylink.h>
 
 #include "ksz_common.h"
 #include "ksz8795_reg.h"
@@ -1420,11 +1421,65 @@ static int ksz8_setup(struct dsa_switch *ds)
 	return 0;
 }
 
+static void ksz8_validate(struct dsa_switch *ds, int port,
+			  unsigned long *supported,
+			  struct phylink_link_state *state)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+	struct ksz_device *dev = ds->priv;
+
+	if (port == dev->cpu_port) {
+		if (state->interface != PHY_INTERFACE_MODE_RMII &&
+		    state->interface != PHY_INTERFACE_MODE_MII &&
+		    state->interface != PHY_INTERFACE_MODE_NA)
+			goto unsupported;
+	} else {
+		if (state->interface != PHY_INTERFACE_MODE_INTERNAL &&
+		    state->interface != PHY_INTERFACE_MODE_NA)
+			goto unsupported;
+	}
+
+	/* Allow all the expected bits */
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Autoneg);
+
+	/* Silicon Errata Sheet (DS80000830A):
+	 * "Port 1 does not respond to received flow control PAUSE frames"
+	 * So, disable Pause support on "Port 1" (port == 0) for all ksz88x3
+	 * switches.
+	 */
+	if (!ksz_is_ksz88x3(dev) || port)
+		phylink_set(mask, Pause);
+
+	/* Asym pause is not supported on KSZ8863 and KSZ8873 */
+	if (!ksz_is_ksz88x3(dev))
+		phylink_set(mask, Asym_Pause);
+
+	/* 10M and 100M are only supported */
+	phylink_set(mask, 10baseT_Half);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Half);
+	phylink_set(mask, 100baseT_Full);
+
+	bitmap_and(supported, supported, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	return;
+
+unsupported:
+	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	dev_err(ds->dev, "Unsupported interface: %s, port: %d\n",
+		phy_modes(state->interface), port);
+}
+
 static const struct dsa_switch_ops ksz8_switch_ops = {
 	.get_tag_protocol	= ksz8_get_tag_protocol,
 	.setup			= ksz8_setup,
 	.phy_read		= ksz_phy_read16,
 	.phy_write		= ksz_phy_write16,
+	.phylink_validate	= ksz8_validate,
 	.phylink_mac_link_down	= ksz_mac_link_down,
 	.port_enable		= ksz_enable_port,
 	.get_strings		= ksz8_get_strings,

From 0033f890f95ba52dd7bf154d62aa9a5317456401 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:20 +0200
Subject: [PATCH 1691/3292] net: phy: micrel: use consistent alignments

This patch changes the alignments to one space between "#define" and the
macro.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 44 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 93cf9500728f..a2755f2eb7db 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -38,42 +38,42 @@
 
 /* general Interrupt control/status reg in vendor specific block. */
 #define MII_KSZPHY_INTCS			0x1B
-#define	KSZPHY_INTCS_JABBER			BIT(15)
-#define	KSZPHY_INTCS_RECEIVE_ERR		BIT(14)
-#define	KSZPHY_INTCS_PAGE_RECEIVE		BIT(13)
-#define	KSZPHY_INTCS_PARELLEL			BIT(12)
-#define	KSZPHY_INTCS_LINK_PARTNER_ACK		BIT(11)
-#define	KSZPHY_INTCS_LINK_DOWN			BIT(10)
-#define	KSZPHY_INTCS_REMOTE_FAULT		BIT(9)
-#define	KSZPHY_INTCS_LINK_UP			BIT(8)
-#define	KSZPHY_INTCS_ALL			(KSZPHY_INTCS_LINK_UP |\
+#define KSZPHY_INTCS_JABBER			BIT(15)
+#define KSZPHY_INTCS_RECEIVE_ERR		BIT(14)
+#define KSZPHY_INTCS_PAGE_RECEIVE		BIT(13)
+#define KSZPHY_INTCS_PARELLEL			BIT(12)
+#define KSZPHY_INTCS_LINK_PARTNER_ACK		BIT(11)
+#define KSZPHY_INTCS_LINK_DOWN			BIT(10)
+#define KSZPHY_INTCS_REMOTE_FAULT		BIT(9)
+#define KSZPHY_INTCS_LINK_UP			BIT(8)
+#define KSZPHY_INTCS_ALL			(KSZPHY_INTCS_LINK_UP |\
 						KSZPHY_INTCS_LINK_DOWN)
-#define	KSZPHY_INTCS_LINK_DOWN_STATUS		BIT(2)
-#define	KSZPHY_INTCS_LINK_UP_STATUS		BIT(0)
-#define	KSZPHY_INTCS_STATUS			(KSZPHY_INTCS_LINK_DOWN_STATUS |\
+#define KSZPHY_INTCS_LINK_DOWN_STATUS		BIT(2)
+#define KSZPHY_INTCS_LINK_UP_STATUS		BIT(0)
+#define KSZPHY_INTCS_STATUS			(KSZPHY_INTCS_LINK_DOWN_STATUS |\
 						 KSZPHY_INTCS_LINK_UP_STATUS)
 
 /* PHY Control 1 */
-#define	MII_KSZPHY_CTRL_1			0x1e
+#define MII_KSZPHY_CTRL_1			0x1e
 
 /* PHY Control 2 / PHY Control (if no PHY Control 1) */
-#define	MII_KSZPHY_CTRL_2			0x1f
-#define	MII_KSZPHY_CTRL				MII_KSZPHY_CTRL_2
+#define MII_KSZPHY_CTRL_2			0x1f
+#define MII_KSZPHY_CTRL				MII_KSZPHY_CTRL_2
 /* bitmap of PHY register to set interrupt mode */
 #define KSZPHY_CTRL_INT_ACTIVE_HIGH		BIT(9)
 #define KSZPHY_RMII_REF_CLK_SEL			BIT(7)
 
 /* Write/read to/from extended registers */
-#define MII_KSZPHY_EXTREG                       0x0b
-#define KSZPHY_EXTREG_WRITE                     0x8000
+#define MII_KSZPHY_EXTREG			0x0b
+#define KSZPHY_EXTREG_WRITE			0x8000
 
-#define MII_KSZPHY_EXTREG_WRITE                 0x0c
-#define MII_KSZPHY_EXTREG_READ                  0x0d
+#define MII_KSZPHY_EXTREG_WRITE			0x0c
+#define MII_KSZPHY_EXTREG_READ			0x0d
 
 /* Extended registers */
-#define MII_KSZPHY_CLK_CONTROL_PAD_SKEW         0x104
-#define MII_KSZPHY_RX_DATA_PAD_SKEW             0x105
-#define MII_KSZPHY_TX_DATA_PAD_SKEW             0x106
+#define MII_KSZPHY_CLK_CONTROL_PAD_SKEW		0x104
+#define MII_KSZPHY_RX_DATA_PAD_SKEW		0x105
+#define MII_KSZPHY_TX_DATA_PAD_SKEW		0x106
 
 #define PS_TO_REG				200
 

From 52939393bd682248a415de4c0439280aafaccd66 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:21 +0200
Subject: [PATCH 1692/3292] net: phy/dsa micrel/ksz886x add MDI-X support

Add support for MDI-X status and configuration

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c |  5 ++
 drivers/net/phy/micrel.c            | 88 +++++++++++++++++++++++++++++
 include/linux/micrel_phy.h          |  2 +
 3 files changed, 95 insertions(+)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index cfa2a5000cd3..690304c87b02 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -816,6 +816,11 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 		if (data & ~LPA_SLCT)
 			data |= LPA_LPACK;
 		break;
+	case PHY_REG_PHY_CTRL:
+		ksz_pread8(dev, p, regs[P_LINK_STATUS], &link);
+		if (link & PORT_MDIX_STATUS)
+			data |= KSZ886X_CTRL_MDIX_STAT;
+		break;
 	default:
 		processed = false;
 		break;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index a2755f2eb7db..9ffca754f6f7 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1045,6 +1045,92 @@ static int ksz8873mll_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int ksz886x_config_mdix(struct phy_device *phydev, u8 ctrl)
+{
+	u16 val;
+
+	switch (ctrl) {
+	case ETH_TP_MDI:
+		val = KSZ886X_BMCR_DISABLE_AUTO_MDIX;
+		break;
+	case ETH_TP_MDI_X:
+		/* Note: The naming of the bit KSZ886X_BMCR_FORCE_MDI is bit
+		 * counter intuitive, the "-X" in "1 = Force MDI" in the data
+		 * sheet seems to be missing:
+		 * 1 = Force MDI (sic!) (transmit on RX+/RX- pins)
+		 * 0 = Normal operation (transmit on TX+/TX- pins)
+		 */
+		val = KSZ886X_BMCR_DISABLE_AUTO_MDIX | KSZ886X_BMCR_FORCE_MDI;
+		break;
+	case ETH_TP_MDI_AUTO:
+		val = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	return phy_modify(phydev, MII_BMCR,
+			  KSZ886X_BMCR_HP_MDIX | KSZ886X_BMCR_FORCE_MDI |
+			  KSZ886X_BMCR_DISABLE_AUTO_MDIX,
+			  KSZ886X_BMCR_HP_MDIX | val);
+}
+
+static int ksz886x_config_aneg(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_config_aneg(phydev);
+	if (ret)
+		return ret;
+
+	/* The MDI-X configuration is automatically changed by the PHY after
+	 * switching from autoneg off to on. So, take MDI-X configuration under
+	 * own control and set it after autoneg configuration was done.
+	 */
+	return ksz886x_config_mdix(phydev, phydev->mdix_ctrl);
+}
+
+static int ksz886x_mdix_update(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, MII_BMCR);
+	if (ret < 0)
+		return ret;
+
+	if (ret & KSZ886X_BMCR_DISABLE_AUTO_MDIX) {
+		if (ret & KSZ886X_BMCR_FORCE_MDI)
+			phydev->mdix_ctrl = ETH_TP_MDI_X;
+		else
+			phydev->mdix_ctrl = ETH_TP_MDI;
+	} else {
+		phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+	}
+
+	ret = phy_read(phydev, MII_KSZPHY_CTRL);
+	if (ret < 0)
+		return ret;
+
+	/* Same reverse logic as KSZ886X_BMCR_FORCE_MDI */
+	if (ret & KSZ886X_CTRL_MDIX_STAT)
+		phydev->mdix = ETH_TP_MDI_X;
+	else
+		phydev->mdix = ETH_TP_MDI;
+
+	return 0;
+}
+
+static int ksz886x_read_status(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = ksz886x_mdix_update(phydev);
+	if (ret < 0)
+		return ret;
+
+	return genphy_read_status(phydev);
+}
+
 static int kszphy_get_sset_count(struct phy_device *phydev)
 {
 	return ARRAY_SIZE(kszphy_hw_stats);
@@ -1397,6 +1483,8 @@ static struct phy_driver ksphy_driver[] = {
 	.name		= "Micrel KSZ8851 Ethernet MAC or KSZ886X Switch",
 	/* PHY_BASIC_FEATURES */
 	.config_init	= kszphy_config_init,
+	.config_aneg	= ksz886x_config_aneg,
+	.read_status	= ksz886x_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index b03e2afcb53f..58370abd9f4f 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -58,4 +58,6 @@
 #define KSZ886X_BMCR_DISABLE_TRANSMIT		BIT(1)
 #define KSZ886X_BMCR_DISABLE_LED		BIT(0)
 
+#define KSZ886X_CTRL_MDIX_STAT			BIT(4)
+
 #endif /* _MICREL_PHY_H */

From f873f112553b520e86be3992a38b19e3747b31af Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:22 +0200
Subject: [PATCH 1693/3292] net: phy: micrel: ksz8081 add MDI-X support

Add support for MDI-X status and configuration

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 89 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 9ffca754f6f7..5ca39ce8db96 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -55,11 +55,17 @@
 
 /* PHY Control 1 */
 #define MII_KSZPHY_CTRL_1			0x1e
+#define KSZ8081_CTRL1_MDIX_STAT			BIT(4)
 
 /* PHY Control 2 / PHY Control (if no PHY Control 1) */
 #define MII_KSZPHY_CTRL_2			0x1f
 #define MII_KSZPHY_CTRL				MII_KSZPHY_CTRL_2
 /* bitmap of PHY register to set interrupt mode */
+#define KSZ8081_CTRL2_HP_MDIX			BIT(15)
+#define KSZ8081_CTRL2_MDI_MDI_X_SELECT		BIT(14)
+#define KSZ8081_CTRL2_DISABLE_AUTO_MDIX		BIT(13)
+#define KSZ8081_CTRL2_FORCE_LINK		BIT(11)
+#define KSZ8081_CTRL2_POWER_SAVING		BIT(10)
 #define KSZPHY_CTRL_INT_ACTIVE_HIGH		BIT(9)
 #define KSZPHY_RMII_REF_CLK_SEL			BIT(7)
 
@@ -422,6 +428,87 @@ static int ksz8081_config_init(struct phy_device *phydev)
 	return kszphy_config_init(phydev);
 }
 
+static int ksz8081_config_mdix(struct phy_device *phydev, u8 ctrl)
+{
+	u16 val;
+
+	switch (ctrl) {
+	case ETH_TP_MDI:
+		val = KSZ8081_CTRL2_DISABLE_AUTO_MDIX;
+		break;
+	case ETH_TP_MDI_X:
+		val = KSZ8081_CTRL2_DISABLE_AUTO_MDIX |
+			KSZ8081_CTRL2_MDI_MDI_X_SELECT;
+		break;
+	case ETH_TP_MDI_AUTO:
+		val = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	return phy_modify(phydev, MII_KSZPHY_CTRL_2,
+			  KSZ8081_CTRL2_HP_MDIX |
+			  KSZ8081_CTRL2_MDI_MDI_X_SELECT |
+			  KSZ8081_CTRL2_DISABLE_AUTO_MDIX,
+			  KSZ8081_CTRL2_HP_MDIX | val);
+}
+
+static int ksz8081_config_aneg(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_config_aneg(phydev);
+	if (ret)
+		return ret;
+
+	/* The MDI-X configuration is automatically changed by the PHY after
+	 * switching from autoneg off to on. So, take MDI-X configuration under
+	 * own control and set it after autoneg configuration was done.
+	 */
+	return ksz8081_config_mdix(phydev, phydev->mdix_ctrl);
+}
+
+static int ksz8081_mdix_update(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, MII_KSZPHY_CTRL_2);
+	if (ret < 0)
+		return ret;
+
+	if (ret & KSZ8081_CTRL2_DISABLE_AUTO_MDIX) {
+		if (ret & KSZ8081_CTRL2_MDI_MDI_X_SELECT)
+			phydev->mdix_ctrl = ETH_TP_MDI_X;
+		else
+			phydev->mdix_ctrl = ETH_TP_MDI;
+	} else {
+		phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+	}
+
+	ret = phy_read(phydev, MII_KSZPHY_CTRL_1);
+	if (ret < 0)
+		return ret;
+
+	if (ret & KSZ8081_CTRL1_MDIX_STAT)
+		phydev->mdix = ETH_TP_MDI;
+	else
+		phydev->mdix = ETH_TP_MDI_X;
+
+	return 0;
+}
+
+static int ksz8081_read_status(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = ksz8081_mdix_update(phydev);
+	if (ret < 0)
+		return ret;
+
+	return genphy_read_status(phydev);
+}
+
 static int ksz8061_config_init(struct phy_device *phydev)
 {
 	int ret;
@@ -1387,6 +1474,8 @@ static struct phy_driver ksphy_driver[] = {
 	.probe		= kszphy_probe,
 	.config_init	= ksz8081_config_init,
 	.soft_reset	= genphy_soft_reset,
+	.config_aneg	= ksz8081_config_aneg,
+	.read_status	= ksz8081_read_status,
 	.config_intr	= kszphy_config_intr,
 	.handle_interrupt = kszphy_handle_interrupt,
 	.get_sset_count = kszphy_get_sset_count,

From 36838050c453e591ff2ee744d1149630bc5c3310 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <linux@rempel-privat.de>
Date: Mon, 14 Jun 2021 06:31:23 +0200
Subject: [PATCH 1694/3292] net: dsa: microchip: ksz8795: add LINK_MD register
 support

Add mapping for LINK_MD register to enable cable testing functionality.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c     | 22 ++++++++++++++++++++++
 drivers/net/dsa/microchip/ksz8795_reg.h |  5 +++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 690304c87b02..e1731ae4497d 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -6,6 +6,7 @@
  *	Tristram Ha <Tristram.Ha@microchip.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/gpio.h>
@@ -729,6 +730,7 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 	u8 restart, speed, ctrl, link;
 	const u8 *regs = ksz8->regs;
 	int processed = true;
+	u8 val1, val2;
 	u16 data = 0;
 	u8 p = phy;
 
@@ -816,6 +818,22 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 		if (data & ~LPA_SLCT)
 			data |= LPA_LPACK;
 		break;
+	case PHY_REG_LINK_MD:
+		ksz_pread8(dev, p, REG_PORT_LINK_MD_CTRL, &val1);
+		ksz_pread8(dev, p, REG_PORT_LINK_MD_RESULT, &val2);
+		if (val1 & PORT_START_CABLE_DIAG)
+			data |= PHY_START_CABLE_DIAG;
+
+		if (val1 & PORT_CABLE_10M_SHORT)
+			data |= PHY_CABLE_10M_SHORT;
+
+		data |= FIELD_PREP(PHY_CABLE_DIAG_RESULT_M,
+				FIELD_GET(PORT_CABLE_DIAG_RESULT_M, val1));
+
+		data |= FIELD_PREP(PHY_CABLE_FAULT_COUNTER_M,
+				(FIELD_GET(PORT_CABLE_FAULT_COUNTER_H, val1) << 8) |
+				FIELD_GET(PORT_CABLE_FAULT_COUNTER_L, val2));
+		break;
 	case PHY_REG_PHY_CTRL:
 		ksz_pread8(dev, p, regs[P_LINK_STATUS], &link);
 		if (link & PORT_MDIX_STATUS)
@@ -932,6 +950,10 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 		if (data != ctrl)
 			ksz_pwrite8(dev, p, regs[P_LOCAL_CTRL], data);
 		break;
+	case PHY_REG_LINK_MD:
+		if (val & PHY_START_CABLE_DIAG)
+			ksz_port_cfg(dev, p, REG_PORT_LINK_MD_CTRL, PORT_START_CABLE_DIAG, true);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index f925ddee5238..a32355624f31 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -249,7 +249,7 @@
 #define REG_PORT_4_LINK_MD_CTRL		0x4A
 
 #define PORT_CABLE_10M_SHORT		BIT(7)
-#define PORT_CABLE_DIAG_RESULT_M	0x3
+#define PORT_CABLE_DIAG_RESULT_M	GENMASK(6, 5)
 #define PORT_CABLE_DIAG_RESULT_S	5
 #define PORT_CABLE_STAT_NORMAL		0
 #define PORT_CABLE_STAT_OPEN		1
@@ -753,13 +753,14 @@
 #define PHY_REG_LINK_MD			0x1D
 
 #define PHY_START_CABLE_DIAG		BIT(15)
+#define PHY_CABLE_DIAG_RESULT_M		GENMASK(14, 13)
 #define PHY_CABLE_DIAG_RESULT		0x6000
 #define PHY_CABLE_STAT_NORMAL		0x0000
 #define PHY_CABLE_STAT_OPEN		0x2000
 #define PHY_CABLE_STAT_SHORT		0x4000
 #define PHY_CABLE_STAT_FAILED		0x6000
 #define PHY_CABLE_10M_SHORT		BIT(12)
-#define PHY_CABLE_FAULT_COUNTER		0x01FF
+#define PHY_CABLE_FAULT_COUNTER_M	GENMASK(8, 0)
 
 #define PHY_REG_PHY_CTRL		0x1F
 

From c916e8e1ea724db0f7bae36c11aaadc631226321 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:24 +0200
Subject: [PATCH 1695/3292] net: dsa: dsa_slave_phy_connect(): extend phy's
 flags with port specific phy flags

The current get_phy_flags() is only processed when we connect to a PHY
via a designed phy-handle property via phylink_of_phy_connect(), but if
we fallback on the internal MDIO bus created by a switch and take the
dsa_slave_phy_connect() path then we would not be processing that flag
and using it at PHY connection time.

Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3ca509eb284d..798944aa847a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1749,7 +1749,8 @@ static void dsa_slave_phylink_fixed_state(struct phylink_config *config,
 }
 
 /* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
+static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr,
+				 u32 flags)
 {
 	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 	struct dsa_switch *ds = dp->ds;
@@ -1760,6 +1761,8 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
 		return -ENODEV;
 	}
 
+	slave_dev->phydev->dev_flags |= flags;
+
 	return phylink_connect_phy(dp->pl, slave_dev->phydev);
 }
 
@@ -1804,7 +1807,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 		/* We could not connect to a designated PHY or SFP, so try to
 		 * use the switch internal MDIO bus instead
 		 */
-		ret = dsa_slave_phy_connect(slave_dev, dp->index);
+		ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags);
 		if (ret) {
 			netdev_err(slave_dev,
 				   "failed to connect to port %d: %d\n",

From 49011e0c1555dd7a689d0f32fd78c1ecd43e59cd Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 14 Jun 2021 06:31:25 +0200
Subject: [PATCH 1696/3292] net: phy: micrel: ksz886x/ksz8081: add cabletest
 support

This patch support for cable test for the ksz886x switches and the
ksz8081 PHY.

The patch was tested on a KSZ8873RLL switch with following results:

- port 1:
  - provides invalid values, thus return -ENOTSUPP
    (Errata: DS80000830A: "LinkMD does not work on Port 1",
     http://ww1.microchip.com/downloads/en/DeviceDoc/KSZ8873-Errata-DS80000830A.pdf)

- port 2:
  - can detect distance
  - can detect open on each wire of pair A (wire 1 and 2)
  - can detect open only on one wire of pair B (only wire 3)
  - can detect short between wires of a pair (wires 1 + 2 or 3 + 6)
  - short between pairs is detected as open.
    For example short between wires 2 + 3 is detected as open.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c |  13 ++
 drivers/net/phy/micrel.c            | 180 ++++++++++++++++++++++++++++
 include/linux/micrel_phy.h          |   1 +
 3 files changed, 194 insertions(+)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index e1731ae4497d..560f6843bb65 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -970,6 +970,18 @@ static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds,
 		DSA_TAG_PROTO_KSZ9893 : DSA_TAG_PROTO_KSZ8795;
 }
 
+static u32 ksz8_sw_get_phy_flags(struct dsa_switch *ds, int port)
+{
+	/* Silicon Errata Sheet (DS80000830A):
+	 * Port 1 does not work with LinkMD Cable-Testing.
+	 * Port 1 does not respond to received PAUSE control frames.
+	 */
+	if (!port)
+		return MICREL_KSZ8_P1_ERRATA;
+
+	return 0;
+}
+
 static void ksz8_get_strings(struct dsa_switch *ds, int port,
 			     u32 stringset, uint8_t *buf)
 {
@@ -1503,6 +1515,7 @@ unsupported:
 
 static const struct dsa_switch_ops ksz8_switch_ops = {
 	.get_tag_protocol	= ksz8_get_tag_protocol,
+	.get_phy_flags		= ksz8_sw_get_phy_flags,
 	.setup			= ksz8_setup,
 	.phy_read		= ksz_phy_read16,
 	.phy_write		= ksz_phy_write16,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 5ca39ce8db96..4d53886f7d51 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/bitfield.h>
+#include <linux/ethtool_netlink.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/phy.h>
@@ -53,6 +54,18 @@
 #define KSZPHY_INTCS_STATUS			(KSZPHY_INTCS_LINK_DOWN_STATUS |\
 						 KSZPHY_INTCS_LINK_UP_STATUS)
 
+/* LinkMD Control/Status */
+#define KSZ8081_LMD				0x1d
+#define KSZ8081_LMD_ENABLE_TEST			BIT(15)
+#define KSZ8081_LMD_STAT_NORMAL			0
+#define KSZ8081_LMD_STAT_OPEN			1
+#define KSZ8081_LMD_STAT_SHORT			2
+#define KSZ8081_LMD_STAT_FAIL			3
+#define KSZ8081_LMD_STAT_MASK			GENMASK(14, 13)
+/* Short cable (<10 meter) has been detected by LinkMD */
+#define KSZ8081_LMD_SHORT_INDICATOR		BIT(12)
+#define KSZ8081_LMD_DELTA_TIME_MASK		GENMASK(8, 0)
+
 /* PHY Control 1 */
 #define MII_KSZPHY_CTRL_1			0x1e
 #define KSZ8081_CTRL1_MDIX_STAT			BIT(4)
@@ -1363,6 +1376,167 @@ static int kszphy_probe(struct phy_device *phydev)
 	return 0;
 }
 
+static int ksz886x_cable_test_start(struct phy_device *phydev)
+{
+	if (phydev->dev_flags & MICREL_KSZ8_P1_ERRATA)
+		return -EOPNOTSUPP;
+
+	/* If autoneg is enabled, we won't be able to test cross pair
+	 * short. In this case, the PHY will "detect" a link and
+	 * confuse the internal state machine - disable auto neg here.
+	 * If autoneg is disabled, we should set the speed to 10mbit.
+	 */
+	return phy_clear_bits(phydev, MII_BMCR, BMCR_ANENABLE | BMCR_SPEED100);
+}
+
+static int ksz886x_cable_test_result_trans(u16 status)
+{
+	switch (FIELD_GET(KSZ8081_LMD_STAT_MASK, status)) {
+	case KSZ8081_LMD_STAT_NORMAL:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+	case KSZ8081_LMD_STAT_SHORT:
+		return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+	case KSZ8081_LMD_STAT_OPEN:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+	case KSZ8081_LMD_STAT_FAIL:
+		fallthrough;
+	default:
+		return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+	}
+}
+
+static bool ksz886x_cable_test_failed(u16 status)
+{
+	return FIELD_GET(KSZ8081_LMD_STAT_MASK, status) ==
+		KSZ8081_LMD_STAT_FAIL;
+}
+
+static bool ksz886x_cable_test_fault_length_valid(u16 status)
+{
+	switch (FIELD_GET(KSZ8081_LMD_STAT_MASK, status)) {
+	case KSZ8081_LMD_STAT_OPEN:
+		fallthrough;
+	case KSZ8081_LMD_STAT_SHORT:
+		return true;
+	}
+	return false;
+}
+
+static int ksz886x_cable_test_fault_length(u16 status)
+{
+	int dt;
+
+	/* According to the data sheet the distance to the fault is
+	 * DELTA_TIME * 0.4 meters.
+	 */
+	dt = FIELD_GET(KSZ8081_LMD_DELTA_TIME_MASK, status);
+
+	return (dt * 400) / 10;
+}
+
+static int ksz886x_cable_test_wait_for_completion(struct phy_device *phydev)
+{
+	int val, ret;
+
+	ret = phy_read_poll_timeout(phydev, KSZ8081_LMD, val,
+				    !(val & KSZ8081_LMD_ENABLE_TEST),
+				    30000, 100000, true);
+
+	return ret < 0 ? ret : 0;
+}
+
+static int ksz886x_cable_test_one_pair(struct phy_device *phydev, int pair)
+{
+	static const int ethtool_pair[] = {
+		ETHTOOL_A_CABLE_PAIR_A,
+		ETHTOOL_A_CABLE_PAIR_B,
+	};
+	int ret, val, mdix;
+
+	/* There is no way to choice the pair, like we do one ksz9031.
+	 * We can workaround this limitation by using the MDI-X functionality.
+	 */
+	if (pair == 0)
+		mdix = ETH_TP_MDI;
+	else
+		mdix = ETH_TP_MDI_X;
+
+	switch (phydev->phy_id & MICREL_PHY_ID_MASK) {
+	case PHY_ID_KSZ8081:
+		ret = ksz8081_config_mdix(phydev, mdix);
+		break;
+	case PHY_ID_KSZ886X:
+		ret = ksz886x_config_mdix(phydev, mdix);
+		break;
+	default:
+		ret = -ENODEV;
+	}
+
+	if (ret)
+		return ret;
+
+	/* Now we are ready to fire. This command will send a 100ns pulse
+	 * to the pair.
+	 */
+	ret = phy_write(phydev, KSZ8081_LMD, KSZ8081_LMD_ENABLE_TEST);
+	if (ret)
+		return ret;
+
+	ret = ksz886x_cable_test_wait_for_completion(phydev);
+	if (ret)
+		return ret;
+
+	val = phy_read(phydev, KSZ8081_LMD);
+	if (val < 0)
+		return val;
+
+	if (ksz886x_cable_test_failed(val))
+		return -EAGAIN;
+
+	ret = ethnl_cable_test_result(phydev, ethtool_pair[pair],
+				      ksz886x_cable_test_result_trans(val));
+	if (ret)
+		return ret;
+
+	if (!ksz886x_cable_test_fault_length_valid(val))
+		return 0;
+
+	return ethnl_cable_test_fault_length(phydev, ethtool_pair[pair],
+					     ksz886x_cable_test_fault_length(val));
+}
+
+static int ksz886x_cable_test_get_status(struct phy_device *phydev,
+					 bool *finished)
+{
+	unsigned long pair_mask = 0x3;
+	int retries = 20;
+	int pair, ret;
+
+	*finished = false;
+
+	/* Try harder if link partner is active */
+	while (pair_mask && retries--) {
+		for_each_set_bit(pair, &pair_mask, 4) {
+			ret = ksz886x_cable_test_one_pair(phydev, pair);
+			if (ret == -EAGAIN)
+				continue;
+			if (ret < 0)
+				return ret;
+			clear_bit(pair, &pair_mask);
+		}
+		/* If link partner is in autonegotiation mode it will send 2ms
+		 * of FLPs with at least 6ms of silence.
+		 * Add 2ms sleep to have better chances to hit this silence.
+		 */
+		if (pair_mask)
+			msleep(2);
+	}
+
+	*finished = true;
+
+	return ret;
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	.phy_id		= PHY_ID_KS8737,
@@ -1469,6 +1643,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id		= PHY_ID_KSZ8081,
 	.name		= "Micrel KSZ8081 or KSZ8091",
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
+	.flags		= PHY_POLL_CABLE_TEST,
 	/* PHY_BASIC_FEATURES */
 	.driver_data	= &ksz8081_type,
 	.probe		= kszphy_probe,
@@ -1483,6 +1658,8 @@ static struct phy_driver ksphy_driver[] = {
 	.get_stats	= kszphy_get_stats,
 	.suspend	= kszphy_suspend,
 	.resume		= kszphy_resume,
+	.cable_test_start	= ksz886x_cable_test_start,
+	.cable_test_get_status	= ksz886x_cable_test_get_status,
 }, {
 	.phy_id		= PHY_ID_KSZ8061,
 	.name		= "Micrel KSZ8061",
@@ -1571,11 +1748,14 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8851 Ethernet MAC or KSZ886X Switch",
 	/* PHY_BASIC_FEATURES */
+	.flags		= PHY_POLL_CABLE_TEST,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz886x_config_aneg,
 	.read_status	= ksz886x_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
+	.cable_test_start	= ksz886x_cable_test_start,
+	.cable_test_get_status	= ksz886x_cable_test_get_status,
 }, {
 	.name		= "Micrel KSZ87XX Switch",
 	/* PHY_BASIC_FEATURES */
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 58370abd9f4f..3d43c60b49fa 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -39,6 +39,7 @@
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
 #define MICREL_PHY_FXEN		0x00000002
+#define MICREL_KSZ8_P1_ERRATA	0x00000003
 
 #define MICREL_KSZ9021_EXTREG_CTRL	0xB
 #define MICREL_KSZ9021_EXTREG_DATA_WRITE	0xC

From 673ead2431e205ba9e9ccd6059532564ab83eb0a Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Mon, 14 Jun 2021 00:20:45 -0500
Subject: [PATCH 1697/3292] ibmvnic: fix send_request_map incompatible argument

The 3rd argument is u32 by function definition while it is __be32
by function declaration.

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 99eddb2c8e36..2d8804ebdf96 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -95,7 +95,7 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
 					struct ibmvnic_sub_crq_queue *);
 static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_query_map(struct ibmvnic_adapter *adapter);
-static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
+static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
 static int send_request_unmap(struct ibmvnic_adapter *, u8);
 static int send_login(struct ibmvnic_adapter *adapter);
 static void send_query_cap(struct ibmvnic_adapter *adapter);

From ea99750e401972c896d2212ea9244d903b8fbf73 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Mon, 14 Jun 2021 10:16:40 +0200
Subject: [PATCH 1698/3292] net: wwan: iosm: Remove DEBUG flag

Author forgot to remove that flag.

Fixes: f7af616c632e ("net: iosm: infrastructure")
Reported-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/Makefile | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wwan/iosm/Makefile b/drivers/net/wwan/iosm/Makefile
index cdeeb9357af6..4f9f0ae398e1 100644
--- a/drivers/net/wwan/iosm/Makefile
+++ b/drivers/net/wwan/iosm/Makefile
@@ -21,6 +21,3 @@ iosm-y = \
 	iosm_ipc_mux_codec.o
 
 obj-$(CONFIG_IOSM) := iosm.o
-
-# compilation flags
-ccflags-y += -DDEBUG

From ddee9dbc3d7aec1cd9fdcc671db2dd0016fd0f3d Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:12 +0300
Subject: [PATCH 1699/3292] net: core: devlink: add dropped stats traps field

Whenever query statistics is issued for trap, devlink subsystem
would also fill-in statistics 'dropped' field. This field indicates
the number of packets HW dropped and failed to report to the device driver,
and thus - to the devlink subsystem itself.
In case if device driver didn't register callback for hard drop
statistics querying, 'dropped' field will be omitted and not filled.

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 10 ++++++++
 net/core/devlink.c    | 53 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index eb045f1b5d1d..57b738b78073 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1347,6 +1347,16 @@ struct devlink_ops {
 				     const struct devlink_trap_group *group,
 				     enum devlink_trap_action action,
 				     struct netlink_ext_ack *extack);
+	/**
+	 * @trap_drop_counter_get: Trap drop counter get function.
+	 *
+	 * Should be used by device drivers to report number of packets
+	 * that have been dropped, and cannot be passed to the devlink
+	 * subsystem by the underlying device.
+	 */
+	int (*trap_drop_counter_get)(struct devlink *devlink,
+				     const struct devlink_trap *trap,
+				     u64 *p_drops);
 	/**
 	 * @trap_policer_init: Trap policer initialization function.
 	 *
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3bdb7eac730a..566ddd147633 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -7519,8 +7519,9 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
 	}
 }
 
-static int devlink_trap_stats_put(struct sk_buff *msg,
-				  struct devlink_stats __percpu *trap_stats)
+static int
+devlink_trap_group_stats_put(struct sk_buff *msg,
+			     struct devlink_stats __percpu *trap_stats)
 {
 	struct devlink_stats stats;
 	struct nlattr *attr;
@@ -7548,6 +7549,50 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
+				  const struct devlink_trap_item *trap_item)
+{
+	struct devlink_stats stats;
+	struct nlattr *attr;
+	u64 drops = 0;
+	int err;
+
+	if (devlink->ops->trap_drop_counter_get) {
+		err = devlink->ops->trap_drop_counter_get(devlink,
+							  trap_item->trap,
+							  &drops);
+		if (err)
+			return err;
+	}
+
+	devlink_trap_stats_read(trap_item->stats, &stats);
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (devlink->ops->trap_drop_counter_get &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+			      DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+			      stats.rx_packets, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+			      stats.rx_bytes, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
 static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
 				const struct devlink_trap_item *trap_item,
 				enum devlink_command cmd, u32 portid, u32 seq,
@@ -7585,7 +7630,7 @@ static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (err)
 		goto nla_put_failure;
 
-	err = devlink_trap_stats_put(msg, trap_item->stats);
+	err = devlink_trap_stats_put(msg, devlink, trap_item);
 	if (err)
 		goto nla_put_failure;
 
@@ -7802,7 +7847,7 @@ devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
 			group_item->policer_item->policer->id))
 		goto nla_put_failure;
 
-	err = devlink_trap_stats_put(msg, group_item->stats);
+	err = devlink_trap_group_stats_put(msg, group_item->stats);
 	if (err)
 		goto nla_put_failure;
 

From 53f1bd6b281945f82b48ec44be34488ee9765cb8 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:13 +0300
Subject: [PATCH 1700/3292] testing: selftests: net: forwarding: add
 devlink-required functionality to test (hard) dropped stats field

Add devlink_trap_drop_packets_get function, as well as test that are
used to verify devlink (hard) dropped stats functionality works.

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/devlink_lib.sh   | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 39fb9b8e7b58..13d3d4428a32 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -324,6 +324,14 @@ devlink_trap_rx_bytes_get()
 		| jq '.[][][]["stats"]["rx"]["bytes"]'
 }
 
+devlink_trap_drop_packets_get()
+{
+	local trap_name=$1; shift
+
+	devlink -js trap show $DEVLINK_DEV trap $trap_name \
+		| jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
 devlink_trap_stats_idle_test()
 {
 	local trap_name=$1; shift
@@ -345,6 +353,24 @@ devlink_trap_stats_idle_test()
 	fi
 }
 
+devlink_trap_drop_stats_idle_test()
+{
+	local trap_name=$1; shift
+	local t0_packets t0_bytes
+
+	t0_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+	sleep 1
+
+	t1_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		return 0
+	else
+		return 1
+	fi
+}
+
 devlink_traps_enable_all()
 {
 	local trap_name

From a7b3527a43feb017f48c699d859aef787c8af031 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:14 +0300
Subject: [PATCH 1701/3292] drivers: net: netdevsim: add devlink
 trap_drop_counter_get implementation

Whenever query statistics is issued for trap with DROP action,
devlink subsystem would also fill-in statistics 'dropped' field.
In case if device driver did't register callback for hard drop
statistics querying, 'dropped' field will be omitted and not filled.
Add trap_drop_counter_get callback implementation to the netdevsim.
Add new test cases for netdevsim, to test both the callback
functionality, as well as drop statistics alteration check.

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c       | 22 ++++++++++++++++++++++
 drivers/net/netdevsim/netdevsim.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 6f4bc70049d2..d85521989753 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -269,6 +269,9 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 		err = PTR_ERR(nsim_dev->nodes_ddir);
 		goto err_out;
 	}
+	debugfs_create_bool("fail_trap_counter_get", 0600,
+			    nsim_dev->ddir,
+			    &nsim_dev->fail_trap_counter_get);
 	nsim_udp_tunnels_debugfs_create(nsim_dev);
 	return 0;
 
@@ -563,6 +566,7 @@ struct nsim_trap_data {
 	struct delayed_work trap_report_dw;
 	struct nsim_trap_item *trap_items_arr;
 	u64 *trap_policers_cnt_arr;
+	u64 trap_pkt_cnt;
 	struct nsim_dev *nsim_dev;
 	spinlock_t trap_lock;	/* Protects trap_items_arr */
 };
@@ -1203,6 +1207,23 @@ static int nsim_rate_node_parent_set(struct devlink_rate *child,
 	return 0;
 }
 
+static int
+nsim_dev_devlink_trap_hw_counter_get(struct devlink *devlink,
+				     const struct devlink_trap *trap,
+				     u64 *p_drops)
+{
+	struct nsim_dev *nsim_dev = devlink_priv(devlink);
+	u64 *cnt;
+
+	if (nsim_dev->fail_trap_counter_get)
+		return -EINVAL;
+
+	cnt = &nsim_dev->trap_data->trap_pkt_cnt;
+	*p_drops = (*cnt)++;
+
+	return 0;
+}
+
 static const struct devlink_ops nsim_dev_devlink_ops = {
 	.eswitch_mode_set = nsim_devlink_eswitch_mode_set,
 	.eswitch_mode_get = nsim_devlink_eswitch_mode_get,
@@ -1226,6 +1247,7 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 	.rate_node_del = nsim_rate_node_del,
 	.rate_leaf_parent_set = nsim_rate_leaf_parent_set,
 	.rate_node_parent_set = nsim_rate_node_parent_set,
+	.trap_drop_counter_get = nsim_dev_devlink_trap_hw_counter_get,
 };
 
 #define NSIM_DEV_MAX_MACS_DEFAULT 32
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index cdfdf2a99578..f2304e61919a 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -249,6 +249,7 @@ struct nsim_dev {
 	bool fail_trap_group_set;
 	bool fail_trap_policer_set;
 	bool fail_trap_policer_counter_get;
+	bool fail_trap_counter_get;
 	struct {
 		struct udp_tunnel_nic_shared utn_shared;
 		u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];

From 7a4f54798a53db5f94d6e0bd1b0bfe53900fc058 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:15 +0300
Subject: [PATCH 1702/3292] testing: selftests: drivers: net: netdevsim:
 devlink: add test case for hard drop statistics

Add hard drop counter check testcase, to make sure netdevsim driver
properly handles the devlink hard drop counters get/set callbacks.

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/netdevsim/devlink_trap.sh    | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index 6165901a1cf3..109900c817be 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -165,6 +165,16 @@ trap_stats_test()
 			devlink_trap_action_set $trap_name "drop"
 			devlink_trap_stats_idle_test $trap_name
 			check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+			echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+			devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+			check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+			echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+			devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+			check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+			devlink_trap_drop_stats_idle_test $trap_name
+			check_fail $? "Drop stats of trap $trap_name idle when should not"
 		else
 			devlink_trap_stats_idle_test $trap_name
 			check_fail $? "Stats of non-drop trap $trap_name idle when should not"

From 0a9003f45e9105628a2437ae1d8877e695cb34b8 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:16 +0300
Subject: [PATCH 1703/3292] net: marvell: prestera: devlink: add traps/groups
 implementation

Add devlink traps registration (with corresponding groups) for
all the traffic types that driver traps to the CPU;
prestera_rxtx: report each packet trapped to the CPU (RX) to the
prestera_devlink;

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/prestera/prestera.h  |   2 +
 .../marvell/prestera/prestera_devlink.c       | 439 +++++++++++++++++-
 .../marvell/prestera/prestera_devlink.h       |   3 +
 .../ethernet/marvell/prestera/prestera_dsa.c  |   3 +
 .../ethernet/marvell/prestera/prestera_dsa.h  |   1 +
 .../ethernet/marvell/prestera/prestera_rxtx.c |   7 +-
 6 files changed, 452 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index ad0f33a7e517..6353f1c67638 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -170,6 +170,7 @@ struct prestera_event {
 
 struct prestera_switchdev;
 struct prestera_rxtx;
+struct prestera_trap_data;
 
 struct prestera_switch {
 	struct prestera_device *dev;
@@ -177,6 +178,7 @@ struct prestera_switch {
 	struct prestera_rxtx *rxtx;
 	struct list_head event_handlers;
 	struct notifier_block netdev_nb;
+	struct prestera_trap_data *trap_data;
 	char base_mac[ETH_ALEN];
 	struct list_head port_list;
 	rwlock_t port_list_lock;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
index 94c185a0e2b8..f59727f050ba 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
@@ -5,6 +5,276 @@
 
 #include "prestera_devlink.h"
 
+/* All driver-specific traps must be documented in
+ * Documentation/networking/devlink/prestera.rst
+ */
+enum {
+	DEVLINK_PRESTERA_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
+	DEVLINK_PRESTERA_TRAP_ID_ARP_BC,
+	DEVLINK_PRESTERA_TRAP_ID_IS_IS,
+	DEVLINK_PRESTERA_TRAP_ID_OSPF,
+	DEVLINK_PRESTERA_TRAP_ID_IP_BC_MAC,
+	DEVLINK_PRESTERA_TRAP_ID_ROUTER_MC,
+	DEVLINK_PRESTERA_TRAP_ID_VRRP,
+	DEVLINK_PRESTERA_TRAP_ID_DHCP,
+	DEVLINK_PRESTERA_TRAP_ID_MAC_TO_ME,
+	DEVLINK_PRESTERA_TRAP_ID_IPV4_OPTIONS,
+	DEVLINK_PRESTERA_TRAP_ID_IP_DEFAULT_ROUTE,
+	DEVLINK_PRESTERA_TRAP_ID_IP_TO_ME,
+	DEVLINK_PRESTERA_TRAP_ID_IPV4_ICMP_REDIRECT,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_0,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_1,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_2,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_3,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_4,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_5,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_6,
+	DEVLINK_PRESTERA_TRAP_ID_ACL_CODE_7,
+	DEVLINK_PRESTERA_TRAP_ID_BGP,
+	DEVLINK_PRESTERA_TRAP_ID_SSH,
+	DEVLINK_PRESTERA_TRAP_ID_TELNET,
+	DEVLINK_PRESTERA_TRAP_ID_ICMP,
+};
+
+#define DEVLINK_PRESTERA_TRAP_NAME_ARP_BC \
+	"arp_bc"
+#define DEVLINK_PRESTERA_TRAP_NAME_IS_IS \
+	"is_is"
+#define DEVLINK_PRESTERA_TRAP_NAME_OSPF \
+	"ospf"
+#define DEVLINK_PRESTERA_TRAP_NAME_IP_BC_MAC \
+	"ip_bc_mac"
+#define DEVLINK_PRESTERA_TRAP_NAME_ROUTER_MC \
+	"router_mc"
+#define DEVLINK_PRESTERA_TRAP_NAME_VRRP \
+	"vrrp"
+#define DEVLINK_PRESTERA_TRAP_NAME_DHCP \
+	"dhcp"
+#define DEVLINK_PRESTERA_TRAP_NAME_MAC_TO_ME \
+	"mac_to_me"
+#define DEVLINK_PRESTERA_TRAP_NAME_IPV4_OPTIONS \
+	"ipv4_options"
+#define DEVLINK_PRESTERA_TRAP_NAME_IP_DEFAULT_ROUTE \
+	"ip_default_route"
+#define DEVLINK_PRESTERA_TRAP_NAME_IP_TO_ME \
+	"ip_to_me"
+#define DEVLINK_PRESTERA_TRAP_NAME_IPV4_ICMP_REDIRECT \
+	"ipv4_icmp_redirect"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_0 \
+	"acl_code_0"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_1 \
+	"acl_code_1"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_2 \
+	"acl_code_2"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_3 \
+	"acl_code_3"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_4 \
+	"acl_code_4"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_5 \
+	"acl_code_5"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_6 \
+	"acl_code_6"
+#define DEVLINK_PRESTERA_TRAP_NAME_ACL_CODE_7 \
+	"acl_code_7"
+#define DEVLINK_PRESTERA_TRAP_NAME_BGP \
+	"bgp"
+#define DEVLINK_PRESTERA_TRAP_NAME_SSH \
+	"ssh"
+#define DEVLINK_PRESTERA_TRAP_NAME_TELNET \
+	"telnet"
+#define DEVLINK_PRESTERA_TRAP_NAME_ICMP \
+	"icmp"
+
+struct prestera_trap {
+	struct devlink_trap trap;
+	u8 cpu_code;
+};
+
+struct prestera_trap_item {
+	enum devlink_trap_action action;
+	void *trap_ctx;
+};
+
+struct prestera_trap_data {
+	struct prestera_switch *sw;
+	struct prestera_trap_item *trap_items_arr;
+	u32 traps_count;
+};
+
+#define PRESTERA_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
+
+#define PRESTERA_TRAP_CONTROL(_id, _group_id, _action)			      \
+	DEVLINK_TRAP_GENERIC(CONTROL, _action, _id,			      \
+			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			     PRESTERA_TRAP_METADATA)
+
+#define PRESTERA_TRAP_DRIVER_CONTROL(_id, _group_id)			      \
+	DEVLINK_TRAP_DRIVER(CONTROL, TRAP, DEVLINK_PRESTERA_TRAP_ID_##_id,    \
+			    DEVLINK_PRESTERA_TRAP_NAME_##_id,		      \
+			    DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			    PRESTERA_TRAP_METADATA)
+
+#define PRESTERA_TRAP_EXCEPTION(_id, _group_id)				      \
+	DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id,			      \
+			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			     PRESTERA_TRAP_METADATA)
+
+#define PRESTERA_TRAP_DRIVER_EXCEPTION(_id, _group_id)			      \
+	DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, DEVLINK_PRESTERA_TRAP_ID_##_id,  \
+			    DEVLINK_PRESTERA_TRAP_NAME_##_id,		      \
+			    DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			    PRESTERA_TRAP_METADATA)
+
+static const struct devlink_trap_group prestera_trap_groups_arr[] = {
+	/* No policer is associated with following groups (policerid == 0)*/
+	DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(NEIGH_DISCOVERY, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(OSPF, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(STP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(LACP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(LLDP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(VRRP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(DHCP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(BGP, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(LOCAL_DELIVERY, 0),
+};
+
+/* Initialize trap list, as well as associate CPU code with them. */
+static struct prestera_trap prestera_trap_items_arr[] = {
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ARP_BC, NEIGH_DISCOVERY),
+		.cpu_code = 5,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(IS_IS, LOCAL_DELIVERY),
+		.cpu_code = 13,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(OSPF, OSPF),
+		.cpu_code = 16,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(IP_BC_MAC, LOCAL_DELIVERY),
+		.cpu_code = 19,
+	},
+	{
+		.trap = PRESTERA_TRAP_CONTROL(STP, STP, TRAP),
+		.cpu_code = 26,
+	},
+	{
+		.trap = PRESTERA_TRAP_CONTROL(LACP, LACP, TRAP),
+		.cpu_code = 27,
+	},
+	{
+		.trap = PRESTERA_TRAP_CONTROL(LLDP, LLDP, TRAP),
+		.cpu_code = 28,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ROUTER_MC, LOCAL_DELIVERY),
+		.cpu_code = 29,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(VRRP, VRRP),
+		.cpu_code = 30,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(DHCP, DHCP),
+		.cpu_code = 33,
+	},
+	{
+		.trap = PRESTERA_TRAP_EXCEPTION(MTU_ERROR, L3_EXCEPTIONS),
+		.cpu_code = 63,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(MAC_TO_ME, LOCAL_DELIVERY),
+		.cpu_code = 65,
+	},
+	{
+		.trap = PRESTERA_TRAP_EXCEPTION(TTL_ERROR, L3_EXCEPTIONS),
+		.cpu_code = 133,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_EXCEPTION(IPV4_OPTIONS,
+						       L3_EXCEPTIONS),
+		.cpu_code = 141,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(IP_DEFAULT_ROUTE,
+						     LOCAL_DELIVERY),
+		.cpu_code = 160,
+	},
+	{
+		.trap = PRESTERA_TRAP_CONTROL(LOCAL_ROUTE, LOCAL_DELIVERY,
+					      TRAP),
+		.cpu_code = 161,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_EXCEPTION(IPV4_ICMP_REDIRECT,
+						       L3_EXCEPTIONS),
+		.cpu_code = 180,
+	},
+	{
+		.trap = PRESTERA_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY,
+					      TRAP),
+		.cpu_code = 188,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_0, ACL_TRAP),
+		.cpu_code = 192,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_1, ACL_TRAP),
+		.cpu_code = 193,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_2, ACL_TRAP),
+		.cpu_code = 194,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_3, ACL_TRAP),
+		.cpu_code = 195,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_4, ACL_TRAP),
+		.cpu_code = 196,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_5, ACL_TRAP),
+		.cpu_code = 197,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_6, ACL_TRAP),
+		.cpu_code = 198,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ACL_CODE_7, ACL_TRAP),
+		.cpu_code = 199,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(BGP, BGP),
+		.cpu_code = 206,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(SSH, LOCAL_DELIVERY),
+		.cpu_code = 207,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(TELNET, LOCAL_DELIVERY),
+		.cpu_code = 208,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ICMP, LOCAL_DELIVERY),
+		.cpu_code = 209,
+	},
+};
+
+static void prestera_devlink_traps_fini(struct prestera_switch *sw);
+
 static int prestera_dl_info_get(struct devlink *dl,
 				struct devlink_info_req *req,
 				struct netlink_ext_ack *extack)
@@ -27,8 +297,20 @@ static int prestera_dl_info_get(struct devlink *dl,
 					       buf);
 }
 
+static int prestera_trap_init(struct devlink *devlink,
+			      const struct devlink_trap *trap, void *trap_ctx);
+
+static int prestera_trap_action_set(struct devlink *devlink,
+				    const struct devlink_trap *trap,
+				    enum devlink_trap_action action,
+				    struct netlink_ext_ack *extack);
+
+static int prestera_devlink_traps_register(struct prestera_switch *sw);
+
 static const struct devlink_ops prestera_dl_ops = {
 	.info_get = prestera_dl_info_get,
+	.trap_init = prestera_trap_init,
+	.trap_action_set = prestera_trap_action_set,
 };
 
 struct prestera_switch *prestera_devlink_alloc(void)
@@ -53,17 +335,32 @@ int prestera_devlink_register(struct prestera_switch *sw)
 	int err;
 
 	err = devlink_register(dl, sw->dev->dev);
-	if (err)
+	if (err) {
 		dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err);
+		return err;
+	}
 
-	return err;
+	err = prestera_devlink_traps_register(sw);
+	if (err) {
+		devlink_unregister(dl);
+		dev_err(sw->dev->dev, "devlink_traps_register failed: %d\n",
+			err);
+		return err;
+	}
+
+	return 0;
 }
 
 void prestera_devlink_unregister(struct prestera_switch *sw)
 {
+	struct prestera_trap_data *trap_data = sw->trap_data;
 	struct devlink *dl = priv_to_devlink(sw);
 
+	prestera_devlink_traps_fini(sw);
 	devlink_unregister(dl);
+
+	kfree(trap_data->trap_items_arr);
+	kfree(trap_data);
 }
 
 int prestera_devlink_port_register(struct prestera_port *port)
@@ -110,3 +407,141 @@ struct devlink_port *prestera_devlink_get_port(struct net_device *dev)
 
 	return &port->dl_port;
 }
+
+static int prestera_devlink_traps_register(struct prestera_switch *sw)
+{
+	const u32 groups_count = ARRAY_SIZE(prestera_trap_groups_arr);
+	const u32 traps_count = ARRAY_SIZE(prestera_trap_items_arr);
+	struct devlink *devlink = priv_to_devlink(sw);
+	struct prestera_trap_data *trap_data;
+	struct prestera_trap *prestera_trap;
+	int err, i;
+
+	trap_data = kzalloc(sizeof(*trap_data), GFP_KERNEL);
+	if (!trap_data)
+		return -ENOMEM;
+
+	trap_data->trap_items_arr = kcalloc(traps_count,
+					    sizeof(struct prestera_trap_item),
+					    GFP_KERNEL);
+	if (!trap_data->trap_items_arr) {
+		err = -ENOMEM;
+		goto err_trap_items_alloc;
+	}
+
+	trap_data->sw = sw;
+	trap_data->traps_count = traps_count;
+	sw->trap_data = trap_data;
+
+	err = devlink_trap_groups_register(devlink, prestera_trap_groups_arr,
+					   groups_count);
+	if (err)
+		goto err_groups_register;
+
+	for (i = 0; i < traps_count; i++) {
+		prestera_trap = &prestera_trap_items_arr[i];
+		err = devlink_traps_register(devlink, &prestera_trap->trap, 1,
+					     sw);
+		if (err)
+			goto err_trap_register;
+	}
+
+	return 0;
+
+err_trap_register:
+	for (i--; i >= 0; i--) {
+		prestera_trap = &prestera_trap_items_arr[i];
+		devlink_traps_unregister(devlink, &prestera_trap->trap, 1);
+	}
+err_groups_register:
+	kfree(trap_data->trap_items_arr);
+err_trap_items_alloc:
+	kfree(trap_data);
+	return err;
+}
+
+static struct prestera_trap_item *
+prestera_get_trap_item_by_cpu_code(struct prestera_switch *sw, u8 cpu_code)
+{
+	struct prestera_trap_data *trap_data = sw->trap_data;
+	struct prestera_trap *prestera_trap;
+	int i;
+
+	for (i = 0; i < trap_data->traps_count; i++) {
+		prestera_trap = &prestera_trap_items_arr[i];
+		if (cpu_code == prestera_trap->cpu_code)
+			return &trap_data->trap_items_arr[i];
+	}
+
+	return NULL;
+}
+
+void prestera_devlink_trap_report(struct prestera_port *port,
+				  struct sk_buff *skb, u8 cpu_code)
+{
+	struct prestera_trap_item *trap_item;
+	struct devlink *devlink;
+
+	devlink = port->dl_port.devlink;
+
+	trap_item = prestera_get_trap_item_by_cpu_code(port->sw, cpu_code);
+	if (unlikely(!trap_item))
+		return;
+
+	devlink_trap_report(devlink, skb, trap_item->trap_ctx,
+			    &port->dl_port, NULL);
+}
+
+static struct prestera_trap_item *
+prestera_devlink_trap_item_lookup(struct prestera_switch *sw, u16 trap_id)
+{
+	struct prestera_trap_data *trap_data = sw->trap_data;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(prestera_trap_items_arr); i++) {
+		if (prestera_trap_items_arr[i].trap.id == trap_id)
+			return &trap_data->trap_items_arr[i];
+	}
+
+	return NULL;
+}
+
+static int prestera_trap_init(struct devlink *devlink,
+			      const struct devlink_trap *trap, void *trap_ctx)
+{
+	struct prestera_switch *sw = devlink_priv(devlink);
+	struct prestera_trap_item *trap_item;
+
+	trap_item = prestera_devlink_trap_item_lookup(sw, trap->id);
+	if (WARN_ON(!trap_item))
+		return -EINVAL;
+
+	trap_item->trap_ctx = trap_ctx;
+	trap_item->action = trap->init_action;
+
+	return 0;
+}
+
+static int prestera_trap_action_set(struct devlink *devlink,
+				    const struct devlink_trap *trap,
+				    enum devlink_trap_action action,
+				    struct netlink_ext_ack *extack)
+{
+	/* Currently, driver does not support trap action altering */
+	return -EOPNOTSUPP;
+}
+
+static void prestera_devlink_traps_fini(struct prestera_switch *sw)
+{
+	struct devlink *dl = priv_to_devlink(sw);
+	const struct devlink_trap *trap;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(prestera_trap_items_arr); ++i) {
+		trap = &prestera_trap_items_arr[i].trap;
+		devlink_traps_unregister(dl, trap, 1);
+	}
+
+	devlink_trap_groups_unregister(dl, prestera_trap_groups_arr,
+				       ARRAY_SIZE(prestera_trap_groups_arr));
+}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h
index 51bee9f75415..5d73aa9db897 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h
@@ -20,4 +20,7 @@ void prestera_devlink_port_clear(struct prestera_port *port);
 
 struct devlink_port *prestera_devlink_get_port(struct net_device *dev);
 
+void prestera_devlink_trap_report(struct prestera_port *port,
+				  struct sk_buff *skb, u8 cpu_code);
+
 #endif /* _PRESTERA_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_dsa.c b/drivers/net/ethernet/marvell/prestera/prestera_dsa.c
index a5e01c7a307b..b7e89c0ca5c0 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_dsa.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_dsa.c
@@ -19,6 +19,7 @@
 #define PRESTERA_DSA_W1_EXT_BIT		BIT(31)
 #define PRESTERA_DSA_W1_CFI_BIT		BIT(30)
 #define PRESTERA_DSA_W1_PORT_NUM	GENMASK(11, 10)
+#define PRESTERA_DSA_W1_MASK_CPU_CODE	GENMASK(7, 0)
 
 #define PRESTERA_DSA_W2_EXT_BIT		BIT(31)
 #define PRESTERA_DSA_W2_PORT_NUM	BIT(20)
@@ -74,6 +75,8 @@ int prestera_dsa_parse(struct prestera_dsa *dsa, const u8 *dsa_buf)
 			(FIELD_GET(PRESTERA_DSA_W1_PORT_NUM, words[1]) << 5) |
 			(FIELD_GET(PRESTERA_DSA_W2_PORT_NUM, words[2]) << 7);
 
+	dsa->cpu_code = FIELD_GET(PRESTERA_DSA_W1_MASK_CPU_CODE, words[1]);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_dsa.h b/drivers/net/ethernet/marvell/prestera/prestera_dsa.h
index 67018629bdd2..c99342f475cf 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_dsa.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_dsa.h
@@ -27,6 +27,7 @@ struct prestera_dsa {
 	struct prestera_dsa_vlan vlan;
 	u32 hw_dev_num;
 	u32 port_num;
+	u8 cpu_code;
 };
 
 int prestera_dsa_parse(struct prestera_dsa *dsa, const u8 *dsa_buf);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
index 2a13c318048c..73d2eba5262f 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c
@@ -14,6 +14,7 @@
 #include "prestera.h"
 #include "prestera_hw.h"
 #include "prestera_rxtx.h"
+#include "prestera_devlink.h"
 
 #define PRESTERA_SDMA_WAIT_MUL		10
 
@@ -214,9 +215,10 @@ static struct sk_buff *prestera_sdma_rx_skb_get(struct prestera_sdma *sdma,
 static int prestera_rxtx_process_skb(struct prestera_sdma *sdma,
 				     struct sk_buff *skb)
 {
-	const struct prestera_port *port;
+	struct prestera_port *port;
 	struct prestera_dsa dsa;
 	u32 hw_port, dev_id;
+	u8 cpu_code;
 	int err;
 
 	skb_pull(skb, ETH_HLEN);
@@ -259,6 +261,9 @@ static int prestera_rxtx_process_skb(struct prestera_sdma *sdma,
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tci);
 	}
 
+	cpu_code = dsa.cpu_code;
+	prestera_devlink_trap_report(port, skb, cpu_code);
+
 	return 0;
 }
 

From a80cf955c9e51d137fc772510b213c8d2c96c1d3 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:17 +0300
Subject: [PATCH 1704/3292] net: marvell: prestera: devlink: add traps with
 DROP action

Add traps that have init_action being set to DROP.
Add 'trap_drop_counter_get' (devlink API) callback implementation,
that is used to get number of packets that have been dropped by the HW
(traps with action 'DROP').
Add new FW command CPU_CODE_COUNTERS_GET.

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../marvell/prestera/prestera_devlink.c       | 91 +++++++++++++++++++
 .../ethernet/marvell/prestera/prestera_hw.c   | 35 +++++++
 .../ethernet/marvell/prestera/prestera_hw.h   | 11 +++
 3 files changed, 137 insertions(+)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
index f59727f050ba..d12e21db9fd6 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
@@ -4,6 +4,7 @@
 #include <net/devlink.h>
 
 #include "prestera_devlink.h"
+#include "prestera_hw.h"
 
 /* All driver-specific traps must be documented in
  * Documentation/networking/devlink/prestera.rst
@@ -34,6 +35,15 @@ enum {
 	DEVLINK_PRESTERA_TRAP_ID_SSH,
 	DEVLINK_PRESTERA_TRAP_ID_TELNET,
 	DEVLINK_PRESTERA_TRAP_ID_ICMP,
+	DEVLINK_PRESTERA_TRAP_ID_MET_RED,
+	DEVLINK_PRESTERA_TRAP_ID_IP_SIP_IS_ZERO,
+	DEVLINK_PRESTERA_TRAP_ID_IP_UC_DIP_DA_MISMATCH,
+	DEVLINK_PRESTERA_TRAP_ID_ILLEGAL_IPV4_HDR,
+	DEVLINK_PRESTERA_TRAP_ID_ILLEGAL_IP_ADDR,
+	DEVLINK_PRESTERA_TRAP_ID_INVALID_SA,
+	DEVLINK_PRESTERA_TRAP_ID_LOCAL_PORT,
+	DEVLINK_PRESTERA_TRAP_ID_PORT_NO_VLAN,
+	DEVLINK_PRESTERA_TRAP_ID_RXDMA_DROP,
 };
 
 #define DEVLINK_PRESTERA_TRAP_NAME_ARP_BC \
@@ -84,6 +94,24 @@ enum {
 	"telnet"
 #define DEVLINK_PRESTERA_TRAP_NAME_ICMP \
 	"icmp"
+#define DEVLINK_PRESTERA_TRAP_NAME_RXDMA_DROP \
+	"rxdma_drop"
+#define DEVLINK_PRESTERA_TRAP_NAME_PORT_NO_VLAN \
+	"port_no_vlan"
+#define DEVLINK_PRESTERA_TRAP_NAME_LOCAL_PORT \
+	"local_port"
+#define DEVLINK_PRESTERA_TRAP_NAME_INVALID_SA \
+	"invalid_sa"
+#define DEVLINK_PRESTERA_TRAP_NAME_ILLEGAL_IP_ADDR \
+	"illegal_ip_addr"
+#define DEVLINK_PRESTERA_TRAP_NAME_ILLEGAL_IPV4_HDR \
+	"illegal_ipv4_hdr"
+#define DEVLINK_PRESTERA_TRAP_NAME_IP_UC_DIP_DA_MISMATCH \
+	"ip_uc_dip_da_mismatch"
+#define DEVLINK_PRESTERA_TRAP_NAME_IP_SIP_IS_ZERO \
+	"ip_sip_is_zero"
+#define DEVLINK_PRESTERA_TRAP_NAME_MET_RED \
+	"met_red"
 
 struct prestera_trap {
 	struct devlink_trap trap;
@@ -125,6 +153,12 @@ struct prestera_trap_data {
 			    DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
 			    PRESTERA_TRAP_METADATA)
 
+#define PRESTERA_TRAP_DRIVER_DROP(_id, _group_id)			      \
+	DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_PRESTERA_TRAP_ID_##_id,	      \
+			    DEVLINK_PRESTERA_TRAP_NAME_##_id,		      \
+			    DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			    PRESTERA_TRAP_METADATA)
+
 static const struct devlink_trap_group prestera_trap_groups_arr[] = {
 	/* No policer is associated with following groups (policerid == 0)*/
 	DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
@@ -142,6 +176,7 @@ static const struct devlink_trap_group prestera_trap_groups_arr[] = {
 	DEVLINK_TRAP_GROUP_GENERIC(DHCP, 0),
 	DEVLINK_TRAP_GROUP_GENERIC(BGP, 0),
 	DEVLINK_TRAP_GROUP_GENERIC(LOCAL_DELIVERY, 0),
+	DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS, 0),
 };
 
 /* Initialize trap list, as well as associate CPU code with them. */
@@ -271,10 +306,51 @@ static struct prestera_trap prestera_trap_items_arr[] = {
 		.trap = PRESTERA_TRAP_DRIVER_CONTROL(ICMP, LOCAL_DELIVERY),
 		.cpu_code = 209,
 	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(RXDMA_DROP, BUFFER_DROPS),
+		.cpu_code = 37,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(PORT_NO_VLAN, L2_DROPS),
+		.cpu_code = 39,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(LOCAL_PORT, L2_DROPS),
+		.cpu_code = 56,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(INVALID_SA, L2_DROPS),
+		.cpu_code = 60,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(ILLEGAL_IP_ADDR, L3_DROPS),
+		.cpu_code = 136,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(ILLEGAL_IPV4_HDR, L3_DROPS),
+		.cpu_code = 137,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(IP_UC_DIP_DA_MISMATCH,
+						  L3_DROPS),
+		.cpu_code = 138,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(IP_SIP_IS_ZERO, L3_DROPS),
+		.cpu_code = 145,
+	},
+	{
+		.trap = PRESTERA_TRAP_DRIVER_DROP(MET_RED, BUFFER_DROPS),
+		.cpu_code = 185,
+	},
 };
 
 static void prestera_devlink_traps_fini(struct prestera_switch *sw);
 
+static int prestera_drop_counter_get(struct devlink *devlink,
+				     const struct devlink_trap *trap,
+				     u64 *p_drops);
+
 static int prestera_dl_info_get(struct devlink *dl,
 				struct devlink_info_req *req,
 				struct netlink_ext_ack *extack)
@@ -311,6 +387,7 @@ static const struct devlink_ops prestera_dl_ops = {
 	.info_get = prestera_dl_info_get,
 	.trap_init = prestera_trap_init,
 	.trap_action_set = prestera_trap_action_set,
+	.trap_drop_counter_get = prestera_drop_counter_get,
 };
 
 struct prestera_switch *prestera_devlink_alloc(void)
@@ -531,6 +608,20 @@ static int prestera_trap_action_set(struct devlink *devlink,
 	return -EOPNOTSUPP;
 }
 
+static int prestera_drop_counter_get(struct devlink *devlink,
+				     const struct devlink_trap *trap,
+				     u64 *p_drops)
+{
+	struct prestera_switch *sw = devlink_priv(devlink);
+	enum prestera_hw_cpu_code_cnt_t cpu_code_type =
+		PRESTERA_HW_CPU_CODE_CNT_TYPE_DROP;
+	struct prestera_trap *prestera_trap =
+		container_of(trap, struct prestera_trap, trap);
+
+	return prestera_hw_cpu_code_counters_get(sw, prestera_trap->cpu_code,
+						 cpu_code_type, p_drops);
+}
+
 static void prestera_devlink_traps_fini(struct prestera_switch *sw)
 {
 	struct devlink *dl = priv_to_devlink(sw);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 886ce251330e..a4e3dc8d3abe 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -47,6 +47,8 @@ enum prestera_cmd_type_t {
 
 	PRESTERA_CMD_TYPE_STP_PORT_SET = 0x1000,
 
+	PRESTERA_CMD_TYPE_CPU_CODE_COUNTERS_GET = 0x2000,
+
 	PRESTERA_CMD_TYPE_ACK = 0x10000,
 	PRESTERA_CMD_TYPE_MAX
 };
@@ -330,6 +332,17 @@ struct prestera_msg_lag_req {
 	u16 lag_id;
 };
 
+struct prestera_msg_cpu_code_counter_req {
+	struct prestera_msg_cmd cmd;
+	u8 counter_type;
+	u8 code;
+};
+
+struct mvsw_msg_cpu_code_counter_ret {
+	struct prestera_msg_ret ret;
+	u64 packet_count;
+};
+
 struct prestera_msg_event {
 	u16 type;
 	u16 id;
@@ -1451,6 +1464,28 @@ int prestera_hw_lag_member_enable(struct prestera_port *port, u16 lag_id,
 	return prestera_cmd(port->sw, cmd, &req.cmd, sizeof(req));
 }
 
+int
+prestera_hw_cpu_code_counters_get(struct prestera_switch *sw, u8 code,
+				  enum prestera_hw_cpu_code_cnt_t counter_type,
+				  u64 *packet_count)
+{
+	struct prestera_msg_cpu_code_counter_req req = {
+		.counter_type = counter_type,
+		.code = code,
+	};
+	struct mvsw_msg_cpu_code_counter_ret resp;
+	int err;
+
+	err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_CPU_CODE_COUNTERS_GET,
+			       &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
+	if (err)
+		return err;
+
+	*packet_count = resp.packet_count;
+
+	return 0;
+}
+
 int prestera_hw_event_handler_register(struct prestera_switch *sw,
 				       enum prestera_event_type type,
 				       prestera_event_cb_t fn,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index 846bdc04e278..7f72d81cf918 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -89,6 +89,11 @@ enum {
 	PRESTERA_STP_FORWARD,
 };
 
+enum prestera_hw_cpu_code_cnt_t {
+	PRESTERA_HW_CPU_CODE_CNT_TYPE_DROP = 0,
+	PRESTERA_HW_CPU_CODE_CNT_TYPE_TRAP = 1,
+};
+
 struct prestera_switch;
 struct prestera_port;
 struct prestera_port_stats;
@@ -194,4 +199,10 @@ int prestera_hw_fdb_flush_lag(struct prestera_switch *sw, u16 lag_id,
 int prestera_hw_fdb_flush_lag_vlan(struct prestera_switch *sw,
 				   u16 lag_id, u16 vid, u32 mode);
 
+/* HW trap/drop counters API */
+int
+prestera_hw_cpu_code_counters_get(struct prestera_switch *sw, u8 code,
+				  enum prestera_hw_cpu_code_cnt_t counter_type,
+				  u64 *packet_count);
+
 #endif /* _PRESTERA_HW_H_ */

From 66826c43e63d5c5e8307bd36862d6334db9d98b7 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Mon, 14 Jun 2021 16:01:18 +0300
Subject: [PATCH 1705/3292] documentation: networking: devlink: add prestera
 switched driver Documentation

Add documentation for the devlink feature prestera switchdev driver supports:
add description for the support of the driver-specific devlink traps
(include both traps with action TRAP and action DROP);

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/prestera.rst | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 Documentation/networking/devlink/prestera.rst

diff --git a/Documentation/networking/devlink/prestera.rst b/Documentation/networking/devlink/prestera.rst
new file mode 100644
index 000000000000..e8b52ffd4707
--- /dev/null
+++ b/Documentation/networking/devlink/prestera.rst
@@ -0,0 +1,141 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+prestera devlink support
+=====================
+
+This document describes the devlink features implemented by the ``prestera``
+device driver.
+
+Driver-specific Traps
+=====================
+
+.. list-table:: List of Driver-specific Traps Registered by ``prestera``
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+.. list-table:: List of Driver-specific Traps Registered by ``prestera``
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``arp_bc``
+     - ``trap``
+     - Traps ARP broadcast packets (both requests/responses)
+   * - ``is_is``
+     - ``trap``
+     - Traps IS-IS packets
+   * - ``ospf``
+     - ``trap``
+     - Traps OSPF packets
+   * - ``ip_bc_mac``
+     - ``trap``
+     - Traps IPv4 packets with broadcast DA Mac address
+   * - ``stp``
+     - ``trap``
+     - Traps STP BPDU
+   * - ``lacp``
+     - ``trap``
+     - Traps LACP packets
+   * - ``lldp``
+     - ``trap``
+     - Traps LLDP packets
+   * - ``router_mc``
+     - ``trap``
+     - Traps multicast packets
+   * - ``vrrp``
+     - ``trap``
+     - Traps VRRP packets
+   * - ``dhcp``
+     - ``trap``
+     - Traps DHCP packets
+   * - ``mtu_error``
+     - ``trap``
+     - Traps (exception) packets that exceeded port's MTU
+   * - ``mac_to_me``
+     - ``trap``
+     -  Traps packets with switch-port's DA Mac address
+   * - ``ttl_error``
+     - ``trap``
+     - Traps (exception) IPv4 packets whose TTL exceeded
+   * - ``ipv4_options``
+     - ``trap``
+     - Traps (exception) packets due to the malformed IPV4 header options
+   * - ``ip_default_route``
+     - ``trap``
+     - Traps packets that have no specific IP interface (IP to me) and no forwarding prefix
+   * - ``local_route``
+     - ``trap``
+     - Traps packets that have been send to one of switch IP interfaces addresses
+   * - ``ipv4_icmp_redirect``
+     - ``trap``
+     - Traps (exception) IPV4 ICMP redirect packets
+   * - ``arp_response``
+     - ``trap``
+     - Traps ARP replies packets that have switch-port's DA Mac address
+   * - ``acl_code_0``
+     - ``trap``
+     - Traps packets that have ACL priority set to 0 (tc pref 0)
+   * - ``acl_code_1``
+     - ``trap``
+     - Traps packets that have ACL priority set to 1 (tc pref 1)
+   * - ``acl_code_2``
+     - ``trap``
+     - Traps packets that have ACL priority set to 2 (tc pref 2)
+   * - ``acl_code_3``
+     - ``trap``
+     - Traps packets that have ACL priority set to 3 (tc pref 3)
+   * - ``acl_code_4``
+     - ``trap``
+     - Traps packets that have ACL priority set to 4 (tc pref 4)
+   * - ``acl_code_5``
+     - ``trap``
+     - Traps packets that have ACL priority set to 5 (tc pref 5)
+   * - ``acl_code_6``
+     - ``trap``
+     - Traps packets that have ACL priority set to 6 (tc pref 6)
+   * - ``acl_code_7``
+     - ``trap``
+     - Traps packets that have ACL priority set to 7 (tc pref 7)
+   * - ``ipv4_bgp``
+     - ``trap``
+     - Traps IPv4 BGP packets
+   * - ``ssh``
+     - ``trap``
+     - Traps SSH packets
+   * - ``telnet``
+     - ``trap``
+     - Traps Telnet packets
+   * - ``icmp``
+     - ``trap``
+     - Traps ICMP packets
+   * - ``rxdma_drop``
+     - ``drop``
+     - Drops packets (RxDMA) due to the lack of ingress buffers etc.
+   * - ``port_no_vlan``
+     - ``drop``
+     - Drops packets due to faulty-configured network or due to internal bug (config issue).
+   * - ``local_port``
+     - ``drop``
+     - Drops packets whose decision (FDB entry) is to bridge packet back to the incoming port/trunk.
+   * - ``invalid_sa``
+     - ``drop``
+     - Drops packets with multicast source MAC address.
+   * - ``illegal_ip_addr``
+     - ``drop``
+     - Drops packets with illegal SIP/DIP multicast/unicast addresses.
+   * - ``illegal_ipv4_hdr``
+     - ``drop``
+     - Drops packets with illegal IPV4 header.
+   * - ``ip_uc_dip_da_mismatch``
+     - ``drop``
+     - Drops packets with destination MAC being unicast, but destination IP address being multicast.
+   * - ``ip_sip_is_zero``
+     - ``drop``
+     - Drops packets with zero (0) IPV4 source address.
+   * - ``met_red``
+     - ``drop``
+     - Drops non-conforming packets (dropped by Ingress policer, metering drop), e.g. packet rate exceeded configured bandwith.

From 3b8401066e5a8ee465891cc8bad614c797701348 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Tue, 15 Jun 2021 01:15:36 +0800
Subject: [PATCH 1706/3292] dt-bindings: dwmac: Add bindings for new Ingenic
 SoCs.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the dwmac bindings for the JZ4775 SoC, the X1000 SoC,
the X1600 SoC, the X1830 SoC and the X2000 SoC from Ingenic.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ingenic,mac.yaml  | 76 +++++++++++++++++++
 .../devicetree/bindings/net/snps,dwmac.yaml   | 15 ++++
 2 files changed, 91 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/ingenic,mac.yaml

diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
new file mode 100644
index 000000000000..5e93d4f9a080
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ingenic,mac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bindings for MAC in Ingenic SoCs
+
+maintainers:
+  - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+
+description:
+  The Ethernet Media Access Controller in Ingenic SoCs.
+
+properties:
+  compatible:
+    enum:
+      - ingenic,jz4775-mac
+      - ingenic,x1000-mac
+      - ingenic,x1600-mac
+      - ingenic,x1830-mac
+      - ingenic,x2000-mac
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    const: macirq
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: stmmaceth
+
+  mode-reg:
+    description: An extra syscon register that control ethernet interface and timing delay
+
+  rx-clk-delay-ps:
+    description: RGMII receive clock delay defined in pico seconds
+
+  tx-clk-delay-ps:
+    description: RGMII transmit clock delay defined in pico seconds
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+  - mode-reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/x1000-cgu.h>
+
+    mac: ethernet@134b0000 {
+        compatible = "ingenic,x1000-mac", "snps,dwmac";
+        reg = <0x134b0000 0x2000>;
+
+        interrupt-parent = <&intc>;
+        interrupts = <55>;
+        interrupt-names = "macirq";
+
+        clocks = <&cgu X1000_CLK_MAC>;
+        clock-names = "stmmaceth";
+
+        mode-reg = <&mac_phy_ctrl>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 2edd8bea993e..9c0ce92e9212 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -56,6 +56,11 @@ properties:
         - amlogic,meson8m2-dwmac
         - amlogic,meson-gxbb-dwmac
         - amlogic,meson-axg-dwmac
+        - ingenic,jz4775-mac
+        - ingenic,x1000-mac
+        - ingenic,x1600-mac
+        - ingenic,x1830-mac
+        - ingenic,x2000-mac
         - rockchip,px30-gmac
         - rockchip,rk3128-gmac
         - rockchip,rk3228-gmac
@@ -310,6 +315,11 @@ allOf:
               - allwinner,sun8i-r40-emac
               - allwinner,sun8i-v3s-emac
               - allwinner,sun50i-a64-emac
+              - ingenic,jz4775-mac
+              - ingenic,x1000-mac
+              - ingenic,x1600-mac
+              - ingenic,x1830-mac
+              - ingenic,x2000-mac
               - snps,dwxgmac
               - snps,dwxgmac-2.10
               - st,spear600-gmac
@@ -353,6 +363,11 @@ allOf:
               - allwinner,sun8i-r40-emac
               - allwinner,sun8i-v3s-emac
               - allwinner,sun50i-a64-emac
+              - ingenic,jz4775-mac
+              - ingenic,x1000-mac
+              - ingenic,x1600-mac
+              - ingenic,x1830-mac
+              - ingenic,x2000-mac
               - snps,dwmac-4.00
               - snps,dwmac-4.10a
               - snps,dwmac-4.20a

From 2bb4b98b60d7dc89fc0a5bb64534be348ab654df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Tue, 15 Jun 2021 01:15:37 +0800
Subject: [PATCH 1707/3292] net: stmmac: Add Ingenic SoCs MAC support.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for Ingenic SoC MAC glue layer support for the stmmac
device driver. This driver is used on for the MAC ethernet controller
found in the JZ4775 SoC, the X1000 SoC, the X1600 SoC, the X1830 SoC,
and the X2000 SoC.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig   |  12 +
 drivers/net/ethernet/stmicro/stmmac/Makefile  |   1 +
 .../ethernet/stmicro/stmmac/dwmac-ingenic.c   | 401 ++++++++++++++++++
 3 files changed, 414 insertions(+)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 7737e4d0bb9e..9a19e4d9da02 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -66,6 +66,18 @@ config DWMAC_ANARION
 
 	  This selects the Anarion SoC glue layer support for the stmmac driver.
 
+config DWMAC_INGENIC
+	tristate "Ingenic MAC support"
+	default MACH_INGENIC
+	depends on OF && HAS_IOMEM && (MACH_INGENIC || COMPILE_TEST)
+	select MFD_SYSCON
+	help
+	  Support for ethernet controller on Ingenic SoCs.
+
+	  This selects Ingenic SoCs glue layer support for the stmmac
+	  device driver. This driver is used on for the Ingenic SoCs
+	  MAC ethernet controller.
+
 config DWMAC_IPQ806X
 	tristate "QCA IPQ806x DWMAC support"
 	default ARCH_QCOM
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index f2e478b884b0..6471f93889ee 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -14,6 +14,7 @@ stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
 obj-$(CONFIG_DWMAC_ANARION)	+= dwmac-anarion.o
+obj-$(CONFIG_DWMAC_INGENIC)	+= dwmac-ingenic.o
 obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
 obj-$(CONFIG_DWMAC_MEDIATEK)	+= dwmac-mediatek.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
new file mode 100644
index 000000000000..9807339032e0
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwmac-ingenic.c - Ingenic SoCs DWMAC specific glue layer
+ *
+ * Copyright (c) 2021 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+#define MACPHYC_TXCLK_SEL_MASK		GENMASK(31, 31)
+#define MACPHYC_TXCLK_SEL_OUTPUT	0x1
+#define MACPHYC_TXCLK_SEL_INPUT		0x0
+#define MACPHYC_MODE_SEL_MASK		GENMASK(31, 31)
+#define MACPHYC_MODE_SEL_RMII		0x0
+#define MACPHYC_TX_SEL_MASK			GENMASK(19, 19)
+#define MACPHYC_TX_SEL_ORIGIN		0x0
+#define MACPHYC_TX_SEL_DELAY		0x1
+#define MACPHYC_TX_DELAY_MASK		GENMASK(18, 12)
+#define MACPHYC_RX_SEL_MASK			GENMASK(11, 11)
+#define MACPHYC_RX_SEL_ORIGIN		0x0
+#define MACPHYC_RX_SEL_DELAY		0x1
+#define MACPHYC_RX_DELAY_MASK		GENMASK(10, 4)
+#define MACPHYC_SOFT_RST_MASK		GENMASK(3, 3)
+#define MACPHYC_PHY_INFT_MASK		GENMASK(2, 0)
+#define MACPHYC_PHY_INFT_RMII		0x4
+#define MACPHYC_PHY_INFT_RGMII		0x1
+#define MACPHYC_PHY_INFT_GMII		0x0
+#define MACPHYC_PHY_INFT_MII		0x0
+
+#define MACPHYC_TX_DELAY_PS_MAX		2496
+#define MACPHYC_TX_DELAY_PS_MIN		20
+
+#define MACPHYC_RX_DELAY_PS_MAX		2496
+#define MACPHYC_RX_DELAY_PS_MIN		20
+
+enum ingenic_mac_version {
+	ID_JZ4775,
+	ID_X1000,
+	ID_X1600,
+	ID_X1830,
+	ID_X2000,
+};
+
+struct ingenic_mac {
+	const struct ingenic_soc_info *soc_info;
+	struct device *dev;
+	struct regmap *regmap;
+
+	int rx_delay;
+	int tx_delay;
+};
+
+struct ingenic_soc_info {
+	enum ingenic_mac_version version;
+	u32 mask;
+
+	int (*set_mode)(struct plat_stmmacenet_data *plat_dat);
+};
+
+static int ingenic_mac_init(struct plat_stmmacenet_data *plat_dat)
+{
+	struct ingenic_mac *mac = plat_dat->bsp_priv;
+	int ret;
+
+	if (mac->soc_info->set_mode) {
+		ret = mac->soc_info->set_mode(plat_dat);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct ingenic_mac *mac = plat_dat->bsp_priv;
+	unsigned int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) |
+			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_MII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_MII\n");
+		break;
+
+	case PHY_INTERFACE_MODE_GMII:
+		val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) |
+			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_GMII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_GMII\n");
+		break;
+
+	case PHY_INTERFACE_MODE_RMII:
+		val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) |
+			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+		val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) |
+			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RGMII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n");
+		break;
+
+	default:
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		return -EINVAL;
+	}
+
+	/* Update MAC PHY control register */
+	return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val);
+}
+
+static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct ingenic_mac *mac = plat_dat->bsp_priv;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_RMII:
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
+		break;
+
+	default:
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		return -EINVAL;
+	}
+
+	/* Update MAC PHY control register */
+	return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, 0);
+}
+
+static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct ingenic_mac *mac = plat_dat->bsp_priv;
+	unsigned int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_RMII:
+		val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
+		break;
+
+	default:
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		return -EINVAL;
+	}
+
+	/* Update MAC PHY control register */
+	return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val);
+}
+
+static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct ingenic_mac *mac = plat_dat->bsp_priv;
+	unsigned int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_RMII:
+		val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) |
+			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
+		break;
+
+	default:
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		return -EINVAL;
+	}
+
+	/* Update MAC PHY control register */
+	return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val);
+}
+
+static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct ingenic_mac *mac = plat_dat->bsp_priv;
+	unsigned int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_RMII:
+		val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) |
+			  FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN) |
+			  FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII);
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n");
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+		val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RGMII);
+
+		if (mac->tx_delay == 0)
+			val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN);
+		else
+			val |= FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_DELAY) |
+				   FIELD_PREP(MACPHYC_TX_DELAY_MASK, (mac->tx_delay + 9750) / 19500 - 1);
+
+		if (mac->rx_delay == 0)
+			val |= FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN);
+		else
+			val |= FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_DELAY) |
+				   FIELD_PREP(MACPHYC_RX_DELAY_MASK, (mac->rx_delay + 9750) / 19500 - 1);
+
+		dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RGMII\n");
+		break;
+
+	default:
+		dev_err(mac->dev, "Unsupported interface %d", plat_dat->interface);
+		return -EINVAL;
+	}
+
+	/* Update MAC PHY control register */
+	return regmap_update_bits(mac->regmap, 0, mac->soc_info->mask, val);
+}
+
+static int ingenic_mac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct ingenic_mac *mac;
+	const struct ingenic_soc_info *data;
+	u32 tx_delay_ps, rx_delay_ps;
+	int ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
+
+	mac = devm_kzalloc(&pdev->dev, sizeof(*mac), GFP_KERNEL);
+	if (!mac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
+
+	data = of_device_get_match_data(&pdev->dev);
+	if (!data) {
+		dev_err(&pdev->dev, "No of match data provided\n");
+		ret = -EINVAL;
+		goto err_remove_config_dt;
+	}
+
+	/* Get MAC PHY control register */
+	mac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "mode-reg");
+	if (IS_ERR(mac->regmap)) {
+		dev_err(&pdev->dev, "%s: Failed to get syscon regmap\n", __func__);
+		goto err_remove_config_dt;
+	}
+
+	if (!of_property_read_u32(pdev->dev.of_node, "tx-clk-delay-ps", &tx_delay_ps)) {
+		if (tx_delay_ps >= MACPHYC_TX_DELAY_PS_MIN &&
+			tx_delay_ps <= MACPHYC_TX_DELAY_PS_MAX) {
+			mac->tx_delay = tx_delay_ps * 1000;
+		} else {
+			dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
+			return -EINVAL;
+		}
+	}
+
+	if (!of_property_read_u32(pdev->dev.of_node, "rx-clk-delay-ps", &rx_delay_ps)) {
+		if (rx_delay_ps >= MACPHYC_RX_DELAY_PS_MIN &&
+			rx_delay_ps <= MACPHYC_RX_DELAY_PS_MAX) {
+			mac->rx_delay = rx_delay_ps * 1000;
+		} else {
+			dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
+			return -EINVAL;
+		}
+	}
+
+	mac->soc_info = data;
+	mac->dev = &pdev->dev;
+
+	plat_dat->bsp_priv = mac;
+
+	ret = ingenic_mac_init(plat_dat);
+	if (ret)
+		goto err_remove_config_dt;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_remove_config_dt;
+
+	return 0;
+
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int ingenic_mac_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct ingenic_mac *mac = priv->plat->bsp_priv;
+	int ret;
+
+	ret = stmmac_suspend(dev);
+
+	return ret;
+}
+
+static int ingenic_mac_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct ingenic_mac *mac = priv->plat->bsp_priv;
+	int ret;
+
+	ret = ingenic_mac_init(priv->plat);
+	if (ret)
+		return ret;
+
+	ret = stmmac_resume(dev);
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(ingenic_mac_pm_ops, ingenic_mac_suspend, ingenic_mac_resume);
+
+static struct ingenic_soc_info jz4775_soc_info = {
+	.version = ID_JZ4775,
+	.mask = MACPHYC_TXCLK_SEL_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK,
+
+	.set_mode = jz4775_mac_set_mode,
+};
+
+static struct ingenic_soc_info x1000_soc_info = {
+	.version = ID_X1000,
+	.mask = MACPHYC_SOFT_RST_MASK,
+
+	.set_mode = x1000_mac_set_mode,
+};
+
+static struct ingenic_soc_info x1600_soc_info = {
+	.version = ID_X1600,
+	.mask = MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK,
+
+	.set_mode = x1600_mac_set_mode,
+};
+
+static struct ingenic_soc_info x1830_soc_info = {
+	.version = ID_X1830,
+	.mask = MACPHYC_MODE_SEL_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK,
+
+	.set_mode = x1830_mac_set_mode,
+};
+
+static struct ingenic_soc_info x2000_soc_info = {
+	.version = ID_X2000,
+	.mask = MACPHYC_TX_SEL_MASK | MACPHYC_TX_DELAY_MASK | MACPHYC_RX_SEL_MASK |
+			MACPHYC_RX_DELAY_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK,
+
+	.set_mode = x2000_mac_set_mode,
+};
+
+static const struct of_device_id ingenic_mac_of_matches[] = {
+	{ .compatible = "ingenic,jz4775-mac", .data = &jz4775_soc_info },
+	{ .compatible = "ingenic,x1000-mac", .data = &x1000_soc_info },
+	{ .compatible = "ingenic,x1600-mac", .data = &x1600_soc_info },
+	{ .compatible = "ingenic,x1830-mac", .data = &x1830_soc_info },
+	{ .compatible = "ingenic,x2000-mac", .data = &x2000_soc_info },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ingenic_mac_of_matches);
+
+static struct platform_driver ingenic_mac_driver = {
+	.probe		= ingenic_mac_probe,
+	.remove		= stmmac_pltfr_remove,
+	.driver		= {
+		.name	= "ingenic-mac",
+		.pm		= pm_ptr(&ingenic_mac_pm_ops),
+		.of_match_table = ingenic_mac_of_matches,
+	},
+};
+module_platform_driver(ingenic_mac_driver);
+
+MODULE_AUTHOR("周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>");
+MODULE_DESCRIPTION("Ingenic SoCs DWMAC specific glue layer");
+MODULE_LICENSE("GPL v2");

From 565c6d8cff6a982e0e6b5f0dbf32b4342d0d3dc9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 14 Jun 2021 16:44:38 +0300
Subject: [PATCH 1708/3292] net: phy: nxp-c45-tja11xx: demote the "no PTP
 support" message to debug

The SJA1110 switch integrates these PHYs, and they do not have support
for timestamping. This message becomes quite overwhelming:

[   10.056596] NXP C45 TJA1103 spi1.0-base-t1:01: the phy does not support PTP
[   10.112625] NXP C45 TJA1103 spi1.0-base-t1:02: the phy does not support PTP
[   10.167461] NXP C45 TJA1103 spi1.0-base-t1:03: the phy does not support PTP
[   10.223510] NXP C45 TJA1103 spi1.0-base-t1:04: the phy does not support PTP
[   10.278239] NXP C45 TJA1103 spi1.0-base-t1:05: the phy does not support PTP
[   10.332663] NXP C45 TJA1103 spi1.0-base-t1:06: the phy does not support PTP
[   15.390828] NXP C45 TJA1103 spi1.2-base-t1:01: the phy does not support PTP
[   15.445224] NXP C45 TJA1103 spi1.2-base-t1:02: the phy does not support PTP
[   15.499673] NXP C45 TJA1103 spi1.2-base-t1:03: the phy does not support PTP
[   15.554074] NXP C45 TJA1103 spi1.2-base-t1:04: the phy does not support PTP
[   15.608516] NXP C45 TJA1103 spi1.2-base-t1:05: the phy does not support PTP
[   15.662996] NXP C45 TJA1103 spi1.2-base-t1:06: the phy does not support PTP

So reduce its log level to debug.

Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 512e4cb5d2c2..902fe1aa7782 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -1090,7 +1090,7 @@ static int nxp_c45_probe(struct phy_device *phydev)
 				   VEND1_PORT_ABILITIES);
 	ptp_ability = !!(ptp_ability & PTP_ABILITY);
 	if (!ptp_ability) {
-		phydev_info(phydev, "the phy does not support PTP");
+		phydev_dbg(phydev, "the phy does not support PTP");
 		goto no_ptp_support;
 	}
 

From 661fef5698bc44c9cc4844140ce055e69d57e1b7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 14 Jun 2021 16:44:39 +0300
Subject: [PATCH 1709/3292] net: phy: nxp-c45-tja11xx: express timestamp
 wraparound interval in terms of TS_SEC_MASK

nxp_c45_reconstruct_ts() takes a partial hardware timestamp in @hwts,
with 2 bits of the 'seconds' portion, and a full PTP time in @ts.

It patches in the lower bits of @hwts into @ts, and to ensure that the
reconstructed timestamp is correct, it checks whether the lower 2 bits
of @hwts are not in fact higher than the lower 2 bits of @ts. This is
not logically possible because, according to the calling convention, @ts
was collected later in time than @hwts, but due to two's complement
arithmetic it can actually happen, because the current PTP time might
have wrapped around between when @hwts was collected and when @ts was,
yielding the lower 2 bits of @ts smaller than those of @hwts.

To correct for that situation which is expected to happen under normal
conditions, the driver subtracts exactly one wraparound interval from
the reconstructed timestamp, since the upper bits of that need to
correspond to what the upper bits of @hwts were, not to what the upper
bits of @ts were.

Readers might be confused because the driver denotes the amount of bits
that the partial hardware timestamp has to offer as TS_SEC_MASK
(timestamp mask for seconds). But it subtracts a seemingly unrelated
BIT(2), which is in fact more subtle: if the hardware timestamp provides
2 bits of partial 'seconds' timestamp, then the wraparound interval is
2^2 == BIT(2).

But nonetheless, it is better to express the wraparound interval in
terms of a definition we already have, so replace BIT(2) with
1 + GENMASK(1, 0) which produces the same result but is clearer.

Suggested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 902fe1aa7782..afdcd6772b1d 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -325,7 +325,7 @@ static void nxp_c45_reconstruct_ts(struct timespec64 *ts,
 {
 	ts->tv_nsec = hwts->nsec;
 	if ((ts->tv_sec & TS_SEC_MASK) < (hwts->sec & TS_SEC_MASK))
-		ts->tv_sec -= BIT(2);
+		ts->tv_sec -= TS_SEC_MASK + 1;
 	ts->tv_sec &= ~TS_SEC_MASK;
 	ts->tv_sec |= hwts->sec & TS_SEC_MASK;
 }

From 109258ed6262e3fedfa241dd309161cb31018016 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 14 Jun 2021 16:44:40 +0300
Subject: [PATCH 1710/3292] net: phy: nxp-c45-tja11xx: fix potential RX
 timestamp wraparound

The reconstruction procedure for partial timestamps reads the current
PTP time and fills in the low 2 bits of the second portion, as well as
the nanoseconds portion, from the actual hardware packet timestamp.
Critically, the reconstruction procedure works because it assumes that
the current PTP time is strictly larger than the hardware timestamp was:
it detects a 2-bit wraparound of the 'seconds' portion by checking whether
the 'seconds' portion of the partial hardware timestamp is larger than
the 'seconds' portion of the current time. That can only happen if the
hardware timestamp was captured by the PHY during the last phase of a
'modulo 4 seconds' interval, and the current PTP time was read by the
driver during the initial phase of the next 'modulo 4 seconds' interval.

The partial RX timestamps are added to priv->rx_queue in
nxp_c45_rxtstamp() and they are processed potentially in parallel by the
aux worker thread in nxp_c45_do_aux_work(). This means that it is
possible for nxp_c45_do_aux_work() to process more than one RX timestamp
during the same schedule.

There is one premature optimization that will cause issues: for RX
timestamping, the driver reads the current time only once, and it uses
that to reconstruct all PTP RX timestamps in the queue. For the second
and later timestamps, this will be an issue if we are processing two RX
timestamps which are to the left and to the right, respectively, of a
4-bit wraparound of the 'seconds' portion of the PTP time, and the
current PTP time is also pre-wraparound.

 0.000000000        4.000000000        8.000000000        12.000000000
 |..................|..................|..................|............>
                 ^ ^ ^ ^                                            time
                 | | | |
                 | | | process hwts 1 and hwts 2
                 | | |
                 | | hwts 2
                 | |
                 | read current PTP time
                 |
                 hwts 1

What will happen in that case is that hwts 2 (post-wraparound) will use
a stale current PTP time that is pre-wraparound.
But nxp_c45_reconstruct_ts will not detect this condition, because it is
not coded up for it, so it will reconstruct hwts 2 with a current time
from the previous 4 second interval (i.e. 0.something instead of
4.something).

This is solvable by making sure that the full 64-bit current time is
always read after the PHY has taken the partial RX timestamp. We do this
by reading the current PTP time for every timestamp in the RX queue.

Fixes: 514def5dd339 ("phy: nxp-c45-tja11xx: add timestamping support")
Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index afdcd6772b1d..7eac58b78c53 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -427,8 +427,8 @@ static long nxp_c45_do_aux_work(struct ptp_clock_info *ptp)
 		nxp_c45_process_txts(priv, &hwts);
 	}
 
-	nxp_c45_ptp_gettimex64(&priv->caps, &ts, NULL);
 	while ((skb = skb_dequeue(&priv->rx_queue)) != NULL) {
+		nxp_c45_ptp_gettimex64(&priv->caps, &ts, NULL);
 		ts_raw = __be32_to_cpu(NXP_C45_SKB_CB(skb)->header->reserved2);
 		hwts.sec = ts_raw >> 30;
 		hwts.nsec = ts_raw & GENMASK(29, 0);

From 0b5f0f29b118910c89fe249cdfbc11b400a86a18 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 14 Jun 2021 16:44:41 +0300
Subject: [PATCH 1711/3292] net: phy: nxp-c45-tja11xx: enable MDIO write access
 to the master/slave registers

The SJA1110 switch integrates TJA1103 PHYs, but in SJA1110 switch rev B
silicon, there is a bug in that the registers for selecting the 100base-T1
autoneg master/slave roles are not writable.

To enable write access to the master/slave registers, these additional
PHY writes are necessary during initialization.

The issue has been corrected in later SJA1110 silicon versions and is
not present in the standalone PHY variants, but applying the workaround
unconditionally in the driver should not do any harm.

Suggested-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 7eac58b78c53..91a327f67a42 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -1035,6 +1035,12 @@ static int nxp_c45_config_init(struct phy_device *phydev)
 		return ret;
 	}
 
+	/* Bug workaround for SJA1110 rev B: enable write access
+	 * to MDIO_MMD_PMAPMD
+	 */
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F8, 1);
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x01F9, 2);
+
 	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONFIG,
 			 PHY_CONFIG_AUTO);
 

From 3009e8aa85af080b3e03c803b86fe496b7713e69 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 14 Jun 2021 16:50:50 +0300
Subject: [PATCH 1712/3292] net: dsa: sja1105: constify the sja1105_regs
 structures

The struct sja1105_regs tables are not modified during the runtime of
the driver, so they can be made constant. In fact, struct sja1105_info
already holds a const pointer to these.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_spi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 7c493c6a839d..4aed16d23f21 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -404,7 +404,7 @@ out:
 	return rc;
 }
 
-static struct sja1105_regs sja1105et_regs = {
+static const struct sja1105_regs sja1105et_regs = {
 	.device_id = 0x0,
 	.prod_id = 0x100BC3,
 	.status = 0x1,
@@ -440,7 +440,7 @@ static struct sja1105_regs sja1105et_regs = {
 	.mdio_100base_t1 = SJA1105_RSV_ADDR,
 };
 
-static struct sja1105_regs sja1105pqrs_regs = {
+static const struct sja1105_regs sja1105pqrs_regs = {
 	.device_id = 0x0,
 	.prod_id = 0x100BC3,
 	.status = 0x1,
@@ -479,7 +479,7 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.mdio_100base_t1 = SJA1105_RSV_ADDR,
 };
 
-static struct sja1105_regs sja1110_regs = {
+static const struct sja1105_regs sja1110_regs = {
 	.device_id = SJA1110_SPI_ADDR(0x0),
 	.prod_id = SJA1110_ACU_ADDR(0xf00),
 	.status = SJA1110_SPI_ADDR(0x4),

From ec13357263fb672390250fcfaa4c86b6dce66062 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 14 Jun 2021 16:58:19 +0300
Subject: [PATCH 1713/3292] net: flow_dissector: fix RPS on DSA masters

After the blamed patch, __skb_flow_dissect() on the DSA master stopped
adjusting for the length of the DSA headers. This is because it was told
to adjust only if the needed_headroom is zero, aka if there is no DSA
header. Of course, the adjustment should be done only if there _is_ a
DSA header.

Modify the comment too so it is clearer.

Fixes: 4e50025129ef ("net: dsa: generalize overhead for taggers that use both headers and trailers")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c04455981c1e..2aadbfc5193b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -943,8 +943,8 @@ bool __skb_flow_dissect(const struct net *net,
 			int offset = 0;
 
 			ops = skb->dev->dsa_ptr->tag_ops;
-			/* Tail taggers don't break flow dissection */
-			if (!ops->needed_headroom) {
+			/* Only DSA header taggers break flow dissection */
+			if (ops->needed_headroom) {
 				if (ops->flow_dissect)
 					ops->flow_dissect(skb, &proto, &offset);
 				else

From 89212e160b81e778f829b89743570665810e3b13 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Mon, 14 Jun 2021 18:56:36 +0200
Subject: [PATCH 1714/3292] net: wwan: Fix WWAN config symbols

There is not strong reason to have both WWAN and WWAN_CORE symbols,
Let's build the WWAN core framework when WWAN is selected, in the
same way as for other subsystems.

This fixes issue with mhi_net selecting WWAN_CORE without WWAN and
reported by kernel test robot:

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for WWAN_CORE
   Depends on NETDEVICES && WWAN
   Selected by
   - MHI_NET && NETDEVICES && NET_CORE && MHI_BUS

Fixes: 9a44c1cc6388 ("net: Add a WWAN subsystem")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig       |  2 +-
 drivers/net/wwan/Kconfig  | 17 ++++++-----------
 drivers/net/wwan/Makefile |  2 +-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 30d6e2f7686e..6977f8248df7 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -431,7 +431,7 @@ config VSOCKMON
 config MHI_NET
 	tristate "MHI network driver"
 	depends on MHI_BUS
-	select WWAN_CORE
+	select WWAN
 	help
 	  This is the network driver for MHI bus.  It can be used with
 	  QCOM based WWAN modems (like SDX55).  Say Y or M.
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index 13613a4f53d8..249b3f1ed62b 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -3,15 +3,9 @@
 # Wireless WAN device configuration
 #
 
-menuconfig WWAN
-	bool "Wireless WAN"
-	help
-	  This section contains Wireless WAN configuration for WWAN framework
-	  and drivers.
+menu "Wireless WAN"
 
-if WWAN
-
-config WWAN_CORE
+config WWAN
 	tristate "WWAN Driver Core"
 	help
 	  Say Y here if you want to use the WWAN driver core. This driver
@@ -20,9 +14,10 @@ config WWAN_CORE
 	  To compile this driver as a module, choose M here: the module will be
 	  called wwan.
 
+if WWAN
+
 config WWAN_HWSIM
 	tristate "Simulated WWAN device"
-	depends on WWAN_CORE
 	help
 	  This driver is a developer testing tool that can be used to test WWAN
 	  framework.
@@ -32,7 +27,6 @@ config WWAN_HWSIM
 
 config MHI_WWAN_CTRL
 	tristate "MHI WWAN control driver for QCOM-based PCIe modems"
-	select WWAN_CORE
 	depends on MHI_BUS
 	help
 	  MHI WWAN CTRL allows QCOM-based PCIe modems to expose different modem
@@ -46,7 +40,6 @@ config MHI_WWAN_CTRL
 
 config IOSM
 	tristate "IOSM Driver for Intel M.2 WWAN Device"
-	select WWAN_CORE
 	depends on INTEL_IOMMU
 	help
 	  This driver enables Intel M.2 WWAN Device communication.
@@ -57,3 +50,5 @@ config IOSM
 	  If unsure, say N.
 
 endif # WWAN
+
+endmenu
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index 3e565d3f984f..83dd3482ffc3 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -3,7 +3,7 @@
 # Makefile for the Linux WWAN device drivers.
 #
 
-obj-$(CONFIG_WWAN_CORE) += wwan.o
+obj-$(CONFIG_WWAN) += wwan.o
 wwan-objs += wwan_core.o
 
 obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o

From 8c22ad36eefa5e1c4af0d653d385041527d7b7b9 Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 14 Apr 2021 07:16:40 +0000
Subject: [PATCH 1715/3292] net/mlx5: Lag, refactor disable flow

When a net device is removed (can happen if the PCI function is unbound
from the system) it's not enough to destroy the hardware lag. The system
should recreate the original devices that were present before the lag.
As the same flow is done when a net device is removed from the bond
refactor and reuse the code.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 40 ++++++++++++-------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 1fb70524d067..6642ff0115f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -276,6 +276,29 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
 	}
 }
 
+static void mlx5_disable_lag(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	bool roce_lag;
+	int err;
+
+	roce_lag = __mlx5_lag_is_roce(ldev);
+
+	if (roce_lag) {
+		dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+		mlx5_rescan_drivers_locked(dev0);
+		mlx5_nic_vport_disable_roce(dev1);
+	}
+
+	err = mlx5_deactivate_lag(ldev);
+	if (err)
+		return;
+
+	if (roce_lag)
+		mlx5_lag_add_devices(ldev);
+}
+
 static void mlx5_do_bond(struct mlx5_lag *ldev)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
@@ -322,20 +345,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
 		mlx5_modify_lag(ldev, &tracker);
 	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
-		roce_lag = __mlx5_lag_is_roce(ldev);
-
-		if (roce_lag) {
-			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
-			mlx5_rescan_drivers_locked(dev0);
-			mlx5_nic_vport_disable_roce(dev1);
-		}
-
-		err = mlx5_deactivate_lag(ldev);
-		if (err)
-			return;
-
-		if (roce_lag)
-			mlx5_lag_add_devices(ldev);
+		mlx5_disable_lag(ldev);
 	}
 }
 
@@ -620,7 +630,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev)
 		return;
 
 	if (__mlx5_lag_is_active(ldev))
-		mlx5_deactivate_lag(ldev);
+		mlx5_disable_lag(ldev);
 
 	mlx5_lag_dev_remove_pf(ldev, dev);
 

From 8ed19471fdaad266225aa15f8e2626a7a3265504 Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 14 Apr 2021 07:28:19 +0000
Subject: [PATCH 1716/3292] net/mlx5: Lag, Don't rescan if the device is going
 down

If MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV is set it means the device is going
down and mlx5_rescan_drivers_locked() shouldn't be called.
With this patch and the previous one in the series, unbinding a PCI
function when its netdev is part of a bond works and leaves the system in a
working state.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 6642ff0115f8..4a4e9b228ba0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -258,6 +258,10 @@ static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
 		if (!ldev->pf[i].dev)
 			continue;
 
+		if (ldev->pf[i].dev->priv.flags &
+		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+			continue;
+
 		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
 	}
@@ -286,8 +290,10 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
 	roce_lag = __mlx5_lag_is_roce(ldev);
 
 	if (roce_lag) {
-		dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
-		mlx5_rescan_drivers_locked(dev0);
+		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
+			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+			mlx5_rescan_drivers_locked(dev0);
+		}
 		mlx5_nic_vport_disable_roce(dev1);
 	}
 

From 8a66e45859797e5dd77ff17dd37781f99d5f5b9b Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 14 Apr 2021 08:18:09 +0000
Subject: [PATCH 1717/3292] net/mlx5: Change ownership model for lag

Lag is used to combine two PCI functions of the same HCA into a single
logical unit. This is a core functionality and as such should be managed by
the core driver. Currently this isn't the case. While we store the lag
software structure inside the lower device, its lifetime (creation /
destruction) is dictated by the mlx5e part. Change the ownership model so
lag is tied to the lifetime of the lower level driver instead to the
mlx5e part.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c |   4 +-
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 255 +++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/lag.h |   3 +-
 .../net/ethernet/mellanox/mlx5/core/lag_mp.c  |   2 +-
 .../net/ethernet/mellanox/mlx5/core/main.c    |   2 +
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |   6 +-
 7 files changed, 171 insertions(+), 105 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 59ee28156603..930b225dfe77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5114,7 +5114,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 	mlx5e_set_netdev_mtu_boundaries(priv);
 	mlx5e_set_dev_port_mtu(priv);
 
-	mlx5_lag_add(mdev, netdev);
+	mlx5_lag_add_netdev(mdev, netdev);
 
 	mlx5e_enable_async_events(priv);
 	mlx5e_enable_blocking_events(priv);
@@ -5162,7 +5162,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 		priv->en_trap = NULL;
 	}
 	mlx5e_disable_async_events(priv);
-	mlx5_lag_remove(mdev);
+	mlx5_lag_remove_netdev(mdev, priv->netdev);
 	mlx5_vxlan_reset_to_default(mdev->vxlan);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8290e0086178..2d2cc5f3b03f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -976,7 +976,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	if (MLX5_CAP_GEN(mdev, uplink_follow))
 		mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
 					      0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
-	mlx5_lag_add(mdev, netdev);
+	mlx5_lag_add_netdev(mdev, netdev);
 	priv->events_nb.notifier_call = uplink_rep_async_event;
 	mlx5_notifier_register(mdev, &priv->events_nb);
 	mlx5e_dcbnl_initialize(priv);
@@ -1009,7 +1009,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
 	mlx5e_rep_tc_disable(priv);
-	mlx5_lag_remove(mdev);
+	mlx5_lag_remove_netdev(mdev, priv->netdev);
 }
 
 static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 4a4e9b228ba0..5c043c5cc403 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -93,6 +93,64 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+				 unsigned long event, void *ptr);
+static void mlx5_do_bond_work(struct work_struct *work);
+
+static void mlx5_ldev_free(struct kref *ref)
+{
+	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
+
+	if (ldev->nb.notifier_call)
+		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
+	mlx5_lag_mp_cleanup(ldev);
+	cancel_delayed_work_sync(&ldev->bond_work);
+	destroy_workqueue(ldev->wq);
+	kfree(ldev);
+}
+
+static void mlx5_ldev_put(struct mlx5_lag *ldev)
+{
+	kref_put(&ldev->ref, mlx5_ldev_free);
+}
+
+static void mlx5_ldev_get(struct mlx5_lag *ldev)
+{
+	kref_get(&ldev->ref);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	int err;
+
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return NULL;
+
+	ldev->wq = create_singlethread_workqueue("mlx5_lag");
+	if (!ldev->wq) {
+		kfree(ldev);
+		return NULL;
+	}
+
+	kref_init(&ldev->ref);
+	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+	ldev->nb.notifier_call = mlx5_lag_netdev_event;
+	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
+		ldev->nb.notifier_call = NULL;
+		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+	}
+
+	err = mlx5_lag_mp_init(ldev);
+	if (err)
+		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
+			      err);
+
+	return ldev;
+}
+
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 				struct net_device *ndev)
 {
@@ -511,55 +569,52 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct mlx5_lag *mlx5_lag_dev_alloc(void)
-{
-	struct mlx5_lag *ldev;
-
-	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
-	if (!ldev)
-		return NULL;
-
-	ldev->wq = create_singlethread_workqueue("mlx5_lag");
-	if (!ldev->wq) {
-		kfree(ldev);
-		return NULL;
-	}
-
-	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
-
-	return ldev;
-}
-
-static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
-{
-	destroy_workqueue(ldev->wq);
-	kfree(ldev);
-}
-
-static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
-			       struct mlx5_core_dev *dev,
-			       struct net_device *netdev)
+static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
+				 struct mlx5_core_dev *dev,
+				 struct net_device *netdev)
 {
 	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
 
 	if (fn >= MLX5_MAX_PORTS)
-		return -EPERM;
+		return;
 
 	spin_lock(&lag_lock);
-	ldev->pf[fn].dev    = dev;
 	ldev->pf[fn].netdev = netdev;
 	ldev->tracker.netdev_state[fn].link_up = 0;
 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
-
-	dev->priv.lag = ldev;
-
 	spin_unlock(&lag_lock);
-
-	return fn;
 }
 
-static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
-				   struct mlx5_core_dev *dev)
+static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
+				    struct net_device *netdev)
+{
+	int i;
+
+	spin_lock(&lag_lock);
+	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+		if (ldev->pf[i].netdev == netdev) {
+			ldev->pf[i].netdev = NULL;
+			break;
+		}
+	}
+	spin_unlock(&lag_lock);
+}
+
+static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
+			       struct mlx5_core_dev *dev)
+{
+	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+
+	if (fn >= MLX5_MAX_PORTS)
+		return;
+
+	ldev->pf[fn].dev = dev;
+	dev->priv.lag = ldev;
+}
+
+/* Must be called with intf_mutex held */
+static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
+				  struct mlx5_core_dev *dev)
 {
 	int i;
 
@@ -570,19 +625,15 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
 	if (i == MLX5_MAX_PORTS)
 		return;
 
-	spin_lock(&lag_lock);
-	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
-
+	ldev->pf[i].dev = NULL;
 	dev->priv.lag = NULL;
-	spin_unlock(&lag_lock);
 }
 
 /* Must be called with intf_mutex held */
-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev = NULL;
 	struct mlx5_core_dev *tmp_dev;
-	int i, err;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
 	    !MLX5_CAP_GEN(dev, lag_master) ||
@@ -594,67 +645,77 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
 		ldev = tmp_dev->priv.lag;
 
 	if (!ldev) {
-		ldev = mlx5_lag_dev_alloc();
+		ldev = mlx5_lag_dev_alloc(dev);
 		if (!ldev) {
 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
 			return;
 		}
+	} else {
+		mlx5_ldev_get(ldev);
 	}
 
-	if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
+	mlx5_ldev_add_mdev(ldev, dev);
+
+	return;
+}
+
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
 		return;
 
+	mlx5_dev_list_lock();
+	mlx5_ldev_remove_mdev(ldev, dev);
+	mlx5_dev_list_unlock();
+	mlx5_ldev_put(ldev);
+}
+
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
+{
+	mlx5_dev_list_lock();
+	__mlx5_lag_dev_add_mdev(dev);
+	mlx5_dev_list_unlock();
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
+			    struct net_device *netdev)
+{
+	struct mlx5_lag *ldev;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return;
+
+	if (__mlx5_lag_is_active(ldev))
+		mlx5_disable_lag(ldev);
+
+	mlx5_ldev_remove_netdev(ldev, netdev);
+	ldev->flags &= ~MLX5_LAG_FLAG_READY;
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
+			 struct net_device *netdev)
+{
+	struct mlx5_lag *ldev;
+	int i;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return;
+
+	mlx5_ldev_add_netdev(ldev, dev, netdev);
+
 	for (i = 0; i < MLX5_MAX_PORTS; i++)
 		if (!ldev->pf[i].dev)
 			break;
 
 	if (i >= MLX5_MAX_PORTS)
 		ldev->flags |= MLX5_LAG_FLAG_READY;
-
-	if (!ldev->nb.notifier_call) {
-		ldev->nb.notifier_call = mlx5_lag_netdev_event;
-		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
-			ldev->nb.notifier_call = NULL;
-			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
-		}
-	}
-
-	err = mlx5_lag_mp_init(ldev);
-	if (err)
-		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
-			      err);
-}
-
-/* Must be called with intf_mutex held */
-void mlx5_lag_remove(struct mlx5_core_dev *dev)
-{
-	struct mlx5_lag *ldev;
-	int i;
-
-	ldev = mlx5_lag_dev_get(dev);
-	if (!ldev)
-		return;
-
-	if (__mlx5_lag_is_active(ldev))
-		mlx5_disable_lag(ldev);
-
-	mlx5_lag_dev_remove_pf(ldev, dev);
-
-	ldev->flags &= ~MLX5_LAG_FLAG_READY;
-
-	for (i = 0; i < MLX5_MAX_PORTS; i++)
-		if (ldev->pf[i].dev)
-			break;
-
-	if (i == MLX5_MAX_PORTS) {
-		if (ldev->nb.notifier_call) {
-			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
-			ldev->nb.notifier_call = NULL;
-		}
-		mlx5_lag_mp_cleanup(ldev);
-		cancel_delayed_work_sync(&ldev->bond_work);
-		mlx5_lag_dev_free(ldev);
-	}
 }
 
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -663,7 +724,7 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
 	bool res;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res  = ldev && __mlx5_lag_is_roce(ldev);
 	spin_unlock(&lag_lock);
 
@@ -677,7 +738,7 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 	bool res;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res  = ldev && __mlx5_lag_is_active(ldev);
 	spin_unlock(&lag_lock);
 
@@ -691,7 +752,7 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 	bool res;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res  = ldev && __mlx5_lag_is_sriov(ldev);
 	spin_unlock(&lag_lock);
 
@@ -704,7 +765,7 @@ void mlx5_lag_update(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 
 	mlx5_dev_list_lock();
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (!ldev)
 		goto unlock;
 
@@ -720,7 +781,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 
 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
@@ -749,7 +810,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 	u8 port = 0;
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
 		goto unlock;
 
@@ -785,7 +846,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 	memset(values, 0, sizeof(*values) * num_counters);
 
 	spin_lock(&lag_lock);
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	if (ldev && __mlx5_lag_is_active(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 8d8cf2d0bc6d..191392c37558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -40,6 +40,7 @@ struct lag_tracker {
 struct mlx5_lag {
 	u8                        flags;
 	u8                        v2p_map[MLX5_MAX_PORTS];
+	struct kref               ref;
 	struct lag_func           pf[MLX5_MAX_PORTS];
 	struct lag_tracker        tracker;
 	struct workqueue_struct   *wq;
@@ -49,7 +50,7 @@ struct mlx5_lag {
 };
 
 static inline struct mlx5_lag *
-mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+mlx5_lag_dev(struct mlx5_core_dev *dev)
 {
 	return dev->priv.lag;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index fd6196b5e163..c4bf8b679541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -28,7 +28,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 	bool res;
 
-	ldev = mlx5_lag_dev_get(dev);
+	ldev = mlx5_lag_dev(dev);
 	res  = ldev && __mlx5_lag_is_multipath(ldev);
 
 	return res;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a1d67bd7fb43..310518fabf77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1185,6 +1185,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 	}
 
 	mlx5_sf_dev_table_create(dev);
+	mlx5_lag_add_mdev(dev);
 
 	return 0;
 
@@ -1219,6 +1220,7 @@ err_irq_table:
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
+	mlx5_lag_remove_mdev(dev);
 	mlx5_sf_dev_table_destroy(dev);
 	mlx5_sriov_detach(dev);
 	mlx5_ec_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a22b706eebd3..dd95aa6eb2f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -164,8 +164,10 @@ int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
 int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
 			u8 feature_group, u8 access_reg_group);
 
-void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
-void mlx5_lag_remove(struct mlx5_core_dev *dev);
+void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev);
 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);

From c38421abcf21d477691277218106780233abc2d8 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 23 Feb 2021 09:32:21 +0200
Subject: [PATCH 1718/3292] net/mlx5: Delay IRQ destruction till all users are
 gone

Shared IRQ are consumed by multiple EQ users and in order to properly
initialize and later release such IRQs, we add kref counting of IRQ
structure.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 55 ++++++++++++-------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index c3373fb1cd7f..0e65ac3301c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -16,6 +16,8 @@ struct mlx5_irq {
 	struct atomic_notifier_head nh;
 	cpumask_var_t mask;
 	char name[MLX5_MAX_IRQ_NAME];
+	struct kref kref;
+	int irqn;
 };
 
 struct mlx5_irq_table {
@@ -146,13 +148,35 @@ out:
 	return ret;
 }
 
+static void irq_release(struct kref *kref)
+{
+	struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
+
+	free_irq(irq->irqn, &irq->nh);
+}
+
+static void irq_put(struct mlx5_irq *irq)
+{
+	kref_put(&irq->kref, irq_release);
+}
+
 int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
 		       struct notifier_block *nb)
 {
 	struct mlx5_irq *irq;
+	int err;
 
 	irq = &irq_table->irq[vecidx];
-	return atomic_notifier_chain_register(&irq->nh, nb);
+	err = kref_get_unless_zero(&irq->kref);
+	if (WARN_ON_ONCE(!err))
+		/* Something very bad happens here, we are enabling EQ
+		 * on non-existing IRQ.
+		 */
+		return -ENOENT;
+	err = atomic_notifier_chain_register(&irq->nh, nb);
+	if (err)
+		irq_put(irq);
+	return err;
 }
 
 int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
@@ -161,6 +185,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
 	struct mlx5_irq *irq;
 
 	irq = &irq_table->irq[vecidx];
+	irq_put(irq);
 	return atomic_notifier_chain_unregister(&irq->nh, nb);
 }
 
@@ -189,28 +214,26 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
 
 	for (i = 0; i < nvec; i++) {
 		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
-		int irqn = pci_irq_vector(dev->pdev, i);
 
+		irq->irqn = pci_irq_vector(dev->pdev, i);
 		irq_set_name(name, i);
 		ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
 		snprintf(irq->name, MLX5_MAX_IRQ_NAME,
 			 "%s@pci:%s", name, pci_name(dev->pdev));
-		err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+		err = request_irq(irq->irqn, mlx5_irq_int_handler, 0, irq->name,
 				  &irq->nh);
 		if (err) {
 			mlx5_core_err(dev, "Failed to request irq\n");
 			goto err_request_irq;
 		}
+		kref_init(&irq->kref);
 	}
 	return 0;
 
 err_request_irq:
-	while (i--) {
-		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
-		int irqn = pci_irq_vector(dev->pdev, i);
+	while (i--)
+		irq_put(mlx5_irq_get(dev, i));
 
-		free_irq(irqn, &irq->nh);
-	}
 	return  err;
 }
 
@@ -264,10 +287,8 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
 {
 	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
 	struct mlx5_irq *irq;
-	int irqn;
 
 	irq = mlx5_irq_get(mdev, vecidx);
-	irqn = pci_irq_vector(mdev->pdev, vecidx);
 	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
 		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
 		return -ENOMEM;
@@ -276,9 +297,9 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
 	cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
 			irq->mask);
 	if (IS_ENABLED(CONFIG_SMP) &&
-	    irq_set_affinity_hint(irqn, irq->mask))
+	    irq_set_affinity_hint(irq->irqn, irq->mask))
 		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
-			       irqn);
+			       irq->irqn);
 
 	return 0;
 }
@@ -287,11 +308,9 @@ static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
 {
 	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
 	struct mlx5_irq *irq;
-	int irqn;
 
 	irq = mlx5_irq_get(mdev, vecidx);
-	irqn = pci_irq_vector(mdev->pdev, vecidx);
-	irq_set_affinity_hint(irqn, NULL);
+	irq_set_affinity_hint(irq->irqn, NULL);
 	free_cpumask_var(irq->mask);
 }
 
@@ -344,8 +363,7 @@ static void unrequest_irqs(struct mlx5_core_dev *dev)
 	int i;
 
 	for (i = 0; i < table->nvec; i++)
-		free_irq(pci_irq_vector(dev->pdev, i),
-			 &mlx5_irq_get(dev, i)->nh);
+		irq_put(mlx5_irq_get(dev, i));
 }
 
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
@@ -422,8 +440,7 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	irq_clear_rmap(dev);
 	clear_comp_irqs_affinity_hints(dev);
 	for (i = 0; i < table->nvec; i++)
-		free_irq(pci_irq_vector(dev->pdev, i),
-			 &mlx5_irq_get(dev, i)->nh);
+		irq_release(&mlx5_irq_get(dev, i)->kref);
 	pci_free_irq_vectors(dev->pdev);
 	kfree(table->irq);
 }

From 3b43190b2f25e8e477c9bb32afd01e61161c60f7 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 6 Apr 2021 21:42:17 +0300
Subject: [PATCH 1719/3292] net/mlx5: Introduce API for request and release
 IRQs

Introduce new API that will allow IRQs users to hold a pointer to
mlx5_irq.
In the end of this series, IRQs will be allocated on demand. Hence,
this will allow us to properly manage and use IRQs.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 22 +++++++------
 .../net/ethernet/mellanox/mlx5/core/lib/eq.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/main.c    |  1 +
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   | 19 ------------
 .../ethernet/mellanox/mlx5/core/mlx5_irq.h    | 30 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 31 +++++++++++++------
 .../net/ethernet/mellanox/mlx5/core/sriov.c   |  1 +
 7 files changed, 68 insertions(+), 37 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 77c0ca655975..7e7bbed3763d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -45,6 +45,7 @@
 #include "eswitch.h"
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
+#include "mlx5_irq.h"
 
 enum {
 	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
@@ -309,13 +310,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
 	init_eq_buf(eq);
 
+	eq->irq = mlx5_irq_request(dev, vecidx);
+	if (IS_ERR(eq->irq)) {
+		err = PTR_ERR(eq->irq);
+		goto err_buf;
+	}
+
 	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
 		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
 
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
-		goto err_buf;
+		goto err_irq;
 	}
 
 	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
@@ -359,6 +366,8 @@ err_eq:
 err_in:
 	kvfree(in);
 
+err_irq:
+	mlx5_irq_release(eq->irq);
 err_buf:
 	mlx5_frag_buf_free(dev, &eq->frag_buf);
 	return err;
@@ -377,10 +386,9 @@ err_buf:
 int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 		   struct notifier_block *nb)
 {
-	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;
 
-	err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+	err = mlx5_irq_attach_nb(eq->irq, nb);
 	if (!err)
 		eq_update_ci(eq, 1);
 
@@ -399,9 +407,7 @@ EXPORT_SYMBOL(mlx5_eq_enable);
 void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 		     struct notifier_block *nb)
 {
-	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
-	mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+	mlx5_irq_detach_nb(eq->irq, nb);
 }
 EXPORT_SYMBOL(mlx5_eq_disable);
 
@@ -415,10 +421,9 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 	if (err)
 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
 			       eq->eqn);
-	synchronize_irq(eq->irqn);
+	mlx5_irq_release(eq->irq);
 
 	mlx5_frag_buf_free(dev, &eq->frag_buf);
-
 	return err;
 }
 
@@ -863,7 +868,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 	}
 
 	return 0;
-
 clean:
 	destroy_comp_eqs(dev);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index f607a3858ef5..f618cf95e030 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -32,6 +32,7 @@ struct mlx5_eq {
 	unsigned int            irqn;
 	u8                      eqn;
 	struct mlx5_rsc_debug   *dbg;
+	struct mlx5_irq         *irq;
 };
 
 struct mlx5_eq_async {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 310518fabf77..390b1d3a6fde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -76,6 +76,7 @@
 #include "sf/vhca_event.h"
 #include "sf/dev/dev.h"
 #include "sf/sf.h"
+#include "mlx5_irq.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index dd95aa6eb2f8..343807ac2036 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -169,25 +169,6 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
 
-int mlx5_irq_table_init(struct mlx5_core_dev *dev);
-void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
-int mlx5_irq_table_create(struct mlx5_core_dev *dev);
-void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
-int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
-		       struct notifier_block *nb);
-int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
-		       struct notifier_block *nb);
-
-int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
-			    int msix_vec_count);
-int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
-
-struct cpumask *
-mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
-struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
-struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
-
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
 void mlx5_events_start(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
new file mode 100644
index 000000000000..dd138b38bf36
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_IRQ_H__
+#define __MLX5_IRQ_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_irq;
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
+
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
+			    int msix_vec_count);
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx);
+void mlx5_irq_release(struct mlx5_irq *irq);
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+
+#endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 0e65ac3301c5..ecace7ca4a01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
+#include "mlx5_irq.h"
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
@@ -160,13 +161,10 @@ static void irq_put(struct mlx5_irq *irq)
 	kref_put(&irq->kref, irq_release);
 }
 
-int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
-		       struct notifier_block *nb)
+int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 {
-	struct mlx5_irq *irq;
 	int err;
 
-	irq = &irq_table->irq[vecidx];
 	err = kref_get_unless_zero(&irq->kref);
 	if (WARN_ON_ONCE(!err))
 		/* Something very bad happens here, we are enabling EQ
@@ -179,16 +177,31 @@ int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
 	return err;
 }
 
-int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
-		       struct notifier_block *nb)
+int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 {
-	struct mlx5_irq *irq;
-
-	irq = &irq_table->irq[vecidx];
 	irq_put(irq);
 	return atomic_notifier_chain_unregister(&irq->nh, nb);
 }
 
+void mlx5_irq_release(struct mlx5_irq *irq)
+{
+	synchronize_irq(irq->irqn);
+	irq_put(irq);
+}
+
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx)
+{
+	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
+	struct mlx5_irq *irq = &table->irq[vecidx];
+	int err;
+
+	err = kref_get_unless_zero(&irq->kref);
+	if (!err)
+		return ERR_PTR(-ENOENT);
+
+	return irq;
+}
+
 static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
 {
 	atomic_notifier_call_chain(nh, 0, NULL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 2338989d4403..e8185b69ac6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -34,6 +34,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/vport.h>
 #include "mlx5_core.h"
+#include "mlx5_irq.h"
 #include "eswitch.h"
 
 static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)

From e4e3f24b822f9dc9ae2427a8d686e8c1d80d6bd2 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 23 Feb 2021 10:37:05 +0200
Subject: [PATCH 1720/3292] net/mlx5: Provide cpumask at EQ creation phase

The users of EQ are running their code on different CPUs and with
various affinity patterns. Move the cpumask setting close to their
actual usage.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/odp.c              |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  |  27 +++--
 .../ethernet/mellanox/mlx5/core/mlx5_irq.h    |   3 +-
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 103 ++++--------------
 include/linux/mlx5/eq.h                       |   1 +
 5 files changed, 49 insertions(+), 90 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 782b2af8f211..8f88b044ccbc 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1564,7 +1564,12 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 		.nent = MLX5_IB_NUM_PF_EQE,
 	};
 	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
+	if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
+		err = -ENOMEM;
+		goto err_wq;
+	}
 	eq->core = mlx5_eq_create_generic(dev->mdev, &param);
+	free_cpumask_var(param.affinity);
 	if (IS_ERR(eq->core)) {
 		err = PTR_ERR(eq->core);
 		goto err_wq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 7e7bbed3763d..5a88887c1a58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -310,7 +310,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
 	init_eq_buf(eq);
 
-	eq->irq = mlx5_irq_request(dev, vecidx);
+	eq->irq = mlx5_irq_request(dev, vecidx, param->affinity);
 	if (IS_ERR(eq->irq)) {
 		err = PTR_ERR(eq->irq);
 		goto err_buf;
@@ -621,8 +621,11 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
 
 	eq->irq_nb.notifier_call = mlx5_eq_async_int;
 	spin_lock_init(&eq->lock);
+	if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL))
+		return -ENOMEM;
 
 	err = create_async_eq(dev, &eq->core, param);
+	free_cpumask_var(param->affinity);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
 		return err;
@@ -740,6 +743,9 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev,
 	struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
 	int err;
 
+	if (!param->affinity)
+		return ERR_PTR(-EINVAL);
+
 	if (!eq)
 		return ERR_PTR(-ENOMEM);
 
@@ -850,16 +856,21 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 			.irq_index = vecidx,
 			.nent = nent,
 		};
-		err = create_map_eq(dev, &eq->core, &param);
-		if (err) {
-			kfree(eq);
-			goto clean;
+
+		if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
+			err = -ENOMEM;
+			goto clean_eq;
 		}
+		cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node),
+				param.affinity);
+		err = create_map_eq(dev, &eq->core, &param);
+		free_cpumask_var(param.affinity);
+		if (err)
+			goto clean_eq;
 		err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
 		if (err) {
 			destroy_unmap_eq(dev, &eq->core);
-			kfree(eq);
-			goto clean;
+			goto clean_eq;
 		}
 
 		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
@@ -868,6 +879,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 	}
 
 	return 0;
+clean_eq:
+	kfree(eq);
 clean:
 	destroy_comp_eqs(dev);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index dd138b38bf36..81bfb5f0d332 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -20,7 +20,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
 			    int msix_vec_count);
 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
 
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx);
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+				  struct cpumask *affinity);
 void mlx5_irq_release(struct mlx5_irq *irq);
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index ecace7ca4a01..81b06b5693cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -17,6 +17,7 @@ struct mlx5_irq {
 	struct atomic_notifier_head nh;
 	cpumask_var_t mask;
 	char name[MLX5_MAX_IRQ_NAME];
+	spinlock_t lock; /* protects affinity assignment */
 	struct kref kref;
 	int irqn;
 };
@@ -153,6 +154,8 @@ static void irq_release(struct kref *kref)
 {
 	struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
 
+	irq_set_affinity_hint(irq->irqn, NULL);
+	free_cpumask_var(irq->mask);
 	free_irq(irq->irqn, &irq->nh);
 }
 
@@ -189,7 +192,8 @@ void mlx5_irq_release(struct mlx5_irq *irq)
 	irq_put(irq);
 }
 
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx)
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+				  struct cpumask *affinity)
 {
 	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
 	struct mlx5_irq *irq = &table->irq[vecidx];
@@ -199,6 +203,16 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx)
 	if (!err)
 		return ERR_PTR(-ENOENT);
 
+	spin_lock(&irq->lock);
+	if (!cpumask_empty(irq->mask)) {
+		/* already configured */
+		spin_unlock(&irq->lock);
+		return irq;
+	}
+
+	cpumask_copy(irq->mask, affinity);
+	irq_set_affinity_hint(irq->irqn, irq->mask);
+	spin_unlock(&irq->lock);
 	return irq;
 }
 
@@ -239,6 +253,12 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
 			mlx5_core_err(dev, "Failed to request irq\n");
 			goto err_request_irq;
 		}
+		if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+			mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
+			err = -ENOMEM;
+			goto err_request_irq;
+		}
+		spin_lock_init(&irq->lock);
 		kref_init(&irq->kref);
 	}
 	return 0;
@@ -294,69 +314,6 @@ err_out:
 	return err;
 }
 
-/* Completion IRQ vectors */
-
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
-	struct mlx5_irq *irq;
-
-	irq = mlx5_irq_get(mdev, vecidx);
-	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
-		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
-		return -ENOMEM;
-	}
-
-	cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
-			irq->mask);
-	if (IS_ENABLED(CONFIG_SMP) &&
-	    irq_set_affinity_hint(irq->irqn, irq->mask))
-		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
-			       irq->irqn);
-
-	return 0;
-}
-
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
-	struct mlx5_irq *irq;
-
-	irq = mlx5_irq_get(mdev, vecidx);
-	irq_set_affinity_hint(irq->irqn, NULL);
-	free_cpumask_var(irq->mask);
-}
-
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
-{
-	int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
-	int err;
-	int i;
-
-	for (i = 0; i < nvec; i++) {
-		err = set_comp_irq_affinity_hint(mdev, i);
-		if (err)
-			goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	for (i--; i >= 0; i--)
-		clear_comp_irq_affinity_hint(mdev, i);
-
-	return err;
-}
-
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
-{
-	int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
-	int i;
-
-	for (i = 0; i < nvec; i++)
-		clear_comp_irq_affinity_hint(mdev, i);
-}
-
 struct cpumask *
 mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
 {
@@ -370,15 +327,6 @@ struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
 }
 #endif
 
-static void unrequest_irqs(struct mlx5_core_dev *dev)
-{
-	struct mlx5_irq_table *table = dev->priv.irq_table;
-	int i;
-
-	for (i = 0; i < table->nvec; i++)
-		irq_put(mlx5_irq_get(dev, i));
-}
-
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
@@ -419,16 +367,8 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 	if (err)
 		goto err_request_irqs;
 
-	err = set_comp_irq_affinity_hints(dev);
-	if (err) {
-		mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
-		goto err_set_affinity;
-	}
-
 	return 0;
 
-err_set_affinity:
-	unrequest_irqs(dev);
 err_request_irqs:
 	irq_clear_rmap(dev);
 err_set_rmap:
@@ -451,7 +391,6 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	 * which should be called after alloc_irq but before request_irq.
 	 */
 	irq_clear_rmap(dev);
-	clear_comp_irqs_affinity_hints(dev);
 	for (i = 0; i < table->nvec; i++)
 		irq_release(&mlx5_irq_get(dev, i)->kref);
 	pci_free_irq_vectors(dev->pdev);
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index e49d8c0d4f26..cea6ecb4b73e 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -16,6 +16,7 @@ struct mlx5_eq_param {
 	u8             irq_index;
 	int            nent;
 	u64            mask[4];
+	cpumask_var_t  affinity;
 };
 
 struct mlx5_eq *

From 652e3581f2483a4965ea79a4dbce153fe0f39d1f Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Thu, 14 Jan 2021 15:19:40 +0200
Subject: [PATCH 1721/3292] net/mlx5: Clean license text in eq.[c|h] files

The eq.[c|h] files are under major rewrite. so use this opportunity and
update their copyright and license texts.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 31 ++-----------------
 .../net/ethernet/mellanox/mlx5/core/lib/eq.h  |  2 +-
 2 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 5a88887c1a58..ef0fe499eaed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2021, Mellanox Technologies inc.  All rights reserved.
  */
 
 #include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index f618cf95e030..624cedebb510 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies */
+/* Copyright (c) 2018-2021, Mellanox Technologies inc.  All rights reserved. */
 
 #ifndef __LIB_MLX5_EQ_H__
 #define __LIB_MLX5_EQ_H__

From 2de61538377c6d417c5c18e12309fe7bf098f2c9 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 23 Feb 2021 11:08:26 +0200
Subject: [PATCH 1722/3292] net/mlx5: Removing rmap per IRQ

In next patches, IRQs will be requested according to demand, instead of
statically on driver boot.
Also, currently, rmap is managed by the IRQ layer. rmap management will
move out from the IRQ layer in future patches.

Therefore, we want to remove the IRQ from the rmap, when IRQ is destroyed,
instead of removing all the IRQs from the rmap when irq_table is destroyed.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c   | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 81b06b5693cd..6a5a6ec0ddbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -154,8 +154,14 @@ static void irq_release(struct kref *kref)
 {
 	struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
 
+	/* free_irq requires that affinity and rmap will be cleared
+	 * before calling it. This is why there is asymmetry with set_rmap
+	 * which should be called after alloc_irq but before request_irq.
+	 */
 	irq_set_affinity_hint(irq->irqn, NULL);
 	free_cpumask_var(irq->mask);
+	/* this line is releasing this irq from the rmap */
+	irq_set_affinity_notifier(irq->irqn, NULL);
 	free_irq(irq->irqn, &irq->nh);
 }
 
@@ -378,6 +384,11 @@ err_free_irq:
 	return err;
 }
 
+static void irq_table_clear_rmap(struct mlx5_irq_table *table)
+{
+	cpu_rmap_put(table->rmap);
+}
+
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 {
 	struct mlx5_irq_table *table = dev->priv.irq_table;
@@ -386,11 +397,7 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	if (mlx5_core_is_sf(dev))
 		return;
 
-	/* free_irq requires that affinity and rmap will be cleared
-	 * before calling it. This is why there is asymmetry with set_rmap
-	 * which should be called after alloc_irq but before request_irq.
-	 */
-	irq_clear_rmap(dev);
+	irq_table_clear_rmap(table);
 	for (i = 0; i < table->nvec; i++)
 		irq_release(&mlx5_irq_get(dev, i)->kref);
 	pci_free_irq_vectors(dev->pdev);

From e8abebb3a48e867179dc6c61c0579e2c6f6cac7b Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 23 Feb 2021 11:15:43 +0200
Subject: [PATCH 1723/3292] net/mlx5: Extend mlx5_irq_request to request IRQ
 from the kernel

Extend mlx5_irq_request so that IRQs will be requested upon EQ creation,
and not on driver boot.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 126 ++++++++----------
 1 file changed, 56 insertions(+), 70 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 6a5a6ec0ddbf..7d6ca2581532 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -17,7 +17,6 @@ struct mlx5_irq {
 	struct atomic_notifier_head nh;
 	cpumask_var_t mask;
 	char name[MLX5_MAX_IRQ_NAME];
-	spinlock_t lock; /* protects affinity assignment */
 	struct kref kref;
 	int irqn;
 };
@@ -60,7 +59,7 @@ int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
 
 static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
 {
-	struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 
 	return &irq_table->irq[vecidx];
 }
@@ -192,37 +191,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 	return atomic_notifier_chain_unregister(&irq->nh, nb);
 }
 
-void mlx5_irq_release(struct mlx5_irq *irq)
-{
-	synchronize_irq(irq->irqn);
-	irq_put(irq);
-}
-
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
-				  struct cpumask *affinity)
-{
-	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
-	struct mlx5_irq *irq = &table->irq[vecidx];
-	int err;
-
-	err = kref_get_unless_zero(&irq->kref);
-	if (!err)
-		return ERR_PTR(-ENOENT);
-
-	spin_lock(&irq->lock);
-	if (!cpumask_empty(irq->mask)) {
-		/* already configured */
-		spin_unlock(&irq->lock);
-		return irq;
-	}
-
-	cpumask_copy(irq->mask, affinity);
-	irq_set_affinity_hint(irq->irqn, irq->mask);
-	spin_unlock(&irq->lock);
-	return irq;
-}
-
-static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+static irqreturn_t irq_int_handler(int irq, void *nh)
 {
 	atomic_notifier_call_chain(nh, 0, NULL);
 	return IRQ_HANDLED;
@@ -230,7 +199,7 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
 
 static void irq_set_name(char *name, int vecidx)
 {
-	if (vecidx == 0) {
+	if (!vecidx) {
 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
 		return;
 	}
@@ -239,41 +208,67 @@ static void irq_set_name(char *name, int vecidx)
 		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
 }
 
-static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+static int irq_request(struct mlx5_core_dev *dev, int i)
 {
+	struct mlx5_irq *irq = mlx5_irq_get(dev, i);
 	char name[MLX5_MAX_IRQ_NAME];
 	int err;
-	int i;
 
-	for (i = 0; i < nvec; i++) {
-		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
-
-		irq->irqn = pci_irq_vector(dev->pdev, i);
-		irq_set_name(name, i);
-		ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
-		snprintf(irq->name, MLX5_MAX_IRQ_NAME,
-			 "%s@pci:%s", name, pci_name(dev->pdev));
-		err = request_irq(irq->irqn, mlx5_irq_int_handler, 0, irq->name,
-				  &irq->nh);
-		if (err) {
-			mlx5_core_err(dev, "Failed to request irq\n");
-			goto err_request_irq;
-		}
-		if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
-			mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
-			err = -ENOMEM;
-			goto err_request_irq;
-		}
-		spin_lock_init(&irq->lock);
-		kref_init(&irq->kref);
+	irq->irqn = pci_irq_vector(dev->pdev, i);
+	irq_set_name(name, i);
+	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+		 "%s@pci:%s", name, pci_name(dev->pdev));
+	err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+			  &irq->nh);
+	if (err) {
+		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
+		return err;
 	}
+	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
+		free_irq(irq->irqn, &irq->nh);
+		return -ENOMEM;
+	}
+	kref_init(&irq->kref);
 	return 0;
+}
 
-err_request_irq:
-	while (i--)
-		irq_put(mlx5_irq_get(dev, i));
+/**
+ * mlx5_irq_release - release an IRQ back to the system.
+ * @irq: irq to be released.
+ */
+void mlx5_irq_release(struct mlx5_irq *irq)
+{
+	synchronize_irq(irq->irqn);
+	irq_put(irq);
+}
 
-	return  err;
+/**
+ * mlx5_irq_request - request an IRQ for mlx5 device.
+ * @dev: mlx5 device that requesting the IRQ.
+ * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
+ * provided.
+ * @affinity: cpumask requested for this IRQ.
+ *
+ * This function returns a pointer to IRQ, or ERR_PTR in case of error.
+ */
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+				  struct cpumask *affinity)
+{
+	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
+	struct mlx5_irq *irq = &table->irq[vecidx];
+	int ret;
+
+	ret = kref_get_unless_zero(&irq->kref);
+	if (ret)
+		return irq;
+	ret = irq_request(dev, vecidx);
+	if (ret)
+		return ERR_PTR(ret);
+	cpumask_copy(irq->mask, affinity);
+	irq_set_affinity_hint(irq->irqn, irq->mask);
+	return irq;
 }
 
 static void irq_clear_rmap(struct mlx5_core_dev *dev)
@@ -369,14 +364,8 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 	if (err)
 		goto err_set_rmap;
 
-	err = request_irqs(dev, nvec);
-	if (err)
-		goto err_request_irqs;
-
 	return 0;
 
-err_request_irqs:
-	irq_clear_rmap(dev);
 err_set_rmap:
 	pci_free_irq_vectors(dev->pdev);
 err_free_irq:
@@ -392,14 +381,11 @@ static void irq_table_clear_rmap(struct mlx5_irq_table *table)
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 {
 	struct mlx5_irq_table *table = dev->priv.irq_table;
-	int i;
 
 	if (mlx5_core_is_sf(dev))
 		return;
 
 	irq_table_clear_rmap(table);
-	for (i = 0; i < table->nvec; i++)
-		irq_release(&mlx5_irq_get(dev, i)->kref);
 	pci_free_irq_vectors(dev->pdev);
 	kfree(table->irq);
 }

From 2d74524c0106abe2025228111466f2f4b63d420a Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 23 Feb 2021 11:24:47 +0200
Subject: [PATCH 1724/3292] net/mlx5: Moving rmap logic to EQs

IRQs are being simplified in order to ease their sharing and any feature
specific object will be moved to upper layer.
Hence we move rmap object into eq_table.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 67 ++++++++++++++-
 .../ethernet/mellanox/mlx5/core/mlx5_irq.h    |  1 -
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 84 +++----------------
 3 files changed, 78 insertions(+), 74 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ef0fe499eaed..898ae3d47f20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -58,6 +58,9 @@ struct mlx5_eq_table {
 	struct mutex            lock; /* sync async eqs creations */
 	int			num_comp_eqs;
 	struct mlx5_irq_table	*irq_table;
+#ifdef CONFIG_RFS_ACCEL
+	struct cpu_rmap		*rmap;
+#endif
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
@@ -899,7 +902,7 @@ EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
 #ifdef CONFIG_RFS_ACCEL
 struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
 {
-	return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
+	return dev->priv.eq_table->rmap;
 }
 #endif
 
@@ -916,12 +919,57 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
 	return ERR_PTR(-ENOENT);
 }
 
+static void clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+	free_irq_cpu_rmap(eq_table->rmap);
+#endif
+}
+
+static int set_rmap(struct mlx5_core_dev *mdev)
+{
+	int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+	int vecidx;
+
+	eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+	if (!eq_table->rmap) {
+		err = -ENOMEM;
+		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+		goto err_out;
+	}
+
+	vecidx = MLX5_IRQ_VEC_COMP_BASE;
+	for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE;
+	     vecidx++) {
+		err = irq_cpu_rmap_add(eq_table->rmap,
+				       pci_irq_vector(mdev->pdev, vecidx));
+		if (err) {
+			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+				      err);
+			goto err_irq_cpu_rmap_add;
+		}
+	}
+	return 0;
+
+err_irq_cpu_rmap_add:
+	clear_rmap(mdev);
+err_out:
+#endif
+	return err;
+}
+
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 
 	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
+	if (!mlx5_core_is_sf(dev))
+		clear_rmap(dev);
 	mlx5_irq_table_destroy(dev);
 	mutex_unlock(&table->lock);
 }
@@ -951,6 +999,18 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 		goto err_async_eqs;
 	}
 
+	if (!mlx5_core_is_sf(dev)) {
+		/* rmap is a mapping between irq number and queue number.
+		 * each irq can be assign only to a single rmap.
+		 * since SFs share IRQs, rmap mapping cannot function correctly
+		 * for irqs that are shared for different core/netdev RX rings.
+		 * Hence we don't allow netdev rmap for SFs
+		 */
+		err = set_rmap(dev);
+		if (err)
+			goto err_rmap;
+	}
+
 	err = create_comp_eqs(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to create completion EQs\n");
@@ -959,6 +1019,9 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 
 	return 0;
 err_comp_eqs:
+	if (!mlx5_core_is_sf(dev))
+		clear_rmap(dev);
+err_rmap:
 	destroy_async_eqs(dev);
 err_async_eqs:
 	return err;
@@ -966,6 +1029,8 @@ err_async_eqs:
 
 void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
 {
+	if (!mlx5_core_is_sf(dev))
+		clear_rmap(dev);
 	destroy_comp_eqs(dev);
 	destroy_async_eqs(dev);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 81bfb5f0d332..d4be79884cb4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -12,7 +12,6 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_irq_table_create(struct mlx5_core_dev *dev);
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
-struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
 int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 7d6ca2581532..149d6db9ee0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -24,9 +24,6 @@ struct mlx5_irq {
 struct mlx5_irq_table {
 	struct mlx5_irq *irq;
 	int nvec;
-#ifdef CONFIG_RFS_ACCEL
-	struct cpu_rmap *rmap;
-#endif
 };
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
@@ -159,8 +156,6 @@ static void irq_release(struct kref *kref)
 	 */
 	irq_set_affinity_hint(irq->irqn, NULL);
 	free_cpumask_var(irq->mask);
-	/* this line is releasing this irq from the rmap */
-	irq_set_affinity_notifier(irq->irqn, NULL);
 	free_irq(irq->irqn, &irq->nh);
 }
 
@@ -210,10 +205,11 @@ static void irq_set_name(char *name, int vecidx)
 
 static int irq_request(struct mlx5_core_dev *dev, int i)
 {
-	struct mlx5_irq *irq = mlx5_irq_get(dev, i);
 	char name[MLX5_MAX_IRQ_NAME];
+	struct mlx5_irq *irq;
 	int err;
 
+	irq = mlx5_irq_get(dev, i);
 	irq->irqn = pci_irq_vector(dev->pdev, i);
 	irq_set_name(name, i);
 	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
@@ -223,15 +219,22 @@ static int irq_request(struct mlx5_core_dev *dev, int i)
 			  &irq->nh);
 	if (err) {
 		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
-		return err;
+		goto err_req_irq;
 	}
 	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
 		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
-		free_irq(irq->irqn, &irq->nh);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto err_cpumask;
 	}
 	kref_init(&irq->kref);
 	return 0;
+
+err_cpumask:
+	free_irq(irq->irqn, &irq->nh);
+err_req_irq:
+	if (i != 0)
+		irq_set_affinity_notifier(irq->irqn, NULL);
+	return err;
 }
 
 /**
@@ -271,63 +274,12 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
 	return irq;
 }
 
-static void irq_clear_rmap(struct mlx5_core_dev *dev)
-{
-#ifdef CONFIG_RFS_ACCEL
-	struct mlx5_irq_table *irq_table = dev->priv.irq_table;
-
-	free_irq_cpu_rmap(irq_table->rmap);
-#endif
-}
-
-static int irq_set_rmap(struct mlx5_core_dev *mdev)
-{
-	int err = 0;
-#ifdef CONFIG_RFS_ACCEL
-	struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
-	int num_affinity_vec;
-	int vecidx;
-
-	num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
-	irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
-	if (!irq_table->rmap) {
-		err = -ENOMEM;
-		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
-		goto err_out;
-	}
-
-	vecidx = MLX5_IRQ_VEC_COMP_BASE;
-	for (; vecidx < irq_table->nvec; vecidx++) {
-		err = irq_cpu_rmap_add(irq_table->rmap,
-				       pci_irq_vector(mdev->pdev, vecidx));
-		if (err) {
-			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
-				      err);
-			goto err_irq_cpu_rmap_add;
-		}
-	}
-	return 0;
-
-err_irq_cpu_rmap_add:
-	irq_clear_rmap(mdev);
-err_out:
-#endif
-	return err;
-}
-
 struct cpumask *
 mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
 {
 	return irq_table->irq[vecidx].mask;
 }
 
-#ifdef CONFIG_RFS_ACCEL
-struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
-{
-	return irq_table->rmap;
-}
-#endif
-
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
@@ -360,24 +312,13 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 
 	table->nvec = nvec;
 
-	err = irq_set_rmap(dev);
-	if (err)
-		goto err_set_rmap;
-
 	return 0;
 
-err_set_rmap:
-	pci_free_irq_vectors(dev->pdev);
 err_free_irq:
 	kfree(table->irq);
 	return err;
 }
 
-static void irq_table_clear_rmap(struct mlx5_irq_table *table)
-{
-	cpu_rmap_put(table->rmap);
-}
-
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 {
 	struct mlx5_irq_table *table = dev->priv.irq_table;
@@ -385,7 +326,6 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	if (mlx5_core_is_sf(dev))
 		return;
 
-	irq_table_clear_rmap(table);
 	pci_free_irq_vectors(dev->pdev);
 	kfree(table->irq);
 }

From fc63dd2a85be1f37fb822594101e9219b7be7460 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 23 Feb 2021 11:38:52 +0200
Subject: [PATCH 1725/3292] net/mlx5: Change IRQ storage logic from static to
 dynamic

Store newly created IRQs in the xarray DB instead of a static array,
so we will be able to store only IRQs which are being used.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 12 ++-
 .../ethernet/mellanox/mlx5/core/mlx5_irq.h    |  3 +-
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 79 +++++++++++--------
 3 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 898ae3d47f20..96649dbcef39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -892,10 +892,16 @@ EXPORT_SYMBOL(mlx5_comp_vectors_count);
 struct cpumask *
 mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
 {
-	int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq, *n;
+	int i = 0;
 
-	return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
-					  vecidx);
+	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+		if (i++ == vector)
+			break;
+	}
+
+	return mlx5_irq_get_affinity_mask(eq->core.irq);
 }
 EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index d4be79884cb4..63b33cd37f7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -24,7 +24,6 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
 void mlx5_irq_release(struct mlx5_irq *irq);
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
-struct cpumask *
-mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
 
 #endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 149d6db9ee0e..a6acc78bd1a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -14,15 +14,17 @@
 #define MLX5_MAX_IRQ_NAME (32)
 
 struct mlx5_irq {
+	u32 index;
 	struct atomic_notifier_head nh;
 	cpumask_var_t mask;
 	char name[MLX5_MAX_IRQ_NAME];
 	struct kref kref;
 	int irqn;
+	struct mlx5_irq_table *table;
 };
 
 struct mlx5_irq_table {
-	struct mlx5_irq *irq;
+	struct xarray irqs;
 	int nvec;
 };
 
@@ -54,13 +56,6 @@ int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
 	return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
 }
 
-static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
-{
-	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
-
-	return &irq_table->irq[vecidx];
-}
-
 /**
  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
  *                                   to be ssigned to each VF.
@@ -149,7 +144,9 @@ out:
 static void irq_release(struct kref *kref)
 {
 	struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
+	struct mlx5_irq_table *table =  irq->table;
 
+	xa_erase(&table->irqs, irq->index);
 	/* free_irq requires that affinity and rmap will be cleared
 	 * before calling it. This is why there is asymmetry with set_rmap
 	 * which should be called after alloc_irq but before request_irq.
@@ -157,6 +154,7 @@ static void irq_release(struct kref *kref)
 	irq_set_affinity_hint(irq->irqn, NULL);
 	free_cpumask_var(irq->mask);
 	free_irq(irq->irqn, &irq->nh);
+	kfree(irq);
 }
 
 static void irq_put(struct mlx5_irq *irq)
@@ -203,13 +201,17 @@ static void irq_set_name(char *name, int vecidx)
 		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
 }
 
-static int irq_request(struct mlx5_core_dev *dev, int i)
+static struct mlx5_irq *irq_request(struct mlx5_core_dev *dev, int i)
 {
+	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
 	char name[MLX5_MAX_IRQ_NAME];
+	struct xa_limit xa_num_irqs;
 	struct mlx5_irq *irq;
 	int err;
 
-	irq = mlx5_irq_get(dev, i);
+	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+	if (!irq)
+		return ERR_PTR(-ENOMEM);
 	irq->irqn = pci_irq_vector(dev->pdev, i);
 	irq_set_name(name, i);
 	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
@@ -226,15 +228,25 @@ static int irq_request(struct mlx5_core_dev *dev, int i)
 		err = -ENOMEM;
 		goto err_cpumask;
 	}
+	xa_num_irqs.min = 0;
+	xa_num_irqs.max = table->nvec;
+	err = xa_alloc(&table->irqs, &irq->index, irq, xa_num_irqs,
+		       GFP_KERNEL);
+	if (err) {
+		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
+			      irq->index, err);
+		goto err_xa;
+	}
+	irq->table = table;
 	kref_init(&irq->kref);
-	return 0;
-
+	return irq;
+err_xa:
+	free_cpumask_var(irq->mask);
 err_cpumask:
 	free_irq(irq->irqn, &irq->nh);
 err_req_irq:
-	if (i != 0)
-		irq_set_affinity_notifier(irq->irqn, NULL);
-	return err;
+	kfree(irq);
+	return ERR_PTR(err);
 }
 
 /**
@@ -259,25 +271,25 @@ void mlx5_irq_release(struct mlx5_irq *irq)
 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
 				  struct cpumask *affinity)
 {
-	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
-	struct mlx5_irq *irq = &table->irq[vecidx];
-	int ret;
+	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+	struct mlx5_irq *irq;
 
-	ret = kref_get_unless_zero(&irq->kref);
-	if (ret)
+	irq = xa_load(&irq_table->irqs, vecidx);
+	if (irq) {
+		kref_get(&irq->kref);
+		return irq;
+	}
+	irq = irq_request(dev, vecidx);
+	if (IS_ERR(irq))
 		return irq;
-	ret = irq_request(dev, vecidx);
-	if (ret)
-		return ERR_PTR(ret);
 	cpumask_copy(irq->mask, affinity);
 	irq_set_affinity_hint(irq->irqn, irq->mask);
 	return irq;
 }
 
-struct cpumask *
-mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 {
-	return irq_table->irq[vecidx].mask;
+	return irq->mask;
 }
 
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
@@ -299,9 +311,7 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 	if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
 		return -ENOMEM;
 
-	table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
-	if (!table->irq)
-		return -ENOMEM;
+	xa_init_flags(&table->irqs, XA_FLAGS_ALLOC);
 
 	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
 				     nvec, PCI_IRQ_MSIX);
@@ -315,19 +325,26 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 	return 0;
 
 err_free_irq:
-	kfree(table->irq);
+	xa_destroy(&table->irqs);
 	return err;
 }
 
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 {
 	struct mlx5_irq_table *table = dev->priv.irq_table;
+	struct mlx5_irq *irq;
+	unsigned long index;
 
 	if (mlx5_core_is_sf(dev))
 		return;
 
+	/* There are cases where IRQs still will be in used when we reaching
+	 * to here. Hence, making sure all the irqs are realeased.
+	 */
+	xa_for_each(&table->irqs, index, irq)
+		irq_release(&irq->kref);
 	pci_free_irq_vectors(dev->pdev);
-	kfree(table->irq);
+	xa_destroy(&table->irqs);
 }
 
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)

From 71e084e26414b0f27d8befa1c30b74d39d9cb2a1 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 23 Feb 2021 11:48:17 +0200
Subject: [PATCH 1726/3292] net/mlx5: Allocating a pool of MSI-X vectors for
 SFs

SFs (Sub Functions) currently use IRQs from the global IRQ table their
parent Physical Function have. In order to better scale, we need to
allocate more IRQs and share them between different SFs.

Driver will maintain 3 separated irq pools:
1. A pool that serve the PF consumer (PF's netdev, rdma stacks), similar
to what the driver had before this patch. i.e, this pool will share irqs
between rdma and netev, and will keep the irq indexes and allocation
order. The last is important for PF netdev rmap (aRFS).

2. A pool of control IRQs for SFs. The size of this pool is the number
of SFs that can be created divided by SFS_PER_IRQ. This pool will serve
the control path EQs of the SFs.

3. A pool of completion data path IRQs for SFs transport queues. The
size of this pool is:
num_irqs_allocated - pf_pool_size - sf_ctrl_pool_size.
This pool will served netdev and rdma stacks. Moreover, rmap is not
supported on SFs.

Sharing methodology of the SFs pools is explained in the next patch.

Important note: rmap is not supported on SFs because rmap mapping cannot
function correctly for IRQs that are shared for different core/netdev RX
rings.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  |  12 +-
 .../ethernet/mellanox/mlx5/core/mlx5_irq.h    |   6 +-
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 354 ++++++++++++------
 3 files changed, 240 insertions(+), 132 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 96649dbcef39..b8ac9f58d2b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -471,14 +471,7 @@ static int create_async_eq(struct mlx5_core_dev *dev,
 	int err;
 
 	mutex_lock(&eq_table->lock);
-	/* Async EQs must share irq index 0 */
-	if (param->irq_index != 0) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
 	err = create_map_eq(dev, eq, param);
-unlock:
 	mutex_unlock(&eq_table->lock);
 	return err;
 }
@@ -996,8 +989,11 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 
 	eq_table->num_comp_eqs =
 		min_t(int,
-		      mlx5_irq_get_num_comp(eq_table->irq_table),
+		      mlx5_irq_table_get_num_comp(eq_table->irq_table),
 		      num_eqs - MLX5_MAX_ASYNC_EQS);
+	if (mlx5_core_is_sf(dev))
+		eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
+					       MLX5_COMP_EQS_PER_SF);
 
 	err = create_async_eqs(dev);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 63b33cd37f7c..48656e8624a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -6,13 +6,17 @@
 
 #include <linux/mlx5/driver.h>
 
+#define MLX5_COMP_EQS_PER_SF 8
+
+#define MLX5_IRQ_EQ_CTRL (0)
+
 struct mlx5_irq;
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev);
 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_irq_table_create(struct mlx5_core_dev *dev);
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
 
 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index a6acc78bd1a3..4f18fbcf7ccd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -7,11 +7,19 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 #include "mlx5_irq.h"
+#include "sf/sf.h"
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
 
 #define MLX5_MAX_IRQ_NAME (32)
+/* max irq_index is 255. three chars */
+#define MLX5_MAX_IRQ_IDX_CHARS (3)
+
+#define MLX5_SFS_PER_CTRL_IRQ 64
+#define MLX5_IRQ_CTRL_SF_MAX 8
+/* min num of vectores for SFs to be enabled */
+#define MLX5_IRQ_VEC_COMP_BASE_SF 2
 
 struct mlx5_irq {
 	u32 index;
@@ -20,42 +28,22 @@ struct mlx5_irq {
 	char name[MLX5_MAX_IRQ_NAME];
 	struct kref kref;
 	int irqn;
-	struct mlx5_irq_table *table;
+	struct mlx5_irq_pool *pool;
+};
+
+struct mlx5_irq_pool {
+	char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
+	struct xa_limit xa_num_irqs;
+	struct xarray irqs;
+	struct mlx5_core_dev *dev;
 };
 
 struct mlx5_irq_table {
-	struct xarray irqs;
-	int nvec;
+	struct mlx5_irq_pool *pf_pool;
+	struct mlx5_irq_pool *sf_ctrl_pool;
+	struct mlx5_irq_pool *sf_comp_pool;
 };
 
-int mlx5_irq_table_init(struct mlx5_core_dev *dev)
-{
-	struct mlx5_irq_table *irq_table;
-
-	if (mlx5_core_is_sf(dev))
-		return 0;
-
-	irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
-	if (!irq_table)
-		return -ENOMEM;
-
-	dev->priv.irq_table = irq_table;
-	return 0;
-}
-
-void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
-{
-	if (mlx5_core_is_sf(dev))
-		return;
-
-	kvfree(dev->priv.irq_table);
-}
-
-int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
-{
-	return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
-}
-
 /**
  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
  *                                   to be ssigned to each VF.
@@ -144,9 +132,9 @@ out:
 static void irq_release(struct kref *kref)
 {
 	struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
-	struct mlx5_irq_table *table =  irq->table;
+	struct mlx5_irq_pool *pool = irq->pool;
 
-	xa_erase(&table->irqs, irq->index);
+	xa_erase(&pool->irqs, irq->index);
 	/* free_irq requires that affinity and rmap will be cleared
 	 * before calling it. This is why there is asymmetry with set_rmap
 	 * which should be called after alloc_irq but before request_irq.
@@ -162,6 +150,76 @@ static void irq_put(struct mlx5_irq *irq)
 	kref_put(&irq->kref, irq_release);
 }
 
+static irqreturn_t irq_int_handler(int irq, void *nh)
+{
+	atomic_notifier_call_chain(nh, 0, NULL);
+	return IRQ_HANDLED;
+}
+
+static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
+{
+	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+	if (vecidx == 0) {
+		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
+		return;
+	}
+
+	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
+}
+
+static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
+{
+	struct mlx5_core_dev *dev = pool->dev;
+	char name[MLX5_MAX_IRQ_NAME];
+	struct mlx5_irq *irq;
+	int err;
+
+	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+	if (!irq)
+		return ERR_PTR(-ENOMEM);
+	irq->irqn = pci_irq_vector(dev->pdev, i);
+	if (!pool->name[0])
+		irq_set_name(name, i);
+	else
+		irq_sf_set_name(pool, name, i);
+	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+		 "%s@pci:%s", name, pci_name(dev->pdev));
+	err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+			  &irq->nh);
+	if (err) {
+		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
+		goto err_req_irq;
+	}
+	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
+		err = -ENOMEM;
+		goto err_cpumask;
+	}
+	err = xa_alloc(&pool->irqs, &irq->index, irq, pool->xa_num_irqs,
+		       GFP_KERNEL);
+	if (err) {
+		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
+			      irq->index, err);
+		goto err_xa;
+	}
+	irq->pool = pool;
+	kref_init(&irq->kref);
+	return irq;
+err_xa:
+	free_cpumask_var(irq->mask);
+err_cpumask:
+	free_irq(irq->irqn, &irq->nh);
+err_req_irq:
+	kfree(irq);
+	return ERR_PTR(err);
+}
+
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 {
 	int err;
@@ -184,69 +242,9 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 	return atomic_notifier_chain_unregister(&irq->nh, nb);
 }
 
-static irqreturn_t irq_int_handler(int irq, void *nh)
+struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 {
-	atomic_notifier_call_chain(nh, 0, NULL);
-	return IRQ_HANDLED;
-}
-
-static void irq_set_name(char *name, int vecidx)
-{
-	if (!vecidx) {
-		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
-		return;
-	}
-
-	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
-		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
-}
-
-static struct mlx5_irq *irq_request(struct mlx5_core_dev *dev, int i)
-{
-	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
-	char name[MLX5_MAX_IRQ_NAME];
-	struct xa_limit xa_num_irqs;
-	struct mlx5_irq *irq;
-	int err;
-
-	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
-	if (!irq)
-		return ERR_PTR(-ENOMEM);
-	irq->irqn = pci_irq_vector(dev->pdev, i);
-	irq_set_name(name, i);
-	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
-	snprintf(irq->name, MLX5_MAX_IRQ_NAME,
-		 "%s@pci:%s", name, pci_name(dev->pdev));
-	err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
-			  &irq->nh);
-	if (err) {
-		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
-		goto err_req_irq;
-	}
-	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
-		mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
-		err = -ENOMEM;
-		goto err_cpumask;
-	}
-	xa_num_irqs.min = 0;
-	xa_num_irqs.max = table->nvec;
-	err = xa_alloc(&table->irqs, &irq->index, irq, xa_num_irqs,
-		       GFP_KERNEL);
-	if (err) {
-		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
-			      irq->index, err);
-		goto err_xa;
-	}
-	irq->table = table;
-	kref_init(&irq->kref);
-	return irq;
-err_xa:
-	free_cpumask_var(irq->mask);
-err_cpumask:
-	free_irq(irq->irqn, &irq->nh);
-err_req_irq:
-	kfree(irq);
-	return ERR_PTR(err);
+	return irq->mask;
 }
 
 /**
@@ -272,14 +270,17 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
 				  struct cpumask *affinity)
 {
 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
+	struct mlx5_irq_pool *pool;
 	struct mlx5_irq *irq;
 
-	irq = xa_load(&irq_table->irqs, vecidx);
+	pool = irq_table->pf_pool;
+
+	irq = xa_load(&pool->irqs, vecidx);
 	if (irq) {
 		kref_get(&irq->kref);
 		return irq;
 	}
-	irq = irq_request(dev, vecidx);
+	irq = irq_request(pool, vecidx);
 	if (IS_ERR(irq))
 		return irq;
 	cpumask_copy(irq->mask, affinity);
@@ -287,53 +288,162 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
 	return irq;
 }
 
-struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
+/* irq_pool API */
+
+static struct mlx5_irq_pool *
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
 {
-	return irq->mask;
+	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
+
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+	pool->dev = dev;
+	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
+	pool->xa_num_irqs.min = start;
+	pool->xa_num_irqs.max = start + size - 1;
+	if (name)
+		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
+			 name);
+	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
+		      name, size, start);
+	return pool;
+}
+
+static void irq_pool_free(struct mlx5_irq_pool *pool)
+{
+	struct mlx5_irq *irq;
+	unsigned long index;
+
+	xa_for_each(&pool->irqs, index, irq)
+		irq_release(&irq->kref);
+	xa_destroy(&pool->irqs);
+	kvfree(pool);
+}
+
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
+{
+	struct mlx5_irq_table *table = dev->priv.irq_table;
+	int num_sf_ctrl_by_msix;
+	int num_sf_ctrl_by_sfs;
+	int num_sf_ctrl;
+	int err;
+
+	/* init pf_pool */
+	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL);
+	if (IS_ERR(table->pf_pool))
+		return PTR_ERR(table->pf_pool);
+	if (!mlx5_sf_max_functions(dev))
+		return 0;
+	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
+		mlx5_core_err(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
+		return 0;
+	}
+
+	/* init sf_ctrl_pool */
+	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
+	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
+					  MLX5_SFS_PER_CTRL_IRQ);
+	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
+	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
+	table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+					     "mlx5_sf_ctrl");
+	if (IS_ERR(table->sf_ctrl_pool)) {
+		err = PTR_ERR(table->sf_ctrl_pool);
+		goto err_pf;
+	}
+	/* init sf_comp_pool */
+	table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+					     sf_vec - num_sf_ctrl, "mlx5_sf_comp");
+	if (IS_ERR(table->sf_comp_pool)) {
+		err = PTR_ERR(table->sf_comp_pool);
+		goto err_sf_ctrl;
+	}
+	return 0;
+err_sf_ctrl:
+	irq_pool_free(table->sf_ctrl_pool);
+err_pf:
+	irq_pool_free(table->pf_pool);
+	return err;
+}
+
+static void irq_pools_destroy(struct mlx5_irq_table *table)
+{
+	if (table->sf_ctrl_pool) {
+		irq_pool_free(table->sf_comp_pool);
+		irq_pool_free(table->sf_ctrl_pool);
+	}
+	irq_pool_free(table->pf_pool);
+}
+
+/* irq_table API */
+
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *irq_table;
+
+	if (mlx5_core_is_sf(dev))
+		return 0;
+
+	irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+	if (!irq_table)
+		return -ENOMEM;
+
+	dev->priv.irq_table = irq_table;
+	return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+	if (mlx5_core_is_sf(dev))
+		return;
+
+	kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
+{
+	return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
 }
 
 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 {
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_irq_table *table = priv->irq_table;
 	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
 		      MLX5_CAP_GEN(dev, max_num_eqs) :
 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
-	int nvec;
+	int total_vec;
+	int pf_vec;
 	int err;
 
 	if (mlx5_core_is_sf(dev))
 		return 0;
 
-	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-	       MLX5_IRQ_VEC_COMP_BASE;
-	nvec = min_t(int, nvec, num_eqs);
-	if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+	pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+		 MLX5_IRQ_VEC_COMP_BASE;
+	pf_vec = min_t(int, pf_vec, num_eqs);
+	if (pf_vec <= MLX5_IRQ_VEC_COMP_BASE)
 		return -ENOMEM;
 
-	xa_init_flags(&table->irqs, XA_FLAGS_ALLOC);
+	total_vec = pf_vec;
+	if (mlx5_sf_max_functions(dev))
+		total_vec += MLX5_IRQ_CTRL_SF_MAX +
+			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
 
-	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
-				     nvec, PCI_IRQ_MSIX);
-	if (nvec < 0) {
-		err = nvec;
-		goto err_free_irq;
-	}
+	total_vec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+					  total_vec, PCI_IRQ_MSIX);
+	if (total_vec < 0)
+		return total_vec;
+	pf_vec = min(pf_vec, total_vec);
 
-	table->nvec = nvec;
+	err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
+	if (err)
+		pci_free_irq_vectors(dev->pdev);
 
-	return 0;
-
-err_free_irq:
-	xa_destroy(&table->irqs);
 	return err;
 }
 
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 {
 	struct mlx5_irq_table *table = dev->priv.irq_table;
-	struct mlx5_irq *irq;
-	unsigned long index;
 
 	if (mlx5_core_is_sf(dev))
 		return;
@@ -341,10 +451,8 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	/* There are cases where IRQs still will be in used when we reaching
 	 * to here. Hence, making sure all the irqs are realeased.
 	 */
-	xa_for_each(&table->irqs, index, irq)
-		irq_release(&irq->kref);
+	irq_pools_destroy(table);
 	pci_free_irq_vectors(dev->pdev);
-	xa_destroy(&table->irqs);
 }
 
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)

From 3af26495a2473c95ada3674c6b4dfc658be0a6ec Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Mon, 10 May 2021 09:10:43 +0300
Subject: [PATCH 1727/3292] net/mlx5: Enlarge interrupt field in CREATE_EQ

FW is now supporting more than 256 MSI-X per PF (up to 2K).
Hence, enlarge interrupt field in CREATE_EQ to make use of the new
MSI-X's.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 057db0eaf195..2d1ed78289ff 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3806,8 +3806,8 @@ struct mlx5_ifc_eqc_bits {
 
 	u8         reserved_at_80[0x20];
 
-	u8         reserved_at_a0[0x18];
-	u8         intr[0x8];
+	u8         reserved_at_a0[0x14];
+	u8         intr[0xc];
 
 	u8         reserved_at_c0[0x3];
 	u8         log_page_size[0x5];

From c8ea212bfdff5152f1ca78400f297bfba75691e0 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 11 May 2021 18:48:30 +0300
Subject: [PATCH 1728/3292] net/mlx5: Separate between public and private API
 of sf.h

Move mlx5_sf_max_functions() and friends from the privete sf/sf.h
to the public lib/sf.h. This is done in order to have one direction
include paths.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/lib/sf.h  | 45 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/sf/sf.h   | 37 +--------------
 2 files changed, 46 insertions(+), 36 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
new file mode 100644
index 000000000000..84e5683861be
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sf.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __LIB_MLX5_SF_H__
+#define __LIB_MLX5_SF_H__
+
+#include <linux/mlx5/driver.h>
+
+static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_GEN(dev, sf_base_id);
+}
+
+#ifdef CONFIG_MLX5_SF
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_GEN(dev, sf);
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+	if (!mlx5_sf_supported(dev))
+		return 0;
+	if (MLX5_CAP_GEN(dev, max_num_sf))
+		return MLX5_CAP_GEN(dev, max_num_sf);
+	else
+		return 1 << MLX5_CAP_GEN(dev, log_max_sf);
+}
+
+#else
+
+static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
+{
+	return false;
+}
+
+static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
+{
+	return 0;
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
index 0b6aea1e6a94..81ce13b19ee8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
@@ -5,42 +5,7 @@
 #define __MLX5_SF_H__
 
 #include <linux/mlx5/driver.h>
-
-static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev)
-{
-	return MLX5_CAP_GEN(dev, sf_base_id);
-}
-
-#ifdef CONFIG_MLX5_SF
-
-static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
-{
-	return MLX5_CAP_GEN(dev, sf);
-}
-
-static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
-{
-	if (!mlx5_sf_supported(dev))
-		return 0;
-	if (MLX5_CAP_GEN(dev, max_num_sf))
-		return MLX5_CAP_GEN(dev, max_num_sf);
-	else
-		return 1 << MLX5_CAP_GEN(dev, log_max_sf);
-}
-
-#else
-
-static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev)
-{
-	return false;
-}
-
-static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev)
-{
-	return 0;
-}
-
-#endif
+#include "lib/sf.h"
 
 #ifdef CONFIG_MLX5_SF_MANAGER
 

From c36326d38d933199014aba5a17d384cf52e4b558 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 23 Feb 2021 11:57:32 +0200
Subject: [PATCH 1729/3292] net/mlx5: Round-Robin EQs over IRQs

Whenever users provided affinity for an EQ creation request, map the
EQ to a matching IRQ.
Matching IRQ=IRQ with the same affinity and type (completion/control) of
the EQ created.

This mapping is being done in agressive dedicated IRQ allocation scheme,
which described bellow.

First, we check whether there is a matching IRQ that his min threshold
is not exhausted.
   - min_eqs_threshold = 3 for control EQ.
   - min_eqs_threshold = 1 for completion EQ.
In case no matching IRQ was found, try to request a new IRQ.
In case we can't request a new IRQ, reuse least-used matching IRQ.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/odp.c              |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  |  14 +-
 .../ethernet/mellanox/mlx5/core/mlx5_irq.h    |   4 +-
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 197 ++++++++++++++++--
 4 files changed, 189 insertions(+), 29 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 8f88b044ccbc..1338c11fd121 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1559,8 +1559,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 	}
 
 	eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
-	param = (struct mlx5_eq_param){
-		.irq_index = 0,
+	param = (struct mlx5_eq_param) {
 		.nent = MLX5_IB_NUM_PF_EQE,
 	};
 	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index b8ac9f58d2b5..7e5b3826eae5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -263,7 +263,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
 	struct mlx5_priv *priv = &dev->priv;
-	u8 vecidx = param->irq_index;
+	u16 vecidx = param->irq_index;
 	__be64 *pas;
 	void *eqc;
 	int inlen;
@@ -292,6 +292,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 		goto err_buf;
 	}
 
+	vecidx = mlx5_irq_get_index(eq->irq);
 	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
 		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
 
@@ -629,7 +630,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	mlx5_eq_notifier_register(dev, &table->cq_err_nb);
 
 	param = (struct mlx5_eq_param) {
-		.irq_index = 0,
 		.nent = MLX5_NUM_CMD_EQE,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
 	};
@@ -642,7 +642,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 
 	param = (struct mlx5_eq_param) {
-		.irq_index = 0,
 		.nent = MLX5_NUM_ASYNC_EQE,
 	};
 
@@ -652,7 +651,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 		goto err2;
 
 	param = (struct mlx5_eq_param) {
-		.irq_index = 0,
 		.nent = /* TODO: sriov max_vf + */ 1,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
 	};
@@ -985,15 +983,19 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
 		      MLX5_CAP_GEN(dev, max_num_eqs) :
 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int max_eqs_sf;
 	int err;
 
 	eq_table->num_comp_eqs =
 		min_t(int,
 		      mlx5_irq_table_get_num_comp(eq_table->irq_table),
 		      num_eqs - MLX5_MAX_ASYNC_EQS);
-	if (mlx5_core_is_sf(dev))
+	if (mlx5_core_is_sf(dev)) {
+		max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
+				   mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
 		eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
-					       MLX5_COMP_EQS_PER_SF);
+					       max_eqs_sf);
+	}
 
 	err = create_async_eqs(dev);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 48656e8624a9..abd024173c42 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -17,17 +17,19 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_irq_table_create(struct mlx5_core_dev *dev);
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
 
 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
 			    int msix_vec_count);
 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
 
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 				  struct cpumask *affinity);
 void mlx5_irq_release(struct mlx5_irq *irq);
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
+int mlx5_irq_get_index(struct mlx5_irq *irq);
 
 #endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 4f18fbcf7ccd..27de8da8edf7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -7,7 +7,7 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 #include "mlx5_irq.h"
-#include "sf/sf.h"
+#include "lib/sf.h"
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
@@ -21,6 +21,12 @@
 /* min num of vectores for SFs to be enabled */
 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
 
+#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
+#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
+#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
+#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
 struct mlx5_irq {
 	u32 index;
 	struct atomic_notifier_head nh;
@@ -34,7 +40,10 @@ struct mlx5_irq {
 struct mlx5_irq_pool {
 	char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
 	struct xa_limit xa_num_irqs;
+	struct mutex lock; /* sync IRQs creations */
 	struct xarray irqs;
+	u32 max_threshold;
+	u32 min_threshold;
 	struct mlx5_core_dev *dev;
 };
 
@@ -147,7 +156,11 @@ static void irq_release(struct kref *kref)
 
 static void irq_put(struct mlx5_irq *irq)
 {
+	struct mlx5_irq_pool *pool = irq->pool;
+
+	mutex_lock(&pool->lock);
 	kref_put(&irq->kref, irq_release);
+	mutex_unlock(&pool->lock);
 }
 
 static irqreturn_t irq_int_handler(int irq, void *nh)
@@ -201,15 +214,15 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
 		err = -ENOMEM;
 		goto err_cpumask;
 	}
-	err = xa_alloc(&pool->irqs, &irq->index, irq, pool->xa_num_irqs,
-		       GFP_KERNEL);
+	kref_init(&irq->kref);
+	irq->index = i;
+	err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
 	if (err) {
 		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
 			      irq->index, err);
 		goto err_xa;
 	}
 	irq->pool = pool;
-	kref_init(&irq->kref);
 	return irq;
 err_xa:
 	free_cpumask_var(irq->mask);
@@ -247,6 +260,124 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 	return irq->mask;
 }
 
+int mlx5_irq_get_index(struct mlx5_irq *irq)
+{
+	return irq->index;
+}
+
+/* irq_pool API */
+
+/* creating an irq from irq_pool */
+static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool,
+					    struct cpumask *affinity)
+{
+	struct mlx5_irq *irq;
+	u32 irq_index;
+	int err;
+
+	err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
+		       GFP_KERNEL);
+	if (err)
+		return ERR_PTR(err);
+	irq = irq_request(pool, irq_index);
+	if (IS_ERR(irq))
+		return irq;
+	cpumask_copy(irq->mask, affinity);
+	irq_set_affinity_hint(irq->irqn, irq->mask);
+	return irq;
+}
+
+/* looking for the irq with the smallest refcount and the same affinity */
+static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
+						   struct cpumask *affinity)
+{
+	int start = pool->xa_num_irqs.min;
+	int end = pool->xa_num_irqs.max;
+	struct mlx5_irq *irq = NULL;
+	struct mlx5_irq *iter;
+	unsigned long index;
+
+	lockdep_assert_held(&pool->lock);
+	xa_for_each_range(&pool->irqs, index, iter, start, end) {
+		if (!cpumask_equal(iter->mask, affinity))
+			continue;
+		if (kref_read(&iter->kref) < pool->min_threshold)
+			return iter;
+		if (!irq || kref_read(&iter->kref) <
+		    kref_read(&irq->kref))
+			irq = iter;
+	}
+	return irq;
+}
+
+/* requesting an irq from a given pool according to given affinity */
+static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
+						  struct cpumask *affinity)
+{
+	struct mlx5_irq *least_loaded_irq, *new_irq;
+
+	mutex_lock(&pool->lock);
+	least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
+	if (least_loaded_irq &&
+	    kref_read(&least_loaded_irq->kref) < pool->min_threshold)
+		goto out;
+	new_irq = irq_pool_create_irq(pool, affinity);
+	if (IS_ERR(new_irq)) {
+		if (!least_loaded_irq) {
+			mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
+				      cpumask_first(affinity));
+			mutex_unlock(&pool->lock);
+			return new_irq;
+		}
+		/* We failed to create a new IRQ for the requested affinity,
+		 * sharing existing IRQ.
+		 */
+		goto out;
+	}
+	least_loaded_irq = new_irq;
+	goto unlock;
+out:
+	kref_get(&least_loaded_irq->kref);
+	if (kref_read(&least_loaded_irq->kref) > pool->max_threshold)
+		mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+			      least_loaded_irq->irqn, pool->name,
+			      kref_read(&least_loaded_irq->kref) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+	mutex_unlock(&pool->lock);
+	return least_loaded_irq;
+}
+
+/* requesting an irq from a given pool according to given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+			struct cpumask *affinity)
+{
+	struct mlx5_irq *irq;
+
+	mutex_lock(&pool->lock);
+	irq = xa_load(&pool->irqs, vecidx);
+	if (irq) {
+		kref_get(&irq->kref);
+		goto unlock;
+	}
+	irq = irq_request(pool, vecidx);
+	if (IS_ERR(irq) || !affinity)
+		goto unlock;
+	cpumask_copy(irq->mask, affinity);
+	irq_set_affinity_hint(irq->irqn, irq->mask);
+unlock:
+	mutex_unlock(&pool->lock);
+	return irq;
+}
+
+static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table,
+					      int i, struct cpumask *affinity)
+{
+	if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL)
+		return irq_table->sf_ctrl_pool;
+	return irq_table->sf_comp_pool;
+}
+
 /**
  * mlx5_irq_release - release an IRQ back to the system.
  * @irq: irq to be released.
@@ -266,32 +397,40 @@ void mlx5_irq_release(struct mlx5_irq *irq)
  *
  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
  */
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 				  struct cpumask *affinity)
 {
 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 	struct mlx5_irq_pool *pool;
 	struct mlx5_irq *irq;
 
-	pool = irq_table->pf_pool;
-
-	irq = xa_load(&pool->irqs, vecidx);
-	if (irq) {
-		kref_get(&irq->kref);
-		return irq;
+	if (mlx5_core_is_sf(dev)) {
+		pool = find_sf_irq_pool(irq_table, vecidx, affinity);
+		if (!pool)
+			/* we don't have IRQs for SFs, using the PF IRQs */
+			goto pf_irq;
+		if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
+			/* In case an SF user request IRQ with vecidx */
+			irq = irq_pool_request_vector(pool, vecidx, NULL);
+		else
+			irq = irq_pool_request_affinity(pool, affinity);
+		goto out;
 	}
-	irq = irq_request(pool, vecidx);
+pf_irq:
+	pool = irq_table->pf_pool;
+	irq = irq_pool_request_vector(pool, vecidx, affinity);
+out:
 	if (IS_ERR(irq))
 		return irq;
-	cpumask_copy(irq->mask, affinity);
-	irq_set_affinity_hint(irq->irqn, irq->mask);
+	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
+		      irq->irqn, cpumask_pr_args(affinity),
+		      kref_read(&irq->kref) / MLX5_EQ_REFS_PER_IRQ);
 	return irq;
 }
 
-/* irq_pool API */
-
 static struct mlx5_irq_pool *
-irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
+	       u32 min_threshold, u32 max_threshold)
 {
 	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
 
@@ -304,6 +443,9 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
 	if (name)
 		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
 			 name);
+	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
+	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
+	mutex_init(&pool->lock);
 	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
 		      name, size, start);
 	return pool;
@@ -329,7 +471,9 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 	int err;
 
 	/* init pf_pool */
-	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL);
+	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
+					MLX5_EQ_SHARE_IRQ_MIN_COMP,
+					MLX5_EQ_SHARE_IRQ_MAX_COMP);
 	if (IS_ERR(table->pf_pool))
 		return PTR_ERR(table->pf_pool);
 	if (!mlx5_sf_max_functions(dev))
@@ -346,14 +490,18 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
 	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
 	table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
-					     "mlx5_sf_ctrl");
+					     "mlx5_sf_ctrl",
+					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
+					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
 	if (IS_ERR(table->sf_ctrl_pool)) {
 		err = PTR_ERR(table->sf_ctrl_pool);
 		goto err_pf;
 	}
 	/* init sf_comp_pool */
 	table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
-					     sf_vec - num_sf_ctrl, "mlx5_sf_comp");
+					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
+					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
+					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
 	if (IS_ERR(table->sf_comp_pool)) {
 		err = PTR_ERR(table->sf_comp_pool);
 		goto err_sf_ctrl;
@@ -455,6 +603,15 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	pci_free_irq_vectors(dev->pdev);
 }
 
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
+{
+	if (table->sf_comp_pool)
+		return table->sf_comp_pool->xa_num_irqs.max -
+			table->sf_comp_pool->xa_num_irqs.min + 1;
+	else
+		return mlx5_irq_table_get_num_comp(table);
+}
+
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
 {
 #ifdef CONFIG_MLX5_SF

From ad3fc798800fb7ca04c1dfc439dba946818048d8 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:16 +0800
Subject: [PATCH 1730/3292] md: revert io stats accounting

The commit 41d2d848e5c0 ("md: improve io stats accounting") could cause
double fault problem per the report [1], and also it is not correct to
change ->bi_end_io if md don't own it, so let's revert it.

And io stats accounting will be replemented in later commits.

[1]. https://lore.kernel.org/linux-raid/3bf04253-3fad-434a-63a7-20214e38cf26@gmail.com/T/#t

Fixes: 41d2d848e5c0 ("md: improve io stats accounting")
Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 45 ---------------------------------------------
 drivers/md/md.h |  1 -
 2 files changed, 46 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 49f897fbb89b..7ba00e4c862d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -441,30 +441,6 @@ check_suspended:
 }
 EXPORT_SYMBOL(md_handle_request);
 
-struct md_io {
-	struct mddev *mddev;
-	bio_end_io_t *orig_bi_end_io;
-	void *orig_bi_private;
-	struct block_device *orig_bi_bdev;
-	unsigned long start_time;
-};
-
-static void md_end_io(struct bio *bio)
-{
-	struct md_io *md_io = bio->bi_private;
-	struct mddev *mddev = md_io->mddev;
-
-	bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev);
-
-	bio->bi_end_io = md_io->orig_bi_end_io;
-	bio->bi_private = md_io->orig_bi_private;
-
-	mempool_free(md_io, &mddev->md_io_pool);
-
-	if (bio->bi_end_io)
-		bio->bi_end_io(bio);
-}
-
 static blk_qc_t md_submit_bio(struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
@@ -489,21 +465,6 @@ static blk_qc_t md_submit_bio(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	if (bio->bi_end_io != md_end_io) {
-		struct md_io *md_io;
-
-		md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
-		md_io->mddev = mddev;
-		md_io->orig_bi_end_io = bio->bi_end_io;
-		md_io->orig_bi_private = bio->bi_private;
-		md_io->orig_bi_bdev = bio->bi_bdev;
-
-		bio->bi_end_io = md_end_io;
-		bio->bi_private = md_io;
-
-		md_io->start_time = bio_start_io_acct(bio);
-	}
-
 	/* bio could be mergeable after passing to underlayer */
 	bio->bi_opf &= ~REQ_NOMERGE;
 
@@ -5608,7 +5569,6 @@ static void md_free(struct kobject *ko)
 
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
-	mempool_exit(&mddev->md_io_pool);
 	kfree(mddev);
 }
 
@@ -5705,11 +5665,6 @@ static int md_alloc(dev_t dev, char *name)
 		 */
 		mddev->hold_active = UNTIL_STOP;
 
-	error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
-					  sizeof(struct md_io));
-	if (error)
-		goto abort;
-
 	error = -ENOMEM;
 	mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
 	if (!mddev->queue)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fb7eab58cfd5..4da240ffe2c5 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -487,7 +487,6 @@ struct mddev {
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
-	mempool_t			md_io_pool;
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit

From 10764815ff4728d2c57da677cd5d3dd6f446cf5f Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:17 +0800
Subject: [PATCH 1731/3292] md: add io accounting for raid0 and raid5

We introduce a new bioset (io_acct_set) for raid0 and raid5 since they
don't own clone infrastructure to accounting io. And the bioset is added
to mddev instead of to raid0 and raid5 layer, because with this way, we
can put common functions to md.h and reuse them in raid0 and raid5.

Also struct md_io_acct is added accordingly which includes io start_time,
the origin bio and cloned bio. Then we can call bio_{start,end}_io_acct
to get related io status.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c    | 51 +++++++++++++++++++++++++++++++++++++++++++---
 drivers/md/md.h    |  8 ++++++++
 drivers/md/raid0.c |  3 +++
 drivers/md/raid5.c |  9 ++++++++
 4 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7ba00e4c862d..843e13666e3f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2340,7 +2340,8 @@ int md_integrity_register(struct mddev *mddev)
 			       bdev_get_integrity(reference->bdev));
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
-	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+	    bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE)) {
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;
@@ -5569,6 +5570,7 @@ static void md_free(struct kobject *ko)
 
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_acct_set);
 	kfree(mddev);
 }
 
@@ -5862,7 +5864,13 @@ int md_run(struct mddev *mddev)
 	if (!bioset_initialized(&mddev->sync_set)) {
 		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
 		if (err)
-			return err;
+			goto exit_bio_set;
+	}
+	if (!bioset_initialized(&mddev->io_acct_set)) {
+		err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
+				  offsetof(struct md_io_acct, bio_clone), 0);
+		if (err)
+			goto exit_sync_set;
 	}
 
 	spin_lock(&pers_lock);
@@ -5990,6 +5998,7 @@ int md_run(struct mddev *mddev)
 			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
 		else
 			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
+		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
 	}
 	if (pers->sync_request) {
 		if (mddev->kobj.sd &&
@@ -6039,8 +6048,11 @@ bitmap_abort:
 	module_put(pers->owner);
 	md_bitmap_destroy(mddev);
 abort:
-	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->io_acct_set);
+exit_sync_set:
 	bioset_exit(&mddev->sync_set);
+exit_bio_set:
+	bioset_exit(&mddev->bio_set);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6264,6 +6276,7 @@ void md_stop(struct mddev *mddev)
 	__md_stop(mddev);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_acct_set);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -8568,6 +8581,38 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
+static void md_end_io_acct(struct bio *bio)
+{
+	struct md_io_acct *md_io_acct = bio->bi_private;
+	struct bio *orig_bio = md_io_acct->orig_bio;
+
+	orig_bio->bi_status = bio->bi_status;
+
+	bio_end_io_acct(orig_bio, md_io_acct->start_time);
+	bio_put(bio);
+	bio_endio(orig_bio);
+}
+
+/* used by personalities (raid0 and raid5) to account io stats */
+void md_account_bio(struct mddev *mddev, struct bio **bio)
+{
+	struct md_io_acct *md_io_acct;
+	struct bio *clone;
+
+	if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
+		return;
+
+	clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
+	md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
+	md_io_acct->orig_bio = *bio;
+	md_io_acct->start_time = bio_start_io_acct(*bio);
+
+	clone->bi_end_io = md_end_io_acct;
+	clone->bi_private = md_io_acct;
+	*bio = clone;
+}
+EXPORT_SYMBOL_GPL(md_account_bio);
+
 /* md_allow_write(mddev)
  * Calling this ensures that the array is marked 'active' so that writes
  * may proceed without blocking.  It is important to call this before
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4da240ffe2c5..4191f22acce4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -487,6 +487,7 @@ struct mddev {
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
+	struct bio_set			io_acct_set; /* for raid0 and raid5 io accounting */
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
@@ -683,6 +684,12 @@ struct md_thread {
 	void			*private;
 };
 
+struct md_io_acct {
+	struct bio *orig_bio;
+	unsigned long start_time;
+	struct bio bio_clone;
+};
+
 #define THREAD_WAKEUP  0
 
 static inline void safe_put_page(struct page *p)
@@ -714,6 +721,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 			struct bio *bio, sector_t start, sector_t size);
+void md_account_bio(struct mddev *mddev, struct bio **bio);
 
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e5d7411cba9b..62c8b6adac70 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -546,6 +546,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
 		bio = split;
 	}
 
+	if (bio->bi_pool != &mddev->bio_set)
+		md_account_bio(mddev, &bio);
+
 	orig_sector = sector;
 	zone = find_zone(mddev->private, &sector);
 	switch (conf->layout) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 841e1c1aa5e6..58e9dbc0f683 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5468,6 +5468,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 	sector_t sector = raid_bio->bi_iter.bi_sector;
 	unsigned chunk_sects = mddev->chunk_sectors;
 	unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+	struct r5conf *conf = mddev->private;
 
 	if (sectors < bio_sectors(raid_bio)) {
 		struct r5conf *conf = mddev->private;
@@ -5477,6 +5478,9 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 		raid_bio = split;
 	}
 
+	if (raid_bio->bi_pool != &conf->bio_split)
+		md_account_bio(mddev, &raid_bio);
+
 	if (!raid5_read_one_chunk(mddev, raid_bio))
 		return raid_bio;
 
@@ -5756,6 +5760,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	DEFINE_WAIT(w);
 	bool do_prepare;
 	bool do_flush = false;
+	bool do_clone = false;
 
 	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
 		int ret = log_handle_flush_request(conf, bi);
@@ -5784,6 +5789,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	if (rw == READ && mddev->degraded == 0 &&
 	    mddev->reshape_position == MaxSector) {
 		bi = chunk_aligned_read(mddev, bi);
+		do_clone = true;
 		if (!bi)
 			return true;
 	}
@@ -5798,6 +5804,9 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
+	if (!do_clone)
+		md_account_bio(mddev, &bi);
+
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
 		int previous;

From c82aa1b76787c34fd02374e519b6f52cdeb2f54b Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:18 +0800
Subject: [PATCH 1732/3292] md/raid5: move checking badblock before clone bio
 in raid5_read_one_chunk

We don't need to clone bio if the relevant region has badblock.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 58e9dbc0f683..5a05277f4be7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5427,6 +5427,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	atomic_inc(&rdev->nr_pending);
 	rcu_read_unlock();
 
+	if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
+			&bad_sectors)) {
+		bio_put(raid_bio);
+		rdev_dec_pending(rdev, mddev);
+		return 0;
+	}
+
 	align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
 	bio_set_dev(align_bio, rdev->bdev);
 	align_bio->bi_end_io = raid5_align_endio;
@@ -5435,13 +5442,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 
 	raid_bio->bi_next = (void *)rdev;
 
-	if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad,
-			&bad_sectors)) {
-		bio_put(align_bio);
-		rdev_dec_pending(rdev, mddev);
-		return 0;
-	}
-
 	/* No reshape active, so we can trust rdev->data_offset */
 	align_bio->bi_iter.bi_sector += rdev->data_offset;
 

From 1147f58e1010b8688bac1fd3bbab753b1379291d Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:19 +0800
Subject: [PATCH 1733/3292] md/raid5: avoid redundant bio clone in
 raid5_read_one_chunk

After enable io accounting, chunk read bio could be cloned twice which
is not good. To avoid such inefficiency, let's clone align_bio from
io_acct_set too, then we need only call md_account_bio in make_request
unconditionally.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5a05277f4be7..f83623ac8c34 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5364,11 +5364,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
  */
 static void raid5_align_endio(struct bio *bi)
 {
-	struct bio* raid_bi  = bi->bi_private;
+	struct md_io_acct *md_io_acct = bi->bi_private;
+	struct bio *raid_bi = md_io_acct->orig_bio;
 	struct mddev *mddev;
 	struct r5conf *conf;
 	struct md_rdev *rdev;
 	blk_status_t error = bi->bi_status;
+	unsigned long start_time = md_io_acct->start_time;
 
 	bio_put(bi);
 
@@ -5380,6 +5382,8 @@ static void raid5_align_endio(struct bio *bi)
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error) {
+		if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
+			bio_end_io_acct(raid_bi, start_time);
 		bio_endio(raid_bi);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_quiescent);
@@ -5398,6 +5402,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	struct md_rdev *rdev;
 	sector_t sector, end_sector, first_bad;
 	int bad_sectors, dd_idx;
+	struct md_io_acct *md_io_acct;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
 		pr_debug("%s: non aligned\n", __func__);
@@ -5434,14 +5439,18 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 		return 0;
 	}
 
-	align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
+	align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
+	md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
+	raid_bio->bi_next = (void *)rdev;
+	if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
+		md_io_acct->start_time = bio_start_io_acct(raid_bio);
+	md_io_acct->orig_bio = raid_bio;
+
 	bio_set_dev(align_bio, rdev->bdev);
 	align_bio->bi_end_io = raid5_align_endio;
-	align_bio->bi_private = raid_bio;
+	align_bio->bi_private = md_io_acct;
 	align_bio->bi_iter.bi_sector = sector;
 
-	raid_bio->bi_next = (void *)rdev;
-
 	/* No reshape active, so we can trust rdev->data_offset */
 	align_bio->bi_iter.bi_sector += rdev->data_offset;
 
@@ -5468,7 +5477,6 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 	sector_t sector = raid_bio->bi_iter.bi_sector;
 	unsigned chunk_sects = mddev->chunk_sectors;
 	unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
-	struct r5conf *conf = mddev->private;
 
 	if (sectors < bio_sectors(raid_bio)) {
 		struct r5conf *conf = mddev->private;
@@ -5478,9 +5486,6 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
 		raid_bio = split;
 	}
 
-	if (raid_bio->bi_pool != &conf->bio_split)
-		md_account_bio(mddev, &raid_bio);
-
 	if (!raid5_read_one_chunk(mddev, raid_bio))
 		return raid_bio;
 
@@ -5760,7 +5765,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	DEFINE_WAIT(w);
 	bool do_prepare;
 	bool do_flush = false;
-	bool do_clone = false;
 
 	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
 		int ret = log_handle_flush_request(conf, bi);
@@ -5789,7 +5793,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	if (rw == READ && mddev->degraded == 0 &&
 	    mddev->reshape_position == MaxSector) {
 		bi = chunk_aligned_read(mddev, bi);
-		do_clone = true;
 		if (!bi)
 			return true;
 	}
@@ -5804,9 +5807,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 
-	if (!do_clone)
-		md_account_bio(mddev, &bi);
-
+	md_account_bio(mddev, &bi);
 	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
 		int previous;

From 9b8ae7b938235229ccb112c4e887ff1bcc232836 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:20 +0800
Subject: [PATCH 1734/3292] md/raid1: rename print_msg with r1bio_existed

The caller of raid1_read_request could pass NULL or a valid pointer for
"struct r1bio *r1_bio", so it actually means whether r1_bio is existed
or not.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid1.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ced076ba560e..696da6b8b7ed 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1210,7 +1210,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	int max_sectors;
 	int rdisk;
-	bool print_msg = !!r1_bio;
+	bool r1bio_existed = !!r1_bio;
 	char b[BDEVNAME_SIZE];
 
 	/*
@@ -1220,7 +1220,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	 */
 	gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO;
 
-	if (print_msg) {
+	if (r1bio_existed) {
 		/* Need to get the block device name carefully */
 		struct md_rdev *rdev;
 		rcu_read_lock();
@@ -1252,7 +1252,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	if (rdisk < 0) {
 		/* couldn't find anywhere to read from */
-		if (print_msg) {
+		if (r1bio_existed) {
 			pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
 					    mdname(mddev),
 					    b,
@@ -1263,7 +1263,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	}
 	mirror = conf->mirrors + rdisk;
 
-	if (print_msg)
+	if (r1bio_existed)
 		pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
 				    mdname(mddev),
 				    (unsigned long long)r1_bio->sector,

From a0159832e51e3af03b89ecc5d6b9db451e529b5f Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:21 +0800
Subject: [PATCH 1735/3292] md/raid1: enable io accounting

For raid1, we record the start time between split bio and clone bio,
and finish the accounting in the final endio.

Also introduce start_time in r1bio accordingly.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid1.c | 7 +++++++
 drivers/md/raid1.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 696da6b8b7ed..51f2547c2007 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -300,6 +300,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
 		bio->bi_status = BLK_STS_IOERR;
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		bio_end_io_acct(bio, r1_bio->start_time);
 	bio_endio(bio);
 }
 
@@ -1292,6 +1294,9 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 	r1_bio->read_disk = rdisk;
 
+	if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r1_bio->start_time = bio_start_io_acct(bio);
+
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r1_bio->bios[rdisk] = read_bio;
@@ -1461,6 +1466,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->sectors = max_sectors;
 	}
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r1_bio->start_time = bio_start_io_acct(bio);
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index b7eb09e8c025..ccf10e59b116 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -158,6 +158,7 @@ struct r1bio {
 	sector_t		sector;
 	int			sectors;
 	unsigned long		state;
+	unsigned long		start_time;
 	struct mddev		*mddev;
 	/*
 	 * original bio going to /dev/mdx

From 528bc2cf2fccef2c2c17263f9932094bf81fee5a Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:22 +0800
Subject: [PATCH 1736/3292] md/raid10: enable io accounting

For raid10, we record the start time between split bio and clone bio,
and finish the accounting in the final endio.

Also introduce start_time in r10bio accordingly.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid10.c | 6 ++++++
 drivers/md/raid10.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 13f5e6b2a73d..16977e8e075d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -297,6 +297,8 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
 		bio->bi_status = BLK_STS_IOERR;
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		bio_end_io_acct(bio, r10_bio->start_time);
 	bio_endio(bio);
 	/*
 	 * Wake up any possible resync thread that waits for the device
@@ -1184,6 +1186,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	}
 	slot = r10_bio->read_slot;
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r10_bio->start_time = bio_start_io_acct(bio);
 	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r10_bio->devs[slot].bio = read_bio;
@@ -1483,6 +1487,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 		r10_bio->master_bio = bio;
 	}
 
+	if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+		r10_bio->start_time = bio_start_io_acct(bio);
 	atomic_set(&r10_bio->remaining, 1);
 	md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
 
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 1461fd55311b..c34bb196790e 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -124,6 +124,7 @@ struct r10bio {
 	sector_t		sector;	/* virtual sector number */
 	int			sectors;
 	unsigned long		state;
+	unsigned long		start_time;
 	struct mddev		*mddev;
 	/*
 	 * original bio going to /dev/mdx

From 608f52e30aae7dc8da836e5b7b112d50a2d00e43 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Tue, 25 May 2021 17:46:23 +0800
Subject: [PATCH 1737/3292] md: mark some personalities as deprecated

Mark the three personalities (linear, fault and multipath) as deprecated
because:

1. people can use dm multipath or nvme multipath.
2. linear is already deprecated in MODULE_ALIAS.
3. no one actively using fault.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/Kconfig        | 6 +++---
 drivers/md/md-faulty.c    | 2 +-
 drivers/md/md-linear.c    | 2 +-
 drivers/md/md-multipath.c | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index f2014385d48b..0602e82a9516 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -47,7 +47,7 @@ config MD_AUTODETECT
 	  If unsure, say Y.
 
 config MD_LINEAR
-	tristate "Linear (append) mode"
+	tristate "Linear (append) mode (deprecated)"
 	depends on BLK_DEV_MD
 	help
 	  If you say Y here, then your multiple devices driver will be able to
@@ -158,7 +158,7 @@ config MD_RAID456
 	  If unsure, say Y.
 
 config MD_MULTIPATH
-	tristate "Multipath I/O support"
+	tristate "Multipath I/O support (deprecated)"
 	depends on BLK_DEV_MD
 	help
 	  MD_MULTIPATH provides a simple multi-path personality for use
@@ -169,7 +169,7 @@ config MD_MULTIPATH
 	  If unsure, say N.
 
 config MD_FAULTY
-	tristate "Faulty test module for MD"
+	tristate "Faulty test module for MD (deprecated)"
 	depends on BLK_DEV_MD
 	help
 	  The "faulty" module allows for a block device that occasionally returns
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index fda4cb3f936f..c0dc6f2ef4a3 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -357,7 +357,7 @@ static void raid_exit(void)
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Fault injection personality for MD");
+MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)");
 MODULE_ALIAS("md-personality-10"); /* faulty */
 MODULE_ALIAS("md-faulty");
 MODULE_ALIAS("md-level--5");
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 63ed8329a98d..1ff51647a682 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -312,7 +312,7 @@ static void linear_exit (void)
 module_init(linear_init);
 module_exit(linear_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Linear device concatenation personality for MD");
+MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
 MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
 MODULE_ALIAS("md-linear");
 MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 776bbe542db5..e7d6486f090f 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -471,7 +471,7 @@ static void __exit multipath_exit (void)
 module_init(multipath_init);
 module_exit(multipath_exit);
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("simple multi-path personality for MD");
+MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
 MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
 MODULE_ALIAS("md-multipath");
 MODULE_ALIAS("md-level--4");

From c32dc04059c79ddb4f7cff94ad5de6e92ea2218d Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 29 May 2021 12:30:49 +0200
Subject: [PATCH 1738/3292] md: Constify attribute_group structs

The attribute_group structs are never modified, they're only passed to
sysfs_create_group() and sysfs_remove_group(). Make them const to allow
the compiler to put them in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md-bitmap.c | 2 +-
 drivers/md/md.c        | 6 +++---
 drivers/md/md.h        | 4 ++--
 drivers/md/raid5.c     | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index ea3130e11680..e29c6298ef5c 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2616,7 +2616,7 @@ static struct attribute *md_bitmap_attrs[] = {
 	&max_backlog_used.attr,
 	NULL
 };
-struct attribute_group md_bitmap_group = {
+const struct attribute_group md_bitmap_group = {
 	.name = "bitmap",
 	.attrs = md_bitmap_attrs,
 };
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 843e13666e3f..32abcfb8bcad 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -785,7 +785,7 @@ out_free_new:
 	return ERR_PTR(error);
 }
 
-static struct attribute_group md_redundancy_group;
+static const struct attribute_group md_redundancy_group;
 
 void mddev_unlock(struct mddev *mddev)
 {
@@ -802,7 +802,7 @@ void mddev_unlock(struct mddev *mddev)
 		 * test it under the same mutex to ensure its correct value
 		 * is seen.
 		 */
-		struct attribute_group *to_remove = mddev->to_remove;
+		const struct attribute_group *to_remove = mddev->to_remove;
 		mddev->to_remove = NULL;
 		mddev->sysfs_active = 1;
 		mutex_unlock(&mddev->reconfig_mutex);
@@ -5500,7 +5500,7 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_degraded.attr,
 	NULL,
 };
-static struct attribute_group md_redundancy_group = {
+static const struct attribute_group md_redundancy_group = {
 	.name = NULL,
 	.attrs = md_redundancy_attrs,
 };
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4191f22acce4..b9b7d2f992f3 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -481,7 +481,7 @@ struct mddev {
 	atomic_t			max_corr_read_errors; /* max read retries */
 	struct list_head		all_mddevs;
 
-	struct attribute_group		*to_remove;
+	const struct attribute_group	*to_remove;
 
 	struct bio_set			bio_set;
 	struct bio_set			sync_set; /* for sync operations like
@@ -613,7 +613,7 @@ struct md_sysfs_entry {
 	ssize_t (*show)(struct mddev *, char *);
 	ssize_t (*store)(struct mddev *, const char *, size_t);
 };
-extern struct attribute_group md_bitmap_group;
+extern const struct attribute_group md_bitmap_group;
 
 static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
 {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f83623ac8c34..0ee9aa0113f3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6940,7 +6940,7 @@ static struct attribute *raid5_attrs[] =  {
 	&ppl_write_hint.attr,
 	NULL,
 };
-static struct attribute_group raid5_attrs_group = {
+static const struct attribute_group raid5_attrs_group = {
 	.name = NULL,
 	.attrs = raid5_attrs,
 };

From daee2024715ddf430a069c0c4eab8417146934cf Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Thu, 3 Jun 2021 17:21:06 +0800
Subject: [PATCH 1739/3292] md: check level before create and exit io_acct_set

The bio_set (io_acct_set) is used by personalities to clone bio and
trace the timestamp of bio. Some personalities such as raid1/10 don't
need the bio_set, so add check to not create it unconditionally.

Also update the comment for md_account_bio to make it more clear.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 32abcfb8bcad..56b606184c87 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2341,7 +2341,8 @@ int md_integrity_register(struct mddev *mddev)
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
 	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
-	    bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE)) {
+	    (mddev->level != 1 && mddev->level != 10 &&
+	     bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;
@@ -5570,7 +5571,8 @@ static void md_free(struct kobject *ko)
 
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
-	bioset_exit(&mddev->io_acct_set);
+	if (mddev->level != 1 && mddev->level != 10)
+		bioset_exit(&mddev->io_acct_set);
 	kfree(mddev);
 }
 
@@ -5866,7 +5868,8 @@ int md_run(struct mddev *mddev)
 		if (err)
 			goto exit_bio_set;
 	}
-	if (!bioset_initialized(&mddev->io_acct_set)) {
+	if (mddev->level != 1 && mddev->level != 10 &&
+	    !bioset_initialized(&mddev->io_acct_set)) {
 		err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
 				  offsetof(struct md_io_acct, bio_clone), 0);
 		if (err)
@@ -6048,7 +6051,8 @@ bitmap_abort:
 	module_put(pers->owner);
 	md_bitmap_destroy(mddev);
 abort:
-	bioset_exit(&mddev->io_acct_set);
+	if (mddev->level != 1 && mddev->level != 10)
+		bioset_exit(&mddev->io_acct_set);
 exit_sync_set:
 	bioset_exit(&mddev->sync_set);
 exit_bio_set:
@@ -6276,7 +6280,8 @@ void md_stop(struct mddev *mddev)
 	__md_stop(mddev);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
-	bioset_exit(&mddev->io_acct_set);
+	if (mddev->level != 1 && mddev->level != 10)
+		bioset_exit(&mddev->io_acct_set);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -8593,7 +8598,10 @@ static void md_end_io_acct(struct bio *bio)
 	bio_endio(orig_bio);
 }
 
-/* used by personalities (raid0 and raid5) to account io stats */
+/*
+ * Used by personalities that don't already clone the bio and thus can't
+ * easily add the timestamp to their extended bio structure.
+ */
 void md_account_bio(struct mddev *mddev, struct bio **bio)
 {
 	struct md_io_acct *md_io_acct;

From de3ea66e9d23a34eef5e17f960d6473f78a1c54b Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Thu, 3 Jun 2021 17:21:07 +0800
Subject: [PATCH 1740/3292] md: add comments in md_integrity_register

Given it is not obvious for the error handling, let's try to add some
comments here to make it clear.

Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 56b606184c87..2c69905dd5c0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2343,6 +2343,12 @@ int md_integrity_register(struct mddev *mddev)
 	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
 	    (mddev->level != 1 && mddev->level != 10 &&
 	     bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
+		/*
+		 * No need to handle the failure of bioset_integrity_create,
+		 * because the function is called by md_run() -> pers->run(),
+		 * md_run calls bioset_exit -> bioset_integrity_free in case
+		 * of failure case.
+		 */
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;

From 97ae27252f4962d0fcc38ee1d9f913d817a2024e Mon Sep 17 00:00:00 2001
From: Gal Ofri <gal.ofri@storing.io>
Date: Mon, 7 Jun 2021 14:07:03 +0300
Subject: [PATCH 1741/3292] md/raid5: avoid device_lock in read_one_chunk()

There is a lock contention on device_lock in read_one_chunk().
device_lock is taken to sync conf->active_aligned_reads and
conf->quiesce.
read_one_chunk() takes the lock, then waits for quiesce=0 (resumed)
before incrementing active_aligned_reads.
raid5_quiesce() takes the lock, sets quiesce=2 (in-progress), then waits
for active_aligned_reads to be zero before setting quiesce=1
(suspended).

Introduce a fast (lockless) path in read_one_chunk(): activate aligned
read without taking device_lock.  In case quiesce starts while
activating the aligned-read in fast path, deactivate it and revert to
old behavior (take device_lock and wait for quiesce to finish).

Add smp store/load in raid5_quiesce()/read_one_chunk() respectively to
gaurantee that read_one_chunk() does not miss an ongoing quiesce.

My setups:
1. 8 local nvme drives (each up to 250k iops).
2. 8 ram disks (brd).

Each setup with raid6 (6+2), 1024 io threads on a 96 cpu-cores (48 per
socket) system. Record both iops and cpu spent on this contention with
rand-read-4k. Record bw with sequential-read-128k.  Note: in most cases
cpu is still busy but due to "new" bottlenecks.

nvme:
              | iops           | cpu  | bw
-----------------------------------------------
without patch | 1.6M           | ~50% | 5.5GB/s
with patch    | 2M (throttled) | 0%   | 16GB/s (throttled)

ram (brd):
              | iops           | cpu  | bw
-----------------------------------------------
without patch | 2M             | ~80% | 24GB/s
with patch    | 4M             | 0%   | 55GB/s

CC: Song Liu <song@kernel.org>
CC: Neil Brown <neilb@suse.de>
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Gal Ofri <gal.ofri@storing.io>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0ee9aa0113f3..e248532bb70a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5403,6 +5403,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	sector_t sector, end_sector, first_bad;
 	int bad_sectors, dd_idx;
 	struct md_io_acct *md_io_acct;
+	bool did_inc;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
 		pr_debug("%s: non aligned\n", __func__);
@@ -5454,11 +5455,24 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	/* No reshape active, so we can trust rdev->data_offset */
 	align_bio->bi_iter.bi_sector += rdev->data_offset;
 
-	spin_lock_irq(&conf->device_lock);
-	wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
-			    conf->device_lock);
-	atomic_inc(&conf->active_aligned_reads);
-	spin_unlock_irq(&conf->device_lock);
+	did_inc = false;
+	if (conf->quiesce == 0) {
+		atomic_inc(&conf->active_aligned_reads);
+		did_inc = true;
+	}
+	/* need a memory barrier to detect the race with raid5_quiesce() */
+	if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) {
+		/* quiesce is in progress, so we need to undo io activation and wait
+		 * for it to finish
+		 */
+		if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads))
+			wake_up(&conf->wait_for_quiescent);
+		spin_lock_irq(&conf->device_lock);
+		wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
+				    conf->device_lock);
+		atomic_inc(&conf->active_aligned_reads);
+		spin_unlock_irq(&conf->device_lock);
+	}
 
 	if (mddev->gendisk)
 		trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
@@ -8346,7 +8360,10 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce)
 		 * active stripes can drain
 		 */
 		r5c_flush_cache(conf, INT_MAX);
-		conf->quiesce = 2;
+		/* need a memory barrier to make sure read_one_chunk() sees
+		 * quiesce started and reverts to slow (locked) path.
+		 */
+		smp_store_release(&conf->quiesce, 2);
 		wait_event_cmd(conf->wait_for_quiescent,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,

From 30ab5db7ee787c88236376ce6c88b53d613fcae2 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 30 Apr 2021 17:26:45 +0800
Subject: [PATCH 1742/3292] floppy: cleanup: remove redundant assignment to
 nr_sectors

Variable nr_sectors is set to zero but this value is never
read as it is overwritten later on, hence it is a redundant
assignment and can be removed.

Clean up the following clang-analyzer warning:

drivers/block/floppy.c:2333:2: warning: Value stored to 'nr_sectors' is
never read [clang-analyzer-deadcode.DeadStores].

Link: https://lore.kernel.org/r/1619774805-121562-1-git-send-email-jiapeng.chong@linux.alibaba.com
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Denis Efremov <efremov@linux.com>
---
 drivers/block/floppy.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8a9d22207c59..e96ad5b2c35e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2330,7 +2330,6 @@ static void rw_interrupt(void)
 	if (!drive_state[current_drive].first_read_date)
 		drive_state[current_drive].first_read_date = jiffies;
 
-	nr_sectors = 0;
 	ssize = DIV_ROUND_UP(1 << raw_cmd->cmd[SIZECODE], 4);
 
 	if (reply_buffer[ST1] & ST1_EOC)

From 2c9bdf6e4771a5966a4f0d6bea45a1c7f38312d7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 28 May 2021 15:03:35 -0500
Subject: [PATCH 1743/3292] floppy: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Link: https://lore.kernel.org/linux-hardening/47bcd36a-6524-348b-e802-0691d1b3c429@kernel.dk/
Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Denis Efremov <efremov@linux.com>
---
 drivers/block/floppy.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index e96ad5b2c35e..cde70b0a55dd 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2123,6 +2123,7 @@ static void format_interrupt(void)
 	switch (interpret_errors()) {
 	case 1:
 		cont->error();
+		break;
 	case 2:
 		break;
 	case 0:

From 04c22eeaf6323c51ac3d429dc03de91a82f811cb Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <bnatikar@amd.com>
Date: Wed, 26 May 2021 22:24:31 +0530
Subject: [PATCH 1744/3292] HID: amd_sfh: change in maintainer

I would like to take the maintainer-ship of AMD SENSOR FUSION HUB DRIVER

Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Acked-by: Nehal Shah <nehal-bakulchandra.shah@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 30aa8253bf3a..7ce76ba72612 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -972,7 +972,7 @@ F:	drivers/net/ethernet/amd/xgbe/
 
 AMD SENSOR FUSION HUB DRIVER
 M:	Nehal Shah <nehal-bakulchandra.shah@amd.com>
-M:	Sandeep Singh <sandeep.singh@amd.com>
+M:	Basavaraj Natikar <basavaraj.natikar@amd.com>
 L:	linux-input@vger.kernel.org
 S:	Maintained
 F:	Documentation/hid/amd-sfh*

From a3af901c89685acb590b49ce054ea7e3015a770a Mon Sep 17 00:00:00 2001
From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Date: Sat, 29 May 2021 19:03:57 +0900
Subject: [PATCH 1745/3292] HID: usbkbd: Avoid GFP_ATOMIC when GFP_KERNEL is
 possible

usb_kbd_alloc_mem is called in usb_kbd_probe, which is
not in atomic context. So constraints can be relaxed here.

Use GFP_KERNEL instead of GFP_ATOMIC.

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/usbkbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/usbhid/usbkbd.c b/drivers/hid/usbhid/usbkbd.c
index e22434dfc9ef..df02002066ce 100644
--- a/drivers/hid/usbhid/usbkbd.c
+++ b/drivers/hid/usbhid/usbkbd.c
@@ -239,11 +239,11 @@ static int usb_kbd_alloc_mem(struct usb_device *dev, struct usb_kbd *kbd)
 		return -1;
 	if (!(kbd->led = usb_alloc_urb(0, GFP_KERNEL)))
 		return -1;
-	if (!(kbd->new = usb_alloc_coherent(dev, 8, GFP_ATOMIC, &kbd->new_dma)))
+	if (!(kbd->new = usb_alloc_coherent(dev, 8, GFP_KERNEL, &kbd->new_dma)))
 		return -1;
 	if (!(kbd->cr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL)))
 		return -1;
-	if (!(kbd->leds = usb_alloc_coherent(dev, 1, GFP_ATOMIC, &kbd->leds_dma)))
+	if (!(kbd->leds = usb_alloc_coherent(dev, 1, GFP_KERNEL, &kbd->leds_dma)))
 		return -1;
 
 	return 0;

From fb1a79a6b6e1223ddb18f12aa35e36f832da2290 Mon Sep 17 00:00:00 2001
From: Pascal Giard <pascal.giard@etsmtl.ca>
Date: Fri, 4 Jun 2021 12:10:23 -0400
Subject: [PATCH 1746/3292] HID: sony: fix freeze when inserting ghlive ps3/wii
 dongles

This commit fixes a freeze on insertion of a Guitar Hero Live PS3/WiiU
USB dongle. Indeed, with the current implementation, inserting one of
those USB dongles will lead to a hard freeze. I apologize for not
catching this earlier, it didn't occur on my old laptop.

While the issue was isolated to memory alloc/free, I could not figure
out why it causes a freeze. So this patch fixes this issue by
simplifying memory allocation and usage.

We remind that for the dongle to work properly, a control URB needs to
be sent periodically. We used to alloc/free the URB each time this URB
needed to be sent.

With this patch, the memory for the URB is allocated on the probe, reused
for as long as the dongle is plugged in, and freed once the dongle is
unplugged.

Signed-off-by: Pascal Giard <pascal.giard@etsmtl.ca>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sony.c | 100 ++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 8319b0ce385a..b3722c51ec78 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -597,9 +597,8 @@ struct sony_sc {
 	/* DS4 calibration data */
 	struct ds4_calibration_data ds4_calib_data[6];
 	/* GH Live */
+	struct urb *ghl_urb;
 	struct timer_list ghl_poke_timer;
-	struct usb_ctrlrequest *ghl_cr;
-	u8 *ghl_databuf;
 };
 
 static void sony_set_leds(struct sony_sc *sc);
@@ -625,66 +624,54 @@ static inline void sony_schedule_work(struct sony_sc *sc,
 
 static void ghl_magic_poke_cb(struct urb *urb)
 {
-	if (urb) {
-		/* Free sc->ghl_cr and sc->ghl_databuf allocated in
-		 * ghl_magic_poke()
-		 */
-		kfree(urb->setup_packet);
-		kfree(urb->transfer_buffer);
-	}
+	struct sony_sc *sc = urb->context;
+
+	if (urb->status < 0)
+		hid_err(sc->hdev, "URB transfer failed : %d", urb->status);
+
+	mod_timer(&sc->ghl_poke_timer, jiffies + GHL_GUITAR_POKE_INTERVAL*HZ);
 }
 
 static void ghl_magic_poke(struct timer_list *t)
 {
+	int ret;
 	struct sony_sc *sc = from_timer(sc, t, ghl_poke_timer);
 
-	int ret;
-	unsigned int pipe;
-	struct urb *urb;
-	struct usb_device *usbdev = to_usb_device(sc->hdev->dev.parent->parent);
-	const u16 poke_size =
-		ARRAY_SIZE(ghl_ps3wiiu_magic_data);
+	ret = usb_submit_urb(sc->ghl_urb, GFP_ATOMIC);
+	if (ret < 0)
+		hid_err(sc->hdev, "usb_submit_urb failed: %d", ret);
+}
 
+static int ghl_init_urb(struct sony_sc *sc, struct usb_device *usbdev)
+{
+	struct usb_ctrlrequest *cr;
+	u16 poke_size;
+	u8 *databuf;
+	unsigned int pipe;
+
+	poke_size = ARRAY_SIZE(ghl_ps3wiiu_magic_data);
 	pipe = usb_sndctrlpipe(usbdev, 0);
 
-	if (!sc->ghl_cr) {
-		sc->ghl_cr = kzalloc(sizeof(*sc->ghl_cr), GFP_ATOMIC);
-		if (!sc->ghl_cr)
-			goto resched;
-	}
+	cr = devm_kzalloc(&sc->hdev->dev, sizeof(*cr), GFP_ATOMIC);
+	if (cr == NULL)
+		return -ENOMEM;
 
-	if (!sc->ghl_databuf) {
-		sc->ghl_databuf = kzalloc(poke_size, GFP_ATOMIC);
-		if (!sc->ghl_databuf)
-			goto resched;
-	}
+	databuf = devm_kzalloc(&sc->hdev->dev, poke_size, GFP_ATOMIC);
+	if (databuf == NULL)
+		return -ENOMEM;
 
-	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb)
-		goto resched;
-
-	sc->ghl_cr->bRequestType =
+	cr->bRequestType =
 		USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT;
-	sc->ghl_cr->bRequest = USB_REQ_SET_CONFIGURATION;
-	sc->ghl_cr->wValue = cpu_to_le16(ghl_ps3wiiu_magic_value);
-	sc->ghl_cr->wIndex = 0;
-	sc->ghl_cr->wLength = cpu_to_le16(poke_size);
-	memcpy(sc->ghl_databuf, ghl_ps3wiiu_magic_data, poke_size);
-
+	cr->bRequest = USB_REQ_SET_CONFIGURATION;
+	cr->wValue = cpu_to_le16(ghl_ps3wiiu_magic_value);
+	cr->wIndex = 0;
+	cr->wLength = cpu_to_le16(poke_size);
+	memcpy(databuf, ghl_ps3wiiu_magic_data, poke_size);
 	usb_fill_control_urb(
-		urb, usbdev, pipe,
-		(unsigned char *) sc->ghl_cr, sc->ghl_databuf,
-		poke_size, ghl_magic_poke_cb, NULL);
-	ret = usb_submit_urb(urb, GFP_ATOMIC);
-	if (ret < 0) {
-		kfree(sc->ghl_databuf);
-		kfree(sc->ghl_cr);
-	}
-	usb_free_urb(urb);
-
-resched:
-	/* Reschedule for next time */
-	mod_timer(&sc->ghl_poke_timer, jiffies + GHL_GUITAR_POKE_INTERVAL*HZ);
+		sc->ghl_urb, usbdev, pipe,
+		(unsigned char *) cr, databuf, poke_size,
+		ghl_magic_poke_cb, sc);
+	return 0;
 }
 
 static int guitar_mapping(struct hid_device *hdev, struct hid_input *hi,
@@ -2981,6 +2968,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	int ret;
 	unsigned long quirks = id->driver_data;
 	struct sony_sc *sc;
+	struct usb_device *usbdev;
 	unsigned int connect_mask = HID_CONNECT_DEFAULT;
 
 	if (!strcmp(hdev->name, "FutureMax Dance Mat"))
@@ -3000,6 +2988,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	sc->quirks = quirks;
 	hid_set_drvdata(hdev, sc);
 	sc->hdev = hdev;
+	usbdev = to_usb_device(sc->hdev->dev.parent->parent);
 
 	ret = hid_parse(hdev);
 	if (ret) {
@@ -3042,6 +3031,15 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	}
 
 	if (sc->quirks & GHL_GUITAR_PS3WIIU) {
+		sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC);
+		if (!sc->ghl_urb)
+			return -ENOMEM;
+		ret = ghl_init_urb(sc, usbdev);
+		if (ret) {
+			hid_err(hdev, "error preparing URB\n");
+			return ret;
+		}
+
 		timer_setup(&sc->ghl_poke_timer, ghl_magic_poke, 0);
 		mod_timer(&sc->ghl_poke_timer,
 			  jiffies + GHL_GUITAR_POKE_INTERVAL*HZ);
@@ -3054,8 +3052,10 @@ static void sony_remove(struct hid_device *hdev)
 {
 	struct sony_sc *sc = hid_get_drvdata(hdev);
 
-	if (sc->quirks & GHL_GUITAR_PS3WIIU)
+	if (sc->quirks & GHL_GUITAR_PS3WIIU) {
 		del_timer_sync(&sc->ghl_poke_timer);
+		usb_free_urb(sc->ghl_urb);
+	}
 
 	hid_hw_close(hdev);
 

From 2b2bcc76e2ffbaff7e6ec1c62cb9c10881dc70cd Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Tue, 8 Jun 2021 15:29:51 +0200
Subject: [PATCH 1747/3292] HID: surface-hid: Fix get-report request

Getting a report (e.g. feature report) from a device requires us to send
a request indicating which report we want to retrieve and then waiting
for the corresponding response containing that report. We already
provide the response structure to the request call, but the request
isn't marked as a request that expects a response. Thus the request
returns before we receive the response and the response buffer indicates
a zero length response due to that.

This essentially means that the get-report calls are broken and will
always indicate that a report of length zero has been read.

Fix this by appropriately marking the request.

Fixes: b05ff1002a5c ("HID: Add support for Surface Aggregator Module HID transport")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/surface-hid/surface_hid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/surface-hid/surface_hid.c b/drivers/hid/surface-hid/surface_hid.c
index 3477b31611ae..a3a70e4f3f6c 100644
--- a/drivers/hid/surface-hid/surface_hid.c
+++ b/drivers/hid/surface-hid/surface_hid.c
@@ -143,7 +143,7 @@ static int ssam_hid_get_raw_report(struct surface_hid_device *shid, u8 rprt_id,
 	rqst.target_id = shid->uid.target;
 	rqst.instance_id = shid->uid.instance;
 	rqst.command_id = SURFACE_HID_CID_GET_FEATURE_REPORT;
-	rqst.flags = 0;
+	rqst.flags = SSAM_REQUEST_HAS_RESPONSE;
 	rqst.length = sizeof(rprt_id);
 	rqst.payload = &rprt_id;
 

From 5615eb58b2384bc1d6461c75b8bf77ad96f7842d Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Wed, 28 Apr 2021 21:37:47 +0530
Subject: [PATCH 1748/3292] ssb: gpio: Fix alignment of comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The closing */ has been shifted to a new line
This is done to maintain code uniformity.

Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210428160747.qy23g6zpmheiacpl@kewl-virtual-machine
---
 drivers/ssb/driver_gpio.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 66a76fd83248..2de3896489c8 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -231,7 +231,8 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus)
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
-	 * a random base number. */
+	 * a random base number.
+	 */
 	if (bus->bustype == SSB_BUSTYPE_SSB)
 		chip->base		= 0;
 	else
@@ -424,7 +425,8 @@ static int ssb_gpio_extif_init(struct ssb_bus *bus)
 	chip->ngpio		= 5;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
-	 * a random base number. */
+	 * a random base number.
+	 */
 	if (bus->bustype == SSB_BUSTYPE_SSB)
 		chip->base		= 0;
 	else

From 2a3d830fa8f9a6148473367085a9eafd665fb20f Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Wed, 28 Apr 2021 21:48:36 +0530
Subject: [PATCH 1749/3292] ssb: pcicore: Fix indentation of comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shifted the closing */ to the next line
This is done to maintain code uniformity.

Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210428161836.sdrxzcrfiekloucz@kewl-virtual-machine
---
 drivers/ssb/driver_pcicore.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index c1186415896b..d11b4242b6d2 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -55,7 +55,8 @@ void pcicore_write16(struct ssb_pcicore *pc, u16 offset, u16 value)
 #include <asm/paccess.h>
 /* Probe a 32bit value on the bus and catch bus exceptions.
  * Returns nonzero on a bus exception.
- * This is MIPS specific */
+ * This is MIPS specific
+ */
 #define mips_busprobe32(val, addr)	get_dbe((val), ((u32 *)(addr)))
 
 /* Assume one-hot slot wiring */
@@ -255,7 +256,8 @@ static struct pci_controller ssb_pcicore_controller = {
 };
 
 /* This function is called when doing a pci_enable_device().
- * We must first check if the device is a device on the PCI-core bridge. */
+ * We must first check if the device is a device on the PCI-core bridge.
+ */
 int ssb_pcicore_plat_dev_init(struct pci_dev *d)
 {
 	if (d->bus->ops != &ssb_pcicore_pciops) {
@@ -381,11 +383,13 @@ static void ssb_pcicore_init_hostmode(struct ssb_pcicore *pc)
 
 	/* Ok, ready to run, register it to the system.
 	 * The following needs change, if we want to port hostmode
-	 * to non-MIPS platform. */
+	 * to non-MIPS platform.
+	 */
 	ssb_pcicore_controller.io_map_base = (unsigned long)ioremap(SSB_PCI_MEM, 0x04000000);
 	set_io_port_base(ssb_pcicore_controller.io_map_base);
 	/* Give some time to the PCI controller to configure itself with the new
-	 * values. Not waiting at this point causes crashes of the machine. */
+	 * values. Not waiting at this point causes crashes of the machine.
+	 */
 	mdelay(10);
 	register_pci_controller(&ssb_pcicore_controller);
 }
@@ -405,7 +409,8 @@ static int pcicore_is_in_hostmode(struct ssb_pcicore *pc)
 		return 0;
 
 	/* The 200-pin BCM4712 package does not bond out PCI. Even when
-	 * PCI is bonded out, some boards may leave the pins floating. */
+	 * PCI is bonded out, some boards may leave the pins floating.
+	 */
 	if (bus->chip_id == 0x4712) {
 		if (bus->chip_package == SSB_CHIPPACK_BCM4712S)
 			return 0;
@@ -685,7 +690,8 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc,
 	if (dev->bus->bustype != SSB_BUSTYPE_PCI) {
 		/* This SSB device is not on a PCI host-bus. So the IRQs are
 		 * not routed through the PCI core.
-		 * So we must not enable routing through the PCI core. */
+		 * So we must not enable routing through the PCI core.
+		 */
 		goto out;
 	}
 

From f30282129a4d47bf44805c712be1ac4f89eb8783 Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Wed, 28 Apr 2021 21:59:07 +0530
Subject: [PATCH 1750/3292] ssb: Fix indentation of comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shifted the closing */ to the next line
This is done to maintain code uniformity.

Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210428162907.bn5q3oh3sji6wlh4@kewl-virtual-machine
---
 drivers/ssb/main.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 0a26984acb2c..0e180c36daa4 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -37,7 +37,8 @@ static LIST_HEAD(buses);
 /* Software ID counter */
 static unsigned int next_busnumber;
 /* buses_mutes locks the two buslists and the next_busnumber.
- * Don't lock this directly, but use ssb_buses_[un]lock() below. */
+ * Don't lock this directly, but use ssb_buses_[un]lock() below.
+ */
 static DEFINE_MUTEX(buses_mutex);
 
 /* There are differences in the codeflow, if the bus is
@@ -45,7 +46,8 @@ static DEFINE_MUTEX(buses_mutex);
  * are not available early. This is a mechanism to delay
  * these initializations to after early boot has finished.
  * It's also used to avoid mutex locking, as that's not
- * available and needed early. */
+ * available and needed early.
+ */
 static bool ssb_is_early_boot = 1;
 
 static void ssb_buses_lock(void);
@@ -161,7 +163,8 @@ int ssb_bus_resume(struct ssb_bus *bus)
 	int err;
 
 	/* Reset HW state information in memory, so that HW is
-	 * completely reinitialized. */
+	 * completely reinitialized.
+	 */
 	bus->mapped_device = NULL;
 #ifdef CONFIG_SSB_DRIVER_PCICORE
 	bus->pcicore.setup_done = 0;
@@ -467,7 +470,8 @@ static int ssb_devices_register(struct ssb_bus *bus)
 		sdev = &(bus->devices[i]);
 
 		/* We don't register SSB-system devices to the kernel,
-		 * as the drivers for them are built into SSB. */
+		 * as the drivers for them are built into SSB.
+		 */
 		switch (sdev->id.coreid) {
 		case SSB_DEV_CHIPCOMMON:
 		case SSB_DEV_PCI:
@@ -521,7 +525,8 @@ static int ssb_devices_register(struct ssb_bus *bus)
 		if (err) {
 			pr_err("Could not register %s\n", dev_name(dev));
 			/* Set dev to NULL to not unregister
-			 * dev on error unwinding. */
+			 * dev on error unwinding.
+			 */
 			sdev->dev = NULL;
 			put_device(dev);
 			goto error;
@@ -667,7 +672,8 @@ ssb_bus_register(struct ssb_bus *bus,
 	ssb_bus_may_powerdown(bus);
 
 	/* Queue it for attach.
-	 * See the comment at the ssb_is_early_boot definition. */
+	 * See the comment at the ssb_is_early_boot definition.
+	 */
 	list_add_tail(&bus->list, &attach_queue);
 	if (!ssb_is_early_boot) {
 		/* This is not early boot, so we must attach the bus now */
@@ -1007,7 +1013,8 @@ static void ssb_flush_tmslow(struct ssb_device *dev)
 	 * a machine check exception otherwise.
 	 * Do this by reading the register back to commit the
 	 * PCI write and delay an additional usec for the device
-	 * to react to the change. */
+	 * to react to the change.
+	 */
 	ssb_read32(dev, SSB_TMSLOW);
 	udelay(1);
 }
@@ -1044,7 +1051,8 @@ void ssb_device_enable(struct ssb_device *dev, u32 core_specific_flags)
 EXPORT_SYMBOL(ssb_device_enable);
 
 /* Wait for bitmask in a register to get set or cleared.
- * timeout is in units of ten-microseconds */
+ * timeout is in units of ten-microseconds
+ */
 static int ssb_wait_bits(struct ssb_device *dev, u16 reg, u32 bitmask,
 			 int timeout, int set)
 {
@@ -1153,7 +1161,8 @@ int ssb_bus_may_powerdown(struct ssb_bus *bus)
 
 	/* On buses where more than one core may be working
 	 * at a time, we must not powerdown stuff if there are
-	 * still cores that may want to run. */
+	 * still cores that may want to run.
+	 */
 	if (bus->bustype == SSB_BUSTYPE_SSB)
 		goto out;
 
@@ -1322,7 +1331,8 @@ out:
 }
 /* ssb must be initialized after PCI but before the ssb drivers.
  * That means we must use some initcall between subsys_initcall
- * and device_initcall. */
+ * and device_initcall.
+ */
 fs_initcall(ssb_modinit);
 
 static void __exit ssb_modexit(void)

From 7557dfde1bd1251793fade20a52014f1105c1012 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Thu, 29 Apr 2021 18:47:10 +0800
Subject: [PATCH 1751/3292] ssb: Remove redundant assignment to err
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Variable 'err' is set to zero but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Clean up the following clang-analyzer warning:

drivers/ssb/main.c:1306:3: warning: Value stored to 'err' is never read
[clang-analyzer-deadcode.DeadStores]
drivers/ssb/main.c:1312:3: warning: Value stored to 'err' is never read
[clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1619693230-108804-1-git-send-email-yang.lee@linux.alibaba.com
---
 drivers/ssb/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 0e180c36daa4..620cc9dccad7 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -1312,13 +1312,11 @@ static int __init ssb_modinit(void)
 	if (err) {
 		pr_err("Broadcom 43xx PCI-SSB-bridge initialization failed\n");
 		/* don't fail SSB init because of this */
-		err = 0;
 	}
 	err = ssb_host_pcmcia_init();
 	if (err) {
 		pr_err("PCMCIA host initialization failed\n");
 		/* don't fail SSB init because of this */
-		err = 0;
 	}
 	err = ssb_gige_init();
 	if (err) {

From 77a0989baa427dbd242c5784d05a53ca3d197d43 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 15 May 2021 15:29:49 +0800
Subject: [PATCH 1752/3292] ssb: Fix error return code in ssb_bus_scan()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix to return -EINVAL from the error handling case instead of 0, as done
elsewhere in this function.

Fixes: 61e115a56d1a ("[SSB]: add Sonics Silicon Backplane bus support")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210515072949.7151-1-thunder.leizhen@huawei.com
---
 drivers/ssb/scan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c
index f49ab1aa2149..4161e5d1f276 100644
--- a/drivers/ssb/scan.c
+++ b/drivers/ssb/scan.c
@@ -325,6 +325,7 @@ int ssb_bus_scan(struct ssb_bus *bus,
 	if (bus->nr_devices > ARRAY_SIZE(bus->devices)) {
 		pr_err("More than %d ssb cores found (%d)\n",
 		       SSB_MAX_NR_CORES, bus->nr_devices);
+		err = -EINVAL;
 		goto err_unmap;
 	}
 	if (bus->bustype == SSB_BUSTYPE_SSB) {

From 47ec636f7a25aa2549e198c48ecb6b1c25d05456 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20B=C3=BCsch?= <m@bues.ch>
Date: Sat, 15 May 2021 21:02:52 +0200
Subject: [PATCH 1753/3292] ssb: sdio: Don't overwrite const buffer if
 block_write fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It doesn't make sense to clobber the const driver-side buffer, if a
write-to-device attempt failed. All other SSB variants (PCI, PCMCIA and SoC)
also don't corrupt the buffer on any failure in block_write.
Therefore, remove this memset from the SDIO variant.

Signed-off-by: Michael Büsch <m@bues.ch>
Cc: stable@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210515210252.318be2ba@wiggum
---
 drivers/ssb/sdio.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/ssb/sdio.c b/drivers/ssb/sdio.c
index 7fe0afb42234..66c5c2169704 100644
--- a/drivers/ssb/sdio.c
+++ b/drivers/ssb/sdio.c
@@ -411,7 +411,6 @@ static void ssb_sdio_block_write(struct ssb_device *dev, const void *buffer,
 	sdio_claim_host(bus->host_sdio);
 	if (unlikely(ssb_sdio_switch_core(bus, dev))) {
 		error = -EIO;
-		memset((void *)buffer, 0xff, count);
 		goto err_out;
 	}
 	offset |= bus->sdio_sbaddr & 0xffff;

From 233bc283728241aa522fd2889649261b742cee5a Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Tue, 18 May 2021 10:52:32 +0800
Subject: [PATCH 1754/3292] ssb: remove unreachable code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The return value of ssb_bus_unregister can only be 0 or -1, so this
condition if (err == -EBUSY) will not hold, so delete it.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621306352-3632-1-git-send-email-tiantao6@hisilicon.com
---
 drivers/ssb/main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 620cc9dccad7..3a29b5570f9f 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -434,9 +434,7 @@ void ssb_bus_unregister(struct ssb_bus *bus)
 	int err;
 
 	err = ssb_gpio_unregister(bus);
-	if (err == -EBUSY)
-		pr_debug("Some GPIOs are still in use\n");
-	else if (err)
+	if (err)
 		pr_debug("Can not unregister GPIO driver: %i\n", err);
 
 	ssb_buses_lock();

From 573f1af86891d4ecda9f7f1073dccec28c469387 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Thu, 3 Jun 2021 16:22:18 +0800
Subject: [PATCH 1755/3292] ssb: use DEVICE_ATTR_ADMIN_RW() helper macro

Use DEVICE_ATTR_ADMIN_RW() helper macro instead of DEVICE_ATTR(), making
it simpler and easier to read.

Because the read and write function names of the sysfs attribute have been
normalized, there is a natural association.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210603082218.11718-2-thunder.leizhen@huawei.com
---
 drivers/ssb/pci.c    | 16 +++++++---------
 drivers/ssb/pcmcia.c | 16 +++++++---------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index dac54041ad8d..148bcb99c212 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -1117,9 +1117,9 @@ const struct ssb_bus_ops ssb_pci_ops = {
 #endif
 };
 
-static ssize_t ssb_pci_attr_sprom_show(struct device *pcidev,
-				       struct device_attribute *attr,
-				       char *buf)
+static ssize_t ssb_sprom_show(struct device *pcidev,
+			      struct device_attribute *attr,
+			      char *buf)
 {
 	struct pci_dev *pdev = container_of(pcidev, struct pci_dev, dev);
 	struct ssb_bus *bus;
@@ -1131,9 +1131,9 @@ static ssize_t ssb_pci_attr_sprom_show(struct device *pcidev,
 	return ssb_attr_sprom_show(bus, buf, sprom_do_read);
 }
 
-static ssize_t ssb_pci_attr_sprom_store(struct device *pcidev,
-					struct device_attribute *attr,
-					const char *buf, size_t count)
+static ssize_t ssb_sprom_store(struct device *pcidev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
 {
 	struct pci_dev *pdev = container_of(pcidev, struct pci_dev, dev);
 	struct ssb_bus *bus;
@@ -1146,9 +1146,7 @@ static ssize_t ssb_pci_attr_sprom_store(struct device *pcidev,
 				    sprom_check_crc, sprom_do_write);
 }
 
-static DEVICE_ATTR(ssb_sprom, 0600,
-		   ssb_pci_attr_sprom_show,
-		   ssb_pci_attr_sprom_store);
+static DEVICE_ATTR_ADMIN_RW(ssb_sprom);
 
 void ssb_pci_exit(struct ssb_bus *bus)
 {
diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c
index d7d730c245c5..45502098e0c7 100644
--- a/drivers/ssb/pcmcia.c
+++ b/drivers/ssb/pcmcia.c
@@ -723,9 +723,9 @@ int ssb_pcmcia_get_invariants(struct ssb_bus *bus,
 	return -ENODEV;
 }
 
-static ssize_t ssb_pcmcia_attr_sprom_show(struct device *pcmciadev,
-					  struct device_attribute *attr,
-					  char *buf)
+static ssize_t ssb_sprom_show(struct device *pcmciadev,
+			      struct device_attribute *attr,
+			      char *buf)
 {
 	struct pcmcia_device *pdev =
 		container_of(pcmciadev, struct pcmcia_device, dev);
@@ -739,9 +739,9 @@ static ssize_t ssb_pcmcia_attr_sprom_show(struct device *pcmciadev,
 				   ssb_pcmcia_sprom_read_all);
 }
 
-static ssize_t ssb_pcmcia_attr_sprom_store(struct device *pcmciadev,
-					   struct device_attribute *attr,
-					   const char *buf, size_t count)
+static ssize_t ssb_sprom_store(struct device *pcmciadev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
 {
 	struct pcmcia_device *pdev =
 		container_of(pcmciadev, struct pcmcia_device, dev);
@@ -756,9 +756,7 @@ static ssize_t ssb_pcmcia_attr_sprom_store(struct device *pcmciadev,
 				    ssb_pcmcia_sprom_write_all);
 }
 
-static DEVICE_ATTR(ssb_sprom, 0600,
-		   ssb_pcmcia_attr_sprom_show,
-		   ssb_pcmcia_attr_sprom_store);
+static DEVICE_ATTR_ADMIN_RW(ssb_sprom);
 
 static int ssb_pcmcia_cor_setup(struct ssb_bus *bus, u8 cor)
 {

From b0b524f079a23e440dd22b04e369368dde847533 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Sun, 25 Apr 2021 19:02:00 +0800
Subject: [PATCH 1756/3292] brcmfmac: use ISO3166 country code and 0 rev as
 fallback

Instead of aborting country code setup in firmware, use ISO3166 country
code and 0 rev as fallback, when country_codes mapping table is not
configured.  This fallback saves the country_codes table setup for recent
brcmfmac chipsets/firmwares, which just use ISO3166 code and require no
revision number.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210425110200.3050-1-shawn.guo@linaro.org
---
 .../broadcom/brcm80211/brcmfmac/cfg80211.c      | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index f4405d7861b6..6cb09c7c37b6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -7442,18 +7442,23 @@ static s32 brcmf_translate_country_code(struct brcmf_pub *drvr, char alpha2[2],
 	s32 found_index;
 	int i;
 
-	country_codes = drvr->settings->country_codes;
-	if (!country_codes) {
-		brcmf_dbg(TRACE, "No country codes configured for device\n");
-		return -EINVAL;
-	}
-
 	if ((alpha2[0] == ccreq->country_abbrev[0]) &&
 	    (alpha2[1] == ccreq->country_abbrev[1])) {
 		brcmf_dbg(TRACE, "Country code already set\n");
 		return -EAGAIN;
 	}
 
+	country_codes = drvr->settings->country_codes;
+	if (!country_codes) {
+		brcmf_dbg(TRACE, "No country codes configured for device, using ISO3166 code and 0 rev\n");
+		memset(ccreq, 0, sizeof(*ccreq));
+		ccreq->country_abbrev[0] = alpha2[0];
+		ccreq->country_abbrev[1] = alpha2[1];
+		ccreq->ccode[0] = alpha2[0];
+		ccreq->ccode[1] = alpha2[1];
+		return 0;
+	}
+
 	found_index = -1;
 	for (i = 0; i < country_codes->table_size; i++) {
 		cc = &country_codes->table[i];

From feb45643762172110cb3a44f99dd54304f33b711 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= <ALSI@bang-olufsen.dk>
Date: Thu, 6 May 2021 13:20:12 +0000
Subject: [PATCH 1757/3292] brcmfmac: fix setting of station info chains
 bitmask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sinfo->chains field is a bitmask for filled values in chain_signal
and chain_signal_avg, not a count. Treat it as such so that the driver
can properly report per-chain RSSI information.

Before (MIMO mode):

  $ iw dev wlan0 station dump
      ...
      signal: -51 [-51] dBm

After (MIMO mode):

  $ iw dev wlan0 station dump
      ...
      signal: -53 [-53, -54] dBm

Fixes: cae355dc90db ("brcmfmac: Add RSSI information to get_station.")
Signed-off-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210506132010.3964484-1-alsi@bang-olufsen.dk
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 6cb09c7c37b6..77458095bbd4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2838,6 +2838,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 		count_rssi = 0;
 		for (i = 0; i < BRCMF_ANT_MAX; i++) {
 			if (sta_info_le.rssi[i]) {
+				sinfo->chains |= BIT(count_rssi);
 				sinfo->chain_signal_avg[count_rssi] =
 					sta_info_le.rssi[i];
 				sinfo->chain_signal[count_rssi] =
@@ -2848,8 +2849,6 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 		}
 		if (count_rssi) {
 			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
-			sinfo->chains = count_rssi;
-
 			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
 			total_rssi /= count_rssi;
 			sinfo->signal = total_rssi;

From 9a1590934d9a02e570636432b93052c0c035f31f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= <ALSI@bang-olufsen.dk>
Date: Thu, 6 May 2021 13:20:12 +0000
Subject: [PATCH 1758/3292] brcmfmac: correctly report average RSSI in station
 info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rx_lastpkt_rssi field provided by the firmware is suitable for
NL80211_STA_INFO_{SIGNAL,CHAIN_SIGNAL}, while the rssi field is an
average. Fix up the assignments and set the correct STA_INFO bits. This
lets userspace know that the average RSSI is part of the station info.

Fixes: cae355dc90db ("brcmfmac: Add RSSI information to get_station.")
Signed-off-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210506132010.3964484-2-alsi@bang-olufsen.dk
---
 .../broadcom/brcm80211/brcmfmac/cfg80211.c    | 36 ++++++++++---------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 77458095bbd4..65fb038d88e7 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2767,8 +2767,9 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 	struct brcmf_sta_info_le sta_info_le;
 	u32 sta_flags;
 	u32 is_tdls_peer;
-	s32 total_rssi;
-	s32 count_rssi;
+	s32 total_rssi_avg = 0;
+	s32 total_rssi = 0;
+	s32 count_rssi = 0;
 	int rssi;
 	u32 i;
 
@@ -2834,24 +2835,27 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES);
 			sinfo->rx_bytes = le64_to_cpu(sta_info_le.rx_tot_bytes);
 		}
-		total_rssi = 0;
-		count_rssi = 0;
 		for (i = 0; i < BRCMF_ANT_MAX; i++) {
-			if (sta_info_le.rssi[i]) {
-				sinfo->chains |= BIT(count_rssi);
-				sinfo->chain_signal_avg[count_rssi] =
-					sta_info_le.rssi[i];
-				sinfo->chain_signal[count_rssi] =
-					sta_info_le.rssi[i];
-				total_rssi += sta_info_le.rssi[i];
-				count_rssi++;
-			}
+			if (sta_info_le.rssi[i] == 0 ||
+			    sta_info_le.rx_lastpkt_rssi[i] == 0)
+				continue;
+			sinfo->chains |= BIT(count_rssi);
+			sinfo->chain_signal[count_rssi] =
+				sta_info_le.rx_lastpkt_rssi[i];
+			sinfo->chain_signal_avg[count_rssi] =
+				sta_info_le.rssi[i];
+			total_rssi += sta_info_le.rx_lastpkt_rssi[i];
+			total_rssi_avg += sta_info_le.rssi[i];
+			count_rssi++;
 		}
 		if (count_rssi) {
-			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
 			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
-			total_rssi /= count_rssi;
-			sinfo->signal = total_rssi;
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
+			sinfo->filled |=
+				BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
+			sinfo->signal = total_rssi / count_rssi;
+			sinfo->signal_avg = total_rssi_avg / count_rssi;
 		} else if (test_bit(BRCMF_VIF_STATUS_CONNECTED,
 			&ifp->vif->sme_state)) {
 			memset(&scb_val, 0, sizeof(scb_val));

From ea3f903caea08bbda8a5d4e86d1d24f50af40b5e Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:34 +0800
Subject: [PATCH 1759/3292] brcmfmac: Demote non-compliant kernel-doc headers

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c:2040: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c:1295: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Franky Lin <franky.lin@broadcom.com>
Cc: Hante Meuleman <hante.meuleman@broadcom.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-5-shenyang39@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c  | 2 +-
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 34cd8a7401fe..9ac0d8c73d5a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2037,7 +2037,7 @@ static void brcmf_p2p_get_current_chanspec(struct brcmf_p2p_info *p2p,
 }
 
 /**
- * Change a P2P Role.
+ * brcmf_p2p_ifchange - Change a P2P Role.
  * @cfg: driver private data for cfg80211 interface.
  * @if_type: interface type.
  * Returns 0 if success.
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 16ed325795a8..d1dda851e176 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -1291,7 +1291,7 @@ static void brcmf_sdio_free_glom(struct brcmf_sdio *bus)
 	}
 }
 
-/**
+/*
  * brcmfmac sdio bus specific header
  * This is the lowest layer header wrapped on the packets transmitted between
  * host and WiFi dongle which contains information needed for SDIO core and

From 7ea7a1e05c7ff5ffc9f9ec1f0849f6ceb7fcd57c Mon Sep 17 00:00:00 2001
From: Tong Tiangen <tongtiangen@huawei.com>
Date: Tue, 1 Jun 2021 18:01:28 +0800
Subject: [PATCH 1760/3292] brcmfmac: Fix a double-free in brcmf_sdio_bus_reset

brcmf_sdiod_remove has been called inside brcmf_sdiod_probe when fails,
so there's no need to call another one. Otherwise, sdiodev->freezer
would be double freed.

Fixes: 7836102a750a ("brcmfmac: reset SDIO bus on a firmware crash")
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Reviewed-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210601100128.69561-1-tongtiangen@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index d1dda851e176..27eb83ee2dc9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4162,7 +4162,6 @@ static int brcmf_sdio_bus_reset(struct device *dev)
 	if (ret) {
 		brcmf_err("Failed to probe after sdio device reset: ret %d\n",
 			  ret);
-		brcmf_sdiod_remove(sdiodev);
 	}
 
 	return ret;

From 4a26aafe4886a4ec9965171c280ce16df30dc362 Mon Sep 17 00:00:00 2001
From: Matthias Brugger <mbrugger@suse.com>
Date: Wed, 2 Jun 2021 16:43:05 +0200
Subject: [PATCH 1761/3292] brcmfmac: Delete second brcm folder hierarchy

BRCMF_FW_DEFAULT_PATH already defines the brcm folder, delete the second
folder to match with Linux firmware repository layout.

Fixes: 75729e110e68 ("brcmfmac: expose firmware config files through modinfo")
Signed-off-by: Matthias Brugger <mbrugger@suse.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210602144305.4481-1-matthias.bgg@kernel.org
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 27eb83ee2dc9..bb77a5987faf 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -626,8 +626,8 @@ BRCMF_FW_DEF(4373, "brcmfmac4373-sdio");
 BRCMF_FW_DEF(43012, "brcmfmac43012-sdio");
 
 /* firmware config files */
-MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcm/brcmfmac*-sdio.*.txt");
-MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcm/brcmfmac*-pcie.*.txt");
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt");
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt");
 
 static const struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
 	BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),

From 885f256f61f958d494ffdf5084a292808f08daee Mon Sep 17 00:00:00 2001
From: Matthias Brugger <mbrugger@suse.com>
Date: Mon, 7 Jun 2021 12:34:33 +0200
Subject: [PATCH 1762/3292] brcmfmac: Add clm_blob firmware files to modinfo

Cypress Wi-Fi chipsets include information regarding regulatory
constraints. These are provided to the driver through "Country Local
Matrix" (CLM) blobs. Files present in Linux firmware repository are
on a generic world-wide safe version with conservative power
settings which is designed to comply with regulatory but may not
provide best performance on all boards. Never the less, a better
functionality can be expected with the file present, so add it to the
modinfo of the driver.

Signed-off-by: Matthias Brugger <mbrugger@suse.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210607103433.21022-1-matthias.bgg@kernel.org
---
 .../wireless/broadcom/brcm80211/brcmfmac/firmware.h  |  7 +++++++
 .../net/wireless/broadcom/brcm80211/brcmfmac/pcie.c  |  4 ++--
 .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c  | 12 ++++++------
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index 46c66415b4a6..e290dec9c53d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -32,6 +32,13 @@ static const char BRCM_ ## fw_name ## _FIRMWARE_BASENAME[] = \
 	BRCMF_FW_DEFAULT_PATH fw_base; \
 MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw_base ".bin")
 
+/* Firmware and Country Local Matrix files */
+#define BRCMF_FW_CLM_DEF(fw_name, fw_base) \
+static const char BRCM_ ## fw_name ## _FIRMWARE_BASENAME[] = \
+	BRCMF_FW_DEFAULT_PATH fw_base; \
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw_base ".bin"); \
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw_base ".clm_blob")
+
 #define BRCMF_FW_ENTRY(chipid, mask, name) \
 	{ chipid, mask, BRCM_ ## name ## _FIRMWARE_BASENAME }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 143a705b5cb3..c49dd0c36ae4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -48,8 +48,8 @@ enum brcmf_pcie_state {
 BRCMF_FW_DEF(43602, "brcmfmac43602-pcie");
 BRCMF_FW_DEF(4350, "brcmfmac4350-pcie");
 BRCMF_FW_DEF(4350C, "brcmfmac4350c2-pcie");
-BRCMF_FW_DEF(4356, "brcmfmac4356-pcie");
-BRCMF_FW_DEF(43570, "brcmfmac43570-pcie");
+BRCMF_FW_CLM_DEF(4356, "brcmfmac4356-pcie");
+BRCMF_FW_CLM_DEF(43570, "brcmfmac43570-pcie");
 BRCMF_FW_DEF(4358, "brcmfmac4358-pcie");
 BRCMF_FW_DEF(4359, "brcmfmac4359-pcie");
 BRCMF_FW_DEF(4364, "brcmfmac4364-pcie");
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index bb77a5987faf..97ee9e2e2e35 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -616,14 +616,14 @@ BRCMF_FW_DEF(43362, "brcmfmac43362-sdio");
 BRCMF_FW_DEF(4339, "brcmfmac4339-sdio");
 BRCMF_FW_DEF(43430A0, "brcmfmac43430a0-sdio");
 /* Note the names are not postfixed with a1 for backward compatibility */
-BRCMF_FW_DEF(43430A1, "brcmfmac43430-sdio");
-BRCMF_FW_DEF(43455, "brcmfmac43455-sdio");
+BRCMF_FW_CLM_DEF(43430A1, "brcmfmac43430-sdio");
+BRCMF_FW_CLM_DEF(43455, "brcmfmac43455-sdio");
 BRCMF_FW_DEF(43456, "brcmfmac43456-sdio");
-BRCMF_FW_DEF(4354, "brcmfmac4354-sdio");
-BRCMF_FW_DEF(4356, "brcmfmac4356-sdio");
+BRCMF_FW_CLM_DEF(4354, "brcmfmac4354-sdio");
+BRCMF_FW_CLM_DEF(4356, "brcmfmac4356-sdio");
 BRCMF_FW_DEF(4359, "brcmfmac4359-sdio");
-BRCMF_FW_DEF(4373, "brcmfmac4373-sdio");
-BRCMF_FW_DEF(43012, "brcmfmac43012-sdio");
+BRCMF_FW_CLM_DEF(4373, "brcmfmac4373-sdio");
+BRCMF_FW_CLM_DEF(43012, "brcmfmac43012-sdio");
 
 /* firmware config files */
 MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt");

From c0277e25d28fc534c09e140c19d5b85f9e7f86fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Tue, 11 May 2021 09:02:58 +0200
Subject: [PATCH 1763/3292] brcmsmac: improve readability on addresses copy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A static analyzer identified as a potential bug the copy of
12 bytes from a 6 bytes array to a 6 bytes array. Both
arrays are 6 bytes addresses.

Although not being a real bug, it is not immediately clear
why is done this way: next 6 bytes address, contiguous to
the first one, must also be copied to next contiguous 6 bytes
address of the destination.

Copying each one separately will make both static analyzers
and reviewers happier.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511070257.7843-1-ihuguet@redhat.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
index 763e0ec583d7..26de1bd7fee9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
@@ -6607,7 +6607,8 @@ brcms_c_d11hdrs_mac80211(struct brcms_c_info *wlc, struct ieee80211_hw *hw,
 			rts->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
 							 IEEE80211_STYPE_RTS);
 
-			memcpy(&rts->ra, &h->addr1, 2 * ETH_ALEN);
+			memcpy(&rts->ra, &h->addr1, ETH_ALEN);
+			memcpy(&rts->ta, &h->addr2, ETH_ALEN);
 		}
 
 		/* mainrate

From 34fe7038a3b33b4b50b1e948e005bf3db20b7a54 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 11 May 2021 16:56:29 -0700
Subject: [PATCH 1764/3292] brcmsmac: Drop unnecessary NULL check after
 container_of

The parameter passed to ai_detach() is guaranteed to never be NULL
because it is checked by the caller. Consequently, the result of
container_of() on it is also never NULL, and a NULL check on it
is unnecessary. Even without that, the NULL check would still be
unnecessary because the subsequent kfree() can handle NULL arguments.
On top of all that, it is misleading to check the result of container_of()
against NULL because the position of the contained element could change,
which would make the check invalid. Remove it.

This change was made automatically with the following Coccinelle script.

@@
type t;
identifier v;
statement s;
@@

<+...
(
  t v = container_of(...);
|
  v = container_of(...);
)
  ...
  when != v
- if (\( !v \| v == NULL \) ) s
...+>

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511235629.1686038-1-linux@roeck-us.net
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c
index 53365977bfd6..2084b506a450 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/aiutils.c
@@ -531,9 +531,6 @@ void ai_detach(struct si_pub *sih)
 
 	sii = container_of(sih, struct si_info, pub);
 
-	if (sii == NULL)
-		return;
-
 	kfree(sii);
 }
 

From 9a25344d5177c2b9285532236dc3d10a091f39a8 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 12 May 2021 22:58:30 +0200
Subject: [PATCH 1765/3292] brcmsmac: mac80211_if: Fix a resource leak in an
 error handling path

If 'brcms_attach()' fails, we must undo the previous 'ieee80211_alloc_hw()'
as already done in the remove function.

Fixes: 5b435de0d786 ("net: wireless: add brcm80211 drivers")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/8fbc171a1a493b38db5a6f0873c6021fca026a6c.1620852921.git.christophe.jaillet@wanadoo.fr
---
 .../wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c    | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
index 39f3af2d0439..eadac0f5590f 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
@@ -1220,6 +1220,7 @@ static int brcms_bcma_probe(struct bcma_device *pdev)
 {
 	struct brcms_info *wl;
 	struct ieee80211_hw *hw;
+	int ret;
 
 	dev_info(&pdev->dev, "mfg %x core %x rev %d class %d irq %d\n",
 		 pdev->id.manuf, pdev->id.id, pdev->id.rev, pdev->id.class,
@@ -1244,11 +1245,16 @@ static int brcms_bcma_probe(struct bcma_device *pdev)
 	wl = brcms_attach(pdev);
 	if (!wl) {
 		pr_err("%s: brcms_attach failed!\n", __func__);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_free_ieee80211;
 	}
 	brcms_led_register(wl);
 
 	return 0;
+
+err_free_ieee80211:
+	ieee80211_free_hw(hw);
+	return ret;
 }
 
 static int brcms_suspend(struct bcma_device *pdev)

From 5a8e5dae2a22d1580f6d72be54bb57ab29305cc6 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 24 May 2021 16:20:43 +0800
Subject: [PATCH 1766/3292] brcmsmac: Remove the repeated declaration

Function 'brcms_c_stf_phy_txant_upd' are declared twice, remove
the repeated declaration.

Cc: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621844443-38290-1-git-send-email-zhangshaokun@hisilicon.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h
index aa4ab53bf634..af86c7fc5112 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h
@@ -29,7 +29,6 @@ void brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band);
 void brcms_c_stf_phy_txant_upd(struct brcms_c_info *wlc);
 int brcms_c_stf_txchain_set(struct brcms_c_info *wlc, s32 int_val, bool force);
 bool brcms_c_stf_stbc_rx_set(struct brcms_c_info *wlc, s32 int_val);
-void brcms_c_stf_phy_txant_upd(struct brcms_c_info *wlc);
 void brcms_c_stf_phy_chain_calc(struct brcms_c_info *wlc);
 u16 brcms_c_stf_phytxchain_sel(struct brcms_c_info *wlc, u32 rspec);
 u16 brcms_c_stf_d11hdrs_phyctl_txant(struct brcms_c_info *wlc, u32 rspec);

From 4e164f8716853b879e2b1a21a12d54c57f11372e Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 12 Jun 2021 08:05:00 +0530
Subject: [PATCH 1767/3292] libbpf: Remove unneeded check for flags during tc
 detach

Coverity complained about this being unreachable code. It is right
because we already enforce flags to be unset, so a check validating
the flag value is redundant.

Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210612023502.1283837-2-memxor@gmail.com
---
 tools/lib/bpf/netlink.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index d743c8721aa7..efbb50ad59d8 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -675,8 +675,6 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 		return -EINVAL;
 	if (priority > UINT16_MAX)
 		return -EINVAL;
-	if (flags & ~BPF_TC_F_REPLACE)
-		return -EINVAL;
 	if (!flush) {
 		if (!handle || !priority)
 			return -EINVAL;

From bbf29d3a2e49e482d5267311798aec42f00e88f3 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 12 Jun 2021 08:05:01 +0530
Subject: [PATCH 1768/3292] libbpf: Set NLM_F_EXCL when creating qdisc

This got lost during the refactoring across versions. We always use
NLM_F_EXCL when creating some TC object, so reflect what the function
says and set the flag.

Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210612023502.1283837-3-memxor@gmail.com
---
 tools/lib/bpf/netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index efbb50ad59d8..cf9381f03b16 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -457,7 +457,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 
 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
 {
-	return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE);
+	return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
 }
 
 static int tc_qdisc_delete(struct bpf_tc_hook *hook)

From 75ac5cc2ee6b499bc0225ad67302271772929f19 Mon Sep 17 00:00:00 2001
From: Evan Benn <evanbenn@chromium.org>
Date: Wed, 12 May 2021 12:25:44 +1000
Subject: [PATCH 1769/3292] clocksource/drivers/mediatek: Ack and disable
 interrupts on suspend

Interrupts are disabled during suspend before this driver disables its
timers. ARM trusted firmware will abort suspend if the timer irq is
pending, so ack and disable the timer interrupt during suspend.

Signed-off-by: Evan Benn <evanbenn@chromium.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210512122528.v4.1.I1d9917047de06715da16e1620759f703fcfdcbcb@changeid
---
 drivers/clocksource/timer-mediatek.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c
index 9318edcd8963..ab63b95e414f 100644
--- a/drivers/clocksource/timer-mediatek.c
+++ b/drivers/clocksource/timer-mediatek.c
@@ -241,6 +241,28 @@ static void mtk_gpt_enable_irq(struct timer_of *to, u8 timer)
 	       timer_of_base(to) + GPT_IRQ_EN_REG);
 }
 
+static void mtk_gpt_resume(struct clock_event_device *clk)
+{
+	struct timer_of *to = to_timer_of(clk);
+
+	mtk_gpt_enable_irq(to, TIMER_CLK_EVT);
+}
+
+static void mtk_gpt_suspend(struct clock_event_device *clk)
+{
+	struct timer_of *to = to_timer_of(clk);
+
+	/* Disable all interrupts */
+	writel(0x0, timer_of_base(to) + GPT_IRQ_EN_REG);
+
+	/*
+	 * This is called with interrupts disabled,
+	 * so we need to ack any interrupt that is pending
+	 * or for example ATF will prevent a suspend from completing.
+	 */
+	writel(0x3f, timer_of_base(to) + GPT_IRQ_ACK_REG);
+}
+
 static struct timer_of to = {
 	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
 
@@ -286,6 +308,8 @@ static int __init mtk_gpt_init(struct device_node *node)
 	to.clkevt.set_state_oneshot = mtk_gpt_clkevt_shutdown;
 	to.clkevt.tick_resume = mtk_gpt_clkevt_shutdown;
 	to.clkevt.set_next_event = mtk_gpt_clkevt_next_event;
+	to.clkevt.suspend = mtk_gpt_suspend;
+	to.clkevt.resume = mtk_gpt_resume;
 	to.of_irq.handler = mtk_gpt_interrupt;
 
 	ret = timer_of_init(node, &to);

From 9517c577f9f722270584cfb1a7b4e1354e408658 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 15 Apr 2021 11:55:06 +0300
Subject: [PATCH 1770/3292] clocksource/drivers/timer-ti-dm: Save and restore
 timer TIOCP_CFG

As we are using cpu_pm to save and restore context, we must also save and
restore the timer sysconfig register TIOCP_CFG. This is needed because
we are not calling PM runtime functions at all with cpu_pm.

Fixes: b34677b0999a ("clocksource/drivers/timer-ti-dm: Implement cpu_pm notifier for context save and restore")
Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Adam Ford <aford173@gmail.com>
Cc: Andreas Kemnade <andreas@kemnade.info>
Cc: Lokesh Vutla <lokeshvutla@ti.com>
Cc: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210415085506.56828-1-tony@atomide.com
---
 drivers/clocksource/timer-ti-dm.c | 6 ++++++
 include/clocksource/timer-ti-dm.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 33eeabf9c3d1..e5c631f1b5cb 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -78,6 +78,9 @@ static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
 
 static void omap_timer_restore_context(struct omap_dm_timer *timer)
 {
+	__omap_dm_timer_write(timer, OMAP_TIMER_OCP_CFG_OFFSET,
+			      timer->context.ocp_cfg, 0);
+
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_WAKEUP_EN_REG,
 				timer->context.twer);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_COUNTER_REG,
@@ -95,6 +98,9 @@ static void omap_timer_restore_context(struct omap_dm_timer *timer)
 
 static void omap_timer_save_context(struct omap_dm_timer *timer)
 {
+	timer->context.ocp_cfg =
+		__omap_dm_timer_read(timer, OMAP_TIMER_OCP_CFG_OFFSET, 0);
+
 	timer->context.tclr =
 			omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
 	timer->context.twer =
diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h
index 4c61dade8835..f6da8a132639 100644
--- a/include/clocksource/timer-ti-dm.h
+++ b/include/clocksource/timer-ti-dm.h
@@ -74,6 +74,7 @@
 #define OMAP_TIMER_ERRATA_I103_I767			0x80000000
 
 struct timer_regs {
+	u32 ocp_cfg;
 	u32 tidr;
 	u32 tier;
 	u32 twer;

From 870a6e1539829356baf70b57c933d0b309cfac21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Sat, 5 Jun 2021 00:31:45 +0800
Subject: [PATCH 1771/3292] clocksource/drivers/ingenic: Rename unreasonable
 array names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1.Rename the "ingenic_ost_clk_info[]" to "x1000_ost_clk_info[]" to
  facilitate the addition of OST support for X2000 SoC in a later
  commit

2.When the OST support for X2000 SoC is added, there will be two
  compatible strings, so renaming "ingenic_ost_of_match[]" to
  "ingenic_ost_of_matches[]" is more reasonable

3.Remove the unnecessary comma in "ingenic_ost_of_matches[]" to reduce
  code size as much as possible.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1622824306-30987-2-git-send-email-zhouyanjie@wanyeetech.com
---
 drivers/clocksource/ingenic-sysost.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c
index e77d58449005..a129840f14f9 100644
--- a/drivers/clocksource/ingenic-sysost.c
+++ b/drivers/clocksource/ingenic-sysost.c
@@ -186,7 +186,7 @@ static const struct clk_ops ingenic_ost_global_timer_ops = {
 
 static const char * const ingenic_ost_clk_parents[] = { "ext" };
 
-static const struct ingenic_ost_clk_info ingenic_ost_clk_info[] = {
+static const struct ingenic_ost_clk_info x1000_ost_clk_info[] = {
 	[OST_CLK_PERCPU_TIMER] = {
 		.init_data = {
 			.name = "percpu timer",
@@ -414,14 +414,14 @@ static const struct ingenic_soc_info x1000_soc_info = {
 	.num_channels = 2,
 };
 
-static const struct of_device_id __maybe_unused ingenic_ost_of_match[] __initconst = {
-	{ .compatible = "ingenic,x1000-ost", .data = &x1000_soc_info, },
+static const struct of_device_id __maybe_unused ingenic_ost_of_matches[] __initconst = {
+	{ .compatible = "ingenic,x1000-ost", .data = &x1000_soc_info },
 	{ /* sentinel */ }
 };
 
 static int __init ingenic_ost_probe(struct device_node *np)
 {
-	const struct of_device_id *id = of_match_node(ingenic_ost_of_match, np);
+	const struct of_device_id *id = of_match_node(ingenic_ost_of_matches, np);
 	struct ingenic_ost *ost;
 	unsigned int i;
 	int ret;
@@ -462,7 +462,7 @@ static int __init ingenic_ost_probe(struct device_node *np)
 	ost->clocks->num = ost->soc_info->num_channels;
 
 	for (i = 0; i < ost->clocks->num; i++) {
-		ret = ingenic_ost_register_clock(ost, i, &ingenic_ost_clk_info[i], ost->clocks);
+		ret = ingenic_ost_register_clock(ost, i, &x1000_ost_clk_info[i], ost->clocks);
 		if (ret) {
 			pr_crit("%s: Cannot register clock %d\n", __func__, i);
 			goto err_unregister_ost_clocks;

From 30657b8ee459f3878647d29799bd13b7cf2c95f9 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:37 +0800
Subject: [PATCH 1772/3292] libertas_tf: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/marvell/libertas_tf/if_usb.c:56: warning: expecting prototype for if_usb_wrike_bulk_callback(). Prototype was for if_usb_write_bulk_callback() instead

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-8-shenyang39@huawei.com
---
 drivers/net/wireless/marvell/libertas_tf/if_usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
index a92916dc81a9..fe0a69e804d8 100644
--- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
@@ -48,7 +48,7 @@ static int if_usb_submit_rx_urb(struct if_usb_card *cardp);
 static int if_usb_reset_device(struct lbtf_private *priv);
 
 /**
- *  if_usb_wrike_bulk_callback -  call back to handle URB status
+ *  if_usb_write_bulk_callback -  call back to handle URB status
  *
  *  @urb:		pointer to urb structure
  */

From 9a0fb9502f0dd4c41e59d6c3390794a81fca2bc3 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:38 +0800
Subject: [PATCH 1773/3292] rtlwifi: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c:124: warning: expecting prototype for writeLLT(). Prototype was for rtl92c_llt_write() instead
 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c:154: warning: expecting prototype for rtl92c_init_LLT_table(). Prototype was for rtl92c_init_llt_table() instead

Cc: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-9-shenyang39@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
index 8d2c6d8d32d9..4ff0d4118193 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
@@ -112,7 +112,7 @@ void rtl92c_read_chip_version(struct ieee80211_hw *hw)
 }
 
 /**
- * writeLLT - LLT table write access
+ * rtl92c_llt_write - LLT table write access
  * @hw: Pointer to the ieee80211_hw structure.
  * @address: LLT logical address.
  * @data: LLT data content
@@ -144,7 +144,7 @@ bool rtl92c_llt_write(struct ieee80211_hw *hw, u32 address, u32 data)
 }
 
 /**
- * rtl92c_init_LLT_table - Init LLT table
+ * rtl92c_init_llt_table - Init LLT table
  * @hw: Pointer to the ieee80211_hw structure.
  * @boundary: Page boundary.
  *

From c707db1b2e7ba62e78998544a257fc68a3c4edd7 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:39 +0800
Subject: [PATCH 1774/3292] rsi: Fix missing function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/rsi/rsi_91x_mgmt.c:1550: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Amitkumar Karwar <amitkarwar@gmail.com>
Cc: Siva Rebbagondla <siva8118@gmail.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-10-shenyang39@huawei.com
---
 drivers/net/wireless/rsi/rsi_91x_mgmt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index 33c76d39a8e9..dffe1d6cc592 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -1547,8 +1547,8 @@ static int rsi_eeprom_read(struct rsi_common *common)
 }
 
 /**
- * This function sends a frame to block/unblock
- * data queues in the firmware
+ * rsi_send_block_unblock_frame() - This function sends a frame to block/unblock
+ *                                  data queues in the firmware
  *
  * @common: Pointer to the driver private structure.
  * @block_event: Event block if true, unblock if false

From c3b67ea3d97a5e08f7ccb0e2c90b0913b92c01cc Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:41 +0800
Subject: [PATCH 1775/3292] wlcore: Fix missing function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/ti/wlcore/cmd.c:824: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ti/wlcore/cmd.c:853: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ti/wlcore/cmd.c:882: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-12-shenyang39@huawei.com
---
 drivers/net/wireless/ti/wlcore/cmd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c
index 32a2e27cc561..8b798b5fcaf5 100644
--- a/drivers/net/wireless/ti/wlcore/cmd.c
+++ b/drivers/net/wireless/ti/wlcore/cmd.c
@@ -821,7 +821,7 @@ out:
 
 
 /**
- * send test command to firmware
+ * wl1271_cmd_test - send test command to firmware
  *
  * @wl: wl struct
  * @buf: buffer containing the command, with all headers, must work with dma
@@ -850,7 +850,7 @@ int wl1271_cmd_test(struct wl1271 *wl, void *buf, size_t buf_len, u8 answer)
 EXPORT_SYMBOL_GPL(wl1271_cmd_test);
 
 /**
- * read acx from firmware
+ * wl1271_cmd_interrogate - read acx from firmware
  *
  * @wl: wl struct
  * @id: acx id
@@ -879,7 +879,7 @@ int wl1271_cmd_interrogate(struct wl1271 *wl, u16 id, void *buf,
 }
 
 /**
- * write acx value to firmware
+ * wlcore_cmd_configure_failsafe - write acx value to firmware
  *
  * @wl: wl struct
  * @id: acx id

From bd65fe550973b8fafea9b06aa7435931ad13ae27 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 14:55:43 +0800
Subject: [PATCH 1776/3292] libertas: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Ganapathi Bhat <ganapathi.bhat@nxp.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621407345-10625-2-git-send-email-tanghui20@huawei.com
---
 drivers/net/wireless/marvell/libertas/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas/main.c b/drivers/net/wireless/marvell/libertas/main.c
index ee4cf3437e28..64fc5e410864 100644
--- a/drivers/net/wireless/marvell/libertas/main.c
+++ b/drivers/net/wireless/marvell/libertas/main.c
@@ -941,7 +941,7 @@ struct lbs_private *lbs_add_card(void *card, struct device *dmdev)
 	wdev->netdev = dev;
 	priv->dev = dev;
 
- 	dev->netdev_ops = &lbs_netdev_ops;
+	dev->netdev_ops = &lbs_netdev_ops;
 	dev->watchdog_timeo = 5 * HZ;
 	dev->ethtool_ops = &lbs_ethtool_ops;
 	dev->flags |= IFF_BROADCAST | IFF_MULTICAST;

From 084eb606dbcfe363f228d27d211cfcdd69bc0f2f Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 14:55:44 +0800
Subject: [PATCH 1777/3292] rt2x00: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Stanislaw Gruszka <stf_xl@wp.pl>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621407345-10625-3-git-send-email-tanghui20@huawei.com
---
 drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index 5264b0a1f098..deddb0afd312 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -1037,7 +1037,7 @@ void rt2800_txdone_entry(struct queue_entry *entry, u32 status, __le32 *txwi,
 	 * FIXME: if we do not find matching entry, we tell that frame was
 	 * posted without any retries. We need to find a way to fix that
 	 * and provide retry count.
- 	 */
+	 */
 	if (unlikely((aggr == 1 && ampdu == 0 && real_mcs != mcs)) || !match) {
 		rt2800_rate_from_status(skbdesc, status, rt2x00dev->curr_band);
 		mcs = real_mcs;

From 7b7362ba27a23a9042e2423407e6ce16d388aba0 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 14:55:45 +0800
Subject: [PATCH 1778/3292] wlcore: remove leading spaces before tabs

There are a few leading spaces before tabs and remove it by running the
following commard:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'
	$ find . -name '*.h' | xargs sed -r -i 's/^[ ]+\t/\t/'

Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621407345-10625-4-git-send-email-tanghui20@huawei.com
---
 drivers/net/wireless/ti/wlcore/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 8509b989940c..e500b8405f8f 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -3242,8 +3242,8 @@ static void wl1271_op_configure_filter(struct ieee80211_hw *hw,
 		 * the firmware filters so that all multicast packets are passed
 		 * This is mandatory for MDNS based discovery protocols 
 		 */
- 		if (wlvif->bss_type == BSS_TYPE_AP_BSS) {
- 			if (*total & FIF_ALLMULTI) {
+		if (wlvif->bss_type == BSS_TYPE_AP_BSS) {
+			if (*total & FIF_ALLMULTI) {
 				ret = wl1271_acx_group_address_tbl(wl, wlvif,
 							false,
 							NULL, 0);

From ad4d74cd81779e8f33e7365ae98280835604e905 Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Fri, 23 Apr 2021 17:49:40 +0800
Subject: [PATCH 1779/3292] rndis_wlan: simplify is_associated()

It is not necessary to define the variable ret to receive
the return value of the get_bssid() method.

Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210423094940.1593-1-zuoqilin1@163.com
---
 drivers/net/wireless/rndis_wlan.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 9fe77556858e..63ce2443f136 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -1036,14 +1036,11 @@ static bool is_associated(struct usbnet *usbdev)
 {
 	struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev);
 	u8 bssid[ETH_ALEN];
-	int ret;
 
 	if (!priv->radio_on)
 		return false;
 
-	ret = get_bssid(usbdev, bssid);
-
-	return (ret == 0 && !is_zero_ether_addr(bssid));
+	return (get_bssid(usbdev, bssid) == 0 && !is_zero_ether_addr(bssid));
 }
 
 static int disassociate(struct usbnet *usbdev, bool reset_ssid)

From d4f23164cff08de41abfd95ad8610b94137cdf9c Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 28 Apr 2021 05:54:45 +0300
Subject: [PATCH 1780/3292] wilc1000: Fix clock name binding

Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml
requires an "rtc" clock name.
drivers/net/wireless/microchip/wilc1000/sdio.c is using "rtc" clock name
as well. Comply with the binding in wilc1000/spi.c too.

Fixes: 854d66df74ae ("staging: wilc1000: look for rtc_clk clock in spi mode")
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210428025445.81953-1-tudor.ambarus@microchip.com
---
 drivers/net/wireless/microchip/wilc1000/spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index 1472e9843896..8e9aaf03a6fa 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -164,7 +164,7 @@ static int wilc_bus_probe(struct spi_device *spi)
 	wilc->bus_data = spi_priv;
 	wilc->dev_irq_num = spi->irq;
 
-	wilc->rtc_clk = devm_clk_get(&spi->dev, "rtc_clk");
+	wilc->rtc_clk = devm_clk_get(&spi->dev, "rtc");
 	if (PTR_ERR_OR_ZERO(wilc->rtc_clk) == -EPROBE_DEFER) {
 		kfree(spi_priv);
 		return -EPROBE_DEFER;

From d10a87a3535cce2b890897914f5d0d83df669c63 Mon Sep 17 00:00:00 2001
From: Lee Gibson <leegib@gmail.com>
Date: Wed, 28 Apr 2021 12:55:08 +0100
Subject: [PATCH 1781/3292] wl1251: Fix possible buffer overflow in
 wl1251_cmd_scan

Function wl1251_cmd_scan calls memcpy without checking the length.
Harden by checking the length is within the maximum allowed size.

Signed-off-by: Lee Gibson <leegib@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210428115508.25624-1-leegib@gmail.com
---
 drivers/net/wireless/ti/wl1251/cmd.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c
index 498c8db2eb48..d7a869106782 100644
--- a/drivers/net/wireless/ti/wl1251/cmd.c
+++ b/drivers/net/wireless/ti/wl1251/cmd.c
@@ -454,9 +454,12 @@ int wl1251_cmd_scan(struct wl1251 *wl, u8 *ssid, size_t ssid_len,
 		cmd->channels[i].channel = channels[i]->hw_value;
 	}
 
-	cmd->params.ssid_len = ssid_len;
-	if (ssid)
-		memcpy(cmd->params.ssid, ssid, ssid_len);
+	if (ssid) {
+		int len = clamp_val(ssid_len, 0, IEEE80211_MAX_SSID_LEN);
+
+		cmd->params.ssid_len = len;
+		memcpy(cmd->params.ssid, ssid, len);
+	}
 
 	ret = wl1251_cmd_send(wl, CMD_SCAN, cmd, sizeof(*cmd));
 	if (ret < 0) {

From 7af305a1245a7ceff2d8577e011d0a0f7cc33e4c Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stf_xl@wp.pl>
Date: Tue, 4 May 2021 10:12:20 +0200
Subject: [PATCH 1782/3292] rt2x00: do not set timestamp for injected frames

We setup hardware to insert TSF timestamp for beacon and probe response
frames. This is undesired for injected frames, which might want to
set their own timestamp values, so disable this setting for injected
frames.

Tested-by: ZeroBeat <ZeroBeat@gmx.de>
Tested-by: n0w1re <n0w1re@protonmail.ch>
Signed-off-by: Stanislaw Gruszka <stf_xl@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210504081220.666939-1-stf_xl@wp.pl
---
 drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
index d4d389e8f1b4..fb1d31b2d52a 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
@@ -446,8 +446,9 @@ static void rt2x00queue_create_tx_descriptor(struct rt2x00_dev *rt2x00dev,
 	 * Beacons and probe responses require the tsf timestamp
 	 * to be inserted into the frame.
 	 */
-	if (ieee80211_is_beacon(hdr->frame_control) ||
-	    ieee80211_is_probe_resp(hdr->frame_control))
+	if ((ieee80211_is_beacon(hdr->frame_control) ||
+	     ieee80211_is_probe_resp(hdr->frame_control)) &&
+	    !(tx_info->flags & IEEE80211_TX_CTL_INJECTED))
 		__set_bit(ENTRY_TXD_REQ_TIMESTAMP, &txdesc->flags);
 
 	if ((tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) &&

From 924599d407928b91cb22d9274a347bb90f6f6129 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Thu, 6 May 2021 09:37:38 +0800
Subject: [PATCH 1783/3292] rtlwifi: 8821a: btcoexist: add comments to explain
 why if-else branches are identical

The coexistence programmers preserve the same code of branches
intentionally to fine tune performance easier, because bandwidth and RSSI
strength are highly related to coexistence performance. The basic rule of
performance tuning is to assign most time slot to BT for realtime
application, and WiFi uses remaining time slot but don't lower than low
bound.

Reported-by: Inigo Huguet <ihuguet@redhat.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210506013738.5943-1-pkshih@realtek.com
---
 .../net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c  | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
index 447caa4aad32..b998a0eb5f09 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
@@ -2810,6 +2810,7 @@ static void btc8821a2ant_action_a2dp(struct btc_coexist *btcoexist)
 					      0x4);
 	}
 
+	/* preserve identical branches for further fine-tuning */
 	if ((bt_rssi_state == BTC_RSSI_STATE_HIGH) ||
 	    (bt_rssi_state == BTC_RSSI_STATE_STAY_HIGH)) {
 		btc8821a2ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 23);
@@ -2944,6 +2945,7 @@ static void btc8821a2ant_action_pan_edr(struct btc_coexist *btcoexist)
 					      0x4);
 	}
 
+	/* preserve identical branches for further fine-tuning */
 	if ((bt_rssi_state == BTC_RSSI_STATE_HIGH) ||
 	    (bt_rssi_state == BTC_RSSI_STATE_STAY_HIGH))
 		btc8821a2ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 26);
@@ -3132,6 +3134,7 @@ static void btc8821a2ant_action_pan_edr_hid(struct btc_coexist *btcoexist)
 
 	btcoexist->btc_get(btcoexist, BTC_GET_U4_WIFI_BW, &wifi_bw);
 
+	/* preserve identical branches for further fine-tuning */
 	if (wifi_bw == BTC_WIFI_BW_LEGACY) {
 		/* for HID at 11b/g mode */
 		btc8821a2ant_coex_table(btcoexist, NORMAL_EXEC, 0x55ff55ff,
@@ -3321,6 +3324,7 @@ static void btc8821a2ant_action_hid_a2dp(struct btc_coexist *btcoexist)
 					      0x4);
 	}
 
+	/* preserve identical branches for further fine-tuning */
 	if ((bt_rssi_state == BTC_RSSI_STATE_HIGH) ||
 	    (bt_rssi_state == BTC_RSSI_STATE_STAY_HIGH)) {
 		btc8821a2ant_ps_tdma(btcoexist, NORMAL_EXEC, true, 23);

From 29d97219f4035185f229769313021e350972768b Mon Sep 17 00:00:00 2001
From: Saurav Girepunje <saurav.girepunje@gmail.com>
Date: Thu, 6 May 2021 10:18:38 +0530
Subject: [PATCH 1784/3292] zd1211rw: Prefer pr_err over printk error msg

In zd_usb.c usb_init we can prefer pr_err() over printk KERN_ERR
log level.

Signed-off-by: Saurav Girepunje <saurav.girepunje@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210506044838.GA7260@user
---
 drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
index 5c4cd0e1adeb..a7ceef10bf6a 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
@@ -1544,14 +1544,14 @@ static int __init usb_init(void)
 
 	zd_workqueue = create_singlethread_workqueue(driver.name);
 	if (zd_workqueue == NULL) {
-		printk(KERN_ERR "%s couldn't create workqueue\n", driver.name);
+		pr_err("%s couldn't create workqueue\n", driver.name);
 		return -ENOMEM;
 	}
 
 	r = usb_register(&driver);
 	if (r) {
 		destroy_workqueue(zd_workqueue);
-		printk(KERN_ERR "%s usb_register() failed. Error number %d\n",
+		pr_err("%s usb_register() failed. Error number %d\n",
 		       driver.name, r);
 		return r;
 	}

From b1c3a24897bd528f2f4fda9fea7da08a84ae25b6 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 7 May 2021 23:31:05 +0200
Subject: [PATCH 1785/3292] rsi: Assign beacon rate settings to the correct
 rate_info descriptor field

The RSI_RATE_x bits must be assigned to struct rsi_data_desc rate_info
field. The rest of the driver does it correctly, except this one place,
so fix it. This is also aligned with the RSI downstream vendor driver.
Without this patch, an AP operating at 5 GHz does not transmit any
beacons at all, this patch fixes that.

Fixes: d26a9559403c ("rsi: add beacon changes for AP mode")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Cc: Angus Ainslie <angus@akkea.ca>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Karun Eagalapati <karun256@gmail.com>
Cc: Martin Kepplinger <martink@posteo.de>
Cc: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Cc: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
Cc: Siva Rebbagondla <siva8118@gmail.com>
Cc: netdev@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210507213105.140138-1-marex@denx.de
---
 drivers/net/wireless/rsi/rsi_91x_hal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index ce9892152f4d..ab837921d9a4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -470,9 +470,9 @@ int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb)
 	}
 
 	if (common->band == NL80211_BAND_2GHZ)
-		bcn_frm->bbp_info |= cpu_to_le16(RSI_RATE_1);
+		bcn_frm->rate_info |= cpu_to_le16(RSI_RATE_1);
 	else
-		bcn_frm->bbp_info |= cpu_to_le16(RSI_RATE_6);
+		bcn_frm->rate_info |= cpu_to_le16(RSI_RATE_6);
 
 	if (mac_bcn->data[tim_offset + 2] == 0)
 		bcn_frm->frame_info |= cpu_to_le16(RSI_DATA_DESC_DTIM_BEACON);

From 29ca9e6ca5a5f850829c53557bf18912e56da6c9 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 7 May 2021 23:31:49 +0200
Subject: [PATCH 1786/3292] rsi: Add support for changing beacon interval

Pick code for changing the beacon interval (e.g. using beacon_int in
hostap config) from the downstream RSI driver.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Cc: Angus Ainslie <angus@akkea.ca>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Karun Eagalapati <karun256@gmail.com>
Cc: Martin Kepplinger <martink@posteo.de>
Cc: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Cc: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
Cc: Siva Rebbagondla <siva8118@gmail.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210507213149.140192-1-marex@denx.de
---
 drivers/net/wireless/rsi/rsi_91x_mac80211.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 16025300cddb..d9f1e73293aa 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -837,6 +837,23 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
 			common->cqm_info.rssi_hyst);
 	}
 
+	if (changed & BSS_CHANGED_BEACON_INT) {
+		rsi_dbg(INFO_ZONE, "%s: Changed Beacon interval: %d\n",
+			__func__, bss_conf->beacon_int);
+		if (common->beacon_interval != bss->beacon_int) {
+			common->beacon_interval = bss->beacon_int;
+			if (vif->type == NL80211_IFTYPE_AP) {
+				struct vif_priv *vif_info = (struct vif_priv *)vif->drv_priv;
+
+				rsi_set_vap_capabilities(common, RSI_OPMODE_AP,
+							 vif->addr, vif_info->vap_id,
+							 VAP_UPDATE);
+			}
+		}
+		adapter->ps_info.listen_interval =
+			bss->beacon_int * adapter->ps_info.num_bcns_per_lis_int;
+	}
+
 	if ((changed & BSS_CHANGED_BEACON_ENABLED) &&
 	    ((vif->type == NL80211_IFTYPE_AP) ||
 	     (vif->type == NL80211_IFTYPE_P2P_GO))) {

From d56b69c4fbc7fc85d1a232967ff72c99c3ea2b95 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 10 May 2021 16:22:37 +0800
Subject: [PATCH 1787/3292] rtlwifi: btcoex: 21a 2ant: Delete several duplicate
 condition branch codes

The statements of the "if (max_interval == 3)" branch are the same as
those of the "else" branch. Delete them to simplify the code.

No functional change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210510082237.3315-1-thunder.leizhen@huawei.com
---
 .../realtek/rtlwifi/btcoexist/halbtc8821a2ant.c  | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
index b998a0eb5f09..c5b8df58d4a2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
@@ -1721,10 +1721,6 @@ static void btc8821a2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 14);
 					coex_dm->ps_tdma_du_adj_type = 14;
-				} else if (max_interval == 3) {
-					btc8821a2ant_ps_tdma(btcoexist,
-							NORMAL_EXEC, true, 15);
-					coex_dm->ps_tdma_du_adj_type = 15;
 				} else {
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 15);
@@ -1739,10 +1735,6 @@ static void btc8821a2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 10);
 					coex_dm->ps_tdma_du_adj_type = 10;
-				} else if (max_interval == 3) {
-					btc8821a2ant_ps_tdma(btcoexist,
-							NORMAL_EXEC, true, 11);
-					coex_dm->ps_tdma_du_adj_type = 11;
 				} else {
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 11);
@@ -1759,10 +1751,6 @@ static void btc8821a2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 6);
 					coex_dm->ps_tdma_du_adj_type = 6;
-				} else if (max_interval == 3) {
-					btc8821a2ant_ps_tdma(btcoexist,
-							NORMAL_EXEC, true, 7);
-					coex_dm->ps_tdma_du_adj_type = 7;
 				} else {
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 7);
@@ -1777,10 +1765,6 @@ static void btc8821a2ant_tdma_duration_adjust(struct btc_coexist *btcoexist,
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 2);
 					coex_dm->ps_tdma_du_adj_type = 2;
-				} else if (max_interval == 3) {
-					btc8821a2ant_ps_tdma(btcoexist,
-							NORMAL_EXEC, true, 3);
-					coex_dm->ps_tdma_du_adj_type = 3;
 				} else {
 					btc8821a2ant_ps_tdma(btcoexist,
 							NORMAL_EXEC, true, 3);

From dd778f89225cd258e8f0fed2b7256124982c8bb5 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 12 May 2021 11:05:14 +0800
Subject: [PATCH 1788/3292] cw1200: add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1620788714-14300-1-git-send-email-zou_wei@huawei.com
---
 drivers/net/wireless/st/cw1200/cw1200_sdio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/st/cw1200/cw1200_sdio.c b/drivers/net/wireless/st/cw1200/cw1200_sdio.c
index b65ec14136c7..4c30b5772ce0 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_sdio.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_sdio.c
@@ -53,6 +53,7 @@ static const struct sdio_device_id cw1200_sdio_ids[] = {
 	{ SDIO_DEVICE(SDIO_VENDOR_ID_STE, SDIO_DEVICE_ID_STE_CW1200) },
 	{ /* end: all zeroes */			},
 };
+MODULE_DEVICE_TABLE(sdio, cw1200_sdio_ids);
 
 /* hwbus_ops implemetation */
 

From c362dd84013e53ce354a8069d0795280c683450c Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:40 +0800
Subject: [PATCH 1789/3292] wl1251: Fix missing function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/ti/wl1251/cmd.c:15: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ti/wl1251/cmd.c:62: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ti/wl1251/cmd.c:103: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ti/wl1251/cmd.c:141: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-11-shenyang39@huawei.com
---
 drivers/net/wireless/ti/wl1251/cmd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c
index d7a869106782..c3be81dc7970 100644
--- a/drivers/net/wireless/ti/wl1251/cmd.c
+++ b/drivers/net/wireless/ti/wl1251/cmd.c
@@ -12,7 +12,7 @@
 #include "acx.h"
 
 /**
- * send command to firmware
+ * wl1251_cmd_send - Send command to firmware
  *
  * @wl: wl struct
  * @id: command id
@@ -59,7 +59,7 @@ out:
 }
 
 /**
- * send test command to firmware
+ * wl1251_cmd_test - Send test command to firmware
  *
  * @wl: wl struct
  * @buf: buffer containing the command, with all headers, must work with dma
@@ -100,7 +100,7 @@ int wl1251_cmd_test(struct wl1251 *wl, void *buf, size_t buf_len, u8 answer)
 }
 
 /**
- * read acx from firmware
+ * wl1251_cmd_interrogate - Read acx from firmware
  *
  * @wl: wl struct
  * @id: acx id
@@ -138,7 +138,7 @@ out:
 }
 
 /**
- * write acx value to firmware
+ * wl1251_cmd_configure - Write acx value to firmware
  *
  * @wl: wl struct
  * @id: acx id

From 03a1b938cf39469da4f27b48cb47fa7b3a2f440c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 13 May 2021 13:24:09 +0100
Subject: [PATCH 1790/3292] rtlwifi: rtl8723ae: remove redundant initialization
 of variable rtstatus

The variable rtstatus is being initialized with a value that is never
read, it is being updated later on. The assignment is redundant and
can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210513122410.59204-1-colin.king@canonical.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
index f8a1de6e9849..c98f2216734f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
@@ -915,7 +915,7 @@ int rtl8723e_hw_init(struct ieee80211_hw *hw)
 	struct rtl_phy *rtlphy = &(rtlpriv->phy);
 	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
-	bool rtstatus = true;
+	bool rtstatus;
 	int err;
 	u8 tmp_u1b;
 	unsigned long flags;

From a99086057e031a88474a7432c7ed0800a3943e84 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 18 May 2021 09:59:59 +0800
Subject: [PATCH 1791/3292] rtlwifi: Remove redundant assignments to
 ul_enc_algo

Variable ul_enc_algo is being initialized with a value that is never
read, it is being set again in the following switch statements in
all of the case and default paths. Hence the unitialization is
redundant and can be removed.

Clean up clang warning:

drivers/net/wireless/realtek/rtlwifi/cam.c:170:6: warning: Value stored
to 'ul_enc_algo' during its initialization is never read
[clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621303199-1542-1-git-send-email-yang.lee@linux.alibaba.com
---
 drivers/net/wireless/realtek/rtlwifi/cam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/cam.c b/drivers/net/wireless/realtek/rtlwifi/cam.c
index 7aa28da39409..7a0355dc6bab 100644
--- a/drivers/net/wireless/realtek/rtlwifi/cam.c
+++ b/drivers/net/wireless/realtek/rtlwifi/cam.c
@@ -167,7 +167,7 @@ void rtl_cam_mark_invalid(struct ieee80211_hw *hw, u8 uc_index)
 
 	u32 ul_command;
 	u32 ul_content;
-	u32 ul_enc_algo = rtlpriv->cfg->maps[SEC_CAM_AES];
+	u32 ul_enc_algo;
 
 	switch (rtlpriv->sec.pairwise_enc_algorithm) {
 	case WEP40_ENCRYPTION:

From 03611cc526f9d4854dcd7cf3b7d17f5544d967e8 Mon Sep 17 00:00:00 2001
From: Ding Senjie <dingsenjie@yulong.com>
Date: Fri, 21 May 2021 14:27:34 +0800
Subject: [PATCH 1792/3292] rtlwifi: Fix spelling of 'download'

downlaod -> download

Signed-off-by: Ding Senjie <dingsenjie@yulong.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210521062734.21284-1-dingsenjie@163.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c
index 38034102aacb..e474b4ec17f3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c
@@ -513,7 +513,7 @@ void rtl92se_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc8,
 
 	/* This bit indicate this packet is used for FW download. */
 	if (tcb_desc->cmd_or_init == DESC_PACKET_TYPE_INIT) {
-		/* For firmware downlaod we only need to set LINIP */
+		/* For firmware download we only need to set LINIP */
 		set_tx_desc_linip(pdesc, tcb_desc->last_inipkt);
 
 		/* 92SE must set as 1 for firmware download HW DMA error */

From 3f60f4685699aa6006e58e424637e8e413e0a94d Mon Sep 17 00:00:00 2001
From: Hang Zhang <zh.nvgt@gmail.com>
Date: Fri, 21 May 2021 15:32:38 -0700
Subject: [PATCH 1793/3292] cw1200: Revert unnecessary patches that fix unreal
 use-after-free bugs

A previous commit 4f68ef64cd7f ("cw1200: Fix concurrency
use-after-free bugs in cw1200_hw_scan()") tried to fix a seemingly
use-after-free bug between cw1200_bss_info_changed() and
cw1200_hw_scan(), where the former frees a sk_buff pointed
to by frame.skb, and the latter accesses the sk_buff
pointed to by frame.skb. However, this issue should be a
false alarm because:

(1) "frame.skb" is not a shared variable between the above
two functions, because "frame" is a local function variable,
each of the two functions has its own local "frame" - they
just happen to have the same variable name.

(2) the sk_buff(s) pointed to by these two "frame.skb" are
also two different object instances, they are individually
allocated by different dev_alloc_skb() within the two above
functions. To free one object instance will not invalidate
the access of another different one.

Based on these facts, the previous commit should be unnecessary.
Moreover, it also introduced a missing unlock which was
addressed in a subsequent commit 51c8d24101c7 ("cw1200: fix missing
unlock on error in cw1200_hw_scan()"). Now that the
original use-after-free is unreal, these two commits should
be reverted. This patch performs the reversion.

Fixes: 4f68ef64cd7f ("cw1200: Fix concurrency use-after-free bugs in cw1200_hw_scan()")
Fixes: 51c8d24101c7 ("cw1200: fix missing unlock on error in cw1200_hw_scan()")
Signed-off-by: Hang Zhang <zh.nvgt@gmail.com>
Acked-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210521223238.25020-1-zh.nvgt@gmail.com
---
 drivers/net/wireless/st/cw1200/scan.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c
index 988581cc134b..1f856fbbc0ea 100644
--- a/drivers/net/wireless/st/cw1200/scan.c
+++ b/drivers/net/wireless/st/cw1200/scan.c
@@ -75,30 +75,27 @@ int cw1200_hw_scan(struct ieee80211_hw *hw,
 	if (req->n_ssids > WSM_SCAN_MAX_NUM_OF_SSIDS)
 		return -EINVAL;
 
-	/* will be unlocked in cw1200_scan_work() */
-	down(&priv->scan.lock);
-	mutex_lock(&priv->conf_mutex);
-
 	frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0,
 		req->ie_len);
-	if (!frame.skb) {
-		mutex_unlock(&priv->conf_mutex);
-		up(&priv->scan.lock);
+	if (!frame.skb)
 		return -ENOMEM;
-	}
 
 	if (req->ie_len)
 		skb_put_data(frame.skb, req->ie, req->ie_len);
 
+	/* will be unlocked in cw1200_scan_work() */
+	down(&priv->scan.lock);
+	mutex_lock(&priv->conf_mutex);
+
 	ret = wsm_set_template_frame(priv, &frame);
 	if (!ret) {
 		/* Host want to be the probe responder. */
 		ret = wsm_set_probe_responder(priv, true);
 	}
 	if (ret) {
-		dev_kfree_skb(frame.skb);
 		mutex_unlock(&priv->conf_mutex);
 		up(&priv->scan.lock);
+		dev_kfree_skb(frame.skb);
 		return ret;
 	}
 
@@ -120,8 +117,8 @@ int cw1200_hw_scan(struct ieee80211_hw *hw,
 		++priv->scan.n_ssids;
 	}
 
-	dev_kfree_skb(frame.skb);
 	mutex_unlock(&priv->conf_mutex);
+	dev_kfree_skb(frame.skb);
 	queue_work(priv->workqueue, &priv->scan.work);
 	return 0;
 }

From 8667ab49a6e0942d64b0dafd30cbf4e0c8b08a8f Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 12:03:39 +0800
Subject: [PATCH 1794/3292] libertas: use DEVICE_ATTR_RW macro

Use DEVICE_ATTR_RW helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210523040339.2724-1-yuehaibing@huawei.com
---
 drivers/net/wireless/marvell/libertas/mesh.c | 149 ++++++++++---------
 1 file changed, 78 insertions(+), 71 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c
index c68814841583..6cbba84989b8 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.c
+++ b/drivers/net/wireless/marvell/libertas/mesh.c
@@ -151,13 +151,13 @@ static uint16_t lbs_mesh_get_channel(struct lbs_private *priv)
  */
 
 /**
- * lbs_anycast_get - Get function for sysfs attribute anycast_mask
+ * anycast_mask_show - Get function for sysfs attribute anycast_mask
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t lbs_anycast_get(struct device *dev,
-		struct device_attribute *attr, char * buf)
+static ssize_t anycast_mask_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_access mesh_access;
@@ -173,14 +173,15 @@ static ssize_t lbs_anycast_get(struct device *dev,
 }
 
 /**
- * lbs_anycast_set - Set function for sysfs attribute anycast_mask
+ * anycast_mask_store - Set function for sysfs attribute anycast_mask
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t lbs_anycast_set(struct device *dev,
-		struct device_attribute *attr, const char * buf, size_t count)
+static ssize_t anycast_mask_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_access mesh_access;
@@ -199,13 +200,13 @@ static ssize_t lbs_anycast_set(struct device *dev,
 }
 
 /**
- * lbs_prb_rsp_limit_get - Get function for sysfs attribute prb_rsp_limit
+ * prb_rsp_limit_show - Get function for sysfs attribute prb_rsp_limit
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t lbs_prb_rsp_limit_get(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t prb_rsp_limit_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_access mesh_access;
@@ -225,14 +226,15 @@ static ssize_t lbs_prb_rsp_limit_get(struct device *dev,
 }
 
 /**
- * lbs_prb_rsp_limit_set - Set function for sysfs attribute prb_rsp_limit
+ * prb_rsp_limit_store - Set function for sysfs attribute prb_rsp_limit
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t lbs_prb_rsp_limit_set(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t prb_rsp_limit_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_access mesh_access;
@@ -259,27 +261,28 @@ static ssize_t lbs_prb_rsp_limit_set(struct device *dev,
 }
 
 /**
- * lbs_mesh_get - Get function for sysfs attribute mesh
+ * lbs_mesh_show - Get function for sysfs attribute mesh
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t lbs_mesh_get(struct device *dev,
-		struct device_attribute *attr, char * buf)
+static ssize_t lbs_mesh_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	return snprintf(buf, 5, "0x%X\n", !!priv->mesh_dev);
 }
 
 /**
- * lbs_mesh_set - Set function for sysfs attribute mesh
+ * lbs_mesh_store - Set function for sysfs attribute mesh
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t lbs_mesh_set(struct device *dev,
-		struct device_attribute *attr, const char * buf, size_t count)
+static ssize_t lbs_mesh_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	int enable;
@@ -301,20 +304,19 @@ static ssize_t lbs_mesh_set(struct device *dev,
  * lbs_mesh attribute to be exported per ethX interface
  * through sysfs (/sys/class/net/ethX/lbs_mesh)
  */
-static DEVICE_ATTR(lbs_mesh, 0644, lbs_mesh_get, lbs_mesh_set);
+static DEVICE_ATTR_RW(lbs_mesh);
 
 /*
  * anycast_mask attribute to be exported per mshX interface
  * through sysfs (/sys/class/net/mshX/anycast_mask)
  */
-static DEVICE_ATTR(anycast_mask, 0644, lbs_anycast_get, lbs_anycast_set);
+static DEVICE_ATTR_RW(anycast_mask);
 
 /*
  * prb_rsp_limit attribute to be exported per mshX interface
  * through sysfs (/sys/class/net/mshX/prb_rsp_limit)
  */
-static DEVICE_ATTR(prb_rsp_limit, 0644, lbs_prb_rsp_limit_get,
-		lbs_prb_rsp_limit_set);
+static DEVICE_ATTR_RW(prb_rsp_limit);
 
 static struct attribute *lbs_mesh_sysfs_entries[] = {
 	&dev_attr_anycast_mask.attr,
@@ -351,13 +353,13 @@ static int mesh_get_default_parameters(struct device *dev,
 }
 
 /**
- * bootflag_get - Get function for sysfs attribute bootflag
+ * bootflag_show - Get function for sysfs attribute bootflag
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t bootflag_get(struct device *dev,
-			    struct device_attribute *attr, char *buf)
+static ssize_t bootflag_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -371,14 +373,14 @@ static ssize_t bootflag_get(struct device *dev,
 }
 
 /**
- * bootflag_set - Set function for sysfs attribute bootflag
+ * bootflag_store - Set function for sysfs attribute bootflag
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t bootflag_set(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
+static ssize_t bootflag_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_config cmd;
@@ -401,13 +403,13 @@ static ssize_t bootflag_set(struct device *dev, struct device_attribute *attr,
 }
 
 /**
- * boottime_get - Get function for sysfs attribute boottime
+ * boottime_show - Get function for sysfs attribute boottime
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t boottime_get(struct device *dev,
-			    struct device_attribute *attr, char *buf)
+static ssize_t boottime_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -421,14 +423,15 @@ static ssize_t boottime_get(struct device *dev,
 }
 
 /**
- * boottime_set - Set function for sysfs attribute boottime
+ * boottime_store - Set function for sysfs attribute boottime
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t boottime_set(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t boottime_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_config cmd;
@@ -460,13 +463,13 @@ static ssize_t boottime_set(struct device *dev,
 }
 
 /**
- * channel_get - Get function for sysfs attribute channel
+ * channel_show - Get function for sysfs attribute channel
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t channel_get(struct device *dev,
-			   struct device_attribute *attr, char *buf)
+static ssize_t channel_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -480,14 +483,14 @@ static ssize_t channel_get(struct device *dev,
 }
 
 /**
- * channel_set - Set function for sysfs attribute channel
+ * channel_store - Set function for sysfs attribute channel
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t channel_set(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t count)
+static ssize_t channel_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct lbs_private *priv = to_net_dev(dev)->ml_priv;
 	struct cmd_ds_mesh_config cmd;
@@ -510,13 +513,13 @@ static ssize_t channel_set(struct device *dev, struct device_attribute *attr,
 }
 
 /**
- * mesh_id_get - Get function for sysfs attribute mesh_id
+ * mesh_id_show - Get function for sysfs attribute mesh_id
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t mesh_id_get(struct device *dev, struct device_attribute *attr,
-			   char *buf)
+static ssize_t mesh_id_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -539,14 +542,14 @@ static ssize_t mesh_id_get(struct device *dev, struct device_attribute *attr,
 }
 
 /**
- * mesh_id_set - Set function for sysfs attribute mesh_id
+ * mesh_id_store - Set function for sysfs attribute mesh_id
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t mesh_id_set(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t count)
+static ssize_t mesh_id_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct cmd_ds_mesh_config cmd;
 	struct mrvl_mesh_defaults defs;
@@ -585,13 +588,14 @@ static ssize_t mesh_id_set(struct device *dev, struct device_attribute *attr,
 }
 
 /**
- * protocol_id_get - Get function for sysfs attribute protocol_id
+ * protocol_id_show - Get function for sysfs attribute protocol_id
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t protocol_id_get(struct device *dev,
-			       struct device_attribute *attr, char *buf)
+static ssize_t protocol_id_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -605,14 +609,15 @@ static ssize_t protocol_id_get(struct device *dev,
 }
 
 /**
- * protocol_id_set - Set function for sysfs attribute protocol_id
+ * protocol_id_store - Set function for sysfs attribute protocol_id
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t protocol_id_set(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t protocol_id_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
 {
 	struct cmd_ds_mesh_config cmd;
 	struct mrvl_mesh_defaults defs;
@@ -646,13 +651,13 @@ static ssize_t protocol_id_set(struct device *dev,
 }
 
 /**
- * metric_id_get - Get function for sysfs attribute metric_id
+ * metric_id_show - Get function for sysfs attribute metric_id
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t metric_id_get(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t metric_id_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -666,14 +671,15 @@ static ssize_t metric_id_get(struct device *dev,
 }
 
 /**
- * metric_id_set - Set function for sysfs attribute metric_id
+ * metric_id_store - Set function for sysfs attribute metric_id
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t metric_id_set(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
+static ssize_t metric_id_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
 {
 	struct cmd_ds_mesh_config cmd;
 	struct mrvl_mesh_defaults defs;
@@ -707,13 +713,13 @@ static ssize_t metric_id_set(struct device *dev, struct device_attribute *attr,
 }
 
 /**
- * capability_get - Get function for sysfs attribute capability
+ * capability_show - Get function for sysfs attribute capability
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer where data will be returned
  */
-static ssize_t capability_get(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t capability_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
 {
 	struct mrvl_mesh_defaults defs;
 	int ret;
@@ -727,14 +733,15 @@ static ssize_t capability_get(struct device *dev,
 }
 
 /**
- * capability_set - Set function for sysfs attribute capability
+ * capability_store - Set function for sysfs attribute capability
  * @dev: the &struct device
  * @attr: device attributes
  * @buf: buffer that contains new attribute value
  * @count: size of buffer
  */
-static ssize_t capability_set(struct device *dev, struct device_attribute *attr,
-			      const char *buf, size_t count)
+static ssize_t capability_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
 {
 	struct cmd_ds_mesh_config cmd;
 	struct mrvl_mesh_defaults defs;
@@ -768,13 +775,13 @@ static ssize_t capability_set(struct device *dev, struct device_attribute *attr,
 }
 
 
-static DEVICE_ATTR(bootflag, 0644, bootflag_get, bootflag_set);
-static DEVICE_ATTR(boottime, 0644, boottime_get, boottime_set);
-static DEVICE_ATTR(channel, 0644, channel_get, channel_set);
-static DEVICE_ATTR(mesh_id, 0644, mesh_id_get, mesh_id_set);
-static DEVICE_ATTR(protocol_id, 0644, protocol_id_get, protocol_id_set);
-static DEVICE_ATTR(metric_id, 0644, metric_id_get, metric_id_set);
-static DEVICE_ATTR(capability, 0644, capability_get, capability_set);
+static DEVICE_ATTR_RW(bootflag);
+static DEVICE_ATTR_RW(boottime);
+static DEVICE_ATTR_RW(channel);
+static DEVICE_ATTR_RW(mesh_id);
+static DEVICE_ATTR_RW(protocol_id);
+static DEVICE_ATTR_RW(metric_id);
+static DEVICE_ATTR_RW(capability);
 
 static struct attribute *boot_opts_attrs[] = {
 	&dev_attr_bootflag.attr,

From 314538041b5632ffaf64798faaeabaf2793fe029 Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <martin.fuzzey@flowbird.group>
Date: Tue, 1 Jun 2021 18:19:53 +0200
Subject: [PATCH 1795/3292] rsi: fix AP mode with WPA failure due to encrypted
 EAPOL

In AP mode WPA2-PSK connections were not established.

The reason was that the AP was sending the first message
of the 4 way handshake encrypted, even though no pairwise
key had (correctly) yet been set.

Encryption was enabled if the "security_enable" driver flag
was set and encryption was not explicitly disabled by
IEEE80211_TX_INTFL_DONT_ENCRYPT.

However security_enable was set when *any* key, including
the AP GTK key, had been set which was causing unwanted
encryption even if no key was avaialble for the unicast
packet to be sent.

Fix this by adding a check that we have a key and drop
the old security_enable driver flag which is insufficient
and redundant.

The Redpine downstream out of tree driver does it this way too.

Regarding the Fixes tag the actual code being modified was
introduced earlier, with the original driver submission, in
dad0d04fa7ba ("rsi: Add RS9113 wireless driver"), however
at that time AP mode was not yet supported so there was
no bug at that point.

So I have tagged the introduction of AP support instead
which was part of the patch set "rsi: support for AP mode" [1]

It is not clear whether AP WPA has ever worked, I can see nothing
on the kernel side that broke it afterwards yet the AP support
patch series says "Tests are performed to confirm aggregation,
connections in WEP and WPA/WPA2 security."

One possibility is that the initial tests were done with a modified
userspace (hostapd).

[1] https://www.spinics.net/lists/linux-wireless/msg165302.html

Signed-off-by: Martin Fuzzey <martin.fuzzey@flowbird.group>
Fixes: 38ef62353acb ("rsi: security enhancements for AP mode")
CC: stable@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1622564459-24430-1-git-send-email-martin.fuzzey@flowbird.group
---
 drivers/net/wireless/rsi/rsi_91x_hal.c      | 2 +-
 drivers/net/wireless/rsi/rsi_91x_mac80211.c | 3 ---
 drivers/net/wireless/rsi/rsi_91x_mgmt.c     | 3 +--
 drivers/net/wireless/rsi/rsi_main.h         | 1 -
 4 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index ab837921d9a4..99b21a2c8386 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -203,7 +203,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
 		wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE);
 
 	if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) &&
-	    (common->secinfo.security_enable)) {
+	    info->control.hw_key) {
 		if (rsi_is_cipher_wep(common))
 			ieee80211_size += 4;
 		else
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index d9f1e73293aa..b66975f54567 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -1045,7 +1045,6 @@ static int rsi_mac80211_set_key(struct ieee80211_hw *hw,
 	mutex_lock(&common->mutex);
 	switch (cmd) {
 	case SET_KEY:
-		secinfo->security_enable = true;
 		status = rsi_hal_key_config(hw, vif, key, sta);
 		if (status) {
 			mutex_unlock(&common->mutex);
@@ -1064,8 +1063,6 @@ static int rsi_mac80211_set_key(struct ieee80211_hw *hw,
 		break;
 
 	case DISABLE_KEY:
-		if (vif->type == NL80211_IFTYPE_STATION)
-			secinfo->security_enable = false;
 		rsi_dbg(ERR_ZONE, "%s: RSI del key\n", __func__);
 		memset(key, 0, sizeof(struct ieee80211_key_conf));
 		status = rsi_hal_key_config(hw, vif, key, sta);
diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index dffe1d6cc592..891fd5f0fa76 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -1803,8 +1803,7 @@ int rsi_send_wowlan_request(struct rsi_common *common, u16 flags,
 			RSI_WIFI_MGMT_Q);
 	cmd_frame->desc.desc_dword0.frame_type = WOWLAN_CONFIG_PARAMS;
 	cmd_frame->host_sleep_status = sleep_status;
-	if (common->secinfo.security_enable &&
-	    common->secinfo.gtk_cipher)
+	if (common->secinfo.gtk_cipher)
 		flags |= RSI_WOW_GTK_REKEY;
 	if (sleep_status)
 		cmd_frame->wow_flags = flags;
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index a1065e5a92b4..0f535850a383 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -151,7 +151,6 @@ enum edca_queue {
 };
 
 struct security_info {
-	bool security_enable;
 	u32 ptk_cipher;
 	u32 gtk_cipher;
 };

From 3b0c7b2415e52c48d47011c393bab7239fb59250 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:31 +0800
Subject: [PATCH 1796/3292] ath5k: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/ath/ath5k/pcu.c:865: warning: expecting prototype for at5k_hw_stop_rx_pcu(). Prototype was for ath5k_hw_stop_rx_pcu() instead

Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Nick Kossifidis <mickflemm@gmail.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-2-shenyang39@huawei.com
---
 drivers/net/wireless/ath/ath5k/pcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c
index f2db7cf16566..3f4ce4e9c532 100644
--- a/drivers/net/wireless/ath/ath5k/pcu.c
+++ b/drivers/net/wireless/ath/ath5k/pcu.c
@@ -855,7 +855,7 @@ ath5k_hw_start_rx_pcu(struct ath5k_hw *ah)
 }
 
 /**
- * at5k_hw_stop_rx_pcu() - Stop RX engine
+ * ath5k_hw_stop_rx_pcu() - Stop RX engine
  * @ah: The &struct ath5k_hw
  *
  * Stops RX engine on PCU

From 2d1f8673ad6c38b3e5de646bf7113948a9145e9e Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:32 +0800
Subject: [PATCH 1797/3292] ath: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/ath/hw.c:119: warning: expecting prototype for ath_hw_set_bssid_mask(). Prototype was for ath_hw_setbssidmask() instead

Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-3-shenyang39@huawei.com
---
 drivers/net/wireless/ath/hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/hw.c b/drivers/net/wireless/ath/hw.c
index eae9abf540a7..b53ebb3ac9a2 100644
--- a/drivers/net/wireless/ath/hw.c
+++ b/drivers/net/wireless/ath/hw.c
@@ -24,7 +24,7 @@
 #define REG_WRITE(_ah, _reg, _val)	(common->ops->write)(_ah, _val, _reg)
 
 /**
- * ath_hw_set_bssid_mask - filter out bssids we listen
+ * ath_hw_setbssidmask - filter out bssids we listen
  *
  * @common: the ath_common struct for the device.
  *

From 9d1bb2289b426c9554eeaf78e52e1dc43671dce7 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Mon, 17 May 2021 13:01:33 +0800
Subject: [PATCH 1798/3292] wil6210: Fix wrong function name in comments

Fixes the following W=1 kernel build warning(s):

 drivers/net/wireless/ath/wil6210/interrupt.c:28: warning: expecting prototype for Theory of operation(). Prototype was for WIL6210_IRQ_DISABLE() instead
 drivers/net/wireless/ath/wil6210/wmi.c:227: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ath/wil6210/wmi.c:245: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/net/wireless/ath/wil6210/wmi.c:263: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Cc: Maya Erez <merez@codeaurora.org>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210517050141.61488-4-shenyang39@huawei.com
---
 drivers/net/wireless/ath/wil6210/interrupt.c | 2 +-
 drivers/net/wireless/ath/wil6210/wmi.c       | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index d13d081fdcc6..67172385a5d6 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c
@@ -9,7 +9,7 @@
 #include "wil6210.h"
 #include "trace.h"
 
-/**
+/*
  * Theory of operation:
  *
  * There is ISR pseudo-cause register,
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 02ad44997e87..2dc8406736f4 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -224,7 +224,7 @@ struct auth_no_hdr {
 u8 led_polarity = LED_POLARITY_LOW_ACTIVE;
 
 /**
- * return AHB address for given firmware internal (linker) address
+ * wmi_addr_remap - return AHB address for given firmware internal (linker) address
  * @x: internal address
  * If address have no valid AHB mapping, return 0
  */
@@ -242,7 +242,7 @@ static u32 wmi_addr_remap(u32 x)
 }
 
 /**
- * find fw_mapping entry by section name
+ * wil_find_fw_mapping - find fw_mapping entry by section name
  * @section: section name
  *
  * Return pointer to section or NULL if not found
@@ -260,7 +260,7 @@ struct fw_map *wil_find_fw_mapping(const char *section)
 }
 
 /**
- * Check address validity for WMI buffer; remap if needed
+ * wmi_buffer_block - Check address validity for WMI buffer; remap if needed
  * @wil: driver data
  * @ptr_: internal (linker) fw/ucode address
  * @size: if non zero, validate the block does not

From 515bda1d1e51c64edf2a384a58801f85a80a3f2d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 22 May 2021 11:50:54 +0200
Subject: [PATCH 1799/3292] ath11k: Fix an error handling path in
 ath11k_core_fetch_board_data_api_n()

All error paths but this one 'goto err' in order to release some
resources.
Fix this.

Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/e959eb544f3cb04258507d8e25a6f12eab126bde.1621676864.git.christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/ath/ath11k/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 4a1051418f33..969bf1a590d9 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -532,7 +532,8 @@ static int ath11k_core_fetch_board_data_api_n(struct ath11k_base *ab,
 		if (len < ALIGN(ie_len, 4)) {
 			ath11k_err(ab, "invalid length for board ie_id %d ie_len %zu len %zu\n",
 				   ie_id, ie_len, len);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err;
 		}
 
 		switch (ie_id) {

From e9ca70c735ce66fc6a0e02c8b6958434f74ef8de Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 25 May 2021 18:46:17 +0800
Subject: [PATCH 1800/3292] ath10k: Fix an error code in ath10k_add_interface()

When the code execute this if statement, the value of ret is 0.
However, we can see from the ath10k_warn() log that the value of
ret should be -EINVAL.

Clean up smatch warning:

drivers/net/wireless/ath/ath10k/mac.c:5596 ath10k_add_interface() warn:
missing error code 'ret'

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: ccec9038c721 ("ath10k: enable raw encap mode and software crypto engine")
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1621939577-62218-1-git-send-email-yang.lee@linux.alibaba.com
---
 drivers/net/wireless/ath/ath10k/mac.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 5ce4f8d038b9..c272b290fa73 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -5592,6 +5592,7 @@ static int ath10k_add_interface(struct ieee80211_hw *hw,
 
 	if (arvif->nohwcrypt &&
 	    !test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) {
+		ret = -EINVAL;
 		ath10k_warn(ar, "cryptmode module param needed for sw crypto\n");
 		goto err;
 	}

From a8b1de7f4f68d558fb952e765aa25e90b50a2388 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 31 May 2021 16:07:39 +0800
Subject: [PATCH 1801/3292] ath10k: remove the repeated declaration

Functions 'ath10k_pci_free_pipes' and 'ath10k_wmi_alloc_skb'
are declared twice in their header file, so remove the repeated
declaration.

Cc: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1622448459-50805-1-git-send-email-zhangshaokun@hisilicon.com
---
 drivers/net/wireless/ath/ath10k/pci.h | 1 -
 drivers/net/wireless/ath/ath10k/wmi.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index 862d0901c5b8..cf64898b9447 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -235,7 +235,6 @@ u16 ath10k_pci_hif_get_free_queue_number(struct ath10k *ar, u8 pipe);
 void ath10k_pci_hif_power_down(struct ath10k *ar);
 int ath10k_pci_alloc_pipes(struct ath10k *ar);
 void ath10k_pci_free_pipes(struct ath10k *ar);
-void ath10k_pci_free_pipes(struct ath10k *ar);
 void ath10k_pci_rx_replenish_retry(struct timer_list *t);
 void ath10k_pci_ce_deinit(struct ath10k *ar);
 void ath10k_pci_init_napi(struct ath10k *ar);
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index dd980c81793e..41c1a3d339c2 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -7418,7 +7418,6 @@ int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar);
 struct sk_buff *ath10k_wmi_alloc_skb(struct ath10k *ar, u32 len);
 int ath10k_wmi_connect(struct ath10k *ar);
 
-struct sk_buff *ath10k_wmi_alloc_skb(struct ath10k *ar, u32 len);
 int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id);
 int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb,
 			       u32 cmd_id);

From ea1c2023efbc268f3d96b09e945af9648723d393 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 5 Jun 2021 19:02:27 +0800
Subject: [PATCH 1802/3292] ath10k: Use
 devm_platform_get_and_ioremap_resource()

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210605110227.2429420-1-yangyingliang@huawei.com
---
 drivers/net/wireless/ath/ath10k/ahb.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c
index 869524852fba..ab8f77ae5e66 100644
--- a/drivers/net/wireless/ath/ath10k/ahb.c
+++ b/drivers/net/wireless/ath/ath10k/ahb.c
@@ -442,14 +442,7 @@ static int ath10k_ahb_resource_init(struct ath10k *ar)
 
 	pdev = ar_ahb->pdev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		ath10k_err(ar, "failed to get memory resource\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
-	ar_ahb->mem = devm_ioremap_resource(&pdev->dev, res);
+	ar_ahb->mem = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(ar_ahb->mem)) {
 		ath10k_err(ar, "mem ioremap error\n");
 		ret = PTR_ERR(ar_ahb->mem);

From 979ebc54cf13bd1e3eb6e21766d208d5de984fb8 Mon Sep 17 00:00:00 2001
From: Seevalamuthu Mariappan <seevalam@codeaurora.org>
Date: Tue, 25 May 2021 15:30:28 +0200
Subject: [PATCH 1803/3292] ath11k: send beacon template after
 vdev_start/restart during csa

Firmware has added assert if beacon template is received after
vdev_down. Firmware expects beacon template after vdev_start
and before vdev_up. This change is needed to support MBSSID EMA
cases in firmware.

Hence, Change the sequence in ath11k as expected from firmware.
This new change is not causing any issues with older
firmware.

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.5.0.1.r3-00011-QCAHKSWPL_SILICONZ-1
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.5.0.1.r4-00008-QCAHKSWPL_SILICONZ-1

Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices")
Signed-off-by: Seevalamuthu Mariappan <seevalam@codeaurora.org>
[sven@narfation.org: added tested-on/fixes information]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210525133028.2805615-1-sven@narfation.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 9d0ff150ec30..eb52332dbe3f 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -5379,11 +5379,6 @@ ath11k_mac_update_vif_chan(struct ath11k *ar,
 		if (WARN_ON(!arvif->is_up))
 			continue;
 
-		ret = ath11k_mac_setup_bcn_tmpl(arvif);
-		if (ret)
-			ath11k_warn(ab, "failed to update bcn tmpl during csa: %d\n",
-				    ret);
-
 		ret = ath11k_mac_vdev_restart(arvif, &vifs[i].new_ctx->def);
 		if (ret) {
 			ath11k_warn(ab, "failed to restart vdev %d: %d\n",
@@ -5391,6 +5386,11 @@ ath11k_mac_update_vif_chan(struct ath11k *ar,
 			continue;
 		}
 
+		ret = ath11k_mac_setup_bcn_tmpl(arvif);
+		if (ret)
+			ath11k_warn(ab, "failed to update bcn tmpl during csa: %d\n",
+				    ret);
+
 		ret = ath11k_wmi_vdev_up(arvif->ar, arvif->vdev_id, arvif->aid,
 					 arvif->bssid);
 		if (ret) {

From 75596eabd6e46e5afc31568f6f4e4c0c12a8906c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 26 May 2021 18:52:56 +0800
Subject: [PATCH 1804/3292] ath6kl: Fix inconsistent indenting

Eliminate the follow smatch warning:

drivers/net/wireless/ath/ath6kl/cfg80211.c:3308
ath6kl_cfg80211_sscan_start() warn: inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1622026376-68524-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 29527e8dcced..fefdc6753acd 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -3303,8 +3303,8 @@ static int ath6kl_cfg80211_sscan_start(struct wiphy *wiphy,
 		if (ret < 0)
 			return ret;
 	} else {
-		 ret = ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx,
-						MATCHED_SSID_FILTER, 0);
+		ret = ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx,
+					       MATCHED_SSID_FILTER, 0);
 		if (ret < 0)
 			return ret;
 	}

From 8f78caa2264ece71c2e207cba023f28ab6665138 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 26 Apr 2021 21:29:30 +0200
Subject: [PATCH 1805/3292] wil6210: remove erroneous wiphy locking

We already hold the wiphy lock in all cases when we get
here, so this would deadlock, remove the erroneous locking.

Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210426212929.83f1de07c2cd.I630a2a00eff185ba0452324b3d3f645e01128a95@changeid
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 6746fd206d2a..1ff2679963f0 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -2842,9 +2842,7 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil)
 	wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
 	mutex_unlock(&wil->vif_mutex);
 	if (p2p_wdev) {
-		wiphy_lock(wil->wiphy);
 		cfg80211_unregister_wdev(p2p_wdev);
-		wiphy_unlock(wil->wiphy);
 		kfree(p2p_wdev);
 	}
 }

From f9ac779f881c2ec3d1cdcd7fa9d4f9442bf60e80 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:14 +0900
Subject: [PATCH 1806/3292] net: Introduce net.ipv4.tcp_migrate_req.

This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this
option is enabled or eBPF program is attached, we will be able to migrate
child sockets from a listener to another in the same reuseport group after
close() or shutdown() syscalls.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-2-kuniyu@amazon.co.jp
---
 Documentation/networking/ip-sysctl.rst | 25 +++++++++++++++++++++++++
 include/net/netns/ipv4.h               |  1 +
 net/ipv4/sysctl_net_ipv4.c             |  9 +++++++++
 3 files changed, 35 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index a5c250044500..b0436d3a4f11 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -761,6 +761,31 @@ tcp_syncookies - INTEGER
 	network connections you can set this knob to 2 to enable
 	unconditionally generation of syncookies.
 
+tcp_migrate_req - BOOLEAN
+	The incoming connection is tied to a specific listening socket when
+	the initial SYN packet is received during the three-way handshake.
+	When a listener is closed, in-flight request sockets during the
+	handshake and established sockets in the accept queue are aborted.
+
+	If the listener has SO_REUSEPORT enabled, other listeners on the
+	same port should have been able to accept such connections. This
+	option makes it possible to migrate such child sockets to another
+	listener after close() or shutdown().
+
+	The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should
+	usually be used to define the policy to pick an alive listener.
+	Otherwise, the kernel will randomly pick an alive listener only if
+	this option is enabled.
+
+	Note that migration between listeners with different settings may
+	crash applications. Let's say migration happens from listener A to
+	B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from
+	the requests migrated from A. To avoid such a situation, cancel
+	migration by returning SK_DROP in the type of eBPF program, or
+	disable this option.
+
+	Default: 0
+
 tcp_fastopen - INTEGER
 	Enable TCP Fast Open (RFC7413) to send and accept data in the opening
 	SYN packet.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 746c80cd4257..b8620519eace 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -126,6 +126,7 @@ struct netns_ipv4 {
 	u8 sysctl_tcp_syn_retries;
 	u8 sysctl_tcp_synack_retries;
 	u8 sysctl_tcp_syncookies;
+	u8 sysctl_tcp_migrate_req;
 	int sysctl_tcp_reordering;
 	u8 sysctl_tcp_retries1;
 	u8 sysctl_tcp_retries2;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4fa77f182dcb..6f1e64d49232 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -960,6 +960,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dou8vec_minmax,
 	},
 #endif
+	{
+		.procname	= "tcp_migrate_req",
+		.data		= &init_net.ipv4.sysctl_tcp_migrate_req,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE
+	},
 	{
 		.procname	= "tcp_reordering",
 		.data		= &init_net.ipv4.sysctl_tcp_reordering,

From 5c040eaf5d1753aafe12989ca712175df0b9c436 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:15 +0900
Subject: [PATCH 1807/3292] tcp: Add num_closed_socks to struct sock_reuseport.

As noted in the following commit, a closed listener has to hold the
reference to the reuseport group for socket migration. This patch adds a
field (num_closed_socks) to struct sock_reuseport to manage closed sockets
within the same reuseport group. Moreover, this and the following commits
introduce some helper functions to split socks[] into two sections and keep
TCP_LISTEN and TCP_CLOSE sockets in each section. Like a double-ended
queue, we will place TCP_LISTEN sockets from the front and TCP_CLOSE
sockets from the end.

  TCP_LISTEN---------->       <-------TCP_CLOSE
  +---+---+  ---  +---+  ---  +---+  ---  +---+
  | 0 | 1 |  ...  | i |  ...  | j |  ...  | k |
  +---+---+  ---  +---+  ---  +---+  ---  +---+

  i = num_socks - 1
  j = max_socks - num_closed_socks
  k = max_socks - 1

This patch also extends reuseport_add_sock() and reuseport_grow() to
support num_closed_socks.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-3-kuniyu@amazon.co.jp
---
 include/net/sock_reuseport.h |  5 ++-
 net/core/sock_reuseport.c    | 75 +++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 505f1e18e9bf..0e558ca7afbf 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock;
 struct sock_reuseport {
 	struct rcu_head		rcu;
 
-	u16			max_socks;	/* length of socks */
-	u16			num_socks;	/* elements in socks */
+	u16			max_socks;		/* length of socks */
+	u16			num_socks;		/* elements in socks */
+	u16			num_closed_socks;	/* closed elements in socks */
 	/* The last synq overflow event timestamp of this
 	 * reuse->socks[] group.
 	 */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index b065f0a103ed..f478c65a281b 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -18,6 +18,49 @@ DEFINE_SPINLOCK(reuseport_lock);
 
 static DEFINE_IDA(reuseport_ida);
 
+static int reuseport_sock_index(struct sock *sk,
+				const struct sock_reuseport *reuse,
+				bool closed)
+{
+	int left, right;
+
+	if (!closed) {
+		left = 0;
+		right = reuse->num_socks;
+	} else {
+		left = reuse->max_socks - reuse->num_closed_socks;
+		right = reuse->max_socks;
+	}
+
+	for (; left < right; left++)
+		if (reuse->socks[left] == sk)
+			return left;
+	return -1;
+}
+
+static void __reuseport_add_sock(struct sock *sk,
+				 struct sock_reuseport *reuse)
+{
+	reuse->socks[reuse->num_socks] = sk;
+	/* paired with smp_rmb() in reuseport_select_sock() */
+	smp_wmb();
+	reuse->num_socks++;
+}
+
+static bool __reuseport_detach_sock(struct sock *sk,
+				    struct sock_reuseport *reuse)
+{
+	int i = reuseport_sock_index(sk, reuse, false);
+
+	if (i == -1)
+		return false;
+
+	reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+	reuse->num_socks--;
+
+	return true;
+}
+
 static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
 	unsigned int size = sizeof(struct sock_reuseport) +
@@ -72,9 +115,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 	}
 
 	reuse->reuseport_id = id;
+	reuse->bind_inany = bind_inany;
 	reuse->socks[0] = sk;
 	reuse->num_socks = 1;
-	reuse->bind_inany = bind_inany;
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -98,6 +141,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 		return NULL;
 
 	more_reuse->num_socks = reuse->num_socks;
+	more_reuse->num_closed_socks = reuse->num_closed_socks;
 	more_reuse->prog = reuse->prog;
 	more_reuse->reuseport_id = reuse->reuseport_id;
 	more_reuse->bind_inany = reuse->bind_inany;
@@ -105,9 +149,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
+	memcpy(more_reuse->socks +
+	       (more_reuse->max_socks - more_reuse->num_closed_socks),
+	       reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
+	       reuse->num_closed_socks * sizeof(struct sock *));
 	more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
 
-	for (i = 0; i < reuse->num_socks; ++i)
+	for (i = 0; i < reuse->max_socks; ++i)
 		rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
 				   more_reuse);
 
@@ -158,7 +206,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 		return -EBUSY;
 	}
 
-	if (reuse->num_socks == reuse->max_socks) {
+	if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
 		reuse = reuseport_grow(reuse);
 		if (!reuse) {
 			spin_unlock_bh(&reuseport_lock);
@@ -166,10 +214,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 		}
 	}
 
-	reuse->socks[reuse->num_socks] = sk;
-	/* paired with smp_rmb() in reuseport_select_sock() */
-	smp_wmb();
-	reuse->num_socks++;
+	__reuseport_add_sock(sk, reuse);
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 	spin_unlock_bh(&reuseport_lock);
@@ -183,7 +228,6 @@ EXPORT_SYMBOL(reuseport_add_sock);
 void reuseport_detach_sock(struct sock *sk)
 {
 	struct sock_reuseport *reuse;
-	int i;
 
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
@@ -200,16 +244,11 @@ void reuseport_detach_sock(struct sock *sk)
 	bpf_sk_reuseport_detach(sk);
 
 	rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
+	__reuseport_detach_sock(sk, reuse);
+
+	if (reuse->num_socks + reuse->num_closed_socks == 0)
+		call_rcu(&reuse->rcu, reuseport_free_rcu);
 
-	for (i = 0; i < reuse->num_socks; i++) {
-		if (reuse->socks[i] == sk) {
-			reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
-			reuse->num_socks--;
-			if (reuse->num_socks == 0)
-				call_rcu(&reuse->rcu, reuseport_free_rcu);
-			break;
-		}
-	}
 	spin_unlock_bh(&reuseport_lock);
 }
 EXPORT_SYMBOL(reuseport_detach_sock);
@@ -274,7 +313,7 @@ struct sock *reuseport_select_sock(struct sock *sk,
 	prog = rcu_dereference(reuse->prog);
 	socks = READ_ONCE(reuse->num_socks);
 	if (likely(socks)) {
-		/* paired with smp_wmb() in reuseport_add_sock() */
+		/* paired with smp_wmb() in __reuseport_add_sock() */
 		smp_rmb();
 
 		if (!prog || !skb)

From 333bb73f620e1a5f2e0b8df2c0d25300fab36d89 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:16 +0900
Subject: [PATCH 1808/3292] tcp: Keep TCP_CLOSE sockets in the reuseport group.

When we close a listening socket, to migrate its connections to another
listener in the same reuseport group, we have to handle two kinds of child
sockets. One is that a listening socket has a reference to, and the other
is not.

The former is the TCP_ESTABLISHED/TCP_SYN_RECV sockets, and they are in the
accept queue of their listening socket. So we can pop them out and push
them into another listener's queue at close() or shutdown() syscalls. On
the other hand, the latter, the TCP_NEW_SYN_RECV socket is during the
three-way handshake and not in the accept queue. Thus, we cannot access
such sockets at close() or shutdown() syscalls. Accordingly, we have to
migrate immature sockets after their listening socket has been closed.

Currently, if their listening socket has been closed, TCP_NEW_SYN_RECV
sockets are freed at receiving the final ACK or retransmitting SYN+ACKs. At
that time, if we could select a new listener from the same reuseport group,
no connection would be aborted. However, we cannot do that because
reuseport_detach_sock() sets NULL to sk_reuseport_cb and forbids access to
the reuseport group from closed sockets.

This patch allows TCP_CLOSE sockets to remain in the reuseport group and
access it while any child socket references them. The point is that
reuseport_detach_sock() was called twice from inet_unhash() and
sk_destruct(). This patch replaces the first reuseport_detach_sock() with
reuseport_stop_listen_sock(), which checks if the reuseport group is
capable of migration. If capable, it decrements num_socks, moves the socket
backwards in socks[] and increments num_closed_socks. When all connections
are migrated, sk_destruct() calls reuseport_detach_sock() to remove the
socket from socks[], decrement num_closed_socks, and set NULL to
sk_reuseport_cb.

By this change, closed or shutdowned sockets can keep sk_reuseport_cb.
Consequently, calling listen() after shutdown() can cause EADDRINUSE or
EBUSY in inet_csk_bind_conflict() or reuseport_add_sock() which expects
such sockets not to have the reuseport group. Therefore, this patch also
loosens such validation rules so that a socket can listen again if it has a
reuseport group with num_closed_socks more than 0.

When such sockets listen again, we handle them in reuseport_resurrect(). If
there is an existing reuseport group (reuseport_add_sock() path), we move
the socket from the old group to the new one and free the old one if
necessary. If there is no existing group (reuseport_alloc() path), we
allocate a new reuseport group, detach sk from the old one, and free it if
necessary, not to break the current shutdown behaviour:

  - we cannot carry over the eBPF prog of shutdowned sockets
  - we cannot attach/detach an eBPF prog to/from listening sockets via
    shutdowned sockets

Note that when the number of sockets gets over U16_MAX, we try to detach a
closed socket randomly to make room for the new listening socket in
reuseport_grow().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-4-kuniyu@amazon.co.jp
---
 include/net/sock_reuseport.h    |   1 +
 net/core/sock_reuseport.c       | 182 ++++++++++++++++++++++++++++++--
 net/ipv4/inet_connection_sock.c |  12 ++-
 net/ipv4/inet_hashtables.c      |   2 +-
 4 files changed, 186 insertions(+), 11 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0e558ca7afbf..1333d0cddfbc 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -32,6 +32,7 @@ extern int reuseport_alloc(struct sock *sk, bool bind_inany);
 extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
 			      bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
+void reuseport_stop_listen_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
 					  struct sk_buff *skb,
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index f478c65a281b..41fcd55ab5ae 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -17,6 +17,8 @@
 DEFINE_SPINLOCK(reuseport_lock);
 
 static DEFINE_IDA(reuseport_ida);
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+			       struct sock_reuseport *reuse, bool bind_inany);
 
 static int reuseport_sock_index(struct sock *sk,
 				const struct sock_reuseport *reuse,
@@ -61,6 +63,29 @@ static bool __reuseport_detach_sock(struct sock *sk,
 	return true;
 }
 
+static void __reuseport_add_closed_sock(struct sock *sk,
+					struct sock_reuseport *reuse)
+{
+	reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
+	/* paired with READ_ONCE() in inet_csk_bind_conflict() */
+	WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
+}
+
+static bool __reuseport_detach_closed_sock(struct sock *sk,
+					   struct sock_reuseport *reuse)
+{
+	int i = reuseport_sock_index(sk, reuse, true);
+
+	if (i == -1)
+		return false;
+
+	reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+	/* paired with READ_ONCE() in inet_csk_bind_conflict() */
+	WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
+
+	return true;
+}
+
 static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
 	unsigned int size = sizeof(struct sock_reuseport) +
@@ -92,6 +117,12 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
 	if (reuse) {
+		if (reuse->num_closed_socks) {
+			/* sk was shutdown()ed before */
+			ret = reuseport_resurrect(sk, reuse, NULL, bind_inany);
+			goto out;
+		}
+
 		/* Only set reuse->bind_inany if the bind_inany is true.
 		 * Otherwise, it will overwrite the reuse->bind_inany
 		 * which was set by the bind/hash path.
@@ -133,8 +164,23 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	u32 more_socks_size, i;
 
 	more_socks_size = reuse->max_socks * 2U;
-	if (more_socks_size > U16_MAX)
+	if (more_socks_size > U16_MAX) {
+		if (reuse->num_closed_socks) {
+			/* Make room by removing a closed sk.
+			 * The child has already been migrated.
+			 * Only reqsk left at this point.
+			 */
+			struct sock *sk;
+
+			sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+			RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL);
+			__reuseport_detach_closed_sock(sk, reuse);
+
+			return reuse;
+		}
+
 		return NULL;
+	}
 
 	more_reuse = __reuseport_alloc(more_socks_size);
 	if (!more_reuse)
@@ -200,7 +246,15 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 	reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
 	old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
-					     lockdep_is_held(&reuseport_lock));
+					      lockdep_is_held(&reuseport_lock));
+	if (old_reuse && old_reuse->num_closed_socks) {
+		/* sk was shutdown()ed before */
+		int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany);
+
+		spin_unlock_bh(&reuseport_lock);
+		return err;
+	}
+
 	if (old_reuse && old_reuse->num_socks != 1) {
 		spin_unlock_bh(&reuseport_lock);
 		return -EBUSY;
@@ -225,6 +279,65 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 }
 EXPORT_SYMBOL(reuseport_add_sock);
 
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+			       struct sock_reuseport *reuse, bool bind_inany)
+{
+	if (old_reuse == reuse) {
+		/* If sk was in the same reuseport group, just pop sk out of
+		 * the closed section and push sk into the listening section.
+		 */
+		__reuseport_detach_closed_sock(sk, old_reuse);
+		__reuseport_add_sock(sk, old_reuse);
+		return 0;
+	}
+
+	if (!reuse) {
+		/* In bind()/listen() path, we cannot carry over the eBPF prog
+		 * for the shutdown()ed socket. In setsockopt() path, we should
+		 * not change the eBPF prog of listening sockets by attaching a
+		 * prog to the shutdown()ed socket. Thus, we will allocate a new
+		 * reuseport group and detach sk from the old group.
+		 */
+		int id;
+
+		reuse = __reuseport_alloc(INIT_SOCKS);
+		if (!reuse)
+			return -ENOMEM;
+
+		id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
+		if (id < 0) {
+			kfree(reuse);
+			return id;
+		}
+
+		reuse->reuseport_id = id;
+		reuse->bind_inany = bind_inany;
+	} else {
+		/* Move sk from the old group to the new one if
+		 * - all the other listeners in the old group were close()d or
+		 *   shutdown()ed, and then sk2 has listen()ed on the same port
+		 * OR
+		 * - sk listen()ed without bind() (or with autobind), was
+		 *   shutdown()ed, and then listen()s on another port which
+		 *   sk2 listen()s on.
+		 */
+		if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
+			reuse = reuseport_grow(reuse);
+			if (!reuse)
+				return -ENOMEM;
+		}
+	}
+
+	__reuseport_detach_closed_sock(sk, old_reuse);
+	__reuseport_add_sock(sk, reuse);
+	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+	if (old_reuse->num_socks + old_reuse->num_closed_socks == 0)
+		call_rcu(&old_reuse->rcu, reuseport_free_rcu);
+
+	return 0;
+}
+
 void reuseport_detach_sock(struct sock *sk)
 {
 	struct sock_reuseport *reuse;
@@ -233,6 +346,10 @@ void reuseport_detach_sock(struct sock *sk)
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
 
+	/* reuseport_grow() has detached a closed sk */
+	if (!reuse)
+		goto out;
+
 	/* Notify the bpf side. The sk may be added to a sockarray
 	 * map. If so, sockarray logic will remove it from the map.
 	 *
@@ -244,15 +361,49 @@ void reuseport_detach_sock(struct sock *sk)
 	bpf_sk_reuseport_detach(sk);
 
 	rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
-	__reuseport_detach_sock(sk, reuse);
+
+	if (!__reuseport_detach_closed_sock(sk, reuse))
+		__reuseport_detach_sock(sk, reuse);
 
 	if (reuse->num_socks + reuse->num_closed_socks == 0)
 		call_rcu(&reuse->rcu, reuseport_free_rcu);
 
+out:
 	spin_unlock_bh(&reuseport_lock);
 }
 EXPORT_SYMBOL(reuseport_detach_sock);
 
+void reuseport_stop_listen_sock(struct sock *sk)
+{
+	if (sk->sk_protocol == IPPROTO_TCP) {
+		struct sock_reuseport *reuse;
+
+		spin_lock_bh(&reuseport_lock);
+
+		reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+						  lockdep_is_held(&reuseport_lock));
+
+		if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) {
+			/* Migration capable, move sk from the listening section
+			 * to the closed section.
+			 */
+			bpf_sk_reuseport_detach(sk);
+
+			__reuseport_detach_sock(sk, reuse);
+			__reuseport_add_closed_sock(sk, reuse);
+
+			spin_unlock_bh(&reuseport_lock);
+			return;
+		}
+
+		spin_unlock_bh(&reuseport_lock);
+	}
+
+	/* Not capable to do migration, detach immediately */
+	reuseport_detach_sock(sk);
+}
+EXPORT_SYMBOL(reuseport_stop_listen_sock);
+
 static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
 				   struct bpf_prog *prog, struct sk_buff *skb,
 				   int hdr_len)
@@ -352,9 +503,13 @@ int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 	struct sock_reuseport *reuse;
 	struct bpf_prog *old_prog;
 
-	if (sk_unhashed(sk) && sk->sk_reuseport) {
-		int err = reuseport_alloc(sk, false);
+	if (sk_unhashed(sk)) {
+		int err;
 
+		if (!sk->sk_reuseport)
+			return -EINVAL;
+
+		err = reuseport_alloc(sk, false);
 		if (err)
 			return err;
 	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -380,13 +535,24 @@ int reuseport_detach_prog(struct sock *sk)
 	struct sock_reuseport *reuse;
 	struct bpf_prog *old_prog;
 
-	if (!rcu_access_pointer(sk->sk_reuseport_cb))
-		return sk->sk_reuseport ? -ENOENT : -EINVAL;
-
 	old_prog = NULL;
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
+
+	/* reuse must be checked after acquiring the reuseport_lock
+	 * because reuseport_grow() can detach a closed sk.
+	 */
+	if (!reuse) {
+		spin_unlock_bh(&reuseport_lock);
+		return sk->sk_reuseport ? -ENOENT : -EINVAL;
+	}
+
+	if (sk_unhashed(sk) && reuse->num_closed_socks) {
+		spin_unlock_bh(&reuseport_lock);
+		return -ENOENT;
+	}
+
 	old_prog = rcu_replace_pointer(reuse->prog, old_prog,
 				       lockdep_is_held(&reuseport_lock));
 	spin_unlock_bh(&reuseport_lock);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fd472eae4f5c..fa806e9167ec 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -135,10 +135,18 @@ static int inet_csk_bind_conflict(const struct sock *sk,
 				  bool relax, bool reuseport_ok)
 {
 	struct sock *sk2;
+	bool reuseport_cb_ok;
 	bool reuse = sk->sk_reuse;
 	bool reuseport = !!sk->sk_reuseport;
+	struct sock_reuseport *reuseport_cb;
 	kuid_t uid = sock_i_uid((struct sock *)sk);
 
+	rcu_read_lock();
+	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
+	/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
+	reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
+	rcu_read_unlock();
+
 	/*
 	 * Unlike other sk lookup places we do not check
 	 * for sk_net here, since _all_ the socks listed
@@ -156,14 +164,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
 				if ((!relax ||
 				     (!reuseport_ok &&
 				      reuseport && sk2->sk_reuseport &&
-				      !rcu_access_pointer(sk->sk_reuseport_cb) &&
+				      reuseport_cb_ok &&
 				      (sk2->sk_state == TCP_TIME_WAIT ||
 				       uid_eq(uid, sock_i_uid(sk2))))) &&
 				    inet_rcv_saddr_equal(sk, sk2, true))
 					break;
 			} else if (!reuseport_ok ||
 				   !reuseport || !sk2->sk_reuseport ||
-				   rcu_access_pointer(sk->sk_reuseport_cb) ||
+				   !reuseport_cb_ok ||
 				   (sk2->sk_state != TCP_TIME_WAIT &&
 				    !uid_eq(uid, sock_i_uid(sk2)))) {
 				if (inet_rcv_saddr_equal(sk, sk2, true))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c96866a53a66..80aeaf9e6e16 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -697,7 +697,7 @@ void inet_unhash(struct sock *sk)
 		goto unlock;
 
 	if (rcu_access_pointer(sk->sk_reuseport_cb))
-		reuseport_detach_sock(sk);
+		reuseport_stop_listen_sock(sk);
 	if (ilb) {
 		inet_unhash2(hashinfo, sk);
 		ilb->count--;

From 1cd62c21572c1df6e7090ea4cabf4cf509616dbb Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:17 +0900
Subject: [PATCH 1809/3292] tcp: Add reuseport_migrate_sock() to select a new
 listener.

reuseport_migrate_sock() does the same check done in
reuseport_listen_stop_sock(). If the reuseport group is capable of
migration, reuseport_migrate_sock() selects a new listener by the child
socket hash and increments the listener's sk_refcnt beforehand. Thus, if we
fail in the migration, we have to decrement it later.

We will support migration by eBPF in the later commits.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-5-kuniyu@amazon.co.jp
---
 include/net/sock_reuseport.h |  3 ++
 net/core/sock_reuseport.c    | 78 +++++++++++++++++++++++++++++-------
 2 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 1333d0cddfbc..473b0b0fa4ab 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -37,6 +37,9 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
 					  struct sk_buff *skb,
 					  int hdr_len);
+struct sock *reuseport_migrate_sock(struct sock *sk,
+				    struct sock *migrating_sk,
+				    struct sk_buff *skb);
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 41fcd55ab5ae..b239f8cd9d39 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -44,7 +44,7 @@ static void __reuseport_add_sock(struct sock *sk,
 				 struct sock_reuseport *reuse)
 {
 	reuse->socks[reuse->num_socks] = sk;
-	/* paired with smp_rmb() in reuseport_select_sock() */
+	/* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
 	smp_wmb();
 	reuse->num_socks++;
 }
@@ -434,6 +434,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
 	return reuse->socks[index];
 }
 
+static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
+						  u32 hash, u16 num_socks)
+{
+	int i, j;
+
+	i = j = reciprocal_scale(hash, num_socks);
+	while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+		i++;
+		if (i >= num_socks)
+			i = 0;
+		if (i == j)
+			return NULL;
+	}
+
+	return reuse->socks[i];
+}
+
 /**
  *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
  *  @sk: First socket in the group.
@@ -477,19 +494,8 @@ struct sock *reuseport_select_sock(struct sock *sk,
 
 select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2) {
-			int i, j;
-
-			i = j = reciprocal_scale(hash, socks);
-			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
-				i++;
-				if (i >= socks)
-					i = 0;
-				if (i == j)
-					goto out;
-			}
-			sk2 = reuse->socks[i];
-		}
+		if (!sk2)
+			sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
 	}
 
 out:
@@ -498,6 +504,50 @@ out:
 }
 EXPORT_SYMBOL(reuseport_select_sock);
 
+/**
+ *  reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
+ *  @sk: close()ed or shutdown()ed socket in the group.
+ *  @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
+ *    NEW_SYN_RECV request socket during 3WHS.
+ *  @skb: skb to run through BPF filter.
+ *  Returns a socket (with sk_refcnt +1) that should accept the child socket
+ *  (or NULL on error).
+ */
+struct sock *reuseport_migrate_sock(struct sock *sk,
+				    struct sock *migrating_sk,
+				    struct sk_buff *skb)
+{
+	struct sock_reuseport *reuse;
+	struct sock *nsk = NULL;
+	u16 socks;
+	u32 hash;
+
+	rcu_read_lock();
+
+	reuse = rcu_dereference(sk->sk_reuseport_cb);
+	if (!reuse)
+		goto out;
+
+	socks = READ_ONCE(reuse->num_socks);
+	if (unlikely(!socks))
+		goto out;
+
+	/* paired with smp_wmb() in __reuseport_add_sock() */
+	smp_rmb();
+
+	hash = migrating_sk->sk_hash;
+	if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+		nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
+
+	if (nsk && unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+		nsk = NULL;
+
+out:
+	rcu_read_unlock();
+	return nsk;
+}
+EXPORT_SYMBOL(reuseport_migrate_sock);
+
 int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 {
 	struct sock_reuseport *reuse;

From 54b92e84193749c9968aff2dd46e3b0f42643e18 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:18 +0900
Subject: [PATCH 1810/3292] tcp: Migrate TCP_ESTABLISHED/TCP_SYN_RECV sockets
 in accept queues.

When we call close() or shutdown() for listening sockets, each child socket
in the accept queue are freed at inet_csk_listen_stop(). If we can get a
new listener by reuseport_migrate_sock() and clone the request by
inet_reqsk_clone(), we try to add it into the new listener's accept queue
by inet_csk_reqsk_queue_add(). If it fails, we have to call __reqsk_free()
to call sock_put() for its listener and free the cloned request.

After putting the full socket into ehash, tcp_v[46]_syn_recv_sock() sets
NULL to ireq_opt/pktopts in struct inet_request_sock, but ipv6_opt can be
non-NULL. So, we have to set NULL to ipv6_opt of the old request to avoid
double free.

Note that we do not update req->rsk_listener and instead clone the req to
migrate because another path may reference the original request. If we
protected it by RCU, we would need to add rcu_read_lock() in many places.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/netdev/20201209030903.hhow5r53l6fmozjn@kafai-mbp.dhcp.thefacebook.com/
Link: https://lore.kernel.org/bpf/20210612123224.12525-6-kuniyu@amazon.co.jp
---
 net/ipv4/inet_connection_sock.c | 70 ++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fa806e9167ec..08878ef1bc70 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -695,6 +695,52 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
+static struct request_sock *inet_reqsk_clone(struct request_sock *req,
+					     struct sock *sk)
+{
+	struct sock *req_sk, *nreq_sk;
+	struct request_sock *nreq;
+
+	nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
+	if (!nreq) {
+		/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
+		sock_put(sk);
+		return NULL;
+	}
+
+	req_sk = req_to_sk(req);
+	nreq_sk = req_to_sk(nreq);
+
+	memcpy(nreq_sk, req_sk,
+	       offsetof(struct sock, sk_dontcopy_begin));
+	memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
+	       req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
+	sk_node_init(&nreq_sk->sk_node);
+	nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
+#ifdef CONFIG_XPS
+	nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
+#endif
+	nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
+
+	nreq->rsk_listener = sk;
+
+	/* We need not acquire fastopenq->lock
+	 * because the child socket is locked in inet_csk_listen_stop().
+	 */
+	if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
+		rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
+
+	return nreq;
+}
+
+static void reqsk_migrate_reset(struct request_sock *req)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	inet_rsk(req)->ipv6_opt = NULL;
+#endif
+}
+
 /* return true if req was found in the ehash table */
 static bool reqsk_queue_unlink(struct request_sock *req)
 {
@@ -1036,14 +1082,36 @@ void inet_csk_listen_stop(struct sock *sk)
 	 * of the variants now.			--ANK
 	 */
 	while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
-		struct sock *child = req->sk;
+		struct sock *child = req->sk, *nsk;
+		struct request_sock *nreq;
 
 		local_bh_disable();
 		bh_lock_sock(child);
 		WARN_ON(sock_owned_by_user(child));
 		sock_hold(child);
 
+		nsk = reuseport_migrate_sock(sk, child, NULL);
+		if (nsk) {
+			nreq = inet_reqsk_clone(req, nsk);
+			if (nreq) {
+				refcount_set(&nreq->rsk_refcnt, 1);
+
+				if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+					reqsk_migrate_reset(req);
+				} else {
+					reqsk_migrate_reset(nreq);
+					__reqsk_free(nreq);
+				}
+
+				/* inet_csk_reqsk_queue_add() has already
+				 * called inet_child_forget() on failure case.
+				 */
+				goto skip_child_forget;
+			}
+		}
+
 		inet_child_forget(sk, req, child);
+skip_child_forget:
 		reqsk_put(req);
 		bh_unlock_sock(child);
 		local_bh_enable();

From c905dee62232db583b50fe214080b98db623151e Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:19 +0900
Subject: [PATCH 1811/3292] tcp: Migrate TCP_NEW_SYN_RECV requests at
 retransmitting SYN+ACKs.

As with the preceding patch, this patch changes reqsk_timer_handler() to
call reuseport_migrate_sock() and inet_reqsk_clone() to migrate in-flight
requests at retransmitting SYN+ACKs. If we can select a new listener and
clone the request, we resume setting the SYN+ACK timer for the new req. If
we can set the timer, we call inet_ehash_insert() to unhash the old req and
put the new req into ehash.

The noteworthy point here is that by unhashing the old req, another CPU
processing it may lose the "own_req" race in tcp_v[46]_syn_recv_sock() and
drop the final ACK packet. However, the new timer will recover this
situation.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-7-kuniyu@amazon.co.jp
---
 net/ipv4/inet_connection_sock.c | 75 ++++++++++++++++++++++++++++++---
 1 file changed, 69 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 08878ef1bc70..f4b771e45ac1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -734,10 +734,22 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
 	return nreq;
 }
 
+static void reqsk_queue_migrated(struct request_sock_queue *queue,
+				 const struct request_sock *req)
+{
+	if (req->num_timeout == 0)
+		atomic_inc(&queue->young);
+	atomic_inc(&queue->qlen);
+}
+
 static void reqsk_migrate_reset(struct request_sock *req)
 {
+	req->saved_syn = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 	inet_rsk(req)->ipv6_opt = NULL;
+	inet_rsk(req)->pktopts = NULL;
+#else
+	inet_rsk(req)->ireq_opt = NULL;
 #endif
 }
 
@@ -781,15 +793,39 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
 static void reqsk_timer_handler(struct timer_list *t)
 {
 	struct request_sock *req = from_timer(req, t, rsk_timer);
+	struct request_sock *nreq = NULL, *oreq = req;
 	struct sock *sk_listener = req->rsk_listener;
-	struct net *net = sock_net(sk_listener);
-	struct inet_connection_sock *icsk = inet_csk(sk_listener);
-	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+	struct inet_connection_sock *icsk;
+	struct request_sock_queue *queue;
+	struct net *net;
 	int max_syn_ack_retries, qlen, expire = 0, resend = 0;
 
-	if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
-		goto drop;
+	if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
+		struct sock *nsk;
 
+		nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
+		if (!nsk)
+			goto drop;
+
+		nreq = inet_reqsk_clone(req, nsk);
+		if (!nreq)
+			goto drop;
+
+		/* The new timer for the cloned req can decrease the 2
+		 * by calling inet_csk_reqsk_queue_drop_and_put(), so
+		 * hold another count to prevent use-after-free and
+		 * call reqsk_put() just before return.
+		 */
+		refcount_set(&nreq->rsk_refcnt, 2 + 1);
+		timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+		reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
+
+		req = nreq;
+		sk_listener = nsk;
+	}
+
+	icsk = inet_csk(sk_listener);
+	net = sock_net(sk_listener);
 	max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
 	/* Normally all the openreqs are young and become mature
 	 * (i.e. converted to established socket) for first timeout.
@@ -808,6 +844,7 @@ static void reqsk_timer_handler(struct timer_list *t)
 	 * embrions; and abort old ones without pity, if old
 	 * ones are about to clog our table.
 	 */
+	queue = &icsk->icsk_accept_queue;
 	qlen = reqsk_queue_len(queue);
 	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
 		int young = reqsk_queue_len_young(queue) << 1;
@@ -832,10 +869,36 @@ static void reqsk_timer_handler(struct timer_list *t)
 			atomic_dec(&queue->young);
 		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
 		mod_timer(&req->rsk_timer, jiffies + timeo);
+
+		if (!nreq)
+			return;
+
+		if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
+			/* delete timer */
+			inet_csk_reqsk_queue_drop(sk_listener, nreq);
+			goto drop;
+		}
+
+		reqsk_migrate_reset(oreq);
+		reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
+		reqsk_put(oreq);
+
+		reqsk_put(nreq);
 		return;
 	}
+
 drop:
-	inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
+	/* Even if we can clone the req, we may need not retransmit any more
+	 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
+	 * CPU may win the "own_req" race so that inet_ehash_insert() fails.
+	 */
+	if (nreq) {
+		reqsk_migrate_reset(nreq);
+		reqsk_queue_removed(queue, nreq);
+		__reqsk_free(nreq);
+	}
+
+	inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
 }
 
 static void reqsk_queue_hash_req(struct request_sock *req,

From d4f2c86b2b7e2e606e0868b38c8c6c49cc193a8e Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:20 +0900
Subject: [PATCH 1812/3292] tcp: Migrate TCP_NEW_SYN_RECV requests at receiving
 the final ACK.

This patch also changes the code to call reuseport_migrate_sock() and
inet_reqsk_clone(), but unlike the other cases, we do not call
inet_reqsk_clone() right after reuseport_migrate_sock().

Currently, in the receive path for TCP_NEW_SYN_RECV sockets, its listener
has three kinds of refcnt:

  (A) for listener itself
  (B) carried by reuqest_sock
  (C) sock_hold() in tcp_v[46]_rcv()

While processing the req, (A) may disappear by close(listener). Also, (B)
can disappear by accept(listener) once we put the req into the accept
queue. So, we have to hold another refcnt (C) for the listener to prevent
use-after-free.

For socket migration, we call reuseport_migrate_sock() to select a listener
with (A) and to increment the new listener's refcnt in tcp_v[46]_rcv().
This refcnt corresponds to (C) and is cleaned up later in tcp_v[46]_rcv().
Thus we have to take another refcnt (B) for the newly cloned request_sock.

In inet_csk_complete_hashdance(), we hold the count (B), clone the req, and
try to put the new req into the accept queue. By migrating req after
winning the "own_req" race, we can avoid such a worst situation:

  CPU 1 looks up req1
  CPU 2 looks up req1, unhashes it, then CPU 1 loses the race
  CPU 3 looks up req2, unhashes it, then CPU 2 loses the race
  ...

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-8-kuniyu@amazon.co.jp
---
 net/ipv4/inet_connection_sock.c | 34 ++++++++++++++++++++++++++++++---
 net/ipv4/tcp_ipv4.c             | 20 +++++++++++++------
 net/ipv4/tcp_minisocks.c        |  4 ++--
 net/ipv6/tcp_ipv6.c             | 14 +++++++++++---
 4 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f4b771e45ac1..0eea878edc30 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1114,12 +1114,40 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
 					 struct request_sock *req, bool own_req)
 {
 	if (own_req) {
-		inet_csk_reqsk_queue_drop(sk, req);
-		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-		if (inet_csk_reqsk_queue_add(sk, req, child))
+		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+
+		if (sk != req->rsk_listener) {
+			/* another listening sk has been selected,
+			 * migrate the req to it.
+			 */
+			struct request_sock *nreq;
+
+			/* hold a refcnt for the nreq->rsk_listener
+			 * which is assigned in inet_reqsk_clone()
+			 */
+			sock_hold(sk);
+			nreq = inet_reqsk_clone(req, sk);
+			if (!nreq) {
+				inet_child_forget(sk, req, child);
+				goto child_put;
+			}
+
+			refcount_set(&nreq->rsk_refcnt, 1);
+			if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+				reqsk_migrate_reset(req);
+				reqsk_put(req);
+				return child;
+			}
+
+			reqsk_migrate_reset(nreq);
+			__reqsk_free(nreq);
+		} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
 			return child;
+		}
 	}
 	/* Too bad, another child took ownership of the request, undo. */
+child_put:
 	bh_unlock_sock(child);
 	sock_put(child);
 	return NULL;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4f5b68a90be9..6cb8e269f1ab 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2002,13 +2002,21 @@ process:
 			goto csum_error;
 		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
-			inet_csk_reqsk_queue_drop_and_put(sk, req);
-			goto lookup;
+			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+			if (!nsk) {
+				inet_csk_reqsk_queue_drop_and_put(sk, req);
+				goto lookup;
+			}
+			sk = nsk;
+			/* reuseport_migrate_sock() has already held one sk_refcnt
+			 * before returning.
+			 */
+		} else {
+			/* We own a reference on the listener, increase it again
+			 * as we might lose it too soon.
+			 */
+			sock_hold(sk);
 		}
-		/* We own a reference on the listener, increase it again
-		 * as we might lose it too soon.
-		 */
-		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
 		if (!tcp_filter(sk, skb)) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7513ba45553d..f258a4c0da71 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -775,8 +775,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		goto listen_overflow;
 
 	if (own_req && rsk_drop_req(req)) {
-		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-		inet_csk_reqsk_queue_drop_and_put(sk, req);
+		reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+		inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
 		return child;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4435fa342e7a..4d71464094b3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1664,10 +1664,18 @@ process:
 			goto csum_error;
 		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
-			inet_csk_reqsk_queue_drop_and_put(sk, req);
-			goto lookup;
+			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+			if (!nsk) {
+				inet_csk_reqsk_queue_drop_and_put(sk, req);
+				goto lookup;
+			}
+			sk = nsk;
+			/* reuseport_migrate_sock() has already held one sk_refcnt
+			 * before returning.
+			 */
+		} else {
+			sock_hold(sk);
 		}
-		sock_hold(sk);
 		refcounted = true;
 		nsk = NULL;
 		if (!tcp_filter(sk, skb)) {

From e061047684af63f2d4f1338ec73140f6e29eb59f Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:21 +0900
Subject: [PATCH 1813/3292] bpf: Support BPF_FUNC_get_socket_cookie() for
 BPF_PROG_TYPE_SK_REUSEPORT.

We will call sock_reuseport.prog for socket migration in the next commit,
so the eBPF program has to know which listener is closing to select a new
listener.

We can currently get a unique ID of each listener in the userspace by
calling bpf_map_lookup_elem() for BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map.

This patch makes the pointer of sk available in sk_reuseport_md so that we
can get the ID by BPF_FUNC_get_socket_cookie() in the eBPF program.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/netdev/20201119001154.kapwihc2plp4f7zc@kafai-mbp.dhcp.thefacebook.com/
Link: https://lore.kernel.org/bpf/20210612123224.12525-9-kuniyu@amazon.co.jp
---
 include/uapi/linux/bpf.h       |  1 +
 net/core/filter.c              | 10 ++++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 3 files changed, 12 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2c1ba70abbf1..f3b72588442b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5416,6 +5416,7 @@ struct sk_reuseport_md {
 	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 	__u32 hash;		/* A hash of the packet 4 tuples */
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 #define BPF_TAG_SIZE	8
diff --git a/net/core/filter.c b/net/core/filter.c
index caa88955562e..f753ab550525 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10172,6 +10172,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
 		return &sk_reuseport_load_bytes_proto;
 	case BPF_FUNC_skb_load_bytes_relative:
 		return &sk_reuseport_load_bytes_relative_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_ptr_cookie_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -10201,6 +10203,10 @@ sk_reuseport_is_valid_access(int off, int size,
 	case offsetof(struct sk_reuseport_md, hash):
 		return size == size_default;
 
+	case offsetof(struct sk_reuseport_md, sk):
+		info->reg_type = PTR_TO_SOCKET;
+		return size == sizeof(__u64);
+
 	/* Fields that allow narrowing */
 	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
 		if (size < sizeof_field(struct sk_buff, protocol))
@@ -10273,6 +10279,10 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 	case offsetof(struct sk_reuseport_md, bind_inany):
 		SK_REUSEPORT_LOAD_FIELD(bind_inany);
 		break;
+
+	case offsetof(struct sk_reuseport_md, sk):
+		SK_REUSEPORT_LOAD_FIELD(sk);
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2c1ba70abbf1..f3b72588442b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5416,6 +5416,7 @@ struct sk_reuseport_md {
 	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 	__u32 hash;		/* A hash of the packet 4 tuples */
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 #define BPF_TAG_SIZE	8

From d5e4ddaeb6ab2c3c7fbb7b247a6d34bb0b18d87e Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:22 +0900
Subject: [PATCH 1814/3292] bpf: Support socket migration by eBPF.

This patch introduces a new bpf_attach_type for BPF_PROG_TYPE_SK_REUSEPORT
to check if the attached eBPF program is capable of migrating sockets. When
the eBPF program is attached, we run it for socket migration if the
expected_attach_type is BPF_SK_REUSEPORT_SELECT_OR_MIGRATE or
net.ipv4.tcp_migrate_req is enabled.

Currently, the expected_attach_type is not enforced for the
BPF_PROG_TYPE_SK_REUSEPORT type of program. Thus, this commit follows the
earlier idea in the commit aac3fc320d94 ("bpf: Post-hooks for sys_bind") to
fix up the zero expected_attach_type in bpf_prog_load_fixup_attach_type().

Moreover, this patch adds a new field (migrating_sk) to sk_reuseport_md to
select a new listener based on the child socket. migrating_sk varies
depending on if it is migrating a request in the accept queue or during
3WHS.

  - accept_queue : sock (ESTABLISHED/SYN_RECV)
  - 3WHS         : request_sock (NEW_SYN_RECV)

In the eBPF program, we can select a new listener by
BPF_FUNC_sk_select_reuseport(). Also, we can cancel migration by returning
SK_DROP. This feature is useful when listeners have different settings at
the socket API level or when we want to free resources as soon as possible.

  - SK_PASS with selected_sk, select it as a new listener
  - SK_PASS with selected_sk NULL, fallbacks to the random selection
  - SK_DROP, cancel the migration.

There is a noteworthy point. We select a listening socket in three places,
but we do not have struct skb at closing a listener or retransmitting a
SYN+ACK. On the other hand, some helper functions do not expect skb is NULL
(e.g. skb_header_pointer() in BPF_FUNC_skb_load_bytes(), skb_tail_pointer()
in BPF_FUNC_skb_load_bytes_relative()). So we allocate an empty skb
temporarily before running the eBPF program.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/netdev/20201123003828.xjpjdtk4ygl6tg6h@kafai-mbp.dhcp.thefacebook.com/
Link: https://lore.kernel.org/netdev/20201203042402.6cskdlit5f3mw4ru@kafai-mbp.dhcp.thefacebook.com/
Link: https://lore.kernel.org/netdev/20201209030903.hhow5r53l6fmozjn@kafai-mbp.dhcp.thefacebook.com/
Link: https://lore.kernel.org/bpf/20210612123224.12525-10-kuniyu@amazon.co.jp
---
 include/linux/bpf.h            |  1 +
 include/linux/filter.h         |  2 ++
 include/uapi/linux/bpf.h       | 15 +++++++++++++++
 kernel/bpf/syscall.c           | 13 +++++++++++++
 net/core/filter.c              | 13 ++++++++++++-
 net/core/sock_reuseport.c      | 34 ++++++++++++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h | 15 +++++++++++++++
 7 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 86dec5001ae2..f309fc1509f2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2048,6 +2048,7 @@ struct sk_reuseport_kern {
 	struct sk_buff *skb;
 	struct sock *sk;
 	struct sock *selected_sk;
+	struct sock *migrating_sk;
 	void *data_end;
 	u32 hash;
 	u32 reuseport_id;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c5ad7df029ed..688856e0b28a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -996,11 +996,13 @@ void bpf_warn_invalid_xdp_action(u32 act);
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 				  struct bpf_prog *prog, struct sk_buff *skb,
+				  struct sock *migrating_sk,
 				  u32 hash);
 #else
 static inline struct sock *
 bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 		     struct bpf_prog *prog, struct sk_buff *skb,
+		     struct sock *migrating_sk,
 		     u32 hash)
 {
 	return NULL;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f3b72588442b..bf9252c7381e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -994,6 +994,8 @@ enum bpf_attach_type {
 	BPF_SK_LOOKUP,
 	BPF_XDP,
 	BPF_SK_SKB_VERDICT,
+	BPF_SK_REUSEPORT_SELECT,
+	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -5416,7 +5418,20 @@ struct sk_reuseport_md {
 	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 	__u32 hash;		/* A hash of the packet 4 tuples */
+	/* When reuse->migrating_sk is NULL, it is selecting a sk for the
+	 * new incoming connection request (e.g. selecting a listen sk for
+	 * the received SYN in the TCP case).  reuse->sk is one of the sk
+	 * in the reuseport group. The bpf prog can use reuse->sk to learn
+	 * the local listening ip/port without looking into the skb.
+	 *
+	 * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+	 * reuse->migrating_sk is the socket that needs to be migrated
+	 * to another listening socket.  migrating_sk could be a fullsock
+	 * sk that is fully established or a reqsk that is in-the-middle
+	 * of 3-way handshake.
+	 */
 	__bpf_md_ptr(struct bpf_sock *, sk);
+	__bpf_md_ptr(struct bpf_sock *, migrating_sk);
 };
 
 #define BPF_TAG_SIZE	8
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 50457019da27..dbbc5342f221 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1972,6 +1972,11 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
 			attr->expected_attach_type =
 				BPF_CGROUP_INET_SOCK_CREATE;
 		break;
+	case BPF_PROG_TYPE_SK_REUSEPORT:
+		if (!attr->expected_attach_type)
+			attr->expected_attach_type =
+				BPF_SK_REUSEPORT_SELECT;
+		break;
 	}
 }
 
@@ -2055,6 +2060,14 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		if (expected_attach_type == BPF_SK_LOOKUP)
 			return 0;
 		return -EINVAL;
+	case BPF_PROG_TYPE_SK_REUSEPORT:
+		switch (expected_attach_type) {
+		case BPF_SK_REUSEPORT_SELECT:
+		case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
+			return 0;
+		default:
+			return -EINVAL;
+		}
 	case BPF_PROG_TYPE_SYSCALL:
 	case BPF_PROG_TYPE_EXT:
 		if (expected_attach_type)
diff --git a/net/core/filter.c b/net/core/filter.c
index f753ab550525..5b86e47ef079 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10044,11 +10044,13 @@ out:
 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
 				    struct sock_reuseport *reuse,
 				    struct sock *sk, struct sk_buff *skb,
+				    struct sock *migrating_sk,
 				    u32 hash)
 {
 	reuse_kern->skb = skb;
 	reuse_kern->sk = sk;
 	reuse_kern->selected_sk = NULL;
+	reuse_kern->migrating_sk = migrating_sk;
 	reuse_kern->data_end = skb->data + skb_headlen(skb);
 	reuse_kern->hash = hash;
 	reuse_kern->reuseport_id = reuse->reuseport_id;
@@ -10057,12 +10059,13 @@ static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
 
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 				  struct bpf_prog *prog, struct sk_buff *skb,
+				  struct sock *migrating_sk,
 				  u32 hash)
 {
 	struct sk_reuseport_kern reuse_kern;
 	enum sk_action action;
 
-	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
 	action = BPF_PROG_RUN(prog, &reuse_kern);
 
 	if (action == SK_PASS)
@@ -10207,6 +10210,10 @@ sk_reuseport_is_valid_access(int off, int size,
 		info->reg_type = PTR_TO_SOCKET;
 		return size == sizeof(__u64);
 
+	case offsetof(struct sk_reuseport_md, migrating_sk):
+		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+		return size == sizeof(__u64);
+
 	/* Fields that allow narrowing */
 	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
 		if (size < sizeof_field(struct sk_buff, protocol))
@@ -10283,6 +10290,10 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 	case offsetof(struct sk_reuseport_md, sk):
 		SK_REUSEPORT_LOAD_FIELD(sk);
 		break;
+
+	case offsetof(struct sk_reuseport_md, migrating_sk):
+		SK_REUSEPORT_LOAD_FIELD(migrating_sk);
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index b239f8cd9d39..de5ee3ae86d5 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -377,13 +377,17 @@ void reuseport_stop_listen_sock(struct sock *sk)
 {
 	if (sk->sk_protocol == IPPROTO_TCP) {
 		struct sock_reuseport *reuse;
+		struct bpf_prog *prog;
 
 		spin_lock_bh(&reuseport_lock);
 
 		reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 						  lockdep_is_held(&reuseport_lock));
+		prog = rcu_dereference_protected(reuse->prog,
+						 lockdep_is_held(&reuseport_lock));
 
-		if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) {
+		if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+		    (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
 			/* Migration capable, move sk from the listening section
 			 * to the closed section.
 			 */
@@ -488,7 +492,7 @@ struct sock *reuseport_select_sock(struct sock *sk,
 			goto select_by_hash;
 
 		if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
-			sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+			sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash);
 		else
 			sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
 
@@ -519,6 +523,8 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
 {
 	struct sock_reuseport *reuse;
 	struct sock *nsk = NULL;
+	bool allocated = false;
+	struct bpf_prog *prog;
 	u16 socks;
 	u32 hash;
 
@@ -536,10 +542,30 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
 	smp_rmb();
 
 	hash = migrating_sk->sk_hash;
-	if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+	prog = rcu_dereference(reuse->prog);
+	if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
+		if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+			goto select_by_hash;
+		goto out;
+	}
+
+	if (!skb) {
+		skb = alloc_skb(0, GFP_ATOMIC);
+		if (!skb)
+			goto out;
+		allocated = true;
+	}
+
+	nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash);
+
+	if (allocated)
+		kfree_skb(skb);
+
+select_by_hash:
+	if (!nsk)
 		nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
 
-	if (nsk && unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+	if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
 		nsk = NULL;
 
 out:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f3b72588442b..bf9252c7381e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -994,6 +994,8 @@ enum bpf_attach_type {
 	BPF_SK_LOOKUP,
 	BPF_XDP,
 	BPF_SK_SKB_VERDICT,
+	BPF_SK_REUSEPORT_SELECT,
+	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -5416,7 +5418,20 @@ struct sk_reuseport_md {
 	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
 	__u32 hash;		/* A hash of the packet 4 tuples */
+	/* When reuse->migrating_sk is NULL, it is selecting a sk for the
+	 * new incoming connection request (e.g. selecting a listen sk for
+	 * the received SYN in the TCP case).  reuse->sk is one of the sk
+	 * in the reuseport group. The bpf prog can use reuse->sk to learn
+	 * the local listening ip/port without looking into the skb.
+	 *
+	 * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+	 * reuse->migrating_sk is the socket that needs to be migrated
+	 * to another listening socket.  migrating_sk could be a fullsock
+	 * sk that is fully established or a reqsk that is in-the-middle
+	 * of 3-way handshake.
+	 */
 	__bpf_md_ptr(struct bpf_sock *, sk);
+	__bpf_md_ptr(struct bpf_sock *, migrating_sk);
 };
 
 #define BPF_TAG_SIZE	8

From 50501271e773c51afe602918915c6beb62ac369f Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:23 +0900
Subject: [PATCH 1815/3292] libbpf: Set expected_attach_type for
 BPF_PROG_TYPE_SK_REUSEPORT.

This commit introduces a new section (sk_reuseport/migrate) and sets
expected_attach_type to two each section in BPF_PROG_TYPE_SK_REUSEPORT
program.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-11-kuniyu@amazon.co.jp
---
 tools/lib/bpf/libbpf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 128715b8794b..f6b91fb0c857 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -9075,7 +9075,10 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
 
 static const struct bpf_sec_def section_defs[] = {
 	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
-	BPF_PROG_SEC("sk_reuseport",		BPF_PROG_TYPE_SK_REUSEPORT),
+	BPF_EAPROG_SEC("sk_reuseport/migrate",	BPF_PROG_TYPE_SK_REUSEPORT,
+						BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
+	BPF_EAPROG_SEC("sk_reuseport",		BPF_PROG_TYPE_SK_REUSEPORT,
+						BPF_SK_REUSEPORT_SELECT),
 	SEC_DEF("kprobe/", KPROBE,
 		.attach_fn = attach_kprobe),
 	BPF_PROG_SEC("uprobe/",			BPF_PROG_TYPE_KPROBE),

From c9d0bdef89a6c943e98c851e8cc10c9c534329e6 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Sat, 12 Jun 2021 21:32:24 +0900
Subject: [PATCH 1816/3292] bpf: Test BPF_SK_REUSEPORT_SELECT_OR_MIGRATE.

This patch adds a test for BPF_SK_REUSEPORT_SELECT_OR_MIGRATE and
removes 'static' from settimeo() in network_helpers.c.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-12-kuniyu@amazon.co.jp
---
 tools/testing/selftests/bpf/network_helpers.c |   2 +-
 tools/testing/selftests/bpf/network_helpers.h |   1 +
 .../bpf/prog_tests/migrate_reuseport.c        | 555 ++++++++++++++++++
 .../bpf/progs/test_migrate_reuseport.c        | 135 +++++
 4 files changed, 692 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_migrate_reuseport.c

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 12ee40284da0..2060bc122c53 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = {
 	.tcp.doff = 5,
 };
 
-static int settimeo(int fd, int timeout_ms)
+int settimeo(int fd, int timeout_ms)
 {
 	struct timeval timeout = { .tv_sec = 3 };
 
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 7205f8afdba1..5e0d51c07b63 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,6 +33,7 @@ struct ipv6_packet {
 } __packed;
 extern struct ipv6_packet pkt_v6;
 
+int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
 int connect_to_fd(int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644
index 000000000000..0fa3f750567d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ *   1. call listen() for 4 server sockets.
+ *   2. call connect() for 25 client sockets.
+ *   3. call listen() for 1 server socket. (migration target)
+ *   4. update a map to migrate all child sockets
+ *        to the last server socket (migrate_map[cookie] = 4)
+ *   5. call shutdown() for first 4 server sockets
+ *        and migrate the requests in the accept queue
+ *        to the last server socket.
+ *   6. call listen() for the second server socket.
+ *   7. call shutdown() for the last server
+ *        and migrate the requests in the accept queue
+ *        to the second server socket.
+ *   8. call listen() for the last server.
+ *   9. call shutdown() for the second server
+ *        and migrate the requests in the accept queue
+ *        to the last server socket.
+ *  10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+	const char *name;
+	__s64 servers[NR_SERVERS];
+	__s64 clients[NR_CLIENTS];
+	struct sockaddr_storage addr;
+	socklen_t addrlen;
+	int family;
+	int state;
+	bool drop_ack;
+	bool expire_synack_timer;
+	bool fastopen;
+	struct bpf_link *link;
+} test_cases[] = {
+	{
+		.name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
+		.family = AF_INET,
+		.state = BPF_TCP_ESTABLISHED,
+		.drop_ack = false,
+		.expire_synack_timer = false,
+		.fastopen = false,
+	},
+	{
+		.name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
+		.family = AF_INET,
+		.state = BPF_TCP_SYN_RECV,
+		.drop_ack = true,
+		.expire_synack_timer = false,
+		.fastopen = true,
+	},
+	{
+		.name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+		.family = AF_INET,
+		.state = BPF_TCP_NEW_SYN_RECV,
+		.drop_ack = true,
+		.expire_synack_timer = true,
+		.fastopen = false,
+	},
+	{
+		.name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+		.family = AF_INET,
+		.state = BPF_TCP_NEW_SYN_RECV,
+		.drop_ack = true,
+		.expire_synack_timer = false,
+		.fastopen = false,
+	},
+	{
+		.name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
+		.family = AF_INET6,
+		.state = BPF_TCP_ESTABLISHED,
+		.drop_ack = false,
+		.expire_synack_timer = false,
+		.fastopen = false,
+	},
+	{
+		.name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
+		.family = AF_INET6,
+		.state = BPF_TCP_SYN_RECV,
+		.drop_ack = true,
+		.expire_synack_timer = false,
+		.fastopen = true,
+	},
+	{
+		.name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+		.family = AF_INET6,
+		.state = BPF_TCP_NEW_SYN_RECV,
+		.drop_ack = true,
+		.expire_synack_timer = true,
+		.fastopen = false,
+	},
+	{
+		.name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+		.family = AF_INET6,
+		.state = BPF_TCP_NEW_SYN_RECV,
+		.drop_ack = true,
+		.expire_synack_timer = false,
+		.fastopen = false,
+	}
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (fds[i] != -1) {
+			close(fds[i]);
+			fds[i] = -1;
+		}
+	}
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+	int err = 0, fd, len;
+
+	fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+	if (!ASSERT_NEQ(fd, -1, "open"))
+		return -1;
+
+	if (restore) {
+		len = write(fd, buf, *saved_len);
+		if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+			err = -1;
+	} else {
+		*saved_len = read(fd, buf, size);
+		if (!ASSERT_GE(*saved_len, 1, "read")) {
+			err = -1;
+			goto close;
+		}
+
+		err = lseek(fd, 0, SEEK_SET);
+		if (!ASSERT_OK(err, "lseek"))
+			goto close;
+
+		/* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+		 *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
+		 */
+		len = write(fd, "519", 3);
+		if (!ASSERT_EQ(len, 3, "write - setup"))
+			err = -1;
+	}
+
+close:
+	close(fd);
+
+	return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+		    struct test_migrate_reuseport *skel)
+{
+	if (test_case->family == AF_INET)
+		skel->bss->server_port = ((struct sockaddr_in *)
+					  &test_case->addr)->sin_port;
+	else
+		skel->bss->server_port = ((struct sockaddr_in6 *)
+					  &test_case->addr)->sin6_port;
+
+	test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+						  IFINDEX_LO);
+	if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+		return -1;
+
+	return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+	int err;
+
+	err = bpf_link__detach(test_case->link);
+	if (!ASSERT_OK(err, "bpf_link__detach"))
+		return -1;
+
+	test_case->link = NULL;
+
+	return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+			 struct test_migrate_reuseport *skel)
+{
+	int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+	prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+	make_sockaddr(test_case->family,
+		      test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+		      &test_case->addr, &test_case->addrlen);
+
+	for (i = 0; i < NR_SERVERS; i++) {
+		test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+					       IPPROTO_TCP);
+		if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+			return -1;
+
+		err = setsockopt(test_case->servers[i], SOL_SOCKET,
+				 SO_REUSEPORT, &reuseport, sizeof(reuseport));
+		if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+			return -1;
+
+		err = bind(test_case->servers[i],
+			   (struct sockaddr *)&test_case->addr,
+			   test_case->addrlen);
+		if (!ASSERT_OK(err, "bind"))
+			return -1;
+
+		if (i == 0) {
+			err = setsockopt(test_case->servers[i], SOL_SOCKET,
+					 SO_ATTACH_REUSEPORT_EBPF,
+					 &prog_fd, sizeof(prog_fd));
+			if (!ASSERT_OK(err,
+				       "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+				return -1;
+
+			err = getsockname(test_case->servers[i],
+					  (struct sockaddr *)&test_case->addr,
+					  &test_case->addrlen);
+			if (!ASSERT_OK(err, "getsockname"))
+				return -1;
+		}
+
+		if (test_case->fastopen) {
+			err = setsockopt(test_case->servers[i],
+					 SOL_TCP, TCP_FASTOPEN,
+					 &qlen, sizeof(qlen));
+			if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+				return -1;
+		}
+
+		/* All requests will be tied to the first four listeners */
+		if (i != MIGRATED_TO) {
+			err = listen(test_case->servers[i], qlen);
+			if (!ASSERT_OK(err, "listen"))
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+	char buf[MSGLEN] = MSG;
+	int i, err;
+
+	for (i = 0; i < NR_CLIENTS; i++) {
+		test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+					       IPPROTO_TCP);
+		if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+			return -1;
+
+		/* The attached XDP program drops only the final ACK, so
+		 * clients will transition to TCP_ESTABLISHED immediately.
+		 */
+		err = settimeo(test_case->clients[i], 100);
+		if (!ASSERT_OK(err, "settimeo"))
+			return -1;
+
+		if (test_case->fastopen) {
+			int fastopen = 1;
+
+			err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+					 TCP_FASTOPEN_CONNECT, &fastopen,
+					 sizeof(fastopen));
+			if (!ASSERT_OK(err,
+				       "setsockopt - TCP_FASTOPEN_CONNECT"))
+				return -1;
+		}
+
+		err = connect(test_case->clients[i],
+			      (struct sockaddr *)&test_case->addr,
+			      test_case->addrlen);
+		if (!ASSERT_OK(err, "connect"))
+			return -1;
+
+		err = write(test_case->clients[i], buf, MSGLEN);
+		if (!ASSERT_EQ(err, MSGLEN, "write"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+		       struct test_migrate_reuseport *skel)
+{
+	int i, err, migrated_to = MIGRATED_TO;
+	int reuseport_map_fd, migrate_map_fd;
+	__u64 value;
+
+	reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+	migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+	for (i = 0; i < NR_SERVERS; i++) {
+		value = (__u64)test_case->servers[i];
+		err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+					  BPF_NOEXIST);
+		if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+			return -1;
+
+		err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+		if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+			return -1;
+
+		err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+					  BPF_NOEXIST);
+		if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+	int i, err;
+
+	/* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+	 * to the last listener based on eBPF.
+	 */
+	for (i = 0; i < MIGRATED_TO; i++) {
+		err = shutdown(test_case->servers[i], SHUT_RDWR);
+		if (!ASSERT_OK(err, "shutdown"))
+			return -1;
+	}
+
+	/* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+	if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+		return 0;
+
+	/* Note that we use the second listener instead of the
+	 * first one here.
+	 *
+	 * The fist listener is bind()ed with port 0 and,
+	 * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
+	 * calling listen() again will bind() the first listener
+	 * on a new ephemeral port and detach it from the existing
+	 * reuseport group.  (See: __inet_bind(), tcp_set_state())
+	 *
+	 * OTOH, the second one is bind()ed with a specific port,
+	 * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+	 * resurrect the listener on the existing reuseport group.
+	 */
+	err = listen(test_case->servers[1], QLEN);
+	if (!ASSERT_OK(err, "listen"))
+		return -1;
+
+	/* Migrate from the last listener to the second one.
+	 *
+	 * All listeners were detached out of the reuseport_map,
+	 * so migration will be done by kernel random pick from here.
+	 */
+	err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+	if (!ASSERT_OK(err, "shutdown"))
+		return -1;
+
+	/* Back to the existing reuseport group */
+	err = listen(test_case->servers[MIGRATED_TO], QLEN);
+	if (!ASSERT_OK(err, "listen"))
+		return -1;
+
+	/* Migrate back to the last one from the second one */
+	err = shutdown(test_case->servers[1], SHUT_RDWR);
+	if (!ASSERT_OK(err, "shutdown"))
+		return -1;
+
+	return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+			   struct test_migrate_reuseport *skel)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int err, cnt = 0, client;
+	char buf[MSGLEN];
+
+	err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+	if (!ASSERT_OK(err, "settimeo"))
+		goto out;
+
+	for (; cnt < NR_CLIENTS; cnt++) {
+		client = accept(test_case->servers[MIGRATED_TO],
+				(struct sockaddr *)&addr, &len);
+		if (!ASSERT_NEQ(client, -1, "accept"))
+			goto out;
+
+		memset(buf, 0, MSGLEN);
+		read(client, &buf, MSGLEN);
+		close(client);
+
+		if (!ASSERT_STREQ(buf, MSG, "read"))
+			goto out;
+	}
+
+out:
+	ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+	switch (test_case->state) {
+	case BPF_TCP_ESTABLISHED:
+		cnt = skel->bss->migrated_at_close;
+		break;
+	case BPF_TCP_SYN_RECV:
+		cnt = skel->bss->migrated_at_close_fastopen;
+		break;
+	case BPF_TCP_NEW_SYN_RECV:
+		if (test_case->expire_synack_timer)
+			cnt = skel->bss->migrated_at_send_synack;
+		else
+			cnt = skel->bss->migrated_at_recv_ack;
+		break;
+	default:
+		cnt = 0;
+	}
+
+	ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+		     struct test_migrate_reuseport *skel)
+{
+	int err, saved_len;
+	char buf[16];
+
+	skel->bss->migrated_at_close = 0;
+	skel->bss->migrated_at_close_fastopen = 0;
+	skel->bss->migrated_at_send_synack = 0;
+	skel->bss->migrated_at_recv_ack = 0;
+
+	init_fds(test_case->servers, NR_SERVERS);
+	init_fds(test_case->clients, NR_CLIENTS);
+
+	if (test_case->fastopen) {
+		memset(buf, 0, sizeof(buf));
+
+		err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+		if (!ASSERT_OK(err, "setup_fastopen - setup"))
+			return;
+	}
+
+	err = start_servers(test_case, skel);
+	if (!ASSERT_OK(err, "start_servers"))
+		goto close_servers;
+
+	if (test_case->drop_ack) {
+		/* Drop the final ACK of the 3-way handshake and stick the
+		 * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+		 */
+		err = drop_ack(test_case, skel);
+		if (!ASSERT_OK(err, "drop_ack"))
+			goto close_servers;
+	}
+
+	/* Tie requests to the first four listners */
+	err = start_clients(test_case);
+	if (!ASSERT_OK(err, "start_clients"))
+		goto close_clients;
+
+	err = listen(test_case->servers[MIGRATED_TO], QLEN);
+	if (!ASSERT_OK(err, "listen"))
+		goto close_clients;
+
+	err = update_maps(test_case, skel);
+	if (!ASSERT_OK(err, "fill_maps"))
+		goto close_clients;
+
+	/* Migrate the requests in the accept queue only.
+	 * TCP_NEW_SYN_RECV requests are not migrated at this point.
+	 */
+	err = migrate_dance(test_case);
+	if (!ASSERT_OK(err, "migrate_dance"))
+		goto close_clients;
+
+	if (test_case->expire_synack_timer) {
+		/* Wait for SYN+ACK timers to expire so that
+		 * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+		 */
+		sleep(1);
+	}
+
+	if (test_case->link) {
+		/* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+		err = pass_ack(test_case);
+		if (!ASSERT_OK(err, "pass_ack"))
+			goto close_clients;
+	}
+
+	count_requests(test_case, skel);
+
+close_clients:
+	close_fds(test_case->clients, NR_CLIENTS);
+
+	if (test_case->link) {
+		err = pass_ack(test_case);
+		ASSERT_OK(err, "pass_ack - clean up");
+	}
+
+close_servers:
+	close_fds(test_case->servers, NR_SERVERS);
+
+	if (test_case->fastopen) {
+		err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+		ASSERT_OK(err, "setup_fastopen - restore");
+	}
+}
+
+void test_migrate_reuseport(void)
+{
+	struct test_migrate_reuseport *skel;
+	int i;
+
+	skel = test_migrate_reuseport__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		test__start_subtest(test_cases[i].name);
+		run_test(&test_cases[i], skel);
+	}
+
+	test_migrate_reuseport__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644
index 000000000000..27df571abf5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ *   1. If reuse_md->migrating_sk is NULL (SYN packet),
+ *        return SK_PASS without selecting a listener.
+ *   2. If reuse_md->migrating_sk is not NULL (socket migration),
+ *        select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, 256);
+	__type(key, int);
+	__type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 256);
+	__type(key, __u64);
+	__type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	struct tcphdr *tcp = NULL;
+
+	if (eth + 1 > data_end)
+		goto pass;
+
+	switch (bpf_ntohs(eth->h_proto)) {
+	case ETH_P_IP: {
+		struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+		if (ip + 1 > data_end)
+			goto pass;
+
+		if (ip->protocol != IPPROTO_TCP)
+			goto pass;
+
+		tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+		break;
+	}
+	case ETH_P_IPV6: {
+		struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+		if (ipv6 + 1 > data_end)
+			goto pass;
+
+		if (ipv6->nexthdr != IPPROTO_TCP)
+			goto pass;
+
+		tcp = (struct tcphdr *)(ipv6 + 1);
+		break;
+	}
+	default:
+		goto pass;
+	}
+
+	if (tcp + 1 > data_end)
+		goto pass;
+
+	if (tcp->dest != server_port)
+		goto pass;
+
+	if (!tcp->syn && tcp->ack)
+		return XDP_DROP;
+
+pass:
+	return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+	int *key, flags = 0, state, err;
+	__u64 cookie;
+
+	if (!reuse_md->migrating_sk)
+		return SK_PASS;
+
+	state = reuse_md->migrating_sk->state;
+	cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+	key = bpf_map_lookup_elem(&migrate_map, &cookie);
+	if (!key)
+		return SK_DROP;
+
+	err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+	if (err)
+		return SK_PASS;
+
+	switch (state) {
+	case BPF_TCP_ESTABLISHED:
+		__sync_fetch_and_add(&migrated_at_close, 1);
+		break;
+	case BPF_TCP_SYN_RECV:
+		__sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+		break;
+	case BPF_TCP_NEW_SYN_RECV:
+		if (!reuse_md->len)
+			__sync_fetch_and_add(&migrated_at_send_synack, 1);
+		else
+			__sync_fetch_and_add(&migrated_at_recv_ack, 1);
+		break;
+	}
+
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";

From a955318fe67ec0d962760b5ee58e74bffaf649b8 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Mon, 14 Jun 2021 04:25:04 +0200
Subject: [PATCH 1817/3292] stmmac: align RX buffers

On RX an SKB is allocated and the received buffer is copied into it.
But on some architectures, the memcpy() needs the source and destination
buffers to have the same alignment to be efficient.

This is not our case, because SKB data pointer is misaligned by two bytes
to compensate the ethernet header.

Align the RX buffer the same way as the SKB one, so the copy is faster.
An iperf3 RX test gives a decent improvement on a RISC-V machine:

before:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec   733 MBytes   615 Mbits/sec   88             sender
[  5]   0.00-10.01  sec   730 MBytes   612 Mbits/sec                  receiver

after:
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec  1.10 GBytes   942 Mbits/sec    0             sender
[  5]   0.00-10.00  sec  1.09 GBytes   940 Mbits/sec                  receiver

And the memcpy() overhead during the RX drops dramatically.

before:
Overhead  Shared O  Symbol
  43.35%  [kernel]  [k] memcpy
  33.77%  [kernel]  [k] __asm_copy_to_user
   3.64%  [kernel]  [k] sifive_l2_flush64_range

after:
Overhead  Shared O  Symbol
  45.40%  [kernel]  [k] __asm_copy_to_user
  28.09%  [kernel]  [k] memcpy
   4.27%  [kernel]  [k] sifive_l2_flush64_range

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 6655cb8e24cf..e735134e8487 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -339,9 +339,9 @@ static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv)
 static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
 {
 	if (stmmac_xdp_is_enabled(priv))
-		return XDP_PACKET_HEADROOM;
+		return XDP_PACKET_HEADROOM + NET_IP_ALIGN;
 
-	return 0;
+	return NET_SKB_PAD + NET_IP_ALIGN;
 }
 
 void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue);

From 0dca2c7404a938cb10c85d0515cee40ed5348788 Mon Sep 17 00:00:00 2001
From: Boris Sukholitko <boris.sukholitko@broadcom.com>
Date: Mon, 14 Jun 2021 14:13:22 +0300
Subject: [PATCH 1818/3292] net/sched: cls_flower: Remove match on n_proto

The following flower filters fail to match packets:

tc filter add dev eth0 ingress protocol 0x8864 flower \
	action simple sdata hi64
tc filter add dev eth0 ingress protocol 802.1q flower \
	vlan_ethtype 0x8864 action simple sdata "hi vlan"

The protocol 0x8864 (ETH_P_PPP_SES) is a tunnel protocol. As such, it is
being dissected by __skb_flow_dissect and it's internal protocol is
being set as key->basic.n_proto. IOW, the existence of ETH_P_PPP_SES
tunnel is transparent to the callers of __skb_flow_dissect.

OTOH, in the filters above, cls_flower configures its key->basic.n_proto
to the ETH_P_PPP_SES value configured by the user. Matching on this key
fails because of __skb_flow_dissect "transparency" mentioned above.

In the following, I would argue that the problem lies with cls_flower,
unnessary attempting key->basic.n_proto match.

There are 3 close places in fl_set_key in cls_flower setting up
mask->basic.n_proto. They are (in reverse order of appearance in the
code) due to:

(a) No vlan is given: use TCA_FLOWER_KEY_ETH_TYPE parameter
(b) One vlan tag is given: use TCA_FLOWER_KEY_VLAN_ETH_TYPE
(c) Two vlans are given: use TCA_FLOWER_KEY_CVLAN_ETH_TYPE

The match in case (a) is unneeded because flower has no its own
eth_type parameter. It was removed by Jamal Hadi Salim in commit
488b41d020fb06428b90289f70a41210718f52b7 in iproute2. For
TCA_FLOWER_KEY_ETH_TYPE the userspace uses the generic tc filter
protocol field. Therefore the match for the case (a) is done by tc
itself.

The matches in cases (b), (c) are unneeded because the protocol will
appear in and will be matched by flow_dissector_key_vlan.vlan_tpid.
Therefore in the best case, key->basic.n_proto will try to repeat vlan
key match again.

The below patch removes mask->basic.n_proto setting and resets it to 0
in case (c).

Signed-off-by: Boris Sukholitko <boris.sukholitko@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d7869a984881..2e704c7a105a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1531,14 +1531,13 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 						       &mask->basic.n_proto,
 						       TCA_FLOWER_UNSPEC,
 						       sizeof(key->basic.n_proto));
+					mask->basic.n_proto = cpu_to_be16(0);
 				} else {
 					key->basic.n_proto = ethertype;
-					mask->basic.n_proto = cpu_to_be16(~0);
 				}
 			}
 		} else {
 			key->basic.n_proto = ethertype;
-			mask->basic.n_proto = cpu_to_be16(~0);
 		}
 	}
 

From 336bac5edaa731b20fd7e2a1551f27977a2284e9 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:36 +0800
Subject: [PATCH 1819/3292] net: z85230: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 34 +---------------------------------
 1 file changed, 1 insertion(+), 33 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 002b8c99ab5b..f074cb1100cf 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -55,7 +55,6 @@
 
 #include "z85230.h"
 
-
 /**
  *	z8530_read_port - Architecture specific interface function
  *	@p: port to read
@@ -95,7 +94,6 @@ static inline int z8530_read_port(unsigned long p)
  *	dread 5uS sanity delay.
  */
 
-
 static inline void z8530_write_port(unsigned long p, u8 d)
 {
 	outb(d,Z8530_PORT_OF(p));
@@ -103,12 +101,9 @@ static inline void z8530_write_port(unsigned long p, u8 d)
 		udelay(5);
 }
 
-
-
 static void z8530_rx_done(struct z8530_channel *c);
 static void z8530_tx_done(struct z8530_channel *c);
 
-
 /**
  *	read_zsreg - Read a register from a Z85230 
  *	@c: Z8530 channel to read from (2 per chip)
@@ -159,7 +154,6 @@ static inline void write_zsreg(struct z8530_channel *c, u8 reg, u8 val)
 	if(reg)
 		z8530_write_port(c->ctrlio, reg);
 	z8530_write_port(c->ctrlio, val);
-
 }
 
 /**
@@ -182,8 +176,6 @@ static inline void write_zsctrl(struct z8530_channel *c, u8 val)
  *
  *	Write directly to the data register on the Z8530
  */
-
-
 static inline void write_zsdata(struct z8530_channel *c, u8 val)
 {
 	z8530_write_port(c->dataio, val);
@@ -204,7 +196,6 @@ EXPORT_SYMBOL(z8530_dead_port);
  *	Register loading parameters for currently supported circuit types
  */
 
-
 /*
  *	Data clocked by telco end. This is the correct data for the UK
  *	"kilostream" service, and most other similar services.
@@ -352,7 +343,6 @@ static void z8530_rx(struct z8530_channel *c)
 
 		if(stat&END_FR)
 		{
-		
 			/*
 			 *	Error ?
 			 */
@@ -392,7 +382,6 @@ static void z8530_rx(struct z8530_channel *c)
 	write_zsctrl(c, RES_H_IUS);
 }
 
-
 /**
  *	z8530_tx - Handle a PIO transmit event
  *	@c: Z8530 channel to process
@@ -423,7 +412,6 @@ static void z8530_tx(struct z8530_channel *c)
 		}
 	}
 
-	
 	/*
 	 *	End of frame TX - fire another one
 	 */
@@ -474,7 +462,6 @@ static void z8530_status(struct z8530_channel *chan)
 			if (chan->netdevice)
 				netif_carrier_off(chan->netdevice);
 		}
-
 	}
 	write_zsctrl(chan, RES_EXT_INT);
 	write_zsctrl(chan, RES_H_IUS);
@@ -564,7 +551,6 @@ static void z8530_dma_status(struct z8530_channel *chan)
 	
 	chan->status=status;
 
-
 	if(chan->dma_tx)
 	{
 		if(status&TxEOM)
@@ -621,7 +607,6 @@ static struct z8530_irqhandler z8530_txdma_sync = {
  *	(eg the MacII) we must clear the interrupt cause or die.
  */
 
-
 static void z8530_rx_clear(struct z8530_channel *c)
 {
 	/*
@@ -680,7 +665,6 @@ struct z8530_irqhandler z8530_nop = {
 	.status = z8530_status_clear,
 };
 
-
 EXPORT_SYMBOL(z8530_nop);
 
 /**
@@ -718,7 +702,6 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 
 	while(++work<5000)
 	{
-
 		intr = read_zsreg(&dev->chanA, R3);
 		if(!(intr & (CHARxIP|CHATxIP|CHAEXT|CHBRxIP|CHBTxIP|CHBEXT)))
 			break;
@@ -772,7 +755,6 @@ static const u8 reg_init[16]=
 	0x55,0,0,0
 };
 
-
 /**
  *	z8530_sync_open - Open a Z8530 channel for PIO
  *	@dev:	The network interface we are using
@@ -808,7 +790,6 @@ int z8530_sync_open(struct net_device *dev, struct z8530_channel *c)
 	return 0;
 }
 
-
 EXPORT_SYMBOL(z8530_sync_open);
 
 /**
@@ -1070,7 +1051,6 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 
 	c->tx_dma_buf[1] = c->tx_dma_buf[0] + PAGE_SIZE/2;
 
-
 	spin_lock_irqsave(c->lock, cflags);
 
 	/*
@@ -1150,7 +1130,6 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	unsigned long dflags, cflags;
 	u8 chk;
 
-	
 	spin_lock_irqsave(c->lock, cflags);
 	
 	c->irqs = &z8530_nop;
@@ -1195,10 +1174,8 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	return 0;
 }
 
-
 EXPORT_SYMBOL(z8530_sync_txdma_close);
 
-
 /*
  *	Name strings for Z8530 chips. SGI claim to have a 130, Zilog deny
  *	it exists...
@@ -1333,7 +1310,6 @@ int z8530_init(struct z8530_dev *dev)
 	return ret;
 }
 
-
 EXPORT_SYMBOL(z8530_init);
 
 /**
@@ -1408,7 +1384,6 @@ int z8530_channel_load(struct z8530_channel *c, u8 *rtable)
 
 EXPORT_SYMBOL(z8530_channel_load);
 
-
 /**
  *	z8530_tx_begin - Begin packet transmission
  *	@c: The Z8530 channel to kick
@@ -1455,8 +1430,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 	else
 	{
 		c->txcount=c->tx_skb->len;
-		
-		
+
 		if(c->dma_tx)
 		{
 			/*
@@ -1490,7 +1464,6 @@ static void z8530_tx_begin(struct z8530_channel *c)
 		}
 		else
 		{
-
 			/* ABUNDER off */
 			write_zsreg(c, R10, c->regs[10]);
 			write_zsctrl(c, RES_Tx_CRC);
@@ -1500,7 +1473,6 @@ static void z8530_tx_begin(struct z8530_channel *c)
 				write_zsreg(c, R8, *c->tx_ptr++);
 				c->txcount--;
 			}
-
 		}
 	}
 	/*
@@ -1573,14 +1545,12 @@ static void z8530_rx_done(struct z8530_channel *c)
 	/*
 	 *	Is our receive engine in DMA mode
 	 */
-	 
 	if(c->rxdma_on)
 	{
 		/*
 		 *	Save the ready state and the buffer currently
 		 *	being used as the DMA target
 		 */
-		 
 		int ready=c->dma_ready;
 		unsigned char *rxb=c->rx_buf[c->dma_num];
 		unsigned long flags;
@@ -1588,7 +1558,6 @@ static void z8530_rx_done(struct z8530_channel *c)
 		/*
 		 *	Complete this DMA. Necessary to find the length
 		 */		
-		 
 		flags=claim_dma_lock();
 		
 		disable_dma(c->rxdma);
@@ -1731,7 +1700,6 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 	if(c->tx_next_skb)
 		return NETDEV_TX_BUSY;
 
-	
 	/* PC SPECIFIC - DMA limits */
 	
 	/*

From 61312d78e1d4286360427aeffbc0ea464fdb5299 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:37 +0800
Subject: [PATCH 1820/3292] net: z85230: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index f074cb1100cf..3036d5801090 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -74,6 +74,7 @@
 static inline int z8530_read_port(unsigned long p)
 {
 	u8 r=inb(Z8530_PORT_OF(p));
+
 	if(p&Z8530_PORT_SLEEP)	/* gcc should figure this out efficiently ! */
 		udelay(5);
 	return r;
@@ -133,6 +134,7 @@ static inline u8 read_zsreg(struct z8530_channel *c, u8 reg)
 static inline u8 read_zsdata(struct z8530_channel *c)
 {
 	u8 r;
+
 	r=z8530_read_port(c->dataio);
 	return r;
 }
@@ -653,6 +655,7 @@ static void z8530_tx_clear(struct z8530_channel *c)
 static void z8530_status_clear(struct z8530_channel *chan)
 {
 	u8 status=read_zsreg(chan, R0);
+
 	if(status&TxEOM)
 		write_zsctrl(chan, ERR_RES);
 	write_zsctrl(chan, RES_EXT_INT);
@@ -1360,6 +1363,7 @@ int z8530_channel_load(struct z8530_channel *c, u8 *rtable)
 	while(*rtable!=255)
 	{
 		int reg=*rtable++;
+
 		if(reg>0x0F)
 			write_zsreg(c, R15, c->regs[15]|1);
 		write_zsreg(c, reg&0x0F, *rtable);
@@ -1401,6 +1405,7 @@ EXPORT_SYMBOL(z8530_channel_load);
 static void z8530_tx_begin(struct z8530_channel *c)
 {
 	unsigned long flags;
+
 	if(c->tx_skb)
 		return;
 		
@@ -1672,6 +1677,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 static inline int spans_boundary(struct sk_buff *skb)
 {
 	unsigned long a=(unsigned long)skb->data;
+
 	a^=(a+skb->len);
 	if(a&0x00010000)	/* If the 64K bit is different.. */
 		return 1;

From e07a1f9cbd4d660320ea437ad442c5b5ecbaf315 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:38 +0800
Subject: [PATCH 1821/3292] net: z85230: fix the code style issue about
 EXPORT_SYMBOL(foo)

According to the chackpatch.pl,
EXPORT_SYMBOL(foo); should immediately follow its function/variable.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 3036d5801090..94ed9a24b521 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -191,7 +191,6 @@ u8 z8530_dead_port[]=
 {
 	255
 };
-
 EXPORT_SYMBOL(z8530_dead_port);
 
 /*
@@ -221,7 +220,6 @@ u8 z8530_hdlc_kilostream[]=
 	9,	NV|MIE|NORESET,
 	255
 };
-
 EXPORT_SYMBOL(z8530_hdlc_kilostream);
 
 /*
@@ -248,7 +246,6 @@ u8 z8530_hdlc_kilostream_85230[]=
 	
 	255
 };
-
 EXPORT_SYMBOL(z8530_hdlc_kilostream_85230);
 
 /**
@@ -474,7 +471,6 @@ struct z8530_irqhandler z8530_sync = {
 	.tx = z8530_tx,
 	.status = z8530_status,
 };
-
 EXPORT_SYMBOL(z8530_sync);
 
 /**
@@ -667,7 +663,6 @@ struct z8530_irqhandler z8530_nop = {
 	.tx = z8530_tx_clear,
 	.status = z8530_status_clear,
 };
-
 EXPORT_SYMBOL(z8530_nop);
 
 /**
@@ -747,7 +742,6 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 	locker=0;
 	return IRQ_HANDLED;
 }
-
 EXPORT_SYMBOL(z8530_interrupt);
 
 static const u8 reg_init[16]=
@@ -792,7 +786,6 @@ int z8530_sync_open(struct net_device *dev, struct z8530_channel *c)
 	spin_unlock_irqrestore(c->lock, flags);
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_sync_open);
 
 /**
@@ -821,7 +814,6 @@ int z8530_sync_close(struct net_device *dev, struct z8530_channel *c)
 	spin_unlock_irqrestore(c->lock, flags);
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_sync_close);
 
 /**
@@ -945,7 +937,6 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_sync_dma_open);
 
 /**
@@ -1015,7 +1006,6 @@ int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_sync_dma_close);
 
 /**
@@ -1116,7 +1106,6 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_sync_txdma_open);
 
 /**
@@ -1176,7 +1165,6 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	spin_unlock_irqrestore(c->lock, cflags);
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_sync_txdma_close);
 
 /*
@@ -1210,7 +1198,6 @@ void z8530_describe(struct z8530_dev *dev, char *mapping, unsigned long io)
 		Z8530_PORT_OF(io),
 		dev->irq);
 }
-
 EXPORT_SYMBOL(z8530_describe);
 
 /*
@@ -1312,7 +1299,6 @@ int z8530_init(struct z8530_dev *dev)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(z8530_init);
 
 /**
@@ -1340,7 +1326,6 @@ int z8530_shutdown(struct z8530_dev *dev)
 	spin_unlock_irqrestore(&dev->lock, flags);
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_shutdown);
 
 /**
@@ -1385,7 +1370,6 @@ int z8530_channel_load(struct z8530_channel *c, u8 *rtable)
 	spin_unlock_irqrestore(c->lock, flags);
 	return 0;
 }
-
 EXPORT_SYMBOL(z8530_channel_load);
 
 /**
@@ -1526,7 +1510,6 @@ void z8530_null_rx(struct z8530_channel *c, struct sk_buff *skb)
 {
 	dev_kfree_skb_any(skb);
 }
-
 EXPORT_SYMBOL(z8530_null_rx);
 
 /**
@@ -1738,7 +1721,6 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 	
 	return NETDEV_TX_OK;
 }
-
 EXPORT_SYMBOL(z8530_queue_xmit);
 
 /*

From b55932bcfabd6d9ec2835513668638dc45e7d3fb Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:39 +0800
Subject: [PATCH 1822/3292] net: z85230: replace comparison to NULL with "!skb"

According to the chackpatch.pl, comparison to NULL could
be written "!skb".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 94ed9a24b521..0162239a8cb9 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -851,12 +851,12 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 		return -EMSGSIZE;
 	 
 	c->rx_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
-	if(c->rx_buf[0]==NULL)
+	if (!c->rx_buf[0])
 		return -ENOBUFS;
 	c->rx_buf[1]=c->rx_buf[0]+PAGE_SIZE/2;
 	
 	c->tx_dma_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
-	if(c->tx_dma_buf[0]==NULL)
+	if (!c->tx_dma_buf[0])
 	{
 		free_page((unsigned long)c->rx_buf[0]);
 		c->rx_buf[0]=NULL;
@@ -1039,7 +1039,7 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 		return -EMSGSIZE;
 	 
 	c->tx_dma_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
-	if(c->tx_dma_buf[0]==NULL)
+	if (!c->tx_dma_buf[0])
 		return -ENOBUFS;
 
 	c->tx_dma_buf[1] = c->tx_dma_buf[0] + PAGE_SIZE/2;
@@ -1397,7 +1397,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 	c->tx_next_skb=NULL;
 	c->tx_ptr=c->tx_next_ptr;
 	
-	if(c->tx_skb==NULL)
+	if (!c->tx_skb)
 	{
 		/* Idle on */
 		if(c->dma_tx)
@@ -1486,7 +1486,7 @@ static void z8530_tx_done(struct z8530_channel *c)
 	struct sk_buff *skb;
 
 	/* Actually this can happen.*/
-	if (c->tx_skb == NULL)
+	if (!c->tx_skb)
 		return;
 
 	skb = c->tx_skb;
@@ -1589,7 +1589,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		 */
 
 		skb = dev_alloc_skb(ct);
-		if (skb == NULL) {
+		if (!skb) {
 			c->netdevice->stats.rx_dropped++;
 			netdev_warn(c->netdevice, "Memory squeeze\n");
 		} else {
@@ -1630,7 +1630,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		RT_UNLOCK;
 
 		c->skb2 = dev_alloc_skb(c->mtu);
-		if (c->skb2 == NULL)
+		if (!c->skb2)
 			netdev_warn(c->netdevice, "memory squeeze\n");
 		else
 			skb_put(c->skb2, c->mtu);

From c6c3ba4578e84367eda6aecd60e8a47d6d839d31 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:40 +0800
Subject: [PATCH 1823/3292] net: z85230: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 214 +++++++++++++++------------------------
 1 file changed, 83 insertions(+), 131 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 0162239a8cb9..e130c8cafd3a 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- *	(c) Copyright 1998 Alan Cox <alan@lxorguk.ukuu.org.uk>
+/*	(c) Copyright 1998 Alan Cox <alan@lxorguk.ukuu.org.uk>
  *	(c) Copyright 2000, 2001 Red Hat Inc
  *
  *	Development of this driver was funded by Equiinet Ltd
@@ -183,8 +181,7 @@ static inline void write_zsdata(struct z8530_channel *c, u8 val)
 	z8530_write_port(c->dataio, val);
 }
 
-/*
- *	Register loading parameters for a dead port
+/*	Register loading parameters for a dead port
  */
  
 u8 z8530_dead_port[]=
@@ -193,12 +190,10 @@ u8 z8530_dead_port[]=
 };
 EXPORT_SYMBOL(z8530_dead_port);
 
-/*
- *	Register loading parameters for currently supported circuit types
+/*	Register loading parameters for currently supported circuit types
  */
 
-/*
- *	Data clocked by telco end. This is the correct data for the UK
+/*	Data clocked by telco end. This is the correct data for the UK
  *	"kilostream" service, and most other similar services.
  */
  
@@ -222,8 +217,7 @@ u8 z8530_hdlc_kilostream[]=
 };
 EXPORT_SYMBOL(z8530_hdlc_kilostream);
 
-/*
- *	As above but for enhanced chips.
+/*	As above but for enhanced chips.
  */
  
 u8 z8530_hdlc_kilostream_85230[]=
@@ -331,8 +325,7 @@ static void z8530_rx(struct z8530_channel *c)
 		ch=read_zsdata(c);
 		stat=read_zsreg(c, R1);
 	
-		/*
-		 *	Overrun ?
+		/*	Overrun ?
 		 */
 		if(c->count < c->max)
 		{
@@ -342,8 +335,7 @@ static void z8530_rx(struct z8530_channel *c)
 
 		if(stat&END_FR)
 		{
-			/*
-			 *	Error ?
+			/*	Error ?
 			 */
 			if(stat&(Rx_OVR|CRC_ERR))
 			{
@@ -365,8 +357,7 @@ static void z8530_rx(struct z8530_channel *c)
 			}
 			else
 			{
-				/*
-				 *	Drop the lock for RX processing, or
+				/*	Drop the lock for RX processing, or
 		 		 *	there are deadlocks
 		 		 */
 				z8530_rx_done(c);
@@ -374,8 +365,7 @@ static void z8530_rx(struct z8530_channel *c)
 			}
 		}
 	}
-	/*
-	 *	Clear irq
+	/*	Clear irq
 	 */
 	write_zsctrl(c, ERR_RES);
 	write_zsctrl(c, RES_H_IUS);
@@ -398,8 +388,7 @@ static void z8530_tx(struct z8530_channel *c)
 		if(!(read_zsreg(c, R0)&4))
 			return;
 		c->txcount--;
-		/*
-		 *	Shovel out the byte
+		/*	Shovel out the byte
 		 */
 		write_zsreg(c, R8, *c->tx_ptr++);
 		write_zsctrl(c, RES_H_IUS);
@@ -411,8 +400,7 @@ static void z8530_tx(struct z8530_channel *c)
 		}
 	}
 
-	/*
-	 *	End of frame TX - fire another one
+	/*	End of frame TX - fire another one
 	 */
 	 
 	write_zsctrl(c, RES_Tx_P);
@@ -607,8 +595,7 @@ static struct z8530_irqhandler z8530_txdma_sync = {
 
 static void z8530_rx_clear(struct z8530_channel *c)
 {
-	/*
-	 *	Data and status bytes
+	/*	Data and status bytes
 	 */
 	u8 stat;
 
@@ -617,8 +604,7 @@ static void z8530_rx_clear(struct z8530_channel *c)
 	
 	if(stat&END_FR)
 		write_zsctrl(c, RES_Rx_CRC);
-	/*
-	 *	Clear irq
+	/*	Clear irq
 	 */
 	write_zsctrl(c, ERR_RES);
 	write_zsctrl(c, RES_H_IUS);
@@ -704,11 +690,13 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 		if(!(intr & (CHARxIP|CHATxIP|CHAEXT|CHBRxIP|CHBTxIP|CHBEXT)))
 			break;
 	
-		/* This holds the IRQ status. On the 8530 you must read it from chan 
-		   A even though it applies to the whole chip */
+		/* This holds the IRQ status. On the 8530 you must read it
+		 * from chan A even though it applies to the whole chip
+		 */
 		
 		/* Now walk the chip and see what it is wanting - it may be
-		   an IRQ for someone else remember */
+		 * an IRQ for someone else remember
+		 */
 		   
 		irqs=dev->chanA.irqs;
 
@@ -835,14 +823,13 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->count = 0;
 	c->skb = NULL;
 	c->skb2 = NULL;
-	/*
-	 *	Load the DMA interfaces up
+
+	/*	Load the DMA interfaces up
 	 */
 	c->rxdma_on = 0;
 	c->txdma_on = 0;
-	
-	/*
-	 *	Allocate the DMA flip buffers. Limit by page size.
+
+	/*	Allocate the DMA flip buffers. Limit by page size.
 	 *	Everyone runs 1500 mtu or less on wan links so this
 	 *	should be fine.
 	 */
@@ -869,14 +856,12 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->dma_num=0;
 	c->dma_ready=1;
 	
-	/*
-	 *	Enable DMA control mode
+	/*	Enable DMA control mode
 	 */
 
 	spin_lock_irqsave(c->lock, cflags);
-	 
-	/*
-	 *	TX DMA via DIR/REQ
+
+	/*	TX DMA via DIR/REQ
 	 */
 	 
 	c->regs[R14]|= DTRREQ;
@@ -884,9 +869,8 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 
 	c->regs[R1]&= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
-	
-	/*
-	 *	RX DMA via W/Req
+
+	/*	RX DMA via W/Req
 	 */	 
 
 	c->regs[R1]|= WT_FN_RDYFN;
@@ -896,13 +880,11 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	write_zsreg(c, R1, c->regs[R1]);
 	c->regs[R1]|= WT_RDY_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);            
-	
-	/*
-	 *	DMA interrupts
+
+	/*	DMA interrupts
 	 */
-	 
-	/*
-	 *	Set up the DMA configuration
+
+	/*	Set up the DMA configuration
 	 */	
 	 
 	dflags=claim_dma_lock();
@@ -920,9 +902,8 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	disable_dma(c->txdma);
 	
 	release_dma_lock(dflags);
-	
-	/*
-	 *	Select the DMA interrupt handlers
+
+	/*	Select the DMA interrupt handlers
 	 */
 
 	c->rxdma_on = 1;
@@ -956,9 +937,8 @@ int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 	c->irqs = &z8530_nop;
 	c->max = 0;
 	c->sync = 0;
-	
-	/*
-	 *	Disable the PC DMA channels
+
+	/*	Disable the PC DMA channels
 	 */
 	
 	flags=claim_dma_lock(); 
@@ -976,8 +956,7 @@ int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 
 	spin_lock_irqsave(c->lock, flags);
 
-	/*
-	 *	Disable DMA control mode
+	/*	Disable DMA control mode
 	 */
 	 
 	c->regs[R1]&= ~WT_RDY_ENAB;
@@ -1028,9 +1007,8 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	c->count = 0;
 	c->skb = NULL;
 	c->skb2 = NULL;
-	
-	/*
-	 *	Allocate the DMA flip buffers. Limit by page size.
+
+	/*	Allocate the DMA flip buffers. Limit by page size.
 	 *	Everyone runs 1500 mtu or less on wan links so this
 	 *	should be fine.
 	 */
@@ -1046,15 +1024,13 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 
 	spin_lock_irqsave(c->lock, cflags);
 
-	/*
-	 *	Load the PIO receive ring
+	/*	Load the PIO receive ring
 	 */
 
 	z8530_rx_done(c);
 	z8530_rx_done(c);
 
-	/*
-	 *	Load the DMA interfaces up
+	/*	Load the DMA interfaces up
 	 */
 
 	c->rxdma_on = 0;
@@ -1065,21 +1041,18 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	c->dma_ready=1;
 	c->dma_tx = 1;
 
-	/*
-	 *	Enable DMA control mode
+	/*	Enable DMA control mode
 	 */
 
-	/*
-	 *	TX DMA via DIR/REQ
+	/*	TX DMA via DIR/REQ
 	 */
 	c->regs[R14]|= DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);     
 	
 	c->regs[R1]&= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
-	
-	/*
-	 *	Set up the DMA configuration
+
+	/*	Set up the DMA configuration
 	 */	
 	 
 	dflags = claim_dma_lock();
@@ -1090,9 +1063,8 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	disable_dma(c->txdma);
 
 	release_dma_lock(dflags);
-	
-	/*
-	 *	Select the DMA interrupt handlers
+
+	/*	Select the DMA interrupt handlers
 	 */
 
 	c->rxdma_on = 0;
@@ -1127,9 +1099,8 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	c->irqs = &z8530_nop;
 	c->max = 0;
 	c->sync = 0;
-	
-	/*
-	 *	Disable the PC DMA channels
+
+	/*	Disable the PC DMA channels
 	 */
 	 
 	dflags = claim_dma_lock();
@@ -1141,8 +1112,7 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 
 	release_dma_lock(dflags);
 
-	/*
-	 *	Disable DMA control mode
+	/*	Disable DMA control mode
 	 */
 	 
 	c->regs[R1]&= ~WT_RDY_ENAB;
@@ -1167,8 +1137,7 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 }
 EXPORT_SYMBOL(z8530_sync_txdma_close);
 
-/*
- *	Name strings for Z8530 chips. SGI claim to have a 130, Zilog deny
+/*	Name strings for Z8530 chips. SGI claim to have a 130, Zilog deny
  *	it exists...
  */
  
@@ -1200,14 +1169,14 @@ void z8530_describe(struct z8530_dev *dev, char *mapping, unsigned long io)
 }
 EXPORT_SYMBOL(z8530_describe);
 
-/*
- *	Locked operation part of the z8530 init code
+/*	Locked operation part of the z8530 init code
  */
  
 static inline int do_z8530_init(struct z8530_dev *dev)
 {
 	/* NOP the interrupt handlers first - we might get a
-	   floating IRQ transition when we reset the chip */
+	 * floating IRQ transition when we reset the chip
+	 */
 	dev->chanA.irqs=&z8530_nop;
 	dev->chanB.irqs=&z8530_nop;
 	dev->chanA.dcdcheck=DCD;
@@ -1225,15 +1194,13 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 		return -ENODEV;
 		
 	dev->type=Z8530;
-	
-	/*
-	 *	See the application note.
+
+	/*	See the application note.
 	 */
 	 
 	write_zsreg(&dev->chanA, R15, 0x01);
-	
-	/*
-	 *	If we can set the low bit of R15 then
+
+	/*	If we can set the low bit of R15 then
 	 *	the chip is enhanced.
 	 */
 	 
@@ -1247,17 +1214,15 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 		else
 			dev->type = Z85C30;	/* Z85C30, 1 byte FIFO */
 	}
-		
-	/*
-	 *	The code assumes R7' and friends are
+
+	/*	The code assumes R7' and friends are
 	 *	off. Use write_zsext() for these and keep
 	 *	this bit clear.
 	 */
 	 
 	write_zsreg(&dev->chanA, R15, 0);
-		
-	/*
-	 *	At this point it looks like the chip is behaving
+
+	/*	At this point it looks like the chip is behaving
 	 */
 	 
 	memcpy(dev->chanA.regs, reg_init, 16);
@@ -1404,8 +1369,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 		{
 			flags=claim_dma_lock();
 			disable_dma(c->txdma);
-			/*
-			 *	Check if we crapped out.
+			/*	Check if we crapped out.
 			 */
 			if (get_dma_residue(c->txdma))
 			{
@@ -1422,8 +1386,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 
 		if(c->dma_tx)
 		{
-			/*
-			 *	FIXME. DMA is broken for the original 8530,
+			/*	FIXME. DMA is broken for the original 8530,
 			 *	on the older parts we need to set a flag and
 			 *	wait for a further TX interrupt to fire this
 			 *	stage off	
@@ -1432,8 +1395,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			flags=claim_dma_lock();
 			disable_dma(c->txdma);
 
-			/*
-			 *	These two are needed by the 8530/85C30
+			/*	These two are needed by the 8530/85C30
 			 *	and must be issued when idling.
 			 */
 			 
@@ -1464,8 +1426,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			}
 		}
 	}
-	/*
-	 *	Since we emptied tx_skb we can ask for more
+	/*	Since we emptied tx_skb we can ask for more
 	 */
 	netif_wake_queue(c->netdevice);
 }
@@ -1529,22 +1490,19 @@ static void z8530_rx_done(struct z8530_channel *c)
 {
 	struct sk_buff *skb;
 	int ct;
-	
-	/*
-	 *	Is our receive engine in DMA mode
+
+	/*	Is our receive engine in DMA mode
 	 */
 	if(c->rxdma_on)
 	{
-		/*
-		 *	Save the ready state and the buffer currently
+		/*	Save the ready state and the buffer currently
 		 *	being used as the DMA target
 		 */
 		int ready=c->dma_ready;
 		unsigned char *rxb=c->rx_buf[c->dma_num];
 		unsigned long flags;
-		
-		/*
-		 *	Complete this DMA. Necessary to find the length
+
+		/*	Complete this DMA. Necessary to find the length
 		 */		
 		flags=claim_dma_lock();
 		
@@ -1555,9 +1513,8 @@ static void z8530_rx_done(struct z8530_channel *c)
 		if(ct<0)
 			ct=2;	/* Shit happens.. */
 		c->dma_ready=0;
-		
-		/*
-		 *	Normal case: the other slot is free, start the next DMA
+
+		/*	Normal case: the other slot is free, start the next DMA
 		 *	into it immediately.
 		 */
 		 
@@ -1569,19 +1526,20 @@ static void z8530_rx_done(struct z8530_channel *c)
 			set_dma_count(c->rxdma, c->mtu);
 			c->rxdma_on = 1;
 			enable_dma(c->rxdma);
-			/* Stop any frames that we missed the head of 
-			   from passing */
+			/* Stop any frames that we missed the head of
+			 * from passing
+			 */
 			write_zsreg(c, R0, RES_Rx_CRC);
 		}
 		else
 			/* Can't occur as we dont reenable the DMA irq until
-			   after the flip is done */
+			 * after the flip is done
+			 */
 			netdev_warn(c->netdevice, "DMA flip overrun!\n");
 
 		release_dma_lock(flags);
 
-		/*
-		 *	Shove the old buffer into an sk_buff. We can't DMA
+		/*	Shove the old buffer into an sk_buff. We can't DMA
 		 *	directly into one on a PC - it might be above the 16Mb
 		 *	boundary. Optimisation - we could check to see if we
 		 *	can avoid the copy. Optimisation 2 - make the memcpy
@@ -1603,8 +1561,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		RT_LOCK;
 		skb = c->skb;
 
-		/*
-		 *	The game we play for non DMA is similar. We want to
+		/*	The game we play for non DMA is similar. We want to
 		 *	get the controller set up for the next packet as fast
 		 *	as possible. We potentially only have one byte + the
 		 *	fifo length for this. Thus we want to flip to the new
@@ -1637,8 +1594,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		c->netdevice->stats.rx_packets++;
 		c->netdevice->stats.rx_bytes += ct;
 	}
-	/*
-	 *	If we received a frame we must now process it.
+	/*	If we received a frame we must now process it.
 	 */
 	if (skb) {
 		skb_trim(skb, ct);
@@ -1690,16 +1646,13 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 		return NETDEV_TX_BUSY;
 
 	/* PC SPECIFIC - DMA limits */
-	
-	/*
-	 *	If we will DMA the transmit and its gone over the ISA bus
+	/*	If we will DMA the transmit and its gone over the ISA bus
 	 *	limit, then copy to the flip buffer
 	 */
 	 
 	if(c->dma_tx && ((unsigned long)(virt_to_bus(skb->data+skb->len))>=16*1024*1024 || spans_boundary(skb)))
 	{
-		/* 
-		 *	Send the flip buffer, and flip the flippy bit.
+		/*	Send the flip buffer, and flip the flippy bit.
 		 *	We don't care which is used when just so long as
 		 *	we never use the same buffer twice in a row. Since
 		 *	only one buffer can be going out at a time the other
@@ -1723,8 +1676,7 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(z8530_queue_xmit);
 
-/*
- *	Module support
+/*	Module support
  */
 static const char banner[] __initconst =
 	KERN_INFO "Generic Z85C30/Z85230 interface driver v0.02\n";

From 57b6de35cf327fd1ef95fdd42162e5d6555cd024 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:41 +0800
Subject: [PATCH 1824/3292] net: z85230: fix the code style issue about
 "if..else.."

According to the chackpatch.pl, else should follow close brace '}',
braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index e130c8cafd3a..34004e463a9a 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -354,9 +354,7 @@ static void z8530_rx(struct z8530_channel *c)
 					/* printk("crc error\n"); */
 				}
 				/* Shove the frame upstream */
-			}
-			else
-			{
+			} else {
 				/*	Drop the lock for RX processing, or
 		 		 *	there are deadlocks
 		 		 */
@@ -489,9 +487,7 @@ static void z8530_dma_rx(struct z8530_channel *chan)
 		}		
 		write_zsctrl(chan, ERR_RES);
 		write_zsctrl(chan, RES_H_IUS);
-	}
-	else
-	{
+	} else {
 		/* DMA is off right now, drain the slow way */
 		z8530_rx(chan);
 	}	
@@ -1379,9 +1375,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			release_dma_lock(flags);
 		}
 		c->txcount=0;
-	}
-	else
-	{
+	} else {
 		c->txcount=c->tx_skb->len;
 
 		if(c->dma_tx)
@@ -1412,9 +1406,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			release_dma_lock(flags);
 			write_zsctrl(c, RES_EOM_L);
 			write_zsreg(c, R5, c->regs[R5]|TxENAB);
-		}
-		else
-		{
+		} else {
 			/* ABUNDER off */
 			write_zsreg(c, R10, c->regs[10]);
 			write_zsctrl(c, RES_Tx_CRC);
@@ -1530,12 +1522,12 @@ static void z8530_rx_done(struct z8530_channel *c)
 			 * from passing
 			 */
 			write_zsreg(c, R0, RES_Rx_CRC);
-		}
-		else
+		} else {
 			/* Can't occur as we dont reenable the DMA irq until
 			 * after the flip is done
 			 */
 			netdev_warn(c->netdevice, "DMA flip overrun!\n");
+		}
 
 		release_dma_lock(flags);
 
@@ -1661,9 +1653,9 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 		c->tx_next_ptr=c->tx_dma_buf[c->tx_dma_used];
 		c->tx_dma_used^=1;	/* Flip temp buffer */
 		skb_copy_from_linear_data(skb, c->tx_next_ptr, skb->len);
+	} else {
+		c->tx_next_ptr = skb->data;
 	}
-	else
-		c->tx_next_ptr=skb->data;	
 	RT_LOCK;
 	c->tx_next_skb=skb;
 	RT_UNLOCK;

From a04544ffe889105ecb2c98c48ee593d7af1014ff Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:42 +0800
Subject: [PATCH 1825/3292] net: z85230: remove trailing whitespaces

This patch removes trailing whitespaces.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 224 ++++++++++++++++++---------------------
 1 file changed, 105 insertions(+), 119 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 34004e463a9a..5db452d24e94 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -10,7 +10,7 @@
  *	Asynchronous mode dropped for 2.2. For 2.5 we will attempt the
  *	unification of all the Z85x30 asynchronous drivers for real.
  *
- *	DMA now uses get_free_page as kmalloc buffers may span a 64K 
+ *	DMA now uses get_free_page as kmalloc buffers may span a 64K
  *	boundary.
  *
  *	Modified for SMP safety and SMP locking by Alan Cox
@@ -59,7 +59,7 @@
  *
  *	Provided port access methods. The Comtrol SV11 requires no delays
  *	between accesses and uses PC I/O. Some drivers may need a 5uS delay
- *	
+ *
  *	In the longer term this should become an architecture specific
  *	section so that this can become a generic driver interface for all
  *	platforms. For now we only handle PC I/O ports with or without the
@@ -104,16 +104,16 @@ static void z8530_rx_done(struct z8530_channel *c);
 static void z8530_tx_done(struct z8530_channel *c);
 
 /**
- *	read_zsreg - Read a register from a Z85230 
+ *	read_zsreg - Read a register from a Z85230
  *	@c: Z8530 channel to read from (2 per chip)
  *	@reg: Register to read
  *	FIXME: Use a spinlock.
- *	
+ *
  *	Most of the Z8530 registers are indexed off the control registers.
  *	A read is done by writing to the control register and reading the
  *	register back.  The caller must hold the lock
  */
- 
+
 static inline u8 read_zsreg(struct z8530_channel *c, u8 reg)
 {
 	if(reg)
@@ -183,7 +183,7 @@ static inline void write_zsdata(struct z8530_channel *c, u8 val)
 
 /*	Register loading parameters for a dead port
  */
- 
+
 u8 z8530_dead_port[]=
 {
 	255
@@ -196,7 +196,7 @@ EXPORT_SYMBOL(z8530_dead_port);
 /*	Data clocked by telco end. This is the correct data for the UK
  *	"kilostream" service, and most other similar services.
  */
- 
+
 u8 z8530_hdlc_kilostream[]=
 {
 	4,	SYNC_ENAB|SDLC|X1CLK,
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(z8530_hdlc_kilostream);
 
 /*	As above but for enhanced chips.
  */
- 
+
 u8 z8530_hdlc_kilostream_85230[]=
 {
 	4,	SYNC_ENAB|SDLC|X1CLK,
@@ -237,7 +237,7 @@ u8 z8530_hdlc_kilostream_85230[]=
 	1,	EXT_INT_ENAB|TxINT_ENAB|INT_ALL_Rx,
 	9,	NV|MIE|NORESET,
 	23,	3,		/* Extended mode AUTO TX and EOM*/
-	
+
 	255
 };
 EXPORT_SYMBOL(z8530_hdlc_kilostream_85230);
@@ -246,14 +246,14 @@ EXPORT_SYMBOL(z8530_hdlc_kilostream_85230);
  *	z8530_flush_fifo - Flush on chip RX FIFO
  *	@c: Channel to flush
  *
- *	Flush the receive FIFO. There is no specific option for this, we 
+ *	Flush the receive FIFO. There is no specific option for this, we
  *	blindly read bytes and discard them. Reading when there is no data
  *	is harmless. The 8530 has a 4 byte FIFO, the 85230 has 8 bytes.
- *	
+ *
  *	All locking is handled for the caller. On return data may still be
  *	present if it arrived during the flush.
  */
- 
+
 static void z8530_flush_fifo(struct z8530_channel *c)
 {
 	read_zsreg(c, R1);
@@ -267,7 +267,7 @@ static void z8530_flush_fifo(struct z8530_channel *c)
 		read_zsreg(c, R1);
 		read_zsreg(c, R1);
 	}
-}	
+}
 
 /**
  *	z8530_rtsdtr - Control the outgoing DTS/RTS line
@@ -293,7 +293,7 @@ static void z8530_rtsdtr(struct z8530_channel *c, int set)
  *	z8530_rx - Handle a PIO receive event
  *	@c: Z8530 channel to process
  *
- *	Receive handler for receiving in PIO mode. This is much like the 
+ *	Receive handler for receiving in PIO mode. This is much like the
  *	async one but not quite the same or as complex
  *
  *	Note: Its intended that this handler can easily be separated from
@@ -306,13 +306,13 @@ static void z8530_rtsdtr(struct z8530_channel *c, int set)
  *	other code - this is true in the RT case too.
  *
  *	We only cover the sync cases for this. If you want 2Mbit async
- *	do it yourself but consider medical assistance first. This non DMA 
- *	synchronous mode is portable code. The DMA mode assumes PCI like 
+ *	do it yourself but consider medical assistance first. This non DMA
+ *	synchronous mode is portable code. The DMA mode assumes PCI like
  *	ISA DMA
  *
  *	Called with the device lock held
  */
- 
+
 static void z8530_rx(struct z8530_channel *c)
 {
 	u8 ch,stat;
@@ -324,7 +324,7 @@ static void z8530_rx(struct z8530_channel *c)
 			break;
 		ch=read_zsdata(c);
 		stat=read_zsreg(c, R1);
-	
+
 		/*	Overrun ?
 		 */
 		if(c->count < c->max)
@@ -378,7 +378,7 @@ static void z8530_rx(struct z8530_channel *c)
  *	in as possible, its quite possible that we won't keep up with the
  *	data rate otherwise.
  */
- 
+
 static void z8530_tx(struct z8530_channel *c)
 {
 	while(c->txcount) {
@@ -400,10 +400,10 @@ static void z8530_tx(struct z8530_channel *c)
 
 	/*	End of frame TX - fire another one
 	 */
-	 
+
 	write_zsctrl(c, RES_Tx_P);
 
-	z8530_tx_done(c);	 
+	z8530_tx_done(c);
 	write_zsctrl(c, RES_H_IUS);
 }
 
@@ -468,29 +468,29 @@ EXPORT_SYMBOL(z8530_sync);
  *	events are handled by the DMA hardware. We get a kick here only if
  *	a frame ended.
  */
- 
+
 static void z8530_dma_rx(struct z8530_channel *chan)
 {
 	if(chan->rxdma_on)
 	{
 		/* Special condition check only */
 		u8 status;
-	
+
 		read_zsreg(chan, R7);
 		read_zsreg(chan, R6);
-		
+
 		status=read_zsreg(chan, R1);
-	
+
 		if(status&END_FR)
 		{
 			z8530_rx_done(chan);	/* Fire up the next one */
-		}		
+		}
 		write_zsctrl(chan, ERR_RES);
 		write_zsctrl(chan, RES_H_IUS);
 	} else {
 		/* DMA is off right now, drain the slow way */
 		z8530_rx(chan);
-	}	
+	}
 }
 
 /**
@@ -500,7 +500,6 @@ static void z8530_dma_rx(struct z8530_channel *chan)
  *	We have received an interrupt while doing DMA transmissions. It
  *	shouldn't happen. Scream loudly if it does.
  */
- 
 static void z8530_dma_tx(struct z8530_channel *chan)
 {
 	if(!chan->dma_tx)
@@ -517,20 +516,19 @@ static void z8530_dma_tx(struct z8530_channel *chan)
 /**
  *	z8530_dma_status - Handle a DMA status exception
  *	@chan: Z8530 channel to process
- *	
+ *
  *	A status event occurred on the Z8530. We receive these for two reasons
  *	when in DMA mode. Firstly if we finished a packet transfer we get one
  *	and kick the next packet out. Secondly we may see a DCD change.
  *
  */
- 
 static void z8530_dma_status(struct z8530_channel *chan)
 {
 	u8 status, altered;
 
 	status=read_zsreg(chan, R0);
 	altered=chan->status^status;
-	
+
 	chan->status=status;
 
 	if(chan->dma_tx)
@@ -538,10 +536,10 @@ static void z8530_dma_status(struct z8530_channel *chan)
 		if(status&TxEOM)
 		{
 			unsigned long flags;
-	
+
 			flags=claim_dma_lock();
 			disable_dma(chan->txdma);
-			clear_dma_ff(chan->txdma);	
+			clear_dma_ff(chan->txdma);
 			chan->txdma_on=0;
 			release_dma_lock(flags);
 			z8530_tx_done(chan);
@@ -597,7 +595,7 @@ static void z8530_rx_clear(struct z8530_channel *c)
 
 	read_zsdata(c);
 	stat=read_zsreg(c, R1);
-	
+
 	if(stat&END_FR)
 		write_zsctrl(c, RES_Rx_CRC);
 	/*	Clear irq
@@ -670,7 +668,7 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 	static volatile int locker=0;
 	int work=0;
 	struct z8530_irqhandler *irqs;
-	
+
 	if(locker)
 	{
 		pr_err("IRQ re-enter\n");
@@ -685,15 +683,15 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 		intr = read_zsreg(&dev->chanA, R3);
 		if(!(intr & (CHARxIP|CHATxIP|CHAEXT|CHBRxIP|CHBTxIP|CHBEXT)))
 			break;
-	
+
 		/* This holds the IRQ status. On the 8530 you must read it
 		 * from chan A even though it applies to the whole chip
 		 */
-		
+
 		/* Now walk the chip and see what it is wanting - it may be
 		 * an IRQ for someone else remember
 		 */
-		   
+
 		irqs=dev->chanA.irqs;
 
 		if(intr & (CHARxIP|CHATxIP|CHAEXT))
@@ -744,7 +742,6 @@ static const u8 reg_init[16]=
  *	Switch a Z8530 into synchronous mode without DMA assist. We
  *	raise the RTS/DTR and commence network operation.
  */
- 
 int z8530_sync_open(struct net_device *dev, struct z8530_channel *c)
 {
 	unsigned long flags;
@@ -780,17 +777,16 @@ EXPORT_SYMBOL(z8530_sync_open);
  *	Close down a Z8530 interface and switch its interrupt handlers
  *	to discard future events.
  */
- 
 int z8530_sync_close(struct net_device *dev, struct z8530_channel *c)
 {
 	u8 chk;
 	unsigned long flags;
-	
+
 	spin_lock_irqsave(c->lock, flags);
 	c->irqs = &z8530_nop;
 	c->max = 0;
 	c->sync = 0;
-	
+
 	chk=read_zsreg(c,R0);
 	write_zsreg(c, R3, c->regs[R3]);
 	z8530_rtsdtr(c,0);
@@ -809,11 +805,10 @@ EXPORT_SYMBOL(z8530_sync_close);
  *	ISA DMA channels must be available for this to work. We assume ISA
  *	DMA driven I/O and PC limits on access.
  */
- 
 int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 {
 	unsigned long cflags, dflags;
-	
+
 	c->sync = 1;
 	c->mtu = dev->mtu+64;
 	c->count = 0;
@@ -829,15 +824,15 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	 *	Everyone runs 1500 mtu or less on wan links so this
 	 *	should be fine.
 	 */
-	 
+
 	if(c->mtu  > PAGE_SIZE/2)
 		return -EMSGSIZE;
-	 
+
 	c->rx_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
 	if (!c->rx_buf[0])
 		return -ENOBUFS;
 	c->rx_buf[1]=c->rx_buf[0]+PAGE_SIZE/2;
-	
+
 	c->tx_dma_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
 	if (!c->tx_dma_buf[0])
 	{
@@ -851,7 +846,7 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->dma_tx = 1;
 	c->dma_num=0;
 	c->dma_ready=1;
-	
+
 	/*	Enable DMA control mode
 	 */
 
@@ -859,15 +854,15 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 
 	/*	TX DMA via DIR/REQ
 	 */
-	 
+
 	c->regs[R14]|= DTRREQ;
-	write_zsreg(c, R14, c->regs[R14]);     
+	write_zsreg(c, R14, c->regs[R14]);
 
 	c->regs[R1]&= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
 
 	/*	RX DMA via W/Req
-	 */	 
+	 */
 
 	c->regs[R1]|= WT_FN_RDYFN;
 	c->regs[R1]|= WT_RDY_RT;
@@ -875,16 +870,16 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->regs[R1]&= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
 	c->regs[R1]|= WT_RDY_ENAB;
-	write_zsreg(c, R1, c->regs[R1]);            
+	write_zsreg(c, R1, c->regs[R1]);
 
 	/*	DMA interrupts
 	 */
 
 	/*	Set up the DMA configuration
-	 */	
-	 
+	 */
+
 	dflags=claim_dma_lock();
-	 
+
 	disable_dma(c->rxdma);
 	clear_dma_ff(c->rxdma);
 	set_dma_mode(c->rxdma, DMA_MODE_READ|0x10);
@@ -896,7 +891,7 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	clear_dma_ff(c->txdma);
 	set_dma_mode(c->txdma, DMA_MODE_WRITE);
 	disable_dma(c->txdma);
-	
+
 	release_dma_lock(dflags);
 
 	/*	Select the DMA interrupt handlers
@@ -905,13 +900,13 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->rxdma_on = 1;
 	c->txdma_on = 1;
 	c->tx_dma_used = 1;
-	 
+
 	c->irqs = &z8530_dma_sync;
 	z8530_rtsdtr(c,1);
 	write_zsreg(c, R3, c->regs[R3]|RxENABLE);
 
 	spin_unlock_irqrestore(c->lock, cflags);
-	
+
 	return 0;
 }
 EXPORT_SYMBOL(z8530_sync_dma_open);
@@ -924,29 +919,28 @@ EXPORT_SYMBOL(z8530_sync_dma_open);
  *	Shut down a DMA mode synchronous interface. Halt the DMA, and
  *	free the buffers.
  */
- 
 int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 {
 	u8 chk;
 	unsigned long flags;
-	
+
 	c->irqs = &z8530_nop;
 	c->max = 0;
 	c->sync = 0;
 
 	/*	Disable the PC DMA channels
 	 */
-	
-	flags=claim_dma_lock(); 
+
+	flags = claim_dma_lock();
 	disable_dma(c->rxdma);
 	clear_dma_ff(c->rxdma);
-	
+
 	c->rxdma_on = 0;
-	
+
 	disable_dma(c->txdma);
 	clear_dma_ff(c->txdma);
 	release_dma_lock(flags);
-	
+
 	c->txdma_on = 0;
 	c->tx_dma_used = 0;
 
@@ -954,15 +948,15 @@ int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 
 	/*	Disable DMA control mode
 	 */
-	 
+
 	c->regs[R1]&= ~WT_RDY_ENAB;
-	write_zsreg(c, R1, c->regs[R1]);            
+	write_zsreg(c, R1, c->regs[R1]);
 	c->regs[R1]&= ~(WT_RDY_RT|WT_FN_RDYFN|INT_ERR_Rx);
 	c->regs[R1]|= INT_ALL_Rx;
 	write_zsreg(c, R1, c->regs[R1]);
 	c->regs[R14]&= ~DTRREQ;
-	write_zsreg(c, R14, c->regs[R14]);   
-	
+	write_zsreg(c, R14, c->regs[R14]);
+
 	if(c->rx_buf[0])
 	{
 		free_page((unsigned long)c->rx_buf[0]);
@@ -1008,10 +1002,10 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	 *	Everyone runs 1500 mtu or less on wan links so this
 	 *	should be fine.
 	 */
-	 
+
 	if(c->mtu  > PAGE_SIZE/2)
 		return -EMSGSIZE;
-	 
+
 	c->tx_dma_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
 	if (!c->tx_dma_buf[0])
 		return -ENOBUFS;
@@ -1031,7 +1025,7 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 
 	c->rxdma_on = 0;
 	c->txdma_on = 0;
-	
+
 	c->tx_dma_used=0;
 	c->dma_num=0;
 	c->dma_ready=1;
@@ -1043,14 +1037,14 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	/*	TX DMA via DIR/REQ
 	 */
 	c->regs[R14]|= DTRREQ;
-	write_zsreg(c, R14, c->regs[R14]);     
-	
+	write_zsreg(c, R14, c->regs[R14]);
+
 	c->regs[R1]&= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
 
 	/*	Set up the DMA configuration
-	 */	
-	 
+	 */
+
 	dflags = claim_dma_lock();
 
 	disable_dma(c->txdma);
@@ -1066,12 +1060,12 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	c->rxdma_on = 0;
 	c->txdma_on = 1;
 	c->tx_dma_used = 1;
-	 
+
 	c->irqs = &z8530_txdma_sync;
 	z8530_rtsdtr(c,1);
 	write_zsreg(c, R3, c->regs[R3]|RxENABLE);
 	spin_unlock_irqrestore(c->lock, cflags);
-	
+
 	return 0;
 }
 EXPORT_SYMBOL(z8530_sync_txdma_open);
@@ -1081,7 +1075,7 @@ EXPORT_SYMBOL(z8530_sync_txdma_open);
  *	@dev: Network device to detach
  *	@c: Z8530 channel to move into discard mode
  *
- *	Shut down a DMA/PIO split mode synchronous interface. Halt the DMA, 
+ *	Shut down a DMA/PIO split mode synchronous interface. Halt the DMA,
  *	and  free the buffers.
  */
 
@@ -1091,14 +1085,14 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	u8 chk;
 
 	spin_lock_irqsave(c->lock, cflags);
-	
+
 	c->irqs = &z8530_nop;
 	c->max = 0;
 	c->sync = 0;
 
 	/*	Disable the PC DMA channels
 	 */
-	 
+
 	dflags = claim_dma_lock();
 
 	disable_dma(c->txdma);
@@ -1110,15 +1104,15 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 
 	/*	Disable DMA control mode
 	 */
-	 
+
 	c->regs[R1]&= ~WT_RDY_ENAB;
-	write_zsreg(c, R1, c->regs[R1]);            
+	write_zsreg(c, R1, c->regs[R1]);
 	c->regs[R1]&= ~(WT_RDY_RT|WT_FN_RDYFN|INT_ERR_Rx);
 	c->regs[R1]|= INT_ALL_Rx;
 	write_zsreg(c, R1, c->regs[R1]);
 	c->regs[R14]&= ~DTRREQ;
-	write_zsreg(c, R14, c->regs[R14]);   
-	
+	write_zsreg(c, R14, c->regs[R14]);
+
 	if(c->tx_dma_buf[0])
 	{
 		free_page((unsigned long)c->tx_dma_buf[0]);
@@ -1136,7 +1130,6 @@ EXPORT_SYMBOL(z8530_sync_txdma_close);
 /*	Name strings for Z8530 chips. SGI claim to have a 130, Zilog deny
  *	it exists...
  */
- 
 static const char *z8530_type_name[]={
 	"Z8530",
 	"Z85C30",
@@ -1157,7 +1150,7 @@ static const char *z8530_type_name[]={
 void z8530_describe(struct z8530_dev *dev, char *mapping, unsigned long io)
 {
 	pr_info("%s: %s found at %s 0x%lX, IRQ %d\n",
-		dev->name, 
+		dev->name,
 		z8530_type_name[dev->type],
 		mapping,
 		Z8530_PORT_OF(io),
@@ -1167,7 +1160,6 @@ EXPORT_SYMBOL(z8530_describe);
 
 /*	Locked operation part of the z8530 init code
  */
- 
 static inline int do_z8530_init(struct z8530_dev *dev)
 {
 	/* NOP the interrupt handlers first - we might get a
@@ -1188,18 +1180,18 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 	write_zsreg(&dev->chanA, R12, 0x55);
 	if(read_zsreg(&dev->chanA, R12)!=0x55)
 		return -ENODEV;
-		
+
 	dev->type=Z8530;
 
 	/*	See the application note.
 	 */
-	 
+
 	write_zsreg(&dev->chanA, R15, 0x01);
 
 	/*	If we can set the low bit of R15 then
 	 *	the chip is enhanced.
 	 */
-	 
+
 	if(read_zsreg(&dev->chanA, R15)==0x01)
 	{
 		/* This C30 versus 230 detect is from Klaus Kudielka's dmascc */
@@ -1215,15 +1207,15 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 	 *	off. Use write_zsext() for these and keep
 	 *	this bit clear.
 	 */
-	 
+
 	write_zsreg(&dev->chanA, R15, 0);
 
 	/*	At this point it looks like the chip is behaving
 	 */
-	 
+
 	memcpy(dev->chanA.regs, reg_init, 16);
 	memcpy(dev->chanB.regs, reg_init ,16);
-	
+
 	return 0;
 }
 
@@ -1266,13 +1258,12 @@ EXPORT_SYMBOL(z8530_init);
  *	z8530_shutdown - Shutdown a Z8530 device
  *	@dev: The Z8530 chip to shutdown
  *
- *	We set the interrupt handlers to silence any interrupts. We then 
+ *	We set the interrupt handlers to silence any interrupts. We then
  *	reset the chip and wait 100uS to be sure the reset completed. Just
  *	in case the caller then tries to do stuff.
  *
  *	This is called without the lock held
  */
- 
 int z8530_shutdown(struct z8530_dev *dev)
 {
 	unsigned long flags;
@@ -1295,7 +1286,7 @@ EXPORT_SYMBOL(z8530_shutdown);
  *	@rtable: table of register, value pairs
  *	FIXME: ioctl to allow user uploaded tables
  *
- *	Load a Z8530 channel up from the system data. We use +16 to 
+ *	Load a Z8530 channel up from the system data. We use +16 to
  *	indicate the "prime" registers. The value 255 terminates the
  *	table.
  */
@@ -1339,7 +1330,7 @@ EXPORT_SYMBOL(z8530_channel_load);
  *
  *	This is the speed sensitive side of transmission. If we are called
  *	and no buffer is being transmitted we commence the next buffer. If
- *	nothing is queued we idle the sync. 
+ *	nothing is queued we idle the sync.
  *
  *	Note: We are handling this code path in the interrupt path, keep it
  *	fast or bad things will happen.
@@ -1353,11 +1344,11 @@ static void z8530_tx_begin(struct z8530_channel *c)
 
 	if(c->tx_skb)
 		return;
-		
+
 	c->tx_skb=c->tx_next_skb;
 	c->tx_next_skb=NULL;
 	c->tx_ptr=c->tx_next_ptr;
-	
+
 	if (!c->tx_skb)
 	{
 		/* Idle on */
@@ -1383,21 +1374,20 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			/*	FIXME. DMA is broken for the original 8530,
 			 *	on the older parts we need to set a flag and
 			 *	wait for a further TX interrupt to fire this
-			 *	stage off	
+			 *	stage off
 			 */
-			 
+
 			flags=claim_dma_lock();
 			disable_dma(c->txdma);
 
 			/*	These two are needed by the 8530/85C30
 			 *	and must be issued when idling.
 			 */
-			 
 			if(c->dev->type!=Z85230)
 			{
 				write_zsctrl(c, RES_Tx_CRC);
 				write_zsctrl(c, RES_EOM_L);
-			}	
+			}
 			write_zsreg(c, R10, c->regs[10]&~ABUNDER);
 			clear_dma_ff(c->txdma);
 			set_dma_addr(c->txdma, virt_to_bus(c->tx_ptr));
@@ -1410,9 +1400,8 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			/* ABUNDER off */
 			write_zsreg(c, R10, c->regs[10]);
 			write_zsctrl(c, RES_Tx_CRC);
-	
-			while(c->txcount && (read_zsreg(c,R0)&Tx_BUF_EMP))
-			{		
+
+			while (c->txcount && (read_zsreg(c, R0) & Tx_BUF_EMP)) {
 				write_zsreg(c, R8, *c->tx_ptr++);
 				c->txcount--;
 			}
@@ -1458,7 +1447,6 @@ static void z8530_tx_done(struct z8530_channel *c)
  *	We point the receive handler at this function when idle. Instead
  *	of processing the frames we get to throw them away.
  */
- 
 void z8530_null_rx(struct z8530_channel *c, struct sk_buff *skb)
 {
 	dev_kfree_skb_any(skb);
@@ -1477,7 +1465,6 @@ EXPORT_SYMBOL(z8530_null_rx);
  *
  *	Called with the lock held
  */
- 
 static void z8530_rx_done(struct z8530_channel *c)
 {
 	struct sk_buff *skb;
@@ -1495,9 +1482,9 @@ static void z8530_rx_done(struct z8530_channel *c)
 		unsigned long flags;
 
 		/*	Complete this DMA. Necessary to find the length
-		 */		
+		 */
 		flags=claim_dma_lock();
-		
+
 		disable_dma(c->rxdma);
 		clear_dma_ff(c->rxdma);
 		c->rxdma_on=0;
@@ -1509,7 +1496,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		/*	Normal case: the other slot is free, start the next DMA
 		 *	into it immediately.
 		 */
-		 
+
 		if(ready)
 		{
 			c->dma_num^=1;
@@ -1621,18 +1608,17 @@ static inline int spans_boundary(struct sk_buff *skb)
  *	@skb: The packet to kick down the channel
  *
  *	Queue a packet for transmission. Because we have rather
- *	hard to hit interrupt latencies for the Z85230 per packet 
+ *	hard to hit interrupt latencies for the Z85230 per packet
  *	even in DMA mode we do the flip to DMA buffer if needed here
  *	not in the IRQ.
  *
- *	Called from the network code. The lock is not held at this 
+ *	Called from the network code. The lock is not held at this
  *	point.
  */
-
 netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 {
 	unsigned long flags;
-	
+
 	netif_stop_queue(c->netdevice);
 	if(c->tx_next_skb)
 		return NETDEV_TX_BUSY;
@@ -1641,7 +1627,7 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 	/*	If we will DMA the transmit and its gone over the ISA bus
 	 *	limit, then copy to the flip buffer
 	 */
-	 
+
 	if(c->dma_tx && ((unsigned long)(virt_to_bus(skb->data+skb->len))>=16*1024*1024 || spans_boundary(skb)))
 	{
 		/*	Send the flip buffer, and flip the flippy bit.
@@ -1659,11 +1645,11 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 	RT_LOCK;
 	c->tx_next_skb=skb;
 	RT_UNLOCK;
-	
+
 	spin_lock_irqsave(c->lock, flags);
 	z8530_tx_begin(c);
 	spin_unlock_irqrestore(c->lock, flags);
-	
+
 	return NETDEV_TX_OK;
 }
 EXPORT_SYMBOL(z8530_queue_xmit);

From b87a5cf65655d5ea078e21dfe6ca711badca251c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:43 +0800
Subject: [PATCH 1826/3292] net: z85230: add some required spaces

Add space required before the open parenthesis '(' and '{'.
Add space required after that close brace '}' and ','
Add spaces required around that '=' , '&', '*', '|', '+', '/' and '-'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 416 ++++++++++++++++++++-------------------
 1 file changed, 209 insertions(+), 207 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 5db452d24e94..cab963f5efca 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -71,9 +71,9 @@
 
 static inline int z8530_read_port(unsigned long p)
 {
-	u8 r=inb(Z8530_PORT_OF(p));
+	u8 r = inb(Z8530_PORT_OF(p));
 
-	if(p&Z8530_PORT_SLEEP)	/* gcc should figure this out efficiently ! */
+	if (p & Z8530_PORT_SLEEP) /* gcc should figure this out efficiently ! */
 		udelay(5);
 	return r;
 }
@@ -95,8 +95,8 @@ static inline int z8530_read_port(unsigned long p)
 
 static inline void z8530_write_port(unsigned long p, u8 d)
 {
-	outb(d,Z8530_PORT_OF(p));
-	if(p&Z8530_PORT_SLEEP)
+	outb(d, Z8530_PORT_OF(p));
+	if (p & Z8530_PORT_SLEEP)
 		udelay(5);
 }
 
@@ -116,7 +116,7 @@ static void z8530_tx_done(struct z8530_channel *c);
 
 static inline u8 read_zsreg(struct z8530_channel *c, u8 reg)
 {
-	if(reg)
+	if (reg)
 		z8530_write_port(c->ctrlio, reg);
 	return z8530_read_port(c->ctrlio);
 }
@@ -133,7 +133,7 @@ static inline u8 read_zsdata(struct z8530_channel *c)
 {
 	u8 r;
 
-	r=z8530_read_port(c->dataio);
+	r = z8530_read_port(c->dataio);
 	return r;
 }
 
@@ -151,7 +151,7 @@ static inline u8 read_zsdata(struct z8530_channel *c)
  */
 static inline void write_zsreg(struct z8530_channel *c, u8 reg, u8 val)
 {
-	if(reg)
+	if (reg)
 		z8530_write_port(c->ctrlio, reg);
 	z8530_write_port(c->ctrlio, val);
 }
@@ -184,7 +184,7 @@ static inline void write_zsdata(struct z8530_channel *c, u8 val)
 /*	Register loading parameters for a dead port
  */
 
-u8 z8530_dead_port[]=
+u8 z8530_dead_port[] =
 {
 	255
 };
@@ -197,22 +197,22 @@ EXPORT_SYMBOL(z8530_dead_port);
  *	"kilostream" service, and most other similar services.
  */
 
-u8 z8530_hdlc_kilostream[]=
+u8 z8530_hdlc_kilostream[] =
 {
-	4,	SYNC_ENAB|SDLC|X1CLK,
+	4,	SYNC_ENAB | SDLC | X1CLK,
 	2,	0,	/* No vector */
 	1,	0,
-	3,	ENT_HM|RxCRC_ENAB|Rx8,
-	5,	TxCRC_ENAB|RTS|TxENAB|Tx8|DTR,
+	3,	ENT_HM | RxCRC_ENAB | Rx8,
+	5,	TxCRC_ENAB | RTS | TxENAB | Tx8 | DTR,
 	9,	0,		/* Disable interrupts */
 	6,	0xFF,
 	7,	FLAG,
-	10,	ABUNDER|NRZ|CRCPS,/*MARKIDLE ??*/
+	10,	ABUNDER | NRZ | CRCPS,/*MARKIDLE ??*/
 	11,	TCTRxCP,
 	14,	DISDPLL,
-	15,	DCDIE|SYNCIE|CTSIE|TxUIE|BRKIE,
-	1,	EXT_INT_ENAB|TxINT_ENAB|INT_ALL_Rx,
-	9,	NV|MIE|NORESET,
+	15,	DCDIE | SYNCIE | CTSIE | TxUIE | BRKIE,
+	1,	EXT_INT_ENAB | TxINT_ENAB | INT_ALL_Rx,
+	9,	NV | MIE | NORESET,
 	255
 };
 EXPORT_SYMBOL(z8530_hdlc_kilostream);
@@ -220,22 +220,22 @@ EXPORT_SYMBOL(z8530_hdlc_kilostream);
 /*	As above but for enhanced chips.
  */
 
-u8 z8530_hdlc_kilostream_85230[]=
+u8 z8530_hdlc_kilostream_85230[] =
 {
-	4,	SYNC_ENAB|SDLC|X1CLK,
+	4,	SYNC_ENAB | SDLC | X1CLK,
 	2,	0,	/* No vector */
 	1,	0,
-	3,	ENT_HM|RxCRC_ENAB|Rx8,
-	5,	TxCRC_ENAB|RTS|TxENAB|Tx8|DTR,
+	3,	ENT_HM | RxCRC_ENAB | Rx8,
+	5,	TxCRC_ENAB | RTS | TxENAB | Tx8 | DTR,
 	9,	0,		/* Disable interrupts */
 	6,	0xFF,
 	7,	FLAG,
-	10,	ABUNDER|NRZ|CRCPS,	/* MARKIDLE?? */
+	10,	ABUNDER | NRZ | CRCPS,	/* MARKIDLE?? */
 	11,	TCTRxCP,
 	14,	DISDPLL,
-	15,	DCDIE|SYNCIE|CTSIE|TxUIE|BRKIE,
-	1,	EXT_INT_ENAB|TxINT_ENAB|INT_ALL_Rx,
-	9,	NV|MIE|NORESET,
+	15,	DCDIE | SYNCIE | CTSIE | TxUIE | BRKIE,
+	1,	EXT_INT_ENAB | TxINT_ENAB | INT_ALL_Rx,
+	9,	NV | MIE | NORESET,
 	23,	3,		/* Extended mode AUTO TX and EOM*/
 
 	255
@@ -260,7 +260,7 @@ static void z8530_flush_fifo(struct z8530_channel *c)
 	read_zsreg(c, R1);
 	read_zsreg(c, R1);
 	read_zsreg(c, R1);
-	if(c->dev->type==Z85230)
+	if (c->dev->type == Z85230)
 	{
 		read_zsreg(c, R1);
 		read_zsreg(c, R1);
@@ -315,40 +315,40 @@ static void z8530_rtsdtr(struct z8530_channel *c, int set)
 
 static void z8530_rx(struct z8530_channel *c)
 {
-	u8 ch,stat;
+	u8 ch, stat;
 
-	while(1)
+	while (1)
 	{
 		/* FIFO empty ? */
-		if(!(read_zsreg(c, R0)&1))
+		if (!(read_zsreg(c, R0) & 1))
 			break;
-		ch=read_zsdata(c);
-		stat=read_zsreg(c, R1);
+		ch = read_zsdata(c);
+		stat = read_zsreg(c, R1);
 
 		/*	Overrun ?
 		 */
-		if(c->count < c->max)
+		if (c->count < c->max)
 		{
-			*c->dptr++=ch;
+			*c->dptr++ = ch;
 			c->count++;
 		}
 
-		if(stat&END_FR)
+		if (stat & END_FR)
 		{
 			/*	Error ?
 			 */
-			if(stat&(Rx_OVR|CRC_ERR))
+			if (stat & (Rx_OVR | CRC_ERR))
 			{
 				/* Rewind the buffer and return */
-				if(c->skb)
-					c->dptr=c->skb->data;
-				c->count=0;
-				if(stat&Rx_OVR)
+				if (c->skb)
+					c->dptr = c->skb->data;
+				c->count = 0;
+				if (stat & Rx_OVR)
 				{
 					pr_warn("%s: overrun\n", c->dev->name);
 					c->rx_overrun++;
 				}
-				if(stat&CRC_ERR)
+				if (stat & CRC_ERR)
 				{
 					c->rx_crc_err++;
 					/* printk("crc error\n"); */
@@ -356,8 +356,8 @@ static void z8530_rx(struct z8530_channel *c)
 				/* Shove the frame upstream */
 			} else {
 				/*	Drop the lock for RX processing, or
-		 		 *	there are deadlocks
-		 		 */
+				 *	there are deadlocks
+				 */
 				z8530_rx_done(c);
 				write_zsctrl(c, RES_Rx_CRC);
 			}
@@ -381,9 +381,9 @@ static void z8530_rx(struct z8530_channel *c)
 
 static void z8530_tx(struct z8530_channel *c)
 {
-	while(c->txcount) {
+	while (c->txcount) {
 		/* FIFO full ? */
-		if(!(read_zsreg(c, R0)&4))
+		if (!(read_zsreg(c, R0) & 4))
 			return;
 		c->txcount--;
 		/*	Shovel out the byte
@@ -391,10 +391,10 @@ static void z8530_tx(struct z8530_channel *c)
 		write_zsreg(c, R8, *c->tx_ptr++);
 		write_zsctrl(c, RES_H_IUS);
 		/* We are about to underflow */
-		if(c->txcount==0)
+		if (c->txcount == 0)
 		{
 			write_zsctrl(c, RES_EOM_L);
-			write_zsreg(c, R10, c->regs[10]&~ABUNDER);
+			write_zsreg(c, R10, c->regs[10] & ~ABUNDER);
 		}
 	}
 
@@ -471,7 +471,7 @@ EXPORT_SYMBOL(z8530_sync);
 
 static void z8530_dma_rx(struct z8530_channel *chan)
 {
-	if(chan->rxdma_on)
+	if (chan->rxdma_on)
 	{
 		/* Special condition check only */
 		u8 status;
@@ -479,12 +479,11 @@ static void z8530_dma_rx(struct z8530_channel *chan)
 		read_zsreg(chan, R7);
 		read_zsreg(chan, R6);
 
-		status=read_zsreg(chan, R1);
+		status = read_zsreg(chan, R1);
 
-		if(status&END_FR)
-		{
+		if (status & END_FR)
 			z8530_rx_done(chan);	/* Fire up the next one */
-		}
+
 		write_zsctrl(chan, ERR_RES);
 		write_zsctrl(chan, RES_H_IUS);
 	} else {
@@ -502,7 +501,7 @@ static void z8530_dma_rx(struct z8530_channel *chan)
  */
 static void z8530_dma_tx(struct z8530_channel *chan)
 {
-	if(!chan->dma_tx)
+	if (!chan->dma_tx)
 	{
 		pr_warn("Hey who turned the DMA off?\n");
 		z8530_tx(chan);
@@ -526,21 +525,21 @@ static void z8530_dma_status(struct z8530_channel *chan)
 {
 	u8 status, altered;
 
-	status=read_zsreg(chan, R0);
-	altered=chan->status^status;
+	status = read_zsreg(chan, R0);
+	altered = chan->status ^ status;
 
-	chan->status=status;
+	chan->status = status;
 
-	if(chan->dma_tx)
+	if (chan->dma_tx)
 	{
-		if(status&TxEOM)
+		if (status & TxEOM)
 		{
 			unsigned long flags;
 
-			flags=claim_dma_lock();
+			flags = claim_dma_lock();
 			disable_dma(chan->txdma);
 			clear_dma_ff(chan->txdma);
-			chan->txdma_on=0;
+			chan->txdma_on = 0;
 			release_dma_lock(flags);
 			z8530_tx_done(chan);
 		}
@@ -594,9 +593,9 @@ static void z8530_rx_clear(struct z8530_channel *c)
 	u8 stat;
 
 	read_zsdata(c);
-	stat=read_zsreg(c, R1);
+	stat = read_zsreg(c, R1);
 
-	if(stat&END_FR)
+	if (stat & END_FR)
 		write_zsctrl(c, RES_Rx_CRC);
 	/*	Clear irq
 	 */
@@ -630,9 +629,9 @@ static void z8530_tx_clear(struct z8530_channel *c)
 
 static void z8530_status_clear(struct z8530_channel *chan)
 {
-	u8 status=read_zsreg(chan, R0);
+	u8 status = read_zsreg(chan, R0);
 
-	if(status&TxEOM)
+	if (status & TxEOM)
 		write_zsctrl(chan, ERR_RES);
 	write_zsctrl(chan, RES_EXT_INT);
 	write_zsctrl(chan, RES_H_IUS);
@@ -647,7 +646,7 @@ EXPORT_SYMBOL(z8530_nop);
 
 /**
  *	z8530_interrupt - Handle an interrupt from a Z8530
- *	@irq: 	Interrupt number
+ *	@irq: Interrupt number
  *	@dev_id: The Z8530 device that is interrupting.
  *
  *	A Z85[2]30 device has stuck its hand in the air for attention.
@@ -663,25 +662,26 @@ EXPORT_SYMBOL(z8530_nop);
 
 irqreturn_t z8530_interrupt(int irq, void *dev_id)
 {
-	struct z8530_dev *dev=dev_id;
+	struct z8530_dev *dev = dev_id;
 	u8 intr;
 	static volatile int locker=0;
-	int work=0;
+	int work = 0;
 	struct z8530_irqhandler *irqs;
 
-	if(locker)
+	if (locker)
 	{
 		pr_err("IRQ re-enter\n");
 		return IRQ_NONE;
 	}
-	locker=1;
+	locker = 1;
 
 	spin_lock(&dev->lock);
 
-	while(++work<5000)
+	while (++work < 5000)
 	{
 		intr = read_zsreg(&dev->chanA, R3);
-		if(!(intr & (CHARxIP|CHATxIP|CHAEXT|CHBRxIP|CHBTxIP|CHBEXT)))
+		if (!(intr &
+		   (CHARxIP | CHATxIP | CHAEXT | CHBRxIP | CHBTxIP | CHBEXT)))
 			break;
 
 		/* This holds the IRQ status. On the 8530 you must read it
@@ -692,46 +692,46 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 		 * an IRQ for someone else remember
 		 */
 
-		irqs=dev->chanA.irqs;
+		irqs = dev->chanA.irqs;
 
-		if(intr & (CHARxIP|CHATxIP|CHAEXT))
+		if (intr & (CHARxIP | CHATxIP | CHAEXT))
 		{
-			if(intr&CHARxIP)
+			if (intr & CHARxIP)
 				irqs->rx(&dev->chanA);
-			if(intr&CHATxIP)
+			if (intr & CHATxIP)
 				irqs->tx(&dev->chanA);
-			if(intr&CHAEXT)
+			if (intr & CHAEXT)
 				irqs->status(&dev->chanA);
 		}
 
-		irqs=dev->chanB.irqs;
+		irqs = dev->chanB.irqs;
 
-		if(intr & (CHBRxIP|CHBTxIP|CHBEXT))
+		if (intr & (CHBRxIP | CHBTxIP | CHBEXT))
 		{
-			if(intr&CHBRxIP)
+			if (intr & CHBRxIP)
 				irqs->rx(&dev->chanB);
-			if(intr&CHBTxIP)
+			if (intr & CHBTxIP)
 				irqs->tx(&dev->chanB);
-			if(intr&CHBEXT)
+			if (intr & CHBEXT)
 				irqs->status(&dev->chanB);
 		}
 	}
 	spin_unlock(&dev->lock);
-	if(work==5000)
+	if (work == 5000)
 		pr_err("%s: interrupt jammed - abort(0x%X)!\n",
 		       dev->name, intr);
 	/* Ok all done */
-	locker=0;
+	locker = 0;
 	return IRQ_HANDLED;
 }
 EXPORT_SYMBOL(z8530_interrupt);
 
-static const u8 reg_init[16]=
+static const u8 reg_init[16] =
 {
-	0,0,0,0,
-	0,0,0,0,
-	0,0,0,0,
-	0x55,0,0,0
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0x55, 0, 0, 0
 };
 
 /**
@@ -749,7 +749,7 @@ int z8530_sync_open(struct net_device *dev, struct z8530_channel *c)
 	spin_lock_irqsave(c->lock, flags);
 
 	c->sync = 1;
-	c->mtu = dev->mtu+64;
+	c->mtu = dev->mtu + 64;
 	c->count = 0;
 	c->skb = NULL;
 	c->skb2 = NULL;
@@ -758,11 +758,11 @@ int z8530_sync_open(struct net_device *dev, struct z8530_channel *c)
 	/* This loads the double buffer up */
 	z8530_rx_done(c);	/* Load the frame ring */
 	z8530_rx_done(c);	/* Load the backup frame */
-	z8530_rtsdtr(c,1);
+	z8530_rtsdtr(c, 1);
 	c->dma_tx = 0;
-	c->regs[R1]|=TxINT_ENAB;
+	c->regs[R1] |= TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
-	write_zsreg(c, R3, c->regs[R3]|RxENABLE);
+	write_zsreg(c, R3, c->regs[R3] | RxENABLE);
 
 	spin_unlock_irqrestore(c->lock, flags);
 	return 0;
@@ -787,9 +787,9 @@ int z8530_sync_close(struct net_device *dev, struct z8530_channel *c)
 	c->max = 0;
 	c->sync = 0;
 
-	chk=read_zsreg(c,R0);
+	chk = read_zsreg(c, R0);
 	write_zsreg(c, R3, c->regs[R3]);
-	z8530_rtsdtr(c,0);
+	z8530_rtsdtr(c, 0);
 
 	spin_unlock_irqrestore(c->lock, flags);
 	return 0;
@@ -810,7 +810,7 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	unsigned long cflags, dflags;
 
 	c->sync = 1;
-	c->mtu = dev->mtu+64;
+	c->mtu = dev->mtu + 64;
 	c->count = 0;
 	c->skb = NULL;
 	c->skb2 = NULL;
@@ -825,27 +825,27 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	 *	should be fine.
 	 */
 
-	if(c->mtu  > PAGE_SIZE/2)
+	if (c->mtu  > PAGE_SIZE / 2)
 		return -EMSGSIZE;
 
-	c->rx_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
+	c->rx_buf[0] = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
 	if (!c->rx_buf[0])
 		return -ENOBUFS;
-	c->rx_buf[1]=c->rx_buf[0]+PAGE_SIZE/2;
+	c->rx_buf[1] = c->rx_buf[0] + PAGE_SIZE / 2;
 
-	c->tx_dma_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
+	c->tx_dma_buf[0] = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
 	if (!c->tx_dma_buf[0])
 	{
 		free_page((unsigned long)c->rx_buf[0]);
-		c->rx_buf[0]=NULL;
+		c->rx_buf[0] = NULL;
 		return -ENOBUFS;
 	}
-	c->tx_dma_buf[1]=c->tx_dma_buf[0]+PAGE_SIZE/2;
+	c->tx_dma_buf[1] = c->tx_dma_buf[0] + PAGE_SIZE / 2;
 
-	c->tx_dma_used=0;
+	c->tx_dma_used = 0;
 	c->dma_tx = 1;
-	c->dma_num=0;
-	c->dma_ready=1;
+	c->dma_num = 0;
+	c->dma_ready = 1;
 
 	/*	Enable DMA control mode
 	 */
@@ -855,21 +855,21 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	/*	TX DMA via DIR/REQ
 	 */
 
-	c->regs[R14]|= DTRREQ;
+	c->regs[R14] |= DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);
 
-	c->regs[R1]&= ~TxINT_ENAB;
+	c->regs[R1] &= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
 
 	/*	RX DMA via W/Req
 	 */
 
-	c->regs[R1]|= WT_FN_RDYFN;
-	c->regs[R1]|= WT_RDY_RT;
-	c->regs[R1]|= INT_ERR_Rx;
-	c->regs[R1]&= ~TxINT_ENAB;
+	c->regs[R1] |= WT_FN_RDYFN;
+	c->regs[R1] |= WT_RDY_RT;
+	c->regs[R1] |= INT_ERR_Rx;
+	c->regs[R1] &= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
-	c->regs[R1]|= WT_RDY_ENAB;
+	c->regs[R1] |= WT_RDY_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
 
 	/*	DMA interrupts
@@ -878,11 +878,11 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	/*	Set up the DMA configuration
 	 */
 
-	dflags=claim_dma_lock();
+	dflags = claim_dma_lock();
 
 	disable_dma(c->rxdma);
 	clear_dma_ff(c->rxdma);
-	set_dma_mode(c->rxdma, DMA_MODE_READ|0x10);
+	set_dma_mode(c->rxdma, DMA_MODE_READ | 0x10);
 	set_dma_addr(c->rxdma, virt_to_bus(c->rx_buf[0]));
 	set_dma_count(c->rxdma, c->mtu);
 	enable_dma(c->rxdma);
@@ -902,8 +902,8 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->tx_dma_used = 1;
 
 	c->irqs = &z8530_dma_sync;
-	z8530_rtsdtr(c,1);
-	write_zsreg(c, R3, c->regs[R3]|RxENABLE);
+	z8530_rtsdtr(c, 1);
+	write_zsreg(c, R3, c->regs[R3] | RxENABLE);
 
 	spin_unlock_irqrestore(c->lock, cflags);
 
@@ -949,27 +949,27 @@ int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 	/*	Disable DMA control mode
 	 */
 
-	c->regs[R1]&= ~WT_RDY_ENAB;
+	c->regs[R1] &= ~WT_RDY_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
-	c->regs[R1]&= ~(WT_RDY_RT|WT_FN_RDYFN|INT_ERR_Rx);
-	c->regs[R1]|= INT_ALL_Rx;
+	c->regs[R1] &= ~(WT_RDY_RT | WT_FN_RDYFN | INT_ERR_Rx);
+	c->regs[R1] |= INT_ALL_Rx;
 	write_zsreg(c, R1, c->regs[R1]);
-	c->regs[R14]&= ~DTRREQ;
+	c->regs[R14] &= ~DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);
 
-	if(c->rx_buf[0])
+	if (c->rx_buf[0])
 	{
 		free_page((unsigned long)c->rx_buf[0]);
-		c->rx_buf[0]=NULL;
+		c->rx_buf[0] = NULL;
 	}
-	if(c->tx_dma_buf[0])
+	if (c->tx_dma_buf[0])
 	{
 		free_page((unsigned  long)c->tx_dma_buf[0]);
-		c->tx_dma_buf[0]=NULL;
+		c->tx_dma_buf[0] = NULL;
 	}
-	chk=read_zsreg(c,R0);
+	chk = read_zsreg(c, R0);
 	write_zsreg(c, R3, c->regs[R3]);
-	z8530_rtsdtr(c,0);
+	z8530_rtsdtr(c, 0);
 
 	spin_unlock_irqrestore(c->lock, flags);
 
@@ -993,7 +993,7 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 
 	printk("Opening sync interface for TX-DMA\n");
 	c->sync = 1;
-	c->mtu = dev->mtu+64;
+	c->mtu = dev->mtu + 64;
 	c->count = 0;
 	c->skb = NULL;
 	c->skb2 = NULL;
@@ -1003,14 +1003,14 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	 *	should be fine.
 	 */
 
-	if(c->mtu  > PAGE_SIZE/2)
+	if (c->mtu > PAGE_SIZE / 2)
 		return -EMSGSIZE;
 
-	c->tx_dma_buf[0]=(void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
+	c->tx_dma_buf[0] = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
 	if (!c->tx_dma_buf[0])
 		return -ENOBUFS;
 
-	c->tx_dma_buf[1] = c->tx_dma_buf[0] + PAGE_SIZE/2;
+	c->tx_dma_buf[1] = c->tx_dma_buf[0] + PAGE_SIZE / 2;
 
 	spin_lock_irqsave(c->lock, cflags);
 
@@ -1026,9 +1026,9 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	c->rxdma_on = 0;
 	c->txdma_on = 0;
 
-	c->tx_dma_used=0;
-	c->dma_num=0;
-	c->dma_ready=1;
+	c->tx_dma_used = 0;
+	c->dma_num = 0;
+	c->dma_ready = 1;
 	c->dma_tx = 1;
 
 	/*	Enable DMA control mode
@@ -1036,10 +1036,10 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 
 	/*	TX DMA via DIR/REQ
 	 */
-	c->regs[R14]|= DTRREQ;
+	c->regs[R14] |= DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);
 
-	c->regs[R1]&= ~TxINT_ENAB;
+	c->regs[R1] &= ~TxINT_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
 
 	/*	Set up the DMA configuration
@@ -1062,8 +1062,8 @@ int z8530_sync_txdma_open(struct net_device *dev, struct z8530_channel *c)
 	c->tx_dma_used = 1;
 
 	c->irqs = &z8530_txdma_sync;
-	z8530_rtsdtr(c,1);
-	write_zsreg(c, R3, c->regs[R3]|RxENABLE);
+	z8530_rtsdtr(c, 1);
+	write_zsreg(c, R3, c->regs[R3] | RxENABLE);
 	spin_unlock_irqrestore(c->lock, cflags);
 
 	return 0;
@@ -1105,22 +1105,22 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	/*	Disable DMA control mode
 	 */
 
-	c->regs[R1]&= ~WT_RDY_ENAB;
+	c->regs[R1] &= ~WT_RDY_ENAB;
 	write_zsreg(c, R1, c->regs[R1]);
-	c->regs[R1]&= ~(WT_RDY_RT|WT_FN_RDYFN|INT_ERR_Rx);
-	c->regs[R1]|= INT_ALL_Rx;
+	c->regs[R1] &= ~(WT_RDY_RT | WT_FN_RDYFN | INT_ERR_Rx);
+	c->regs[R1] |= INT_ALL_Rx;
 	write_zsreg(c, R1, c->regs[R1]);
-	c->regs[R14]&= ~DTRREQ;
+	c->regs[R14] &= ~DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);
 
-	if(c->tx_dma_buf[0])
+	if (c->tx_dma_buf[0])
 	{
 		free_page((unsigned long)c->tx_dma_buf[0]);
-		c->tx_dma_buf[0]=NULL;
+		c->tx_dma_buf[0] = NULL;
 	}
-	chk=read_zsreg(c,R0);
+	chk = read_zsreg(c, R0);
 	write_zsreg(c, R3, c->regs[R3]);
-	z8530_rtsdtr(c,0);
+	z8530_rtsdtr(c, 0);
 
 	spin_unlock_irqrestore(c->lock, cflags);
 	return 0;
@@ -1130,7 +1130,7 @@ EXPORT_SYMBOL(z8530_sync_txdma_close);
 /*	Name strings for Z8530 chips. SGI claim to have a 130, Zilog deny
  *	it exists...
  */
-static const char *z8530_type_name[]={
+static const char * const z8530_type_name[] = {
 	"Z8530",
 	"Z85C30",
 	"Z85230"
@@ -1165,23 +1165,23 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 	/* NOP the interrupt handlers first - we might get a
 	 * floating IRQ transition when we reset the chip
 	 */
-	dev->chanA.irqs=&z8530_nop;
-	dev->chanB.irqs=&z8530_nop;
-	dev->chanA.dcdcheck=DCD;
-	dev->chanB.dcdcheck=DCD;
+	dev->chanA.irqs = &z8530_nop;
+	dev->chanB.irqs = &z8530_nop;
+	dev->chanA.dcdcheck = DCD;
+	dev->chanB.dcdcheck = DCD;
 
 	/* Reset the chip */
 	write_zsreg(&dev->chanA, R9, 0xC0);
 	udelay(200);
 	/* Now check its valid */
 	write_zsreg(&dev->chanA, R12, 0xAA);
-	if(read_zsreg(&dev->chanA, R12)!=0xAA)
+	if (read_zsreg(&dev->chanA, R12) != 0xAA)
 		return -ENODEV;
 	write_zsreg(&dev->chanA, R12, 0x55);
-	if(read_zsreg(&dev->chanA, R12)!=0x55)
+	if (read_zsreg(&dev->chanA, R12) != 0x55)
 		return -ENODEV;
 
-	dev->type=Z8530;
+	dev->type = Z8530;
 
 	/*	See the application note.
 	 */
@@ -1192,12 +1192,12 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 	 *	the chip is enhanced.
 	 */
 
-	if(read_zsreg(&dev->chanA, R15)==0x01)
+	if (read_zsreg(&dev->chanA, R15) == 0x01)
 	{
 		/* This C30 versus 230 detect is from Klaus Kudielka's dmascc */
 		/* Put a char in the fifo */
 		write_zsreg(&dev->chanA, R8, 0);
-		if(read_zsreg(&dev->chanA, R0)&Tx_BUF_EMP)
+		if (read_zsreg(&dev->chanA, R0) & Tx_BUF_EMP)
 			dev->type = Z85230;	/* Has a FIFO */
 		else
 			dev->type = Z85C30;	/* Z85C30, 1 byte FIFO */
@@ -1214,7 +1214,7 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 	 */
 
 	memcpy(dev->chanA.regs, reg_init, 16);
-	memcpy(dev->chanB.regs, reg_init ,16);
+	memcpy(dev->chanB.regs, reg_init, 16);
 
 	return 0;
 }
@@ -1270,8 +1270,8 @@ int z8530_shutdown(struct z8530_dev *dev)
 	/* Reset the chip */
 
 	spin_lock_irqsave(&dev->lock, flags);
-	dev->chanA.irqs=&z8530_nop;
-	dev->chanB.irqs=&z8530_nop;
+	dev->chanA.irqs = &z8530_nop;
+	dev->chanB.irqs = &z8530_nop;
 	write_zsreg(&dev->chanA, R9, 0xC0);
 	/* We must lock the udelay, the chip is offlimits here */
 	udelay(100);
@@ -1297,27 +1297,27 @@ int z8530_channel_load(struct z8530_channel *c, u8 *rtable)
 
 	spin_lock_irqsave(c->lock, flags);
 
-	while(*rtable!=255)
+	while (*rtable != 255)
 	{
-		int reg=*rtable++;
+		int reg = *rtable++;
 
-		if(reg>0x0F)
-			write_zsreg(c, R15, c->regs[15]|1);
-		write_zsreg(c, reg&0x0F, *rtable);
-		if(reg>0x0F)
-			write_zsreg(c, R15, c->regs[15]&~1);
-		c->regs[reg]=*rtable++;
+		if (reg > 0x0F)
+			write_zsreg(c, R15, c->regs[15] | 1);
+		write_zsreg(c, reg & 0x0F, *rtable);
+		if (reg > 0x0F)
+			write_zsreg(c, R15, c->regs[15] & ~1);
+		c->regs[reg] = *rtable++;
 	}
-	c->rx_function=z8530_null_rx;
-	c->skb=NULL;
-	c->tx_skb=NULL;
-	c->tx_next_skb=NULL;
-	c->mtu=1500;
-	c->max=0;
-	c->count=0;
-	c->status=read_zsreg(c, R0);
-	c->sync=1;
-	write_zsreg(c, R3, c->regs[R3]|RxENABLE);
+	c->rx_function = z8530_null_rx;
+	c->skb = NULL;
+	c->tx_skb = NULL;
+	c->tx_next_skb = NULL;
+	c->mtu = 1500;
+	c->max = 0;
+	c->count = 0;
+	c->status = read_zsreg(c, R0);
+	c->sync = 1;
+	write_zsreg(c, R3, c->regs[R3] | RxENABLE);
 
 	spin_unlock_irqrestore(c->lock, flags);
 	return 0;
@@ -1342,19 +1342,19 @@ static void z8530_tx_begin(struct z8530_channel *c)
 {
 	unsigned long flags;
 
-	if(c->tx_skb)
+	if (c->tx_skb)
 		return;
 
-	c->tx_skb=c->tx_next_skb;
-	c->tx_next_skb=NULL;
-	c->tx_ptr=c->tx_next_ptr;
+	c->tx_skb = c->tx_next_skb;
+	c->tx_next_skb = NULL;
+	c->tx_ptr = c->tx_next_ptr;
 
 	if (!c->tx_skb)
 	{
 		/* Idle on */
-		if(c->dma_tx)
+		if (c->dma_tx)
 		{
-			flags=claim_dma_lock();
+			flags = claim_dma_lock();
 			disable_dma(c->txdma);
 			/*	Check if we crapped out.
 			 */
@@ -1365,11 +1365,11 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			}
 			release_dma_lock(flags);
 		}
-		c->txcount=0;
+		c->txcount = 0;
 	} else {
-		c->txcount=c->tx_skb->len;
+		c->txcount = c->tx_skb->len;
 
-		if(c->dma_tx)
+		if (c->dma_tx)
 		{
 			/*	FIXME. DMA is broken for the original 8530,
 			 *	on the older parts we need to set a flag and
@@ -1377,25 +1377,25 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			 *	stage off
 			 */
 
-			flags=claim_dma_lock();
+			flags = claim_dma_lock();
 			disable_dma(c->txdma);
 
 			/*	These two are needed by the 8530/85C30
 			 *	and must be issued when idling.
 			 */
-			if(c->dev->type!=Z85230)
+			if (c->dev->type != Z85230)
 			{
 				write_zsctrl(c, RES_Tx_CRC);
 				write_zsctrl(c, RES_EOM_L);
 			}
-			write_zsreg(c, R10, c->regs[10]&~ABUNDER);
+			write_zsreg(c, R10, c->regs[10] & ~ABUNDER);
 			clear_dma_ff(c->txdma);
 			set_dma_addr(c->txdma, virt_to_bus(c->tx_ptr));
 			set_dma_count(c->txdma, c->txcount);
 			enable_dma(c->txdma);
 			release_dma_lock(flags);
 			write_zsctrl(c, RES_EOM_L);
-			write_zsreg(c, R5, c->regs[R5]|TxENAB);
+			write_zsreg(c, R5, c->regs[R5] | TxENAB);
 		} else {
 			/* ABUNDER off */
 			write_zsreg(c, R10, c->regs[10]);
@@ -1472,35 +1472,35 @@ static void z8530_rx_done(struct z8530_channel *c)
 
 	/*	Is our receive engine in DMA mode
 	 */
-	if(c->rxdma_on)
+	if (c->rxdma_on)
 	{
 		/*	Save the ready state and the buffer currently
 		 *	being used as the DMA target
 		 */
-		int ready=c->dma_ready;
-		unsigned char *rxb=c->rx_buf[c->dma_num];
+		int ready = c->dma_ready;
+		unsigned char *rxb = c->rx_buf[c->dma_num];
 		unsigned long flags;
 
 		/*	Complete this DMA. Necessary to find the length
 		 */
-		flags=claim_dma_lock();
+		flags = claim_dma_lock();
 
 		disable_dma(c->rxdma);
 		clear_dma_ff(c->rxdma);
-		c->rxdma_on=0;
-		ct=c->mtu-get_dma_residue(c->rxdma);
-		if(ct<0)
-			ct=2;	/* Shit happens.. */
-		c->dma_ready=0;
+		c->rxdma_on = 0;
+		ct = c->mtu - get_dma_residue(c->rxdma);
+		if (ct < 0)
+			ct = 2;	/* Shit happens.. */
+		c->dma_ready = 0;
 
 		/*	Normal case: the other slot is free, start the next DMA
 		 *	into it immediately.
 		 */
 
-		if(ready)
+		if (ready)
 		{
-			c->dma_num^=1;
-			set_dma_mode(c->rxdma, DMA_MODE_READ|0x10);
+			c->dma_num ^= 1;
+			set_dma_mode(c->rxdma, DMA_MODE_READ | 0x10);
 			set_dma_addr(c->rxdma, virt_to_bus(c->rx_buf[c->dma_num]));
 			set_dma_count(c->rxdma, c->mtu);
 			c->rxdma_on = 1;
@@ -1551,7 +1551,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		 *	sync IRQ for the RT_LOCK area.
 		 *
 		 */
-		ct=c->count;
+		ct = c->count;
 
 		c->skb = c->skb2;
 		c->count = 0;
@@ -1594,10 +1594,10 @@ static void z8530_rx_done(struct z8530_channel *c)
 
 static inline int spans_boundary(struct sk_buff *skb)
 {
-	unsigned long a=(unsigned long)skb->data;
+	unsigned long a = (unsigned long)skb->data;
 
-	a^=(a+skb->len);
-	if(a&0x00010000)	/* If the 64K bit is different.. */
+	a ^= (a + skb->len);
+	if (a & 0x00010000)	/* If the 64K bit is different.. */
 		return 1;
 	return 0;
 }
@@ -1620,7 +1620,7 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 	unsigned long flags;
 
 	netif_stop_queue(c->netdevice);
-	if(c->tx_next_skb)
+	if (c->tx_next_skb)
 		return NETDEV_TX_BUSY;
 
 	/* PC SPECIFIC - DMA limits */
@@ -1628,7 +1628,9 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 	 *	limit, then copy to the flip buffer
 	 */
 
-	if(c->dma_tx && ((unsigned long)(virt_to_bus(skb->data+skb->len))>=16*1024*1024 || spans_boundary(skb)))
+	if (c->dma_tx &&
+	    ((unsigned long)(virt_to_bus(skb->data + skb->len)) >=
+	    16 * 1024 * 1024 || spans_boundary(skb)))
 	{
 		/*	Send the flip buffer, and flip the flippy bit.
 		 *	We don't care which is used when just so long as
@@ -1636,14 +1638,14 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 		 *	only one buffer can be going out at a time the other
 		 *	has to be safe.
 		 */
-		c->tx_next_ptr=c->tx_dma_buf[c->tx_dma_used];
-		c->tx_dma_used^=1;	/* Flip temp buffer */
+		c->tx_next_ptr = c->tx_dma_buf[c->tx_dma_used];
+		c->tx_dma_used ^= 1;	/* Flip temp buffer */
 		skb_copy_from_linear_data(skb, c->tx_next_ptr, skb->len);
 	} else {
 		c->tx_next_ptr = skb->data;
 	}
 	RT_LOCK;
-	c->tx_next_skb=skb;
+	c->tx_next_skb = skb;
 	RT_UNLOCK;
 
 	spin_lock_irqsave(c->lock, flags);

From 00a580db9e2a0968e212a89a9db0b89dc4a97280 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:44 +0800
Subject: [PATCH 1827/3292] net: z85230: fix the code style issue about open
 brace {

This patch fixes the code style issue according to checkpatch.pl error:
"ERROR: that open brace { should be on the previous line".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 108 +++++++++++++--------------------------
 1 file changed, 36 insertions(+), 72 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index cab963f5efca..621f73060a03 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -184,8 +184,7 @@ static inline void write_zsdata(struct z8530_channel *c, u8 val)
 /*	Register loading parameters for a dead port
  */
 
-u8 z8530_dead_port[] =
-{
+u8 z8530_dead_port[] = {
 	255
 };
 EXPORT_SYMBOL(z8530_dead_port);
@@ -197,8 +196,7 @@ EXPORT_SYMBOL(z8530_dead_port);
  *	"kilostream" service, and most other similar services.
  */
 
-u8 z8530_hdlc_kilostream[] =
-{
+u8 z8530_hdlc_kilostream[] = {
 	4,	SYNC_ENAB | SDLC | X1CLK,
 	2,	0,	/* No vector */
 	1,	0,
@@ -220,8 +218,7 @@ EXPORT_SYMBOL(z8530_hdlc_kilostream);
 /*	As above but for enhanced chips.
  */
 
-u8 z8530_hdlc_kilostream_85230[] =
-{
+u8 z8530_hdlc_kilostream_85230[] = {
 	4,	SYNC_ENAB | SDLC | X1CLK,
 	2,	0,	/* No vector */
 	1,	0,
@@ -260,8 +257,7 @@ static void z8530_flush_fifo(struct z8530_channel *c)
 	read_zsreg(c, R1);
 	read_zsreg(c, R1);
 	read_zsreg(c, R1);
-	if (c->dev->type == Z85230)
-	{
+	if (c->dev->type == Z85230) {
 		read_zsreg(c, R1);
 		read_zsreg(c, R1);
 		read_zsreg(c, R1);
@@ -317,8 +313,7 @@ static void z8530_rx(struct z8530_channel *c)
 {
 	u8 ch, stat;
 
-	while (1)
-	{
+	while (1) {
 		/* FIFO empty ? */
 		if (!(read_zsreg(c, R0) & 1))
 			break;
@@ -327,29 +322,24 @@ static void z8530_rx(struct z8530_channel *c)
 
 		/*	Overrun ?
 		 */
-		if (c->count < c->max)
-		{
+		if (c->count < c->max) {
 			*c->dptr++ = ch;
 			c->count++;
 		}
 
-		if (stat & END_FR)
-		{
+		if (stat & END_FR) {
 			/*	Error ?
 			 */
-			if (stat & (Rx_OVR | CRC_ERR))
-			{
+			if (stat & (Rx_OVR | CRC_ERR)) {
 				/* Rewind the buffer and return */
 				if (c->skb)
 					c->dptr = c->skb->data;
 				c->count = 0;
-				if (stat & Rx_OVR)
-				{
+				if (stat & Rx_OVR) {
 					pr_warn("%s: overrun\n", c->dev->name);
 					c->rx_overrun++;
 				}
-				if (stat & CRC_ERR)
-				{
+				if (stat & CRC_ERR) {
 					c->rx_crc_err++;
 					/* printk("crc error\n"); */
 				}
@@ -391,8 +381,7 @@ static void z8530_tx(struct z8530_channel *c)
 		write_zsreg(c, R8, *c->tx_ptr++);
 		write_zsctrl(c, RES_H_IUS);
 		/* We are about to underflow */
-		if (c->txcount == 0)
-		{
+		if (c->txcount == 0) {
 			write_zsctrl(c, RES_EOM_L);
 			write_zsreg(c, R10, c->regs[10] & ~ABUNDER);
 		}
@@ -433,8 +422,7 @@ static void z8530_status(struct z8530_channel *chan)
 		z8530_tx_done(chan);
 	}
 
-	if (altered & chan->dcdcheck)
-	{
+	if (altered & chan->dcdcheck) {
 		if (status & chan->dcdcheck) {
 			pr_info("%s: DCD raised\n", chan->dev->name);
 			write_zsreg(chan, R3, chan->regs[3] | RxENABLE);
@@ -471,8 +459,7 @@ EXPORT_SYMBOL(z8530_sync);
 
 static void z8530_dma_rx(struct z8530_channel *chan)
 {
-	if (chan->rxdma_on)
-	{
+	if (chan->rxdma_on) {
 		/* Special condition check only */
 		u8 status;
 
@@ -501,8 +488,7 @@ static void z8530_dma_rx(struct z8530_channel *chan)
  */
 static void z8530_dma_tx(struct z8530_channel *chan)
 {
-	if (!chan->dma_tx)
-	{
+	if (!chan->dma_tx) {
 		pr_warn("Hey who turned the DMA off?\n");
 		z8530_tx(chan);
 		return;
@@ -530,10 +516,8 @@ static void z8530_dma_status(struct z8530_channel *chan)
 
 	chan->status = status;
 
-	if (chan->dma_tx)
-	{
-		if (status & TxEOM)
-		{
+	if (chan->dma_tx) {
+		if (status & TxEOM) {
 			unsigned long flags;
 
 			flags = claim_dma_lock();
@@ -545,8 +529,7 @@ static void z8530_dma_status(struct z8530_channel *chan)
 		}
 	}
 
-	if (altered & chan->dcdcheck)
-	{
+	if (altered & chan->dcdcheck) {
 		if (status & chan->dcdcheck) {
 			pr_info("%s: DCD raised\n", chan->dev->name);
 			write_zsreg(chan, R3, chan->regs[3] | RxENABLE);
@@ -668,8 +651,7 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 	int work = 0;
 	struct z8530_irqhandler *irqs;
 
-	if (locker)
-	{
+	if (locker) {
 		pr_err("IRQ re-enter\n");
 		return IRQ_NONE;
 	}
@@ -677,8 +659,7 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 
 	spin_lock(&dev->lock);
 
-	while (++work < 5000)
-	{
+	while (++work < 5000) {
 		intr = read_zsreg(&dev->chanA, R3);
 		if (!(intr &
 		   (CHARxIP | CHATxIP | CHAEXT | CHBRxIP | CHBTxIP | CHBEXT)))
@@ -694,8 +675,7 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 
 		irqs = dev->chanA.irqs;
 
-		if (intr & (CHARxIP | CHATxIP | CHAEXT))
-		{
+		if (intr & (CHARxIP | CHATxIP | CHAEXT)) {
 			if (intr & CHARxIP)
 				irqs->rx(&dev->chanA);
 			if (intr & CHATxIP)
@@ -706,8 +686,7 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 
 		irqs = dev->chanB.irqs;
 
-		if (intr & (CHBRxIP | CHBTxIP | CHBEXT))
-		{
+		if (intr & (CHBRxIP | CHBTxIP | CHBEXT)) {
 			if (intr & CHBRxIP)
 				irqs->rx(&dev->chanB);
 			if (intr & CHBTxIP)
@@ -726,8 +705,7 @@ irqreturn_t z8530_interrupt(int irq, void *dev_id)
 }
 EXPORT_SYMBOL(z8530_interrupt);
 
-static const u8 reg_init[16] =
-{
+static const u8 reg_init[16] = {
 	0, 0, 0, 0,
 	0, 0, 0, 0,
 	0, 0, 0, 0,
@@ -834,8 +812,7 @@ int z8530_sync_dma_open(struct net_device *dev, struct z8530_channel *c)
 	c->rx_buf[1] = c->rx_buf[0] + PAGE_SIZE / 2;
 
 	c->tx_dma_buf[0] = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
-	if (!c->tx_dma_buf[0])
-	{
+	if (!c->tx_dma_buf[0]) {
 		free_page((unsigned long)c->rx_buf[0]);
 		c->rx_buf[0] = NULL;
 		return -ENOBUFS;
@@ -957,13 +934,11 @@ int z8530_sync_dma_close(struct net_device *dev, struct z8530_channel *c)
 	c->regs[R14] &= ~DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);
 
-	if (c->rx_buf[0])
-	{
+	if (c->rx_buf[0]) {
 		free_page((unsigned long)c->rx_buf[0]);
 		c->rx_buf[0] = NULL;
 	}
-	if (c->tx_dma_buf[0])
-	{
+	if (c->tx_dma_buf[0]) {
 		free_page((unsigned  long)c->tx_dma_buf[0]);
 		c->tx_dma_buf[0] = NULL;
 	}
@@ -1113,8 +1088,7 @@ int z8530_sync_txdma_close(struct net_device *dev, struct z8530_channel *c)
 	c->regs[R14] &= ~DTRREQ;
 	write_zsreg(c, R14, c->regs[R14]);
 
-	if (c->tx_dma_buf[0])
-	{
+	if (c->tx_dma_buf[0]) {
 		free_page((unsigned long)c->tx_dma_buf[0]);
 		c->tx_dma_buf[0] = NULL;
 	}
@@ -1192,8 +1166,7 @@ static inline int do_z8530_init(struct z8530_dev *dev)
 	 *	the chip is enhanced.
 	 */
 
-	if (read_zsreg(&dev->chanA, R15) == 0x01)
-	{
+	if (read_zsreg(&dev->chanA, R15) == 0x01) {
 		/* This C30 versus 230 detect is from Klaus Kudielka's dmascc */
 		/* Put a char in the fifo */
 		write_zsreg(&dev->chanA, R8, 0);
@@ -1297,8 +1270,7 @@ int z8530_channel_load(struct z8530_channel *c, u8 *rtable)
 
 	spin_lock_irqsave(c->lock, flags);
 
-	while (*rtable != 255)
-	{
+	while (*rtable != 255) {
 		int reg = *rtable++;
 
 		if (reg > 0x0F)
@@ -1349,17 +1321,14 @@ static void z8530_tx_begin(struct z8530_channel *c)
 	c->tx_next_skb = NULL;
 	c->tx_ptr = c->tx_next_ptr;
 
-	if (!c->tx_skb)
-	{
+	if (!c->tx_skb) {
 		/* Idle on */
-		if (c->dma_tx)
-		{
+		if (c->dma_tx) {
 			flags = claim_dma_lock();
 			disable_dma(c->txdma);
 			/*	Check if we crapped out.
 			 */
-			if (get_dma_residue(c->txdma))
-			{
+			if (get_dma_residue(c->txdma)) {
 				c->netdevice->stats.tx_dropped++;
 				c->netdevice->stats.tx_fifo_errors++;
 			}
@@ -1369,8 +1338,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 	} else {
 		c->txcount = c->tx_skb->len;
 
-		if (c->dma_tx)
-		{
+		if (c->dma_tx) {
 			/*	FIXME. DMA is broken for the original 8530,
 			 *	on the older parts we need to set a flag and
 			 *	wait for a further TX interrupt to fire this
@@ -1383,8 +1351,7 @@ static void z8530_tx_begin(struct z8530_channel *c)
 			/*	These two are needed by the 8530/85C30
 			 *	and must be issued when idling.
 			 */
-			if (c->dev->type != Z85230)
-			{
+			if (c->dev->type != Z85230) {
 				write_zsctrl(c, RES_Tx_CRC);
 				write_zsctrl(c, RES_EOM_L);
 			}
@@ -1472,8 +1439,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 
 	/*	Is our receive engine in DMA mode
 	 */
-	if (c->rxdma_on)
-	{
+	if (c->rxdma_on) {
 		/*	Save the ready state and the buffer currently
 		 *	being used as the DMA target
 		 */
@@ -1497,8 +1463,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		 *	into it immediately.
 		 */
 
-		if (ready)
-		{
+		if (ready) {
 			c->dma_num ^= 1;
 			set_dma_mode(c->rxdma, DMA_MODE_READ | 0x10);
 			set_dma_addr(c->rxdma, virt_to_bus(c->rx_buf[c->dma_num]));
@@ -1630,8 +1595,7 @@ netdev_tx_t z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 
 	if (c->dma_tx &&
 	    ((unsigned long)(virt_to_bus(skb->data + skb->len)) >=
-	    16 * 1024 * 1024 || spans_boundary(skb)))
-	{
+	    16 * 1024 * 1024 || spans_boundary(skb))) {
 		/*	Send the flip buffer, and flip the flippy bit.
 		 *	We don't care which is used when just so long as
 		 *	we never use the same buffer twice in a row. Since

From 2b28b711ac5d58ed828e137ac53013f5008b0d47 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 10:43:45 +0800
Subject: [PATCH 1828/3292] net: z85230: remove unnecessary out of memory
 message

This patch removes unnecessary out of memory message,
to fix the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 621f73060a03..982a03488a00 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1531,10 +1531,9 @@ static void z8530_rx_done(struct z8530_channel *c)
 		RT_UNLOCK;
 
 		c->skb2 = dev_alloc_skb(c->mtu);
-		if (!c->skb2)
-			netdev_warn(c->netdevice, "memory squeeze\n");
-		else
+		if (c->skb2)
 			skb_put(c->skb2, c->mtu);
+
 		c->netdevice->stats.rx_packets++;
 		c->netdevice->stats.rx_bytes += ct;
 	}

From bbcb2840b00710d53d14947917e850c29f01812c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 21:54:18 +0800
Subject: [PATCH 1829/3292] net: pci200syn: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pci200syn.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index ba5cc0c53833..1667dfdb41e9 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -58,8 +58,6 @@ typedef struct {
 	u32 init_ctrl;		/* 50h : EEPROM ctrl, Init Ctrl, etc */
 }plx9052;
 
-
-
 typedef struct port_s {
 	struct napi_struct napi;
 	struct net_device *netdev;
@@ -76,8 +74,6 @@ typedef struct port_s {
 	u8 chan;		/* physical port # - 0 or 1 */
 }port_t;
 
-
-
 typedef struct card_s {
 	u8 __iomem *rambase;	/* buffer memory base (virtual) */
 	u8 __iomem *scabase;	/* SCA memory base (virtual) */
@@ -90,7 +86,6 @@ typedef struct card_s {
 	port_t ports[2];
 }card_t;
 
-
 #define get_port(card, port)	     (&card->ports[port])
 #define sca_flush(card)		     (sca_in(IER0, card))
 
@@ -112,7 +107,6 @@ static inline void new_memcpy_toio(char __iomem *dest, char *src, int length)
 
 #include "hd64572.c"
 
-
 static void pci200_set_iface(port_t *port)
 {
 	card_t *card = port->card;
@@ -151,8 +145,6 @@ static void pci200_set_iface(port_t *port)
 	sca_set_port(port);
 }
 
-
-
 static int pci200_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
@@ -167,8 +159,6 @@ static int pci200_open(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int pci200_close(struct net_device *dev)
 {
 	sca_close(dev);
@@ -177,8 +167,6 @@ static int pci200_close(struct net_device *dev)
 	return 0;
 }
 
-
-
 static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	const size_t size = sizeof(sync_serial_settings);
@@ -233,8 +221,6 @@ static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	}
 }
 
-
-
 static void pci200_pci_remove_one(struct pci_dev *pdev)
 {
 	int i;
@@ -407,15 +393,12 @@ static int pci200_pci_init_one(struct pci_dev *pdev,
 	return 0;
 }
 
-
-
 static const struct pci_device_id pci200_pci_tbl[] = {
 	{ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_PLX,
 	  PCI_DEVICE_ID_PLX_PCI200SYN, 0, 0, 0 },
 	{ 0, }
 };
 
-
 static struct pci_driver pci200_pci_driver = {
 	.name		= "PCI200SYN",
 	.id_table	= pci200_pci_tbl,
@@ -423,7 +406,6 @@ static struct pci_driver pci200_pci_driver = {
 	.remove		= pci200_pci_remove_one,
 };
 
-
 static int __init pci200_init_module(void)
 {
 	if (pci_clock_freq < 1000000 || pci_clock_freq > 80000000) {
@@ -433,8 +415,6 @@ static int __init pci200_init_module(void)
 	return pci_register_driver(&pci200_pci_driver);
 }
 
-
-
 static void __exit pci200_cleanup_module(void)
 {
 	pci_unregister_driver(&pci200_pci_driver);

From f9a03eae28507c07709c49ac283194be760e9511 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 21:54:19 +0800
Subject: [PATCH 1830/3292] net: pci200syn: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pci200syn.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index 1667dfdb41e9..a7eac9099de9 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -92,6 +92,7 @@ typedef struct card_s {
 static inline void new_memcpy_toio(char __iomem *dest, char *src, int length)
 {
 	int len;
+
 	do {
 		len = length > 256 ? 256 : length;
 		memcpy_toio(dest, src, len);
@@ -148,8 +149,8 @@ static void pci200_set_iface(port_t *port)
 static int pci200_open(struct net_device *dev)
 {
 	port_t *port = dev_to_port(dev);
-
 	int result = hdlc_open(dev);
+
 	if (result)
 		return result;
 
@@ -366,6 +367,7 @@ static int pci200_pci_init_one(struct pci_dev *pdev,
 		port_t *port = &card->ports[i];
 		struct net_device *dev = port->netdev;
 		hdlc_device *hdlc = dev_to_hdlc(dev);
+
 		port->chan = i;
 
 		spin_lock_init(&port->lock);

From b9282333efff96c19bc58a36f4929471de02b4f3 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 21:54:20 +0800
Subject: [PATCH 1831/3292] net: pci200syn: replace comparison to NULL with
 "!card"

According to the chackpatch.pl, comparison to NULL could
be written "!card".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pci200syn.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index a7eac9099de9..cee3d658a3e8 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -279,7 +279,7 @@ static int pci200_pci_init_one(struct pci_dev *pdev,
 	}
 
 	card = kzalloc(sizeof(card_t), GFP_KERNEL);
-	if (card == NULL) {
+	if (!card) {
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
 		return -ENOBUFS;
@@ -310,9 +310,7 @@ static int pci200_pci_init_one(struct pci_dev *pdev,
 	ramphys = pci_resource_start(pdev,3) & PCI_BASE_ADDRESS_MEM_MASK;
 	card->rambase = pci_ioremap_bar(pdev, 3);
 
-	if (card->plxbase == NULL ||
-	    card->scabase == NULL ||
-	    card->rambase == NULL) {
+	if (!card->plxbase || !card->scabase || !card->rambase) {
 		pr_err("ioremap() failed\n");
 		pci200_pci_remove_one(pdev);
 		return -EFAULT;

From 2b637446685f35b51b090fc3e5bc21e4fe8a21c1 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 21:54:21 +0800
Subject: [PATCH 1832/3292] net: pci200syn: add some required spaces

Add spaces required after that close brace '}'.
Add spaces required before the open parenthesis '('.
Add spaces required after that ','.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pci200syn.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index cee3d658a3e8..ac4a59947247 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -56,7 +56,7 @@ typedef struct {
 	u32 cs_base[4];		/* 3C-48h : Chip Select Base Addrs */
 	u32 intr_ctrl_stat;	/* 4Ch : Interrupt Control/Status */
 	u32 init_ctrl;		/* 50h : EEPROM ctrl, Init Ctrl, etc */
-}plx9052;
+} plx9052;
 
 typedef struct port_s {
 	struct napi_struct napi;
@@ -72,7 +72,7 @@ typedef struct port_s {
 	u16 txlast;
 	u8 rxs, txs, tmc;	/* SCA registers */
 	u8 chan;		/* physical port # - 0 or 1 */
-}port_t;
+} port_t;
 
 typedef struct card_s {
 	u8 __iomem *rambase;	/* buffer memory base (virtual) */
@@ -84,7 +84,7 @@ typedef struct card_s {
 	u8 irq;			/* interrupt request level */
 
 	port_t ports[2];
-}card_t;
+} card_t;
 
 #define get_port(card, port)	     (&card->ports[port])
 #define sca_flush(card)		     (sca_in(IER0, card))
@@ -117,7 +117,7 @@ static void pci200_set_iface(port_t *port)
 
 	sca_out(EXS_TES1, (port->chan ? MSCI1_OFFSET : MSCI0_OFFSET) + EXS,
 		port->card);
-	switch(port->settings.clock_type) {
+	switch (port->settings.clock_type) {
 	case CLOCK_INT:
 		rxs |= CLK_BRG; /* BRG output */
 		txs |= CLK_PIN_OUT | CLK_TX_RXCLK; /* RX clock */
@@ -184,7 +184,7 @@ static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (cmd != SIOCWANDEV)
 		return hdlc_ioctl(dev, ifr, cmd);
 
-	switch(ifr->ifr_settings.type) {
+	switch (ifr->ifr_settings.type) {
 	case IF_GET_IFACE:
 		ifr->ifr_settings.type = IF_IFACE_V35;
 		if (ifr->ifr_settings.size < size) {
@@ -301,13 +301,13 @@ static int pci200_pci_init_one(struct pci_dev *pdev,
 		return -EFAULT;
 	}
 
-	plxphys = pci_resource_start(pdev,0) & PCI_BASE_ADDRESS_MEM_MASK;
+	plxphys = pci_resource_start(pdev, 0) & PCI_BASE_ADDRESS_MEM_MASK;
 	card->plxbase = ioremap(plxphys, PCI200SYN_PLX_SIZE);
 
-	scaphys = pci_resource_start(pdev,2) & PCI_BASE_ADDRESS_MEM_MASK;
+	scaphys = pci_resource_start(pdev, 2) & PCI_BASE_ADDRESS_MEM_MASK;
 	card->scabase = ioremap(scaphys, PCI200SYN_SCA_SIZE);
 
-	ramphys = pci_resource_start(pdev,3) & PCI_BASE_ADDRESS_MEM_MASK;
+	ramphys = pci_resource_start(pdev, 3) & PCI_BASE_ADDRESS_MEM_MASK;
 	card->rambase = pci_ioremap_bar(pdev, 3);
 
 	if (!card->plxbase || !card->scabase || !card->rambase) {

From 8e7680c10284e75e0b4122e05a6969a53f95c1cb Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 21:54:22 +0800
Subject: [PATCH 1833/3292] net: pci200syn: add necessary () to macro argument

Macro argument 'card' may be better as '(card)' to
avoid precedence issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pci200syn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index ac4a59947247..abca13b2792a 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -86,7 +86,7 @@ typedef struct card_s {
 	port_t ports[2];
 } card_t;
 
-#define get_port(card, port)	     (&card->ports[port])
+#define get_port(card, port)	     (&(card)->ports[port])
 #define sca_flush(card)		     (sca_in(IER0, card))
 
 static inline void new_memcpy_toio(char __iomem *dest, char *src, int length)

From 6855d301e9d3da81d5f206dd2ddbaf9fb82f3736 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 15 Jun 2021 21:54:23 +0800
Subject: [PATCH 1834/3292] net: pci200syn: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/pci200syn.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c
index abca13b2792a..dee9c4e15eca 100644
--- a/drivers/net/wan/pci200syn.c
+++ b/drivers/net/wan/pci200syn.c
@@ -42,8 +42,7 @@
 static int pci_clock_freq = 33000000;
 #define CLOCK_BASE pci_clock_freq
 
-/*
- *      PLX PCI9052 local configuration and shared runtime registers.
+/*      PLX PCI9052 local configuration and shared runtime registers.
  *      This structure can be used to access 9052 registers (memory mapped).
  */
 typedef struct {

From e8e095b3b37004a4048af69de60c9af2d2268a1d Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Tue, 15 Jun 2021 17:04:27 +0530
Subject: [PATCH 1835/3292] octeontx2-af: cn10k: Bandwidth profiles config
 support

CN10K silicons supports hierarchial ingress packet ratelimiting.
There are 3 levels of profilers supported leaf, mid and top.
Ratelimiting is done after packet forwarding decision is taken
and a NIXLF's RQ is identified to DMA the packet. RQ's context
points to a leaf bandwidth profile which can be configured
to achieve desired ratelimit.

This patch adds logic for management of these bandwidth profiles
ie profile alloc, free, context update etc.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/octeontx2/af/mbox.h  |  40 +-
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |  11 +
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 619 +++++++++++++++++-
 .../marvell/octeontx2/af/rvu_npc_fs.c         |   5 +
 .../ethernet/marvell/octeontx2/af/rvu_reg.h   |   8 +
 .../marvell/octeontx2/af/rvu_struct.h         |  78 ++-
 6 files changed, 757 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index aee6a6f31b0d..7d7dfa8d8a3f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -260,7 +260,11 @@ M(NIX_BP_DISABLE,	0x8017, nix_bp_disable, nix_bp_cfg_req, msg_rsp) \
 M(NIX_GET_MAC_ADDR, 0x8018, nix_get_mac_addr, msg_req, nix_get_mac_addr_rsp) \
 M(NIX_CN10K_AQ_ENQ,	0x8019, nix_cn10k_aq_enq, nix_cn10k_aq_enq_req, \
 				nix_cn10k_aq_enq_rsp)			\
-M(NIX_GET_HW_INFO,	0x801a, nix_get_hw_info, msg_req, nix_hw_info)
+M(NIX_GET_HW_INFO,	0x801c, nix_get_hw_info, msg_req, nix_hw_info)	\
+M(NIX_BANDPROF_ALLOC,	0x801d, nix_bandprof_alloc, nix_bandprof_alloc_req, \
+				nix_bandprof_alloc_rsp)			    \
+M(NIX_BANDPROF_FREE,	0x801e, nix_bandprof_free, nix_bandprof_free_req,   \
+				msg_rsp)
 
 /* Messages initiated by AF (range 0xC00 - 0xDFF) */
 #define MBOX_UP_CGX_MESSAGES						\
@@ -615,6 +619,9 @@ enum nix_af_status {
 	NIX_AF_ERR_PTP_CONFIG_FAIL  = -423,
 	NIX_AF_ERR_NPC_KEY_NOT_SUPP = -424,
 	NIX_AF_ERR_INVALID_NIXBLK   = -425,
+	NIX_AF_ERR_INVALID_BANDPROF = -426,
+	NIX_AF_ERR_IPOLICER_NOTSUPP = -427,
+	NIX_AF_ERR_BANDPROF_INVAL_REQ  = -428,
 };
 
 /* For NIX RX vtag action  */
@@ -683,6 +690,7 @@ struct nix_cn10k_aq_enq_req {
 		struct nix_cq_ctx_s cq;
 		struct nix_rsse_s   rss;
 		struct nix_rx_mce_s mce;
+		struct nix_bandprof_s prof;
 	};
 	union {
 		struct nix_cn10k_rq_ctx_s rq_mask;
@@ -690,6 +698,7 @@ struct nix_cn10k_aq_enq_req {
 		struct nix_cq_ctx_s cq_mask;
 		struct nix_rsse_s   rss_mask;
 		struct nix_rx_mce_s mce_mask;
+		struct nix_bandprof_s prof_mask;
 	};
 };
 
@@ -701,6 +710,7 @@ struct nix_cn10k_aq_enq_rsp {
 		struct nix_cq_ctx_s cq;
 		struct nix_rsse_s   rss;
 		struct nix_rx_mce_s mce;
+		struct nix_bandprof_s prof;
 	};
 };
 
@@ -716,6 +726,7 @@ struct nix_aq_enq_req {
 		struct nix_cq_ctx_s cq;
 		struct nix_rsse_s   rss;
 		struct nix_rx_mce_s mce;
+		u64 prof;
 	};
 	union {
 		struct nix_rq_ctx_s rq_mask;
@@ -723,6 +734,7 @@ struct nix_aq_enq_req {
 		struct nix_cq_ctx_s cq_mask;
 		struct nix_rsse_s   rss_mask;
 		struct nix_rx_mce_s mce_mask;
+		u64 prof_mask;
 	};
 };
 
@@ -734,6 +746,7 @@ struct nix_aq_enq_rsp {
 		struct nix_cq_ctx_s cq;
 		struct nix_rsse_s   rss;
 		struct nix_rx_mce_s mce;
+		u64 prof;
 	};
 };
 
@@ -975,6 +988,31 @@ struct nix_hw_info {
 	u16 min_mtu;
 };
 
+struct nix_bandprof_alloc_req {
+	struct mbox_msghdr hdr;
+	/* Count of profiles needed per layer */
+	u16 prof_count[BAND_PROF_NUM_LAYERS];
+};
+
+struct nix_bandprof_alloc_rsp {
+	struct mbox_msghdr hdr;
+	u16 prof_count[BAND_PROF_NUM_LAYERS];
+
+	/* There is no need to allocate morethan 1 bandwidth profile
+	 * per RQ of a PF_FUNC's NIXLF. So limit the maximum
+	 * profiles to 64 per PF_FUNC.
+	 */
+#define MAX_BANDPROF_PER_PFFUNC	64
+	u16 prof_idx[BAND_PROF_NUM_LAYERS][MAX_BANDPROF_PER_PFFUNC];
+};
+
+struct nix_bandprof_free_req {
+	struct mbox_msghdr hdr;
+	u8 free_all;
+	u16 prof_count[BAND_PROF_NUM_LAYERS];
+	u16 prof_idx[BAND_PROF_NUM_LAYERS][MAX_BANDPROF_PER_PFFUNC];
+};
+
 /* NPC mbox message structs */
 
 #define NPC_MCAM_ENTRY_INVALID	0xFFFF
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index c88dab7747ef..4d2a5ca5bd47 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -296,6 +296,13 @@ struct nix_txvlan {
 	struct mutex rsrc_lock; /* Serialize resource alloc/free */
 };
 
+struct nix_ipolicer {
+	struct rsrc_bmap band_prof;
+	u16 *pfvf_map;
+	u16 *match_id;
+	u16 *ref_count;
+};
+
 struct nix_hw {
 	int blkaddr;
 	struct rvu *rvu;
@@ -305,6 +312,7 @@ struct nix_hw {
 	struct nix_mark_format mark_format;
 	struct nix_lso lso;
 	struct nix_txvlan txvlan;
+	struct nix_ipolicer *ipolicer;
 };
 
 /* RVU block's capabilities or functionality,
@@ -322,6 +330,7 @@ struct hw_cap {
 	bool	nix_rx_multicast;	 /* Rx packet replication support */
 	bool	per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */
 	bool	programmable_chans; /* Channels programmable ? */
+	bool	ipolicer;
 };
 
 struct rvu_hwinfo {
@@ -672,6 +681,8 @@ int rvu_get_next_nix_blkaddr(struct rvu *rvu, int blkaddr);
 void rvu_nix_reset_mac(struct rvu_pfvf *pfvf, int pcifunc);
 int nix_get_struct_ptrs(struct rvu *rvu, u16 pcifunc,
 			struct nix_hw **nix_hw, int *blkaddr);
+int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc,
+				 u16 rq_idx, u16 match_id);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d8cb665b7d8a..ebd73a8856f2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -23,6 +23,14 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
 			    int type, int chan_id);
 static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
 			       int type, bool add);
+static int nix_setup_ipolicers(struct rvu *rvu,
+			       struct nix_hw *nix_hw, int blkaddr);
+static void nix_ipolicer_freemem(struct nix_hw *nix_hw);
+static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
+			       struct nix_hw *nix_hw, u16 pcifunc);
+static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc);
+static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
+				     u32 leaf_prof);
 
 enum mc_tbl_sz {
 	MC_TBL_SZ_256,
@@ -699,8 +707,11 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
 	pfvf = rvu_get_pfvf(rvu, pcifunc);
 	nixlf = rvu_get_lf(rvu, block, pcifunc, 0);
 
-	/* Skip NIXLF check for broadcast MCE entry init */
-	if (!(!rsp && req->ctype == NIX_AQ_CTYPE_MCE)) {
+	/* Skip NIXLF check for broadcast MCE entry and bandwidth profile
+	 * operations done by AF itself.
+	 */
+	if (!((!rsp && req->ctype == NIX_AQ_CTYPE_MCE) ||
+	      (req->ctype == NIX_AQ_CTYPE_BANDPROF && !pcifunc))) {
 		if (!pfvf->nixlf || nixlf < 0)
 			return NIX_AF_ERR_AF_LF_INVALID;
 	}
@@ -740,6 +751,11 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
 		if (rsp)
 			rc = NIX_AF_ERR_AQ_ENQUEUE;
 		break;
+	case NIX_AQ_CTYPE_BANDPROF:
+		if (nix_verify_bandprof((struct nix_cn10k_aq_enq_req *)req,
+					nix_hw, pcifunc))
+			rc = NIX_AF_ERR_INVALID_BANDPROF;
+		break;
 	default:
 		rc = NIX_AF_ERR_AQ_ENQUEUE;
 	}
@@ -796,6 +812,9 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
 		else if (req->ctype == NIX_AQ_CTYPE_MCE)
 			memcpy(mask, &req->mce_mask,
 			       sizeof(struct nix_rx_mce_s));
+		else if (req->ctype == NIX_AQ_CTYPE_BANDPROF)
+			memcpy(mask, &req->prof_mask,
+			       sizeof(struct nix_bandprof_s));
 		fallthrough;
 	case NIX_AQ_INSTOP_INIT:
 		if (req->ctype == NIX_AQ_CTYPE_RQ)
@@ -808,6 +827,8 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
 			memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s));
 		else if (req->ctype == NIX_AQ_CTYPE_MCE)
 			memcpy(ctx, &req->mce, sizeof(struct nix_rx_mce_s));
+		else if (req->ctype == NIX_AQ_CTYPE_BANDPROF)
+			memcpy(ctx, &req->prof, sizeof(struct nix_bandprof_s));
 		break;
 	case NIX_AQ_INSTOP_NOP:
 	case NIX_AQ_INSTOP_READ:
@@ -885,6 +906,9 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
 			else if (req->ctype == NIX_AQ_CTYPE_MCE)
 				memcpy(&rsp->mce, ctx,
 				       sizeof(struct nix_rx_mce_s));
+			else if (req->ctype == NIX_AQ_CTYPE_BANDPROF)
+				memcpy(&rsp->prof, ctx,
+				       sizeof(struct nix_bandprof_s));
 		}
 	}
 
@@ -3624,6 +3648,10 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
 		if (err)
 			return err;
 
+		err = nix_setup_ipolicers(rvu, nix_hw, blkaddr);
+		if (err)
+			return err;
+
 		err = nix_af_mark_format_setup(rvu, nix_hw, blkaddr);
 		if (err)
 			return err;
@@ -3772,6 +3800,8 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr,
 			kfree(txsch->schq.bmap);
 		}
 
+		nix_ipolicer_freemem(nix_hw);
+
 		vlan = &nix_hw->txvlan;
 		kfree(vlan->rsrc.bmap);
 		mutex_destroy(&vlan->rsrc_lock);
@@ -3879,6 +3909,8 @@ void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int nixlf)
 	}
 
 	nix_ctx_free(rvu, pfvf);
+
+	nix_free_all_bandprof(rvu, pcifunc);
 }
 
 #define NIX_AF_LFX_TX_CFG_PTP_EN	BIT_ULL(32)
@@ -3987,3 +4019,586 @@ void rvu_nix_reset_mac(struct rvu_pfvf *pfvf, int pcifunc)
 	if (from_vf)
 		ether_addr_copy(pfvf->mac_addr, pfvf->default_mac);
 }
+
+/* NIX ingress policers or bandwidth profiles APIs */
+static void nix_config_rx_pkt_policer_precolor(struct rvu *rvu, int blkaddr)
+{
+	struct npc_lt_def_cfg defs, *ltdefs;
+
+	ltdefs = &defs;
+	memcpy(ltdefs, rvu->kpu.lt_def, sizeof(struct npc_lt_def_cfg));
+
+	/* Extract PCP and DEI fields from outer VLAN from byte offset
+	 * 2 from the start of LB_PTR (ie TAG).
+	 * VLAN0 is Outer VLAN and VLAN1 is Inner VLAN. Inner VLAN
+	 * fields are considered when 'Tunnel enable' is set in profile.
+	 */
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_VLAN0_PCP_DEI,
+		    (2UL << 12) | (ltdefs->ovlan.lid << 8) |
+		    (ltdefs->ovlan.ltype_match << 4) |
+		    ltdefs->ovlan.ltype_mask);
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_VLAN1_PCP_DEI,
+		    (2UL << 12) | (ltdefs->ivlan.lid << 8) |
+		    (ltdefs->ivlan.ltype_match << 4) |
+		    ltdefs->ivlan.ltype_mask);
+
+	/* DSCP field in outer and tunneled IPv4 packets */
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OIP4_DSCP,
+		    (1UL << 12) | (ltdefs->rx_oip4.lid << 8) |
+		    (ltdefs->rx_oip4.ltype_match << 4) |
+		    ltdefs->rx_oip4.ltype_mask);
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IIP4_DSCP,
+		    (1UL << 12) | (ltdefs->rx_iip4.lid << 8) |
+		    (ltdefs->rx_iip4.ltype_match << 4) |
+		    ltdefs->rx_iip4.ltype_mask);
+
+	/* DSCP field (traffic class) in outer and tunneled IPv6 packets */
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_OIP6_DSCP,
+		    (1UL << 11) | (ltdefs->rx_oip6.lid << 8) |
+		    (ltdefs->rx_oip6.ltype_match << 4) |
+		    ltdefs->rx_oip6.ltype_mask);
+	rvu_write64(rvu, blkaddr, NIX_AF_RX_DEF_IIP6_DSCP,
+		    (1UL << 11) | (ltdefs->rx_iip6.lid << 8) |
+		    (ltdefs->rx_iip6.ltype_match << 4) |
+		    ltdefs->rx_iip6.ltype_mask);
+}
+
+static int nix_init_policer_context(struct rvu *rvu, struct nix_hw *nix_hw,
+				    int layer, int prof_idx)
+{
+	struct nix_cn10k_aq_enq_req aq_req;
+	int rc;
+
+	memset(&aq_req, 0, sizeof(struct nix_cn10k_aq_enq_req));
+
+	aq_req.qidx = (prof_idx & 0x3FFF) | (layer << 14);
+	aq_req.ctype = NIX_AQ_CTYPE_BANDPROF;
+	aq_req.op = NIX_AQ_INSTOP_INIT;
+
+	/* Context is all zeros, submit to AQ */
+	rc = rvu_nix_blk_aq_enq_inst(rvu, nix_hw,
+				     (struct nix_aq_enq_req *)&aq_req, NULL);
+	if (rc)
+		dev_err(rvu->dev, "Failed to INIT bandwidth profile layer %d profile %d\n",
+			layer, prof_idx);
+	return rc;
+}
+
+static int nix_setup_ipolicers(struct rvu *rvu,
+			       struct nix_hw *nix_hw, int blkaddr)
+{
+	struct rvu_hwinfo *hw = rvu->hw;
+	struct nix_ipolicer *ipolicer;
+	int err, layer, prof_idx;
+	u64 cfg;
+
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+	if (!(cfg & BIT_ULL(61))) {
+		hw->cap.ipolicer = false;
+		return 0;
+	}
+
+	hw->cap.ipolicer = true;
+	nix_hw->ipolicer = devm_kcalloc(rvu->dev, BAND_PROF_NUM_LAYERS,
+					sizeof(*ipolicer), GFP_KERNEL);
+	if (!nix_hw->ipolicer)
+		return -ENOMEM;
+
+	cfg = rvu_read64(rvu, blkaddr, NIX_AF_PL_CONST);
+
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		ipolicer = &nix_hw->ipolicer[layer];
+		switch (layer) {
+		case BAND_PROF_LEAF_LAYER:
+			ipolicer->band_prof.max = cfg & 0XFFFF;
+			break;
+		case BAND_PROF_MID_LAYER:
+			ipolicer->band_prof.max = (cfg >> 16) & 0XFFFF;
+			break;
+		case BAND_PROF_TOP_LAYER:
+			ipolicer->band_prof.max = (cfg >> 32) & 0XFFFF;
+			break;
+		}
+
+		if (!ipolicer->band_prof.max)
+			continue;
+
+		err = rvu_alloc_bitmap(&ipolicer->band_prof);
+		if (err)
+			return err;
+
+		ipolicer->pfvf_map = devm_kcalloc(rvu->dev,
+						  ipolicer->band_prof.max,
+						  sizeof(u16), GFP_KERNEL);
+		if (!ipolicer->pfvf_map)
+			return -ENOMEM;
+
+		ipolicer->match_id = devm_kcalloc(rvu->dev,
+						  ipolicer->band_prof.max,
+						  sizeof(u16), GFP_KERNEL);
+		if (!ipolicer->match_id)
+			return -ENOMEM;
+
+		for (prof_idx = 0;
+		     prof_idx < ipolicer->band_prof.max; prof_idx++) {
+			/* Set AF as current owner for INIT ops to succeed */
+			ipolicer->pfvf_map[prof_idx] = 0x00;
+
+			/* There is no enable bit in the profile context,
+			 * so no context disable. So let's INIT them here
+			 * so that PF/VF later on have to just do WRITE to
+			 * setup policer rates and config.
+			 */
+			err = nix_init_policer_context(rvu, nix_hw,
+						       layer, prof_idx);
+			if (err)
+				return err;
+		}
+
+		/* Allocate memory for maintaining ref_counts for MID level
+		 * profiles, this will be needed for leaf layer profiles'
+		 * aggregation.
+		 */
+		if (layer != BAND_PROF_MID_LAYER)
+			continue;
+
+		ipolicer->ref_count = devm_kcalloc(rvu->dev,
+						   ipolicer->band_prof.max,
+						   sizeof(u16), GFP_KERNEL);
+	}
+
+	/* Set policer timeunit to 2us ie  (19 + 1) * 100 nsec = 2us */
+	rvu_write64(rvu, blkaddr, NIX_AF_PL_TS, 19);
+
+	nix_config_rx_pkt_policer_precolor(rvu, blkaddr);
+
+	return 0;
+}
+
+static void nix_ipolicer_freemem(struct nix_hw *nix_hw)
+{
+	struct nix_ipolicer *ipolicer;
+	int layer;
+
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		ipolicer = &nix_hw->ipolicer[layer];
+
+		if (!ipolicer->band_prof.max)
+			continue;
+
+		kfree(ipolicer->band_prof.bmap);
+	}
+}
+
+static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
+			       struct nix_hw *nix_hw, u16 pcifunc)
+{
+	struct nix_ipolicer *ipolicer;
+	int layer, hi_layer, prof_idx;
+
+	/* Bits [15:14] in profile index represent layer */
+	layer = (req->qidx >> 14) & 0x03;
+	prof_idx = req->qidx & 0x3FFF;
+
+	ipolicer = &nix_hw->ipolicer[layer];
+	if (prof_idx >= ipolicer->band_prof.max)
+		return -EINVAL;
+
+	/* Check if the profile is allocated to the requesting PCIFUNC or not
+	 * with the exception of AF. AF is allowed to read and update contexts.
+	 */
+	if (pcifunc && ipolicer->pfvf_map[prof_idx] != pcifunc)
+		return -EINVAL;
+
+	/* If this profile is linked to higher layer profile then check
+	 * if that profile is also allocated to the requesting PCIFUNC
+	 * or not.
+	 */
+	if (!req->prof.hl_en)
+		return 0;
+
+	/* Leaf layer profile can link only to mid layer and
+	 * mid layer to top layer.
+	 */
+	if (layer == BAND_PROF_LEAF_LAYER)
+		hi_layer = BAND_PROF_MID_LAYER;
+	else if (layer == BAND_PROF_MID_LAYER)
+		hi_layer = BAND_PROF_TOP_LAYER;
+	else
+		return -EINVAL;
+
+	ipolicer = &nix_hw->ipolicer[hi_layer];
+	prof_idx = req->prof.band_prof_id;
+	if (prof_idx >= ipolicer->band_prof.max ||
+	    ipolicer->pfvf_map[prof_idx] != pcifunc)
+		return -EINVAL;
+
+	return 0;
+}
+
+int rvu_mbox_handler_nix_bandprof_alloc(struct rvu *rvu,
+					struct nix_bandprof_alloc_req *req,
+					struct nix_bandprof_alloc_rsp *rsp)
+{
+	int blkaddr, layer, prof, idx, err;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct nix_ipolicer *ipolicer;
+	struct nix_hw *nix_hw;
+
+	if (!rvu->hw->cap.ipolicer)
+		return NIX_AF_ERR_IPOLICER_NOTSUPP;
+
+	err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+	if (err)
+		return err;
+
+	mutex_lock(&rvu->rsrc_lock);
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		if (layer == BAND_PROF_INVAL_LAYER)
+			continue;
+		if (!req->prof_count[layer])
+			continue;
+
+		ipolicer = &nix_hw->ipolicer[layer];
+		for (idx = 0; idx < req->prof_count[layer]; idx++) {
+			/* Allocate a max of 'MAX_BANDPROF_PER_PFFUNC' profiles */
+			if (idx == MAX_BANDPROF_PER_PFFUNC)
+				break;
+
+			prof = rvu_alloc_rsrc(&ipolicer->band_prof);
+			if (prof < 0)
+				break;
+			rsp->prof_count[layer]++;
+			rsp->prof_idx[layer][idx] = prof;
+			ipolicer->pfvf_map[prof] = pcifunc;
+		}
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+	return 0;
+}
+
+static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc)
+{
+	int blkaddr, layer, prof_idx, err;
+	struct nix_ipolicer *ipolicer;
+	struct nix_hw *nix_hw;
+
+	if (!rvu->hw->cap.ipolicer)
+		return NIX_AF_ERR_IPOLICER_NOTSUPP;
+
+	err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+	if (err)
+		return err;
+
+	mutex_lock(&rvu->rsrc_lock);
+	/* Free all the profiles allocated to the PCIFUNC */
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		if (layer == BAND_PROF_INVAL_LAYER)
+			continue;
+		ipolicer = &nix_hw->ipolicer[layer];
+
+		for (prof_idx = 0; prof_idx < ipolicer->band_prof.max; prof_idx++) {
+			if (ipolicer->pfvf_map[prof_idx] != pcifunc)
+				continue;
+
+			/* Clear ratelimit aggregation, if any */
+			if (layer == BAND_PROF_LEAF_LAYER &&
+			    ipolicer->match_id[prof_idx])
+				nix_clear_ratelimit_aggr(rvu, nix_hw, prof_idx);
+
+			ipolicer->pfvf_map[prof_idx] = 0x00;
+			ipolicer->match_id[prof_idx] = 0;
+			rvu_free_rsrc(&ipolicer->band_prof, prof_idx);
+		}
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+	return 0;
+}
+
+int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu,
+				       struct nix_bandprof_free_req *req,
+				       struct msg_rsp *rsp)
+{
+	int blkaddr, layer, prof_idx, idx, err;
+	u16 pcifunc = req->hdr.pcifunc;
+	struct nix_ipolicer *ipolicer;
+	struct nix_hw *nix_hw;
+
+	if (req->free_all)
+		return nix_free_all_bandprof(rvu, pcifunc);
+
+	if (!rvu->hw->cap.ipolicer)
+		return NIX_AF_ERR_IPOLICER_NOTSUPP;
+
+	err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+	if (err)
+		return err;
+
+	mutex_lock(&rvu->rsrc_lock);
+	/* Free the requested profile indices */
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		if (layer == BAND_PROF_INVAL_LAYER)
+			continue;
+		if (!req->prof_count[layer])
+			continue;
+
+		ipolicer = &nix_hw->ipolicer[layer];
+		for (idx = 0; idx < req->prof_count[layer]; idx++) {
+			prof_idx = req->prof_idx[layer][idx];
+			if (prof_idx >= ipolicer->band_prof.max ||
+			    ipolicer->pfvf_map[prof_idx] != pcifunc)
+				continue;
+
+			/* Clear ratelimit aggregation, if any */
+			if (layer == BAND_PROF_LEAF_LAYER &&
+			    ipolicer->match_id[prof_idx])
+				nix_clear_ratelimit_aggr(rvu, nix_hw, prof_idx);
+
+			ipolicer->pfvf_map[prof_idx] = 0x00;
+			ipolicer->match_id[prof_idx] = 0;
+			rvu_free_rsrc(&ipolicer->band_prof, prof_idx);
+			if (idx == MAX_BANDPROF_PER_PFFUNC)
+				break;
+		}
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+	return 0;
+}
+
+static int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
+			       struct nix_cn10k_aq_enq_req *aq_req,
+			       struct nix_cn10k_aq_enq_rsp *aq_rsp,
+			       u16 pcifunc, u8 ctype, u32 qidx)
+{
+	memset(aq_req, 0, sizeof(struct nix_cn10k_aq_enq_req));
+	aq_req->hdr.pcifunc = pcifunc;
+	aq_req->ctype = ctype;
+	aq_req->op = NIX_AQ_INSTOP_READ;
+	aq_req->qidx = qidx;
+
+	return rvu_nix_blk_aq_enq_inst(rvu, nix_hw,
+				       (struct nix_aq_enq_req *)aq_req,
+				       (struct nix_aq_enq_rsp *)aq_rsp);
+}
+
+static int nix_ipolicer_map_leaf_midprofs(struct rvu *rvu,
+					  struct nix_hw *nix_hw,
+					  struct nix_cn10k_aq_enq_req *aq_req,
+					  struct nix_cn10k_aq_enq_rsp *aq_rsp,
+					  u32 leaf_prof, u16 mid_prof)
+{
+	memset(aq_req, 0, sizeof(struct nix_cn10k_aq_enq_req));
+	aq_req->hdr.pcifunc = 0x00;
+	aq_req->ctype = NIX_AQ_CTYPE_BANDPROF;
+	aq_req->op = NIX_AQ_INSTOP_WRITE;
+	aq_req->qidx = leaf_prof;
+
+	aq_req->prof.band_prof_id = mid_prof;
+	aq_req->prof_mask.band_prof_id = GENMASK(6, 0);
+	aq_req->prof.hl_en = 1;
+	aq_req->prof_mask.hl_en = 1;
+
+	return rvu_nix_blk_aq_enq_inst(rvu, nix_hw,
+				       (struct nix_aq_enq_req *)aq_req,
+				       (struct nix_aq_enq_rsp *)aq_rsp);
+}
+
+int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc,
+				 u16 rq_idx, u16 match_id)
+{
+	int leaf_prof, mid_prof, leaf_match;
+	struct nix_cn10k_aq_enq_req aq_req;
+	struct nix_cn10k_aq_enq_rsp aq_rsp;
+	struct nix_ipolicer *ipolicer;
+	struct nix_hw *nix_hw;
+	int blkaddr, idx, rc;
+
+	if (!rvu->hw->cap.ipolicer)
+		return 0;
+
+	rc = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr);
+	if (rc)
+		return rc;
+
+	/* Fetch the RQ's context to see if policing is enabled */
+	rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp, pcifunc,
+				 NIX_AQ_CTYPE_RQ, rq_idx);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to fetch RQ%d context of PFFUNC 0x%x\n",
+			__func__, rq_idx, pcifunc);
+		return rc;
+	}
+
+	if (!aq_rsp.rq.policer_ena)
+		return 0;
+
+	/* Get the bandwidth profile ID mapped to this RQ */
+	leaf_prof = aq_rsp.rq.band_prof_id;
+
+	ipolicer = &nix_hw->ipolicer[BAND_PROF_LEAF_LAYER];
+	ipolicer->match_id[leaf_prof] = match_id;
+
+	/* Check if any other leaf profile is marked with same match_id */
+	for (idx = 0; idx < ipolicer->band_prof.max; idx++) {
+		if (idx == leaf_prof)
+			continue;
+		if (ipolicer->match_id[idx] != match_id)
+			continue;
+
+		leaf_match = idx;
+		break;
+	}
+
+	if (idx == ipolicer->band_prof.max)
+		return 0;
+
+	/* Fetch the matching profile's context to check if it's already
+	 * mapped to a mid level profile.
+	 */
+	rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp, 0x00,
+				 NIX_AQ_CTYPE_BANDPROF, leaf_match);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to fetch context of leaf profile %d\n",
+			__func__, leaf_match);
+		return rc;
+	}
+
+	ipolicer = &nix_hw->ipolicer[BAND_PROF_MID_LAYER];
+	if (aq_rsp.prof.hl_en) {
+		/* Get Mid layer prof index and map leaf_prof index
+		 * also such that flows that are being steered
+		 * to different RQs and marked with same match_id
+		 * are rate limited in a aggregate fashion
+		 */
+		mid_prof = aq_rsp.prof.band_prof_id;
+		rc = nix_ipolicer_map_leaf_midprofs(rvu, nix_hw,
+						    &aq_req, &aq_rsp,
+						    leaf_prof, mid_prof);
+		if (rc) {
+			dev_err(rvu->dev,
+				"%s: Failed to map leaf(%d) and mid(%d) profiles\n",
+				__func__, leaf_prof, mid_prof);
+			goto exit;
+		}
+
+		mutex_lock(&rvu->rsrc_lock);
+		ipolicer->ref_count[mid_prof]++;
+		mutex_unlock(&rvu->rsrc_lock);
+		goto exit;
+	}
+
+	/* Allocate a mid layer profile and
+	 * map both 'leaf_prof' and 'leaf_match' profiles to it.
+	 */
+	mutex_lock(&rvu->rsrc_lock);
+	mid_prof = rvu_alloc_rsrc(&ipolicer->band_prof);
+	if (mid_prof < 0) {
+		dev_err(rvu->dev,
+			"%s: Unable to allocate mid layer profile\n", __func__);
+		mutex_unlock(&rvu->rsrc_lock);
+		goto exit;
+	}
+	mutex_unlock(&rvu->rsrc_lock);
+	ipolicer->pfvf_map[mid_prof] = 0x00;
+	ipolicer->ref_count[mid_prof] = 0;
+
+	/* Initialize mid layer profile same as 'leaf_prof' */
+	rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp, 0x00,
+				 NIX_AQ_CTYPE_BANDPROF, leaf_prof);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to fetch context of leaf profile %d\n",
+			__func__, leaf_prof);
+		goto exit;
+	}
+
+	memset(&aq_req, 0, sizeof(struct nix_cn10k_aq_enq_req));
+	aq_req.hdr.pcifunc = 0x00;
+	aq_req.qidx = (mid_prof & 0x3FFF) | (BAND_PROF_MID_LAYER << 14);
+	aq_req.ctype = NIX_AQ_CTYPE_BANDPROF;
+	aq_req.op = NIX_AQ_INSTOP_WRITE;
+	memcpy(&aq_req.prof, &aq_rsp.prof, sizeof(struct nix_bandprof_s));
+	/* Clear higher layer enable bit in the mid profile, just in case */
+	aq_req.prof.hl_en = 0;
+	aq_req.prof_mask.hl_en = 1;
+
+	rc = rvu_nix_blk_aq_enq_inst(rvu, nix_hw,
+				     (struct nix_aq_enq_req *)&aq_req, NULL);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to INIT context of mid layer profile %d\n",
+			__func__, mid_prof);
+		goto exit;
+	}
+
+	/* Map both leaf profiles to this mid layer profile */
+	rc = nix_ipolicer_map_leaf_midprofs(rvu, nix_hw,
+					    &aq_req, &aq_rsp,
+					    leaf_prof, mid_prof);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to map leaf(%d) and mid(%d) profiles\n",
+			__func__, leaf_prof, mid_prof);
+		goto exit;
+	}
+
+	mutex_lock(&rvu->rsrc_lock);
+	ipolicer->ref_count[mid_prof]++;
+	mutex_unlock(&rvu->rsrc_lock);
+
+	rc = nix_ipolicer_map_leaf_midprofs(rvu, nix_hw,
+					    &aq_req, &aq_rsp,
+					    leaf_match, mid_prof);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to map leaf(%d) and mid(%d) profiles\n",
+			__func__, leaf_match, mid_prof);
+		ipolicer->ref_count[mid_prof]--;
+		goto exit;
+	}
+
+	mutex_lock(&rvu->rsrc_lock);
+	ipolicer->ref_count[mid_prof]++;
+	mutex_unlock(&rvu->rsrc_lock);
+
+exit:
+	return rc;
+}
+
+/* Called with mutex rsrc_lock */
+static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
+				     u32 leaf_prof)
+{
+	struct nix_cn10k_aq_enq_req aq_req;
+	struct nix_cn10k_aq_enq_rsp aq_rsp;
+	struct nix_ipolicer *ipolicer;
+	u16 mid_prof;
+	int rc;
+
+	mutex_unlock(&rvu->rsrc_lock);
+
+	rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp, 0x00,
+				 NIX_AQ_CTYPE_BANDPROF, leaf_prof);
+
+	mutex_lock(&rvu->rsrc_lock);
+	if (rc) {
+		dev_err(rvu->dev,
+			"%s: Failed to fetch context of leaf profile %d\n",
+			__func__, leaf_prof);
+		return;
+	}
+
+	if (!aq_rsp.prof.hl_en)
+		return;
+
+	mid_prof = aq_rsp.prof.band_prof_id;
+	ipolicer = &nix_hw->ipolicer[BAND_PROF_MID_LAYER];
+	ipolicer->ref_count[mid_prof]--;
+	/* If ref_count is zero, free mid layer profile */
+	if (!ipolicer->ref_count[mid_prof]) {
+		ipolicer->pfvf_map[mid_prof] = 0x00;
+		rvu_free_rsrc(&ipolicer->band_prof, mid_prof);
+	}
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 6ba6a835e2fa..87d7c6ab047f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -1110,6 +1110,11 @@ find_rule:
 	    req->vtag0_type == NIX_AF_LFX_RX_VTAG_TYPE7)
 		rule->vfvlan_cfg = true;
 
+	if (is_npc_intf_rx(req->intf) && req->match_id &&
+	    (req->op == NIX_RX_ACTIONOP_UCAST || req->op == NIX_RX_ACTIONOP_RSS))
+		return rvu_nix_setup_ratelimit_aggr(rvu, req->hdr.pcifunc,
+					     req->index, req->match_id);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index ce365ae80352..76837d5e19c6 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -171,6 +171,7 @@
 #define NIX_AF_SQ_CONST			(0x0040)
 #define NIX_AF_CQ_CONST			(0x0048)
 #define NIX_AF_RQ_CONST			(0x0050)
+#define NIX_AF_PL_CONST			(0x0058)
 #define NIX_AF_PSE_CONST		(0x0060)
 #define NIX_AF_TL1_CONST		(0x0070)
 #define NIX_AF_TL2_CONST		(0x0078)
@@ -181,6 +182,7 @@
 #define NIX_AF_LSO_CFG			(0x00A8)
 #define NIX_AF_BLK_RST			(0x00B0)
 #define NIX_AF_TX_TSTMP_CFG		(0x00C0)
+#define NIX_AF_PL_TS			(0x00C8)
 #define NIX_AF_RX_CFG			(0x00D0)
 #define NIX_AF_AVG_DELAY		(0x00E0)
 #define NIX_AF_CINT_DELAY		(0x00F0)
@@ -212,7 +214,9 @@
 #define NIX_AF_RX_DEF_OL2		(0x0200)
 #define NIX_AF_RX_DEF_OIP4		(0x0210)
 #define NIX_AF_RX_DEF_IIP4		(0x0220)
+#define NIX_AF_RX_DEF_VLAN0_PCP_DEI	(0x0228)
 #define NIX_AF_RX_DEF_OIP6		(0x0230)
+#define NIX_AF_RX_DEF_VLAN1_PCP_DEI	(0x0238)
 #define NIX_AF_RX_DEF_IIP6		(0x0240)
 #define NIX_AF_RX_DEF_OTCP		(0x0250)
 #define NIX_AF_RX_DEF_ITCP		(0x0260)
@@ -223,6 +227,10 @@
 #define NIX_AF_RX_DEF_ISCTP		(0x02A0)
 #define NIX_AF_RX_DEF_IPSECX		(0x02B0)
 #define NIX_AF_RX_DEF_CST_APAD1		(0x02A8)
+#define NIX_AF_RX_DEF_IIP4_DSCP		(0x02E0)
+#define NIX_AF_RX_DEF_OIP4_DSCP		(0x02E8)
+#define NIX_AF_RX_DEF_IIP6_DSCP		(0x02F0)
+#define NIX_AF_RX_DEF_OIP6_DSCP		(0x02F8)
 #define NIX_AF_RX_IPSEC_GEN_CFG		(0x0300)
 #define NIX_AF_RX_CPTX_INST_ADDR	(0x0310)
 #define NIX_AF_NDC_TX_SYNC		(0x03F0)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index 5e5f45c7eab0..8fb002d05219 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -286,7 +286,7 @@ enum nix_aq_ctype {
 	NIX_AQ_CTYPE_MCE  = 0x3,
 	NIX_AQ_CTYPE_RSS  = 0x4,
 	NIX_AQ_CTYPE_DYNO = 0x5,
-	NIX_AQ_CTYPE_BAND_PROF = 0x6,
+	NIX_AQ_CTYPE_BANDPROF = 0x6,
 };
 
 /* NIX admin queue instruction opcodes */
@@ -665,6 +665,82 @@ struct nix_rx_mce_s {
 	uint64_t next       : 16;
 };
 
+enum nix_band_prof_layers {
+	BAND_PROF_LEAF_LAYER = 0,
+	BAND_PROF_INVAL_LAYER = 1,
+	BAND_PROF_MID_LAYER = 2,
+	BAND_PROF_TOP_LAYER = 3,
+	BAND_PROF_NUM_LAYERS = 4,
+};
+
+enum NIX_RX_BAND_PROF_ACTIONRESULT_E {
+	NIX_RX_BAND_PROF_ACTIONRESULT_PASS = 0x0,
+	NIX_RX_BAND_PROF_ACTIONRESULT_DROP = 0x1,
+	NIX_RX_BAND_PROF_ACTIONRESULT_RED = 0x2,
+};
+
+/* NIX ingress policer bandwidth profile structure */
+struct nix_bandprof_s {
+	uint64_t pc_mode                     :  2; /* W0 */
+	uint64_t icolor                      :  2;
+	uint64_t tnl_ena                     :  1;
+	uint64_t reserved_5_7                :  3;
+	uint64_t peir_exponent               :  5;
+	uint64_t reserved_13_15              :  3;
+	uint64_t pebs_exponent               :  5;
+	uint64_t reserved_21_23              :  3;
+	uint64_t cir_exponent                :  5;
+	uint64_t reserved_29_31              :  3;
+	uint64_t cbs_exponent                :  5;
+	uint64_t reserved_37_39              :  3;
+	uint64_t peir_mantissa               :  8;
+	uint64_t pebs_mantissa               :  8;
+	uint64_t cir_mantissa                :  8;
+	uint64_t cbs_mantissa                :  8; /* W1 */
+	uint64_t lmode                       :  1;
+	uint64_t l_sellect                   :  3;
+	uint64_t rdiv                        :  4;
+	uint64_t adjust_exponent             :  5;
+	uint64_t reserved_85_86              :  2;
+	uint64_t adjust_mantissa             :  9;
+	uint64_t gc_action                   :  2;
+	uint64_t yc_action                   :  2;
+	uint64_t rc_action                   :  2;
+	uint64_t meter_algo                  :  2;
+	uint64_t band_prof_id                :  7;
+	uint64_t reserved_111_118            :  8;
+	uint64_t hl_en                       :  1;
+	uint64_t reserved_120_127            :  8;
+	uint64_t ts                          : 48; /* W2 */
+	uint64_t reserved_176_191            : 16;
+	uint64_t pe_accum                    : 32; /* W3 */
+	uint64_t c_accum                     : 32;
+	uint64_t green_pkt_pass              : 48; /* W4 */
+	uint64_t reserved_304_319            : 16;
+	uint64_t yellow_pkt_pass             : 48; /* W5 */
+	uint64_t reserved_368_383            : 16;
+	uint64_t red_pkt_pass                : 48; /* W6 */
+	uint64_t reserved_432_447            : 16;
+	uint64_t green_octs_pass             : 48; /* W7 */
+	uint64_t reserved_496_511            : 16;
+	uint64_t yellow_octs_pass            : 48; /* W8 */
+	uint64_t reserved_560_575            : 16;
+	uint64_t red_octs_pass               : 48; /* W9 */
+	uint64_t reserved_624_639            : 16;
+	uint64_t green_pkt_drop              : 48; /* W10 */
+	uint64_t reserved_688_703            : 16;
+	uint64_t yellow_pkt_drop             : 48; /* W11 */
+	uint64_t reserved_752_767            : 16;
+	uint64_t red_pkt_drop                : 48; /* W12 */
+	uint64_t reserved_816_831            : 16;
+	uint64_t green_octs_drop             : 48; /* W13 */
+	uint64_t reserved_880_895            : 16;
+	uint64_t yellow_octs_drop            : 48; /* W14 */
+	uint64_t reserved_944_959            : 16;
+	uint64_t red_octs_drop               : 48; /* W15 */
+	uint64_t reserved_1008_1023          : 16;
+};
+
 enum nix_lsoalg {
 	NIX_LSOALG_NOP,
 	NIX_LSOALG_ADD_SEGNUM,

From e7d8971763f3e7f12d9f9933faf6bd4912538d85 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Tue, 15 Jun 2021 17:04:28 +0530
Subject: [PATCH 1836/3292] octeontx2-af: cn10k: Debugfs support for bandwidth
 profiles

Added support for dumping current resource status of bandwidth
profiles and contexts of allocated profiles via debugfs.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/octeontx2/af/rvu.c   |   8 +
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |   5 +
 .../marvell/octeontx2/af/rvu_debugfs.c        | 163 ++++++++++++++++++
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   |   8 +-
 .../marvell/octeontx2/af/rvu_struct.h         |   7 +
 5 files changed, 187 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index f11a02d6b6ef..0b092949d7ac 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -184,6 +184,14 @@ int rvu_rsrc_free_count(struct rsrc_bmap *rsrc)
 	return (rsrc->max - used);
 }
 
+bool is_rsrc_free(struct rsrc_bmap *rsrc, int id)
+{
+	if (!rsrc->bmap)
+		return false;
+
+	return !test_bit(id, rsrc->bmap);
+}
+
 int rvu_alloc_bitmap(struct rsrc_bmap *rsrc)
 {
 	rsrc->bmap = kcalloc(BITS_TO_LONGS(rsrc->max),
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 4d2a5ca5bd47..9e5d9ba6f01e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -596,6 +596,7 @@ static inline bool is_rvu_fwdata_valid(struct rvu *rvu)
 int rvu_alloc_bitmap(struct rsrc_bmap *rsrc);
 int rvu_alloc_rsrc(struct rsrc_bmap *rsrc);
 void rvu_free_rsrc(struct rsrc_bmap *rsrc, int id);
+bool is_rsrc_free(struct rsrc_bmap *rsrc, int id);
 int rvu_rsrc_free_count(struct rsrc_bmap *rsrc);
 int rvu_alloc_rsrc_contig(struct rsrc_bmap *rsrc, int nrsrc);
 bool rvu_rsrc_check_contig(struct rsrc_bmap *rsrc, int nrsrc);
@@ -683,6 +684,10 @@ int nix_get_struct_ptrs(struct rvu *rvu, u16 pcifunc,
 			struct nix_hw **nix_hw, int *blkaddr);
 int rvu_nix_setup_ratelimit_aggr(struct rvu *rvu, u16 pcifunc,
 				 u16 rq_idx, u16 match_id);
+int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
+			struct nix_cn10k_aq_enq_req *aq_req,
+			struct nix_cn10k_aq_enq_rsp *aq_rsp,
+			u16 pcifunc, u8 ctype, u32 qidx);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index 7103f8216ad1..3cc3c6fd1d84 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -1632,6 +1632,165 @@ static int rvu_dbg_nix_qsize_display(struct seq_file *filp, void *unused)
 
 RVU_DEBUG_SEQ_FOPS(nix_qsize, nix_qsize_display, nix_qsize_write);
 
+static void print_band_prof_ctx(struct seq_file *m,
+				struct nix_bandprof_s *prof)
+{
+	char *str;
+
+	switch (prof->pc_mode) {
+	case NIX_RX_PC_MODE_VLAN:
+		str = "VLAN";
+		break;
+	case NIX_RX_PC_MODE_DSCP:
+		str = "DSCP";
+		break;
+	case NIX_RX_PC_MODE_GEN:
+		str = "Generic";
+		break;
+	case NIX_RX_PC_MODE_RSVD:
+		str = "Reserved";
+		break;
+	}
+	seq_printf(m, "W0: pc_mode\t\t%s\n", str);
+	str = (prof->icolor == 3) ? "Color blind" :
+		(prof->icolor == 0) ? "Green" :
+		(prof->icolor == 1) ? "Yellow" : "Red";
+	seq_printf(m, "W0: icolor\t\t%s\n", str);
+	seq_printf(m, "W0: tnl_ena\t\t%d\n", prof->tnl_ena);
+	seq_printf(m, "W0: peir_exponent\t%d\n", prof->peir_exponent);
+	seq_printf(m, "W0: pebs_exponent\t%d\n", prof->pebs_exponent);
+	seq_printf(m, "W0: cir_exponent\t%d\n", prof->cir_exponent);
+	seq_printf(m, "W0: cbs_exponent\t%d\n", prof->cbs_exponent);
+	seq_printf(m, "W0: peir_mantissa\t%d\n", prof->peir_mantissa);
+	seq_printf(m, "W0: pebs_mantissa\t%d\n", prof->pebs_mantissa);
+	seq_printf(m, "W0: cir_mantissa\t%d\n", prof->cir_mantissa);
+
+	seq_printf(m, "W1: cbs_mantissa\t%d\n", prof->cbs_mantissa);
+	str = (prof->lmode == 0) ? "byte" : "packet";
+	seq_printf(m, "W1: lmode\t\t%s\n", str);
+	seq_printf(m, "W1: l_select\t\t%d\n", prof->l_sellect);
+	seq_printf(m, "W1: rdiv\t\t%d\n", prof->rdiv);
+	seq_printf(m, "W1: adjust_exponent\t%d\n", prof->adjust_exponent);
+	seq_printf(m, "W1: adjust_mantissa\t%d\n", prof->adjust_mantissa);
+	str = (prof->gc_action == 0) ? "PASS" :
+		(prof->gc_action == 1) ? "DROP" : "RED";
+	seq_printf(m, "W1: gc_action\t\t%s\n", str);
+	str = (prof->yc_action == 0) ? "PASS" :
+		(prof->yc_action == 1) ? "DROP" : "RED";
+	seq_printf(m, "W1: yc_action\t\t%s\n", str);
+	str = (prof->rc_action == 0) ? "PASS" :
+		(prof->rc_action == 1) ? "DROP" : "RED";
+	seq_printf(m, "W1: rc_action\t\t%s\n", str);
+	seq_printf(m, "W1: meter_algo\t\t%d\n", prof->meter_algo);
+	seq_printf(m, "W1: band_prof_id\t%d\n", prof->band_prof_id);
+	seq_printf(m, "W1: hl_en\t\t%d\n", prof->hl_en);
+
+	seq_printf(m, "W2: ts\t\t\t%lld\n", (u64)prof->ts);
+	seq_printf(m, "W3: pe_accum\t\t%d\n", prof->pe_accum);
+	seq_printf(m, "W3: c_accum\t\t%d\n", prof->c_accum);
+	seq_printf(m, "W4: green_pkt_pass\t%lld\n",
+		   (u64)prof->green_pkt_pass);
+	seq_printf(m, "W5: yellow_pkt_pass\t%lld\n",
+		   (u64)prof->yellow_pkt_pass);
+	seq_printf(m, "W6: red_pkt_pass\t%lld\n", (u64)prof->red_pkt_pass);
+	seq_printf(m, "W7: green_octs_pass\t%lld\n",
+		   (u64)prof->green_octs_pass);
+	seq_printf(m, "W8: yellow_octs_pass\t%lld\n",
+		   (u64)prof->yellow_octs_pass);
+	seq_printf(m, "W9: red_octs_pass\t%lld\n", (u64)prof->red_octs_pass);
+	seq_printf(m, "W10: green_pkt_drop\t%lld\n",
+		   (u64)prof->green_pkt_drop);
+	seq_printf(m, "W11: yellow_pkt_drop\t%lld\n",
+		   (u64)prof->yellow_pkt_drop);
+	seq_printf(m, "W12: red_pkt_drop\t%lld\n", (u64)prof->red_pkt_drop);
+	seq_printf(m, "W13: green_octs_drop\t%lld\n",
+		   (u64)prof->green_octs_drop);
+	seq_printf(m, "W14: yellow_octs_drop\t%lld\n",
+		   (u64)prof->yellow_octs_drop);
+	seq_printf(m, "W15: red_octs_drop\t%lld\n", (u64)prof->red_octs_drop);
+	seq_puts(m, "==============================\n");
+}
+
+static int rvu_dbg_nix_band_prof_ctx_display(struct seq_file *m, void *unused)
+{
+	struct nix_hw *nix_hw = m->private;
+	struct nix_cn10k_aq_enq_req aq_req;
+	struct nix_cn10k_aq_enq_rsp aq_rsp;
+	struct rvu *rvu = nix_hw->rvu;
+	struct nix_ipolicer *ipolicer;
+	int layer, prof_idx, idx, rc;
+	u16 pcifunc;
+	char *str;
+
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		if (layer == BAND_PROF_INVAL_LAYER)
+			continue;
+		str = (layer == BAND_PROF_LEAF_LAYER) ? "Leaf" :
+			(layer == BAND_PROF_MID_LAYER) ? "Mid" : "Top";
+
+		seq_printf(m, "\n%s bandwidth profiles\n", str);
+		seq_puts(m, "=======================\n");
+
+		ipolicer = &nix_hw->ipolicer[layer];
+
+		for (idx = 0; idx < ipolicer->band_prof.max; idx++) {
+			if (is_rsrc_free(&ipolicer->band_prof, idx))
+				continue;
+
+			prof_idx = (idx & 0x3FFF) | (layer << 14);
+			rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp,
+						 0x00, NIX_AQ_CTYPE_BANDPROF,
+						 prof_idx);
+			if (rc) {
+				dev_err(rvu->dev,
+					"%s: Failed to fetch context of %s profile %d, err %d\n",
+					__func__, str, idx, rc);
+				return 0;
+			}
+			seq_printf(m, "\n%s bandwidth profile:: %d\n", str, idx);
+			pcifunc = ipolicer->pfvf_map[idx];
+			if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+				seq_printf(m, "Allocated to :: PF %d\n",
+					   rvu_get_pf(pcifunc));
+			else
+				seq_printf(m, "Allocated to :: PF %d VF %d\n",
+					   rvu_get_pf(pcifunc),
+					   (pcifunc & RVU_PFVF_FUNC_MASK) - 1);
+			print_band_prof_ctx(m, &aq_rsp.prof);
+		}
+	}
+	return 0;
+}
+
+RVU_DEBUG_SEQ_FOPS(nix_band_prof_ctx, nix_band_prof_ctx_display, NULL);
+
+static int rvu_dbg_nix_band_prof_rsrc_display(struct seq_file *m, void *unused)
+{
+	struct nix_hw *nix_hw = m->private;
+	struct nix_ipolicer *ipolicer;
+	int layer;
+	char *str;
+
+	seq_puts(m, "\nBandwidth profile resource free count\n");
+	seq_puts(m, "=====================================\n");
+	for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+		if (layer == BAND_PROF_INVAL_LAYER)
+			continue;
+		str = (layer == BAND_PROF_LEAF_LAYER) ? "Leaf" :
+			(layer == BAND_PROF_MID_LAYER) ? "Mid " : "Top ";
+
+		ipolicer = &nix_hw->ipolicer[layer];
+		seq_printf(m, "%s :: Max: %4d  Free: %4d\n", str,
+			   ipolicer->band_prof.max,
+			   rvu_rsrc_free_count(&ipolicer->band_prof));
+	}
+	seq_puts(m, "=====================================\n");
+
+	return 0;
+}
+
+RVU_DEBUG_SEQ_FOPS(nix_band_prof_rsrc, nix_band_prof_rsrc_display, NULL);
+
 static void rvu_dbg_nix_init(struct rvu *rvu, int blkaddr)
 {
 	struct nix_hw *nix_hw;
@@ -1664,6 +1823,10 @@ static void rvu_dbg_nix_init(struct rvu *rvu, int blkaddr)
 			    &rvu_dbg_nix_ndc_rx_hits_miss_fops);
 	debugfs_create_file("qsize", 0600, rvu->rvu_dbg.nix, rvu,
 			    &rvu_dbg_nix_qsize_fops);
+	debugfs_create_file("ingress_policer_ctx", 0600, rvu->rvu_dbg.nix, nix_hw,
+			    &rvu_dbg_nix_band_prof_ctx_fops);
+	debugfs_create_file("ingress_policer_rsrc", 0600, rvu->rvu_dbg.nix, nix_hw,
+			    &rvu_dbg_nix_band_prof_rsrc_fops);
 }
 
 static void rvu_dbg_npa_init(struct rvu *rvu)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index ebd73a8856f2..d6f8210652c5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -4365,10 +4365,10 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu,
 	return 0;
 }
 
-static int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
-			       struct nix_cn10k_aq_enq_req *aq_req,
-			       struct nix_cn10k_aq_enq_rsp *aq_rsp,
-			       u16 pcifunc, u8 ctype, u32 qidx)
+int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
+			struct nix_cn10k_aq_enq_req *aq_req,
+			struct nix_cn10k_aq_enq_rsp *aq_rsp,
+			u16 pcifunc, u8 ctype, u32 qidx)
 {
 	memset(aq_req, 0, sizeof(struct nix_cn10k_aq_enq_req));
 	aq_req->hdr.pcifunc = pcifunc;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index 8fb002d05219..14aa8e37ea41 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -679,6 +679,13 @@ enum NIX_RX_BAND_PROF_ACTIONRESULT_E {
 	NIX_RX_BAND_PROF_ACTIONRESULT_RED = 0x2,
 };
 
+enum nix_band_prof_pc_mode {
+	NIX_RX_PC_MODE_VLAN = 0,
+	NIX_RX_PC_MODE_DSCP = 1,
+	NIX_RX_PC_MODE_GEN = 2,
+	NIX_RX_PC_MODE_RSVD = 3,
+};
+
 /* NIX ingress policer bandwidth profile structure */
 struct nix_bandprof_s {
 	uint64_t pc_mode                     :  2; /* W0 */

From 2ca89a2c37527221edc549ffd3b65c6f8d9d4088 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Tue, 15 Jun 2021 17:04:29 +0530
Subject: [PATCH 1837/3292] octeontx2-pf: TC_MATCHALL ingress ratelimiting
 offload

Add TC_MATCHALL ingress ratelimiting offload support with POLICE
action for entire traffic coming into the interface.

Eg: To ratelimit ingress traffic to 100Mbps

$ ethtool -K eth0 hw-tc-offload on
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 ingress matchall skip_sw \
                action police rate 100Mbit burst 32Kbit

To support this, a leaf level bandwidth profile is allocated and all
RQs' contexts used by this interface are updated to point to it.
And the leaf level bandwidth profile is configured with user specified
rate and burst sizes.

Co-developed-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/nic/cn10k.c    | 323 ++++++++++++++++++
 .../ethernet/marvell/octeontx2/nic/cn10k.h    |  11 +
 .../marvell/octeontx2/nic/otx2_common.h       |   2 +
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  |   3 +
 .../ethernet/marvell/octeontx2/nic/otx2_tc.c  |  84 +++++
 5 files changed, 423 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index 9ec0313f13fc..1b08896b46d2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -179,3 +179,326 @@ void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
 	sq->head++;
 	sq->head &= (sq->sqe_cnt - 1);
 }
+
+int cn10k_free_all_ipolicers(struct otx2_nic *pfvf)
+{
+	struct nix_bandprof_free_req *req;
+	int rc;
+
+	if (is_dev_otx2(pfvf->pdev))
+		return 0;
+
+	mutex_lock(&pfvf->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_nix_bandprof_free(&pfvf->mbox);
+	if (!req) {
+		rc =  -ENOMEM;
+		goto out;
+	}
+
+	/* Free all bandwidth profiles allocated */
+	req->free_all = true;
+
+	rc = otx2_sync_mbox_msg(&pfvf->mbox);
+out:
+	mutex_unlock(&pfvf->mbox.lock);
+	return rc;
+}
+
+int cn10k_alloc_leaf_profile(struct otx2_nic *pfvf, u16 *leaf)
+{
+	struct nix_bandprof_alloc_req *req;
+	struct nix_bandprof_alloc_rsp *rsp;
+	int rc;
+
+	req = otx2_mbox_alloc_msg_nix_bandprof_alloc(&pfvf->mbox);
+	if (!req)
+		return  -ENOMEM;
+
+	req->prof_count[BAND_PROF_LEAF_LAYER] = 1;
+
+	rc = otx2_sync_mbox_msg(&pfvf->mbox);
+	if (rc)
+		goto out;
+
+	rsp = (struct  nix_bandprof_alloc_rsp *)
+	       otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+	if (!rsp->prof_count[BAND_PROF_LEAF_LAYER]) {
+		rc = -EIO;
+		goto out;
+	}
+
+	*leaf = rsp->prof_idx[BAND_PROF_LEAF_LAYER][0];
+out:
+	if (rc) {
+		dev_warn(pfvf->dev,
+			 "Failed to allocate ingress bandwidth policer\n");
+	}
+
+	return rc;
+}
+
+int cn10k_alloc_matchall_ipolicer(struct otx2_nic *pfvf)
+{
+	struct otx2_hw *hw = &pfvf->hw;
+	int ret;
+
+	mutex_lock(&pfvf->mbox.lock);
+
+	ret = cn10k_alloc_leaf_profile(pfvf, &hw->matchall_ipolicer);
+
+	mutex_unlock(&pfvf->mbox.lock);
+
+	return ret;
+}
+
+#define POLICER_TIMESTAMP	  1  /* 1 second */
+#define MAX_RATE_EXP		  22 /* Valid rate exponent range: 0 - 22 */
+
+static void cn10k_get_ingress_burst_cfg(u32 burst, u32 *burst_exp,
+					u32 *burst_mantissa)
+{
+	int tmp;
+
+	/* Burst is calculated as
+	 * (1+[BURST_MANTISSA]/256)*2^[BURST_EXPONENT]
+	 * This is the upper limit on number tokens (bytes) that
+	 * can be accumulated in the bucket.
+	 */
+	*burst_exp = ilog2(burst);
+	if (burst < 256) {
+		/* No float: can't express mantissa in this case */
+		*burst_mantissa = 0;
+		return;
+	}
+
+	if (*burst_exp > MAX_RATE_EXP)
+		*burst_exp = MAX_RATE_EXP;
+
+	/* Calculate mantissa
+	 * Find remaining bytes 'burst - 2^burst_exp'
+	 * mantissa = (remaining bytes) / 2^ (burst_exp - 8)
+	 */
+	tmp = burst - rounddown_pow_of_two(burst);
+	*burst_mantissa = tmp / (1UL << (*burst_exp - 8));
+}
+
+static void cn10k_get_ingress_rate_cfg(u64 rate, u32 *rate_exp,
+				       u32 *rate_mantissa, u32 *rdiv)
+{
+	u32 div = 0;
+	u32 exp = 0;
+	u64 tmp;
+
+	/* Figure out mantissa, exponent and divider from given max pkt rate
+	 *
+	 * To achieve desired rate HW adds
+	 * (1+[RATE_MANTISSA]/256)*2^[RATE_EXPONENT] tokens (bytes) at every
+	 * policer timeunit * 2^rdiv ie 2 * 2^rdiv usecs, to the token bucket.
+	 * Here policer timeunit is 2 usecs and rate is in bits per sec.
+	 * Since floating point cannot be used below algorithm uses 1000000
+	 * scale factor to support rates upto 100Gbps.
+	 */
+	tmp = rate * 32 * 2;
+	if (tmp < 256000000) {
+		while (tmp < 256000000) {
+			tmp = tmp * 2;
+			div++;
+		}
+	} else {
+		for (exp = 0; tmp >= 512000000 && exp <= MAX_RATE_EXP; exp++)
+			tmp = tmp / 2;
+
+		if (exp > MAX_RATE_EXP)
+			exp = MAX_RATE_EXP;
+	}
+
+	*rate_mantissa = (tmp - 256000000) / 1000000;
+	*rate_exp = exp;
+	*rdiv = div;
+}
+
+int cn10k_map_unmap_rq_policer(struct otx2_nic *pfvf, int rq_idx,
+			       u16 policer, bool map)
+{
+	struct nix_cn10k_aq_enq_req *aq;
+
+	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
+	if (!aq)
+		return -ENOMEM;
+
+	/* Enable policing and set the bandwidth profile (policer) index */
+	if (map)
+		aq->rq.policer_ena = 1;
+	else
+		aq->rq.policer_ena = 0;
+	aq->rq_mask.policer_ena = 1;
+
+	aq->rq.band_prof_id = policer;
+	aq->rq_mask.band_prof_id = GENMASK(9, 0);
+
+	/* Fill AQ info */
+	aq->qidx = rq_idx;
+	aq->ctype = NIX_AQ_CTYPE_RQ;
+	aq->op = NIX_AQ_INSTOP_WRITE;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int cn10k_free_leaf_profile(struct otx2_nic *pfvf, u16 leaf)
+{
+	struct nix_bandprof_free_req *req;
+
+	req = otx2_mbox_alloc_msg_nix_bandprof_free(&pfvf->mbox);
+	if (!req)
+		return -ENOMEM;
+
+	req->prof_count[BAND_PROF_LEAF_LAYER] = 1;
+	req->prof_idx[BAND_PROF_LEAF_LAYER][0] = leaf;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int cn10k_free_matchall_ipolicer(struct otx2_nic *pfvf)
+{
+	struct otx2_hw *hw = &pfvf->hw;
+	int qidx, rc;
+
+	mutex_lock(&pfvf->mbox.lock);
+
+	/* Remove RQ's policer mapping */
+	for (qidx = 0; qidx < hw->rx_queues; qidx++)
+		cn10k_map_unmap_rq_policer(pfvf, qidx,
+					   hw->matchall_ipolicer, false);
+
+	rc = cn10k_free_leaf_profile(pfvf, hw->matchall_ipolicer);
+
+	mutex_unlock(&pfvf->mbox.lock);
+	return rc;
+}
+
+int cn10k_set_ipolicer_rate(struct otx2_nic *pfvf, u16 profile,
+			    u32 burst, u64 rate, bool pps)
+{
+	struct nix_cn10k_aq_enq_req *aq;
+	u32 burst_exp, burst_mantissa;
+	u32 rate_exp, rate_mantissa;
+	u32 rdiv;
+
+	/* Get exponent and mantissa values for the desired rate */
+	cn10k_get_ingress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+	cn10k_get_ingress_rate_cfg(rate, &rate_exp, &rate_mantissa, &rdiv);
+
+	/* Init bandwidth profile */
+	aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
+	if (!aq)
+		return -ENOMEM;
+
+	/* Set initial color mode to blind */
+	aq->prof.icolor = 0x03;
+	aq->prof_mask.icolor = 0x03;
+
+	/* Set rate and burst values */
+	aq->prof.cir_exponent = rate_exp;
+	aq->prof_mask.cir_exponent = 0x1F;
+
+	aq->prof.cir_mantissa = rate_mantissa;
+	aq->prof_mask.cir_mantissa = 0xFF;
+
+	aq->prof.cbs_exponent = burst_exp;
+	aq->prof_mask.cbs_exponent = 0x1F;
+
+	aq->prof.cbs_mantissa = burst_mantissa;
+	aq->prof_mask.cbs_mantissa = 0xFF;
+
+	aq->prof.rdiv = rdiv;
+	aq->prof_mask.rdiv = 0xF;
+
+	if (pps) {
+		/* The amount of decremented tokens is calculated according to
+		 * the following equation:
+		 * max([ LMODE ? 0 : (packet_length - LXPTR)] +
+		 *	     ([ADJUST_MANTISSA]/256 - 1) * 2^[ADJUST_EXPONENT],
+		 *	1/256)
+		 * if LMODE is 1 then rate limiting will be based on
+		 * PPS otherwise bps.
+		 * The aim of the ADJUST value is to specify a token cost per
+		 * packet in contrary to the packet length that specifies a
+		 * cost per byte. To rate limit based on PPS adjust mantissa
+		 * is set as 384 and exponent as 1 so that number of tokens
+		 * decremented becomes 1 i.e, 1 token per packeet.
+		 */
+		aq->prof.adjust_exponent = 1;
+		aq->prof_mask.adjust_exponent = 0x1F;
+
+		aq->prof.adjust_mantissa = 384;
+		aq->prof_mask.adjust_mantissa = 0x1FF;
+
+		aq->prof.lmode = 0x1;
+		aq->prof_mask.lmode = 0x1;
+	}
+
+	/* Two rate three color marker
+	 * With PEIR/EIR set to zero, color will be either green or red
+	 */
+	aq->prof.meter_algo = 2;
+	aq->prof_mask.meter_algo = 0x3;
+
+	aq->prof.rc_action = NIX_RX_BAND_PROF_ACTIONRESULT_DROP;
+	aq->prof_mask.rc_action = 0x3;
+
+	aq->prof.yc_action = NIX_RX_BAND_PROF_ACTIONRESULT_PASS;
+	aq->prof_mask.yc_action = 0x3;
+
+	aq->prof.gc_action = NIX_RX_BAND_PROF_ACTIONRESULT_PASS;
+	aq->prof_mask.gc_action = 0x3;
+
+	/* Setting exponent value as 24 and mantissa as 0 configures
+	 * the bucket with zero values making bucket unused. Peak
+	 * information rate and Excess information rate buckets are
+	 * unused here.
+	 */
+	aq->prof.peir_exponent = 24;
+	aq->prof_mask.peir_exponent = 0x1F;
+
+	aq->prof.peir_mantissa = 0;
+	aq->prof_mask.peir_mantissa = 0xFF;
+
+	aq->prof.pebs_exponent = 24;
+	aq->prof_mask.pebs_exponent = 0x1F;
+
+	aq->prof.pebs_mantissa = 0;
+	aq->prof_mask.pebs_mantissa = 0xFF;
+
+	/* Fill AQ info */
+	aq->qidx = profile;
+	aq->ctype = NIX_AQ_CTYPE_BANDPROF;
+	aq->op = NIX_AQ_INSTOP_WRITE;
+
+	return otx2_sync_mbox_msg(&pfvf->mbox);
+}
+
+int cn10k_set_matchall_ipolicer_rate(struct otx2_nic *pfvf,
+				     u32 burst, u64 rate)
+{
+	struct otx2_hw *hw = &pfvf->hw;
+	int qidx, rc;
+
+	mutex_lock(&pfvf->mbox.lock);
+
+	rc = cn10k_set_ipolicer_rate(pfvf, hw->matchall_ipolicer, burst,
+				     rate, false);
+	if (rc)
+		goto out;
+
+	for (qidx = 0; qidx < hw->rx_queues; qidx++) {
+		rc = cn10k_map_unmap_rq_policer(pfvf, qidx,
+						hw->matchall_ipolicer, true);
+		if (rc)
+			break;
+	}
+
+out:
+	mutex_unlock(&pfvf->mbox.lock);
+	return rc;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
index e0bc595cbb78..71292a4cf1f3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
@@ -14,4 +14,15 @@ void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
 int cn10k_pf_lmtst_init(struct otx2_nic *pf);
 int cn10k_vf_lmtst_init(struct otx2_nic *vf);
+int cn10k_free_all_ipolicers(struct otx2_nic *pfvf);
+int cn10k_alloc_matchall_ipolicer(struct otx2_nic *pfvf);
+int cn10k_free_matchall_ipolicer(struct otx2_nic *pfvf);
+int cn10k_set_matchall_ipolicer_rate(struct otx2_nic *pfvf,
+				     u32 burst, u64 rate);
+int cn10k_map_unmap_rq_policer(struct otx2_nic *pfvf, int rq_idx,
+			       u16 policer, bool map);
+int cn10k_alloc_leaf_profile(struct otx2_nic *pfvf, u16 *leaf);
+int cn10k_set_ipolicer_rate(struct otx2_nic *pfvf, u16 profile,
+			    u32 burst, u64 rate, bool pps);
+int cn10k_free_leaf_profile(struct otx2_nic *pfvf, u16 leaf);
 #endif /* CN10K_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index e5616d466236..e0a3e28f5568 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -180,6 +180,7 @@ struct otx2_hw {
 
 	/* NIX */
 	u16		txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+	u16			matchall_ipolicer;
 
 	/* HW settings, coalescing etc */
 	u16			rx_chan_base;
@@ -327,6 +328,7 @@ struct otx2_nic {
 #define OTX2_FLAG_TX_PAUSE_ENABLED		BIT_ULL(10)
 #define OTX2_FLAG_TC_FLOWER_SUPPORT		BIT_ULL(11)
 #define OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED	BIT_ULL(12)
+#define OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED	BIT_ULL(13)
 	u64			flags;
 
 	struct otx2_qset	qset;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 65f505b07b5d..59912f73417b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1461,6 +1461,9 @@ static void otx2_free_hw_resources(struct otx2_nic *pf)
 
 	otx2_free_cq_res(pf);
 
+	/* Free all ingress bandwidth profiles allocated */
+	cn10k_free_all_ipolicers(pf);
+
 	mutex_lock(&mbox->lock);
 	/* Reset NIX LF */
 	free_req = otx2_mbox_alloc_msg_nix_lf_free(mbox);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 26712c091c63..5767fa4ef205 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -15,6 +15,7 @@
 #include <net/tc_act/tc_vlan.h>
 #include <net/ipv6.h>
 
+#include "cn10k.h"
 #include "otx2_common.h"
 
 /* Egress rate limiting definitions */
@@ -675,6 +676,87 @@ static int otx2_setup_tc_cls_flower(struct otx2_nic *nic,
 	}
 }
 
+static int otx2_tc_ingress_matchall_install(struct otx2_nic *nic,
+					    struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct flow_action *actions = &cls->rule->action;
+	struct flow_action_entry *entry;
+	u64 rate;
+	int err;
+
+	err = otx2_tc_validate_flow(nic, actions, extack);
+	if (err)
+		return err;
+
+	if (nic->flags & OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only one ingress MATCHALL ratelimitter can be offloaded");
+		return -ENOMEM;
+	}
+
+	entry = &cls->rule->action.entries[0];
+	switch (entry->id) {
+	case FLOW_ACTION_POLICE:
+		/* Ingress ratelimiting is not supported on OcteonTx2 */
+		if (is_dev_otx2(nic->pdev)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Ingress policing not supported on this platform");
+			return -EOPNOTSUPP;
+		}
+
+		err = cn10k_alloc_matchall_ipolicer(nic);
+		if (err)
+			return err;
+
+		/* Convert to bits per second */
+		rate = entry->police.rate_bytes_ps * 8;
+		err = cn10k_set_matchall_ipolicer_rate(nic, entry->police.burst, rate);
+		if (err)
+			return err;
+		nic->flags |= OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED;
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only police action supported with Ingress MATCHALL offload");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int otx2_tc_ingress_matchall_delete(struct otx2_nic *nic,
+					   struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	int err;
+
+	if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+		return -EINVAL;
+	}
+
+	err = cn10k_free_matchall_ipolicer(nic);
+	nic->flags &= ~OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED;
+	return err;
+}
+
+static int otx2_setup_tc_ingress_matchall(struct otx2_nic *nic,
+					  struct tc_cls_matchall_offload *cls_matchall)
+{
+	switch (cls_matchall->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return otx2_tc_ingress_matchall_install(nic, cls_matchall);
+	case TC_CLSMATCHALL_DESTROY:
+		return otx2_tc_ingress_matchall_delete(nic, cls_matchall);
+	case TC_CLSMATCHALL_STATS:
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type,
 					  void *type_data, void *cb_priv)
 {
@@ -686,6 +768,8 @@ static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type,
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return otx2_setup_tc_cls_flower(nic, type_data);
+	case TC_SETUP_CLSMATCHALL:
+		return otx2_setup_tc_ingress_matchall(nic, type_data);
 	default:
 		break;
 	}

From 5d2fdd86d517350c4fc903a5a69a562a4b0084e4 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Tue, 15 Jun 2021 17:04:30 +0530
Subject: [PATCH 1838/3292] octeontx2-pf: Use NL_SET_ERR_MSG_MOD for TC

This patch modifies all netdev_err messages in
tc code to NL_SET_ERR_MSG_MOD. NL_SET_ERR_MSG_MOD
does not support format specifiers yet hence
netdev_err messages with only strings are modified.

Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/nic/otx2_tc.c  | 29 +++++++++++--------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 5767fa4ef205..adc307871cdf 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -223,15 +223,17 @@ static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic,
 
 static int otx2_tc_parse_actions(struct otx2_nic *nic,
 				 struct flow_action *flow_action,
-				 struct npc_install_flow_req *req)
+				 struct npc_install_flow_req *req,
+				 struct flow_cls_offload *f)
 {
+	struct netlink_ext_ack *extack = f->common.extack;
 	struct flow_action_entry *act;
 	struct net_device *target;
 	struct otx2_nic *priv;
 	int i;
 
 	if (!flow_action_has_entries(flow_action)) {
-		netdev_info(nic->netdev, "no tc actions specified");
+		NL_SET_ERR_MSG_MOD(extack, "no tc actions specified");
 		return -EINVAL;
 	}
 
@@ -248,8 +250,8 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic,
 			priv = netdev_priv(target);
 			/* npc_install_flow_req doesn't support passing a target pcifunc */
 			if (rvu_get_pf(nic->pcifunc) != rvu_get_pf(priv->pcifunc)) {
-				netdev_info(nic->netdev,
-					    "can't redirect to other pf/vf\n");
+				NL_SET_ERR_MSG_MOD(extack,
+						   "can't redirect to other pf/vf");
 				return -EOPNOTSUPP;
 			}
 			req->vf = priv->pcifunc & RVU_PFVF_FUNC_MASK;
@@ -272,6 +274,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic,
 				struct flow_cls_offload *f,
 				struct npc_install_flow_req *req)
 {
+	struct netlink_ext_ack *extack = f->common.extack;
 	struct flow_msg *flow_spec = &req->packet;
 	struct flow_msg *flow_mask = &req->mask;
 	struct flow_dissector *dissector;
@@ -336,7 +339,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic,
 
 		flow_rule_match_eth_addrs(rule, &match);
 		if (!is_zero_ether_addr(match.mask->src)) {
-			netdev_err(nic->netdev, "src mac match not supported\n");
+			NL_SET_ERR_MSG_MOD(extack, "src mac match not supported");
 			return -EOPNOTSUPP;
 		}
 
@@ -354,11 +357,11 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic,
 		flow_rule_match_ip(rule, &match);
 		if ((ntohs(flow_spec->etype) != ETH_P_IP) &&
 		    match.mask->tos) {
-			netdev_err(nic->netdev, "tos not supported\n");
+			NL_SET_ERR_MSG_MOD(extack, "tos not supported");
 			return -EOPNOTSUPP;
 		}
 		if (match.mask->ttl) {
-			netdev_err(nic->netdev, "ttl not supported\n");
+			NL_SET_ERR_MSG_MOD(extack, "ttl not supported");
 			return -EOPNOTSUPP;
 		}
 		flow_spec->tos = match.key->tos;
@@ -414,8 +417,8 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic,
 
 		if (ipv6_addr_loopback(&match.key->dst) ||
 		    ipv6_addr_loopback(&match.key->src)) {
-			netdev_err(nic->netdev,
-				   "Flow matching on IPv6 loopback addr is not supported\n");
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Flow matching IPv6 loopback addr not supported");
 			return -EOPNOTSUPP;
 		}
 
@@ -464,7 +467,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic,
 			req->features |= BIT_ULL(NPC_SPORT_SCTP);
 	}
 
-	return otx2_tc_parse_actions(nic, &rule->action, req);
+	return otx2_tc_parse_actions(nic, &rule->action, req, f);
 }
 
 static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
@@ -525,6 +528,7 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
 static int otx2_tc_add_flow(struct otx2_nic *nic,
 			    struct flow_cls_offload *tc_flow_cmd)
 {
+	struct netlink_ext_ack *extack = tc_flow_cmd->common.extack;
 	struct otx2_tc_info *tc_info = &nic->tc_info;
 	struct otx2_tc_flow *new_node, *old_node;
 	struct npc_install_flow_req *req;
@@ -562,7 +566,8 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 		otx2_tc_del_flow(nic, tc_flow_cmd);
 
 	if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
-		netdev_err(nic->netdev, "Not enough MCAM space to add the flow\n");
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Not enough MCAM space to add the flow");
 		otx2_mbox_reset(&nic->mbox.mbox, 0);
 		mutex_unlock(&nic->mbox.lock);
 		return -ENOMEM;
@@ -580,7 +585,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	/* Send message to AF */
 	rc = otx2_sync_mbox_msg(&nic->mbox);
 	if (rc) {
-		netdev_err(nic->netdev, "Failed to install MCAM flow entry\n");
+		NL_SET_ERR_MSG_MOD(extack, "Failed to install MCAM flow entry");
 		mutex_unlock(&nic->mbox.lock);
 		goto out;
 	}

From 68fbff68dbea35f9e6f7649dd22fce492a5aedac Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Tue, 15 Jun 2021 17:04:31 +0530
Subject: [PATCH 1839/3292] octeontx2-pf: Add police action for TC flower

Added police action for ingress TC flower
hardware offload. With this rate limiting can be
done per flow. Since rate limiting is tied to
RQs in hardware the number of TC flower filters
with action as police is limited to number
of receive queues of the interface. Both bps
and pps modes are supported.

Examples to rate limit a flow:
$ ethtool -K eth0 hw-tc-offload on
$ tc qdisc add dev eth0 ingress
$ tc filter add dev eth0 parent ffff: protocol ip \
  flower ip_proto udp dst_port 80 action \
  police rate 100Mbit burst 32Kbit

$ tc filter add dev eth0 parent ffff: \
  protocol ip flower dst_mac 5e:b2:34:ee:29:49 \
  action police pkts_rate 5000 pkts_burst 2048

Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../marvell/octeontx2/nic/otx2_common.h       |   1 +
 .../marvell/octeontx2/nic/otx2_ethtool.c      |   6 +
 .../ethernet/marvell/octeontx2/nic/otx2_tc.c  | 194 +++++++++++++++---
 3 files changed, 178 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index e0a3e28f5568..234b330f3183 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -372,6 +372,7 @@ struct otx2_nic {
 
 	struct otx2_flow_config	*flow_cfg;
 	struct otx2_tc_info	tc_info;
+	unsigned long		rq_bmap;
 };
 
 static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 9d9a2e438acf..8df748e0677b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -286,6 +286,12 @@ static int otx2_set_channels(struct net_device *dev,
 	if (!channel->rx_count || !channel->tx_count)
 		return -EINVAL;
 
+	if (bitmap_weight(&pfvf->rq_bmap, pfvf->hw.rx_queues) > 1) {
+		netdev_err(dev,
+			   "Receive queues are in use by TC police action\n");
+		return -EINVAL;
+	}
+
 	if (if_up)
 		dev->netdev_ops->ndo_stop(dev);
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index adc307871cdf..905fc02a7dfe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -42,11 +42,14 @@ struct otx2_tc_flow_stats {
 struct otx2_tc_flow {
 	struct rhash_head		node;
 	unsigned long			cookie;
-	u16				entry;
 	unsigned int			bitpos;
 	struct rcu_head			rcu;
 	struct otx2_tc_flow_stats	stats;
 	spinlock_t			lock; /* lock for stats */
+	u16				rq;
+	u16				entry;
+	u16				leaf_profile;
+	bool				is_act_police;
 };
 
 static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
@@ -221,15 +224,72 @@ static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic,
 	return err;
 }
 
+static int otx2_tc_act_set_police(struct otx2_nic *nic,
+				  struct otx2_tc_flow *node,
+				  struct flow_cls_offload *f,
+				  u64 rate, u32 burst, u32 mark,
+				  struct npc_install_flow_req *req, bool pps)
+{
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct otx2_hw *hw = &nic->hw;
+	int rq_idx, rc;
+
+	rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues);
+	if (rq_idx >= hw->rx_queues) {
+		NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded");
+		return -EINVAL;
+	}
+
+	mutex_lock(&nic->mbox.lock);
+
+	rc = cn10k_alloc_leaf_profile(nic, &node->leaf_profile);
+	if (rc) {
+		mutex_unlock(&nic->mbox.lock);
+		return rc;
+	}
+
+	rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, burst, rate, pps);
+	if (rc)
+		goto free_leaf;
+
+	rc = cn10k_map_unmap_rq_policer(nic, rq_idx, node->leaf_profile, true);
+	if (rc)
+		goto free_leaf;
+
+	mutex_unlock(&nic->mbox.lock);
+
+	req->match_id = mark & 0xFFFFULL;
+	req->index = rq_idx;
+	req->op = NIX_RX_ACTIONOP_UCAST;
+	set_bit(rq_idx, &nic->rq_bmap);
+	node->is_act_police = true;
+	node->rq = rq_idx;
+
+	return 0;
+
+free_leaf:
+	if (cn10k_free_leaf_profile(nic, node->leaf_profile))
+		netdev_err(nic->netdev,
+			   "Unable to free leaf bandwidth profile(%d)\n",
+			   node->leaf_profile);
+	mutex_unlock(&nic->mbox.lock);
+	return rc;
+}
+
 static int otx2_tc_parse_actions(struct otx2_nic *nic,
 				 struct flow_action *flow_action,
 				 struct npc_install_flow_req *req,
-				 struct flow_cls_offload *f)
+				 struct flow_cls_offload *f,
+				 struct otx2_tc_flow *node)
 {
 	struct netlink_ext_ack *extack = f->common.extack;
 	struct flow_action_entry *act;
 	struct net_device *target;
 	struct otx2_nic *priv;
+	u32 burst, mark = 0;
+	u8 nr_police = 0;
+	bool pps;
+	u64 rate;
 	int i;
 
 	if (!flow_action_has_entries(flow_action)) {
@@ -262,15 +322,51 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic,
 			/* use RX_VTAG_TYPE7 which is initialized to strip vlan tag */
 			req->vtag0_type = NIX_AF_LFX_RX_VTAG_TYPE7;
 			break;
+		case FLOW_ACTION_POLICE:
+			/* Ingress ratelimiting is not supported on OcteonTx2 */
+			if (is_dev_otx2(nic->pdev)) {
+				NL_SET_ERR_MSG_MOD(extack,
+					"Ingress policing not supported on this platform");
+				return -EOPNOTSUPP;
+			}
+
+			if (act->police.rate_bytes_ps > 0) {
+				rate = act->police.rate_bytes_ps * 8;
+				burst = act->police.burst;
+			} else if (act->police.rate_pkt_ps > 0) {
+				/* The algorithm used to calculate rate
+				 * mantissa, exponent values for a given token
+				 * rate (token can be byte or packet) requires
+				 * token rate to be mutiplied by 8.
+				 */
+				rate = act->police.rate_pkt_ps * 8;
+				burst = act->police.burst_pkt;
+				pps = true;
+			}
+			nr_police++;
+			break;
+		case FLOW_ACTION_MARK:
+			mark = act->mark;
+			break;
 		default:
 			return -EOPNOTSUPP;
 		}
 	}
 
+	if (nr_police > 1) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "rate limit police offload requires a single action");
+		return -EOPNOTSUPP;
+	}
+
+	if (nr_police)
+		return otx2_tc_act_set_police(nic, node, f, rate, burst,
+					      mark, req, pps);
+
 	return 0;
 }
 
-static int otx2_tc_prepare_flow(struct otx2_nic *nic,
+static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
 				struct flow_cls_offload *f,
 				struct npc_install_flow_req *req)
 {
@@ -467,7 +563,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic,
 			req->features |= BIT_ULL(NPC_SPORT_SCTP);
 	}
 
-	return otx2_tc_parse_actions(nic, &rule->action, req, f);
+	return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
 }
 
 static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
@@ -502,6 +598,7 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
 {
 	struct otx2_tc_info *tc_info = &nic->tc_info;
 	struct otx2_tc_flow *flow_node;
+	int err;
 
 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
 					   &tc_flow_cmd->cookie,
@@ -512,6 +609,27 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
 		return -EINVAL;
 	}
 
+	if (flow_node->is_act_police) {
+		mutex_lock(&nic->mbox.lock);
+
+		err = cn10k_map_unmap_rq_policer(nic, flow_node->rq,
+						 flow_node->leaf_profile, false);
+		if (err)
+			netdev_err(nic->netdev,
+				   "Unmapping RQ %d & profile %d failed\n",
+				   flow_node->rq, flow_node->leaf_profile);
+
+		err = cn10k_free_leaf_profile(nic, flow_node->leaf_profile);
+		if (err)
+			netdev_err(nic->netdev,
+				   "Unable to free leaf bandwidth profile(%d)\n",
+				   flow_node->leaf_profile);
+
+		__clear_bit(flow_node->rq, &nic->rq_bmap);
+
+		mutex_unlock(&nic->mbox.lock);
+	}
+
 	otx2_del_mcam_flow_entry(nic, flow_node->entry);
 
 	WARN_ON(rhashtable_remove_fast(&nic->tc_info.flow_table,
@@ -531,12 +649,18 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	struct netlink_ext_ack *extack = tc_flow_cmd->common.extack;
 	struct otx2_tc_info *tc_info = &nic->tc_info;
 	struct otx2_tc_flow *new_node, *old_node;
-	struct npc_install_flow_req *req;
-	int rc;
+	struct npc_install_flow_req *req, dummy;
+	int rc, err;
 
 	if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
 		return -ENOMEM;
 
+	if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Not enough MCAM space to add the flow");
+		return -ENOMEM;
+	}
+
 	/* allocate memory for the new flow and it's node */
 	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
 	if (!new_node)
@@ -544,17 +668,11 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	spin_lock_init(&new_node->lock);
 	new_node->cookie = tc_flow_cmd->cookie;
 
-	mutex_lock(&nic->mbox.lock);
-	req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
-	if (!req) {
-		mutex_unlock(&nic->mbox.lock);
-		return -ENOMEM;
-	}
+	memset(&dummy, 0, sizeof(struct npc_install_flow_req));
 
-	rc = otx2_tc_prepare_flow(nic, tc_flow_cmd, req);
+	rc = otx2_tc_prepare_flow(nic, new_node, tc_flow_cmd, &dummy);
 	if (rc) {
-		otx2_mbox_reset(&nic->mbox.mbox, 0);
-		mutex_unlock(&nic->mbox.lock);
+		kfree_rcu(new_node, rcu);
 		return rc;
 	}
 
@@ -565,14 +683,17 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	if (old_node)
 		otx2_tc_del_flow(nic, tc_flow_cmd);
 
-	if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
-		NL_SET_ERR_MSG_MOD(extack,
-				   "Not enough MCAM space to add the flow");
-		otx2_mbox_reset(&nic->mbox.mbox, 0);
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
+	if (!req) {
 		mutex_unlock(&nic->mbox.lock);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto free_leaf;
 	}
 
+	memcpy(&dummy.hdr, &req->hdr, sizeof(struct mbox_msghdr));
+	memcpy(req, &dummy, sizeof(struct npc_install_flow_req));
+
 	new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
 					       nic->flow_cfg->tc_max_flows);
 	req->channel = nic->hw.rx_chan_base;
@@ -587,7 +708,8 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	if (rc) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to install MCAM flow entry");
 		mutex_unlock(&nic->mbox.lock);
-		goto out;
+		kfree_rcu(new_node, rcu);
+		goto free_leaf;
 	}
 	mutex_unlock(&nic->mbox.lock);
 
@@ -597,12 +719,35 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	if (rc) {
 		otx2_del_mcam_flow_entry(nic, req->entry);
 		kfree_rcu(new_node, rcu);
-		goto out;
+		goto free_leaf;
 	}
 
 	set_bit(new_node->bitpos, tc_info->tc_entries_bitmap);
 	tc_info->num_entries++;
-out:
+
+	return 0;
+
+free_leaf:
+	if (new_node->is_act_police) {
+		mutex_lock(&nic->mbox.lock);
+
+		err = cn10k_map_unmap_rq_policer(nic, new_node->rq,
+						 new_node->leaf_profile, false);
+		if (err)
+			netdev_err(nic->netdev,
+				   "Unmapping RQ %d & profile %d failed\n",
+				   new_node->rq, new_node->leaf_profile);
+		err = cn10k_free_leaf_profile(nic, new_node->leaf_profile);
+		if (err)
+			netdev_err(nic->netdev,
+				   "Unable to free leaf bandwidth profile(%d)\n",
+				   new_node->leaf_profile);
+
+		__clear_bit(new_node->rq, &nic->rq_bmap);
+
+		mutex_unlock(&nic->mbox.lock);
+	}
+
 	return rc;
 }
 
@@ -864,6 +1009,9 @@ int otx2_init_tc(struct otx2_nic *nic)
 {
 	struct otx2_tc_info *tc = &nic->tc_info;
 
+	/* Exclude receive queue 0 being used for police action */
+	set_bit(0, &nic->rq_bmap);
+
 	tc->flow_ht_params = tc_flow_ht_params;
 	return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
 }

From b8f6b0522c298ae9267bd6584e19b942a0636910 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Tue, 15 Jun 2021 10:14:44 +0800
Subject: [PATCH 1840/3292] netlabel: Fix memory leak in netlbl_mgmt_add_common

Hulk Robot reported memory leak in netlbl_mgmt_add_common.
The problem is non-freed map in case of netlbl_domhsh_add() failed.

BUG: memory leak
unreferenced object 0xffff888100ab7080 (size 96):
  comm "syz-executor537", pid 360, jiffies 4294862456 (age 22.678s)
  hex dump (first 32 bytes):
    05 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    fe 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01  ................
  backtrace:
    [<0000000008b40026>] netlbl_mgmt_add_common.isra.0+0xb2a/0x1b40
    [<000000003be10950>] netlbl_mgmt_add+0x271/0x3c0
    [<00000000c70487ed>] genl_family_rcv_msg_doit.isra.0+0x20e/0x320
    [<000000001f2ff614>] genl_rcv_msg+0x2bf/0x4f0
    [<0000000089045792>] netlink_rcv_skb+0x134/0x3d0
    [<0000000020e96fdd>] genl_rcv+0x24/0x40
    [<0000000042810c66>] netlink_unicast+0x4a0/0x6a0
    [<000000002e1659f0>] netlink_sendmsg+0x789/0xc70
    [<000000006e43415f>] sock_sendmsg+0x139/0x170
    [<00000000680a73d7>] ____sys_sendmsg+0x658/0x7d0
    [<0000000065cbb8af>] ___sys_sendmsg+0xf8/0x170
    [<0000000019932b6c>] __sys_sendmsg+0xd3/0x190
    [<00000000643ac172>] do_syscall_64+0x37/0x90
    [<000000009b79d6dc>] entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 63c416887437 ("netlabel: Add network address selectors to the NetLabel/LSM domain mapping")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_mgmt.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index e664ab990941..032b7d7b32c7 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -76,6 +76,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
 static int netlbl_mgmt_add_common(struct genl_info *info,
 				  struct netlbl_audit *audit_info)
 {
+	void *pmap = NULL;
 	int ret_val = -EINVAL;
 	struct netlbl_domaddr_map *addrmap = NULL;
 	struct cipso_v4_doi *cipsov4 = NULL;
@@ -175,6 +176,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 			ret_val = -ENOMEM;
 			goto add_free_addrmap;
 		}
+		pmap = map;
 		map->list.addr = addr->s_addr & mask->s_addr;
 		map->list.mask = mask->s_addr;
 		map->list.valid = 1;
@@ -183,10 +185,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 			map->def.cipso = cipsov4;
 
 		ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
-		if (ret_val != 0) {
-			kfree(map);
-			goto add_free_addrmap;
-		}
+		if (ret_val != 0)
+			goto add_free_map;
 
 		entry->family = AF_INET;
 		entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
@@ -223,6 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 			ret_val = -ENOMEM;
 			goto add_free_addrmap;
 		}
+		pmap = map;
 		map->list.addr = *addr;
 		map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
 		map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
@@ -235,10 +236,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 			map->def.calipso = calipso;
 
 		ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
-		if (ret_val != 0) {
-			kfree(map);
-			goto add_free_addrmap;
-		}
+		if (ret_val != 0)
+			goto add_free_map;
 
 		entry->family = AF_INET6;
 		entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
@@ -248,10 +247,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 
 	ret_val = netlbl_domhsh_add(entry, audit_info);
 	if (ret_val != 0)
-		goto add_free_addrmap;
+		goto add_free_map;
 
 	return 0;
 
+add_free_map:
+	kfree(pmap);
 add_free_addrmap:
 	kfree(addrmap);
 add_doi_put_def:

From 848ca9182a7d25bb54955c3aab9a3a2742bf9678 Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Tue, 15 Jun 2021 08:54:15 +0000
Subject: [PATCH 1841/3292] net: bonding: Use per-cpu rr_tx_counter

The round-robin rr_tx_counter was shared across CPUs leading to
significant cache thrashing at high packet rates. This patch switches
the round-robin packet counter to use a per-cpu variable to decide
the destination slave.

On a test with 2x100Gbit ICE nic with pktgen_sample_04_many_flows.sh
(-s 64 -t 32) the tx rate was 19.6Mpps before and 22.3Mpps after
this patch.

"perf top -e cache_misses" before:
    12.31%  [bonding]       [k] bond_xmit_roundrobin_slave_get
    10.59%  [sch_fq_codel]  [k] fq_codel_dequeue
     9.34%  [kernel]        [k] skb_release_data
after:
    15.42%  [sch_fq_codel]  [k] fq_codel_dequeue
    10.06%  [kernel]        [k] __memset
     9.12%  [kernel]        [k] skb_release_data

Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 18 +++++++++++++++---
 include/net/bonding.h           |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index eb79a9f05914..1d9137e77dfc 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4202,16 +4202,16 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
 		slave_id = prandom_u32();
 		break;
 	case 1:
-		slave_id = bond->rr_tx_counter;
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
 		break;
 	default:
 		reciprocal_packets_per_slave =
 			bond->params.reciprocal_packets_per_slave;
-		slave_id = reciprocal_divide(bond->rr_tx_counter,
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
+		slave_id = reciprocal_divide(slave_id,
 					     reciprocal_packets_per_slave);
 		break;
 	}
-	bond->rr_tx_counter++;
 
 	return slave_id;
 }
@@ -4852,6 +4852,9 @@ static void bond_destructor(struct net_device *bond_dev)
 
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
+
+	if (bond->rr_tx_counter)
+		free_percpu(bond->rr_tx_counter);
 }
 
 void bond_setup(struct net_device *bond_dev)
@@ -5350,6 +5353,15 @@ static int bond_init(struct net_device *bond_dev)
 	if (!bond->wq)
 		return -ENOMEM;
 
+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) {
+		bond->rr_tx_counter = alloc_percpu(u32);
+		if (!bond->rr_tx_counter) {
+			destroy_workqueue(bond->wq);
+			bond->wq = NULL;
+			return -ENOMEM;
+		}
+	}
+
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 019e998d944a..15335732e166 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -232,7 +232,7 @@ struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	u32      rr_tx_counter;
+	u32 __percpu *rr_tx_counter;
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
 	struct   bond_params params;

From 11b57faf951cd3a570e3d9e463fc7c41023bc8c6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 10:05:16 +0100
Subject: [PATCH 1842/3292] net: dsa: b53: remove redundant null check on dev

The pointer dev can never be null, the null check is redundant
and can be removed. Cleans up a static analysis warning that
pointer priv is dereferencing dev before dev is being null
checked.

Addresses-Coverity: ("Dereference before null check")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_srab.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index aaa12d73784e..3f4249de70c5 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -632,8 +632,7 @@ static int b53_srab_remove(struct platform_device *pdev)
 	struct b53_srab_priv *priv = dev->priv;
 
 	b53_srab_intr_set(priv, false);
-	if (dev)
-		b53_switch_remove(dev);
+	b53_switch_remove(dev);
 
 	return 0;
 }

From f25dcde974396a504af7ed795a3106dcf3ee7144 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 11:14:57 +0100
Subject: [PATCH 1843/3292] octeontx2-pf: Fix spelling mistake "morethan" ->
 "more than"

There is a spelling mistake in a dev_err message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index ef833fe39114..3612e0a2cab3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -2556,7 +2556,7 @@ int rvu_mbox_handler_npc_mcam_alloc_entry(struct rvu *rvu,
 	 */
 	if (!req->contig && req->count > NPC_MAX_NONCONTIG_ENTRIES) {
 		dev_err(rvu->dev,
-			"%s: %d Non-contiguous MCAM entries requested is morethan max (%d) allowed\n",
+			"%s: %d Non-contiguous MCAM entries requested is more than max (%d) allowed\n",
 			__func__, req->count, NPC_MAX_NONCONTIG_ENTRIES);
 		return NPC_MCAM_INVALID_REQ;
 	}

From 925a56b2c085a7c6f5c741c8516e21c3aa6134b4 Mon Sep 17 00:00:00 2001
From: M Chetan Kumar <m.chetan.kumar@intel.com>
Date: Tue, 15 Jun 2021 18:38:22 +0530
Subject: [PATCH 1844/3292] net: wwan: iosm: Fix htmldocs warnings

Fixes .rst file warnings seen on linux-next build.

Fixes: f7af616c632e ("net: iosm: infrastructure")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/wwan/iosm.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/device_drivers/wwan/iosm.rst b/Documentation/networking/device_drivers/wwan/iosm.rst
index cd12f57d980a..aceb0223eb46 100644
--- a/Documentation/networking/device_drivers/wwan/iosm.rst
+++ b/Documentation/networking/device_drivers/wwan/iosm.rst
@@ -40,7 +40,7 @@ MBIM control channel userspace ABI
 ----------------------------------
 
 /dev/wwan0mbim0 character device
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The driver exposes an MBIM interface to the MBIM function by implementing
 MBIM WWAN Port. The userspace end of the control channel pipe is a
 /dev/wwan0mbim0 character device. Application shall use this interface for
@@ -52,12 +52,12 @@ The userspace application is responsible for all control message fragmentation
 and defragmentation as per MBIM specification.
 
 /dev/wwan0mbim0 write()
-~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~
 The MBIM control messages from the management application must not exceed the
 negotiated control message size.
 
 /dev/wwan0mbim0 read()
-~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~
 The management application must accept control messages of up the negotiated
 control message size.
 
@@ -65,7 +65,7 @@ MBIM data channel userspace ABI
 -------------------------------
 
 wwan0-X network device
-~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~
 The IOSM driver exposes IP link interface "wwan0-X" of type "wwan" for IP
 traffic. Iproute network utility is used for creating "wwan0-X" network
 interface and for associating it with MBIM IP session. The Driver supports

From a078d981f8632f7a919094c000b061593287e056 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 15 Jun 2021 15:27:41 +0200
Subject: [PATCH 1845/3292] net: ti: add pp skb recycling support

As already done for mvneta and mvpp2, enable skb recycling for ti
ethernet drivers

ti driver on net-next:
----------------------
[perf top]
 47.15%  [kernel]     [k] _raw_spin_unlock_irqrestore
 11.77%  [kernel]     [k] __cpdma_chan_free
  3.16%  [kernel]     [k] ___bpf_prog_run
  2.52%  [kernel]     [k] cpsw_rx_vlan_encap
  2.34%  [kernel]     [k] __netif_receive_skb_core
  2.27%  [kernel]     [k] free_unref_page
  2.26%  [kernel]     [k] kmem_cache_free
  2.24%  [kernel]     [k] kmem_cache_alloc
  1.69%  [kernel]     [k] __softirqentry_text_start
  1.61%  [kernel]     [k] cpsw_rx_handler
  1.19%  [kernel]     [k] page_pool_release_page
  1.19%  [kernel]     [k] clear_bits_ll
  1.15%  [kernel]     [k] page_frag_free
  1.06%  [kernel]     [k] __dma_page_dev_to_cpu
  0.99%  [kernel]     [k] memset
  0.94%  [kernel]     [k] __alloc_pages_bulk
  0.92%  [kernel]     [k] kfree_skb
  0.85%  [kernel]     [k] packet_rcv
  0.78%  [kernel]     [k] page_address
  0.75%  [kernel]     [k] v7_dma_inv_range
  0.71%  [kernel]     [k] __lock_text_start

[iperf3 tcp]
[  5]   0.00-10.00  sec   873 MBytes   732 Mbits/sec    0   sender
[  5]   0.00-10.01  sec   866 MBytes   726 Mbits/sec        receiver

ti + skb recycling:
-------------------
[perf top]
 40.58%  [kernel]    [k] _raw_spin_unlock_irqrestore
 16.18%  [kernel]    [k] __softirqentry_text_start
 10.33%  [kernel]    [k] __cpdma_chan_free
  2.62%  [kernel]    [k] ___bpf_prog_run
  2.05%  [kernel]    [k] cpsw_rx_vlan_encap
  2.00%  [kernel]    [k] kmem_cache_alloc
  1.86%  [kernel]    [k] __netif_receive_skb_core
  1.80%  [kernel]    [k] kmem_cache_free
  1.63%  [kernel]    [k] cpsw_rx_handler
  1.12%  [kernel]    [k] cpsw_rx_mq_poll
  1.11%  [kernel]    [k] page_pool_put_page
  1.04%  [kernel]    [k] _raw_spin_unlock
  0.97%  [kernel]    [k] clear_bits_ll
  0.90%  [kernel]    [k] packet_rcv
  0.88%  [kernel]    [k] __dma_page_dev_to_cpu
  0.85%  [kernel]    [k] kfree_skb
  0.80%  [kernel]    [k] memset
  0.71%  [kernel]    [k] __lock_text_start
  0.66%  [kernel]    [k] v7_dma_inv_range
  0.64%  [kernel]    [k] gen_pool_free_owner

[iperf3 tcp]
[  5]   0.00-10.00  sec   884 MBytes   742 Mbits/sec    0   sender
[  5]   0.00-10.01  sec   878 MBytes   735 Mbits/sec        receiver

Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c     | 4 ++--
 drivers/net/ethernet/ti/cpsw_new.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b1e80cc96f56..cbbd0f665796 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -430,8 +430,8 @@ static void cpsw_rx_handler(void *token, int len, int status)
 		cpts_rx_timestamp(cpsw->cpts, skb);
 	skb->protocol = eth_type_trans(skb, ndev);
 
-	/* unmap page as no netstack skb page recycling */
-	page_pool_release_page(pool, page);
+	/* mark skb for recycling */
+	skb_mark_for_recycle(skb, page, pool);
 	netif_receive_skb(skb);
 
 	ndev->stats.rx_bytes += len;
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 8d4f3c53385d..57d279fdcc9f 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -373,8 +373,8 @@ static void cpsw_rx_handler(void *token, int len, int status)
 		cpts_rx_timestamp(cpsw->cpts, skb);
 	skb->protocol = eth_type_trans(skb, ndev);
 
-	/* unmap page as no netstack skb page recycling */
-	page_pool_release_page(pool, page);
+	/* mark skb for recycling */
+	skb_mark_for_recycle(skb, page, pool);
 	netif_receive_skb(skb);
 
 	ndev->stats.rx_bytes += len;

From 293128b1ef5ae2cfa7403d54e183fe689ed5d303 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 15 Jun 2021 14:17:35 -0400
Subject: [PATCH 1846/3292] dm writecache: have ssd writeback wait if the
 kcopyd workqueue is busy

Make dm-writecache wait if the kcopyd workqueue is busy (as will
happen if waiting for page allocation or inside submit_bio).

This change improves performance of "mkfs.ext2" by approximately 20%
on one testbed.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-kcopyd.c     | 6 ++++++
 drivers/md/dm-writecache.c | 5 +++++
 include/linux/dm-kcopyd.h  | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index e50625ce74ec..37b03ab7e5c9 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -980,3 +980,9 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
 	kfree(kc);
 }
 EXPORT_SYMBOL(dm_kcopyd_client_destroy);
+
+void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc)
+{
+	flush_workqueue(kc->kcopyd_wq);
+}
+EXPORT_SYMBOL(dm_kcopyd_client_flush);
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 56179a21db0e..28bb6890fcf4 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1812,6 +1812,11 @@ static void writecache_writeback(struct work_struct *work)
 	struct writeback_list wbl;
 	unsigned long n_walked;
 
+	if (!WC_MODE_PMEM(wc)) {
+		/* Wait for any active kcopyd work on behalf of ssd writeback */
+		dm_kcopyd_client_flush(wc->dm_kcopyd);
+	}
+
 	wc_lock(wc);
 restart:
 	if (writecache_has_error(wc)) {
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index e42de7750c88..c1707ee5b540 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(name, description)
 struct dm_kcopyd_client;
 struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle);
 void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc);
+void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc);
 
 /*
  * Submit a copy job to kcopyd.  This is built on top of the

From fdd1dc316e8959b6730d733fba025a39dac7938f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 14:00:11 +0100
Subject: [PATCH 1847/3292] io_uring: Fix incorrect sizeof operator for
 copy_from_user call

Static analysis is warning that the sizeof being used is should be
of *data->tags[i] and not data->tags[i]. Although these are the same
size on 64 bit systems it is not a portable assumption to assume
this is true for all cases.  Fix this by using a temporary pointer
tag_slot to make the code a clearer.

Addresses-Coverity: ("Sizeof not portable")
Fixes: d878c81610e1 ("io_uring: hide rsrc tag copy into generic helpers")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20210615130011.57387-1-colin.king@canonical.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d665c9419ad3..3692bbc7bd01 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7230,8 +7230,10 @@ static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
 	if (utags) {
 		ret = -EFAULT;
 		for (i = 0; i < nr; i++) {
-			if (copy_from_user(io_get_tag_slot(data, i), &utags[i],
-					   sizeof(data->tags[i])))
+			u64 *tag_slot = io_get_tag_slot(data, i);
+
+			if (copy_from_user(tag_slot, &utags[i],
+					   sizeof(*tag_slot)))
 				goto fail;
 		}
 	}

From b1b2fc3574a6a94a1ab90271a7620345c39dc01f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 15:34:24 +0100
Subject: [PATCH 1848/3292] io-wq: remove redundant initialization of variable
 ret

The variable ret is being initialized with a value that is never read, the
assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210615143424.60449-1-colin.king@canonical.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 2c37776c0280..e221aaab585c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -896,7 +896,7 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
-	int ret = -ENOMEM, node;
+	int ret, node;
 	struct io_wq *wq;
 
 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))

From c7af47cf0fab5bad1fb8b250dfab8efc1f991559 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:20 +0100
Subject: [PATCH 1849/3292] io_uring: keep SQ pointers in a single cacheline

sq_array and sq_sqes are always used together, however they are in
different cachelines, where the borderline is right before
cq_overflow_list is rather rarely touched. Move the fields together so
it loads only one cacheline.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/3ef2411a94874da06492506a8897eff679244f49.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3692bbc7bd01..8c501eed3813 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -364,6 +364,7 @@ struct io_ring_ctx {
 		 * array.
 		 */
 		u32			*sq_array;
+		struct io_uring_sqe	*sq_sqes;
 		unsigned		cached_sq_head;
 		unsigned		sq_entries;
 		unsigned		sq_thread_idle;
@@ -373,8 +374,6 @@ struct io_ring_ctx {
 		struct list_head	defer_list;
 		struct list_head	timeout_list;
 		struct list_head	cq_overflow_list;
-
-		struct io_uring_sqe	*sq_sqes;
 	} ____cacheline_aligned_in_smp;
 
 	struct {

From b52ecf8cb5b5ccb8069adbdb82a68d3fa0f423db Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:21 +0100
Subject: [PATCH 1850/3292] io_uring: move ctx->flags from SQ cacheline

ctx->flags are heavily used by both, completion and submission sides, so
move it out from the ctx fields related to submissions. Instead, place
it together with ctx->refs, because it's already cacheline-aligned and
so pads lots of space, and both almost never change. Also, in most
occasions they are accessed together as refs are taken at submission
time and put back during completion.

Do same with ctx->rings, where the pointer itself is never modified
apart from ring init/free.

Note: in percpu mode, struct percpu_ref doesn't modify the struct itself
but takes indirection with ref->percpu_count_ptr.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/4c48c173e63d35591383ba2b87e8b8e8dfdbd23d.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8c501eed3813..ba36eefdae2c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -341,17 +341,19 @@ struct io_submit_state {
 };
 
 struct io_ring_ctx {
+	/* const or read-mostly hot data */
 	struct {
 		struct percpu_ref	refs;
-	} ____cacheline_aligned_in_smp;
 
-	struct {
+		struct io_rings		*rings;
 		unsigned int		flags;
 		unsigned int		compat: 1;
 		unsigned int		drain_next: 1;
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
+	} ____cacheline_aligned_in_smp;
 
+	struct {
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
 		 * mmapped by the application using the IORING_OFF_SQES offset.
@@ -386,8 +388,6 @@ struct io_ring_ctx {
 	struct list_head	locked_free_list;
 	unsigned int		locked_free_nr;
 
-	struct io_rings	*rings;
-
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 

From 7f1129d227ea54526380d0f37eb7b33ab9f200c1 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:22 +0100
Subject: [PATCH 1851/3292] io_uring: shuffle more fields into SQ ctx section

Since moving locked_free_* out of struct io_submit_state
ctx->submit_state is accessed on submission side only, so move it into
the submission section. Same goes for rsrc table pointers/nodes/etc.,
they must be taken and checked during submission because sync'ed by
uring_lock, so move them there as well.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/8a5899a50afc6ccca63249e716f580b246f3dec6.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ba36eefdae2c..a0720fb2565c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -353,6 +353,7 @@ struct io_ring_ctx {
 		unsigned int		restricted: 1;
 	} ____cacheline_aligned_in_smp;
 
+	/* submission data */
 	struct {
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -369,13 +370,27 @@ struct io_ring_ctx {
 		struct io_uring_sqe	*sq_sqes;
 		unsigned		cached_sq_head;
 		unsigned		sq_entries;
-		unsigned		sq_thread_idle;
 		unsigned		cached_sq_dropped;
 		unsigned long		sq_check_overflow;
-
 		struct list_head	defer_list;
+
+		/*
+		 * Fixed resources fast path, should be accessed only under
+		 * uring_lock, and updated through io_uring_register(2)
+		 */
+		struct io_rsrc_node	*rsrc_node;
+		struct io_file_table	file_table;
+		unsigned		nr_user_files;
+		unsigned		nr_user_bufs;
+		struct io_mapped_ubuf	**user_bufs;
+
+		struct io_submit_state	submit_state;
 		struct list_head	timeout_list;
 		struct list_head	cq_overflow_list;
+		struct xarray		io_buffers;
+		struct xarray		personalities;
+		u32			pers_next;
+		unsigned		sq_thread_idle;
 	} ____cacheline_aligned_in_smp;
 
 	struct {
@@ -383,7 +398,6 @@ struct io_ring_ctx {
 		wait_queue_head_t	wait;
 	} ____cacheline_aligned_in_smp;
 
-	struct io_submit_state		submit_state;
 	/* IRQ completion list, under ->completion_lock */
 	struct list_head	locked_free_list;
 	unsigned int		locked_free_nr;
@@ -394,21 +408,6 @@ struct io_ring_ctx {
 	struct wait_queue_head	sqo_sq_wait;
 	struct list_head	sqd_list;
 
-	/*
-	 * Fixed resources fast path, should be accessed only under uring_lock,
-	 * and updated through io_uring_register(2)
-	 */
-	struct io_rsrc_node	*rsrc_node;
-
-	struct io_file_table	file_table;
-	unsigned		nr_user_files;
-	unsigned		nr_user_bufs;
-	struct io_mapped_ubuf	**user_bufs;
-
-	struct xarray		io_buffers;
-	struct xarray		personalities;
-	u32			pers_next;
-
 	struct {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;

From 17d3aeb33cdae8c87a8ad97c4358a623a630e19a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:23 +0100
Subject: [PATCH 1852/3292] io_uring: refactor io_get_sqe()

The line of io_get_sqe() evaluating @head consists of too many
operations including READ_ONCE(), it's not convenient for probing.
Refactor it also improving readability.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/866ad6e4ef4851c7c61f6b0e08dbd0a8d1abce84.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a0720fb2565c..c74a84a2532b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6685,8 +6685,8 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
  */
 static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 {
-	u32 *sq_array = ctx->sq_array;
 	unsigned head, mask = ctx->sq_entries - 1;
+	unsigned sq_idx = ctx->cached_sq_head++ & mask;
 
 	/*
 	 * The cached sq head (or cq tail) serves two purposes:
@@ -6696,7 +6696,7 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 	 * 2) allows the kernel side to track the head on its own, even
 	 *    though the application is the one updating it.
 	 */
-	head = READ_ONCE(sq_array[ctx->cached_sq_head++ & mask]);
+	head = READ_ONCE(ctx->sq_array[sq_idx]);
 	if (likely(head < ctx->sq_entries))
 		return &ctx->sq_sqes[head];
 

From 15641e427070f05fad2e9d74d191146d6514d30f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:24 +0100
Subject: [PATCH 1853/3292] io_uring: don't cache number of dropped SQEs

Kill ->cached_sq_dropped and wire DRAIN sequence number correction via
->cq_extra, which is there exactly for that purpose. User visible
dropped counter will be populated by incrementing it instead of keeping
a copy, similarly as it was done not so long ago with cq_overflow.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/088aceb2707a534d531e2770267c4498e0507cc1.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c74a84a2532b..ecac362913cc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -370,7 +370,6 @@ struct io_ring_ctx {
 		struct io_uring_sqe	*sq_sqes;
 		unsigned		cached_sq_head;
 		unsigned		sq_entries;
-		unsigned		cached_sq_dropped;
 		unsigned long		sq_check_overflow;
 		struct list_head	defer_list;
 
@@ -5994,13 +5993,11 @@ static u32 io_get_sequence(struct io_kiocb *req)
 {
 	struct io_kiocb *pos;
 	struct io_ring_ctx *ctx = req->ctx;
-	u32 total_submitted, nr_reqs = 0;
+	u32 nr_reqs = 0;
 
 	io_for_each_link(pos, req)
 		nr_reqs++;
-
-	total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
-	return total_submitted - nr_reqs;
+	return ctx->cached_sq_head - nr_reqs;
 }
 
 static int io_req_defer(struct io_kiocb *req)
@@ -6701,8 +6698,9 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
 		return &ctx->sq_sqes[head];
 
 	/* drop invalid entries */
-	ctx->cached_sq_dropped++;
-	WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped);
+	ctx->cq_extra--;
+	WRITE_ONCE(ctx->rings->sq_dropped,
+		   READ_ONCE(ctx->rings->sq_dropped) + 1);
 	return NULL;
 }
 

From f18ee4cf0a277a0e3d043755046d5817d4ddd618 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:25 +0100
Subject: [PATCH 1854/3292] io_uring: optimise completion timeout flushing

io_commit_cqring() might be very hot and we definitely don't want to
touch ->timeout_list there, because 1) it's shared with the submission
side so might lead to cache bouncing and 2) may need to load an extra
cache line, especially for IRQ completions.

We're interested in it at the completion side only when there are
offset-mode timeouts, which are not so popular. Replace
list_empty(->timeout_list) hot path check with a new one-way flag, which
is set when we prepare the first offset-mode timeout.

note: the flag sits in the same line as briefly used after ->rings

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e4892ec68b71a69f92ffbea4a1499be3ec0d463b.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ecac362913cc..b27734bc5ca3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -351,6 +351,7 @@ struct io_ring_ctx {
 		unsigned int		drain_next: 1;
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
+		unsigned int		off_timeout_used: 1;
 	} ____cacheline_aligned_in_smp;
 
 	/* submission data */
@@ -1318,12 +1319,12 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 {
 	u32 seq;
 
-	if (list_empty(&ctx->timeout_list))
+	if (likely(!ctx->off_timeout_used))
 		return;
 
 	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
 
-	do {
+	while (!list_empty(&ctx->timeout_list)) {
 		u32 events_needed, events_got;
 		struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
 						struct io_kiocb, timeout.list);
@@ -1345,8 +1346,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 
 		list_del_init(&req->timeout.list);
 		io_kill_timeout(req, 0);
-	} while (!list_empty(&ctx->timeout_list));
-
+	}
 	ctx->cq_last_tm_flush = seq;
 }
 
@@ -5651,6 +5651,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		return -EINVAL;
 
 	req->timeout.off = off;
+	if (unlikely(off && !req->ctx->off_timeout_used))
+		req->ctx->off_timeout_used = true;
 
 	if (!req->async_data && io_alloc_async_data(req))
 		return -ENOMEM;

From 5e159204d7edd5bd329e8cdb419dbd81d25751e0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:26 +0100
Subject: [PATCH 1855/3292] io_uring: small io_submit_sqe() optimisation

submit_state.link is used only to assemble a link and not used for
actual submission, so clear it before io_queue_sqe() in io_submit_sqe(),
awhile it's hot and in caches and queueing doesn't spoil it. May also
potentially help compiler with spilling or to do other optimisations.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1579939426f3ad6b55af3005b1389bbbed7d780d.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b27734bc5ca3..64fdeee01906 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6616,8 +6616,8 @@ fail_req:
 
 		/* last request of a link, enqueue the link */
 		if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-			io_queue_sqe(head);
 			link->head = NULL;
+			io_queue_sqe(head);
 		}
 	} else {
 		if (unlikely(ctx->drain_next)) {

From 5ed7a37d21b369d03114dea12a1f16ae2e21baa8 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:27 +0100
Subject: [PATCH 1856/3292] io_uring: clean up check_overflow flag

There are no users of ->sq_check_overflow, only ->cq_check_overflow is
used. Combine it and move out of completion related part of struct
io_ring_ctx.

A not so obvious benefit of it is fitting all completion side fields
into a single cacheline. It was taking 2 lines before with 56B padding,
and io_cqring_ev_posted*() were still touching both of them.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/25927394964df31d113e3c729416af573afff5f5.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 64fdeee01906..b9fd8adf67af 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -371,7 +371,6 @@ struct io_ring_ctx {
 		struct io_uring_sqe	*sq_sqes;
 		unsigned		cached_sq_head;
 		unsigned		sq_entries;
-		unsigned long		sq_check_overflow;
 		struct list_head	defer_list;
 
 		/*
@@ -408,13 +407,14 @@ struct io_ring_ctx {
 	struct wait_queue_head	sqo_sq_wait;
 	struct list_head	sqd_list;
 
+	unsigned long		check_cq_overflow;
+
 	struct {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;
 		atomic_t		cq_timeouts;
 		unsigned		cq_last_tm_flush;
 		unsigned		cq_extra;
-		unsigned long		cq_check_overflow;
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
@@ -1464,8 +1464,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 
 	all_flushed = list_empty(&ctx->cq_overflow_list);
 	if (all_flushed) {
-		clear_bit(0, &ctx->sq_check_overflow);
-		clear_bit(0, &ctx->cq_check_overflow);
+		clear_bit(0, &ctx->check_cq_overflow);
 		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
 	}
 
@@ -1481,7 +1480,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
 	bool ret = true;
 
-	if (test_bit(0, &ctx->cq_check_overflow)) {
+	if (test_bit(0, &ctx->check_cq_overflow)) {
 		/* iopoll syncs against uring_lock, not completion_lock */
 		if (ctx->flags & IORING_SETUP_IOPOLL)
 			mutex_lock(&ctx->uring_lock);
@@ -1544,8 +1543,7 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 		return false;
 	}
 	if (list_empty(&ctx->cq_overflow_list)) {
-		set_bit(0, &ctx->sq_check_overflow);
-		set_bit(0, &ctx->cq_check_overflow);
+		set_bit(0, &ctx->check_cq_overflow);
 		ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
 	}
 	ocqe->cqe.user_data = user_data;
@@ -2391,7 +2389,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 	 * If we do, we can potentially be spinning for commands that
 	 * already triggered a CQE (eg in error).
 	 */
-	if (test_bit(0, &ctx->cq_check_overflow))
+	if (test_bit(0, &ctx->check_cq_overflow))
 		__io_cqring_overflow_flush(ctx, false);
 	if (io_cqring_events(ctx))
 		goto out;
@@ -6965,7 +6963,7 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
 	 * Cannot safely flush overflowed CQEs from here, ensure we wake up
 	 * the task, and the next invocation will do it.
 	 */
-	if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow))
+	if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
 		return autoremove_wake_function(curr, mode, wake_flags, key);
 	return -1;
 }
@@ -6993,7 +6991,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 	if (ret || io_should_wake(iowq))
 		return ret;
 	/* let the caller flush overflows, retry */
-	if (test_bit(0, &ctx->cq_check_overflow))
+	if (test_bit(0, &ctx->check_cq_overflow))
 		return 1;
 
 	*timeout = schedule_timeout(*timeout);
@@ -8704,7 +8702,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	 * Users may get EPOLLIN meanwhile seeing nothing in cqring, this
 	 * pushs them to do the flush.
 	 */
-	if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow))
+	if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;

From 311997b3fcddc2f169fff844bf6b48dbff0bb816 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:28 +0100
Subject: [PATCH 1857/3292] io_uring: wait heads renaming

We use several wait_queue_head's for different purposes, but namings are
confusing. First rename ctx->cq_wait into ctx->poll_wait, because this
one is used for polling an io_uring instance. Then rename ctx->wait into
ctx->cq_wait, which is responsible for CQE waiting.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/47b97a097780c86c67b20b6ccc4e077523dce682.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b9fd8adf67af..e19c9f7db2cf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -394,7 +394,7 @@ struct io_ring_ctx {
 
 	struct {
 		struct mutex		uring_lock;
-		wait_queue_head_t	wait;
+		wait_queue_head_t	cq_wait;
 	} ____cacheline_aligned_in_smp;
 
 	/* IRQ completion list, under ->completion_lock */
@@ -415,7 +415,7 @@ struct io_ring_ctx {
 		atomic_t		cq_timeouts;
 		unsigned		cq_last_tm_flush;
 		unsigned		cq_extra;
-		struct wait_queue_head	cq_wait;
+		struct wait_queue_head	poll_wait;
 		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
 	} ____cacheline_aligned_in_smp;
@@ -1178,13 +1178,13 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	ctx->flags = p->flags;
 	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
-	init_waitqueue_head(&ctx->cq_wait);
+	init_waitqueue_head(&ctx->poll_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	init_completion(&ctx->ref_comp);
 	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
-	init_waitqueue_head(&ctx->wait);
+	init_waitqueue_head(&ctx->cq_wait);
 	spin_lock_init(&ctx->completion_lock);
 	INIT_LIST_HEAD(&ctx->iopoll_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
@@ -1404,14 +1404,14 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 	/* see waitqueue_active() comment */
 	smp_mb();
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->cq_wait))
+		wake_up(&ctx->cq_wait);
 	if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
 		wake_up(&ctx->sq_data->wait);
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
-	if (waitqueue_active(&ctx->cq_wait)) {
-		wake_up_interruptible(&ctx->cq_wait);
+	if (waitqueue_active(&ctx->poll_wait)) {
+		wake_up_interruptible(&ctx->poll_wait);
 		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
 	}
 }
@@ -1422,13 +1422,13 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 	smp_mb();
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (waitqueue_active(&ctx->wait))
-			wake_up(&ctx->wait);
+		if (waitqueue_active(&ctx->cq_wait))
+			wake_up(&ctx->cq_wait);
 	}
 	if (io_should_trigger_evfd(ctx))
 		eventfd_signal(ctx->cq_ev_fd, 1);
-	if (waitqueue_active(&ctx->cq_wait)) {
-		wake_up_interruptible(&ctx->cq_wait);
+	if (waitqueue_active(&ctx->poll_wait)) {
+		wake_up_interruptible(&ctx->poll_wait);
 		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
 	}
 }
@@ -7056,10 +7056,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			ret = -EBUSY;
 			break;
 		}
-		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
+		prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
 						TASK_INTERRUPTIBLE);
 		ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
-		finish_wait(&ctx->wait, &iowq.wq);
+		finish_wait(&ctx->cq_wait, &iowq.wq);
 		cond_resched();
 	} while (ret > 0);
 
@@ -8680,7 +8680,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	struct io_ring_ctx *ctx = file->private_data;
 	__poll_t mask = 0;
 
-	poll_wait(file, &ctx->cq_wait, wait);
+	poll_wait(file, &ctx->poll_wait, wait);
 	/*
 	 * synchronizes with barrier from wq_has_sleeper call in
 	 * io_commit_cqring

From 0499e582aaff4e4072a760d1f31434acb50c7813 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:29 +0100
Subject: [PATCH 1858/3292] io_uring: move uring_lock location

->uring_lock is prevalently used for submission, even though it protects
many other things like iopoll, registeration, selected bufs, and more.
And it's placed together with ->cq_wait poked on completion and CQ
waiting sides. Move them apart, ->uring_lock goes to the submission
data, and cq_wait to completion related chunk. The last one requires
some reshuffling so everything needed by io_cqring_ev_posted*() is in
one cacheline.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/dea5e845caee4c98aa0922b46d713154d81f7bd8.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e19c9f7db2cf..74c8334d67a4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -356,6 +356,8 @@ struct io_ring_ctx {
 
 	/* submission data */
 	struct {
+		struct mutex		uring_lock;
+
 		/*
 		 * Ring buffer of indices into array of io_uring_sqe, which is
 		 * mmapped by the application using the IORING_OFF_SQES offset.
@@ -392,11 +394,6 @@ struct io_ring_ctx {
 		unsigned		sq_thread_idle;
 	} ____cacheline_aligned_in_smp;
 
-	struct {
-		struct mutex		uring_lock;
-		wait_queue_head_t	cq_wait;
-	} ____cacheline_aligned_in_smp;
-
 	/* IRQ completion list, under ->completion_lock */
 	struct list_head	locked_free_list;
 	unsigned int		locked_free_nr;
@@ -412,12 +409,13 @@ struct io_ring_ctx {
 	struct {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;
-		atomic_t		cq_timeouts;
-		unsigned		cq_last_tm_flush;
-		unsigned		cq_extra;
-		struct wait_queue_head	poll_wait;
-		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
+		struct wait_queue_head	poll_wait;
+		struct wait_queue_head	cq_wait;
+		unsigned		cq_extra;
+		atomic_t		cq_timeouts;
+		struct fasync_struct	*cq_fasync;
+		unsigned		cq_last_tm_flush;
 	} ____cacheline_aligned_in_smp;
 
 	struct {

From 76cc33d79175a1b224bf02d3ff6c7be53fc684d5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:30 +0100
Subject: [PATCH 1859/3292] io_uring: refactor io_req_defer()

Rename io_req_defer() into io_drain_req() and refactor it uncoupling it
from io_queue_sqe() error handling and preparing for coming
optimisations. Also, prioritise non IOSQE_ASYNC path.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/4f17dd56e7fbe52d1866f8acd8efe3284d2bebcb.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 74c8334d67a4..fc764e912844 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5998,7 +5998,7 @@ static u32 io_get_sequence(struct io_kiocb *req)
 	return ctx->cached_sq_head - nr_reqs;
 }
 
-static int io_req_defer(struct io_kiocb *req)
+static bool io_drain_req(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_defer_entry *de;
@@ -6008,27 +6008,29 @@ static int io_req_defer(struct io_kiocb *req)
 	/* Still need defer if there is pending req in defer list. */
 	if (likely(list_empty_careful(&ctx->defer_list) &&
 		!(req->flags & REQ_F_IO_DRAIN)))
-		return 0;
+		return false;
 
 	seq = io_get_sequence(req);
 	/* Still a chance to pass the sequence check */
 	if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
-		return 0;
+		return false;
 
 	ret = io_req_prep_async(req);
 	if (ret)
 		return ret;
 	io_prep_async_link(req);
 	de = kmalloc(sizeof(*de), GFP_KERNEL);
-	if (!de)
-		return -ENOMEM;
+	if (!de) {
+		io_req_complete_failed(req, ret);
+		return true;
+	}
 
 	spin_lock_irq(&ctx->completion_lock);
 	if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
 		spin_unlock_irq(&ctx->completion_lock);
 		kfree(de);
 		io_queue_async_work(req);
-		return -EIOCBQUEUED;
+		return true;
 	}
 
 	trace_io_uring_defer(ctx, req, req->user_data);
@@ -6036,7 +6038,7 @@ static int io_req_defer(struct io_kiocb *req)
 	de->seq = seq;
 	list_add_tail(&de->list, &ctx->defer_list);
 	spin_unlock_irq(&ctx->completion_lock);
-	return -EIOCBQUEUED;
+	return true;
 }
 
 static void io_clean_op(struct io_kiocb *req)
@@ -6447,21 +6449,18 @@ static void __io_queue_sqe(struct io_kiocb *req)
 
 static void io_queue_sqe(struct io_kiocb *req)
 {
-	int ret;
+	if (io_drain_req(req))
+		return;
 
-	ret = io_req_defer(req);
-	if (ret) {
-		if (ret != -EIOCBQUEUED) {
-fail_req:
-			io_req_complete_failed(req, ret);
-		}
-	} else if (req->flags & REQ_F_FORCE_ASYNC) {
-		ret = io_req_prep_async(req);
-		if (unlikely(ret))
-			goto fail_req;
-		io_queue_async_work(req);
-	} else {
+	if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
 		__io_queue_sqe(req);
+	} else {
+		int ret = io_req_prep_async(req);
+
+		if (unlikely(ret))
+			io_req_complete_failed(req, ret);
+		else
+			io_queue_async_work(req);
 	}
 }
 

From 441b8a7803bfa11af2355beea9a07720d4b5c03a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 14 Jun 2021 23:37:31 +0100
Subject: [PATCH 1860/3292] io_uring: optimise non-drain path

Replace drain checks with one-way flag set upon seeing the first
IOSQE_IO_DRAIN request. There are several places where it cuts cycles
well:

1) It's much faster than the fast check with two
conditions in io_drain_req() including pretty complex
list_empty_careful().

2) We can mark io_queue_sqe() inline now, that's a huge win.

3) It replaces timeout and drain checks in io_commit_cqring() with a
single flags test. Also great not touching ->defer_list there without a
reason so limiting cache bouncing.

It adds a small amount of overhead to drain path, but it's negligible.
The main nuisance is that once it meets any DRAIN request in io_uring
instance lifetime it will _always_ go through a slower path, so
drain-less and offset-mode timeout less applications are preferable.
The overhead in that case would be not big, but it's worth to bear in
mind.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/98d2fff8c4da5144bb0d08499f591d4768128ea3.1623709150.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 57 +++++++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fc764e912844..ab9a16c89545 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -352,6 +352,7 @@ struct io_ring_ctx {
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
 		unsigned int		off_timeout_used: 1;
+		unsigned int		drain_used: 1;
 	} ____cacheline_aligned_in_smp;
 
 	/* submission data */
@@ -1299,9 +1300,9 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 	}
 }
 
-static void __io_queue_deferred(struct io_ring_ctx *ctx)
+static void io_queue_deferred(struct io_ring_ctx *ctx)
 {
-	do {
+	while (!list_empty(&ctx->defer_list)) {
 		struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
 						struct io_defer_entry, list);
 
@@ -1310,17 +1311,12 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
 		list_del_init(&de->list);
 		io_req_task_queue(de->req);
 		kfree(de);
-	} while (!list_empty(&ctx->defer_list));
+	}
 }
 
 static void io_flush_timeouts(struct io_ring_ctx *ctx)
 {
-	u32 seq;
-
-	if (likely(!ctx->off_timeout_used))
-		return;
-
-	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
 
 	while (!list_empty(&ctx->timeout_list)) {
 		u32 events_needed, events_got;
@@ -1350,13 +1346,14 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
-	io_flush_timeouts(ctx);
-
+	if (unlikely(ctx->off_timeout_used || ctx->drain_used)) {
+		if (ctx->off_timeout_used)
+			io_flush_timeouts(ctx);
+		if (ctx->drain_used)
+			io_queue_deferred(ctx);
+	}
 	/* order cqe stores with ring update */
 	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
-
-	if (unlikely(!list_empty(&ctx->defer_list)))
-		__io_queue_deferred(ctx);
 }
 
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
@@ -6447,9 +6444,9 @@ static void __io_queue_sqe(struct io_kiocb *req)
 		io_queue_linked_timeout(linked_timeout);
 }
 
-static void io_queue_sqe(struct io_kiocb *req)
+static inline void io_queue_sqe(struct io_kiocb *req)
 {
-	if (io_drain_req(req))
+	if (unlikely(req->ctx->drain_used) && io_drain_req(req))
 		return;
 
 	if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
@@ -6573,6 +6570,23 @@ fail_req:
 		io_req_complete_failed(req, ret);
 		return ret;
 	}
+
+	if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
+		ctx->drain_used = true;
+
+		/*
+		 * Taking sequential execution of a link, draining both sides
+		 * of the link also fullfils IOSQE_IO_DRAIN semantics for all
+		 * requests in the link. So, it drains the head and the
+		 * next after the link request. The last one is done via
+		 * drain_next flag to persist the effect across calls.
+		 */
+		if (link->head) {
+			link->head->flags |= REQ_F_IO_DRAIN;
+			ctx->drain_next = 1;
+		}
+	}
+
 	ret = io_req_prep(req, sqe);
 	if (unlikely(ret))
 		goto fail_req;
@@ -6591,17 +6605,6 @@ fail_req:
 	if (link->head) {
 		struct io_kiocb *head = link->head;
 
-		/*
-		 * Taking sequential execution of a link, draining both sides
-		 * of the link also fullfils IOSQE_IO_DRAIN semantics for all
-		 * requests in the link. So, it drains the head and the
-		 * next after the link request. The last one is done via
-		 * drain_next flag to persist the effect across calls.
-		 */
-		if (req->flags & REQ_F_IO_DRAIN) {
-			head->flags |= REQ_F_IO_DRAIN;
-			ctx->drain_next = 1;
-		}
 		ret = io_req_prep_async(req);
 		if (unlikely(ret))
 			goto fail_req;

From dd9ae8a0b2985ead64dfcfa2f9a0ce5efa1480aa Mon Sep 17 00:00:00 2001
From: Fam Zheng <fam.zheng@bytedance.com>
Date: Fri, 4 Jun 2021 17:42:56 +0100
Subject: [PATCH 1861/3292] io_uring: Fix comment of io_get_sqe

The sqe_ptr argument has been gone since 709b302faddf (io_uring:
simplify io_get_sqring, 2020-04-08), made the return value of the
function. Update the comment accordingly.

Signed-off-by: Fam Zheng <fam.zheng@bytedance.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20210604164256.12242-1-fam.zheng@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ab9a16c89545..807a1abe69c5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6673,7 +6673,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
 }
 
 /*
- * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory
+ * Fetch an sqe, if one is available. Note this returns a pointer to memory
  * that is mapped by userspace. This means that care needs to be taken to
  * ensure that reads are stable, as we cannot rely on userspace always
  * being a good citizen. If members of the sqe are validated and then later

From 27f6b318dea2d7ccccc9dca416e59431838c2929 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 15 Jun 2021 13:20:13 +0100
Subject: [PATCH 1862/3292] io_uring: fix min types mismatch in table alloc

fs/io_uring.c: In function 'io_alloc_page_table':
include/linux/minmax.h:20:28: warning: comparison of distinct pointer
	types lacks a cast

Cast everything to size_t using min_t.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 9123c8ffce16 ("io_uring: add helpers for 2 level table alloc")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/50f420a956bca070a43810d4a805293ed54f39d8.1623759527.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 807a1abe69c5..6292b8da0a75 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7088,7 +7088,7 @@ static void **io_alloc_page_table(size_t size)
 		return NULL;
 
 	for (i = 0; i < nr_tables; i++) {
-		unsigned int this_size = min(size, PAGE_SIZE);
+		unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
 
 		table[i] = kzalloc(this_size, GFP_KERNEL);
 		if (!table[i]) {

From 10c669040e9b3538e1732c8d40729636b17ce9dd Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 15 Jun 2021 16:47:56 +0100
Subject: [PATCH 1863/3292] io_uring: switch !DRAIN fast path when possible

->drain_used is one way, which is not optimal if users use DRAIN but
very rarely. However, we can just clear it in io_drain_req() when all
drained before requests are gone. Also rename the flag to reflect the
change and be more clear about it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7f37a240857546a94df6348507edddacab150460.1623772051.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6292b8da0a75..25106cf7e57c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -352,7 +352,7 @@ struct io_ring_ctx {
 		unsigned int		eventfd_async: 1;
 		unsigned int		restricted: 1;
 		unsigned int		off_timeout_used: 1;
-		unsigned int		drain_used: 1;
+		unsigned int		drain_active: 1;
 	} ____cacheline_aligned_in_smp;
 
 	/* submission data */
@@ -1346,10 +1346,10 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_used)) {
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active)) {
 		if (ctx->off_timeout_used)
 			io_flush_timeouts(ctx);
-		if (ctx->drain_used)
+		if (ctx->drain_active)
 			io_queue_deferred(ctx);
 	}
 	/* order cqe stores with ring update */
@@ -6004,8 +6004,10 @@ static bool io_drain_req(struct io_kiocb *req)
 
 	/* Still need defer if there is pending req in defer list. */
 	if (likely(list_empty_careful(&ctx->defer_list) &&
-		!(req->flags & REQ_F_IO_DRAIN)))
+		!(req->flags & REQ_F_IO_DRAIN))) {
+		ctx->drain_active = false;
 		return false;
+	}
 
 	seq = io_get_sequence(req);
 	/* Still a chance to pass the sequence check */
@@ -6446,7 +6448,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
 
 static inline void io_queue_sqe(struct io_kiocb *req)
 {
-	if (unlikely(req->ctx->drain_used) && io_drain_req(req))
+	if (unlikely(req->ctx->drain_active) && io_drain_req(req))
 		return;
 
 	if (likely(!(req->flags & REQ_F_FORCE_ASYNC))) {
@@ -6572,7 +6574,7 @@ fail_req:
 	}
 
 	if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
-		ctx->drain_used = true;
+		ctx->drain_active = true;
 
 		/*
 		 * Taking sequential execution of a link, draining both sides

From 3c19966d3710dbe5a44658c532052f11d797aecb Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 15 Jun 2021 16:47:57 +0100
Subject: [PATCH 1864/3292] io_uring: shove more drain bits out of hot path

Place all drain_next logic into io_drain_req(), so it's never executed
if there was no drained requests before. The only thing we need is to
set ->drain_active if we see a request with IOSQE_IO_DRAIN, do that in
io_init_req() where flags are definitely in registers.

Also, all drain-related code is encapsulated in io_drain_req(), makes it
cleaner.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/68bf4f7395ddaafbf1a26bd97b57d57d45a9f900.1623772051.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 25106cf7e57c..f63fc79df4eb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5997,11 +5997,31 @@ static u32 io_get_sequence(struct io_kiocb *req)
 
 static bool io_drain_req(struct io_kiocb *req)
 {
+	struct io_kiocb *pos;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_defer_entry *de;
 	int ret;
 	u32 seq;
 
+	/*
+	 * If we need to drain a request in the middle of a link, drain the
+	 * head request and the next request/link after the current link.
+	 * Considering sequential execution of links, IOSQE_IO_DRAIN will be
+	 * maintained for every request of our link.
+	 */
+	if (ctx->drain_next) {
+		req->flags |= REQ_F_IO_DRAIN;
+		ctx->drain_next = false;
+	}
+	/* not interested in head, start from the first linked */
+	io_for_each_link(pos, req->link) {
+		if (pos->flags & REQ_F_IO_DRAIN) {
+			ctx->drain_next = true;
+			req->flags |= REQ_F_IO_DRAIN;
+			break;
+		}
+	}
+
 	/* Still need defer if there is pending req in defer list. */
 	if (likely(list_empty_careful(&ctx->defer_list) &&
 		!(req->flags & REQ_F_IO_DRAIN))) {
@@ -6522,6 +6542,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
 	    !io_op_defs[req->opcode].buffer_select)
 		return -EOPNOTSUPP;
+	if (unlikely(sqe_flags & IOSQE_IO_DRAIN))
+		ctx->drain_active = true;
 
 	personality = READ_ONCE(sqe->personality);
 	if (personality) {
@@ -6573,22 +6595,6 @@ fail_req:
 		return ret;
 	}
 
-	if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
-		ctx->drain_active = true;
-
-		/*
-		 * Taking sequential execution of a link, draining both sides
-		 * of the link also fullfils IOSQE_IO_DRAIN semantics for all
-		 * requests in the link. So, it drains the head and the
-		 * next after the link request. The last one is done via
-		 * drain_next flag to persist the effect across calls.
-		 */
-		if (link->head) {
-			link->head->flags |= REQ_F_IO_DRAIN;
-			ctx->drain_next = 1;
-		}
-	}
-
 	ret = io_req_prep(req, sqe);
 	if (unlikely(ret))
 		goto fail_req;
@@ -6620,10 +6626,6 @@ fail_req:
 			io_queue_sqe(head);
 		}
 	} else {
-		if (unlikely(ctx->drain_next)) {
-			req->flags |= REQ_F_IO_DRAIN;
-			ctx->drain_next = 0;
-		}
 		if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
 			link->head = req;
 			link->last = req;

From 2335f6f5ddf2f4621395fac5fa4b53d075828cc1 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 15 Jun 2021 16:47:58 +0100
Subject: [PATCH 1865/3292] io_uring: optimise io_commit_cqring()

In most cases io_commit_cqring() is just an smp_store_release(), and
it's hot enough, especially for IRQ rw, to want it to save on a function
call. Mark it inline and extract a non-inlined slow path doing drain
and timeout flushing. The inlined part is pretty slim to not cause
binary bloating.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7350f8b6b92caa50a48a80be39909f0d83eddd93.1623772051.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f63fc79df4eb..16156a655d8b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1344,14 +1344,18 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 	ctx->cq_last_tm_flush = seq;
 }
 
-static void io_commit_cqring(struct io_ring_ctx *ctx)
+static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active)) {
-		if (ctx->off_timeout_used)
-			io_flush_timeouts(ctx);
-		if (ctx->drain_active)
-			io_queue_deferred(ctx);
-	}
+	if (ctx->off_timeout_used)
+		io_flush_timeouts(ctx);
+	if (ctx->drain_active)
+		io_queue_deferred(ctx);
+}
+
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+	if (unlikely(ctx->off_timeout_used || ctx->drain_active))
+		__io_commit_cqring_flush(ctx);
 	/* order cqe stores with ring update */
 	smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
 }

From 2744d7a0733503931b71c00d156119ced002f22c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 9 Jun 2021 13:40:17 -0500
Subject: [PATCH 1866/3292] ACPI: Check StorageD3Enable _DSD property in ACPI
 code

Although first implemented for NVME, this check may be usable by
other drivers as well. Microsoft's specification explicitly mentions
that is may be usable by SATA and AHCI devices.  Google also indicates
that they have used this with SDHCI in a downstream kernel tree that
a user can plug a storage device into.

Link: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/power-management-for-storage-hardware-devices-intro
Suggested-by: Keith Busch <kbusch@kernel.org>
CC: Shyam-sundar S-k <Shyam-sundar.S-k@amd.com>
CC: Alexander Deucher <Alexander.Deucher@amd.com>
CC: Rafael J. Wysocki <rjw@rjwysocki.net>
CC: Prike Liang <prike.liang@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/acpi/device_pm.c | 29 +++++++++++++++++++++++++++++
 drivers/nvme/host/pci.c  | 28 +---------------------------
 include/linux/acpi.h     |  5 +++++
 3 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d260bc1f3e6e..d76ab50c71dc 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1340,4 +1340,33 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
 	return 1;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
+
+/**
+ * acpi_storage_d3 - Check if D3 should be used in the suspend path
+ * @dev: Device to check
+ *
+ * Return %true if the platform firmware wants @dev to be programmed
+ * into D3hot or D3cold (if supported) in the suspend path, or %false
+ * when there is no specific preference. On some platforms, if this
+ * hint is ignored, @dev may remain unresponsive after suspending the
+ * platform as a whole.
+ *
+ * Although the property has storage in the name it actually is
+ * applied to the PCIe slot and plugging in a non-storage device the
+ * same platform restrictions will likely apply.
+ */
+bool acpi_storage_d3(struct device *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+	u8 val;
+
+	if (!adev)
+		return false;
+	if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable",
+			&val))
+		return false;
+	return val == 1;
+}
+EXPORT_SYMBOL_GPL(acpi_storage_d3);
+
 #endif /* CONFIG_PM */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3aa7245a505f..8fbc4c87a0d8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2828,32 +2828,6 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_ACPI
-static bool nvme_acpi_storage_d3(struct pci_dev *dev)
-{
-	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
-	u8 val;
-
-	/*
-	 * Look for _DSD property specifying that the storage device on the port
-	 * must use D3 to support deep platform power savings during
-	 * suspend-to-idle.
-	 */
-
-	if (!adev)
-		return false;
-	if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable",
-			&val))
-		return false;
-	return val == 1;
-}
-#else
-static inline bool nvme_acpi_storage_d3(struct pci_dev *dev)
-{
-	return false;
-}
-#endif /* CONFIG_ACPI */
-
 static void nvme_async_probe(void *data, async_cookie_t cookie)
 {
 	struct nvme_dev *dev = data;
@@ -2903,7 +2877,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	quirks |= check_vendor_combination_bug(pdev);
 
-	if (!noacpi && nvme_acpi_storage_d3(pdev)) {
+	if (!noacpi && acpi_storage_d3(&pdev->dev)) {
 		/*
 		 * Some systems use a bios work around to ask for D3 on
 		 * platforms that support kernel managed suspend.
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..dd0dafd21e33 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1004,6 +1004,7 @@ int acpi_dev_resume(struct device *dev);
 int acpi_subsys_runtime_suspend(struct device *dev);
 int acpi_subsys_runtime_resume(struct device *dev);
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
+bool acpi_storage_d3(struct device *dev);
 #else
 static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; }
 static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
@@ -1011,6 +1012,10 @@ static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
 	return 0;
 }
+static inline bool acpi_storage_d3(struct device *dev)
+{
+	return false;
+}
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP)

From 6485fc18faa01e8845b1e5bb55118e633f84d1f2 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 9 Jun 2021 13:40:18 -0500
Subject: [PATCH 1867/3292] ACPI: Add quirks for AMD Renoir/Lucienne CPUs to
 force the D3 hint

AMD systems from Renoir and Lucienne require that the NVME controller
is put into D3 over a Modern Standby / suspend-to-idle
cycle.  This is "typically" accomplished using the `StorageD3Enable`
property in the _DSD, but this property was introduced after many
of these systems launched and most OEM systems don't have it in
their BIOS.

On AMD Renoir without these drives going into D3 over suspend-to-idle
the resume will fail with the NVME controller being reset and a trace
like this in the kernel logs:
```
[   83.556118] nvme nvme0: I/O 161 QID 2 timeout, aborting
[   83.556178] nvme nvme0: I/O 162 QID 2 timeout, aborting
[   83.556187] nvme nvme0: I/O 163 QID 2 timeout, aborting
[   83.556196] nvme nvme0: I/O 164 QID 2 timeout, aborting
[   95.332114] nvme nvme0: I/O 25 QID 0 timeout, reset controller
[   95.332843] nvme nvme0: Abort status: 0x371
[   95.332852] nvme nvme0: Abort status: 0x371
[   95.332856] nvme nvme0: Abort status: 0x371
[   95.332859] nvme nvme0: Abort status: 0x371
[   95.332909] PM: dpm_run_callback(): pci_pm_resume+0x0/0xe0 returns -16
[   95.332936] nvme 0000:03:00.0: PM: failed to resume async: error -16
```

The Microsoft documentation for StorageD3Enable mentioned that Windows has
a hardcoded allowlist for D3 support, which was used for these platforms.
Introduce quirks to hardcode them for Linux as well.

As this property is now "standardized", OEM systems using AMD Cezanne and
newer APU's have adopted this property, and quirks like this should not be
necessary.

CC: Shyam-sundar S-k <Shyam-sundar.S-k@amd.com>
CC: Alexander Deucher <Alexander.Deucher@amd.com>
CC: Prike Liang <prike.liang@amd.com>
Link: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/power-management-for-storage-hardware-devices-intro
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Julian Sikorski <belegdol@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/acpi/device_pm.c |  3 +++
 drivers/acpi/internal.h  |  9 +++++++++
 drivers/acpi/x86/utils.c | 25 +++++++++++++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d76ab50c71dc..6dd9bd64903e 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1360,6 +1360,9 @@ bool acpi_storage_d3(struct device *dev)
 	struct acpi_device *adev = ACPI_COMPANION(dev);
 	u8 val;
 
+	if (force_storage_d3())
+		return true;
+
 	if (!adev)
 		return false;
 	if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable",
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index f973bbe90e5e..e29ec463bb07 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -236,6 +236,15 @@ static inline int suspend_nvs_save(void) { return 0; }
 static inline void suspend_nvs_restore(void) {}
 #endif
 
+#ifdef CONFIG_X86
+bool force_storage_d3(void);
+#else
+static inline bool force_storage_d3(void)
+{
+	return false;
+}
+#endif
+
 /*--------------------------------------------------------------------------
 				Device properties
   -------------------------------------------------------------------------- */
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index bdc1ba00aee9..f22f23933063 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -135,3 +135,28 @@ bool acpi_device_always_present(struct acpi_device *adev)
 
 	return ret;
 }
+
+/*
+ * AMD systems from Renoir and Lucienne *require* that the NVME controller
+ * is put into D3 over a Modern Standby / suspend-to-idle cycle.
+ *
+ * This is "typically" accomplished using the `StorageD3Enable`
+ * property in the _DSD that is checked via the `acpi_storage_d3` function
+ * but this property was introduced after many of these systems launched
+ * and most OEM systems don't have it in their BIOS.
+ *
+ * The Microsoft documentation for StorageD3Enable mentioned that Windows has
+ * a hardcoded allowlist for D3 support, which was used for these platforms.
+ *
+ * This allows quirking on Linux in a similar fashion.
+ */
+static const struct x86_cpu_id storage_d3_cpu_ids[] = {
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL),	/* Renoir */
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL),	/* Lucienne */
+	{}
+};
+
+bool force_storage_d3(void)
+{
+	return x86_match_cpu(storage_d3_cpu_ids);
+}

From 120bb3624d55d65145f7c1bf12a839fd323cde29 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Mon, 7 Jun 2021 10:56:56 +0200
Subject: [PATCH 1868/3292] nvme: verify MNAN value if ANA is enabled

The controller is required to have a non-zero MNAN value if it supports
ANA:

   If the controller supports Asymmetric Namespace Access Reporting, then
   this field shall be set to a non-zero value that is less than or equal
   to the NN value.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 127a17b4c13d..98426234d416 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -818,6 +818,13 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	    !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
 		return 0;
 
+	if (!ctrl->max_namespaces ||
+	    ctrl->max_namespaces > le32_to_cpu(id->nn)) {
+		dev_err(ctrl->device,
+			"Invalid MNAN value %u\n", ctrl->max_namespaces);
+		return -EINVAL;
+	}
+
 	ctrl->anacap = id->anacap;
 	ctrl->anatt = id->anatt;
 	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);

From 2411424143bdfad3027e82fe6a66c5aadce271ee Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Mon, 7 Jun 2021 10:46:51 +0200
Subject: [PATCH 1869/3292] nvme: remove superfluous bio_set_dev in
 nvme_requeue_work

Commit ce86dad222e9 ("nvme-multipath: reset bdev to ns head when
failover") moved the reset code where the bio is added to the
requeue_list for the failover path. But it left the original
bio_set_dev in nvme_requeue_work.

There is a second path to nvme_requee_work. It is via
nvme_ns_head_submit_bio. Though we don't have to set bio->bi_bdev for
this path either, as it points to the correct bdev already.

Let's remove the bio_set_dev. It's updating the bio->bi_bdev with the
same pointer and thus it's unnecessary.

Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 98426234d416..23573fe3fc7d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -435,11 +435,6 @@ static void nvme_requeue_work(struct work_struct *work)
 		next = bio->bi_next;
 		bio->bi_next = NULL;
 
-		/*
-		 * Reset disk to the mpath node and resubmit to select a new
-		 * path.
-		 */
-		bio_set_dev(bio, head->disk->part0);
 		submit_bio_noacct(bio);
 	}
 }

From d399742cd02dca6d1ed17ae7db7a366192516591 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 14 Jun 2021 16:16:07 +0200
Subject: [PATCH 1870/3292] nvme: fix grammar in the CONFIG_NVME_MULTIPATH
 kconfig help text

Fix a singular/plural mismatch in the CONFIG_NVME_MULTIPATH help text.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index a44d49d63968..102292289cdf 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -21,7 +21,7 @@ config NVME_MULTIPATH
 	help
 	   This option enables support for multipath access to NVMe
 	   subsystems.  If this option is enabled only a single
-	   /dev/nvmeXnY device will show up for each NVMe namespaces,
+	   /dev/nvmeXnY device will show up for each NVMe namespace,
 	   even if it is accessible through multiple controllers.
 
 config NVME_HWMON

From e7d4b5493a2d5a6225fc572e01167e12f89c6a42 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 7 Jun 2021 12:54:50 -0700
Subject: [PATCH 1871/3292] nvme: factor out a nvme_validate_passthru_nsid
 helper

Add a helper nvme_validate_passthru_nsid() to validate the nsid that
removes the nsid validation and error message print code from
nvme_user_cmd() and nvme_user_cmd64().

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/ioctl.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 2e7780ea0354..d93928d1e5bd 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -177,6 +177,20 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 			metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
 }
 
+static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
+					struct nvme_ns *ns, __u32 nsid)
+{
+	if (ns && nsid != ns->head->ns_id) {
+		dev_err(ctrl->device,
+			"%s: nsid (%u) in cmd does not match nsid (%u)"
+			"of namespace\n",
+			current->comm, nsid, ns->head->ns_id);
+		return false;
+	}
+
+	return true;
+}
+
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			struct nvme_passthru_cmd __user *ucmd)
 {
@@ -192,12 +206,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return -EFAULT;
 	if (cmd.flags)
 		return -EINVAL;
-	if (ns && cmd.nsid != ns->head->ns_id) {
-		dev_err(ctrl->device,
-			"%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
-			current->comm, cmd.nsid, ns->head->ns_id);
+	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
 		return -EINVAL;
-	}
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cmd.opcode;
@@ -242,12 +252,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return -EFAULT;
 	if (cmd.flags)
 		return -EINVAL;
-	if (ns && cmd.nsid != ns->head->ns_id) {
-		dev_err(ctrl->device,
-			"%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
-			current->comm, cmd.nsid, ns->head->ns_id);
+	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
 		return -EINVAL;
-	}
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cmd.opcode;

From 522af60cb2f8e3658bda1902fb7f200dcf888a5c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 5 Jun 2021 15:48:16 +0300
Subject: [PATCH 1872/3292] nvme-tcp: fix error codes in nvme_tcp_setup_ctrl()

These error paths currently return success but they should return
-EOPNOTSUPP.

Fixes: 73ffcefcfca0 ("nvme-tcp: check sgl supported by target")
Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5fc6c568c626..6a65b0516180 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1988,11 +1988,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 		return ret;
 
 	if (ctrl->icdoff) {
+		ret = -EOPNOTSUPP;
 		dev_err(ctrl->device, "icdoff is not supported!\n");
 		goto destroy_admin;
 	}
 
 	if (!(ctrl->sgls & ((1 << 0) | (1 << 1)))) {
+		ret = -EOPNOTSUPP;
 		dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
 		goto destroy_admin;
 	}

From 1b50dd478f495c2112d1dd5655b2317d53a0723b Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Tue, 15 Jun 2021 09:20:08 +0200
Subject: [PATCH 1873/3292] xfrm: delete xfrm4_output_finish
 xfrm6_output_finish declarations

These function declarations are not needed any more.
The definitions were deleted.

Fixes: 2ab6096db2f1 ("xfrm: remove output_finish indirection from xfrm_state_afinfo")
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c8890da00b8a..3a01570410ab 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1579,7 +1579,6 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 }
 
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb);
 int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
 int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol);
 int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
@@ -1603,7 +1602,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
 __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
 __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
 int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
-int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb);
 
 #ifdef CONFIG_XFRM
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);

From 30ad6a84f60bdaa32ef5091125299d0d96a330fe Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jun 2021 16:27:20 +0200
Subject: [PATCH 1874/3292] xfrm: avoid compiler warning when ipv6 is disabled

with CONFIG_IPV6=n:
xfrm_output.c:140:12: warning: 'xfrm6_hdr_offset' defined but not used

Fixes: 9acf4d3b9ec1 ("xfrm: ipv6: add xfrm6_hdr_offset helper")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_output.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e14fca1fb003..0b2975ef0668 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -137,6 +137,7 @@ static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_IPV6)
 static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
 {
 	switch (x->type->proto) {
@@ -151,6 +152,7 @@ static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prev
 
 	return ip6_find_1stfragopt(skb, prevhdr);
 }
+#endif
 
 /* Add encapsulation header.
  *

From 26f1ccdf609a9fb087f49a3782fdc2ade23cde82 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 16 Jun 2021 14:36:11 +0800
Subject: [PATCH 1875/3292] net: hns3: minor refactor related to desc_cb
 handling

desc_cb is used to store mapping and freeing info for the
corresponding desc, which is used in the cleaning process.
There will be more desc_cb type coming up when supporting the
tx bounce buffer, change desc_cb type to bit-wise value in order
to reduce the desc_cb type checking operation in the data path.

Also move the desc_cb type definition to hns3_enet.h because it
is only used in hns3_enet.c, and declare a local variable desc_cb
in hns3_clear_desc() to reduce lines of code.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  7 ----
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 40 +++++++++----------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  7 ++++
 3 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ba883b0a19f0..5822fc06f767 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -159,13 +159,6 @@ enum HNAE3_PF_CAP_BITS {
 #define ring_ptr_move_bw(ring, p) \
 	((ring)->p = ((ring)->p - 1 + (ring)->desc_num) % (ring)->desc_num)
 
-enum hns_desc_type {
-	DESC_TYPE_UNKNOWN,
-	DESC_TYPE_SKB,
-	DESC_TYPE_FRAGLIST_SKB,
-	DESC_TYPE_PAGE,
-};
-
 struct hnae3_handle;
 
 struct hnae3_queue {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 9a45f3cde6a2..f03a7a962eb0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1413,7 +1413,7 @@ out_hw_tx_csum:
 }
 
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-			  unsigned int size, enum hns_desc_type type)
+			  unsigned int size, unsigned int type)
 {
 #define HNS3_LIKELY_BD_NUM	1
 
@@ -1425,8 +1425,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 	int k, sizeoflast;
 	dma_addr_t dma;
 
-	if (type == DESC_TYPE_FRAGLIST_SKB ||
-	    type == DESC_TYPE_SKB) {
+	if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) {
 		struct sk_buff *skb = (struct sk_buff *)priv;
 
 		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
@@ -1704,6 +1703,7 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 
 	for (i = 0; i < ring->desc_num; i++) {
 		struct hns3_desc *desc = &ring->desc[ring->next_to_use];
+		struct hns3_desc_cb *desc_cb;
 
 		memset(desc, 0, sizeof(*desc));
 
@@ -1714,31 +1714,27 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 		/* rollback one */
 		ring_ptr_move_bw(ring, next_to_use);
 
-		if (!ring->desc_cb[ring->next_to_use].dma)
+		desc_cb = &ring->desc_cb[ring->next_to_use];
+
+		if (!desc_cb->dma)
 			continue;
 
 		/* unmap the descriptor dma address */
-		if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB ||
-		    ring->desc_cb[ring->next_to_use].type ==
-		    DESC_TYPE_FRAGLIST_SKB)
-			dma_unmap_single(dev,
-					 ring->desc_cb[ring->next_to_use].dma,
-					ring->desc_cb[ring->next_to_use].length,
-					DMA_TO_DEVICE);
-		else if (ring->desc_cb[ring->next_to_use].length)
-			dma_unmap_page(dev,
-				       ring->desc_cb[ring->next_to_use].dma,
-				       ring->desc_cb[ring->next_to_use].length,
+		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
+			dma_unmap_single(dev, desc_cb->dma, desc_cb->length,
+					 DMA_TO_DEVICE);
+		else if (desc_cb->length)
+			dma_unmap_page(dev, desc_cb->dma, desc_cb->length,
 				       DMA_TO_DEVICE);
 
-		ring->desc_cb[ring->next_to_use].length = 0;
-		ring->desc_cb[ring->next_to_use].dma = 0;
-		ring->desc_cb[ring->next_to_use].type = DESC_TYPE_UNKNOWN;
+		desc_cb->length = 0;
+		desc_cb->dma = 0;
+		desc_cb->type = DESC_TYPE_UNKNOWN;
 	}
 }
 
 static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
-				 struct sk_buff *skb, enum hns_desc_type type)
+				 struct sk_buff *skb, unsigned int type)
 {
 	unsigned int size = skb_headlen(skb);
 	struct sk_buff *frag_skb;
@@ -2859,7 +2855,7 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 static void hns3_free_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb, int budget)
 {
-	if (cb->type == DESC_TYPE_SKB)
+	if (cb->type & DESC_TYPE_SKB)
 		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
@@ -2880,7 +2876,7 @@ static int hns3_map_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb)
 static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
 			      struct hns3_desc_cb *cb)
 {
-	if (cb->type == DESC_TYPE_SKB || cb->type == DESC_TYPE_FRAGLIST_SKB)
+	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
 		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
 				 ring_to_dma_dir(ring));
 	else if (cb->length)
@@ -3037,7 +3033,7 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 
 		desc_cb = &ring->desc_cb[ntc];
 
-		if (desc_cb->type == DESC_TYPE_SKB) {
+		if (desc_cb->type & DESC_TYPE_SKB) {
 			(*pkts)++;
 			(*bytes) += desc_cb->send_bytes;
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 79821c7bdc16..9d18b9430b54 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -299,6 +299,13 @@ struct __packed hns3_desc {
 	};
 };
 
+enum hns3_desc_type {
+	DESC_TYPE_UNKNOWN		= 0,
+	DESC_TYPE_SKB			= 1 << 0,
+	DESC_TYPE_FRAGLIST_SKB		= 1 << 1,
+	DESC_TYPE_PAGE			= 1 << 2,
+};
+
 struct hns3_desc_cb {
 	dma_addr_t dma; /* dma address of this desc */
 	void *buf;      /* cpu addr for a desc */

From 8677d78c3d860c156ccd335e2b97728298c2cbb1 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 16 Jun 2021 14:36:12 +0800
Subject: [PATCH 1876/3292] net: hns3: refactor for hns3_fill_desc() function

Factor out hns3_fill_desc() so that it can be reused in the
tx bounce supporting.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 87 ++++++++++---------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f03a7a962eb0..6fa1ed5c4098 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1412,39 +1412,14 @@ out_hw_tx_csum:
 	return 0;
 }
 
-static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
-			  unsigned int size, unsigned int type)
+static int hns3_fill_desc(struct hns3_enet_ring *ring, dma_addr_t dma,
+			  unsigned int size)
 {
 #define HNS3_LIKELY_BD_NUM	1
 
-	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
 	struct hns3_desc *desc = &ring->desc[ring->next_to_use];
-	struct device *dev = ring_to_dev(ring);
-	skb_frag_t *frag;
 	unsigned int frag_buf_num;
 	int k, sizeoflast;
-	dma_addr_t dma;
-
-	if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) {
-		struct sk_buff *skb = (struct sk_buff *)priv;
-
-		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
-	} else {
-		frag = (skb_frag_t *)priv;
-		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
-	}
-
-	if (unlikely(dma_mapping_error(dev, dma))) {
-		u64_stats_update_begin(&ring->syncp);
-		ring->stats.sw_err_cnt++;
-		u64_stats_update_end(&ring->syncp);
-		return -ENOMEM;
-	}
-
-	desc_cb->priv = priv;
-	desc_cb->length = size;
-	desc_cb->dma = dma;
-	desc_cb->type = type;
 
 	if (likely(size <= HNS3_MAX_BD_SIZE)) {
 		desc->addr = cpu_to_le64(dma);
@@ -1480,6 +1455,47 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 	return frag_buf_num;
 }
 
+static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv,
+				  unsigned int type)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	struct device *dev = ring_to_dev(ring);
+	unsigned int size;
+	dma_addr_t dma;
+
+	if (type & (DESC_TYPE_FRAGLIST_SKB | DESC_TYPE_SKB)) {
+		struct sk_buff *skb = (struct sk_buff *)priv;
+
+		size = skb_headlen(skb);
+		if (!size)
+			return 0;
+
+		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	} else {
+		skb_frag_t *frag = (skb_frag_t *)priv;
+
+		size = skb_frag_size(frag);
+		if (!size)
+			return 0;
+
+		dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
+	}
+
+	if (unlikely(dma_mapping_error(dev, dma))) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	desc_cb->priv = priv;
+	desc_cb->length = size;
+	desc_cb->dma = dma;
+	desc_cb->type = type;
+
+	return hns3_fill_desc(ring, dma, size);
+}
+
 static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
 				    unsigned int bd_num)
 {
@@ -1736,26 +1752,19 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
 				 struct sk_buff *skb, unsigned int type)
 {
-	unsigned int size = skb_headlen(skb);
 	struct sk_buff *frag_skb;
 	int i, ret, bd_num = 0;
 
-	if (size) {
-		ret = hns3_fill_desc(ring, skb, size, type);
-		if (unlikely(ret < 0))
-			return ret;
+	ret = hns3_map_and_fill_desc(ring, skb, type);
+	if (unlikely(ret < 0))
+		return ret;
 
-		bd_num += ret;
-	}
+	bd_num += ret;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-		size = skb_frag_size(frag);
-		if (!size)
-			continue;
-
-		ret = hns3_fill_desc(ring, frag, size, DESC_TYPE_PAGE);
+		ret = hns3_map_and_fill_desc(ring, frag, DESC_TYPE_PAGE);
 		if (unlikely(ret < 0))
 			return ret;
 

From 907676b130711fd1f627824559e92259db2061d1 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 16 Jun 2021 14:36:13 +0800
Subject: [PATCH 1877/3292] net: hns3: use tx bounce buffer for small packets

when the packet or frag size is small, it causes both security and
performance issue. As dma can't map sub-page, this means some extra
kernel data is visible to devices. On the other hand, the overhead
of dma map and unmap is huge when IOMMU is on.

So add a queue based tx shared bounce buffer to memcpy the small
packet when the len of the xmitted skb is below tx_copybreak.
Add tx_spare_buf_size module param to set the size of tx spare
buffer, and add set/get_tunable to set or query the tx_copybreak.

The throughtput improves from 30 Gbps to 90+ Gbps when running 16
netperf threads with 32KB UDP message size when IOMMU is in the
strict mode(tx_copybreak = 2000 and mtu = 1500).

Suggested-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  52 ++++
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 289 +++++++++++++++++-
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  43 ++-
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    |  51 ++++
 4 files changed, 420 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index c512a63c423b..a24a75c47cad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -392,6 +392,56 @@ static void hns3_dbg_fill_content(char *content, u16 len,
 	*pos++ = '\0';
 }
 
+static const struct hns3_dbg_item tx_spare_info_items[] = {
+	{ "QUEUE_ID", 2 },
+	{ "COPYBREAK", 2 },
+	{ "LEN", 7 },
+	{ "NTU", 4 },
+	{ "NTC", 4 },
+	{ "LTC", 4 },
+	{ "DMA", 17 },
+};
+
+static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf,
+				   int len, u32 ring_num, int *pos)
+{
+	char data_str[ARRAY_SIZE(tx_spare_info_items)][HNS3_DBG_DATA_STR_LEN];
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	char *result[ARRAY_SIZE(tx_spare_info_items)];
+	char content[HNS3_DBG_INFO_LEN];
+	u32 i, j;
+
+	if (!tx_spare) {
+		*pos += scnprintf(buf + *pos, len - *pos,
+				  "tx spare buffer is not enabled\n");
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tx_spare_info_items); i++)
+		result[i] = &data_str[i][0];
+
+	*pos += scnprintf(buf + *pos, len - *pos, "tx spare buffer info\n");
+	hns3_dbg_fill_content(content, sizeof(content), tx_spare_info_items,
+			      NULL, ARRAY_SIZE(tx_spare_info_items));
+	*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+
+	for (i = 0; i < ring_num; i++) {
+		j = 0;
+		sprintf(result[j++], "%8u", i);
+		sprintf(result[j++], "%9u", ring->tx_copybreak);
+		sprintf(result[j++], "%3u", tx_spare->len);
+		sprintf(result[j++], "%3u", tx_spare->next_to_use);
+		sprintf(result[j++], "%3u", tx_spare->next_to_clean);
+		sprintf(result[j++], "%3u", tx_spare->last_to_clean);
+		sprintf(result[j++], "%pad", &tx_spare->dma);
+		hns3_dbg_fill_content(content, sizeof(content),
+				      tx_spare_info_items,
+				      (const char **)result,
+				      ARRAY_SIZE(tx_spare_info_items));
+		*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
+	}
+}
+
 static const struct hns3_dbg_item rx_queue_info_items[] = {
 	{ "QUEUE_ID", 2 },
 	{ "BD_NUM", 2 },
@@ -593,6 +643,8 @@ static int hns3_dbg_tx_queue_info(struct hnae3_handle *h,
 		pos += scnprintf(buf + pos, len - pos, "%s", content);
 	}
 
+	hns3_dbg_tx_spare_info(ring, buf, len, h->kinfo.num_tqps, &pos);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 6fa1ed5c4098..e5466daac1c4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -53,6 +53,10 @@ static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, " Network interface message level setting");
 
+static unsigned int tx_spare_buf_size;
+module_param(tx_spare_buf_size, uint, 0400);
+MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer");
+
 #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
 			   NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
 
@@ -941,6 +945,177 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle)
 		ops->request_update_promisc_mode(handle);
 }
 
+static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	u32 ntc, ntu;
+
+	/* This smp_load_acquire() pairs with smp_store_release() in
+	 * hns3_tx_spare_update() called in tx desc cleaning process.
+	 */
+	ntc = smp_load_acquire(&tx_spare->last_to_clean);
+	ntu = tx_spare->next_to_use;
+
+	if (ntc > ntu)
+		return ntc - ntu - 1;
+
+	/* The free tx buffer is divided into two part, so pick the
+	 * larger one.
+	 */
+	return (ntc > (tx_spare->len - ntu) ? ntc :
+			(tx_spare->len - ntu)) - 1;
+}
+
+static void hns3_tx_spare_update(struct hns3_enet_ring *ring)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+
+	if (!tx_spare ||
+	    tx_spare->last_to_clean == tx_spare->next_to_clean)
+		return;
+
+	/* This smp_store_release() pairs with smp_load_acquire() in
+	 * hns3_tx_spare_space() called in xmit process.
+	 */
+	smp_store_release(&tx_spare->last_to_clean,
+			  tx_spare->next_to_clean);
+}
+
+static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring,
+				   struct sk_buff *skb,
+				   u32 space)
+{
+	u32 len = skb->len <= ring->tx_copybreak ? skb->len :
+				skb_headlen(skb);
+
+	if (len > ring->tx_copybreak)
+		return false;
+
+	if (ALIGN(len, dma_get_cache_alignment()) > space) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_spare_full++;
+		u64_stats_update_end(&ring->syncp);
+		return false;
+	}
+
+	return true;
+}
+
+static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
+{
+	struct hns3_tx_spare *tx_spare;
+	struct page *page;
+	dma_addr_t dma;
+	int order;
+
+	if (!tx_spare_buf_size)
+		return;
+
+	order = get_order(tx_spare_buf_size);
+	tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare),
+				GFP_KERNEL);
+	if (!tx_spare) {
+		/* The driver still work without the tx spare buffer */
+		dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n");
+		return;
+	}
+
+	page = alloc_pages_node(dev_to_node(ring_to_dev(ring)),
+				GFP_KERNEL, order);
+	if (!page) {
+		dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n");
+		devm_kfree(ring_to_dev(ring), tx_spare);
+		return;
+	}
+
+	dma = dma_map_page(ring_to_dev(ring), page, 0,
+			   PAGE_SIZE << order, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring_to_dev(ring), dma)) {
+		dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n");
+		put_page(page);
+		devm_kfree(ring_to_dev(ring), tx_spare);
+		return;
+	}
+
+	tx_spare->dma = dma;
+	tx_spare->buf = page_address(page);
+	tx_spare->len = PAGE_SIZE << order;
+	ring->tx_spare = tx_spare;
+}
+
+/* Use hns3_tx_spare_space() to make sure there is enough buffer
+ * before calling below function to allocate tx buffer.
+ */
+static void *hns3_tx_spare_alloc(struct hns3_enet_ring *ring,
+				 unsigned int size, dma_addr_t *dma,
+				 u32 *cb_len)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	u32 ntu = tx_spare->next_to_use;
+
+	size = ALIGN(size, dma_get_cache_alignment());
+	*cb_len = size;
+
+	/* Tx spare buffer wraps back here because the end of
+	 * freed tx buffer is not enough.
+	 */
+	if (ntu + size > tx_spare->len) {
+		*cb_len += (tx_spare->len - ntu);
+		ntu = 0;
+	}
+
+	tx_spare->next_to_use = ntu + size;
+	if (tx_spare->next_to_use == tx_spare->len)
+		tx_spare->next_to_use = 0;
+
+	*dma = tx_spare->dma + ntu;
+
+	return tx_spare->buf + ntu;
+}
+
+static void hns3_tx_spare_rollback(struct hns3_enet_ring *ring, u32 len)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+
+	if (len > tx_spare->next_to_use) {
+		len -= tx_spare->next_to_use;
+		tx_spare->next_to_use = tx_spare->len - len;
+	} else {
+		tx_spare->next_to_use -= len;
+	}
+}
+
+static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring,
+				     struct hns3_desc_cb *cb)
+{
+	struct hns3_tx_spare *tx_spare = ring->tx_spare;
+	u32 ntc = tx_spare->next_to_clean;
+	u32 len = cb->length;
+
+	tx_spare->next_to_clean += len;
+
+	if (tx_spare->next_to_clean >= tx_spare->len) {
+		tx_spare->next_to_clean -= tx_spare->len;
+
+		if (tx_spare->next_to_clean) {
+			ntc = 0;
+			len = tx_spare->next_to_clean;
+		}
+	}
+
+	/* This tx spare buffer is only really reclaimed after calling
+	 * hns3_tx_spare_update(), so it is still safe to use the info in
+	 * the tx buffer to do the dma sync after tx_spare->next_to_clean
+	 * is moved forword.
+	 */
+	if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) {
+		dma_addr_t dma = tx_spare->dma + ntc;
+
+		dma_sync_single_for_cpu(ring_to_dev(ring), dma, len,
+					DMA_TO_DEVICE);
+	}
+}
+
 static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs,
 			u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes)
 {
@@ -1471,6 +1646,11 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv,
 			return 0;
 
 		dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	} else if (type & DESC_TYPE_BOUNCE_HEAD) {
+		/* Head data has been filled in hns3_handle_tx_bounce(),
+		 * just return 0 here.
+		 */
+		return 0;
 	} else {
 		skb_frag_t *frag = (skb_frag_t *)priv;
 
@@ -1739,6 +1919,9 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig)
 		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
 			dma_unmap_single(dev, desc_cb->dma, desc_cb->length,
 					 DMA_TO_DEVICE);
+		else if (desc_cb->type &
+			 (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL))
+			hns3_tx_spare_rollback(ring, desc_cb->length);
 		else if (desc_cb->length)
 			dma_unmap_page(dev, desc_cb->dma, desc_cb->length,
 				       DMA_TO_DEVICE);
@@ -1816,6 +1999,79 @@ static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
 	desc->tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_TSYN_B));
 }
 
+static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
+				 struct sk_buff *skb)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	unsigned int type = DESC_TYPE_BOUNCE_HEAD;
+	unsigned int size = skb_headlen(skb);
+	dma_addr_t dma;
+	int bd_num = 0;
+	u32 cb_len;
+	void *buf;
+	int ret;
+
+	if (skb->len <= ring->tx_copybreak) {
+		size = skb->len;
+		type = DESC_TYPE_BOUNCE_ALL;
+	}
+
+	/* hns3_can_use_tx_bounce() is called to ensure the below
+	 * function can always return the tx buffer.
+	 */
+	buf = hns3_tx_spare_alloc(ring, size, &dma, &cb_len);
+
+	ret = skb_copy_bits(skb, 0, buf, size);
+	if (unlikely(ret < 0)) {
+		hns3_tx_spare_rollback(ring, cb_len);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.copy_bits_err++;
+		u64_stats_update_end(&ring->syncp);
+		return ret;
+	}
+
+	desc_cb->priv = skb;
+	desc_cb->length = cb_len;
+	desc_cb->dma = dma;
+	desc_cb->type = type;
+
+	bd_num += hns3_fill_desc(ring, dma, size);
+
+	if (type == DESC_TYPE_BOUNCE_HEAD) {
+		ret = hns3_fill_skb_to_desc(ring, skb,
+					    DESC_TYPE_BOUNCE_HEAD);
+		if (unlikely(ret < 0))
+			return ret;
+
+		bd_num += ret;
+	}
+
+	dma_sync_single_for_device(ring_to_dev(ring), dma, size,
+				   DMA_TO_DEVICE);
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.tx_bounce++;
+	u64_stats_update_end(&ring->syncp);
+	return bd_num;
+}
+
+static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
+				    struct sk_buff *skb)
+{
+	u32 space;
+
+	if (!ring->tx_spare)
+		goto out;
+
+	space = hns3_tx_spare_space(ring);
+
+	if (hns3_can_use_tx_bounce(ring, skb, space))
+		return hns3_handle_tx_bounce(ring, skb);
+
+out:
+	return hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
+}
+
 netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -1862,7 +2118,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	 * zero, which is unlikely, and 'ret > 0' means how many tx desc
 	 * need to be notified to the hw.
 	 */
-	ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
+	ret = hns3_handle_desc_filling(ring, skb);
 	if (unlikely(ret <= 0))
 		goto fill_err;
 
@@ -2064,6 +2320,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_drop += ring->stats.tx_tso_err;
 			tx_drop += ring->stats.over_max_recursion;
 			tx_drop += ring->stats.hw_limitation;
+			tx_drop += ring->stats.copy_bits_err;
 			tx_errors += ring->stats.sw_err_cnt;
 			tx_errors += ring->stats.tx_vlan_err;
 			tx_errors += ring->stats.tx_l4_proto_err;
@@ -2071,6 +2328,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_errors += ring->stats.tx_tso_err;
 			tx_errors += ring->stats.over_max_recursion;
 			tx_errors += ring->stats.hw_limitation;
+			tx_errors += ring->stats.copy_bits_err;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
@@ -2864,7 +3122,8 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 static void hns3_free_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb, int budget)
 {
-	if (cb->type & DESC_TYPE_SKB)
+	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
+			DESC_TYPE_BOUNCE_ALL))
 		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
@@ -2888,9 +3147,11 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
 	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB))
 		dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
 				 ring_to_dma_dir(ring));
-	else if (cb->length)
+	else if ((cb->type & DESC_TYPE_PAGE) && cb->length)
 		dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
 			       ring_to_dma_dir(ring));
+	else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD))
+		hns3_tx_spare_reclaim_cb(ring, cb);
 }
 
 static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i)
@@ -3042,7 +3303,8 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 
 		desc_cb = &ring->desc_cb[ntc];
 
-		if (desc_cb->type & DESC_TYPE_SKB) {
+		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL |
+				     DESC_TYPE_BOUNCE_HEAD)) {
 			(*pkts)++;
 			(*bytes) += desc_cb->send_bytes;
 		}
@@ -3065,6 +3327,9 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 	 * ring_space called by hns3_nic_net_xmit.
 	 */
 	smp_store_release(&ring->next_to_clean, ntc);
+
+	hns3_tx_spare_update(ring);
+
 	return true;
 }
 
@@ -4245,6 +4510,8 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 		ring = &priv->ring[q->tqp_index];
 		desc_num = priv->ae_handle->kinfo.num_tx_desc;
 		ring->queue_index = q->tqp_index;
+		ring->tx_copybreak = priv->tx_copybreak;
+		ring->last_to_use = 0;
 	} else {
 		ring = &priv->ring[q->tqp_index + queue_num];
 		desc_num = priv->ae_handle->kinfo.num_rx_desc;
@@ -4262,7 +4529,6 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 	ring->desc_num = desc_num;
 	ring->next_to_use = 0;
 	ring->next_to_clean = 0;
-	ring->last_to_use = 0;
 }
 
 static void hns3_queue_to_ring(struct hnae3_queue *tqp,
@@ -4322,6 +4588,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
 		ret = hns3_alloc_ring_buffers(ring);
 		if (ret)
 			goto out_with_desc;
+	} else {
+		hns3_init_tx_spare_buffer(ring);
 	}
 
 	return 0;
@@ -4344,9 +4612,18 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
 	ring->next_to_use = 0;
 	ring->last_to_use = 0;
 	ring->pending_buf = 0;
-	if (ring->skb) {
+	if (!HNAE3_IS_TX_RING(ring) && ring->skb) {
 		dev_kfree_skb_any(ring->skb);
 		ring->skb = NULL;
+	} else if (HNAE3_IS_TX_RING(ring) && ring->tx_spare) {
+		struct hns3_tx_spare *tx_spare = ring->tx_spare;
+
+		dma_unmap_page(ring_to_dev(ring), tx_spare->dma, tx_spare->len,
+			       DMA_TO_DEVICE);
+		free_pages((unsigned long)tx_spare->buf,
+			   get_order(tx_spare->len));
+		devm_kfree(ring_to_dev(ring), tx_spare);
+		ring->tx_spare = NULL;
 	}
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 9d18b9430b54..8d147c1dab2c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -304,6 +304,8 @@ enum hns3_desc_type {
 	DESC_TYPE_SKB			= 1 << 0,
 	DESC_TYPE_FRAGLIST_SKB		= 1 << 1,
 	DESC_TYPE_PAGE			= 1 << 2,
+	DESC_TYPE_BOUNCE_ALL		= 1 << 3,
+	DESC_TYPE_BOUNCE_HEAD		= 1 << 4,
 };
 
 struct hns3_desc_cb {
@@ -405,6 +407,9 @@ struct ring_stats {
 			u64 tx_tso_err;
 			u64 over_max_recursion;
 			u64 hw_limitation;
+			u64 tx_bounce;
+			u64 tx_spare_full;
+			u64 copy_bits_err;
 		};
 		struct {
 			u64 rx_pkts;
@@ -423,6 +428,15 @@ struct ring_stats {
 	};
 };
 
+struct hns3_tx_spare {
+	dma_addr_t dma;
+	void *buf;
+	u32 next_to_use;
+	u32 next_to_clean;
+	u32 last_to_clean;
+	u32 len;
+};
+
 struct hns3_enet_ring {
 	struct hns3_desc *desc; /* dma map address space */
 	struct hns3_desc_cb *desc_cb;
@@ -445,18 +459,28 @@ struct hns3_enet_ring {
 	 * next_to_use
 	 */
 	int next_to_clean;
-	union {
-		int last_to_use;	/* last idx used by xmit */
-		u32 pull_len;		/* memcpy len for current rx packet */
-	};
-	u32 frag_num;
-	void *va; /* first buffer address for current packet */
-
 	u32 flag;          /* ring attribute */
 
 	int pending_buf;
-	struct sk_buff *skb;
-	struct sk_buff *tail_skb;
+	union {
+		/* for Tx ring */
+		struct {
+			u32 fd_qb_tx_sample;
+			int last_to_use;        /* last idx used by xmit */
+			u32 tx_copybreak;
+			struct hns3_tx_spare *tx_spare;
+		};
+
+		/* for Rx ring */
+		struct {
+			u32 pull_len;   /* memcpy len for current rx packet */
+			u32 frag_num;
+			/* first buffer address for current packet */
+			unsigned char *va;
+			struct sk_buff *skb;
+			struct sk_buff *tail_skb;
+		};
+	};
 } ____cacheline_internodealigned_in_smp;
 
 enum hns3_flow_level_range {
@@ -540,6 +564,7 @@ struct hns3_nic_priv {
 
 	struct hns3_enet_coalesce tx_coal;
 	struct hns3_enet_coalesce rx_coal;
+	u32 tx_copybreak;
 };
 
 union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index acef5435d7b7..f306de16d73f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -46,6 +46,9 @@ static const struct hns3_stats hns3_txq_stats[] = {
 	HNS3_TQP_STAT("tso_err", tx_tso_err),
 	HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
 	HNS3_TQP_STAT("hw_limitation", hw_limitation),
+	HNS3_TQP_STAT("bounce", tx_bounce),
+	HNS3_TQP_STAT("spare_full", tx_spare_full),
+	HNS3_TQP_STAT("copy_bits_err", copy_bits_err),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
@@ -1592,6 +1595,50 @@ static int hns3_set_priv_flags(struct net_device *netdev, u32 pflags)
 	return 0;
 }
 
+static int hns3_get_tunable(struct net_device *netdev,
+			    const struct ethtool_tunable *tuna,
+			    void *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_TX_COPYBREAK:
+		/* all the tx rings have the same tx_copybreak */
+		*(u32 *)data = priv->tx_copybreak;
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+static int hns3_set_tunable(struct net_device *netdev,
+			    const struct ethtool_tunable *tuna,
+			    const void *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int i, ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_TX_COPYBREAK:
+		priv->tx_copybreak = *(u32 *)data;
+
+		for (i = 0; i < h->kinfo.num_tqps; i++)
+			priv->ring[i].tx_copybreak = priv->tx_copybreak;
+
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
 #define HNS3_ETHTOOL_COALESCE	(ETHTOOL_COALESCE_USECS |		\
 				 ETHTOOL_COALESCE_USE_ADAPTIVE |	\
 				 ETHTOOL_COALESCE_RX_USECS_HIGH |	\
@@ -1635,6 +1682,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
 	.set_msglevel = hns3_set_msglevel,
 	.get_priv_flags = hns3_get_priv_flags,
 	.set_priv_flags = hns3_set_priv_flags,
+	.get_tunable = hns3_get_tunable,
+	.set_tunable = hns3_set_tunable,
 };
 
 static const struct ethtool_ops hns3_ethtool_ops = {
@@ -1674,6 +1723,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
 	.get_priv_flags = hns3_get_priv_flags,
 	.set_priv_flags = hns3_set_priv_flags,
 	.get_ts_info = hns3_get_ts_info,
+	.get_tunable = hns3_get_tunable,
+	.set_tunable = hns3_set_tunable,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)

From 1a00197b7d2fe57f0be93037d5090e19a9b178c8 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Wed, 16 Jun 2021 14:36:14 +0800
Subject: [PATCH 1878/3292] net: hns3: add support to query tx spare buffer
 size for pf

Add support to query tx spare buffer size from configuration
file, and use this info to do spare buffer initialization when
the module parameter 'tx_spare_buf_size' is not specified.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  1 +
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    |  7 +++++--
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  2 ++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 14 ++++++++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  2 ++
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 5822fc06f767..0b202f4def83 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -760,6 +760,7 @@ struct hnae3_knic_private_info {
 	u16 rx_buf_len;
 	u16 num_tx_desc;
 	u16 num_rx_desc;
+	u32 tx_spare_buf_size;
 
 	struct hnae3_tc_info tc_info;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index e5466daac1c4..d86b3735aa9f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1005,13 +1005,16 @@ static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
 {
 	struct hns3_tx_spare *tx_spare;
 	struct page *page;
+	u32 alloc_size;
 	dma_addr_t dma;
 	int order;
 
-	if (!tx_spare_buf_size)
+	alloc_size = tx_spare_buf_size ? tx_spare_buf_size :
+		     ring->tqp->handle->kinfo.tx_spare_buf_size;
+	if (!alloc_size)
 		return;
 
-	order = get_order(tx_spare_buf_size);
+	order = get_order(alloc_size);
 	tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare),
 				GFP_KERNEL);
 	if (!tx_spare) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 51be76f1795e..a322dfeba5cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -542,6 +542,8 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_UMV_TBL_SPACE_M	GENMASK(31, 16)
 #define HCLGE_CFG_PF_RSS_SIZE_S		0
 #define HCLGE_CFG_PF_RSS_SIZE_M		GENMASK(3, 0)
+#define HCLGE_CFG_TX_SPARE_BUF_SIZE_S	4
+#define HCLGE_CFG_TX_SPARE_BUF_SIZE_M	GENMASK(15, 4)
 
 #define HCLGE_CFG_CMD_CNT		4
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f6fdf93c8cad..f3e482ab3c71 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1279,6 +1279,7 @@ static u32 hclge_get_max_speed(u16 speed_ability)
 
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
+#define HCLGE_TX_SPARE_SIZE_UNIT		4096
 #define SPEED_ABILITY_EXT_SHIFT			8
 
 	struct hclge_cfg_param_cmd *req;
@@ -1358,6 +1359,15 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 	cfg->pf_rss_size_max = cfg->pf_rss_size_max ?
 			       1U << cfg->pf_rss_size_max :
 			       cfg->vf_rss_size_max;
+
+	/* The unit of the tx spare buffer size queried from configuration
+	 * file is HCLGE_TX_SPARE_SIZE_UNIT(4096) bytes, so a conversion is
+	 * needed here.
+	 */
+	cfg->tx_spare_buf_size = hnae3_get_field(__le32_to_cpu(req->param[2]),
+						 HCLGE_CFG_TX_SPARE_BUF_SIZE_M,
+						 HCLGE_CFG_TX_SPARE_BUF_SIZE_S);
+	cfg->tx_spare_buf_size *= HCLGE_TX_SPARE_SIZE_UNIT;
 }
 
 /* hclge_get_cfg: query the static parameter from flash
@@ -1539,6 +1549,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->tc_max = cfg.tc_num;
 	hdev->tm_info.hw_pfc_map = 0;
 	hdev->wanted_umv_size = cfg.umv_space;
+	hdev->tx_spare_buf_size = cfg.tx_spare_buf_size;
 	if (cfg.vlan_fliter_cap == HCLGE_VLAN_FLTR_CAN_MDF)
 		set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
 
@@ -1736,6 +1747,7 @@ static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps,
 	kinfo->num_rx_desc = num_rx_desc;
 
 	kinfo->rx_buf_len = hdev->rx_buf_len;
+	kinfo->tx_spare_buf_size = hdev->tx_spare_buf_size;
 
 	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, num_tqps,
 				  sizeof(struct hnae3_queue *), GFP_KERNEL);
@@ -11059,6 +11071,8 @@ static void hclge_info_show(struct hclge_dev *hdev)
 		 hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
 	dev_info(dev, "MQPRIO %s\n",
 		 hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+	dev_info(dev, "Default tx spare buffer size: %u\n",
+		 hdev->tx_spare_buf_size);
 
 	dev_info(dev, "PF info end.\n");
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 02852738ce21..3d3352491dba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -384,6 +384,7 @@ struct hclge_cfg {
 	u8 mac_addr[ETH_ALEN];
 	u8 default_speed;
 	u32 numa_node_map;
+	u32 tx_spare_buf_size;
 	u16 speed_ability;
 	u16 umv_space;
 };
@@ -848,6 +849,7 @@ struct hclge_dev {
 	u16 alloc_rss_size;		/* Allocated RSS task queue */
 	u16 vf_rss_size_max;		/* HW defined VF max RSS task queue */
 	u16 pf_rss_size_max;		/* HW defined PF max RSS task queue */
+	u32 tx_spare_buf_size;		/* HW defined TX spare buffer size */
 
 	u16 fdir_pf_filter_count; /* Num of guaranteed filters for this PF */
 	u16 num_alloc_vport;		/* Num vports this driver supports */

From 7459775e9f658a2d5f3ff9d4d087e86f4d4e5b83 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 16 Jun 2021 14:36:15 +0800
Subject: [PATCH 1879/3292] net: hns3: support dma_map_sg() for multi frags skb

Using the queue based tx buffer, it is also possible to allocate a
sgl buffer, and use skb_to_sgvec() to convert the skb to the sgvec
in order to support the dma_map_sg() to decreases the overhead of
IOMMU mapping and unmapping.

Firstly, it reduces the number of buffers. For example, a tcp skb
may have a 66-byte header and 3 fragments of 4328, 32768, and 28064
bytes. With this patch, dma_map_sg() will combine them into two
buffers, 66-bytes header and one 65160-bytes fragment by using IOMMU.

Secondly, it reduces the number of dma mapping and unmapping. All the
original 4 buffers are mapped only once rather than 4 times.

The throughput improves above 10% when running single thread of iperf
using TCP when IOMMU is in strict mode.

Suggested-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 111 +++++++++++++++++-
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |   4 +
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    |   3 +
 3 files changed, 113 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index d86b3735aa9f..f60a344a6a9f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -57,6 +57,15 @@ static unsigned int tx_spare_buf_size;
 module_param(tx_spare_buf_size, uint, 0400);
 MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer");
 
+static unsigned int tx_sgl = 1;
+module_param(tx_sgl, uint, 0600);
+MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to optimize the IOMMU mapping");
+
+#define HNS3_SGL_SIZE(nfrag)	(sizeof(struct scatterlist) * (nfrag) +	\
+				 sizeof(struct sg_table))
+#define HNS3_MAX_SGL_SIZE	ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM),\
+				      dma_get_cache_alignment())
+
 #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
 			   NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
 
@@ -1001,6 +1010,25 @@ static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring,
 	return true;
 }
 
+static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring,
+				struct sk_buff *skb,
+				u32 space)
+{
+	if (skb->len <= ring->tx_copybreak || !tx_sgl ||
+	    (!skb_has_frag_list(skb) &&
+	     skb_shinfo(skb)->nr_frags < tx_sgl))
+		return false;
+
+	if (space < HNS3_MAX_SGL_SIZE) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.tx_spare_full++;
+		u64_stats_update_end(&ring->syncp);
+		return false;
+	}
+
+	return true;
+}
+
 static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring)
 {
 	struct hns3_tx_spare *tx_spare;
@@ -1108,14 +1136,19 @@ static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring,
 
 	/* This tx spare buffer is only really reclaimed after calling
 	 * hns3_tx_spare_update(), so it is still safe to use the info in
-	 * the tx buffer to do the dma sync after tx_spare->next_to_clean
-	 * is moved forword.
+	 * the tx buffer to do the dma sync or sg unmapping after
+	 * tx_spare->next_to_clean is moved forword.
 	 */
 	if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) {
 		dma_addr_t dma = tx_spare->dma + ntc;
 
 		dma_sync_single_for_cpu(ring_to_dev(ring), dma, len,
 					DMA_TO_DEVICE);
+	} else {
+		struct sg_table *sgt = tx_spare->buf + ntc;
+
+		dma_unmap_sg(ring_to_dev(ring), sgt->sgl, sgt->orig_nents,
+			     DMA_TO_DEVICE);
 	}
 }
 
@@ -2058,6 +2091,65 @@ static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring,
 	return bd_num;
 }
 
+static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring,
+			      struct sk_buff *skb)
+{
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+	u32 nfrag = skb_shinfo(skb)->nr_frags + 1;
+	struct sg_table *sgt;
+	int i, bd_num = 0;
+	dma_addr_t dma;
+	u32 cb_len;
+	int nents;
+
+	if (skb_has_frag_list(skb))
+		nfrag = HNS3_MAX_TSO_BD_NUM;
+
+	/* hns3_can_use_tx_sgl() is called to ensure the below
+	 * function can always return the tx buffer.
+	 */
+	sgt = hns3_tx_spare_alloc(ring, HNS3_SGL_SIZE(nfrag),
+				  &dma, &cb_len);
+
+	/* scatterlist follows by the sg table */
+	sgt->sgl = (struct scatterlist *)(sgt + 1);
+	sg_init_table(sgt->sgl, nfrag);
+	nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len);
+	if (unlikely(nents < 0)) {
+		hns3_tx_spare_rollback(ring, cb_len);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.skb2sgl_err++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	sgt->orig_nents = nents;
+	sgt->nents = dma_map_sg(ring_to_dev(ring), sgt->sgl, sgt->orig_nents,
+				DMA_TO_DEVICE);
+	if (unlikely(!sgt->nents)) {
+		hns3_tx_spare_rollback(ring, cb_len);
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.map_sg_err++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	desc_cb->priv = skb;
+	desc_cb->length = cb_len;
+	desc_cb->dma = dma;
+	desc_cb->type = DESC_TYPE_SGL_SKB;
+
+	for (i = 0; i < sgt->nents; i++)
+		bd_num += hns3_fill_desc(ring, sg_dma_address(sgt->sgl + i),
+					 sg_dma_len(sgt->sgl + i));
+
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.tx_sgl++;
+	u64_stats_update_end(&ring->syncp);
+
+	return bd_num;
+}
+
 static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
 				    struct sk_buff *skb)
 {
@@ -2068,6 +2160,9 @@ static int hns3_handle_desc_filling(struct hns3_enet_ring *ring,
 
 	space = hns3_tx_spare_space(ring);
 
+	if (hns3_can_use_tx_sgl(ring, skb, space))
+		return hns3_handle_tx_sgl(ring, skb);
+
 	if (hns3_can_use_tx_bounce(ring, skb, space))
 		return hns3_handle_tx_bounce(ring, skb);
 
@@ -2324,6 +2419,8 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_drop += ring->stats.over_max_recursion;
 			tx_drop += ring->stats.hw_limitation;
 			tx_drop += ring->stats.copy_bits_err;
+			tx_drop += ring->stats.skb2sgl_err;
+			tx_drop += ring->stats.map_sg_err;
 			tx_errors += ring->stats.sw_err_cnt;
 			tx_errors += ring->stats.tx_vlan_err;
 			tx_errors += ring->stats.tx_l4_proto_err;
@@ -2332,6 +2429,8 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_errors += ring->stats.over_max_recursion;
 			tx_errors += ring->stats.hw_limitation;
 			tx_errors += ring->stats.copy_bits_err;
+			tx_errors += ring->stats.skb2sgl_err;
+			tx_errors += ring->stats.map_sg_err;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
@@ -3126,7 +3225,7 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
 			     struct hns3_desc_cb *cb, int budget)
 {
 	if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
-			DESC_TYPE_BOUNCE_ALL))
+			DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
 		napi_consume_skb(cb->priv, budget);
 	else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
 		__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
@@ -3153,7 +3252,8 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
 	else if ((cb->type & DESC_TYPE_PAGE) && cb->length)
 		dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
 			       ring_to_dma_dir(ring));
-	else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD))
+	else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD |
+			     DESC_TYPE_SGL_SKB))
 		hns3_tx_spare_reclaim_cb(ring, cb);
 }
 
@@ -3307,7 +3407,8 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 		desc_cb = &ring->desc_cb[ntc];
 
 		if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL |
-				     DESC_TYPE_BOUNCE_HEAD)) {
+				     DESC_TYPE_BOUNCE_HEAD |
+				     DESC_TYPE_SGL_SKB)) {
 			(*pkts)++;
 			(*bytes) += desc_cb->send_bytes;
 		}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 8d147c1dab2c..22ae291471aa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -306,6 +306,7 @@ enum hns3_desc_type {
 	DESC_TYPE_PAGE			= 1 << 2,
 	DESC_TYPE_BOUNCE_ALL		= 1 << 3,
 	DESC_TYPE_BOUNCE_HEAD		= 1 << 4,
+	DESC_TYPE_SGL_SKB		= 1 << 5,
 };
 
 struct hns3_desc_cb {
@@ -410,6 +411,9 @@ struct ring_stats {
 			u64 tx_bounce;
 			u64 tx_spare_full;
 			u64 copy_bits_err;
+			u64 tx_sgl;
+			u64 skb2sgl_err;
+			u64 map_sg_err;
 		};
 		struct {
 			u64 rx_pkts;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index f306de16d73f..d7852716aaad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -49,6 +49,9 @@ static const struct hns3_stats hns3_txq_stats[] = {
 	HNS3_TQP_STAT("bounce", tx_bounce),
 	HNS3_TQP_STAT("spare_full", tx_spare_full),
 	HNS3_TQP_STAT("copy_bits_err", copy_bits_err),
+	HNS3_TQP_STAT("sgl", tx_sgl),
+	HNS3_TQP_STAT("skb2sgl_err", skb2sgl_err),
+	HNS3_TQP_STAT("map_sg_err", map_sg_err),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)

From fa7711b888f24ee9291d90f8fbdaccfc80ed72c7 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 16 Jun 2021 14:36:16 +0800
Subject: [PATCH 1880/3292] net: hns3: optimize the rx page reuse handling
 process

Current rx page offset only reset to zero when all the below
conditions are satisfied:
1. rx page is only owned by driver.
2. rx page is reusable.
3. the page offset that is above to be given to the stack has
reached the end of the page.

If the page offset is over the hns3_buf_size(), it means the
buffer below the offset of the page is usable when the above
condition 1 & 2 are satisfied, so page offset can be reset to
zero instead of increasing the offset. We may be able to always
reuse the first 4K buffer of a 64K page, which means we can
limit the hot buffer size as much as possible.

The above optimization is a side effect when refacting the
rx page reuse handling in order to support the rx copybreak.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 46 +++++++++----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f60a344a6a9f..98e8a548edb8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3525,7 +3525,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
 
 static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
 {
-	return (page_count(cb->priv) - cb->pagecnt_bias) == 1;
+	return page_count(cb->priv) == cb->pagecnt_bias;
 }
 
 static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
@@ -3533,40 +3533,40 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 				struct hns3_desc_cb *desc_cb)
 {
 	struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
+	u32 frag_offset = desc_cb->page_offset + pull_len;
 	int size = le16_to_cpu(desc->rx.size);
 	u32 truesize = hns3_buf_size(ring);
+	u32 frag_size = size - pull_len;
 
-	desc_cb->pagecnt_bias--;
-	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
-			size - pull_len, truesize);
+	/* Avoid re-using remote or pfmem page */
+	if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
+		goto out;
 
-	/* Avoid re-using remote and pfmemalloc pages, or the stack is still
-	 * using the page when page_offset rollback to zero, flag default
-	 * unreuse
+	/* Stack is not using and current page_offset is non-zero, we can
+	 * reuse from the zero offset.
 	 */
-	if (!dev_page_is_reusable(desc_cb->priv) ||
-	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) {
-		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
-		return;
-	}
-
-	/* Move offset up to the next cache line */
-	desc_cb->page_offset += truesize;
-
-	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
-		desc_cb->reuse_flag = 1;
-	} else if (hns3_can_reuse_page(desc_cb)) {
-		desc_cb->reuse_flag = 1;
+	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
 		desc_cb->page_offset = 0;
-	} else if (desc_cb->pagecnt_bias) {
-		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
-		return;
+		desc_cb->reuse_flag = 1;
+	} else if (desc_cb->page_offset + truesize * 2 <=
+		   hns3_page_size(ring)) {
+		desc_cb->page_offset += truesize;
+		desc_cb->reuse_flag = 1;
 	}
 
+out:
+	desc_cb->pagecnt_bias--;
+
 	if (unlikely(!desc_cb->pagecnt_bias)) {
 		page_ref_add(desc_cb->priv, USHRT_MAX);
 		desc_cb->pagecnt_bias = USHRT_MAX;
 	}
+
+	skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+			frag_size, truesize);
+
+	if (unlikely(!desc_cb->reuse_flag))
+		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
 }
 
 static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)

From 99f6b5fb5f63cf69c6e56bba8e5492c98c521a63 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 16 Jun 2021 14:36:17 +0800
Subject: [PATCH 1881/3292] net: hns3: use bounce buffer when rx page can not
 be reused

Currently rx page will be reused to receive future packet when
the stack releases the previous skb quickly. If the old page
can not be reused, a new page will be allocated and mapped,
which comsumes a lot of cpu when IOMMU is in the strict mode,
especially when the application and irq/NAPI happens to run on
the same cpu.

So allocate a new frag to memcpy the data to avoid the costly
IOMMU unmapping/mapping operation, and add "frag_alloc_err"
and "frag_alloc" stats in "ethtool -S ethX" cmd.

The throughput improves above 50% when running single thread of
iperf using TCP when IOMMU is in strict mode and iperf shares the
same cpu with irq/NAPI(rx_copybreak = 2048 and mtu = 1500).

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  2 ++
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 23 +++++++++++++++++++
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  4 ++++
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    | 12 ++++++++++
 4 files changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index a24a75c47cad..34b6cd904a1a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -450,6 +450,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = {
 	{ "HEAD", 2 },
 	{ "FBDNUM", 2 },
 	{ "PKTNUM", 2 },
+	{ "COPYBREAK", 2 },
 	{ "RING_EN", 2 },
 	{ "RX_RING_EN", 2 },
 	{ "BASE_ADDR", 10 },
@@ -481,6 +482,7 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring,
 
 	sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base +
 		HNS3_RING_RX_RING_PKTNUM_RECORD_REG));
+	sprintf(result[j++], "%9u", ring->rx_copybreak);
 
 	sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base +
 		HNS3_RING_EN_REG) ? "on" : "off");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 98e8a548edb8..51bbf5f760c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3552,6 +3552,28 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 		   hns3_page_size(ring)) {
 		desc_cb->page_offset += truesize;
 		desc_cb->reuse_flag = 1;
+	} else if (frag_size <= ring->rx_copybreak) {
+		void *frag = napi_alloc_frag(frag_size);
+
+		if (unlikely(!frag)) {
+			u64_stats_update_begin(&ring->syncp);
+			ring->stats.frag_alloc_err++;
+			u64_stats_update_end(&ring->syncp);
+
+			hns3_rl_err(ring_to_netdev(ring),
+				    "failed to allocate rx frag\n");
+			goto out;
+		}
+
+		desc_cb->reuse_flag = 1;
+		memcpy(frag, desc_cb->buf + frag_offset, frag_size);
+		skb_add_rx_frag(skb, i, virt_to_page(frag),
+				offset_in_page(frag), frag_size, frag_size);
+
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.frag_alloc++;
+		u64_stats_update_end(&ring->syncp);
+		return;
 	}
 
 out:
@@ -4620,6 +4642,7 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 		ring = &priv->ring[q->tqp_index + queue_num];
 		desc_num = priv->ae_handle->kinfo.num_rx_desc;
 		ring->queue_index = q->tqp_index;
+		ring->rx_copybreak = priv->rx_copybreak;
 	}
 
 	hnae3_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 22ae291471aa..15af3d93857b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -427,6 +427,8 @@ struct ring_stats {
 			u64 csum_complete;
 			u64 rx_multicast;
 			u64 non_reuse_pg;
+			u64 frag_alloc_err;
+			u64 frag_alloc;
 		};
 		__le16 csum;
 	};
@@ -478,6 +480,7 @@ struct hns3_enet_ring {
 		/* for Rx ring */
 		struct {
 			u32 pull_len;   /* memcpy len for current rx packet */
+			u32 rx_copybreak;
 			u32 frag_num;
 			/* first buffer address for current packet */
 			unsigned char *va;
@@ -569,6 +572,7 @@ struct hns3_nic_priv {
 	struct hns3_enet_coalesce tx_coal;
 	struct hns3_enet_coalesce rx_coal;
 	u32 tx_copybreak;
+	u32 rx_copybreak;
 };
 
 union l3_hdr_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index d7852716aaad..82061ab6930f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -71,6 +71,8 @@ static const struct hns3_stats hns3_rxq_stats[] = {
 	HNS3_TQP_STAT("csum_complete", csum_complete),
 	HNS3_TQP_STAT("multicast", rx_multicast),
 	HNS3_TQP_STAT("non_reuse_pg", non_reuse_pg),
+	HNS3_TQP_STAT("frag_alloc_err", frag_alloc_err),
+	HNS3_TQP_STAT("frag_alloc", frag_alloc),
 };
 
 #define HNS3_PRIV_FLAGS_LEN ARRAY_SIZE(hns3_priv_flags)
@@ -1610,6 +1612,9 @@ static int hns3_get_tunable(struct net_device *netdev,
 		/* all the tx rings have the same tx_copybreak */
 		*(u32 *)data = priv->tx_copybreak;
 		break;
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = priv->rx_copybreak;
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -1633,6 +1638,13 @@ static int hns3_set_tunable(struct net_device *netdev,
 		for (i = 0; i < h->kinfo.num_tqps; i++)
 			priv->ring[i].tx_copybreak = priv->tx_copybreak;
 
+		break;
+	case ETHTOOL_RX_COPYBREAK:
+		priv->rx_copybreak = *(u32 *)data;
+
+		for (i = h->kinfo.num_tqps; i < h->kinfo.num_tqps * 2; i++)
+			priv->ring[i].rx_copybreak = priv->rx_copybreak;
+
 		break;
 	default:
 		ret = -EOPNOTSUPP;

From 1d0bbbf22b744153044a5e98c19df866dfbd18ea Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 16 Jun 2021 13:53:09 +0800
Subject: [PATCH 1882/3292] net: mhi_net: make mhi_wwan_ops static

This symbol is not used outside of net.c, so marks it static.

Fix the following sparse warning:

drivers/net/mhi/net.c:385:23: warning: symbol 'mhi_wwan_ops' was not
declared. Should it be static?

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index 64af1e518484..832d9de42f62 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -382,7 +382,7 @@ static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
 	dev_set_drvdata(&mhi_dev->dev, NULL);
 }
 
-const struct wwan_ops mhi_wwan_ops = {
+static const struct wwan_ops mhi_wwan_ops = {
 	.owner = THIS_MODULE,
 	.priv_size = sizeof(struct mhi_net_dev),
 	.setup = mhi_net_setup,

From 1b3fc771769c9f9418b23dd5676ab25a215d247d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 15 Jun 2021 23:06:04 -0700
Subject: [PATCH 1883/3292] inet_diag: add support for tw_mark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Timewait sockets have included mark since approx 4.18.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Jon Maxwell <jmaxwell37@gmail.com>
Fixes: 00483690552c ("tcp: Add mark for TIMEWAIT sockets")
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jon Maxwell <jmaxwell37@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 93474b1bea4e..e65f4ef024a4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -416,7 +416,7 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 static int inet_twsk_diag_fill(struct sock *sk,
 			       struct sk_buff *skb,
 			       struct netlink_callback *cb,
-			       u16 nlmsg_flags)
+			       u16 nlmsg_flags, bool net_admin)
 {
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 	struct inet_diag_msg *r;
@@ -444,6 +444,12 @@ static int inet_twsk_diag_fill(struct sock *sk,
 	r->idiag_uid	      = 0;
 	r->idiag_inode	      = 0;
 
+	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+				     tw->tw_mark)) {
+		nlmsg_cancel(skb, nlh);
+		return -EMSGSIZE;
+	}
+
 	nlmsg_end(skb, nlh);
 	return 0;
 }
@@ -494,7 +500,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			u16 nlmsg_flags, bool net_admin)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
-		return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags);
+		return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV)
 		return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
@@ -801,6 +807,8 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 		entry.mark = sk->sk_mark;
 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
 		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+	else if (sk->sk_state == TCP_TIME_WAIT)
+		entry.mark = inet_twsk(sk)->tw_mark;
 	else
 		entry.mark = 0;
 #ifdef CONFIG_SOCK_CGROUP_DATA

From 4d1fb7cde0ccc6000cafb72d9079de1504e3cb2a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 15 Jun 2021 20:33:38 -0700
Subject: [PATCH 1884/3292] ethtool: add a stricter length check

There has been a few errors in the ethtool reply size calculations,
most of those are hard to trigger during basic testing because of
skb size rounding up and netdev names being shorter than max.
Add a more precise check.

This change will affect the value of payload length displayed in
case of -EMSGSIZE but that should be okay, "payload length" isn't
a well defined term here.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/netlink.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 88d8a0243f35..a7346346114f 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -315,9 +315,9 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
 	struct ethnl_req_info *req_info = NULL;
 	const u8 cmd = info->genlhdr->cmd;
 	const struct ethnl_request_ops *ops;
+	int hdr_len, reply_len;
 	struct sk_buff *rskb;
 	void *reply_payload;
-	int reply_len;
 	int ret;
 
 	ops = ethnl_default_requests[cmd];
@@ -346,15 +346,20 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
 	ret = ops->reply_size(req_info, reply_data);
 	if (ret < 0)
 		goto err_cleanup;
-	reply_len = ret + ethnl_reply_header_size();
+	reply_len = ret;
 	ret = -ENOMEM;
-	rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd,
+	rskb = ethnl_reply_init(reply_len + ethnl_reply_header_size(),
+				req_info->dev, ops->reply_cmd,
 				ops->hdr_attr, info, &reply_payload);
 	if (!rskb)
 		goto err_cleanup;
+	hdr_len = rskb->len;
 	ret = ops->fill_reply(rskb, req_info, reply_data);
 	if (ret < 0)
 		goto err_msg;
+	WARN_ONCE(rskb->len - hdr_len > reply_len,
+		  "ethnl cmd %d: calculated reply length %d, but consumed %d\n",
+		  cmd, reply_len, rskb->len - hdr_len);
 	if (ops->cleanup_data)
 		ops->cleanup_data(reply_data);
 

From 56b57b809f9ce05ba34ba5089a54eef8b06b8a92 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 16 Jun 2021 12:21:06 +0800
Subject: [PATCH 1885/3292] qlcnic: Use list_for_each_entry() to simplify code
 in qlcnic_main.c

Convert list_for_each() to list_for_each_entry() where
applicable. This simplifies the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 96b947fde646..8a31ce29ecfc 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -319,10 +319,8 @@ int qlcnic_read_mac_addr(struct qlcnic_adapter *adapter)
 static void qlcnic_delete_adapter_mac(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_mac_vlan_list *cur;
-	struct list_head *head;
 
-	list_for_each(head, &adapter->mac_list) {
-		cur = list_entry(head, struct qlcnic_mac_vlan_list, list);
+	list_for_each_entry(cur, &adapter->mac_list, list) {
 		if (ether_addr_equal_unaligned(adapter->mac_addr, cur->mac_addr)) {
 			qlcnic_sre_macaddr_change(adapter, cur->mac_addr,
 						  0, QLCNIC_MAC_DEL);

From 95d359ed5a0c4b4c10b9d9986bc203c83d6c8a8c Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 16 Jun 2021 12:07:27 +0800
Subject: [PATCH 1886/3292] net: iosm: add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_pcie.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
index ac6baddfde61..7f7d364d3a51 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c
@@ -322,6 +322,7 @@ static const struct pci_device_id iosm_ipc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, INTEL_CP_DEVICE_7560_ID) },
 	{}
 };
+MODULE_DEVICE_TABLE(pci, iosm_ipc_ids);
 
 /* Enter sleep in s2idle case
  */

From 786f0dc627e6bc50dd57a7d4a421912224b0a061 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:27 +0800
Subject: [PATCH 1887/3292] net: cosa: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 2369ca250cd6..297ea340ca3d 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -608,7 +608,6 @@ err_out:
 	return err;
 }
 
-
 /*---------- network device ---------- */
 
 static int cosa_net_attach(struct net_device *dev, unsigned short encoding,
@@ -840,7 +839,6 @@ static int chrdev_rx_done(struct channel_data *chan)
 	return 1;
 }
 
-
 static ssize_t cosa_write(struct file *file,
 	const char __user *buf, size_t count, loff_t *ppos)
 {
@@ -988,7 +986,6 @@ static int cosa_fasync(struct inode *inode, struct file *file, int on)
 }
 #endif
 
-
 /* ---------- Ioctls ---------- */
 
 /*
@@ -1034,7 +1031,6 @@ static inline int cosa_download(struct cosa_data *cosa, void __user *arg)
 	if (d.len < 0 || d.len > COSA_MAX_FIRMWARE_SIZE)
 		return -EINVAL;
 
-
 	/* If something fails, force the user to reset the card */
 	cosa->firmware_status &= ~(COSA_FW_RESET|COSA_FW_DOWNLOAD);
 
@@ -1197,7 +1193,6 @@ static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
 	return ret;
 }
 
-
 /*---------- HW layer interface ---------- */
 
 /*
@@ -1372,7 +1367,6 @@ static int cosa_dma_able(struct channel_data *chan, char *buf, int len)
 	return 1;
 }
 
-
 /* ---------- The SRP/COSA ROM monitor functions ---------- */
 
 /*
@@ -1422,7 +1416,6 @@ static int download(struct cosa_data *cosa, const char __user *microcode, int le
 	return 0;
 }
 
-
 /*
  * Starting microcode is done via the "g" command of the SRP monitor.
  * The chat should be the following: "g" "g=" "<addr><CR>"
@@ -1537,7 +1530,6 @@ static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
 	return id;
 }
 
-
 /* ---------- Auxiliary routines for COSA/SRP monitor ---------- */
 
 /*
@@ -1623,7 +1615,6 @@ static int puthexnumber(struct cosa_data *cosa, int number)
 	return 0;
 }
 
-
 /* ---------- Interrupt routines ---------- */
 
 /*
@@ -1968,7 +1959,6 @@ again:
 	return IRQ_HANDLED;
 }
 
-
 /* ---------- I/O debugging routines ---------- */
 /*
  * These routines can be used to monitor COSA/SRP I/O and to printk()

From 0569a3d41667d8f3ec7764639c51b15d0e736488 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:28 +0800
Subject: [PATCH 1888/3292] net: cosa: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 297ea340ca3d..372dffc2d355 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -995,6 +995,7 @@ static int cosa_fasync(struct inode *inode, struct file *file, int on)
 static inline int cosa_reset(struct cosa_data *cosa)
 {
 	char idstring[COSA_MAX_ID_STRING];
+
 	if (cosa->usage > 1)
 		pr_info("cosa%d: WARNING: reset requested with cosa->usage > 1 (%d). Odd things may happen.\n",
 			cosa->num, cosa->usage);
@@ -1109,6 +1110,7 @@ static inline int cosa_start(struct cosa_data *cosa, int address)
 static inline int cosa_getidstr(struct cosa_data *cosa, char __user *string)
 {
 	int l = strlen(cosa->id_string)+1;
+
 	if (copy_to_user(string, cosa->id_string, l))
 		return -EFAULT;
 	return l;
@@ -1118,6 +1120,7 @@ static inline int cosa_getidstr(struct cosa_data *cosa, char __user *string)
 static inline int cosa_gettype(struct cosa_data *cosa, char __user *string)
 {
 	int l = strlen(cosa->type)+1;
+
 	if (copy_to_user(string, cosa->type, l))
 		return -EFAULT;
 	return l;
@@ -1127,6 +1130,7 @@ static int cosa_ioctl_common(struct cosa_data *cosa,
 	struct channel_data *channel, unsigned int cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
+
 	switch (cmd) {
 	case COSAIORSET:	/* Reset the device */
 		if (!capable(CAP_NET_ADMIN))
@@ -1172,6 +1176,7 @@ static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int rv;
 	struct channel_data *chan = dev_to_chan(dev);
+
 	rv = cosa_ioctl_common(chan->cosa, chan, cmd,
 			       (unsigned long)ifr->ifr_data);
 	if (rv != -ENOIOCTLCMD)
@@ -1356,6 +1361,7 @@ static int cosa_dma_able(struct channel_data *chan, char *buf, int len)
 {
 	static int count;
 	unsigned long b = (unsigned long)buf;
+
 	if (b+len >= MAX_DMA_ADDRESS)
 		return 0;
 	if ((b^ (b+len)) & 0x10000) {
@@ -1468,6 +1474,7 @@ static int readmem(struct cosa_data *cosa, char __user *microcode, int length, i
 	while (length--) {
 		char c;
 		int i;
+
 		if ((i=get_wait_data(cosa)) == -1) {
 			pr_info("0x%04x bytes remaining\n", length);
 			return -11;
@@ -1545,6 +1552,7 @@ static int get_wait_data(struct cosa_data *cosa)
 		/* read data and return them */
 		if (cosa_getstatus(cosa) & SR_RX_RDY) {
 			short r;
+
 			r = cosa_getdata8(cosa);
 #if 0
 			pr_info("get_wait_data returning after %d retries\n",
@@ -1568,6 +1576,7 @@ static int get_wait_data(struct cosa_data *cosa)
 static int put_wait_data(struct cosa_data *cosa, int data)
 {
 	int retries = 1000;
+
 	while (--retries) {
 		/* read data and return them */
 		if (cosa_getstatus(cosa) & SR_TX_RDY) {
@@ -1659,6 +1668,7 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 	if (!test_bit(IRQBIT, &cosa->rxtx)) {
 		/* flow control, see the comment above */
 		int i=0;
+
 		if (!cosa->txbitmap) {
 			pr_warn("%s: No channel wants data in TX IRQ. Expect DMA timeout.\n",
 				cosa->name);
@@ -1743,6 +1753,7 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 	if (cosa->busmaster) {
 		unsigned long addr = virt_to_bus(cosa->txbuf);
 		int count=0;
+
 		pr_info("busmaster IRQ\n");
 		while (!(cosa_getstatus(cosa)&SR_TX_RDY)) {
 			count++;
@@ -1873,6 +1884,7 @@ reject:		/* Reject the packet */
 static inline void eot_interrupt(struct cosa_data *cosa, int status)
 {
 	unsigned long flags, flags1;
+
 	spin_lock_irqsave(&cosa->lock, flags);
 	flags1 = claim_dma_lock();
 	disable_dma(cosa->dma);
@@ -1880,6 +1892,7 @@ static inline void eot_interrupt(struct cosa_data *cosa, int status)
 	release_dma_lock(flags1);
 	if (test_bit(TXBIT, &cosa->rxtx)) {
 		struct channel_data *chan = cosa->chan+cosa->txchan;
+
 		if (chan->tx_done)
 			if (chan->tx_done(chan, cosa->txsize))
 				clear_bit(chan->num, &cosa->txbitmap);
@@ -1887,6 +1900,7 @@ static inline void eot_interrupt(struct cosa_data *cosa, int status)
 #ifdef DEBUG_DATA
 	{
 		int i;
+
 		pr_info("cosa%dc%d: done rx(0x%x)",
 			cosa->num, cosa->rxchan->num, cosa->rxsize);
 		for (i=0; i<cosa->rxsize; i++)
@@ -1970,6 +1984,7 @@ again:
 static void debug_status_in(struct cosa_data *cosa, int status)
 {
 	char *s;
+
 	switch (status & SR_CMD_FROM_SRP_MASK) {
 	case SR_UP_REQUEST:
 		s = "RX_REQ";

From 77282db510d9fe4d77c1d79fb4563d5368e1d2b2 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:29 +0800
Subject: [PATCH 1889/3292] net: cosa: fix the code style issue about "foo*
 bar"

Fix the checkpatch error as "foo* bar" should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 372dffc2d355..c051c6120e30 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -337,7 +337,7 @@ static void debug_status_in(struct cosa_data *cosa, int status);
 static void debug_status_out(struct cosa_data *cosa, int status);
 #endif
 
-static inline struct channel_data* dev_to_chan(struct net_device *dev)
+static inline struct channel_data *dev_to_chan(struct net_device *dev)
 {
 	return (struct channel_data *)dev_to_hdlc(dev)->priv;
 }

From 2076b3e61a323e38256be44289aa32ae12ecf79a Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:30 +0800
Subject: [PATCH 1890/3292] net: cosa: replace comparison to NULL with
 "!chan->rx_skb"

According to the chackpatch.pl, comparison to NULL could
be written "!chan->rx_skb".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index c051c6120e30..7b57233aeec9 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -719,7 +719,7 @@ static char *cosa_net_setup_rx(struct channel_data *chan, int size)
 	 */
 	kfree_skb(chan->rx_skb);
 	chan->rx_skb = dev_alloc_skb(size);
-	if (chan->rx_skb == NULL) {
+	if (!chan->rx_skb) {
 		pr_notice("%s: Memory squeeze, dropping packet\n", chan->name);
 		chan->netdev->stats.rx_dropped++;
 		return NULL;
@@ -783,7 +783,7 @@ static ssize_t cosa_read(struct file *file,
 		return -ERESTARTSYS;
 	
 	chan->rxdata = kmalloc(COSA_MTU, GFP_DMA|GFP_KERNEL);
-	if (chan->rxdata == NULL) {
+	if (!chan->rxdata) {
 		mutex_unlock(&chan->rlock);
 		return -ENOMEM;
 	}
@@ -861,7 +861,7 @@ static ssize_t cosa_write(struct file *file,
 	
 	/* Allocate the buffer */
 	kbuf = kmalloc(count, GFP_KERNEL|GFP_DMA);
-	if (kbuf == NULL) {
+	if (!kbuf) {
 		up(&chan->wsem);
 		return -ENOMEM;
 	}

From b4d5f1e2cdebb436eea2137833f5cd267674875d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:31 +0800
Subject: [PATCH 1891/3292] net: cosa: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 7b57233aeec9..9b57b3a6c035 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -355,7 +355,8 @@ static int __init cosa_init(void)
 			goto out;
 		}
 	} else {
-		if (!(cosa_major=register_chrdev(0, "cosa", &cosa_fops))) {
+		cosa_major = register_chrdev(0, "cosa", &cosa_fops);
+		if (!cosa_major) {
 			pr_warn("unable to register chardev\n");
 			err = -EIO;
 			goto out;
@@ -563,7 +564,8 @@ static int cosa_probe(int base, int irq, int dma)
 		sema_init(&chan->wsem, 1);
 
 		/* Register the network interface */
-		if (!(chan->netdev = alloc_hdlcdev(chan))) {
+		chan->netdev = alloc_hdlcdev(chan);
+		if (!chan->netdev) {
 			pr_warn("%s: alloc_hdlcdev failed\n", chan->name);
 			err = -ENOMEM;
 			goto err_hdlcdev;
@@ -925,15 +927,15 @@ static int cosa_open(struct inode *inode, struct file *file)
 	int ret = 0;
 
 	mutex_lock(&cosa_chardev_mutex);
-	if ((n=iminor(file_inode(file))>>CARD_MINOR_BITS)
-		>= nr_cards) {
+	n = iminor(file_inode(file)) >> CARD_MINOR_BITS;
+	if (n >= nr_cards) {
 		ret = -ENODEV;
 		goto out;
 	}
 	cosa = cosa_cards+n;
 
-	if ((n=iminor(file_inode(file))
-		& ((1<<CARD_MINOR_BITS)-1)) >= cosa->nchannels) {
+	n = iminor(file_inode(file)) & ((1 << CARD_MINOR_BITS) - 1);
+	if (n >= cosa->nchannels) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -1095,7 +1097,8 @@ static inline int cosa_start(struct cosa_data *cosa, int address)
 		return -EPERM;
 	}
 	cosa->firmware_status &= ~COSA_FW_RESET;
-	if ((i=startmicrocode(cosa, address)) < 0) {
+	i = startmicrocode(cosa, address);
+	if (i < 0) {
 		pr_notice("cosa%d: start microcode at 0x%04x failed: %d\n",
 			  cosa->num, address, i);
 		return -EIO;
@@ -1475,7 +1478,8 @@ static int readmem(struct cosa_data *cosa, char __user *microcode, int length, i
 		char c;
 		int i;
 
-		if ((i=get_wait_data(cosa)) == -1) {
+		i = get_wait_data(cosa);
+		if (i == -1) {
 			pr_info("0x%04x bytes remaining\n", length);
 			return -11;
 		}
@@ -1523,9 +1527,10 @@ static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
 	 * the port returns '\r', '\n' or '\x2e' permanently.
 	 */
 	for (i=0; i<COSA_MAX_ID_STRING-1; i++, prev=curr) {
-		if ((curr = get_wait_data(cosa)) == -1) {
+		curr = get_wait_data(cosa);
+		if (curr == -1)
 			return -1;
-		}
+
 		curr &= 0xff;
 		if (curr != '\r' && curr != '\n' && curr != 0x2e)
 			idstring[id++] = curr;

From c0a963e25df9bce8e0b9dbe4b0d27b78338b6e1d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:32 +0800
Subject: [PATCH 1892/3292] net: cosa: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 89 ++++++++++++++++--------------------------
 1 file changed, 33 insertions(+), 56 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 9b57b3a6c035..c09c07902495 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -1,13 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /* $Id: cosa.c,v 1.31 2000/03/08 17:47:16 kas Exp $ */
 
-/*
- *  Copyright (C) 1995-1997  Jan "Yenya" Kasprzak <kas@fi.muni.cz>
+/*  Copyright (C) 1995-1997  Jan "Yenya" Kasprzak <kas@fi.muni.cz>
  *  Generic HDLC port Copyright (C) 2008 Krzysztof Halasa <khc@pm.waw.pl>
  */
 
-/*
- * The driver for the SRP and COSA synchronous serial cards.
+/* The driver for the SRP and COSA synchronous serial cards.
  *
  * HARDWARE INFO
  *
@@ -152,28 +150,25 @@ struct cosa_data {
 	char *type;				/* card type */
 };
 
-/*
- * Define this if you want all the possible ports to be autoprobed.
+/* Define this if you want all the possible ports to be autoprobed.
  * It is here but it probably is not a good idea to use this.
  */
-/* #define COSA_ISA_AUTOPROBE	1 */
+/* #define COSA_ISA_AUTOPROBE	1*/
 
-/*
- * Character device major number. 117 was allocated for us.
+/* Character device major number. 117 was allocated for us.
  * The value of 0 means to allocate a first free one.
  */
 static DEFINE_MUTEX(cosa_chardev_mutex);
 static int cosa_major = 117;
 
-/*
- * Encoding of the minor numbers:
+/* Encoding of the minor numbers:
  * The lowest CARD_MINOR_BITS bits means the channel on the single card,
  * the highest bits means the card number.
  */
 #define CARD_MINOR_BITS	4	/* How many bits in minor number are reserved
-				 * for the single card */
-/*
- * The following depends on CARD_MINOR_BITS. Unfortunately, the "MODULE_STRING"
+				 * for the single card
+				 */
+/* The following depends on CARD_MINOR_BITS. Unfortunately, the "MODULE_STRING"
  * macro doesn't like anything other than the raw number as an argument :-(
  */
 #define MAX_CARDS	16
@@ -184,8 +179,7 @@ static int cosa_major = 117;
 #define DRIVER_TXMAP_SHIFT	2
 #define DRIVER_TXMAP_MASK	0x0c	/* FIXME: 0xfc for 8-channel version */
 
-/*
- * for cosa->rxtx - indicates whether either transmit or receive is
+/* for cosa->rxtx - indicates whether either transmit or receive is
  * in progress. These values are mean number of the bit.
  */
 #define TXBIT 0
@@ -439,7 +433,8 @@ static int cosa_probe(int base, int irq, int dma)
 		return -1;
 	}
 	/* I/O address should be between 0x100 and 0x3ff and should be
-	 * multiple of 8. */
+	 * multiple of 8.
+	 */
 	if (base < 0x100 || base > 0x3ff || base & 0x7) {
 		pr_info("invalid I/O address 0x%x\n", base);
 		return -1;
@@ -450,7 +445,8 @@ static int cosa_probe(int base, int irq, int dma)
 		return -1;
 	}
 	/* and finally, on 16-bit COSA DMA should be 4-7 and 
-	 * I/O base should not be multiple of 0x10 */
+	 * I/O base should not be multiple of 0x10
+	 */
 	if (((base & 0x8) && dma < 4) || (!(base & 0x8) && dma > 3)) {
 		pr_info("8/16 bit base and DMA mismatch (base=0x%x, dma=%d)\n",
 			base, dma);
@@ -496,8 +492,7 @@ static int cosa_probe(int base, int irq, int dma)
 		unsigned long irqs;
 /*		pr_info("IRQ autoprobe\n"); */
 		irqs = probe_irq_on();
-		/* 
-		 * Enable interrupt on tx buffer empty (it sure is) 
+		/* Enable interrupt on tx buffer empty (it sure is)
 		 * really sure ?
 		 * FIXME: When this code is not used as module, we should
 		 * probably call udelay() instead of the interruptible sleep.
@@ -715,8 +710,7 @@ static int cosa_net_close(struct net_device *dev)
 
 static char *cosa_net_setup_rx(struct channel_data *chan, int size)
 {
-	/*
-	 * We can safely fall back to non-dma-able memory, because we have
+	/* We can safely fall back to non-dma-able memory, because we have
 	 * the cosa->bouncebuf pre-allocated.
 	 */
 	kfree_skb(chan->rx_skb);
@@ -990,8 +984,7 @@ static int cosa_fasync(struct inode *inode, struct file *file, int on)
 
 /* ---------- Ioctls ---------- */
 
-/*
- * Ioctl subroutines can safely be made inline, because they are called
+/* Ioctl subroutines can safely be made inline, because they are called
  * only from cosa_ioctl().
  */
 static inline int cosa_reset(struct cosa_data *cosa)
@@ -1203,8 +1196,7 @@ static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
 
 /*---------- HW layer interface ---------- */
 
-/*
- * The higher layer can bind itself to the HW layer by setting the callbacks
+/* The higher layer can bind itself to the HW layer by setting the callbacks
  * in the channel_data structure and by using these routines.
  */
 static void cosa_enable_rx(struct channel_data *chan)
@@ -1223,8 +1215,7 @@ static void cosa_disable_rx(struct channel_data *chan)
 		put_driver_status(cosa);
 }
 
-/*
- * FIXME: This routine probably should check for cosa_start_tx() called when
+/* FIXME: This routine probably should check for cosa_start_tx() called when
  * the previous transmit is still unfinished. In this case the non-zero
  * return value should indicate to the caller that the queuing(sp?) up
  * the transmit has failed.
@@ -1319,8 +1310,7 @@ static void put_driver_status_nolock(struct cosa_data *cosa)
 #endif
 }
 
-/*
- * The "kickme" function: When the DMA times out, this is called to
+/* The "kickme" function: When the DMA times out, this is called to
  * clean up the driver status.
  * FIXME: Preliminary support, the interface is probably wrong.
  */
@@ -1355,8 +1345,7 @@ static void cosa_kick(struct cosa_data *cosa)
 	spin_unlock_irqrestore(&cosa->lock, flags);
 }
 
-/*
- * Check if the whole buffer is DMA-able. It means it is below the 16M of
+/* Check if the whole buffer is DMA-able. It means it is below the 16M of
  * physical memory and doesn't span the 64k boundary. For now it seems
  * SKB's never do this, but we'll check this anyway.
  */
@@ -1378,8 +1367,7 @@ static int cosa_dma_able(struct channel_data *chan, char *buf, int len)
 
 /* ---------- The SRP/COSA ROM monitor functions ---------- */
 
-/*
- * Downloading SRP microcode: say "w" to SRP monitor, it answers by "w=",
+/* Downloading SRP microcode: say "w" to SRP monitor, it answers by "w=",
  * drivers need to say 4-digit hex number meaning start address of the microcode
  * separated by a single space. Monitor replies by saying " =". Now driver
  * has to write 4-digit hex number meaning the last byte address ended
@@ -1425,8 +1413,7 @@ static int download(struct cosa_data *cosa, const char __user *microcode, int le
 	return 0;
 }
 
-/*
- * Starting microcode is done via the "g" command of the SRP monitor.
+/* Starting microcode is done via the "g" command of the SRP monitor.
  * The chat should be the following: "g" "g=" "<addr><CR>"
  * "<CR><CR><LF><CR><LF>".
  */
@@ -1450,8 +1437,7 @@ static int startmicrocode(struct cosa_data *cosa, int address)
 	return 0;
 }
 
-/*
- * Reading memory is done via the "r" command of the SRP monitor.
+/* Reading memory is done via the "r" command of the SRP monitor.
  * The chat is the following "r" "r=" "<addr> " " =" "<last_byte> " " "
  * Then driver can read the data and the conversation is finished
  * by SRP monitor sending "<CR><LF>." (dot at the end).
@@ -1502,8 +1488,7 @@ static int readmem(struct cosa_data *cosa, char __user *microcode, int length, i
 	return 0;
 }
 
-/*
- * This function resets the device and reads the initial prompt
+/* This function resets the device and reads the initial prompt
  * of the device's ROM monitor.
  */
 static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
@@ -1518,8 +1503,7 @@ static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
 	/* Disable all IRQs from the card */
 	cosa_putstatus(cosa, 0);
 
-	/*
-	 * Try to read the ID string. The card then prints out the
+	/* Try to read the ID string. The card then prints out the
 	 * identification string ended by the "\n\x2e".
 	 *
 	 * The following loop is indexed through i (instead of id)
@@ -1544,8 +1528,7 @@ static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
 
 /* ---------- Auxiliary routines for COSA/SRP monitor ---------- */
 
-/*
- * This routine gets the data byte from the card waiting for the SR_RX_RDY
+/* This routine gets the data byte from the card waiting for the SR_RX_RDY
  * bit to be set in a loop. It should be used in the exceptional cases
  * only (for example when resetting the card or downloading the firmware.
  */
@@ -1573,8 +1556,7 @@ static int get_wait_data(struct cosa_data *cosa)
 	return -1;
 }
 
-/*
- * This routine puts the data byte to the card waiting for the SR_TX_RDY
+/* This routine puts the data byte to the card waiting for the SR_TX_RDY
  * bit to be set in a loop. It should be used in the exceptional cases
  * only (for example when resetting the card or downloading the firmware).
  */
@@ -1601,8 +1583,7 @@ static int put_wait_data(struct cosa_data *cosa, int data)
 	return -1;
 }
 	
-/* 
- * The following routine puts the hexadecimal number into the SRP monitor
+/* The following routine puts the hexadecimal number into the SRP monitor
  * and verifies the proper echo of the sent bytes. Returns 0 on success,
  * negative number on failure (-1,-3,-5,-7) means that put_wait_data() failed,
  * (-2,-4,-6,-8) means that reading echo failed.
@@ -1631,8 +1612,7 @@ static int puthexnumber(struct cosa_data *cosa, int number)
 
 /* ---------- Interrupt routines ---------- */
 
-/*
- * There are three types of interrupt:
+/* There are three types of interrupt:
  * At the beginning of transmit - this handled is in tx_interrupt(),
  * at the beginning of receive - it is in rx_interrupt() and
  * at the end of transmit/receive - it is the eot_interrupt() function.
@@ -1646,8 +1626,7 @@ static int puthexnumber(struct cosa_data *cosa, int number)
  * It's time to use the bottom half :-(
  */
 
-/*
- * Transmit interrupt routine - called when COSA is willing to obtain
+/* Transmit interrupt routine - called when COSA is willing to obtain
  * data from the OS. The most tricky part of the routine is selection
  * of channel we (OS) want to send packet for. For SRP we should probably
  * use the round-robin approach. The newer COSA firmwares have a simple
@@ -1924,8 +1903,7 @@ static inline void eot_interrupt(struct cosa_data *cosa, int status)
 	} else {
 		pr_notice("cosa%d: unexpected EOT interrupt\n", cosa->num);
 	}
-	/*
-	 * Clear the RXBIT, TXBIT and IRQBIT (the latest should be
+	/* Clear the RXBIT, TXBIT and IRQBIT (the latest should be
 	 * cleared anyway). We should do it as soon as possible
 	 * so that we can tell the COSA we are done and to give it a time
 	 * for recovery.
@@ -1979,8 +1957,7 @@ again:
 }
 
 /* ---------- I/O debugging routines ---------- */
-/*
- * These routines can be used to monitor COSA/SRP I/O and to printk()
+/* These routines can be used to monitor COSA/SRP I/O and to printk()
  * the data being transferred on the data and status I/O port in a
  * readable way.
  */

From c8f4b11727af9a8e7074c6def7e36ec679878001 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:33 +0800
Subject: [PATCH 1893/3292] net: cosa: add braces {} to all arms of the
 statement

Braces {} should be used on all arms of this statement.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index c09c07902495..b3dab61f176f 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -468,11 +468,11 @@ static int cosa_probe(int base, int irq, int dma)
 	}
 
 	/* Test the validity of identification string */
-	if (!strncmp(cosa->id_string, "SRP", 3))
+	if (!strncmp(cosa->id_string, "SRP", 3)) {
 		cosa->type = "srp";
-	else if (!strncmp(cosa->id_string, "COSA", 4))
+	} else if (!strncmp(cosa->id_string, "COSA", 4)) {
 		cosa->type = is_8bit(cosa)? "cosa8": "cosa16";
-	else {
+	} else {
 /* Print a warning only if we are not autoprobing */
 #ifndef COSA_ISA_AUTOPROBE
 		pr_info("valid signature not found at 0x%x\n", base);

From 70d063b9a6219a6fd6c88e9e318ea36889348a36 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:34 +0800
Subject: [PATCH 1894/3292] net: cosa: remove redundant braces {}

This patch removes redundant braces {}, to fix the
checkpatch.pl warning:
"braces {} are not necessary for single statement blocks".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index b3dab61f176f..6125ca49c944 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -1845,11 +1845,11 @@ reject:		/* Reject the packet */
 	disable_dma(cosa->dma);
 	clear_dma_ff(cosa->dma);
 	set_dma_mode(cosa->dma, DMA_MODE_READ);
-	if (cosa_dma_able(cosa->rxchan, cosa->rxbuf, cosa->rxsize & 0x1fff)) {
+	if (cosa_dma_able(cosa->rxchan, cosa->rxbuf, cosa->rxsize & 0x1fff))
 		set_dma_addr(cosa->dma, virt_to_bus(cosa->rxbuf));
-	} else {
+	else
 		set_dma_addr(cosa->dma, virt_to_bus(cosa->bouncebuf));
-	}
+
 	set_dma_count(cosa->dma, (cosa->rxsize&0x1fff));
 	enable_dma(cosa->dma);
 	release_dma_lock(flags);

From acc3edf0054eb44221b2a6629bfa575c85c6e901 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:35 +0800
Subject: [PATCH 1895/3292] net: cosa: add necessary () to macro argument

Macro argument 'cosa' may be better as '(cosa)' to avoid
precedence issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 6125ca49c944..a6629dc5c5e9 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -238,14 +238,14 @@ MODULE_LICENSE("GPL");
 #define cosa_inw  inw
 #endif
 
-#define is_8bit(cosa)		(!(cosa->datareg & 0x08))
+#define is_8bit(cosa)		(!((cosa)->datareg & 0x08))
 
-#define cosa_getstatus(cosa)	(cosa_inb(cosa->statusreg))
-#define cosa_putstatus(cosa, stat)	(cosa_outb(stat, cosa->statusreg))
-#define cosa_getdata16(cosa)	(cosa_inw(cosa->datareg))
-#define cosa_getdata8(cosa)	(cosa_inb(cosa->datareg))
-#define cosa_putdata16(cosa, dt)	(cosa_outw(dt, cosa->datareg))
-#define cosa_putdata8(cosa, dt)	(cosa_outb(dt, cosa->datareg))
+#define cosa_getstatus(cosa)	(cosa_inb((cosa)->statusreg))
+#define cosa_putstatus(cosa, stat)	(cosa_outb(stat, (cosa)->statusreg))
+#define cosa_getdata16(cosa)	(cosa_inw((cosa)->datareg))
+#define cosa_getdata8(cosa)	(cosa_inb((cosa)->datareg))
+#define cosa_putdata16(cosa, dt)	(cosa_outw(dt, (cosa)->datareg))
+#define cosa_putdata8(cosa, dt)	(cosa_outb(dt, (cosa)->datareg))
 
 /* Initialization stuff */
 static int cosa_probe(int ioaddr, int irq, int dma);

From 3fac4b941c06acaf35f578396eee3e28b1f7351e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:36 +0800
Subject: [PATCH 1896/3292] net: cosa: use BIT macro

This patch uses the BIT macro for setting individual bits,
to fix the following checkpatch.pl issue:
CHECK: Prefer using the BIT macro.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index a6629dc5c5e9..ade6da7438c9 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -122,9 +122,9 @@ struct channel_data {
 };
 
 /* cosa->firmware_status bits */
-#define COSA_FW_RESET		(1<<0)	/* Is the ROM monitor active? */
-#define COSA_FW_DOWNLOAD	(1<<1)	/* Is the microcode downloaded? */
-#define COSA_FW_START		(1<<2)	/* Is the microcode running? */
+#define COSA_FW_RESET		BIT(0)	/* Is the ROM monitor active? */
+#define COSA_FW_DOWNLOAD	BIT(1)	/* Is the microcode downloaded? */
+#define COSA_FW_START		BIT(2)	/* Is the microcode running? */
 
 struct cosa_data {
 	int num;			/* Card number */

From 9edc7d68b021c8ec9d59e0cf2d5fa8a56e7f2777 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:37 +0800
Subject: [PATCH 1897/3292] net: cosa: fix the alignment issue

Alignment should match open parenthesis.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index ade6da7438c9..fbfc3e54e293 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -274,14 +274,14 @@ static char *chrdev_setup_rx(struct channel_data *channel, int size);
 static int chrdev_rx_done(struct channel_data *channel);
 static int chrdev_tx_done(struct channel_data *channel, int size);
 static ssize_t cosa_read(struct file *file,
-	char __user *buf, size_t count, loff_t *ppos);
+			 char __user *buf, size_t count, loff_t *ppos);
 static ssize_t cosa_write(struct file *file,
-	const char __user *buf, size_t count, loff_t *ppos);
+			  const char __user *buf, size_t count, loff_t *ppos);
 static unsigned int cosa_poll(struct file *file, poll_table *poll);
 static int cosa_open(struct inode *inode, struct file *file);
 static int cosa_release(struct inode *inode, struct file *file);
 static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
-				unsigned long arg);
+			       unsigned long arg);
 #ifdef COSA_FASYNC_WORKING
 static int cosa_fasync(struct inode *inode, struct file *file, int on);
 #endif
@@ -655,7 +655,7 @@ static int cosa_net_open(struct net_device *dev)
 }
 
 static netdev_tx_t cosa_net_tx(struct sk_buff *skb,
-				     struct net_device *dev)
+			       struct net_device *dev)
 {
 	struct channel_data *chan = dev_to_chan(dev);
 
@@ -762,7 +762,7 @@ static int cosa_net_tx_done(struct channel_data *chan, int size)
 /*---------- Character device ---------- */
 
 static ssize_t cosa_read(struct file *file,
-	char __user *buf, size_t count, loff_t *ppos)
+			 char __user *buf, size_t count, loff_t *ppos)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	unsigned long flags;
@@ -836,7 +836,7 @@ static int chrdev_rx_done(struct channel_data *chan)
 }
 
 static ssize_t cosa_write(struct file *file,
-	const char __user *buf, size_t count, loff_t *ppos)
+			  const char __user *buf, size_t count, loff_t *ppos)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct channel_data *chan = file->private_data;
@@ -1123,7 +1123,8 @@ static inline int cosa_gettype(struct cosa_data *cosa, char __user *string)
 }
 
 static int cosa_ioctl_common(struct cosa_data *cosa,
-	struct channel_data *channel, unsigned int cmd, unsigned long arg)
+			     struct channel_data *channel, unsigned int cmd,
+			     unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
 
@@ -1181,7 +1182,7 @@ static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
-							unsigned long arg)
+			       unsigned long arg)
 {
 	struct channel_data *channel = file->private_data;
 	struct cosa_data *cosa;
@@ -1684,11 +1685,12 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 
 		cosa->txsize = cosa->chan[cosa->txchan].txsize;
 		if (cosa_dma_able(cosa->chan+cosa->txchan,
-			cosa->chan[cosa->txchan].txbuf, cosa->txsize)) {
+				  cosa->chan[cosa->txchan].txbuf,
+				  cosa->txsize)) {
 			cosa->txbuf = cosa->chan[cosa->txchan].txbuf;
 		} else {
 			memcpy(cosa->bouncebuf, cosa->chan[cosa->txchan].txbuf,
-				cosa->txsize);
+			       cosa->txsize);
 			cosa->txbuf = cosa->bouncebuf;
 		}
 	}

From 573747254f220f367dd3d59c7a535c08cb2ff4d2 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:38 +0800
Subject: [PATCH 1898/3292] net: cosa: fix the code style issue about trailing
 statements

Trailing statements should be on next line.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 111 +++++++++++++++++++++++++++--------------
 1 file changed, 74 insertions(+), 37 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index fbfc3e54e293..4fb602bbc04e 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -1379,18 +1379,27 @@ static int download(struct cosa_data *cosa, const char __user *microcode, int le
 {
 	int i;
 
-	if (put_wait_data(cosa, 'w') == -1) return -1;
+	if (put_wait_data(cosa, 'w') == -1)
+		return -1;
 	if ((i=get_wait_data(cosa)) != 'w') { printk("dnld: 0x%04x\n",i); return -2;}
-	if (get_wait_data(cosa) != '=') return -3;
+	if (get_wait_data(cosa) != '=')
+		return -3;
 
-	if (puthexnumber(cosa, address) < 0) return -4;
-	if (put_wait_data(cosa, ' ') == -1) return -10;
-	if (get_wait_data(cosa) != ' ') return -11;
-	if (get_wait_data(cosa) != '=') return -12;
+	if (puthexnumber(cosa, address) < 0)
+		return -4;
+	if (put_wait_data(cosa, ' ') == -1)
+		return -10;
+	if (get_wait_data(cosa) != ' ')
+		return -11;
+	if (get_wait_data(cosa) != '=')
+		return -12;
 
-	if (puthexnumber(cosa, address+length-1) < 0) return -13;
-	if (put_wait_data(cosa, ' ') == -1) return -18;
-	if (get_wait_data(cosa) != ' ') return -19;
+	if (puthexnumber(cosa, address + length - 1) < 0)
+		return -13;
+	if (put_wait_data(cosa, ' ') == -1)
+		return -18;
+	if (get_wait_data(cosa) != ' ')
+		return -19;
 
 	while (length--) {
 		char c;
@@ -1405,9 +1414,12 @@ static int download(struct cosa_data *cosa, const char __user *microcode, int le
 		microcode++;
 	}
 
-	if (get_wait_data(cosa) != '\r') return -21;
-	if (get_wait_data(cosa) != '\n') return -22;
-	if (get_wait_data(cosa) != '.') return -23;
+	if (get_wait_data(cosa) != '\r')
+		return -21;
+	if (get_wait_data(cosa) != '\n')
+		return -22;
+	if (get_wait_data(cosa) != '.')
+		return -23;
 #if 0
 	printk(KERN_DEBUG "cosa%d: download completed.\n", cosa->num);
 #endif
@@ -1420,18 +1432,28 @@ static int download(struct cosa_data *cosa, const char __user *microcode, int le
  */
 static int startmicrocode(struct cosa_data *cosa, int address)
 {
-	if (put_wait_data(cosa, 'g') == -1) return -1;
-	if (get_wait_data(cosa) != 'g') return -2;
-	if (get_wait_data(cosa) != '=') return -3;
+	if (put_wait_data(cosa, 'g') == -1)
+		return -1;
+	if (get_wait_data(cosa) != 'g')
+		return -2;
+	if (get_wait_data(cosa) != '=')
+		return -3;
 
-	if (puthexnumber(cosa, address) < 0) return -4;
-	if (put_wait_data(cosa, '\r') == -1) return -5;
+	if (puthexnumber(cosa, address) < 0)
+		return -4;
+	if (put_wait_data(cosa, '\r') == -1)
+		return -5;
 	
-	if (get_wait_data(cosa) != '\r') return -6;
-	if (get_wait_data(cosa) != '\r') return -7;
-	if (get_wait_data(cosa) != '\n') return -8;
-	if (get_wait_data(cosa) != '\r') return -9;
-	if (get_wait_data(cosa) != '\n') return -10;
+	if (get_wait_data(cosa) != '\r')
+		return -6;
+	if (get_wait_data(cosa) != '\r')
+		return -7;
+	if (get_wait_data(cosa) != '\n')
+		return -8;
+	if (get_wait_data(cosa) != '\r')
+		return -9;
+	if (get_wait_data(cosa) != '\n')
+		return -10;
 #if 0
 	printk(KERN_DEBUG "cosa%d: microcode started\n", cosa->num);
 #endif
@@ -1448,18 +1470,28 @@ static int startmicrocode(struct cosa_data *cosa, int address)
  */
 static int readmem(struct cosa_data *cosa, char __user *microcode, int length, int address)
 {
-	if (put_wait_data(cosa, 'r') == -1) return -1;
-	if ((get_wait_data(cosa)) != 'r') return -2;
-	if ((get_wait_data(cosa)) != '=') return -3;
+	if (put_wait_data(cosa, 'r') == -1)
+		return -1;
+	if ((get_wait_data(cosa)) != 'r')
+		return -2;
+	if ((get_wait_data(cosa)) != '=')
+		return -3;
 
-	if (puthexnumber(cosa, address) < 0) return -4;
-	if (put_wait_data(cosa, ' ') == -1) return -5;
-	if (get_wait_data(cosa) != ' ') return -6;
-	if (get_wait_data(cosa) != '=') return -7;
+	if (puthexnumber(cosa, address) < 0)
+		return -4;
+	if (put_wait_data(cosa, ' ') == -1)
+		return -5;
+	if (get_wait_data(cosa) != ' ')
+		return -6;
+	if (get_wait_data(cosa) != '=')
+		return -7;
 
-	if (puthexnumber(cosa, address+length-1) < 0) return -8;
-	if (put_wait_data(cosa, ' ') == -1) return -9;
-	if (get_wait_data(cosa) != ' ') return -10;
+	if (puthexnumber(cosa, address + length - 1) < 0)
+		return -8;
+	if (put_wait_data(cosa, ' ') == -1)
+		return -9;
+	if (get_wait_data(cosa) != ' ')
+		return -10;
 
 	while (length--) {
 		char c;
@@ -1480,9 +1512,12 @@ static int readmem(struct cosa_data *cosa, char __user *microcode, int length, i
 		microcode++;
 	}
 
-	if (get_wait_data(cosa) != '\r') return -21;
-	if (get_wait_data(cosa) != '\n') return -22;
-	if (get_wait_data(cosa) != '.') return -23;
+	if (get_wait_data(cosa) != '\r')
+		return -21;
+	if (get_wait_data(cosa) != '\n')
+		return -22;
+	if (get_wait_data(cosa) != '.')
+		return -23;
 #if 0
 	printk(KERN_DEBUG "cosa%d: readmem completed.\n", cosa->num);
 #endif
@@ -1744,7 +1779,8 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 		while (!(cosa_getstatus(cosa)&SR_TX_RDY)) {
 			count++;
 			udelay(10);
-			if (count > 1000) break;
+			if (count > 1000)
+				break;
 		}
 		pr_info("status %x\n", cosa_getstatus(cosa));
 		pr_info("ready after %d loops\n", count);
@@ -1753,7 +1789,8 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 		count = 0;
 		while (!(cosa_getstatus(cosa)&SR_TX_RDY)) {
 			count++;
-			if (count > 1000) break;
+			if (count > 1000)
+				break;
 			udelay(10);
 		}
 		pr_info("ready after %d loops\n", count);

From e84c3e1436dc2124242ca70d14cb5805c8c36c93 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:39 +0800
Subject: [PATCH 1899/3292] net: cosa: add some required spaces

Add space required before the open parenthesis '(' and '{'.
Add space required after that close brace '}' and ','
Add spaces required around that '=' , '&', '*', '|', '+', '/' and '-'.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 137 +++++++++++++++++++++--------------------
 1 file changed, 69 insertions(+), 68 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 4fb602bbc04e..26cdfdad4efc 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -88,7 +88,7 @@
 #define COSA_MAX_ID_STRING	128
 
 /* Maximum length of the channel name */
-#define COSA_MAX_NAME		(sizeof("cosaXXXcXXX")+1)
+#define COSA_MAX_NAME		(sizeof("cosaXXXcXXX") + 1)
 
 /* Per-channel data structure */
 
@@ -192,22 +192,22 @@ static int cosa_major = 117;
 #undef DEBUG_IRQS //1	/* Print the message when the IRQ is received */
 #undef DEBUG_IO   //1	/* Dump the I/O traffic */
 
-#define TX_TIMEOUT	(5*HZ)
+#define TX_TIMEOUT	(5 * HZ)
 
 /* Maybe the following should be allocated dynamically */
 static struct cosa_data cosa_cards[MAX_CARDS];
 static int nr_cards;
 
 #ifdef COSA_ISA_AUTOPROBE
-static int io[MAX_CARDS+1]  = { 0x220, 0x228, 0x210, 0x218, 0, };
+static int io[MAX_CARDS + 1]  = {0x220, 0x228, 0x210, 0x218, 0,};
 /* NOTE: DMA is not autoprobed!!! */
-static int dma[MAX_CARDS+1] = { 1, 7, 1, 7, 1, 7, 1, 7, 0, };
+static int dma[MAX_CARDS + 1] = {1, 7, 1, 7, 1, 7, 1, 7, 0,};
 #else
-static int io[MAX_CARDS+1];
-static int dma[MAX_CARDS+1];
+static int io[MAX_CARDS + 1];
+static int dma[MAX_CARDS + 1];
 #endif
 /* IRQ can be safely autoprobed */
-static int irq[MAX_CARDS+1] = { -1, -1, -1, -1, -1, -1, 0, };
+static int irq[MAX_CARDS + 1] = {-1, -1, -1, -1, -1, -1, 0,};
 
 /* for class stuff*/
 static struct class *cosa_class;
@@ -356,9 +356,9 @@ static int __init cosa_init(void)
 			goto out;
 		}
 	}
-	for (i=0; i<MAX_CARDS; i++)
+	for (i = 0; i < MAX_CARDS; i++)
 		cosa_cards[i].num = -1;
-	for (i=0; io[i] != 0 && i < MAX_CARDS; i++)
+	for (i = 0; io[i] != 0 && i < MAX_CARDS; i++)
 		cosa_probe(io[i], irq[i], dma[i]);
 	if (!nr_cards) {
 		pr_warn("no devices found\n");
@@ -421,7 +421,7 @@ static const struct net_device_ops cosa_ops = {
 
 static int cosa_probe(int base, int irq, int dma)
 {
-	struct cosa_data *cosa = cosa_cards+nr_cards;
+	struct cosa_data *cosa = cosa_cards + nr_cards;
 	int i, err = 0;
 
 	memset(cosa, 0, sizeof(struct cosa_data));
@@ -455,10 +455,10 @@ static int cosa_probe(int base, int irq, int dma)
 
 	cosa->dma = dma;
 	cosa->datareg = base;
-	cosa->statusreg = is_8bit(cosa)?base+1:base+2;
+	cosa->statusreg = is_8bit(cosa) ? base + 1 : base + 2;
 	spin_lock_init(&cosa->lock);
 
-	if (!request_region(base, is_8bit(cosa)?2:4,"cosa"))
+	if (!request_region(base, is_8bit(cosa) ? 2 : 4, "cosa"))
 		return -1;
 	
 	if (cosa_reset_and_read_id(cosa, cosa->id_string) < 0) {
@@ -471,7 +471,7 @@ static int cosa_probe(int base, int irq, int dma)
 	if (!strncmp(cosa->id_string, "SRP", 3)) {
 		cosa->type = "srp";
 	} else if (!strncmp(cosa->id_string, "COSA", 4)) {
-		cosa->type = is_8bit(cosa)? "cosa8": "cosa16";
+		cosa->type = is_8bit(cosa) ? "cosa8" : "cosa16";
 	} else {
 /* Print a warning only if we are not autoprobing */
 #ifndef COSA_ISA_AUTOPROBE
@@ -481,8 +481,8 @@ static int cosa_probe(int base, int irq, int dma)
 		goto err_out;
 	}
 	/* Update the name of the region now we know the type of card */ 
-	release_region(base, is_8bit(cosa)?2:4);
-	if (!request_region(base, is_8bit(cosa)?2:4, cosa->type)) {
+	release_region(base, is_8bit(cosa) ? 2 : 4);
+	if (!request_region(base, is_8bit(cosa) ? 2 : 4, cosa->type)) {
 		printk(KERN_DEBUG "changing name at 0x%x failed.\n", base);
 		return -1;
 	}
@@ -533,7 +533,7 @@ static int cosa_probe(int base, int irq, int dma)
 		goto err_out1;
 	}
 	
-	cosa->bouncebuf = kmalloc(COSA_MTU, GFP_KERNEL|GFP_DMA);
+	cosa->bouncebuf = kmalloc(COSA_MTU, GFP_KERNEL | GFP_DMA);
 	if (!cosa->bouncebuf) {
 		err = -ENOMEM;
 		goto err_out2;
@@ -600,7 +600,7 @@ err_out2:
 err_out1:
 	free_irq(cosa->irq, cosa);
 err_out:
-	release_region(cosa->datareg,is_8bit(cosa)?2:4);
+	release_region(cosa->datareg, is_8bit(cosa) ? 2 : 4);
 	pr_notice("cosa%d: allocating resources failed\n", cosa->num);
 	return err;
 }
@@ -778,7 +778,7 @@ static ssize_t cosa_read(struct file *file,
 	if (mutex_lock_interruptible(&chan->rlock))
 		return -ERESTARTSYS;
 	
-	chan->rxdata = kmalloc(COSA_MTU, GFP_DMA|GFP_KERNEL);
+	chan->rxdata = kmalloc(COSA_MTU, GFP_DMA | GFP_KERNEL);
 	if (!chan->rxdata) {
 		mutex_unlock(&chan->rlock);
 		return -ENOMEM;
@@ -856,7 +856,7 @@ static ssize_t cosa_write(struct file *file,
 		count = COSA_MTU;
 	
 	/* Allocate the buffer */
-	kbuf = kmalloc(count, GFP_KERNEL|GFP_DMA);
+	kbuf = kmalloc(count, GFP_KERNEL | GFP_DMA);
 	if (!kbuf) {
 		up(&chan->wsem);
 		return -ENOMEM;
@@ -866,7 +866,7 @@ static ssize_t cosa_write(struct file *file,
 		kfree(kbuf);
 		return -EFAULT;
 	}
-	chan->tx_status=0;
+	chan->tx_status = 0;
 	cosa_start_tx(chan, kbuf, count);
 
 	spin_lock_irqsave(&cosa->lock, flags);
@@ -926,7 +926,7 @@ static int cosa_open(struct inode *inode, struct file *file)
 		ret = -ENODEV;
 		goto out;
 	}
-	cosa = cosa_cards+n;
+	cosa = cosa_cards + n;
 
 	n = iminor(file_inode(file)) & ((1 << CARD_MINOR_BITS) - 1);
 	if (n >= cosa->nchannels) {
@@ -994,7 +994,7 @@ static inline int cosa_reset(struct cosa_data *cosa)
 	if (cosa->usage > 1)
 		pr_info("cosa%d: WARNING: reset requested with cosa->usage > 1 (%d). Odd things may happen.\n",
 			cosa->num, cosa->usage);
-	cosa->firmware_status &= ~(COSA_FW_RESET|COSA_FW_START);
+	cosa->firmware_status &= ~(COSA_FW_RESET | COSA_FW_START);
 	if (cosa_reset_and_read_id(cosa, idstring) < 0) {
 		pr_notice("cosa%d: reset failed\n", cosa->num);
 		return -EIO;
@@ -1028,7 +1028,7 @@ static inline int cosa_download(struct cosa_data *cosa, void __user *arg)
 		return -EINVAL;
 
 	/* If something fails, force the user to reset the card */
-	cosa->firmware_status &= ~(COSA_FW_RESET|COSA_FW_DOWNLOAD);
+	cosa->firmware_status &= ~(COSA_FW_RESET | COSA_FW_DOWNLOAD);
 
 	i = download(cosa, d.code, d.len, d.addr);
 	if (i < 0) {
@@ -1038,7 +1038,7 @@ static inline int cosa_download(struct cosa_data *cosa, void __user *arg)
 	}
 	pr_info("cosa%d: downloading microcode - 0x%04x bytes at 0x%04x\n",
 		cosa->num, d.len, d.addr);
-	cosa->firmware_status |= COSA_FW_RESET|COSA_FW_DOWNLOAD;
+	cosa->firmware_status |= COSA_FW_RESET | COSA_FW_DOWNLOAD;
 	return 0;
 }
 
@@ -1083,8 +1083,8 @@ static inline int cosa_start(struct cosa_data *cosa, int address)
 		pr_info("cosa%d: WARNING: start microcode requested with cosa->usage > 1 (%d). Odd things may happen.\n",
 			cosa->num, cosa->usage);
 
-	if ((cosa->firmware_status & (COSA_FW_RESET|COSA_FW_DOWNLOAD))
-		!= (COSA_FW_RESET|COSA_FW_DOWNLOAD)) {
+	if ((cosa->firmware_status & (COSA_FW_RESET | COSA_FW_DOWNLOAD))
+		!= (COSA_FW_RESET | COSA_FW_DOWNLOAD)) {
 		pr_notice("%s: download the microcode and/or reset the card first (status %d)\n",
 			  cosa->name, cosa->firmware_status);
 		return -EPERM;
@@ -1105,7 +1105,7 @@ static inline int cosa_start(struct cosa_data *cosa, int address)
 /* Buffer of size at least COSA_MAX_ID_STRING is expected */
 static inline int cosa_getidstr(struct cosa_data *cosa, char __user *string)
 {
-	int l = strlen(cosa->id_string)+1;
+	int l = strlen(cosa->id_string) + 1;
 
 	if (copy_to_user(string, cosa->id_string, l))
 		return -EFAULT;
@@ -1115,7 +1115,7 @@ static inline int cosa_getidstr(struct cosa_data *cosa, char __user *string)
 /* Buffer of size at least COSA_MAX_ID_STRING is expected */
 static inline int cosa_gettype(struct cosa_data *cosa, char __user *string)
 {
-	int l = strlen(cosa->type)+1;
+	int l = strlen(cosa->type) + 1;
 
 	if (copy_to_user(string, cosa->type, l))
 		return -EFAULT;
@@ -1230,7 +1230,7 @@ static int cosa_start_tx(struct channel_data *chan, char *buf, int len)
 
 	pr_info("cosa%dc%d: starting tx(0x%x)",
 		chan->cosa->num, chan->num, len);
-	for (i=0; i<len; i++)
+	for (i = 0; i < len; i++)
 		pr_cont(" %02x", buf[i]&0xff);
 	pr_cont("\n");
 #endif
@@ -1257,10 +1257,10 @@ static void put_driver_status(struct cosa_data *cosa)
 
 	status = (cosa->rxbitmap ? DRIVER_RX_READY : 0)
 		| (cosa->txbitmap ? DRIVER_TX_READY : 0)
-		| (cosa->txbitmap? ~(cosa->txbitmap<<DRIVER_TXMAP_SHIFT)
-			&DRIVER_TXMAP_MASK : 0);
+		| (cosa->txbitmap ? ~(cosa->txbitmap << DRIVER_TXMAP_SHIFT)
+			& DRIVER_TXMAP_MASK : 0);
 	if (!cosa->rxtx) {
-		if (cosa->rxbitmap|cosa->txbitmap) {
+		if (cosa->rxbitmap | cosa->txbitmap) {
 			if (!cosa->enabled) {
 				cosa_putstatus(cosa, SR_RX_INT_ENA);
 #ifdef DEBUG_IO
@@ -1289,10 +1289,10 @@ static void put_driver_status_nolock(struct cosa_data *cosa)
 
 	status = (cosa->rxbitmap ? DRIVER_RX_READY : 0)
 		| (cosa->txbitmap ? DRIVER_TX_READY : 0)
-		| (cosa->txbitmap? ~(cosa->txbitmap<<DRIVER_TXMAP_SHIFT)
-			&DRIVER_TXMAP_MASK : 0);
+		| (cosa->txbitmap ? ~(cosa->txbitmap << DRIVER_TXMAP_SHIFT)
+			& DRIVER_TXMAP_MASK : 0);
 
-	if (cosa->rxbitmap|cosa->txbitmap) {
+	if (cosa->rxbitmap | cosa->txbitmap) {
 		cosa_putstatus(cosa, SR_RX_INT_ENA);
 #ifdef DEBUG_IO
 		debug_status_out(cosa, SR_RX_INT_ENA);
@@ -1355,9 +1355,9 @@ static int cosa_dma_able(struct channel_data *chan, char *buf, int len)
 	static int count;
 	unsigned long b = (unsigned long)buf;
 
-	if (b+len >= MAX_DMA_ADDRESS)
+	if (b + len >= MAX_DMA_ADDRESS)
 		return 0;
-	if ((b^ (b+len)) & 0x10000) {
+	if ((b ^ (b + len)) & 0x10000) {
 		if (count++ < 5)
 			pr_info("%s: packet spanning a 64k boundary\n",
 				chan->name);
@@ -1502,7 +1502,7 @@ static int readmem(struct cosa_data *cosa, char __user *microcode, int length, i
 			pr_info("0x%04x bytes remaining\n", length);
 			return -11;
 		}
-		c=i;
+		c = i;
 #if 1
 		if (put_user(c, microcode))
 			return -23; /* ??? */
@@ -1529,7 +1529,7 @@ static int readmem(struct cosa_data *cosa, char __user *microcode, int length, i
  */
 static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
 {
-	int i=0, id=0, prev=0, curr=0;
+	int i = 0, id = 0, prev = 0, curr = 0;
 
 	/* Reset the card ... */
 	cosa_putstatus(cosa, 0);
@@ -1546,7 +1546,7 @@ static int cosa_reset_and_read_id(struct cosa_data *cosa, char *idstring)
 	 * to avoid looping forever when for any reason
 	 * the port returns '\r', '\n' or '\x2e' permanently.
 	 */
-	for (i=0; i<COSA_MAX_ID_STRING-1; i++, prev=curr) {
+	for (i = 0; i < COSA_MAX_ID_STRING - 1; i++, prev = curr) {
 		curr = get_wait_data(cosa);
 		if (curr == -1)
 			return -1;
@@ -1580,7 +1580,7 @@ static int get_wait_data(struct cosa_data *cosa)
 			r = cosa_getdata8(cosa);
 #if 0
 			pr_info("get_wait_data returning after %d retries\n",
-				999-retries);
+				999 - retries);
 #endif
 			return r;
 		}
@@ -1605,7 +1605,7 @@ static int put_wait_data(struct cosa_data *cosa, int data)
 		if (cosa_getstatus(cosa) & SR_TX_RDY) {
 			cosa_putdata8(cosa, data);
 #if 0
-			pr_info("Putdata: %d retries\n", 999-retries);
+			pr_info("Putdata: %d retries\n", 999 - retries);
 #endif
 			return 0;
 		}
@@ -1631,16 +1631,16 @@ static int puthexnumber(struct cosa_data *cosa, int number)
 
 	/* Well, I should probably replace this by something faster. */
 	sprintf(temp, "%04X", number);
-	for (i=0; i<4; i++) {
+	for (i = 0; i < 4; i++) {
 		if (put_wait_data(cosa, temp[i]) == -1) {
 			pr_notice("cosa%d: puthexnumber failed to write byte %d\n",
 				  cosa->num, i);
-			return -1-2*i;
+			return -1 - 2 * i;
 		}
 		if (get_wait_data(cosa) != temp[i]) {
 			pr_notice("cosa%d: puthexhumber failed to read echo of byte %d\n",
 				  cosa->num, i);
-			return -2-2*i;
+			return -2 - 2 * i;
 		}
 	}
 	return 0;
@@ -1687,7 +1687,7 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 	set_bit(TXBIT, &cosa->rxtx);
 	if (!test_bit(IRQBIT, &cosa->rxtx)) {
 		/* flow control, see the comment above */
-		int i=0;
+		int i = 0;
 
 		if (!cosa->txbitmap) {
 			pr_warn("%s: No channel wants data in TX IRQ. Expect DMA timeout.\n",
@@ -1702,9 +1702,10 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 			i++;
 			if (cosa->txchan >= cosa->nchannels)
 				cosa->txchan = 0;
-			if (!(cosa->txbitmap & (1<<cosa->txchan)))
+			if (!(cosa->txbitmap & (1 << cosa->txchan)))
 				continue;
-			if (~status & (1 << (cosa->txchan+DRIVER_TXMAP_SHIFT)))
+			if (~status &
+			    (1 << (cosa->txchan + DRIVER_TXMAP_SHIFT)))
 				break;
 			/* in second pass, accept first ready-to-TX channel */
 			if (i > cosa->nchannels) {
@@ -1719,7 +1720,7 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 		}
 
 		cosa->txsize = cosa->chan[cosa->txchan].txsize;
-		if (cosa_dma_able(cosa->chan+cosa->txchan,
+		if (cosa_dma_able(cosa->chan + cosa->txchan,
 				  cosa->chan[cosa->txchan].txbuf,
 				  cosa->txsize)) {
 			cosa->txbuf = cosa->chan[cosa->txchan].txbuf;
@@ -1733,11 +1734,11 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 	if (is_8bit(cosa)) {
 		if (!test_bit(IRQBIT, &cosa->rxtx)) {
 			cosa_putstatus(cosa, SR_TX_INT_ENA);
-			cosa_putdata8(cosa, ((cosa->txchan << 5) & 0xe0)|
+			cosa_putdata8(cosa, ((cosa->txchan << 5) & 0xe0) |
 				((cosa->txsize >> 8) & 0x1f));
 #ifdef DEBUG_IO
 			debug_status_out(cosa, SR_TX_INT_ENA);
-			debug_data_out(cosa, ((cosa->txchan << 5) & 0xe0)|
+			debug_data_out(cosa, ((cosa->txchan << 5) & 0xe0) |
                                 ((cosa->txsize >> 8) & 0x1f));
 			debug_data_in(cosa, cosa_getdata8(cosa));
 #else
@@ -1749,19 +1750,19 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 		} else {
 			clear_bit(IRQBIT, &cosa->rxtx);
 			cosa_putstatus(cosa, 0);
-			cosa_putdata8(cosa, cosa->txsize&0xff);
+			cosa_putdata8(cosa, cosa->txsize & 0xff);
 #ifdef DEBUG_IO
 			debug_status_out(cosa, 0);
-			debug_data_out(cosa, cosa->txsize&0xff);
+			debug_data_out(cosa, cosa->txsize & 0xff);
 #endif
 		}
 	} else {
 		cosa_putstatus(cosa, SR_TX_INT_ENA);
-		cosa_putdata16(cosa, ((cosa->txchan<<13) & 0xe000)
+		cosa_putdata16(cosa, ((cosa->txchan << 13) & 0xe000)
 			| (cosa->txsize & 0x1fff));
 #ifdef DEBUG_IO
 		debug_status_out(cosa, SR_TX_INT_ENA);
-		debug_data_out(cosa, ((cosa->txchan<<13) & 0xe000)
+		debug_data_out(cosa, ((cosa->txchan << 13) & 0xe000)
                         | (cosa->txsize & 0x1fff));
 		debug_data_in(cosa, cosa_getdata8(cosa));
 		debug_status_out(cosa, 0);
@@ -1773,10 +1774,10 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 
 	if (cosa->busmaster) {
 		unsigned long addr = virt_to_bus(cosa->txbuf);
-		int count=0;
+		int count = 0;
 
 		pr_info("busmaster IRQ\n");
-		while (!(cosa_getstatus(cosa)&SR_TX_RDY)) {
+		while (!(cosa_getstatus(cosa) & SR_TX_RDY)) {
 			count++;
 			udelay(10);
 			if (count > 1000)
@@ -1784,17 +1785,17 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 		}
 		pr_info("status %x\n", cosa_getstatus(cosa));
 		pr_info("ready after %d loops\n", count);
-		cosa_putdata16(cosa, (addr >> 16)&0xffff);
+		cosa_putdata16(cosa, (addr >> 16) & 0xffff);
 
 		count = 0;
-		while (!(cosa_getstatus(cosa)&SR_TX_RDY)) {
+		while (!(cosa_getstatus(cosa) & SR_TX_RDY)) {
 			count++;
 			if (count > 1000)
 				break;
 			udelay(10);
 		}
 		pr_info("ready after %d loops\n", count);
-		cosa_putdata16(cosa, addr &0xffff);
+		cosa_putdata16(cosa, addr & 0xffff);
 		flags1 = claim_dma_lock();
 		set_dma_mode(cosa->dma, DMA_MODE_CASCADE);
 		enable_dma(cosa->dma);
@@ -1810,9 +1811,9 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 		enable_dma(cosa->dma);
 		release_dma_lock(flags1);
 	}
-	cosa_putstatus(cosa, SR_TX_DMA_ENA|SR_USR_INT_ENA);
+	cosa_putstatus(cosa, SR_TX_DMA_ENA | SR_USR_INT_ENA);
 #ifdef DEBUG_IO
-	debug_status_out(cosa, SR_TX_DMA_ENA|SR_USR_INT_ENA);
+	debug_status_out(cosa, SR_TX_DMA_ENA | SR_USR_INT_ENA);
 #endif
 	spin_unlock_irqrestore(&cosa->lock, flags);
 }
@@ -1831,7 +1832,7 @@ static inline void rx_interrupt(struct cosa_data *cosa, int status)
 		if (!test_bit(IRQBIT, &cosa->rxtx)) {
 			set_bit(IRQBIT, &cosa->rxtx);
 			put_driver_status_nolock(cosa);
-			cosa->rxsize = cosa_getdata8(cosa) <<8;
+			cosa->rxsize = cosa_getdata8(cosa) << 8;
 #ifdef DEBUG_IO
 			debug_data_in(cosa, cosa->rxsize >> 8);
 #endif
@@ -1889,15 +1890,15 @@ reject:		/* Reject the packet */
 	else
 		set_dma_addr(cosa->dma, virt_to_bus(cosa->bouncebuf));
 
-	set_dma_count(cosa->dma, (cosa->rxsize&0x1fff));
+	set_dma_count(cosa->dma, (cosa->rxsize & 0x1fff));
 	enable_dma(cosa->dma);
 	release_dma_lock(flags);
 	spin_lock_irqsave(&cosa->lock, flags);
-	cosa_putstatus(cosa, SR_RX_DMA_ENA|SR_USR_INT_ENA);
+	cosa_putstatus(cosa, SR_RX_DMA_ENA | SR_USR_INT_ENA);
 	if (!is_8bit(cosa) && (status & SR_TX_RDY))
 		cosa_putdata8(cosa, DRIVER_RX_READY);
 #ifdef DEBUG_IO
-	debug_status_out(cosa, SR_RX_DMA_ENA|SR_USR_INT_ENA);
+	debug_status_out(cosa, SR_RX_DMA_ENA | SR_USR_INT_ENA);
 	if (!is_8bit(cosa) && (status & SR_TX_RDY))
 		debug_data_cmd(cosa, DRIVER_RX_READY);
 #endif
@@ -1914,7 +1915,7 @@ static inline void eot_interrupt(struct cosa_data *cosa, int status)
 	clear_dma_ff(cosa->dma);
 	release_dma_lock(flags1);
 	if (test_bit(TXBIT, &cosa->rxtx)) {
-		struct channel_data *chan = cosa->chan+cosa->txchan;
+		struct channel_data *chan = cosa->chan + cosa->txchan;
 
 		if (chan->tx_done)
 			if (chan->tx_done(chan, cosa->txsize))
@@ -1926,7 +1927,7 @@ static inline void eot_interrupt(struct cosa_data *cosa, int status)
 
 		pr_info("cosa%dc%d: done rx(0x%x)",
 			cosa->num, cosa->rxchan->num, cosa->rxsize);
-		for (i=0; i<cosa->rxsize; i++)
+		for (i = 0; i < cosa->rxsize; i++)
 			pr_cont(" %02x", cosa->rxbuf[i]&0xff);
 		pr_cont("\n");
 	}

From 6619e2b63b416516abf6e4dd1c2c52d2a641c559 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:40 +0800
Subject: [PATCH 1900/3292] net: cosa: remove trailing whitespaces

This patch removes trailing whitespaces.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 26cdfdad4efc..79941b392237 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -444,7 +444,7 @@ static int cosa_probe(int base, int irq, int dma)
 		pr_info("invalid DMA %d\n", dma);
 		return -1;
 	}
-	/* and finally, on 16-bit COSA DMA should be 4-7 and 
+	/* and finally, on 16-bit COSA DMA should be 4-7 and
 	 * I/O base should not be multiple of 0x10
 	 */
 	if (((base & 0x8) && dma < 4) || (!(base & 0x8) && dma > 3)) {
@@ -460,7 +460,7 @@ static int cosa_probe(int base, int irq, int dma)
 
 	if (!request_region(base, is_8bit(cosa) ? 2 : 4, "cosa"))
 		return -1;
-	
+
 	if (cosa_reset_and_read_id(cosa, cosa->id_string) < 0) {
 		printk(KERN_DEBUG "probe at 0x%x failed.\n", base);
 		err = -1;
@@ -480,7 +480,7 @@ static int cosa_probe(int base, int irq, int dma)
 		err = -1;
 		goto err_out;
 	}
-	/* Update the name of the region now we know the type of card */ 
+	/* Update the name of the region now we know the type of card */
 	release_region(base, is_8bit(cosa) ? 2 : 4);
 	if (!request_region(base, is_8bit(cosa) ? 2 : 4, cosa->type)) {
 		printk(KERN_DEBUG "changing name at 0x%x failed.\n", base);
@@ -532,7 +532,7 @@ static int cosa_probe(int base, int irq, int dma)
 		err = -1;
 		goto err_out1;
 	}
-	
+
 	cosa->bouncebuf = kmalloc(COSA_MTU, GFP_KERNEL | GFP_DMA);
 	if (!cosa->bouncebuf) {
 		err = -ENOMEM;
@@ -777,7 +777,7 @@ static ssize_t cosa_read(struct file *file,
 	}
 	if (mutex_lock_interruptible(&chan->rlock))
 		return -ERESTARTSYS;
-	
+
 	chan->rxdata = kmalloc(COSA_MTU, GFP_DMA | GFP_KERNEL);
 	if (!chan->rxdata) {
 		mutex_unlock(&chan->rlock);
@@ -854,7 +854,7 @@ static ssize_t cosa_write(struct file *file,
 
 	if (count > COSA_MTU)
 		count = COSA_MTU;
-	
+
 	/* Allocate the buffer */
 	kbuf = kmalloc(count, GFP_KERNEL | GFP_DMA);
 	if (!kbuf) {
@@ -934,7 +934,7 @@ static int cosa_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 	chan = cosa->chan + n;
-	
+
 	file->private_data = chan;
 
 	spin_lock_irqsave(&cosa->lock, flags);
@@ -1018,7 +1018,7 @@ static inline int cosa_download(struct cosa_data *cosa, void __user *arg)
 			  cosa->name, cosa->firmware_status);
 		return -EPERM;
 	}
-	
+
 	if (copy_from_user(&d, arg, sizeof(d)))
 		return -EFAULT;
 
@@ -1101,7 +1101,7 @@ static inline int cosa_start(struct cosa_data *cosa, int address)
 	cosa->firmware_status |= COSA_FW_START;
 	return 0;
 }
-		
+
 /* Buffer of size at least COSA_MAX_ID_STRING is expected */
 static inline int cosa_getidstr(struct cosa_data *cosa, char __user *string)
 {
@@ -1140,7 +1140,7 @@ static int cosa_ioctl_common(struct cosa_data *cosa,
 	case COSAIODOWNLD:	/* Download the firmware */
 		if (!capable(CAP_SYS_RAWIO))
 			return -EACCES;
-		
+
 		return cosa_download(cosa, argp);
 	case COSAIORMEM:
 		if (!capable(CAP_SYS_RAWIO))
@@ -1443,7 +1443,7 @@ static int startmicrocode(struct cosa_data *cosa, int address)
 		return -4;
 	if (put_wait_data(cosa, '\r') == -1)
 		return -5;
-	
+
 	if (get_wait_data(cosa) != '\r')
 		return -6;
 	if (get_wait_data(cosa) != '\r')
@@ -1618,7 +1618,7 @@ static int put_wait_data(struct cosa_data *cosa, int data)
 		cosa->num, cosa_getstatus(cosa));
 	return -1;
 }
-	
+
 /* The following routine puts the hexadecimal number into the SRP monitor
  * and verifies the proper echo of the sent bytes. Returns 0 on success,
  * negative number on failure (-1,-3,-5,-7) means that put_wait_data() failed,
@@ -1656,7 +1656,7 @@ static int puthexnumber(struct cosa_data *cosa, int number)
  * COSA status byte. I have moved the rx/tx/eot interrupt handling into
  * separate functions to make it more readable. These functions are inline,
  * so there should be no overhead of function call.
- * 
+ *
  * In the COSA bus-master mode, we need to tell the card the address of a
  * buffer. Unfortunately, COSA may be too slow for us, so we must busy-wait.
  * It's time to use the bottom half :-(

From b8773205277e3a27dcf3d06cbdc19c23d9ee9f42 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 16 Jun 2021 15:23:41 +0800
Subject: [PATCH 1901/3292] net: cosa: remove redundant spaces

According to the chackpatch.pl,
no spaces is necessary at the start of a line,
no space is necessary after a cast.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 79941b392237..43caab0b7dee 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -976,7 +976,7 @@ static struct fasync_struct *fasync[256] = { NULL, };
 /* To be done ... */
 static int cosa_fasync(struct inode *inode, struct file *file, int on)
 {
-        int port = iminor(inode);
+	int port = iminor(inode);
 
 	return fasync_helper(inode, file, on, &fasync[port]);
 }
@@ -1338,7 +1338,7 @@ static void cosa_kick(struct cosa_data *cosa)
 	udelay(100);
 	cosa_putstatus(cosa, 0);
 	udelay(100);
-	(void) cosa_getdata8(cosa);
+	(void)cosa_getdata8(cosa);
 	udelay(100);
 	cosa_putdata8(cosa, 0);
 	udelay(100);
@@ -1739,7 +1739,7 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 #ifdef DEBUG_IO
 			debug_status_out(cosa, SR_TX_INT_ENA);
 			debug_data_out(cosa, ((cosa->txchan << 5) & 0xe0) |
-                                ((cosa->txsize >> 8) & 0x1f));
+				       ((cosa->txsize >> 8) & 0x1f));
 			debug_data_in(cosa, cosa_getdata8(cosa));
 #else
 			cosa_getdata8(cosa);
@@ -1762,8 +1762,8 @@ static inline void tx_interrupt(struct cosa_data *cosa, int status)
 			| (cosa->txsize & 0x1fff));
 #ifdef DEBUG_IO
 		debug_status_out(cosa, SR_TX_INT_ENA);
-		debug_data_out(cosa, ((cosa->txchan << 13) & 0xe000)
-                        | (cosa->txsize & 0x1fff));
+		debug_data_out(cosa, ((cosa->txchan << 13) & 0xe000) |
+			       (cosa->txsize & 0x1fff));
 		debug_data_in(cosa, cosa_getdata8(cosa));
 		debug_status_out(cosa, 0);
 #else

From c7654495916e109f76a67fd3ae68f8fa70ab4faa Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 16 Jun 2021 14:43:18 +0800
Subject: [PATCH 1902/3292] net: chelsio: cxgb4: use eth_zero_addr() to assign
 zero address

Using eth_zero_addr() to assign zero address insetad of
inefficient copy from an array.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index ae3ad99fbd06..9e3ea5f7be2e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -7782,7 +7782,6 @@ int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid,
 			   int idx, bool sleep_ok)
 {
 	struct fw_vi_mac_exact *p;
-	u8 addr[] = {0, 0, 0, 0, 0, 0};
 	struct fw_vi_mac_cmd c;
 	int ret = 0;
 	u32 exact;
@@ -7799,7 +7798,7 @@ int t4_free_encap_mac_filt(struct adapter *adap, unsigned int viid,
 	p = c.u.exact;
 	p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F |
 				      FW_VI_MAC_CMD_IDX_V(idx));
-	memcpy(p->macaddr, addr, sizeof(p->macaddr));
+	eth_zero_addr(p->macaddr);
 	ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok);
 	return ret;
 }

From 236daeae3616b1c62ce1a9f8a348d576ec9e22d9 Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Mon, 31 May 2021 02:36:37 -0400
Subject: [PATCH 1903/3292] io_uring: Add to traces the req pointer when
 available

The req pointer uniquely identify a specific request.
Having it in traces can provide valuable insights that is not possible
to have if the calling process is reusing the same user_data value.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                   | 11 ++---
 include/trace/events/io_uring.h | 71 ++++++++++++++++++++++++---------
 2 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 16156a655d8b..d916eb2cef09 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5073,7 +5073,7 @@ static void io_async_task_func(struct callback_head *cb)
 	struct async_poll *apoll = req->apoll;
 	struct io_ring_ctx *ctx = req->ctx;
 
-	trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
+	trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
 
 	if (io_poll_rewait(req, &apoll->poll)) {
 		spin_unlock_irq(&ctx->completion_lock);
@@ -5206,8 +5206,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 		return false;
 	}
 	spin_unlock_irq(&ctx->completion_lock);
-	trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
-					apoll->poll.events);
+	trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
+				mask, apoll->poll.events);
 	return true;
 }
 
@@ -6604,8 +6604,9 @@ fail_req:
 		goto fail_req;
 
 	/* don't need @sqe from now on */
-	trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
-				true, ctx->flags & IORING_SETUP_SQPOLL);
+	trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
+				  req->flags, true,
+				  ctx->flags & IORING_SETUP_SQPOLL);
 
 	/*
 	 * If we already have a head request, queue this one for async
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index abb8b24744fd..12addad1f837 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -323,8 +323,10 @@ TRACE_EVENT(io_uring_complete,
  * io_uring_submit_sqe - called before submitting one SQE
  *
  * @ctx:		pointer to a ring context structure
+ * @req:		pointer to a submitted request
  * @opcode:		opcode of request
  * @user_data:		user data associated with the request
+ * @flags		request flags
  * @force_nonblock:	whether a context blocking or not
  * @sq_thread:		true if sq_thread has submitted this SQE
  *
@@ -333,41 +335,60 @@ TRACE_EVENT(io_uring_complete,
  */
 TRACE_EVENT(io_uring_submit_sqe,
 
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
-		 bool sq_thread),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+		 bool force_nonblock, bool sq_thread),
 
-	TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
+	TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
+		__field(  u32,		flags		)
 		__field(  bool,		force_nonblock	)
 		__field(  bool,		sq_thread	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
+		__entry->flags		= flags;
 		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= sq_thread;
 	),
 
-	TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data,
-			  __entry->force_nonblock, __entry->sq_thread)
+	TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+		  __entry->opcode, (unsigned long long)__entry->user_data,
+		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
 
+/*
+ * io_uring_poll_arm - called after arming a poll wait if successful
+ *
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to the armed request
+ * @opcode:		opcode of request
+ * @user_data:		user data associated with the request
+ * @mask:		request poll events mask
+ * @events:		registered events of interest
+ *
+ * Allows to track which fds are waiting for and what are the events of
+ * interest.
+ */
 TRACE_EVENT(io_uring_poll_arm,
 
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+		 int mask, int events),
 
-	TP_ARGS(ctx, opcode, user_data, mask, events),
+	TP_ARGS(ctx, req, opcode, user_data, mask, events),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
 		__field(  int,		mask		)
@@ -376,16 +397,17 @@ TRACE_EVENT(io_uring_poll_arm,
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
 		__entry->mask		= mask;
 		__entry->events		= events;
 	),
 
-	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data,
-			  __entry->mask, __entry->events)
+	TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+		  __entry->ctx, __entry->req, __entry->opcode,
+		  (unsigned long long) __entry->user_data,
+		  __entry->mask, __entry->events)
 );
 
 TRACE_EVENT(io_uring_poll_wake,
@@ -440,27 +462,40 @@ TRACE_EVENT(io_uring_task_add,
 			  __entry->mask)
 );
 
+/*
+ * io_uring_task_run - called when task_work_run() executes the poll events
+ *                     notification callbacks
+ *
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to the armed request
+ * @opcode:		opcode of request
+ * @user_data:		user data associated with the request
+ *
+ * Allows to track when notified poll events are processed
+ */
 TRACE_EVENT(io_uring_task_run,
 
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
 
-	TP_ARGS(ctx, opcode, user_data),
+	TP_ARGS(ctx, req, opcode, user_data),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
 	),
 
-	TP_printk("ring %p, op %d, data 0x%llx",
-			  __entry->ctx, __entry->opcode,
-			  (unsigned long long) __entry->user_data)
+	TP_printk("ring %p, req %p, op %d, data 0x%llx",
+		  __entry->ctx, __entry->req, __entry->opcode,
+		  (unsigned long long) __entry->user_data)
 );
 
 #endif /* _TRACE_IO_URING_H */

From 3d7b7b5285f0a8e73e332f3d7c7b2ca1e46309d7 Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Mon, 31 May 2021 02:54:15 -0400
Subject: [PATCH 1904/3292] io_uring: minor clean up in trace events definition

Fix tabulation to make nice columns

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/trace/events/io_uring.h | 35 ++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 12addad1f837..e4e44a2b4aa9 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -12,11 +12,11 @@ struct io_wq_work;
 /**
  * io_uring_create - called after a new io_uring context was prepared
  *
- * @fd:			corresponding file descriptor
- * @ctx:		pointer to a ring context structure
+ * @fd:		corresponding file descriptor
+ * @ctx:	pointer to a ring context structure
  * @sq_entries:	actual SQ size
  * @cq_entries:	actual CQ size
- * @flags:		SQ ring flags, provided to io_uring_setup(2)
+ * @flags:	SQ ring flags, provided to io_uring_setup(2)
  *
  * Allows to trace io_uring creation and provide pointer to a context, that can
  * be used later to find correlated events.
@@ -52,12 +52,12 @@ TRACE_EVENT(io_uring_create,
  * io_uring_register - called after a buffer/file/eventfd was successfully
  * 					   registered for a ring
  *
- * @ctx:			pointer to a ring context structure
- * @opcode:			describes which operation to perform
+ * @ctx:		pointer to a ring context structure
+ * @opcode:		describes which operation to perform
  * @nr_user_files:	number of registered files
  * @nr_user_bufs:	number of registered buffers
  * @cq_ev_fd:		whether eventfs registered or not
- * @ret:			return code
+ * @ret:		return code
  *
  * Allows to trace fixed files/buffers/eventfds, that could be registered to
  * avoid an overhead of getting references to them for every operation. This
@@ -142,16 +142,16 @@ TRACE_EVENT(io_uring_queue_async_work,
 	TP_ARGS(ctx, rw, req, work, flags),
 
 	TP_STRUCT__entry (
-		__field(  void *,				ctx		)
-		__field(  int,					rw		)
-		__field(  void *,				req		)
+		__field(  void *,			ctx	)
+		__field(  int,				rw	)
+		__field(  void *,			req	)
 		__field(  struct io_wq_work *,		work	)
 		__field(  unsigned int,			flags	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx	= ctx;
-		__entry->rw		= rw;
+		__entry->rw	= rw;
 		__entry->req	= req;
 		__entry->work	= work;
 		__entry->flags	= flags;
@@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer,
 
 /**
  * io_uring_link - called before the io_uring request added into link_list of
- * 				   another request
+ * 		   another request
  *
- * @ctx:			pointer to a ring context structure
- * @req:			pointer to a linked request
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to a linked request
  * @target_req:		pointer to a previous request, that would contain @req
  *
  * Allows to track linked requests, to understand dependencies between requests
@@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link,
 	TP_ARGS(ctx, req, target_req),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx			)
-		__field(  void *,	req			)
+		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  void *,	target_req	)
 	),
 
@@ -244,7 +244,7 @@ TRACE_EVENT(io_uring_cqring_wait,
 	TP_ARGS(ctx, min_events),
 
 	TP_STRUCT__entry (
-		__field(  void *,	ctx			)
+		__field(  void *,	ctx		)
 		__field(  int,		min_events	)
 	),
 
@@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link,
 	TP_ARGS(req, link),
 
 	TP_STRUCT__entry (
-		__field(  void *,	req		)
+		__field(  void *,	req	)
 		__field(  void *,	link	)
 	),
 
@@ -318,7 +318,6 @@ TRACE_EVENT(io_uring_complete,
 			  __entry->res, __entry->cflags)
 );
 
-
 /**
  * io_uring_submit_sqe - called before submitting one SQE
  *

From ec16d35b6c9d8c89b3b7327a52c4972a7e4281d3 Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Mon, 31 May 2021 02:54:59 -0400
Subject: [PATCH 1905/3292] io-wq: remove header files not needed anymore

mm related header files are not needed for io-wq module.
remove them for a small clean-up.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index e221aaab585c..897b94530b57 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -9,8 +9,6 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/sched/signal.h>
-#include <linux/mm.h>
-#include <linux/sched/mm.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/rculist_nulls.h>

From 07a719f8fdbe4ae0f825fa1a6d2755a63deb265e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 09:15:46 +0200
Subject: [PATCH 1906/3292] mtd_blkdevs: initialze new->rq in
 add_mtd_blktrans_dev

Various places expect the request_queue in ->rq.  Initialize it to
avoid NULL pointer derefences.

Fixes: 6966bb921def ("mtd_blkdevs: use blk_mq_alloc_disk")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/mtd/mtd_blkdevs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 5dc4c966ea73..6ce4bc57f919 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -382,6 +382,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	}
 
 	new->disk = gd;
+	new->rq = new->disk->queue;
 	gd->private_data = new;
 	gd->major = tr->major;
 	gd->first_minor = (new->devnum) << tr->part_bits;

From 6a03cd9843fa4dcf827dc3ad60fa9b4217f3057c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 09:15:47 +0200
Subject: [PATCH 1907/3292] loop: fix order of cleaning up the queue and
 freeing the tagset

We must release the queue before freeing the tagset.

Fixes: 1c99502fae35 ("loop: use blk_mq_alloc_disk and blk_cleanup_disk")
Reported-by: Bruno Goncalves <bgoncalv@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 3f40e673a101..e90f7d349816 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -2183,8 +2183,8 @@ out:
 static void loop_remove(struct loop_device *lo)
 {
 	del_gendisk(lo->lo_disk);
-	blk_mq_free_tag_set(&lo->tag_set);
 	blk_cleanup_disk(lo->lo_disk);
+	blk_mq_free_tag_set(&lo->tag_set);
 	mutex_destroy(&lo->lo_mutex);
 	kfree(lo);
 }

From 2cafe29a8d03f02a3d16193bdaae2f3e82a423f9 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 9 Jun 2021 09:58:21 +0800
Subject: [PATCH 1908/3292] block: fix race between adding/removing rq qos and
 normal IO

Yi reported several kernel panics on:

[16687.001777] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
...
[16687.163549] pc : __rq_qos_track+0x38/0x60

or

[  997.690455] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020
...
[  997.850347] pc : __rq_qos_done+0x2c/0x50

Turns out it is caused by race between adding rq qos(wbt) and normal IO
because rq_qos_add can be run when IO is being submitted, fix this issue
by freezing queue before adding/deleting rq qos to queue.

rq_qos_exit() needn't to freeze queue because it is called after queue
has been frozen.

iolatency calls rq_qos_add() during allocating queue, so freezing won't
add delay because queue usage refcount works at atomic mode at that
time.

iocost calls rq_qos_add() when writing cgroup attribute file, that is
fine to freeze queue at that time since we usually freeze queue when
storing to queue sysfs attribute, meantime iocost only exists on the
root cgroup.

wbt_init calls it in blk_register_queue() and queue sysfs attribute
store(queue_wb_lat_store() when write it 1st time in case of !BLK_WBT_MQ),
the following patch will speedup the queue freezing in wbt_init.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Link: https://lore.kernel.org/r/20210609015822.103433-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-rq-qos.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 2bc43e94f4c4..2bcb3495e376 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -7,6 +7,7 @@
 #include <linux/blk_types.h>
 #include <linux/atomic.h>
 #include <linux/wait.h>
+#include <linux/blk-mq.h>
 
 #include "blk-mq-debugfs.h"
 
@@ -99,8 +100,21 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 
 static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 {
+	/*
+	 * No IO can be in-flight when adding rqos, so freeze queue, which
+	 * is fine since we only support rq_qos for blk-mq queue.
+	 *
+	 * Reuse ->queue_lock for protecting against other concurrent
+	 * rq_qos adding/deleting
+	 */
+	blk_mq_freeze_queue(q);
+
+	spin_lock_irq(&q->queue_lock);
 	rqos->next = q->rq_qos;
 	q->rq_qos = rqos;
+	spin_unlock_irq(&q->queue_lock);
+
+	blk_mq_unfreeze_queue(q);
 
 	if (rqos->ops->debugfs_attrs)
 		blk_mq_debugfs_register_rqos(rqos);
@@ -110,12 +124,22 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 {
 	struct rq_qos **cur;
 
+	/*
+	 * See comment in rq_qos_add() about freezing queue & using
+	 * ->queue_lock.
+	 */
+	blk_mq_freeze_queue(q);
+
+	spin_lock_irq(&q->queue_lock);
 	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
 		if (*cur == rqos) {
 			*cur = rqos->next;
 			break;
 		}
 	}
+	spin_unlock_irq(&q->queue_lock);
+
+	blk_mq_unfreeze_queue(q);
 
 	blk_mq_debugfs_unregister_rqos(rqos);
 }

From a72c374f97a4c7b2f9dde5144c867fec4bdcd798 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 9 Jun 2021 09:58:22 +0800
Subject: [PATCH 1909/3292] block: mark queue init done at the end of
 blk_register_queue

Mark queue init done when everything is done well in blk_register_queue(),
so that wbt_enable_default() can be run quickly without any RCU period
involved since adding rq qos requires to freeze queue.

Also no any side effect by delaying to mark queue init done.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Link: https://lore.kernel.org/r/20210609015822.103433-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f89e2fc3963b..370d83c18057 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -866,20 +866,6 @@ int blk_register_queue(struct gendisk *disk)
 		  "%s is registering an already registered queue\n",
 		  kobject_name(&dev->kobj));
 
-	/*
-	 * SCSI probing may synchronously create and destroy a lot of
-	 * request_queues for non-existent devices.  Shutting down a fully
-	 * functional queue takes measureable wallclock time as RCU grace
-	 * periods are involved.  To avoid excessive latency in these
-	 * cases, a request_queue starts out in a degraded mode which is
-	 * faster to shut down and is made fully functional here as
-	 * request_queues for non-existent devices never get registered.
-	 */
-	if (!blk_queue_init_done(q)) {
-		blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
-		percpu_ref_switch_to_percpu(&q->q_usage_counter);
-	}
-
 	blk_queue_update_readahead(q);
 
 	ret = blk_trace_init_sysfs(dev);
@@ -938,6 +924,21 @@ int blk_register_queue(struct gendisk *disk)
 	ret = 0;
 unlock:
 	mutex_unlock(&q->sysfs_dir_lock);
+
+	/*
+	 * SCSI probing may synchronously create and destroy a lot of
+	 * request_queues for non-existent devices.  Shutting down a fully
+	 * functional queue takes measureable wallclock time as RCU grace
+	 * periods are involved.  To avoid excessive latency in these
+	 * cases, a request_queue starts out in a degraded mode which is
+	 * faster to shut down and is made fully functional here as
+	 * request_queues for non-existent devices never get registered.
+	 */
+	if (!blk_queue_init_done(q)) {
+		blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q);
+		percpu_ref_switch_to_percpu(&q->q_usage_counter);
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_register_queue);

From 171b45a4a70eef2fd36bb794ce4f5a48c440361e Mon Sep 17 00:00:00 2001
From: Andrea Merello <andrea.merello@gmail.com>
Date: Tue, 6 Apr 2021 15:00:44 +0200
Subject: [PATCH 1910/3292] clocksource/drivers/arm_global_timer: Implement
 rate compensation whenever source clock changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds rate change notification support for the parent clock;
should that clock change, then we try to adjust the our prescaler in order
to compensate (i.e. we adjust to still get the same timer frequency).

This is loosely based on what it's done in timer-cadence-ttc. timer-sun51,
mips-gic-timer and smp_twd.c also seem to look at their parent clock rate
and to perform some kind of adjustment whenever needed.

In this particular case we have only one single counter and prescaler for
all clocksource, clockevent and timer_delay, and we just update it for all
(i.e. we don't let it go and call clockevents_update_freq() to notify to
the kernel that our rate has changed).

Note that, there is apparently no other way to fixup things, because once
we call register_current_timer_delay(), specifying the timer rate, it seems
that that rate is not supposed to change ever.

In order for this mechanism to work, we have to make assumptions about how
much the initial clock is supposed to eventually decrease from the initial
one, and set our initial prescaler to a value that we can eventually
decrease enough to compensate. We provide an option in KConfig for this.

In case we end up in a situation in which we are not able to compensate the
parent clock change, we fail returning NOTIFY_BAD.

This fixes a real-world problem with Zynq arch not being able to use this
driver and CPU_FREQ at the same time (because ARM global timer is fed by
the CPU clock, which may keep changing when CPU_FREQ is enabled).

Signed-off-by: Andrea Merello <andrea.merello@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Sören Brinkmann <soren.brinkmann@xilinx.com>
Reviewed-by: Patrice Chotard <patrice.chotard@foss.st.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210406130045.15491-2-andrea.merello@gmail.com
---
 drivers/clocksource/Kconfig            |  13 +++
 drivers/clocksource/arm_global_timer.c | 122 +++++++++++++++++++++++--
 2 files changed, 125 insertions(+), 10 deletions(-)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 39aa21d01e05..19fc5f8883e0 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -358,6 +358,19 @@ config ARM_GLOBAL_TIMER
 	help
 	  This option enables support for the ARM global timer unit.
 
+config ARM_GT_INITIAL_PRESCALER_VAL
+	int "ARM global timer initial prescaler value"
+	default 1
+	depends on ARM_GLOBAL_TIMER
+	help
+	  When the ARM global timer initializes, its current rate is declared
+	  to the kernel and maintained forever. Should it's parent clock
+	  change, the driver tries to fix the timer's internal prescaler.
+	  On some machs (i.e. Zynq) the initial prescaler value thus poses
+	  bounds about how much the parent clock is allowed to decrease or
+	  increase wrt the initial clock value.
+	  This affects CPU_FREQ max delta from the initial frequency.
+
 config ARM_TIMER_SP804
 	bool "Support for Dual Timer SP804 module" if COMPILE_TEST
 	depends on GENERIC_SCHED_CLOCK && CLKDEV_LOOKUP
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 88b2d38a7a61..60a8047fd32e 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -31,6 +31,10 @@
 #define GT_CONTROL_COMP_ENABLE		BIT(1)	/* banked */
 #define GT_CONTROL_IRQ_ENABLE		BIT(2)	/* banked */
 #define GT_CONTROL_AUTO_INC		BIT(3)	/* banked */
+#define GT_CONTROL_PRESCALER_SHIFT      8
+#define GT_CONTROL_PRESCALER_MAX        0xF
+#define GT_CONTROL_PRESCALER_MASK       (GT_CONTROL_PRESCALER_MAX << \
+					 GT_CONTROL_PRESCALER_SHIFT)
 
 #define GT_INT_STATUS	0x0c
 #define GT_INT_STATUS_EVENT_FLAG	BIT(0)
@@ -39,6 +43,7 @@
 #define GT_COMP1	0x14
 #define GT_AUTO_INC	0x18
 
+#define MAX_F_ERR 50
 /*
  * We are expecting to be clocked by the ARM peripheral clock.
  *
@@ -46,7 +51,8 @@
  * the units for all operations.
  */
 static void __iomem *gt_base;
-static unsigned long gt_clk_rate;
+struct notifier_block gt_clk_rate_change_nb;
+static u32 gt_psv_new, gt_psv_bck, gt_target_rate;
 static int gt_ppi;
 static struct clock_event_device __percpu *gt_evt;
 
@@ -96,7 +102,10 @@ static void gt_compare_set(unsigned long delta, int periodic)
 	unsigned long ctrl;
 
 	counter += delta;
-	ctrl = GT_CONTROL_TIMER_ENABLE;
+	ctrl = readl(gt_base + GT_CONTROL);
+	ctrl &= ~(GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE |
+		  GT_CONTROL_AUTO_INC | GT_CONTROL_AUTO_INC);
+	ctrl |= GT_CONTROL_TIMER_ENABLE;
 	writel_relaxed(ctrl, gt_base + GT_CONTROL);
 	writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0);
 	writel_relaxed(upper_32_bits(counter), gt_base + GT_COMP1);
@@ -123,7 +132,7 @@ static int gt_clockevent_shutdown(struct clock_event_device *evt)
 
 static int gt_clockevent_set_periodic(struct clock_event_device *evt)
 {
-	gt_compare_set(DIV_ROUND_CLOSEST(gt_clk_rate, HZ), 1);
+	gt_compare_set(DIV_ROUND_CLOSEST(gt_target_rate, HZ), 1);
 	return 0;
 }
 
@@ -177,7 +186,7 @@ static int gt_starting_cpu(unsigned int cpu)
 	clk->cpumask = cpumask_of(cpu);
 	clk->rating = 300;
 	clk->irq = gt_ppi;
-	clockevents_config_and_register(clk, gt_clk_rate,
+	clockevents_config_and_register(clk, gt_target_rate,
 					1, 0xffffffff);
 	enable_percpu_irq(clk->irq, IRQ_TYPE_NONE);
 	return 0;
@@ -232,9 +241,28 @@ static struct delay_timer gt_delay_timer = {
 	.read_current_timer = gt_read_long,
 };
 
+static void gt_write_presc(u32 psv)
+{
+	u32 reg;
+
+	reg = readl(gt_base + GT_CONTROL);
+	reg &= ~GT_CONTROL_PRESCALER_MASK;
+	reg |= psv << GT_CONTROL_PRESCALER_SHIFT;
+	writel(reg, gt_base + GT_CONTROL);
+}
+
+static u32 gt_read_presc(void)
+{
+	u32 reg;
+
+	reg = readl(gt_base + GT_CONTROL);
+	reg &= GT_CONTROL_PRESCALER_MASK;
+	return reg >> GT_CONTROL_PRESCALER_SHIFT;
+}
+
 static void __init gt_delay_timer_init(void)
 {
-	gt_delay_timer.freq = gt_clk_rate;
+	gt_delay_timer.freq = gt_target_rate;
 	register_current_timer_delay(&gt_delay_timer);
 }
 
@@ -243,18 +271,81 @@ static int __init gt_clocksource_init(void)
 	writel(0, gt_base + GT_CONTROL);
 	writel(0, gt_base + GT_COUNTER0);
 	writel(0, gt_base + GT_COUNTER1);
-	/* enables timer on all the cores */
-	writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
+	/* set prescaler and enable timer on all the cores */
+	writel(((CONFIG_ARM_GT_INITIAL_PRESCALER_VAL - 1) <<
+		GT_CONTROL_PRESCALER_SHIFT)
+	       | GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
 
 #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
-	sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate);
+	sched_clock_register(gt_sched_clock_read, 64, gt_target_rate);
 #endif
-	return clocksource_register_hz(&gt_clocksource, gt_clk_rate);
+	return clocksource_register_hz(&gt_clocksource, gt_target_rate);
+}
+
+static int gt_clk_rate_change_cb(struct notifier_block *nb,
+				 unsigned long event, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+
+	switch (event) {
+	case PRE_RATE_CHANGE:
+	{
+		int psv;
+
+		psv = DIV_ROUND_CLOSEST(ndata->new_rate,
+					gt_target_rate);
+
+		if (abs(gt_target_rate - (ndata->new_rate / psv)) > MAX_F_ERR)
+			return NOTIFY_BAD;
+
+		psv--;
+
+		/* prescaler within legal range? */
+		if (psv < 0 || psv > GT_CONTROL_PRESCALER_MAX)
+			return NOTIFY_BAD;
+
+		/*
+		 * store timer clock ctrl register so we can restore it in case
+		 * of an abort.
+		 */
+		gt_psv_bck = gt_read_presc();
+		gt_psv_new = psv;
+		/* scale down: adjust divider in post-change notification */
+		if (ndata->new_rate < ndata->old_rate)
+			return NOTIFY_DONE;
+
+		/* scale up: adjust divider now - before frequency change */
+		gt_write_presc(psv);
+		break;
+	}
+	case POST_RATE_CHANGE:
+		/* scale up: pre-change notification did the adjustment */
+		if (ndata->new_rate > ndata->old_rate)
+			return NOTIFY_OK;
+
+		/* scale down: adjust divider now - after frequency change */
+		gt_write_presc(gt_psv_new);
+		break;
+
+	case ABORT_RATE_CHANGE:
+		/* we have to undo the adjustment in case we scale up */
+		if (ndata->new_rate < ndata->old_rate)
+			return NOTIFY_OK;
+
+		/* restore original register value */
+		gt_write_presc(gt_psv_bck);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_DONE;
 }
 
 static int __init global_timer_of_register(struct device_node *np)
 {
 	struct clk *gt_clk;
+	static unsigned long gt_clk_rate;
 	int err = 0;
 
 	/*
@@ -292,11 +383,20 @@ static int __init global_timer_of_register(struct device_node *np)
 	}
 
 	gt_clk_rate = clk_get_rate(gt_clk);
+	gt_target_rate = gt_clk_rate / CONFIG_ARM_GT_INITIAL_PRESCALER_VAL;
+	gt_clk_rate_change_nb.notifier_call =
+		gt_clk_rate_change_cb;
+	err = clk_notifier_register(gt_clk, &gt_clk_rate_change_nb);
+	if (err) {
+		pr_warn("Unable to register clock notifier\n");
+		goto out_clk;
+	}
+
 	gt_evt = alloc_percpu(struct clock_event_device);
 	if (!gt_evt) {
 		pr_warn("global-timer: can't allocate memory\n");
 		err = -ENOMEM;
-		goto out_clk;
+		goto out_clk_nb;
 	}
 
 	err = request_percpu_irq(gt_ppi, gt_clockevent_interrupt,
@@ -326,6 +426,8 @@ out_irq:
 	free_percpu_irq(gt_ppi, gt_evt);
 out_free:
 	free_percpu(gt_evt);
+out_clk_nb:
+	clk_notifier_unregister(gt_clk, &gt_clk_rate_change_nb);
 out_clk:
 	clk_disable_unprepare(gt_clk);
 out_unmap:

From 68e2215e9d5f5ec8e5ba0158683742932519cad9 Mon Sep 17 00:00:00 2001
From: Andrea Merello <andrea.merello@gmail.com>
Date: Tue, 6 Apr 2021 15:00:45 +0200
Subject: [PATCH 1911/3292] arm: zynq: don't disable CONFIG_ARM_GLOBAL_TIMER
 due to CONFIG_CPU_FREQ anymore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now ARM global timer driver could work even if it's source clock rate
changes, so we don't need to disable that driver when cpu frequency scaling
is in use.

This cause Zynq arch to get support for timer delay and get_cycles().

Signed-off-by: Andrea Merello <andrea.merello@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Sören Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210406130045.15491-3-andrea.merello@gmail.com
---
 arch/arm/mach-zynq/Kconfig  | 2 +-
 drivers/clocksource/Kconfig | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-zynq/Kconfig b/arch/arm/mach-zynq/Kconfig
index 43fb941dcd07..a56748d671c4 100644
--- a/arch/arm/mach-zynq/Kconfig
+++ b/arch/arm/mach-zynq/Kconfig
@@ -6,7 +6,7 @@ config ARCH_ZYNQ
 	select ARCH_SUPPORTS_BIG_ENDIAN
 	select ARM_AMBA
 	select ARM_GIC
-	select ARM_GLOBAL_TIMER if !CPU_FREQ
+	select ARM_GLOBAL_TIMER
 	select CADENCE_TTC_TIMER
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 19fc5f8883e0..9fa28237715a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -360,6 +360,7 @@ config ARM_GLOBAL_TIMER
 
 config ARM_GT_INITIAL_PRESCALER_VAL
 	int "ARM global timer initial prescaler value"
+	default 2 if ARCH_ZYNQ
 	default 1
 	depends on ARM_GLOBAL_TIMER
 	help

From be534f8ee137b95046d7c53c8200ffdcf05781a7 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Sat, 12 Jun 2021 17:27:26 +0800
Subject: [PATCH 1912/3292] clocksource/drivers/arm_global_timer: Make symbol
 'gt_clk_rate_change_nb' static

The sparse tool complains as follows:

drivers/clocksource/arm_global_timer.c:54:23: warning:
 symbol 'gt_clk_rate_change_nb' was not declared. Should it be static?

This symbol is not used outside of arm_global_timer.c, so mark it static.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1623490046-37972-1-git-send-email-zou_wei@huawei.com
---
 drivers/clocksource/arm_global_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 60a8047fd32e..68b1d144a412 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -51,7 +51,7 @@
  * the units for all operations.
  */
 static void __iomem *gt_base;
-struct notifier_block gt_clk_rate_change_nb;
+static struct notifier_block gt_clk_rate_change_nb;
 static u32 gt_psv_new, gt_psv_bck, gt_target_rate;
 static int gt_ppi;
 static struct clock_event_device __percpu *gt_evt;

From f94bc2667fb204d7c131ac39d9ea342bd16116dc Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 15 Jun 2021 19:54:40 +0800
Subject: [PATCH 1913/3292] clocksource/drivers/arm_global_timer: Remove
 duplicated argument in arm_global_timer

Fix the following coccicheck warning:

    drivers/clocksource/arm_global_timer.c:107:4-23:
    duplicated argument to & or |

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Patrice Chotard <patrice.chotard@foss.st.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210615115440.8881-1-wanjiabing@vivo.com
---
 drivers/clocksource/arm_global_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 68b1d144a412..44a61dc6f932 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -104,7 +104,7 @@ static void gt_compare_set(unsigned long delta, int periodic)
 	counter += delta;
 	ctrl = readl(gt_base + GT_CONTROL);
 	ctrl &= ~(GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE |
-		  GT_CONTROL_AUTO_INC | GT_CONTROL_AUTO_INC);
+		  GT_CONTROL_AUTO_INC);
 	ctrl |= GT_CONTROL_TIMER_ENABLE;
 	writel_relaxed(ctrl, gt_base + GT_CONTROL);
 	writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0);

From 8b33dfe0ba1c84c1aab2456590b38195837f1e6e Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Fri, 14 May 2021 21:14:39 -0500
Subject: [PATCH 1914/3292] clocksource/arm_arch_timer: Improve Allwinner A64
 timer workaround

Bad counter reads are experienced sometimes when bit 10 or greater rolls
over. Originally, testing showed that at least 10 lower bits would be
set to the same value during these bad reads. However, some users still
reported time skips.

Wider testing revealed that on some chips, occasionally only the lowest
9 bits would read as the anomalous value. During these reads (which
still happen only when bit 10), bit 9 would read as the correct value.

Reduce the mask by one bit to cover these cases as well.

Cc: stable@vger.kernel.org
Fixes: c950ca8c35ee ("clocksource/drivers/arch_timer: Workaround for Allwinner A64 timer instability")
Reported-by: Roman Stratiienko <r.stratiienko@gmail.com>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210515021439.55316-1-samuel@sholland.org
---
 drivers/clocksource/arm_arch_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 89a9e0524555..be6d741d404c 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -364,7 +364,7 @@ static u64 notrace arm64_858921_read_cntvct_el0(void)
 	do {								\
 		_val = read_sysreg(reg);				\
 		_retries--;						\
-	} while (((_val + 1) & GENMASK(9, 0)) <= 1 && _retries);	\
+	} while (((_val + 1) & GENMASK(8, 0)) <= 1 && _retries);	\
 									\
 	WARN_ON_ONCE(!_retries);					\
 	_val;								\

From 3d41fff3ae3980c055f3c7861264c46c924f3e4c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 18 May 2021 10:53:06 +0300
Subject: [PATCH 1915/3292] clocksource/drivers/timer-ti-dm: Drop unnecessary
 restore

The device is not losing context on CPU_CLUSTER_PM_ERROR. As we are only
saving and restoring context with cpu_pm, there is no need to restore the
context in case of an error.

Note that the unnecessary restoring of context does not cause issues, it's
just not needed.

Cc: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210518075306.35532-1-tony@atomide.com
---
 drivers/clocksource/timer-ti-dm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index e5c631f1b5cb..3e52c5226c4d 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -128,7 +128,8 @@ static int omap_timer_context_notifier(struct notifier_block *nb,
 			break;
 		omap_timer_save_context(timer);
 		break;
-	case CPU_CLUSTER_PM_ENTER_FAILED:
+	case CPU_CLUSTER_PM_ENTER_FAILED:	/* No need to restore context */
+		break;
 	case CPU_CLUSTER_PM_EXIT:
 		if ((timer->capability & OMAP_TIMER_ALWON) ||
 		    !atomic_read(&timer->enabled))

From 97be86e3fef01736e7a58b4d59a587e471cb1481 Mon Sep 17 00:00:00 2001
From: Prasanth KSR <prasanth.ksr@dell.com>
Date: Wed, 12 May 2021 15:55:30 +0530
Subject: [PATCH 1916/3292] platform/x86: dell-wmi-sysman: Make
 populate_foo_data functions more robust

1. Check acpi type before assignment of each property value

2. Add boundary check for properties count

Co-developed-by: Divya Bharathi <divya.bharathi@dell.com>
Signed-off-by: Divya Bharathi <divya.bharathi@dell.com>
Signed-off-by: Prasanth KSR <prasanth.ksr@dell.com>
Link: https://lore.kernel.org/r/20210512102530.9704-1-prasanth.ksr@dell.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../dell/dell-wmi-sysman/dell-wmi-sysman.h    |  5 ++-
 .../dell/dell-wmi-sysman/enum-attributes.c    | 39 ++++++++++++++++---
 .../x86/dell/dell-wmi-sysman/int-attributes.c | 16 ++++++++
 .../dell/dell-wmi-sysman/passobj-attributes.c |  6 +++
 .../dell/dell-wmi-sysman/string-attributes.c  | 16 +++++++-
 .../x86/dell/dell-wmi-sysman/sysman.c         |  3 +-
 6 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h
index b80f2a62ea3f..3ad33a094588 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h
@@ -152,12 +152,15 @@ static ssize_t curr_val##_store(struct kobject *kobj,				\
 	return ret ? ret : count;						\
 }
 
+#define check_property_type(attr, prop, valuetype)				\
+	(attr##_obj[prop].type != valuetype)
+
 union acpi_object *get_wmiobj_pointer(int instance_id, const char *guid_string);
 int get_instance_count(const char *guid_string);
 void strlcpy_attr(char *dest, char *src);
 
 int populate_enum_data(union acpi_object *enumeration_obj, int instance_id,
-			struct kobject *attr_name_kobj);
+			struct kobject *attr_name_kobj, u32 enum_property_count);
 int alloc_enum_data(void);
 void exit_enum_attributes(void);
 
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
index 091e48c217ed..8cc212c85266 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
@@ -132,39 +132,68 @@ int alloc_enum_data(void)
  * @enumeration_obj: ACPI object with enumeration data
  * @instance_id: The instance to enumerate
  * @attr_name_kobj: The parent kernel object
+ * @enum_property_count: Total properties count under enumeration type
  */
 int populate_enum_data(union acpi_object *enumeration_obj, int instance_id,
-			struct kobject *attr_name_kobj)
+			struct kobject *attr_name_kobj, u32 enum_property_count)
 {
 	int i, next_obj, value_modifier_count, possible_values_count;
 
 	wmi_priv.enumeration_data[instance_id].attr_name_kobj = attr_name_kobj;
+	if (check_property_type(enumeration, ATTR_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.enumeration_data[instance_id].attribute_name,
 		enumeration_obj[ATTR_NAME].string.pointer);
+	if (check_property_type(enumeration, DISPL_NAME_LANG_CODE, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.enumeration_data[instance_id].display_name_language_code,
 		enumeration_obj[DISPL_NAME_LANG_CODE].string.pointer);
+	if (check_property_type(enumeration, DISPLAY_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.enumeration_data[instance_id].display_name,
 		enumeration_obj[DISPLAY_NAME].string.pointer);
+	if (check_property_type(enumeration, DEFAULT_VAL, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.enumeration_data[instance_id].default_value,
 		enumeration_obj[DEFAULT_VAL].string.pointer);
+	if (check_property_type(enumeration, MODIFIER, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.enumeration_data[instance_id].dell_modifier,
 		enumeration_obj[MODIFIER].string.pointer);
 
 	next_obj = MODIFIER + 1;
 
-	value_modifier_count = (uintptr_t)enumeration_obj[next_obj].string.pointer;
+	if (next_obj >= enum_property_count)
+		return -EINVAL;
+
+	if (check_property_type(enumeration, next_obj, ACPI_TYPE_INTEGER))
+		return -EINVAL;
+	value_modifier_count = (uintptr_t)enumeration_obj[next_obj++].string.pointer;
 
 	for (i = 0; i < value_modifier_count; i++) {
+		if (next_obj >= enum_property_count)
+			return -EINVAL;
+		if (check_property_type(enumeration, next_obj, ACPI_TYPE_STRING))
+			return -EINVAL;
 		strcat(wmi_priv.enumeration_data[instance_id].dell_value_modifier,
-			enumeration_obj[++next_obj].string.pointer);
+			enumeration_obj[next_obj++].string.pointer);
 		strcat(wmi_priv.enumeration_data[instance_id].dell_value_modifier, ";");
 	}
 
-	possible_values_count = (uintptr_t) enumeration_obj[++next_obj].string.pointer;
+	if (next_obj >= enum_property_count)
+		return -EINVAL;
+
+	if (check_property_type(enumeration, next_obj, ACPI_TYPE_INTEGER))
+		return -EINVAL;
+	possible_values_count = (uintptr_t) enumeration_obj[next_obj++].string.pointer;
 
 	for (i = 0; i < possible_values_count; i++) {
+		if (next_obj >= enum_property_count)
+			return -EINVAL;
+		if (check_property_type(enumeration, next_obj, ACPI_TYPE_STRING))
+			return -EINVAL;
 		strcat(wmi_priv.enumeration_data[instance_id].possible_values,
-			enumeration_obj[++next_obj].string.pointer);
+			enumeration_obj[next_obj++].string.pointer);
 		strcat(wmi_priv.enumeration_data[instance_id].possible_values, ";");
 	}
 
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
index 8a49ba6e44f9..951e75b538fa 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
@@ -141,20 +141,36 @@ int populate_int_data(union acpi_object *integer_obj, int instance_id,
 			struct kobject *attr_name_kobj)
 {
 	wmi_priv.integer_data[instance_id].attr_name_kobj = attr_name_kobj;
+	if (check_property_type(integer, ATTR_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.integer_data[instance_id].attribute_name,
 		integer_obj[ATTR_NAME].string.pointer);
+	if (check_property_type(integer, DISPL_NAME_LANG_CODE, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.integer_data[instance_id].display_name_language_code,
 		integer_obj[DISPL_NAME_LANG_CODE].string.pointer);
+	if (check_property_type(integer, DISPLAY_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.integer_data[instance_id].display_name,
 		integer_obj[DISPLAY_NAME].string.pointer);
+	if (check_property_type(integer, DEFAULT_VAL, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.integer_data[instance_id].default_value =
 		(uintptr_t)integer_obj[DEFAULT_VAL].string.pointer;
+	if (check_property_type(integer, MODIFIER, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.integer_data[instance_id].dell_modifier,
 		integer_obj[MODIFIER].string.pointer);
+	if (check_property_type(integer, MIN_VALUE, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.integer_data[instance_id].min_value =
 		(uintptr_t)integer_obj[MIN_VALUE].string.pointer;
+	if (check_property_type(integer, MAX_VALUE, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.integer_data[instance_id].max_value =
 		(uintptr_t)integer_obj[MAX_VALUE].string.pointer;
+	if (check_property_type(integer, SCALAR_INCR, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.integer_data[instance_id].scalar_increment =
 		(uintptr_t)integer_obj[SCALAR_INCR].string.pointer;
 
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
index 834b3e82ad9f..230e6ee96636 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
@@ -159,10 +159,16 @@ int alloc_po_data(void)
 int populate_po_data(union acpi_object *po_obj, int instance_id, struct kobject *attr_name_kobj)
 {
 	wmi_priv.po_data[instance_id].attr_name_kobj = attr_name_kobj;
+	if (check_property_type(po, ATTR_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.po_data[instance_id].attribute_name,
 		     po_obj[ATTR_NAME].string.pointer);
+	if (check_property_type(po, MIN_PASS_LEN, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.po_data[instance_id].min_password_length =
 		(uintptr_t)po_obj[MIN_PASS_LEN].string.pointer;
+	if (check_property_type(po, MAX_PASS_LEN, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.po_data[instance_id].max_password_length =
 		(uintptr_t) po_obj[MAX_PASS_LEN].string.pointer;
 
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
index 552537852459..c392f0ecf8b5 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
@@ -118,24 +118,38 @@ int alloc_str_data(void)
 
 /**
  * populate_str_data() - Populate all properties of an instance under string attribute
- * @str_obj: ACPI object with integer data
+ * @str_obj: ACPI object with string data
  * @instance_id: The instance to enumerate
  * @attr_name_kobj: The parent kernel object
  */
 int populate_str_data(union acpi_object *str_obj, int instance_id, struct kobject *attr_name_kobj)
 {
 	wmi_priv.str_data[instance_id].attr_name_kobj = attr_name_kobj;
+	if (check_property_type(str, ATTR_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.str_data[instance_id].attribute_name,
 		     str_obj[ATTR_NAME].string.pointer);
+	if (check_property_type(str, DISPL_NAME_LANG_CODE, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.str_data[instance_id].display_name_language_code,
 		     str_obj[DISPL_NAME_LANG_CODE].string.pointer);
+	if (check_property_type(str, DISPLAY_NAME, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.str_data[instance_id].display_name,
 		     str_obj[DISPLAY_NAME].string.pointer);
+	if (check_property_type(str, DEFAULT_VAL, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.str_data[instance_id].default_value,
 		     str_obj[DEFAULT_VAL].string.pointer);
+	if (check_property_type(str, MODIFIER, ACPI_TYPE_STRING))
+		return -EINVAL;
 	strlcpy_attr(wmi_priv.str_data[instance_id].dell_modifier,
 		     str_obj[MODIFIER].string.pointer);
+	if (check_property_type(str, MIN_LEN, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.str_data[instance_id].min_length = (uintptr_t)str_obj[MIN_LEN].string.pointer;
+	if (check_property_type(str, MAX_LEN, ACPI_TYPE_INTEGER))
+		return -EINVAL;
 	wmi_priv.str_data[instance_id].max_length = (uintptr_t) str_obj[MAX_LEN].string.pointer;
 
 	return sysfs_create_group(attr_name_kobj, &str_attr_group);
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
index c8d276d78e92..d21e84c7a694 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
@@ -481,7 +481,8 @@ static int init_bios_attributes(int attr_type, const char *guid)
 		/* enumerate all of this attribute */
 		switch (attr_type) {
 		case ENUM:
-			retval = populate_enum_data(elements, instance_id, attr_name_kobj);
+			retval = populate_enum_data(elements, instance_id, attr_name_kobj,
+					obj->package.count);
 			break;
 		case INT:
 			retval = populate_int_data(elements, instance_id, attr_name_kobj);

From 842631928afff953d78a71fa762c92db1ab77571 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 3 May 2021 17:46:47 +0200
Subject: [PATCH 1917/3292] platform/x86: intel_cht_int33fe: Correct
 "displayport" fwnode reference

The Type-C connector on these devices is connected to DP-2 not DP-1,
so the reference must be to the DD04 child-node of the GPU, rather
then the DD02 child-node.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210503154647.142551-10-hdegoede@redhat.com
---
 drivers/platform/x86/intel_cht_int33fe_typec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel_cht_int33fe_typec.c b/drivers/platform/x86/intel_cht_int33fe_typec.c
index b61bad9cc8d2..d59544167430 100644
--- a/drivers/platform/x86/intel_cht_int33fe_typec.c
+++ b/drivers/platform/x86/intel_cht_int33fe_typec.c
@@ -168,8 +168,8 @@ static int cht_int33fe_setup_dp(struct cht_int33fe_data *data)
 		return -ENODEV;
 	}
 
-	/* Then the DP child device node */
-	data->dp = device_get_named_child_node(&pdev->dev, "DD02");
+	/* Then the DP-2 child device node */
+	data->dp = device_get_named_child_node(&pdev->dev, "DD04");
 	pci_dev_put(pdev);
 	if (!data->dp)
 		return -ENODEV;

From e48af75dc7f3059c5a18d7176913457eef5765ad Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 12 May 2021 16:57:05 +0800
Subject: [PATCH 1918/3292] platform/x86: dcdbas: drop unneeded assignment in
 host_control_smi()

Making '==' operation with ESM_STATUS_CMD_UNSUCCESSFUL directly
after calling the function inb() is more efficient, so assignment
to 'cmd_status' is redundant.

Eliminate the following clang_analyzer warning:
drivers/platform/x86/dell/dcdbas.c:397:11: warning: Although the value
stored to 'cmd_status' is used in the enclosing expression, the value
is never actually read from 'cmd_status'

No functional change.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/1620809825-84105-1-git-send-email-yang.lee@linux.alibaba.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/dell/dcdbas.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/platform/x86/dell/dcdbas.c b/drivers/platform/x86/dell/dcdbas.c
index d513a59a5d47..28447c180be8 100644
--- a/drivers/platform/x86/dell/dcdbas.c
+++ b/drivers/platform/x86/dell/dcdbas.c
@@ -394,8 +394,7 @@ static int host_control_smi(void)
 
 		/* wait a few to see if it executed */
 		num_ticks = TIMEOUT_USEC_SHORT_SEMA_BLOCKING;
-		while ((cmd_status = inb(PCAT_APM_STATUS_PORT))
-		       == ESM_STATUS_CMD_UNSUCCESSFUL) {
+		while ((s8)inb(PCAT_APM_STATUS_PORT) == ESM_STATUS_CMD_UNSUCCESSFUL) {
 			num_ticks--;
 			if (num_ticks == EXPIRED_TIMER)
 				return -ETIME;

From 4aebcceb332c74c4a3cca60ca292cf73ce3b100c Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
Date: Thu, 13 May 2021 11:38:25 -0400
Subject: [PATCH 1919/3292] MAINTAINERS: Update info for telemetry

 - My linux.intel.com email is no longer valid, update it to my gmail id.

Signed-off-by: Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
Link: https://lore.kernel.org/r/20210513153825.77214-1-irenic.rajneesh@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 81e1edeceae4..6e848b244226 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9398,7 +9398,7 @@ F:	include/linux/firmware/intel/stratix10-smc.h
 F:	include/linux/firmware/intel/stratix10-svc-client.h
 
 INTEL TELEMETRY DRIVER
-M:	Rajneesh Bhardwaj <rajneesh.bhardwaj@linux.intel.com>
+M:	Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
 M:	"David E. Box" <david.e.box@linux.intel.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained

From 5b6a9a2f839c7e1863606a00f505ac50b8887287 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 17 May 2021 12:07:44 +0200
Subject: [PATCH 1920/3292] platform/x86: samsung-laptop: use octal numbers for
 rwx file permissions

Andy asked me to do it before working further on the code.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20210517100746.29663-1-wsa+renesas@sang-engineering.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/samsung-laptop.c | 35 +++++++++++----------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index d5cec6e35bb8..763d97cbbe53 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -388,7 +388,7 @@ MODULE_PARM_DESC(force,
 		"Disable the DMI check and forces the driver to be loaded");
 
 static bool debug;
-module_param(debug, bool, S_IRUGO | S_IWUSR);
+module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug enabled or not");
 
 static int sabi_command(struct samsung_laptop *samsung, u16 command,
@@ -705,7 +705,7 @@ static ssize_t set_performance_level(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(performance_level, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(performance_level, 0644,
 		   get_performance_level, set_performance_level);
 
 static int read_battery_life_extender(struct samsung_laptop *samsung)
@@ -774,7 +774,7 @@ static ssize_t set_battery_life_extender(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(battery_life_extender, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(battery_life_extender, 0644,
 		   get_battery_life_extender, set_battery_life_extender);
 
 static int read_usb_charge(struct samsung_laptop *samsung)
@@ -843,7 +843,7 @@ static ssize_t set_usb_charge(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(usb_charge, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(usb_charge, 0644,
 		   get_usb_charge, set_usb_charge);
 
 static int read_lid_handling(struct samsung_laptop *samsung)
@@ -908,7 +908,7 @@ static ssize_t set_lid_handling(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(lid_handling, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(lid_handling, 0644,
 		   get_lid_handling, set_lid_handling);
 
 static struct attribute *platform_attributes[] = {
@@ -1291,24 +1291,17 @@ static void samsung_debugfs_init(struct samsung_laptop *samsung)
 	samsung->debug.sdiag_wrapper.data = samsung->sdiag;
 	samsung->debug.sdiag_wrapper.size = strlen(samsung->sdiag);
 
-	debugfs_create_u16("command", S_IRUGO | S_IWUSR, root,
-			   &samsung->debug.command);
-	debugfs_create_u32("d0", S_IRUGO | S_IWUSR, root,
-			   &samsung->debug.data.d0);
-	debugfs_create_u32("d1", S_IRUGO | S_IWUSR, root,
-			   &samsung->debug.data.d1);
-	debugfs_create_u16("d2", S_IRUGO | S_IWUSR, root,
-			   &samsung->debug.data.d2);
-	debugfs_create_u8("d3", S_IRUGO | S_IWUSR, root,
-			  &samsung->debug.data.d3);
-	debugfs_create_blob("data", S_IRUGO | S_IWUSR, root,
-			    &samsung->debug.data_wrapper);
-	debugfs_create_blob("f0000_segment", S_IRUSR | S_IWUSR, root,
+	debugfs_create_u16("command", 0644, root, &samsung->debug.command);
+	debugfs_create_u32("d0", 0644, root, &samsung->debug.data.d0);
+	debugfs_create_u32("d1", 0644, root, &samsung->debug.data.d1);
+	debugfs_create_u16("d2", 0644, root, &samsung->debug.data.d2);
+	debugfs_create_u8("d3", 0644, root, &samsung->debug.data.d3);
+	debugfs_create_blob("data", 0644, root, &samsung->debug.data_wrapper);
+	debugfs_create_blob("f0000_segment", 0600, root,
 			    &samsung->debug.f0000_wrapper);
-	debugfs_create_file("call", S_IFREG | S_IRUGO, root, samsung,
+	debugfs_create_file("call", S_IFREG | 0444, root, samsung,
 			    &samsung_laptop_call_fops);
-	debugfs_create_blob("sdiag", S_IRUGO | S_IWUSR, root,
-			    &samsung->debug.sdiag_wrapper);
+	debugfs_create_blob("sdiag", 0644, root, &samsung->debug.sdiag_wrapper);
 }
 
 static void samsung_sabi_exit(struct samsung_laptop *samsung)

From 1351f1d1e2f7d91d0b1963f7b5bf829a4982c778 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 17 May 2021 12:07:45 +0200
Subject: [PATCH 1921/3292] platform/x86: samsung-laptop: set debugfs blobs to
 read only

Those blobs can only be read. So, don't confuse users with 'writable'
flags. Also, remove S_IFREG because debugfs takes care of that.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20210517100746.29663-2-wsa+renesas@sang-engineering.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/samsung-laptop.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 763d97cbbe53..7ee010aa740a 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -1296,12 +1296,12 @@ static void samsung_debugfs_init(struct samsung_laptop *samsung)
 	debugfs_create_u32("d1", 0644, root, &samsung->debug.data.d1);
 	debugfs_create_u16("d2", 0644, root, &samsung->debug.data.d2);
 	debugfs_create_u8("d3", 0644, root, &samsung->debug.data.d3);
-	debugfs_create_blob("data", 0644, root, &samsung->debug.data_wrapper);
-	debugfs_create_blob("f0000_segment", 0600, root,
+	debugfs_create_blob("data", 0444, root, &samsung->debug.data_wrapper);
+	debugfs_create_blob("f0000_segment", 0400, root,
 			    &samsung->debug.f0000_wrapper);
-	debugfs_create_file("call", S_IFREG | 0444, root, samsung,
+	debugfs_create_file("call", 0444, root, samsung,
 			    &samsung_laptop_call_fops);
-	debugfs_create_blob("sdiag", 0644, root, &samsung->debug.sdiag_wrapper);
+	debugfs_create_blob("sdiag", 0444, root, &samsung->debug.sdiag_wrapper);
 }
 
 static void samsung_sabi_exit(struct samsung_laptop *samsung)

From 7dc4a18d017ca26abd1cea197e486fb3e5cd7632 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 19 May 2021 15:56:18 +0200
Subject: [PATCH 1922/3292] platform/x86: toshiba_haps: Fix missing newline in
 pr_debug call in toshiba_haps_notify

The pr_debug() call in toshiba_haps_notify() is missing a newline at the
end of the string, add this.

BugLink: https://bugs.debian.org/799193
Reported-by: Salvatore Bonaccorso <carnil@debian.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210519135618.139701-1-hdegoede@redhat.com
---
 drivers/platform/x86/toshiba_haps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/toshiba_haps.c b/drivers/platform/x86/toshiba_haps.c
index b237bd6b1ee5..49e84095bb01 100644
--- a/drivers/platform/x86/toshiba_haps.c
+++ b/drivers/platform/x86/toshiba_haps.c
@@ -131,7 +131,7 @@ static const struct attribute_group haps_attr_group = {
  */
 static void toshiba_haps_notify(struct acpi_device *device, u32 event)
 {
-	pr_debug("Received event: 0x%x", event);
+	pr_debug("Received event: 0x%x\n", event);
 
 	acpi_bus_generate_netlink_event(device->pnp.device_class,
 					dev_name(&device->dev),

From a558ea42c0decd088df1950bb232ac2257929281 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 19 May 2021 12:44:05 -0500
Subject: [PATCH 1923/3292] platform/x86: Rename hp-wireless to wireless-hotkey

This driver was originally intended to support some HP laptops, but
later support was added for Xioami and AMD laptops.

Rename it to make it clear that it supports a larger variety of
systems.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20210519174405.30155-1-mario.limonciello@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/Kconfig           |   9 ++-
 drivers/platform/x86/Makefile          |   2 +-
 drivers/platform/x86/hp-wireless.c     | 102 ------------------------
 drivers/platform/x86/wireless-hotkey.c | 103 +++++++++++++++++++++++++
 4 files changed, 109 insertions(+), 107 deletions(-)
 delete mode 100644 drivers/platform/x86/hp-wireless.c
 create mode 100644 drivers/platform/x86/wireless-hotkey.c

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 60592fb88e7a..9a668dae2738 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -415,16 +415,17 @@ config HP_ACCEL
 	  To compile this driver as a module, choose M here: the module will
 	  be called hp_accel.
 
-config HP_WIRELESS
-	tristate "HP wireless button"
+config WIRELESS_HOTKEY
+	tristate "Wireless hotkey button"
 	depends on ACPI
 	depends on INPUT
 	help
-	 This driver provides supports for new HP wireless button for Windows 8.
+	 This driver provides supports for the wireless buttons found on some AMD,
+	 HP, & Xioami laptops.
 	 On such systems the driver should load automatically (via ACPI alias).
 
 	 To compile this driver as a module, choose M here: the module will
-	 be called hp-wireless.
+	 be called wireless-hotkey.
 
 config HP_WMI
 	tristate "HP WMI extras"
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index dcc8cdb95b4d..415bec18ece2 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -52,7 +52,6 @@ obj-$(CONFIG_GPD_POCKET_FAN)	+= gpd-pocket-fan.o
 
 # Hewlett Packard
 obj-$(CONFIG_HP_ACCEL)		+= hp_accel.o
-obj-$(CONFIG_HP_WIRELESS)	+= hp-wireless.o
 obj-$(CONFIG_HP_WMI)		+= hp-wmi.o
 obj-$(CONFIG_TC1100_WMI)	+= tc1100-wmi.o
 
@@ -115,6 +114,7 @@ obj-$(CONFIG_TOPSTAR_LAPTOP)	+= topstar-laptop.o
 obj-$(CONFIG_I2C_MULTI_INSTANTIATE)	+= i2c-multi-instantiate.o
 obj-$(CONFIG_MLX_PLATFORM)		+= mlx-platform.o
 obj-$(CONFIG_TOUCHSCREEN_DMI)		+= touchscreen_dmi.o
+obj-$(CONFIG_WIRELESS_HOTKEY)		+= wireless-hotkey.o
 
 # Intel uncore drivers
 obj-$(CONFIG_INTEL_IPS)				+= intel_ips.o
diff --git a/drivers/platform/x86/hp-wireless.c b/drivers/platform/x86/hp-wireless.c
deleted file mode 100644
index 0753ef18e721..000000000000
--- a/drivers/platform/x86/hp-wireless.c
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Airplane mode button for HP & Xiaomi laptops
- *
- *  Copyright (C) 2014-2017 Alex Hung <alex.hung@canonical.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/input.h>
-#include <linux/platform_device.h>
-#include <linux/acpi.h>
-#include <acpi/acpi_bus.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Alex Hung");
-MODULE_ALIAS("acpi*:HPQ6001:*");
-MODULE_ALIAS("acpi*:WSTADEF:*");
-MODULE_ALIAS("acpi*:AMDI0051:*");
-
-static struct input_dev *hpwl_input_dev;
-
-static const struct acpi_device_id hpwl_ids[] = {
-	{"HPQ6001", 0},
-	{"WSTADEF", 0},
-	{"AMDI0051", 0},
-	{"", 0},
-};
-
-static int hp_wireless_input_setup(void)
-{
-	int err;
-
-	hpwl_input_dev = input_allocate_device();
-	if (!hpwl_input_dev)
-		return -ENOMEM;
-
-	hpwl_input_dev->name = "HP Wireless hotkeys";
-	hpwl_input_dev->phys = "hpq6001/input0";
-	hpwl_input_dev->id.bustype = BUS_HOST;
-	hpwl_input_dev->evbit[0] = BIT(EV_KEY);
-	set_bit(KEY_RFKILL, hpwl_input_dev->keybit);
-
-	err = input_register_device(hpwl_input_dev);
-	if (err)
-		goto err_free_dev;
-
-	return 0;
-
-err_free_dev:
-	input_free_device(hpwl_input_dev);
-	return err;
-}
-
-static void hp_wireless_input_destroy(void)
-{
-	input_unregister_device(hpwl_input_dev);
-}
-
-static void hpwl_notify(struct acpi_device *acpi_dev, u32 event)
-{
-	if (event != 0x80) {
-		pr_info("Received unknown event (0x%x)\n", event);
-		return;
-	}
-
-	input_report_key(hpwl_input_dev, KEY_RFKILL, 1);
-	input_sync(hpwl_input_dev);
-	input_report_key(hpwl_input_dev, KEY_RFKILL, 0);
-	input_sync(hpwl_input_dev);
-}
-
-static int hpwl_add(struct acpi_device *device)
-{
-	int err;
-
-	err = hp_wireless_input_setup();
-	if (err)
-		pr_err("Failed to setup hp wireless hotkeys\n");
-
-	return err;
-}
-
-static int hpwl_remove(struct acpi_device *device)
-{
-	hp_wireless_input_destroy();
-	return 0;
-}
-
-static struct acpi_driver hpwl_driver = {
-	.name	= "hp-wireless",
-	.owner	= THIS_MODULE,
-	.ids	= hpwl_ids,
-	.ops	= {
-		.add	= hpwl_add,
-		.remove	= hpwl_remove,
-		.notify	= hpwl_notify,
-	},
-};
-
-module_acpi_driver(hpwl_driver);
diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c
new file mode 100644
index 000000000000..b010e4ca3383
--- /dev/null
+++ b/drivers/platform/x86/wireless-hotkey.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  Airplane mode button for AMD, HP & Xiaomi laptops
+ *
+ *  Copyright (C) 2014-2017 Alex Hung <alex.hung@canonical.com>
+ *  Copyright (C) 2021 Advanced Micro Devices
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <acpi/acpi_bus.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alex Hung");
+MODULE_ALIAS("acpi*:HPQ6001:*");
+MODULE_ALIAS("acpi*:WSTADEF:*");
+MODULE_ALIAS("acpi*:AMDI0051:*");
+
+static struct input_dev *wl_input_dev;
+
+static const struct acpi_device_id wl_ids[] = {
+	{"HPQ6001", 0},
+	{"WSTADEF", 0},
+	{"AMDI0051", 0},
+	{"", 0},
+};
+
+static int wireless_input_setup(void)
+{
+	int err;
+
+	wl_input_dev = input_allocate_device();
+	if (!wl_input_dev)
+		return -ENOMEM;
+
+	wl_input_dev->name = "Wireless hotkeys";
+	wl_input_dev->phys = "hpq6001/input0";
+	wl_input_dev->id.bustype = BUS_HOST;
+	wl_input_dev->evbit[0] = BIT(EV_KEY);
+	set_bit(KEY_RFKILL, wl_input_dev->keybit);
+
+	err = input_register_device(wl_input_dev);
+	if (err)
+		goto err_free_dev;
+
+	return 0;
+
+err_free_dev:
+	input_free_device(wl_input_dev);
+	return err;
+}
+
+static void wireless_input_destroy(void)
+{
+	input_unregister_device(wl_input_dev);
+}
+
+static void wl_notify(struct acpi_device *acpi_dev, u32 event)
+{
+	if (event != 0x80) {
+		pr_info("Received unknown event (0x%x)\n", event);
+		return;
+	}
+
+	input_report_key(wl_input_dev, KEY_RFKILL, 1);
+	input_sync(wl_input_dev);
+	input_report_key(wl_input_dev, KEY_RFKILL, 0);
+	input_sync(wl_input_dev);
+}
+
+static int wl_add(struct acpi_device *device)
+{
+	int err;
+
+	err = wireless_input_setup();
+	if (err)
+		pr_err("Failed to setup hp wireless hotkeys\n");
+
+	return err;
+}
+
+static int wl_remove(struct acpi_device *device)
+{
+	wireless_input_destroy();
+	return 0;
+}
+
+static struct acpi_driver wl_driver = {
+	.name	= "wireless-hotkey",
+	.owner	= THIS_MODULE,
+	.ids	= wl_ids,
+	.ops	= {
+		.add	= wl_add,
+		.remove	= wl_remove,
+		.notify	= wl_notify,
+	},
+};
+
+module_acpi_driver(wl_driver);

From 98c0c85b1040db24f0d04d3e1d315c6c7b05cc07 Mon Sep 17 00:00:00 2001
From: "Luke D. Jones" <luke@ljones.dev>
Date: Mon, 19 Apr 2021 19:49:14 +1200
Subject: [PATCH 1924/3292] platform/x86: asus-nb-wmi: Revert "Drop duplicate
 DMI quirk structures"

This is a preparation revert for reverting the "add support for ASUS ROG
Zephyrus G14 and G15" change. This reverts
commit 67186653c903 ("platform/x86: asus-nb-wmi: Drop duplicate DMI quirk
structures")

Signed-off-by: Luke D. Jones <luke@ljones.dev>
Link: https://lore.kernel.org/r/20210419074915.393433-2-luke@ljones.dev
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/asus-nb-wmi.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index d41d7ad14be0..b07b1288346e 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -110,7 +110,12 @@ static struct quirk_entry quirk_asus_forceals = {
 	.wmi_force_als_set = true,
 };
 
-static struct quirk_entry quirk_asus_vendor_backlight = {
+static struct quirk_entry quirk_asus_ga401i = {
+	.wmi_backlight_power = true,
+	.wmi_backlight_set_devstate = true,
+};
+
+static struct quirk_entry quirk_asus_ga502i = {
 	.wmi_backlight_power = true,
 	.wmi_backlight_set_devstate = true,
 };
@@ -432,7 +437,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IH"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga401i,
 	},
 	{
 		.callback = dmi_matched,
@@ -441,7 +446,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA401II"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga401i,
 	},
 	{
 		.callback = dmi_matched,
@@ -450,7 +455,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IU"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga401i,
 	},
 	{
 		.callback = dmi_matched,
@@ -459,7 +464,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IV"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga401i,
 	},
 	{
 		.callback = dmi_matched,
@@ -468,7 +473,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IVC"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga401i,
 	},
 		{
 		.callback = dmi_matched,
@@ -477,7 +482,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA502II"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga502i,
 	},
 	{
 		.callback = dmi_matched,
@@ -486,7 +491,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA502IU"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga502i,
 	},
 	{
 		.callback = dmi_matched,
@@ -495,7 +500,7 @@ static const struct dmi_system_id asus_quirks[] = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 			DMI_MATCH(DMI_PRODUCT_NAME, "GA502IV"),
 		},
-		.driver_data = &quirk_asus_vendor_backlight,
+		.driver_data = &quirk_asus_ga502i,
 	},
 	{
 		.callback = dmi_matched,

From 28117f3a5c3c8375a3304af76357d5bf9cf30f0b Mon Sep 17 00:00:00 2001
From: "Luke D. Jones" <luke@ljones.dev>
Date: Mon, 19 Apr 2021 19:49:15 +1200
Subject: [PATCH 1925/3292] platform/x86: asus-nb-wmi: Revert "add support for
 ASUS ROG Zephyrus G14 and G15"

The quirks added to asus-nb-wmi for the ASUS ROG Zephyrus G14 and G15 are
wrong, they tell the asus-wmi code to use the vendor specific WMI backlight
interface. But there is no such interface on these laptops.

As a side effect, these quirks stop the acpi_video driver to register since
they make acpi_video_get_backlight_type() return acpi_backlight_vendor,
leaving only the native AMD backlight driver in place, which is the one we
want. This happy coincidence is being replaced with a new quirk in
drivers/acpi/video_detect.c which actually sets the backlight_type to
acpi_backlight_native fixinf this properly. This reverts
commit 13bceda68fb9 ("platform/x86: asus-nb-wmi: add support for ASUS ROG
Zephyrus G14 and G15").

Signed-off-by: Luke D. Jones <luke@ljones.dev>
Link: https://lore.kernel.org/r/20210419074915.393433-3-luke@ljones.dev
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/asus-nb-wmi.c | 82 ------------------------------
 1 file changed, 82 deletions(-)

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index b07b1288346e..0cb927f0f301 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -110,16 +110,6 @@ static struct quirk_entry quirk_asus_forceals = {
 	.wmi_force_als_set = true,
 };
 
-static struct quirk_entry quirk_asus_ga401i = {
-	.wmi_backlight_power = true,
-	.wmi_backlight_set_devstate = true,
-};
-
-static struct quirk_entry quirk_asus_ga502i = {
-	.wmi_backlight_power = true,
-	.wmi_backlight_set_devstate = true,
-};
-
 static struct quirk_entry quirk_asus_use_kbd_dock_devid = {
 	.use_kbd_dock_devid = true,
 };
@@ -430,78 +420,6 @@ static const struct dmi_system_id asus_quirks[] = {
 		},
 		.driver_data = &quirk_asus_forceals,
 	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA401IH",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IH"),
-		},
-		.driver_data = &quirk_asus_ga401i,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA401II",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA401II"),
-		},
-		.driver_data = &quirk_asus_ga401i,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA401IU",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IU"),
-		},
-		.driver_data = &quirk_asus_ga401i,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA401IV",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IV"),
-		},
-		.driver_data = &quirk_asus_ga401i,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA401IVC",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA401IVC"),
-		},
-		.driver_data = &quirk_asus_ga401i,
-	},
-		{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA502II",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA502II"),
-		},
-		.driver_data = &quirk_asus_ga502i,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA502IU",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA502IU"),
-		},
-		.driver_data = &quirk_asus_ga502i,
-	},
-	{
-		.callback = dmi_matched,
-		.ident = "ASUSTeK COMPUTER INC. GA502IV",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GA502IV"),
-		},
-		.driver_data = &quirk_asus_ga502i,
-	},
 	{
 		.callback = dmi_matched,
 		.ident = "Asus Transformer T100TA / T100HA / T100CHI",

From e978858b4214b1abde9fa650e3112a438d61791c Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 23 May 2021 15:45:26 +0200
Subject: [PATCH 1926/3292] platform/surface: aggregator_registry: Update
 comments for 15" AMD Surface Laptop 4

The 15" AMD version of the Surface Laptop 4 shares its WSID HID with the
15" AMD version of the Surface Laptop 3. Update the comments
accordingly.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210523134528.798887-2-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_aggregator_registry.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index 685d37a7add1..bdc09305aab7 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -156,7 +156,7 @@ static const struct software_node *ssam_node_group_sl2[] = {
 	NULL,
 };
 
-/* Devices for Surface Laptop 3. */
+/* Devices for Surface Laptop 3 and 4. */
 static const struct software_node *ssam_node_group_sl3[] = {
 	&ssam_node_root,
 	&ssam_node_bat_ac,
@@ -521,7 +521,7 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
 	/* Surface Laptop 3 (13", Intel) */
 	{ "MSHW0114", (unsigned long)ssam_node_group_sl3 },
 
-	/* Surface Laptop 3 (15", AMD) */
+	/* Surface Laptop 3 (15", AMD) and 4 (15", AMD) */
 	{ "MSHW0110", (unsigned long)ssam_node_group_sl3 },
 
 	/* Surface Laptop Go 1 */

From 460d740839a6e786bb61263d47f4daf23b104f55 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 23 May 2021 15:45:27 +0200
Subject: [PATCH 1927/3292] platform/surface: aggregator_registry: Add support
 for 13" Intel Surface Laptop 4

Add support for the 13" Intel version of the Surface Laptop 4.

Use the existing node group for the Surface Laptop 3 since the 15" AMD
version already shares its WSID HID with its predecessor and there don't
seem to be any significant differences with regards to SAM.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210523134528.798887-3-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_aggregator_registry.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index bdc09305aab7..ef83461fa536 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -524,6 +524,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
 	/* Surface Laptop 3 (15", AMD) and 4 (15", AMD) */
 	{ "MSHW0110", (unsigned long)ssam_node_group_sl3 },
 
+	/* Surface Laptop 4 (13", Intel) */
+	{ "MSHW0250", (unsigned long)ssam_node_group_sl3 },
+
 	/* Surface Laptop Go 1 */
 	{ "MSHW0118", (unsigned long)ssam_node_group_slg1 },
 

From b6c3c6ff2043c6519b5be38ac259752d19f4a5f9 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 23 May 2021 15:45:28 +0200
Subject: [PATCH 1928/3292] platform/surface: aggregator_registry: Consolidate
 node groups for 5th- and 6th-gen devices

5th- and 6th-generation Surface devices have all SAM clients defined in
ACPI, except for the platform profile/performance mode which his handled
via the WSID (Windows Surface Integration Device). Thus, the node groups
for those devices are the same and we can just use a single one instead
of re-defining the same one over and over again.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210523134528.798887-4-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../surface/surface_aggregator_registry.c     | 47 +++++--------------
 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index ef83461fa536..4428c4330229 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -119,8 +119,13 @@ static const struct software_node ssam_node_hid_base_iid6 = {
 	.parent = &ssam_node_hub_base,
 };
 
-/* Devices for Surface Book 2. */
-static const struct software_node *ssam_node_group_sb2[] = {
+/*
+ * Devices for 5th- and 6th-generations models:
+ * - Surface Book 2,
+ * - Surface Laptop 1 and 2,
+ * - Surface Pro 5 and 6.
+ */
+static const struct software_node *ssam_node_group_gen5[] = {
 	&ssam_node_root,
 	&ssam_node_tmp_pprof,
 	NULL,
@@ -142,20 +147,6 @@ static const struct software_node *ssam_node_group_sb3[] = {
 	NULL,
 };
 
-/* Devices for Surface Laptop 1. */
-static const struct software_node *ssam_node_group_sl1[] = {
-	&ssam_node_root,
-	&ssam_node_tmp_pprof,
-	NULL,
-};
-
-/* Devices for Surface Laptop 2. */
-static const struct software_node *ssam_node_group_sl2[] = {
-	&ssam_node_root,
-	&ssam_node_tmp_pprof,
-	NULL,
-};
-
 /* Devices for Surface Laptop 3 and 4. */
 static const struct software_node *ssam_node_group_sl3[] = {
 	&ssam_node_root,
@@ -177,20 +168,6 @@ static const struct software_node *ssam_node_group_slg1[] = {
 	NULL,
 };
 
-/* Devices for Surface Pro 5. */
-static const struct software_node *ssam_node_group_sp5[] = {
-	&ssam_node_root,
-	&ssam_node_tmp_pprof,
-	NULL,
-};
-
-/* Devices for Surface Pro 6. */
-static const struct software_node *ssam_node_group_sp6[] = {
-	&ssam_node_root,
-	&ssam_node_tmp_pprof,
-	NULL,
-};
-
 /* Devices for Surface Pro 7 and Surface Pro 7+. */
 static const struct software_node *ssam_node_group_sp7[] = {
 	&ssam_node_root,
@@ -495,10 +472,10 @@ static struct ssam_device_driver ssam_base_hub_driver = {
 
 static const struct acpi_device_id ssam_platform_hub_match[] = {
 	/* Surface Pro 4, 5, and 6 (OMBR < 0x10) */
-	{ "MSHW0081", (unsigned long)ssam_node_group_sp5 },
+	{ "MSHW0081", (unsigned long)ssam_node_group_gen5 },
 
 	/* Surface Pro 6 (OMBR >= 0x10) */
-	{ "MSHW0111", (unsigned long)ssam_node_group_sp6 },
+	{ "MSHW0111", (unsigned long)ssam_node_group_gen5 },
 
 	/* Surface Pro 7 */
 	{ "MSHW0116", (unsigned long)ssam_node_group_sp7 },
@@ -507,16 +484,16 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
 	{ "MSHW0119", (unsigned long)ssam_node_group_sp7 },
 
 	/* Surface Book 2 */
-	{ "MSHW0107", (unsigned long)ssam_node_group_sb2 },
+	{ "MSHW0107", (unsigned long)ssam_node_group_gen5 },
 
 	/* Surface Book 3 */
 	{ "MSHW0117", (unsigned long)ssam_node_group_sb3 },
 
 	/* Surface Laptop 1 */
-	{ "MSHW0086", (unsigned long)ssam_node_group_sl1 },
+	{ "MSHW0086", (unsigned long)ssam_node_group_gen5 },
 
 	/* Surface Laptop 2 */
-	{ "MSHW0112", (unsigned long)ssam_node_group_sl2 },
+	{ "MSHW0112", (unsigned long)ssam_node_group_gen5 },
 
 	/* Surface Laptop 3 (13", Intel) */
 	{ "MSHW0114", (unsigned long)ssam_node_group_sl3 },

From ab66724a230937982d58711302f51aa9b569a5c0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 23 May 2021 19:23:31 +0200
Subject: [PATCH 1929/3292] platform/x86: ideapad-laptop: Ignore VPC event bit
 10
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VPC event bit 10 gets set on a Yoga 300-11IBR when the EC believes that the
device has changed between laptop/tent/stand/tablet mode.

The EC relies on getting angle info from 2 accelerometers through a special
windows service calling a DSM on the DUAL250E ACPI-device. Linux does not
do this, making the laptop/tent/stand/tablet mode info unreliable.

Ignore VPC event bit 10 to avoid the warnings triggered by the default case
in ideapad_acpi_notify().

Note that the plan for Linux is to have iio-sensor-proxy read the 2
accelerometers and have it provide info about which mode 360° hinges
2-in-1s to the rest of userspace:
https://gitlab.freedesktop.org/hadess/iio-sensor-proxy/-/issues/216

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210523172331.177834-1-hdegoede@redhat.com
---
 drivers/platform/x86/ideapad-laptop.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 387817290921..784326bd72f0 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1408,6 +1408,18 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data)
 		case 6:
 			ideapad_input_report(priv, bit);
 			break;
+		case 10:
+			/*
+			 * This event gets send on a Yoga 300-11IBR when the EC
+			 * believes that the device has changed between laptop/
+			 * tent/stand/tablet mode. The EC relies on getting
+			 * angle info from 2 accelerometers through a special
+			 * windows service calling a DSM on the DUAL250E ACPI-
+			 * device. Linux does not do this, making the laptop/
+			 * tent/stand/tablet mode info unreliable, so we simply
+			 * ignore these events.
+			 */
+			break;
 		case 9:
 			ideapad_sync_rfk_state(priv);
 			break;

From 3fca4b143e49252934d01ee034227c708da5120e Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 24 May 2021 14:23:58 +0800
Subject: [PATCH 1930/3292] platform/x86: thinkpad_acpi: Fix inconsistent
 indenting

Eliminate the follow smatch warning:

drivers/platform/x86/thinkpad_acpi.c:7942 volume_write() warn:
inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1621837438-70790-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index dd60c9397d35..b7fec1b9fffc 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -7938,7 +7938,7 @@ static int volume_write(char *buf)
 				continue;
 			} else if (sscanf(cmd, "level %u", &l) == 1 &&
 				   l >= 0 && l <= TP_EC_VOLUME_MAX) {
-					new_level = l;
+				new_level = l;
 				continue;
 			}
 		}

From 25acf21f3a78a1d2815e605e45924393e039b210 Mon Sep 17 00:00:00 2001
From: Til Jasper Ullrich <tju@tju.me>
Date: Tue, 25 May 2021 17:09:52 +0200
Subject: [PATCH 1931/3292] platform/x86: thinkpad_acpi: Add X1 Carbon Gen 9
 second fan support

The X1 Carbon Gen 9 uses two fans instead of one like the previous
generation. This adds support for the second fan. It has been tested
on my X1 Carbon Gen 9 (20XXS00100) and works fine.

Signed-off-by: Til Jasper Ullrich <tju@tju.me>
Link: https://lore.kernel.org/r/20210525150950.14805-1-tju@tju.me
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index b7fec1b9fffc..603156a6e3ed 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -8853,6 +8853,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
 	TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (2nd gen) */
 	TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (3nd gen) */
 	TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL),	/* P15 (1st gen) / P15v (1st gen) */
+	TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL),	/* X1 Carbon (9th gen) */
 };
 
 static int __init fan_init(struct ibm_init_struct *iibm)

From 8bf388a0a0fe257dd7be9db0352b5b71b4e9138a Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 18 May 2021 11:47:43 +0200
Subject: [PATCH 1932/3292] platform/x86: dell-wmi: Rename dell-wmi.c to
 dell-wmi-base.c

Rename dell-wmi.c to dell-wmi-base.c, so that we can have other
dell-wmi-foo.c files which can be added to dell-wmi.ko as "plugins"
controlled by separate boolean Kconfig options.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS                                               | 2 +-
 drivers/platform/x86/dell/Makefile                        | 1 +
 drivers/platform/x86/dell/{dell-wmi.c => dell-wmi-base.c} | 0
 3 files changed, 2 insertions(+), 1 deletion(-)
 rename drivers/platform/x86/dell/{dell-wmi.c => dell-wmi-base.c} (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6e848b244226..7b4a325af65a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5170,7 +5170,7 @@ DELL WMI NOTIFICATIONS DRIVER
 M:	Matthew Garrett <mjg59@srcf.ucam.org>
 M:	Pali Rohár <pali@kernel.org>
 S:	Maintained
-F:	drivers/platform/x86/dell/dell-wmi.c
+F:	drivers/platform/x86/dell/dell-wmi-base.c
 
 DELTA ST MEDIA DRIVER
 M:	Hugues Fruchet <hugues.fruchet@foss.st.com>
diff --git a/drivers/platform/x86/dell/Makefile b/drivers/platform/x86/dell/Makefile
index d720a3e42ae3..cc45410040cb 100644
--- a/drivers/platform/x86/dell/Makefile
+++ b/drivers/platform/x86/dell/Makefile
@@ -15,6 +15,7 @@ dell-smbios-$(CONFIG_DELL_SMBIOS_WMI)	+= dell-smbios-wmi.o
 dell-smbios-$(CONFIG_DELL_SMBIOS_SMM)	+= dell-smbios-smm.o
 obj-$(CONFIG_DELL_SMO8800)		+= dell-smo8800.o
 obj-$(CONFIG_DELL_WMI)			+= dell-wmi.o
+dell-wmi-objs				:= dell-wmi-base.o
 obj-$(CONFIG_DELL_WMI_AIO)		+= dell-wmi-aio.o
 obj-$(CONFIG_DELL_WMI_DESCRIPTOR)	+= dell-wmi-descriptor.o
 obj-$(CONFIG_DELL_WMI_LED)		+= dell-wmi-led.o
diff --git a/drivers/platform/x86/dell/dell-wmi.c b/drivers/platform/x86/dell/dell-wmi-base.c
similarity index 100%
rename from drivers/platform/x86/dell/dell-wmi.c
rename to drivers/platform/x86/dell/dell-wmi-base.c

From 8af9fa37b8a3637832cbf8fdd9bd828bd5f0de66 Mon Sep 17 00:00:00 2001
From: Perry Yuan <perry_yuan@dell.com>
Date: Thu, 6 May 2021 19:56:05 +0800
Subject: [PATCH 1933/3292] platform/x86: dell-privacy: Add support for Dell
 hardware privacy

add support for Dell privacy driver for the Dell units equipped
hardware privacy design, which protect users privacy of audio and
camera from hardware level. Once the audio or camera privacy mode
activated, any applications will not get any audio or video stream
when user pressed ctrl+F4 hotkey, audio privacy mode will be
enabled, micmute led will be also changed accordingly
The micmute led is fully controlled by hardware & EC(embedded controller)
and camera mute hotkey is Ctrl+F9. Currently design only emits
SW_CAMERA_LENS_COVER event while the camera lens shutter will be
changed by EC & HW(hardware) control

*The flow is like this:
1) User presses key. HW does stuff with this key (timeout timer is started)
2) WMI event is emitted from BIOS to kernel
3) WMI event is received by dell-privacy
4) KEY_MICMUTE emitted from dell-privacy
5) Userland picks up key and modifies kcontrol for SW mute
6) Codec kernel driver catches and calls ledtrig_audio_set
7) dell-privacy notifies EC, the timeout is cancelled and the HW mute
   is activated. If the EC is not notified then the HW mic mute will
   activate when the timeout triggers, just a bit later than with the
   active ack.

Signed-off-by: Perry Yuan <perry_yuan@dell.com>
Link: https://lore.kernel.org/r/20210506115605.1504-1-Perry_Yuan@Dell.com
[hdegoede@redhat.com: Rework Kconfig/Makefile bits + other small fixups]
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../testing/sysfs-platform-dell-privacy-wmi   |  55 +++
 MAINTAINERS                                   |   7 +
 drivers/platform/x86/dell/Kconfig             |   9 +
 drivers/platform/x86/dell/Makefile            |   1 +
 drivers/platform/x86/dell/dell-laptop.c       |  13 +-
 drivers/platform/x86/dell/dell-wmi-base.c     |  14 +-
 drivers/platform/x86/dell/dell-wmi-privacy.c  | 391 ++++++++++++++++++
 drivers/platform/x86/dell/dell-wmi-privacy.h  |  36 ++
 8 files changed, 522 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
 create mode 100644 drivers/platform/x86/dell/dell-wmi-privacy.c
 create mode 100644 drivers/platform/x86/dell/dell-wmi-privacy.h

diff --git a/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi b/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
new file mode 100644
index 000000000000..7f9e18705861
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
@@ -0,0 +1,55 @@
+What:		/sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
+Date:		Apr 2021
+KernelVersion:	5.13
+Contact:	"perry.yuan@dell.com>"
+Description:
+		Display which dell hardware level privacy devices are supported
+		“Dell Privacy” is a set of HW, FW, and SW features to enhance
+		Dell’s commitment to platform privacy for MIC, Camera, and
+		ePrivacy screens.
+		The supported hardware privacy devices are:
+Attributes:
+		Microphone Mute:
+				Identifies the local microphone can be muted by hardware, no applications
+				is available to capture system mic sound
+
+		Camera Shutter:
+				Identifies camera shutter controlled by hardware, which is a micromechanical
+				shutter assembly that is built onto the camera module to block capturing images
+				from outside the laptop
+
+		supported:
+				The privacy device is supported by this system
+
+		unsupported:
+				The privacy device is not supported on this system
+
+		For example to check which privacy devices are supported:
+
+		# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
+		[Microphone Mute] [supported]
+		[Camera Shutter] [supported]
+		[ePrivacy Screen] [unsupported]
+
+What:		/sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
+Date:		Apr 2021
+KernelVersion:	5.13
+Contact:	"perry.yuan@dell.com>"
+Description:
+		Allow user space to check current dell privacy device state.
+		Describes the Device State class exposed by BIOS which can be
+		consumed by various applications interested in knowing the Privacy
+		feature capabilities
+Attributes:
+		muted:
+			Identifies the privacy device is turned off and cannot send stream to OS applications
+
+		unmuted:
+			Identifies the privacy device is turned on ,audio or camera driver can get
+			stream from mic and camera module to OS applications
+
+		For example to check all supported current privacy device states:
+
+		# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
+		[Microphone] [unmuted]
+		[Camera Shutter] [unmuted]
diff --git a/MAINTAINERS b/MAINTAINERS
index 7b4a325af65a..67fc700c9a87 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5172,6 +5172,13 @@ M:	Pali Rohár <pali@kernel.org>
 S:	Maintained
 F:	drivers/platform/x86/dell/dell-wmi-base.c
 
+DELL WMI HARDWARE PRIVACY SUPPORT
+M:	Perry Yuan <Perry.Yuan@dell.com>
+L:	Dell.Client.Kernel@dell.com
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	drivers/platform/x86/dell/dell-wmi-privacy.c
+
 DELTA ST MEDIA DRIVER
 M:	Hugues Fruchet <hugues.fruchet@foss.st.com>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
index e0a55337f51a..b5c6a6a32bf9 100644
--- a/drivers/platform/x86/dell/Kconfig
+++ b/drivers/platform/x86/dell/Kconfig
@@ -53,6 +53,7 @@ config DELL_LAPTOP
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	depends on RFKILL || RFKILL = n
+	depends on DELL_WMI || DELL_WMI = n
 	depends on SERIO_I8042
 	depends on DELL_SMBIOS
 	select POWER_SUPPLY
@@ -164,6 +165,14 @@ config DELL_WMI
 	  To compile this driver as a module, choose M here: the module will
 	  be called dell-wmi.
 
+config DELL_WMI_PRIVACY
+	bool "Dell WMI Hardware Privacy Support"
+	depends on DELL_WMI
+	depends on LEDS_TRIGGER_AUDIO
+	help
+	  This option adds integration with the "Dell Hardware Privacy"
+	  feature of Dell laptops to the dell-wmi driver.
+
 config DELL_WMI_AIO
 	tristate "WMI Hotkeys for Dell All-In-One series"
 	default m
diff --git a/drivers/platform/x86/dell/Makefile b/drivers/platform/x86/dell/Makefile
index cc45410040cb..ddba1df71e80 100644
--- a/drivers/platform/x86/dell/Makefile
+++ b/drivers/platform/x86/dell/Makefile
@@ -16,6 +16,7 @@ dell-smbios-$(CONFIG_DELL_SMBIOS_SMM)	+= dell-smbios-smm.o
 obj-$(CONFIG_DELL_SMO8800)		+= dell-smo8800.o
 obj-$(CONFIG_DELL_WMI)			+= dell-wmi.o
 dell-wmi-objs				:= dell-wmi-base.o
+dell-wmi-$(CONFIG_DELL_WMI_PRIVACY)	+= dell-wmi-privacy.o
 obj-$(CONFIG_DELL_WMI_AIO)		+= dell-wmi-aio.o
 obj-$(CONFIG_DELL_WMI_DESCRIPTOR)	+= dell-wmi-descriptor.o
 obj-$(CONFIG_DELL_WMI_LED)		+= dell-wmi-led.o
diff --git a/drivers/platform/x86/dell/dell-laptop.c b/drivers/platform/x86/dell/dell-laptop.c
index 70edc5bb3a14..8230e7a68a5e 100644
--- a/drivers/platform/x86/dell/dell-laptop.c
+++ b/drivers/platform/x86/dell/dell-laptop.c
@@ -31,6 +31,8 @@
 #include "dell-rbtn.h"
 #include "dell-smbios.h"
 
+#include "dell-wmi-privacy.h"
+
 struct quirk_entry {
 	bool touchpad_led;
 	bool kbd_led_not_present;
@@ -90,6 +92,7 @@ static struct rfkill *wifi_rfkill;
 static struct rfkill *bluetooth_rfkill;
 static struct rfkill *wwan_rfkill;
 static bool force_rfkill;
+static bool micmute_led_registered;
 
 module_param(force_rfkill, bool, 0444);
 MODULE_PARM_DESC(force_rfkill, "enable rfkill on non whitelisted models");
@@ -2205,11 +2208,13 @@ static int __init dell_init(void)
 	dell_laptop_register_notifier(&dell_laptop_notifier);
 
 	if (dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE) &&
-	    dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE)) {
+	    dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE) &&
+	    !dell_privacy_has_mic_mute()) {
 		micmute_led_cdev.brightness = ledtrig_audio_get(LED_AUDIO_MICMUTE);
 		ret = led_classdev_register(&platform_device->dev, &micmute_led_cdev);
 		if (ret < 0)
 			goto fail_led;
+		micmute_led_registered = true;
 	}
 
 	if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
@@ -2257,7 +2262,8 @@ static int __init dell_init(void)
 fail_get_brightness:
 	backlight_device_unregister(dell_backlight_device);
 fail_backlight:
-	led_classdev_unregister(&micmute_led_cdev);
+	if (micmute_led_registered)
+		led_classdev_unregister(&micmute_led_cdev);
 fail_led:
 	dell_cleanup_rfkill();
 fail_rfkill:
@@ -2278,7 +2284,8 @@ static void __exit dell_exit(void)
 		touchpad_led_exit();
 	kbd_led_exit();
 	backlight_device_unregister(dell_backlight_device);
-	led_classdev_unregister(&micmute_led_cdev);
+	if (micmute_led_registered)
+		led_classdev_unregister(&micmute_led_cdev);
 	dell_cleanup_rfkill();
 	if (platform_device) {
 		platform_device_unregister(platform_device);
diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
index 5e1b7f897df5..089c125e18f7 100644
--- a/drivers/platform/x86/dell/dell-wmi-base.c
+++ b/drivers/platform/x86/dell/dell-wmi-base.c
@@ -27,6 +27,7 @@
 #include <acpi/video.h>
 #include "dell-smbios.h"
 #include "dell-wmi-descriptor.h"
+#include "dell-wmi-privacy.h"
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
 MODULE_AUTHOR("Pali Rohár <pali@kernel.org>");
@@ -427,7 +428,6 @@ static void dell_wmi_notify(struct wmi_device *wdev,
 
 		switch (buffer_entry[1]) {
 		case 0x0000: /* One key pressed or event occurred */
-		case 0x0012: /* Event with extended data occurred */
 			if (len > 2)
 				dell_wmi_process_key(wdev, buffer_entry[1],
 						     buffer_entry[2]);
@@ -439,6 +439,13 @@ static void dell_wmi_notify(struct wmi_device *wdev,
 				dell_wmi_process_key(wdev, buffer_entry[1],
 						     buffer_entry[i]);
 			break;
+		case 0x0012:
+			if ((len > 4) && dell_privacy_process_event(buffer_entry[1], buffer_entry[3],
+								    buffer_entry[4]))
+				/* dell_privacy_process_event has handled the event */;
+			else if (len > 2)
+				dell_wmi_process_key(wdev, buffer_entry[1], buffer_entry[2]);
+			break;
 		default: /* Unknown event */
 			pr_info("Unknown WMI event type 0x%x\n",
 				(int)buffer_entry[1]);
@@ -747,6 +754,10 @@ static int __init dell_wmi_init(void)
 		}
 	}
 
+	err = dell_privacy_register_driver();
+	if (err)
+		return err;
+
 	return wmi_driver_register(&dell_wmi_driver);
 }
 late_initcall(dell_wmi_init);
@@ -757,6 +768,7 @@ static void __exit dell_wmi_exit(void)
 		dell_wmi_events_set_enabled(false);
 
 	wmi_driver_unregister(&dell_wmi_driver);
+	dell_privacy_unregister_driver();
 }
 module_exit(dell_wmi_exit);
 
diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c
new file mode 100644
index 000000000000..074b7e68c227
--- /dev/null
+++ b/drivers/platform/x86/dell/dell-wmi-privacy.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Dell privacy notification driver
+ *
+ * Copyright (C) 2021 Dell Inc. All Rights Reserved.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
+#include <linux/list.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/wmi.h>
+
+#include "dell-wmi-privacy.h"
+
+#define DELL_PRIVACY_GUID "6932965F-1671-4CEB-B988-D3AB0A901919"
+#define MICROPHONE_STATUS		BIT(0)
+#define CAMERA_STATUS		        BIT(1)
+#define DELL_PRIVACY_AUDIO_EVENT  0x1
+#define DELL_PRIVACY_CAMERA_EVENT 0x2
+#define led_to_priv(c)       container_of(c, struct privacy_wmi_data, cdev)
+
+/*
+ * The wmi_list is used to store the privacy_priv struct with mutex protecting
+ */
+static LIST_HEAD(wmi_list);
+static DEFINE_MUTEX(list_mutex);
+
+struct privacy_wmi_data {
+	struct input_dev *input_dev;
+	struct wmi_device *wdev;
+	struct list_head list;
+	struct led_classdev cdev;
+	u32 features_present;
+	u32 last_status;
+};
+
+/* DELL Privacy Type */
+enum dell_hardware_privacy_type {
+	DELL_PRIVACY_TYPE_AUDIO = 0,
+	DELL_PRIVACY_TYPE_CAMERA,
+	DELL_PRIVACY_TYPE_SCREEN,
+	DELL_PRIVACY_TYPE_MAX,
+};
+
+static const char * const privacy_types[DELL_PRIVACY_TYPE_MAX] = {
+	[DELL_PRIVACY_TYPE_AUDIO] = "Microphone",
+	[DELL_PRIVACY_TYPE_CAMERA] = "Camera Shutter",
+	[DELL_PRIVACY_TYPE_SCREEN] = "ePrivacy Screen",
+};
+
+/*
+ * Keymap for WMI privacy events of type 0x0012
+ */
+static const struct key_entry dell_wmi_keymap_type_0012[] = {
+	/* privacy mic mute */
+	{ KE_KEY, 0x0001, { KEY_MICMUTE } },
+	/* privacy camera mute */
+	{ KE_SW,  0x0002, { SW_CAMERA_LENS_COVER } },
+	{ KE_END, 0},
+};
+
+bool dell_privacy_has_mic_mute(void)
+{
+	struct privacy_wmi_data *priv;
+
+	mutex_lock(&list_mutex);
+	priv = list_first_entry_or_null(&wmi_list,
+			struct privacy_wmi_data,
+			list);
+	mutex_unlock(&list_mutex);
+
+	return priv && (priv->features_present & BIT(DELL_PRIVACY_TYPE_AUDIO));
+}
+EXPORT_SYMBOL_GPL(dell_privacy_has_mic_mute);
+
+/*
+ * The flow of privacy event:
+ * 1) User presses key. HW does stuff with this key (timeout is started)
+ * 2) WMI event is emitted from BIOS
+ * 3) WMI event is received by dell-privacy
+ * 4) KEY_MICMUTE emitted from dell-privacy
+ * 5) Userland picks up key and modifies kcontrol for SW mute
+ * 6) Codec kernel driver catches and calls ledtrig_audio_set which will call
+ *    led_set_brightness() on the LED registered by dell_privacy_leds_setup()
+ * 7) dell-privacy notifies EC, the timeout is cancelled and the HW mute activates.
+ *    If the EC is not notified then the HW mic mute will activate when the timeout
+ *    triggers, just a bit later than with the active ack.
+ */
+bool dell_privacy_process_event(int type, int code, int status)
+{
+	struct privacy_wmi_data *priv;
+	const struct key_entry *key;
+	bool ret = false;
+
+	mutex_lock(&list_mutex);
+	priv = list_first_entry_or_null(&wmi_list,
+			struct privacy_wmi_data,
+			list);
+	if (!priv)
+		goto error;
+
+	key = sparse_keymap_entry_from_scancode(priv->input_dev, (type << 16) | code);
+	if (!key) {
+		dev_warn(&priv->wdev->dev, "Unknown key with type 0x%04x and code 0x%04x pressed\n",
+			type, code);
+		goto error;
+	}
+	dev_dbg(&priv->wdev->dev, "Key with type 0x%04x and code 0x%04x pressed\n", type, code);
+
+	switch (code) {
+	case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */
+	case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
+		priv->last_status = status;
+		sparse_keymap_report_entry(priv->input_dev, key, 1, true);
+		ret = true;
+		break;
+	default:
+		dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code);
+	}
+
+error:
+	mutex_unlock(&list_mutex);
+	return ret;
+}
+
+static ssize_t dell_privacy_supported_type_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct privacy_wmi_data *priv = dev_get_drvdata(dev);
+	enum dell_hardware_privacy_type type;
+	u32 privacy_list;
+	int len = 0;
+
+	privacy_list = priv->features_present;
+	for (type = DELL_PRIVACY_TYPE_AUDIO; type < DELL_PRIVACY_TYPE_MAX; type++) {
+		if (privacy_list & BIT(type))
+			len += sysfs_emit_at(buf, len, "[%s] [supported]\n", privacy_types[type]);
+		else
+			len += sysfs_emit_at(buf, len, "[%s] [unsupported]\n", privacy_types[type]);
+	}
+
+	return len;
+}
+
+static ssize_t dell_privacy_current_state_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct privacy_wmi_data *priv = dev_get_drvdata(dev);
+	u32 privacy_supported = priv->features_present;
+	enum dell_hardware_privacy_type type;
+	u32 privacy_state = priv->last_status;
+	int len = 0;
+
+	for (type = DELL_PRIVACY_TYPE_AUDIO; type < DELL_PRIVACY_TYPE_MAX; type++) {
+		if (privacy_supported & BIT(type)) {
+			if (privacy_state & BIT(type))
+				len += sysfs_emit_at(buf, len, "[%s] [unmuted]\n", privacy_types[type]);
+			else
+				len += sysfs_emit_at(buf, len, "[%s] [muted]\n", privacy_types[type]);
+		}
+	}
+
+	return len;
+}
+
+static DEVICE_ATTR_RO(dell_privacy_supported_type);
+static DEVICE_ATTR_RO(dell_privacy_current_state);
+
+static struct attribute *privacy_attributes[] = {
+	&dev_attr_dell_privacy_supported_type.attr,
+	&dev_attr_dell_privacy_current_state.attr,
+	NULL,
+};
+
+static const struct attribute_group privacy_attribute_group = {
+	.attrs = privacy_attributes
+};
+
+/*
+ * Describes the Device State class exposed by BIOS which can be consumed by
+ * various applications interested in knowing the Privacy feature capabilities.
+ * class DeviceState
+ * {
+ *  [key, read] string InstanceName;
+ *  [read] boolean ReadOnly;
+ *
+ *  [WmiDataId(1), read] uint32 DevicesSupported;
+ *   0 - None; 0x1 - Microphone; 0x2 - Camera; 0x4 - ePrivacy  Screen
+ *
+ *  [WmiDataId(2), read] uint32 CurrentState;
+ *   0 - Off; 1 - On; Bit0 - Microphone; Bit1 - Camera; Bit2 - ePrivacyScreen
+ * };
+ */
+static int get_current_status(struct wmi_device *wdev)
+{
+	struct privacy_wmi_data *priv = dev_get_drvdata(&wdev->dev);
+	union acpi_object *obj_present;
+	u32 *buffer;
+	int ret = 0;
+
+	if (!priv) {
+		dev_err(&wdev->dev, "dell privacy priv is NULL\n");
+		return -EINVAL;
+	}
+	/* check privacy support features and device states */
+	obj_present = wmidev_block_query(wdev, 0);
+	if (!obj_present) {
+		dev_err(&wdev->dev, "failed to read Binary MOF\n");
+		return -EIO;
+	}
+
+	if (obj_present->type != ACPI_TYPE_BUFFER) {
+		dev_err(&wdev->dev, "Binary MOF is not a buffer!\n");
+		ret = -EIO;
+		goto obj_free;
+	}
+	/*  Although it's not technically a failure, this would lead to
+	 *  unexpected behavior
+	 */
+	if (obj_present->buffer.length != 8) {
+		dev_err(&wdev->dev, "Dell privacy buffer has unexpected length (%d)!\n",
+				obj_present->buffer.length);
+		ret = -EINVAL;
+		goto obj_free;
+	}
+	buffer = (u32 *)obj_present->buffer.pointer;
+	priv->features_present = buffer[0];
+	priv->last_status = buffer[1];
+
+obj_free:
+	kfree(obj_present);
+	return ret;
+}
+
+static int dell_privacy_micmute_led_set(struct led_classdev *led_cdev,
+					enum led_brightness brightness)
+{
+	struct privacy_wmi_data *priv = led_to_priv(led_cdev);
+	static char *acpi_method = (char *)"ECAK";
+	acpi_status status;
+	acpi_handle handle;
+
+	handle = ec_get_handle();
+	if (!handle)
+		return -EIO;
+
+	if (!acpi_has_method(handle, acpi_method))
+		return -EIO;
+
+	status = acpi_evaluate_object(handle, acpi_method, NULL, NULL);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&priv->wdev->dev, "Error setting privacy EC ack value: %s\n",
+				acpi_format_exception(status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Pressing the mute key activates a time delayed circuit to physically cut
+ * off the mute. The LED is in the same circuit, so it reflects the true
+ * state of the HW mute.  The reason for the EC "ack" is so that software
+ * can first invoke a SW mute before the HW circuit is cut off.  Without SW
+ * cutting this off first does not affect the time delayed muting or status
+ * of the LED but there is a possibility of a "popping" noise.
+ *
+ * If the EC receives the SW ack, the circuit will be activated before the
+ * delay completed.
+ *
+ * Exposing as an LED device allows the codec drivers notification path to
+ * EC ACK to work
+ */
+static int dell_privacy_leds_setup(struct device *dev)
+{
+	struct privacy_wmi_data *priv = dev_get_drvdata(dev);
+
+	priv->cdev.name = "dell-privacy::micmute";
+	priv->cdev.max_brightness = 1;
+	priv->cdev.brightness_set_blocking = dell_privacy_micmute_led_set;
+	priv->cdev.default_trigger = "audio-micmute";
+	priv->cdev.brightness = ledtrig_audio_get(LED_AUDIO_MICMUTE);
+	return devm_led_classdev_register(dev, &priv->cdev);
+}
+
+static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
+{
+	struct privacy_wmi_data *priv;
+	struct key_entry *keymap;
+	int ret, i;
+
+	ret = wmi_has_guid(DELL_PRIVACY_GUID);
+	if (!ret)
+		pr_debug("Unable to detect available Dell privacy devices!\n");
+
+	priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	dev_set_drvdata(&wdev->dev, priv);
+	priv->wdev = wdev;
+	/* create evdev passing interface */
+	priv->input_dev = devm_input_allocate_device(&wdev->dev);
+	if (!priv->input_dev)
+		return -ENOMEM;
+
+	/* remap the wmi keymap event to new keymap */
+	keymap = kcalloc(ARRAY_SIZE(dell_wmi_keymap_type_0012),
+			sizeof(struct key_entry), GFP_KERNEL);
+	if (!keymap)
+		return -ENOMEM;
+
+	/* remap the keymap code with Dell privacy key type 0x12 as prefix
+	 * KEY_MICMUTE scancode will be reported as 0x120001
+	 */
+	for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
+		keymap[i] = dell_wmi_keymap_type_0012[i];
+		keymap[i].code |= (0x0012 << 16);
+	}
+	ret = sparse_keymap_setup(priv->input_dev, keymap, NULL);
+	kfree(keymap);
+	if (ret)
+		return ret;
+
+	priv->input_dev->dev.parent = &wdev->dev;
+	priv->input_dev->name = "Dell Privacy Driver";
+	priv->input_dev->id.bustype = BUS_HOST;
+
+	ret = input_register_device(priv->input_dev);
+	if (ret)
+		return ret;
+
+	ret = get_current_status(priv->wdev);
+	if (ret)
+		return ret;
+
+	ret = devm_device_add_group(&wdev->dev, &privacy_attribute_group);
+	if (ret)
+		return ret;
+
+	if (priv->features_present & BIT(DELL_PRIVACY_TYPE_AUDIO)) {
+		ret = dell_privacy_leds_setup(&priv->wdev->dev);
+		if (ret)
+			return ret;
+	}
+	mutex_lock(&list_mutex);
+	list_add_tail(&priv->list, &wmi_list);
+	mutex_unlock(&list_mutex);
+	return 0;
+}
+
+static void dell_privacy_wmi_remove(struct wmi_device *wdev)
+{
+	struct privacy_wmi_data *priv = dev_get_drvdata(&wdev->dev);
+
+	mutex_lock(&list_mutex);
+	list_del(&priv->list);
+	mutex_unlock(&list_mutex);
+}
+
+static const struct wmi_device_id dell_wmi_privacy_wmi_id_table[] = {
+	{ .guid_string = DELL_PRIVACY_GUID },
+	{ },
+};
+
+static struct wmi_driver dell_privacy_wmi_driver = {
+	.driver = {
+		.name = "dell-privacy",
+	},
+	.probe = dell_privacy_wmi_probe,
+	.remove = dell_privacy_wmi_remove,
+	.id_table = dell_wmi_privacy_wmi_id_table,
+};
+
+int dell_privacy_register_driver(void)
+{
+	return wmi_driver_register(&dell_privacy_wmi_driver);
+}
+
+void dell_privacy_unregister_driver(void)
+{
+	wmi_driver_unregister(&dell_privacy_wmi_driver);
+}
diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.h b/drivers/platform/x86/dell/dell-wmi-privacy.h
new file mode 100644
index 000000000000..50c9b943dd47
--- /dev/null
+++ b/drivers/platform/x86/dell/dell-wmi-privacy.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Dell privacy notification driver
+ *
+ * Copyright (C) 2021 Dell Inc. All Rights Reserved.
+ */
+
+#ifndef _DELL_PRIVACY_WMI_H_
+#define _DELL_PRIVACY_WMI_H_
+
+#if IS_ENABLED(CONFIG_DELL_WMI_PRIVACY)
+bool dell_privacy_has_mic_mute(void);
+bool dell_privacy_process_event(int type, int code, int status);
+int dell_privacy_register_driver(void);
+void dell_privacy_unregister_driver(void);
+#else /* CONFIG_DELL_PRIVACY */
+static inline bool dell_privacy_has_mic_mute(void)
+{
+	return false;
+}
+
+static inline bool dell_privacy_process_event(int type, int code, int status)
+{
+	return false;
+}
+
+static inline int dell_privacy_register_driver(void)
+{
+	return 0;
+}
+
+static inline void dell_privacy_unregister_driver(void)
+{
+}
+#endif /* CONFIG_DELL_PRIVACY */
+#endif

From f7b056b48029d9f31628a21c5630263775e25793 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 30 May 2021 12:47:44 +0200
Subject: [PATCH 1934/3292] platform/x86: touchscreen_dmi: Fix Chuwi Hi10 Pro
 comment

Fix the comment on the entry for the Chuwi Hi10 Pro tablet:
1. Replace "Prus" type with "Pro".
2. Fix the model number, the Chuwi Hi10 Pro is the CWI529, not the CWI597.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210530104744.6720-1-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index bde740d6120e..455a8302773a 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -942,7 +942,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 		},
 	},
 	{
-		/* Chuwi Hi10 Prus (CWI597) */
+		/* Chuwi Hi10 Pro (CWI529) */
 		.driver_data = (void *)&chuwi_hi10_pro_data,
 		.matches = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"),

From 28e367127718a9cb85d615a71e152f7acee41bfc Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 2 Jun 2021 18:05:48 +0800
Subject: [PATCH 1935/3292] platform/x86: toshiba_acpi: Fix missing error code
 in toshiba_acpi_setup_keyboard()

The error code is missing in this code scenario, add the error code
'-EINVAL' to the return value 'error'.

Eliminate the follow smatch warning:

drivers/platform/x86/toshiba_acpi.c:2834 toshiba_acpi_setup_keyboard()
warn: missing error code 'error'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1622628348-87035-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/toshiba_acpi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index fa7232ad8c39..352508d30467 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -2831,6 +2831,7 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 
 	if (!dev->info_supported && !dev->system_event_supported) {
 		pr_warn("No hotkey query interface found\n");
+		error = -EINVAL;
 		goto err_remove_filter;
 	}
 

From 6cbaee2e109ed0f7327a2d3cbb412f36fd8873e0 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 3 Jun 2021 02:06:36 +0200
Subject: [PATCH 1936/3292] platform/surface: aggregator: Fix event disable
 function

Disabling events silently fails due to the wrong command ID being used.
Instead of the command ID for the disable call, the command ID for the
enable call was being used. This causes the disable call to enable the
event instead. As the event is already enabled when we call this
function, the EC silently drops this command and does nothing.

Use the correct command ID for disabling the event to fix this.

Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210603000636.568846-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/controller.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 8a70df60142c..a06964aa96e7 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -1907,7 +1907,7 @@ static int ssam_ssh_event_disable(struct ssam_controller *ctrl,
 {
 	int status;
 
-	status = __ssam_ssh_event_request(ctrl, reg, reg.cid_enable, id, flags);
+	status = __ssam_ssh_event_request(ctrl, reg, reg.cid_disable, id, flags);
 
 	if (status < 0 && status != -EINVAL) {
 		ssam_err(ctrl,

From 17b707fe5fbd3c019691873c1c11bddb0e0f7225 Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Sun, 30 May 2021 18:31:09 -0400
Subject: [PATCH 1937/3292] platform/x86: firmware_attributes_class: Create
 helper file for handling firmware-attributes class registration events

This offers shared code for registering the firmware_attributes_class,
which is used by the Dell and Lenovo WMI management drivers.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Link: https://lore.kernel.org/r/20210530223111.25929-1-markpearson@lenovo.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/Kconfig                  |  4 ++
 drivers/platform/x86/Makefile                 |  1 +
 .../platform/x86/firmware_attributes_class.c  | 52 +++++++++++++++++++
 .../platform/x86/firmware_attributes_class.h  | 11 ++++
 4 files changed, 68 insertions(+)
 create mode 100644 drivers/platform/x86/firmware_attributes_class.c
 create mode 100644 drivers/platform/x86/firmware_attributes_class.h

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 9a668dae2738..d6fa071d78de 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1077,6 +1077,10 @@ config TOUCHSCREEN_DMI
 	  the OS-image for the device. This option supplies the missing info.
 	  Enable this for x86 tablets with Silead or Chipone touchscreens.
 
+config FW_ATTR_CLASS
+	tristate
+	default n
+
 config INTEL_IMR
 	bool "Intel Isolated Memory Region support"
 	depends on X86_INTEL_QUARK && IOSF_MBI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 415bec18ece2..98c776967fa0 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_SYSTEM76_ACPI)	+= system76_acpi.o
 obj-$(CONFIG_TOPSTAR_LAPTOP)	+= topstar-laptop.o
 
 # Platform drivers
+obj-$(CONFIG_FW_ATTR_CLASS)		+= firmware_attributes_class.o
 obj-$(CONFIG_I2C_MULTI_INSTANTIATE)	+= i2c-multi-instantiate.o
 obj-$(CONFIG_MLX_PLATFORM)		+= mlx-platform.o
 obj-$(CONFIG_TOUCHSCREEN_DMI)		+= touchscreen_dmi.o
diff --git a/drivers/platform/x86/firmware_attributes_class.c b/drivers/platform/x86/firmware_attributes_class.c
new file mode 100644
index 000000000000..d62ec3d71ede
--- /dev/null
+++ b/drivers/platform/x86/firmware_attributes_class.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Firmware attributes class helper module */
+
+#include <linux/mutex.h>
+#include <linux/device/class.h>
+#include <linux/module.h>
+#include "firmware_attributes_class.h"
+
+static DEFINE_MUTEX(fw_attr_lock);
+int fw_attr_inuse;
+
+static struct class firmware_attributes_class = {
+	.name = "firmware-attributes",
+};
+
+int fw_attributes_class_get(struct class **fw_attr_class)
+{
+	int err;
+
+	mutex_lock(&fw_attr_lock);
+	if (!fw_attr_inuse) { /*first time class is being used*/
+		err = class_register(&firmware_attributes_class);
+		if (err) {
+			mutex_unlock(&fw_attr_lock);
+			return err;
+		}
+	}
+	fw_attr_inuse++;
+	*fw_attr_class = &firmware_attributes_class;
+	mutex_unlock(&fw_attr_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fw_attributes_class_get);
+
+int fw_attributes_class_put(void)
+{
+	mutex_lock(&fw_attr_lock);
+	if (!fw_attr_inuse) {
+		mutex_unlock(&fw_attr_lock);
+		return -EINVAL;
+	}
+	fw_attr_inuse--;
+	if (!fw_attr_inuse) /* No more consumers */
+		class_unregister(&firmware_attributes_class);
+	mutex_unlock(&fw_attr_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fw_attributes_class_put);
+
+MODULE_AUTHOR("Mark Pearson <markpearson@lenovo.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/firmware_attributes_class.h b/drivers/platform/x86/firmware_attributes_class.h
new file mode 100644
index 000000000000..486485cb1f54
--- /dev/null
+++ b/drivers/platform/x86/firmware_attributes_class.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Firmware attributes class helper module */
+
+#ifndef FW_ATTR_CLASS_H
+#define FW_ATTR_CLASS_H
+
+int fw_attributes_class_get(struct class **fw_attr_class);
+int fw_attributes_class_put(void);
+
+#endif /* FW_ATTR_CLASS_H */

From 8a1c379c5a03281295c436faf21b0e4716e5b8a8 Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Sun, 30 May 2021 18:31:10 -0400
Subject: [PATCH 1938/3292] platform/x86: dell-wmi-sysman: Use
 firmware_attributes_class helper

Update Dell WMI sysman driver to use newly implemented helper module.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Link: https://lore.kernel.org/r/20210530223111.25929-2-markpearson@lenovo.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/dell/Kconfig              |  1 +
 .../platform/x86/dell/dell-wmi-sysman/sysman.c | 18 ++++++++----------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
index b5c6a6a32bf9..9b0a4d080f43 100644
--- a/drivers/platform/x86/dell/Kconfig
+++ b/drivers/platform/x86/dell/Kconfig
@@ -206,6 +206,7 @@ config DELL_WMI_SYSMAN
 	depends on ACPI_WMI
 	depends on DMI
 	select NLS
+	select FW_ATTR_CLASS
 	help
 	  This driver allows changing BIOS settings on many Dell machines from
 	  2018 and newer without the use of any additional software.
diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
index d21e84c7a694..1378c1878658 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
@@ -13,14 +13,11 @@
 #include <linux/kernel.h>
 #include <linux/wmi.h>
 #include "dell-wmi-sysman.h"
+#include "../../firmware_attributes_class.h"
 
 #define MAX_TYPES  4
 #include <linux/nls.h>
 
-static struct class firmware_attributes_class = {
-	.name = "firmware-attributes",
-};
-
 struct wmi_sysman_priv wmi_priv = {
 	.mutex = __MUTEX_INITIALIZER(wmi_priv.mutex),
 };
@@ -28,6 +25,7 @@ struct wmi_sysman_priv wmi_priv = {
 /* reset bios to defaults */
 static const char * const reset_types[] = {"builtinsafe", "lastknowngood", "factory", "custom"};
 static int reset_option = -1;
+struct class *fw_attr_class;
 
 
 /**
@@ -542,11 +540,11 @@ static int __init sysman_init(void)
 		goto err_exit_bios_attr_pass_interface;
 	}
 
-	ret = class_register(&firmware_attributes_class);
+	ret = fw_attributes_class_get(&fw_attr_class);
 	if (ret)
 		goto err_exit_bios_attr_pass_interface;
 
-	wmi_priv.class_dev = device_create(&firmware_attributes_class, NULL, MKDEV(0, 0),
+	wmi_priv.class_dev = device_create(fw_attr_class, NULL, MKDEV(0, 0),
 				  NULL, "%s", DRIVER_NAME);
 	if (IS_ERR(wmi_priv.class_dev)) {
 		ret = PTR_ERR(wmi_priv.class_dev);
@@ -603,10 +601,10 @@ err_release_attributes_data:
 	release_attributes_data();
 
 err_destroy_classdev:
-	device_destroy(&firmware_attributes_class, MKDEV(0, 0));
+	device_destroy(fw_attr_class, MKDEV(0, 0));
 
 err_unregister_class:
-	class_unregister(&firmware_attributes_class);
+	fw_attributes_class_put();
 
 err_exit_bios_attr_pass_interface:
 	exit_bios_attr_pass_interface();
@@ -620,8 +618,8 @@ err_exit_bios_attr_set_interface:
 static void __exit sysman_exit(void)
 {
 	release_attributes_data();
-	device_destroy(&firmware_attributes_class, MKDEV(0, 0));
-	class_unregister(&firmware_attributes_class);
+	device_destroy(fw_attr_class, MKDEV(0, 0));
+	fw_attributes_class_put();
 	exit_bios_attr_set_interface();
 	exit_bios_attr_pass_interface();
 }

From a40cd7ef22fbb11229cf982920f4ec96c1f49282 Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Sun, 30 May 2021 18:31:11 -0400
Subject: [PATCH 1939/3292] platform/x86: think-lmi: Add WMI interface support
 on Lenovo platforms

For Lenovo platforms that support a WMI interface to the BIOS add
support, using the firmware-attributes class, to allow users to access
and modify various BIOS related settings.

Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Link: https://lore.kernel.org/r/20210530223111.25929-3-markpearson@lenovo.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../testing/sysfs-class-firmware-attributes   |  18 +-
 MAINTAINERS                                   |   7 +
 drivers/platform/x86/Kconfig                  |  11 +
 drivers/platform/x86/Makefile                 |   1 +
 drivers/platform/x86/think-lmi.c              | 891 ++++++++++++++++++
 drivers/platform/x86/think-lmi.h              |  81 ++
 6 files changed, 1008 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/x86/think-lmi.c
 create mode 100644 drivers/platform/x86/think-lmi.h

diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes
index 8ea59fea4709..3348bf80a37c 100644
--- a/Documentation/ABI/testing/sysfs-class-firmware-attributes
+++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes
@@ -197,8 +197,24 @@ Description:
 		Drivers may emit a CHANGE uevent when a password is set or unset
 		userspace may check it again.
 
-		On Dell systems, if Admin password is set, then all BIOS attributes
+		On Dell and Lenovo systems, if Admin password is set, then all BIOS attributes
 		require password validation.
+		On Lenovo systems if you change the Admin password the new password is not active until
+		the next boot.
+
+		Lenovo specific class extensions
+		------------------------------
+
+		On Lenovo systems the following additional settings are available:
+
+		lenovo_encoding:
+					The encoding method that is used. This can be either "ascii"
+					or "scancode". Default is set to "ascii"
+
+		lenovo_kbdlang:
+					The keyboard language method that is used. This is generally a
+					two char code (e.g. "us", "fr", "gr") and may vary per platform.
+					Default is set to "us"
 
 What:		/sys/class/firmware-attributes/*/attributes/pending_reboot
 Date:		February 2021
diff --git a/MAINTAINERS b/MAINTAINERS
index 67fc700c9a87..771ea74e7e2d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18163,6 +18163,13 @@ W:	http://thinkwiki.org/wiki/Ibm-acpi
 T:	git git://repo.or.cz/linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git
 F:	drivers/platform/x86/thinkpad_acpi.c
 
+THINKPAD LMI DRIVER
+M:	Mark Pearson <markpearson@lenovo.com>
+L:	platform-driver-x86@vger.kernel.org
+S:	Maintained
+F:	Documentation/ABI/testing/sysfs-class-firmware-attributes
+F:	drivers/platform/x86/think-lmi.?
+
 THUNDERBOLT DMA TRAFFIC TEST DRIVER
 M:	Isaac Hazan <isaac.hazan@intel.com>
 L:	linux-usb@vger.kernel.org
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index d6fa071d78de..1e538ce8feaf 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -640,6 +640,17 @@ config THINKPAD_ACPI_HOTKEY_POLL
 	  If you are not sure, say Y here.  The driver enables polling only if
 	  it is strictly necessary to do so.
 
+config THINKPAD_LMI
+	tristate "Lenovo WMI-based systems management driver"
+	depends on ACPI_WMI
+	select FW_ATTR_CLASS
+	help
+	  This driver allows changing BIOS settings on Lenovo machines whose
+	  BIOS support the WMI interface.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called think-lmi.
+
 config INTEL_ATOMISP2_LED
 	tristate "Intel AtomISP2 camera LED driver"
 	depends on GPIOLIB && LEDS_GPIO
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 98c776967fa0..ff620d653d39 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_IBM_RTL)		+= ibm_rtl.o
 obj-$(CONFIG_IDEAPAD_LAPTOP)	+= ideapad-laptop.o
 obj-$(CONFIG_SENSORS_HDAPS)	+= hdaps.o
 obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
+obj-$(CONFIG_THINKPAD_LMI)	+= think-lmi.o
 
 # Intel
 obj-$(CONFIG_INTEL_ATOMISP2_LED)	+= intel_atomisp2_led.o
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
new file mode 100644
index 000000000000..854427fed1a9
--- /dev/null
+++ b/drivers/platform/x86/think-lmi.c
@@ -0,0 +1,891 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Think LMI BIOS configuration driver
+ *
+ * Copyright(C) 2019-2021 Lenovo
+ *
+ * Original code from Thinkpad-wmi project https://github.com/iksaif/thinkpad-wmi
+ * Copyright(C) 2017 Corentin Chary <corentin.chary@gmail.com>
+ * Distributed under the GPL-2.0 license
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/wmi.h>
+#include "firmware_attributes_class.h"
+#include "think-lmi.h"
+
+/*
+ * Name:
+ *  Lenovo_BiosSetting
+ * Description:
+ *  Get item name and settings for current LMI instance.
+ * Type:
+ *  Query
+ * Returns:
+ *  "Item,Value"
+ * Example:
+ *  "WakeOnLAN,Enable"
+ */
+#define LENOVO_BIOS_SETTING_GUID "51F5230E-9677-46CD-A1CF-C0B23EE34DB7"
+
+/*
+ * Name:
+ *  Lenovo_SetBiosSetting
+ * Description:
+ *  Change the BIOS setting to the desired value using the Lenovo_SetBiosSetting
+ *  class. To save the settings, use the Lenovo_SaveBiosSetting class.
+ *  BIOS settings and values are case sensitive.
+ *  After making changes to the BIOS settings, you must reboot the computer
+ *  before the changes will take effect.
+ * Type:
+ *  Method
+ * Arguments:
+ *  "Item,Value,Password,Encoding,KbdLang;"
+ * Example:
+ *  "WakeOnLAN,Disable,pa55w0rd,ascii,us;"
+ */
+#define LENOVO_SET_BIOS_SETTINGS_GUID "98479A64-33F5-4E33-A707-8E251EBBC3A1"
+
+/*
+ * Name:
+ *  Lenovo_SaveBiosSettings
+ * Description:
+ *  Save any pending changes in settings.
+ * Type:
+ *  Method
+ * Arguments:
+ *  "Password,Encoding,KbdLang;"
+ * Example:
+ * "pa55w0rd,ascii,us;"
+ */
+#define LENOVO_SAVE_BIOS_SETTINGS_GUID "6A4B54EF-A5ED-4D33-9455-B0D9B48DF4B3"
+
+/*
+ * Name:
+ *  Lenovo_BiosPasswordSettings
+ * Description:
+ *  Return BIOS Password settings
+ * Type:
+ *  Query
+ * Returns:
+ *  PasswordMode, PasswordState, MinLength, MaxLength,
+ *  SupportedEncoding, SupportedKeyboard
+ */
+#define LENOVO_BIOS_PASSWORD_SETTINGS_GUID "8ADB159E-1E32-455C-BC93-308A7ED98246"
+
+/*
+ * Name:
+ *  Lenovo_SetBiosPassword
+ * Description:
+ *  Change a specific password.
+ *  - BIOS settings cannot be changed at the same boot as power-on
+ *    passwords (POP) and hard disk passwords (HDP). If you want to change
+ *    BIOS settings and POP or HDP, you must reboot the system after changing
+ *    one of them.
+ *  - A password cannot be set using this method when one does not already
+ *    exist. Passwords can only be updated or cleared.
+ * Type:
+ *  Method
+ * Arguments:
+ *  "PasswordType,CurrentPassword,NewPassword,Encoding,KbdLang;"
+ * Example:
+ *  "pop,pa55w0rd,newpa55w0rd,ascii,us;”
+ */
+#define LENOVO_SET_BIOS_PASSWORD_GUID "2651D9FD-911C-4B69-B94E-D0DED5963BD7"
+
+/*
+ * Name:
+ *  Lenovo_GetBiosSelections
+ * Description:
+ *  Return a list of valid settings for a given item.
+ * Type:
+ *  Method
+ * Arguments:
+ *  "Item"
+ * Returns:
+ *  "Value1,Value2,Value3,..."
+ * Example:
+ *  -> "FlashOverLAN"
+ *  <- "Enabled,Disabled"
+ */
+#define LENOVO_GET_BIOS_SELECTIONS_GUID	"7364651A-132F-4FE7-ADAA-40C6C7EE2E3B"
+
+#define TLMI_POP_PWD (1 << 0)
+#define TLMI_PAP_PWD (1 << 1)
+#define to_tlmi_pwd_setting(kobj)  container_of(kobj, struct tlmi_pwd_setting, kobj)
+#define to_tlmi_attr_setting(kobj)  container_of(kobj, struct tlmi_attr_setting, kobj)
+
+static const struct tlmi_err_codes tlmi_errs[] = {
+	{"Success", 0},
+	{"Not Supported", -EOPNOTSUPP},
+	{"Invalid Parameter", -EINVAL},
+	{"Access Denied", -EACCES},
+	{"System Busy", -EBUSY},
+};
+
+static const char * const encoding_options[] = {
+	[TLMI_ENCODING_ASCII] = "ascii",
+	[TLMI_ENCODING_SCANCODE] = "scancode",
+};
+static struct think_lmi tlmi_priv;
+struct class *fw_attr_class;
+
+/* ------ Utility functions ------------*/
+/* Convert BIOS WMI error string to suitable error code */
+static int tlmi_errstr_to_err(const char *errstr)
+{
+	int i;
+
+	for (i = 0; i < sizeof(tlmi_errs)/sizeof(struct tlmi_err_codes); i++) {
+		if (!strcmp(tlmi_errs[i].err_str, errstr))
+			return tlmi_errs[i].err_code;
+	}
+	return -EPERM;
+}
+
+/* Extract error string from WMI return buffer */
+static int tlmi_extract_error(const struct acpi_buffer *output)
+{
+	const union acpi_object *obj;
+
+	obj = output->pointer;
+	if (!obj)
+		return -ENOMEM;
+	if (obj->type != ACPI_TYPE_STRING || !obj->string.pointer)
+		return -EIO;
+
+	return tlmi_errstr_to_err(obj->string.pointer);
+}
+
+/* Utility function to execute WMI call to BIOS */
+static int tlmi_simple_call(const char *guid, const char *arg)
+{
+	const struct acpi_buffer input = { strlen(arg), (char *)arg };
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	int i, err;
+
+	/*
+	 * Duplicated call required to match BIOS workaround for behavior
+	 * seen when WMI accessed via scripting on other OS.
+	 */
+	for (i = 0; i < 2; i++) {
+		/* (re)initialize output buffer to default state */
+		output.length = ACPI_ALLOCATE_BUFFER;
+		output.pointer = NULL;
+
+		status = wmi_evaluate_method(guid, 0, 0, &input, &output);
+		if (ACPI_FAILURE(status)) {
+			kfree(output.pointer);
+			return -EIO;
+		}
+		err = tlmi_extract_error(&output);
+		kfree(output.pointer);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/* Extract output string from WMI return buffer */
+static int tlmi_extract_output_string(const struct acpi_buffer *output,
+				      char **string)
+{
+	const union acpi_object *obj;
+	char *s;
+
+	obj = output->pointer;
+	if (!obj)
+		return -ENOMEM;
+	if (obj->type != ACPI_TYPE_STRING || !obj->string.pointer)
+		return -EIO;
+
+	s = kstrdup(obj->string.pointer, GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	*string = s;
+	return 0;
+}
+
+/* ------ Core interface functions ------------*/
+
+/* Get password settings from BIOS */
+static int tlmi_get_pwd_settings(struct tlmi_pwdcfg *pwdcfg)
+{
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	const union acpi_object *obj;
+	acpi_status status;
+
+	if (!tlmi_priv.can_get_password_settings)
+		return -EOPNOTSUPP;
+
+	status = wmi_query_block(LENOVO_BIOS_PASSWORD_SETTINGS_GUID, 0,
+				 &output);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	obj = output.pointer;
+	if (!obj)
+		return -ENOMEM;
+	if (obj->type != ACPI_TYPE_BUFFER || !obj->buffer.pointer) {
+		kfree(obj);
+		return -EIO;
+	}
+	/*
+	 * The size of thinkpad_wmi_pcfg on ThinkStation is larger than ThinkPad.
+	 * To make the driver compatible on different brands, we permit it to get
+	 * the data in below case.
+	 */
+	if (obj->buffer.length < sizeof(struct tlmi_pwdcfg)) {
+		pr_warn("Unknown pwdcfg buffer length %d\n", obj->buffer.length);
+		kfree(obj);
+		return -EIO;
+	}
+	memcpy(pwdcfg, obj->buffer.pointer, sizeof(struct tlmi_pwdcfg));
+	kfree(obj);
+	return 0;
+}
+
+static int tlmi_save_bios_settings(const char *password)
+{
+	return tlmi_simple_call(LENOVO_SAVE_BIOS_SETTINGS_GUID,
+				password);
+}
+
+static int tlmi_setting(int item, char **value, const char *guid_string)
+{
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	int ret;
+
+	status = wmi_query_block(guid_string, item, &output);
+	if (ACPI_FAILURE(status)) {
+		kfree(output.pointer);
+		return -EIO;
+	}
+
+	ret = tlmi_extract_output_string(&output, value);
+	kfree(output.pointer);
+	return ret;
+}
+
+static int tlmi_get_bios_selections(const char *item, char **value)
+{
+	const struct acpi_buffer input = { strlen(item), (char *)item };
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	int ret;
+
+	status = wmi_evaluate_method(LENOVO_GET_BIOS_SELECTIONS_GUID,
+				     0, 0, &input, &output);
+
+	if (ACPI_FAILURE(status)) {
+		kfree(output.pointer);
+		return -EIO;
+	}
+
+	ret = tlmi_extract_output_string(&output, value);
+	kfree(output.pointer);
+	return ret;
+}
+
+/* ---- Authentication sysfs --------------------------------------------------------- */
+static ssize_t is_enabled_show(struct kobject *kobj, struct kobj_attribute *attr,
+					  char *buf)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	return sysfs_emit(buf, "%d\n", setting->valid);
+}
+
+static struct kobj_attribute auth_is_pass_set = __ATTR_RO(is_enabled);
+
+static ssize_t current_password_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+	size_t pwdlen;
+	char *p;
+
+	pwdlen = strlen(buf);
+	/* pwdlen == 0 is allowed to clear the password */
+	if (pwdlen && ((pwdlen < setting->minlen) || (pwdlen > setting->maxlen)))
+		return -EINVAL;
+
+	strscpy(setting->password, buf, setting->maxlen);
+	/* Strip out CR if one is present, setting password won't work if it is present */
+	p = strchrnul(setting->password, '\n');
+	*p = '\0';
+	return count;
+}
+
+static struct kobj_attribute auth_current_password = __ATTR_WO(current_password);
+
+static ssize_t new_password_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+	char *auth_str, *new_pwd, *p;
+	size_t pwdlen;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!tlmi_priv.can_set_bios_password)
+		return -EOPNOTSUPP;
+
+	new_pwd = kstrdup(buf, GFP_KERNEL);
+	if (!new_pwd)
+		return -ENOMEM;
+
+	/* Strip out CR if one is present, setting password won't work if it is present */
+	p = strchrnul(new_pwd, '\n');
+	*p = '\0';
+
+	pwdlen = strlen(new_pwd);
+	/* pwdlen == 0 is allowed to clear the password */
+	if (pwdlen && ((pwdlen < setting->minlen) || (pwdlen > setting->maxlen))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Format: 'PasswordType,CurrentPw,NewPw,Encoding,KbdLang;' */
+	auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s,%s,%s;",
+		 setting->pwd_type, setting->password, new_pwd,
+		 encoding_options[setting->encoding], setting->kbdlang);
+	if (!auth_str) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = tlmi_simple_call(LENOVO_SET_BIOS_PASSWORD_GUID, auth_str);
+	kfree(auth_str);
+out:
+	kfree(new_pwd);
+	return ret ?: count;
+}
+
+static struct kobj_attribute auth_new_password = __ATTR_WO(new_password);
+
+static ssize_t min_password_length_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	return sysfs_emit(buf, "%d\n", setting->minlen);
+}
+
+static struct kobj_attribute auth_min_pass_length = __ATTR_RO(min_password_length);
+
+static ssize_t max_password_length_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	return sysfs_emit(buf, "%d\n", setting->maxlen);
+}
+static struct kobj_attribute auth_max_pass_length = __ATTR_RO(max_password_length);
+
+static ssize_t mechanism_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	return sysfs_emit(buf, "password\n");
+}
+static struct kobj_attribute auth_mechanism = __ATTR_RO(mechanism);
+
+static ssize_t encoding_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	return sysfs_emit(buf, "%s\n", encoding_options[setting->encoding]);
+}
+
+static ssize_t encoding_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+	int i;
+
+	/* Scan for a matching profile */
+	i = sysfs_match_string(encoding_options, buf);
+	if (i < 0)
+		return -EINVAL;
+
+	setting->encoding = i;
+	return count;
+}
+
+static struct kobj_attribute auth_encoding = __ATTR_RW(encoding);
+
+static ssize_t kbdlang_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	return sysfs_emit(buf, "%s\n", setting->kbdlang);
+}
+
+static ssize_t kbdlang_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+	int length;
+
+	length = strlen(buf);
+	if (buf[length-1] == '\n')
+		length--;
+
+	if (!length || (length >= TLMI_LANG_MAXLEN))
+		return -EINVAL;
+
+	memcpy(setting->kbdlang, buf, length);
+	setting->kbdlang[length] = '\0';
+	return count;
+}
+
+static struct kobj_attribute auth_kbdlang = __ATTR_RW(kbdlang);
+
+static ssize_t role_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	return sysfs_emit(buf, "%s\n", setting->role);
+}
+static struct kobj_attribute auth_role = __ATTR_RO(role);
+
+static struct attribute *auth_attrs[] = {
+	&auth_is_pass_set.attr,
+	&auth_min_pass_length.attr,
+	&auth_max_pass_length.attr,
+	&auth_current_password.attr,
+	&auth_new_password.attr,
+	&auth_role.attr,
+	&auth_mechanism.attr,
+	&auth_encoding.attr,
+	&auth_kbdlang.attr,
+	NULL
+};
+
+static const struct attribute_group auth_attr_group = {
+	.attrs = auth_attrs,
+};
+
+/* ---- Attributes sysfs --------------------------------------------------------- */
+static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+
+	return sysfs_emit(buf, "%s\n", setting->display_name);
+}
+
+static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+	char *item;
+	int ret;
+
+	ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID);
+	if (ret)
+		return ret;
+
+	ret = sysfs_emit(buf, "%s\n", item);
+	kfree(item);
+	return ret;
+}
+
+static ssize_t possible_values_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+
+	if (!tlmi_priv.can_get_bios_selections)
+		return -EOPNOTSUPP;
+
+	return sysfs_emit(buf, "%s\n", setting->possible_values);
+}
+
+static ssize_t current_value_store(struct kobject *kobj,
+		struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+	char *set_str = NULL, *new_setting = NULL;
+	char *auth_str = NULL;
+	char *p;
+	int ret;
+
+	if (!tlmi_priv.can_set_bios_settings)
+		return -EOPNOTSUPP;
+
+	new_setting = kstrdup(buf, GFP_KERNEL);
+	if (!new_setting)
+		return -ENOMEM;
+
+	/* Strip out CR if one is present */
+	p = strchrnul(new_setting, '\n');
+	*p = '\0';
+
+	if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password) {
+		auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;",
+				tlmi_priv.pwd_admin->password,
+				encoding_options[tlmi_priv.pwd_admin->encoding],
+				tlmi_priv.pwd_admin->kbdlang);
+		if (!auth_str) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (auth_str)
+		set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name,
+				new_setting, auth_str);
+	else
+		set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name,
+				new_setting);
+	if (!set_str) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = tlmi_simple_call(LENOVO_SET_BIOS_SETTINGS_GUID, set_str);
+	if (ret)
+		goto out;
+
+	if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password)
+		ret = tlmi_save_bios_settings(auth_str);
+	else
+		ret = tlmi_save_bios_settings("");
+
+out:
+	kfree(auth_str);
+	kfree(set_str);
+	kfree(new_setting);
+	return ret ?: count;
+}
+
+static struct kobj_attribute attr_displ_name = __ATTR_RO(display_name);
+
+static struct kobj_attribute attr_possible_values = __ATTR_RO(possible_values);
+
+static struct kobj_attribute attr_current_val = __ATTR_RW_MODE(current_value, 0600);
+
+static struct attribute *tlmi_attrs[] = {
+	&attr_displ_name.attr,
+	&attr_current_val.attr,
+	&attr_possible_values.attr,
+	NULL
+};
+
+static const struct attribute_group tlmi_attr_group = {
+	.attrs = tlmi_attrs,
+};
+
+static ssize_t tlmi_attr_show(struct kobject *kobj, struct attribute *attr,
+				    char *buf)
+{
+	struct kobj_attribute *kattr;
+
+	kattr = container_of(attr, struct kobj_attribute, attr);
+	if (kattr->show)
+		return kattr->show(kobj, kattr, buf);
+	return -EIO;
+}
+
+static ssize_t tlmi_attr_store(struct kobject *kobj, struct attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct kobj_attribute *kattr;
+
+	kattr = container_of(attr, struct kobj_attribute, attr);
+	if (kattr->store)
+		return kattr->store(kobj, kattr, buf, count);
+	return -EIO;
+}
+
+static const struct sysfs_ops tlmi_kobj_sysfs_ops = {
+	.show	= tlmi_attr_show,
+	.store	= tlmi_attr_store,
+};
+
+static void tlmi_attr_setting_release(struct kobject *kobj)
+{
+	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
+
+	kfree(setting);
+}
+
+static void tlmi_pwd_setting_release(struct kobject *kobj)
+{
+	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
+
+	kfree(setting);
+}
+
+static struct kobj_type tlmi_attr_setting_ktype = {
+	.release        = &tlmi_attr_setting_release,
+	.sysfs_ops	= &tlmi_kobj_sysfs_ops,
+};
+
+static struct kobj_type tlmi_pwd_setting_ktype = {
+	.release        = &tlmi_pwd_setting_release,
+	.sysfs_ops	= &tlmi_kobj_sysfs_ops,
+};
+
+/* ---- Initialisation --------------------------------------------------------- */
+static void tlmi_release_attr(void)
+{
+	int i;
+
+	/* Attribute structures */
+	for (i = 0; i < TLMI_SETTINGS_COUNT; i++) {
+		if (tlmi_priv.setting[i]) {
+			kfree(tlmi_priv.setting[i]->possible_values);
+			sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group);
+			kobject_put(&tlmi_priv.setting[i]->kobj);
+		}
+	}
+	kset_unregister(tlmi_priv.attribute_kset);
+
+	/* Authentication structures */
+	sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group);
+	kobject_put(&tlmi_priv.pwd_admin->kobj);
+	sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
+	kobject_put(&tlmi_priv.pwd_power->kobj);
+	kset_unregister(tlmi_priv.authentication_kset);
+}
+
+static int tlmi_sysfs_init(void)
+{
+	int i, ret;
+
+	ret = fw_attributes_class_get(&fw_attr_class);
+	if (ret)
+		return ret;
+
+	tlmi_priv.class_dev = device_create(fw_attr_class, NULL, MKDEV(0, 0),
+			NULL, "%s", "thinklmi");
+	if (IS_ERR(tlmi_priv.class_dev)) {
+		ret = PTR_ERR(tlmi_priv.class_dev);
+		goto fail_class_created;
+	}
+
+	tlmi_priv.attribute_kset = kset_create_and_add("attributes", NULL,
+			&tlmi_priv.class_dev->kobj);
+	if (!tlmi_priv.attribute_kset) {
+		ret = -ENOMEM;
+		goto fail_device_created;
+	}
+
+	for (i = 0; i < TLMI_SETTINGS_COUNT; i++) {
+		/* Check if index is a valid setting - skip if it isn't */
+		if (!tlmi_priv.setting[i])
+			continue;
+
+		/* Build attribute */
+		tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
+		ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
+				NULL, "%s", tlmi_priv.setting[i]->display_name);
+		if (ret)
+			goto fail_create_attr;
+
+		ret = sysfs_create_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group);
+		if (ret)
+			goto fail_create_attr;
+	}
+
+	/* Create authentication entries */
+	tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
+								&tlmi_priv.class_dev->kobj);
+	if (!tlmi_priv.authentication_kset) {
+		ret = -ENOMEM;
+		goto fail_create_attr;
+	}
+	tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
+	ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
+			NULL, "%s", "Admin");
+	if (ret)
+		goto fail_create_attr;
+
+	ret = sysfs_create_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group);
+	if (ret)
+		goto fail_create_attr;
+
+	tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
+	ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
+			NULL, "%s", "System");
+	if (ret)
+		goto fail_create_attr;
+
+	ret = sysfs_create_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group);
+	if (ret)
+		goto fail_create_attr;
+
+	return ret;
+
+fail_create_attr:
+	tlmi_release_attr();
+fail_device_created:
+	device_destroy(fw_attr_class, MKDEV(0, 0));
+fail_class_created:
+	fw_attributes_class_put();
+	return ret;
+}
+
+/* ---- Base Driver -------------------------------------------------------- */
+static int tlmi_analyze(void)
+{
+	struct tlmi_pwdcfg pwdcfg;
+	acpi_status status;
+	int i, ret;
+
+	if (wmi_has_guid(LENOVO_SET_BIOS_SETTINGS_GUID) &&
+	    wmi_has_guid(LENOVO_SAVE_BIOS_SETTINGS_GUID))
+		tlmi_priv.can_set_bios_settings = true;
+
+	if (wmi_has_guid(LENOVO_GET_BIOS_SELECTIONS_GUID))
+		tlmi_priv.can_get_bios_selections = true;
+
+	if (wmi_has_guid(LENOVO_SET_BIOS_PASSWORD_GUID))
+		tlmi_priv.can_set_bios_password = true;
+
+	if (wmi_has_guid(LENOVO_BIOS_PASSWORD_SETTINGS_GUID))
+		tlmi_priv.can_get_password_settings = true;
+
+	/*
+	 * Try to find the number of valid settings of this machine
+	 * and use it to create sysfs attributes.
+	 */
+	for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) {
+		struct tlmi_attr_setting *setting;
+		char *item = NULL;
+		char *p;
+
+		tlmi_priv.setting[i] = NULL;
+		status = tlmi_setting(i, &item, LENOVO_BIOS_SETTING_GUID);
+		if (ACPI_FAILURE(status))
+			break;
+		if (!item)
+			break;
+		if (!*item)
+			continue;
+
+		/* It is not allowed to have '/' for file name. Convert it into '\'. */
+		strreplace(item, '/', '\\');
+
+		/* Remove the value part */
+		p = strchrnul(item, ',');
+		*p = '\0';
+
+		/* Create a setting entry */
+		setting = kzalloc(sizeof(*setting), GFP_KERNEL);
+		if (!setting) {
+			ret = -ENOMEM;
+			goto fail_clear_attr;
+		}
+		setting->index = i;
+		strscpy(setting->display_name, item, TLMI_SETTINGS_MAXLEN);
+		/* If BIOS selections supported, load those */
+		if (tlmi_priv.can_get_bios_selections) {
+			ret = tlmi_get_bios_selections(setting->display_name,
+					&setting->possible_values);
+			if (ret || !setting->possible_values)
+				pr_info("Error retrieving possible values for %d : %s\n",
+						i, setting->display_name);
+		}
+		tlmi_priv.setting[i] = setting;
+		tlmi_priv.settings_count++;
+		kfree(item);
+	}
+
+	/* Create password setting structure */
+	ret = tlmi_get_pwd_settings(&pwdcfg);
+	if (ret)
+		goto fail_clear_attr;
+
+	tlmi_priv.pwd_admin = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
+	if (!tlmi_priv.pwd_admin) {
+		ret = -ENOMEM;
+		goto fail_clear_attr;
+	}
+	strscpy(tlmi_priv.pwd_admin->display_name, "admin", TLMI_PWDTYPE_MAXLEN);
+	strscpy(tlmi_priv.pwd_admin->kbdlang, "us", TLMI_LANG_MAXLEN);
+	tlmi_priv.pwd_admin->encoding = TLMI_ENCODING_ASCII;
+	tlmi_priv.pwd_admin->pwd_type = "pap";
+	tlmi_priv.pwd_admin->role = "bios-admin";
+	tlmi_priv.pwd_admin->minlen = pwdcfg.min_length;
+	if (WARN_ON(pwdcfg.max_length >= TLMI_PWD_BUFSIZE))
+		pwdcfg.max_length = TLMI_PWD_BUFSIZE - 1;
+	tlmi_priv.pwd_admin->maxlen = pwdcfg.max_length;
+	if (pwdcfg.password_state & TLMI_PAP_PWD)
+		tlmi_priv.pwd_admin->valid = true;
+
+	tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
+	if (!tlmi_priv.pwd_power) {
+		ret = -ENOMEM;
+		goto fail_clear_attr;
+	}
+	strscpy(tlmi_priv.pwd_power->display_name, "power-on", TLMI_PWDTYPE_MAXLEN);
+	strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN);
+	tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII;
+	tlmi_priv.pwd_power->pwd_type = "pop";
+	tlmi_priv.pwd_power->role = "power-on";
+	tlmi_priv.pwd_power->minlen = pwdcfg.min_length;
+	tlmi_priv.pwd_power->maxlen = pwdcfg.max_length;
+
+	if (pwdcfg.password_state & TLMI_POP_PWD)
+		tlmi_priv.pwd_power->valid = true;
+
+	return 0;
+
+fail_clear_attr:
+	for (i = 0; i < TLMI_SETTINGS_COUNT; ++i)
+		kfree(tlmi_priv.setting[i]);
+	return ret;
+}
+
+static void tlmi_remove(struct wmi_device *wdev)
+{
+	tlmi_release_attr();
+	device_destroy(fw_attr_class, MKDEV(0, 0));
+	fw_attributes_class_put();
+}
+
+static int tlmi_probe(struct wmi_device *wdev, const void *context)
+{
+	tlmi_analyze();
+	return tlmi_sysfs_init();
+}
+
+static const struct wmi_device_id tlmi_id_table[] = {
+	{ .guid_string = LENOVO_BIOS_SETTING_GUID },
+	{ }
+};
+
+static struct wmi_driver tlmi_driver = {
+	.driver = {
+		.name = "think-lmi",
+	},
+	.id_table = tlmi_id_table,
+	.probe = tlmi_probe,
+	.remove = tlmi_remove,
+};
+
+MODULE_AUTHOR("Sugumaran L <slacshiminar@lenovo.com>");
+MODULE_AUTHOR("Mark Pearson <markpearson@lenovo.com>");
+MODULE_AUTHOR("Corentin Chary <corentin.chary@gmail.com>");
+MODULE_DESCRIPTION("ThinkLMI Driver");
+MODULE_LICENSE("GPL");
+
+module_wmi_driver(tlmi_driver);
diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
new file mode 100644
index 000000000000..6cd5325cc50e
--- /dev/null
+++ b/drivers/platform/x86/think-lmi.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _THINK_LMI_H_
+#define _THINK_LMI_H_
+
+#include <linux/types.h>
+
+#define TLMI_SETTINGS_COUNT  256
+#define TLMI_SETTINGS_MAXLEN 512
+#define TLMI_PWD_BUFSIZE     129
+#define TLMI_PWDTYPE_MAXLEN   64
+#define TLMI_ENC_MAXLEN       64
+#define TLMI_LANG_MAXLEN       4
+#define TLMI_PWDTYPE_LEN       4
+/*
+ * Longest string should be in the set command: allow size of BIOS
+ * option and choice
+ */
+#define TLMI_GETSET_MAXLEN (TLMI_SETTINGS_MAXLEN + TLMI_SETTINGS_MAXLEN)
+
+/* Possible error values */
+struct tlmi_err_codes {
+	const char *err_str;
+	int err_code;
+};
+
+enum encoding_option {
+	TLMI_ENCODING_ASCII,
+	TLMI_ENCODING_SCANCODE,
+};
+
+/* password configuration details */
+struct tlmi_pwdcfg {
+	uint32_t password_mode;
+	uint32_t password_state;
+	uint32_t min_length;
+	uint32_t max_length;
+	uint32_t supported_encodings;
+	uint32_t supported_keyboard;
+};
+
+/* password setting details */
+struct tlmi_pwd_setting {
+	struct kobject kobj;
+	bool valid;
+	char display_name[TLMI_PWDTYPE_MAXLEN];
+	char password[TLMI_PWD_BUFSIZE];
+	const char *pwd_type;
+	const char *role;
+	int minlen;
+	int maxlen;
+	enum encoding_option encoding;
+	char kbdlang[TLMI_LANG_MAXLEN];
+};
+
+/* Attribute setting details */
+struct tlmi_attr_setting {
+	struct kobject kobj;
+	int index;
+	char display_name[TLMI_SETTINGS_MAXLEN];
+	char *possible_values;
+};
+
+struct think_lmi {
+	struct wmi_device *wmi_device;
+
+	int settings_count;
+	bool can_set_bios_settings;
+	bool can_get_bios_selections;
+	bool can_set_bios_password;
+	bool can_get_password_settings;
+
+	struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT];
+	struct device *class_dev;
+	struct kset *attribute_kset;
+	struct kset *authentication_kset;
+	struct tlmi_pwd_setting *pwd_admin;
+	struct tlmi_pwd_setting *pwd_power;
+};
+
+#endif /* !_THINK_LMI_H_ */

From 14227ce92a402f7a3d51d05dae14d9d22211e501 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 31 May 2021 15:59:11 +0200
Subject: [PATCH 1940/3292] platform/x86: thinkpad-lmi: Remove unused
 display_name member from struct tlmi_pwd_setting

The struct tlmi_pwd_setting display_name member is initialized,
but never read. Remove it and the TLMI_PWDTYPE_MAXLEN define.

While at it also remove some other unused [MAX]LEN defines.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210531135911.82582-1-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 2 --
 drivers/platform/x86/think-lmi.h | 9 ---------
 2 files changed, 11 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 854427fed1a9..782d8e3fe7a1 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -819,7 +819,6 @@ static int tlmi_analyze(void)
 		ret = -ENOMEM;
 		goto fail_clear_attr;
 	}
-	strscpy(tlmi_priv.pwd_admin->display_name, "admin", TLMI_PWDTYPE_MAXLEN);
 	strscpy(tlmi_priv.pwd_admin->kbdlang, "us", TLMI_LANG_MAXLEN);
 	tlmi_priv.pwd_admin->encoding = TLMI_ENCODING_ASCII;
 	tlmi_priv.pwd_admin->pwd_type = "pap";
@@ -836,7 +835,6 @@ static int tlmi_analyze(void)
 		ret = -ENOMEM;
 		goto fail_clear_attr;
 	}
-	strscpy(tlmi_priv.pwd_power->display_name, "power-on", TLMI_PWDTYPE_MAXLEN);
 	strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN);
 	tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII;
 	tlmi_priv.pwd_power->pwd_type = "pop";
diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
index 6cd5325cc50e..6fa8da7af6c7 100644
--- a/drivers/platform/x86/think-lmi.h
+++ b/drivers/platform/x86/think-lmi.h
@@ -8,15 +8,7 @@
 #define TLMI_SETTINGS_COUNT  256
 #define TLMI_SETTINGS_MAXLEN 512
 #define TLMI_PWD_BUFSIZE     129
-#define TLMI_PWDTYPE_MAXLEN   64
-#define TLMI_ENC_MAXLEN       64
 #define TLMI_LANG_MAXLEN       4
-#define TLMI_PWDTYPE_LEN       4
-/*
- * Longest string should be in the set command: allow size of BIOS
- * option and choice
- */
-#define TLMI_GETSET_MAXLEN (TLMI_SETTINGS_MAXLEN + TLMI_SETTINGS_MAXLEN)
 
 /* Possible error values */
 struct tlmi_err_codes {
@@ -43,7 +35,6 @@ struct tlmi_pwdcfg {
 struct tlmi_pwd_setting {
 	struct kobject kobj;
 	bool valid;
-	char display_name[TLMI_PWDTYPE_MAXLEN];
 	char password[TLMI_PWD_BUFSIZE];
 	const char *pwd_type;
 	const char *role;

From ae8ee4c1e43af131088bb2da1163fdb864f6f6a2 Mon Sep 17 00:00:00 2001
From: kernel test robot <lkp@intel.com>
Date: Thu, 3 Jun 2021 23:39:36 +0800
Subject: [PATCH 1941/3292] platform/x86: dell-wmi-sysman: fw_attr_inuse can be
 static

drivers/platform/x86/firmware_attributes_class.c:11:5: warning: symbol 'fw_attr_inuse' was not declared. Should it be static?

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20210603153936.GA65404@7832cb195c0b
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/firmware_attributes_class.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/firmware_attributes_class.c b/drivers/platform/x86/firmware_attributes_class.c
index d62ec3d71ede..fafe8eaf6e3e 100644
--- a/drivers/platform/x86/firmware_attributes_class.c
+++ b/drivers/platform/x86/firmware_attributes_class.c
@@ -8,7 +8,7 @@
 #include "firmware_attributes_class.h"
 
 static DEFINE_MUTEX(fw_attr_lock);
-int fw_attr_inuse;
+static int fw_attr_inuse;
 
 static struct class firmware_attributes_class = {
 	.name = "firmware-attributes",

From a32348b743eec51ac01334735a0b6c979157b132 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:25:40 +0200
Subject: [PATCH 1942/3292] platform/surface: dtx: Add missing mutex_destroy()
 call in failure path

When we fail to open the device file due to DTX being shut down, the
mutex is initialized but never destroyed. We are destroying it when
releasing the file, so add the missing call in the failure path as well.

Fixes: 1d609992832e ("platform/surface: Add DTX driver")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210604132540.533036-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_dtx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c
index 5d9b758a99bb..1203b9a82993 100644
--- a/drivers/platform/surface/surface_dtx.c
+++ b/drivers/platform/surface/surface_dtx.c
@@ -427,6 +427,7 @@ static int surface_dtx_open(struct inode *inode, struct file *file)
 	 */
 	if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &ddev->flags)) {
 		up_write(&ddev->client_lock);
+		mutex_destroy(&client->read_lock);
 		sdtx_device_put(client->ddev);
 		kfree(client);
 		return -ENODEV;

From 3d9907e181de05a32420db46b068b2557173a9f7 Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Thu, 3 Jun 2021 20:28:27 +0300
Subject: [PATCH 1943/3292] platform/mellanox: mlxreg-hotplug: Revert "move to
 use request_irq by IRQF_NO_AUTOEN flag"

It causes mlxreg-hotplug probing failure: request_threaded_irq()
 returns -EINVAL due to true value of condition:
((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN))
after flag "IRQF_NO_AUTOEN" has been added to:
	err = devm_request_irq(&pdev->dev, priv->irq,
			       mlxreg_hotplug_irq_handler, IRQF_TRIGGER_FALLING
			       | IRQF_SHARED | IRQF_NO_AUTOEN,
			       "mlxreg-hotplug", priv);

This reverts commit bee3ecfed0fc ("platform/mellanox: mlxreg-hotplug: move
to use request_irq by IRQF_NO_AUTOEN flag").

Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20210603172827.2599908-1-c_mykolak@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/mellanox/mlxreg-hotplug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index a9db2f32658f..b013445147dd 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -683,13 +683,13 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 
 	err = devm_request_irq(&pdev->dev, priv->irq,
 			       mlxreg_hotplug_irq_handler, IRQF_TRIGGER_FALLING
-			       | IRQF_SHARED | IRQF_NO_AUTOEN,
-			       "mlxreg-hotplug", priv);
+			       | IRQF_SHARED, "mlxreg-hotplug", priv);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to request irq: %d\n", err);
 		return err;
 	}
 
+	disable_irq(priv->irq);
 	spin_lock_init(&priv->lock);
 	INIT_DELAYED_WORK(&priv->dwork_irq, mlxreg_hotplug_work_handler);
 	dev_set_drvdata(&pdev->dev, priv);

From 0e8512fab9fd6d78e88931c02a43b04d15566d6b Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:49 +0200
Subject: [PATCH 1944/3292] platform/surface: aggregator: Allow registering
 notifiers without enabling events

Currently, each SSAM event notifier is directly tied to one group of
events. This makes sense as registering a notifier will automatically
take care of enabling the corresponding event group and normally drivers
only need notifications for a very limited number of events, associated
with different callbacks for each group.

However, there are rare cases, especially for debugging, when we want to
get notifications for a whole event target category instead of just a
single group of events in that category. Registering multiple notifiers,
i.e. one per group, may be infeasible due to two issues: a) we might not
know every event enable/disable specification as some events are
auto-enabled by the EC and b) forwarding this to the same callback will
lead to duplicate events as we might not know the full event
specification to perform the appropriate filtering.

This commit introduces observer-notifiers, which are notifiers that are
not tied to a specific event group and do not attempt to manage any
events. In other words, they can be registered without enabling any
event group or incrementing the corresponding reference count and just
act as silent observers, listening to all currently/previously enabled
events based on their match-specification.

Essentially, this allows us to register one single notifier for a full
event target category, meaning that we can process all events of that
target category in a single callback without duplication. Specifically,
this will be used in the cdev debug interface to forward events to
user-space via a device file from which the events can be read.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210604134755.535590-2-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../platform/surface/aggregator/controller.c  | 69 +++++++++++--------
 include/linux/surface_aggregator/controller.h | 17 +++++
 2 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index a06964aa96e7..cd3a6b77f48d 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -2127,9 +2127,15 @@ int ssam_ctrl_notif_d0_entry(struct ssam_controller *ctrl)
  * @ctrl: The controller to register the notifier on.
  * @n:    The event notifier to register.
  *
- * Register an event notifier and increment the usage counter of the
- * associated SAM event. If the event was previously not enabled, it will be
- * enabled during this call.
+ * Register an event notifier. Increment the usage counter of the associated
+ * SAM event if the notifier is not marked as an observer. If the event is not
+ * marked as an observer and is currently not enabled, it will be enabled
+ * during this call. If the notifier is marked as an observer, no attempt will
+ * be made at enabling any event and no reference count will be modified.
+ *
+ * Notifiers marked as observers do not need to be associated with one specific
+ * event, i.e. as long as no event matching is performed, only the event target
+ * category needs to be set.
  *
  * Return: Returns zero on success, %-ENOSPC if there have already been
  * %INT_MAX notifiers for the event ID/type associated with the notifier block
@@ -2138,11 +2144,10 @@ int ssam_ctrl_notif_d0_entry(struct ssam_controller *ctrl)
  * for the specific associated event, returns the status of the event-enable
  * EC-command.
  */
-int ssam_notifier_register(struct ssam_controller *ctrl,
-			   struct ssam_event_notifier *n)
+int ssam_notifier_register(struct ssam_controller *ctrl, struct ssam_event_notifier *n)
 {
 	u16 rqid = ssh_tc_to_rqid(n->event.id.target_category);
-	struct ssam_nf_refcount_entry *entry;
+	struct ssam_nf_refcount_entry *entry = NULL;
 	struct ssam_nf_head *nf_head;
 	struct ssam_nf *nf;
 	int status;
@@ -2155,29 +2160,32 @@ int ssam_notifier_register(struct ssam_controller *ctrl,
 
 	mutex_lock(&nf->lock);
 
-	entry = ssam_nf_refcount_inc(nf, n->event.reg, n->event.id);
-	if (IS_ERR(entry)) {
-		mutex_unlock(&nf->lock);
-		return PTR_ERR(entry);
-	}
+	if (!(n->flags & SSAM_EVENT_NOTIFIER_OBSERVER)) {
+		entry = ssam_nf_refcount_inc(nf, n->event.reg, n->event.id);
+		if (IS_ERR(entry)) {
+			mutex_unlock(&nf->lock);
+			return PTR_ERR(entry);
+		}
 
-	ssam_dbg(ctrl, "enabling event (reg: %#04x, tc: %#04x, iid: %#04x, rc: %d)\n",
-		 n->event.reg.target_category, n->event.id.target_category,
-		 n->event.id.instance, entry->refcount);
+		ssam_dbg(ctrl, "enabling event (reg: %#04x, tc: %#04x, iid: %#04x, rc: %d)\n",
+			 n->event.reg.target_category, n->event.id.target_category,
+			 n->event.id.instance, entry->refcount);
+	}
 
 	status = ssam_nfblk_insert(nf_head, &n->base);
 	if (status) {
-		entry = ssam_nf_refcount_dec(nf, n->event.reg, n->event.id);
-		if (entry->refcount == 0)
-			kfree(entry);
+		if (entry) {
+			entry = ssam_nf_refcount_dec(nf, n->event.reg, n->event.id);
+			if (entry->refcount == 0)
+				kfree(entry);
+		}
 
 		mutex_unlock(&nf->lock);
 		return status;
 	}
 
-	if (entry->refcount == 1) {
-		status = ssam_ssh_event_enable(ctrl, n->event.reg, n->event.id,
-					       n->event.flags);
+	if (entry && entry->refcount == 1) {
+		status = ssam_ssh_event_enable(ctrl, n->event.reg, n->event.id, n->event.flags);
 		if (status) {
 			ssam_nfblk_remove(&n->base);
 			kfree(ssam_nf_refcount_dec(nf, n->event.reg, n->event.id));
@@ -2188,7 +2196,7 @@ int ssam_notifier_register(struct ssam_controller *ctrl,
 
 		entry->flags = n->event.flags;
 
-	} else if (entry->flags != n->event.flags) {
+	} else if (entry && entry->flags != n->event.flags) {
 		ssam_warn(ctrl,
 			  "inconsistent flags when enabling event: got %#04x, expected %#04x (reg: %#04x, tc: %#04x, iid: %#04x)\n",
 			  n->event.flags, entry->flags, n->event.reg.target_category,
@@ -2205,17 +2213,16 @@ EXPORT_SYMBOL_GPL(ssam_notifier_register);
  * @ctrl: The controller the notifier has been registered on.
  * @n:    The event notifier to unregister.
  *
- * Unregister an event notifier and decrement the usage counter of the
- * associated SAM event. If the usage counter reaches zero, the event will be
- * disabled.
+ * Unregister an event notifier. Decrement the usage counter of the associated
+ * SAM event if the notifier is not marked as an observer. If the usage counter
+ * reaches zero, the event will be disabled.
  *
  * Return: Returns zero on success, %-ENOENT if the given notifier block has
  * not been registered on the controller. If the given notifier block was the
  * last one associated with its specific event, returns the status of the
  * event-disable EC-command.
  */
-int ssam_notifier_unregister(struct ssam_controller *ctrl,
-			     struct ssam_event_notifier *n)
+int ssam_notifier_unregister(struct ssam_controller *ctrl, struct ssam_event_notifier *n)
 {
 	u16 rqid = ssh_tc_to_rqid(n->event.id.target_category);
 	struct ssam_nf_refcount_entry *entry;
@@ -2236,6 +2243,13 @@ int ssam_notifier_unregister(struct ssam_controller *ctrl,
 		return -ENOENT;
 	}
 
+	/*
+	 * If this is an observer notifier, do not attempt to disable the
+	 * event, just remove it.
+	 */
+	if (n->flags & SSAM_EVENT_NOTIFIER_OBSERVER)
+		goto remove;
+
 	entry = ssam_nf_refcount_dec(nf, n->event.reg, n->event.id);
 	if (WARN_ON(!entry)) {
 		/*
@@ -2260,8 +2274,7 @@ int ssam_notifier_unregister(struct ssam_controller *ctrl,
 	}
 
 	if (entry->refcount == 0) {
-		status = ssam_ssh_event_disable(ctrl, n->event.reg, n->event.id,
-						n->event.flags);
+		status = ssam_ssh_event_disable(ctrl, n->event.reg, n->event.id, n->event.flags);
 		kfree(entry);
 	}
 
diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h
index 0806796eabcb..cf4bb48a850e 100644
--- a/include/linux/surface_aggregator/controller.h
+++ b/include/linux/surface_aggregator/controller.h
@@ -795,6 +795,20 @@ enum ssam_event_mask {
 #define SSAM_EVENT_REGISTRY_REG \
 	SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 0x02, 0x01, 0x02)
 
+/**
+ * enum ssam_event_notifier_flags - Flags for event notifiers.
+ * @SSAM_EVENT_NOTIFIER_OBSERVER:
+ *	The corresponding notifier acts as observer. Registering a notifier
+ *	with this flag set will not attempt to enable any event. Equally,
+ *	unregistering will not attempt to disable any event. Note that a
+ *	notifier with this flag may not even correspond to a certain event at
+ *	all, only to a specific event target category. Event matching will not
+ *	be influenced by this flag.
+ */
+enum ssam_event_notifier_flags {
+	SSAM_EVENT_NOTIFIER_OBSERVER = BIT(0),
+};
+
 /**
  * struct ssam_event_notifier - Notifier block for SSAM events.
  * @base:        The base notifier block with callback function and priority.
@@ -803,6 +817,7 @@ enum ssam_event_mask {
  * @event.id:    ID specifying the event.
  * @event.mask:  Flags determining how events are matched to the notifier.
  * @event.flags: Flags used for enabling the event.
+ * @flags:       Notifier flags (see &enum ssam_event_notifier_flags).
  */
 struct ssam_event_notifier {
 	struct ssam_notifier_block base;
@@ -813,6 +828,8 @@ struct ssam_event_notifier {
 		enum ssam_event_mask mask;
 		u8 flags;
 	} event;
+
+	unsigned long flags;
 };
 
 int ssam_notifier_register(struct ssam_controller *ctrl,

From 4b38a1dcf378f5075884b54dc5afeb9d0dfe7681 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:50 +0200
Subject: [PATCH 1945/3292] platform/surface: aggregator: Allow enabling of
 events without notifiers

We can already enable and disable SAM events via one of two ways: either
via a (non-observer) notifier tied to a specific event group, or a
generic event enable/disable request. In some instances, however,
neither method may be desirable.

The first method will tie the event enable request to a specific
notifier, however, when we want to receive notifications for multiple
event groups of the same target category and forward this to the same
notifier callback, we may receive duplicate events, i.e. one event per
registered notifier. The second method will bypass the internal
reference counting mechanism, meaning that a disable request will
disable the event regardless of any other client driver using it, which
may break the functionality of that driver.

To address this problem, add new functions that allow enabling and
disabling of events via the event reference counting mechanism built
into the controller, without needing to register a notifier.

This can then be used in combination with observer notifiers to process
multiple events of the same target category without duplication in the
same callback function.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210604134755.535590-3-luzmaximilian@gmail.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../platform/surface/aggregator/controller.c  | 293 +++++++++++++++---
 include/linux/surface_aggregator/controller.h |   8 +
 2 files changed, 253 insertions(+), 48 deletions(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index cd3a6b77f48d..cedd0f779f7a 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -407,6 +407,31 @@ ssam_nf_refcount_dec(struct ssam_nf *nf, struct ssam_event_registry reg,
 	return NULL;
 }
 
+/**
+ * ssam_nf_refcount_dec_free() - Decrement reference-/activation-count of the
+ * given event and free its entry if the reference count reaches zero.
+ * @nf:  The notifier system reference.
+ * @reg: The registry used to enable/disable the event.
+ * @id:  The event ID.
+ *
+ * Decrements the reference-/activation-count of the specified event, freeing
+ * its entry if it reaches zero.
+ *
+ * Note: ``nf->lock`` must be held when calling this function.
+ */
+static void ssam_nf_refcount_dec_free(struct ssam_nf *nf,
+				      struct ssam_event_registry reg,
+				      struct ssam_event_id id)
+{
+	struct ssam_nf_refcount_entry *entry;
+
+	lockdep_assert_held(&nf->lock);
+
+	entry = ssam_nf_refcount_dec(nf, reg, id);
+	if (entry && entry->refcount == 0)
+		kfree(entry);
+}
+
 /**
  * ssam_nf_refcount_empty() - Test if the notification system has any
  * enabled/active events.
@@ -2122,6 +2147,109 @@ int ssam_ctrl_notif_d0_entry(struct ssam_controller *ctrl)
 
 /* -- Top-level event registry interface. ----------------------------------- */
 
+/**
+ * ssam_nf_refcount_enable() - Enable event for reference count entry if it has
+ * not already been enabled.
+ * @ctrl:  The controller to enable the event on.
+ * @entry: The reference count entry for the event to be enabled.
+ * @flags: The flags used for enabling the event on the EC.
+ *
+ * Enable the event associated with the given reference count entry if the
+ * reference count equals one, i.e. the event has not previously been enabled.
+ * If the event has already been enabled (i.e. reference count not equal to
+ * one), check that the flags used for enabling match and warn about this if
+ * they do not.
+ *
+ * This does not modify the reference count itself, which is done with
+ * ssam_nf_refcount_inc() / ssam_nf_refcount_dec().
+ *
+ * Note: ``nf->lock`` must be held when calling this function.
+ *
+ * Return: Returns zero on success. If the event is enabled by this call,
+ * returns the status of the event-enable EC command.
+ */
+static int ssam_nf_refcount_enable(struct ssam_controller *ctrl,
+				   struct ssam_nf_refcount_entry *entry, u8 flags)
+{
+	const struct ssam_event_registry reg = entry->key.reg;
+	const struct ssam_event_id id = entry->key.id;
+	struct ssam_nf *nf = &ctrl->cplt.event.notif;
+	int status;
+
+	lockdep_assert_held(&nf->lock);
+
+	ssam_dbg(ctrl, "enabling event (reg: %#04x, tc: %#04x, iid: %#04x, rc: %d)\n",
+		 reg.target_category, id.target_category, id.instance, entry->refcount);
+
+	if (entry->refcount == 1) {
+		status = ssam_ssh_event_enable(ctrl, reg, id, flags);
+		if (status)
+			return status;
+
+		entry->flags = flags;
+
+	} else if (entry->flags != flags) {
+		ssam_warn(ctrl,
+			  "inconsistent flags when enabling event: got %#04x, expected %#04x (reg: %#04x, tc: %#04x, iid: %#04x)\n",
+			  flags, entry->flags, reg.target_category, id.target_category,
+			  id.instance);
+	}
+
+	return 0;
+}
+
+/**
+ * ssam_nf_refcount_disable_free() - Disable event for reference count entry if it is
+ * no longer in use and free the corresponding entry.
+ * @ctrl:  The controller to disable the event on.
+ * @entry: The reference count entry for the event to be disabled.
+ * @flags: The flags used for enabling the event on the EC.
+ *
+ * If the reference count equals zero, i.e. the event is no longer requested by
+ * any client, the event will be disabled and the corresponding reference count
+ * entry freed. The reference count entry must not be used any more after a
+ * call to this function.
+ *
+ * Also checks if the flags used for disabling the event match the flags used
+ * for enabling the event and warns if they do not (regardless of reference
+ * count).
+ *
+ * This does not modify the reference count itself, which is done with
+ * ssam_nf_refcount_inc() / ssam_nf_refcount_dec().
+ *
+ * Note: ``nf->lock`` must be held when calling this function.
+ *
+ * Return: Returns zero on success. If the event is disabled by this call,
+ * returns the status of the event-enable EC command.
+ */
+static int ssam_nf_refcount_disable_free(struct ssam_controller *ctrl,
+					 struct ssam_nf_refcount_entry *entry, u8 flags)
+{
+	const struct ssam_event_registry reg = entry->key.reg;
+	const struct ssam_event_id id = entry->key.id;
+	struct ssam_nf *nf = &ctrl->cplt.event.notif;
+	int status;
+
+	lockdep_assert_held(&nf->lock);
+
+	ssam_dbg(ctrl, "disabling event (reg: %#04x, tc: %#04x, iid: %#04x, rc: %d)\n",
+		 reg.target_category, id.target_category, id.instance, entry->refcount);
+
+	if (entry->flags != flags) {
+		ssam_warn(ctrl,
+			  "inconsistent flags when disabling event: got %#04x, expected %#04x (reg: %#04x, tc: %#04x, iid: %#04x)\n",
+			  flags, entry->flags, reg.target_category, id.target_category,
+			  id.instance);
+	}
+
+	if (entry->refcount == 0) {
+		status = ssam_ssh_event_disable(ctrl, reg, id, flags);
+		kfree(entry);
+	}
+
+	return status;
+}
+
 /**
  * ssam_notifier_register() - Register an event notifier.
  * @ctrl: The controller to register the notifier on.
@@ -2166,41 +2294,26 @@ int ssam_notifier_register(struct ssam_controller *ctrl, struct ssam_event_notif
 			mutex_unlock(&nf->lock);
 			return PTR_ERR(entry);
 		}
-
-		ssam_dbg(ctrl, "enabling event (reg: %#04x, tc: %#04x, iid: %#04x, rc: %d)\n",
-			 n->event.reg.target_category, n->event.id.target_category,
-			 n->event.id.instance, entry->refcount);
 	}
 
 	status = ssam_nfblk_insert(nf_head, &n->base);
 	if (status) {
-		if (entry) {
-			entry = ssam_nf_refcount_dec(nf, n->event.reg, n->event.id);
-			if (entry->refcount == 0)
-				kfree(entry);
-		}
+		if (entry)
+			ssam_nf_refcount_dec_free(nf, n->event.reg, n->event.id);
 
 		mutex_unlock(&nf->lock);
 		return status;
 	}
 
-	if (entry && entry->refcount == 1) {
-		status = ssam_ssh_event_enable(ctrl, n->event.reg, n->event.id, n->event.flags);
+	if (entry) {
+		status = ssam_nf_refcount_enable(ctrl, entry, n->event.flags);
 		if (status) {
 			ssam_nfblk_remove(&n->base);
-			kfree(ssam_nf_refcount_dec(nf, n->event.reg, n->event.id));
+			ssam_nf_refcount_dec_free(nf, n->event.reg, n->event.id);
 			mutex_unlock(&nf->lock);
 			synchronize_srcu(&nf_head->srcu);
 			return status;
 		}
-
-		entry->flags = n->event.flags;
-
-	} else if (entry && entry->flags != n->event.flags) {
-		ssam_warn(ctrl,
-			  "inconsistent flags when enabling event: got %#04x, expected %#04x (reg: %#04x, tc: %#04x, iid: %#04x)\n",
-			  n->event.flags, entry->flags, n->event.reg.target_category,
-			  n->event.id.target_category, n->event.id.instance);
 	}
 
 	mutex_unlock(&nf->lock);
@@ -2247,35 +2360,20 @@ int ssam_notifier_unregister(struct ssam_controller *ctrl, struct ssam_event_not
 	 * If this is an observer notifier, do not attempt to disable the
 	 * event, just remove it.
 	 */
-	if (n->flags & SSAM_EVENT_NOTIFIER_OBSERVER)
-		goto remove;
+	if (!(n->flags & SSAM_EVENT_NOTIFIER_OBSERVER)) {
+		entry = ssam_nf_refcount_dec(nf, n->event.reg, n->event.id);
+		if (WARN_ON(!entry)) {
+			/*
+			 * If this does not return an entry, there's a logic
+			 * error somewhere: The notifier block is registered,
+			 * but the event refcount entry is not there. Remove
+			 * the notifier block anyways.
+			 */
+			status = -ENOENT;
+			goto remove;
+		}
 
-	entry = ssam_nf_refcount_dec(nf, n->event.reg, n->event.id);
-	if (WARN_ON(!entry)) {
-		/*
-		 * If this does not return an entry, there's a logic error
-		 * somewhere: The notifier block is registered, but the event
-		 * refcount entry is not there. Remove the notifier block
-		 * anyways.
-		 */
-		status = -ENOENT;
-		goto remove;
-	}
-
-	ssam_dbg(ctrl, "disabling event (reg: %#04x, tc: %#04x, iid: %#04x, rc: %d)\n",
-		 n->event.reg.target_category, n->event.id.target_category,
-		 n->event.id.instance, entry->refcount);
-
-	if (entry->flags != n->event.flags) {
-		ssam_warn(ctrl,
-			  "inconsistent flags when disabling event: got %#04x, expected %#04x (reg: %#04x, tc: %#04x, iid: %#04x)\n",
-			  n->event.flags, entry->flags, n->event.reg.target_category,
-			  n->event.id.target_category, n->event.id.instance);
-	}
-
-	if (entry->refcount == 0) {
-		status = ssam_ssh_event_disable(ctrl, n->event.reg, n->event.id, n->event.flags);
-		kfree(entry);
+		status = ssam_nf_refcount_disable_free(ctrl, entry, n->event.flags);
 	}
 
 remove:
@@ -2287,6 +2385,105 @@ remove:
 }
 EXPORT_SYMBOL_GPL(ssam_notifier_unregister);
 
+/**
+ * ssam_controller_event_enable() - Enable the specified event.
+ * @ctrl:  The controller to enable the event for.
+ * @reg:   The event registry to use for enabling the event.
+ * @id:    The event ID specifying the event to be enabled.
+ * @flags: The SAM event flags used for enabling the event.
+ *
+ * Increment the event reference count of the specified event. If the event has
+ * not been enabled previously, it will be enabled by this call.
+ *
+ * Note: In general, ssam_notifier_register() with a non-observer notifier
+ * should be preferred for enabling/disabling events, as this will guarantee
+ * proper ordering and event forwarding in case of errors during event
+ * enabling/disabling.
+ *
+ * Return: Returns zero on success, %-ENOSPC if the reference count for the
+ * specified event has reached its maximum, %-ENOMEM if the corresponding event
+ * entry could not be allocated. If this is the first time that this event has
+ * been enabled (i.e. the reference count was incremented from zero to one by
+ * this call), returns the status of the event-enable EC-command.
+ */
+int ssam_controller_event_enable(struct ssam_controller *ctrl,
+				 struct ssam_event_registry reg,
+				 struct ssam_event_id id, u8 flags)
+{
+	u16 rqid = ssh_tc_to_rqid(id.target_category);
+	struct ssam_nf *nf = &ctrl->cplt.event.notif;
+	struct ssam_nf_refcount_entry *entry;
+	int status;
+
+	if (!ssh_rqid_is_event(rqid))
+		return -EINVAL;
+
+	mutex_lock(&nf->lock);
+
+	entry = ssam_nf_refcount_inc(nf, reg, id);
+	if (IS_ERR(entry)) {
+		mutex_unlock(&nf->lock);
+		return PTR_ERR(entry);
+	}
+
+	status = ssam_nf_refcount_enable(ctrl, entry, flags);
+	if (status) {
+		ssam_nf_refcount_dec_free(nf, reg, id);
+		mutex_unlock(&nf->lock);
+		return status;
+	}
+
+	mutex_unlock(&nf->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ssam_controller_event_enable);
+
+/**
+ * ssam_controller_event_disable() - Disable the specified event.
+ * @ctrl:  The controller to disable the event for.
+ * @reg:   The event registry to use for disabling the event.
+ * @id:    The event ID specifying the event to be disabled.
+ * @flags: The flags used when enabling the event.
+ *
+ * Decrement the reference count of the specified event. If the reference count
+ * reaches zero, the event will be disabled.
+ *
+ * Note: In general, ssam_notifier_register()/ssam_notifier_unregister() with a
+ * non-observer notifier should be preferred for enabling/disabling events, as
+ * this will guarantee proper ordering and event forwarding in case of errors
+ * during event enabling/disabling.
+ *
+ * Return: Returns zero on success, %-ENOENT if the given event has not been
+ * enabled on the controller. If the reference count of the event reaches zero
+ * during this call, returns the status of the event-disable EC-command.
+ */
+int ssam_controller_event_disable(struct ssam_controller *ctrl,
+				  struct ssam_event_registry reg,
+				  struct ssam_event_id id, u8 flags)
+{
+	u16 rqid = ssh_tc_to_rqid(id.target_category);
+	struct ssam_nf *nf = &ctrl->cplt.event.notif;
+	struct ssam_nf_refcount_entry *entry;
+	int status = 0;
+
+	if (!ssh_rqid_is_event(rqid))
+		return -EINVAL;
+
+	mutex_lock(&nf->lock);
+
+	entry = ssam_nf_refcount_dec(nf, reg, id);
+	if (!entry) {
+		mutex_unlock(&nf->lock);
+		return -ENOENT;
+	}
+
+	status = ssam_nf_refcount_disable_free(ctrl, entry, flags);
+
+	mutex_unlock(&nf->lock);
+	return status;
+}
+EXPORT_SYMBOL_GPL(ssam_controller_event_disable);
+
 /**
  * ssam_notifier_disable_registered() - Disable events for all registered
  * notifiers.
diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h
index cf4bb48a850e..7965bdc669c5 100644
--- a/include/linux/surface_aggregator/controller.h
+++ b/include/linux/surface_aggregator/controller.h
@@ -838,4 +838,12 @@ int ssam_notifier_register(struct ssam_controller *ctrl,
 int ssam_notifier_unregister(struct ssam_controller *ctrl,
 			     struct ssam_event_notifier *n);
 
+int ssam_controller_event_enable(struct ssam_controller *ctrl,
+				 struct ssam_event_registry reg,
+				 struct ssam_event_id id, u8 flags);
+
+int ssam_controller_event_disable(struct ssam_controller *ctrl,
+				  struct ssam_event_registry reg,
+				  struct ssam_event_id id, u8 flags);
+
 #endif /* _LINUX_SURFACE_AGGREGATOR_CONTROLLER_H */

From b2763358feb28590f6b52a4c95c94a645dadfb26 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:51 +0200
Subject: [PATCH 1946/3292] platform/surface: aggregator: Update copyright

It's 2021, update the copyright accordingly.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210604134755.535590-4-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/Kconfig             | 2 +-
 drivers/platform/surface/aggregator/Makefile            | 2 +-
 drivers/platform/surface/aggregator/bus.c               | 2 +-
 drivers/platform/surface/aggregator/bus.h               | 2 +-
 drivers/platform/surface/aggregator/controller.c        | 2 +-
 drivers/platform/surface/aggregator/controller.h        | 2 +-
 drivers/platform/surface/aggregator/core.c              | 2 +-
 drivers/platform/surface/aggregator/ssh_msgb.h          | 2 +-
 drivers/platform/surface/aggregator/ssh_packet_layer.c  | 2 +-
 drivers/platform/surface/aggregator/ssh_packet_layer.h  | 2 +-
 drivers/platform/surface/aggregator/ssh_parser.c        | 2 +-
 drivers/platform/surface/aggregator/ssh_parser.h        | 2 +-
 drivers/platform/surface/aggregator/ssh_request_layer.c | 2 +-
 drivers/platform/surface/aggregator/ssh_request_layer.h | 2 +-
 drivers/platform/surface/aggregator/trace.h             | 2 +-
 include/linux/surface_aggregator/controller.h           | 2 +-
 include/linux/surface_aggregator/device.h               | 2 +-
 include/linux/surface_aggregator/serial_hub.h           | 2 +-
 18 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/platform/surface/aggregator/Kconfig b/drivers/platform/surface/aggregator/Kconfig
index 3aaeea9f0433..fd6dc452f3e8 100644
--- a/drivers/platform/surface/aggregator/Kconfig
+++ b/drivers/platform/surface/aggregator/Kconfig
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0+
-# Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+# Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
 
 menuconfig SURFACE_AGGREGATOR
 	tristate "Microsoft Surface System Aggregator Module Subsystem and Drivers"
diff --git a/drivers/platform/surface/aggregator/Makefile b/drivers/platform/surface/aggregator/Makefile
index c112e2c7112b..c8498c41e758 100644
--- a/drivers/platform/surface/aggregator/Makefile
+++ b/drivers/platform/surface/aggregator/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0+
-# Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+# Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
 
 # For include/trace/define_trace.h to include trace.h
 CFLAGS_core.o = -I$(src)
diff --git a/drivers/platform/surface/aggregator/bus.c b/drivers/platform/surface/aggregator/bus.c
index a9b660af0917..0169677c243e 100644
--- a/drivers/platform/surface/aggregator/bus.c
+++ b/drivers/platform/surface/aggregator/bus.c
@@ -2,7 +2,7 @@
 /*
  * Surface System Aggregator Module bus and device integration.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <linux/device.h>
diff --git a/drivers/platform/surface/aggregator/bus.h b/drivers/platform/surface/aggregator/bus.h
index 7712baaed6a5..ed032c2cbdb2 100644
--- a/drivers/platform/surface/aggregator/bus.h
+++ b/drivers/platform/surface/aggregator/bus.h
@@ -2,7 +2,7 @@
 /*
  * Surface System Aggregator Module bus and device integration.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _SURFACE_AGGREGATOR_BUS_H
diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index cedd0f779f7a..6646f4d6e10d 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -2,7 +2,7 @@
 /*
  * Main SSAM/SSH controller structure and functionality.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <linux/acpi.h>
diff --git a/drivers/platform/surface/aggregator/controller.h b/drivers/platform/surface/aggregator/controller.h
index 8297d34e7489..a0963c3562ff 100644
--- a/drivers/platform/surface/aggregator/controller.h
+++ b/drivers/platform/surface/aggregator/controller.h
@@ -2,7 +2,7 @@
 /*
  * Main SSAM/SSH controller structure and functionality.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _SURFACE_AGGREGATOR_CONTROLLER_H
diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c
index 8dc2c267bcd6..5d780e55f4a1 100644
--- a/drivers/platform/surface/aggregator/core.c
+++ b/drivers/platform/surface/aggregator/core.c
@@ -7,7 +7,7 @@
  * Handles communication via requests as well as enabling, disabling, and
  * relaying of events.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <linux/acpi.h>
diff --git a/drivers/platform/surface/aggregator/ssh_msgb.h b/drivers/platform/surface/aggregator/ssh_msgb.h
index 1221f642dda1..e562958ffdf0 100644
--- a/drivers/platform/surface/aggregator/ssh_msgb.h
+++ b/drivers/platform/surface/aggregator/ssh_msgb.h
@@ -2,7 +2,7 @@
 /*
  * SSH message builder functions.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _SURFACE_AGGREGATOR_SSH_MSGB_H
diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c
index 15d96eac6811..5e08049fc3ac 100644
--- a/drivers/platform/surface/aggregator/ssh_packet_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c
@@ -2,7 +2,7 @@
 /*
  * SSH packet transport layer.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <asm/unaligned.h>
diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.h b/drivers/platform/surface/aggregator/ssh_packet_layer.h
index e8757d03f279..2eb329f0b91a 100644
--- a/drivers/platform/surface/aggregator/ssh_packet_layer.h
+++ b/drivers/platform/surface/aggregator/ssh_packet_layer.h
@@ -2,7 +2,7 @@
 /*
  * SSH packet transport layer.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _SURFACE_AGGREGATOR_SSH_PACKET_LAYER_H
diff --git a/drivers/platform/surface/aggregator/ssh_parser.c b/drivers/platform/surface/aggregator/ssh_parser.c
index e2dead8de94a..b77912f8f13b 100644
--- a/drivers/platform/surface/aggregator/ssh_parser.c
+++ b/drivers/platform/surface/aggregator/ssh_parser.c
@@ -2,7 +2,7 @@
 /*
  * SSH message parser.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <asm/unaligned.h>
diff --git a/drivers/platform/surface/aggregator/ssh_parser.h b/drivers/platform/surface/aggregator/ssh_parser.h
index 63c38d350988..3bd6e180fd16 100644
--- a/drivers/platform/surface/aggregator/ssh_parser.h
+++ b/drivers/platform/surface/aggregator/ssh_parser.h
@@ -2,7 +2,7 @@
 /*
  * SSH message parser.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _SURFACE_AGGREGATOR_SSH_PARSER_H
diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c
index 52a83a8fcf82..bfe1aaf38065 100644
--- a/drivers/platform/surface/aggregator/ssh_request_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_request_layer.c
@@ -2,7 +2,7 @@
 /*
  * SSH request transport layer.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <asm/unaligned.h>
diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.h b/drivers/platform/surface/aggregator/ssh_request_layer.h
index cb35815858d1..9c3cbae2d4bd 100644
--- a/drivers/platform/surface/aggregator/ssh_request_layer.h
+++ b/drivers/platform/surface/aggregator/ssh_request_layer.h
@@ -2,7 +2,7 @@
 /*
  * SSH request transport layer.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _SURFACE_AGGREGATOR_SSH_REQUEST_LAYER_H
diff --git a/drivers/platform/surface/aggregator/trace.h b/drivers/platform/surface/aggregator/trace.h
index eb332bb53ae4..de64cf169060 100644
--- a/drivers/platform/surface/aggregator/trace.h
+++ b/drivers/platform/surface/aggregator/trace.h
@@ -2,7 +2,7 @@
 /*
  * Trace points for SSAM/SSH.
  *
- * Copyright (C) 2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2020-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #undef TRACE_SYSTEM
diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h
index 7965bdc669c5..068e1982ad37 100644
--- a/include/linux/surface_aggregator/controller.h
+++ b/include/linux/surface_aggregator/controller.h
@@ -6,7 +6,7 @@
  * managing access and communication to and from the SSAM EC, as well as main
  * communication structures and definitions.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _LINUX_SURFACE_AGGREGATOR_CONTROLLER_H
diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h
index 6ff9c58b3e17..f636c5310321 100644
--- a/include/linux/surface_aggregator/device.h
+++ b/include/linux/surface_aggregator/device.h
@@ -7,7 +7,7 @@
  * Provides support for non-platform/non-ACPI SSAM clients via dedicated
  * subsystem.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _LINUX_SURFACE_AGGREGATOR_DEVICE_H
diff --git a/include/linux/surface_aggregator/serial_hub.h b/include/linux/surface_aggregator/serial_hub.h
index 64276fbfa1d5..c3de43edcffa 100644
--- a/include/linux/surface_aggregator/serial_hub.h
+++ b/include/linux/surface_aggregator/serial_hub.h
@@ -6,7 +6,7 @@
  * Surface System Aggregator Module (SSAM). Provides the interface for basic
  * packet- and request-based communication with the SSAM EC via SSH.
  *
- * Copyright (C) 2019-2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2019-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _LINUX_SURFACE_AGGREGATOR_SERIAL_HUB_H

From 776c53c6a448905d8b9b161805b67f82301bfe91 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:52 +0200
Subject: [PATCH 1947/3292] platform/surface: aggregator_cdev: Add support for
 forwarding events to user-space

Currently, debugging unknown events requires writing a custom driver.
This is somewhat difficult, slow to adapt, and not entirely
user-friendly for quickly trying to figure out things on devices of some
third-party user. We can do better. We already have a user-space
interface intended for debugging SAM EC requests, so let's add support
for receiving events to that.

This commit provides support for receiving events by reading from the
controller file. It additionally introduces two new IOCTLs to control
which event categories will be forwarded. Specifically, a user-space
client can specify which target categories it wants to receive events
from by registering the corresponding notifier(s) via the IOCTLs and
after that, read the received events by reading from the controller
device.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210604134755.535590-5-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |   2 +-
 .../surface/surface_aggregator_cdev.c         | 462 +++++++++++++++++-
 include/uapi/linux/surface_aggregator/cdev.h  |  41 +-
 3 files changed, 478 insertions(+), 27 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 9bfc2b510c64..1409e40e6345 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -325,7 +325,7 @@ Code  Seq#    Include File                                           Comments
 0xA3  90-9F  linux/dtlk.h
 0xA4  00-1F  uapi/linux/tee.h                                        Generic TEE subsystem
 0xA4  00-1F  uapi/asm/sgx.h                                          <mailto:linux-sgx@vger.kernel.org>
-0xA5  01     linux/surface_aggregator/cdev.h                         Microsoft Surface Platform System Aggregator
+0xA5  01-05  linux/surface_aggregator/cdev.h                         Microsoft Surface Platform System Aggregator
                                                                      <mailto:luzmaximilian@gmail.com>
 0xA5  20-2F  linux/surface_aggregator/dtx.h                          Microsoft Surface DTX driver
                                                                      <mailto:luzmaximilian@gmail.com>
diff --git a/drivers/platform/surface/surface_aggregator_cdev.c b/drivers/platform/surface/surface_aggregator_cdev.c
index 79e28fab7e40..dcda377896b7 100644
--- a/drivers/platform/surface/surface_aggregator_cdev.c
+++ b/drivers/platform/surface/surface_aggregator_cdev.c
@@ -3,29 +3,69 @@
  * Provides user-space access to the SSAM EC via the /dev/surface/aggregator
  * misc device. Intended for debugging and development.
  *
- * Copyright (C) 2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2020-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #include <linux/fs.h>
+#include <linux/ioctl.h>
 #include <linux/kernel.h>
+#include <linux/kfifo.h>
 #include <linux/kref.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/poll.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 
 #include <linux/surface_aggregator/cdev.h>
 #include <linux/surface_aggregator/controller.h>
+#include <linux/surface_aggregator/serial_hub.h>
 
 #define SSAM_CDEV_DEVICE_NAME	"surface_aggregator_cdev"
 
+
+/* -- Main structures. ------------------------------------------------------ */
+
+enum ssam_cdev_device_state {
+	SSAM_CDEV_DEVICE_SHUTDOWN_BIT = BIT(0),
+};
+
 struct ssam_cdev {
 	struct kref kref;
 	struct rw_semaphore lock;
+
+	struct device *dev;
 	struct ssam_controller *ctrl;
 	struct miscdevice mdev;
+	unsigned long flags;
+
+	struct rw_semaphore client_lock;  /* Guards client list. */
+	struct list_head client_list;
+};
+
+struct ssam_cdev_client;
+
+struct ssam_cdev_notifier {
+	struct ssam_cdev_client *client;
+	struct ssam_event_notifier nf;
+};
+
+struct ssam_cdev_client {
+	struct ssam_cdev *cdev;
+	struct list_head node;
+
+	struct mutex notifier_lock;	/* Guards notifier access for registration */
+	struct ssam_cdev_notifier *notifier[SSH_NUM_EVENTS];
+
+	struct mutex read_lock;		/* Guards FIFO buffer read access */
+	struct mutex write_lock;	/* Guards FIFO buffer write access */
+	DECLARE_KFIFO(buffer, u8, 4096);
+
+	wait_queue_head_t waitq;
+	struct fasync_struct *fasync;
 };
 
 static void __ssam_cdev_release(struct kref *kref)
@@ -47,24 +87,169 @@ static void ssam_cdev_put(struct ssam_cdev *cdev)
 		kref_put(&cdev->kref, __ssam_cdev_release);
 }
 
-static int ssam_cdev_device_open(struct inode *inode, struct file *filp)
-{
-	struct miscdevice *mdev = filp->private_data;
-	struct ssam_cdev *cdev = container_of(mdev, struct ssam_cdev, mdev);
 
-	filp->private_data = ssam_cdev_get(cdev);
-	return stream_open(inode, filp);
-}
+/* -- Notifier handling. ---------------------------------------------------- */
 
-static int ssam_cdev_device_release(struct inode *inode, struct file *filp)
+static u32 ssam_cdev_notifier(struct ssam_event_notifier *nf, const struct ssam_event *in)
 {
-	ssam_cdev_put(filp->private_data);
+	struct ssam_cdev_notifier *cdev_nf = container_of(nf, struct ssam_cdev_notifier, nf);
+	struct ssam_cdev_client *client = cdev_nf->client;
+	struct ssam_cdev_event event;
+	size_t n = struct_size(&event, data, in->length);
+
+	/* Translate event. */
+	event.target_category = in->target_category;
+	event.target_id = in->target_id;
+	event.command_id = in->command_id;
+	event.instance_id = in->instance_id;
+	event.length = in->length;
+
+	mutex_lock(&client->write_lock);
+
+	/* Make sure we have enough space. */
+	if (kfifo_avail(&client->buffer) < n) {
+		dev_warn(client->cdev->dev,
+			 "buffer full, dropping event (tc: %#04x, tid: %#04x, cid: %#04x, iid: %#04x)\n",
+			 in->target_category, in->target_id, in->command_id, in->instance_id);
+		mutex_unlock(&client->write_lock);
+		return 0;
+	}
+
+	/* Copy event header and payload. */
+	kfifo_in(&client->buffer, (const u8 *)&event, struct_size(&event, data, 0));
+	kfifo_in(&client->buffer, &in->data[0], in->length);
+
+	mutex_unlock(&client->write_lock);
+
+	/* Notify waiting readers. */
+	kill_fasync(&client->fasync, SIGIO, POLL_IN);
+	wake_up_interruptible(&client->waitq);
+
+	/*
+	 * Don't mark events as handled, this is the job of a proper driver and
+	 * not the debugging interface.
+	 */
 	return 0;
 }
 
-static long ssam_cdev_request(struct ssam_cdev *cdev, unsigned long arg)
+static int ssam_cdev_notifier_register(struct ssam_cdev_client *client, u8 tc, int priority)
+{
+	const u16 rqid = ssh_tc_to_rqid(tc);
+	const u16 event = ssh_rqid_to_event(rqid);
+	struct ssam_cdev_notifier *nf;
+	int status;
+
+	/* Validate notifier target category. */
+	if (!ssh_rqid_is_event(rqid))
+		return -EINVAL;
+
+	mutex_lock(&client->notifier_lock);
+
+	/* Check if the notifier has already been registered. */
+	if (client->notifier[event]) {
+		mutex_unlock(&client->notifier_lock);
+		return -EEXIST;
+	}
+
+	/* Allocate new notifier. */
+	nf = kzalloc(sizeof(*nf), GFP_KERNEL);
+	if (!nf) {
+		mutex_unlock(&client->notifier_lock);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Create a dummy notifier with the minimal required fields for
+	 * observer registration. Note that we can skip fully specifying event
+	 * and registry here as we do not need any matching and use silent
+	 * registration, which does not enable the corresponding event.
+	 */
+	nf->client = client;
+	nf->nf.base.fn = ssam_cdev_notifier;
+	nf->nf.base.priority = priority;
+	nf->nf.event.id.target_category = tc;
+	nf->nf.event.mask = 0;	/* Do not do any matching. */
+	nf->nf.flags = SSAM_EVENT_NOTIFIER_OBSERVER;
+
+	/* Register notifier. */
+	status = ssam_notifier_register(client->cdev->ctrl, &nf->nf);
+	if (status)
+		kfree(nf);
+	else
+		client->notifier[event] = nf;
+
+	mutex_unlock(&client->notifier_lock);
+	return status;
+}
+
+static int ssam_cdev_notifier_unregister(struct ssam_cdev_client *client, u8 tc)
+{
+	const u16 rqid = ssh_tc_to_rqid(tc);
+	const u16 event = ssh_rqid_to_event(rqid);
+	int status;
+
+	/* Validate notifier target category. */
+	if (!ssh_rqid_is_event(rqid))
+		return -EINVAL;
+
+	mutex_lock(&client->notifier_lock);
+
+	/* Check if the notifier is currently registered. */
+	if (!client->notifier[event]) {
+		mutex_unlock(&client->notifier_lock);
+		return -ENOENT;
+	}
+
+	/* Unregister and free notifier. */
+	status = ssam_notifier_unregister(client->cdev->ctrl, &client->notifier[event]->nf);
+	kfree(client->notifier[event]);
+	client->notifier[event] = NULL;
+
+	mutex_unlock(&client->notifier_lock);
+	return status;
+}
+
+static void ssam_cdev_notifier_unregister_all(struct ssam_cdev_client *client)
+{
+	int i;
+
+	down_read(&client->cdev->lock);
+
+	/*
+	 * This function may be used during shutdown, thus we need to test for
+	 * cdev->ctrl instead of the SSAM_CDEV_DEVICE_SHUTDOWN_BIT bit.
+	 */
+	if (client->cdev->ctrl) {
+		for (i = 0; i < SSH_NUM_EVENTS; i++)
+			ssam_cdev_notifier_unregister(client, i + 1);
+
+	} else {
+		int count = 0;
+
+		/*
+		 * Device has been shut down. Any notifier remaining is a bug,
+		 * so warn about that as this would otherwise hardly be
+		 * noticeable. Nevertheless, free them as well.
+		 */
+		mutex_lock(&client->notifier_lock);
+		for (i = 0; i < SSH_NUM_EVENTS; i++) {
+			count += !!(client->notifier[i]);
+			kfree(client->notifier[i]);
+			client->notifier[i] = NULL;
+		}
+		mutex_unlock(&client->notifier_lock);
+
+		WARN_ON(count > 0);
+	}
+
+	up_read(&client->cdev->lock);
+}
+
+
+/* -- IOCTL functions. ------------------------------------------------------ */
+
+static long ssam_cdev_request(struct ssam_cdev_client *client, struct ssam_cdev_request __user *r)
 {
-	struct ssam_cdev_request __user *r;
 	struct ssam_cdev_request rqst;
 	struct ssam_request spec = {};
 	struct ssam_response rsp = {};
@@ -72,7 +257,6 @@ static long ssam_cdev_request(struct ssam_cdev *cdev, unsigned long arg)
 	void __user *rspdata;
 	int status = 0, ret = 0, tmp;
 
-	r = (struct ssam_cdev_request __user *)arg;
 	ret = copy_struct_from_user(&rqst, sizeof(rqst), r, sizeof(*r));
 	if (ret)
 		goto out;
@@ -152,7 +336,7 @@ static long ssam_cdev_request(struct ssam_cdev *cdev, unsigned long arg)
 	}
 
 	/* Perform request. */
-	status = ssam_request_sync(cdev->ctrl, &spec, &rsp);
+	status = ssam_request_sync(client->cdev->ctrl, &spec, &rsp);
 	if (status)
 		goto out;
 
@@ -177,48 +361,247 @@ out:
 	return ret;
 }
 
-static long __ssam_cdev_device_ioctl(struct ssam_cdev *cdev, unsigned int cmd,
+static long ssam_cdev_notif_register(struct ssam_cdev_client *client,
+				     const struct ssam_cdev_notifier_desc __user *d)
+{
+	struct ssam_cdev_notifier_desc desc;
+	long ret;
+
+	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
+	if (ret)
+		return ret;
+
+	return ssam_cdev_notifier_register(client, desc.target_category, desc.priority);
+}
+
+static long ssam_cdev_notif_unregister(struct ssam_cdev_client *client,
+				       const struct ssam_cdev_notifier_desc __user *d)
+{
+	struct ssam_cdev_notifier_desc desc;
+	long ret;
+
+	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
+	if (ret)
+		return ret;
+
+	return ssam_cdev_notifier_unregister(client, desc.target_category);
+}
+
+
+/* -- File operations. ------------------------------------------------------ */
+
+static int ssam_cdev_device_open(struct inode *inode, struct file *filp)
+{
+	struct miscdevice *mdev = filp->private_data;
+	struct ssam_cdev_client *client;
+	struct ssam_cdev *cdev = container_of(mdev, struct ssam_cdev, mdev);
+
+	/* Initialize client */
+	client = vzalloc(sizeof(*client));
+	if (!client)
+		return -ENOMEM;
+
+	client->cdev = ssam_cdev_get(cdev);
+
+	INIT_LIST_HEAD(&client->node);
+
+	mutex_init(&client->notifier_lock);
+
+	mutex_init(&client->read_lock);
+	mutex_init(&client->write_lock);
+	INIT_KFIFO(client->buffer);
+	init_waitqueue_head(&client->waitq);
+
+	filp->private_data = client;
+
+	/* Attach client. */
+	down_write(&cdev->client_lock);
+
+	if (test_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT, &cdev->flags)) {
+		up_write(&cdev->client_lock);
+		mutex_destroy(&client->write_lock);
+		mutex_destroy(&client->read_lock);
+		mutex_destroy(&client->notifier_lock);
+		ssam_cdev_put(client->cdev);
+		vfree(client);
+		return -ENODEV;
+	}
+	list_add_tail(&client->node, &cdev->client_list);
+
+	up_write(&cdev->client_lock);
+
+	stream_open(inode, filp);
+	return 0;
+}
+
+static int ssam_cdev_device_release(struct inode *inode, struct file *filp)
+{
+	struct ssam_cdev_client *client = filp->private_data;
+
+	/* Force-unregister all remaining notifiers of this client. */
+	ssam_cdev_notifier_unregister_all(client);
+
+	/* Detach client. */
+	down_write(&client->cdev->client_lock);
+	list_del(&client->node);
+	up_write(&client->cdev->client_lock);
+
+	/* Free client. */
+	mutex_destroy(&client->write_lock);
+	mutex_destroy(&client->read_lock);
+
+	mutex_destroy(&client->notifier_lock);
+
+	ssam_cdev_put(client->cdev);
+	vfree(client);
+
+	return 0;
+}
+
+static long __ssam_cdev_device_ioctl(struct ssam_cdev_client *client, unsigned int cmd,
 				     unsigned long arg)
 {
 	switch (cmd) {
 	case SSAM_CDEV_REQUEST:
-		return ssam_cdev_request(cdev, arg);
+		return ssam_cdev_request(client, (struct ssam_cdev_request __user *)arg);
+
+	case SSAM_CDEV_NOTIF_REGISTER:
+		return ssam_cdev_notif_register(client,
+						(struct ssam_cdev_notifier_desc __user *)arg);
+
+	case SSAM_CDEV_NOTIF_UNREGISTER:
+		return ssam_cdev_notif_unregister(client,
+						  (struct ssam_cdev_notifier_desc __user *)arg);
 
 	default:
 		return -ENOTTY;
 	}
 }
 
-static long ssam_cdev_device_ioctl(struct file *file, unsigned int cmd,
-				   unsigned long arg)
+static long ssam_cdev_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct ssam_cdev *cdev = file->private_data;
+	struct ssam_cdev_client *client = file->private_data;
 	long status;
 
 	/* Ensure that controller is valid for as long as we need it. */
+	if (down_read_killable(&client->cdev->lock))
+		return -ERESTARTSYS;
+
+	if (test_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT, &client->cdev->flags)) {
+		up_read(&client->cdev->lock);
+		return -ENODEV;
+	}
+
+	status = __ssam_cdev_device_ioctl(client, cmd, arg);
+
+	up_read(&client->cdev->lock);
+	return status;
+}
+
+static ssize_t ssam_cdev_read(struct file *file, char __user *buf, size_t count, loff_t *offs)
+{
+	struct ssam_cdev_client *client = file->private_data;
+	struct ssam_cdev *cdev = client->cdev;
+	unsigned int copied;
+	int status = 0;
+
 	if (down_read_killable(&cdev->lock))
 		return -ERESTARTSYS;
 
-	if (!cdev->ctrl) {
+	/* Make sure we're not shut down. */
+	if (test_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT, &cdev->flags)) {
 		up_read(&cdev->lock);
 		return -ENODEV;
 	}
 
-	status = __ssam_cdev_device_ioctl(cdev, cmd, arg);
+	do {
+		/* Check availability, wait if necessary. */
+		if (kfifo_is_empty(&client->buffer)) {
+			up_read(&cdev->lock);
+
+			if (file->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			status = wait_event_interruptible(client->waitq,
+							  !kfifo_is_empty(&client->buffer) ||
+							  test_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT,
+								   &cdev->flags));
+			if (status < 0)
+				return status;
+
+			if (down_read_killable(&cdev->lock))
+				return -ERESTARTSYS;
+
+			/* Need to check that we're not shut down again. */
+			if (test_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT, &cdev->flags)) {
+				up_read(&cdev->lock);
+				return -ENODEV;
+			}
+		}
+
+		/* Try to read from FIFO. */
+		if (mutex_lock_interruptible(&client->read_lock)) {
+			up_read(&cdev->lock);
+			return -ERESTARTSYS;
+		}
+
+		status = kfifo_to_user(&client->buffer, buf, count, &copied);
+		mutex_unlock(&client->read_lock);
+
+		if (status < 0) {
+			up_read(&cdev->lock);
+			return status;
+		}
+
+		/* We might not have gotten anything, check this here. */
+		if (copied == 0 && (file->f_flags & O_NONBLOCK)) {
+			up_read(&cdev->lock);
+			return -EAGAIN;
+		}
+	} while (copied == 0);
 
 	up_read(&cdev->lock);
-	return status;
+	return copied;
+}
+
+static __poll_t ssam_cdev_poll(struct file *file, struct poll_table_struct *pt)
+{
+	struct ssam_cdev_client *client = file->private_data;
+	__poll_t events = 0;
+
+	if (test_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT, &client->cdev->flags))
+		return EPOLLHUP | EPOLLERR;
+
+	poll_wait(file, &client->waitq, pt);
+
+	if (!kfifo_is_empty(&client->buffer))
+		events |= EPOLLIN | EPOLLRDNORM;
+
+	return events;
+}
+
+static int ssam_cdev_fasync(int fd, struct file *file, int on)
+{
+	struct ssam_cdev_client *client = file->private_data;
+
+	return fasync_helper(fd, file, on, &client->fasync);
 }
 
 static const struct file_operations ssam_controller_fops = {
 	.owner          = THIS_MODULE,
 	.open           = ssam_cdev_device_open,
 	.release        = ssam_cdev_device_release,
+	.read           = ssam_cdev_read,
+	.poll           = ssam_cdev_poll,
+	.fasync         = ssam_cdev_fasync,
 	.unlocked_ioctl = ssam_cdev_device_ioctl,
 	.compat_ioctl   = ssam_cdev_device_ioctl,
-	.llseek         = noop_llseek,
+	.llseek         = no_llseek,
 };
 
+
+/* -- Device and driver setup ----------------------------------------------- */
+
 static int ssam_dbg_device_probe(struct platform_device *pdev)
 {
 	struct ssam_controller *ctrl;
@@ -236,6 +619,7 @@ static int ssam_dbg_device_probe(struct platform_device *pdev)
 	kref_init(&cdev->kref);
 	init_rwsem(&cdev->lock);
 	cdev->ctrl = ctrl;
+	cdev->dev = &pdev->dev;
 
 	cdev->mdev.parent   = &pdev->dev;
 	cdev->mdev.minor    = MISC_DYNAMIC_MINOR;
@@ -243,6 +627,9 @@ static int ssam_dbg_device_probe(struct platform_device *pdev)
 	cdev->mdev.nodename = "surface/aggregator";
 	cdev->mdev.fops     = &ssam_controller_fops;
 
+	init_rwsem(&cdev->client_lock);
+	INIT_LIST_HEAD(&cdev->client_list);
+
 	status = misc_register(&cdev->mdev);
 	if (status) {
 		kfree(cdev);
@@ -256,8 +643,32 @@ static int ssam_dbg_device_probe(struct platform_device *pdev)
 static int ssam_dbg_device_remove(struct platform_device *pdev)
 {
 	struct ssam_cdev *cdev = platform_get_drvdata(pdev);
+	struct ssam_cdev_client *client;
 
-	misc_deregister(&cdev->mdev);
+	/*
+	 * Mark device as shut-down. Prevent new clients from being added and
+	 * new operations from being executed.
+	 */
+	set_bit(SSAM_CDEV_DEVICE_SHUTDOWN_BIT, &cdev->flags);
+
+	down_write(&cdev->client_lock);
+
+	/* Remove all notifiers registered by us. */
+	list_for_each_entry(client, &cdev->client_list, node) {
+		ssam_cdev_notifier_unregister_all(client);
+	}
+
+	/* Wake up async clients. */
+	list_for_each_entry(client, &cdev->client_list, node) {
+		kill_fasync(&client->fasync, SIGIO, POLL_HUP);
+	}
+
+	/* Wake up blocking clients. */
+	list_for_each_entry(client, &cdev->client_list, node) {
+		wake_up_interruptible(&client->waitq);
+	}
+
+	up_write(&cdev->client_lock);
 
 	/*
 	 * The controller is only guaranteed to be valid for as long as the
@@ -266,8 +677,11 @@ static int ssam_dbg_device_remove(struct platform_device *pdev)
 	 */
 	down_write(&cdev->lock);
 	cdev->ctrl = NULL;
+	cdev->dev = NULL;
 	up_write(&cdev->lock);
 
+	misc_deregister(&cdev->mdev);
+
 	ssam_cdev_put(cdev);
 	return 0;
 }
diff --git a/include/uapi/linux/surface_aggregator/cdev.h b/include/uapi/linux/surface_aggregator/cdev.h
index fbcce04abfe9..4f393fafc235 100644
--- a/include/uapi/linux/surface_aggregator/cdev.h
+++ b/include/uapi/linux/surface_aggregator/cdev.h
@@ -6,7 +6,7 @@
  * device. This device provides direct user-space access to the SSAM EC.
  * Intended for debugging and development.
  *
- * Copyright (C) 2020 Maximilian Luz <luzmaximilian@gmail.com>
+ * Copyright (C) 2020-2021 Maximilian Luz <luzmaximilian@gmail.com>
  */
 
 #ifndef _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H
@@ -73,6 +73,43 @@ struct ssam_cdev_request {
 	} response;
 } __attribute__((__packed__));
 
-#define SSAM_CDEV_REQUEST	_IOWR(0xA5, 1, struct ssam_cdev_request)
+/**
+ * struct ssam_cdev_notifier_desc - Notifier descriptor.
+ * @priority:        Priority value determining the order in which notifier
+ *                   callbacks will be called. A higher value means higher
+ *                   priority, i.e. the associated callback will be executed
+ *                   earlier than other (lower priority) callbacks.
+ * @target_category: The event target category for which this notifier should
+ *                   receive events.
+ *
+ * Specifies the notifier that should be registered or unregistered,
+ * specifically with which priority and for which target category of events.
+ */
+struct ssam_cdev_notifier_desc {
+	__s32 priority;
+	__u8 target_category;
+} __attribute__((__packed__));
+
+/**
+ * struct ssam_cdev_event - SSAM event sent by the EC.
+ * @target_category: Target category of the event source. See &enum ssam_ssh_tc.
+ * @target_id:       Target ID of the event source.
+ * @command_id:      Command ID of the event.
+ * @instance_id:     Instance ID of the event source.
+ * @length:          Length of the event payload in bytes.
+ * @data:            Event payload data.
+ */
+struct ssam_cdev_event {
+	__u8 target_category;
+	__u8 target_id;
+	__u8 command_id;
+	__u8 instance_id;
+	__u16 length;
+	__u8 data[];
+} __attribute__((__packed__));
+
+#define SSAM_CDEV_REQUEST		_IOWR(0xA5, 1, struct ssam_cdev_request)
+#define SSAM_CDEV_NOTIF_REGISTER	_IOW(0xA5, 2, struct ssam_cdev_notifier_desc)
+#define SSAM_CDEV_NOTIF_UNREGISTER	_IOW(0xA5, 3, struct ssam_cdev_notifier_desc)
 
 #endif /* _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H */

From e8e298a653856b1f3a2bb7b1fe31d3faa93cc7dc Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:53 +0200
Subject: [PATCH 1948/3292] platform/surface: aggregator_cdev: Allow enabling
 of events from user-space

While events can already be enabled and disabled via the generic request
IOCTL, this bypasses the internal reference counting mechanism of the
controller. Due to that, disabling an event will turn it off regardless
of any other client having requested said event, which may break
functionality of that client.

To solve this, add IOCTLs wrapping the ssam_controller_event_enable()
and ssam_controller_event_disable() functions, which have been
previously introduced for this specific purpose.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210604134755.535590-6-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../surface/surface_aggregator_cdev.c         | 58 +++++++++++++++++++
 include/uapi/linux/surface_aggregator/cdev.h  | 32 ++++++++++
 2 files changed, 90 insertions(+)

diff --git a/drivers/platform/surface/surface_aggregator_cdev.c b/drivers/platform/surface/surface_aggregator_cdev.c
index dcda377896b7..7b86b36eaaa0 100644
--- a/drivers/platform/surface/surface_aggregator_cdev.c
+++ b/drivers/platform/surface/surface_aggregator_cdev.c
@@ -387,6 +387,58 @@ static long ssam_cdev_notif_unregister(struct ssam_cdev_client *client,
 	return ssam_cdev_notifier_unregister(client, desc.target_category);
 }
 
+static long ssam_cdev_event_enable(struct ssam_cdev_client *client,
+				   const struct ssam_cdev_event_desc __user *d)
+{
+	struct ssam_cdev_event_desc desc;
+	struct ssam_event_registry reg;
+	struct ssam_event_id id;
+	long ret;
+
+	/* Read descriptor from user-space. */
+	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
+	if (ret)
+		return ret;
+
+	/* Translate descriptor. */
+	reg.target_category = desc.reg.target_category;
+	reg.target_id = desc.reg.target_id;
+	reg.cid_enable = desc.reg.cid_enable;
+	reg.cid_disable = desc.reg.cid_disable;
+
+	id.target_category = desc.id.target_category;
+	id.instance = desc.id.instance;
+
+	/* Disable event. */
+	return ssam_controller_event_enable(client->cdev->ctrl, reg, id, desc.flags);
+}
+
+static long ssam_cdev_event_disable(struct ssam_cdev_client *client,
+				    const struct ssam_cdev_event_desc __user *d)
+{
+	struct ssam_cdev_event_desc desc;
+	struct ssam_event_registry reg;
+	struct ssam_event_id id;
+	long ret;
+
+	/* Read descriptor from user-space. */
+	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
+	if (ret)
+		return ret;
+
+	/* Translate descriptor. */
+	reg.target_category = desc.reg.target_category;
+	reg.target_id = desc.reg.target_id;
+	reg.cid_enable = desc.reg.cid_enable;
+	reg.cid_disable = desc.reg.cid_disable;
+
+	id.target_category = desc.id.target_category;
+	id.instance = desc.id.instance;
+
+	/* Disable event. */
+	return ssam_controller_event_disable(client->cdev->ctrl, reg, id, desc.flags);
+}
+
 
 /* -- File operations. ------------------------------------------------------ */
 
@@ -473,6 +525,12 @@ static long __ssam_cdev_device_ioctl(struct ssam_cdev_client *client, unsigned i
 		return ssam_cdev_notif_unregister(client,
 						  (struct ssam_cdev_notifier_desc __user *)arg);
 
+	case SSAM_CDEV_EVENT_ENABLE:
+		return ssam_cdev_event_enable(client, (struct ssam_cdev_event_desc __user *)arg);
+
+	case SSAM_CDEV_EVENT_DISABLE:
+		return ssam_cdev_event_disable(client, (struct ssam_cdev_event_desc __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/uapi/linux/surface_aggregator/cdev.h b/include/uapi/linux/surface_aggregator/cdev.h
index 4f393fafc235..08f46b60b151 100644
--- a/include/uapi/linux/surface_aggregator/cdev.h
+++ b/include/uapi/linux/surface_aggregator/cdev.h
@@ -90,6 +90,36 @@ struct ssam_cdev_notifier_desc {
 	__u8 target_category;
 } __attribute__((__packed__));
 
+/**
+ * struct ssam_cdev_event_desc - Event descriptor.
+ * @reg:                 Registry via which the event will be enabled/disabled.
+ * @reg.target_category: Target category for the event registry requests.
+ * @reg.target_id:       Target ID for the event registry requests.
+ * @reg.cid_enable:      Command ID for the event-enable request.
+ * @reg.cid_disable:     Command ID for the event-disable request.
+ * @id:                  ID specifying the event.
+ * @id.target_category:  Target category of the event source.
+ * @id.instance:         Instance ID of the event source.
+ * @flags:               Flags used for enabling the event.
+ *
+ * Specifies which event should be enabled/disabled and how to do that.
+ */
+struct ssam_cdev_event_desc {
+	struct {
+		__u8 target_category;
+		__u8 target_id;
+		__u8 cid_enable;
+		__u8 cid_disable;
+	} reg;
+
+	struct {
+		__u8 target_category;
+		__u8 instance;
+	} id;
+
+	__u8 flags;
+} __attribute__((__packed__));
+
 /**
  * struct ssam_cdev_event - SSAM event sent by the EC.
  * @target_category: Target category of the event source. See &enum ssam_ssh_tc.
@@ -111,5 +141,7 @@ struct ssam_cdev_event {
 #define SSAM_CDEV_REQUEST		_IOWR(0xA5, 1, struct ssam_cdev_request)
 #define SSAM_CDEV_NOTIF_REGISTER	_IOW(0xA5, 2, struct ssam_cdev_notifier_desc)
 #define SSAM_CDEV_NOTIF_UNREGISTER	_IOW(0xA5, 3, struct ssam_cdev_notifier_desc)
+#define SSAM_CDEV_EVENT_ENABLE		_IOW(0xA5, 4, struct ssam_cdev_event_desc)
+#define SSAM_CDEV_EVENT_DISABLE		_IOW(0xA5, 5, struct ssam_cdev_event_desc)
 
 #endif /* _UAPI_LINUX_SURFACE_AGGREGATOR_CDEV_H */

From cbd224e0ddfe59eb1eb92e436825f3eca4de3c10 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:54 +0200
Subject: [PATCH 1949/3292] platform/surface: aggregator_cdev: Add lockdep
 support

Mark functions with locking requirements via the corresponding lockdep
calls for debugging and documentary purposes.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210604134755.535590-7-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../platform/surface/surface_aggregator_cdev.c   | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/platform/surface/surface_aggregator_cdev.c b/drivers/platform/surface/surface_aggregator_cdev.c
index 7b86b36eaaa0..30fb50fde450 100644
--- a/drivers/platform/surface/surface_aggregator_cdev.c
+++ b/drivers/platform/surface/surface_aggregator_cdev.c
@@ -139,6 +139,8 @@ static int ssam_cdev_notifier_register(struct ssam_cdev_client *client, u8 tc, i
 	struct ssam_cdev_notifier *nf;
 	int status;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	/* Validate notifier target category. */
 	if (!ssh_rqid_is_event(rqid))
 		return -EINVAL;
@@ -188,6 +190,8 @@ static int ssam_cdev_notifier_unregister(struct ssam_cdev_client *client, u8 tc)
 	const u16 event = ssh_rqid_to_event(rqid);
 	int status;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	/* Validate notifier target category. */
 	if (!ssh_rqid_is_event(rqid))
 		return -EINVAL;
@@ -257,6 +261,8 @@ static long ssam_cdev_request(struct ssam_cdev_client *client, struct ssam_cdev_
 	void __user *rspdata;
 	int status = 0, ret = 0, tmp;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	ret = copy_struct_from_user(&rqst, sizeof(rqst), r, sizeof(*r));
 	if (ret)
 		goto out;
@@ -367,6 +373,8 @@ static long ssam_cdev_notif_register(struct ssam_cdev_client *client,
 	struct ssam_cdev_notifier_desc desc;
 	long ret;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
 	if (ret)
 		return ret;
@@ -380,6 +388,8 @@ static long ssam_cdev_notif_unregister(struct ssam_cdev_client *client,
 	struct ssam_cdev_notifier_desc desc;
 	long ret;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
 	if (ret)
 		return ret;
@@ -395,6 +405,8 @@ static long ssam_cdev_event_enable(struct ssam_cdev_client *client,
 	struct ssam_event_id id;
 	long ret;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	/* Read descriptor from user-space. */
 	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
 	if (ret)
@@ -421,6 +433,8 @@ static long ssam_cdev_event_disable(struct ssam_cdev_client *client,
 	struct ssam_event_id id;
 	long ret;
 
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	/* Read descriptor from user-space. */
 	ret = copy_struct_from_user(&desc, sizeof(desc), d, sizeof(*d));
 	if (ret)
@@ -513,6 +527,8 @@ static int ssam_cdev_device_release(struct inode *inode, struct file *filp)
 static long __ssam_cdev_device_ioctl(struct ssam_cdev_client *client, unsigned int cmd,
 				     unsigned long arg)
 {
+	lockdep_assert_held_read(&client->cdev->lock);
+
 	switch (cmd) {
 	case SSAM_CDEV_REQUEST:
 		return ssam_cdev_request(client, (struct ssam_cdev_request __user *)arg);

From 8ae200547aa9dbb1001c22325d251b825113bdb3 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:47:55 +0200
Subject: [PATCH 1950/3292] docs: driver-api: Update Surface Aggregator
 user-space interface documentation

Update the controller-device user-space interface (cdev) documentation
for the newly introduced IOCTLs and event interface.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210604134755.535590-8-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../surface_aggregator/clients/cdev.rst       | 127 +++++++++++++++++-
 1 file changed, 122 insertions(+), 5 deletions(-)

diff --git a/Documentation/driver-api/surface_aggregator/clients/cdev.rst b/Documentation/driver-api/surface_aggregator/clients/cdev.rst
index 248c1372d879..0134a841a079 100644
--- a/Documentation/driver-api/surface_aggregator/clients/cdev.rst
+++ b/Documentation/driver-api/surface_aggregator/clients/cdev.rst
@@ -1,9 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
-.. |u8| replace:: :c:type:`u8 <u8>`
-.. |u16| replace:: :c:type:`u16 <u16>`
 .. |ssam_cdev_request| replace:: :c:type:`struct ssam_cdev_request <ssam_cdev_request>`
 .. |ssam_cdev_request_flags| replace:: :c:type:`enum ssam_cdev_request_flags <ssam_cdev_request_flags>`
+.. |ssam_cdev_event| replace:: :c:type:`struct ssam_cdev_event <ssam_cdev_event>`
 
 ==============================
 User-Space EC Interface (cdev)
@@ -23,6 +22,40 @@ These IOCTLs and their respective input/output parameter structs are defined in
 A small python library and scripts for accessing this interface can be found
 at https://github.com/linux-surface/surface-aggregator-module/tree/master/scripts/ssam.
 
+.. contents::
+
+
+Receiving Events
+================
+
+Events can be received by reading from the device-file. The are represented by
+the |ssam_cdev_event| datatype.
+
+Before events are available to be read, however, the desired notifiers must be
+registered via the ``SSAM_CDEV_NOTIF_REGISTER`` IOCTL. Notifiers are, in
+essence, callbacks, called when the EC sends an event. They are, in this
+interface, associated with a specific target category and device-file-instance.
+They forward any event of this category to the buffer of the corresponding
+instance, from which it can then be read.
+
+Notifiers themselves do not enable events on the EC. Thus, it may additionally
+be necessary to enable events via the ``SSAM_CDEV_EVENT_ENABLE`` IOCTL. While
+notifiers work per-client (i.e. per-device-file-instance), events are enabled
+globally, for the EC and all of its clients (regardless of userspace or
+non-userspace). The ``SSAM_CDEV_EVENT_ENABLE`` and ``SSAM_CDEV_EVENT_DISABLE``
+IOCTLs take care of reference counting the events, such that an event is
+enabled as long as there is a client that has requested it.
+
+Note that enabled events are not automatically disabled once the client
+instance is closed. Therefore any client process (or group of processes) should
+balance their event enable calls with the corresponding event disable calls. It
+is, however, perfectly valid to enable and disable events on different client
+instances. For example, it is valid to set up notifiers and read events on
+client instance ``A``, enable those events on instance ``B`` (note that these
+will also be received by A since events are enabled/disabled globally), and
+after no more events are desired, disable the previously enabled events via
+instance ``C``.
+
 
 Controller IOCTLs
 =================
@@ -45,9 +78,33 @@ The following IOCTLs are provided:
      - ``REQUEST``
      - Perform synchronous SAM request.
 
+   * - ``0xA5``
+     - ``2``
+     - ``W``
+     - ``NOTIF_REGISTER``
+     - Register event notifier.
 
-``REQUEST``
------------
+   * - ``0xA5``
+     - ``3``
+     - ``W``
+     - ``NOTIF_UNREGISTER``
+     - Unregister event notifier.
+
+   * - ``0xA5``
+     - ``4``
+     - ``W``
+     - ``EVENT_ENABLE``
+     - Enable event source.
+
+   * - ``0xA5``
+     - ``5``
+     - ``W``
+     - ``EVENT_DISABLE``
+     - Disable event source.
+
+
+``SSAM_CDEV_REQUEST``
+---------------------
 
 Defined as ``_IOWR(0xA5, 1, struct ssam_cdev_request)``.
 
@@ -82,6 +139,66 @@ submitted, and completed (i.e. handed back to user-space) successfully from
 inside the IOCTL, but the request ``status`` member may still be negative in
 case the actual execution of the request failed after it has been submitted.
 
-A full definition of the argument struct is provided below:
+A full definition of the argument struct is provided below.
+
+``SSAM_CDEV_NOTIF_REGISTER``
+----------------------------
+
+Defined as ``_IOW(0xA5, 2, struct ssam_cdev_notifier_desc)``.
+
+Register a notifier for the event target category specified in the given
+notifier description with the specified priority. Notifiers registration is
+required to receive events, but does not enable events themselves. After a
+notifier for a specific target category has been registered, all events of that
+category will be forwarded to the userspace client and can then be read from
+the device file instance. Note that events may have to be enabled, e.g. via the
+``SSAM_CDEV_EVENT_ENABLE`` IOCTL, before the EC will send them.
+
+Only one notifier can be registered per target category and client instance. If
+a notifier has already been registered, this IOCTL will fail with ``-EEXIST``.
+
+Notifiers will automatically be removed when the device file instance is
+closed.
+
+``SSAM_CDEV_NOTIF_UNREGISTER``
+------------------------------
+
+Defined as ``_IOW(0xA5, 3, struct ssam_cdev_notifier_desc)``.
+
+Unregisters the notifier associated with the specified target category. The
+priority field will be ignored by this IOCTL. If no notifier has been
+registered for this client instance and the given category, this IOCTL will
+fail with ``-ENOENT``.
+
+``SSAM_CDEV_EVENT_ENABLE``
+--------------------------
+
+Defined as ``_IOW(0xA5, 4, struct ssam_cdev_event_desc)``.
+
+Enable the event associated with the given event descriptor.
+
+Note that this call will not register a notifier itself, it will only enable
+events on the controller. If you want to receive events by reading from the
+device file, you will need to register the corresponding notifier(s) on that
+instance.
+
+Events are not automatically disabled when the device file is closed. This must
+be done manually, via a call to the ``SSAM_CDEV_EVENT_DISABLE`` IOCTL.
+
+``SSAM_CDEV_EVENT_DISABLE``
+---------------------------
+
+Defined as ``_IOW(0xA5, 5, struct ssam_cdev_event_desc)``.
+
+Disable the event associated with the given event descriptor.
+
+Note that this will not unregister any notifiers. Events may still be received
+and forwarded to user-space after this call. The only safe way of stopping
+events from being received is unregistering all previously registered
+notifiers.
+
+
+Structures and Enums
+====================
 
 .. kernel-doc:: include/uapi/linux/surface_aggregator/cdev.h

From 37ed76a745b099565b4ae7915f0441b1316bf108 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 23:09:06 +0200
Subject: [PATCH 1951/3292] platform/surface: aggregator: Do not return
 uninitialized value

The status variable in ssam_nf_refcount_disable_free() is only set when
the reference count equals zero. Otherwise, it is returned
uninitialized. Fix this by always initializing status to zero.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 640ee17199e4 ("platform/surface: aggregator: Allow enabling of events without notifiers")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210604210907.25738-2-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/controller.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 6646f4d6e10d..634399387d76 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -2228,7 +2228,7 @@ static int ssam_nf_refcount_disable_free(struct ssam_controller *ctrl,
 	const struct ssam_event_registry reg = entry->key.reg;
 	const struct ssam_event_id id = entry->key.id;
 	struct ssam_nf *nf = &ctrl->cplt.event.notif;
-	int status;
+	int status = 0;
 
 	lockdep_assert_held(&nf->lock);
 

From f9e7f9a2b2a0d76c03ebdbb8ffc7940017b326b9 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 23:09:07 +0200
Subject: [PATCH 1952/3292] platform/surface: aggregator: Drop unnecessary
 variable initialization

The status variable in ssam_controller_event_disable() is always set, no
need to initialize it.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210604210907.25738-3-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/controller.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 634399387d76..b8c377b3f932 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -2464,7 +2464,7 @@ int ssam_controller_event_disable(struct ssam_controller *ctrl,
 	u16 rqid = ssh_tc_to_rqid(id.target_category);
 	struct ssam_nf *nf = &ctrl->cplt.event.notif;
 	struct ssam_nf_refcount_entry *entry;
-	int status = 0;
+	int status;
 
 	if (!ssh_rqid_is_event(rqid))
 		return -EINVAL;

From a8aedd45d7dd7d3b6136c90bd755cb68743d930e Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:26:38 +0800
Subject: [PATCH 1953/3292] platform/surface: aggregator: Use list_move_tail
 instead of list_del/list_add_tail in ssh_request_layer.c

Using list_move_tail() instead of list_del() + list_add_tail() in ssh_request_layer.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210609072638.1358174-1-libaokun1@huawei.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../platform/surface/aggregator/ssh_request_layer.c    | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c
index bfe1aaf38065..790f7f0eee98 100644
--- a/drivers/platform/surface/aggregator/ssh_request_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_request_layer.c
@@ -863,9 +863,7 @@ static void ssh_rtl_timeout_reap(struct work_struct *work)
 		clear_bit(SSH_REQUEST_SF_PENDING_BIT, &r->state);
 
 		atomic_dec(&rtl->pending.count);
-		list_del(&r->node);
-
-		list_add_tail(&r->node, &claimed);
+		list_move_tail(&r->node, &claimed);
 	}
 	spin_unlock(&rtl->pending.lock);
 
@@ -1204,8 +1202,7 @@ void ssh_rtl_shutdown(struct ssh_rtl *rtl)
 		smp_mb__before_atomic();
 		clear_bit(SSH_REQUEST_SF_QUEUED_BIT, &r->state);
 
-		list_del(&r->node);
-		list_add_tail(&r->node, &claimed);
+		list_move_tail(&r->node, &claimed);
 	}
 	spin_unlock(&rtl->queue.lock);
 
@@ -1238,8 +1235,7 @@ void ssh_rtl_shutdown(struct ssh_rtl *rtl)
 			smp_mb__before_atomic();
 			clear_bit(SSH_REQUEST_SF_PENDING_BIT, &r->state);
 
-			list_del(&r->node);
-			list_add_tail(&r->node, &claimed);
+			list_move_tail(&r->node, &claimed);
 		}
 		spin_unlock(&rtl->pending.lock);
 	}

From be9c4fa236e24af5cc3271a16e209eab098566c4 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:24:48 +0800
Subject: [PATCH 1954/3292] platform/surface: aggregator: Use list_move_tail
 instead of list_del/list_add_tail in ssh_packet_layer.c

Using list_move_tail() instead of list_del() + list_add_tail() in ssh_packet_layer.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210609072448.1357524-1-libaokun1@huawei.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/ssh_packet_layer.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c
index 5e08049fc3ac..8a4451c1ffe5 100644
--- a/drivers/platform/surface/aggregator/ssh_packet_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c
@@ -1567,9 +1567,7 @@ static void ssh_ptl_timeout_reap(struct work_struct *work)
 		clear_bit(SSH_PACKET_SF_PENDING_BIT, &p->state);
 
 		atomic_dec(&ptl->pending.count);
-		list_del(&p->pending_node);
-
-		list_add_tail(&p->pending_node, &claimed);
+		list_move_tail(&p->pending_node, &claimed);
 	}
 
 	spin_unlock(&ptl->pending.lock);
@@ -1957,8 +1955,7 @@ void ssh_ptl_shutdown(struct ssh_ptl *ptl)
 		smp_mb__before_atomic();
 		clear_bit(SSH_PACKET_SF_QUEUED_BIT, &p->state);
 
-		list_del(&p->queue_node);
-		list_add_tail(&p->queue_node, &complete_q);
+		list_move_tail(&p->queue_node, &complete_q);
 	}
 	spin_unlock(&ptl->queue.lock);
 
@@ -1970,8 +1967,7 @@ void ssh_ptl_shutdown(struct ssh_ptl *ptl)
 		smp_mb__before_atomic();
 		clear_bit(SSH_PACKET_SF_PENDING_BIT, &p->state);
 
-		list_del(&p->pending_node);
-		list_add_tail(&p->pending_node, &complete_q);
+		list_move_tail(&p->pending_node, &complete_q);
 	}
 	atomic_set(&ptl->pending.count, 0);
 	spin_unlock(&ptl->pending.lock);

From c297937fbb50edd6c5ebc80fa4aa1d59246fb0ed Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 5 Jun 2021 22:38:04 +0200
Subject: [PATCH 1955/3292] platform/x86: hdaps: Constify static
 attribute_group struct

The only use of hdaps_attribute_group is to pass its address to
sysfs_create_group() and sysfs_remove_group(), both which takes pointers
to const attribute_group structs. Make it const to allow the compiler to
put it in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Reviewed-by: Frank Seidel <frank@f-seidel.de>
Link: https://lore.kernel.org/r/20210605203807.60547-2-rikard.falkeborn@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/hdaps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/hdaps.c b/drivers/platform/x86/hdaps.c
index a72270932ec3..9996485f5295 100644
--- a/drivers/platform/x86/hdaps.c
+++ b/drivers/platform/x86/hdaps.c
@@ -462,7 +462,7 @@ static struct attribute *hdaps_attributes[] = {
 	NULL,
 };
 
-static struct attribute_group hdaps_attribute_group = {
+static const struct attribute_group hdaps_attribute_group = {
 	.attrs = hdaps_attributes,
 };
 

From d24023e375704860c6c8b91c3af3034669aa1bc5 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 5 Jun 2021 22:38:05 +0200
Subject: [PATCH 1956/3292] platform/x86: intel_pmt_crashlog: Constify static
 attribute_group struct

The only use of pmt_crashlog_group is to assign its address to the
attr_grp field in the intel_pmt_namespace struct, which is a pointer to
const attribute_group. Make it const to allow the compiler to put it in
read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Link: https://lore.kernel.org/r/20210605203807.60547-3-rikard.falkeborn@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel_pmt_crashlog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c
index 92d315a16cfd..56963ceb6345 100644
--- a/drivers/platform/x86/intel_pmt_crashlog.c
+++ b/drivers/platform/x86/intel_pmt_crashlog.c
@@ -218,7 +218,7 @@ static struct attribute *pmt_crashlog_attrs[] = {
 	NULL
 };
 
-static struct attribute_group pmt_crashlog_group = {
+static const struct attribute_group pmt_crashlog_group = {
 	.attrs	= pmt_crashlog_attrs,
 };
 

From 62ef96919720b30d5e84a193e64490da1d30d776 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 5 Jun 2021 22:38:06 +0200
Subject: [PATCH 1957/3292] platform/x86: tc1100-wmi: Constify static
 attribute_group struct

The only use of tc1100_attribute_group is to pass its address to
sysfs_create_group() and sysfs_remove_group(), both which takes pointer
to const attribute_group structs. Make it const to allow the compiler to
put it in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Link: https://lore.kernel.org/r/20210605203807.60547-4-rikard.falkeborn@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/tc1100-wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c
index 803920b6f01d..9072eb302618 100644
--- a/drivers/platform/x86/tc1100-wmi.c
+++ b/drivers/platform/x86/tc1100-wmi.c
@@ -156,7 +156,7 @@ static struct attribute *tc1100_attributes[] = {
 	NULL
 };
 
-static struct attribute_group tc1100_attribute_group = {
+static const struct attribute_group tc1100_attribute_group = {
 	.attrs	= tc1100_attributes,
 };
 

From 77d06ec65a281c5382f4ea2398a267dc3bd7bfe3 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 5 Jun 2021 22:38:07 +0200
Subject: [PATCH 1958/3292] x86/platform/uv: Constify static attribute_group
 struct

The only use of base_attr_group and hubless_base_attr_group is to pass
their addresses to sysfs_create_group() and sysfs_remove_group(), both
which takes pointers to const attribute_group structs. Make them const
to allow the compiler to put them in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Reviewed-by: Justin Ernst <justin.ernst@hpe.com>
Link: https://lore.kernel.org/r/20210605203807.60547-5-rikard.falkeborn@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/uv_sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c
index 7badcfa3f384..956a354b57c1 100644
--- a/drivers/platform/x86/uv_sysfs.c
+++ b/drivers/platform/x86/uv_sysfs.c
@@ -778,7 +778,7 @@ static struct attribute *base_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group base_attr_group = {
+static const struct attribute_group base_attr_group = {
 	.attrs = base_attrs
 };
 
@@ -823,7 +823,7 @@ static struct attribute *hubless_base_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group hubless_base_attr_group = {
+static const struct attribute_group hubless_base_attr_group = {
 	.attrs = hubless_base_attrs
 };
 

From 8f44f316d1da2ad521e62028a812284bb72ef3d4 Mon Sep 17 00:00:00 2001
From: yangerkun <yangerkun@huawei.com>
Date: Mon, 7 Jun 2021 09:47:02 +0800
Subject: [PATCH 1959/3292] platform/x86: intel_ips: fix set but unused warning
 in read_mgtv
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/platform/x86/intel_ips.c:832:6: warning: variable ‘ret’ set but
not used [-Wunused-but-set-variable]
  832 |  u16 ret;
      |      ^~~

Fix it by mark ret as '__maybe_unused'.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: yangerkun <yangerkun@huawei.com>
Link: https://lore.kernel.org/r/20210607014702.2981097-1-yangerkun@huawei.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel_ips.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index bffe548187ee..4dfdbfca6841 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -829,7 +829,7 @@ static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
 
 static u16 read_mgtv(struct ips_driver *ips)
 {
-	u16 ret;
+	u16 __maybe_unused ret;
 	u64 slope, offset;
 	u64 val;
 

From cb58c277ff1a35432cd84a6cc9768c60ce4c2cad Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Jun 2021 16:59:52 +0200
Subject: [PATCH 1960/3292] platform/x86: dell-wmi-sysman/think-lmi: Make
 fw_attr_class global static

The dell-wmi-sysman and think-lmi kernel modules both have a global
struct class *fw_attr_class variable, leading to the following compile
errors when both are builtin:

ld: drivers/platform/x86/think-lmi.o:(.bss+0x0): multiple definition of `fw_attr_class'; drivers/platform/x86/dell/dell-wmi-sysman/sysman.o:(.bss+0x0): first defined here

In both cases the variable is only used in the file where it is declared.
Make both declarations static to avoid the linker error.

Cc: Mark Pearson <markpearson@lenovo.com>
Cc: Dell.Client.Kernel@dell.com
Reported-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210609145952.113393-1-hdegoede@redhat.com
---
 drivers/platform/x86/dell/dell-wmi-sysman/sysman.c | 2 +-
 drivers/platform/x86/think-lmi.c                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
index 1378c1878658..636bdfa83284 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
@@ -25,7 +25,7 @@ struct wmi_sysman_priv wmi_priv = {
 /* reset bios to defaults */
 static const char * const reset_types[] = {"builtinsafe", "lastknowngood", "factory", "custom"};
 static int reset_option = -1;
-struct class *fw_attr_class;
+static struct class *fw_attr_class;
 
 
 /**
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 782d8e3fe7a1..c6413b906e4a 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -134,7 +134,7 @@ static const char * const encoding_options[] = {
 	[TLMI_ENCODING_SCANCODE] = "scancode",
 };
 static struct think_lmi tlmi_priv;
-struct class *fw_attr_class;
+static struct class *fw_attr_class;
 
 /* ------ Utility functions ------------*/
 /* Convert BIOS WMI error string to suitable error code */

From 86bb2e3daf5d84c02ef40da8bf26f7b851aaa8a7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Jun 2021 17:17:51 +0200
Subject: [PATCH 1961/3292] platform/x86: think-lmi: Fix check for admin
 password being set

tlmi_priv.pwd_admin->password is an array (not a pointer), so the correct
way to check for the password being set is to check for
tlmi_priv.pwd_admin->password[0] != 0.

For the second check, replace the check with checking that auth_str is
set instead.

Cc: Mark Pearson <markpearson@lenovo.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: coverity-bot <keescook+coverity-bot@chromium.org>
Addresses-Coverity-ID: 1505158 ("NO_EFFECT")
Fixes: a7314b3b1d8a ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210609151752.156902-1-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index c6413b906e4a..4881de4e669d 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -537,7 +537,7 @@ static ssize_t current_value_store(struct kobject *kobj,
 	p = strchrnul(new_setting, '\n');
 	*p = '\0';
 
-	if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password) {
+	if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) {
 		auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;",
 				tlmi_priv.pwd_admin->password,
 				encoding_options[tlmi_priv.pwd_admin->encoding],
@@ -563,7 +563,7 @@ static ssize_t current_value_store(struct kobject *kobj,
 	if (ret)
 		goto out;
 
-	if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password)
+	if (auth_str)
 		ret = tlmi_save_bios_settings(auth_str);
 	else
 		ret = tlmi_save_bios_settings("");

From 0ddcf3a6b44209e73fb21b3c53e258884ea90cef Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Jun 2021 17:17:52 +0200
Subject: [PATCH 1962/3292] platform/x86: think-lmi: Avoid potential read
 before start of the buffer

If length equals 0 then reading buf[length-1] will read before the start
of the buffer.

Avoid this by moving the length == 0 check up.

Cc: Mark Pearson <markpearson@lenovo.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210609151752.156902-2-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 4881de4e669d..7771c9359449 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -443,10 +443,13 @@ static ssize_t kbdlang_store(struct kobject *kobj,
 	int length;
 
 	length = strlen(buf);
+	if (!length)
+		return -EINVAL;
+
 	if (buf[length-1] == '\n')
 		length--;
 
-	if (!length || (length >= TLMI_LANG_MAXLEN))
+	if (length >= TLMI_LANG_MAXLEN)
 		return -EINVAL;
 
 	memcpy(setting->kbdlang, buf, length);

From 039e6a3117b0f4c4c4884a560f68cb13d55ad0c4 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 16 Jun 2021 10:50:09 +0800
Subject: [PATCH 1963/3292] platform/x86: think-lmi: Add missing
 MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1623811809-65099-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 7771c9359449..c6c9fbb8a53e 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -873,6 +873,7 @@ static const struct wmi_device_id tlmi_id_table[] = {
 	{ .guid_string = LENOVO_BIOS_SETTING_GUID },
 	{ }
 };
+MODULE_DEVICE_TABLE(wmi, tlmi_id_table);
 
 static struct wmi_driver tlmi_driver = {
 	.driver = {

From 33ec58bd640a62a242d2e3e5f98ff7c478f1466c Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Mon, 14 Jun 2021 21:46:35 +0200
Subject: [PATCH 1964/3292] MAINTAINERS: Update IRC link for Surface System
 Aggregator subsystem

We have moved to libera.chat. Update the link accordingly.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210614194635.1681519-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 771ea74e7e2d..fecb2e42f956 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12190,7 +12190,7 @@ M:	Maximilian Luz <luzmaximilian@gmail.com>
 L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 W:	https://github.com/linux-surface/surface-aggregator-module
-C:	irc://chat.freenode.net/##linux-surface
+C:	irc://irc.libera.chat/linux-surface
 F:	Documentation/driver-api/surface_aggregator/
 F:	drivers/platform/surface/aggregator/
 F:	drivers/platform/surface/surface_acpi_notify.c

From 5de691bffe57fd0fc2b4dcdcf13815c56d11db10 Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Thu, 3 Jun 2021 23:40:06 +0100
Subject: [PATCH 1965/3292] platform/x86: Add intel_skl_int3472 driver

ACPI devices with _HID INT3472 are currently matched to the tps68470
driver, however this does not cover all situations in which that _HID
occurs. We've encountered three possibilities:

1. On Chrome OS devices, an ACPI device with _HID INT3472 (representing
a physical TPS68470 device) that requires a GPIO and OpRegion driver
2. On devices designed for Windows, an ACPI device with _HID INT3472
(again representing a physical TPS68470 device) which requires GPIO,
Clock and Regulator drivers.
3. On other devices designed for Windows, an ACPI device with _HID
INT3472 which does **not** represent a physical TPS68470, and is instead
used as a dummy device to group some system GPIO lines which are meant
to be consumed by the sensor that is dependent on this entry.

This commit adds a new module, registering a platform driver to deal
with the 3rd scenario plus an i2c driver to deal with #1 and #2, by
querying the CLDB buffer found against INT3472 entries to determine
which is most appropriate.

Suggested-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/20210603224007.120560-6-djrscally@gmail.com
[hdegoede@redhat.com Make skl_int3472_tps68470_calc_type() static]
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS                                   |   5 +
 drivers/platform/x86/Kconfig                  |   2 +
 drivers/platform/x86/Makefile                 |   1 +
 drivers/platform/x86/intel-int3472/Kconfig    |  30 ++
 drivers/platform/x86/intel-int3472/Makefile   |   5 +
 .../intel_skl_int3472_clk_and_regulator.c     | 196 ++++++++
 .../intel-int3472/intel_skl_int3472_common.c  | 106 +++++
 .../intel-int3472/intel_skl_int3472_common.h  | 118 +++++
 .../intel_skl_int3472_discrete.c              | 417 ++++++++++++++++++
 .../intel_skl_int3472_tps68470.c              | 137 ++++++
 10 files changed, 1017 insertions(+)
 create mode 100644 drivers/platform/x86/intel-int3472/Kconfig
 create mode 100644 drivers/platform/x86/intel-int3472/Makefile
 create mode 100644 drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
 create mode 100644 drivers/platform/x86/intel-int3472/intel_skl_int3472_common.c
 create mode 100644 drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
 create mode 100644 drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
 create mode 100644 drivers/platform/x86/intel-int3472/intel_skl_int3472_tps68470.c

diff --git a/MAINTAINERS b/MAINTAINERS
index fecb2e42f956..7da4c07364fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9385,6 +9385,11 @@ S:	Maintained
 F:	arch/x86/include/asm/intel_scu_ipc.h
 F:	drivers/platform/x86/intel_scu_*
 
+INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER
+M:	Daniel Scally <djrscally@gmail.com>
+S:	Maintained
+F:	drivers/platform/x86/intel-int3472/
+
 INTEL SPEED SELECT TECHNOLOGY
 M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 1e538ce8feaf..4fd792f2a10a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -709,6 +709,8 @@ config INTEL_CHT_INT33FE
 	  device and CONFIG_TYPEC_FUSB302=m and CONFIG_BATTERY_MAX17042=m
 	  for Type-C device.
 
+source "drivers/platform/x86/intel-int3472/Kconfig"
+
 config INTEL_HID_EVENT
 	tristate "INTEL HID Event"
 	depends on ACPI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index ff620d653d39..a1f64613af71 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_INTEL_HID_EVENT)		+= intel-hid.o
 obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
 obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
 obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
+obj-$(CONFIG_INTEL_SKL_INT3472)		+= intel-int3472/
 obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
 
 # MSI
diff --git a/drivers/platform/x86/intel-int3472/Kconfig b/drivers/platform/x86/intel-int3472/Kconfig
new file mode 100644
index 000000000000..c112878e833b
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/Kconfig
@@ -0,0 +1,30 @@
+config INTEL_SKL_INT3472
+	tristate "Intel SkyLake ACPI INT3472 Driver"
+	depends on ACPI
+	depends on COMMON_CLK && CLKDEV_LOOKUP
+	depends on I2C
+	depends on GPIOLIB
+	depends on REGULATOR
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	  This driver adds power controller support for the Intel SkyCam
+	  devices found on the Intel SkyLake platforms.
+
+	  The INT3472 is a camera power controller, a logical device found on
+	  Intel Skylake-based systems that can map to different hardware
+	  devices depending on the platform. On machines designed for Chrome OS
+	  it maps to a TPS68470 camera PMIC. On machines designed for Windows,
+	  it maps to either a TP68470 camera PMIC, a uP6641Q sensor PMIC, or a
+	  set of discrete GPIOs and power gates.
+
+	  If your device was designed for Chrome OS, this driver will provide
+	  an ACPI OpRegion, which must be available before any of the devices
+	  using it are probed. For this reason, you should select Y if your
+	  device was designed for ChromeOS. For the same reason the
+	  I2C_DESIGNWARE_PLATFORM option must be set to Y too.
+
+	  Say Y or M here if you have a SkyLake device designed for use
+	  with Windows or ChromeOS. Say N here if you are not sure.
+
+	  The module will be named "intel-skl-int3472".
diff --git a/drivers/platform/x86/intel-int3472/Makefile b/drivers/platform/x86/intel-int3472/Makefile
new file mode 100644
index 000000000000..48bd97f0a04e
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_INTEL_SKL_INT3472)		+= intel_skl_int3472.o
+intel_skl_int3472-objs			:= intel_skl_int3472_common.o \
+					   intel_skl_int3472_discrete.o \
+					   intel_skl_int3472_tps68470.o \
+					   intel_skl_int3472_clk_and_regulator.o
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
new file mode 100644
index 000000000000..ceee860e2c07
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dan Scally <djrscally@gmail.com> */
+
+#include <linux/acpi.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/driver.h>
+#include <linux/slab.h>
+
+#include "intel_skl_int3472_common.h"
+
+/*
+ * The regulators have to have .ops to be valid, but the only ops we actually
+ * support are .enable and .disable which are handled via .ena_gpiod. Pass an
+ * empty struct to clear the check without lying about capabilities.
+ */
+static const struct regulator_ops int3472_gpio_regulator_ops;
+
+static int skl_int3472_clk_prepare(struct clk_hw *hw)
+{
+	struct int3472_gpio_clock *clk = to_int3472_clk(hw);
+
+	gpiod_set_value_cansleep(clk->ena_gpio, 1);
+	gpiod_set_value_cansleep(clk->led_gpio, 1);
+
+	return 0;
+}
+
+static void skl_int3472_clk_unprepare(struct clk_hw *hw)
+{
+	struct int3472_gpio_clock *clk = to_int3472_clk(hw);
+
+	gpiod_set_value_cansleep(clk->ena_gpio, 0);
+	gpiod_set_value_cansleep(clk->led_gpio, 0);
+}
+
+static int skl_int3472_clk_enable(struct clk_hw *hw)
+{
+	/*
+	 * We're just turning a GPIO on to enable the clock, which operation
+	 * has the potential to sleep. Given .enable() cannot sleep, but
+	 * .prepare() can, we toggle the GPIO in .prepare() instead. Thus,
+	 * nothing to do here.
+	 */
+	return 0;
+}
+
+static void skl_int3472_clk_disable(struct clk_hw *hw)
+{
+	/* Likewise, nothing to do here... */
+}
+
+static unsigned int skl_int3472_get_clk_frequency(struct int3472_discrete_device *int3472)
+{
+	union acpi_object *obj;
+	unsigned int freq;
+
+	obj = skl_int3472_get_acpi_buffer(int3472->sensor, "SSDB");
+	if (IS_ERR(obj))
+		return 0; /* report rate as 0 on error */
+
+	if (obj->buffer.length < CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET + sizeof(u32)) {
+		dev_err(int3472->dev, "The buffer is too small\n");
+		kfree(obj);
+		return 0;
+	}
+
+	freq = *(u32 *)(obj->buffer.pointer + CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET);
+
+	kfree(obj);
+	return freq;
+}
+
+static unsigned long skl_int3472_clk_recalc_rate(struct clk_hw *hw,
+						 unsigned long parent_rate)
+{
+	struct int3472_gpio_clock *clk = to_int3472_clk(hw);
+
+	return clk->frequency;
+}
+
+static const struct clk_ops skl_int3472_clock_ops = {
+	.prepare = skl_int3472_clk_prepare,
+	.unprepare = skl_int3472_clk_unprepare,
+	.enable = skl_int3472_clk_enable,
+	.disable = skl_int3472_clk_disable,
+	.recalc_rate = skl_int3472_clk_recalc_rate,
+};
+
+int skl_int3472_register_clock(struct int3472_discrete_device *int3472)
+{
+	struct clk_init_data init = {
+		.ops = &skl_int3472_clock_ops,
+		.flags = CLK_GET_RATE_NOCACHE,
+	};
+	int ret;
+
+	init.name = kasprintf(GFP_KERNEL, "%s-clk",
+			      acpi_dev_name(int3472->adev));
+	if (!init.name)
+		return -ENOMEM;
+
+	int3472->clock.frequency = skl_int3472_get_clk_frequency(int3472);
+
+	int3472->clock.clk_hw.init = &init;
+	int3472->clock.clk = clk_register(&int3472->adev->dev,
+					  &int3472->clock.clk_hw);
+	if (IS_ERR(int3472->clock.clk)) {
+		ret = PTR_ERR(int3472->clock.clk);
+		goto out_free_init_name;
+	}
+
+	int3472->clock.cl = clkdev_create(int3472->clock.clk, NULL,
+					  int3472->sensor_name);
+	if (!int3472->clock.cl) {
+		ret = -ENOMEM;
+		goto err_unregister_clk;
+	}
+
+	kfree(init.name);
+	return 0;
+
+err_unregister_clk:
+	clk_unregister(int3472->clock.clk);
+out_free_init_name:
+	kfree(init.name);
+
+	return ret;
+}
+
+int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
+				   struct acpi_resource *ares)
+{
+	char *path = ares->data.gpio.resource_source.string_ptr;
+	const struct int3472_sensor_config *sensor_config;
+	struct regulator_consumer_supply supply_map;
+	struct regulator_init_data init_data = { };
+	struct regulator_config cfg = { };
+	int ret;
+
+	sensor_config = int3472->sensor_config;
+	if (IS_ERR(sensor_config)) {
+		dev_err(int3472->dev, "No sensor module config\n");
+		return PTR_ERR(sensor_config);
+	}
+
+	if (!sensor_config->supply_map.supply) {
+		dev_err(int3472->dev, "No supply name defined\n");
+		return -ENODEV;
+	}
+
+	init_data.constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+	init_data.num_consumer_supplies = 1;
+	supply_map = sensor_config->supply_map;
+	supply_map.dev_name = int3472->sensor_name;
+	init_data.consumer_supplies = &supply_map;
+
+	snprintf(int3472->regulator.regulator_name,
+		 sizeof(int3472->regulator.regulator_name), "%s-regulator",
+		 acpi_dev_name(int3472->adev));
+	snprintf(int3472->regulator.supply_name,
+		 GPIO_REGULATOR_SUPPLY_NAME_LENGTH, "supply-0");
+
+	int3472->regulator.rdesc = INT3472_REGULATOR(
+						int3472->regulator.regulator_name,
+						int3472->regulator.supply_name,
+						&int3472_gpio_regulator_ops);
+
+	int3472->regulator.gpio = acpi_get_and_request_gpiod(path,
+							     ares->data.gpio.pin_table[0],
+							     "int3472,regulator");
+	if (IS_ERR(int3472->regulator.gpio)) {
+		dev_err(int3472->dev, "Failed to get regulator GPIO line\n");
+		return PTR_ERR(int3472->regulator.gpio);
+	}
+
+	cfg.dev = &int3472->adev->dev;
+	cfg.init_data = &init_data;
+	cfg.ena_gpiod = int3472->regulator.gpio;
+
+	int3472->regulator.rdev = regulator_register(&int3472->regulator.rdesc,
+						     &cfg);
+	if (IS_ERR(int3472->regulator.rdev)) {
+		ret = PTR_ERR(int3472->regulator.rdev);
+		goto err_free_gpio;
+	}
+
+	return 0;
+
+err_free_gpio:
+	gpiod_put(int3472->regulator.gpio);
+
+	return ret;
+}
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.c
new file mode 100644
index 000000000000..497e74fba75f
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dan Scally <djrscally@gmail.com> */
+
+#include <linux/acpi.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "intel_skl_int3472_common.h"
+
+union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev, char *id)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_handle handle = adev->handle;
+	union acpi_object *obj;
+	acpi_status status;
+
+	status = acpi_evaluate_object(handle, id, NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		return ERR_PTR(-ENODEV);
+
+	obj = buffer.pointer;
+	if (!obj)
+		return ERR_PTR(-ENODEV);
+
+	if (obj->type != ACPI_TYPE_BUFFER) {
+		acpi_handle_err(handle, "%s object is not an ACPI buffer\n", id);
+		kfree(obj);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return obj;
+}
+
+int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb)
+{
+	union acpi_object *obj;
+	int ret;
+
+	obj = skl_int3472_get_acpi_buffer(adev, "CLDB");
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	if (obj->buffer.length > sizeof(*cldb)) {
+		acpi_handle_err(adev->handle, "The CLDB buffer is too large\n");
+		ret = -EINVAL;
+		goto out_free_obj;
+	}
+
+	memcpy(cldb, obj->buffer.pointer, obj->buffer.length);
+	ret = 0;
+
+out_free_obj:
+	kfree(obj);
+	return ret;
+}
+
+static const struct acpi_device_id int3472_device_id[] = {
+	{ "INT3472", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, int3472_device_id);
+
+static struct platform_driver int3472_discrete = {
+	.driver = {
+		.name = "int3472-discrete",
+		.acpi_match_table = int3472_device_id,
+	},
+	.probe = skl_int3472_discrete_probe,
+	.remove = skl_int3472_discrete_remove,
+};
+
+static struct i2c_driver int3472_tps68470 = {
+	.driver = {
+		.name = "int3472-tps68470",
+		.acpi_match_table = int3472_device_id,
+	},
+	.probe_new = skl_int3472_tps68470_probe,
+};
+
+static int skl_int3472_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&int3472_discrete);
+	if (ret)
+		return ret;
+
+	ret = i2c_register_driver(THIS_MODULE, &int3472_tps68470);
+	if (ret)
+		platform_driver_unregister(&int3472_discrete);
+
+	return ret;
+}
+module_init(skl_int3472_init);
+
+static void skl_int3472_exit(void)
+{
+	platform_driver_unregister(&int3472_discrete);
+	i2c_del_driver(&int3472_tps68470);
+}
+module_exit(skl_int3472_exit);
+
+MODULE_DESCRIPTION("Intel SkyLake INT3472 ACPI Device Driver");
+MODULE_AUTHOR("Daniel Scally <djrscally@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
new file mode 100644
index 000000000000..6fdf78584219
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Author: Dan Scally <djrscally@gmail.com> */
+
+#ifndef _INTEL_SKL_INT3472_H
+#define _INTEL_SKL_INT3472_H
+
+#include <linux/clk-provider.h>
+#include <linux/gpio/machine.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/types.h>
+
+/* FIXME drop this once the I2C_DEV_NAME_FORMAT macro has been added to include/linux/i2c.h */
+#ifndef I2C_DEV_NAME_FORMAT
+#define I2C_DEV_NAME_FORMAT					"i2c-%s"
+#endif
+
+/* PMIC GPIO Types */
+#define INT3472_GPIO_TYPE_RESET					0x00
+#define INT3472_GPIO_TYPE_POWERDOWN				0x01
+#define INT3472_GPIO_TYPE_POWER_ENABLE				0x0b
+#define INT3472_GPIO_TYPE_CLK_ENABLE				0x0c
+#define INT3472_GPIO_TYPE_PRIVACY_LED				0x0d
+
+#define INT3472_PDEV_MAX_NAME_LEN				23
+#define INT3472_MAX_SENSOR_GPIOS				3
+
+#define GPIO_REGULATOR_NAME_LENGTH				21
+#define GPIO_REGULATOR_SUPPLY_NAME_LENGTH			9
+
+#define CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET			86
+
+#define INT3472_REGULATOR(_name, _supply, _ops)			\
+	(const struct regulator_desc) {				\
+		.name = _name,					\
+		.supply_name = _supply,				\
+		.type = REGULATOR_VOLTAGE,			\
+		.ops = _ops,					\
+		.owner = THIS_MODULE,				\
+	}
+
+#define to_int3472_clk(hw)					\
+	container_of(hw, struct int3472_gpio_clock, clk_hw)
+
+#define to_int3472_device(clk)					\
+	container_of(clk, struct int3472_discrete_device, clock)
+
+struct acpi_device;
+struct i2c_client;
+struct platform_device;
+
+struct int3472_cldb {
+	u8 version;
+	/*
+	 * control logic type
+	 * 0: UNKNOWN
+	 * 1: DISCRETE(CRD-D)
+	 * 2: PMIC TPS68470
+	 * 3: PMIC uP6641
+	 */
+	u8 control_logic_type;
+	u8 control_logic_id;
+	u8 sensor_card_sku;
+	u8 reserved[28];
+};
+
+struct int3472_gpio_function_remap {
+	const char *documented;
+	const char *actual;
+};
+
+struct int3472_sensor_config {
+	const char *sensor_module_name;
+	struct regulator_consumer_supply supply_map;
+	const struct int3472_gpio_function_remap *function_maps;
+};
+
+struct int3472_discrete_device {
+	struct acpi_device *adev;
+	struct device *dev;
+	struct acpi_device *sensor;
+	const char *sensor_name;
+
+	const struct int3472_sensor_config *sensor_config;
+
+	struct int3472_gpio_regulator {
+		char regulator_name[GPIO_REGULATOR_NAME_LENGTH];
+		char supply_name[GPIO_REGULATOR_SUPPLY_NAME_LENGTH];
+		struct gpio_desc *gpio;
+		struct regulator_dev *rdev;
+		struct regulator_desc rdesc;
+	} regulator;
+
+	struct int3472_gpio_clock {
+		struct clk *clk;
+		struct clk_hw clk_hw;
+		struct clk_lookup *cl;
+		struct gpio_desc *ena_gpio;
+		struct gpio_desc *led_gpio;
+		u32 frequency;
+	} clock;
+
+	unsigned int ngpios; /* how many GPIOs have we seen */
+	unsigned int n_sensor_gpios; /* how many have we mapped to sensor */
+	struct gpiod_lookup_table gpios;
+};
+
+int skl_int3472_discrete_probe(struct platform_device *pdev);
+int skl_int3472_discrete_remove(struct platform_device *pdev);
+int skl_int3472_tps68470_probe(struct i2c_client *client);
+union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
+					       char *id);
+int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
+int skl_int3472_register_clock(struct int3472_discrete_device *int3472);
+int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
+				   struct acpi_resource *ares);
+
+#endif
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
new file mode 100644
index 000000000000..8c18dbff1c43
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dan Scally <djrscally@gmail.com> */
+
+#include <linux/acpi.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/platform_device.h>
+#include <linux/uuid.h>
+
+#include "intel_skl_int3472_common.h"
+
+/*
+ * 79234640-9e10-4fea-a5c1-b5aa8b19756f
+ * This _DSM GUID returns information about the GPIO lines mapped to a
+ * discrete INT3472 device. Function number 1 returns a count of the GPIO
+ * lines that are mapped. Subsequent functions return 32 bit ints encoding
+ * information about the GPIO line, including its purpose.
+ */
+static const guid_t int3472_gpio_guid =
+	GUID_INIT(0x79234640, 0x9e10, 0x4fea,
+		  0xa5, 0xc1, 0xb5, 0xaa, 0x8b, 0x19, 0x75, 0x6f);
+
+/*
+ * 822ace8f-2814-4174-a56b-5f029fe079ee
+ * This _DSM GUID returns a string from the sensor device, which acts as a
+ * module identifier.
+ */
+static const guid_t cio2_sensor_module_guid =
+	GUID_INIT(0x822ace8f, 0x2814, 0x4174,
+		  0xa5, 0x6b, 0x5f, 0x02, 0x9f, 0xe0, 0x79, 0xee);
+
+/*
+ * Here follows platform specific mapping information that we can pass to
+ * the functions mapping resources to the sensors. Where the sensors have
+ * a power enable pin defined in DSDT we need to provide a supply name so
+ * the sensor drivers can find the regulator. The device name will be derived
+ * from the sensor's ACPI device within the code. Optionally, we can provide a
+ * NULL terminated array of function name mappings to deal with any platform
+ * specific deviations from the documented behaviour of GPIOs.
+ *
+ * Map a GPIO function name to NULL to prevent the driver from mapping that
+ * GPIO at all.
+ */
+
+static const struct int3472_gpio_function_remap ov2680_gpio_function_remaps[] = {
+	{ "reset", NULL },
+	{ "powerdown", "reset" },
+	{ }
+};
+
+static const struct int3472_sensor_config int3472_sensor_configs[] = {
+	/* Lenovo Miix 510-12ISK - OV2680, Front */
+	{ "GNDF140809R", { 0 }, ov2680_gpio_function_remaps },
+	/* Lenovo Miix 510-12ISK - OV5648, Rear */
+	{ "GEFF150023R", REGULATOR_SUPPLY("avdd", NULL), NULL },
+	/* Surface Go 1&2 - OV5693, Front */
+	{ "YHCU", REGULATOR_SUPPLY("avdd", NULL), NULL },
+};
+
+static const struct int3472_sensor_config *
+skl_int3472_get_sensor_module_config(struct int3472_discrete_device *int3472)
+{
+	union acpi_object *obj;
+	unsigned int i;
+
+	obj = acpi_evaluate_dsm_typed(int3472->sensor->handle,
+				      &cio2_sensor_module_guid, 0x00,
+				      0x01, NULL, ACPI_TYPE_STRING);
+
+	if (!obj) {
+		dev_err(int3472->dev,
+			"Failed to get sensor module string from _DSM\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	if (obj->string.type != ACPI_TYPE_STRING) {
+		dev_err(int3472->dev,
+			"Sensor _DSM returned a non-string value\n");
+
+		ACPI_FREE(obj);
+		return ERR_PTR(-EINVAL);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(int3472_sensor_configs); i++) {
+		if (!strcmp(int3472_sensor_configs[i].sensor_module_name,
+			    obj->string.pointer))
+			break;
+	}
+
+	ACPI_FREE(obj);
+
+	if (i >= ARRAY_SIZE(int3472_sensor_configs))
+		return ERR_PTR(-EINVAL);
+
+	return &int3472_sensor_configs[i];
+}
+
+static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472,
+					  struct acpi_resource *ares,
+					  const char *func, u32 polarity)
+{
+	char *path = ares->data.gpio.resource_source.string_ptr;
+	const struct int3472_sensor_config *sensor_config;
+	struct gpiod_lookup *table_entry;
+	struct acpi_device *adev;
+	acpi_handle handle;
+	acpi_status status;
+	int ret;
+
+	if (int3472->n_sensor_gpios >= INT3472_MAX_SENSOR_GPIOS) {
+		dev_warn(int3472->dev, "Too many GPIOs mapped\n");
+		return -EINVAL;
+	}
+
+	sensor_config = int3472->sensor_config;
+	if (!IS_ERR(sensor_config) && sensor_config->function_maps) {
+		const struct int3472_gpio_function_remap *remap;
+
+		for (remap = sensor_config->function_maps; remap->documented; remap++) {
+			if (!strcmp(func, remap->documented)) {
+				func = remap->actual;
+				break;
+			}
+		}
+	}
+
+	/* Functions mapped to NULL should not be mapped to the sensor */
+	if (!func)
+		return 0;
+
+	status = acpi_get_handle(NULL, path, &handle);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	ret = acpi_bus_get_device(handle, &adev);
+	if (ret)
+		return -ENODEV;
+
+	table_entry = &int3472->gpios.table[int3472->n_sensor_gpios];
+	table_entry->key = acpi_dev_name(adev);
+	table_entry->chip_hwnum = ares->data.gpio.pin_table[0];
+	table_entry->con_id = func;
+	table_entry->idx = 0;
+	table_entry->flags = polarity;
+
+	int3472->n_sensor_gpios++;
+
+	return 0;
+}
+
+static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472,
+				       struct acpi_resource *ares, u8 type)
+{
+	char *path = ares->data.gpio.resource_source.string_ptr;
+	struct gpio_desc *gpio;
+
+	switch (type) {
+	case INT3472_GPIO_TYPE_CLK_ENABLE:
+		gpio = acpi_get_and_request_gpiod(path, ares->data.gpio.pin_table[0],
+						  "int3472,clk-enable");
+		if (IS_ERR(gpio))
+			return (PTR_ERR(gpio));
+
+		int3472->clock.ena_gpio = gpio;
+		break;
+	case INT3472_GPIO_TYPE_PRIVACY_LED:
+		gpio = acpi_get_and_request_gpiod(path, ares->data.gpio.pin_table[0],
+						  "int3472,privacy-led");
+		if (IS_ERR(gpio))
+			return (PTR_ERR(gpio));
+
+		int3472->clock.led_gpio = gpio;
+		break;
+	default:
+		dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * skl_int3472_handle_gpio_resources: Map PMIC resources to consuming sensor
+ * @ares: A pointer to a &struct acpi_resource
+ * @data: A pointer to a &struct int3472_discrete_device
+ *
+ * This function handles GPIO resources that are against an INT3472
+ * ACPI device, by checking the value of the corresponding _DSM entry.
+ * This will return a 32bit int, where the lowest byte represents the
+ * function of the GPIO pin:
+ *
+ * 0x00 Reset
+ * 0x01 Power down
+ * 0x0b Power enable
+ * 0x0c Clock enable
+ * 0x0d Privacy LED
+ *
+ * There are some known platform specific quirks where that does not quite
+ * hold up; for example where a pin with type 0x01 (Power down) is mapped to
+ * a sensor pin that performs a reset function or entries in _CRS and _DSM that
+ * do not actually correspond to a physical connection. These will be handled
+ * by the mapping sub-functions.
+ *
+ * GPIOs will either be mapped directly to the sensor device or else used
+ * to create clocks and regulators via the usual frameworks.
+ *
+ * Return:
+ * * 1		- To continue the loop
+ * * 0		- When all resources found are handled properly.
+ * * -EINVAL	- If the resource is not a GPIO IO resource
+ * * -ENODEV	- If the resource has no corresponding _DSM entry
+ * * -Other	- Errors propagated from one of the sub-functions.
+ */
+static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
+					     void *data)
+{
+	struct int3472_discrete_device *int3472 = data;
+	struct acpi_resource_gpio *agpio;
+	union acpi_object *obj;
+	const char *err_msg;
+	int ret;
+	u8 type;
+
+	if (!acpi_gpio_get_io_resource(ares, &agpio))
+		return 1;
+
+	/*
+	 * ngpios + 2 because the index of this _DSM function is 1-based and
+	 * the first function is just a count.
+	 */
+	obj = acpi_evaluate_dsm_typed(int3472->adev->handle,
+				      &int3472_gpio_guid, 0x00,
+				      int3472->ngpios + 2,
+				      NULL, ACPI_TYPE_INTEGER);
+
+	if (!obj) {
+		dev_warn(int3472->dev, "No _DSM entry for GPIO pin %u\n",
+			 ares->data.gpio.pin_table[0]);
+		return 1;
+	}
+
+	type = obj->integer.value & 0xff;
+
+	switch (type) {
+	case INT3472_GPIO_TYPE_RESET:
+		ret = skl_int3472_map_gpio_to_sensor(int3472, ares, "reset",
+						     GPIO_ACTIVE_LOW);
+		if (ret)
+			err_msg = "Failed to map reset pin to sensor\n";
+
+		break;
+	case INT3472_GPIO_TYPE_POWERDOWN:
+		ret = skl_int3472_map_gpio_to_sensor(int3472, ares,
+						     "powerdown",
+						     GPIO_ACTIVE_LOW);
+		if (ret)
+			err_msg = "Failed to map powerdown pin to sensor\n";
+
+		break;
+	case INT3472_GPIO_TYPE_CLK_ENABLE:
+	case INT3472_GPIO_TYPE_PRIVACY_LED:
+		ret = skl_int3472_map_gpio_to_clk(int3472, ares, type);
+		if (ret)
+			err_msg = "Failed to map GPIO to clock\n";
+
+		break;
+	case INT3472_GPIO_TYPE_POWER_ENABLE:
+		ret = skl_int3472_register_regulator(int3472, ares);
+		if (ret)
+			err_msg = "Failed to map regulator to sensor\n";
+
+		break;
+	default:
+		dev_warn(int3472->dev,
+			 "GPIO type 0x%02x unknown; the sensor may not work\n",
+			 type);
+		ret = 1;
+		break;
+	}
+
+	int3472->ngpios++;
+	ACPI_FREE(obj);
+
+	if (ret)
+		return dev_err_probe(int3472->dev, ret, err_msg);
+
+	return 0;
+}
+
+static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)
+{
+	LIST_HEAD(resource_list);
+	int ret;
+
+	/*
+	 * No error check, because not having a sensor config is not necessarily
+	 * a failure mode.
+	 */
+	int3472->sensor_config = skl_int3472_get_sensor_module_config(int3472);
+
+	ret = acpi_dev_get_resources(int3472->adev, &resource_list,
+				     skl_int3472_handle_gpio_resources,
+				     int3472);
+	if (ret)
+		goto out_free_res_list;
+
+	/*
+	 * If we find no clock enable GPIO pin then the privacy LED won't work.
+	 * We've never seen that situation, but it's possible. Warn the user so
+	 * it's clear what's happened.
+	 */
+	if (int3472->clock.ena_gpio) {
+		ret = skl_int3472_register_clock(int3472);
+		if (ret)
+			goto out_free_res_list;
+	} else {
+		if (int3472->clock.led_gpio)
+			dev_warn(int3472->dev,
+				 "No clk GPIO. The privacy LED won't work\n");
+	}
+
+	int3472->gpios.dev_id = int3472->sensor_name;
+	gpiod_add_lookup_table(&int3472->gpios);
+
+out_free_res_list:
+	acpi_dev_free_resource_list(&resource_list);
+
+	return ret;
+}
+
+int skl_int3472_discrete_probe(struct platform_device *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	struct int3472_discrete_device *int3472;
+	struct int3472_cldb cldb;
+	int ret;
+
+	ret = skl_int3472_fill_cldb(adev, &cldb);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't fill CLDB structure\n");
+		return ret;
+	}
+
+	if (cldb.control_logic_type != 1) {
+		dev_err(&pdev->dev, "Unsupported control logic type %u\n",
+			cldb.control_logic_type);
+		return -EINVAL;
+	}
+
+	/* Max num GPIOs we've seen plus a terminator */
+	int3472 = devm_kzalloc(&pdev->dev, struct_size(int3472, gpios.table,
+			       INT3472_MAX_SENSOR_GPIOS + 1), GFP_KERNEL);
+	if (!int3472)
+		return -ENOMEM;
+
+	int3472->adev = adev;
+	int3472->dev = &pdev->dev;
+	platform_set_drvdata(pdev, int3472);
+
+	int3472->sensor = acpi_dev_get_first_consumer_dev(adev);
+	if (!int3472->sensor) {
+		dev_err(&pdev->dev, "INT3472 seems to have no dependents.\n");
+		return -ENODEV;
+	}
+
+	int3472->sensor_name = devm_kasprintf(int3472->dev, GFP_KERNEL,
+					      I2C_DEV_NAME_FORMAT,
+					      acpi_dev_name(int3472->sensor));
+	if (!int3472->sensor_name) {
+		ret = -ENOMEM;
+		goto err_put_sensor;
+	}
+
+	/*
+	 * Initialising this list means we can call gpiod_remove_lookup_table()
+	 * in failure paths without issue.
+	 */
+	INIT_LIST_HEAD(&int3472->gpios.list);
+
+	ret = skl_int3472_parse_crs(int3472);
+	if (ret) {
+		skl_int3472_discrete_remove(pdev);
+		return ret;
+	}
+
+	return 0;
+
+err_put_sensor:
+	acpi_dev_put(int3472->sensor);
+
+	return ret;
+}
+
+int skl_int3472_discrete_remove(struct platform_device *pdev)
+{
+	struct int3472_discrete_device *int3472 = platform_get_drvdata(pdev);
+
+	gpiod_remove_lookup_table(&int3472->gpios);
+	regulator_unregister(int3472->regulator.rdev);
+	clk_unregister(int3472->clock.clk);
+
+	if (int3472->clock.cl)
+		clkdev_drop(int3472->clock.cl);
+
+	gpiod_put(int3472->regulator.gpio);
+	gpiod_put(int3472->clock.ena_gpio);
+	gpiod_put(int3472->clock.led_gpio);
+
+	return 0;
+}
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_tps68470.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_tps68470.c
new file mode 100644
index 000000000000..c05b4cf502fe
--- /dev/null
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_tps68470.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dan Scally <djrscally@gmail.com> */
+
+#include <linux/i2c.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/tps68470.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "intel_skl_int3472_common.h"
+
+#define DESIGNED_FOR_CHROMEOS		1
+#define DESIGNED_FOR_WINDOWS		2
+
+static const struct mfd_cell tps68470_cros[] = {
+	{ .name = "tps68470-gpio" },
+	{ .name = "tps68470_pmic_opregion" },
+};
+
+static const struct mfd_cell tps68470_win[] = {
+	{ .name = "tps68470-gpio" },
+	{ .name = "tps68470-clk" },
+	{ .name = "tps68470-regulator" },
+};
+
+static const struct regmap_config tps68470_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = TPS68470_REG_MAX,
+};
+
+static int tps68470_chip_init(struct device *dev, struct regmap *regmap)
+{
+	unsigned int version;
+	int ret;
+
+	/* Force software reset */
+	ret = regmap_write(regmap, TPS68470_REG_RESET, TPS68470_REG_RESET_MASK);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(regmap, TPS68470_REG_REVID, &version);
+	if (ret) {
+		dev_err(dev, "Failed to read revision register: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(dev, "TPS68470 REVID: 0x%02x\n", version);
+
+	return 0;
+}
+
+/** skl_int3472_tps68470_calc_type: Check what platform a device is designed for
+ * @adev: A pointer to a &struct acpi_device
+ *
+ * Check CLDB buffer against the PMIC's adev. If present, then we check
+ * the value of control_logic_type field and follow one of the
+ * following scenarios:
+ *
+ *	1. No CLDB - likely ACPI tables designed for ChromeOS. We
+ *	create platform devices for the GPIOs and OpRegion drivers.
+ *
+ *	2. CLDB, with control_logic_type = 2 - probably ACPI tables
+ *	made for Windows 2-in-1 platforms. Register pdevs for GPIO,
+ *	Clock and Regulator drivers to bind to.
+ *
+ *	3. Any other value in control_logic_type, we should never have
+ *	gotten to this point; fail probe and return.
+ *
+ * Return:
+ * * 1		Device intended for ChromeOS
+ * * 2		Device intended for Windows
+ * * -EINVAL	Where @adev has an object named CLDB but it does not conform to
+ *		our expectations
+ */
+static int skl_int3472_tps68470_calc_type(struct acpi_device *adev)
+{
+	struct int3472_cldb cldb = { 0 };
+	int ret;
+
+	/*
+	 * A CLDB buffer that exists, but which does not match our expectations
+	 * should trigger an error so we don't blindly continue.
+	 */
+	ret = skl_int3472_fill_cldb(adev, &cldb);
+	if (ret && ret != -ENODEV)
+		return ret;
+
+	if (ret)
+		return DESIGNED_FOR_CHROMEOS;
+
+	if (cldb.control_logic_type != 2)
+		return -EINVAL;
+
+	return DESIGNED_FOR_WINDOWS;
+}
+
+int skl_int3472_tps68470_probe(struct i2c_client *client)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&client->dev);
+	struct regmap *regmap;
+	int device_type;
+	int ret;
+
+	regmap = devm_regmap_init_i2c(client, &tps68470_regmap_config);
+	if (IS_ERR(regmap)) {
+		dev_err(&client->dev, "Failed to create regmap: %ld\n", PTR_ERR(regmap));
+		return PTR_ERR(regmap);
+	}
+
+	i2c_set_clientdata(client, regmap);
+
+	ret = tps68470_chip_init(&client->dev, regmap);
+	if (ret < 0) {
+		dev_err(&client->dev, "TPS68470 init error %d\n", ret);
+		return ret;
+	}
+
+	device_type = skl_int3472_tps68470_calc_type(adev);
+	switch (device_type) {
+	case DESIGNED_FOR_WINDOWS:
+		ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE,
+					   tps68470_win, ARRAY_SIZE(tps68470_win),
+					   NULL, 0, NULL);
+		break;
+	case DESIGNED_FOR_CHROMEOS:
+		ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE,
+					   tps68470_cros, ARRAY_SIZE(tps68470_cros),
+					   NULL, 0, NULL);
+		break;
+	default:
+		dev_err(&client->dev, "Failed to add MFD devices\n");
+		return device_type;
+	}
+
+	return ret;
+}

From ee55b92a7391bf871939330f662651b54be51b73 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 15 Jun 2021 13:45:55 -0400
Subject: [PATCH 1966/3292] dm writecache: flush origin device when writing and
 cache is full

Commit d53f1fafec9d086f1c5166436abefdaef30e0363 ("dm writecache: do
direct write if the cache is full") changed dm-writecache, so that it
writes directly to the origin device if the cache is full.
Unfortunately, it doesn't forward flush requests to the origin device,
so that there is a bug where flushes are being ignored.

Fix this by adding missing flush forwarding.

For PMEM mode, we fix this bug by disabling direct writes to the origin
device, because it performs better.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: d53f1fafec9d ("dm writecache: do direct write if the cache is full")
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 28bb6890fcf4..ddd368e0491d 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1297,8 +1297,12 @@ static int writecache_map(struct dm_target *ti, struct bio *bio)
 			writecache_flush(wc);
 			if (writecache_has_error(wc))
 				goto unlock_error;
+			if (unlikely(wc->cleaner))
+				goto unlock_remap_origin;
 			goto unlock_submit;
 		} else {
+			if (dm_bio_get_target_bio_nr(bio))
+				goto unlock_remap_origin;
 			writecache_offload_bio(wc, bio);
 			goto unlock_return;
 		}
@@ -1377,7 +1381,7 @@ read_next_block:
 			}
 			e = writecache_pop_from_freelist(wc, (sector_t)-1);
 			if (unlikely(!e)) {
-				if (!found_entry) {
+				if (!WC_MODE_PMEM(wc) && !found_entry) {
 direct_write:
 					e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
 					if (e) {
@@ -2484,7 +2488,7 @@ overflow:
 		goto bad;
 	}
 
-	ti->num_flush_bios = 1;
+	ti->num_flush_bios = WC_MODE_PMEM(wc) ? 1 : 2;
 	ti->flush_supported = true;
 	ti->num_discard_bios = 1;
 

From fda0b5ba9d5a9f6bfab9bc195f7a8fce13aedf61 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Mon, 14 Jun 2021 14:41:09 -0700
Subject: [PATCH 1967/3292] docs: block/bfq: describe per-device weight

The functionality of setting per-device weight for BFQ was added
in v5.4 (commit 795fe54c2a828099), but the documentation was never
updated.

While at it, improve formatting a bit.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Link: https://lore.kernel.org/r/20210614214109.207430-1-kolyshkin@gmail.com
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/block/bfq-iosched.rst | 38 ++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst
index 66c5a4e54130..df3a8a47f58c 100644
--- a/Documentation/block/bfq-iosched.rst
+++ b/Documentation/block/bfq-iosched.rst
@@ -553,20 +553,36 @@ throughput sustainable with bfq, because updating the blkio.bfq.*
 stats is rather costly, especially for some of the stats enabled by
 CONFIG_BFQ_CGROUP_DEBUG.
 
-Parameters to set
------------------
+Parameters
+----------
 
-For each group, there is only the following parameter to set.
+For each group, the following parameters can be set:
 
-weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
-group inside its parent. Available values: 1..1000 (default 100). The
-linear mapping between ioprio and weights, described at the beginning
-of the tunable section, is still valid, but all weights higher than
-IOPRIO_BE_NR*10 are mapped to ioprio 0.
+  weight
+        This specifies the default weight for the cgroup inside its parent.
+        Available values: 1..1000 (default: 100).
 
-Recall that, if low-latency is set, then BFQ automatically raises the
-weight of the queues associated with interactive and soft real-time
-applications. Unset this tunable if you need/want to control weights.
+        For cgroup v1, it is set by writing the value to `blkio.bfq.weight`.
+
+        For cgroup v2, it is set by writing the value to `io.bfq.weight`.
+        (with an optional prefix of `default` and a space).
+
+        The linear mapping between ioprio and weights, described at the beginning
+        of the tunable section, is still valid, but all weights higher than
+        IOPRIO_BE_NR*10 are mapped to ioprio 0.
+
+        Recall that, if low-latency is set, then BFQ automatically raises the
+        weight of the queues associated with interactive and soft real-time
+        applications. Unset this tunable if you need/want to control weights.
+
+  weight_device
+        This specifies a per-device weight for the cgroup. The syntax is
+        `minor:major weight`. A weight of `0` may be used to reset to the default
+        weight.
+
+        For cgroup v1, it is set by writing the value to `blkio.bfq.weight_device`.
+
+        For cgroup v2, the file name is `io.bfq.weight`.
 
 
 [1]

From 37fe403898b2fa5fc9d3616fcda2ee6629318aab Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Thu, 10 Jun 2021 20:07:36 -0700
Subject: [PATCH 1968/3292] docs/cgroup-v1/blkio: stop abusing itemized list

Fix many formatting issues by stop (ab)using itemized lists for
everything (mostly replaced by definition lists).

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 .../cgroup-v1/blkio-controller.rst            | 130 +++++++++---------
 1 file changed, 67 insertions(+), 63 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v1/blkio-controller.rst b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
index 36d43ae7dc13..8101dcba381d 100644
--- a/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
+++ b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
@@ -17,36 +17,37 @@ level logical devices like device mapper.
 
 HOWTO
 =====
+
 Throttling/Upper Limit policy
 -----------------------------
-- Enable Block IO controller::
+Enable Block IO controller::
 
 	CONFIG_BLK_CGROUP=y
 
-- Enable throttling in block layer::
+Enable throttling in block layer::
 
 	CONFIG_BLK_DEV_THROTTLING=y
 
-- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
+Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
 
         mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
 
-- Specify a bandwidth rate on particular device for root group. The format
-  for policy is "<major>:<minor>  <bytes_per_second>"::
+Specify a bandwidth rate on particular device for root group. The format
+for policy is "<major>:<minor>  <bytes_per_second>"::
 
         echo "8:16  1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
 
-  Above will put a limit of 1MB/second on reads happening for root group
-  on device having major/minor number 8:16.
+This will put a limit of 1MB/second on reads happening for root group
+on device having major/minor number 8:16.
 
-- Run dd to read a file and see if rate is throttled to 1MB/s or not::
+Run dd to read a file and see if rate is throttled to 1MB/s or not::
 
         # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
         1024+0 records in
         1024+0 records out
         4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
 
- Limits for writes can be put using blkio.throttle.write_bps_device file.
+Limits for writes can be put using blkio.throttle.write_bps_device file.
 
 Hierarchical Cgroups
 ====================
@@ -79,28 +80,31 @@ following::
 
 Various user visible config options
 ===================================
-CONFIG_BLK_CGROUP
-	- Block IO controller.
 
-CONFIG_BFQ_CGROUP_DEBUG
-	- Debug help. Right now some additional stats file show up in cgroup
+  CONFIG_BLK_CGROUP
+	  Block IO controller.
+
+  CONFIG_BFQ_CGROUP_DEBUG
+	  Debug help. Right now some additional stats file show up in cgroup
 	  if this option is enabled.
 
-CONFIG_BLK_DEV_THROTTLING
-	- Enable block device throttling support in block layer.
+  CONFIG_BLK_DEV_THROTTLING
+	  Enable block device throttling support in block layer.
 
 Details of cgroup files
 =======================
+
 Proportional weight policy files
 --------------------------------
-- blkio.weight
-	- Specifies per cgroup weight. This is default weight of the group
+
+  blkio.weight
+	  Specifies per cgroup weight. This is default weight of the group
 	  on all the devices until and unless overridden by per device rule.
 	  (See blkio.weight_device).
 	  Currently allowed range of weights is from 10 to 1000.
 
-- blkio.weight_device
-	- One can specify per cgroup per device rules using this interface.
+  blkio.weight_device
+	  One can specify per cgroup per device rules using this interface.
 	  These rules override the default value of group weight as specified
 	  by blkio.weight.
 
@@ -130,34 +134,34 @@ Proportional weight policy files
 	    dev     weight
 	    8:16    300
 
-- blkio.time
-	- disk time allocated to cgroup per device in milliseconds. First
+  blkio.time
+	  Disk time allocated to cgroup per device in milliseconds. First
 	  two fields specify the major and minor number of the device and
 	  third field specifies the disk time allocated to group in
 	  milliseconds.
 
-- blkio.sectors
-	- number of sectors transferred to/from disk by the group. First
+  blkio.sectors
+	  Number of sectors transferred to/from disk by the group. First
 	  two fields specify the major and minor number of the device and
 	  third field specifies the number of sectors transferred by the
 	  group to/from the device.
 
-- blkio.io_service_bytes
-	- Number of bytes transferred to/from the disk by the group. These
+  blkio.io_service_bytes
+	  Number of bytes transferred to/from the disk by the group. These
 	  are further divided by the type of operation - read or write, sync
 	  or async. First two fields specify the major and minor number of the
 	  device, third field specifies the operation type and the fourth field
 	  specifies the number of bytes.
 
-- blkio.io_serviced
-	- Number of IOs (bio) issued to the disk by the group. These
+  blkio.io_serviced
+	  Number of IOs (bio) issued to the disk by the group. These
 	  are further divided by the type of operation - read or write, sync
 	  or async. First two fields specify the major and minor number of the
 	  device, third field specifies the operation type and the fourth field
 	  specifies the number of IOs.
 
-- blkio.io_service_time
-	- Total amount of time between request dispatch and request completion
+  blkio.io_service_time
+	  Total amount of time between request dispatch and request completion
 	  for the IOs done by this cgroup. This is in nanoseconds to make it
 	  meaningful for flash devices too. For devices with queue depth of 1,
 	  this time represents the actual service time. When queue_depth > 1,
@@ -170,8 +174,8 @@ Proportional weight policy files
 	  specifies the operation type and the fourth field specifies the
 	  io_service_time in ns.
 
-- blkio.io_wait_time
-	- Total amount of time the IOs for this cgroup spent waiting in the
+  blkio.io_wait_time
+	  Total amount of time the IOs for this cgroup spent waiting in the
 	  scheduler queues for service. This can be greater than the total time
 	  elapsed since it is cumulative io_wait_time for all IOs. It is not a
 	  measure of total time the cgroup spent waiting but rather a measure of
@@ -185,24 +189,24 @@ Proportional weight policy files
 	  minor number of the device, third field specifies the operation type
 	  and the fourth field specifies the io_wait_time in ns.
 
-- blkio.io_merged
-	- Total number of bios/requests merged into requests belonging to this
+  blkio.io_merged
+	  Total number of bios/requests merged into requests belonging to this
 	  cgroup. This is further divided by the type of operation - read or
 	  write, sync or async.
 
-- blkio.io_queued
-	- Total number of requests queued up at any given instant for this
+  blkio.io_queued
+	  Total number of requests queued up at any given instant for this
 	  cgroup. This is further divided by the type of operation - read or
 	  write, sync or async.
 
-- blkio.avg_queue_size
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+  blkio.avg_queue_size
+	  Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  The average queue size for this cgroup over the entire time of this
 	  cgroup's existence. Queue size samples are taken each time one of the
 	  queues of this cgroup gets a timeslice.
 
-- blkio.group_wait_time
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+  blkio.group_wait_time
+	  Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  This is the amount of time the cgroup had to wait since it became busy
 	  (i.e., went from 0 to 1 request queued) to get a timeslice for one of
 	  its queues. This is different from the io_wait_time which is the
@@ -212,8 +216,8 @@ Proportional weight policy files
 	  will only report the group_wait_time accumulated till the last time it
 	  got a timeslice and will not include the current delta.
 
-- blkio.empty_time
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+  blkio.empty_time
+	  Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  This is the amount of time a cgroup spends without any pending
 	  requests when not being served, i.e., it does not include any time
 	  spent idling for one of the queues of the cgroup. This is in
@@ -221,8 +225,8 @@ Proportional weight policy files
 	  the stat will only report the empty_time accumulated till the last
 	  time it had a pending request and will not include the current delta.
 
-- blkio.idle_time
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
+  blkio.idle_time
+	  Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
 	  This is the amount of time spent by the IO scheduler idling for a
 	  given cgroup in anticipation of a better request than the existing ones
 	  from other queues/cgroups. This is in nanoseconds. If this is read
@@ -230,60 +234,60 @@ Proportional weight policy files
 	  idle_time accumulated till the last idle period and will not include
 	  the current delta.
 
-- blkio.dequeue
-	- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
+  blkio.dequeue
+	  Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
 	  gives the statistics about how many a times a group was dequeued
 	  from service tree of the device. First two fields specify the major
 	  and minor number of the device and third field specifies the number
 	  of times a group was dequeued from a particular device.
 
-- blkio.*_recursive
-	- Recursive version of various stats. These files show the
+  blkio.*_recursive
+	  Recursive version of various stats. These files show the
           same information as their non-recursive counterparts but
           include stats from all the descendant cgroups.
 
 Throttling/Upper limit policy files
 -----------------------------------
-- blkio.throttle.read_bps_device
-	- Specifies upper limit on READ rate from the device. IO rate is
+  blkio.throttle.read_bps_device
+	  Specifies upper limit on READ rate from the device. IO rate is
 	  specified in bytes per second. Rules are per device. Following is
 	  the format::
 
 	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
 
-- blkio.throttle.write_bps_device
-	- Specifies upper limit on WRITE rate to the device. IO rate is
+  blkio.throttle.write_bps_device
+	  Specifies upper limit on WRITE rate to the device. IO rate is
 	  specified in bytes per second. Rules are per device. Following is
 	  the format::
 
 	    echo "<major>:<minor>  <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
 
-- blkio.throttle.read_iops_device
-	- Specifies upper limit on READ rate from the device. IO rate is
+  blkio.throttle.read_iops_device
+	  Specifies upper limit on READ rate from the device. IO rate is
 	  specified in IO per second. Rules are per device. Following is
 	  the format::
 
 	   echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
 
-- blkio.throttle.write_iops_device
-	- Specifies upper limit on WRITE rate to the device. IO rate is
+  blkio.throttle.write_iops_device
+	  Specifies upper limit on WRITE rate to the device. IO rate is
 	  specified in io per second. Rules are per device. Following is
 	  the format::
 
 	    echo "<major>:<minor>  <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
 
-Note: If both BW and IOPS rules are specified for a device, then IO is
-      subjected to both the constraints.
+          Note: If both BW and IOPS rules are specified for a device, then IO is
+          subjected to both the constraints.
 
-- blkio.throttle.io_serviced
-	- Number of IOs (bio) issued to the disk by the group. These
+  blkio.throttle.io_serviced
+	  Number of IOs (bio) issued to the disk by the group. These
 	  are further divided by the type of operation - read or write, sync
 	  or async. First two fields specify the major and minor number of the
 	  device, third field specifies the operation type and the fourth field
 	  specifies the number of IOs.
 
-- blkio.throttle.io_service_bytes
-	- Number of bytes transferred to/from the disk by the group. These
+  blkio.throttle.io_service_bytes
+	  Number of bytes transferred to/from the disk by the group. These
 	  are further divided by the type of operation - read or write, sync
 	  or async. First two fields specify the major and minor number of the
 	  device, third field specifies the operation type and the fourth field
@@ -291,6 +295,6 @@ Note: If both BW and IOPS rules are specified for a device, then IO is
 
 Common files among various policies
 -----------------------------------
-- blkio.reset_stats
-	- Writing an int to this file will result in resetting all the stats
+  blkio.reset_stats
+	  Writing an int to this file will result in resetting all the stats
 	  for that cgroup.

From 828615950b5876e75587fdd5e9d1185be9cabba7 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Thu, 10 Jun 2021 20:07:37 -0700
Subject: [PATCH 1969/3292] docs/cgroup-v1/blkio: update for 5.x kernels

Commit bf382fb0bcef4 ("block: remove legacy IO schedulers", Oct 12 2018)
removes the CFQ scheduler, together with blkio.weight and
blkio.weight_device described in cgroup v1 documentation. Users are
supposed to use the BFQ scheduler, which cgroup file for setting weight
is blkio.bfq.weight, but there is no way to set per-device weight.

Later, commit 795fe54c2a8 per-device weights for BFQ, meaning that
blkio.bfq.weight and blkio.bfq.weight_device can be used in a way
similar to the old CFQ cgroup interface.

Yet, the cgroup v1 docs were never updated. Fix this:
 - use the new file names;
 - fix the range for weight (used to be 10..1000, now 1..1000);
 - link to BFQ scheduler docs.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 .../cgroup-v1/blkio-controller.rst            | 31 ++++++++++---------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v1/blkio-controller.rst b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
index 8101dcba381d..16253eda192e 100644
--- a/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
+++ b/Documentation/admin-guide/cgroup-v1/blkio-controller.rst
@@ -97,40 +97,41 @@ Details of cgroup files
 Proportional weight policy files
 --------------------------------
 
-  blkio.weight
+  blkio.bfq.weight
 	  Specifies per cgroup weight. This is default weight of the group
-	  on all the devices until and unless overridden by per device rule.
-	  (See blkio.weight_device).
-	  Currently allowed range of weights is from 10 to 1000.
+	  on all the devices until and unless overridden by per device rule
+	  (see `blkio.bfq.weight_device` below).
 
-  blkio.weight_device
-	  One can specify per cgroup per device rules using this interface.
-	  These rules override the default value of group weight as specified
-	  by blkio.weight.
+	  Currently allowed range of weights is from 1 to 1000. For more details,
+          see Documentation/block/bfq-iosched.rst.
+
+  blkio.bfq.weight_device
+          Specifes per cgroup per device weights, overriding the default group
+          weight. For more details, see Documentation/block/bfq-iosched.rst.
 
 	  Following is the format::
 
-	    # echo dev_maj:dev_minor weight > blkio.weight_device
+	    # echo dev_maj:dev_minor weight > blkio.bfq.weight_device
 
 	  Configure weight=300 on /dev/sdb (8:16) in this cgroup::
 
-	    # echo 8:16 300 > blkio.weight_device
-	    # cat blkio.weight_device
+	    # echo 8:16 300 > blkio.bfq.weight_device
+	    # cat blkio.bfq.weight_device
 	    dev     weight
 	    8:16    300
 
 	  Configure weight=500 on /dev/sda (8:0) in this cgroup::
 
-	    # echo 8:0 500 > blkio.weight_device
-	    # cat blkio.weight_device
+	    # echo 8:0 500 > blkio.bfq.weight_device
+	    # cat blkio.bfq.weight_device
 	    dev     weight
 	    8:0     500
 	    8:16    300
 
 	  Remove specific weight for /dev/sda in this cgroup::
 
-	    # echo 8:0 0 > blkio.weight_device
-	    # cat blkio.weight_device
+	    # echo 8:0 0 > blkio.bfq.weight_device
+	    # cat blkio.bfq.weight_device
 	    dev     weight
 	    8:16    300
 

From db9b6d87a8d4552c691c9f008a233985f41a9e4d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 11 Jun 2021 18:58:48 +0200
Subject: [PATCH 1970/3292] ACPI: power: Use dev_dbg() to print some messages

The messages printed by acpi_resume_power_resources() and
acpi_turn_off_unused_power_resources() are not important enough to be
printed with pr_info(), so use dev_dbg() instead of it to get rid of
some noise in the kernel log.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
---
 drivers/acpi/power.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index f145f645ab06..eba7785047ca 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -1004,7 +1004,7 @@ void acpi_resume_power_resources(void)
 
 		if (state == ACPI_POWER_RESOURCE_STATE_OFF
 		    && resource->ref_count) {
-			dev_info(&resource->device.dev, "Turning ON\n");
+			dev_dbg(&resource->device.dev, "Turning ON\n");
 			__acpi_power_on(resource);
 		}
 
@@ -1034,7 +1034,7 @@ void acpi_turn_off_unused_power_resources(void)
 		 */
 		if (!resource->ref_count &&
 		    resource->state != ACPI_POWER_RESOURCE_STATE_OFF) {
-			dev_info(&resource->device.dev, "Turning OFF\n");
+			dev_dbg(&resource->device.dev, "Turning OFF\n");
 			__acpi_power_off(resource);
 		}
 

From 8744365e258459775bd9b49b705a82d66a21c2b4 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 28 May 2021 10:48:49 +0200
Subject: [PATCH 1971/3292] MAINTAINERS: netfilter: add irc channel

The community #netfilter IRC channel is now live on the libera.chat network
(https://libera.chat/).

CC: Arturo Borrero Gonzalez <arturo@netfilter.org>
Link: https://marc.info/?l=netfilter&m=162210948632717
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bfb3d0931cba..f3d44262d16e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12657,6 +12657,7 @@ W:	http://www.netfilter.org/
 W:	http://www.iptables.org/
 W:	http://www.nftables.org/
 Q:	http://patchwork.ozlabs.org/project/netfilter-devel/list/
+C:	irc://irc.libera.chat/netfilter
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
 F:	include/linux/netfilter*

From cdd73cc545c0fb9b1a1f7b209f4f536e7990cff4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 10 Jun 2021 20:20:30 +0200
Subject: [PATCH 1972/3292] netfilter: nft_exthdr: check for IPv6 packet before
 further processing

ipv6_find_hdr() does not validate that this is an IPv6 packet. Add a
sanity check for calling ipv6_find_hdr() to make sure an IPv6 packet
is passed for parsing.

Fixes: 96518518cc41 ("netfilter: add nftables")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_exthdr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index f64f0017e9a5..670dd146fb2b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -42,6 +42,9 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
 	unsigned int offset = 0;
 	int err;
 
+	if (pkt->skb->protocol != htons(ETH_P_IPV6))
+		goto err;
+
 	err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
 	if (priv->flags & NFT_EXTHDR_F_PRESENT) {
 		nft_reg_store8(dest, err >= 0);

From 8f518d43f89ae00b9cf5460e10b91694944ca1a8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 10 Jun 2021 20:20:31 +0200
Subject: [PATCH 1973/3292] netfilter: nft_osf: check for TCP packet before
 further processing

The osf expression only supports for TCP packets, add a upfront sanity
check to skip packet parsing if this is not a TCP packet.

Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_osf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index ac61f708b82d..d82677e83400 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -28,6 +28,11 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	struct nf_osf_data data;
 	struct tcphdr _tcph;
 
+	if (pkt->tprot != IPPROTO_TCP) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
 	tcp = skb_header_pointer(skb, ip_hdrlen(skb),
 				 sizeof(struct tcphdr), &_tcph);
 	if (!tcp) {

From 52f0f4e178c757b3d356087376aad8bd77271828 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 11 Jun 2021 19:26:56 +0200
Subject: [PATCH 1974/3292] netfilter: nft_tproxy: restrict support to TCP and
 UDP transport protocols

Add unfront check for TCP and UDP packets before performing further
processing.

Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_tproxy.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index accef672088c..5cb4d575d47f 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -30,6 +30,12 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
 	__be16 tport = 0;
 	struct sock *sk;
 
+	if (pkt->tprot != IPPROTO_TCP &&
+	    pkt->tprot != IPPROTO_UDP) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
 	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
 	if (!hp) {
 		regs->verdict.code = NFT_BREAK;
@@ -91,7 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
 
 	memset(&taddr, 0, sizeof(taddr));
 
-	if (!pkt->tprot_set) {
+	if (pkt->tprot != IPPROTO_TCP &&
+	    pkt->tprot != IPPROTO_UDP) {
 		regs->verdict.code = NFT_BREAK;
 		return;
 	}

From 61273f9d83148a38cf9b78d43016c29fc80f48e5 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 15 Jun 2021 17:21:55 +0000
Subject: [PATCH 1975/3292] net: stmmac: Fix error return code in
 ingenic_mac_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 2bb4b98b60d7 ("net: stmmac: Add Ingenic SoCs MAC support.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
index 60984c1a154d..9a6d819b84ae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -263,6 +263,7 @@ static int ingenic_mac_probe(struct platform_device *pdev)
 	mac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "mode-reg");
 	if (IS_ERR(mac->regmap)) {
 		dev_err(&pdev->dev, "%s: Failed to get syscon regmap\n", __func__);
+		ret = PTR_ERR(mac->regmap);
 		goto err_remove_config_dt;
 	}
 

From c2ae34a7deaff463ecafb7db627b77faaca8e159 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Tue, 15 Jun 2021 12:50:37 -0500
Subject: [PATCH 1976/3292] net: hsr: don't check sequence number if tag
 removal is offloaded

Don't check the sequence number when deciding when to update time_in in
the node table if tag removal is offloaded since the sequence number is
part of the tag. This fixes a problem where the times in the node table
wouldn't update when 0 appeared to be before or equal to seq_out when
tag removal was offloaded.

Signed-off-by: George McCollister <george.mccollister@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_framereg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index bb1351c38397..e31949479305 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -397,7 +397,8 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
 	 * ensures entries of restarted nodes gets pruned so that they can
 	 * re-register and resume communications.
 	 */
-	if (seq_nr_before(sequence_nr, node->seq_out[port->type]))
+	if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) &&
+	    seq_nr_before(sequence_nr, node->seq_out[port->type]))
 		return;
 
 	node->time_in[port->type] = jiffies;

From d917c35a451e4ebba5c12a51c92cbddce958c91e Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 15 Jun 2021 18:27:07 -0500
Subject: [PATCH 1977/3292] net: qualcomm: rmnet: Allow partial updates of
 IFLA_FLAGS

The idiomatic way to handle the changelink flags/mask pair seems to be
allow partial updates of the driver's link flags. In contrast the rmnet
driver masks the incoming flags and then use that as the new flags.

Change the rmnet driver to follow the common scheme, before the
introduction of IFLA_RMNET_FLAGS handling in iproute2 et al.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Alex Elder <elder@linaro.org>
Reviewed-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 8d51b0cb545c..27b1663c476e 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -163,7 +163,8 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 		struct ifla_rmnet_flags *flags;
 
 		flags = nla_data(data[IFLA_RMNET_FLAGS]);
-		data_format = flags->flags & flags->mask;
+		data_format &= ~flags->mask;
+		data_format |= flags->flags & flags->mask;
 	}
 
 	netdev_dbg(dev, "data format [0x%08X]\n", data_format);
@@ -336,7 +337,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 
 		old_data_format = port->data_format;
 		flags = nla_data(data[IFLA_RMNET_FLAGS]);
-		port->data_format = flags->flags & flags->mask;
+		port->data_format &= ~flags->mask;
+		port->data_format |= flags->flags & flags->mask;
 
 		if (rmnet_vnd_update_dev_mtu(port, real_dev)) {
 			port->data_format = old_data_format;

From a4fc566543c0dede64b85ca907f34a5d19636292 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Tue, 15 Jun 2021 20:39:03 -0500
Subject: [PATCH 1978/3292] net: dsa: xrs700x: forward HSR supervision frames

Forward supervision frames between redunant HSR ports. This was broken
in the last commit.

Fixes: 1a42624aecba ("net: dsa: xrs700x: allow HSR/PRP supervision dupes for node_table")
Signed-off-by: George McCollister <george.mccollister@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/xrs700x/xrs700x.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c
index a79066174a77..130abb0f1438 100644
--- a/drivers/net/dsa/xrs700x/xrs700x.c
+++ b/drivers/net/dsa/xrs700x/xrs700x.c
@@ -337,7 +337,8 @@ static int xrs700x_port_add_bpdu_ipf(struct dsa_switch *ds, int port)
  * This is required to correctly populate the HSR/PRP node_table.
  * Leave the policy disabled, it will be enabled as needed.
  */
-static int xrs700x_port_add_hsrsup_ipf(struct dsa_switch *ds, int port)
+static int xrs700x_port_add_hsrsup_ipf(struct dsa_switch *ds, int port,
+				       int fwdport)
 {
 	struct xrs700x *priv = ds->priv;
 	unsigned int val = 0;
@@ -368,6 +369,9 @@ static int xrs700x_port_add_hsrsup_ipf(struct dsa_switch *ds, int port)
 	if (ret)
 		return ret;
 
+	if (fwdport >= 0)
+		val |= BIT(fwdport);
+
 	/* Allow must be set prevent duplicate discard */
 	ret = regmap_write(priv->regmap, XRS_ETH_ADDR_FWD_ALLOW(port, 1), val);
 	if (ret)
@@ -405,10 +409,6 @@ static int xrs700x_port_setup(struct dsa_switch *ds, int port)
 		ret = xrs700x_port_add_bpdu_ipf(ds, port);
 		if (ret)
 			return ret;
-
-		ret = xrs700x_port_add_hsrsup_ipf(ds, port);
-		if (ret)
-			return ret;
 	}
 
 	return 0;
@@ -562,6 +562,7 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port,
 	struct net_device *slave;
 	int ret, i, hsr_pair[2];
 	enum hsr_version ver;
+	bool fwd = false;
 
 	ret = hsr_get_version(hsr, &ver);
 	if (ret)
@@ -607,6 +608,7 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port,
 	if (ver == HSR_V1) {
 		val &= ~BIT(partner->index);
 		val &= ~BIT(port);
+		fwd = true;
 	}
 	val &= ~BIT(dsa_upstream_port(ds, port));
 	regmap_write(priv->regmap, XRS_PORT_FWD_MASK(partner->index), val);
@@ -616,10 +618,19 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port,
 			    XRS_PORT_FORWARDING);
 	regmap_fields_write(priv->ps_forward, port, XRS_PORT_FORWARDING);
 
-	/* Enable inbound policy added by xrs700x_port_add_hsrsup_ipf()
-	 * which allows HSR/PRP supervision forwarding to the CPU port without
-	 * discarding duplicates.
+	/* Enable inbound policy which allows HSR/PRP supervision forwarding
+	 * to the CPU port without discarding duplicates. Continue to
+	 * forward to redundant ports when in HSR mode while discarding
+	 * duplicates.
 	 */
+	ret = xrs700x_port_add_hsrsup_ipf(ds, partner->index, fwd ? port : -1);
+	if (ret)
+		return ret;
+
+	ret = xrs700x_port_add_hsrsup_ipf(ds, port, fwd ? partner->index : -1);
+	if (ret)
+		return ret;
+
 	regmap_update_bits(priv->regmap,
 			   XRS_ETH_ADDR_CFG(partner->index, 1), 1, 1);
 	regmap_update_bits(priv->regmap, XRS_ETH_ADDR_CFG(port, 1), 1, 1);

From f7246bdb3d7d85941efdc828802e9a48012f24f5 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Wed, 16 Jun 2021 15:25:40 +0800
Subject: [PATCH 1979/3292] net: iosm: remove the repeated declaration and
 comment

Function 'ipc_mmio_get_cp_version' is declared twice, so remove the
repeated declaration and wrong comments.

Cc: M Chetan Kumar <m.chetan.kumar@intel.com>
Cc: Intel Corporation <linuxwwan@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Reviewed-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mmio.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h
index bcf77aea06e7..45e6923da78f 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h
@@ -120,16 +120,6 @@ void ipc_mmio_set_psi_addr_and_size(struct iosm_mmio *ipc_mmio, dma_addr_t addr,
 void ipc_mmio_set_contex_info_addr(struct iosm_mmio *ipc_mmio,
 				   phys_addr_t addr);
 
-/**
- * ipc_mmio_get_cp_version - Write context info and AP memory range addresses.
- *			     This needs to be called when CP is in
- *			     IPC_MEM_DEVICE_IPC_INIT state
- * @ipc_mmio:	Pointer to mmio instance
- *
- * Returns: cp version else failure value on error
- */
-int ipc_mmio_get_cp_version(struct iosm_mmio *ipc_mmio);
-
 /**
  * ipc_mmio_get_cp_version - Get the CP IPC version
  * @ipc_mmio:	Pointer to mmio instance

From 56a967c4f7e5fed2e66d90906ff5956abf69364a Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 16 Jun 2021 01:59:13 -0600
Subject: [PATCH 1980/3292] net: qualcomm: rmnet: Remove some unneeded casts

Remove the explicit casts in the checksum complement functions
and pass the actual protocol specific headers instead.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 39fba3a347fa..3ee5c1a8b46e 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -163,13 +163,12 @@ rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
 }
 #endif
 
-static void rmnet_map_complement_ipv4_txporthdr_csum_field(void *iphdr)
+static void rmnet_map_complement_ipv4_txporthdr_csum_field(struct iphdr *ip4h)
 {
-	struct iphdr *ip4h = (struct iphdr *)iphdr;
 	void *txphdr;
 	u16 *csum;
 
-	txphdr = iphdr + ip4h->ihl * 4;
+	txphdr = ip4h + ip4h->ihl * 4;
 
 	if (ip4h->protocol == IPPROTO_TCP || ip4h->protocol == IPPROTO_UDP) {
 		csum = (u16 *)rmnet_map_get_csum_field(ip4h->protocol, txphdr);
@@ -198,13 +197,13 @@ rmnet_map_ipv4_ul_csum_header(struct iphdr *iphdr,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void rmnet_map_complement_ipv6_txporthdr_csum_field(void *ip6hdr)
+static void
+rmnet_map_complement_ipv6_txporthdr_csum_field(struct ipv6hdr *ip6h)
 {
-	struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr;
 	void *txphdr;
 	u16 *csum;
 
-	txphdr = ip6hdr + sizeof(struct ipv6hdr);
+	txphdr = ip6h + sizeof(struct ipv6hdr);
 
 	if (ip6h->nexthdr == IPPROTO_TCP || ip6h->nexthdr == IPPROTO_UDP) {
 		csum = (u16 *)rmnet_map_get_csum_field(ip6h->nexthdr, txphdr);

From 775f25479df924611fc482a602d147a43ac93702 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:19 +0800
Subject: [PATCH 1981/3292] net: phy: change format of some declarations

Add a blank line after declarations, change the order of them and put the
assignments and declarations together.

Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm87xx.c  | 4 ++--
 drivers/net/phy/dp83640.c  | 1 +
 drivers/net/phy/et1011c.c  | 7 ++++---
 drivers/net/phy/mdio_bus.c | 1 +
 drivers/net/phy/qsemi.c    | 1 +
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c
index 4ac8fd190e9d..313563482690 100644
--- a/drivers/net/phy/bcm87xx.c
+++ b/drivers/net/phy/bcm87xx.c
@@ -54,9 +54,9 @@ static int bcm87xx_of_reg_init(struct phy_device *phydev)
 		u16 reg		= be32_to_cpup(paddr++);
 		u16 mask	= be32_to_cpup(paddr++);
 		u16 val_bits	= be32_to_cpup(paddr++);
-		int val;
 		u32 regnum = mdiobus_c45_addr(devid, reg);
-		val = 0;
+		int val = 0;
+
 		if (mask) {
 			val = phy_read(phydev, regnum);
 			if (val < 0) {
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 0d79f68f301c..10769bfb1298 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -615,6 +615,7 @@ static void prune_rx_ts(struct dp83640_private *dp83640)
 static void enable_broadcast(struct phy_device *phydev, int init_page, int on)
 {
 	int val;
+
 	phy_write(phydev, PAGESEL, 0);
 	val = phy_read(phydev, PHYCR2);
 	if (on)
diff --git a/drivers/net/phy/et1011c.c b/drivers/net/phy/et1011c.c
index 09e07b902d3a..07bb484ba402 100644
--- a/drivers/net/phy/et1011c.c
+++ b/drivers/net/phy/et1011c.c
@@ -46,8 +46,8 @@ MODULE_LICENSE("GPL");
 
 static int et1011c_config_aneg(struct phy_device *phydev)
 {
-	int ctl = 0;
-	ctl = phy_read(phydev, MII_BMCR);
+	int ctl = phy_read(phydev, MII_BMCR);
+
 	if (ctl < 0)
 		return ctl;
 	ctl &= ~(BMCR_FULLDPLX | BMCR_SPEED100 | BMCR_SPEED1000 |
@@ -60,9 +60,10 @@ static int et1011c_config_aneg(struct phy_device *phydev)
 
 static int et1011c_read_status(struct phy_device *phydev)
 {
+	static int speed;
 	int ret;
 	u32 val;
-	static int speed;
+
 	ret = genphy_read_status(phydev);
 
 	if (speed != phydev->speed) {
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6045ad3def12..24665670a89a 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -175,6 +175,7 @@ EXPORT_SYMBOL(mdiobus_alloc_size);
 static void mdiobus_release(struct device *d)
 {
 	struct mii_bus *bus = to_mii_bus(d);
+
 	BUG_ON(bus->state != MDIOBUS_RELEASED &&
 	       /* for compatibility with error handling in drivers */
 	       bus->state != MDIOBUS_ALLOCATED);
diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c
index d5c1aaa8236a..30d15f7c9b03 100644
--- a/drivers/net/phy/qsemi.c
+++ b/drivers/net/phy/qsemi.c
@@ -100,6 +100,7 @@ static int qs6612_ack_interrupt(struct phy_device *phydev)
 static int qs6612_config_intr(struct phy_device *phydev)
 {
 	int err;
+
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
 		/* clear any interrupts before enabling them */
 		err = qs6612_ack_interrupt(phydev);

From 1953feb022154e19c5953988fd3dd65ebc769dc9 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:20 +0800
Subject: [PATCH 1982/3292] net: phy: correct format of block comments

Block comments should not use a trailing */ on a separate line and every
line of a block comment should start with an '*'.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/lxt.c      | 4 ++--
 drivers/net/phy/national.c | 6 ++++--
 drivers/net/phy/phy-core.c | 3 ++-
 drivers/net/phy/phylink.c  | 9 ++++++---
 drivers/net/phy/vitesse.c  | 3 ++-
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index bde3356a2f86..e3bf827b7959 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -242,8 +242,8 @@ static int lxt973a2_read_status(struct phy_device *phydev)
 				return lpa;
 
 			/* If both registers are equal, it is suspect but not
-			* impossible, hence a new try
-			*/
+			 * impossible, hence a new try
+			 */
 		} while (lpa == adv && retry--);
 
 		mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa);
diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c
index 46160baaafe3..9ae9cc6b23c2 100644
--- a/drivers/net/phy/national.c
+++ b/drivers/net/phy/national.c
@@ -68,7 +68,8 @@ static int ns_ack_interrupt(struct phy_device *phydev)
 		return ret;
 
 	/* Clear the interrupt status bit by writing a “1”
-	 * to the corresponding bit in INT_CLEAR (2:0 are reserved) */
+	 * to the corresponding bit in INT_CLEAR (2:0 are reserved)
+	 */
 	ret = phy_write(phydev, DP83865_INT_CLEAR, ret & ~0x7);
 
 	return ret;
@@ -150,7 +151,8 @@ static int ns_config_init(struct phy_device *phydev)
 {
 	ns_giga_speed_fallback(phydev, ALL_FALLBACK_ON);
 	/* In the latest MAC or switches design, the 10 Mbps loopback
-	   is desired to be turned off. */
+	 * is desired to be turned off.
+	 */
 	ns_10_base_t_hdx_loopack(phydev, hdx_loopback_off);
 	return ns_ack_interrupt(phydev);
 }
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 8d333d3084ed..2870c33b8975 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -76,7 +76,8 @@ EXPORT_SYMBOL_GPL(phy_duplex_to_str);
 
 /* A mapping of all SUPPORTED settings to speed/duplex.  This table
  * must be grouped by speed and sorted in descending match priority
- * - iow, descending speed. */
+ * - iow, descending speed.
+ */
 
 #define PHY_SETTING(s, d, b) { .speed = SPEED_ ## s, .duplex = DUPLEX_ ## d, \
 			       .bit = ETHTOOL_LINK_MODE_ ## b ## _BIT}
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 8ce8db487596..35f22a936857 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -182,7 +182,8 @@ static int phylink_parse_fixedlink(struct phylink *pl,
 			pl->link_config.duplex = DUPLEX_FULL;
 
 		/* We treat the "pause" and "asym-pause" terminology as
-		 * defining the link partner's ability. */
+		 * defining the link partner's ability.
+		 */
 		if (fwnode_property_read_bool(fixed_node, "pause"))
 			__set_bit(ETHTOOL_LINK_MODE_Pause_BIT,
 				  pl->link_config.lp_advertising);
@@ -685,7 +686,8 @@ static void phylink_resolve(struct work_struct *w)
 			phylink_mac_pcs_get_state(pl, &link_state);
 
 			/* If we have a phy, the "up" state is the union of
-			 * both the PHY and the MAC */
+			 * both the PHY and the MAC
+			 */
 			if (pl->phydev)
 				link_state.link &= pl->phy_state.link;
 
@@ -694,7 +696,8 @@ static void phylink_resolve(struct work_struct *w)
 				link_state.interface = pl->phy_state.interface;
 
 				/* If we have a PHY, we need to update with
-				 * the PHY flow control bits. */
+				 * the PHY flow control bits.
+				 */
 				link_state.pause = pl->phy_state.pause;
 				mac_config = true;
 			}
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 16704e243162..897b979ec03c 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -249,7 +249,8 @@ static int vsc73xx_config_aneg(struct phy_device *phydev)
 
 /* This adds a skew for both TX and RX clocks, so the skew should only be
  * applied to "rgmii-id" interfaces. It may not work as expected
- * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. */
+ * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces.
+ */
 static int vsc8601_add_skew(struct phy_device *phydev)
 {
 	int ret;

From e1f82127d67f53a11443d5ce76c7fe52d89ee588 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:21 +0800
Subject: [PATCH 1983/3292] net: phy: delete repeated words of comments

There are some repeated words in some comments, they should be deleted.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 2 +-
 drivers/net/phy/sfp.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index f4816b7d31b3..c617dbcad6ea 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -172,7 +172,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_an_config_aneg);
  * @phydev: target phy_device struct
  *
  * Disable auto-negotiation in the Clause 45 PHY. The link parameters
- * parameters are controlled through the PMA/PMD MMD registers.
+ * are controlled through the PMA/PMD MMD registers.
  *
  * Returns zero on success, negative errno code on failure.
  */
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 37f722c763d7..34e90216bd2c 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -2153,7 +2153,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
 
 	case SFP_S_INIT:
 		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
-			/* TX_FAULT is still asserted after t_init or
+			/* TX_FAULT is still asserted after t_init
 			 * or t_start_up, so assume there is a fault.
 			 */
 			sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,

From 3bdee6a8e92e9f3403d3e290f4a16189ecc713c5 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:22 +0800
Subject: [PATCH 1984/3292] net: phy: fix space alignment issues

There are some space related issues, including spaces at the start of the
line, before tabs, after open parenthesis and before close parenthesis.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/davicom.c    |  6 +++---
 drivers/net/phy/sfp-bus.c    | 28 ++++++++++++++--------------
 drivers/net/phy/spi_ks8995.c | 10 +++++-----
 drivers/net/phy/ste10Xp.c    |  6 +++---
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c
index a3b3842c67e5..4ac4bce1bf32 100644
--- a/drivers/net/phy/davicom.c
+++ b/drivers/net/phy/davicom.c
@@ -43,10 +43,10 @@
 #define MII_DM9161_INTR_DPLX_CHANGE	0x0010
 #define MII_DM9161_INTR_SPD_CHANGE	0x0008
 #define MII_DM9161_INTR_LINK_CHANGE	0x0004
-#define MII_DM9161_INTR_INIT 		0x0000
+#define MII_DM9161_INTR_INIT		0x0000
 #define MII_DM9161_INTR_STOP	\
-(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \
- | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK)
+	(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK |	\
+	 MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK)
 #define MII_DM9161_INTR_CHANGE	\
 	(MII_DM9161_INTR_DPLX_CHANGE | \
 	 MII_DM9161_INTR_SPD_CHANGE | \
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 1db9cea13690..7362f8c3271c 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -629,14 +629,14 @@ static void sfp_upstream_clear(struct sfp_bus *bus)
  * be put via sfp_bus_put() when done.
  *
  * Returns:
- * 	    - on success, a pointer to the sfp_bus structure,
- *	    - %NULL if no SFP is specified,
- * 	    - on failure, an error pointer value:
+ *	- on success, a pointer to the sfp_bus structure,
+ *	- %NULL if no SFP is specified,
+ *	- on failure, an error pointer value:
  *
- * 	      - corresponding to the errors detailed for
- * 	        fwnode_property_get_reference_args().
- * 	      - %-ENOMEM if we failed to allocate the bus.
- *	      - an error from the upstream's connect_phy() method.
+ *	- corresponding to the errors detailed for
+ *	  fwnode_property_get_reference_args().
+ *	- %-ENOMEM if we failed to allocate the bus.
+ *	- an error from the upstream's connect_phy() method.
  */
 struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode)
 {
@@ -671,14 +671,14 @@ EXPORT_SYMBOL_GPL(sfp_bus_find_fwnode);
  * bus, so it is safe to put the bus after this call.
  *
  * Returns:
- * 	    - on success, a pointer to the sfp_bus structure,
- *	    - %NULL if no SFP is specified,
- * 	    - on failure, an error pointer value:
+ *	- on success, a pointer to the sfp_bus structure,
+ *	- %NULL if no SFP is specified,
+ *	- on failure, an error pointer value:
  *
- * 	      - corresponding to the errors detailed for
- * 	        fwnode_property_get_reference_args().
- * 	      - %-ENOMEM if we failed to allocate the bus.
- *	      - an error from the upstream's connect_phy() method.
+ *	- corresponding to the errors detailed for
+ *	  fwnode_property_get_reference_args().
+ *	- %-ENOMEM if we failed to allocate the bus.
+ *	- an error from the upstream's connect_phy() method.
  */
 int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream,
 			 const struct sfp_upstream_ops *ops)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index ca49c1ad3efc..8b5445a724ce 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -160,11 +160,11 @@ static const struct spi_device_id ks8995_id[] = {
 MODULE_DEVICE_TABLE(spi, ks8995_id);
 
 static const struct of_device_id ks8895_spi_of_match[] = {
-        { .compatible = "micrel,ks8995" },
-        { .compatible = "micrel,ksz8864" },
-        { .compatible = "micrel,ksz8795" },
-        { },
- };
+	{ .compatible = "micrel,ks8995" },
+	{ .compatible = "micrel,ksz8864" },
+	{ .compatible = "micrel,ksz8795" },
+	{ },
+};
 MODULE_DEVICE_TABLE(of, ks8895_spi_of_match);
 
 static inline u8 get_chip_id(u8 val)
diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c
index 431fe5e0ce31..309e4c3496c4 100644
--- a/drivers/net/phy/ste10Xp.c
+++ b/drivers/net/phy/ste10Xp.c
@@ -20,12 +20,12 @@
 #include <linux/mii.h>
 #include <linux/phy.h>
 
-#define MII_XCIIS   	0x11	/* Configuration Info IRQ & Status Reg */
-#define MII_XIE     	0x12	/* Interrupt Enable Register */
+#define MII_XCIIS	0x11	/* Configuration Info IRQ & Status Reg */
+#define MII_XIE		0x12	/* Interrupt Enable Register */
 #define MII_XIE_DEFAULT_MASK 0x0070 /* ANE complete, Remote Fault, Link Down */
 
 #define STE101P_PHY_ID		0x00061c50
-#define STE100P_PHY_ID       	0x1c040011
+#define STE100P_PHY_ID		0x1c040011
 
 static int ste10Xp_config_init(struct phy_device *phydev)
 {

From 169d7a402dfae45e916e9c847f089482d65ddc4f Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:23 +0800
Subject: [PATCH 1985/3292] net: phy: fix formatting issues with braces

Fix following format issues:
1. open brace '{' following function definitions should go to the next
   line.
2. braces {} are not necessary for single line statements.
3. else should follow close brace '}'.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed_phy.c  | 4 ++--
 drivers/net/phy/phy.c        | 3 +--
 drivers/net/phy/phy_device.c | 9 ++++-----
 drivers/net/phy/phylink.c    | 5 ++---
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 18d81f43f2a8..c65fb5f5d2dc 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -161,8 +161,8 @@ static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr,
 }
 
 int fixed_phy_add(unsigned int irq, int phy_addr,
-		  struct fixed_phy_status *status) {
-
+		  struct fixed_phy_status *status)
+{
 	return fixed_phy_add_gpiod(irq, phy_addr, status, NULL);
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 1089a93d12f6..8eeb26d8aeb7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -380,8 +380,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 					else if (val & BMCR_SPEED100)
 						phydev->speed = SPEED_100;
 					else phydev->speed = SPEED_10;
-				}
-				else {
+				} else {
 					if (phydev->autoneg == AUTONEG_DISABLE)
 						change_autoneg = true;
 					phydev->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 85734309b580..5d5f9a9ee768 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3021,15 +3021,14 @@ static int phy_probe(struct device *dev)
 	 * a controller will attach, and may modify one
 	 * or both of these values
 	 */
-	if (phydrv->features) {
+	if (phydrv->features)
 		linkmode_copy(phydev->supported, phydrv->features);
-	} else if (phydrv->get_features) {
+	else if (phydrv->get_features)
 		err = phydrv->get_features(phydev);
-	} else if (phydev->is_c45) {
+	else if (phydev->is_c45)
 		err = genphy_c45_pma_read_abilities(phydev);
-	} else {
+	else
 		err = genphy_read_abilities(phydev);
-	}
 
 	if (err)
 		goto out;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 35f22a936857..eb29ef53d971 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1383,11 +1383,10 @@ int phylink_ethtool_ksettings_get(struct phylink *pl,
 
 	ASSERT_RTNL();
 
-	if (pl->phydev) {
+	if (pl->phydev)
 		phy_ethtool_ksettings_get(pl->phydev, kset);
-	} else {
+	else
 		kset->base.port = pl->link_port;
-	}
 
 	linkmode_copy(kset->link_modes.supported, pl->supported);
 

From 450bf1f0c60e818d3da927f8a2d272559ef1915b Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:24 +0800
Subject: [PATCH 1986/3292] net: phy: print the function name by __func__
 instead of an fixed string

It's better to use __func__ than a fixed string to print a function's
name.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index 0837319a52d7..c94cb5382dc9 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -77,7 +77,7 @@ int mdio_device_register(struct mdio_device *mdiodev)
 {
 	int err;
 
-	dev_dbg(&mdiodev->dev, "mdio_device_register\n");
+	dev_dbg(&mdiodev->dev, "%s\n", __func__);
 
 	err = mdiobus_register_device(mdiodev);
 	if (err)
@@ -188,7 +188,7 @@ int mdio_driver_register(struct mdio_driver *drv)
 	struct mdio_driver_common *mdiodrv = &drv->mdiodrv;
 	int retval;
 
-	pr_debug("mdio_driver_register: %s\n", mdiodrv->driver.name);
+	pr_debug("%s: %s\n", __func__, mdiodrv->driver.name);
 
 	mdiodrv->driver.bus = &mdio_bus_type;
 	mdiodrv->driver.probe = mdio_probe;

From 33ab463220e59a74e803e4fc1c589c28b241b0ab Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Wed, 16 Jun 2021 18:01:25 +0800
Subject: [PATCH 1987/3292] net: phy: remove unnecessary line continuation

Avoid unnecessary line continuations, and put '|' at the end of line.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83640.c | 4 ++--
 drivers/net/phy/et1011c.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 10769bfb1298..705c16675b80 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -170,9 +170,9 @@ static ushort gpio_tab[GPIO_TABLE_SIZE] = {
 module_param(chosen_phy, int, 0444);
 module_param_array(gpio_tab, ushort, NULL, 0444);
 
-MODULE_PARM_DESC(chosen_phy, \
+MODULE_PARM_DESC(chosen_phy,
 	"The address of the PHY to use for the ancillary clock features");
-MODULE_PARM_DESC(gpio_tab, \
+MODULE_PARM_DESC(gpio_tab,
 	"Which GPIO line to use for which purpose: cal,perout,extts1,...,extts6");
 
 static void dp83640_gpio_defaults(struct ptp_pin_desc *pd)
diff --git a/drivers/net/phy/et1011c.c b/drivers/net/phy/et1011c.c
index 07bb484ba402..be1b71d7cab7 100644
--- a/drivers/net/phy/et1011c.c
+++ b/drivers/net/phy/et1011c.c
@@ -73,10 +73,10 @@ static int et1011c_read_status(struct phy_device *phydev)
 					ET1011C_GIGABIT_SPEED) {
 			val = phy_read(phydev, ET1011C_CONFIG_REG);
 			val &= ~ET1011C_TX_FIFO_MASK;
-			phy_write(phydev, ET1011C_CONFIG_REG, val\
-					| ET1011C_GMII_INTERFACE\
-					| ET1011C_SYS_CLK_EN\
-					| ET1011C_TX_FIFO_DEPTH_16);
+			phy_write(phydev, ET1011C_CONFIG_REG, val |
+					  ET1011C_GMII_INTERFACE |
+					  ET1011C_SYS_CLK_EN |
+					  ET1011C_TX_FIFO_DEPTH_16);
 
 		}
 	}

From 16d4d650966d9a607b32ceb709248f7833d88ed0 Mon Sep 17 00:00:00 2001
From: Weihang Li <liweihang@huawei.com>
Date: Wed, 16 Jun 2021 18:01:26 +0800
Subject: [PATCH 1988/3292] net: phy: replace if-else statements with switch

Switch statement is clearer than a group of 'if-else'.

Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 23751d95855b..3de93c9f2744 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -809,14 +809,19 @@ static int m88e1111_config_init_rgmii_delays(struct phy_device *phydev)
 {
 	int delay;
 
-	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) {
+	switch (phydev->interface) {
+	case PHY_INTERFACE_MODE_RGMII_ID:
 		delay = MII_M1111_RGMII_RX_DELAY | MII_M1111_RGMII_TX_DELAY;
-	} else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+		break;
+	case PHY_INTERFACE_MODE_RGMII_RXID:
 		delay = MII_M1111_RGMII_RX_DELAY;
-	} else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+		break;
+	case PHY_INTERFACE_MODE_RGMII_TXID:
 		delay = MII_M1111_RGMII_TX_DELAY;
-	} else {
+		break;
+	default:
 		delay = 0;
+		break;
 	}
 
 	return phy_modify(phydev, MII_M1111_PHY_EXT_CR,

From d33d24a7b45039f92d0da492a967f8ee803e5872 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:01:59 +0200
Subject: [PATCH 1989/3292] nfp: flower-ct: add delete flow handling for ct

Add functions to handle delete flow callbacks for ct flows. Also
accept the flows for offloading by returning 0 instead of -EOPNOTSUPP.
Flows will still not actually be offloaded to hw, but at this point
it's difficult to not accept the flows and also exercise the cleanup
paths properly. Traffic will still be handled safely through the
fallback path.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 38 +++++++++++++++++--
 .../ethernet/netronome/nfp/flower/conntrack.h |  6 +++
 .../ethernet/netronome/nfp/flower/offload.c   |  9 +++++
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index b1709affb52d..ea70e02d170e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -424,8 +424,7 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 	if (priv->ct_zone_wc)
 		nfp_ct_merge_tc_entries(ct_entry, priv->ct_zone_wc, zt);
 
-	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack action not supported");
-	return -EOPNOTSUPP;
+	return 0;
 }
 
 int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
@@ -487,6 +486,37 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 		nfp_ct_merge_tc_entries(ct_entry, zt, zt);
 	}
 
-	NL_SET_ERR_MSG_MOD(extack, "unsupported offload: Conntrack match not supported");
-	return -EOPNOTSUPP;
+	return 0;
+}
+
+int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
+{
+	struct nfp_fl_ct_flow_entry *ct_entry;
+	struct nfp_fl_ct_zone_entry *zt;
+	struct rhashtable *m_table;
+
+	zt = ct_map_ent->ct_entry->zt;
+	ct_entry = ct_map_ent->ct_entry;
+	m_table = &zt->priv->ct_map_table;
+
+	switch (ct_entry->type) {
+	case CT_TYPE_PRE_CT:
+		zt->pre_ct_count--;
+		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
+				       nfp_ct_map_params);
+		nfp_fl_ct_clean_flow_entry(ct_entry);
+		kfree(ct_map_ent);
+		break;
+	case CT_TYPE_POST_CT:
+		zt->post_ct_count--;
+		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
+				       nfp_ct_map_params);
+		nfp_fl_ct_clean_flow_entry(ct_entry);
+		kfree(ct_map_ent);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index 3d7d260c6e5c..dbb18fbbae69 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -152,4 +152,10 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
  * @entry:	Flow entry to cleanup
  */
 void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry);
+
+/**
+ * nfp_fl_ct_del_flow() - Handle flow_del callbacks for conntrack
+ * @ct_map_ent:	ct map entry for the flow that needs deleting
+ */
+int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 7e4ad5d58859..2406d33356ad 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1505,6 +1505,7 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 		       struct flow_cls_offload *flow)
 {
 	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_fl_ct_map_entry *ct_map_ent;
 	struct netlink_ext_ack *extack = NULL;
 	struct nfp_fl_payload *nfp_flow;
 	struct nfp_port *port = NULL;
@@ -1514,6 +1515,14 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 	if (nfp_netdev_is_nfp_repr(netdev))
 		port = nfp_port_from_netdev(netdev);
 
+	/* Check ct_map_table */
+	ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie,
+					    nfp_ct_map_params);
+	if (ct_map_ent) {
+		err = nfp_fl_ct_del_flow(ct_map_ent);
+		return err;
+	}
+
 	nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev);
 	if (!nfp_flow) {
 		NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot remove flow that does not exist");

From 62268e78145f633af07e832bfecc960c4b6dda90 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:00 +0200
Subject: [PATCH 1990/3292] nfp: flower-ct: add nft callback stubs

Add register/unregister of the nft callback. For now just add
stub code to accept the flows, but don't do anything with it.
Decided to accept the flows since netfilter will keep on trying
to offload a flow if it was rejected, which is quite noisy.
Follow-up patches will start implementing the functions to add
nft flows to the relevant tables.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 63 ++++++++++++++++++-
 .../ethernet/netronome/nfp/flower/conntrack.h | 11 ++++
 2 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index ea70e02d170e..7fb51e13faea 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -384,6 +384,7 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 	struct flow_action_entry *ct_act, *ct_goto;
 	struct nfp_fl_ct_flow_entry *ct_entry;
 	struct nfp_fl_ct_zone_entry *zt;
+	int err;
 
 	ct_act = get_flow_act(flow, FLOW_ACTION_CT);
 	if (!ct_act) {
@@ -406,8 +407,15 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 		return PTR_ERR(zt);
 	}
 
-	if (!zt->nft)
+	if (!zt->nft) {
 		zt->nft = ct_act->ct.flow_table;
+		err = nf_flow_table_offload_add_cb(zt->nft, nfp_fl_ct_handle_nft_flow, zt);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "offload error: Could not register nft_callback");
+			return err;
+		}
+	}
 
 	/* Add entry to pre_ct_list */
 	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, extack);
@@ -489,6 +497,42 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 	return 0;
 }
 
+static int
+nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
+{
+	ASSERT_RTNL();
+
+	switch (flow->command) {
+	case FLOW_CLS_REPLACE:
+		return 0;
+	case FLOW_CLS_DESTROY:
+		return 0;
+	case FLOW_CLS_STATS:
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+	struct flow_cls_offload *flow = type_data;
+	struct nfp_fl_ct_zone_entry *zt = cb_priv;
+	int err = -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		rtnl_lock();
+		err = nfp_fl_ct_offload_nft_flow(zt, flow);
+		rtnl_unlock();
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return err;
+}
+
 int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
 {
 	struct nfp_fl_ct_flow_entry *ct_entry;
@@ -506,6 +550,23 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
 				       nfp_ct_map_params);
 		nfp_fl_ct_clean_flow_entry(ct_entry);
 		kfree(ct_map_ent);
+
+		/* If this is the last pre_ct_rule it means that it is
+		 * very likely that the nft table will be cleaned up next,
+		 * as this happens on the removal of the last act_ct flow.
+		 * However we cannot deregister the callback on the removal
+		 * of the last nft flow as this runs into a deadlock situation.
+		 * So deregister the callback on removal of the last pre_ct flow
+		 * and remove any remaining nft flow entries. We also cannot
+		 * save this state and delete the callback later since the
+		 * nft table would already have been freed at that time.
+		 */
+		if (!zt->pre_ct_count) {
+			nf_flow_table_offload_del_cb(zt->nft,
+						     nfp_fl_ct_handle_nft_flow,
+						     zt);
+			zt->nft = NULL;
+		}
 		break;
 	case CT_TYPE_POST_CT:
 		zt->post_ct_count--;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index dbb18fbbae69..b6e750dad929 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -4,6 +4,7 @@
 #ifndef __NFP_FLOWER_CONNTRACK_H__
 #define __NFP_FLOWER_CONNTRACK_H__ 1
 
+#include <net/netfilter/nf_flow_table.h>
 #include "main.h"
 
 #define NFP_FL_CT_NO_TUN	0xff
@@ -158,4 +159,14 @@ void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry);
  * @ct_map_ent:	ct map entry for the flow that needs deleting
  */
 int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent);
+
+/**
+ * nfp_fl_ct_handle_nft_flow() - Handle flower flow callbacks for nft table
+ * @type:	Type provided by callback
+ * @type_data:	Callback data
+ * @cb_priv:	Pointer to data provided when registering the callback, in this
+ *		case it's the zone table.
+ */
+int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data,
+			      void *cb_priv);
 #endif

From 95255017e0a84692faa33fdc0746433987b5aff0 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:01 +0200
Subject: [PATCH 1991/3292] nfp: flower-ct: add nft flows to nft list

Implement code to add and remove nft flows to the relevant list.
Registering and deregistering the callback function for the nft
table is quite complicated. The safest is to delete the callback
on the removal of the last pre_ct flow. This is because if this
is also the latest pre_ct flow in software it means that this
specific nft table will be freed, so there will not be a later
opportunity to do this. Another place where it looks possible
to delete the callback is when the last nft_flow is deleted,
but this happens under the flow_table lock, which is also taken
when deregistering the callback, leading to a deadlock situation.

This means the final solution here is to delete the callback
when removing the last pre_ct flow, and then clean up any
remaining nft_flow entries which may still be present, since
there will never be a callback now to do this, leaving them
orphaned if not cleaned up here as well.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 46 ++++++++++++++++++-
 .../ethernet/netronome/nfp/flower/conntrack.h |  6 +++
 .../ethernet/netronome/nfp/flower/metadata.c  | 26 +++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 7fb51e13faea..1b527f0660a7 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -165,6 +165,7 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 	/* init the various hash tables and lists*/
 	INIT_LIST_HEAD(&zt->pre_ct_list);
 	INIT_LIST_HEAD(&zt->post_ct_list);
+	INIT_LIST_HEAD(&zt->nft_flows_list);
 
 	err = rhashtable_init(&zt->tc_merge_tb, &nfp_tc_ct_merge_params);
 	if (err)
@@ -500,13 +501,31 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 static int
 nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
 {
+	struct nfp_fl_ct_map_entry *ct_map_ent;
+	struct nfp_fl_ct_flow_entry *ct_entry;
+	struct netlink_ext_ack *extack = NULL;
+
 	ASSERT_RTNL();
 
+	extack = flow->common.extack;
 	switch (flow->command) {
 	case FLOW_CLS_REPLACE:
+		/* Netfilter can request offload multiple times for the same
+		 * flow - protect against adding duplicates.
+		 */
+		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
+						    nfp_ct_map_params);
+		if (!ct_map_ent) {
+			ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, extack);
+			ct_entry->type = CT_TYPE_NFT;
+			list_add(&ct_entry->list_node, &zt->nft_flows_list);
+			zt->nft_flows_count++;
+		}
 		return 0;
 	case FLOW_CLS_DESTROY:
-		return 0;
+		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
+						    nfp_ct_map_params);
+		return nfp_fl_ct_del_flow(ct_map_ent);
 	case FLOW_CLS_STATS:
 		return 0;
 	default:
@@ -533,12 +552,30 @@ int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb
 	return err;
 }
 
+static void
+nfp_fl_ct_clean_nft_entries(struct nfp_fl_ct_zone_entry *zt)
+{
+	struct nfp_fl_ct_flow_entry *nft_entry, *ct_tmp;
+	struct nfp_fl_ct_map_entry *ct_map_ent;
+
+	list_for_each_entry_safe(nft_entry, ct_tmp, &zt->nft_flows_list,
+				 list_node) {
+		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table,
+						    &nft_entry->cookie,
+						    nfp_ct_map_params);
+		nfp_fl_ct_del_flow(ct_map_ent);
+	}
+}
+
 int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
 {
 	struct nfp_fl_ct_flow_entry *ct_entry;
 	struct nfp_fl_ct_zone_entry *zt;
 	struct rhashtable *m_table;
 
+	if (!ct_map_ent)
+		return -ENOENT;
+
 	zt = ct_map_ent->ct_entry->zt;
 	ct_entry = ct_map_ent->ct_entry;
 	m_table = &zt->priv->ct_map_table;
@@ -566,6 +603,7 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
 						     nfp_fl_ct_handle_nft_flow,
 						     zt);
 			zt->nft = NULL;
+			nfp_fl_ct_clean_nft_entries(zt);
 		}
 		break;
 	case CT_TYPE_POST_CT:
@@ -575,6 +613,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
 		nfp_fl_ct_clean_flow_entry(ct_entry);
 		kfree(ct_map_ent);
 		break;
+	case CT_TYPE_NFT:
+		zt->nft_flows_count--;
+		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
+				       nfp_ct_map_params);
+		nfp_fl_ct_clean_flow_entry(ct_map_ent->ct_entry);
+		kfree(ct_map_ent);
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index b6e750dad929..def95c3e8bb7 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -28,6 +28,9 @@ extern const struct rhashtable_params nfp_tc_ct_merge_params;
  *
  * @tc_merge_tb:	The table of merged tc flows
  * @tc_merge_count:	Keep count of the number of merged tc entries
+ *
+ * @nft_flows_list:	The list of nft relatednfp_fl_ct_flow_entry entries
+ * @nft_flows_count:	Keep count of the number of nft_flow entries
  */
 struct nfp_fl_ct_zone_entry {
 	u16 zone;
@@ -44,6 +47,9 @@ struct nfp_fl_ct_zone_entry {
 
 	struct rhashtable tc_merge_tb;
 	unsigned int tc_merge_count;
+
+	struct list_head nft_flows_list;
+	unsigned int nft_flows_count;
 };
 
 enum ct_entry_type {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 8658c5cedf91..a0a0242567a6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -639,6 +639,32 @@ static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt)
 		}
 	}
 
+	if (zt->nft) {
+		nf_flow_table_offload_del_cb(zt->nft,
+					     nfp_fl_ct_handle_nft_flow,
+					     zt);
+		zt->nft = NULL;
+	}
+
+	if (!list_empty(&zt->nft_flows_list)) {
+		struct rhashtable *m_table = &zt->priv->ct_map_table;
+		struct nfp_fl_ct_flow_entry *entry, *tmp;
+		struct nfp_fl_ct_map_entry *map;
+
+		WARN_ONCE(1, "nft_flows_list not empty as expected, cleaning up\n");
+		list_for_each_entry_safe(entry, tmp, &zt->nft_flows_list,
+					 list_node) {
+			map = rhashtable_lookup_fast(m_table,
+						     &entry->cookie,
+						     nfp_ct_map_params);
+			WARN_ON_ONCE(rhashtable_remove_fast(m_table,
+							    &map->hash_node,
+							    nfp_ct_map_params));
+			nfp_fl_ct_clean_flow_entry(entry);
+			kfree(map);
+		}
+	}
+
 	rhashtable_free_and_destroy(&zt->tc_merge_tb,
 				    nfp_check_rhashtable_empty, NULL);
 

From 4772ad3f58d2423ab4a6587e422eeac3ab8be369 Mon Sep 17 00:00:00 2001
From: Yinjun Zhang <yinjun.zhang@corigine.com>
Date: Wed, 16 Jun 2021 12:02:02 +0200
Subject: [PATCH 1992/3292] nfp: flower-ct: make a full copy of the rule when
 it is a NFT flow

The nft flow will be destroyed after offload cb returns. This means
we need save a full copy of it since it can be referenced through
other paths other than just the offload cb, for example when a new
pre_ct or post_ct entry is added, and it needs to be merged with
an existing nft entry.

Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 58 +++++++++++++++----
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 1b527f0660a7..2c636f8490e1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -194,8 +194,9 @@ static struct
 nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
 					 struct net_device *netdev,
 					 struct flow_cls_offload *flow,
-					 struct netlink_ext_ack *extack)
+					 bool is_nft, struct netlink_ext_ack *extack)
 {
+	struct nf_flow_match *nft_match = NULL;
 	struct nfp_fl_ct_flow_entry *entry;
 	struct nfp_fl_ct_map_entry *map;
 	struct flow_action_entry *act;
@@ -205,17 +206,39 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
 	if (!entry)
 		return ERR_PTR(-ENOMEM);
 
-	entry->zt = zt;
-	entry->netdev = netdev;
-	entry->cookie = flow->cookie;
 	entry->rule = flow_rule_alloc(flow->rule->action.num_entries);
 	if (!entry->rule) {
 		err = -ENOMEM;
-		goto err_pre_ct_act;
+		goto err_pre_ct_rule;
 	}
-	entry->rule->match.dissector = flow->rule->match.dissector;
-	entry->rule->match.mask = flow->rule->match.mask;
-	entry->rule->match.key = flow->rule->match.key;
+
+	/* nft flows gets destroyed after callback return, so need
+	 * to do a full copy instead of just a reference.
+	 */
+	if (is_nft) {
+		nft_match = kzalloc(sizeof(*nft_match), GFP_KERNEL);
+		if (!nft_match) {
+			err = -ENOMEM;
+			goto err_pre_ct_act;
+		}
+		memcpy(&nft_match->dissector, flow->rule->match.dissector,
+		       sizeof(nft_match->dissector));
+		memcpy(&nft_match->mask, flow->rule->match.mask,
+		       sizeof(nft_match->mask));
+		memcpy(&nft_match->key, flow->rule->match.key,
+		       sizeof(nft_match->key));
+		entry->rule->match.dissector = &nft_match->dissector;
+		entry->rule->match.mask = &nft_match->mask;
+		entry->rule->match.key = &nft_match->key;
+	} else {
+		entry->rule->match.dissector = flow->rule->match.dissector;
+		entry->rule->match.mask = flow->rule->match.mask;
+		entry->rule->match.key = flow->rule->match.key;
+	}
+
+	entry->zt = zt;
+	entry->netdev = netdev;
+	entry->cookie = flow->cookie;
 	entry->chain_index = flow->common.chain_index;
 	entry->tun_offset = NFP_FL_CT_NO_TUN;
 
@@ -276,8 +299,10 @@ err_ct_flow_insert:
 	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
 		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);
 err_pre_ct_tun_cp:
-	kfree(entry->rule);
+	kfree(nft_match);
 err_pre_ct_act:
+	kfree(entry->rule);
+err_pre_ct_rule:
 	kfree(entry);
 	return ERR_PTR(err);
 }
@@ -339,6 +364,15 @@ void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
 
 	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
 		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);
+
+	if (entry->type == CT_TYPE_NFT) {
+		struct nf_flow_match *nft_match;
+
+		nft_match = container_of(entry->rule->match.dissector,
+					 struct nf_flow_match, dissector);
+		kfree(nft_match);
+	}
+
 	kfree(entry->rule);
 	kfree(entry);
 }
@@ -419,7 +453,7 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 	}
 
 	/* Add entry to pre_ct_list */
-	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, extack);
+	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack);
 	if (IS_ERR(ct_entry))
 		return PTR_ERR(ct_entry);
 	ct_entry->type = CT_TYPE_PRE_CT;
@@ -464,7 +498,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
 	}
 
 	/* Add entry to post_ct_list */
-	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, extack);
+	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack);
 	if (IS_ERR(ct_entry))
 		return PTR_ERR(ct_entry);
 
@@ -516,7 +550,7 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
 		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
 						    nfp_ct_map_params);
 		if (!ct_map_ent) {
-			ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, extack);
+			ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, true, extack);
 			ct_entry->type = CT_TYPE_NFT;
 			list_add(&ct_entry->list_node, &zt->nft_flows_list);
 			zt->nft_flows_count++;

From b5e30c61d8cbd1002ac3456e29952b8c78bc542f Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:03 +0200
Subject: [PATCH 1993/3292] nfp: flower-ct: add nft_merge table

Add table and struct to save the result of the three-way merge
between pre_ct,post_ct, and nft flows. Merging code is to be
added in follow-up patches.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 14 ++++++++
 .../ethernet/netronome/nfp/flower/conntrack.h | 33 +++++++++++++++++++
 .../ethernet/netronome/nfp/flower/metadata.c  |  2 ++
 3 files changed, 49 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 2c636f8490e1..3ab09d040d4c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -11,6 +11,14 @@ const struct rhashtable_params nfp_tc_ct_merge_params = {
 	.automatic_shrinking	= true,
 };
 
+const struct rhashtable_params nfp_nft_ct_merge_params = {
+	.head_offset		= offsetof(struct nfp_fl_nft_tc_merge,
+					   hash_node),
+	.key_len		= sizeof(unsigned long) * 3,
+	.key_offset		= offsetof(struct nfp_fl_nft_tc_merge, cookie),
+	.automatic_shrinking	= true,
+};
+
 /**
  * get_hashentry() - Wrapper around hashtable lookup.
  * @ht:		hashtable where entry could be found
@@ -171,6 +179,10 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 	if (err)
 		goto err_tc_merge_tb_init;
 
+	err = rhashtable_init(&zt->nft_merge_tb, &nfp_nft_ct_merge_params);
+	if (err)
+		goto err_nft_merge_tb_init;
+
 	if (wildcarded) {
 		priv->ct_zone_wc = zt;
 	} else {
@@ -184,6 +196,8 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
 	return zt;
 
 err_zone_insert:
+	rhashtable_destroy(&zt->nft_merge_tb);
+err_nft_merge_tb_init:
 	rhashtable_destroy(&zt->tc_merge_tb);
 err_tc_merge_tb_init:
 	kfree(zt);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index def95c3e8bb7..753a9eea5952 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -12,6 +12,7 @@
 extern const struct rhashtable_params nfp_zone_table_params;
 extern const struct rhashtable_params nfp_ct_map_params;
 extern const struct rhashtable_params nfp_tc_ct_merge_params;
+extern const struct rhashtable_params nfp_nft_ct_merge_params;
 
 /**
  * struct nfp_fl_ct_zone_entry - Zone entry containing conntrack flow information
@@ -31,6 +32,9 @@ extern const struct rhashtable_params nfp_tc_ct_merge_params;
  *
  * @nft_flows_list:	The list of nft relatednfp_fl_ct_flow_entry entries
  * @nft_flows_count:	Keep count of the number of nft_flow entries
+ *
+ * @nft_merge_tb:	The table of merged tc+nft flows
+ * @nft_merge_count:	Keep count of the number of merged tc+nft entries
  */
 struct nfp_fl_ct_zone_entry {
 	u16 zone;
@@ -50,6 +54,9 @@ struct nfp_fl_ct_zone_entry {
 
 	struct list_head nft_flows_list;
 	unsigned int nft_flows_count;
+
+	struct rhashtable nft_merge_tb;
+	unsigned int nft_merge_count;
 };
 
 enum ct_entry_type {
@@ -106,6 +113,32 @@ struct nfp_fl_ct_tc_merge {
 	struct list_head children;
 };
 
+/**
+ * struct nfp_fl_nft_tc_merge - Merge of tc_merge flows with nft flow
+ * @netdev:		Ingress netdev name
+ * @cookie:		Flow cookie, combination of tc_merge and nft cookies
+ * @hash_node:		Used by the hashtable
+ * @zt:	Reference to the zone table this belongs to
+ * @nft_flow_list:	This entry is part of a nft_flows_list
+ * @tc_merge_list:	This entry is part of a ct_merge_list
+ * @tc_m_parent:	The tc_merge parent
+ * @nft_parent:	The nft_entry parent
+ * @tc_flower_cookie:	The cookie of the flow offloaded to the nfp
+ * @flow_pay:	Reference to the offloaded flow struct
+ */
+struct nfp_fl_nft_tc_merge {
+	struct net_device *netdev;
+	unsigned long cookie[3];
+	struct rhash_head hash_node;
+	struct nfp_fl_ct_zone_entry *zt;
+	struct list_head nft_flow_list;
+	struct list_head tc_merge_list;
+	struct nfp_fl_ct_tc_merge *tc_m_parent;
+	struct nfp_fl_ct_flow_entry *nft_parent;
+	unsigned long tc_flower_cookie;
+	struct nfp_fl_payload *flow_pay;
+};
+
 /**
  * struct nfp_fl_ct_map_entry - Map between flow cookie and specific ct_flow
  * @cookie:	Flow cookie, same as original TC flow, used as key
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index a0a0242567a6..621113650a9b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -667,6 +667,8 @@ static void nfp_zone_table_entry_destroy(struct nfp_fl_ct_zone_entry *zt)
 
 	rhashtable_free_and_destroy(&zt->tc_merge_tb,
 				    nfp_check_rhashtable_empty, NULL);
+	rhashtable_free_and_destroy(&zt->nft_merge_tb,
+				    nfp_check_rhashtable_empty, NULL);
 
 	kfree(zt);
 }

From a6ffdd3a0e47fb8da9b6f3a8fae9f473644b94e0 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:04 +0200
Subject: [PATCH 1994/3292] nfp: flower-ct: implement code to save merge of tc
 and nft flows

Add in the code to merge the tc_merge objects with the flows
received from nft. At the moment flows are just merged blindly
as the validity check functions are stubbed out, this will
be populated in follow-up patches.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 187 ++++++++++++++++++
 1 file changed, 187 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 3ab09d040d4c..e5d5ce7f0ead 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -78,11 +78,122 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
 	return 0;
 }
 
+static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
+				  struct nfp_fl_ct_flow_entry *post_ct_entry,
+				  struct nfp_fl_ct_flow_entry *nft_entry)
+{
+	return 0;
+}
+
+static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
+			     struct nfp_fl_ct_flow_entry *nft_entry)
+{
+	return 0;
+}
+
+static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
+{
+	return 0;
+}
+
+static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie,
+				 struct net_device *netdev)
+{
+	return 0;
+}
+
+static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
+			       struct nfp_fl_ct_flow_entry *nft_entry,
+			       struct nfp_fl_ct_tc_merge *tc_m_entry)
+{
+	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
+	struct nfp_fl_nft_tc_merge *nft_m_entry;
+	unsigned long new_cookie[3];
+	int err;
+
+	pre_ct_entry = tc_m_entry->pre_ct_parent;
+	post_ct_entry = tc_m_entry->post_ct_parent;
+
+	err = nfp_ct_merge_act_check(pre_ct_entry, post_ct_entry, nft_entry);
+	if (err)
+		return err;
+
+	/* Check that the two tc flows are also compatible with
+	 * the nft entry. No need to check the pre_ct and post_ct
+	 * entries as that was already done during pre_merge.
+	 * The nft entry does not have a netdev or chain populated, so
+	 * skip this check.
+	 */
+	err = nfp_ct_merge_check(pre_ct_entry, nft_entry);
+	if (err)
+		return err;
+	err = nfp_ct_merge_check(post_ct_entry, nft_entry);
+	if (err)
+		return err;
+	err = nfp_ct_check_meta(post_ct_entry, nft_entry);
+	if (err)
+		return err;
+
+	/* Combine tc_merge and nft cookies for this cookie. */
+	new_cookie[0] = tc_m_entry->cookie[0];
+	new_cookie[1] = tc_m_entry->cookie[1];
+	new_cookie[2] = nft_entry->cookie;
+	nft_m_entry = get_hashentry(&zt->nft_merge_tb,
+				    &new_cookie,
+				    nfp_nft_ct_merge_params,
+				    sizeof(*nft_m_entry));
+
+	if (IS_ERR(nft_m_entry))
+		return PTR_ERR(nft_m_entry);
+
+	/* nft_m_entry already present, not merging again */
+	if (!memcmp(&new_cookie, nft_m_entry->cookie, sizeof(new_cookie)))
+		return 0;
+
+	memcpy(&nft_m_entry->cookie, &new_cookie, sizeof(new_cookie));
+	nft_m_entry->zt = zt;
+	nft_m_entry->tc_m_parent = tc_m_entry;
+	nft_m_entry->nft_parent = nft_entry;
+	nft_m_entry->tc_flower_cookie = 0;
+	/* Copy the netdev from one the pre_ct entry. When the tc_m_entry was created
+	 * it only combined them if the netdevs were the same, so can use any of them.
+	 */
+	nft_m_entry->netdev = pre_ct_entry->netdev;
+
+	/* Add this entry to the tc_m_list and nft_flow lists */
+	list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children);
+	list_add(&nft_m_entry->nft_flow_list, &nft_entry->children);
+
+	/* Generate offload structure and send to nfp */
+	err = nfp_fl_ct_add_offload(nft_m_entry);
+	if (err)
+		goto err_nft_ct_offload;
+
+	err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node,
+				     nfp_nft_ct_merge_params);
+	if (err)
+		goto err_nft_ct_merge_insert;
+
+	zt->nft_merge_count++;
+
+	return err;
+
+err_nft_ct_merge_insert:
+	nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie,
+			      nft_m_entry->netdev);
+err_nft_ct_offload:
+	list_del(&nft_m_entry->tc_merge_list);
+	list_del(&nft_m_entry->nft_flow_list);
+	kfree(nft_m_entry);
+	return err;
+}
+
 static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
 			      struct nfp_fl_ct_flow_entry *ct_entry1,
 			      struct nfp_fl_ct_flow_entry *ct_entry2)
 {
 	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
+	struct nfp_fl_ct_flow_entry *nft_entry, *nft_tmp;
 	struct nfp_fl_ct_tc_merge *m_entry;
 	unsigned long new_cookie[2];
 	int err;
@@ -134,6 +245,12 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
 		goto err_ct_tc_merge_insert;
 	zt->tc_merge_count++;
 
+	/* Merge with existing nft flows */
+	list_for_each_entry_safe(nft_entry, nft_tmp, &zt->nft_flows_list,
+				 list_node) {
+		nfp_ct_do_nft_merge(zt, nft_entry, m_entry);
+	}
+
 	return 0;
 
 err_ct_tc_merge_insert:
@@ -321,8 +438,57 @@ err_pre_ct_rule:
 	return ERR_PTR(err);
 }
 
+static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry)
+{
+	struct nfp_fl_ct_zone_entry *zt;
+	int err;
+
+	zt = m_entry->zt;
+
+	/* Flow is in HW, need to delete */
+	if (m_entry->tc_flower_cookie) {
+		err = nfp_fl_ct_del_offload(zt->priv->app, m_entry->tc_flower_cookie,
+					    m_entry->netdev);
+		if (err)
+			return;
+	}
+
+	WARN_ON_ONCE(rhashtable_remove_fast(&zt->nft_merge_tb,
+					    &m_entry->hash_node,
+					    nfp_nft_ct_merge_params));
+	zt->nft_merge_count--;
+	list_del(&m_entry->tc_merge_list);
+	list_del(&m_entry->nft_flow_list);
+
+	kfree(m_entry);
+}
+
 static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow)
 {
+	struct nfp_fl_nft_tc_merge *m_entry, *tmp;
+
+	/* These post entries are parts of two lists, one is a list of nft_entries
+	 * and the other is of from a list of tc_merge structures. Iterate
+	 * through the relevant list and cleanup the entries.
+	 */
+
+	if (is_nft_flow) {
+		/* Need to iterate through list of nft_flow entries*/
+		struct nfp_fl_ct_flow_entry *ct_entry = entry;
+
+		list_for_each_entry_safe(m_entry, tmp, &ct_entry->children,
+					 nft_flow_list) {
+			cleanup_nft_merge_entry(m_entry);
+		}
+	} else {
+		/* Need to iterate through list of tc_merged_flow entries*/
+		struct nfp_fl_ct_tc_merge *ct_entry = entry;
+
+		list_for_each_entry_safe(m_entry, tmp, &ct_entry->children,
+					 tc_merge_list) {
+			cleanup_nft_merge_entry(m_entry);
+		}
+	}
 }
 
 static void nfp_del_tc_merge_entry(struct nfp_fl_ct_tc_merge *m_ent)
@@ -425,6 +591,26 @@ nfp_ct_merge_tc_entries(struct nfp_fl_ct_flow_entry *ct_entry1,
 	}
 }
 
+static void
+nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry,
+			 struct nfp_fl_ct_zone_entry *zt)
+{
+	struct nfp_fl_ct_tc_merge *tc_merge_entry;
+	struct rhashtable_iter iter;
+
+	rhashtable_walk_enter(&zt->tc_merge_tb, &iter);
+	rhashtable_walk_start(&iter);
+	while ((tc_merge_entry = rhashtable_walk_next(&iter)) != NULL) {
+		if (IS_ERR(tc_merge_entry))
+			continue;
+		rhashtable_walk_stop(&iter);
+		nfp_ct_do_nft_merge(zt, nft_entry, tc_merge_entry);
+		rhashtable_walk_start(&iter);
+	}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+}
+
 int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 			    struct net_device *netdev,
 			    struct flow_cls_offload *flow,
@@ -568,6 +754,7 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
 			ct_entry->type = CT_TYPE_NFT;
 			list_add(&ct_entry->list_node, &zt->nft_flows_list);
 			zt->nft_flows_count++;
+			nfp_ct_merge_nft_with_tc(ct_entry, zt);
 		}
 		return 0;
 	case FLOW_CLS_DESTROY:

From c698e2adcc63a99cb0fce08d29cc181807f718a0 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:05 +0200
Subject: [PATCH 1995/3292] nfp: flower-ct: fill in ct merge check function

Replace merge check stub code with the actual implementation. This
checks that the match parts of two tc flows does not conflict.
Only overlapping keys needs to be checked, and only the narrowest
masked parts needs to be checked, so each key is masked with the
AND'd result of both masks before comparing.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 170 ++++++++++++++++++
 .../ethernet/netronome/nfp/flower/conntrack.h |  20 +++
 2 files changed, 190 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index e5d5ce7f0ead..8bab890390cf 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -75,7 +75,177 @@ bool is_post_ct_flow(struct flow_cls_offload *flow)
 static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
 			      struct nfp_fl_ct_flow_entry *entry2)
 {
+	unsigned int ovlp_keys = entry1->rule->match.dissector->used_keys &
+				 entry2->rule->match.dissector->used_keys;
+	bool out;
+
+	/* check the overlapped fields one by one, the unmasked part
+	 * should not conflict with each other.
+	 */
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match1, match2;
+
+		flow_rule_match_control(entry1->rule, &match1);
+		flow_rule_match_control(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match1, match2;
+
+		flow_rule_match_basic(entry1->rule, &match1);
+		flow_rule_match_basic(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match1, match2;
+
+		flow_rule_match_ipv4_addrs(entry1->rule, &match1);
+		flow_rule_match_ipv4_addrs(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match1, match2;
+
+		flow_rule_match_ipv6_addrs(entry1->rule, &match1);
+		flow_rule_match_ipv6_addrs(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match1, match2;
+
+		flow_rule_match_ports(entry1->rule, &match1);
+		flow_rule_match_ports(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match1, match2;
+
+		flow_rule_match_eth_addrs(entry1->rule, &match1);
+		flow_rule_match_eth_addrs(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match1, match2;
+
+		flow_rule_match_vlan(entry1->rule, &match1);
+		flow_rule_match_vlan(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_MPLS)) {
+		struct flow_match_mpls match1, match2;
+
+		flow_rule_match_mpls(entry1->rule, &match1);
+		flow_rule_match_mpls(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_match_tcp match1, match2;
+
+		flow_rule_match_tcp(entry1->rule, &match1);
+		flow_rule_match_tcp(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match1, match2;
+
+		flow_rule_match_ip(entry1->rule, &match1);
+		flow_rule_match_ip(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match1, match2;
+
+		flow_rule_match_enc_keyid(entry1->rule, &match1);
+		flow_rule_match_enc_keyid(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match1, match2;
+
+		flow_rule_match_enc_ipv4_addrs(entry1->rule, &match1);
+		flow_rule_match_enc_ipv4_addrs(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match1, match2;
+
+		flow_rule_match_enc_ipv6_addrs(entry1->rule, &match1);
+		flow_rule_match_enc_ipv6_addrs(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_match_control match1, match2;
+
+		flow_rule_match_enc_control(entry1->rule, &match1);
+		flow_rule_match_enc_control(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_match_ip match1, match2;
+
+		flow_rule_match_enc_ip(entry1->rule, &match1);
+		flow_rule_match_enc_ip(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
+	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+		struct flow_match_enc_opts match1, match2;
+
+		flow_rule_match_enc_opts(entry1->rule, &match1);
+		flow_rule_match_enc_opts(entry2->rule, &match2);
+		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
+		if (out)
+			goto check_failed;
+	}
+
 	return 0;
+
+check_failed:
+	return -EINVAL;
 }
 
 static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index 753a9eea5952..170b6cdb8cd0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -9,6 +9,26 @@
 
 #define NFP_FL_CT_NO_TUN	0xff
 
+#define COMPARE_UNMASKED_FIELDS(__match1, __match2, __out)	\
+	do {							\
+		typeof(__match1) _match1 = (__match1);		\
+		typeof(__match2) _match2 = (__match2);		\
+		bool *_out = (__out);		\
+		int i, size = sizeof(*(_match1).key);		\
+		char *k1, *m1, *k2, *m2;			\
+		*_out = false;					\
+		k1 = (char *)_match1.key;			\
+		m1 = (char *)_match1.mask;			\
+		k2 = (char *)_match2.key;			\
+		m2 = (char *)_match2.mask;			\
+		for (i = 0; i < size; i++)			\
+			if ((k1[i] & m1[i] & m2[i]) ^		\
+			    (k2[i] & m1[i] & m2[i])) {		\
+				*_out = true;			\
+				break;				\
+			}					\
+	} while (0)						\
+
 extern const struct rhashtable_params nfp_zone_table_params;
 extern const struct rhashtable_params nfp_ct_map_params;
 extern const struct rhashtable_params nfp_tc_ct_merge_params;

From 5e5f08168db4b7ea5d056cc429781b0cf546ebb1 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:06 +0200
Subject: [PATCH 1996/3292] nfp: flower-ct: fill ct metadata check function

Fill in check_meta stub to check that ct_metadata action fields in
the nft flow matches the ct_match data of the post_ct flow.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 37 ++++++++++++++++---
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 8bab890390cf..6aecaf41d9cd 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -19,6 +19,9 @@ const struct rhashtable_params nfp_nft_ct_merge_params = {
 	.automatic_shrinking	= true,
 };
 
+static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
+					      enum flow_action_id act_id);
+
 /**
  * get_hashentry() - Wrapper around hashtable lookup.
  * @ht:		hashtable where entry could be found
@@ -258,7 +261,31 @@ static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
 static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
 			     struct nfp_fl_ct_flow_entry *nft_entry)
 {
-	return 0;
+	struct flow_dissector *dissector = post_ct_entry->rule->match.dissector;
+	struct flow_action_entry *ct_met;
+	struct flow_match_ct ct;
+	int i;
+
+	ct_met = get_flow_act(nft_entry->rule, FLOW_ACTION_CT_METADATA);
+	if (ct_met && (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT))) {
+		u32 *act_lbl;
+
+		act_lbl = ct_met->ct_metadata.labels;
+		flow_rule_match_ct(post_ct_entry->rule, &ct);
+		for (i = 0; i < 4; i++) {
+			if ((ct.key->ct_labels[i] & ct.mask->ct_labels[i]) ^
+			    (act_lbl[i] & ct.mask->ct_labels[i]))
+				return -EINVAL;
+		}
+
+		if ((ct.key->ct_mark & ct.mask->ct_mark) ^
+		    (ct_met->ct_metadata.mark & ct.mask->ct_mark))
+			return -EINVAL;
+
+		return 0;
+	}
+
+	return -EINVAL;
 }
 
 static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
@@ -727,13 +754,13 @@ void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
 	kfree(entry);
 }
 
-static struct flow_action_entry *get_flow_act(struct flow_cls_offload *flow,
+static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
 					      enum flow_action_id act_id)
 {
 	struct flow_action_entry *act = NULL;
 	int i;
 
-	flow_action_for_each(i, act, &flow->rule->action) {
+	flow_action_for_each(i, act, &rule->action) {
 		if (act->id == act_id)
 			return act;
 	}
@@ -791,14 +818,14 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
 	struct nfp_fl_ct_zone_entry *zt;
 	int err;
 
-	ct_act = get_flow_act(flow, FLOW_ACTION_CT);
+	ct_act = get_flow_act(flow->rule, FLOW_ACTION_CT);
 	if (!ct_act) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "unsupported offload: Conntrack action empty in conntrack offload");
 		return -EOPNOTSUPP;
 	}
 
-	ct_goto = get_flow_act(flow, FLOW_ACTION_GOTO);
+	ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
 	if (!ct_goto) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "unsupported offload: Conntrack requires ACTION_GOTO");

From 30c4a9f4fe3f47ffa5783329fa5553f8baef3a76 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@corigine.com>
Date: Wed, 16 Jun 2021 12:02:07 +0200
Subject: [PATCH 1997/3292] nfp: flower-ct: implement action_merge check

Fill in code stub to check that the flow actions are valid for
merge. The actions of the flow X should not conflict with the
matches of flow X+1. For now this check is quite strict and
set_actions are very limited, will need to update this when
NAT support is added.

Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/flower/conntrack.c | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 6aecaf41d9cd..9ea77bb3b69c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -251,10 +251,129 @@ check_failed:
 	return -EINVAL;
 }
 
+static int nfp_ct_check_mangle_merge(struct flow_action_entry *a_in,
+				     struct flow_rule *rule)
+{
+	enum flow_action_mangle_base htype = a_in->mangle.htype;
+	u32 offset = a_in->mangle.offset;
+
+	switch (htype) {
+	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS))
+			return -EOPNOTSUPP;
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+			struct flow_match_ip match;
+
+			flow_rule_match_ip(rule, &match);
+			if (offset == offsetof(struct iphdr, ttl) &&
+			    match.mask->ttl)
+				return -EOPNOTSUPP;
+			if (offset == round_down(offsetof(struct iphdr, tos), 4) &&
+			    match.mask->tos)
+				return -EOPNOTSUPP;
+		}
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+			struct flow_match_ipv4_addrs match;
+
+			flow_rule_match_ipv4_addrs(rule, &match);
+			if (offset == offsetof(struct iphdr, saddr) &&
+			    match.mask->src)
+				return -EOPNOTSUPP;
+			if (offset == offsetof(struct iphdr, daddr) &&
+			    match.mask->dst)
+				return -EOPNOTSUPP;
+		}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+			struct flow_match_ip match;
+
+			flow_rule_match_ip(rule, &match);
+			if (offset == round_down(offsetof(struct ipv6hdr, hop_limit), 4) &&
+			    match.mask->ttl)
+				return -EOPNOTSUPP;
+			/* for ipv6, tos and flow_lbl are in the same word */
+			if (offset == round_down(offsetof(struct ipv6hdr, flow_lbl), 4) &&
+			    match.mask->tos)
+				return -EOPNOTSUPP;
+		}
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+			struct flow_match_ipv6_addrs match;
+
+			flow_rule_match_ipv6_addrs(rule, &match);
+			if (offset >= offsetof(struct ipv6hdr, saddr) &&
+			    offset < offsetof(struct ipv6hdr, daddr) &&
+			    memchr_inv(&match.mask->src, 0, sizeof(match.mask->src)))
+				return -EOPNOTSUPP;
+			if (offset >= offsetof(struct ipv6hdr, daddr) &&
+			    offset < sizeof(struct ipv6hdr) &&
+			    memchr_inv(&match.mask->dst, 0, sizeof(match.mask->dst)))
+				return -EOPNOTSUPP;
+		}
+		break;
+	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+		/* currently only can modify ports */
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
 				  struct nfp_fl_ct_flow_entry *post_ct_entry,
 				  struct nfp_fl_ct_flow_entry *nft_entry)
 {
+	struct flow_action_entry *act;
+	int err, i;
+
+	/* Check for pre_ct->action conflicts */
+	flow_action_for_each(i, act, &pre_ct_entry->rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_MANGLE:
+			err = nfp_ct_check_mangle_merge(act, nft_entry->rule);
+			if (err)
+				return err;
+			err = nfp_ct_check_mangle_merge(act, post_ct_entry->rule);
+			if (err)
+				return err;
+			break;
+		case FLOW_ACTION_VLAN_PUSH:
+		case FLOW_ACTION_VLAN_POP:
+		case FLOW_ACTION_VLAN_MANGLE:
+		case FLOW_ACTION_MPLS_PUSH:
+		case FLOW_ACTION_MPLS_POP:
+		case FLOW_ACTION_MPLS_MANGLE:
+			return -EOPNOTSUPP;
+		default:
+			break;
+		}
+	}
+
+	/* Check for nft->action conflicts */
+	flow_action_for_each(i, act, &nft_entry->rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_MANGLE:
+			err = nfp_ct_check_mangle_merge(act, post_ct_entry->rule);
+			if (err)
+				return err;
+			break;
+		case FLOW_ACTION_VLAN_PUSH:
+		case FLOW_ACTION_VLAN_POP:
+		case FLOW_ACTION_VLAN_MANGLE:
+		case FLOW_ACTION_MPLS_PUSH:
+		case FLOW_ACTION_MPLS_POP:
+		case FLOW_ACTION_MPLS_MANGLE:
+			return -EOPNOTSUPP;
+		default:
+			break;
+		}
+	}
 	return 0;
 }
 

From fb0a1dacf2bef929bf047c5434bfb976ac6a93e6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 16 Jun 2021 14:02:58 +0100
Subject: [PATCH 1998/3292] mlxsw: spectrum_router: remove redundant continue
 statement

The continue statement at the end of a for-loop has no effect,
remove it.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index bc47ed766878..7e221ef01437 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5407,7 +5407,6 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
 				    &rt->fib6_nh->fib_nh_gw6))
 			return nh;
-		continue;
 	}
 
 	return NULL;

From e0e4b8fa533858532f1b9ea9c6a4660d09beb37a Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Wed, 16 Jun 2021 16:52:55 +0200
Subject: [PATCH 1999/3292] net/smc: Add SMC statistics support

Add the ability to collect SMC statistics information. Per-cpu
variables are used to collect the statistic information for better
performance and for reducing concurrency pitfalls. The code that is
collecting statistic data is implemented in macros to increase code
reuse and readability.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/Makefile    |   2 +-
 net/smc/af_smc.c    |  89 ++++++++++++----
 net/smc/smc_core.c  |  13 ++-
 net/smc/smc_rx.c    |   8 ++
 net/smc/smc_stats.c |  35 ++++++
 net/smc/smc_stats.h | 253 ++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_tx.c    |  16 ++-
 7 files changed, 395 insertions(+), 21 deletions(-)
 create mode 100644 net/smc/smc_stats.c
 create mode 100644 net/smc/smc_stats.h

diff --git a/net/smc/Makefile b/net/smc/Makefile
index 77e54fe42b1c..99a0186cba5b 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -2,4 +2,4 @@
 obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5eff7cccceff..efeaed384769 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -49,6 +49,7 @@
 #include "smc_tx.h"
 #include "smc_rx.h"
 #include "smc_close.h"
+#include "smc_stats.h"
 
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -508,9 +509,42 @@ static void smc_link_save_peer_info(struct smc_link *link,
 	link->peer_mtu = clc->r0.qp_mtu;
 }
 
-static void smc_switch_to_fallback(struct smc_sock *smc)
+static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
+				       struct smc_stats_fback *fback_arr)
+{
+	int cnt;
+
+	for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
+		if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
+			fback_arr[cnt].count++;
+			break;
+		}
+		if (!fback_arr[cnt].fback_code) {
+			fback_arr[cnt].fback_code = smc->fallback_rsn;
+			fback_arr[cnt].count++;
+			break;
+		}
+	}
+}
+
+static void smc_stat_fallback(struct smc_sock *smc)
+{
+	mutex_lock(&smc_stat_fback_rsn);
+	if (smc->listen_smc) {
+		smc_stat_inc_fback_rsn_cnt(smc, fback_rsn.srv);
+		fback_rsn.srv_fback_cnt++;
+	} else {
+		smc_stat_inc_fback_rsn_cnt(smc, fback_rsn.clnt);
+		fback_rsn.clnt_fback_cnt++;
+	}
+	mutex_unlock(&smc_stat_fback_rsn);
+}
+
+static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
 {
 	smc->use_fallback = true;
+	smc->fallback_rsn = reason_code;
+	smc_stat_fallback(smc);
 	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
 		smc->clcsock->file = smc->sk.sk_socket->file;
 		smc->clcsock->file->private_data = smc->clcsock;
@@ -522,8 +556,7 @@ static void smc_switch_to_fallback(struct smc_sock *smc)
 /* fall back during connect */
 static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
-	smc_switch_to_fallback(smc);
-	smc->fallback_rsn = reason_code;
+	smc_switch_to_fallback(smc, reason_code);
 	smc_copy_sock_settings_to_clc(smc);
 	smc->connect_nonblock = 0;
 	if (smc->sk.sk_state == SMC_INIT)
@@ -538,6 +571,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
 	int rc;
 
 	if (reason_code < 0) { /* error, fallback is not possible */
+		this_cpu_inc(smc_stats->clnt_hshake_err_cnt);
 		if (smc->sk.sk_state == SMC_INIT)
 			sock_put(&smc->sk); /* passive closing */
 		return reason_code;
@@ -545,6 +579,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
 	if (reason_code != SMC_CLC_DECL_PEERDECL) {
 		rc = smc_clc_send_decline(smc, reason_code, version);
 		if (rc < 0) {
+			this_cpu_inc(smc_stats->clnt_hshake_err_cnt);
 			if (smc->sk.sk_state == SMC_INIT)
 				sock_put(&smc->sk); /* passive closing */
 			return rc;
@@ -992,6 +1027,7 @@ static int __smc_connect(struct smc_sock *smc)
 	if (rc)
 		goto vlan_cleanup;
 
+	SMC_STAT_CLNT_SUCC_INC(aclc);
 	smc_connect_ism_vlan_cleanup(smc, ini);
 	kfree(buf);
 	kfree(ini);
@@ -1308,6 +1344,7 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
 {
 	struct sock *newsmcsk = &new_smc->sk;
 
+	this_cpu_inc(smc_stats->srv_hshake_err_cnt);
 	if (newsmcsk->sk_state == SMC_INIT)
 		sock_put(&new_smc->sk); /* passive closing */
 	newsmcsk->sk_state = SMC_CLOSED;
@@ -1325,8 +1362,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
 		smc_listen_out_err(new_smc);
 		return;
 	}
-	smc_switch_to_fallback(new_smc);
-	new_smc->fallback_rsn = reason_code;
+	smc_switch_to_fallback(new_smc, reason_code);
 	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
 		if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
 			smc_listen_out_err(new_smc);
@@ -1699,8 +1735,7 @@ static void smc_listen_work(struct work_struct *work)
 
 	/* check if peer is smc capable */
 	if (!tcp_sk(newclcsock->sk)->syn_smc) {
-		smc_switch_to_fallback(new_smc);
-		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
+		smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
 		smc_listen_out_connected(new_smc);
 		return;
 	}
@@ -1778,6 +1813,7 @@ static void smc_listen_work(struct work_struct *work)
 	}
 	smc_conn_save_peer_info(new_smc, cclc);
 	smc_listen_out_connected(new_smc);
+	SMC_STAT_SERV_SUCC_INC(ini);
 	goto out_free;
 
 out_unlock:
@@ -1984,18 +2020,19 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (msg->msg_flags & MSG_FASTOPEN) {
 		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
-			smc_switch_to_fallback(smc);
-			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
+			smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
 		} else {
 			rc = -EINVAL;
 			goto out;
 		}
 	}
 
-	if (smc->use_fallback)
+	if (smc->use_fallback) {
 		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
-	else
+	} else {
 		rc = smc_tx_sendmsg(smc, msg, len);
+		SMC_STAT_TX_PAYLOAD(smc, len, rc);
+	}
 out:
 	release_sock(sk);
 	return rc;
@@ -2030,6 +2067,7 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	} else {
 		msg->msg_namelen = 0;
 		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
+		SMC_STAT_RX_PAYLOAD(smc, rc, rc);
 	}
 
 out:
@@ -2194,8 +2232,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 	case TCP_FASTOPEN_NO_COOKIE:
 		/* option not supported by SMC */
 		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
-			smc_switch_to_fallback(smc);
-			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
+			smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
 		} else {
 			rc = -EINVAL;
 		}
@@ -2204,18 +2241,22 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 		if (sk->sk_state != SMC_INIT &&
 		    sk->sk_state != SMC_LISTEN &&
 		    sk->sk_state != SMC_CLOSED) {
-			if (val)
+			if (val) {
+				SMC_STAT_INC(!smc->conn.lnk, ndly_cnt);
 				mod_delayed_work(smc->conn.lgr->tx_wq,
 						 &smc->conn.tx_work, 0);
+			}
 		}
 		break;
 	case TCP_CORK:
 		if (sk->sk_state != SMC_INIT &&
 		    sk->sk_state != SMC_LISTEN &&
 		    sk->sk_state != SMC_CLOSED) {
-			if (!val)
+			if (!val) {
+				SMC_STAT_INC(!smc->conn.lnk, cork_cnt);
 				mod_delayed_work(smc->conn.lgr->tx_wq,
 						 &smc->conn.tx_work, 0);
+			}
 		}
 		break;
 	case TCP_DEFER_ACCEPT:
@@ -2338,11 +2379,13 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
 		goto out;
 	}
 	release_sock(sk);
-	if (smc->use_fallback)
+	if (smc->use_fallback) {
 		rc = kernel_sendpage(smc->clcsock, page, offset,
 				     size, flags);
-	else
+	} else {
+		SMC_STAT_INC(!smc->conn.lnk, sendpage_cnt);
 		rc = sock_no_sendpage(sock, page, offset, size, flags);
+	}
 
 out:
 	return rc;
@@ -2391,6 +2434,7 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
 			flags = MSG_DONTWAIT;
 		else
 			flags = 0;
+		SMC_STAT_INC(!smc->conn.lnk, splice_cnt);
 		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
 	}
 out:
@@ -2514,10 +2558,16 @@ static int __init smc_init(void)
 	if (!smc_close_wq)
 		goto out_alloc_hs_wq;
 
+	rc = smc_stats_init();
+	if (rc) {
+		pr_err("%s: smc_stats_init fails with %d\n", __func__, rc);
+		goto out_alloc_wqs;
+	}
+
 	rc = smc_core_init();
 	if (rc) {
 		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
-		goto out_alloc_wqs;
+		goto out_smc_stat;
 	}
 
 	rc = smc_llc_init();
@@ -2569,6 +2619,8 @@ out_proto:
 	proto_unregister(&smc_proto);
 out_core:
 	smc_core_exit();
+out_smc_stat:
+	smc_stats_exit();
 out_alloc_wqs:
 	destroy_workqueue(smc_close_wq);
 out_alloc_hs_wq:
@@ -2591,6 +2643,7 @@ static void __exit smc_exit(void)
 	smc_ib_unregister_client();
 	destroy_workqueue(smc_close_wq);
 	destroy_workqueue(smc_hs_wq);
+	smc_stats_exit();
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
 	smc_pnet_exit();
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 317bc2c90fab..d69f58f670a1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -33,6 +33,7 @@
 #include "smc_close.h"
 #include "smc_ism.h"
 #include "smc_netlink.h"
+#include "smc_stats.h"
 
 #define SMC_LGR_NUM_INCR		256
 #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
@@ -2029,6 +2030,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 	struct smc_link_group *lgr = conn->lgr;
 	struct list_head *buf_list;
 	int bufsize, bufsize_short;
+	bool is_dgraded = false;
 	struct mutex *lock;	/* lock buffer list */
 	int sk_buf_size;
 
@@ -2056,6 +2058,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 		/* check for reusable slot in the link group */
 		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
 		if (buf_desc) {
+			SMC_STAT_RMB_SIZE(is_smcd, is_rmb, bufsize);
+			SMC_STAT_BUF_REUSE(is_smcd, is_rmb);
 			memset(buf_desc->cpu_addr, 0, bufsize);
 			break; /* found reusable slot */
 		}
@@ -2067,9 +2071,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 		if (PTR_ERR(buf_desc) == -ENOMEM)
 			break;
-		if (IS_ERR(buf_desc))
+		if (IS_ERR(buf_desc)) {
+			if (!is_dgraded) {
+				is_dgraded = true;
+				SMC_STAT_RMB_DOWNGRADED(is_smcd, is_rmb);
+			}
 			continue;
+		}
 
+		SMC_STAT_RMB_ALLOC(is_smcd, is_rmb);
+		SMC_STAT_RMB_SIZE(is_smcd, is_rmb, bufsize);
 		buf_desc->used = 1;
 		mutex_lock(lock);
 		list_add(&buf_desc->list, buf_list);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index fcfac59f8b72..ce1ae39923b1 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -21,6 +21,7 @@
 #include "smc_cdc.h"
 #include "smc_tx.h" /* smc_tx_consumer_update() */
 #include "smc_rx.h"
+#include "smc_stats.h"
 
 /* callback implementation to wakeup consumers blocked with smc_rx_wait().
  * indirectly called by smc_cdc_msg_recv_action().
@@ -227,6 +228,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
 	    conn->urg_state == SMC_URG_READ)
 		return -EINVAL;
 
+	SMC_STAT_INC(!conn->lnk, urg_data_cnt);
 	if (conn->urg_state == SMC_URG_VALID) {
 		if (!(flags & MSG_PEEK))
 			smc->conn.urg_state = SMC_URG_READ;
@@ -303,6 +305,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
+	readable = atomic_read(&conn->bytes_to_rcv);
+	if (readable >= conn->rmb_desc->len)
+		SMC_STAT_RMB_RX_FULL(!conn->lnk);
+
+	if (len < readable)
+		SMC_STAT_RMB_RX_SIZE_SMALL(!conn->lnk);
 	/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
 	rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
 
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
new file mode 100644
index 000000000000..76e938388520
--- /dev/null
+++ b/net/smc/smc_stats.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * SMC statistics netlink routines
+ *
+ * Copyright IBM Corp. 2021
+ *
+ * Author(s):  Guvenc Gulce
+ */
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/ctype.h>
+#include "smc_stats.h"
+
+/* serialize fallback reason statistic gathering */
+DEFINE_MUTEX(smc_stat_fback_rsn);
+struct smc_stats __percpu *smc_stats;	/* per cpu counters for SMC */
+struct smc_stats_reason fback_rsn;
+
+int __init smc_stats_init(void)
+{
+	memset(&fback_rsn, 0, sizeof(fback_rsn));
+	smc_stats = alloc_percpu(struct smc_stats);
+	if (!smc_stats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void smc_stats_exit(void)
+{
+	free_percpu(smc_stats);
+}
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
new file mode 100644
index 000000000000..928372114cf1
--- /dev/null
+++ b/net/smc/smc_stats.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Macros for SMC statistics
+ *
+ * Copyright IBM Corp. 2021
+ *
+ * Author(s):  Guvenc Gulce
+ */
+
+#ifndef NET_SMC_SMC_STATS_H_
+#define NET_SMC_SMC_STATS_H_
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/ctype.h>
+#include <linux/smc.h>
+
+#include "smc_clc.h"
+
+#define SMC_MAX_FBACK_RSN_CNT 30
+
+extern struct smc_stats __percpu *smc_stats;	/* per cpu counters for SMC */
+extern struct smc_stats_reason fback_rsn;
+extern struct mutex smc_stat_fback_rsn;
+
+enum {
+	SMC_BUF_8K,
+	SMC_BUF_16K,
+	SMC_BUF_32K,
+	SMC_BUF_64K,
+	SMC_BUF_128K,
+	SMC_BUF_256K,
+	SMC_BUF_512K,
+	SMC_BUF_1024K,
+	SMC_BUF_G_1024K,
+	SMC_BUF_MAX,
+};
+
+struct smc_stats_fback {
+	int	fback_code;
+	u16	count;
+};
+
+struct smc_stats_reason {
+	struct	smc_stats_fback srv[SMC_MAX_FBACK_RSN_CNT];
+	struct	smc_stats_fback clnt[SMC_MAX_FBACK_RSN_CNT];
+	u64			srv_fback_cnt;
+	u64			clnt_fback_cnt;
+};
+
+struct smc_stats_rmbcnt {
+	u64	buf_size_small_peer_cnt;
+	u64	buf_size_small_cnt;
+	u64	buf_full_peer_cnt;
+	u64	buf_full_cnt;
+	u64	reuse_cnt;
+	u64	alloc_cnt;
+	u64	dgrade_cnt;
+};
+
+struct smc_stats_memsize {
+	u64	buf[SMC_BUF_MAX];
+};
+
+struct smc_stats_tech {
+	struct smc_stats_memsize tx_rmbsize;
+	struct smc_stats_memsize rx_rmbsize;
+	struct smc_stats_memsize tx_pd;
+	struct smc_stats_memsize rx_pd;
+	struct smc_stats_rmbcnt rmb_tx;
+	struct smc_stats_rmbcnt rmb_rx;
+	u64			clnt_v1_succ_cnt;
+	u64			clnt_v2_succ_cnt;
+	u64			srv_v1_succ_cnt;
+	u64			srv_v2_succ_cnt;
+	u64			sendpage_cnt;
+	u64			urg_data_cnt;
+	u64			splice_cnt;
+	u64			cork_cnt;
+	u64			ndly_cnt;
+	u64			rx_bytes;
+	u64			tx_bytes;
+	u64			rx_cnt;
+	u64			tx_cnt;
+};
+
+struct smc_stats {
+	struct smc_stats_tech	smc[2];
+	u64			clnt_hshake_err_cnt;
+	u64			srv_hshake_err_cnt;
+};
+
+#define SMC_STAT_PAYLOAD_SUB(_tech, key, _len, _rc) \
+do { \
+	typeof(_tech) t = (_tech); \
+	typeof(_len) l = (_len); \
+	int _pos = fls64((l) >> 13); \
+	typeof(_rc) r = (_rc); \
+	int m = SMC_BUF_MAX - 1; \
+	this_cpu_inc((*smc_stats).smc[t].key ## _cnt); \
+	if (r <= 0) \
+		break; \
+	_pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
+	this_cpu_inc((*smc_stats).smc[t].key ## _pd.buf[_pos]); \
+	this_cpu_add((*smc_stats).smc[t].key ## _bytes, r); \
+} \
+while (0)
+
+#define SMC_STAT_TX_PAYLOAD(_smc, length, rcode) \
+do { \
+	typeof(_smc) __smc = _smc; \
+	typeof(length) _len = (length); \
+	typeof(rcode) _rc = (rcode); \
+	bool is_smcd = !__smc->conn.lnk; \
+	if (is_smcd) \
+		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_D, tx, _len, _rc); \
+	else \
+		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_R, tx, _len, _rc); \
+} \
+while (0)
+
+#define SMC_STAT_RX_PAYLOAD(_smc, length, rcode) \
+do { \
+	typeof(_smc) __smc = _smc; \
+	typeof(length) _len = (length); \
+	typeof(rcode) _rc = (rcode); \
+	bool is_smcd = !__smc->conn.lnk; \
+	if (is_smcd) \
+		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_D, rx, _len, _rc); \
+	else \
+		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_R, rx, _len, _rc); \
+} \
+while (0)
+
+#define SMC_STAT_RMB_SIZE_SUB(_tech, k, _len) \
+do { \
+	typeof(_len) _l = (_len); \
+	typeof(_tech) t = (_tech); \
+	int _pos = fls((_l) >> 13); \
+	int m = SMC_BUF_MAX - 1; \
+	_pos = (_pos < m) ? ((_l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
+	this_cpu_inc((*smc_stats).smc[t].k ## _rmbsize.buf[_pos]); \
+} \
+while (0)
+
+#define SMC_STAT_RMB_SUB(type, t, key) \
+	this_cpu_inc((*smc_stats).smc[t].rmb ## _ ## key.type ## _cnt)
+
+#define SMC_STAT_RMB_SIZE(_is_smcd, _is_rx, _len) \
+do { \
+	typeof(_is_smcd) is_d = (_is_smcd); \
+	typeof(_is_rx) is_r = (_is_rx); \
+	typeof(_len) l = (_len); \
+	if ((is_d) && (is_r)) \
+		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_D, rx, l); \
+	if ((is_d) && !(is_r)) \
+		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_D, tx, l); \
+	if (!(is_d) && (is_r)) \
+		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_R, rx, l); \
+	if (!(is_d) && !(is_r)) \
+		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_R, tx, l); \
+} \
+while (0)
+
+#define SMC_STAT_RMB(type, _is_smcd, _is_rx) \
+do { \
+	typeof(_is_smcd) is_d = (_is_smcd); \
+	typeof(_is_rx) is_r = (_is_rx); \
+	if ((is_d) && (is_r)) \
+		SMC_STAT_RMB_SUB(type, SMC_TYPE_D, rx); \
+	if ((is_d) && !(is_r)) \
+		SMC_STAT_RMB_SUB(type, SMC_TYPE_D, tx); \
+	if (!(is_d) && (is_r)) \
+		SMC_STAT_RMB_SUB(type, SMC_TYPE_R, rx); \
+	if (!(is_d) && !(is_r)) \
+		SMC_STAT_RMB_SUB(type, SMC_TYPE_R, tx); \
+} \
+while (0)
+
+#define SMC_STAT_BUF_REUSE(is_smcd, is_rx) \
+	SMC_STAT_RMB(reuse, is_smcd, is_rx)
+
+#define SMC_STAT_RMB_ALLOC(is_smcd, is_rx) \
+	SMC_STAT_RMB(alloc, is_smcd, is_rx)
+
+#define SMC_STAT_RMB_DOWNGRADED(is_smcd, is_rx) \
+	SMC_STAT_RMB(dgrade, is_smcd, is_rx)
+
+#define SMC_STAT_RMB_TX_PEER_FULL(is_smcd) \
+	SMC_STAT_RMB(buf_full_peer, is_smcd, false)
+
+#define SMC_STAT_RMB_TX_FULL(is_smcd) \
+	SMC_STAT_RMB(buf_full, is_smcd, false)
+
+#define SMC_STAT_RMB_TX_PEER_SIZE_SMALL(is_smcd) \
+	SMC_STAT_RMB(buf_size_small_peer, is_smcd, false)
+
+#define SMC_STAT_RMB_TX_SIZE_SMALL(is_smcd) \
+	SMC_STAT_RMB(buf_size_small, is_smcd, false)
+
+#define SMC_STAT_RMB_RX_SIZE_SMALL(is_smcd) \
+	SMC_STAT_RMB(buf_size_small, is_smcd, true)
+
+#define SMC_STAT_RMB_RX_FULL(is_smcd) \
+	SMC_STAT_RMB(buf_full, is_smcd, true)
+
+#define SMC_STAT_INC(is_smcd, type) \
+do { \
+	if ((is_smcd)) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].type); \
+	else \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_R].type); \
+} \
+while (0)
+
+#define SMC_STAT_CLNT_SUCC_INC(_aclc) \
+do { \
+	typeof(_aclc) acl = (_aclc); \
+	bool is_v2 = (acl->hdr.version == SMC_V2); \
+	bool is_smcd = (acl->hdr.typev1 == SMC_TYPE_D); \
+	if (is_v2 && is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].clnt_v2_succ_cnt); \
+	else if (is_v2 && !is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_R].clnt_v2_succ_cnt); \
+	else if (!is_v2 && is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].clnt_v1_succ_cnt); \
+	else if (!is_v2 && !is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_R].clnt_v1_succ_cnt); \
+} \
+while (0)
+
+#define SMC_STAT_SERV_SUCC_INC(_ini) \
+do { \
+	typeof(_ini) i = (_ini); \
+	bool is_v2 = (i->smcd_version & SMC_V2); \
+	bool is_smcd = (i->is_smcd); \
+	if (is_v2 && is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
+	else if (is_v2 && !is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_R].srv_v2_succ_cnt); \
+	else if (!is_v2 && is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v1_succ_cnt); \
+	else if (!is_v2 && !is_smcd) \
+		this_cpu_inc(smc_stats->smc[SMC_TYPE_R].srv_v1_succ_cnt); \
+} \
+while (0)
+
+int smc_stats_init(void) __init;
+void smc_stats_exit(void);
+
+#endif /* NET_SMC_SMC_STATS_H_ */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 4532c16bf85e..a043544d715f 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -27,6 +27,7 @@
 #include "smc_close.h"
 #include "smc_ism.h"
 #include "smc_tx.h"
+#include "smc_stats.h"
 
 #define SMC_TX_WORK_DELAY	0
 #define SMC_TX_CORK_DELAY	(HZ >> 2)	/* 250 ms */
@@ -45,6 +46,8 @@ static void smc_tx_write_space(struct sock *sk)
 
 	/* similar to sk_stream_write_space */
 	if (atomic_read(&smc->conn.sndbuf_space) && sock) {
+		if (test_bit(SOCK_NOSPACE, &sock->flags))
+			SMC_STAT_RMB_TX_FULL(!smc->conn.lnk);
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 		rcu_read_lock();
 		wq = rcu_dereference(sk->sk_wq);
@@ -151,6 +154,15 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 		goto out_err;
 	}
 
+	if (len > conn->sndbuf_desc->len)
+		SMC_STAT_RMB_TX_SIZE_SMALL(!conn->lnk);
+
+	if (len > conn->peer_rmbe_size)
+		SMC_STAT_RMB_TX_PEER_SIZE_SMALL(!conn->lnk);
+
+	if (msg->msg_flags & MSG_OOB)
+		SMC_STAT_INC(!conn->lnk, urg_data_cnt);
+
 	while (msg_data_left(msg)) {
 		if (sk->sk_state == SMC_INIT)
 			return -ENOTCONN;
@@ -419,8 +431,10 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
 	/* destination: RMBE */
 	/* cf. snd_wnd */
 	rmbespace = atomic_read(&conn->peer_rmbe_space);
-	if (rmbespace <= 0)
+	if (rmbespace <= 0) {
+		SMC_STAT_RMB_TX_PEER_FULL(!conn->lnk);
 		return 0;
+	}
 	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
 	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
 

From 8c40602b4be17571dfd75102f4f1e690311c5210 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Wed, 16 Jun 2021 16:52:56 +0200
Subject: [PATCH 2000/3292] net/smc: Add netlink support for SMC statistics

Add the netlink function which collects the statistics information and
delivers it to the userspace.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h |  69 ++++++++++
 net/smc/smc_netlink.c    |   6 +
 net/smc/smc_stats.c      | 279 +++++++++++++++++++++++++++++++++++++++
 net/smc/smc_stats.h      |   1 +
 4 files changed, 355 insertions(+)

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 3e68da07fba2..f32f11b30963 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -47,6 +47,7 @@ enum {
 	SMC_NETLINK_GET_LGR_SMCD,
 	SMC_NETLINK_GET_DEV_SMCD,
 	SMC_NETLINK_GET_DEV_SMCR,
+	SMC_NETLINK_GET_STATS,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -58,6 +59,7 @@ enum {
 	SMC_GEN_LGR_SMCD,		/* nest */
 	SMC_GEN_DEV_SMCD,		/* nest */
 	SMC_GEN_DEV_SMCR,		/* nest */
+	SMC_GEN_STATS,			/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -159,4 +161,71 @@ enum {
 	SMC_NLA_DEV_MAX = __SMC_NLA_DEV_MAX - 1
 };
 
+/* SMC_NLA_STATS_T_TX(RX)_RMB_SIZE nested attributes */
+/* SMC_NLA_STATS_TX(RX)PLOAD_SIZE nested attributes */
+enum {
+	SMC_NLA_STATS_PLOAD_PAD,
+	SMC_NLA_STATS_PLOAD_8K,		/* u64 */
+	SMC_NLA_STATS_PLOAD_16K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_32K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_64K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_128K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_256K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_512K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_1024K,	/* u64 */
+	SMC_NLA_STATS_PLOAD_G_1024K,	/* u64 */
+	__SMC_NLA_STATS_PLOAD_MAX,
+	SMC_NLA_STATS_PLOAD_MAX = __SMC_NLA_STATS_PLOAD_MAX - 1
+};
+
+/* SMC_NLA_STATS_T_TX(RX)_RMB_STATS nested attributes */
+enum {
+	SMC_NLA_STATS_RMB_PAD,
+	SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT,	/* u64 */
+	SMC_NLA_STATS_RMB_SIZE_SM_CNT,		/* u64 */
+	SMC_NLA_STATS_RMB_FULL_PEER_CNT,	/* u64 */
+	SMC_NLA_STATS_RMB_FULL_CNT,		/* u64 */
+	SMC_NLA_STATS_RMB_REUSE_CNT,		/* u64 */
+	SMC_NLA_STATS_RMB_ALLOC_CNT,		/* u64 */
+	SMC_NLA_STATS_RMB_DGRADE_CNT,		/* u64 */
+	__SMC_NLA_STATS_RMB_MAX,
+	SMC_NLA_STATS_RMB_MAX = __SMC_NLA_STATS_RMB_MAX - 1
+};
+
+/* SMC_NLA_STATS_SMCD_TECH and _SMCR_TECH nested attributes */
+enum {
+	SMC_NLA_STATS_T_PAD,
+	SMC_NLA_STATS_T_TX_RMB_SIZE,	/* nest */
+	SMC_NLA_STATS_T_RX_RMB_SIZE,	/* nest */
+	SMC_NLA_STATS_T_TXPLOAD_SIZE,	/* nest */
+	SMC_NLA_STATS_T_RXPLOAD_SIZE,	/* nest */
+	SMC_NLA_STATS_T_TX_RMB_STATS,	/* nest */
+	SMC_NLA_STATS_T_RX_RMB_STATS,	/* nest */
+	SMC_NLA_STATS_T_CLNT_V1_SUCC,	/* u64 */
+	SMC_NLA_STATS_T_CLNT_V2_SUCC,	/* u64 */
+	SMC_NLA_STATS_T_SRV_V1_SUCC,	/* u64 */
+	SMC_NLA_STATS_T_SRV_V2_SUCC,	/* u64 */
+	SMC_NLA_STATS_T_SENDPAGE_CNT,	/* u64 */
+	SMC_NLA_STATS_T_SPLICE_CNT,	/* u64 */
+	SMC_NLA_STATS_T_CORK_CNT,	/* u64 */
+	SMC_NLA_STATS_T_NDLY_CNT,	/* u64 */
+	SMC_NLA_STATS_T_URG_DATA_CNT,	/* u64 */
+	SMC_NLA_STATS_T_RX_BYTES,	/* u64 */
+	SMC_NLA_STATS_T_TX_BYTES,	/* u64 */
+	SMC_NLA_STATS_T_RX_CNT,		/* u64 */
+	SMC_NLA_STATS_T_TX_CNT,		/* u64 */
+	__SMC_NLA_STATS_T_MAX,
+	SMC_NLA_STATS_T_MAX = __SMC_NLA_STATS_T_MAX - 1
+};
+
+/* SMC_GEN_STATS attributes */
+enum {
+	SMC_NLA_STATS_PAD,
+	SMC_NLA_STATS_SMCD_TECH,	/* nest */
+	SMC_NLA_STATS_SMCR_TECH,	/* nest */
+	SMC_NLA_STATS_CLNT_HS_ERR_CNT,	/* u64 */
+	SMC_NLA_STATS_SRV_HS_ERR_CNT,	/* u64 */
+	__SMC_NLA_STATS_MAX,
+	SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 140419a19dbf..30e30b23370f 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -19,6 +19,7 @@
 #include "smc_core.h"
 #include "smc_ism.h"
 #include "smc_ib.h"
+#include "smc_stats.h"
 #include "smc_netlink.h"
 
 #define SMC_CMD_MAX_ATTR 1
@@ -55,6 +56,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smcr_nl_get_device,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_STATS,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smc_nl_get_stats,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index 76e938388520..72119d3d8558 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -12,6 +12,10 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/ctype.h>
+#include <linux/smc.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include "smc_netlink.h"
 #include "smc_stats.h"
 
 /* serialize fallback reason statistic gathering */
@@ -33,3 +37,278 @@ void smc_stats_exit(void)
 {
 	free_percpu(smc_stats);
 }
+
+static int smc_nl_fill_stats_rmb_data(struct sk_buff *skb,
+				      struct smc_stats *stats, int tech,
+				      int type)
+{
+	struct smc_stats_rmbcnt *stats_rmb_cnt;
+	struct nlattr *attrs;
+
+	if (type == SMC_NLA_STATS_T_TX_RMB_STATS)
+		stats_rmb_cnt = &stats->smc[tech].rmb_tx;
+	else
+		stats_rmb_cnt = &stats->smc[tech].rmb_rx;
+
+	attrs = nla_nest_start(skb, type);
+	if (!attrs)
+		goto errout;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_REUSE_CNT,
+			      stats_rmb_cnt->reuse_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT,
+			      stats_rmb_cnt->buf_size_small_peer_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_SIZE_SM_CNT,
+			      stats_rmb_cnt->buf_size_small_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_FULL_PEER_CNT,
+			      stats_rmb_cnt->buf_full_peer_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_FULL_CNT,
+			      stats_rmb_cnt->buf_full_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_ALLOC_CNT,
+			      stats_rmb_cnt->alloc_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_DGRADE_CNT,
+			      stats_rmb_cnt->dgrade_cnt,
+			      SMC_NLA_STATS_RMB_PAD))
+		goto errattr;
+
+	nla_nest_end(skb, attrs);
+	return 0;
+
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	return -EMSGSIZE;
+}
+
+static int smc_nl_fill_stats_bufsize_data(struct sk_buff *skb,
+					  struct smc_stats *stats, int tech,
+					  int type)
+{
+	struct smc_stats_memsize *stats_pload;
+	struct nlattr *attrs;
+
+	if (type == SMC_NLA_STATS_T_TXPLOAD_SIZE)
+		stats_pload = &stats->smc[tech].tx_pd;
+	else if (type == SMC_NLA_STATS_T_RXPLOAD_SIZE)
+		stats_pload = &stats->smc[tech].rx_pd;
+	else if (type == SMC_NLA_STATS_T_TX_RMB_SIZE)
+		stats_pload = &stats->smc[tech].tx_rmbsize;
+	else if (type == SMC_NLA_STATS_T_RX_RMB_SIZE)
+		stats_pload = &stats->smc[tech].rx_rmbsize;
+	else
+		goto errout;
+
+	attrs = nla_nest_start(skb, type);
+	if (!attrs)
+		goto errout;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_8K,
+			      stats_pload->buf[SMC_BUF_8K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_16K,
+			      stats_pload->buf[SMC_BUF_16K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_32K,
+			      stats_pload->buf[SMC_BUF_32K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_64K,
+			      stats_pload->buf[SMC_BUF_64K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_128K,
+			      stats_pload->buf[SMC_BUF_128K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_256K,
+			      stats_pload->buf[SMC_BUF_256K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_512K,
+			      stats_pload->buf[SMC_BUF_512K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_1024K,
+			      stats_pload->buf[SMC_BUF_1024K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_G_1024K,
+			      stats_pload->buf[SMC_BUF_G_1024K],
+			      SMC_NLA_STATS_PLOAD_PAD))
+		goto errattr;
+
+	nla_nest_end(skb, attrs);
+	return 0;
+
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	return -EMSGSIZE;
+}
+
+static int smc_nl_fill_stats_tech_data(struct sk_buff *skb,
+				       struct smc_stats *stats, int tech)
+{
+	struct smc_stats_tech *smc_tech;
+	struct nlattr *attrs;
+
+	smc_tech = &stats->smc[tech];
+	if (tech == SMC_TYPE_D)
+		attrs = nla_nest_start(skb, SMC_NLA_STATS_SMCD_TECH);
+	else
+		attrs = nla_nest_start(skb, SMC_NLA_STATS_SMCR_TECH);
+
+	if (!attrs)
+		goto errout;
+	if (smc_nl_fill_stats_rmb_data(skb, stats, tech,
+				       SMC_NLA_STATS_T_TX_RMB_STATS))
+		goto errattr;
+	if (smc_nl_fill_stats_rmb_data(skb, stats, tech,
+				       SMC_NLA_STATS_T_RX_RMB_STATS))
+		goto errattr;
+	if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+					   SMC_NLA_STATS_T_TXPLOAD_SIZE))
+		goto errattr;
+	if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+					   SMC_NLA_STATS_T_RXPLOAD_SIZE))
+		goto errattr;
+	if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+					   SMC_NLA_STATS_T_TX_RMB_SIZE))
+		goto errattr;
+	if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+					   SMC_NLA_STATS_T_RX_RMB_SIZE))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CLNT_V1_SUCC,
+			      smc_tech->clnt_v1_succ_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CLNT_V2_SUCC,
+			      smc_tech->clnt_v2_succ_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SRV_V1_SUCC,
+			      smc_tech->srv_v1_succ_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SRV_V2_SUCC,
+			      smc_tech->srv_v2_succ_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_BYTES,
+			      smc_tech->rx_bytes,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_TX_BYTES,
+			      smc_tech->tx_bytes,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT,
+			      smc_tech->rx_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_TX_CNT,
+			      smc_tech->tx_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SENDPAGE_CNT,
+			      smc_tech->sendpage_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CORK_CNT,
+			      smc_tech->cork_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_NDLY_CNT,
+			      smc_tech->ndly_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SPLICE_CNT,
+			      smc_tech->splice_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_URG_DATA_CNT,
+			      smc_tech->urg_data_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+
+	nla_nest_end(skb, attrs);
+	return 0;
+
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	return -EMSGSIZE;
+}
+
+int smc_nl_get_stats(struct sk_buff *skb,
+		     struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct smc_stats *stats;
+	struct nlattr *attrs;
+	int cpu, i, size;
+	void *nlh;
+	u64 *src;
+	u64 *sum;
+
+	if (cb_ctx->pos[0])
+		goto errmsg;
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_STATS);
+	if (!nlh)
+		goto errmsg;
+
+	attrs = nla_nest_start(skb, SMC_GEN_STATS);
+	if (!attrs)
+		goto errnest;
+	stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		goto erralloc;
+	size = sizeof(*stats) / sizeof(u64);
+	for_each_possible_cpu(cpu) {
+		src = (u64 *)per_cpu_ptr(smc_stats, cpu);
+		sum = (u64 *)stats;
+		for (i = 0; i < size; i++)
+			*(sum++) += *(src++);
+	}
+	if (smc_nl_fill_stats_tech_data(skb, stats, SMC_TYPE_D))
+		goto errattr;
+	if (smc_nl_fill_stats_tech_data(skb, stats, SMC_TYPE_R))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_CLNT_HS_ERR_CNT,
+			      stats->clnt_hshake_err_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+	if (nla_put_u64_64bit(skb, SMC_NLA_STATS_SRV_HS_ERR_CNT,
+			      stats->srv_hshake_err_cnt,
+			      SMC_NLA_STATS_PAD))
+		goto errattr;
+
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	cb_ctx->pos[0] = 1;
+	kfree(stats);
+	return skb->len;
+
+errattr:
+	kfree(stats);
+erralloc:
+	nla_nest_cancel(skb, attrs);
+errnest:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return skb->len;
+}
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index 928372114cf1..84baaca59eaf 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -247,6 +247,7 @@ do { \
 } \
 while (0)
 
+int smc_nl_get_stats(struct sk_buff *skb, struct netlink_callback *cb);
 int smc_stats_init(void) __init;
 void smc_stats_exit(void);
 

From f0dd7bf5e33066e554442c509ef6351728b95b51 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Wed, 16 Jun 2021 16:52:57 +0200
Subject: [PATCH 2001/3292] net/smc: Add netlink support for SMC fallback
 statistics

Add support to collect more detailed SMC fallback reason statistics and
provide these statistics to user space on the netlink interface.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc.h | 14 ++++++
 net/smc/smc_netlink.c    |  5 +++
 net/smc/smc_netlink.h    |  2 +-
 net/smc/smc_stats.c      | 92 ++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_stats.h      |  1 +
 5 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index f32f11b30963..0f7f87c70baf 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -48,6 +48,7 @@ enum {
 	SMC_NETLINK_GET_DEV_SMCD,
 	SMC_NETLINK_GET_DEV_SMCR,
 	SMC_NETLINK_GET_STATS,
+	SMC_NETLINK_GET_FBACK_STATS,
 };
 
 /* SMC_GENL_FAMILY top level attributes */
@@ -60,6 +61,7 @@ enum {
 	SMC_GEN_DEV_SMCD,		/* nest */
 	SMC_GEN_DEV_SMCR,		/* nest */
 	SMC_GEN_STATS,			/* nest */
+	SMC_GEN_FBACK_STATS,		/* nest */
 	__SMC_GEN_MAX,
 	SMC_GEN_MAX = __SMC_GEN_MAX - 1
 };
@@ -228,4 +230,16 @@ enum {
 	__SMC_NLA_STATS_MAX,
 	SMC_NLA_STATS_MAX = __SMC_NLA_STATS_MAX - 1
 };
+
+/* SMC_GEN_FBACK_STATS attributes */
+enum {
+	SMC_NLA_FBACK_STATS_PAD,
+	SMC_NLA_FBACK_STATS_TYPE,	/* u8 */
+	SMC_NLA_FBACK_STATS_SRV_CNT,	/* u64 */
+	SMC_NLA_FBACK_STATS_CLNT_CNT,	/* u64 */
+	SMC_NLA_FBACK_STATS_RSN_CODE,	/* u32 */
+	SMC_NLA_FBACK_STATS_RSN_CNT,	/* u16 */
+	__SMC_NLA_FBACK_STATS_MAX,
+	SMC_NLA_FBACK_STATS_MAX = __SMC_NLA_FBACK_STATS_MAX - 1
+};
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 30e30b23370f..6fb6f96c1d17 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -61,6 +61,11 @@ static const struct genl_ops smc_gen_nl_ops[] = {
 		/* can be retrieved by unprivileged users */
 		.dumpit = smc_nl_get_stats,
 	},
+	{
+		.cmd = SMC_NETLINK_GET_FBACK_STATS,
+		/* can be retrieved by unprivileged users */
+		.dumpit = smc_nl_get_fback_stats,
+	},
 };
 
 static const struct nla_policy smc_gen_nl_policy[2] = {
diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h
index 3477265cba6c..5ce2c0a89ccd 100644
--- a/net/smc/smc_netlink.h
+++ b/net/smc/smc_netlink.h
@@ -18,7 +18,7 @@
 extern struct genl_family smc_gen_nl_family;
 
 struct smc_nl_dmp_ctx {
-	int pos[2];
+	int pos[3];
 };
 
 static inline struct smc_nl_dmp_ctx *smc_nl_dmp_ctx(struct netlink_callback *c)
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index 72119d3d8558..b3d279d29c52 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -312,3 +312,95 @@ errnest:
 errmsg:
 	return skb->len;
 }
+
+static int smc_nl_get_fback_details(struct sk_buff *skb,
+				    struct netlink_callback *cb, int pos,
+				    bool is_srv)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	int cnt_reported = cb_ctx->pos[2];
+	struct smc_stats_fback *trgt_arr;
+	struct nlattr *attrs;
+	int rc = 0;
+	void *nlh;
+
+	if (is_srv)
+		trgt_arr = &fback_rsn.srv[0];
+	else
+		trgt_arr = &fback_rsn.clnt[0];
+	if (!trgt_arr[pos].fback_code)
+		return -ENODATA;
+	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &smc_gen_nl_family, NLM_F_MULTI,
+			  SMC_NETLINK_GET_FBACK_STATS);
+	if (!nlh)
+		goto errmsg;
+	attrs = nla_nest_start(skb, SMC_GEN_FBACK_STATS);
+	if (!attrs)
+		goto errout;
+	if (nla_put_u8(skb, SMC_NLA_FBACK_STATS_TYPE, is_srv))
+		goto errattr;
+	if (!cnt_reported) {
+		if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_SRV_CNT,
+				      fback_rsn.srv_fback_cnt,
+				      SMC_NLA_FBACK_STATS_PAD))
+			goto errattr;
+		if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_CLNT_CNT,
+				      fback_rsn.clnt_fback_cnt,
+				      SMC_NLA_FBACK_STATS_PAD))
+			goto errattr;
+		cnt_reported = 1;
+	}
+
+	if (nla_put_u32(skb, SMC_NLA_FBACK_STATS_RSN_CODE,
+			trgt_arr[pos].fback_code))
+		goto errattr;
+	if (nla_put_u16(skb, SMC_NLA_FBACK_STATS_RSN_CNT,
+			trgt_arr[pos].count))
+		goto errattr;
+
+	cb_ctx->pos[2] = cnt_reported;
+	nla_nest_end(skb, attrs);
+	genlmsg_end(skb, nlh);
+	return rc;
+
+errattr:
+	nla_nest_cancel(skb, attrs);
+errout:
+	genlmsg_cancel(skb, nlh);
+errmsg:
+	return -EMSGSIZE;
+}
+
+int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	int rc_srv = 0, rc_clnt = 0, k;
+	int skip_serv = cb_ctx->pos[1];
+	int snum = cb_ctx->pos[0];
+	bool is_srv = true;
+
+	mutex_lock(&smc_stat_fback_rsn);
+	for (k = 0; k < SMC_MAX_FBACK_RSN_CNT; k++) {
+		if (k < snum)
+			continue;
+		if (!skip_serv) {
+			rc_srv = smc_nl_get_fback_details(skb, cb, k, is_srv);
+			if (rc_srv && rc_srv != ENODATA)
+				break;
+		} else {
+			skip_serv = 0;
+		}
+		rc_clnt = smc_nl_get_fback_details(skb, cb, k, !is_srv);
+		if (rc_clnt && rc_clnt != ENODATA) {
+			skip_serv = 1;
+			break;
+		}
+		if (rc_clnt == ENODATA && rc_srv == ENODATA)
+			break;
+	}
+	mutex_unlock(&smc_stat_fback_rsn);
+	cb_ctx->pos[1] = skip_serv;
+	cb_ctx->pos[0] = k;
+	return skb->len;
+}
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index 84baaca59eaf..7c35b22d9e29 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -248,6 +248,7 @@ do { \
 while (0)
 
 int smc_nl_get_stats(struct sk_buff *skb, struct netlink_callback *cb);
+int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb);
 int smc_stats_init(void) __init;
 void smc_stats_exit(void);
 

From 194730a9beb52d2b030ea45e12d94868d4a0e6fd Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Wed, 16 Jun 2021 16:52:58 +0200
Subject: [PATCH 2002/3292] net/smc: Make SMC statistics network namespace
 aware

Make the gathered SMC statistics network namespace aware, for each
namespace collect an own set of statistic information.

Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |   4 ++
 include/net/netns/smc.h     |  16 ++++++
 net/smc/af_smc.c            |  65 +++++++++++++--------
 net/smc/smc_core.c          |  10 ++--
 net/smc/smc_rx.c            |   6 +-
 net/smc/smc_stats.c         |  47 ++++++++-------
 net/smc/smc_stats.h         | 111 ++++++++++++++++++++----------------
 net/smc/smc_tx.c            |  12 ++--
 8 files changed, 163 insertions(+), 108 deletions(-)
 create mode 100644 include/net/netns/smc.h

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fa5887143f0d..befc5b93f311 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -32,6 +32,7 @@
 #include <net/netns/mpls.h>
 #include <net/netns/can.h>
 #include <net/netns/xdp.h>
+#include <net/netns/smc.h>
 #include <net/netns/bpf.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
@@ -170,6 +171,9 @@ struct net {
 	struct sock		*crypto_nlsk;
 #endif
 	struct sock		*diag_nlsk;
+#if IS_ENABLED(CONFIG_SMC)
+	struct netns_smc	smc;
+#endif
 } __randomize_layout;
 
 #include <linux/seq_file_net.h>
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
new file mode 100644
index 000000000000..ea8a9cf2619b
--- /dev/null
+++ b/include/net/netns/smc.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETNS_SMC_H__
+#define __NETNS_SMC_H__
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+
+struct smc_stats_rsn;
+struct smc_stats;
+struct netns_smc {
+	/* per cpu counters for SMC */
+	struct smc_stats __percpu	*smc_stats;
+	/* protect fback_rsn */
+	struct mutex			mutex_fback_rsn;
+	struct smc_stats_rsn		*fback_rsn;
+};
+#endif
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index efeaed384769..e41fdac606d4 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -529,15 +529,17 @@ static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
 
 static void smc_stat_fallback(struct smc_sock *smc)
 {
-	mutex_lock(&smc_stat_fback_rsn);
+	struct net *net = sock_net(&smc->sk);
+
+	mutex_lock(&net->smc.mutex_fback_rsn);
 	if (smc->listen_smc) {
-		smc_stat_inc_fback_rsn_cnt(smc, fback_rsn.srv);
-		fback_rsn.srv_fback_cnt++;
+		smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
+		net->smc.fback_rsn->srv_fback_cnt++;
 	} else {
-		smc_stat_inc_fback_rsn_cnt(smc, fback_rsn.clnt);
-		fback_rsn.clnt_fback_cnt++;
+		smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
+		net->smc.fback_rsn->clnt_fback_cnt++;
 	}
-	mutex_unlock(&smc_stat_fback_rsn);
+	mutex_unlock(&net->smc.mutex_fback_rsn);
 }
 
 static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
@@ -568,10 +570,11 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
 					u8 version)
 {
+	struct net *net = sock_net(&smc->sk);
 	int rc;
 
 	if (reason_code < 0) { /* error, fallback is not possible */
-		this_cpu_inc(smc_stats->clnt_hshake_err_cnt);
+		this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
 		if (smc->sk.sk_state == SMC_INIT)
 			sock_put(&smc->sk); /* passive closing */
 		return reason_code;
@@ -579,7 +582,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
 	if (reason_code != SMC_CLC_DECL_PEERDECL) {
 		rc = smc_clc_send_decline(smc, reason_code, version);
 		if (rc < 0) {
-			this_cpu_inc(smc_stats->clnt_hshake_err_cnt);
+			this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
 			if (smc->sk.sk_state == SMC_INIT)
 				sock_put(&smc->sk); /* passive closing */
 			return rc;
@@ -1027,7 +1030,7 @@ static int __smc_connect(struct smc_sock *smc)
 	if (rc)
 		goto vlan_cleanup;
 
-	SMC_STAT_CLNT_SUCC_INC(aclc);
+	SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
 	smc_connect_ism_vlan_cleanup(smc, ini);
 	kfree(buf);
 	kfree(ini);
@@ -1343,8 +1346,9 @@ static void smc_listen_out_connected(struct smc_sock *new_smc)
 static void smc_listen_out_err(struct smc_sock *new_smc)
 {
 	struct sock *newsmcsk = &new_smc->sk;
+	struct net *net = sock_net(newsmcsk);
 
-	this_cpu_inc(smc_stats->srv_hshake_err_cnt);
+	this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
 	if (newsmcsk->sk_state == SMC_INIT)
 		sock_put(&new_smc->sk); /* passive closing */
 	newsmcsk->sk_state = SMC_CLOSED;
@@ -1813,7 +1817,7 @@ static void smc_listen_work(struct work_struct *work)
 	}
 	smc_conn_save_peer_info(new_smc, cclc);
 	smc_listen_out_connected(new_smc);
-	SMC_STAT_SERV_SUCC_INC(ini);
+	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
 	goto out_free;
 
 out_unlock:
@@ -2242,7 +2246,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 		    sk->sk_state != SMC_LISTEN &&
 		    sk->sk_state != SMC_CLOSED) {
 			if (val) {
-				SMC_STAT_INC(!smc->conn.lnk, ndly_cnt);
+				SMC_STAT_INC(smc, ndly_cnt);
 				mod_delayed_work(smc->conn.lgr->tx_wq,
 						 &smc->conn.tx_work, 0);
 			}
@@ -2253,7 +2257,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 		    sk->sk_state != SMC_LISTEN &&
 		    sk->sk_state != SMC_CLOSED) {
 			if (!val) {
-				SMC_STAT_INC(!smc->conn.lnk, cork_cnt);
+				SMC_STAT_INC(smc, cork_cnt);
 				mod_delayed_work(smc->conn.lgr->tx_wq,
 						 &smc->conn.tx_work, 0);
 			}
@@ -2383,7 +2387,7 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
 		rc = kernel_sendpage(smc->clcsock, page, offset,
 				     size, flags);
 	} else {
-		SMC_STAT_INC(!smc->conn.lnk, sendpage_cnt);
+		SMC_STAT_INC(smc, sendpage_cnt);
 		rc = sock_no_sendpage(sock, page, offset, size, flags);
 	}
 
@@ -2434,7 +2438,7 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
 			flags = MSG_DONTWAIT;
 		else
 			flags = 0;
-		SMC_STAT_INC(!smc->conn.lnk, splice_cnt);
+		SMC_STAT_INC(smc, splice_cnt);
 		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
 	}
 out:
@@ -2523,6 +2527,16 @@ static void __net_exit smc_net_exit(struct net *net)
 	smc_pnet_net_exit(net);
 }
 
+static __net_init int smc_net_stat_init(struct net *net)
+{
+	return smc_stats_init(net);
+}
+
+static void __net_exit smc_net_stat_exit(struct net *net)
+{
+	smc_stats_exit(net);
+}
+
 static struct pernet_operations smc_net_ops = {
 	.init = smc_net_init,
 	.exit = smc_net_exit,
@@ -2530,6 +2544,11 @@ static struct pernet_operations smc_net_ops = {
 	.size = sizeof(struct smc_net),
 };
 
+static struct pernet_operations smc_net_stat_ops = {
+	.init = smc_net_stat_init,
+	.exit = smc_net_stat_exit,
+};
+
 static int __init smc_init(void)
 {
 	int rc;
@@ -2538,6 +2557,10 @@ static int __init smc_init(void)
 	if (rc)
 		return rc;
 
+	rc = register_pernet_subsys(&smc_net_stat_ops);
+	if (rc)
+		return rc;
+
 	smc_ism_init();
 	smc_clc_init();
 
@@ -2558,16 +2581,10 @@ static int __init smc_init(void)
 	if (!smc_close_wq)
 		goto out_alloc_hs_wq;
 
-	rc = smc_stats_init();
-	if (rc) {
-		pr_err("%s: smc_stats_init fails with %d\n", __func__, rc);
-		goto out_alloc_wqs;
-	}
-
 	rc = smc_core_init();
 	if (rc) {
 		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
-		goto out_smc_stat;
+		goto out_alloc_wqs;
 	}
 
 	rc = smc_llc_init();
@@ -2619,8 +2636,6 @@ out_proto:
 	proto_unregister(&smc_proto);
 out_core:
 	smc_core_exit();
-out_smc_stat:
-	smc_stats_exit();
 out_alloc_wqs:
 	destroy_workqueue(smc_close_wq);
 out_alloc_hs_wq:
@@ -2643,11 +2658,11 @@ static void __exit smc_exit(void)
 	smc_ib_unregister_client();
 	destroy_workqueue(smc_close_wq);
 	destroy_workqueue(smc_hs_wq);
-	smc_stats_exit();
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
 	smc_pnet_exit();
 	smc_nl_exit();
+	unregister_pernet_subsys(&smc_net_stat_ops);
 	unregister_pernet_subsys(&smc_net_ops);
 	rcu_barrier();
 }
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d69f58f670a1..cd0d7c908b2a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2058,8 +2058,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 		/* check for reusable slot in the link group */
 		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
 		if (buf_desc) {
-			SMC_STAT_RMB_SIZE(is_smcd, is_rmb, bufsize);
-			SMC_STAT_BUF_REUSE(is_smcd, is_rmb);
+			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
 			memset(buf_desc->cpu_addr, 0, bufsize);
 			break; /* found reusable slot */
 		}
@@ -2074,13 +2074,13 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 		if (IS_ERR(buf_desc)) {
 			if (!is_dgraded) {
 				is_dgraded = true;
-				SMC_STAT_RMB_DOWNGRADED(is_smcd, is_rmb);
+				SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
 			}
 			continue;
 		}
 
-		SMC_STAT_RMB_ALLOC(is_smcd, is_rmb);
-		SMC_STAT_RMB_SIZE(is_smcd, is_rmb, bufsize);
+		SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
+		SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
 		buf_desc->used = 1;
 		mutex_lock(lock);
 		list_add(&buf_desc->list, buf_list);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index ce1ae39923b1..170b733bc736 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -228,7 +228,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
 	    conn->urg_state == SMC_URG_READ)
 		return -EINVAL;
 
-	SMC_STAT_INC(!conn->lnk, urg_data_cnt);
+	SMC_STAT_INC(smc, urg_data_cnt);
 	if (conn->urg_state == SMC_URG_VALID) {
 		if (!(flags & MSG_PEEK))
 			smc->conn.urg_state = SMC_URG_READ;
@@ -307,10 +307,10 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
 
 	readable = atomic_read(&conn->bytes_to_rcv);
 	if (readable >= conn->rmb_desc->len)
-		SMC_STAT_RMB_RX_FULL(!conn->lnk);
+		SMC_STAT_RMB_RX_FULL(smc, !conn->lnk);
 
 	if (len < readable)
-		SMC_STAT_RMB_RX_SIZE_SMALL(!conn->lnk);
+		SMC_STAT_RMB_RX_SIZE_SMALL(smc, !conn->lnk);
 	/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
 	rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
 
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index b3d279d29c52..614013e3b574 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -18,24 +18,28 @@
 #include "smc_netlink.h"
 #include "smc_stats.h"
 
-/* serialize fallback reason statistic gathering */
-DEFINE_MUTEX(smc_stat_fback_rsn);
-struct smc_stats __percpu *smc_stats;	/* per cpu counters for SMC */
-struct smc_stats_reason fback_rsn;
-
-int __init smc_stats_init(void)
+int smc_stats_init(struct net *net)
 {
-	memset(&fback_rsn, 0, sizeof(fback_rsn));
-	smc_stats = alloc_percpu(struct smc_stats);
-	if (!smc_stats)
-		return -ENOMEM;
-
+	net->smc.fback_rsn = kzalloc(sizeof(*net->smc.fback_rsn), GFP_KERNEL);
+	if (!net->smc.fback_rsn)
+		goto err_fback;
+	net->smc.smc_stats = alloc_percpu(struct smc_stats);
+	if (!net->smc.smc_stats)
+		goto err_stats;
+	mutex_init(&net->smc.mutex_fback_rsn);
 	return 0;
+
+err_stats:
+	kfree(net->smc.fback_rsn);
+err_fback:
+	return -ENOMEM;
 }
 
-void smc_stats_exit(void)
+void smc_stats_exit(struct net *net)
 {
-	free_percpu(smc_stats);
+	kfree(net->smc.fback_rsn);
+	if (net->smc.smc_stats)
+		free_percpu(net->smc.smc_stats);
 }
 
 static int smc_nl_fill_stats_rmb_data(struct sk_buff *skb,
@@ -256,6 +260,7 @@ int smc_nl_get_stats(struct sk_buff *skb,
 		     struct netlink_callback *cb)
 {
 	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct net *net = sock_net(skb->sk);
 	struct smc_stats *stats;
 	struct nlattr *attrs;
 	int cpu, i, size;
@@ -279,7 +284,7 @@ int smc_nl_get_stats(struct sk_buff *skb,
 		goto erralloc;
 	size = sizeof(*stats) / sizeof(u64);
 	for_each_possible_cpu(cpu) {
-		src = (u64 *)per_cpu_ptr(smc_stats, cpu);
+		src = (u64 *)per_cpu_ptr(net->smc.smc_stats, cpu);
 		sum = (u64 *)stats;
 		for (i = 0; i < size; i++)
 			*(sum++) += *(src++);
@@ -318,6 +323,7 @@ static int smc_nl_get_fback_details(struct sk_buff *skb,
 				    bool is_srv)
 {
 	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct net *net = sock_net(skb->sk);
 	int cnt_reported = cb_ctx->pos[2];
 	struct smc_stats_fback *trgt_arr;
 	struct nlattr *attrs;
@@ -325,9 +331,9 @@ static int smc_nl_get_fback_details(struct sk_buff *skb,
 	void *nlh;
 
 	if (is_srv)
-		trgt_arr = &fback_rsn.srv[0];
+		trgt_arr = &net->smc.fback_rsn->srv[0];
 	else
-		trgt_arr = &fback_rsn.clnt[0];
+		trgt_arr = &net->smc.fback_rsn->clnt[0];
 	if (!trgt_arr[pos].fback_code)
 		return -ENODATA;
 	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
@@ -342,11 +348,11 @@ static int smc_nl_get_fback_details(struct sk_buff *skb,
 		goto errattr;
 	if (!cnt_reported) {
 		if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_SRV_CNT,
-				      fback_rsn.srv_fback_cnt,
+				      net->smc.fback_rsn->srv_fback_cnt,
 				      SMC_NLA_FBACK_STATS_PAD))
 			goto errattr;
 		if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_CLNT_CNT,
-				      fback_rsn.clnt_fback_cnt,
+				      net->smc.fback_rsn->clnt_fback_cnt,
 				      SMC_NLA_FBACK_STATS_PAD))
 			goto errattr;
 		cnt_reported = 1;
@@ -375,12 +381,13 @@ errmsg:
 int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+	struct net *net = sock_net(skb->sk);
 	int rc_srv = 0, rc_clnt = 0, k;
 	int skip_serv = cb_ctx->pos[1];
 	int snum = cb_ctx->pos[0];
 	bool is_srv = true;
 
-	mutex_lock(&smc_stat_fback_rsn);
+	mutex_lock(&net->smc.mutex_fback_rsn);
 	for (k = 0; k < SMC_MAX_FBACK_RSN_CNT; k++) {
 		if (k < snum)
 			continue;
@@ -399,7 +406,7 @@ int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb)
 		if (rc_clnt == ENODATA && rc_srv == ENODATA)
 			break;
 	}
-	mutex_unlock(&smc_stat_fback_rsn);
+	mutex_unlock(&net->smc.mutex_fback_rsn);
 	cb_ctx->pos[1] = skip_serv;
 	cb_ctx->pos[0] = k;
 	return skb->len;
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index 7c35b22d9e29..84b7ecd8c05c 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -21,10 +21,6 @@
 
 #define SMC_MAX_FBACK_RSN_CNT 30
 
-extern struct smc_stats __percpu *smc_stats;	/* per cpu counters for SMC */
-extern struct smc_stats_reason fback_rsn;
-extern struct mutex smc_stat_fback_rsn;
-
 enum {
 	SMC_BUF_8K,
 	SMC_BUF_16K,
@@ -43,7 +39,7 @@ struct smc_stats_fback {
 	u16	count;
 };
 
-struct smc_stats_reason {
+struct smc_stats_rsn {
 	struct	smc_stats_fback srv[SMC_MAX_FBACK_RSN_CNT];
 	struct	smc_stats_fback clnt[SMC_MAX_FBACK_RSN_CNT];
 	u64			srv_fback_cnt;
@@ -92,122 +88,135 @@ struct smc_stats {
 	u64			srv_hshake_err_cnt;
 };
 
-#define SMC_STAT_PAYLOAD_SUB(_tech, key, _len, _rc) \
+#define SMC_STAT_PAYLOAD_SUB(_smc_stats, _tech, key, _len, _rc) \
 do { \
+	typeof(_smc_stats) stats = (_smc_stats); \
 	typeof(_tech) t = (_tech); \
 	typeof(_len) l = (_len); \
 	int _pos = fls64((l) >> 13); \
 	typeof(_rc) r = (_rc); \
 	int m = SMC_BUF_MAX - 1; \
-	this_cpu_inc((*smc_stats).smc[t].key ## _cnt); \
+	this_cpu_inc((*stats).smc[t].key ## _cnt); \
 	if (r <= 0) \
 		break; \
 	_pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
-	this_cpu_inc((*smc_stats).smc[t].key ## _pd.buf[_pos]); \
-	this_cpu_add((*smc_stats).smc[t].key ## _bytes, r); \
+	this_cpu_inc((*stats).smc[t].key ## _pd.buf[_pos]); \
+	this_cpu_add((*stats).smc[t].key ## _bytes, r); \
 } \
 while (0)
 
 #define SMC_STAT_TX_PAYLOAD(_smc, length, rcode) \
 do { \
 	typeof(_smc) __smc = _smc; \
+	struct net *_net = sock_net(&__smc->sk); \
+	struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
 	typeof(length) _len = (length); \
 	typeof(rcode) _rc = (rcode); \
 	bool is_smcd = !__smc->conn.lnk; \
 	if (is_smcd) \
-		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_D, tx, _len, _rc); \
+		SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_D, tx, _len, _rc); \
 	else \
-		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_R, tx, _len, _rc); \
+		SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_R, tx, _len, _rc); \
 } \
 while (0)
 
 #define SMC_STAT_RX_PAYLOAD(_smc, length, rcode) \
 do { \
 	typeof(_smc) __smc = _smc; \
+	struct net *_net = sock_net(&__smc->sk); \
+	struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
 	typeof(length) _len = (length); \
 	typeof(rcode) _rc = (rcode); \
 	bool is_smcd = !__smc->conn.lnk; \
 	if (is_smcd) \
-		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_D, rx, _len, _rc); \
+		SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_D, rx, _len, _rc); \
 	else \
-		SMC_STAT_PAYLOAD_SUB(SMC_TYPE_R, rx, _len, _rc); \
+		SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_R, rx, _len, _rc); \
 } \
 while (0)
 
-#define SMC_STAT_RMB_SIZE_SUB(_tech, k, _len) \
+#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len) \
 do { \
 	typeof(_len) _l = (_len); \
 	typeof(_tech) t = (_tech); \
 	int _pos = fls((_l) >> 13); \
 	int m = SMC_BUF_MAX - 1; \
 	_pos = (_pos < m) ? ((_l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
-	this_cpu_inc((*smc_stats).smc[t].k ## _rmbsize.buf[_pos]); \
+	this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \
 } \
 while (0)
 
-#define SMC_STAT_RMB_SUB(type, t, key) \
-	this_cpu_inc((*smc_stats).smc[t].rmb ## _ ## key.type ## _cnt)
+#define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \
+	this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt)
 
-#define SMC_STAT_RMB_SIZE(_is_smcd, _is_rx, _len) \
+#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _len) \
 do { \
+	struct net *_net = sock_net(&(_smc)->sk); \
+	struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
 	typeof(_is_smcd) is_d = (_is_smcd); \
 	typeof(_is_rx) is_r = (_is_rx); \
 	typeof(_len) l = (_len); \
 	if ((is_d) && (is_r)) \
-		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_D, rx, l); \
+		SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, l); \
 	if ((is_d) && !(is_r)) \
-		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_D, tx, l); \
+		SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, l); \
 	if (!(is_d) && (is_r)) \
-		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_R, rx, l); \
+		SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, l); \
 	if (!(is_d) && !(is_r)) \
-		SMC_STAT_RMB_SIZE_SUB(SMC_TYPE_R, tx, l); \
+		SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, l); \
 } \
 while (0)
 
-#define SMC_STAT_RMB(type, _is_smcd, _is_rx) \
+#define SMC_STAT_RMB(_smc, type, _is_smcd, _is_rx) \
 do { \
+	struct net *net = sock_net(&(_smc)->sk); \
+	struct smc_stats __percpu *_smc_stats = net->smc.smc_stats; \
 	typeof(_is_smcd) is_d = (_is_smcd); \
 	typeof(_is_rx) is_r = (_is_rx); \
 	if ((is_d) && (is_r)) \
-		SMC_STAT_RMB_SUB(type, SMC_TYPE_D, rx); \
+		SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_D, rx); \
 	if ((is_d) && !(is_r)) \
-		SMC_STAT_RMB_SUB(type, SMC_TYPE_D, tx); \
+		SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_D, tx); \
 	if (!(is_d) && (is_r)) \
-		SMC_STAT_RMB_SUB(type, SMC_TYPE_R, rx); \
+		SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_R, rx); \
 	if (!(is_d) && !(is_r)) \
-		SMC_STAT_RMB_SUB(type, SMC_TYPE_R, tx); \
+		SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_R, tx); \
 } \
 while (0)
 
-#define SMC_STAT_BUF_REUSE(is_smcd, is_rx) \
-	SMC_STAT_RMB(reuse, is_smcd, is_rx)
+#define SMC_STAT_BUF_REUSE(smc, is_smcd, is_rx) \
+	SMC_STAT_RMB(smc, reuse, is_smcd, is_rx)
 
-#define SMC_STAT_RMB_ALLOC(is_smcd, is_rx) \
-	SMC_STAT_RMB(alloc, is_smcd, is_rx)
+#define SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rx) \
+	SMC_STAT_RMB(smc, alloc, is_smcd, is_rx)
 
-#define SMC_STAT_RMB_DOWNGRADED(is_smcd, is_rx) \
-	SMC_STAT_RMB(dgrade, is_smcd, is_rx)
+#define SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rx) \
+	SMC_STAT_RMB(smc, dgrade, is_smcd, is_rx)
 
-#define SMC_STAT_RMB_TX_PEER_FULL(is_smcd) \
-	SMC_STAT_RMB(buf_full_peer, is_smcd, false)
+#define SMC_STAT_RMB_TX_PEER_FULL(smc, is_smcd) \
+	SMC_STAT_RMB(smc, buf_full_peer, is_smcd, false)
 
-#define SMC_STAT_RMB_TX_FULL(is_smcd) \
-	SMC_STAT_RMB(buf_full, is_smcd, false)
+#define SMC_STAT_RMB_TX_FULL(smc, is_smcd) \
+	SMC_STAT_RMB(smc, buf_full, is_smcd, false)
 
-#define SMC_STAT_RMB_TX_PEER_SIZE_SMALL(is_smcd) \
-	SMC_STAT_RMB(buf_size_small_peer, is_smcd, false)
+#define SMC_STAT_RMB_TX_PEER_SIZE_SMALL(smc, is_smcd) \
+	SMC_STAT_RMB(smc, buf_size_small_peer, is_smcd, false)
 
-#define SMC_STAT_RMB_TX_SIZE_SMALL(is_smcd) \
-	SMC_STAT_RMB(buf_size_small, is_smcd, false)
+#define SMC_STAT_RMB_TX_SIZE_SMALL(smc, is_smcd) \
+	SMC_STAT_RMB(smc, buf_size_small, is_smcd, false)
 
-#define SMC_STAT_RMB_RX_SIZE_SMALL(is_smcd) \
-	SMC_STAT_RMB(buf_size_small, is_smcd, true)
+#define SMC_STAT_RMB_RX_SIZE_SMALL(smc, is_smcd) \
+	SMC_STAT_RMB(smc, buf_size_small, is_smcd, true)
 
-#define SMC_STAT_RMB_RX_FULL(is_smcd) \
-	SMC_STAT_RMB(buf_full, is_smcd, true)
+#define SMC_STAT_RMB_RX_FULL(smc, is_smcd) \
+	SMC_STAT_RMB(smc, buf_full, is_smcd, true)
 
-#define SMC_STAT_INC(is_smcd, type) \
+#define SMC_STAT_INC(_smc, type) \
 do { \
+	typeof(_smc) __smc = _smc; \
+	bool is_smcd = !(__smc)->conn.lnk; \
+	struct net *net = sock_net(&(__smc)->sk); \
+	struct smc_stats __percpu *smc_stats = net->smc.smc_stats; \
 	if ((is_smcd)) \
 		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].type); \
 	else \
@@ -215,11 +224,12 @@ do { \
 } \
 while (0)
 
-#define SMC_STAT_CLNT_SUCC_INC(_aclc) \
+#define SMC_STAT_CLNT_SUCC_INC(net, _aclc) \
 do { \
 	typeof(_aclc) acl = (_aclc); \
 	bool is_v2 = (acl->hdr.version == SMC_V2); \
 	bool is_smcd = (acl->hdr.typev1 == SMC_TYPE_D); \
+	struct smc_stats __percpu *smc_stats = (net)->smc.smc_stats; \
 	if (is_v2 && is_smcd) \
 		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].clnt_v2_succ_cnt); \
 	else if (is_v2 && !is_smcd) \
@@ -231,11 +241,12 @@ do { \
 } \
 while (0)
 
-#define SMC_STAT_SERV_SUCC_INC(_ini) \
+#define SMC_STAT_SERV_SUCC_INC(net, _ini) \
 do { \
 	typeof(_ini) i = (_ini); \
 	bool is_v2 = (i->smcd_version & SMC_V2); \
 	bool is_smcd = (i->is_smcd); \
+	typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \
 	if (is_v2 && is_smcd) \
 		this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
 	else if (is_v2 && !is_smcd) \
@@ -249,7 +260,7 @@ while (0)
 
 int smc_nl_get_stats(struct sk_buff *skb, struct netlink_callback *cb);
 int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb);
-int smc_stats_init(void) __init;
-void smc_stats_exit(void);
+int smc_stats_init(struct net *net);
+void smc_stats_exit(struct net *net);
 
 #endif /* NET_SMC_SMC_STATS_H_ */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index a043544d715f..075c4f4b41cf 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -47,7 +47,7 @@ static void smc_tx_write_space(struct sock *sk)
 	/* similar to sk_stream_write_space */
 	if (atomic_read(&smc->conn.sndbuf_space) && sock) {
 		if (test_bit(SOCK_NOSPACE, &sock->flags))
-			SMC_STAT_RMB_TX_FULL(!smc->conn.lnk);
+			SMC_STAT_RMB_TX_FULL(smc, !smc->conn.lnk);
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 		rcu_read_lock();
 		wq = rcu_dereference(sk->sk_wq);
@@ -155,13 +155,13 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 	}
 
 	if (len > conn->sndbuf_desc->len)
-		SMC_STAT_RMB_TX_SIZE_SMALL(!conn->lnk);
+		SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk);
 
 	if (len > conn->peer_rmbe_size)
-		SMC_STAT_RMB_TX_PEER_SIZE_SMALL(!conn->lnk);
+		SMC_STAT_RMB_TX_PEER_SIZE_SMALL(smc, !conn->lnk);
 
 	if (msg->msg_flags & MSG_OOB)
-		SMC_STAT_INC(!conn->lnk, urg_data_cnt);
+		SMC_STAT_INC(smc, urg_data_cnt);
 
 	while (msg_data_left(msg)) {
 		if (sk->sk_state == SMC_INIT)
@@ -432,7 +432,9 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
 	/* cf. snd_wnd */
 	rmbespace = atomic_read(&conn->peer_rmbe_space);
 	if (rmbespace <= 0) {
-		SMC_STAT_RMB_TX_PEER_FULL(!conn->lnk);
+		struct smc_sock *smc = container_of(conn, struct smc_sock,
+						    conn);
+		SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk);
 		return 0;
 	}
 	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);

From 8b474a9f6b3793480392bbdfcc546bb8e21b3735 Mon Sep 17 00:00:00 2001
From: Serhiy Boiko <serhiy.boiko@plvision.eu>
Date: Wed, 16 Jun 2021 19:01:44 +0300
Subject: [PATCH 2003/3292] net: marvell: Implement TC flower offload

Add ACL infrastructure for Prestera Switch ASICs family devices to
offload cls_flower rules to be processed in the HW.

ACL implementation is based on tc filter api. The flower classifier
is supported to configure ACL rules/matches/action.

Supported actions:

    - drop
    - trap
    - pass

Supported dissector keys:

    - indev
    - src_mac
    - dst_mac
    - src_ip
    - dst_ip
    - ip_proto
    - src_port
    - dst_port
    - vlan_id
    - vlan_ethtype
    - icmp type/code

Co-developed-by: Volodymyr Mytnyk <vmytnyk@marvell.com>
Signed-off-by: Volodymyr Mytnyk <vmytnyk@marvell.com>
Signed-off-by: Serhiy Boiko <serhiy.boiko@plvision.eu>
Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/prestera/Makefile    |   3 +-
 .../net/ethernet/marvell/prestera/prestera.h  |   5 +
 .../ethernet/marvell/prestera/prestera_acl.c  | 374 ++++++++++++++++++
 .../ethernet/marvell/prestera/prestera_acl.h  | 123 ++++++
 .../ethernet/marvell/prestera/prestera_flow.c | 175 ++++++++
 .../ethernet/marvell/prestera/prestera_flow.h |  14 +
 .../marvell/prestera/prestera_flower.c        | 359 +++++++++++++++++
 .../marvell/prestera/prestera_flower.h        |  18 +
 .../ethernet/marvell/prestera/prestera_hw.c   | 292 ++++++++++++++
 .../ethernet/marvell/prestera/prestera_hw.h   |  17 +
 .../ethernet/marvell/prestera/prestera_main.c |  26 +-
 11 files changed, 1404 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_acl.c
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_acl.h
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_flow.c
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_flow.h
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_flower.c
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_flower.h

diff --git a/drivers/net/ethernet/marvell/prestera/Makefile b/drivers/net/ethernet/marvell/prestera/Makefile
index 93129e32ebc5..42327c4afdbf 100644
--- a/drivers/net/ethernet/marvell/prestera/Makefile
+++ b/drivers/net/ethernet/marvell/prestera/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_PRESTERA)	+= prestera.o
 prestera-objs		:= prestera_main.o prestera_hw.o prestera_dsa.o \
 			   prestera_rxtx.o prestera_devlink.o prestera_ethtool.o \
-			   prestera_switchdev.o
+			   prestera_switchdev.o prestera_acl.o prestera_flow.o \
+			   prestera_flower.o
 
 obj-$(CONFIG_PRESTERA_PCI)	+= prestera_pci.o
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index 6353f1c67638..bbbe780d0886 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -67,9 +67,12 @@ struct prestera_lag {
 	u16 lag_id;
 };
 
+struct prestera_flow_block;
+
 struct prestera_port {
 	struct net_device *dev;
 	struct prestera_switch *sw;
+	struct prestera_flow_block *flow_block;
 	struct devlink_port dl_port;
 	struct list_head lag_member;
 	struct prestera_lag *lag;
@@ -171,11 +174,13 @@ struct prestera_event {
 struct prestera_switchdev;
 struct prestera_rxtx;
 struct prestera_trap_data;
+struct prestera_acl;
 
 struct prestera_switch {
 	struct prestera_device *dev;
 	struct prestera_switchdev *swdev;
 	struct prestera_rxtx *rxtx;
+	struct prestera_acl *acl;
 	struct list_head event_handlers;
 	struct notifier_block netdev_nb;
 	struct prestera_trap_data *trap_data;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.c b/drivers/net/ethernet/marvell/prestera/prestera_acl.c
new file mode 100644
index 000000000000..64b66ba1c43f
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved */
+
+#include <linux/rhashtable.h>
+
+#include "prestera.h"
+#include "prestera_hw.h"
+#include "prestera_acl.h"
+
+struct prestera_acl {
+	struct prestera_switch *sw;
+	struct list_head rules;
+};
+
+struct prestera_acl_ruleset {
+	struct rhashtable rule_ht;
+	struct prestera_switch *sw;
+	u16 id;
+};
+
+struct prestera_acl_rule {
+	struct rhash_head ht_node;
+	struct list_head list;
+	struct list_head match_list;
+	struct list_head action_list;
+	struct prestera_flow_block *block;
+	unsigned long cookie;
+	u32 priority;
+	u8 n_actions;
+	u8 n_matches;
+	u32 id;
+};
+
+static const struct rhashtable_params prestera_acl_rule_ht_params = {
+	.key_len = sizeof(unsigned long),
+	.key_offset = offsetof(struct prestera_acl_rule, cookie),
+	.head_offset = offsetof(struct prestera_acl_rule, ht_node),
+	.automatic_shrinking = true,
+};
+
+static struct prestera_acl_ruleset *
+prestera_acl_ruleset_create(struct prestera_switch *sw)
+{
+	struct prestera_acl_ruleset *ruleset;
+	int err;
+
+	ruleset = kzalloc(sizeof(*ruleset), GFP_KERNEL);
+	if (!ruleset)
+		return ERR_PTR(-ENOMEM);
+
+	err = rhashtable_init(&ruleset->rule_ht, &prestera_acl_rule_ht_params);
+	if (err)
+		goto err_rhashtable_init;
+
+	err = prestera_hw_acl_ruleset_create(sw, &ruleset->id);
+	if (err)
+		goto err_ruleset_create;
+
+	ruleset->sw = sw;
+
+	return ruleset;
+
+err_ruleset_create:
+	rhashtable_destroy(&ruleset->rule_ht);
+err_rhashtable_init:
+	kfree(ruleset);
+	return ERR_PTR(err);
+}
+
+static void prestera_acl_ruleset_destroy(struct prestera_acl_ruleset *ruleset)
+{
+	prestera_hw_acl_ruleset_del(ruleset->sw, ruleset->id);
+	rhashtable_destroy(&ruleset->rule_ht);
+	kfree(ruleset);
+}
+
+struct prestera_flow_block *
+prestera_acl_block_create(struct prestera_switch *sw, struct net *net)
+{
+	struct prestera_flow_block *block;
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (!block)
+		return NULL;
+	INIT_LIST_HEAD(&block->binding_list);
+	block->net = net;
+	block->sw = sw;
+
+	block->ruleset = prestera_acl_ruleset_create(sw);
+	if (IS_ERR(block->ruleset)) {
+		kfree(block);
+		return NULL;
+	}
+
+	return block;
+}
+
+void prestera_acl_block_destroy(struct prestera_flow_block *block)
+{
+	prestera_acl_ruleset_destroy(block->ruleset);
+	WARN_ON(!list_empty(&block->binding_list));
+	kfree(block);
+}
+
+static struct prestera_flow_block_binding *
+prestera_acl_block_lookup(struct prestera_flow_block *block,
+			  struct prestera_port *port)
+{
+	struct prestera_flow_block_binding *binding;
+
+	list_for_each_entry(binding, &block->binding_list, list)
+		if (binding->port == port)
+			return binding;
+
+	return NULL;
+}
+
+int prestera_acl_block_bind(struct prestera_flow_block *block,
+			    struct prestera_port *port)
+{
+	struct prestera_flow_block_binding *binding;
+	int err;
+
+	if (WARN_ON(prestera_acl_block_lookup(block, port)))
+		return -EEXIST;
+
+	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
+	if (!binding)
+		return -ENOMEM;
+	binding->port = port;
+
+	err = prestera_hw_acl_port_bind(port, block->ruleset->id);
+	if (err)
+		goto err_rules_bind;
+
+	list_add(&binding->list, &block->binding_list);
+	return 0;
+
+err_rules_bind:
+	kfree(binding);
+	return err;
+}
+
+int prestera_acl_block_unbind(struct prestera_flow_block *block,
+			      struct prestera_port *port)
+{
+	struct prestera_flow_block_binding *binding;
+
+	binding = prestera_acl_block_lookup(block, port);
+	if (!binding)
+		return -ENOENT;
+
+	list_del(&binding->list);
+
+	prestera_hw_acl_port_unbind(port, block->ruleset->id);
+
+	kfree(binding);
+	return 0;
+}
+
+struct prestera_acl_ruleset *
+prestera_acl_block_ruleset_get(struct prestera_flow_block *block)
+{
+	return block->ruleset;
+}
+
+u16 prestera_acl_rule_ruleset_id_get(const struct prestera_acl_rule *rule)
+{
+	return rule->block->ruleset->id;
+}
+
+struct net *prestera_acl_block_net(struct prestera_flow_block *block)
+{
+	return block->net;
+}
+
+struct prestera_switch *prestera_acl_block_sw(struct prestera_flow_block *block)
+{
+	return block->sw;
+}
+
+struct prestera_acl_rule *
+prestera_acl_rule_lookup(struct prestera_acl_ruleset *ruleset,
+			 unsigned long cookie)
+{
+	return rhashtable_lookup_fast(&ruleset->rule_ht, &cookie,
+				      prestera_acl_rule_ht_params);
+}
+
+struct prestera_acl_rule *
+prestera_acl_rule_create(struct prestera_flow_block *block,
+			 unsigned long cookie)
+{
+	struct prestera_acl_rule *rule;
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&rule->match_list);
+	INIT_LIST_HEAD(&rule->action_list);
+	rule->cookie = cookie;
+	rule->block = block;
+
+	return rule;
+}
+
+struct list_head *
+prestera_acl_rule_match_list_get(struct prestera_acl_rule *rule)
+{
+	return &rule->match_list;
+}
+
+struct list_head *
+prestera_acl_rule_action_list_get(struct prestera_acl_rule *rule)
+{
+	return &rule->action_list;
+}
+
+int prestera_acl_rule_action_add(struct prestera_acl_rule *rule,
+				 struct prestera_acl_rule_action_entry *entry)
+{
+	struct prestera_acl_rule_action_entry *a_entry;
+
+	a_entry = kmalloc(sizeof(*a_entry), GFP_KERNEL);
+	if (!a_entry)
+		return -ENOMEM;
+
+	memcpy(a_entry, entry, sizeof(*entry));
+	list_add(&a_entry->list, &rule->action_list);
+
+	rule->n_actions++;
+	return 0;
+}
+
+u8 prestera_acl_rule_action_len(struct prestera_acl_rule *rule)
+{
+	return rule->n_actions;
+}
+
+u32 prestera_acl_rule_priority_get(struct prestera_acl_rule *rule)
+{
+	return rule->priority;
+}
+
+void prestera_acl_rule_priority_set(struct prestera_acl_rule *rule,
+				    u32 priority)
+{
+	rule->priority = priority;
+}
+
+int prestera_acl_rule_match_add(struct prestera_acl_rule *rule,
+				struct prestera_acl_rule_match_entry *entry)
+{
+	struct prestera_acl_rule_match_entry *m_entry;
+
+	m_entry = kmalloc(sizeof(*m_entry), GFP_KERNEL);
+	if (!m_entry)
+		return -ENOMEM;
+
+	memcpy(m_entry, entry, sizeof(*entry));
+	list_add(&m_entry->list, &rule->match_list);
+
+	rule->n_matches++;
+	return 0;
+}
+
+u8 prestera_acl_rule_match_len(struct prestera_acl_rule *rule)
+{
+	return rule->n_matches;
+}
+
+void prestera_acl_rule_destroy(struct prestera_acl_rule *rule)
+{
+	struct prestera_acl_rule_action_entry *a_entry;
+	struct prestera_acl_rule_match_entry *m_entry;
+	struct list_head *pos, *n;
+
+	list_for_each_safe(pos, n, &rule->match_list) {
+		m_entry = list_entry(pos, typeof(*m_entry), list);
+		list_del(pos);
+		kfree(m_entry);
+	}
+
+	list_for_each_safe(pos, n, &rule->action_list) {
+		a_entry = list_entry(pos, typeof(*a_entry), list);
+		list_del(pos);
+		kfree(a_entry);
+	}
+
+	kfree(rule);
+}
+
+int prestera_acl_rule_add(struct prestera_switch *sw,
+			  struct prestera_acl_rule *rule)
+{
+	u32 rule_id;
+	int err;
+
+	/* try to add rule to hash table first */
+	err = rhashtable_insert_fast(&rule->block->ruleset->rule_ht,
+				     &rule->ht_node,
+				     prestera_acl_rule_ht_params);
+	if (err)
+		return err;
+
+	/* add rule to hw */
+	err = prestera_hw_acl_rule_add(sw, rule, &rule_id);
+	if (err)
+		goto err_rule_add;
+
+	rule->id = rule_id;
+
+	list_add_tail(&rule->list, &sw->acl->rules);
+
+	return 0;
+
+err_rule_add:
+	rhashtable_remove_fast(&rule->block->ruleset->rule_ht, &rule->ht_node,
+			       prestera_acl_rule_ht_params);
+	return err;
+}
+
+void prestera_acl_rule_del(struct prestera_switch *sw,
+			   struct prestera_acl_rule *rule)
+{
+	rhashtable_remove_fast(&rule->block->ruleset->rule_ht, &rule->ht_node,
+			       prestera_acl_rule_ht_params);
+	list_del(&rule->list);
+	prestera_hw_acl_rule_del(sw, rule->id);
+}
+
+int prestera_acl_rule_get_stats(struct prestera_switch *sw,
+				struct prestera_acl_rule *rule,
+				u64 *packets, u64 *bytes, u64 *last_use)
+{
+	u64 current_packets;
+	u64 current_bytes;
+	int err;
+
+	err = prestera_hw_acl_rule_stats_get(sw, rule->id, &current_packets,
+					     &current_bytes);
+	if (err)
+		return err;
+
+	*packets = current_packets;
+	*bytes = current_bytes;
+	*last_use = jiffies;
+
+	return 0;
+}
+
+int prestera_acl_init(struct prestera_switch *sw)
+{
+	struct prestera_acl *acl;
+
+	acl = kzalloc(sizeof(*acl), GFP_KERNEL);
+	if (!acl)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&acl->rules);
+	sw->acl = acl;
+	acl->sw = sw;
+
+	return 0;
+}
+
+void prestera_acl_fini(struct prestera_switch *sw)
+{
+	struct prestera_acl *acl = sw->acl;
+
+	WARN_ON(!list_empty(&acl->rules));
+	kfree(acl);
+}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.h b/drivers/net/ethernet/marvell/prestera/prestera_acl.h
new file mode 100644
index 000000000000..1b3f516778e5
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved. */
+
+#ifndef _PRESTERA_ACL_H_
+#define _PRESTERA_ACL_H_
+
+enum prestera_acl_rule_match_entry_type {
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_TYPE = 1,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_DMAC,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_SMAC,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_PROTO,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_PORT,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_SRC,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_DST,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_SRC,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_DST,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_RANGE_SRC,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_RANGE_DST,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_ID,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_TPID,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_TYPE,
+	PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_CODE
+};
+
+enum prestera_acl_rule_action {
+	PRESTERA_ACL_RULE_ACTION_ACCEPT,
+	PRESTERA_ACL_RULE_ACTION_DROP,
+	PRESTERA_ACL_RULE_ACTION_TRAP
+};
+
+struct prestera_switch;
+struct prestera_port;
+struct prestera_acl_rule;
+struct prestera_acl_ruleset;
+
+struct prestera_flow_block_binding {
+	struct list_head list;
+	struct prestera_port *port;
+};
+
+struct prestera_flow_block {
+	struct list_head binding_list;
+	struct prestera_switch *sw;
+	struct net *net;
+	struct prestera_acl_ruleset *ruleset;
+	struct flow_block_cb *block_cb;
+};
+
+struct prestera_acl_rule_action_entry {
+	struct list_head list;
+	enum prestera_acl_rule_action id;
+};
+
+struct prestera_acl_rule_match_entry {
+	struct list_head list;
+	enum prestera_acl_rule_match_entry_type type;
+	union {
+		struct {
+			u8 key;
+			u8 mask;
+		} u8;
+		struct {
+			u16 key;
+			u16 mask;
+		} u16;
+		struct {
+			u32 key;
+			u32 mask;
+		} u32;
+		struct {
+			u64 key;
+			u64 mask;
+		} u64;
+		struct {
+			u8 key[ETH_ALEN];
+			u8 mask[ETH_ALEN];
+		} mac;
+	} keymask;
+};
+
+int prestera_acl_init(struct prestera_switch *sw);
+void prestera_acl_fini(struct prestera_switch *sw);
+struct prestera_flow_block *
+prestera_acl_block_create(struct prestera_switch *sw, struct net *net);
+void prestera_acl_block_destroy(struct prestera_flow_block *block);
+struct net *prestera_acl_block_net(struct prestera_flow_block *block);
+struct prestera_switch *prestera_acl_block_sw(struct prestera_flow_block *block);
+int prestera_acl_block_bind(struct prestera_flow_block *block,
+			    struct prestera_port *port);
+int prestera_acl_block_unbind(struct prestera_flow_block *block,
+			      struct prestera_port *port);
+struct prestera_acl_ruleset *
+prestera_acl_block_ruleset_get(struct prestera_flow_block *block);
+struct prestera_acl_rule *
+prestera_acl_rule_create(struct prestera_flow_block *block,
+			 unsigned long cookie);
+u32 prestera_acl_rule_priority_get(struct prestera_acl_rule *rule);
+void prestera_acl_rule_priority_set(struct prestera_acl_rule *rule,
+				    u32 priority);
+u16 prestera_acl_rule_ruleset_id_get(const struct prestera_acl_rule *rule);
+struct list_head *
+prestera_acl_rule_action_list_get(struct prestera_acl_rule *rule);
+u8 prestera_acl_rule_action_len(struct prestera_acl_rule *rule);
+u8 prestera_acl_rule_match_len(struct prestera_acl_rule *rule);
+int prestera_acl_rule_action_add(struct prestera_acl_rule *rule,
+				 struct prestera_acl_rule_action_entry *entry);
+struct list_head *
+prestera_acl_rule_match_list_get(struct prestera_acl_rule *rule);
+int prestera_acl_rule_match_add(struct prestera_acl_rule *rule,
+				struct prestera_acl_rule_match_entry *entry);
+void prestera_acl_rule_destroy(struct prestera_acl_rule *rule);
+struct prestera_acl_rule *
+prestera_acl_rule_lookup(struct prestera_acl_ruleset *ruleset,
+			 unsigned long cookie);
+int prestera_acl_rule_add(struct prestera_switch *sw,
+			  struct prestera_acl_rule *rule);
+void prestera_acl_rule_del(struct prestera_switch *sw,
+			   struct prestera_acl_rule *rule);
+int prestera_acl_rule_get_stats(struct prestera_switch *sw,
+				struct prestera_acl_rule *rule,
+				u64 *packets, u64 *bytes, u64 *last_use);
+
+#endif /* _PRESTERA_ACL_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flow.c b/drivers/net/ethernet/marvell/prestera/prestera_flow.c
new file mode 100644
index 000000000000..12a36723e2a5
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flow.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+
+#include "prestera.h"
+#include "prestera_acl.h"
+#include "prestera_flow.h"
+#include "prestera_flower.h"
+
+static LIST_HEAD(prestera_block_cb_list);
+
+static int prestera_flow_block_flower_cb(struct prestera_flow_block *block,
+					 struct flow_cls_offload *f)
+{
+	if (f->common.chain_index != 0)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case FLOW_CLS_REPLACE:
+		return prestera_flower_replace(block, f);
+	case FLOW_CLS_DESTROY:
+		prestera_flower_destroy(block, f);
+		return 0;
+	case FLOW_CLS_STATS:
+		return prestera_flower_stats(block, f);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int prestera_flow_block_cb(enum tc_setup_type type,
+				  void *type_data, void *cb_priv)
+{
+	struct prestera_flow_block *block = cb_priv;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return prestera_flow_block_flower_cb(block, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void prestera_flow_block_release(void *cb_priv)
+{
+	struct prestera_flow_block *block = cb_priv;
+
+	prestera_acl_block_destroy(block);
+}
+
+static struct prestera_flow_block *
+prestera_flow_block_get(struct prestera_switch *sw,
+			struct flow_block_offload *f,
+			bool *register_block)
+{
+	struct prestera_flow_block *block;
+	struct flow_block_cb *block_cb;
+
+	block_cb = flow_block_cb_lookup(f->block,
+					prestera_flow_block_cb, sw);
+	if (!block_cb) {
+		block = prestera_acl_block_create(sw, f->net);
+		if (!block)
+			return ERR_PTR(-ENOMEM);
+
+		block_cb = flow_block_cb_alloc(prestera_flow_block_cb,
+					       sw, block,
+					       prestera_flow_block_release);
+		if (IS_ERR(block_cb)) {
+			prestera_acl_block_destroy(block);
+			return ERR_CAST(block_cb);
+		}
+
+		block->block_cb = block_cb;
+		*register_block = true;
+	} else {
+		block = flow_block_cb_priv(block_cb);
+		*register_block = false;
+	}
+
+	flow_block_cb_incref(block_cb);
+
+	return block;
+}
+
+static void prestera_flow_block_put(struct prestera_flow_block *block)
+{
+	struct flow_block_cb *block_cb = block->block_cb;
+
+	if (flow_block_cb_decref(block_cb))
+		return;
+
+	flow_block_cb_free(block_cb);
+	prestera_acl_block_destroy(block);
+}
+
+static int prestera_setup_flow_block_bind(struct prestera_port *port,
+					  struct flow_block_offload *f)
+{
+	struct prestera_switch *sw = port->sw;
+	struct prestera_flow_block *block;
+	struct flow_block_cb *block_cb;
+	bool register_block;
+	int err;
+
+	block = prestera_flow_block_get(sw, f, &register_block);
+	if (IS_ERR(block))
+		return PTR_ERR(block);
+
+	block_cb = block->block_cb;
+
+	err = prestera_acl_block_bind(block, port);
+	if (err)
+		goto err_block_bind;
+
+	if (register_block) {
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &prestera_block_cb_list);
+	}
+
+	port->flow_block = block;
+	return 0;
+
+err_block_bind:
+	prestera_flow_block_put(block);
+
+	return err;
+}
+
+static void prestera_setup_flow_block_unbind(struct prestera_port *port,
+					     struct flow_block_offload *f)
+{
+	struct prestera_switch *sw = port->sw;
+	struct prestera_flow_block *block;
+	struct flow_block_cb *block_cb;
+	int err;
+
+	block_cb = flow_block_cb_lookup(f->block, prestera_flow_block_cb, sw);
+	if (!block_cb)
+		return;
+
+	block = flow_block_cb_priv(block_cb);
+
+	err = prestera_acl_block_unbind(block, port);
+	if (err)
+		goto error;
+
+	if (!flow_block_cb_decref(block_cb)) {
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+	}
+error:
+	port->flow_block = NULL;
+}
+
+int prestera_flow_block_setup(struct prestera_port *port,
+			      struct flow_block_offload *f)
+{
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	f->driver_block_list = &prestera_block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		return prestera_setup_flow_block_bind(port, f);
+	case FLOW_BLOCK_UNBIND:
+		prestera_setup_flow_block_unbind(port, f);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flow.h b/drivers/net/ethernet/marvell/prestera/prestera_flow.h
new file mode 100644
index 000000000000..467c7038cace
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flow.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved. */
+
+#ifndef _PRESTERA_FLOW_H_
+#define _PRESTERA_FLOW_H_
+
+#include <net/flow_offload.h>
+
+struct prestera_port;
+
+int prestera_flow_block_setup(struct prestera_port *port,
+			      struct flow_block_offload *f);
+
+#endif /* _PRESTERA_FLOW_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
new file mode 100644
index 000000000000..e571ba09ec08
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved */
+
+#include "prestera.h"
+#include "prestera_acl.h"
+#include "prestera_flower.h"
+
+static int prestera_flower_parse_actions(struct prestera_flow_block *block,
+					 struct prestera_acl_rule *rule,
+					 struct flow_action *flow_action,
+					 struct netlink_ext_ack *extack)
+{
+	struct prestera_acl_rule_action_entry a_entry;
+	const struct flow_action_entry *act;
+	int err, i;
+
+	if (!flow_action_has_entries(flow_action))
+		return 0;
+
+	flow_action_for_each(i, act, flow_action) {
+		memset(&a_entry, 0, sizeof(a_entry));
+
+		switch (act->id) {
+		case FLOW_ACTION_ACCEPT:
+			a_entry.id = PRESTERA_ACL_RULE_ACTION_ACCEPT;
+			break;
+		case FLOW_ACTION_DROP:
+			a_entry.id = PRESTERA_ACL_RULE_ACTION_DROP;
+			break;
+		case FLOW_ACTION_TRAP:
+			a_entry.id = PRESTERA_ACL_RULE_ACTION_TRAP;
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
+			pr_err("Unsupported action\n");
+			return -EOPNOTSUPP;
+		}
+
+		err = prestera_acl_rule_action_add(rule, &a_entry);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int prestera_flower_parse_meta(struct prestera_acl_rule *rule,
+				      struct flow_cls_offload *f,
+				      struct prestera_flow_block *block)
+{
+	struct flow_rule *f_rule = flow_cls_offload_flow_rule(f);
+	struct prestera_acl_rule_match_entry m_entry = {0};
+	struct net_device *ingress_dev;
+	struct flow_match_meta match;
+	struct prestera_port *port;
+
+	flow_rule_match_meta(f_rule, &match);
+	if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
+		NL_SET_ERR_MSG_MOD(f->common.extack,
+				   "Unsupported ingress ifindex mask");
+		return -EINVAL;
+	}
+
+	ingress_dev = __dev_get_by_index(prestera_acl_block_net(block),
+					 match.key->ingress_ifindex);
+	if (!ingress_dev) {
+		NL_SET_ERR_MSG_MOD(f->common.extack,
+				   "Can't find specified ingress port to match on");
+		return -EINVAL;
+	}
+
+	if (!prestera_netdev_check(ingress_dev)) {
+		NL_SET_ERR_MSG_MOD(f->common.extack,
+				   "Can't match on switchdev ingress port");
+		return -EINVAL;
+	}
+	port = netdev_priv(ingress_dev);
+
+	m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_PORT;
+	m_entry.keymask.u64.key = port->hw_id | ((u64)port->dev_id << 32);
+	m_entry.keymask.u64.mask = ~(u64)0;
+
+	return prestera_acl_rule_match_add(rule, &m_entry);
+}
+
+static int prestera_flower_parse(struct prestera_flow_block *block,
+				 struct prestera_acl_rule *rule,
+				 struct flow_cls_offload *f)
+{
+	struct flow_rule *f_rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = f_rule->match.dissector;
+	struct prestera_acl_rule_match_entry m_entry;
+	u16 n_proto_mask = 0;
+	u16 n_proto_key = 0;
+	u16 addr_type = 0;
+	u8 ip_proto = 0;
+	int err;
+
+	if (dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
+	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_ICMP) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+		NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported key");
+		return -EOPNOTSUPP;
+	}
+
+	prestera_acl_rule_priority_set(rule, f->common.prio);
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_META)) {
+		err = prestera_flower_parse_meta(rule, f, block);
+		if (err)
+			return err;
+	}
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(f_rule, &match);
+		addr_type = match.key->addr_type;
+	}
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(f_rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+
+		/* add eth type key,mask */
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_TYPE;
+		m_entry.keymask.u16.key = n_proto_key;
+		m_entry.keymask.u16.mask = n_proto_mask;
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+
+		/* add ip proto key,mask */
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_PROTO;
+		m_entry.keymask.u8.key = match.key->ip_proto;
+		m_entry.keymask.u8.mask = match.mask->ip_proto;
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+
+		ip_proto = match.key->ip_proto;
+	}
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(f_rule, &match);
+
+		/* add ethernet dst key,mask */
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_DMAC;
+		memcpy(&m_entry.keymask.mac.key,
+		       &match.key->dst, sizeof(match.key->dst));
+		memcpy(&m_entry.keymask.mac.mask,
+		       &match.mask->dst, sizeof(match.mask->dst));
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+
+		/* add ethernet src key,mask */
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_SMAC;
+		memcpy(&m_entry.keymask.mac.key,
+		       &match.key->src, sizeof(match.key->src));
+		memcpy(&m_entry.keymask.mac.mask,
+		       &match.mask->src, sizeof(match.mask->src));
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(f_rule, &match);
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_SRC;
+		memcpy(&m_entry.keymask.u32.key,
+		       &match.key->src, sizeof(match.key->src));
+		memcpy(&m_entry.keymask.u32.mask,
+		       &match.mask->src, sizeof(match.mask->src));
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_DST;
+		memcpy(&m_entry.keymask.u32.key,
+		       &match.key->dst, sizeof(match.key->dst));
+		memcpy(&m_entry.keymask.u32.mask,
+		       &match.mask->dst, sizeof(match.mask->dst));
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+	}
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+			NL_SET_ERR_MSG_MOD
+			    (f->common.extack,
+			     "Only UDP and TCP keys are supported");
+			return -EINVAL;
+		}
+
+		flow_rule_match_ports(f_rule, &match);
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_SRC;
+		m_entry.keymask.u16.key = ntohs(match.key->src);
+		m_entry.keymask.u16.mask = ntohs(match.mask->src);
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_DST;
+		m_entry.keymask.u16.key = ntohs(match.key->dst);
+		m_entry.keymask.u16.mask = ntohs(match.mask->dst);
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+	}
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(f_rule, &match);
+
+		if (match.mask->vlan_id != 0) {
+			memset(&m_entry, 0, sizeof(m_entry));
+			m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_ID;
+			m_entry.keymask.u16.key = match.key->vlan_id;
+			m_entry.keymask.u16.mask = match.mask->vlan_id;
+			err = prestera_acl_rule_match_add(rule, &m_entry);
+			if (err)
+				return err;
+		}
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_TPID;
+		m_entry.keymask.u16.key = ntohs(match.key->vlan_tpid);
+		m_entry.keymask.u16.mask = ntohs(match.mask->vlan_tpid);
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+	}
+
+	if (flow_rule_match_key(f_rule, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_match_icmp match;
+
+		flow_rule_match_icmp(f_rule, &match);
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_TYPE;
+		m_entry.keymask.u8.key = match.key->type;
+		m_entry.keymask.u8.mask = match.mask->type;
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+
+		memset(&m_entry, 0, sizeof(m_entry));
+		m_entry.type = PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_CODE;
+		m_entry.keymask.u8.key = match.key->code;
+		m_entry.keymask.u8.mask = match.mask->code;
+		err = prestera_acl_rule_match_add(rule, &m_entry);
+		if (err)
+			return err;
+	}
+
+	return prestera_flower_parse_actions(block, rule,
+					     &f->rule->action,
+					     f->common.extack);
+}
+
+int prestera_flower_replace(struct prestera_flow_block *block,
+			    struct flow_cls_offload *f)
+{
+	struct prestera_switch *sw = prestera_acl_block_sw(block);
+	struct prestera_acl_rule *rule;
+	int err;
+
+	rule = prestera_acl_rule_create(block, f->cookie);
+	if (IS_ERR(rule))
+		return PTR_ERR(rule);
+
+	err = prestera_flower_parse(block, rule, f);
+	if (err)
+		goto err_flower_parse;
+
+	err = prestera_acl_rule_add(sw, rule);
+	if (err)
+		goto err_rule_add;
+
+	return 0;
+
+err_rule_add:
+err_flower_parse:
+	prestera_acl_rule_destroy(rule);
+	return err;
+}
+
+void prestera_flower_destroy(struct prestera_flow_block *block,
+			     struct flow_cls_offload *f)
+{
+	struct prestera_acl_rule *rule;
+	struct prestera_switch *sw;
+
+	rule = prestera_acl_rule_lookup(prestera_acl_block_ruleset_get(block),
+					f->cookie);
+	if (rule) {
+		sw = prestera_acl_block_sw(block);
+		prestera_acl_rule_del(sw, rule);
+		prestera_acl_rule_destroy(rule);
+	}
+}
+
+int prestera_flower_stats(struct prestera_flow_block *block,
+			  struct flow_cls_offload *f)
+{
+	struct prestera_switch *sw = prestera_acl_block_sw(block);
+	struct prestera_acl_rule *rule;
+	u64 packets;
+	u64 lastuse;
+	u64 bytes;
+	int err;
+
+	rule = prestera_acl_rule_lookup(prestera_acl_block_ruleset_get(block),
+					f->cookie);
+	if (!rule)
+		return -EINVAL;
+
+	err = prestera_acl_rule_get_stats(sw, rule, &packets, &bytes, &lastuse);
+	if (err)
+		return err;
+
+	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
+			  FLOW_ACTION_HW_STATS_IMMEDIATE);
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.h b/drivers/net/ethernet/marvell/prestera/prestera_flower.h
new file mode 100644
index 000000000000..91e045eec58b
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved. */
+
+#ifndef _PRESTERA_FLOWER_H_
+#define _PRESTERA_FLOWER_H_
+
+#include <net/pkt_cls.h>
+
+struct prestera_flow_block;
+
+int prestera_flower_replace(struct prestera_flow_block *block,
+			    struct flow_cls_offload *f);
+void prestera_flower_destroy(struct prestera_flow_block *block,
+			     struct flow_cls_offload *f);
+int prestera_flower_stats(struct prestera_flow_block *block,
+			  struct flow_cls_offload *f);
+
+#endif /* _PRESTERA_FLOWER_H_ */
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index a4e3dc8d3abe..42b8d9f56468 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -8,6 +8,7 @@
 
 #include "prestera.h"
 #include "prestera_hw.h"
+#include "prestera_acl.h"
 
 #define PRESTERA_SWITCH_INIT_TIMEOUT_MS (30 * 1000)
 
@@ -37,6 +38,14 @@ enum prestera_cmd_type_t {
 	PRESTERA_CMD_TYPE_BRIDGE_PORT_ADD = 0x402,
 	PRESTERA_CMD_TYPE_BRIDGE_PORT_DELETE = 0x403,
 
+	PRESTERA_CMD_TYPE_ACL_RULE_ADD = 0x500,
+	PRESTERA_CMD_TYPE_ACL_RULE_DELETE = 0x501,
+	PRESTERA_CMD_TYPE_ACL_RULE_STATS_GET = 0x510,
+	PRESTERA_CMD_TYPE_ACL_RULESET_CREATE = 0x520,
+	PRESTERA_CMD_TYPE_ACL_RULESET_DELETE = 0x521,
+	PRESTERA_CMD_TYPE_ACL_PORT_BIND = 0x530,
+	PRESTERA_CMD_TYPE_ACL_PORT_UNBIND = 0x531,
+
 	PRESTERA_CMD_TYPE_RXTX_INIT = 0x800,
 	PRESTERA_CMD_TYPE_RXTX_PORT_INIT = 0x801,
 
@@ -301,6 +310,73 @@ struct prestera_msg_bridge_resp {
 	u16 bridge;
 };
 
+struct prestera_msg_acl_action {
+	u32 id;
+};
+
+struct prestera_msg_acl_match {
+	u32 type;
+	union {
+		struct {
+			u8 key;
+			u8 mask;
+		} u8;
+		struct {
+			u16 key;
+			u16 mask;
+		} u16;
+		struct {
+			u32 key;
+			u32 mask;
+		} u32;
+		struct {
+			u64 key;
+			u64 mask;
+		} u64;
+		struct {
+			u8 key[ETH_ALEN];
+			u8 mask[ETH_ALEN];
+		} mac;
+	} __packed keymask;
+};
+
+struct prestera_msg_acl_rule_req {
+	struct prestera_msg_cmd cmd;
+	u32 id;
+	u32 priority;
+	u16 ruleset_id;
+	u8 n_actions;
+	u8 n_matches;
+};
+
+struct prestera_msg_acl_rule_resp {
+	struct prestera_msg_ret ret;
+	u32 id;
+};
+
+struct prestera_msg_acl_rule_stats_resp {
+	struct prestera_msg_ret ret;
+	u64 packets;
+	u64 bytes;
+};
+
+struct prestera_msg_acl_ruleset_bind_req {
+	struct prestera_msg_cmd cmd;
+	u32 port;
+	u32 dev;
+	u16 ruleset_id;
+};
+
+struct prestera_msg_acl_ruleset_req {
+	struct prestera_msg_cmd cmd;
+	u16 id;
+};
+
+struct prestera_msg_acl_ruleset_resp {
+	struct prestera_msg_ret ret;
+	u16 id;
+};
+
 struct prestera_msg_stp_req {
 	struct prestera_msg_cmd cmd;
 	u32 port;
@@ -763,6 +839,222 @@ int prestera_hw_port_remote_fc_get(const struct prestera_port *port,
 	return 0;
 }
 
+int prestera_hw_acl_ruleset_create(struct prestera_switch *sw, u16 *ruleset_id)
+{
+	struct prestera_msg_acl_ruleset_resp resp;
+	struct prestera_msg_acl_ruleset_req req;
+	int err;
+
+	err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ACL_RULESET_CREATE,
+			       &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
+	if (err)
+		return err;
+
+	*ruleset_id = resp.id;
+
+	return 0;
+}
+
+int prestera_hw_acl_ruleset_del(struct prestera_switch *sw, u16 ruleset_id)
+{
+	struct prestera_msg_acl_ruleset_req req = {
+		.id = ruleset_id,
+	};
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_ACL_RULESET_DELETE,
+			    &req.cmd, sizeof(req));
+}
+
+static int prestera_hw_acl_actions_put(struct prestera_msg_acl_action *action,
+				       struct prestera_acl_rule *rule)
+{
+	struct list_head *a_list = prestera_acl_rule_action_list_get(rule);
+	struct prestera_acl_rule_action_entry *a_entry;
+	int i = 0;
+
+	list_for_each_entry(a_entry, a_list, list) {
+		action[i].id = a_entry->id;
+
+		switch (a_entry->id) {
+		case PRESTERA_ACL_RULE_ACTION_ACCEPT:
+		case PRESTERA_ACL_RULE_ACTION_DROP:
+		case PRESTERA_ACL_RULE_ACTION_TRAP:
+			/* just rule action id, no specific data */
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		i++;
+	}
+
+	return 0;
+}
+
+static int prestera_hw_acl_matches_put(struct prestera_msg_acl_match *match,
+				       struct prestera_acl_rule *rule)
+{
+	struct list_head *m_list = prestera_acl_rule_match_list_get(rule);
+	struct prestera_acl_rule_match_entry *m_entry;
+	int i = 0;
+
+	list_for_each_entry(m_entry, m_list, list) {
+		match[i].type = m_entry->type;
+
+		switch (m_entry->type) {
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_TYPE:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_SRC:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_DST:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_ID:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_VLAN_TPID:
+			match[i].keymask.u16.key = m_entry->keymask.u16.key;
+			match[i].keymask.u16.mask = m_entry->keymask.u16.mask;
+			break;
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_TYPE:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ICMP_CODE:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_PROTO:
+			match[i].keymask.u8.key = m_entry->keymask.u8.key;
+			match[i].keymask.u8.mask = m_entry->keymask.u8.mask;
+			break;
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_SMAC:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_ETH_DMAC:
+			memcpy(match[i].keymask.mac.key,
+			       m_entry->keymask.mac.key,
+			       sizeof(match[i].keymask.mac.key));
+			memcpy(match[i].keymask.mac.mask,
+			       m_entry->keymask.mac.mask,
+			       sizeof(match[i].keymask.mac.mask));
+			break;
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_SRC:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_IP_DST:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_RANGE_SRC:
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_L4_PORT_RANGE_DST:
+			match[i].keymask.u32.key = m_entry->keymask.u32.key;
+			match[i].keymask.u32.mask = m_entry->keymask.u32.mask;
+			break;
+		case PRESTERA_ACL_RULE_MATCH_ENTRY_TYPE_PORT:
+			match[i].keymask.u64.key = m_entry->keymask.u64.key;
+			match[i].keymask.u64.mask = m_entry->keymask.u64.mask;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		i++;
+	}
+
+	return 0;
+}
+
+int prestera_hw_acl_rule_add(struct prestera_switch *sw,
+			     struct prestera_acl_rule *rule,
+			     u32 *rule_id)
+{
+	struct prestera_msg_acl_action *actions;
+	struct prestera_msg_acl_match *matches;
+	struct prestera_msg_acl_rule_resp resp;
+	struct prestera_msg_acl_rule_req *req;
+	u8 n_actions;
+	u8 n_matches;
+	void *buff;
+	u32 size;
+	int err;
+
+	n_actions = prestera_acl_rule_action_len(rule);
+	n_matches = prestera_acl_rule_match_len(rule);
+
+	size = sizeof(*req) + sizeof(*actions) * n_actions +
+		sizeof(*matches) * n_matches;
+
+	buff = kzalloc(size, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+	req = buff;
+	actions = buff + sizeof(*req);
+	matches = buff + sizeof(*req) + sizeof(*actions) * n_actions;
+
+	/* put acl actions into the message */
+	err = prestera_hw_acl_actions_put(actions, rule);
+	if (err)
+		goto free_buff;
+
+	/* put acl matches into the message */
+	err = prestera_hw_acl_matches_put(matches, rule);
+	if (err)
+		goto free_buff;
+
+	req->ruleset_id = prestera_acl_rule_ruleset_id_get(rule);
+	req->priority = prestera_acl_rule_priority_get(rule);
+	req->n_actions = prestera_acl_rule_action_len(rule);
+	req->n_matches = prestera_acl_rule_match_len(rule);
+
+	err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ACL_RULE_ADD,
+			       &req->cmd, size, &resp.ret, sizeof(resp));
+	if (err)
+		goto free_buff;
+
+	*rule_id = resp.id;
+free_buff:
+	kfree(buff);
+	return err;
+}
+
+int prestera_hw_acl_rule_del(struct prestera_switch *sw, u32 rule_id)
+{
+	struct prestera_msg_acl_rule_req req = {
+		.id = rule_id
+	};
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_ACL_RULE_DELETE,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_acl_rule_stats_get(struct prestera_switch *sw, u32 rule_id,
+				   u64 *packets, u64 *bytes)
+{
+	struct prestera_msg_acl_rule_stats_resp resp;
+	struct prestera_msg_acl_rule_req req = {
+		.id = rule_id
+	};
+	int err;
+
+	err = prestera_cmd_ret(sw, PRESTERA_CMD_TYPE_ACL_RULE_STATS_GET,
+			       &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
+	if (err)
+		return err;
+
+	*packets = resp.packets;
+	*bytes = resp.bytes;
+
+	return 0;
+}
+
+int prestera_hw_acl_port_bind(const struct prestera_port *port, u16 ruleset_id)
+{
+	struct prestera_msg_acl_ruleset_bind_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.ruleset_id = ruleset_id,
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_ACL_PORT_BIND,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_acl_port_unbind(const struct prestera_port *port,
+				u16 ruleset_id)
+{
+	struct prestera_msg_acl_ruleset_bind_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.ruleset_id = ruleset_id,
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_ACL_PORT_UNBIND,
+			    &req.cmd, sizeof(req));
+}
+
 int prestera_hw_port_type_get(const struct prestera_port *port, u8 *type)
 {
 	struct prestera_msg_port_attr_req req = {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index 7f72d81cf918..c01d376574d2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -100,6 +100,7 @@ struct prestera_port_stats;
 struct prestera_port_caps;
 enum prestera_event_type;
 struct prestera_event;
+struct prestera_acl_rule;
 
 typedef void (*prestera_event_cb_t)
 	(struct prestera_switch *sw, struct prestera_event *evt, void *arg);
@@ -171,6 +172,22 @@ int prestera_hw_bridge_delete(struct prestera_switch *sw, u16 bridge_id);
 int prestera_hw_bridge_port_add(struct prestera_port *port, u16 bridge_id);
 int prestera_hw_bridge_port_delete(struct prestera_port *port, u16 bridge_id);
 
+/* ACL API */
+int prestera_hw_acl_ruleset_create(struct prestera_switch *sw,
+				   u16 *ruleset_id);
+int prestera_hw_acl_ruleset_del(struct prestera_switch *sw,
+				u16 ruleset_id);
+int prestera_hw_acl_rule_add(struct prestera_switch *sw,
+			     struct prestera_acl_rule *rule,
+			     u32 *rule_id);
+int prestera_hw_acl_rule_del(struct prestera_switch *sw, u32 rule_id);
+int prestera_hw_acl_rule_stats_get(struct prestera_switch *sw,
+				   u32 rule_id, u64 *packets, u64 *bytes);
+int prestera_hw_acl_port_bind(const struct prestera_port *port,
+			      u16 ruleset_id);
+int prestera_hw_acl_port_unbind(const struct prestera_port *port,
+				u16 ruleset_id);
+
 /* Event handlers */
 int prestera_hw_event_handler_register(struct prestera_switch *sw,
 				       enum prestera_event_type type,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index d825fbdfa86f..c3319d19c910 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -12,6 +12,8 @@
 
 #include "prestera.h"
 #include "prestera_hw.h"
+#include "prestera_acl.h"
+#include "prestera_flow.h"
 #include "prestera_rxtx.h"
 #include "prestera_devlink.h"
 #include "prestera_ethtool.h"
@@ -200,10 +202,25 @@ static void prestera_port_stats_update(struct work_struct *work)
 			   msecs_to_jiffies(PRESTERA_STATS_DELAY_MS));
 }
 
+static int prestera_port_setup_tc(struct net_device *dev,
+				  enum tc_setup_type type,
+				  void *type_data)
+{
+	struct prestera_port *port = netdev_priv(dev);
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return prestera_flow_block_setup(port, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops prestera_netdev_ops = {
 	.ndo_open = prestera_port_open,
 	.ndo_stop = prestera_port_close,
 	.ndo_start_xmit = prestera_port_xmit,
+	.ndo_setup_tc = prestera_port_setup_tc,
 	.ndo_change_mtu = prestera_port_change_mtu,
 	.ndo_get_stats64 = prestera_port_get_stats64,
 	.ndo_set_mac_address = prestera_port_set_mac_address,
@@ -298,7 +315,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id)
 	if (err)
 		goto err_dl_port_register;
 
-	dev->features |= NETIF_F_NETNS_LOCAL;
+	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC;
 	dev->netdev_ops = &prestera_netdev_ops;
 	dev->ethtool_ops = &prestera_ethtool_ops;
 
@@ -824,6 +841,10 @@ static int prestera_switch_init(struct prestera_switch *sw)
 	if (err)
 		goto err_handlers_register;
 
+	err = prestera_acl_init(sw);
+	if (err)
+		goto err_acl_init;
+
 	err = prestera_devlink_register(sw);
 	if (err)
 		goto err_dl_register;
@@ -843,6 +864,8 @@ err_ports_create:
 err_lag_init:
 	prestera_devlink_unregister(sw);
 err_dl_register:
+	prestera_acl_fini(sw);
+err_acl_init:
 	prestera_event_handlers_unregister(sw);
 err_handlers_register:
 	prestera_rxtx_switch_fini(sw);
@@ -860,6 +883,7 @@ static void prestera_switch_fini(struct prestera_switch *sw)
 	prestera_destroy_ports(sw);
 	prestera_lag_fini(sw);
 	prestera_devlink_unregister(sw);
+	prestera_acl_fini(sw);
 	prestera_event_handlers_unregister(sw);
 	prestera_rxtx_switch_fini(sw);
 	prestera_switchdev_fini(sw);

From 13defa275eef90c07886dbd9e74e3dada8af7348 Mon Sep 17 00:00:00 2001
From: Serhiy Boiko <serhiy.boiko@plvision.eu>
Date: Wed, 16 Jun 2021 19:01:45 +0300
Subject: [PATCH 2004/3292] net: marvell: prestera: Add matchall support

- Introduce matchall filter support
- Add SPAN API to configure port mirroring.
- Add tc mirror action.

At this moment, only mirror (egress) action is supported.

Example:
    tc filter ... action mirred egress mirror dev DEV

Co-developed-by: Volodymyr Mytnyk <vmytnyk@marvell.com>
Signed-off-by: Volodymyr Mytnyk <vmytnyk@marvell.com>
Signed-off-by: Serhiy Boiko <serhiy.boiko@plvision.eu>
Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/prestera/Makefile    |   2 +-
 .../net/ethernet/marvell/prestera/prestera.h  |   2 +
 .../ethernet/marvell/prestera/prestera_acl.c  |   2 +
 .../ethernet/marvell/prestera/prestera_acl.h  |   1 +
 .../ethernet/marvell/prestera/prestera_flow.c |  19 ++
 .../ethernet/marvell/prestera/prestera_hw.c   |  69 +++++
 .../ethernet/marvell/prestera/prestera_hw.h   |   6 +
 .../ethernet/marvell/prestera/prestera_main.c |   8 +
 .../ethernet/marvell/prestera/prestera_span.c | 239 ++++++++++++++++++
 .../ethernet/marvell/prestera/prestera_span.h |  20 ++
 10 files changed, 367 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_span.c
 create mode 100644 drivers/net/ethernet/marvell/prestera/prestera_span.h

diff --git a/drivers/net/ethernet/marvell/prestera/Makefile b/drivers/net/ethernet/marvell/prestera/Makefile
index 42327c4afdbf..0609df8b913d 100644
--- a/drivers/net/ethernet/marvell/prestera/Makefile
+++ b/drivers/net/ethernet/marvell/prestera/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_PRESTERA)	+= prestera.o
 prestera-objs		:= prestera_main.o prestera_hw.o prestera_dsa.o \
 			   prestera_rxtx.o prestera_devlink.o prestera_ethtool.o \
 			   prestera_switchdev.o prestera_acl.o prestera_flow.o \
-			   prestera_flower.o
+			   prestera_flower.o prestera_span.o
 
 obj-$(CONFIG_PRESTERA_PCI)	+= prestera_pci.o
diff --git a/drivers/net/ethernet/marvell/prestera/prestera.h b/drivers/net/ethernet/marvell/prestera/prestera.h
index bbbe780d0886..f18fe664b373 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera.h
@@ -172,6 +172,7 @@ struct prestera_event {
 };
 
 struct prestera_switchdev;
+struct prestera_span;
 struct prestera_rxtx;
 struct prestera_trap_data;
 struct prestera_acl;
@@ -181,6 +182,7 @@ struct prestera_switch {
 	struct prestera_switchdev *swdev;
 	struct prestera_rxtx *rxtx;
 	struct prestera_acl *acl;
+	struct prestera_span *span;
 	struct list_head event_handlers;
 	struct notifier_block netdev_nb;
 	struct prestera_trap_data *trap_data;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.c b/drivers/net/ethernet/marvell/prestera/prestera_acl.c
index 64b66ba1c43f..83c75ffb1a1c 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_acl.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.c
@@ -6,6 +6,7 @@
 #include "prestera.h"
 #include "prestera_hw.h"
 #include "prestera_acl.h"
+#include "prestera_span.h"
 
 struct prestera_acl {
 	struct prestera_switch *sw;
@@ -127,6 +128,7 @@ int prestera_acl_block_bind(struct prestera_flow_block *block,
 	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
 	if (!binding)
 		return -ENOMEM;
+	binding->span_id = PRESTERA_SPAN_INVALID_ID;
 	binding->port = port;
 
 	err = prestera_hw_acl_port_bind(port, block->ruleset->id);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.h b/drivers/net/ethernet/marvell/prestera/prestera_acl.h
index 1b3f516778e5..39b7869be659 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_acl.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.h
@@ -36,6 +36,7 @@ struct prestera_acl_ruleset;
 struct prestera_flow_block_binding {
 	struct list_head list;
 	struct prestera_port *port;
+	int span_id;
 };
 
 struct prestera_flow_block {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flow.c b/drivers/net/ethernet/marvell/prestera/prestera_flow.c
index 12a36723e2a5..c9891e968259 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_flow.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flow.c
@@ -7,10 +7,25 @@
 #include "prestera.h"
 #include "prestera_acl.h"
 #include "prestera_flow.h"
+#include "prestera_span.h"
 #include "prestera_flower.h"
 
 static LIST_HEAD(prestera_block_cb_list);
 
+static int prestera_flow_block_mall_cb(struct prestera_flow_block *block,
+				       struct tc_cls_matchall_offload *f)
+{
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return prestera_span_replace(block, f);
+	case TC_CLSMATCHALL_DESTROY:
+		prestera_span_destroy(block);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int prestera_flow_block_flower_cb(struct prestera_flow_block *block,
 					 struct flow_cls_offload *f)
 {
@@ -38,6 +53,8 @@ static int prestera_flow_block_cb(enum tc_setup_type type,
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return prestera_flow_block_flower_cb(block, type_data);
+	case TC_SETUP_CLSMATCHALL:
+		return prestera_flow_block_mall_cb(block, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -143,6 +160,8 @@ static void prestera_setup_flow_block_unbind(struct prestera_port *port,
 
 	block = flow_block_cb_priv(block_cb);
 
+	prestera_span_destroy(block);
+
 	err = prestera_acl_block_unbind(block, port);
 	if (err)
 		goto error;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
index 42b8d9f56468..c1297859e471 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.c
@@ -56,6 +56,11 @@ enum prestera_cmd_type_t {
 
 	PRESTERA_CMD_TYPE_STP_PORT_SET = 0x1000,
 
+	PRESTERA_CMD_TYPE_SPAN_GET = 0x1100,
+	PRESTERA_CMD_TYPE_SPAN_BIND = 0x1101,
+	PRESTERA_CMD_TYPE_SPAN_UNBIND = 0x1102,
+	PRESTERA_CMD_TYPE_SPAN_RELEASE = 0x1103,
+
 	PRESTERA_CMD_TYPE_CPU_CODE_COUNTERS_GET = 0x2000,
 
 	PRESTERA_CMD_TYPE_ACK = 0x10000,
@@ -377,6 +382,18 @@ struct prestera_msg_acl_ruleset_resp {
 	u16 id;
 };
 
+struct prestera_msg_span_req {
+	struct prestera_msg_cmd cmd;
+	u32 port;
+	u32 dev;
+	u8 id;
+} __packed __aligned(4);
+
+struct prestera_msg_span_resp {
+	struct prestera_msg_ret ret;
+	u8 id;
+} __packed __aligned(4);
+
 struct prestera_msg_stp_req {
 	struct prestera_msg_cmd cmd;
 	u32 port;
@@ -1055,6 +1072,58 @@ int prestera_hw_acl_port_unbind(const struct prestera_port *port,
 			    &req.cmd, sizeof(req));
 }
 
+int prestera_hw_span_get(const struct prestera_port *port, u8 *span_id)
+{
+	struct prestera_msg_span_resp resp;
+	struct prestera_msg_span_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+	};
+	int err;
+
+	err = prestera_cmd_ret(port->sw, PRESTERA_CMD_TYPE_SPAN_GET,
+			       &req.cmd, sizeof(req), &resp.ret, sizeof(resp));
+	if (err)
+		return err;
+
+	*span_id = resp.id;
+
+	return 0;
+}
+
+int prestera_hw_span_bind(const struct prestera_port *port, u8 span_id)
+{
+	struct prestera_msg_span_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+		.id = span_id,
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_SPAN_BIND,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_span_unbind(const struct prestera_port *port)
+{
+	struct prestera_msg_span_req req = {
+		.port = port->hw_id,
+		.dev = port->dev_id,
+	};
+
+	return prestera_cmd(port->sw, PRESTERA_CMD_TYPE_SPAN_UNBIND,
+			    &req.cmd, sizeof(req));
+}
+
+int prestera_hw_span_release(struct prestera_switch *sw, u8 span_id)
+{
+	struct prestera_msg_span_req req = {
+		.id = span_id
+	};
+
+	return prestera_cmd(sw, PRESTERA_CMD_TYPE_SPAN_RELEASE,
+			    &req.cmd, sizeof(req));
+}
+
 int prestera_hw_port_type_get(const struct prestera_port *port, u8 *type)
 {
 	struct prestera_msg_port_attr_req req = {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_hw.h b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
index c01d376574d2..546d5fd8240d 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_hw.h
+++ b/drivers/net/ethernet/marvell/prestera/prestera_hw.h
@@ -188,6 +188,12 @@ int prestera_hw_acl_port_bind(const struct prestera_port *port,
 int prestera_hw_acl_port_unbind(const struct prestera_port *port,
 				u16 ruleset_id);
 
+/* SPAN API */
+int prestera_hw_span_get(const struct prestera_port *port, u8 *span_id);
+int prestera_hw_span_bind(const struct prestera_port *port, u8 span_id);
+int prestera_hw_span_unbind(const struct prestera_port *port);
+int prestera_hw_span_release(struct prestera_switch *sw, u8 span_id);
+
 /* Event handlers */
 int prestera_hw_event_handler_register(struct prestera_switch *sw,
 				       enum prestera_event_type type,
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index c3319d19c910..226f4ff29f6e 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -14,6 +14,7 @@
 #include "prestera_hw.h"
 #include "prestera_acl.h"
 #include "prestera_flow.h"
+#include "prestera_span.h"
 #include "prestera_rxtx.h"
 #include "prestera_devlink.h"
 #include "prestera_ethtool.h"
@@ -845,6 +846,10 @@ static int prestera_switch_init(struct prestera_switch *sw)
 	if (err)
 		goto err_acl_init;
 
+	err = prestera_span_init(sw);
+	if (err)
+		goto err_span_init;
+
 	err = prestera_devlink_register(sw);
 	if (err)
 		goto err_dl_register;
@@ -864,6 +869,8 @@ err_ports_create:
 err_lag_init:
 	prestera_devlink_unregister(sw);
 err_dl_register:
+	prestera_span_fini(sw);
+err_span_init:
 	prestera_acl_fini(sw);
 err_acl_init:
 	prestera_event_handlers_unregister(sw);
@@ -883,6 +890,7 @@ static void prestera_switch_fini(struct prestera_switch *sw)
 	prestera_destroy_ports(sw);
 	prestera_lag_fini(sw);
 	prestera_devlink_unregister(sw);
+	prestera_span_fini(sw);
 	prestera_acl_fini(sw);
 	prestera_event_handlers_unregister(sw);
 	prestera_rxtx_switch_fini(sw);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_span.c b/drivers/net/ethernet/marvell/prestera/prestera_span.c
new file mode 100644
index 000000000000..3cafca827bb7
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_span.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2020 Marvell International Ltd. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+
+#include "prestera.h"
+#include "prestera_hw.h"
+#include "prestera_acl.h"
+#include "prestera_span.h"
+
+struct prestera_span_entry {
+	struct list_head list;
+	struct prestera_port *port;
+	refcount_t ref_count;
+	u8 id;
+};
+
+struct prestera_span {
+	struct prestera_switch *sw;
+	struct list_head entries;
+};
+
+static struct prestera_span_entry *
+prestera_span_entry_create(struct prestera_port *port, u8 span_id)
+{
+	struct prestera_span_entry *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&entry->ref_count, 1);
+	entry->port = port;
+	entry->id = span_id;
+	list_add_tail(&entry->list, &port->sw->span->entries);
+
+	return entry;
+}
+
+static void prestera_span_entry_del(struct prestera_span_entry *entry)
+{
+	list_del(&entry->list);
+	kfree(entry);
+}
+
+static struct prestera_span_entry *
+prestera_span_entry_find_by_id(struct prestera_span *span, u8 span_id)
+{
+	struct prestera_span_entry *entry;
+
+	list_for_each_entry(entry, &span->entries, list) {
+		if (entry->id == span_id)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct prestera_span_entry *
+prestera_span_entry_find_by_port(struct prestera_span *span,
+				 struct prestera_port *port)
+{
+	struct prestera_span_entry *entry;
+
+	list_for_each_entry(entry, &span->entries, list) {
+		if (entry->port == port)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static int prestera_span_get(struct prestera_port *port, u8 *span_id)
+{
+	u8 new_span_id;
+	struct prestera_switch *sw = port->sw;
+	struct prestera_span_entry *entry;
+	int err;
+
+	entry = prestera_span_entry_find_by_port(sw->span, port);
+	if (entry) {
+		refcount_inc(&entry->ref_count);
+		*span_id = entry->id;
+		return 0;
+	}
+
+	err = prestera_hw_span_get(port, &new_span_id);
+	if (err)
+		return err;
+
+	entry = prestera_span_entry_create(port, new_span_id);
+	if (IS_ERR(entry)) {
+		prestera_hw_span_release(sw, new_span_id);
+		return PTR_ERR(entry);
+	}
+
+	*span_id = new_span_id;
+	return 0;
+}
+
+static int prestera_span_put(struct prestera_switch *sw, u8 span_id)
+{
+	struct prestera_span_entry *entry;
+	int err;
+
+	entry = prestera_span_entry_find_by_id(sw->span, span_id);
+	if (!entry)
+		return false;
+
+	if (!refcount_dec_and_test(&entry->ref_count))
+		return 0;
+
+	err = prestera_hw_span_release(sw, span_id);
+	if (err)
+		return err;
+
+	prestera_span_entry_del(entry);
+	return 0;
+}
+
+static int prestera_span_rule_add(struct prestera_flow_block_binding *binding,
+				  struct prestera_port *to_port)
+{
+	struct prestera_switch *sw = binding->port->sw;
+	u8 span_id;
+	int err;
+
+	if (binding->span_id != PRESTERA_SPAN_INVALID_ID)
+		/* port already in mirroring */
+		return -EEXIST;
+
+	err = prestera_span_get(to_port, &span_id);
+	if (err)
+		return err;
+
+	err = prestera_hw_span_bind(binding->port, span_id);
+	if (err) {
+		prestera_span_put(sw, span_id);
+		return err;
+	}
+
+	binding->span_id = span_id;
+	return 0;
+}
+
+static int prestera_span_rule_del(struct prestera_flow_block_binding *binding)
+{
+	int err;
+
+	err = prestera_hw_span_unbind(binding->port);
+	if (err)
+		return err;
+
+	err = prestera_span_put(binding->port->sw, binding->span_id);
+	if (err)
+		return err;
+
+	binding->span_id = PRESTERA_SPAN_INVALID_ID;
+	return 0;
+}
+
+int prestera_span_replace(struct prestera_flow_block *block,
+			  struct tc_cls_matchall_offload *f)
+{
+	struct prestera_flow_block_binding *binding;
+	__be16 protocol = f->common.protocol;
+	struct flow_action_entry *act;
+	struct prestera_port *port;
+	int err;
+
+	if (!flow_offload_has_one_action(&f->rule->action)) {
+		NL_SET_ERR_MSG(f->common.extack,
+			       "Only singular actions are supported");
+		return -EOPNOTSUPP;
+	}
+
+	act = &f->rule->action.entries[0];
+
+	if (!prestera_netdev_check(act->dev)) {
+		NL_SET_ERR_MSG(f->common.extack,
+			       "Only Marvell Prestera port is supported");
+		return -EINVAL;
+	}
+	if (!tc_cls_can_offload_and_chain0(act->dev, &f->common))
+		return -EOPNOTSUPP;
+	if (act->id != FLOW_ACTION_MIRRED)
+		return -EOPNOTSUPP;
+	if (protocol != htons(ETH_P_ALL))
+		return -EOPNOTSUPP;
+
+	port = netdev_priv(act->dev);
+
+	list_for_each_entry(binding, &block->binding_list, list) {
+		err = prestera_span_rule_add(binding, port);
+		if (err)
+			goto rollback;
+	}
+
+	return 0;
+
+rollback:
+	list_for_each_entry_continue_reverse(binding,
+					     &block->binding_list, list)
+		prestera_span_rule_del(binding);
+	return err;
+}
+
+void prestera_span_destroy(struct prestera_flow_block *block)
+{
+	struct prestera_flow_block_binding *binding;
+
+	list_for_each_entry(binding, &block->binding_list, list)
+		prestera_span_rule_del(binding);
+}
+
+int prestera_span_init(struct prestera_switch *sw)
+{
+	struct prestera_span *span;
+
+	span = kzalloc(sizeof(*span), GFP_KERNEL);
+	if (!span)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&span->entries);
+
+	sw->span = span;
+	span->sw = sw;
+
+	return 0;
+}
+
+void prestera_span_fini(struct prestera_switch *sw)
+{
+	struct prestera_span *span = sw->span;
+
+	WARN_ON(!list_empty(&span->entries));
+	kfree(span);
+}
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_span.h b/drivers/net/ethernet/marvell/prestera/prestera_span.h
new file mode 100644
index 000000000000..f0644521f78a
--- /dev/null
+++ b/drivers/net/ethernet/marvell/prestera/prestera_span.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2019-2020 Marvell International Ltd. All rights reserved. */
+
+#ifndef _PRESTERA_SPAN_H_
+#define _PRESTERA_SPAN_H_
+
+#include <net/pkt_cls.h>
+
+#define PRESTERA_SPAN_INVALID_ID -1
+
+struct prestera_switch;
+struct prestera_flow_block;
+
+int prestera_span_init(struct prestera_switch *sw);
+void prestera_span_fini(struct prestera_switch *sw);
+int prestera_span_replace(struct prestera_flow_block *block,
+			  struct tc_cls_matchall_offload *f);
+void prestera_span_destroy(struct prestera_flow_block *block);
+
+#endif /* _PRESTERA_SPAN_H_ */

From 5acc44f39458f43dac9724cefa4da29847cfe997 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Fri, 11 Jun 2021 19:06:45 +0200
Subject: [PATCH 2005/3292] netfilter: nft_exthdr: Search chunks in SCTP
 packets only

Since user space does not generate a payload dependency, plain sctp
chunk matches cause searching in non-SCTP packets, too. Avoid this
potential mis-interpretation of packet data by checking pkt->tprot.

Fixes: 133dc203d77df ("netfilter: nft_exthdr: Support SCTP chunks")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_exthdr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 7f705b5c09de..9cf86be2cff4 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -312,6 +312,9 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
 	const struct sctp_chunkhdr *sch;
 	struct sctp_chunkhdr _sch;
 
+	if (pkt->tprot != IPPROTO_SCTP)
+		goto err;
+
 	do {
 		sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
 		if (!sch || !sch->length)
@@ -334,7 +337,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
 		}
 		offset += SCTP_PAD4(ntohs(sch->length));
 	} while (offset < pkt->skb->len);
-
+err:
 	if (priv->flags & NFT_EXTHDR_F_PRESENT)
 		nft_reg_store8(dest, false);
 	else

From 06e95f0a2aa24d480cbc0c3bd18ca49e1c85f868 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Fri, 11 Jun 2021 19:08:26 +0200
Subject: [PATCH 2006/3292] netfilter: nft_extdhr: Drop pointless check of
 tprot_set

Pablo says, tprot_set is only there to detect if tprot was set to
IPPROTO_IP as that evaluates to zero. Therefore, code asserting a
different value in tprot does not need to check tprot_set.

Fixes: 935b7f6430188 ("netfilter: nft_exthdr: add TCP option matching")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_exthdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 9cf86be2cff4..4f583d2e220e 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -164,7 +164,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
 {
 	struct tcphdr *tcph;
 
-	if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+	if (pkt->tprot != IPPROTO_TCP)
 		return NULL;
 
 	tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);

From 809ed84de8b3f2fd7b1d06efb94bf98fd318a7d7 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Wed, 16 Jun 2021 14:52:11 -0700
Subject: [PATCH 2007/3292] selftests/bpf: Whitelist test_progs.h from
 .gitignore

Somehow test_progs.h was being included by the existing rule:

    /test_progs*

This is bad because:

    1) test_progs.h is a checked in file
    2) grep-like tools like ripgrep[0] respect gitignore and
       test_progs.h was being hidden from searches

[0]: https://github.com/BurntSushi/ripgrep

Fixes: 74b5a5968fe8 ("selftests/bpf: Replace test_progs and test_maps w/ general rule")
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/a46f64944bf678bc652410ca6028d3450f4f7f4b.1623880296.git.dxu@dxuuu.xyz
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index eae6fc7d3ed8..addcfd8b615e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
 /test_progs*
+!test_progs.h
 test_verifier_log
 feature
 test_sock

From 836382dc24717af203ce06703530528827086955 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 16 Jun 2021 22:25:05 +0200
Subject: [PATCH 2008/3292] netfilter: nf_tables: add last expression

Add a new optional expression that tells you when last matching on a
given rule / set element element has happened.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h   |  1 +
 include/uapi/linux/netfilter/nf_tables.h | 15 ++++
 net/netfilter/Makefile                   |  2 +-
 net/netfilter/nf_tables_core.c           |  1 +
 net/netfilter/nft_last.c                 | 87 ++++++++++++++++++++++++
 5 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 net/netfilter/nft_last.c

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 46c8d5bb5d8d..0fa5a6d98a00 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -16,6 +16,7 @@ extern struct nft_expr_type nft_range_type;
 extern struct nft_expr_type nft_meta_type;
 extern struct nft_expr_type nft_rt_type;
 extern struct nft_expr_type nft_exthdr_type;
+extern struct nft_expr_type nft_last_type;
 
 #ifdef CONFIG_NETWORK_SECMARK
 extern struct nft_object_type nft_secmark_obj_type;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 19715e2679d1..e94d1fa554cb 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1195,6 +1195,21 @@ enum nft_counter_attributes {
 };
 #define NFTA_COUNTER_MAX	(__NFTA_COUNTER_MAX - 1)
 
+/**
+ * enum nft_last_attributes - nf_tables last expression netlink attributes
+ *
+ * @NFTA_LAST_SET: last update has been set, zero means never updated (NLA_U32)
+ * @NFTA_LAST_MSECS: milliseconds since last update (NLA_U64)
+ */
+enum nft_last_attributes {
+	NFTA_LAST_UNSPEC,
+	NFTA_LAST_SET,
+	NFTA_LAST_MSECS,
+	NFTA_LAST_PAD,
+	__NFTA_LAST_MAX
+};
+#define NFTA_LAST_MAX	(__NFTA_LAST_MAX - 1)
+
 /**
  * enum nft_log_attributes - nf_tables log expression netlink attributes
  *
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 87112dad1fd4..049890e00a3d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -74,7 +74,7 @@ obj-$(CONFIG_NF_DUP_NETDEV)	+= nf_dup_netdev.o
 nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
 		  nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
 		  nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
-		  nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
+		  nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
 		  nft_chain_route.o nf_tables_offload.o \
 		  nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
 		  nft_set_pipapo.o
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 7780342e2f2d..866cfba04d6c 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -268,6 +268,7 @@ static struct nft_expr_type *nft_basic_types[] = {
 	&nft_meta_type,
 	&nft_rt_type,
 	&nft_exthdr_type,
+	&nft_last_type,
 };
 
 static struct nft_object_type *nft_basic_objects[] = {
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
new file mode 100644
index 000000000000..913ac45167f2
--- /dev/null
+++ b/net/netfilter/nft_last.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_last_priv {
+	unsigned long	last_jiffies;
+	unsigned int	last_set;
+};
+
+static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
+	[NFTA_LAST_SET] = { .type = NLA_U32 },
+	[NFTA_LAST_MSECS] = { .type = NLA_U64 },
+};
+
+static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_last_priv *priv = nft_expr_priv(expr);
+	u64 last_jiffies;
+	int err;
+
+	if (tb[NFTA_LAST_MSECS]) {
+		err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
+		if (err < 0)
+			return err;
+
+		priv->last_jiffies = jiffies + (unsigned long)last_jiffies;
+		priv->last_set = 1;
+	}
+
+	return 0;
+}
+
+static void nft_last_eval(const struct nft_expr *expr,
+			  struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+	struct nft_last_priv *priv = nft_expr_priv(expr);
+
+	priv->last_jiffies = jiffies;
+	priv->last_set = 1;
+}
+
+static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_last_priv *priv = nft_expr_priv(expr);
+	__be64 msecs;
+
+	if (time_before(jiffies, priv->last_jiffies))
+		priv->last_set = 0;
+
+	if (priv->last_set)
+		msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+	else
+		msecs = 0;
+
+	if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+	    nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nft_expr_ops nft_last_ops = {
+	.type		= &nft_last_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
+	.eval		= nft_last_eval,
+	.init		= nft_last_init,
+	.dump		= nft_last_dump,
+};
+
+struct nft_expr_type nft_last_type __read_mostly = {
+	.name		= "last",
+	.ops		= &nft_last_ops,
+	.policy		= nft_last_policy,
+	.maxattr	= NFTA_LAST_MAX,
+	.flags		= NFT_EXPR_STATEFUL,
+	.owner		= THIS_MODULE,
+};

From 712b78c697cd75b62b7707b5a20bcd914aedfdaa Mon Sep 17 00:00:00 2001
From: Shuyi Cheng <chengshuyi@linux.alibaba.com>
Date: Wed, 16 Jun 2021 10:04:36 +0800
Subject: [PATCH 2009/3292] bpf: Fix typo in kernel/bpf/bpf_lsm.c

Fix s/sleeable/sleepable/ typo in a comment.

Signed-off-by: Shuyi Cheng <chengshuyi@linux.alibaba.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1623809076-97907-1-git-send-email-chengshuyi@linux.alibaba.com
---
 kernel/bpf/bpf_lsm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 5efb2b24012c..99ada85da8bb 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -125,7 +125,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 }
 
 /* The set of hooks which are called without pagefaults disabled and are allowed
- * to "sleep" and thus can be used for sleeable BPF programs.
+ * to "sleep" and thus can be used for sleepable BPF programs.
  */
 BTF_SET_START(sleepable_lsm_hooks)
 BTF_ID(func, bpf_lsm_bpf)

From bf067f1c51239ba70be714c513820d2c4a5cd457 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 15 Jun 2021 21:55:54 +0800
Subject: [PATCH 2010/3292] samples/bpf: Add missing option to xdp_fwd usage

xdp_fwd usage() is missing the introduction of the "-S"
and "-F" options, this patch adds it.

Fixes: d50ecc46d18f ("samples/bpf: Attach XDP programs in driver mode by default")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20210615135554.29158-1-wanghai38@huawei.com
---
 samples/bpf/xdp_fwd_user.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 74a4583d0d86..00061261a8da 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -67,6 +67,8 @@ static void usage(const char *prog)
 		"usage: %s [OPTS] interface-list\n"
 		"\nOPTS:\n"
 		"    -d    detach program\n"
+		"    -S    use skb-mode\n"
+		"    -F    force loading prog\n"
 		"    -D    direct table lookups (skip fib rules)\n",
 		prog);
 }

From dfdda1a0f4aad476ae25f2840c9426da3b99506d Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 15 Jun 2021 21:57:24 +0800
Subject: [PATCH 2011/3292] samples/bpf: Add missing option to xdp_sample_pkts
 usage

xdp_sample_pkts usage() is missing the introduction of the
"-S" option, this patch adds it.

Fixes: d50ecc46d18f ("samples/bpf: Attach XDP programs in driver mode by default")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20210615135724.29528-1-wanghai38@huawei.com
---
 samples/bpf/xdp_sample_pkts_user.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index 706475e004cb..495e09897bd3 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -103,7 +103,8 @@ static void usage(const char *prog)
 	fprintf(stderr,
 		"%s: %s [OPTS] <ifname|ifindex>\n\n"
 		"OPTS:\n"
-		"    -F    force loading prog\n",
+		"    -F    force loading prog\n"
+		"    -S    use skb-mode\n",
 		__func__, prog);
 }
 

From 4a638d581a7a3f00f277349903ff419b6790e2ae Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 16 Jun 2021 09:36:35 +0100
Subject: [PATCH 2012/3292] libbpf: Fail compilation if target arch is missing

bpf2go is the Go equivalent of libbpf skeleton. The convention is that
the compiled BPF is checked into the repository to facilitate distributing
BPF as part of Go packages. To make this portable, bpf2go by default
generates both bpfel and bpfeb variants of the C.

Using bpf_tracing.h is inherently non-portable since the fields of
struct pt_regs differ between platforms, so CO-RE can't help us here.
The only way of working around this is to compile for each target
platform independently. bpf2go can't do this by default since there
are too many platforms.

Define the various PT_... macros when no target can be determined and
turn them into compilation failures. This works because bpf2go always
compiles for bpf targets, so the compiler fallback doesn't kick in.
Conditionally define __BPF_MISSING_TARGET so that we can inject a
more appropriate error message at build time. The user can then
choose which platform to target explicitly.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210616083635.11434-1-lmb@cloudflare.com
---
 tools/lib/bpf/bpf_tracing.h | 46 +++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index c0f3a26aa582..d6bfbe009296 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -25,26 +25,35 @@
 	#define bpf_target_sparc
 	#define bpf_target_defined
 #else
-	#undef bpf_target_defined
-#endif
 
 /* Fall back to what the compiler says */
-#ifndef bpf_target_defined
 #if defined(__x86_64__)
 	#define bpf_target_x86
+	#define bpf_target_defined
 #elif defined(__s390__)
 	#define bpf_target_s390
+	#define bpf_target_defined
 #elif defined(__arm__)
 	#define bpf_target_arm
+	#define bpf_target_defined
 #elif defined(__aarch64__)
 	#define bpf_target_arm64
+	#define bpf_target_defined
 #elif defined(__mips__)
 	#define bpf_target_mips
+	#define bpf_target_defined
 #elif defined(__powerpc__)
 	#define bpf_target_powerpc
+	#define bpf_target_defined
 #elif defined(__sparc__)
 	#define bpf_target_sparc
+	#define bpf_target_defined
+#endif /* no compiler target */
+
 #endif
+
+#ifndef __BPF_TARGET_MISSING
+#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\""
 #endif
 
 #if defined(bpf_target_x86)
@@ -287,7 +296,7 @@ struct pt_regs;
 #elif defined(bpf_target_sparc)
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
 #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
-#else
+#elif defined(bpf_target_defined)
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \
 	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \
@@ -295,6 +304,35 @@ struct pt_regs;
 			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
 
+#if !defined(bpf_target_defined)
+
+#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* !defined(bpf_target_defined) */
+
 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
 #endif

From 1ad4f329fccb5d9eb7b0a38d7fdf0f4688c6b341 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 24 May 2021 10:11:58 +0800
Subject: [PATCH 2013/3292] PM / devfreq: userspace: Use DEVICE_ATTR_RW macro

Use DEVICE_ATTR_RW helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/governor_userspace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 0fd6c4851071..ab9db7adb3ad 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -31,8 +31,8 @@ static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
 	return 0;
 }
 
-static ssize_t store_freq(struct device *dev, struct device_attribute *attr,
-			  const char *buf, size_t count)
+static ssize_t set_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
 	struct userspace_data *data;
@@ -52,8 +52,8 @@ static ssize_t store_freq(struct device *dev, struct device_attribute *attr,
 	return err;
 }
 
-static ssize_t show_freq(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static ssize_t set_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
 	struct userspace_data *data;
@@ -70,7 +70,7 @@ static ssize_t show_freq(struct device *dev, struct device_attribute *attr,
 	return err;
 }
 
-static DEVICE_ATTR(set_freq, 0644, show_freq, store_freq);
+static DEVICE_ATTR_RW(set_freq);
 static struct attribute *dev_entries[] = {
 	&dev_attr_set_freq.attr,
 	NULL,

From 271ca53cb0c8b3a45c73e1140fc3336c2da42315 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 1 Jun 2021 05:23:18 +0300
Subject: [PATCH 2014/3292] dt-bindings: devfreq: tegra30-actmon: Convert to
 schema

Convert NVIDIA Tegra ACTMON binding to schema.

Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 .../arm/tegra/nvidia,tegra30-actmon.txt       |  57 ---------
 .../devfreq/nvidia,tegra30-actmon.yaml        | 121 ++++++++++++++++++
 2 files changed, 121 insertions(+), 57 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
 create mode 100644 Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml

diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
deleted file mode 100644
index 897eedfa2bc8..000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-NVIDIA Tegra Activity Monitor
-
-The activity monitor block collects statistics about the behaviour of other
-components in the system. This information can be used to derive the rate at
-which the external memory needs to be clocked in order to serve all requests
-from the monitored clients.
-
-Required properties:
-- compatible: should be "nvidia,tegra<chip>-actmon"
-- reg: offset and length of the register set for the device
-- interrupts: standard interrupt property
-- clocks: Must contain a phandle and clock specifier pair for each entry in
-clock-names. See ../../clock/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
-  - actmon
-  - emc
-- resets: Must contain an entry for each entry in reset-names. See
-../../reset/reset.txt for details.
-- reset-names: Must include the following entries:
-  - actmon
-- operating-points-v2: See ../bindings/opp/opp.txt for details.
-- interconnects: Should contain entries for memory clients sitting on
-                 MC->EMC memory interconnect path.
-- interconnect-names: Should include name of the interconnect path for each
-                      interconnect entry. Consult TRM documentation for
-                      information about available memory clients, see MEMORY
-                      CONTROLLER section.
-
-For each opp entry in 'operating-points-v2' table:
-- opp-supported-hw: bitfield indicating SoC speedo ID mask
-- opp-peak-kBps: peak bandwidth of the memory channel
-
-Example:
-	dfs_opp_table: opp-table {
-		compatible = "operating-points-v2";
-
-		opp@12750000 {
-			opp-hz = /bits/ 64 <12750000>;
-			opp-supported-hw = <0x000F>;
-			opp-peak-kBps = <51000>;
-		};
-		...
-	};
-
-	actmon@6000c800 {
-		compatible = "nvidia,tegra124-actmon";
-		reg = <0x0 0x6000c800 0x0 0x400>;
-		interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
-			 <&tegra_car TEGRA124_CLK_EMC>;
-		clock-names = "actmon", "emc";
-		resets = <&tegra_car 119>;
-		reset-names = "actmon";
-		operating-points-v2 = <&dfs_opp_table>;
-		interconnects = <&mc TEGRA124_MC_MPCORER &emc>;
-		interconnect-names = "cpu";
-	};
diff --git a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
new file mode 100644
index 000000000000..ba938eed28ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/devfreq/nvidia,tegra30-actmon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra30 Activity Monitor
+
+maintainers:
+  - Dmitry Osipenko <digetx@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+  - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+  The activity monitor block collects statistics about the behaviour of other
+  components in the system. This information can be used to derive the rate at
+  which the external memory needs to be clocked in order to serve all requests
+  from the monitored clients.
+
+properties:
+  compatible:
+    enum:
+      - nvidia,tegra30-actmon
+      - nvidia,tegra114-actmon
+      - nvidia,tegra124-actmon
+      - nvidia,tegra210-actmon
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: actmon
+      - const: emc
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: actmon
+
+  interrupts:
+    maxItems: 1
+
+  interconnects:
+    minItems: 1
+    maxItems: 12
+
+  interconnect-names:
+    minItems: 1
+    maxItems: 12
+    description:
+      Should include name of the interconnect path for each interconnect
+      entry. Consult TRM documentation for information about available
+      memory clients, see MEMORY CONTROLLER and ACTIVITY MONITOR sections.
+
+  operating-points-v2:
+    description:
+      Should contain freqs and voltages and opp-supported-hw property, which
+      is a bitfield indicating SoC speedo ID mask.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - interrupts
+  - interconnects
+  - interconnect-names
+  - operating-points-v2
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/memory/tegra30-mc.h>
+
+    mc: memory-controller@7000f000 {
+        compatible = "nvidia,tegra30-mc";
+        reg = <0x7000f000 0x400>;
+        clocks = <&clk 32>;
+        clock-names = "mc";
+
+        interrupts = <0 77 4>;
+
+        #iommu-cells = <1>;
+        #reset-cells = <1>;
+        #interconnect-cells = <1>;
+    };
+
+    emc: external-memory-controller@7000f400 {
+        compatible = "nvidia,tegra30-emc";
+        reg = <0x7000f400 0x400>;
+        interrupts = <0 78 4>;
+        clocks = <&clk 57>;
+
+        nvidia,memory-controller = <&mc>;
+        operating-points-v2 = <&dvfs_opp_table>;
+        power-domains = <&domain>;
+
+        #interconnect-cells = <0>;
+    };
+
+    actmon@6000c800 {
+        compatible = "nvidia,tegra30-actmon";
+        reg = <0x6000c800 0x400>;
+        interrupts = <0 45 4>;
+        clocks = <&clk 119>, <&clk 57>;
+        clock-names = "actmon", "emc";
+        resets = <&rst 119>;
+        reset-names = "actmon";
+        operating-points-v2 = <&dvfs_opp_table>;
+        interconnects = <&mc TEGRA30_MC_MPCORER &emc>;
+        interconnect-names = "cpu-read";
+    };

From 6b61f55ecbe693d9d0d7ae14ebce01dabe10ecf1 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 1 Jun 2021 05:23:19 +0300
Subject: [PATCH 2015/3292] dt-bindings: devfreq: tegra30-actmon: Add
 cooling-cells

The ACTMON watches activity of memory clients. Decisions about a minimum
required frequency are made based on the info from ACTMON. We can use
ACTMON as a thermal cooling device by limiting the required frequency.
Document new cooling-cells property of NVIDIA Tegra ACTMON hardware unit.

Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 .../devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml   | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
index ba938eed28ee..e3379d106728 100644
--- a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
+++ b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
@@ -63,6 +63,9 @@ properties:
       Should contain freqs and voltages and opp-supported-hw property, which
       is a bitfield indicating SoC speedo ID mask.
 
+  "#cooling-cells":
+    const: 2
+
 required:
   - compatible
   - reg
@@ -74,6 +77,7 @@ required:
   - interconnects
   - interconnect-names
   - operating-points-v2
+  - "#cooling-cells"
 
 additionalProperties: false
 
@@ -118,4 +122,5 @@ examples:
         operating-points-v2 = <&dvfs_opp_table>;
         interconnects = <&mc TEGRA30_MC_MPCORER &emc>;
         interconnect-names = "cpu-read";
+        #cooling-cells = <2>;
     };

From 28131e9d933339a92f78e7ab6429f4aaaa07061c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 16 Jun 2021 11:25:11 +0200
Subject: [PATCH 2016/3292] bpf: Fix up register-based shifts in interpreter to
 silence KUBSAN

syzbot reported a shift-out-of-bounds that KUBSAN observed in the
interpreter:

  [...]
  UBSAN: shift-out-of-bounds in kernel/bpf/core.c:1420:2
  shift exponent 255 is too large for 64-bit type 'long long unsigned int'
  CPU: 1 PID: 11097 Comm: syz-executor.4 Not tainted 5.12.0-rc2-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:79 [inline]
   dump_stack+0x141/0x1d7 lib/dump_stack.c:120
   ubsan_epilogue+0xb/0x5a lib/ubsan.c:148
   __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:327
   ___bpf_prog_run.cold+0x19/0x56c kernel/bpf/core.c:1420
   __bpf_prog_run32+0x8f/0xd0 kernel/bpf/core.c:1735
   bpf_dispatcher_nop_func include/linux/bpf.h:644 [inline]
   bpf_prog_run_pin_on_cpu include/linux/filter.h:624 [inline]
   bpf_prog_run_clear_cb include/linux/filter.h:755 [inline]
   run_filter+0x1a1/0x470 net/packet/af_packet.c:2031
   packet_rcv+0x313/0x13e0 net/packet/af_packet.c:2104
   dev_queue_xmit_nit+0x7c2/0xa90 net/core/dev.c:2387
   xmit_one net/core/dev.c:3588 [inline]
   dev_hard_start_xmit+0xad/0x920 net/core/dev.c:3609
   __dev_queue_xmit+0x2121/0x2e00 net/core/dev.c:4182
   __bpf_tx_skb net/core/filter.c:2116 [inline]
   __bpf_redirect_no_mac net/core/filter.c:2141 [inline]
   __bpf_redirect+0x548/0xc80 net/core/filter.c:2164
   ____bpf_clone_redirect net/core/filter.c:2448 [inline]
   bpf_clone_redirect+0x2ae/0x420 net/core/filter.c:2420
   ___bpf_prog_run+0x34e1/0x77d0 kernel/bpf/core.c:1523
   __bpf_prog_run512+0x99/0xe0 kernel/bpf/core.c:1737
   bpf_dispatcher_nop_func include/linux/bpf.h:644 [inline]
   bpf_test_run+0x3ed/0xc50 net/bpf/test_run.c:50
   bpf_prog_test_run_skb+0xabc/0x1c50 net/bpf/test_run.c:582
   bpf_prog_test_run kernel/bpf/syscall.c:3127 [inline]
   __do_sys_bpf+0x1ea9/0x4f00 kernel/bpf/syscall.c:4406
   do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
   entry_SYSCALL_64_after_hwframe+0x44/0xae
  [...]

Generally speaking, KUBSAN reports from the kernel should be fixed.
However, in case of BPF, this particular report caused concerns since
the large shift is not wrong from BPF point of view, just undefined.
In the verifier, K-based shifts that are >= {64,32} (depending on the
bitwidth of the instruction) are already rejected. The register-based
cases were not given their content might not be known at verification
time. Ideas such as verifier instruction rewrite with an additional
AND instruction for the source register were brought up, but regularly
rejected due to the additional runtime overhead they incur.

As Edward Cree rightly put it:

  Shifts by more than insn bitness are legal in the BPF ISA; they are
  implementation-defined behaviour [of the underlying architecture],
  rather than UB, and have been made legal for performance reasons.
  Each of the JIT backends compiles the BPF shift operations to machine
  instructions which produce implementation-defined results in such a
  case; the resulting contents of the register may be arbitrary but
  program behaviour as a whole remains defined.

  Guard checks in the fast path (i.e. affecting JITted code) will thus
  not be accepted.

  The case of division by zero is not truly analogous here, as division
  instructions on many of the JIT-targeted architectures will raise a
  machine exception / fault on division by zero, whereas (to the best
  of my knowledge) none will do so on an out-of-bounds shift.

Given the KUBSAN report only affects the BPF interpreter, but not JITs,
one solution is to add the ANDs with 63 or 31 into ___bpf_prog_run().
That would make the shifts defined, and thus shuts up KUBSAN, and the
compiler would optimize out the AND on any CPU that interprets the shift
amounts modulo the width anyway (e.g., confirmed from disassembly that
on x86-64 and arm64 the generated interpreter code is the same before
and after this fix).

The BPF interpreter is slow path, and most likely compiled out anyway
as distros select BPF_JIT_ALWAYS_ON to avoid speculative execution of
BPF instructions by the interpreter. Given the main argument was to
avoid sacrificing performance, the fact that the AND is optimized away
from compiler for mainstream archs helps as well as a solution moving
forward. Also add a comment on LSH/RSH/ARSH translation for JIT authors
to provide guidance when they see the ___bpf_prog_run() interpreter
code and use it as a model for a new JIT backend.

Reported-by: syzbot+bed360704c521841c85d@syzkaller.appspotmail.com
Reported-by: Kurt Manucredo <fuzzybritches0@gmail.com>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Co-developed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: syzbot+bed360704c521841c85d@syzkaller.appspotmail.com
Cc: Edward Cree <ecree.xilinx@gmail.com>
Link: https://lore.kernel.org/bpf/0000000000008f912605bd30d5d7@google.com
Link: https://lore.kernel.org/bpf/bac16d8d-c174-bdc4-91bd-bfa62b410190@gmail.com
---
 kernel/bpf/core.c | 61 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5e31ee9f7512..034ad93a1ad7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1392,29 +1392,54 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 select_insn:
 	goto *jumptable[insn->code];
 
-	/* ALU */
-#define ALU(OPCODE, OP)			\
-	ALU64_##OPCODE##_X:		\
-		DST = DST OP SRC;	\
-		CONT;			\
-	ALU_##OPCODE##_X:		\
-		DST = (u32) DST OP (u32) SRC;	\
-		CONT;			\
-	ALU64_##OPCODE##_K:		\
-		DST = DST OP IMM;		\
-		CONT;			\
-	ALU_##OPCODE##_K:		\
-		DST = (u32) DST OP (u32) IMM;	\
+	/* Explicitly mask the register-based shift amounts with 63 or 31
+	 * to avoid undefined behavior. Normally this won't affect the
+	 * generated code, for example, in case of native 64 bit archs such
+	 * as x86-64 or arm64, the compiler is optimizing the AND away for
+	 * the interpreter. In case of JITs, each of the JIT backends compiles
+	 * the BPF shift operations to machine instructions which produce
+	 * implementation-defined results in such a case; the resulting
+	 * contents of the register may be arbitrary, but program behaviour
+	 * as a whole remains defined. In other words, in case of JIT backends,
+	 * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
+	 */
+	/* ALU (shifts) */
+#define SHT(OPCODE, OP)					\
+	ALU64_##OPCODE##_X:				\
+		DST = DST OP (SRC & 63);		\
+		CONT;					\
+	ALU_##OPCODE##_X:				\
+		DST = (u32) DST OP ((u32) SRC & 31);	\
+		CONT;					\
+	ALU64_##OPCODE##_K:				\
+		DST = DST OP IMM;			\
+		CONT;					\
+	ALU_##OPCODE##_K:				\
+		DST = (u32) DST OP (u32) IMM;		\
+		CONT;
+	/* ALU (rest) */
+#define ALU(OPCODE, OP)					\
+	ALU64_##OPCODE##_X:				\
+		DST = DST OP SRC;			\
+		CONT;					\
+	ALU_##OPCODE##_X:				\
+		DST = (u32) DST OP (u32) SRC;		\
+		CONT;					\
+	ALU64_##OPCODE##_K:				\
+		DST = DST OP IMM;			\
+		CONT;					\
+	ALU_##OPCODE##_K:				\
+		DST = (u32) DST OP (u32) IMM;		\
 		CONT;
-
 	ALU(ADD,  +)
 	ALU(SUB,  -)
 	ALU(AND,  &)
 	ALU(OR,   |)
-	ALU(LSH, <<)
-	ALU(RSH, >>)
 	ALU(XOR,  ^)
 	ALU(MUL,  *)
+	SHT(LSH, <<)
+	SHT(RSH, >>)
+#undef SHT
 #undef ALU
 	ALU_NEG:
 		DST = (u32) -DST;
@@ -1439,13 +1464,13 @@ select_insn:
 		insn++;
 		CONT;
 	ALU_ARSH_X:
-		DST = (u64) (u32) (((s32) DST) >> SRC);
+		DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
 		CONT;
 	ALU_ARSH_K:
 		DST = (u64) (u32) (((s32) DST) >> IMM);
 		CONT;
 	ALU64_ARSH_X:
-		(*(s64 *) &DST) >>= SRC;
+		(*(s64 *) &DST) >>= (SRC & 63);
 		CONT;
 	ALU64_ARSH_K:
 		(*(s64 *) &DST) >>= IMM;

From f20792d425d2efd2680f2855c1e3fec01c2e569e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 16 Jun 2021 21:14:46 -0700
Subject: [PATCH 2017/3292] selftests/bpf: Fix selftests build with old
 system-wide headers

migrate_reuseport.c selftest relies on having TCP_FASTOPEN_CONNECT defined in
system-wide netinet/tcp.h. Selftests can use up-to-date uapi/linux/tcp.h, but
that one doesn't have SOL_TCP. So instead of switching everything to uapi
header, add #define for TCP_FASTOPEN_CONNECT to fix the build.

Fixes: c9d0bdef89a6 ("bpf: Test BPF_SK_REUSEPORT_SELECT_OR_MIGRATE.")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Link: https://lore.kernel.org/bpf/20210617041446.425283-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
index 0fa3f750567d..59adb4715394 100644
--- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -30,6 +30,10 @@
 #include "test_migrate_reuseport.skel.h"
 #include "network_helpers.h"
 
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
 #define IFINDEX_LO 1
 
 #define NR_SERVERS 5

From 24700e1f41f0dcbe389b8d9e5830aaca2192093c Mon Sep 17 00:00:00 2001
From: Daniel Scally <djrscally@gmail.com>
Date: Thu, 3 Jun 2021 23:40:07 +0100
Subject: [PATCH 2018/3292] mfd: tps68470: Remove tps68470 MFD driver

This driver only covered one scenario in which ACPI devices with _HID
INT3472 are found, and its functionality has been taken over by the
intel-skl-int3472 module, so remove it.

Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/20210603224007.120560-7-djrscally@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/pmic/Kconfig |  2 +-
 drivers/gpio/Kconfig      |  2 +-
 drivers/mfd/Kconfig       | 18 --------
 drivers/mfd/Makefile      |  1 -
 drivers/mfd/tps68470.c    | 97 ---------------------------------------
 5 files changed, 2 insertions(+), 118 deletions(-)
 delete mode 100644 drivers/mfd/tps68470.c

diff --git a/drivers/acpi/pmic/Kconfig b/drivers/acpi/pmic/Kconfig
index 56bbcb2ce61b..f84b8f6038dc 100644
--- a/drivers/acpi/pmic/Kconfig
+++ b/drivers/acpi/pmic/Kconfig
@@ -52,7 +52,7 @@ endif	# PMIC_OPREGION
 
 config TPS68470_PMIC_OPREGION
 	bool "ACPI operation region support for TPS68470 PMIC"
-	depends on MFD_TPS68470
+	depends on INTEL_SKL_INT3472
 	help
 	  This config adds ACPI operation region support for TI TPS68470 PMIC.
 	  TPS68470 device is an advanced power management unit that powers
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 1dd0ec6727fd..10228abeee56 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1367,7 +1367,7 @@ config GPIO_TPS65912
 
 config GPIO_TPS68470
 	bool "TPS68470 GPIO"
-	depends on MFD_TPS68470
+	depends on INTEL_SKL_INT3472
 	help
 	  Select this option to enable GPIO driver for the TPS68470
 	  chip family.
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 5c7f2b100191..99c4e1a80ae0 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1499,24 +1499,6 @@ config MFD_TPS65217
 	  This driver can also be built as a module.  If so, the module
 	  will be called tps65217.
 
-config MFD_TPS68470
-	bool "TI TPS68470 Power Management / LED chips"
-	depends on ACPI && PCI && I2C=y
-	depends on I2C_DESIGNWARE_PLATFORM=y
-	select MFD_CORE
-	select REGMAP_I2C
-	help
-	  If you say yes here you get support for the TPS68470 series of
-	  Power Management / LED chips.
-
-	  These include voltage regulators, LEDs and other features
-	  that are often used in portable devices.
-
-	  This option is a bool as it provides an ACPI operation
-	  region, which must be available before any of the devices
-	  using this are probed. This option also configures the
-	  designware-i2c driver to be built-in, for the same reason.
-
 config MFD_TI_LP873X
 	tristate "TI LP873X Power Management IC"
 	depends on I2C
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 4f6d2b8a5f76..8b322d89a0c5 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -105,7 +105,6 @@ obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912-core.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
 obj-$(CONFIG_MFD_TPS65912_SPI)  += tps65912-spi.o
-obj-$(CONFIG_MFD_TPS68470)	+= tps68470.o
 obj-$(CONFIG_MFD_TPS80031)	+= tps80031.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
diff --git a/drivers/mfd/tps68470.c b/drivers/mfd/tps68470.c
deleted file mode 100644
index 4a4df4ffd18c..000000000000
--- a/drivers/mfd/tps68470.c
+++ /dev/null
@@ -1,97 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * TPS68470 chip Parent driver
- *
- * Copyright (C) 2017 Intel Corporation
- *
- * Authors:
- *	Rajmohan Mani <rajmohan.mani@intel.com>
- *	Tianshu Qiu <tian.shu.qiu@intel.com>
- *	Jian Xu Zheng <jian.xu.zheng@intel.com>
- *	Yuning Pu <yuning.pu@intel.com>
- */
-
-#include <linux/acpi.h>
-#include <linux/delay.h>
-#include <linux/i2c.h>
-#include <linux/init.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/tps68470.h>
-#include <linux/regmap.h>
-
-static const struct mfd_cell tps68470s[] = {
-	{ .name = "tps68470-gpio" },
-	{ .name = "tps68470_pmic_opregion" },
-};
-
-static const struct regmap_config tps68470_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8,
-	.max_register = TPS68470_REG_MAX,
-};
-
-static int tps68470_chip_init(struct device *dev, struct regmap *regmap)
-{
-	unsigned int version;
-	int ret;
-
-	/* Force software reset */
-	ret = regmap_write(regmap, TPS68470_REG_RESET, TPS68470_REG_RESET_MASK);
-	if (ret)
-		return ret;
-
-	ret = regmap_read(regmap, TPS68470_REG_REVID, &version);
-	if (ret) {
-		dev_err(dev, "Failed to read revision register: %d\n", ret);
-		return ret;
-	}
-
-	dev_info(dev, "TPS68470 REVID: 0x%x\n", version);
-
-	return 0;
-}
-
-static int tps68470_probe(struct i2c_client *client)
-{
-	struct device *dev = &client->dev;
-	struct regmap *regmap;
-	int ret;
-
-	regmap = devm_regmap_init_i2c(client, &tps68470_regmap_config);
-	if (IS_ERR(regmap)) {
-		dev_err(dev, "devm_regmap_init_i2c Error %ld\n",
-			PTR_ERR(regmap));
-		return PTR_ERR(regmap);
-	}
-
-	i2c_set_clientdata(client, regmap);
-
-	ret = tps68470_chip_init(dev, regmap);
-	if (ret < 0) {
-		dev_err(dev, "TPS68470 Init Error %d\n", ret);
-		return ret;
-	}
-
-	ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, tps68470s,
-			      ARRAY_SIZE(tps68470s), NULL, 0, NULL);
-	if (ret < 0) {
-		dev_err(dev, "devm_mfd_add_devices failed: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static const struct acpi_device_id tps68470_acpi_ids[] = {
-	{"INT3472"},
-	{},
-};
-
-static struct i2c_driver tps68470_driver = {
-	.driver = {
-		   .name = "tps68470",
-		   .acpi_match_table = tps68470_acpi_ids,
-	},
-	.probe_new = tps68470_probe,
-};
-builtin_i2c_driver(tps68470_driver);

From 7a2c4cc537fa9f05fe90812e7d789b9faf7eb869 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 8 Jun 2021 13:09:34 +0300
Subject: [PATCH 2019/3292] devm-helpers: Add resource managed version of work
 init

A few drivers which need a work-queue must cancel work at driver detach.
Some of those implement remove() solely for this purpose. Help drivers to
avoid unnecessary remove and error-branch implementation by adding managed
verision of work initialization. This will also help drivers to avoid
mixing manual and devm based unwinding when other resources are handled by
devm.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/94ff4175e7f2ff134ed2fa7d6e7641005cc9784b.1623146580.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/devm-helpers.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/include/linux/devm-helpers.h b/include/linux/devm-helpers.h
index f40f77717a24..74891802200d 100644
--- a/include/linux/devm-helpers.h
+++ b/include/linux/devm-helpers.h
@@ -51,4 +51,29 @@ static inline int devm_delayed_work_autocancel(struct device *dev,
 	return devm_add_action(dev, devm_delayed_work_drop, w);
 }
 
+static inline void devm_work_drop(void *res)
+{
+	cancel_work_sync(res);
+}
+
+/**
+ * devm_work_autocancel - Resource-managed work allocation
+ * @dev:	Device which lifetime work is bound to
+ * @w:		Work to be added (and automatically cancelled)
+ * @worker:	Worker function
+ *
+ * Initialize work which is automatically cancelled when driver is detached.
+ * A few drivers need to queue work which must be cancelled before driver
+ * is detached to avoid accessing removed resources.
+ * devm_work_autocancel() can be used to omit the explicit
+ * cancelleation when driver is detached.
+ */
+static inline int devm_work_autocancel(struct device *dev,
+				       struct work_struct *w,
+				       work_func_t worker)
+{
+	INIT_WORK(w, worker);
+	return devm_add_action(dev, devm_work_drop, w);
+}
+
 #endif

From 14ad76825f00b1471a7ec2eff30528d21ee2772b Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 8 Jun 2021 13:09:55 +0300
Subject: [PATCH 2020/3292] extcon: extcon-max14577: Fix potential work-queue
 cancellation race

The extcon IRQ schedules a work item. IRQ is requested using devm while
WQ is cancelld at remove(). This mixing of devm and manual unwinding has
potential case where the WQ has been emptied (.remove() was ran) but
devm unwinding of IRQ was not yet done. It is possible the IRQ is triggered
at this point scheduling new work item to the already flushed queue.

Use new devm_work_autocancel() to remove the remove() and to kill the bug.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/ee8545f59ae3a93f0a70f640ecbd7e31cfadbcb9.1623146580.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/extcon/extcon-max14577.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/extcon/extcon-max14577.c b/drivers/extcon/extcon-max14577.c
index ace523924e58..5476f48ed74b 100644
--- a/drivers/extcon/extcon-max14577.c
+++ b/drivers/extcon/extcon-max14577.c
@@ -6,6 +6,7 @@
 // Chanwoo Choi <cw00.choi@samsung.com>
 // Krzysztof Kozlowski <krzk@kernel.org>
 
+#include <linux/devm-helpers.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
@@ -673,7 +674,10 @@ static int max14577_muic_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, info);
 	mutex_init(&info->mutex);
 
-	INIT_WORK(&info->irq_work, max14577_muic_irq_work);
+	ret = devm_work_autocancel(&pdev->dev, &info->irq_work,
+				   max14577_muic_irq_work);
+	if (ret)
+		return ret;
 
 	switch (max14577->dev_type) {
 	case MAXIM_DEVICE_TYPE_MAX77836:
@@ -766,15 +770,6 @@ static int max14577_muic_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int max14577_muic_remove(struct platform_device *pdev)
-{
-	struct max14577_muic_info *info = platform_get_drvdata(pdev);
-
-	cancel_work_sync(&info->irq_work);
-
-	return 0;
-}
-
 static const struct platform_device_id max14577_muic_id[] = {
 	{ "max14577-muic", MAXIM_DEVICE_TYPE_MAX14577, },
 	{ "max77836-muic", MAXIM_DEVICE_TYPE_MAX77836, },
@@ -797,7 +792,6 @@ static struct platform_driver max14577_muic_driver = {
 		.of_match_table = of_max14577_muic_dt_match,
 	},
 	.probe		= max14577_muic_probe,
-	.remove		= max14577_muic_remove,
 	.id_table	= max14577_muic_id,
 };
 

From 74047eaa2281982853afa144463ebe18d49022f2 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 8 Jun 2021 13:10:11 +0300
Subject: [PATCH 2021/3292] extcon: extcon-max77693.c: Fix potential work-queue
 cancellation race

The extcon IRQ schedules a work item. IRQ is requested using devm while
WQ is cancelld at remove(). This mixing of devm and manual unwinding has
potential case where the WQ has been emptied (.remove() was ran) but
devm unwinding of IRQ was not yet done. It may be possible the IRQ is
triggered at this point scheduling new work item to the already flushed
queue.

According to the input documentation the input device allocated by
devm_input_allocate_device() does not need to be explicitly unregistered.
Use the new devm_work_autocancel() and remove the remove() to simplify the
code.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/cbe8205eed8276f6e6db5003cfe51b8b0d4ac966.1623146580.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/extcon/extcon-max77693.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index 92af97e00828..1f1d9ab0c5c7 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -5,6 +5,7 @@
 // Copyright (C) 2012 Samsung Electrnoics
 // Chanwoo Choi <cw00.choi@samsung.com>
 
+#include <linux/devm-helpers.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
@@ -1127,7 +1128,10 @@ static int max77693_muic_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, info);
 	mutex_init(&info->mutex);
 
-	INIT_WORK(&info->irq_work, max77693_muic_irq_work);
+	ret = devm_work_autocancel(&pdev->dev, &info->irq_work,
+				   max77693_muic_irq_work);
+	if (ret)
+		return ret;
 
 	/* Support irq domain for MAX77693 MUIC device */
 	for (i = 0; i < ARRAY_SIZE(muic_irqs); i++) {
@@ -1254,22 +1258,11 @@ static int max77693_muic_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int max77693_muic_remove(struct platform_device *pdev)
-{
-	struct max77693_muic_info *info = platform_get_drvdata(pdev);
-
-	cancel_work_sync(&info->irq_work);
-	input_unregister_device(info->dock);
-
-	return 0;
-}
-
 static struct platform_driver max77693_muic_driver = {
 	.driver		= {
 		.name	= DEV_NAME,
 	},
 	.probe		= max77693_muic_probe,
-	.remove		= max77693_muic_remove,
 };
 
 module_platform_driver(max77693_muic_driver);

From 610bdc04830a864115e6928fc944f1171dfff6f3 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 8 Jun 2021 13:10:31 +0300
Subject: [PATCH 2022/3292] extcon: extcon-max8997: Fix IRQ freeing at error
 path

If reading MAX8997_MUIC_REG_STATUS1 fails at probe the driver exits
without freeing the requested IRQs.

Free the IRQs prior returning if reading the status fails.

Fixes: 3e34c8198960 ("extcon: max8997: Avoid forcing UART path on drive probe")
Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/27ee4a48ee775c3f8c9d90459c18b6f2b15edc76.1623146580.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/extcon/extcon-max8997.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index e1408075ef7d..c15a612067af 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -733,7 +733,7 @@ static int max8997_muic_probe(struct platform_device *pdev)
 				2, info->status);
 	if (ret) {
 		dev_err(info->dev, "failed to read MUIC register\n");
-		return ret;
+		goto err_irq;
 	}
 	cable_type = max8997_muic_get_cable_type(info,
 					   MAX8997_CABLE_GROUP_ADC, &attached);

From 87ee8de23c9df3a368504f34cf3d7f9be9207717 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 8 Jun 2021 13:10:53 +0300
Subject: [PATCH 2023/3292] extcon: extcon-max8997: Simplify driver using devm

Simplify driver by switching to use the resource managed IRQ
requesting and resource managed work-queue initialization.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Link: https://lore.kernel.org/r/61190cc280a63baeb05ec570282bb3677bee8e7b.1623146580.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/extcon/extcon-max8997.c | 47 +++++++++++----------------------
 1 file changed, 16 insertions(+), 31 deletions(-)

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index c15a612067af..bbc592823570 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -5,6 +5,7 @@
 //  Copyright (C) 2012 Samsung Electronics
 //  Donggeun Kim <dg77.kim@samsung.com>
 
+#include <linux/devm-helpers.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
@@ -650,27 +651,30 @@ static int max8997_muic_probe(struct platform_device *pdev)
 	mutex_init(&info->mutex);
 
 	INIT_WORK(&info->irq_work, max8997_muic_irq_work);
+	ret = devm_work_autocancel(&pdev->dev, &info->irq_work,
+				   max8997_muic_irq_work);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < ARRAY_SIZE(muic_irqs); i++) {
 		struct max8997_muic_irq *muic_irq = &muic_irqs[i];
 		unsigned int virq = 0;
 
 		virq = irq_create_mapping(max8997->irq_domain, muic_irq->irq);
-		if (!virq) {
-			ret = -EINVAL;
-			goto err_irq;
-		}
+		if (!virq)
+			return -EINVAL;
+
 		muic_irq->virq = virq;
 
-		ret = request_threaded_irq(virq, NULL,
-				max8997_muic_irq_handler,
-				IRQF_NO_SUSPEND,
-				muic_irq->name, info);
+		ret = devm_request_threaded_irq(&pdev->dev, virq, NULL,
+						max8997_muic_irq_handler,
+						IRQF_NO_SUSPEND,
+						muic_irq->name, info);
 		if (ret) {
 			dev_err(&pdev->dev,
 				"failed: irq request (IRQ: %d, error :%d)\n",
 				muic_irq->irq, ret);
-			goto err_irq;
+			return ret;
 		}
 	}
 
@@ -678,14 +682,13 @@ static int max8997_muic_probe(struct platform_device *pdev)
 	info->edev = devm_extcon_dev_allocate(&pdev->dev, max8997_extcon_cable);
 	if (IS_ERR(info->edev)) {
 		dev_err(&pdev->dev, "failed to allocate memory for extcon\n");
-		ret = PTR_ERR(info->edev);
-		goto err_irq;
+		return PTR_ERR(info->edev);
 	}
 
 	ret = devm_extcon_dev_register(&pdev->dev, info->edev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register extcon device\n");
-		goto err_irq;
+		return ret;
 	}
 
 	if (pdata && pdata->muic_pdata) {
@@ -733,7 +736,7 @@ static int max8997_muic_probe(struct platform_device *pdev)
 				2, info->status);
 	if (ret) {
 		dev_err(info->dev, "failed to read MUIC register\n");
-		goto err_irq;
+		return ret;
 	}
 	cable_type = max8997_muic_get_cable_type(info,
 					   MAX8997_CABLE_GROUP_ADC, &attached);
@@ -756,23 +759,6 @@ static int max8997_muic_probe(struct platform_device *pdev)
 			delay_jiffies);
 
 	return 0;
-
-err_irq:
-	while (--i >= 0)
-		free_irq(muic_irqs[i].virq, info);
-	return ret;
-}
-
-static int max8997_muic_remove(struct platform_device *pdev)
-{
-	struct max8997_muic_info *info = platform_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(muic_irqs); i++)
-		free_irq(muic_irqs[i].virq, info);
-	cancel_work_sync(&info->irq_work);
-
-	return 0;
 }
 
 static struct platform_driver max8997_muic_driver = {
@@ -780,7 +766,6 @@ static struct platform_driver max8997_muic_driver = {
 		.name	= DEV_NAME,
 	},
 	.probe		= max8997_muic_probe,
-	.remove		= max8997_muic_remove,
 };
 
 module_platform_driver(max8997_muic_driver);

From ccb5ecdc2ddeaff744ee075b54cdff8a689e8fa7 Mon Sep 17 00:00:00 2001
From: Xiaofei Tan <tanxiaofei@huawei.com>
Date: Fri, 11 Jun 2021 20:37:07 +0800
Subject: [PATCH 2024/3292] ACPI: APEI: fix synchronous external aborts in
 user-mode

Before commit 8fcc4ae6faf8 ("arm64: acpi: Make apei_claim_sea()
synchronise with APEI's irq work"), do_sea() would unconditionally
signal the affected task from the arch code. Since that change,
the GHES driver sends the signals.

This exposes a problem as errors the GHES driver doesn't understand
or doesn't handle effectively are silently ignored. It will cause
the errors get taken again, and circulate endlessly. User-space task
get stuck in this loop.

Existing firmware on Kunpeng9xx systems reports cache errors with the
'ARM Processor Error' CPER records.

Do memory failure handling for ARM Processor Error Section just like
for Memory Error Section.

Fixes: 8fcc4ae6faf8 ("arm64: acpi: Make apei_claim_sea() synchronise with APEI's irq work")
Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
[ rjw: Subject edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 81 +++++++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 17 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index fce7ade2aba9..0c8330ed1ffd 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -441,28 +441,35 @@ static void ghes_kick_task_work(struct callback_head *head)
 	gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
 }
 
-static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
-				       int sev)
+static bool ghes_do_memory_failure(u64 physical_addr, int flags)
 {
 	unsigned long pfn;
-	int flags = -1;
-	int sec_sev = ghes_severity(gdata->error_severity);
-	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 
 	if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
 		return false;
 
-	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
-		return false;
-
-	pfn = mem_err->physical_addr >> PAGE_SHIFT;
+	pfn = PHYS_PFN(physical_addr);
 	if (!pfn_valid(pfn)) {
 		pr_warn_ratelimited(FW_WARN GHES_PFX
 		"Invalid address in generic error data: %#llx\n",
-		mem_err->physical_addr);
+		physical_addr);
 		return false;
 	}
 
+	memory_failure_queue(pfn, flags);
+	return true;
+}
+
+static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+				       int sev)
+{
+	int flags = -1;
+	int sec_sev = ghes_severity(gdata->error_severity);
+	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+		return false;
+
 	/* iff following two events can be handled properly by now */
 	if (sec_sev == GHES_SEV_CORRECTED &&
 	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
@@ -470,14 +477,56 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
 	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
 		flags = 0;
 
-	if (flags != -1) {
-		memory_failure_queue(pfn, flags);
-		return true;
-	}
+	if (flags != -1)
+		return ghes_do_memory_failure(mem_err->physical_addr, flags);
 
 	return false;
 }
 
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+{
+	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+	bool queued = false;
+	int sec_sev, i;
+	char *p;
+
+	log_arm_hw_error(err);
+
+	sec_sev = ghes_severity(gdata->error_severity);
+	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
+		return false;
+
+	p = (char *)(err + 1);
+	for (i = 0; i < err->err_info_num; i++) {
+		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
+		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
+		const char *error_type = "unknown error";
+
+		/*
+		 * The field (err_info->error_info & BIT(26)) is fixed to set to
+		 * 1 in some old firmware of HiSilicon Kunpeng920. We assume that
+		 * firmware won't mix corrected errors in an uncorrected section,
+		 * and don't filter out 'corrected' error here.
+		 */
+		if (is_cache && has_pa) {
+			queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+			p += err_info->length;
+			continue;
+		}
+
+		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
+			error_type = cper_proc_error_type_strs[err_info->type];
+
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+				    "Unhandled processor error type: %s\n",
+				    error_type);
+		p += err_info->length;
+	}
+
+	return queued;
+}
+
 /*
  * PCIe AER errors need to be sent to the AER driver for reporting and
  * recovery. The GHES severities map to the following AER severities and
@@ -605,9 +654,7 @@ static bool ghes_do_proc(struct ghes *ghes,
 			ghes_handle_aer(gdata);
 		}
 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
-			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
-
-			log_arm_hw_error(err);
+			queued = ghes_handle_arm_hw_error(gdata, sev);
 		} else {
 			void *err = acpi_hest_get_payload(gdata);
 

From 795e0e38de2c36561a4f14e6e97b8a82f6f2e03c Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 15 Jun 2021 19:49:20 +0800
Subject: [PATCH 2025/3292] cpuidle: teo: remove unneeded semicolon in
 teo_select()

Fix following coccicheck warning:
drivers/cpuidle/governors/teo.c:315:10-11: Unneeded semicolon

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 1e0b2f828abb..7b91060e82f6 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -312,7 +312,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	/* Check if there is any choice in the first place. */
 	if (drv->state_count < 2) {
-		idx = 0;;
+		idx = 0;
 		goto end;
 	}
 	if (!dev->states_usage[0].disable) {

From 6f2f92c6ae4261c6c3dc00a0397f70fa0ac267fb Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Tue, 15 Jun 2021 15:19:14 +0800
Subject: [PATCH 2026/3292] ACPI: sysfs: fix doc warnings in device_sysfs.c

Fixes the following W=1 kernel build warning(s):

 drivers/acpi/device_sysfs.c:278: warning: Function parameter or
  member 'dev' not described in 'acpi_device_uevent_modalias'
 drivers/acpi/device_sysfs.c:278: warning: Function parameter or
  member 'env' not described in 'acpi_device_uevent_modalias'
 drivers/acpi/device_sysfs.c:323: warning: Function parameter or
  member 'dev' not described in 'acpi_device_modalias'
 drivers/acpi/device_sysfs.c:323: warning: Function parameter or
  member 'buf' not described in 'acpi_device_modalias'
 drivers/acpi/device_sysfs.c:323: warning: Function parameter or
  member 'size' not described in 'acpi_device_modalias'

Signed-off-by: Baokun Li <libaokun1@huawei.com>
[ rjw: Fix spelling: acpi -> ACPI ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_sysfs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index a393e0e09381..61271e61c307 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -268,6 +268,8 @@ int __acpi_device_uevent_modalias(struct acpi_device *adev,
 
 /**
  * acpi_device_uevent_modalias - uevent modalias for ACPI-enumerated devices.
+ * @dev: Struct device to get ACPI device node.
+ * @env: Environment variables of the kobject uevent.
  *
  * Create the uevent modalias field for ACPI-enumerated devices.
  *
@@ -313,6 +315,9 @@ static int __acpi_device_modalias(struct acpi_device *adev, char *buf, int size)
 
 /**
  * acpi_device_modalias - modalias sysfs attribute for ACPI-enumerated devices.
+ * @dev: Struct device to get ACPI device node.
+ * @buf: The buffer to save pnp_modalias and of_modalias.
+ * @size: Size of buffer.
  *
  * Create the modalias sysfs attribute for ACPI-enumerated devices.
  *

From 120f4aa80b4cac2ae082666114a36c6c363b9df2 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Tue, 15 Jun 2021 15:43:17 +0800
Subject: [PATCH 2027/3292] ACPI: NVS: fix doc warnings in nvs.c

Fixes the following W=1 kernel build warning(s):

 drivers/acpi/nvs.c:94: warning: Function parameter or
  member 'start' not described in 'suspend_nvs_register'
 drivers/acpi/nvs.c:94: warning: Function parameter or
  member 'size' not described in 'suspend_nvs_register'

Signed-off-by: Baokun Li <libaokun1@huawei.com>
[ rjw: Subject edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/nvs.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c
index 9f8712a557b3..edd87c29ca3f 100644
--- a/drivers/acpi/nvs.c
+++ b/drivers/acpi/nvs.c
@@ -82,13 +82,13 @@ struct nvs_page {
 static LIST_HEAD(nvs_list);
 
 /**
- *	suspend_nvs_register - register platform NVS memory region to save
- *	@start - physical address of the region
- *	@size - size of the region
+ * suspend_nvs_register - register platform NVS memory region to save
+ * @start: Physical address of the region.
+ * @size: Size of the region.
  *
- *	The NVS region need not be page-aligned (both ends) and we arrange
- *	things so that the data from page-aligned addresses in this region will
- *	be copied into separate RAM pages.
+ * The NVS region need not be page-aligned (both ends) and we arrange
+ * things so that the data from page-aligned addresses in this region will
+ * be copied into separate RAM pages.
  */
 static int suspend_nvs_register(unsigned long start, unsigned long size)
 {
@@ -123,7 +123,7 @@ static int suspend_nvs_register(unsigned long start, unsigned long size)
 }
 
 /**
- *	suspend_nvs_free - free data pages allocated for saving NVS regions
+ * suspend_nvs_free - free data pages allocated for saving NVS regions
  */
 void suspend_nvs_free(void)
 {
@@ -147,7 +147,7 @@ void suspend_nvs_free(void)
 }
 
 /**
- *	suspend_nvs_alloc - allocate memory necessary for saving NVS regions
+ * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
  */
 int suspend_nvs_alloc(void)
 {
@@ -164,7 +164,7 @@ int suspend_nvs_alloc(void)
 }
 
 /**
- *	suspend_nvs_save - save NVS memory regions
+ * suspend_nvs_save - save NVS memory regions
  */
 int suspend_nvs_save(void)
 {
@@ -193,10 +193,10 @@ int suspend_nvs_save(void)
 }
 
 /**
- *	suspend_nvs_restore - restore NVS memory regions
+ * suspend_nvs_restore - restore NVS memory regions
  *
- *	This function is going to be called with interrupts disabled, so it
- *	cannot iounmap the virtual addresses used to access the NVS region.
+ * This function is going to be called with interrupts disabled, so it
+ * cannot iounmap the virtual addresses used to access the NVS region.
  */
 void suspend_nvs_restore(void)
 {

From d7a188bb87d8da78a1ef0dea53f1639f3daf0790 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 15 Jun 2021 17:21:57 +0000
Subject: [PATCH 2028/3292] ACPI: PRM: make symbol 'prm_module_list' static

The sparse tool complains as follows:

drivers/acpi/prmt.c:53:1: warning:
 symbol 'prm_module_list' was not declared. Should it be static?

This symbol is not used outside of prmt.c, so marks it static.

Fixes: cefc7ca46235 ("ACPI: PRM: implement OperationRegion handler for the PlatformRtMechanism subtype")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/prmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c
index 33c274698d07..31cf9aee5edd 100644
--- a/drivers/acpi/prmt.c
+++ b/drivers/acpi/prmt.c
@@ -50,7 +50,7 @@ struct prm_context_buffer {
 #pragma pack()
 
 
-LIST_HEAD(prm_module_list);
+static LIST_HEAD(prm_module_list);
 
 struct prm_handler_info {
 	guid_t guid;

From a0aac973a26d1ac814b9e131e209eb39472a67ce Mon Sep 17 00:00:00 2001
From: JK Kim <jongkang.kim2@gmail.com>
Date: Thu, 17 Jun 2021 15:02:17 +0900
Subject: [PATCH 2029/3292] nvme-pci: fix var. type for increasing cq_head

nvmeq->cq_head is compared with nvmeq->q_depth and changed the value
and cq_phase for handling the next cq db.

but, nvmeq->q_depth's type is u32 and max. value is 0x10000 when
CQP.MSQE is 0xffff and io_queue_depth is 0x10000.

current temp. variable for comparing with nvmeq->q_depth is overflowed
when previous nvmeq->cq_head is 0xffff.

in this case, nvmeq->cq_phase is not updated.
so, fix data type for temp. variable to u32.

Signed-off-by: JK Kim <jongkang.kim2@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8fbc4c87a0d8..5a72bdf5ad03 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1032,7 +1032,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-	u16 tmp = nvmeq->cq_head + 1;
+	u32 tmp = nvmeq->cq_head + 1;
 
 	if (tmp == nvmeq->q_depth) {
 		nvmeq->cq_head = 0;

From cb1b10e7ac6c1438247ee3c7e4a2f2332a77ba07 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 7 Jun 2021 12:54:54 -0700
Subject: [PATCH 2030/3292] nvme-pci: remove trailing lines for helpers

Remove the extra white line at the end of the functions.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5a72bdf5ad03..138e7e7453dd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -559,7 +559,6 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
 		dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
 		dma_addr = next_dma_addr;
 	}
-
 }
 
 static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
@@ -576,7 +575,6 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
 		dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
 		dma_addr = next_dma_addr;
 	}
-
 }
 
 static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)

From 73eefc270afa1f27d82c42fdb34562d07a834b40 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:28:23 -0700
Subject: [PATCH 2031/3292] nvme: add a helper to check ctrl sgl support

For various transports such as fc/tcp/pci it is common to check if
NVMe SGLs are supported or not by the controller.

In this preparation patch we add a helper to avoid the open coding of
such checks in the various transport.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/nvme.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1f397ecba16c..75420ceacc10 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -869,6 +869,11 @@ static inline void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
 }
 #endif
 
+static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
+{
+	return ctrl->sgls & ((1 << 0) | (1 << 1));
+}
+
 u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			 u8 opcode);
 void nvme_execute_passthru_rq(struct request *rq);

From b61678bcd43c6686a6d0cf965934a54b4225821d Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:28:24 -0700
Subject: [PATCH 2032/3292] nvme-fc: use ctrl sgl check helper

Use the helper to check NVMe controller's SGL support.

Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 256e87721a01..8a3c4814d21b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3111,7 +3111,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	/* FC-NVME supports normal SGL Data Block Descriptors */
-	if (!(ctrl->ctrl.sgls & ((1 << 0) | (1 << 1)))) {
+	if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) {
 		dev_err(ctrl->ctrl.device,
 			"Mandatory sgls are not supported!\n");
 		goto out_disconnect_admin_queue;

From 253a0b76a12a4cce14095b3d74004e67a6434d79 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:28:25 -0700
Subject: [PATCH 2033/3292] nvme-pci: use ctrl sgl check helper

Use the helper to check NVMe controller's SGL support.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 138e7e7453dd..12ffd58c27b1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -536,7 +536,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 
 	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
 
-	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+	if (!nvme_ctrl_sgl_supported(&dev->ctrl))
 		return false;
 	if (!iod->nvmeq->qid)
 		return false;
@@ -853,7 +853,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 							     &cmnd->rw, &bv);
 
 			if (iod->nvmeq->qid && sgl_threshold &&
-			    dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
+			    nvme_ctrl_sgl_supported(&dev->ctrl))
 				return nvme_setup_sgl_simple(dev, req,
 							     &cmnd->rw, &bv);
 		}

From 3b54064fbce73a4dada6019dd400f0ce28ab5eb9 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:28:26 -0700
Subject: [PATCH 2034/3292] nvme-tcp: use ctrl sgl check helper

Use the helper to check NVMe controller's SGL support.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 6a65b0516180..c7bd37103cf4 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1993,7 +1993,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
 		goto destroy_admin;
 	}
 
-	if (!(ctrl->sgls & ((1 << 0) | (1 << 1)))) {
+	if (!nvme_ctrl_sgl_supported(ctrl)) {
 		ret = -EOPNOTSUPP;
 		dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
 		goto destroy_admin;

From 2796a8e409429a67daeb813ed270eb645f56f817 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 14 Jun 2021 19:45:51 -0700
Subject: [PATCH 2035/3292] nvme-fabrics: remove memset in nvmf_reg_read64()

Declare and initialize structure variable to the zero values so that we
can get rid of the zeroout memset call.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 1239a63e3ac2..4753f1e5505e 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -190,11 +190,10 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read32);
  */
 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 {
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	union nvme_result res;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.prop_get.opcode = nvme_fabrics_command;
 	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
 	cmd.prop_get.attrib = 1;

From c22c2720133d51d95da608a77cd703f29d29747e Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 14 Jun 2021 19:45:52 -0700
Subject: [PATCH 2036/3292] nvme-fabrics: remove memset in nvmf_reg_write32()

Declare and initialize structure variable to the zero values so that we
can get rid of the zeroout memset call.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 4753f1e5505e..09fe3d97bf44 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -235,10 +235,9 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read64);
  */
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 {
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.prop_set.opcode = nvme_fabrics_command;
 	cmd.prop_set.fctype = nvme_fabrics_type_property_set;
 	cmd.prop_set.attrib = 0;

From bfa9d1222d6185a4aea603ebc7d74d75c747087c Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 14 Jun 2021 19:45:53 -0700
Subject: [PATCH 2037/3292] nvme-fabrics: remove memset in connect admin q

Declare and initialize structure variable to the zero values so that we
can get rid of the zeroout memset call.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 09fe3d97bf44..43c797e3380b 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -362,12 +362,11 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
  */
 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
 {
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	union nvme_result res;
 	struct nvmf_connect_data *data;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;

From eff4423ec0b03fedb8b7b420549ed8e424d246f1 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 14 Jun 2021 19:45:54 -0700
Subject: [PATCH 2038/3292] nvme-fabrics: remove memset in connect io q

Declare and initialize structure variable to the zero values so that we
can get rid of the zeroout memset call.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 43c797e3380b..1e6a7cc056ca 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -429,12 +429,11 @@ EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue);
  */
 int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll)
 {
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	struct nvmf_connect_data *data;
 	union nvme_result res;
 	int ret;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = cpu_to_le16(qid);

From 2a4a910aa4f0acc428dc8d10227c42e14ed21d10 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 25 May 2021 14:54:14 +0200
Subject: [PATCH 2039/3292] nvmet-fc: do not check for invalid target port in
 nvmet_fc_handle_fcp_rqst()

When parsing a request in nvmet_fc_handle_fcp_rqst() we should not
check for invalid target ports; if we do the command is aborted
from the fcp layer, causing the host to assume a transport error.
Rather we should still forward this request to the nvmet layer, which
will then correctly fail the command with an appropriate error status.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 19e113240fff..22b5108168a6 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2510,13 +2510,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	u32 xfrlen = be32_to_cpu(cmdiu->data_len);
 	int ret;
 
-	/*
-	 * if there is no nvmet mapping to the targetport there
-	 * shouldn't be requests. just terminate them.
-	 */
-	if (!tgtport->pe)
-		goto transport_error;
-
 	/*
 	 * Fused commands are currently not supported in the linux
 	 * implementation.
@@ -2544,7 +2537,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 
 	fod->req.cmd = &fod->cmdiubuf.sqe;
 	fod->req.cqe = &fod->rspiubuf.cqe;
-	fod->req.port = tgtport->pe->port;
+	if (tgtport->pe)
+		fod->req.port = tgtport->pe->port;
 
 	/* clear any response payload */
 	memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));

From e13b061589ace0aee18bdbf86f3ddb2b6b5b5ab8 Mon Sep 17 00:00:00 2001
From: Noam Gottlieb <ngottlieb@nvidia.com>
Date: Mon, 7 Jun 2021 12:23:21 +0300
Subject: [PATCH 2040/3292] nvmet: change sn size and check validity

According to the NVM specification, the serial_number should be 20 bytes
(bytes 23:04 of the Identify Controller data structure), and should
contain only ASCII characters.

In accordance, the serial_number size is changed to 20 bytes and before
any attempt to store a new value in serial_number we check that the
input is valid - i.e. contains only ASCII characters, is not empty and
does not exceed 20 bytes.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Noam Gottlieb <ngottlieb@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c |  4 +---
 drivers/nvme/target/configfs.c  | 33 +++++++++++++++++++++++----------
 drivers/nvme/target/core.c      |  4 +++-
 drivers/nvme/target/discovery.c |  4 +---
 drivers/nvme/target/nvmet.h     |  3 ++-
 5 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index dcd49a72f2f3..9c73dbfb8228 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -357,9 +357,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->vid = 0;
 	id->ssvid = 0;
 
-	memset(id->sn, ' ', sizeof(id->sn));
-	bin2hex(id->sn, &ctrl->subsys->serial,
-		min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
+	memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE);
 	memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
 		       strlen(subsys->model_number), ' ');
 	memcpy_and_pad(id->fr, sizeof(id->fr),
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 65a0cf99f557..027b28aaf7cd 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1030,24 +1030,43 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_version);
 
+/* See Section 1.5 of NVMe 1.4 */
+static bool nvmet_is_ascii(const char c)
+{
+	return c >= 0x20 && c <= 0x7e;
+}
+
 static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
 					     char *page)
 {
 	struct nvmet_subsys *subsys = to_subsys(item);
 
-	return snprintf(page, PAGE_SIZE, "%llx\n", subsys->serial);
+	return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial);
 }
 
 static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
 					      const char *page, size_t count)
 {
-	u64 serial;
+	struct nvmet_subsys *subsys = to_subsys(item);
+	int pos, len = strcspn(page, "\n");
 
-	if (sscanf(page, "%llx\n", &serial) != 1)
+	if (!len || len > NVMET_SN_MAX_SIZE) {
+		pr_err("Serial Number can not be empty or exceed %d Bytes\n",
+		       NVMET_SN_MAX_SIZE);
 		return -EINVAL;
+	}
+
+	for (pos = 0; pos < len; pos++) {
+		if (!nvmet_is_ascii(page[pos])) {
+			pr_err("Serial Number must contain only ASCII strings\n");
+			return -EINVAL;
+		}
+	}
 
 	down_write(&nvmet_config_sem);
-	to_subsys(item)->serial = serial;
+	mutex_lock(&subsys->lock);
+	memcpy_and_pad(subsys->serial, NVMET_SN_MAX_SIZE, page, len, ' ');
+	mutex_unlock(&subsys->lock);
 	up_write(&nvmet_config_sem);
 
 	return count;
@@ -1128,12 +1147,6 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
 	return ret;
 }
 
-/* See Section 1.5 of NVMe 1.4 */
-static bool nvmet_is_ascii(const char c)
-{
-	return c >= 0x20 && c <= 0x7e;
-}
-
 static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
 		const char *page, size_t count)
 {
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 4ae4bea6625d..213a0c2af4f7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1493,6 +1493,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 		enum nvme_subsys_type type)
 {
 	struct nvmet_subsys *subsys;
+	char serial[NVMET_SN_MAX_SIZE / 2];
 
 	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
 	if (!subsys)
@@ -1500,7 +1501,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 
 	subsys->ver = NVMET_DEFAULT_VS;
 	/* generate a random serial number as our controllers are ephemeral: */
-	get_random_bytes(&subsys->serial, sizeof(subsys->serial));
+	get_random_bytes(&serial, sizeof(serial));
+	bin2hex(subsys->serial, &serial, sizeof(serial));
 
 	switch (type) {
 	case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index fc3645fc2c24..b7fdad13094a 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -262,9 +262,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
 		goto out;
 	}
 
-	memset(id->sn, ' ', sizeof(id->sn));
-	bin2hex(id->sn, &ctrl->subsys->serial,
-		min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
+	memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE);
 	memset(id->fr, ' ', sizeof(id->fr));
 	memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
 	memcpy_and_pad(id->fr, sizeof(id->fr),
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index d69a409515d6..0ae809ca428c 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -28,6 +28,7 @@
 #define NVMET_NO_ERROR_LOC		((u16)-1)
 #define NVMET_DEFAULT_CTRL_MODEL	"Linux"
 #define NVMET_MN_MAX_SIZE		40
+#define NVMET_SN_MAX_SIZE		20
 
 /*
  * Supported optional AENs:
@@ -229,7 +230,7 @@ struct nvmet_subsys {
 	u16			max_qid;
 
 	u64			ver;
-	u64			serial;
+	char			serial[NVMET_SN_MAX_SIZE];
 	char			*subsysnqn;
 	bool			pi_support;
 

From 7ae023c5aa644211bde26db11018fe08b8408bd5 Mon Sep 17 00:00:00 2001
From: Noam Gottlieb <ngottlieb@nvidia.com>
Date: Mon, 7 Jun 2021 12:23:22 +0300
Subject: [PATCH 2041/3292] nvmet: make sn stable once connection was
 established

Once some host has connected to the target, make sure that the serial
number is stable and cannot be changed.

Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Noam Gottlieb <ngottlieb@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c |  6 ++++++
 drivers/nvme/target/configfs.c  | 27 ++++++++++++++++++++++-----
 drivers/nvme/target/nvmet.h     |  1 +
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 9c73dbfb8228..5f15c3285a5b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -337,6 +337,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	u32 cmd_capsule_size;
 	u16 status = 0;
 
+	if (!subsys->subsys_discovered) {
+		mutex_lock(&subsys->lock);
+		subsys->subsys_discovered = true;
+		mutex_unlock(&subsys->lock);
+	}
+
 	/*
 	 * If there is no model number yet, set it now.  It will then remain
 	 * stable for the life time of the subsystem.
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 027b28aaf7cd..a13da86fb374 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1044,12 +1044,18 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
 	return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial);
 }
 
-static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
-					      const char *page, size_t count)
+static ssize_t
+nvmet_subsys_attr_serial_store_locked(struct nvmet_subsys *subsys,
+		const char *page, size_t count)
 {
-	struct nvmet_subsys *subsys = to_subsys(item);
 	int pos, len = strcspn(page, "\n");
 
+	if (subsys->subsys_discovered) {
+		pr_err("Can't set serial number. %s is already assigned\n",
+		       subsys->serial);
+		return -EINVAL;
+	}
+
 	if (!len || len > NVMET_SN_MAX_SIZE) {
 		pr_err("Serial Number can not be empty or exceed %d Bytes\n",
 		       NVMET_SN_MAX_SIZE);
@@ -1063,13 +1069,24 @@ static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
 		}
 	}
 
+	memcpy_and_pad(subsys->serial, NVMET_SN_MAX_SIZE, page, len, ' ');
+
+	return count;
+}
+
+static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
+					      const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	ssize_t ret;
+
 	down_write(&nvmet_config_sem);
 	mutex_lock(&subsys->lock);
-	memcpy_and_pad(subsys->serial, NVMET_SN_MAX_SIZE, page, len, ' ');
+	ret = nvmet_subsys_attr_serial_store_locked(subsys, page, count);
 	mutex_unlock(&subsys->lock);
 	up_write(&nvmet_config_sem);
 
-	return count;
+	return ret;
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_serial);
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 0ae809ca428c..bd0a0b91d843 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -231,6 +231,7 @@ struct nvmet_subsys {
 
 	u64			ver;
 	char			serial[NVMET_SN_MAX_SIZE];
+	bool			subsys_discovered;
 	char			*subsysnqn;
 	bool			pi_support;
 

From 0d148efdf0f0414b2ed2dd9c31e71302bb9ce123 Mon Sep 17 00:00:00 2001
From: Noam Gottlieb <ngottlieb@nvidia.com>
Date: Mon, 7 Jun 2021 12:23:23 +0300
Subject: [PATCH 2042/3292] nvmet: allow mn change if subsys not discovered

Currently, once the subsystem's model_number is set for the first time
there is no way to change it. However, as long as no connection was
established to nvmf target, there is no reason for such restriction and
we should allow to change the subsystem's model_number as many times as
needed.

In addition, in order to simplfy the changes and make the model number
flow more similar to the rest of the attributes in the Identify
Controller data structure, we set a default value for the model number
at the initiation of the subsystem.

Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Noam Gottlieb <ngottlieb@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 26 --------------------------
 drivers/nvme/target/configfs.c  | 10 ++--------
 drivers/nvme/target/core.c      | 21 +++++++++++++++++----
 drivers/nvme/target/discovery.c |  4 ++--
 4 files changed, 21 insertions(+), 40 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5f15c3285a5b..cd60a8184d04 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -313,22 +313,6 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
 }
 
-static u16 nvmet_set_model_number(struct nvmet_subsys *subsys)
-{
-	u16 status = 0;
-
-	mutex_lock(&subsys->lock);
-	if (!subsys->model_number) {
-		subsys->model_number =
-			kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
-		if (!subsys->model_number)
-			status = NVME_SC_INTERNAL;
-	}
-	mutex_unlock(&subsys->lock);
-
-	return status;
-}
-
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -343,16 +327,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 		mutex_unlock(&subsys->lock);
 	}
 
-	/*
-	 * If there is no model number yet, set it now.  It will then remain
-	 * stable for the life time of the subsystem.
-	 */
-	if (!subsys->model_number) {
-		status = nvmet_set_model_number(subsys);
-		if (status)
-			goto out;
-	}
-
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
 	if (!id) {
 		status = NVME_SC_INTERNAL;
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index a13da86fb374..9ef8708b92c6 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1154,14 +1154,8 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
 					    char *page)
 {
 	struct nvmet_subsys *subsys = to_subsys(item);
-	int ret;
 
-	mutex_lock(&subsys->lock);
-	ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ?
-			subsys->model_number : NVMET_DEFAULT_CTRL_MODEL);
-	mutex_unlock(&subsys->lock);
-
-	return ret;
+	return snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number);
 }
 
 static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
@@ -1169,7 +1163,7 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
 {
 	int pos = 0, len;
 
-	if (subsys->model_number) {
+	if (subsys->subsys_discovered) {
 		pr_err("Can't set model number. %s is already assigned\n",
 		       subsys->model_number);
 		return -EINVAL;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 213a0c2af4f7..146909486b8f 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1494,6 +1494,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 {
 	struct nvmet_subsys *subsys;
 	char serial[NVMET_SN_MAX_SIZE / 2];
+	int ret;
 
 	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
 	if (!subsys)
@@ -1504,6 +1505,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	get_random_bytes(&serial, sizeof(serial));
 	bin2hex(subsys->serial, &serial, sizeof(serial));
 
+	subsys->model_number = kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
+	if (!subsys->model_number) {
+		ret = -ENOMEM;
+		goto free_subsys;
+	}
+
 	switch (type) {
 	case NVME_NQN_NVME:
 		subsys->max_qid = NVMET_NR_QUEUES;
@@ -1513,15 +1520,15 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 		break;
 	default:
 		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
-		kfree(subsys);
-		return ERR_PTR(-EINVAL);
+		ret = -EINVAL;
+		goto free_mn;
 	}
 	subsys->type = type;
 	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
 			GFP_KERNEL);
 	if (!subsys->subsysnqn) {
-		kfree(subsys);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto free_mn;
 	}
 	subsys->cntlid_min = NVME_CNTLID_MIN;
 	subsys->cntlid_max = NVME_CNTLID_MAX;
@@ -1533,6 +1540,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	INIT_LIST_HEAD(&subsys->hosts);
 
 	return subsys;
+
+free_mn:
+	kfree(subsys->model_number);
+free_subsys:
+	kfree(subsys);
+	return ERR_PTR(ret);
 }
 
 static void nvmet_subsys_free(struct kref *ref)
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index b7fdad13094a..7aa62bc6ae84 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -244,7 +244,6 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvme_id_ctrl *id;
-	const char model[] = "Linux";
 	u16 status = 0;
 
 	if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
@@ -264,7 +263,8 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
 
 	memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE);
 	memset(id->fr, ' ', sizeof(id->fr));
-	memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+	memcpy_and_pad(id->mn, sizeof(id->mn), ctrl->subsys->model_number,
+		       strlen(ctrl->subsys->model_number), ' ');
 	memcpy_and_pad(id->fr, sizeof(id->fr),
 		       UTS_RELEASE, strlen(UTS_RELEASE), ' ');
 

From 87fd4cc1c0dda038c9a3617c9d07d5159326e80f Mon Sep 17 00:00:00 2001
From: Noam Gottlieb <ngottlieb@nvidia.com>
Date: Mon, 7 Jun 2021 12:23:24 +0300
Subject: [PATCH 2043/3292] nvmet: make ver stable once connection established

Once some host has connected to the nvmf target, make sure that the
version number is stable and cannot be changed.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Noam Gottlieb <ngottlieb@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/configfs.c | 36 +++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 9ef8708b92c6..273555127188 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1007,13 +1007,26 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
 			NVME_MINOR(subsys->ver));
 }
 
-static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
-					       const char *page, size_t count)
+static ssize_t
+nvmet_subsys_attr_version_store_locked(struct nvmet_subsys *subsys,
+		const char *page, size_t count)
 {
-	struct nvmet_subsys *subsys = to_subsys(item);
 	int major, minor, tertiary = 0;
 	int ret;
 
+	if (subsys->subsys_discovered) {
+		if (NVME_TERTIARY(subsys->ver))
+			pr_err("Can't set version number. %llu.%llu.%llu is already assigned\n",
+			       NVME_MAJOR(subsys->ver),
+			       NVME_MINOR(subsys->ver),
+			       NVME_TERTIARY(subsys->ver));
+		else
+			pr_err("Can't set version number. %llu.%llu is already assigned\n",
+			       NVME_MAJOR(subsys->ver),
+			       NVME_MINOR(subsys->ver));
+		return -EINVAL;
+	}
+
 	/* passthru subsystems use the underlying controller's version */
 	if (nvmet_passthru_ctrl(subsys))
 		return -EINVAL;
@@ -1022,12 +1035,25 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
 	if (ret != 2 && ret != 3)
 		return -EINVAL;
 
-	down_write(&nvmet_config_sem);
 	subsys->ver = NVME_VS(major, minor, tertiary);
-	up_write(&nvmet_config_sem);
 
 	return count;
 }
+
+static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
+					       const char *page, size_t count)
+{
+	struct nvmet_subsys *subsys = to_subsys(item);
+	ssize_t ret;
+
+	down_write(&nvmet_config_sem);
+	mutex_lock(&subsys->lock);
+	ret = nvmet_subsys_attr_version_store_locked(subsys, page, count);
+	mutex_unlock(&subsys->lock);
+	up_write(&nvmet_config_sem);
+
+	return ret;
+}
 CONFIGFS_ATTR(nvmet_subsys_, attr_version);
 
 /* See Section 1.5 of NVMe 1.4 */

From 46eca4702d93dbb8ac1c8fa84e5838fc8a1e82a0 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 13 Jun 2021 18:58:46 -0700
Subject: [PATCH 2044/3292] nvmet: use req->cmd directly in bdev-ns fast path

The function nvmet_bdev_parse_io_cmd() is called from the fast path.
The local variable to that function cmd is only used once.

Remove the local variable and use req->cmd directly.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-bdev.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index f673679d258a..5d998e5873d3 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -429,9 +429,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
 
 u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
 {
-	struct nvme_command *cmd = req->cmd;
-
-	switch (cmd->common.opcode) {
+	switch (req->cmd->common.opcode) {
 	case nvme_cmd_read:
 	case nvme_cmd_write:
 		req->execute = nvmet_bdev_execute_rw;

From f3dce2add3e1a06f4e16616408aa70bf2f7c8431 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 13 Jun 2021 18:58:47 -0700
Subject: [PATCH 2045/3292] nvmet: use req->cmd directly in file-ns fast path

The function nvmet_file_parse_io_cmd() is called from the fast path. The
local variable to that function cmd is only used once.

Remove the local variable and use req->cmd directly.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-file.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 7fdbdc496597..1dd1a0fe2e81 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -385,9 +385,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
 
 u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
 {
-	struct nvme_command *cmd = req->cmd;
-
-	switch (cmd->common.opcode) {
+	switch (req->cmd->common.opcode) {
 	case nvme_cmd_read:
 	case nvme_cmd_write:
 		req->execute = nvmet_file_execute_rw;

From 86693c43bb01c2597b55ec2fac37214ed1094a49 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 13 Jun 2021 18:58:48 -0700
Subject: [PATCH 2046/3292] nvmet: use u32 for nvmet_subsys max_nsid

Use u32 type for the nsid_max member of the nvmet_subsys structure.
This avoids the type confusion when updating the subsys->nax_nsid from
ns->nsid. This also matches the nvmet_ns->nsid member.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/nvmet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index bd0a0b91d843..3468f25cb4b7 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -218,7 +218,7 @@ struct nvmet_subsys {
 
 	struct xarray		namespaces;
 	unsigned int		nr_namespaces;
-	unsigned int		max_nsid;
+	u32			max_nsid;
 	u16			cntlid_min;
 	u16			cntlid_max;
 

From 245067e37d52185a741d269e658afee40d467287 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 13 Jun 2021 18:58:49 -0700
Subject: [PATCH 2047/3292] nvmet: use u32 type for the local variable nsid

In function nvmet_max_nsid() we calculate the max nsid by iterating
over the XArray and store it in the variable nsid that has type of
unsigned long.

Since the value of this function is stored into the subsys->max_nsid
which is of type u32, change the local variable nsid type and the return
type of the same function to u32.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 146909486b8f..8494a132da35 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -122,11 +122,11 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
 	return 0;
 }
 
-static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
+static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
 {
-	unsigned long nsid = 0;
 	struct nvmet_ns *cur;
 	unsigned long idx;
+	u32 nsid = 0;
 
 	xa_for_each(&subsys->namespaces, idx, cur)
 		nsid = cur->nsid;

From 8bb6cb9b97ef0b0ae4a492db5a90f8156d2cbe85 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 13 Jun 2021 18:58:50 -0700
Subject: [PATCH 2048/3292] nvmet: use nvme status value directly

There is no point in keeping the status variable that is used only once
in the function nvmet_async_events_failall().

Remove the variable and use the value directly.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8494a132da35..45a5b273b525 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -141,14 +141,13 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
 
 static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
 {
-	u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
 	struct nvmet_req *req;
 
 	mutex_lock(&ctrl->lock);
 	while (ctrl->nr_async_event_cmds) {
 		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
 		mutex_unlock(&ctrl->lock);
-		nvmet_req_complete(req, status);
+		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
 		mutex_lock(&ctrl->lock);
 	}
 	mutex_unlock(&ctrl->lock);

From 7860569ad47f9fbd7c9f93a5c2b7d2a18e4af831 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 13 Jun 2021 18:58:51 -0700
Subject: [PATCH 2049/3292] nvmet: remove local variable

In function errno_to_nvme_status() we store the value of the NVMe
status into the local variable and don't do anything useful with that
but just return.

Remove the local variable and return the value directly from switch.
This also removed extra break statements.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 45a5b273b525..c8708dcaeaa5 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -43,43 +43,34 @@ DECLARE_RWSEM(nvmet_ana_sem);
 
 inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
 {
-	u16 status;
-
 	switch (errno) {
 	case 0:
-		status = NVME_SC_SUCCESS;
-		break;
+		return NVME_SC_SUCCESS;
 	case -ENOSPC:
 		req->error_loc = offsetof(struct nvme_rw_command, length);
-		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
-		break;
+		return NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
 	case -EREMOTEIO:
 		req->error_loc = offsetof(struct nvme_rw_command, slba);
-		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
-		break;
+		return  NVME_SC_LBA_RANGE | NVME_SC_DNR;
 	case -EOPNOTSUPP:
 		req->error_loc = offsetof(struct nvme_common_command, opcode);
 		switch (req->cmd->common.opcode) {
 		case nvme_cmd_dsm:
 		case nvme_cmd_write_zeroes:
-			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
-			break;
+			return NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
 		default:
-			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 		break;
 	case -ENODATA:
 		req->error_loc = offsetof(struct nvme_rw_command, nsid);
-		status = NVME_SC_ACCESS_DENIED;
-		break;
+		return NVME_SC_ACCESS_DENIED;
 	case -EIO:
 		fallthrough;
 	default:
 		req->error_loc = offsetof(struct nvme_common_command, opcode);
-		status = NVME_SC_INTERNAL | NVME_SC_DNR;
+		return NVME_SC_INTERNAL | NVME_SC_DNR;
 	}
-
-	return status;
 }
 
 u16 nvmet_report_invalid_opcode(struct nvmet_req *req)

From c28a61471c5898e832c6e8634b2659249761b833 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:32:48 -0700
Subject: [PATCH 2050/3292] block: export blk_next_bio()

The block layer provides emulation of zone management operations
targeting all zones of a zoned block device only for the zone reset
operation (REQ_OP_ZONE_RESET). In order to correctly implement
exporting of zoned block devices with NVMeOF, emulating zone management
operations targeting all zones of a device is also necessary for the
open, close and finish zone operations (REQ_OP_ZONE_OPEN,
REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH).

Instead of duplicating the code, export the existing helper from block
layer so we can use a bio chaining pattern that is present in the block
layer for REQ_OP_ZONE RESET all emulation in the NVMeOF zoned block
device backend.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-lib.c     | 1 +
 include/linux/bio.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 7b256131b20b..9f09beadcbe3 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -21,6 +21,7 @@ struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
 
 	return new;
 }
+EXPORT_SYMBOL_GPL(blk_next_bio);
 
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, int flags,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a0b4cfdf62a4..b2491ead22a0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -822,4 +822,6 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 		bio->bi_opf |= REQ_NOWAIT;
 }
 
+struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
+
 #endif /* __LINUX_BIO_H */

From 6e597263f990a2db99e7380debc4044c38867971 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:32:49 -0700
Subject: [PATCH 2051/3292] nvmet: add req cns error complete helper

We report error and complete the request when identify cns value is not
handled in nvmet_execute_identify(). This error reporting is also needed
for Zone Block Device backend for NVMeOF target.

Add a helper nvmet_req_cns_error_compplete() to report an error and
complete the request when idenitfy command cns not handled value.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 5 +----
 drivers/nvme/target/nvmet.h     | 8 ++++++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index cd60a8184d04..3de6a6c99b01 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -637,10 +637,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
 		return nvmet_execute_identify_desclist(req);
 	}
 
-	pr_debug("unhandled identify cns %d on qid %d\n",
-	       req->cmd->identify.cns, req->sq->qid);
-	req->error_loc = offsetof(struct nvme_identify, cns);
-	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+	nvmet_req_cns_error_complete(req);
 }
 
 /*
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 3468f25cb4b7..002651f34c5e 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -624,4 +624,12 @@ static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
 	       req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
 }
 
+static inline void nvmet_req_cns_error_complete(struct nvmet_req *req)
+{
+	pr_debug("unhandled identify cns %d on qid %d\n",
+	       req->cmd->identify.cns, req->sq->qid);
+	req->error_loc = offsetof(struct nvme_identify, cns);
+	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
 #endif /* _NVMET_H */

From 9a01b58c22ccabd00e990e9dc01c2de5865d6e4d Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:32:50 -0700
Subject: [PATCH 2052/3292] nvmet: add nvmet_req_bio put helper for backends

In current code there exists two backends which are using inline bio
optimization, that adds a duplicate code for freeing the bio.

For Zoned Block Device backend we also use the same optimzation and it
will lead to having duplicate code in the three backends: generic
bdev, passsthru, and generic zns.

Add a helper function to avoid duplicate code and update the respective
backends.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-bdev.c | 3 +--
 drivers/nvme/target/nvmet.h       | 6 ++++++
 drivers/nvme/target/passthru.c    | 3 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 5d998e5873d3..019cc994efcd 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -164,8 +164,7 @@ static void nvmet_bio_done(struct bio *bio)
 	struct nvmet_req *req = bio->bi_private;
 
 	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
-	if (bio != &req->b.inline_bio)
-		bio_put(bio);
+	nvmet_req_bio_put(req, bio);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 002651f34c5e..29b386de1d07 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -632,4 +632,10 @@ static inline void nvmet_req_cns_error_complete(struct nvmet_req *req)
 	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
 }
 
+static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
+{
+	if (bio != &req->b.inline_bio)
+		bio_put(bio);
+}
+
 #endif /* _NVMET_H */
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 39b1473f7204..fced52de33ce 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -206,8 +206,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 	for_each_sg(req->sg, sg, req->sg_cnt, i) {
 		if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
 				    sg->offset) < sg->length) {
-			if (bio != &req->p.inline_bio)
-				bio_put(bio);
+			nvmet_req_bio_put(req, bio);
 			return -EINVAL;
 		}
 	}

From ab5d0b38c0475d6ff59f1a6ccf7c668b9ec2e0a4 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:32:51 -0700
Subject: [PATCH 2053/3292] nvmet: add Command Set Identifier support

NVMe TP 4056 allows controllers to support different command sets.
NVMeoF target currently only supports namespaces that contain
traditional logical blocks that may be randomly read and written. In
some applications there is a value in exposing namespaces that contain
logical blocks that have special access rules (e.g. sequentially write
required namespace such as Zoned Namespace (ZNS)).

In order to support the Zoned Block Devices (ZBD) backend, controllers
need to have support for ZNS Command Set Identifier (CSI).

In this preparation patch, we adjust the code such that it can now
support the default command set identifier. We update the namespace data
structure to store the CSI value which defaults to NVME_CSI_NVM
that represents traditional logical blocks namespace type.

The CSI support is required to implement the ZBD backend for NVMeOF
with host side NVMe ZNS interface, since ZNS commands belong to
the different command set than the default one.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 75 +++++++++++++++++++++++++++------
 drivers/nvme/target/core.c      | 28 +++++++++---
 drivers/nvme/target/nvmet.h     |  1 +
 include/linux/nvme.h            |  1 +
 4 files changed, 87 insertions(+), 18 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 3de6a6c99b01..93aaa7479e71 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -162,15 +162,8 @@ out:
 	nvmet_req_complete(req, status);
 }
 
-static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
+static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
 {
-	u16 status = NVME_SC_INTERNAL;
-	struct nvme_effects_log *log;
-
-	log = kzalloc(sizeof(*log), GFP_KERNEL);
-	if (!log)
-		goto out;
-
 	log->acs[nvme_admin_get_log_page]	= cpu_to_le32(1 << 0);
 	log->acs[nvme_admin_identify]		= cpu_to_le32(1 << 0);
 	log->acs[nvme_admin_abort_cmd]		= cpu_to_le32(1 << 0);
@@ -184,9 +177,30 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
 	log->iocs[nvme_cmd_flush]		= cpu_to_le32(1 << 0);
 	log->iocs[nvme_cmd_dsm]			= cpu_to_le32(1 << 0);
 	log->iocs[nvme_cmd_write_zeroes]	= cpu_to_le32(1 << 0);
+}
+
+static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
+{
+	struct nvme_effects_log *log;
+	u16 status = NVME_SC_SUCCESS;
+
+	log = kzalloc(sizeof(*log), GFP_KERNEL);
+	if (!log) {
+		status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
+	switch (req->cmd->get_log_page.csi) {
+	case NVME_CSI_NVM:
+		nvmet_get_cmd_effects_nvm(log);
+		break;
+	default:
+		status = NVME_SC_INVALID_LOG_PAGE;
+		goto free;
+	}
 
 	status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
-
+free:
 	kfree(log);
 out:
 	nvmet_req_complete(req, status);
@@ -613,6 +627,12 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req)
 			goto out;
 	}
 
+	status = nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
+					  NVME_NIDT_CSI_LEN,
+					  &req->ns->csi, &off);
+	if (status)
+		goto out;
+
 	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
 			off) != NVME_IDENTIFY_DATA_SIZE - off)
 		status = NVME_SC_INTERNAL | NVME_SC_DNR;
@@ -621,6 +641,17 @@ out:
 	nvmet_req_complete(req, status);
 }
 
+static bool nvmet_handle_identify_desclist(struct nvmet_req *req)
+{
+	switch (req->cmd->identify.csi) {
+	case NVME_CSI_NVM:
+		nvmet_execute_identify_desclist(req);
+		return true;
+	default:
+		return false;
+	}
+}
+
 static void nvmet_execute_identify(struct nvmet_req *req)
 {
 	if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
@@ -628,13 +659,31 @@ static void nvmet_execute_identify(struct nvmet_req *req)
 
 	switch (req->cmd->identify.cns) {
 	case NVME_ID_CNS_NS:
-		return nvmet_execute_identify_ns(req);
+		switch (req->cmd->identify.csi) {
+		case NVME_CSI_NVM:
+			return nvmet_execute_identify_ns(req);
+		default:
+			break;
+		}
+		break;
 	case NVME_ID_CNS_CTRL:
-		return nvmet_execute_identify_ctrl(req);
+		switch (req->cmd->identify.csi) {
+		case NVME_CSI_NVM:
+			return nvmet_execute_identify_ctrl(req);
+		}
+		break;
 	case NVME_ID_CNS_NS_ACTIVE_LIST:
-		return nvmet_execute_identify_nslist(req);
+		switch (req->cmd->identify.csi) {
+		case NVME_CSI_NVM:
+			return nvmet_execute_identify_nslist(req);
+		default:
+			break;
+		}
+		break;
 	case NVME_ID_CNS_NS_DESC_LIST:
-		return nvmet_execute_identify_desclist(req);
+		if (nvmet_handle_identify_desclist(req) == true)
+			return;
+		break;
 	}
 
 	nvmet_req_cns_error_complete(req);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index c8708dcaeaa5..77873d56cff5 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -682,6 +682,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 
 	uuid_gen(&ns->uuid);
 	ns->buffered_io = false;
+	ns->csi = NVME_CSI_NVM;
 
 	return ns;
 }
@@ -877,10 +878,14 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 		return ret;
 	}
 
-	if (req->ns->file)
-		return nvmet_file_parse_io_cmd(req);
-
-	return nvmet_bdev_parse_io_cmd(req);
+	switch (req->ns->csi) {
+	case NVME_CSI_NVM:
+		if (req->ns->file)
+			return nvmet_file_parse_io_cmd(req);
+		return nvmet_bdev_parse_io_cmd(req);
+	default:
+		return NVME_SC_INVALID_IO_CMD_SET;
+	}
 }
 
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
@@ -1102,6 +1107,17 @@ static inline u8 nvmet_cc_iocqes(u32 cc)
 	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
 }
 
+static inline bool nvmet_css_supported(u8 cc_css)
+{
+	switch (cc_css <<= NVME_CC_CSS_SHIFT) {
+	case NVME_CC_CSS_NVM:
+	case NVME_CC_CSS_CSI:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 {
 	lockdep_assert_held(&ctrl->lock);
@@ -1121,7 +1137,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 
 	if (nvmet_cc_mps(ctrl->cc) != 0 ||
 	    nvmet_cc_ams(ctrl->cc) != 0 ||
-	    nvmet_cc_css(ctrl->cc) != 0) {
+	    !nvmet_css_supported(nvmet_cc_css(ctrl->cc))) {
 		ctrl->csts = NVME_CSTS_CFS;
 		return;
 	}
@@ -1172,6 +1188,8 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
 {
 	/* command sets supported: NVMe command set: */
 	ctrl->cap = (1ULL << 37);
+	/* Controller supports one or more I/O Command Sets */
+	ctrl->cap |= (1ULL << 43);
 	/* CC.EN timeout in 500msec units: */
 	ctrl->cap |= (15ULL << 24);
 	/* maximum queue entries supported: */
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 29b386de1d07..cc71918ff8fe 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -83,6 +83,7 @@ struct nvmet_ns {
 	struct pci_dev		*p2p_dev;
 	int			pi_type;
 	int			metadata_size;
+	u8			csi;
 };
 
 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index edcbd60b88b9..c7ba83144d52 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1504,6 +1504,7 @@ enum {
 	NVME_SC_NS_WRITE_PROTECTED	= 0x20,
 	NVME_SC_CMD_INTERRUPTED		= 0x21,
 	NVME_SC_TRANSIENT_TR_ERR	= 0x22,
+	NVME_SC_INVALID_IO_CMD_SET	= 0x2C,
 
 	NVME_SC_LBA_RANGE		= 0x80,
 	NVME_SC_CAP_EXCEEDED		= 0x81,

From aaf2e048af2704da5869f27b508b288f36d5c7b7 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 9 Jun 2021 18:32:52 -0700
Subject: [PATCH 2054/3292] nvmet: add ZBD over ZNS backend support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NVMe TP 4053 – Zoned Namespaces (ZNS) allows host software to
communicate with a non-volatile memory subsystem using zones for NVMe
protocol-based controllers. NVMeOF already support the ZNS NVMe
Protocol compliant devices on the target in the passthru mode. There
are generic zoned block devices like  Shingled Magnetic Recording (SMR)
HDDs that are not based on the NVMe protocol.

This patch adds ZNS backend support for non-ZNS zoned block devices as
NVMeOF targets.

This support includes implementing the new command set NVME_CSI_ZNS,
adding different command handlers for ZNS command set such as NVMe
Identify Controller, NVMe Identify Namespace, NVMe Zone Append,
NVMe Zone Management Send and NVMe Zone Management Receive.

With the new command set identifier, we also update the target command
effects logs to reflect the ZNS compliant commands.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/Makefile      |   1 +
 drivers/nvme/target/admin-cmd.c   |  41 ++
 drivers/nvme/target/core.c        |  15 +-
 drivers/nvme/target/io-cmd-bdev.c |  26 +-
 drivers/nvme/target/nvmet.h       |  20 +
 drivers/nvme/target/zns.c         | 615 ++++++++++++++++++++++++++++++
 include/linux/nvme.h              |   7 +
 7 files changed, 714 insertions(+), 11 deletions(-)
 create mode 100644 drivers/nvme/target/zns.c

diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index ebf91fc4c72e..9837e580fa7e 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_NVME_TARGET_TCP)		+= nvmet-tcp.o
 nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
 			discovery.o io-cmd-file.o io-cmd-bdev.o
 nvmet-$(CONFIG_NVME_TARGET_PASSTHRU)	+= passthru.o
+nvmet-$(CONFIG_BLK_DEV_ZONED)		+= zns.o
 nvme-loop-y	+= loop.o
 nvmet-rdma-y	+= rdma.o
 nvmet-fc-y	+= fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 93aaa7479e71..363e357d2f20 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -179,6 +179,13 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
 	log->iocs[nvme_cmd_write_zeroes]	= cpu_to_le32(1 << 0);
 }
 
+static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log)
+{
+	log->iocs[nvme_cmd_zone_append]		= cpu_to_le32(1 << 0);
+	log->iocs[nvme_cmd_zone_mgmt_send]	= cpu_to_le32(1 << 0);
+	log->iocs[nvme_cmd_zone_mgmt_recv]	= cpu_to_le32(1 << 0);
+}
+
 static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
 {
 	struct nvme_effects_log *log;
@@ -194,6 +201,14 @@ static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
 	case NVME_CSI_NVM:
 		nvmet_get_cmd_effects_nvm(log);
 		break;
+	case NVME_CSI_ZNS:
+		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+			status = NVME_SC_INVALID_IO_CMD_SET;
+			goto free;
+		}
+		nvmet_get_cmd_effects_nvm(log);
+		nvmet_get_cmd_effects_zns(log);
+		break;
 	default:
 		status = NVME_SC_INVALID_LOG_PAGE;
 		goto free;
@@ -647,6 +662,12 @@ static bool nvmet_handle_identify_desclist(struct nvmet_req *req)
 	case NVME_CSI_NVM:
 		nvmet_execute_identify_desclist(req);
 		return true;
+	case NVME_CSI_ZNS:
+		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+			nvmet_execute_identify_desclist(req);
+			return true;
+		}
+		return false;
 	default:
 		return false;
 	}
@@ -666,12 +687,32 @@ static void nvmet_execute_identify(struct nvmet_req *req)
 			break;
 		}
 		break;
+	case NVME_ID_CNS_CS_NS:
+		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+			switch (req->cmd->identify.csi) {
+			case NVME_CSI_ZNS:
+				return nvmet_execute_identify_cns_cs_ns(req);
+			default:
+				break;
+			}
+		}
+		break;
 	case NVME_ID_CNS_CTRL:
 		switch (req->cmd->identify.csi) {
 		case NVME_CSI_NVM:
 			return nvmet_execute_identify_ctrl(req);
 		}
 		break;
+	case NVME_ID_CNS_CS_CTRL:
+		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+			switch (req->cmd->identify.csi) {
+			case NVME_CSI_ZNS:
+				return nvmet_execute_identify_cns_cs_ctrl(req);
+			default:
+				break;
+			}
+		}
+		break;
 	case NVME_ID_CNS_NS_ACTIVE_LIST:
 		switch (req->cmd->identify.csi) {
 		case NVME_CSI_NVM:
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 77873d56cff5..dd16704c9b6b 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -16,6 +16,7 @@
 #include "nvmet.h"
 
 struct workqueue_struct *buffered_io_wq;
+struct workqueue_struct *zbd_wq;
 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
 static DEFINE_IDA(cntlid_ida);
 
@@ -883,6 +884,10 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 		if (req->ns->file)
 			return nvmet_file_parse_io_cmd(req);
 		return nvmet_bdev_parse_io_cmd(req);
+	case NVME_CSI_ZNS:
+		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+			return nvmet_bdev_zns_parse_io_cmd(req);
+		return NVME_SC_INVALID_IO_CMD_SET;
 	default:
 		return NVME_SC_INVALID_IO_CMD_SET;
 	}
@@ -1592,11 +1597,15 @@ static int __init nvmet_init(void)
 
 	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
 
+	zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
+	if (!zbd_wq)
+		return -ENOMEM;
+
 	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
 			WQ_MEM_RECLAIM, 0);
 	if (!buffered_io_wq) {
 		error = -ENOMEM;
-		goto out;
+		goto out_free_zbd_work_queue;
 	}
 
 	error = nvmet_init_discovery();
@@ -1612,7 +1621,8 @@ out_exit_discovery:
 	nvmet_exit_discovery();
 out_free_work_queue:
 	destroy_workqueue(buffered_io_wq);
-out:
+out_free_zbd_work_queue:
+	destroy_workqueue(zbd_wq);
 	return error;
 }
 
@@ -1622,6 +1632,7 @@ static void __exit nvmet_exit(void)
 	nvmet_exit_discovery();
 	ida_destroy(&cntlid_ida);
 	destroy_workqueue(buffered_io_wq);
+	destroy_workqueue(zbd_wq);
 
 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 019cc994efcd..0fc2781ab970 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -47,6 +47,14 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
 	id->nows = to0based(ql->io_opt / ql->logical_block_size);
 }
 
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
+{
+	if (ns->bdev) {
+		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
+		ns->bdev = NULL;
+	}
+}
+
 static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
 {
 	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
@@ -86,15 +94,15 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
 	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
 		nvmet_bdev_ns_enable_integrity(ns);
 
-	return 0;
-}
-
-void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
-{
-	if (ns->bdev) {
-		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
-		ns->bdev = NULL;
+	if (bdev_is_zoned(ns->bdev)) {
+		if (!nvmet_bdev_zns_enable(ns)) {
+			nvmet_bdev_ns_disable(ns);
+			return -EINVAL;
+		}
+		ns->csi = NVME_CSI_ZNS;
 	}
+
+	return 0;
 }
 
 void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
@@ -102,7 +110,7 @@ void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
 	ns->size = i_size_read(ns->bdev->bd_inode);
 }
 
-static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
+u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
 {
 	u16 status = NVME_SC_SUCCESS;
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index cc71918ff8fe..d719a1cd5dda 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -250,6 +250,10 @@ struct nvmet_subsys {
 	unsigned int		admin_timeout;
 	unsigned int		io_timeout;
 #endif /* CONFIG_NVME_TARGET_PASSTHRU */
+
+#ifdef CONFIG_BLK_DEV_ZONED
+	u8			zasl;
+#endif /* CONFIG_BLK_DEV_ZONED */
 };
 
 static inline struct nvmet_subsys *to_subsys(struct config_item *item)
@@ -335,6 +339,12 @@ struct nvmet_req {
 			struct work_struct      work;
 			bool			use_workqueue;
 		} p;
+#ifdef CONFIG_BLK_DEV_ZONED
+		struct {
+			struct bio		inline_bio;
+			struct work_struct	zmgmt_work;
+		} z;
+#endif /* CONFIG_BLK_DEV_ZONED */
 	};
 	int			sg_cnt;
 	int			metadata_sg_cnt;
@@ -354,6 +364,7 @@ struct nvmet_req {
 };
 
 extern struct workqueue_struct *buffered_io_wq;
+extern struct workqueue_struct *zbd_wq;
 
 static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
 {
@@ -403,6 +414,7 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
 void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
 u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
 u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
@@ -530,6 +542,14 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
 void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
 int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
 void nvmet_ns_revalidate(struct nvmet_ns *ns);
+u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
+
+bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
+void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
+void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
+void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
+void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
+void nvmet_bdev_execute_zone_append(struct nvmet_req *req);
 
 static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
 {
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
new file mode 100644
index 000000000000..17f8b7a45f21
--- /dev/null
+++ b/drivers/nvme/target/zns.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe ZNS-ZBD command implementation.
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/nvme.h>
+#include <linux/blkdev.h>
+#include "nvmet.h"
+
+/*
+ * We set the Memory Page Size Minimum (MPSMIN) for target controller to 0
+ * which gets added by 12 in the nvme_enable_ctrl() which results in 2^12 = 4k
+ * as page_shift value. When calculating the ZASL use shift by 12.
+ */
+#define NVMET_MPSMIN_SHIFT	12
+
+static inline u8 nvmet_zasl(unsigned int zone_append_sects)
+{
+	/*
+	 * Zone Append Size Limit (zasl) is expressed as a power of 2 value
+	 * with the minimum memory page size (i.e. 12) as unit.
+	 */
+	return ilog2(zone_append_sects >> (NVMET_MPSMIN_SHIFT - 9));
+}
+
+static int validate_conv_zones_cb(struct blk_zone *z,
+				  unsigned int i, void *data)
+{
+	if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+{
+	struct request_queue *q = ns->bdev->bd_disk->queue;
+	u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
+	struct gendisk *bd_disk = ns->bdev->bd_disk;
+	int ret;
+
+	if (ns->subsys->zasl) {
+		if (ns->subsys->zasl > zasl)
+			return false;
+	}
+	ns->subsys->zasl = zasl;
+
+	/*
+	 * Generic zoned block devices may have a smaller last zone which is
+	 * not supported by ZNS. Exclude zoned drives that have such smaller
+	 * last zone.
+	 */
+	if (get_capacity(bd_disk) & (bdev_zone_sectors(ns->bdev) - 1))
+		return false;
+	/*
+	 * ZNS does not define a conventional zone type. If the underlying
+	 * device has a bitmap set indicating the existence of conventional
+	 * zones, reject the device. Otherwise, use report zones to detect if
+	 * the device has conventional zones.
+	 */
+	if (ns->bdev->bd_disk->queue->conv_zones_bitmap)
+		return false;
+
+	ret = blkdev_report_zones(ns->bdev, 0, blkdev_nr_zones(bd_disk),
+				  validate_conv_zones_cb, NULL);
+	if (ret < 0)
+		return false;
+
+	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+	return true;
+}
+
+void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
+{
+	u8 zasl = req->sq->ctrl->subsys->zasl;
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvme_id_ctrl_zns *id;
+	u16 status;
+
+	id = kzalloc(sizeof(*id), GFP_KERNEL);
+	if (!id) {
+		status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
+	if (ctrl->ops->get_mdts)
+		id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl);
+	else
+		id->zasl = zasl;
+
+	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+
+	kfree(id);
+out:
+	nvmet_req_complete(req, status);
+}
+
+void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
+{
+	struct nvme_id_ns_zns *id_zns;
+	u64 zsze;
+	u16 status;
+
+	if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
+		req->error_loc = offsetof(struct nvme_identify, nsid);
+		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+		goto out;
+	}
+
+	id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL);
+	if (!id_zns) {
+		status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
+	status = nvmet_req_find_ns(req);
+	if (status) {
+		status = NVME_SC_INTERNAL;
+		goto done;
+	}
+
+	if (!bdev_is_zoned(req->ns->bdev)) {
+		req->error_loc = offsetof(struct nvme_identify, nsid);
+		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+		goto done;
+	}
+
+	nvmet_ns_revalidate(req->ns);
+	zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
+					req->ns->blksize_shift;
+	id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
+	id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev));
+	id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev));
+
+done:
+	status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
+	kfree(id_zns);
+out:
+	nvmet_req_complete(req, status);
+}
+
+static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
+{
+	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
+	u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
+
+	if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
+		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba);
+		return NVME_SC_LBA_RANGE | NVME_SC_DNR;
+	}
+
+	if (out_bufsize < sizeof(struct nvme_zone_report)) {
+		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd);
+		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+	}
+
+	if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
+		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra);
+		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+	}
+
+	switch (req->cmd->zmr.pr) {
+	case 0:
+	case 1:
+		break;
+	default:
+		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr);
+		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+	}
+
+	switch (req->cmd->zmr.zrasf) {
+	case NVME_ZRASF_ZONE_REPORT_ALL:
+	case NVME_ZRASF_ZONE_STATE_EMPTY:
+	case NVME_ZRASF_ZONE_STATE_IMP_OPEN:
+	case NVME_ZRASF_ZONE_STATE_EXP_OPEN:
+	case NVME_ZRASF_ZONE_STATE_CLOSED:
+	case NVME_ZRASF_ZONE_STATE_FULL:
+	case NVME_ZRASF_ZONE_STATE_READONLY:
+	case NVME_ZRASF_ZONE_STATE_OFFLINE:
+		break;
+	default:
+		req->error_loc =
+			offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf);
+		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+	}
+
+	return NVME_SC_SUCCESS;
+}
+
+struct nvmet_report_zone_data {
+	struct nvmet_req *req;
+	u64 out_buf_offset;
+	u64 out_nr_zones;
+	u64 nr_zones;
+	u8 zrasf;
+};
+
+static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
+{
+	static const unsigned int nvme_zrasf_to_blk_zcond[] = {
+		[NVME_ZRASF_ZONE_STATE_EMPTY]	 = BLK_ZONE_COND_EMPTY,
+		[NVME_ZRASF_ZONE_STATE_IMP_OPEN] = BLK_ZONE_COND_IMP_OPEN,
+		[NVME_ZRASF_ZONE_STATE_EXP_OPEN] = BLK_ZONE_COND_EXP_OPEN,
+		[NVME_ZRASF_ZONE_STATE_CLOSED]	 = BLK_ZONE_COND_CLOSED,
+		[NVME_ZRASF_ZONE_STATE_READONLY] = BLK_ZONE_COND_READONLY,
+		[NVME_ZRASF_ZONE_STATE_FULL]	 = BLK_ZONE_COND_FULL,
+		[NVME_ZRASF_ZONE_STATE_OFFLINE]	 = BLK_ZONE_COND_OFFLINE,
+	};
+	struct nvmet_report_zone_data *rz = d;
+
+	if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL &&
+	    z->cond != nvme_zrasf_to_blk_zcond[rz->zrasf])
+		return 0;
+
+	if (rz->nr_zones < rz->out_nr_zones) {
+		struct nvme_zone_descriptor zdesc = { };
+		u16 status;
+
+		zdesc.zcap = nvmet_sect_to_lba(rz->req->ns, z->capacity);
+		zdesc.zslba = nvmet_sect_to_lba(rz->req->ns, z->start);
+		zdesc.wp = nvmet_sect_to_lba(rz->req->ns, z->wp);
+		zdesc.za = z->reset ? 1 << 2 : 0;
+		zdesc.zs = z->cond << 4;
+		zdesc.zt = z->type;
+
+		status = nvmet_copy_to_sgl(rz->req, rz->out_buf_offset, &zdesc,
+					   sizeof(zdesc));
+		if (status)
+			return -EINVAL;
+
+		rz->out_buf_offset += sizeof(zdesc);
+	}
+
+	rz->nr_zones++;
+
+	return 0;
+}
+
+static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
+{
+	unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
+
+	return blkdev_nr_zones(req->ns->bdev->bd_disk) -
+		(sect >> ilog2(bdev_zone_sectors(req->ns->bdev)));
+}
+
+static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize)
+{
+	if (bufsize <= sizeof(struct nvme_zone_report))
+		return 0;
+
+	return (bufsize - sizeof(struct nvme_zone_report)) /
+		sizeof(struct nvme_zone_descriptor);
+}
+
+static void nvmet_bdev_zone_zmgmt_recv_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work);
+	sector_t start_sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
+	unsigned long req_slba_nr_zones = nvmet_req_nr_zones_from_slba(req);
+	u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
+	__le64 nr_zones;
+	u16 status;
+	int ret;
+	struct nvmet_report_zone_data rz_data = {
+		.out_nr_zones = get_nr_zones_from_buf(req, out_bufsize),
+		/* leave the place for report zone header */
+		.out_buf_offset = sizeof(struct nvme_zone_report),
+		.zrasf = req->cmd->zmr.zrasf,
+		.nr_zones = 0,
+		.req = req,
+	};
+
+	status = nvmet_bdev_validate_zone_mgmt_recv(req);
+	if (status)
+		goto out;
+
+	if (!req_slba_nr_zones) {
+		status = NVME_SC_SUCCESS;
+		goto out;
+	}
+
+	ret = blkdev_report_zones(req->ns->bdev, start_sect, req_slba_nr_zones,
+				 nvmet_bdev_report_zone_cb, &rz_data);
+	if (ret < 0) {
+		status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
+	/*
+	 * When partial bit is set nr_zones must indicate the number of zone
+	 * descriptors actually transferred.
+	 */
+	if (req->cmd->zmr.pr)
+		rz_data.nr_zones = min(rz_data.nr_zones, rz_data.out_nr_zones);
+
+	nr_zones = cpu_to_le64(rz_data.nr_zones);
+	status = nvmet_copy_to_sgl(req, 0, &nr_zones, sizeof(nr_zones));
+
+out:
+	nvmet_req_complete(req, status);
+}
+
+void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
+{
+	INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zone_zmgmt_recv_work);
+	queue_work(zbd_wq, &req->z.zmgmt_work);
+}
+
+static inline enum req_opf zsa_req_op(u8 zsa)
+{
+	switch (zsa) {
+	case NVME_ZONE_OPEN:
+		return REQ_OP_ZONE_OPEN;
+	case NVME_ZONE_CLOSE:
+		return REQ_OP_ZONE_CLOSE;
+	case NVME_ZONE_FINISH:
+		return REQ_OP_ZONE_FINISH;
+	case NVME_ZONE_RESET:
+		return REQ_OP_ZONE_RESET;
+	default:
+		return REQ_OP_LAST;
+	}
+}
+
+static u16 blkdev_zone_mgmt_errno_to_nvme_status(int ret)
+{
+	switch (ret) {
+	case 0:
+		return NVME_SC_SUCCESS;
+	case -EINVAL:
+	case -EIO:
+		return NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
+	default:
+		return NVME_SC_INTERNAL;
+	}
+}
+
+struct nvmet_zone_mgmt_send_all_data {
+	unsigned long *zbitmap;
+	struct nvmet_req *req;
+};
+
+static int zmgmt_send_scan_cb(struct blk_zone *z, unsigned i, void *d)
+{
+	struct nvmet_zone_mgmt_send_all_data *data = d;
+
+	switch (zsa_req_op(data->req->cmd->zms.zsa)) {
+	case REQ_OP_ZONE_OPEN:
+		switch (z->cond) {
+		case BLK_ZONE_COND_CLOSED:
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case REQ_OP_ZONE_CLOSE:
+		switch (z->cond) {
+		case BLK_ZONE_COND_IMP_OPEN:
+		case BLK_ZONE_COND_EXP_OPEN:
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case REQ_OP_ZONE_FINISH:
+		switch (z->cond) {
+		case BLK_ZONE_COND_IMP_OPEN:
+		case BLK_ZONE_COND_EXP_OPEN:
+		case BLK_ZONE_COND_CLOSED:
+			break;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	set_bit(i, data->zbitmap);
+
+	return 0;
+}
+
+static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
+{
+	struct block_device *bdev = req->ns->bdev;
+	unsigned int nr_zones = blkdev_nr_zones(bdev->bd_disk);
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct bio *bio = NULL;
+	sector_t sector = 0;
+	int ret;
+	struct nvmet_zone_mgmt_send_all_data d = {
+		.req = req,
+	};
+
+	d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)),
+				 GFP_NOIO, q->node);
+	if (!d.zbitmap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Scan and build bitmap of the eligible zones */
+	ret = blkdev_report_zones(bdev, 0, nr_zones, zmgmt_send_scan_cb, &d);
+	if (ret != nr_zones) {
+		if (ret > 0)
+			ret = -EIO;
+		goto out;
+	} else {
+		/* We scanned all the zones */
+		ret = 0;
+	}
+
+	while (sector < get_capacity(bdev->bd_disk)) {
+		if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) {
+			bio = blk_next_bio(bio, 0, GFP_KERNEL);
+			bio->bi_opf = zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC;
+			bio->bi_iter.bi_sector = sector;
+			bio_set_dev(bio, bdev);
+			/* This may take a while, so be nice to others */
+			cond_resched();
+		}
+		sector += blk_queue_zone_sectors(q);
+	}
+
+	if (bio) {
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+
+out:
+	kfree(d.zbitmap);
+
+	return blkdev_zone_mgmt_errno_to_nvme_status(ret);
+}
+
+static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req)
+{
+	int ret;
+
+	switch (zsa_req_op(req->cmd->zms.zsa)) {
+	case REQ_OP_ZONE_RESET:
+		ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0,
+				       get_capacity(req->ns->bdev->bd_disk),
+				       GFP_KERNEL);
+		if (ret < 0)
+			return blkdev_zone_mgmt_errno_to_nvme_status(ret);
+		break;
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
+		return nvmet_bdev_zone_mgmt_emulate_all(req);
+	default:
+		/* this is needed to quiet compiler warning */
+		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
+		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+	}
+
+	return NVME_SC_SUCCESS;
+}
+
+static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work);
+	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
+	enum req_opf op = zsa_req_op(req->cmd->zms.zsa);
+	struct block_device *bdev = req->ns->bdev;
+	sector_t zone_sectors = bdev_zone_sectors(bdev);
+	u16 status = NVME_SC_SUCCESS;
+	int ret;
+
+	if (op == REQ_OP_LAST) {
+		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
+		status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
+		goto out;
+	}
+
+	/* when select all bit is set slba field is ignored */
+	if (req->cmd->zms.select_all) {
+		status = nvmet_bdev_execute_zmgmt_send_all(req);
+		goto out;
+	}
+
+	if (sect >= get_capacity(bdev->bd_disk)) {
+		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
+		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+		goto out;
+	}
+
+	if (sect & (zone_sectors - 1)) {
+		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		goto out;
+	}
+
+	ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL);
+	if (ret < 0)
+		status = blkdev_zone_mgmt_errno_to_nvme_status(ret);
+
+out:
+	nvmet_req_complete(req, status);
+}
+
+void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
+{
+	INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zmgmt_send_work);
+	queue_work(zbd_wq, &req->z.zmgmt_work);
+}
+
+static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
+{
+	struct nvmet_req *req = bio->bi_private;
+
+	if (bio->bi_status == BLK_STS_OK) {
+		req->cqe->result.u64 =
+			nvmet_sect_to_lba(req->ns, bio->bi_iter.bi_sector);
+	}
+
+	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
+	nvmet_req_bio_put(req, bio);
+}
+
+void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
+{
+	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
+	u16 status = NVME_SC_SUCCESS;
+	unsigned int total_len = 0;
+	struct scatterlist *sg;
+	struct bio *bio;
+	int sg_cnt;
+
+	/* Request is completed on len mismatch in nvmet_check_transter_len() */
+	if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
+		return;
+
+	if (!req->sg_cnt) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
+		req->error_loc = offsetof(struct nvme_rw_command, slba);
+		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+		goto out;
+	}
+
+	if (sect & (bdev_zone_sectors(req->ns->bdev) - 1)) {
+		req->error_loc = offsetof(struct nvme_rw_command, slba);
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		goto out;
+	}
+
+	if (nvmet_use_inline_bvec(req)) {
+		bio = &req->z.inline_bio;
+		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+	} else {
+		bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
+	}
+
+	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
+	bio->bi_end_io = nvmet_bdev_zone_append_bio_done;
+	bio_set_dev(bio, req->ns->bdev);
+	bio->bi_iter.bi_sector = sect;
+	bio->bi_private = req;
+	if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+		bio->bi_opf |= REQ_FUA;
+
+	for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
+		struct page *p = sg_page(sg);
+		unsigned int l = sg->length;
+		unsigned int o = sg->offset;
+		unsigned int ret;
+
+		ret = bio_add_zone_append_page(bio, p, l, o);
+		if (ret != sg->length) {
+			status = NVME_SC_INTERNAL;
+			goto out_put_bio;
+		}
+		total_len += sg->length;
+	}
+
+	if (total_len != nvmet_rw_data_len(req)) {
+		status = NVME_SC_INTERNAL | NVME_SC_DNR;
+		goto out_put_bio;
+	}
+
+	submit_bio(bio);
+	return;
+
+out_put_bio:
+	nvmet_req_bio_put(req, bio);
+out:
+	nvmet_req_complete(req, status);
+}
+
+u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	switch (cmd->common.opcode) {
+	case nvme_cmd_zone_append:
+		req->execute = nvmet_bdev_execute_zone_append;
+		return 0;
+	case nvme_cmd_zone_mgmt_recv:
+		req->execute = nvmet_bdev_execute_zone_mgmt_recv;
+		return 0;
+	case nvme_cmd_zone_mgmt_send:
+		req->execute = nvmet_bdev_execute_zone_mgmt_send;
+		return 0;
+	default:
+		return nvmet_bdev_parse_io_cmd(req);
+	}
+}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c7ba83144d52..cb1197f1cfed 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -944,6 +944,13 @@ struct nvme_zone_mgmt_recv_cmd {
 enum {
 	NVME_ZRA_ZONE_REPORT		= 0,
 	NVME_ZRASF_ZONE_REPORT_ALL	= 0,
+	NVME_ZRASF_ZONE_STATE_EMPTY	= 0x01,
+	NVME_ZRASF_ZONE_STATE_IMP_OPEN	= 0x02,
+	NVME_ZRASF_ZONE_STATE_EXP_OPEN	= 0x03,
+	NVME_ZRASF_ZONE_STATE_CLOSED	= 0x04,
+	NVME_ZRASF_ZONE_STATE_READONLY	= 0x05,
+	NVME_ZRASF_ZONE_STATE_FULL	= 0x06,
+	NVME_ZRASF_ZONE_STATE_OFFLINE	= 0x07,
 	NVME_REPORT_ZONE_PARTIAL	= 1,
 };
 

From 8abd7e2a753ad5ae59c3ca918e71f437c0c4b344 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 16 Jun 2021 15:15:51 -0700
Subject: [PATCH 2055/3292] nvmet: remove zeroout memset call for struct

Declare and initialize structure variables to zero values so that we can
remove zeroout memset calls in the target/rdma.c.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 7d607f435e36..891174ccd44b 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1257,7 +1257,7 @@ out_err:
 
 static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 {
-	struct ib_qp_init_attr qp_attr;
+	struct ib_qp_init_attr qp_attr = { };
 	struct nvmet_rdma_device *ndev = queue->dev;
 	int nr_cqe, ret, i, factor;
 
@@ -1275,7 +1275,6 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 		goto out;
 	}
 
-	memset(&qp_attr, 0, sizeof(qp_attr));
 	qp_attr.qp_context = queue;
 	qp_attr.event_handler = nvmet_rdma_qp_event;
 	qp_attr.send_cq = queue->cq;

From f66e2804d61aef690bb428d8de6a127f844bb240 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 16 Jun 2021 15:15:53 -0700
Subject: [PATCH 2056/3292] nvme-pci: remove zeroout memset call for struct

Declare and initialize structure variables to zero values so that we can
remove zeroout memset calls in the host/pci.c.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 12ffd58c27b1..d3c5086673bc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -307,13 +307,12 @@ static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
 
 static void nvme_dbbuf_set(struct nvme_dev *dev)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 	unsigned int i;
 
 	if (!dev->dbbuf_dbs)
 		return;
 
-	memset(&c, 0, sizeof(c));
 	c.dbbuf.opcode = nvme_admin_dbbuf;
 	c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
 	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
@@ -1112,9 +1111,8 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
 	struct nvme_queue *nvmeq = &dev->queues[0];
-	struct nvme_command c;
+	struct nvme_command c = { };
 
-	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
 	nvme_submit_cmd(nvmeq, &c, true);
@@ -1122,9 +1120,8 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 
-	memset(&c, 0, sizeof(c));
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
@@ -1134,7 +1131,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 		struct nvme_queue *nvmeq, s16 vector)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int flags = NVME_QUEUE_PHYS_CONTIG;
 
 	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
@@ -1144,7 +1141,6 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	 * Note: we (ab)use the fact that the prp fields survive if no data
 	 * is attached to the request.
 	 */
-	memset(&c, 0, sizeof(c));
 	c.create_cq.opcode = nvme_admin_create_cq;
 	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
 	c.create_cq.cqid = cpu_to_le16(qid);
@@ -1159,7 +1155,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 						struct nvme_queue *nvmeq)
 {
 	struct nvme_ctrl *ctrl = &dev->ctrl;
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int flags = NVME_QUEUE_PHYS_CONTIG;
 
 	/*
@@ -1174,7 +1170,6 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	 * Note: we (ab)use the fact that the prp fields survive if no data
 	 * is attached to the request.
 	 */
-	memset(&c, 0, sizeof(c));
 	c.create_sq.opcode = nvme_admin_create_sq;
 	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
 	c.create_sq.sqid = cpu_to_le16(qid);
@@ -1255,7 +1250,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_queue *nvmeq = iod->nvmeq;
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1335,7 +1330,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	}
 	iod->aborted = 1;
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.abort.opcode = nvme_admin_abort_cmd;
 	cmd.abort.cid = req->tag;
 	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
@@ -1886,10 +1880,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 {
 	u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT;
 	u64 dma_addr = dev->host_mem_descs_dma;
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int ret;
 
-	memset(&c, 0, sizeof(c));
 	c.features.opcode	= nvme_admin_set_features;
 	c.features.fid		= cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
 	c.features.dword11	= cpu_to_le32(bits);
@@ -2263,9 +2256,8 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 {
 	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
 	struct request *req;
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 
-	memset(&cmd, 0, sizeof(cmd));
 	cmd.delete_queue.opcode = opcode;
 	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
 

From cc72c4426764d1716839e9ec591ee8e161ed5cbc Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Wed, 16 Jun 2021 15:15:52 -0700
Subject: [PATCH 2057/3292] nvme: remove zeroout memset call for struct

Declare and initialize structure variables to zero values so that we can
remove zeroout memset calls in the host/core.c.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 177cae44b612..c7ef0b6684b5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -721,9 +721,7 @@ EXPORT_SYMBOL_GPL(__nvme_check_ready);
 
 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
 {
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
+	struct nvme_command c = { };
 
 	c.directive.opcode = nvme_admin_directive_send;
 	c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
@@ -748,9 +746,8 @@ static int nvme_enable_streams(struct nvme_ctrl *ctrl)
 static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
 				  struct streams_directive_params *s, u32 nsid)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 
-	memset(&c, 0, sizeof(c));
 	memset(s, 0, sizeof(*s));
 
 	c.directive.opcode = nvme_admin_directive_recv;
@@ -1460,10 +1457,9 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
 		unsigned int dword11, void *buffer, size_t buflen, u32 *result)
 {
 	union nvme_result res = { 0 };
-	struct nvme_command c;
+	struct nvme_command c = { };
 	int ret;
 
-	memset(&c, 0, sizeof(c));
 	c.features.opcode = op;
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
@@ -1591,9 +1587,8 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
 				u32 max_integrity_segments)
 {
-	struct blk_integrity integrity;
+	struct blk_integrity integrity = { };
 
-	memset(&integrity, 0, sizeof(integrity));
 	switch (pi_type) {
 	case NVME_NS_DPS_PI_TYPE3:
 		integrity.profile = &t10_pi_type3_crc;
@@ -1964,13 +1959,12 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
 static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
 				u64 key, u64 sa_key, u8 op)
 {
-	struct nvme_command c;
+	struct nvme_command c = { };
 	u8 data[16] = { 0, };
 
 	put_unaligned_le64(key, &data[0]);
 	put_unaligned_le64(sa_key, &data[8]);
 
-	memset(&c, 0, sizeof(c));
 	c.common.opcode = op;
 	c.common.cdw10 = cpu_to_le32(cdw10);
 
@@ -2042,9 +2036,8 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 		bool send)
 {
 	struct nvme_ctrl *ctrl = data;
-	struct nvme_command cmd;
+	struct nvme_command cmd = { };
 
-	memset(&cmd, 0, sizeof(cmd));
 	if (send)
 		cmd.common.opcode = nvme_admin_security_send;
 	else

From 8cf486e131b351db4f224078bef8e1efedcf0340 Mon Sep 17 00:00:00 2001
From: Wesley Sheng <wesley.sheng@amd.com>
Date: Wed, 16 Jun 2021 13:25:08 +0800
Subject: [PATCH 2058/3292] nvme.h: add missing nvme_lba_range_type endianness
 annotations

Signed-off-by: Wesley Sheng <wesley.sheng@amd.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index cb1197f1cfed..b7c4c4130b65 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -636,8 +636,8 @@ struct nvme_lba_range_type {
 	__u8			type;
 	__u8			attributes;
 	__u8			rsvd2[14];
-	__u64			slba;
-	__u64			nlb;
+	__le64			slba;
+	__le64			nlb;
 	__u8			guid[16];
 	__u8			rsvd48[16];
 };

From 2d0795148a5a7dd33999daf600eb6fdeffabd6ba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Jun 2021 16:06:52 +0200
Subject: [PATCH 2059/3292] ACPI: scan: Define acpi_bus_put_acpi_device() as
 static inline

Since acpi_bus_put_acpi_device() is a synonym for acpi_dev_put(),
define it as static inline in analogy with the latter.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/acpi/scan.c     | 5 -----
 include/acpi/acpi_bus.h | 9 +++++++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index f64119a17382..75b6bbc717b0 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -606,11 +606,6 @@ struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle)
 	return handle_to_device(handle, get_acpi_device);
 }
 
-void acpi_bus_put_acpi_device(struct acpi_device *adev)
-{
-	acpi_dev_put(adev);
-}
-
 static struct acpi_device_bus_id *acpi_device_bus_id_match(const char *dev_id)
 {
 	struct acpi_device_bus_id *acpi_device_bus_id;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 4bed30e61c5b..d8e7235b4cf0 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -504,8 +504,6 @@ extern int unregister_acpi_notifier(struct notifier_block *);
  */
 
 int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device);
-struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle);
-void acpi_bus_put_acpi_device(struct acpi_device *adev);
 acpi_status acpi_bus_get_status_handle(acpi_handle handle,
 				       unsigned long long *sta);
 int acpi_bus_get_status(struct acpi_device *device);
@@ -726,6 +724,13 @@ static inline void acpi_dev_put(struct acpi_device *adev)
 {
 	put_device(&adev->dev);
 }
+
+struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle);
+
+static inline void acpi_bus_put_acpi_device(struct acpi_device *adev)
+{
+	acpi_dev_put(adev);
+}
 #else	/* CONFIG_ACPI */
 
 static inline int register_acpi_bus_type(void *bus) { return 0; }

From ad4d451e14e58792e9b7c8a4bfc3276f0128e94a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Jun 2021 16:21:51 +0200
Subject: [PATCH 2060/3292] ACPI: scan: Rearrange
 acpi_dev_get_first_consumer_dev_cb()

Make acpi_dev_get_first_consumer_dev_cb() a bit more straightforward
and rewrite the comment in it.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/scan.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 75b6bbc717b0..524d85dc540c 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2101,13 +2101,12 @@ static int acpi_dev_get_first_consumer_dev_cb(struct acpi_dep_data *dep, void *d
 	struct acpi_device *adev;
 
 	adev = acpi_bus_get_acpi_device(dep->consumer);
-	if (!adev)
-		/* If we don't find an adev then we want to continue parsing */
-		return 0;
-
-	*(struct acpi_device **)data = adev;
-
-	return 1;
+	if (adev) {
+		*(struct acpi_device **)data = adev;
+		return 1;
+	}
+	/* Continue parsing if the device object is not present. */
+	return 0;
 }
 
 static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)

From aff0dbd03d3b750e2331f7cb93e01fe25ed27086 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Jun 2021 16:22:50 +0200
Subject: [PATCH 2061/3292] ACPI: scan: Make acpi_walk_dep_device_list()

Because acpi_walk_dep_device_list() is only called by the code in the
file in which it is defined, make it static, drop the export of it
and drop its header from acpi.h.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/scan.c  | 7 +++----
 include/linux/acpi.h | 3 ---
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 524d85dc540c..b7f9b7ac0d04 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2139,9 +2139,9 @@ static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
  * negative value is returned by the callback then the loop is broken and that
  * value is returned as the final error.
  */
-int acpi_walk_dep_device_list(acpi_handle handle,
-			      int (*callback)(struct acpi_dep_data *, void *),
-			      void *data)
+static int acpi_walk_dep_device_list(acpi_handle handle,
+				int (*callback)(struct acpi_dep_data *, void *),
+				void *data)
 {
 	struct acpi_dep_data *dep, *tmp;
 	int ret = 0;
@@ -2158,7 +2158,6 @@ int acpi_walk_dep_device_list(acpi_handle handle,
 
 	return ret > 0 ? 0 : ret;
 }
-EXPORT_SYMBOL_GPL(acpi_walk_dep_device_list);
 
 /**
  * acpi_dev_clear_dependencies - Inform consumers that the device is now active
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 170b9bebdb2b..0a6d2845fcaf 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -666,9 +666,6 @@ extern bool acpi_driver_match_device(struct device *dev,
 				     const struct device_driver *drv);
 int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
-int acpi_walk_dep_device_list(acpi_handle handle,
-			      int (*callback)(struct acpi_dep_data *, void *),
-			      void *data);
 
 struct platform_device *acpi_create_platform_device(struct acpi_device *,
 						    struct property_entry *);

From dc612486c91983a113adefedac030575ea7a4c4a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Jun 2021 16:23:44 +0200
Subject: [PATCH 2062/3292] ACPI: scan: Fix device object rescan in
 acpi_scan_clear_dep()

In general, acpi_bus_attach() can only be run safely under
acpi_scan_lock, but that lock cannot be acquired under
acpi_dep_list_lock, so make acpi_scan_clear_dep() schedule deferred
execution of acpi_bus_attach() under acpi_scan_lock instead of
calling it directly.

This also fixes a possible race between acpi_scan_clear_dep() and
device removal that might cause a device object that went away to
be accessed, because acpi_scan_clear_dep() is changed to acquire
a reference on the consumer device object.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/scan.c | 50 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index b7f9b7ac0d04..b0bdd9b90e44 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2109,16 +2109,56 @@ static int acpi_dev_get_first_consumer_dev_cb(struct acpi_dep_data *dep, void *d
 	return 0;
 }
 
+struct acpi_scan_clear_dep_work {
+	struct work_struct work;
+	struct acpi_device *adev;
+};
+
+static void acpi_scan_clear_dep_fn(struct work_struct *work)
+{
+	struct acpi_scan_clear_dep_work *cdw;
+
+	cdw = container_of(work, struct acpi_scan_clear_dep_work, work);
+
+	acpi_scan_lock_acquire();
+	acpi_bus_attach(cdw->adev, true);
+	acpi_scan_lock_release();
+
+	acpi_dev_put(cdw->adev);
+	kfree(cdw);
+}
+
+static bool acpi_scan_clear_dep_queue(struct acpi_device *adev)
+{
+	struct acpi_scan_clear_dep_work *cdw;
+
+	if (adev->dep_unmet)
+		return false;
+
+	cdw = kmalloc(sizeof(*cdw), GFP_KERNEL);
+	if (!cdw)
+		return false;
+
+	cdw->adev = adev;
+	INIT_WORK(&cdw->work, acpi_scan_clear_dep_fn);
+	/*
+	 * Since the work function may block on the lock until the entire
+	 * initial enumeration of devices is complete, put it into the unbound
+	 * workqueue.
+	 */
+	queue_work(system_unbound_wq, &cdw->work);
+
+	return true;
+}
+
 static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
 {
-	struct acpi_device *adev;
-
-	acpi_bus_get_device(dep->consumer, &adev);
+	struct acpi_device *adev = acpi_bus_get_acpi_device(dep->consumer);
 
 	if (adev) {
 		adev->dep_unmet--;
-		if (!adev->dep_unmet)
-			acpi_bus_attach(adev, true);
+		if (!acpi_scan_clear_dep_queue(adev))
+			acpi_dev_put(adev);
 	}
 
 	list_del(&dep->node);

From c6a493a1b603ed216ce69d1faac3f0ddc6a2f8eb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Jun 2021 16:24:30 +0200
Subject: [PATCH 2063/3292] ACPI: scan: Reorganize acpi_device_add()

Move the invocation of acpi_attach_data() in acpi_device_add()
into a separate function.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/scan.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index b0bdd9b90e44..c62ce287fdb9 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -633,23 +633,32 @@ static int acpi_device_set_name(struct acpi_device *device,
 	return 0;
 }
 
+static int acpi_tie_acpi_dev(struct acpi_device *adev)
+{
+	acpi_handle handle = adev->handle;
+	acpi_status status;
+
+	if (!handle)
+		return 0;
+
+	status = acpi_attach_data(handle, acpi_scan_drop_device, adev);
+	if (ACPI_FAILURE(status)) {
+		acpi_handle_err(handle, "Unable to attach device data\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 int acpi_device_add(struct acpi_device *device,
 		    void (*release)(struct device *))
 {
 	struct acpi_device_bus_id *acpi_device_bus_id;
 	int result;
 
-	if (device->handle) {
-		acpi_status status;
-
-		status = acpi_attach_data(device->handle, acpi_scan_drop_device,
-					  device);
-		if (ACPI_FAILURE(status)) {
-			acpi_handle_err(device->handle,
-					"Unable to attach device data\n");
-			return -ENODEV;
-		}
-	}
+	result = acpi_tie_acpi_dev(device);
+	if (result)
+		return result;
 
 	/*
 	 * Linkage

From 5f4ce26078fde9cd406c008ba35e31bbb26a23a1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 17 Jun 2021 15:57:07 +0200
Subject: [PATCH 2064/3292] ACPI: scan: Fix race related to dropping
 dependencies

If acpi_add_single_object() runs concurrently with respect to
acpi_scan_clear_dep() which deletes a dependencies list entry where
the device being added is the consumer, the device's dep_unmet
counter may not be updated to reflect that change.

Namely, if the dependencies list entry is deleted right after
calling acpi_scan_dep_init() and before calling acpi_device_add(),
acpi_scan_clear_dep() will not find the device object corresponding
to the consumer device ACPI handle and it will not update its
dep_unmet counter to reflect the deletion of the list entry.
Consequently, the dep_unmet counter of the device will never
become zero going forward which may prevent it from being
completely enumerated.

To address this problem, modify acpi_add_single_object() to run
acpi_tie_acpi_dev(), to attach the ACPI device object created by it
to the corresponding ACPI namespace node, under acpi_dep_list_lock
along with acpi_scan_dep_init() whenever the latter is called.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/acpi/scan.c | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index c62ce287fdb9..1c6205661000 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -650,16 +650,12 @@ static int acpi_tie_acpi_dev(struct acpi_device *adev)
 	return 0;
 }
 
-int acpi_device_add(struct acpi_device *device,
-		    void (*release)(struct device *))
+static int __acpi_device_add(struct acpi_device *device,
+			     void (*release)(struct device *))
 {
 	struct acpi_device_bus_id *acpi_device_bus_id;
 	int result;
 
-	result = acpi_tie_acpi_dev(device);
-	if (result)
-		return result;
-
 	/*
 	 * Linkage
 	 * -------
@@ -748,6 +744,17 @@ err_unlock:
 	return result;
 }
 
+int acpi_device_add(struct acpi_device *adev, void (*release)(struct device *))
+{
+	int ret;
+
+	ret = acpi_tie_acpi_dev(adev);
+	if (ret)
+		return ret;
+
+	return __acpi_device_add(adev, release);
+}
+
 /* --------------------------------------------------------------------------
                                  Device Enumeration
    -------------------------------------------------------------------------- */
@@ -1675,14 +1682,10 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
 {
 	struct acpi_dep_data *dep;
 
-	mutex_lock(&acpi_dep_list_lock);
-
 	list_for_each_entry(dep, &acpi_dep_list, node) {
 		if (dep->consumer == adev->handle)
 			adev->dep_unmet++;
 	}
-
-	mutex_unlock(&acpi_dep_list_lock);
 }
 
 void acpi_device_add_finalize(struct acpi_device *device)
@@ -1701,6 +1704,7 @@ static int acpi_add_single_object(struct acpi_device **child,
 				  acpi_handle handle, int type, bool dep_init)
 {
 	struct acpi_device *device;
+	bool release_dep_lock = false;
 	int result;
 
 	device = kzalloc(sizeof(struct acpi_device), GFP_KERNEL);
@@ -1714,16 +1718,31 @@ static int acpi_add_single_object(struct acpi_device **child,
 	 * this must be done before the get power-/wakeup_dev-flags calls.
 	 */
 	if (type == ACPI_BUS_TYPE_DEVICE || type == ACPI_BUS_TYPE_PROCESSOR) {
-		if (dep_init)
+		if (dep_init) {
+			mutex_lock(&acpi_dep_list_lock);
+			/*
+			 * Hold the lock until the acpi_tie_acpi_dev() call
+			 * below to prevent concurrent acpi_scan_clear_dep()
+			 * from deleting a dependency list entry without
+			 * updating dep_unmet for the device.
+			 */
+			release_dep_lock = true;
 			acpi_scan_dep_init(device);
-
+		}
 		acpi_scan_init_status(device);
 	}
 
 	acpi_bus_get_power_flags(device);
 	acpi_bus_get_wakeup_device_flags(device);
 
-	result = acpi_device_add(device, acpi_device_release);
+	result = acpi_tie_acpi_dev(device);
+
+	if (release_dep_lock)
+		mutex_unlock(&acpi_dep_list_lock);
+
+	if (!result)
+		result = __acpi_device_add(device, acpi_device_release);
+
 	if (result) {
 		acpi_device_release(&device->dev);
 		return result;

From d0b040f5f2557b2f507c01e88ad8cff424fdc6a9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 12 Apr 2021 12:23:33 +0200
Subject: [PATCH 2065/3292] ext4: fix overflow in ext4_iomap_alloc()

A code in iomap alloc may overflow block number when converting it to
byte offset. Luckily this is mostly harmless as we will just use more
expensive method of writing using unwritten extents even though we are
writing beyond i_size.

Cc: stable@kernel.org
Fixes: 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure")
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210412102333.2676-4-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fe6045a46599..211acfba3af7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3418,7 +3418,7 @@ retry:
 	 * i_disksize out to i_size. This could be beyond where direct I/O is
 	 * happening and thus expose allocated blocks to direct I/O reads.
 	 */
-	else if ((map->m_lblk * (1 << blkbits)) >= i_size_read(inode))
+	else if (((loff_t)map->m_lblk << blkbits) >= i_size_read(inode))
 		m_flags = EXT4_GET_BLOCKS_CREATE;
 	else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT;

From 5c680150d7f43484fde6b87271229f2206bfff7c Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@linux.alibaba.com>
Date: Mon, 26 Apr 2021 14:29:47 +0800
Subject: [PATCH 2066/3292] ext4: remove redundant check buffer_uptodate()

Now set_buffer_uptodate() will test first and then set, so we don't have
to check buffer_uptodate() first, remove it to simplify code.

Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619418587-5580-1-git-send-email-joseph.qi@linux.alibaba.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 37002663d521..639ab5405d6a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3784,7 +3784,7 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
 	 * have to read the block because we may read the old data
 	 * successfully.
 	 */
-	if (!buffer_uptodate(bh) && buffer_write_io_error(bh))
+	if (buffer_write_io_error(bh))
 		set_buffer_uptodate(bh);
 	return buffer_uptodate(bh);
 }

From 1fc57ca5a2cd26e0a526e5eb2b0fc0c054117a5b Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 29 Apr 2021 18:16:49 +0800
Subject: [PATCH 2067/3292] ext4: remove redundant assignment to error

Variable error is set to zero but this value is never read as it's not
used later on, hence it is a redundant assignment and can be removed.

Cleans up the following clang-analyzer warning:

fs/ext4/ioctl.c:657:3: warning: Value stored to 'error' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619691409-83160-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ioctl.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 31627f7dc5cd..a96d6721cef9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -659,10 +659,9 @@ static int ext4_ioc_getfsmap(struct super_block *sb,
 	info.gi_sb = sb;
 	info.gi_data = arg;
 	error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info);
-	if (error == EXT4_QUERY_RANGE_ABORT) {
-		error = 0;
+	if (error == EXT4_QUERY_RANGE_ABORT)
 		aborted = true;
-	} else if (error)
+	else if (error)
 		return error;
 
 	/* If we didn't abort, set the "last" flag in the last fmx */

From 618f003199c6188e01472b03cdbba227f1dc5f24 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 30 Apr 2021 21:50:46 +0300
Subject: [PATCH 2068/3292] ext4: fix memory leak in ext4_fill_super

static int kthread(void *_create) will return -ENOMEM
or -EINTR in case of internal failure or
kthread_stop() call happens before threadfn call.

To prevent fancy error checking and make code
more straightforward we moved all cleanup code out
of kmmpd threadfn.

Also, dropped struct mmpd_data at all. Now struct super_block
is a threadfn data and struct buffer_head embedded into
struct ext4_sb_info.

Reported-by: syzbot+d9e482e303930fa4f6ff@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20210430185046.15742-1-paskripkin@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  |  4 ++++
 fs/ext4/mmp.c   | 28 +++++++++++++---------------
 fs/ext4/super.c | 10 ++++------
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 639ab5405d6a..2b51cbee3907 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1488,6 +1488,7 @@ struct ext4_sb_info {
 	struct kobject s_kobj;
 	struct completion s_kobj_unregister;
 	struct super_block *s_sb;
+	struct buffer_head *s_mmp_bh;
 
 	/* Journaling */
 	struct journal_s *s_journal;
@@ -3720,6 +3721,9 @@ extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 
+/* mmp.c */
+extern void ext4_stop_mmpd(struct ext4_sb_info *sbi);
+
 /* verity.c */
 extern const struct fsverity_operations ext4_verityops;
 
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 68fbeedd627b..6cb598b549ca 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -127,9 +127,9 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
  */
 static int kmmpd(void *data)
 {
-	struct super_block *sb = ((struct mmpd_data *) data)->sb;
-	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
+	struct super_block *sb = (struct super_block *) data;
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+	struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh;
 	struct mmp_struct *mmp;
 	ext4_fsblk_t mmp_block;
 	u32 seq = 0;
@@ -245,12 +245,18 @@ static int kmmpd(void *data)
 	retval = write_mmp_block(sb, bh);
 
 exit_thread:
-	EXT4_SB(sb)->s_mmp_tsk = NULL;
-	kfree(data);
-	brelse(bh);
 	return retval;
 }
 
+void ext4_stop_mmpd(struct ext4_sb_info *sbi)
+{
+	if (sbi->s_mmp_tsk) {
+		kthread_stop(sbi->s_mmp_tsk);
+		brelse(sbi->s_mmp_bh);
+		sbi->s_mmp_tsk = NULL;
+	}
+}
+
 /*
  * Get a random new sequence number but make sure it is not greater than
  * EXT4_MMP_SEQ_MAX.
@@ -275,7 +281,6 @@ int ext4_multi_mount_protect(struct super_block *sb,
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 	struct buffer_head *bh = NULL;
 	struct mmp_struct *mmp = NULL;
-	struct mmpd_data *mmpd_data;
 	u32 seq;
 	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
 	unsigned int wait_time = 0;
@@ -364,24 +369,17 @@ skip:
 		goto failed;
 	}
 
-	mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
-	if (!mmpd_data) {
-		ext4_warning(sb, "not enough memory for mmpd_data");
-		goto failed;
-	}
-	mmpd_data->sb = sb;
-	mmpd_data->bh = bh;
+	EXT4_SB(sb)->s_mmp_bh = bh;
 
 	/*
 	 * Start a kernel thread to update the MMP block periodically.
 	 */
-	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
+	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
 					     (int)sizeof(mmp->mmp_bdevname),
 					     bdevname(bh->b_bdev,
 						      mmp->mmp_bdevname));
 	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
 		EXT4_SB(sb)->s_mmp_tsk = NULL;
-		kfree(mmpd_data);
 		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
 			     sb->s_id);
 		goto failed;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d29f6aa7d96e..b6fe1a027c78 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1245,8 +1245,8 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
 	sbi->s_ea_block_cache = NULL;
 
-	if (sbi->s_mmp_tsk)
-		kthread_stop(sbi->s_mmp_tsk);
+	ext4_stop_mmpd(sbi);
+
 	brelse(sbi->s_sbh);
 	sb->s_fs_info = NULL;
 	/*
@@ -5186,8 +5186,7 @@ failed_mount3a:
 failed_mount3:
 	flush_work(&sbi->s_error_work);
 	del_timer_sync(&sbi->s_err_report);
-	if (sbi->s_mmp_tsk)
-		kthread_stop(sbi->s_mmp_tsk);
+	ext4_stop_mmpd(sbi);
 failed_mount2:
 	rcu_read_lock();
 	group_desc = rcu_dereference(sbi->s_group_desc);
@@ -5989,8 +5988,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 				 */
 				ext4_mark_recovery_complete(sb, es);
 			}
-			if (sbi->s_mmp_tsk)
-				kthread_stop(sbi->s_mmp_tsk);
+			ext4_stop_mmpd(sbi);
 		} else {
 			/* Make sure we can mount this feature set readwrite */
 			if (ext4_has_feature_readonly(sb) ||

From ce3aba43599f0b50adbebff133df8d08a3d5fffe Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Fri, 7 May 2021 00:26:54 +0530
Subject: [PATCH 2069/3292] ext4: fix kernel infoleak via ext4_extent_header

Initialize eh_generation of struct ext4_extent_header to prevent leaking
info to userspace. Fixes KMSAN kernel-infoleak bug reported by syzbot at:
http://syzkaller.appspot.com/bug?id=78e9ad0e6952a3ca16e8234724b2fa92d041b9b8

Cc: stable@kernel.org
Reported-by: syzbot+2dcfeaf8cb49b05e8f1a@syzkaller.appspotmail.com
Fixes: a86c61812637 ("[PATCH] ext3: add extent map support")
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210506185655.7118-1-mail@anirudhrb.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/extents.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cbf37b2cf871..1293de50c8d4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -825,6 +825,7 @@ void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 	eh->eh_entries = 0;
 	eh->eh_magic = EXT4_EXT_MAGIC;
 	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
+	eh->eh_generation = 0;
 	ext4_mark_inode_dirty(handle, inode);
 }
 
@@ -1090,6 +1091,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
 	neh->eh_magic = EXT4_EXT_MAGIC;
 	neh->eh_depth = 0;
+	neh->eh_generation = 0;
 
 	/* move remainder of path[depth] to the new leaf */
 	if (unlikely(path[depth].p_hdr->eh_entries !=
@@ -1167,6 +1169,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		neh->eh_magic = EXT4_EXT_MAGIC;
 		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
 		neh->eh_depth = cpu_to_le16(depth - i);
+		neh->eh_generation = 0;
 		fidx = EXT_FIRST_INDEX(neh);
 		fidx->ei_block = border;
 		ext4_idx_store_pblock(fidx, oldblock);

From b9a037b7f3c401d3c63e0423e56aef606b1ffaaf Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Fri, 7 May 2021 15:19:04 +0800
Subject: [PATCH 2070/3292] ext4: cleanup in-core orphan list if
 ext4_truncate() failed to get a transaction handle

In ext4_orphan_cleanup(), if ext4_truncate() failed to get a transaction
handle, it didn't remove the inode from the in-core orphan list, which
may probably trigger below error dump in ext4_destroy_inode() during the
final iput() and could lead to memory corruption on the later orphan
list changes.

 EXT4-fs (sda): Inode 6291467 (00000000b8247c67): orphan list check failed!
 00000000b8247c67: 0001f30a 00000004 00000000 00000023  ............#...
 00000000e24cde71: 00000006 014082a3 00000000 00000000  ......@.........
 0000000072c6a5ee: 00000000 00000000 00000000 00000000  ................
 ...

This patch fix this by cleanup in-core orphan list manually if
ext4_truncate() return error.

Cc: stable@kernel.org
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210507071904.160808-1-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b6fe1a027c78..7e05973d410a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3101,8 +3101,15 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 			inode_lock(inode);
 			truncate_inode_pages(inode->i_mapping, inode->i_size);
 			ret = ext4_truncate(inode);
-			if (ret)
+			if (ret) {
+				/*
+				 * We need to clean up the in-core orphan list
+				 * manually if ext4_truncate() failed to get a
+				 * transaction handle.
+				 */
+				ext4_orphan_del(NULL, inode);
 				ext4_std_error(inode->i_sb, ret);
+			}
 			inode_unlock(inode);
 			nr_truncates++;
 		} else {

From 8f6840c4fd1e7bd715e403074fb161c1a04cda73 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 10 May 2021 19:10:51 +0800
Subject: [PATCH 2071/3292] ext4: return error code when ext4_fill_flex_info()
 fails

After commit c89128a00838 ("ext4: handle errors on
ext4_commit_super"), 'ret' may be set to 0 before calling
ext4_fill_flex_info(), if ext4_fill_flex_info() fails ext4_mount()
doesn't return error code, it makes 'root' is null which causes crash
in legacy_get_tree().

Fixes: c89128a00838 ("ext4: handle errors on ext4_commit_super")
Reported-by: Hulk Robot <hulkci@huawei.com>
Cc: <stable@vger.kernel.org> # v4.18+
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210510111051.55650-1-yangyingliang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7e05973d410a..3b6203543607 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5065,6 +5065,7 @@ no_journal:
 			ext4_msg(sb, KERN_ERR,
 			       "unable to initialize "
 			       "flex_bg meta info!");
+			ret = -ENOMEM;
 			goto failed_mount6;
 		}
 

From bdd56d7d8931e842775d2e5b93d426a8d1940e33 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Jun 2021 20:03:32 +0300
Subject: [PATCH 2072/3292] ACPI: sysfs: Make sparse happy about address space
 in use

Sparse is not happy about address space in use in acpi_data_show():

drivers/acpi/sysfs.c:428:14: warning: incorrect type in assignment (different address spaces)
drivers/acpi/sysfs.c:428:14:    expected void [noderef] __iomem *base
drivers/acpi/sysfs.c:428:14:    got void *
drivers/acpi/sysfs.c:431:59: warning: incorrect type in argument 4 (different address spaces)
drivers/acpi/sysfs.c:431:59:    expected void const *from
drivers/acpi/sysfs.c:431:59:    got void [noderef] __iomem *base
drivers/acpi/sysfs.c:433:30: warning: incorrect type in argument 1 (different address spaces)
drivers/acpi/sysfs.c:433:30:    expected void *logical_address
drivers/acpi/sysfs.c:433:30:    got void [noderef] __iomem *base

Indeed, acpi_os_map_memory() returns a void pointer with dropped specific
address space. Hence, we don't need to carry out __iomem in acpi_data_show().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index d6626aba4a6a..faab6f1c6165 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -420,7 +420,7 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj,
 			      loff_t offset, size_t count)
 {
 	struct acpi_data_attr *data_attr;
-	void __iomem *base;
+	void *base;
 	ssize_t rc;
 
 	data_attr = container_of(bin_attr, struct acpi_data_attr, attr);

From d3121e64ad78ba944596d43d23914cf5f0131666 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Jun 2021 20:03:33 +0300
Subject: [PATCH 2073/3292] ACPI: sysfs: Allow bitmap list to be supplied to
 acpi_mask_gpe

Currently we need to use as many acpi_mask_gpe options as we want to have
GPEs to be masked. Even with two it already becomes inconveniently large
the kernel command line.

Instead, allow acpi_mask_gpe to represent bitmap list.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 drivers/acpi/sysfs.c                            | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc46..a161e58af5cc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -113,7 +113,7 @@
 			the GPE dispatcher.
 			This facility can be used to prevent such uncontrolled
 			GPE floodings.
-			Format: <byte>
+			Format: <byte> or <bitmap-list>
 
 	acpi_no_auto_serialize	[HW,ACPI]
 			Disable auto-serialization of AML methods
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index faab6f1c6165..20588c64cd1b 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -5,6 +5,7 @@
 
 #define pr_fmt(fmt) "ACPI: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
@@ -790,6 +791,7 @@ end:
  * the GPE flooding for GPE 00, they need to specify the following boot
  * parameter:
  *   acpi_mask_gpe=0x00
+ * Note, the parameter can be a list (see bitmap_parselist() for the details).
  * The masking status can be modified by the following runtime controlling
  * interface:
  *   echo unmask > /sys/firmware/acpi/interrupts/gpe00
@@ -799,11 +801,16 @@ static DECLARE_BITMAP(acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX) __initdata;
 
 static int __init acpi_gpe_set_masked_gpes(char *val)
 {
+	int ret;
 	u8 gpe;
 
-	if (kstrtou8(val, 0, &gpe))
-		return -EINVAL;
-	set_bit(gpe, acpi_masked_gpes_map);
+	ret = kstrtou8(val, 0, &gpe);
+	if (ret) {
+		ret = bitmap_parselist(val, acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX);
+		if (ret)
+			return ret;
+	} else
+		set_bit(gpe, acpi_masked_gpes_map);
 
 	return 1;
 }

From b272c05984679c855ff2d08c9d54957bdcfd9b3b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Jun 2021 20:03:34 +0300
Subject: [PATCH 2074/3292] ACPI: sysfs: Unify pattern of memory allocations

Use the form of foo = kmalloc(sizeof(*foo)) everywhere in order to
unify pattern of memory allocations.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 20588c64cd1b..5edad9131289 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -389,8 +389,7 @@ acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context)
 
 	switch (event) {
 	case ACPI_TABLE_EVENT_INSTALL:
-		table_attr =
-		    kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL);
+		table_attr = kzalloc(sizeof(*table_attr), GFP_KERNEL);
 		if (!table_attr)
 			return AE_NO_MEMORY;
 
@@ -842,13 +841,11 @@ void acpi_irq_stats_init(void)
 	num_gpes = acpi_current_gpe_count;
 	num_counters = num_gpes + ACPI_NUM_FIXED_EVENTS + NUM_COUNTERS_EXTRA;
 
-	all_attrs = kcalloc(num_counters + 1, sizeof(struct attribute *),
-			    GFP_KERNEL);
+	all_attrs = kcalloc(num_counters + 1, sizeof(*all_attrs), GFP_KERNEL);
 	if (all_attrs == NULL)
 		return;
 
-	all_counters = kcalloc(num_counters, sizeof(struct event_counter),
-			       GFP_KERNEL);
+	all_counters = kcalloc(num_counters, sizeof(*all_counters), GFP_KERNEL);
 	if (all_counters == NULL)
 		goto fail;
 
@@ -856,8 +853,7 @@ void acpi_irq_stats_init(void)
 	if (ACPI_FAILURE(status))
 		goto fail;
 
-	counter_attrs = kcalloc(num_counters, sizeof(struct kobj_attribute),
-				GFP_KERNEL);
+	counter_attrs = kcalloc(num_counters, sizeof(*counter_attrs), GFP_KERNEL);
 	if (counter_attrs == NULL)
 		goto fail;
 

From 0023b28bdbb4cfd000da066d05b0743aa4f8732a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Jun 2021 20:03:35 +0300
Subject: [PATCH 2075/3292] ACPI: sysfs: Refactor param_get_trace_state() to
 drop dead code

The param_get_trace_state() has a few dead code issues:
 - 'return 0;' is never reachable
 - a few 'else' keywords are redundant

Refactor param_get_trace_state() to drop dead code.

Note, leave one 'else' in order to have the best readability.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 5edad9131289..769f0d940998 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -255,16 +255,12 @@ static int param_get_trace_state(char *buffer, const struct kernel_param *kp)
 {
 	if (!(acpi_gbl_trace_flags & ACPI_TRACE_ENABLED))
 		return sprintf(buffer, "disable\n");
-	else {
-		if (acpi_gbl_trace_method_name) {
-			if (acpi_gbl_trace_flags & ACPI_TRACE_ONESHOT)
-				return sprintf(buffer, "method-once\n");
-			else
-				return sprintf(buffer, "method\n");
-		} else
-			return sprintf(buffer, "enable\n");
-	}
-	return 0;
+	if (!acpi_gbl_trace_method_name)
+		return sprintf(buffer, "enable\n");
+	if (acpi_gbl_trace_flags & ACPI_TRACE_ONESHOT)
+		return sprintf(buffer, "method-once\n");
+	else
+		return sprintf(buffer, "method\n");
 }
 
 module_param_call(trace_state, param_set_trace_state, param_get_trace_state,

From 44497fab681ba4ad439792eea6d118743f3e84d4 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Jun 2021 20:03:38 +0300
Subject: [PATCH 2076/3292] ACPI: sysfs: Sort headers alphabetically

For the sake of better maintenance, sort included headers alphabetically.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 769f0d940998..f02fe25c6099 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -5,11 +5,11 @@
 
 #define pr_fmt(fmt) "ACPI: " fmt
 
+#include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
-#include <linux/acpi.h>
 
 #include "internal.h"
 

From e8d46b384129acb0c6b240f5da3ca1484cfefcc7 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 17 Jun 2021 13:08:37 +0100
Subject: [PATCH 2077/3292] isofs: remove redundant continue statement

The continue statement in the while-loop has no effect,
remove it.

Addresses-Coverity: ("Continue has no effect")
Link: https://lore.kernel.org/r/20210617120837.11994-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/isofs/dir.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index b9e6a7ec78be..eb2f8273e6f1 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -235,8 +235,6 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
 				break;
 		}
 		ctx->pos += de_len;
-
-		continue;
 	}
 	if (bh)
 		brelse(bh);

From 0e03496d1967abf1ebb151a24318c07d07f41f7f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 17 Jun 2021 10:08:11 -0600
Subject: [PATCH 2078/3292] io-wq: use private CPU mask

In preparation for allowing user specific CPU masks for IO thread
creation, switch to using a mask embedded in the per-node wqe
structure.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 56 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 897b94530b57..2af8e1df4646 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -94,6 +94,8 @@ struct io_wqe {
 
 	struct io_wq *wq;
 	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+
+	cpumask_var_t cpu_mask;
 };
 
 /*
@@ -638,7 +640,7 @@ fail:
 
 	tsk->pf_io_worker = worker;
 	worker->task = tsk;
-	set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+	set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
 	tsk->flags |= PF_NO_SETAFFINITY;
 
 	raw_spin_lock_irq(&wqe->lock);
@@ -922,6 +924,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
 		if (!wqe)
 			goto err;
+		if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
+			goto err;
+		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
 		wq->wqes[node] = wqe;
 		wqe->node = alloc_node;
 		wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
@@ -947,8 +952,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 err:
 	io_wq_put_hash(data->hash);
 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
-	for_each_node(node)
+	for_each_node(node) {
+		if (!wq->wqes[node])
+			continue;
+		free_cpumask_var(wq->wqes[node]->cpu_mask);
 		kfree(wq->wqes[node]);
+	}
 err_wq:
 	kfree(wq);
 	return ERR_PTR(ret);
@@ -1018,6 +1027,7 @@ static void io_wq_destroy(struct io_wq *wq)
 			.cancel_all	= true,
 		};
 		io_wqe_cancel_pending_work(wqe, &match);
+		free_cpumask_var(wqe->cpu_mask);
 		kfree(wqe);
 	}
 	io_wq_put_hash(wq->hash);
@@ -1032,23 +1042,49 @@ void io_wq_put_and_exit(struct io_wq *wq)
 	io_wq_destroy(wq);
 }
 
+struct online_data {
+	unsigned int cpu;
+	bool online;
+};
+
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
 {
-	set_cpus_allowed_ptr(worker->task, cpumask_of_node(worker->wqe->node));
+	struct online_data *od = data;
 
+	if (od->online)
+		cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask);
+	else
+		cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask);
 	return false;
 }
 
+static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
+{
+	struct online_data od = {
+		.cpu = cpu,
+		.online = online
+	};
+	int i;
+
+	rcu_read_lock();
+	for_each_node(i)
+		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od);
+	rcu_read_unlock();
+	return 0;
+}
+
 static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
 	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
-	int i;
 
-	rcu_read_lock();
-	for_each_node(i)
-		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
-	rcu_read_unlock();
-	return 0;
+	return __io_wq_cpu_online(wq, cpu, true);
+}
+
+static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+	return __io_wq_cpu_online(wq, cpu, false);
 }
 
 static __init int io_wq_init(void)
@@ -1056,7 +1092,7 @@ static __init int io_wq_init(void)
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
-					io_wq_cpu_online, NULL);
+					io_wq_cpu_online, io_wq_cpu_offline);
 	if (ret < 0)
 		return ret;
 	io_wq_online = ret;

From 638a0c8c8861cb8a3b54203e632ea5dcc23d8ca5 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 6 May 2021 08:40:04 -0700
Subject: [PATCH 2079/3292] ice: fix incorrect payload indicator on PTYPE

The entry for PTYPE 90 indicates that the payload is layer 3. This does
not match the specification in the datasheet which indicates the packet
is a MAC, IPv6, UDP packet, with a payload in layer 4.

Fix the lookup table to match the data sheet.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 21329ed3087e..fc3b56c13786 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -744,7 +744,7 @@ static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
 	/* Non Tunneled IPv6 */
 	ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
 	ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
-	ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY3),
+	ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY4),
 	ICE_PTT_UNUSED_ENTRY(91),
 	ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
 	ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),

From 0c526d440f76676733cb470b454db9d5507a3a50 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 6 May 2021 08:40:05 -0700
Subject: [PATCH 2080/3292] ice: mark PTYPE 2 as reserved

The entry for PTYPE 2 in the ice_ptype_lkup table incorrectly states
that this is an L2 packet with no payload. According to the datasheet,
this PTYPE is actually unused and reserved.

Fix the lookup entry to indicate this is an unused entry that is
reserved.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index fc3b56c13786..4238ab0433ee 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -630,7 +630,7 @@ static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
 	/* L2 Packet types */
 	ICE_PTT_UNUSED_ENTRY(0),
 	ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
-	ICE_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT_UNUSED_ENTRY(2),
 	ICE_PTT_UNUSED_ENTRY(3),
 	ICE_PTT_UNUSED_ENTRY(4),
 	ICE_PTT_UNUSED_ENTRY(5),

From b6b0501d8d9a5ae26b9184164fa0d21052096941 Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Thu, 6 May 2021 08:40:07 -0700
Subject: [PATCH 2081/3292] ice: reduce scope of variables

There are some places where the scope of a variable can
be reduced so do that.

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c  | 8 ++++----
 drivers/net/ethernet/intel/ice/ice_main.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index a46aba5e9c12..cb858be8f4de 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1205,11 +1205,11 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 	num_q_vectors = vsi->num_q_vectors;
 	/* reserve slots from OS requested IRQs */
 	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
-		struct ice_vf *vf;
 		int i;
 
 		ice_for_each_vf(pf, i) {
-			vf = &pf->vf[i];
+			struct ice_vf *vf = &pf->vf[i];
+
 			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) {
 				base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
 				break;
@@ -2873,11 +2873,11 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	 * cleared in the same manner.
 	 */
 	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
-		struct ice_vf *vf;
 		int i;
 
 		ice_for_each_vf(pf, i) {
-			vf = &pf->vf[i];
+			struct ice_vf *vf = &pf->vf[i];
+
 			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI)
 				break;
 		}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 96276533822e..dbf4a5493ea7 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5631,7 +5631,6 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings,
 static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 {
 	struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
-	struct ice_ring *ring;
 	u64 pkts, bytes;
 	int i;
 
@@ -5655,7 +5654,8 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 
 	/* update Rx rings counters */
 	ice_for_each_rxq(vsi, i) {
-		ring = READ_ONCE(vsi->rx_rings[i]);
+		struct ice_ring *ring = READ_ONCE(vsi->rx_rings[i]);
+
 		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
 		vsi_stats->rx_packets += pkts;
 		vsi_stats->rx_bytes += bytes;

From c73bf3bd83e84a84e19631c552196139a04921e8 Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Thu, 6 May 2021 08:40:08 -0700
Subject: [PATCH 2082/3292] ice: remove local variable

Remove the local variable since it's only used once. Instead, use it
directly.

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index dbf4a5493ea7..5ca6c0356499 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3078,7 +3078,6 @@ static void ice_set_netdev_features(struct net_device *netdev)
  */
 static int ice_cfg_netdev(struct ice_vsi *vsi)
 {
-	struct ice_pf *pf = vsi->back;
 	struct ice_netdev_priv *np;
 	struct net_device *netdev;
 	u8 mac_addr[ETH_ALEN];
@@ -3098,7 +3097,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	ice_set_ops(netdev);
 
 	if (vsi->type == ICE_VSI_PF) {
-		SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf));
+		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
 		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
 		ether_addr_copy(netdev->dev_addr, mac_addr);
 		ether_addr_copy(netdev->perm_addr, mac_addr);

From b13ad3e08df7b434db5e675f47c3201051f41b05 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 24 May 2021 16:39:01 +0800
Subject: [PATCH 2083/3292] ice: Remove the repeated declaration

Function 'ice_is_vsi_valid' is declared twice, remove the
repeated declaration.

Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_switch.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 6bb7358ff67b..c5db8d56133f 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -247,7 +247,6 @@ ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
 
 enum ice_status ice_init_def_sw_recp(struct ice_hw *hw);
 u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle);
-bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
 
 enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle);
 void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw);

From 1e00113413a48b6e683244817ea04df95c515d46 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 14 Jun 2021 09:59:16 -0700
Subject: [PATCH 2084/3292] ice: remove unnecessary NULL checks before
 ptp_read_system_*

The ptp_read_system_prets and ptp_read_system_postts functions already
check for the NULL value of the ptp_system_timestamp structure pointer.
There is no need to check this manually in the ice driver code. Remove
the checks.

Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ptp.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index e14f81321768..609f433a4b96 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -219,14 +219,12 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
 
 	tmr_idx = ice_get_ptp_src_clock_index(hw);
 	/* Read the system timestamp pre PHC read */
-	if (sts)
-		ptp_read_system_prets(sts);
+	ptp_read_system_prets(sts);
 
 	lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
 
 	/* Read the system timestamp post PHC read */
-	if (sts)
-		ptp_read_system_postts(sts);
+	ptp_read_system_postts(sts);
 
 	hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
 	lo2 = rd32(hw, GLTSYN_TIME_L(tmr_idx));
@@ -235,11 +233,9 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
 		/* if TIME_L rolled over read TIME_L again and update
 		 * system timestamps
 		 */
-		if (sts)
-			ptp_read_system_prets(sts);
+		ptp_read_system_prets(sts);
 		lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
-		if (sts)
-			ptp_read_system_postts(sts);
+		ptp_read_system_postts(sts);
 		hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
 	}
 

From 4d7f75fe8006a1345e6a52b3e3a4c82633f20564 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 15 Jun 2021 16:14:12 +0200
Subject: [PATCH 2085/3292] net: ice: ptp: fix compilation warning if
 PTP_1588_CLOCK is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compilation warning if PTP_1588_CLOCK is not enabled

drivers/net/ethernet/intel/ice/ice_ptp.h:149:1:
   error: return type defaults to ‘int’ [-Werror=return-type]
   ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)

Fixes: ea9b847cda647 ("ice: enable transmit timestamps for E810 devices")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ptp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 41e14f98f0e6..d01507eba036 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -145,7 +145,7 @@ static inline int ice_get_ptp_clock_index(struct ice_pf *pf)
 	return -1;
 }
 
-static inline
+static inline s8
 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
 {
 	return -1;

From 587b839de733a8cdef3cbb805014e05229e7c96b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 15:28:47 +0100
Subject: [PATCH 2086/3292] ice: remove redundant continue statement in a
 for-loop

The continue statement in the for-loop is redundant. Re-work the hw_lock
check to remove it.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 267312fad59a..3eca0e4eab0b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -410,13 +410,11 @@ bool ice_ptp_lock(struct ice_hw *hw)
 	for (i = 0; i < MAX_TRIES; i++) {
 		hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
 		hw_lock = hw_lock & PFTSYN_SEM_BUSY_M;
-		if (hw_lock) {
-			/* Somebody is holding the lock */
-			usleep_range(10000, 20000);
-			continue;
-		} else {
+		if (!hw_lock)
 			break;
-		}
+
+		/* Somebody is holding the lock */
+		usleep_range(10000, 20000);
 	}
 
 	return !hw_lock;

From fe76421d1da1dcdb3a2cd8428ac40106bff28bc0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 17 Jun 2021 10:19:54 -0600
Subject: [PATCH 2087/3292] io_uring: allow user configurable IO thread CPU
 affinity

io-wq defaults to per-node masks for IO workers. This works fine by
default, but isn't particularly handy for workloads that prefer more
specific affinities, for either performance or isolation reasons.

This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU
mask that is then applied to IO thread workers, and an
IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the
default of per-node.

Note that no care is given to existing IO threads, they will need to go
through a reschedule before the affinity is correct if they are already
running or sleeping.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c                    | 17 ++++++++++++
 fs/io-wq.h                    |  2 ++
 fs/io_uring.c                 | 51 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  4 +++
 4 files changed, 74 insertions(+)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 2af8e1df4646..bb4d3ee9592e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return __io_wq_cpu_online(wq, cpu, false);
 }
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+{
+	int i;
+
+	rcu_read_lock();
+	for_each_node(i) {
+		struct io_wqe *wqe = wq->wqes[i];
+
+		if (mask)
+			cpumask_copy(wqe->cpu_mask, mask);
+		else
+			cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 static __init int io_wq_init(void)
 {
 	int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index af2df0680ee2..02299cdcf55c 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d916eb2cef09..46a25a7cb70a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	return -EINVAL;
 }
 
+static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
+				unsigned len)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	cpumask_var_t new_mask;
+	int ret;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_clear(new_mask);
+	if (len > cpumask_size())
+		len = cpumask_size();
+
+	if (copy_from_user(new_mask, arg, len)) {
+		free_cpumask_var(new_mask);
+		return -EFAULT;
+	}
+
+	ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
+	free_cpumask_var(new_mask);
+	return ret;
+}
+
+static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
+{
+	struct io_uring_task *tctx = current->io_uring;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	return io_wq_cpu_affinity(tctx->io_wq, NULL);
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_FILES_UPDATE2:
 	case IORING_REGISTER_BUFFERS2:
 	case IORING_REGISTER_BUFFERS_UPDATE:
+	case IORING_REGISTER_IOWQ_AFF:
+	case IORING_UNREGISTER_IOWQ_AFF:
 		return false;
 	default:
 		return true;
@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_register_rsrc_update(ctx, arg, nr_args,
 					      IORING_RSRC_BUFFER);
 		break;
+	case IORING_REGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (!arg || !nr_args)
+			break;
+		ret = io_register_iowq_aff(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_unregister_iowq_aff(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 162ff99ed2cb..f1f9ac114b51 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -306,6 +306,10 @@ enum {
 	IORING_REGISTER_BUFFERS2		= 15,
 	IORING_REGISTER_BUFFERS_UPDATE		= 16,
 
+	/* set/clear io-wq thread affinities */
+	IORING_REGISTER_IOWQ_AFF		= 17,
+	IORING_UNREGISTER_IOWQ_AFF		= 18,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };

From bdac593c981b3b8986a8d37e020946ba1f6dfaa4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 8 Feb 2021 23:49:39 +0100
Subject: [PATCH 2088/3292] mt76: mt7915: add MSI support

Move IRQ processing to a tasklet, similar to MT7615/MT7663

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/init.c  |  1 +
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  7 +++-
 .../net/wireless/mediatek/mt76/mt7915/pci.c   | 39 ++++++++++++++++---
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 822f3aa6bb8b..a8fd822cc46e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -752,6 +752,7 @@ void mt7915_unregister_device(struct mt7915_dev *dev)
 	mt7915_mcu_exit(dev);
 	mt7915_tx_token_put(dev);
 	mt7915_dma_cleanup(dev);
+	tasklet_disable(&dev->irq_tasklet);
 
 	mt76_free_device(&dev->mt76);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 4ea8972d4e2f..7a3c172afc98 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -169,6 +169,7 @@ struct mt7915_dev {
 	struct mt7915_hif *hif2;
 
 	const struct mt76_bus_ops *bus_ops;
+	struct tasklet_struct irq_tasklet;
 	struct mt7915_phy phy;
 
 	u16 chainmask;
@@ -374,9 +375,11 @@ void mt7915_dual_hif_set_irq_mask(struct mt7915_dev *dev, bool write_reg,
 static inline void mt7915_irq_enable(struct mt7915_dev *dev, u32 mask)
 {
 	if (dev->hif2)
-		mt7915_dual_hif_set_irq_mask(dev, true, 0, mask);
+		mt7915_dual_hif_set_irq_mask(dev, false, 0, mask);
 	else
-		mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, 0, mask);
+		mt76_set_irq_mask(&dev->mt76, 0, 0, mask);
+
+	tasklet_schedule(&dev->irq_tasklet);
 }
 
 static inline void mt7915_irq_disable(struct mt7915_dev *dev, u32 mask)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index 643f171884cf..aae2fb3ccad1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -94,11 +94,15 @@ mt7915_rx_poll_complete(struct mt76_dev *mdev, enum mt76_rxq_id q)
 }
 
 /* TODO: support 2/4/6/8 MSI-X vectors */
-static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
+static void mt7915_irq_tasklet(struct tasklet_struct *t)
 {
-	struct mt7915_dev *dev = dev_instance;
+	struct mt7915_dev *dev = from_tasklet(dev, t, irq_tasklet);
 	u32 intr, intr1, mask;
 
+	mt76_wr(dev, MT_INT_MASK_CSR, 0);
+	if (dev->hif2)
+		mt76_wr(dev, MT_INT1_MASK_CSR, 0);
+
 	intr = mt76_rr(dev, MT_INT_SOURCE_CSR);
 	intr &= dev->mt76.mmio.irqmask;
 	mt76_wr(dev, MT_INT_SOURCE_CSR, intr);
@@ -111,9 +115,6 @@ static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
 		intr |= intr1;
 	}
 
-	if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state))
-		return IRQ_NONE;
-
 	trace_dev_irq(&dev->mt76, intr, dev->mt76.mmio.irqmask);
 
 	mask = intr & MT_INT_RX_DONE_ALL;
@@ -150,6 +151,20 @@ static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
 			wake_up(&dev->reset_wait);
 		}
 	}
+}
+
+static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
+{
+	struct mt7915_dev *dev = dev_instance;
+
+	mt76_wr(dev, MT_INT_MASK_CSR, 0);
+	if (dev->hif2)
+		mt76_wr(dev, MT_INT1_MASK_CSR, 0);
+
+	if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state))
+		return IRQ_NONE;
+
+	tasklet_schedule(&dev->irq_tasklet);
 
 	return IRQ_HANDLED;
 }
@@ -250,10 +265,18 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 
 	dev = container_of(mdev, struct mt7915_dev, mt76);
 
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		goto free;
+
 	ret = mt7915_mmio_init(mdev, pcim_iomap_table(pdev)[0], pdev->irq);
 	if (ret)
 		goto error;
 
+	tasklet_setup(&dev->irq_tasklet, mt7915_irq_tasklet);
+
+	mt76_wr(dev, MT_INT_MASK_CSR, 0);
+
 	/* master switch of PCIe tnterrupt enable */
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 
@@ -266,10 +289,14 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 
 	ret = mt7915_register_device(dev);
 	if (ret)
-		goto error;
+		goto free_irq;
 
 	return 0;
+free_irq:
+	devm_free_irq(mdev->dev, pdev->irq, dev);
 error:
+	pci_free_irq_vectors(pdev);
+free:
 	mt76_free_device(&dev->mt76);
 
 	return ret;

From 03b3dedc5de184735bb9b6b3e5871fe4384913f2 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 6 May 2021 19:03:03 +0200
Subject: [PATCH 2089/3292] mt76: mt7915: disable ASPM

The same is done on the other chips already, so it is very likely needed
on MT7915 as well

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index aae2fb3ccad1..340b364da5f0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -255,6 +255,8 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
+	mt76_pci_disable_aspm(pdev);
+
 	if (id->device == 0x7916)
 		return mt7915_pci_hif2_probe(pdev);
 

From 338330bd26b1febc7923ce3ebb6b76f6a5d980b8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 12:52:42 +0200
Subject: [PATCH 2090/3292] mt76: mt7915: move mt7915_queue_rx_skb to mac.c

It is not really DMA specific, and moving it makes it possible to make some
functions in mac.c static

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/dma.c   | 33 --------------
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 44 ++++++++++++++++---
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  3 --
 3 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
index 11d0b760abd7..69a7e3dce113 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -19,39 +19,6 @@ int mt7915_init_tx_queues(struct mt7915_phy *phy, int idx, int n_desc)
 	return 0;
 }
 
-void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
-			 struct sk_buff *skb)
-{
-	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
-	__le32 *rxd = (__le32 *)skb->data;
-	enum rx_pkt_type type;
-
-	type = FIELD_GET(MT_RXD0_PKT_TYPE, le32_to_cpu(rxd[0]));
-
-	switch (type) {
-	case PKT_TYPE_TXRX_NOTIFY:
-		mt7915_mac_tx_free(dev, skb);
-		break;
-	case PKT_TYPE_RX_EVENT:
-		mt7915_mcu_rx_event(dev, skb);
-		break;
-#ifdef CONFIG_NL80211_TESTMODE
-	case PKT_TYPE_TXRXV:
-		mt7915_mac_fill_rx_vector(dev, skb);
-		break;
-#endif
-	case PKT_TYPE_NORMAL:
-		if (!mt7915_mac_fill_rx(dev, skb)) {
-			mt76_rx(&dev->mt76, q, skb);
-			return;
-		}
-		fallthrough;
-	default:
-		dev_kfree_skb(skb);
-		break;
-	}
-}
-
 static void
 mt7915_tx_cleanup(struct mt7915_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 7a9759fb79d8..2c4516f8dbf1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -307,7 +307,8 @@ mt7915_mac_decode_he_radiotap(struct sk_buff *skb,
 	}
 }
 
-int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
+static int
+mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_phy *mphy = &dev->mt76.phy;
@@ -610,9 +611,10 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
-#ifdef CONFIG_NL80211_TESTMODE
-void mt7915_mac_fill_rx_vector(struct mt7915_dev *dev, struct sk_buff *skb)
+static void
+mt7915_mac_fill_rx_vector(struct mt7915_dev *dev, struct sk_buff *skb)
 {
+#ifdef CONFIG_NL80211_TESTMODE
 	struct mt7915_phy *phy = &dev->phy;
 	__le32 *rxd = (__le32 *)skb->data;
 	__le32 *rxv_hdr = rxd + 2;
@@ -650,10 +652,10 @@ void mt7915_mac_fill_rx_vector(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	phy->test.last_freq_offset = foe;
 	phy->test.last_snr = snr;
+#endif
 
 	dev_kfree_skb(skb);
 }
-#endif
 
 static void
 mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
@@ -1129,7 +1131,8 @@ void mt7915_txp_skb_unmap(struct mt76_dev *dev,
 				 le16_to_cpu(txp->len[i]), DMA_TO_DEVICE);
 }
 
-void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
+static void
+mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 {
 	struct mt7915_tx_free *free = (struct mt7915_tx_free *)skb->data;
 	struct mt76_dev *mdev = &dev->mt76;
@@ -1233,6 +1236,37 @@ void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 	}
 }
 
+void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
+			 struct sk_buff *skb)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	__le32 *rxd = (__le32 *)skb->data;
+	enum rx_pkt_type type;
+
+	type = FIELD_GET(MT_RXD0_PKT_TYPE, le32_to_cpu(rxd[0]));
+
+	switch (type) {
+	case PKT_TYPE_TXRX_NOTIFY:
+		mt7915_mac_tx_free(dev, skb);
+		break;
+	case PKT_TYPE_RX_EVENT:
+		mt7915_mcu_rx_event(dev, skb);
+		break;
+	case PKT_TYPE_TXRXV:
+		mt7915_mac_fill_rx_vector(dev, skb);
+		break;
+	case PKT_TYPE_NORMAL:
+		if (!mt7915_mac_fill_rx(dev, skb)) {
+			mt76_rx(&dev->mt76, q, skb);
+			return;
+		}
+		fallthrough;
+	default:
+		dev_kfree_skb(skb);
+		break;
+	}
+}
+
 void mt7915_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 {
 	struct mt7915_dev *dev;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 7a3c172afc98..279332b557a8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -398,9 +398,6 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
 			   struct sk_buff *skb, struct mt76_wcid *wcid,
 			   struct ieee80211_key_conf *key, bool beacon);
 void mt7915_mac_set_timing(struct mt7915_phy *phy);
-int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb);
-void mt7915_mac_fill_rx_vector(struct mt7915_dev *dev, struct sk_buff *skb);
-void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb);
 int mt7915_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta);
 void mt7915_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,

From ec8f1a90d006f7cedcf86ef19fd034a406a213d6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 14:07:53 +0200
Subject: [PATCH 2091/3292] mt76: mt7615: fix fixed-rate tx status reporting

Rely on the txs fixed-rate bit instead of info->control.rates

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index e2dcfee6be81..7bdf3378a4d1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1235,22 +1235,20 @@ static bool mt7615_fill_txs(struct mt7615_dev *dev, struct mt7615_sta *sta,
 	int first_idx = 0, last_idx;
 	int i, idx, count;
 	bool fixed_rate, ack_timeout;
-	bool probe, ampdu, cck = false;
+	bool ampdu, cck = false;
 	bool rs_idx;
 	u32 rate_set_tsf;
 	u32 final_rate, final_rate_flags, final_nss, txs;
 
-	fixed_rate = info->status.rates[0].count;
-	probe = !!(info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE);
-
 	txs = le32_to_cpu(txs_data[1]);
-	ampdu = !fixed_rate && (txs & MT_TXS1_AMPDU);
+	ampdu = txs & MT_TXS1_AMPDU;
 
 	txs = le32_to_cpu(txs_data[3]);
 	count = FIELD_GET(MT_TXS3_TX_COUNT, txs);
 	last_idx = FIELD_GET(MT_TXS3_LAST_TX_RATE, txs);
 
 	txs = le32_to_cpu(txs_data[0]);
+	fixed_rate = txs & MT_TXS0_FIXED_RATE;
 	final_rate = FIELD_GET(MT_TXS0_TX_RATE, txs);
 	ack_timeout = txs & MT_TXS0_ACK_TIMEOUT;
 
@@ -1272,7 +1270,7 @@ static bool mt7615_fill_txs(struct mt7615_dev *dev, struct mt7615_sta *sta,
 
 	first_idx = max_t(int, 0, last_idx - (count - 1) / MT7615_RATE_RETRY);
 
-	if (fixed_rate && !probe) {
+	if (fixed_rate) {
 		info->status.rates[0].count = count;
 		i = 0;
 		goto out;

From 7172534f63c493462f5bb96e3eb7fa03d889560e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 19:46:23 +0200
Subject: [PATCH 2092/3292] mt76: mt7615: avoid use of
 ieee80211_tx_info_clear_status

It overwrites mt76_tx_cb data in the skb

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 7bdf3378a4d1..7153f1da92d0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1389,7 +1389,7 @@ static bool mt7615_mac_add_txs_skb(struct mt7615_dev *dev,
 		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 		if (!mt7615_fill_txs(dev, sta, info, txs_data)) {
-			ieee80211_tx_info_clear_status(info);
+			info->status.rates[0].count = 0;
 			info->status.rates[0].idx = -1;
 		}
 

From 1d85dc67c4c720c8332f869965fe8091c757cc0d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 19:49:00 +0200
Subject: [PATCH 2093/3292] mt76: mt7603: avoid use of
 ieee80211_tx_info_clear_status

It overwrites mt76_tx_cb data in the skb

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index fbceb07c5f37..4e76f9868b9b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1213,7 +1213,7 @@ mt7603_mac_add_txs_skb(struct mt7603_dev *dev, struct mt7603_sta *sta, int pid,
 		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 		if (!mt7603_fill_txs(dev, sta, info, txs_data)) {
-			ieee80211_tx_info_clear_status(info);
+			info->status.rates[0].count = 0;
 			info->status.rates[0].idx = -1;
 		}
 

From 6d51cae28c8de95ac41c64affec2b00ef8bcaf87 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 9 May 2021 22:49:07 +0200
Subject: [PATCH 2094/3292] mt76: intialize tx queue entry wcid to 0xffff by
 default

Avoid accidentally mapping them to WCID 0 on completion

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c  | 1 +
 drivers/net/wireless/mediatek/mt76/sdio.c | 1 +
 drivers/net/wireless/mediatek/mt76/usb.c  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 72b1cc0ecfda..75c1f54b1fe5 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -191,6 +191,7 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
 
 	q->entry[idx].txwi = txwi;
 	q->entry[idx].skb = skb;
+	q->entry[idx].wcid = 0xffff;
 
 	return idx;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
index a18d2896ee1f..1665fe88ebb8 100644
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -256,6 +256,7 @@ mt76s_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 
 	q->entry[q->head].skb = tx_info.skb;
 	q->entry[q->head].buf_sz = len;
+	q->entry[q->head].wcid = 0xffff;
 
 	smp_wmb();
 
diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index 30bc54e98c58..1e9f60bb811a 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -925,6 +925,7 @@ mt76u_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 
 	q->head = (q->head + 1) % q->ndesc;
 	q->entry[idx].skb = tx_info.skb;
+	q->entry[idx].wcid = 0xffff;
 	q->queued++;
 
 	return idx;

From 0fe88644c06063352b202f82dbead3c0df053c10 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 17:01:45 +0200
Subject: [PATCH 2095/3292] mt76: improve tx status codepath

Use ieee80211_tx_status_ext instead of ieee80211_free_skb and
ieee80211_tx_status. This makes it compatible with 802.3 encap offload
and improves performance by removing a redundant sta lookup

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h |  9 ++-
 drivers/net/wireless/mediatek/mt76/tx.c   | 75 +++++++++++++++--------
 2 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 36ede65919f8..d3859eda2a4f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -1056,7 +1056,14 @@ struct sk_buff *mt76_tx_status_skb_get(struct mt76_dev *dev,
 				       struct sk_buff_head *list);
 void mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb,
 			     struct sk_buff_head *list);
-void mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid, struct sk_buff *skb);
+void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid, struct sk_buff *skb,
+			    struct list_head *free_list);
+static inline void
+mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid, struct sk_buff *skb)
+{
+    __mt76_tx_complete_skb(dev, wcid, skb, NULL);
+}
+
 void mt76_tx_status_check(struct mt76_dev *dev, struct mt76_wcid *wcid,
 			  bool flush);
 int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 53ea8de82df0..70a830132a37 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -54,11 +54,23 @@ mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list)
 
 	spin_unlock_bh(&dev->status_list.lock);
 
+	rcu_read_lock();
 	while ((skb = __skb_dequeue(list)) != NULL) {
-		hw = mt76_tx_status_get_hw(dev, skb);
-		ieee80211_tx_status(hw, skb);
-	}
+		struct ieee80211_tx_status status = {
+			.skb = skb,
+			.info = IEEE80211_SKB_CB(skb),
+		};
+		struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
+		struct mt76_wcid *wcid;
 
+		wcid = rcu_dereference(dev->wcid[cb->wcid]);
+		if (wcid)
+			status.sta = wcid_to_sta(wcid);
+
+		hw = mt76_tx_status_get_hw(dev, skb);
+		ieee80211_tx_status_ext(hw, &status);
+	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(mt76_tx_status_unlock);
 
@@ -80,7 +92,7 @@ __mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb, u8 flags,
 
 	/* Tx status can be unreliable. if it fails, mark the frame as ACKed */
 	if (flags & MT_TX_CB_TXS_FAILED) {
-		ieee80211_tx_info_clear_status(info);
+		info->status.rates[0].count = 0;
 		info->status.rates[0].idx = -1;
 		info->flags |= IEEE80211_TX_STAT_ACK;
 	}
@@ -173,36 +185,37 @@ mt76_tx_status_check(struct mt76_dev *dev, struct mt76_wcid *wcid, bool flush)
 EXPORT_SYMBOL_GPL(mt76_tx_status_check);
 
 static void
-mt76_tx_check_non_aql(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *skb)
+mt76_tx_check_non_aql(struct mt76_dev *dev, struct mt76_wcid *wcid,
+		      struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct mt76_wcid *wcid;
 	int pending;
 
-	if (info->tx_time_est)
+	if (!wcid || info->tx_time_est)
 		return;
 
-	if (wcid_idx >= ARRAY_SIZE(dev->wcid))
-		return;
-
-	rcu_read_lock();
-
-	wcid = rcu_dereference(dev->wcid[wcid_idx]);
-	if (wcid) {
-		pending = atomic_dec_return(&wcid->non_aql_packets);
-		if (pending < 0)
-			atomic_cmpxchg(&wcid->non_aql_packets, pending, 0);
-	}
-
-	rcu_read_unlock();
+	pending = atomic_dec_return(&wcid->non_aql_packets);
+	if (pending < 0)
+		atomic_cmpxchg(&wcid->non_aql_packets, pending, 0);
 }
 
-void mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *skb)
+void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *skb,
+			    struct list_head *free_list)
 {
+	struct ieee80211_tx_status status = {
+		.skb = skb,
+		.free_list = free_list,
+	};
+	struct mt76_wcid *wcid = NULL;
 	struct ieee80211_hw *hw;
 	struct sk_buff_head list;
 
-	mt76_tx_check_non_aql(dev, wcid_idx, skb);
+	rcu_read_lock();
+
+	if (wcid_idx < ARRAY_SIZE(dev->wcid))
+		wcid = rcu_dereference(dev->wcid[wcid_idx]);
+
+	mt76_tx_check_non_aql(dev, wcid, skb);
 
 #ifdef CONFIG_NL80211_TESTMODE
 	if (mt76_is_testmode_skb(dev, skb, &hw)) {
@@ -214,21 +227,25 @@ void mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *sk
 			wake_up(&dev->tx_wait);
 
 		dev_kfree_skb_any(skb);
-		return;
+		goto out;
 	}
 #endif
 
 	if (!skb->prev) {
 		hw = mt76_tx_status_get_hw(dev, skb);
-		ieee80211_free_txskb(hw, skb);
-		return;
+		status.sta = wcid_to_sta(wcid);
+		ieee80211_tx_status_ext(hw, &status);
+		goto out;
 	}
 
 	mt76_tx_status_lock(dev, &list);
 	__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_DMA_DONE, &list);
 	mt76_tx_status_unlock(dev, &list);
+
+out:
+	rcu_read_unlock();
 }
-EXPORT_SYMBOL_GPL(mt76_tx_complete_skb);
+EXPORT_SYMBOL_GPL(__mt76_tx_complete_skb);
 
 static int
 __mt76_tx_queue_skb(struct mt76_phy *phy, int qid, struct sk_buff *skb,
@@ -244,11 +261,15 @@ __mt76_tx_queue_skb(struct mt76_phy *phy, int qid, struct sk_buff *skb,
 
 	non_aql = !info->tx_time_est;
 	idx = dev->queue_ops->tx_queue_skb(dev, q, skb, wcid, sta);
-	if (idx < 0 || !sta || !non_aql)
+	if (idx < 0 || !sta)
 		return idx;
 
 	wcid = (struct mt76_wcid *)sta->drv_priv;
 	q->entry[idx].wcid = wcid->idx;
+
+	if (!non_aql)
+		return idx;
+
 	pending = atomic_inc_return(&wcid->non_aql_packets);
 	if (stop && pending >= MT_MAX_NON_AQL_PKT)
 		*stop = true;

From 94e4f5794627a80ce036c35b32a9900daeb31be3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 9 May 2021 20:23:01 +0200
Subject: [PATCH 2096/3292] mt76: dma: use ieee80211_tx_status_ext to free
 packets when tx fails

Fixes AQL issues on full queues, especially with 802.3 encap offload

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 75c1f54b1fe5..5e1c1506a4c6 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -350,6 +350,9 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 		      struct sk_buff *skb, struct mt76_wcid *wcid,
 		      struct ieee80211_sta *sta)
 {
+	struct ieee80211_tx_status status = {
+		.sta = sta,
+	};
 	struct mt76_tx_info tx_info = {
 		.skb = skb,
 	};
@@ -361,11 +364,9 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 	u8 *txwi;
 
 	t = mt76_get_txwi(dev);
-	if (!t) {
-		hw = mt76_tx_status_get_hw(dev, skb);
-		ieee80211_free_txskb(hw, skb);
-		return -ENOMEM;
-	}
+	if (!t)
+		goto free_skb;
+
 	txwi = mt76_get_txwi_ptr(dev, t);
 
 	skb->prev = skb->next = NULL;
@@ -428,8 +429,13 @@ free:
 	}
 #endif
 
-	dev_kfree_skb(tx_info.skb);
 	mt76_put_txwi(dev, t);
+
+free_skb:
+	status.skb = tx_info.skb;
+	hw = mt76_tx_status_get_hw(dev, tx_info.skb);
+	ieee80211_tx_status_ext(hw, &status);
+
 	return ret;
 }
 

From 223fd4f843081059bc8f8d8ba6363bfcc2e5848f Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 18:51:41 +0200
Subject: [PATCH 2097/3292] mt76: mt7915: rework tx rate reporting

Instead of attaching the last reported rate to tx packets, use
ieee80211_tx_status_ext to immediately pass the rate to mac80211 after
receiving it from the firmware.
Preparation for implementing full tx status reporting

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 145 +++++++-----------
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   |   8 +
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |   2 -
 3 files changed, 66 insertions(+), 89 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 2c4516f8dbf1..9b453e45fde0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1073,54 +1073,7 @@ mt7915_tx_check_aggr(struct ieee80211_sta *sta, __le32 *txwi)
 }
 
 static void
-mt7915_tx_complete_status(struct mt76_dev *mdev, struct sk_buff *skb,
-			  struct ieee80211_sta *sta, u8 stat,
-			  struct list_head *free_list)
-{
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_tx_status status = {
-		.sta = sta,
-		.info = info,
-		.skb = skb,
-		.free_list = free_list,
-	};
-	struct ieee80211_hw *hw;
-
-	if (sta) {
-		struct mt7915_sta *msta;
-
-		msta = (struct mt7915_sta *)sta->drv_priv;
-		status.rate = &msta->stats.tx_rate;
-	}
-
-#ifdef CONFIG_NL80211_TESTMODE
-	if (mt76_is_testmode_skb(mdev, skb, &hw)) {
-		struct mt7915_phy *phy = mt7915_hw_phy(hw);
-		struct ieee80211_vif *vif = phy->monitor_vif;
-		struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
-
-		mt76_tx_complete_skb(mdev, mvif->sta.wcid.idx, skb);
-		return;
-	}
-#endif
-
-	hw = mt76_tx_status_get_hw(mdev, skb);
-
-	if (info->flags & IEEE80211_TX_CTL_AMPDU)
-		info->flags |= IEEE80211_TX_STAT_AMPDU;
-
-	if (stat)
-		ieee80211_tx_info_clear_status(info);
-
-	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
-		info->flags |= IEEE80211_TX_STAT_ACK;
-
-	info->status.tx_time = 0;
-	ieee80211_tx_status_ext(hw, &status);
-}
-
-void mt7915_txp_skb_unmap(struct mt76_dev *dev,
-			  struct mt76_txwi_cache *t)
+mt7915_txp_skb_unmap(struct mt76_dev *dev, struct mt76_txwi_cache *t)
 {
 	struct mt7915_txp *txp;
 	int i;
@@ -1131,6 +1084,59 @@ void mt7915_txp_skb_unmap(struct mt76_dev *dev,
 				 le16_to_cpu(txp->len[i]), DMA_TO_DEVICE);
 }
 
+static void
+mt7915_txwi_free(struct mt7915_dev *dev, struct mt76_txwi_cache *t,
+		 struct ieee80211_sta *sta, struct list_head *free_list)
+{
+	struct ieee80211_tx_status status = {};
+	struct mt76_dev *mdev = &dev->mt76;
+	struct ieee80211_tx_info *info;
+	struct mt76_wcid *wcid;
+	__le32 *txwi;
+	u16 wcid_idx;
+
+	mt7915_txp_skb_unmap(mdev, t);
+	if (!t->skb)
+		goto out;
+
+	txwi = (__le32 *)mt76_get_txwi_ptr(mdev, t);
+	if (sta) {
+		wcid = (struct mt76_wcid *)sta->drv_priv;
+		wcid_idx = wcid->idx;
+
+		if (likely(t->skb->protocol != cpu_to_be16(ETH_P_PAE)))
+			mt7915_tx_check_aggr(sta, txwi);
+	} else {
+		wcid_idx = FIELD_GET(MT_TXD1_WLAN_IDX, le32_to_cpu(txwi[1]));
+	}
+
+	info = IEEE80211_SKB_CB(t->skb);
+	if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
+		struct ieee80211_hw *hw;
+
+		if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
+			info->flags |= IEEE80211_TX_STAT_ACK;
+
+		info->status.rates[0].count = 0;
+		info->status.rates[0].idx = -1;
+
+		status.skb = t->skb;
+		status.sta = sta;
+		status.info = info;
+
+		hw = mt76_tx_status_get_hw(mdev, t->skb);
+		ieee80211_tx_status_ext(hw, &status);
+
+		goto out;
+	}
+
+	__mt76_tx_complete_skb(mdev, wcid_idx, t->skb, free_list);
+
+out:
+	t->skb = NULL;
+	mt76_put_txwi(mdev, t);
+}
+
 static void
 mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 {
@@ -1197,28 +1203,7 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 		if (!txwi)
 			continue;
 
-		mt7915_txp_skb_unmap(mdev, txwi);
-		if (txwi->skb) {
-			struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txwi->skb);
-			void *txwi_ptr = mt76_get_txwi_ptr(mdev, txwi);
-
-			if (likely(txwi->skb->protocol != cpu_to_be16(ETH_P_PAE)))
-				mt7915_tx_check_aggr(sta, txwi_ptr);
-
-			if (sta && !info->tx_time_est) {
-				struct mt76_wcid *wcid = (struct mt76_wcid *)sta->drv_priv;
-				int pending;
-
-				pending = atomic_dec_return(&wcid->non_aql_packets);
-				if (pending < 0)
-					atomic_cmpxchg(&wcid->non_aql_packets, pending, 0);
-			}
-
-			mt7915_tx_complete_status(mdev, txwi->skb, sta, stat, &free_list);
-			txwi->skb = NULL;
-		}
-
-		mt76_put_txwi(mdev, txwi);
+		mt7915_txwi_free(dev, txwi, sta, &free_list);
 	}
 
 	mt7915_mac_sta_poll(dev);
@@ -1288,15 +1273,8 @@ void mt7915_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 		e->skb = t ? t->skb : NULL;
 	}
 
-	if (e->skb) {
-		struct mt76_tx_cb *cb = mt76_tx_skb_cb(e->skb);
-		struct mt76_wcid *wcid;
-
-		wcid = rcu_dereference(dev->mt76.wcid[cb->wcid]);
-
-		mt7915_tx_complete_status(mdev, e->skb, wcid_to_sta(wcid), 0,
-					  NULL);
-	}
+	if (e->skb)
+		mt76_tx_complete_skb(mdev, e->wcid, e->skb);
 }
 
 void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy)
@@ -1582,14 +1560,7 @@ void mt7915_tx_token_put(struct mt7915_dev *dev)
 
 	spin_lock_bh(&dev->mt76.token_lock);
 	idr_for_each_entry(&dev->mt76.token, txwi, id) {
-		mt7915_txp_skb_unmap(&dev->mt76, txwi);
-		if (txwi->skb) {
-			struct ieee80211_hw *hw;
-
-			hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb);
-			ieee80211_free_txskb(hw, txwi->skb);
-		}
-		mt76_put_txwi(&dev->mt76, txwi);
+		mt7915_txwi_free(dev, txwi, NULL, NULL);
 		dev->mt76.token_count--;
 	}
 	spin_unlock_bh(&dev->mt76.token_lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index b3f14ff67c5a..9ec414de317e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -469,6 +469,7 @@ mt7915_mcu_tx_rate_report(struct mt7915_dev *dev, struct sk_buff *skb)
 	u16 attempts = le16_to_cpu(ra->attempts);
 	u16 curr = le16_to_cpu(ra->curr_rate);
 	u16 wcidx = le16_to_cpu(ra->wlan_idx);
+	struct ieee80211_tx_status status = {};
 	struct mt76_phy *mphy = &dev->mphy;
 	struct mt7915_sta_stats *stats;
 	struct mt7915_sta *msta;
@@ -500,6 +501,13 @@ mt7915_mcu_tx_rate_report(struct mt7915_dev *dev, struct sk_buff *skb)
 
 		stats->per = 1000 * (attempts - success) / attempts;
 	}
+
+	status.sta = wcid_to_sta(wcid);
+	if (!status.sta)
+		return;
+
+	status.rate = &stats->tx_rate;
+	ieee80211_tx_status_ext(mphy->hw, &status);
 }
 
 static void
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 279332b557a8..0342729145e7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -417,8 +417,6 @@ void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
 			 struct sk_buff *skb);
 void mt7915_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps);
 void mt7915_stats_work(struct work_struct *work);
-void mt7915_txp_skb_unmap(struct mt76_dev *dev,
-			  struct mt76_txwi_cache *txwi);
 int mt76_dfs_start_rdd(struct mt7915_dev *dev, bool force);
 int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy);
 void mt7915_set_stream_he_caps(struct mt7915_phy *phy);

From 3de4cb1756565a22321039eb9ae0193519bed967 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 15:18:09 +0200
Subject: [PATCH 2098/3292] mt76: mt7915: add support for tx status reporting

For now, this only reports ACK status

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 124 ++++++++++++++----
 .../net/wireless/mediatek/mt76/mt7915/mac.h   |  56 ++++++++
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   |   2 +-
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |   2 +-
 4 files changed, 154 insertions(+), 30 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 9b453e45fde0..f8bb043f9be7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -887,7 +887,7 @@ mt7915_mac_write_txwi_80211(struct mt7915_dev *dev, __le32 *txwi,
 }
 
 void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
-			   struct sk_buff *skb, struct mt76_wcid *wcid,
+			   struct sk_buff *skb, struct mt76_wcid *wcid, int pid,
 			   struct ieee80211_key_conf *key, bool beacon)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -946,7 +946,12 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
 
 	txwi[3] = cpu_to_le32(val);
 	txwi[4] = 0;
-	txwi[5] = 0;
+
+	val = FIELD_PREP(MT_TXD5_PID, pid);
+	if (pid >= MT_PACKET_ID_FIRST)
+		val |= MT_TXD5_TX_STATUS_HOST;
+	txwi[5] = cpu_to_le32(val);
+
 	txwi[6] = 0;
 	txwi[7] = wcid->amsdu ? cpu_to_le32(MT_TXD7_HW_AMSDU) : 0;
 
@@ -986,11 +991,11 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
 	struct ieee80211_key_conf *key = info->control.hw_key;
 	struct ieee80211_vif *vif = info->control.vif;
-	struct mt76_tx_cb *cb = mt76_tx_skb_cb(tx_info->skb);
 	struct mt76_txwi_cache *t;
 	struct mt7915_txp *txp;
 	int id, i, nbuf = tx_info->nbuf - 1;
 	u8 *txwi = (u8 *)txwi_ptr;
+	int pid;
 
 	if (unlikely(tx_info->skb->len <= ETH_HLEN))
 		return -EINVAL;
@@ -998,10 +1003,10 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	if (!wcid)
 		wcid = &dev->mt76.global_wcid;
 
-	mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, key,
-			      false);
+	pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
 
-	cb->wcid = wcid->idx;
+	mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, pid, key,
+			      false);
 
 	txp = (struct mt7915_txp *)(txwi + MT_TXD_SIZE);
 	for (i = 0; i < nbuf; i++) {
@@ -1088,9 +1093,7 @@ static void
 mt7915_txwi_free(struct mt7915_dev *dev, struct mt76_txwi_cache *t,
 		 struct ieee80211_sta *sta, struct list_head *free_list)
 {
-	struct ieee80211_tx_status status = {};
 	struct mt76_dev *mdev = &dev->mt76;
-	struct ieee80211_tx_info *info;
 	struct mt76_wcid *wcid;
 	__le32 *txwi;
 	u16 wcid_idx;
@@ -1110,26 +1113,6 @@ mt7915_txwi_free(struct mt7915_dev *dev, struct mt76_txwi_cache *t,
 		wcid_idx = FIELD_GET(MT_TXD1_WLAN_IDX, le32_to_cpu(txwi[1]));
 	}
 
-	info = IEEE80211_SKB_CB(t->skb);
-	if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
-		struct ieee80211_hw *hw;
-
-		if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
-			info->flags |= IEEE80211_TX_STAT_ACK;
-
-		info->status.rates[0].count = 0;
-		info->status.rates[0].idx = -1;
-
-		status.skb = t->skb;
-		status.sta = sta;
-		status.info = info;
-
-		hw = mt76_tx_status_get_hw(mdev, t->skb);
-		ieee80211_tx_status_ext(hw, &status);
-
-		goto out;
-	}
-
 	__mt76_tx_complete_skb(mdev, wcid_idx, t->skb, free_list);
 
 out:
@@ -1221,11 +1204,89 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 	}
 }
 
+static bool
+mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
+		       __le32 *txs_data)
+{
+	struct mt76_dev *mdev = &dev->mt76;
+	struct ieee80211_tx_info *info;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+
+	mt76_tx_status_lock(mdev, &list);
+	skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
+	if (!skb)
+		goto out;
+
+	info = IEEE80211_SKB_CB(skb);
+	if (!(txs_data[0] & le32_to_cpu(MT_TXS0_ACK_ERROR_MASK)))
+		info->flags |= IEEE80211_TX_STAT_ACK;
+
+	info->status.ampdu_len = 1;
+	info->status.ampdu_ack_len = !!(info->flags &
+					IEEE80211_TX_STAT_ACK);
+
+	info->status.rates[0].idx = -1;
+	mt76_tx_status_skb_done(mdev, skb, &list);
+
+out:
+	mt76_tx_status_unlock(mdev, &list);
+
+	return !!skb;
+}
+
+static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data)
+{
+	struct mt7915_sta *msta = NULL;
+	struct mt76_wcid *wcid;
+	__le32 *txs_data = data;
+	u16 wcidx;
+	u32 txs;
+	u8 pid;
+
+	txs = le32_to_cpu(txs_data[0]);
+	if (FIELD_GET(MT_TXS0_TXS_FORMAT, txs) > 1)
+		return;
+
+	txs = le32_to_cpu(txs_data[2]);
+	wcidx = FIELD_GET(MT_TXS2_WCID, txs);
+
+	txs = le32_to_cpu(txs_data[3]);
+	pid = FIELD_GET(MT_TXS3_PID, txs);
+
+	if (pid < MT_PACKET_ID_FIRST)
+		return;
+
+	if (wcidx >= MT7915_WTBL_SIZE)
+		return;
+
+	rcu_read_lock();
+
+	wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+	if (!wcid)
+		goto out;
+
+	mt7915_mac_add_txs_skb(dev, wcid, pid, txs_data);
+
+	if (!wcid->sta)
+		goto out;
+
+	msta = container_of(wcid, struct mt7915_sta, wcid);
+	spin_lock_bh(&dev->sta_poll_lock);
+	if (list_empty(&msta->poll_list))
+		list_add_tail(&msta->poll_list, &dev->sta_poll_list);
+	spin_unlock_bh(&dev->sta_poll_lock);
+
+out:
+	rcu_read_unlock();
+}
+
 void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
 			 struct sk_buff *skb)
 {
 	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
 	__le32 *rxd = (__le32 *)skb->data;
+	__le32 *end = (__le32 *)&skb->data[skb->len];
 	enum rx_pkt_type type;
 
 	type = FIELD_GET(MT_RXD0_PKT_TYPE, le32_to_cpu(rxd[0]));
@@ -1240,6 +1301,11 @@ void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
 	case PKT_TYPE_TXRXV:
 		mt7915_mac_fill_rx_vector(dev, skb);
 		break;
+	case PKT_TYPE_TXS:
+		for (rxd += 2; rxd + 8 <= end; rxd += 8)
+		    mt7915_mac_add_txs(dev, rxd);
+		dev_kfree_skb(skb);
+		break;
 	case PKT_TYPE_NORMAL:
 		if (!mt7915_mac_fill_rx(dev, skb)) {
 			mt76_rx(&dev->mt76, q, skb);
@@ -1775,6 +1841,8 @@ void mt7915_mac_work(struct work_struct *work)
 
 	mutex_unlock(&mphy->dev->mutex);
 
+	mt76_tx_status_check(mphy->dev, NULL, false);
+
 	ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
 				     MT7915_WATCHDOG_TIME);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
index 0f929fb53027..eb1885f4bd8e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -304,6 +304,62 @@ struct mt7915_tx_free {
 /* will support this field in further revision */
 #define MT_TX_FREE_RATE			GENMASK(13, 0)
 
+#define MT_TXS0_FIXED_RATE		BIT(31)
+#define MT_TXS0_BW			GENMASK(30, 29)
+#define MT_TXS0_TID			GENMASK(28, 26)
+#define MT_TXS0_AMPDU			BIT(25)
+#define MT_TXS0_TXS_FORMAT		GENMASK(24, 23)
+#define MT_TXS0_BA_ERROR		BIT(22)
+#define MT_TXS0_PS_FLAG			BIT(21)
+#define MT_TXS0_TXOP_TIMEOUT		BIT(20)
+#define MT_TXS0_BIP_ERROR		BIT(19)
+
+#define MT_TXS0_QUEUE_TIMEOUT		BIT(18)
+#define MT_TXS0_RTS_TIMEOUT		BIT(17)
+#define MT_TXS0_ACK_TIMEOUT		BIT(16)
+#define MT_TXS0_ACK_ERROR_MASK		GENMASK(18, 16)
+
+#define MT_TXS0_TX_STATUS_HOST		BIT(15)
+#define MT_TXS0_TX_STATUS_MCU		BIT(14)
+#define MT_TXS0_TX_RATE			GENMASK(13, 0)
+
+#define MT_TXS1_SEQNO			GENMASK(31, 20)
+#define MT_TXS1_RESP_RATE		GENMASK(19, 16)
+#define MT_TXS1_RXV_SEQNO		GENMASK(15, 8)
+#define MT_TXS1_TX_POWER_DBM		GENMASK(7, 0)
+
+#define MT_TXS2_BF_STATUS		GENMASK(31, 30)
+#define MT_TXS2_LAST_TX_RATE		GENMASK(29, 27)
+#define MT_TXS2_SHARED_ANTENNA		BIT(26)
+#define MT_TXS2_WCID			GENMASK(25, 16)
+#define MT_TXS2_TX_DELAY		GENMASK(15, 0)
+
+#define MT_TXS3_PID			GENMASK(31, 24)
+#define MT_TXS3_ANT_ID			GENMASK(23, 0)
+
+#define MT_TXS4_TIMESTAMP		GENMASK(31, 0)
+
+#define MT_TXS5_F0_FINAL_MPDU		BIT(31)
+#define MT_TXS5_F0_QOS			BIT(30)
+#define MT_TXS5_F0_TX_COUNT		GENMASK(29, 25)
+#define MT_TXS5_F0_FRONT_TIME		GENMASK(24, 0)
+#define MT_TXS5_F1_MPDU_TX_COUNT	GENMASK(31, 24)
+#define MT_TXS5_F1_MPDU_TX_BYTES	GENMASK(23, 0)
+
+#define MT_TXS6_F0_NOISE_3		GENMASK(31, 24)
+#define MT_TXS6_F0_NOISE_2		GENMASK(23, 16)
+#define MT_TXS6_F0_NOISE_1		GENMASK(15, 8)
+#define MT_TXS6_F0_NOISE_0		GENMASK(7, 0)
+#define MT_TXS6_F1_MPDU_FAIL_COUNT	GENMASK(31, 24)
+#define MT_TXS6_F1_MPDU_FAIL_BYTES	GENMASK(23, 0)
+
+#define MT_TXS7_F0_RCPI_3		GENMASK(31, 24)
+#define MT_TXS7_F0_RCPI_2		GENMASK(23, 16)
+#define MT_TXS7_F0_RCPI_1		GENMASK(15, 8)
+#define MT_TXS7_F0_RCPI_0		GENMASK(7, 0)
+#define MT_TXS7_F1_MPDU_RETRY_COUNT	GENMASK(31, 24)
+#define MT_TXS7_F1_MPDU_RETRY_BYTES	GENMASK(23, 0)
+
 struct mt7915_dfs_pulse {
 	u32 max_width;		/* us */
 	int max_pwr;		/* dbm */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 9ec414de317e..76e8aa604e8b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2440,7 +2440,7 @@ mt7915_mcu_beacon_cont(struct mt7915_dev *dev, struct sk_buff *rskb,
 		cont->csa_ofs = cpu_to_le16(offs->cntdwn_counter_offs[0] - 4);
 
 	buf = (u8 *)tlv + sizeof(*cont);
-	mt7915_mac_write_txwi(dev, (__le32 *)buf, skb, wcid, NULL,
+	mt7915_mac_write_txwi(dev, (__le32 *)buf, skb, wcid, 0, NULL,
 			      true);
 	memcpy(buf + MT_TXD_SIZE, skb->data, skb->len);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 0342729145e7..c4bf8edeb1dd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -395,7 +395,7 @@ void mt7915_mac_reset_counters(struct mt7915_phy *phy);
 void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy);
 void mt7915_mac_enable_nf(struct mt7915_dev *dev, bool ext_phy);
 void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
-			   struct sk_buff *skb, struct mt76_wcid *wcid,
+			   struct sk_buff *skb, struct mt76_wcid *wcid, int pid,
 			   struct ieee80211_key_conf *key, bool beacon);
 void mt7915_mac_set_timing(struct mt7915_phy *phy);
 int mt7915_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,

From d356dbe23f607dc1a05eb3af887a6ea21c519cb3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 14:52:53 +0100
Subject: [PATCH 2099/3292] net: pcs: xpcs: Fix a less than zero u16 comparison
 error

Currently the check for the u16 variable val being less than zero is
always false because val is unsigned. Fix this by using the int
variable for the assignment and less than zero check.

Addresses-Coverity: ("Unsigned compared against 0")
Fixes: f7380bba42fd ("net: pcs: xpcs: add support for NXP SJA1110")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs-nxp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c
index de99c37cf2ae..984c9f7f16a8 100644
--- a/drivers/net/pcs/pcs-xpcs-nxp.c
+++ b/drivers/net/pcs/pcs-xpcs-nxp.c
@@ -152,13 +152,13 @@ static int nxp_sja1110_pma_config(struct dw_xpcs *xpcs,
 	/* Enable TX and RX PLLs and circuits.
 	 * Release reset of PMA to enable data flow to/from PCS.
 	 */
-	val = xpcs_read(xpcs, MDIO_MMD_VEND2, SJA1110_POWERDOWN_ENABLE);
-	if (val < 0)
-		return val;
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, SJA1110_POWERDOWN_ENABLE);
+	if (ret < 0)
+		return ret;
 
-	val &= ~(SJA1110_TXPLL_PD | SJA1110_TXPD | SJA1110_RXCH_PD |
-		 SJA1110_RXBIAS_PD | SJA1110_RESET_SER_EN |
-		 SJA1110_RESET_SER | SJA1110_RESET_DES);
+	val = ret & ~(SJA1110_TXPLL_PD | SJA1110_TXPD | SJA1110_RXCH_PD |
+		      SJA1110_RXBIAS_PD | SJA1110_RESET_SER_EN |
+		      SJA1110_RESET_SER | SJA1110_RESET_DES);
 	val |= SJA1110_RXPKDETEN | SJA1110_RCVEN;
 
 	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, SJA1110_POWERDOWN_ENABLE, val);

From 01f1b6ed2b846ae124bb54c636ddadb4dd1813a3 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Wed, 16 Jun 2021 20:46:07 +0300
Subject: [PATCH 2100/3292] documentation: networking: devlink: fix
 prestera.rst formatting that causes build warnings

Fixes: 66826c43e63d ("documentation: networking: devlink: add prestera switched driver Documentation")

Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 1 +
 Documentation/networking/devlink/index.rst        | 1 +
 Documentation/networking/devlink/prestera.rst     | 4 ++--
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 935b6397e8cf..ef8928c355df 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -497,6 +497,7 @@ drivers:
 
   * :doc:`netdevsim`
   * :doc:`mlxsw`
+  * :doc:`prestera`
 
 .. _Generic-Packet-Trap-Groups:
 
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index 8428a1220723..b3b9e0692088 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -46,3 +46,4 @@ parameters, info versions, and other features it supports.
    qed
    ti-cpsw-switch
    am65-nuss-cpsw-switch
+   prestera
diff --git a/Documentation/networking/devlink/prestera.rst b/Documentation/networking/devlink/prestera.rst
index e8b52ffd4707..49409d1d3081 100644
--- a/Documentation/networking/devlink/prestera.rst
+++ b/Documentation/networking/devlink/prestera.rst
@@ -1,8 +1,8 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-=====================
+========================
 prestera devlink support
-=====================
+========================
 
 This document describes the devlink features implemented by the ``prestera``
 device driver.

From 2d8ea148e553e1dd4e80a87741abdfb229e2b323 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Thu, 17 Jun 2021 11:37:11 +0800
Subject: [PATCH 2101/3292] net: fix mistake path for netdev_features_strings

Th_strings arrays netdev_features_strings, tunable_strings, and
phy_tunable_strings has been moved to file net/ethtool/common.c.
So fixes the comment.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 +-
 include/uapi/linux/ethtool.h    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 3de38d6a0aea..2c6b9e416225 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -93,7 +93,7 @@ enum {
 
 	/*
 	 * Add your fresh new feature above and remember to update
-	 * netdev_features_strings[] in net/core/ethtool.c and maybe
+	 * netdev_features_strings[] in net/ethtool/common.c and maybe
 	 * some feature mask #defines below. Please also describe it
 	 * in Documentation/networking/netdev-features.rst.
 	 */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index cfef6b08169a..67aa7134b301 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -233,7 +233,7 @@ enum tunable_id {
 	ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
 	/*
 	 * Add your fresh new tunable attribute above and remember to update
-	 * tunable_strings[] in net/core/ethtool.c
+	 * tunable_strings[] in net/ethtool/common.c
 	 */
 	__ETHTOOL_TUNABLE_COUNT,
 };
@@ -297,7 +297,7 @@ enum phy_tunable_id {
 	ETHTOOL_PHY_EDPD,
 	/*
 	 * Add your fresh new phy tunable attribute above and remember to update
-	 * phy_tunable_strings[] in net/core/ethtool.c
+	 * phy_tunable_strings[] in net/ethtool/common.c
 	 */
 	__ETHTOOL_PHY_TUNABLE_COUNT,
 };

From b244163f2c45c12053cb0291c955f892e79ed8a9 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 17 Jun 2021 13:11:19 +0800
Subject: [PATCH 2102/3292] net: ipa: Add missing of_node_put() in
 ipa_firmware_load()

This node pointer is returned by of_parse_phandle() with refcount
incremented in this function. of_node_put() on it before exiting
this function.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 2243e3e5b7ea..f82130db32f6 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -530,6 +530,7 @@ static int ipa_firmware_load(struct device *dev)
 	}
 
 	ret = of_address_to_resource(node, 0, &res);
+	of_node_put(node);
 	if (ret) {
 		dev_err(dev, "error %d getting \"memory-region\" resource\n",
 			ret);

From 55d96f72e8ddc0a294e0b9c94016edbb699537e1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 17 Jun 2021 16:02:07 +0800
Subject: [PATCH 2103/3292] net: sched: fix error return code in
 tcf_del_walker()

When nla_put_u32() fails, 'ret' could be 0, it should
return error code in tcf_del_walker().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f6d5755d669e..d17a66aab8ee 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -381,7 +381,8 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 	}
 	mutex_unlock(&idrinfo->lock);
 
-	if (nla_put_u32(skb, TCA_FCNT, n_i))
+	ret = nla_put_u32(skb, TCA_FCNT, n_i);
+	if (ret)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 

From d59a24fd1bdbeea88c621cae746a5a2531f62a50 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Thu, 17 Jun 2021 11:49:15 +0200
Subject: [PATCH 2104/3292] net: gianfar: Convert to ndo_get_stats64 interface

No reason to produce the legacy net_device_stats struct, only to have it
converted to rtnl_link_stats64.  And as a bonus, this allows for improving
counter size to 64 bit.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 25 +++++++-----------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index f2945abdb041..a0277fe8cc60 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -274,32 +274,21 @@ static void gfar_configure_coalescing_all(struct gfar_private *priv)
 	gfar_configure_coalescing(priv, 0xFF, 0xFF);
 }
 
-static struct net_device_stats *gfar_get_stats(struct net_device *dev)
+static void gfar_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	unsigned long rx_packets = 0, rx_bytes = 0, rx_dropped = 0;
-	unsigned long tx_packets = 0, tx_bytes = 0;
 	int i;
 
 	for (i = 0; i < priv->num_rx_queues; i++) {
-		rx_packets += priv->rx_queue[i]->stats.rx_packets;
-		rx_bytes   += priv->rx_queue[i]->stats.rx_bytes;
-		rx_dropped += priv->rx_queue[i]->stats.rx_dropped;
+		stats->rx_packets += priv->rx_queue[i]->stats.rx_packets;
+		stats->rx_bytes   += priv->rx_queue[i]->stats.rx_bytes;
+		stats->rx_dropped += priv->rx_queue[i]->stats.rx_dropped;
 	}
 
-	dev->stats.rx_packets = rx_packets;
-	dev->stats.rx_bytes   = rx_bytes;
-	dev->stats.rx_dropped = rx_dropped;
-
 	for (i = 0; i < priv->num_tx_queues; i++) {
-		tx_bytes += priv->tx_queue[i]->stats.tx_bytes;
-		tx_packets += priv->tx_queue[i]->stats.tx_packets;
+		stats->tx_bytes += priv->tx_queue[i]->stats.tx_bytes;
+		stats->tx_packets += priv->tx_queue[i]->stats.tx_packets;
 	}
-
-	dev->stats.tx_bytes   = tx_bytes;
-	dev->stats.tx_packets = tx_packets;
-
-	return &dev->stats;
 }
 
 /* Set the appropriate hash bit for the given addr */
@@ -3157,7 +3146,7 @@ static const struct net_device_ops gfar_netdev_ops = {
 	.ndo_set_rx_mode = gfar_set_multi,
 	.ndo_tx_timeout = gfar_timeout,
 	.ndo_do_ioctl = gfar_ioctl,
-	.ndo_get_stats = gfar_get_stats,
+	.ndo_get_stats64 = gfar_get_stats64,
 	.ndo_change_carrier = fixed_phy_change_carrier,
 	.ndo_set_mac_address = gfar_set_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,

From 2658530d797ff33d110efd1513b7a7ef33ba5a30 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Thu, 17 Jun 2021 11:49:17 +0200
Subject: [PATCH 2105/3292] net: gianfar: Extend statistics counters to 64-bit

No reason to wrap counter values at 2^32.  Especially the bytes counters
can wrap pretty fast on Gbit networks.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 5ea47df93e5e..d8ae5353e881 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -913,8 +913,8 @@ enum {
  * Per TX queue stats
  */
 struct tx_q_stats {
-	unsigned long tx_packets;
-	unsigned long tx_bytes;
+	u64 tx_packets;
+	u64 tx_bytes;
 };
 
 /**
@@ -963,9 +963,9 @@ struct gfar_priv_tx_q {
  * Per RX queue stats
  */
 struct rx_q_stats {
-	unsigned long rx_packets;
-	unsigned long rx_bytes;
-	unsigned long rx_dropped;
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 rx_dropped;
 };
 
 struct gfar_rx_buff {

From ef09487431a96029b64a7a6fc4ced46662e921e7 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Thu, 17 Jun 2021 11:49:20 +0200
Subject: [PATCH 2106/3292] net: gianfar: Clear CAR registers

The CAR1 and CAR2 registers are W1C style registers, to the memset does not
actually clear them.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index a0277fe8cc60..ebd1065f39fa 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3103,6 +3103,9 @@ static void gfar_hw_init(struct gfar_private *priv)
 		/* Mask off the CAM interrupts */
 		gfar_write(&regs->rmon.cam1, 0xffffffff);
 		gfar_write(&regs->rmon.cam2, 0xffffffff);
+		/* Clear the CAR registers (w1c style) */
+		gfar_write(&regs->rmon.car1, 0xffffffff);
+		gfar_write(&regs->rmon.car2, 0xffffffff);
 	}
 
 	/* Initialize ECNTRL */

From e2dbbbe52c4a0f7704735b49f864fe8ea06111e5 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Thu, 17 Jun 2021 11:49:23 +0200
Subject: [PATCH 2107/3292] net: gianfar: Avoid 16 bytes of memset

The memset on CAMx is wrong, as it actually unmasks all carry irq's,
which we clearly are not interested in.

The memset on CARx registers is just pointless, as they are W1C.

So let's just stop the memset before CAR1.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index ebd1065f39fa..4608c0c337bc 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3098,7 +3098,7 @@ static void gfar_hw_init(struct gfar_private *priv)
 
 	/* Zero out the rmon mib registers if it has them */
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
-		memset_io(&(regs->rmon), 0, sizeof(struct rmon_mib));
+		memset_io(&regs->rmon, 0, offsetof(struct rmon_mib, car1));
 
 		/* Mask off the CAM interrupts */
 		gfar_write(&regs->rmon.cam1, 0xffffffff);

From 8da32a1071af4af7723269ad4e84cc6160e2dc41 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Thu, 17 Jun 2021 11:49:26 +0200
Subject: [PATCH 2108/3292] net: gianfar: Add definitions for CAR1 and CAM1
 register bits

These are for carry status and interrupt mask bits of statistics registers.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.h | 54 ++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index d8ae5353e881..c8aa140a910f 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -445,6 +445,60 @@ struct ethtool_rx_list {
 #define RQFPR_PER		0x00000002
 #define RQFPR_EER		0x00000001
 
+/* CAR1 bits */
+#define CAR1_C164		0x80000000
+#define CAR1_C1127		0x40000000
+#define CAR1_C1255		0x20000000
+#define CAR1_C1511		0x10000000
+#define CAR1_C11K		0x08000000
+#define CAR1_C1MAX		0x04000000
+#define CAR1_C1MGV		0x02000000
+#define CAR1_C1REJ		0x00020000
+#define CAR1_C1RBY		0x00010000
+#define CAR1_C1RPK		0x00008000
+#define CAR1_C1RFC		0x00004000
+#define CAR1_C1RMC		0x00002000
+#define CAR1_C1RBC		0x00001000
+#define CAR1_C1RXC		0x00000800
+#define CAR1_C1RXP		0x00000400
+#define CAR1_C1RXU		0x00000200
+#define CAR1_C1RAL		0x00000100
+#define CAR1_C1RFL		0x00000080
+#define CAR1_C1RCD		0x00000040
+#define CAR1_C1RCS		0x00000020
+#define CAR1_C1RUN		0x00000010
+#define CAR1_C1ROV		0x00000008
+#define CAR1_C1RFR		0x00000004
+#define CAR1_C1RJB		0x00000002
+#define CAR1_C1RDR		0x00000001
+
+/* CAM1 bits */
+#define CAM1_M164		0x80000000
+#define CAM1_M1127		0x40000000
+#define CAM1_M1255		0x20000000
+#define CAM1_M1511		0x10000000
+#define CAM1_M11K		0x08000000
+#define CAM1_M1MAX		0x04000000
+#define CAM1_M1MGV		0x02000000
+#define CAM1_M1REJ		0x00020000
+#define CAM1_M1RBY		0x00010000
+#define CAM1_M1RPK		0x00008000
+#define CAM1_M1RFC		0x00004000
+#define CAM1_M1RMC		0x00002000
+#define CAM1_M1RBC		0x00001000
+#define CAM1_M1RXC		0x00000800
+#define CAM1_M1RXP		0x00000400
+#define CAM1_M1RXU		0x00000200
+#define CAM1_M1RAL		0x00000100
+#define CAM1_M1RFL		0x00000080
+#define CAM1_M1RCD		0x00000040
+#define CAM1_M1RCS		0x00000020
+#define CAM1_M1RUN		0x00000010
+#define CAM1_M1ROV		0x00000008
+#define CAM1_M1RFR		0x00000004
+#define CAM1_M1RJB		0x00000002
+#define CAM1_M1RDR		0x00000001
+
 /* TxBD status field bits */
 #define TXBD_READY		0x8000
 #define TXBD_PADCRC		0x4000

From 14870b75fe0be5c565339f008ba25326f86a7ce8 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Thu, 17 Jun 2021 11:49:28 +0200
Subject: [PATCH 2109/3292] net: gianfar: Implement rx_missed_errors counter

Devices with RMON support has a 16-bit RDRP counter.  It provides: "Receive
dropped packets counter. Increments for frames received which are streamed
to system but are later dropped due to lack of system resources."

To handle more than 2^16 dropped packets, a carry bit in CAR1 register is
set on overflow, so we enable irq when this is set, extending the counter
to 2^64 for handling situations where lots of packets are missed (e.g.
during heavy network storms).

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 50 ++++++++++++++++++++++--
 drivers/net/ethernet/freescale/gianfar.h | 10 +++++
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 4608c0c337bc..9646483137c4 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -289,6 +289,29 @@ static void gfar_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *s
 		stats->tx_bytes += priv->tx_queue[i]->stats.tx_bytes;
 		stats->tx_packets += priv->tx_queue[i]->stats.tx_packets;
 	}
+
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
+		struct rmon_mib __iomem *rmon = &priv->gfargrp[0].regs->rmon;
+		unsigned long flags;
+		u32 rdrp, car, car_before;
+		u64 rdrp_offset;
+
+		spin_lock_irqsave(&priv->rmon_overflow.lock, flags);
+		car = gfar_read(&rmon->car1) & CAR1_C1RDR;
+		do {
+			car_before = car;
+			rdrp = gfar_read(&rmon->rdrp);
+			car = gfar_read(&rmon->car1) & CAR1_C1RDR;
+		} while (car != car_before);
+		if (car) {
+			priv->rmon_overflow.rdrp++;
+			gfar_write(&rmon->car1, car);
+		}
+		rdrp_offset = priv->rmon_overflow.rdrp;
+		spin_unlock_irqrestore(&priv->rmon_overflow.lock, flags);
+
+		stats->rx_missed_errors = rdrp + (rdrp_offset << 16);
+	}
 }
 
 /* Set the appropriate hash bit for the given addr */
@@ -379,7 +402,8 @@ static void gfar_ints_enable(struct gfar_private *priv)
 	for (i = 0; i < priv->num_grps; i++) {
 		struct gfar __iomem *regs = priv->gfargrp[i].regs;
 		/* Unmask the interrupts we look for */
-		gfar_write(&regs->imask, IMASK_DEFAULT);
+		gfar_write(&regs->imask,
+			   IMASK_DEFAULT | priv->rmon_overflow.imask);
 	}
 }
 
@@ -2287,7 +2311,7 @@ static irqreturn_t gfar_receive(int irq, void *grp_id)
 	if (likely(napi_schedule_prep(&grp->napi_rx))) {
 		spin_lock_irqsave(&grp->grplock, flags);
 		imask = gfar_read(&grp->regs->imask);
-		imask &= IMASK_RX_DISABLED;
+		imask &= IMASK_RX_DISABLED | grp->priv->rmon_overflow.imask;
 		gfar_write(&grp->regs->imask, imask);
 		spin_unlock_irqrestore(&grp->grplock, flags);
 		__napi_schedule(&grp->napi_rx);
@@ -2311,7 +2335,7 @@ static irqreturn_t gfar_transmit(int irq, void *grp_id)
 	if (likely(napi_schedule_prep(&grp->napi_tx))) {
 		spin_lock_irqsave(&grp->grplock, flags);
 		imask = gfar_read(&grp->regs->imask);
-		imask &= IMASK_TX_DISABLED;
+		imask &= IMASK_TX_DISABLED | grp->priv->rmon_overflow.imask;
 		gfar_write(&grp->regs->imask, imask);
 		spin_unlock_irqrestore(&grp->grplock, flags);
 		__napi_schedule(&grp->napi_tx);
@@ -2682,6 +2706,18 @@ static irqreturn_t gfar_error(int irq, void *grp_id)
 		}
 		netif_dbg(priv, tx_err, dev, "Transmit Error\n");
 	}
+	if (events & IEVENT_MSRO) {
+		struct rmon_mib __iomem *rmon = &regs->rmon;
+		u32 car;
+
+		spin_lock(&priv->rmon_overflow.lock);
+		car = gfar_read(&rmon->car1) & CAR1_C1RDR;
+		if (car) {
+			priv->rmon_overflow.rdrp++;
+			gfar_write(&rmon->car1, car);
+		}
+		spin_unlock(&priv->rmon_overflow.lock);
+	}
 	if (events & IEVENT_BSY) {
 		dev->stats.rx_over_errors++;
 		atomic64_inc(&priv->extra_stats.rx_bsy);
@@ -3259,6 +3295,14 @@ static int gfar_probe(struct platform_device *ofdev)
 
 	gfar_hw_init(priv);
 
+	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) {
+		struct rmon_mib __iomem *rmon = &priv->gfargrp[0].regs->rmon;
+
+		spin_lock_init(&priv->rmon_overflow.lock);
+		priv->rmon_overflow.imask = IMASK_MSRO;
+		gfar_write(&rmon->cam1, gfar_read(&rmon->cam1) & ~CAM1_M1RDR);
+	}
+
 	/* Carrier starts down, phylib will bring it up */
 	netif_carrier_off(dev);
 
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index c8aa140a910f..ca5e14f908fe 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -663,6 +663,15 @@ struct rmon_mib
 	u32	cam2;	/* 0x.73c - Carry Mask Register Two */
 };
 
+struct rmon_overflow {
+	/* lock for synchronization of the rdrp field of this struct, and
+	 * CAR1/CAR2 registers
+	 */
+	spinlock_t lock;
+	u32	imask;
+	u64	rdrp;
+};
+
 struct gfar_extra_stats {
 	atomic64_t rx_alloc_err;
 	atomic64_t rx_large;
@@ -1150,6 +1159,7 @@ struct gfar_private {
 
 	/* Network Statistics */
 	struct gfar_extra_stats extra_stats;
+	struct rmon_overflow rmon_overflow;
 
 	/* PHY stuff */
 	phy_interface_t interface;

From b67fda9a8280b14d44712d2ad6413e0074b070b8 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Thu, 17 Jun 2021 18:00:15 +0800
Subject: [PATCH 2110/3292] r8152: store the information of the pipes

Store the information of the pipes to avoid calling usb_rcvctrlpipe(),
usb_sndctrlpipe(), usb_rcvbulkpipe(), usb_sndbulkpipe(), and
usb_rcvintpipe() frequently.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 85039e17f4cd..62cd48dc2878 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -931,6 +931,8 @@ struct r8152 {
 	u32 rx_pending;
 	u32 fc_pause_on, fc_pause_off;
 
+	unsigned int pipe_in, pipe_out, pipe_intr, pipe_ctrl_in, pipe_ctrl_out;
+
 	u32 support_2500full:1;
 	u32 lenovo_macpassthru:1;
 	u32 dell_tb_rx_agg_bug:1;
@@ -1198,7 +1200,7 @@ int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
 	if (!tmp)
 		return -ENOMEM;
 
-	ret = usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0),
+	ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in,
 			      RTL8152_REQ_GET_REGS, RTL8152_REQT_READ,
 			      value, index, tmp, size, 500);
 	if (ret < 0)
@@ -1221,7 +1223,7 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
 	if (!tmp)
 		return -ENOMEM;
 
-	ret = usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0),
+	ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out,
 			      RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE,
 			      value, index, tmp, size, 500);
 
@@ -2041,7 +2043,7 @@ static int alloc_all_mem(struct r8152 *tp)
 		goto err1;
 
 	tp->intr_interval = (int)ep_intr->desc.bInterval;
-	usb_fill_int_urb(tp->intr_urb, tp->udev, usb_rcvintpipe(tp->udev, 3),
+	usb_fill_int_urb(tp->intr_urb, tp->udev, tp->pipe_intr,
 			 tp->intr_buff, INTBUFSIZE, intr_callback,
 			 tp, tp->intr_interval);
 
@@ -2305,7 +2307,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 	if (ret < 0)
 		goto out_tx_fill;
 
-	usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2),
+	usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_out,
 			  agg->head, (int)(tx_data - (u8 *)agg->head),
 			  (usb_complete_t)write_bulk_callback, agg);
 
@@ -2620,7 +2622,7 @@ int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags)
 	    !test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev))
 		return 0;
 
-	usb_fill_bulk_urb(agg->urb, tp->udev, usb_rcvbulkpipe(tp->udev, 1),
+	usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_in,
 			  agg->buffer, tp->rx_buf_sz,
 			  (usb_complete_t)read_bulk_callback, agg);
 
@@ -9507,6 +9509,12 @@ static int rtl8152_probe(struct usb_interface *intf,
 	tp->intf = intf;
 	tp->version = version;
 
+	tp->pipe_ctrl_in = usb_rcvctrlpipe(udev, 0);
+	tp->pipe_ctrl_out = usb_sndctrlpipe(udev, 0);
+	tp->pipe_in = usb_rcvbulkpipe(udev, 1);
+	tp->pipe_out = usb_sndbulkpipe(udev, 2);
+	tp->pipe_intr = usb_rcvintpipe(udev, 3);
+
 	switch (version) {
 	case RTL_VER_01:
 	case RTL_VER_02:

From 70ef608c224af39c7eee850d763b986954594de6 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 17 Jun 2021 15:29:03 +0300
Subject: [PATCH 2111/3292] net: mdio: setup of_node for the MDIO device

By mistake, the of_node of the MDIO device was not setup in the patch
linked below. As a consequence, any PHY driver that depends on the
of_node in its probe callback was not be able to successfully finish its
probe on a PHY, thus the Generic PHY driver was used instead.

Fix this by actually setting up the of_node.

Fixes: bc1bee3b87ee ("net: mdiobus: Introduce fwnode_mdiobus_register_phy()")
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/fwnode_mdio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index e96766da8de4..283ddb1185bd 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -65,6 +65,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
 	 * can be looked up later
 	 */
 	fwnode_handle_get(child);
+	phy->mdio.dev.of_node = to_of_node(child);
 	phy->mdio.dev.fwnode = child;
 
 	/* All data is now stored in the phy struct;

From 43e76d463c09a0272b84775bcc727c1eb8b384b2 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 17 Jun 2021 15:29:04 +0300
Subject: [PATCH 2112/3292] driver core: add a helper to setup both the of_node
 and fwnode of a device

There are many places where both the fwnode_handle and the of_node of a
device need to be populated. Add a function which does both so that we
have consistency.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/core.c    | 7 +++++++
 include/linux/device.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 628e33939aca..b6836bfa985c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4723,6 +4723,13 @@ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2)
 }
 EXPORT_SYMBOL_GPL(device_set_of_node_from_dev);
 
+void device_set_node(struct device *dev, struct fwnode_handle *fwnode)
+{
+	dev->fwnode = fwnode;
+	dev->of_node = to_of_node(fwnode);
+}
+EXPORT_SYMBOL_GPL(device_set_node);
+
 int device_match_name(struct device *dev, const void *name)
 {
 	return sysfs_streq(dev_name(dev), name);
diff --git a/include/linux/device.h b/include/linux/device.h
index 38a2071cf776..a1e7cab2c7bf 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -819,6 +819,7 @@ int device_online(struct device *dev);
 void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
 void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
 void device_set_of_node_from_dev(struct device *dev, const struct device *dev2);
+void device_set_node(struct device *dev, struct fwnode_handle *fwnode);
 
 static inline int dev_num_vf(struct device *dev)
 {

From 7e33d84db1a8a6c3000e9b02c074c17819680755 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 17 Jun 2021 15:29:05 +0300
Subject: [PATCH 2113/3292] net: mdio: use device_set_node() to setup both
 fwnode and of

Use the newly introduced helper to setup both the of_node and the
fwnode for a given device.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/fwnode_mdio.c | 3 +--
 drivers/net/mdio/of_mdio.c     | 9 ++++-----
 drivers/net/phy/mdio_bus.c     | 3 +--
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index 283ddb1185bd..1becb1a731f6 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -65,8 +65,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
 	 * can be looked up later
 	 */
 	fwnode_handle_get(child);
-	phy->mdio.dev.of_node = to_of_node(child);
-	phy->mdio.dev.fwnode = child;
+	device_set_node(&phy->mdio.dev, child);
 
 	/* All data is now stored in the phy struct;
 	 * register it
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 8744b1e1c2b1..9e3c815a070f 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -51,6 +51,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
 static int of_mdiobus_register_device(struct mii_bus *mdio,
 				      struct device_node *child, u32 addr)
 {
+	struct fwnode_handle *fwnode = of_fwnode_handle(child);
 	struct mdio_device *mdiodev;
 	int rc;
 
@@ -61,9 +62,8 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
 	/* Associate the OF node with the device structure so it
 	 * can be looked up later.
 	 */
-	of_node_get(child);
-	mdiodev->dev.of_node = child;
-	mdiodev->dev.fwnode = of_fwnode_handle(child);
+	fwnode_handle_get(fwnode);
+	device_set_node(&mdiodev->dev, fwnode);
 
 	/* All data is now stored in the mdiodev struct; register it. */
 	rc = mdio_device_register(mdiodev);
@@ -162,8 +162,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 	 * the device tree are populated after the bus has been registered */
 	mdio->phy_mask = ~0;
 
-	mdio->dev.of_node = np;
-	mdio->dev.fwnode = of_fwnode_handle(np);
+	device_set_node(&mdio->dev, of_fwnode_handle(np));
 
 	/* Get bus level PHY reset GPIO details */
 	mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY;
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 24665670a89a..53f034fc2ef7 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -459,8 +459,7 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
 			continue;
 
 		if (addr == mdiodev->addr) {
-			dev->of_node = child;
-			dev->fwnode = of_fwnode_handle(child);
+			device_set_node(dev, of_fwnode_handle(child));
 			return;
 		}
 	}

From f271606f5289c87a3c18cb1cee9ff9ac03c6cb64 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 17 Jun 2021 22:03:14 +0800
Subject: [PATCH 2114/3292] net: hdlc_ppp: remove redundant blank lines

This patch removes some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 261b53fc8e04..e25b2f058d8c 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -58,7 +58,6 @@ struct cp_header {
 	__be16 len;
 };
 
-
 struct proto {
 	struct net_device *dev;
 	struct timer_list timer;
@@ -160,7 +159,6 @@ static __be16 ppp_type_trans(struct sk_buff *skb, struct net_device *dev)
 	}
 }
 
-
 static int ppp_hard_header(struct sk_buff *skb, struct net_device *dev,
 			   u16 type, const void *daddr, const void *saddr,
 			   unsigned int len)
@@ -193,7 +191,6 @@ static int ppp_hard_header(struct sk_buff *skb, struct net_device *dev,
 	return sizeof(struct hdlc_header);
 }
 
-
 static void ppp_tx_flush(void)
 {
 	struct sk_buff *skb;
@@ -256,7 +253,6 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code,
 	skb_queue_tail(&tx_queue, skb);
 }
 
-
 /* State transition table (compare STD-51)
    Events                                   Actions
    TO+  = Timeout with counter > 0          irc = Initialize-Restart-Count
@@ -294,7 +290,6 @@ static int cp_table[EVENTS][STATES] = {
 	{    0    ,      1      ,  1  ,    1    ,  1  ,    1    ,IRC|STR|2}, /* RXJ- */
 };
 
-
 /* SCA: RCR+ must supply id, len and data
    SCN: RCR- must supply code, id, len and data
    STA: RTR must supply id
@@ -369,7 +364,6 @@ static void ppp_cp_event(struct net_device *dev, u16 pid, u16 event, u8 code,
 #endif
 }
 
-
 static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id,
 			    unsigned int req_len, const u8 *data)
 {
@@ -615,7 +609,6 @@ static void ppp_timer(struct timer_list *t)
 	ppp_tx_flush();
 }
 
-
 static void ppp_start(struct net_device *dev)
 {
 	struct ppp *ppp = get_ppp(dev);
@@ -707,7 +700,6 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 	return -EINVAL;
 }
 
-
 static int __init mod_init(void)
 {
 	skb_queue_head_init(&tx_queue);
@@ -720,7 +712,6 @@ static void __exit mod_exit(void)
 	unregister_hdlc_protocol(&proto);
 }
 
-
 module_init(mod_init);
 module_exit(mod_exit);
 

From 2b57681f94aff059e75a7a5041f0f66ce0627fc4 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 17 Jun 2021 22:03:15 +0800
Subject: [PATCH 2115/3292] net: hdlc_ppp: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index e25b2f058d8c..32f01d7e12d4 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -41,6 +41,7 @@ static const char *const code_names[CP_CODES] = {
 	"0", "ConfReq", "ConfAck", "ConfNak", "ConfRej", "TermReq",
 	"TermAck", "CodeRej", "ProtoRej", "EchoReq", "EchoReply", "Discard"
 };
+
 static char debug_buffer[64 + 3 * DEBUG_CP];
 #endif
 
@@ -90,6 +91,7 @@ static const char *const state_names[STATES] = {
 	"Closed", "Stopped", "Stopping", "ReqSent", "AckRecv", "AckSent",
 	"Opened"
 };
+
 static const char *const event_names[EVENTS] = {
 	"Start", "Stop", "TO+", "TO-", "RCR+", "RCR-", "RCA", "RCN",
 	"RTR", "RTA", "RUC", "RXJ+", "RXJ-"
@@ -194,6 +196,7 @@ static int ppp_hard_header(struct sk_buff *skb, struct net_device *dev,
 static void ppp_tx_flush(void)
 {
 	struct sk_buff *skb;
+
 	while ((skb = skb_dequeue(&tx_queue)) != NULL)
 		dev_queue_xmit(skb);
 }
@@ -616,6 +619,7 @@ static void ppp_start(struct net_device *dev)
 
 	for (i = 0; i < IDX_COUNT; i++) {
 		struct proto *proto = &ppp->protos[i];
+
 		proto->dev = dev;
 		timer_setup(&proto->timer, ppp_timer, 0);
 		proto->state = CLOSED;

From cb36c4112c528ffa6b2005bb083559c54d66810e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 17 Jun 2021 22:03:16 +0800
Subject: [PATCH 2116/3292] net: hdlc_ppp: fix the code style issue about "foo*
 bar"

Fix the checkpatch error as "foo* bar" or "foo*bar" should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 32f01d7e12d4..861491206f12 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -102,12 +102,12 @@ static struct sk_buff_head tx_queue; /* used when holding the spin lock */
 
 static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr);
 
-static inline struct ppp* get_ppp(struct net_device *dev)
+static inline struct ppp *get_ppp(struct net_device *dev)
 {
 	return (struct ppp *)dev_to_hdlc(dev)->state;
 }
 
-static inline struct proto* get_proto(struct net_device *dev, u16 pid)
+static inline struct proto *get_proto(struct net_device *dev, u16 pid)
 {
 	struct ppp *ppp = get_ppp(dev);
 
@@ -123,7 +123,7 @@ static inline struct proto* get_proto(struct net_device *dev, u16 pid)
 	}
 }
 
-static inline const char* proto_name(u16 pid)
+static inline const char *proto_name(u16 pid)
 {
 	switch (pid) {
 	case PID_LCP:
@@ -139,7 +139,7 @@ static inline const char* proto_name(u16 pid)
 
 static __be16 ppp_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
-	struct hdlc_header *data = (struct hdlc_header*)skb->data;
+	struct hdlc_header *data = (struct hdlc_header *)skb->data;
 
 	if (skb->len < sizeof(struct hdlc_header))
 		return htons(ETH_P_HDLC);
@@ -171,7 +171,7 @@ static int ppp_hard_header(struct sk_buff *skb, struct net_device *dev,
 #endif
 
 	skb_push(skb, sizeof(struct hdlc_header));
-	data = (struct hdlc_header*)skb->data;
+	data = (struct hdlc_header *)skb->data;
 
 	data->address = HDLC_ADDR_ALLSTATIONS;
 	data->control = HDLC_CTRL_UI;
@@ -432,7 +432,7 @@ err_out:
 
 static int ppp_rx(struct sk_buff *skb)
 {
-	struct hdlc_header *hdr = (struct hdlc_header*)skb->data;
+	struct hdlc_header *hdr = (struct hdlc_header *)skb->data;
 	struct net_device *dev = skb->dev;
 	struct ppp *ppp = get_ppp(dev);
 	struct proto *proto;
@@ -490,7 +490,7 @@ static int ppp_rx(struct sk_buff *skb)
 	if (pid == PID_LCP)
 		switch (cp->code) {
 		case LCP_PROTO_REJ:
-			pid = ntohs(*(__be16*)skb->data);
+			pid = ntohs(*(__be16 *)skb->data);
 			if (pid == PID_LCP || pid == PID_IPCP ||
 			    pid == PID_IPV6CP)
 				ppp_cp_event(dev, pid, RXJ_BAD, 0, 0,

From 4ec479527b9a637d4115d428f0b59d28f0760723 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 17 Jun 2021 22:03:17 +0800
Subject: [PATCH 2117/3292] net: hdlc_ppp: move out assignment in if condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 861491206f12..fb5102c1afc6 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -375,7 +375,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id,
 	u8 *out;
 	unsigned int len = req_len, nak_len = 0, rej_len = 0;
 
-	if (!(out = kmalloc(len, GFP_ATOMIC))) {
+	out = kmalloc(len, GFP_ATOMIC);
+	if (!out) {
 		dev->stats.rx_dropped++;
 		return;	/* out of memory, ignore CR packet */
 	}

From ee58a3c7c6bbabb0bf3685a8b437b5d4be33890c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 17 Jun 2021 22:03:18 +0800
Subject: [PATCH 2118/3292] net: hdlc_ppp: remove unnecessary out of memory
 message

This patch removes unnecessary out of memory message,
to fix the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index fb5102c1afc6..aec9cc4d5b48 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -219,10 +219,9 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code,
 
 	skb = dev_alloc_skb(sizeof(struct hdlc_header) +
 			    sizeof(struct cp_header) + magic_len + len);
-	if (!skb) {
-		netdev_warn(dev, "out of memory in ppp_tx_cp()\n");
+	if (!skb)
 		return;
-	}
+
 	skb_reserve(skb, sizeof(struct hdlc_header));
 
 	cp = skb_put(skb, sizeof(struct cp_header));

From 37cb4b9ce062d9a6657bcce639644c2e3d07ccf8 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 17 Jun 2021 22:03:19 +0800
Subject: [PATCH 2119/3292] net: hdlc_ppp: add required space

Add space required after that ','.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hdlc_ppp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index aec9cc4d5b48..834be2ae3e9e 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -677,7 +677,8 @@ static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
 
 		/* no settable parameters */
 
-		result = hdlc->attach(dev, ENCODING_NRZ,PARITY_CRC16_PR1_CCITT);
+		result = hdlc->attach(dev, ENCODING_NRZ,
+				      PARITY_CRC16_PR1_CCITT);
 		if (result)
 			return result;
 

From 2f4348e5a86198704368a699a7c4cdeb21d569f5 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 11 Jun 2021 10:01:18 -0700
Subject: [PATCH 2120/3292] EDAC/skx_common: Add new ADXL components for
 2-level memory

Some Intel servers may configure memory in 2 levels, using
fast "near" memory (e.g. DDR) as a cache for larger, slower,
"far" memory (e.g. 3D X-point).

In these configurations the BIOS ADXL address translation for
an address in a 2-level memory range will provide details of
both the "near" and far components.

Current exported ADXL components are only for 1-level memory
system or for 2nd level memory of 2-level memory system. So
add new ADXL components for 1st level memory of 2-level memory
system to fully support 2-level memory system and the detection
of memory error source(1st level memory or 2nd level memory).

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210611170123.1057025-2-tony.luck@intel.com
---
 drivers/edac/skx_common.c | 67 ++++++++++++++++++++++++++++++++-------
 drivers/edac/skx_common.h | 11 +++++++
 2 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 81c3e2ec6f56..c8691abb720d 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -23,10 +23,13 @@
 #include "skx_common.h"
 
 static const char * const component_names[] = {
-	[INDEX_SOCKET]	= "ProcessorSocketId",
-	[INDEX_MEMCTRL]	= "MemoryControllerId",
-	[INDEX_CHANNEL]	= "ChannelId",
-	[INDEX_DIMM]	= "DimmSlotId",
+	[INDEX_SOCKET]		= "ProcessorSocketId",
+	[INDEX_MEMCTRL]		= "MemoryControllerId",
+	[INDEX_CHANNEL]		= "ChannelId",
+	[INDEX_DIMM]		= "DimmSlotId",
+	[INDEX_NM_MEMCTRL]	= "NmMemoryControllerId",
+	[INDEX_NM_CHANNEL]	= "NmChannelId",
+	[INDEX_NM_DIMM]		= "NmDimmSlotId",
 };
 
 static int component_indices[ARRAY_SIZE(component_names)];
@@ -34,12 +37,14 @@ static int adxl_component_count;
 static const char * const *adxl_component_names;
 static u64 *adxl_values;
 static char *adxl_msg;
+static unsigned long adxl_nm_bitmap;
 
 static char skx_msg[MSG_SIZE];
 static skx_decode_f skx_decode;
 static skx_show_retry_log_f skx_show_retry_rd_err_log;
 static u64 skx_tolm, skx_tohm;
 static LIST_HEAD(dev_edac_list);
+static bool skx_mem_cfg_2lm;
 
 int __init skx_adxl_get(void)
 {
@@ -56,14 +61,25 @@ int __init skx_adxl_get(void)
 		for (j = 0; names[j]; j++) {
 			if (!strcmp(component_names[i], names[j])) {
 				component_indices[i] = j;
+
+				if (i >= INDEX_NM_FIRST)
+					adxl_nm_bitmap |= 1 << i;
+
 				break;
 			}
 		}
 
-		if (!names[j])
+		if (!names[j] && i < INDEX_NM_FIRST)
 			goto err;
 	}
 
+	if (skx_mem_cfg_2lm) {
+		if (!adxl_nm_bitmap)
+			skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
+		else
+			edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
+	}
+
 	adxl_component_names = names;
 	while (*names++)
 		adxl_component_count++;
@@ -99,7 +115,7 @@ void __exit skx_adxl_put(void)
 	kfree(adxl_msg);
 }
 
-static bool skx_adxl_decode(struct decoded_addr *res)
+static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
 {
 	struct skx_dev *d;
 	int i, len = 0;
@@ -116,11 +132,20 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	}
 
 	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
-	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
-	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
-	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+	if (error_in_1st_level_mem) {
+		res->imc     = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
+			       (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
+		res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
+			       (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
+		res->dimm    = (adxl_nm_bitmap & BIT_NM_DIMM) ?
+			       (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
+	} else {
+		res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+		res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+		res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+	}
 
-	if (res->imc > NUM_IMC - 1) {
+	if (res->imc > NUM_IMC - 1 || res->imc < 0) {
 		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
 		return false;
 	}
@@ -151,6 +176,11 @@ static bool skx_adxl_decode(struct decoded_addr *res)
 	return true;
 }
 
+void skx_set_mem_cfg(bool mem_cfg_2lm)
+{
+	skx_mem_cfg_2lm = mem_cfg_2lm;
+}
+
 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
 {
 	skx_decode = decode;
@@ -578,6 +608,21 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			     optype, skx_msg);
 }
 
+static bool skx_error_in_1st_level_mem(const struct mce *m)
+{
+	u32 errcode;
+
+	if (!skx_mem_cfg_2lm)
+		return false;
+
+	errcode = GET_BITFIELD(m->status, 0, 15);
+
+	if ((errcode & 0xef80) != 0x280)
+		return false;
+
+	return true;
+}
+
 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 			void *data)
 {
@@ -597,7 +642,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	res.addr = mce->addr;
 
 	if (adxl_component_count) {
-		if (!skx_adxl_decode(&res))
+		if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
 			return NOTIFY_DONE;
 	} else if (!skx_decode || !skx_decode(&res)) {
 		return NOTIFY_DONE;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index bf56bebff138..8b5a49058ce4 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -9,6 +9,8 @@
 #ifndef _SKX_COMM_EDAC_H
 #define _SKX_COMM_EDAC_H
 
+#include <linux/bits.h>
+
 #define MSG_SIZE		1024
 
 /*
@@ -92,9 +94,17 @@ enum {
 	INDEX_MEMCTRL,
 	INDEX_CHANNEL,
 	INDEX_DIMM,
+	INDEX_NM_FIRST,
+	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
+	INDEX_NM_CHANNEL,
+	INDEX_NM_DIMM,
 	INDEX_MAX
 };
 
+#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
+#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
+#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
+
 struct decoded_addr {
 	struct skx_dev *dev;
 	u64	addr;
@@ -133,6 +143,7 @@ typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int le
 int __init skx_adxl_get(void);
 void __exit skx_adxl_put(void);
 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
+void skx_set_mem_cfg(bool mem_cfg_2lm);
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
 int skx_get_node_id(struct skx_dev *d, u8 *id);

From 4bd4d32e9a38d7ffb091b4109ab63c8f601e5678 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 11 Jun 2021 10:01:19 -0700
Subject: [PATCH 2121/3292] EDAC/i10nm: Add detection of memory levels for
 ICX/SPR servers

Current i10nm_edac driver is only for system configured in 1-level
memory. If the system is configured in 2-level memory, the driver
doesn't report the 1st level memory DIMM for the error address, even
if the error occurs in the 1st level memory.

Both Ice Lake servers and Sapphire Rapids servers can be configured
in 2-level memory. Add detection of memory levels to i10nm_edac for
the two kinds of servers so that the driver can report the 2nd level
memory DIMM or the 1st level memory DIMM according to error source.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210611170123.1057025-3-tony.luck@intel.com
---
 drivers/edac/i10nm_base.c | 39 +++++++++++++++++++++++++++++++++++++++
 drivers/edac/skx_common.h |  3 +++
 2 files changed, 42 insertions(+)

diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 238a4ad1e526..91431d8922a0 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -24,6 +24,8 @@
 	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
 #define I10NM_GET_IMC_BAR(d, i, reg)	\
 	pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
+#define I10NM_GET_SAD(d, offset, i, reg)\
+	pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
 #define I10NM_GET_DIMMMTR(m, i, j)	\
 	readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4)
 #define I10NM_GET_MCDDRTCFG(m, i, j)	\
@@ -38,6 +40,10 @@
 #define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
 					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
 
+#define I10NM_MAX_SAD			16
+#define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
+#define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)
+
 static struct list_head *i10nm_edac_list;
 
 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
@@ -63,6 +69,31 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
 	return pdev;
 }
 
+static bool i10nm_check_2lm(struct res_config *cfg)
+{
+	struct skx_dev *d;
+	u32 reg;
+	int i;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[1],
+						 PCI_SLOT(cfg->sad_all_devfn),
+						 PCI_FUNC(cfg->sad_all_devfn));
+		if (!d->sad_all)
+			continue;
+
+		for (i = 0; i < I10NM_MAX_SAD; i++) {
+			I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
+			if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
+				edac_dbg(2, "2-level memory configuration.\n");
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 static int i10nm_get_all_munits(void)
 {
 	struct pci_dev *mdev;
@@ -132,6 +163,8 @@ static struct res_config i10nm_cfg0 = {
 	.decs_did		= 0x3452,
 	.busno_cfg_offset	= 0xcc,
 	.ddr_chan_mmio_sz	= 0x4000,
+	.sad_all_devfn		= PCI_DEVFN(29, 0),
+	.sad_all_offset		= 0x108,
 };
 
 static struct res_config i10nm_cfg1 = {
@@ -139,6 +172,8 @@ static struct res_config i10nm_cfg1 = {
 	.decs_did		= 0x3452,
 	.busno_cfg_offset	= 0xd0,
 	.ddr_chan_mmio_sz	= 0x4000,
+	.sad_all_devfn		= PCI_DEVFN(29, 0),
+	.sad_all_offset		= 0x108,
 };
 
 static struct res_config spr_cfg = {
@@ -147,6 +182,8 @@ static struct res_config spr_cfg = {
 	.busno_cfg_offset	= 0xd0,
 	.ddr_chan_mmio_sz	= 0x8000,
 	.support_ddr5		= true,
+	.sad_all_devfn		= PCI_DEVFN(10, 0),
+	.sad_all_offset		= 0x300,
 };
 
 static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -296,6 +333,8 @@ static int __init i10nm_init(void)
 		return -ENODEV;
 	}
 
+	skx_set_mem_cfg(i10nm_check_2lm(cfg));
+
 	rc = i10nm_get_all_munits();
 	if (rc < 0)
 		goto fail;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 8b5a49058ce4..34e89f7ddf93 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -133,6 +133,9 @@ struct res_config {
 	/* Per DDR channel memory-mapped I/O size */
 	int ddr_chan_mmio_sz;
 	bool support_ddr5;
+	/* SAD device number and function number */
+	unsigned int sad_all_devfn;
+	int sad_all_offset;
 };
 
 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,

From c945088384d00e6eb61535cc4ba25bc062090909 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 11 Jun 2021 10:01:20 -0700
Subject: [PATCH 2122/3292] EDAC/i10nm: Add support for high bandwidth memory

A future Xeon processor will include in-package HBM (high bandwidth
memory). The in-package HBM memory controller shares the same
architecture with the regular DDR memory controller.

Add the HBM memory controller devices for EDAC support.

Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210611170123.1057025-4-tony.luck@intel.com
---
 drivers/edac/i10nm_base.c | 132 ++++++++++++++++++++++++++++++++++----
 drivers/edac/skx_common.c |  15 +++--
 drivers/edac/skx_common.h |  20 +++++-
 3 files changed, 148 insertions(+), 19 deletions(-)

diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 91431d8922a0..fb7e72d3fd2c 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -13,7 +13,7 @@
 #include "edac_module.h"
 #include "skx_common.h"
 
-#define I10NM_REVISION	"v0.0.4"
+#define I10NM_REVISION	"v0.0.5"
 #define EDAC_MOD_STR	"i10nm_edac"
 
 /* Debug macros */
@@ -26,19 +26,33 @@
 	pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
 #define I10NM_GET_SAD(d, offset, i, reg)\
 	pci_read_config_dword((d)->sad_all, (offset) + (i) * 8, &(reg))
+#define I10NM_GET_HBM_IMC_BAR(d, reg)	\
+	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
+#define I10NM_GET_CAPID3_CFG(d, reg)	\
+	pci_read_config_dword((d)->pcu_cr3, 0x90, &(reg))
 #define I10NM_GET_DIMMMTR(m, i, j)	\
-	readl((m)->mbase + 0x2080c + (i) * (m)->chan_mmio_sz + (j) * 4)
+	readl((m)->mbase + ((m)->hbm_mc ? 0x80c : 0x2080c) + \
+	(i) * (m)->chan_mmio_sz + (j) * 4)
 #define I10NM_GET_MCDDRTCFG(m, i, j)	\
-	readl((m)->mbase + 0x20970 + (i) * (m)->chan_mmio_sz + (j) * 4)
+	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
+	(i) * (m)->chan_mmio_sz + (j) * 4)
 #define I10NM_GET_MCMTR(m, i)		\
-	readl((m)->mbase + 0x20ef8 + (i) * (m)->chan_mmio_sz)
+	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : 0x20ef8) + \
+	(i) * (m)->chan_mmio_sz)
 #define I10NM_GET_AMAP(m, i)		\
-	readl((m)->mbase + 0x20814 + (i) * (m)->chan_mmio_sz)
+	readl((m)->mbase + ((m)->hbm_mc ? 0x814 : 0x20814) + \
+	(i) * (m)->chan_mmio_sz)
 
 #define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
 #define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
 #define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
 					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
+#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
+	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
+
+#define I10NM_HBM_IMC_MMIO_SIZE		0x9000
+#define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
+#define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)
 
 #define I10NM_MAX_SAD			16
 #define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
@@ -94,7 +108,7 @@ static bool i10nm_check_2lm(struct res_config *cfg)
 	return false;
 }
 
-static int i10nm_get_all_munits(void)
+static int i10nm_get_ddr_munits(void)
 {
 	struct pci_dev *mdev;
 	void __iomem *mbase;
@@ -122,7 +136,7 @@ static int i10nm_get_all_munits(void)
 		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
 			 j++, base, reg);
 
-		for (i = 0; i < I10NM_NUM_IMC; i++) {
+		for (i = 0; i < I10NM_NUM_DDR_IMC; i++) {
 			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
 						   12 + i, 0);
 			if (i == 0 && !mdev) {
@@ -158,6 +172,90 @@ static int i10nm_get_all_munits(void)
 	return 0;
 }
 
+static bool i10nm_check_hbm_imc(struct skx_dev *d)
+{
+	u32 reg;
+
+	if (I10NM_GET_CAPID3_CFG(d, reg)) {
+		i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
+		return false;
+	}
+
+	return I10NM_IS_HBM_PRESENT(reg) != 0;
+}
+
+static int i10nm_get_hbm_munits(void)
+{
+	struct pci_dev *mdev;
+	void __iomem *mbase;
+	u32 reg, off, mcmtr;
+	struct skx_dev *d;
+	int i, lmc;
+	u64 base;
+
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[1], 30, 3);
+		if (!d->pcu_cr3)
+			return -ENODEV;
+
+		if (!i10nm_check_hbm_imc(d)) {
+			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
+			return -ENODEV;
+		}
+
+		if (I10NM_GET_SCK_BAR(d, reg)) {
+			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
+			return -ENODEV;
+		}
+		base = I10NM_GET_SCK_MMIO_BASE(reg);
+
+		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
+			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
+			return -ENODEV;
+		}
+		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
+
+		lmc = I10NM_NUM_DDR_IMC;
+
+		for (i = 0; i < I10NM_NUM_HBM_IMC; i++) {
+			mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
+						   12 + i / 4, 1 + i % 4);
+			if (i == 0 && !mdev) {
+				i10nm_printk(KERN_ERR, "No hbm mc found\n");
+				return -ENODEV;
+			}
+			if (!mdev)
+				continue;
+
+			d->imc[lmc].mdev = mdev;
+			off = i * I10NM_HBM_IMC_MMIO_SIZE;
+
+			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
+				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
+
+			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
+			if (!mbase) {
+				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
+					     base + off);
+				return -ENOMEM;
+			}
+
+			d->imc[lmc].mbase = mbase;
+			d->imc[lmc].hbm_mc = true;
+
+			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
+			if (!I10NM_IS_HBM_IMC(mcmtr)) {
+				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
+				return -ENODEV;
+			}
+
+			lmc++;
+		}
+	}
+
+	return 0;
+}
+
 static struct res_config i10nm_cfg0 = {
 	.type			= I10NM,
 	.decs_did		= 0x3452,
@@ -181,6 +279,7 @@ static struct res_config spr_cfg = {
 	.decs_did		= 0x3252,
 	.busno_cfg_offset	= 0xd0,
 	.ddr_chan_mmio_sz	= 0x8000,
+	.hbm_chan_mmio_sz	= 0x4000,
 	.support_ddr5		= true,
 	.sad_all_devfn		= PCI_DEVFN(10, 0),
 	.sad_all_offset		= 0x300,
@@ -216,13 +315,13 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
 	struct dimm_info *dimm;
 	int i, j, ndimms;
 
-	for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
+	for (i = 0; i < imc->num_channels; i++) {
 		if (!imc->mbase)
 			continue;
 
 		ndimms = 0;
 		amap = I10NM_GET_AMAP(imc, i);
-		for (j = 0; j < I10NM_NUM_DIMMS; j++) {
+		for (j = 0; j < imc->num_dimms; j++) {
 			dimm = edac_get_dimm(mci, i, j, 0);
 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
 			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
@@ -335,8 +434,9 @@ static int __init i10nm_init(void)
 
 	skx_set_mem_cfg(i10nm_check_2lm(cfg));
 
-	rc = i10nm_get_all_munits();
-	if (rc < 0)
+	rc = i10nm_get_ddr_munits();
+
+	if (i10nm_get_hbm_munits() && rc)
 		goto fail;
 
 	list_for_each_entry(d, i10nm_edac_list, list) {
@@ -357,7 +457,15 @@ static int __init i10nm_init(void)
 			d->imc[i].lmc = i;
 			d->imc[i].src_id  = src_id;
 			d->imc[i].node_id = node_id;
-			d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
+			if (d->imc[i].hbm_mc) {
+				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
+				d->imc[i].num_channels = I10NM_NUM_HBM_CHANNELS;
+				d->imc[i].num_dimms    = I10NM_NUM_HBM_DIMMS;
+			} else {
+				d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
+				d->imc[i].num_channels = I10NM_NUM_DDR_CHANNELS;
+				d->imc[i].num_dimms    = I10NM_NUM_DDR_DIMMS;
+			}
 
 			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
 					      "Intel_10nm Socket", EDAC_MOD_STR,
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index c8691abb720d..5e83f59bef8a 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -343,9 +343,9 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
 
 	ranks = numrank(mtr);
 	rows = numrow(mtr);
-	cols = numcol(mtr);
+	cols = imc->hbm_mc ? 6 : numcol(mtr);
 
-	if (cfg->support_ddr5 && (amap & 0x8)) {
+	if (cfg->support_ddr5 && ((amap & 0x8) || imc->hbm_mc)) {
 		banks = 32;
 		mtype = MEM_DDR5;
 	} else {
@@ -374,8 +374,13 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
 	dimm->dtype = get_width(mtr);
 	dimm->mtype = mtype;
 	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
-	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
-		 imc->src_id, imc->lmc, chan, dimmno);
+
+	if (imc->hbm_mc)
+		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
+			 imc->src_id, imc->lmc, chan);
+	else
+		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+			 imc->src_id, imc->lmc, chan, dimmno);
 
 	return 1;
 }
@@ -703,6 +708,8 @@ void skx_remove(void)
 		}
 		if (d->util_all)
 			pci_dev_put(d->util_all);
+		if (d->pcu_cr3)
+			pci_dev_put(d->pcu_cr3);
 		if (d->sad_all)
 			pci_dev_put(d->sad_all);
 		if (d->uracu)
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 34e89f7ddf93..01f67e731766 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -32,9 +32,17 @@
 #define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
 #define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */
 
-#define I10NM_NUM_IMC		4
-#define I10NM_NUM_CHANNELS	2
-#define I10NM_NUM_DIMMS		2
+#define I10NM_NUM_DDR_IMC	4
+#define I10NM_NUM_DDR_CHANNELS	2
+#define I10NM_NUM_DDR_DIMMS	2
+
+#define I10NM_NUM_HBM_IMC	16
+#define I10NM_NUM_HBM_CHANNELS	2
+#define I10NM_NUM_HBM_DIMMS	1
+
+#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
+#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
+#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
 
 #define MAX(a, b)	((a) > (b) ? (a) : (b))
 #define NUM_IMC		MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
@@ -56,12 +64,16 @@ struct skx_dev {
 	struct pci_dev *sad_all;
 	struct pci_dev *util_all;
 	struct pci_dev *uracu; /* for i10nm CPU */
+	struct pci_dev *pcu_cr3; /* for HBM memory detection */
 	u32 mcroute;
 	struct skx_imc {
 		struct mem_ctl_info *mci;
 		struct pci_dev *mdev; /* for i10nm CPU */
 		void __iomem *mbase;  /* for i10nm CPU */
 		int chan_mmio_sz;     /* for i10nm CPU */
+		int num_channels; /* channels per memory controller */
+		int num_dimms; /* dimms per channel */
+		bool hbm_mc;
 		u8 mc;	/* system wide mc# */
 		u8 lmc;	/* socket relative mc# */
 		u8 src_id, node_id;
@@ -132,6 +144,8 @@ struct res_config {
 	int busno_cfg_offset;
 	/* Per DDR channel memory-mapped I/O size */
 	int ddr_chan_mmio_sz;
+	/* Per HBM channel memory-mapped I/O size */
+	int hbm_chan_mmio_sz;
 	bool support_ddr5;
 	/* SAD device number and function number */
 	unsigned int sad_all_devfn;

From 4e591c056819850366d2fcb642f4f40dd4eef93a Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 11 Jun 2021 10:01:21 -0700
Subject: [PATCH 2123/3292] EDAC/igen6: Add Intel ICL-NNPI SoC support

The Ice Lake Neural Network Processor for Deep Learning Inference
(ICL-NNPI) SoC shares the same memory controller and In-Band ECC with
Elkhart Lake SoC. Add the ICL-NNPI compute die IDs for EDAC support.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210611170123.1057025-5-tony.luck@intel.com
---
 drivers/edac/igen6_edac.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 6be9986fc6bd..0fedf2d206d3 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -183,6 +183,12 @@ static struct work_struct ecclog_work;
 #define DID_EHL_SKU14	0x4534
 #define DID_EHL_SKU15	0x4536
 
+/* Compute die IDs for ICL-NNPI with IBECC */
+#define DID_ICL_SKU8	0x4581
+#define DID_ICL_SKU10	0x4585
+#define DID_ICL_SKU11	0x4589
+#define DID_ICL_SKU12	0x458d
+
 static bool ehl_ibecc_available(struct pci_dev *pdev)
 {
 	u32 v;
@@ -212,6 +218,17 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
 	return eaddr;
 }
 
+static bool icl_ibecc_available(struct pci_dev *pdev)
+{
+	u32 v;
+
+	if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
+		return false;
+
+	return !(CAPID_C_IBECC & v) &&
+		(boot_cpu_data.x86_stepping >= 1);
+}
+
 static struct res_config ehl_cfg = {
 	.num_imc	 = 1,
 	.ibecc_base	 = 0xdc00,
@@ -220,6 +237,14 @@ static struct res_config ehl_cfg = {
 	.err_addr_to_imc_addr  = ehl_err_addr_to_imc_addr,
 };
 
+static struct res_config icl_cfg = {
+	.num_imc	 = 1,
+	.ibecc_base	 = 0xd800,
+	.ibecc_available = icl_ibecc_available,
+	.err_addr_to_sys_addr  = ehl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr  = ehl_err_addr_to_imc_addr,
+};
+
 static const struct pci_device_id igen6_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
@@ -232,6 +257,10 @@ static const struct pci_device_id igen6_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
 	{ },
 };
 MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);

From 0b7338b27e821a61cfa695077aa352312c0ab2f6 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 11 Jun 2021 10:01:22 -0700
Subject: [PATCH 2124/3292] EDAC/igen6: Add Intel Tiger Lake SoC support

Tiger Lake SoC shares the same memory controller and In-Band ECC
(IBECC) IP with Elkhart Lake SoC. The main differences are that Tiger
Lake has two memory controllers each associated with one IBECC and
uses Machine Check for the memory error notification.

So add Tiger Lake compute die IDs, MCE decoding chain registration,
and memory slice decoding for Tiger Lake EDAC support.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210611170123.1057025-6-tony.luck@intel.com
---
 drivers/edac/igen6_edac.c | 273 +++++++++++++++++++++++++++++++++++---
 1 file changed, 253 insertions(+), 20 deletions(-)

diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 0fedf2d206d3..9f653ace50e7 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -22,6 +22,7 @@
 #include <linux/io.h>
 #include <asm/mach_traps.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 
 #include "edac_mc.h"
 #include "edac_module.h"
@@ -40,7 +41,7 @@
 
 #define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
 
-#define NUM_IMC				1 /* Max memory controllers */
+#define NUM_IMC				2 /* Max memory controllers */
 #define NUM_CHANNELS			2 /* Max channels */
 #define NUM_DIMMS			2 /* Max DIMMs per channel */
 
@@ -54,6 +55,10 @@
 #define CAPID_C_OFFSET			0xec
 #define CAPID_C_IBECC			BIT(15)
 
+/* Capability register E */
+#define CAPID_E_OFFSET			0xf0
+#define CAPID_E_IBECC			BIT(12)
+
 /* Error Status */
 #define ERRSTS_OFFSET			0xc8
 #define ERRSTS_CE			BIT_ULL(6)
@@ -109,12 +114,20 @@
 #define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
 #define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)
 
+/* Parameters for memory slice decode stage */
+#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
+#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
+
 static struct res_config {
+	bool machine_check;
 	int num_imc;
+	u32 cmf_base;
+	u32 cmf_size;
+	u32 ms_hash_offset;
 	u32 ibecc_base;
 	bool (*ibecc_available)(struct pci_dev *pdev);
 	/* Convert error address logged in IBECC to system physical address */
-	u64 (*err_addr_to_sys_addr)(u64 eaddr);
+	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
 	/* Convert error address logged in IBECC to integrated memory controller address */
 	u64 (*err_addr_to_imc_addr)(u64 eaddr);
 } *res_cfg;
@@ -125,6 +138,7 @@ struct igen6_imc {
 	struct pci_dev *pdev;
 	struct device dev;
 	void __iomem *window;
+	u64 size;
 	u64 ch_s_size;
 	int ch_l_map;
 	u64 dimm_s_size[NUM_CHANNELS];
@@ -134,6 +148,9 @@ struct igen6_imc {
 
 static struct igen6_pvt {
 	struct igen6_imc imc[NUM_IMC];
+	u64 ms_hash;
+	u64 ms_s_size;
+	int ms_l_map;
 } *igen6_pvt;
 
 /* The top of low usable DRAM */
@@ -189,6 +206,9 @@ static struct work_struct ecclog_work;
 #define DID_ICL_SKU11	0x4589
 #define DID_ICL_SKU12	0x458d
 
+/* Compute die IDs for Tiger Lake with IBECC */
+#define DID_TGL_SKU	0x9a14
+
 static bool ehl_ibecc_available(struct pci_dev *pdev)
 {
 	u32 v;
@@ -199,7 +219,7 @@ static bool ehl_ibecc_available(struct pci_dev *pdev)
 	return !!(CAPID_C_IBECC & v);
 }
 
-static u64 ehl_err_addr_to_sys_addr(u64 eaddr)
+static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
 {
 	return eaddr;
 }
@@ -229,20 +249,103 @@ static bool icl_ibecc_available(struct pci_dev *pdev)
 		(boot_cpu_data.x86_stepping >= 1);
 }
 
+static bool tgl_ibecc_available(struct pci_dev *pdev)
+{
+	u32 v;
+
+	if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
+		return false;
+
+	return !(CAPID_E_IBECC & v);
+}
+
+static u64 mem_addr_to_sys_addr(u64 maddr)
+{
+	if (maddr < igen6_tolud)
+		return maddr;
+
+	if (igen6_tom <= _4GB)
+		return maddr - igen6_tolud + _4GB;
+
+	if (maddr < _4GB)
+		return maddr - igen6_tolud + igen6_tom;
+
+	return maddr;
+}
+
+static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
+{
+	u64 hash_addr = addr & mask, hash = hash_init;
+	u64 intlv = (addr >> intlv_bit) & 1;
+	int i;
+
+	for (i = 6; i < 20; i++)
+		hash ^= (hash_addr >> i) & 1;
+
+	return hash ^ intlv;
+}
+
+static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
+{
+	u64 maddr, hash, mask, ms_s_size;
+	int intlv_bit;
+	u32 ms_hash;
+
+	ms_s_size = igen6_pvt->ms_s_size;
+	if (eaddr >= ms_s_size)
+		return eaddr + ms_s_size;
+
+	ms_hash = igen6_pvt->ms_hash;
+
+	mask = MEM_SLICE_HASH_MASK(ms_hash);
+	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;
+
+	maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
+		GET_BITFIELD(eaddr, 0, intlv_bit - 1);
+
+	hash = mem_slice_hash(maddr, mask, mc, intlv_bit);
+
+	return maddr | (hash << intlv_bit);
+}
+
+static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+	u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);
+
+	return mem_addr_to_sys_addr(maddr);
+}
+
+static u64 tgl_err_addr_to_imc_addr(u64 eaddr)
+{
+	return eaddr;
+}
+
 static struct res_config ehl_cfg = {
-	.num_imc	 = 1,
-	.ibecc_base	 = 0xdc00,
-	.ibecc_available = ehl_ibecc_available,
-	.err_addr_to_sys_addr  = ehl_err_addr_to_sys_addr,
-	.err_addr_to_imc_addr  = ehl_err_addr_to_imc_addr,
+	.num_imc		= 1,
+	.ibecc_base		= 0xdc00,
+	.ibecc_available	= ehl_ibecc_available,
+	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
 };
 
 static struct res_config icl_cfg = {
-	.num_imc	 = 1,
-	.ibecc_base	 = 0xd800,
-	.ibecc_available = icl_ibecc_available,
-	.err_addr_to_sys_addr  = ehl_err_addr_to_sys_addr,
-	.err_addr_to_imc_addr  = ehl_err_addr_to_imc_addr,
+	.num_imc		= 1,
+	.ibecc_base		= 0xd800,
+	.ibecc_available	= icl_ibecc_available,
+	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
+};
+
+static struct res_config tgl_cfg = {
+	.machine_check		= true,
+	.num_imc		= 2,
+	.cmf_base		= 0x11000,
+	.cmf_size		= 0x800,
+	.ms_hash_offset		= 0xac,
+	.ibecc_base		= 0xd400,
+	.ibecc_available	= tgl_ibecc_available,
+	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
 };
 
 static const struct pci_device_id igen6_pci_tbl[] = {
@@ -261,6 +364,7 @@ static const struct pci_device_id igen6_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
 	{ },
 };
 MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -519,7 +623,7 @@ static void ecclog_work_cb(struct work_struct *work)
 		eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
 			ECC_ERROR_LOG_ADDR_SHIFT;
 		res.mc	     = node->mc;
-		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr);
+		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
 		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
 
 		mci = igen6_pvt->imc[res.mc].mci;
@@ -569,6 +673,57 @@ static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 	return NMI_HANDLED;
 }
 
+static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
+			      void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	char *type;
+
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
+
+	/*
+	 * Ignore unless this is a memory related error.
+	 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
+	 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
+	 */
+	if ((mce->status & 0xefff) >> 7 != 1)
+		return NOTIFY_DONE;
+
+	if (mce->mcgstatus & MCG_STATUS_MCIP)
+		type = "Exception";
+	else
+		type = "Event";
+
+	edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
+		 mce->extcpu, type, mce->mcgstatus,
+		 mce->bank, mce->status);
+	edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
+	edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
+	edac_dbg(0, "MISC 0x%llx\n", mce->misc);
+	edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
+		 mce->cpuvendor, mce->cpuid, mce->time,
+		 mce->socketid, mce->apicid);
+	/*
+	 * We just use the Machine Check for the memory error notification.
+	 * Each memory controller is associated with an IBECC instance.
+	 * Directly read and clear the error information(error address and
+	 * error type) on all the IBECC instances so that we know on which
+	 * memory controller the memory error(s) occurred.
+	 */
+	if (!ecclog_handler())
+		return NOTIFY_DONE;
+
+	mce->kflags |= MCE_HANDLED_EDAC;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ecclog_mce_dec = {
+	.notifier_call	= ecclog_mce_handler,
+	.priority	= MCE_PRIO_EDAC,
+};
+
 static bool igen6_check_ecc(struct igen6_imc *imc)
 {
 	u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
@@ -602,6 +757,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
 		imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
 		imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
 		imc->dimm_l_map[i]  = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
+		imc->size += imc->dimm_s_size[i];
+		imc->size += imc->dimm_l_size[i];
 		ndimms = 0;
 
 		for (j = 0; j < NUM_DIMMS; j++) {
@@ -637,6 +794,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
 		}
 	}
 
+	edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
+
 	return 0;
 }
 
@@ -886,6 +1045,77 @@ static void igen6_unregister_mcis(void)
 	}
 }
 
+static int igen6_mem_slice_setup(u64 mchbar)
+{
+	struct igen6_imc *imc = &igen6_pvt->imc[0];
+	u64 base = mchbar + res_cfg->cmf_base;
+	u32 offset = res_cfg->ms_hash_offset;
+	u32 size = res_cfg->cmf_size;
+	u64 ms_s_size, ms_hash;
+	void __iomem *cmf;
+	int ms_l_map;
+
+	edac_dbg(2, "\n");
+
+	if (imc[0].size < imc[1].size) {
+		ms_s_size = imc[0].size;
+		ms_l_map  = 1;
+	} else {
+		ms_s_size = imc[1].size;
+		ms_l_map  = 0;
+	}
+
+	igen6_pvt->ms_s_size = ms_s_size;
+	igen6_pvt->ms_l_map  = ms_l_map;
+
+	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
+		 ms_s_size >> 20, ms_l_map);
+
+	cmf = ioremap(base, size);
+	if (!cmf) {
+		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);
+		return -ENODEV;
+	}
+
+	ms_hash = readq(cmf + offset);
+	igen6_pvt->ms_hash = ms_hash;
+
+	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);
+
+	iounmap(cmf);
+
+	return 0;
+}
+
+static int register_err_handler(void)
+{
+	int rc;
+
+	if (res_cfg->machine_check) {
+		mce_register_decode_chain(&ecclog_mce_dec);
+		return 0;
+	}
+
+	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
+				  0, IGEN6_NMI_NAME);
+	if (rc) {
+		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static void unregister_err_handler(void)
+{
+	if (res_cfg->machine_check) {
+		mce_unregister_decode_chain(&ecclog_mce_dec);
+		return;
+	}
+
+	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
+}
+
 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	u64 mchbar;
@@ -909,6 +1139,12 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			goto fail2;
 	}
 
+	if (res_cfg->num_imc > 1) {
+		rc = igen6_mem_slice_setup(mchbar);
+		if (rc)
+			goto fail2;
+	}
+
 	ecclog_pool = ecclog_gen_pool_create();
 	if (!ecclog_pool) {
 		rc = -ENOMEM;
@@ -921,12 +1157,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* Check if any pending errors before registering the NMI handler */
 	ecclog_handler();
 
-	rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
-				  0, IGEN6_NMI_NAME);
-	if (rc) {
-		igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
+	rc = register_err_handler();
+	if (rc)
 		goto fail3;
-	}
 
 	/* Enable error reporting */
 	rc = errcmd_enable_error_reporting(true);
@@ -954,7 +1187,7 @@ static void igen6_remove(struct pci_dev *pdev)
 
 	igen6_debug_teardown();
 	errcmd_enable_error_reporting(false);
-	unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
+	unregister_err_handler();
 	irq_work_sync(&ecclog_irq_work);
 	flush_work(&ecclog_work);
 	gen_pool_destroy(ecclog_pool);

From ad774bd5a8c23a319773ac3668382f24d62a39a8 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri, 11 Jun 2021 10:01:23 -0700
Subject: [PATCH 2125/3292] EDAC/igen6: Add Intel Alder Lake SoC support

Alder Lake SoC shares the same memory controller and In-Band ECC
(IBECC) IP with Tiger Lake SoC. Like Tiger Lake, it also has two
memory controllers each associated one IBECC instance. The minor
differences include the MMIO offset of each memory controller and
the type of memory error address logged in the IBECC.

So add Alder Lake compute die IDs, adjust the MMIO offset for each
memory controller and handle the type of memory error address logged
in the IBECC for Alder Lake EDAC support.

Tested-by: Vrukesh V Panse <vrukesh.v.panse@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210611170123.1057025-7-tony.luck@intel.com
---
 drivers/edac/igen6_edac.c | 84 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 11 deletions(-)

diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 9f653ace50e7..a07bbfd075d0 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -27,7 +27,7 @@
 #include "edac_mc.h"
 #include "edac_module.h"
 
-#define IGEN6_REVISION	"v2.4"
+#define IGEN6_REVISION	"v2.5"
 
 #define EDAC_MOD_STR	"igen6_edac"
 #define IGEN6_NMI_NAME	"igen6_ibecc"
@@ -75,7 +75,7 @@
 #define IBECC_ACTIVATE_EN		BIT(0)
 
 /* IBECC error log */
-#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + 0x170)
+#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
 #define ECC_ERROR_LOG_CE		BIT_ULL(62)
 #define ECC_ERROR_LOG_UE		BIT_ULL(63)
 #define ECC_ERROR_LOG_ADDR_SHIFT	5
@@ -89,27 +89,32 @@
 #define MCHBAR_SIZE			0x10000
 
 /* Parameters for the channel decode stage */
-#define MAD_INTER_CHANNEL_OFFSET	0x5000
+#define IMC_BASE			(res_cfg->imc_base)
+#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
 #define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
 #define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
 #define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
 #define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)
 
 /* Parameters for DRAM decode stage */
-#define MAD_INTRA_CH0_OFFSET		0x5004
+#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
 #define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)
 
 /* DIMM characteristics */
-#define MAD_DIMM_CH0_OFFSET		0x500c
+#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
 #define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
 #define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
 #define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
 #define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)
 
+/* Hash for memory controller selection */
+#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
+#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)
+
 /* Hash for channel selection */
-#define CHANNEL_HASH_OFFSET		0X5024
+#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
 /* Hash for enhanced channel selection */
-#define CHANNEL_EHASH_OFFSET		0X5028
+#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
 #define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
 #define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
 #define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)
@@ -121,15 +126,17 @@
 static struct res_config {
 	bool machine_check;
 	int num_imc;
+	u32 imc_base;
 	u32 cmf_base;
 	u32 cmf_size;
 	u32 ms_hash_offset;
 	u32 ibecc_base;
+	u32 ibecc_error_log_offset;
 	bool (*ibecc_available)(struct pci_dev *pdev);
 	/* Convert error address logged in IBECC to system physical address */
 	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
 	/* Convert error address logged in IBECC to integrated memory controller address */
-	u64 (*err_addr_to_imc_addr)(u64 eaddr);
+	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
 } *res_cfg;
 
 struct igen6_imc {
@@ -209,6 +216,12 @@ static struct work_struct ecclog_work;
 /* Compute die IDs for Tiger Lake with IBECC */
 #define DID_TGL_SKU	0x9a14
 
+/* Compute die IDs for Alder Lake with IBECC */
+#define DID_ADL_SKU1	0x4601
+#define DID_ADL_SKU2	0x4602
+#define DID_ADL_SKU3	0x4621
+#define DID_ADL_SKU4	0x4641
+
 static bool ehl_ibecc_available(struct pci_dev *pdev)
 {
 	u32 v;
@@ -224,7 +237,7 @@ static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
 	return eaddr;
 }
 
-static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
+static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
 {
 	if (eaddr < igen6_tolud)
 		return eaddr;
@@ -315,22 +328,51 @@ static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
 	return mem_addr_to_sys_addr(maddr);
 }
 
-static u64 tgl_err_addr_to_imc_addr(u64 eaddr)
+static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
 {
 	return eaddr;
 }
 
+static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+	return mem_addr_to_sys_addr(eaddr);
+}
+
+static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
+{
+	u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
+	struct igen6_imc *imc = &igen6_pvt->imc[mc];
+	int intlv_bit;
+	u32 mc_hash;
+
+	if (eaddr >= 2 * ms_s_size)
+		return eaddr - ms_s_size;
+
+	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);
+
+	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;
+
+	imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
+		   GET_BITFIELD(eaddr, 0, intlv_bit - 1);
+
+	return imc_addr;
+}
+
 static struct res_config ehl_cfg = {
 	.num_imc		= 1,
+	.imc_base		= 0x5000,
 	.ibecc_base		= 0xdc00,
 	.ibecc_available	= ehl_ibecc_available,
+	.ibecc_error_log_offset	= 0x170,
 	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
 	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
 };
 
 static struct res_config icl_cfg = {
 	.num_imc		= 1,
+	.imc_base		= 0x5000,
 	.ibecc_base		= 0xd800,
+	.ibecc_error_log_offset	= 0x170,
 	.ibecc_available	= icl_ibecc_available,
 	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
 	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
@@ -339,15 +381,28 @@ static struct res_config icl_cfg = {
 static struct res_config tgl_cfg = {
 	.machine_check		= true,
 	.num_imc		= 2,
+	.imc_base		= 0x5000,
 	.cmf_base		= 0x11000,
 	.cmf_size		= 0x800,
 	.ms_hash_offset		= 0xac,
 	.ibecc_base		= 0xd400,
+	.ibecc_error_log_offset	= 0x170,
 	.ibecc_available	= tgl_ibecc_available,
 	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
 	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
 };
 
+static struct res_config adl_cfg = {
+	.machine_check		= true,
+	.num_imc		= 2,
+	.imc_base		= 0xd800,
+	.ibecc_base		= 0xd400,
+	.ibecc_error_log_offset	= 0x68,
+	.ibecc_available	= tgl_ibecc_available,
+	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
+	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
+};
+
 static const struct pci_device_id igen6_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
@@ -365,6 +420,10 @@ static const struct pci_device_id igen6_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
 	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
+	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
 	{ },
 };
 MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -624,7 +683,7 @@ static void ecclog_work_cb(struct work_struct *work)
 			ECC_ERROR_LOG_ADDR_SHIFT;
 		res.mc	     = node->mc;
 		res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
-		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
+		res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
 
 		mci = igen6_pvt->imc[res.mc].mci;
 
@@ -1071,6 +1130,9 @@ static int igen6_mem_slice_setup(u64 mchbar)
 	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
 		 ms_s_size >> 20, ms_l_map);
 
+	if (!size)
+		return 0;
+
 	cmf = ioremap(base, size);
 	if (!cmf) {
 		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);

From f0a029fff4a50eb01648810a77ba1873e829fdd4 Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Tue, 15 Jun 2021 10:44:19 -0700
Subject: [PATCH 2126/3292] EDAC/Intel: Do not load EDAC driver when running as
 a guest

There's little to no point in loading an EDAC driver running in a guest:
1) The CPU model reported by CPUID may not represent actual h/w
2) The hypervisor likely does not pass in access to memory controller devices
3) Hypervisors generally do not pass corrected error details to guests

Add a check in each of the Intel EDAC drivers for X86_FEATURE_HYPERVISOR
and simply return -ENODEV in the init routine.

Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210615174419.GA1087688@agluck-desk2.amr.corp.intel.com
---
 drivers/edac/i10nm_base.c | 3 +++
 drivers/edac/pnd2_edac.c  | 3 +++
 drivers/edac/sb_edac.c    | 3 +++
 drivers/edac/skx_base.c   | 3 +++
 4 files changed, 12 insertions(+)

diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index fb7e72d3fd2c..6ce0ed2ffaaf 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -414,6 +414,9 @@ static int __init i10nm_init(void)
 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
 		return -EBUSY;
 
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
 	id = x86_match_cpu(i10nm_cpuids);
 	if (!id)
 		return -ENODEV;
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 928f63a374c7..c94ca1f790c4 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1554,6 +1554,9 @@ static int __init pnd2_init(void)
 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
 		return -EBUSY;
 
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
 	id = x86_match_cpu(pnd2_cpuids);
 	if (!id)
 		return -ENODEV;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 93daa4297f2e..4c626fcd4dcb 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3510,6 +3510,9 @@ static int __init sbridge_init(void)
 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
 		return -EBUSY;
 
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
 	id = x86_match_cpu(sbridge_cpuids);
 	if (!id)
 		return -ENODEV;
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 6a4f0b27c654..4dbd46575bfb 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -656,6 +656,9 @@ static int __init skx_init(void)
 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
 		return -EBUSY;
 
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
 	id = x86_match_cpu(skx_cpuids);
 	if (!id)
 		return -ENODEV;

From 0c38740c08962ab109267cb23f4a40df2ccf2bbf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 17 Jun 2021 17:28:24 -0700
Subject: [PATCH 2127/3292] selftests/bpf: Fix ringbuf test fetching map FD

Seems like 4d1b62986125 ("selftests/bpf: Convert few tests to light skeleton.")
and 704e2beba23c ("selftests/bpf: Test ringbuf mmap read-only and read-write
restrictions") were done independently on bpf and bpf-next trees and are in
conflict with each other, despite a clean merge. Fix fetching of ringbuf's
map_fd to use light skeleton properly.

Fixes: 704e2beba23c ("selftests/bpf: Test ringbuf mmap read-only and read-write restrictions")
Fixes: 4d1b62986125 ("selftests/bpf: Convert few tests to light skeleton.")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210618002824.2081922-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index a01788090c31..4706cee84360 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -100,7 +100,7 @@ void test_ringbuf(void)
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
 		goto cleanup;
 
-	rb_fd = bpf_map__fd(skel->maps.ringbuf);
+	rb_fd = skel->maps.ringbuf.map_fd;
 	/* good read/write cons_pos */
 	mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
 	ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");

From 4fa82a87ba55f5eca7d194055572110652daa264 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Wed, 16 Jun 2021 13:33:35 +0800
Subject: [PATCH 2128/3292] opp: Allow required-opps to be used for non genpd
 use cases

Don't limit required_opp_table to genpd only. One possible use case is
cpufreq based devfreq governor, which can use required-opps property to
derive devfreq from cpufreq.

Though the OPP core still doesn't support non-genpd required-opps in
_set_required_opps().

Suggested-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
[ Viresh: Update _set_required_opps() to check for genpd ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 10 ++++++++++
 drivers/opp/of.c   | 24 ++----------------------
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index e366218d6736..b335c077f215 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -893,6 +893,16 @@ static int _set_required_opps(struct device *dev,
 	if (!required_opp_tables)
 		return 0;
 
+	/*
+	 * We only support genpd's OPPs in the "required-opps" for now, as we
+	 * don't know much about other use cases. Error out if the required OPP
+	 * doesn't belong to a genpd.
+	 */
+	if (unlikely(!required_opp_tables[0]->is_genpd)) {
+		dev_err(dev, "required-opps don't belong to a genpd\n");
+		return -ENOENT;
+	}
+
 	/* required-opps not fully initialized yet */
 	if (lazy_linking_pending(opp_table))
 		return -EBUSY;
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index aa75a1caf08a..d298e38aaf7e 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -197,21 +197,8 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
 		required_opp_tables[i] = _find_table_of_opp_np(required_np);
 		of_node_put(required_np);
 
-		if (IS_ERR(required_opp_tables[i])) {
+		if (IS_ERR(required_opp_tables[i]))
 			lazy = true;
-			continue;
-		}
-
-		/*
-		 * We only support genpd's OPPs in the "required-opps" for now,
-		 * as we don't know how much about other cases. Error out if the
-		 * required OPP doesn't belong to a genpd.
-		 */
-		if (!required_opp_tables[i]->is_genpd) {
-			dev_err(dev, "required-opp doesn't belong to genpd: %pOF\n",
-				required_np);
-			goto free_required_tables;
-		}
 	}
 
 	/* Let's do the linking later on */
@@ -379,13 +366,6 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
 	struct dev_pm_opp *opp;
 	int i, ret;
 
-	/*
-	 * We only support genpd's OPPs in the "required-opps" for now,
-	 * as we don't know much about other cases.
-	 */
-	if (!new_table->is_genpd)
-		return;
-
 	mutex_lock(&opp_table_lock);
 
 	list_for_each_entry_safe(opp_table, temp, &lazy_opp_tables, lazy) {
@@ -873,7 +853,7 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
 		return ERR_PTR(-ENOMEM);
 
 	ret = _read_opp_key(new_opp, opp_table, np, &rate_not_available);
-	if (ret < 0 && !opp_table->is_genpd) {
+	if (ret < 0) {
 		dev_err(dev, "%s: opp key field not found\n", __func__);
 		goto free_opp;
 	}

From 62eec0d73393a136b4523952cecbda1438f1f1b9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 16 Jun 2021 22:06:19 +0200
Subject: [PATCH 2129/3292] netfilter: conntrack: pass hook state to log
 functions

The packet logger backend is unable to provide the incoming (or
outgoing) interface name because that information isn't available.

Pass the hook state, it contains the network namespace, the protocol
family, the network interfaces and other things.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 20 ++++++++++-------
 net/netfilter/nf_conntrack_proto.c           | 16 ++++++++------
 net/netfilter/nf_conntrack_proto_dccp.c      | 14 ++++++------
 net/netfilter/nf_conntrack_proto_icmp.c      |  7 +++---
 net/netfilter/nf_conntrack_proto_icmpv6.c    |  3 +--
 net/netfilter/nf_conntrack_proto_sctp.c      |  2 +-
 net/netfilter/nf_conntrack_proto_tcp.c       | 23 ++++++++++----------
 net/netfilter/nf_conntrack_proto_udp.c       |  6 ++---
 8 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 96f9cf81f46b..1f47bef51722 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -159,22 +159,26 @@ unsigned int nf_ct_port_nlattr_tuple_size(void);
 extern const struct nla_policy nf_ct_port_nla_policy[];
 
 #ifdef CONFIG_SYSCTL
-__printf(3, 4) __cold
+__printf(4, 5) __cold
 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 			       const struct nf_conn *ct,
+			       const struct nf_hook_state *state,
 			       const char *fmt, ...);
-__printf(5, 6) __cold
+__printf(4, 5) __cold
 void nf_l4proto_log_invalid(const struct sk_buff *skb,
-			    struct net *net,
-			    u16 pf, u8 protonum,
+			    const struct nf_hook_state *state,
+			    u8 protonum,
 			    const char *fmt, ...);
 #else
-static inline __printf(5, 6) __cold
-void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net,
-			    u16 pf, u8 protonum, const char *fmt, ...) {}
-static inline __printf(3, 4) __cold
+static inline __printf(4, 5) __cold
+void nf_l4proto_log_invalid(const struct sk_buff *skb,
+			    const struct nf_hook_state *state,
+			    u8 protonum,
+			    const char *fmt, ...) {}
+static inline __printf(4, 5) __cold
 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 			       const struct nf_conn *ct,
+			       const struct nf_hook_state *state,
 			       const char *fmt, ...) { }
 #endif /* CONFIG_SYSCTL */
 
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index be14e0bea4c8..55647409a9be 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -45,12 +45,13 @@
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
 #ifdef CONFIG_SYSCTL
-__printf(5, 6)
+__printf(4, 5)
 void nf_l4proto_log_invalid(const struct sk_buff *skb,
-			    struct net *net,
-			    u16 pf, u8 protonum,
+			    const struct nf_hook_state *state,
+			    u8 protonum,
 			    const char *fmt, ...)
 {
+	struct net *net = state->net;
 	struct va_format vaf;
 	va_list args;
 
@@ -62,15 +63,16 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-		      "nf_ct_proto_%d: %pV ", protonum, &vaf);
+	nf_log_packet(net, state->pf, 0, skb, state->in, state->out,
+		      NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf);
 	va_end(args);
 }
 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
 
-__printf(3, 4)
+__printf(4, 5)
 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 			       const struct nf_conn *ct,
+			       const struct nf_hook_state *state,
 			       const char *fmt, ...)
 {
 	struct va_format vaf;
@@ -85,7 +87,7 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+	nf_l4proto_log_invalid(skb, state,
 			       nf_ct_protonum(ct), "%pV", &vaf);
 	va_end(args);
 }
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4f33307fa3cf..c1557d47ccd1 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -382,7 +382,8 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 
 static noinline bool
 dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
-	 const struct dccp_hdr *dh)
+	 const struct dccp_hdr *dh,
+	 const struct nf_hook_state *hook_state)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_dccp_net *dn;
@@ -414,7 +415,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 
 out_invalid:
-	nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
+	nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
 	return false;
 }
 
@@ -464,8 +465,7 @@ static bool dccp_error(const struct dccp_hdr *dh,
 	}
 	return false;
 out_invalid:
-	nf_l4proto_log_invalid(skb, state->net, state->pf,
-			       IPPROTO_DCCP, "%s", msg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
 	return true;
 }
 
@@ -488,7 +488,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
 		return -NF_ACCEPT;
 
 	type = dh->dccph_type;
-	if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+	if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
 		return -NF_ACCEPT;
 
 	if (type == DCCP_PKT_RESET &&
@@ -543,11 +543,11 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
 		ct->proto.dccp.last_pkt = type;
 
 		spin_unlock_bh(&ct->lock);
-		nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
+		nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
 		return NF_ACCEPT;
 	case CT_DCCP_INVALID:
 		spin_unlock_bh(&ct->lock);
-		nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
+		nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
 		return -NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 4efd8741c105..b38b7164acd5 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -170,12 +170,12 @@ int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
 	ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
 	if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
 		if (state->pf == AF_INET) {
-			nf_l4proto_log_invalid(skb, state->net, state->pf,
+			nf_l4proto_log_invalid(skb, state,
 					       l4proto,
 					       "outer daddr %pI4 != inner %pI4",
 					       &outer_daddr->ip, &ct_daddr->ip);
 		} else if (state->pf == AF_INET6) {
-			nf_l4proto_log_invalid(skb, state->net, state->pf,
+			nf_l4proto_log_invalid(skb, state,
 					       l4proto,
 					       "outer daddr %pI6 != inner %pI6",
 					       &outer_daddr->ip6, &ct_daddr->ip6);
@@ -197,8 +197,7 @@ static void icmp_error_log(const struct sk_buff *skb,
 			   const struct nf_hook_state *state,
 			   const char *msg)
 {
-	nf_l4proto_log_invalid(skb, state->net, state->pf,
-			       IPPROTO_ICMP, "%s", msg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_ICMP, "%s", msg);
 }
 
 /* Small and modified version of icmp_rcv */
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index facd8c64ec4e..61e3b05cf02c 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -126,8 +126,7 @@ static void icmpv6_error_log(const struct sk_buff *skb,
 			     const struct nf_hook_state *state,
 			     const char *msg)
 {
-	nf_l4proto_log_invalid(skb, state->net, state->pf,
-			       IPPROTO_ICMPV6, "%s", msg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
 }
 
 int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb8dc02e502f..2394238d01c9 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -351,7 +351,7 @@ static bool sctp_error(struct sk_buff *skb,
 	}
 	return false;
 out_invalid:
-	nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg);
 	return true;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index de840fc41a2e..f7e8baf59b51 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -446,14 +446,15 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 	}
 }
 
-static bool tcp_in_window(const struct nf_conn *ct,
-			  struct ip_ct_tcp *state,
+static bool tcp_in_window(struct nf_conn *ct,
 			  enum ip_conntrack_dir dir,
 			  unsigned int index,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
-			  const struct tcphdr *tcph)
+			  const struct tcphdr *tcph,
+			  const struct nf_hook_state *hook_state)
 {
+	struct ip_ct_tcp *state = &ct->proto.tcp;
 	struct net *net = nf_ct_net(ct);
 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
@@ -670,7 +671,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		    tn->tcp_be_liberal)
 			res = true;
 		if (!res) {
-			nf_ct_l4proto_log_invalid(skb, ct,
+			nf_ct_l4proto_log_invalid(skb, ct, hook_state,
 			"%s",
 			before(seq, sender->td_maxend + 1) ?
 			in_recv_win ?
@@ -710,7 +711,7 @@ static void tcp_error_log(const struct sk_buff *skb,
 			  const struct nf_hook_state *state,
 			  const char *msg)
 {
-	nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg);
 }
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
@@ -970,7 +971,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 					IP_CT_EXP_CHALLENGE_ACK;
 		}
 		spin_unlock_bh(&ct->lock);
-		nf_ct_l4proto_log_invalid(skb, ct,
+		nf_ct_l4proto_log_invalid(skb, ct, state,
 					  "packet (index %d) in dir %d ignored, state %s",
 					  index, dir,
 					  tcp_conntrack_names[old_state]);
@@ -995,7 +996,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 			 dir, get_conntrack_index(th), old_state);
 		spin_unlock_bh(&ct->lock);
-		nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
+		nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state");
 		return -NF_ACCEPT;
 	case TCP_CONNTRACK_TIME_WAIT:
 		/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -1010,7 +1011,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 			/* Detected RFC5961 challenge ACK */
 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
 			spin_unlock_bh(&ct->lock);
-			nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
+			nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored");
 			return NF_ACCEPT; /* Don't change state */
 		}
 		break;
@@ -1035,7 +1036,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 			if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
 				/* Invalid RST  */
 				spin_unlock_bh(&ct->lock);
-				nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+				nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
 				return -NF_ACCEPT;
 			}
 
@@ -1079,8 +1080,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
 		break;
 	}
 
-	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
-			   skb, dataoff, th)) {
+	if (!tcp_in_window(ct, dir, index,
+			   skb, dataoff, th, state)) {
 		spin_unlock_bh(&ct->lock);
 		return -NF_ACCEPT;
 	}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 68911fcaa0f1..698fee49e732 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,8 +38,7 @@ static void udp_error_log(const struct sk_buff *skb,
 			  const struct nf_hook_state *state,
 			  const char *msg)
 {
-	nf_l4proto_log_invalid(skb, state->net, state->pf,
-			       IPPROTO_UDP, "%s", msg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg);
 }
 
 static bool udp_error(struct sk_buff *skb,
@@ -130,8 +129,7 @@ static void udplite_error_log(const struct sk_buff *skb,
 			      const struct nf_hook_state *state,
 			      const char *msg)
 {
-	nf_l4proto_log_invalid(skb, state->net, state->pf,
-			       IPPROTO_UDPLITE, "%s", msg);
+	nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg);
 }
 
 static bool udplite_error(struct sk_buff *skb,

From 159f130f60f402273b235801d1fde3fc115c6795 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 12 May 2021 03:17:32 -0700
Subject: [PATCH 2130/3292] tools/power/x86/intel-speed-select: Fix uncore
 memory frequency display

The uncore memory frequency value from the mailbox command
CONFIG_TDP_GET_MEM_FREQ needs to be scaled based on the platform for
display. There is no single constant multiplier.

This change introduces CPU model specific memory frequency multiplier.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 16 ++++++++++++++++
 tools/power/x86/intel-speed-select/isst-core.c   | 15 +++++++++++++++
 .../power/x86/intel-speed-select/isst-display.c  |  2 +-
 tools/power/x86/intel-speed-select/isst.h        |  2 ++
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index ab940c508ef0..d4f0a7872e49 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -106,6 +106,22 @@ int is_skx_based_platform(void)
 	return 0;
 }
 
+int is_spr_platform(void)
+{
+	if (cpu_model == 0x8F)
+		return 1;
+
+	return 0;
+}
+
+int is_icx_platform(void)
+{
+	if (cpu_model == 0x6A || cpu_model == 0x6C)
+		return 1;
+
+	return 0;
+}
+
 static int update_cpu_model(void)
 {
 	unsigned int ebx, ecx, edx;
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 6a26d5769984..4431c8a0d40a 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -201,6 +201,7 @@ void isst_get_uncore_mem_freq(int cpu, int config_index,
 {
 	unsigned int resp;
 	int ret;
+
 	ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_MEM_FREQ,
 				     0, config_index, &resp);
 	if (ret) {
@@ -209,6 +210,20 @@ void isst_get_uncore_mem_freq(int cpu, int config_index,
 	}
 
 	ctdp_level->mem_freq = resp & GENMASK(7, 0);
+	if (is_spr_platform()) {
+		ctdp_level->mem_freq *= 200;
+	} else if (is_icx_platform()) {
+		if (ctdp_level->mem_freq < 7) {
+			ctdp_level->mem_freq = (12 - ctdp_level->mem_freq) * 133.33 * 2 * 10;
+			ctdp_level->mem_freq /= 10;
+			if (ctdp_level->mem_freq % 10 > 5)
+				ctdp_level->mem_freq++;
+		} else {
+			ctdp_level->mem_freq = 0;
+		}
+	} else {
+		ctdp_level->mem_freq = 0;
+	}
 	debug_printf(
 		"cpu:%d ctdp:%d CONFIG_TDP_GET_MEM_FREQ resp:%x uncore mem_freq:%d\n",
 		cpu, config_index, resp, ctdp_level->mem_freq);
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 3bf1820c0da1..f97d8859ada7 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -446,7 +446,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
 		if (ctdp_level->mem_freq) {
 			snprintf(header, sizeof(header), "mem-frequency(MHz)");
 			snprintf(value, sizeof(value), "%d",
-				 ctdp_level->mem_freq * DISP_FREQ_MULTIPLIER);
+				 ctdp_level->mem_freq);
 			format_and_print(outf, level + 2, header, value);
 		}
 
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 0cac6c54be87..1aa15d5ea57c 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -257,5 +257,7 @@ extern int get_cpufreq_base_freq(int cpu);
 extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
 extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
 extern int is_skx_based_platform(void);
+extern int is_spr_platform(void);
+extern int is_icx_platform(void);
 extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
 #endif

From 307722e872658ee8cfa4ee0f9a7aa9a1b2207417 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 10 Jun 2021 14:36:35 -0700
Subject: [PATCH 2131/3292] tools/power/x86/intel-speed-select: v1.10 release

This release adds following change:
- Fix reporting of memory frequency

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index d4f0a7872e49..bf9fd3549a1d 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.9";
+static const char *version_str = "v1.10";
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;

From 1e42de8e53d32bbd7a732df49d872a30b4f888b4 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 16 Jun 2021 15:13:28 -0700
Subject: [PATCH 2132/3292] platform/x86: ISST: Optimize CPU to PCI device
 mapping

It was observed that some of the high performance benchmarks are spending
more time in kernel depending on which CPU package they are executing.
The difference is significant and benchmark scores varies more than 10%.
These benchmarks adjust class of service to improve thread performance
which run in parallel. This class of service change causes access to
MMIO region of Intel Speed Select PCI devices depending on the CPU
package they are executing.

This mapping from CPU to PCI device instance uses a standard Linux PCI
interface "pci_get_domain_bus_and_slot()". This function does a linear
search to get to a PCI device. Since these platforms have 100+ PCI
devices, this search can be expensive in fast path for benchmarks.

Since the device and function of PCI device is fixed for Intel
Speed Select PCI devices, the CPU to PCI device information can be cached
at the same time when bus number for the CPU is read. In this way during
runtime the cached information can be used. This improves performance
of these benchmarks significantly.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/r/20210616221329.1909276-1-srinivas.pandruvada@linux.intel.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../intel_speed_select_if/isst_if_common.c    | 29 +++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
index 0c2aa22c7a12..bbd46b1d9c10 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
@@ -281,11 +281,27 @@ static int isst_if_get_platform_info(void __user *argp)
 struct isst_if_cpu_info {
 	/* For BUS 0 and BUS 1 only, which we need for PUNIT interface */
 	int bus_info[2];
+	struct pci_dev *pci_dev[2];
 	int punit_cpu_id;
 };
 
 static struct isst_if_cpu_info *isst_cpu_info;
 
+static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn)
+{
+	int bus_number;
+
+	if (bus_no < 0 || bus_no > 1 || cpu < 0 || cpu >= nr_cpu_ids ||
+	    cpu >= num_possible_cpus())
+		return NULL;
+
+	bus_number = isst_cpu_info[cpu].bus_info[bus_no];
+	if (bus_number < 0)
+		return NULL;
+
+	return pci_get_domain_bus_and_slot(0, bus_number, PCI_DEVFN(dev, fn));
+}
+
 /**
  * isst_if_get_pci_dev() - Get the PCI device instance for a CPU
  * @cpu: Logical CPU number.
@@ -300,17 +316,18 @@ static struct isst_if_cpu_info *isst_cpu_info;
  */
 struct pci_dev *isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn)
 {
-	int bus_number;
+	struct pci_dev *pci_dev;
 
 	if (bus_no < 0 || bus_no > 1 || cpu < 0 || cpu >= nr_cpu_ids ||
 	    cpu >= num_possible_cpus())
 		return NULL;
 
-	bus_number = isst_cpu_info[cpu].bus_info[bus_no];
-	if (bus_number < 0)
-		return NULL;
+	pci_dev = isst_cpu_info[cpu].pci_dev[bus_no];
 
-	return pci_get_domain_bus_and_slot(0, bus_number, PCI_DEVFN(dev, fn));
+	if (pci_dev && pci_dev->devfn == PCI_DEVFN(dev, fn))
+		return pci_dev;
+
+	return _isst_if_get_pci_dev(cpu, bus_no, dev, fn);
 }
 EXPORT_SYMBOL_GPL(isst_if_get_pci_dev);
 
@@ -327,6 +344,8 @@ static int isst_if_cpu_online(unsigned int cpu)
 	} else {
 		isst_cpu_info[cpu].bus_info[0] = data & 0xff;
 		isst_cpu_info[cpu].bus_info[1] = (data >> 8) & 0xff;
+		isst_cpu_info[cpu].pci_dev[0] = _isst_if_get_pci_dev(cpu, 0, 0, 1);
+		isst_cpu_info[cpu].pci_dev[1] = _isst_if_get_pci_dev(cpu, 1, 30, 1);
 	}
 
 	ret = rdmsrl_safe(MSR_THREAD_ID_INFO, &data);

From aa2ddd24257213bdfd2f65058531810ac57455dc Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 16 Jun 2021 15:13:29 -0700
Subject: [PATCH 2133/3292] platform/x86: ISST: Use numa node id for cpu pci
 dev mapping

There is a problem in mapping CPU to a PCI device instance when the
bus numbers are reused in different packages. This was observed on
some Sapphire Rapids systems.

The current implementation reads bus number assigned to a CPU package
via MSR 0x128. This allows to establish relationship between a CPU
and a PCI device. This allows to update power related parameters to a
MMIO offset in a PCI device space which is unique to a CPU. But if
two packages uses same bus number then this mapping will not be unique.

When bus number is reused, PCI device will use different domain number
or segment number. So we need to be aware of this domain information
while matching CPU to PCI bus number. This domain information is not
available via any MSR. So need to use ACPI numa node information.

There is an interface already available in the Linux to read numa
node for a CPU and a PCI device. This change uses this interface
to check the numa node of a match PCI device with bus number.
If the bus number and numa node matches with the CPU's assigned
bus number and numa node, the matched PCI device instance will be
returned to the caller.

It is possible that before Sapphire Rapids, the numa node is not
defined for the Speed Select PCI device in some OEM systems. In this
case to restore old behavior, return the last matched PCI device
for domain 0 unlsess there are more than one matches.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/r/20210616221329.1909276-2-srinivas.pandruvada@linux.intel.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../intel_speed_select_if/isst_if_common.c    | 48 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
index bbd46b1d9c10..6f0cc679c8e5 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c
@@ -283,13 +283,18 @@ struct isst_if_cpu_info {
 	int bus_info[2];
 	struct pci_dev *pci_dev[2];
 	int punit_cpu_id;
+	int numa_node;
 };
 
 static struct isst_if_cpu_info *isst_cpu_info;
+#define ISST_MAX_PCI_DOMAINS	8
 
 static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn)
 {
-	int bus_number;
+	struct pci_dev *matched_pci_dev = NULL;
+	struct pci_dev *pci_dev = NULL;
+	int no_matches = 0;
+	int i, bus_number;
 
 	if (bus_no < 0 || bus_no > 1 || cpu < 0 || cpu >= nr_cpu_ids ||
 	    cpu >= num_possible_cpus())
@@ -299,7 +304,45 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
 	if (bus_number < 0)
 		return NULL;
 
-	return pci_get_domain_bus_and_slot(0, bus_number, PCI_DEVFN(dev, fn));
+	for (i = 0; i < ISST_MAX_PCI_DOMAINS; ++i) {
+		struct pci_dev *_pci_dev;
+		int node;
+
+		_pci_dev = pci_get_domain_bus_and_slot(i, bus_number, PCI_DEVFN(dev, fn));
+		if (!_pci_dev)
+			continue;
+
+		++no_matches;
+		if (!matched_pci_dev)
+			matched_pci_dev = _pci_dev;
+
+		node = dev_to_node(&_pci_dev->dev);
+		if (node == NUMA_NO_NODE) {
+			pr_info("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
+				cpu, bus_no, dev, fn);
+			continue;
+		}
+
+		if (node == isst_cpu_info[cpu].numa_node) {
+			pci_dev = _pci_dev;
+			break;
+		}
+	}
+
+	/*
+	 * If there is no numa matched pci_dev, then there can be following cases:
+	 * 1. CONFIG_NUMA is not defined: In this case if there is only single device
+	 *    match, then we don't need numa information. Simply return last match.
+	 *    Othewise return NULL.
+	 * 2. NUMA information is not exposed via _SEG method. In this case it is similar
+	 *    to case 1.
+	 * 3. Numa information doesn't match with CPU numa node and more than one match
+	 *    return NULL.
+	 */
+	if (!pci_dev && no_matches == 1)
+		pci_dev = matched_pci_dev;
+
+	return pci_dev;
 }
 
 /**
@@ -354,6 +397,7 @@ static int isst_if_cpu_online(unsigned int cpu)
 		return ret;
 	}
 	isst_cpu_info[cpu].punit_cpu_id = data;
+	isst_cpu_info[cpu].numa_node = cpu_to_node(cpu);
 
 	isst_restore_msr_local(cpu);
 

From f0c1c4d2864ed614f90d2da1bab1a1c42907b940 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 9 Jun 2021 14:30:46 +0800
Subject: [PATCH 2134/3292] blk-mq: fix use-after-free in blk_mq_exit_sched

tagset can't be used after blk_cleanup_queue() is returned because
freeing tagset usually follows blk_clenup_queue(). Commit d97e594c5166
("blk-mq: Use request queue-wide tags for tagset-wide sbitmap") adds
check on q->tag_set->flags in blk_mq_exit_sched(), and causes
use-after-free.

Fixes it by using hctx->flags.

Reported-by: syzbot+77ba3d171a25c56756ea@syzkaller.appspotmail.com
Fixes: d97e594c5166 ("blk-mq: Use request queue-wide tags for tagset-wide sbitmap")
Cc: John Garry <john.garry@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/20210609063046.122843-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a9182d2f8ad3..80273245d11a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -680,6 +680,7 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;
+	unsigned int flags = 0;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		blk_mq_debugfs_unregister_sched_hctx(hctx);
@@ -687,12 +688,13 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 			e->type->ops.exit_hctx(hctx, i);
 			hctx->sched_data = NULL;
 		}
+		flags = hctx->flags;
 	}
 	blk_mq_debugfs_unregister_sched(q);
 	if (e->type->ops.exit_sched)
 		e->type->ops.exit_sched(e);
 	blk_mq_sched_tags_teardown(q);
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
+	if (blk_mq_is_sbitmap_shared(flags))
 		blk_mq_exit_sched_shared_sbitmap(q);
 	q->elevator = NULL;
 }

From e42cfb1da0bf33c313318da201730324c423351d Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 18 Jun 2021 10:59:22 +0900
Subject: [PATCH 2135/3292] block: Remove unnecessary elevator operation checks

The insert_requests and dispatch_request elevator operations are
mandatory for the correct execution of an elevator, and all implemented
elevators (bfq, kyber and mq-deadline) implement them. As a result,
there is no need to check for these operations before calling them when
a queue has an elevator set. This simplifies the code in
__blk_mq_sched_dispatch_requests() and blk_mq_sched_insert_request().

To avoid out-of-tree elevators to crash the kernel in case of bad
implementation, add a check in elv_register() to verify that these
operations are implemented.

A small, probably not significant, IOPS improvement of 0.1% is observed
with this patch applied (4.117 MIOPS to 4.123 MIOPS, average of 20 fio
runs doing 4K random direct reads with psync and 32 jobs).

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210618015922.713999-1-damien.lemoal@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c | 13 ++++++-------
 block/elevator.c     |  4 ++++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 80273245d11a..2403a5c2b053 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -294,8 +294,7 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
 static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
-	struct elevator_queue *e = q->elevator;
-	const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
+	const bool has_sched = q->elevator;
 	int ret = 0;
 	LIST_HEAD(rq_list);
 
@@ -326,12 +325,12 @@ static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	if (!list_empty(&rq_list)) {
 		blk_mq_sched_mark_restart_hctx(hctx);
 		if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
-			if (has_sched_dispatch)
+			if (has_sched)
 				ret = blk_mq_do_dispatch_sched(hctx);
 			else
 				ret = blk_mq_do_dispatch_ctx(hctx);
 		}
-	} else if (has_sched_dispatch) {
+	} else if (has_sched) {
 		ret = blk_mq_do_dispatch_sched(hctx);
 	} else if (hctx->dispatch_busy) {
 		/* dequeue request one by one from sw queue if queue is busy */
@@ -463,7 +462,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 		goto run;
 	}
 
-	if (e && e->type->ops.insert_requests) {
+	if (e) {
 		LIST_HEAD(list);
 
 		list_add(&rq->queuelist, &list);
@@ -494,9 +493,9 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
 	percpu_ref_get(&q->q_usage_counter);
 
 	e = hctx->queue->elevator;
-	if (e && e->type->ops.insert_requests)
+	if (e) {
 		e->type->ops.insert_requests(hctx, list, false);
-	else {
+	} else {
 		/*
 		 * try to issue requests directly if the hw queue isn't
 		 * busy in case of 'none' scheduler, and this way may save
diff --git a/block/elevator.c b/block/elevator.c
index 06e203426410..85d0d4adbb64 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -522,6 +522,10 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+	/* insert_requests and dispatch_request are mandatory */
+	if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
+		return -EINVAL;
+
 	/* create icq_cache if requested */
 	if (e->icq_size) {
 		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||

From 2b9ac22b12a266eb4fec246a07b504dd4983b16b Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Fri, 18 Jun 2021 13:51:57 +0200
Subject: [PATCH 2136/3292] loop: Fix missing discard support when using
 LOOP_CONFIGURE

Without calling loop_config_discard() the discard flag and parameters
aren't set/updated for the loop device and worst-case they could
indicate discard support when it isn't the case (ex: if the
LOOP_SET_STATUS ioctl was used with a different file prior to
LOOP_CONFIGURE).

Cc: <stable@vger.kernel.org> # 5.8.x-
Fixes: 3448914e8cc5 ("loop: Add LOOP_CONFIGURE ioctl")
Signed-off-by: Kristian Klausen <kristian@klausen.dk>
Link: https://lore.kernel.org/r/20210618115157.31452-1-kristian@klausen.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d58d68f3c7cd..fda071fae541 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1154,6 +1154,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	blk_queue_physical_block_size(lo->lo_queue, bsize);
 	blk_queue_io_min(lo->lo_queue, bsize);
 
+	loop_config_discard(lo);
 	loop_update_rotational(lo);
 	loop_update_dio(lo);
 	loop_sysfs_init(lo);

From 52d7e288444906aa5c99888e80a9cc1a1423ed92 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 18 Jun 2021 16:45:22 +0300
Subject: [PATCH 2137/3292] blk-mq: fix an IS_ERR() vs NULL bug

The __blk_mq_alloc_disk() function doesn't return NULLs it returns
error pointers.

Fixes: b461dfc49eb6 ("blk-mq: add the blk_mq_alloc_disk APIs")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/YMyjci35WBqrtqG+@mwanda
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 02a4aab0aeac..fd2de2b422ed 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -431,7 +431,7 @@ enum {
 	static struct lock_class_key __key;				\
 	struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata);	\
 									\
-	if (__disk)							\
+	if (!IS_ERR(__disk))						\
 		lockdep_init_map(&__disk->lockdep_map,			\
 			"(bio completion)", &__key, 0);			\
 	__disk;								\

From 2f99619820c2269534eb2c0cde44870313c6d353 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 17 Jun 2021 11:22:55 +0200
Subject: [PATCH 2138/3292] xsk: Fix missing validation for skb and unaligned
 mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a missing validation of a Tx descriptor when executing in skb mode
and the umem is in unaligned mode. A descriptor could point to a
buffer straddling the end of the umem, thus effectively tricking the
kernel to read outside the allowed umem region. This could lead to a
kernel crash if that part of memory is not mapped.

In zero-copy mode, the descriptor validation code rejects such
descriptors by checking a bit in the DMA address that tells us if the
next page is physically contiguous or not. For the last page in the
umem, this bit is not set, therefore any descriptor pointing to a
packet straddling this last page boundary will be rejected. However,
the skb path does not use this bit since it copies out data and can do
so to two different pages. (It also does not have the array of DMA
address, so it cannot even store this bit.) The code just returned
that the packet is always physically contiguous. But this is
unfortunately also returned for the last page in the umem, which means
that packets that cross the end of the umem are being allowed, which
they should not be.

Fix this by introducing a check for this in the SKB path only, not
penalizing the zero-copy path.

Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/bpf/20210617092255.3487-1-magnus.karlsson@gmail.com
---
 include/net/xsk_buff_pool.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index eaa8386dbc63..7a9a23e7a604 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -147,11 +147,16 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
 {
 	bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE;
 
-	if (pool->dma_pages_cnt && cross_pg) {
+	if (likely(!cross_pg))
+		return false;
+
+	if (pool->dma_pages_cnt) {
 		return !(pool->dma_pages[addr >> PAGE_SHIFT] &
 			 XSK_NEXT_PG_CONTIG_MASK);
 	}
-	return false;
+
+	/* skb path */
+	return addr + len > pool->addrs_cnt;
 }
 
 static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)

From f654fae47e83e56b454fbbfd0af0a4f232e356d6 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 18 Jun 2021 09:58:05 +0200
Subject: [PATCH 2139/3292] xsk: Fix broken Tx ring validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix broken Tx ring validation for AF_XDP. The commit under the Fixes
tag, fixed an off-by-one error in the validation but introduced
another error. Descriptors are now let through even if they straddle a
chunk boundary which they are not allowed to do in aligned mode. Worse
is that they are let through even if they straddle the end of the umem
itself, tricking the kernel to read data outside the allowed umem
region which might or might not be mapped at all.

Fix this by reintroducing the old code, but subtract the length by one
to fix the off-by-one error that the original patch was
addressing. The test chunk != chunk_end makes sure packets do not
straddle chunk boundraries. Note that packets of zero length are
allowed in the interface, therefore the test if the length is
non-zero.

Fixes: ac31565c2193 ("xsk: Fix for xp_aligned_validate_desc() when len == chunk_size")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/bpf/20210618075805.14412-1-magnus.karlsson@gmail.com
---
 net/xdp/xsk_queue.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 9d2a89d793c0..9ae13cccfb28 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -128,12 +128,15 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
 static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
 					    struct xdp_desc *desc)
 {
-	u64 chunk;
-
-	if (desc->len > pool->chunk_size)
-		return false;
+	u64 chunk, chunk_end;
 
 	chunk = xp_aligned_extract_addr(pool, desc->addr);
+	if (likely(desc->len)) {
+		chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1);
+		if (chunk != chunk_end)
+			return false;
+	}
+
 	if (chunk >= pool->addrs_cnt)
 		return false;
 

From 61e8aeda9398925f8c6fc290585bdd9727d154c4 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Thu, 17 Jun 2021 23:14:04 -0700
Subject: [PATCH 2140/3292] bpf: Fix libelf endian handling in resolv_btfids

The vmlinux ".BTF_ids" ELF section is declared in btf_ids.h to hold a list
of zero-filled BTF IDs, which is then patched at link-time with correct
values by resolv_btfids. The section is flagged as "allocable" to preclude
compression, but notably the section contents (BTF IDs) are untyped.

When patching the BTF IDs, resolve_btfids writes in host-native endianness
and relies on libelf for any required translation on reading and updating
vmlinux. However, since the type of the .BTF_ids section content defaults
to ELF_T_BYTE (i.e. unsigned char), no translation occurs. This results in
incorrect patched values when cross-compiling to non-native endianness,
and can manifest as kernel Oops and test failures which are difficult to
troubleshoot [1].

Explicitly set the type of patched data to ELF_T_WORD, the architecture-
neutral ELF type corresponding to the u32 BTF IDs. This enables libelf to
transparently perform any needed endian conversions.

Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object")
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Frank Eigler <fche@redhat.com>
Cc: Mark Wielaard <mark@klomp.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/CAPGftE_eY-Zdi3wBcgDfkz_iOr1KF10n=9mJHm1_a_PykcsoeA@mail.gmail.com [1]
Link: https://lore.kernel.org/bpf/20210618061404.818569-1-Tony.Ambardar@gmail.com
---
 tools/bpf/resolve_btfids/main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 7550fd9c3188..3ad9301b0f00 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -655,6 +655,9 @@ static int symbols_patch(struct object *obj)
 	if (sets_patch(obj))
 		return -1;
 
+	/* Set type to ensure endian translation occurs. */
+	obj->efile.idlist->d_type = ELF_T_WORD;
+
 	elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
 
 	err = elf_update(obj->efile.elf, ELF_C_WRITE);

From e6ab8991c5d0b0deae0961dc22c0edd1dee328f5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:13:59 +0100
Subject: [PATCH 2141/3292] io_uring: fix false WARN_ONCE

WARNING: CPU: 1 PID: 11749 at fs/io-wq.c:244 io_wqe_wake_worker fs/io-wq.c:244 [inline]
WARNING: CPU: 1 PID: 11749 at fs/io-wq.c:244 io_wqe_enqueue+0x7f6/0x910 fs/io-wq.c:751

A WARN_ON_ONCE() in io_wqe_wake_worker() can be triggered by a valid
userspace setup. Replace it with pr_warn.

Reported-by: syzbot+ea2f1484cffe5109dc10@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f7ede342c3342c4c26668f5168e2993e38bbd99c.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index bb4d3ee9592e..843d4a7bcd6e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -240,7 +240,8 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 	 * Most likely an attempt to queue unbounded work on an io_wq that
 	 * wasn't setup with any unbounded workers.
 	 */
-	WARN_ON_ONCE(!acct->max_workers);
+	if (unlikely(!acct->max_workers))
+		pr_warn_once("io-wq is not configured for unbound workers");
 
 	rcu_read_lock();
 	ret = io_wqe_activate_free_worker(wqe);
@@ -901,6 +902,8 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 
 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))
 		return ERR_PTR(-EINVAL);
+	if (WARN_ON_ONCE(!bounded))
+		return ERR_PTR(-EINVAL);
 
 	wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL);
 	if (!wq)

From 2a2758f26df519fab011f49d53440382dda8e1a5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:00 +0100
Subject: [PATCH 2142/3292] io_uring: refactor io_submit_flush_completions()

struct io_comp_state is always contained in struct io_ring_ctx, don't
pass them into io_submit_flush_completions() separately, it makes the
interface cleaner and simplifies it for the compiler.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/44d6ca57003a82484338e95197024dbd65a1b376.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 46a25a7cb70a..1e5ffc602e1f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1059,8 +1059,7 @@ static void __io_queue_sqe(struct io_kiocb *req);
 static void io_rsrc_put_work(struct work_struct *work);
 
 static void io_req_task_queue(struct io_kiocb *req);
-static void io_submit_flush_completions(struct io_comp_state *cs,
-					struct io_ring_ctx *ctx);
+static void io_submit_flush_completions(struct io_ring_ctx *ctx);
 static bool io_poll_remove_waitqs(struct io_kiocb *req);
 static int io_req_prep_async(struct io_kiocb *req);
 
@@ -1879,7 +1878,7 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx)
 		return;
 	if (ctx->submit_state.comp.nr) {
 		mutex_lock(&ctx->uring_lock);
-		io_submit_flush_completions(&ctx->submit_state.comp, ctx);
+		io_submit_flush_completions(ctx);
 		mutex_unlock(&ctx->uring_lock);
 	}
 	percpu_ref_put(&ctx->refs);
@@ -2127,9 +2126,9 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
 		list_add(&req->compl.list, &state->comp.free_list);
 }
 
-static void io_submit_flush_completions(struct io_comp_state *cs,
-					struct io_ring_ctx *ctx)
+static void io_submit_flush_completions(struct io_ring_ctx *ctx)
 {
+	struct io_comp_state *cs = &ctx->submit_state.comp;
 	int i, nr = cs->nr;
 	struct io_kiocb *req;
 	struct req_batch rb;
@@ -6451,7 +6450,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
 
 			cs->reqs[cs->nr++] = req;
 			if (cs->nr == ARRAY_SIZE(cs->reqs))
-				io_submit_flush_completions(cs, ctx);
+				io_submit_flush_completions(ctx);
 		} else {
 			io_put_req(req);
 		}
@@ -6651,7 +6650,7 @@ static void io_submit_state_end(struct io_submit_state *state,
 	if (state->link.head)
 		io_queue_sqe(state->link.head);
 	if (state->comp.nr)
-		io_submit_flush_completions(&state->comp, ctx);
+		io_submit_flush_completions(ctx);
 	if (state->plug_started)
 		blk_finish_plug(&state->plug);
 	io_state_file_put(state);

From c10d1f986b4e2a906862148c77a97f186cc08b9e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:01 +0100
Subject: [PATCH 2143/3292] io_uring: move creds from io-wq work to io_kiocb

io-wq now doesn't have anything to do with creds now, so move ->creds
from struct io_wq_work into request (aka struct io_kiocb).

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/8520c72ab8b8f4b96db12a228a2ab4c094ae64e1.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.h    |  1 -
 fs/io_uring.c | 24 +++++++++++++-----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/io-wq.h b/fs/io-wq.h
index 02299cdcf55c..3999ee58ff26 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -87,7 +87,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
 	struct io_wq_work_node list;
-	const struct cred *creds;
 	unsigned flags;
 };
 
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1e5ffc602e1f..1b8d0ad9a16d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -851,6 +851,8 @@ struct io_kiocb {
 	struct hlist_node		hash_node;
 	struct async_poll		*apoll;
 	struct io_wq_work		work;
+	const struct cred 		*creds;
+
 	/* store used ubuf, so we can prevent reloading */
 	struct io_mapped_ubuf		*imu;
 };
@@ -1234,8 +1236,8 @@ static void io_prep_async_work(struct io_kiocb *req)
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
 
-	if (!req->work.creds)
-		req->work.creds = get_current_cred();
+	if (!req->creds)
+		req->creds = get_current_cred();
 
 	req->work.list.next = NULL;
 	req->work.flags = 0;
@@ -1745,9 +1747,9 @@ static void io_dismantle_req(struct io_kiocb *req)
 		percpu_ref_put(req->fixed_rsrc_refs);
 	if (req->async_data)
 		kfree(req->async_data);
-	if (req->work.creds) {
-		put_cred(req->work.creds);
-		req->work.creds = NULL;
+	if (req->creds) {
+		put_cred(req->creds);
+		req->creds = NULL;
 	}
 }
 
@@ -6139,8 +6141,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	const struct cred *creds = NULL;
 	int ret;
 
-	if (req->work.creds && req->work.creds != current_cred())
-		creds = override_creds(req->work.creds);
+	if (req->creds && req->creds != current_cred())
+		creds = override_creds(req->creds);
 
 	switch (req->opcode) {
 	case IORING_OP_NOP:
@@ -6532,7 +6534,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	atomic_set(&req->refs, 2);
 	req->task = current;
 	req->result = 0;
-	req->work.creds = NULL;
+	req->creds = NULL;
 
 	/* enforce forwards compatibility on users */
 	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
@@ -6550,10 +6552,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 	personality = READ_ONCE(sqe->personality);
 	if (personality) {
-		req->work.creds = xa_load(&ctx->personalities, personality);
-		if (!req->work.creds)
+		req->creds = xa_load(&ctx->personalities, personality);
+		if (!req->creds)
 			return -EINVAL;
-		get_cred(req->work.creds);
+		get_cred(req->creds);
 	}
 	state = &ctx->submit_state;
 

From b8e64b530011162adda0e176150774d22326c50c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:02 +0100
Subject: [PATCH 2144/3292] io_uring: track request creds with a flag

Currently, if req->creds is not NULL, then there are creds assigned.
Track the invariant with a new flag in req->flags. No need to clear the
field at init, and also cleanup can be efficiently moved into
io_clean_op().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/5f8baeb8d3b909487f555542350e2eac97005556.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1b8d0ad9a16d..af93f790a1da 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -718,6 +718,7 @@ enum {
 	REQ_F_COMPLETE_INLINE_BIT,
 	REQ_F_REISSUE_BIT,
 	REQ_F_DONT_REISSUE_BIT,
+	REQ_F_CREDS_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_ASYNC_READ_BIT,
 	REQ_F_ASYNC_WRITE_BIT,
@@ -771,6 +772,8 @@ enum {
 	REQ_F_ASYNC_WRITE	= BIT(REQ_F_ASYNC_WRITE_BIT),
 	/* regular file */
 	REQ_F_ISREG		= BIT(REQ_F_ISREG_BIT),
+	/* has creds assigned */
+	REQ_F_CREDS		= BIT(REQ_F_CREDS_BIT),
 };
 
 struct async_poll {
@@ -1236,8 +1239,10 @@ static void io_prep_async_work(struct io_kiocb *req)
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
 
-	if (!req->creds)
+	if (!(req->flags & REQ_F_CREDS)) {
+		req->flags |= REQ_F_CREDS;
 		req->creds = get_current_cred();
+	}
 
 	req->work.list.next = NULL;
 	req->work.flags = 0;
@@ -1623,7 +1628,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 static inline bool io_req_needs_clean(struct io_kiocb *req)
 {
 	return req->flags & (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP |
-				REQ_F_POLLED | REQ_F_INFLIGHT);
+				REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS);
 }
 
 static void io_req_complete_state(struct io_kiocb *req, long res,
@@ -1747,10 +1752,6 @@ static void io_dismantle_req(struct io_kiocb *req)
 		percpu_ref_put(req->fixed_rsrc_refs);
 	if (req->async_data)
 		kfree(req->async_data);
-	if (req->creds) {
-		put_cred(req->creds);
-		req->creds = NULL;
-	}
 }
 
 /* must to be called somewhat shortly after putting a request */
@@ -6133,6 +6134,10 @@ static void io_clean_op(struct io_kiocb *req)
 		atomic_dec(&tctx->inflight_tracked);
 		req->flags &= ~REQ_F_INFLIGHT;
 	}
+	if (req->flags & REQ_F_CREDS) {
+		put_cred(req->creds);
+		req->flags &= ~REQ_F_CREDS;
+	}
 }
 
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
@@ -6141,7 +6146,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	const struct cred *creds = NULL;
 	int ret;
 
-	if (req->creds && req->creds != current_cred())
+	if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
 		creds = override_creds(req->creds);
 
 	switch (req->opcode) {
@@ -6534,7 +6539,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	atomic_set(&req->refs, 2);
 	req->task = current;
 	req->result = 0;
-	req->creds = NULL;
 
 	/* enforce forwards compatibility on users */
 	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
@@ -6556,6 +6560,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		if (!req->creds)
 			return -EINVAL;
 		get_cred(req->creds);
+		req->flags |= REQ_F_CREDS;
 	}
 	state = &ctx->submit_state;
 

From 1dacb4df4ebe61ec2005d7ab82ee38ffa7125ee7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:03 +0100
Subject: [PATCH 2145/3292] io_uring: simplify iovec freeing in io_clean_op()

We don't get REQ_F_NEED_CLEANUP for rw unless there is ->free_iovec set,
so remove the optimisation of NULL checking it inline, kfree() will take
care if that would ever be the case.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a233dc655d3d45bd4f69b73d55a61de46d914415.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index af93f790a1da..8543f1c37e2b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6092,8 +6092,8 @@ static void io_clean_op(struct io_kiocb *req)
 		case IORING_OP_WRITE_FIXED:
 		case IORING_OP_WRITE: {
 			struct io_async_rw *io = req->async_data;
-			if (io->free_iovec)
-				kfree(io->free_iovec);
+
+			kfree(io->free_iovec);
 			break;
 			}
 		case IORING_OP_RECVMSG:

From c854357bc1b965e1e261c612d5be1297dfb3e406 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:04 +0100
Subject: [PATCH 2146/3292] io_uring: clean all flags in io_clean_op() at once

Clean all flags in io_clean_op() in the end in one operation, will save
us a couple of operation and binary size.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/b8efe1f022a037f74e7fe497c69fb554d59bfeaf.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8543f1c37e2b..474705a97f29 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -109,6 +109,8 @@
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
 				IOSQE_BUFFER_SELECT)
+#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
+				REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS)
 
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
@@ -1627,8 +1629,7 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 
 static inline bool io_req_needs_clean(struct io_kiocb *req)
 {
-	return req->flags & (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP |
-				REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS);
+	return req->flags & IO_REQ_CLEAN_FLAGS;
 }
 
 static void io_req_complete_state(struct io_kiocb *req, long res,
@@ -6080,7 +6081,6 @@ static void io_clean_op(struct io_kiocb *req)
 			kfree(req->sr_msg.kbuf);
 			break;
 		}
-		req->flags &= ~REQ_F_BUFFER_SELECTED;
 	}
 
 	if (req->flags & REQ_F_NEED_CLEANUP) {
@@ -6121,7 +6121,6 @@ static void io_clean_op(struct io_kiocb *req)
 			putname(req->unlink.filename);
 			break;
 		}
-		req->flags &= ~REQ_F_NEED_CLEANUP;
 	}
 	if ((req->flags & REQ_F_POLLED) && req->apoll) {
 		kfree(req->apoll->double_poll);
@@ -6132,12 +6131,11 @@ static void io_clean_op(struct io_kiocb *req)
 		struct io_uring_task *tctx = req->task->io_uring;
 
 		atomic_dec(&tctx->inflight_tracked);
-		req->flags &= ~REQ_F_INFLIGHT;
 	}
-	if (req->flags & REQ_F_CREDS) {
+	if (req->flags & REQ_F_CREDS)
 		put_cred(req->creds);
-		req->flags &= ~REQ_F_CREDS;
-	}
+
+	req->flags &= ~IO_REQ_CLEAN_FLAGS;
 }
 
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)

From a3dbdf54da80326fd12bc11ad75ecd699a82374f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:05 +0100
Subject: [PATCH 2147/3292] io_uring: refactor io_get_sequence()

Clean up io_get_sequence() and add a comment describing the magic around
sequence correction.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f55dc409936b8afa4698d24b8677a34d31077ccb.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 474705a97f29..c41e9a925fa2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5993,13 +5993,12 @@ static int io_req_prep_async(struct io_kiocb *req)
 
 static u32 io_get_sequence(struct io_kiocb *req)
 {
-	struct io_kiocb *pos;
-	struct io_ring_ctx *ctx = req->ctx;
-	u32 nr_reqs = 0;
+	u32 seq = req->ctx->cached_sq_head;
 
-	io_for_each_link(pos, req)
-		nr_reqs++;
-	return ctx->cached_sq_head - nr_reqs;
+	/* need original cached_sq_head, but it was increased for each req */
+	io_for_each_link(req, req)
+		seq--;
+	return seq;
 }
 
 static bool io_drain_req(struct io_kiocb *req)

From 3f18407dc6f2db0968daaa36c39a772c2c9f8ea7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:06 +0100
Subject: [PATCH 2148/3292] io_uring: inline __tctx_task_work()

Inline __tctx_task_work() into tctx_task_work() in preparation for
further optimisations.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f9c05c4bc9763af7bd8e25ebc3c5f7b6f69148f8.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 67 ++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c41e9a925fa2..dc71850d7a49 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1888,48 +1888,43 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx)
 	percpu_ref_put(&ctx->refs);
 }
 
-static bool __tctx_task_work(struct io_uring_task *tctx)
-{
-	struct io_ring_ctx *ctx = NULL;
-	struct io_wq_work_list list;
-	struct io_wq_work_node *node;
-
-	if (wq_list_empty(&tctx->task_list))
-		return false;
-
-	spin_lock_irq(&tctx->task_lock);
-	list = tctx->task_list;
-	INIT_WQ_LIST(&tctx->task_list);
-	spin_unlock_irq(&tctx->task_lock);
-
-	node = list.first;
-	while (node) {
-		struct io_wq_work_node *next = node->next;
-		struct io_kiocb *req;
-
-		req = container_of(node, struct io_kiocb, io_task_work.node);
-		if (req->ctx != ctx) {
-			ctx_flush_and_put(ctx);
-			ctx = req->ctx;
-			percpu_ref_get(&ctx->refs);
-		}
-
-		req->task_work.func(&req->task_work);
-		node = next;
-	}
-
-	ctx_flush_and_put(ctx);
-	return list.first != NULL;
-}
-
 static void tctx_task_work(struct callback_head *cb)
 {
-	struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work);
+	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
+						  task_work);
 
 	clear_bit(0, &tctx->task_state);
 
-	while (__tctx_task_work(tctx))
+	while (!wq_list_empty(&tctx->task_list)) {
+		struct io_ring_ctx *ctx = NULL;
+		struct io_wq_work_list list;
+		struct io_wq_work_node *node;
+
+		spin_lock_irq(&tctx->task_lock);
+		list = tctx->task_list;
+		INIT_WQ_LIST(&tctx->task_list);
+		spin_unlock_irq(&tctx->task_lock);
+
+		node = list.first;
+		while (node) {
+			struct io_wq_work_node *next = node->next;
+			struct io_kiocb *req = container_of(node, struct io_kiocb,
+							    io_task_work.node);
+
+			if (req->ctx != ctx) {
+				ctx_flush_and_put(ctx);
+				ctx = req->ctx;
+				percpu_ref_get(&ctx->refs);
+			}
+			req->task_work.func(&req->task_work);
+			node = next;
+		}
+
+		ctx_flush_and_put(ctx);
+		if (!list.first)
+			break;
 		cond_resched();
+	}
 }
 
 static int io_req_task_work_add(struct io_kiocb *req)

From ebd0df2e63426bbd9ed50966e888c87eac88fc30 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:07 +0100
Subject: [PATCH 2149/3292] io_uring: optimise task_work submit flushing

tctx_task_work() tries to fetch a next batch of requests, but before it
would flush completions from the previous batch that may be sub-optimal.
E.g. io_req_task_queue() executes a head of the link where all the
linked may be enqueued through the same io_req_task_queue(). And there
are more cases for that.

Do the flushing at the end, so it can cache completions of several waves
of a single tctx_task_work(), and do the flush at the very end.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/3cac83934e4fbce520ff8025c3524398b3ae0270.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index dc71850d7a49..49f06484ba0e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1890,13 +1890,13 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx)
 
 static void tctx_task_work(struct callback_head *cb)
 {
+	struct io_ring_ctx *ctx = NULL;
 	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
 						  task_work);
 
 	clear_bit(0, &tctx->task_state);
 
 	while (!wq_list_empty(&tctx->task_list)) {
-		struct io_ring_ctx *ctx = NULL;
 		struct io_wq_work_list list;
 		struct io_wq_work_node *node;
 
@@ -1920,11 +1920,12 @@ static void tctx_task_work(struct callback_head *cb)
 			node = next;
 		}
 
-		ctx_flush_and_put(ctx);
 		if (!list.first)
 			break;
 		cond_resched();
 	}
+
+	ctx_flush_and_put(ctx);
 }
 
 static int io_req_task_work_add(struct io_kiocb *req)

From c6538be9e4883d1371adaff45712b1b2172773dd Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:08 +0100
Subject: [PATCH 2150/3292] io_uring: refactor tctx task_work list splicing

We don't need a full copy of tctx->task_list in tctx_task_work(), but
only a first one, so just assign node directly.

Taking into account that task_works are run in a context of a task,
it's very unlikely to first see non-empty tctx->task_list and then
splice it empty, can only happen with task_work cancellations that is
not-normal slow path anyway. Hence, get rid of the check in the end,
it's there not for validity but "performance" purposes.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d076c83fedb8253baf43acb23b8fafd7c5da1714.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 49f06484ba0e..51db0d80b67b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1897,15 +1897,13 @@ static void tctx_task_work(struct callback_head *cb)
 	clear_bit(0, &tctx->task_state);
 
 	while (!wq_list_empty(&tctx->task_list)) {
-		struct io_wq_work_list list;
 		struct io_wq_work_node *node;
 
 		spin_lock_irq(&tctx->task_lock);
-		list = tctx->task_list;
+		node = tctx->task_list.first;
 		INIT_WQ_LIST(&tctx->task_list);
 		spin_unlock_irq(&tctx->task_lock);
 
-		node = list.first;
 		while (node) {
 			struct io_wq_work_node *next = node->next;
 			struct io_kiocb *req = container_of(node, struct io_kiocb,
@@ -1919,9 +1917,6 @@ static void tctx_task_work(struct callback_head *cb)
 			req->task_work.func(&req->task_work);
 			node = next;
 		}
-
-		if (!list.first)
-			break;
 		cond_resched();
 	}
 

From 16f72070386fca59312bde696cff917bb04b183e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:09 +0100
Subject: [PATCH 2151/3292] io_uring: don't resched with empty task_list

Entering tctx_task_work() with empty task_list is a strange scenario,
that can happen only on rare occasion during task exit, so let's not
check for task_list emptiness in advance and do it do-while style. The
code still correct for the empty case, just would do extra work about
which we don't care.

Do extra step and do the check before cond_resched(), so we don't
resched if have nothing to execute.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/c4173e288e69793d03c7d7ce826f9d28afba718a.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 51db0d80b67b..55bc348ed8fe 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1896,7 +1896,7 @@ static void tctx_task_work(struct callback_head *cb)
 
 	clear_bit(0, &tctx->task_state);
 
-	while (!wq_list_empty(&tctx->task_list)) {
+	while (1) {
 		struct io_wq_work_node *node;
 
 		spin_lock_irq(&tctx->task_lock);
@@ -1917,6 +1917,8 @@ static void tctx_task_work(struct callback_head *cb)
 			req->task_work.func(&req->task_work);
 			node = next;
 		}
+		if (wq_list_empty(&tctx->task_list))
+			break;
 		cond_resched();
 	}
 

From 7a778f9dc32deae4f748903f6f9169dc01cbcd28 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 17 Jun 2021 18:14:10 +0100
Subject: [PATCH 2152/3292] io_uring: improve in tctx_task_work() resubmission

If task_state is cleared, io_req_task_work_add() will go the slow path
adding a task_work, setting the task_state, waking up the task and so
on. Not to mention it's expensive. tctx_task_work() first clears the
state and then executes all the work items queued, so if any of them
resubmits or adds new task_work items, it would unnecessarily go through
the slow path of io_req_task_work_add().

Let's clear the ->task_state at the end. We still have to check
->task_list for emptiness afterward to synchronise with
io_req_task_work_add(), do that, and set the state back if we're going
to retry, because clearing not-ours task_state on the next iteration
would be buggy.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1ef72cdac7022adf0cd7ce4bfe3bb5c82a62eb93.1623949695.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 55bc348ed8fe..fc8637f591a6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1894,8 +1894,6 @@ static void tctx_task_work(struct callback_head *cb)
 	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
 						  task_work);
 
-	clear_bit(0, &tctx->task_state);
-
 	while (1) {
 		struct io_wq_work_node *node;
 
@@ -1917,8 +1915,14 @@ static void tctx_task_work(struct callback_head *cb)
 			req->task_work.func(&req->task_work);
 			node = next;
 		}
-		if (wq_list_empty(&tctx->task_list))
-			break;
+		if (wq_list_empty(&tctx->task_list)) {
+			clear_bit(0, &tctx->task_state);
+			if (wq_list_empty(&tctx->task_list))
+				break;
+			/* another tctx_task_work() is enqueued, yield */
+			if (test_and_set_bit(0, &tctx->task_state))
+				break;
+		}
 		cond_resched();
 	}
 

From dda90cb90a5ced4ebfb75e3f06d59fa1abb58f65 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 23 Feb 2021 15:47:05 -0800
Subject: [PATCH 2153/3292] ice: report hash type such as L2/L3/L4

The hardware is reporting the type of the hash used for RSS
as a PTYPE field in the receive descriptor. Use this value to set
the skb packet hash type by extending the hash type table to
cover all 10-bits of possible values (requiring some variables
to be changed from u8 to u16), and then use that table to convert
to one of the possible values in enum pkt_hash_types.

While we're here, remove the unused ptype struct value, which
makes table init easier for the zero entries, and use ranged
initializer to remove a bunch of code (works with gcc and clang).

Without this change, the kernel will recalculate the hash in software,
which can consume extra CPU cycles.

Co-developed-by: Kiran Patil <kiran.patil@intel.com>
Signed-off-by: Kiran Patil <kiran.patil@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h    | 147 ++++--------------
 drivers/net/ethernet/intel/ice/ice_txrx.c     |   2 +-
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c |  23 ++-
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h |   2 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c      |   2 +-
 5 files changed, 50 insertions(+), 126 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 4238ab0433ee..80736e0ec0dc 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -161,7 +161,6 @@ struct ice_fltr_desc {
 #define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES	0x1ULL
 
 struct ice_rx_ptype_decoded {
-	u32 ptype:10;
 	u32 known:1;
 	u32 outer_ip:1;
 	u32 outer_ip_ver:2;
@@ -606,9 +605,32 @@ struct ice_tlan_ctx {
 	u8 int_q_state;	/* width not needed - internal - DO NOT WRITE!!! */
 };
 
-/* macro to make the table lines short */
+/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT ice_ptype_lkup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum ice_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
 #define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
-	{	PTYPE, \
+	[PTYPE] = { \
 		1, \
 		ICE_RX_PTYPE_OUTER_##OUTER_IP, \
 		ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \
@@ -619,14 +641,14 @@ struct ice_tlan_ctx {
 		ICE_RX_PTYPE_INNER_PROT_##I, \
 		ICE_RX_PTYPE_PAYLOAD_LAYER_##PL }
 
-#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 
 /* shorter macros makes the table fit but are terse */
 #define ICE_RX_PTYPE_NOF		ICE_RX_PTYPE_NOT_FRAG
 #define ICE_RX_PTYPE_FRG		ICE_RX_PTYPE_FRAG
 
-/* Lookup table mapping the HW PTYPE to the bit field for decoding */
-static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
+/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */
+static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = {
 	/* L2 Packet types */
 	ICE_PTT_UNUSED_ENTRY(0),
 	ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
@@ -832,118 +854,7 @@ static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
 	ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
 
 	/* unused entries */
-	ICE_PTT_UNUSED_ENTRY(154),
-	ICE_PTT_UNUSED_ENTRY(155),
-	ICE_PTT_UNUSED_ENTRY(156),
-	ICE_PTT_UNUSED_ENTRY(157),
-	ICE_PTT_UNUSED_ENTRY(158),
-	ICE_PTT_UNUSED_ENTRY(159),
-
-	ICE_PTT_UNUSED_ENTRY(160),
-	ICE_PTT_UNUSED_ENTRY(161),
-	ICE_PTT_UNUSED_ENTRY(162),
-	ICE_PTT_UNUSED_ENTRY(163),
-	ICE_PTT_UNUSED_ENTRY(164),
-	ICE_PTT_UNUSED_ENTRY(165),
-	ICE_PTT_UNUSED_ENTRY(166),
-	ICE_PTT_UNUSED_ENTRY(167),
-	ICE_PTT_UNUSED_ENTRY(168),
-	ICE_PTT_UNUSED_ENTRY(169),
-
-	ICE_PTT_UNUSED_ENTRY(170),
-	ICE_PTT_UNUSED_ENTRY(171),
-	ICE_PTT_UNUSED_ENTRY(172),
-	ICE_PTT_UNUSED_ENTRY(173),
-	ICE_PTT_UNUSED_ENTRY(174),
-	ICE_PTT_UNUSED_ENTRY(175),
-	ICE_PTT_UNUSED_ENTRY(176),
-	ICE_PTT_UNUSED_ENTRY(177),
-	ICE_PTT_UNUSED_ENTRY(178),
-	ICE_PTT_UNUSED_ENTRY(179),
-
-	ICE_PTT_UNUSED_ENTRY(180),
-	ICE_PTT_UNUSED_ENTRY(181),
-	ICE_PTT_UNUSED_ENTRY(182),
-	ICE_PTT_UNUSED_ENTRY(183),
-	ICE_PTT_UNUSED_ENTRY(184),
-	ICE_PTT_UNUSED_ENTRY(185),
-	ICE_PTT_UNUSED_ENTRY(186),
-	ICE_PTT_UNUSED_ENTRY(187),
-	ICE_PTT_UNUSED_ENTRY(188),
-	ICE_PTT_UNUSED_ENTRY(189),
-
-	ICE_PTT_UNUSED_ENTRY(190),
-	ICE_PTT_UNUSED_ENTRY(191),
-	ICE_PTT_UNUSED_ENTRY(192),
-	ICE_PTT_UNUSED_ENTRY(193),
-	ICE_PTT_UNUSED_ENTRY(194),
-	ICE_PTT_UNUSED_ENTRY(195),
-	ICE_PTT_UNUSED_ENTRY(196),
-	ICE_PTT_UNUSED_ENTRY(197),
-	ICE_PTT_UNUSED_ENTRY(198),
-	ICE_PTT_UNUSED_ENTRY(199),
-
-	ICE_PTT_UNUSED_ENTRY(200),
-	ICE_PTT_UNUSED_ENTRY(201),
-	ICE_PTT_UNUSED_ENTRY(202),
-	ICE_PTT_UNUSED_ENTRY(203),
-	ICE_PTT_UNUSED_ENTRY(204),
-	ICE_PTT_UNUSED_ENTRY(205),
-	ICE_PTT_UNUSED_ENTRY(206),
-	ICE_PTT_UNUSED_ENTRY(207),
-	ICE_PTT_UNUSED_ENTRY(208),
-	ICE_PTT_UNUSED_ENTRY(209),
-
-	ICE_PTT_UNUSED_ENTRY(210),
-	ICE_PTT_UNUSED_ENTRY(211),
-	ICE_PTT_UNUSED_ENTRY(212),
-	ICE_PTT_UNUSED_ENTRY(213),
-	ICE_PTT_UNUSED_ENTRY(214),
-	ICE_PTT_UNUSED_ENTRY(215),
-	ICE_PTT_UNUSED_ENTRY(216),
-	ICE_PTT_UNUSED_ENTRY(217),
-	ICE_PTT_UNUSED_ENTRY(218),
-	ICE_PTT_UNUSED_ENTRY(219),
-
-	ICE_PTT_UNUSED_ENTRY(220),
-	ICE_PTT_UNUSED_ENTRY(221),
-	ICE_PTT_UNUSED_ENTRY(222),
-	ICE_PTT_UNUSED_ENTRY(223),
-	ICE_PTT_UNUSED_ENTRY(224),
-	ICE_PTT_UNUSED_ENTRY(225),
-	ICE_PTT_UNUSED_ENTRY(226),
-	ICE_PTT_UNUSED_ENTRY(227),
-	ICE_PTT_UNUSED_ENTRY(228),
-	ICE_PTT_UNUSED_ENTRY(229),
-
-	ICE_PTT_UNUSED_ENTRY(230),
-	ICE_PTT_UNUSED_ENTRY(231),
-	ICE_PTT_UNUSED_ENTRY(232),
-	ICE_PTT_UNUSED_ENTRY(233),
-	ICE_PTT_UNUSED_ENTRY(234),
-	ICE_PTT_UNUSED_ENTRY(235),
-	ICE_PTT_UNUSED_ENTRY(236),
-	ICE_PTT_UNUSED_ENTRY(237),
-	ICE_PTT_UNUSED_ENTRY(238),
-	ICE_PTT_UNUSED_ENTRY(239),
-
-	ICE_PTT_UNUSED_ENTRY(240),
-	ICE_PTT_UNUSED_ENTRY(241),
-	ICE_PTT_UNUSED_ENTRY(242),
-	ICE_PTT_UNUSED_ENTRY(243),
-	ICE_PTT_UNUSED_ENTRY(244),
-	ICE_PTT_UNUSED_ENTRY(245),
-	ICE_PTT_UNUSED_ENTRY(246),
-	ICE_PTT_UNUSED_ENTRY(247),
-	ICE_PTT_UNUSED_ENTRY(248),
-	ICE_PTT_UNUSED_ENTRY(249),
-
-	ICE_PTT_UNUSED_ENTRY(250),
-	ICE_PTT_UNUSED_ENTRY(251),
-	ICE_PTT_UNUSED_ENTRY(252),
-	ICE_PTT_UNUSED_ENTRY(253),
-	ICE_PTT_UNUSED_ENTRY(254),
-	ICE_PTT_UNUSED_ENTRY(255),
+	[154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 };
 
 static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 917eba7fdd0c..e9e9edb32c6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1082,7 +1082,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		u16 stat_err_bits;
 		int rx_buf_pgcnt;
 		u16 vlan_tag = 0;
-		u8 rx_ptype;
+		u16 rx_ptype;
 
 		/* get the Rx desc from Rx ring based on 'next_to_clean' */
 		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 166cf25d1139..171397dcf00a 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -38,10 +38,23 @@ void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val)
  * ice_ptype_to_htype - get a hash type
  * @ptype: the ptype value from the descriptor
  *
- * Returns a hash type to be used by skb_set_hash
+ * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by
+ * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of
+ * Rx desc.
  */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+static enum pkt_hash_types ice_ptype_to_htype(u16 ptype)
 {
+	struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+	if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
+		return PKT_HASH_TYPE_L2;
+
 	return PKT_HASH_TYPE_NONE;
 }
 
@@ -54,7 +67,7 @@ static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
  */
 static void
 ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
-	    struct sk_buff *skb, u8 rx_ptype)
+	    struct sk_buff *skb, u16 rx_ptype)
 {
 	struct ice_32b_rx_flex_desc_nic *nic_mdid;
 	u32 hash;
@@ -81,7 +94,7 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
  */
 static void
 ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
-	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+	    union ice_32b_rx_flex_desc *rx_desc, u16 ptype)
 {
 	struct ice_rx_ptype_decoded decoded;
 	u16 rx_status0, rx_status1;
@@ -167,7 +180,7 @@ checksum_fail:
 void
 ice_process_skb_fields(struct ice_ring *rx_ring,
 		       union ice_32b_rx_flex_desc *rx_desc,
-		       struct sk_buff *skb, u8 ptype)
+		       struct sk_buff *skb, u16 ptype)
 {
 	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 58ff58f0f972..05ac30752902 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -53,7 +53,7 @@ void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val);
 void
 ice_process_skb_fields(struct ice_ring *rx_ring,
 		       union ice_32b_rx_flex_desc *rx_desc,
-		       struct sk_buff *skb, u8 ptype);
+		       struct sk_buff *skb, u16 ptype);
 void
 ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
 #endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 239b9bf10794..52acbe325db3 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -528,7 +528,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
 		struct sk_buff *skb;
 		u16 stat_err_bits;
 		u16 vlan_tag = 0;
-		u8 rx_ptype;
+		u16 rx_ptype;
 
 		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
 

From c6e088bf30dccb9fb7b7df7c394a2fe10eb3a27a Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 23 Feb 2021 15:47:06 -0800
Subject: [PATCH 2154/3292] i40e: clean up packet type lookup table

Remove the unused ptype struct value, which makes table init easier for
the zero entries, and use ranged initializer to remove a bunch of code
(works with gcc and clang). There is no significant functional change.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c | 124 +-----------------
 drivers/net/ethernet/intel/i40e/i40e_type.h   |   1 -
 2 files changed, 6 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 67cb0b47416a..b4d3fed0d2f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -552,9 +552,9 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw,
  * ENDIF
  */
 
-/* macro to make the table lines short */
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
 #define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
-	{	PTYPE, \
+	[PTYPE] = { \
 		1, \
 		I40E_RX_PTYPE_OUTER_##OUTER_IP, \
 		I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
@@ -565,16 +565,15 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw,
 		I40E_RX_PTYPE_INNER_PROT_##I, \
 		I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
 
-#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
-		{ PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 
 /* shorter macros makes the table fit but are terse */
 #define I40E_RX_PTYPE_NOF		I40E_RX_PTYPE_NOT_FRAG
 #define I40E_RX_PTYPE_FRG		I40E_RX_PTYPE_FRAG
 #define I40E_RX_PTYPE_INNER_PROT_TS	I40E_RX_PTYPE_INNER_PROT_TIMESYNC
 
-/* Lookup table mapping the HW PTYPE to the bit field for decoding */
-struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = {
 	/* L2 Packet types */
 	I40E_PTT_UNUSED_ENTRY(0),
 	I40E_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
@@ -780,118 +779,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
 	I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
 
 	/* unused entries */
-	I40E_PTT_UNUSED_ENTRY(154),
-	I40E_PTT_UNUSED_ENTRY(155),
-	I40E_PTT_UNUSED_ENTRY(156),
-	I40E_PTT_UNUSED_ENTRY(157),
-	I40E_PTT_UNUSED_ENTRY(158),
-	I40E_PTT_UNUSED_ENTRY(159),
-
-	I40E_PTT_UNUSED_ENTRY(160),
-	I40E_PTT_UNUSED_ENTRY(161),
-	I40E_PTT_UNUSED_ENTRY(162),
-	I40E_PTT_UNUSED_ENTRY(163),
-	I40E_PTT_UNUSED_ENTRY(164),
-	I40E_PTT_UNUSED_ENTRY(165),
-	I40E_PTT_UNUSED_ENTRY(166),
-	I40E_PTT_UNUSED_ENTRY(167),
-	I40E_PTT_UNUSED_ENTRY(168),
-	I40E_PTT_UNUSED_ENTRY(169),
-
-	I40E_PTT_UNUSED_ENTRY(170),
-	I40E_PTT_UNUSED_ENTRY(171),
-	I40E_PTT_UNUSED_ENTRY(172),
-	I40E_PTT_UNUSED_ENTRY(173),
-	I40E_PTT_UNUSED_ENTRY(174),
-	I40E_PTT_UNUSED_ENTRY(175),
-	I40E_PTT_UNUSED_ENTRY(176),
-	I40E_PTT_UNUSED_ENTRY(177),
-	I40E_PTT_UNUSED_ENTRY(178),
-	I40E_PTT_UNUSED_ENTRY(179),
-
-	I40E_PTT_UNUSED_ENTRY(180),
-	I40E_PTT_UNUSED_ENTRY(181),
-	I40E_PTT_UNUSED_ENTRY(182),
-	I40E_PTT_UNUSED_ENTRY(183),
-	I40E_PTT_UNUSED_ENTRY(184),
-	I40E_PTT_UNUSED_ENTRY(185),
-	I40E_PTT_UNUSED_ENTRY(186),
-	I40E_PTT_UNUSED_ENTRY(187),
-	I40E_PTT_UNUSED_ENTRY(188),
-	I40E_PTT_UNUSED_ENTRY(189),
-
-	I40E_PTT_UNUSED_ENTRY(190),
-	I40E_PTT_UNUSED_ENTRY(191),
-	I40E_PTT_UNUSED_ENTRY(192),
-	I40E_PTT_UNUSED_ENTRY(193),
-	I40E_PTT_UNUSED_ENTRY(194),
-	I40E_PTT_UNUSED_ENTRY(195),
-	I40E_PTT_UNUSED_ENTRY(196),
-	I40E_PTT_UNUSED_ENTRY(197),
-	I40E_PTT_UNUSED_ENTRY(198),
-	I40E_PTT_UNUSED_ENTRY(199),
-
-	I40E_PTT_UNUSED_ENTRY(200),
-	I40E_PTT_UNUSED_ENTRY(201),
-	I40E_PTT_UNUSED_ENTRY(202),
-	I40E_PTT_UNUSED_ENTRY(203),
-	I40E_PTT_UNUSED_ENTRY(204),
-	I40E_PTT_UNUSED_ENTRY(205),
-	I40E_PTT_UNUSED_ENTRY(206),
-	I40E_PTT_UNUSED_ENTRY(207),
-	I40E_PTT_UNUSED_ENTRY(208),
-	I40E_PTT_UNUSED_ENTRY(209),
-
-	I40E_PTT_UNUSED_ENTRY(210),
-	I40E_PTT_UNUSED_ENTRY(211),
-	I40E_PTT_UNUSED_ENTRY(212),
-	I40E_PTT_UNUSED_ENTRY(213),
-	I40E_PTT_UNUSED_ENTRY(214),
-	I40E_PTT_UNUSED_ENTRY(215),
-	I40E_PTT_UNUSED_ENTRY(216),
-	I40E_PTT_UNUSED_ENTRY(217),
-	I40E_PTT_UNUSED_ENTRY(218),
-	I40E_PTT_UNUSED_ENTRY(219),
-
-	I40E_PTT_UNUSED_ENTRY(220),
-	I40E_PTT_UNUSED_ENTRY(221),
-	I40E_PTT_UNUSED_ENTRY(222),
-	I40E_PTT_UNUSED_ENTRY(223),
-	I40E_PTT_UNUSED_ENTRY(224),
-	I40E_PTT_UNUSED_ENTRY(225),
-	I40E_PTT_UNUSED_ENTRY(226),
-	I40E_PTT_UNUSED_ENTRY(227),
-	I40E_PTT_UNUSED_ENTRY(228),
-	I40E_PTT_UNUSED_ENTRY(229),
-
-	I40E_PTT_UNUSED_ENTRY(230),
-	I40E_PTT_UNUSED_ENTRY(231),
-	I40E_PTT_UNUSED_ENTRY(232),
-	I40E_PTT_UNUSED_ENTRY(233),
-	I40E_PTT_UNUSED_ENTRY(234),
-	I40E_PTT_UNUSED_ENTRY(235),
-	I40E_PTT_UNUSED_ENTRY(236),
-	I40E_PTT_UNUSED_ENTRY(237),
-	I40E_PTT_UNUSED_ENTRY(238),
-	I40E_PTT_UNUSED_ENTRY(239),
-
-	I40E_PTT_UNUSED_ENTRY(240),
-	I40E_PTT_UNUSED_ENTRY(241),
-	I40E_PTT_UNUSED_ENTRY(242),
-	I40E_PTT_UNUSED_ENTRY(243),
-	I40E_PTT_UNUSED_ENTRY(244),
-	I40E_PTT_UNUSED_ENTRY(245),
-	I40E_PTT_UNUSED_ENTRY(246),
-	I40E_PTT_UNUSED_ENTRY(247),
-	I40E_PTT_UNUSED_ENTRY(248),
-	I40E_PTT_UNUSED_ENTRY(249),
-
-	I40E_PTT_UNUSED_ENTRY(250),
-	I40E_PTT_UNUSED_ENTRY(251),
-	I40E_PTT_UNUSED_ENTRY(252),
-	I40E_PTT_UNUSED_ENTRY(253),
-	I40E_PTT_UNUSED_ENTRY(254),
-	I40E_PTT_UNUSED_ENTRY(255)
+	[154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index c81109a63e90..36a4ca1ffb1a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -804,7 +804,6 @@ enum i40e_rx_l2_ptype {
 };
 
 struct i40e_rx_ptype_decoded {
-	u32 ptype:8;
 	u32 known:1;
 	u32 outer_ip:1;
 	u32 outer_ip_ver:1;

From 37dc8fea8656f149e0fa5a03a2736b32350fe2b1 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 23 Feb 2021 15:47:07 -0800
Subject: [PATCH 2155/3292] iavf: clean up packet type lookup table

Remove the unused ptype struct value, which makes table init easier for
the zero entries, and use ranged initializer to remove a bunch of code
(works with gcc and clang). There is no significant functional change.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_common.c | 124 +-----------------
 drivers/net/ethernet/intel/iavf/iavf_type.h   |   1 -
 2 files changed, 6 insertions(+), 119 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c
index 8547fc8fdfd6..e9cc7f6ddc46 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_common.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_common.c
@@ -522,9 +522,9 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id,
  * ENDIF
  */
 
-/* macro to make the table lines short */
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
 #define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
-	{	PTYPE, \
+	[PTYPE] = { \
 		1, \
 		IAVF_RX_PTYPE_OUTER_##OUTER_IP, \
 		IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \
@@ -535,16 +535,15 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id,
 		IAVF_RX_PTYPE_INNER_PROT_##I, \
 		IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL }
 
-#define IAVF_PTT_UNUSED_ENTRY(PTYPE) \
-		{ PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 
 /* shorter macros makes the table fit but are terse */
 #define IAVF_RX_PTYPE_NOF		IAVF_RX_PTYPE_NOT_FRAG
 #define IAVF_RX_PTYPE_FRG		IAVF_RX_PTYPE_FRAG
 #define IAVF_RX_PTYPE_INNER_PROT_TS	IAVF_RX_PTYPE_INNER_PROT_TIMESYNC
 
-/* Lookup table mapping the HW PTYPE to the bit field for decoding */
-struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = {
+/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */
+struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = {
 	/* L2 Packet types */
 	IAVF_PTT_UNUSED_ENTRY(0),
 	IAVF_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
@@ -750,118 +749,7 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = {
 	IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
 
 	/* unused entries */
-	IAVF_PTT_UNUSED_ENTRY(154),
-	IAVF_PTT_UNUSED_ENTRY(155),
-	IAVF_PTT_UNUSED_ENTRY(156),
-	IAVF_PTT_UNUSED_ENTRY(157),
-	IAVF_PTT_UNUSED_ENTRY(158),
-	IAVF_PTT_UNUSED_ENTRY(159),
-
-	IAVF_PTT_UNUSED_ENTRY(160),
-	IAVF_PTT_UNUSED_ENTRY(161),
-	IAVF_PTT_UNUSED_ENTRY(162),
-	IAVF_PTT_UNUSED_ENTRY(163),
-	IAVF_PTT_UNUSED_ENTRY(164),
-	IAVF_PTT_UNUSED_ENTRY(165),
-	IAVF_PTT_UNUSED_ENTRY(166),
-	IAVF_PTT_UNUSED_ENTRY(167),
-	IAVF_PTT_UNUSED_ENTRY(168),
-	IAVF_PTT_UNUSED_ENTRY(169),
-
-	IAVF_PTT_UNUSED_ENTRY(170),
-	IAVF_PTT_UNUSED_ENTRY(171),
-	IAVF_PTT_UNUSED_ENTRY(172),
-	IAVF_PTT_UNUSED_ENTRY(173),
-	IAVF_PTT_UNUSED_ENTRY(174),
-	IAVF_PTT_UNUSED_ENTRY(175),
-	IAVF_PTT_UNUSED_ENTRY(176),
-	IAVF_PTT_UNUSED_ENTRY(177),
-	IAVF_PTT_UNUSED_ENTRY(178),
-	IAVF_PTT_UNUSED_ENTRY(179),
-
-	IAVF_PTT_UNUSED_ENTRY(180),
-	IAVF_PTT_UNUSED_ENTRY(181),
-	IAVF_PTT_UNUSED_ENTRY(182),
-	IAVF_PTT_UNUSED_ENTRY(183),
-	IAVF_PTT_UNUSED_ENTRY(184),
-	IAVF_PTT_UNUSED_ENTRY(185),
-	IAVF_PTT_UNUSED_ENTRY(186),
-	IAVF_PTT_UNUSED_ENTRY(187),
-	IAVF_PTT_UNUSED_ENTRY(188),
-	IAVF_PTT_UNUSED_ENTRY(189),
-
-	IAVF_PTT_UNUSED_ENTRY(190),
-	IAVF_PTT_UNUSED_ENTRY(191),
-	IAVF_PTT_UNUSED_ENTRY(192),
-	IAVF_PTT_UNUSED_ENTRY(193),
-	IAVF_PTT_UNUSED_ENTRY(194),
-	IAVF_PTT_UNUSED_ENTRY(195),
-	IAVF_PTT_UNUSED_ENTRY(196),
-	IAVF_PTT_UNUSED_ENTRY(197),
-	IAVF_PTT_UNUSED_ENTRY(198),
-	IAVF_PTT_UNUSED_ENTRY(199),
-
-	IAVF_PTT_UNUSED_ENTRY(200),
-	IAVF_PTT_UNUSED_ENTRY(201),
-	IAVF_PTT_UNUSED_ENTRY(202),
-	IAVF_PTT_UNUSED_ENTRY(203),
-	IAVF_PTT_UNUSED_ENTRY(204),
-	IAVF_PTT_UNUSED_ENTRY(205),
-	IAVF_PTT_UNUSED_ENTRY(206),
-	IAVF_PTT_UNUSED_ENTRY(207),
-	IAVF_PTT_UNUSED_ENTRY(208),
-	IAVF_PTT_UNUSED_ENTRY(209),
-
-	IAVF_PTT_UNUSED_ENTRY(210),
-	IAVF_PTT_UNUSED_ENTRY(211),
-	IAVF_PTT_UNUSED_ENTRY(212),
-	IAVF_PTT_UNUSED_ENTRY(213),
-	IAVF_PTT_UNUSED_ENTRY(214),
-	IAVF_PTT_UNUSED_ENTRY(215),
-	IAVF_PTT_UNUSED_ENTRY(216),
-	IAVF_PTT_UNUSED_ENTRY(217),
-	IAVF_PTT_UNUSED_ENTRY(218),
-	IAVF_PTT_UNUSED_ENTRY(219),
-
-	IAVF_PTT_UNUSED_ENTRY(220),
-	IAVF_PTT_UNUSED_ENTRY(221),
-	IAVF_PTT_UNUSED_ENTRY(222),
-	IAVF_PTT_UNUSED_ENTRY(223),
-	IAVF_PTT_UNUSED_ENTRY(224),
-	IAVF_PTT_UNUSED_ENTRY(225),
-	IAVF_PTT_UNUSED_ENTRY(226),
-	IAVF_PTT_UNUSED_ENTRY(227),
-	IAVF_PTT_UNUSED_ENTRY(228),
-	IAVF_PTT_UNUSED_ENTRY(229),
-
-	IAVF_PTT_UNUSED_ENTRY(230),
-	IAVF_PTT_UNUSED_ENTRY(231),
-	IAVF_PTT_UNUSED_ENTRY(232),
-	IAVF_PTT_UNUSED_ENTRY(233),
-	IAVF_PTT_UNUSED_ENTRY(234),
-	IAVF_PTT_UNUSED_ENTRY(235),
-	IAVF_PTT_UNUSED_ENTRY(236),
-	IAVF_PTT_UNUSED_ENTRY(237),
-	IAVF_PTT_UNUSED_ENTRY(238),
-	IAVF_PTT_UNUSED_ENTRY(239),
-
-	IAVF_PTT_UNUSED_ENTRY(240),
-	IAVF_PTT_UNUSED_ENTRY(241),
-	IAVF_PTT_UNUSED_ENTRY(242),
-	IAVF_PTT_UNUSED_ENTRY(243),
-	IAVF_PTT_UNUSED_ENTRY(244),
-	IAVF_PTT_UNUSED_ENTRY(245),
-	IAVF_PTT_UNUSED_ENTRY(246),
-	IAVF_PTT_UNUSED_ENTRY(247),
-	IAVF_PTT_UNUSED_ENTRY(248),
-	IAVF_PTT_UNUSED_ENTRY(249),
-
-	IAVF_PTT_UNUSED_ENTRY(250),
-	IAVF_PTT_UNUSED_ENTRY(251),
-	IAVF_PTT_UNUSED_ENTRY(252),
-	IAVF_PTT_UNUSED_ENTRY(253),
-	IAVF_PTT_UNUSED_ENTRY(254),
-	IAVF_PTT_UNUSED_ENTRY(255)
+	[154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
index de9fda78b43a..9f1f523807c4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_type.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
@@ -370,7 +370,6 @@ enum iavf_rx_l2_ptype {
 };
 
 struct iavf_rx_ptype_decoded {
-	u32 ptype:8;
 	u32 known:1;
 	u32 outer_ip:1;
 	u32 outer_ip_ver:1;

From a9c4cf299f5f79d5016c8a9646fa1fc49381a8c1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 16:41:27 +0300
Subject: [PATCH 2156/3292] ACPI: sysfs: Use __ATTR_RO() and __ATTR_RW() macros

We have a few open-coded __ATTR_RO() and __ATTR_RW() macros.
Replace the custom code with generic macros.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index f02fe25c6099..5358c5635163 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -909,27 +909,22 @@ static void __exit interrupt_stats_exit(void)
 	delete_gpe_attr_array();
 }
 
-static ssize_t
-acpi_show_profile(struct kobject *kobj, struct kobj_attribute *attr,
-		  char *buf)
+static ssize_t pm_profile_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%d\n", acpi_gbl_FADT.preferred_profile);
 }
 
-static const struct kobj_attribute pm_profile_attr =
-	__ATTR(pm_profile, S_IRUGO, acpi_show_profile, NULL);
+static const struct kobj_attribute pm_profile_attr = __ATTR_RO(pm_profile);
 
-static ssize_t hotplug_enabled_show(struct kobject *kobj,
-				    struct kobj_attribute *attr, char *buf)
+static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 {
 	struct acpi_hotplug_profile *hotplug = to_acpi_hotplug_profile(kobj);
 
 	return sprintf(buf, "%d\n", hotplug->enabled);
 }
 
-static ssize_t hotplug_enabled_store(struct kobject *kobj,
-				     struct kobj_attribute *attr,
-				     const char *buf, size_t size)
+static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t size)
 {
 	struct acpi_hotplug_profile *hotplug = to_acpi_hotplug_profile(kobj);
 	unsigned int val;
@@ -941,9 +936,7 @@ static ssize_t hotplug_enabled_store(struct kobject *kobj,
 	return size;
 }
 
-static struct kobj_attribute hotplug_enabled_attr =
-	__ATTR(enabled, S_IRUGO | S_IWUSR, hotplug_enabled_show,
-		hotplug_enabled_store);
+static struct kobj_attribute hotplug_enabled_attr = __ATTR_RW(enabled);
 
 static struct attribute *hotplug_profile_attrs[] = {
 	&hotplug_enabled_attr.attr,
@@ -1001,9 +994,7 @@ static ssize_t force_remove_store(struct kobject *kobj,
 	return size;
 }
 
-static const struct kobj_attribute force_remove_attr =
-	__ATTR(force_remove, S_IRUGO | S_IWUSR, force_remove_show,
-	       force_remove_store);
+static const struct kobj_attribute force_remove_attr = __ATTR_RW(force_remove);
 
 int __init acpi_sysfs_init(void)
 {

From df35ee70864111c20ecb36745ffc5f821301d0e7 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 16:41:28 +0300
Subject: [PATCH 2157/3292] ACPI: sysfs: Remove tailing return statement in
 void function

The tail return statement is redundant in void functions. Remove it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sysfs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 5358c5635163..4b4f2a0ab981 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -899,7 +899,6 @@ void acpi_irq_stats_init(void)
 
 fail:
 	delete_gpe_attr_array();
-	return;
 }
 
 static void __exit interrupt_stats_exit(void)

From 904d4a6c074b9d69b673c7cd7d66f55cfa7610ea Mon Sep 17 00:00:00 2001
From: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Date: Thu, 17 Jun 2021 11:42:08 -0500
Subject: [PATCH 2158/3292] ACPI: PM: s2idle: Use correct revision id

AMD spec mentions only revision 0. With this change,
device constraint list is populated properly.

Signed-off-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Tested-by: Julian Sikorski <belegdol@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 2d7ddb8a8cb6..da27c1c45c9f 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -96,7 +96,7 @@ static void lpi_device_get_constraints_amd(void)
 	int i, j, k;
 
 	out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid,
-					  1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS,
+					  rev_id, ACPI_LPS0_GET_DEVICE_CONSTRAINTS,
 					  NULL, ACPI_TYPE_PACKAGE);
 
 	if (!out_obj)

From 4a012dc82d504f9b6a9654e4a28d1938c9a6cb2f Mon Sep 17 00:00:00 2001
From: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Date: Thu, 17 Jun 2021 11:42:09 -0500
Subject: [PATCH 2159/3292] ACPI: PM: s2idle: Refactor common code

Refactor common code to prepare for upcoming changes.
 * Remove unused struct.
 * Print error before returning.
 * Frees ACPI obj if _DSM type is not as expected.
 * Treat lps0_dsm_func_mask as an integer rather than character
 * Remove extra out_obj
 * Move rev_id

Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Tested-by: Julian Sikorski <belegdol@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 67 ++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index da27c1c45c9f..c0cba025072f 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -49,7 +49,7 @@ static const struct acpi_device_id lps0_device_ids[] = {
 
 static acpi_handle lps0_device_handle;
 static guid_t lps0_dsm_guid;
-static char lps0_dsm_func_mask;
+static int lps0_dsm_func_mask;
 
 /* Device constraint entry structure */
 struct lpi_device_info {
@@ -70,15 +70,7 @@ struct lpi_constraints {
 	int min_dstate;
 };
 
-/* AMD */
-/* Device constraint entry structure */
-struct lpi_device_info_amd {
-	int revision;
-	int count;
-	union acpi_object *package;
-};
-
-/* Constraint package structure */
+/* AMD Constraint package structure */
 struct lpi_device_constraint_amd {
 	char *name;
 	int enabled;
@@ -99,12 +91,12 @@ static void lpi_device_get_constraints_amd(void)
 					  rev_id, ACPI_LPS0_GET_DEVICE_CONSTRAINTS,
 					  NULL, ACPI_TYPE_PACKAGE);
 
-	if (!out_obj)
-		return;
-
 	acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n",
 			  out_obj ? "successful" : "failed");
 
+	if (!out_obj)
+		return;
+
 	for (i = 0; i < out_obj->package.count; i++) {
 		union acpi_object *package = &out_obj->package.elements[i];
 
@@ -336,11 +328,33 @@ static bool acpi_s2idle_vendor_amd(void)
 	return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
 }
 
+static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid)
+{
+	union acpi_object *obj;
+	int ret = -EINVAL;
+
+	guid_parse(uuid, dsm_guid);
+	obj = acpi_evaluate_dsm(handle, dsm_guid, rev, 0, NULL);
+
+	/* Check if the _DSM is present and as expected. */
+	if (!obj || obj->type != ACPI_TYPE_BUFFER || obj->buffer.length == 0 ||
+	    obj->buffer.length > sizeof(u32)) {
+		acpi_handle_debug(handle,
+				"_DSM UUID %s rev %d function 0 evaluation failed\n", uuid, rev);
+		goto out;
+	}
+
+	ret = *(int *)obj->buffer.pointer;
+	acpi_handle_debug(handle, "_DSM UUID %s rev %d function mask: 0x%x\n", uuid, rev, ret);
+
+out:
+	ACPI_FREE(obj);
+	return ret;
+}
+
 static int lps0_device_attach(struct acpi_device *adev,
 			      const struct acpi_device_id *not_used)
 {
-	union acpi_object *out_obj;
-
 	if (lps0_device_handle)
 		return 0;
 
@@ -348,28 +362,17 @@ static int lps0_device_attach(struct acpi_device *adev,
 		return 0;
 
 	if (acpi_s2idle_vendor_amd()) {
-		guid_parse(ACPI_LPS0_DSM_UUID_AMD, &lps0_dsm_guid);
-		out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 0, 0, NULL);
 		rev_id = 0;
+		lps0_dsm_func_mask = validate_dsm(adev->handle,
+					ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
 	} else {
-		guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid);
-		out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL);
 		rev_id = 1;
+		lps0_dsm_func_mask = validate_dsm(adev->handle,
+					ACPI_LPS0_DSM_UUID, rev_id, &lps0_dsm_guid);
 	}
 
-	/* Check if the _DSM is present and as expected. */
-	if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) {
-		acpi_handle_debug(adev->handle,
-				  "_DSM function 0 evaluation failed\n");
-		return 0;
-	}
-
-	lps0_dsm_func_mask = *(char *)out_obj->buffer.pointer;
-
-	ACPI_FREE(out_obj);
-
-	acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
-			  lps0_dsm_func_mask);
+	if (lps0_dsm_func_mask < 0)
+		return 0;//function eval failed
 
 	lps0_device_handle = adev->handle;
 

From 3f4b116c0b3955a9c30479c1d8177874b2e828e6 Mon Sep 17 00:00:00 2001
From: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Date: Thu, 17 Jun 2021 11:42:10 -0500
Subject: [PATCH 2160/3292] ACPI: PM: s2idle: Add support for multiple func
 mask

Required for follow-up patch adding new UUID needing new function
mask.

Signed-off-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Tested-by: Julian Sikorski <belegdol@gmail.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index c0cba025072f..0d19669ac7ad 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -309,14 +309,15 @@ static void lpi_check_constraints(void)
 	}
 }
 
-static void acpi_sleep_run_lps0_dsm(unsigned int func)
+static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid)
 {
 	union acpi_object *out_obj;
 
-	if (!(lps0_dsm_func_mask & (1 << func)))
+	if (!(func_mask & (1 << func)))
 		return;
 
-	out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, rev_id, func, NULL);
+	out_obj = acpi_evaluate_dsm(lps0_device_handle, &dsm_guid,
+					rev_id, func, NULL);
 	ACPI_FREE(out_obj);
 
 	acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
@@ -412,11 +413,15 @@ int acpi_s2idle_prepare_late(void)
 		lpi_check_constraints();
 
 	if (acpi_s2idle_vendor_amd()) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD,
+				lps0_dsm_func_mask, lps0_dsm_guid);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD,
+				lps0_dsm_func_mask, lps0_dsm_guid);
 	} else {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF,
+				lps0_dsm_func_mask, lps0_dsm_guid);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
+				lps0_dsm_func_mask, lps0_dsm_guid);
 	}
 
 	return 0;
@@ -428,11 +433,15 @@ void acpi_s2idle_restore_early(void)
 		return;
 
 	if (acpi_s2idle_vendor_amd()) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD,
+				lps0_dsm_func_mask, lps0_dsm_guid);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD,
+				lps0_dsm_func_mask, lps0_dsm_guid);
 	} else {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT,
+				lps0_dsm_func_mask, lps0_dsm_guid);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON,
+				lps0_dsm_func_mask, lps0_dsm_guid);
 	}
 }
 

From 5dbf509975780851251361f2db287fdce11b7cae Mon Sep 17 00:00:00 2001
From: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Date: Thu, 17 Jun 2021 11:42:11 -0500
Subject: [PATCH 2161/3292] ACPI: PM: s2idle: Add support for new Microsoft
 UUID

This adds supports for _DSM notifications to the Microsoft UUID
described by Microsoft documentation for s2idle.

Link: https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/modern-standby-firmware-notifications
Co-developed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Pratik Vishwakarma <Pratik.Vishwakarma@amd.com>
Tested-by: Julian Sikorski <belegdol@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 0d19669ac7ad..3f2a90648ec9 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -32,6 +32,9 @@ static const struct acpi_device_id lps0_device_ids[] = {
 	{"", },
 };
 
+/* Microsoft platform agnostic UUID */
+#define ACPI_LPS0_DSM_UUID_MICROSOFT      "11e00d56-ce64-47ce-837b-1f898f9aa461"
+
 #define ACPI_LPS0_DSM_UUID	"c4eb40a0-6cd2-11e2-bcfd-0800200c9a66"
 
 #define ACPI_LPS0_GET_DEVICE_CONSTRAINTS	1
@@ -39,6 +42,8 @@ static const struct acpi_device_id lps0_device_ids[] = {
 #define ACPI_LPS0_SCREEN_ON	4
 #define ACPI_LPS0_ENTRY		5
 #define ACPI_LPS0_EXIT		6
+#define ACPI_LPS0_MS_ENTRY      7
+#define ACPI_LPS0_MS_EXIT       8
 
 /* AMD */
 #define ACPI_LPS0_DSM_UUID_AMD      "e3f32452-febc-43ce-9039-932122d37721"
@@ -51,6 +56,9 @@ static acpi_handle lps0_device_handle;
 static guid_t lps0_dsm_guid;
 static int lps0_dsm_func_mask;
 
+static guid_t lps0_dsm_guid_microsoft;
+static int lps0_dsm_func_mask_microsoft;
+
 /* Device constraint entry structure */
 struct lpi_device_info {
 	char *name;
@@ -366,14 +374,18 @@ static int lps0_device_attach(struct acpi_device *adev,
 		rev_id = 0;
 		lps0_dsm_func_mask = validate_dsm(adev->handle,
 					ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
+		lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
+					ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
+					&lps0_dsm_guid_microsoft);
 	} else {
 		rev_id = 1;
 		lps0_dsm_func_mask = validate_dsm(adev->handle,
 					ACPI_LPS0_DSM_UUID, rev_id, &lps0_dsm_guid);
+		lps0_dsm_func_mask_microsoft = -EINVAL;
 	}
 
-	if (lps0_dsm_func_mask < 0)
-		return 0;//function eval failed
+	if (lps0_dsm_func_mask < 0 && lps0_dsm_func_mask_microsoft < 0)
+		return 0; //function evaluation failed
 
 	lps0_device_handle = adev->handle;
 
@@ -412,7 +424,14 @@ int acpi_s2idle_prepare_late(void)
 	if (pm_debug_messages_on)
 		lpi_check_constraints();
 
-	if (acpi_s2idle_vendor_amd()) {
+	if (lps0_dsm_func_mask_microsoft > 0) {
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+	} else if (acpi_s2idle_vendor_amd()) {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD,
 				lps0_dsm_func_mask, lps0_dsm_guid);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY_AMD,
@@ -432,7 +451,14 @@ void acpi_s2idle_restore_early(void)
 	if (!lps0_device_handle || sleep_no_lps0)
 		return;
 
-	if (acpi_s2idle_vendor_amd()) {
+	if (lps0_dsm_func_mask_microsoft > 0) {
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON,
+				lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+	} else if (acpi_s2idle_vendor_amd()) {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT_AMD,
 				lps0_dsm_func_mask, lps0_dsm_guid);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD,

From 8fbd6c15ea0a1d5e5d4e8ce4cc31e31afbcc1678 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 17 Jun 2021 11:42:12 -0500
Subject: [PATCH 2162/3292] ACPI: PM: Adjust behavior for field problems on AMD
 systems

Some AMD Systems with uPEP _HID AMD004/AMDI005 have an off by one bug
in their function mask return.  This means that they will call entrance
but not exit for matching functions.

Other AMD systems with this HID should use the Microsoft generic UUID.

AMD systems with uPEP HID AMDI006 should be using the Microsoft method.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Tested-by: Julian Sikorski <belegdol@gmail.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/x86/s2idle.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 3f2a90648ec9..816bf2c34b7a 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -371,12 +371,27 @@ static int lps0_device_attach(struct acpi_device *adev,
 		return 0;
 
 	if (acpi_s2idle_vendor_amd()) {
+		/* AMD0004, AMDI0005:
+		 * - Should use rev_id 0x0
+		 * - function mask > 0x3: Should use AMD method, but has off by one bug
+		 * - function mask = 0x3: Should use Microsoft method
+		 * AMDI0006:
+		 * - should use rev_id 0x0
+		 * - function mask = 0x3: Should use Microsoft method
+		 */
+		const char *hid = acpi_device_hid(adev);
 		rev_id = 0;
 		lps0_dsm_func_mask = validate_dsm(adev->handle,
 					ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
 		lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
 					ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
 					&lps0_dsm_guid_microsoft);
+		if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") ||
+						 !strcmp(hid, "AMDI0005"))) {
+			lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
+			acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
+					  ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
+		}
 	} else {
 		rev_id = 1;
 		lps0_dsm_func_mask = validate_dsm(adev->handle,

From a79da21b48cc5f81b047ae4e70b4d9cb49c93a6a Mon Sep 17 00:00:00 2001
From: lijiazi <jqqlijiazi@gmail.com>
Date: Fri, 18 Jun 2021 11:17:20 +0800
Subject: [PATCH 2163/3292] blk-wbt: remove outdated comment

Now wbt_wait() returns void, so remove now outdated comment.

Signed-off-by: lijiazi <lijiazi@xiaomi.com>
Link: https://lore.kernel.org/r/1623986240-13878-1-git-send-email-lijiazi@xiaomi.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-wbt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 42aed0160f86..b363b0532704 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -563,7 +563,6 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
 }
 
 /*
- * Returns true if the IO request should be accounted, false if not.
  * May sleep, if we have exceeded the writeback limits. Caller can pass
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.

From 85102ba58b4125ebad941d7555c3c248b23efd16 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 16 Jun 2021 12:23:24 +0800
Subject: [PATCH 2164/3292] samples/bpf: Fix Segmentation fault for
 xdp_redirect command

A Segmentation fault error is caused when the following command
is executed.

$ sudo ./samples/bpf/xdp_redirect lo
Segmentation fault

This command is missing a device <IFNAME|IFINDEX> as an argument, resulting
in out-of-bounds access from argv.

If the number of devices for the xdp_redirect parameter is not 2,
we should report an error and exit.

Fixes: 24251c264798 ("samples/bpf: add option for native and skb mode for redirect apps")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210616042324.314832-1-wanghai38@huawei.com
---
 samples/bpf/xdp_redirect_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 41d705c3a1f7..eb876629109a 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -130,7 +130,7 @@ int main(int argc, char **argv)
 	if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
 		xdp_flags |= XDP_FLAGS_DRV_MODE;
 
-	if (optind == argc) {
+	if (optind + 2 != argc) {
 		printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
 		return 1;
 	}

From 7c6090ee2a7b3315410cfc83a94c3eb057407b25 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 16 Jun 2021 12:25:34 +0800
Subject: [PATCH 2165/3292] samples/bpf: Fix the error return code of
 xdp_redirect's main()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

If bpf_map_update_elem() failed, main() should return a negative error.

Fixes: 832622e6bd18 ("xdp: sample program for new bpf_redirect helper")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210616042534.315097-1-wanghai38@huawei.com
---
 samples/bpf/xdp_redirect_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index eb876629109a..93854e135134 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -213,5 +213,5 @@ int main(int argc, char **argv)
 	poll_stats(2, ifindex_out);
 
 out:
-	return 0;
+	return ret;
 }

From 275b51c27cc382325cf833dfbe1ce44071c2e2c4 Mon Sep 17 00:00:00 2001
From: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Date: Thu, 17 Jun 2021 14:36:32 +0300
Subject: [PATCH 2166/3292] drivers: net: netdevsim: fix devlink_trap selftests
 failing

devlink_trap tests for the netdevsim fail due to misspelled
debugfs file name. Change this name, as well as name of callback
function, to match the naming as in the devlink itself - 'trap_drop_counter'.

Test-results:
selftests: drivers/net/netdevsim: devlink_trap.sh
TEST: Initialization                                                [ OK ]
TEST: Trap action                                                   [ OK ]
TEST: Trap metadata                                                 [ OK ]
TEST: Non-existing trap                                             [ OK ]
TEST: Non-existing trap action                                      [ OK ]
TEST: Trap statistics                                               [ OK ]
TEST: Trap group action                                             [ OK ]
TEST: Non-existing trap group                                       [ OK ]
TEST: Trap group statistics                                         [ OK ]
TEST: Trap policer                                                  [ OK ]
TEST: Trap policer binding                                          [ OK ]
TEST: Port delete                                                   [ OK ]
TEST: Device delete                                                 [ OK ]

Fixes: a7b3527a43fe ("drivers: net: netdevsim: add devlink trap_drop_counter_get implementation")
Signed-off-by: Oleksandr Mazur <oleksandr.mazur@plvision.eu>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c       | 14 +++++++-------
 drivers/net/netdevsim/netdevsim.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index d85521989753..6348307bfa84 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -269,9 +269,9 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
 		err = PTR_ERR(nsim_dev->nodes_ddir);
 		goto err_out;
 	}
-	debugfs_create_bool("fail_trap_counter_get", 0600,
+	debugfs_create_bool("fail_trap_drop_counter_get", 0600,
 			    nsim_dev->ddir,
-			    &nsim_dev->fail_trap_counter_get);
+			    &nsim_dev->fail_trap_drop_counter_get);
 	nsim_udp_tunnels_debugfs_create(nsim_dev);
 	return 0;
 
@@ -1208,14 +1208,14 @@ static int nsim_rate_node_parent_set(struct devlink_rate *child,
 }
 
 static int
-nsim_dev_devlink_trap_hw_counter_get(struct devlink *devlink,
-				     const struct devlink_trap *trap,
-				     u64 *p_drops)
+nsim_dev_devlink_trap_drop_counter_get(struct devlink *devlink,
+				       const struct devlink_trap *trap,
+				       u64 *p_drops)
 {
 	struct nsim_dev *nsim_dev = devlink_priv(devlink);
 	u64 *cnt;
 
-	if (nsim_dev->fail_trap_counter_get)
+	if (nsim_dev->fail_trap_drop_counter_get)
 		return -EINVAL;
 
 	cnt = &nsim_dev->trap_data->trap_pkt_cnt;
@@ -1247,7 +1247,7 @@ static const struct devlink_ops nsim_dev_devlink_ops = {
 	.rate_node_del = nsim_rate_node_del,
 	.rate_leaf_parent_set = nsim_rate_leaf_parent_set,
 	.rate_node_parent_set = nsim_rate_node_parent_set,
-	.trap_drop_counter_get = nsim_dev_devlink_trap_hw_counter_get,
+	.trap_drop_counter_get = nsim_dev_devlink_trap_drop_counter_get,
 };
 
 #define NSIM_DEV_MAX_MACS_DEFAULT 32
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index f2304e61919a..ae462957dcee 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -249,7 +249,7 @@ struct nsim_dev {
 	bool fail_trap_group_set;
 	bool fail_trap_policer_set;
 	bool fail_trap_policer_counter_get;
-	bool fail_trap_counter_get;
+	bool fail_trap_drop_counter_get;
 	struct {
 		struct udp_tunnel_nic_shared utn_shared;
 		u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];

From d1434cf513583a6abe5b65f1824c741e9e7af764 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 17 Jun 2021 13:14:49 +0100
Subject: [PATCH 2167/3292] net: neterion: vxge: remove redundant continue
 statement

The continue statement at the end of a for-loop has no effect,
invert the if expression and remove the continue.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/neterion/vxge/vxge-main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 0528b8f49061..82eef4c72f01 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3678,10 +3678,9 @@ static int vxge_config_vpaths(struct vxge_hw_device_config *device_config,
 			driver_config->vpath_per_dev = 1;
 
 		for (i = 0; i < VXGE_HW_MAX_VIRTUAL_PATHS; i++)
-			if (!vxge_bVALn(vpath_mask, i, 1))
-				continue;
-			else
+			if (vxge_bVALn(vpath_mask, i, 1))
 				default_no_vpath++;
+
 		if (default_no_vpath < driver_config->vpath_per_dev)
 			driver_config->vpath_per_dev = default_no_vpath;
 

From 79ab2b37034b7790bd598597faddf689f5b10676 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 17 Jun 2021 18:55:51 +0300
Subject: [PATCH 2168/3292] Documentation: ACPI: DSD: include phy.rst in the
 toctree

Include the new phy.rst into the index of the ACPI support
documentation.

Fixes: e71305acd81c ("Documentation: ACPI: DSD: Document MDIO PHY")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/firmware-guide/acpi/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst
index f72b5f1769fb..a99ee402b212 100644
--- a/Documentation/firmware-guide/acpi/index.rst
+++ b/Documentation/firmware-guide/acpi/index.rst
@@ -11,6 +11,7 @@ ACPI Support
    dsd/graph
    dsd/data-node-references
    dsd/leds
+   dsd/phy
    enumeration
    osi
    method-customizing

From 5a336f97f1f5011cdca5467ef96372fd6d2fd128 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Thu, 17 Jun 2021 18:55:52 +0300
Subject: [PATCH 2169/3292] Documentation: ACPI: DSD: fix block code comments

Use the '.. code-block:: none' to properly highlight the documented DSDT
entries. This also fixes warnings in the documentation build process.

Fixes: e71305acd81c ("Documentation: ACPI: DSD: Document MDIO PHY")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/firmware-guide/acpi/dsd/phy.rst | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst
index 7d01ae8b3cc6..0d49bad2ea9c 100644
--- a/Documentation/firmware-guide/acpi/dsd/phy.rst
+++ b/Documentation/firmware-guide/acpi/dsd/phy.rst
@@ -27,7 +27,8 @@ network interfaces that have PHYs connected to MAC via MDIO bus.
 During the MDIO bus driver initialization, PHYs on this bus are probed
 using the _ADR object as shown below and are registered on the MDIO bus.
 
-::
+.. code-block:: none
+
       Scope(\_SB.MDI0)
       {
         Device(PHY1) {
@@ -60,7 +61,9 @@ component (PHYs on the MDIO bus).
 a) Silicon Component
 This node describes the MDIO controller, MDI0
 ---------------------------------------------
-::
+
+.. code-block:: none
+
 	Scope(_SB)
 	{
 	  Device(MDI0) {
@@ -80,7 +83,9 @@ This node describes the MDIO controller, MDI0
 b) Platform Component
 The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0
 ---------------------------------------------------------------------
-::
+
+.. code-block:: none
+
 	Scope(\_SB.MDI0)
 	{
 	  Device(PHY1) {
@@ -98,7 +103,9 @@ DSDT entries representing MAC nodes
 Below are the MAC nodes where PHY nodes are referenced.
 phy-mode and phy-handle are used as explained earlier.
 ------------------------------------------------------
-::
+
+.. code-block:: none
+
 	Scope(\_SB.MCE0.PR17)
 	{
 	  Name (_DSD, Package () {

From 8b532109bf885b7b59b93487bc4672eb6d071b78 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Thu, 17 Jun 2021 19:16:44 +0200
Subject: [PATCH 2170/3292] seg6: add support for SRv6 End.DT46 Behavior

IETF RFC 8986 [1] includes the definition of SRv6 End.DT4, End.DT6, and
End.DT46 Behaviors.

The current SRv6 code in the Linux kernel only implements End.DT4 and
End.DT6 which can be used respectively to support IPv4-in-IPv6 and
IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.

The proposed End.DT46 implementation is meant to support the decapsulation
of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
The implementation of the SRv6 End.DT46 Behavior in the Linux kernel
greatly simplifies the setup and operations of SRv6 VPNs.

The SRv6 End.DT46 Behavior leverages the infrastructure of SRv6 End.DT{4,6}
Behaviors implemented so far, because it makes use of a VRF device in
order to force the routing lookup into the associated routing table.

To make the End.DT46 work properly, it must be guaranteed that the routing
table used for routing lookup operations is bound to one and only one VRF
during the tunnel creation. Such constraint has to be enforced by enabling
the VRF strict_mode sysctl parameter, i.e.:

 $ sysctl -wq net.vrf.strict_mode=1

Note that the same approach is used for the SRv6 End.DT4 Behavior and for
the End.DT6 Behavior in VRF mode.

The command used to instantiate an SRv6 End.DT46 Behavior is
straightforward, i.e.:

 $ ip -6 route add 2001:db8::1 encap seg6local action End.DT46 vrftable 100 dev vrf100.

[1] https://www.rfc-editor.org/rfc/rfc8986.html#name-enddt46-decapsulation-and-s

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Performance and impact of SRv6 End.DT46 Behavior on the SRv6 Networking
=======================================================================

This patch aims to add the SRv6 End.DT46 Behavior with minimal impact on
the performance of SRv6 End.DT4 and End.DT6 Behaviors.
In order to verify this, we tested the performance of the newly introduced
SRv6 End.DT46 Behavior and compared it with the performance of SRv6
End.DT{4,6} Behaviors, considering both the patched kernel and the kernel
before applying the End.DT46 patch (referred to as vanilla kernel).

In details, the following decapsulation scenarios were considered:

 1.a) IPv6 traffic in SRv6 End.DT46 Behavior on patched kernel;
 1.b) IPv4 traffic in SRv6 End.DT46 Behavior on patched kernel;
 2.a) SRv6 End.DT6 Behavior (VRF mode) on patched kernel;
 2.b) SRv6 End.DT4 Behavior on patched kernel;
 3.a) SRv6 End.DT6 Behavior (VRF mode) on vanilla kernel (without the
      End.DT46 patch);
 3.b) SRv6 End.DT4 Behavior on vanilla kernel (without the End.DT46 patch).

All tests were performed on a testbed deployed on the CloudLab [2]
facilities. We considered IPv{4,6} traffic handled by a single core (at 2.4
GHz on a Xeon(R) CPU E5-2630 v3) on kernel 5.13-rc1 using packets of size
~ 100 bytes.

Scenario (1.a): average 684.70 kpps; std. dev. 0.7 kpps;
Scenario (1.b): average 711.69 kpps; std. dev. 1.2 kpps;
Scenario (2.a): average 690.70 kpps; std. dev. 1.2 kpps;
Scenario (2.b): average 722.22 kpps; std. dev. 1.7 kpps;
Scenario (3.a): average 690.02 kpps; std. dev. 2.6 kpps;
Scenario (3.b): average 721.91 kpps; std. dev. 1.2 kpps;

Considering the results for the patched kernel (1.a, 1.b, 2.a, 2.b) we
observe that the performance degradation incurred in using End.DT46 rather
than End.DT6 and End.DT4 respectively for IPv6 and IPv4 traffic is minimal,
around 0.9% and 1.5%. Such very minimal performance degradation is the
price to be paid if one prefers to use a single tunnel capable of handling
both types of traffic (IPv4 and IPv6).

Comparing the results for End.DT4 and End.DT6 under the patched and the
vanilla kernel (2.a, 2.b, 3.a, 3.b) we observe that the introduction of the
End.DT46 patch has no impact on the performance of End.DT4 and End.DT6.

[2] https://www.cloudlab.us

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/seg6_local.h |  2 +
 net/ipv6/seg6_local.c           | 94 +++++++++++++++++++++++++--------
 2 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index 5ae3ace84de0..332b18f318f8 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -64,6 +64,8 @@ enum {
 	SEG6_LOCAL_ACTION_END_AM	= 14,
 	/* custom BPF action */
 	SEG6_LOCAL_ACTION_END_BPF	= 15,
+	/* decap and lookup of DA in v4 or v6 table */
+	SEG6_LOCAL_ACTION_END_DT46	= 16,
 
 	__SEG6_LOCAL_ACTION_MAX,
 };
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 4ff38cb08f4b..60bf3b877957 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -87,10 +87,10 @@ struct seg6_end_dt_info {
 	int vrf_ifindex;
 	int vrf_table;
 
-	/* tunneled packet proto and family (IPv4 or IPv6) */
-	__be16 proto;
+	/* tunneled packet family (IPv4 or IPv6).
+	 * Protocol and header length are inferred from family.
+	 */
 	u16 family;
-	int hdrlen;
 };
 
 struct pcpu_seg6_local_counters {
@@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
 	info->net = net;
 	info->vrf_ifindex = vrf_ifindex;
 
-	switch (family) {
-	case AF_INET:
-		info->proto = htons(ETH_P_IP);
-		info->hdrlen = sizeof(struct iphdr);
-		break;
-	case AF_INET6:
-		info->proto = htons(ETH_P_IPV6);
-		info->hdrlen = sizeof(struct ipv6hdr);
-		break;
-	default:
-		return -EINVAL;
-	}
-
 	info->family = family;
 	info->mode = DT_VRF_MODE;
 
@@ -622,22 +609,44 @@ error:
 }
 
 static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
-				       struct seg6_local_lwt *slwt)
+				       struct seg6_local_lwt *slwt, u16 family)
 {
 	struct seg6_end_dt_info *info = &slwt->dt_info;
 	struct net_device *vrf;
+	__be16 protocol;
+	int hdrlen;
 
 	vrf = end_dt_get_vrf_rcu(skb, info);
 	if (unlikely(!vrf))
 		goto drop;
 
-	skb->protocol = info->proto;
+	switch (family) {
+	case AF_INET:
+		protocol = htons(ETH_P_IP);
+		hdrlen = sizeof(struct iphdr);
+		break;
+	case AF_INET6:
+		protocol = htons(ETH_P_IPV6);
+		hdrlen = sizeof(struct ipv6hdr);
+		break;
+	case AF_UNSPEC:
+		fallthrough;
+	default:
+		goto drop;
+	}
+
+	if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
+		pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
+		goto drop;
+	}
+
+	skb->protocol = protocol;
 
 	skb_dst_drop(skb);
 
-	skb_set_transport_header(skb, info->hdrlen);
+	skb_set_transport_header(skb, hdrlen);
 
-	return end_dt_vrf_rcv(skb, info->family, vrf);
+	return end_dt_vrf_rcv(skb, family, vrf);
 
 drop:
 	kfree_skb(skb);
@@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto drop;
 
-	skb = end_dt_vrf_core(skb, slwt);
+	skb = end_dt_vrf_core(skb, slwt, AF_INET);
 	if (!skb)
 		/* packet has been processed and consumed by the VRF */
 		return 0;
@@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
 		goto legacy_mode;
 
 	/* DT6_VRF_MODE */
-	skb = end_dt_vrf_core(skb, slwt);
+	skb = end_dt_vrf_core(skb, slwt, AF_INET6);
 	if (!skb)
 		/* packet has been processed and consumed by the VRF */
 		return 0;
@@ -767,6 +776,36 @@ drop:
 	return -EINVAL;
 }
 
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
+			       struct netlink_ext_ack *extack)
+{
+	return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
+}
+
+static int input_action_end_dt46(struct sk_buff *skb,
+				 struct seg6_local_lwt *slwt)
+{
+	unsigned int off = 0;
+	int nexthdr;
+
+	nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
+	if (unlikely(nexthdr < 0))
+		goto drop;
+
+	switch (nexthdr) {
+	case IPPROTO_IPIP:
+		return input_action_end_dt4(skb, slwt);
+	case IPPROTO_IPV6:
+		return input_action_end_dt6(skb, slwt);
+	}
+
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+#endif
+
 /* push an SRH on top of the current one */
 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
 {
@@ -968,6 +1007,17 @@ static struct seg6_action_desc seg6_action_table[] = {
 #endif
 		.input		= input_action_end_dt6,
 	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_DT46,
+		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+		.optattrs	= SEG6_F_LOCAL_COUNTERS,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+		.input		= input_action_end_dt46,
+		.slwt_ops	= {
+					.build_state = seg6_end_dt46_build,
+				  },
+#endif
+	},
 	{
 		.action		= SEG6_LOCAL_ACTION_END_B6,
 		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),

From 03a0b567a03d6449e9d68a591b3f12373b1fc091 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Thu, 17 Jun 2021 19:16:45 +0200
Subject: [PATCH 2171/3292] selftests: seg6: add selftest for SRv6 End.DT46
 Behavior

this selftest is designed for evaluating the new SRv6 End.DT46 Behavior
used, in this example, for implementing IPv4/IPv6 L3 VPN use cases.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
Acked-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/srv6_end_dt46_l3vpn_test.sh | 573 ++++++++++++++++++
 1 file changed, 573 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh

diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..75ada17ac061
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::1/64 |  |                                   |  |  cafe::2/64 |  |
+# |  | 10.0.0.1/24 |  |                                   |  | 10.0.0.2/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | | 10.0.0.254/24 |    +----------+ |   | +----------+    | 10.0.0.254/24 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | | 10.0.0.254/24 |                 |   |                 | 10.0.0.254/24 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::3/64 |  |                                   |  |  cafe::4/64 |  |
+# |  | 10.0.0.3/24 |  |                                   |  | 10.0.0.4/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID              |Action                          |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::2    |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t100               |
+# +---------------------------------------------------+
+# |10.0.0.2   |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::4    |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t200               |
+# +---------------------------------------------------+
+# |10.0.0.4   |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID              |Action                          |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::1    |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t100               |
+# +---------------------------------------------------+
+# |10.0.0.1   |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::3    |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t200               |
+# +---------------------------------------------------+
+# |10.0.0.3   |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200               |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	# destroy routers rt-* and hosts hs-*
+	for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+		ip netns del ${ns} || true
+	done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt=$1
+	local nsname=rt-${rt}
+
+	ip netns add ${nsname}
+	ip link set veth-rt-${rt} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hs=$1
+	local rt=$2
+	local tid=$3
+	local hsname=hs-t${tid}-${hs}
+	local rtname=rt-${rt}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns add ${hsname}
+
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+	ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	local hssrc_name=hs-t${tid}-${hssrc}
+	local hsdst_name=hs-t${tid}-${hsdst}
+	local rtsrc_name=rt-${rtsrc}
+	local rtdst_name=rt-${rtdst}
+	local rtveth=veth-t${tid}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+	ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT46 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+
+	# set default routes to unreachable for both ipv4 and ipv6
+	ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+		vrf vrf-${tid}
+
+	ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+		vrf vrf-${tid}
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+	check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+	check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+	check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}

From 752e906732c69412087f716e93baa0330cb7cce3 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:07 -0700
Subject: [PATCH 2172/3292] mptcp: add csum_enabled in mptcp_sock

This patch added a new member named csum_enabled in struct mptcp_sock,
used a dummy mptcp_is_checksum_enabled() helper to initialize it.

Also added a new member named mptcpi_csum_enabled in struct mptcp_info
to expose the csum_enabled flag.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mptcp.h | 1 +
 net/mptcp/mptcp_diag.c     | 1 +
 net/mptcp/protocol.c       | 1 +
 net/mptcp/protocol.h       | 2 ++
 4 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 8eb3c0844bff..7b05f7102321 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -105,6 +105,7 @@ struct mptcp_info {
 	__u64	mptcpi_rcv_nxt;
 	__u8	mptcpi_local_addr_used;
 	__u8	mptcpi_local_addr_max;
+	__u8	mptcpi_csum_enabled;
 };
 
 /*
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f16d9b5ee978..8f88ddeab6a2 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 	info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
 	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
 	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
 	unlock_sock_fast(sk, slow);
 }
 
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 993095089990..2caca0dc2c1c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2453,6 +2453,7 @@ static int __mptcp_init_sock(struct sock *sk)
 	msk->ack_hint = NULL;
 	msk->first = NULL;
 	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
+	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
 
 	mptcp_pm_data_init(msk);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 89f6b73783d5..1fc6693e257e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -234,6 +234,7 @@ struct mptcp_sock {
 	bool		snd_data_fin_enable;
 	bool		rcv_fastclose;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
+	bool		csum_enabled;
 	spinlock_t	join_list_lock;
 	struct sock	*ack_hint;
 	struct work_struct work;
@@ -525,6 +526,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
 
 int mptcp_is_enabled(struct net *net);
 unsigned int mptcp_get_add_addr_timeout(struct net *net);
+static inline int mptcp_is_checksum_enabled(struct net *net) { return false; }
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool mptcp_subflow_data_available(struct sock *sk);

From d0cc298745f5abb3c43319cb9485daf3471d6f94 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:08 -0700
Subject: [PATCH 2173/3292] mptcp: generate the data checksum

This patch added a new member named csum in struct mptcp_ext, implemented
a new function named mptcp_generate_data_checksum().

Generate the data checksum in mptcp_sendmsg_frag, save it in mpext->csum.

Note that we must generate the csum for zero window probe, too.

Do the csum update incrementally, to avoid multiple csum computation
when the data is appended to existing skb.

Note that in a later patch we will skip unneeded csum related operation.
Changes not included here to keep the delta small.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/protocol.c | 18 +++++++++++++++++-
 net/mptcp/protocol.h |  7 +++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 83f23774b908..23bbd439e115 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -23,6 +23,7 @@ struct mptcp_ext {
 	u64		data_seq;
 	u32		subflow_seq;
 	u16		data_len;
+	__sum16		csum;
 	u8		use_map:1,
 			dsn64:1,
 			data_fin:1,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2caca0dc2c1c..f0da067301f6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1308,6 +1308,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
 	return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
 }
 
+/* note: this always recompute the csum on the whole skb, even
+ * if we just appended a single frag. More status info needed
+ */
+static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
+{
+	struct mptcp_ext *mpext = mptcp_get_ext(skb);
+	__wsum csum = ~csum_unfold(mpext->csum);
+	int offset = skb->len - added;
+
+	mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
+}
+
 static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			      struct mptcp_data_frag *dfrag,
 			      struct mptcp_sendmsg_info *info)
@@ -1402,10 +1414,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	if (zero_window_probe) {
 		mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
 		mpext->frozen = 1;
-		ret = 0;
+		if (READ_ONCE(msk->csum_enabled))
+			mptcp_update_data_checksum(tail, ret);
 		tcp_push_pending_frames(ssk);
+		return 0;
 	}
 out:
+	if (READ_ONCE(msk->csum_enabled))
+		mptcp_update_data_checksum(tail, ret);
 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
 	return ret;
 }
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 1fc6693e257e..4913ac7b6d19 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -336,6 +336,13 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
 
+struct csum_pseudo_header {
+	__be64 data_seq;
+	__be32 subflow_seq;
+	__be16 data_len;
+	__sum16 csum;
+};
+
 struct mptcp_subflow_request_sock {
 	struct	tcp_request_sock sk;
 	u16	mp_capable : 1,

From 06fe1719aa501e3b574b1b2b3a7ad2ddac5fb9cb Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:09 -0700
Subject: [PATCH 2174/3292] mptcp: add csum_reqd in mptcp_out_options

This patch added a new member csum_reqd in struct mptcp_out_options and
struct mptcp_subflow_request_sock. Initialized it with the helper
function mptcp_is_checksum_enabled().

In mptcp_write_options, if this field is enabled, send out the MP_CAPABLE
suboption with the MPTCP_CAP_CHECKSUM_REQD flag.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  |  5 +++--
 net/mptcp/options.c  | 11 +++++++++--
 net/mptcp/protocol.h |  3 ++-
 net/mptcp/subflow.c  |  1 +
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 23bbd439e115..33af68eea96f 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -64,8 +64,9 @@ struct mptcp_out_options {
 	struct mptcp_rm_list rm_list;
 	u8 join_id;
 	u8 backup;
-	u8 reset_reason:4;
-	u8 reset_transient:1;
+	u8 reset_reason:4,
+	   reset_transient:1,
+	   csum_reqd:1;
 	u32 nonce;
 	u64 thmac;
 	u32 token;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 6b825fb3fa83..bb3a1f3b6e99 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -380,6 +380,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 	subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
 	if (subflow->request_mptcp) {
 		opts->suboptions = OPTION_MPTCP_MPC_SYN;
+		opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
 		*size = TCPOLEN_MPTCP_MPC_SYN;
 		return true;
 	} else if (subflow->request_join) {
@@ -435,6 +436,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 					 struct mptcp_out_options *opts)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	struct mptcp_ext *mpext;
 	unsigned int data_len;
 
@@ -465,6 +467,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 		opts->suboptions = OPTION_MPTCP_MPC_ACK;
 		opts->sndr_key = subflow->local_key;
 		opts->rcvr_key = subflow->remote_key;
+		opts->csum_reqd = READ_ONCE(msk->csum_enabled);
 
 		/* Section 3.1.
 		 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
@@ -789,6 +792,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
 	if (subflow_req->mp_capable) {
 		opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
 		opts->sndr_key = subflow_req->local_key;
+		opts->csum_reqd = subflow_req->csum_reqd;
 		*size = TCPOLEN_MPTCP_MPC_SYNACK;
 		pr_debug("subflow_req=%p, local_key=%llu",
 			 subflow_req, subflow_req->local_key);
@@ -1123,7 +1127,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 {
 	if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
 	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
-		u8 len;
+		u8 len, flag = MPTCP_CAP_HMAC_SHA256;
 
 		if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
 			len = TCPOLEN_MPTCP_MPC_SYN;
@@ -1134,9 +1138,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 		else
 			len = TCPOLEN_MPTCP_MPC_ACK;
 
+		if (opts->csum_reqd)
+			flag |= MPTCP_CAP_CHECKSUM_REQD;
+
 		*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
 				      MPTCP_SUPPORTED_VERSION,
-				      MPTCP_CAP_HMAC_SHA256);
+				      flag);
 
 		if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
 		    opts->suboptions))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 4913ac7b6d19..09e94726e030 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -347,7 +347,8 @@ struct mptcp_subflow_request_sock {
 	struct	tcp_request_sock sk;
 	u16	mp_capable : 1,
 		mp_join : 1,
-		backup : 1;
+		backup : 1,
+		csum_reqd : 1;
 	u8	local_id;
 	u8	remote_id;
 	u64	local_key;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 33956337c46b..45acab63c387 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -108,6 +108,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
 
 	subflow_req->mp_capable = 0;
 	subflow_req->mp_join = 0;
+	subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
 	subflow_req->msk = NULL;
 	mptcp_token_init_request(req);
 }

From c94b1f96dcfb2e5bd072b10f3429ccf28778ad58 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:10 -0700
Subject: [PATCH 2175/3292] mptcp: send out checksum for MP_CAPABLE with data

If the checksum is enabled, send out the data checksum with the
MP_CAPABLE suboption with data.

In mptcp_established_options_mp, save the data checksum in
opts->ext_copy.csum. In mptcp_write_options, adjust the option length and
send it out with the MP_CAPABLE suboption.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 52 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index bb3a1f3b6e99..b4da08db1221 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -439,6 +439,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	struct mptcp_ext *mpext;
 	unsigned int data_len;
+	u8 len;
 
 	/* When skb is not available, we better over-estimate the emitted
 	 * options len. A full DSS option (28 bytes) is longer than
@@ -474,10 +475,16 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 		 * packets that start the first subflow of an MPTCP connection,
 		 * as well as the first packet that carries data
 		 */
-		if (data_len > 0)
-			*size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
-		else
+		if (data_len > 0) {
+			len = TCPOLEN_MPTCP_MPC_ACK_DATA;
+			if (opts->csum_reqd) {
+				opts->ext_copy.csum = mpext->csum;
+				len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+			}
+			*size = ALIGN(len, 4);
+		} else {
 			*size = TCPOLEN_MPTCP_MPC_ACK;
+		}
 
 		pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
 			 subflow, subflow->local_key, subflow->remote_key,
@@ -1122,6 +1129,25 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
 		WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
 }
 
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+	struct csum_pseudo_header header;
+	__wsum csum;
+
+	/* cfr RFC 8684 3.3.1.:
+	 * the data sequence number used in the pseudo-header is
+	 * always the 64-bit value, irrespective of what length is used in the
+	 * DSS option itself.
+	 */
+	header.data_seq = cpu_to_be64(mpext->data_seq);
+	header.subflow_seq = htonl(mpext->subflow_seq);
+	header.data_len = htons(mpext->data_len);
+	header.csum = 0;
+
+	csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+	return (__force u16)csum_fold(csum);
+}
+
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 			 struct mptcp_out_options *opts)
 {
@@ -1129,14 +1155,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
 		u8 len, flag = MPTCP_CAP_HMAC_SHA256;
 
-		if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
+		if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
 			len = TCPOLEN_MPTCP_MPC_SYN;
-		else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+		} else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
 			len = TCPOLEN_MPTCP_MPC_SYNACK;
-		else if (opts->ext_copy.data_len)
+		} else if (opts->ext_copy.data_len) {
 			len = TCPOLEN_MPTCP_MPC_ACK_DATA;
-		else
+			if (opts->csum_reqd)
+				len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+		} else {
 			len = TCPOLEN_MPTCP_MPC_ACK;
+		}
 
 		if (opts->csum_reqd)
 			flag |= MPTCP_CAP_CHECKSUM_REQD;
@@ -1159,8 +1188,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 		if (!opts->ext_copy.data_len)
 			goto mp_capable_done;
 
-		put_unaligned_be32(opts->ext_copy.data_len << 16 |
-				   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+		if (opts->csum_reqd) {
+			put_unaligned_be32(opts->ext_copy.data_len << 16 |
+					   mptcp_make_csum(&opts->ext_copy), ptr);
+		} else {
+			put_unaligned_be32(opts->ext_copy.data_len << 16 |
+					   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+		}
 		ptr += 1;
 	}
 

From c5b39e26d0036423be09c39ad142e91a2d5d278b Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:11 -0700
Subject: [PATCH 2176/3292] mptcp: send out checksum for DSS

In mptcp_write_options, if the checksum is enabled, adjust the option
length and send out the data checksum with DSS suboption.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b4da08db1221..1468774f1f87 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -478,6 +478,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 		if (data_len > 0) {
 			len = TCPOLEN_MPTCP_MPC_ACK_DATA;
 			if (opts->csum_reqd) {
+				/* we need to propagate more info to csum the pseudo hdr */
+				opts->ext_copy.data_seq = mpext->data_seq;
+				opts->ext_copy.subflow_seq = mpext->subflow_seq;
 				opts->ext_copy.csum = mpext->csum;
 				len += TCPOLEN_MPTCP_DSS_CHECKSUM;
 			}
@@ -545,18 +548,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 	bool ret = false;
 	u64 ack_seq;
 
+	opts->csum_reqd = READ_ONCE(msk->csum_enabled);
 	mpext = skb ? mptcp_get_ext(skb) : NULL;
 
 	if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
-		unsigned int map_size;
+		unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
 
-		map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+		if (mpext) {
+			if (opts->csum_reqd)
+				map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
+
+			opts->ext_copy = *mpext;
+		}
 
 		remaining -= map_size;
 		dss_size = map_size;
-		if (mpext)
-			opts->ext_copy = *mpext;
-
 		if (skb && snd_data_fin_enable)
 			mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
 		ret = true;
@@ -1346,6 +1352,9 @@ mp_capable_done:
 			flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
 			if (mpext->data_fin)
 				flags |= MPTCP_DSS_DATA_FIN;
+
+			if (opts->csum_reqd)
+				len += TCPOLEN_MPTCP_DSS_CHECKSUM;
 		}
 
 		*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1365,8 +1374,13 @@ mp_capable_done:
 			ptr += 2;
 			put_unaligned_be32(mpext->subflow_seq, ptr);
 			ptr += 1;
-			put_unaligned_be32(mpext->data_len << 16 |
-					   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+			if (opts->csum_reqd) {
+				put_unaligned_be32(mpext->data_len << 16 |
+						   mptcp_make_csum(mpext), ptr);
+			} else {
+				put_unaligned_be32(mpext->data_len << 16 |
+						   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+			}
 		}
 	}
 

From c863225b79426459feca2ef5b0cc2f07e8e68771 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:12 -0700
Subject: [PATCH 2177/3292] mptcp: add sk parameter for mptcp_get_options

This patch added a new parameter name sk in mptcp_get_options().

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  |  5 +++--
 net/mptcp/protocol.h |  3 ++-
 net/mptcp/subflow.c  | 10 +++++-----
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 1468774f1f87..ae69059583a7 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -323,7 +323,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 	}
 }
 
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+		       const struct sk_buff *skb,
 		       struct mptcp_options_received *mp_opt)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -1024,7 +1025,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	mptcp_get_options(skb, &mp_opt);
+	mptcp_get_options(sk, skb, &mp_opt);
 	if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
 		return;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 09e94726e030..a7ed0b8eb9bc 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -586,7 +586,8 @@ int __init mptcp_proto_v6_init(void);
 struct sock *mptcp_sk_clone(const struct sock *sk,
 			    const struct mptcp_options_received *mp_opt,
 			    struct request_sock *req);
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+		       const struct sk_buff *skb,
 		       struct mptcp_options_received *mp_opt);
 
 void mptcp_finish_connect(struct sock *sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 45acab63c387..aa6b307b27c8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -151,7 +151,7 @@ static int subflow_check_req(struct request_sock *req,
 		return -EINVAL;
 #endif
 
-	mptcp_get_options(skb, &mp_opt);
+	mptcp_get_options(sk_listener, skb, &mp_opt);
 
 	if (mp_opt.mp_capable) {
 		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -248,7 +248,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
 	int err;
 
 	subflow_init_req(req, sk_listener);
-	mptcp_get_options(skb, &mp_opt);
+	mptcp_get_options(sk_listener, skb, &mp_opt);
 
 	if (mp_opt.mp_capable && mp_opt.mp_join)
 		return -EINVAL;
@@ -395,7 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
 	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
 
-	mptcp_get_options(skb, &mp_opt);
+	mptcp_get_options(sk, skb, &mp_opt);
 	if (subflow->request_mptcp) {
 		if (!mp_opt.mp_capable) {
 			MPTCP_INC_STATS(sock_net(sk),
@@ -639,7 +639,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 		 * reordered MPC will cause fallback, but we don't have other
 		 * options.
 		 */
-		mptcp_get_options(skb, &mp_opt);
+		mptcp_get_options(sk, skb, &mp_opt);
 		if (!mp_opt.mp_capable) {
 			fallback = true;
 			goto create_child;
@@ -649,7 +649,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 		if (!new_msk)
 			fallback = true;
 	} else if (subflow_req->mp_join) {
-		mptcp_get_options(skb, &mp_opt);
+		mptcp_get_options(sk, skb, &mp_opt);
 		if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
 		    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);

From 0625118115cf2ee8e435bf86d1c1f0bfdee9d7c8 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:13 -0700
Subject: [PATCH 2178/3292] mptcp: add csum_reqd in mptcp_options_received

This patch added a new flag csum_reqd in struct mptcp_options_received, if
the flag MPTCP_CAP_CHECKSUM_REQD is set in the receiving MP_CAPABLE
suboption, set this flag.

In mptcp_sk_clone and subflow_finish_connect, if the csum_reqd flag is set,
enable the msk->csum_enabled flag.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 7 ++++---
 net/mptcp/protocol.c | 2 ++
 net/mptcp/protocol.h | 1 +
 net/mptcp/subflow.c  | 2 ++
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index ae69059583a7..2e2551590ecd 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -71,11 +71,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		 * "If a checksum is not present when its use has been
 		 * negotiated, the receiver MUST close the subflow with a RST as
 		 * it is considered broken."
-		 *
-		 * We don't implement DSS checksum - fall back to TCP.
 		 */
 		if (flags & MPTCP_CAP_CHECKSUM_REQD)
-			break;
+			mp_opt->csum_reqd = 1;
 
 		mp_opt->mp_capable = 1;
 		if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -327,6 +325,8 @@ void mptcp_get_options(const struct sock *sk,
 		       const struct sk_buff *skb,
 		       struct mptcp_options_received *mp_opt)
 {
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	const struct tcphdr *th = tcp_hdr(skb);
 	const unsigned char *ptr;
 	int length;
@@ -342,6 +342,7 @@ void mptcp_get_options(const struct sock *sk,
 	mp_opt->dss = 0;
 	mp_opt->mp_prio = 0;
 	mp_opt->reset = 0;
+	mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f0da067301f6..b6e5c0930533 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2810,6 +2810,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->token = subflow_req->token;
 	msk->subflow = NULL;
 	WRITE_ONCE(msk->fully_established, false);
+	if (mp_opt->csum_reqd)
+		WRITE_ONCE(msk->csum_enabled, true);
 
 	msk->write_seq = subflow_req->idsn + 1;
 	msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a7ed0b8eb9bc..66e5063ac6c9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -133,6 +133,7 @@ struct mptcp_options_received {
 		rm_addr : 1,
 		mp_prio : 1,
 		echo : 1,
+		csum_reqd : 1,
 		backup : 1;
 	u32	token;
 	u32	nonce;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index aa6b307b27c8..9b82ce635c6e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -405,6 +405,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 			goto fallback;
 		}
 
+		if (mp_opt.csum_reqd)
+			WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
 		subflow->mp_capable = 1;
 		subflow->can_ack = 1;
 		subflow->remote_key = mp_opt.sndr_key;

From 208e8f66926c5d73e3f359385c1dd49dbc48d067 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:14 -0700
Subject: [PATCH 2179/3292] mptcp: receive checksum for MP_CAPABLE with data

This patch added a new member named csum in struct mptcp_options_received.

When parsing the MP_CAPABLE with data, if the checksum is enabled,
adjust the expected_opsize. If the receiving option length matches the
length with the data checksum, get the checksum value and save it in
mp_opt->csum. And in mptcp_incoming_options, pass it to mpext->csum.

We always parse any csum/nocsum combination and delay the presence check
to later code, to allow reset if missing.

Additionally, in the TX path, use the newly introduce ext field to avoid
MPTCP csum recomputation on TCP retransmission and unneeded csum update
on when setting the data fin_flag.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  |  3 ++-
 net/mptcp/options.c  | 35 ++++++++++++++++++++++++++---------
 net/mptcp/protocol.h |  3 +++
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 33af68eea96f..d61bbbf11979 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -32,7 +32,8 @@ struct mptcp_ext {
 			mpc_map:1,
 			frozen:1,
 			reset_transient:1;
-	u8		reset_reason:4;
+	u8		reset_reason:4,
+			csum_reqd:1;
 };
 
 #define MPTCP_RM_IDS_MAX	8
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 2e2551590ecd..8cbc75868969 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			else
 				expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
 		}
-		if (opsize != expected_opsize)
+
+		/* Cfr RFC 8684 Section 3.3.0:
+		 * If a checksum is present but its use had
+		 * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
+		 * close the subflow with a RST, as it is not behaving as negotiated.
+		 * If a checksum is not present when its use has been negotiated, the
+		 * receiver MUST close the subflow with a RST, as it is considered
+		 * broken
+		 * We parse even option with mismatching csum presence, so that
+		 * later in subflow_data_ready we can trigger the reset.
+		 */
+		if (opsize != expected_opsize &&
+		    (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
+		     opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
 			break;
 
 		/* try to be gentle vs future versions on the initial syn */
@@ -66,11 +79,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		 * host requires the use of checksums, checksums MUST be used.
 		 * In other words, the only way for checksums not to be used
 		 * is if both hosts in their SYNs set A=0."
-		 *
-		 * Section 3.3.0:
-		 * "If a checksum is not present when its use has been
-		 * negotiated, the receiver MUST close the subflow with a RST as
-		 * it is considered broken."
 		 */
 		if (flags & MPTCP_CAP_CHECKSUM_REQD)
 			mp_opt->csum_reqd = 1;
@@ -84,7 +92,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			mp_opt->rcvr_key = get_unaligned_be64(ptr);
 			ptr += 8;
 		}
-		if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+		if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
 			/* Section 3.1.:
 			 * "the data parameters in a MP_CAPABLE are semantically
 			 * equivalent to those in a DSS option and can be used
@@ -96,9 +104,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			mp_opt->data_len = get_unaligned_be16(ptr);
 			ptr += 2;
 		}
-		pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+		if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+			mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+			mp_opt->csum_reqd = 1;
+			ptr += 2;
+		}
+		pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
 			 version, flags, opsize, mp_opt->sndr_key,
-			 mp_opt->rcvr_key, mp_opt->data_len);
+			 mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
 		break;
 
 	case MPTCPOPT_MP_JOIN:
@@ -1118,6 +1131,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		}
 		mpext->data_len = mp_opt.data_len;
 		mpext->use_map = 1;
+		mpext->csum_reqd = mp_opt.csum_reqd;
+
+		if (mpext->csum_reqd)
+			mpext->csum = mp_opt.csum;
 	}
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 66e5063ac6c9..76194babc754 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -68,6 +68,8 @@
 #define TCPOLEN_MPTCP_FASTCLOSE		12
 #define TCPOLEN_MPTCP_RST		4
 
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM	(TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
+
 /* MPTCP MP_JOIN flags */
 #define MPTCPOPT_BACKUP		BIT(0)
 #define MPTCPOPT_HMAC_LEN	20
@@ -124,6 +126,7 @@ struct mptcp_options_received {
 	u64	data_seq;
 	u32	subflow_seq;
 	u16	data_len;
+	__sum16	csum;
 	u16	mp_capable : 1,
 		mp_join : 1,
 		fastclose : 1,

From 390b95a5fb84e7999eedb021382c96d1500e01fc Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:15 -0700
Subject: [PATCH 2180/3292] mptcp: receive checksum for DSS

In mptcp_parse_option, adjust the expected_opsize, and always parse the
data checksum value from the receiving DSS regardless of csum presence.
Then save it in mp_opt->csum.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8cbc75868969..1aec01686c1a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -182,10 +182,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 				expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
 		}
 
-		/* RFC 6824, Section 3.3:
-		 * If a checksum is present, but its use had
-		 * not been negotiated in the MP_CAPABLE handshake,
-		 * the checksum field MUST be ignored.
+		/* Always parse any csum presence combination, we will enforce
+		 * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
 		 */
 		if (opsize != expected_opsize &&
 		    opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
@@ -220,9 +218,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			mp_opt->data_len = get_unaligned_be16(ptr);
 			ptr += 2;
 
-			pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+			if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+				mp_opt->csum_reqd = 1;
+				mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+				ptr += 2;
+			}
+
+			pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
 				 mp_opt->data_seq, mp_opt->subflow_seq,
-				 mp_opt->data_len);
+				 mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
 		}
 
 		break;

From dd8bcd1768ff76bf2da1154897871adcc4ec078a Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 17 Jun 2021 16:46:16 -0700
Subject: [PATCH 2181/3292] mptcp: validate the data checksum

This patch added three new members named data_csum, csum_len and
map_csum in struct mptcp_subflow_context, implemented a new function
named mptcp_validate_data_checksum().

If the current mapping is valid and csum is enabled traverse the later
pending skbs and compute csum incrementally till the whole mapping has
been covered. If not enough data is available in the rx queue, return
MAPPING_EMPTY - that is, no data.

Next subflow_data_ready invocation will trigger again csum computation.

When the full DSS is available, validate the csum and return to the
caller an appropriate error code, to trigger subflow reset of fallback
as required by the RFC.

Additionally:
- if the csum prevence in the DSS don't match the negotiated value e.g.
  csum present, but not requested, return invalid mapping to trigger
  subflow reset.
- keep some csum state, to avoid re-compute the csum on the same data
  when multiple rx queue traversal are required.
- clean-up the uncompleted mapping from the receive queue on close, to
  allow proper subflow disposal

Co-developed-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.h |   4 ++
 net/mptcp/subflow.c  | 105 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 76194babc754..12637299b42e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -400,6 +400,8 @@ struct mptcp_subflow_context {
 	u32	map_subflow_seq;
 	u32	ssn_offset;
 	u32	map_data_len;
+	__wsum	map_data_csum;
+	u32	map_csum_len;
 	u32	request_mptcp : 1,  /* send MP_CAPABLE */
 		request_join : 1,   /* send MP_JOIN */
 		request_bkup : 1,
@@ -409,6 +411,8 @@ struct mptcp_subflow_context {
 		pm_notified : 1,    /* PM hook called for established status */
 		conn_finished : 1,
 		map_valid : 1,
+		map_csum_reqd : 1,
+		map_data_fin : 1,
 		mpc_map : 1,
 		backup : 1,
 		send_mp_prio : 1,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9b82ce635c6e..9ccc4686d0d4 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -827,10 +827,90 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 	return true;
 }
 
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
+					      bool csum_reqd)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	struct csum_pseudo_header header;
+	u32 offset, seq, delta;
+	__wsum csum;
+	int len;
+
+	if (!csum_reqd)
+		return MAPPING_OK;
+
+	/* mapping already validated on previous traversal */
+	if (subflow->map_csum_len == subflow->map_data_len)
+		return MAPPING_OK;
+
+	/* traverse the receive queue, ensuring it contains a full
+	 * DSS mapping and accumulating the related csum.
+	 * Preserve the accoumlate csum across multiple calls, to compute
+	 * the csum only once
+	 */
+	delta = subflow->map_data_len - subflow->map_csum_len;
+	for (;;) {
+		seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
+		offset = seq - TCP_SKB_CB(skb)->seq;
+
+		/* if the current skb has not been accounted yet, csum its contents
+		 * up to the amount covered by the current DSS
+		 */
+		if (offset < skb->len) {
+			__wsum csum;
+
+			len = min(skb->len - offset, delta);
+			csum = skb_checksum(skb, offset, len, 0);
+			subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
+								subflow->map_csum_len);
+
+			delta -= len;
+			subflow->map_csum_len += len;
+		}
+		if (delta == 0)
+			break;
+
+		if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
+			/* if this subflow is closed, the partial mapping
+			 * will be never completed; flush the pending skbs, so
+			 * that subflow_sched_work_if_closed() can kick in
+			 */
+			if (unlikely(ssk->sk_state == TCP_CLOSE))
+				while ((skb = skb_peek(&ssk->sk_receive_queue)))
+					sk_eat_skb(ssk, skb);
+
+			/* not enough data to validate the csum */
+			return MAPPING_EMPTY;
+		}
+
+		/* the DSS mapping for next skbs will be validated later,
+		 * when a get_mapping_status call will process such skb
+		 */
+		skb = skb->next;
+	}
+
+	/* note that 'map_data_len' accounts only for the carried data, does
+	 * not include the eventual seq increment due to the data fin,
+	 * while the pseudo header requires the original DSS data len,
+	 * including that
+	 */
+	header.data_seq = cpu_to_be64(subflow->map_seq);
+	header.subflow_seq = htonl(subflow->map_subflow_seq);
+	header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+	header.csum = 0;
+
+	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+	if (unlikely(csum_fold(csum)))
+		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+
+	return MAPPING_OK;
+}
+
 static enum mapping_status get_mapping_status(struct sock *ssk,
 					      struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	bool csum_reqd = READ_ONCE(msk->csum_enabled);
 	struct mptcp_ext *mpext;
 	struct sk_buff *skb;
 	u16 data_len;
@@ -923,9 +1003,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		/* Allow replacing only with an identical map */
 		if (subflow->map_seq == map_seq &&
 		    subflow->map_subflow_seq == mpext->subflow_seq &&
-		    subflow->map_data_len == data_len) {
+		    subflow->map_data_len == data_len &&
+		    subflow->map_csum_reqd == mpext->csum_reqd) {
 			skb_ext_del(skb, SKB_EXT_MPTCP);
-			return MAPPING_OK;
+			goto validate_csum;
 		}
 
 		/* If this skb data are fully covered by the current mapping,
@@ -937,17 +1018,27 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		}
 
 		/* will validate the next map after consuming the current one */
-		return MAPPING_OK;
+		goto validate_csum;
 	}
 
 	subflow->map_seq = map_seq;
 	subflow->map_subflow_seq = mpext->subflow_seq;
 	subflow->map_data_len = data_len;
 	subflow->map_valid = 1;
+	subflow->map_data_fin = mpext->data_fin;
 	subflow->mpc_map = mpext->mpc_map;
-	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
+	subflow->map_csum_reqd = mpext->csum_reqd;
+	subflow->map_csum_len = 0;
+	subflow->map_data_csum = csum_unfold(mpext->csum);
+
+	/* Cfr RFC 8684 Section 3.3.0 */
+	if (unlikely(subflow->map_csum_reqd != csum_reqd))
+		return MAPPING_INVALID;
+
+	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
 		 subflow->map_seq, subflow->map_subflow_seq,
-		 subflow->map_data_len);
+		 subflow->map_data_len, subflow->map_csum_reqd,
+		 subflow->map_data_csum);
 
 validate_seq:
 	/* we revalidate valid mapping on new skb, because we must ensure
@@ -957,7 +1048,9 @@ validate_seq:
 		return MAPPING_INVALID;
 
 	skb_ext_del(skb, SKB_EXT_MPTCP);
-	return MAPPING_OK;
+
+validate_csum:
+	return validate_data_csum(ssk, skb, csum_reqd);
 }
 
 static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,

From 4e14867d5e9185e38f730d65c89b728640d68dd1 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 17 Jun 2021 16:46:17 -0700
Subject: [PATCH 2182/3292] mptcp: tune re-injections for csum enabled mode

If the MPTCP-level checksum is enabled, on re-injections we
must spool a complete DSS, or the receive side will not be
able to compute the csum and process any data.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b6e5c0930533..42fc7187beee 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2375,8 +2375,8 @@ static void __mptcp_retrans(struct sock *sk)
 
 	/* limit retransmission to the bytes already sent on some subflows */
 	info.sent = 0;
-	info.limit = dfrag->already_sent;
-	while (info.sent < dfrag->already_sent) {
+	info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
+	while (info.sent < info.limit) {
 		if (!mptcp_alloc_tx_skb(sk, ssk))
 			break;
 
@@ -2388,9 +2388,11 @@ static void __mptcp_retrans(struct sock *sk)
 		copied += ret;
 		info.sent += ret;
 	}
-	if (copied)
+	if (copied) {
+		dfrag->already_sent = max(dfrag->already_sent, info.sent);
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
+	}
 
 	mptcp_set_timeout(sk, ssk);
 	release_sock(ssk);

From fe3ab1cbd357d9d0903f2d00038c2cb7141e7fe5 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:18 -0700
Subject: [PATCH 2183/3292] mptcp: add the mib for data checksum

This patch added the mib for the data checksum, MPTCP_MIB_DATACSUMERR.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/mib.c     | 1 +
 net/mptcp/mib.h     | 1 +
 net/mptcp/subflow.c | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index eb2dc6dbe212..e7e60bc1fb96 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
 	SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
 	SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+	SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
 	SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
 	SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
 	SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index f0da4f060fe1..92e56c0cfbdd 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_JOINACKMAC,		/* HMAC was wrong on ACK + MP_JOIN */
 	MPTCP_MIB_DSSNOMATCH,		/* Received a new mapping that did not match the previous one */
 	MPTCP_MIB_INFINITEMAPRX,	/* Received an infinite mapping */
+	MPTCP_MIB_DATACSUMERR,		/* The data checksum fail */
 	MPTCP_MIB_OFOQUEUETAIL,	/* Segments inserted into OoO queue tail */
 	MPTCP_MIB_OFOQUEUE,		/* Segments inserted into OoO queue */
 	MPTCP_MIB_OFOMERGE,		/* Segments merged in OoO queue */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ccc4686d0d4..6b1cd4257edf 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -900,8 +900,10 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
 	header.csum = 0;
 
 	csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
-	if (unlikely(csum_fold(csum)))
+	if (unlikely(csum_fold(csum))) {
+		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
 		return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+	}
 
 	return MAPPING_OK;
 }

From fc3c82eebf8e2e193412612f509530b4ff5611bf Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:19 -0700
Subject: [PATCH 2184/3292] mptcp: add a new sysctl checksum_enabled

This patch added a new sysctl, named checksum_enabled, to control
whether DSS checksum can be enabled.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/mptcp-sysctl.rst |  8 ++++++++
 net/mptcp/ctrl.c                          | 16 ++++++++++++++++
 net/mptcp/protocol.h                      |  2 +-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index 3b352e5f6300..ee06fd782465 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds)
 	sysctl.
 
 	Default: 120
+
+checksum_enabled - BOOLEAN
+	Control whether DSS checksum can be enabled.
+
+	DSS checksum can be enabled if the value is nonzero. This is a
+	per-namespace sysctl.
+
+	Default: 0
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 1ec4d36a39f0..6c2639bb9c19 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -23,6 +23,7 @@ struct mptcp_pernet {
 
 	u8 mptcp_enabled;
 	unsigned int add_addr_timeout;
+	u8 checksum_enabled;
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net)
 	return mptcp_get_pernet(net)->add_addr_timeout;
 }
 
+int mptcp_is_checksum_enabled(struct net *net)
+{
+	return mptcp_get_pernet(net)->checksum_enabled;
+}
+
 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
 	pernet->mptcp_enabled = 1;
 	pernet->add_addr_timeout = TCP_RTO_MAX;
+	pernet->checksum_enabled = 0;
 }
 
 #ifdef CONFIG_SYSCTL
@@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
 		.mode = 0644,
 		.proc_handler = proc_dointvec_jiffies,
 	},
+	{
+		.procname = "checksum_enabled",
+		.maxlen = sizeof(u8),
+		.mode = 0644,
+		.proc_handler = proc_dou8vec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE
+	},
 	{}
 };
 
@@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 
 	table[0].data = &pernet->mptcp_enabled;
 	table[1].data = &pernet->add_addr_timeout;
+	table[2].data = &pernet->checksum_enabled;
 
 	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
 	if (!hdr)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 12637299b42e..16e50caf200e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -542,7 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
 
 int mptcp_is_enabled(struct net *net);
 unsigned int mptcp_get_add_addr_timeout(struct net *net);
-static inline int mptcp_is_checksum_enabled(struct net *net) { return false; }
+int mptcp_is_checksum_enabled(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool mptcp_subflow_data_available(struct sock *sk);

From 401e3030e68f1c761a7137dc6f0cf39f585ab4bd Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:20 -0700
Subject: [PATCH 2185/3292] mptcp: dump csum fields in mptcp_dump_mpext

In mptcp_dump_mpext, dump the csum fields, csum and csum_reqd in struct
mptcp_dump_mpext too.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/mptcp.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index 775a46d0b0f0..6bf43176f14c 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -73,6 +73,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
 		__field(u64, data_seq)
 		__field(u32, subflow_seq)
 		__field(u16, data_len)
+		__field(u16, csum)
 		__field(u8, use_map)
 		__field(u8, dsn64)
 		__field(u8, data_fin)
@@ -82,6 +83,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
 		__field(u8, frozen)
 		__field(u8, reset_transient)
 		__field(u8, reset_reason)
+		__field(u8, csum_reqd)
 	),
 
 	TP_fast_assign(
@@ -89,6 +91,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
 		__entry->data_seq = mpext->data_seq;
 		__entry->subflow_seq = mpext->subflow_seq;
 		__entry->data_len = mpext->data_len;
+		__entry->csum = (__force u16)mpext->csum;
 		__entry->use_map = mpext->use_map;
 		__entry->dsn64 = mpext->dsn64;
 		__entry->data_fin = mpext->data_fin;
@@ -98,16 +101,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
 		__entry->frozen = mpext->frozen;
 		__entry->reset_transient = mpext->reset_transient;
 		__entry->reset_reason = mpext->reset_reason;
+		__entry->csum_reqd = mpext->csum_reqd;
 	),
 
-	TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u",
+	TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
 		  __entry->data_ack, __entry->data_seq,
 		  __entry->subflow_seq, __entry->data_len,
-		  __entry->use_map, __entry->dsn64,
-		  __entry->data_fin, __entry->use_ack,
-		  __entry->ack64, __entry->mpc_map,
-		  __entry->frozen, __entry->reset_transient,
-		  __entry->reset_reason)
+		  __entry->csum, __entry->use_map,
+		  __entry->dsn64, __entry->data_fin,
+		  __entry->use_ack, __entry->ack64,
+		  __entry->mpc_map, __entry->frozen,
+		  __entry->reset_transient, __entry->reset_reason,
+		  __entry->csum_reqd)
 );
 
 DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status,

From 94d66ba1d8e4803066b9c6a16274343a425ed1bf Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:21 -0700
Subject: [PATCH 2186/3292] selftests: mptcp: enable checksum in
 mptcp_connect.sh

This patch added a new argument "-C" for the mptcp_connect.sh script to
set the sysctl checksum_enabled to 1 in ns1, ns2, ns3 and ns4 to enable
the data checksum.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9ca5f1ba461e..69351c3eb68c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
 
 time_start=$(date +%s)
 
-optstring="S:R:d:e:l:r:h4cm:f:t"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
 ret=0
 sin=""
 sout=""
@@ -22,6 +22,7 @@ sndbuf=0
 rcvbuf=0
 options_log=true
 do_tcp=0
+checksum=false
 filesize=0
 
 if [ $tc_loss -eq 100 ];then
@@ -47,6 +48,7 @@ usage() {
 	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
 	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
 	echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+	echo -e "\t-C: enable the MPTCP data checksum"
 }
 
 while getopts "$optstring" option;do
@@ -104,6 +106,9 @@ while getopts "$optstring" option;do
 	"t")
 		do_tcp=$((do_tcp+1))
 		;;
+	"C")
+		checksum=true
+		;;
 	"?")
 		usage $0
 		exit 1
@@ -200,6 +205,12 @@ ip -net "$ns4" route add default via dead:beef:3::2
 # use TCP syn cookies, even if no flooding was detected.
 ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
 
+if $checksum; then
+	for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+		ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+	done
+fi
+
 set_ethtool_flags() {
 	local ns="$1"
 	local dev="$2"

From af66d3e1c3fa65f2187ab418b9934068049ea27a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 17 Jun 2021 16:46:22 -0700
Subject: [PATCH 2187/3292] selftests: mptcp: enable checksum in mptcp_join.sh

This patch added a new argument "-C" for the mptcp_join.sh script to set
the sysctl checksum_enabled to 1 in ns1 and ns2 to enable the data
checksum.

In chk_join_nr, check the counter of the mib for the data checksum.

Also added a new argument "-S" for the mptcp_join.sh script to start the
test cases that verify the checksum handshake:

  * Sender and listener both have checksums off
  * Sender and listener both have checksums on
  * Sender checksums off, listener checksums on
  * Sender checksums on, listener checksums off

The output looks like this:

 01 checksum test 0 0                  sum[ ok ] - csum  [ ok ]
 02 checksum test 1 1                  sum[ ok ] - csum  [ ok ]
 03 checksum test 0 1                  sum[ ok ] - csum  [ ok ]
 04 checksum test 1 0                  sum[ ok ] - csum  [ ok ]
 05 no JOIN                            syn[ ok ] - synack[ ok ] - ack[ ok ]
                                       sum[ ok ] - csum  [ ok ]
 06 single subflow, limited by client  syn[ ok ] - synack[ ok ] - ack[ ok ]
                                       sum[ ok ] - csum  [ ok ]

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/mptcp/mptcp_join.sh | 107 +++++++++++++++++-
 1 file changed, 103 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fd99485cf2a4..523c7797f30a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -12,6 +12,7 @@ timeout_poll=30
 timeout_test=$((timeout_poll * 2 + 1))
 mptcp_connect=""
 capture=0
+checksum=0
 do_all_tests=1
 
 TEST_COUNT=0
@@ -49,6 +50,9 @@ init()
 		ip netns exec $netns sysctl -q net.mptcp.enabled=1
 		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
 		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+		if [ $checksum -eq 1 ]; then
+			ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+		fi
 	done
 
 	#  ns1              ns2
@@ -124,6 +128,17 @@ reset_with_add_addr_timeout()
 		-j DROP
 }
 
+reset_with_checksum()
+{
+	local ns1_enable=$1
+	local ns2_enable=$2
+
+	reset
+
+	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+}
+
 ip -Version > /dev/null 2>&1
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without ip tool"
@@ -476,6 +491,45 @@ run_tests()
 	fi
 }
 
+chk_csum_nr()
+{
+	local msg=${1:-""}
+	local count
+	local dump_stats
+
+	if [ ! -z "$msg" ]; then
+		printf "%02u" "$TEST_COUNT"
+	else
+		echo -n "  "
+	fi
+	printf " %-36s %s" "$msg" "sum"
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != 0 ]; then
+		echo "[fail] got $count data checksum error[s] expected 0"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+	echo -n " - csum  "
+	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != 0 ]; then
+		echo "[fail] got $count data checksum error[s] expected 0"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
 chk_join_nr()
 {
 	local msg="$1"
@@ -523,6 +577,9 @@ chk_join_nr()
 		echo Client ns stats
 		ip netns exec $ns2 nstat -as | grep MPTcp
 	fi
+	if [ $checksum -eq 1 ]; then
+		chk_csum_nr
+	fi
 }
 
 chk_add_nr()
@@ -1374,6 +1431,37 @@ syncookies_tests()
 	chk_add_nr 1 1
 }
 
+checksum_tests()
+{
+	# checksum test 0 0
+	reset_with_checksum 0 0
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_csum_nr "checksum test 0 0"
+
+	# checksum test 1 1
+	reset_with_checksum 1 1
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_csum_nr "checksum test 1 1"
+
+	# checksum test 0 1
+	reset_with_checksum 0 1
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_csum_nr "checksum test 0 1"
+
+	# checksum test 1 0
+	reset_with_checksum 1 0
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_csum_nr "checksum test 1 0"
+}
+
 all_tests()
 {
 	subflows_tests
@@ -1387,6 +1475,7 @@ all_tests()
 	backup_tests
 	add_addr_ports_tests
 	syncookies_tests
+	checksum_tests
 }
 
 usage()
@@ -1403,7 +1492,9 @@ usage()
 	echo "  -b backup_tests"
 	echo "  -p add_addr_ports_tests"
 	echo "  -k syncookies_tests"
+	echo "  -S checksum_tests"
 	echo "  -c capture pcap files"
+	echo "  -C enable data checksum"
 	echo "  -h help"
 }
 
@@ -1418,13 +1509,16 @@ make_file "$sin" "server" 1
 trap cleanup EXIT
 
 for arg in "$@"; do
-	# check for "capture" arg before launching tests
+	# check for "capture/checksum" args before launching tests
 	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
 		capture=1
 	fi
+	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
+		checksum=1
+	fi
 
-	# exception for the capture option, the rest means: a part of the tests
-	if [ "${arg}" != "-c" ]; then
+	# exception for the capture/checksum options, the rest means: a part of the tests
+	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
 		do_all_tests=0
 	fi
 done
@@ -1434,7 +1528,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fsltra64bpkch' opt; do
+while getopts 'fsltra64bpkchCS' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -1469,8 +1563,13 @@ while getopts 'fsltra64bpkch' opt; do
 		k)
 			syncookies_tests
 			;;
+		S)
+			checksum_tests
+			;;
 		c)
 			;;
+		C)
+			;;
 		h | *)
 			usage
 			;;

From fefed8af5ed40b861ee4d95c3e32804e7a33df96 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:18 +0800
Subject: [PATCH 2188/3292] net: hostess_sv11: fix the code style issue about
 "foo* bar"

Fix the checkpatch error as "foo* bar" should be "foo *bar".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 6c05c4c8914a..a18c09dc335c 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -48,7 +48,7 @@ static int dma;
  *	Network driver support routines
  */
 
-static inline struct z8530_dev* dev_to_sv(struct net_device *dev)
+static inline struct z8530_dev *dev_to_sv(struct net_device *dev)
 {
 	return (struct z8530_dev *)dev_to_hdlc(dev)->priv;
 }

From fe9be8daef8a7e6bd539fa57a9e14c6b4f8261f7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:19 +0800
Subject: [PATCH 2189/3292] net: hostess_sv11: move out assignment in if
 condition

Should not use assignment in if condition.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index a18c09dc335c..8dce8b190420 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -340,7 +340,8 @@ static struct z8530_dev *sv11_unit;
 
 int init_module(void)
 {
-	if ((sv11_unit = sv11_init(io, irq)) == NULL)
+	sv11_unit = sv11_init(io, irq);
+	if (!sv11_unit)
 		return -ENODEV;
 	return 0;
 }

From 534f76d46245a18f82e008cdf449ad2408d07760 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:20 +0800
Subject: [PATCH 2190/3292] net: hostess_sv11: remove trailing whitespace

This patch removes trailing whitespace.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 8dce8b190420..8914cdd833c4 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -9,7 +9,7 @@
  *
  *	It supports DMA using two DMA channels in SYNC mode. The driver doesn't
  *	use these facilities
- *	
+ *
  *	The control port is at io+1, the data at io+3 and turning off the DMA
  *	is done by writing 0 to io+4
  *

From 9562aef3c0c38a8577f1d3c6f80a496e3f4f686d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:21 +0800
Subject: [PATCH 2191/3292] net: hostess_sv11: fix the code style issue about
 switch and case

According to the chackpatch.pl,
switch and case should be at the same indent.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 54 +++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 8914cdd833c4..7a59d7e5d62e 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -85,15 +85,15 @@ static int hostess_open(struct net_device *d)
 	 *	Link layer up
 	 */
 	switch (dma) {
-		case 0:
-			err = z8530_sync_open(d, &sv11->chanA);
-			break;
-		case 1:
-			err = z8530_sync_dma_open(d, &sv11->chanA);
-			break;
-		case 2:
-			err = z8530_sync_txdma_open(d, &sv11->chanA);
-			break;
+	case 0:
+		err = z8530_sync_open(d, &sv11->chanA);
+		break;
+	case 1:
+		err = z8530_sync_dma_open(d, &sv11->chanA);
+		break;
+	case 2:
+		err = z8530_sync_txdma_open(d, &sv11->chanA);
+		break;
 	}
 
 	if (err)
@@ -102,15 +102,15 @@ static int hostess_open(struct net_device *d)
 	err = hdlc_open(d);
 	if (err) {
 		switch (dma) {
-			case 0:
-				z8530_sync_close(d, &sv11->chanA);
-				break;
-			case 1:
-				z8530_sync_dma_close(d, &sv11->chanA);
-				break;
-			case 2:
-				z8530_sync_txdma_close(d, &sv11->chanA);
-				break;
+		case 0:
+			z8530_sync_close(d, &sv11->chanA);
+			break;
+		case 1:
+			z8530_sync_dma_close(d, &sv11->chanA);
+			break;
+		case 2:
+			z8530_sync_txdma_close(d, &sv11->chanA);
+			break;
 		}
 		return err;
 	}
@@ -136,15 +136,15 @@ static int hostess_close(struct net_device *d)
 	netif_stop_queue(d);
 
 	switch (dma) {
-		case 0:
-			z8530_sync_close(d, &sv11->chanA);
-			break;
-		case 1:
-			z8530_sync_dma_close(d, &sv11->chanA);
-			break;
-		case 2:
-			z8530_sync_txdma_close(d, &sv11->chanA);
-			break;
+	case 0:
+		z8530_sync_close(d, &sv11->chanA);
+		break;
+	case 1:
+		z8530_sync_dma_close(d, &sv11->chanA);
+		break;
+	case 2:
+		z8530_sync_txdma_close(d, &sv11->chanA);
+		break;
 	}
 	return 0;
 }

From d25a944693c7949842bbba3597481d7fd264e3ed Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:22 +0800
Subject: [PATCH 2192/3292] net: hostess_sv11: remove dead code

This patch removes the dead code.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 7a59d7e5d62e..4e11c86f97dd 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -151,8 +151,6 @@ static int hostess_close(struct net_device *d)
 
 static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
 {
-	/* struct z8530_dev *sv11=dev_to_sv(d);
-	   z8530_ioctl(d,&sv11->chanA,ifr,cmd) */
 	return hdlc_ioctl(d, ifr, cmd);
 }
 

From 67c1876897da8445f78fe6ca665a4b56f447c6d1 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:23 +0800
Subject: [PATCH 2193/3292] net: hostess_sv11: fix the comments style issue

Networking block comments don't use an empty /* line,
use /* Comment...

Block comments use * on subsequent lines.
Block comments use a trailing */ on a separate line.

This patch fixes the comments style issues.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 48 +++++++++++++---------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 4e11c86f97dd..992181ad2cad 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- *	Comtrol SV11 card driver
+/*	Comtrol SV11 card driver
  *
  *	This is a slightly odd Z85230 synchronous driver. All you need to
  *	know basically is
@@ -44,8 +43,7 @@
 
 static int dma;
 
-/*
- *	Network driver support routines
+/*	Network driver support routines
  */
 
 static inline struct z8530_dev *dev_to_sv(struct net_device *dev)
@@ -53,8 +51,7 @@ static inline struct z8530_dev *dev_to_sv(struct net_device *dev)
 	return (struct z8530_dev *)dev_to_hdlc(dev)->priv;
 }
 
-/*
- *	Frame receive. Simple for our card as we do HDLC and there
+/*	Frame receive. Simple for our card as we do HDLC and there
  *	is no funny garbage involved
  */
 
@@ -65,15 +62,13 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
 	skb->protocol = hdlc_type_trans(skb, c->netdevice);
 	skb_reset_mac_header(skb);
 	skb->dev = c->netdevice;
-	/*
-	 *	Send it to the PPP layer. We don't have time to process
+	/*	Send it to the PPP layer. We don't have time to process
 	 *	it right now.
 	 */
 	netif_rx(skb);
 }
 
-/*
- *	We've been placed in the UP state
+/*	We've been placed in the UP state
  */
 
 static int hostess_open(struct net_device *d)
@@ -81,8 +76,7 @@ static int hostess_open(struct net_device *d)
 	struct z8530_dev *sv11 = dev_to_sv(d);
 	int err = -1;
 
-	/*
-	 *	Link layer up
+	/*	Link layer up
 	 */
 	switch (dma) {
 	case 0:
@@ -127,8 +121,7 @@ static int hostess_open(struct net_device *d)
 static int hostess_close(struct net_device *d)
 {
 	struct z8530_dev *sv11 = dev_to_sv(d);
-	/*
-	 *	Discard new frames
+	/*	Discard new frames
 	 */
 	sv11->chanA.rx_function = z8530_null_rx;
 
@@ -154,8 +147,7 @@ static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
 	return hdlc_ioctl(d, ifr, cmd);
 }
 
-/*
- *	Passed network frames, fire them downwind.
+/*	Passed network frames, fire them downwind.
  */
 
 static netdev_tx_t hostess_queue_xmit(struct sk_buff *skb,
@@ -172,8 +164,7 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding,
 	return -EINVAL;
 }
 
-/*
- *	Description block for a Comtrol Hostess SV11 card
+/*	Description block for a Comtrol Hostess SV11 card
  */
 
 static const struct net_device_ops hostess_ops = {
@@ -187,8 +178,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 {
 	struct z8530_dev *sv;
 	struct net_device *netdev;
-	/*
-	 *	Get the needed I/O space
+	/*	Get the needed I/O space
 	 */
 
 	if (!request_region(iobase, 8, "Comtrol SV11")) {
@@ -200,8 +190,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 	if (!sv)
 		goto err_kzalloc;
 
-	/*
-	 *	Stuff in the I/O addressing
+	/*	Stuff in the I/O addressing
 	 */
 
 	sv->active = 0;
@@ -216,7 +205,8 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 	outb(0, iobase + 4);		/* DMA off */
 
 	/* We want a fast IRQ for this device. Actually we'd like an even faster
-	   IRQ ;) - This is one driver RtLinux is made for */
+	 * IRQ ;) - This is one driver RtLinux is made for
+	 */
 
 	if (request_irq(irq, z8530_interrupt, 0,
 			"Hostess SV11", sv) < 0) {
@@ -230,8 +220,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 	sv->chanB.dev = sv;
 
 	if (dma) {
-		/*
-		 *	You can have DMA off or 1 and 3 thats the lot
+		/*	You can have DMA off or 1 and 3 thats the lot
 		 *	on the Comtrol.
 		 */
 		sv->chanA.txdma = 3;
@@ -246,11 +235,11 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 	}
 
 	/* Kill our private IRQ line the hostess can end up chattering
-	   until the configuration is set */
+	 * until the configuration is set
+	 */
 	disable_irq(irq);
 
-	/*
-	 *	Begin normal initialise
+	/*	Begin normal initialise
 	 */
 
 	if (z8530_init(sv)) {
@@ -266,8 +255,7 @@ static struct z8530_dev *sv11_init(int iobase, int irq)
 
 	enable_irq(irq);
 
-	/*
-	 *	Now we can take the IRQ
+	/*	Now we can take the IRQ
 	 */
 
 	sv->chanA.netdevice = netdev = alloc_hdlcdev(sv);

From 7d40bfc1933efbbd65762b0bcb63287c07125370 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 18 Jun 2021 10:32:24 +0800
Subject: [PATCH 2194/3292] net: hostess_sv11: fix the alignment issue

Alignment should match open parenthesis.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/hostess_sv11.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 992181ad2cad..fd61a7cc4fdf 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -151,7 +151,7 @@ static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
  */
 
 static netdev_tx_t hostess_queue_xmit(struct sk_buff *skb,
-					    struct net_device *d)
+				      struct net_device *d)
 {
 	return z8530_queue_xmit(&dev_to_sv(d)->chanA, skb);
 }

From 30bba69d7db40e732d6c0aa6d4890c60d717e314 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Fri, 18 Jun 2021 10:53:34 +0800
Subject: [PATCH 2195/3292] stmmac: pci: Add dwmac support for Loongson

This GMAC module is integrated into the Loongson-2K SoC and the LS7A
bridge chip.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig   |   9 +
 drivers/net/ethernet/stmicro/stmmac/Makefile  |   1 +
 .../ethernet/stmicro/stmmac/dwmac-loongson.c  | 218 ++++++++++++++++++
 3 files changed, 228 insertions(+)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 9a19e4d9da02..ac3c248d4f9b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -250,6 +250,15 @@ config DWMAC_INTEL
 	  This selects the Intel platform specific bus support for the
 	  stmmac driver. This driver is used for Intel Quark/EHL/TGL.
 
+config DWMAC_LOONGSON
+	tristate "Loongson PCI DWMAC support"
+	default MACH_LOONGSON64
+	depends on STMMAC_ETH && PCI
+	depends on COMMON_CLK
+	help
+	  This selects the LOONGSON PCI bus support for the stmmac driver,
+	  Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge.
+
 config STMMAC_PCI
 	tristate "STMMAC PCI bus support"
 	depends on STMMAC_ETH && PCI
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 6471f93889ee..d4e12e9ace4f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -37,4 +37,5 @@ dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
 
 obj-$(CONFIG_STMMAC_PCI)	+= stmmac-pci.o
 obj-$(CONFIG_DWMAC_INTEL)	+= dwmac-intel.o
+obj-$(CONFIG_DWMAC_LOONGSON)	+= dwmac-loongson.o
 stmmac-pci-objs:= stmmac_pci.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
new file mode 100644
index 000000000000..8cd4e2e8ec40
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Loongson Corporation
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/pci.h>
+#include <linux/dmi.h>
+#include <linux/device.h>
+#include <linux/of_irq.h>
+#include "stmmac.h"
+
+static int loongson_default_data(struct plat_stmmacenet_data *plat)
+{
+	plat->clk_csr = 2;	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
+	plat->has_gmac = 1;
+	plat->force_sf_dma_mode = 1;
+
+	/* Set default value for multicast hash bins */
+	plat->multicast_filter_bins = HASH_TABLE_SIZE;
+
+	/* Set default value for unicast filter entries */
+	plat->unicast_filter_entries = 1;
+
+	/* Set the maxmtu to a default of JUMBO_LEN */
+	plat->maxmtu = JUMBO_LEN;
+
+	/* Set default number of RX and TX queues to use */
+	plat->tx_queues_to_use = 1;
+	plat->rx_queues_to_use = 1;
+
+	/* Disable Priority config by default */
+	plat->tx_queues_cfg[0].use_prio = false;
+	plat->rx_queues_cfg[0].use_prio = false;
+
+	/* Disable RX queues routing by default */
+	plat->rx_queues_cfg[0].pkt_route = 0x0;
+
+	/* Default to phy auto-detection */
+	plat->phy_addr = -1;
+
+	plat->dma_cfg->pbl = 32;
+	plat->dma_cfg->pblx8 = true;
+
+	plat->multicast_filter_bins = 256;
+	return 0;
+}
+
+static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct plat_stmmacenet_data *plat;
+	struct stmmac_resources res;
+	int ret, i, mdio;
+	struct device_node *np;
+
+	np = dev_of_node(&pdev->dev);
+
+	if (!np) {
+		pr_info("dwmac_loongson_pci: No OF node\n");
+		return -ENODEV;
+	}
+
+	if (!of_device_is_compatible(np, "loongson, pci-gmac")) {
+		pr_info("dwmac_loongson_pci: Incompatible OF node\n");
+		return -ENODEV;
+	}
+
+	plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
+	if (!plat)
+		return -ENOMEM;
+
+	if (plat->mdio_node) {
+		dev_err(&pdev->dev, "Found MDIO subnode\n");
+		mdio = true;
+	}
+
+	if (mdio) {
+		plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
+						   sizeof(*plat->mdio_bus_data),
+						   GFP_KERNEL);
+		if (!plat->mdio_bus_data)
+			return -ENOMEM;
+		plat->mdio_bus_data->needs_reset = true;
+	}
+
+	plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL);
+	if (!plat->dma_cfg)
+		return -ENOMEM;
+
+	/* Enable pci device */
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__);
+		return ret;
+	}
+
+	/* Get the base address of device */
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (pci_resource_len(pdev, i) == 0)
+			continue;
+		ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+		if (ret)
+			return ret;
+		break;
+	}
+
+	plat->bus_id = of_alias_get_id(np, "ethernet");
+	if (plat->bus_id < 0)
+		plat->bus_id = pci_dev_id(pdev);
+
+	plat->phy_interface = device_get_phy_mode(&pdev->dev);
+	if (plat->phy_interface < 0)
+		dev_err(&pdev->dev, "phy_mode not found\n");
+
+	plat->interface = PHY_INTERFACE_MODE_GMII;
+
+	pci_set_master(pdev);
+
+	loongson_default_data(plat);
+	pci_enable_msi(pdev);
+	memset(&res, 0, sizeof(res));
+	res.addr = pcim_iomap_table(pdev)[0];
+
+	res.irq = of_irq_get_byname(np, "macirq");
+	if (res.irq < 0) {
+		dev_err(&pdev->dev, "IRQ macirq not found\n");
+		ret = -ENODEV;
+	}
+
+	res.wol_irq = of_irq_get_byname(np, "eth_wake_irq");
+	if (res.wol_irq < 0) {
+		dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n");
+		res.wol_irq = res.irq;
+	}
+
+	res.lpi_irq = of_irq_get_byname(np, "eth_lpi");
+	if (res.lpi_irq < 0) {
+		dev_err(&pdev->dev, "IRQ eth_lpi not found\n");
+		ret = -ENODEV;
+	}
+
+	return stmmac_dvr_probe(&pdev->dev, plat, &res);
+}
+
+static void loongson_dwmac_remove(struct pci_dev *pdev)
+{
+	int i;
+
+	stmmac_dvr_remove(&pdev->dev);
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (pci_resource_len(pdev, i) == 0)
+			continue;
+		pcim_iounmap_regions(pdev, BIT(i));
+		break;
+	}
+
+	pci_disable_device(pdev);
+}
+
+static int __maybe_unused loongson_dwmac_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = stmmac_suspend(dev);
+	if (ret)
+		return ret;
+
+	ret = pci_save_state(pdev);
+	if (ret)
+		return ret;
+
+	pci_disable_device(pdev);
+	pci_wake_from_d3(pdev, true);
+	return 0;
+}
+
+static int __maybe_unused loongson_dwmac_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	pci_restore_state(pdev);
+	pci_set_power_state(pdev, PCI_D0);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	pci_set_master(pdev);
+
+	return stmmac_resume(dev);
+}
+
+static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend,
+			 loongson_dwmac_resume);
+
+static const struct pci_device_id loongson_dwmac_id_table[] = {
+	{ PCI_VDEVICE(LOONGSON, 0x7a03) },
+	{}
+};
+MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table);
+
+struct pci_driver loongson_dwmac_driver = {
+	.name = "dwmac-loongson-pci",
+	.id_table = loongson_dwmac_id_table,
+	.probe = loongson_dwmac_probe,
+	.remove = loongson_dwmac_remove,
+	.driver = {
+		.pm = &loongson_dwmac_pm_ops,
+	},
+};
+
+module_pci_driver(loongson_dwmac_driver);
+
+MODULE_DESCRIPTION("Loongson DWMAC PCI driver");
+MODULE_AUTHOR("Qing Zhang <zhangqing@loongson.cn>");
+MODULE_LICENSE("GPL v2");

From f8a11425075ff11b4b5784f077cb84f3d2dfb3f0 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Fri, 18 Jun 2021 10:53:35 +0800
Subject: [PATCH 2196/3292] MIPS: Loongson64: Add GMAC support for
 Loongson-2K1000

The GMAC module is now supported, enable it.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../boot/dts/loongson/loongson64-2k1000.dtsi  | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
index 569e814def83..5747f171de29 100644
--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
+++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi
@@ -114,6 +114,52 @@
 			ranges = <0x01000000 0x0 0x00000000 0x0 0x18000000  0x0 0x00010000>,
 				 <0x02000000 0x0 0x40000000 0x0 0x40000000  0x0 0x40000000>;
 
+			gmac@3,0 {
+				compatible = "pci0014,7a03.0",
+						   "pci0014,7a03",
+						   "pciclass0c0320",
+						   "pciclass0c03",
+						   "loongson, pci-gmac";
+
+				reg = <0x1800 0x0 0x0 0x0 0x0>;
+				interrupts = <12 IRQ_TYPE_LEVEL_LOW>,
+					     <13 IRQ_TYPE_LEVEL_LOW>;
+				interrupt-names = "macirq", "eth_lpi";
+				interrupt-parent = <&liointc0>;
+				phy-mode = "rgmii";
+				mdio {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					compatible = "snps,dwmac-mdio";
+					phy0: ethernet-phy@0 {
+						reg = <0>;
+					};
+				};
+			};
+
+			gmac@3,1 {
+				compatible = "pci0014,7a03.0",
+						   "pci0014,7a03",
+						   "pciclass0c0320",
+						   "pciclass0c03",
+						   "loongson, pci-gmac";
+
+				reg = <0x1900 0x0 0x0 0x0 0x0>;
+				interrupts = <14 IRQ_TYPE_LEVEL_LOW>,
+					     <15 IRQ_TYPE_LEVEL_LOW>;
+				interrupt-names = "macirq", "eth_lpi";
+				interrupt-parent = <&liointc0>;
+				phy-mode = "rgmii";
+				mdio {
+					#address-cells = <1>;
+					#size-cells = <0>;
+					compatible = "snps,dwmac-mdio";
+					phy1: ethernet-phy@1 {
+						reg = <0>;
+					};
+				};
+			};
+
 			ehci@4,1 {
 				compatible = "pci0014,7a14.0",
 						   "pci0014,7a14",

From 47a311c31a51e44130c92dc11f9f0d7c8c1f9be0 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Fri, 18 Jun 2021 10:53:36 +0800
Subject: [PATCH 2197/3292] MIPS: Loongson64: DTS: Add GMAC support for LS7A
 PCH

The GMAC module is now supported, enable it.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/boot/dts/loongson/ls7a-pch.dtsi | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
index f99a7a11fded..58b9bb47c58a 100644
--- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
+++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi
@@ -186,7 +186,8 @@
 				compatible = "pci0014,7a03.0",
 						   "pci0014,7a03",
 						   "pciclass020000",
-						   "pciclass0200";
+						   "pciclass0200",
+						   "loongson, pci-gmac";
 
 				reg = <0x1800 0x0 0x0 0x0 0x0>;
 				interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
@@ -208,7 +209,8 @@
 				compatible = "pci0014,7a03.0",
 						   "pci0014,7a03",
 						   "pciclass020000",
-						   "pciclass0200";
+						   "pciclass0200",
+						   "loongson, pci-gmac";
 
 				reg = <0x1900 0x0 0x0 0x0 0x0>;
 				interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,

From 68277749a0133fa6f9f5ec8576691e5fc9718610 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Fri, 18 Jun 2021 10:53:37 +0800
Subject: [PATCH 2198/3292] dt-bindings: dwmac: Add bindings for new Loongson
 SoC and bridge chip

Add the dwmac bindings for the Loongson-2K SoC and the LS7A
bridge chip.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 9c0ce92e9212..56f2235f5fb5 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -51,11 +51,15 @@ properties:
         - allwinner,sun8i-r40-emac
         - allwinner,sun8i-v3s-emac
         - allwinner,sun50i-a64-emac
+        - loongson,ls2k-dwmac
+        - loongson,ls7a-dwmac
         - amlogic,meson6-dwmac
         - amlogic,meson8b-dwmac
         - amlogic,meson8m2-dwmac
         - amlogic,meson-gxbb-dwmac
         - amlogic,meson-axg-dwmac
+        - loongson,ls2k-dwmac
+        - loongson,ls7a-dwmac
         - ingenic,jz4775-mac
         - ingenic,x1000-mac
         - ingenic,x1600-mac
@@ -363,6 +367,8 @@ allOf:
               - allwinner,sun8i-r40-emac
               - allwinner,sun8i-v3s-emac
               - allwinner,sun50i-a64-emac
+              - loongson,ls2k-dwmac
+              - loongson,ls7a-dwmac
               - ingenic,jz4775-mac
               - ingenic,x1000-mac
               - ingenic,x1600-mac

From 9d72b8da9f13349be11914823d7bd8186c6a91ce Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 17 Jun 2021 21:55:56 -0700
Subject: [PATCH 2199/3292] net: vlan: pass thru all GSO_SOFTWARE in
 hw_enc_features

Currently UDP tunnel devices on top of VLANs lose the ability
to offload UDP GSO. Widen the pass thru features from TSO
to all GSO_SOFTWARE.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index e3f6ff05a528..1a705a4ef7fa 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -108,7 +108,8 @@ static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev)
 	netdev_features_t ret;
 
 	ret = real_dev->hw_enc_features &
-	      (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL);
+	      (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE |
+	       NETIF_F_GSO_ENCAP_ALL);
 
 	if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
 		return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM;

From 9fd2bc3206b31c8ff6d54d643730d4c3470471d6 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Fri, 18 Jun 2021 15:32:04 +0800
Subject: [PATCH 2200/3292] net: caif: modify the label out_err to out

Modify the label out_err to out to avoid the meanless kfree.

Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfcnfg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cac30e676ac9..23267c8db7c4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -480,7 +480,7 @@ got_phyid:
 	phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
 	if (!phyinfo) {
 		res = -ENOMEM;
-		goto out_err;
+		goto out;
 	}
 
 	phy_layer->id = phyid;

From e44dc724826cc26bd5406eab156d2f633bb44d8d Mon Sep 17 00:00:00 2001
From: dingsenjie <dingsenjie@yulong.com>
Date: Fri, 18 Jun 2021 15:34:31 +0800
Subject: [PATCH 2201/3292] ethernet: marvell/octeontx2: Simplify the return
 expression of npc_is_same

Simplify the return expression in the rvu_npc_fs.c

Signed-off-by: dingsenjie <dingsenjie@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 87d7c6ab047f..68633145a8b8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -123,11 +123,8 @@ static bool npc_is_field_present(struct rvu *rvu, enum key_fields type, u8 intf)
 static bool npc_is_same(struct npc_key_field *input,
 			struct npc_key_field *field)
 {
-	int ret;
-
-	ret = memcmp(&input->layer_mdata, &field->layer_mdata,
-		     sizeof(struct npc_layer_mdata));
-	return ret == 0;
+	return memcmp(&input->layer_mdata, &field->layer_mdata,
+		     sizeof(struct npc_layer_mdata)) == 0;
 }
 
 static void npc_set_layer_mdata(struct npc_mcam *mcam, enum key_fields type,

From 96a19319921ceb4b2f4c49d1b9bf9de1161e30ca Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Fri, 18 Jun 2021 17:10:16 +0800
Subject: [PATCH 2202/3292] NFC: nxp-nci: remove unnecessary labels

Simplify the code by removing unnecessary labels and returning directly.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nxp-nci/core.c | 39 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c
index a0ce95a287c5..2b0c7232e91f 100644
--- a/drivers/nfc/nxp-nci/core.c
+++ b/drivers/nfc/nxp-nci/core.c
@@ -70,21 +70,16 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
 	struct nxp_nci_info *info = nci_get_drvdata(ndev);
 	int r;
 
-	if (!info->phy_ops->write) {
-		r = -ENOTSUPP;
-		goto send_exit;
-	}
+	if (!info->phy_ops->write)
+		return -EOPNOTSUPP;
 
-	if (info->mode != NXP_NCI_MODE_NCI) {
-		r = -EINVAL;
-		goto send_exit;
-	}
+	if (info->mode != NXP_NCI_MODE_NCI)
+		return -EINVAL;
 
 	r = info->phy_ops->write(info->phy_id, skb);
 	if (r < 0)
 		kfree_skb(skb);
 
-send_exit:
 	return r;
 }
 
@@ -104,10 +99,8 @@ int nxp_nci_probe(void *phy_id, struct device *pdev,
 	int r;
 
 	info = devm_kzalloc(pdev, sizeof(struct nxp_nci_info), GFP_KERNEL);
-	if (!info) {
-		r = -ENOMEM;
-		goto probe_exit;
-	}
+	if (!info)
+		return -ENOMEM;
 
 	info->phy_id = phy_id;
 	info->pdev = pdev;
@@ -120,31 +113,25 @@ int nxp_nci_probe(void *phy_id, struct device *pdev,
 	if (info->phy_ops->set_mode) {
 		r = info->phy_ops->set_mode(info->phy_id, NXP_NCI_MODE_COLD);
 		if (r < 0)
-			goto probe_exit;
+			return r;
 	}
 
 	info->mode = NXP_NCI_MODE_COLD;
 
 	info->ndev = nci_allocate_device(&nxp_nci_ops, NXP_NCI_NFC_PROTOCOLS,
 					 NXP_NCI_HDR_LEN, 0);
-	if (!info->ndev) {
-		r = -ENOMEM;
-		goto probe_exit;
-	}
+	if (!info->ndev)
+		return -ENOMEM;
 
 	nci_set_parent_dev(info->ndev, pdev);
 	nci_set_drvdata(info->ndev, info);
 	r = nci_register_device(info->ndev);
-	if (r < 0)
-		goto probe_exit_free_nci;
+	if (r < 0) {
+		nci_free_device(info->ndev);
+		return r;
+	}
 
 	*ndev = info->ndev;
-
-	goto probe_exit;
-
-probe_exit_free_nci:
-	nci_free_device(info->ndev);
-probe_exit:
 	return r;
 }
 EXPORT_SYMBOL(nxp_nci_probe);

From bd70957438f0cc4879cbdff8bbc8614bc1cddf49 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@denx.de>
Date: Fri, 18 Jun 2021 11:35:26 +0200
Subject: [PATCH 2203/3292] net: pxa168_eth: Fix a potential data race in
 pxa168_eth_remove

Commit 0571a753cb07 cancelled delayed work too late, keeping small
race. Cancel work sooner to close it completely.

Signed-off-by: Pavel Machek (CIP) <pavel@denx.de>
Fixes: 0571a753cb07 ("net: pxa168_eth: Fix a potential data race in pxa168_eth_remove")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/pxa168_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index e967867828d8..9b48ae4bac39 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1528,6 +1528,7 @@ static int pxa168_eth_remove(struct platform_device *pdev)
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct pxa168_eth_private *pep = netdev_priv(dev);
 
+	cancel_work_sync(&pep->tx_timeout_task);
 	if (pep->htpr) {
 		dma_free_coherent(pep->dev->dev.parent, HASH_ADDR_TABLE_SIZE,
 				  pep->htpr, pep->htpr_dma);
@@ -1539,7 +1540,6 @@ static int pxa168_eth_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pep->clk);
 	mdiobus_unregister(pep->smi_bus);
 	mdiobus_free(pep->smi_bus);
-	cancel_work_sync(&pep->tx_timeout_task);
 	unregister_netdev(dev);
 	free_netdev(dev);
 	return 0;

From c44924c532fb9bb80b48d141a0f8391e9c280112 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 18 Jun 2021 10:44:25 +0100
Subject: [PATCH 2204/3292] net: stmmac: remove redundant continue statement

The continue statement in the for-loop has no effect, remove it.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 4e70efc45458..92dab609d4f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -573,10 +573,8 @@ static int tc_add_flow(struct stmmac_priv *priv,
 
 	for (i = 0; i < ARRAY_SIZE(tc_flow_parsers); i++) {
 		ret = tc_flow_parsers[i].fn(priv, cls, entry);
-		if (!ret) {
+		if (!ret)
 			entry->in_use = true;
-			continue;
-		}
 	}
 
 	if (!entry->in_use)

From 040c12570e6865b1a219c9d7f7f4a924a6570d1e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 18 Jun 2021 11:01:55 +0100
Subject: [PATCH 2205/3292] net: bridge: remove redundant continue statement

The continue statement at the end of a for-loop has no effect,
invert the if expression and remove the continue.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index da3256a3eed0..8789a57af543 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -113,9 +113,7 @@ static void __vlan_add_list(struct net_bridge_vlan *v)
 	headp = &vg->vlan_list;
 	list_for_each_prev(hpos, headp) {
 		vent = list_entry(hpos, struct net_bridge_vlan, vlist);
-		if (v->vid < vent->vid)
-			continue;
-		else
+		if (v->vid >= vent->vid)
 			break;
 	}
 	list_add_rcu(&v->vlist, hpos);

From 60ae9f883138f27021c2eafed9a6f22d833f1436 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 18 Jun 2021 11:19:19 +0100
Subject: [PATCH 2206/3292] qlcnic: remove redundant continue statement

The continue statement at the end of a for-loop has no effect,
it is redundant and can be removed.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 8a31ce29ecfc..14282472c7a6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -3341,9 +3341,6 @@ qlcnic_can_start_firmware(struct qlcnic_adapter *adapter)
 	do {
 		msleep(1000);
 		prev_state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
-
-		if (prev_state == QLCNIC_DEV_QUISCENT)
-			continue;
 	} while ((prev_state != QLCNIC_DEV_READY) && --dev_init_timeo);
 
 	if (!dev_init_timeo) {

From cb5a82d2b9aaca66ed74c424c9d79f0a5bfdbac4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 18 Jun 2021 14:52:54 +0300
Subject: [PATCH 2207/3292] net: dsa: sja1105: properly power down the
 microcontroller clock for SJA1110

It turns out that powering down the BASE_TIMER_CLK does not turn off the
microcontroller, just its timers, including the one for the watchdog.
So the embedded microcontroller is still running, and potentially still
doing things.

To prevent unwanted interference, we should power down the BASE_MCSS_CLK
as well (MCSS = microcontroller subsystem).

The trouble is that currently we turn off the BASE_TIMER_CLK for SJA1110
from the .clocking_setup() method, mostly because this is a Clock
Generation Unit (CGU) setting which was traditionally configured in that
method for SJA1105. But in SJA1105, the CGU was used for bringing up the
port clocks at the proper speeds, and in SJA1110 it's not (but rather
for initial configuration), so it's best that we rebrand the
sja1110_clocking_setup() method into what it really is - an implementation
of the .disable_microcontroller() method.

Since disabling the microcontroller only needs to be done once, at probe
time, we can choose the best place to do that as being in sja1105_setup(),
before we upload the static config to the device. This guarantees that
the static config being used by the switch afterwards is really ours.

Note that the procedure to upload a static config necessarily resets the
switch. This already did not reset the microcontroller, only the switch
core, so since the .disable_microcontroller() method is guaranteed to be
called by that point, if it's disabled, it remains disabled. Add a
comment to make that clear.

With the code movement for SJA1110 from .clocking_setup() to
.disable_microcontroller(), both methods are optional and are guarded by
"if" conditions.

Tested by enabling in the device tree the rev-mii switch port 0 that
goes towards the microcontroller, and flashing a firmware that would
have networking. Without this patch, the microcontroller can be pinged,
with this patch it cannot.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h          |  3 +-
 drivers/net/dsa/sja1105/sja1105_clocking.c | 20 ++++++++++++--
 drivers/net/dsa/sja1105/sja1105_main.c     | 32 +++++++++++++++++-----
 drivers/net/dsa/sja1105/sja1105_spi.c      | 14 ++++++----
 4 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 39124726bdd9..221c7abdef0e 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -136,6 +136,7 @@ struct sja1105_info {
 	int (*clocking_setup)(struct sja1105_private *priv);
 	int (*pcs_mdio_read)(struct mii_bus *bus, int phy, int reg);
 	int (*pcs_mdio_write)(struct mii_bus *bus, int phy, int reg, u16 val);
+	int (*disable_microcontroller)(struct sja1105_private *priv);
 	const char *name;
 	bool supports_mii[SJA1105_MAX_NUM_PORTS];
 	bool supports_rmii[SJA1105_MAX_NUM_PORTS];
@@ -363,7 +364,7 @@ int sja1105pqrs_setup_rgmii_delay(const void *ctx, int port);
 int sja1110_setup_rgmii_delay(const void *ctx, int port);
 int sja1105_clocking_setup_port(struct sja1105_private *priv, int port);
 int sja1105_clocking_setup(struct sja1105_private *priv);
-int sja1110_clocking_setup(struct sja1105_private *priv);
+int sja1110_disable_microcontroller(struct sja1105_private *priv);
 
 /* From sja1105_ethtool.c */
 void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data);
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 645edea5a81f..387a1f2f161c 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -6,6 +6,7 @@
 #include "sja1105.h"
 
 #define SJA1105_SIZE_CGU_CMD	4
+#define SJA1110_BASE_MCSS_CLK	SJA1110_CGU_ADDR(0x70)
 #define SJA1110_BASE_TIMER_CLK	SJA1110_CGU_ADDR(0x74)
 
 /* Common structure for CFG_PAD_MIIx_RX and CFG_PAD_MIIx_TX */
@@ -832,17 +833,30 @@ sja1110_cgu_outclk_packing(void *buf, struct sja1110_cgu_outclk *outclk,
 	sja1105_packing(buf, &outclk->pd,         0,  0, size, op);
 }
 
-/* Power down the BASE_TIMER_CLK in order to disable the watchdog */
-int sja1110_clocking_setup(struct sja1105_private *priv)
+int sja1110_disable_microcontroller(struct sja1105_private *priv)
 {
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
+	struct sja1110_cgu_outclk outclk_6_c = {
+		.clksrc = 0x3,
+		.pd = true,
+	};
 	struct sja1110_cgu_outclk outclk_7_c = {
 		.clksrc = 0x5,
 		.pd = true,
 	};
+	int rc;
 
+	/* Power down the BASE_TIMER_CLK to disable the watchdog timer */
 	sja1110_cgu_outclk_packing(packed_buf, &outclk_7_c, PACK);
 
-	return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK,
+	rc = sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_TIMER_CLK,
+			      packed_buf, SJA1105_SIZE_CGU_CMD);
+	if (rc)
+		return rc;
+
+	/* Power down the BASE_MCSS_CLOCK to gate the microcontroller off */
+	sja1110_cgu_outclk_packing(packed_buf, &outclk_6_c, PACK);
+
+	return sja1105_xfer_buf(priv, SPI_WRITE, SJA1110_BASE_MCSS_CLK,
 				packed_buf, SJA1105_SIZE_CGU_CMD);
 }
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 8e5cdf93c23b..57ccd4548911 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1922,9 +1922,11 @@ out_unlock_ptp:
 	 * For these interfaces there is no dynamic configuration
 	 * needed, since PLLs have same settings at all speeds.
 	 */
-	rc = priv->info->clocking_setup(priv);
-	if (rc < 0)
-		goto out;
+	if (priv->info->clocking_setup) {
+		rc = priv->info->clocking_setup(priv);
+		if (rc < 0)
+			goto out;
+	}
 
 	for (i = 0; i < ds->num_ports; i++) {
 		struct dw_xpcs *xpcs = priv->xpcs[i];
@@ -3032,18 +3034,34 @@ static int sja1105_setup(struct dsa_switch *ds)
 		goto out_ptp_clock_unregister;
 	}
 
+	if (priv->info->disable_microcontroller) {
+		rc = priv->info->disable_microcontroller(priv);
+		if (rc < 0) {
+			dev_err(ds->dev,
+				"Failed to disable microcontroller: %pe\n",
+				ERR_PTR(rc));
+			goto out_mdiobus_unregister;
+		}
+	}
+
 	/* Create and send configuration down to device */
 	rc = sja1105_static_config_load(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to load static config: %d\n", rc);
 		goto out_mdiobus_unregister;
 	}
+
 	/* Configure the CGU (PHY link modes and speeds) */
-	rc = priv->info->clocking_setup(priv);
-	if (rc < 0) {
-		dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc);
-		goto out_static_config_free;
+	if (priv->info->clocking_setup) {
+		rc = priv->info->clocking_setup(priv);
+		if (rc < 0) {
+			dev_err(ds->dev,
+				"Failed to configure MII clocking: %pe\n",
+				ERR_PTR(rc));
+			goto out_static_config_free;
+		}
 	}
+
 	/* On SJA1105, VLAN filtering per se is always enabled in hardware.
 	 * The only thing we can do to disable it is lie about what the 802.1Q
 	 * EtherType is.
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 4aed16d23f21..08cc5dbf2fa6 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -199,7 +199,11 @@ static int sja1110_reset_cmd(struct dsa_switch *ds)
 	const struct sja1105_regs *regs = priv->info->regs;
 	u32 switch_reset = BIT(20);
 
-	/* Switch core reset */
+	/* Only reset the switch core.
+	 * A full cold reset would re-enable the BASE_MCSS_CLOCK PLL which
+	 * would turn on the microcontroller, potentially letting it execute
+	 * code which could interfere with our configuration.
+	 */
 	return sja1105_xfer_u32(priv, SPI_WRITE, regs->rgu, &switch_reset, NULL);
 }
 
@@ -796,7 +800,7 @@ const struct sja1105_info sja1110a_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
-	.clocking_setup		= sja1110_clocking_setup,
+	.disable_microcontroller = sja1110_disable_microcontroller,
 	.pcs_mdio_read		= sja1110_pcs_mdio_read,
 	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
@@ -847,7 +851,7 @@ const struct sja1105_info sja1110b_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
-	.clocking_setup		= sja1110_clocking_setup,
+	.disable_microcontroller = sja1110_disable_microcontroller,
 	.pcs_mdio_read		= sja1110_pcs_mdio_read,
 	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
@@ -898,7 +902,7 @@ const struct sja1105_info sja1110c_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
-	.clocking_setup		= sja1110_clocking_setup,
+	.disable_microcontroller = sja1110_disable_microcontroller,
 	.pcs_mdio_read		= sja1110_pcs_mdio_read,
 	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {
@@ -949,7 +953,7 @@ const struct sja1105_info sja1110d_info = {
 	.ptp_cmd_packing	= sja1105pqrs_ptp_cmd_packing,
 	.rxtstamp		= sja1110_rxtstamp,
 	.txtstamp		= sja1110_txtstamp,
-	.clocking_setup		= sja1110_clocking_setup,
+	.disable_microcontroller = sja1110_disable_microcontroller,
 	.pcs_mdio_read		= sja1110_pcs_mdio_read,
 	.pcs_mdio_write		= sja1110_pcs_mdio_write,
 	.port_speed		= {

From 961045004b774aae7a244fa0435f8a6a2495c234 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 18 Jun 2021 20:09:45 +0800
Subject: [PATCH 2208/3292] net: hns3: fix reuse conflict of the rx page

In the current rx page reuse handling process, the rx page buffer may
have conflict between driver and stack in high-pressure scenario.

To fix this problem, we need to check whether the page is only owned
by driver at the begin and at the end of a page to make sure there is
no reuse conflict between driver and stack when desc_cb->page_offset
is rollbacked to zero or increased.

Fixes: fa7711b888f2 ("net: hns3: optimize the rx page reuse handling process")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 51bbf5f760c5..cdb5f14fb6bc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3537,21 +3537,33 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 	int size = le16_to_cpu(desc->rx.size);
 	u32 truesize = hns3_buf_size(ring);
 	u32 frag_size = size - pull_len;
+	bool reused;
 
 	/* Avoid re-using remote or pfmem page */
 	if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
 		goto out;
 
-	/* Stack is not using and current page_offset is non-zero, we can
-	 * reuse from the zero offset.
+	reused = hns3_can_reuse_page(desc_cb);
+
+	/* Rx page can be reused when:
+	 * 1. Rx page is only owned by the driver when page_offset
+	 *    is zero, which means 0 @ truesize will be used by
+	 *    stack after skb_add_rx_frag() is called, and the rest
+	 *    of rx page can be reused by driver.
+	 * Or
+	 * 2. Rx page is only owned by the driver when page_offset
+	 *    is non-zero, which means page_offset @ truesize will
+	 *    be used by stack after skb_add_rx_frag() is called,
+	 *    and 0 @ truesize can be reused by driver.
 	 */
-	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
-		desc_cb->page_offset = 0;
-		desc_cb->reuse_flag = 1;
-	} else if (desc_cb->page_offset + truesize * 2 <=
-		   hns3_page_size(ring)) {
+	if ((!desc_cb->page_offset && reused) ||
+	    ((desc_cb->page_offset + truesize + truesize) <=
+	     hns3_page_size(ring) && desc_cb->page_offset)) {
 		desc_cb->page_offset += truesize;
 		desc_cb->reuse_flag = 1;
+	} else if (desc_cb->page_offset && reused) {
+		desc_cb->page_offset = 0;
+		desc_cb->reuse_flag = 1;
 	} else if (frag_size <= ring->rx_copybreak) {
 		void *frag = napi_alloc_frag(frag_size);
 

From 1303e7f9b64f5da10d0f59dfda84aa014f968eae Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 18 Jun 2021 16:44:00 +0300
Subject: [PATCH 2209/3292] net: dsa: sja1105: allow the TTEthernet
 configuration in the static config for SJA1110

Currently sja1105_static_config_check_valid() is coded up to detect
whether TTEthernet is supported based on device ID, and this check was
not updated to cover SJA1110.

However, it is desirable to have as few checks for the device ID as
possible, so the driver core is more generic. So what we can do is look
at the static config table operations implemented by that specific
switch family (populated by sja1105_static_config_init) whether the
schedule table has a non-zero maximum entry count (meaning that it is
supported) or not.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_static_config.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 1491b72008f3..7a422ef4deb6 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -1052,8 +1052,7 @@ sja1105_static_config_check_valid(const struct sja1105_static_config *config,
 	(tables[blk_idx].entry_count == tables[blk_idx].ops->max_entry_count)
 
 	if (tables[BLK_IDX_SCHEDULE].entry_count) {
-		if (config->device_id != SJA1105T_DEVICE_ID &&
-		    config->device_id != SJA1105QS_DEVICE_ID)
+		if (!tables[BLK_IDX_SCHEDULE].ops->max_entry_count)
 			return SJA1105_TTETHERNET_NOT_SUPPORTED;
 
 		if (tables[BLK_IDX_SCHEDULE_ENTRY_POINTS].entry_count == 0)

From 61c77533b82ba810452e47cd9429aeb95effdd8c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 18 Jun 2021 16:48:12 +0300
Subject: [PATCH 2210/3292] net: dsa: sja1105: completely error out in
 sja1105_static_config_reload if something fails

If reloading the static config fails for whatever reason, for example if
sja1105_static_config_check_valid() fails, then we "goto out_unlock_ptp"
but we print anyway that "Reset switch and programmed static config.",
which is confusing because we didn't. We also do a bunch of other stuff
like reprogram the XPCS and reload the credit-based shapers, as if a
switch reset took place, which didn't.

So just unlock the PTP lock and goto out, skipping all of that.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 57ccd4548911..a9777eb564c6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1886,17 +1886,23 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 	mutex_lock(&priv->ptp_data.lock);
 
 	rc = __sja1105_ptp_gettimex(ds, &now, &ptp_sts_before);
-	if (rc < 0)
-		goto out_unlock_ptp;
+	if (rc < 0) {
+		mutex_unlock(&priv->ptp_data.lock);
+		goto out;
+	}
 
 	/* Reset switch and send updated static configuration */
 	rc = sja1105_static_config_upload(priv);
-	if (rc < 0)
-		goto out_unlock_ptp;
+	if (rc < 0) {
+		mutex_unlock(&priv->ptp_data.lock);
+		goto out;
+	}
 
 	rc = __sja1105_ptp_settime(ds, 0, &ptp_sts_after);
-	if (rc < 0)
-		goto out_unlock_ptp;
+	if (rc < 0) {
+		mutex_unlock(&priv->ptp_data.lock);
+		goto out;
+	}
 
 	t1 = timespec64_to_ns(&ptp_sts_before.pre_ts);
 	t2 = timespec64_to_ns(&ptp_sts_before.post_ts);
@@ -1911,7 +1917,6 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
 
 	__sja1105_ptp_adjtime(ds, now);
 
-out_unlock_ptp:
 	mutex_unlock(&priv->ptp_data.lock);
 
 	dev_info(priv->ds->dev,

From 7437a2230e3993bb374fe546e5137b94b3ec302b Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Fri, 18 Jun 2021 16:52:26 +0800
Subject: [PATCH 2211/3292] NFC: nxp-nci: remove unnecessary label

Remove unnecessary label chunk_exit and return directly.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nxp-nci/firmware.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nfc/nxp-nci/firmware.c b/drivers/nfc/nxp-nci/firmware.c
index dae0c8030e95..119bf305c642 100644
--- a/drivers/nfc/nxp-nci/firmware.c
+++ b/drivers/nfc/nxp-nci/firmware.c
@@ -95,10 +95,8 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info)
 	int r;
 
 	skb = nci_skb_alloc(info->ndev, info->max_payload, GFP_KERNEL);
-	if (!skb) {
-		r = -ENOMEM;
-		goto chunk_exit;
-	}
+	if (!skb)
+		return -ENOMEM;
 
 	chunk_len = info->max_payload - NXP_NCI_FW_HDR_LEN - NXP_NCI_FW_CRC_LEN;
 	remaining_len = fw_info->frame_size - fw_info->written;
@@ -124,7 +122,6 @@ static int nxp_nci_fw_send_chunk(struct nxp_nci_info *info)
 
 	kfree_skb(skb);
 
-chunk_exit:
 	return r;
 }
 

From cc97141afd768d36eaef1b3e1afea2a74da7df27 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 18 Jun 2021 15:35:24 +0200
Subject: [PATCH 2212/3292] vsock: rename vsock_has_data()

vsock_has_data() is used only by STREAM and SEQPACKET sockets,
so let's rename it to vsock_connectible_has_data(), using the same
nomenclature (connectible) used in other functions after the
introduction of SEQPACKET.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 67954afef4e1..de8249483081 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -860,7 +860,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_stream_has_data);
 
-static s64 vsock_has_data(struct vsock_sock *vsk)
+static s64 vsock_connectible_has_data(struct vsock_sock *vsk)
 {
 	struct sock *sk = sk_vsock(vsk);
 
@@ -1880,7 +1880,7 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
 	err = 0;
 	transport = vsk->transport;
 
-	while ((data = vsock_has_data(vsk)) == 0) {
+	while ((data = vsock_connectible_has_data(vsk)) == 0) {
 		prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
 
 		if (sk->sk_err != 0 ||

From 0de5b2e67275695d6ad7369c594feb1578f891fd Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 18 Jun 2021 15:35:25 +0200
Subject: [PATCH 2213/3292] vsock: rename vsock_wait_data()

vsock_wait_data() is used only by STREAM and SEQPACKET sockets,
so let's rename it to vsock_connectible_wait_data(), using the same
nomenclature (connectible) used in other functions after the
introduction of SEQPACKET.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index de8249483081..21ccf450e249 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1866,10 +1866,11 @@ out:
 	return err;
 }
 
-static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
-			   long timeout,
-			   struct vsock_transport_recv_notify_data *recv_data,
-			   size_t target)
+static int vsock_connectible_wait_data(struct sock *sk,
+				       struct wait_queue_entry *wait,
+				       long timeout,
+				       struct vsock_transport_recv_notify_data *recv_data,
+				       size_t target)
 {
 	const struct vsock_transport *transport;
 	struct vsock_sock *vsk;
@@ -1967,7 +1968,8 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
 	while (1) {
 		ssize_t read;
 
-		err = vsock_wait_data(sk, &wait, timeout, &recv_data, target);
+		err = vsock_connectible_wait_data(sk, &wait, timeout,
+						  &recv_data, target);
 		if (err <= 0)
 			break;
 
@@ -2022,7 +2024,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
 
 	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
-	err = vsock_wait_data(sk, &wait, timeout, NULL, 0);
+	err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0);
 	if (err <= 0)
 		goto out;
 

From 91aa49a8fa0ffa66966be275b2575009cc12fd3b Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 18 Jun 2021 15:35:26 +0200
Subject: [PATCH 2214/3292] vsock/virtio: remove redundant `copy_failed`
 variable

When memcpy_to_msg() fails in virtio_transport_seqpacket_do_dequeue(),
we already set `dequeued_len` with the negative error value returned
by memcpy_to_msg().

So we can directly check `dequeued_len` value instead of using a
dedicated flag variable to skip the copy path for the rest of
fragments.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 23704a6bc437..f014ccfdd9c2 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -413,7 +413,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 	struct virtio_vsock_pkt *pkt;
 	int dequeued_len = 0;
 	size_t user_buf_len = msg_data_left(msg);
-	bool copy_failed = false;
 	bool msg_ready = false;
 
 	spin_lock_bh(&vvs->rx_lock);
@@ -426,7 +425,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 	while (!msg_ready) {
 		pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
 
-		if (!copy_failed) {
+		if (dequeued_len >= 0) {
 			size_t pkt_len;
 			size_t bytes_to_copy;
 
@@ -443,11 +442,9 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 
 				err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
 				if (err) {
-					/* Copy of message failed, set flag to skip
-					 * copy path for rest of fragments. Rest of
+					/* Copy of message failed. Rest of
 					 * fragments will be freed without copy.
 					 */
-					copy_failed = true;
 					dequeued_len = err;
 				} else {
 					user_buf_len -= bytes_to_copy;

From 1f3c98eaddec857e16a7a1c6cd83317b3dc89438 Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Fri, 18 Jun 2021 22:32:47 +0800
Subject: [PATCH 2215/3292] net: add pf_family_names[] for protocol family

Modify the pr_info content from int to char *, this looks more readable.

Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net.h | 48 ++++++++++++++++++++++++++++++++++++++++
 net/socket.c             |  2 +-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h
index 4dabec6bd957..a28caaf620c7 100644
--- a/include/uapi/linux/net.h
+++ b/include/uapi/linux/net.h
@@ -55,4 +55,52 @@ typedef enum {
 
 #define __SO_ACCEPTCON	(1 << 16)	/* performed a listen		*/
 
+static const char * const pf_family_names[] = {
+	[PF_UNSPEC]	= "PF_UNSPEC",
+	[PF_UNIX]	= "PF_UNIX/PF_LOCAL",
+	[PF_INET]	= "PF_INET",
+	[PF_AX25]	= "PF_AX25",
+	[PF_IPX]	= "PF_IPX",
+	[PF_APPLETALK]	= "PF_APPLETALK",
+	[PF_NETROM]	= "PF_NETROM",
+	[PF_BRIDGE]	= "PF_BRIDGE",
+	[PF_ATMPVC]	= "PF_ATMPVC",
+	[PF_X25]	= "PF_X25",
+	[PF_INET6]	= "PF_INET6",
+	[PF_ROSE]	= "PF_ROSE",
+	[PF_DECnet]	= "PF_DECnet",
+	[PF_NETBEUI]	= "PF_NETBEUI",
+	[PF_SECURITY]	= "PF_SECURITY",
+	[PF_KEY]	= "PF_KEY",
+	[PF_NETLINK]	= "PF_NETLINK/PF_ROUTE",
+	[PF_PACKET]	= "PF_PACKET",
+	[PF_ASH]	= "PF_ASH",
+	[PF_ECONET]	= "PF_ECONET",
+	[PF_ATMSVC]	= "PF_ATMSVC",
+	[PF_RDS]	= "PF_RDS",
+	[PF_SNA]	= "PF_SNA",
+	[PF_IRDA]	= "PF_IRDA",
+	[PF_PPPOX]	= "PF_PPPOX",
+	[PF_WANPIPE]	= "PF_WANPIPE",
+	[PF_LLC]	= "PF_LLC",
+	[PF_IB]		= "PF_IB",
+	[PF_MPLS]	= "PF_MPLS",
+	[PF_CAN]	= "PF_CAN",
+	[PF_TIPC]	= "PF_TIPC",
+	[PF_BLUETOOTH]	= "PF_BLUETOOTH",
+	[PF_IUCV]	= "PF_IUCV",
+	[PF_RXRPC]	= "PF_RXRPC",
+	[PF_ISDN]	= "PF_ISDN",
+	[PF_PHONET]	= "PF_PHONET",
+	[PF_IEEE802154]	= "PF_IEEE802154",
+	[PF_CAIF]	= "PF_CAIF",
+	[PF_ALG]	= "PF_ALG",
+	[PF_NFC]	= "PF_NFC",
+	[PF_VSOCK]	= "PF_VSOCK",
+	[PF_KCM]	= "PF_KCM",
+	[PF_QIPCRTR]	= "PF_QIPCRTR",
+	[PF_SMC]	= "PF_SMC",
+	[PF_XDP]	= "PF_XDP",
+};
+
 #endif /* _UAPI_LINUX_NET_H */
diff --git a/net/socket.c b/net/socket.c
index 27e3e7d53f8e..ff544cf50321 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2988,7 +2988,7 @@ int sock_register(const struct net_proto_family *ops)
 	}
 	spin_unlock(&net_family_lock);
 
-	pr_info("NET: Registered protocol family %d\n", ops->family);
+	pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
 	return err;
 }
 EXPORT_SYMBOL(sock_register);

From 103ebe658a262ef5b5db7f01d83857cf82a087d0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 18 Jun 2021 13:02:45 -0700
Subject: [PATCH 2216/3292] Revert "net: add pf_family_names[] for protocol
 family"

This reverts commit 1f3c98eaddec857e16a7a1c6cd83317b3dc89438.

Does not build...

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net.h | 48 ----------------------------------------
 net/socket.c             |  2 +-
 2 files changed, 1 insertion(+), 49 deletions(-)

diff --git a/include/uapi/linux/net.h b/include/uapi/linux/net.h
index a28caaf620c7..4dabec6bd957 100644
--- a/include/uapi/linux/net.h
+++ b/include/uapi/linux/net.h
@@ -55,52 +55,4 @@ typedef enum {
 
 #define __SO_ACCEPTCON	(1 << 16)	/* performed a listen		*/
 
-static const char * const pf_family_names[] = {
-	[PF_UNSPEC]	= "PF_UNSPEC",
-	[PF_UNIX]	= "PF_UNIX/PF_LOCAL",
-	[PF_INET]	= "PF_INET",
-	[PF_AX25]	= "PF_AX25",
-	[PF_IPX]	= "PF_IPX",
-	[PF_APPLETALK]	= "PF_APPLETALK",
-	[PF_NETROM]	= "PF_NETROM",
-	[PF_BRIDGE]	= "PF_BRIDGE",
-	[PF_ATMPVC]	= "PF_ATMPVC",
-	[PF_X25]	= "PF_X25",
-	[PF_INET6]	= "PF_INET6",
-	[PF_ROSE]	= "PF_ROSE",
-	[PF_DECnet]	= "PF_DECnet",
-	[PF_NETBEUI]	= "PF_NETBEUI",
-	[PF_SECURITY]	= "PF_SECURITY",
-	[PF_KEY]	= "PF_KEY",
-	[PF_NETLINK]	= "PF_NETLINK/PF_ROUTE",
-	[PF_PACKET]	= "PF_PACKET",
-	[PF_ASH]	= "PF_ASH",
-	[PF_ECONET]	= "PF_ECONET",
-	[PF_ATMSVC]	= "PF_ATMSVC",
-	[PF_RDS]	= "PF_RDS",
-	[PF_SNA]	= "PF_SNA",
-	[PF_IRDA]	= "PF_IRDA",
-	[PF_PPPOX]	= "PF_PPPOX",
-	[PF_WANPIPE]	= "PF_WANPIPE",
-	[PF_LLC]	= "PF_LLC",
-	[PF_IB]		= "PF_IB",
-	[PF_MPLS]	= "PF_MPLS",
-	[PF_CAN]	= "PF_CAN",
-	[PF_TIPC]	= "PF_TIPC",
-	[PF_BLUETOOTH]	= "PF_BLUETOOTH",
-	[PF_IUCV]	= "PF_IUCV",
-	[PF_RXRPC]	= "PF_RXRPC",
-	[PF_ISDN]	= "PF_ISDN",
-	[PF_PHONET]	= "PF_PHONET",
-	[PF_IEEE802154]	= "PF_IEEE802154",
-	[PF_CAIF]	= "PF_CAIF",
-	[PF_ALG]	= "PF_ALG",
-	[PF_NFC]	= "PF_NFC",
-	[PF_VSOCK]	= "PF_VSOCK",
-	[PF_KCM]	= "PF_KCM",
-	[PF_QIPCRTR]	= "PF_QIPCRTR",
-	[PF_SMC]	= "PF_SMC",
-	[PF_XDP]	= "PF_XDP",
-};
-
 #endif /* _UAPI_LINUX_NET_H */
diff --git a/net/socket.c b/net/socket.c
index ff544cf50321..27e3e7d53f8e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2988,7 +2988,7 @@ int sock_register(const struct net_proto_family *ops)
 	}
 	spin_unlock(&net_family_lock);
 
-	pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
+	pr_info("NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
 EXPORT_SYMBOL(sock_register);

From 60302ce4ea075369641426ef407c110e36ea8ba1 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan@gerhold.net>
Date: Fri, 18 Jun 2021 19:36:09 +0200
Subject: [PATCH 2217/3292] rpmsg: core: Add driver_data for rpmsg_device_id

Most device_id structs provide a driver_data field that can be used
by drivers to associate data more easily for a particular device ID.
Add the same for the rpmsg_device_id.

Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/rpmsg/rpmsg_core.c      | 4 +++-
 include/linux/mod_devicetable.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index e5daee4f9373..c1404d3dae2c 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -459,8 +459,10 @@ static int rpmsg_dev_match(struct device *dev, struct device_driver *drv)
 
 	if (ids)
 		for (i = 0; ids[i].name[0]; i++)
-			if (rpmsg_id_match(rpdev, &ids[i]))
+			if (rpmsg_id_match(rpdev, &ids[i])) {
+				rpdev->id.driver_data = ids[i].driver_data;
 				return 1;
+			}
 
 	return of_driver_match_device(dev, drv);
 }
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 7d45b5f989b0..8e291cfdaf06 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -447,6 +447,7 @@ struct hv_vmbus_device_id {
 
 struct rpmsg_device_id {
 	char name[RPMSG_NAME_SIZE];
+	kernel_ulong_t driver_data;
 };
 
 /* i2c */

From 5e90abf49c2adfbd6954429c2a1aafdfe9fcab92 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan@gerhold.net>
Date: Fri, 18 Jun 2021 19:36:10 +0200
Subject: [PATCH 2218/3292] net: wwan: Add RPMSG WWAN CTRL driver

The remote processor messaging (rpmsg) subsystem provides an interface
to communicate with other remote processors. On many Qualcomm SoCs this
is used to communicate with an integrated modem DSP that implements most
of the modem functionality and provides high-level protocols like
QMI or AT to allow controlling the modem.

For QMI, most older Qualcomm SoCs (e.g. MSM8916/MSM8974) have
a standalone "DATA5_CNTL" channel that allows exchanging QMI messages.
Note that newer SoCs (e.g. SDM845) only allow exchanging QMI messages
via a shared QRTR channel that is available via a socket API on Linux.

For AT, the "DATA4" channel accepts at least a limited set of AT
commands, on many older and newer Qualcomm SoCs, although QMI is
typically the preferred control protocol.

Often there are additional QMI/AT channels (usually named DATA*_CNTL
for QMI and DATA* for AT), but it is not clear if those are really
functional on all devices. Also, at the moment there is no use case
for having multiple QMI/AT ports. If needed more channels could be
added later after more testing.

Note that the data path (network interface) is entirely separate
from the control path and varies between Qualcomm SoCs, e.g. "IPA"
on newer Qualcomm SoCs or "BAM-DMUX" on some older ones.

The RPMSG WWAN CTRL driver exposes the QMI/AT control ports via the
WWAN subsystem, and therefore allows userspace like ModemManager to
set up the modem. Until now, ModemManager had to use the RPMSG-specific
rpmsg-char where the channels must be explicitly exposed as a char
device first and don't show up directly in sysfs.

The driver is a fairly simple glue layer between WWAN and RPMSG
and is mostly based on the existing mhi_wwan_ctrl.c and rpmsg_char.c.

Cc: Loic Poulain <loic.poulain@linaro.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                        |   7 ++
 drivers/net/wwan/Kconfig           |  18 ++++
 drivers/net/wwan/Makefile          |   1 +
 drivers/net/wwan/rpmsg_wwan_ctrl.c | 143 +++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+)
 create mode 100644 drivers/net/wwan/rpmsg_wwan_ctrl.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 183cc61e2dc0..fbf792962d7b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15587,6 +15587,13 @@ F:	include/linux/rpmsg/
 F:	include/uapi/linux/rpmsg.h
 F:	samples/rpmsg/
 
+REMOTE PROCESSOR MESSAGING (RPMSG) WWAN CONTROL DRIVER
+M:	Stephan Gerhold <stephan@gerhold.net>
+L:	netdev@vger.kernel.org
+L:	linux-remoteproc@vger.kernel.org
+S:	Maintained
+F:	drivers/net/wwan/rpmsg_wwan_ctrl.c
+
 RENESAS CLOCK DRIVERS
 M:	Geert Uytterhoeven <geert+renesas@glider.be>
 L:	linux-renesas-soc@vger.kernel.org
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index 249b3f1ed62b..de9384326bc8 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -38,6 +38,24 @@ config MHI_WWAN_CTRL
 	  To compile this driver as a module, choose M here: the module will be
 	  called mhi_wwan_ctrl.
 
+config RPMSG_WWAN_CTRL
+	tristate "RPMSG WWAN control driver"
+	depends on RPMSG
+	help
+	  RPMSG WWAN CTRL allows modems available via RPMSG channels to expose
+	  different modem protocols/ports to userspace, including AT and QMI.
+	  These protocols can be accessed directly from userspace
+	  (e.g. AT commands) or via libraries/tools (e.g. libqmi, libqcdm...).
+
+	  This is mainly used for modems integrated into many Qualcomm SoCs,
+	  e.g. for AT and QMI on Qualcomm MSM8916 or MSM8974. Note that many
+	  newer Qualcomm SoCs (e.g. SDM845) still provide an AT port through
+	  this driver but the QMI messages can only be sent through
+	  QRTR network sockets (CONFIG_QRTR).
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called rpmsg_wwan_ctrl.
+
 config IOSM
 	tristate "IOSM Driver for Intel M.2 WWAN Device"
 	depends on INTEL_IOMMU
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index 83dd3482ffc3..d90ac33abaef 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -9,4 +9,5 @@ wwan-objs += wwan_core.o
 obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
 
 obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
 obj-$(CONFIG_IOSM) += iosm/
diff --git a/drivers/net/wwan/rpmsg_wwan_ctrl.c b/drivers/net/wwan/rpmsg_wwan_ctrl.c
new file mode 100644
index 000000000000..de226cdb69fd
--- /dev/null
+++ b/drivers/net/wwan/rpmsg_wwan_ctrl.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021, Stephan Gerhold <stephan@gerhold.net> */
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rpmsg.h>
+#include <linux/wwan.h>
+
+struct rpmsg_wwan_dev {
+	/* Lower level is a rpmsg dev, upper level is a wwan port */
+	struct rpmsg_device *rpdev;
+	struct wwan_port *wwan_port;
+	struct rpmsg_endpoint *ept;
+};
+
+static int rpmsg_wwan_ctrl_callback(struct rpmsg_device *rpdev,
+				    void *buf, int len, void *priv, u32 src)
+{
+	struct rpmsg_wwan_dev *rpwwan = priv;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	skb_put_data(skb, buf, len);
+	wwan_port_rx(rpwwan->wwan_port, skb);
+	return 0;
+}
+
+static int rpmsg_wwan_ctrl_start(struct wwan_port *port)
+{
+	struct rpmsg_wwan_dev *rpwwan = wwan_port_get_drvdata(port);
+	struct rpmsg_channel_info chinfo = {
+		.src = rpwwan->rpdev->src,
+		.dst = RPMSG_ADDR_ANY,
+	};
+
+	strncpy(chinfo.name, rpwwan->rpdev->id.name, RPMSG_NAME_SIZE);
+	rpwwan->ept = rpmsg_create_ept(rpwwan->rpdev, rpmsg_wwan_ctrl_callback,
+				       rpwwan, chinfo);
+	if (!rpwwan->ept)
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static void rpmsg_wwan_ctrl_stop(struct wwan_port *port)
+{
+	struct rpmsg_wwan_dev *rpwwan = wwan_port_get_drvdata(port);
+
+	rpmsg_destroy_ept(rpwwan->ept);
+	rpwwan->ept = NULL;
+}
+
+static int rpmsg_wwan_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
+{
+	struct rpmsg_wwan_dev *rpwwan = wwan_port_get_drvdata(port);
+	int ret;
+
+	ret = rpmsg_trysend(rpwwan->ept, skb->data, skb->len);
+	if (ret)
+		return ret;
+
+	consume_skb(skb);
+	return 0;
+}
+
+static const struct wwan_port_ops rpmsg_wwan_pops = {
+	.start = rpmsg_wwan_ctrl_start,
+	.stop = rpmsg_wwan_ctrl_stop,
+	.tx = rpmsg_wwan_ctrl_tx,
+};
+
+static struct device *rpmsg_wwan_find_parent(struct device *dev)
+{
+	/* Select first platform device as parent for the WWAN ports.
+	 * On Qualcomm platforms this is usually the platform device that
+	 * represents the modem remote processor. This might need to be
+	 * adjusted when adding device IDs for other platforms.
+	 */
+	for (dev = dev->parent; dev; dev = dev->parent) {
+		if (dev_is_platform(dev))
+			return dev;
+	}
+	return NULL;
+}
+
+static int rpmsg_wwan_ctrl_probe(struct rpmsg_device *rpdev)
+{
+	struct rpmsg_wwan_dev *rpwwan;
+	struct wwan_port *port;
+	struct device *parent;
+
+	parent = rpmsg_wwan_find_parent(&rpdev->dev);
+	if (!parent)
+		return -ENODEV;
+
+	rpwwan = devm_kzalloc(&rpdev->dev, sizeof(*rpwwan), GFP_KERNEL);
+	if (!rpwwan)
+		return -ENOMEM;
+
+	rpwwan->rpdev = rpdev;
+	dev_set_drvdata(&rpdev->dev, rpwwan);
+
+	/* Register as a wwan port, id.driver_data contains wwan port type */
+	port = wwan_create_port(parent, rpdev->id.driver_data,
+				&rpmsg_wwan_pops, rpwwan);
+	if (IS_ERR(port))
+		return PTR_ERR(port);
+
+	rpwwan->wwan_port = port;
+
+	return 0;
+};
+
+static void rpmsg_wwan_ctrl_remove(struct rpmsg_device *rpdev)
+{
+	struct rpmsg_wwan_dev *rpwwan = dev_get_drvdata(&rpdev->dev);
+
+	wwan_remove_port(rpwwan->wwan_port);
+}
+
+static const struct rpmsg_device_id rpmsg_wwan_ctrl_id_table[] = {
+	/* RPMSG channels for Qualcomm SoCs with integrated modem */
+	{ .name = "DATA5_CNTL", .driver_data = WWAN_PORT_QMI },
+	{ .name = "DATA4", .driver_data = WWAN_PORT_AT },
+	{},
+};
+MODULE_DEVICE_TABLE(rpmsg, rpmsg_wwan_ctrl_id_table);
+
+static struct rpmsg_driver rpmsg_wwan_ctrl_driver = {
+	.drv.name = "rpmsg_wwan_ctrl",
+	.id_table = rpmsg_wwan_ctrl_id_table,
+	.probe = rpmsg_wwan_ctrl_probe,
+	.remove = rpmsg_wwan_ctrl_remove,
+};
+module_rpmsg_driver(rpmsg_wwan_ctrl_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("RPMSG WWAN CTRL Driver");
+MODULE_AUTHOR("Stephan Gerhold <stephan@gerhold.net>");

From 31c143f712750143abaca396236bbe8707700111 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan@gerhold.net>
Date: Fri, 18 Jun 2021 19:36:11 +0200
Subject: [PATCH 2219/3292] net: wwan: Allow WWAN drivers to provide blocking
 tx and poll function

At the moment, the WWAN core provides wwan_port_txon/off() to implement
blocking writes. The tx() port operation should not block, instead
wwan_port_txon/off() should be called when the TX queue is full or has
free space again.

However, in some cases it is not straightforward to make use of that
functionality. For example, the RPMSG API used by rpmsg_wwan_ctrl.c
does not provide any way to be notified when the TX queue has space
again. Instead, it only provides the following operations:

  - rpmsg_send(): blocking write (wait until there is space)
  - rpmsg_trysend(): non-blocking write (return error if no space)
  - rpmsg_poll(): set poll flags depending on TX queue state

Generally that's totally sufficient for implementing a char device,
but it does not fit well to the currently provided WWAN port ops.

Most of the time, using the non-blocking rpmsg_trysend() in the
WWAN tx() port operation works just fine. However, with high-frequent
writes to the char device it is possible to trigger a situation
where this causes issues. For example, consider the following
(somewhat unrealistic) example:

 # dd if=/dev/zero bs=1000 of=/dev/wwan0qmi0
 dd: error writing '/dev/wwan0qmi0': Resource temporarily unavailable
 1+0 records out

This fails immediately after writing the first record. It's likely
only a matter of time until this triggers issues for some real application
(e.g. ModemManager sending a lot of large QMI packets).

The rpmsg_char device does not have this problem, because it uses
rpmsg_trysend() and rpmsg_poll() to support non-blocking operations.
Make it possible to use the same in the RPMSG WWAN driver by adding
two new optional wwan_port_ops:

  - tx_blocking(): send data blocking if allowed
  - tx_poll(): set additional TX poll flags

This integrates nicely with the RPMSG API and does not require
any change in existing WWAN drivers.

With these changes, the dd example above blocks instead of exiting
with an error.

Cc: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/rpmsg_wwan_ctrl.c | 23 +++++++++++++++++++++++
 drivers/net/wwan/wwan_core.c       | 16 ++++++++++++----
 include/linux/wwan.h               | 13 +++++++++++--
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wwan/rpmsg_wwan_ctrl.c b/drivers/net/wwan/rpmsg_wwan_ctrl.c
index de226cdb69fd..31c24420ab2e 100644
--- a/drivers/net/wwan/rpmsg_wwan_ctrl.c
+++ b/drivers/net/wwan/rpmsg_wwan_ctrl.c
@@ -67,10 +67,33 @@ static int rpmsg_wwan_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
 	return 0;
 }
 
+static int rpmsg_wwan_ctrl_tx_blocking(struct wwan_port *port, struct sk_buff *skb)
+{
+	struct rpmsg_wwan_dev *rpwwan = wwan_port_get_drvdata(port);
+	int ret;
+
+	ret = rpmsg_send(rpwwan->ept, skb->data, skb->len);
+	if (ret)
+		return ret;
+
+	consume_skb(skb);
+	return 0;
+}
+
+static __poll_t rpmsg_wwan_ctrl_tx_poll(struct wwan_port *port,
+					struct file *filp, poll_table *wait)
+{
+	struct rpmsg_wwan_dev *rpwwan = wwan_port_get_drvdata(port);
+
+	return rpmsg_poll(rpwwan->ept, filp, wait);
+}
+
 static const struct wwan_port_ops rpmsg_wwan_pops = {
 	.start = rpmsg_wwan_ctrl_start,
 	.stop = rpmsg_wwan_ctrl_stop,
 	.tx = rpmsg_wwan_ctrl_tx,
+	.tx_blocking = rpmsg_wwan_ctrl_tx_blocking,
+	.tx_poll = rpmsg_wwan_ctrl_tx_poll,
 };
 
 static struct device *rpmsg_wwan_find_parent(struct device *dev)
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 7e728042fc41..165afec1dbd1 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -500,7 +500,8 @@ static void wwan_port_op_stop(struct wwan_port *port)
 	mutex_unlock(&port->ops_lock);
 }
 
-static int wwan_port_op_tx(struct wwan_port *port, struct sk_buff *skb)
+static int wwan_port_op_tx(struct wwan_port *port, struct sk_buff *skb,
+			   bool nonblock)
 {
 	int ret;
 
@@ -510,7 +511,10 @@ static int wwan_port_op_tx(struct wwan_port *port, struct sk_buff *skb)
 		goto out_unlock;
 	}
 
-	ret = port->ops->tx(port, skb);
+	if (nonblock || !port->ops->tx_blocking)
+		ret = port->ops->tx(port, skb);
+	else
+		ret = port->ops->tx_blocking(port, skb);
 
 out_unlock:
 	mutex_unlock(&port->ops_lock);
@@ -637,7 +641,7 @@ static ssize_t wwan_port_fops_write(struct file *filp, const char __user *buf,
 		return -EFAULT;
 	}
 
-	ret = wwan_port_op_tx(port, skb);
+	ret = wwan_port_op_tx(port, skb, !!(filp->f_flags & O_NONBLOCK));
 	if (ret) {
 		kfree_skb(skb);
 		return ret;
@@ -653,12 +657,16 @@ static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait)
 
 	poll_wait(filp, &port->waitqueue, wait);
 
-	if (!is_write_blocked(port))
+	mutex_lock(&port->ops_lock);
+	if (port->ops && port->ops->tx_poll)
+		mask |= port->ops->tx_poll(port, filp, wait);
+	else if (!is_write_blocked(port))
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	if (!is_read_blocked(port))
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (!port->ops)
 		mask |= EPOLLHUP | EPOLLERR;
+	mutex_unlock(&port->ops_lock);
 
 	return mask;
 }
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 430a3a0817de..34222230360c 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -6,6 +6,7 @@
 
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/poll.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 
@@ -40,15 +41,23 @@ struct wwan_port;
 /** struct wwan_port_ops - The WWAN port operations
  * @start: The routine for starting the WWAN port device.
  * @stop: The routine for stopping the WWAN port device.
- * @tx: The routine that sends WWAN port protocol data to the device.
+ * @tx: Non-blocking routine that sends WWAN port protocol data to the device.
+ * @tx_blocking: Optional blocking routine that sends WWAN port protocol data
+ *               to the device.
+ * @tx_poll: Optional routine that sets additional TX poll flags.
  *
  * The wwan_port_ops structure contains a list of low-level operations
- * that control a WWAN port device. All functions are mandatory.
+ * that control a WWAN port device. All functions are mandatory unless specified.
  */
 struct wwan_port_ops {
 	int (*start)(struct wwan_port *port);
 	void (*stop)(struct wwan_port *port);
 	int (*tx)(struct wwan_port *port, struct sk_buff *skb);
+
+	/* Optional operations */
+	int (*tx_blocking)(struct wwan_port *port, struct sk_buff *skb);
+	__poll_t (*tx_poll)(struct wwan_port *port, struct file *filp,
+			    poll_table *wait);
 };
 
 /**

From f42cfb469f9b4a1c002a03cce3d9329376800a6f Mon Sep 17 00:00:00 2001
From: Grant Seltzer <grantseltzer@gmail.com>
Date: Fri, 18 Jun 2021 14:04:59 +0000
Subject: [PATCH 2220/3292] bpf: Add documentation for libbpf including API
 autogen

This patch is meant to start the initiative to document libbpf.
It includes .rst files which are text documentation describing building,
API naming convention, as well as an index to generated API documentation.

In this approach the generated API documentation is enabled by the kernels
existing kernel documentation system which uses sphinx. The resulting docs
would then be synced to kernel.org/doc

You can test this by running `make htmldocs` and serving the html in
Documentation/output. Since libbpf does not yet have comments in kernel
doc format, see kernel.org/doc/html/latest/doc-guide/kernel-doc.html for
an example so you can test this.

The advantage of this approach is to use the existing sphinx
infrastructure that the kernel has, and have libbpf docs in
the same place as everything else.

The current plan is to have the libbpf mirror sync the generated docs
and version them based on the libbpf releases which are cut on github.

This patch includes the addition of libbpf_api.rst which pulls comment
documentation from header files in libbpf under tools/lib/bpf/. The comment
docs would be of the standard kernel doc format.

Signed-off-by: Grant Seltzer <grantseltzer@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210618140459.9887-2-grantseltzer@gmail.com
---
 Documentation/bpf/index.rst                   | 13 +++++++
 Documentation/bpf/libbpf/libbpf.rst           | 14 +++++++
 Documentation/bpf/libbpf/libbpf_api.rst       | 27 ++++++++++++++
 Documentation/bpf/libbpf/libbpf_build.rst     | 37 +++++++++++++++++++
 .../bpf/libbpf/libbpf_naming_convention.rst   | 30 ++++++---------
 5 files changed, 103 insertions(+), 18 deletions(-)
 create mode 100644 Documentation/bpf/libbpf/libbpf.rst
 create mode 100644 Documentation/bpf/libbpf/libbpf_api.rst
 create mode 100644 Documentation/bpf/libbpf/libbpf_build.rst
 rename tools/lib/bpf/README.rst => Documentation/bpf/libbpf/libbpf_naming_convention.rst (90%)

diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 93e8cf12a6d4..baea6c2abba5 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -12,6 +12,19 @@ BPF instruction-set.
 The Cilium project also maintains a `BPF and XDP Reference Guide`_
 that goes into great technical depth about the BPF Architecture.
 
+libbpf
+======
+
+Libbpf is a userspace library for loading and interacting with bpf programs.
+
+.. toctree::
+   :maxdepth: 1
+
+   libbpf/libbpf
+   libbpf/libbpf_api
+   libbpf/libbpf_build
+   libbpf/libbpf_naming_convention
+
 BPF Type Format (BTF)
 =====================
 
diff --git a/Documentation/bpf/libbpf/libbpf.rst b/Documentation/bpf/libbpf/libbpf.rst
new file mode 100644
index 000000000000..1b1e61d5ead1
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+libbpf
+======
+
+This is documentation for libbpf, a userspace library for loading and
+interacting with bpf programs.
+
+All general BPF questions, including kernel functionality, libbpf APIs and
+their application, should be sent to bpf@vger.kernel.org mailing list.
+You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
+mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
+Please search the archive before asking new questions. It very well might
+be that this was already addressed or answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_api.rst b/Documentation/bpf/libbpf/libbpf_api.rst
new file mode 100644
index 000000000000..f07eecd054da
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_api.rst
@@ -0,0 +1,27 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+API
+===
+
+This documentation is autogenerated from header files in libbpf, tools/lib/bpf
+
+.. kernel-doc:: tools/lib/bpf/libbpf.h
+   :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf.h
+   :internal:
+
+.. kernel-doc:: tools/lib/bpf/btf.h
+   :internal:
+
+.. kernel-doc:: tools/lib/bpf/xsk.h
+   :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf_tracing.h
+   :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf_core_read.h
+   :internal:
+
+.. kernel-doc:: tools/lib/bpf/bpf_endian.h
+   :internal:
\ No newline at end of file
diff --git a/Documentation/bpf/libbpf/libbpf_build.rst b/Documentation/bpf/libbpf/libbpf_build.rst
new file mode 100644
index 000000000000..8e8c23e8093d
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_build.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+Building libbpf
+===============
+
+libelf and zlib are internal dependencies of libbpf and thus are required to link
+against and must be installed on the system for applications to work.
+pkg-config is used by default to find libelf, and the program called
+can be overridden with PKG_CONFIG.
+
+If using pkg-config at build time is not desired, it can be disabled by
+setting NO_PKG_CONFIG=1 when calling make.
+
+To build both static libbpf.a and shared libbpf.so:
+
+.. code-block:: bash
+
+    $ cd src
+    $ make
+
+To build only static libbpf.a library in directory build/ and install them
+together with libbpf headers in a staging directory root/:
+
+.. code-block:: bash
+
+    $ cd src
+    $ mkdir build root
+    $ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install
+
+To build both static libbpf.a and shared libbpf.so against a custom libelf
+dependency installed in /build/root/ and install them together with libbpf
+headers in a build directory /build/root/:
+
+.. code-block:: bash
+
+    $ cd src
+    $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make
\ No newline at end of file
diff --git a/tools/lib/bpf/README.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
similarity index 90%
rename from tools/lib/bpf/README.rst
rename to Documentation/bpf/libbpf/libbpf_naming_convention.rst
index 8928f7787f2d..3de1d51e41da 100644
--- a/tools/lib/bpf/README.rst
+++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 
-libbpf API naming convention
-============================
+API naming convention
+=====================
 
 libbpf API provides access to a few logically separated groups of
 functions and types. Every group has its own naming convention
@@ -10,14 +10,14 @@ new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
 following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``perf_buffer_``.
+``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
 
 System call wrappers
 --------------------
 
 System call wrappers are simple wrappers for commands supported by
 sys_bpf system call. These wrappers should go to ``bpf.h`` header file
-and map one-on-one to corresponding commands.
+and map one to one to corresponding commands.
 
 For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
 command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
@@ -49,10 +49,6 @@ object, ``bpf_object``, double underscore and ``open`` that defines the
 purpose of the function to open ELF file and create ``bpf_object`` from
 it.
 
-Another example: ``bpf_program__load`` is named for corresponding
-object, ``bpf_program``, that is separated from other part of the name
-by double underscore.
-
 All objects and corresponding functions other than BTF related should go
 to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
 
@@ -72,11 +68,7 @@ of both low-level ring access functions and high-level configuration
 functions. These can be mixed and matched. Note that these functions
 are not reentrant for performance reasons.
 
-Please take a look at Documentation/networking/af_xdp.rst in the Linux
-kernel source tree on how to use XDP sockets and for some common
-mistakes in case you do not get any traffic up to user space.
-
-libbpf ABI
+ABI
 ==========
 
 libbpf can be both linked statically or used as DSO. To avoid possible
@@ -116,7 +108,8 @@ This bump in ABI version is at most once per kernel development cycle.
 
 For example, if current state of ``libbpf.map`` is:
 
-.. code-block::
+.. code-block:: c
+
         LIBBPF_0.0.1 {
         	global:
                         bpf_func_a;
@@ -128,7 +121,8 @@ For example, if current state of ``libbpf.map`` is:
 , and a new symbol ``bpf_func_c`` is being introduced, then
 ``libbpf.map`` should be changed like this:
 
-.. code-block::
+.. code-block:: c
+
         LIBBPF_0.0.1 {
         	global:
                         bpf_func_a;
@@ -148,7 +142,7 @@ Format of version script and ways to handle ABI changes, including
 incompatible ones, described in details in [1].
 
 Stand-alone build
-=================
+-------------------
 
 Under https://github.com/libbpf/libbpf there is a (semi-)automated
 mirror of the mainline's version of libbpf for a stand-alone build.
@@ -157,12 +151,12 @@ However, all changes to libbpf's code base must be upstreamed through
 the mainline kernel tree.
 
 License
-=======
+-------------------
 
 libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
 
 Links
-=====
+-------------------
 
 [1] https://www.akkadia.org/drepper/dsohowto.pdf
     (Chapter 3. Maintaining APIs and ABIs).

From 3078d964c0fe6cf8eba197c862d1011cb7c0e7b4 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Thu, 17 Jun 2021 09:50:06 +0300
Subject: [PATCH 2221/3292] docs: networking: Update connection tracking
 offload sysctl parameters

Document the following connection offload configuration parameters:
- nf_flowtable_tcp_timeout
- nf_flowtable_tcp_pickup
- nf_flowtable_udp_timeout
- nf_flowtable_udp_pickup

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 .../networking/nf_conntrack-sysctl.rst        | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 11a9b76786cb..0467b30e4abe 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -177,3 +177,27 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
 
 	This extended timeout will be used in case there is an GRE stream
 	detected.
+
+nf_flowtable_tcp_timeout - INTEGER (seconds)
+        default 30
+
+        Control offload timeout for tcp connections.
+        TCP connections may be offloaded from nf conntrack to nf flow table.
+        Once aged, the connection is returned to nf conntrack with tcp pickup timeout.
+
+nf_flowtable_tcp_pickup - INTEGER (seconds)
+        default 120
+
+        TCP connection timeout after being aged from nf flow table offload.
+
+nf_flowtable_udp_timeout - INTEGER (seconds)
+        default 30
+
+        Control offload timeout for udp connections.
+        UDP connections may be offloaded from nf conntrack to nf flow table.
+        Once aged, the connection is returned to nf conntrack with udp pickup timeout.
+
+nf_flowtable_udp_pickup - INTEGER (seconds)
+        default 30
+
+        UDP connection timeout after being aged from nf flow table offload.

From 54b8fdebe4f45aafb61a0c73872a50d53791b091 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 29 Apr 2021 18:32:56 +0200
Subject: [PATCH 2222/3292] mt76: move mt76_rates in mt76 module

Move mt76_rates array in mt76 module and remove duplicated code since it
is shared by all drivers

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 16 +++++++++
 drivers/net/wireless/mediatek/mt76/mt76.h     | 15 ++++++++
 .../net/wireless/mediatek/mt76/mt7603/init.c  | 32 ++---------------
 .../net/wireless/mediatek/mt76/mt7615/init.c  | 33 ++---------------
 .../wireless/mediatek/mt76/mt7615/pci_init.c  |  4 +--
 .../wireless/mediatek/mt76/mt7615/usb_sdio.c  |  4 +--
 .../net/wireless/mediatek/mt76/mt76x02_util.c | 16 +++------
 .../net/wireless/mediatek/mt76/mt7915/init.c  | 36 +++----------------
 .../net/wireless/mediatek/mt76/mt7921/init.c  | 32 ++---------------
 9 files changed, 50 insertions(+), 138 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 03fe62837557..447bc9a3abb0 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -83,6 +83,22 @@ static const struct ieee80211_tpt_blink mt76_tpt_blink[] = {
 	{ .throughput = 300 * 1024, .blink_time =  50 },
 };
 
+struct ieee80211_rate mt76_rates[] = {
+	CCK_RATE(0, 10),
+	CCK_RATE(1, 20),
+	CCK_RATE(2, 55),
+	CCK_RATE(3, 110),
+	OFDM_RATE(11, 60),
+	OFDM_RATE(15, 90),
+	OFDM_RATE(10, 120),
+	OFDM_RATE(14, 180),
+	OFDM_RATE(9,  240),
+	OFDM_RATE(13, 360),
+	OFDM_RATE(8,  480),
+	OFDM_RATE(12, 540),
+};
+EXPORT_SYMBOL_GPL(mt76_rates);
+
 static int mt76_led_init(struct mt76_dev *dev)
 {
 	struct device_node *np = dev->dev->of_node;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index d3859eda2a4f..ebacd55cb0cd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -738,6 +738,21 @@ enum mt76_phy_type {
 	MT_PHY_TYPE_HE_MU,
 };
 
+#define CCK_RATE(_idx, _rate) {					\
+	.bitrate = _rate,					\
+	.flags = IEEE80211_RATE_SHORT_PREAMBLE,			\
+	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),		\
+	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (4 + _idx),	\
+}
+
+#define OFDM_RATE(_idx, _rate) {				\
+	.bitrate = _rate,					\
+	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
+	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),	\
+}
+
+extern struct ieee80211_rate mt76_rates[12];
+
 #define __mt76_rr(dev, ...)	(dev)->bus->rr((dev), __VA_ARGS__)
 #define __mt76_wr(dev, ...)	(dev)->bus->wr((dev), __VA_ARGS__)
 #define __mt76_rmw(dev, ...)	(dev)->bus->rmw((dev), __VA_ARGS__)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
index e1b2cfa56074..031d39a48a55 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
@@ -304,34 +304,6 @@ mt7603_init_hardware(struct mt7603_dev *dev)
 	return 0;
 }
 
-#define CCK_RATE(_idx, _rate) {					\
-	.bitrate = _rate,					\
-	.flags = IEEE80211_RATE_SHORT_PREAMBLE,			\
-	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),		\
-	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (4 + _idx),	\
-}
-
-#define OFDM_RATE(_idx, _rate) {				\
-	.bitrate = _rate,					\
-	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
-	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),	\
-}
-
-static struct ieee80211_rate mt7603_rates[] = {
-	CCK_RATE(0, 10),
-	CCK_RATE(1, 20),
-	CCK_RATE(2, 55),
-	CCK_RATE(3, 110),
-	OFDM_RATE(11, 60),
-	OFDM_RATE(15, 90),
-	OFDM_RATE(10, 120),
-	OFDM_RATE(14, 180),
-	OFDM_RATE(9,  240),
-	OFDM_RATE(13, 360),
-	OFDM_RATE(8,  480),
-	OFDM_RATE(12, 540),
-};
-
 static const struct ieee80211_iface_limit if_limits[] = {
 	{
 		.max = 1,
@@ -569,8 +541,8 @@ int mt7603_register_device(struct mt7603_dev *dev)
 
 	wiphy->reg_notifier = mt7603_regd_notifier;
 
-	ret = mt76_register_device(&dev->mt76, true, mt7603_rates,
-				   ARRAY_SIZE(mt7603_rates));
+	ret = mt76_register_device(&dev->mt76, true, mt76_rates,
+				   ARRAY_SIZE(mt76_rates));
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index d20f05a7717d..ecc3ca9eb658 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -174,35 +174,6 @@ bool mt7615_wait_for_mcu_init(struct mt7615_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mt7615_wait_for_mcu_init);
 
-#define CCK_RATE(_idx, _rate) {						\
-	.bitrate = _rate,						\
-	.flags = IEEE80211_RATE_SHORT_PREAMBLE,				\
-	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),			\
-	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (4 + (_idx)),	\
-}
-
-#define OFDM_RATE(_idx, _rate) {					\
-	.bitrate = _rate,						\
-	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),			\
-	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
-}
-
-struct ieee80211_rate mt7615_rates[] = {
-	CCK_RATE(0, 10),
-	CCK_RATE(1, 20),
-	CCK_RATE(2, 55),
-	CCK_RATE(3, 110),
-	OFDM_RATE(11, 60),
-	OFDM_RATE(15, 90),
-	OFDM_RATE(10, 120),
-	OFDM_RATE(14, 180),
-	OFDM_RATE(9,  240),
-	OFDM_RATE(13, 360),
-	OFDM_RATE(8,  480),
-	OFDM_RATE(12, 540),
-};
-EXPORT_SYMBOL_GPL(mt7615_rates);
-
 static const struct ieee80211_iface_limit if_limits[] = {
 	{
 		.max = 1,
@@ -472,8 +443,8 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	for (i = 0; i <= MT_TXQ_PSD ; i++)
 		mphy->q_tx[i] = dev->mphy.q_tx[i];
 
-	ret = mt76_register_phy(mphy, true, mt7615_rates,
-				ARRAY_SIZE(mt7615_rates));
+	ret = mt76_register_phy(mphy, true, mt76_rates,
+				ARRAY_SIZE(mt76_rates));
 	if (ret)
 		ieee80211_free_hw(mphy->hw);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index ec8ec1a2033f..273fda08bfa2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -147,8 +147,8 @@ int mt7615_register_device(struct mt7615_dev *dev)
 	if (ret)
 		return ret;
 
-	ret = mt76_register_device(&dev->mt76, true, mt7615_rates,
-				   ARRAY_SIZE(mt7615_rates));
+	ret = mt76_register_device(&dev->mt76, true, mt76_rates,
+				   ARRAY_SIZE(mt76_rates));
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
index f8d3673c2cae..b2b02614f8e9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -323,8 +323,8 @@ int mt7663_usb_sdio_register_device(struct mt7615_dev *dev)
 			hw->max_tx_fragments = 1;
 	}
 
-	err = mt76_register_device(&dev->mt76, true, mt7615_rates,
-				   ARRAY_SIZE(mt7615_rates));
+	err = mt76_register_device(&dev->mt76, true, mt76_rates,
+				   ARRAY_SIZE(mt76_rates));
 	if (err < 0)
 		return err;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index 02db5d66735d..ccdbab341271 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -7,24 +7,18 @@
 #include <linux/module.h>
 #include "mt76x02.h"
 
-#define CCK_RATE(_idx, _rate) {					\
+#define MT76x02_CCK_RATE(_idx, _rate) {					\
 	.bitrate = _rate,					\
 	.flags = IEEE80211_RATE_SHORT_PREAMBLE,			\
 	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),		\
 	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (8 + (_idx)),	\
 }
 
-#define OFDM_RATE(_idx, _rate) {				\
-	.bitrate = _rate,					\
-	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
-	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),	\
-}
-
 struct ieee80211_rate mt76x02_rates[] = {
-	CCK_RATE(0, 10),
-	CCK_RATE(1, 20),
-	CCK_RATE(2, 55),
-	CCK_RATE(3, 110),
+	MT76x02_CCK_RATE(0, 10),
+	MT76x02_CCK_RATE(1, 20),
+	MT76x02_CCK_RATE(2, 55),
+	MT76x02_CCK_RATE(3, 110),
 	OFDM_RATE(0, 60),
 	OFDM_RATE(1, 90),
 	OFDM_RATE(2, 120),
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index a8fd822cc46e..c103175d9954 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -7,34 +7,6 @@
 #include "mcu.h"
 #include "eeprom.h"
 
-#define CCK_RATE(_idx, _rate) {						\
-	.bitrate = _rate,						\
-	.flags = IEEE80211_RATE_SHORT_PREAMBLE,				\
-	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),			\
-	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (4 + (_idx)),	\
-}
-
-#define OFDM_RATE(_idx, _rate) {					\
-	.bitrate = _rate,						\
-	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),			\
-	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
-}
-
-static struct ieee80211_rate mt7915_rates[] = {
-	CCK_RATE(0, 10),
-	CCK_RATE(1, 20),
-	CCK_RATE(2, 55),
-	CCK_RATE(3, 110),
-	OFDM_RATE(11, 60),
-	OFDM_RATE(15, 90),
-	OFDM_RATE(10, 120),
-	OFDM_RATE(14, 180),
-	OFDM_RATE(9,  240),
-	OFDM_RATE(13, 360),
-	OFDM_RATE(8,  480),
-	OFDM_RATE(12, 540),
-};
-
 static const struct ieee80211_iface_limit if_limits[] = {
 	{
 		.max = 1,
@@ -281,8 +253,8 @@ static int mt7915_register_ext_phy(struct mt7915_dev *dev)
 	if (ret)
 		goto error;
 
-	ret = mt76_register_phy(mphy, true, mt7915_rates,
-				ARRAY_SIZE(mt7915_rates));
+	ret = mt76_register_phy(mphy, true, mt76_rates,
+				ARRAY_SIZE(mt76_rates));
 	if (ret)
 		goto error;
 
@@ -731,8 +703,8 @@ int mt7915_register_device(struct mt7915_dev *dev)
 	dev->mt76.test_ops = &mt7915_testmode_ops;
 #endif
 
-	ret = mt76_register_device(&dev->mt76, true, mt7915_rates,
-				   ARRAY_SIZE(mt7915_rates));
+	ret = mt76_register_device(&dev->mt76, true, mt76_rates,
+				   ARRAY_SIZE(mt76_rates));
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 1763ea0614ce..703f36bedc17 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -7,34 +7,6 @@
 #include "mcu.h"
 #include "eeprom.h"
 
-#define CCK_RATE(_idx, _rate) {						\
-	.bitrate = _rate,						\
-	.flags = IEEE80211_RATE_SHORT_PREAMBLE,				\
-	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),			\
-	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (4 + (_idx)),	\
-}
-
-#define OFDM_RATE(_idx, _rate) {					\
-	.bitrate = _rate,						\
-	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),			\
-	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
-}
-
-static struct ieee80211_rate mt7921_rates[] = {
-	CCK_RATE(0, 10),
-	CCK_RATE(1, 20),
-	CCK_RATE(2, 55),
-	CCK_RATE(3, 110),
-	OFDM_RATE(11, 60),
-	OFDM_RATE(15, 90),
-	OFDM_RATE(10, 120),
-	OFDM_RATE(14, 180),
-	OFDM_RATE(9,  240),
-	OFDM_RATE(13, 360),
-	OFDM_RATE(8,  480),
-	OFDM_RATE(12, 540),
-};
-
 static const struct ieee80211_iface_limit if_limits[] = {
 	{
 		.max = MT7921_MAX_INTERFACES,
@@ -260,8 +232,8 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	mt76_set_stream_caps(&dev->mphy, true);
 	mt7921_set_stream_he_caps(&dev->phy);
 
-	ret = mt76_register_device(&dev->mt76, true, mt7921_rates,
-				   ARRAY_SIZE(mt7921_rates));
+	ret = mt76_register_device(&dev->mt76, true, mt76_rates,
+				   ARRAY_SIZE(mt76_rates));
 	if (ret)
 		return ret;
 

From 64cf5ad3c2fa841e4b416343a7ea69c63d60fa4e Mon Sep 17 00:00:00 2001
From: Evelyn Tsai <evelyn.tsai@mediatek.com>
Date: Mon, 3 May 2021 16:04:37 +0800
Subject: [PATCH 2223/3292] mt76: mt7915: fix tssi indication field of DBDC
 NICs

Correct the bitfield which indicates TSSI on/off for MT7915D NIC.

Signed-off-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
index 033fb592bdf0..7896e983209a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
@@ -99,12 +99,15 @@ static inline bool
 mt7915_tssi_enabled(struct mt7915_dev *dev, enum nl80211_band band)
 {
 	u8 *eep = dev->mt76.eeprom.data;
+	u8 val = eep[MT_EE_WIFI_CONF + 7];
 
-	/* TODO: DBDC */
-	if (band == NL80211_BAND_5GHZ)
-		return eep[MT_EE_WIFI_CONF + 7] & MT_EE_WIFI_CONF7_TSSI0_5G;
+	if (band == NL80211_BAND_2GHZ)
+		return val & MT_EE_WIFI_CONF7_TSSI0_2G;
+
+	if (dev->dbdc_support)
+		return val & MT_EE_WIFI_CONF7_TSSI1_5G;
 	else
-		return eep[MT_EE_WIFI_CONF + 7] & MT_EE_WIFI_CONF7_TSSI0_2G;
+		return val & MT_EE_WIFI_CONF7_TSSI0_5G;
 }
 
 extern const u8 mt7915_sku_group_len[MAX_SKU_RATE_GROUP_NUM];

From 861fad474ec7638aeca46a508da4ea81612374b9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 3 May 2021 17:53:59 +0300
Subject: [PATCH 2224/3292] mt76: mt7915: fix a signedness bug in
 mt7915_mcu_apply_tx_dpd()

"idx" needs to be signed for the error handling to work.

Fixes: 495184ac91bb ("mt76: mt7915: add support for applying pre-calibration data")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 76e8aa604e8b..ce7c55fda6cf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3448,8 +3448,9 @@ int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy)
 {
 	struct mt7915_dev *dev = phy->dev;
 	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
-	u16 total = 2, idx, center_freq = chandef->center_freq1;
+	u16 total = 2, center_freq = chandef->center_freq1;
 	u8 *cal = dev->cal, *eep = dev->mt76.eeprom.data;
+	int idx;
 
 	if (!(eep[MT_EE_DO_PRE_CAL] & MT_EE_WIFI_CAL_DPD))
 		return 0;

From 5b0b5c6a1c2195942ac48ec8bbf567789f903353 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 6 May 2021 20:13:32 +0200
Subject: [PATCH 2225/3292] mt76: mt7921: enable rx hw de-amsdu

Enable hw rx-amsdu de-aggregation support available in 7921 devices.
This is a preliminary patch to enable rx checksum offload.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c |  4 ++--
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c  | 16 +++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.h  |  3 +++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 703f36bedc17..73741e148240 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -125,8 +125,8 @@ void mt7921_mac_init(struct mt7921_dev *dev)
 	int i;
 
 	mt76_rmw_field(dev, MT_MDP_DCR1, MT_MDP_DCR1_MAX_RX_LEN, 1536);
-	/* disable hardware de-agg */
-	mt76_clear(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
+	/* enable hardware de-agg */
+	mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
 	mt76_clear(dev, MT_MDP_DCR0, MT_MDP_DCR0_RX_HDR_TRANS_EN);
 
 	for (i = 0; i < MT7921_WTBL_SIZE; i++)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index decf2d5f0ce3..7e57d230e63a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -319,8 +319,9 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	u32 rxd1 = le32_to_cpu(rxd[1]);
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 rxd3 = le32_to_cpu(rxd[3]);
+	u32 rxd4 = le32_to_cpu(rxd[4]);
 	bool unicast, insert_ccmp_hdr = false;
-	u8 remove_pad;
+	u8 remove_pad, amsdu_info;
 	int i, idx;
 	u8 chfreq;
 
@@ -332,6 +333,9 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	if (!test_bit(MT76_STATE_RUNNING, &mphy->state))
 		return -EINVAL;
 
+	if (rxd2 & MT_RXD2_NORMAL_AMSDU_ERR)
+		return -EINVAL;
+
 	chfreq = FIELD_GET(MT_RXD3_NORMAL_CH_FREQ, rxd3);
 	unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
 	idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
@@ -540,6 +544,16 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 
 	skb_pull(skb, (u8 *)rxd - skb->data + 2 * remove_pad);
 
+	amsdu_info = FIELD_GET(MT_RXD4_NORMAL_PAYLOAD_FORMAT, rxd4);
+	status->amsdu = !!amsdu_info;
+	if (status->amsdu) {
+		status->first_amsdu = amsdu_info == MT_RXD4_FIRST_AMSDU_FRAME;
+		status->last_amsdu = amsdu_info == MT_RXD4_LAST_AMSDU_FRAME;
+		memmove(skb->data + 2, skb->data,
+			ieee80211_get_hdrlen_from_skb(skb));
+		skb_pull(skb, 2);
+	}
+
 	if (insert_ccmp_hdr) {
 		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
index 109c8849d106..435c138c27d1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
@@ -88,6 +88,9 @@ enum rx_pkt_type {
 
 /* RXD DW4 */
 #define MT_RXD4_NORMAL_PAYLOAD_FORMAT	GENMASK(1, 0)
+#define MT_RXD4_FIRST_AMSDU_FRAME	GENMASK(1, 0)
+#define MT_RXD4_MID_AMSDU_FRAME		BIT(1)
+#define MT_RXD4_LAST_AMSDU_FRAME	BIT(0)
 #define MT_RXD4_NORMAL_PATTERN_DROP	BIT(9)
 #define MT_RXD4_NORMAL_CLS		BIT(10)
 #define MT_RXD4_NORMAL_OFLD		GENMASK(12, 11)

From 868fe07ee612f81a493504190cdfcc9d344c9dc3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 6 May 2021 20:13:33 +0200
Subject: [PATCH 2226/3292] mt76: connac: add missing configuration in
 mt76_connac_mcu_wtbl_hdr_trans_tlv

Add missing configuration parameters in mt76_connac_mcu_wtbl_hdr_trans_tlv
routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c      |  8 ++++----
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 11 +++++++++++
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h |  1 +
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index aa42af9ebfd6..32a2cb76b583 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1037,8 +1037,8 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		if (sta)
 			mt76_connac_mcu_wtbl_ht_tlv(&dev->mt76, wskb, sta,
 						    NULL, wtbl_hdr);
-		mt76_connac_mcu_wtbl_hdr_trans_tlv(wskb, &msta->wcid, NULL,
-						   wtbl_hdr);
+		mt76_connac_mcu_wtbl_hdr_trans_tlv(wskb, vif, &msta->wcid,
+						   NULL, wtbl_hdr);
 	}
 
 	cmd = enable ? MCU_EXT_CMD_WTBL_UPDATE : MCU_EXT_CMD_STA_REC_UPDATE;
@@ -1167,8 +1167,8 @@ int mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
-	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, &msta->wcid, NULL, wtbl_hdr);
-
+	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, vif, &msta->wcid, NULL,
+					   wtbl_hdr);
 	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD_WTBL_UPDATE,
 				     true);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 619561606f96..443e5109c1d9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -393,6 +393,7 @@ mt76_connac_mcu_sta_uapsd(struct sk_buff *skb, struct ieee80211_vif *vif,
 }
 
 void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
+					struct ieee80211_vif *vif,
 					struct mt76_wcid *wcid,
 					void *sta_wtbl, void *wtbl_tlv)
 {
@@ -404,6 +405,16 @@ void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
 					     wtbl_tlv, sta_wtbl);
 	htr = (struct wtbl_hdr_trans *)tlv;
 	htr->no_rx_trans = !test_bit(MT_WCID_FLAG_HDR_TRANS, &wcid->flags);
+
+	if (vif->type == NL80211_IFTYPE_STATION)
+		htr->to_ds = true;
+	else
+		htr->from_ds = true;
+
+	if (test_bit(MT_WCID_FLAG_4ADDR, &wcid->flags)) {
+		htr->to_ds = true;
+		htr->from_ds = true;
+	}
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_hdr_trans_tlv);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index a1096861d04a..f0493924fa89 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -969,6 +969,7 @@ void mt76_connac_mcu_wtbl_generic_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				      struct ieee80211_sta *sta, void *sta_wtbl,
 				      void *wtbl_tlv);
 void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
+					struct ieee80211_vif *vif,
 					struct mt76_wcid *wcid,
 					void *sta_wtbl, void *wtbl_tlv);
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,

From 24299fc869f7caded8ae30a33f205ab37be729d4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 6 May 2021 20:13:34 +0200
Subject: [PATCH 2227/3292] mt76: mt7921: enable rx header traslation offload

As already done for mt7615 and mt7915, enable rx header translation
offload for mt7921 in order to reduce cpu load in the rx path.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt76_connac_mcu.c  | 29 +++++++++++
 .../wireless/mediatek/mt76/mt76_connac_mcu.h  |  3 ++
 .../net/wireless/mediatek/mt76/mt7921/init.c  |  4 +-
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 52 +++++++++++++------
 .../net/wireless/mediatek/mt76/mt7921/mac.h   | 11 ++++
 .../net/wireless/mediatek/mt76/mt7921/main.c  | 18 +++++++
 6 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 443e5109c1d9..7b8f8e6f431d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -418,6 +418,33 @@ void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_hdr_trans_tlv);
 
+int mt76_connac_mcu_sta_update_hdr_trans(struct mt76_dev *dev,
+					 struct ieee80211_vif *vif,
+					 struct mt76_wcid *wcid, int cmd)
+{
+	struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv;
+	struct wtbl_req_hdr *wtbl_hdr;
+	struct tlv *sta_wtbl;
+	struct sk_buff *skb;
+
+	skb = mt76_connac_mcu_alloc_sta_req(dev, mvif, wcid);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	sta_wtbl = mt76_connac_mcu_add_tlv(skb, STA_REC_WTBL,
+					   sizeof(struct tlv));
+
+	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(dev, wcid, WTBL_SET,
+						  sta_wtbl, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
+	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, vif, wcid, sta_wtbl, wtbl_hdr);
+
+	return mt76_mcu_skb_send_msg(dev, skb, cmd, true);
+}
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_sta_update_hdr_trans);
+
 void mt76_connac_mcu_wtbl_generic_tlv(struct mt76_dev *dev,
 				      struct sk_buff *skb,
 				      struct ieee80211_vif *vif,
@@ -870,6 +897,8 @@ int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
 		mt76_connac_mcu_wtbl_generic_tlv(dev, skb, info->vif,
 						 info->sta, sta_wtbl,
 						 wtbl_hdr);
+		mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, info->vif, info->wcid,
+						   sta_wtbl, wtbl_hdr);
 		if (info->sta)
 			mt76_connac_mcu_wtbl_ht_tlv(dev, skb, info->sta,
 						    sta_wtbl, wtbl_hdr);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index f0493924fa89..01fc9f2c2f4a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -972,6 +972,9 @@ void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
 					struct ieee80211_vif *vif,
 					struct mt76_wcid *wcid,
 					void *sta_wtbl, void *wtbl_tlv);
+int mt76_connac_mcu_sta_update_hdr_trans(struct mt76_dev *dev,
+					 struct ieee80211_vif *vif,
+					 struct mt76_wcid *wcid, int cmd);
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
 			     struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 73741e148240..d5ad92a440c3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -78,6 +78,7 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);
 	ieee80211_hw_set(hw, HAS_RATE_CONTROL);
 	ieee80211_hw_set(hw, SUPPORTS_TX_ENCAP_OFFLOAD);
+	ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD);
 	ieee80211_hw_set(hw, WANT_MONITOR_VIF);
 	ieee80211_hw_set(hw, SUPPORTS_PS);
 	ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
@@ -127,7 +128,8 @@ void mt7921_mac_init(struct mt7921_dev *dev)
 	mt76_rmw_field(dev, MT_MDP_DCR1, MT_MDP_DCR1_MAX_RX_LEN, 1536);
 	/* enable hardware de-agg */
 	mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
-	mt76_clear(dev, MT_MDP_DCR0, MT_MDP_DCR0_RX_HDR_TRANS_EN);
+	/* enable hardware rx header translation */
+	mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_RX_HDR_TRANS_EN);
 
 	for (i = 0; i < MT7921_WTBL_SIZE; i++)
 		mt7921_mac_wtbl_update(dev, i,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 7e57d230e63a..74974f689462 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -309,6 +309,8 @@ mt7921_mac_assoc_rssi(struct mt7921_dev *dev, struct sk_buff *skb)
 int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
+	bool hdr_trans, unicast, insert_ccmp_hdr = false;
+	u8 chfreq, qos_ctl = 0, remove_pad, amsdu_info;
 	struct mt76_phy *mphy = &dev->mt76.phy;
 	struct mt7921_phy *phy = &dev->phy;
 	struct ieee80211_supported_band *sband;
@@ -320,10 +322,9 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 rxd3 = le32_to_cpu(rxd[3]);
 	u32 rxd4 = le32_to_cpu(rxd[4]);
-	bool unicast, insert_ccmp_hdr = false;
-	u8 remove_pad, amsdu_info;
+	u16 seq_ctrl = 0;
+	__le16 fc = 0;
 	int i, idx;
-	u8 chfreq;
 
 	memset(status, 0, sizeof(*status));
 
@@ -339,6 +340,7 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	chfreq = FIELD_GET(MT_RXD3_NORMAL_CH_FREQ, rxd3);
 	unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
 	idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
+	hdr_trans = rxd2 & MT_RXD2_NORMAL_HDR_TRANS;
 	status->wcid = mt7921_rx_get_wcid(dev, idx, unicast);
 
 	if (status->wcid) {
@@ -381,6 +383,13 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 
 	rxd += 6;
 	if (rxd1 & MT_RXD1_NORMAL_GROUP_4) {
+		u32 v0 = le32_to_cpu(rxd[0]);
+		u32 v2 = le32_to_cpu(rxd[2]);
+
+		fc = cpu_to_le16(FIELD_GET(MT_RXD6_FRAME_CONTROL, v0));
+		seq_ctrl = FIELD_GET(MT_RXD8_SEQ_CTRL, v2);
+		qos_ctl = FIELD_GET(MT_RXD8_QOS_CTL, v2);
+
 		rxd += 4;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
@@ -549,15 +558,30 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	if (status->amsdu) {
 		status->first_amsdu = amsdu_info == MT_RXD4_FIRST_AMSDU_FRAME;
 		status->last_amsdu = amsdu_info == MT_RXD4_LAST_AMSDU_FRAME;
-		memmove(skb->data + 2, skb->data,
-			ieee80211_get_hdrlen_from_skb(skb));
-		skb_pull(skb, 2);
+		if (!hdr_trans) {
+			memmove(skb->data + 2, skb->data,
+				ieee80211_get_hdrlen_from_skb(skb));
+			skb_pull(skb, 2);
+		}
 	}
 
-	if (insert_ccmp_hdr) {
-		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
+	if (!hdr_trans) {
+		if (insert_ccmp_hdr) {
+			u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
 
-		mt76_insert_ccmp_hdr(skb, key_id);
+			mt76_insert_ccmp_hdr(skb, key_id);
+		}
+
+		hdr = mt76_skb_get_hdr(skb);
+		fc = hdr->frame_control;
+		if (ieee80211_is_data_qos(fc)) {
+			seq_ctrl = le16_to_cpu(hdr->seq_ctrl);
+			qos_ctl = *ieee80211_get_qos_ctl(hdr);
+		}
+	} else {
+		status->flag &= ~(RX_FLAG_RADIOTAP_HE |
+				  RX_FLAG_RADIOTAP_HE_MU);
+		status->flag |= RX_FLAG_8023;
 	}
 
 	mt7921_mac_assoc_rssi(dev, skb);
@@ -565,14 +589,12 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	if (rxv && status->flag & RX_FLAG_RADIOTAP_HE)
 		mt7921_mac_decode_he_radiotap(skb, status, rxv, mode);
 
-	hdr = mt76_skb_get_hdr(skb);
-	if (!status->wcid || !ieee80211_is_data_qos(hdr->frame_control))
+	if (!status->wcid || !ieee80211_is_data_qos(fc))
 		return 0;
 
-	status->aggr = unicast &&
-		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
-	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+	status->aggr = unicast && !ieee80211_is_qos_nullfunc(fc);
+	status->seqno = IEEE80211_SEQ_TO_SN(seq_ctrl);
+	status->qos_ctl = qos_ctl;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
index 435c138c27d1..3af67fac213d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
@@ -100,6 +100,17 @@ enum rx_pkt_type {
 #define MT_RXD3_NORMAL_PF_MODE		BIT(29)
 #define MT_RXD3_NORMAL_PF_STS		GENMASK(31, 30)
 
+/* RXD GROUP4 */
+#define MT_RXD6_FRAME_CONTROL		GENMASK(15, 0)
+#define MT_RXD6_TA_LO			GENMASK(31, 16)
+
+#define MT_RXD7_TA_HI			GENMASK(31, 0)
+
+#define MT_RXD8_SEQ_CTRL		GENMASK(15, 0)
+#define MT_RXD8_QOS_CTL			GENMASK(31, 16)
+
+#define MT_RXD9_HT_CONTROL		GENMASK(31, 0)
+
 /* P-RXV DW0 */
 #define MT_PRXV_TX_RATE			GENMASK(6, 0)
 #define MT_PRXV_TX_DCM			BIT(4)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 97a0ef331ac3..8016f8377c8f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -1163,6 +1163,23 @@ static void mt7921_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			   HZ / 2);
 }
 
+static void mt7921_sta_set_decap_offload(struct ieee80211_hw *hw,
+					 struct ieee80211_vif *vif,
+					 struct ieee80211_sta *sta,
+					 bool enabled)
+{
+	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
+	struct mt7921_dev *dev = mt7921_hw_dev(hw);
+
+	if (enabled)
+		set_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+	else
+		clear_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+
+	mt76_connac_mcu_sta_update_hdr_trans(&dev->mt76, vif, &msta->wcid,
+					     MCU_UNI_CMD_STA_REC_UPDATE);
+}
+
 const struct ieee80211_ops mt7921_ops = {
 	.tx = mt7921_tx,
 	.start = mt7921_start,
@@ -1177,6 +1194,7 @@ const struct ieee80211_ops mt7921_ops = {
 	.sta_remove = mt7921_sta_remove,
 	.sta_pre_rcu_remove = mt76_sta_pre_rcu_remove,
 	.set_key = mt7921_set_key,
+	.sta_set_decap_offload = mt7921_sta_set_decap_offload,
 	.ampdu_action = mt7921_ampdu_action,
 	.set_rts_threshold = mt7921_set_rts_threshold,
 	.wake_tx_queue = mt76_wake_tx_queue,

From 0e75732764e86a7741d7807b4408cd02cbaf1e0c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 6 May 2021 20:13:35 +0200
Subject: [PATCH 2228/3292] mt76: mt7921: enable rx csum offload

As already done for mt7615 and mt7915, enable hw rx checksum offload.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c  | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index d5ad92a440c3..b899b82bbf04 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -50,6 +50,7 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	hw->queues = 4;
 	hw->max_rx_aggregation_subframes = 64;
 	hw->max_tx_aggregation_subframes = 128;
+	hw->netdev_features = NETIF_F_RXCSUM;
 
 	hw->radiotap_timestamp.units_pos =
 		IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 74974f689462..44b8918db95b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -308,22 +308,23 @@ mt7921_mac_assoc_rssi(struct mt7921_dev *dev, struct sk_buff *skb)
 
 int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 {
+	u32 csum_mask = MT_RXD0_NORMAL_IP_SUM | MT_RXD0_NORMAL_UDP_TCP_SUM;
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	bool hdr_trans, unicast, insert_ccmp_hdr = false;
 	u8 chfreq, qos_ctl = 0, remove_pad, amsdu_info;
+	__le32 *rxv = NULL, *rxd = (__le32 *)skb->data;
 	struct mt76_phy *mphy = &dev->mt76.phy;
 	struct mt7921_phy *phy = &dev->phy;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_hdr *hdr;
-	__le32 *rxd = (__le32 *)skb->data;
-	__le32 *rxv = NULL;
-	u32 mode = 0;
+	u32 rxd0 = le32_to_cpu(rxd[0]);
 	u32 rxd1 = le32_to_cpu(rxd[1]);
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 rxd3 = le32_to_cpu(rxd[3]);
 	u32 rxd4 = le32_to_cpu(rxd[4]);
 	u16 seq_ctrl = 0;
 	__le16 fc = 0;
+	u32 mode = 0;
 	int i, idx;
 
 	memset(status, 0, sizeof(*status));
@@ -363,6 +364,9 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	if (!sband->channels)
 		return -EINVAL;
 
+	if ((rxd0 & csum_mask) == csum_mask)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
 	if (rxd1 & MT_RXD1_NORMAL_FCS_ERR)
 		status->flag |= RX_FLAG_FAILED_FCS_CRC;
 

From a441a77a736cd20e2f6529be4d610e5956bac6fa Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 27 Apr 2021 07:14:26 +0800
Subject: [PATCH 2229/3292] mt76: mt7915: cleanup
 mt7915_mcu_sta_rate_ctrl_tlv()

Remove obsoleted codes. This is the preparation for .set_bitrate_mask().

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 74 +++++--------------
 .../net/wireless/mediatek/mt76/mt7915/mcu.h   |  2 +-
 2 files changed, 21 insertions(+), 55 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index ce7c55fda6cf..b22dfdc39f1e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -147,10 +147,10 @@ mt7915_get_he_phy_cap(struct mt7915_phy *phy, struct ieee80211_vif *vif)
 }
 
 static u8
-mt7915_get_phy_mode(struct mt76_phy *mphy, struct ieee80211_vif *vif,
-		    struct ieee80211_sta *sta)
+mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta)
 {
-	enum nl80211_band band = mphy->chandef.chan->band;
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	enum nl80211_band band = mvif->phy->mt76->chandef.chan->band;
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
 	const struct ieee80211_sta_he_cap *he_cap;
@@ -163,7 +163,7 @@ mt7915_get_phy_mode(struct mt76_phy *mphy, struct ieee80211_vif *vif,
 	} else {
 		struct ieee80211_supported_band *sband;
 
-		sband = mphy->hw->wiphy->bands[band];
+		sband = mvif->phy->mt76->hw->wiphy->bands[band];
 
 		ht_cap = &sband->ht_cap;
 		vht_cap = &sband->vht_cap;
@@ -741,7 +741,7 @@ mt7915_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 		memcpy(bss->bssid, vif->bss_conf.bssid, ETH_ALEN);
 		bss->bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int);
 		bss->dtim_period = vif->bss_conf.dtim_period;
-		bss->phy_mode = mt7915_get_phy_mode(phy->mt76, vif, NULL);
+		bss->phy_mode = mt7915_get_phy_mode(vif, NULL);
 	} else {
 		memcpy(bss->bssid, phy->mt76->macaddr, ETH_ALEN);
 	}
@@ -2087,47 +2087,39 @@ static void
 mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 			     struct ieee80211_vif *vif, struct ieee80211_sta *sta)
 {
-	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
-	struct mt76_phy *mphy = &dev->mphy;
-	enum nl80211_band band;
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct cfg80211_chan_def *chandef = &mvif->phy->mt76->chandef;
+	enum nl80211_band band = chandef->chan->band;
 	struct sta_rec_ra *ra;
 	struct tlv *tlv;
-	u32 supp_rate, n_rates, cap = sta->wme ? STA_CAP_WMM : 0;
-	u8 i, nss = sta->rx_nss, mcs = 0;
+	u32 supp_rate = sta->supp_rates[band];
+	u32 cap = sta->wme ? STA_CAP_WMM : 0;
+	u8 i, nss = sta->rx_nss;
 
 	tlv = mt7915_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra));
 	ra = (struct sta_rec_ra *)tlv;
 
-	if (msta->wcid.ext_phy && dev->mt76.phy2)
-		mphy = dev->mt76.phy2;
-
-	band = mphy->chandef.chan->band;
-	supp_rate = sta->supp_rates[band];
-	n_rates = hweight32(supp_rate);
-
 	ra->valid = true;
 	ra->auto_rate = true;
-	ra->phy_mode = mt7915_get_phy_mode(mphy, vif, sta);
-	ra->channel = mphy->chandef.chan->hw_value;
+	ra->phy_mode = mt7915_get_phy_mode(vif, sta);
+	ra->channel = chandef->chan->hw_value;
 	ra->bw = sta->bandwidth;
-	ra->rate_len = n_rates;
 	ra->phy.bw = sta->bandwidth;
 
-	if (n_rates) {
+	if (supp_rate) {
+		ra->rate_len = hweight32(supp_rate);
+
 		if (band == NL80211_BAND_2GHZ) {
 			ra->supp_mode = MODE_CCK;
 			ra->supp_cck_rate = supp_rate & GENMASK(3, 0);
-			ra->phy.type = MT_PHY_TYPE_CCK;
 
-			if (n_rates > 4) {
+			if (ra->rate_len > 4) {
 				ra->supp_mode |= MODE_OFDM;
 				ra->supp_ofdm_rate = supp_rate >> 4;
-				ra->phy.type = MT_PHY_TYPE_OFDM;
 			}
 		} else {
 			ra->supp_mode = MODE_OFDM;
 			ra->supp_ofdm_rate = supp_rate;
-			ra->phy.type = MT_PHY_TYPE_OFDM;
 		}
 	}
 
@@ -2137,7 +2129,6 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 
 		ra->supp_ht_mcs = *(__le32 *)ra->ht_mcs;
 		ra->supp_mode |= MODE_HT;
-		mcs = hweight32(le32_to_cpu(ra->supp_ht_mcs)) - 1;
 		ra->af = sta->ht_cap.ampdu_factor;
 		ra->ht_gf = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD);
 
@@ -2157,7 +2148,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 	if (sta->vht_cap.vht_supported) {
 		u16 mcs_map = le16_to_cpu(sta->vht_cap.vht_mcs.rx_mcs_map);
 		u16 vht_mcs;
-		u8 af, mcs_prev;
+		u8 af;
 
 		af = FIELD_GET(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK,
 			       sta->vht_cap.cap);
@@ -2176,7 +2167,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 			cap |= STA_CAP_VHT_LDPC;
 
 		ra->supp_mode |= MODE_VHT;
-		for (mcs = 0, i = 0; i < nss; i++, mcs_map >>= 2) {
+		for (i = 0; i < nss; i++, mcs_map >>= 2) {
 			switch (mcs_map & 0x3) {
 			case IEEE80211_VHT_MCS_SUPPORT_0_9:
 				vht_mcs = GENMASK(9, 0);
@@ -2193,10 +2184,6 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 
 			ra->supp_vht_mcs[i] = cpu_to_le16(vht_mcs);
 
-			mcs_prev = hweight16(vht_mcs) - 1;
-			if (mcs_prev > mcs)
-				mcs = mcs_prev;
-
 			/* only support 2ss on 160MHz */
 			if (i > 1 && (ra->bw == CMD_CBW_160MHZ ||
 				      ra->bw == CMD_CBW_8080MHZ))
@@ -2209,28 +2196,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 		cap |= STA_CAP_HE;
 	}
 
-	ra->sta_status = cpu_to_le32(cap);
-
-	switch (BIT(fls(ra->supp_mode) - 1)) {
-	case MODE_VHT:
-		ra->phy.type = MT_PHY_TYPE_VHT;
-		ra->phy.mcs = mcs;
-		ra->phy.nss = nss;
-		ra->phy.stbc = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_TXSTBC);
-		ra->phy.ldpc = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_RXLDPC);
-		ra->phy.sgi =
-			!!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80);
-		break;
-	case MODE_HT:
-		ra->phy.type = MT_PHY_TYPE_HT;
-		ra->phy.mcs = mcs;
-		ra->phy.ldpc = sta->ht_cap.cap & IEEE80211_HT_CAP_LDPC_CODING;
-		ra->phy.stbc = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC);
-		ra->phy.sgi = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20);
-		break;
-	default:
-		break;
-	}
+	ra->sta_cap = cpu_to_le32(cap);
 }
 
 int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 42582a66e42d..f95920d58a40 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -919,7 +919,7 @@ struct sta_rec_ra {
 	u8 op_vht_rx_nss;
 	u8 op_vht_rx_nss_type;
 
-	__le32 sta_status;
+	__le32 sta_cap;
 
 	struct ra_phy phy;
 } __packed;

From 76be6c076c0774844670df818233c488538bae02 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 27 Apr 2021 07:14:27 +0800
Subject: [PATCH 2230/3292] mt76: mt7915: add .set_bitrate_mask() callback

Add runtime configuration for bitrate mask. This update firmware rate
control to add a boundary on top of table to limit the rate selection
for each peer, so when user set bitrates vht-mcs-5 1:9, which actually
means nss = 1 mcs = 0~9. This only applies to data frames as for other
mgmt, mcast, bcast still use legacy rates as it is.

Note that driver does not support GI configuration.

Example:
iw dev wlan0 set bitrates vht-mcs-5 1:9 he-mcs-5 2:7
iw dev wlan0 set bitrates legacy-5 6 he-mcs-5 2:0-11

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/mac.c   |   4 +-
 .../net/wireless/mediatek/mt76/mt7915/main.c  |  64 +++++-
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 185 ++++++++++++++----
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |   3 +
 4 files changed, 213 insertions(+), 43 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index f8bb043f9be7..642a11e72bef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1804,8 +1804,10 @@ void mt7915_mac_sta_rc_work(struct work_struct *work)
 
 		if (changed & (IEEE80211_RC_SUPP_RATES_CHANGED |
 			       IEEE80211_RC_NSS_CHANGED |
-			       IEEE80211_RC_BW_CHANGED))
+			       IEEE80211_RC_BW_CHANGED)) {
+			mt7915_mcu_add_he(dev, vif, sta);
 			mt7915_mcu_add_rate_ctrl(dev, vif, sta);
+		}
 
 		if (changed & IEEE80211_RC_SMPS_CHANGED)
 			mt7915_mcu_add_smps(dev, vif, sta);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index e5bd687546b6..64f9ebe4424a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -172,6 +172,22 @@ static int get_omac_idx(enum nl80211_iftype type, u64 mask)
 	return -1;
 }
 
+static void mt7915_init_bitrate_mask(struct ieee80211_vif *vif)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mvif->bitrate_mask.control); i++) {
+		mvif->bitrate_mask.control[i].legacy = GENMASK(31, 0);
+		memset(mvif->bitrate_mask.control[i].ht_mcs, GENMASK(7, 0),
+		       sizeof(mvif->bitrate_mask.control[i].ht_mcs));
+		memset(mvif->bitrate_mask.control[i].vht_mcs, GENMASK(15, 0),
+		       sizeof(mvif->bitrate_mask.control[i].vht_mcs));
+		memset(mvif->bitrate_mask.control[i].he_mcs, GENMASK(15, 0),
+		       sizeof(mvif->bitrate_mask.control[i].he_mcs));
+	}
+}
+
 static int mt7915_add_interface(struct ieee80211_hw *hw,
 				struct ieee80211_vif *vif)
 {
@@ -241,6 +257,8 @@ static int mt7915_add_interface(struct ieee80211_hw *hw,
 		vif->offload_flags = 0;
 	vif->offload_flags |= IEEE80211_OFFLOAD_ENCAP_4ADDR;
 
+	mt7915_init_bitrate_mask(vif);
+
 out:
 	mutex_unlock(&dev->mt76.mutex);
 
@@ -911,17 +929,15 @@ static void mt7915_sta_statistics(struct ieee80211_hw *hw,
 	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
 }
 
-static void
-mt7915_sta_rc_update(struct ieee80211_hw *hw,
-		     struct ieee80211_vif *vif,
-		     struct ieee80211_sta *sta,
-		     u32 changed)
+static void mt7915_sta_rc_work(void *data, struct ieee80211_sta *sta)
 {
-	struct mt7915_dev *dev = mt7915_hw_dev(hw);
 	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_dev *dev = msta->vif->phy->dev;
+	struct ieee80211_hw *hw = msta->vif->phy->mt76->hw;
+	u32 *changed = data;
 
 	spin_lock_bh(&dev->sta_poll_lock);
-	msta->stats.changed |= changed;
+	msta->stats.changed |= *changed;
 	if (list_empty(&msta->rc_list))
 		list_add_tail(&msta->rc_list, &dev->sta_rc_list);
 	spin_unlock_bh(&dev->sta_poll_lock);
@@ -929,6 +945,39 @@ mt7915_sta_rc_update(struct ieee80211_hw *hw,
 	ieee80211_queue_work(hw, &dev->rc_work);
 }
 
+static void mt7915_sta_rc_update(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 struct ieee80211_sta *sta,
+				 u32 changed)
+{
+	mt7915_sta_rc_work(&changed, sta);
+}
+
+static int
+mt7915_set_bitrate_mask(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			const struct cfg80211_bitrate_mask *mask)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	enum nl80211_band band = mvif->phy->mt76->chandef.chan->band;
+	u32 changed;
+
+	if (mask->control[band].gi == NL80211_TXRATE_FORCE_LGI)
+		return -EINVAL;
+
+	changed = IEEE80211_RC_SUPP_RATES_CHANGED;
+	mvif->bitrate_mask = *mask;
+
+	/* Update firmware rate control to add a boundary on top of table
+	 * to limit the rate selection for each peer, so when set bitrates
+	 * vht-mcs-5 1:9, which actually means nss = 1 mcs = 0~9. This only
+	 * applies to data frames as for the other mgmt, mcast, bcast still
+	 * use legacy rates as it is.
+	 */
+	ieee80211_iterate_stations_atomic(hw, mt7915_sta_rc_work, &changed);
+
+	return 0;
+}
+
 static void mt7915_sta_set_4addr(struct ieee80211_hw *hw,
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_sta *sta,
@@ -990,6 +1039,7 @@ const struct ieee80211_ops mt7915_ops = {
 	.get_survey = mt76_get_survey,
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7915_set_antenna,
+	.set_bitrate_mask = mt7915_set_bitrate_mask,
 	.set_coverage_class = mt7915_set_coverage_class,
 	.sta_statistics = mt7915_sta_statistics,
 	.sta_set_4addr = mt7915_sta_set_4addr,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index b22dfdc39f1e..e5302ff4c9dd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -209,6 +209,112 @@ mt7915_mcu_get_sta_nss(u16 mcs_map)
 	return nss - 1;
 }
 
+static void
+mt7915_mcu_set_sta_he_mcs(struct ieee80211_sta *sta, __le16 *he_mcs,
+			  const u16 *mask)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct cfg80211_chan_def *chandef = &msta->vif->phy->mt76->chandef;
+	int nss, max_nss = sta->rx_nss > 3 ? 4 : sta->rx_nss;
+	u16 mcs_map;
+
+	switch (chandef->width) {
+	case NL80211_CHAN_WIDTH_80P80:
+		mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_80p80);
+		break;
+	case NL80211_CHAN_WIDTH_160:
+		mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_160);
+		break;
+	default:
+		mcs_map = le16_to_cpu(sta->he_cap.he_mcs_nss_supp.rx_mcs_80);
+		break;
+	}
+
+	for (nss = 0; nss < max_nss; nss++) {
+		int mcs;
+
+		switch ((mcs_map >> (2 * nss)) & 0x3) {
+		case IEEE80211_HE_MCS_SUPPORT_0_11:
+			mcs = GENMASK(11, 0);
+			break;
+		case IEEE80211_HE_MCS_SUPPORT_0_9:
+			mcs = GENMASK(9, 0);
+			break;
+		case IEEE80211_HE_MCS_SUPPORT_0_7:
+			mcs = GENMASK(7, 0);
+			break;
+		default:
+			mcs = 0;
+		}
+
+		mcs = mcs ? fls(mcs & mask[nss]) - 1 : -1;
+
+		switch (mcs) {
+		case 0 ... 7:
+			mcs = IEEE80211_HE_MCS_SUPPORT_0_7;
+			break;
+		case 8 ... 9:
+			mcs = IEEE80211_HE_MCS_SUPPORT_0_9;
+			break;
+		case 10 ... 11:
+			mcs = IEEE80211_HE_MCS_SUPPORT_0_11;
+			break;
+		default:
+			mcs = IEEE80211_HE_MCS_NOT_SUPPORTED;
+			break;
+		}
+		mcs_map &= ~(0x3 << (nss * 2));
+		mcs_map |= mcs << (nss * 2);
+
+		/* only support 2ss on 160MHz */
+		if (nss > 1 && (sta->bandwidth == IEEE80211_STA_RX_BW_160))
+			break;
+	}
+
+	*he_mcs = cpu_to_le16(mcs_map);
+}
+
+static void
+mt7915_mcu_set_sta_vht_mcs(struct ieee80211_sta *sta, __le16 *vht_mcs,
+			   const u16 *mask)
+{
+	u16 mcs_map = le16_to_cpu(sta->vht_cap.vht_mcs.rx_mcs_map);
+	int nss, max_nss = sta->rx_nss > 3 ? 4 : sta->rx_nss;
+	u16 mcs;
+
+	for (nss = 0; nss < max_nss; nss++, mcs_map >>= 2) {
+		switch (mcs_map & 0x3) {
+		case IEEE80211_VHT_MCS_SUPPORT_0_9:
+			mcs = GENMASK(9, 0);
+			break;
+		case IEEE80211_VHT_MCS_SUPPORT_0_8:
+			mcs = GENMASK(8, 0);
+			break;
+		case IEEE80211_VHT_MCS_SUPPORT_0_7:
+			mcs = GENMASK(7, 0);
+			break;
+		default:
+			mcs = 0;
+		}
+
+		vht_mcs[nss] = cpu_to_le16(mcs & mask[nss]);
+
+		/* only support 2ss on 160MHz */
+		if (nss > 1 && (sta->bandwidth == IEEE80211_STA_RX_BW_160))
+			break;
+	}
+}
+
+static void
+mt7915_mcu_set_sta_ht_mcs(struct ieee80211_sta *sta, u8 *ht_mcs,
+			  const u8 *mask)
+{
+	int nss, max_nss = sta->rx_nss > 3 ? 4 : sta->rx_nss;
+
+	for (nss = 0; nss < max_nss; nss++)
+		ht_mcs[nss] = sta->ht_cap.mcs.rx_mask[nss] & mask[nss];
+}
+
 static int
 mt7915_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 			  struct sk_buff *skb, int seq)
@@ -1344,8 +1450,11 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 static void
 mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
 {
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
 	struct ieee80211_sta_he_cap *he_cap = &sta->he_cap;
 	struct ieee80211_he_cap_elem *elem = &he_cap->he_cap_elem;
+	enum nl80211_band band = msta->vif->phy->mt76->chandef.chan->band;
+	const u16 *mcs_mask = msta->vif->bitrate_mask.control[band].he_mcs;
 	struct sta_rec_he *he;
 	struct tlv *tlv;
 	u32 cap = 0;
@@ -1436,15 +1545,18 @@ mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
 	case IEEE80211_STA_RX_BW_160:
 		if (elem->phy_cap_info[0] &
 		    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
-			he->max_nss_mcs[CMD_HE_MCS_BW8080] =
-				he_cap->he_mcs_nss_supp.rx_mcs_80p80;
+			mt7915_mcu_set_sta_he_mcs(sta,
+						  &he->max_nss_mcs[CMD_HE_MCS_BW8080],
+						  mcs_mask);
 
-		he->max_nss_mcs[CMD_HE_MCS_BW160] =
-				he_cap->he_mcs_nss_supp.rx_mcs_160;
+		mt7915_mcu_set_sta_he_mcs(sta,
+					  &he->max_nss_mcs[CMD_HE_MCS_BW160],
+					  mcs_mask);
 		fallthrough;
 	default:
-		he->max_nss_mcs[CMD_HE_MCS_BW80] =
-				he_cap->he_mcs_nss_supp.rx_mcs_80;
+		mt7915_mcu_set_sta_he_mcs(sta,
+					  &he->max_nss_mcs[CMD_HE_MCS_BW80],
+					  mcs_mask);
 		break;
 	}
 
@@ -2089,12 +2201,12 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 {
 	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
 	struct cfg80211_chan_def *chandef = &mvif->phy->mt76->chandef;
+	struct cfg80211_bitrate_mask *mask = &mvif->bitrate_mask;
 	enum nl80211_band band = chandef->chan->band;
 	struct sta_rec_ra *ra;
 	struct tlv *tlv;
 	u32 supp_rate = sta->supp_rates[band];
 	u32 cap = sta->wme ? STA_CAP_WMM : 0;
-	u8 i, nss = sta->rx_nss;
 
 	tlv = mt7915_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra));
 	ra = (struct sta_rec_ra *)tlv;
@@ -2107,6 +2219,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 	ra->phy.bw = sta->bandwidth;
 
 	if (supp_rate) {
+		supp_rate &= mask->control[band].legacy;
 		ra->rate_len = hweight32(supp_rate);
 
 		if (band == NL80211_BAND_2GHZ) {
@@ -2124,10 +2237,8 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 	}
 
 	if (sta->ht_cap.ht_supported) {
-		for (i = 0; i < nss; i++)
-			ra->ht_mcs[i] = sta->ht_cap.mcs.rx_mask[i];
+		const u8 *mcs_mask = mask->control[band].ht_mcs;
 
-		ra->supp_ht_mcs = *(__le32 *)ra->ht_mcs;
 		ra->supp_mode |= MODE_HT;
 		ra->af = sta->ht_cap.ampdu_factor;
 		ra->ht_gf = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD);
@@ -2143,13 +2254,16 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 			cap |= STA_CAP_RX_STBC;
 		if (sta->ht_cap.cap & IEEE80211_HT_CAP_LDPC_CODING)
 			cap |= STA_CAP_LDPC;
+
+		mt7915_mcu_set_sta_ht_mcs(sta, ra->ht_mcs, mcs_mask);
+		ra->supp_ht_mcs = *(__le32 *)ra->ht_mcs;
 	}
 
 	if (sta->vht_cap.vht_supported) {
-		u16 mcs_map = le16_to_cpu(sta->vht_cap.vht_mcs.rx_mcs_map);
-		u16 vht_mcs;
+		const u16 *mcs_mask = mask->control[band].vht_mcs;
 		u8 af;
 
+		ra->supp_mode |= MODE_VHT;
 		af = FIELD_GET(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK,
 			       sta->vht_cap.cap);
 		ra->af = max_t(u8, ra->af, af);
@@ -2166,29 +2280,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_RXLDPC)
 			cap |= STA_CAP_VHT_LDPC;
 
-		ra->supp_mode |= MODE_VHT;
-		for (i = 0; i < nss; i++, mcs_map >>= 2) {
-			switch (mcs_map & 0x3) {
-			case IEEE80211_VHT_MCS_SUPPORT_0_9:
-				vht_mcs = GENMASK(9, 0);
-				break;
-			case IEEE80211_VHT_MCS_SUPPORT_0_8:
-				vht_mcs = GENMASK(8, 0);
-				break;
-			case IEEE80211_VHT_MCS_SUPPORT_0_7:
-				vht_mcs = GENMASK(7, 0);
-				break;
-			default:
-				vht_mcs = 0;
-			}
-
-			ra->supp_vht_mcs[i] = cpu_to_le16(vht_mcs);
-
-			/* only support 2ss on 160MHz */
-			if (i > 1 && (ra->bw == CMD_CBW_160MHZ ||
-				      ra->bw == CMD_CBW_8080MHZ))
-				break;
-		}
+		mt7915_mcu_set_sta_vht_mcs(sta, ra->supp_vht_mcs, mcs_mask);
 	}
 
 	if (sta->he_cap.has_he) {
@@ -2217,6 +2309,29 @@ int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 				     MCU_EXT_CMD(STA_REC_UPDATE), true);
 }
 
+int mt7915_mcu_add_he(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		      struct ieee80211_sta *sta)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct sk_buff *skb;
+	int len;
+
+	if (!sta->he_cap.has_he)
+		return 0;
+
+	len = sizeof(struct sta_req_hdr) + sizeof(struct sta_rec_he);
+
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	mt7915_mcu_sta_he_tlv(skb, sta);
+
+	return mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				     MCU_EXT_CMD(STA_REC_UPDATE), true);
+}
+
 int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta, bool enable)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index c4bf8edeb1dd..47341876da09 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -100,6 +100,7 @@ struct mt7915_vif {
 	struct mt7915_phy *phy;
 
 	struct ieee80211_tx_queue_params queue_params[IEEE80211_NUM_ACS];
+	struct cfg80211_bitrate_mask bitrate_mask;
 };
 
 struct mib_stats {
@@ -323,6 +324,8 @@ int mt7915_mcu_add_obss_spr(struct mt7915_dev *dev, struct ieee80211_vif *vif,
                             bool enable);
 int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			     struct ieee80211_sta *sta);
+int mt7915_mcu_add_he(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		      struct ieee80211_sta *sta);
 int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			struct ieee80211_sta *sta);
 int mt7915_set_channel(struct mt7915_phy *phy);

From d7400a2f3e295b8cee692c7a66e10f60015a3c37 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 27 Apr 2021 12:05:00 +0200
Subject: [PATCH 2231/3292] mt76: fix possible NULL pointer dereference in
 mt76_tx

Even if this is not a real issue since mt76_tx is never run with wcid set
to NULL, fix a theoretical NULL pointer dereference in mt76_tx routine

Fixes: db9f11d3433f7 ("mt76: store wcid tx rate info in one u32 reduce locking")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 70a830132a37..5cc3e4d75c4f 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -306,7 +306,7 @@ mt76_tx(struct mt76_phy *phy, struct ieee80211_sta *sta,
 		skb_set_queue_mapping(skb, qid);
 	}
 
-	if (!(wcid->tx_info & MT_WCID_TX_INFO_SET))
+	if (wcid && !(wcid->tx_info & MT_WCID_TX_INFO_SET))
 		ieee80211_get_tx_rates(info->control.vif, sta, skb,
 				       info->control.rates, 1);
 

From 8d3cdc1bbb1d355f0ebef973175ae5fd74286feb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 27 Apr 2021 12:07:14 +0200
Subject: [PATCH 2232/3292] mt76: mt7615: fix NULL pointer dereference in
 tx_prepare_skb()

Fix theoretical NULL pointer dereference in mt7615_tx_prepare_skb and
mt7663_usb_sdio_tx_prepare_skb routines. This issue has been identified
by code analysis.

Fixes: 6aa4ed7927f11 ("mt76: mt7615: implement DMA support for MT7622")
Fixes: 4bb586bc33b98 ("mt76: mt7663u: sync probe sampling with rate configuration")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c  | 5 +++--
 drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index d7cbef752f9f..cc278d8cb888 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -131,20 +131,21 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  struct mt76_tx_info *tx_info)
 {
 	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
-	struct mt7615_sta *msta = container_of(wcid, struct mt7615_sta, wcid);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
 	struct ieee80211_key_conf *key = info->control.hw_key;
 	int pid, id;
 	u8 *txwi = (u8 *)txwi_ptr;
 	struct mt76_txwi_cache *t;
+	struct mt7615_sta *msta;
 	void *txp;
 
+	msta = wcid ? container_of(wcid, struct mt7615_sta, wcid) : NULL;
 	if (!wcid)
 		wcid = &dev->mt76.global_wcid;
 
 	pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
 
-	if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
+	if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && msta) {
 		struct mt7615_phy *phy = &dev->phy;
 
 		if ((info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY) && mdev->phy2)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
index b2b02614f8e9..75a05f8dd7e7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -191,14 +191,15 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 				   struct ieee80211_sta *sta,
 				   struct mt76_tx_info *tx_info)
 {
-	struct mt7615_sta *msta = container_of(wcid, struct mt7615_sta, wcid);
 	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
 	struct sk_buff *skb = tx_info->skb;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct mt7615_sta *msta;
 	int pad;
 
+	msta = wcid ? container_of(wcid, struct mt7615_sta, wcid) : NULL;
 	if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) &&
-	    !msta->rate_probe) {
+	    msta && !msta->rate_probe) {
 		/* request to configure sampling rate */
 		spin_lock_bh(&dev->mt76.lock);
 		mt7615_mac_set_rates(&dev->phy, msta, &info->control.rates[0],

From 47cbf73cf924ce2202332f61e705f4ed9bd6a74e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 7 May 2021 12:26:11 +0200
Subject: [PATCH 2233/3292] mt76: mt76x0: use dev_debug instead of dev_err for
 hw_rf_ctrl

BIT(0) in MT_EE_NIC_CONF_1 is use to notify the driver if the radio RF
switch is controlled through a gpio. Use dev_debug instead of dev_err
to log this info.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
index dd66fd12a2e6..cea24213186c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
@@ -68,7 +68,7 @@ static void mt76x0_set_chip_cap(struct mt76x02_dev *dev)
 		nic_conf1 &= 0xff00;
 
 	if (nic_conf1 & MT_EE_NIC_CONF_1_HW_RF_CTRL)
-		dev_err(dev->mt76.dev,
+		dev_dbg(dev->mt76.dev,
 			"driver does not support HW RF ctrl\n");
 
 	if (!mt76x02_field_valid(nic_conf0 >> 8))

From 2f83054342dcce87e2f47b60af9aa7cfbcf4b80e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 10 May 2021 09:13:03 +0200
Subject: [PATCH 2234/3292] mt76: mt7615: free irq if mt7615_mmio_probe fails

As already done for mt7915 and mt7921, free registered irq line if
mt7615_mmio_probe routine fails

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index 202ea235415e..71719c787511 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -229,7 +229,7 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 			       GFP_KERNEL);
 	if (!bus_ops) {
 		ret = -ENOMEM;
-		goto error;
+		goto err_free_dev;
 	}
 
 	bus_ops->rr = mt7615_rr;
@@ -242,17 +242,20 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 	ret = devm_request_irq(mdev->dev, irq, mt7615_irq_handler,
 			       IRQF_SHARED, KBUILD_MODNAME, dev);
 	if (ret)
-		goto error;
+		goto err_free_dev;
 
 	if (is_mt7663(mdev))
 		mt76_wr(dev, MT_PCIE_IRQ_ENABLE, 1);
 
 	ret = mt7615_register_device(dev);
 	if (ret)
-		goto error;
+		goto err_free_irq;
 
 	return 0;
-error:
+
+err_free_irq:
+	devm_free_irq(pdev, irq, dev);
+err_free_dev:
 	mt76_free_device(&dev->mt76);
 
 	return ret;

From 54c31b9e6507cd8183a27fa862cc3a407a332076 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 10 May 2021 10:13:07 +0200
Subject: [PATCH 2235/3292] mt76: mt7663: enable hw rx header translation

As already done for mt7615 and mt7915, enable rx header translation
offload for mt7663 in order to reduce cpu load in the rx path.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/main.c  |  2 +-
 .../net/wireless/mediatek/mt76/mt7615/mcu.c   | 66 +++++++++++++------
 .../wireless/mediatek/mt76/mt7615/mt7615.h    |  7 +-
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 39733b351ac4..faae60775b16 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -1162,7 +1162,7 @@ static void mt7615_sta_set_decap_offload(struct ieee80211_hw *hw,
 	else
 		clear_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
 
-	mt7615_mcu_sta_update_hdr_trans(dev, vif, sta);
+	mt7615_mcu_set_sta_decap_offload(dev, vif, sta);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 32a2cb76b583..32090e01b4d2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1058,6 +1058,26 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	return mt76_mcu_skb_send_msg(&dev->mt76, skb, cmd, true);
 }
 
+static int
+mt7615_mcu_wtbl_update_hdr_trans(struct mt7615_dev *dev,
+				 struct ieee80211_vif *vif,
+				 struct ieee80211_sta *sta)
+{
+	struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
+	struct wtbl_req_hdr *wtbl_hdr;
+	struct sk_buff *skb = NULL;
+
+	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(&dev->mt76, &msta->wcid,
+						  WTBL_SET, NULL, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
+	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, vif, &msta->wcid, NULL,
+					   wtbl_hdr);
+	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD_WTBL_UPDATE,
+				     true);
+}
+
 static const struct mt7615_mcu_ops wtbl_update_ops = {
 	.add_beacon_offload = mt7615_mcu_add_beacon_offload,
 	.set_pm_state = mt7615_mcu_ctrl_pm_state,
@@ -1068,6 +1088,7 @@ static const struct mt7615_mcu_ops wtbl_update_ops = {
 	.sta_add = mt7615_mcu_wtbl_sta_add,
 	.set_drv_ctrl = mt7615_mcu_drv_pmctrl,
 	.set_fw_ctrl = mt7615_mcu_fw_pmctrl,
+	.set_sta_decap_offload = mt7615_mcu_wtbl_update_hdr_trans,
 };
 
 static int
@@ -1142,6 +1163,18 @@ mt7615_mcu_add_sta(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 				    MCU_EXT_CMD_STA_REC_UPDATE);
 }
 
+static int
+mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
+				struct ieee80211_vif *vif,
+				struct ieee80211_sta *sta)
+{
+	struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
+
+	return mt76_connac_mcu_sta_update_hdr_trans(&dev->mt76,
+						    vif, &msta->wcid,
+						    MCU_EXT_CMD_STA_REC_UPDATE);
+}
+
 static const struct mt7615_mcu_ops sta_update_ops = {
 	.add_beacon_offload = mt7615_mcu_add_beacon_offload,
 	.set_pm_state = mt7615_mcu_ctrl_pm_state,
@@ -1152,27 +1185,9 @@ static const struct mt7615_mcu_ops sta_update_ops = {
 	.sta_add = mt7615_mcu_add_sta,
 	.set_drv_ctrl = mt7615_mcu_drv_pmctrl,
 	.set_fw_ctrl = mt7615_mcu_fw_pmctrl,
+	.set_sta_decap_offload = mt7615_mcu_sta_update_hdr_trans,
 };
 
-int mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
-				    struct ieee80211_vif *vif,
-				    struct ieee80211_sta *sta)
-{
-	struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
-	struct wtbl_req_hdr *wtbl_hdr;
-	struct sk_buff *skb = NULL;
-
-	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(&dev->mt76, &msta->wcid,
-						  WTBL_SET, NULL, &skb);
-	if (IS_ERR(wtbl_hdr))
-		return PTR_ERR(wtbl_hdr);
-
-	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, vif, &msta->wcid, NULL,
-					   wtbl_hdr);
-	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD_WTBL_UPDATE,
-				     true);
-}
-
 static int
 mt7615_mcu_uni_ctrl_pm_state(struct mt7615_dev *dev, int band, int state)
 {
@@ -1338,6 +1353,18 @@ mt7615_mcu_uni_rx_ba(struct mt7615_dev *dev,
 				     MCU_UNI_CMD_STA_REC_UPDATE, true);
 }
 
+static int
+mt7615_mcu_sta_uni_update_hdr_trans(struct mt7615_dev *dev,
+				    struct ieee80211_vif *vif,
+				    struct ieee80211_sta *sta)
+{
+	struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
+
+	return mt76_connac_mcu_sta_update_hdr_trans(&dev->mt76,
+						    vif, &msta->wcid,
+						    MCU_UNI_CMD_STA_REC_UPDATE);
+}
+
 static const struct mt7615_mcu_ops uni_update_ops = {
 	.add_beacon_offload = mt7615_mcu_uni_add_beacon_offload,
 	.set_pm_state = mt7615_mcu_uni_ctrl_pm_state,
@@ -1348,6 +1375,7 @@ static const struct mt7615_mcu_ops uni_update_ops = {
 	.sta_add = mt7615_mcu_uni_add_sta,
 	.set_drv_ctrl = mt7615_mcu_lp_drv_pmctrl,
 	.set_fw_ctrl = mt7615_mcu_fw_pmctrl,
+	.set_sta_decap_offload = mt7615_mcu_sta_uni_update_hdr_trans,
 };
 
 int mt7615_mcu_restart(struct mt76_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 989f05ed4377..2ba86bd96a31 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -202,6 +202,7 @@ struct mt7615_phy {
 #define mt7615_mcu_set_pm(dev, ...)	(dev)->mcu_ops->set_pm_state((dev),  __VA_ARGS__)
 #define mt7615_mcu_set_drv_ctrl(dev)	(dev)->mcu_ops->set_drv_ctrl((dev))
 #define mt7615_mcu_set_fw_ctrl(dev)	(dev)->mcu_ops->set_fw_ctrl((dev))
+#define mt7615_mcu_set_sta_decap_offload(dev, ...) (dev)->mcu_ops->set_sta_decap_offload((dev), __VA_ARGS__)
 struct mt7615_mcu_ops {
 	int (*add_tx_ba)(struct mt7615_dev *dev,
 			 struct ieee80211_ampdu_params *params,
@@ -221,6 +222,9 @@ struct mt7615_mcu_ops {
 	int (*set_pm_state)(struct mt7615_dev *dev, int band, int state);
 	int (*set_drv_ctrl)(struct mt7615_dev *dev);
 	int (*set_fw_ctrl)(struct mt7615_dev *dev);
+	int (*set_sta_decap_offload)(struct mt7615_dev *dev,
+				     struct ieee80211_vif *vif,
+				     struct ieee80211_sta *sta);
 };
 
 struct mt7615_dev {
@@ -518,9 +522,6 @@ void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 void mt7615_mac_work(struct work_struct *work);
 void mt7615_txp_skb_unmap(struct mt76_dev *dev,
 			  struct mt76_txwi_cache *txwi);
-int mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
-				    struct ieee80211_vif *vif,
-				    struct ieee80211_sta *sta);
 int mt7615_mcu_set_rx_hdr_trans_blacklist(struct mt7615_dev *dev);
 int mt7615_mcu_set_fcc5_lpn(struct mt7615_dev *dev, int val);
 int mt7615_mcu_set_pulse_th(struct mt7615_dev *dev,

From 20eb83c749609199443972cf80fb6004fc36afc6 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:49 +0800
Subject: [PATCH 2236/3292] mt76: mt7921: fix mt7921_wfsys_reset sequence

WiFi subsytem reset should control MT_WFSYS_SW_RST_B and then poll the
same register until the bit WFSYS_SW_INIT_DONE bit is set.

Fixes: 0c1ce9884607 ("mt76: mt7921: add wifi reset support")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 71e664ee7652..bd9143dc865f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -313,9 +313,9 @@ static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
 
 int mt7921_wfsys_reset(struct mt7921_dev *dev)
 {
-	mt76_set(dev, 0x70002600, BIT(0));
-	msleep(200);
-	mt76_clear(dev, 0x70002600, BIT(0));
+	mt76_clear(dev, MT_WFSYS_SW_RST_B, WFSYS_SW_RST_B);
+	msleep(50);
+	mt76_set(dev, MT_WFSYS_SW_RST_B, WFSYS_SW_RST_B);
 
 	if (!__mt76_poll_msec(&dev->mt76, MT_WFSYS_SW_RST_B,
 			      WFSYS_SW_INIT_DONE, WFSYS_SW_INIT_DONE, 500))

From 2c80c02a682aefc073df2cfbb48c77c74579cb4a Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:50 +0800
Subject: [PATCH 2237/3292] mt76: mt7921: Don't alter Rx path classifier

Keep Rx path classifier the mt7921 firmware prefers to allow frames pass
through MCU.

Fixes: 5c14a5f944b9 ("mt76: mt7921: introduce mt7921e support")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/init.c   | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index b899b82bbf04..9a28da5abb11 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -90,30 +90,12 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 static void
 mt7921_mac_init_band(struct mt7921_dev *dev, u8 band)
 {
-	u32 mask, set;
-
 	mt76_rmw_field(dev, MT_TMAC_CTCR0(band),
 		       MT_TMAC_CTCR0_INS_DDLMT_REFTIME, 0x3f);
 	mt76_set(dev, MT_TMAC_CTCR0(band),
 		 MT_TMAC_CTCR0_INS_DDLMT_VHT_SMPDU_EN |
 		 MT_TMAC_CTCR0_INS_DDLMT_EN);
 
-	mask = MT_MDP_RCFR0_MCU_RX_MGMT |
-	       MT_MDP_RCFR0_MCU_RX_CTL_NON_BAR |
-	       MT_MDP_RCFR0_MCU_RX_CTL_BAR;
-	set = FIELD_PREP(MT_MDP_RCFR0_MCU_RX_MGMT, MT_MDP_TO_HIF) |
-	      FIELD_PREP(MT_MDP_RCFR0_MCU_RX_CTL_NON_BAR, MT_MDP_TO_HIF) |
-	      FIELD_PREP(MT_MDP_RCFR0_MCU_RX_CTL_BAR, MT_MDP_TO_HIF);
-	mt76_rmw(dev, MT_MDP_BNRCFR0(band), mask, set);
-
-	mask = MT_MDP_RCFR1_MCU_RX_BYPASS |
-	       MT_MDP_RCFR1_RX_DROPPED_UCAST |
-	       MT_MDP_RCFR1_RX_DROPPED_MCAST;
-	set = FIELD_PREP(MT_MDP_RCFR1_MCU_RX_BYPASS, MT_MDP_TO_HIF) |
-	      FIELD_PREP(MT_MDP_RCFR1_RX_DROPPED_UCAST, MT_MDP_TO_HIF) |
-	      FIELD_PREP(MT_MDP_RCFR1_RX_DROPPED_MCAST, MT_MDP_TO_HIF);
-	mt76_rmw(dev, MT_MDP_BNRCFR1(band), mask, set);
-
 	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
 	mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
 

From 4bfa291251623486711693a69d9eaa539478d340 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:51 +0800
Subject: [PATCH 2238/3292] mt76: connac: fw_own rely on all packet memory all
 being free

If the device is MMIO-based, we must ensure all TxD/TxP on the host
memory all being consumed by the device prior to safely switching to
fw_own state.

Fixes: ec7bd7b4a9c0 ("mt76: connac: check wake refcount in mcu_fw_pmctrl")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 6c889b90fd12..337c5ece7ec3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -127,8 +127,12 @@ mt76_connac_pm_unref(struct mt76_connac_pm *pm)
 static inline bool
 mt76_connac_skip_fw_pmctrl(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 {
+	struct mt76_dev *dev = phy->dev;
 	bool ret;
 
+	if (dev->token_count)
+		return true;
+
 	spin_lock_bh(&pm->wake.lock);
 	ret = pm->wake.count || test_and_set_bit(MT76_STATE_PM, &phy->state);
 	spin_unlock_bh(&pm->wake.lock);

From 7bf0a71e839822bb6ba04a6e163ad334314e2659 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 20 May 2021 11:46:37 +0800
Subject: [PATCH 2239/3292] mt76: mt7921: fix reset under the deep sleep is
 enabled

To fix possibly the race to access register between the WiFi reset
and the other context that is caused by explicitly cancelling ps_work
and wake_work to break PM_STATE consistency.

Deep sleep would cause the hardware into the inactive state,
so we forcely put device drv_own state before we start to reset.

The patch also ignore the reset request when the procedure is in
progress to avoid the consecutive WiFi resets.

localhost ~ # [ 2932.073966] SError Interrupt on CPU7, code 0xbe000011
[ 2932.073967] CPU: 7 PID: 8761 Comm: kworker/u16:2 Not tainted 5.4.112 #30
[ 2932.073968] Hardware name: MediaTek Asurada rev1 board (DT)
[ 2932.073968] Workqueue: phy0 ieee80211_reconfig_filter [mac80211]
[ 2932.073969] pstate: 80400089 (Nzcv daIf +PAN -UAO)
[ 2932.073969] pc : el1_irq+0x78/0x180
[ 2932.073970] lr : mt76_mmio_rmw+0x30/0x5c [mt76]
[ 2932.073970] sp : ffffffc01142bad0
[ 2932.073970] x29: ffffffc01142bc00 x28: ffffff8f96fb1e00
[ 2932.073971] x27: ffffffd2cdc12138 x26: ffffffd2cdaeb018
[ 2932.073972] x25: 0000000000000000 x24: ffffff8fa8e14c08
[ 2932.073973] x23: 0000000080c00009 x22: ffffffd2a5603918
[ 2932.073974] x21: ffffffc01142bc10 x20: 0000007fffffffff
[ 2932.073975] x19: 0000000000000000 x18: 0000000000000400
[ 2932.073975] x17: 0000000000000400 x16: ffffffd2cd2b87dc
[ 2932.073976] x15: 0000000000000000 x14: 0000000000000000
[ 2932.073977] x13: 0000000000000001 x12: 0000000000000001
[ 2932.073978] x11: 0000000000000001 x10: 000000000010e000
[ 2932.073978] x9 : 0000000000000000 x8 : ffffffc013921404
[ 2932.073979] x7 : 000000b2b5593519 x6 : 0000000000300000
[ 2932.073980] x5 : 0000000000000000 x4 : ffffffc01142bbc8
[ 2932.073980] x3 : 00000000000001f0 x2 : 0000000000000000
[ 2932.073981] x1 : 0000000000021404 x0 : ffffff8fa8e12300
[ 2932.073982] Kernel panic - not syncing: Asynchronous SError Interrupt
[ 2932.073983] CPU: 7 PID: 8761 Comm: kworker/u16:2 Not tainted 5.4.112 #30
[ 2932.073983] Hardware name: MediaTek Asurada rev1 board (DT)
[ 2932.073984] Workqueue: phy0 ieee80211_reconfig_filter [mac80211]
[ 2932.073984] Call trace:
[ 2932.073985]  dump_backtrace+0x0/0x14c
[ 2932.073985]  show_stack+0x20/0x2c
[ 2932.073985]  dump_stack+0xa0/0xf8
[ 2932.073986]  panic+0x154/0x360
[ 2932.073986]  test_taint+0x0/0x44
[ 2932.073986]  arm64_serror_panic+0x78/0x84
[ 2932.073987]  do_serror+0x0/0x118
[ 2932.073987]  do_serror+0xa4/0x118
[ 2932.073987]  el1_error+0x84/0xf8
[ 2932.073988]  el1_irq+0x78/0x180
[ 2932.073988]  mt76_mmio_rr+0x30/0xf0 [mt76]
[ 2932.073988]  mt76_mmio_rmw+0x30/0x5c [mt76]
[ 2932.073989]  mt7921_rmw+0x4c/0x5c [mt7921e]
[ 2932.073989]  mt7921_configure_filter+0x138/0x160 [mt7921e]
[ 2932.073990]  ieee80211_configure_filter+0x2f0/0x3e0 [mac80211]
[ 2932.073990]  ieee80211_reconfig_filter+0x1c/0x28 [mac80211]
[ 2932.073990]  process_one_work+0x208/0x3c8
[ 2932.073991]  worker_thread+0x23c/0x3e8
[ 2932.073991]  kthread+0x140/0x17c
[ 2932.073992]  ret_from_fork+0x10/0x18
[ 2932.074071] SMP: stopping secondary CPUs
[ 2932.074071] Kernel Offset: 0x12bc800000 from 0xffffffc010000000
[ 2932.074072] PHYS_OFFSET: 0xfffffff180000000
[ 2932.074072] CPU features: 0x080026,2a80aa18
[ 2932.074072] Memory Limit: none

Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 21 +++++++++--------
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   | 23 ++++++++++++++-----
 .../wireless/mediatek/mt76/mt7921/mt7921.h    |  1 +
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 44b8918db95b..31741da14099 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1281,9 +1281,10 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	mt76_worker_enable(&dev->mt76.tx_worker);
 
 	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
-	clear_bit(MT76_STATE_PM, &dev->mphy.state);
 
-	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
+	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA,
+		MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
+		MT_INT_MCU_CMD);
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 
 	err = mt7921_run_firmware(dev);
@@ -1301,22 +1302,23 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 /* system error recovery */
 void mt7921_mac_reset_work(struct work_struct *work)
 {
-	struct ieee80211_hw *hw;
-	struct mt7921_dev *dev;
+	struct mt7921_dev *dev = container_of(work, struct mt7921_dev,
+					      reset_work);
+	struct ieee80211_hw *hw = mt76_hw(dev);
+	struct mt76_connac_pm *pm = &dev->pm;
 	int i;
 
-	dev = container_of(work, struct mt7921_dev, reset_work);
-	hw = mt76_hw(dev);
-
 	dev_err(dev->mt76.dev, "chip reset\n");
 	ieee80211_stop_queues(hw);
 
 	cancel_delayed_work_sync(&dev->mphy.mac_work);
-	cancel_delayed_work_sync(&dev->pm.ps_work);
-	cancel_work_sync(&dev->pm.wake_work);
+	cancel_delayed_work_sync(&pm->ps_work);
+	cancel_work_sync(&pm->wake_work);
 
 	mutex_lock(&dev->mt76.mutex);
 	for (i = 0; i < 10; i++) {
+		__mt7921_mcu_drv_pmctrl(dev);
+
 		if (!mt7921_mac_reset(dev))
 			break;
 	}
@@ -1337,6 +1339,7 @@ void mt7921_mac_reset_work(struct work_struct *work)
 	ieee80211_iterate_active_interfaces(hw,
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
 					    mt7921_vif_connect_iter, NULL);
+	mt76_connac_power_save_sched(&dev->mt76.phy, pm);
 }
 
 void mt7921_reset(struct mt76_dev *mdev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 67dc4b4cc094..ef3e454862ad 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1288,17 +1288,12 @@ int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
 	return mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
 }
 
-int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
+int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
 	struct mt76_connac_pm *pm = &dev->pm;
 	int i, err = 0;
 
-	mutex_lock(&pm->mutex);
-
-	if (!test_bit(MT76_STATE_PM, &mphy->state))
-		goto out;
-
 	for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) {
 		mt76_wr(dev, MT_CONN_ON_LPCTL, PCIE_LPCR_HOST_CLR_OWN);
 		if (mt76_poll_msec(dev, MT_CONN_ON_LPCTL,
@@ -1318,6 +1313,22 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 	pm->stats.last_wake_event = jiffies;
 	pm->stats.doze_time += pm->stats.last_wake_event -
 			       pm->stats.last_doze_event;
+out:
+	return err;
+}
+
+int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
+{
+	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
+	int err = 0;
+
+	mutex_lock(&pm->mutex);
+
+	if (!test_bit(MT76_STATE_PM, &mphy->state))
+		goto out;
+
+	err = __mt7921_mcu_drv_pmctrl(dev);
 out:
 	mutex_unlock(&pm->mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 59862ea4951c..03bcb210c357 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -368,6 +368,7 @@ int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 			     bool enable);
 int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 			  bool enable);
+int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev);
 int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev);
 int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev);
 void mt7921_pm_wake_work(struct work_struct *work);

From 193e5f22eeb2a9661bff8bc0d8519e6ded48c807 Mon Sep 17 00:00:00 2001
From: YN Chen <yn.chen@mediatek.com>
Date: Mon, 10 May 2021 23:14:54 +0800
Subject: [PATCH 2240/3292] mt76: connac: fix WoW with disconnetion and bitmap
 pattern

Update MCU command usage to fix WoW configuration with disconnection
and bitmap pattern and to avoid magic number.

Fixes: ffa1bf97425b ("mt76: mt7921: introduce PM support")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: YN Chen <yn.chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 11 +++++++----
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h |  8 ++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 7b8f8e6f431d..54612a479215 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1979,7 +1979,7 @@ mt76_connac_mcu_set_wow_pattern(struct mt76_dev *dev,
 	ptlv->index = index;
 
 	memcpy(ptlv->pattern, pattern->pattern, pattern->pattern_len);
-	memcpy(ptlv->mask, pattern->mask, pattern->pattern_len / 8);
+	memcpy(ptlv->mask, pattern->mask, DIV_ROUND_UP(pattern->pattern_len, 8));
 
 	return mt76_mcu_skb_send_msg(dev, skb, MCU_UNI_CMD_SUSPEND, true);
 }
@@ -2014,14 +2014,17 @@ mt76_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif,
 	};
 
 	if (wowlan->magic_pkt)
-		req.wow_ctrl_tlv.trigger |= BIT(0);
+		req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_MAGIC;
 	if (wowlan->disconnect)
-		req.wow_ctrl_tlv.trigger |= BIT(2);
+		req.wow_ctrl_tlv.trigger |= (UNI_WOW_DETECT_TYPE_DISCONNECT |
+					     UNI_WOW_DETECT_TYPE_BCN_LOST);
 	if (wowlan->nd_config) {
 		mt76_connac_mcu_sched_scan_req(phy, vif, wowlan->nd_config);
-		req.wow_ctrl_tlv.trigger |= BIT(5);
+		req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_SCH_SCAN_HIT;
 		mt76_connac_mcu_sched_scan_enable(phy, vif, suspend);
 	}
+	if (wowlan->n_patterns)
+		req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_BITMAP;
 
 	if (mt76_is_mmio(dev))
 		req.wow_ctrl_tlv.wakeup_hif = WOW_PCIE;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 01fc9f2c2f4a..676b1c6bc959 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -590,6 +590,14 @@ enum {
 	UNI_OFFLOAD_OFFLOAD_BMC_RPY_DETECT,
 };
 
+#define UNI_WOW_DETECT_TYPE_MAGIC		BIT(0)
+#define UNI_WOW_DETECT_TYPE_ANY			BIT(1)
+#define UNI_WOW_DETECT_TYPE_DISCONNECT		BIT(2)
+#define UNI_WOW_DETECT_TYPE_GTK_REKEY_FAIL	BIT(3)
+#define UNI_WOW_DETECT_TYPE_BCN_LOST		BIT(4)
+#define UNI_WOW_DETECT_TYPE_SCH_SCAN_HIT	BIT(5)
+#define UNI_WOW_DETECT_TYPE_BITMAP		BIT(6)
+
 enum {
 	UNI_SUSPEND_MODE_SETTING,
 	UNI_SUSPEND_WOW_CTRL,

From edb5aebc1c3db312e74e1dcf75b8626ee5300596 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:55 +0800
Subject: [PATCH 2241/3292] mt76: mt7921: consider the invalid value for
 to_rssi

It is possible the RCPI from the certain antenna is an invalid value,
especially packets are receiving while the system is frequently entering
deep sleep mode, so consider calculating RSSI with the reasonable upper
bound to avoid report the wrong value to the mac80211 layer.

Fixes: 163f4d22c118 ("mt76: mt7921: add MAC support")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 31741da14099..96a6c0aabe06 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -461,16 +461,19 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 		status->chain_signal[1] = to_rssi(MT_PRXV_RCPI1, v1);
 		status->chain_signal[2] = to_rssi(MT_PRXV_RCPI2, v1);
 		status->chain_signal[3] = to_rssi(MT_PRXV_RCPI3, v1);
-		status->signal = status->chain_signal[0];
-
-		for (i = 1; i < hweight8(mphy->antenna_mask); i++) {
-			if (!(status->chains & BIT(i)))
+		status->signal = -128;
+		for (i = 0; i < hweight8(mphy->antenna_mask); i++) {
+			if (!(status->chains & BIT(i)) ||
+			    status->chain_signal[i] >= 0)
 				continue;
 
 			status->signal = max(status->signal,
 					     status->chain_signal[i]);
 		}
 
+		if (status->signal == -128)
+			status->flag |= RX_FLAG_NO_SIGNAL_VAL;
+
 		stbc = FIELD_GET(MT_PRXV_STBC, v0);
 		gi = FIELD_GET(MT_PRXV_SGI, v0);
 		cck = false;

From 10de032a31683585292cd10b598d896d7bcf276f Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:57 +0800
Subject: [PATCH 2242/3292] mt76: mt7921: add back connection monitor support

Hw beacon cmd to the mt7921 firmware doesn't only filter out the beacon,
but also performs its own connection monitoring, including periodic
keep-alives to the AP and probing the AP on beacon loss. Will indicate
the host with the event when the firmware detects the connection is lost.

Fixes: 1d8efc741df8 ("mt76: mt7921: introduce Runtime PM support")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: YN Chen <yn.chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/init.c  |  4 +++
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   | 32 +++++++++++++------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 9a28da5abb11..a3517ed76813 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -45,6 +45,7 @@ static void
 mt7921_init_wiphy(struct ieee80211_hw *hw)
 {
 	struct mt7921_phy *phy = mt7921_hw_phy(hw);
+	struct mt7921_dev *dev = phy->dev;
 	struct wiphy *wiphy = hw->wiphy;
 
 	hw->queues = 4;
@@ -84,6 +85,9 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, SUPPORTS_PS);
 	ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
 
+	if (dev->pm.enable)
+		ieee80211_hw_set(hw, CONNECTION_MONITOR);
+
 	hw->max_tx_fragments = 4;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index ef3e454862ad..06209d58ce27 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -450,22 +450,33 @@ mt7921_mcu_scan_event(struct mt7921_dev *dev, struct sk_buff *skb)
 }
 
 static void
-mt7921_mcu_beacon_loss_event(struct mt7921_dev *dev, struct sk_buff *skb)
+mt7921_mcu_connection_loss_iter(void *priv, u8 *mac,
+				struct ieee80211_vif *vif)
+{
+	struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv;
+	struct mt76_connac_beacon_loss_event *event = priv;
+
+	if (mvif->idx != event->bss_idx)
+		return;
+
+	if (!(vif->driver_flags & IEEE80211_VIF_BEACON_FILTER))
+		return;
+
+	ieee80211_connection_loss(vif);
+}
+
+static void
+mt7921_mcu_connection_loss_event(struct mt7921_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_connac_beacon_loss_event *event;
-	struct mt76_phy *mphy;
-	u8 band_idx = 0; /* DBDC support */
+	struct mt76_phy *mphy = &dev->mt76.phy;
 
 	skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
 	event = (struct mt76_connac_beacon_loss_event *)skb->data;
-	if (band_idx && dev->mt76.phy2)
-		mphy = dev->mt76.phy2;
-	else
-		mphy = &dev->mt76.phy;
 
 	ieee80211_iterate_active_interfaces_atomic(mphy->hw,
 					IEEE80211_IFACE_ITER_RESUME_ALL,
-					mt76_connac_mcu_beacon_loss_iter, event);
+					mt7921_mcu_connection_loss_iter, event);
 }
 
 static void
@@ -530,7 +541,7 @@ mt7921_mcu_rx_unsolicited_event(struct mt7921_dev *dev, struct sk_buff *skb)
 
 	switch (rxd->eid) {
 	case MCU_EVENT_BSS_BEACON_LOSS:
-		mt7921_mcu_beacon_loss_event(dev, skb);
+		mt7921_mcu_connection_loss_event(dev, skb);
 		break;
 	case MCU_EVENT_SCHED_SCAN_DONE:
 	case MCU_EVENT_SCAN_DONE:
@@ -1379,6 +1390,7 @@ mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 {
 	struct mt7921_phy *phy = priv;
 	struct mt7921_dev *dev = phy->dev;
+	struct ieee80211_hw *hw = mt76_hw(dev);
 	int ret;
 
 	if (dev->pm.enable)
@@ -1391,9 +1403,11 @@ mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 
 	if (dev->pm.enable) {
 		vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER;
+		ieee80211_hw_set(hw, CONNECTION_MONITOR);
 		mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
 	} else {
 		vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER;
+		__clear_bit(IEEE80211_HW_CONNECTION_MONITOR, hw->flags);
 		mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
 	}
 }

From 33fe9c639c13de03ce04a1dc2c904d66d1cd02eb Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 24 Apr 2021 06:02:04 +0800
Subject: [PATCH 2243/3292] mt76: mt7915: add thermal sensor device support

This provides userspace with a unified interface, hwmon sysfs, to monitor
temperature in the hardware and can be adapted to system monitoring tools.

For reading temperature, cat /sys/class/ieee80211/phy*/hwmon*/temp1_input

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7915/debugfs.c   | 14 -----
 .../net/wireless/mediatek/mt76/mt7915/init.c  | 51 +++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   |  7 +--
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  2 +-
 4 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 6a8ddeeecbe9..f1e8b076d54c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -224,18 +224,6 @@ mt7915_tx_stats_show(struct seq_file *file, void *data)
 
 DEFINE_SHOW_ATTRIBUTE(mt7915_tx_stats);
 
-static int mt7915_read_temperature(struct seq_file *s, void *data)
-{
-	struct mt7915_dev *dev = dev_get_drvdata(s->private);
-	int temp;
-
-	/* cpu */
-	temp = mt7915_mcu_get_temperature(dev, 0);
-	seq_printf(s, "Temperature: %d\n", temp);
-
-	return 0;
-}
-
 static int
 mt7915_queues_acq(struct seq_file *s, void *data)
 {
@@ -390,8 +378,6 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
 	debugfs_create_file("radar_trigger", 0200, dir, dev,
 			    &fops_radar_trigger);
 	debugfs_create_file("ser_trigger", 0200, dir, dev, &fops_ser_trigger);
-	debugfs_create_devm_seqfile(dev->mt76.dev, "temperature", dir,
-				    mt7915_read_temperature);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "txpower_sku", dir,
 				    mt7915_read_rate_txpower);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index c103175d9954..ff7c38b87ed1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -2,6 +2,8 @@
 /* Copyright (C) 2020 MediaTek Inc. */
 
 #include <linux/etherdevice.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
 #include "mt7915.h"
 #include "mac.h"
 #include "mcu.h"
@@ -39,6 +41,47 @@ static const struct ieee80211_iface_combination if_comb[] = {
 	}
 };
 
+static ssize_t mt7915_thermal_show_temp(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct mt7915_phy *phy = dev_get_drvdata(dev);
+	int temperature;
+
+	temperature = mt7915_mcu_get_temperature(phy);
+	if (temperature < 0)
+		return temperature;
+
+	/* display in millidegree celcius */
+	return sprintf(buf, "%u\n", temperature * 1000);
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, 0444, mt7915_thermal_show_temp,
+			  NULL, 0);
+
+static struct attribute *mt7915_hwmon_attrs[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(mt7915_hwmon);
+
+static int mt7915_thermal_init(struct mt7915_phy *phy)
+{
+	struct wiphy *wiphy = phy->mt76->hw->wiphy;
+	struct device *hwmon;
+
+	if (!IS_REACHABLE(CONFIG_HWMON))
+		return 0;
+
+	hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev,
+						       wiphy_name(wiphy), phy,
+						       mt7915_hwmon_groups);
+	if (IS_ERR(hwmon))
+		return PTR_ERR(hwmon);
+
+	return 0;
+}
+
 static void
 mt7915_init_txpower(struct mt7915_dev *dev,
 		    struct ieee80211_supported_band *sband)
@@ -258,6 +301,10 @@ static int mt7915_register_ext_phy(struct mt7915_dev *dev)
 	if (ret)
 		goto error;
 
+	ret = mt7915_thermal_init(phy);
+	if (ret)
+		goto error;
+
 	return 0;
 
 error:
@@ -708,6 +755,10 @@ int mt7915_register_device(struct mt7915_dev *dev)
 	if (ret)
 		return ret;
 
+	ret = mt7915_thermal_init(&dev->phy);
+	if (ret)
+		return ret;
+
 	ieee80211_queue_work(mt76_hw(dev), &dev->init_work);
 
 	ret = mt7915_register_ext_phy(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index e5302ff4c9dd..f40a2090837f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3559,16 +3559,17 @@ int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy)
 	return 0;
 }
 
-int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index)
+int mt7915_mcu_get_temperature(struct mt7915_phy *phy)
 {
+	struct mt7915_dev *dev = phy->dev;
 	struct {
 		u8 ctrl_id;
 		u8 action;
-		u8 band;
+		u8 dbdc_idx;
 		u8 rsv[5];
 	} req = {
 		.ctrl_id = THERMAL_SENSOR_TEMP_QUERY,
-		.action = index,
+		.dbdc_idx = phy != &dev->phy,
 	};
 
 	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(THERMAL_CTRL), &req,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 47341876da09..3ff549577574 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -356,7 +356,7 @@ int mt7915_mcu_set_radar_th(struct mt7915_dev *dev, int index,
 			    const struct mt7915_dfs_pattern *pattern);
 int mt7915_mcu_apply_group_cal(struct mt7915_dev *dev);
 int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy);
-int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index);
+int mt7915_mcu_get_temperature(struct mt7915_phy *phy);
 int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx);
 int mt7915_mcu_get_rx_rate(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta, struct rate_info *rate);

From 34b877d972bec8cbf397a57393317672cf92996f Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 24 Apr 2021 06:02:05 +0800
Subject: [PATCH 2244/3292] mt76: mt7915: add thermal cooling device support

Thermal cooling device support is added to control the temperature by
throttling the data transmission for the given duration. Throttling is
done by adjusting Tx period by given percentage of time. The thermal
device allows user to configure duty cycle.

Throttling can be disabled by setting the duty cycle to 0. The cooling
device can be found under /sys/class/thermal/cooling_deviceX/.
Corresponding soft link to this device can be found under phy folder

To set duty cycle as 80%,
echo 80 > /sys/class/ieee80211/phy*/cooling_device/cur_state

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/init.c  | 73 +++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 80 +++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7915/mcu.h   | 35 ++++++++
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  6 ++
 4 files changed, 194 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index ff7c38b87ed1..39f3639aa096 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -4,6 +4,7 @@
 #include <linux/etherdevice.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
+#include <linux/thermal.h>
 #include "mt7915.h"
 #include "mac.h"
 #include "mcu.h"
@@ -65,11 +66,81 @@ static struct attribute *mt7915_hwmon_attrs[] = {
 };
 ATTRIBUTE_GROUPS(mt7915_hwmon);
 
+static int
+mt7915_thermal_get_max_throttle_state(struct thermal_cooling_device *cdev,
+				      unsigned long *state)
+{
+	*state = MT7915_THERMAL_THROTTLE_MAX;
+
+	return 0;
+}
+
+static int
+mt7915_thermal_get_cur_throttle_state(struct thermal_cooling_device *cdev,
+				      unsigned long *state)
+{
+	struct mt7915_phy *phy = cdev->devdata;
+
+	*state = phy->throttle_state;
+
+	return 0;
+}
+
+static int
+mt7915_thermal_set_cur_throttle_state(struct thermal_cooling_device *cdev,
+				      unsigned long state)
+{
+	struct mt7915_phy *phy = cdev->devdata;
+	int ret;
+
+	if (state > MT7915_THERMAL_THROTTLE_MAX)
+		return -EINVAL;
+
+	if (state == phy->throttle_state)
+		return 0;
+
+	ret = mt7915_mcu_set_thermal_throttling(phy, state);
+	if (ret)
+		return ret;
+
+	phy->throttle_state = state;
+
+	return 0;
+}
+
+static const struct thermal_cooling_device_ops mt7915_thermal_ops = {
+	.get_max_state = mt7915_thermal_get_max_throttle_state,
+	.get_cur_state = mt7915_thermal_get_cur_throttle_state,
+	.set_cur_state = mt7915_thermal_set_cur_throttle_state,
+};
+
+static void mt7915_unregister_thermal(struct mt7915_phy *phy)
+{
+	struct wiphy *wiphy = phy->mt76->hw->wiphy;
+
+	if (!phy->cdev)
+	    return;
+
+	sysfs_remove_link(&wiphy->dev.kobj, "cooling_device");
+	thermal_cooling_device_unregister(phy->cdev);
+}
+
 static int mt7915_thermal_init(struct mt7915_phy *phy)
 {
 	struct wiphy *wiphy = phy->mt76->hw->wiphy;
+	struct thermal_cooling_device *cdev;
 	struct device *hwmon;
 
+	cdev = thermal_cooling_device_register(wiphy_name(wiphy), phy,
+					       &mt7915_thermal_ops);
+	if (!IS_ERR(cdev)) {
+		if (sysfs_create_link(&wiphy->dev.kobj, &cdev->device.kobj,
+				      "cooling_device") < 0)
+			thermal_cooling_device_unregister(cdev);
+		else
+			phy->cdev = cdev;
+	}
+
 	if (!IS_REACHABLE(CONFIG_HWMON))
 		return 0;
 
@@ -709,6 +780,7 @@ static void mt7915_unregister_ext_phy(struct mt7915_dev *dev)
 	if (!phy)
 		return;
 
+	mt7915_unregister_thermal(phy);
 	mt76_unregister_phy(mphy);
 	ieee80211_free_hw(mphy->hw);
 }
@@ -771,6 +843,7 @@ int mt7915_register_device(struct mt7915_dev *dev)
 void mt7915_unregister_device(struct mt7915_dev *dev)
 {
 	mt7915_unregister_ext_phy(dev);
+	mt7915_unregister_thermal(&dev->phy);
 	mt76_unregister_device(&dev->mt76);
 	mt7915_mcu_exit(dev);
 	mt7915_tx_token_put(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index f40a2090837f..05ba45d0b3e1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -455,6 +455,24 @@ mt7915_mcu_rx_csa_notify(struct mt7915_dev *dev, struct sk_buff *skb)
 			mt7915_mcu_csa_finish, mphy->hw);
 }
 
+static void
+mt7915_mcu_rx_thermal_notify(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt7915_mcu_thermal_notify *t;
+	struct mt7915_phy *phy;
+
+	t = (struct mt7915_mcu_thermal_notify *)skb->data;
+	if (t->ctrl.ctrl_id != THERMAL_PROTECT_ENABLE)
+		return;
+
+	if (t->ctrl.band_idx && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+
+	phy = (struct mt7915_phy *)mphy->priv;
+	phy->throttle_state = t->ctrl.duty.duty_cycle;
+}
+
 static void
 mt7915_mcu_rx_radar_detected(struct mt7915_dev *dev, struct sk_buff *skb)
 {
@@ -645,6 +663,9 @@ mt7915_mcu_rx_ext_event(struct mt7915_dev *dev, struct sk_buff *skb)
 	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
 
 	switch (rxd->ext_eid) {
+	case MCU_EXT_EVENT_THERMAL_PROTECT:
+		mt7915_mcu_rx_thermal_notify(dev, skb);
+		break;
 	case MCU_EXT_EVENT_RDD_REPORT:
 		mt7915_mcu_rx_radar_detected(dev, skb);
 		break;
@@ -3576,6 +3597,65 @@ int mt7915_mcu_get_temperature(struct mt7915_phy *phy)
 				 sizeof(req), true);
 }
 
+int mt7915_mcu_set_thermal_throttling(struct mt7915_phy *phy, u8 state)
+{
+	struct mt7915_dev *dev = phy->dev;
+	struct {
+		struct mt7915_mcu_thermal_ctrl ctrl;
+
+		__le32 trigger_temp;
+		__le32 restore_temp;
+		__le16 sustain_time;
+		u8 rsv[2];
+	} __packed req = {
+		.ctrl = {
+			.band_idx = phy != &dev->phy,
+		},
+	};
+	int level;
+
+#define TRIGGER_TEMPERATURE	122
+#define RESTORE_TEMPERATURE	116
+#define SUSTAIN_PERIOD		10
+
+	if (!state) {
+		req.ctrl.ctrl_id = THERMAL_PROTECT_DISABLE;
+		goto out;
+	}
+
+	/* set duty cycle and level */
+	for (level = 0; level < 4; level++) {
+		int ret;
+
+		req.ctrl.ctrl_id = THERMAL_PROTECT_DUTY_CONFIG;
+		req.ctrl.duty.duty_level = level;
+		req.ctrl.duty.duty_cycle = state;
+		state = state * 4 / 5;
+
+		ret = mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(THERMAL_PROT),
+					&req, sizeof(req.ctrl), false);
+		if (ret)
+			return ret;
+	}
+
+	/* currently use fixed values for throttling, and would be better
+	 * to implement thermal zone for dynamic trip in the long run.
+	 */
+
+	/* set high-temperature trigger threshold */
+	req.ctrl.ctrl_id = THERMAL_PROTECT_ENABLE;
+	req.trigger_temp = cpu_to_le32(TRIGGER_TEMPERATURE);
+	req.restore_temp = cpu_to_le32(RESTORE_TEMPERATURE);
+	req.sustain_time = cpu_to_le16(SUSTAIN_PERIOD);
+
+out:
+	req.ctrl.type.protect_type = 1;
+	req.ctrl.type.trigger_type = 1;
+
+	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(THERMAL_PROT),
+				 &req, sizeof(req), false);
+}
+
 int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx)
 {
 	struct {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index f95920d58a40..7e3432384633 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -68,6 +68,29 @@ struct mt7915_mcu_rxd {
 	u8 s2d_index;
 };
 
+struct mt7915_mcu_thermal_ctrl {
+	u8 ctrl_id;
+	u8 band_idx;
+	union {
+		struct {
+			u8 protect_type; /* 1: duty admit, 2: radio off */
+			u8 trigger_type; /* 0: low, 1: high */
+		} __packed type;
+		struct {
+			u8 duty_level;	/* level 0~3 */
+			u8 duty_cycle;
+		} __packed duty;
+	};
+} __packed;
+
+struct mt7915_mcu_thermal_notify {
+	struct mt7915_mcu_rxd rxd;
+
+	struct mt7915_mcu_thermal_ctrl ctrl;
+	__le32 temperature;
+	u8 rsv[8];
+} __packed;
+
 struct mt7915_mcu_csa_notify {
 	struct mt7915_mcu_rxd rxd;
 
@@ -262,6 +285,7 @@ enum {
 	MCU_EXT_CMD_FW_LOG_2_HOST = 0x13,
 	MCU_EXT_CMD_TXBF_ACTION = 0x1e,
 	MCU_EXT_CMD_EFUSE_BUFFER_MODE = 0x21,
+	MCU_EXT_CMD_THERMAL_PROT = 0x23,
 	MCU_EXT_CMD_STA_REC_UPDATE = 0x25,
 	MCU_EXT_CMD_BSS_INFO_UPDATE = 0x26,
 	MCU_EXT_CMD_EDCA_UPDATE = 0x27,
@@ -1066,6 +1090,17 @@ enum {
 	THERMAL_SENSOR_TASK_CTRL,
 };
 
+enum {
+	THERMAL_PROTECT_PARAMETER_CTRL,
+	THERMAL_PROTECT_BASIC_INFO,
+	THERMAL_PROTECT_ENABLE,
+	THERMAL_PROTECT_DISABLE,
+	THERMAL_PROTECT_DUTY_CONFIG,
+	THERMAL_PROTECT_MECH_INFO,
+	THERMAL_PROTECT_DUTY_INFO,
+	THERMAL_PROTECT_STATE_ACT,
+};
+
 enum {
 	MT_EBF = BIT(0),	/* explicit beamforming */
 	MT_IBF = BIT(1)		/* implicit beamforming */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 3ff549577574..f3ffa907bf87 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -38,6 +38,8 @@
 #define MT7915_5G_RATE_DEFAULT		0x4b	/* OFDM 6M */
 #define MT7915_2G_RATE_DEFAULT		0x0	/* CCK 1M */
 
+#define MT7915_THERMAL_THROTTLE_MAX	100
+
 struct mt7915_vif;
 struct mt7915_sta;
 struct mt7915_dfs_pulse;
@@ -127,6 +129,9 @@ struct mt7915_phy {
 
 	struct ieee80211_vif *monitor_vif;
 
+	struct thermal_cooling_device *cdev;
+	u8 throttle_state;
+
 	u32 rxfilter;
 	u64 omac_mask;
 
@@ -357,6 +362,7 @@ int mt7915_mcu_set_radar_th(struct mt7915_dev *dev, int index,
 int mt7915_mcu_apply_group_cal(struct mt7915_dev *dev);
 int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy);
 int mt7915_mcu_get_temperature(struct mt7915_phy *phy);
+int mt7915_mcu_set_thermal_throttling(struct mt7915_phy *phy, u8 state);
 int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx);
 int mt7915_mcu_get_rx_rate(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta, struct rate_info *rate);

From 109e505ad944dc207aaa9ee134b0994be09d291d Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 24 Apr 2021 06:02:06 +0800
Subject: [PATCH 2245/3292] mt76: mt7615: add thermal sensor device support

Similar to mt7915, switching to use standard hwmon sysfs.
For reading temperature, cat /sys/class/ieee80211/phy*/hwmon*/temp1_input

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7615/debugfs.c   | 20 --------
 .../net/wireless/mediatek/mt76/mt7615/init.c  | 50 +++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7615/mcu.c   |  6 +--
 .../wireless/mediatek/mt76/mt7615/mt7615.h    |  3 +-
 .../wireless/mediatek/mt76/mt7615/pci_init.c  |  4 ++
 5 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 676bb22726d6..8cb4426e757c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -319,24 +319,6 @@ mt7615_radio_read(struct seq_file *s, void *data)
 	return 0;
 }
 
-static int mt7615_read_temperature(struct seq_file *s, void *data)
-{
-	struct mt7615_dev *dev = dev_get_drvdata(s->private);
-	int temp;
-
-	if (!mt7615_wait_for_mcu_init(dev))
-		return 0;
-
-	/* cpu */
-	mt7615_mutex_acquire(dev);
-	temp = mt7615_mcu_get_temperature(dev, 0);
-	mt7615_mutex_release(dev);
-
-	seq_printf(s, "Temperature: %d\n", temp);
-
-	return 0;
-}
-
 static int
 mt7615_queues_acq(struct seq_file *s, void *data)
 {
@@ -566,8 +548,6 @@ int mt7615_init_debugfs(struct mt7615_dev *dev)
 
 	debugfs_create_file("reset_test", 0200, dir, dev,
 			    &fops_reset_test);
-	debugfs_create_devm_seqfile(dev->mt76.dev, "temperature", dir,
-				    mt7615_read_temperature);
 	debugfs_create_file("ext_mac_addr", 0600, dir, dev, &fops_ext_mac_addr);
 
 	debugfs_create_u32("rf_wfidx", 0600, dir, &dev->debugfs_rf_wf);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index ecc3ca9eb658..4aa7877a6383 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -8,11 +8,61 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
 #include "mt7615.h"
 #include "mac.h"
 #include "mcu.h"
 #include "eeprom.h"
 
+static ssize_t mt7615_thermal_show_temp(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct mt7615_dev *mdev = dev_get_drvdata(dev);
+	int temperature;
+
+	if (!mt7615_wait_for_mcu_init(mdev))
+		return 0;
+
+	mt7615_mutex_acquire(mdev);
+	temperature = mt7615_mcu_get_temperature(mdev);
+	mt7615_mutex_release(mdev);
+
+	if (temperature < 0)
+		return temperature;
+
+	/* display in millidegree celcius */
+	return sprintf(buf, "%u\n", temperature * 1000);
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, 0444, mt7615_thermal_show_temp,
+			  NULL, 0);
+
+static struct attribute *mt7615_hwmon_attrs[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(mt7615_hwmon);
+
+int mt7615_thermal_init(struct mt7615_dev *dev)
+{
+	struct wiphy *wiphy = mt76_hw(dev)->wiphy;
+	struct device *hwmon;
+
+	if (!IS_REACHABLE(CONFIG_HWMON))
+		return 0;
+
+	hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev,
+						       wiphy_name(wiphy), dev,
+						       mt7615_hwmon_groups);
+	if (IS_ERR(hwmon))
+		return PTR_ERR(hwmon);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mt7615_thermal_init);
+
 static void
 mt7615_phy_init(struct mt7615_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 32090e01b4d2..a59bd7af81be 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2350,14 +2350,12 @@ int mt7615_mcu_set_chan_info(struct mt7615_phy *phy, int cmd)
 	return mt76_mcu_send_msg(&dev->mt76, cmd, &req, sizeof(req), true);
 }
 
-int mt7615_mcu_get_temperature(struct mt7615_dev *dev, int index)
+int mt7615_mcu_get_temperature(struct mt7615_dev *dev)
 {
 	struct {
 		u8 action;
 		u8 rsv[3];
-	} req = {
-		.action = index,
-	};
+	} req = {};
 
 	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_GET_TEMP, &req,
 				 sizeof(req), true);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 2ba86bd96a31..8f03dddba8cf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -360,6 +360,7 @@ static inline int mt7622_wmac_init(struct mt7615_dev *dev)
 }
 #endif
 
+int mt7615_thermal_init(struct mt7615_dev *dev);
 int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 		      int irq, const u32 *map);
 u32 mt7615_reg_map(struct mt7615_dev *dev, u32 addr);
@@ -498,7 +499,7 @@ u32 mt7615_rf_rr(struct mt7615_dev *dev, u32 wf, u32 reg);
 int mt7615_rf_wr(struct mt7615_dev *dev, u32 wf, u32 reg, u32 val);
 int mt7615_mcu_set_dbdc(struct mt7615_dev *dev);
 int mt7615_mcu_set_eeprom(struct mt7615_dev *dev);
-int mt7615_mcu_get_temperature(struct mt7615_dev *dev, int index);
+int mt7615_mcu_get_temperature(struct mt7615_dev *dev);
 int mt7615_mcu_set_tx_power(struct mt7615_phy *phy);
 void mt7615_mcu_exit(struct mt7615_dev *dev);
 void mt7615_mcu_fill_msg(struct mt7615_dev *dev, struct sk_buff *skb,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 273fda08bfa2..261cff78de40 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -152,6 +152,10 @@ int mt7615_register_device(struct mt7615_dev *dev)
 	if (ret)
 		return ret;
 
+	ret = mt7615_thermal_init(dev);
+	if (ret)
+		return ret;
+
 	ieee80211_queue_work(mt76_hw(dev), &dev->mcu_work);
 	mt7615_init_txpower(dev, &dev->mphy.sband_2g.sband);
 	mt7615_init_txpower(dev, &dev->mphy.sband_5g.sband);

From f011c3691a7efe66b1ef76bbf7821973b8490a15 Mon Sep 17 00:00:00 2001
From: Deren Wu <deren.wu@mediatek.com>
Date: Thu, 20 May 2021 11:46:36 +0800
Subject: [PATCH 2246/3292] mt76: connac: update BA win size in Rx direction

Update BA size used data transimission in the Rx direction to improve Rx
throughput.

Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Leon Yen <leon.yen@mediatek.com>
Signed-off-by: YN Chen <yn.chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 54612a479215..dd4a28b75e4e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -935,8 +935,10 @@ void mt76_connac_mcu_wtbl_ba_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 		ba->rst_ba_sb = 1;
 	}
 
-	if (is_mt7921(dev))
+	if (is_mt7921(dev)) {
+		ba->ba_winsize = enable ? cpu_to_le16(params->buf_size) : 0;
 		return;
+	}
 
 	if (enable && tx) {
 		u8 ba_range[] = { 4, 8, 12, 24, 36, 48, 54, 64 };

From f07ac384b4579f294bb1e0380ed501156219ed71 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 20 May 2021 11:46:38 +0800
Subject: [PATCH 2247/3292] mt76: mt7921: avoid unnecessary consecutive WiFi
 resets

Avoid unnecessary consecutive WiFi resets by dropping reset
request when reset work is working.

Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 5 ++++-
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 96a6c0aabe06..ffc73fabb5b9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1312,6 +1312,7 @@ void mt7921_mac_reset_work(struct work_struct *work)
 	int i;
 
 	dev_err(dev->mt76.dev, "chip reset\n");
+	dev->hw_full_reset = true;
 	ieee80211_stop_queues(hw);
 
 	cancel_delayed_work_sync(&dev->mphy.mac_work);
@@ -1338,6 +1339,7 @@ void mt7921_mac_reset_work(struct work_struct *work)
 		ieee80211_scan_completed(dev->mphy.hw, &info);
 	}
 
+	dev->hw_full_reset = false;
 	ieee80211_wake_queues(hw);
 	ieee80211_iterate_active_interfaces(hw,
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
@@ -1349,7 +1351,8 @@ void mt7921_reset(struct mt76_dev *mdev)
 {
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 
-	queue_work(dev->mt76.wq, &dev->reset_work);
+	if (!dev->hw_full_reset)
+		queue_work(dev->mt76.wq, &dev->reset_work);
 }
 
 static void
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 03bcb210c357..710ad242fd53 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -156,6 +156,7 @@ struct mt7921_dev {
 	u16 chainmask;
 
 	struct work_struct reset_work;
+	bool hw_full_reset;
 
 	struct list_head sta_poll_list;
 	spinlock_t sta_poll_lock;

From f86625ae0e35924ed495cdf0ff2d3133cb6e3010 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 20 May 2021 11:46:39 +0800
Subject: [PATCH 2248/3292] mt76: mt7921: fix invalid register access in
 wake_work

Make sure mt7921_pm_wake_work wouldn't be scheduled after the driver is
in suspend mode to fix the following the kernel crash.

[ 3515.390012] mt7921e 0000:01:00.0: calling pci_pm_suspend+0x0/0x22c @ 2869, parent: 0000:00:00.0
[ 3515.390015] mt7921e 0000:01:00.0: mt7921_pci_suspend +
[ 3515.396395] anx7625 3-0058: anx7625_suspend+0x0/0x6c returned 0 after 0 usecs
[ 3515.405965] mt7921e 0000:01:00.0: mt7921_pci_suspend -
[ 3515.411336] usb 1-1.4: usb_dev_suspend+0x0/0x2c returned 0 after 1 usecs
[ 3515.411513] SError Interrupt on CPU7, code 0xbe000011 -- SError
[ 3515.411515] CPU: 7 PID: 2849 Comm: kworker/u16:27 Not tainted 5.4.114 #44
[ 3515.411516] Hardware name: MediaTek Asurada rev1 board (DT)
[ 3515.411517] Workqueue: mt76 mt7921_pm_wake_work [mt7921e]
[ 3515.411518] pstate: 80c00009 (Nzcv daif +PAN +UAO)
[ 3515.411519] pc : mt76_mmio_rr+0x30/0xf0 [mt76]
[ 3515.411520] lr : mt7921_rr+0x38/0x44 [mt7921e]
[ 3515.411520] sp : ffffffc015813c50
[ 3515.411521] x29: ffffffc015813c50 x28: 0000000000000402
[ 3515.411522] x27: ffffffe5a2012138 x26: ffffffe5a1eea018
[ 3515.411524] x25: 00000000328be505 x24: 00000000000a0002
[ 3515.411525] x23: 0000000000000006 x22: ffffffbd29b7a300
[ 3515.411527] x21: ffffffbd29b7a300 x20: 00000000000e0010
[ 3515.411528] x19: 00000000eac08f43 x18: 0000000000000000
[ 3515.411529] x17: 0000000000000000 x16: ffffffe5a16b2914
[ 3515.411531] x15: 0000000000000010 x14: 0000000000000010
[ 3515.411532] x13: 00000000003dd3a2 x12: 0000000000010000
[ 3515.411533] x11: ffffffe597abec14 x10: 0000000000000010
[ 3515.411535] x9 : ffffffe597abeba8 x8 : ffffffc013ce0010
[ 3515.411536] x7 : 000000b2b5593519 x6 : 0000000000300000
[ 3515.411537] x5 : 0000000000000000 x4 : 0000000000000032
[ 3515.411539] x3 : 0000000000000000 x2 : 0000000000000004
[ 3515.411540] x1 : 00000000000e0010 x0 : ffffffbd29b7a300
[ 3515.411542] Kernel panic - not syncing: Asynchronous SError Interrupt
[ 3515.411543] CPU: 7 PID: 2849 Comm: kworker/u16:27 Not tainted 5.4.114 #44
[ 3515.411544] Hardware name: MediaTek Asurada rev1 board (DT)
[ 3515.411544] Workqueue: mt76 mt7921_pm_wake_work [mt7921e]
[ 3515.411545] Call trace:
[ 3515.411546]  dump_backtrace+0x0/0x14c
[ 3515.411546]  show_stack+0x20/0x2c
[ 3515.411547]  dump_stack+0xa0/0xfc
[ 3515.411548]  panic+0x154/0x350
[ 3515.411548]  panic+0x0/0x350
[ 3515.411549]  arm64_serror_panic+0x78/0x84
[ 3515.411550]  do_serror+0x0/0x118
[ 3515.411550]  do_serror+0xa4/0x118
[ 3515.411551]  el1_error+0x84/0xf8
[ 3515.411552]  mt76_mmio_rr+0x30/0xf0 [mt76]
[ 3515.411552]  mt7921_rr+0x38/0x44 [mt7921e]
[ 3515.411553]  __mt76_poll_msec+0x5c/0x9c [mt76]
[ 3515.411554]  __mt7921_mcu_drv_pmctrl+0x50/0x94 [mt7921e]
[ 3515.411555]  mt7921_mcu_drv_pmctrl+0x38/0xb0 [mt7921e]
[ 3515.411555]  mt7921_pm_wake_work+0x34/0xd4 [mt7921e]
[ 3515.411556]  process_one_work+0x208/0x3c8
[ 3515.411557]  worker_thread+0x23c/0x3e8
[ 3515.411557]  kthread+0x144/0x178
[ 3515.411558]  ret_from_fork+0x10/0x18
[ 3515.418831] SMP: stopping secondary CPUs
[ 3515.418832] Kernel Offset: 0x2590c00000 from 0xffffffc010000000
[ 3515.418832] PHYS_OFFSET: 0xffffffc400000000
[ 3515.418833] CPU features: 0x080026,2a80aa18
[ 3515.418834] Memory Limit: none
[DL] 00000000 00000000 010701

Fixes: 1d8efc741df80 ("mt76: mt7921: introduce Runtime PM support")
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt76_connac.h  |  1 +
 .../wireless/mediatek/mt76/mt76_connac_mac.c  |  6 +++++
 .../net/wireless/mediatek/mt76/mt7921/pci.c   | 25 +++++++++++++------
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 337c5ece7ec3..63c1d1a68a70 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -45,6 +45,7 @@ enum {
 
 struct mt76_connac_pm {
 	bool enable;
+	bool suspended;
 
 	spinlock_t txq_lock;
 	struct {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index 6f180c92d413..5f2705fbd680 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -17,6 +17,9 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 	if (!test_bit(MT76_STATE_PM, &phy->state))
 		return 0;
 
+	if (pm->suspended)
+		return 0;
+
 	queue_work(dev->wq, &pm->wake_work);
 	if (!wait_event_timeout(pm->wait,
 				!test_bit(MT76_STATE_PM, &phy->state),
@@ -40,6 +43,9 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 	if (!pm->enable)
 		return;
 
+	if (pm->suspended)
+		return;
+
 	pm->last_activity = jiffies;
 
 	if (!test_bit(MT76_STATE_PM, &phy->state)) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index fa02d934f0bf..13263f50dc00 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -188,21 +188,26 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct mt76_dev *mdev = pci_get_drvdata(pdev);
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+	struct mt76_connac_pm *pm = &dev->pm;
 	bool hif_suspend;
 	int i, err;
 
-	err = mt76_connac_pm_wake(&dev->mphy, &dev->pm);
+	pm->suspended = true;
+	cancel_delayed_work_sync(&pm->ps_work);
+	cancel_work_sync(&pm->wake_work);
+
+	err = mt7921_mcu_drv_pmctrl(dev);
 	if (err < 0)
-		return err;
+		goto restore_suspend;
 
 	hif_suspend = !test_bit(MT76_STATE_SUSPEND, &dev->mphy.state);
 	if (hif_suspend) {
 		err = mt76_connac_mcu_set_hif_suspend(mdev, true);
 		if (err)
-			return err;
+			goto restore_suspend;
 	}
 
-	if (!dev->pm.enable)
+	if (!pm->enable)
 		mt76_connac_mcu_set_deep_sleep(&dev->mt76, true);
 
 	napi_disable(&mdev->tx_napi);
@@ -231,27 +236,30 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	err = mt7921_mcu_fw_pmctrl(dev);
 	if (err)
-		goto restore;
+		goto restore_napi;
 
 	pci_save_state(pdev);
 	err = pci_set_power_state(pdev, pci_choose_state(pdev, state));
 	if (err)
-		goto restore;
+		goto restore_napi;
 
 	return 0;
 
-restore:
+restore_napi:
 	mt76_for_each_q_rx(mdev, i) {
 		napi_enable(&mdev->napi[i]);
 	}
 	napi_enable(&mdev->tx_napi);
 
-	if (!dev->pm.enable)
+	if (!pm->enable)
 		mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
 
 	if (hif_suspend)
 		mt76_connac_mcu_set_hif_suspend(mdev, false);
 
+restore_suspend:
+	pm->suspended = false;
+
 	return err;
 }
 
@@ -261,6 +269,7 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	int i, err;
 
+	dev->pm.suspended = false;
 	err = pci_set_power_state(pdev, PCI_D0);
 	if (err)
 		return err;

From 213f87289ea01514acdbfeed9f65bcb5f12aef70 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 20 May 2021 11:46:40 +0800
Subject: [PATCH 2249/3292] mt76: mt7921: fix OMAC idx usage

OMAC idx have to be same with BSS idx according to firmware usage.

Fixes: e0f9fdda81bd ("mt76: mt7921: add ieee80211_ops")
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: YN Chen <yn.chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/main.c  | 55 +------------------
 1 file changed, 1 insertion(+), 54 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 8016f8377c8f..175030ec221d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -223,54 +223,6 @@ static void mt7921_stop(struct ieee80211_hw *hw)
 	mt7921_mutex_release(dev);
 }
 
-static inline int get_free_idx(u32 mask, u8 start, u8 end)
-{
-	return ffs(~mask & GENMASK(end, start));
-}
-
-static int get_omac_idx(enum nl80211_iftype type, u64 mask)
-{
-	int i;
-
-	switch (type) {
-	case NL80211_IFTYPE_STATION:
-		/* prefer hw bssid slot 1-3 */
-		i = get_free_idx(mask, HW_BSSID_1, HW_BSSID_3);
-		if (i)
-			return i - 1;
-
-		/* next, try to find a free repeater entry for the sta */
-		i = get_free_idx(mask >> REPEATER_BSSID_START, 0,
-				 REPEATER_BSSID_MAX - REPEATER_BSSID_START);
-		if (i)
-			return i + 32 - 1;
-
-		i = get_free_idx(mask, EXT_BSSID_1, EXT_BSSID_MAX);
-		if (i)
-			return i - 1;
-
-		if (~mask & BIT(HW_BSSID_0))
-			return HW_BSSID_0;
-
-		break;
-	case NL80211_IFTYPE_MONITOR:
-		/* ap uses hw bssid 0 and ext bssid */
-		if (~mask & BIT(HW_BSSID_0))
-			return HW_BSSID_0;
-
-		i = get_free_idx(mask, EXT_BSSID_1, EXT_BSSID_MAX);
-		if (i)
-			return i - 1;
-
-		break;
-	default:
-		WARN_ON(1);
-		break;
-	}
-
-	return -1;
-}
-
 static int mt7921_add_interface(struct ieee80211_hw *hw,
 				struct ieee80211_vif *vif)
 {
@@ -292,12 +244,7 @@ static int mt7921_add_interface(struct ieee80211_hw *hw,
 		goto out;
 	}
 
-	idx = get_omac_idx(vif->type, phy->omac_mask);
-	if (idx < 0) {
-		ret = -ENOSPC;
-		goto out;
-	}
-	mvif->mt76.omac_idx = idx;
+	mvif->mt76.omac_idx = mvif->mt76.idx;
 	mvif->phy = phy;
 	mvif->mt76.band_idx = 0;
 	mvif->mt76.wmm_idx = mvif->mt76.idx % MT7921_MAX_WMM_SETS;

From a2d3442e3db1028f57efdeff26cd6e72a82cb648 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 20 May 2021 11:46:41 +0800
Subject: [PATCH 2250/3292] mt76: mt7921: enable runtime pm by default

mt7921 is mainly used in CE/IoT market so enable runtime-pm by default

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c  | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index a3517ed76813..c1a5bb5e5e02 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -185,7 +185,6 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	mutex_init(&dev->pm.mutex);
 	init_waitqueue_head(&dev->pm.wait);
 	spin_lock_init(&dev->pm.txq_lock);
-	set_bit(MT76_STATE_PM, &dev->mphy.state);
 	INIT_LIST_HEAD(&dev->phy.stats_list);
 	INIT_DELAYED_WORK(&dev->mphy.mac_work, mt7921_mac_work);
 	INIT_DELAYED_WORK(&dev->phy.scan_work, mt7921_scan_work);
@@ -200,6 +199,7 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	dev->pm.idle_timeout = MT7921_PM_TIMEOUT;
 	dev->pm.stats.last_wake_event = jiffies;
 	dev->pm.stats.last_doze_event = jiffies;
+	dev->pm.enable = true;
 
 	ret = mt7921_init_hardware(dev);
 	if (ret)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 06209d58ce27..37d8cbd12ff9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -945,8 +945,6 @@ static int mt7921_load_firmware(struct mt7921_dev *dev)
 	dev->mt76.hw->wiphy->wowlan = &mt76_connac_wowlan_support;
 #endif /* CONFIG_PM */
 
-	clear_bit(MT76_STATE_PM, &dev->mphy.state);
-
 	dev_err(dev->mt76.dev, "Firmware init done\n");
 
 	return 0;

From b4b880b90cb3863ca98e4ad55107d159742a79ae Mon Sep 17 00:00:00 2001
From: YN Chen <yn.chen@mediatek.com>
Date: Thu, 20 May 2021 11:46:35 +0800
Subject: [PATCH 2251/3292] mt76: connac: add bss color support for sta mode

Add bss color support for sta mode

Signed-off-by: Jayden.Kuo <jayden.kuo@mediatek.com>
Signed-off-by: YN Chen <yn.chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 14 ++++++++++++++
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  9 +++++++++
 2 files changed, 23 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index dd4a28b75e4e..9066c57b1e32 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1313,6 +1313,7 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
 				u8 pad[3];
 			} __packed hdr;
 			struct bss_info_uni_he he;
+			struct bss_info_uni_bss_color bss_color;
 		} he_req = {
 			.hdr = {
 				.bss_idx = mvif->idx,
@@ -1321,8 +1322,21 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
 				.tag = cpu_to_le16(UNI_BSS_INFO_HE_BASIC),
 				.len = cpu_to_le16(sizeof(struct bss_info_uni_he)),
 			},
+			.bss_color = {
+				.tag = cpu_to_le16(UNI_BSS_INFO_BSS_COLOR),
+				.len = cpu_to_le16(sizeof(struct bss_info_uni_bss_color)),
+				.enable = 0,
+				.bss_color = 0,
+			},
 		};
 
+		if (enable) {
+			he_req.bss_color.enable =
+				vif->bss_conf.he_bss_color.enabled;
+			he_req.bss_color.bss_color =
+				vif->bss_conf.he_bss_color.color;
+		}
+
 		mt76_connac_mcu_uni_bss_he_tlv(phy, vif,
 					       (struct tlv *)&he_req.he);
 		err = mt76_mcu_send_msg(mdev, MCU_UNI_CMD_BSS_INFO_UPDATE,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 676b1c6bc959..13f7d6a57889 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -575,6 +575,7 @@ enum {
 enum {
 	UNI_BSS_INFO_BASIC = 0,
 	UNI_BSS_INFO_RLM = 2,
+	UNI_BSS_INFO_BSS_COLOR = 4,
 	UNI_BSS_INFO_HE_BASIC = 5,
 	UNI_BSS_INFO_BCN_CONTENT = 7,
 	UNI_BSS_INFO_QBSS = 15,
@@ -789,6 +790,14 @@ struct mt76_connac_sched_scan_done {
 	__le16 pad;
 } __packed;
 
+struct bss_info_uni_bss_color {
+	__le16 tag;
+	__le16 len;
+	u8 enable;
+	u8 bss_color;
+	u8 rsv[2];
+} __packed;
+
 struct bss_info_uni_he {
 	__le16 tag;
 	__le16 len;

From f5e3db30135f1157d2fac83f16e0347b1e38b9c5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 23 May 2021 23:03:26 +0200
Subject: [PATCH 2252/3292] mt76: mt7921: return proper error value in
 mt7921_mac_init

Return possible error values in mt7921_mac_init routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   | 8 +++-----
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 5 ++++-
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index c1a5bb5e5e02..99701cda0b9d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -108,7 +108,7 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band)
 	mt76_clear(dev, MT_DMA_DCR0(band), MT_DMA_DCR0_RXD_G5_EN);
 }
 
-void mt7921_mac_init(struct mt7921_dev *dev)
+int mt7921_mac_init(struct mt7921_dev *dev)
 {
 	int i;
 
@@ -124,7 +124,7 @@ void mt7921_mac_init(struct mt7921_dev *dev)
 	for (i = 0; i < 2; i++)
 		mt7921_mac_init_band(dev, i);
 
-	mt76_connac_mcu_set_rts_thresh(&dev->mt76, 0x92b, 0);
+	return mt76_connac_mcu_set_rts_thresh(&dev->mt76, 0x92b, 0);
 }
 
 static int mt7921_init_hardware(struct mt7921_dev *dev)
@@ -164,9 +164,7 @@ static int mt7921_init_hardware(struct mt7921_dev *dev)
 	dev->mt76.global_wcid.tx_info |= MT_WCID_TX_INFO_SET;
 	rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid);
 
-	mt7921_mac_init(dev);
-
-	return 0;
+	return mt7921_mac_init(dev);
 }
 
 int mt7921_register_device(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index ffc73fabb5b9..1e52e92c5ad8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1298,7 +1298,10 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	if (err)
 		return err;
 
-	mt7921_mac_init(dev);
+	err = mt7921_mac_init(dev);
+	if (err)
+		return err;
+
 	return __mt7921_start(&dev->phy);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 710ad242fd53..1e65a8f6f211 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -319,7 +319,7 @@ static inline bool mt7921_dma_need_reinit(struct mt7921_dev *dev)
 	return !mt76_get_field(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
 }
 
-void mt7921_mac_init(struct mt7921_dev *dev);
+int mt7921_mac_init(struct mt7921_dev *dev);
 bool mt7921_mac_wtbl_update(struct mt7921_dev *dev, int idx, u32 mask);
 void mt7921_mac_reset_counters(struct mt7921_phy *phy);
 void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,

From d74c4b5667425c35d74906795a08e02e29df5b46 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 23 May 2021 23:08:05 +0200
Subject: [PATCH 2253/3292] mt76: mt7921: do not schedule hw reset if the
 device is not running

Do not schedule hw full reset if the device is not fully initialized
(e.g if the channel has not been configured yet). This patch fixes
the kernel crash reported below

[   44.440266] mt7921e 0000:01:00.0: chip reset failed
[   44.527575] Unable to handle kernel paging request at virtual address ffffffc02f3e0000
[   44.535771] Mem abort info:
[   44.538646]   ESR = 0x96000006
[   44.541792]   EC = 0x25: DABT (current EL), IL = 32 bits
[   44.547268]   SET = 0, FnV = 0
[   44.550413]   EA = 0, S1PTW = 0
[   44.553648] Data abort info:
[   44.556613]   ISV = 0, ISS = 0x00000006
[   44.560563]   CM = 0, WnR = 0
[   44.563619] swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000955000
[   44.570530] [ffffffc02f3e0000] pgd=100000003ffff003, p4d=100000003ffff003, pud=100000003ffff003, pmd=0000000000000000
[   44.581489] Internal error: Oops: 96000006 [#1] SMP
[   44.606406] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W         5.13.0-rc1-espressobin-12875-g6dc7f82ebc26 #33
[   44.617264] Hardware name: Globalscale Marvell ESPRESSOBin Board (DT)
[   44.623905] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--)
[   44.630100] pc : __queue_work+0x1f0/0x500
[   44.634249] lr : __queue_work+0x1e8/0x500
[   44.638384] sp : ffffffc010003d70
[   44.641798] x29: ffffffc010003d70 x28: 0000000000000000 x27: ffffff8003989200
[   44.649166] x26: ffffffc010c08510 x25: 0000000000000002 x24: ffffffc010ad90b0
[   44.656533] x23: ffffffc010c08508 x22: 0000000000000012 x21: 0000000000000000
[   44.663899] x20: ffffff8006385238 x19: ffffffc02f3e0000 x18: 00000000000003c9
[   44.671266] x17: 0000000000000000 x16: 0000000000000000 x15: 000009b1a8a3bf90
[   44.678632] x14: 0098968000000000 x13: 0000000000000000 x12: 0000000000000325
[   44.685998] x11: ffffff803fda1928 x10: 0000000000000001 x9 : ffffffc010003e98
[   44.693365] x8 : 0000000000000032 x7 : fff8000000000000 x6 : 0000000000000035
[   44.700732] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffffffc010adf700
[   44.708098] x2 : ffffff8006385238 x1 : 000000007fffffff x0 : 0000000000000000
[   44.715465] Call trace:
[   44.717982]  __queue_work+0x1f0/0x500
[   44.721760]  delayed_work_timer_fn+0x18/0x20
[   44.726167]  call_timer_fn+0x2c/0x178
[   44.729947]  run_timer_softirq+0x488/0x5c8
[   44.734172]  _stext+0x11c/0x378
[   44.737411]  irq_exit+0x100/0x108
[   44.740830]  __handle_domain_irq+0x60/0xb0
[   44.745059]  gic_handle_irq+0x70/0x2b4
[   44.748929]  el1_irq+0xb8/0x13c
[   44.752167]  arch_cpu_idle+0x14/0x30
[   44.755858]  default_idle_call+0x38/0x168
[   44.759994]  do_idle+0x1fc/0x210
[   44.763325]  cpu_startup_entry+0x20/0x58
[   44.767372]  rest_init+0xb8/0xc8
[   44.770703]  arch_call_rest_init+0xc/0x14
[   44.774841]  start_kernel+0x408/0x424
[   44.778623] Code: aa1403e0 97fff54f aa0003f5 b5fff500 (f9400275)
[   44.784907] ---[ end trace be73c3142d8c36a9 ]---
[   44.789668] Kernel panic - not syncing: Oops: Fatal exception in interrupt

Fixes: 0c1ce9884607 ("mt76: mt7921: add wifi reset support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 1e52e92c5ad8..4d1050ae9f39 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1354,6 +1354,9 @@ void mt7921_reset(struct mt76_dev *mdev)
 {
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 
+	if (!test_bit(MT76_STATE_RUNNING, &dev->mphy.state))
+		return;
+
 	if (!dev->hw_full_reset)
 		queue_work(dev->mt76.wq, &dev->reset_work);
 }

From 01f7da40917923bf9d8fd8d5c9a6ed646004e47c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 23 May 2021 23:10:12 +0200
Subject: [PATCH 2254/3292] mt76: mt7921: reset wfsys during hw probe

This patch fixes a mcu hang during device probe on
Marvell ESPRESSObin after a hot reboot.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index bd9143dc865f..7fca7dc466b8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -402,6 +402,10 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
+	ret = mt7921_wfsys_reset(dev);
+	if (ret)
+		return ret;
+
 	/* init tx queue */
 	ret = mt7921_init_tx_queues(&dev->phy, MT7921_TXQ_BAND0,
 				    MT7921_TX_RING_SIZE);

From 160731341845171fd0e5ecd39fe0a43cc9d9af36 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 25 May 2021 16:45:14 +0800
Subject: [PATCH 2255/3292] mt76: mt7915: add .offset_tsf callback

It's much more accurate than .get_tsf + .set_tsf, and switch to use
mt76_rmw to operate tsf registers.

Tested-by: Xing Song <xing.song@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/main.c  | 33 +++++++++++++++++--
 .../net/wireless/mediatek/mt76/mt7915/regs.h  |  2 ++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 64f9ebe4424a..2485f65766e7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -816,7 +816,8 @@ mt7915_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 
 	n = mvif->omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : mvif->omac_idx;
 	/* TSF software read */
-	mt76_set(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_MODE);
+	mt76_rmw(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_MODE,
+		 MT_LPON_TCR_SW_READ);
 	tsf.t32[0] = mt76_rr(dev, MT_LPON_UTTR0(band));
 	tsf.t32[1] = mt76_rr(dev, MT_LPON_UTTR1(band));
 
@@ -845,7 +846,34 @@ mt7915_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	mt76_wr(dev, MT_LPON_UTTR0(band), tsf.t32[0]);
 	mt76_wr(dev, MT_LPON_UTTR1(band), tsf.t32[1]);
 	/* TSF software overwrite */
-	mt76_set(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_WRITE);
+	mt76_rmw(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_MODE,
+		 MT_LPON_TCR_SW_WRITE);
+
+	mutex_unlock(&dev->mt76.mutex);
+}
+
+static void
+mt7915_offset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  s64 timestamp)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	bool band = phy != &dev->phy;
+	union {
+		u64 t64;
+		u32 t32[2];
+	} tsf = { .t64 = timestamp, };
+	u16 n;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	n = mvif->omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : mvif->omac_idx;
+	mt76_wr(dev, MT_LPON_UTTR0(band), tsf.t32[0]);
+	mt76_wr(dev, MT_LPON_UTTR1(band), tsf.t32[1]);
+	/* TSF software adjust*/
+	mt76_rmw(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_MODE,
+		 MT_LPON_TCR_SW_ADJUST);
 
 	mutex_unlock(&dev->mt76.mutex);
 }
@@ -1036,6 +1064,7 @@ const struct ieee80211_ops mt7915_ops = {
 	.get_stats = mt7915_get_stats,
 	.get_tsf = mt7915_get_tsf,
 	.set_tsf = mt7915_set_tsf,
+	.offset_tsf = mt7915_offset_tsf,
 	.get_survey = mt76_get_survey,
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7915_set_antenna,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index efe0f2904c66..e36b30d84f07 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -124,6 +124,8 @@
 #define MT_LPON_TCR(_band, n)		MT_WF_LPON(_band, 0x0a8 + (n) * 4)
 #define MT_LPON_TCR_SW_MODE		GENMASK(1, 0)
 #define MT_LPON_TCR_SW_WRITE		BIT(0)
+#define MT_LPON_TCR_SW_ADJUST		BIT(1)
+#define MT_LPON_TCR_SW_READ		GENMASK(1, 0)
 
 /* MIB: band 0(0x24800), band 1(0xa4800) */
 #define MT_WF_MIB_BASE(_band)		((_band) ? 0xa4800 : 0x24800)

From accbcea4f301e7db084b0a393de8100bdae26ce6 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 25 May 2021 17:22:24 +0800
Subject: [PATCH 2256/3292] mt76: mt7615: add .offset_tsf callback

It's much more accurate than .get_tsf + .set_tsf and switch to use
mt76_rmw to operate tsf registers.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/mac.c   |  2 +-
 .../net/wireless/mediatek/mt76/mt7615/main.c  | 32 +++++++++++++++++--
 .../net/wireless/mediatek/mt76/mt7615/regs.h  |  2 ++
 .../wireless/mediatek/mt76/mt7615/usb_sdio.c  |  2 +-
 4 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 7153f1da92d0..4daa0540e051 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1062,7 +1062,7 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
 	addr = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
 
-	mt76_set(dev, addr, MT_LPON_TCR_MODE); /* TSF read */
+	mt76_rmw(dev, addr, MT_LPON_TCR_MODE, MT_LPON_TCR_READ); /* TSF read */
 	sta->rate_set_tsf = mt76_rr(dev, MT_LPON_UTTR0) & ~BIT(0);
 	sta->rate_set_tsf |= rd.rateset;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index faae60775b16..c2a122ca2e83 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -881,7 +881,8 @@ mt7615_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 
 	mt7615_mutex_acquire(dev);
 
-	mt76_set(dev, reg, MT_LPON_TCR_MODE); /* TSF read */
+	/* TSF read */
+	mt76_rmw(dev, reg, MT_LPON_TCR_MODE, MT_LPON_TCR_READ);
 	tsf.t32[0] = mt76_rr(dev, MT_LPON_UTTR0);
 	tsf.t32[1] = mt76_rr(dev, MT_LPON_UTTR1);
 
@@ -911,7 +912,33 @@ mt7615_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	mt76_wr(dev, MT_LPON_UTTR0, tsf.t32[0]);
 	mt76_wr(dev, MT_LPON_UTTR1, tsf.t32[1]);
 	/* TSF software overwrite */
-	mt76_set(dev, reg, MT_LPON_TCR_WRITE);
+	mt76_rmw(dev, reg, MT_LPON_TCR_MODE, MT_LPON_TCR_WRITE);
+
+	mt7615_mutex_release(dev);
+}
+
+static void
+mt7615_offset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  s64 timestamp)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	union {
+		u64 t64;
+		u32 t32[2];
+	} tsf = { .t64 = timestamp, };
+	u16 idx = mvif->mt76.omac_idx;
+	u32 reg;
+
+	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
+	reg = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
+
+	mt7615_mutex_acquire(dev);
+
+	mt76_wr(dev, MT_LPON_UTTR0, tsf.t32[0]);
+	mt76_wr(dev, MT_LPON_UTTR1, tsf.t32[1]);
+	/* TSF software adjust*/
+	mt76_rmw(dev, reg, MT_LPON_TCR_MODE, MT_LPON_TCR_ADJUST);
 
 	mt7615_mutex_release(dev);
 }
@@ -1278,6 +1305,7 @@ const struct ieee80211_ops mt7615_ops = {
 	.get_stats = mt7615_get_stats,
 	.get_tsf = mt7615_get_tsf,
 	.set_tsf = mt7615_set_tsf,
+	.offset_tsf = mt7615_offset_tsf,
 	.get_survey = mt76_get_survey,
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7615_set_antenna,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 63c081bb04d0..6712ad9faeaa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -463,7 +463,9 @@ enum mt7615_reg_base {
 #define MT_LPON_TCR0(_n)		MT_LPON(0x010 + ((_n) * 4))
 #define MT_LPON_TCR2(_n)		MT_LPON(0x0f8 + ((_n) - 2) * 4)
 #define MT_LPON_TCR_MODE		GENMASK(1, 0)
+#define MT_LPON_TCR_READ		GENMASK(1, 0)
 #define MT_LPON_TCR_WRITE		BIT(0)
+#define MT_LPON_TCR_ADJUST		BIT(1)
 
 #define MT_LPON_UTTR0			MT_LPON(0x018)
 #define MT_LPON_UTTR1			MT_LPON(0x01c)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
index 75a05f8dd7e7..996d48cca18a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -123,7 +123,7 @@ static int mt7663_usb_sdio_set_rates(struct mt7615_dev *dev,
 	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
 	addr = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
 
-	mt76_set(dev, addr, MT_LPON_TCR_MODE); /* TSF read */
+	mt76_rmw(dev, addr, MT_LPON_TCR_MODE, MT_LPON_TCR_READ); /* TSF read */
 	val = mt76_rr(dev, MT_LPON_UTTR0);
 	sta->rate_set_tsf = (val & ~BIT(0)) | rate->rateset;
 

From ae130bb8d4bd601f72ff7c93f049a498ecc58b87 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Tue, 25 May 2021 18:34:09 +0800
Subject: [PATCH 2257/3292] mt76: mt7915: use mt7915_mcu_get_txpower_sku() to
 get per-rate txpower

Get per-rate txpower with mcu command. This is the preparation of
co-driver for the next chipset, which has different tmac power registers
but can share this same command.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7915/debugfs.c   | 49 ++++---------------
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 34 ++++++++++++-
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  3 ++
 .../net/wireless/mediatek/mt76/mt7915/regs.h  |  5 --
 4 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index f1e8b076d54c..3961d46e0df8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -295,54 +295,23 @@ mt7915_puts_rate_txpower(struct seq_file *s, struct mt7915_phy *phy)
 		"RU26", "RU52", "RU106", "RU242/SU20",
 		"RU484/SU40", "RU996/SU80", "RU2x996/SU160"
 	};
-	struct mt7915_dev *dev = dev_get_drvdata(s->private);
-	bool ext_phy = phy != &dev->phy;
-	u32 reg_base;
-	int i, idx = 0;
+	s8 txpower[MT7915_SKU_RATE_NUM], *buf;
+	int i;
 
 	if (!phy)
 		return;
 
-	reg_base = MT_TMAC_FP0R0(ext_phy);
-	seq_printf(s, "\nBand %d\n", ext_phy);
+	seq_printf(s, "\nBand %d\n", phy != &phy->dev->phy);
 
-	for (i = 0; i < ARRAY_SIZE(mt7915_sku_group_len); i++) {
-		u8 cnt, mcs_num = mt7915_sku_group_len[i];
-		s8 txpower[12];
-		int j;
+	mt7915_mcu_get_txpower_sku(phy, txpower, sizeof(txpower));
+	for (i = 0, buf = txpower; i < ARRAY_SIZE(mt7915_sku_group_len); i++) {
+		u8 mcs_num = mt7915_sku_group_len[i];
 
-		if (i == SKU_HT_BW20 || i == SKU_HT_BW40) {
-			mcs_num = 8;
-		} else if (i >= SKU_VHT_BW20 && i <= SKU_VHT_BW160) {
+		if (i >= SKU_VHT_BW20 && i <= SKU_VHT_BW160)
 			mcs_num = 10;
-		} else if (i == SKU_HE_RU26) {
-			reg_base = MT_TMAC_FP0R18(ext_phy);
-			idx = 0;
-		}
 
-		for (j = 0, cnt = 0; j < DIV_ROUND_UP(mcs_num, 4); j++) {
-			u32 val;
-
-			if (i == SKU_VHT_BW160 && idx == 60) {
-				reg_base = MT_TMAC_FP0R15(ext_phy);
-				idx = 0;
-			}
-
-			val = mt76_rr(dev, reg_base + (idx / 4) * 4);
-
-			if (idx && idx % 4)
-				val >>= (idx % 4) * 8;
-
-			while (val > 0 && cnt < mcs_num) {
-				s8 pwr = FIELD_GET(MT_TMAC_FP_MASK, val);
-
-				txpower[cnt++] = pwr;
-				val >>= 8;
-				idx++;
-			}
-		}
-
-		mt76_seq_puts_array(s, sku_group_name[i], txpower, mcs_num);
+		mt76_seq_puts_array(s, sku_group_name[i], buf, mcs_num);
+		buf += mt7915_sku_group_len[i];
 	}
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 05ba45d0b3e1..3418ecb0a98f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3676,7 +3676,6 @@ int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx)
 
 int mt7915_mcu_set_txpower_sku(struct mt7915_phy *phy)
 {
-#define MT7915_SKU_RATE_NUM		161
 	struct mt7915_dev *dev = phy->dev;
 	struct mt76_phy *mphy = phy->mt76;
 	struct ieee80211_hw *hw = mphy->hw;
@@ -3726,6 +3725,39 @@ int mt7915_mcu_set_txpower_sku(struct mt7915_phy *phy)
 				 sizeof(req), true);
 }
 
+int mt7915_mcu_get_txpower_sku(struct mt7915_phy *phy, s8 *txpower, int len)
+{
+#define RATE_POWER_INFO	2
+	struct mt7915_dev *dev = phy->dev;
+	struct {
+		u8 format_id;
+		u8 category;
+		u8 band;
+		u8 _rsv;
+	} __packed req = {
+		.format_id = 7,
+		.category = RATE_POWER_INFO,
+		.band = phy != &dev->phy,
+	};
+	s8 res[MT7915_SKU_RATE_NUM][2];
+	struct sk_buff *skb;
+	int ret, i;
+
+	ret = mt76_mcu_send_and_get_msg(&dev->mt76,
+					MCU_EXT_CMD(TX_POWER_FEATURE_CTRL),
+					&req, sizeof(req), true, &skb);
+	if (ret)
+		return ret;
+
+	memcpy(res, skb->data + 4, sizeof(res));
+	for (i = 0; i < len; i++)
+		txpower[i] = res[i][req.band];
+
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
 int mt7915_mcu_set_test_param(struct mt7915_dev *dev, u8 param, bool test_mode,
 			      u8 en)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index f3ffa907bf87..bd6c555814a8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -40,6 +40,8 @@
 
 #define MT7915_THERMAL_THROTTLE_MAX	100
 
+#define MT7915_SKU_RATE_NUM		161
+
 struct mt7915_vif;
 struct mt7915_sta;
 struct mt7915_dfs_pulse;
@@ -351,6 +353,7 @@ int mt7915_mcu_set_rts_thresh(struct mt7915_phy *phy, u32 val);
 int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter);
 int mt7915_mcu_set_sku_en(struct mt7915_phy *phy, bool enable);
 int mt7915_mcu_set_txpower_sku(struct mt7915_phy *phy);
+int mt7915_mcu_get_txpower_sku(struct mt7915_phy *phy, s8 *txpower, int len);
 int mt7915_mcu_set_txbf_type(struct mt7915_dev *dev);
 int mt7915_mcu_set_txbf_module(struct mt7915_dev *dev);
 int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index e36b30d84f07..8b080cb8f10f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -82,11 +82,6 @@
 #define MT_TMAC_CTCR0_INS_DDLMT_EN		BIT(17)
 #define MT_TMAC_CTCR0_INS_DDLMT_VHT_SMPDU_EN	BIT(18)
 
-#define MT_TMAC_FP0R0(_band)		MT_WF_TMAC(_band, 0x020)
-#define MT_TMAC_FP0R15(_band)		MT_WF_TMAC(_band, 0x080)
-#define MT_TMAC_FP0R18(_band)		MT_WF_TMAC(_band, 0x270)
-#define MT_TMAC_FP_MASK			GENMASK(7, 0)
-
 #define MT_TMAC_TFCR0(_band)		MT_WF_TMAC(_band, 0x1e0)
 
 #define MT_WF_DMA_BASE(_band)		((_band) ? 0xa1e00 : 0x21e00)

From ffce39bfb6073ff6f74a1332e6563b2d18392a5b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 27 May 2021 13:01:24 +0200
Subject: [PATCH 2258/3292] mt76: mt7615: remove useless if condition in
 mt7615_add_interface()

Get rid of unnecessary if condition in mt7615_add_interface routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index c2a122ca2e83..7c9a55c57578 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -240,8 +240,6 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
 	}
 
 	ret = mt7615_mcu_add_dev_info(phy, vif, true);
-	if (ret)
-		goto out;
 out:
 	mt7615_mutex_release(dev);
 

From fe2c3b1fc64ea0c7a5b2ca2f671b4572ff99baf8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 27 May 2021 13:35:28 +0200
Subject: [PATCH 2259/3292] mt76: testmode: fix memory leak in
 mt76_testmode_alloc_skb

Free all pending frames in case of failure in mt76_testmode_alloc_skb
routine

Fixes: 2601dda8faa76 ("mt76: testmode: add support to send larger packet")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/testmode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/testmode.c b/drivers/net/wireless/mediatek/mt76/testmode.c
index 001d0ba5f73e..f40387a866ee 100644
--- a/drivers/net/wireless/mediatek/mt76/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/testmode.c
@@ -158,8 +158,11 @@ int mt76_testmode_alloc_skb(struct mt76_phy *phy, u32 len)
 			frag_len = MT_TXP_MAX_LEN;
 
 		frag = alloc_skb(frag_len, GFP_KERNEL);
-		if (!frag)
+		if (!frag) {
+			mt76_testmode_free_skb(phy);
+			dev_kfree_skb(head);
 			return -ENOMEM;
+		}
 
 		__skb_put_zero(frag, frag_len);
 		head->len += frag->len;

From d705ae86852d7676214c0a71479b52f528bdd0d1 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 27 May 2021 13:35:29 +0200
Subject: [PATCH 2260/3292] mt76: testmode: remove unnecessary function calls
 in mt76_testmode_free_skb

Get rid of unnecessary function calls in mt76_testmode_free_skb routine
since they are already managed by dev_kfree_skb

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/testmode.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/testmode.c b/drivers/net/wireless/mediatek/mt76/testmode.c
index f40387a866ee..c516469278a4 100644
--- a/drivers/net/wireless/mediatek/mt76/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/testmode.c
@@ -88,17 +88,8 @@ static void
 mt76_testmode_free_skb(struct mt76_phy *phy)
 {
 	struct mt76_testmode_data *td = &phy->test;
-	struct sk_buff *skb = td->tx_skb;
 
-	if (!skb)
-		return;
-
-	if (skb_has_frag_list(skb)) {
-		kfree_skb_list(skb_shinfo(skb)->frag_list);
-		skb_shinfo(skb)->frag_list = NULL;
-	}
-
-	dev_kfree_skb(skb);
+	dev_kfree_skb(td->tx_skb);
 	td->tx_skb = NULL;
 }
 

From 223cea6d3c974acd393bfac2d168b2945a6cf1e5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 27 May 2021 13:35:30 +0200
Subject: [PATCH 2261/3292] mt76: testmode: remove undefined behaviour in
 mt76_testmode_alloc_skb

Get rid of an undefined behaviour in mt76_testmode_alloc_skb routine
allocating skb frames

Fixes: 2601dda8faa76 ("mt76: testmode: add support to send larger packet")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/testmode.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/testmode.c b/drivers/net/wireless/mediatek/mt76/testmode.c
index c516469278a4..c6a85a0cfc89 100644
--- a/drivers/net/wireless/mediatek/mt76/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/testmode.c
@@ -159,12 +159,8 @@ int mt76_testmode_alloc_skb(struct mt76_phy *phy, u32 len)
 		head->len += frag->len;
 		head->data_len += frag->len;
 
-		if (*frag_tail) {
-			(*frag_tail)->next = frag;
-			frag_tail = &frag;
-		} else {
-			*frag_tail = frag;
-		}
+		*frag_tail = frag;
+		frag_tail = &(*frag_tail)->next;
 	}
 
 	mt76_testmode_free_skb(phy);

From 3253f8fddd954aba9ac88ce3c34551dcca505b21 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 22 Apr 2021 06:20:03 +0800
Subject: [PATCH 2262/3292] mt76: mt7615: fix potential overflow on large shift

Fix the following static checker warning:
error: undefined (user controlled) shift '(((1))) << (c->omac_idx)'

Fixes: 402a695b1ae6 ("mt76: mt7615: fix CSA notification for DBDC")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index a59bd7af81be..fc9cd8da2a11 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -411,6 +411,9 @@ mt7615_mcu_rx_csa_notify(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	c = (struct mt7615_mcu_csa_notify *)skb->data;
 
+	if (c->omac_idx > EXT_BSSID_MAX)
+		return;
+
 	if (ext_phy && ext_phy->omac_mask & BIT_ULL(c->omac_idx))
 		mphy = dev->mt76.phy2;
 

From 3cce2b98e0241ff238e25eca1dbb480534a7695a Mon Sep 17 00:00:00 2001
From: Deren Wu <deren.wu@mediatek.com>
Date: Fri, 28 May 2021 19:38:09 +0800
Subject: [PATCH 2263/3292] mt76: mt7921: introduce mac tx done handling

Instead of read tx status from mac table, add new mechanisam to hanele
tx done event for data frame, every 250ms

This event indicate the real tx status of this pkt in mac layer and
would help mac80211 correct status more frequently

Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 69 +++++++++++--------
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   | 47 +++++++++++++
 .../net/wireless/mediatek/mt76/mt7921/mcu.h   | 28 ++++++++
 .../wireless/mediatek/mt76/mt7921/mt7921.h    |  2 +
 4 files changed, 117 insertions(+), 29 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 4d1050ae9f39..99bf20d48e4c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -13,6 +13,36 @@
 #define HE_PREP(f, m, v)	le16_encode_bits(le32_get_bits(v, MT_CRXV_HE_##m),\
 						 IEEE80211_RADIOTAP_HE_##f)
 
+static u8
+mt7921_next_pid(struct mt7921_dev *dev, struct mt76_wcid *wcid)
+{
+	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
+	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
+	    wcid->packet_id == MT_PACKET_ID_NO_SKB)
+		wcid->packet_id = MT_PACKET_ID_FIRST;
+	return wcid->packet_id;
+}
+
+static unsigned long
+mt7921_next_txs_set(struct mt7921_dev *dev, struct mt76_wcid *wcid,
+		    u32 timeout)
+{
+	struct mt7921_sta *msta;
+
+	msta = container_of(wcid, struct mt7921_sta, wcid);
+	msta->next_txs_ts = jiffies + msecs_to_jiffies(timeout);
+	return msta->next_txs_ts;
+}
+
+static bool
+mt7921_next_txs_timeout(struct mt7921_dev *dev, struct mt76_wcid *wcid)
+{
+	struct mt7921_sta *msta;
+
+	msta = container_of(wcid, struct mt7921_sta, wcid);
+	return time_is_before_jiffies(msta->next_txs_ts);
+}
+
 static struct mt76_wcid *mt7921_rx_get_wcid(struct mt7921_dev *dev,
 					    u16 idx, bool unicast)
 {
@@ -726,7 +756,7 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_vif *vif = info->control.vif;
 	struct mt76_phy *mphy = &dev->mphy;
-	u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
+	u8 pid, p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
 	bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
 	u16 tx_count = 15;
 	u32 val;
@@ -795,6 +825,15 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 		txwi[6] |= cpu_to_le32(val);
 		txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
 	}
+
+	if ((FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]) &
+		(IEEE80211_FTYPE_DATA >> 2)) &&
+		mt7921_next_txs_timeout(dev, wcid)) {
+		mt7921_next_txs_set(dev, wcid, 250);
+		pid = mt7921_next_pid(dev, wcid);
+		val = MT_TXD5_TX_STATUS_MCU | FIELD_PREP(MT_TXD5_PID, pid);
+		txwi[5] |= cpu_to_le32(val);
+	}
 }
 
 static void
@@ -1392,30 +1431,6 @@ mt7921_mac_update_mib_stats(struct mt7921_phy *phy)
 	}
 }
 
-static void
-mt7921_mac_sta_stats_work(struct mt7921_phy *phy)
-{
-	struct mt7921_dev *dev = phy->dev;
-	struct mt7921_sta *msta;
-	LIST_HEAD(list);
-
-	spin_lock_bh(&dev->sta_poll_lock);
-	list_splice_init(&phy->stats_list, &list);
-
-	while (!list_empty(&list)) {
-		msta = list_first_entry(&list, struct mt7921_sta, stats_list);
-		list_del_init(&msta->stats_list);
-		spin_unlock_bh(&dev->sta_poll_lock);
-
-		/* query wtbl info to report tx rate for further devices */
-		mt7921_get_wtbl_info(dev, msta->wcid.idx);
-
-		spin_lock_bh(&dev->sta_poll_lock);
-	}
-
-	spin_unlock_bh(&dev->sta_poll_lock);
-}
-
 void mt7921_mac_work(struct work_struct *work)
 {
 	struct mt7921_phy *phy;
@@ -1433,10 +1448,6 @@ void mt7921_mac_work(struct work_struct *work)
 
 		mt7921_mac_update_mib_stats(phy);
 	}
-	if (++phy->sta_work_count == 4) {
-		phy->sta_work_count = 0;
-		mt7921_mac_sta_stats_work(phy);
-	}
 
 	mt7921_mutex_release(phy->dev);
 	ieee80211_queue_delayed_work(phy->mt76->hw, &mphy->mac_work,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 37d8cbd12ff9..7b366df4a8ea 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -534,6 +534,49 @@ mt7921_mcu_low_power_event(struct mt7921_dev *dev, struct sk_buff *skb)
 	trace_lp_event(dev, event->state);
 }
 
+static void
+mt7921_mcu_tx_done_event(struct mt7921_dev *dev, struct sk_buff *skb)
+{
+	struct mt7921_mcu_tx_done_event *event;
+	struct mt7921_sta *msta;
+	struct mt7921_phy *mphy = &dev->phy;
+	struct mt7921_mcu_peer_cap peer;
+	struct ieee80211_sta *sta;
+	LIST_HEAD(list);
+
+	skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
+	event = (struct mt7921_mcu_tx_done_event *)skb->data;
+
+	spin_lock_bh(&dev->sta_poll_lock);
+	list_splice_init(&mphy->stats_list, &list);
+
+	while (!list_empty(&list)) {
+		msta = list_first_entry(&list, struct mt7921_sta, stats_list);
+		list_del_init(&msta->stats_list);
+
+		if (msta->wcid.idx != event->wlan_idx)
+			continue;
+
+		spin_unlock_bh(&dev->sta_poll_lock);
+
+		sta = wcid_to_sta(&msta->wcid);
+
+		/* peer config based on IEEE SPEC */
+		memset(&peer, 0x0, sizeof(peer));
+		peer.bw = event->bw;
+		peer.g2 = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20);
+		peer.g4 = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40);
+		peer.g8 = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80);
+		peer.g16 = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_160);
+		mt7921_mcu_tx_rate_parse(mphy->mt76, &peer,
+					 &msta->stats.tx_rate, event->tx_rate);
+
+		spin_lock_bh(&dev->sta_poll_lock);
+		break;
+	}
+	spin_unlock_bh(&dev->sta_poll_lock);
+}
+
 static void
 mt7921_mcu_rx_unsolicited_event(struct mt7921_dev *dev, struct sk_buff *skb)
 {
@@ -560,6 +603,9 @@ mt7921_mcu_rx_unsolicited_event(struct mt7921_dev *dev, struct sk_buff *skb)
 	case MCU_EVENT_LP_INFO:
 		mt7921_mcu_low_power_event(dev, skb);
 		break;
+	case MCU_EVENT_TX_DONE:
+		mt7921_mcu_tx_done_event(dev, skb);
+		break;
 	default:
 		break;
 	}
@@ -580,6 +626,7 @@ void mt7921_mcu_rx_event(struct mt7921_dev *dev, struct sk_buff *skb)
 	    rxd->eid == MCU_EVENT_SCHED_SCAN_DONE ||
 	    rxd->eid == MCU_EVENT_BSS_ABSENCE ||
 	    rxd->eid == MCU_EVENT_SCAN_DONE ||
+	    rxd->eid == MCU_EVENT_TX_DONE ||
 	    rxd->eid == MCU_EVENT_DBG_MSG ||
 	    rxd->eid == MCU_EVENT_COREDUMP ||
 	    rxd->eid == MCU_EVENT_LP_INFO ||
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index 49823d0a3d0a..22ebef4add00 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -81,6 +81,7 @@ enum {
 	MCU_EVENT_REG_ACCESS = 0x05,
 	MCU_EVENT_LP_INFO = 0x07,
 	MCU_EVENT_SCAN_DONE = 0x0d,
+	MCU_EVENT_TX_DONE = 0x0f,
 	MCU_EVENT_BSS_ABSENCE  = 0x11,
 	MCU_EVENT_BSS_BEACON_LOSS = 0x13,
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
@@ -407,4 +408,31 @@ struct mt7921_txpwr_event {
 	struct mt7921_txpwr txpwr;
 } __packed;
 
+struct mt7921_mcu_tx_done_event {
+	u8 pid;
+	u8 status;
+	u16 seq;
+
+	u8 wlan_idx;
+	u8 tx_cnt;
+	u16 tx_rate;
+
+	u8 flag;
+	u8 tid;
+	u8 rsp_rate;
+	u8 mcs;
+
+	u8 bw;
+	u8 tx_pwr;
+	u8 reason;
+	u8 rsv0[1];
+
+	u32 delay;
+	u32 timestamp;
+	u32 applied_flag;
+
+	u8 txs[28];
+
+	u8 rsv1[32];
+} __packed;
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 1e65a8f6f211..a9b21fcc321c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -92,6 +92,8 @@ struct mt7921_sta {
 	unsigned long ampdu_state;
 
 	struct mt7921_sta_key_conf bip;
+
+	unsigned long next_txs_ts;
 };
 
 DECLARE_EWMA(rssi, 10, 8);

From e3fd9934cf6e22c30e843d1902695b2379318be0 Mon Sep 17 00:00:00 2001
From: Deren Wu <deren.wu@mediatek.com>
Date: Fri, 28 May 2021 01:05:33 +0800
Subject: [PATCH 2264/3292] mt76: mt7921: update statistic in active mode only

wakeup chip every 250ms may cause huge power consumption

try to update statistic counter only if in active status only,
and it would lead fewer power cost

Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 99bf20d48e4c..882bb74d0406 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1483,13 +1483,15 @@ void mt7921_pm_power_save_work(struct work_struct *work)
 {
 	struct mt7921_dev *dev;
 	unsigned long delta;
+	struct mt76_phy *mphy;
 
 	dev = (struct mt7921_dev *)container_of(work, struct mt7921_dev,
 						pm.ps_work.work);
+	mphy = dev->phy.mt76;
 
 	delta = dev->pm.idle_timeout;
-	if (test_bit(MT76_HW_SCANNING, &dev->mphy.state) ||
-	    test_bit(MT76_HW_SCHED_SCANNING, &dev->mphy.state))
+	if (test_bit(MT76_HW_SCANNING, &mphy->state) ||
+	    test_bit(MT76_HW_SCHED_SCANNING, &mphy->state))
 		goto out;
 
 	if (time_is_after_jiffies(dev->pm.last_activity + delta)) {
@@ -1497,8 +1499,10 @@ void mt7921_pm_power_save_work(struct work_struct *work)
 		goto out;
 	}
 
-	if (!mt7921_mcu_fw_pmctrl(dev))
+	if (!mt7921_mcu_fw_pmctrl(dev)) {
+		cancel_delayed_work_sync(&mphy->mac_work);
 		return;
+	}
 out:
 	queue_delayed_work(dev->mt76.wq, &dev->pm.ps_work, delta);
 }

From 8af414e8835be1a214ac16c37fc8686ef68218e6 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 28 May 2021 13:02:24 +0200
Subject: [PATCH 2265/3292] mt76: allow hw driver code to overwrite wiphy
 interface_modes

Move wiphy interface_modes configuration in mt76_alloc_device and
mt76_alloc_phy in order to be overwritten by hw specific code
since some drivers do not support all operating modes (mt7921
supports sta only in the current codebase)

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 33 ++++++++++++-------
 .../net/wireless/mediatek/mt76/mt7921/init.c  |  2 ++
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 447bc9a3abb0..49da219d4e52 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -331,17 +331,6 @@ mt76_phy_init(struct mt76_phy *phy, struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, MFP_CAPABLE);
 	ieee80211_hw_set(hw, AP_LINK_PS);
 	ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS);
-
-	wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
-	wiphy->interface_modes =
-		BIT(NL80211_IFTYPE_STATION) |
-		BIT(NL80211_IFTYPE_AP) |
-#ifdef CONFIG_MAC80211_MESH
-		BIT(NL80211_IFTYPE_MESH_POINT) |
-#endif
-		BIT(NL80211_IFTYPE_P2P_CLIENT) |
-		BIT(NL80211_IFTYPE_P2P_GO) |
-		BIT(NL80211_IFTYPE_ADHOC);
 }
 
 struct mt76_phy *
@@ -362,6 +351,17 @@ mt76_alloc_phy(struct mt76_dev *dev, unsigned int size,
 	phy->hw = hw;
 	phy->priv = hw->priv + phy_size;
 
+	hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_AP) |
+#ifdef CONFIG_MAC80211_MESH
+		BIT(NL80211_IFTYPE_MESH_POINT) |
+#endif
+		BIT(NL80211_IFTYPE_P2P_CLIENT) |
+		BIT(NL80211_IFTYPE_P2P_GO) |
+		BIT(NL80211_IFTYPE_ADHOC);
+
 	return phy;
 }
 EXPORT_SYMBOL_GPL(mt76_alloc_phy);
@@ -444,6 +444,17 @@ mt76_alloc_device(struct device *pdev, unsigned int size,
 	mutex_init(&dev->mcu.mutex);
 	dev->tx_worker.fn = mt76_tx_worker;
 
+	hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
+	hw->wiphy->interface_modes =
+		BIT(NL80211_IFTYPE_STATION) |
+		BIT(NL80211_IFTYPE_AP) |
+#ifdef CONFIG_MAC80211_MESH
+		BIT(NL80211_IFTYPE_MESH_POINT) |
+#endif
+		BIT(NL80211_IFTYPE_P2P_CLIENT) |
+		BIT(NL80211_IFTYPE_P2P_GO) |
+		BIT(NL80211_IFTYPE_ADHOC);
+
 	spin_lock_init(&dev->token_lock);
 	idr_init(&dev->token);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 99701cda0b9d..9253706c24a0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -62,6 +62,8 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	hw->vif_data_size = sizeof(struct mt7921_vif);
 
 	wiphy->iface_combinations = if_comb;
+	wiphy->flags &= ~WIPHY_FLAG_IBSS_RSN;
+	wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
 	wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
 	wiphy->max_scan_ie_len = MT76_CONNAC_SCAN_IE_LEN;
 	wiphy->max_scan_ssids = 4;

From 7f731405eeca869c51c8792544ef5b26c28c31c3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 1 Jun 2021 22:26:31 +0200
Subject: [PATCH 2266/3292] mt76: mt7915: improve error recovery reliability

- Remove no-op code for queue lock/unlock, which is no longer needed
- Set a missing DMA flag
- Wait for full completion of error recovery before restarting tx
- Schedule IRQ tasklet to ensure that IRQ mask gets written

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 642a11e72bef..ecd00dfa68ff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1608,14 +1608,18 @@ mt7915_dma_reset(struct mt7915_dev *dev)
 	mt76_set(dev, MT_WFDMA0_GLO_CFG,
 		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
 	mt76_set(dev, MT_WFDMA1_GLO_CFG,
-		 MT_WFDMA1_GLO_CFG_TX_DMA_EN | MT_WFDMA1_GLO_CFG_RX_DMA_EN);
+		 MT_WFDMA1_GLO_CFG_TX_DMA_EN | MT_WFDMA1_GLO_CFG_RX_DMA_EN |
+		 MT_WFDMA1_GLO_CFG_OMIT_TX_INFO |
+		 MT_WFDMA1_GLO_CFG_OMIT_RX_INFO);
 	if (dev->hif2) {
 		mt76_set(dev, MT_WFDMA0_GLO_CFG + hif1_ofs,
 			(MT_WFDMA0_GLO_CFG_TX_DMA_EN |
 			 MT_WFDMA0_GLO_CFG_RX_DMA_EN));
 		mt76_set(dev, MT_WFDMA1_GLO_CFG + hif1_ofs,
 			(MT_WFDMA1_GLO_CFG_TX_DMA_EN |
-			 MT_WFDMA1_GLO_CFG_RX_DMA_EN));
+			 MT_WFDMA1_GLO_CFG_RX_DMA_EN |
+			 MT_WFDMA1_GLO_CFG_OMIT_TX_INFO |
+			 MT_WFDMA1_GLO_CFG_OMIT_RX_INFO));
 	}
 }
 
@@ -1659,11 +1663,6 @@ void mt7915_mac_reset_work(struct work_struct *work)
 		set_bit(MT76_RESET, &phy2->mt76->state);
 		cancel_delayed_work_sync(&phy2->mt76->mac_work);
 	}
-	/* lock/unlock all queues to ensure that no tx is pending */
-	mt76_txq_schedule_all(&dev->mphy);
-	if (ext_phy)
-		mt76_txq_schedule_all(ext_phy);
-
 	mt76_worker_disable(&dev->mt76.tx_worker);
 	napi_disable(&dev->mt76.napi[0]);
 	napi_disable(&dev->mt76.napi[1]);
@@ -1689,10 +1688,6 @@ void mt7915_mac_reset_work(struct work_struct *work)
 	if (phy2)
 		clear_bit(MT76_RESET, &phy2->mt76->state);
 
-	mt76_worker_enable(&dev->mt76.tx_worker);
-	napi_enable(&dev->mt76.tx_napi);
-	napi_schedule(&dev->mt76.tx_napi);
-
 	napi_enable(&dev->mt76.napi[0]);
 	napi_schedule(&dev->mt76.napi[0]);
 
@@ -1701,14 +1696,20 @@ void mt7915_mac_reset_work(struct work_struct *work)
 
 	napi_enable(&dev->mt76.napi[2]);
 	napi_schedule(&dev->mt76.napi[2]);
+	tasklet_schedule(&dev->irq_tasklet);
+
+	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
+	mt7915_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
+
+	mt76_worker_enable(&dev->mt76.tx_worker);
+
+	napi_enable(&dev->mt76.tx_napi);
+	napi_schedule(&dev->mt76.tx_napi);
 
 	ieee80211_wake_queues(mt76_hw(dev));
 	if (ext_phy)
 		ieee80211_wake_queues(ext_phy->hw);
 
-	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
-	mt7915_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
-
 	mutex_unlock(&dev->mt76.mutex);
 
 	mt7915_update_beacons(dev);

From 5ff4c4aab79b39810b30f5fd63117245d7554cb5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 28 May 2021 19:11:42 +0200
Subject: [PATCH 2267/3292] mt76: mt7921: set MT76_RESET during mac reset

Set MT76_RESET during mt7921_mac_reset in order to avoid packet
transmissions. Move tx scheduling at the end of reset routine.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 25 +++++++++++--------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 882bb74d0406..ed886f8633b1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1294,6 +1294,7 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0x0);
 
+	set_bit(MT76_RESET, &dev->mphy.state);
 	set_bit(MT76_MCU_RESET, &dev->mphy.state);
 	wake_up(&dev->mt76.mcu.wait);
 	skb_queue_purge(&dev->mt76.mcu.res_q);
@@ -1309,19 +1310,13 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	mt7921_tx_token_put(dev);
 	idr_init(&dev->mt76.token);
 
-	err = mt7921_wpdma_reset(dev, true);
-	if (err)
-		return err;
+	mt7921_wpdma_reset(dev, true);
 
 	mt76_for_each_q_rx(&dev->mt76, i) {
 		napi_enable(&dev->mt76.napi[i]);
 		napi_schedule(&dev->mt76.napi[i]);
 	}
 
-	napi_enable(&dev->mt76.tx_napi);
-	napi_schedule(&dev->mt76.tx_napi);
-	mt76_worker_enable(&dev->mt76.tx_worker);
-
 	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
 
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA,
@@ -1331,17 +1326,25 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 
 	err = mt7921_run_firmware(dev);
 	if (err)
-		return err;
+		goto out;
 
 	err = mt7921_mcu_set_eeprom(dev);
 	if (err)
-		return err;
+		goto out;
 
 	err = mt7921_mac_init(dev);
 	if (err)
-		return err;
+		goto out;
 
-	return __mt7921_start(&dev->phy);
+	err = __mt7921_start(&dev->phy);
+out:
+	clear_bit(MT76_RESET, &dev->mphy.state);
+
+	napi_enable(&dev->mt76.tx_napi);
+	napi_schedule(&dev->mt76.tx_napi);
+	mt76_worker_enable(&dev->mt76.tx_worker);
+
+	return err;
 }
 
 /* system error recovery */

From 6543002811960d882d722127b4b11e835af0db40 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 15 May 2021 12:17:29 +0800
Subject: [PATCH 2268/3292] mt76: mt7915: use mt7915_mcu_get_mib_info() to get
 survey data

Firmware functions (SCS, MU ...) also require read-clear phy counters,
hence firmware prepares a global task to read shared fields out to a
shared pool to avoid concurrency. Switch to event format accordingly.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/init.c  |  1 -
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 38 +++------------
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 46 +++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7915/mcu.h   | 14 ++++++
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  2 +
 .../net/wireless/mediatek/mt76/mt7915/regs.h  | 20 +-------
 6 files changed, 69 insertions(+), 52 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 39f3639aa096..10c8d9244dc9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -287,7 +287,6 @@ mt7915_mac_init_band(struct mt7915_dev *dev, u8 band)
 	      FIELD_PREP(MT_MDP_RCFR1_RX_DROPPED_MCAST, MT_MDP_TO_HIF);
 	mt76_rmw(dev, MT_MDP_BNRCFR1(band), mask, set);
 
-	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
 	mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
 
 	mt76_rmw_field(dev, MT_DMA_DCR0(band), MT_DMA_DCR0_MAX_RX_LEN, 1536);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index ecd00dfa68ff..741899aaaed8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1374,14 +1374,10 @@ void mt7915_mac_reset_counters(struct mt7915_phy *phy)
 	memset(&dev->mt76.aggr_stats[i], 0, sizeof(dev->mt76.aggr_stats) / 2);
 
 	/* reset airtime counters */
-	mt76_rr(dev, MT_MIB_SDR9(ext_phy));
-	mt76_rr(dev, MT_MIB_SDR36(ext_phy));
-	mt76_rr(dev, MT_MIB_SDR37(ext_phy));
-
-	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(ext_phy),
-		 MT_WF_RMAC_MIB_RXTIME_CLR);
 	mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0(ext_phy),
 		 MT_WF_RMAC_MIB_RXTIME_CLR);
+
+	mt7915_mcu_get_chan_mib_info(phy, true);
 }
 
 void mt7915_mac_set_timing(struct mt7915_phy *phy)
@@ -1478,20 +1474,11 @@ mt7915_phy_get_nf(struct mt7915_phy *phy, int idx)
 static void
 mt7915_phy_update_channel(struct mt76_phy *mphy, int idx)
 {
-	struct mt7915_dev *dev = container_of(mphy->dev, struct mt7915_dev, mt76);
 	struct mt7915_phy *phy = (struct mt7915_phy *)mphy->priv;
-	struct mt76_channel_state *state;
-	u64 busy_time, tx_time, rx_time, obss_time;
+	struct mt76_channel_state *state = mphy->chan_state;
 	int nf;
 
-	busy_time = mt76_get_field(dev, MT_MIB_SDR9(idx),
-				   MT_MIB_SDR9_BUSY_MASK);
-	tx_time = mt76_get_field(dev, MT_MIB_SDR36(idx),
-				 MT_MIB_SDR36_TXTIME_MASK);
-	rx_time = mt76_get_field(dev, MT_MIB_SDR37(idx),
-				 MT_MIB_SDR37_RXTIME_MASK);
-	obss_time = mt76_get_field(dev, MT_WF_RMAC_MIB_AIRTIME14(idx),
-				   MT_MIB_OBSSTIME_MASK);
+	mt7915_mcu_get_chan_mib_info(phy, false);
 
 	nf = mt7915_phy_get_nf(phy, idx);
 	if (!phy->noise)
@@ -1499,27 +1486,14 @@ mt7915_phy_update_channel(struct mt76_phy *mphy, int idx)
 	else if (nf)
 		phy->noise += nf - (phy->noise >> 4);
 
-	state = mphy->chan_state;
-	state->cc_busy += busy_time;
-	state->cc_tx += tx_time;
-	state->cc_rx += rx_time + obss_time;
-	state->cc_bss_rx += rx_time;
 	state->noise = -(phy->noise >> 4);
 }
 
 void mt7915_update_channel(struct mt76_dev *mdev)
 {
-	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
-
 	mt7915_phy_update_channel(&mdev->phy, 0);
 	if (mdev->phy2)
 		mt7915_phy_update_channel(mdev->phy2, 1);
-
-	/* reset obss airtime */
-	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(0), MT_WF_RMAC_MIB_RXTIME_CLR);
-	if (mdev->phy2)
-		mt76_set(dev, MT_WF_RMAC_MIB_TIME0(1),
-			 MT_WF_RMAC_MIB_RXTIME_CLR);
 }
 
 static bool
@@ -1723,7 +1697,7 @@ void mt7915_mac_reset_work(struct work_struct *work)
 }
 
 static void
-mt7915_mac_update_mib_stats(struct mt7915_phy *phy)
+mt7915_mac_update_stats(struct mt7915_phy *phy)
 {
 	struct mt7915_dev *dev = phy->dev;
 	struct mib_stats *mib = &phy->mib;
@@ -1834,7 +1808,7 @@ void mt7915_mac_work(struct work_struct *work)
 	if (++mphy->mac_work_count == 5) {
 		mphy->mac_work_count = 0;
 
-		mt7915_mac_update_mib_stats(phy);
+		mt7915_mac_update_stats(phy);
 	}
 
 	if (++phy->sta_work_count == 10) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 3418ecb0a98f..59806ec96829 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3580,6 +3580,52 @@ int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy)
 	return 0;
 }
 
+int mt7915_mcu_get_chan_mib_info(struct mt7915_phy *phy, bool chan_switch)
+{
+	/* strict order */
+	static const enum mt7915_chan_mib_offs offs[] = {
+		MIB_BUSY_TIME, MIB_TX_TIME, MIB_RX_TIME, MIB_OBSS_AIRTIME
+	};
+	struct mt76_channel_state *state = phy->mt76->chan_state;
+	struct mt76_channel_state *state_ts = &phy->state_ts;
+	struct mt7915_dev *dev = phy->dev;
+	struct mt7915_mcu_mib *res, req[4];
+	struct sk_buff *skb;
+	int i, ret;
+
+	for (i = 0; i < 4; i++) {
+		req[i].band = cpu_to_le32(phy != &dev->phy);
+		req[i].offs = cpu_to_le32(offs[i]);
+	}
+
+	ret = mt76_mcu_send_and_get_msg(&dev->mt76, MCU_EXT_CMD(GET_MIB_INFO),
+					req, sizeof(req), true, &skb);
+	if (ret)
+		return ret;
+
+	res = (struct mt7915_mcu_mib *)(skb->data + 20);
+
+	if (chan_switch)
+		goto out;
+
+#define __res_u64(s) le64_to_cpu(res[s].data)
+	state->cc_busy += __res_u64(0) - state_ts->cc_busy;
+	state->cc_tx += __res_u64(1) - state_ts->cc_tx;
+	state->cc_bss_rx += __res_u64(2) - state_ts->cc_bss_rx;
+	state->cc_rx += __res_u64(2) + __res_u64(3) - state_ts->cc_rx;
+
+out:
+	state_ts->cc_busy = __res_u64(0);
+	state_ts->cc_tx = __res_u64(1);
+	state_ts->cc_bss_rx = __res_u64(2);
+	state_ts->cc_rx = __res_u64(2) + __res_u64(3);
+#undef __res_u64
+
+	dev_kfree_skb(skb);
+
+	return 0;
+}
+
 int mt7915_mcu_get_temperature(struct mt7915_phy *phy)
 {
 	struct mt7915_dev *dev = phy->dev;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 7e3432384633..70ab06d9f954 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -216,6 +216,19 @@ struct mt7915_mcu_phy_rx_info {
 #define MT_RA_RATE_DCM_EN		BIT(4)
 #define MT_RA_RATE_BW			GENMASK(14, 13)
 
+struct mt7915_mcu_mib {
+	__le32 band;
+	__le32 offs;
+	__le64 data;
+} __packed;
+
+enum mt7915_chan_mib_offs {
+	MIB_BUSY_TIME = 14,
+	MIB_TX_TIME = 81,
+	MIB_RX_TIME,
+	MIB_OBSS_AIRTIME = 86
+};
+
 struct edca {
 	u8 queue;
 	u8 set;
@@ -301,6 +314,7 @@ enum {
 	MCU_EXT_CMD_MUAR_UPDATE = 0x48,
 	MCU_EXT_CMD_SET_RX_PATH = 0x4e,
 	MCU_EXT_CMD_TX_POWER_FEATURE_CTRL = 0x58,
+	MCU_EXT_CMD_GET_MIB_INFO = 0x5a,
 	MCU_EXT_CMD_MWDS_SUPPORT = 0x80,
 	MCU_EXT_CMD_SET_SER_TRIGGER = 0x81,
 	MCU_EXT_CMD_SCS_CTRL = 0x82,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index bd6c555814a8..3cce464bc2c5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -149,6 +149,7 @@ struct mt7915_phy {
 	u32 ampdu_ref;
 
 	struct mib_stats mib;
+	struct mt76_channel_state state_ts;
 	struct list_head stats_list;
 
 	u8 sta_work_count;
@@ -364,6 +365,7 @@ int mt7915_mcu_set_radar_th(struct mt7915_dev *dev, int index,
 			    const struct mt7915_dfs_pattern *pattern);
 int mt7915_mcu_apply_group_cal(struct mt7915_dev *dev);
 int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy);
+int mt7915_mcu_get_chan_mib_info(struct mt7915_phy *phy, bool chan_switch);
 int mt7915_mcu_get_temperature(struct mt7915_phy *phy);
 int mt7915_mcu_set_thermal_throttling(struct mt7915_phy *phy, u8 state);
 int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index 8b080cb8f10f..56c33eaa9d79 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -129,20 +129,9 @@
 #define MT_MIB_SDR3(_band)		MT_WF_MIB(_band, 0x014)
 #define MT_MIB_SDR3_FCS_ERR_MASK	GENMASK(15, 0)
 
-#define MT_MIB_SDR9(_band)		MT_WF_MIB(_band, 0x02c)
-#define MT_MIB_SDR9_BUSY_MASK		GENMASK(23, 0)
-
-#define MT_MIB_SDR16(_band)		MT_WF_MIB(_band, 0x048)
-#define MT_MIB_SDR16_BUSY_MASK		GENMASK(23, 0)
-
 #define MT_MIB_SDR34(_band)		MT_WF_MIB(_band, 0x090)
 #define MT_MIB_MU_BF_TX_CNT		GENMASK(15, 0)
 
-#define MT_MIB_SDR36(_band)		MT_WF_MIB(_band, 0x098)
-#define MT_MIB_SDR36_TXTIME_MASK	GENMASK(23, 0)
-#define MT_MIB_SDR37(_band)		MT_WF_MIB(_band, 0x09c)
-#define MT_MIB_SDR37_RXTIME_MASK	GENMASK(23, 0)
-
 #define MT_MIB_DR8(_band)		MT_WF_MIB(_band, 0x0c0)
 #define MT_MIB_DR9(_band)		MT_WF_MIB(_band, 0x0c4)
 #define MT_MIB_DR11(_band)		MT_WF_MIB(_band, 0x0cc)
@@ -155,9 +144,6 @@
 #define MT_MIB_BA_MISS_COUNT_MASK	GENMASK(15, 0)
 #define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
 
-#define MT_MIB_MB_SDR2(_band, n)	MT_WF_MIB(_band, 0x108 + ((n) << 4))
-#define MT_MIB_FRAME_RETRIES_COUNT_MASK	GENMASK(15, 0)
-
 #define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0x0a8 + ((n) << 2))
 #define MT_TX_AGG_CNT2(_band, n)	MT_WF_MIB(_band, 0x164 + ((n) << 2))
 #define MT_MIB_ARNG(_band, n)		MT_WF_MIB(_band, 0x4b8 + ((n) << 2))
@@ -255,14 +241,10 @@
 #define MT_WF_RFCR1_DROP_CFEND		BIT(7)
 #define MT_WF_RFCR1_DROP_CFACK		BIT(8)
 
-#define MT_WF_RMAC_MIB_TIME0(_band)	MT_WF_RMAC(_band, 0x03c4)
+#define MT_WF_RMAC_MIB_AIRTIME0(_band)	MT_WF_RMAC(_band, 0x0380)
 #define MT_WF_RMAC_MIB_RXTIME_CLR	BIT(31)
 #define MT_WF_RMAC_MIB_RXTIME_EN	BIT(30)
 
-#define MT_WF_RMAC_MIB_AIRTIME14(_band)	MT_WF_RMAC(_band, 0x03b8)
-#define MT_MIB_OBSSTIME_MASK		GENMASK(23, 0)
-#define MT_WF_RMAC_MIB_AIRTIME0(_band)	MT_WF_RMAC(_band, 0x0380)
-
 /* WFDMA0 */
 #define MT_WFDMA0_BASE			0xd4000
 #define MT_WFDMA0(ofs)			(MT_WFDMA0_BASE + (ofs))

From 83d229d28b10d5da9b71a06bee6395567bee732a Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Sat, 29 May 2021 19:11:50 +0800
Subject: [PATCH 2269/3292] mt76: mt7915: read all eeprom fields from fw in
 efuse mode

If efuse mode is used, read all values from fw during eeprom init,
which makes it more convinient to check if rf values in efuse are
properly burned.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7915/eeprom.c    | 44 +++++++++----------
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   |  5 ++-
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  1 +
 3 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
index 8ededf2e5279..ee3d64434821 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -4,22 +4,12 @@
 #include "mt7915.h"
 #include "eeprom.h"
 
-static u32 mt7915_eeprom_read(struct mt7915_dev *dev, u32 offset)
-{
-	u8 *data = dev->mt76.eeprom.data;
-
-	if (data[offset] == 0xff && !dev->flash_mode)
-		mt7915_mcu_get_eeprom(dev, offset);
-
-	return data[offset];
-}
-
 static int mt7915_eeprom_load_precal(struct mt7915_dev *dev)
 {
 	struct mt76_dev *mdev = &dev->mt76;
-	u32 val;
+	u8 *eeprom = mdev->eeprom.data;
+	u32 val = eeprom[MT_EE_DO_PRE_CAL];
 
-	val = mt7915_eeprom_read(dev, MT_EE_DO_PRE_CAL);
 	if (val != (MT_EE_WIFI_CAL_DPD | MT_EE_WIFI_CAL_GROUP))
 		return 0;
 
@@ -43,7 +33,13 @@ static int mt7915_eeprom_load(struct mt7915_dev *dev)
 		dev->flash_mode = true;
 		ret = mt7915_eeprom_load_precal(dev);
 	} else {
-		memset(dev->mt76.eeprom.data, -1, MT7915_EEPROM_SIZE);
+		u32 block_num, i;
+
+		block_num = DIV_ROUND_UP(MT7915_EEPROM_SIZE,
+					 MT7915_EEPROM_BLOCK_SIZE);
+		for (i = 0; i < block_num; i++)
+			mt7915_mcu_get_eeprom(dev,
+					      i * MT7915_EEPROM_BLOCK_SIZE);
 	}
 
 	return ret;
@@ -52,10 +48,7 @@ static int mt7915_eeprom_load(struct mt7915_dev *dev)
 static int mt7915_check_eeprom(struct mt7915_dev *dev)
 {
 	u8 *eeprom = dev->mt76.eeprom.data;
-	u16 val;
-
-	mt7915_eeprom_read(dev, MT_EE_CHIP_ID);
-	val = get_unaligned_le16(eeprom);
+	u16 val = get_unaligned_le16(eeprom);
 
 	switch (val) {
 	case 0x7915:
@@ -69,9 +62,10 @@ void mt7915_eeprom_parse_band_config(struct mt7915_phy *phy)
 {
 	struct mt7915_dev *dev = phy->dev;
 	bool ext_phy = phy != &dev->phy;
+	u8 *eeprom = dev->mt76.eeprom.data;
 	u32 val;
 
-	val = mt7915_eeprom_read(dev, MT_EE_WIFI_CONF + ext_phy);
+	val = eeprom[MT_EE_WIFI_CONF + ext_phy];
 	val = FIELD_GET(MT_EE_WIFI_CONF0_BAND_SEL, val);
 	if (val == MT_EE_BAND_SEL_DEFAULT && dev->dbdc_support)
 		val = ext_phy ? MT_EE_BAND_SEL_5GHZ : MT_EE_BAND_SEL_2GHZ;
@@ -143,6 +137,7 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 				   struct ieee80211_channel *chan,
 				   u8 chain_idx)
 {
+	u8 *eeprom = dev->mt76.eeprom.data;
 	int index, target_power;
 	bool tssi_on;
 
@@ -153,18 +148,18 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 
 	if (chan->band == NL80211_BAND_2GHZ) {
 		index = MT_EE_TX0_POWER_2G + chain_idx * 3;
-		target_power = mt7915_eeprom_read(dev, index);
+		target_power = eeprom[index];
 
 		if (!tssi_on)
-			target_power += mt7915_eeprom_read(dev, index + 1);
+			target_power += eeprom[index + 1];
 	} else {
 		int group = mt7915_get_channel_group(chan->hw_value);
 
 		index = MT_EE_TX0_POWER_5G + chain_idx * 12;
-		target_power = mt7915_eeprom_read(dev, index + group);
+		target_power = eeprom[index + group];
 
 		if (!tssi_on)
-			target_power += mt7915_eeprom_read(dev, index + 8);
+			target_power += eeprom[index + 8];
 	}
 
 	return target_power;
@@ -172,13 +167,14 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 
 s8 mt7915_eeprom_get_power_delta(struct mt7915_dev *dev, int band)
 {
+	u8 *eeprom = dev->mt76.eeprom.data;
 	u32 val;
 	s8 delta;
 
 	if (band == NL80211_BAND_2GHZ)
-		val = mt7915_eeprom_read(dev, MT_EE_RATE_DELTA_2G);
+		val = eeprom[MT_EE_RATE_DELTA_2G];
 	else
-		val = mt7915_eeprom_read(dev, MT_EE_RATE_DELTA_5G);
+		val = eeprom[MT_EE_RATE_DELTA_5G];
 
 	if (!(val & MT_EE_RATE_DELTA_EN))
 		return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 59806ec96829..059da720d957 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3417,7 +3417,8 @@ int mt7915_mcu_set_eeprom(struct mt7915_dev *dev)
 int mt7915_mcu_get_eeprom(struct mt7915_dev *dev, u32 offset)
 {
 	struct mt7915_mcu_eeprom_info req = {
-		.addr = cpu_to_le32(round_down(offset, 16)),
+		.addr = cpu_to_le32(round_down(offset,
+				    MT7915_EEPROM_BLOCK_SIZE)),
 	};
 	struct mt7915_mcu_eeprom_info *res;
 	struct sk_buff *skb;
@@ -3431,7 +3432,7 @@ int mt7915_mcu_get_eeprom(struct mt7915_dev *dev, u32 offset)
 
 	res = (struct mt7915_mcu_eeprom_info *)skb->data;
 	buf = dev->mt76.eeprom.data + le32_to_cpu(res->addr);
-	memcpy(buf, res->data, 16);
+	memcpy(buf, res->data, MT7915_EEPROM_BLOCK_SIZE);
 	dev_kfree_skb(skb);
 
 	return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 3cce464bc2c5..a4b32e0d64e9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -31,6 +31,7 @@
 #define MT7915_ROM_PATCH		"mediatek/mt7915_rom_patch.bin"
 
 #define MT7915_EEPROM_SIZE		3584
+#define MT7915_EEPROM_BLOCK_SIZE	16
 #define MT7915_TOKEN_SIZE		8192
 
 #define MT7915_CFEND_RATE_DEFAULT	0x49	/* OFDM 24M */

From a60951d4faa0ef2e475797dd217c2eaee32ed1c2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 31 May 2021 08:33:18 +0200
Subject: [PATCH 2270/3292] mt76: mt7921: enable hw offloading for wep keys

Enable wep key hw offloading for sta mode. This patch fixes
WoW support for wep connections.

Tested-by: Deren.Wu <deren.wu@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/main.c  | 22 ++++++++++++++-----
 .../wireless/mediatek/mt76/mt7921/mt7921.h    |  2 ++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 175030ec221d..a6cf4a0e286a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -376,6 +376,10 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIE;
 		wcid_keyidx = &wcid->hw_key_idx2;
 		break;
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+		if (!mvif->wep_sta)
+			return -EOPNOTSUPP;
 	case WLAN_CIPHER_SUITE_TKIP:
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_CCMP_256:
@@ -383,8 +387,6 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	case WLAN_CIPHER_SUITE_GCMP_256:
 	case WLAN_CIPHER_SUITE_SMS4:
 		break;
-	case WLAN_CIPHER_SUITE_WEP40:
-	case WLAN_CIPHER_SUITE_WEP104:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -402,6 +404,12 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 			    cmd == SET_KEY ? key : NULL);
 
 	err = mt7921_mcu_add_key(dev, vif, msta, key, cmd);
+	if (err)
+		goto out;
+
+	if (key->cipher == WLAN_CIPHER_SUITE_WEP104 ||
+	    key->cipher == WLAN_CIPHER_SUITE_WEP40)
+		err = mt7921_mcu_add_key(dev, vif, mvif->wep_sta, key, cmd);
 out:
 	mt7921_mutex_release(dev);
 
@@ -608,9 +616,12 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	if (ret)
 		return ret;
 
-	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls)
-		mt76_connac_mcu_uni_add_bss(&dev->mphy, vif, &mvif->sta.wcid,
-					    true);
+	if (vif->type == NL80211_IFTYPE_STATION) {
+		mvif->wep_sta = msta;
+		if (!sta->tdls)
+			mt76_connac_mcu_uni_add_bss(&dev->mphy, vif,
+						    &mvif->sta.wcid, true);
+	}
 
 	mt7921_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
@@ -640,6 +651,7 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	if (vif->type == NL80211_IFTYPE_STATION) {
 		struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
 
+		mvif->wep_sta = NULL;
 		ewma_rssi_init(&mvif->rssi);
 		if (!sta->tdls)
 			mt76_connac_mcu_uni_add_bss(&dev->mphy, vif,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index a9b21fcc321c..30902231a4bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -102,6 +102,8 @@ struct mt7921_vif {
 	struct mt76_vif mt76; /* must be first */
 
 	struct mt7921_sta sta;
+	struct mt7921_sta *wep_sta;
+
 	struct mt7921_phy *phy;
 
 	struct ewma_rssi rssi;

From 15539a5ba6fc1ee6c84e06b2f4977032ca5be202 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 31 May 2021 17:53:29 +0200
Subject: [PATCH 2271/3292] mt76: mt7921: remove mt7921_get_wtbl_info routine

Since now the fw reports tx rate events without polling,
mt7921_get_wtbl_info and related structures are no longer used.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   |  57 ---------
 .../net/wireless/mediatek/mt76/mt7921/mcu.h   | 115 ------------------
 .../wireless/mediatek/mt76/mt7921/mt7921.h    |   1 -
 3 files changed, 173 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 7b366df4a8ea..ffc83717fd0d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -398,43 +398,6 @@ mt7921_mcu_tx_rate_parse(struct mt76_phy *mphy,
 	}
 }
 
-static void
-mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
-			  u16 wlan_idx)
-{
-	struct mt7921_mcu_wlan_info_event *wtbl_info;
-	struct mt76_phy *mphy = &dev->mphy;
-	struct mt7921_sta_stats *stats;
-	struct rate_info rate = {};
-	struct mt7921_sta *msta;
-	struct mt76_wcid *wcid;
-	u8 idx;
-
-	if (wlan_idx >= MT76_N_WCIDS)
-		return;
-
-	wtbl_info = (struct mt7921_mcu_wlan_info_event *)skb->data;
-	idx = wtbl_info->rate_info.rate_idx;
-	if (idx >= ARRAY_SIZE(wtbl_info->rate_info.rate))
-		return;
-
-	rcu_read_lock();
-
-	wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
-	if (!wcid)
-		goto out;
-
-	msta = container_of(wcid, struct mt7921_sta, wcid);
-	stats = &msta->stats;
-
-	/* current rate */
-	mt7921_mcu_tx_rate_parse(mphy, &wtbl_info->peer_cap, &rate,
-				 le16_to_cpu(wtbl_info->rate_info.rate[idx]));
-	stats->tx_rate = rate;
-out:
-	rcu_read_unlock();
-}
-
 static void
 mt7921_mcu_scan_event(struct mt7921_dev *dev, struct sk_buff *skb)
 {
@@ -1192,26 +1155,6 @@ int mt7921_mcu_get_eeprom(struct mt7921_dev *dev, u32 offset)
 	return 0;
 }
 
-u32 mt7921_get_wtbl_info(struct mt7921_dev *dev, u32 wlan_idx)
-{
-	struct mt7921_mcu_wlan_info wtbl_info = {
-		.wlan_idx = cpu_to_le32(wlan_idx),
-	};
-	struct sk_buff *skb;
-	int ret;
-
-	ret = mt76_mcu_send_and_get_msg(&dev->mt76, MCU_CMD_GET_WTBL,
-					&wtbl_info, sizeof(wtbl_info), true,
-					&skb);
-	if (ret)
-		return ret;
-
-	mt7921_mcu_tx_rate_report(dev, skb, wlan_idx);
-	dev_kfree_skb(skb);
-
-	return 0;
-}
-
 int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif)
 {
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index 22ebef4add00..89fed2f71161 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -255,86 +255,6 @@ struct mt7921_mcu_reg_event {
 	__le32 val;
 } __packed;
 
-struct mt7921_mcu_tx_config {
-	u8 peer_addr[ETH_ALEN];
-	u8 sw;
-	u8 dis_rx_hdr_tran;
-
-	u8 aad_om;
-	u8 pfmu_idx;
-	__le16 partial_aid;
-
-	u8 ibf;
-	u8 ebf;
-	u8 is_ht;
-	u8 is_vht;
-
-	u8 mesh;
-	u8 baf_en;
-	u8 cf_ack;
-	u8 rdg_ba;
-
-	u8 rdg;
-	u8 pm;
-	u8 rts;
-	u8 smps;
-
-	u8 txop_ps;
-	u8 not_update_ipsm;
-	u8 skip_tx;
-	u8 ldpc;
-
-	u8 qos;
-	u8 from_ds;
-	u8 to_ds;
-	u8 dyn_bw;
-
-	u8 amdsu_cross_lg;
-	u8 check_per;
-	u8 gid_63;
-	u8 he;
-
-	u8 vht_ibf;
-	u8 vht_ebf;
-	u8 vht_ldpc;
-	u8 he_ldpc;
-} __packed;
-
-struct mt7921_mcu_sec_config {
-	u8 wpi_flag;
-	u8 rv;
-	u8 ikv;
-	u8 rkv;
-
-	u8 rcid;
-	u8 rca1;
-	u8 rca2;
-	u8 even_pn;
-
-	u8 key_id;
-	u8 muar_idx;
-	u8 cipher_suit;
-	u8 rsv[1];
-} __packed;
-
-struct mt7921_mcu_key_config {
-	u8 key[32];
-} __packed;
-
-struct mt7921_mcu_rate_info {
-	u8 mpdu_fail;
-	u8 mpdu_tx;
-	u8 rate_idx;
-	u8 rsv[1];
-	__le16 rate[8];
-} __packed;
-
-struct mt7921_mcu_ba_config {
-	u8 ba_en;
-	u8 rsv[3];
-	__le32 ba_winsize;
-} __packed;
-
 struct mt7921_mcu_ant_id_config {
 	u8 ant_id[4];
 } __packed;
@@ -358,41 +278,6 @@ struct mt7921_mcu_peer_cap {
 	u8 rsv[1];
 } __packed;
 
-struct mt7921_mcu_rx_cnt {
-	u8 rx_rcpi[4];
-	u8 rx_cc[4];
-	u8 rx_cc_sel;
-	u8 ce_rmsd;
-	u8 rsv[2];
-} __packed;
-
-struct mt7921_mcu_tx_cnt {
-	__le16 rate1_cnt;
-	__le16 rate1_fail_cnt;
-	__le16 rate2_cnt;
-	__le16 rate3_cnt;
-	__le16 cur_bw_tx_cnt;
-	__le16 cur_bw_tx_fail_cnt;
-	__le16 other_bw_tx_cnt;
-	__le16 other_bw_tx_fail_cnt;
-} __packed;
-
-struct mt7921_mcu_wlan_info_event {
-	struct mt7921_mcu_tx_config tx_config;
-	struct mt7921_mcu_sec_config sec_config;
-	struct mt7921_mcu_key_config key_config;
-	struct mt7921_mcu_rate_info rate_info;
-	struct mt7921_mcu_ba_config ba_config;
-	struct mt7921_mcu_peer_cap peer_cap;
-	struct mt7921_mcu_rx_cnt rx_cnt;
-	struct mt7921_mcu_tx_cnt tx_cnt;
-} __packed;
-
-struct mt7921_mcu_wlan_info {
-	__le32 wlan_idx;
-	struct mt7921_mcu_wlan_info_event event;
-} __packed;
-
 struct mt7921_txpwr_req {
 	u8 ver;
 	u8 action;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 30902231a4bf..8aa8d2ecdffa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -367,7 +367,6 @@ int mt7921_mcu_uni_rx_ba(struct mt7921_dev *dev,
 			 struct ieee80211_ampdu_params *params,
 			 bool enable);
 void mt7921_scan_work(struct work_struct *work);
-u32 mt7921_get_wtbl_info(struct mt7921_dev *dev, u32 wlan_idx);
 int mt7921_mcu_uni_bss_ps(struct mt7921_dev *dev, struct ieee80211_vif *vif);
 int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 			     bool enable);

From 9f367c81de94a8171f7149f14a5f740f0009dd27 Mon Sep 17 00:00:00 2001
From: Deren Wu <deren.wu@mediatek.com>
Date: Tue, 1 Jun 2021 01:01:22 +0800
Subject: [PATCH 2272/3292] mt76: mt7921: enable random mac address during
 sched_scan

Enable src address randomization during scheduled scanning

Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 14 ++++++++------
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  4 +++-
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  3 ++-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 9066c57b1e32..a5312d225d93 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1519,14 +1519,16 @@ int mt76_connac_mcu_sched_scan_req(struct mt76_phy *phy,
 	req->version = 1;
 	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
 
-	if (is_mt7663(phy->dev) &&
-	    (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) {
-		get_random_mask_addr(req->mt7663.random_mac, sreq->mac_addr,
-				     sreq->mac_addr_mask);
+	if (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+		u8 *addr = is_mt7663(phy->dev) ? req->mt7663.random_mac
+					       : req->mt7921.random_mac;
+
 		req->scan_func = 1;
-	} else if (is_mt7921(phy->dev)) {
-		req->mt7921.bss_idx = mvif->idx;
+		get_random_mask_addr(addr, sreq->mac_addr,
+				     sreq->mac_addr_mask);
 	}
+	if (is_mt7921(phy->dev))
+		req->mt7921.bss_idx = mvif->idx;
 
 	req->ssids_num = sreq->n_ssids;
 	for (i = 0; i < req->ssids_num; i++) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 13f7d6a57889..b574d444ddca 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -779,7 +779,9 @@ struct mt76_connac_sched_scan_req {
 		} mt7663;
 		struct {
 			u8 bss_idx;
-			u8 pad2[63];
+			u8 pad2[19];
+			u8 random_mac[ETH_ALEN];
+			u8 pad3[38];
 		} mt7921;
 	};
 } __packed;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 9253706c24a0..06fca78fe149 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -76,7 +76,8 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
 	wiphy->reg_notifier = mt7921_regd_notifier;
 
-	wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+	wiphy->features |= NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
+			   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL);
 
 	ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);

From 798bffd8004fd10c084131b07519d0f6f0c2fac0 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Wed, 2 Jun 2021 09:50:17 +0800
Subject: [PATCH 2273/3292] mt76: mt7915: setup drr group for peers

This is a prerequisite for MU functionality.

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 059da720d957..e28396938ce9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2353,6 +2353,32 @@ int mt7915_mcu_add_he(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 				     MCU_EXT_CMD(STA_REC_UPDATE), true);
 }
 
+static int
+mt7915_mcu_add_group(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		     struct ieee80211_sta *sta)
+{
+#define MT_STA_BSS_GROUP		1
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct {
+		__le32 action;
+		u8 wlan_idx_lo;
+		u8 status;
+		u8 wlan_idx_hi;
+		u8 rsv0[5];
+		__le32 val;
+		u8 rsv1[8];
+	} __packed req = {
+		.action = cpu_to_le32(MT_STA_BSS_GROUP),
+		.wlan_idx_lo = to_wcid_lo(msta->wcid.idx),
+		.wlan_idx_hi = to_wcid_hi(msta->wcid.idx),
+		.val = cpu_to_le32(mvif->idx % 16),
+	};
+
+	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(SET_DRR_CTRL), &req,
+				 sizeof(req), true);
+}
+
 int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta, bool enable)
 {
@@ -2362,6 +2388,10 @@ int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		return 0;
 
 	/* must keep the order */
+	ret = mt7915_mcu_add_group(dev, vif, sta);
+	if (ret)
+		return ret;
+
 	ret = mt7915_mcu_add_txbf(dev, vif, sta, enable);
 	if (ret)
 		return ret;

From e3343d0fef6a368cf5b5b34e99e4a4768ee51242 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Wed, 2 Jun 2021 11:04:03 +0800
Subject: [PATCH 2274/3292] mt76: mt7615: update radar parameters

Patch radar parameters to match the SDK to avoid possible false alarms.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 17 ++++++++++-------
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c |  4 ++++
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 4daa0540e051..f540b6188ba1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -20,7 +20,7 @@
 #define to_rssi(field, rxv)		((FIELD_GET(field, rxv) - 220) / 2)
 
 static const struct mt7615_dfs_radar_spec etsi_radar_specs = {
-	.pulse_th = { 40, -10, -80, 800, 3360, 128, 5200 },
+	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
 	.radar_pattern = {
 		[5] =  { 1, 0,  6, 32, 28, 0, 17,  990, 5010, 1, 1 },
 		[6] =  { 1, 0,  9, 32, 28, 0, 27,  615, 5010, 1, 1 },
@@ -34,7 +34,7 @@ static const struct mt7615_dfs_radar_spec etsi_radar_specs = {
 };
 
 static const struct mt7615_dfs_radar_spec fcc_radar_specs = {
-	.pulse_th = { 40, -10, -80, 800, 3360, 128, 5200 },
+	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
 	.radar_pattern = {
 		[0] = { 1, 0,  9,  32, 28, 0, 13, 508, 3076, 1,  1 },
 		[1] = { 1, 0, 12,  32, 28, 0, 17, 140,  240, 1,  1 },
@@ -45,7 +45,7 @@ static const struct mt7615_dfs_radar_spec fcc_radar_specs = {
 };
 
 static const struct mt7615_dfs_radar_spec jp_radar_specs = {
-	.pulse_th = { 40, -10, -80, 800, 3360, 128, 5200 },
+	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
 	.radar_pattern = {
 		[0] =  { 1, 0,  8, 32, 28, 0, 13,  508, 3076, 1,  1 },
 		[1] =  { 1, 0, 12, 32, 28, 0, 17,  140,  240, 1,  1 },
@@ -2047,14 +2047,12 @@ mt7615_dfs_init_radar_specs(struct mt7615_phy *phy)
 {
 	const struct mt7615_dfs_radar_spec *radar_specs;
 	struct mt7615_dev *dev = phy->dev;
-	int err, i;
+	int err, i, lpn = 500;
 
 	switch (dev->mt76.region) {
 	case NL80211_DFS_FCC:
 		radar_specs = &fcc_radar_specs;
-		err = mt7615_mcu_set_fcc5_lpn(dev, 8);
-		if (err < 0)
-			return err;
+		lpn = 8;
 		break;
 	case NL80211_DFS_ETSI:
 		radar_specs = &etsi_radar_specs;
@@ -2066,6 +2064,11 @@ mt7615_dfs_init_radar_specs(struct mt7615_phy *phy)
 		return -EINVAL;
 	}
 
+	/* avoid FCC radar detection in non-FCC region */
+	err = mt7615_mcu_set_fcc5_lpn(dev, lpn);
+	if (err < 0)
+		return err;
+
 	for (i = 0; i < ARRAY_SIZE(radar_specs->radar_pattern); i++) {
 		err = mt7615_mcu_set_radar_th(dev, i,
 					      &radar_specs->radar_pattern[i]);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index fc9cd8da2a11..cf4a4f2637d5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -430,6 +430,10 @@ mt7615_mcu_rx_radar_detected(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	r = (struct mt7615_mcu_rdd_report *)skb->data;
 
+	if (!dev->radar_pattern.n_pulses && !r->long_detected &&
+	    !r->constant_prf_detected && !r->staggered_prf_detected)
+		return;
+
 	if (r->band_idx && dev->mt76.phy2)
 		mphy = dev->mt76.phy2;
 

From ee8ba94f9cc9afab570fd71ad421292f6360983c Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Mon, 17 May 2021 12:45:58 +0800
Subject: [PATCH 2275/3292] mt76: mt7915: fix MT_EE_CAL_GROUP_SIZE

Fix wrong offset for pre-calibration data.

Fixes: 495184ac91bb ("mt76: mt7915: add support for applying pre-calibration data")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
index 7896e983209a..a43389a41800 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
@@ -33,7 +33,7 @@ enum mt7915_eeprom_field {
 #define MT_EE_WIFI_CAL_GROUP			BIT(0)
 #define MT_EE_WIFI_CAL_DPD			GENMASK(2, 1)
 #define MT_EE_CAL_UNIT				1024
-#define MT_EE_CAL_GROUP_SIZE			(44 * MT_EE_CAL_UNIT)
+#define MT_EE_CAL_GROUP_SIZE			(49 * MT_EE_CAL_UNIT + 16)
 #define MT_EE_CAL_DPD_SIZE			(54 * MT_EE_CAL_UNIT)
 
 #define MT_EE_WIFI_CONF0_TX_PATH		GENMASK(2, 0)

From 435d68f9cffda3ea3c6f65897ddac4357bcb8fd8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 6 Jun 2021 09:45:54 +0200
Subject: [PATCH 2276/3292] mt76: mt7921: enable VHT BFee capability

Enables VHT beamformee functionality

Signed-off-by: Leon Yen <Leon.Yen@mediatek.com>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 06fca78fe149..2d682e59ab52 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -215,7 +215,11 @@ int mt7921_register_device(struct mt7921_dev *dev)
 			IEEE80211_HT_CAP_MAX_AMSDU;
 	dev->mphy.sband_5g.sband.vht_cap.cap |=
 			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
-			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK |
+			IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+			IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE |
+			(3 << IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT);
+
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->mphy.chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->mphy.chainmask;
 

From 82453b1cbf9ef166364c12b5464251f16bac5f51 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 2 Jun 2021 18:00:14 +0200
Subject: [PATCH 2277/3292] mt76: connac: fix UC entry is being overwritten

Fix UC entry is being overwritten by BC entry

Tested-by: Deren Wu <deren.wu@mediatek.com>
Co-developed-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c      |  8 +++++---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 10 ++++++----
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c      |  1 +
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index cf4a4f2637d5..ea1f23e99ca1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1148,12 +1148,14 @@ mt7615_mcu_sta_rx_ba(struct mt7615_dev *dev,
 
 static int
 __mt7615_mcu_add_sta(struct mt76_phy *phy, struct ieee80211_vif *vif,
-		     struct ieee80211_sta *sta, bool enable, int cmd)
+		     struct ieee80211_sta *sta, bool enable, int cmd,
+		     bool offload_fw)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct mt76_sta_cmd_info info = {
 		.sta = sta,
 		.vif = vif,
+		.offload_fw = offload_fw,
 		.enable = enable,
 		.cmd = cmd,
 	};
@@ -1167,7 +1169,7 @@ mt7615_mcu_add_sta(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		   struct ieee80211_sta *sta, bool enable)
 {
 	return __mt7615_mcu_add_sta(phy->mt76, vif, sta, enable,
-				    MCU_EXT_CMD_STA_REC_UPDATE);
+				    MCU_EXT_CMD_STA_REC_UPDATE, false);
 }
 
 static int
@@ -1302,7 +1304,7 @@ mt7615_mcu_uni_add_sta(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta, bool enable)
 {
 	return __mt7615_mcu_add_sta(phy->mt76, vif, sta, enable,
-				    MCU_UNI_CMD_STA_REC_UPDATE);
+				    MCU_UNI_CMD_STA_REC_UPDATE, true);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index a5312d225d93..4b22625a1d4d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -879,10 +879,12 @@ int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
-	mt76_connac_mcu_sta_basic_tlv(skb, info->vif, info->sta, info->enable);
-	if (info->enable && info->sta)
-		mt76_connac_mcu_sta_tlv(phy, skb, info->sta, info->vif,
-					info->rcpi);
+	if (info->sta || !info->offload_fw)
+		mt76_connac_mcu_sta_basic_tlv(skb, info->vif, info->sta,
+					      info->enable);
+	if (info->sta && info->enable)
+		mt76_connac_mcu_sta_tlv(phy, skb, info->sta,
+					info->vif, info->rcpi);
 
 	sta_wtbl = mt76_connac_mcu_add_tlv(skb, STA_REC_WTBL,
 					   sizeof(struct tlv));
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index b574d444ddca..f12c304958c0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -910,6 +910,7 @@ struct mt76_sta_cmd_info {
 
 	struct ieee80211_vif *vif;
 
+	bool offload_fw;
 	bool enable;
 	int cmd;
 	u8 rcpi;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index ffc83717fd0d..bd94d1244975 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1277,6 +1277,7 @@ int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
 		.vif = vif,
 		.enable = enable,
 		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
+		.offload_fw = true,
 		.rcpi = to_rcpi(rssi),
 	};
 	struct mt7921_sta *msta;

From 6ab079e2aba283e3e356cc60dd3d0648adc15b1d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 2 Jun 2021 23:25:28 +0200
Subject: [PATCH 2278/3292] mt76: connac: add mt76_connac_power_save_sched in
 mt76_connac_pm_unref

Schedule power_save work running mt76_connac_pm_unref in order to reduce
power consumption

Tested-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c      | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt7615/main.c     | 6 +++---
 drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76_connac.h     | 8 ++++++--
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c      | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt7921/main.c     | 4 ++--
 6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 8004ae5c16a9..b6184234cad2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -81,7 +81,7 @@ static int mt7615_poll_tx(struct napi_struct *napi, int budget)
 	if (napi_complete(napi))
 		mt7615_irq_enable(dev, mt7615_tx_mcu_int_mask(dev));
 
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 
 	return 0;
 }
@@ -99,7 +99,7 @@ static int mt7615_poll_rx(struct napi_struct *napi, int budget)
 		return 0;
 	}
 	done = mt76_dma_rx_poll(napi, budget);
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 
 	return done;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 7c9a55c57578..bd2f42ef5ad7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -693,7 +693,7 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw,
 	msta->n_rates = i;
 	if (mt76_connac_pm_ref(phy->mt76, &dev->pm)) {
 		mt7615_mac_set_rates(phy, msta, NULL, msta->rates);
-		mt76_connac_pm_unref(&dev->pm);
+		mt76_connac_pm_unref(phy->mt76, &dev->pm);
 	}
 	spin_unlock_bh(&dev->mt76.lock);
 }
@@ -709,7 +709,7 @@ void mt7615_tx_worker(struct mt76_worker *w)
 	}
 
 	mt76_tx_worker_run(&dev->mt76);
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 }
 
 static void mt7615_tx(struct ieee80211_hw *hw,
@@ -739,7 +739,7 @@ static void mt7615_tx(struct ieee80211_hw *hw,
 
 	if (mt76_connac_pm_ref(mphy, &dev->pm)) {
 		mt76_tx(mphy, control->sta, wcid, skb);
-		mt76_connac_pm_unref(&dev->pm);
+		mt76_connac_pm_unref(mphy, &dev->pm);
 		return;
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 261cff78de40..a2465b49ecd0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -98,7 +98,7 @@ mt7615_led_set_config(struct led_classdev *led_cdev,
 	addr = mt7615_reg_map(dev, MT_LED_CTRL);
 	mt76_wr(dev, addr, val);
 
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 63c1d1a68a70..0dfa09902ffd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -117,11 +117,15 @@ out:
 }
 
 static inline void
-mt76_connac_pm_unref(struct mt76_connac_pm *pm)
+mt76_connac_pm_unref(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 {
 	spin_lock_bh(&pm->wake.lock);
-	pm->wake.count--;
+
 	pm->last_activity = jiffies;
+	if (--pm->wake.count == 0 &&
+	    test_bit(MT76_STATE_MCU_RUNNING, &phy->state))
+		mt76_connac_power_save_sched(phy, pm);
+
 	spin_unlock_bh(&pm->wake.lock);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 7fca7dc466b8..5e745e9c2185 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -74,7 +74,7 @@ static int mt7921_poll_tx(struct napi_struct *napi, int budget)
 	mt7921_tx_cleanup(dev);
 	if (napi_complete(napi))
 		mt7921_irq_enable(dev, MT_INT_TX_DONE_ALL);
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 
 	return 0;
 }
@@ -92,7 +92,7 @@ static int mt7921_poll_rx(struct napi_struct *napi, int budget)
 		return 0;
 	}
 	done = mt76_dma_rx_poll(napi, budget);
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 
 	return done;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index a6cf4a0e286a..0cd519f6dc00 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -679,7 +679,7 @@ void mt7921_tx_worker(struct mt76_worker *w)
 	}
 
 	mt76_txq_schedule_all(&dev->mphy);
-	mt76_connac_pm_unref(&dev->pm);
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 }
 
 static void mt7921_tx(struct ieee80211_hw *hw,
@@ -709,7 +709,7 @@ static void mt7921_tx(struct ieee80211_hw *hw,
 
 	if (mt76_connac_pm_ref(mphy, &dev->pm)) {
 		mt76_tx(mphy, control->sta, wcid, skb);
-		mt76_connac_pm_unref(&dev->pm);
+		mt76_connac_pm_unref(mphy, &dev->pm);
 		return;
 	}
 

From 271fa685365842962f56651c9d1a33a0d0d3b30b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 5 Jun 2021 13:46:03 +0200
Subject: [PATCH 2279/3292] mt76: mt7921: wake the device before dumping power
 table

Always wake the device up before dumping the single_sku power table
otherwise the device can hang.

Fixes: ea29acc97c555 ("mt76: mt7921: add dumping Tx power table")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 6ee423dd4027..6602903c0d02 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -184,7 +184,10 @@ mt7921_txpwr(struct seq_file *s, void *data)
 	struct mt7921_txpwr txpwr;
 	int ret;
 
+	mt7921_mutex_acquire(dev);
 	ret = mt7921_get_txpwr_info(dev, &txpwr);
+	mt7921_mutex_release(dev);
+
 	if (ret)
 		return ret;
 

From 5bc52dee44f667507f6d54c70cd22bd2fa52e26b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 5 Jun 2021 15:12:48 +0200
Subject: [PATCH 2280/3292] mt76: mt7921: make mt7921_set_channel static

Make mt7921_set_channel routine static since it is only used in main.c

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 0cd519f6dc00..5fc6cf7e5455 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -316,7 +316,7 @@ static void mt7921_remove_interface(struct ieee80211_hw *hw,
 	spin_unlock_bh(&dev->sta_poll_lock);
 }
 
-int mt7921_set_channel(struct mt7921_phy *phy)
+static int mt7921_set_channel(struct mt7921_phy *phy)
 {
 	struct mt7921_dev *dev = phy->dev;
 	int ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 8aa8d2ecdffa..087067e7ea5b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -261,7 +261,6 @@ int mt7921_mcu_init(struct mt7921_dev *dev);
 int mt7921_mcu_add_key(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 		       struct mt7921_sta *msta, struct ieee80211_key_conf *key,
 		       enum set_key_cmd cmd);
-int mt7921_set_channel(struct mt7921_phy *phy);
 int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
 		       struct ieee80211_vif *vif, bool enable);
 int mt7921_mcu_set_chan_info(struct mt7921_phy *phy, int cmd);

From f7d2958ca4614a53b155b9ac37c400b216357394 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 6 Jun 2021 15:18:12 +0200
Subject: [PATCH 2281/3292] mt76: connac: add
 mt76_connac_mcu_get_nic_capability utility routine

Introduce mt76_connac_mcu_get_nic_capability utility routine to poll
device capabilities returned by mcu fw for CE devices (mt7663/mt7921).
This is a preliminary patch to introduce 6GHz support.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
 .../wireless/mediatek/mt76/mt76_connac_mcu.c  | 54 +++++++++++++++++++
 .../wireless/mediatek/mt76/mt76_connac_mcu.h  | 24 +++++++++
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   |  2 +-
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index ebacd55cb0cd..338219024ba7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -320,6 +320,7 @@ enum {
 struct mt76_hw_cap {
 	bool has_2ghz;
 	bool has_5ghz;
+	bool has_6ghz;
 };
 
 #define MT_DRV_TXWI_NO_FREE		BIT(0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 4b22625a1d4d..d7d7cede955b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1630,6 +1630,60 @@ void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_coredump_event);
 
+int mt76_connac_mcu_get_nic_capability(struct mt76_phy *phy)
+{
+	struct mt76_connac_cap_hdr {
+		__le16 n_element;
+		u8 rsv[2];
+	} __packed * hdr;
+	struct sk_buff *skb;
+	int ret, i;
+
+	ret = mt76_mcu_send_and_get_msg(phy->dev, MCU_CMD_GET_NIC_CAPAB, NULL,
+					0, true, &skb);
+	if (ret)
+		return ret;
+
+	hdr = (struct mt76_connac_cap_hdr *)skb->data;
+	if (skb->len < sizeof(*hdr)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	skb_pull(skb, sizeof(*hdr));
+
+	for (i = 0; i < le16_to_cpu(hdr->n_element); i++) {
+		struct tlv_hdr {
+			__le32 type;
+			__le32 len;
+		} __packed * tlv = (struct tlv_hdr *)skb->data;
+		int len;
+
+		if (skb->len < sizeof(*tlv))
+			break;
+
+		skb_pull(skb, sizeof(*tlv));
+
+		len = le32_to_cpu(tlv->len);
+		if (skb->len < len)
+			break;
+
+		switch (le32_to_cpu(tlv->type)) {
+		case MT_NIC_CAP_6G:
+			phy->cap.has_6ghz = skb->data[0];
+			break;
+		default:
+			break;
+		}
+		skb_pull(skb, len);
+	}
+out:
+	dev_kfree_skb(skb);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_get_nic_capability);
+
 static void
 mt76_connac_mcu_build_sku(struct mt76_dev *dev, s8 *sku,
 			  struct mt76_power_limits *limits,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index f12c304958c0..549e2ab95563 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -559,6 +559,7 @@ enum {
 	MCU_CMD_SET_RATE_TX_POWER = MCU_CE_PREFIX | 0x5d,
 	MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
 	MCU_CMD_SCHED_SCAN_REQ = MCU_CE_PREFIX | 0x62,
+	MCU_CMD_GET_NIC_CAPAB = MCU_CE_PREFIX | 0x8a,
 	MCU_CMD_REG_WRITE = MCU_CE_PREFIX | 0xc0,
 	MCU_CMD_REG_READ = MCU_CE_PREFIX | MCU_QUERY_MASK | 0xc0,
 	MCU_CMD_CHIP_CONFIG = MCU_CE_PREFIX | 0xca,
@@ -591,6 +592,28 @@ enum {
 	UNI_OFFLOAD_OFFLOAD_BMC_RPY_DETECT,
 };
 
+enum {
+	MT_NIC_CAP_TX_RESOURCE,
+	MT_NIC_CAP_TX_EFUSE_ADDR,
+	MT_NIC_CAP_COEX,
+	MT_NIC_CAP_SINGLE_SKU,
+	MT_NIC_CAP_CSUM_OFFLOAD,
+	MT_NIC_CAP_HW_VER,
+	MT_NIC_CAP_SW_VER,
+	MT_NIC_CAP_MAC_ADDR,
+	MT_NIC_CAP_PHY,
+	MT_NIC_CAP_MAC,
+	MT_NIC_CAP_FRAME_BUF,
+	MT_NIC_CAP_BEAM_FORM,
+	MT_NIC_CAP_LOCATION,
+	MT_NIC_CAP_MUMIMO,
+	MT_NIC_CAP_BUFFER_MODE_INFO,
+	MT_NIC_CAP_HW_ADIE_VERSION = 0x14,
+	MT_NIC_CAP_ANTSWP = 0x16,
+	MT_NIC_CAP_WFDMA_REALLOC,
+	MT_NIC_CAP_6G,
+};
+
 #define UNI_WOW_DETECT_TYPE_MAGIC		BIT(0)
 #define UNI_WOW_DETECT_TYPE_ANY			BIT(1)
 #define UNI_WOW_DETECT_TYPE_DISCONNECT		BIT(2)
@@ -1032,6 +1055,7 @@ int mt76_connac_mcu_init_download(struct mt76_dev *dev, u32 addr, u32 len,
 int mt76_connac_mcu_start_patch(struct mt76_dev *dev);
 int mt76_connac_mcu_patch_sem_ctrl(struct mt76_dev *dev, bool get);
 int mt76_connac_mcu_start_firmware(struct mt76_dev *dev, u32 addr, u32 option);
+int mt76_connac_mcu_get_nic_capability(struct mt76_phy *phy);
 
 int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
 			    struct ieee80211_scan_request *scan_req);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index bd94d1244975..ca481e37d22c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -988,7 +988,7 @@ int mt7921_run_firmware(struct mt7921_dev *dev)
 	set_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
 	mt7921_mcu_fw_log_2_host(dev, 1);
 
-	return 0;
+	return mt76_connac_mcu_get_nic_capability(&dev->mphy);
 }
 
 int mt7921_mcu_init(struct mt7921_dev *dev)

From a0d65f627ba0fc7d93fddda3c11d3543dbe1c425 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Tue, 8 Jun 2021 14:55:57 +0800
Subject: [PATCH 2282/3292] mt76: testmode: move chip-specific stats dump
 before common stats

Move chip-specific stats dumping part before common stats dumping
to provide flexibility for per-chip driver to modify the value of
common stats.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/testmode.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/testmode.c b/drivers/net/wireless/mediatek/mt76/testmode.c
index c6a85a0cfc89..f73ffbd6e622 100644
--- a/drivers/net/wireless/mediatek/mt76/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/testmode.c
@@ -521,6 +521,14 @@ mt76_testmode_dump_stats(struct mt76_phy *phy, struct sk_buff *msg)
 	u64 rx_fcs_error = 0;
 	int i;
 
+	if (dev->test_ops->dump_stats) {
+		int ret;
+
+		ret = dev->test_ops->dump_stats(phy, msg);
+		if (ret)
+			return ret;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(td->rx_stats.packets); i++) {
 		rx_packets += td->rx_stats.packets[i];
 		rx_fcs_error += td->rx_stats.fcs_error[i];
@@ -535,9 +543,6 @@ mt76_testmode_dump_stats(struct mt76_phy *phy, struct sk_buff *msg)
 			      MT76_TM_STATS_ATTR_PAD))
 		return -EMSGSIZE;
 
-	if (dev->test_ops->dump_stats)
-		return dev->test_ops->dump_stats(phy, msg);
-
 	return 0;
 }
 

From 89043529c8b833d87391f1844e9d1cc1643393eb Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Tue, 8 Jun 2021 14:55:58 +0800
Subject: [PATCH 2283/3292] mt76: mt7915: fix rx fcs error count in testmode

FCS error packets are filtered by default and won't be reported to
driver, so that RX fcs error and PER in testmode always show zero.
Fix this issue by reading fcs error count from hw counter.

We did't fix this issue by disabling fcs error rx filter since it may
let HW suffer some SER errors.

Fixes: 5d8a83f09941 ("mt76: mt7915: implement testmode rx support")
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7915/testmode.c  | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
index f9d81e36ef09..b220b334906b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
@@ -464,10 +464,17 @@ mt7915_tm_set_tx_frames(struct mt7915_phy *phy, bool en)
 static void
 mt7915_tm_set_rx_frames(struct mt7915_phy *phy, bool en)
 {
-	if (en)
+	mt7915_tm_set_trx(phy, TM_MAC_RX_RXV, false);
+
+	if (en) {
+		struct mt7915_dev *dev = phy->dev;
+
 		mt7915_tm_update_channel(phy);
 
-	mt7915_tm_set_trx(phy, TM_MAC_RX_RXV, en);
+		/* read-clear */
+		mt76_rr(dev, MT_MIB_SDR3(phy != &dev->phy));
+		mt7915_tm_set_trx(phy, TM_MAC_RX_RXV, en);
+	}
 }
 
 static int
@@ -690,7 +697,11 @@ static int
 mt7915_tm_dump_stats(struct mt76_phy *mphy, struct sk_buff *msg)
 {
 	struct mt7915_phy *phy = mphy->priv;
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	enum mt76_rxq_id q;
 	void *rx, *rssi;
+	u16 fcs_err;
 	int i;
 
 	rx = nla_nest_start(msg, MT76_TM_STATS_ATTR_LAST_RX);
@@ -735,6 +746,12 @@ mt7915_tm_dump_stats(struct mt76_phy *mphy, struct sk_buff *msg)
 
 	nla_nest_end(msg, rx);
 
+	fcs_err = mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
+				 MT_MIB_SDR3_FCS_ERR_MASK);
+	q = ext_phy ? MT_RXQ_EXT : MT_RXQ_MAIN;
+	mphy->test.rx_stats.packets[q] += fcs_err;
+	mphy->test.rx_stats.fcs_error[q] += fcs_err;
+
 	return 0;
 }
 

From abded041a07467c2f3dfe10afd9ea10572c63cc9 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Wed, 9 Jun 2021 14:15:32 +0800
Subject: [PATCH 2284/3292] mt76: connac: fix the maximum interval schedule
 scan can support

Maximum interval (in seconds) for schedule scan plan supported by
the offload firmware can be U16_MAX.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c     | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76_connac.h     | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/init.c     | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 4aa7877a6383..2f1ac644e018 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -383,7 +383,7 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 	wiphy->reg_notifier = mt7615_regd_notifier;
 
 	wiphy->max_sched_scan_plan_interval =
-		MT76_CONNAC_MAX_SCHED_SCAN_INTERVAL;
+		MT76_CONNAC_MAX_TIME_SCHED_SCAN_INTERVAL;
 	wiphy->max_sched_scan_ie_len = IEEE80211_MAX_DATA_LEN;
 	wiphy->max_scan_ie_len = MT76_CONNAC_SCAN_IE_LEN;
 	wiphy->max_sched_scan_ssids = MT76_CONNAC_MAX_SCHED_SCAN_SSID;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 0dfa09902ffd..9b3f8d22f17e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -7,7 +7,8 @@
 #include "mt76.h"
 
 #define MT76_CONNAC_SCAN_IE_LEN			600
-#define MT76_CONNAC_MAX_SCHED_SCAN_INTERVAL	10
+#define MT76_CONNAC_MAX_NUM_SCHED_SCAN_INTERVAL	 10
+#define MT76_CONNAC_MAX_TIME_SCHED_SCAN_INTERVAL U16_MAX
 #define MT76_CONNAC_MAX_SCHED_SCAN_SSID		10
 #define MT76_CONNAC_MAX_SCAN_MATCH		16
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 549e2ab95563..9f3c87902db8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -794,7 +794,7 @@ struct mt76_connac_sched_scan_req {
 	u8 intervals_num;
 	u8 scan_func; /* MT7663: BIT(0) eable random mac address */
 	struct mt76_connac_mcu_scan_channel channels[64];
-	__le16 intervals[MT76_CONNAC_MAX_SCHED_SCAN_INTERVAL];
+	__le16 intervals[MT76_CONNAC_MAX_NUM_SCHED_SCAN_INTERVAL];
 	union {
 		struct {
 			u8 random_mac[ETH_ALEN];
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 2d682e59ab52..b399f3b8b5d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -68,7 +68,7 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	wiphy->max_scan_ie_len = MT76_CONNAC_SCAN_IE_LEN;
 	wiphy->max_scan_ssids = 4;
 	wiphy->max_sched_scan_plan_interval =
-		MT76_CONNAC_MAX_SCHED_SCAN_INTERVAL;
+		MT76_CONNAC_MAX_TIME_SCHED_SCAN_INTERVAL;
 	wiphy->max_sched_scan_ie_len = IEEE80211_MAX_DATA_LEN;
 	wiphy->max_sched_scan_ssids = MT76_CONNAC_MAX_SCHED_SCAN_SSID;
 	wiphy->max_match_sets = MT76_CONNAC_MAX_SCAN_MATCH;

From 49c9a263d76a0cf2bb0e36f193036b45545d4eee Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 9 Jun 2021 17:13:58 +0200
Subject: [PATCH 2285/3292] mt76: reduce rx buffer size to 2048

Reduce rx buffer size to 2048 for mt7921/mt7915/mt7615 since we
now support rx amsdu offload

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c |  9 ++-------
 drivers/net/wireless/mediatek/mt76/mt7915/dma.c | 12 +++++-------
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c |  8 +++-----
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index b6184234cad2..00aefea1bf61 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -222,14 +222,9 @@ void mt7615_dma_start(struct mt7615_dev *dev)
 int mt7615_dma_init(struct mt7615_dev *dev)
 {
 	int rx_ring_size = MT7615_RX_RING_SIZE;
-	int rx_buf_size = MT_RX_BUF_SIZE;
 	u32 mask;
 	int ret;
 
-	/* Increase buffer size to receive large VHT MPDUs */
-	if (dev->mphy.cap.has_5ghz)
-		rx_buf_size *= 2;
-
 	mt76_dma_attach(&dev->mt76);
 
 	mt76_wr(dev, MT_WPDMA_GLO_CFG,
@@ -270,7 +265,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 
 	/* init rx queues */
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU], 1,
-			       MT7615_RX_MCU_RING_SIZE, rx_buf_size,
+			       MT7615_RX_MCU_RING_SIZE, MT_RX_BUF_SIZE,
 			       MT_RX_RING_BASE);
 	if (ret)
 		return ret;
@@ -279,7 +274,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 	    rx_ring_size /= 2;
 
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN], 0,
-			       rx_ring_size, rx_buf_size, MT_RX_RING_BASE);
+			       rx_ring_size, MT_RX_BUF_SIZE, MT_RX_RING_BASE);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
index 69a7e3dce113..9182568f95c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -79,8 +79,6 @@ void mt7915_dma_prefetch(struct mt7915_dev *dev)
 
 int mt7915_dma_init(struct mt7915_dev *dev)
 {
-	/* Increase buffer size to receive large VHT/HE MPDUs */
-	int rx_buf_size = MT_RX_BUF_SIZE * 2;
 	u32 hif1_ofs = 0;
 	int ret;
 
@@ -144,28 +142,28 @@ int mt7915_dma_init(struct mt7915_dev *dev)
 	/* event from WM */
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU],
 			       MT7915_RXQ_MCU_WM, MT7915_RX_MCU_RING_SIZE,
-			       rx_buf_size, MT_RX_EVENT_RING_BASE);
+			       MT_RX_BUF_SIZE, MT_RX_EVENT_RING_BASE);
 	if (ret)
 		return ret;
 
 	/* event from WA */
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA],
 			       MT7915_RXQ_MCU_WA, MT7915_RX_MCU_RING_SIZE,
-			       rx_buf_size, MT_RX_EVENT_RING_BASE);
+			       MT_RX_BUF_SIZE, MT_RX_EVENT_RING_BASE);
 	if (ret)
 		return ret;
 
 	/* rx data queue */
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN],
 			       MT7915_RXQ_BAND0, MT7915_RX_RING_SIZE,
-			       rx_buf_size, MT_RX_DATA_RING_BASE);
+			       MT_RX_BUF_SIZE, MT_RX_DATA_RING_BASE);
 	if (ret)
 		return ret;
 
 	if (dev->dbdc_support) {
 		ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_EXT],
 				       MT7915_RXQ_BAND1, MT7915_RX_RING_SIZE,
-				       rx_buf_size,
+				       MT_RX_BUF_SIZE,
 				       MT_RX_DATA_RING_BASE + hif1_ofs);
 		if (ret)
 			return ret;
@@ -174,7 +172,7 @@ int mt7915_dma_init(struct mt7915_dev *dev)
 		ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_EXT_WA],
 				       MT7915_RXQ_MCU_WA_EXT,
 				       MT7915_RX_MCU_RING_SIZE,
-				       rx_buf_size,
+				       MT_RX_BUF_SIZE,
 				       MT_RX_EVENT_RING_BASE + hif1_ofs);
 		if (ret)
 			return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 5e745e9c2185..7d7d43a5422f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -380,9 +380,7 @@ int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev)
 
 int mt7921_dma_init(struct mt7921_dev *dev)
 {
-	/* Increase buffer size to receive large VHT/HE MPDUs */
 	struct mt76_bus_ops *bus_ops;
-	int rx_buf_size = MT_RX_BUF_SIZE * 2;
 	int ret;
 
 	dev->bus_ops = dev->mt76.bus;
@@ -430,7 +428,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU],
 			       MT7921_RXQ_MCU_WM,
 			       MT7921_RX_MCU_RING_SIZE,
-			       rx_buf_size, MT_RX_EVENT_RING_BASE);
+			       MT_RX_BUF_SIZE, MT_RX_EVENT_RING_BASE);
 	if (ret)
 		return ret;
 
@@ -438,14 +436,14 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA],
 			       MT7921_RXQ_MCU_WM,
 			       MT7921_RX_MCU_RING_SIZE,
-			       rx_buf_size, MT_WFDMA0(0x540));
+			       MT_RX_BUF_SIZE, MT_WFDMA0(0x540));
 	if (ret)
 		return ret;
 
 	/* rx data */
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN],
 			       MT7921_RXQ_BAND0, MT7921_RX_RING_SIZE,
-			       rx_buf_size, MT_RX_DATA_RING_BASE);
+			       MT_RX_BUF_SIZE, MT_RX_DATA_RING_BASE);
 	if (ret)
 		return ret;
 

From 90052b844d7a6db8649239434b262d28b1430cf4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 10 Jun 2021 09:44:12 +0200
Subject: [PATCH 2286/3292] mt76: move mt76_get_next_pkt_id in mt76.h

In order to remove duplicated code, move mt76_get_next_pkt_id routine
in mt76.h

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h       | 11 +++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 15 +++------------
 drivers/net/wireless/mediatek/mt76/tx.c         |  7 +------
 3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 338219024ba7..a50ba8e9344e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -1276,4 +1276,15 @@ mt76_token_put(struct mt76_dev *dev, int token)
 
 	return txwi;
 }
+
+static inline int
+mt76_get_next_pkt_id(struct mt76_wcid *wcid)
+{
+	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
+	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
+	    wcid->packet_id == MT_PACKET_ID_NO_SKB)
+		wcid->packet_id = MT_PACKET_ID_FIRST;
+
+	return wcid->packet_id;
+}
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index ed886f8633b1..8e2252c1acd4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -13,16 +13,6 @@
 #define HE_PREP(f, m, v)	le16_encode_bits(le32_get_bits(v, MT_CRXV_HE_##m),\
 						 IEEE80211_RADIOTAP_HE_##f)
 
-static u8
-mt7921_next_pid(struct mt7921_dev *dev, struct mt76_wcid *wcid)
-{
-	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
-	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
-	    wcid->packet_id == MT_PACKET_ID_NO_SKB)
-		wcid->packet_id = MT_PACKET_ID_FIRST;
-	return wcid->packet_id;
-}
-
 static unsigned long
 mt7921_next_txs_set(struct mt7921_dev *dev, struct mt76_wcid *wcid,
 		    u32 timeout)
@@ -756,7 +746,7 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_vif *vif = info->control.vif;
 	struct mt76_phy *mphy = &dev->mphy;
-	u8 pid, p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
+	u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
 	bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
 	u16 tx_count = 15;
 	u32 val;
@@ -829,8 +819,9 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 	if ((FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]) &
 		(IEEE80211_FTYPE_DATA >> 2)) &&
 		mt7921_next_txs_timeout(dev, wcid)) {
+		u8 pid = mt76_get_next_pkt_id(wcid);
+
 		mt7921_next_txs_set(dev, wcid, 250);
-		pid = mt7921_next_pid(dev, wcid);
 		val = MT_TXD5_TX_STATUS_MCU | FIELD_PREP(MT_TXD5_PID, pid);
 		txwi[5] |= cpu_to_le32(val);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 5cc3e4d75c4f..f0f7a913eaab 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -129,12 +129,7 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
 	spin_lock_bh(&dev->status_list.lock);
 
 	memset(cb, 0, sizeof(*cb));
-	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
-	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
-	    wcid->packet_id == MT_PACKET_ID_NO_SKB)
-		wcid->packet_id = MT_PACKET_ID_FIRST;
-
-	pid = wcid->packet_id;
+	pid = mt76_get_next_pkt_id(wcid);
 	cb->wcid = wcid->idx;
 	cb->pktid = pid;
 	cb->jiffies = jiffies;

From 1f9dde02aab74a32e896ef10e15d72c39e54bb14 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 5 Jun 2021 13:50:52 +0200
Subject: [PATCH 2287/3292] mt76: connac: check band caps in
 mt76_connac_mcu_set_rate_txpower

Check device band capabilities before configuring single-sku

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt76_connac_mcu.c    | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index d7d7cede955b..78498d86efc3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1809,11 +1809,20 @@ int mt76_connac_mcu_set_rate_txpower(struct mt76_phy *phy)
 {
 	int err;
 
-	err = mt76_connac_mcu_rate_txpower_band(phy, NL80211_BAND_2GHZ);
-	if (err < 0)
-		return err;
+	if (phy->cap.has_2ghz) {
+		err = mt76_connac_mcu_rate_txpower_band(phy,
+							NL80211_BAND_2GHZ);
+		if (err < 0)
+			return err;
+	}
+	if (phy->cap.has_5ghz) {
+		err = mt76_connac_mcu_rate_txpower_band(phy,
+							NL80211_BAND_5GHZ);
+		if (err < 0)
+			return err;
+	}
 
-	return mt76_connac_mcu_rate_txpower_band(phy, NL80211_BAND_5GHZ);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_set_rate_txpower);
 

From 0d733327c531814c0fb9ef4eea86ca0d6eddfe9e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 25 May 2021 18:45:04 +0200
Subject: [PATCH 2288/3292] mt76: mt7915: drop the use of repeater entries for
 station interfaces

There are firmware or hardware issues, which are currently causing tx hangs
when attempting to use these interfaces

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   | 6 ------
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 2485f65766e7..c25f8da590dd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -139,12 +139,6 @@ static int get_omac_idx(enum nl80211_iftype type, u64 mask)
 		if (type != NL80211_IFTYPE_STATION)
 			break;
 
-		/* next, try to find a free repeater entry for the sta */
-		i = get_free_idx(mask >> REPEATER_BSSID_START, 0,
-				 REPEATER_BSSID_MAX - REPEATER_BSSID_START);
-		if (i)
-			return i + 32 - 1;
-
 		i = get_free_idx(mask, EXT_BSSID_1, EXT_BSSID_MAX);
 		if (i)
 			return i - 1;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index a4b32e0d64e9..b0cdd53098ef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -9,7 +9,7 @@
 #include "../mt76.h"
 #include "regs.h"
 
-#define MT7915_MAX_INTERFACES		32
+#define MT7915_MAX_INTERFACES		19
 #define MT7915_MAX_WMM_SETS		4
 #define MT7915_WTBL_SIZE		288
 #define MT7915_WTBL_RESERVED		(MT7915_WTBL_SIZE - 1)

From c560b137a2164c7160f4edc1813f3e335de6bdff Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 11 Jun 2021 02:43:45 +0800
Subject: [PATCH 2289/3292] mt76: make mt76_update_survey() per phy

Reduce duplicated survey for DBDC.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 15 +++---
 drivers/net/wireless/mediatek/mt76/mt76.h     |  4 +-
 .../net/wireless/mediatek/mt76/mt7603/mac.c   |  8 ++--
 .../wireless/mediatek/mt76/mt7603/mt7603.h    |  2 +-
 .../net/wireless/mediatek/mt76/mt7615/mac.c   | 48 +++++++++----------
 .../wireless/mediatek/mt76/mt7615/mt7615.h    |  2 +-
 .../net/wireless/mediatek/mt76/mt76x02_mac.c  |  8 ++--
 .../net/wireless/mediatek/mt76/mt76x02_mac.h  |  2 +-
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 15 ++----
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  2 +-
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 12 ++---
 .../wireless/mediatek/mt76/mt7921/mt7921.h    |  2 +-
 12 files changed, 55 insertions(+), 65 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 49da219d4e52..d03aedc3286b 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -659,20 +659,19 @@ void mt76_update_survey_active_time(struct mt76_phy *phy, ktime_t time)
 }
 EXPORT_SYMBOL_GPL(mt76_update_survey_active_time);
 
-void mt76_update_survey(struct mt76_dev *dev)
+void mt76_update_survey(struct mt76_phy *phy)
 {
+	struct mt76_dev *dev = phy->dev;
 	ktime_t cur_time;
 
 	if (dev->drv->update_survey)
-		dev->drv->update_survey(dev);
+		dev->drv->update_survey(phy);
 
 	cur_time = ktime_get_boottime();
-	mt76_update_survey_active_time(&dev->phy, cur_time);
-	if (dev->phy2)
-		mt76_update_survey_active_time(dev->phy2, cur_time);
+	mt76_update_survey_active_time(phy, cur_time);
 
 	if (dev->drv->drv_flags & MT_DRV_SW_RX_AIRTIME) {
-		struct mt76_channel_state *state = dev->phy.chan_state;
+		struct mt76_channel_state *state = phy->chan_state;
 
 		spin_lock_bh(&dev->cc_lock);
 		state->cc_bss_rx += dev->cur_cc_bss_rx;
@@ -691,7 +690,7 @@ void mt76_set_channel(struct mt76_phy *phy)
 	int timeout = HZ / 5;
 
 	wait_event_timeout(dev->tx_wait, !mt76_has_tx_pending(phy), timeout);
-	mt76_update_survey(dev);
+	mt76_update_survey(phy);
 
 	phy->chandef = *chandef;
 	phy->chan_state = mt76_channel_state(phy, chandef->chan);
@@ -716,7 +715,7 @@ int mt76_get_survey(struct ieee80211_hw *hw, int idx,
 
 	mutex_lock(&dev->mutex);
 	if (idx == 0 && dev->drv->update_survey)
-		mt76_update_survey(dev);
+		mt76_update_survey(phy);
 
 	sband = &phy->sband_2g;
 	if (idx >= sband->sband.n_channels) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index a50ba8e9344e..022e9c573ddd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -337,7 +337,7 @@ struct mt76_driver_ops {
 	u16 token_size;
 	u8 mcs_rates;
 
-	void (*update_survey)(struct mt76_dev *dev);
+	void (*update_survey)(struct mt76_phy *phy);
 
 	int (*tx_prepare_skb)(struct mt76_dev *dev, void *txwi_ptr,
 			      enum mt76_txq_id qid, struct mt76_wcid *wcid,
@@ -1047,7 +1047,7 @@ void mt76_release_buffered_frames(struct ieee80211_hw *hw,
 				  bool more_data);
 bool mt76_has_tx_pending(struct mt76_phy *phy);
 void mt76_set_channel(struct mt76_phy *phy);
-void mt76_update_survey(struct mt76_dev *dev);
+void mt76_update_survey(struct mt76_phy *phy);
 void mt76_update_survey_active_time(struct mt76_phy *phy, ktime_t time);
 int mt76_get_survey(struct ieee80211_hw *hw, int idx,
 		    struct survey_info *survey);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 4e76f9868b9b..8435e9597688 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1584,12 +1584,12 @@ trigger:
 	return true;
 }
 
-void mt7603_update_channel(struct mt76_dev *mdev)
+void mt7603_update_channel(struct mt76_phy *mphy)
 {
-	struct mt7603_dev *dev = container_of(mdev, struct mt7603_dev, mt76);
+	struct mt7603_dev *dev = container_of(mphy->dev, struct mt7603_dev, mt76);
 	struct mt76_channel_state *state;
 
-	state = mdev->phy.chan_state;
+	state = mphy->chan_state;
 	state->cc_busy += mt76_rr(dev, MT_MIB_STAT_CCA);
 }
 
@@ -1806,7 +1806,7 @@ void mt7603_mac_work(struct work_struct *work)
 	mutex_lock(&dev->mt76.mutex);
 
 	dev->mphy.mac_work_count++;
-	mt76_update_survey(&dev->mt76);
+	mt76_update_survey(&dev->mphy);
 	mt7603_edcca_check(dev);
 
 	for (i = 0, idx = 0; i < 2; i++) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
index 1df5b9fed2bb..0fd46d907638 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
@@ -256,7 +256,7 @@ void mt7603_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 
 void mt7603_pre_tbtt_tasklet(struct tasklet_struct *t);
 
-void mt7603_update_channel(struct mt76_dev *mdev);
+void mt7603_update_channel(struct mt76_phy *mphy);
 
 void mt7603_edcca_set_strict(struct mt7603_dev *dev, bool val);
 void mt7603_cca_stats_reset(struct mt7603_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f540b6188ba1..a057859aa050 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1819,43 +1819,41 @@ mt7615_phy_update_channel(struct mt76_phy *mphy, int idx)
 	state->noise = -(phy->noise >> 4);
 }
 
-static void __mt7615_update_channel(struct mt7615_dev *dev)
-{
-	struct mt76_dev *mdev = &dev->mt76;
-
-	mt7615_phy_update_channel(&mdev->phy, 0);
-	if (mdev->phy2)
-		mt7615_phy_update_channel(mdev->phy2, 1);
-
-	/* reset obss airtime */
-	mt76_set(dev, MT_WF_RMAC_MIB_TIME0, MT_WF_RMAC_MIB_RXTIME_CLR);
-}
-
-void mt7615_update_channel(struct mt76_dev *mdev)
-{
-	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
-
-	if (mt76_connac_pm_wake(&dev->mphy, &dev->pm))
-		return;
-
-	__mt7615_update_channel(dev);
-	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
-}
-EXPORT_SYMBOL_GPL(mt7615_update_channel);
-
 static void mt7615_update_survey(struct mt7615_dev *dev)
 {
 	struct mt76_dev *mdev = &dev->mt76;
 	ktime_t cur_time;
 
-	__mt7615_update_channel(dev);
+	/* MT7615 can only update both phys simultaneously
+	 * since some reisters are shared across bands.
+	 */
+
+	mt7615_phy_update_channel(&mdev->phy, 0);
+	if (mdev->phy2)
+		mt7615_phy_update_channel(mdev->phy2, 1);
+
 	cur_time = ktime_get_boottime();
 
 	mt76_update_survey_active_time(&mdev->phy, cur_time);
 	if (mdev->phy2)
 		mt76_update_survey_active_time(mdev->phy2, cur_time);
+
+	/* reset obss airtime */
+	mt76_set(dev, MT_WF_RMAC_MIB_TIME0, MT_WF_RMAC_MIB_RXTIME_CLR);
 }
 
+void mt7615_update_channel(struct mt76_phy *mphy)
+{
+	struct mt7615_dev *dev = container_of(mphy->dev, struct mt7615_dev, mt76);
+
+	if (mt76_connac_pm_wake(&dev->mphy, &dev->pm))
+		return;
+
+	mt7615_update_survey(dev);
+	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
+}
+EXPORT_SYMBOL_GPL(mt7615_update_channel);
+
 static void
 mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 8f03dddba8cf..8fbaf8356e1a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -471,7 +471,7 @@ int mt7615_set_channel(struct mt7615_phy *phy);
 void mt7615_init_work(struct mt7615_dev *dev);
 
 int mt7615_mcu_restart(struct mt76_dev *dev);
-void mt7615_update_channel(struct mt76_dev *mdev);
+void mt7615_update_channel(struct mt76_phy *mphy);
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask);
 void mt7615_mac_reset_counters(struct mt7615_dev *dev);
 void mt7615_mac_cca_stats_reset(struct mt7615_phy *phy);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 0da37867cb64..7572c793aa51 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -1022,12 +1022,12 @@ void mt76x02_mac_set_tx_protection(struct mt76x02_dev *dev, bool legacy_prot,
 		mt76_wr(dev, MT_TX_PROT_CFG6 + i * 4, vht_prot[i]);
 }
 
-void mt76x02_update_channel(struct mt76_dev *mdev)
+void mt76x02_update_channel(struct mt76_phy *mphy)
 {
-	struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76);
+	struct mt76x02_dev *dev = container_of(mphy->dev, struct mt76x02_dev, mt76);
 	struct mt76_channel_state *state;
 
-	state = mdev->phy.chan_state;
+	state = mphy->chan_state;
 	state->cc_busy += mt76_rr(dev, MT_CH_BUSY);
 
 	spin_lock_bh(&dev->mt76.cc_lock);
@@ -1169,7 +1169,7 @@ void mt76x02_mac_work(struct work_struct *work)
 
 	mutex_lock(&dev->mt76.mutex);
 
-	mt76_update_survey(&dev->mt76);
+	mt76_update_survey(&dev->mphy);
 	for (i = 0, idx = 0; i < 16; i++) {
 		u32 val = mt76_rr(dev, MT_TX_AGG_CNT(i));
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h
index 0cfbaca50210..5dc6c834111e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h
@@ -195,7 +195,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
 			    struct ieee80211_sta *sta, int len);
 void mt76x02_mac_poll_tx_status(struct mt76x02_dev *dev, bool irq);
 void mt76x02_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e);
-void mt76x02_update_channel(struct mt76_dev *mdev);
+void mt76x02_update_channel(struct mt76_phy *mphy);
 void mt76x02_mac_work(struct work_struct *work);
 
 void mt76x02_mac_cc_reset(struct mt76x02_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 741899aaaed8..c093c13bf1f1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1471,16 +1471,16 @@ mt7915_phy_get_nf(struct mt7915_phy *phy, int idx)
 	return sum / n;
 }
 
-static void
-mt7915_phy_update_channel(struct mt76_phy *mphy, int idx)
+void mt7915_update_channel(struct mt76_phy *mphy)
 {
 	struct mt7915_phy *phy = (struct mt7915_phy *)mphy->priv;
 	struct mt76_channel_state *state = mphy->chan_state;
+	bool ext_phy = phy != &phy->dev->phy;
 	int nf;
 
 	mt7915_mcu_get_chan_mib_info(phy, false);
 
-	nf = mt7915_phy_get_nf(phy, idx);
+	nf = mt7915_phy_get_nf(phy, ext_phy);
 	if (!phy->noise)
 		phy->noise = nf << 4;
 	else if (nf)
@@ -1489,13 +1489,6 @@ mt7915_phy_update_channel(struct mt76_phy *mphy, int idx)
 	state->noise = -(phy->noise >> 4);
 }
 
-void mt7915_update_channel(struct mt76_dev *mdev)
-{
-	mt7915_phy_update_channel(&mdev->phy, 0);
-	if (mdev->phy2)
-		mt7915_phy_update_channel(mdev->phy2, 1);
-}
-
 static bool
 mt7915_wait_reset_state(struct mt7915_dev *dev, u32 state)
 {
@@ -1804,7 +1797,7 @@ void mt7915_mac_work(struct work_struct *work)
 
 	mutex_lock(&mphy->dev->mutex);
 
-	mt76_update_survey(mphy->dev);
+	mt76_update_survey(mphy);
 	if (++mphy->mac_work_count == 5) {
 		mphy->mac_work_count = 0;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index b0cdd53098ef..7833f6e3781d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -436,7 +436,7 @@ int mt76_dfs_start_rdd(struct mt7915_dev *dev, bool force);
 int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy);
 void mt7915_set_stream_he_caps(struct mt7915_phy *phy);
 void mt7915_set_stream_vht_txbf_caps(struct mt7915_phy *phy);
-void mt7915_update_channel(struct mt76_dev *mdev);
+void mt7915_update_channel(struct mt76_phy *mphy);
 int mt7915_init_debugfs(struct mt7915_dev *dev);
 #ifdef CONFIG_MAC80211_DEBUGFS
 void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 8e2252c1acd4..1d710f276884 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1227,18 +1227,18 @@ mt7921_phy_update_channel(struct mt76_phy *mphy, int idx)
 	state->noise = -(phy->noise >> 4);
 }
 
-void mt7921_update_channel(struct mt76_dev *mdev)
+void mt7921_update_channel(struct mt76_phy *mphy)
 {
-	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+	struct mt7921_dev *dev = container_of(mphy->dev, struct mt7921_dev, mt76);
 
-	if (mt76_connac_pm_wake(&dev->mphy, &dev->pm))
+	if (mt76_connac_pm_wake(mphy, &dev->pm))
 		return;
 
-	mt7921_phy_update_channel(&mdev->phy, 0);
+	mt7921_phy_update_channel(mphy, 0);
 	/* reset obss airtime */
 	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(0), MT_WF_RMAC_MIB_RXTIME_CLR);
 
-	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
+	mt76_connac_power_save_sched(mphy, &dev->pm);
 }
 
 void mt7921_tx_token_put(struct mt7921_dev *dev)
@@ -1436,7 +1436,7 @@ void mt7921_mac_work(struct work_struct *work)
 
 	mt7921_mutex_acquire(phy->dev);
 
-	mt76_update_survey(mphy->dev);
+	mt76_update_survey(mphy);
 	if (++mphy->mac_work_count == 2) {
 		mphy->mac_work_count = 0;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 087067e7ea5b..a6ff704d0023 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -356,7 +356,7 @@ void mt7921_stats_work(struct work_struct *work);
 void mt7921_txp_skb_unmap(struct mt76_dev *dev,
 			  struct mt76_txwi_cache *txwi);
 void mt7921_set_stream_he_caps(struct mt7921_phy *phy);
-void mt7921_update_channel(struct mt76_dev *mdev);
+void mt7921_update_channel(struct mt76_phy *mphy);
 int mt7921_init_debugfs(struct mt7921_dev *dev);
 
 int mt7921_mcu_uni_tx_ba(struct mt7921_dev *dev,

From fd843822231337f356f2cb2af2f7e43efac015bb Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 11 Jun 2021 02:43:46 +0800
Subject: [PATCH 2290/3292] mt76: mt7915: introduce mt7915_mcu_set_txbf()

Use mt7915_mcu_set_txbf() to reduce global functions. This can be
easily extended to support other TxBF commands in further patches.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7915/debugfs.c   |  3 +-
 .../net/wireless/mediatek/mt76/mt7915/init.c  |  7 +-
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 83 +++++++++----------
 .../net/wireless/mediatek/mt76/mt7915/mcu.h   |  6 ++
 .../wireless/mediatek/mt76/mt7915/mt7915.h    |  4 +-
 5 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 3961d46e0df8..c6e9a7038311 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -3,6 +3,7 @@
 
 #include "mt7915.h"
 #include "eeprom.h"
+#include "mcu.h"
 
 /** global debugfs **/
 
@@ -16,7 +17,7 @@ mt7915_implicit_txbf_set(void *data, u64 val)
 
 	dev->ibf = !!val;
 
-	return mt7915_mcu_set_txbf_type(dev);
+	return mt7915_mcu_set_txbf(dev, MT_BF_TYPE_UPDATE);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 10c8d9244dc9..36bb7121c57f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -313,20 +313,19 @@ static int mt7915_txbf_init(struct mt7915_dev *dev)
 {
 	int ret;
 
-
 	if (dev->dbdc_support) {
-		ret = mt7915_mcu_set_txbf_module(dev);
+		ret = mt7915_mcu_set_txbf(dev, MT_BF_MODULE_UPDATE);
 		if (ret)
 			return ret;
 	}
 
 	/* trigger sounding packets */
-	ret = mt7915_mcu_set_txbf_sounding(dev);
+	ret = mt7915_mcu_set_txbf(dev, MT_BF_SOUNDING_ON);
 	if (ret)
 		return ret;
 
 	/* enable eBF */
-	return mt7915_mcu_set_txbf_type(dev);
+	return mt7915_mcu_set_txbf(dev, MT_BF_TYPE_UPDATE);
 }
 
 static int mt7915_register_ext_phy(struct mt7915_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index e28396938ce9..ca633c1bb8c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3893,57 +3893,50 @@ int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band)
 				 &req, sizeof(req), false);
 }
 
-int mt7915_mcu_set_txbf_module(struct mt7915_dev *dev)
+int mt7915_mcu_set_txbf(struct mt7915_dev *dev, u8 action)
 {
-#define MT_BF_MODULE_UPDATE               25
 	struct {
 		u8 action;
-		u8 bf_num;
-		u8 bf_bitmap;
-		u8 bf_sel[8];
-		u8 rsv[8];
+		union {
+			struct {
+				u8 snd_mode;
+				u8 sta_num;
+				u8 rsv;
+				u8 wlan_idx[4];
+				__le32 snd_period;	/* ms */
+			} __packed snd;
+			struct {
+				bool ebf;
+				bool ibf;
+				u8 rsv;
+			} __packed type;
+			struct {
+				u8 bf_num;
+				u8 bf_bitmap;
+				u8 bf_sel[8];
+				u8 rsv[5];
+			} __packed mod;
+		};
 	} __packed req = {
-		.action = MT_BF_MODULE_UPDATE,
-		.bf_num = 2,
-		.bf_bitmap = GENMASK(1, 0),
+		.action = action,
 	};
 
-	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(TXBF_ACTION), &req,
-				 sizeof(req), true);
-}
-
-int mt7915_mcu_set_txbf_type(struct mt7915_dev *dev)
-{
-#define MT_BF_TYPE_UPDATE		20
-	struct {
-		u8 action;
-		bool ebf;
-		bool ibf;
-		u8 rsv;
-	} __packed req = {
-		.action = MT_BF_TYPE_UPDATE,
-		.ebf = true,
-		.ibf = dev->ibf,
-	};
-
-	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(TXBF_ACTION), &req,
-				 sizeof(req), true);
-}
-
-int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev)
-{
-#define MT_BF_PROCESSING		4
-	struct {
-		u8 action;
-		u8 snd_mode;
-		u8 sta_num;
-		u8 rsv;
-		u8 wlan_idx[4];
-		__le32 snd_period;	/* ms */
-	} __packed req = {
-		.action = true,
-		.snd_mode = MT_BF_PROCESSING,
-	};
+#define MT_BF_PROCESSING	4
+	switch (action) {
+	case MT_BF_SOUNDING_ON:
+		req.snd.snd_mode = MT_BF_PROCESSING;
+		break;
+	case MT_BF_TYPE_UPDATE:
+		req.type.ebf = true;
+		req.type.ibf = dev->ibf;
+		break;
+	case MT_BF_MODULE_UPDATE:
+		req.mod.bf_num = 2;
+		req.mod.bf_bitmap = GENMASK(1, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(TXBF_ACTION), &req,
 				 sizeof(req), true);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 70ab06d9f954..9087a7771c35 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -1120,6 +1120,12 @@ enum {
 	MT_IBF = BIT(1)		/* implicit beamforming */
 };
 
+enum {
+	MT_BF_SOUNDING_ON = 1,
+	MT_BF_TYPE_UPDATE = 20,
+	MT_BF_MODULE_UPDATE = 25
+};
+
 #define MT7915_WTBL_UPDATE_MAX_SIZE	(sizeof(struct wtbl_req_hdr) +	\
 					 sizeof(struct wtbl_generic) +	\
 					 sizeof(struct wtbl_rx) +	\
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 7833f6e3781d..3f613fae6218 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -356,9 +356,7 @@ int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter);
 int mt7915_mcu_set_sku_en(struct mt7915_phy *phy, bool enable);
 int mt7915_mcu_set_txpower_sku(struct mt7915_phy *phy);
 int mt7915_mcu_get_txpower_sku(struct mt7915_phy *phy, s8 *txpower, int len);
-int mt7915_mcu_set_txbf_type(struct mt7915_dev *dev);
-int mt7915_mcu_set_txbf_module(struct mt7915_dev *dev);
-int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev);
+int mt7915_mcu_set_txbf(struct mt7915_dev *dev, u8 action);
 int mt7915_mcu_set_fcc5_lpn(struct mt7915_dev *dev, int val);
 int mt7915_mcu_set_pulse_th(struct mt7915_dev *dev,
 			    const struct mt7915_dfs_pulse *pulse);

From b70946ced192a04a4d462e384ee1b44caed79acd Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 11 Jun 2021 04:03:26 +0800
Subject: [PATCH 2291/3292] mt76: mt7915: improve MU stability

- Adjust starec flow since VHT MU group is only updated by starec_vht
  follows starec_bf settings.
- Drop unnecessary MU BF checks.

TX MPDU PER (Status = Success):
                                           TOT_MPDU_CNT  FAIL_MPDU_CNT  TX_PER
WCID Rate
1      VHT_BW80_2SS_MCS7_LGI_LDPC_MUBF              114              0   0.00%
       VHT_BW80_2SS_MCS7_LGI_LDPC_MUBF_MU            64              0   0.00%
       VHT_BW80_2SS_MCS7_SGI_LDPC_MUBF              128              0   0.00%
       VHT_BW80_2SS_MCS7_SGI_LDPC_MUBF_MU           745              0   0.00%
       VHT_BW80_2SS_MCS8_LGI_LDPC_MUBF_MU           856              0   0.00%
       VHT_BW80_2SS_MCS8_SGI_LDPC_MUBF_MU          1430              4   0.28%
       VHT_BW80_2SS_MCS9_LGI_LDPC_MUBF_MU          5220             31   0.59%
       VHT_BW80_2SS_MCS9_LGI_LDPC_iBF                59              0   0.00%
       VHT_BW80_2SS_MCS9_SGI_LDPC_MUBF               64              2   3.12%
       VHT_BW80_2SS_MCS9_SGI_LDPC_MUBF_MU         22132             76   0.34%
       VHT_BW80_2SS_MCS9_SGI_LDPC_iBF              2866              1   0.03%
2      VHT_BW80_2SS_MCS7_LGI_LDPC_MUBF_MU          3781              5   0.13%
       VHT_BW80_2SS_MCS7_SGI_LDPC_MUBF_MU           735              0   0.00%
       VHT_BW80_2SS_MCS8_LGI_LDPC_MUBF_MU          1270            365  28.74%
       VHT_BW80_2SS_MCS8_SGI_LDPC_MUBF_MU          3420             57   1.67%
       VHT_BW80_2SS_MCS9_LGI_LDPC_MUBF              128              0   0.00%
       VHT_BW80_2SS_MCS9_LGI_LDPC_MUBF_MU            64              0   0.00%
       VHT_BW80_2SS_MCS9_SGI_LDPC_MUBF              191              0   0.00%
       VHT_BW80_2SS_MCS9_SGI_LDPC_MUBF_MU         18833            320   1.70%
       VHT_BW80_2SS_MCS9_SGI_LDPC_iBF              6040              0   0.00%
287    OFDM 6M

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7915/debugfs.c   | 12 ++-
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 98 +++++++++----------
 .../net/wireless/mediatek/mt76/mt7915/regs.h  |  5 +
 3 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index c6e9a7038311..64048243e34b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -148,6 +148,9 @@ mt7915_txbf_stat_read_phy(struct mt7915_phy *phy, struct seq_file *s)
 {
 	struct mt7915_dev *dev = s->private;
 	bool ext_phy = phy != &dev->phy;
+	static const char * const bw[] = {
+		"BW20", "BW40", "BW80", "BW160"
+	};
 	int cnt;
 
 	if (!phy)
@@ -165,11 +168,16 @@ mt7915_txbf_stat_read_phy(struct mt7915_phy *phy, struct seq_file *s)
 	seq_puts(s, "Tx Beamformer Rx feedback statistics: ");
 
 	cnt = mt76_rr(dev, MT_ETBF_RX_FB_CNT(ext_phy));
-	seq_printf(s, "All: %ld, HE: %ld, VHT: %ld, HT: %ld\n",
+	seq_printf(s, "All: %ld, HE: %ld, VHT: %ld, HT: %ld, ",
 		   FIELD_GET(MT_ETBF_RX_FB_ALL, cnt),
 		   FIELD_GET(MT_ETBF_RX_FB_HE, cnt),
 		   FIELD_GET(MT_ETBF_RX_FB_VHT, cnt),
 		   FIELD_GET(MT_ETBF_RX_FB_HT, cnt));
+	cnt = mt76_rr(dev, MT_ETBF_RX_FB_CONT(ext_phy));
+	seq_printf(s, "%s, NC: %ld, NR: %ld\n",
+		   bw[FIELD_GET(MT_ETBF_RX_FB_BW, cnt)],
+		   FIELD_GET(MT_ETBF_RX_FB_NC, cnt),
+		   FIELD_GET(MT_ETBF_RX_FB_NR, cnt));
 
 	/* Tx Beamformee Rx NDPA & Tx feedback report */
 	cnt = mt76_rr(dev, MT_ETBF_TX_NDP_BFRP(ext_phy));
@@ -205,7 +213,7 @@ mt7915_tx_stats_show(struct seq_file *file, void *data)
 	mt7915_txbf_stat_read_phy(mt7915_ext_phy(dev), file);
 
 	/* Tx amsdu info */
-	seq_puts(file, "Tx MSDU stat:\n");
+	seq_puts(file, "Tx MSDU statistics:\n");
 	for (i = 0, n = 0; i < ARRAY_SIZE(stat); i++) {
 		stat[i] = mt76_rr(dev,  MT_PLE_AMSDU_PACK_MSDU_CNT(i));
 		n += stat[i];
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index ca633c1bb8c7..b565024404cf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1685,27 +1685,18 @@ mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
 		HE_PHY(CAP2_UL_MU_PARTIAL_MU_MIMO, elem->phy_cap_info[2]);
 }
 
-static int
-mt7915_mcu_add_mu(struct mt7915_dev *dev, struct ieee80211_vif *vif,
-		  struct ieee80211_sta *sta)
+static void
+mt7915_mcu_sta_vht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
 {
-	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
-	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
-	struct sk_buff *skb;
-	int len = sizeof(struct sta_req_hdr) + sizeof(struct sta_rec_muru);
+	struct sta_rec_vht *vht;
+	struct tlv *tlv;
 
-	if (!sta->vht_cap.vht_supported && !sta->he_cap.has_he)
-		return 0;
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_VHT, sizeof(*vht));
 
-	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
-
-	/* starec muru */
-	mt7915_mcu_sta_muru_tlv(skb, sta);
-
-	return mt76_mcu_skb_send_msg(&dev->mt76, skb,
-				     MCU_EXT_CMD(STA_REC_UPDATE), true);
+	vht = (struct sta_rec_vht *)tlv;
+	vht->vht_cap = cpu_to_le32(sta->vht_cap.cap);
+	vht->vht_rx_mcs_map = sta->vht_cap.vht_mcs.rx_mcs_map;
+	vht->vht_tx_mcs_map = sta->vht_cap.vht_mcs.tx_mcs_map;
 }
 
 static void
@@ -1757,17 +1748,6 @@ mt7915_mcu_sta_tlv(struct mt7915_dev *dev, struct sk_buff *skb,
 			mt7915_mcu_sta_amsdu_tlv(skb, sta);
 	}
 
-	/* starec vht */
-	if (sta->vht_cap.vht_supported) {
-		struct sta_rec_vht *vht;
-
-		tlv = mt7915_mcu_add_tlv(skb, STA_REC_VHT, sizeof(*vht));
-		vht = (struct sta_rec_vht *)tlv;
-		vht->vht_cap = cpu_to_le32(sta->vht_cap.cap);
-		vht->vht_rx_mcs_map = sta->vht_cap.vht_mcs.rx_mcs_map;
-		vht->vht_tx_mcs_map = sta->vht_cap.vht_mcs.tx_mcs_map;
-	}
-
 	/* starec he */
 	if (sta->he_cap.has_he)
 		mt7915_mcu_sta_he_tlv(skb, sta);
@@ -2157,26 +2137,21 @@ mt7915_mcu_add_txbf(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		vc = mt7915_get_he_phy_cap(phy, vif);
 		ve = &vc->he_cap_elem;
 
-		ebfee = !!((HE_PHY(CAP3_SU_BEAMFORMER, pe->phy_cap_info[3]) ||
-			    HE_PHY(CAP4_MU_BEAMFORMER, pe->phy_cap_info[4])) &&
+		ebfee = !!(HE_PHY(CAP3_SU_BEAMFORMER, pe->phy_cap_info[3]) &&
 			   HE_PHY(CAP4_SU_BEAMFORMEE, ve->phy_cap_info[4]));
-		ebf = !!((HE_PHY(CAP3_SU_BEAMFORMER, ve->phy_cap_info[3]) ||
-			  HE_PHY(CAP4_MU_BEAMFORMER, ve->phy_cap_info[4])) &&
+		ebf = !!(HE_PHY(CAP3_SU_BEAMFORMER, ve->phy_cap_info[3]) &&
 			 HE_PHY(CAP4_SU_BEAMFORMEE, pe->phy_cap_info[4]));
 	} else if (sta->vht_cap.vht_supported) {
 		struct ieee80211_sta_vht_cap *pc;
 		struct ieee80211_sta_vht_cap *vc;
-		u32 cr, ce;
 
 		pc = &sta->vht_cap;
 		vc = &phy->mt76->sband_5g.sband.vht_cap;
-		cr = IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
-		     IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE;
-		ce = IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
-		     IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
 
-		ebfee = !!((pc->cap & cr) && (vc->cap & ce));
-		ebf = !!((vc->cap & cr) && (pc->cap & ce));
+		ebfee = !!((pc->cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) &&
+			   (vc->cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE));
+		ebf = !!((vc->cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) &&
+			 (pc->cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE));
 	}
 
 	/* must keep each tag independent */
@@ -2379,6 +2354,38 @@ mt7915_mcu_add_group(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 				 sizeof(req), true);
 }
 
+static int
+mt7915_mcu_add_mu(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		  struct ieee80211_sta *sta)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct sk_buff *skb;
+	int ret;
+
+	if (!sta->vht_cap.vht_supported && !sta->he_cap.has_he)
+		return 0;
+
+	ret = mt7915_mcu_add_group(dev, vif, sta);
+	if (ret)
+		return ret;
+
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	/* wait until TxBF and MU ready to update stare vht */
+
+	/* starec muru */
+	mt7915_mcu_sta_muru_tlv(skb, sta);
+	/* starec vht */
+	mt7915_mcu_sta_vht_tlv(skb, sta);
+
+	return mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				     MCU_EXT_CMD(STA_REC_UPDATE), true);
+}
+
 int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta, bool enable)
 {
@@ -2388,22 +2395,15 @@ int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		return 0;
 
 	/* must keep the order */
-	ret = mt7915_mcu_add_group(dev, vif, sta);
-	if (ret)
-		return ret;
-
 	ret = mt7915_mcu_add_txbf(dev, vif, sta, enable);
-	if (ret)
+	if (ret || !enable)
 		return ret;
 
 	ret = mt7915_mcu_add_mu(dev, vif, sta);
 	if (ret)
 		return ret;
 
-	if (enable)
-		return mt7915_mcu_add_rate_ctrl(dev, vif, sta);
-
-	return 0;
+	return mt7915_mcu_add_rate_ctrl(dev, vif, sta);
 }
 
 int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index 56c33eaa9d79..a213b5cb82f8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -99,6 +99,11 @@
 #define MT_ETBF_TX_FB_CPL		GENMASK(31, 16)
 #define MT_ETBF_TX_FB_TRI		GENMASK(15, 0)
 
+#define MT_ETBF_RX_FB_CONT(_band)	MT_WF_ETBF(_band, 0x068)
+#define MT_ETBF_RX_FB_BW		GENMASK(7, 6)
+#define MT_ETBF_RX_FB_NC		GENMASK(5, 3)
+#define MT_ETBF_RX_FB_NR		GENMASK(2, 0)
+
 #define MT_ETBF_TX_APP_CNT(_band)	MT_WF_ETBF(_band, 0x0f0)
 #define MT_ETBF_TX_IBF_CNT		GENMASK(31, 16)
 #define MT_ETBF_TX_EBF_CNT		GENMASK(15, 0)

From c44ccf1dcce89c1d29500d209d93092acd32349d Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 10 Jun 2021 14:44:37 -0700
Subject: [PATCH 2292/3292] mt76: add a space between comment char and SPDX tag

checkpatch expects a space between '#' and 'SPDX...'
Add a space.

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/Makefile | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/Makefile | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/Makefile | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/Makefile b/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
index e8fc4a7ae9bc..83f9861ff522 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
@@ -1,4 +1,4 @@
-#SPDX-License-Identifier: ISC
+# SPDX-License-Identifier: ISC
 
 obj-$(CONFIG_MT7615_COMMON) += mt7615-common.o
 obj-$(CONFIG_MT7615E) += mt7615e.o
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/Makefile b/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
index 40c8061787e9..80e49244348e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
@@ -1,4 +1,4 @@
-#SPDX-License-Identifier: ISC
+# SPDX-License-Identifier: ISC
 
 obj-$(CONFIG_MT7915E) += mt7915e.o
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/Makefile b/drivers/net/wireless/mediatek/mt76/mt7921/Makefile
index e531666f9fb4..0ebb59966a08 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/Makefile
@@ -1,4 +1,4 @@
-#SPDX-License-Identifier: ISC
+# SPDX-License-Identifier: ISC
 
 obj-$(CONFIG_MT7921E) += mt7921e.o
 

From e7f1c44192df90c17d486d8e3614a338772132d5 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 10 Jun 2021 14:44:38 -0700
Subject: [PATCH 2293/3292] mt76: use SPDX header file comment style

header files should use '/* SPDX ... */
Change from c file comment syle to header style

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/sdio.h     | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/testmode.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.h b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.h
index 05180971de84..03877d89e152 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: ISC
+/* SPDX-License-Identifier: ISC */
 /* Copyright (C) 2020 MediaTek Inc.
  *
  * Author: Sean Wang <sean.wang@mediatek.com>
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.h b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.h
index 8f8533ef9859..397a6b5532bc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: ISC
+/* SPDX-License-Identifier: ISC */
 /* Copyright (C) 2020 MediaTek Inc. */
 
 #ifndef __MT7915_TESTMODE_H

From 2707ff4dd7b1479dbd44ebb3c74788084cc95245 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 12 Jun 2021 02:04:20 +0800
Subject: [PATCH 2294/3292] mt76: mt7915: fix IEEE80211_HE_PHY_CAP7_MAX_NC for
 station mode

The value of station mode is always 0.

Fixed: 00b2e16e0063 ("mt76: mt7915: add TxBF capabilities")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 36bb7121c57f..7af1cdbbfebd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -568,6 +568,9 @@ mt7915_set_stream_he_txbf_caps(struct ieee80211_sta_he_cap *he_cap,
 	if (nss < 2)
 		return;
 
+	/* the maximum cap is 4 x 3, (Nr, Nc) = (3, 2) */
+	elem->phy_cap_info[7] |= min_t(int, nss - 1, 2) << 3;
+
 	if (vif != NL80211_IFTYPE_AP)
 		return;
 
@@ -581,9 +584,6 @@ mt7915_set_stream_he_txbf_caps(struct ieee80211_sta_he_cap *he_cap,
 	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
 	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB;
 	elem->phy_cap_info[6] |= c;
-
-	/* the maximum cap is 4 x 3, (Nr, Nc) = (3, 2) */
-	elem->phy_cap_info[7] |= min_t(int, nss - 1, 2) << 3;
 }
 
 static void

From f5056657f995f0e36bc9e30e5f608ff55c1bdf72 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:52 +0800
Subject: [PATCH 2295/3292] mt76: mt7921: enable deep sleep at runtime

Enable the deep sleep mode with that firmware is able to trap into
the doze state at runtime to reduce the power consumption further.

The deep sleep mode is not allowed in the STA state transition with
the firmware to have the fast connection experience as we've done in
the full power mode

Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/mcu.c   |  8 ++-
 .../wireless/mediatek/mt76/mt76_connac_mcu.c  | 40 +++++++++---
 .../wireless/mediatek/mt76/mt76_connac_mcu.h  | 20 ++++--
 .../net/wireless/mediatek/mt76/mt7921/init.c  |  6 +-
 .../net/wireless/mediatek/mt76/mt7921/main.c  | 63 ++++++++++++-------
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   |  9 ++-
 .../wireless/mediatek/mt76/mt7921/mt7921.h    |  7 ++-
 .../net/wireless/mediatek/mt76/mt7921/pci.c   |  1 +
 8 files changed, 110 insertions(+), 44 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index ea1f23e99ca1..f8a09692d3e4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1028,9 +1028,10 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	if (IS_ERR(sskb))
 		return PTR_ERR(sskb);
 
-	mt76_connac_mcu_sta_basic_tlv(sskb, vif, sta, enable);
+	mt76_connac_mcu_sta_basic_tlv(sskb, vif, sta, enable, true);
 	if (enable && sta)
-		mt76_connac_mcu_sta_tlv(phy->mt76, sskb, sta, vif, 0);
+		mt76_connac_mcu_sta_tlv(phy->mt76, sskb, sta, vif, 0,
+					MT76_STA_INFO_STATE_ASSOC);
 
 	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(&dev->mt76, &msta->wcid,
 						  WTBL_RESET_AND_SET, NULL,
@@ -1157,11 +1158,12 @@ __mt7615_mcu_add_sta(struct mt76_phy *phy, struct ieee80211_vif *vif,
 		.vif = vif,
 		.offload_fw = offload_fw,
 		.enable = enable,
+		.newly = true,
 		.cmd = cmd,
 	};
 
 	info.wcid = sta ? (struct mt76_wcid *)sta->drv_priv : &mvif->sta.wcid;
-	return mt76_connac_mcu_add_sta_cmd(phy, &info);
+	return mt76_connac_mcu_sta_cmd(phy, &info);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 78498d86efc3..302318e3a964 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -304,7 +304,7 @@ EXPORT_SYMBOL_GPL(mt76_connac_mcu_alloc_wtbl_req);
 void mt76_connac_mcu_sta_basic_tlv(struct sk_buff *skb,
 				   struct ieee80211_vif *vif,
 				   struct ieee80211_sta *sta,
-				   bool enable)
+				   bool enable, bool newly)
 {
 	struct sta_rec_basic *basic;
 	struct tlv *tlv;
@@ -316,7 +316,8 @@ void mt76_connac_mcu_sta_basic_tlv(struct sk_buff *skb,
 	basic->extra_info = cpu_to_le16(EXTRA_INFO_VER);
 
 	if (enable) {
-		basic->extra_info |= cpu_to_le16(EXTRA_INFO_NEW);
+		if (newly)
+			basic->extra_info |= cpu_to_le16(EXTRA_INFO_NEW);
 		basic->conn_state = CONN_STATE_PORT_SECURE;
 	} else {
 		basic->conn_state = CONN_STATE_DISCONNECT;
@@ -709,7 +710,7 @@ mt76_connac_get_phy_mode_v2(struct mt76_phy *mphy, struct ieee80211_vif *vif,
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
 			     struct ieee80211_vif *vif,
-			     u8 rcpi)
+			     u8 rcpi, u8 sta_state)
 {
 	struct cfg80211_chan_def *chandef = &mphy->chandef;
 	enum nl80211_band band = chandef->chan->band;
@@ -774,7 +775,7 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 
 	tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_STATE, sizeof(*state));
 	state = (struct sta_rec_state *)tlv;
-	state->state = 2;
+	state->state = sta_state;
 
 	if (sta->vht_cap.vht_supported) {
 		state->vht_opmode = sta->bandwidth;
@@ -866,8 +867,8 @@ void mt76_connac_mcu_wtbl_ht_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_ht_tlv);
 
-int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
-				struct mt76_sta_cmd_info *info)
+int mt76_connac_mcu_sta_cmd(struct mt76_phy *phy,
+			    struct mt76_sta_cmd_info *info)
 {
 	struct mt76_vif *mvif = (struct mt76_vif *)info->vif->drv_priv;
 	struct mt76_dev *dev = phy->dev;
@@ -881,10 +882,11 @@ int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
 
 	if (info->sta || !info->offload_fw)
 		mt76_connac_mcu_sta_basic_tlv(skb, info->vif, info->sta,
-					      info->enable);
+					      info->enable, info->newly);
 	if (info->sta && info->enable)
 		mt76_connac_mcu_sta_tlv(phy, skb, info->sta,
-					info->vif, info->rcpi);
+					info->vif, info->rcpi,
+					info->state);
 
 	sta_wtbl = mt76_connac_mcu_add_tlv(skb, STA_REC_WTBL,
 					   sizeof(struct tlv));
@@ -908,7 +910,7 @@ int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
 
 	return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true);
 }
-EXPORT_SYMBOL_GPL(mt76_connac_mcu_add_sta_cmd);
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_sta_cmd);
 
 void mt76_connac_mcu_wtbl_ba_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				 struct ieee80211_ampdu_params *params,
@@ -1616,6 +1618,26 @@ int mt76_connac_mcu_set_deep_sleep(struct mt76_dev *dev, bool enable)
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_set_deep_sleep);
 
+int mt76_connac_sta_state_dp(struct mt76_dev *dev,
+			     enum ieee80211_sta_state old_state,
+			     enum ieee80211_sta_state new_state)
+{
+	if ((old_state == IEEE80211_STA_ASSOC &&
+	     new_state == IEEE80211_STA_AUTHORIZED) ||
+	    (old_state == IEEE80211_STA_NONE &&
+	     new_state == IEEE80211_STA_NOTEXIST))
+		mt76_connac_mcu_set_deep_sleep(dev, true);
+
+	if ((old_state == IEEE80211_STA_NOTEXIST &&
+	     new_state == IEEE80211_STA_NONE) ||
+	    (old_state == IEEE80211_STA_AUTHORIZED &&
+	     new_state == IEEE80211_STA_ASSOC))
+		mt76_connac_mcu_set_deep_sleep(dev, false);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mt76_connac_sta_state_dp);
+
 void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
 				    struct mt76_connac_coredump *coredump)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 9f3c87902db8..1c73beb22677 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -927,6 +927,12 @@ struct mt76_connac_suspend_tlv {
 	u8 pad[5];
 } __packed;
 
+enum mt76_sta_info_state {
+	MT76_STA_INFO_STATE_NONE,
+	MT76_STA_INFO_STATE_AUTH,
+	MT76_STA_INFO_STATE_ASSOC
+};
+
 struct mt76_sta_cmd_info {
 	struct ieee80211_sta *sta;
 	struct mt76_wcid *wcid;
@@ -935,8 +941,10 @@ struct mt76_sta_cmd_info {
 
 	bool offload_fw;
 	bool enable;
+	bool newly;
 	int cmd;
 	u8 rcpi;
+	u8 state;
 };
 
 #define MT_SKU_POWER_LIMIT	161
@@ -1006,7 +1014,8 @@ int mt76_connac_mcu_set_channel_domain(struct mt76_phy *phy);
 int mt76_connac_mcu_set_vif_ps(struct mt76_dev *dev, struct ieee80211_vif *vif);
 void mt76_connac_mcu_sta_basic_tlv(struct sk_buff *skb,
 				   struct ieee80211_vif *vif,
-				   struct ieee80211_sta *sta, bool enable);
+				   struct ieee80211_sta *sta, bool enable,
+				   bool newly);
 void mt76_connac_mcu_wtbl_generic_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				      struct ieee80211_vif *vif,
 				      struct ieee80211_sta *sta, void *sta_wtbl,
@@ -1021,7 +1030,7 @@ int mt76_connac_mcu_sta_update_hdr_trans(struct mt76_dev *dev,
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
 			     struct ieee80211_vif *vif,
-			     u8 rcpi);
+			     u8 rcpi, u8 state);
 void mt76_connac_mcu_wtbl_ht_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				 struct ieee80211_sta *sta, void *sta_wtbl,
 				 void *wtbl_tlv);
@@ -1043,8 +1052,8 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
 				struct ieee80211_vif *vif,
 				struct mt76_wcid *wcid,
 				bool enable);
-int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
-				struct mt76_sta_cmd_info *info);
+int mt76_connac_mcu_sta_cmd(struct mt76_phy *phy,
+			    struct mt76_sta_cmd_info *info);
 void mt76_connac_mcu_beacon_loss_iter(void *priv, u8 *mac,
 				      struct ieee80211_vif *vif);
 int mt76_connac_mcu_set_rts_thresh(struct mt76_dev *dev, u32 val, u8 band);
@@ -1076,6 +1085,9 @@ int mt76_connac_mcu_update_gtk_rekey(struct ieee80211_hw *hw,
 int mt76_connac_mcu_set_hif_suspend(struct mt76_dev *dev, bool suspend);
 void mt76_connac_mcu_set_suspend_iter(void *priv, u8 *mac,
 				      struct ieee80211_vif *vif);
+int mt76_connac_sta_state_dp(struct mt76_dev *dev,
+			     enum ieee80211_sta_state old_state,
+			     enum ieee80211_sta_state new_state);
 int mt76_connac_mcu_chip_config(struct mt76_dev *dev);
 int mt76_connac_mcu_set_deep_sleep(struct mt76_dev *dev, bool enable);
 void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index b399f3b8b5d7..aca057c7576b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -231,7 +231,11 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
-	return mt7921_init_debugfs(dev);
+	ret = mt7921_init_debugfs(dev);
+	if (ret)
+		return ret;
+
+	return mt76_connac_mcu_set_deep_sleep(&dev->mt76, dev->pm.enable);
 }
 
 void mt7921_unregister_device(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 5fc6cf7e5455..07e86bab0348 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -577,7 +577,8 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
 		mt7921_mcu_uni_bss_ps(dev, vif);
 
 	if (changed & BSS_CHANGED_ASSOC) {
-		mt7921_mcu_sta_add(dev, NULL, vif, true);
+		mt7921_mcu_sta_update(dev, NULL, vif, true,
+				      MT76_STA_INFO_STATE_ASSOC);
 		mt7921_bss_bcnft_apply(dev, vif, info->assoc);
 	}
 
@@ -616,17 +617,14 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	if (ret)
 		return ret;
 
-	if (vif->type == NL80211_IFTYPE_STATION) {
+	if (vif->type == NL80211_IFTYPE_STATION)
 		mvif->wep_sta = msta;
-		if (!sta->tdls)
-			mt76_connac_mcu_uni_add_bss(&dev->mphy, vif,
-						    &mvif->sta.wcid, true);
-	}
 
 	mt7921_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
-	ret = mt7921_mcu_sta_add(dev, sta, vif, true);
+	ret = mt7921_mcu_sta_update(dev, sta, vif, true,
+				    MT76_STA_INFO_STATE_NONE);
 	if (ret)
 		return ret;
 
@@ -635,6 +633,27 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	return 0;
 }
 
+void mt7921_mac_sta_assoc(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+			  struct ieee80211_sta *sta)
+{
+	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
+	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+
+	mt7921_mutex_acquire(dev);
+
+	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls)
+		mt76_connac_mcu_uni_add_bss(&dev->mphy, vif, &mvif->sta.wcid,
+					    true);
+
+	mt7921_mac_wtbl_update(dev, msta->wcid.idx,
+			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+
+	mt7921_mcu_sta_update(dev, sta, vif, true, MT76_STA_INFO_STATE_ASSOC);
+
+	mt7921_mutex_release(dev);
+}
+
 void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta)
 {
@@ -644,7 +663,7 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt76_connac_free_pending_tx_skbs(&dev->pm, &msta->wcid);
 	mt76_connac_pm_wake(&dev->mphy, &dev->pm);
 
-	mt7921_mcu_sta_add(dev, sta, vif, false);
+	mt7921_mcu_sta_update(dev, sta, vif, false, MT76_STA_INFO_STATE_NONE);
 	mt7921_mac_wtbl_update(dev, msta->wcid.idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
@@ -790,20 +809,21 @@ mt7921_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	return ret;
 }
 
-static int
-mt7921_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-	       struct ieee80211_sta *sta)
+static int mt7921_sta_state(struct ieee80211_hw *hw,
+			    struct ieee80211_vif *vif,
+			    struct ieee80211_sta *sta,
+			    enum ieee80211_sta_state old_state,
+			    enum ieee80211_sta_state new_state)
 {
-	return mt76_sta_state(hw, vif, sta, IEEE80211_STA_NOTEXIST,
-			      IEEE80211_STA_NONE);
-}
+	struct mt7921_dev *dev = mt7921_hw_dev(hw);
 
-static int
-mt7921_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
-		  struct ieee80211_sta *sta)
-{
-	return mt76_sta_state(hw, vif, sta, IEEE80211_STA_NONE,
-			      IEEE80211_STA_NOTEXIST);
+	if (dev->pm.enable) {
+		mt7921_mutex_acquire(dev);
+		mt76_connac_sta_state_dp(&dev->mt76, old_state, new_state);
+		mt7921_mutex_release(dev);
+	}
+
+	return mt76_sta_state(hw, vif, sta, old_state, new_state);
 }
 
 static int
@@ -1149,8 +1169,7 @@ const struct ieee80211_ops mt7921_ops = {
 	.conf_tx = mt7921_conf_tx,
 	.configure_filter = mt7921_configure_filter,
 	.bss_info_changed = mt7921_bss_info_changed,
-	.sta_add = mt7921_sta_add,
-	.sta_remove = mt7921_sta_remove,
+	.sta_state = mt7921_sta_state,
 	.sta_pre_rcu_remove = mt76_sta_pre_rcu_remove,
 	.set_key = mt7921_set_key,
 	.sta_set_decap_offload = mt7921_sta_set_decap_offload,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index ca481e37d22c..23ec0c816d64 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1267,8 +1267,9 @@ int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 				 sizeof(req), false);
 }
 
-int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
-		       struct ieee80211_vif *vif, bool enable)
+int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta,
+			  struct ieee80211_vif *vif, bool enable,
+			  enum mt76_sta_info_state state)
 {
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
 	int rssi = -ewma_rssi_read(&mvif->rssi);
@@ -1277,6 +1278,7 @@ int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
 		.vif = vif,
 		.enable = enable,
 		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
+		.state = state,
 		.offload_fw = true,
 		.rcpi = to_rcpi(rssi),
 	};
@@ -1284,8 +1286,9 @@ int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
 
 	msta = sta ? (struct mt7921_sta *)sta->drv_priv : NULL;
 	info.wcid = msta ? &msta->wcid : &mvif->sta.wcid;
+	info.newly = msta ? state != MT76_STA_INFO_STATE_ASSOC : true;
 
-	return mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
+	return mt76_connac_mcu_sta_cmd(&dev->mphy, &info);
 }
 
 int __mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index a6ff704d0023..92cf38444b46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -261,8 +261,9 @@ int mt7921_mcu_init(struct mt7921_dev *dev);
 int mt7921_mcu_add_key(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 		       struct mt7921_sta *msta, struct ieee80211_key_conf *key,
 		       enum set_key_cmd cmd);
-int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
-		       struct ieee80211_vif *vif, bool enable);
+int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta,
+			  struct ieee80211_vif *vif, bool enable,
+			  enum mt76_sta_info_state state);
 int mt7921_mcu_set_chan_info(struct mt7921_phy *phy, int cmd);
 int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif);
 int mt7921_mcu_set_eeprom(struct mt7921_dev *dev);
@@ -334,6 +335,8 @@ void mt7921_mac_fill_rx_vector(struct mt7921_dev *dev, struct sk_buff *skb);
 void mt7921_mac_tx_free(struct mt7921_dev *dev, struct sk_buff *skb);
 int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta);
+void mt7921_mac_sta_assoc(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+			  struct ieee80211_sta *sta);
 void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta);
 void mt7921_mac_work(struct work_struct *work);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 13263f50dc00..27906b2cd912 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -106,6 +106,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
 		.rx_poll_complete = mt7921_rx_poll_complete,
 		.sta_ps = mt7921_sta_ps,
 		.sta_add = mt7921_mac_sta_add,
+		.sta_assoc = mt7921_mac_sta_assoc,
 		.sta_remove = mt7921_mac_sta_remove,
 		.update_survey = mt7921_update_channel,
 	};

From aa967eb791198f9cf5304493c93f7567dfc8a5ff Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 10 May 2021 23:14:53 +0800
Subject: [PATCH 2296/3292] mt76: mt7921: add deep sleep control to runtime-pm
 knob

Add addtional the deep sleep control to runtime-pm knob to
allow us to control driver switching between the full power
mode and the deep sleep mode the firmware is able to support.

Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7921/debugfs.c    | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 6602903c0d02..c8cba1821cd7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -250,6 +250,9 @@ mt7921_pm_set(void *data, u64 val)
 	ieee80211_iterate_active_interfaces(mphy->hw,
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
 					    mt7921_pm_interface_iter, mphy->priv);
+
+	mt76_connac_mcu_set_deep_sleep(&dev->mt76, !!pm->enable);
+
 	mt7921_mutex_release(dev);
 
 	return 0;
@@ -267,6 +270,20 @@ mt7921_pm_get(void *data, u64 *val)
 
 DEFINE_DEBUGFS_ATTRIBUTE(fops_pm, mt7921_pm_get, mt7921_pm_set, "%lld\n");
 
+static int
+mt7921_deep_sleep_set(void *data, u64 val)
+{
+	struct mt7921_dev *dev = data;
+
+	mt7921_mutex_acquire(dev);
+	mt76_connac_mcu_set_deep_sleep(&dev->mt76, !!val);
+	mt7921_mutex_release(dev);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ds, NULL, mt7921_deep_sleep_set, "%lld\n");
+
 static int
 mt7921_pm_stats(struct seq_file *s, void *data)
 {
@@ -358,6 +375,7 @@ int mt7921_init_debugfs(struct mt7921_dev *dev)
 	debugfs_create_file("chip_reset", 0600, dir, dev, &fops_reset);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "runtime_pm_stats", dir,
 				    mt7921_pm_stats);
+	debugfs_create_file("deep-sleep", 0600, dir, dev, &fops_ds);
 
 	return 0;
 }

From e5bca8c5d2cd3502c15170a57c81a7587a38e957 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 12 Jun 2021 14:43:03 +0200
Subject: [PATCH 2297/3292] mt76: mt7921: improve code readability for
 mt7921_update_txs

Introduce mt7921_update_txs routine in order to improve code readability
for tx timestamp parsing/configuration.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 47 +++++++------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 1d710f276884..70f9618eee4a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -13,26 +13,6 @@
 #define HE_PREP(f, m, v)	le16_encode_bits(le32_get_bits(v, MT_CRXV_HE_##m),\
 						 IEEE80211_RADIOTAP_HE_##f)
 
-static unsigned long
-mt7921_next_txs_set(struct mt7921_dev *dev, struct mt76_wcid *wcid,
-		    u32 timeout)
-{
-	struct mt7921_sta *msta;
-
-	msta = container_of(wcid, struct mt7921_sta, wcid);
-	msta->next_txs_ts = jiffies + msecs_to_jiffies(timeout);
-	return msta->next_txs_ts;
-}
-
-static bool
-mt7921_next_txs_timeout(struct mt7921_dev *dev, struct mt76_wcid *wcid)
-{
-	struct mt7921_sta *msta;
-
-	msta = container_of(wcid, struct mt7921_sta, wcid);
-	return time_is_before_jiffies(msta->next_txs_ts);
-}
-
 static struct mt76_wcid *mt7921_rx_get_wcid(struct mt7921_dev *dev,
 					    u16 idx, bool unicast)
 {
@@ -739,6 +719,23 @@ mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi,
 	txwi[7] |= cpu_to_le32(val);
 }
 
+static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
+{
+	struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
+	u32 pid, frame_type = FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]);
+
+	if (!(frame_type & (IEEE80211_FTYPE_DATA >> 2)))
+		return;
+
+	if (time_is_after_eq_jiffies(msta->next_txs_ts))
+		return;
+
+	msta->next_txs_ts = jiffies + msecs_to_jiffies(250);
+	pid = mt76_get_next_pkt_id(wcid);
+	txwi[5] |= cpu_to_le32(MT_TXD5_TX_STATUS_MCU |
+			       FIELD_PREP(MT_TXD5_PID, pid));
+}
+
 void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 			   struct sk_buff *skb, struct mt76_wcid *wcid,
 			   struct ieee80211_key_conf *key, bool beacon)
@@ -816,15 +813,7 @@ void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 		txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
 	}
 
-	if ((FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]) &
-		(IEEE80211_FTYPE_DATA >> 2)) &&
-		mt7921_next_txs_timeout(dev, wcid)) {
-		u8 pid = mt76_get_next_pkt_id(wcid);
-
-		mt7921_next_txs_set(dev, wcid, 250);
-		val = MT_TXD5_TX_STATUS_MCU | FIELD_PREP(MT_TXD5_PID, pid);
-		txwi[5] |= cpu_to_le32(val);
-	}
+	mt7921_update_txs(wcid, txwi);
 }
 
 static void

From 8225816d2974204c09228f94c0451bd959575475 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 12 Jun 2021 14:48:48 +0200
Subject: [PATCH 2298/3292] mt76: mt7921: limit txpower according to userlevel
 power

Limit tx power for single-sku according to userlevel power.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt76_connac_mcu.c  | 28 +++++++++++++------
 .../net/wireless/mediatek/mt76/mt7921/main.c  |  3 ++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 302318e3a964..5c3a81e5f559 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1768,12 +1768,15 @@ mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy,
 		142, 144, 149, 151, 153, 155, 157,
 		159, 161, 165
 	};
+	int i, n_chan, batch_size, idx = 0, tx_power, last_ch;
 	struct mt76_connac_sku_tlv sku_tlbv;
-	int i, n_chan, batch_size, idx = 0;
 	struct mt76_power_limits limits;
 	const u8 *ch_list;
 
 	sku_len = is_mt7921(dev) ? sizeof(sku_tlbv) : sizeof(sku_tlbv) - 92;
+	tx_power = 2 * phy->hw->conf.power_level;
+	if (!tx_power)
+		tx_power = 127;
 
 	if (band == NL80211_BAND_2GHZ) {
 		n_chan = ARRAY_SIZE(chan_list_2ghz);
@@ -1784,39 +1787,48 @@ mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy,
 	}
 	batch_size = DIV_ROUND_UP(n_chan, batch_len);
 
+	if (!phy->cap.has_5ghz)
+		last_ch = chan_list_2ghz[n_chan - 1];
+	else
+		last_ch = chan_list_5ghz[n_chan - 1];
+
 	for (i = 0; i < batch_size; i++) {
-		bool last_msg = i == batch_size - 1;
-		int num_ch = last_msg ? n_chan % batch_len : batch_len;
 		struct mt76_connac_tx_power_limit_tlv tx_power_tlv = {
 			.band = band == NL80211_BAND_2GHZ ? 1 : 2,
-			.n_chan = num_ch,
-			.last_msg = last_msg,
 		};
+		int j, err, msg_len, num_ch;
 		struct sk_buff *skb;
-		int j, err, msg_len;
 
+		num_ch = i == batch_size - 1 ? n_chan % batch_len : batch_len;
 		msg_len = sizeof(tx_power_tlv) + num_ch * sizeof(sku_tlbv);
 		skb = mt76_mcu_msg_alloc(dev, NULL, msg_len);
 		if (!skb)
 			return -ENOMEM;
 
+		skb_reserve(skb, sizeof(tx_power_tlv));
+
 		BUILD_BUG_ON(sizeof(dev->alpha2) > sizeof(tx_power_tlv.alpha2));
 		memcpy(tx_power_tlv.alpha2, dev->alpha2, sizeof(dev->alpha2));
+		tx_power_tlv.n_chan = num_ch;
 
-		skb_put_data(skb, &tx_power_tlv, sizeof(tx_power_tlv));
 		for (j = 0; j < num_ch; j++, idx++) {
 			struct ieee80211_channel chan = {
 				.hw_value = ch_list[idx],
 				.band = band,
 			};
 
-			mt76_get_rate_power_limits(phy, &chan, &limits, 127);
+			mt76_get_rate_power_limits(phy, &chan, &limits,
+						   tx_power);
 
+			tx_power_tlv.last_msg = ch_list[idx] == last_ch;
 			sku_tlbv.channel = ch_list[idx];
+
 			mt76_connac_mcu_build_sku(dev, sku_tlbv.pwr_limit,
 						  &limits, band);
 			skb_put_data(skb, &sku_tlbv, sku_len);
 		}
+		__skb_push(skb, sizeof(tx_power_tlv));
+		memcpy(skb->data, &tx_power_tlv, sizeof(tx_power_tlv));
 
 		err = mt76_mcu_skb_send_msg(dev, skb,
 					    MCU_CMD_SET_RATE_TX_POWER, false);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 07e86bab0348..6fd5c869bb4d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -432,6 +432,9 @@ static int mt7921_config(struct ieee80211_hw *hw, u32 changed)
 
 	mt7921_mutex_acquire(dev);
 
+	if (changed & IEEE80211_CONF_CHANGE_POWER)
+		mt76_connac_mcu_set_rate_txpower(phy->mt76);
+
 	if (changed & IEEE80211_CONF_CHANGE_MONITOR) {
 		bool enabled = !!(hw->conf.flags & IEEE80211_CONF_MONITOR);
 

From 495cd981afe78b12fee635bfe35897eae427d89e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 12 Jun 2021 16:49:30 +0200
Subject: [PATCH 2299/3292] mt76: mt7921: introduce dedicated control for
 deep_sleep

Introduce ds_enable switch to fully control fw deep_sleep capability

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt76_connac.h  |  1 +
 .../wireless/mediatek/mt76/mt7921/debugfs.c   | 22 ++++++++++++++++---
 .../net/wireless/mediatek/mt76/mt7921/init.c  |  3 ++-
 .../net/wireless/mediatek/mt76/mt7921/main.c  |  2 +-
 .../net/wireless/mediatek/mt76/mt7921/pci.c   | 14 +++++++-----
 5 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 9b3f8d22f17e..93a37ed0c483 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -46,6 +46,7 @@ enum {
 
 struct mt76_connac_pm {
 	bool enable;
+	bool ds_enable;
 	bool suspended;
 
 	spinlock_t txq_lock;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index c8cba1821cd7..77468bdae460 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -251,7 +251,7 @@ mt7921_pm_set(void *data, u64 val)
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
 					    mt7921_pm_interface_iter, mphy->priv);
 
-	mt76_connac_mcu_set_deep_sleep(&dev->mt76, !!pm->enable);
+	mt76_connac_mcu_set_deep_sleep(&dev->mt76, pm->ds_enable);
 
 	mt7921_mutex_release(dev);
 
@@ -274,15 +274,31 @@ static int
 mt7921_deep_sleep_set(void *data, u64 val)
 {
 	struct mt7921_dev *dev = data;
+	struct mt76_connac_pm *pm = &dev->pm;
+	bool enable = !!val;
 
 	mt7921_mutex_acquire(dev);
-	mt76_connac_mcu_set_deep_sleep(&dev->mt76, !!val);
+	if (pm->ds_enable != enable) {
+		mt76_connac_mcu_set_deep_sleep(&dev->mt76, enable);
+		pm->ds_enable = enable;
+	}
 	mt7921_mutex_release(dev);
 
 	return 0;
 }
 
-DEFINE_DEBUGFS_ATTRIBUTE(fops_ds, NULL, mt7921_deep_sleep_set, "%lld\n");
+static int
+mt7921_deep_sleep_get(void *data, u64 *val)
+{
+	struct mt7921_dev *dev = data;
+
+	*val = dev->pm.ds_enable;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ds, mt7921_deep_sleep_get,
+			 mt7921_deep_sleep_set, "%lld\n");
 
 static int
 mt7921_pm_stats(struct seq_file *s, void *data)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index aca057c7576b..9925c15ac9df 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -201,6 +201,7 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	dev->pm.stats.last_wake_event = jiffies;
 	dev->pm.stats.last_doze_event = jiffies;
 	dev->pm.enable = true;
+	dev->pm.ds_enable = true;
 
 	ret = mt7921_init_hardware(dev);
 	if (ret)
@@ -235,7 +236,7 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
-	return mt76_connac_mcu_set_deep_sleep(&dev->mt76, dev->pm.enable);
+	return mt76_connac_mcu_set_deep_sleep(&dev->mt76, dev->pm.ds_enable);
 }
 
 void mt7921_unregister_device(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 6fd5c869bb4d..0fb152ac4d87 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -820,7 +820,7 @@ static int mt7921_sta_state(struct ieee80211_hw *hw,
 {
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
 
-	if (dev->pm.enable) {
+	if (dev->pm.ds_enable) {
 		mt7921_mutex_acquire(dev);
 		mt76_connac_sta_state_dp(&dev->mt76, old_state, new_state);
 		mt7921_mutex_release(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 27906b2cd912..c3905bcab360 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -208,8 +208,10 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 			goto restore_suspend;
 	}
 
-	if (!pm->enable)
-		mt76_connac_mcu_set_deep_sleep(&dev->mt76, true);
+	/* always enable deep sleep during suspend to reduce
+	 * power consumption
+	 */
+	mt76_connac_mcu_set_deep_sleep(&dev->mt76, true);
 
 	napi_disable(&mdev->tx_napi);
 	mt76_worker_disable(&mdev->tx_worker);
@@ -252,7 +254,7 @@ restore_napi:
 	}
 	napi_enable(&mdev->tx_napi);
 
-	if (!pm->enable)
+	if (!pm->ds_enable)
 		mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
 
 	if (hif_suspend)
@@ -268,9 +270,10 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 {
 	struct mt76_dev *mdev = pci_get_drvdata(pdev);
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+	struct mt76_connac_pm *pm = &dev->pm;
 	int i, err;
 
-	dev->pm.suspended = false;
+	pm->suspended = false;
 	err = pci_set_power_state(pdev, PCI_D0);
 	if (err)
 		return err;
@@ -301,7 +304,8 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 	napi_enable(&mdev->tx_napi);
 	napi_schedule(&mdev->tx_napi);
 
-	if (!dev->pm.enable)
+	/* restore previous ds setting */
+	if (!pm->ds_enable)
 		mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
 
 	if (!test_bit(MT76_STATE_SUSPEND, &dev->mphy.state))

From 78b0328ff8c46fce64eb969d2572c3f631735dc1 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Wed, 16 Jun 2021 05:31:10 +0800
Subject: [PATCH 2300/3292] mt76: mt7921: fix kernel warning when reset on vif
 is not sta

ieee80211_disconnect is only called for the staton mode.

[  714.050429] WARNING: CPU: 1 PID: 382 at net/mac80211/mlme.c:2787
ieee80211_disconnect+0x108/0x118 [mac80211]
[  714.116704] Hardware name: MediaTek Asurada rev1 board (DT)
[  714.122303] Workqueue: mt76 mt7921_mac_reset_work [mt7921e]
[  714.127877] pstate: 20c00009 (nzCv daif +PAN +UAO)
[  714.132761] pc : ieee80211_disconnect+0x108/0x118 [mac80211]
[  714.138430] lr : mt7921_vif_connect_iter+0x28/0x54 [mt7921e]
[  714.144083] sp : ffffffc0107cbbd0
[  714.147394] x29: ffffffc0107cbbd0 x28: ffffffb26c9cb928
[  714.152706] x27: ffffffb26c9cbd98 x26: 0000000000000000
[  714.158017] x25: 0000000000000003 x24: ffffffb26c9c9c38
[  714.163328] x23: ffffffb26c9c9c38 x22: ffffffb26c9c8860
[  714.168639] x21: ffffffb23b940000 x20: ffffffb26c9c8860
[  714.173950] x19: 0000000000000001 x18: 000000000000b67e
[  714.179261] x17: 00000000064dd409 x16: ffffffd739cb28f0
[  714.184571] x15: 0000000000000000 x14: 0000000000000227
[  714.189881] x13: 0000000000000400 x12: ffffffd73a4eb060
[  714.195191] x11: 0000000000000000 x10: 0000000000000000
[  714.200502] x9 : ffffffd703a0a000 x8 : 0000000000000006
[  714.205812] x7 : 2828282828282828 x6 : ffffffb200440396
[  714.211122] x5 : 0000000000000000 x4 : 0000000000000004
[  714.216432] x3 : 0000000000000000 x2 : ffffffb23b940c90
[  714.221743] x1 : 0000000000000001 x0 : ffffffb23b940c90
[  714.227054] Call trace:
[  714.229594]  ieee80211_disconnect+0x108/0x118 [mac80211]
[  714.234913]  mt7921_vif_connect_iter+0x28/0x54 [mt7921e]
[  714.240313]  __iterate_interfaces+0xc4/0xdc [mac80211]
[  714.245541]  ieee80211_iterate_interfaces+0x4c/0x68 [mac80211]
[  714.251381]  mt7921_mac_reset_work+0x410/0x468 [mt7921e]
[  714.256696]  process_one_work+0x208/0x3c8
[  714.260706]  worker_thread+0x23c/0x3e8
[  714.264456]  kthread+0x140/0x17c
[  714.267685]  ret_from_fork+0x10/0x18

Fixes: 0c1ce9884607 ("mt76: mt7921: add wifi reset support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 70f9618eee4a..740773da2193 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1258,7 +1258,8 @@ mt7921_vif_connect_iter(void *priv, u8 *mac,
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
 	struct mt7921_dev *dev = mvif->phy->dev;
 
-	ieee80211_disconnect(vif, true);
+	if (vif->type == NL80211_IFTYPE_STATION)
+		ieee80211_disconnect(vif, true);
 
 	mt76_connac_mcu_uni_add_dev(&dev->mphy, vif, &mvif->sta.wcid, true);
 	mt7921_mcu_set_tx(dev, vif);

From 723885a6750102e5d807429b3d06aa6b0d29cc66 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 17 Jun 2021 09:39:19 +0800
Subject: [PATCH 2301/3292] mt76: mt7921: fix the coredump is being truncated

Fix the maximum size of the coredump generated with current mt7921
firmware. Otherwise, a truncated coredump would be reported to userland
via dev_coredumpv.

Also, there is an additional error handling enhanced in the patch to avoid
the possible invalid buffer access when the system failed to create the
buffer to hold the coredump.

Fixes: 0da3c795d07b ("mt76: mt7921: add coredump support")
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac.h | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c  | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 93a37ed0c483..f49d97d0a1c5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -13,7 +13,7 @@
 #define MT76_CONNAC_MAX_SCAN_MATCH		16
 
 #define MT76_CONNAC_COREDUMP_TIMEOUT		(HZ / 20)
-#define MT76_CONNAC_COREDUMP_SZ			(128 * 1024)
+#define MT76_CONNAC_COREDUMP_SZ			(1300 * 1024)
 
 enum {
 	CMD_CBW_20MHZ = IEEE80211_STA_RX_BW_20,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 740773da2193..b8e64c58b9ed 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1548,7 +1548,7 @@ void mt7921_coredump_work(struct work_struct *work)
 			break;
 
 		skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
-		if (data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ) {
+		if (!dump || data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ) {
 			dev_kfree_skb(skb);
 			continue;
 		}
@@ -1558,7 +1558,10 @@ void mt7921_coredump_work(struct work_struct *work)
 
 		dev_kfree_skb(skb);
 	}
-	dev_coredumpv(dev->mt76.dev, dump, MT76_CONNAC_COREDUMP_SZ,
-		      GFP_KERNEL);
+
+	if (dump)
+		dev_coredumpv(dev->mt76.dev, dump, MT76_CONNAC_COREDUMP_SZ,
+			      GFP_KERNEL);
+
 	mt7921_reset(&dev->mt76);
 }

From c368362c36d3d4cedbc9a1c9caa95960912cc429 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 17 Jun 2021 15:17:49 +0800
Subject: [PATCH 2302/3292] mt76: fix iv and CCMP header insertion

The iv from RXD is only for TKIP_RSC/CCMP_PN/GCMP_PN, and it needs a
check for CCMP header insertion. Move mt76_cipher_type to mt76.h to
reduce duplicated code.

Signed-off-by: Xing Song <xing.song@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h     | 16 +++++
 .../net/wireless/mediatek/mt76/mt7603/mac.c   | 33 +++++++---
 .../net/wireless/mediatek/mt76/mt7603/regs.h  | 12 ----
 .../net/wireless/mediatek/mt76/mt7615/mac.c   | 64 +++++++++++++++----
 .../net/wireless/mediatek/mt76/mt7615/mac.h   | 42 ------------
 .../net/wireless/mediatek/mt76/mt76x02_mac.c  | 28 ++++----
 .../net/wireless/mediatek/mt76/mt76x02_regs.h | 18 +++---
 .../net/wireless/mediatek/mt76/mt7915/mac.c   | 29 ++++++---
 .../net/wireless/mediatek/mt76/mt7915/mcu.c   | 30 ++++-----
 .../net/wireless/mediatek/mt76/mt7915/mcu.h   | 23 ++++---
 .../net/wireless/mediatek/mt76/mt7921/mac.c   | 29 ++++++---
 .../net/wireless/mediatek/mt76/mt7921/mcu.c   | 30 ++++-----
 .../net/wireless/mediatek/mt76/mt7921/mcu.h   | 23 ++++---
 13 files changed, 208 insertions(+), 169 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 022e9c573ddd..25c5ceef5257 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -87,6 +87,22 @@ enum mt76_rxq_id {
 	__MT_RXQ_MAX
 };
 
+enum mt76_cipher_type {
+	MT_CIPHER_NONE,
+	MT_CIPHER_WEP40,
+	MT_CIPHER_TKIP,
+	MT_CIPHER_TKIP_NO_MIC,
+	MT_CIPHER_AES_CCMP,
+	MT_CIPHER_WEP104,
+	MT_CIPHER_BIP_CMAC_128,
+	MT_CIPHER_WEP128,
+	MT_CIPHER_WAPI,
+	MT_CIPHER_CCMP_CCX,
+	MT_CIPHER_CCMP_256,
+	MT_CIPHER_GCMP,
+	MT_CIPHER_GCMP_256,
+};
+
 struct mt76_queue_buf {
 	dma_addr_t addr;
 	u16 len;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 8435e9597688..3972c56136a2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -550,14 +550,27 @@ mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb)
 		u8 *data = (u8 *)rxd;
 
 		if (status->flag & RX_FLAG_DECRYPTED) {
-			status->iv[0] = data[5];
-			status->iv[1] = data[4];
-			status->iv[2] = data[3];
-			status->iv[3] = data[2];
-			status->iv[4] = data[1];
-			status->iv[5] = data[0];
-
-			insert_ccmp_hdr = FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+			switch (FIELD_GET(MT_RXD2_NORMAL_SEC_MODE, rxd2)) {
+			case MT_CIPHER_AES_CCMP:
+			case MT_CIPHER_CCMP_CCX:
+			case MT_CIPHER_CCMP_256:
+				insert_ccmp_hdr =
+					FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+				fallthrough;
+			case MT_CIPHER_TKIP:
+			case MT_CIPHER_TKIP_NO_MIC:
+			case MT_CIPHER_GCMP:
+			case MT_CIPHER_GCMP_256:
+				status->iv[0] = data[5];
+				status->iv[1] = data[4];
+				status->iv[2] = data[3];
+				status->iv[3] = data[2];
+				status->iv[4] = data[1];
+				status->iv[5] = data[0];
+				break;
+			default:
+				break;
+			}
 		}
 
 		rxd += 4;
@@ -831,7 +844,7 @@ void mt7603_wtbl_set_rates(struct mt7603_dev *dev, struct mt7603_sta *sta,
 	sta->wcid.tx_info |= MT_WCID_TX_INFO_SET;
 }
 
-static enum mt7603_cipher_type
+static enum mt76_cipher_type
 mt7603_mac_get_key_info(struct ieee80211_key_conf *key, u8 *key_data)
 {
 	memset(key_data, 0, 32);
@@ -863,7 +876,7 @@ mt7603_mac_get_key_info(struct ieee80211_key_conf *key, u8 *key_data)
 int mt7603_wtbl_set_key(struct mt7603_dev *dev, int wcid,
 			struct ieee80211_key_conf *key)
 {
-	enum mt7603_cipher_type cipher;
+	enum mt76_cipher_type cipher;
 	u32 addr = mt7603_wtbl3_addr(wcid);
 	u8 key_data[32];
 	int key_len = sizeof(key_data);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/regs.h b/drivers/net/wireless/mediatek/mt76/mt7603/regs.h
index 6741e6907194..3b901090b29c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/regs.h
@@ -765,16 +765,4 @@ enum {
 #define MT_WTBL1_OR			(MT_WTBL1_BASE + 0x2300)
 #define MT_WTBL1_OR_PSM_WRITE		BIT(31)
 
-enum mt7603_cipher_type {
-	MT_CIPHER_NONE,
-	MT_CIPHER_WEP40,
-	MT_CIPHER_TKIP,
-	MT_CIPHER_TKIP_NO_MIC,
-	MT_CIPHER_AES_CCMP,
-	MT_CIPHER_WEP104,
-	MT_CIPHER_BIP_CMAC_128,
-	MT_CIPHER_WEP128,
-	MT_CIPHER_WAPI,
-};
-
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index a057859aa050..f41fbb641e87 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -57,6 +57,33 @@ static const struct mt7615_dfs_radar_spec jp_radar_specs = {
 	},
 };
 
+static enum mt76_cipher_type
+mt7615_mac_get_cipher(int cipher)
+{
+	switch (cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+		return MT_CIPHER_WEP40;
+	case WLAN_CIPHER_SUITE_WEP104:
+		return MT_CIPHER_WEP104;
+	case WLAN_CIPHER_SUITE_TKIP:
+		return MT_CIPHER_TKIP;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		return MT_CIPHER_BIP_CMAC_128;
+	case WLAN_CIPHER_SUITE_CCMP:
+		return MT_CIPHER_AES_CCMP;
+	case WLAN_CIPHER_SUITE_CCMP_256:
+		return MT_CIPHER_CCMP_256;
+	case WLAN_CIPHER_SUITE_GCMP:
+		return MT_CIPHER_GCMP;
+	case WLAN_CIPHER_SUITE_GCMP_256:
+		return MT_CIPHER_GCMP_256;
+	case WLAN_CIPHER_SUITE_SMS4:
+		return MT_CIPHER_WAPI;
+	default:
+		return MT_CIPHER_NONE;
+	}
+}
+
 static struct mt76_wcid *mt7615_rx_get_wcid(struct mt7615_dev *dev,
 					    u8 idx, bool unicast)
 {
@@ -313,14 +340,27 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 		u8 *data = (u8 *)rxd;
 
 		if (status->flag & RX_FLAG_DECRYPTED) {
-			status->iv[0] = data[5];
-			status->iv[1] = data[4];
-			status->iv[2] = data[3];
-			status->iv[3] = data[2];
-			status->iv[4] = data[1];
-			status->iv[5] = data[0];
-
-			insert_ccmp_hdr = FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+			switch (FIELD_GET(MT_RXD2_NORMAL_SEC_MODE, rxd2)) {
+			case MT_CIPHER_AES_CCMP:
+			case MT_CIPHER_CCMP_CCX:
+			case MT_CIPHER_CCMP_256:
+				insert_ccmp_hdr =
+					FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+				fallthrough;
+			case MT_CIPHER_TKIP:
+			case MT_CIPHER_TKIP_NO_MIC:
+			case MT_CIPHER_GCMP:
+			case MT_CIPHER_GCMP_256:
+				status->iv[0] = data[5];
+				status->iv[1] = data[4];
+				status->iv[2] = data[3];
+				status->iv[3] = data[2];
+				status->iv[4] = data[1];
+				status->iv[5] = data[0];
+				break;
+			default:
+				break;
+			}
 		}
 		rxd += 4;
 		if ((u8 *)rxd - skb->data >= skb->len)
@@ -1078,7 +1118,7 @@ EXPORT_SYMBOL_GPL(mt7615_mac_set_rates);
 static int
 mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			   struct ieee80211_key_conf *key,
-			   enum mt7615_cipher_type cipher, u16 cipher_mask,
+			   enum mt76_cipher_type cipher, u16 cipher_mask,
 			   enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4;
@@ -1118,7 +1158,7 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 static int
 mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			  enum mt7615_cipher_type cipher, u16 cipher_mask,
+			  enum mt76_cipher_type cipher, u16 cipher_mask,
 			  int keyidx, enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1;
@@ -1157,7 +1197,7 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 static void
 mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			      enum mt7615_cipher_type cipher, u16 cipher_mask,
+			      enum mt76_cipher_type cipher, u16 cipher_mask,
 			      enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx);
@@ -1183,7 +1223,7 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 			      struct ieee80211_key_conf *key,
 			      enum set_key_cmd cmd)
 {
-	enum mt7615_cipher_type cipher;
+	enum mt76_cipher_type cipher;
 	u16 cipher_mask = wcid->cipher;
 	int err;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index 6bf9da040196..46f283eb8d0f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -383,48 +383,6 @@ struct mt7615_dfs_radar_spec {
 	struct mt7615_dfs_pattern radar_pattern[16];
 };
 
-enum mt7615_cipher_type {
-	MT_CIPHER_NONE,
-	MT_CIPHER_WEP40,
-	MT_CIPHER_TKIP,
-	MT_CIPHER_TKIP_NO_MIC,
-	MT_CIPHER_AES_CCMP,
-	MT_CIPHER_WEP104,
-	MT_CIPHER_BIP_CMAC_128,
-	MT_CIPHER_WEP128,
-	MT_CIPHER_WAPI,
-	MT_CIPHER_CCMP_256 = 10,
-	MT_CIPHER_GCMP,
-	MT_CIPHER_GCMP_256,
-};
-
-static inline enum mt7615_cipher_type
-mt7615_mac_get_cipher(int cipher)
-{
-	switch (cipher) {
-	case WLAN_CIPHER_SUITE_WEP40:
-		return MT_CIPHER_WEP40;
-	case WLAN_CIPHER_SUITE_WEP104:
-		return MT_CIPHER_WEP104;
-	case WLAN_CIPHER_SUITE_TKIP:
-		return MT_CIPHER_TKIP;
-	case WLAN_CIPHER_SUITE_AES_CMAC:
-		return MT_CIPHER_BIP_CMAC_128;
-	case WLAN_CIPHER_SUITE_CCMP:
-		return MT_CIPHER_AES_CCMP;
-	case WLAN_CIPHER_SUITE_CCMP_256:
-		return MT_CIPHER_CCMP_256;
-	case WLAN_CIPHER_SUITE_GCMP:
-		return MT_CIPHER_GCMP;
-	case WLAN_CIPHER_SUITE_GCMP_256:
-		return MT_CIPHER_GCMP_256;
-	case WLAN_CIPHER_SUITE_SMS4:
-		return MT_CIPHER_WAPI;
-	default:
-		return MT_CIPHER_NONE;
-	}
-}
-
 static inline struct mt7615_txp_common *
 mt7615_txwi_to_txp(struct mt76_dev *dev, struct mt76_txwi_cache *t)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 7572c793aa51..c32e6dc68773 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -34,24 +34,24 @@ mt76x02_mac_get_key_info(struct ieee80211_key_conf *key, u8 *key_data)
 {
 	memset(key_data, 0, 32);
 	if (!key)
-		return MT_CIPHER_NONE;
+		return MT76X02_CIPHER_NONE;
 
 	if (key->keylen > 32)
-		return MT_CIPHER_NONE;
+		return MT76X02_CIPHER_NONE;
 
 	memcpy(key_data, key->key, key->keylen);
 
 	switch (key->cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
-		return MT_CIPHER_WEP40;
+		return MT76X02_CIPHER_WEP40;
 	case WLAN_CIPHER_SUITE_WEP104:
-		return MT_CIPHER_WEP104;
+		return MT76X02_CIPHER_WEP104;
 	case WLAN_CIPHER_SUITE_TKIP:
-		return MT_CIPHER_TKIP;
+		return MT76X02_CIPHER_TKIP;
 	case WLAN_CIPHER_SUITE_CCMP:
-		return MT_CIPHER_AES_CCMP;
+		return MT76X02_CIPHER_AES_CCMP;
 	default:
-		return MT_CIPHER_NONE;
+		return MT76X02_CIPHER_NONE;
 	}
 }
 
@@ -63,7 +63,7 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx,
 	u32 val;
 
 	cipher = mt76x02_mac_get_key_info(key, key_data);
-	if (cipher == MT_CIPHER_NONE && key)
+	if (cipher == MT76X02_CIPHER_NONE && key)
 		return -EOPNOTSUPP;
 
 	val = mt76_rr(dev, MT_SKEY_MODE(vif_idx));
@@ -91,10 +91,10 @@ void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx,
 	eiv = mt76_rr(dev, MT_WCID_IV(idx) + 4);
 
 	pn = (u64)eiv << 16;
-	if (cipher == MT_CIPHER_TKIP) {
+	if (cipher == MT76X02_CIPHER_TKIP) {
 		pn |= (iv >> 16) & 0xff;
 		pn |= (iv & 0xff) << 8;
-	} else if (cipher >= MT_CIPHER_AES_CCMP) {
+	} else if (cipher >= MT76X02_CIPHER_AES_CCMP) {
 		pn |= iv & 0xffff;
 	} else {
 		return;
@@ -112,7 +112,7 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx,
 	u64 pn;
 
 	cipher = mt76x02_mac_get_key_info(key, key_data);
-	if (cipher == MT_CIPHER_NONE && key)
+	if (cipher == MT76X02_CIPHER_NONE && key)
 		return -EOPNOTSUPP;
 
 	mt76_wr_copy(dev, MT_WCID_KEY(idx), key_data, sizeof(key_data));
@@ -126,16 +126,16 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx,
 		pn = atomic64_read(&key->tx_pn);
 
 		iv_data[3] = key->keyidx << 6;
-		if (cipher >= MT_CIPHER_TKIP) {
+		if (cipher >= MT76X02_CIPHER_TKIP) {
 			iv_data[3] |= 0x20;
 			put_unaligned_le32(pn >> 16, &iv_data[4]);
 		}
 
-		if (cipher == MT_CIPHER_TKIP) {
+		if (cipher == MT76X02_CIPHER_TKIP) {
 			iv_data[0] = (pn >> 8) & 0xff;
 			iv_data[1] = (iv_data[0] | 0x20) & 0x7f;
 			iv_data[2] = pn & 0xff;
-		} else if (cipher >= MT_CIPHER_AES_CCMP) {
+		} else if (cipher >= MT76X02_CIPHER_AES_CCMP) {
 			put_unaligned_le16((pn & 0xffff), &iv_data[0]);
 		}
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h
index 3e722276b5c2..fa7872ac22bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_regs.h
@@ -692,15 +692,15 @@ struct mt76_wcid_key {
 } __packed __aligned(4);
 
 enum mt76x02_cipher_type {
-	MT_CIPHER_NONE,
-	MT_CIPHER_WEP40,
-	MT_CIPHER_WEP104,
-	MT_CIPHER_TKIP,
-	MT_CIPHER_AES_CCMP,
-	MT_CIPHER_CKIP40,
-	MT_CIPHER_CKIP104,
-	MT_CIPHER_CKIP128,
-	MT_CIPHER_WAPI,
+	MT76X02_CIPHER_NONE,
+	MT76X02_CIPHER_WEP40,
+	MT76X02_CIPHER_WEP104,
+	MT76X02_CIPHER_TKIP,
+	MT76X02_CIPHER_AES_CCMP,
+	MT76X02_CIPHER_CKIP40,
+	MT76X02_CIPHER_CKIP104,
+	MT76X02_CIPHER_CKIP128,
+	MT76X02_CIPHER_WAPI,
 };
 
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index c093c13bf1f1..2462704094b0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -413,14 +413,27 @@ mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 		u8 *data = (u8 *)rxd;
 
 		if (status->flag & RX_FLAG_DECRYPTED) {
-			status->iv[0] = data[5];
-			status->iv[1] = data[4];
-			status->iv[2] = data[3];
-			status->iv[3] = data[2];
-			status->iv[4] = data[1];
-			status->iv[5] = data[0];
-
-			insert_ccmp_hdr = FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+			switch (FIELD_GET(MT_RXD1_NORMAL_SEC_MODE, rxd1)) {
+			case MT_CIPHER_AES_CCMP:
+			case MT_CIPHER_CCMP_CCX:
+			case MT_CIPHER_CCMP_256:
+				insert_ccmp_hdr =
+					FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+				fallthrough;
+			case MT_CIPHER_TKIP:
+			case MT_CIPHER_TKIP_NO_MIC:
+			case MT_CIPHER_GCMP:
+			case MT_CIPHER_GCMP_256:
+				status->iv[0] = data[5];
+				status->iv[1] = data[4];
+				status->iv[2] = data[3];
+				status->iv[3] = data[2];
+				status->iv[4] = data[1];
+				status->iv[5] = data[0];
+				break;
+			default:
+				break;
+			}
 		}
 		rxd += 4;
 		if ((u8 *)rxd - skb->data >= skb->len)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index b565024404cf..863aa18b3024 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -88,28 +88,28 @@ struct mt7915_fw_region {
 #define HE_PHY(p, c)			u8_get_bits(c, IEEE80211_HE_PHY_##p)
 #define HE_MAC(m, c)			u8_get_bits(c, IEEE80211_HE_MAC_##m)
 
-static enum mt7915_cipher_type
+static enum mcu_cipher_type
 mt7915_mcu_get_cipher(int cipher)
 {
 	switch (cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
-		return MT_CIPHER_WEP40;
+		return MCU_CIPHER_WEP40;
 	case WLAN_CIPHER_SUITE_WEP104:
-		return MT_CIPHER_WEP104;
+		return MCU_CIPHER_WEP104;
 	case WLAN_CIPHER_SUITE_TKIP:
-		return MT_CIPHER_TKIP;
+		return MCU_CIPHER_TKIP;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
-		return MT_CIPHER_BIP_CMAC_128;
+		return MCU_CIPHER_BIP_CMAC_128;
 	case WLAN_CIPHER_SUITE_CCMP:
-		return MT_CIPHER_AES_CCMP;
+		return MCU_CIPHER_AES_CCMP;
 	case WLAN_CIPHER_SUITE_CCMP_256:
-		return MT_CIPHER_CCMP_256;
+		return MCU_CIPHER_CCMP_256;
 	case WLAN_CIPHER_SUITE_GCMP:
-		return MT_CIPHER_GCMP;
+		return MCU_CIPHER_GCMP;
 	case WLAN_CIPHER_SUITE_GCMP_256:
-		return MT_CIPHER_GCMP_256;
+		return MCU_CIPHER_GCMP_256;
 	case WLAN_CIPHER_SUITE_SMS4:
-		return MT_CIPHER_WAPI;
+		return MCU_CIPHER_WAPI;
 	default:
 		return MT_CIPHER_NONE;
 	}
@@ -1207,14 +1207,14 @@ mt7915_mcu_sta_key_tlv(struct mt7915_sta *msta, struct sk_buff *skb,
 		sec_key = &sec->key[0];
 		sec_key->cipher_len = sizeof(*sec_key);
 
-		if (cipher == MT_CIPHER_BIP_CMAC_128) {
-			sec_key->cipher_id = MT_CIPHER_AES_CCMP;
+		if (cipher == MCU_CIPHER_BIP_CMAC_128) {
+			sec_key->cipher_id = MCU_CIPHER_AES_CCMP;
 			sec_key->key_id = bip->keyidx;
 			sec_key->key_len = 16;
 			memcpy(sec_key->key, bip->key, 16);
 
 			sec_key = &sec->key[1];
-			sec_key->cipher_id = MT_CIPHER_BIP_CMAC_128;
+			sec_key->cipher_id = MCU_CIPHER_BIP_CMAC_128;
 			sec_key->cipher_len = sizeof(*sec_key);
 			sec_key->key_len = 16;
 			memcpy(sec_key->key, key->key, 16);
@@ -1226,14 +1226,14 @@ mt7915_mcu_sta_key_tlv(struct mt7915_sta *msta, struct sk_buff *skb,
 			sec_key->key_len = key->keylen;
 			memcpy(sec_key->key, key->key, key->keylen);
 
-			if (cipher == MT_CIPHER_TKIP) {
+			if (cipher == MCU_CIPHER_TKIP) {
 				/* Rx/Tx MIC keys are swapped */
 				memcpy(sec_key->key + 16, key->key + 24, 8);
 				memcpy(sec_key->key + 24, key->key + 16, 8);
 			}
 
 			/* store key_conf for BIP batch update */
-			if (cipher == MT_CIPHER_AES_CCMP) {
+			if (cipher == MCU_CIPHER_AES_CCMP) {
 				memcpy(bip->key, key->key, key->keylen);
 				bip->keyidx = key->keyidx;
 			}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 9087a7771c35..edd3ba3a0c2d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -1072,18 +1072,17 @@ enum {
 	STA_REC_MAX_NUM
 };
 
-enum mt7915_cipher_type {
-	MT_CIPHER_NONE,
-	MT_CIPHER_WEP40,
-	MT_CIPHER_WEP104,
-	MT_CIPHER_WEP128,
-	MT_CIPHER_TKIP,
-	MT_CIPHER_AES_CCMP,
-	MT_CIPHER_CCMP_256,
-	MT_CIPHER_GCMP,
-	MT_CIPHER_GCMP_256,
-	MT_CIPHER_WAPI,
-	MT_CIPHER_BIP_CMAC_128,
+enum mcu_cipher_type {
+	MCU_CIPHER_WEP40 = 1,
+	MCU_CIPHER_WEP104,
+	MCU_CIPHER_WEP128,
+	MCU_CIPHER_TKIP,
+	MCU_CIPHER_AES_CCMP,
+	MCU_CIPHER_CCMP_256,
+	MCU_CIPHER_GCMP,
+	MCU_CIPHER_GCMP_256,
+	MCU_CIPHER_WAPI,
+	MCU_CIPHER_BIP_CMAC_128,
 };
 
 enum {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index b8e64c58b9ed..143dae97ef77 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -403,14 +403,27 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 		u8 *data = (u8 *)rxd;
 
 		if (status->flag & RX_FLAG_DECRYPTED) {
-			status->iv[0] = data[5];
-			status->iv[1] = data[4];
-			status->iv[2] = data[3];
-			status->iv[3] = data[2];
-			status->iv[4] = data[1];
-			status->iv[5] = data[0];
-
-			insert_ccmp_hdr = FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+			switch (FIELD_GET(MT_RXD1_NORMAL_SEC_MODE, rxd1)) {
+			case MT_CIPHER_AES_CCMP:
+			case MT_CIPHER_CCMP_CCX:
+			case MT_CIPHER_CCMP_256:
+				insert_ccmp_hdr =
+					FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+				fallthrough;
+			case MT_CIPHER_TKIP:
+			case MT_CIPHER_TKIP_NO_MIC:
+			case MT_CIPHER_GCMP:
+			case MT_CIPHER_GCMP_256:
+				status->iv[0] = data[5];
+				status->iv[1] = data[4];
+				status->iv[2] = data[3];
+				status->iv[3] = data[2];
+				status->iv[4] = data[1];
+				status->iv[5] = data[0];
+				break;
+			default:
+				break;
+			}
 		}
 		rxd += 4;
 		if ((u8 *)rxd - skb->data >= skb->len)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 23ec0c816d64..c2c4dc196802 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -88,28 +88,28 @@ struct mt7921_fw_region {
 #define to_wcid_lo(id)			FIELD_GET(GENMASK(7, 0), (u16)id)
 #define to_wcid_hi(id)			FIELD_GET(GENMASK(9, 8), (u16)id)
 
-static enum mt7921_cipher_type
+static enum mcu_cipher_type
 mt7921_mcu_get_cipher(int cipher)
 {
 	switch (cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
-		return MT_CIPHER_WEP40;
+		return MCU_CIPHER_WEP40;
 	case WLAN_CIPHER_SUITE_WEP104:
-		return MT_CIPHER_WEP104;
+		return MCU_CIPHER_WEP104;
 	case WLAN_CIPHER_SUITE_TKIP:
-		return MT_CIPHER_TKIP;
+		return MCU_CIPHER_TKIP;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
-		return MT_CIPHER_BIP_CMAC_128;
+		return MCU_CIPHER_BIP_CMAC_128;
 	case WLAN_CIPHER_SUITE_CCMP:
-		return MT_CIPHER_AES_CCMP;
+		return MCU_CIPHER_AES_CCMP;
 	case WLAN_CIPHER_SUITE_CCMP_256:
-		return MT_CIPHER_CCMP_256;
+		return MCU_CIPHER_CCMP_256;
 	case WLAN_CIPHER_SUITE_GCMP:
-		return MT_CIPHER_GCMP;
+		return MCU_CIPHER_GCMP;
 	case WLAN_CIPHER_SUITE_GCMP_256:
-		return MT_CIPHER_GCMP_256;
+		return MCU_CIPHER_GCMP_256;
 	case WLAN_CIPHER_SUITE_SMS4:
-		return MT_CIPHER_WAPI;
+		return MCU_CIPHER_WAPI;
 	default:
 		return MT_CIPHER_NONE;
 	}
@@ -625,14 +625,14 @@ mt7921_mcu_sta_key_tlv(struct mt7921_sta *msta, struct sk_buff *skb,
 		sec_key = &sec->key[0];
 		sec_key->cipher_len = sizeof(*sec_key);
 
-		if (cipher == MT_CIPHER_BIP_CMAC_128) {
-			sec_key->cipher_id = MT_CIPHER_AES_CCMP;
+		if (cipher == MCU_CIPHER_BIP_CMAC_128) {
+			sec_key->cipher_id = MCU_CIPHER_AES_CCMP;
 			sec_key->key_id = bip->keyidx;
 			sec_key->key_len = 16;
 			memcpy(sec_key->key, bip->key, 16);
 
 			sec_key = &sec->key[1];
-			sec_key->cipher_id = MT_CIPHER_BIP_CMAC_128;
+			sec_key->cipher_id = MCU_CIPHER_BIP_CMAC_128;
 			sec_key->cipher_len = sizeof(*sec_key);
 			sec_key->key_len = 16;
 			memcpy(sec_key->key, key->key, 16);
@@ -644,14 +644,14 @@ mt7921_mcu_sta_key_tlv(struct mt7921_sta *msta, struct sk_buff *skb,
 			sec_key->key_len = key->keylen;
 			memcpy(sec_key->key, key->key, key->keylen);
 
-			if (cipher == MT_CIPHER_TKIP) {
+			if (cipher == MCU_CIPHER_TKIP) {
 				/* Rx/Tx MIC keys are swapped */
 				memcpy(sec_key->key + 16, key->key + 24, 8);
 				memcpy(sec_key->key + 24, key->key + 16, 8);
 			}
 
 			/* store key_conf for BIP batch update */
-			if (cipher == MT_CIPHER_AES_CCMP) {
+			if (cipher == MCU_CIPHER_AES_CCMP) {
 				memcpy(bip->key, key->key, key->keylen);
 				bip->keyidx = key->keyidx;
 			}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index 89fed2f71161..d76cf8f8dfdf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -198,18 +198,17 @@ struct sta_rec_sec {
 	struct sec_key key[2];
 } __packed;
 
-enum mt7921_cipher_type {
-	MT_CIPHER_NONE,
-	MT_CIPHER_WEP40,
-	MT_CIPHER_WEP104,
-	MT_CIPHER_WEP128,
-	MT_CIPHER_TKIP,
-	MT_CIPHER_AES_CCMP,
-	MT_CIPHER_CCMP_256,
-	MT_CIPHER_GCMP,
-	MT_CIPHER_GCMP_256,
-	MT_CIPHER_WAPI,
-	MT_CIPHER_BIP_CMAC_128,
+enum mcu_cipher_type {
+	MCU_CIPHER_WEP40 = 1,
+	MCU_CIPHER_WEP104,
+	MCU_CIPHER_WEP128,
+	MCU_CIPHER_TKIP,
+	MCU_CIPHER_AES_CCMP,
+	MCU_CIPHER_CCMP_256,
+	MCU_CIPHER_GCMP,
+	MCU_CIPHER_GCMP_256,
+	MCU_CIPHER_WAPI,
+	MCU_CIPHER_BIP_CMAC_128,
 };
 
 enum {

From 5512c974437c6b9d2be9c9e723b7dfc21c32a5f8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 17 Jun 2021 13:02:09 +0200
Subject: [PATCH 2303/3292] mt76: disable TWT capabilities for the moment

Disable TWT REQ/RES mac capabilities since TWT is not supported
yet in mt7915/mt7921.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 4 ----
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 --
 2 files changed, 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 7af1cdbbfebd..4798d6344305 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -667,8 +667,6 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 
 		switch (i) {
 		case NL80211_IFTYPE_AP:
-			he_cap_elem->mac_cap_info[0] |=
-				IEEE80211_HE_MAC_CAP0_TWT_RES;
 			he_cap_elem->mac_cap_info[2] |=
 				IEEE80211_HE_MAC_CAP2_BSR;
 			he_cap_elem->mac_cap_info[4] |=
@@ -682,8 +680,6 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 				IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT;
 			break;
 		case NL80211_IFTYPE_STATION:
-			he_cap_elem->mac_cap_info[0] |=
-				IEEE80211_HE_MAC_CAP0_TWT_REQ;
 			he_cap_elem->mac_cap_info[1] |=
 				IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 0fb152ac4d87..45aefa3132ae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -84,8 +84,6 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 
 		switch (i) {
 		case NL80211_IFTYPE_STATION:
-			he_cap_elem->mac_cap_info[0] |=
-				IEEE80211_HE_MAC_CAP0_TWT_REQ;
 			he_cap_elem->mac_cap_info[1] |=
 				IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US;
 

From 1153668eefca6860dba3a6e94f24bf7146d62d50 Mon Sep 17 00:00:00 2001
From: Deren Wu <deren.wu@mediatek.com>
Date: Thu, 17 Jun 2021 22:38:25 +0800
Subject: [PATCH 2304/3292] mt76: mt7921: enable HE BFee capability

Enables HE MU/SU beamformee functionality

Signed-off-by: Eric-SY Chang <Eric-SY.Chang@mediatek.com>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 45aefa3132ae..7fd21049ff5a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -79,8 +79,11 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 		he_cap_elem->phy_cap_info[1] =
 			IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD;
 		he_cap_elem->phy_cap_info[2] =
+			IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
 			IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
-			IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ;
+			IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
+			IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
+			IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO;
 
 		switch (i) {
 		case NL80211_IFTYPE_STATION:
@@ -100,7 +103,15 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 			he_cap_elem->phy_cap_info[3] |=
 				IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK |
 				IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK;
+			he_cap_elem->phy_cap_info[4] |=
+				IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
+				IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4;
+			he_cap_elem->phy_cap_info[5] |=
+				IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK |
+				IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK;
 			he_cap_elem->phy_cap_info[6] |=
+				IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
+				IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
 				IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB |
 				IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE |
 				IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT;

From aac5104bf631e27032944346a526533b106506d5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 18 Jun 2021 10:08:20 +0200
Subject: [PATCH 2305/3292] mt76: sdio: do not run mt76_txq_schedule directly

In order to support runtime-pm for sdio, do not run mt76_txq_schedule
directly, but schedule tx_worker instead

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/sdio.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
index 1665fe88ebb8..783a15635ec5 100644
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -184,9 +184,6 @@ static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q)
 	if (!q->queued)
 		wake_up(&dev->tx_wait);
 
-	if (!mcu)
-		mt76_txq_schedule(&dev->phy, q->qid);
-
 	return nframes;
 }
 
@@ -195,19 +192,28 @@ static void mt76s_status_worker(struct mt76_worker *w)
 	struct mt76_sdio *sdio = container_of(w, struct mt76_sdio,
 					      status_worker);
 	struct mt76_dev *dev = container_of(sdio, struct mt76_dev, sdio);
+	bool resched = false;
 	int i, nframes;
 
 	do {
+		int ndata_frames = 0;
+
 		nframes = mt76s_process_tx_queue(dev, dev->q_mcu[MT_MCUQ_WM]);
 
 		for (i = 0; i <= MT_TXQ_PSD; i++)
-			nframes += mt76s_process_tx_queue(dev,
-							  dev->phy.q_tx[i]);
+			ndata_frames += mt76s_process_tx_queue(dev,
+							       dev->phy.q_tx[i]);
+		nframes += ndata_frames;
+		if (ndata_frames > 0)
+			resched = true;
 
 		if (dev->drv->tx_status_data &&
 		    !test_and_set_bit(MT76_READING_STATS, &dev->phy.state))
 			queue_work(dev->wq, &dev->sdio.stat_work);
 	} while (nframes > 0);
+
+	if (resched)
+		mt76_worker_schedule(&dev->sdio.txrx_worker);
 }
 
 static void mt76s_tx_status_data(struct work_struct *work)

From 50a97efe218e848e26b7fd4d09fb6d9f88f90e6e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 18 Jun 2021 10:08:21 +0200
Subject: [PATCH 2306/3292] mt76: mt7663s: rely on pm reference counting

As already done for mt7921 and mt7663e, rely on pm reference counting in
drv/fw_own

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../wireless/mediatek/mt76/mt7615/sdio_mcu.c  | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
index d1be78b0711c..6c23c6dbf1c6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
@@ -55,6 +55,7 @@ static int __mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 {
 	struct sdio_func *func = dev->mt76.sdio.func;
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
 	u32 status;
 	int ret;
 
@@ -64,39 +65,44 @@ static int __mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 
 	ret = readx_poll_timeout(mt7663s_read_pcr, dev, status,
 				 status & WHLPCR_IS_DRIVER_OWN, 2000, 1000000);
-	if (ret < 0) {
+	if (ret < 0)
 		dev_err(dev->mt76.dev, "Cannot get ownership from device");
-		set_bit(MT76_STATE_PM, &mphy->state);
-		sdio_release_host(func);
-
-		return ret;
-	}
+	else
+		clear_bit(MT76_STATE_PM, &mphy->state);
 
 	sdio_release_host(func);
-	dev->pm.last_activity = jiffies;
+	pm->last_activity = jiffies;
 
-	return 0;
+	return ret;
 }
 
 static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	int ret = 0;
 
-	if (test_and_clear_bit(MT76_STATE_PM, &mphy->state))
-		return __mt7663s_mcu_drv_pmctrl(dev);
+	mutex_lock(&dev->pm.mutex);
 
-	return 0;
+	if (test_bit(MT76_STATE_PM, &mphy->state))
+		ret = __mt7663s_mcu_drv_pmctrl(dev);
+
+	mutex_unlock(&dev->pm.mutex);
+
+	return ret;
 }
 
 static int mt7663s_mcu_fw_pmctrl(struct mt7615_dev *dev)
 {
 	struct sdio_func *func = dev->mt76.sdio.func;
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
+	int ret = 0;
 	u32 status;
-	int ret;
 
-	if (test_and_set_bit(MT76_STATE_PM, &mphy->state))
-		return 0;
+	mutex_lock(&pm->mutex);
+
+	if (mt76_connac_skip_fw_pmctrl(mphy, pm))
+		goto out;
 
 	sdio_claim_host(func);
 
@@ -110,6 +116,8 @@ static int mt7663s_mcu_fw_pmctrl(struct mt7615_dev *dev)
 	}
 
 	sdio_release_host(func);
+out:
+	mutex_unlock(&pm->mutex);
 
 	return ret;
 }

From c2f9e631f098caf7176cbabda7a9cc2721352ff2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 18 Jun 2021 10:08:22 +0200
Subject: [PATCH 2307/3292] mt76: mt7663s: rely on
 mt76_connac_pm_ref/mt76_connac_pm_unref in tx path

Similar to mt7663e, rely on mt76_connac_pm_ref/mt76_connac_pm_unref to
check PM state and increment/decrement wake counter

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 14 ++++++++++----
 .../wireless/mediatek/mt76/mt7615/sdio_txrx.c    | 16 ++++++++++++----
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f41fbb641e87..bd0d18914994 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1942,12 +1942,18 @@ void mt7615_pm_wake_work(struct work_struct *work)
 	mphy = dev->phy.mt76;
 
 	if (!mt7615_mcu_set_drv_ctrl(dev)) {
+		struct mt76_dev *mdev = &dev->mt76;
 		int i;
 
-		mt76_for_each_q_rx(&dev->mt76, i)
-			napi_schedule(&dev->mt76.napi[i]);
-		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
-		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
+		if (mt76_is_sdio(mdev)) {
+			mt76_worker_schedule(&mdev->sdio.txrx_worker);
+		} else {
+			mt76_for_each_q_rx(mdev, i)
+				napi_schedule(&mdev->napi[i]);
+			mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
+			mt76_queue_tx_cleanup(dev, mdev->q_mcu[MT_MCUQ_WM],
+					      false);
+		}
 		if (test_bit(MT76_STATE_RUNNING, &mphy->state))
 			ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
 						     MT7615_WATCHDOG_TIME);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
index 4393dd21ebbb..04f4c89b7499 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
@@ -283,9 +283,15 @@ void mt7663s_txrx_worker(struct mt76_worker *w)
 {
 	struct mt76_sdio *sdio = container_of(w, struct mt76_sdio,
 					      txrx_worker);
-	struct mt76_dev *dev = container_of(sdio, struct mt76_dev, sdio);
+	struct mt76_dev *mdev = container_of(sdio, struct mt76_dev, sdio);
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
 	int i, nframes, ret;
 
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
+		queue_work(mdev->wq, &dev->pm.wake_work);
+		return;
+	}
+
 	/* disable interrupt */
 	sdio_claim_host(sdio->func);
 	sdio_writel(sdio->func, WHLPCR_INT_EN_CLR, MCR_WHLPCR, NULL);
@@ -295,16 +301,16 @@ void mt7663s_txrx_worker(struct mt76_worker *w)
 
 		/* tx */
 		for (i = 0; i <= MT_TXQ_PSD; i++) {
-			ret = mt7663s_tx_run_queue(dev, dev->phy.q_tx[i]);
+			ret = mt7663s_tx_run_queue(mdev, mdev->phy.q_tx[i]);
 			if (ret > 0)
 				nframes += ret;
 		}
-		ret = mt7663s_tx_run_queue(dev, dev->q_mcu[MT_MCUQ_WM]);
+		ret = mt7663s_tx_run_queue(mdev, mdev->q_mcu[MT_MCUQ_WM]);
 		if (ret > 0)
 			nframes += ret;
 
 		/* rx */
-		ret = mt7663s_rx_handler(dev);
+		ret = mt7663s_rx_handler(mdev);
 		if (ret > 0)
 			nframes += ret;
 	} while (nframes > 0);
@@ -312,6 +318,8 @@ void mt7663s_txrx_worker(struct mt76_worker *w)
 	/* enable interrupt */
 	sdio_writel(sdio->func, WHLPCR_INT_EN_SET, MCR_WHLPCR, NULL);
 	sdio_release_host(sdio->func);
+
+	mt76_connac_pm_unref(&dev->mphy, &dev->pm);
 }
 
 void mt7663s_sdio_irq(struct sdio_func *func)

From 8aff2d915d7f434582d6e840535f19a88a8c71ab Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 18 Jun 2021 10:08:23 +0200
Subject: [PATCH 2308/3292] mt76: mt7663s: enable runtime-pm

Allow the user to enable runtime-pm for mt7663s driver

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c |  2 +-
 .../net/wireless/mediatek/mt76/mt7615/sdio_mcu.c    | 13 ++++++++++---
 .../net/wireless/mediatek/mt76/mt76_connac_mac.c    |  4 ++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 8cb4426e757c..cb4659771fd9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -75,7 +75,7 @@ mt7615_pm_set(void *data, u64 val)
 	if (!mt7615_wait_for_mcu_init(dev))
 		return 0;
 
-	if (!mt7615_firmware_offload(dev) || !mt76_is_mmio(&dev->mt76))
+	if (!mt7615_firmware_offload(dev) || mt76_is_usb(&dev->mt76))
 		return -EOPNOTSUPP;
 
 	if (val == pm->enable)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
index 6c23c6dbf1c6..45c1cd3b9f49 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
@@ -65,13 +65,16 @@ static int __mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 
 	ret = readx_poll_timeout(mt7663s_read_pcr, dev, status,
 				 status & WHLPCR_IS_DRIVER_OWN, 2000, 1000000);
-	if (ret < 0)
+	if (ret < 0) {
 		dev_err(dev->mt76.dev, "Cannot get ownership from device");
-	else
+	} else {
 		clear_bit(MT76_STATE_PM, &mphy->state);
 
+		pm->stats.last_wake_event = jiffies;
+		pm->stats.doze_time += pm->stats.last_wake_event -
+				       pm->stats.last_doze_event;
+	}
 	sdio_release_host(func);
-	pm->last_activity = jiffies;
 
 	return ret;
 }
@@ -113,6 +116,10 @@ static int mt7663s_mcu_fw_pmctrl(struct mt7615_dev *dev)
 	if (ret < 0) {
 		dev_err(dev->mt76.dev, "Cannot set ownership to device");
 		clear_bit(MT76_STATE_PM, &mphy->state);
+	} else {
+		pm->stats.last_doze_event = jiffies;
+		pm->stats.awake_time += pm->stats.last_doze_event -
+					pm->stats.last_wake_event;
 	}
 
 	sdio_release_host(func);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index 5f2705fbd680..af43bcb54578 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -10,7 +10,7 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 	if (!pm->enable)
 		return 0;
 
-	if (!mt76_is_mmio(dev))
+	if (mt76_is_usb(dev))
 		return 0;
 
 	cancel_delayed_work_sync(&pm->ps_work);
@@ -37,7 +37,7 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 {
 	struct mt76_dev *dev = phy->dev;
 
-	if (!mt76_is_mmio(dev))
+	if (mt76_is_usb(dev))
 		return;
 
 	if (!pm->enable)

From a27238a0488e14b2e558053b8b8a9a501e9385c6 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 18 Jun 2021 10:08:24 +0200
Subject: [PATCH 2309/3292] mt76: mt7615: set macwork timeout according to
 runtime-pm

Set macwork timeout value according to runtime-pm in order to reduce
power consumption

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/mac.c    | 15 +++++++++++----
 .../net/wireless/mediatek/mt76/mt7615/main.c   | 18 +++++++++++-------
 .../net/wireless/mediatek/mt76/mt7615/mt7615.h |  7 ++++++-
 .../wireless/mediatek/mt76/mt7615/pci_mac.c    |  7 ++++---
 4 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index bd0d18914994..ff3f85e4087c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1954,9 +1954,14 @@ void mt7615_pm_wake_work(struct work_struct *work)
 			mt76_queue_tx_cleanup(dev, mdev->q_mcu[MT_MCUQ_WM],
 					      false);
 		}
-		if (test_bit(MT76_STATE_RUNNING, &mphy->state))
+
+		if (test_bit(MT76_STATE_RUNNING, &mphy->state)) {
+			unsigned long timeout;
+
+			timeout = mt7615_get_macwork_timeout(dev);
 			ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
-						     MT7615_WATCHDOG_TIME);
+						     timeout);
+		}
 	}
 
 	ieee80211_wake_queues(mphy->hw);
@@ -1991,6 +1996,7 @@ void mt7615_mac_work(struct work_struct *work)
 {
 	struct mt7615_phy *phy;
 	struct mt76_phy *mphy;
+	unsigned long timeout;
 
 	mphy = (struct mt76_phy *)container_of(work, struct mt76_phy,
 					       mac_work.work);
@@ -2009,8 +2015,9 @@ void mt7615_mac_work(struct work_struct *work)
 	mt7615_mutex_release(phy->dev);
 
 	mt76_tx_status_check(mphy->dev, NULL, false);
-	ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
-				     MT7615_WATCHDOG_TIME);
+
+	timeout = mt7615_get_macwork_timeout(phy->dev);
+	ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work, timeout);
 }
 
 void mt7615_tx_token_put(struct mt7615_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index bd2f42ef5ad7..dada43d6d879 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -28,6 +28,7 @@ static int mt7615_start(struct ieee80211_hw *hw)
 {
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+	unsigned long timeout;
 	bool running;
 	int ret;
 
@@ -78,8 +79,8 @@ static int mt7615_start(struct ieee80211_hw *hw)
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
 
-	ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work,
-				     MT7615_WATCHDOG_TIME);
+	timeout = mt7615_get_macwork_timeout(dev);
+	ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work, timeout);
 
 	if (!running)
 		mt7615_mac_reset_counters(dev);
@@ -350,10 +351,12 @@ out:
 	mt7615_mutex_release(dev);
 
 	mt76_worker_schedule(&dev->mt76.tx_worker);
-	if (!mt76_testmode_enabled(phy->mt76))
+	if (!mt76_testmode_enabled(phy->mt76)) {
+		unsigned long timeout = mt7615_get_macwork_timeout(dev);
+
 		ieee80211_queue_delayed_work(phy->mt76->hw,
-					     &phy->mt76->mac_work,
-					     MT7615_WATCHDOG_TIME);
+					     &phy->mt76->mac_work, timeout);
+	}
 
 	return ret;
 }
@@ -1225,6 +1228,7 @@ static int mt7615_resume(struct ieee80211_hw *hw)
 {
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	unsigned long timeout;
 	bool running;
 
 	mt7615_mutex_acquire(dev);
@@ -1248,8 +1252,8 @@ static int mt7615_resume(struct ieee80211_hw *hw)
 					    mt76_connac_mcu_set_suspend_iter,
 					    phy->mt76);
 
-	ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work,
-				     MT7615_WATCHDOG_TIME);
+	timeout = mt7615_get_macwork_timeout(dev);
+	ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work, timeout);
 
 	mt7615_mutex_release(dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 8fbaf8356e1a..d0c64a9b09cf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -20,7 +20,6 @@
 					 MT7615_MAX_INTERFACES)
 
 #define MT7615_PM_TIMEOUT		(HZ / 12)
-#define MT7615_WATCHDOG_TIME		(HZ / 10)
 #define MT7615_HW_SCAN_TIMEOUT		(HZ / 10)
 #define MT7615_RESET_TIMEOUT		(30 * HZ)
 #define MT7615_RATE_RETRY		2
@@ -461,6 +460,12 @@ static inline u32 mt7615_tx_mcu_int_mask(struct mt7615_dev *dev)
 	return MT_INT_TX_DONE(dev->mt76.q_mcu[MT_MCUQ_WM]->hw_idx);
 }
 
+static inline unsigned long
+mt7615_get_macwork_timeout(struct mt7615_dev *dev)
+{
+	return dev->pm.enable ? HZ / 3 : HZ / 10;
+}
+
 void mt7615_dma_reset(struct mt7615_dev *dev);
 void mt7615_scan_work(struct work_struct *work);
 void mt7615_roc_work(struct work_struct *work);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index cc278d8cb888..da87c02a73eb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -268,6 +268,7 @@ void mt7615_mac_reset_work(struct work_struct *work)
 	struct mt7615_phy *phy2;
 	struct mt76_phy *ext_phy;
 	struct mt7615_dev *dev;
+	unsigned long timeout;
 
 	dev = container_of(work, struct mt7615_dev, reset_work);
 	ext_phy = dev->mt76.phy2;
@@ -345,11 +346,11 @@ void mt7615_mac_reset_work(struct work_struct *work)
 
 	mt7615_mutex_release(dev);
 
+	timeout = mt7615_get_macwork_timeout(dev);
 	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mphy.mac_work,
-				     MT7615_WATCHDOG_TIME);
+				     timeout);
 	if (phy2)
 		ieee80211_queue_delayed_work(ext_phy->hw,
-					     &phy2->mt76->mac_work,
-					     MT7615_WATCHDOG_TIME);
+					     &phy2->mt76->mac_work, timeout);
 
 }

From 61a1f99dd1e3c145fce9d601b52e7adb8f37322a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 18 Jun 2021 12:30:47 +0200
Subject: [PATCH 2310/3292] mt76: mt7921: allow chip reset during device
 restart

Disable chip full reset just during device probing but allow
it during hw restart.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   | 8 +++++++-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 8 +++++---
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 3 ++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 9925c15ac9df..a9ce10b98827 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -236,7 +236,13 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
-	return mt76_connac_mcu_set_deep_sleep(&dev->mt76, dev->pm.ds_enable);
+	ret = mt76_connac_mcu_set_deep_sleep(&dev->mt76, dev->pm.ds_enable);
+	if (ret)
+		return ret;
+
+	dev->hw_init_done = true;
+
+	return 0;
 }
 
 void mt7921_unregister_device(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 143dae97ef77..7fe2e3a50428 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1390,11 +1390,13 @@ void mt7921_reset(struct mt76_dev *mdev)
 {
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 
-	if (!test_bit(MT76_STATE_RUNNING, &dev->mphy.state))
+	if (!dev->hw_init_done)
 		return;
 
-	if (!dev->hw_full_reset)
-		queue_work(dev->mt76.wq, &dev->reset_work);
+	if (dev->hw_full_reset)
+		return;
+
+	queue_work(dev->mt76.wq, &dev->reset_work);
 }
 
 static void
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 92cf38444b46..2d8bd6bfc820 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -160,7 +160,8 @@ struct mt7921_dev {
 	u16 chainmask;
 
 	struct work_struct reset_work;
-	bool hw_full_reset;
+	bool hw_full_reset:1;
+	bool hw_init_done:1;
 
 	struct list_head sta_poll_list;
 	spinlock_t sta_poll_lock;

From c240b044edefa3c3af4014a4030e017dd95b59a1 Mon Sep 17 00:00:00 2001
From: Pascal Terjan <pterjan@google.com>
Date: Sat, 24 Apr 2021 18:29:59 +0100
Subject: [PATCH 2311/3292] rtl8xxxu: Fix device info for RTL8192EU devices

Based on 2001:3319 and 2357:0109 which I used to test the fix and
0bda:818b and 2357:0108 for which I found efuse dumps online.

== 2357:0109 ==
=== Before ===
Vendor: Realtek
Product: \x03802.11n NI
Serial:
=== After ===
Vendor: Realtek
Product: 802.11n NIC
Serial not available.

== 2001:3319 ==
=== Before ===
Vendor: Realtek
Product: Wireless N
Serial: no USB Adap
=== After ===
Vendor: Realtek
Product: Wireless N Nano USB Adapter
Serial not available.

Signed-off-by: Pascal Terjan <pterjan@google.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210424172959.1559890-1-pterjan@google.com
---
 .../net/wireless/realtek/rtl8xxxu/rtl8xxxu.h  | 11 +---
 .../realtek/rtl8xxxu/rtl8xxxu_8192e.c         | 59 +++++++++++++++++--
 2 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
index d1a566cc0c9e..01735776345a 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
@@ -853,15 +853,10 @@ struct rtl8192eu_efuse {
 	u8 usb_optional_function;
 	u8 res9[2];
 	u8 mac_addr[ETH_ALEN];		/* 0xd7 */
-	u8 res10[2];
-	u8 vendor_name[7];
-	u8 res11[2];
-	u8 device_name[0x0b];		/* 0xe8 */
-	u8 res12[2];
-	u8 serial[0x0b];		/* 0xf5 */
-	u8 res13[0x30];
+	u8 device_info[80];
+	u8 res11[3];
 	u8 unknown[0x0d];		/* 0x130 */
-	u8 res14[0xc3];
+	u8 res12[0xc3];
 };
 
 struct rtl8xxxu_reg8val {
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
index cfe2dfdae928..b06508d0cdf8 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
@@ -554,9 +554,43 @@ rtl8192e_set_tx_power(struct rtl8xxxu_priv *priv, int channel, bool ht40)
 	}
 }
 
+static void rtl8192eu_log_next_device_info(struct rtl8xxxu_priv *priv,
+					   char *record_name,
+					   char *device_info,
+					   unsigned int *record_offset)
+{
+	char *record = device_info + *record_offset;
+
+	/* A record is [ total length | 0x03 | value ] */
+	unsigned char l = record[0];
+
+	/*
+	 * The whole device info section seems to be 80 characters, make sure
+	 * we don't read further.
+	 */
+	if (*record_offset + l > 80) {
+		dev_warn(&priv->udev->dev,
+			 "invalid record length %d while parsing \"%s\" at offset %u.\n",
+			 l, record_name, *record_offset);
+		return;
+	}
+
+	if (l >= 2) {
+		char value[80];
+
+		memcpy(value, &record[2], l - 2);
+		value[l - 2] = '\0';
+		dev_info(&priv->udev->dev, "%s: %s\n", record_name, value);
+		*record_offset = *record_offset + l;
+	} else {
+		dev_info(&priv->udev->dev, "%s not available.\n", record_name);
+	}
+}
+
 static int rtl8192eu_parse_efuse(struct rtl8xxxu_priv *priv)
 {
 	struct rtl8192eu_efuse *efuse = &priv->efuse_wifi.efuse8192eu;
+	unsigned int record_offset;
 	int i;
 
 	if (efuse->rtl_id != cpu_to_le16(0x8129))
@@ -604,12 +638,25 @@ static int rtl8192eu_parse_efuse(struct rtl8xxxu_priv *priv)
 	priv->has_xtalk = 1;
 	priv->xtalk = priv->efuse_wifi.efuse8192eu.xtal_k & 0x3f;
 
-	dev_info(&priv->udev->dev, "Vendor: %.7s\n", efuse->vendor_name);
-	dev_info(&priv->udev->dev, "Product: %.11s\n", efuse->device_name);
-	if (memchr_inv(efuse->serial, 0xff, 11))
-		dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial);
-	else
-		dev_info(&priv->udev->dev, "Serial not available.\n");
+	/*
+	 * device_info section seems to be laid out as records
+	 * [ total length | 0x03 | value ] so:
+	 * - vendor length + 2
+	 * - 0x03
+	 * - vendor string (not null terminated)
+	 * - product length + 2
+	 * - 0x03
+	 * - product string (not null terminated)
+	 * Then there is one or 2 0x00 on all the 4 devices I own or found
+	 * dumped online.
+	 * As previous version of the code handled an optional serial
+	 * string, I now assume there may be a third record if the
+	 * length is not 0.
+	 */
+	record_offset = 0;
+	rtl8192eu_log_next_device_info(priv, "Vendor", efuse->device_info, &record_offset);
+	rtl8192eu_log_next_device_info(priv, "Product", efuse->device_info, &record_offset);
+	rtl8192eu_log_next_device_info(priv, "Serial", efuse->device_info, &record_offset);
 
 	if (rtl8xxxu_debug & RTL8XXXU_DEBUG_EFUSE) {
 		unsigned char *raw = priv->efuse_wifi.raw;

From adf6a0f8c0a656df3d29403f314bf3e0dbb2dd77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Tue, 11 May 2021 09:19:27 +0200
Subject: [PATCH 2312/3292] rtl8xxxu: avoid parsing short RX packet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One USB data buffer can contain multiple received network
packets. If that's the case, they're processed this way:
1. Original buffer is cloned
2. Original buffer is trimmed to contain only the first
   network packet
3. This first network packet is passed to network stack
4. Cloned buffer is trimmed to eliminate the first network
   packet
5. Repeat with the cloned buffer until there are no more
   network packets inside

However, if the space remaining in original buffer after
the first network packet is not enough to contain at least
another network packet descriptor, it is not cloned.

The loop parsing this packets ended if remaining space == 0.
But if the remaining space was > 0 but < packet descriptor
size, another iteration of the loop was done, processing again
the previous packet because cloning didn't happen. Moreover,
the ownership of this packet had been passed to network
stack in the previous iteration.

This patch ensures that no extra iteration is done if the
remaining size is not enough for one packet, and also avoid
the first iteration for the same reason.

Probably this doesn't happen in practice, but can happen
theoretically.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511071926.8951-1-ihuguet@redhat.com
---
 drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 9ff09cf7eb62..ac1061caacd6 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -5554,6 +5554,11 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
 	urb_len = skb->len;
 	pkt_cnt = 0;
 
+	if (urb_len < sizeof(struct rtl8xxxu_rxdesc16)) {
+		kfree_skb(skb);
+		return RX_TYPE_ERROR;
+	}
+
 	do {
 		rx_desc = (struct rtl8xxxu_rxdesc16 *)skb->data;
 		_rx_desc_le = (__le32 *)skb->data;
@@ -5581,7 +5586,7 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
 		 * at least cover the rx descriptor
 		 */
 		if (pkt_cnt > 1 &&
-		    urb_len > (pkt_offset + sizeof(struct rtl8xxxu_rxdesc16)))
+		    urb_len >= (pkt_offset + sizeof(struct rtl8xxxu_rxdesc16)))
 			next_skb = skb_clone(skb, GFP_ATOMIC);
 
 		rx_status = IEEE80211_SKB_RXCB(skb);
@@ -5627,7 +5632,9 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb)
 
 		pkt_cnt--;
 		urb_len -= pkt_offset;
-	} while (skb && urb_len > 0 && pkt_cnt > 0);
+		next_skb = NULL;
+	} while (skb && pkt_cnt > 0 &&
+		 urb_len >= sizeof(struct rtl8xxxu_rxdesc16));
 
 	return RX_TYPE_DATA_PKT;
 }

From 8a952a955de705724b715e4b595a89bee8c11b9f Mon Sep 17 00:00:00 2001
From: Caleb Connolly <caleb@connolly.tech>
Date: Thu, 17 Jun 2021 11:29:40 +0300
Subject: [PATCH 2313/3292] ath10k: demote chan info without scan request
 warning

Some devices/firmwares cause this to be printed every 5-15 seconds,
though it has no impact on functionality. Demote this to a debug
message.

I see this on SDM845 and MSM8998 platforms, specifically the OnePlus 6 devices,
PocoPhone F1 and OnePlus 5.  On the OnePlus 6 (SDM845) we are stuck with the
following signed vendor fw:

[    9.339873] ath10k_snoc 18800000.wifi: qmi chip_id 0x30214 chip_family 0x4001 board_id 0xff soc_id 0x40030001
[    9.339897] ath10k_snoc 18800000.wifi: qmi fw_version 0x20060029 fw_build_timestamp 2019-07-12 02:14 fw_build_id QC_IMAGE_VERSION_STRING=WLAN.HL.2.0.c8-00041-QCAHLSWMTPLZ-1

The OnePlus 5 (MSM8998) is using firmware:

[ 6096.956799] ath10k_snoc 18800000.wifi: qmi chip_id 0x30214 chip_family 0x4001 board_id 0xff soc_id 0x40010002
[ 6096.956824] ath10k_snoc 18800000.wifi: qmi fw_version 0x1007007e fw_build_timestamp 2020-04-14 22:45 fw_build_id QC_IMAGE_VERSION_STRING=WLAN.HL.1.0.c6-00126-QCAHLSWMTPLZ-1.211883.1.278648.

Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.2.0.c8-00041-QCAHLSWMTPLZ-1
Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.1.0.c6-00126-QCAHLSWMTPLZ-1.211883.1.278648

Signed-off-by: Caleb Connolly <caleb@connolly.tech>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210522171609.299611-1-caleb@connolly.tech
---
 drivers/net/wireless/ath/ath10k/wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index f42bf2c8f9e7..b8a4bbfe10b8 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2795,7 +2795,7 @@ void ath10k_wmi_event_chan_info(struct ath10k *ar, struct sk_buff *skb)
 	switch (ar->scan.state) {
 	case ATH10K_SCAN_IDLE:
 	case ATH10K_SCAN_STARTING:
-		ath10k_warn(ar, "received chan info event without a scan request, ignoring\n");
+		ath10k_dbg(ar, ATH10K_DBG_WMI, "received chan info event without a scan request, ignoring\n");
 		goto exit;
 	case ATH10K_SCAN_RUNNING:
 	case ATH10K_SCAN_ABORTING:

From 49f5b114e36ebc69318ab95f98b57df7458b0f42 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Thu, 17 Jun 2021 11:29:40 +0300
Subject: [PATCH 2314/3292] ath11k: Enable QCN9074 device

The issues mentioned in commit 4e80946197a8
("ath11k: add qcn9074 pci device support") are fixed in firmware.
This patch enables QCN9074 device.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1-01838-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210615211348.92168-1-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index f8f6b2090dad..646ad79f309c 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -41,7 +41,7 @@
 static const struct pci_device_id ath11k_pci_id_table[] = {
 	{ PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) },
 	{ PCI_VDEVICE(QCOM, WCN6855_DEVICE_ID) },
-	/* TODO: add QCN9074_DEVICE_ID) once firmware issues are resolved */
+	{ PCI_VDEVICE(QCOM, QCN9074_DEVICE_ID) },
 	{0}
 };
 

From e3a5de6d81d8b2199935c7eb3f7d17a50a7075b7 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 18 Jun 2021 17:57:31 +0300
Subject: [PATCH 2315/3292] net: ethernet: aeroflex: fix UAF in greth_of_remove

static int greth_of_remove(struct platform_device *of_dev)
{
...
	struct greth_private *greth = netdev_priv(ndev);
...
	unregister_netdev(ndev);
	free_netdev(ndev);

	of_iounmap(&of_dev->resource[0], greth->regs, resource_size(&of_dev->resource[0]));
...
}

greth is netdev private data, but it is used
after free_netdev(). It can cause use-after-free when accessing greth
pointer. So, fix it by moving free_netdev() after of_iounmap()
call.

Fixes: d4c41139df6e ("net: Add Aeroflex Gaisler 10/100/1G Ethernet MAC driver")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aeroflex/greth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index d77fafbc1530..c560ad06f0be 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1539,10 +1539,11 @@ static int greth_of_remove(struct platform_device *of_dev)
 	mdiobus_unregister(greth->mdio);
 
 	unregister_netdev(ndev);
-	free_netdev(ndev);
 
 	of_iounmap(&of_dev->resource[0], greth->regs, resource_size(&of_dev->resource[0]));
 
+	free_netdev(ndev);
+
 	return 0;
 }
 

From e4b8700e07a86e8eab6916aa5c5ba99042c34089 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 18 Jun 2021 19:14:31 +0300
Subject: [PATCH 2316/3292] net: ethernet: ezchip: fix UAF in nps_enet_remove

priv is netdev private data, but it is used
after free_netdev(). It can cause use-after-free when accessing priv
pointer. So, fix it by moving free_netdev() after netif_napi_del()
call.

Fixes: 0dd077093636 ("NET: Add ezchip ethernet driver")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ezchip/nps_enet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index e3954d8835e7..20d2c2bb26e4 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -642,8 +642,8 @@ static s32 nps_enet_remove(struct platform_device *pdev)
 	struct nps_enet_priv *priv = netdev_priv(ndev);
 
 	unregister_netdev(ndev);
-	free_netdev(ndev);
 	netif_napi_del(&priv->napi);
+	free_netdev(ndev);
 
 	return 0;
 }

From 4ae85b23e1f052379f0316e42494e2f84f2a3e6f Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 18 Jun 2021 19:14:37 +0300
Subject: [PATCH 2317/3292] net: ethernet: ezchip: remove redundant check

err varibale will be set everytime, when code gets
into this path. This check will just slowdown the execution
and that's all.

Fixes: 0dd077093636 ("NET: Add ezchip ethernet driver")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ezchip/nps_enet.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 20d2c2bb26e4..c562a1e83913 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -630,8 +630,7 @@ static s32 nps_enet_probe(struct platform_device *pdev)
 out_netif_api:
 	netif_napi_del(&priv->napi);
 out_netdev:
-	if (err)
-		free_netdev(ndev);
+	free_netdev(ndev);
 
 	return err;
 }

From 0de449d599594f5472e00267d651615c7f2c6c1d Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 18 Jun 2021 19:14:47 +0300
Subject: [PATCH 2318/3292] net: ethernet: ezchip: fix error handling

As documented at drivers/base/platform.c for platform_get_irq:

 * Gets an IRQ for a platform device and prints an error message if finding the
 * IRQ fails. Device drivers should check the return value for errors so as to
 * not pass a negative integer value to the request_irq() APIs.

So, the driver should check that platform_get_irq() return value
is _negative_, not that it's equal to zero, because -ENXIO (return
value from request_irq() if irq was not found) will
pass this check and it leads to passing negative irq to request_irq()

Fixes: 0dd077093636 ("NET: Add ezchip ethernet driver")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ezchip/nps_enet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index c562a1e83913..f9a288a6ec8c 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -607,7 +607,7 @@ static s32 nps_enet_probe(struct platform_device *pdev)
 
 	/* Get IRQ number */
 	priv->irq = platform_get_irq(pdev, 0);
-	if (!priv->irq) {
+	if (priv->irq < 0) {
 		dev_err(dev, "failed to retrieve <irq Rx-Tx> value from device tree\n");
 		err = -ENODEV;
 		goto out_netdev;

From d5f9023fa61ee8b94f37a93f08e94b136cf1e463 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Sat, 19 Jun 2021 13:18:13 -0300
Subject: [PATCH 2319/3292] can: bcm: delay release of struct bcm_op after
 synchronize_rcu()

can_rx_register() callbacks may be called concurrently to the call to
can_rx_unregister(). The callbacks and callback data, though, are
protected by RCU and the struct sock reference count.

So the callback data is really attached to the life of sk, meaning
that it should be released on sk_destruct. However, bcm_remove_op()
calls tasklet_kill(), and RCU callbacks may be called under RCU
softirq, so that cannot be used on kernels before the introduction of
HRTIMER_MODE_SOFT.

However, bcm_rx_handler() is called under RCU protection, so after
calling can_rx_unregister(), we may call synchronize_rcu() in order to
wait for any RCU read-side critical sections to finish. That is,
bcm_rx_handler() won't be called anymore for those ops. So, we only
free them, after we do that synchronize_rcu().

Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol")
Link: https://lore.kernel.org/r/20210619161813.2098382-1-cascardo@canonical.com
Cc: linux-stable <stable@vger.kernel.org>
Reported-by: syzbot+0f7e7e5e2f4f40fa89c0@syzkaller.appspotmail.com
Reported-by: Norbert Slusarek <nslusarek@gmx.net>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/bcm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index f3e4d9528fa3..0928a39c4423 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -785,6 +785,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
 						  bcm_rx_handler, op);
 
 			list_del(&op->list);
+			synchronize_rcu();
 			bcm_remove_op(op);
 			return 1; /* done */
 		}
@@ -1533,9 +1534,13 @@ static int bcm_release(struct socket *sock)
 					  REGMASK(op->can_id),
 					  bcm_rx_handler, op);
 
-		bcm_remove_op(op);
 	}
 
+	synchronize_rcu();
+
+	list_for_each_entry_safe(op, next, &bo->rx_ops, list)
+		bcm_remove_op(op);
+
 #if IS_ENABLED(CONFIG_PROC_FS)
 	/* remove procfs entry */
 	if (net->can.bcmproc_dir && bo->bcm_proc_read)

From fb8696ab14adadb2e3f6c17c18ed26b3ecd96691 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Fri, 18 Jun 2021 19:36:45 +0200
Subject: [PATCH 2320/3292] can: gw: synchronize rcu operations before removing
 gw job entry

can_can_gw_rcv() is called under RCU protection, so after calling
can_rx_unregister(), we have to call synchronize_rcu in order to wait
for any RCU read-side critical sections to finish before removing the
kmem_cache entry with the referenced gw job entry.

Link: https://lore.kernel.org/r/20210618173645.2238-1-socketcan@hartkopp.net
Fixes: c1aabdf379bc ("can-gw: add netlink based CAN routing")
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/gw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/can/gw.c b/net/can/gw.c
index ba4124805602..d8861e862f15 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -596,6 +596,7 @@ static int cgw_notifier(struct notifier_block *nb,
 			if (gwj->src.dev == dev || gwj->dst.dev == dev) {
 				hlist_del(&gwj->list);
 				cgw_unregister_filter(net, gwj);
+				synchronize_rcu();
 				kmem_cache_free(cgw_cache, gwj);
 			}
 		}
@@ -1154,6 +1155,7 @@ static void cgw_remove_all_jobs(struct net *net)
 	hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
 		hlist_del(&gwj->list);
 		cgw_unregister_filter(net, gwj);
+		synchronize_rcu();
 		kmem_cache_free(cgw_cache, gwj);
 	}
 }
@@ -1222,6 +1224,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 		hlist_del(&gwj->list);
 		cgw_unregister_filter(net, gwj);
+		synchronize_rcu();
 		kmem_cache_free(cgw_cache, gwj);
 		err = 0;
 		break;

From 14a4696bc3118ba49da28f79280e1d55603aa737 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Fri, 18 Jun 2021 19:37:13 +0200
Subject: [PATCH 2321/3292] can: isotp: isotp_release(): omit unintended
 hrtimer restart on socket release

When closing the isotp socket, the potentially running hrtimers are
canceled before removing the subscription for CAN identifiers via
can_rx_unregister().

This may lead to an unintended (re)start of a hrtimer in
isotp_rcv_cf() and isotp_rcv_fc() in the case that a CAN frame is
received by isotp_rcv() while the subscription removal is processed.

However, isotp_rcv() is called under RCU protection, so after calling
can_rx_unregister, we may call synchronize_rcu in order to wait for
any RCU read-side critical sections to finish. This prevents the
reception of CAN frames after hrtimer_cancel() and therefore the
unintended (re)start of the hrtimers.

Link: https://lore.kernel.org/r/20210618173713.2296-1-socketcan@hartkopp.net
Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol")
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index be6183f8ca11..234cc4ad179a 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1028,9 +1028,6 @@ static int isotp_release(struct socket *sock)
 
 	lock_sock(sk);
 
-	hrtimer_cancel(&so->txtimer);
-	hrtimer_cancel(&so->rxtimer);
-
 	/* remove current filters & unregister */
 	if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
 		if (so->ifindex) {
@@ -1042,10 +1039,14 @@ static int isotp_release(struct socket *sock)
 						  SINGLE_MASK(so->rxid),
 						  isotp_rcv, sk);
 				dev_put(dev);
+				synchronize_rcu();
 			}
 		}
 	}
 
+	hrtimer_cancel(&so->txtimer);
+	hrtimer_cancel(&so->rxtimer);
+
 	so->ifindex = 0;
 	so->bound = 0;
 

From 22c696fed25c63c7f67508309820358b94a96b6d Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Thu, 17 Jun 2021 15:06:23 +0200
Subject: [PATCH 2322/3292] can: j1939: j1939_sk_init(): set SOCK_RCU_FREE to
 call sk_destruct() after RCU is done

Set SOCK_RCU_FREE to let RCU to call sk_destruct() on completion.
Without this patch, we will run in to j1939_can_recv() after priv was
freed by j1939_sk_release()->j1939_sk_sock_destruct()

Fixes: 25fe97cb7620 ("can: j1939: move j1939_priv_put() into sk_destruct callback")
Link: https://lore.kernel.org/r/20210617130623.12705-1-o.rempel@pengutronix.de
Cc: linux-stable <stable@vger.kernel.org>
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Reported-by: syzbot+bdf710cfc41c186fdff3@syzkaller.appspotmail.com
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/main.c   | 4 ++++
 net/can/j1939/socket.c | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index da3a7a7bcff2..08c8606cfd9c 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -193,6 +193,10 @@ static void j1939_can_rx_unregister(struct j1939_priv *priv)
 	can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
 			  j1939_can_recv, priv);
 
+	/* The last reference of priv is dropped by the RCU deferred
+	 * j1939_sk_sock_destruct() of the last socket, so we can
+	 * safely drop this reference here.
+	 */
 	j1939_priv_put(priv);
 }
 
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 56aa66147d5a..fce8bc8afeb7 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -398,6 +398,9 @@ static int j1939_sk_init(struct sock *sk)
 	atomic_set(&jsk->skb_pending, 0);
 	spin_lock_init(&jsk->sk_session_queue_lock);
 	INIT_LIST_HEAD(&jsk->sk_session_queue);
+
+	/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
+	sock_set_flag(sk, SOCK_RCU_FREE);
 	sk->sk_destruct = j1939_sk_sock_destruct;
 	sk->sk_protocol = CAN_J1939;
 

From ab4a0b8fcb9a95c02909b62049811bd2e586aaa4 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 17 Jun 2021 21:51:30 +0300
Subject: [PATCH 2323/3292] net: can: ems_usb: fix use-after-free in
 ems_usb_disconnect()

In ems_usb_disconnect() dev pointer, which is netdev private data, is
used after free_candev() call:
| 	if (dev) {
| 		unregister_netdev(dev->netdev);
| 		free_candev(dev->netdev);
|
| 		unlink_all_urbs(dev);
|
| 		usb_free_urb(dev->intr_urb);
|
| 		kfree(dev->intr_in_buffer);
| 		kfree(dev->tx_msg_buffer);
| 	}

Fix it by simply moving free_candev() at the end of the block.

Fail log:
| BUG: KASAN: use-after-free in ems_usb_disconnect
| Read of size 8 at addr ffff88804e041008 by task kworker/1:2/2895
|
| CPU: 1 PID: 2895 Comm: kworker/1:2 Not tainted 5.13.0-rc5+ #164
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.4
| Workqueue: usb_hub_wq hub_event
| Call Trace:
|     dump_stack (lib/dump_stack.c:122)
|     print_address_description.constprop.0.cold (mm/kasan/report.c:234)
|     kasan_report.cold (mm/kasan/report.c:420 mm/kasan/report.c:436)
|     ems_usb_disconnect (drivers/net/can/usb/ems_usb.c:683 drivers/net/can/usb/ems_usb.c:1058)

Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface")
Link: https://lore.kernel.org/r/20210617185130.5834-1-paskripkin@gmail.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/ems_usb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 5af69787d9d5..0a37af4a3fa4 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -1053,7 +1053,6 @@ static void ems_usb_disconnect(struct usb_interface *intf)
 
 	if (dev) {
 		unregister_netdev(dev->netdev);
-		free_candev(dev->netdev);
 
 		unlink_all_urbs(dev);
 
@@ -1061,6 +1060,8 @@ static void ems_usb_disconnect(struct usb_interface *intf)
 
 		kfree(dev->intr_in_buffer);
 		kfree(dev->tx_msg_buffer);
+
+		free_candev(dev->netdev);
 	}
 }
 

From 0a9ece9ba154dd6205709108180952c55e630833 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 19 Jun 2021 09:02:03 -0700
Subject: [PATCH 2324/3292] EDAC/igen6: fix core dependency

igen6_edac needs mce_register()/unregister() functions,
so it should depend on X86_MCE (or X86_MCE_INTEL).

That change prevents these build errors:

ld: drivers/edac/igen6_edac.o: in function `igen6_remove':
igen6_edac.c:(.text+0x494): undefined reference to `mce_unregister_decode_chain'
ld: drivers/edac/igen6_edac.o: in function `igen6_probe':
igen6_edac.c:(.text+0xf5b): undefined reference to `mce_register_decode_chain'

Fixes: 10590a9d4f23e ("EDAC/igen6: Add EDAC driver for Intel client SoCs using IBECC")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210619160203.2026-1-rdunlap@infradead.org
---
 drivers/edac/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 1e836e320edd..91164c5f0757 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -270,7 +270,8 @@ config EDAC_PND2
 
 config EDAC_IGEN6
 	tristate "Intel client SoC Integrated MC"
-	depends on PCI && X86_64 && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
+	depends on PCI && PCI_MMCONFIG && ARCH_HAVE_NMI_SAFE_CMPXCHG
+	depends on X64_64 && X86_MCE_INTEL
 	help
 	  Support for error detection and correction on the Intel
 	  client SoC Integrated Memory Controller using In-Band ECC IP.

From 7d815f4afa87f2032b650ae1bba7534b550a6b8b Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Tue, 25 May 2021 16:17:33 -0700
Subject: [PATCH 2325/3292] PCI: hv: Add check for hyperv_initialized in
 init_hv_pci_drv()

Add check for hv_is_hyperv_initialized() at the top of
init_hv_pci_drv(), so if the pci-hyperv driver is force-loaded on non
Hyper-V platforms, the init_hv_pci_drv() will exit immediately, without
any side effects, like assignments to hvpci_block_ops, etc.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reported-and-tested-by: Mohammad Alqayeem <mohammad.alqyeem@nutanix.com>
Reviewed-by: Wei Liu <wei.liu@kernel.org>
Link: https://lore.kernel.org/r/1621984653-1210-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/pci/controller/pci-hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 6511648271b2..bebe3eeebc4e 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -3476,6 +3476,9 @@ static void __exit exit_hv_pci_drv(void)
 
 static int __init init_hv_pci_drv(void)
 {
+	if (!hv_is_hyperv_initialized())
+		return -ENODEV;
+
 	/* Set the invalid domain number's bit, so it will not be used */
 	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
 

From 5e538959f019f9d356ba5b16deba25ef4da892ca Mon Sep 17 00:00:00 2001
From: Aurelien Aptel <aaptel@suse.com>
Date: Fri, 28 May 2021 16:32:48 +0200
Subject: [PATCH 2326/3292] cifs: fix ipv6 formating in cifs_ses_add_channel

Use %pI6 for IPv6 addresses

Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/sess.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index a92a1fb7cb52..cd19aa11f27e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -195,7 +195,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 			 ses, iface->speed, iface->rdma_capable ? "yes" : "no",
 			 &ipv4->sin_addr);
 	else
-		cifs_dbg(FYI, "adding channel to ses %p (speed:%zu bps rdma:%s ip:%pI4)\n",
+		cifs_dbg(FYI, "adding channel to ses %p (speed:%zu bps rdma:%s ip:%pI6)\n",
 			 ses, iface->speed, iface->rdma_capable ? "yes" : "no",
 			 &ipv6->sin6_addr);
 

From f2756527d37563b98d18fe9f1fa56ada2e79f7cf Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 5 Jun 2021 15:33:00 -0500
Subject: [PATCH 2327/3292] cifs: remove duplicated prototype

smb2_find_smb_ses was defined twice in smb2proto.h

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2proto.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index a5f87b02cfaf..4152077e3c07 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -64,8 +64,6 @@ extern void smb2_echo_request(struct work_struct *work);
 extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
 extern bool smb2_is_valid_oplock_break(char *buffer,
 				       struct TCP_Server_Info *srv);
-extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
-					  __u64 ses_id);
 extern int smb3_handle_read_data(struct TCP_Server_Info *server,
 				 struct mid_q_entry *mid);
 

From a23a71abca941fdbac32e504c6d78cf99c6df70f Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 29 May 2021 19:48:48 +0800
Subject: [PATCH 2328/3292] cifs: Remove unused inline function
 is_sysvol_or_netlogon()

is_sysvol_or_netlogon() is never used, so can remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dfs_cache.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index b1fa30fefe1f..03c81c39a507 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -319,17 +319,6 @@ static inline unsigned int cache_entry_hash(const void *data, int size)
 	return h & (CACHE_HTABLE_SIZE - 1);
 }
 
-/* Check whether second path component of @path is SYSVOL or NETLOGON */
-static inline bool is_sysvol_or_netlogon(const char *path)
-{
-	const char *s;
-	char sep = path[0];
-
-	s = strchr(path + 1, sep) + 1;
-	return !strncasecmp(s, "sysvol", strlen("sysvol")) ||
-		!strncasecmp(s, "netlogon", strlen("netlogon"));
-}
-
 /* Return target hint of a DFS cache entry */
 static inline char *get_tgt_name(const struct cache_entry *ce)
 {

From 57c8ce7ab3fc2ac894d4a0f26776fbd7a5fd48bc Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Wed, 26 May 2021 22:35:45 +0200
Subject: [PATCH 2329/3292] cifs: Constify static struct genl_ops

The only usage of cifs_genl_ops[] is to assign its address to the ops
field in the genl_family struct, which is a pointer to const. Make it
const to allow the compiler to put it in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
index 5aaabe4cc0a7..291cb606f149 100644
--- a/fs/cifs/netlink.c
+++ b/fs/cifs/netlink.c
@@ -30,7 +30,7 @@ static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
 	[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]	= { .type = NLA_STRING},
 };
 
-static struct genl_ops cifs_genl_ops[] = {
+static const struct genl_ops cifs_genl_ops[] = {
 	{
 		.cmd = CIFS_GENL_CMD_SWN_NOTIFY,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,

From 032e091d3e3b260f3f85d8bd781d99690589085c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 13 Jun 2021 15:01:23 +0100
Subject: [PATCH 2330/3292] cifs: remove redundant initialization of variable
 rc

The variable rc is being initialized with a value that is never read, the
assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifssmb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 41f74163cc1c..234337865b96 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1220,7 +1220,7 @@ SMBLegacyOpen(const unsigned int xid, struct cifs_tcon *tcon,
 	    int *pOplock, FILE_ALL_INFO *pfile_info,
 	    const struct nls_table *nls_codepage, int remap)
 {
-	int rc = -EACCES;
+	int rc;
 	OPENX_REQ *pSMB = NULL;
 	OPENX_RSP *pSMBr = NULL;
 	int bytes_returned;

From aaf36df3eddde763e6fe2b082db101820f2d2a90 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Thu, 10 Jun 2021 19:48:17 +0800
Subject: [PATCH 2331/3292] cifs: fix doc warnings in cifs_dfs_ref.c

Add description for `cifs_compose_mount_options` to fix the W=1 warnings:

 fs/cifs/cifs_dfs_ref.c:139: warning: Function parameter or
  member 'devname' not described in 'cifs_compose_mount_options'

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_dfs_ref.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index c87c37cf2914..ec57cdb1590f 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -125,7 +125,7 @@ cifs_build_devname(char *nodename, const char *prepath)
  * @sb_mountdata:	parent/root DFS mount options (template)
  * @fullpath:		full path in UNC format
  * @ref:		optional server's referral
- *
+ * @devname:		return the built cifs device name if passed pointer not NULL
  * creates mount options for submount based on template options sb_mountdata
  * and replacing unc,ip,prefixpath options with ones we've got form ref_unc.
  *

From 966a3cb7c7db786452a87afdc3b48858fc4d4d6b Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Thu, 3 Jun 2021 15:31:01 +1000
Subject: [PATCH 2332/3292] cifs: improve fallocate emulation

RHBZ: 1866684

We don't have a real fallocate in the SMB2 protocol so we used to emulate fallocate
by simply switching the file to become non-sparse. But as that could potantially consume
a lot more data than we intended to fallocate (large sparse file and fallocating a thin
slice in the middle) we would only do this IFF the fallocate request was for virtually
the entire file.

This patch improves this and starts allowing us to fallocate smaller chunks of a file by
overwriting the region with 0, for the parts that are unallocated.

The method used is to first query the server for FSCTL_QUERY_ALLOCATED_RANGES to find what
is unallocated in the fallocate range and then to only overwrite-with-zero the unallocated
ranges to fill in the holes.

As overwriting-with-zero is different from just allocating blocks, and potentially much
more expensive, we limit this to only allow fallocate ranges up to 1Mb in size.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Acked-by: Aurelien Aptel <aaptel@suse.com>
Acked-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 21ef51d338e0..b68ba92893b6 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3601,6 +3601,119 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
 	return rc;
 }
 
+static int smb3_simple_fallocate_write_range(unsigned int xid,
+					     struct cifs_tcon *tcon,
+					     struct cifsFileInfo *cfile,
+					     loff_t off, loff_t len,
+					     char *buf)
+{
+	struct cifs_io_parms io_parms = {0};
+	int nbytes;
+	struct kvec iov[2];
+
+	io_parms.netfid = cfile->fid.netfid;
+	io_parms.pid = current->tgid;
+	io_parms.tcon = tcon;
+	io_parms.persistent_fid = cfile->fid.persistent_fid;
+	io_parms.volatile_fid = cfile->fid.volatile_fid;
+	io_parms.offset = off;
+	io_parms.length = len;
+
+	/* iov[0] is reserved for smb header */
+	iov[1].iov_base = buf;
+	iov[1].iov_len = io_parms.length;
+	return SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+}
+
+static int smb3_simple_fallocate_range(unsigned int xid,
+				       struct cifs_tcon *tcon,
+				       struct cifsFileInfo *cfile,
+				       loff_t off, loff_t len)
+{
+	struct file_allocated_range_buffer in_data, *out_data = NULL, *tmp_data;
+	u32 out_data_len;
+	char *buf = NULL;
+	loff_t l;
+	int rc;
+
+	in_data.file_offset = cpu_to_le64(off);
+	in_data.length = cpu_to_le64(len);
+	rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+			cfile->fid.volatile_fid,
+			FSCTL_QUERY_ALLOCATED_RANGES, true,
+			(char *)&in_data, sizeof(in_data),
+			1024 * sizeof(struct file_allocated_range_buffer),
+			(char **)&out_data, &out_data_len);
+	if (rc)
+		goto out;
+	/*
+	 * It is already all allocated
+	 */
+	if (out_data_len == 0)
+		goto out;
+
+	buf = kzalloc(1024 * 1024, GFP_KERNEL);
+	if (buf == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	tmp_data = out_data;
+	while (len) {
+		/*
+		 * The rest of the region is unmapped so write it all.
+		 */
+		if (out_data_len == 0) {
+			rc = smb3_simple_fallocate_write_range(xid, tcon,
+					       cfile, off, len, buf);
+			goto out;
+		}
+
+		if (out_data_len < sizeof(struct file_allocated_range_buffer)) {
+			rc = -EINVAL;
+			goto out;
+		}
+
+		if (off < le64_to_cpu(tmp_data->file_offset)) {
+			/*
+			 * We are at a hole. Write until the end of the region
+			 * or until the next allocated data,
+			 * whichever comes next.
+			 */
+			l = le64_to_cpu(tmp_data->file_offset) - off;
+			if (len < l)
+				l = len;
+			rc = smb3_simple_fallocate_write_range(xid, tcon,
+					       cfile, off, l, buf);
+			if (rc)
+				goto out;
+			off = off + l;
+			len = len - l;
+			if (len == 0)
+				goto out;
+		}
+		/*
+		 * We are at a section of allocated data, just skip forward
+		 * until the end of the data or the end of the region
+		 * we are supposed to fallocate, whichever comes first.
+		 */
+		l = le64_to_cpu(tmp_data->length);
+		if (len < l)
+			l = len;
+		off += l;
+		len -= l;
+
+		tmp_data = &tmp_data[1];
+		out_data_len -= sizeof(struct file_allocated_range_buffer);
+	}
+
+ out:
+	kfree(out_data);
+	kfree(buf);
+	return rc;
+}
+
+
 static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
 			    loff_t off, loff_t len, bool keep_size)
 {
@@ -3661,6 +3774,26 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
 	}
 
 	if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
+		/*
+		 * At this point, we are trying to fallocate an internal
+		 * regions of a sparse file. Since smb2 does not have a
+		 * fallocate command we have two otions on how to emulate this.
+		 * We can either turn the entire file to become non-sparse
+		 * which we only do if the fallocate is for virtually
+		 * the whole file,  or we can overwrite the region with zeroes
+		 * using SMB2_write, which could be prohibitevly expensive
+		 * if len is large.
+		 */
+		/*
+		 * We are only trying to fallocate a small region so
+		 * just write it with zero.
+		 */
+		if (len <= 1024 * 1024) {
+			rc = smb3_simple_fallocate_range(xid, tcon, cfile,
+							 off, len);
+			goto out;
+		}
+
 		/*
 		 * Check if falloc starts within first few pages of file
 		 * and ends within a few pages of the end of file to

From f3191fc8004d6d200d850395bbad8dab8f367945 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:27 -0300
Subject: [PATCH 2333/3292] cifs: do not send tree disconnect to ipc shares

On session close, the IPC is closed and the server must release all
tcons of the session.  It doesn't matter if we send a ipc close or
not.

Besides, it will make the server to not close durable and resilient
files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
SMB2 LOGOFF Request.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/connect.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 495c395f9def..ecac8cab7581 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1557,29 +1557,25 @@ out:
 /**
  * cifs_free_ipc - helper to release the session IPC tcon
  *
- * Needs to be called everytime a session is destroyed
+ * Needs to be called everytime a session is destroyed.
+ *
+ * On session close, the IPC is closed and the server must release all tcons of the session.
+ * No need to send a tree disconnect here.
+ *
+ * Besides, it will make the server to not close durable and resilient files on session close, as
+ * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request.
  */
 static int
 cifs_free_ipc(struct cifs_ses *ses)
 {
-	int rc = 0, xid;
 	struct cifs_tcon *tcon = ses->tcon_ipc;
 
 	if (tcon == NULL)
 		return 0;
 
-	if (ses->server->ops->tree_disconnect) {
-		xid = get_xid();
-		rc = ses->server->ops->tree_disconnect(xid, tcon);
-		free_xid(xid);
-	}
-
-	if (rc)
-		cifs_dbg(FYI, "failed to disconnect IPC tcon (rc=%d)\n", rc);
-
 	tconInfoFree(tcon);
 	ses->tcon_ipc = NULL;
-	return rc;
+	return 0;
 }
 
 static struct cifs_ses *

From 2b133b7e216b9dae42fe504c7aa090363d96745a Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:28 -0300
Subject: [PATCH 2334/3292] cifs: get rid of @noreq param in __dfs_cache_find()

@noreq param isn't used anywhere, so just remove it.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dfs_cache.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 03c81c39a507..70383e14ca4a 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -690,8 +690,7 @@ static int update_cache_entry(const char *path,
  * handle them properly.
  */
 static int __dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
-			    const struct nls_table *nls_codepage, int remap,
-			    const char *path, bool noreq)
+			    const struct nls_table *nls_codepage, int remap, const char *path)
 {
 	int rc;
 	unsigned int hash;
@@ -705,16 +704,6 @@ static int __dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
 	down_read(&htable_rw_lock);
 
 	ce = lookup_cache_entry(path, &hash);
-
-	/*
-	 * If @noreq is set, no requests will be sent to the server. Just return
-	 * the cache entry.
-	 */
-	if (noreq) {
-		up_read(&htable_rw_lock);
-		return PTR_ERR_OR_ZERO(ce);
-	}
-
 	if (!IS_ERR(ce)) {
 		if (!cache_entry_expired(ce)) {
 			dump_ce(ce);
@@ -879,7 +868,7 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
 	if (rc)
 		return rc;
 
-	rc = __dfs_cache_find(xid, ses, nls_codepage, remap, npath, false);
+	rc = __dfs_cache_find(xid, ses, nls_codepage, remap, npath);
 	if (rc)
 		goto out_free_path;
 
@@ -991,7 +980,7 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
 
 	cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath);
 
-	rc = __dfs_cache_find(xid, ses, nls_codepage, remap, npath, false);
+	rc = __dfs_cache_find(xid, ses, nls_codepage, remap, npath);
 	if (rc)
 		goto out_free_path;
 

From c9f7110399059100e69dba08a58658890be75f6f Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:29 -0300
Subject: [PATCH 2335/3292] cifs: keep referral server sessions alive

At every mount, keep all sessions alive that were used for chasing the
DFS referrals as long as the dfs mounts are active.

Use those sessions in DFS cache to refresh all active tcons as well as
cached entries.  They will be managed by a list of mount_group
structures that will be indexed by a randomly generated uuid at mount
time, so we can put all the sessions related to specific dfs mounts
and avoid leaking them.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_fs_sb.h |   2 +
 fs/cifs/connect.c    |  56 ++--
 fs/cifs/dfs_cache.c  | 711 ++++++++++++++++---------------------------
 fs/cifs/dfs_cache.h  |   8 +-
 4 files changed, 292 insertions(+), 485 deletions(-)

diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 9c45b3a82ad9..2dfd10c34ace 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -77,6 +77,8 @@ struct cifs_sb_info {
 	 * failover properly.
 	 */
 	char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */
+	/* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
+	uuid_t dfs_mount_id;
 	/*
 	 * Indicate whether serverino option was turned off later
 	 * (cifs_autodisable_serverino) in order to match new mounts.
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ecac8cab7581..7f0dd635ce9c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -368,13 +368,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 			cifs_server_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n",
 				 __func__, rc);
 		}
-		rc = dfs_cache_update_vol(cifs_sb->origin_fullpath, server);
-		if (rc) {
-			cifs_server_dbg(VFS, "%s: failed to update vol info in DFS cache: rc = %d\n",
-				 __func__, rc);
-		}
 		dfs_cache_free_tgts(&tgt_list);
-
 	}
 
 	cifs_put_tcp_super(sb);
@@ -1601,7 +1595,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
 {
 	unsigned int rc, xid;
 	struct TCP_Server_Info *server = ses->server;
-
 	cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
 
 	spin_lock(&cifs_tcp_ses_lock);
@@ -1609,6 +1602,10 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
 		spin_unlock(&cifs_tcp_ses_lock);
 		return;
 	}
+
+	cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
+	cifs_dbg(FYI, "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->treeName : "NONE");
+
 	if (--ses->ses_count > 0) {
 		spin_unlock(&cifs_tcp_ses_lock);
 		return;
@@ -3284,25 +3281,23 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static void set_root_ses(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+static void set_root_ses(struct cifs_sb_info *cifs_sb, const uuid_t *mount_id, struct cifs_ses *ses,
 			 struct cifs_ses **root_ses)
 {
 	if (ses) {
 		spin_lock(&cifs_tcp_ses_lock);
 		ses->ses_count++;
-		if (ses->tcon_ipc)
+		cifs_dbg(FYI, "%s: new ses_count=%d\n", __func__, ses->ses_count);
+		if (ses->tcon_ipc) {
+			cifs_dbg(FYI, "%s: ipc tcon: %s\n", __func__, ses->tcon_ipc->treeName);
 			ses->tcon_ipc->remap = cifs_remap(cifs_sb);
+		}
 		spin_unlock(&cifs_tcp_ses_lock);
+		dfs_cache_add_refsrv_session(mount_id, ses);
 	}
 	*root_ses = ses;
 }
 
-static void put_root_ses(struct cifs_ses *ses)
-{
-	if (ses)
-		cifs_put_smb_ses(ses);
-}
-
 /* Set up next dfs prefix path in @dfs_path */
 static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
 			    const unsigned int xid, struct TCP_Server_Info *server,
@@ -3382,9 +3377,9 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	struct cifs_ses *ses = NULL, *root_ses = NULL;
 	struct cifs_tcon *tcon = NULL;
 	int count = 0;
+	uuid_t mount_id = {0};
 	char *ref_path = NULL, *full_path = NULL;
 	char *oldmnt = NULL;
-	char *mntdata = NULL;
 	bool ref_server = false;
 
 	rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
@@ -3407,12 +3402,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 		if (rc != -EREMOTE)
 			goto error;
 	}
-	/* Save mount options */
-	mntdata = kstrdup(cifs_sb->ctx->mount_options, GFP_KERNEL);
-	if (!mntdata) {
-		rc = -ENOMEM;
-		goto error;
-	}
+
 	/* Get path of DFS root */
 	ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
 	if (IS_ERR(ref_path)) {
@@ -3421,7 +3411,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 		goto error;
 	}
 
-	set_root_ses(cifs_sb, ses, &root_ses);
+	uuid_gen(&mount_id);
+	set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
 	do {
 		/* Save full path of last DFS path we used to resolve final target server */
 		kfree(full_path);
@@ -3455,10 +3446,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 		rc = is_referral_server(ref_path + 1, tcon, &ref_server);
 		if (rc)
 			break;
-		if (ref_server) {
-			put_root_ses(root_ses);
-			set_root_ses(cifs_sb, ses, &root_ses);
-		}
+		if (ref_server)
+			set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
 
 		/* Get next dfs path and then continue chasing them if -EREMOTE */
 		rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
@@ -3469,8 +3458,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 
 	if (rc)
 		goto error;
-	put_root_ses(root_ses);
-	root_ses = NULL;
+
 	kfree(ref_path);
 	ref_path = NULL;
 	/*
@@ -3492,10 +3480,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	tcon->remap = cifs_remap(cifs_sb);
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	/* Add original context for DFS cache to be used when refreshing referrals */
-	rc = dfs_cache_add_vol(mntdata, ctx, cifs_sb->origin_fullpath);
-	if (rc)
-		goto error;
 	/*
 	 * After reconnecting to a different server, unique ids won't
 	 * match anymore, so we disable serverino. This prevents
@@ -3510,6 +3494,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	kfree(cifs_sb->prepath);
 	cifs_sb->prepath = ctx->prepath;
 	ctx->prepath = NULL;
+	uuid_copy(&cifs_sb->dfs_mount_id, &mount_id);
 
 out:
 	free_xid(xid);
@@ -3519,9 +3504,8 @@ out:
 error:
 	kfree(ref_path);
 	kfree(full_path);
-	kfree(mntdata);
 	kfree(cifs_sb->origin_fullpath);
-	put_root_ses(root_ses);
+	dfs_cache_put_refsrv_sessions(&mount_id);
 	mount_put_conns(cifs_sb, xid, server, ses, tcon);
 	return rc;
 }
@@ -3751,7 +3735,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
 
 	kfree(cifs_sb->prepath);
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	dfs_cache_del_vol(cifs_sb->origin_fullpath);
+	dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id);
 	kfree(cifs_sb->origin_fullpath);
 #endif
 	call_rcu(&cifs_sb->rcu, delayed_free);
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 70383e14ca4a..846d67028522 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -11,6 +11,7 @@
 #include <linux/proc_fs.h>
 #include <linux/nls.h>
 #include <linux/workqueue.h>
+#include <linux/uuid.h>
 #include "cifsglob.h"
 #include "smb2pdu.h"
 #include "smb2proto.h"
@@ -18,7 +19,6 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "smb2glob.h"
-#include "fs_context.h"
 
 #include "dfs_cache.h"
 
@@ -48,14 +48,15 @@ struct cache_entry {
 	struct cache_dfs_tgt *tgthint;
 };
 
-struct vol_info {
-	char *fullpath;
-	spinlock_t ctx_lock;
-	struct smb3_fs_context ctx;
-	char *mntdata;
+/* List of referral server sessions per dfs mount */
+struct mount_group {
 	struct list_head list;
-	struct list_head rlist;
-	struct kref refcnt;
+	uuid_t id;
+	struct cifs_ses *sessions[CACHE_MAX_ENTRIES];
+	int num_sessions;
+	spinlock_t lock;
+	struct list_head refresh_list;
+	struct kref refcount;
 };
 
 static struct kmem_cache *cache_slab __read_mostly;
@@ -74,13 +75,106 @@ static atomic_t cache_count;
 static struct hlist_head cache_htable[CACHE_HTABLE_SIZE];
 static DECLARE_RWSEM(htable_rw_lock);
 
-static LIST_HEAD(vol_list);
-static DEFINE_SPINLOCK(vol_list_lock);
+static LIST_HEAD(mount_group_list);
+static DEFINE_MUTEX(mount_group_list_lock);
 
 static void refresh_cache_worker(struct work_struct *work);
 
 static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker);
 
+static void get_ipc_unc(const char *ref_path, char *ipc, size_t ipclen)
+{
+	const char *host;
+	size_t len;
+
+	extract_unc_hostname(ref_path, &host, &len);
+	scnprintf(ipc, ipclen, "\\\\%.*s\\IPC$", (int)len, host);
+}
+
+static struct cifs_ses *find_ipc_from_server_path(struct cifs_ses **ses, const char *path)
+{
+	char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0};
+
+	get_ipc_unc(path, unc, sizeof(unc));
+	for (; *ses; ses++) {
+		if (!strcasecmp(unc, (*ses)->tcon_ipc->treeName))
+			return *ses;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+static void __mount_group_release(struct mount_group *mg)
+{
+	int i;
+
+	for (i = 0; i < mg->num_sessions; i++)
+		cifs_put_smb_ses(mg->sessions[i]);
+	kfree(mg);
+}
+
+static void mount_group_release(struct kref *kref)
+{
+	struct mount_group *mg = container_of(kref, struct mount_group, refcount);
+
+	mutex_lock(&mount_group_list_lock);
+	list_del(&mg->list);
+	mutex_unlock(&mount_group_list_lock);
+	__mount_group_release(mg);
+}
+
+static struct mount_group *find_mount_group_locked(const uuid_t *id)
+{
+	struct mount_group *mg;
+
+	list_for_each_entry(mg, &mount_group_list, list) {
+		if (uuid_equal(&mg->id, id))
+			return mg;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+static struct mount_group *__get_mount_group_locked(const uuid_t *id)
+{
+	struct mount_group *mg;
+
+	mg = find_mount_group_locked(id);
+	if (!IS_ERR(mg))
+		return mg;
+
+	mg = kmalloc(sizeof(*mg), GFP_KERNEL);
+	if (!mg)
+		return ERR_PTR(-ENOMEM);
+	kref_init(&mg->refcount);
+	uuid_copy(&mg->id, id);
+	mg->num_sessions = 0;
+	spin_lock_init(&mg->lock);
+	list_add(&mg->list, &mount_group_list);
+	return mg;
+}
+
+static struct mount_group *get_mount_group(const uuid_t *id)
+{
+	struct mount_group *mg;
+
+	mutex_lock(&mount_group_list_lock);
+	mg = __get_mount_group_locked(id);
+	if (!IS_ERR(mg))
+		kref_get(&mg->refcount);
+	mutex_unlock(&mount_group_list_lock);
+
+	return mg;
+}
+
+static void free_mount_group_list(void)
+{
+	struct mount_group *mg, *tmp_mg;
+
+	list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) {
+		list_del_init(&mg->list);
+		__mount_group_release(mg);
+	}
+}
+
 static int get_normalized_path(const char *path, const char **npath)
 {
 	if (!path || strlen(path) < 3 || (*path != '\\' && *path != '/'))
@@ -284,8 +378,7 @@ int dfs_cache_init(void)
 	int rc;
 	int i;
 
-	dfscache_wq = alloc_workqueue("cifs-dfscache",
-				      WQ_FREEZABLE | WQ_MEM_RECLAIM, 1);
+	dfscache_wq = alloc_workqueue("cifs-dfscache", WQ_FREEZABLE | WQ_UNBOUND, 1);
 	if (!dfscache_wq)
 		return -ENOMEM;
 
@@ -426,8 +519,7 @@ static struct cache_entry *alloc_cache_entry(const char *path,
 	return ce;
 }
 
-/* Must be called with htable_rw_lock held */
-static void remove_oldest_entry(void)
+static void remove_oldest_entry_locked(void)
 {
 	int i;
 	struct cache_entry *ce;
@@ -456,8 +548,8 @@ static void remove_oldest_entry(void)
 }
 
 /* Add a new DFS cache entry */
-static int add_cache_entry(const char *path, unsigned int hash,
-			   struct dfs_info3_param *refs, int numrefs)
+static int add_cache_entry_locked(const char *path, unsigned int hash,
+				  struct dfs_info3_param *refs, int numrefs)
 {
 	struct cache_entry *ce;
 
@@ -475,10 +567,8 @@ static int add_cache_entry(const char *path, unsigned int hash,
 	}
 	spin_unlock(&cache_ttl_lock);
 
-	down_write(&htable_rw_lock);
 	hlist_add_head(&ce->hlist, &cache_htable[hash]);
 	dump_ce(ce);
-	up_write(&htable_rw_lock);
 
 	return 0;
 }
@@ -573,34 +663,6 @@ out:
 	return ce;
 }
 
-static void __vol_release(struct vol_info *vi)
-{
-	kfree(vi->fullpath);
-	kfree(vi->mntdata);
-	smb3_cleanup_fs_context_contents(&vi->ctx);
-	kfree(vi);
-}
-
-static void vol_release(struct kref *kref)
-{
-	struct vol_info *vi = container_of(kref, struct vol_info, refcnt);
-
-	spin_lock(&vol_list_lock);
-	list_del(&vi->list);
-	spin_unlock(&vol_list_lock);
-	__vol_release(vi);
-}
-
-static inline void free_vol_list(void)
-{
-	struct vol_info *vi, *nvi;
-
-	list_for_each_entry_safe(vi, nvi, &vol_list, list) {
-		list_del_init(&vi->list);
-		__vol_release(vi);
-	}
-}
-
 /**
  * dfs_cache_destroy - destroy DFS referral cache
  */
@@ -608,7 +670,7 @@ void dfs_cache_destroy(void)
 {
 	cancel_delayed_work_sync(&refresh_task);
 	unload_nls(cache_nlsc);
-	free_vol_list();
+	free_mount_group_list();
 	flush_cache_ents();
 	kmem_cache_destroy(cache_slab);
 	destroy_workqueue(dfscache_wq);
@@ -616,10 +678,9 @@ void dfs_cache_destroy(void)
 	cifs_dbg(FYI, "%s: destroyed DFS referral cache\n", __func__);
 }
 
-/* Must be called with htable_rw_lock held */
-static int __update_cache_entry(const char *path,
-				const struct dfs_info3_param *refs,
-				int numrefs)
+/* Update a cache entry with the new referral in @refs */
+static int update_cache_entry_locked(const char *path, const struct dfs_info3_param *refs,
+				     int numrefs)
 {
 	int rc;
 	struct cache_entry *ce;
@@ -665,32 +726,17 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
 					       nls_codepage, remap);
 }
 
-/* Update an expired cache entry by getting a new DFS referral from server */
-static int update_cache_entry(const char *path,
-			      const struct dfs_info3_param *refs,
-			      int numrefs)
-{
-
-	int rc;
-
-	down_write(&htable_rw_lock);
-	rc = __update_cache_entry(path, refs, numrefs);
-	up_write(&htable_rw_lock);
-
-	return rc;
-}
-
 /*
  * Find, create or update a DFS cache entry.
  *
  * If the entry wasn't found, it will create a new one. Or if it was found but
  * expired, then it will update the entry accordingly.
  *
- * For interlinks, __cifs_dfs_mount() and expand_dfs_referral() are supposed to
+ * For interlinks, cifs_mount() and expand_dfs_referral() are supposed to
  * handle them properly.
  */
-static int __dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
-			    const struct nls_table *nls_codepage, int remap, const char *path)
+static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses,
+			      const struct nls_table *nls_codepage, int remap, const char *path)
 {
 	int rc;
 	unsigned int hash;
@@ -701,52 +747,46 @@ static int __dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
 
 	cifs_dbg(FYI, "%s: search path: %s\n", __func__, path);
 
-	down_read(&htable_rw_lock);
+	down_write(&htable_rw_lock);
 
 	ce = lookup_cache_entry(path, &hash);
 	if (!IS_ERR(ce)) {
 		if (!cache_entry_expired(ce)) {
 			dump_ce(ce);
-			up_read(&htable_rw_lock);
+			up_write(&htable_rw_lock);
 			return 0;
 		}
 	} else {
 		newent = true;
 	}
 
-	up_read(&htable_rw_lock);
-
 	/*
-	 * No entry was found.
-	 *
-	 * Request a new DFS referral in order to create a new cache entry, or
-	 * updating an existing one.
+	 * Either the entry was not found, or it is expired.
+	 * Request a new DFS referral in order to create or update a cache entry.
 	 */
 	rc = get_dfs_referral(xid, ses, nls_codepage, remap, path,
 			      &refs, &numrefs);
 	if (rc)
-		return rc;
+		goto out_unlock;
 
 	dump_refs(refs, numrefs);
 
 	if (!newent) {
-		rc = update_cache_entry(path, refs, numrefs);
-		goto out_free_refs;
+		rc = update_cache_entry_locked(path, refs, numrefs);
+		goto out_unlock;
 	}
 
 	if (atomic_read(&cache_count) >= CACHE_MAX_ENTRIES) {
-		cifs_dbg(FYI, "%s: reached max cache size (%d)\n",
-			 __func__, CACHE_MAX_ENTRIES);
-		down_write(&htable_rw_lock);
-		remove_oldest_entry();
-		up_write(&htable_rw_lock);
+		cifs_dbg(FYI, "%s: reached max cache size (%d)\n", __func__, CACHE_MAX_ENTRIES);
+		remove_oldest_entry_locked();
 	}
 
-	rc = add_cache_entry(path, hash, refs, numrefs);
+	rc = add_cache_entry_locked(path, hash, refs, numrefs);
 	if (!rc)
 		atomic_inc(&cache_count);
 
-out_free_refs:
+out_unlock:
+	up_write(&htable_rw_lock);
 	free_dfs_info_array(refs, numrefs);
 	return rc;
 }
@@ -868,7 +908,7 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
 	if (rc)
 		return rc;
 
-	rc = __dfs_cache_find(xid, ses, nls_codepage, remap, npath);
+	rc = cache_refresh_path(xid, ses, nls_codepage, remap, npath);
 	if (rc)
 		goto out_free_path;
 
@@ -980,7 +1020,7 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
 
 	cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath);
 
-	rc = __dfs_cache_find(xid, ses, nls_codepage, remap, npath);
+	rc = cache_refresh_path(xid, ses, nls_codepage, remap, npath);
 	if (rc)
 		goto out_free_path;
 
@@ -1122,126 +1162,51 @@ out_unlock:
 }
 
 /**
- * dfs_cache_add_vol - add a cifs context during mount() that will be handled by
- * DFS cache refresh worker.
+ * dfs_cache_add_refsrv_session - add SMB session of referral server
  *
- * @mntdata: mount data.
- * @ctx: cifs context.
- * @fullpath: origin full path.
- *
- * Return zero if context was set up correctly, otherwise non-zero.
+ * @mount_id: mount group uuid to lookup.
+ * @ses: reference counted SMB session of referral server.
  */
-int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx, const char *fullpath)
+void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses)
 {
-	int rc;
-	struct vol_info *vi;
+	struct mount_group *mg;
 
-	if (!ctx || !fullpath || !mntdata)
-		return -EINVAL;
-
-	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
-
-	vi = kzalloc(sizeof(*vi), GFP_KERNEL);
-	if (!vi)
-		return -ENOMEM;
-
-	vi->fullpath = kstrdup(fullpath, GFP_KERNEL);
-	if (!vi->fullpath) {
-		rc = -ENOMEM;
-		goto err_free_vi;
-	}
-
-	rc = smb3_fs_context_dup(&vi->ctx, ctx);
-	if (rc)
-		goto err_free_fullpath;
-
-	vi->mntdata = mntdata;
-	spin_lock_init(&vi->ctx_lock);
-	kref_init(&vi->refcnt);
-
-	spin_lock(&vol_list_lock);
-	list_add_tail(&vi->list, &vol_list);
-	spin_unlock(&vol_list_lock);
-
-	return 0;
-
-err_free_fullpath:
-	kfree(vi->fullpath);
-err_free_vi:
-	kfree(vi);
-	return rc;
-}
-
-/* Must be called with vol_list_lock held */
-static struct vol_info *find_vol(const char *fullpath)
-{
-	struct vol_info *vi;
-
-	list_for_each_entry(vi, &vol_list, list) {
-		cifs_dbg(FYI, "%s: vi->fullpath: %s\n", __func__, vi->fullpath);
-		if (!strcasecmp(vi->fullpath, fullpath))
-			return vi;
-	}
-	return ERR_PTR(-ENOENT);
-}
-
-/**
- * dfs_cache_update_vol - update vol info in DFS cache after failover
- *
- * @fullpath: fullpath to look up in volume list.
- * @server: TCP ses pointer.
- *
- * Return zero if volume was updated, otherwise non-zero.
- */
-int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server)
-{
-	struct vol_info *vi;
-
-	if (!fullpath || !server)
-		return -EINVAL;
-
-	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
-
-	spin_lock(&vol_list_lock);
-	vi = find_vol(fullpath);
-	if (IS_ERR(vi)) {
-		spin_unlock(&vol_list_lock);
-		return PTR_ERR(vi);
-	}
-	kref_get(&vi->refcnt);
-	spin_unlock(&vol_list_lock);
-
-	cifs_dbg(FYI, "%s: updating volume info\n", __func__);
-	spin_lock(&vi->ctx_lock);
-	memcpy(&vi->ctx.dstaddr, &server->dstaddr,
-	       sizeof(vi->ctx.dstaddr));
-	spin_unlock(&vi->ctx_lock);
-
-	kref_put(&vi->refcnt, vol_release);
-
-	return 0;
-}
-
-/**
- * dfs_cache_del_vol - remove volume info in DFS cache during umount()
- *
- * @fullpath: fullpath to look up in volume list.
- */
-void dfs_cache_del_vol(const char *fullpath)
-{
-	struct vol_info *vi;
-
-	if (!fullpath || !*fullpath)
+	if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses))
 		return;
 
-	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+	mg = get_mount_group(mount_id);
+	if (WARN_ON_ONCE(IS_ERR(mg)))
+		return;
 
-	spin_lock(&vol_list_lock);
-	vi = find_vol(fullpath);
-	spin_unlock(&vol_list_lock);
+	spin_lock(&mg->lock);
+	if (mg->num_sessions < ARRAY_SIZE(mg->sessions))
+		mg->sessions[mg->num_sessions++] = ses;
+	spin_unlock(&mg->lock);
+	kref_put(&mg->refcount, mount_group_release);
+}
 
-	if (!IS_ERR(vi))
-		kref_put(&vi->refcnt, vol_release);
+/**
+ * dfs_cache_put_refsrv_sessions - put all referral server sessions
+ *
+ * Put all SMB sessions from the given mount group id.
+ *
+ * @mount_id: mount group uuid to lookup.
+ */
+void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id)
+{
+	struct mount_group *mg;
+
+	if (!mount_id || uuid_is_null(mount_id))
+		return;
+
+	mutex_lock(&mount_group_list_lock);
+	mg = find_mount_group_locked(mount_id);
+	if (IS_ERR(mg)) {
+		mutex_unlock(&mount_group_list_lock);
+		return;
+	}
+	mutex_unlock(&mount_group_list_lock);
+	kref_put(&mg->refcount, mount_group_release);
 }
 
 /**
@@ -1310,278 +1275,136 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
 	return 0;
 }
 
-/* Get all tcons that are within a DFS namespace and can be refreshed */
-static void get_tcons(struct TCP_Server_Info *server, struct list_head *head)
+/*
+ * Refresh all active dfs mounts regardless of whether they are in cache or not.
+ * (cache can be cleared)
+ */
+static void refresh_mounts(struct cifs_ses **sessions)
 {
+	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
-	struct cifs_tcon *tcon;
+	struct cifs_tcon *tcon, *ntcon;
+	struct list_head tcons;
+	unsigned int xid;
 
-	INIT_LIST_HEAD(head);
+	INIT_LIST_HEAD(&tcons);
 
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
-		list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
-			if (!tcon->need_reconnect && !tcon->need_reopen_files &&
-			    tcon->dfs_path) {
-				tcon->tc_count++;
-				list_add_tail(&tcon->ulist, head);
+	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+		list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+			list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+				if (tcon->dfs_path) {
+					tcon->tc_count++;
+					list_add_tail(&tcon->ulist, &tcons);
+				}
 			}
 		}
-		if (ses->tcon_ipc && !ses->tcon_ipc->need_reconnect &&
-		    ses->tcon_ipc->dfs_path) {
-			list_add_tail(&ses->tcon_ipc->ulist, head);
-		}
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
-}
 
-static bool is_dfs_link(const char *path)
-{
-	char *s;
+	list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
+		const char *path = tcon->dfs_path + 1;
+		int rc = 0;
 
-	s = strchr(path + 1, '\\');
-	if (!s)
-		return false;
-	return !!strchr(s + 1, '\\');
-}
-
-static char *get_dfs_root(const char *path)
-{
-	char *s, *npath;
-
-	s = strchr(path + 1, '\\');
-	if (!s)
-		return ERR_PTR(-EINVAL);
-
-	s = strchr(s + 1, '\\');
-	if (!s)
-		return ERR_PTR(-EINVAL);
-
-	npath = kstrndup(path, s - path, GFP_KERNEL);
-	if (!npath)
-		return ERR_PTR(-ENOMEM);
-
-	return npath;
-}
-
-static inline void put_tcp_server(struct TCP_Server_Info *server)
-{
-	cifs_put_tcp_session(server, 0);
-}
-
-static struct TCP_Server_Info *get_tcp_server(struct smb3_fs_context *ctx)
-{
-	struct TCP_Server_Info *server;
-
-	server = cifs_find_tcp_session(ctx);
-	if (IS_ERR_OR_NULL(server))
-		return NULL;
-
-	spin_lock(&GlobalMid_Lock);
-	if (server->tcpStatus != CifsGood) {
-		spin_unlock(&GlobalMid_Lock);
-		put_tcp_server(server);
-		return NULL;
-	}
-	spin_unlock(&GlobalMid_Lock);
-
-	return server;
-}
-
-/* Find root SMB session out of a DFS link path */
-static struct cifs_ses *find_root_ses(struct vol_info *vi,
-				      struct cifs_tcon *tcon,
-				      const char *path)
-{
-	char *rpath;
-	int rc;
-	struct cache_entry *ce;
-	struct dfs_info3_param ref = {0};
-	char *mdata = NULL, *devname = NULL;
-	struct TCP_Server_Info *server;
-	struct cifs_ses *ses;
-	struct smb3_fs_context ctx = {NULL};
-
-	rpath = get_dfs_root(path);
-	if (IS_ERR(rpath))
-		return ERR_CAST(rpath);
-
-	down_read(&htable_rw_lock);
-
-	ce = lookup_cache_entry(rpath, NULL);
-	if (IS_ERR(ce)) {
-		up_read(&htable_rw_lock);
-		ses = ERR_CAST(ce);
-		goto out;
-	}
-
-	rc = setup_referral(path, ce, &ref, get_tgt_name(ce));
-	if (rc) {
-		up_read(&htable_rw_lock);
-		ses = ERR_PTR(rc);
-		goto out;
-	}
-
-	up_read(&htable_rw_lock);
-
-	mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref,
-					   &devname);
-	free_dfs_info_param(&ref);
-
-	if (IS_ERR(mdata)) {
-		ses = ERR_CAST(mdata);
-		mdata = NULL;
-		goto out;
-	}
-
-	rc = cifs_setup_volume_info(&ctx, NULL, devname);
-
-	if (rc) {
-		ses = ERR_PTR(rc);
-		goto out;
-	}
-
-	server = get_tcp_server(&ctx);
-	if (!server) {
-		ses = ERR_PTR(-EHOSTDOWN);
-		goto out;
-	}
-
-	ses = cifs_get_smb_ses(server, &ctx);
-
-out:
-	smb3_cleanup_fs_context_contents(&ctx);
-	kfree(mdata);
-	kfree(rpath);
-	kfree(devname);
-
-	return ses;
-}
-
-/* Refresh DFS cache entry from a given tcon */
-static int refresh_tcon(struct vol_info *vi, struct cifs_tcon *tcon)
-{
-	int rc = 0;
-	unsigned int xid;
-	const char *path, *npath;
-	struct cache_entry *ce;
-	struct cifs_ses *root_ses = NULL, *ses;
-	struct dfs_info3_param *refs = NULL;
-	int numrefs = 0;
-
-	xid = get_xid();
-
-	path = tcon->dfs_path + 1;
-
-	rc = get_normalized_path(path, &npath);
-	if (rc)
-		goto out_free_xid;
-
-	down_read(&htable_rw_lock);
-
-	ce = lookup_cache_entry(npath, NULL);
-	if (IS_ERR(ce)) {
-		rc = PTR_ERR(ce);
-		up_read(&htable_rw_lock);
-		goto out_free_path;
-	}
-
-	if (!cache_entry_expired(ce)) {
-		up_read(&htable_rw_lock);
-		goto out_free_path;
-	}
-
-	up_read(&htable_rw_lock);
-
-	/* If it's a DFS Link, then use root SMB session for refreshing it */
-	if (is_dfs_link(npath)) {
-		ses = root_ses = find_root_ses(vi, tcon, npath);
-		if (IS_ERR(ses)) {
-			rc = PTR_ERR(ses);
-			root_ses = NULL;
-			goto out_free_path;
+		list_del_init(&tcon->ulist);
+		ses = find_ipc_from_server_path(sessions, path);
+		if (!IS_ERR(ses)) {
+			xid = get_xid();
+			cache_refresh_path(xid, ses, cache_nlsc, tcon->remap, path);
+			free_xid(xid);
 		}
-	} else {
-		ses = tcon->ses;
+		cifs_put_tcon(tcon);
 	}
+}
 
-	rc = get_dfs_referral(xid, ses, cache_nlsc, tcon->remap, npath, &refs,
-			      &numrefs);
-	if (!rc) {
-		dump_refs(refs, numrefs);
-		rc = update_cache_entry(npath, refs, numrefs);
-		free_dfs_info_array(refs, numrefs);
+static void refresh_cache(struct cifs_ses **sessions)
+{
+	int i;
+	struct cifs_ses *ses;
+	unsigned int xid;
+	int rc;
+
+	/*
+	 * Refresh all cached entries.
+	 * The cache entries may cover more paths than the active mounts
+	 * (e.g. domain-based DFS referrals or multi tier DFS setups).
+	 */
+	down_write(&htable_rw_lock);
+	for (i = 0; i < CACHE_HTABLE_SIZE; i++) {
+		struct cache_entry *ce;
+		struct hlist_head *l = &cache_htable[i];
+
+		hlist_for_each_entry(ce, l, hlist) {
+			struct dfs_info3_param *refs = NULL;
+			int numrefs = 0;
+
+			if (hlist_unhashed(&ce->hlist) || !cache_entry_expired(ce))
+				continue;
+
+			ses = find_ipc_from_server_path(sessions, ce->path);
+			if (IS_ERR(ses))
+				continue;
+
+			xid = get_xid();
+			rc = get_dfs_referral(xid, ses, cache_nlsc, NO_MAP_UNI_RSVD, ce->path,
+					      &refs, &numrefs);
+			free_xid(xid);
+
+			if (!rc)
+				update_cache_entry_locked(ce->path, refs, numrefs);
+
+			free_dfs_info_array(refs, numrefs);
+		}
 	}
-
-	if (root_ses)
-		cifs_put_smb_ses(root_ses);
-
-out_free_path:
-	free_normalized_path(path, npath);
-
-out_free_xid:
-	free_xid(xid);
-	return rc;
+	up_write(&htable_rw_lock);
 }
 
 /*
- * Worker that will refresh DFS cache based on lowest TTL value from a DFS
+ * Worker that will refresh DFS cache and active mounts based on lowest TTL value from a DFS
  * referral.
  */
 static void refresh_cache_worker(struct work_struct *work)
 {
-	struct vol_info *vi, *nvi;
-	struct TCP_Server_Info *server;
-	LIST_HEAD(vols);
-	LIST_HEAD(tcons);
-	struct cifs_tcon *tcon, *ntcon;
-	int rc;
+	struct list_head mglist;
+	struct mount_group *mg, *tmp_mg;
+	struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
+	int max_sessions = ARRAY_SIZE(sessions) - 1;
+	int i = 0, count;
 
-	/*
-	 * Find SMB volumes that are eligible (server->tcpStatus == CifsGood)
-	 * for refreshing.
-	 */
-	spin_lock(&vol_list_lock);
-	list_for_each_entry(vi, &vol_list, list) {
-		server = get_tcp_server(&vi->ctx);
-		if (!server)
-			continue;
+	INIT_LIST_HEAD(&mglist);
 
-		kref_get(&vi->refcnt);
-		list_add_tail(&vi->rlist, &vols);
-		put_tcp_server(server);
+	/* Get refereces of mount groups */
+	mutex_lock(&mount_group_list_lock);
+	list_for_each_entry(mg, &mount_group_list, list) {
+		kref_get(&mg->refcount);
+		list_add(&mg->refresh_list, &mglist);
 	}
-	spin_unlock(&vol_list_lock);
+	mutex_unlock(&mount_group_list_lock);
 
-	/* Walk through all TCONs and refresh any expired cache entry */
-	list_for_each_entry_safe(vi, nvi, &vols, rlist) {
-		spin_lock(&vi->ctx_lock);
-		server = get_tcp_server(&vi->ctx);
-		spin_unlock(&vi->ctx_lock);
+	/* Fill in local array with an NULL-terminated list of all referral server sessions */
+	list_for_each_entry(mg, &mglist, refresh_list) {
+		if (i >= max_sessions)
+			break;
 
-		if (!server)
-			goto next_vol;
+		spin_lock(&mg->lock);
+		if (i + mg->num_sessions > max_sessions)
+			count = max_sessions - i;
+		else
+			count = mg->num_sessions;
+		memcpy(&sessions[i], mg->sessions, count * sizeof(mg->sessions[0]));
+		spin_unlock(&mg->lock);
+		i += count;
+	}
 
-		get_tcons(server, &tcons);
-		rc = 0;
+	if (sessions[0]) {
+		/* Refresh all active mounts and cached entries */
+		refresh_mounts(sessions);
+		refresh_cache(sessions);
+	}
 
-		list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
-			/*
-			 * Skip tcp server if any of its tcons failed to refresh
-			 * (possibily due to reconnects).
-			 */
-			if (!rc)
-				rc = refresh_tcon(vi, tcon);
-
-			list_del_init(&tcon->ulist);
-			cifs_put_tcon(tcon);
-		}
-
-		put_tcp_server(server);
-
-next_vol:
-		list_del_init(&vi->rlist);
-		kref_put(&vi->refcnt, vol_release);
+	list_for_each_entry_safe(mg, tmp_mg, &mglist, refresh_list) {
+		list_del_init(&mg->refresh_list);
+		kref_put(&mg->refcount, mount_group_release);
 	}
 
 	spin_lock(&cache_ttl_lock);
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index 1afc4f590c47..c23b08530a7f 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -10,6 +10,7 @@
 
 #include <linux/nls.h>
 #include <linux/list.h>
+#include <linux/uuid.h>
 #include "cifsglob.h"
 
 struct dfs_cache_tgt_list {
@@ -44,13 +45,10 @@ dfs_cache_noreq_update_tgthint(const char *path,
 extern int dfs_cache_get_tgt_referral(const char *path,
 				      const struct dfs_cache_tgt_iterator *it,
 				      struct dfs_info3_param *ref);
-extern int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx,
-			const char *fullpath);
-extern int dfs_cache_update_vol(const char *fullpath,
-				struct TCP_Server_Info *server);
-extern void dfs_cache_del_vol(const char *fullpath);
 extern int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
 				   char **share, char **prefix);
+void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
+void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
 
 static inline struct dfs_cache_tgt_iterator *
 dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,

From c870a8e70e682764dfca6771d3ee5d632babb3e5 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:30 -0300
Subject: [PATCH 2336/3292] cifs: handle different charsets in dfs cache

Convert all dfs paths to dfs cache's local codepage (@cache_cp) and
avoid mixing them with different charsets.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_fs_sb.h |   5 +-
 fs/cifs/cifsglob.h   |   3 +-
 fs/cifs/connect.c    |  63 +++++++++++------
 fs/cifs/dfs_cache.c  | 165 ++++++++++++++++++++-----------------------
 fs/cifs/dfs_cache.h  |  37 +++++-----
 5 files changed, 136 insertions(+), 137 deletions(-)

diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 2dfd10c34ace..64990ccd1b9c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -72,9 +72,8 @@ struct cifs_sb_info {
 	char *prepath;
 
 	/*
-	 * Path initially provided by the mount call. We might connect
-	 * to something different via DFS but we want to keep it to do
-	 * failover properly.
+	 * Canonical DFS path initially provided by the mount call. We might connect to something
+	 * different via DFS but we want to keep it to do failover properly.
 	 */
 	char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */
 	/* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8488d7024462..d85ef3b41bd9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1093,8 +1093,7 @@ struct cifs_tcon {
 	struct cached_fid crfid; /* Cached root fid */
 	/* BB add field for back pointer to sb struct(s)? */
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	char *dfs_path;
-	int remap:2;
+	char *dfs_path; /* canonical DFS path */
 	struct list_head ulist; /* cache update list */
 #endif
 };
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7f0dd635ce9c..595de4034fa7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3010,9 +3010,8 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
 	return rc;
 }
 
-static inline int get_next_dfs_tgt(const char *path,
-				   struct dfs_cache_tgt_list *tgt_list,
-				   struct dfs_cache_tgt_iterator **tgt_it)
+static int get_next_dfs_tgt(struct dfs_cache_tgt_list *tgt_list,
+			    struct dfs_cache_tgt_iterator **tgt_it)
 {
 	if (!*tgt_it)
 		*tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
@@ -3052,6 +3051,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 			   struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
 	int rc;
+	char *npath = NULL;
 	struct dfs_cache_tgt_list tgt_list = {0};
 	struct dfs_cache_tgt_iterator *tgt_it = NULL;
 	struct smb3_fs_context tmp_ctx = {NULL};
@@ -3059,11 +3059,15 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
 		return -EOPNOTSUPP;
 
-	cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, path, full_path);
+	npath = dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+	if (IS_ERR(npath))
+		return PTR_ERR(npath);
 
-	rc = dfs_cache_noreq_find(path, NULL, &tgt_list);
+	cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, npath, full_path);
+
+	rc = dfs_cache_noreq_find(npath, NULL, &tgt_list);
 	if (rc)
-		return rc;
+		goto out;
 	/*
 	 * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to
 	 * test connection against new DFS targets.
@@ -3077,11 +3081,11 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 		char *fake_devname = NULL, *mdata = NULL;
 
 		/* Get next DFS target server - if any */
-		rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it);
+		rc = get_next_dfs_tgt(&tgt_list, &tgt_it);
 		if (rc)
 			break;
 
-		rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
+		rc = dfs_cache_get_tgt_referral(npath, tgt_it, &ref);
 		if (rc)
 			break;
 
@@ -3130,6 +3134,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 	}
 
 out:
+	kfree(npath);
 	smb3_cleanup_fs_context_contents(&tmp_ctx);
 	dfs_cache_free_tgts(&tgt_list);
 	return rc;
@@ -3287,11 +3292,6 @@ static void set_root_ses(struct cifs_sb_info *cifs_sb, const uuid_t *mount_id, s
 	if (ses) {
 		spin_lock(&cifs_tcp_ses_lock);
 		ses->ses_count++;
-		cifs_dbg(FYI, "%s: new ses_count=%d\n", __func__, ses->ses_count);
-		if (ses->tcon_ipc) {
-			cifs_dbg(FYI, "%s: ipc tcon: %s\n", __func__, ses->tcon_ipc->treeName);
-			ses->tcon_ipc->remap = cifs_remap(cifs_sb);
-		}
 		spin_unlock(&cifs_tcp_ses_lock);
 		dfs_cache_add_refsrv_session(mount_id, ses);
 	}
@@ -3343,17 +3343,25 @@ out:
 }
 
 /* Check if resolved targets can handle any DFS referrals */
-static int is_referral_server(const char *ref_path, struct cifs_tcon *tcon, bool *ref_server)
+static int is_referral_server(const char *ref_path, struct cifs_sb_info *cifs_sb,
+			      struct cifs_tcon *tcon, bool *ref_server)
 {
 	int rc;
 	struct dfs_info3_param ref = {0};
 
+	cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+
 	if (is_tcon_dfs(tcon)) {
 		*ref_server = true;
 	} else {
-		cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+		char *npath;
 
-		rc = dfs_cache_noreq_find(ref_path, &ref, NULL);
+		npath = dfs_cache_canonical_path(ref_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+		if (IS_ERR(npath))
+			return PTR_ERR(npath);
+
+		rc = dfs_cache_noreq_find(npath, &ref, NULL);
+		kfree(npath);
 		if (rc) {
 			cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
 			return rc;
@@ -3443,7 +3451,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 			continue;
 
 		/* Make sure that requests go through new root servers */
-		rc = is_referral_server(ref_path + 1, tcon, &ref_server);
+		rc = is_referral_server(ref_path + 1, cifs_sb, tcon, &ref_server);
 		if (rc)
 			break;
 		if (ref_server)
@@ -3460,7 +3468,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 		goto error;
 
 	kfree(ref_path);
-	ref_path = NULL;
 	/*
 	 * Store DFS full path in both superblock and tree connect structures.
 	 *
@@ -3469,15 +3476,25 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	 * links, the prefix path is included in both and may be changed during reconnect.  See
 	 * cifs_tree_connect().
 	 */
-	cifs_sb->origin_fullpath = kstrdup(full_path, GFP_KERNEL);
-	if (!cifs_sb->origin_fullpath) {
+	ref_path = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+	kfree(full_path);
+	full_path = NULL;
+
+	if (IS_ERR(ref_path)) {
+		rc = PTR_ERR(ref_path);
+		ref_path = NULL;
+		goto error;
+	}
+	cifs_sb->origin_fullpath = ref_path;
+
+	ref_path = kstrdup(cifs_sb->origin_fullpath, GFP_KERNEL);
+	if (!ref_path) {
 		rc = -ENOMEM;
 		goto error;
 	}
 	spin_lock(&cifs_tcp_ses_lock);
-	tcon->dfs_path = full_path;
-	full_path = NULL;
-	tcon->remap = cifs_remap(cifs_sb);
+	tcon->dfs_path = ref_path;
+	ref_path = NULL;
 	spin_unlock(&cifs_tcp_ses_lock);
 
 	/*
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 846d67028522..b5166384de49 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -65,7 +65,7 @@ static struct workqueue_struct *dfscache_wq __read_mostly;
 static int cache_ttl;
 static DEFINE_SPINLOCK(cache_ttl_lock);
 
-static struct nls_table *cache_nlsc;
+static struct nls_table *cache_cp;
 
 /*
  * Number of entries in the cache
@@ -175,27 +175,45 @@ static void free_mount_group_list(void)
 	}
 }
 
-static int get_normalized_path(const char *path, const char **npath)
+/**
+ * dfs_cache_canonical_path - get a canonical DFS path
+ *
+ * @path: DFS path
+ * @cp: codepage
+ * @remap: mapping type
+ *
+ * Return canonical path if success, otherwise error.
+ */
+char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap)
 {
+	char *tmp;
+	int plen = 0;
+	char *npath;
+
 	if (!path || strlen(path) < 3 || (*path != '\\' && *path != '/'))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
-	if (*path == '\\') {
-		*npath = path;
+	if (unlikely(strcmp(cp->charset, cache_cp->charset))) {
+		tmp = (char *)cifs_strndup_to_utf16(path, strlen(path), &plen, cp, remap);
+		if (!tmp) {
+			cifs_dbg(VFS, "%s: failed to convert path to utf16\n", __func__);
+			return ERR_PTR(-EINVAL);
+		}
+
+		npath = cifs_strndup_from_utf16(tmp, plen, true, cache_cp);
+		kfree(tmp);
+
+		if (!npath) {
+			cifs_dbg(VFS, "%s: failed to convert path from utf16\n", __func__);
+			return ERR_PTR(-EINVAL);
+		}
 	} else {
-		char *s = kstrdup(path, GFP_KERNEL);
-		if (!s)
-			return -ENOMEM;
-		convert_delimiter(s, '\\');
-		*npath = s;
+		npath = kstrdup(path, GFP_KERNEL);
+		if (!npath)
+			return ERR_PTR(-ENOMEM);
 	}
-	return 0;
-}
-
-static inline void free_normalized_path(const char *path, const char *npath)
-{
-	if (path != npath)
-		kfree(npath);
+	convert_delimiter(npath, '\\');
+	return npath;
 }
 
 static inline bool cache_entry_expired(const struct cache_entry *ce)
@@ -394,7 +412,9 @@ int dfs_cache_init(void)
 		INIT_HLIST_HEAD(&cache_htable[i]);
 
 	atomic_set(&cache_count, 0);
-	cache_nlsc = load_nls_default();
+	cache_cp = load_nls("utf8");
+	if (!cache_cp)
+		cache_cp = load_nls_default();
 
 	cifs_dbg(FYI, "%s: initialized DFS referral cache\n", __func__);
 	return 0;
@@ -669,7 +689,7 @@ out:
 void dfs_cache_destroy(void)
 {
 	cancel_delayed_work_sync(&refresh_task);
-	unload_nls(cache_nlsc);
+	unload_nls(cache_cp);
 	free_mount_group_list();
 	flush_cache_ents();
 	kmem_cache_destroy(cache_slab);
@@ -707,23 +727,21 @@ static int update_cache_entry_locked(const char *path, const struct dfs_info3_pa
 	return rc;
 }
 
-static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
-			    const struct nls_table *nls_codepage, int remap,
-			    const char *path,  struct dfs_info3_param **refs,
-			    int *numrefs)
+static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const char *path,
+			    struct dfs_info3_param **refs, int *numrefs)
 {
 	cifs_dbg(FYI, "%s: get an DFS referral for %s\n", __func__, path);
 
 	if (!ses || !ses->server || !ses->server->ops->get_dfs_refer)
 		return -EOPNOTSUPP;
-	if (unlikely(!nls_codepage))
+	if (unlikely(!cache_cp))
 		return -EINVAL;
 
 	*refs = NULL;
 	*numrefs = 0;
 
-	return ses->server->ops->get_dfs_refer(xid, ses, path, refs, numrefs,
-					       nls_codepage, remap);
+	return ses->server->ops->get_dfs_refer(xid, ses, path, refs, numrefs, cache_cp,
+					       NO_MAP_UNI_RSVD);
 }
 
 /*
@@ -735,8 +753,7 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
  * For interlinks, cifs_mount() and expand_dfs_referral() are supposed to
  * handle them properly.
  */
-static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses,
-			      const struct nls_table *nls_codepage, int remap, const char *path)
+static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path)
 {
 	int rc;
 	unsigned int hash;
@@ -764,8 +781,7 @@ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses,
 	 * Either the entry was not found, or it is expired.
 	 * Request a new DFS referral in order to create or update a cache entry.
 	 */
-	rc = get_dfs_referral(xid, ses, nls_codepage, remap, path,
-			      &refs, &numrefs);
+	rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
 	if (rc)
 		goto out_unlock;
 
@@ -886,7 +902,7 @@ err_free_it:
  * needs to be issued:
  * @xid: syscall xid
  * @ses: smb session to issue the request on
- * @nls_codepage: charset conversion
+ * @cp: codepage
  * @remap: path character remapping type
  * @path: path to lookup in DFS referral cache.
  *
@@ -895,20 +911,19 @@ err_free_it:
  *
  * Return zero if the target was found, otherwise non-zero.
  */
-int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
-		   const struct nls_table *nls_codepage, int remap,
-		   const char *path, struct dfs_info3_param *ref,
+int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *cp,
+		   int remap, const char *path, struct dfs_info3_param *ref,
 		   struct dfs_cache_tgt_list *tgt_list)
 {
 	int rc;
 	const char *npath;
 	struct cache_entry *ce;
 
-	rc = get_normalized_path(path, &npath);
-	if (rc)
-		return rc;
+	npath = dfs_cache_canonical_path(path, cp, remap);
+	if (IS_ERR(npath))
+		return PTR_ERR(npath);
 
-	rc = cache_refresh_path(xid, ses, nls_codepage, remap, npath);
+	rc = cache_refresh_path(xid, ses, npath);
 	if (rc)
 		goto out_free_path;
 
@@ -931,7 +946,7 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
 	up_read(&htable_rw_lock);
 
 out_free_path:
-	free_normalized_path(path, npath);
+	kfree(npath);
 	return rc;
 }
 
@@ -943,7 +958,7 @@ out_free_path:
  * expired, nor create a new cache entry if @path hasn't been found. It heavily
  * relies on an existing cache entry.
  *
- * @path: path to lookup in the DFS referral cache.
+ * @path: canonical DFS path to lookup in the DFS referral cache.
  * @ref: when non-NULL, store single DFS referral result in it.
  * @tgt_list: when non-NULL, store complete DFS target list in it.
  *
@@ -955,18 +970,13 @@ int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
 			 struct dfs_cache_tgt_list *tgt_list)
 {
 	int rc;
-	const char *npath;
 	struct cache_entry *ce;
 
-	rc = get_normalized_path(path, &npath);
-	if (rc)
-		return rc;
-
-	cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+	cifs_dbg(FYI, "%s: path: %s\n", __func__, path);
 
 	down_read(&htable_rw_lock);
 
-	ce = lookup_cache_entry(npath, NULL);
+	ce = lookup_cache_entry(path, NULL);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;
@@ -981,8 +991,6 @@ int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
 
 out_unlock:
 	up_read(&htable_rw_lock);
-	free_normalized_path(path, npath);
-
 	return rc;
 }
 
@@ -997,16 +1005,15 @@ out_unlock:
  *
  * @xid: syscall id
  * @ses: smb session
- * @nls_codepage: charset conversion
+ * @cp: codepage
  * @remap: type of character remapping for paths
- * @path: path to lookup in DFS referral cache.
+ * @path: path to lookup in DFS referral cache
  * @it: DFS target iterator
  *
  * Return zero if the target hint was updated successfully, otherwise non-zero.
  */
 int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
-			     const struct nls_table *nls_codepage, int remap,
-			     const char *path,
+			     const struct nls_table *cp, int remap, const char *path,
 			     const struct dfs_cache_tgt_iterator *it)
 {
 	int rc;
@@ -1014,13 +1021,13 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
 	struct cache_entry *ce;
 	struct cache_dfs_tgt *t;
 
-	rc = get_normalized_path(path, &npath);
-	if (rc)
-		return rc;
+	npath = dfs_cache_canonical_path(path, cp, remap);
+	if (IS_ERR(npath))
+		return PTR_ERR(npath);
 
 	cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath);
 
-	rc = cache_refresh_path(xid, ses, nls_codepage, remap, npath);
+	rc = cache_refresh_path(xid, ses, npath);
 	if (rc)
 		goto out_free_path;
 
@@ -1049,8 +1056,7 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
 out_unlock:
 	up_write(&htable_rw_lock);
 out_free_path:
-	free_normalized_path(path, npath);
-
+	kfree(npath);
 	return rc;
 }
 
@@ -1062,32 +1068,26 @@ out_free_path:
  * expired, nor create a new cache entry if @path hasn't been found. It heavily
  * relies on an existing cache entry.
  *
- * @path: path to lookup in DFS referral cache.
+ * @path: canonical DFS path to lookup in DFS referral cache.
  * @it: target iterator which contains the target hint to update the cache
  * entry with.
  *
  * Return zero if the target hint was updated successfully, otherwise non-zero.
  */
-int dfs_cache_noreq_update_tgthint(const char *path,
-				   const struct dfs_cache_tgt_iterator *it)
+int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it)
 {
 	int rc;
-	const char *npath;
 	struct cache_entry *ce;
 	struct cache_dfs_tgt *t;
 
 	if (!it)
 		return -EINVAL;
 
-	rc = get_normalized_path(path, &npath);
-	if (rc)
-		return rc;
-
-	cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+	cifs_dbg(FYI, "%s: path: %s\n", __func__, path);
 
 	down_write(&htable_rw_lock);
 
-	ce = lookup_cache_entry(npath, NULL);
+	ce = lookup_cache_entry(path, NULL);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;
@@ -1110,8 +1110,6 @@ int dfs_cache_noreq_update_tgthint(const char *path,
 
 out_unlock:
 	up_write(&htable_rw_lock);
-	free_normalized_path(path, npath);
-
 	return rc;
 }
 
@@ -1119,32 +1117,26 @@ out_unlock:
  * dfs_cache_get_tgt_referral - returns a DFS referral (@ref) from a given
  * target iterator (@it).
  *
- * @path: path to lookup in DFS referral cache.
+ * @path: canonical DFS path to lookup in DFS referral cache.
  * @it: DFS target iterator.
  * @ref: DFS referral pointer to set up the gathered information.
  *
  * Return zero if the DFS referral was set up correctly, otherwise non-zero.
  */
-int dfs_cache_get_tgt_referral(const char *path,
-			       const struct dfs_cache_tgt_iterator *it,
+int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it,
 			       struct dfs_info3_param *ref)
 {
 	int rc;
-	const char *npath;
 	struct cache_entry *ce;
 
 	if (!it || !ref)
 		return -EINVAL;
 
-	rc = get_normalized_path(path, &npath);
-	if (rc)
-		return rc;
-
-	cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+	cifs_dbg(FYI, "%s: path: %s\n", __func__, path);
 
 	down_read(&htable_rw_lock);
 
-	ce = lookup_cache_entry(npath, NULL);
+	ce = lookup_cache_entry(path, NULL);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;
@@ -1156,8 +1148,6 @@ int dfs_cache_get_tgt_referral(const char *path,
 
 out_unlock:
 	up_read(&htable_rw_lock);
-	free_normalized_path(path, npath);
-
 	return rc;
 }
 
@@ -1219,8 +1209,8 @@ void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id)
  *
  * Return zero if target was parsed correctly, otherwise non-zero.
  */
-int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
-			    char **share, char **prefix)
+int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share,
+			    char **prefix)
 {
 	char *s, sep, *p;
 	size_t len;
@@ -1310,7 +1300,7 @@ static void refresh_mounts(struct cifs_ses **sessions)
 		ses = find_ipc_from_server_path(sessions, path);
 		if (!IS_ERR(ses)) {
 			xid = get_xid();
-			cache_refresh_path(xid, ses, cache_nlsc, tcon->remap, path);
+			cache_refresh_path(xid, ses, path);
 			free_xid(xid);
 		}
 		cifs_put_tcon(tcon);
@@ -1346,8 +1336,7 @@ static void refresh_cache(struct cifs_ses **sessions)
 				continue;
 
 			xid = get_xid();
-			rc = get_dfs_referral(xid, ses, cache_nlsc, NO_MAP_UNI_RSVD, ce->path,
-					      &refs, &numrefs);
+			rc = get_dfs_referral(xid, ses, ce->path, &refs, &numrefs);
 			free_xid(xid);
 
 			if (!rc)
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index c23b08530a7f..b29d3ae64829 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -24,31 +24,26 @@ struct dfs_cache_tgt_iterator {
 	struct list_head it_list;
 };
 
-extern int dfs_cache_init(void);
-extern void dfs_cache_destroy(void);
+int dfs_cache_init(void);
+void dfs_cache_destroy(void);
 extern const struct proc_ops dfscache_proc_ops;
 
-extern int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
-			  const struct nls_table *nls_codepage, int remap,
-			  const char *path, struct dfs_info3_param *ref,
-			  struct dfs_cache_tgt_list *tgt_list);
-extern int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
-				struct dfs_cache_tgt_list *tgt_list);
-extern int dfs_cache_update_tgthint(const unsigned int xid,
-				    struct cifs_ses *ses,
-				    const struct nls_table *nls_codepage,
-				    int remap, const char *path,
-				    const struct dfs_cache_tgt_iterator *it);
-extern int
-dfs_cache_noreq_update_tgthint(const char *path,
-			       const struct dfs_cache_tgt_iterator *it);
-extern int dfs_cache_get_tgt_referral(const char *path,
-				      const struct dfs_cache_tgt_iterator *it,
-				      struct dfs_info3_param *ref);
-extern int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
-				   char **share, char **prefix);
+int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *cp,
+		   int remap, const char *path, struct dfs_info3_param *ref,
+		   struct dfs_cache_tgt_list *tgt_list);
+int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
+			 struct dfs_cache_tgt_list *tgt_list);
+int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
+			     const struct nls_table *cp, int remap, const char *path,
+			     const struct dfs_cache_tgt_iterator *it);
+int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it);
+int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it,
+			       struct dfs_info3_param *ref);
+int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share,
+			    char **prefix);
 void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
 void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
+char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
 
 static inline struct dfs_cache_tgt_iterator *
 dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,

From 42caeba713b12ee0dfc085f6fb2f7cde2a64f91a Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:31 -0300
Subject: [PATCH 2337/3292] cifs: fix path comparison and hash calc

Fix cache lookup and hash calculations when handling paths with
different cases.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dfs_cache.c | 168 ++++++++++++++++++++++++--------------------
 1 file changed, 93 insertions(+), 75 deletions(-)

diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index b5166384de49..66be23975aec 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -424,12 +424,24 @@ out_destroy_wq:
 	return rc;
 }
 
-static inline unsigned int cache_entry_hash(const void *data, int size)
+static int cache_entry_hash(const void *data, int size, unsigned int *hash)
 {
-	unsigned int h;
+	int i, clen;
+	const unsigned char *s = data;
+	wchar_t c;
+	unsigned int h = 0;
 
-	h = jhash(data, size, 0);
-	return h & (CACHE_HTABLE_SIZE - 1);
+	for (i = 0; i < size; i += clen) {
+		clen = cache_cp->char2uni(&s[i], size - i, &c);
+		if (unlikely(clen < 0)) {
+			cifs_dbg(VFS, "%s: can't convert char\n", __func__);
+			return clen;
+		}
+		c = cifs_toupper(c);
+		h = jhash(&c, sizeof(c), h);
+	}
+	*hash = h % CACHE_HTABLE_SIZE;
+	return 0;
 }
 
 /* Return target hint of a DFS cache entry */
@@ -511,9 +523,7 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
 }
 
 /* Allocate a new cache entry */
-static struct cache_entry *alloc_cache_entry(const char *path,
-					     const struct dfs_info3_param *refs,
-					     int numrefs)
+static struct cache_entry *alloc_cache_entry(struct dfs_info3_param *refs, int numrefs)
 {
 	struct cache_entry *ce;
 	int rc;
@@ -522,11 +532,9 @@ static struct cache_entry *alloc_cache_entry(const char *path,
 	if (!ce)
 		return ERR_PTR(-ENOMEM);
 
-	ce->path = kstrdup(path, GFP_KERNEL);
-	if (!ce->path) {
-		kmem_cache_free(cache_slab, ce);
-		return ERR_PTR(-ENOMEM);
-	}
+	ce->path = refs[0].path_name;
+	refs[0].path_name = NULL;
+
 	INIT_HLIST_NODE(&ce->hlist);
 	INIT_LIST_HEAD(&ce->tlist);
 
@@ -568,12 +576,18 @@ static void remove_oldest_entry_locked(void)
 }
 
 /* Add a new DFS cache entry */
-static int add_cache_entry_locked(const char *path, unsigned int hash,
-				  struct dfs_info3_param *refs, int numrefs)
+static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
 {
+	int rc;
 	struct cache_entry *ce;
+	unsigned int hash;
 
-	ce = alloc_cache_entry(path, refs, numrefs);
+	convert_delimiter(refs[0].path_name, '\\');
+	rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash);
+	if (rc)
+		return rc;
+
+	ce = alloc_cache_entry(refs, numrefs);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
@@ -593,57 +607,69 @@ static int add_cache_entry_locked(const char *path, unsigned int hash,
 	return 0;
 }
 
-static struct cache_entry *__lookup_cache_entry(const char *path)
+/* Check if two DFS paths are equal.  @s1 and @s2 are expected to be in @cache_cp's charset */
+static bool dfs_path_equal(const char *s1, int len1, const char *s2, int len2)
+{
+	int i, l1, l2;
+	wchar_t c1, c2;
+
+	if (len1 != len2)
+		return false;
+
+	for (i = 0; i < len1; i += l1) {
+		l1 = cache_cp->char2uni(&s1[i], len1 - i, &c1);
+		l2 = cache_cp->char2uni(&s2[i], len2 - i, &c2);
+		if (unlikely(l1 < 0 && l2 < 0)) {
+			if (s1[i] != s2[i])
+				return false;
+			l1 = 1;
+			continue;
+		}
+		if (l1 != l2)
+			return false;
+		if (cifs_toupper(c1) != cifs_toupper(c2))
+			return false;
+	}
+	return true;
+}
+
+static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int hash, int len)
 {
 	struct cache_entry *ce;
-	unsigned int h;
-	bool found = false;
 
-	h = cache_entry_hash(path, strlen(path));
-
-	hlist_for_each_entry(ce, &cache_htable[h], hlist) {
-		if (!strcasecmp(path, ce->path)) {
-			found = true;
+	hlist_for_each_entry(ce, &cache_htable[hash], hlist) {
+		if (dfs_path_equal(ce->path, strlen(ce->path), path, len)) {
 			dump_ce(ce);
-			break;
+			return ce;
 		}
 	}
-
-	if (!found)
-		ce = ERR_PTR(-ENOENT);
-	return ce;
+	return ERR_PTR(-EEXIST);
 }
 
 /*
- * Find a DFS cache entry in hash table and optionally check prefix path against
- * @path.
- * Use whole path components in the match.
- * Must be called with htable_rw_lock held.
+ * Find a DFS cache entry in hash table and optionally check prefix path against normalized @path.
  *
- * Return ERR_PTR(-ENOENT) if the entry is not found.
+ * Use whole path components in the match.  Must be called with htable_rw_lock held.
+ *
+ * Return ERR_PTR(-EEXIST) if the entry is not found.
  */
-static struct cache_entry *lookup_cache_entry(const char *path, unsigned int *hash)
+static struct cache_entry *lookup_cache_entry(const char *path)
 {
-	struct cache_entry *ce = ERR_PTR(-ENOENT);
-	unsigned int h;
+	struct cache_entry *ce;
 	int cnt = 0;
-	char *npath;
-	char *s, *e;
-	char sep;
+	const char *s = path, *e;
+	char sep = *s;
+	unsigned int hash;
+	int rc;
 
-	npath = kstrdup(path, GFP_KERNEL);
-	if (!npath)
-		return ERR_PTR(-ENOMEM);
-
-	s = npath;
-	sep = *npath;
 	while ((s = strchr(s, sep)) && ++cnt < 3)
 		s++;
 
 	if (cnt < 3) {
-		h = cache_entry_hash(path, strlen(path));
-		ce = __lookup_cache_entry(path);
-		goto out;
+		rc = cache_entry_hash(path, strlen(path), &hash);
+		if (rc)
+			return ERR_PTR(rc);
+		return __lookup_cache_entry(path, hash, strlen(path));
 	}
 	/*
 	 * Handle paths that have more than two path components and are a complete prefix of the DFS
@@ -651,36 +677,29 @@ static struct cache_entry *lookup_cache_entry(const char *path, unsigned int *ha
 	 *
 	 * See MS-DFSC 3.2.5.5 "Receiving a Root Referral Request or Link Referral Request".
 	 */
-	h = cache_entry_hash(npath, strlen(npath));
-	e = npath + strlen(npath) - 1;
+	e = path + strlen(path) - 1;
 	while (e > s) {
-		char tmp;
+		int len;
 
 		/* skip separators */
 		while (e > s && *e == sep)
 			e--;
 		if (e == s)
-			goto out;
-
-		tmp = *(e+1);
-		*(e+1) = 0;
-
-		ce = __lookup_cache_entry(npath);
-		if (!IS_ERR(ce)) {
-			h = cache_entry_hash(npath, strlen(npath));
 			break;
-		}
 
-		*(e+1) = tmp;
+		len = e + 1 - path;
+		rc = cache_entry_hash(path, len, &hash);
+		if (rc)
+			return ERR_PTR(rc);
+		ce = __lookup_cache_entry(path, hash, len);
+		if (!IS_ERR(ce))
+			return ce;
+
 		/* backward until separator */
 		while (e > s && *e != sep)
 			e--;
 	}
-out:
-	if (hash)
-		*hash = h;
-	kfree(npath);
-	return ce;
+	return ERR_PTR(-EEXIST);
 }
 
 /**
@@ -706,7 +725,7 @@ static int update_cache_entry_locked(const char *path, const struct dfs_info3_pa
 	struct cache_entry *ce;
 	char *s, *th = NULL;
 
-	ce = lookup_cache_entry(path, NULL);
+	ce = lookup_cache_entry(path);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
@@ -756,7 +775,6 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const
 static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path)
 {
 	int rc;
-	unsigned int hash;
 	struct cache_entry *ce;
 	struct dfs_info3_param *refs = NULL;
 	int numrefs = 0;
@@ -766,7 +784,7 @@ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, cons
 
 	down_write(&htable_rw_lock);
 
-	ce = lookup_cache_entry(path, &hash);
+	ce = lookup_cache_entry(path);
 	if (!IS_ERR(ce)) {
 		if (!cache_entry_expired(ce)) {
 			dump_ce(ce);
@@ -797,7 +815,7 @@ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, cons
 		remove_oldest_entry_locked();
 	}
 
-	rc = add_cache_entry_locked(path, hash, refs, numrefs);
+	rc = add_cache_entry_locked(refs, numrefs);
 	if (!rc)
 		atomic_inc(&cache_count);
 
@@ -929,7 +947,7 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl
 
 	down_read(&htable_rw_lock);
 
-	ce = lookup_cache_entry(npath, NULL);
+	ce = lookup_cache_entry(npath);
 	if (IS_ERR(ce)) {
 		up_read(&htable_rw_lock);
 		rc = PTR_ERR(ce);
@@ -976,7 +994,7 @@ int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
 
 	down_read(&htable_rw_lock);
 
-	ce = lookup_cache_entry(path, NULL);
+	ce = lookup_cache_entry(path);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;
@@ -1033,7 +1051,7 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
 
 	down_write(&htable_rw_lock);
 
-	ce = lookup_cache_entry(npath, NULL);
+	ce = lookup_cache_entry(npath);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;
@@ -1087,7 +1105,7 @@ int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_
 
 	down_write(&htable_rw_lock);
 
-	ce = lookup_cache_entry(path, NULL);
+	ce = lookup_cache_entry(path);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;
@@ -1136,7 +1154,7 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter
 
 	down_read(&htable_rw_lock);
 
-	ce = lookup_cache_entry(path, NULL);
+	ce = lookup_cache_entry(path);
 	if (IS_ERR(ce)) {
 		rc = PTR_ERR(ce);
 		goto out_unlock;

From c950fc7af9a7c20b60d7c1ffe5fd4d982ec8cd6c Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:32 -0300
Subject: [PATCH 2338/3292] cifs: set a minimum of 2 minutes for refreshing dfs
 cache

We don't want to refresh the dfs cache in very short intervals, so
setting a minimum interval of 2 minutes is OK.

If it needs to be refreshed immediately, one could have the cache
cleared with

	$ echo 0 > /proc/fs/cifs/dfscache

and then remounting the dfs share.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dfs_cache.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 66be23975aec..d9c2c75a2989 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -24,6 +24,7 @@
 
 #define CACHE_HTABLE_SIZE 32
 #define CACHE_MAX_ENTRIES 64
+#define CACHE_MIN_TTL 120 /* 2 minutes */
 
 #define IS_INTERLINK_SET(v) ((v) & (DFSREF_REFERRAL_SERVER | \
 				    DFSREF_STORAGE_SERVER))
@@ -492,7 +493,7 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
 {
 	int i;
 
-	ce->ttl = refs[0].ttl;
+	ce->ttl = max_t(int, refs[0].ttl, CACHE_MIN_TTL);
 	ce->etime = get_expire_time(ce->ttl);
 	ce->srvtype = refs[0].server_type;
 	ce->hdr_flags = refs[0].flags;

From f3c852b0b0fc0e4ecabbf2e8480c2a088d54b588 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Fri, 4 Jun 2021 19:25:33 -0300
Subject: [PATCH 2339/3292] cifs: do not share tcp servers with dfs mounts

It isn't enough to have unshared tcons because multiple DFS mounts can
connect to same target server and failover to different servers, so we
can't use a single tcp server for such cases.

For the simplest solution, use nosharesock option to achieve that.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/connect.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 595de4034fa7..43c7e4b27f46 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1944,10 +1944,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each(tmp, &ses->tcon_list) {
 		tcon = list_entry(tmp, struct cifs_tcon, tcon_list);
-#ifdef CONFIG_CIFS_DFS_UPCALL
-		if (tcon->dfs_path)
-			continue;
-#endif
+
 		if (!match_tcon(tcon, ctx))
 			continue;
 		++tcon->tc_count;
@@ -3411,6 +3408,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 			goto error;
 	}
 
+	ctx->nosharesock = true;
+
 	/* Get path of DFS root */
 	ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
 	if (IS_ERR(ref_path)) {

From e695a9ad0305af6e8b0cbc24a54976ac2120cbb3 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad@microsoft.com>
Date: Sun, 23 May 2021 16:54:42 +0000
Subject: [PATCH 2340/3292] cifs: missed ref-counting smb session in find

When we lookup an smb session based on session id,
we did not up the ref-count for the session. This can
potentially cause issues if the session is freed from under us.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2transport.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index e6fa76ab70be..0ab1d464e8c8 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -154,6 +154,7 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
 	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
 		if (ses->Suid != ses_id)
 			continue;
+		++ses->ses_count;
 		return ses;
 	}
 
@@ -205,7 +206,14 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32  tid)
 		return NULL;
 	}
 	tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+	if (!tcon) {
+		cifs_put_smb_ses(ses);
+		spin_unlock(&cifs_tcp_ses_lock);
+		return NULL;
+	}
 	spin_unlock(&cifs_tcp_ses_lock);
+	/* tcon already has a ref to ses, so we don't need ses anymore */
+	cifs_put_smb_ses(ses);
 
 	return tcon;
 }
@@ -239,7 +247,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
 		if (rc) {
 			cifs_server_dbg(VFS,
 					"%s: sha256 alloc failed\n", __func__);
-			return rc;
+			goto out;
 		}
 		shash = &sdesc->shash;
 	} else {
@@ -290,6 +298,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
 out:
 	if (allocate_crypto)
 		cifs_free_hash(&hash, &sdesc);
+	if (ses)
+		cifs_put_smb_ses(ses);
 	return rc;
 }
 

From 0d52df81e07739db25afe72e10dcc623b271d905 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 8 Jun 2021 16:43:41 -0500
Subject: [PATCH 2341/3292] cifs: enable extended stats by default

CONFIG_CIFS_STATS2 can be very useful since it shows
latencies by command, and allows enabling the slow response
dynamic tracepoint which can be useful to identify
performance problems.

For example:

Total time spent processing by command. Time units are jiffies (1000 per second)
  SMB3 CMD	Number	Total Time	Fastest	Slowest
  --------	------	----------	-------	-------
  0		1	2		2	2
  1		2	6		2	4
  2		0	0		0	0
  3		4	11		2	4
  4		2	16		5	11
  5		4546	34104		2	487
  6		4421	32901		2	487
  7		0	0		0	0
  8		695	2781		2	39
  9		391	1708		2	27
  10		0	0		0	0
  11		4	6		1	2
  12		0	0		0	0
  13		0	0		0	0
  14		3887	17696		0	128
  15		0	0		0	0
  16		1471	9950		1	487
  17		169	2695		9	116
  18		80	381		2	10
  1		2	6		2	4
  2		0	0		0	0
  3		4	11		2	4
  4		2	16		5	11
  5		4546	34104		2	487
  6		4421	32901		2	487
  7		0	0		0	0
  8		695	2781		2	39
  9		391	1708		2	27
  10		0	0		0	0
  11		4	6		1	2
  12		0	0		0	0
  13		0	0		0	0
  14		3887	17696		0	128
  15		0	0		0	0
  16		1471	9950		1	487
  17		169	2695		9	116
  18		80	381		2	10

Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index bf52e9326ebe..55bc57e9f812 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -57,6 +57,7 @@ config CIFS
 config CIFS_STATS2
 	bool "Extended statistics"
 	depends on CIFS
+	default y
 	help
 	  Enabling this option will allow more detailed statistics on SMB
 	  request timing to be displayed in /proc/fs/cifs/DebugData and also
@@ -65,8 +66,7 @@ config CIFS_STATS2
 	  for more details. These additional statistics may have a minor effect
 	  on performance and memory utilization.
 
-	  Unless you are a developer or are doing network performance analysis
-	  or tuning, say N.
+	  If unsure, say Y.
 
 config CIFS_ALLOW_INSECURE_LEGACY
 	bool "Support legacy servers which use less secure dialects"

From 1023e90b733acd1da98ba7067aa0fa8b998eed19 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Tue, 8 Jun 2021 13:50:06 -0300
Subject: [PATCH 2342/3292] cifs: avoid starvation when refreshing dfs cache

When refreshing the DFS cache, keep SMB2 IOCTL calls as much outside
critical sections as possible and avoid read/write starvation when
getting new DFS referrals by using broken or slow connections.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dfs_cache.c | 153 +++++++++++++++++++++++++++++++-------------
 1 file changed, 109 insertions(+), 44 deletions(-)

diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index d9c2c75a2989..775dbc7b0d7a 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -554,6 +554,8 @@ static void remove_oldest_entry_locked(void)
 	struct cache_entry *ce;
 	struct cache_entry *to_del = NULL;
 
+	WARN_ON(!rwsem_is_locked(&htable_rw_lock));
+
 	for (i = 0; i < CACHE_HTABLE_SIZE; i++) {
 		struct hlist_head *l = &cache_htable[i];
 
@@ -583,7 +585,13 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
 	struct cache_entry *ce;
 	unsigned int hash;
 
-	convert_delimiter(refs[0].path_name, '\\');
+	WARN_ON(!rwsem_is_locked(&htable_rw_lock));
+
+	if (atomic_read(&cache_count) >= CACHE_MAX_ENTRIES) {
+		cifs_dbg(FYI, "%s: reached max cache size (%d)\n", __func__, CACHE_MAX_ENTRIES);
+		remove_oldest_entry_locked();
+	}
+
 	rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash);
 	if (rc)
 		return rc;
@@ -605,6 +613,8 @@ static int add_cache_entry_locked(struct dfs_info3_param *refs, int numrefs)
 	hlist_add_head(&ce->hlist, &cache_htable[hash]);
 	dump_ce(ce);
 
+	atomic_inc(&cache_count);
+
 	return 0;
 }
 
@@ -719,16 +729,13 @@ void dfs_cache_destroy(void)
 }
 
 /* Update a cache entry with the new referral in @refs */
-static int update_cache_entry_locked(const char *path, const struct dfs_info3_param *refs,
+static int update_cache_entry_locked(struct cache_entry *ce, const struct dfs_info3_param *refs,
 				     int numrefs)
 {
 	int rc;
-	struct cache_entry *ce;
 	char *s, *th = NULL;
 
-	ce = lookup_cache_entry(path);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	WARN_ON(!rwsem_is_locked(&htable_rw_lock));
 
 	if (ce->tgthint) {
 		s = ce->tgthint->name;
@@ -750,18 +757,28 @@ static int update_cache_entry_locked(const char *path, const struct dfs_info3_pa
 static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const char *path,
 			    struct dfs_info3_param **refs, int *numrefs)
 {
+	int rc;
+	int i;
+
 	cifs_dbg(FYI, "%s: get an DFS referral for %s\n", __func__, path);
 
+	*refs = NULL;
+	*numrefs = 0;
+
 	if (!ses || !ses->server || !ses->server->ops->get_dfs_refer)
 		return -EOPNOTSUPP;
 	if (unlikely(!cache_cp))
 		return -EINVAL;
 
-	*refs = NULL;
-	*numrefs = 0;
+	rc =  ses->server->ops->get_dfs_refer(xid, ses, path, refs, numrefs, cache_cp,
+					      NO_MAP_UNI_RSVD);
+	if (!rc) {
+		struct dfs_info3_param *ref = *refs;
 
-	return ses->server->ops->get_dfs_refer(xid, ses, path, refs, numrefs, cache_cp,
-					       NO_MAP_UNI_RSVD);
+		for (i = 0; i < *numrefs; i++)
+			convert_delimiter(ref[i].path_name, '\\');
+	}
+	return rc;
 }
 
 /*
@@ -807,18 +824,11 @@ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, cons
 	dump_refs(refs, numrefs);
 
 	if (!newent) {
-		rc = update_cache_entry_locked(path, refs, numrefs);
+		rc = update_cache_entry_locked(ce, refs, numrefs);
 		goto out_unlock;
 	}
 
-	if (atomic_read(&cache_count) >= CACHE_MAX_ENTRIES) {
-		cifs_dbg(FYI, "%s: reached max cache size (%d)\n", __func__, CACHE_MAX_ENTRIES);
-		remove_oldest_entry_locked();
-	}
-
 	rc = add_cache_entry_locked(refs, numrefs);
-	if (!rc)
-		atomic_inc(&cache_count);
 
 out_unlock:
 	up_write(&htable_rw_lock);
@@ -1313,15 +1323,43 @@ static void refresh_mounts(struct cifs_ses **sessions)
 
 	list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
 		const char *path = tcon->dfs_path + 1;
+		struct cache_entry *ce;
+		struct dfs_info3_param *refs = NULL;
+		int numrefs = 0;
+		bool needs_refresh = false;
 		int rc = 0;
 
 		list_del_init(&tcon->ulist);
+
 		ses = find_ipc_from_server_path(sessions, path);
-		if (!IS_ERR(ses)) {
-			xid = get_xid();
-			cache_refresh_path(xid, ses, path);
-			free_xid(xid);
+		if (IS_ERR(ses))
+			goto next_tcon;
+
+		down_read(&htable_rw_lock);
+		ce = lookup_cache_entry(path);
+		needs_refresh = IS_ERR(ce) || cache_entry_expired(ce);
+		up_read(&htable_rw_lock);
+
+		if (!needs_refresh)
+			goto next_tcon;
+
+		xid = get_xid();
+		rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+		free_xid(xid);
+
+		/* Create or update a cache entry with the new referral */
+		if (!rc) {
+			down_write(&htable_rw_lock);
+			ce = lookup_cache_entry(path);
+			if (IS_ERR(ce))
+				add_cache_entry_locked(refs, numrefs);
+			else if (cache_entry_expired(ce))
+				update_cache_entry_locked(ce, refs, numrefs);
+			up_write(&htable_rw_lock);
 		}
+
+next_tcon:
+		free_dfs_info_array(refs, numrefs);
 		cifs_put_tcon(tcon);
 	}
 }
@@ -1331,40 +1369,67 @@ static void refresh_cache(struct cifs_ses **sessions)
 	int i;
 	struct cifs_ses *ses;
 	unsigned int xid;
-	int rc;
+	char *ref_paths[CACHE_MAX_ENTRIES];
+	int count = 0;
+	struct cache_entry *ce;
 
 	/*
-	 * Refresh all cached entries.
+	 * Refresh all cached entries.  Get all new referrals outside critical section to avoid
+	 * starvation while performing SMB2 IOCTL on broken or slow connections.
+
 	 * The cache entries may cover more paths than the active mounts
 	 * (e.g. domain-based DFS referrals or multi tier DFS setups).
 	 */
-	down_write(&htable_rw_lock);
+	down_read(&htable_rw_lock);
 	for (i = 0; i < CACHE_HTABLE_SIZE; i++) {
-		struct cache_entry *ce;
 		struct hlist_head *l = &cache_htable[i];
 
 		hlist_for_each_entry(ce, l, hlist) {
-			struct dfs_info3_param *refs = NULL;
-			int numrefs = 0;
-
-			if (hlist_unhashed(&ce->hlist) || !cache_entry_expired(ce))
+			if (count == ARRAY_SIZE(ref_paths))
+				goto out_unlock;
+			if (hlist_unhashed(&ce->hlist) || !cache_entry_expired(ce) ||
+			    IS_ERR(find_ipc_from_server_path(sessions, ce->path)))
 				continue;
-
-			ses = find_ipc_from_server_path(sessions, ce->path);
-			if (IS_ERR(ses))
-				continue;
-
-			xid = get_xid();
-			rc = get_dfs_referral(xid, ses, ce->path, &refs, &numrefs);
-			free_xid(xid);
-
-			if (!rc)
-				update_cache_entry_locked(ce->path, refs, numrefs);
-
-			free_dfs_info_array(refs, numrefs);
+			ref_paths[count++] = kstrdup(ce->path, GFP_ATOMIC);
 		}
 	}
-	up_write(&htable_rw_lock);
+
+out_unlock:
+	up_read(&htable_rw_lock);
+
+	for (i = 0; i < count; i++) {
+		char *path = ref_paths[i];
+		struct dfs_info3_param *refs = NULL;
+		int numrefs = 0;
+		int rc = 0;
+
+		if (!path)
+			continue;
+
+		ses = find_ipc_from_server_path(sessions, path);
+		if (IS_ERR(ses))
+			goto next_referral;
+
+		xid = get_xid();
+		rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+		free_xid(xid);
+
+		if (!rc) {
+			down_write(&htable_rw_lock);
+			ce = lookup_cache_entry(path);
+			/*
+			 * We need to re-check it because other tasks might have it deleted or
+			 * updated.
+			 */
+			if (!IS_ERR(ce) && cache_entry_expired(ce))
+				update_cache_entry_locked(ce, refs, numrefs);
+			up_write(&htable_rw_lock);
+		}
+
+next_referral:
+		kfree(path);
+		free_dfs_info_array(refs, numrefs);
+	}
 }
 
 /*

From 0475c3655e6ebd1d6d6f0e705eba97fce39a08e3 Mon Sep 17 00:00:00 2001
From: Hyunchul Lee <hyc.lee@gmail.com>
Date: Tue, 8 Jun 2021 23:53:14 +0900
Subject: [PATCH 2343/3292] cifs: decoding negTokenInit with generic ASN1
 decoder

Decode negTokenInit with lib/asn1_decoder. For that,
add OIDs in linux/oid_registry.h and a negTokenInit
ASN1 file, "spnego_negtokeninit.asn1".
And define decoder's callback functions, which
are the gssapi_this_mech for checking SPENGO oid and
the neg_token_init_mech_type for getting authentication
mechanisms supported by a server.

Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/Kconfig                       |   2 +
 fs/cifs/Makefile                      |   8 +-
 fs/cifs/asn1.c                        | 647 ++------------------------
 fs/cifs/cifs_spnego_negtokeninit.asn1 |  40 ++
 include/linux/oid_registry.h          |   8 +
 5 files changed, 105 insertions(+), 600 deletions(-)
 create mode 100644 fs/cifs/cifs_spnego_negtokeninit.asn1

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 55bc57e9f812..7364950a9ef4 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -19,6 +19,8 @@ config CIFS
 	select CRYPTO_LIB_DES
 	select KEYS
 	select DNS_RESOLVER
+	select ASN1
+	select OID_REGISTRY
 	help
 	  This is the client VFS module for the SMB3 family of NAS protocols,
 	  (including support for the most recent, most secure dialect SMB3.1.1)
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 3ee3b7de4ded..87fcacdf3de7 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,12 +6,16 @@ ccflags-y += -I$(src)		# needed for trace events
 obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
-	  inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
+	  inode.o link.o misc.o netmisc.o smbencrypt.o transport.o \
 	  cifs_unicode.o nterr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \
 	  smb2ops.o smb2maperror.o smb2transport.o \
 	  smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o \
-	  dns_resolve.o
+	  dns_resolve.o cifs_spnego_negtokeninit.asn1.o asn1.o
+
+$(obj)/asn1.o: $(obj)/cifs_spnego_negtokeninit.asn1.h
+
+$(obj)/cifs_spnego_negtokeninit.asn1.o: $(obj)/cifs_spnego_negtokeninit.asn1.c $(obj)/cifs_spnego_negtokeninit.asn1.h
 
 cifs-$(CONFIG_CIFS_XATTR) += xattr.o
 
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 3150c19cdc2f..b5724ef9f182 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -1,612 +1,63 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The ASB.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in
- * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- */
 
 #include <linux/module.h>
-#include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include "cifspdu.h"
+#include <linux/oid_registry.h>
 #include "cifsglob.h"
 #include "cifs_debug.h"
 #include "cifsproto.h"
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI	0	/* Universal */
-#define ASN1_APL	1	/* Application */
-#define ASN1_CTX	2	/* Context */
-#define ASN1_PRV	3	/* Private */
-
-/* Tag */
-#define ASN1_EOC	0	/* End Of Contents or N/A */
-#define ASN1_BOL	1	/* Boolean */
-#define ASN1_INT	2	/* Integer */
-#define ASN1_BTS	3	/* Bit String */
-#define ASN1_OTS	4	/* Octet String */
-#define ASN1_NUL	5	/* Null */
-#define ASN1_OJI	6	/* Object Identifier  */
-#define ASN1_OJD	7	/* Object Description */
-#define ASN1_EXT	8	/* External */
-#define ASN1_ENUM	10	/* Enumerated */
-#define ASN1_SEQ	16	/* Sequence */
-#define ASN1_SET	17	/* Set */
-#define ASN1_NUMSTR	18	/* Numerical String */
-#define ASN1_PRNSTR	19	/* Printable String */
-#define ASN1_TEXSTR	20	/* Teletext String */
-#define ASN1_VIDSTR	21	/* Video String */
-#define ASN1_IA5STR	22	/* IA5 String */
-#define ASN1_UNITIM	23	/* Universal Time */
-#define ASN1_GENTIM	24	/* General Time */
-#define ASN1_GRASTR	25	/* Graphical String */
-#define ASN1_VISSTR	26	/* Visible String */
-#define ASN1_GENSTR	27	/* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI	0	/* Primitive */
-#define ASN1_CON	1	/* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR		0
-#define ASN1_ERR_DEC_EMPTY		2
-#define ASN1_ERR_DEC_EOC_MISMATCH	3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH	4
-#define ASN1_ERR_DEC_BADVALUE		5
-
-#define SPNEGO_OID_LEN 7
-#define NTLMSSP_OID_LEN  10
-#define KRB5_OID_LEN  7
-#define KRB5U2U_OID_LEN  8
-#define MSKRB5_OID_LEN  7
-static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
-static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
-static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
-static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
-static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx {
-	int error;		/* Error condition */
-	unsigned char *pointer;	/* Octet just to be decoded */
-	unsigned char *begin;	/* First octet */
-	unsigned char *end;	/* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr {
-	unsigned char *data;
-	unsigned int len;
-};
-
-static void
-asn1_open(struct asn1_ctx *ctx, unsigned char *buf, unsigned int len)
-{
-	ctx->begin = buf;
-	ctx->end = buf + len;
-	ctx->pointer = buf;
-	ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char
-asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
-	if (ctx->pointer >= ctx->end) {
-		ctx->error = ASN1_ERR_DEC_EMPTY;
-		return 0;
-	}
-	*ch = *(ctx->pointer)++;
-	return 1;
-}
-
-#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */
-static unsigned char
-asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val)
-{
-	unsigned char ch;
-
-	if (ctx->pointer >= ctx->end) {
-		ctx->error = ASN1_ERR_DEC_EMPTY;
-		return 0;
-	}
-
-	ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to length octet */
-	if ((ch) == ASN1_ENUM)  /* if ch value is ENUM, 0xa */
-		*val = *(++(ctx->pointer)); /* value has enum value */
-	else
-		return 0;
-
-	ctx->pointer++;
-	return 1;
-}
-#endif
-
-static unsigned char
-asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
-	unsigned char ch;
-
-	*tag = 0;
-
-	do {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-		*tag <<= 7;
-		*tag |= ch & 0x7F;
-	} while ((ch & 0x80) == 0x80);
-	return 1;
-}
-
-static unsigned char
-asn1_id_decode(struct asn1_ctx *ctx,
-	       unsigned int *cls, unsigned int *con, unsigned int *tag)
-{
-	unsigned char ch;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*cls = (ch & 0xC0) >> 6;
-	*con = (ch & 0x20) >> 5;
-	*tag = (ch & 0x1F);
-
-	if (*tag == 0x1F) {
-		if (!asn1_tag_decode(ctx, tag))
-			return 0;
-	}
-	return 1;
-}
-
-static unsigned char
-asn1_length_decode(struct asn1_ctx *ctx, unsigned int *def, unsigned int *len)
-{
-	unsigned char ch, cnt;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	if (ch == 0x80)
-		*def = 0;
-	else {
-		*def = 1;
-
-		if (ch < 0x80)
-			*len = ch;
-		else {
-			cnt = (unsigned char) (ch & 0x7F);
-			*len = 0;
-
-			while (cnt > 0) {
-				if (!asn1_octet_decode(ctx, &ch))
-					return 0;
-				*len <<= 8;
-				*len |= ch;
-				cnt--;
-			}
-		}
-	}
-
-	/* don't trust len bigger than ctx buffer */
-	if (*len > ctx->end - ctx->pointer)
-		return 0;
-
-	return 1;
-}
-
-static unsigned char
-asn1_header_decode(struct asn1_ctx *ctx,
-		   unsigned char **eoc,
-		   unsigned int *cls, unsigned int *con, unsigned int *tag)
-{
-	unsigned int def = 0;
-	unsigned int len = 0;
-
-	if (!asn1_id_decode(ctx, cls, con, tag))
-		return 0;
-
-	if (!asn1_length_decode(ctx, &def, &len))
-		return 0;
-
-	/* primitive shall be definite, indefinite shall be constructed */
-	if (*con == ASN1_PRI && !def)
-		return 0;
-
-	if (def)
-		*eoc = ctx->pointer + len;
-	else
-		*eoc = NULL;
-	return 1;
-}
-
-static unsigned char
-asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
-	unsigned char ch;
-
-	if (eoc == NULL) {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		if (ch != 0x00) {
-			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		if (ch != 0x00) {
-			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
-			return 0;
-		}
-		return 1;
-	} else {
-		if (ctx->pointer != eoc) {
-			ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
-			return 0;
-		}
-		return 1;
-	}
-}
-
-/* static unsigned char asn1_null_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc)
-{
-	ctx->pointer = eoc;
-	return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc, long *integer)
-{
-	unsigned char ch;
-	unsigned int len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = (signed char) ch;
-	len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof(long)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc,
-				      unsigned int *integer)
-{
-	unsigned char ch;
-	unsigned int len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = ch;
-	if (ch == 0)
-		len = 0;
-	else
-		len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof(unsigned int)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
-				       unsigned char *eoc,
-				       unsigned long *integer)
-{
-	unsigned char ch;
-	unsigned int len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = ch;
-	if (ch == 0)
-		len = 0;
-	else
-		len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof(unsigned long)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char
-asn1_octets_decode(struct asn1_ctx *ctx,
-		   unsigned char *eoc,
-		   unsigned char **octets, unsigned int *len)
-{
-	unsigned char *ptr;
-
-	*len = 0;
-
-	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
-	if (*octets == NULL) {
-		return 0;
-	}
-
-	ptr = *octets;
-	while (ctx->pointer < eoc) {
-		if (!asn1_octet_decode(ctx, (unsigned char *) ptr++)) {
-			kfree(*octets);
-			*octets = NULL;
-			return 0;
-		}
-		(*len)++;
-	}
-	return 1;
-} */
-
-static unsigned char
-asn1_subid_decode(struct asn1_ctx *ctx, unsigned long *subid)
-{
-	unsigned char ch;
-
-	*subid = 0;
-
-	do {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*subid <<= 7;
-		*subid |= ch & 0x7F;
-	} while ((ch & 0x80) == 0x80);
-	return 1;
-}
-
-static int
-asn1_oid_decode(struct asn1_ctx *ctx,
-		unsigned char *eoc, unsigned long **oid, unsigned int *len)
-{
-	unsigned long subid;
-	unsigned int size;
-	unsigned long *optr;
-
-	size = eoc - ctx->pointer + 1;
-
-	/* first subid actually encodes first two subids */
-	if (size < 2 || size > UINT_MAX/sizeof(unsigned long))
-		return 0;
-
-	*oid = kmalloc_array(size, sizeof(unsigned long), GFP_ATOMIC);
-	if (*oid == NULL)
-		return 0;
-
-	optr = *oid;
-
-	if (!asn1_subid_decode(ctx, &subid)) {
-		kfree(*oid);
-		*oid = NULL;
-		return 0;
-	}
-
-	if (subid < 40) {
-		optr[0] = 0;
-		optr[1] = subid;
-	} else if (subid < 80) {
-		optr[0] = 1;
-		optr[1] = subid - 40;
-	} else {
-		optr[0] = 2;
-		optr[1] = subid - 80;
-	}
-
-	*len = 2;
-	optr += 2;
-
-	while (ctx->pointer < eoc) {
-		if (++(*len) > size) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			kfree(*oid);
-			*oid = NULL;
-			return 0;
-		}
-
-		if (!asn1_subid_decode(ctx, optr++)) {
-			kfree(*oid);
-			*oid = NULL;
-			return 0;
-		}
-	}
-	return 1;
-}
-
-static int
-compare_oid(unsigned long *oid1, unsigned int oid1len,
-	    unsigned long *oid2, unsigned int oid2len)
-{
-	unsigned int i;
-
-	if (oid1len != oid2len)
-		return 0;
-	else {
-		for (i = 0; i < oid1len; i++) {
-			if (oid1[i] != oid2[i])
-				return 0;
-		}
-		return 1;
-	}
-}
-
-	/* BB check for endian conversion issues here */
+#include "cifs_spnego_negtokeninit.asn1.h"
 
 int
 decode_negTokenInit(unsigned char *security_blob, int length,
 		    struct TCP_Server_Info *server)
 {
-	struct asn1_ctx ctx;
-	unsigned char *end;
-	unsigned char *sequence_end;
-	unsigned long *oid = NULL;
-	unsigned int cls, con, tag, oidlen, rc;
-
-	/* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
-
-	asn1_open(&ctx, security_blob, length);
-
-	/* GSSAPI header */
-	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cifs_dbg(FYI, "Error decoding negTokenInit header\n");
+	if (asn1_ber_decoder(&cifs_spnego_negtokeninit_decoder, server,
+			     security_blob, length) == 0)
+		return 1;
+	else
 		return 0;
-	} else if ((cls != ASN1_APL) || (con != ASN1_CON)
-		   || (tag != ASN1_EOC)) {
-		cifs_dbg(FYI, "cls = %d con = %d tag = %d\n", cls, con, tag);
-		return 0;
-	}
-
-	/* Check for SPNEGO OID -- remember to free obj->oid */
-	rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-	if (rc) {
-		if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
-		    (cls == ASN1_UNI)) {
-			rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
-			if (rc) {
-				rc = compare_oid(oid, oidlen, SPNEGO_OID,
-						 SPNEGO_OID_LEN);
-				kfree(oid);
-			}
-		} else
-			rc = 0;
-	}
-
-	/* SPNEGO OID not present or garbled -- bail out */
-	if (!rc) {
-		cifs_dbg(FYI, "Error decoding negTokenInit header\n");
-		return 0;
-	}
-
-	/* SPNEGO */
-	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cifs_dbg(FYI, "Error decoding negTokenInit\n");
-		return 0;
-	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
-		   || (tag != ASN1_EOC)) {
-		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
-			 cls, con, tag, end);
-		return 0;
-	}
-
-	/* negTokenInit */
-	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cifs_dbg(FYI, "Error decoding negTokenInit\n");
-		return 0;
-	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-		   || (tag != ASN1_SEQ)) {
-		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 1\n",
-			 cls, con, tag, end);
-		return 0;
-	}
-
-	/* sequence */
-	if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-		cifs_dbg(FYI, "Error decoding 2nd part of negTokenInit\n");
-		return 0;
-	} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
-		   || (tag != ASN1_EOC)) {
-		cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
-			 cls, con, tag, end);
-		return 0;
-	}
-
-	/* sequence of */
-	if (asn1_header_decode
-	    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
-		cifs_dbg(FYI, "Error decoding 2nd part of negTokenInit\n");
-		return 0;
-	} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-		   || (tag != ASN1_SEQ)) {
-		cifs_dbg(FYI, "cls = %d con = %d tag = %d sequence_end = %p exit 1\n",
-			 cls, con, tag, sequence_end);
-		return 0;
-	}
-
-	/* list of security mechanisms */
-	while (!asn1_eoc_decode(&ctx, sequence_end)) {
-		rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-		if (!rc) {
-			cifs_dbg(FYI, "Error decoding negTokenInit hdr exit2\n");
-			return 0;
-		}
-		if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
-			if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
-
-				cifs_dbg(FYI, "OID len = %d oid = 0x%lx 0x%lx 0x%lx 0x%lx\n",
-					 oidlen, *oid, *(oid + 1), *(oid + 2),
-					 *(oid + 3));
-
-				if (compare_oid(oid, oidlen, MSKRB5_OID,
-						MSKRB5_OID_LEN))
-					server->sec_mskerberos = true;
-				else if (compare_oid(oid, oidlen, KRB5U2U_OID,
-						     KRB5U2U_OID_LEN))
-					server->sec_kerberosu2u = true;
-				else if (compare_oid(oid, oidlen, KRB5_OID,
-						     KRB5_OID_LEN))
-					server->sec_kerberos = true;
-				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
-						     NTLMSSP_OID_LEN))
-					server->sec_ntlmssp = true;
-
-				kfree(oid);
-			}
-		} else {
-			cifs_dbg(FYI, "Should be an oid what is going on?\n");
-		}
-	}
-
-	/*
-	 * We currently ignore anything at the end of the SPNEGO blob after
-	 * the mechTypes have been parsed, since none of that info is
-	 * used at the moment.
-	 */
-	return 1;
+}
+
+int cifs_gssapi_this_mech(void *context, size_t hdrlen,
+			  unsigned char tag, const void *value, size_t vlen)
+{
+	enum OID oid;
+
+	oid = look_up_OID(value, vlen);
+	if (oid != OID_spnego) {
+		char buf[50];
+
+		sprint_oid(value, vlen, buf, sizeof(buf));
+		cifs_dbg(FYI, "Error decoding negTokenInit header: unexpected OID %s\n",
+			 buf);
+		return -EBADMSG;
+	}
+	return 0;
+}
+
+int cifs_neg_token_init_mech_type(void *context, size_t hdrlen,
+				  unsigned char tag,
+				  const void *value, size_t vlen)
+{
+	struct TCP_Server_Info *server = context;
+	enum OID oid;
+
+	oid = look_up_OID(value, vlen);
+	if (oid == OID_mskrb5)
+		server->sec_mskerberos = true;
+	else if (oid == OID_krb5u2u)
+		server->sec_kerberosu2u = true;
+	else if (oid == OID_krb5)
+		server->sec_kerberos = true;
+	else if (oid == OID_ntlmssp)
+		server->sec_ntlmssp = true;
+	else {
+		char buf[50];
+
+		sprint_oid(value, vlen, buf, sizeof(buf));
+		cifs_dbg(FYI, "Decoding negTokenInit: unsupported OID %s\n",
+			 buf);
+	}
+	return 0;
 }
diff --git a/fs/cifs/cifs_spnego_negtokeninit.asn1 b/fs/cifs/cifs_spnego_negtokeninit.asn1
new file mode 100644
index 000000000000..181c083887d5
--- /dev/null
+++ b/fs/cifs/cifs_spnego_negtokeninit.asn1
@@ -0,0 +1,40 @@
+GSSAPI ::=
+	[APPLICATION 0] IMPLICIT SEQUENCE {
+		thisMech
+			OBJECT IDENTIFIER ({cifs_gssapi_this_mech}),
+		negotiationToken
+			NegotiationToken
+	}
+
+MechType ::= OBJECT IDENTIFIER ({cifs_neg_token_init_mech_type})
+
+MechTypeList ::= SEQUENCE OF MechType
+
+NegHints ::= SEQUENCE {
+	hintName
+		[0] GeneralString OPTIONAL,
+	hintAddress
+		[1] OCTET STRING OPTIONAL
+	}
+
+NegTokenInit2 ::=
+	SEQUENCE {
+		mechTypes
+			[0] MechTypeList OPTIONAL,
+		reqFlags
+			[1] BIT STRING OPTIONAL,
+		mechToken
+			[2] OCTET STRING OPTIONAL,
+		negHints
+			[3] NegHints OPTIONAL,
+		mechListMIC
+			[3] OCTET STRING OPTIONAL
+	}
+
+NegotiationToken ::=
+	CHOICE {
+		negTokenInit
+			[0] NegTokenInit2,
+		negTokenTarg
+			[1] ANY
+	}
diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h
index 461b7aa587ba..3d8db1f6a5db 100644
--- a/include/linux/oid_registry.h
+++ b/include/linux/oid_registry.h
@@ -54,6 +54,10 @@ enum OID {
 	OID_md4,			/* 1.2.840.113549.2.4 */
 	OID_md5,			/* 1.2.840.113549.2.5 */
 
+	OID_mskrb5,			/* 1.2.840.48018.1.2.2 */
+	OID_krb5,			/* 1.2.840.113554.1.2.2 */
+	OID_krb5u2u,			/* 1.2.840.113554.1.2.2.3 */
+
 	/* Microsoft Authenticode & Software Publishing */
 	OID_msIndirectData,		/* 1.3.6.1.4.1.311.2.1.4 */
 	OID_msStatementType,		/* 1.3.6.1.4.1.311.2.1.11 */
@@ -62,6 +66,10 @@ enum OID {
 	OID_msIndividualSPKeyPurpose,	/* 1.3.6.1.4.1.311.2.1.21 */
 	OID_msOutlookExpress,		/* 1.3.6.1.4.1.311.16.4 */
 
+	OID_ntlmssp,			/* 1.3.6.1.4.1.311.2.2.10 */
+
+	OID_spnego,			/* 1.3.6.1.5.5.2 */
+
 	OID_certAuthInfoAccess,		/* 1.3.6.1.5.5.7.1.1 */
 	OID_sha1,			/* 1.3.14.3.2.26 */
 	OID_id_ansip384r1,		/* 1.3.132.0.34 */

From 889c2a700799f3b6f82210925e1faf4a9b833c4a Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Mon, 14 Jun 2021 12:58:20 -0300
Subject: [PATCH 2344/3292] cifs: fix check of dfs interlinks

Interlink is a special type of DFS link that resolves to a different
DFS domain-based namespace.  To determine whether it is an interlink
or not, check if ReferralServers and StorageServers bits are set to 1
and 0 respectively in ReferralHeaderFlags, as specified in MS-DFSC
3.1.5.4.5 Determining Whether a Referral Response is an Interlink.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dfs_cache.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 775dbc7b0d7a..7c1769714609 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -26,8 +26,7 @@
 #define CACHE_MAX_ENTRIES 64
 #define CACHE_MIN_TTL 120 /* 2 minutes */
 
-#define IS_INTERLINK_SET(v) ((v) & (DFSREF_REFERRAL_SERVER | \
-				    DFSREF_STORAGE_SERVER))
+#define IS_DFS_INTERLINK(v) (((v) & DFSREF_REFERRAL_SERVER) && !((v) & DFSREF_STORAGE_SERVER))
 
 struct cache_dfs_tgt {
 	char *name;
@@ -284,7 +283,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
 				   "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
 				   ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
 				   ce->ttl, ce->etime.tv_nsec, ce->ref_flags, ce->hdr_flags,
-				   IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
+				   IS_DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no",
 				   ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
 
 			list_for_each_entry(t, &ce->tlist, list) {
@@ -353,7 +352,7 @@ static inline void dump_ce(const struct cache_entry *ce)
 		 ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl,
 		 ce->etime.tv_nsec,
 		 ce->hdr_flags, ce->ref_flags,
-		 IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
+		 IS_DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no",
 		 ce->path_consumed,
 		 cache_entry_expired(ce) ? "yes" : "no");
 	dump_tgts(ce);

From 6efa994e35a402ae4ae2161b6439c94b64816cee Mon Sep 17 00:00:00 2001
From: Thiago Rafael Becker <trbecker@gmail.com>
Date: Tue, 15 Jun 2021 13:42:56 -0300
Subject: [PATCH 2345/3292] cifs: retry lookup and readdir when EAGAIN is
 returned.

According to the investigation performed by Jacob Shivers at Red Hat,
cifs_lookup and cifs_readdir leak EAGAIN when the user session is
deleted on the server. Fix this issue by implementing a retry with
limits, as is implemented in cifs_revalidate_dentry_attr.

Reproducer based on the work by Jacob Shivers:

  ~~~
  $ cat readdir-cifs-test.sh
  #!/bin/bash

  # Install and configure powershell and sshd on the windows
  #  server as descibed in
  # https://docs.microsoft.com/en-us/windows-server/administration/openssh/openssh_overview
  # This script uses expect(1)

  USER=dude
  SERVER=192.168.0.2
  RPATH=root
  PASS='password'

  function debug_funcs {
  	for line in $@ ; do
  		echo "func $line +p" > /sys/kernel/debug/dynamic_debug/control
  	done
  }

  function setup {
  	echo 1 > /proc/fs/cifs/cifsFYI
  	debug_funcs wait_for_compound_request \
                smb2_query_dir_first cifs_readdir \
                compound_send_recv cifs_reconnect_tcon \
                generic_ip_connect cifs_reconnect \
                smb2_reconnect_server smb2_reconnect \
                cifs_readv_from_socket cifs_readv_receive
  	tcpdump -i eth0 -w cifs.pcap host 192.168.2.182 & sleep 5
  	dmesg -C
  }

  function test_call {
  	if [[ $1 == 1 ]] ; then
  		tracer="strace -tt -f -s 4096 -o trace-$(date -Iseconds).txt"
  	fi
        # Change the command here to anything appropriate
  	$tracer ls $2 > /dev/null
  	res=$?
  	if [[ $1 == 1 ]] ; then
  		if [[ $res == 0 ]] ; then
  			1>&2 echo success
  		else
  			1>&2 echo "failure ($res)"
  		fi
  	fi
  }

  mountpoint /mnt > /dev/null || mount -t cifs -o username=$USER,pass=$PASS //$SERVER/$RPATH /mnt

  test_call 0 /mnt/

  /usr/bin/expect << EOF
  	set timeout 60

  	spawn ssh $USER@$SERVER

  	expect "yes/no" {
  		send "yes\r"
  		expect "*?assword" { send "$PASS\r" }
  	} "*?assword" { send "$PASS\r" }

  	expect ">" { send "powershell close-smbsession -force\r" }
  	expect ">" { send "exit\r" }
  	expect eof
  EOF

  sysctl -w vm.drop_caches=2 > /dev/null
  sysctl -w vm.drop_caches=2 > /dev/null

  setup

  test_call 1 /mnt/
  ~~~

Signed-off-by: Thiago Rafael Becker <trbecker@gmail.com>
Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dir.c     | 4 ++++
 fs/cifs/smb2ops.c | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 6bcd3e8f7cda..7c641f9a3dac 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -630,6 +630,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	struct inode *newInode = NULL;
 	const char *full_path;
 	void *page;
+	int retry_count = 0;
 
 	xid = get_xid();
 
@@ -673,6 +674,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
 		 full_path, d_inode(direntry));
 
+again:
 	if (pTcon->posix_extensions)
 		rc = smb311_posix_get_inode_info(&newInode, full_path, parent_dir_inode->i_sb, xid);
 	else if (pTcon->unix_ext) {
@@ -687,6 +689,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		/* since paths are not looked up by component - the parent
 		   directories are presumed to be good here */
 		renew_parental_timestamps(direntry);
+	} else if (rc == -EAGAIN && retry_count++ < 10) {
+		goto again;
 	} else if (rc == -ENOENT) {
 		cifs_set_time(direntry, jiffies);
 		newInode = NULL;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b68ba92893b6..903de7449aa3 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2325,6 +2325,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	struct smb2_query_directory_rsp *qd_rsp = NULL;
 	struct smb2_create_rsp *op_rsp = NULL;
 	struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
+	int retry_count = 0;
 
 	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
 	if (!utf16_path)
@@ -2372,10 +2373,14 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 
 	smb2_set_related(&rqst[1]);
 
+again:
 	rc = compound_send_recv(xid, tcon->ses, server,
 				flags, 2, rqst,
 				resp_buftype, rsp_iov);
 
+	if (rc == -EAGAIN && retry_count++ < 10)
+		goto again;
+
 	/* If the open failed there is nothing to do */
 	op_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
 	if (op_rsp == NULL || op_rsp->sync_hdr.Status != STATUS_SUCCESS) {

From ca38fabc31ecf8a2cd14ff92a68936db59106409 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Fri, 18 Jun 2021 10:58:30 +1000
Subject: [PATCH 2346/3292] cifs: avoid extra calls in posix_info_parse

In posix_info_parse() we call posix_info_sid_size twice for each of the owner and the group
sid. The first time to check that it is valid, i.e. >= 0 and the second time
to just pass it in as a length to memcpy().
As this is a pure function we know that it can not be negative the second time and this
is technically a false warning in coverity.
However, as it is a pure function we are just wasting cycles by calling it a second time.
Record the length from the first time we call it and save some cycles as well as make
Coverity happy.

Addresses-Coverity-ID: 1491379 ("Argument can not be negative")

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c205f93e0a10..4a244cc4e902 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -4498,7 +4498,7 @@ int posix_info_parse(const void *beg, const void *end,
 
 {
 	int total_len = 0;
-	int sid_len;
+	int owner_len, group_len;
 	int name_len;
 	const void *owner_sid;
 	const void *group_sid;
@@ -4521,17 +4521,17 @@ int posix_info_parse(const void *beg, const void *end,
 
 	/* check owner sid */
 	owner_sid = beg + total_len;
-	sid_len = posix_info_sid_size(owner_sid, end);
-	if (sid_len < 0)
+	owner_len = posix_info_sid_size(owner_sid, end);
+	if (owner_len < 0)
 		return -1;
-	total_len += sid_len;
+	total_len += owner_len;
 
 	/* check group sid */
 	group_sid = beg + total_len;
-	sid_len = posix_info_sid_size(group_sid, end);
-	if (sid_len < 0)
+	group_len = posix_info_sid_size(group_sid, end);
+	if (group_len < 0)
 		return -1;
-	total_len += sid_len;
+	total_len += group_len;
 
 	/* check name len */
 	if (beg + total_len + 4 > end)
@@ -4552,10 +4552,8 @@ int posix_info_parse(const void *beg, const void *end,
 		out->size = total_len;
 		out->name_len = name_len;
 		out->name = name;
-		memcpy(&out->owner, owner_sid,
-		       posix_info_sid_size(owner_sid, end));
-		memcpy(&out->group, group_sid,
-		       posix_info_sid_size(group_sid, end));
+		memcpy(&out->owner, owner_sid, owner_len);
+		memcpy(&out->group, group_sid, group_len);
 	}
 	return total_len;
 }

From 647f592734a033b7f23865190de5df05e50381d0 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Fri, 18 Jun 2021 12:02:32 +0800
Subject: [PATCH 2347/3292] cifs: convert list_for_each to entry variant in
 smb2misc.c

convert list_for_each() to list_for_each_entry() where
applicable.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2misc.c | 33 ++++++++-------------------------
 1 file changed, 8 insertions(+), 25 deletions(-)

diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 06d555d4da9a..aba048153f79 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -164,12 +164,10 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
 		struct smb2_transform_hdr *thdr =
 			(struct smb2_transform_hdr *)buf;
 		struct cifs_ses *ses = NULL;
-		struct list_head *tmp;
 
 		/* decrypt frame now that it is completely read in */
 		spin_lock(&cifs_tcp_ses_lock);
-		list_for_each(tmp, &srvr->smb_ses_list) {
-			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+		list_for_each_entry(ses, &srvr->smb_ses_list, smb_ses_list) {
 			if (ses->Suid == thdr->SessionId)
 				break;
 
@@ -548,7 +546,6 @@ static bool
 smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp)
 {
 	__u8 lease_state;
-	struct list_head *tmp;
 	struct cifsFileInfo *cfile;
 	struct cifsInodeInfo *cinode;
 	int ack_req = le32_to_cpu(rsp->Flags &
@@ -556,8 +553,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp)
 
 	lease_state = le32_to_cpu(rsp->NewLeaseState);
 
-	list_for_each(tmp, &tcon->openFileList) {
-		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
 		cinode = CIFS_I(d_inode(cfile->dentry));
 
 		if (memcmp(cinode->lease_key, rsp->LeaseKey,
@@ -618,7 +614,6 @@ static bool
 smb2_is_valid_lease_break(char *buffer)
 {
 	struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
-	struct list_head *tmp, *tmp1, *tmp2;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
@@ -628,15 +623,9 @@ smb2_is_valid_lease_break(char *buffer)
 
 	/* look up tcon based on tid & uid */
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each(tmp, &cifs_tcp_ses_list) {
-		server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list);
-
-		list_for_each(tmp1, &server->smb_ses_list) {
-			ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
-
-			list_for_each(tmp2, &ses->tcon_list) {
-				tcon = list_entry(tmp2, struct cifs_tcon,
-						  tcon_list);
+	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+		list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+			list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
 				spin_lock(&tcon->open_file_lock);
 				cifs_stats_inc(
 				    &tcon->stats.cifs_stats.num_oplock_brks);
@@ -687,7 +676,6 @@ bool
 smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 {
 	struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
-	struct list_head *tmp, *tmp1, *tmp2;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
 	struct cifsInodeInfo *cinode;
@@ -710,16 +698,11 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 
 	/* look up tcon based on tid & uid */
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each(tmp, &server->smb_ses_list) {
-		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
-
-		list_for_each(tmp1, &ses->tcon_list) {
-			tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+		list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
 
 			spin_lock(&tcon->open_file_lock);
-			list_for_each(tmp2, &tcon->openFileList) {
-				cfile = list_entry(tmp2, struct cifsFileInfo,
-						     tlist);
+			list_for_each_entry(cfile, &tcon->openFileList, tlist) {
 				if (rsp->PersistentFid !=
 				    cfile->fid.persistent_fid ||
 				    rsp->VolatileFid !=

From a506ccb47c9e93fde843532670a7805452bf699b Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Thu, 17 Jun 2021 19:36:40 +0800
Subject: [PATCH 2348/3292] cifs: convert list_for_each to entry variant in
 cifs_debug.c

convert list_for_each() to list_for_each_entry() where
applicable.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_debug.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 68e8e5b27841..8857ac7e7a14 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -50,7 +50,6 @@ void cifs_dump_detail(void *buf, struct TCP_Server_Info *server)
 void cifs_dump_mids(struct TCP_Server_Info *server)
 {
 #ifdef CONFIG_CIFS_DEBUG2
-	struct list_head *tmp;
 	struct mid_q_entry *mid_entry;
 
 	if (server == NULL)
@@ -58,8 +57,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
 
 	cifs_dbg(VFS, "Dump pending requests:\n");
 	spin_lock(&GlobalMid_Lock);
-	list_for_each(tmp, &server->pending_mid_q) {
-		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+	list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
 		cifs_dbg(VFS, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu\n",
 			 mid_entry->mid_state,
 			 le16_to_cpu(mid_entry->command),
@@ -168,7 +166,7 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
 
 static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
 {
-	struct list_head *stmp, *tmp, *tmp1, *tmp2;
+	struct list_head *tmp, *tmp1, *tmp2;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
@@ -183,9 +181,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, " <filename>\n");
 #endif /* CIFS_DEBUG2 */
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each(stmp, &cifs_tcp_ses_list) {
-		server = list_entry(stmp, struct TCP_Server_Info,
-				    tcp_ses_list);
+	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
 		list_for_each(tmp, &server->smb_ses_list) {
 			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
 			list_for_each(tmp1, &ses->tcon_list) {
@@ -220,7 +216,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
 
 static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 {
-	struct list_head *tmp1, *tmp2, *tmp3;
+	struct list_head *tmp2, *tmp3;
 	struct mid_q_entry *mid_entry;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
@@ -278,11 +274,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 
 	c = 0;
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each(tmp1, &cifs_tcp_ses_list) {
-		server = list_entry(tmp1, struct TCP_Server_Info,
-				    tcp_ses_list);
-
-		/* channel info will be printed as a part of sessions below */
+	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
 		if (server->is_channel)
 			continue;
 
@@ -563,7 +555,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
 #ifdef CONFIG_CIFS_STATS2
 	int j;
 #endif /* STATS2 */
-	struct list_head *tmp1, *tmp2, *tmp3;
+	struct list_head *tmp2, *tmp3;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
@@ -594,9 +586,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
 
 	i = 0;
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each(tmp1, &cifs_tcp_ses_list) {
-		server = list_entry(tmp1, struct TCP_Server_Info,
-				    tcp_ses_list);
+	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
 		seq_printf(m, "\nMax requests in flight: %d", server->max_in_flight);
 #ifdef CONFIG_CIFS_STATS2
 		seq_puts(m, "\nTotal time spent processing by command. Time ");

From 929be906fa83151c4c77c07cd3d780439d38b153 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 18 Jun 2021 00:31:49 -0500
Subject: [PATCH 2349/3292] cifs: use SPDX-Licence-Identifier

Add SPDX license identifier and replace license boilerplate.
Corrects various checkpatch errors with the older format for
noting the LGPL license.

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cache.c         | 14 +-------------
 fs/cifs/cifs_debug.h    |  2 +-
 fs/cifs/cifs_fs_sb.h    | 11 +----------
 fs/cifs/cifs_ioctl.h    | 11 +----------
 fs/cifs/cifs_spnego.c   | 14 +-------------
 fs/cifs/cifs_spnego.h   | 14 +-------------
 fs/cifs/cifsacl.c       | 14 +-------------
 fs/cifs/cifsacl.h       | 15 +--------------
 fs/cifs/cifsencrypt.c   | 14 +-------------
 fs/cifs/cifsfs.c        | 14 +-------------
 fs/cifs/cifsfs.h        | 14 +-------------
 fs/cifs/cifsglob.h      | 11 +----------
 fs/cifs/cifspdu.h       | 14 +-------------
 fs/cifs/cifsproto.h     | 14 +-------------
 fs/cifs/cifssmb.c       | 14 +-------------
 fs/cifs/connect.c       | 14 +-------------
 fs/cifs/dir.c           | 14 +-------------
 fs/cifs/dns_resolve.c   | 14 +-------------
 fs/cifs/dns_resolve.h   | 14 +-------------
 fs/cifs/export.c        | 14 +-------------
 fs/cifs/file.c          | 14 +-------------
 fs/cifs/fscache.c       | 14 +-------------
 fs/cifs/fscache.h       | 14 +-------------
 fs/cifs/inode.c         | 14 +-------------
 fs/cifs/ioctl.c         | 14 +-------------
 fs/cifs/link.c          | 14 +-------------
 fs/cifs/misc.c          | 14 +-------------
 fs/cifs/ntlmssp.h       | 14 +-------------
 fs/cifs/readdir.c       | 14 +-------------
 fs/cifs/rfc1002pdu.h    | 14 +-------------
 fs/cifs/sess.c          | 14 +-------------
 fs/cifs/smb2file.c      | 14 +-------------
 fs/cifs/smb2glob.h      | 11 +----------
 fs/cifs/smb2inode.c     | 14 +-------------
 fs/cifs/smb2maperror.c  | 14 +-------------
 fs/cifs/smb2misc.c      | 14 +-------------
 fs/cifs/smb2pdu.c       | 14 +-------------
 fs/cifs/smb2pdu.h       | 14 +-------------
 fs/cifs/smb2proto.h     | 14 +-------------
 fs/cifs/smb2status.h    | 14 +-------------
 fs/cifs/smb2transport.c | 14 +-------------
 fs/cifs/smberr.h        | 14 +-------------
 fs/cifs/smbfsctl.h      | 14 +-------------
 fs/cifs/transport.c     | 14 +-------------
 fs/cifs/xattr.c         | 14 +-------------
 45 files changed, 45 insertions(+), 562 deletions(-)

diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 488fe0ffc1ef..8a3b30ec860c 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/cache.c - CIFS filesystem cache index structure definitions
  *
  *   Copyright (c) 2010 Novell, Inc.
  *   Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include "fscache.h"
 #include "cifs_debug.h"
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 5e66dab712d0..ee4ea2b60c0f 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -3,7 +3,7 @@
  *
  *   Copyright (c) International Business Machines  Corp., 2000,2002
  *   Modified by Steve French (sfrench@us.ibm.com)
-*/
+ */
 
 #ifndef _H_CIFS_DEBUG
 #define _H_CIFS_DEBUG
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 64990ccd1b9c..4fd788586399 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -1,19 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifs_fs_sb.h
  *
  *   Copyright (c) International Business Machines  Corp., 2002,2004
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
  */
 #include <linux/rbtree.h>
 
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 37fc7d6ac457..ef723be358af 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifs_ioctl.h
  *
@@ -5,16 +6,6 @@
  *
  *   Copyright (c) 2015 Steve French <steve.french@primarydata.com>
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
  */
 
 struct smb_mnt_fs_info {
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7b9b876b513b..8fa26a8530f8 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/cifs_spnego.c -- SPNEGO upcall management for CIFS
  *
  *   Copyright (c) 2007 Red Hat, Inc.
  *   Author(s): Jeff Layton (jlayton@redhat.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/list.h>
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index 31bef9ee078b..31387d0ea32e 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifs_spnego.h -- SPNEGO upcall management for CIFS
  *
@@ -5,19 +6,6 @@
  *   Author(s): Jeff Layton (jlayton@redhat.com)
  *              Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _CIFS_SPNEGO_H
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 784407f9280f..3898a9e6d3c6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/cifsacl.c
  *
@@ -6,19 +7,6 @@
  *
  *   Contains the routines for mapping CIFS/NTFS ACLs
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/fs.h>
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index d9e704979d99..f8292bcf8594 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -1,28 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifsacl.h
  *
  *   Copyright (c) International Business Machines  Corp., 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _CIFSACL_H
 #define _CIFSACL_H
 
-
 #define NUM_AUTHS (6)	/* number of authority fields */
 #define SID_MAX_SUB_AUTHORITIES (15) /* max number of sub authority fields */
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index b8f1ff9a83f3..ecf15d845dbd 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/cifsencrypt.c
  *
@@ -7,19 +8,6 @@
  *   Copyright (C) International Business Machines  Corp., 2005,2013
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/fs.h>
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2ffcb29d5c8f..9fb874dd8d24 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/cifsfs.c
  *
@@ -6,19 +7,6 @@
  *
  *   Common Internet FileSystem (CIFS) client
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 /* Note that BB means BUGBUG (ie something to fix eventually) */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6beddb108ba0..177f3e7ab86d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifsfs.h
  *
  *   Copyright (c) International Business Machines  Corp., 2002, 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _CIFSFS_H
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d85ef3b41bd9..988346904fd0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifsglob.h
  *
@@ -5,16 +6,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *              Jeremy Allison (jra@samba.org)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
  */
 #ifndef _CIFS_GLOB_H
 #define _CIFS_GLOB_H
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 554d64fe171e..0923f72d27e9 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifspdu.h
  *
  *   Copyright (c) International Business Machines  Corp., 2002,2009
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _CIFSPDU_H
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index d30cba44ba29..e0def0f0714b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/cifsproto.h
  *
  *   Copyright (c) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #ifndef _CIFSPROTO_H
 #define _CIFSPROTO_H
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 234337865b96..58ebec4d4413 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/cifssmb.c
  *
@@ -6,19 +7,6 @@
  *
  *   Contains the routines for constructing the SMB PDUs themselves
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
  /* SMB/CIFS PDU handling routines here - except for leftovers in connect.c   */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 43c7e4b27f46..8d95607a9312 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/connect.c
  *
  *   Copyright (C) International Business Machines  Corp., 2002,2011
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/net.h>
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7c641f9a3dac..912333082b18 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/dir.c
  *
@@ -6,19 +7,6 @@
  *   Copyright (C) International Business Machines  Corp., 2002,2009
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 534cbba72789..d15b82d569ef 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *  fs/cifs/dns_resolve.c
  *
@@ -10,19 +11,6 @@
  *   Contains the CIFS DFS upcall routines used for hostname to
  *   IP address translation.
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/slab.h>
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index d3f5d27f4d06..5be060b82b13 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS
  *                            Handles host name to IP address resolution
@@ -5,19 +6,6 @@
  *   Copyright (c) International Business Machines  Corp., 2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _DNS_RESOLVE_H
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index eb0bb8ca8e63..747a540db954 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/export.c
  *
@@ -8,19 +9,6 @@
  *
  *   Operations related to support for exporting files via NFSD
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
  /*
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 379a427f3c2f..cd108607a070 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/file.c
  *
@@ -7,19 +8,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *              Jeremy Allison (jra@samba.org)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/backing-dev.h>
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 20d24af33ee2..dd625033cd6b 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/fscache.c - CIFS filesystem cache interface
  *
  *   Copyright (c) 2010 Novell, Inc.
  *   Author(s): Suresh Jayaraman <sjayaraman@suse.de>
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include "fscache.h"
 #include "cifsglob.h"
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index e811f2dd7619..3d55cb2ef055 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/fscache.h - CIFS filesystem cache interface definitions
  *
  *   Copyright (c) 2010 Novell, Inc.
  *   Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #ifndef _CIFS_FSCACHE_H
 #define _CIFS_FSCACHE_H
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1dfa57982522..046b51dd34df 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/inode.c
  *
  *   Copyright (C) International Business Machines  Corp., 2002,2010
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index d67d281ab863..42c6a0bac6c8 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/ioctl.c
  *
@@ -6,19 +7,6 @@
  *   Copyright (C) International Business Machines  Corp., 2005,2013
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/fs.h>
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 970fcf2adb08..f0a6d63bc08c 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/link.c
  *
  *   Copyright (C) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 7207a63819cb..184138b4eb8c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/misc.c
  *
  *   Copyright (C) International Business Machines  Corp., 2002,2008
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/slab.h>
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 3079b38f0afb..378133ce8869 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/ntlmssp.h
  *
  *   Copyright (c) International Business Machines  Corp., 2002,2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #define NTLMSSP_SIGNATURE "NTLMSSP"
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 63bfc533c9fb..00b6b953d13c 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/readdir.c
  *
@@ -7,19 +8,6 @@
  *   Copyright (C) Red Hat, Inc., 2011
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/pagemap.h>
diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h
index 8b69fcceb597..137f7c95afd6 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/cifs/rfc1002pdu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/rfc1002pdu.h
  *
@@ -6,19 +7,6 @@
  *   Copyright (c) International Business Machines  Corp., 2004
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 /* NB: unlike smb/cifs packets, the RFC1002 structures are big endian */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index cd19aa11f27e..c5785fd3f52e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/sess.c
  *
@@ -6,19 +7,6 @@
  *   Copyright (c) International Business Machines  Corp., 2006, 2009
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include "cifspdu.h"
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 2fa3ba354cc9..c9d8a50062b8 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/smb2file.c
  *
@@ -5,19 +6,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com),
  *              Pavel Shilovsky ((pshilovsky@samba.org) 2012
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index d9a990c99121..d0e9f3782bd9 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/smb2glob.h
  *
@@ -9,16 +10,6 @@
  *              Jeremy Allison (jra@samba.org)
  *              Pavel Shilovsky (pshilovsky@samba.org) 2012
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
  */
 #ifndef _SMB2_GLOB_H
 #define _SMB2_GLOB_H
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 9a61209a283e..957b2594f02e 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/smb2inode.c
  *
@@ -6,19 +7,6 @@
  *   Author(s): Pavel Shilovsky (pshilovsky@samba.org),
  *              Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index c775682ee973..cea39bcecbab 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/smb2/smb2maperror.c
  *
@@ -6,19 +7,6 @@
  *   Copyright (C) International Business Machines  Corp., 2009
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/errno.h>
 #include "cifsglob.h"
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index aba048153f79..c6bb2ea1983b 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/smb2misc.c
  *
@@ -6,19 +7,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *              Pavel Shilovsky (pshilovsky@samba.org) 2012
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/ctype.h>
 #include "smb2pdu.h"
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 4a244cc4e902..29b729de2b27 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/smb2pdu.c
  *
@@ -8,19 +9,6 @@
  *
  *   Contains the routines for constructing the SMB2 PDUs themselves
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
  /* SMB2 PDU handling routines here - except for leftovers (eg session setup) */
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 6442dc1c292b..653486243cdf 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/smb2pdu.h
  *
@@ -6,19 +7,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *              Pavel Shilovsky (pshilovsky@samba.org) 2012
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #ifndef _SMB2PDU_H
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 4152077e3c07..263767f644f8 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/smb2proto.h
  *
@@ -6,19 +7,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *              Pavel Shilovsky (pshilovsky@samba.org) 2012
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #ifndef _SMB2PROTO_H
 #define _SMB2PROTO_H
diff --git a/fs/cifs/smb2status.h b/fs/cifs/smb2status.h
index 7505056e9580..0215ef36e240 100644
--- a/fs/cifs/smb2status.h
+++ b/fs/cifs/smb2status.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/smb2status.h
  *
@@ -7,19 +8,6 @@
  *   Copyright (c) International Business Machines  Corp., 2009,2011
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 /*
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 0ab1d464e8c8..6f7952ea4941 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/smb2transport.c
  *
@@ -7,19 +8,6 @@
  *              Jeremy Allison (jra@samba.org) 2006
  *              Pavel Shilovsky (pshilovsky@samba.org) 2012
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/fs.h>
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index 7f16cb825fe5..60189efb3236 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/smberr.h
  *
@@ -7,19 +8,6 @@
  *   See Error Codes section of the SNIA CIFS Specification
  *   for more information
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #define SUCCESS	0x00	/* The request was successful. */
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index a0e84747f567..d0fc42061f49 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
 /*
  *   fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
  *
  *   Copyright (c) International Business Machines  Corp., 2002,2013
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 /* IOCTL information */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c1725b55f364..f65f9a692ca2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/transport.c
  *
@@ -5,19 +6,6 @@
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *   Jeremy Allison (jra@samba.org) 2006.
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/fs.h>
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index aa3e8ca0457c..9ed481e79ce0 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  *   fs/cifs/xattr.c
  *
  *   Copyright (c) International Business Machines  Corp., 2003, 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
 #include <linux/fs.h>

From 3559134eccad022fc4a5a56910e8f7d38ab0d30d Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 19 Jun 2021 12:01:37 -0500
Subject: [PATCH 2350/3292] cifs: fix unneeded null check

tcon can not be null in SMB2_tcon function so the check
is not relevant and removing it makes Coverity happy.

Acked-by: Ronnie Sahlberg <lsahlber@redhat.com>
Addresses-Coverity: 13250131 ("Dereference before null check")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 29b729de2b27..0005989d281a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1779,10 +1779,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
 	trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc);
 	if (rc != 0) {
-		if (tcon) {
-			cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
-			tcon->need_reconnect = true;
-		}
+		cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
+		tcon->need_reconnect = true;
 		goto tcon_error_exit;
 	}
 

From ff93b71a3eff25fe9d4364ef13b6e01d935600c6 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 19 Jun 2021 12:22:20 -0500
Subject: [PATCH 2351/3292] smb3: fix uninitialized value for port in witness
 protocol move

Although in practice this can not occur (since IPv4 and IPv6 are the
only two cases currently supported), it is cleaner to avoid uninitialized
variable warnings.

Addresses smatch warning:
  fs/cifs/cifs_swn.c:468 cifs_swn_store_swn_addr() error: uninitialized symbol 'port'.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
CC: Samuel Cabrero <scabrero@suse.de>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_swn.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index d829b8bf833e..93b47818c6c2 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -447,15 +447,13 @@ static int cifs_swn_store_swn_addr(const struct sockaddr_storage *new,
 				   const struct sockaddr_storage *old,
 				   struct sockaddr_storage *dst)
 {
-	__be16 port;
+	__be16 port = cpu_to_be16(CIFS_PORT);
 
 	if (old->ss_family == AF_INET) {
 		struct sockaddr_in *ipv4 = (struct sockaddr_in *)old;
 
 		port = ipv4->sin_port;
-	}
-
-	if (old->ss_family == AF_INET6) {
+	} else if (old->ss_family == AF_INET6) {
 		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)old;
 
 		port = ipv6->sin6_port;
@@ -465,9 +463,7 @@ static int cifs_swn_store_swn_addr(const struct sockaddr_storage *new,
 		struct sockaddr_in *ipv4 = (struct sockaddr_in *)new;
 
 		ipv4->sin_port = port;
-	}
-
-	if (new->ss_family == AF_INET6) {
+	} else if (new->ss_family == AF_INET6) {
 		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)new;
 
 		ipv6->sin6_port = port;

From e39df24169a2ceb0d359eb3a05ff982711f2eb32 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 19 Jun 2021 15:53:18 -0500
Subject: [PATCH 2352/3292] cifs: fix SMB1 error path in
 cifs_get_file_info_unix

We were trying to fill in uninitialized file attributes in the error case.

Addresses-Coverity: 139689 ("Uninitialized variables")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 046b51dd34df..b96b253e7635 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -355,9 +355,12 @@ cifs_get_file_info_unix(struct file *filp)
 	} else if (rc == -EREMOTE) {
 		cifs_create_dfs_fattr(&fattr, inode->i_sb);
 		rc = 0;
-	}
+	} else
+		goto cifs_gfiunix_out;
 
 	rc = cifs_fattr_to_inode(inode, &fattr);
+
+cifs_gfiunix_out:
 	free_xid(xid);
 	return rc;
 }

From e0ae8a9aaeed7262f349e025b2162e8a13cb58d9 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 19 Jun 2021 16:19:09 -0500
Subject: [PATCH 2353/3292] smb311: remove dead code for non compounded posix
 query info

Although we may need this in some cases in the future, remove the
currently unused, non-compounded version of POSIX query info,
SMB11_posix_query_info (instead smb311_posix_query_path_info is now
called e.g. when revalidating dentries or retrieving info for getattr)

Addresses-Coverity: 1495708 ("Resource leaks")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0005989d281a..31784e3fa96f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3470,6 +3470,8 @@ int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 			  NULL);
 }
 
+#if 0
+/* currently unused, as now we are doing compounding instead (see smb311_posix_query_path_info) */
 int
 SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 		u64 persistent_fid, u64 volatile_fid, struct smb311_posix_qinfo *data, u32 *plen)
@@ -3481,7 +3483,9 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 	return query_info(xid, tcon, persistent_fid, volatile_fid,
 			  SMB_FIND_FILE_POSIX_INFO, SMB2_O_INFO_FILE, 0,
 			  output_len, sizeof(struct smb311_posix_qinfo), (void **)&data, plen);
+	/* Note caller must free "data" (passed in above). It may be allocated in query_info call */
 }
+#endif
 
 int
 SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,

From 3c3ee16532c1be92350a2a88bd19283b7bdf32e9 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Sun, 20 Jun 2021 20:01:09 -0700
Subject: [PATCH 2354/3292] nvmet: use NVMET_MAX_NAMESPACES to set nn value

For Spec regarding MNAN value:-

If the controller supports Asymmetric Namespace Access Reporting, then
this field shall be set to a non-zero value that is less than or equal
to the NN value.

Instead of using subsys->max_nsid that gets calculated dynamically,
use NVMET_MAX_NAMESPACES value to report NN. This way we will maintain
the MNAN value spec compliant with NN.

Without this patch, code results in the following error :-

[337976.409142] nvme nvme1: Invalid MNAN value 1024

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 363e357d2f20..0cb98f2bbc8c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -422,7 +422,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
 	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
 
-	id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
+	id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
 			NVME_CTRL_ONCS_WRITE_ZEROES);

From cfc61c598e43772cc4f76b8fc40c5ec70675716b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jun 2021 15:51:56 +0200
Subject: [PATCH 2355/3292] xfrm: replay: avoid xfrm replay notify indirection

replay protection is implemented using a callback structure and then
called via

   x->repl->notify(), x->repl->recheck(), and so on.

all the differect functions are always built-in, so this could be direct
calls instead.

This first patch prepares for removal of the x->repl structure.
Add an enum with the three available replay modes to the xfrm_state
structure and then replace all x->repl->notify() calls by the new
xfrm_replay_notify() helper.

The helper checks the enum internally to adapt behaviour as needed.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     | 11 ++++++++++-
 net/xfrm/xfrm_replay.c | 45 ++++++++++++++++++++++++++----------------
 net/xfrm/xfrm_state.c  |  2 +-
 3 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3a01570410ab..9a79e41defa7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -145,6 +145,12 @@ enum {
 	XFRM_MODE_FLAG_TUNNEL = 1,
 };
 
+enum xfrm_replay_mode {
+	XFRM_REPLAY_MODE_LEGACY,
+	XFRM_REPLAY_MODE_BMP,
+	XFRM_REPLAY_MODE_ESN,
+};
+
 /* Full description of state of transformer. */
 struct xfrm_state {
 	possible_net_t		xs_net;
@@ -218,6 +224,8 @@ struct xfrm_state {
 	/* The functions for replay detection. */
 	const struct xfrm_replay *repl;
 
+	/* replay detection mode */
+	enum xfrm_replay_mode    repl_mode;
 	/* internal flag that only holds state for delayed aevent at the
 	 * moment
 	*/
@@ -305,7 +313,6 @@ struct xfrm_replay {
 	int	(*recheck)(struct xfrm_state *x,
 			   struct sk_buff *skb,
 			   __be32 net_seq);
-	void	(*notify)(struct xfrm_state *x, int event);
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
@@ -1715,6 +1722,8 @@ static inline int xfrm_policy_id2dir(u32 index)
 }
 
 #ifdef CONFIG_XFRM
+void xfrm_replay_notify(struct xfrm_state *x, int event);
+
 static inline int xfrm_aevent_is_on(struct net *net)
 {
 	struct sock *nlsk;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index c6a4338a0d08..5feeb65f00b3 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -34,8 +34,11 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
 	return seq_hi;
 }
 EXPORT_SYMBOL(xfrm_replay_seqhi);
-;
-static void xfrm_replay_notify(struct xfrm_state *x, int event)
+
+static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event);
+static void xfrm_replay_notify_esn(struct xfrm_state *x, int event);
+
+void xfrm_replay_notify(struct xfrm_state *x, int event)
 {
 	struct km_event c;
 	/* we send notify messages in case
@@ -48,6 +51,17 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
 	 *  The state structure must be locked!
 	 */
 
+	switch (x->repl_mode) {
+	case XFRM_REPLAY_MODE_LEGACY:
+		break;
+	case XFRM_REPLAY_MODE_BMP:
+		xfrm_replay_notify_bmp(x, event);
+		return;
+	case XFRM_REPLAY_MODE_ESN:
+		xfrm_replay_notify_esn(x, event);
+		return;
+	}
+
 	switch (event) {
 	case XFRM_REPLAY_UPDATE:
 		if (!x->replay_maxdiff ||
@@ -98,7 +112,7 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
 			return err;
 		}
 		if (xfrm_aevent_is_on(net))
-			x->repl->notify(x, XFRM_REPLAY_UPDATE);
+			xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 	}
 
 	return err;
@@ -157,7 +171,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 	}
 
 	if (xfrm_aevent_is_on(xs_net(x)))
-		x->repl->notify(x, XFRM_REPLAY_UPDATE);
+		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 }
 
 static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
@@ -178,7 +192,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
 			return err;
 		}
 		if (xfrm_aevent_is_on(net))
-			x->repl->notify(x, XFRM_REPLAY_UPDATE);
+			xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 	}
 
 	return err;
@@ -273,7 +287,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
 	replay_esn->bmp[nr] |= (1U << bitnr);
 
 	if (xfrm_aevent_is_on(xs_net(x)))
-		x->repl->notify(x, XFRM_REPLAY_UPDATE);
+		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 }
 
 static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
@@ -416,7 +430,7 @@ static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
 			}
 		}
 		if (xfrm_aevent_is_on(net))
-			x->repl->notify(x, XFRM_REPLAY_UPDATE);
+			xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 	}
 
 	return err;
@@ -548,7 +562,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
 	replay_esn->bmp[nr] |= (1U << bitnr);
 
 	if (xfrm_aevent_is_on(xs_net(x)))
-		x->repl->notify(x, XFRM_REPLAY_UPDATE);
+		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 }
 
 #ifdef CONFIG_XFRM_OFFLOAD
@@ -585,7 +599,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
 		x->replay.oseq = oseq;
 
 		if (xfrm_aevent_is_on(net))
-			x->repl->notify(x, XFRM_REPLAY_UPDATE);
+			xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 	}
 
 	return err;
@@ -625,7 +639,7 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff
 		}
 
 		if (xfrm_aevent_is_on(net))
-			x->repl->notify(x, XFRM_REPLAY_UPDATE);
+			xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 	}
 
 	return err;
@@ -674,7 +688,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 		replay_esn->oseq = oseq;
 
 		if (xfrm_aevent_is_on(net))
-			x->repl->notify(x, XFRM_REPLAY_UPDATE);
+			xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
 	}
 
 	return err;
@@ -684,7 +698,6 @@ static const struct xfrm_replay xfrm_replay_legacy = {
 	.advance	= xfrm_replay_advance,
 	.check		= xfrm_replay_check,
 	.recheck	= xfrm_replay_check,
-	.notify		= xfrm_replay_notify,
 	.overflow	= xfrm_replay_overflow_offload,
 };
 
@@ -692,7 +705,6 @@ static const struct xfrm_replay xfrm_replay_bmp = {
 	.advance	= xfrm_replay_advance_bmp,
 	.check		= xfrm_replay_check_bmp,
 	.recheck	= xfrm_replay_check_bmp,
-	.notify		= xfrm_replay_notify_bmp,
 	.overflow	= xfrm_replay_overflow_offload_bmp,
 };
 
@@ -700,7 +712,6 @@ static const struct xfrm_replay xfrm_replay_esn = {
 	.advance	= xfrm_replay_advance_esn,
 	.check		= xfrm_replay_check_esn,
 	.recheck	= xfrm_replay_recheck_esn,
-	.notify		= xfrm_replay_notify_esn,
 	.overflow	= xfrm_replay_overflow_offload_esn,
 };
 #else
@@ -708,7 +719,6 @@ static const struct xfrm_replay xfrm_replay_legacy = {
 	.advance	= xfrm_replay_advance,
 	.check		= xfrm_replay_check,
 	.recheck	= xfrm_replay_check,
-	.notify		= xfrm_replay_notify,
 	.overflow	= xfrm_replay_overflow,
 };
 
@@ -716,7 +726,6 @@ static const struct xfrm_replay xfrm_replay_bmp = {
 	.advance	= xfrm_replay_advance_bmp,
 	.check		= xfrm_replay_check_bmp,
 	.recheck	= xfrm_replay_check_bmp,
-	.notify		= xfrm_replay_notify_bmp,
 	.overflow	= xfrm_replay_overflow_bmp,
 };
 
@@ -724,7 +733,6 @@ static const struct xfrm_replay xfrm_replay_esn = {
 	.advance	= xfrm_replay_advance_esn,
 	.check		= xfrm_replay_check_esn,
 	.recheck	= xfrm_replay_recheck_esn,
-	.notify		= xfrm_replay_notify_esn,
 	.overflow	= xfrm_replay_overflow_esn,
 };
 #endif
@@ -742,11 +750,14 @@ int xfrm_init_replay(struct xfrm_state *x)
 			if (replay_esn->replay_window == 0)
 				return -EINVAL;
 			x->repl = &xfrm_replay_esn;
+			x->repl_mode = XFRM_REPLAY_MODE_ESN;
 		} else {
 			x->repl = &xfrm_replay_bmp;
+			x->repl_mode = XFRM_REPLAY_MODE_BMP;
 		}
 	} else {
 		x->repl = &xfrm_replay_legacy;
+		x->repl_mode = XFRM_REPLAY_MODE_LEGACY;
 	}
 
 	return 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8f6058e56f7f..c2ce1e6f4760 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2177,7 +2177,7 @@ static void xfrm_replay_timer_handler(struct timer_list *t)
 
 	if (x->km.state == XFRM_STATE_VALID) {
 		if (xfrm_aevent_is_on(xs_net(x)))
-			x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
+			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
 		else
 			x->xflags |= XFRM_TIME_DEFER;
 	}

From c7f877833c9f361be8e88d6b140d8314e80892aa Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jun 2021 15:51:57 +0200
Subject: [PATCH 2356/3292] xfrm: replay: remove advance indirection

Similar to other patches: add a new helper to avoid
an indirection.

v2: fix 'net/xfrm/xfrm_replay.c:519:13: warning: 'seq' may be used
uninitialized in this function' warning.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  2 +-
 net/xfrm/xfrm_input.c  |  2 +-
 net/xfrm/xfrm_replay.c | 24 +++++++++++++++---------
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9a79e41defa7..a7f997b13198 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -306,7 +306,6 @@ struct km_event {
 };
 
 struct xfrm_replay {
-	void	(*advance)(struct xfrm_state *x, __be32 net_seq);
 	int	(*check)(struct xfrm_state *x,
 			 struct sk_buff *skb,
 			 __be32 net_seq);
@@ -1722,6 +1721,7 @@ static inline int xfrm_policy_id2dir(u32 index)
 }
 
 #ifdef CONFIG_XFRM
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
 void xfrm_replay_notify(struct xfrm_state *x, int event);
 
 static inline int xfrm_aevent_is_on(struct net *net)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1158cd0311d7..c8971e4b33ab 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -665,7 +665,7 @@ resume:
 			goto drop_unlock;
 		}
 
-		x->repl->advance(x, seq);
+		xfrm_replay_advance(x, seq);
 
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 5feeb65f00b3..9565b0f7d380 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -150,14 +150,26 @@ err:
 	return -EINVAL;
 }
 
-static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
+static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq);
+static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq);
+
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
 {
-	u32 diff;
-	u32 seq = ntohl(net_seq);
+	u32 diff, seq;
+
+	switch (x->repl_mode) {
+	case XFRM_REPLAY_MODE_LEGACY:
+		break;
+	case XFRM_REPLAY_MODE_BMP:
+		return xfrm_replay_advance_bmp(x, net_seq);
+	case XFRM_REPLAY_MODE_ESN:
+		return xfrm_replay_advance_esn(x, net_seq);
+	}
 
 	if (!x->props.replay_window)
 		return;
 
+	seq = ntohl(net_seq);
 	if (seq > x->replay.seq) {
 		diff = seq - x->replay.seq;
 		if (diff < x->props.replay_window)
@@ -695,42 +707,36 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 }
 
 static const struct xfrm_replay xfrm_replay_legacy = {
-	.advance	= xfrm_replay_advance,
 	.check		= xfrm_replay_check,
 	.recheck	= xfrm_replay_check,
 	.overflow	= xfrm_replay_overflow_offload,
 };
 
 static const struct xfrm_replay xfrm_replay_bmp = {
-	.advance	= xfrm_replay_advance_bmp,
 	.check		= xfrm_replay_check_bmp,
 	.recheck	= xfrm_replay_check_bmp,
 	.overflow	= xfrm_replay_overflow_offload_bmp,
 };
 
 static const struct xfrm_replay xfrm_replay_esn = {
-	.advance	= xfrm_replay_advance_esn,
 	.check		= xfrm_replay_check_esn,
 	.recheck	= xfrm_replay_recheck_esn,
 	.overflow	= xfrm_replay_overflow_offload_esn,
 };
 #else
 static const struct xfrm_replay xfrm_replay_legacy = {
-	.advance	= xfrm_replay_advance,
 	.check		= xfrm_replay_check,
 	.recheck	= xfrm_replay_check,
 	.overflow	= xfrm_replay_overflow,
 };
 
 static const struct xfrm_replay xfrm_replay_bmp = {
-	.advance	= xfrm_replay_advance_bmp,
 	.check		= xfrm_replay_check_bmp,
 	.recheck	= xfrm_replay_check_bmp,
 	.overflow	= xfrm_replay_overflow_bmp,
 };
 
 static const struct xfrm_replay xfrm_replay_esn = {
-	.advance	= xfrm_replay_advance_esn,
 	.check		= xfrm_replay_check_esn,
 	.recheck	= xfrm_replay_recheck_esn,
 	.overflow	= xfrm_replay_overflow_esn,

From 25cfb8bc97c2b8447f86b1ad376ee672b6b173d4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jun 2021 15:51:58 +0200
Subject: [PATCH 2357/3292] xfrm: replay: remove recheck indirection

Adds new xfrm_replay_recheck() helper and calls it from
xfrm input path instead of the indirection.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  4 +---
 net/xfrm/xfrm_input.c  |  2 +-
 net/xfrm/xfrm_replay.c | 22 ++++++++++++++++------
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a7f997b13198..3a219b34cb8c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -309,9 +309,6 @@ struct xfrm_replay {
 	int	(*check)(struct xfrm_state *x,
 			 struct sk_buff *skb,
 			 __be32 net_seq);
-	int	(*recheck)(struct xfrm_state *x,
-			   struct sk_buff *skb,
-			   __be32 net_seq);
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
@@ -1723,6 +1720,7 @@ static inline int xfrm_policy_id2dir(u32 index)
 #ifdef CONFIG_XFRM
 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
 void xfrm_replay_notify(struct xfrm_state *x, int event);
+int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
 
 static inline int xfrm_aevent_is_on(struct net *net)
 {
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c8971e4b33ab..8046ef1a6680 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -660,7 +660,7 @@ resume:
 		/* only the first xfrm gets the encap type */
 		encap_type = 0;
 
-		if (x->repl->recheck(x, skb, seq)) {
+		if (xfrm_replay_recheck(x, skb, seq)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
 			goto drop_unlock;
 		}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 9565b0f7d380..59391dc80fa3 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -519,6 +519,22 @@ static int xfrm_replay_recheck_esn(struct xfrm_state *x,
 	return xfrm_replay_check_esn(x, skb, net_seq);
 }
 
+int xfrm_replay_recheck(struct xfrm_state *x,
+			struct sk_buff *skb, __be32 net_seq)
+{
+	switch (x->repl_mode) {
+	case XFRM_REPLAY_MODE_LEGACY:
+		break;
+	case XFRM_REPLAY_MODE_BMP:
+		/* no special recheck treatment */
+		return xfrm_replay_check_bmp(x, skb, net_seq);
+	case XFRM_REPLAY_MODE_ESN:
+		return xfrm_replay_recheck_esn(x, skb, net_seq);
+	}
+
+	return xfrm_replay_check(x, skb, net_seq);
+}
+
 static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
 {
 	unsigned int bitnr, nr, i;
@@ -708,37 +724,31 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 
 static const struct xfrm_replay xfrm_replay_legacy = {
 	.check		= xfrm_replay_check,
-	.recheck	= xfrm_replay_check,
 	.overflow	= xfrm_replay_overflow_offload,
 };
 
 static const struct xfrm_replay xfrm_replay_bmp = {
 	.check		= xfrm_replay_check_bmp,
-	.recheck	= xfrm_replay_check_bmp,
 	.overflow	= xfrm_replay_overflow_offload_bmp,
 };
 
 static const struct xfrm_replay xfrm_replay_esn = {
 	.check		= xfrm_replay_check_esn,
-	.recheck	= xfrm_replay_recheck_esn,
 	.overflow	= xfrm_replay_overflow_offload_esn,
 };
 #else
 static const struct xfrm_replay xfrm_replay_legacy = {
 	.check		= xfrm_replay_check,
-	.recheck	= xfrm_replay_check,
 	.overflow	= xfrm_replay_overflow,
 };
 
 static const struct xfrm_replay xfrm_replay_bmp = {
 	.check		= xfrm_replay_check_bmp,
-	.recheck	= xfrm_replay_check_bmp,
 	.overflow	= xfrm_replay_overflow_bmp,
 };
 
 static const struct xfrm_replay xfrm_replay_esn = {
 	.check		= xfrm_replay_check_esn,
-	.recheck	= xfrm_replay_recheck_esn,
 	.overflow	= xfrm_replay_overflow_esn,
 };
 #endif

From adfc2fdbae30d42edebad01d0ea1eed43036f1fe Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jun 2021 15:51:59 +0200
Subject: [PATCH 2358/3292] xfrm: replay: avoid replay indirection

Add and use xfrm_replay_check helper instead of indirection.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  4 +---
 net/xfrm/xfrm_input.c  |  2 +-
 net/xfrm/xfrm_replay.c | 27 ++++++++++++++++++---------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3a219b34cb8c..0206d80ec291 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -306,9 +306,6 @@ struct km_event {
 };
 
 struct xfrm_replay {
-	int	(*check)(struct xfrm_state *x,
-			 struct sk_buff *skb,
-			 __be32 net_seq);
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
@@ -1719,6 +1716,7 @@ static inline int xfrm_policy_id2dir(u32 index)
 
 #ifdef CONFIG_XFRM
 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
+int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
 void xfrm_replay_notify(struct xfrm_state *x, int event);
 int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 8046ef1a6680..3df0861d4390 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -612,7 +612,7 @@ lock:
 			goto drop_unlock;
 		}
 
-		if (x->repl->check(x, skb, seq)) {
+		if (xfrm_replay_check(x, skb, seq)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
 			goto drop_unlock;
 		}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 59391dc80fa3..e8703aa8d06a 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -118,8 +118,8 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
 	return err;
 }
 
-static int xfrm_replay_check(struct xfrm_state *x,
-		      struct sk_buff *skb, __be32 net_seq)
+static int xfrm_replay_check_legacy(struct xfrm_state *x,
+				    struct sk_buff *skb, __be32 net_seq)
 {
 	u32 diff;
 	u32 seq = ntohl(net_seq);
@@ -507,6 +507,21 @@ err:
 	return -EINVAL;
 }
 
+int xfrm_replay_check(struct xfrm_state *x,
+		      struct sk_buff *skb, __be32 net_seq)
+{
+	switch (x->repl_mode) {
+	case XFRM_REPLAY_MODE_LEGACY:
+		break;
+	case XFRM_REPLAY_MODE_BMP:
+		return xfrm_replay_check_bmp(x, skb, net_seq);
+	case XFRM_REPLAY_MODE_ESN:
+		return xfrm_replay_check_esn(x, skb, net_seq);
+	}
+
+	return xfrm_replay_check_legacy(x, skb, net_seq);
+}
+
 static int xfrm_replay_recheck_esn(struct xfrm_state *x,
 				   struct sk_buff *skb, __be32 net_seq)
 {
@@ -532,7 +547,7 @@ int xfrm_replay_recheck(struct xfrm_state *x,
 		return xfrm_replay_recheck_esn(x, skb, net_seq);
 	}
 
-	return xfrm_replay_check(x, skb, net_seq);
+	return xfrm_replay_check_legacy(x, skb, net_seq);
 }
 
 static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
@@ -723,32 +738,26 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 }
 
 static const struct xfrm_replay xfrm_replay_legacy = {
-	.check		= xfrm_replay_check,
 	.overflow	= xfrm_replay_overflow_offload,
 };
 
 static const struct xfrm_replay xfrm_replay_bmp = {
-	.check		= xfrm_replay_check_bmp,
 	.overflow	= xfrm_replay_overflow_offload_bmp,
 };
 
 static const struct xfrm_replay xfrm_replay_esn = {
-	.check		= xfrm_replay_check_esn,
 	.overflow	= xfrm_replay_overflow_offload_esn,
 };
 #else
 static const struct xfrm_replay xfrm_replay_legacy = {
-	.check		= xfrm_replay_check,
 	.overflow	= xfrm_replay_overflow,
 };
 
 static const struct xfrm_replay xfrm_replay_bmp = {
-	.check		= xfrm_replay_check_bmp,
 	.overflow	= xfrm_replay_overflow_bmp,
 };
 
 static const struct xfrm_replay xfrm_replay_esn = {
-	.check		= xfrm_replay_check_esn,
 	.overflow	= xfrm_replay_overflow_esn,
 };
 #endif

From b5a1d1fe0cbb9d20ba661134a09561af1dc9ebf5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 18 Jun 2021 15:52:00 +0200
Subject: [PATCH 2359/3292] xfrm: replay: remove last replay indirection

This replaces the overflow indirection with the new xfrm_replay_overflow
helper.  After this, the 'repl' pointer in xfrm_state is no longer
needed and can be removed as well.

xfrm_replay_overflow() is added in two incarnations, one is used
when the kernel is compiled with xfrm hardware offload support enabled,
the other when its disabled.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  8 +------
 net/xfrm/xfrm_output.c |  2 +-
 net/xfrm/xfrm_replay.c | 51 +++++++++++++++++++++---------------------
 3 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0206d80ec291..d2a0559c255f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -221,9 +221,6 @@ struct xfrm_state {
 	struct xfrm_replay_state preplay;
 	struct xfrm_replay_state_esn *preplay_esn;
 
-	/* The functions for replay detection. */
-	const struct xfrm_replay *repl;
-
 	/* replay detection mode */
 	enum xfrm_replay_mode    repl_mode;
 	/* internal flag that only holds state for delayed aevent at the
@@ -305,10 +302,6 @@ struct km_event {
 	struct net *net;
 };
 
-struct xfrm_replay {
-	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
-};
-
 struct xfrm_if_cb {
 	struct xfrm_if	*(*decode_session)(struct sk_buff *skb,
 					   unsigned short family);
@@ -1718,6 +1711,7 @@ static inline int xfrm_policy_id2dir(u32 index)
 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq);
 int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
 void xfrm_replay_notify(struct xfrm_state *x, int event);
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_replay_recheck(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq);
 
 static inline int xfrm_aevent_is_on(struct net *net)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 0b2975ef0668..527da58464f3 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -525,7 +525,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 			goto error;
 		}
 
-		err = x->repl->overflow(x, skb);
+		err = xfrm_replay_overflow(x, skb);
 		if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
 			goto error;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index e8703aa8d06a..9277d81b344c 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -95,7 +95,7 @@ void xfrm_replay_notify(struct xfrm_state *x, int event)
 		x->xflags &= ~XFRM_TIME_DEFER;
 }
 
-static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+static int __xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = 0;
 	struct net *net = xs_net(x);
@@ -617,7 +617,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
 	__u32 oseq = x->replay.oseq;
 
 	if (!xo)
-		return xfrm_replay_overflow(x, skb);
+		return __xfrm_replay_overflow(x, skb);
 
 	if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
 		if (!skb_is_gso(skb)) {
@@ -737,29 +737,33 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 	return err;
 }
 
-static const struct xfrm_replay xfrm_replay_legacy = {
-	.overflow	= xfrm_replay_overflow_offload,
-};
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+{
+	switch (x->repl_mode) {
+	case XFRM_REPLAY_MODE_LEGACY:
+		break;
+	case XFRM_REPLAY_MODE_BMP:
+		return xfrm_replay_overflow_offload_bmp(x, skb);
+	case XFRM_REPLAY_MODE_ESN:
+		return xfrm_replay_overflow_offload_esn(x, skb);
+	}
 
-static const struct xfrm_replay xfrm_replay_bmp = {
-	.overflow	= xfrm_replay_overflow_offload_bmp,
-};
-
-static const struct xfrm_replay xfrm_replay_esn = {
-	.overflow	= xfrm_replay_overflow_offload_esn,
-};
+	return xfrm_replay_overflow_offload(x, skb);
+}
 #else
-static const struct xfrm_replay xfrm_replay_legacy = {
-	.overflow	= xfrm_replay_overflow,
-};
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+{
+	switch (x->repl_mode) {
+	case XFRM_REPLAY_MODE_LEGACY:
+		break;
+	case XFRM_REPLAY_MODE_BMP:
+		return xfrm_replay_overflow_bmp(x, skb);
+	case XFRM_REPLAY_MODE_ESN:
+		return xfrm_replay_overflow_esn(x, skb);
+	}
 
-static const struct xfrm_replay xfrm_replay_bmp = {
-	.overflow	= xfrm_replay_overflow_bmp,
-};
-
-static const struct xfrm_replay xfrm_replay_esn = {
-	.overflow	= xfrm_replay_overflow_esn,
-};
+	return __xfrm_replay_overflow(x, skb);
+}
 #endif
 
 int xfrm_init_replay(struct xfrm_state *x)
@@ -774,14 +778,11 @@ int xfrm_init_replay(struct xfrm_state *x)
 		if (x->props.flags & XFRM_STATE_ESN) {
 			if (replay_esn->replay_window == 0)
 				return -EINVAL;
-			x->repl = &xfrm_replay_esn;
 			x->repl_mode = XFRM_REPLAY_MODE_ESN;
 		} else {
-			x->repl = &xfrm_replay_bmp;
 			x->repl_mode = XFRM_REPLAY_MODE_BMP;
 		}
 	} else {
-		x->repl = &xfrm_replay_legacy;
 		x->repl_mode = XFRM_REPLAY_MODE_LEGACY;
 	}
 

From b22afcdf04c96ca58327784e280e10288cfd3303 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 27 Mar 2021 22:01:36 +0100
Subject: [PATCH 2360/3292] cpu/hotplug: Cure the cpusets trainwreck

Alexey and Joshua tried to solve a cpusets related hotplug problem which is
user space visible and results in unexpected behaviour for some time after
a CPU has been plugged in and the corresponding uevent was delivered.

cpusets delegate the hotplug work (rebuilding cpumasks etc.) to a
workqueue. This is done because the cpusets code has already a lock
nesting of cgroups_mutex -> cpu_hotplug_lock. A synchronous callback or
waiting for the work to finish with cpu_hotplug_lock held can and will
deadlock because that results in the reverse lock order.

As a consequence the uevent can be delivered before cpusets have consistent
state which means that a user space invocation of sched_setaffinity() to
move a task to the plugged CPU fails up to the point where the scheduled
work has been processed.

The same is true for CPU unplug, but that does not create user observable
failure (yet).

It's still inconsistent to claim that an operation is finished before it
actually is and that's the real issue at hand. uevents just make it
reliably observable.

Obviously the problem should be fixed in cpusets/cgroups, but untangling
that is pretty much impossible because according to the changelog of the
commit which introduced this 8 years ago:

 3a5a6d0c2b03("cpuset: don't nest cgroup_mutex inside get_online_cpus()")

the lock order cgroups_mutex -> cpu_hotplug_lock is a design decision and
the whole code is built around that.

So bite the bullet and invoke the relevant cpuset function, which waits for
the work to finish, in _cpu_up/down() after dropping cpu_hotplug_lock and
only when tasks are not frozen by suspend/hibernate because that would
obviously wait forever.

Waiting there with cpu_add_remove_lock, which is protecting the present
and possible CPU maps, held is not a problem at all because neither work
queues nor cpusets/cgroups have any lockchains related to that lock.

Waiting in the hotplug machinery is not problematic either because there
are already state callbacks which wait for hardware queues to drain. It
makes the operations slightly slower, but hotplug is slow anyway.

This ensures that state is consistent before returning from a hotplug
up/down operation. It's still inconsistent during the operation, but that's
a different story.

Add a large comment which explains why this is done and why this is not a
dump ground for the hack of the day to work around half thought out locking
schemes. Document also the implications vs. hotplug operations and
serialization or the lack of it.

Thanks to Alexy and Joshua for analyzing why this temporary
sched_setaffinity() failure happened.

Fixes: 3a5a6d0c2b03("cpuset: don't nest cgroup_mutex inside get_online_cpus()")
Reported-by: Alexey Klimov <aklimov@redhat.com>
Reported-by: Joshua Baker <jobaker@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Alexey Klimov <aklimov@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/87tuowcnv3.ffs@nanos.tec.linutronix.de
---
 kernel/cpu.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index e538518556f4..d2e1692d7bdf 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -32,6 +32,7 @@
 #include <linux/relay.h>
 #include <linux/slab.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/cpuset.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -873,6 +874,52 @@ void __init cpuhp_threads_init(void)
 	kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
+/*
+ *
+ * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
+ * protected region.
+ *
+ * The operation is still serialized against concurrent CPU hotplug via
+ * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
+ * serialized against other hotplug related activity like adding or
+ * removing of state callbacks and state instances, which invoke either the
+ * startup or the teardown callback of the affected state.
+ *
+ * This is required for subsystems which are unfixable vs. CPU hotplug and
+ * evade lock inversion problems by scheduling work which has to be
+ * completed _before_ cpu_up()/_cpu_down() returns.
+ *
+ * Don't even think about adding anything to this for any new code or even
+ * drivers. It's only purpose is to keep existing lock order trainwrecks
+ * working.
+ *
+ * For cpu_down() there might be valid reasons to finish cleanups which are
+ * not required to be done under cpu_hotplug_lock, but that's a different
+ * story and would be not invoked via this.
+ */
+static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
+{
+	/*
+	 * cpusets delegate hotplug operations to a worker to "solve" the
+	 * lock order problems. Wait for the worker, but only if tasks are
+	 * _not_ frozen (suspend, hibernate) as that would wait forever.
+	 *
+	 * The wait is required because otherwise the hotplug operation
+	 * returns with inconsistent state, which could even be observed in
+	 * user space when a new CPU is brought up. The CPU plug uevent
+	 * would be delivered and user space reacting on it would fail to
+	 * move tasks to the newly plugged CPU up to the point where the
+	 * work has finished because up to that point the newly plugged CPU
+	 * is not assignable in cpusets/cgroups. On unplug that's not
+	 * necessarily a visible issue, but it is still inconsistent state,
+	 * which is the real problem which needs to be "fixed". This can't
+	 * prevent the transient state between scheduling the work and
+	 * returning from waiting for it.
+	 */
+	if (!tasks_frozen)
+		cpuset_wait_for_hotplug();
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 #ifndef arch_clear_mm_cpumask_cpu
 #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
@@ -1108,6 +1155,7 @@ out:
 	 */
 	lockup_detector_cleanup();
 	arch_smt_update();
+	cpu_up_down_serialize_trainwrecks(tasks_frozen);
 	return ret;
 }
 
@@ -1302,6 +1350,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 out:
 	cpus_write_unlock();
 	arch_smt_update();
+	cpu_up_down_serialize_trainwrecks(tasks_frozen);
 	return ret;
 }
 

From 4249cb7d920060dfa925d3b9f6a37f0a7c025a16 Mon Sep 17 00:00:00 2001
From: Huilong Deng <denghuilong@cdjrlc.com>
Date: Sun, 20 Jun 2021 22:29:15 +0800
Subject: [PATCH 2361/3292] printk: Remove trailing semicolon in macros

Macros should not use a trailing semicolon.

Signed-off-by: Huilong Deng <denghuilong@cdjrlc.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/dev_printk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h
index 6f009559ee54..82d3d46005a1 100644
--- a/include/linux/dev_printk.h
+++ b/include/linux/dev_printk.h
@@ -236,7 +236,7 @@ do {									\
  * using WARN/WARN_ONCE to include file/line information and a backtrace.
  */
 #define dev_WARN(dev, format, arg...) \
-	WARN(1, "%s %s: " format, dev_driver_string(dev), dev_name(dev), ## arg);
+	WARN(1, "%s %s: " format, dev_driver_string(dev), dev_name(dev), ## arg)
 
 #define dev_WARN_ONCE(dev, condition, format, arg...) \
 	WARN_ONCE(condition, "%s %s: " format, \

From 9f2470fbc4cb4583c080bb729a998933ba61aca4 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:35 -0700
Subject: [PATCH 2362/3292] skmsg: Improve udp_bpf_recvmsg() accuracy

I tried to reuse sk_msg_wait_data() for different protocols,
but it turns out it can not be simply reused. For example,
UDP actually uses two queues to receive skb:
udp_sk(sk)->reader_queue and sk->sk_receive_queue. So we have
to check both of them to know whether we have received any
packet.

Also, UDP does not lock the sock during BH Rx path, it makes
no sense for its ->recvmsg() to lock the sock. It is always
possible for ->recvmsg() to be called before packets actually
arrive in the receive queue, we just use best effort to make
it accurate here.

Fixes: 1f5be6b3b063 ("udp: Implement udp_bpf_recvmsg() for sockmap")
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-2-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h |  2 --
 net/core/skmsg.c      | 23 ---------------------
 net/ipv4/tcp_bpf.c    | 24 +++++++++++++++++++++-
 net/ipv4/udp_bpf.c    | 47 ++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index aba0f0f429be..e3d080c299f6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -126,8 +126,6 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      struct sk_msg *msg, u32 bytes);
 int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 			     struct sk_msg *msg, u32 bytes);
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
-		     long timeo, int *err);
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		   int len, int flags);
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 43ce17a6a585..f9a81b314e4c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,29 +399,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
-		     long timeo, int *err)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int ret = 0;
-
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		return 1;
-
-	if (!timeo)
-		return ret;
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	ret = sk_wait_event(sk, &timeo,
-			    !list_empty(&psock->ingress_msg) ||
-			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	remove_wait_queue(sk_sleep(sk), &wait);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(sk_msg_wait_data);
-
 /* Receive sk_msg from psock->ingress_msg to @msg. */
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		   int len, int flags)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ad9d17923fc5..bb49b52d7be8 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -163,6 +163,28 @@ static bool tcp_bpf_stream_read(const struct sock *sk)
 	return !empty;
 }
 
+static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+			     long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret = 0;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return 1;
+
+	if (!timeo)
+		return ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = sk_wait_event(sk, &timeo,
+			    !list_empty(&psock->ingress_msg) ||
+			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		    int nonblock, int flags, int *addr_len)
 {
@@ -188,7 +210,7 @@ msg_bytes_ready:
 		long timeo;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		data = tcp_msg_wait_data(sk, psock, flags, timeo, &err);
 		if (data) {
 			if (!sk_psock_queue_empty(psock))
 				goto msg_bytes_ready;
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 954c4591a6fd..565a70040c57 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -21,6 +21,45 @@ static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
 }
 
+static bool udp_sk_has_data(struct sock *sk)
+{
+	return !skb_queue_empty(&udp_sk(sk)->reader_queue) ||
+	       !skb_queue_empty(&sk->sk_receive_queue);
+}
+
+static bool psock_has_data(struct sk_psock *psock)
+{
+	return !skb_queue_empty(&psock->ingress_skb) ||
+	       !sk_psock_queue_empty(psock);
+}
+
+#define udp_msg_has_data(__sk, __psock)	\
+		({ udp_sk_has_data(__sk) || psock_has_data(__psock); })
+
+static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+			     long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret = 0;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return 1;
+
+	if (!timeo)
+		return ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = udp_msg_has_data(sk, psock);
+	if (!ret) {
+		wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+		ret = udp_msg_has_data(sk, psock);
+	}
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
 static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			   int nonblock, int flags, int *addr_len)
 {
@@ -34,8 +73,7 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (unlikely(!psock))
 		return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 
-	lock_sock(sk);
-	if (sk_psock_queue_empty(psock)) {
+	if (!psock_has_data(psock)) {
 		ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 		goto out;
 	}
@@ -47,9 +85,9 @@ msg_bytes_ready:
 		long timeo;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		data = udp_msg_wait_data(sk, psock, flags, timeo, &err);
 		if (data) {
-			if (!sk_psock_queue_empty(psock))
+			if (psock_has_data(psock))
 				goto msg_bytes_ready;
 			ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 			goto out;
@@ -62,7 +100,6 @@ msg_bytes_ready:
 	}
 	ret = copied;
 out:
-	release_sock(sk);
 	sk_psock_put(sk, psock);
 	return ret;
 }

From a7e65fe7d8201527129206754db1a2db6a6b2fde Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:36 -0700
Subject: [PATCH 2363/3292] selftests/bpf: Retry for EAGAIN in
 udp_redir_to_connected()

We use non-blocking sockets for testing sockmap redirections,
and got some random EAGAIN errors from UDP tests.

There is no guarantee the packet would be immediately available
to receive as soon as it is sent out, even on the local host.
For UDP, this is especially true because it does not lock the
sock during BH (unlike the TCP path). This is probably why we
only saw this error in UDP cases.

No matter how hard we try to make the queue empty check accurate,
it is always possible for recvmsg() to beat ->sk_data_ready().
Therefore, we should just retry in case of EAGAIN.

Fixes: d6378af615275 ("selftests/bpf: Add a test case for udp sockmap")
Reported-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-3-xiyou.wangcong@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/sockmap_listen.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 648d9ae898d2..01ab11259809 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1610,6 +1610,7 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
 	struct sockaddr_storage addr;
 	int c0, c1, p0, p1;
 	unsigned int pass;
+	int retries = 100;
 	socklen_t len;
 	int err, n;
 	u64 value;
@@ -1686,9 +1687,13 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (pass != 1)
 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
+again:
 	n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
-	if (n < 0)
+	if (n < 0) {
+		if (errno == EAGAIN && retries--)
+			goto again;
 		FAIL_ERRNO("%s: read", log_prefix);
+	}
 	if (n == 0)
 		FAIL("%s: incomplete read", log_prefix);
 

From e00a5c331bf57f41fcfdc5da4f5caeafe5e54c1d Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:37 -0700
Subject: [PATCH 2364/3292] udp: Fix a memory leak in udp_read_sock()

sk_psock_verdict_recv() clones the skb and uses the clone
afterward, so udp_read_sock() should free the skb after using
it, regardless of error or not.

This fixes a real kmemleak.

Fixes: d7f571188ecf ("udp: Implement ->read_sock() for sockmap")
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-4-xiyou.wangcong@gmail.com
---
 net/ipv4/udp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1307ad0d3b9e..8091276cb85b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1798,11 +1798,13 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		if (used <= 0) {
 			if (!copied)
 				copied = used;
+			kfree_skb(skb);
 			break;
 		} else if (used <= skb->len) {
 			copied += used;
 		}
 
+		kfree_skb(skb);
 		if (!desc->count)
 			break;
 	}

From 30b9c54a707db4155735cf71f4600241c1b7b6ff Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:38 -0700
Subject: [PATCH 2365/3292] skmsg: Clear skb redirect pointer before dropping
 it

When we drop skb inside sk_psock_skb_redirect(), we have to clear
its skb->_sk_redir pointer too, otherwise kfree_skb() would
misinterpret it as a valid skb->_skb_refdst and dst_release()
would eventually complain.

Fixes: e3526bb92a20 ("skmsg: Move sk_redir from TCP_SKB_CB to skb")
Reported-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-5-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index f9a81b314e4c..4334720e2a04 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -843,12 +843,14 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
 	 * a socket that is in this state so we drop the skb.
 	 */
 	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
+		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 		return;
 	}
 	spin_lock_bh(&psock_other->ingress_lock);
 	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 		spin_unlock_bh(&psock_other->ingress_lock);
+		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 		return;
 	}

From 0cf6672b23c8aa9d9274798dd63cbf6ede77ef90 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:39 -0700
Subject: [PATCH 2366/3292] skmsg: Fix a memory leak in
 sk_psock_verdict_apply()

If the dest psock does not set SK_PSOCK_TX_ENABLED,
the skb can't be queued anywhere so must be dropped.

This one is found during code review.

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-6-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 4334720e2a04..5464477e2d3d 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -924,8 +924,13 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 				skb_queue_tail(&psock->ingress_skb, skb);
 				schedule_work(&psock->work);
+				err = 0;
 			}
 			spin_unlock_bh(&psock->ingress_lock);
+			if (err < 0) {
+				skb_bpf_redirect_clear(skb);
+				goto out_free;
+			}
 		}
 		break;
 	case __SK_REDIRECT:

From 1581a6c1c3291a8320b080f4411345f60229976d Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:40 -0700
Subject: [PATCH 2367/3292] skmsg: Teach sk_psock_verdict_apply() to return
 errors

Currently sk_psock_verdict_apply() is void, but it handles some
error conditions too. Its caller is impossible to learn whether
it succeeds or fails, especially sk_psock_verdict_recv().

Make it return int to indicate error cases and propagate errors
to callers properly.

Fixes: ef5659280eb1 ("bpf, sockmap: Allow skipping sk_skb parser program")
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-7-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 5464477e2d3d..e3d210811db4 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -824,7 +824,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 
-static void sk_psock_skb_redirect(struct sk_buff *skb)
+static int sk_psock_skb_redirect(struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
@@ -835,7 +835,7 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
 	 */
 	if (unlikely(!sk_other)) {
 		kfree_skb(skb);
-		return;
+		return -EIO;
 	}
 	psock_other = sk_psock(sk_other);
 	/* This error indicates the socket is being torn down or had another
@@ -845,19 +845,20 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
 	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
-		return;
+		return -EIO;
 	}
 	spin_lock_bh(&psock_other->ingress_lock);
 	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 		spin_unlock_bh(&psock_other->ingress_lock);
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
-		return;
+		return -EIO;
 	}
 
 	skb_queue_tail(&psock_other->ingress_skb, skb);
 	schedule_work(&psock_other->work);
 	spin_unlock_bh(&psock_other->ingress_lock);
+	return 0;
 }
 
 static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
@@ -894,14 +895,15 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
 
-static void sk_psock_verdict_apply(struct sk_psock *psock,
-				   struct sk_buff *skb, int verdict)
+static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+				  int verdict)
 {
 	struct sock *sk_other;
-	int err = -EIO;
+	int err = 0;
 
 	switch (verdict) {
 	case __SK_PASS:
+		err = -EIO;
 		sk_other = psock->sk;
 		if (sock_flag(sk_other, SOCK_DEAD) ||
 		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
@@ -934,13 +936,15 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 		}
 		break;
 	case __SK_REDIRECT:
-		sk_psock_skb_redirect(skb);
+		err = sk_psock_skb_redirect(skb);
 		break;
 	case __SK_DROP:
 	default:
 out_free:
 		kfree_skb(skb);
 	}
+
+	return err;
 }
 
 static void sk_psock_write_space(struct sock *sk)
@@ -1107,7 +1111,8 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
-	sk_psock_verdict_apply(psock, skb, ret);
+	if (sk_psock_verdict_apply(psock, skb, ret) < 0)
+		len = 0;
 out:
 	rcu_read_unlock();
 	return len;

From 42830571f1fd9751b3fbf38084bbb253320e185f Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:41 -0700
Subject: [PATCH 2368/3292] skmsg: Pass source psock to sk_psock_skb_redirect()

sk_psock_skb_redirect() only takes skb as a parameter, we
will need to know where this skb is from, so just pass
the source psock to this function as a new parameter.
This patch prepares for the next one.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-8-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e3d210811db4..3aa9065811ad 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -824,7 +824,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 
-static int sk_psock_skb_redirect(struct sk_buff *skb)
+static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
@@ -861,11 +861,12 @@ static int sk_psock_skb_redirect(struct sk_buff *skb)
 	return 0;
 }
 
-static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb,
+				       struct sk_psock *from, int verdict)
 {
 	switch (verdict) {
 	case __SK_REDIRECT:
-		sk_psock_skb_redirect(skb);
+		sk_psock_skb_redirect(from, skb);
 		break;
 	case __SK_PASS:
 	case __SK_DROP:
@@ -889,7 +890,7 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
-	sk_psock_tls_verdict_apply(skb, psock->sk, ret);
+	sk_psock_tls_verdict_apply(skb, psock, ret);
 	rcu_read_unlock();
 	return ret;
 }
@@ -936,7 +937,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		}
 		break;
 	case __SK_REDIRECT:
-		err = sk_psock_skb_redirect(skb);
+		err = sk_psock_skb_redirect(psock, skb);
 		break;
 	case __SK_DROP:
 	default:

From 781dd0431eb549f9cb1fdddf91a50d985febe884 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 14 Jun 2021 19:13:42 -0700
Subject: [PATCH 2369/3292] skmsg: Increase sk->sk_drops when dropping packets

It is hard to observe packet drops without increasing relevant
drop counters, here we should increase sk->sk_drops which is
a protocol-independent counter. Fortunately psock is always
associated with a struct sock, we can just use psock->sk.

Suggested-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210615021342.7416-9-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 3aa9065811ad..9b6160a191f8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -578,6 +578,12 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 	return sk_psock_skb_ingress(psock, skb);
 }
 
+static void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+	sk_drops_add(sk, skb);
+	kfree_skb(skb);
+}
+
 static void sk_psock_backlog(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(work, struct sk_psock, work);
@@ -617,7 +623,7 @@ start:
 				/* Hard errors break pipe and stop xmit. */
 				sk_psock_report_error(psock, ret ? -ret : EPIPE);
 				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
-				kfree_skb(skb);
+				sock_drop(psock->sk, skb);
 				goto end;
 			}
 			off += ret;
@@ -708,7 +714,7 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
 
 	while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
 		skb_bpf_redirect_clear(skb);
-		kfree_skb(skb);
+		sock_drop(psock->sk, skb);
 	}
 	__sk_psock_purge_ingress_msg(psock);
 }
@@ -834,7 +840,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
 	 * return code, but then didn't set a redirect interface.
 	 */
 	if (unlikely(!sk_other)) {
-		kfree_skb(skb);
+		sock_drop(from->sk, skb);
 		return -EIO;
 	}
 	psock_other = sk_psock(sk_other);
@@ -844,14 +850,14 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
 	 */
 	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
 		skb_bpf_redirect_clear(skb);
-		kfree_skb(skb);
+		sock_drop(from->sk, skb);
 		return -EIO;
 	}
 	spin_lock_bh(&psock_other->ingress_lock);
 	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 		spin_unlock_bh(&psock_other->ingress_lock);
 		skb_bpf_redirect_clear(skb);
-		kfree_skb(skb);
+		sock_drop(from->sk, skb);
 		return -EIO;
 	}
 
@@ -942,7 +948,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 	case __SK_DROP:
 	default:
 out_free:
-		kfree_skb(skb);
+		sock_drop(psock->sk, skb);
 	}
 
 	return err;
@@ -977,7 +983,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 	sk = strp->sk;
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
-		kfree_skb(skb);
+		sock_drop(sk, skb);
 		goto out;
 	}
 	prog = READ_ONCE(psock->progs.stream_verdict);
@@ -1098,7 +1104,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
 		len = 0;
-		kfree_skb(skb);
+		sock_drop(sk, skb);
 		goto out;
 	}
 	prog = READ_ONCE(psock->progs.stream_verdict);

From 5c10a3dbe9220ca7bcee716c13c8a8563bcb010a Mon Sep 17 00:00:00 2001
From: Jonathan Edwards <jonathan.edwards@165gc.onmicrosoft.com>
Date: Sat, 19 Jun 2021 11:10:07 -0400
Subject: [PATCH 2370/3292] libbpf: Add extra BPF_PROG_TYPE check to
 bpf_object__probe_loading

eBPF has been backported for RHEL 7 w/ kernel 3.10-940+ [0]. However only
the following program types are supported [1]:

  BPF_PROG_TYPE_KPROBE
  BPF_PROG_TYPE_TRACEPOINT
  BPF_PROG_TYPE_PERF_EVENT

For libbpf this causes an EINVAL return during the bpf_object__probe_loading
call which only checks to see if programs of type BPF_PROG_TYPE_SOCKET_FILTER
can load.

The following will try BPF_PROG_TYPE_TRACEPOINT as a fallback attempt before
erroring out. BPF_PROG_TYPE_KPROBE was not a good candidate because on some
kernels it requires knowledge of the LINUX_VERSION_CODE.

  [0] https://www.redhat.com/en/blog/introduction-ebpf-red-hat-enterprise-linux-7
  [1] https://access.redhat.com/articles/3550581

Signed-off-by: Jonathan Edwards <jonathan.edwards@165gc.onmicrosoft.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210619151007.GA6963@165gc.onmicrosoft.com
---
 tools/lib/bpf/libbpf.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 48c0ade05ab1..1e04ce724240 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4000,6 +4000,10 @@ bpf_object__probe_loading(struct bpf_object *obj)
 	attr.license = "GPL";
 
 	ret = bpf_load_program_xattr(&attr, NULL, 0);
+	if (ret < 0) {
+		attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+		ret = bpf_load_program_xattr(&attr, NULL, 0);
+	}
 	if (ret < 0) {
 		ret = errno;
 		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));

From 8d287e8292ea126d55beb29f2b3f07dfad5b6bc0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 16 Jun 2021 16:05:50 +0200
Subject: [PATCH 2371/3292] ACPI: scan: Simplify acpi_table_events_fn()

Notice that the table field of struct acpi_table_events_work is never
read and its event field is always equal to ACPI_TABLE_EVENT_LOAD, so
both of them are redundant.

Accordingly, drop struct acpi_table_events_work and use struct
work_struct directly instead of it, simplify acpi_scan_table_handler()
and rename it to acpi_scan_table_notify().

Moreover, make acpi_bus_table_handler() check the event code against
ACPI_TABLE_EVENT_LOAD before calling acpi_scan_table_notify(), so it
is not necessary to do that check in the latter.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c      |  3 ++-
 drivers/acpi/internal.h |  2 +-
 drivers/acpi/scan.c     | 38 ++++++++++----------------------------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76..a3a56c00bd08 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1206,7 +1206,8 @@ void __init acpi_subsystem_init(void)
 
 static acpi_status acpi_bus_table_handler(u32 event, void *table, void *context)
 {
-	acpi_scan_table_handler(event, table, context);
+	if (event == ACPI_TABLE_EVENT_LOAD)
+		acpi_scan_table_notify();
 
 	return acpi_sysfs_table_handler(event, table, context);
 }
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index f973bbe90e5e..47947d612b43 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -88,7 +88,7 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src);
 bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent);
 
 acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context);
-void acpi_scan_table_handler(u32 event, void *table, void *context);
+void acpi_scan_table_notify(void);
 
 /* --------------------------------------------------------------------------
                      Device Node Initialization / Removal
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1c6205661000..f8eaddb83484 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2533,46 +2533,28 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr)
 	return count;
 }
 
-struct acpi_table_events_work {
-	struct work_struct work;
-	void *table;
-	u32 event;
-};
-
 static void acpi_table_events_fn(struct work_struct *work)
 {
-	struct acpi_table_events_work *tew;
+	acpi_scan_lock_acquire();
+	acpi_bus_scan(ACPI_ROOT_OBJECT);
+	acpi_scan_lock_release();
 
-	tew = container_of(work, struct acpi_table_events_work, work);
-
-	if (tew->event == ACPI_TABLE_EVENT_LOAD) {
-		acpi_scan_lock_acquire();
-		acpi_bus_scan(ACPI_ROOT_OBJECT);
-		acpi_scan_lock_release();
-	}
-
-	kfree(tew);
+	kfree(work);
 }
 
-void acpi_scan_table_handler(u32 event, void *table, void *context)
+void acpi_scan_table_notify(void)
 {
-	struct acpi_table_events_work *tew;
+	struct work_struct *work;
 
 	if (!acpi_scan_initialized)
 		return;
 
-	if (event != ACPI_TABLE_EVENT_LOAD)
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
 		return;
 
-	tew = kmalloc(sizeof(*tew), GFP_KERNEL);
-	if (!tew)
-		return;
-
-	INIT_WORK(&tew->work, acpi_table_events_fn);
-	tew->table = table;
-	tew->event = event;
-
-	schedule_work(&tew->work);
+	INIT_WORK(work, acpi_table_events_fn);
+	schedule_work(work);
 }
 
 int acpi_reconfig_notifier_register(struct notifier_block *nb)

From 4370cbf350dbaca984dbda9f9ce3fac45d6949d5 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Mon, 21 Jun 2021 09:37:27 +0800
Subject: [PATCH 2372/3292] ACPI: EC: trust DSDT GPE for certain HP laptop

On HP Pavilion Gaming Laptop 15-cx0xxx, the ECDT EC and DSDT EC share
the same port addresses but different GPEs. And the DSDT GPE is the
right one to use.

The current code duplicates DSDT EC with ECDT EC if the port addresses
are the same, and uses ECDT GPE as a result, which breaks this machine.

Introduce a new quirk for the HP laptop to trust the DSDT GPE,
and avoid duplicating even if the port addresses are the same.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=209989
Reported-and-tested-by: Shao Fu, Chen <leo881003@gmail.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index e8c5da2b964a..87c3b4a099b9 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -183,6 +183,7 @@ static struct workqueue_struct *ec_query_wq;
 
 static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */
 static int EC_FLAGS_IGNORE_DSDT_GPE; /* Needs ECDT GPE as correction setting */
+static int EC_FLAGS_TRUST_DSDT_GPE; /* Needs DSDT GPE as correction setting */
 static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
 
 /* --------------------------------------------------------------------------
@@ -1593,7 +1594,8 @@ static int acpi_ec_add(struct acpi_device *device)
 		}
 
 		if (boot_ec && ec->command_addr == boot_ec->command_addr &&
-		    ec->data_addr == boot_ec->data_addr) {
+		    ec->data_addr == boot_ec->data_addr &&
+		    !EC_FLAGS_TRUST_DSDT_GPE) {
 			/*
 			 * Trust PNP0C09 namespace location rather than
 			 * ECDT ID. But trust ECDT GPE rather than _GPE
@@ -1816,6 +1818,18 @@ static int ec_correct_ecdt(const struct dmi_system_id *id)
 	return 0;
 }
 
+/*
+ * Some ECDTs contain wrong GPE setting, but they share the same port addresses
+ * with DSDT EC, don't duplicate the DSDT EC with ECDT EC in this case.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=209989
+ */
+static int ec_honor_dsdt_gpe(const struct dmi_system_id *id)
+{
+	pr_debug("Detected system needing DSDT GPE setting.\n");
+	EC_FLAGS_TRUST_DSDT_GPE = 1;
+	return 0;
+}
+
 /*
  * Some DSDTs contain wrong GPE setting.
  * Asus FX502VD/VE, GL702VMK, X550VXK, X580VD
@@ -1870,6 +1884,11 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = {
 	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
 	DMI_MATCH(DMI_PRODUCT_NAME, "X580VD"),}, NULL},
 	{
+	/* https://bugzilla.kernel.org/show_bug.cgi?id=209989 */
+	ec_honor_dsdt_gpe, "HP Pavilion Gaming Laptop 15-cx0xxx", {
+	DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+	DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Gaming Laptop 15-cx0xxx"),}, NULL},
+	{
 	ec_clear_on_resume, "Samsung hardware", {
 	DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL},
 	{},

From 603113c514e95c3350598bc3cccbd03af7ea4ab2 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Fri, 18 Jun 2021 17:15:53 +0200
Subject: [PATCH 2373/3292] vrf: do not push non-ND strict packets with a
 source LLA through packet taps again

Non-ND strict packets with a source LLA go through the packet taps
again, while non-ND strict packets with other source addresses do not,
and we can see a clone of those packets on the vrf interface (we should
not). This is due to a series of changes:

Commit 6f12fa775530[1] made non-ND strict packets not being pushed again
in the packet taps. This changed with commit 205704c618af[2] for those
packets having a source LLA, as they need a lookup with the orig_iif.

The issue now is those packets do not skip the 'vrf_ip6_rcv' function to
the end (as the ones without a source LLA) and go through the check to
call packet taps again. This check was changed by commit 6f12fa775530[1]
and do not exclude non-strict packets anymore. Packets matching
'need_strict && !is_ndisc && is_ll_src' are now being sent through the
packet taps again. This can be seen by dumping packets on the vrf
interface.

Fix this by having the same code path for all non-ND strict packets and
selectively lookup with the orig_iif for those with a source LLA. This
has the effect to revert to the pre-205704c618af[2] condition, which
should also be easier to maintain.

[1] 6f12fa775530 ("vrf: mark skb for multicast or link-local as enslaved to VRF")
[2] 205704c618af ("vrf: packets with lladdr src needs dst at input with orig_iif when needs strict")

Fixes: 205704c618af ("vrf: packets with lladdr src needs dst at input with orig_iif when needs strict")
Cc: Stephen Suryaputra <ssuryaextr@gmail.com>
Reported-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 28a6c4cfe9b8..414afcb0a23f 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1366,22 +1366,22 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
 	int orig_iif = skb->skb_iif;
 	bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
 	bool is_ndisc = ipv6_ndisc_frame(skb);
-	bool is_ll_src;
 
 	/* loopback, multicast & non-ND link-local traffic; do not push through
 	 * packet taps again. Reset pkt_type for upper layers to process skb.
-	 * for packets with lladdr src, however, skip so that the dst can be
-	 * determine at input using original ifindex in the case that daddr
-	 * needs strict
+	 * For strict packets with a source LLA, determine the dst using the
+	 * original ifindex.
 	 */
-	is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL;
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (need_strict && !is_ndisc && !is_ll_src)) {
+	if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
 		skb->dev = vrf_dev;
 		skb->skb_iif = vrf_dev->ifindex;
 		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
+
 		if (skb->pkt_type == PACKET_LOOPBACK)
 			skb->pkt_type = PACKET_HOST;
+		else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)
+			vrf_ip6_input_dst(skb, vrf_dev, orig_iif);
+
 		goto out;
 	}
 

From 89837eb4b2463c556a123437f242d6c2bc62ce81 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Thu, 17 Jun 2021 09:04:14 +0800
Subject: [PATCH 2374/3292] net: sched: add barrier to ensure correct ordering
 for lockless qdisc

The spin_trylock() was assumed to contain the implicit
barrier needed to ensure the correct ordering between
STATE_MISSED setting/clearing and STATE_MISSED checking
in commit a90c57f2cedd ("net: sched: fix packet stuck
problem for lockless qdisc").

But it turns out that spin_trylock() only has load-acquire
semantic, for strongly-ordered system(like x86), the compiler
barrier implicitly contained in spin_trylock() seems enough
to ensure the correct ordering. But for weakly-orderly system
(like arm64), the store-release semantic is needed to ensure
the correct ordering as clear_bit() and test_bit() is store
operation, see queued_spin_lock().

So add the explicit barrier to ensure the correct ordering
for the above case.

Fixes: a90c57f2cedd ("net: sched: fix packet stuck problem for lockless qdisc")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1e625519ae96..57710303908c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -163,6 +163,12 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		if (spin_trylock(&qdisc->seqlock))
 			goto nolock_empty;
 
+		/* Paired with smp_mb__after_atomic() to make sure
+		 * STATE_MISSED checking is synchronized with clearing
+		 * in pfifo_fast_dequeue().
+		 */
+		smp_mb__before_atomic();
+
 		/* If the MISSED flag is set, it means other thread has
 		 * set the MISSED flag before second spin_trylock(), so
 		 * we can return false here to avoid multi cpus doing
@@ -180,6 +186,12 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		 */
 		set_bit(__QDISC_STATE_MISSED, &qdisc->state);
 
+		/* spin_trylock() only has load-acquire semantic, so use
+		 * smp_mb__after_atomic() to ensure STATE_MISSED is set
+		 * before doing the second spin_trylock().
+		 */
+		smp_mb__after_atomic();
+
 		/* Retry again in case other CPU may not see the new flag
 		 * after it releases the lock at the end of qdisc_run_end().
 		 */

From 2b4cd14fd995e0a863b2ced4cba0bcd804d89ebc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 17 Jun 2021 09:38:17 +0200
Subject: [PATCH 2375/3292] net/netif_receive_skb_core: Use migrate_disable()

The preempt disable around do_xdp_generic() has been introduced in
commit
   bbbe211c295ff ("net: rcu lock and preempt disable missing around generic xdp")

For BPF it is enough to use migrate_disable() and the code was updated
as it can be seen in commit
   3c58482a382ba ("bpf: Provide bpf_prog_run_pin_on_cpu() helper")

This is a leftover which was not converted.

Use migrate_disable() before invoking do_xdp_generic().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index ef8cf7619baf..439faadab0c2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5277,9 +5277,9 @@ another_round:
 	if (static_branch_unlikely(&generic_xdp_needed_key)) {
 		int ret2;
 
-		preempt_disable();
+		migrate_disable();
 		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
-		preempt_enable();
+		migrate_enable();
 
 		if (ret2 != XDP_PASS) {
 			ret = NET_RX_DROP;

From 536ba2e06d1aaaed8a11c30e1609281cd955082e Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Fri, 18 Jun 2021 12:35:39 -0700
Subject: [PATCH 2376/3292] hv_netvsc: Set needed_headroom according to VF

Set needed_headroom according to VF if VF needs a bigger
headroom.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f682a5572d84..382bebc2420d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2384,6 +2384,9 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	dev_hold(vf_netdev);
 	rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
 
+	if (ndev->needed_headroom < vf_netdev->needed_headroom)
+		ndev->needed_headroom = vf_netdev->needed_headroom;
+
 	vf_netdev->wanted_features = ndev->features;
 	netdev_update_features(vf_netdev);
 
@@ -2462,6 +2465,8 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 	RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
 	dev_put(vf_netdev);
 
+	ndev->needed_headroom = RNDIS_AND_PPI_SIZE;
+
 	return NOTIFY_OK;
 }
 

From baa00119d69e3318da8d99867fc1170ebddf09ce Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 18 Jun 2021 13:25:03 -0700
Subject: [PATCH 2377/3292] selftests: tls: clean up uninitialized warnings

A bunch of tests uses uninitialized stack memory as random
data to send. This is harmless but generates compiler warnings.
Explicitly init the buffers with random data.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tls.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 426d07875a48..58fea6eb588d 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,6 +25,18 @@
 #define TLS_PAYLOAD_MAX_LEN 16384
 #define SOL_TLS 282
 
+static void memrnd(void *s, size_t n)
+{
+	int *dword = s;
+	char *byte;
+
+	for (; n >= 4; n -= 4)
+		*dword++ = rand();
+	byte = (void *)dword;
+	while (n--)
+		*byte++ = rand();
+}
+
 FIXTURE(tls_basic)
 {
 	int fd, cfd;
@@ -308,6 +320,8 @@ TEST_F(tls, recv_max)
 	char recv_mem[TLS_PAYLOAD_MAX_LEN];
 	char buf[TLS_PAYLOAD_MAX_LEN];
 
+	memrnd(buf, sizeof(buf));
+
 	EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
 	EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
 	EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
@@ -588,6 +602,8 @@ TEST_F(tls, recvmsg_single_max)
 	struct iovec vec;
 	struct msghdr hdr;
 
+	memrnd(send_mem, sizeof(send_mem));
+
 	EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
 	vec.iov_base = (char *)recv_mem;
 	vec.iov_len = TLS_PAYLOAD_MAX_LEN;
@@ -610,6 +626,8 @@ TEST_F(tls, recvmsg_multiple)
 	struct msghdr hdr;
 	int i;
 
+	memrnd(buf, sizeof(buf));
+
 	EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
 	for (i = 0; i < msg_iovlen; i++) {
 		iov_base[i] = (char *)malloc(iov_len);
@@ -634,6 +652,8 @@ TEST_F(tls, single_send_multiple_recv)
 	char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
 	char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
 
+	memrnd(send_mem, sizeof(send_mem));
+
 	EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
 	memset(recv_mem, 0, total_len);
 

From 291c53e4dacd3a2cc3152d8af37f07f8496c594a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 18 Jun 2021 13:25:04 -0700
Subject: [PATCH 2378/3292] selftests: tls: fix chacha+bidir tests

ChaCha support did not adjust the bidirectional test.
We need to set up KTLS in reverse direction correctly,
otherwise these two cases will fail:

  tls.12_chacha.bidir
  tls.13_chacha.bidir

Fixes: 4f336e88a870 ("selftests/tls: add CHACHA20-POLY1305 to tls selftests")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tls.c | 67 ++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 58fea6eb588d..112d41d01b12 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,6 +25,35 @@
 #define TLS_PAYLOAD_MAX_LEN 16384
 #define SOL_TLS 282
 
+struct tls_crypto_info_keys {
+	union {
+		struct tls12_crypto_info_aes_gcm_128 aes128;
+		struct tls12_crypto_info_chacha20_poly1305 chacha20;
+	};
+	size_t len;
+};
+
+static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
+				 struct tls_crypto_info_keys *tls12)
+{
+	memset(tls12, 0, sizeof(*tls12));
+
+	switch (cipher_type) {
+	case TLS_CIPHER_CHACHA20_POLY1305:
+		tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305);
+		tls12->chacha20.info.version = tls_version;
+		tls12->chacha20.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_AES_GCM_128:
+		tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128);
+		tls12->aes128.info.version = tls_version;
+		tls12->aes128.info.cipher_type = cipher_type;
+		break;
+	default:
+		break;
+	}
+}
+
 static void memrnd(void *s, size_t n)
 {
 	int *dword = s;
@@ -145,33 +174,16 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha)
 
 FIXTURE_SETUP(tls)
 {
-	union {
-		struct tls12_crypto_info_aes_gcm_128 aes128;
-		struct tls12_crypto_info_chacha20_poly1305 chacha20;
-	} tls12;
+	struct tls_crypto_info_keys tls12;
 	struct sockaddr_in addr;
 	socklen_t len;
 	int sfd, ret;
-	size_t tls12_sz;
 
 	self->notls = false;
 	len = sizeof(addr);
 
-	memset(&tls12, 0, sizeof(tls12));
-	switch (variant->cipher_type) {
-	case TLS_CIPHER_CHACHA20_POLY1305:
-		tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305);
-		tls12.chacha20.info.version = variant->tls_version;
-		tls12.chacha20.info.cipher_type = variant->cipher_type;
-		break;
-	case TLS_CIPHER_AES_GCM_128:
-		tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128);
-		tls12.aes128.info.version = variant->tls_version;
-		tls12.aes128.info.cipher_type = variant->cipher_type;
-		break;
-	default:
-		tls12_sz = 0;
-	}
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+			     &tls12);
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -199,7 +211,7 @@ FIXTURE_SETUP(tls)
 
 	if (!self->notls) {
 		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
-				 tls12_sz);
+				 tls12.len);
 		ASSERT_EQ(ret, 0);
 	}
 
@@ -212,7 +224,7 @@ FIXTURE_SETUP(tls)
 		ASSERT_EQ(ret, 0);
 
 		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
-				 tls12_sz);
+				 tls12.len);
 		ASSERT_EQ(ret, 0);
 	}
 
@@ -854,18 +866,17 @@ TEST_F(tls, bidir)
 	int ret;
 
 	if (!self->notls) {
-		struct tls12_crypto_info_aes_gcm_128 tls12;
+		struct tls_crypto_info_keys tls12;
 
-		memset(&tls12, 0, sizeof(tls12));
-		tls12.info.version = variant->tls_version;
-		tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+		tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+				     &tls12);
 
 		ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
+				 tls12.len);
 		ASSERT_EQ(ret, 0);
 
 		ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
+				 tls12.len);
 		ASSERT_EQ(ret, 0);
 	}
 

From faebad853455b7126450c1690f7c31e048213543 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:47:38 +0300
Subject: [PATCH 2379/3292] net: hns3: fix different snprintf() limit

This patch doesn't affect runtime at all, it's just a correctness issue.

The ptp->info.name[] buffer has 16 characters but the snprintf() limit
was capped at 32 characters.  Fortunately, HCLGE_DRIVER_NAME is "hclge"
which isn't close to 16 characters so we're fine.

Fixes: 0bf5eb788512 ("net: hns3: add support for PTP")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
index b3eb8f109dbb..3b1f84502e36 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
@@ -415,8 +415,6 @@ int hclge_ptp_get_ts_info(struct hnae3_handle *handle,
 
 static int hclge_ptp_create_clock(struct hclge_dev *hdev)
 {
-#define HCLGE_PTP_NAME_LEN	32
-
 	struct hclge_ptp *ptp;
 
 	ptp = devm_kzalloc(&hdev->pdev->dev, sizeof(*ptp), GFP_KERNEL);
@@ -424,7 +422,7 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev)
 		return -ENOMEM;
 
 	ptp->hdev = hdev;
-	snprintf(ptp->info.name, HCLGE_PTP_NAME_LEN, "%s",
+	snprintf(ptp->info.name, sizeof(ptp->info.name), "%s",
 		 HCLGE_DRIVER_NAME);
 	ptp->info.owner = THIS_MODULE;
 	ptp->info.max_adj = HCLGE_PTP_CYCLE_ADJ_MAX;

From 956c3ae411b2746c5018e0454909eb8c662b31ef Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:49:18 +0300
Subject: [PATCH 2380/3292] net: hns3: fix a double shift bug

These flags are used to set and test bits like this:

	if (!test_bit(HCLGE_PTP_FLAG_TX_EN, &ptp->flags) ||

The issue is that test_bit() takes a bit number like 1, but we are
passing BIT(1) instead and it's testing BIT(BIT(1)).  This does not
cause a problem because it is always done consistently and the bit
values are very small.

Fixes: 0bf5eb788512 ("net: hns3: add support for PTP")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
index b3ca7afdaaa6..5a202b775471 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
@@ -43,9 +43,9 @@
 #define HCLGE_PTP_SEC_H_OFFSET		32u
 #define HCLGE_PTP_SEC_L_MASK		GENMASK(31, 0)
 
-#define HCLGE_PTP_FLAG_EN		BIT(0)
-#define HCLGE_PTP_FLAG_TX_EN		BIT(1)
-#define HCLGE_PTP_FLAG_RX_EN		BIT(2)
+#define HCLGE_PTP_FLAG_EN		0
+#define HCLGE_PTP_FLAG_TX_EN		1
+#define HCLGE_PTP_FLAG_RX_EN		2
 
 struct hclge_ptp {
 	struct hclge_dev *hdev;

From 1a1100d53f12451d50bc5ebbc941517760912ab8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:50:21 +0300
Subject: [PATCH 2381/3292] net/smc: Fix ENODATA tests in
 smc_nl_get_fback_stats()

These functions return negative ENODATA but the minus sign was left out
in the tests.

Fixes: f0dd7bf5e330 ("net/smc: Add netlink support for SMC fallback statistics")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_stats.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index 614013e3b574..e80e34f7ac15 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -393,17 +393,17 @@ int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		if (!skip_serv) {
 			rc_srv = smc_nl_get_fback_details(skb, cb, k, is_srv);
-			if (rc_srv && rc_srv != ENODATA)
+			if (rc_srv && rc_srv != -ENODATA)
 				break;
 		} else {
 			skip_serv = 0;
 		}
 		rc_clnt = smc_nl_get_fback_details(skb, cb, k, !is_srv);
-		if (rc_clnt && rc_clnt != ENODATA) {
+		if (rc_clnt && rc_clnt != -ENODATA) {
 			skip_serv = 1;
 			break;
 		}
-		if (rc_clnt == ENODATA && rc_srv == ENODATA)
+		if (rc_clnt == -ENODATA && rc_srv == -ENODATA)
 			break;
 	}
 	mutex_unlock(&net->smc.mutex_fback_rsn);

From d5fff4629beadf262559cae79012ce0dee268c1d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:51:26 +0300
Subject: [PATCH 2382/3292] net: iosm: remove an unnecessary NULL check

The address of &ipc_mux->ul_adb can't be NULL because it points to the
middle of a non-NULL struct.

Fixes: 9413491e20e1 ("net: iosm: encode or decode datagram")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: M Chetan Kumar <m.chetan.kumar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
index fbf3cab3394c..e634ffc6ec08 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
@@ -477,7 +477,7 @@ static void ipc_mux_ul_adgh_finish(struct iosm_mux *ipc_mux)
 	long long bytes;
 	char *str;
 
-	if (!ul_adb || !ul_adb->dest_skb) {
+	if (!ul_adb->dest_skb) {
 		dev_err(ipc_mux->dev, "no dest skb");
 		return;
 	}

From 753ba09aa3ea14b593b168d3ef541da00f4659f5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:52:22 +0300
Subject: [PATCH 2383/3292] net: qualcomm: rmnet: fix two pointer math bugs

We recently changed these two pointers from void pointers to struct
pointers and it breaks the pointer math so now the "txphdr" points
beyond the end of the buffer.

Fixes: 56a967c4f7e5 ("net: qualcomm: rmnet: Remove some unneeded casts")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 3ee5c1a8b46e..3676976c875b 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -168,7 +168,7 @@ static void rmnet_map_complement_ipv4_txporthdr_csum_field(struct iphdr *ip4h)
 	void *txphdr;
 	u16 *csum;
 
-	txphdr = ip4h + ip4h->ihl * 4;
+	txphdr = (void *)ip4h + ip4h->ihl * 4;
 
 	if (ip4h->protocol == IPPROTO_TCP || ip4h->protocol == IPPROTO_UDP) {
 		csum = (u16 *)rmnet_map_get_csum_field(ip4h->protocol, txphdr);
@@ -203,7 +203,7 @@ rmnet_map_complement_ipv6_txporthdr_csum_field(struct ipv6hdr *ip6h)
 	void *txphdr;
 	u16 *csum;
 
-	txphdr = ip6h + sizeof(struct ipv6hdr);
+	txphdr = ip6h + 1;
 
 	if (ip6h->nexthdr == IPPROTO_TCP || ip6h->nexthdr == IPPROTO_UDP) {
 		csum = (u16 *)rmnet_map_get_csum_field(ip6h->nexthdr, txphdr);

From 43c9a8111680043d065025510a034336417084d1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:53:26 +0300
Subject: [PATCH 2384/3292] nfp: flower-ct: check for error in
 nfp_fl_ct_offload_nft_flow()

The nfp_fl_ct_add_flow() function can fail so we need to check for
failure.

Fixes: 95255017e0a8 ("nfp: flower-ct: add nft flows to nft list")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Louis Peens <louis.peens@corigine.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/conntrack.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index 9ea77bb3b69c..273d529d43c2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -1067,6 +1067,8 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
 						    nfp_ct_map_params);
 		if (!ct_map_ent) {
 			ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, true, extack);
+			if (IS_ERR(ct_entry))
+				return PTR_ERR(ct_entry);
 			ct_entry->type = CT_TYPE_NFT;
 			list_add(&ct_entry->list_node, &zt->nft_flows_list);
 			zt->nft_flows_count++;

From 185ab886d3fb283e837283c343bf539c371e26cf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:26 +0000
Subject: [PATCH 2385/3292] af_unix: take address assignment/hash insertion
 into a new helper

Duplicated logics in all bind variants (autobind, bind-to-path,
bind-to-abstract) gets taken into a common helper.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c9dfec7b71e7..91447624a364 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -262,6 +262,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 	sk_add_node(sk, list);
 }
 
+static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
+			    unsigned hash)
+{
+	__unix_remove_socket(sk);
+	smp_store_release(&unix_sk(sk)->addr, addr);
+	__unix_insert_socket(&unix_socket_table[hash], sk);
+}
+
 static inline void unix_remove_socket(struct sock *sk)
 {
 	spin_lock(&unix_table_lock);
@@ -913,9 +921,7 @@ retry:
 	}
 	addr->hash ^= sk->sk_type;
 
-	__unix_remove_socket(sk);
-	smp_store_release(&u->addr, addr);
-	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
+	__unix_set_addr(sk, addr, addr->hash);
 	spin_unlock(&unix_table_lock);
 	err = 0;
 
@@ -1018,7 +1024,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	int err;
 	unsigned int hash;
 	struct unix_address *addr;
-	struct hlist_head *list;
 	struct path path = { };
 
 	err = -EINVAL;
@@ -1070,25 +1075,20 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
 		u->path = path;
-		list = &unix_socket_table[hash];
 	} else {
 		spin_lock(&unix_table_lock);
 		err = -EADDRINUSE;
 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
 					      sk->sk_type, hash)) {
+			spin_unlock(&unix_table_lock);
 			unix_release_addr(addr);
-			goto out_unlock;
+			goto out_up;
 		}
-
-		list = &unix_socket_table[addr->hash];
+		hash = addr->hash;
 	}
 
 	err = 0;
-	__unix_remove_socket(sk);
-	smp_store_release(&u->addr, addr);
-	__unix_insert_socket(list, sk);
-
-out_unlock:
+	__unix_set_addr(sk, addr, hash);
 	spin_unlock(&unix_table_lock);
 out_up:
 	mutex_unlock(&u->bindlock);

From c34d4582518ff83a4848c2d33a46be82e2499a5b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:27 +0000
Subject: [PATCH 2386/3292] unix_bind(): allocate addr earlier

makes it easier to massage; we do pay for that by extra work
(kmalloc+memcpy+kfree) in some error cases, but those are not
on the hot paths anyway.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 91447624a364..a984cf3d946d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1040,6 +1040,15 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (err < 0)
 		goto out;
 	addr_len = err;
+	err = -ENOMEM;
+	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
+	if (!addr)
+		goto out;
+
+	memcpy(addr->name, sunaddr, addr_len);
+	addr->len = addr_len;
+	addr->hash = hash ^ sk->sk_type;
+	refcount_set(&addr->refcnt, 1);
 
 	if (sun_path[0]) {
 		umode_t mode = S_IFSOCK |
@@ -1048,7 +1057,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		if (err) {
 			if (err == -EEXIST)
 				err = -EADDRINUSE;
-			goto out;
+			goto out_addr;
 		}
 	}
 
@@ -1060,16 +1069,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (u->addr)
 		goto out_up;
 
-	err = -ENOMEM;
-	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
-	if (!addr)
-		goto out_up;
-
-	memcpy(addr->name, sunaddr, addr_len);
-	addr->len = addr_len;
-	addr->hash = hash ^ sk->sk_type;
-	refcount_set(&addr->refcnt, 1);
-
 	if (sun_path[0]) {
 		addr->hash = UNIX_HASH_SIZE;
 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
@@ -1081,20 +1080,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
 					      sk->sk_type, hash)) {
 			spin_unlock(&unix_table_lock);
-			unix_release_addr(addr);
 			goto out_up;
 		}
 		hash = addr->hash;
 	}
 
-	err = 0;
 	__unix_set_addr(sk, addr, hash);
 	spin_unlock(&unix_table_lock);
+	addr = NULL;
+	err = 0;
 out_up:
 	mutex_unlock(&u->bindlock);
 out_put:
 	if (err)
 		path_put(&path);
+out_addr:
+	if (addr)
+		unix_release_addr(addr);
 out:
 	return err;
 }

From aee515170576609a0aa3413dc06a7f36f05a5fe2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:28 +0000
Subject: [PATCH 2387/3292] unix_bind(): separate BSD and abstract cases

We do get some duplication that way, but it's minor compared to
parts that are different.  What we get is an ability to change
locking in BSD case without making failure exits very hard to
follow.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 55 ++++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a984cf3d946d..84ddfb25bc64 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1024,7 +1024,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	int err;
 	unsigned int hash;
 	struct unix_address *addr;
-	struct path path = { };
 
 	err = -EINVAL;
 	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
@@ -1051,6 +1050,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	refcount_set(&addr->refcnt, 1);
 
 	if (sun_path[0]) {
+		struct path path = { };
 		umode_t mode = S_IFSOCK |
 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
 		err = unix_mknod(sun_path, mode, &path);
@@ -1059,41 +1059,54 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 				err = -EADDRINUSE;
 			goto out_addr;
 		}
-	}
 
-	err = mutex_lock_interruptible(&u->bindlock);
-	if (err)
-		goto out_put;
+		err = mutex_lock_interruptible(&u->bindlock);
+		if (err) {
+			path_put(&path);
+			goto out_addr;
+		}
 
-	err = -EINVAL;
-	if (u->addr)
-		goto out_up;
+		err = -EINVAL;
+		if (u->addr) {
+			mutex_unlock(&u->bindlock);
+			path_put(&path);
+			goto out_addr;
+		}
 
-	if (sun_path[0]) {
 		addr->hash = UNIX_HASH_SIZE;
 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
 		u->path = path;
+		__unix_set_addr(sk, addr, hash);
+		spin_unlock(&unix_table_lock);
+		mutex_unlock(&u->bindlock);
+		addr = NULL;
+		err = 0;
 	} else {
+		err = mutex_lock_interruptible(&u->bindlock);
+		if (err)
+			goto out_addr;
+
+		err = -EINVAL;
+		if (u->addr) {
+			mutex_unlock(&u->bindlock);
+			goto out_addr;
+		}
+
 		spin_lock(&unix_table_lock);
 		err = -EADDRINUSE;
 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
 					      sk->sk_type, hash)) {
 			spin_unlock(&unix_table_lock);
-			goto out_up;
+			mutex_unlock(&u->bindlock);
+			goto out_addr;
 		}
-		hash = addr->hash;
+		__unix_set_addr(sk, addr, addr->hash);
+		spin_unlock(&unix_table_lock);
+		mutex_unlock(&u->bindlock);
+		addr = NULL;
+		err = 0;
 	}
-
-	__unix_set_addr(sk, addr, hash);
-	spin_unlock(&unix_table_lock);
-	addr = NULL;
-	err = 0;
-out_up:
-	mutex_unlock(&u->bindlock);
-out_put:
-	if (err)
-		path_put(&path);
 out_addr:
 	if (addr)
 		unix_release_addr(addr);

From fa42d910a38ee310d5c6826563dd58a08735d5b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:29 +0000
Subject: [PATCH 2388/3292] unix_bind(): take BSD and abstract address cases
 into new helpers

unix_bind_bsd() and unix_bind_abstract() respectively.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 147 +++++++++++++++++++++++----------------------
 1 file changed, 74 insertions(+), 73 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 84ddfb25bc64..d48ebfb182c7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1014,104 +1014,105 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 	return err;
 }
 
+static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
+{
+	struct unix_sock *u = unix_sk(sk);
+	struct path path = { };
+	umode_t mode = S_IFSOCK |
+	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+	unsigned int hash;
+	int err;
+
+	err = unix_mknod(addr->name->sun_path, mode, &path);
+	if (err)
+		return err;
+
+	err = mutex_lock_interruptible(&u->bindlock);
+	if (err) {
+		path_put(&path);
+		return err;
+	}
+
+	if (u->addr) {
+		mutex_unlock(&u->bindlock);
+		path_put(&path);
+		return -EINVAL;
+	}
+
+	addr->hash = UNIX_HASH_SIZE;
+	hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+	spin_lock(&unix_table_lock);
+	u->path = path;
+	__unix_set_addr(sk, addr, hash);
+	spin_unlock(&unix_table_lock);
+	mutex_unlock(&u->bindlock);
+	return 0;
+}
+
+static int unix_bind_abstract(struct sock *sk, unsigned hash,
+			      struct unix_address *addr)
+{
+	struct unix_sock *u = unix_sk(sk);
+	int err;
+
+	err = mutex_lock_interruptible(&u->bindlock);
+	if (err)
+		return err;
+
+	if (u->addr) {
+		mutex_unlock(&u->bindlock);
+		return -EINVAL;
+	}
+
+	spin_lock(&unix_table_lock);
+	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
+				      sk->sk_type, hash)) {
+		spin_unlock(&unix_table_lock);
+		mutex_unlock(&u->bindlock);
+		return -EADDRINUSE;
+	}
+	__unix_set_addr(sk, addr, addr->hash);
+	spin_unlock(&unix_table_lock);
+	mutex_unlock(&u->bindlock);
+	return 0;
+}
+
 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
-	struct net *net = sock_net(sk);
-	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	char *sun_path = sunaddr->sun_path;
 	int err;
 	unsigned int hash;
 	struct unix_address *addr;
 
-	err = -EINVAL;
 	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
 	    sunaddr->sun_family != AF_UNIX)
-		goto out;
+		return -EINVAL;
 
-	if (addr_len == sizeof(short)) {
-		err = unix_autobind(sock);
-		goto out;
-	}
+	if (addr_len == sizeof(short))
+		return unix_autobind(sock);
 
 	err = unix_mkname(sunaddr, addr_len, &hash);
 	if (err < 0)
-		goto out;
+		return err;
 	addr_len = err;
-	err = -ENOMEM;
 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 	if (!addr)
-		goto out;
+		return -ENOMEM;
 
 	memcpy(addr->name, sunaddr, addr_len);
 	addr->len = addr_len;
 	addr->hash = hash ^ sk->sk_type;
 	refcount_set(&addr->refcnt, 1);
 
-	if (sun_path[0]) {
-		struct path path = { };
-		umode_t mode = S_IFSOCK |
-		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(sun_path, mode, &path);
-		if (err) {
-			if (err == -EEXIST)
-				err = -EADDRINUSE;
-			goto out_addr;
-		}
-
-		err = mutex_lock_interruptible(&u->bindlock);
-		if (err) {
-			path_put(&path);
-			goto out_addr;
-		}
-
-		err = -EINVAL;
-		if (u->addr) {
-			mutex_unlock(&u->bindlock);
-			path_put(&path);
-			goto out_addr;
-		}
-
-		addr->hash = UNIX_HASH_SIZE;
-		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
-		spin_lock(&unix_table_lock);
-		u->path = path;
-		__unix_set_addr(sk, addr, hash);
-		spin_unlock(&unix_table_lock);
-		mutex_unlock(&u->bindlock);
-		addr = NULL;
-		err = 0;
-	} else {
-		err = mutex_lock_interruptible(&u->bindlock);
-		if (err)
-			goto out_addr;
-
-		err = -EINVAL;
-		if (u->addr) {
-			mutex_unlock(&u->bindlock);
-			goto out_addr;
-		}
-
-		spin_lock(&unix_table_lock);
-		err = -EADDRINUSE;
-		if (__unix_find_socket_byname(net, sunaddr, addr_len,
-					      sk->sk_type, hash)) {
-			spin_unlock(&unix_table_lock);
-			mutex_unlock(&u->bindlock);
-			goto out_addr;
-		}
-		__unix_set_addr(sk, addr, addr->hash);
-		spin_unlock(&unix_table_lock);
-		mutex_unlock(&u->bindlock);
-		addr = NULL;
-		err = 0;
-	}
-out_addr:
-	if (addr)
+	if (sun_path[0])
+		err = unix_bind_bsd(sk, addr);
+	else
+		err = unix_bind_abstract(sk, hash, addr);
+	if (err)
 		unix_release_addr(addr);
-out:
-	return err;
+	return err == -EEXIST ? -EADDRINUSE : err;
 }
 
 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)

From 71e6be6f7d2bada7099d79205779c4452d4fd35b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:30 +0000
Subject: [PATCH 2389/3292] fold unix_mknod() into unix_bind_bsd()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d48ebfb182c7..fe337dc98400 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -984,46 +984,38 @@ fail:
 	return NULL;
 }
 
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
 {
+	struct unix_sock *u = unix_sk(sk);
+	umode_t mode = S_IFSOCK |
+	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+	struct path parent, path;
+	struct user_namespace *ns; // barf...
 	struct dentry *dentry;
-	struct path path;
-	int err = 0;
+	unsigned int hash;
+	int err;
+
 	/*
 	 * Get the parent directory, calculate the hash for last
 	 * component.
 	 */
-	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-	err = PTR_ERR(dentry);
+	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
 	if (IS_ERR(dentry))
-		return err;
+		return PTR_ERR(dentry);
+	ns = mnt_user_ns(parent.mnt);
 
 	/*
 	 * All right, let's create it.
 	 */
-	err = security_path_mknod(&path, dentry, mode, 0);
+	err = security_path_mknod(&parent, dentry, mode, 0);
 	if (!err) {
-		err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
-				dentry, mode, 0);
+		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
 		if (!err) {
-			res->mnt = mntget(path.mnt);
-			res->dentry = dget(dentry);
+			path.mnt = mntget(parent.mnt);
+			path.dentry = dget(dentry);
 		}
 	}
-	done_path_create(&path, dentry);
-	return err;
-}
-
-static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
-{
-	struct unix_sock *u = unix_sk(sk);
-	struct path path = { };
-	umode_t mode = S_IFSOCK |
-	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
-	unsigned int hash;
-	int err;
-
-	err = unix_mknod(addr->name->sun_path, mode, &path);
+	done_path_create(&parent, dentry);
 	if (err)
 		return err;
 

From 56c1731b280dc71febf5df80fcac1923ea973ab8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:31 +0000
Subject: [PATCH 2390/3292] unix_bind_bsd(): move done_path_create() call after
 dealing with ->bindlock

Final preparations for doing unlink on failure past the successful
mknod.  We can't hold ->bindlock over ->mknod() or ->unlink(), since
either might do sb_start_write() (e.g. on overlayfs).  However, we
can do it while holding filesystem and VFS locks - doing
	kern_path_create()
	vfs_mknod()
	grab ->bindlock
	if u->addr had been set
		drop ->bindlock
		done_path_create
		return -EINVAL
	else
		assign the address to socket
		drop ->bindlock
		done_path_create
		return 0
would be deadlock-free.  Here we massage unix_bind_bsd() to that
form.  We are still doing equivalent transformations.

Next commit will *not* be an equivalent transformation - it will
add a call of vfs_unlink() before done_path_create() in "alread bound"
case.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fe337dc98400..25dda9ca9d15 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -989,8 +989,8 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
 	struct unix_sock *u = unix_sk(sk);
 	umode_t mode = S_IFSOCK |
 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
-	struct path parent, path;
 	struct user_namespace *ns; // barf...
+	struct path parent;
 	struct dentry *dentry;
 	unsigned int hash;
 	int err;
@@ -1008,36 +1008,32 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
 	 * All right, let's create it.
 	 */
 	err = security_path_mknod(&parent, dentry, mode, 0);
-	if (!err) {
+	if (!err)
 		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
-		if (!err) {
-			path.mnt = mntget(parent.mnt);
-			path.dentry = dget(dentry);
-		}
-	}
-	done_path_create(&parent, dentry);
-	if (err)
+	if (err) {
+		done_path_create(&parent, dentry);
 		return err;
-
+	}
 	err = mutex_lock_interruptible(&u->bindlock);
 	if (err) {
-		path_put(&path);
+		done_path_create(&parent, dentry);
 		return err;
 	}
-
 	if (u->addr) {
 		mutex_unlock(&u->bindlock);
-		path_put(&path);
+		done_path_create(&parent, dentry);
 		return -EINVAL;
 	}
 
 	addr->hash = UNIX_HASH_SIZE;
-	hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+	hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 	spin_lock(&unix_table_lock);
-	u->path = path;
+	u->path.mnt = mntget(parent.mnt);
+	u->path.dentry = dget(dentry);
 	__unix_set_addr(sk, addr, hash);
 	spin_unlock(&unix_table_lock);
 	mutex_unlock(&u->bindlock);
+	done_path_create(&parent, dentry);
 	return 0;
 }
 

From c0c3b8d380a8f54c75786d41f6f9efbe761dac6c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:32 +0000
Subject: [PATCH 2391/3292] unix_bind_bsd(): unlink if we fail after successful
 mknod

We can do that more or less safely, since the parent is
held locked all along.  Yes, somebody might observe the
object via dcache, only to have it disappear afterwards,
but there's really no good way to prevent that.  It won't
race with other bind(2) or attempts to move the sucker
elsewhere, or put something else in its place - locked
parent prevents that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 25dda9ca9d15..42a9e0730344 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1010,20 +1010,13 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
 	err = security_path_mknod(&parent, dentry, mode, 0);
 	if (!err)
 		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
-	if (err) {
-		done_path_create(&parent, dentry);
-		return err;
-	}
+	if (err)
+		goto out;
 	err = mutex_lock_interruptible(&u->bindlock);
-	if (err) {
-		done_path_create(&parent, dentry);
-		return err;
-	}
-	if (u->addr) {
-		mutex_unlock(&u->bindlock);
-		done_path_create(&parent, dentry);
-		return -EINVAL;
-	}
+	if (err)
+		goto out_unlink;
+	if (u->addr)
+		goto out_unlock;
 
 	addr->hash = UNIX_HASH_SIZE;
 	hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
@@ -1035,6 +1028,16 @@ static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
 	mutex_unlock(&u->bindlock);
 	done_path_create(&parent, dentry);
 	return 0;
+
+out_unlock:
+	mutex_unlock(&u->bindlock);
+	err = -EINVAL;
+out_unlink:
+	/* failed after successful mknod?  unlink what we'd created... */
+	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+out:
+	done_path_create(&parent, dentry);
+	return err;
 }
 
 static int unix_bind_abstract(struct sock *sk, unsigned hash,

From be752283a2a2b4bfc2df512b5d9b03a34aece252 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Jun 2021 03:50:33 +0000
Subject: [PATCH 2392/3292] __unix_find_socket_byname(): don't pass hash and
 type separately

We only care about exclusive or of those, so pass that directly.
Makes life simpler for callers as well...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 42a9e0730344..58c2f318b0a8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -286,11 +286,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 
 static struct sock *__unix_find_socket_byname(struct net *net,
 					      struct sockaddr_un *sunname,
-					      int len, int type, unsigned int hash)
+					      int len, unsigned int hash)
 {
 	struct sock *s;
 
-	sk_for_each(s, &unix_socket_table[hash ^ type]) {
+	sk_for_each(s, &unix_socket_table[hash]) {
 		struct unix_sock *u = unix_sk(s);
 
 		if (!net_eq(sock_net(s), net))
@@ -305,13 +305,12 @@ static struct sock *__unix_find_socket_byname(struct net *net,
 
 static inline struct sock *unix_find_socket_byname(struct net *net,
 						   struct sockaddr_un *sunname,
-						   int len, int type,
-						   unsigned int hash)
+						   int len, unsigned int hash)
 {
 	struct sock *s;
 
 	spin_lock(&unix_table_lock);
-	s = __unix_find_socket_byname(net, sunname, len, type, hash);
+	s = __unix_find_socket_byname(net, sunname, len, hash);
 	if (s)
 		sock_hold(s);
 	spin_unlock(&unix_table_lock);
@@ -899,12 +898,12 @@ static int unix_autobind(struct socket *sock)
 retry:
 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
+	addr->hash ^= sk->sk_type;
 
 	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
 
-	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
-				      addr->hash)) {
+	if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
 		spin_unlock(&unix_table_lock);
 		/*
 		 * __unix_find_socket_byname() may take long time if many names
@@ -919,7 +918,6 @@ retry:
 		}
 		goto retry;
 	}
-	addr->hash ^= sk->sk_type;
 
 	__unix_set_addr(sk, addr, addr->hash);
 	spin_unlock(&unix_table_lock);
@@ -966,7 +964,7 @@ static struct sock *unix_find_other(struct net *net,
 		}
 	} else {
 		err = -ECONNREFUSED;
-		u = unix_find_socket_byname(net, sunname, len, type, hash);
+		u = unix_find_socket_byname(net, sunname, len, type ^ hash);
 		if (u) {
 			struct dentry *dentry;
 			dentry = unix_sk(u)->path.dentry;
@@ -1040,8 +1038,7 @@ out:
 	return err;
 }
 
-static int unix_bind_abstract(struct sock *sk, unsigned hash,
-			      struct unix_address *addr)
+static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
 {
 	struct unix_sock *u = unix_sk(sk);
 	int err;
@@ -1057,7 +1054,7 @@ static int unix_bind_abstract(struct sock *sk, unsigned hash,
 
 	spin_lock(&unix_table_lock);
 	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
-				      sk->sk_type, hash)) {
+				      addr->hash)) {
 		spin_unlock(&unix_table_lock);
 		mutex_unlock(&u->bindlock);
 		return -EADDRINUSE;
@@ -1100,7 +1097,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (sun_path[0])
 		err = unix_bind_bsd(sk, addr);
 	else
-		err = unix_bind_abstract(sk, hash, addr);
+		err = unix_bind_abstract(sk, addr);
 	if (err)
 		unix_release_addr(addr);
 	return err == -EEXIST ? -EADDRINUSE : err;

From 2afd6c8b43c1ee50444d410e953d7d2adf86b5ea Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 21 Jun 2021 12:56:22 -0500
Subject: [PATCH 2393/3292] dt-bindings: net: qcom,ipa: add support for MSM8998

Add support for "qcom,msm8998-ipa", which uses IPA v3.1.

Originally proposed by AngeloGioacchino Del Regno.

Link: https://lore.kernel.org/linux-arm-msm/20210211175015.200772-8-angelogioacchino.delregno@somainline.org
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 5fe6d3dceb08..ed88ba4b94df 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -44,6 +44,7 @@ description:
 properties:
   compatible:
     enum:
+      - qcom,msm8998-ipa
       - qcom,sc7180-ipa
       - qcom,sc7280-ipa
       - qcom,sdm845-ipa

From c31d73494fa5016596f0b36aeb3e388160c9c473 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 21 Jun 2021 12:56:23 -0500
Subject: [PATCH 2394/3292] net: ipa: inter-EE interrupts aren't always
 available

The GSI inter-EE interrupts are not supported prior to IPA v3.5.
Don't attempt to initialize them in gsi_irq_setup() for hardware
that does not support them.

Originally proposed by AngeloGioacchino Del Regno.

Link: https://lore.kernel.org/netdev/20210211175015.200772-4-angelogioacchino.delregno@somainline.org
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: AngeloGioacchino Del Regno
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c     | 13 ++++++++++---
 drivers/net/ipa/gsi_reg.h |  3 ++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index e374079603cf..efd826e508bc 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -210,9 +210,16 @@ static void gsi_irq_setup(struct gsi *gsi)
 	iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
 	iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
 
-	/* The inter-EE registers are in the non-adjusted address range */
-	iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET);
-	iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET);
+	/* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
+	if (gsi->version > IPA_VERSION_3_1) {
+		u32 offset;
+
+		/* These registers are in the non-adjusted address range */
+		offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
+		iowrite32(0, gsi->virt_raw + offset);
+		offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
+		iowrite32(0, gsi->virt_raw + offset);
+	}
 
 	iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
 }
diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h
index cb42c5ae86fa..bf9593d9eaea 100644
--- a/drivers/net/ipa/gsi_reg.h
+++ b/drivers/net/ipa/gsi_reg.h
@@ -52,7 +52,8 @@
  */
 #define GSI_EE_REG_ADJUST			0x0000d000	/* IPA v4.5+ */
 
-/* The two inter-EE IRQ register offsets are relative to gsi->virt_raw */
+/* The inter-EE IRQ registers are relative to gsi->virt_raw (IPA v3.5+) */
+
 #define GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET \
 			GSI_INTER_EE_N_SRC_CH_IRQ_MSK_OFFSET(GSI_EE_AP)
 #define GSI_INTER_EE_N_SRC_CH_IRQ_MSK_OFFSET(ee) \

From 3833d0abd2c5827fb50a6cc9f1654d7ea0b64b2c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 21 Jun 2021 12:56:24 -0500
Subject: [PATCH 2395/3292] net: ipa: disable misc clock gating for IPA v3.1

For IPA v3.1, a workaround is needed to disable gating on a MISC
clock.  I have no further explanation, but this is what the
downstream code (msm-4.4) does.

This was suggested in a patch from AngeloGioacchino Del Regno.

Link: https://lore.kernel.org/netdev/20210211175015.200772-2-angelogioacchino.delregno@somainline.org
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: AngeloGioacchino Del Regno
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index f82130db32f6..20a83c7f671f 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -400,16 +400,20 @@ static void ipa_hardware_config(struct ipa *ipa, const struct ipa_data *data)
 
 	/* Implement some hardware workarounds */
 	if (version >= IPA_VERSION_4_0 && version < IPA_VERSION_4_5) {
-		/* Enable open global clocks (not needed for IPA v4.5) */
-		val = GLOBAL_FMASK;
-		val |= GLOBAL_2X_CLK_FMASK;
-		iowrite32(val, ipa->reg_virt + IPA_REG_CLKON_CFG_OFFSET);
-
 		/* Disable PA mask to allow HOLB drop */
 		val = ioread32(ipa->reg_virt + IPA_REG_TX_CFG_OFFSET);
 		val &= ~PA_MASK_EN_FMASK;
 		iowrite32(val, ipa->reg_virt + IPA_REG_TX_CFG_OFFSET);
+
+		/* Enable open global clocks in the CLKON configuration */
+		val = GLOBAL_FMASK | GLOBAL_2X_CLK_FMASK;
+	} else if (version == IPA_VERSION_3_1) {
+		val = MISC_FMASK;	/* Disable MISC clock gating */
+	} else {
+		val = 0;		/* No CLKON configuration needed */
 	}
+	if (val)
+		iowrite32(val, ipa->reg_virt + IPA_REG_CLKON_CFG_OFFSET);
 
 	ipa_hardware_config_comp(ipa);
 

From 110971d1ee4db10f48374a9303e86db158da354e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 21 Jun 2021 12:56:25 -0500
Subject: [PATCH 2396/3292] net: ipa: FLAVOR_0 register doesn't exist until IPA
 v3.5

The FLAVOR_0 version first appears in IPA v3.5, so avoid attempting
to read it for versions prior to that.

This register contains a concise definition of the number and
direction of endpoints supported by the hardware, and without it
we can't verify endpoint configuration in ipa_endpoint_config().
In this case, just indicate that any endpoint number is available
for use.

Originally proposed by AngeloGioacchino Del Regno.

Link: https://lore.kernel.org/netdev/20210211175015.200772-3-angelogioacchino.delregno@somainline.org
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: AngeloGioacchino Del Regno
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 3520852936ed..ab02669bae4e 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1731,6 +1731,21 @@ int ipa_endpoint_config(struct ipa *ipa)
 	u32 max;
 	u32 val;
 
+	/* Prior to IPAv3.5, the FLAVOR_0 register was not supported.
+	 * Furthermore, the endpoints were not grouped such that TX
+	 * endpoint numbers started with 0 and RX endpoints had numbers
+	 * higher than all TX endpoints, so we can't do the simple
+	 * direction check used for newer hardware below.
+	 *
+	 * For hardware that doesn't support the FLAVOR_0 register,
+	 * just set the available mask to support any endpoint, and
+	 * assume the configuration is valid.
+	 */
+	if (ipa->version < IPA_VERSION_3_5) {
+		ipa->available = ~0;
+		return 0;
+	}
+
 	/* Find out about the endpoints supplied by the hardware, and ensure
 	 * the highest one doesn't exceed the number we support.
 	 */

From bae70a803a771d0f1e55cfe1db195d8af2765dd8 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 21 Jun 2021 12:56:26 -0500
Subject: [PATCH 2397/3292] net: ipa: introduce gsi_ring_setup()

Prior to IPA v3.5.1, there is no HW_PARAM_2 GSI register, which we
use to determine the number of channels and endpoints per execution
environment.  In that case, we will just assume the number supported
is the maximum supported by the driver.

Introduce gsi_ring_setup() to encapsulate the code that determines
the number of channels and endpoints.

Update GSI_EVT_RING_COUNT_MAX so it is big enough to handle any
available channel for all supported hardware (IPA v4.9 can have 23
channels and 24 event rings).

Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: AngeloGioacchino Del Regno
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 77 ++++++++++++++++++++++++++++---------------
 drivers/net/ipa/gsi.h |  2 +-
 2 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index efd826e508bc..427c68b2ad8f 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -224,6 +224,51 @@ static void gsi_irq_setup(struct gsi *gsi)
 	iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
 }
 
+/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
+static int gsi_ring_setup(struct gsi *gsi)
+{
+	struct device *dev = gsi->dev;
+	u32 count;
+	u32 val;
+
+	if (gsi->version < IPA_VERSION_3_5_1) {
+		/* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
+		gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
+		gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
+
+		return 0;
+	}
+
+	val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
+
+	count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
+	if (!count) {
+		dev_err(dev, "GSI reports zero channels supported\n");
+		return -EINVAL;
+	}
+	if (count > GSI_CHANNEL_COUNT_MAX) {
+		dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
+			 GSI_CHANNEL_COUNT_MAX, count);
+		count = GSI_CHANNEL_COUNT_MAX;
+	}
+	gsi->channel_count = count;
+
+	count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
+	if (!count) {
+		dev_err(dev, "GSI reports zero event rings supported\n");
+		return -EINVAL;
+	}
+	if (count > GSI_EVT_RING_COUNT_MAX) {
+		dev_warn(dev,
+			 "limiting to %u event rings; hardware supports %u\n",
+			 GSI_EVT_RING_COUNT_MAX, count);
+		count = GSI_EVT_RING_COUNT_MAX;
+	}
+	gsi->evt_ring_count = count;
+
+	return 0;
+}
+
 /* Event ring commands are performed one at a time.  Their completion
  * is signaled by the event ring control GSI interrupt type, which is
  * only enabled when we issue an event ring command.  Only the event
@@ -1834,43 +1879,21 @@ static void gsi_channel_teardown(struct gsi *gsi)
 /* Setup function for GSI.  GSI firmware must be loaded and initialized */
 int gsi_setup(struct gsi *gsi)
 {
-	struct device *dev = gsi->dev;
 	u32 val;
+	int ret;
 
 	/* Here is where we first touch the GSI hardware */
 	val = ioread32(gsi->virt + GSI_GSI_STATUS_OFFSET);
 	if (!(val & ENABLED_FMASK)) {
-		dev_err(dev, "GSI has not been enabled\n");
+		dev_err(gsi->dev, "GSI has not been enabled\n");
 		return -EIO;
 	}
 
 	gsi_irq_setup(gsi);		/* No matching teardown required */
 
-	val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
-
-	gsi->channel_count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
-	if (!gsi->channel_count) {
-		dev_err(dev, "GSI reports zero channels supported\n");
-		return -EINVAL;
-	}
-	if (gsi->channel_count > GSI_CHANNEL_COUNT_MAX) {
-		dev_warn(dev,
-			 "limiting to %u channels; hardware supports %u\n",
-			 GSI_CHANNEL_COUNT_MAX, gsi->channel_count);
-		gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
-	}
-
-	gsi->evt_ring_count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
-	if (!gsi->evt_ring_count) {
-		dev_err(dev, "GSI reports zero event rings supported\n");
-		return -EINVAL;
-	}
-	if (gsi->evt_ring_count > GSI_EVT_RING_COUNT_MAX) {
-		dev_warn(dev,
-			 "limiting to %u event rings; hardware supports %u\n",
-			 GSI_EVT_RING_COUNT_MAX, gsi->evt_ring_count);
-		gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
-	}
+	ret = gsi_ring_setup(gsi);	/* No matching teardown required */
+	if (ret)
+		return ret;
 
 	/* Initialize the error log */
 	iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET);
diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index d5996bdb20ef..81cd7b07f6e1 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -17,7 +17,7 @@
 
 /* Maximum number of channels and event rings supported by the driver */
 #define GSI_CHANNEL_COUNT_MAX	23
-#define GSI_EVT_RING_COUNT_MAX	20
+#define GSI_EVT_RING_COUNT_MAX	24
 
 /* Maximum TLV FIFO size for a channel; 64 here is arbitrary (and high) */
 #define GSI_TLV_MAX		64

From 1bb1a117878b925dcb959830cd3384c11ed0dea5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 21 Jun 2021 12:56:27 -0500
Subject: [PATCH 2398/3292] net: ipa: add IPA v3.1 configuration data

Add support for the MSM8998 SoC, which includes IPA version 3.1.

Originally proposed by AngeloGioacchino Del Regno.

Link: https://lore.kernel.org/netdev/20210211175015.200772-6-angelogioacchino.delregno@somainline.org
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: AngeloGioacchino Del Regno
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Makefile        |   6 +-
 drivers/net/ipa/ipa_data-v3.1.c | 533 ++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_data.h      |   1 +
 drivers/net/ipa/ipa_main.c      |   4 +
 4 files changed, 541 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ipa/ipa_data-v3.1.c

diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index bd34fce8f6e6..506f8d5cd4ee 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -10,6 +10,6 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o \
 				ipa_sysfs.o
 
-ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o \
-				ipa_data-v4.5.o ipa_data-v4.9.o \
-				ipa_data-v4.11.o
+ipa-y			+=	ipa_data-v3.1.o ipa_data-v3.5.1.o \
+				ipa_data-v4.2.o ipa_data-v4.5.o \
+				ipa_data-v4.9.o ipa_data-v4.11.o
diff --git a/drivers/net/ipa/ipa_data-v3.1.c b/drivers/net/ipa/ipa_data-v3.1.c
new file mode 100644
index 000000000000..4c28189462a7
--- /dev/null
+++ b/drivers/net/ipa/ipa_data-v3.1.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2019-2021 Linaro Ltd.
+ */
+
+#include <linux/log2.h>
+
+#include "gsi.h"
+#include "ipa_data.h"
+#include "ipa_endpoint.h"
+#include "ipa_mem.h"
+
+/** enum ipa_resource_type - IPA resource types for an SoC having IPA v3.1 */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_HDR_SECTORS,
+	IPA_RESOURCE_TYPE_SRC_HDRI1_BUFFER,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HDRI2_BUFFERS,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DATA_SECTOR_LISTS,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
+/* Resource groups used for an SoC having IPA v3.1 */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_UL		= 0,
+	IPA_RSRC_GROUP_SRC_DL,
+	IPA_RSRC_GROUP_SRC_DIAG,
+	IPA_RSRC_GROUP_SRC_DMA,
+	IPA_RSRC_GROUP_SRC_UNUSED,
+	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_UL		= 0,
+	IPA_RSRC_GROUP_DST_DL,
+	IPA_RSRC_GROUP_DST_DIAG_DPL,
+	IPA_RSRC_GROUP_DST_DMA,
+	IPA_RSRC_GROUP_DST_Q6ZIP_GENERAL,
+	IPA_RSRC_GROUP_DST_Q6ZIP_ENGINE,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
+};
+
+/* QSB configuration data for an SoC having IPA v3.1 */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 8,
+	},
+	[IPA_QSB_MASTER_PCIE] = {
+		.max_writes	= 2,
+		.max_reads	= 8,
+	},
+};
+
+/* Endpoint data for an SoC having IPA v3.1 */
+static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
+	[IPA_ENDPOINT_AP_COMMAND_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 6,
+		.endpoint_id	= 22,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 18,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL,
+				.dma_mode	= true,
+				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_LAN_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 7,
+		.endpoint_id	= 15,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 8,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL,
+				.aggregation	= true,
+				.status_enable	= true,
+				.rx = {
+					.pad_align	= ilog2(sizeof(u32)),
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 5,
+		.endpoint_id	= 3,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 512,
+			.tlv_count	= 16,
+		},
+		.endpoint = {
+			.filter_support	= true,
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL,
+				.checksum	= true,
+				.qmap		= true,
+				.status_enable	= true,
+				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
+					.status_endpoint =
+						IPA_ENDPOINT_MODEM_AP_RX,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 8,
+		.endpoint_id	= 16,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 8,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_DL,
+				.checksum	= true,
+				.qmap		= true,
+				.aggregation	= true,
+				.rx = {
+					.aggr_close_eof	= true,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_MODEM_LAN_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 4,
+		.endpoint_id	= 9,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 0,
+		.endpoint_id	= 5,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 5,
+		.endpoint_id	= 18,
+		.toward_ipa	= false,
+	},
+};
+
+/* Source resource configuration data for an SoC having IPA v3.1 */
+static const struct ipa_resource ipa_resource_src[] = {
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 3,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 3,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 1,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 1,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 2,	.max = 255,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HDR_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 255,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HDRI1_BUFFER] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 255,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 14,	.max = 14,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 16,	.max = 16,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 5,	.max = 5,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 5,	.max = 5,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 19,	.max = 19,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 26,	.max = 26,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 5,	.max = 5,	/* 3 downstream */
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 5,	.max = 5,	/* 7 downstream */
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HDRI2_BUFFERS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 255,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 255,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL] = {
+			.min = 19,	.max = 19,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DL] = {
+			.min = 26,	.max = 26,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DIAG] = {
+			.min = 5,	.max = 5,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 5,	.max = 5,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+};
+
+/* Destination resource configuration data for an SoC having IPA v3.1 */
+static const struct ipa_resource ipa_resource_dst[] = {
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL] = {
+			.min = 3,	.max = 3,	/* 2 downstream */
+		},
+		.limits[IPA_RSRC_GROUP_DST_DL] = {
+			.min = 3,	.max = 3,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DIAG_DPL] = {
+			.min = 1,	.max = 1,	/* 0 downstream */
+		},
+		/* IPA_RSRC_GROUP_DST_DMA uses 2 downstream */
+		.limits[IPA_RSRC_GROUP_DST_Q6ZIP_GENERAL] = {
+			.min = 3,	.max = 3,
+		},
+		.limits[IPA_RSRC_GROUP_DST_Q6ZIP_ENGINE] = {
+			.min = 3,	.max = 3,
+		},
+	},
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DIAG_DPL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DMA] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_DST_Q6ZIP_GENERAL] = {
+			.min = 0,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_DST_Q6ZIP_ENGINE] = {
+			.min = 0,	.max = 255,
+		},
+	},
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DL] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DIAG_DPL] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DMA] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_DST_Q6ZIP_GENERAL] = {
+			.min = 1,	.max = 1,
+		},
+	},
+};
+
+/* Resource configuration data for an SoC having IPA v3.1 */
+static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
+	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
+	.resource_src		= ipa_resource_src,
+	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
+	.resource_dst		= ipa_resource_dst,
+};
+
+/* IPA-resident memory region data for an SoC having IPA v3.1 */
+static const struct ipa_mem ipa_mem_local_data[] = {
+	{
+		.id		= IPA_MEM_UC_SHARED,
+		.offset		= 0x0000,
+		.size		= 0x0080,
+		.canary_count	= 0,
+	},
+	{
+		.id		= IPA_MEM_UC_INFO,
+		.offset		= 0x0080,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	{
+		.id		= IPA_MEM_V4_FILTER_HASHED,
+		.offset		= 0x0288,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V4_FILTER,
+		.offset		= 0x0308,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V6_FILTER_HASHED,
+		.offset		= 0x0388,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V6_FILTER,
+		.offset		= 0x0408,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V4_ROUTE_HASHED,
+		.offset		= 0x0488,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V4_ROUTE,
+		.offset		= 0x0508,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V6_ROUTE_HASHED,
+		.offset		= 0x0588,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_V6_ROUTE,
+		.offset		= 0x0608,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_MODEM_HEADER,
+		.offset		= 0x0688,
+		.size		= 0x0140,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_MODEM_PROC_CTX,
+		.offset		= 0x07d0,
+		.size		= 0x0200,
+		.canary_count	= 2,
+	},
+	{
+		.id		= IPA_MEM_AP_PROC_CTX,
+		.offset		= 0x09d0,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	{
+		.id		= IPA_MEM_MODEM,
+		.offset		= 0x0bd8,
+		.size		= 0x1424,
+		.canary_count	= 0,
+	},
+	{
+		.id		= IPA_MEM_END_MARKER,
+		.offset		= 0x2000,
+		.size		= 0,
+		.canary_count	= 1,
+	},
+};
+
+/* Memory configuration data for an SoC having IPA v3.1 */
+static const struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146bd000,
+	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00002000,
+};
+
+/* Interconnect bandwidths are in 1000 byte/second units */
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
+	{
+		.name			= "memory",
+		.peak_bandwidth		= 640000,	/* 640 MBps */
+		.average_bandwidth	= 80000,	/* 80 MBps */
+	},
+	{
+		.name			= "imem",
+		.peak_bandwidth		= 640000,	/* 640 MBps */
+		.average_bandwidth	= 80000,	/* 80 MBps */
+	},
+	/* Average bandwidth is unused for the next interconnect */
+	{
+		.name			= "config",
+		.peak_bandwidth		= 80000,	/* 80 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+};
+
+/* Clock and interconnect configuration data for an SoC having IPA v3.1 */
+static const struct ipa_clock_data ipa_clock_data = {
+	.core_clock_rate	= 16 * 1000 * 1000,	/* Hz */
+	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
+	.interconnect_data	= ipa_interconnect_data,
+};
+
+/* Configuration data for an SoC having IPA v3.1 */
+const struct ipa_data ipa_data_v3_1 = {
+	.version	= IPA_VERSION_3_1,
+	.backward_compat = BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
+	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
+	.endpoint_data	= ipa_gsi_endpoint_data,
+	.resource_data	= &ipa_resource_data,
+	.mem_data	= &ipa_mem_data,
+	.clock_data	= &ipa_clock_data,
+};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 5c4c8d72d7d8..5bc244c8f94e 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -300,6 +300,7 @@ struct ipa_data {
 	const struct ipa_clock_data *clock_data;
 };
 
+extern const struct ipa_data ipa_data_v3_1;
 extern const struct ipa_data ipa_data_v3_5_1;
 extern const struct ipa_data ipa_data_v4_2;
 extern const struct ipa_data ipa_data_v4_5;
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 20a83c7f671f..9810c61a0320 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -578,6 +578,10 @@ out_release_firmware:
 }
 
 static const struct of_device_id ipa_match[] = {
+	{
+		.compatible	= "qcom,msm8998-ipa",
+		.data		= &ipa_data_v3_1,
+	},
 	{
 		.compatible	= "qcom,sdm845-ipa",
 		.data		= &ipa_data_v3_5_1,

From d51ea60e01f9fab3269e18d46657a9ae0c2fa3ad Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 21 Jun 2021 10:50:39 +0300
Subject: [PATCH 2399/3292] mlxsw: reg: Add bank number to MCIA register

Add bank number to MCIA (Management Cable Info Access) register in order
to allow access to banked pages on EEPROMs using CMIS (Common Management
Interface Specification) memory map.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 93f1db3927af..cd60a0f91933 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9714,6 +9714,12 @@ MLXSW_ITEM32(reg, mcia, page_number, 0x04, 16, 8);
  */
 MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16);
 
+/* reg_mcia_bank_number
+ * Bank number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcia, bank_number, 0x08, 16, 8);
+
 /* reg_mcia_size
  * Number of bytes to read/write (up to 48 bytes).
  * Access: RW

From cecefb3a6eeb5d835abd051e67e27e1506289ccf Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 21 Jun 2021 10:50:40 +0300
Subject: [PATCH 2400/3292] mlxsw: reg: Document possible MCIA status values

Will be used to emit meaningful messages to user space via extack in a
subsequent patch.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index cd60a0f91933..6fbda6ebd590 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9690,6 +9690,20 @@ MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1);
  */
 MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8);
 
+enum {
+	MLXSW_REG_MCIA_STATUS_GOOD = 0,
+	/* No response from module's EEPROM. */
+	MLXSW_REG_MCIA_STATUS_NO_EEPROM_MODULE = 1,
+	/* Module type not supported by the device. */
+	MLXSW_REG_MCIA_STATUS_MODULE_NOT_SUPPORTED = 2,
+	/* No module present indication. */
+	MLXSW_REG_MCIA_STATUS_MODULE_NOT_CONNECTED = 3,
+	/* Error occurred while trying to access module's EEPROM using I2C. */
+	MLXSW_REG_MCIA_STATUS_I2C_ERROR = 9,
+	/* Module is disabled. */
+	MLXSW_REG_MCIA_STATUS_MODULE_DISABLED = 16,
+};
+
 /* reg_mcia_status
  * Module status.
  * Access: RO

From 1e27b9e408039af6210708f76c6fe735c2c2c9c1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 21 Jun 2021 10:50:41 +0300
Subject: [PATCH 2401/3292] mlxsw: core: Add support for module EEPROM read by
 page

Add support for ethtool_ops::get_module_eeprom_by_page() which allows
user space to read transceiver module EEPROM based on passed parameters.

The I2C address is not validated in order to avoid module-specific code.
In case of wrong address, error will be returned from device's firmware.

Tested by comparing output with legacy method (ioctl) output.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/core_env.c    | 74 +++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/core_env.h    |  7 ++
 drivers/net/ethernet/mellanox/mlxsw/minimal.c | 13 ++++
 .../mellanox/mlxsw/spectrum_ethtool.c         | 14 ++++
 4 files changed, 108 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index b3ca5bd33a7f..4a0dbdb6730b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <linux/err.h>
+#include <linux/ethtool.h>
 #include <linux/sfp.h>
 
 #include "core.h"
@@ -315,6 +316,79 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev,
 }
 EXPORT_SYMBOL(mlxsw_env_get_module_eeprom);
 
+static int mlxsw_env_mcia_status_process(const char *mcia_pl,
+					 struct netlink_ext_ack *extack)
+{
+	u8 status = mlxsw_reg_mcia_status_get(mcia_pl);
+
+	switch (status) {
+	case MLXSW_REG_MCIA_STATUS_GOOD:
+		return 0;
+	case MLXSW_REG_MCIA_STATUS_NO_EEPROM_MODULE:
+		NL_SET_ERR_MSG_MOD(extack, "No response from module's EEPROM");
+		return -EIO;
+	case MLXSW_REG_MCIA_STATUS_MODULE_NOT_SUPPORTED:
+		NL_SET_ERR_MSG_MOD(extack, "Module type not supported by the device");
+		return -EOPNOTSUPP;
+	case MLXSW_REG_MCIA_STATUS_MODULE_NOT_CONNECTED:
+		NL_SET_ERR_MSG_MOD(extack, "No module present indication");
+		return -EIO;
+	case MLXSW_REG_MCIA_STATUS_I2C_ERROR:
+		NL_SET_ERR_MSG_MOD(extack, "Error occurred while trying to access module's EEPROM using I2C");
+		return -EIO;
+	case MLXSW_REG_MCIA_STATUS_MODULE_DISABLED:
+		NL_SET_ERR_MSG_MOD(extack, "Module is disabled");
+		return -EIO;
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "Unknown error");
+		return -EIO;
+	}
+}
+
+int
+mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
+				    const struct ethtool_module_eeprom *page,
+				    struct netlink_ext_ack *extack)
+{
+	u32 bytes_read = 0;
+	u16 device_addr;
+
+	/* Offset cannot be larger than 2 * ETH_MODULE_EEPROM_PAGE_LEN */
+	device_addr = page->offset;
+
+	while (bytes_read < page->length) {
+		char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
+		char mcia_pl[MLXSW_REG_MCIA_LEN];
+		u8 size;
+		int err;
+
+		size = min_t(u8, page->length - bytes_read,
+			     MLXSW_REG_MCIA_EEPROM_SIZE);
+
+		mlxsw_reg_mcia_pack(mcia_pl, module, 0, page->page,
+				    device_addr + bytes_read, size,
+				    page->i2c_address);
+		mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank);
+
+		err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to access module's EEPROM");
+			return err;
+		}
+
+		err = mlxsw_env_mcia_status_process(mcia_pl, extack);
+		if (err)
+			return err;
+
+		mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+		memcpy(page->data + bytes_read, eeprom_tmp, size);
+		bytes_read += size;
+	}
+
+	return bytes_read;
+}
+EXPORT_SYMBOL(mlxsw_env_get_module_eeprom_by_page);
+
 static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core,
 					    u8 module,
 					    bool *p_has_temp_sensor)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
index 2b23f8a87862..0bf5bd0f8a7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
@@ -4,6 +4,8 @@
 #ifndef _MLXSW_CORE_ENV_H
 #define _MLXSW_CORE_ENV_H
 
+#include <linux/ethtool.h>
+
 struct ethtool_modinfo;
 struct ethtool_eeprom;
 
@@ -17,6 +19,11 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev,
 				struct mlxsw_core *mlxsw_core, int module,
 				struct ethtool_eeprom *ee, u8 *data);
 
+int
+mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
+				    const struct ethtool_module_eeprom *page,
+				    struct netlink_ext_ack *extack);
+
 int
 mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module,
 				      u64 *p_counter);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index 68102726c6a7..d9d56c44e994 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -112,10 +112,23 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
 					   ee, data);
 }
 
+static int
+mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev,
+				  const struct ethtool_module_eeprom *page,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev);
+	struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core;
+
+	return mlxsw_env_get_module_eeprom_by_page(core, mlxsw_m_port->module,
+						   page, extack);
+}
+
 static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
 	.get_drvinfo		= mlxsw_m_module_get_drvinfo,
 	.get_module_info	= mlxsw_m_get_module_info,
 	.get_module_eeprom	= mlxsw_m_get_module_eeprom,
+	.get_module_eeprom_by_page = mlxsw_m_get_module_eeprom_by_page,
 };
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index c8061beed6db..267590a0eee7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -1050,6 +1050,19 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
 	return err;
 }
 
+static int
+mlxsw_sp_get_module_eeprom_by_page(struct net_device *dev,
+				   const struct ethtool_module_eeprom *page,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	u8 module = mlxsw_sp_port->mapping.module;
+
+	return mlxsw_env_get_module_eeprom_by_page(mlxsw_sp->core, module, page,
+						   extack);
+}
+
 static int
 mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
 {
@@ -1199,6 +1212,7 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
 	.set_link_ksettings		= mlxsw_sp_port_set_link_ksettings,
 	.get_module_info		= mlxsw_sp_get_module_info,
 	.get_module_eeprom		= mlxsw_sp_get_module_eeprom,
+	.get_module_eeprom_by_page	= mlxsw_sp_get_module_eeprom_by_page,
 	.get_ts_info			= mlxsw_sp_get_ts_info,
 	.get_eth_phy_stats		= mlxsw_sp_get_eth_phy_stats,
 	.get_eth_mac_stats		= mlxsw_sp_get_eth_mac_stats,

From 4f7d2247f839c8cf266662a83238c85ef21ea9c6 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 19 Jun 2021 15:28:36 +0800
Subject: [PATCH 2402/3292] net: c101: add blank line after declarations

This patch fixes the checkpatch error about missing a blank line
after declarations.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index 7e431e5b6e85..94b852fa22a1 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -416,6 +416,7 @@ static void __exit c101_cleanup(void)
 
 	while (card) {
 		card_t *ptr = card;
+
 		card = card->next_card;
 		unregister_hdlc_device(port_to_dev(ptr));
 		c101_destroy_card(ptr);

From 7774318b9e5eb8ecfb42be04d65ff8ac657b5faa Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 19 Jun 2021 15:28:37 +0800
Subject: [PATCH 2403/3292] net: c101: replace comparison to NULL with "!card"

According to the chackpatch.pl, comparison to NULL could
be written "!card".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index 94b852fa22a1..f33192eb9517 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -307,7 +307,7 @@ static int __init c101_run(unsigned long irq, unsigned long winbase)
 	}
 
 	card = kzalloc(sizeof(card_t), GFP_KERNEL);
-	if (card == NULL)
+	if (!card)
 		return -ENOBUFS;
 
 	card->dev = alloc_hdlcdev(card);
@@ -381,7 +381,7 @@ static int __init c101_run(unsigned long irq, unsigned long winbase)
 
 static int __init c101_init(void)
 {
-	if (hw == NULL) {
+	if (!hw) {
 #ifdef MODULE
 		pr_info("no card initialized\n");
 #endif

From 41505d3f0f51329c959e92479ed1219a2e85f5c2 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 19 Jun 2021 15:28:38 +0800
Subject: [PATCH 2404/3292] net: c101: remove redundant spaces

According to the chackpatch.pl, no space before tabs.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/c101.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c
index f33192eb9517..059c2f7133be 100644
--- a/drivers/net/wan/c101.c
+++ b/drivers/net/wan/c101.c
@@ -92,7 +92,7 @@ static card_t **new_card = &first_card;
 #define phy_node(port)		   (0)
 #define winsize(card)		   (C101_WINDOW_SIZE)
 #define win0base(card)		   ((card)->win0base)
-#define winbase(card)      	   ((card)->win0base + 0x2000)
+#define winbase(card)		   ((card)->win0base + 0x2000)
 #define get_port(card, port)	   (card)
 static void sca_msci_intr(port_t *port);
 

From d452d48b9f8b1a7f8152d33ef52cfd7fe1735b0a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 18 Jun 2021 13:34:06 -0700
Subject: [PATCH 2405/3292] tls: prevent oversized sendfile() hangs by ignoring
 MSG_MORE

We got multiple reports that multi_chunk_sendfile test
case from tls selftest fails. This was sort of expected,
as the original fix was never applied (see it in the first
Link:). The test in question uses sendfile() with count
larger than the size of the underlying file. This will
make splice set MSG_MORE on all sendpage calls, meaning
TLS will never close and flush the last partial record.

Eric seem to have addressed a similar problem in
commit 35f9c09fe9c7 ("tcp: tcp_sendpages() should call tcp_push() once")
by introducing MSG_SENDPAGE_NOTLAST. Unlike MSG_MORE
MSG_SENDPAGE_NOTLAST is not set on the last call
of a "pipefull" of data (PIPE_DEF_BUFFERS == 16,
so every 16 pages or whenever we run out of data).

Having a break every 16 pages should be fine, TLS
can pack exactly 4 pages into a record, so for
aligned reads there should be no difference,
unaligned may see one extra record per sendpage().

Sticking to TCP semantics seems preferable to modifying
splice, but we can revisit it if real life scenarios
show a regression.

Reported-by: Vadim Fedorenko <vfedorenko@novek.ru>
Reported-by: Seth Forshee <seth.forshee@canonical.com>
Link: https://lore.kernel.org/netdev/1591392508-14592-1-git-send-email-pooja.trivedi@stackpath.com/
Fixes: 3c4d7559159b ("tls: kernel TLS support")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 694de024d0ee..74e5701034aa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1153,7 +1153,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
 	int ret = 0;
 	bool eor;
 
-	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
+	eor = !(flags & MSG_SENDPAGE_NOTLAST);
 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	/* Call the sk_stream functions to manage the sndbuf mem. */

From 8674f8d310215d2bb7469b80e6cfccc044f717b3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Jun 2021 19:42:14 +0300
Subject: [PATCH 2406/3292] net: dsa: assert uniqueness of dsa,member
 properties

The cross-chip notifiers work by comparing each ds->index against the
info->sw_index value from the notifier. The ds->index is retrieved from
the device tree dsa,member property.

If a single tree cross-chip topology does not declare unique switch IDs,
this will result in hard-to-debug issues/voodoo effects such as the
cross-chip notifier for one switch port also matching the port with the
same number from another switch.

Check in dsa_switch_parse_member_of() whether the DSA switch tree
contains a DSA switch with the index we're preparing to add, before
actually adding it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index b71e87909f0e..ba244fbd9646 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -1259,6 +1259,13 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
 	if (!ds->dst)
 		return -ENOMEM;
 
+	if (dsa_switch_find(ds->dst->index, ds->index)) {
+		dev_err(ds->dev,
+			"A DSA switch with index %d already exists in tree %d\n",
+			ds->index, ds->dst->index);
+		return -EEXIST;
+	}
+
 	return 0;
 }
 

From a8986681ccada614a30df7248390780e7708a763 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Jun 2021 19:42:15 +0300
Subject: [PATCH 2407/3292] net: dsa: export the dsa_port_is_{user,cpu,dsa}
 helpers

The difference between dsa_is_user_port and dsa_port_is_user is that the
former needs to look up the list of ports of the DSA switch tree in
order to find the struct dsa_port, while the latter directly receives it
as an argument.

dsa_is_user_port is already in widespread use and has its place, so
there isn't any chance of converting all callers to a single form.
But being able to do:
	dsa_port_is_user(dp)
instead of
	dsa_is_user_port(dp->ds, dp->index)

is much more efficient too, especially when the "dp" comes from an
iterator over the DSA switch tree - this reduces the complexity from
quadratic to linear.

Move these helpers from dsa2.c to include/net/dsa.h so that others can
use them too.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 15 +++++++++++++++
 net/dsa/dsa2.c    | 15 ---------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 289d68e82da0..ea47783d5695 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -409,6 +409,21 @@ static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
 	return NULL;
 }
 
+static inline bool dsa_port_is_dsa(struct dsa_port *port)
+{
+	return port->type == DSA_PORT_TYPE_DSA;
+}
+
+static inline bool dsa_port_is_cpu(struct dsa_port *port)
+{
+	return port->type == DSA_PORT_TYPE_CPU;
+}
+
+static inline bool dsa_port_is_user(struct dsa_port *dp)
+{
+	return dp->type == DSA_PORT_TYPE_USER;
+}
+
 static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
 {
 	return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index ba244fbd9646..9000a8c84baf 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -219,21 +219,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst)
 		kref_put(&dst->refcount, dsa_tree_release);
 }
 
-static bool dsa_port_is_dsa(struct dsa_port *port)
-{
-	return port->type == DSA_PORT_TYPE_DSA;
-}
-
-static bool dsa_port_is_cpu(struct dsa_port *port)
-{
-	return port->type == DSA_PORT_TYPE_CPU;
-}
-
-static bool dsa_port_is_user(struct dsa_port *dp)
-{
-	return dp->type == DSA_PORT_TYPE_USER;
-}
-
 static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
 						   struct device_node *dn)
 {

From abd49535c3801f33c3ca42d81271d7e535adce81 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Jun 2021 19:42:16 +0300
Subject: [PATCH 2408/3292] net: dsa: execute dsa_switch_mdb_add only for
 routing port in cross-chip topologies

Currently, the notifier for adding a multicast MAC address matches on
the targeted port and on all DSA links in the system, be they upstream
or downstream links.

This leads to a considerable amount of useless traffic.

Consider this daisy chain topology, and a MDB add notifier emitted on
sw0p0. It matches on sw0p0, sw0p3, sw1p3 and sw2p4.

   sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
[  user ] [  user ] [  user ] [  dsa  ] [  cpu  ]
[   x   ] [       ] [       ] [   x   ] [       ]
                                  |
                                  +---------+
                                            |
   sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
[  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
[       ] [       ] [       ] [   x   ] [   x   ]
                                  |
                                  +---------+
                                            |
   sw2p0     sw2p1     sw2p2     sw2p3     sw2p4
[  user ] [  user ] [  user ] [  user ] [  dsa  ]
[       ] [       ] [       ] [       ] [   x   ]

But switch 0 has no reason to send the multicast traffic for that MAC
address on sw0p3, which is how it reaches switches 1 and 2. Those
switches don't expect, according to the user configuration, to receive
this multicast address from switch 1, and they will drop it anyway,
because the only valid destination is the port they received it on.
They only need to configure themselves to deliver that multicast address
_towards_ switch 1, where the MDB entry is installed.

Similarly, switch 1 should not send this multicast traffic towards
sw1p3, because that is how it reaches switch 2.

With this change, the heat map for this MDB notifier changes as follows:

   sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
[  user ] [  user ] [  user ] [  dsa  ] [  cpu  ]
[   x   ] [       ] [       ] [       ] [       ]
                                  |
                                  +---------+
                                            |
   sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
[  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
[       ] [       ] [       ] [       ] [   x   ]
                                  |
                                  +---------+
                                            |
   sw2p0     sw2p1     sw2p2     sw2p3     sw2p4
[  user ] [  user ] [  user ] [  user ] [  dsa  ]
[       ] [       ] [       ] [       ] [   x   ]

Now the mdb notifier behaves the same as the fdb notifier.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/switch.c | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 9bf8e20ecdf3..8b601ced6b45 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -232,36 +232,15 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds,
 	return 0;
 }
 
-static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
-				 struct dsa_notifier_mdb_info *info)
-{
-	if (ds->index == info->sw_index && port == info->port)
-		return true;
-
-	if (dsa_is_dsa_port(ds, port))
-		return true;
-
-	return false;
-}
-
 static int dsa_switch_mdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	int err = 0;
-	int port;
+	int port = dsa_towards_port(ds, info->sw_index, info->port);
 
 	if (!ds->ops->port_mdb_add)
 		return -EOPNOTSUPP;
 
-	for (port = 0; port < ds->num_ports; port++) {
-		if (dsa_switch_mdb_match(ds, port, info)) {
-			err = ds->ops->port_mdb_add(ds, port, info->mdb);
-			if (err)
-				break;
-		}
-	}
-
-	return err;
+	return ds->ops->port_mdb_add(ds, port, info->mdb);
 }
 
 static int dsa_switch_mdb_del(struct dsa_switch *ds,

From 4e4ab7950044d195f6e3d4dac328f506badb6efa Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Jun 2021 19:42:17 +0300
Subject: [PATCH 2409/3292] net: dsa: calculate the largest_mtu across all
 ports in the tree

If we have a cross-chip topology like this:

   sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
[  cpu  ] [  user ] [  user ] [  dsa  ] [  user ]
                                  |
                                  +---------+
                                            |
   sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
[  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]

and we issue the following commands:

1. ip link set sw0p1 mtu 1700
2. ip link set sw1p1 mtu 1600

we notice the following happening:

Command 1. emits a non-targeted MTU notifier for the CPU port (sw0p0)
with the largest_mtu calculated across switch 0, of 1700. This matches
sw0p0, sw0p3 and sw1p4 (all CPU ports and DSA links).
Then, it emits a targeted MTU notifier for the user port (sw0p1), again
with MTU 1700 (this doesn't matter).

Command 2. emits a non-targeted MTU notifier for the CPU port (sw0p0)
with the largest_mtu calculated across switch 1, of 1600. This matches
the same group of ports as above, and decreases the MTU for the CPU port
and the DSA links from 1700 to 1600.

As a result, the sw0p1 user port can no longer communicate with its CPU
port at MTU 1700.

To address this, we should calculate the largest_mtu across all switches
that may share a CPU port, and only emit MTU notifiers with that value.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 798944aa847a..ac2ca5f75af3 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1528,6 +1528,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp_iter;
 	struct dsa_port *cpu_dp;
 	int port = p->dp->index;
 	int largest_mtu = 0;
@@ -1535,31 +1536,31 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 	int old_master_mtu;
 	int mtu_limit;
 	int cpu_mtu;
-	int err, i;
+	int err;
 
 	if (!ds->ops->port_change_mtu)
 		return -EOPNOTSUPP;
 
-	for (i = 0; i < ds->num_ports; i++) {
+	list_for_each_entry(dp_iter, &ds->dst->ports, list) {
 		int slave_mtu;
 
-		if (!dsa_is_user_port(ds, i))
+		if (!dsa_port_is_user(dp_iter))
 			continue;
 
 		/* During probe, this function will be called for each slave
 		 * device, while not all of them have been allocated. That's
 		 * ok, it doesn't change what the maximum is, so ignore it.
 		 */
-		if (!dsa_to_port(ds, i)->slave)
+		if (!dp_iter->slave)
 			continue;
 
 		/* Pretend that we already applied the setting, which we
 		 * actually haven't (still haven't done all integrity checks)
 		 */
-		if (i == port)
+		if (dp_iter == dp)
 			slave_mtu = new_mtu;
 		else
-			slave_mtu = dsa_to_port(ds, i)->slave->mtu;
+			slave_mtu = dp_iter->slave->mtu;
 
 		if (largest_mtu < slave_mtu)
 			largest_mtu = slave_mtu;

From 88faba20e2100c1f367133af56612742ad37df08 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Jun 2021 19:42:18 +0300
Subject: [PATCH 2410/3292] net: dsa: targeted MTU notifiers should only match
 on one port

dsa_slave_change_mtu() calls dsa_port_mtu_change() twice:
- it sends a cross-chip notifier with the MTU of the CPU port which is
  used to update the DSA links.
- it sends one targeted MTU notifier which is supposed to only match the
  user port on which we are changing the MTU. The "propagate_upstream"
  variable is used here to bypass the cross-chip notifier system from
  switch.c

But due to a mistake, the second, targeted notifier matches not only on
the user port, but also on the DSA link which is a member of the same
switch, if that exists.

And because the DSA links of the entire dst were programmed in a
previous round to the largest_mtu via a "propagate_upstream == true"
notification, then the dsa_port_mtu_change(propagate_upstream == false)
call that is immediately upcoming will break the MTU on the one DSA link
which is chip-wise local to the dp whose MTU is changing right now.

Example given this daisy chain topology:

   sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
[  cpu  ] [  user ] [  user ] [  dsa  ] [  user ]
[   x   ] [       ] [       ] [   x   ] [       ]
                                  |
                                  +---------+
                                            |
   sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
[  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
[       ] [       ] [       ] [       ] [   x   ]

ip link set sw0p1 mtu 9000
ip link set sw1p1 mtu 9000 # at this stage, sw0p1 and sw1p1 can talk
                           # to one another using jumbo frames
ip link set sw0p2 mtu 1500 # this programs the sw0p3 DSA link first to
                           # the largest_mtu of 9000, then reprograms it to
                           # 1500 with the "propagate_upstream == false"
                           # notifier, breaking communication between
                           # sw0p1 and sw1p1

To escape from this situation, make the targeted match really match on a
single port - the user port, and rename the "propagate_upstream"
variable to "targeted_match" to clarify the intention and avoid future
issues.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h | 4 ++--
 net/dsa/port.c     | 4 ++--
 net/dsa/slave.c    | 9 +++++----
 net/dsa/switch.c   | 9 ++++++---
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index b8b17474b72b..b0811253d101 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -84,7 +84,7 @@ struct dsa_notifier_vlan_info {
 
 /* DSA_NOTIFIER_MTU */
 struct dsa_notifier_mtu_info {
-	bool propagate_upstream;
+	bool targeted_match;
 	int sw_index;
 	int port;
 	int mtu;
@@ -200,7 +200,7 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
 int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
-			bool propagate_upstream);
+			bool targeted_match);
 int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid);
 int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 6379d66a6bb3..5c93f1e1a03d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -567,11 +567,11 @@ int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
 }
 
 int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
-			bool propagate_upstream)
+			bool targeted_match)
 {
 	struct dsa_notifier_mtu_info info = {
 		.sw_index = dp->ds->index,
-		.propagate_upstream = propagate_upstream,
+		.targeted_match = targeted_match,
 		.port = dp->index,
 		.mtu = new_mtu,
 	};
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ac2ca5f75af3..5e668e529575 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1586,14 +1586,15 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 			goto out_master_failed;
 
 		/* We only need to propagate the MTU of the CPU port to
-		 * upstream switches.
+		 * upstream switches, so create a non-targeted notifier which
+		 * updates all switches.
 		 */
-		err = dsa_port_mtu_change(cpu_dp, cpu_mtu, true);
+		err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false);
 		if (err)
 			goto out_cpu_failed;
 	}
 
-	err = dsa_port_mtu_change(dp, new_mtu, false);
+	err = dsa_port_mtu_change(dp, new_mtu, true);
 	if (err)
 		goto out_port_failed;
 
@@ -1607,7 +1608,7 @@ out_port_failed:
 	if (new_master_mtu != old_master_mtu)
 		dsa_port_mtu_change(cpu_dp, old_master_mtu -
 				    dsa_tag_protocol_overhead(cpu_dp->tag_ops),
-				    true);
+				    false);
 out_cpu_failed:
 	if (new_master_mtu != old_master_mtu)
 		dev_set_mtu(master, old_master_mtu);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 8b601ced6b45..75f567390a6b 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -52,10 +52,13 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
 static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
 				 struct dsa_notifier_mtu_info *info)
 {
-	if (ds->index == info->sw_index)
-		return (port == info->port) || dsa_is_dsa_port(ds, port);
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
 
-	if (!info->propagate_upstream)
+	/* Do not propagate to other switches in the tree if the notifier was
+	 * targeted for a single switch.
+	 */
+	if (info->targeted_match)
 		return false;
 
 	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))

From f9bcdc362c7776b875c0f390e982cbac597d660f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 21 Jun 2021 19:42:19 +0300
Subject: [PATCH 2411/3292] net: dsa: remove cross-chip support from the MRP
 notifiers

With MRP hardware assist being supported only by the ocelot switch
family, which by design does not support cross-chip bridging, the
current match functions are at best a guess and have not been confirmed
in any way to do anything relevant in a multi-switch topology.

Drop the code and make the notifiers match only on the targeted switch
port.

Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/switch.c | 55 ++++++------------------------------------------
 1 file changed, 7 insertions(+), 48 deletions(-)

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 75f567390a6b..c1e5afafe633 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -346,36 +346,16 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
 	return 0;
 }
 
-static bool dsa_switch_mrp_match(struct dsa_switch *ds, int port,
-				 struct dsa_notifier_mrp_info *info)
-{
-	if (ds->index == info->sw_index && port == info->port)
-		return true;
-
-	if (dsa_is_dsa_port(ds, port))
-		return true;
-
-	return false;
-}
-
 static int dsa_switch_mrp_add(struct dsa_switch *ds,
 			      struct dsa_notifier_mrp_info *info)
 {
-	int err = 0;
-	int port;
-
 	if (!ds->ops->port_mrp_add)
 		return -EOPNOTSUPP;
 
-	for (port = 0; port < ds->num_ports; port++) {
-		if (dsa_switch_mrp_match(ds, port, info)) {
-			err = ds->ops->port_mrp_add(ds, port, info->mrp);
-			if (err)
-				break;
-		}
-	}
+	if (ds->index == info->sw_index)
+		return ds->ops->port_mrp_add(ds, info->port, info->mrp);
 
-	return err;
+	return 0;
 }
 
 static int dsa_switch_mrp_del(struct dsa_switch *ds,
@@ -390,39 +370,18 @@ static int dsa_switch_mrp_del(struct dsa_switch *ds,
 	return 0;
 }
 
-static bool
-dsa_switch_mrp_ring_role_match(struct dsa_switch *ds, int port,
-			       struct dsa_notifier_mrp_ring_role_info *info)
-{
-	if (ds->index == info->sw_index && port == info->port)
-		return true;
-
-	if (dsa_is_dsa_port(ds, port))
-		return true;
-
-	return false;
-}
-
 static int
 dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
 			     struct dsa_notifier_mrp_ring_role_info *info)
 {
-	int err = 0;
-	int port;
-
 	if (!ds->ops->port_mrp_add)
 		return -EOPNOTSUPP;
 
-	for (port = 0; port < ds->num_ports; port++) {
-		if (dsa_switch_mrp_ring_role_match(ds, port, info)) {
-			err = ds->ops->port_mrp_add_ring_role(ds, port,
-							      info->mrp);
-			if (err)
-				break;
-		}
-	}
+	if (ds->index == info->sw_index)
+		return ds->ops->port_mrp_add_ring_role(ds, info->port,
+						       info->mrp);
 
-	return err;
+	return 0;
 }
 
 static int

From 24610ed80df65a564d6165d15505a950d05f9f5a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 19 Jun 2021 16:55:46 +0300
Subject: [PATCH 2412/3292] netfilter: nfnetlink_hook: fix check for snprintf()
 overflow

The kernel version of snprintf() can't return negatives.  The
"ret > (int)sizeof(sym)" check is off by one because and it should be
>=.  Finally, we need to set a negative error code.

Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_hook.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 58fda6ac663b..50b4e3c9347a 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -126,8 +126,10 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
 
 #ifdef CONFIG_KALLSYMS
 	ret = snprintf(sym, sizeof(sym), "%ps", ops->hook);
-	if (ret < 0 || ret > (int)sizeof(sym))
+	if (ret >= sizeof(sym)) {
+		ret = -EINVAL;
 		goto nla_put_failure;
+	}
 
 	module_name = strstr(sym, " [");
 	if (module_name) {

From 867de40c4c23e6d7f89f9ce4272a5d1b1484c122 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 21 Jun 2021 14:48:29 -0400
Subject: [PATCH 2413/3292] dm writecache: write at least 4k when committing

SSDs perform badly with sub-4k writes (because they perfrorm
read-modify-write internally), so make sure writecache writes at least
4k when committing.

Fixes: 991bd8d7bc78 ("dm writecache: commit just one block, not a full page")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index ddd368e0491d..558d39764e6d 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -532,7 +532,11 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
 
 	region.bdev = wc->ssd_dev->bdev;
 	region.sector = 0;
-	region.count = wc->block_size >> SECTOR_SHIFT;
+	region.count = max(4096U, wc->block_size) >> SECTOR_SHIFT;
+
+	if (unlikely(region.sector + region.count > wc->metadata_sectors))
+		region.count = wc->metadata_sectors - region.sector;
+
 	region.sector += wc->start_sector;
 
 	req.bi_op = REQ_OP_WRITE;

From 3c5e44622011b9ea21bd425875dcccfc9a158f5f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 19 Jun 2021 00:55:20 +0200
Subject: [PATCH 2414/3292] netfilter: nf_tables: memleak in hw offload abort
 path

Release flow from the abort path, this is easy to reproduce since
b72920f6e4a9 ("netfilter: nftables: counter hardware offload support").
If the preparation phase fails, then the abort path is exercised without
releasing the flow rule object.

unreferenced object 0xffff8881f0fa7700 (size 128):
  comm "nft", pid 1335, jiffies 4294931120 (age 4163.740s)
  hex dump (first 32 bytes):
    08 e4 de 13 82 88 ff ff 98 e4 de 13 82 88 ff ff  ................
    48 e4 de 13 82 88 ff ff 01 00 00 00 00 00 00 00  H...............
  backtrace:
    [<00000000634547e7>] flow_rule_alloc+0x26/0x80
    [<00000000c8426156>] nft_flow_rule_create+0xc9/0x3f0 [nf_tables]
    [<0000000075ff8e46>] nf_tables_newrule+0xc79/0x10a0 [nf_tables]
    [<00000000ba65e40e>] nfnetlink_rcv_batch+0xaac/0xf90 [nfnetlink]
    [<00000000505c614a>] nfnetlink_rcv+0x1bb/0x1f0 [nfnetlink]
    [<00000000eb78e1fe>] netlink_unicast+0x34b/0x480
    [<00000000a8f72c94>] netlink_sendmsg+0x3af/0x690
    [<000000009cb1ddf4>] sock_sendmsg+0x96/0xa0
    [<0000000039d06e44>] ____sys_sendmsg+0x3fe/0x440
    [<00000000137e82ca>] ___sys_sendmsg+0xd8/0x140
    [<000000000c6bf6a6>] __sys_sendmsg+0xb3/0x130
    [<0000000043bd6268>] do_syscall_64+0x40/0xb0
    [<00000000afdebc2d>] entry_SYSCALL_64_after_hwframe+0x44/0xae

Remove flow rule release from the offload commit path, otherwise error
from the offload commit phase might trigger a double-free due to the
execution of the abort_offload -> abort. After this patch, the abort
path takes care of releasing the flow rule.

This fix also needs to move the nft_flow_rule_create() call before the
transaction object is added otherwise the abort path might find a NULL
pointer to the flow rule object for the NFT_CHAIN_HW_OFFLOAD case.

While at it, rename BASIC-like goto tags to slightly more meaningful
names rather than adding a new "err3" tag.

Fixes: 63b48c73ff56 ("netfilter: nf_tables_offload: undo updates if transaction fails")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c     | 51 +++++++++++++++++++------------
 net/netfilter/nf_tables_offload.c | 17 -----------
 2 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bf4d6ec9fc55..ca9ec8721e6c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3243,9 +3243,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 	u8 genmask = nft_genmask_next(info->net);
 	struct nft_rule *rule, *old_rule = NULL;
 	struct nft_expr_info *expr_info = NULL;
+	struct nft_flow_rule *flow = NULL;
 	int family = nfmsg->nfgen_family;
 	struct net *net = info->net;
-	struct nft_flow_rule *flow;
 	struct nft_userdata *udata;
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -3340,13 +3340,13 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
 			err = -EINVAL;
 			if (nla_type(tmp) != NFTA_LIST_ELEM)
-				goto err1;
+				goto err_release_expr;
 			if (n == NFT_RULE_MAXEXPRS)
-				goto err1;
+				goto err_release_expr;
 			err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
 			if (err < 0) {
 				NL_SET_BAD_ATTR(extack, tmp);
-				goto err1;
+				goto err_release_expr;
 			}
 			size += expr_info[n].ops->size;
 			n++;
@@ -3355,7 +3355,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 	/* Check for overflow of dlen field */
 	err = -EFBIG;
 	if (size >= 1 << 12)
-		goto err1;
+		goto err_release_expr;
 
 	if (nla[NFTA_RULE_USERDATA]) {
 		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
@@ -3366,7 +3366,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 	err = -ENOMEM;
 	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
 	if (rule == NULL)
-		goto err1;
+		goto err_release_expr;
 
 	nft_activate_next(net, rule);
 
@@ -3385,7 +3385,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 		err = nf_tables_newexpr(&ctx, &expr_info[i], expr);
 		if (err < 0) {
 			NL_SET_BAD_ATTR(extack, expr_info[i].attr);
-			goto err2;
+			goto err_release_rule;
 		}
 
 		if (expr_info[i].ops->validate)
@@ -3395,16 +3395,24 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 		expr = nft_expr_next(expr);
 	}
 
+	if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+		flow = nft_flow_rule_create(net, rule);
+		if (IS_ERR(flow)) {
+			err = PTR_ERR(flow);
+			goto err_release_rule;
+		}
+	}
+
 	if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
 		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
 		if (trans == NULL) {
 			err = -ENOMEM;
-			goto err2;
+			goto err_destroy_flow_rule;
 		}
 		err = nft_delrule(&ctx, old_rule);
 		if (err < 0) {
 			nft_trans_destroy(trans);
-			goto err2;
+			goto err_destroy_flow_rule;
 		}
 
 		list_add_tail_rcu(&rule->list, &old_rule->list);
@@ -3412,7 +3420,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
 		if (!trans) {
 			err = -ENOMEM;
-			goto err2;
+			goto err_destroy_flow_rule;
 		}
 
 		if (info->nlh->nlmsg_flags & NLM_F_APPEND) {
@@ -3430,21 +3438,19 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 	kvfree(expr_info);
 	chain->use++;
 
+	if (flow)
+		nft_trans_flow_rule(trans) = flow;
+
 	if (nft_net->validate_state == NFT_VALIDATE_DO)
 		return nft_table_validate(net, table);
 
-	if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
-		flow = nft_flow_rule_create(net, rule);
-		if (IS_ERR(flow))
-			return PTR_ERR(flow);
-
-		nft_trans_flow_rule(trans) = flow;
-	}
-
 	return 0;
-err2:
+
+err_destroy_flow_rule:
+	nft_flow_rule_destroy(flow);
+err_release_rule:
 	nf_tables_rule_release(&ctx, rule);
-err1:
+err_release_expr:
 	for (i = 0; i < n; i++) {
 		if (expr_info[i].ops) {
 			module_put(expr_info[i].ops->type->owner);
@@ -8839,11 +8845,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 			nft_rule_expr_deactivate(&trans->ctx,
 						 nft_trans_rule(trans),
 						 NFT_TRANS_ABORT);
+			if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+				nft_flow_rule_destroy(nft_trans_flow_rule(trans));
 			break;
 		case NFT_MSG_DELRULE:
 			trans->ctx.chain->use++;
 			nft_clear(trans->ctx.net, nft_trans_rule(trans));
 			nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
+			if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+				nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSET:
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index a48c5fd53a80..ec701b84844f 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -594,23 +594,6 @@ int nft_flow_rule_offload_commit(struct net *net)
 		}
 	}
 
-	list_for_each_entry(trans, &nft_net->commit_list, list) {
-		if (trans->ctx.family != NFPROTO_NETDEV)
-			continue;
-
-		switch (trans->msg_type) {
-		case NFT_MSG_NEWRULE:
-		case NFT_MSG_DELRULE:
-			if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
-				continue;
-
-			nft_flow_rule_destroy(nft_trans_flow_rule(trans));
-			break;
-		default:
-			break;
-		}
-	}
-
 	return err;
 }
 

From ea45fdf82cc90430bb7c280e5e53821e833782c5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 19 Jun 2021 01:25:14 +0200
Subject: [PATCH 2415/3292] netfilter: nf_tables_offload: check
 FLOW_DISSECTOR_KEY_BASIC in VLAN transfer logic

The VLAN transfer logic should actually check for
FLOW_DISSECTOR_KEY_BASIC, not FLOW_DISSECTOR_KEY_CONTROL. Moreover, do
not fallback to case 2) .n_proto is set to 802.1q or 802.1ad, if
FLOW_DISSECTOR_KEY_BASIC is unset.

Fixes: 783003f3bb8a ("netfilter: nftables_offload: special ethertype handling for VLAN")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_offload.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index ec701b84844f..b58d73a96523 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -54,15 +54,10 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
 					struct nft_flow_rule *flow)
 {
 	struct nft_flow_match *match = &flow->match;
-	struct nft_offload_ethertype ethertype;
-
-	if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) &&
-	    match->key.basic.n_proto != htons(ETH_P_8021Q) &&
-	    match->key.basic.n_proto != htons(ETH_P_8021AD))
-		return;
-
-	ethertype.value = match->key.basic.n_proto;
-	ethertype.mask = match->mask.basic.n_proto;
+	struct nft_offload_ethertype ethertype = {
+		.value	= match->key.basic.n_proto,
+		.mask	= match->mask.basic.n_proto,
+	};
 
 	if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
 	    (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
@@ -76,7 +71,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
 		match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
 			offsetof(struct nft_flow_key, cvlan);
 		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
-	} else {
+	} else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+		   (match->key.basic.n_proto == htons(ETH_P_8021Q) ||
+		    match->key.basic.n_proto == htons(ETH_P_8021AD))) {
 		match->key.basic.n_proto = match->key.vlan.vlan_tpid;
 		match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
 		match->key.vlan.vlan_tpid = ethertype.value;

From 5f6776ba413ce273f7cb211f1cf8771f0cde7c81 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:41 -0700
Subject: [PATCH 2416/3292] block/Kconfig: Make the BLK_WBT and BLK_WBT_MQ
 entries consecutive

These entries were consecutive at the time of their introduction but are no
longer consecutive. Make these again consecutive. Additionally, modify the
help text since it refers to blk-mq and since the legacy block layer has
been removed.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Link: https://lore.kernel.org/r/20210618004456.7280-2-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index a2297edfdde8..6685578b2a20 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -133,6 +133,13 @@ config BLK_WBT
 	dynamically on an algorithm loosely based on CoDel, factoring in
 	the realtime performance of the disk.
 
+config BLK_WBT_MQ
+	bool "Enable writeback throttling by default"
+	default y
+	depends on BLK_WBT
+	help
+	Enable writeback throttling by default for request-based block devices.
+
 config BLK_CGROUP_IOLATENCY
 	bool "Enable support for latency based cgroup IO protection"
 	depends on BLK_CGROUP=y
@@ -155,13 +162,6 @@ config BLK_CGROUP_IOCOST
 	distributes IO capacity between different groups based on
 	their share of the overall weight distribution.
 
-config BLK_WBT_MQ
-	bool "Multiqueue writeback throttling"
-	default y
-	depends on BLK_WBT
-	help
-	Enable writeback throttling by default on multiqueue devices.
-
 config BLK_DEBUG_FS
 	bool "Block layer debugging information in debugfs"
 	default y

From 19688d7f9592b8222f530037d9328fdc90fff14c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:42 -0700
Subject: [PATCH 2417/3292] block/blk-cgroup: Swap the blk_throtl_init() and
 blk_iolatency_init() calls

Before adding more calls in this function, simplify the error path.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20210618004456.7280-3-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d169e2055158..3b0f6efaa2b6 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1183,15 +1183,14 @@ int blkcg_init_queue(struct request_queue *q)
 	if (preloaded)
 		radix_tree_preload_end();
 
+	ret = blk_iolatency_init(q);
+	if (ret)
+		goto err_destroy_all;
+
 	ret = blk_throtl_init(q);
 	if (ret)
 		goto err_destroy_all;
 
-	ret = blk_iolatency_init(q);
-	if (ret) {
-		blk_throtl_exit(q);
-		goto err_destroy_all;
-	}
 	return 0;
 
 err_destroy_all:

From fb44023e70224c3bd9eb949bd3ab66876bd14c56 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:43 -0700
Subject: [PATCH 2418/3292] block/blk-rq-qos: Move a function from a header
 file into a C file

rq_qos_id_to_name() is only used in blk-mq-debugfs.c so move that function
into in blk-mq-debugfs.c.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Link: https://lore.kernel.org/r/20210618004456.7280-4-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 13 +++++++++++++
 block/blk-rq-qos.h     | 13 -------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 2a75bc7401df..6ac1c86f62ef 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -937,6 +937,19 @@ void blk_mq_debugfs_unregister_sched(struct request_queue *q)
 	q->sched_debugfs_dir = NULL;
 }
 
+static const char *rq_qos_id_to_name(enum rq_qos_id id)
+{
+	switch (id) {
+	case RQ_QOS_WBT:
+		return "wbt";
+	case RQ_QOS_LATENCY:
+		return "latency";
+	case RQ_QOS_COST:
+		return "cost";
+	}
+	return "unknown";
+}
+
 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 {
 	debugfs_remove_recursive(rqos->debugfs_dir);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 2bcb3495e376..a77afbdd472c 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -79,19 +79,6 @@ static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
 	return rq_qos_id(q, RQ_QOS_LATENCY);
 }
 
-static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
-{
-	switch (id) {
-	case RQ_QOS_WBT:
-		return "wbt";
-	case RQ_QOS_LATENCY:
-		return "latency";
-	case RQ_QOS_COST:
-		return "cost";
-	}
-	return "unknown";
-}
-
 static inline void rq_wait_init(struct rq_wait *rq_wait)
 {
 	atomic_set(&rq_wait->inflight, 0);

From 556910e39249d55e23deaec479f49e7d85bc0d24 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:44 -0700
Subject: [PATCH 2419/3292] block: Introduce the ioprio rq-qos policy

Introduce an rq-qos policy that assigns an I/O priority to requests based
on blk-cgroup configuration settings. This policy has the following
advantages over the ioprio_set() system call:
- This policy is cgroup based so it has all the advantages of cgroups.
- While ioprio_set() does not affect page cache writeback I/O, this rq-qos
  controller affects page cache writeback I/O for filesystems that support
  assiociating a cgroup with writeback I/O. See also
  Documentation/admin-guide/cgroup-v2.rst.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-5-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst |  55 +++++
 block/Kconfig                           |   9 +
 block/Makefile                          |   1 +
 block/blk-cgroup.c                      |   5 +
 block/blk-ioprio.c                      | 262 ++++++++++++++++++++++++
 block/blk-ioprio.h                      |  19 ++
 block/blk-mq-debugfs.c                  |   2 +
 block/blk-rq-qos.h                      |   1 +
 8 files changed, 354 insertions(+)
 create mode 100644 block/blk-ioprio.c
 create mode 100644 block/blk-ioprio.h

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index b1e81aa8598a..4e59925e6583 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -56,6 +56,7 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
        5-3-3. IO Latency
          5-3-3-1. How IO Latency Throttling Works
          5-3-3-2. IO Latency Interface Files
+       5-3-4. IO Priority
      5-4. PID
        5-4-1. PID Interface Files
      5-5. Cpuset
@@ -1866,6 +1867,60 @@ IO Latency Interface Files
 		duration of time between evaluation events.  Windows only elapse
 		with IO activity.  Idle periods extend the most recent window.
 
+IO Priority
+~~~~~~~~~~~
+
+A single attribute controls the behavior of the I/O priority cgroup policy,
+namely the blkio.prio.class attribute. The following values are accepted for
+that attribute:
+
+  no-change
+	Do not modify the I/O priority class.
+
+  none-to-rt
+	For requests that do not have an I/O priority class (NONE),
+	change the I/O priority class into RT. Do not modify
+	the I/O priority class of other requests.
+
+  restrict-to-be
+	For requests that do not have an I/O priority class or that have I/O
+	priority class RT, change it into BE. Do not modify the I/O priority
+	class of requests that have priority class IDLE.
+
+  idle
+	Change the I/O priority class of all requests into IDLE, the lowest
+	I/O priority class.
+
+The following numerical values are associated with the I/O priority policies:
+
++-------------+---+
+| no-change   | 0 |
++-------------+---+
+| none-to-rt  | 1 |
++-------------+---+
+| rt-to-be    | 2 |
++-------------+---+
+| all-to-idle | 3 |
++-------------+---+
+
+The numerical value that corresponds to each I/O priority class is as follows:
+
++-------------------------------+---+
+| IOPRIO_CLASS_NONE             | 0 |
++-------------------------------+---+
+| IOPRIO_CLASS_RT (real-time)   | 1 |
++-------------------------------+---+
+| IOPRIO_CLASS_BE (best effort) | 2 |
++-------------------------------+---+
+| IOPRIO_CLASS_IDLE             | 3 |
++-------------------------------+---+
+
+The algorithm to set the I/O priority class for a request is as follows:
+
+- Translate the I/O priority class policy into a number.
+- Change the request I/O priority class into the maximum of the I/O priority
+  class policy number and the numerical I/O priority class.
+
 PID
 ---
 
diff --git a/block/Kconfig b/block/Kconfig
index 6685578b2a20..e71c63eaaf52 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -162,6 +162,15 @@ config BLK_CGROUP_IOCOST
 	distributes IO capacity between different groups based on
 	their share of the overall weight distribution.
 
+config BLK_CGROUP_IOPRIO
+	bool "Cgroup I/O controller for assigning an I/O priority class"
+	depends on BLK_CGROUP
+	help
+	Enable the .prio interface for assigning an I/O priority class to
+	requests. The I/O priority class affects the order in which an I/O
+	scheduler and block devices process requests. Only some I/O schedulers
+	and some block devices support I/O priorities.
+
 config BLK_DEBUG_FS
 	bool "Block layer debugging information in debugfs"
 	default y
diff --git a/block/Makefile b/block/Makefile
index 8d841f5f986f..af3d044abaf1 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_CGROUP_RWSTAT)	+= blk-cgroup-rwstat.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
+obj-$(CONFIG_BLK_CGROUP_IOPRIO)	+= blk-ioprio.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
 obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 3b0f6efaa2b6..7b06a5fa3cac 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -31,6 +31,7 @@
 #include <linux/tracehook.h>
 #include <linux/psi.h>
 #include "blk.h"
+#include "blk-ioprio.h"
 
 /*
  * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
@@ -1187,6 +1188,10 @@ int blkcg_init_queue(struct request_queue *q)
 	if (ret)
 		goto err_destroy_all;
 
+	ret = blk_ioprio_init(q);
+	if (ret)
+		goto err_destroy_all;
+
 	ret = blk_throtl_init(q);
 	if (ret)
 		goto err_destroy_all;
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
new file mode 100644
index 000000000000..332a07761bf8
--- /dev/null
+++ b/block/blk-ioprio.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Block rq-qos policy for assigning an I/O priority class to requests.
+ *
+ * Using an rq-qos policy for assigning I/O priority class has two advantages
+ * over using the ioprio_set() system call:
+ *
+ * - This policy is cgroup based so it has all the advantages of cgroups.
+ * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos
+ *   controller affects page cache writeback I/O for filesystems that support
+ *   assiociating a cgroup with writeback I/O. See also
+ *   Documentation/admin-guide/cgroup-v2.rst.
+ */
+
+#include <linux/blk-cgroup.h>
+#include <linux/blk-mq.h>
+#include <linux/blk_types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "blk-ioprio.h"
+#include "blk-rq-qos.h"
+
+/**
+ * enum prio_policy - I/O priority class policy.
+ * @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
+ * @POLICY_NONE_TO_RT: modify IOPRIO_CLASS_NONE into IOPRIO_CLASS_RT.
+ * @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
+ *		IOPRIO_CLASS_BE.
+ * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
+ *
+ * See also <linux/ioprio.h>.
+ */
+enum prio_policy {
+	POLICY_NO_CHANGE	= 0,
+	POLICY_NONE_TO_RT	= 1,
+	POLICY_RESTRICT_TO_BE	= 2,
+	POLICY_ALL_TO_IDLE	= 3,
+};
+
+static const char *policy_name[] = {
+	[POLICY_NO_CHANGE]	= "no-change",
+	[POLICY_NONE_TO_RT]	= "none-to-rt",
+	[POLICY_RESTRICT_TO_BE]	= "restrict-to-be",
+	[POLICY_ALL_TO_IDLE]	= "idle",
+};
+
+static struct blkcg_policy ioprio_policy;
+
+/**
+ * struct ioprio_blkg - Per (cgroup, request queue) data.
+ * @pd: blkg_policy_data structure.
+ */
+struct ioprio_blkg {
+	struct blkg_policy_data pd;
+};
+
+/**
+ * struct ioprio_blkcg - Per cgroup data.
+ * @cpd: blkcg_policy_data structure.
+ * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>.
+ */
+struct ioprio_blkcg {
+	struct blkcg_policy_data cpd;
+	enum prio_policy	 prio_policy;
+};
+
+static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
+{
+	return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL;
+}
+
+static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg)
+{
+	return container_of(blkcg_to_cpd(blkcg, &ioprio_policy),
+			    struct ioprio_blkcg, cpd);
+}
+
+static struct ioprio_blkcg *
+ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
+{
+	return blkcg_to_ioprio_blkcg(css_to_blkcg(css));
+}
+
+static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio)
+{
+	struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy);
+
+	if (!pd)
+		return NULL;
+
+	return blkcg_to_ioprio_blkcg(pd->blkg->blkcg);
+}
+
+static int ioprio_show_prio_policy(struct seq_file *sf, void *v)
+{
+	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf));
+
+	seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]);
+	return 0;
+}
+
+static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
+				      size_t nbytes, loff_t off)
+{
+	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of));
+	int ret;
+
+	if (off != 0)
+		return -EIO;
+	/* kernfs_fop_write_iter() terminates 'buf' with '\0'. */
+	ret = sysfs_match_string(policy_name, buf);
+	if (ret < 0)
+		return ret;
+	blkcg->prio_policy = ret;
+
+	return nbytes;
+}
+
+static struct blkg_policy_data *
+ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg)
+{
+	struct ioprio_blkg *ioprio_blkg;
+
+	ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp);
+	if (!ioprio_blkg)
+		return NULL;
+
+	return &ioprio_blkg->pd;
+}
+
+static void ioprio_free_pd(struct blkg_policy_data *pd)
+{
+	struct ioprio_blkg *ioprio_blkg = pd_to_ioprio(pd);
+
+	kfree(ioprio_blkg);
+}
+
+static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp)
+{
+	struct ioprio_blkcg *blkcg;
+
+	blkcg = kzalloc(sizeof(*blkcg), gfp);
+	if (!blkcg)
+		return NULL;
+	blkcg->prio_policy = POLICY_NO_CHANGE;
+	return &blkcg->cpd;
+}
+
+static void ioprio_free_cpd(struct blkcg_policy_data *cpd)
+{
+	struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd);
+
+	kfree(blkcg);
+}
+
+#define IOPRIO_ATTRS						\
+	{							\
+		.name		= "prio.class",			\
+		.seq_show	= ioprio_show_prio_policy,	\
+		.write		= ioprio_set_prio_policy,	\
+	},							\
+	{ } /* sentinel */
+
+/* cgroup v2 attributes */
+static struct cftype ioprio_files[] = {
+	IOPRIO_ATTRS
+};
+
+/* cgroup v1 attributes */
+static struct cftype ioprio_legacy_files[] = {
+	IOPRIO_ATTRS
+};
+
+static struct blkcg_policy ioprio_policy = {
+	.dfl_cftypes	= ioprio_files,
+	.legacy_cftypes = ioprio_legacy_files,
+
+	.cpd_alloc_fn	= ioprio_alloc_cpd,
+	.cpd_free_fn	= ioprio_free_cpd,
+
+	.pd_alloc_fn	= ioprio_alloc_pd,
+	.pd_free_fn	= ioprio_free_pd,
+};
+
+struct blk_ioprio {
+	struct rq_qos rqos;
+};
+
+static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
+			       struct bio *bio)
+{
+	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
+
+	/*
+	 * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
+	 * correspond to a lower priority. Hence, the max_t() below selects
+	 * the lower priority of bi_ioprio and the cgroup I/O priority class.
+	 * If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the
+	 * bio I/O priority is not modified. If the bio I/O priority equals
+	 * IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
+	 */
+	bio->bi_ioprio = max_t(u16, bio->bi_ioprio,
+			       IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
+}
+
+static void blkcg_ioprio_exit(struct rq_qos *rqos)
+{
+	struct blk_ioprio *blkioprio_blkg =
+		container_of(rqos, typeof(*blkioprio_blkg), rqos);
+
+	blkcg_deactivate_policy(rqos->q, &ioprio_policy);
+	kfree(blkioprio_blkg);
+}
+
+static struct rq_qos_ops blkcg_ioprio_ops = {
+	.track	= blkcg_ioprio_track,
+	.exit	= blkcg_ioprio_exit,
+};
+
+int blk_ioprio_init(struct request_queue *q)
+{
+	struct blk_ioprio *blkioprio_blkg;
+	struct rq_qos *rqos;
+	int ret;
+
+	blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL);
+	if (!blkioprio_blkg)
+		return -ENOMEM;
+
+	ret = blkcg_activate_policy(q, &ioprio_policy);
+	if (ret) {
+		kfree(blkioprio_blkg);
+		return ret;
+	}
+
+	rqos = &blkioprio_blkg->rqos;
+	rqos->id = RQ_QOS_IOPRIO;
+	rqos->ops = &blkcg_ioprio_ops;
+	rqos->q = q;
+
+	/*
+	 * Registering the rq-qos policy after activating the blk-cgroup
+	 * policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the
+	 * rq-qos callbacks.
+	 */
+	rq_qos_add(q, rqos);
+
+	return 0;
+}
+
+static int __init ioprio_init(void)
+{
+	return blkcg_policy_register(&ioprio_policy);
+}
+
+static void __exit ioprio_exit(void)
+{
+	blkcg_policy_unregister(&ioprio_policy);
+}
+
+module_init(ioprio_init);
+module_exit(ioprio_exit);
diff --git a/block/blk-ioprio.h b/block/blk-ioprio.h
new file mode 100644
index 000000000000..a7785c2f1aea
--- /dev/null
+++ b/block/blk-ioprio.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BLK_IOPRIO_H_
+#define _BLK_IOPRIO_H_
+
+#include <linux/kconfig.h>
+
+struct request_queue;
+
+#ifdef CONFIG_BLK_CGROUP_IOPRIO
+int blk_ioprio_init(struct request_queue *q);
+#else
+static inline int blk_ioprio_init(struct request_queue *q)
+{
+	return 0;
+}
+#endif
+
+#endif /* _BLK_IOPRIO_H_ */
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 6ac1c86f62ef..4b66d2776eda 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -946,6 +946,8 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
 		return "latency";
 	case RQ_QOS_COST:
 		return "cost";
+	case RQ_QOS_IOPRIO:
+		return "ioprio";
 	}
 	return "unknown";
 }
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index a77afbdd472c..f000f83e0621 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -17,6 +17,7 @@ enum rq_qos_id {
 	RQ_QOS_WBT,
 	RQ_QOS_LATENCY,
 	RQ_QOS_COST,
+	RQ_QOS_IOPRIO,
 };
 
 struct rq_wait {

From 46eae2e32a6adc368230b4df0501082c5233e99c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:45 -0700
Subject: [PATCH 2420/3292] block/mq-deadline: Add several comments

Make the code easier to read by adding more comments.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-6-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 8eea2cbf2bf4..31418e9ce9e2 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -139,6 +139,9 @@ static void dd_request_merged(struct request_queue *q, struct request *req,
 	}
 }
 
+/*
+ * Callback function that is invoked after @next has been merged into @req.
+ */
 static void dd_merged_requests(struct request_queue *q, struct request *req,
 			       struct request *next)
 {
@@ -375,6 +378,8 @@ done:
 }
 
 /*
+ * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
+ *
  * One confusing aspect here is that we get called for a specific
  * hardware queue, but we may return a request that is for a
  * different hardware queue. This is because mq-deadline has shared
@@ -438,6 +443,10 @@ static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
 	return 0;
 }
 
+/*
+ * Try to merge @bio into an existing request. If @bio has been merged into
+ * an existing request, store the pointer to that request into *@rq.
+ */
 static int dd_request_merge(struct request_queue *q, struct request **rq,
 			    struct bio *bio)
 {
@@ -461,6 +470,10 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
 	return ELEVATOR_NO_MERGE;
 }
 
+/*
+ * Attempt to merge a bio into an existing request. This function is called
+ * before @bio is associated with a request.
+ */
 static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
@@ -518,6 +531,9 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	}
 }
 
+/*
+ * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests().
+ */
 static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
 			       struct list_head *list, bool at_head)
 {
@@ -544,6 +560,8 @@ static void dd_prepare_request(struct request *rq)
 }
 
 /*
+ * Callback from inside blk_mq_free_request().
+ *
  * For zoned block devices, write unlock the target zone of
  * completed write requests. Do this while holding the zone lock
  * spinlock so that the zone is never unlocked while deadline_fifo_request()

From 3bd473f41ae990815d6f75d285b161eebf361278 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:46 -0700
Subject: [PATCH 2421/3292] block/mq-deadline: Add two lockdep_assert_held()
 statements

Document the locking strategy by adding two lockdep_assert_held()
statements.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-7-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 31418e9ce9e2..191ff5ce629c 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -279,6 +279,8 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	bool reads, writes;
 	int data_dir;
 
+	lockdep_assert_held(&dd->lock);
+
 	if (!list_empty(&dd->dispatch)) {
 		rq = list_first_entry(&dd->dispatch, struct request, queuelist);
 		list_del_init(&rq->queuelist);
@@ -501,6 +503,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const int data_dir = rq_data_dir(rq);
 
+	lockdep_assert_held(&dd->lock);
+
 	/*
 	 * This may be a requeue of a write request that has locked its
 	 * target zone. If it is the case, this releases the zone lock.

From 2f295beab40f13ab93c004d45372238f2066a5ee Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:47 -0700
Subject: [PATCH 2422/3292] block/mq-deadline: Remove two local variables

Make __dd_dispatch_request() easier to read by removing two local
variables.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-8-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 191ff5ce629c..caa438f62a4d 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -276,7 +276,6 @@ deadline_next_request(struct deadline_data *dd, int data_dir)
 static struct request *__dd_dispatch_request(struct deadline_data *dd)
 {
 	struct request *rq, *next_rq;
-	bool reads, writes;
 	int data_dir;
 
 	lockdep_assert_held(&dd->lock);
@@ -287,9 +286,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 		goto done;
 	}
 
-	reads = !list_empty(&dd->fifo_list[READ]);
-	writes = !list_empty(&dd->fifo_list[WRITE]);
-
 	/*
 	 * batches are currently reads XOR writes
 	 */
@@ -306,7 +302,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	 * data direction (read / write)
 	 */
 
-	if (reads) {
+	if (!list_empty(&dd->fifo_list[READ])) {
 		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
 
 		if (deadline_fifo_request(dd, WRITE) &&
@@ -322,7 +318,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	 * there are either no reads or writes have been starved
 	 */
 
-	if (writes) {
+	if (!list_empty(&dd->fifo_list[WRITE])) {
 dispatch_writes:
 		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
 

From 3e9a99eba058f79736dccaf25934f8d6ca380fb3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:48 -0700
Subject: [PATCH 2423/3292] block/mq-deadline: Rename dd_init_queue() and
 dd_exit_queue()

Change "queue" into "sched" to make the function names reflect better the
purpose of these functions.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-9-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index caa438f62a4d..d823ba7cb084 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -395,7 +395,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	return rq;
 }
 
-static void dd_exit_queue(struct elevator_queue *e)
+static void dd_exit_sched(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
 
@@ -408,7 +408,7 @@ static void dd_exit_queue(struct elevator_queue *e)
 /*
  * initialize elevator private data (deadline_data).
  */
-static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
+static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct deadline_data *dd;
 	struct elevator_queue *eq;
@@ -800,8 +800,8 @@ static struct elevator_type mq_deadline = {
 		.requests_merged	= dd_merged_requests,
 		.request_merged		= dd_request_merged,
 		.has_work		= dd_has_work,
-		.init_sched		= dd_init_queue,
-		.exit_sched		= dd_exit_queue,
+		.init_sched		= dd_init_sched,
+		.exit_sched		= dd_exit_sched,
 	},
 
 #ifdef CONFIG_BLK_DEBUG_FS

From 004a26b327c2e1ea88b2638cf16c0e30e82f297e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:49 -0700
Subject: [PATCH 2424/3292] block/mq-deadline: Improve compile-time argument
 checking

Modern compilers complain if an out-of-range value is passed to a function
argument that has an enumeration type. Let the compiler detect out-of-range
data direction arguments instead of verifying the data_dir argument at
runtime.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-10-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 96 +++++++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index d823ba7cb084..69126beff77d 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -35,6 +35,13 @@ static const int writes_starved = 2;    /* max times reads can starve a write */
 static const int fifo_batch = 16;       /* # of sequential requests treated as one
 				     by the above parameters. For throughput. */
 
+enum dd_data_dir {
+	DD_READ		= READ,
+	DD_WRITE	= WRITE,
+};
+
+enum { DD_DIR_COUNT = 2 };
+
 struct deadline_data {
 	/*
 	 * run time data
@@ -43,20 +50,20 @@ struct deadline_data {
 	/*
 	 * requests (deadline_rq s) are present on both sort_list and fifo_list
 	 */
-	struct rb_root sort_list[2];
-	struct list_head fifo_list[2];
+	struct rb_root sort_list[DD_DIR_COUNT];
+	struct list_head fifo_list[DD_DIR_COUNT];
 
 	/*
 	 * next in sort order. read, write or both are NULL
 	 */
-	struct request *next_rq[2];
+	struct request *next_rq[DD_DIR_COUNT];
 	unsigned int batching;		/* number of sequential requests made */
 	unsigned int starved;		/* times reads have starved writes */
 
 	/*
 	 * settings that change how the i/o scheduler behaves
 	 */
-	int fifo_expire[2];
+	int fifo_expire[DD_DIR_COUNT];
 	int fifo_batch;
 	int writes_starved;
 	int front_merges;
@@ -97,7 +104,7 @@ deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 static inline void
 deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-	const int data_dir = rq_data_dir(rq);
+	const enum dd_data_dir data_dir = rq_data_dir(rq);
 
 	if (dd->next_rq[data_dir] == rq)
 		dd->next_rq[data_dir] = deadline_latter_request(rq);
@@ -169,10 +176,10 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
 static void
 deadline_move_request(struct deadline_data *dd, struct request *rq)
 {
-	const int data_dir = rq_data_dir(rq);
+	const enum dd_data_dir data_dir = rq_data_dir(rq);
 
-	dd->next_rq[READ] = NULL;
-	dd->next_rq[WRITE] = NULL;
+	dd->next_rq[DD_READ] = NULL;
+	dd->next_rq[DD_WRITE] = NULL;
 	dd->next_rq[data_dir] = deadline_latter_request(rq);
 
 	/*
@@ -185,9 +192,10 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
  * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
-static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
+static inline int deadline_check_fifo(struct deadline_data *dd,
+				      enum dd_data_dir data_dir)
 {
-	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
+	struct request *rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
 
 	/*
 	 * rq is expired!
@@ -203,19 +211,16 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
  * dispatch using arrival ordered lists.
  */
 static struct request *
-deadline_fifo_request(struct deadline_data *dd, int data_dir)
+deadline_fifo_request(struct deadline_data *dd, enum dd_data_dir data_dir)
 {
 	struct request *rq;
 	unsigned long flags;
 
-	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
-		return NULL;
-
 	if (list_empty(&dd->fifo_list[data_dir]))
 		return NULL;
 
 	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
-	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
 		return rq;
 
 	/*
@@ -223,7 +228,7 @@ deadline_fifo_request(struct deadline_data *dd, int data_dir)
 	 * an unlocked target zone.
 	 */
 	spin_lock_irqsave(&dd->zone_lock, flags);
-	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
+	list_for_each_entry(rq, &dd->fifo_list[DD_WRITE], queuelist) {
 		if (blk_req_can_dispatch_to_zone(rq))
 			goto out;
 	}
@@ -239,19 +244,16 @@ out:
  * dispatch using sector position sorted lists.
  */
 static struct request *
-deadline_next_request(struct deadline_data *dd, int data_dir)
+deadline_next_request(struct deadline_data *dd, enum dd_data_dir data_dir)
 {
 	struct request *rq;
 	unsigned long flags;
 
-	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
-		return NULL;
-
 	rq = dd->next_rq[data_dir];
 	if (!rq)
 		return NULL;
 
-	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
 		return rq;
 
 	/*
@@ -276,7 +278,7 @@ deadline_next_request(struct deadline_data *dd, int data_dir)
 static struct request *__dd_dispatch_request(struct deadline_data *dd)
 {
 	struct request *rq, *next_rq;
-	int data_dir;
+	enum dd_data_dir data_dir;
 
 	lockdep_assert_held(&dd->lock);
 
@@ -289,9 +291,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	/*
 	 * batches are currently reads XOR writes
 	 */
-	rq = deadline_next_request(dd, WRITE);
+	rq = deadline_next_request(dd, DD_WRITE);
 	if (!rq)
-		rq = deadline_next_request(dd, READ);
+		rq = deadline_next_request(dd, DD_READ);
 
 	if (rq && dd->batching < dd->fifo_batch)
 		/* we have a next request are still entitled to batch */
@@ -302,14 +304,14 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	 * data direction (read / write)
 	 */
 
-	if (!list_empty(&dd->fifo_list[READ])) {
-		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
+	if (!list_empty(&dd->fifo_list[DD_READ])) {
+		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[DD_READ]));
 
-		if (deadline_fifo_request(dd, WRITE) &&
+		if (deadline_fifo_request(dd, DD_WRITE) &&
 		    (dd->starved++ >= dd->writes_starved))
 			goto dispatch_writes;
 
-		data_dir = READ;
+		data_dir = DD_READ;
 
 		goto dispatch_find_request;
 	}
@@ -318,13 +320,13 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	 * there are either no reads or writes have been starved
 	 */
 
-	if (!list_empty(&dd->fifo_list[WRITE])) {
+	if (!list_empty(&dd->fifo_list[DD_WRITE])) {
 dispatch_writes:
-		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
+		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[DD_WRITE]));
 
 		dd->starved = 0;
 
-		data_dir = WRITE;
+		data_dir = DD_WRITE;
 
 		goto dispatch_find_request;
 	}
@@ -399,8 +401,8 @@ static void dd_exit_sched(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
 
-	BUG_ON(!list_empty(&dd->fifo_list[READ]));
-	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
+	BUG_ON(!list_empty(&dd->fifo_list[DD_READ]));
+	BUG_ON(!list_empty(&dd->fifo_list[DD_WRITE]));
 
 	kfree(dd);
 }
@@ -424,12 +426,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	}
 	eq->elevator_data = dd;
 
-	INIT_LIST_HEAD(&dd->fifo_list[READ]);
-	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
-	dd->sort_list[READ] = RB_ROOT;
-	dd->sort_list[WRITE] = RB_ROOT;
-	dd->fifo_expire[READ] = read_expire;
-	dd->fifo_expire[WRITE] = write_expire;
+	INIT_LIST_HEAD(&dd->fifo_list[DD_READ]);
+	INIT_LIST_HEAD(&dd->fifo_list[DD_WRITE]);
+	dd->sort_list[DD_READ] = RB_ROOT;
+	dd->sort_list[DD_WRITE] = RB_ROOT;
+	dd->fifo_expire[DD_READ] = read_expire;
+	dd->fifo_expire[DD_WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
 	dd->front_merges = 1;
 	dd->fifo_batch = fifo_batch;
@@ -497,7 +499,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
 	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
-	const int data_dir = rq_data_dir(rq);
+	const enum dd_data_dir data_dir = rq_data_dir(rq);
 
 	lockdep_assert_held(&dd->lock);
 
@@ -585,7 +587,7 @@ static void dd_finish_request(struct request *rq)
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
 		blk_req_zone_write_unlock(rq);
-		if (!list_empty(&dd->fifo_list[WRITE]))
+		if (!list_empty(&dd->fifo_list[DD_WRITE]))
 			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
 		spin_unlock_irqrestore(&dd->zone_lock, flags);
 	}
@@ -626,8 +628,8 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 		__data = jiffies_to_msecs(__data);			\
 	return deadline_var_show(__data, (page));			\
 }
-SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
-SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
+SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[DD_READ], 1);
+SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[DD_WRITE], 1);
 SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
 SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
 SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
@@ -649,8 +651,8 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
 		*(__PTR) = __data;					\
 	return count;							\
 }
-STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
-STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
+STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX, 1);
+STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX, 1);
 STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
 STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
 STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
@@ -717,8 +719,8 @@ static int deadline_##name##_next_rq_show(void *data,			\
 		__blk_mq_debugfs_rq_show(m, rq);			\
 	return 0;							\
 }
-DEADLINE_DEBUGFS_DDIR_ATTRS(READ, read)
-DEADLINE_DEBUGFS_DDIR_ATTRS(WRITE, write)
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_READ, read)
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_WRITE, write)
 #undef DEADLINE_DEBUGFS_DDIR_ATTRS
 
 static int deadline_batching_show(void *data, struct seq_file *m)

From d6d7f013d65491eaff477b9bd83b80111f5be9e4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:50 -0700
Subject: [PATCH 2425/3292] block/mq-deadline: Improve the sysfs show and store
 macros

Define separate macros for integers and jiffies to improve readability.
Use sysfs_emit() and kstrtoint() instead of sprintf() and simple_strtol().
The former functions are the recommended functions.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-11-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 64 ++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 36 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 69126beff77d..f92224ff0256 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -605,58 +605,50 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
 /*
  * sysfs parts below
  */
-static ssize_t
-deadline_var_show(int var, char *page)
-{
-	return sprintf(page, "%d\n", var);
-}
-
-static void
-deadline_var_store(int *var, const char *page)
-{
-	char *p = (char *) page;
-
-	*var = simple_strtol(p, &p, 10);
-}
-
-#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
+#define SHOW_INT(__FUNC, __VAR)						\
 static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 {									\
 	struct deadline_data *dd = e->elevator_data;			\
-	int __data = __VAR;						\
-	if (__CONV)							\
-		__data = jiffies_to_msecs(__data);			\
-	return deadline_var_show(__data, (page));			\
+									\
+	return sysfs_emit(page, "%d\n", __VAR);				\
 }
-SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[DD_READ], 1);
-SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[DD_WRITE], 1);
-SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
-SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
-SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
-#undef SHOW_FUNCTION
+#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
+SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
+SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
+SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
+SHOW_INT(deadline_front_merges_show, dd->front_merges);
+SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
+#undef SHOW_INT
+#undef SHOW_JIFFIES
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct deadline_data *dd = e->elevator_data;			\
-	int __data;							\
-	deadline_var_store(&__data, (page));				\
+	int __data, __ret;						\
+									\
+	__ret = kstrtoint(page, 0, &__data);				\
+	if (__ret < 0)							\
+		return __ret;						\
 	if (__data < (MIN))						\
 		__data = (MIN);						\
 	else if (__data > (MAX))					\
 		__data = (MAX);						\
-	if (__CONV)							\
-		*(__PTR) = msecs_to_jiffies(__data);			\
-	else								\
-		*(__PTR) = __data;					\
+	*(__PTR) = __CONV(__data);					\
 	return count;							\
 }
-STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX, 1);
-STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX, 1);
-STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
-STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
-STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
+#define STORE_INT(__FUNC, __PTR, MIN, MAX)				\
+	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
+#define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)				\
+	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
+STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
+STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
+STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
+STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
+STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
 #undef STORE_FUNCTION
+#undef STORE_INT
+#undef STORE_JIFFIES
 
 #define DD_ATTR(name) \
 	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

From 07757588e5076748308dd95ee2e3cd0b82ebb8c4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:51 -0700
Subject: [PATCH 2426/3292] block/mq-deadline: Reserve 25% of scheduler tags
 for synchronous requests

For interactive workloads it is important that synchronous requests are
not delayed. Hence reserve 25% of scheduler tags for synchronous requests.
This patch still allows asynchronous requests to fill the hardware queues
since blk_mq_init_sched() makes sure that the number of scheduler requests
is the double of the hardware queue depth. From blk_mq_init_sched():

	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_MAX_RQ);

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-12-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 55 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f92224ff0256..44da481c3fea 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -67,6 +67,7 @@ struct deadline_data {
 	int fifo_batch;
 	int writes_starved;
 	int front_merges;
+	u32 async_depth;
 
 	spinlock_t lock;
 	spinlock_t zone_lock;
@@ -397,6 +398,44 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 	return rq;
 }
 
+/*
+ * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
+ * function is used by __blk_mq_get_tag().
+ */
+static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
+{
+	struct deadline_data *dd = data->q->elevator->elevator_data;
+
+	/* Do not throttle synchronous reads. */
+	if (op_is_sync(op) && !op_is_write(op))
+		return;
+
+	/*
+	 * Throttle asynchronous requests and writes such that these requests
+	 * do not block the allocation of synchronous requests.
+	 */
+	data->shallow_depth = dd->async_depth;
+}
+
+/* Called by blk_mq_update_nr_requests(). */
+static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct deadline_data *dd = q->elevator->elevator_data;
+	struct blk_mq_tags *tags = hctx->sched_tags;
+
+	dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
+
+	sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth);
+}
+
+/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
+static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+	dd_depth_updated(hctx);
+	return 0;
+}
+
 static void dd_exit_sched(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
@@ -617,6 +656,7 @@ SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
 SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
 SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
 SHOW_INT(deadline_front_merges_show, dd->front_merges);
+SHOW_INT(deadline_async_depth_show, dd->front_merges);
 SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
 #undef SHOW_INT
 #undef SHOW_JIFFIES
@@ -645,6 +685,7 @@ STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX)
 STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
 STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
 STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
+STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
 STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
 #undef STORE_FUNCTION
 #undef STORE_INT
@@ -658,6 +699,7 @@ static struct elv_fs_entry deadline_attrs[] = {
 	DD_ATTR(write_expire),
 	DD_ATTR(writes_starved),
 	DD_ATTR(front_merges),
+	DD_ATTR(async_depth),
 	DD_ATTR(fifo_batch),
 	__ATTR_NULL
 };
@@ -733,6 +775,15 @@ static int deadline_starved_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int dd_async_depth_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	struct deadline_data *dd = q->elevator->elevator_data;
+
+	seq_printf(m, "%u\n", dd->async_depth);
+	return 0;
+}
+
 static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
 	__acquires(&dd->lock)
 {
@@ -775,6 +826,7 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
 	DEADLINE_QUEUE_DDIR_ATTRS(write),
 	{"batching", 0400, deadline_batching_show},
 	{"starved", 0400, deadline_starved_show},
+	{"async_depth", 0400, dd_async_depth_show},
 	{"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
 	{},
 };
@@ -783,6 +835,8 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
 
 static struct elevator_type mq_deadline = {
 	.ops = {
+		.depth_updated		= dd_depth_updated,
+		.limit_depth		= dd_limit_depth,
 		.insert_requests	= dd_insert_requests,
 		.dispatch_request	= dd_dispatch_request,
 		.prepare_request	= dd_prepare_request,
@@ -796,6 +850,7 @@ static struct elevator_type mq_deadline = {
 		.has_work		= dd_has_work,
 		.init_sched		= dd_init_sched,
 		.exit_sched		= dd_exit_sched,
+		.init_hctx		= dd_init_hctx,
 	},
 
 #ifdef CONFIG_BLK_DEBUG_FS

From d672d325b1492f5b0e54b7226f01e2d57b58bfb4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:52 -0700
Subject: [PATCH 2427/3292] block/mq-deadline: Micro-optimize the batching
 algorithm

When dispatching the first request of a batch, the deadline_move_request()
call clears .next_rq[] for the opposite data direction. .next_rq[] is not
restored when changing data direction. Fix this by not clearing .next_rq[]
and by keeping track of the data direction of a batch in a variable instead.

This patch is a micro-optimization because:
- The number of deadline_next_request() calls for the read direction is
  halved.
- The number of times that deadline_next_request() returns NULL is reduced.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-13-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 44da481c3fea..b09ae1f332a2 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -53,6 +53,8 @@ struct deadline_data {
 	struct rb_root sort_list[DD_DIR_COUNT];
 	struct list_head fifo_list[DD_DIR_COUNT];
 
+	/* Data direction of latest dispatched request. */
+	enum dd_data_dir last_dir;
 	/*
 	 * next in sort order. read, write or both are NULL
 	 */
@@ -179,8 +181,6 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
 {
 	const enum dd_data_dir data_dir = rq_data_dir(rq);
 
-	dd->next_rq[DD_READ] = NULL;
-	dd->next_rq[DD_WRITE] = NULL;
 	dd->next_rq[data_dir] = deadline_latter_request(rq);
 
 	/*
@@ -292,10 +292,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	/*
 	 * batches are currently reads XOR writes
 	 */
-	rq = deadline_next_request(dd, DD_WRITE);
-	if (!rq)
-		rq = deadline_next_request(dd, DD_READ);
-
+	rq = deadline_next_request(dd, dd->last_dir);
 	if (rq && dd->batching < dd->fifo_batch)
 		/* we have a next request are still entitled to batch */
 		goto dispatch_request;
@@ -361,6 +358,7 @@ dispatch_find_request:
 	if (!rq)
 		return NULL;
 
+	dd->last_dir = data_dir;
 	dd->batching = 0;
 
 dispatch_request:
@@ -473,6 +471,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->fifo_expire[DD_WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
 	dd->front_merges = 1;
+	dd->last_dir = DD_WRITE;
 	dd->fifo_batch = fifo_batch;
 	spin_lock_init(&dd->lock);
 	spin_lock_init(&dd->zone_lock);

From c807ab520fc3fd056c47c74ced63f9d3991a171b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:53 -0700
Subject: [PATCH 2428/3292] block/mq-deadline: Add I/O priority support

Maintain one dispatch list and one FIFO list per I/O priority class: RT, BE
and IDLE. Maintain statistics for each priority level. Split the debugfs
attributes per priority level as follows:

$ ls /sys/kernel/debug/block/.../sched/
async_depth  dispatch2        read_next_rq      write2_fifo_list
batching     read0_fifo_list  starved           write_next_rq
dispatch0    read1_fifo_list  write0_fifo_list
dispatch1    read2_fifo_list  write1_fifo_list

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-14-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 342 +++++++++++++++++++++++++++++---------------
 1 file changed, 228 insertions(+), 114 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index b09ae1f332a2..aba672a5be1e 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -42,23 +42,36 @@ enum dd_data_dir {
 
 enum { DD_DIR_COUNT = 2 };
 
+enum dd_prio {
+	DD_RT_PRIO	= 0,
+	DD_BE_PRIO	= 1,
+	DD_IDLE_PRIO	= 2,
+	DD_PRIO_MAX	= 2,
+};
+
+enum { DD_PRIO_COUNT = 3 };
+
+/*
+ * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
+ * present on both sort_list[] and fifo_list[].
+ */
+struct dd_per_prio {
+	struct list_head dispatch;
+	struct rb_root sort_list[DD_DIR_COUNT];
+	struct list_head fifo_list[DD_DIR_COUNT];
+	/* Next request in FIFO order. Read, write or both are NULL. */
+	struct request *next_rq[DD_DIR_COUNT];
+};
+
 struct deadline_data {
 	/*
 	 * run time data
 	 */
 
-	/*
-	 * requests (deadline_rq s) are present on both sort_list and fifo_list
-	 */
-	struct rb_root sort_list[DD_DIR_COUNT];
-	struct list_head fifo_list[DD_DIR_COUNT];
+	struct dd_per_prio per_prio[DD_PRIO_COUNT];
 
 	/* Data direction of latest dispatched request. */
 	enum dd_data_dir last_dir;
-	/*
-	 * next in sort order. read, write or both are NULL
-	 */
-	struct request *next_rq[DD_DIR_COUNT];
 	unsigned int batching;		/* number of sequential requests made */
 	unsigned int starved;		/* times reads have starved writes */
 
@@ -73,13 +86,29 @@ struct deadline_data {
 
 	spinlock_t lock;
 	spinlock_t zone_lock;
-	struct list_head dispatch;
+};
+
+/* Maps an I/O priority class to a deadline scheduler priority. */
+static const enum dd_prio ioprio_class_to_prio[] = {
+	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
+	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
+	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
+	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
 };
 
 static inline struct rb_root *
-deadline_rb_root(struct deadline_data *dd, struct request *rq)
+deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
 {
-	return &dd->sort_list[rq_data_dir(rq)];
+	return &per_prio->sort_list[rq_data_dir(rq)];
+}
+
+/*
+ * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
+ * request.
+ */
+static u8 dd_rq_ioclass(struct request *rq)
+{
+	return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
 }
 
 /*
@@ -97,38 +126,38 @@ deadline_latter_request(struct request *rq)
 }
 
 static void
-deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
+deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
 {
-	struct rb_root *root = deadline_rb_root(dd, rq);
+	struct rb_root *root = deadline_rb_root(per_prio, rq);
 
 	elv_rb_add(root, rq);
 }
 
 static inline void
-deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
+deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
 {
 	const enum dd_data_dir data_dir = rq_data_dir(rq);
 
-	if (dd->next_rq[data_dir] == rq)
-		dd->next_rq[data_dir] = deadline_latter_request(rq);
+	if (per_prio->next_rq[data_dir] == rq)
+		per_prio->next_rq[data_dir] = deadline_latter_request(rq);
 
-	elv_rb_del(deadline_rb_root(dd, rq), rq);
+	elv_rb_del(deadline_rb_root(per_prio, rq), rq);
 }
 
 /*
  * remove rq from rbtree and fifo.
  */
-static void deadline_remove_request(struct request_queue *q, struct request *rq)
+static void deadline_remove_request(struct request_queue *q,
+				    struct dd_per_prio *per_prio,
+				    struct request *rq)
 {
-	struct deadline_data *dd = q->elevator->elevator_data;
-
 	list_del_init(&rq->queuelist);
 
 	/*
 	 * We might not be on the rbtree, if we are doing an insert merge
 	 */
 	if (!RB_EMPTY_NODE(&rq->rb_node))
-		deadline_del_rq_rb(dd, rq);
+		deadline_del_rq_rb(per_prio, rq);
 
 	elv_rqhash_del(q, rq);
 	if (q->last_merge == rq)
@@ -139,13 +168,16 @@ static void dd_request_merged(struct request_queue *q, struct request *req,
 			      enum elv_merge type)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
+	const u8 ioprio_class = dd_rq_ioclass(req);
+	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
 	if (type == ELEVATOR_FRONT_MERGE) {
-		elv_rb_del(deadline_rb_root(dd, req), req);
-		deadline_add_rq_rb(dd, req);
+		elv_rb_del(deadline_rb_root(per_prio, req), req);
+		deadline_add_rq_rb(per_prio, req);
 	}
 }
 
@@ -155,6 +187,9 @@ static void dd_request_merged(struct request_queue *q, struct request *req,
 static void dd_merged_requests(struct request_queue *q, struct request *req,
 			       struct request *next)
 {
+	const u8 ioprio_class = dd_rq_ioclass(next);
+	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+
 	/*
 	 * if next expires before rq, assign its expire time to rq
 	 * and move into next position (next will be deleted) in fifo
@@ -170,33 +205,34 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
 	/*
 	 * kill knowledge of next, this one is a goner
 	 */
-	deadline_remove_request(q, next);
+	deadline_remove_request(q, &dd->per_prio[prio], next);
 }
 
 /*
  * move an entry to dispatch queue
  */
 static void
-deadline_move_request(struct deadline_data *dd, struct request *rq)
+deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
+		      struct request *rq)
 {
 	const enum dd_data_dir data_dir = rq_data_dir(rq);
 
-	dd->next_rq[data_dir] = deadline_latter_request(rq);
+	per_prio->next_rq[data_dir] = deadline_latter_request(rq);
 
 	/*
 	 * take it off the sort and fifo list
 	 */
-	deadline_remove_request(rq->q, rq);
+	deadline_remove_request(rq->q, per_prio, rq);
 }
 
 /*
  * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
-static inline int deadline_check_fifo(struct deadline_data *dd,
+static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
 				      enum dd_data_dir data_dir)
 {
-	struct request *rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
+	struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 
 	/*
 	 * rq is expired!
@@ -212,15 +248,16 @@ static inline int deadline_check_fifo(struct deadline_data *dd,
  * dispatch using arrival ordered lists.
  */
 static struct request *
-deadline_fifo_request(struct deadline_data *dd, enum dd_data_dir data_dir)
+deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
+		      enum dd_data_dir data_dir)
 {
 	struct request *rq;
 	unsigned long flags;
 
-	if (list_empty(&dd->fifo_list[data_dir]))
+	if (list_empty(&per_prio->fifo_list[data_dir]))
 		return NULL;
 
-	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
+	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
 		return rq;
 
@@ -229,7 +266,7 @@ deadline_fifo_request(struct deadline_data *dd, enum dd_data_dir data_dir)
 	 * an unlocked target zone.
 	 */
 	spin_lock_irqsave(&dd->zone_lock, flags);
-	list_for_each_entry(rq, &dd->fifo_list[DD_WRITE], queuelist) {
+	list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
 		if (blk_req_can_dispatch_to_zone(rq))
 			goto out;
 	}
@@ -245,12 +282,13 @@ out:
  * dispatch using sector position sorted lists.
  */
 static struct request *
-deadline_next_request(struct deadline_data *dd, enum dd_data_dir data_dir)
+deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
+		      enum dd_data_dir data_dir)
 {
 	struct request *rq;
 	unsigned long flags;
 
-	rq = dd->next_rq[data_dir];
+	rq = per_prio->next_rq[data_dir];
 	if (!rq)
 		return NULL;
 
@@ -276,15 +314,17 @@ deadline_next_request(struct deadline_data *dd, enum dd_data_dir data_dir)
  * deadline_dispatch_requests selects the best request according to
  * read/write expire, fifo_batch, etc
  */
-static struct request *__dd_dispatch_request(struct deadline_data *dd)
+static struct request *__dd_dispatch_request(struct deadline_data *dd,
+					     struct dd_per_prio *per_prio)
 {
 	struct request *rq, *next_rq;
 	enum dd_data_dir data_dir;
 
 	lockdep_assert_held(&dd->lock);
 
-	if (!list_empty(&dd->dispatch)) {
-		rq = list_first_entry(&dd->dispatch, struct request, queuelist);
+	if (!list_empty(&per_prio->dispatch)) {
+		rq = list_first_entry(&per_prio->dispatch, struct request,
+				      queuelist);
 		list_del_init(&rq->queuelist);
 		goto done;
 	}
@@ -292,7 +332,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	/*
 	 * batches are currently reads XOR writes
 	 */
-	rq = deadline_next_request(dd, dd->last_dir);
+	rq = deadline_next_request(dd, per_prio, dd->last_dir);
 	if (rq && dd->batching < dd->fifo_batch)
 		/* we have a next request are still entitled to batch */
 		goto dispatch_request;
@@ -302,10 +342,10 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	 * data direction (read / write)
 	 */
 
-	if (!list_empty(&dd->fifo_list[DD_READ])) {
-		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[DD_READ]));
+	if (!list_empty(&per_prio->fifo_list[DD_READ])) {
+		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
 
-		if (deadline_fifo_request(dd, DD_WRITE) &&
+		if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
 		    (dd->starved++ >= dd->writes_starved))
 			goto dispatch_writes;
 
@@ -318,9 +358,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd)
 	 * there are either no reads or writes have been starved
 	 */
 
-	if (!list_empty(&dd->fifo_list[DD_WRITE])) {
+	if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
 dispatch_writes:
-		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[DD_WRITE]));
+		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
 
 		dd->starved = 0;
 
@@ -335,14 +375,14 @@ dispatch_find_request:
 	/*
 	 * we are not running a batch, find best request for selected data_dir
 	 */
-	next_rq = deadline_next_request(dd, data_dir);
-	if (deadline_check_fifo(dd, data_dir) || !next_rq) {
+	next_rq = deadline_next_request(dd, per_prio, data_dir);
+	if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
 		/*
 		 * A deadline has expired, the last request was in the other
 		 * direction, or we have run out of higher-sectored requests.
 		 * Start again from the request with the earliest expiry time.
 		 */
-		rq = deadline_fifo_request(dd, data_dir);
+		rq = deadline_fifo_request(dd, per_prio, data_dir);
 	} else {
 		/*
 		 * The last req was the same dir and we have a next request in
@@ -366,7 +406,7 @@ dispatch_request:
 	 * rq is the selected appropriate request.
 	 */
 	dd->batching++;
-	deadline_move_request(dd, rq);
+	deadline_move_request(dd, per_prio, rq);
 done:
 	/*
 	 * If the request needs its target zone locked, do it.
@@ -388,9 +428,14 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
 	struct request *rq;
+	enum dd_prio prio;
 
 	spin_lock(&dd->lock);
-	rq = __dd_dispatch_request(dd);
+	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
+		rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
+		if (rq)
+			break;
+	}
 	spin_unlock(&dd->lock);
 
 	return rq;
@@ -437,9 +482,14 @@ static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 static void dd_exit_sched(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
+	enum dd_prio prio;
 
-	BUG_ON(!list_empty(&dd->fifo_list[DD_READ]));
-	BUG_ON(!list_empty(&dd->fifo_list[DD_WRITE]));
+	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
+		struct dd_per_prio *per_prio = &dd->per_prio[prio];
+
+		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
+		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
+	}
 
 	kfree(dd);
 }
@@ -451,22 +501,28 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct deadline_data *dd;
 	struct elevator_queue *eq;
+	enum dd_prio prio;
+	int ret = -ENOMEM;
 
 	eq = elevator_alloc(q, e);
 	if (!eq)
-		return -ENOMEM;
+		return ret;
 
 	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
-	if (!dd) {
-		kobject_put(&eq->kobj);
-		return -ENOMEM;
-	}
+	if (!dd)
+		goto put_eq;
+
 	eq->elevator_data = dd;
 
-	INIT_LIST_HEAD(&dd->fifo_list[DD_READ]);
-	INIT_LIST_HEAD(&dd->fifo_list[DD_WRITE]);
-	dd->sort_list[DD_READ] = RB_ROOT;
-	dd->sort_list[DD_WRITE] = RB_ROOT;
+	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
+		struct dd_per_prio *per_prio = &dd->per_prio[prio];
+
+		INIT_LIST_HEAD(&per_prio->dispatch);
+		INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
+		INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
+		per_prio->sort_list[DD_READ] = RB_ROOT;
+		per_prio->sort_list[DD_WRITE] = RB_ROOT;
+	}
 	dd->fifo_expire[DD_READ] = read_expire;
 	dd->fifo_expire[DD_WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
@@ -475,10 +531,13 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->fifo_batch = fifo_batch;
 	spin_lock_init(&dd->lock);
 	spin_lock_init(&dd->zone_lock);
-	INIT_LIST_HEAD(&dd->dispatch);
 
 	q->elevator = eq;
 	return 0;
+
+put_eq:
+	kobject_put(&eq->kobj);
+	return ret;
 }
 
 /*
@@ -489,13 +548,16 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
 			    struct bio *bio)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
+	const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
+	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];
 	sector_t sector = bio_end_sector(bio);
 	struct request *__rq;
 
 	if (!dd->front_merges)
 		return ELEVATOR_NO_MERGE;
 
-	__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
+	__rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
 	if (__rq) {
 		BUG_ON(sector != blk_rq_pos(__rq));
 
@@ -538,6 +600,10 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const enum dd_data_dir data_dir = rq_data_dir(rq);
+	u16 ioprio = req_get_ioprio(rq);
+	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
+	struct dd_per_prio *per_prio;
+	enum dd_prio prio;
 
 	lockdep_assert_held(&dd->lock);
 
@@ -547,15 +613,18 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	 */
 	blk_req_zone_write_unlock(rq);
 
+	prio = ioprio_class_to_prio[ioprio_class];
+
 	if (blk_mq_sched_try_insert_merge(q, rq))
 		return;
 
 	trace_block_rq_insert(rq);
 
+	per_prio = &dd->per_prio[prio];
 	if (at_head) {
-		list_add(&rq->queuelist, &dd->dispatch);
+		list_add(&rq->queuelist, &per_prio->dispatch);
 	} else {
-		deadline_add_rq_rb(dd, rq);
+		deadline_add_rq_rb(per_prio, rq);
 
 		if (rq_mergeable(rq)) {
 			elv_rqhash_add(q, rq);
@@ -567,7 +636,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		 * set expire time and add to fifo list
 		 */
 		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
-		list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
+		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
 	}
 }
 
@@ -618,26 +687,39 @@ static void dd_prepare_request(struct request *rq)
 static void dd_finish_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
+	struct deadline_data *dd = q->elevator->elevator_data;
+	const u8 ioprio_class = dd_rq_ioclass(rq);
+	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
 	if (blk_queue_is_zoned(q)) {
-		struct deadline_data *dd = q->elevator->elevator_data;
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
 		blk_req_zone_write_unlock(rq);
-		if (!list_empty(&dd->fifo_list[DD_WRITE]))
+		if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
 			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
 		spin_unlock_irqrestore(&dd->zone_lock, flags);
 	}
 }
 
+static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
+{
+	return !list_empty_careful(&per_prio->dispatch) ||
+		!list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
+		!list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
+}
+
 static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
 {
 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+	enum dd_prio prio;
 
-	return !list_empty_careful(&dd->dispatch) ||
-		!list_empty_careful(&dd->fifo_list[0]) ||
-		!list_empty_careful(&dd->fifo_list[1]);
+	for (prio = 0; prio <= DD_PRIO_MAX; prio++)
+		if (dd_has_work_for_prio(&dd->per_prio[prio]))
+			return true;
+
+	return false;
 }
 
 /*
@@ -704,16 +786,17 @@ static struct elv_fs_entry deadline_attrs[] = {
 };
 
 #ifdef CONFIG_BLK_DEBUG_FS
-#define DEADLINE_DEBUGFS_DDIR_ATTRS(ddir, name)				\
+#define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)		\
 static void *deadline_##name##_fifo_start(struct seq_file *m,		\
 					  loff_t *pos)			\
 	__acquires(&dd->lock)						\
 {									\
 	struct request_queue *q = m->private;				\
 	struct deadline_data *dd = q->elevator->elevator_data;		\
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
 									\
 	spin_lock(&dd->lock);						\
-	return seq_list_start(&dd->fifo_list[ddir], *pos);		\
+	return seq_list_start(&per_prio->fifo_list[data_dir], *pos);	\
 }									\
 									\
 static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,	\
@@ -721,8 +804,9 @@ static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,	\
 {									\
 	struct request_queue *q = m->private;				\
 	struct deadline_data *dd = q->elevator->elevator_data;		\
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
 									\
-	return seq_list_next(v, &dd->fifo_list[ddir], pos);		\
+	return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);	\
 }									\
 									\
 static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)	\
@@ -746,14 +830,20 @@ static int deadline_##name##_next_rq_show(void *data,			\
 {									\
 	struct request_queue *q = data;					\
 	struct deadline_data *dd = q->elevator->elevator_data;		\
-	struct request *rq = dd->next_rq[ddir];				\
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
+	struct request *rq = per_prio->next_rq[data_dir];		\
 									\
 	if (rq)								\
 		__blk_mq_debugfs_rq_show(m, rq);			\
 	return 0;							\
 }
-DEADLINE_DEBUGFS_DDIR_ATTRS(DD_READ, read)
-DEADLINE_DEBUGFS_DDIR_ATTRS(DD_WRITE, write)
+
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
+DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
 #undef DEADLINE_DEBUGFS_DDIR_ATTRS
 
 static int deadline_batching_show(void *data, struct seq_file *m)
@@ -783,50 +873,74 @@ static int dd_async_depth_show(void *data, struct seq_file *m)
 	return 0;
 }
 
-static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
-	__acquires(&dd->lock)
-{
-	struct request_queue *q = m->private;
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	spin_lock(&dd->lock);
-	return seq_list_start(&dd->dispatch, *pos);
+#define DEADLINE_DISPATCH_ATTR(prio)					\
+static void *deadline_dispatch##prio##_start(struct seq_file *m,	\
+					     loff_t *pos)		\
+	__acquires(&dd->lock)						\
+{									\
+	struct request_queue *q = m->private;				\
+	struct deadline_data *dd = q->elevator->elevator_data;		\
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
+									\
+	spin_lock(&dd->lock);						\
+	return seq_list_start(&per_prio->dispatch, *pos);		\
+}									\
+									\
+static void *deadline_dispatch##prio##_next(struct seq_file *m,		\
+					    void *v, loff_t *pos)	\
+{									\
+	struct request_queue *q = m->private;				\
+	struct deadline_data *dd = q->elevator->elevator_data;		\
+	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
+									\
+	return seq_list_next(v, &per_prio->dispatch, pos);		\
+}									\
+									\
+static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v)	\
+	__releases(&dd->lock)						\
+{									\
+	struct request_queue *q = m->private;				\
+	struct deadline_data *dd = q->elevator->elevator_data;		\
+									\
+	spin_unlock(&dd->lock);						\
+}									\
+									\
+static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \
+	.start	= deadline_dispatch##prio##_start,			\
+	.next	= deadline_dispatch##prio##_next,			\
+	.stop	= deadline_dispatch##prio##_stop,			\
+	.show	= blk_mq_debugfs_rq_show,				\
 }
 
-static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct request_queue *q = m->private;
-	struct deadline_data *dd = q->elevator->elevator_data;
+DEADLINE_DISPATCH_ATTR(0);
+DEADLINE_DISPATCH_ATTR(1);
+DEADLINE_DISPATCH_ATTR(2);
+#undef DEADLINE_DISPATCH_ATTR
 
-	return seq_list_next(v, &dd->dispatch, pos);
-}
-
-static void deadline_dispatch_stop(struct seq_file *m, void *v)
-	__releases(&dd->lock)
-{
-	struct request_queue *q = m->private;
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	spin_unlock(&dd->lock);
-}
-
-static const struct seq_operations deadline_dispatch_seq_ops = {
-	.start	= deadline_dispatch_start,
-	.next	= deadline_dispatch_next,
-	.stop	= deadline_dispatch_stop,
-	.show	= blk_mq_debugfs_rq_show,
-};
-
-#define DEADLINE_QUEUE_DDIR_ATTRS(name)						\
-	{#name "_fifo_list", 0400, .seq_ops = &deadline_##name##_fifo_seq_ops},	\
+#define DEADLINE_QUEUE_DDIR_ATTRS(name)					\
+	{#name "_fifo_list", 0400,					\
+			.seq_ops = &deadline_##name##_fifo_seq_ops}
+#define DEADLINE_NEXT_RQ_ATTR(name)					\
 	{#name "_next_rq", 0400, deadline_##name##_next_rq_show}
 static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
-	DEADLINE_QUEUE_DDIR_ATTRS(read),
-	DEADLINE_QUEUE_DDIR_ATTRS(write),
+	DEADLINE_QUEUE_DDIR_ATTRS(read0),
+	DEADLINE_QUEUE_DDIR_ATTRS(write0),
+	DEADLINE_QUEUE_DDIR_ATTRS(read1),
+	DEADLINE_QUEUE_DDIR_ATTRS(write1),
+	DEADLINE_QUEUE_DDIR_ATTRS(read2),
+	DEADLINE_QUEUE_DDIR_ATTRS(write2),
+	DEADLINE_NEXT_RQ_ATTR(read0),
+	DEADLINE_NEXT_RQ_ATTR(write0),
+	DEADLINE_NEXT_RQ_ATTR(read1),
+	DEADLINE_NEXT_RQ_ATTR(write1),
+	DEADLINE_NEXT_RQ_ATTR(read2),
+	DEADLINE_NEXT_RQ_ATTR(write2),
 	{"batching", 0400, deadline_batching_show},
 	{"starved", 0400, deadline_starved_show},
 	{"async_depth", 0400, dd_async_depth_show},
-	{"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
+	{"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
+	{"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
+	{"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
 	{},
 };
 #undef DEADLINE_QUEUE_DDIR_ATTRS
@@ -876,6 +990,6 @@ static void __exit deadline_exit(void)
 module_init(deadline_init);
 module_exit(deadline_exit);
 
-MODULE_AUTHOR("Jens Axboe");
+MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MQ deadline IO scheduler");

From 38ba64d12d4cf9fa260c45d7398e2a24afaceefa Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:54 -0700
Subject: [PATCH 2429/3292] block/mq-deadline: Track I/O statistics

Track I/O statistics per I/O priority and export these statistics to
debugfs. These statistics help developers of the deadline scheduler.

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-15-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 100 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index aba672a5be1e..04d9d6b3745b 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -51,6 +51,19 @@ enum dd_prio {
 
 enum { DD_PRIO_COUNT = 3 };
 
+/* I/O statistics per I/O priority. */
+struct io_stats_per_prio {
+	local_t inserted;
+	local_t merged;
+	local_t dispatched;
+	local_t completed;
+};
+
+/* I/O statistics for all I/O priorities (enum dd_prio). */
+struct io_stats {
+	struct io_stats_per_prio stats[DD_PRIO_COUNT];
+};
+
 /*
  * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
  * present on both sort_list[] and fifo_list[].
@@ -75,6 +88,8 @@ struct deadline_data {
 	unsigned int batching;		/* number of sequential requests made */
 	unsigned int starved;		/* times reads have starved writes */
 
+	struct io_stats __percpu *stats;
+
 	/*
 	 * settings that change how the i/o scheduler behaves
 	 */
@@ -88,6 +103,33 @@ struct deadline_data {
 	spinlock_t zone_lock;
 };
 
+/* Count one event of type 'event_type' and with I/O priority 'prio' */
+#define dd_count(dd, event_type, prio) do {				\
+	struct io_stats *io_stats = get_cpu_ptr((dd)->stats);		\
+									\
+	BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));	\
+	BUILD_BUG_ON(!__same_type((prio), enum dd_prio));		\
+	local_inc(&io_stats->stats[(prio)].event_type);			\
+	put_cpu_ptr(io_stats);						\
+} while (0)
+
+/*
+ * Returns the total number of dd_count(dd, event_type, prio) calls across all
+ * CPUs. No locking or barriers since it is fine if the returned sum is slightly
+ * outdated.
+ */
+#define dd_sum(dd, event_type, prio) ({					\
+	unsigned int cpu;						\
+	u32 sum = 0;							\
+									\
+	BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));	\
+	BUILD_BUG_ON(!__same_type((prio), enum dd_prio));		\
+	for_each_present_cpu(cpu)					\
+		sum += local_read(&per_cpu_ptr((dd)->stats, cpu)->	\
+				  stats[(prio)].event_type);		\
+	sum;								\
+})
+
 /* Maps an I/O priority class to a deadline scheduler priority. */
 static const enum dd_prio ioprio_class_to_prio[] = {
 	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
@@ -187,9 +229,12 @@ static void dd_request_merged(struct request_queue *q, struct request *req,
 static void dd_merged_requests(struct request_queue *q, struct request *req,
 			       struct request *next)
 {
+	struct deadline_data *dd = q->elevator->elevator_data;
 	const u8 ioprio_class = dd_rq_ioclass(next);
 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 
+	dd_count(dd, merged, prio);
+
 	/*
 	 * if next expires before rq, assign its expire time to rq
 	 * and move into next position (next will be deleted) in fifo
@@ -225,6 +270,12 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	deadline_remove_request(rq->q, per_prio, rq);
 }
 
+/* Number of requests queued for a given priority level. */
+static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
+{
+	return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
+}
+
 /*
  * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
@@ -319,6 +370,8 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 {
 	struct request *rq, *next_rq;
 	enum dd_data_dir data_dir;
+	enum dd_prio prio;
+	u8 ioprio_class;
 
 	lockdep_assert_held(&dd->lock);
 
@@ -408,6 +461,9 @@ dispatch_request:
 	dd->batching++;
 	deadline_move_request(dd, per_prio, rq);
 done:
+	ioprio_class = dd_rq_ioclass(rq);
+	prio = ioprio_class_to_prio[ioprio_class];
+	dd_count(dd, dispatched, prio);
 	/*
 	 * If the request needs its target zone locked, do it.
 	 */
@@ -491,6 +547,8 @@ static void dd_exit_sched(struct elevator_queue *e)
 		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
 	}
 
+	free_percpu(dd->stats);
+
 	kfree(dd);
 }
 
@@ -514,6 +572,11 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	eq->elevator_data = dd;
 
+	dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
+				     GFP_KERNEL | __GFP_ZERO);
+	if (!dd->stats)
+		goto free_dd;
+
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
@@ -535,6 +598,9 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	q->elevator = eq;
 	return 0;
 
+free_dd:
+	kfree(dd);
+
 put_eq:
 	kobject_put(&eq->kobj);
 	return ret;
@@ -614,6 +680,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	blk_req_zone_write_unlock(rq);
 
 	prio = ioprio_class_to_prio[ioprio_class];
+	dd_count(dd, inserted, prio);
 
 	if (blk_mq_sched_try_insert_merge(q, rq))
 		return;
@@ -692,6 +759,8 @@ static void dd_finish_request(struct request *rq)
 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 	struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
+	dd_count(dd, completed, prio);
+
 	if (blk_queue_is_zoned(q)) {
 		unsigned long flags;
 
@@ -873,6 +942,35 @@ static int dd_async_depth_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int dd_queued_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	struct deadline_data *dd = q->elevator->elevator_data;
+
+	seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
+		   dd_queued(dd, DD_BE_PRIO),
+		   dd_queued(dd, DD_IDLE_PRIO));
+	return 0;
+}
+
+/* Number of requests owned by the block driver for a given priority. */
+static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
+{
+	return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
+		- dd_sum(dd, completed, prio);
+}
+
+static int dd_owned_by_driver_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	struct deadline_data *dd = q->elevator->elevator_data;
+
+	seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
+		   dd_owned_by_driver(dd, DD_BE_PRIO),
+		   dd_owned_by_driver(dd, DD_IDLE_PRIO));
+	return 0;
+}
+
 #define DEADLINE_DISPATCH_ATTR(prio)					\
 static void *deadline_dispatch##prio##_start(struct seq_file *m,	\
 					     loff_t *pos)		\
@@ -941,6 +1039,8 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
 	{"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
 	{"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
 	{"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
+	{"owned_by_driver", 0400, dd_owned_by_driver_show},
+	{"queued", 0400, dd_queued_show},
 	{},
 };
 #undef DEADLINE_QUEUE_DDIR_ATTRS

From 08a9ad8bf607388d768a341957d53eae64250c2d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:55 -0700
Subject: [PATCH 2430/3292] block/mq-deadline: Add cgroup support

Maintain statistics per cgroup and export these to user space. These
statistics are essential for verifying whether the proper I/O priorities
have been assigned to requests. An example of the statistics data with
this patch applied:

$ cat /sys/fs/cgroup/io.stat
11:2 rbytes=0 wbytes=0 rios=3 wios=0 dbytes=0 dios=0 [NONE] dispatched=0 inserted=0 merged=171 [RT] dispatched=0 inserted=0 merged=0 [BE] dispatched=0 inserted=0 merged=0 [IDLE] dispatched=0 inserted=0 merged=0
8:32 rbytes=2142720 wbytes=0 rios=105 wios=0 dbytes=0 dios=0 [NONE] dispatched=0 inserted=0 merged=171 [RT] dispatched=0 inserted=0 merged=0 [BE] dispatched=0 inserted=0 merged=0 [IDLE] dispatched=0 inserted=0 merged=0

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-16-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig.iosched                       |   6 +
 block/Makefile                              |   2 +
 block/mq-deadline-cgroup.c                  | 126 ++++++++++++++++++++
 block/mq-deadline-cgroup.h                  | 114 ++++++++++++++++++
 block/{mq-deadline.c => mq-deadline-main.c} |  74 +++++++++---
 5 files changed, 308 insertions(+), 14 deletions(-)
 create mode 100644 block/mq-deadline-cgroup.c
 create mode 100644 block/mq-deadline-cgroup.h
 rename block/{mq-deadline.c => mq-deadline-main.c} (95%)

diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 2f2158e05a91..64053d67a97b 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -9,6 +9,12 @@ config MQ_IOSCHED_DEADLINE
 	help
 	  MQ version of the deadline IO scheduler.
 
+config MQ_IOSCHED_DEADLINE_CGROUP
+       tristate
+       default y
+       depends on MQ_IOSCHED_DEADLINE
+       depends on BLK_CGROUP
+
 config MQ_IOSCHED_KYBER
 	tristate "Kyber I/O scheduler"
 	default y
diff --git a/block/Makefile b/block/Makefile
index af3d044abaf1..b9db5d4edfc8 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -21,6 +21,8 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO)	+= blk-ioprio.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
 obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
+mq-deadline-y += mq-deadline-main.o
+mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
 obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
 obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o
diff --git a/block/mq-deadline-cgroup.c b/block/mq-deadline-cgroup.c
new file mode 100644
index 000000000000..3b4bfddec39f
--- /dev/null
+++ b/block/mq-deadline-cgroup.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/blk-cgroup.h>
+#include <linux/ioprio.h>
+
+#include "mq-deadline-cgroup.h"
+
+static struct blkcg_policy dd_blkcg_policy;
+
+static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp)
+{
+	struct dd_blkcg *pd;
+
+	pd = kzalloc(sizeof(*pd), gfp);
+	if (!pd)
+		return NULL;
+	pd->stats = alloc_percpu_gfp(typeof(*pd->stats),
+				     GFP_KERNEL | __GFP_ZERO);
+	if (!pd->stats) {
+		kfree(pd);
+		return NULL;
+	}
+	return &pd->cpd;
+}
+
+static void dd_cpd_free(struct blkcg_policy_data *cpd)
+{
+	struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd);
+
+	free_percpu(dd_blkcg->stats);
+	kfree(dd_blkcg);
+}
+
+static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd)
+{
+	return container_of(blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy),
+			    struct dd_blkcg, cpd);
+}
+
+/*
+ * Convert an association between a block cgroup and a request queue into a
+ * pointer to the mq-deadline information associated with a (blkcg, queue) pair.
+ */
+struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
+{
+	struct blkg_policy_data *pd;
+
+	pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy);
+	if (!pd)
+		return NULL;
+
+	return dd_blkcg_from_pd(pd);
+}
+
+static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
+{
+	static const char *const prio_class_name[] = {
+		[IOPRIO_CLASS_NONE]	= "NONE",
+		[IOPRIO_CLASS_RT]	= "RT",
+		[IOPRIO_CLASS_BE]	= "BE",
+		[IOPRIO_CLASS_IDLE]	= "IDLE",
+	};
+	struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd);
+	int res = 0;
+	u8 prio;
+
+	for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++)
+		res += scnprintf(buf + res, size - res,
+			" [%s] dispatched=%u inserted=%u merged=%u",
+			prio_class_name[prio],
+			ddcg_sum(blkcg, dispatched, prio) +
+			ddcg_sum(blkcg, merged, prio) -
+			ddcg_sum(blkcg, completed, prio),
+			ddcg_sum(blkcg, inserted, prio) -
+			ddcg_sum(blkcg, completed, prio),
+			ddcg_sum(blkcg, merged, prio));
+
+	return res;
+}
+
+static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q,
+					    struct blkcg *blkcg)
+{
+	struct dd_blkg *pd;
+
+	pd = kzalloc(sizeof(*pd), gfp);
+	if (!pd)
+		return NULL;
+	return &pd->pd;
+}
+
+static void dd_pd_free(struct blkg_policy_data *pd)
+{
+	struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd);
+
+	kfree(dd_blkg);
+}
+
+static struct blkcg_policy dd_blkcg_policy = {
+	.cpd_alloc_fn		= dd_cpd_alloc,
+	.cpd_free_fn		= dd_cpd_free,
+
+	.pd_alloc_fn		= dd_pd_alloc,
+	.pd_free_fn		= dd_pd_free,
+	.pd_stat_fn		= dd_pd_stat,
+};
+
+int dd_activate_policy(struct request_queue *q)
+{
+	return blkcg_activate_policy(q, &dd_blkcg_policy);
+}
+
+void dd_deactivate_policy(struct request_queue *q)
+{
+	blkcg_deactivate_policy(q, &dd_blkcg_policy);
+}
+
+int __init dd_blkcg_init(void)
+{
+	return blkcg_policy_register(&dd_blkcg_policy);
+}
+
+void __exit dd_blkcg_exit(void)
+{
+	blkcg_policy_unregister(&dd_blkcg_policy);
+}
diff --git a/block/mq-deadline-cgroup.h b/block/mq-deadline-cgroup.h
new file mode 100644
index 000000000000..0143fd74f3ce
--- /dev/null
+++ b/block/mq-deadline-cgroup.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_MQ_DEADLINE_CGROUP_H_)
+#define _MQ_DEADLINE_CGROUP_H_
+
+#include <linux/blk-cgroup.h>
+
+struct request_queue;
+
+/**
+ * struct io_stats_per_prio - I/O statistics per I/O priority class.
+ * @inserted: Number of inserted requests.
+ * @merged: Number of merged requests.
+ * @dispatched: Number of dispatched requests.
+ * @completed: Number of I/O completions.
+ */
+struct io_stats_per_prio {
+	local_t inserted;
+	local_t merged;
+	local_t dispatched;
+	local_t completed;
+};
+
+/* I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*). */
+struct blkcg_io_stats {
+	struct io_stats_per_prio stats[4];
+};
+
+/**
+ * struct dd_blkcg - Per cgroup data.
+ * @cpd: blkcg_policy_data structure.
+ * @stats: I/O statistics.
+ */
+struct dd_blkcg {
+	struct blkcg_policy_data cpd;	/* must be the first member */
+	struct blkcg_io_stats __percpu *stats;
+};
+
+/*
+ * Count one event of type 'event_type' and with I/O priority class
+ * 'prio_class'.
+ */
+#define ddcg_count(ddcg, event_type, prio_class) do {			\
+if (ddcg) {								\
+	struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats);	\
+									\
+	BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *));		\
+	BUILD_BUG_ON(!__same_type((prio_class), u8));			\
+	local_inc(&io_stats->stats[(prio_class)].event_type);		\
+	put_cpu_ptr(io_stats);						\
+}									\
+} while (0)
+
+/*
+ * Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls
+ * across all CPUs. No locking or barriers since it is fine if the returned
+ * sum is slightly outdated.
+ */
+#define ddcg_sum(ddcg, event_type, prio) ({				\
+	unsigned int cpu;						\
+	u32 sum = 0;							\
+									\
+	BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *));		\
+	BUILD_BUG_ON(!__same_type((prio), u8));				\
+	for_each_present_cpu(cpu)					\
+		sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)->	\
+				  stats[(prio)].event_type);		\
+	sum;								\
+})
+
+#ifdef CONFIG_BLK_CGROUP
+
+/**
+ * struct dd_blkg - Per (cgroup, request queue) data.
+ * @pd: blkg_policy_data structure.
+ */
+struct dd_blkg {
+	struct blkg_policy_data pd;	/* must be the first member */
+};
+
+struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio);
+int dd_activate_policy(struct request_queue *q);
+void dd_deactivate_policy(struct request_queue *q);
+int __init dd_blkcg_init(void);
+void __exit dd_blkcg_exit(void);
+
+#else /* CONFIG_BLK_CGROUP */
+
+static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio)
+{
+	return NULL;
+}
+
+static inline int dd_activate_policy(struct request_queue *q)
+{
+	return 0;
+}
+
+static inline void dd_deactivate_policy(struct request_queue *q)
+{
+}
+
+static inline int dd_blkcg_init(void)
+{
+	return 0;
+}
+
+static inline void dd_blkcg_exit(void)
+{
+}
+
+#endif /* CONFIG_BLK_CGROUP */
+
+#endif /* _MQ_DEADLINE_CGROUP_H_ */
diff --git a/block/mq-deadline.c b/block/mq-deadline-main.c
similarity index 95%
rename from block/mq-deadline.c
rename to block/mq-deadline-main.c
index 04d9d6b3745b..58a401ea8f56 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline-main.c
@@ -25,6 +25,7 @@
 #include "blk-mq-debugfs.h"
 #include "blk-mq-tag.h"
 #include "blk-mq-sched.h"
+#include "mq-deadline-cgroup.h"
 
 /*
  * See Documentation/block/deadline-iosched.rst
@@ -51,14 +52,6 @@ enum dd_prio {
 
 enum { DD_PRIO_COUNT = 3 };
 
-/* I/O statistics per I/O priority. */
-struct io_stats_per_prio {
-	local_t inserted;
-	local_t merged;
-	local_t dispatched;
-	local_t completed;
-};
-
 /* I/O statistics for all I/O priorities (enum dd_prio). */
 struct io_stats {
 	struct io_stats_per_prio stats[DD_PRIO_COUNT];
@@ -81,6 +74,9 @@ struct deadline_data {
 	 * run time data
 	 */
 
+	/* Request queue that owns this data structure. */
+	struct request_queue *queue;
+
 	struct dd_per_prio per_prio[DD_PRIO_COUNT];
 
 	/* Data direction of latest dispatched request. */
@@ -232,8 +228,10 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const u8 ioprio_class = dd_rq_ioclass(next);
 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
+	struct dd_blkcg *blkcg = next->elv.priv[0];
 
 	dd_count(dd, merged, prio);
+	ddcg_count(blkcg, merged, ioprio_class);
 
 	/*
 	 * if next expires before rq, assign its expire time to rq
@@ -370,6 +368,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 {
 	struct request *rq, *next_rq;
 	enum dd_data_dir data_dir;
+	struct dd_blkcg *blkcg;
 	enum dd_prio prio;
 	u8 ioprio_class;
 
@@ -464,6 +463,8 @@ done:
 	ioprio_class = dd_rq_ioclass(rq);
 	prio = ioprio_class_to_prio[ioprio_class];
 	dd_count(dd, dispatched, prio);
+	blkcg = rq->elv.priv[0];
+	ddcg_count(blkcg, dispatched, ioprio_class);
 	/*
 	 * If the request needs its target zone locked, do it.
 	 */
@@ -540,6 +541,8 @@ static void dd_exit_sched(struct elevator_queue *e)
 	struct deadline_data *dd = e->elevator_data;
 	enum dd_prio prio;
 
+	dd_deactivate_policy(dd->queue);
+
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
@@ -553,7 +556,7 @@ static void dd_exit_sched(struct elevator_queue *e)
 }
 
 /*
- * initialize elevator private data (deadline_data).
+ * Initialize elevator private data (deadline_data) and associate with blkcg.
  */
 static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 {
@@ -562,6 +565,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	enum dd_prio prio;
 	int ret = -ENOMEM;
 
+	/*
+	 * Initialization would be very tricky if the queue is not frozen,
+	 * hence the warning statement below.
+	 */
+	WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter));
+
 	eq = elevator_alloc(q, e);
 	if (!eq)
 		return ret;
@@ -577,6 +586,8 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	if (!dd->stats)
 		goto free_dd;
 
+	dd->queue = q;
+
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
@@ -595,9 +606,17 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	spin_lock_init(&dd->lock);
 	spin_lock_init(&dd->zone_lock);
 
+	ret = dd_activate_policy(q);
+	if (ret)
+		goto free_stats;
+
+	ret = 0;
 	q->elevator = eq;
 	return 0;
 
+free_stats:
+	free_percpu(dd->stats);
+
 free_dd:
 	kfree(dd);
 
@@ -670,6 +689,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
+	struct dd_blkcg *blkcg;
 
 	lockdep_assert_held(&dd->lock);
 
@@ -679,8 +699,19 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	 */
 	blk_req_zone_write_unlock(rq);
 
+	/*
+	 * If a block cgroup has been associated with the submitter and if an
+	 * I/O priority has been set in the associated block cgroup, use the
+	 * lowest of the cgroup priority and the request priority for the
+	 * request. If no priority has been set in the request, use the cgroup
+	 * priority.
+	 */
 	prio = ioprio_class_to_prio[ioprio_class];
 	dd_count(dd, inserted, prio);
+	blkcg = dd_blkcg_from_bio(rq->bio);
+	ddcg_count(blkcg, inserted, ioprio_class);
+	WARN_ON_ONCE(rq->elv.priv[0]);
+	rq->elv.priv[0] = blkcg;
 
 	if (blk_mq_sched_try_insert_merge(q, rq))
 		return;
@@ -727,12 +758,10 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
 	spin_unlock(&dd->lock);
 }
 
-/*
- * Nothing to do here. This is defined only to ensure that .finish_request
- * method is called upon request completion.
- */
+/* Callback from inside blk_mq_rq_ctx_init(). */
 static void dd_prepare_request(struct request *rq)
 {
+	rq->elv.priv[0] = NULL;
 }
 
 /*
@@ -755,11 +784,13 @@ static void dd_finish_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	struct deadline_data *dd = q->elevator->elevator_data;
+	struct dd_blkcg *blkcg = rq->elv.priv[0];
 	const u8 ioprio_class = dd_rq_ioclass(rq);
 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 	struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
 	dd_count(dd, completed, prio);
+	ddcg_count(blkcg, completed, ioprio_class);
 
 	if (blk_queue_is_zoned(q)) {
 		unsigned long flags;
@@ -1079,11 +1110,26 @@ MODULE_ALIAS("mq-deadline-iosched");
 
 static int __init deadline_init(void)
 {
-	return elv_register(&mq_deadline);
+	int ret;
+
+	ret = elv_register(&mq_deadline);
+	if (ret)
+		goto out;
+	ret = dd_blkcg_init();
+	if (ret)
+		goto unreg;
+
+out:
+	return ret;
+
+unreg:
+	elv_unregister(&mq_deadline);
+	goto out;
 }
 
 static void __exit deadline_exit(void)
 {
+	dd_blkcg_exit();
 	elv_unregister(&mq_deadline);
 }
 

From fb926032b3209300f9dc454a36b8299582ae545c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Jun 2021 17:44:56 -0700
Subject: [PATCH 2431/3292] block/mq-deadline: Prioritize high-priority
 requests

While one or more requests with a certain I/O priority are pending, do not
dispatch lower priority requests. Dispatch lower priority requests anyway
after the "aging" time has expired.

This patch has been tested as follows:

modprobe scsi_debug ndelay=1000000 max_queue=16 &&
sd='' &&
while [ -z "$sd" ]; do
  sd=/dev/$(basename /sys/bus/pseudo/drivers/scsi_debug/adapter*/host*/target*/*/block/*)
done &&
echo $((100*1000)) > /sys/block/$sd/queue/iosched/aging_expire &&
cd /sys/fs/cgroup/blkio/ &&
echo $$ >cgroup.procs &&
echo restrict-to-be >blkio.prio.class &&
mkdir -p hipri &&
cd hipri &&
echo none-to-rt >blkio.prio.class &&
{ max-iops -a1 -d32 -j1 -e mq-deadline $sd >& ~/low-pri.txt & } &&
echo $$ >cgroup.procs &&
max-iops -a1 -d32 -j1 -e mq-deadline $sd >& ~/hi-pri.txt

Result:
* 11000 IOPS for the high-priority job
*    40 IOPS for the low-priority job

If the aging expiry time is changed from 100s into 0, the IOPS results change
into 6712 and 6796 IOPS.

The max-iops script is a script that runs fio with the following arguments:
--bs=4K --gtod_reduce=1 --ioengine=libaio --ioscheduler=${arg_e} --runtime=60
--norandommap --rw=read --thread --buffered=0 --numjobs=${arg_j}
--iodepth=${arg_d} --iodepth_batch_submit=${arg_a}
--iodepth_batch_complete=$((arg_d / 2)) --name=${positional_argument_1}
--filename=${positional_argument_1}

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210618004456.7280-17-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline-main.c | 42 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/block/mq-deadline-main.c b/block/mq-deadline-main.c
index 58a401ea8f56..4815e536091f 100644
--- a/block/mq-deadline-main.c
+++ b/block/mq-deadline-main.c
@@ -32,6 +32,11 @@
  */
 static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
 static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
+/*
+ * Time after which to dispatch lower priority requests even if higher
+ * priority requests are pending.
+ */
+static const int aging_expire = 10 * HZ;
 static const int writes_starved = 2;    /* max times reads can starve a write */
 static const int fifo_batch = 16;       /* # of sequential requests treated as one
 				     by the above parameters. For throughput. */
@@ -94,6 +99,7 @@ struct deadline_data {
 	int writes_starved;
 	int front_merges;
 	u32 async_depth;
+	int aging_expire;
 
 	spinlock_t lock;
 	spinlock_t zone_lock;
@@ -361,10 +367,11 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 
 /*
  * deadline_dispatch_requests selects the best request according to
- * read/write expire, fifo_batch, etc
+ * read/write expire, fifo_batch, etc and with a start time <= @latest.
  */
 static struct request *__dd_dispatch_request(struct deadline_data *dd,
-					     struct dd_per_prio *per_prio)
+					     struct dd_per_prio *per_prio,
+					     u64 latest_start_ns)
 {
 	struct request *rq, *next_rq;
 	enum dd_data_dir data_dir;
@@ -377,6 +384,8 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	if (!list_empty(&per_prio->dispatch)) {
 		rq = list_first_entry(&per_prio->dispatch, struct request,
 				      queuelist);
+		if (rq->start_time_ns > latest_start_ns)
+			return NULL;
 		list_del_init(&rq->queuelist);
 		goto done;
 	}
@@ -454,6 +463,8 @@ dispatch_find_request:
 	dd->batching = 0;
 
 dispatch_request:
+	if (rq->start_time_ns > latest_start_ns)
+		return NULL;
 	/*
 	 * rq is the selected appropriate request.
 	 */
@@ -484,15 +495,32 @@ done:
 static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 {
 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
-	struct request *rq;
+	const u64 now_ns = ktime_get_ns();
+	struct request *rq = NULL;
 	enum dd_prio prio;
 
 	spin_lock(&dd->lock);
-	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
-		rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
+	/*
+	 * Start with dispatching requests whose deadline expired more than
+	 * aging_expire jiffies ago.
+	 */
+	for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
+		rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns -
+					   jiffies_to_nsecs(dd->aging_expire));
 		if (rq)
+			goto unlock;
+	}
+	/*
+	 * Next, dispatch requests in priority order. Ignore lower priority
+	 * requests if any higher priority requests are pending.
+	 */
+	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
+		rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns);
+		if (rq || dd_queued(dd, prio))
 			break;
 	}
+
+unlock:
 	spin_unlock(&dd->lock);
 
 	return rq;
@@ -603,6 +631,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->front_merges = 1;
 	dd->last_dir = DD_WRITE;
 	dd->fifo_batch = fifo_batch;
+	dd->aging_expire = aging_expire;
 	spin_lock_init(&dd->lock);
 	spin_lock_init(&dd->zone_lock);
 
@@ -835,6 +864,7 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 #define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
 SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
 SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
+SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire);
 SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
 SHOW_INT(deadline_front_merges_show, dd->front_merges);
 SHOW_INT(deadline_async_depth_show, dd->front_merges);
@@ -864,6 +894,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
 	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
 STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
 STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
+STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX);
 STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
 STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
 STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
@@ -882,6 +913,7 @@ static struct elv_fs_entry deadline_attrs[] = {
 	DD_ATTR(front_merges),
 	DD_ATTR(async_depth),
 	DD_ATTR(fifo_batch),
+	DD_ATTR(aging_expire),
 	__ATTR_NULL
 };
 

From 1d0903d61e9645c6330b94247b96dd873dfc11c8 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Sat, 19 Jun 2021 17:36:59 +0800
Subject: [PATCH 2432/3292] blk-wbt: introduce a new disable state to prevent
 false positive by rwb_enabled()

Now that we disable wbt by simply zero out rwb->wb_normal in
wbt_disable_default() when switch elevator to bfq, but it's not safe
because it will become false positive if we change queue depth. If it
become false positive between wbt_wait() and wbt_track() when submit
write request, it will lead to drop rqw->inflight to -1 in wbt_done(),
which will end up trigger IO hung. Fix this issue by introduce a new
state which mean the wbt was disabled.

Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://lore.kernel.org/r/20210619093700.920393-2-yi.zhang@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-wbt.c | 5 +++--
 block/blk-wbt.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index b363b0532704..0ce0883df3d6 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -77,7 +77,8 @@ enum {
 
 static inline bool rwb_enabled(struct rq_wb *rwb)
 {
-	return rwb && rwb->wb_normal != 0;
+	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
+		      rwb->wb_normal != 0;
 }
 
 static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
@@ -701,7 +702,7 @@ void wbt_disable_default(struct request_queue *q)
 	rwb = RQWB(rqos);
 	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
 		blk_stat_deactivate(rwb->cb);
-		rwb->wb_normal = 0;
+		rwb->enable_state = WBT_STATE_OFF_DEFAULT;
 	}
 }
 EXPORT_SYMBOL_GPL(wbt_disable_default);
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 16bdc85b8df9..2eb01becde8c 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -34,6 +34,7 @@ enum {
 enum {
 	WBT_STATE_ON_DEFAULT	= 1,
 	WBT_STATE_ON_MANUAL	= 2,
+	WBT_STATE_OFF_DEFAULT
 };
 
 struct rq_wb {

From 76a8040817b4b9c69b53f9b326987fa891b4082a Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Sat, 19 Jun 2021 17:37:00 +0800
Subject: [PATCH 2433/3292] blk-wbt: make sure throttle is enabled properly

After commit a79050434b45 ("blk-rq-qos: refactor out common elements of
blk-wbt"), if throttle was disabled by wbt_disable_default(), we could
not enable again, fix this by set enable_state back to
WBT_STATE_ON_DEFAULT.

Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt")
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://lore.kernel.org/r/20210619093700.920393-3-yi.zhang@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-wbt.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0ce0883df3d6..3ed71b8da887 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -636,9 +636,13 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
 void wbt_enable_default(struct request_queue *q)
 {
 	struct rq_qos *rqos = wbt_rq_qos(q);
+
 	/* Throttling already enabled? */
-	if (rqos)
+	if (rqos) {
+		if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
 		return;
+	}
 
 	/* Queue not registered? Maybe shutting down... */
 	if (!blk_queue_registered(q))

From 511a2699237611b062df7798476bf3a1392910b9 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 19 Jun 2021 16:09:42 +0200
Subject: [PATCH 2434/3292] block, bfq: let also stably merged queues enjoy
 weight raising

Merged bfq_queues are kept out of weight-raising (low-latency)
mechanisms. The reason is that these queues are usually created for
non-interactive and non-soft-real-time tasks. Yet this is not the case
for stably-merged queues. These queues are merged just because they
are created shortly after each other. So they may easily serve the I/O
of an interactive or soft-real time application, if the application
happens to spawn multiple processes.

To address this issue, this commits lets also stably-merged queued
enjoy weight raising.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210619140948.98712-2-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index acd1f881273e..da2363f12e53 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1729,10 +1729,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
 		bfqq->entity.new_weight == 40;
 	*interactive = !in_burst && idle_for_long_time &&
 		bfqq->entity.new_weight == 40;
+	/*
+	 * Merged bfq_queues are kept out of weight-raising
+	 * (low-latency) mechanisms. The reason is that these queues
+	 * are usually created for non-interactive and
+	 * non-soft-real-time tasks. Yet this is not the case for
+	 * stably-merged queues. These queues are merged just because
+	 * they are created shortly after each other. So they may
+	 * easily serve the I/O of an interactive or soft-real time
+	 * application, if the application happens to spawn multiple
+	 * processes. So let also stably-merged queued enjoy weight
+	 * raising.
+	 */
 	wr_or_deserves_wr = bfqd->low_latency &&
 		(bfqq->wr_coeff > 1 ||
 		 (bfq_bfqq_sync(bfqq) &&
-		  bfqq->bic && (*interactive || soft_rt)));
+		  (bfqq->bic || RQ_BIC(rq)->stably_merged) &&
+		   (*interactive || soft_rt)));
 
 	/*
 	 * Using the last flag, update budget and check whether bfqq

From e03f2ab78a4a673e4af23c3b855591c48b9de4d7 Mon Sep 17 00:00:00 2001
From: Luca Mariotti <mariottiluca1@hotmail.it>
Date: Sat, 19 Jun 2021 16:09:43 +0200
Subject: [PATCH 2435/3292] block, bfq: fix delayed stable merge check

When attempting to schedule a merge of a given bfq_queue with the currently
in-service bfq_queue or with a cooperating bfq_queue among the scheduled
bfq_queues, delayed stable merge is checked for rotational or non-queueing
devs. For this stable merge to be performed, some conditions must be met.
If the current bfq_queue underwent some split from some merged bfq_queue,
one of these conditions is that two hundred milliseconds must elapse from
split, otherwise this condition is always met.

Unfortunately, by mistake, time_is_after_jiffies() was written instead of
time_is_before_jiffies() for this check, verifying that less than two
hundred milliseconds have elapsed instead of verifying that at least two
hundred milliseconds have elapsed.

Fix this issue by replacing time_is_after_jiffies() with
time_is_before_jiffies().

Signed-off-by: Luca Mariotti <mariottiluca1@hotmail.it>
Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Signed-off-by: Pietro Pedroni <pedroni.pietro.96@gmail.com>
Link: https://lore.kernel.org/r/20210619140948.98712-3-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index da2363f12e53..c5c0e74977d4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2710,7 +2710,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	if (unlikely(!bfqd->nonrot_with_queueing)) {
 		if (bic->stable_merge_bfqq &&
 		    !bfq_bfqq_just_created(bfqq) &&
-		    time_is_after_jiffies(bfqq->split_time +
+		    time_is_before_jiffies(bfqq->split_time +
 					  msecs_to_jiffies(200))) {
 			struct bfq_queue *stable_merge_bfqq =
 				bic->stable_merge_bfqq;

From d4f49983fa3944416c28379c35fbe10c68455ea4 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 19 Jun 2021 16:09:44 +0200
Subject: [PATCH 2436/3292] block, bfq: consider also creation time in delayed
 stable merge

Since commit 430a67f9d616 ("block, bfq: merge bursts of newly-created
queues"), BFQ may schedule a merge between a newly created sync
bfq_queue and the last sync bfq_queue created. Such a merging is not
performed immediately, because BFQ needs first to find out whether the
newly created queue actually reaches a higher throughput if not merged
at all (and in that case BFQ will not perform any stable merging). To
check that, a little time must be waited after the creation of the new
queue, so that some I/O can flow in the queue, and statistics on such
I/O can be computed.

Yet, to evaluate the above waiting time, the last split time is
considered as start time, instead of the creation time of the
queue. This is a mistake, because considering the split time is
correct only in the following scenario.

The queue undergoes a non-stable merges on the arrival of its very
first I/O request, due to close I/O with some other queue. While the
queue is merged for close I/O, stable merging is not considered. Yet
the queue may then happen to be split, if the close I/O finishes (or
happens to be a false positive). From this time on, the queue can
again be considered for stable merging. But, again, a little time must
elapse, to let some new I/O flow in the queue and to get updated
statistics. To wait for this time, the split time is to be taken into
account.

Yet, if the queue does not undergo a non-stable merge on the arrival
of its very first request, then BFQ immediately checks whether the
stable merge is to be performed. It happens because the split time for
a queue is initialized to minus infinity when the queue is created.

This commit fixes this mistake by adding the missing condition. Now
the check for delayed stable-merge is performed after a little time is
elapsed not only from the last queue split time, but also from the
creation time of the queue.

Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues")
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210619140948.98712-4-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c5c0e74977d4..2a5c1a660f3b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2711,7 +2711,9 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		if (bic->stable_merge_bfqq &&
 		    !bfq_bfqq_just_created(bfqq) &&
 		    time_is_before_jiffies(bfqq->split_time +
-					  msecs_to_jiffies(200))) {
+					  msecs_to_jiffies(200)) &&
+		    time_is_before_jiffies(bfqq->creation_time +
+					   msecs_to_jiffies(200))) {
 			struct bfq_queue *stable_merge_bfqq =
 				bic->stable_merge_bfqq;
 			int proc_ref = min(bfqq_process_refs(bfqq),

From 7812472f973047a886e4ed9a91d98d6627dd746f Mon Sep 17 00:00:00 2001
From: Pietro Pedroni <pedroni.pietro.96@gmail.com>
Date: Sat, 19 Jun 2021 16:09:45 +0200
Subject: [PATCH 2437/3292] block, bfq: boost throughput by extending
 queue-merging times

One of the methods with which bfq boosts throughput is by merging queues.
One of the merging variants in bfq is the stable merge.
This mechanism is activated between two queues only if they are created
within a certain maximum time T1 from each other.
Merging can happen soon or be delayed. In the second case, before
merging, bfq needs to evaluate a throughput-boost parameter that
indicates whether the queue generates a high throughput is served alone.
Merging occurs when this throughput-boost is not high enough.
In particular, this parameter is evaluated and late merging may occur
only after at least a time T2 from the creation of the queue.

Currently T1 and T2 are set to 180ms and 200ms, respectively.
In this way the merging mechanism rarely occurs because time is not
enough. This results in a noticeable lowering of the overall throughput
with some workloads (see the example below).

This commit introduces two constants bfq_activation_stable_merging and
bfq_late_stable_merging in order to increase the duration of T1 and T2.
Both the stable merging activation time and the late merging
time are set to 600ms. This value has been experimentally evaluated
using sqlite benchmark in the Phoronix Test Suite on a HDD.
The duration of the benchmark before this fix was 111.02s, while now
it has reached 97.02s, a better result than that of all the other
schedulers.

Signed-off-by: Pietro Pedroni <pedroni.pietro.96@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210619140948.98712-5-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 2a5c1a660f3b..98a42ddb1760 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -364,6 +364,16 @@ static int ref_wr_duration[2];
  */
 static const unsigned long max_service_from_wr = 120000;
 
+/*
+ * Maximum time between the creation of two queues, for stable merge
+ * to be activated (in ms)
+ */
+static const unsigned long bfq_activation_stable_merging = 600;
+/*
+ * Minimum time to be waited before evaluating delayed stable merge (in ms)
+ */
+static const unsigned long bfq_late_stable_merging = 600;
+
 #define RQ_BIC(rq)		icq_to_bic((rq)->elv.priv[0])
 #define RQ_BFQQ(rq)		((rq)->elv.priv[1])
 
@@ -2711,9 +2721,9 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		if (bic->stable_merge_bfqq &&
 		    !bfq_bfqq_just_created(bfqq) &&
 		    time_is_before_jiffies(bfqq->split_time +
-					  msecs_to_jiffies(200)) &&
+					  msecs_to_jiffies(bfq_late_stable_merging)) &&
 		    time_is_before_jiffies(bfqq->creation_time +
-					   msecs_to_jiffies(200))) {
+					   msecs_to_jiffies(bfq_late_stable_merging))) {
 			struct bfq_queue *stable_merge_bfqq =
 				bic->stable_merge_bfqq;
 			int proc_ref = min(bfqq_process_refs(bfqq),
@@ -5494,7 +5504,7 @@ static struct bfq_queue *bfq_do_or_sched_stable_merge(struct bfq_data *bfqd,
 	 */
 	if (!last_bfqq_created ||
 	    time_before(last_bfqq_created->creation_time +
-			bfqd->bfq_burst_interval,
+			msecs_to_jiffies(bfq_activation_stable_merging),
 			bfqq->creation_time) ||
 		bfqq->entity.parent != last_bfqq_created->entity.parent ||
 		bfqq->ioprio != last_bfqq_created->ioprio ||

From bd3664b362381c4c1473753ebedf0ab242a60d1d Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 19 Jun 2021 16:09:46 +0200
Subject: [PATCH 2438/3292] block, bfq: avoid delayed merge of async queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 430a67f9d616 ("block, bfq: merge bursts of newly-created
queues"), BFQ may schedule a merge between a newly created sync
bfq_queue, say Q2, and the last sync bfq_queue created, say Q1. To this
goal, BFQ stores the address of Q1 in the field bic->stable_merge_bfqq
of the bic associated with Q2. So, when the time for the possible merge
arrives, BFQ knows which bfq_queue to merge Q2 with. In particular,
BFQ checks for possible merges on request arrivals.

Yet the same bic may also be associated with an async bfq_queue, say
Q3. So, if a request for Q3 arrives, then the above check may happen
to be executed while the bfq_queue at hand is Q3, instead of Q2. In
this case, Q1 happens to be merged with an async bfq_queue. This is
not only a conceptual mistake, because async queues are to be kept out
of queue merging, but also a bug that leads to inconsistent states.

This commits simply filters async queues out of delayed merges.

Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues")
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210619140948.98712-6-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 98a42ddb1760..7bf073ef9443 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2718,7 +2718,13 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	 * costly and complicated.
 	 */
 	if (unlikely(!bfqd->nonrot_with_queueing)) {
-		if (bic->stable_merge_bfqq &&
+		/*
+		 * Make sure also that bfqq is sync, because
+		 * bic->stable_merge_bfqq may point to some queue (for
+		 * stable merging) also if bic is associated with a
+		 * sync queue, but this bfqq is async
+		 */
+		if (bfq_bfqq_sync(bfqq) && bic->stable_merge_bfqq &&
 		    !bfq_bfqq_just_created(bfqq) &&
 		    time_is_before_jiffies(bfqq->split_time +
 					  msecs_to_jiffies(bfq_late_stable_merging)) &&

From efc72524b3a9e4e7bc7c07f756528736409ec1b7 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 19 Jun 2021 16:09:47 +0200
Subject: [PATCH 2439/3292] block, bfq: check waker only for queues with no
 in-flight I/O

Consider two bfq_queues, say Q1 and Q2, with Q2 empty. If a request of
Q1 gets completed shortly before a new request arrives for Q2, then
BFQ flags Q1 as a candidate waker for Q2. Yet, the arrival of this new
request may have a different cause, in the following case. If also Q2
has requests in flight while waiting for the arrival of a new request,
then the completion of its own requests may be the actual cause of the
awakening of the process that sends I/O to Q2. So Q1 may be flagged
wrongly as a candidate waker.

This commit avoids this deceptive flagging, by disabling
candidate-waker flagging for Q2, if Q2 has in-flight I/O.

Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210619140948.98712-7-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7bf073ef9443..a273b2bcea2a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1985,14 +1985,18 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
  * Turning back to the detection of a waker queue, a queue Q is deemed
  * as a waker queue for bfqq if, for three consecutive times, bfqq
  * happens to become non empty right after a request of Q has been
- * completed. In particular, on the first time, Q is tentatively set
- * as a candidate waker queue, while on the third consecutive time
- * that Q is detected, the field waker_bfqq is set to Q, to confirm
- * that Q is a waker queue for bfqq. These detection steps are
- * performed only if bfqq has a long think time, so as to make it more
- * likely that bfqq's I/O is actually being blocked by a
- * synchronization. This last filter, plus the above three-times
- * requirement, make false positives less likely.
+ * completed. In this respect, even if bfqq is empty, we do not check
+ * for a waker if it still has some in-flight I/O. In fact, in this
+ * case bfqq is actually still being served by the drive, and may
+ * receive new I/O on the completion of some of the in-flight
+ * requests. In particular, on the first time, Q is tentatively set as
+ * a candidate waker queue, while on the third consecutive time that Q
+ * is detected, the field waker_bfqq is set to Q, to confirm that Q is
+ * a waker queue for bfqq. These detection steps are performed only if
+ * bfqq has a long think time, so as to make it more likely that
+ * bfqq's I/O is actually being blocked by a synchronization. This
+ * last filter, plus the above three-times requirement, make false
+ * positives less likely.
  *
  * NOTE
  *
@@ -2018,6 +2022,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	if (!bfqd->last_completed_rq_bfqq ||
 	    bfqd->last_completed_rq_bfqq == bfqq ||
 	    bfq_bfqq_has_short_ttime(bfqq) ||
+	    bfqq->dispatched > 0 ||
 	    now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
 	    bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
 		return;

From 9a2ac41b13c573703d6689f51f3e27dd658324be Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Sat, 19 Jun 2021 16:09:48 +0200
Subject: [PATCH 2440/3292] block, bfq: reset waker pointer with shared queues

Commit 85686d0dc194 ("block, bfq: keep shared queues out of the waker
mechanism") leaves shared bfq_queues out of the waker-detection
mechanism. It attains this goal by not updating the pointer
last_completed_rq_bfqq, if the last request completed belongs to a
shared bfq_queue (so that the pointer will not point to the shared
bfq_queue).

Yet this has a side effect: the pointer last_completed_rq_bfqq keeps
pointing, deceptively, to a bfq_queue that actually is not the last
one to have had a request completed. As a consequence, such a
bfq_queue may deceptively be considered as a waker of some bfq_queue,
even of some shared bfq_queue.

To address this issue, reset last_completed_rq_bfqq if the last
request completed belongs to a shared queue.

Fixes: 85686d0dc194 ("block, bfq: keep shared queues out of the waker mechanism")
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210619140948.98712-8-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index a273b2bcea2a..fedb0a8fd388 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6165,11 +6165,13 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 	 * of other queues. But a false waker will unjustly steal
 	 * bandwidth to its supposedly woken queue. So considering
 	 * also shared queues in the waking mechanism may cause more
-	 * control troubles than throughput benefits. Then do not set
-	 * last_completed_rq_bfqq to bfqq if bfqq is a shared queue.
+	 * control troubles than throughput benefits. Then reset
+	 * last_completed_rq_bfqq if bfqq is a shared queue.
 	 */
 	if (!bfq_bfqq_coop(bfqq))
 		bfqd->last_completed_rq_bfqq = bfqq;
+	else
+		bfqd->last_completed_rq_bfqq = NULL;
 
 	/*
 	 * If we are waiting to discover whether the request pattern

From ddcc5c544eb0991501761622b651cf43ce660a22 Mon Sep 17 00:00:00 2001
From: Thomas Bracht Laumann Jespersen <t@laumann.xyz>
Date: Sat, 19 Jun 2021 21:51:31 +0200
Subject: [PATCH 2441/3292] block/partitions/msdos: Fix typo inidicator ->
 indicator

Just a fix for a small typo in msdos_partition().

Signed-off-by: Thomas Bracht Laumann Jespersen <t@laumann.xyz>
Link: https://lore.kernel.org/r/20210619195130.19348-1-t@laumann.xyz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/msdos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 8f2fcc080264..63e4f6f8b6e9 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -622,7 +622,7 @@ int msdos_partition(struct parsed_partitions *state)
 	for (slot = 1; slot <= 4; slot++, p++) {
 		if (p->boot_ind != 0 && p->boot_ind != 0x80) {
 			/*
-			 * Even without a valid boot inidicator value
+			 * Even without a valid boot indicator value
 			 * its still possible this is valid FAT filesystem
 			 * without a partition table.
 			 */

From 1502328f17ab0684ca5ed6764433aa0a83bdaf95 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 18 Jun 2021 15:02:20 -0700
Subject: [PATCH 2442/3292] mptcp: fix bad handling of 32 bit ack wrap-around

When receiving 32 bits DSS ack from the peer, the MPTCP need
to expand them to 64 bits value. The current code is buggy
WRT detecting 32 bits ack wrap-around: when the wrap-around
happens the current unsigned 32 bit ack value is lower than
the previous one.

Additionally check for possible reverse wrap and make the helper
visible, so that we could re-use it for the next patch.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/204
Fixes: cc9d25669866 ("mptcp: update per unacked sequence on pkt reception")
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 25 +++++++++++++------------
 net/mptcp/protocol.h |  8 ++++++++
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9b263f27ce9b..b87e46f515fb 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -896,19 +896,20 @@ reset:
 	return false;
 }
 
-static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
+u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
 {
-	u32 old_ack32, cur_ack32;
+	u32 old_seq32, cur_seq32;
 
-	if (use_64bit)
-		return cur_ack;
+	old_seq32 = (u32)old_seq;
+	cur_seq32 = (u32)cur_seq;
+	cur_seq = (old_seq & GENMASK_ULL(63, 32)) + cur_seq32;
+	if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32)))
+		return cur_seq + (1LL << 32);
 
-	old_ack32 = (u32)old_ack;
-	cur_ack32 = (u32)cur_ack;
-	cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
-	if (unlikely(before(cur_ack32, old_ack32)))
-		return cur_ack + (1LL << 32);
-	return cur_ack;
+	/* reverse wrap could happen, too */
+	if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32)))
+		return cur_seq - (1LL << 32);
+	return cur_seq;
 }
 
 static void ack_update_msk(struct mptcp_sock *msk,
@@ -926,7 +927,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
 	 * more dangerous than missing an ack
 	 */
 	old_snd_una = msk->snd_una;
-	new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
+	new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 
 	/* ACK for data not even sent yet? Ignore. */
 	if (after64(new_snd_una, snd_nxt))
@@ -963,7 +964,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us
 		return false;
 
 	WRITE_ONCE(msk->rcv_data_fin_seq,
-		   expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
+		   mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
 	WRITE_ONCE(msk->rcv_data_fin, 1);
 
 	return true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 385796f0ef19..5d7c44028e47 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -593,6 +593,14 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
 int mptcp_getsockopt(struct sock *sk, int level, int optname,
 		     char __user *optval, int __user *option);
 
+u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq);
+static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
+{
+	if (use_64bit)
+		return cur_seq;
+
+	return __mptcp_expand_seq(old_seq, cur_seq);
+}
 void __mptcp_check_push(struct sock *sk, struct sock *ssk);
 void __mptcp_data_acked(struct sock *sk);
 void __mptcp_error_report(struct sock *sk);

From 5957a8901db44c03540505ccedd95031c21ef2f2 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 18 Jun 2021 15:02:21 -0700
Subject: [PATCH 2443/3292] mptcp: fix 32 bit DSN expansion

The current implementation of 32 bit DSN expansion is buggy.
After the previous patch, we can simply reuse the newly
introduced helper to do the expansion safely.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/120
Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index be1de4084196..037fba41e170 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -775,15 +775,6 @@ enum mapping_status {
 	MAPPING_DUMMY
 };
 
-static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
-{
-	if ((u32)seq == (u32)old_seq)
-		return old_seq;
-
-	/* Assume map covers data not mapped yet. */
-	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
-}
-
 static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
 {
 	pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
@@ -907,13 +898,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		data_len--;
 	}
 
-	if (!mpext->dsn64) {
-		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
-				     mpext->data_seq);
-		pr_debug("expanded seq=%llu", subflow->map_seq);
-	} else {
-		map_seq = mpext->data_seq;
-	}
+	map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64);
 	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
 
 	if (subflow->map_valid) {

From 471ff4455d61c9929ae912328859921708e1eafc Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 21 Jun 2021 14:27:36 +0800
Subject: [PATCH 2444/3292] net: fec: add FEC_QUIRK_HAS_MULTI_QUEUES represents
 i.MX6SX ENET IP

Frieder Schrempf reported a TX throuthput issue [1], it happens quite often
that the measured bandwidth in TX direction drops from its expected/nominal
value to something like ~50% (for 100M) or ~67% (for 1G) connections.

[1] https://lore.kernel.org/linux-arm-kernel/421cc86c-b66f-b372-32f7-21e59f9a98bc@kontron.de/

The issue becomes clear after digging into it, Net core would select
queues when transmitting packets. Since FEC have not impletemented
ndo_select_queue callback yet, so it will call netdev_pick_tx to select
queues randomly.

For i.MX6SX ENET IP with AVB support, driver default enables this
feature. According to the setting of QOS/RCMRn/DMAnCFG registers, AVB
configured to Credit-based scheme, 50% bandwidth of each queue 1&2.

With below tests let me think more:
1) With FEC_QUIRK_HAS_AVB quirk, can reproduce TX bandwidth fluctuations issue.
2) Without FEC_QUIRK_HAS_AVB quirk, can't reproduce TX bandwidth fluctuations issue.

The related difference with or w/o FEC_QUIRK_HAS_AVB quirk is that, whether we
program FTYPE field of TxBD or not. As I describe above, AVB feature is
enabled by default. With FEC_QUIRK_HAS_AVB quirk, frames in queue 0
marked as non-AVB, and frames in queue 1&2 marked as AVB Class A&B. It's
unreasonable if frames in queue 1&2 are not required to be time-sensitive.
So when Net core select tx queues ramdomly, Credit-based scheme would work
and lead to TX bandwidth fluctuated. On the other hand, w/o
FEC_QUIRK_HAS_AVB quirk, frames in queue 1&2 are all marked as non-AVB, so
Credit-based scheme would not work.

Till now, how can we fix this TX throughput issue? Yes, please remove
FEC_QUIRK_HAS_AVB quirk if you suffer it from time-nonsensitive networking.
However, this quirk is used to indicate i.MX6SX, other setting depends
on it. So this patch adds a new quirk FEC_QUIRK_HAS_MULTI_QUEUES to
represent i.MX6SX, it is safe for us remove FEC_QUIRK_HAS_AVB quirk
now.

FEC_QUIRK_HAS_AVB quirk is set by default in the driver, and users may
not know much about driver details, they would waste effort to find the
root cause, that is not we want. The following patch is a implementation
to fix it and users don't need to modify the driver.

Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Reported-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h      |  5 +++++
 drivers/net/ethernet/freescale/fec_main.c | 11 ++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 0602d5d5d2ee..2e002e4b4b4a 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -467,6 +467,11 @@ struct bufdesc_ex {
  */
 #define FEC_QUIRK_NO_HARD_RESET		(1 << 18)
 
+/* i.MX6SX ENET IP supports multiple queues (3 queues), use this quirk to
+ * represents this ENET IP.
+ */
+#define FEC_QUIRK_HAS_MULTI_QUEUES	(1 << 19)
+
 struct bufdesc_prop {
 	int qid;
 	/* Address of Rx and Tx buffers */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ad82cffc6f3f..98cd38379275 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -122,7 +122,7 @@ static const struct fec_devinfo fec_imx6x_info = {
 		  FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
 		  FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
 		  FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
-		  FEC_QUIRK_CLEAR_SETUP_MII,
+		  FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES,
 };
 
 static const struct fec_devinfo fec_imx6ul_info = {
@@ -421,6 +421,7 @@ fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
 				estatus |= FEC_TX_BD_FTYPE(txq->bd.qid);
 			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
+
 			ebdp->cbd_bdu = 0;
 			ebdp->cbd_esc = cpu_to_fec32(estatus);
 		}
@@ -954,7 +955,7 @@ fec_restart(struct net_device *ndev)
 	 * For i.MX6SX SOC, enet use AXI bus, we use disable MAC
 	 * instead of reset MAC itself.
 	 */
-	if (fep->quirks & FEC_QUIRK_HAS_AVB ||
+	if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES ||
 	    ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) {
 		writel(0, fep->hwp + FEC_ECNTRL);
 	} else {
@@ -1165,7 +1166,7 @@ fec_stop(struct net_device *ndev)
 	 * instead of reset MAC itself.
 	 */
 	if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
-		if (fep->quirks & FEC_QUIRK_HAS_AVB) {
+		if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) {
 			writel(0, fep->hwp + FEC_ECNTRL);
 		} else {
 			writel(1, fep->hwp + FEC_ECNTRL);
@@ -2570,7 +2571,7 @@ static void fec_enet_itr_coal_set(struct net_device *ndev)
 
 	writel(tx_itr, fep->hwp + FEC_TXIC0);
 	writel(rx_itr, fep->hwp + FEC_RXIC0);
-	if (fep->quirks & FEC_QUIRK_HAS_AVB) {
+	if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) {
 		writel(tx_itr, fep->hwp + FEC_TXIC1);
 		writel(rx_itr, fep->hwp + FEC_RXIC1);
 		writel(tx_itr, fep->hwp + FEC_TXIC2);
@@ -3371,7 +3372,7 @@ static int fec_enet_init(struct net_device *ndev)
 		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
 	}
 
-	if (fep->quirks & FEC_QUIRK_HAS_AVB) {
+	if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) {
 		fep->tx_align = 0;
 		fep->rx_align = 0x3f;
 	}

From 52c4a1a85f4b346c39c896c0168f4a843b3385ff Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Mon, 21 Jun 2021 14:27:37 +0800
Subject: [PATCH 2445/3292] net: fec: add ndo_select_queue to fix TX bandwidth
 fluctuations

As we know that AVB is enabled by default, and the ENET IP design is
queue 0 for best effort, queue 1&2 for AVB Class A&B. Bandwidth of each
queue 1&2 set in driver is 50%, TX bandwidth fluctuated when selecting
tx queues randomly with FEC_QUIRK_HAS_AVB quirk available.

This patch adds ndo_select_queue callback to select queues for
transmitting to fix this issue. It will always return queue 0 if this is
not a vlan packet, and return queue 1 or 2 based on priority of vlan
packet.

You may complain that in fact we only use single queue for trasmitting
if we are not targeted to VLAN. Yes, but seems we have no choice, since
AVB is enabled when the driver probed, we can't switch this feature
dynamicly. After compare multiple queues to single queue, TX throughput
almost no improvement.

One way we can implemet is to configure the driver to multiple queues
with Round-robin scheme by default. Then add ndo_setup_tc callback to
enable/disable AVB feature for users. Unfortunately, ENET AVB IP seems
not follow the standard 802.1Qav spec. We only can program
DMAnCFG[IDLE_SLOPE] field to calculate bandwidth fraction. And idle
slope is restricted to certain valus (a total of 19). It's far away from
CBS QDisc implemented in Linux TC framework. If you strongly suggest to do
this, I think we only can support limited numbers of bandwidth and reject
others, but it's really urgly and wried.

With this patch, VLAN tagged packets route to queue 0/1/2 based on vlan
priority; VLAN untagged packets route to queue 0.

Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Reported-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 32 +++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 98cd38379275..8aea707a65a7 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -76,6 +76,8 @@ static void fec_enet_itr_coal_init(struct net_device *ndev);
 
 #define DRIVER_NAME	"fec"
 
+static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0, 1, 1, 1, 2, 2, 2};
+
 /* Pause frame feild and FIFO threshold */
 #define FEC_ENET_FCE	(1 << 5)
 #define FEC_ENET_RSEM_V	0x84
@@ -3240,10 +3242,40 @@ static int fec_set_features(struct net_device *netdev,
 	return 0;
 }
 
+static u16 fec_enet_get_raw_vlan_tci(struct sk_buff *skb)
+{
+	struct vlan_ethhdr *vhdr;
+	unsigned short vlan_TCI = 0;
+
+	if (skb->protocol == htons(ETH_P_ALL)) {
+		vhdr = (struct vlan_ethhdr *)(skb->data);
+		vlan_TCI = ntohs(vhdr->h_vlan_TCI);
+	}
+
+	return vlan_TCI;
+}
+
+static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb,
+				 struct net_device *sb_dev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	u16 vlan_tag;
+
+	if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
+		return netdev_pick_tx(ndev, skb, NULL);
+
+	vlan_tag = fec_enet_get_raw_vlan_tci(skb);
+	if (!vlan_tag)
+		return vlan_tag;
+
+	return fec_enet_vlan_pri_to_queue[vlan_tag >> 13];
+}
+
 static const struct net_device_ops fec_netdev_ops = {
 	.ndo_open		= fec_enet_open,
 	.ndo_stop		= fec_enet_close,
 	.ndo_start_xmit		= fec_enet_start_xmit,
+	.ndo_select_queue       = fec_enet_select_queue,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_tx_timeout		= fec_timeout,

From 6a1e5a4af17e440dd82a58a2c5f40ff17a82b722 Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Sun, 20 Jun 2021 15:24:14 +0000
Subject: [PATCH 2446/3292] atm: nicstar: use 'dma_free_coherent' instead of
 'kfree'

When 'nicstar_init_one' fails, 'ns_init_card_error' will be executed for
error handling, but the correct memory free function should be used,
otherwise it will cause an error. Since 'card->rsq.org' and
'card->tsq.org' are allocated using 'dma_alloc_coherent' function, they
should be freed using 'dma_free_coherent'.

Fix this by using 'dma_free_coherent' instead of 'kfree'

This log reveals it:

[    3.440294] kernel BUG at mm/slub.c:4206!
[    3.441059] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[    3.441430] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.12.4-g70e7f0549188-dirty #141
[    3.441986] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.442780] RIP: 0010:kfree+0x26a/0x300
[    3.443065] Code: e8 3a c3 b9 ff e9 d6 fd ff ff 49 8b 45 00 31 db a9 00 00 01 00 75 4d 49 8b 45 00 a9 00 00 01 00 75 0a 49 8b 45 08 a8 01 75 02 <0f> 0b 89 d9 b8 00 10 00 00 be 06 00 00 00 48 d3 e0 f7 d8 48 63 d0
[    3.443396] RSP: 0000:ffffc90000017b70 EFLAGS: 00010246
[    3.443396] RAX: dead000000000100 RBX: 0000000000000000 RCX: 0000000000000000
[    3.443396] RDX: 0000000000000000 RSI: ffffffff85d3df94 RDI: ffffffff85df38e6
[    3.443396] RBP: ffffc90000017b90 R08: 0000000000000001 R09: 0000000000000001
[    3.443396] R10: 0000000000000000 R11: 0000000000000001 R12: ffff888107dc0000
[    3.443396] R13: ffffea00001f0100 R14: ffff888101a8bf00 R15: ffff888107dc0160
[    3.443396] FS:  0000000000000000(0000) GS:ffff88817bc80000(0000) knlGS:0000000000000000
[    3.443396] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.443396] CR2: 0000000000000000 CR3: 000000000642e000 CR4: 00000000000006e0
[    3.443396] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    3.443396] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    3.443396] Call Trace:
[    3.443396]  ns_init_card_error+0x12c/0x220
[    3.443396]  nicstar_init_one+0x10d2/0x1130
[    3.443396]  local_pci_probe+0x4a/0xb0
[    3.443396]  pci_device_probe+0x126/0x1d0
[    3.443396]  ? pci_device_remove+0x100/0x100
[    3.443396]  really_probe+0x27e/0x650
[    3.443396]  driver_probe_device+0x84/0x1d0
[    3.443396]  ? mutex_lock_nested+0x16/0x20
[    3.443396]  device_driver_attach+0x63/0x70
[    3.443396]  __driver_attach+0x117/0x1a0
[    3.443396]  ? device_driver_attach+0x70/0x70
[    3.443396]  bus_for_each_dev+0xb6/0x110
[    3.443396]  ? rdinit_setup+0x40/0x40
[    3.443396]  driver_attach+0x22/0x30
[    3.443396]  bus_add_driver+0x1e6/0x2a0
[    3.443396]  driver_register+0xa4/0x180
[    3.443396]  __pci_register_driver+0x77/0x80
[    3.443396]  ? uPD98402_module_init+0xd/0xd
[    3.443396]  nicstar_init+0x1f/0x75
[    3.443396]  do_one_initcall+0x7a/0x3d0
[    3.443396]  ? rdinit_setup+0x40/0x40
[    3.443396]  ? rcu_read_lock_sched_held+0x4a/0x70
[    3.443396]  kernel_init_freeable+0x2a7/0x2f9
[    3.443396]  ? rest_init+0x2c0/0x2c0
[    3.443396]  kernel_init+0x13/0x180
[    3.443396]  ? rest_init+0x2c0/0x2c0
[    3.443396]  ? rest_init+0x2c0/0x2c0
[    3.443396]  ret_from_fork+0x1f/0x30
[    3.443396] Modules linked in:
[    3.443396] Dumping ftrace buffer:
[    3.443396]    (ftrace buffer empty)
[    3.458593] ---[ end trace 3c6f8f0d8ef59bcd ]---
[    3.458922] RIP: 0010:kfree+0x26a/0x300
[    3.459198] Code: e8 3a c3 b9 ff e9 d6 fd ff ff 49 8b 45 00 31 db a9 00 00 01 00 75 4d 49 8b 45 00 a9 00 00 01 00 75 0a 49 8b 45 08 a8 01 75 02 <0f> 0b 89 d9 b8 00 10 00 00 be 06 00 00 00 48 d3 e0 f7 d8 48 63 d0
[    3.460499] RSP: 0000:ffffc90000017b70 EFLAGS: 00010246
[    3.460870] RAX: dead000000000100 RBX: 0000000000000000 RCX: 0000000000000000
[    3.461371] RDX: 0000000000000000 RSI: ffffffff85d3df94 RDI: ffffffff85df38e6
[    3.461873] RBP: ffffc90000017b90 R08: 0000000000000001 R09: 0000000000000001
[    3.462372] R10: 0000000000000000 R11: 0000000000000001 R12: ffff888107dc0000
[    3.462871] R13: ffffea00001f0100 R14: ffff888101a8bf00 R15: ffff888107dc0160
[    3.463368] FS:  0000000000000000(0000) GS:ffff88817bc80000(0000) knlGS:0000000000000000
[    3.463949] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.464356] CR2: 0000000000000000 CR3: 000000000642e000 CR4: 00000000000006e0
[    3.464856] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    3.465356] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    3.465860] Kernel panic - not syncing: Fatal exception
[    3.466370] Dumping ftrace buffer:
[    3.466616]    (ftrace buffer empty)
[    3.466871] Kernel Offset: disabled
[    3.467122] Rebooting in 1 seconds..

Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/nicstar.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 5c7e4df159b9..e031f6d74e7e 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -839,10 +839,12 @@ static void ns_init_card_error(ns_dev *card, int error)
 			dev_kfree_skb_any(hb);
 	}
 	if (error >= 12) {
-		kfree(card->rsq.org);
+		dma_free_coherent(&card->pcidev->dev, NS_RSQSIZE + NS_RSQ_ALIGNMENT,
+				card->rsq.org, card->rsq.dma);
 	}
 	if (error >= 11) {
-		kfree(card->tsq.org);
+		dma_free_coherent(&card->pcidev->dev, NS_TSQSIZE + NS_TSQ_ALIGNMENT,
+				card->tsq.org, card->tsq.dma);
 	}
 	if (error >= 10) {
 		free_irq(card->pcidev->irq, card);

From 70b639dc41ad499384e41e106fce72e36805c9f2 Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Sun, 20 Jun 2021 15:24:15 +0000
Subject: [PATCH 2447/3292] atm: nicstar: register the interrupt handler in the
 right place

Because the error handling is sequential, the application of resources
should be carried out in the order of error handling, so the operation
of registering the interrupt handler should be put in front, so as not
to free the unregistered interrupt handler during error handling.

This log reveals it:

[    3.438724] Trying to free already-free IRQ 23
[    3.439060] WARNING: CPU: 5 PID: 1 at kernel/irq/manage.c:1825 free_irq+0xfb/0x480
[    3.440039] Modules linked in:
[    3.440257] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 5.12.4-g70e7f0549188-dirty #142
[    3.440793] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.441561] RIP: 0010:free_irq+0xfb/0x480
[    3.441845] Code: 6e 08 74 6f 4d 89 f4 e8 c3 78 09 00 4d 8b 74 24 18 4d 85 f6 75 e3 e8 b4 78 09 00 8b 75 c8 48 c7 c7 a0 ac d5 85 e8 95 d7 f5 ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 87 c5 90 03 48 8b 43 40 4c 8b a0 80
[    3.443121] RSP: 0000:ffffc90000017b50 EFLAGS: 00010086
[    3.443483] RAX: 0000000000000000 RBX: ffff888107c6f000 RCX: 0000000000000000
[    3.443972] RDX: 0000000000000000 RSI: ffffffff8123f301 RDI: 00000000ffffffff
[    3.444462] RBP: ffffc90000017b90 R08: 0000000000000001 R09: 0000000000000003
[    3.444950] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
[    3.444994] R13: ffff888107dc0000 R14: ffff888104f6bf00 R15: ffff888107c6f0a8
[    3.444994] FS:  0000000000000000(0000) GS:ffff88817bd40000(0000) knlGS:0000000000000000
[    3.444994] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.444994] CR2: 0000000000000000 CR3: 000000000642e000 CR4: 00000000000006e0
[    3.444994] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    3.444994] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    3.444994] Call Trace:
[    3.444994]  ns_init_card_error+0x18e/0x250
[    3.444994]  nicstar_init_one+0x10d2/0x1130
[    3.444994]  local_pci_probe+0x4a/0xb0
[    3.444994]  pci_device_probe+0x126/0x1d0
[    3.444994]  ? pci_device_remove+0x100/0x100
[    3.444994]  really_probe+0x27e/0x650
[    3.444994]  driver_probe_device+0x84/0x1d0
[    3.444994]  ? mutex_lock_nested+0x16/0x20
[    3.444994]  device_driver_attach+0x63/0x70
[    3.444994]  __driver_attach+0x117/0x1a0
[    3.444994]  ? device_driver_attach+0x70/0x70
[    3.444994]  bus_for_each_dev+0xb6/0x110
[    3.444994]  ? rdinit_setup+0x40/0x40
[    3.444994]  driver_attach+0x22/0x30
[    3.444994]  bus_add_driver+0x1e6/0x2a0
[    3.444994]  driver_register+0xa4/0x180
[    3.444994]  __pci_register_driver+0x77/0x80
[    3.444994]  ? uPD98402_module_init+0xd/0xd
[    3.444994]  nicstar_init+0x1f/0x75
[    3.444994]  do_one_initcall+0x7a/0x3d0
[    3.444994]  ? rdinit_setup+0x40/0x40
[    3.444994]  ? rcu_read_lock_sched_held+0x4a/0x70
[    3.444994]  kernel_init_freeable+0x2a7/0x2f9
[    3.444994]  ? rest_init+0x2c0/0x2c0
[    3.444994]  kernel_init+0x13/0x180
[    3.444994]  ? rest_init+0x2c0/0x2c0
[    3.444994]  ? rest_init+0x2c0/0x2c0
[    3.444994]  ret_from_fork+0x1f/0x30
[    3.444994] Kernel panic - not syncing: panic_on_warn set ...
[    3.444994] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 5.12.4-g70e7f0549188-dirty #142
[    3.444994] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.444994] Call Trace:
[    3.444994]  dump_stack+0xba/0xf5
[    3.444994]  ? free_irq+0xfb/0x480
[    3.444994]  panic+0x155/0x3ed
[    3.444994]  ? __warn+0xed/0x150
[    3.444994]  ? free_irq+0xfb/0x480
[    3.444994]  __warn+0x103/0x150
[    3.444994]  ? free_irq+0xfb/0x480
[    3.444994]  report_bug+0x119/0x1c0
[    3.444994]  handle_bug+0x3b/0x80
[    3.444994]  exc_invalid_op+0x18/0x70
[    3.444994]  asm_exc_invalid_op+0x12/0x20
[    3.444994] RIP: 0010:free_irq+0xfb/0x480
[    3.444994] Code: 6e 08 74 6f 4d 89 f4 e8 c3 78 09 00 4d 8b 74 24 18 4d 85 f6 75 e3 e8 b4 78 09 00 8b 75 c8 48 c7 c7 a0 ac d5 85 e8 95 d7 f5 ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 87 c5 90 03 48 8b 43 40 4c 8b a0 80
[    3.444994] RSP: 0000:ffffc90000017b50 EFLAGS: 00010086
[    3.444994] RAX: 0000000000000000 RBX: ffff888107c6f000 RCX: 0000000000000000
[    3.444994] RDX: 0000000000000000 RSI: ffffffff8123f301 RDI: 00000000ffffffff
[    3.444994] RBP: ffffc90000017b90 R08: 0000000000000001 R09: 0000000000000003
[    3.444994] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
[    3.444994] R13: ffff888107dc0000 R14: ffff888104f6bf00 R15: ffff888107c6f0a8
[    3.444994]  ? vprintk_func+0x71/0x110
[    3.444994]  ns_init_card_error+0x18e/0x250
[    3.444994]  nicstar_init_one+0x10d2/0x1130
[    3.444994]  local_pci_probe+0x4a/0xb0
[    3.444994]  pci_device_probe+0x126/0x1d0
[    3.444994]  ? pci_device_remove+0x100/0x100
[    3.444994]  really_probe+0x27e/0x650
[    3.444994]  driver_probe_device+0x84/0x1d0
[    3.444994]  ? mutex_lock_nested+0x16/0x20
[    3.444994]  device_driver_attach+0x63/0x70
[    3.444994]  __driver_attach+0x117/0x1a0
[    3.444994]  ? device_driver_attach+0x70/0x70
[    3.444994]  bus_for_each_dev+0xb6/0x110
[    3.444994]  ? rdinit_setup+0x40/0x40
[    3.444994]  driver_attach+0x22/0x30
[    3.444994]  bus_add_driver+0x1e6/0x2a0
[    3.444994]  driver_register+0xa4/0x180
[    3.444994]  __pci_register_driver+0x77/0x80
[    3.444994]  ? uPD98402_module_init+0xd/0xd
[    3.444994]  nicstar_init+0x1f/0x75
[    3.444994]  do_one_initcall+0x7a/0x3d0
[    3.444994]  ? rdinit_setup+0x40/0x40
[    3.444994]  ? rcu_read_lock_sched_held+0x4a/0x70
[    3.444994]  kernel_init_freeable+0x2a7/0x2f9
[    3.444994]  ? rest_init+0x2c0/0x2c0
[    3.444994]  kernel_init+0x13/0x180
[    3.444994]  ? rest_init+0x2c0/0x2c0
[    3.444994]  ? rest_init+0x2c0/0x2c0
[    3.444994]  ret_from_fork+0x1f/0x30
[    3.444994] Dumping ftrace buffer:
[    3.444994]    (ftrace buffer empty)
[    3.444994] Kernel Offset: disabled
[    3.444994] Rebooting in 1 seconds..

Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/nicstar.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index e031f6d74e7e..e5117144347f 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -527,6 +527,15 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 	/* Set the VPI/VCI MSb mask to zero so we can receive OAM cells */
 	writel(0x00000000, card->membase + VPM);
 
+	card->intcnt = 0;
+	if (request_irq
+	    (pcidev->irq, &ns_irq_handler, IRQF_SHARED, "nicstar", card) != 0) {
+		pr_err("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq);
+		error = 9;
+		ns_init_card_error(card, error);
+		return error;
+	}
+
 	/* Initialize TSQ */
 	card->tsq.org = dma_alloc_coherent(&card->pcidev->dev,
 					   NS_TSQSIZE + NS_TSQ_ALIGNMENT,
@@ -753,15 +762,6 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
 
 	card->efbie = 1;
 
-	card->intcnt = 0;
-	if (request_irq
-	    (pcidev->irq, &ns_irq_handler, IRQF_SHARED, "nicstar", card) != 0) {
-		printk("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq);
-		error = 9;
-		ns_init_card_error(card, error);
-		return error;
-	}
-
 	/* Register device */
 	card->atmdev = atm_dev_register("nicstar", &card->pcidev->dev, &atm_ops,
 					-1, NULL);

From b40d7af798a0a459d65bd95f34e3dff004eb554a Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 20 Jun 2021 11:49:40 +0200
Subject: [PATCH 2448/3292] net: hns3: Fix a memory leak in an error handling
 path in 'hclge_handle_error_info_log()'

If this 'kzalloc()' fails we must free some resources as in all the other
error handling paths of this function.

Fixes: 2e2deee7618b ("net: hns3: add the RAS compatibility adaptation solution")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index bad9fda19398..ec9a7f8bc3fe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -2330,8 +2330,10 @@ int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
 	buf_size = buf_len / sizeof(u32);
 
 	desc_data = kzalloc(buf_len, GFP_KERNEL);
-	if (!desc_data)
-		return -ENOMEM;
+	if (!desc_data) {
+		ret = -ENOMEM;
+		goto err_desc;
+	}
 
 	buf = kzalloc(buf_len, GFP_KERNEL);
 	if (!buf) {

From 19e068b18e729aecca4fbe5b261b05b59230c80f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Sun, 20 Jun 2021 20:38:49 +0800
Subject: [PATCH 2449/3292] dt-bindings: dwmac: Remove unexpected item.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the unexpected "snps,dwmac" item in the example.

Fixes: 3b8401066e5a ("dt-bindings: dwmac: Add bindings for new Ingenic SoCs.")

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ingenic,mac.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
index 5e93d4f9a080..d08a88125a5c 100644
--- a/Documentation/devicetree/bindings/net/ingenic,mac.yaml
+++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
@@ -61,7 +61,7 @@ examples:
     #include <dt-bindings/clock/x1000-cgu.h>
 
     mac: ethernet@134b0000 {
-        compatible = "ingenic,x1000-mac", "snps,dwmac";
+        compatible = "ingenic,x1000-mac";
         reg = <0x134b0000 0x2000>;
 
         interrupt-parent = <&intc>;

From b90788459cd6d140171b046f0b37fad341ade0a3 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 20 Jun 2021 15:43:28 +0200
Subject: [PATCH 2450/3292] net: mana: Fix a memory leak in an error handling
 path in 'mana_create_txq()'

If this test fails we must free some resources as in all the other error
handling paths of this function.

Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 04d067243457..1ed25e48f616 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1230,8 +1230,10 @@ static int mana_create_txq(struct mana_port_context *apc,
 
 		cq->gdma_id = cq->gdma_cq->id;
 
-		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs))
-			return -EINVAL;
+		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
+			err = -EINVAL;
+			goto out;
+		}
 
 		gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 

From fe0bdbde0756e29784ec9770d3a418c9d1640eee Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Mon, 21 Jun 2021 13:12:25 +0800
Subject: [PATCH 2451/3292] net: add pf_family_names[] for protocol family

Modify the pr_info content from int to char * in sock_register() and
sock_unregister(), this looks more readable.

Fixed build error in ARCH=sparc64.

Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 4f2c6d2795d0..bd9233da2497 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -165,6 +165,54 @@ static const struct file_operations socket_file_ops = {
 	.show_fdinfo =	sock_show_fdinfo,
 };
 
+static const char * const pf_family_names[] = {
+	[PF_UNSPEC]	= "PF_UNSPEC",
+	[PF_UNIX]	= "PF_UNIX/PF_LOCAL",
+	[PF_INET]	= "PF_INET",
+	[PF_AX25]	= "PF_AX25",
+	[PF_IPX]	= "PF_IPX",
+	[PF_APPLETALK]	= "PF_APPLETALK",
+	[PF_NETROM]	= "PF_NETROM",
+	[PF_BRIDGE]	= "PF_BRIDGE",
+	[PF_ATMPVC]	= "PF_ATMPVC",
+	[PF_X25]	= "PF_X25",
+	[PF_INET6]	= "PF_INET6",
+	[PF_ROSE]	= "PF_ROSE",
+	[PF_DECnet]	= "PF_DECnet",
+	[PF_NETBEUI]	= "PF_NETBEUI",
+	[PF_SECURITY]	= "PF_SECURITY",
+	[PF_KEY]	= "PF_KEY",
+	[PF_NETLINK]	= "PF_NETLINK/PF_ROUTE",
+	[PF_PACKET]	= "PF_PACKET",
+	[PF_ASH]	= "PF_ASH",
+	[PF_ECONET]	= "PF_ECONET",
+	[PF_ATMSVC]	= "PF_ATMSVC",
+	[PF_RDS]	= "PF_RDS",
+	[PF_SNA]	= "PF_SNA",
+	[PF_IRDA]	= "PF_IRDA",
+	[PF_PPPOX]	= "PF_PPPOX",
+	[PF_WANPIPE]	= "PF_WANPIPE",
+	[PF_LLC]	= "PF_LLC",
+	[PF_IB]		= "PF_IB",
+	[PF_MPLS]	= "PF_MPLS",
+	[PF_CAN]	= "PF_CAN",
+	[PF_TIPC]	= "PF_TIPC",
+	[PF_BLUETOOTH]	= "PF_BLUETOOTH",
+	[PF_IUCV]	= "PF_IUCV",
+	[PF_RXRPC]	= "PF_RXRPC",
+	[PF_ISDN]	= "PF_ISDN",
+	[PF_PHONET]	= "PF_PHONET",
+	[PF_IEEE802154]	= "PF_IEEE802154",
+	[PF_CAIF]	= "PF_CAIF",
+	[PF_ALG]	= "PF_ALG",
+	[PF_NFC]	= "PF_NFC",
+	[PF_VSOCK]	= "PF_VSOCK",
+	[PF_KCM]	= "PF_KCM",
+	[PF_QIPCRTR]	= "PF_QIPCRTR",
+	[PF_SMC]	= "PF_SMC",
+	[PF_XDP]	= "PF_XDP",
+};
+
 /*
  *	The protocol list. Each protocol is registered in here.
  */
@@ -2975,7 +3023,7 @@ int sock_register(const struct net_proto_family *ops)
 	}
 	spin_unlock(&net_family_lock);
 
-	pr_info("NET: Registered protocol family %d\n", ops->family);
+	pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
 	return err;
 }
 EXPORT_SYMBOL(sock_register);
@@ -3003,7 +3051,7 @@ void sock_unregister(int family)
 
 	synchronize_rcu();
 
-	pr_info("NET: Unregistered protocol family %d\n", family);
+	pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
 }
 EXPORT_SYMBOL(sock_unregister);
 

From c7ff9cff70601ea19245d997bb977344663434c7 Mon Sep 17 00:00:00 2001
From: "Longpeng(Mike)" <longpeng2@huawei.com>
Date: Mon, 21 Jun 2021 14:26:01 +0800
Subject: [PATCH 2452/3292] vsock: notify server to shutdown when client has
 pending signal

The client's sk_state will be set to TCP_ESTABLISHED if the server
replay the client's connect request.

However, if the client has pending signal, its sk_state will be set
to TCP_CLOSE without notify the server, so the server will hold the
corrupt connection.

            client                        server

1. sk_state=TCP_SYN_SENT         |
2. call ->connect()              |
3. wait reply                    |
                                 | 4. sk_state=TCP_ESTABLISHED
                                 | 5. insert to connected list
                                 | 6. reply to the client
7. sk_state=TCP_ESTABLISHED      |
8. insert to connected list      |
9. *signal pending* <--------------------- the user kill client
10. sk_state=TCP_CLOSE           |
client is exiting...             |
11. call ->release()             |
     virtio_transport_close
      if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true; *return at here, the server cannot notice the connection is corrupt*

So the client should notify the peer in this case.

Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jorgen Hansen <jhansen@vmware.com>
Cc: Norbert Slusarek <nslusarek@gmx.net>
Cc: Andra Paraschiv <andraprs@amazon.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: David Brazdil <dbrazdil@google.com>
Cc: Alexander Popov <alex.popov@linux.com>
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lkml.org/lkml/2021/5/17/418
Signed-off-by: lixianming <lixianming5@huawei.com>
Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 92a72f0e0d94..ae11311807fd 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1369,7 +1369,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 
 		if (signal_pending(current)) {
 			err = sock_intr_errno(timeout);
-			sk->sk_state = TCP_CLOSE;
+			sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
 			sock->state = SS_UNCONNECTED;
 			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;

From ce03b94ba682a67e8233c9ee3066071656ded58f Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Mon, 21 Jun 2021 10:20:08 +0200
Subject: [PATCH 2453/3292] net: ll_temac: Remove left-over debug message

Fixes: f63963411942 ("net: ll_temac: Avoid ndo_start_xmit returning NETDEV_TX_BUSY")
Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/ll_temac_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 9a13953ea70f..60a4f79b8fa1 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -942,10 +942,8 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	wmb();
 	lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
 
-	if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
-		netdev_info(ndev, "%s -> netif_stop_queue\n", __func__);
+	if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
 		netif_stop_queue(ndev);
-	}
 
 	return NETDEV_TX_OK;
 }

From b8b79c414eca4e9bcab645e02cb92c48db974ce9 Mon Sep 17 00:00:00 2001
From: Eldar Gasanov <eldargasanov2@gmail.com>
Date: Mon, 21 Jun 2021 11:54:38 +0300
Subject: [PATCH 2454/3292] net: dsa: mv88e6xxx: Fix adding vlan 0

8021q module adds vlan 0 to all interfaces when it starts.
When 8021q module is loaded it isn't possible to create bond
with mv88e6xxx interfaces, bonding module dipslay error
"Couldn't add bond vlan ids", because it tries to add vlan 0
to slave interfaces.

There is unexpected behavior in the switch. When a PVID
is assigned to a port the switch changes VID to PVID
in ingress frames with VID 0 on the port. Expected
that the switch doesn't assign PVID to tagged frames
with VID 0. But there isn't a way to change this behavior
in the switch.

Fixes: 57e661aae6a8 ("net: dsa: mv88e6xxx: Link aggregation support")
Signed-off-by: Eldar Gasanov <eldargasanov2@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index eca285aaf72f..961fa6b75cad 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1618,9 +1618,6 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
 	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 
-	if (!vid)
-		return -EOPNOTSUPP;
-
 	/* DSA and CPU ports have to be members of multiple vlans */
 	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
 		return 0;
@@ -2109,6 +2106,9 @@ static int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
 	u8 member;
 	int err;
 
+	if (!vlan->vid)
+		return 0;
+
 	err = mv88e6xxx_port_vlan_prepare(ds, port, vlan);
 	if (err)
 		return err;

From 6d5516177d3b723fe9701b89e69db18cf0ca0421 Mon Sep 17 00:00:00 2001
From: Boris Sukholitko <boris.sukholitko@broadcom.com>
Date: Mon, 21 Jun 2021 12:24:29 +0300
Subject: [PATCH 2455/3292] Revert "net/sched: cls_flower: Remove match on
 n_proto"

This reverts commit 0dca2c7404a938cb10c85d0515cee40ed5348788.

The commit in question breaks hardware offload of flower filters.

Quoting Vladimir Oltean <olteanv@gmail.com>:

 fl_hw_replace_filter() and fl_reoffload() create a struct
 flow_cls_offload with a rule->match.mask member derived from the mask
 of the software classifier: &f->mask->key - that same mask that is used
 for initializing the flow dissector keys, and the one from which Boris
 removed the basic.n_proto member because it was bothering him.

Reported-by: Vadym Kochan <vadym.kochan@plvision.eu>
Signed-off-by: Boris Sukholitko <boris.sukholitko@broadcom.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2e704c7a105a..d7869a984881 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1531,13 +1531,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 						       &mask->basic.n_proto,
 						       TCA_FLOWER_UNSPEC,
 						       sizeof(key->basic.n_proto));
-					mask->basic.n_proto = cpu_to_be16(0);
 				} else {
 					key->basic.n_proto = ethertype;
+					mask->basic.n_proto = cpu_to_be16(~0);
 				}
 			}
 		} else {
 			key->basic.n_proto = ethertype;
+			mask->basic.n_proto = cpu_to_be16(~0);
 		}
 	}
 

From 0cd58e5c53babb9237b741dbef711f0a9eb6d3fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Jun 2021 10:54:49 -0700
Subject: [PATCH 2456/3292] pkt_sched: sch_qfq: fix qfq_change_class() error
 path

If qfq_change_class() is unable to allocate memory for qfq_aggregate,
it frees the class that has been inserted in the class hash table,
but does not unhash it.

Defer the insertion after the problematic allocation.

BUG: KASAN: use-after-free in hlist_add_head include/linux/list.h:884 [inline]
BUG: KASAN: use-after-free in qdisc_class_hash_insert+0x200/0x210 net/sched/sch_api.c:731
Write of size 8 at addr ffff88814a534f10 by task syz-executor.4/31478

CPU: 0 PID: 31478 Comm: syz-executor.4 Not tainted 5.13.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x141/0x1d7 lib/dump_stack.c:120
 print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:233
 __kasan_report mm/kasan/report.c:419 [inline]
 kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:436
 hlist_add_head include/linux/list.h:884 [inline]
 qdisc_class_hash_insert+0x200/0x210 net/sched/sch_api.c:731
 qfq_change_class+0x96c/0x1990 net/sched/sch_qfq.c:489
 tc_ctl_tclass+0x514/0xe50 net/sched/sch_api.c:2113
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5564
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504
 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1929
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:674
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x4665d9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fdc7b5f0188 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 000000000056bf80 RCX: 00000000004665d9
RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003
RBP: 00007fdc7b5f01d0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002
R13: 00007ffcf7310b3f R14: 00007fdc7b5f0300 R15: 0000000000022000

Allocated by task 31445:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
 kasan_set_track mm/kasan/common.c:46 [inline]
 set_alloc_info mm/kasan/common.c:428 [inline]
 ____kasan_kmalloc mm/kasan/common.c:507 [inline]
 ____kasan_kmalloc mm/kasan/common.c:466 [inline]
 __kasan_kmalloc+0x9b/0xd0 mm/kasan/common.c:516
 kmalloc include/linux/slab.h:556 [inline]
 kzalloc include/linux/slab.h:686 [inline]
 qfq_change_class+0x705/0x1990 net/sched/sch_qfq.c:464
 tc_ctl_tclass+0x514/0xe50 net/sched/sch_api.c:2113
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5564
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504
 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1929
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:674
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Freed by task 31445:
 kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
 kasan_set_track+0x1c/0x30 mm/kasan/common.c:46
 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:357
 ____kasan_slab_free mm/kasan/common.c:360 [inline]
 ____kasan_slab_free mm/kasan/common.c:325 [inline]
 __kasan_slab_free+0xfb/0x130 mm/kasan/common.c:368
 kasan_slab_free include/linux/kasan.h:212 [inline]
 slab_free_hook mm/slub.c:1583 [inline]
 slab_free_freelist_hook+0xdf/0x240 mm/slub.c:1608
 slab_free mm/slub.c:3168 [inline]
 kfree+0xe5/0x7f0 mm/slub.c:4212
 qfq_change_class+0x10fb/0x1990 net/sched/sch_qfq.c:518
 tc_ctl_tclass+0x514/0xe50 net/sched/sch_api.c:2113
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5564
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504
 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1929
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:674
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

The buggy address belongs to the object at ffff88814a534f00
 which belongs to the cache kmalloc-128 of size 128
The buggy address is located 16 bytes inside of
 128-byte region [ffff88814a534f00, ffff88814a534f80)
The buggy address belongs to the page:
page:ffffea0005294d00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x14a534
flags: 0x57ff00000000200(slab|node=1|zone=2|lastcpupid=0x7ff)
raw: 057ff00000000200 ffffea00004fee00 0000000600000006 ffff8880110418c0
raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as allocated
page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 29797, ts 604817765317, free_ts 604810151744
 prep_new_page mm/page_alloc.c:2358 [inline]
 get_page_from_freelist+0x1033/0x2b60 mm/page_alloc.c:3994
 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5200
 alloc_pages+0x18c/0x2a0 mm/mempolicy.c:2272
 alloc_slab_page mm/slub.c:1646 [inline]
 allocate_slab+0x2c5/0x4c0 mm/slub.c:1786
 new_slab mm/slub.c:1849 [inline]
 new_slab_objects mm/slub.c:2595 [inline]
 ___slab_alloc+0x4a1/0x810 mm/slub.c:2758
 __slab_alloc.constprop.0+0xa7/0xf0 mm/slub.c:2798
 slab_alloc_node mm/slub.c:2880 [inline]
 slab_alloc mm/slub.c:2922 [inline]
 __kmalloc+0x315/0x330 mm/slub.c:4050
 kmalloc include/linux/slab.h:561 [inline]
 kzalloc include/linux/slab.h:686 [inline]
 __register_sysctl_table+0x112/0x1090 fs/proc/proc_sysctl.c:1318
 mpls_dev_sysctl_register+0x1b7/0x2d0 net/mpls/af_mpls.c:1421
 mpls_add_dev net/mpls/af_mpls.c:1472 [inline]
 mpls_dev_notify+0x214/0x8b0 net/mpls/af_mpls.c:1588
 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83
 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2121
 call_netdevice_notifiers_extack net/core/dev.c:2133 [inline]
 call_netdevice_notifiers net/core/dev.c:2147 [inline]
 register_netdevice+0x106b/0x1500 net/core/dev.c:10312
 veth_newlink+0x585/0xac0 drivers/net/veth.c:1547
 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3452
 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3500
page last free stack trace:
 reset_page_owner include/linux/page_owner.h:24 [inline]
 free_pages_prepare mm/page_alloc.c:1298 [inline]
 free_pcp_prepare+0x223/0x300 mm/page_alloc.c:1342
 free_unref_page_prepare mm/page_alloc.c:3250 [inline]
 free_unref_page+0x12/0x1d0 mm/page_alloc.c:3298
 __vunmap+0x783/0xb60 mm/vmalloc.c:2566
 free_work+0x58/0x70 mm/vmalloc.c:80
 process_one_work+0x98d/0x1600 kernel/workqueue.c:2276
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2422
 kthread+0x3b1/0x4a0 kernel/kthread.c:313
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

Memory state around the buggy address:
 ffff88814a534e00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff88814a534e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88814a534f00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                         ^
 ffff88814a534f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff88814a535000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Fixes: 462dbc9101acd ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_qfq.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 1db9d4a2ef5e..b692a0de1ad5 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -485,11 +485,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (cl->qdisc != &noop_qdisc)
 		qdisc_hash_add(cl->qdisc, true);
-	sch_tree_lock(sch);
-	qdisc_class_hash_insert(&q->clhash, &cl->common);
-	sch_tree_unlock(sch);
-
-	qdisc_class_hash_grow(sch, &q->clhash);
 
 set_change_agg:
 	sch_tree_lock(sch);
@@ -507,8 +502,11 @@ set_change_agg:
 	}
 	if (existing)
 		qfq_deact_rm_from_agg(q, cl);
+	else
+		qdisc_class_hash_insert(&q->clhash, &cl->common);
 	qfq_add_to_agg(q, new_agg, cl);
 	sch_tree_unlock(sch);
+	qdisc_class_hash_grow(sch, &q->clhash);
 
 	*arg = (unsigned long)cl;
 	return 0;

From a3fa449ffcf5bcf9c3dddf62c11599cdc79ef54a Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 21 Jun 2021 22:08:49 +0200
Subject: [PATCH 2457/3292] net: handle ARPHRD_IP6GRE in
 dev_is_mac_header_xmit()

Similar to commit 3b707c3008ca ("net: dev_is_mac_header_xmit() true for
ARPHRD_RAWIP"), add ARPHRD_IP6GRE to dev_is_mac_header_xmit(), to make
ip6gre compatible with act_mirred and __bpf_redirect().

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index bf5c5f32c65e..b712217f7030 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -48,6 +48,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
 	case ARPHRD_TUNNEL6:
 	case ARPHRD_SIT:
 	case ARPHRD_IPGRE:
+	case ARPHRD_IP6GRE:
 	case ARPHRD_VOID:
 	case ARPHRD_NONE:
 	case ARPHRD_RAWIP:

From ef2c3ddaa4ed0b1d9de34378d08d3e24a3fec7ac Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 21 Jun 2021 14:35:09 -0700
Subject: [PATCH 2458/3292] ibmvnic: Use strscpy() instead of strncpy()

Since these strings are expected to be NUL-terminated and the buffers
are exactly sized (in vnic_client_data_len()) with no padding, strncpy()
can be safely replaced with strscpy() here, as strncpy() on
NUL-terminated string is considered deprecated[1]. This has the
side-effect of silencing a -Warray-bounds warning due to the compiler
being confused about the vlcd incrementing:

In file included from ./include/linux/string.h:253,
                 from ./include/linux/bitmap.h:10,
                 from ./include/linux/cpumask.h:12,
                 from ./include/linux/mm_types_task.h:14,
                 from ./include/linux/mm_types.h:5,
                 from ./include/linux/buildid.h:5,
                 from ./include/linux/module.h:14,
                 from drivers/net/ethernet/ibm/ibmvnic.c:35:
In function '__fortify_strncpy',
    inlined from 'vnic_add_client_data' at drivers/net/ethernet/ibm/ibmvnic.c:3919:2:
./include/linux/fortify-string.h:39:30: warning: '__builtin_strncpy' offset 12 from the object at 'v
lcd' is out of the bounds of referenced subobject 'name' with type 'char[]' at offset 12 [-Warray-bo
unds]
   39 | #define __underlying_strncpy __builtin_strncpy
      |                              ^
./include/linux/fortify-string.h:51:9: note: in expansion of macro '__underlying_strncpy'
   51 |  return __underlying_strncpy(p, q, size);
      |         ^~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/ibm/ibmvnic.c: In function 'vnic_add_client_data':
drivers/net/ethernet/ibm/ibmvnic.c:3883:7: note: subobject 'name' declared here
 3883 |  char name[];
      |       ^~~~

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings

Cc: Dany Madden <drt@linux.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Cc: Thomas Falcon <tlfalcon@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2d8804ebdf96..adb0d5ca9ff1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3909,21 +3909,21 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
 	vlcd->type = 1;
 	len = strlen(os_name) + 1;
 	vlcd->len = cpu_to_be16(len);
-	strncpy(vlcd->name, os_name, len);
+	strscpy(vlcd->name, os_name, len);
 	vlcd = (struct vnic_login_client_data *)(vlcd->name + len);
 
 	/* Type 2 - LPAR name */
 	vlcd->type = 2;
 	len = strlen(utsname()->nodename) + 1;
 	vlcd->len = cpu_to_be16(len);
-	strncpy(vlcd->name, utsname()->nodename, len);
+	strscpy(vlcd->name, utsname()->nodename, len);
 	vlcd = (struct vnic_login_client_data *)(vlcd->name + len);
 
 	/* Type 3 - device name */
 	vlcd->type = 3;
 	len = strlen(adapter->netdev->name) + 1;
 	vlcd->len = cpu_to_be16(len);
-	strncpy(vlcd->name, adapter->netdev->name, len);
+	strscpy(vlcd->name, adapter->netdev->name, len);
 }
 
 static int send_login(struct ibmvnic_adapter *adapter)

From 78ca0c0549803ab0f5ecab43e295111b7ccdc7f2 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:42 +0930
Subject: [PATCH 2459/3292] ipmi: kcs_bmc_aspeed: Use of match data to extract
 KCS properties

Unpack and remove the aspeed_kcs_probe_of_v[12]() functions to aid
rearranging how the private device-driver memory is allocated.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-Id: <20210608104757.582199-2-andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc_aspeed.c | 168 ++++++++++++++---------------
 1 file changed, 84 insertions(+), 84 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index eefe362f65f0..69be873f40ac 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/poll.h>
 #include <linux/regmap.h>
@@ -63,6 +64,10 @@ struct aspeed_kcs_bmc {
 	struct regmap *map;
 };
 
+struct aspeed_kcs_of_ops {
+	int (*get_channel)(struct platform_device *pdev);
+	int (*get_io_address)(struct platform_device *pdev);
+};
 
 static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 {
@@ -231,13 +236,10 @@ static const struct kcs_ioreg ast_kcs_bmc_ioregs[KCS_CHANNEL_MAX] = {
 	{ .idr = LPC_IDR4, .odr = LPC_ODR4, .str = LPC_STR4 },
 };
 
-static struct kcs_bmc *aspeed_kcs_probe_of_v1(struct platform_device *pdev)
+static int aspeed_kcs_of_v1_get_channel(struct platform_device *pdev)
 {
-	struct aspeed_kcs_bmc *priv;
 	struct device_node *np;
-	struct kcs_bmc *kcs;
 	u32 channel;
-	u32 slave;
 	int rc;
 
 	np = pdev->dev.of_node;
@@ -245,105 +247,81 @@ static struct kcs_bmc *aspeed_kcs_probe_of_v1(struct platform_device *pdev)
 	rc = of_property_read_u32(np, "kcs_chan", &channel);
 	if ((rc != 0) || (channel == 0 || channel > KCS_CHANNEL_MAX)) {
 		dev_err(&pdev->dev, "no valid 'kcs_chan' configured\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
-	kcs = kcs_bmc_alloc(&pdev->dev, sizeof(struct aspeed_kcs_bmc), channel);
-	if (!kcs)
-		return ERR_PTR(-ENOMEM);
-
-	priv = kcs_bmc_priv(kcs);
-	priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
-	if (IS_ERR(priv->map)) {
-		dev_err(&pdev->dev, "Couldn't get regmap\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	rc = of_property_read_u32(np, "kcs_addr", &slave);
-	if (rc) {
-		dev_err(&pdev->dev, "no valid 'kcs_addr' configured\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	kcs->ioreg = ast_kcs_bmc_ioregs[channel - 1];
-	aspeed_kcs_set_address(kcs, slave);
-
-	return kcs;
+	return channel;
 }
 
-static int aspeed_kcs_calculate_channel(const struct kcs_ioreg *regs)
+static int aspeed_kcs_of_v1_get_io_address(struct platform_device *pdev)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(ast_kcs_bmc_ioregs); i++) {
-		if (!memcmp(&ast_kcs_bmc_ioregs[i], regs, sizeof(*regs)))
-			return i + 1;
-	}
-
-	return -EINVAL;
-}
-
-static struct kcs_bmc *aspeed_kcs_probe_of_v2(struct platform_device *pdev)
-{
-	struct aspeed_kcs_bmc *priv;
-	struct device_node *np;
-	struct kcs_ioreg ioreg;
-	struct kcs_bmc *kcs;
-	const __be32 *reg;
-	int channel;
 	u32 slave;
 	int rc;
 
+	rc = of_property_read_u32(pdev->dev.of_node, "kcs_addr", &slave);
+	if (rc || slave > 0xffff) {
+		dev_err(&pdev->dev, "no valid 'kcs_addr' configured\n");
+		return -EINVAL;
+	}
+
+	return slave;
+}
+
+static int aspeed_kcs_of_v2_get_channel(struct platform_device *pdev)
+{
+	struct device_node *np;
+	struct kcs_ioreg ioreg;
+	const __be32 *reg;
+	int i;
+
 	np = pdev->dev.of_node;
 
 	/* Don't translate addresses, we want offsets for the regmaps */
 	reg = of_get_address(np, 0, NULL, NULL);
 	if (!reg)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	ioreg.idr = be32_to_cpup(reg);
 
 	reg = of_get_address(np, 1, NULL, NULL);
 	if (!reg)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	ioreg.odr = be32_to_cpup(reg);
 
 	reg = of_get_address(np, 2, NULL, NULL);
 	if (!reg)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	ioreg.str = be32_to_cpup(reg);
 
-	channel = aspeed_kcs_calculate_channel(&ioreg);
-	if (channel < 0)
-		return ERR_PTR(channel);
-
-	kcs = kcs_bmc_alloc(&pdev->dev, sizeof(struct aspeed_kcs_bmc), channel);
-	if (!kcs)
-		return ERR_PTR(-ENOMEM);
-
-	kcs->ioreg = ioreg;
-
-	priv = kcs_bmc_priv(kcs);
-	priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
-	if (IS_ERR(priv->map)) {
-		dev_err(&pdev->dev, "Couldn't get regmap\n");
-		return ERR_PTR(-ENODEV);
+	for (i = 0; i < ARRAY_SIZE(ast_kcs_bmc_ioregs); i++) {
+		if (!memcmp(&ast_kcs_bmc_ioregs[i], &ioreg, sizeof(ioreg)))
+			return i + 1;
 	}
 
-	rc = of_property_read_u32(np, "aspeed,lpc-io-reg", &slave);
-	if (rc)
-		return ERR_PTR(rc);
+	return -EINVAL;
+}
 
-	aspeed_kcs_set_address(kcs, slave);
+static int aspeed_kcs_of_v2_get_io_address(struct platform_device *pdev)
+{
+	uint32_t slave;
+	int rc;
 
-	return kcs;
+	rc = of_property_read_u32(pdev->dev.of_node, "aspeed,lpc-io-reg", &slave);
+	if (rc || slave > 0xffff) {
+		dev_err(&pdev->dev, "no valid 'aspeed,lpc-io-reg' configured\n");
+		return -EINVAL;
+	}
+
+	return slave;
 }
 
 static int aspeed_kcs_probe(struct platform_device *pdev)
 {
+	const struct aspeed_kcs_of_ops *ops;
 	struct device *dev = &pdev->dev;
+	struct aspeed_kcs_bmc *priv;
 	struct kcs_bmc *kcs_bmc;
 	struct device_node *np;
-	int rc;
+	int rc, channel, addr;
 
 	np = dev->of_node->parent;
 	if (!of_device_is_compatible(np, "aspeed,ast2400-lpc-v2") &&
@@ -352,23 +330,35 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 		dev_err(dev, "unsupported LPC device binding\n");
 		return -ENODEV;
 	}
-
-	np = dev->of_node;
-	if (of_device_is_compatible(np, "aspeed,ast2400-kcs-bmc") ||
-	    of_device_is_compatible(np, "aspeed,ast2500-kcs-bmc"))
-		kcs_bmc = aspeed_kcs_probe_of_v1(pdev);
-	else if (of_device_is_compatible(np, "aspeed,ast2400-kcs-bmc-v2") ||
-		 of_device_is_compatible(np, "aspeed,ast2500-kcs-bmc-v2"))
-		kcs_bmc = aspeed_kcs_probe_of_v2(pdev);
-	else
+	ops = of_device_get_match_data(&pdev->dev);
+	if (!ops)
 		return -EINVAL;
 
-	if (IS_ERR(kcs_bmc))
-		return PTR_ERR(kcs_bmc);
+	channel = ops->get_channel(pdev);
+	if (channel < 0)
+		return channel;
 
+	kcs_bmc = kcs_bmc_alloc(&pdev->dev, sizeof(struct aspeed_kcs_bmc), channel);
+	if (!kcs_bmc)
+		return -ENOMEM;
+
+	kcs_bmc->ioreg = ast_kcs_bmc_ioregs[channel - 1];
 	kcs_bmc->io_inputb = aspeed_kcs_inb;
 	kcs_bmc->io_outputb = aspeed_kcs_outb;
 
+	addr = ops->get_io_address(pdev);
+	if (addr < 0)
+		return addr;
+
+	priv = kcs_bmc_priv(kcs_bmc);
+	priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
+	if (IS_ERR(priv->map)) {
+		dev_err(&pdev->dev, "Couldn't get regmap\n");
+		return -ENODEV;
+	}
+
+	aspeed_kcs_set_address(kcs_bmc, addr);
+
 	rc = aspeed_kcs_config_irq(kcs_bmc, pdev);
 	if (rc)
 		return rc;
@@ -400,11 +390,21 @@ static int aspeed_kcs_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct aspeed_kcs_of_ops of_v1_ops = {
+	.get_channel = aspeed_kcs_of_v1_get_channel,
+	.get_io_address = aspeed_kcs_of_v1_get_io_address,
+};
+
+static const struct aspeed_kcs_of_ops of_v2_ops = {
+	.get_channel = aspeed_kcs_of_v2_get_channel,
+	.get_io_address = aspeed_kcs_of_v2_get_io_address,
+};
+
 static const struct of_device_id ast_kcs_bmc_match[] = {
-	{ .compatible = "aspeed,ast2400-kcs-bmc" },
-	{ .compatible = "aspeed,ast2500-kcs-bmc" },
-	{ .compatible = "aspeed,ast2400-kcs-bmc-v2" },
-	{ .compatible = "aspeed,ast2500-kcs-bmc-v2" },
+	{ .compatible = "aspeed,ast2400-kcs-bmc", .data = &of_v1_ops },
+	{ .compatible = "aspeed,ast2500-kcs-bmc", .data = &of_v1_ops },
+	{ .compatible = "aspeed,ast2400-kcs-bmc-v2", .data = &of_v2_ops },
+	{ .compatible = "aspeed,ast2500-kcs-bmc-v2", .data = &of_v2_ops },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, ast_kcs_bmc_match);

From ec6f0cf17a639743e622d4156b0a5009263fd479 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:43 +0930
Subject: [PATCH 2460/3292] ipmi: kcs_bmc: Make status update atomic

Enable more efficient implementation of read-modify-write sequences.
Both device drivers for the KCS BMC stack use regmaps. The new callback
allows us to exploit regmap_update_bits().

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-3-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c         |  7 +------
 drivers/char/ipmi/kcs_bmc.h         |  1 +
 drivers/char/ipmi/kcs_bmc_aspeed.c  |  9 +++++++++
 drivers/char/ipmi/kcs_bmc_npcm7xx.c | 10 ++++++++++
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index f292e74bd4a5..58fb1a7bd50d 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -67,12 +67,7 @@ static inline void write_status(struct kcs_bmc *kcs_bmc, u8 data)
 
 static void update_status_bits(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
 {
-	u8 tmp = read_status(kcs_bmc);
-
-	tmp &= ~mask;
-	tmp |= val & mask;
-
-	write_status(kcs_bmc, tmp);
+	kcs_bmc->io_updateb(kcs_bmc, kcs_bmc->ioreg.str, mask, val);
 }
 
 static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
index eb9ea4ce78b8..970f53892f2d 100644
--- a/drivers/char/ipmi/kcs_bmc.h
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -76,6 +76,7 @@ struct kcs_bmc {
 	struct kcs_ioreg ioreg;
 	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
 	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
+	void (*io_updateb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val);
 
 	enum kcs_phases phase;
 	enum kcs_errors error;
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 69be873f40ac..01ebb9da3d49 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -90,6 +90,14 @@ static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
 }
 
+static void aspeed_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val)
+{
+	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	int rc;
+
+	rc = regmap_update_bits(priv->map, reg, mask, val);
+	WARN(rc != 0, "regmap_update_bits() failed: %d\n", rc);
+}
 
 /*
  * AST_usrGuide_KCS.pdf
@@ -345,6 +353,7 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	kcs_bmc->ioreg = ast_kcs_bmc_ioregs[channel - 1];
 	kcs_bmc->io_inputb = aspeed_kcs_inb;
 	kcs_bmc->io_outputb = aspeed_kcs_outb;
+	kcs_bmc->io_updateb = aspeed_kcs_updateb;
 
 	addr = ops->get_io_address(pdev);
 	if (addr < 0)
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index 722f7391fe1f..1f44aadec9e8 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -97,6 +97,15 @@ static void npcm7xx_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
 }
 
+static void npcm7xx_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 data)
+{
+	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	int rc;
+
+	rc = regmap_update_bits(priv->map, reg, mask, data);
+	WARN(rc != 0, "regmap_update_bits() failed: %d\n", rc);
+}
+
 static void npcm7xx_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
 {
 	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
@@ -163,6 +172,7 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 	kcs_bmc->ioreg.str = priv->reg->sts;
 	kcs_bmc->io_inputb = npcm7xx_kcs_inb;
 	kcs_bmc->io_outputb = npcm7xx_kcs_outb;
+	kcs_bmc->io_updateb = npcm7xx_kcs_updateb;
 
 	dev_set_drvdata(dev, kcs_bmc);
 

From 961f7a3af9a9e60bbd2f95ae2d3e9e5e6f41fe6a Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:44 +0930
Subject: [PATCH 2461/3292] ipmi: kcs_bmc: Rename {read,write}_{status,data}()
 functions

Rename the functions in preparation for separating the IPMI chardev out
from the KCS BMC core.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-4-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c | 52 ++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index 58fb1a7bd50d..c4336c1f2d6d 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -45,42 +45,42 @@ enum kcs_states {
 #define KCS_CMD_WRITE_END         0x62
 #define KCS_CMD_READ_BYTE         0x68
 
-static inline u8 read_data(struct kcs_bmc *kcs_bmc)
+static inline u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc)
 {
 	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
 }
 
-static inline void write_data(struct kcs_bmc *kcs_bmc, u8 data)
+static inline void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data)
 {
 	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
 }
 
-static inline u8 read_status(struct kcs_bmc *kcs_bmc)
+static inline u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc)
 {
 	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
 }
 
-static inline void write_status(struct kcs_bmc *kcs_bmc, u8 data)
+static inline void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data)
 {
 	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
 }
 
-static void update_status_bits(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
+static void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
 {
 	kcs_bmc->io_updateb(kcs_bmc, kcs_bmc->ioreg.str, mask, val);
 }
 
 static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
 {
-	update_status_bits(kcs_bmc, KCS_STATUS_STATE_MASK,
+	kcs_bmc_update_status(kcs_bmc, KCS_STATUS_STATE_MASK,
 					KCS_STATUS_STATE(state));
 }
 
 static void kcs_force_abort(struct kcs_bmc *kcs_bmc)
 {
 	set_state(kcs_bmc, ERROR_STATE);
-	read_data(kcs_bmc);
-	write_data(kcs_bmc, KCS_ZERO_DATA);
+	kcs_bmc_read_data(kcs_bmc);
+	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
 
 	kcs_bmc->phase = KCS_PHASE_ERROR;
 	kcs_bmc->data_in_avail = false;
@@ -99,9 +99,9 @@ static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
 	case KCS_PHASE_WRITE_DATA:
 		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
 			set_state(kcs_bmc, WRITE_STATE);
-			write_data(kcs_bmc, KCS_ZERO_DATA);
+			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
 			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
-						read_data(kcs_bmc);
+						kcs_bmc_read_data(kcs_bmc);
 		} else {
 			kcs_force_abort(kcs_bmc);
 			kcs_bmc->error = KCS_LENGTH_ERROR;
@@ -112,7 +112,7 @@ static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
 		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
 			set_state(kcs_bmc, READ_STATE);
 			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
-						read_data(kcs_bmc);
+						kcs_bmc_read_data(kcs_bmc);
 			kcs_bmc->phase = KCS_PHASE_WRITE_DONE;
 			kcs_bmc->data_in_avail = true;
 			wake_up_interruptible(&kcs_bmc->queue);
@@ -126,34 +126,34 @@ static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
 		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
 			set_state(kcs_bmc, IDLE_STATE);
 
-		data = read_data(kcs_bmc);
+		data = kcs_bmc_read_data(kcs_bmc);
 		if (data != KCS_CMD_READ_BYTE) {
 			set_state(kcs_bmc, ERROR_STATE);
-			write_data(kcs_bmc, KCS_ZERO_DATA);
+			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
 			break;
 		}
 
 		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
-			write_data(kcs_bmc, KCS_ZERO_DATA);
+			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
 			kcs_bmc->phase = KCS_PHASE_IDLE;
 			break;
 		}
 
-		write_data(kcs_bmc,
+		kcs_bmc_write_data(kcs_bmc,
 			kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
 		break;
 
 	case KCS_PHASE_ABORT_ERROR1:
 		set_state(kcs_bmc, READ_STATE);
-		read_data(kcs_bmc);
-		write_data(kcs_bmc, kcs_bmc->error);
+		kcs_bmc_read_data(kcs_bmc);
+		kcs_bmc_write_data(kcs_bmc, kcs_bmc->error);
 		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
 		break;
 
 	case KCS_PHASE_ABORT_ERROR2:
 		set_state(kcs_bmc, IDLE_STATE);
-		read_data(kcs_bmc);
-		write_data(kcs_bmc, KCS_ZERO_DATA);
+		kcs_bmc_read_data(kcs_bmc);
+		kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
 		kcs_bmc->phase = KCS_PHASE_IDLE;
 		break;
 
@@ -168,9 +168,9 @@ static void kcs_bmc_handle_cmd(struct kcs_bmc *kcs_bmc)
 	u8 cmd;
 
 	set_state(kcs_bmc, WRITE_STATE);
-	write_data(kcs_bmc, KCS_ZERO_DATA);
+	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
 
-	cmd = read_data(kcs_bmc);
+	cmd = kcs_bmc_read_data(kcs_bmc);
 	switch (cmd) {
 	case KCS_CMD_WRITE_START:
 		kcs_bmc->phase = KCS_PHASE_WRITE_START;
@@ -212,7 +212,7 @@ int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
 
 	spin_lock_irqsave(&kcs_bmc->lock, flags);
 
-	status = read_status(kcs_bmc);
+	status = kcs_bmc_read_status(kcs_bmc);
 	if (status & KCS_STATUS_IBF) {
 		if (!kcs_bmc->running)
 			kcs_force_abort(kcs_bmc);
@@ -350,7 +350,7 @@ static ssize_t kcs_bmc_write(struct file *filp, const char __user *buf,
 		kcs_bmc->data_out_idx = 1;
 		kcs_bmc->data_out_len = count;
 		memcpy(kcs_bmc->data_out, kcs_bmc->kbuffer, count);
-		write_data(kcs_bmc, kcs_bmc->data_out[0]);
+		kcs_bmc_write_data(kcs_bmc, kcs_bmc->data_out[0]);
 		ret = count;
 	} else {
 		ret = -EINVAL;
@@ -373,13 +373,11 @@ static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
 
 	switch (cmd) {
 	case IPMI_BMC_IOCTL_SET_SMS_ATN:
-		update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
-				   KCS_STATUS_SMS_ATN);
+		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, KCS_STATUS_SMS_ATN);
 		break;
 
 	case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
-		update_status_bits(kcs_bmc, KCS_STATUS_SMS_ATN,
-				   0);
+		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, 0);
 		break;
 
 	case IPMI_BMC_IOCTL_FORCE_ABORT:

From 55ab48b4e356212fbe084ca110db73bb9a6e7058 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:45 +0930
Subject: [PATCH 2462/3292] ipmi: kcs_bmc: Split out kcs_bmc_cdev_ipmi

Take steps towards defining a coherent API to separate the KCS device
drivers from the userspace interface. Decreasing the coupling will
improve the separation of concerns and enable the introduction of
alternative userspace interfaces.

For now, simply split the chardev logic out to a separate file. The code
continues to build into the same module.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-5-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/Makefile            |   2 +-
 drivers/char/ipmi/kcs_bmc.c           | 423 +------------------------
 drivers/char/ipmi/kcs_bmc.h           |  10 +-
 drivers/char/ipmi/kcs_bmc_cdev_ipmi.c | 428 ++++++++++++++++++++++++++
 4 files changed, 451 insertions(+), 412 deletions(-)
 create mode 100644 drivers/char/ipmi/kcs_bmc_cdev_ipmi.c

diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index 0822adc2ec41..a302bc865370 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
-obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
+obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o kcs_bmc_cdev_ipmi.o
 obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
 obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
 obj-$(CONFIG_NPCM7XX_KCS_IPMI_BMC) += kcs_bmc_npcm7xx.o
diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index c4336c1f2d6d..ef5c48ffe74a 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -3,446 +3,51 @@
  * Copyright (c) 2015-2018, Intel Corporation.
  */
 
-#define pr_fmt(fmt) "kcs-bmc: " fmt
-
-#include <linux/errno.h>
-#include <linux/io.h>
-#include <linux/ipmi_bmc.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
 
 #include "kcs_bmc.h"
 
-#define DEVICE_NAME "ipmi-kcs"
-
-#define KCS_MSG_BUFSIZ    1000
-
-#define KCS_ZERO_DATA     0
-
-
-/* IPMI 2.0 - Table 9-1, KCS Interface Status Register Bits */
-#define KCS_STATUS_STATE(state) (state << 6)
-#define KCS_STATUS_STATE_MASK   GENMASK(7, 6)
-#define KCS_STATUS_CMD_DAT      BIT(3)
-#define KCS_STATUS_SMS_ATN      BIT(2)
-#define KCS_STATUS_IBF          BIT(1)
-#define KCS_STATUS_OBF          BIT(0)
-
-/* IPMI 2.0 - Table 9-2, KCS Interface State Bits */
-enum kcs_states {
-	IDLE_STATE  = 0,
-	READ_STATE  = 1,
-	WRITE_STATE = 2,
-	ERROR_STATE = 3,
-};
-
-/* IPMI 2.0 - Table 9-3, KCS Interface Control Codes */
-#define KCS_CMD_GET_STATUS_ABORT  0x60
-#define KCS_CMD_WRITE_START       0x61
-#define KCS_CMD_WRITE_END         0x62
-#define KCS_CMD_READ_BYTE         0x68
-
-static inline u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc)
+u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc)
 {
 	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
 }
+EXPORT_SYMBOL(kcs_bmc_read_data);
 
-static inline void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data)
+void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data)
 {
 	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
 }
+EXPORT_SYMBOL(kcs_bmc_write_data);
 
-static inline u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc)
+u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc)
 {
 	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
 }
+EXPORT_SYMBOL(kcs_bmc_read_status);
 
-static inline void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data)
+void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data)
 {
 	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
 }
+EXPORT_SYMBOL(kcs_bmc_write_status);
 
-static void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
+void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
 {
 	kcs_bmc->io_updateb(kcs_bmc, kcs_bmc->ioreg.str, mask, val);
 }
+EXPORT_SYMBOL(kcs_bmc_update_status);
 
-static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
-{
-	kcs_bmc_update_status(kcs_bmc, KCS_STATUS_STATE_MASK,
-					KCS_STATUS_STATE(state));
-}
-
-static void kcs_force_abort(struct kcs_bmc *kcs_bmc)
-{
-	set_state(kcs_bmc, ERROR_STATE);
-	kcs_bmc_read_data(kcs_bmc);
-	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-
-	kcs_bmc->phase = KCS_PHASE_ERROR;
-	kcs_bmc->data_in_avail = false;
-	kcs_bmc->data_in_idx = 0;
-}
-
-static void kcs_bmc_handle_data(struct kcs_bmc *kcs_bmc)
-{
-	u8 data;
-
-	switch (kcs_bmc->phase) {
-	case KCS_PHASE_WRITE_START:
-		kcs_bmc->phase = KCS_PHASE_WRITE_DATA;
-		fallthrough;
-
-	case KCS_PHASE_WRITE_DATA:
-		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
-			set_state(kcs_bmc, WRITE_STATE);
-			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
-						kcs_bmc_read_data(kcs_bmc);
-		} else {
-			kcs_force_abort(kcs_bmc);
-			kcs_bmc->error = KCS_LENGTH_ERROR;
-		}
-		break;
-
-	case KCS_PHASE_WRITE_END_CMD:
-		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
-			set_state(kcs_bmc, READ_STATE);
-			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
-						kcs_bmc_read_data(kcs_bmc);
-			kcs_bmc->phase = KCS_PHASE_WRITE_DONE;
-			kcs_bmc->data_in_avail = true;
-			wake_up_interruptible(&kcs_bmc->queue);
-		} else {
-			kcs_force_abort(kcs_bmc);
-			kcs_bmc->error = KCS_LENGTH_ERROR;
-		}
-		break;
-
-	case KCS_PHASE_READ:
-		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
-			set_state(kcs_bmc, IDLE_STATE);
-
-		data = kcs_bmc_read_data(kcs_bmc);
-		if (data != KCS_CMD_READ_BYTE) {
-			set_state(kcs_bmc, ERROR_STATE);
-			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-			break;
-		}
-
-		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
-			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-			kcs_bmc->phase = KCS_PHASE_IDLE;
-			break;
-		}
-
-		kcs_bmc_write_data(kcs_bmc,
-			kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
-		break;
-
-	case KCS_PHASE_ABORT_ERROR1:
-		set_state(kcs_bmc, READ_STATE);
-		kcs_bmc_read_data(kcs_bmc);
-		kcs_bmc_write_data(kcs_bmc, kcs_bmc->error);
-		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
-		break;
-
-	case KCS_PHASE_ABORT_ERROR2:
-		set_state(kcs_bmc, IDLE_STATE);
-		kcs_bmc_read_data(kcs_bmc);
-		kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-		kcs_bmc->phase = KCS_PHASE_IDLE;
-		break;
-
-	default:
-		kcs_force_abort(kcs_bmc);
-		break;
-	}
-}
-
-static void kcs_bmc_handle_cmd(struct kcs_bmc *kcs_bmc)
-{
-	u8 cmd;
-
-	set_state(kcs_bmc, WRITE_STATE);
-	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-
-	cmd = kcs_bmc_read_data(kcs_bmc);
-	switch (cmd) {
-	case KCS_CMD_WRITE_START:
-		kcs_bmc->phase = KCS_PHASE_WRITE_START;
-		kcs_bmc->error = KCS_NO_ERROR;
-		kcs_bmc->data_in_avail = false;
-		kcs_bmc->data_in_idx = 0;
-		break;
-
-	case KCS_CMD_WRITE_END:
-		if (kcs_bmc->phase != KCS_PHASE_WRITE_DATA) {
-			kcs_force_abort(kcs_bmc);
-			break;
-		}
-
-		kcs_bmc->phase = KCS_PHASE_WRITE_END_CMD;
-		break;
-
-	case KCS_CMD_GET_STATUS_ABORT:
-		if (kcs_bmc->error == KCS_NO_ERROR)
-			kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
-
-		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
-		kcs_bmc->data_in_avail = false;
-		kcs_bmc->data_in_idx = 0;
-		break;
-
-	default:
-		kcs_force_abort(kcs_bmc);
-		kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
-		break;
-	}
-}
-
+int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc);
 int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
 {
-	unsigned long flags;
-	int ret = -ENODATA;
-	u8 status;
-
-	spin_lock_irqsave(&kcs_bmc->lock, flags);
-
-	status = kcs_bmc_read_status(kcs_bmc);
-	if (status & KCS_STATUS_IBF) {
-		if (!kcs_bmc->running)
-			kcs_force_abort(kcs_bmc);
-		else if (status & KCS_STATUS_CMD_DAT)
-			kcs_bmc_handle_cmd(kcs_bmc);
-		else
-			kcs_bmc_handle_data(kcs_bmc);
-
-		ret = 0;
-	}
-
-	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
-
-	return ret;
+	return kcs_bmc_ipmi_event(kcs_bmc);
 }
 EXPORT_SYMBOL(kcs_bmc_handle_event);
 
-static inline struct kcs_bmc *to_kcs_bmc(struct file *filp)
-{
-	return container_of(filp->private_data, struct kcs_bmc, miscdev);
-}
-
-static int kcs_bmc_open(struct inode *inode, struct file *filp)
-{
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
-	int ret = 0;
-
-	spin_lock_irq(&kcs_bmc->lock);
-	if (!kcs_bmc->running)
-		kcs_bmc->running = 1;
-	else
-		ret = -EBUSY;
-	spin_unlock_irq(&kcs_bmc->lock);
-
-	return ret;
-}
-
-static __poll_t kcs_bmc_poll(struct file *filp, poll_table *wait)
-{
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
-	__poll_t mask = 0;
-
-	poll_wait(filp, &kcs_bmc->queue, wait);
-
-	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->data_in_avail)
-		mask |= EPOLLIN;
-	spin_unlock_irq(&kcs_bmc->lock);
-
-	return mask;
-}
-
-static ssize_t kcs_bmc_read(struct file *filp, char __user *buf,
-			    size_t count, loff_t *ppos)
-{
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
-	bool data_avail;
-	size_t data_len;
-	ssize_t ret;
-
-	if (!(filp->f_flags & O_NONBLOCK))
-		wait_event_interruptible(kcs_bmc->queue,
-					 kcs_bmc->data_in_avail);
-
-	mutex_lock(&kcs_bmc->mutex);
-
-	spin_lock_irq(&kcs_bmc->lock);
-	data_avail = kcs_bmc->data_in_avail;
-	if (data_avail) {
-		data_len = kcs_bmc->data_in_idx;
-		memcpy(kcs_bmc->kbuffer, kcs_bmc->data_in, data_len);
-	}
-	spin_unlock_irq(&kcs_bmc->lock);
-
-	if (!data_avail) {
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
-
-	if (count < data_len) {
-		pr_err("channel=%u with too large data : %zu\n",
-			kcs_bmc->channel, data_len);
-
-		spin_lock_irq(&kcs_bmc->lock);
-		kcs_force_abort(kcs_bmc);
-		spin_unlock_irq(&kcs_bmc->lock);
-
-		ret = -EOVERFLOW;
-		goto out_unlock;
-	}
-
-	if (copy_to_user(buf, kcs_bmc->kbuffer, data_len)) {
-		ret = -EFAULT;
-		goto out_unlock;
-	}
-
-	ret = data_len;
-
-	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->phase == KCS_PHASE_WRITE_DONE) {
-		kcs_bmc->phase = KCS_PHASE_WAIT_READ;
-		kcs_bmc->data_in_avail = false;
-		kcs_bmc->data_in_idx = 0;
-	} else {
-		ret = -EAGAIN;
-	}
-	spin_unlock_irq(&kcs_bmc->lock);
-
-out_unlock:
-	mutex_unlock(&kcs_bmc->mutex);
-
-	return ret;
-}
-
-static ssize_t kcs_bmc_write(struct file *filp, const char __user *buf,
-			     size_t count, loff_t *ppos)
-{
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
-	ssize_t ret;
-
-	/* a minimum response size '3' : netfn + cmd + ccode */
-	if (count < 3 || count > KCS_MSG_BUFSIZ)
-		return -EINVAL;
-
-	mutex_lock(&kcs_bmc->mutex);
-
-	if (copy_from_user(kcs_bmc->kbuffer, buf, count)) {
-		ret = -EFAULT;
-		goto out_unlock;
-	}
-
-	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
-		kcs_bmc->phase = KCS_PHASE_READ;
-		kcs_bmc->data_out_idx = 1;
-		kcs_bmc->data_out_len = count;
-		memcpy(kcs_bmc->data_out, kcs_bmc->kbuffer, count);
-		kcs_bmc_write_data(kcs_bmc, kcs_bmc->data_out[0]);
-		ret = count;
-	} else {
-		ret = -EINVAL;
-	}
-	spin_unlock_irq(&kcs_bmc->lock);
-
-out_unlock:
-	mutex_unlock(&kcs_bmc->mutex);
-
-	return ret;
-}
-
-static long kcs_bmc_ioctl(struct file *filp, unsigned int cmd,
-			  unsigned long arg)
-{
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
-	long ret = 0;
-
-	spin_lock_irq(&kcs_bmc->lock);
-
-	switch (cmd) {
-	case IPMI_BMC_IOCTL_SET_SMS_ATN:
-		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, KCS_STATUS_SMS_ATN);
-		break;
-
-	case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
-		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, 0);
-		break;
-
-	case IPMI_BMC_IOCTL_FORCE_ABORT:
-		kcs_force_abort(kcs_bmc);
-		break;
-
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	spin_unlock_irq(&kcs_bmc->lock);
-
-	return ret;
-}
-
-static int kcs_bmc_release(struct inode *inode, struct file *filp)
-{
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
-
-	spin_lock_irq(&kcs_bmc->lock);
-	kcs_bmc->running = 0;
-	kcs_force_abort(kcs_bmc);
-	spin_unlock_irq(&kcs_bmc->lock);
-
-	return 0;
-}
-
-static const struct file_operations kcs_bmc_fops = {
-	.owner          = THIS_MODULE,
-	.open           = kcs_bmc_open,
-	.read           = kcs_bmc_read,
-	.write          = kcs_bmc_write,
-	.release        = kcs_bmc_release,
-	.poll           = kcs_bmc_poll,
-	.unlocked_ioctl = kcs_bmc_ioctl,
-};
-
+struct kcs_bmc *kcs_bmc_ipmi_alloc(struct device *dev, int sizeof_priv, u32 channel);
 struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv, u32 channel)
 {
-	struct kcs_bmc *kcs_bmc;
-
-	kcs_bmc = devm_kzalloc(dev, sizeof(*kcs_bmc) + sizeof_priv, GFP_KERNEL);
-	if (!kcs_bmc)
-		return NULL;
-
-	spin_lock_init(&kcs_bmc->lock);
-	kcs_bmc->channel = channel;
-
-	mutex_init(&kcs_bmc->mutex);
-	init_waitqueue_head(&kcs_bmc->queue);
-
-	kcs_bmc->data_in = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-	kcs_bmc->data_out = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-	kcs_bmc->kbuffer = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-
-	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
-	kcs_bmc->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "%s%u",
-					       DEVICE_NAME, channel);
-	if (!kcs_bmc->data_in || !kcs_bmc->data_out || !kcs_bmc->kbuffer ||
-	    !kcs_bmc->miscdev.name)
-		return NULL;
-	kcs_bmc->miscdev.fops = &kcs_bmc_fops;
-
-	return kcs_bmc;
+	return kcs_bmc_ipmi_alloc(dev, sizeof_priv, channel);
 }
 EXPORT_SYMBOL(kcs_bmc_alloc);
 
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
index 970f53892f2d..febea0c8deb4 100644
--- a/drivers/char/ipmi/kcs_bmc.h
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -104,6 +104,12 @@ static inline void *kcs_bmc_priv(struct kcs_bmc *kcs_bmc)
 }
 
 int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
-struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv,
-					u32 channel);
+struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv, u32 channel);
+
+u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data);
+u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data);
+void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val);
+
 #endif /* __KCS_BMC_H__ */
diff --git a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
new file mode 100644
index 000000000000..82c77994e481
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2015-2018, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) "kcs-bmc: " fmt
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/ipmi_bmc.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "kcs_bmc.h"
+
+#define DEVICE_NAME "ipmi-kcs"
+
+#define KCS_MSG_BUFSIZ    1000
+
+#define KCS_ZERO_DATA     0
+
+
+/* IPMI 2.0 - Table 9-1, KCS Interface Status Register Bits */
+#define KCS_STATUS_STATE(state) (state << 6)
+#define KCS_STATUS_STATE_MASK   GENMASK(7, 6)
+#define KCS_STATUS_CMD_DAT      BIT(3)
+#define KCS_STATUS_SMS_ATN      BIT(2)
+#define KCS_STATUS_IBF          BIT(1)
+#define KCS_STATUS_OBF          BIT(0)
+
+/* IPMI 2.0 - Table 9-2, KCS Interface State Bits */
+enum kcs_states {
+	IDLE_STATE  = 0,
+	READ_STATE  = 1,
+	WRITE_STATE = 2,
+	ERROR_STATE = 3,
+};
+
+/* IPMI 2.0 - Table 9-3, KCS Interface Control Codes */
+#define KCS_CMD_GET_STATUS_ABORT  0x60
+#define KCS_CMD_WRITE_START       0x61
+#define KCS_CMD_WRITE_END         0x62
+#define KCS_CMD_READ_BYTE         0x68
+
+static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
+{
+	kcs_bmc_update_status(kcs_bmc, KCS_STATUS_STATE_MASK,
+					KCS_STATUS_STATE(state));
+}
+
+static void kcs_bmc_ipmi_force_abort(struct kcs_bmc *kcs_bmc)
+{
+	set_state(kcs_bmc, ERROR_STATE);
+	kcs_bmc_read_data(kcs_bmc);
+	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+
+	kcs_bmc->phase = KCS_PHASE_ERROR;
+	kcs_bmc->data_in_avail = false;
+	kcs_bmc->data_in_idx = 0;
+}
+
+static void kcs_bmc_ipmi_handle_data(struct kcs_bmc *kcs_bmc)
+{
+	u8 data;
+
+	switch (kcs_bmc->phase) {
+	case KCS_PHASE_WRITE_START:
+		kcs_bmc->phase = KCS_PHASE_WRITE_DATA;
+		fallthrough;
+
+	case KCS_PHASE_WRITE_DATA:
+		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
+			set_state(kcs_bmc, WRITE_STATE);
+			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
+						kcs_bmc_read_data(kcs_bmc);
+		} else {
+			kcs_bmc_ipmi_force_abort(kcs_bmc);
+			kcs_bmc->error = KCS_LENGTH_ERROR;
+		}
+		break;
+
+	case KCS_PHASE_WRITE_END_CMD:
+		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
+			set_state(kcs_bmc, READ_STATE);
+			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
+						kcs_bmc_read_data(kcs_bmc);
+			kcs_bmc->phase = KCS_PHASE_WRITE_DONE;
+			kcs_bmc->data_in_avail = true;
+			wake_up_interruptible(&kcs_bmc->queue);
+		} else {
+			kcs_bmc_ipmi_force_abort(kcs_bmc);
+			kcs_bmc->error = KCS_LENGTH_ERROR;
+		}
+		break;
+
+	case KCS_PHASE_READ:
+		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
+			set_state(kcs_bmc, IDLE_STATE);
+
+		data = kcs_bmc_read_data(kcs_bmc);
+		if (data != KCS_CMD_READ_BYTE) {
+			set_state(kcs_bmc, ERROR_STATE);
+			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+			break;
+		}
+
+		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
+			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+			kcs_bmc->phase = KCS_PHASE_IDLE;
+			break;
+		}
+
+		kcs_bmc_write_data(kcs_bmc,
+			kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
+		break;
+
+	case KCS_PHASE_ABORT_ERROR1:
+		set_state(kcs_bmc, READ_STATE);
+		kcs_bmc_read_data(kcs_bmc);
+		kcs_bmc_write_data(kcs_bmc, kcs_bmc->error);
+		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
+		break;
+
+	case KCS_PHASE_ABORT_ERROR2:
+		set_state(kcs_bmc, IDLE_STATE);
+		kcs_bmc_read_data(kcs_bmc);
+		kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+		kcs_bmc->phase = KCS_PHASE_IDLE;
+		break;
+
+	default:
+		kcs_bmc_ipmi_force_abort(kcs_bmc);
+		break;
+	}
+}
+
+static void kcs_bmc_ipmi_handle_cmd(struct kcs_bmc *kcs_bmc)
+{
+	u8 cmd;
+
+	set_state(kcs_bmc, WRITE_STATE);
+	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+
+	cmd = kcs_bmc_read_data(kcs_bmc);
+	switch (cmd) {
+	case KCS_CMD_WRITE_START:
+		kcs_bmc->phase = KCS_PHASE_WRITE_START;
+		kcs_bmc->error = KCS_NO_ERROR;
+		kcs_bmc->data_in_avail = false;
+		kcs_bmc->data_in_idx = 0;
+		break;
+
+	case KCS_CMD_WRITE_END:
+		if (kcs_bmc->phase != KCS_PHASE_WRITE_DATA) {
+			kcs_bmc_ipmi_force_abort(kcs_bmc);
+			break;
+		}
+
+		kcs_bmc->phase = KCS_PHASE_WRITE_END_CMD;
+		break;
+
+	case KCS_CMD_GET_STATUS_ABORT:
+		if (kcs_bmc->error == KCS_NO_ERROR)
+			kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
+
+		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
+		kcs_bmc->data_in_avail = false;
+		kcs_bmc->data_in_idx = 0;
+		break;
+
+	default:
+		kcs_bmc_ipmi_force_abort(kcs_bmc);
+		kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
+		break;
+	}
+}
+
+int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc);
+int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc)
+{
+	unsigned long flags;
+	int ret = -ENODATA;
+	u8 status;
+
+	spin_lock_irqsave(&kcs_bmc->lock, flags);
+
+	status = kcs_bmc_read_status(kcs_bmc);
+	if (status & KCS_STATUS_IBF) {
+		if (!kcs_bmc->running)
+			kcs_bmc_ipmi_force_abort(kcs_bmc);
+		else if (status & KCS_STATUS_CMD_DAT)
+			kcs_bmc_ipmi_handle_cmd(kcs_bmc);
+		else
+			kcs_bmc_ipmi_handle_data(kcs_bmc);
+
+		ret = 0;
+	}
+
+	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(kcs_bmc_ipmi_event);
+
+static inline struct kcs_bmc *to_kcs_bmc(struct file *filp)
+{
+	return container_of(filp->private_data, struct kcs_bmc, miscdev);
+}
+
+static int kcs_bmc_ipmi_open(struct inode *inode, struct file *filp)
+{
+	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	int ret = 0;
+
+	spin_lock_irq(&kcs_bmc->lock);
+	if (!kcs_bmc->running)
+		kcs_bmc->running = 1;
+	else
+		ret = -EBUSY;
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return ret;
+}
+
+static __poll_t kcs_bmc_ipmi_poll(struct file *filp, poll_table *wait)
+{
+	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	__poll_t mask = 0;
+
+	poll_wait(filp, &kcs_bmc->queue, wait);
+
+	spin_lock_irq(&kcs_bmc->lock);
+	if (kcs_bmc->data_in_avail)
+		mask |= EPOLLIN;
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return mask;
+}
+
+static ssize_t kcs_bmc_ipmi_read(struct file *filp, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	bool data_avail;
+	size_t data_len;
+	ssize_t ret;
+
+	if (!(filp->f_flags & O_NONBLOCK))
+		wait_event_interruptible(kcs_bmc->queue,
+					 kcs_bmc->data_in_avail);
+
+	mutex_lock(&kcs_bmc->mutex);
+
+	spin_lock_irq(&kcs_bmc->lock);
+	data_avail = kcs_bmc->data_in_avail;
+	if (data_avail) {
+		data_len = kcs_bmc->data_in_idx;
+		memcpy(kcs_bmc->kbuffer, kcs_bmc->data_in, data_len);
+	}
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	if (!data_avail) {
+		ret = -EAGAIN;
+		goto out_unlock;
+	}
+
+	if (count < data_len) {
+		pr_err("channel=%u with too large data : %zu\n",
+			kcs_bmc->channel, data_len);
+
+		spin_lock_irq(&kcs_bmc->lock);
+		kcs_bmc_ipmi_force_abort(kcs_bmc);
+		spin_unlock_irq(&kcs_bmc->lock);
+
+		ret = -EOVERFLOW;
+		goto out_unlock;
+	}
+
+	if (copy_to_user(buf, kcs_bmc->kbuffer, data_len)) {
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	ret = data_len;
+
+	spin_lock_irq(&kcs_bmc->lock);
+	if (kcs_bmc->phase == KCS_PHASE_WRITE_DONE) {
+		kcs_bmc->phase = KCS_PHASE_WAIT_READ;
+		kcs_bmc->data_in_avail = false;
+		kcs_bmc->data_in_idx = 0;
+	} else {
+		ret = -EAGAIN;
+	}
+	spin_unlock_irq(&kcs_bmc->lock);
+
+out_unlock:
+	mutex_unlock(&kcs_bmc->mutex);
+
+	return ret;
+}
+
+static ssize_t kcs_bmc_ipmi_write(struct file *filp, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	ssize_t ret;
+
+	/* a minimum response size '3' : netfn + cmd + ccode */
+	if (count < 3 || count > KCS_MSG_BUFSIZ)
+		return -EINVAL;
+
+	mutex_lock(&kcs_bmc->mutex);
+
+	if (copy_from_user(kcs_bmc->kbuffer, buf, count)) {
+		ret = -EFAULT;
+		goto out_unlock;
+	}
+
+	spin_lock_irq(&kcs_bmc->lock);
+	if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
+		kcs_bmc->phase = KCS_PHASE_READ;
+		kcs_bmc->data_out_idx = 1;
+		kcs_bmc->data_out_len = count;
+		memcpy(kcs_bmc->data_out, kcs_bmc->kbuffer, count);
+		kcs_bmc_write_data(kcs_bmc, kcs_bmc->data_out[0]);
+		ret = count;
+	} else {
+		ret = -EINVAL;
+	}
+	spin_unlock_irq(&kcs_bmc->lock);
+
+out_unlock:
+	mutex_unlock(&kcs_bmc->mutex);
+
+	return ret;
+}
+
+static long kcs_bmc_ipmi_ioctl(struct file *filp, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	long ret = 0;
+
+	spin_lock_irq(&kcs_bmc->lock);
+
+	switch (cmd) {
+	case IPMI_BMC_IOCTL_SET_SMS_ATN:
+		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, KCS_STATUS_SMS_ATN);
+		break;
+
+	case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
+		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, 0);
+		break;
+
+	case IPMI_BMC_IOCTL_FORCE_ABORT:
+		kcs_bmc_ipmi_force_abort(kcs_bmc);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return ret;
+}
+
+static int kcs_bmc_ipmi_release(struct inode *inode, struct file *filp)
+{
+	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+
+	spin_lock_irq(&kcs_bmc->lock);
+	kcs_bmc->running = 0;
+	kcs_bmc_ipmi_force_abort(kcs_bmc);
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return 0;
+}
+
+static const struct file_operations kcs_bmc_fops = {
+	.owner          = THIS_MODULE,
+	.open           = kcs_bmc_ipmi_open,
+	.read           = kcs_bmc_ipmi_read,
+	.write          = kcs_bmc_ipmi_write,
+	.release        = kcs_bmc_ipmi_release,
+	.poll           = kcs_bmc_ipmi_poll,
+	.unlocked_ioctl = kcs_bmc_ipmi_ioctl,
+};
+
+struct kcs_bmc *kcs_bmc_ipmi_alloc(struct device *dev, int sizeof_priv, u32 channel);
+struct kcs_bmc *kcs_bmc_ipmi_alloc(struct device *dev, int sizeof_priv, u32 channel)
+{
+	struct kcs_bmc *kcs_bmc;
+
+	kcs_bmc = devm_kzalloc(dev, sizeof(*kcs_bmc) + sizeof_priv, GFP_KERNEL);
+	if (!kcs_bmc)
+		return NULL;
+
+	spin_lock_init(&kcs_bmc->lock);
+	kcs_bmc->channel = channel;
+
+	mutex_init(&kcs_bmc->mutex);
+	init_waitqueue_head(&kcs_bmc->queue);
+
+	kcs_bmc->data_in = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	kcs_bmc->data_out = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	kcs_bmc->kbuffer = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+
+	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
+	kcs_bmc->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "%s%u",
+					       DEVICE_NAME, channel);
+	if (!kcs_bmc->data_in || !kcs_bmc->data_out || !kcs_bmc->kbuffer ||
+	    !kcs_bmc->miscdev.name)
+		return NULL;
+	kcs_bmc->miscdev.fops = &kcs_bmc_fops;
+
+	return kcs_bmc;
+}
+EXPORT_SYMBOL(kcs_bmc_ipmi_alloc);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_DESCRIPTION("KCS BMC to handle the IPMI request from system software");

From d7096970075ef47c9906fd241cc4939cc11ddd01 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:46 +0930
Subject: [PATCH 2463/3292] ipmi: kcs_bmc: Turn the driver data-structures
 inside-out

Make the KCS device drivers responsible for allocating their own memory.

Until now the private data for the device driver was allocated internal
to the private data for the chardev interface. This coupling required
the slightly awkward API of passing through the struct size for the
driver private data to the chardev constructor, and then retrieving a
pointer to the driver private data from the allocated chardev memory.

In addition to being awkward, the arrangement prevents the
implementation of alternative userspace interfaces as the device driver
private data is not independent.

Peel a layer off the onion and turn the data-structures inside out by
exploiting container_of() and embedding `struct kcs_device` in the
driver private data.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-6-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c           | 17 ++++++--
 drivers/char/ipmi/kcs_bmc.h           | 12 ++----
 drivers/char/ipmi/kcs_bmc_aspeed.c    | 54 +++++++++++++-----------
 drivers/char/ipmi/kcs_bmc_cdev_ipmi.c | 60 ++++++++++++++++++---------
 drivers/char/ipmi/kcs_bmc_npcm7xx.c   | 34 +++++++++------
 5 files changed, 110 insertions(+), 67 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index ef5c48ffe74a..07bb6747f29a 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -44,12 +44,21 @@ int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
 }
 EXPORT_SYMBOL(kcs_bmc_handle_event);
 
-struct kcs_bmc *kcs_bmc_ipmi_alloc(struct device *dev, int sizeof_priv, u32 channel);
-struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv, u32 channel)
+int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc);
+int kcs_bmc_add_device(struct kcs_bmc *kcs_bmc)
 {
-	return kcs_bmc_ipmi_alloc(dev, sizeof_priv, channel);
+	return kcs_bmc_ipmi_add_device(kcs_bmc);
 }
-EXPORT_SYMBOL(kcs_bmc_alloc);
+EXPORT_SYMBOL(kcs_bmc_add_device);
+
+int kcs_bmc_ipmi_remove_device(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_remove_device(struct kcs_bmc *kcs_bmc)
+{
+	if (kcs_bmc_ipmi_remove_device(kcs_bmc))
+		pr_warn("Failed to remove device for KCS channel %d\n",
+			kcs_bmc->channel);
+}
+EXPORT_SYMBOL(kcs_bmc_remove_device);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
index febea0c8deb4..f3ed89e7da98 100644
--- a/drivers/char/ipmi/kcs_bmc.h
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -67,6 +67,8 @@ struct kcs_ioreg {
 };
 
 struct kcs_bmc {
+	struct device *dev;
+
 	spinlock_t lock;
 
 	u32 channel;
@@ -94,17 +96,11 @@ struct kcs_bmc {
 	u8 *kbuffer;
 
 	struct miscdevice miscdev;
-
-	unsigned long priv[];
 };
 
-static inline void *kcs_bmc_priv(struct kcs_bmc *kcs_bmc)
-{
-	return kcs_bmc->priv;
-}
-
 int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
-struct kcs_bmc *kcs_bmc_alloc(struct device *dev, int sizeof_priv, u32 channel);
+int kcs_bmc_add_device(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_remove_device(struct kcs_bmc *kcs_bmc);
 
 u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc);
 void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data);
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 01ebb9da3d49..b07cbc423dd5 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -61,6 +61,8 @@
 #define LPC_STR4             0x11C
 
 struct aspeed_kcs_bmc {
+	struct kcs_bmc kcs_bmc;
+
 	struct regmap *map;
 };
 
@@ -69,9 +71,14 @@ struct aspeed_kcs_of_ops {
 	int (*get_io_address)(struct platform_device *pdev);
 };
 
+static inline struct aspeed_kcs_bmc *to_aspeed_kcs_bmc(struct kcs_bmc *kcs_bmc)
+{
+	return container_of(kcs_bmc, struct aspeed_kcs_bmc, kcs_bmc);
+}
+
 static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 {
-	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 	u32 val = 0;
 	int rc;
 
@@ -83,7 +90,7 @@ static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 
 static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 {
-	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 	int rc;
 
 	rc = regmap_write(priv->map, reg, data);
@@ -92,7 +99,7 @@ static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 
 static void aspeed_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val)
 {
-	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 	int rc;
 
 	rc = regmap_update_bits(priv->map, reg, mask, val);
@@ -114,7 +121,7 @@ static void aspeed_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val
  */
 static void aspeed_kcs_set_address(struct kcs_bmc *kcs_bmc, u16 addr)
 {
-	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 
 	switch (kcs_bmc->channel) {
 	case 1:
@@ -148,7 +155,7 @@ static void aspeed_kcs_set_address(struct kcs_bmc *kcs_bmc, u16 addr)
 
 static void aspeed_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
 {
-	struct aspeed_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 
 	switch (kcs_bmc->channel) {
 	case 1:
@@ -325,17 +332,16 @@ static int aspeed_kcs_of_v2_get_io_address(struct platform_device *pdev)
 static int aspeed_kcs_probe(struct platform_device *pdev)
 {
 	const struct aspeed_kcs_of_ops *ops;
-	struct device *dev = &pdev->dev;
 	struct aspeed_kcs_bmc *priv;
 	struct kcs_bmc *kcs_bmc;
 	struct device_node *np;
 	int rc, channel, addr;
 
-	np = dev->of_node->parent;
+	np = pdev->dev.of_node->parent;
 	if (!of_device_is_compatible(np, "aspeed,ast2400-lpc-v2") &&
 	    !of_device_is_compatible(np, "aspeed,ast2500-lpc-v2") &&
 	    !of_device_is_compatible(np, "aspeed,ast2600-lpc-v2")) {
-		dev_err(dev, "unsupported LPC device binding\n");
+		dev_err(&pdev->dev, "unsupported LPC device binding\n");
 		return -ENODEV;
 	}
 	ops = of_device_get_match_data(&pdev->dev);
@@ -346,20 +352,22 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	if (channel < 0)
 		return channel;
 
-	kcs_bmc = kcs_bmc_alloc(&pdev->dev, sizeof(struct aspeed_kcs_bmc), channel);
-	if (!kcs_bmc)
+	addr = ops->get_io_address(pdev);
+	if (addr < 0)
+		return addr;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
+	kcs_bmc = &priv->kcs_bmc;
+	kcs_bmc->dev = &pdev->dev;
+	kcs_bmc->channel = channel;
 	kcs_bmc->ioreg = ast_kcs_bmc_ioregs[channel - 1];
 	kcs_bmc->io_inputb = aspeed_kcs_inb;
 	kcs_bmc->io_outputb = aspeed_kcs_outb;
 	kcs_bmc->io_updateb = aspeed_kcs_updateb;
 
-	addr = ops->get_io_address(pdev);
-	if (addr < 0)
-		return addr;
-
-	priv = kcs_bmc_priv(kcs_bmc);
 	priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
 	if (IS_ERR(priv->map)) {
 		dev_err(&pdev->dev, "Couldn't get regmap\n");
@@ -372,29 +380,27 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	dev_set_drvdata(dev, kcs_bmc);
+	platform_set_drvdata(pdev, priv);
 
 	aspeed_kcs_enable_channel(kcs_bmc, true);
 
-	rc = misc_register(&kcs_bmc->miscdev);
+	rc = kcs_bmc_add_device(&priv->kcs_bmc);
 	if (rc) {
-		dev_err(dev, "Unable to register device\n");
+		dev_warn(&pdev->dev, "Failed to register channel %d: %d\n", kcs_bmc->channel, rc);
 		return rc;
 	}
 
-	dev_dbg(&pdev->dev,
-		"Probed KCS device %d (IDR=0x%x, ODR=0x%x, STR=0x%x)\n",
-		kcs_bmc->channel, kcs_bmc->ioreg.idr, kcs_bmc->ioreg.odr,
-		kcs_bmc->ioreg.str);
+	dev_info(&pdev->dev, "Initialised channel %d at 0x%x\n", kcs_bmc->channel, addr);
 
 	return 0;
 }
 
 static int aspeed_kcs_remove(struct platform_device *pdev)
 {
-	struct kcs_bmc *kcs_bmc = dev_get_drvdata(&pdev->dev);
+	struct aspeed_kcs_bmc *priv = platform_get_drvdata(pdev);
+	struct kcs_bmc *kcs_bmc = &priv->kcs_bmc;
 
-	misc_deregister(&kcs_bmc->miscdev);
+	kcs_bmc_remove_device(kcs_bmc);
 
 	return 0;
 }
diff --git a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
index 82c77994e481..5060643bf530 100644
--- a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
+++ b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
@@ -382,7 +382,7 @@ static int kcs_bmc_ipmi_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static const struct file_operations kcs_bmc_fops = {
+static const struct file_operations kcs_bmc_ipmi_fops = {
 	.owner          = THIS_MODULE,
 	.open           = kcs_bmc_ipmi_open,
 	.read           = kcs_bmc_ipmi_read,
@@ -392,36 +392,58 @@ static const struct file_operations kcs_bmc_fops = {
 	.unlocked_ioctl = kcs_bmc_ipmi_ioctl,
 };
 
-struct kcs_bmc *kcs_bmc_ipmi_alloc(struct device *dev, int sizeof_priv, u32 channel);
-struct kcs_bmc *kcs_bmc_ipmi_alloc(struct device *dev, int sizeof_priv, u32 channel)
+int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc);
+int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc)
 {
-	struct kcs_bmc *kcs_bmc;
-
-	kcs_bmc = devm_kzalloc(dev, sizeof(*kcs_bmc) + sizeof_priv, GFP_KERNEL);
-	if (!kcs_bmc)
-		return NULL;
+	int rc;
 
 	spin_lock_init(&kcs_bmc->lock);
-	kcs_bmc->channel = channel;
-
 	mutex_init(&kcs_bmc->mutex);
 	init_waitqueue_head(&kcs_bmc->queue);
 
-	kcs_bmc->data_in = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-	kcs_bmc->data_out = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-	kcs_bmc->kbuffer = devm_kmalloc(dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	kcs_bmc->data_in = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	kcs_bmc->data_out = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	kcs_bmc->kbuffer = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
 
 	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
-	kcs_bmc->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "%s%u",
-					       DEVICE_NAME, channel);
+	kcs_bmc->miscdev.name = devm_kasprintf(kcs_bmc->dev, GFP_KERNEL, "%s%u",
+					       DEVICE_NAME, kcs_bmc->channel);
 	if (!kcs_bmc->data_in || !kcs_bmc->data_out || !kcs_bmc->kbuffer ||
 	    !kcs_bmc->miscdev.name)
-		return NULL;
-	kcs_bmc->miscdev.fops = &kcs_bmc_fops;
+		return -ENOMEM;
 
-	return kcs_bmc;
+	kcs_bmc->miscdev.fops = &kcs_bmc_ipmi_fops;
+
+	rc = misc_register(&kcs_bmc->miscdev);
+	if (rc) {
+		dev_err(kcs_bmc->dev, "Unable to register device: %d\n", rc);
+		return rc;
+	}
+
+	dev_info(kcs_bmc->dev, "Initialised IPMI client for channel %d", kcs_bmc->channel);
+
+	return 0;
 }
-EXPORT_SYMBOL(kcs_bmc_ipmi_alloc);
+EXPORT_SYMBOL(kcs_bmc_ipmi_add_device);
+
+int kcs_bmc_ipmi_remove_device(struct kcs_bmc *kcs_bmc);
+int kcs_bmc_ipmi_remove_device(struct kcs_bmc *kcs_bmc)
+{
+	misc_deregister(&kcs_bmc->miscdev);
+
+	spin_lock_irq(&kcs_bmc->lock);
+	kcs_bmc->running = 0;
+	kcs_bmc_ipmi_force_abort(kcs_bmc);
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	devm_kfree(kcs_bmc->dev, kcs_bmc->kbuffer);
+	devm_kfree(kcs_bmc->dev, kcs_bmc->data_out);
+	devm_kfree(kcs_bmc->dev, kcs_bmc->data_in);
+	devm_kfree(kcs_bmc->dev, kcs_bmc);
+
+	return 0;
+}
+EXPORT_SYMBOL(kcs_bmc_ipmi_remove_device);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index 1f44aadec9e8..e06250285113 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -65,6 +65,8 @@ struct npcm7xx_kcs_reg {
 };
 
 struct npcm7xx_kcs_bmc {
+	struct kcs_bmc kcs_bmc;
+
 	struct regmap *map;
 
 	const struct npcm7xx_kcs_reg *reg;
@@ -76,9 +78,14 @@ static const struct npcm7xx_kcs_reg npcm7xx_kcs_reg_tbl[KCS_CHANNEL_MAX] = {
 	{ .sts = KCS3ST, .dob = KCS3DO, .dib = KCS3DI, .ctl = KCS3CTL, .ie = KCS3IE },
 };
 
+static inline struct npcm7xx_kcs_bmc *to_npcm7xx_kcs_bmc(struct kcs_bmc *kcs_bmc)
+{
+	return container_of(kcs_bmc, struct npcm7xx_kcs_bmc, kcs_bmc);
+}
+
 static u8 npcm7xx_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 {
-	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 	u32 val = 0;
 	int rc;
 
@@ -90,7 +97,7 @@ static u8 npcm7xx_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 
 static void npcm7xx_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 {
-	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 	int rc;
 
 	rc = regmap_write(priv->map, reg, data);
@@ -99,7 +106,7 @@ static void npcm7xx_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 
 static void npcm7xx_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 data)
 {
-	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 	int rc;
 
 	rc = regmap_update_bits(priv->map, reg, mask, data);
@@ -108,7 +115,7 @@ static void npcm7xx_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 da
 
 static void npcm7xx_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
 {
-	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 
 	regmap_update_bits(priv->map, priv->reg->ctl, KCS_CTL_IBFIE,
 			   enable ? KCS_CTL_IBFIE : 0);
@@ -155,11 +162,10 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	kcs_bmc = kcs_bmc_alloc(dev, sizeof(*priv), chan);
-	if (!kcs_bmc)
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
-	priv = kcs_bmc_priv(kcs_bmc);
 	priv->map = syscon_node_to_regmap(dev->parent->of_node);
 	if (IS_ERR(priv->map)) {
 		dev_err(dev, "Couldn't get regmap\n");
@@ -167,6 +173,9 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 	}
 	priv->reg = &npcm7xx_kcs_reg_tbl[chan - 1];
 
+	kcs_bmc = &priv->kcs_bmc;
+	kcs_bmc->dev = &pdev->dev;
+	kcs_bmc->channel = chan;
 	kcs_bmc->ioreg.idr = priv->reg->dib;
 	kcs_bmc->ioreg.odr = priv->reg->dob;
 	kcs_bmc->ioreg.str = priv->reg->sts;
@@ -174,16 +183,16 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 	kcs_bmc->io_outputb = npcm7xx_kcs_outb;
 	kcs_bmc->io_updateb = npcm7xx_kcs_updateb;
 
-	dev_set_drvdata(dev, kcs_bmc);
+	platform_set_drvdata(pdev, priv);
 
 	npcm7xx_kcs_enable_channel(kcs_bmc, true);
 	rc = npcm7xx_kcs_config_irq(kcs_bmc, pdev);
 	if (rc)
 		return rc;
 
-	rc = misc_register(&kcs_bmc->miscdev);
+	rc = kcs_bmc_add_device(kcs_bmc);
 	if (rc) {
-		dev_err(dev, "Unable to register device\n");
+		dev_warn(&pdev->dev, "Failed to register channel %d: %d\n", kcs_bmc->channel, rc);
 		return rc;
 	}
 
@@ -196,9 +205,10 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 
 static int npcm7xx_kcs_remove(struct platform_device *pdev)
 {
-	struct kcs_bmc *kcs_bmc = dev_get_drvdata(&pdev->dev);
+	struct npcm7xx_kcs_bmc *priv = platform_get_drvdata(pdev);
+	struct kcs_bmc *kcs_bmc = &priv->kcs_bmc;
 
-	misc_deregister(&kcs_bmc->miscdev);
+	kcs_bmc_remove_device(kcs_bmc);
 
 	return 0;
 }

From faae6e391eda73a5b9870c78349064282a625bfa Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:47 +0930
Subject: [PATCH 2464/3292] ipmi: kcs_bmc: Split headers into device and client

Strengthen the distinction between code that abstracts the
implementation of the KCS behaviours (device drivers) and code that
exploits KCS behaviours (clients). Neither needs to know about the APIs
required by the other, so provide separate headers.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-Id: <20210608104757.582199-7-andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c           | 23 ++++++++++------
 drivers/char/ipmi/kcs_bmc.h           | 27 +++++++++----------
 drivers/char/ipmi/kcs_bmc_aspeed.c    | 17 ++++++------
 drivers/char/ipmi/kcs_bmc_cdev_ipmi.c | 39 ++++++++++++++++++---------
 drivers/char/ipmi/kcs_bmc_client.h    | 27 +++++++++++++++++++
 drivers/char/ipmi/kcs_bmc_device.h    | 19 +++++++++++++
 drivers/char/ipmi/kcs_bmc_npcm7xx.c   | 17 ++++++------
 7 files changed, 117 insertions(+), 52 deletions(-)
 create mode 100644 drivers/char/ipmi/kcs_bmc_client.h
 create mode 100644 drivers/char/ipmi/kcs_bmc_device.h

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index 07bb6747f29a..c347cf6f9337 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -1,46 +1,52 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015-2018, Intel Corporation.
+ * Copyright (c) 2021, IBM Corp.
  */
 
 #include <linux/module.h>
 
 #include "kcs_bmc.h"
 
+/* Implement both the device and client interfaces here */
+#include "kcs_bmc_device.h"
+#include "kcs_bmc_client.h"
+
+/* Consumer data access */
+
 u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc)
 {
-	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
+	return kcs_bmc->ops->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
 }
 EXPORT_SYMBOL(kcs_bmc_read_data);
 
 void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data)
 {
-	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
+	kcs_bmc->ops->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
 }
 EXPORT_SYMBOL(kcs_bmc_write_data);
 
 u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc)
 {
-	return kcs_bmc->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
+	return kcs_bmc->ops->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
 }
 EXPORT_SYMBOL(kcs_bmc_read_status);
 
 void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data)
 {
-	kcs_bmc->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
+	kcs_bmc->ops->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
 }
 EXPORT_SYMBOL(kcs_bmc_write_status);
 
 void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
 {
-	kcs_bmc->io_updateb(kcs_bmc, kcs_bmc->ioreg.str, mask, val);
+	kcs_bmc->ops->io_updateb(kcs_bmc, kcs_bmc->ioreg.str, mask, val);
 }
 EXPORT_SYMBOL(kcs_bmc_update_status);
 
-int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
+irqreturn_t kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
 {
-	return kcs_bmc_ipmi_event(kcs_bmc);
+	return kcs_bmc->client.ops->event(&kcs_bmc->client);
 }
 EXPORT_SYMBOL(kcs_bmc_handle_event);
 
@@ -62,4 +68,5 @@ EXPORT_SYMBOL(kcs_bmc_remove_device);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
 MODULE_DESCRIPTION("KCS BMC to handle the IPMI request from system software");
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
index f3ed89e7da98..f42843d240ed 100644
--- a/drivers/char/ipmi/kcs_bmc.h
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -8,6 +8,12 @@
 
 #include <linux/miscdevice.h>
 
+#include "kcs_bmc_client.h"
+
+#define KCS_BMC_STR_OBF		BIT(0)
+#define KCS_BMC_STR_IBF		BIT(1)
+#define KCS_BMC_STR_CMD_DAT	BIT(3)
+
 /* Different phases of the KCS BMC module.
  *  KCS_PHASE_IDLE:
  *            BMC should not be expecting nor sending any data.
@@ -66,19 +72,21 @@ struct kcs_ioreg {
 	u32 str;
 };
 
+struct kcs_bmc_device_ops;
+
 struct kcs_bmc {
 	struct device *dev;
 
+	const struct kcs_bmc_device_ops *ops;
+
+	struct kcs_bmc_client client;
+
 	spinlock_t lock;
 
 	u32 channel;
 	int running;
 
-	/* Setup by BMC KCS controller driver */
 	struct kcs_ioreg ioreg;
-	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
-	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
-	void (*io_updateb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val);
 
 	enum kcs_phases phase;
 	enum kcs_errors error;
@@ -97,15 +105,4 @@ struct kcs_bmc {
 
 	struct miscdevice miscdev;
 };
-
-int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_add_device(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_remove_device(struct kcs_bmc *kcs_bmc);
-
-u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data);
-u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data);
-void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val);
-
 #endif /* __KCS_BMC_H__ */
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index b07cbc423dd5..fdfba745302a 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 
-#include "kcs_bmc.h"
+#include "kcs_bmc_device.h"
 
 
 #define DEVICE_NAME     "ast-kcs-bmc"
@@ -220,14 +220,17 @@ static void aspeed_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
 	}
 }
 
+static const struct kcs_bmc_device_ops aspeed_kcs_ops = {
+	.io_inputb = aspeed_kcs_inb,
+	.io_outputb = aspeed_kcs_outb,
+	.io_updateb = aspeed_kcs_updateb,
+};
+
 static irqreturn_t aspeed_kcs_irq(int irq, void *arg)
 {
 	struct kcs_bmc *kcs_bmc = arg;
 
-	if (!kcs_bmc_handle_event(kcs_bmc))
-		return IRQ_HANDLED;
-
-	return IRQ_NONE;
+	return kcs_bmc_handle_event(kcs_bmc);
 }
 
 static int aspeed_kcs_config_irq(struct kcs_bmc *kcs_bmc,
@@ -364,9 +367,7 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	kcs_bmc->dev = &pdev->dev;
 	kcs_bmc->channel = channel;
 	kcs_bmc->ioreg = ast_kcs_bmc_ioregs[channel - 1];
-	kcs_bmc->io_inputb = aspeed_kcs_inb;
-	kcs_bmc->io_outputb = aspeed_kcs_outb;
-	kcs_bmc->io_updateb = aspeed_kcs_updateb;
+	kcs_bmc->ops = &aspeed_kcs_ops;
 
 	priv->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
 	if (IS_ERR(priv->map)) {
diff --git a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
index 5060643bf530..476ad6d541d5 100644
--- a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
+++ b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
@@ -22,7 +22,6 @@
 
 #define KCS_ZERO_DATA     0
 
-
 /* IPMI 2.0 - Table 9-1, KCS Interface Status Register Bits */
 #define KCS_STATUS_STATE(state) (state << 6)
 #define KCS_STATUS_STATE_MASK   GENMASK(7, 6)
@@ -179,12 +178,19 @@ static void kcs_bmc_ipmi_handle_cmd(struct kcs_bmc *kcs_bmc)
 	}
 }
 
-int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc)
+static inline struct kcs_bmc *client_to_kcs_bmc(struct kcs_bmc_client *client)
 {
+	return container_of(client, struct kcs_bmc, client);
+}
+
+static irqreturn_t kcs_bmc_ipmi_event(struct kcs_bmc_client *client)
+{
+	struct kcs_bmc *kcs_bmc;
 	unsigned long flags;
-	int ret = -ENODATA;
 	u8 status;
+	int ret;
+
+	kcs_bmc = client_to_kcs_bmc(client);
 
 	spin_lock_irqsave(&kcs_bmc->lock, flags);
 
@@ -197,23 +203,28 @@ int kcs_bmc_ipmi_event(struct kcs_bmc *kcs_bmc)
 		else
 			kcs_bmc_ipmi_handle_data(kcs_bmc);
 
-		ret = 0;
+		ret = IRQ_HANDLED;
+	} else {
+		ret = IRQ_NONE;
 	}
 
 	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
 
 	return ret;
 }
-EXPORT_SYMBOL(kcs_bmc_ipmi_event);
 
-static inline struct kcs_bmc *to_kcs_bmc(struct file *filp)
+static const struct kcs_bmc_client_ops kcs_bmc_ipmi_client_ops = {
+	.event = kcs_bmc_ipmi_event,
+};
+
+static inline struct kcs_bmc *file_to_kcs_bmc(struct file *filp)
 {
 	return container_of(filp->private_data, struct kcs_bmc, miscdev);
 }
 
 static int kcs_bmc_ipmi_open(struct inode *inode, struct file *filp)
 {
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
 	int ret = 0;
 
 	spin_lock_irq(&kcs_bmc->lock);
@@ -228,7 +239,7 @@ static int kcs_bmc_ipmi_open(struct inode *inode, struct file *filp)
 
 static __poll_t kcs_bmc_ipmi_poll(struct file *filp, poll_table *wait)
 {
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
 	__poll_t mask = 0;
 
 	poll_wait(filp, &kcs_bmc->queue, wait);
@@ -244,7 +255,7 @@ static __poll_t kcs_bmc_ipmi_poll(struct file *filp, poll_table *wait)
 static ssize_t kcs_bmc_ipmi_read(struct file *filp, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
 	bool data_avail;
 	size_t data_len;
 	ssize_t ret;
@@ -306,7 +317,7 @@ out_unlock:
 static ssize_t kcs_bmc_ipmi_write(struct file *filp, const char __user *buf,
 			     size_t count, loff_t *ppos)
 {
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
 	ssize_t ret;
 
 	/* a minimum response size '3' : netfn + cmd + ccode */
@@ -342,7 +353,7 @@ out_unlock:
 static long kcs_bmc_ipmi_ioctl(struct file *filp, unsigned int cmd,
 			  unsigned long arg)
 {
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
 	long ret = 0;
 
 	spin_lock_irq(&kcs_bmc->lock);
@@ -372,7 +383,7 @@ static long kcs_bmc_ipmi_ioctl(struct file *filp, unsigned int cmd,
 
 static int kcs_bmc_ipmi_release(struct inode *inode, struct file *filp)
 {
-	struct kcs_bmc *kcs_bmc = to_kcs_bmc(filp);
+	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
 
 	spin_lock_irq(&kcs_bmc->lock);
 	kcs_bmc->running = 0;
@@ -401,6 +412,8 @@ int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc)
 	mutex_init(&kcs_bmc->mutex);
 	init_waitqueue_head(&kcs_bmc->queue);
 
+	kcs_bmc->client.dev = kcs_bmc;
+	kcs_bmc->client.ops = &kcs_bmc_ipmi_client_ops;
 	kcs_bmc->data_in = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
 	kcs_bmc->data_out = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
 	kcs_bmc->kbuffer = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
diff --git a/drivers/char/ipmi/kcs_bmc_client.h b/drivers/char/ipmi/kcs_bmc_client.h
new file mode 100644
index 000000000000..dad8774aebce
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc_client.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, IBM Corp. */
+
+#ifndef __KCS_BMC_CONSUMER_H__
+#define __KCS_BMC_CONSUMER_H__
+
+#include <linux/irqreturn.h>
+
+struct kcs_bmc;
+struct kcs_bmc_client_ops;
+
+struct kcs_bmc_client {
+	const struct kcs_bmc_client_ops *ops;
+
+	struct kcs_bmc *dev;
+};
+
+struct kcs_bmc_client_ops {
+	irqreturn_t (*event)(struct kcs_bmc_client *client);
+};
+
+u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data);
+u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data);
+void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val);
+#endif
diff --git a/drivers/char/ipmi/kcs_bmc_device.h b/drivers/char/ipmi/kcs_bmc_device.h
new file mode 100644
index 000000000000..dd8bf1307ad2
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc_device.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, IBM Corp. */
+
+#ifndef __KCS_BMC_DEVICE_H__
+#define __KCS_BMC_DEVICE_H__
+
+#include "kcs_bmc.h"
+
+struct kcs_bmc_device_ops {
+	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
+	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
+	void (*io_updateb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 b);
+};
+
+irqreturn_t kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
+int kcs_bmc_add_device(struct kcs_bmc *kcs_bmc);
+void kcs_bmc_remove_device(struct kcs_bmc *kcs_bmc);
+
+#endif
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index e06250285113..ebb691af28c5 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -17,7 +17,7 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
-#include "kcs_bmc.h"
+#include "kcs_bmc_device.h"
 
 #define DEVICE_NAME	"npcm-kcs-bmc"
 #define KCS_CHANNEL_MAX	3
@@ -128,10 +128,7 @@ static irqreturn_t npcm7xx_kcs_irq(int irq, void *arg)
 {
 	struct kcs_bmc *kcs_bmc = arg;
 
-	if (!kcs_bmc_handle_event(kcs_bmc))
-		return IRQ_HANDLED;
-
-	return IRQ_NONE;
+	return kcs_bmc_handle_event(kcs_bmc);
 }
 
 static int npcm7xx_kcs_config_irq(struct kcs_bmc *kcs_bmc,
@@ -148,6 +145,12 @@ static int npcm7xx_kcs_config_irq(struct kcs_bmc *kcs_bmc,
 				dev_name(dev), kcs_bmc);
 }
 
+static const struct kcs_bmc_device_ops npcm7xx_kcs_ops = {
+	.io_inputb = npcm7xx_kcs_inb,
+	.io_outputb = npcm7xx_kcs_outb,
+	.io_updateb = npcm7xx_kcs_updateb,
+};
+
 static int npcm7xx_kcs_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -179,9 +182,7 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 	kcs_bmc->ioreg.idr = priv->reg->dib;
 	kcs_bmc->ioreg.odr = priv->reg->dob;
 	kcs_bmc->ioreg.str = priv->reg->sts;
-	kcs_bmc->io_inputb = npcm7xx_kcs_inb;
-	kcs_bmc->io_outputb = npcm7xx_kcs_outb;
-	kcs_bmc->io_updateb = npcm7xx_kcs_updateb;
+	kcs_bmc->ops = &npcm7xx_kcs_ops;
 
 	platform_set_drvdata(pdev, priv);
 

From d4e7ac68f771addc19352121706d8584eb0166cd Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:48 +0930
Subject: [PATCH 2465/3292] ipmi: kcs_bmc: Strip private client data from
 struct kcs_bmc

Move all client-private data out of `struct kcs_bmc` into the KCS client
implementation.

With this change the KCS BMC core code now only concerns itself with
abstract `struct kcs_bmc` and `struct kcs_bmc_client` types, achieving
expected separation of concerns. Further, the change clears the path for
implementation of alternative userspace interfaces.

The chardev data-structures are rearranged in the same manner applied to
the KCS device driver data-structures in an earlier patch - `struct
kcs_bmc_client` is embedded in the client's private data and we exploit
container_of() to translate as required.

Finally, now that it is free of client data, `struct kcs_bmc` is renamed
to `struct kcs_bmc_device` to contrast `struct kcs_bmc_client`.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-8-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c           |  70 ++++-
 drivers/char/ipmi/kcs_bmc.h           |  83 +----
 drivers/char/ipmi/kcs_bmc_aspeed.c    |  22 +-
 drivers/char/ipmi/kcs_bmc_cdev_ipmi.c | 428 ++++++++++++++++----------
 drivers/char/ipmi/kcs_bmc_client.h    |  28 +-
 drivers/char/ipmi/kcs_bmc_device.h    |  14 +-
 drivers/char/ipmi/kcs_bmc_npcm7xx.c   |  20 +-
 7 files changed, 369 insertions(+), 296 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index c347cf6f9337..2ec934f0dba3 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2021, IBM Corp.
  */
 
+#include <linux/device.h>
 #include <linux/module.h>
 
 #include "kcs_bmc.h"
@@ -14,51 +15,98 @@
 
 /* Consumer data access */
 
-u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc)
+u8 kcs_bmc_read_data(struct kcs_bmc_device *kcs_bmc)
 {
 	return kcs_bmc->ops->io_inputb(kcs_bmc, kcs_bmc->ioreg.idr);
 }
 EXPORT_SYMBOL(kcs_bmc_read_data);
 
-void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data)
+void kcs_bmc_write_data(struct kcs_bmc_device *kcs_bmc, u8 data)
 {
 	kcs_bmc->ops->io_outputb(kcs_bmc, kcs_bmc->ioreg.odr, data);
 }
 EXPORT_SYMBOL(kcs_bmc_write_data);
 
-u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc)
+u8 kcs_bmc_read_status(struct kcs_bmc_device *kcs_bmc)
 {
 	return kcs_bmc->ops->io_inputb(kcs_bmc, kcs_bmc->ioreg.str);
 }
 EXPORT_SYMBOL(kcs_bmc_read_status);
 
-void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data)
+void kcs_bmc_write_status(struct kcs_bmc_device *kcs_bmc, u8 data)
 {
 	kcs_bmc->ops->io_outputb(kcs_bmc, kcs_bmc->ioreg.str, data);
 }
 EXPORT_SYMBOL(kcs_bmc_write_status);
 
-void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val)
+void kcs_bmc_update_status(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 val)
 {
 	kcs_bmc->ops->io_updateb(kcs_bmc, kcs_bmc->ioreg.str, mask, val);
 }
 EXPORT_SYMBOL(kcs_bmc_update_status);
 
-irqreturn_t kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
+irqreturn_t kcs_bmc_handle_event(struct kcs_bmc_device *kcs_bmc)
 {
-	return kcs_bmc->client.ops->event(&kcs_bmc->client);
+	struct kcs_bmc_client *client;
+	irqreturn_t rc;
+
+	spin_lock(&kcs_bmc->lock);
+	client = kcs_bmc->client;
+	if (client) {
+		rc = client->ops->event(client);
+	} else {
+		u8 status;
+
+		status = kcs_bmc_read_status(kcs_bmc);
+		if (status & KCS_BMC_STR_IBF) {
+			/* Ack the event by reading the data */
+			kcs_bmc_read_data(kcs_bmc);
+			rc = IRQ_HANDLED;
+		} else {
+			rc = IRQ_NONE;
+		}
+	}
+	spin_unlock(&kcs_bmc->lock);
+
+	return rc;
 }
 EXPORT_SYMBOL(kcs_bmc_handle_event);
 
-int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_add_device(struct kcs_bmc *kcs_bmc)
+int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
+{
+	int rc;
+
+	spin_lock_irq(&kcs_bmc->lock);
+	if (kcs_bmc->client) {
+		rc = -EBUSY;
+	} else {
+		kcs_bmc->client = client;
+		rc = 0;
+	}
+	spin_unlock_irq(&kcs_bmc->lock);
+
+	return rc;
+}
+EXPORT_SYMBOL(kcs_bmc_enable_device);
+
+void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
+{
+	spin_lock_irq(&kcs_bmc->lock);
+	if (client == kcs_bmc->client)
+		kcs_bmc->client = NULL;
+	spin_unlock_irq(&kcs_bmc->lock);
+}
+EXPORT_SYMBOL(kcs_bmc_disable_device);
+
+int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc);
+int kcs_bmc_add_device(struct kcs_bmc_device *kcs_bmc)
 {
 	return kcs_bmc_ipmi_add_device(kcs_bmc);
 }
 EXPORT_SYMBOL(kcs_bmc_add_device);
 
-int kcs_bmc_ipmi_remove_device(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_remove_device(struct kcs_bmc *kcs_bmc)
+int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc);
+void kcs_bmc_remove_device(struct kcs_bmc_device *kcs_bmc)
 {
 	if (kcs_bmc_ipmi_remove_device(kcs_bmc))
 		pr_warn("Failed to remove device for KCS channel %d\n",
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
index f42843d240ed..3f0cce315b4f 100644
--- a/drivers/char/ipmi/kcs_bmc.h
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -6,61 +6,12 @@
 #ifndef __KCS_BMC_H__
 #define __KCS_BMC_H__
 
-#include <linux/miscdevice.h>
-
-#include "kcs_bmc_client.h"
+#include <linux/list.h>
 
 #define KCS_BMC_STR_OBF		BIT(0)
 #define KCS_BMC_STR_IBF		BIT(1)
 #define KCS_BMC_STR_CMD_DAT	BIT(3)
 
-/* Different phases of the KCS BMC module.
- *  KCS_PHASE_IDLE:
- *            BMC should not be expecting nor sending any data.
- *  KCS_PHASE_WRITE_START:
- *            BMC is receiving a WRITE_START command from system software.
- *  KCS_PHASE_WRITE_DATA:
- *            BMC is receiving a data byte from system software.
- *  KCS_PHASE_WRITE_END_CMD:
- *            BMC is waiting a last data byte from system software.
- *  KCS_PHASE_WRITE_DONE:
- *            BMC has received the whole request from system software.
- *  KCS_PHASE_WAIT_READ:
- *            BMC is waiting the response from the upper IPMI service.
- *  KCS_PHASE_READ:
- *            BMC is transferring the response to system software.
- *  KCS_PHASE_ABORT_ERROR1:
- *            BMC is waiting error status request from system software.
- *  KCS_PHASE_ABORT_ERROR2:
- *            BMC is waiting for idle status afer error from system software.
- *  KCS_PHASE_ERROR:
- *            BMC has detected a protocol violation at the interface level.
- */
-enum kcs_phases {
-	KCS_PHASE_IDLE,
-
-	KCS_PHASE_WRITE_START,
-	KCS_PHASE_WRITE_DATA,
-	KCS_PHASE_WRITE_END_CMD,
-	KCS_PHASE_WRITE_DONE,
-
-	KCS_PHASE_WAIT_READ,
-	KCS_PHASE_READ,
-
-	KCS_PHASE_ABORT_ERROR1,
-	KCS_PHASE_ABORT_ERROR2,
-	KCS_PHASE_ERROR
-};
-
-/* IPMI 2.0 - Table 9-4, KCS Interface Status Codes */
-enum kcs_errors {
-	KCS_NO_ERROR                = 0x00,
-	KCS_ABORTED_BY_COMMAND      = 0x01,
-	KCS_ILLEGAL_CONTROL_CODE    = 0x02,
-	KCS_LENGTH_ERROR            = 0x06,
-	KCS_UNSPECIFIED_ERROR       = 0xFF
-};
-
 /* IPMI 2.0 - 9.5, KCS Interface Registers
  * @idr: Input Data Register
  * @odr: Output Data Register
@@ -73,36 +24,20 @@ struct kcs_ioreg {
 };
 
 struct kcs_bmc_device_ops;
+struct kcs_bmc_client;
+
+struct kcs_bmc_device {
+	struct list_head entry;
 
-struct kcs_bmc {
 	struct device *dev;
-
-	const struct kcs_bmc_device_ops *ops;
-
-	struct kcs_bmc_client client;
-
-	spinlock_t lock;
-
 	u32 channel;
-	int running;
 
 	struct kcs_ioreg ioreg;
 
-	enum kcs_phases phase;
-	enum kcs_errors error;
+	const struct kcs_bmc_device_ops *ops;
 
-	wait_queue_head_t queue;
-	bool data_in_avail;
-	int  data_in_idx;
-	u8  *data_in;
-
-	int  data_out_idx;
-	int  data_out_len;
-	u8  *data_out;
-
-	struct mutex mutex;
-	u8 *kbuffer;
-
-	struct miscdevice miscdev;
+	spinlock_t lock;
+	struct kcs_bmc_client *client;
 };
+
 #endif /* __KCS_BMC_H__ */
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index fdfba745302a..08d4290464d8 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -61,7 +61,7 @@
 #define LPC_STR4             0x11C
 
 struct aspeed_kcs_bmc {
-	struct kcs_bmc kcs_bmc;
+	struct kcs_bmc_device kcs_bmc;
 
 	struct regmap *map;
 };
@@ -71,12 +71,12 @@ struct aspeed_kcs_of_ops {
 	int (*get_io_address)(struct platform_device *pdev);
 };
 
-static inline struct aspeed_kcs_bmc *to_aspeed_kcs_bmc(struct kcs_bmc *kcs_bmc)
+static inline struct aspeed_kcs_bmc *to_aspeed_kcs_bmc(struct kcs_bmc_device *kcs_bmc)
 {
 	return container_of(kcs_bmc, struct aspeed_kcs_bmc, kcs_bmc);
 }
 
-static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
+static u8 aspeed_kcs_inb(struct kcs_bmc_device *kcs_bmc, u32 reg)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 	u32 val = 0;
@@ -88,7 +88,7 @@ static u8 aspeed_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 	return rc == 0 ? (u8) val : 0;
 }
 
-static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
+static void aspeed_kcs_outb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 data)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 	int rc;
@@ -97,7 +97,7 @@ static void aspeed_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
 }
 
-static void aspeed_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val)
+static void aspeed_kcs_updateb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 mask, u8 val)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 	int rc;
@@ -119,7 +119,7 @@ static void aspeed_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 val
  *     C. KCS4
  *        D / C : CA4h / CA5h
  */
-static void aspeed_kcs_set_address(struct kcs_bmc *kcs_bmc, u16 addr)
+static void aspeed_kcs_set_address(struct kcs_bmc_device *kcs_bmc, u16 addr)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 
@@ -153,7 +153,7 @@ static void aspeed_kcs_set_address(struct kcs_bmc *kcs_bmc, u16 addr)
 	}
 }
 
-static void aspeed_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
+static void aspeed_kcs_enable_channel(struct kcs_bmc_device *kcs_bmc, bool enable)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 
@@ -228,12 +228,12 @@ static const struct kcs_bmc_device_ops aspeed_kcs_ops = {
 
 static irqreturn_t aspeed_kcs_irq(int irq, void *arg)
 {
-	struct kcs_bmc *kcs_bmc = arg;
+	struct kcs_bmc_device *kcs_bmc = arg;
 
 	return kcs_bmc_handle_event(kcs_bmc);
 }
 
-static int aspeed_kcs_config_irq(struct kcs_bmc *kcs_bmc,
+static int aspeed_kcs_config_irq(struct kcs_bmc_device *kcs_bmc,
 			struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -335,8 +335,8 @@ static int aspeed_kcs_of_v2_get_io_address(struct platform_device *pdev)
 static int aspeed_kcs_probe(struct platform_device *pdev)
 {
 	const struct aspeed_kcs_of_ops *ops;
+	struct kcs_bmc_device *kcs_bmc;
 	struct aspeed_kcs_bmc *priv;
-	struct kcs_bmc *kcs_bmc;
 	struct device_node *np;
 	int rc, channel, addr;
 
@@ -399,7 +399,7 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 static int aspeed_kcs_remove(struct platform_device *pdev)
 {
 	struct aspeed_kcs_bmc *priv = platform_get_drvdata(pdev);
-	struct kcs_bmc *kcs_bmc = &priv->kcs_bmc;
+	struct kcs_bmc_device *kcs_bmc = &priv->kcs_bmc;
 
 	kcs_bmc_remove_device(kcs_bmc);
 
diff --git a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
index 476ad6d541d5..865d8b93f3b7 100644
--- a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
+++ b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
@@ -8,13 +8,88 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/ipmi_bmc.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#include "kcs_bmc.h"
+#include "kcs_bmc_client.h"
+
+/* Different phases of the KCS BMC module.
+ *  KCS_PHASE_IDLE:
+ *            BMC should not be expecting nor sending any data.
+ *  KCS_PHASE_WRITE_START:
+ *            BMC is receiving a WRITE_START command from system software.
+ *  KCS_PHASE_WRITE_DATA:
+ *            BMC is receiving a data byte from system software.
+ *  KCS_PHASE_WRITE_END_CMD:
+ *            BMC is waiting a last data byte from system software.
+ *  KCS_PHASE_WRITE_DONE:
+ *            BMC has received the whole request from system software.
+ *  KCS_PHASE_WAIT_READ:
+ *            BMC is waiting the response from the upper IPMI service.
+ *  KCS_PHASE_READ:
+ *            BMC is transferring the response to system software.
+ *  KCS_PHASE_ABORT_ERROR1:
+ *            BMC is waiting error status request from system software.
+ *  KCS_PHASE_ABORT_ERROR2:
+ *            BMC is waiting for idle status afer error from system software.
+ *  KCS_PHASE_ERROR:
+ *            BMC has detected a protocol violation at the interface level.
+ */
+enum kcs_ipmi_phases {
+	KCS_PHASE_IDLE,
+
+	KCS_PHASE_WRITE_START,
+	KCS_PHASE_WRITE_DATA,
+	KCS_PHASE_WRITE_END_CMD,
+	KCS_PHASE_WRITE_DONE,
+
+	KCS_PHASE_WAIT_READ,
+	KCS_PHASE_READ,
+
+	KCS_PHASE_ABORT_ERROR1,
+	KCS_PHASE_ABORT_ERROR2,
+	KCS_PHASE_ERROR
+};
+
+/* IPMI 2.0 - Table 9-4, KCS Interface Status Codes */
+enum kcs_ipmi_errors {
+	KCS_NO_ERROR                = 0x00,
+	KCS_ABORTED_BY_COMMAND      = 0x01,
+	KCS_ILLEGAL_CONTROL_CODE    = 0x02,
+	KCS_LENGTH_ERROR            = 0x06,
+	KCS_UNSPECIFIED_ERROR       = 0xFF
+};
+
+struct kcs_bmc_ipmi {
+	struct list_head entry;
+
+	struct kcs_bmc_client client;
+
+	spinlock_t lock;
+
+	enum kcs_ipmi_phases phase;
+	enum kcs_ipmi_errors error;
+
+	wait_queue_head_t queue;
+	bool data_in_avail;
+	int  data_in_idx;
+	u8  *data_in;
+
+	int  data_out_idx;
+	int  data_out_len;
+	u8  *data_out;
+
+	struct mutex mutex;
+	u8 *kbuffer;
+
+	struct miscdevice miscdev;
+};
 
 #define DEVICE_NAME "ipmi-kcs"
 
@@ -44,171 +119,169 @@ enum kcs_states {
 #define KCS_CMD_WRITE_END         0x62
 #define KCS_CMD_READ_BYTE         0x68
 
-static inline void set_state(struct kcs_bmc *kcs_bmc, u8 state)
+static inline void set_state(struct kcs_bmc_ipmi *priv, u8 state)
 {
-	kcs_bmc_update_status(kcs_bmc, KCS_STATUS_STATE_MASK,
-					KCS_STATUS_STATE(state));
+	kcs_bmc_update_status(priv->client.dev, KCS_STATUS_STATE_MASK, KCS_STATUS_STATE(state));
 }
 
-static void kcs_bmc_ipmi_force_abort(struct kcs_bmc *kcs_bmc)
+static void kcs_bmc_ipmi_force_abort(struct kcs_bmc_ipmi *priv)
 {
-	set_state(kcs_bmc, ERROR_STATE);
-	kcs_bmc_read_data(kcs_bmc);
-	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+	set_state(priv, ERROR_STATE);
+	kcs_bmc_read_data(priv->client.dev);
+	kcs_bmc_write_data(priv->client.dev, KCS_ZERO_DATA);
 
-	kcs_bmc->phase = KCS_PHASE_ERROR;
-	kcs_bmc->data_in_avail = false;
-	kcs_bmc->data_in_idx = 0;
+	priv->phase = KCS_PHASE_ERROR;
+	priv->data_in_avail = false;
+	priv->data_in_idx = 0;
 }
 
-static void kcs_bmc_ipmi_handle_data(struct kcs_bmc *kcs_bmc)
+static void kcs_bmc_ipmi_handle_data(struct kcs_bmc_ipmi *priv)
 {
+	struct kcs_bmc_device *dev;
 	u8 data;
 
-	switch (kcs_bmc->phase) {
+	dev = priv->client.dev;
+
+	switch (priv->phase) {
 	case KCS_PHASE_WRITE_START:
-		kcs_bmc->phase = KCS_PHASE_WRITE_DATA;
+		priv->phase = KCS_PHASE_WRITE_DATA;
 		fallthrough;
 
 	case KCS_PHASE_WRITE_DATA:
-		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
-			set_state(kcs_bmc, WRITE_STATE);
-			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
-						kcs_bmc_read_data(kcs_bmc);
+		if (priv->data_in_idx < KCS_MSG_BUFSIZ) {
+			set_state(priv, WRITE_STATE);
+			kcs_bmc_write_data(dev, KCS_ZERO_DATA);
+			priv->data_in[priv->data_in_idx++] = kcs_bmc_read_data(dev);
 		} else {
-			kcs_bmc_ipmi_force_abort(kcs_bmc);
-			kcs_bmc->error = KCS_LENGTH_ERROR;
+			kcs_bmc_ipmi_force_abort(priv);
+			priv->error = KCS_LENGTH_ERROR;
 		}
 		break;
 
 	case KCS_PHASE_WRITE_END_CMD:
-		if (kcs_bmc->data_in_idx < KCS_MSG_BUFSIZ) {
-			set_state(kcs_bmc, READ_STATE);
-			kcs_bmc->data_in[kcs_bmc->data_in_idx++] =
-						kcs_bmc_read_data(kcs_bmc);
-			kcs_bmc->phase = KCS_PHASE_WRITE_DONE;
-			kcs_bmc->data_in_avail = true;
-			wake_up_interruptible(&kcs_bmc->queue);
+		if (priv->data_in_idx < KCS_MSG_BUFSIZ) {
+			set_state(priv, READ_STATE);
+			priv->data_in[priv->data_in_idx++] = kcs_bmc_read_data(dev);
+			priv->phase = KCS_PHASE_WRITE_DONE;
+			priv->data_in_avail = true;
+			wake_up_interruptible(&priv->queue);
 		} else {
-			kcs_bmc_ipmi_force_abort(kcs_bmc);
-			kcs_bmc->error = KCS_LENGTH_ERROR;
+			kcs_bmc_ipmi_force_abort(priv);
+			priv->error = KCS_LENGTH_ERROR;
 		}
 		break;
 
 	case KCS_PHASE_READ:
-		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len)
-			set_state(kcs_bmc, IDLE_STATE);
+		if (priv->data_out_idx == priv->data_out_len)
+			set_state(priv, IDLE_STATE);
 
-		data = kcs_bmc_read_data(kcs_bmc);
+		data = kcs_bmc_read_data(dev);
 		if (data != KCS_CMD_READ_BYTE) {
-			set_state(kcs_bmc, ERROR_STATE);
-			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+			set_state(priv, ERROR_STATE);
+			kcs_bmc_write_data(dev, KCS_ZERO_DATA);
 			break;
 		}
 
-		if (kcs_bmc->data_out_idx == kcs_bmc->data_out_len) {
-			kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-			kcs_bmc->phase = KCS_PHASE_IDLE;
+		if (priv->data_out_idx == priv->data_out_len) {
+			kcs_bmc_write_data(dev, KCS_ZERO_DATA);
+			priv->phase = KCS_PHASE_IDLE;
 			break;
 		}
 
-		kcs_bmc_write_data(kcs_bmc,
-			kcs_bmc->data_out[kcs_bmc->data_out_idx++]);
+		kcs_bmc_write_data(dev, priv->data_out[priv->data_out_idx++]);
 		break;
 
 	case KCS_PHASE_ABORT_ERROR1:
-		set_state(kcs_bmc, READ_STATE);
-		kcs_bmc_read_data(kcs_bmc);
-		kcs_bmc_write_data(kcs_bmc, kcs_bmc->error);
-		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR2;
+		set_state(priv, READ_STATE);
+		kcs_bmc_read_data(dev);
+		kcs_bmc_write_data(dev, priv->error);
+		priv->phase = KCS_PHASE_ABORT_ERROR2;
 		break;
 
 	case KCS_PHASE_ABORT_ERROR2:
-		set_state(kcs_bmc, IDLE_STATE);
-		kcs_bmc_read_data(kcs_bmc);
-		kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
-		kcs_bmc->phase = KCS_PHASE_IDLE;
+		set_state(priv, IDLE_STATE);
+		kcs_bmc_read_data(dev);
+		kcs_bmc_write_data(dev, KCS_ZERO_DATA);
+		priv->phase = KCS_PHASE_IDLE;
 		break;
 
 	default:
-		kcs_bmc_ipmi_force_abort(kcs_bmc);
+		kcs_bmc_ipmi_force_abort(priv);
 		break;
 	}
 }
 
-static void kcs_bmc_ipmi_handle_cmd(struct kcs_bmc *kcs_bmc)
+static void kcs_bmc_ipmi_handle_cmd(struct kcs_bmc_ipmi *priv)
 {
 	u8 cmd;
 
-	set_state(kcs_bmc, WRITE_STATE);
-	kcs_bmc_write_data(kcs_bmc, KCS_ZERO_DATA);
+	set_state(priv, WRITE_STATE);
+	kcs_bmc_write_data(priv->client.dev, KCS_ZERO_DATA);
 
-	cmd = kcs_bmc_read_data(kcs_bmc);
+	cmd = kcs_bmc_read_data(priv->client.dev);
 	switch (cmd) {
 	case KCS_CMD_WRITE_START:
-		kcs_bmc->phase = KCS_PHASE_WRITE_START;
-		kcs_bmc->error = KCS_NO_ERROR;
-		kcs_bmc->data_in_avail = false;
-		kcs_bmc->data_in_idx = 0;
+		priv->phase = KCS_PHASE_WRITE_START;
+		priv->error = KCS_NO_ERROR;
+		priv->data_in_avail = false;
+		priv->data_in_idx = 0;
 		break;
 
 	case KCS_CMD_WRITE_END:
-		if (kcs_bmc->phase != KCS_PHASE_WRITE_DATA) {
-			kcs_bmc_ipmi_force_abort(kcs_bmc);
+		if (priv->phase != KCS_PHASE_WRITE_DATA) {
+			kcs_bmc_ipmi_force_abort(priv);
 			break;
 		}
 
-		kcs_bmc->phase = KCS_PHASE_WRITE_END_CMD;
+		priv->phase = KCS_PHASE_WRITE_END_CMD;
 		break;
 
 	case KCS_CMD_GET_STATUS_ABORT:
-		if (kcs_bmc->error == KCS_NO_ERROR)
-			kcs_bmc->error = KCS_ABORTED_BY_COMMAND;
+		if (priv->error == KCS_NO_ERROR)
+			priv->error = KCS_ABORTED_BY_COMMAND;
 
-		kcs_bmc->phase = KCS_PHASE_ABORT_ERROR1;
-		kcs_bmc->data_in_avail = false;
-		kcs_bmc->data_in_idx = 0;
+		priv->phase = KCS_PHASE_ABORT_ERROR1;
+		priv->data_in_avail = false;
+		priv->data_in_idx = 0;
 		break;
 
 	default:
-		kcs_bmc_ipmi_force_abort(kcs_bmc);
-		kcs_bmc->error = KCS_ILLEGAL_CONTROL_CODE;
+		kcs_bmc_ipmi_force_abort(priv);
+		priv->error = KCS_ILLEGAL_CONTROL_CODE;
 		break;
 	}
 }
 
-static inline struct kcs_bmc *client_to_kcs_bmc(struct kcs_bmc_client *client)
+static inline struct kcs_bmc_ipmi *client_to_kcs_bmc_ipmi(struct kcs_bmc_client *client)
 {
-	return container_of(client, struct kcs_bmc, client);
+	return container_of(client, struct kcs_bmc_ipmi, client);
 }
 
 static irqreturn_t kcs_bmc_ipmi_event(struct kcs_bmc_client *client)
 {
-	struct kcs_bmc *kcs_bmc;
-	unsigned long flags;
+	struct kcs_bmc_ipmi *priv;
 	u8 status;
 	int ret;
 
-	kcs_bmc = client_to_kcs_bmc(client);
+	priv = client_to_kcs_bmc_ipmi(client);
+	if (!priv)
+		return IRQ_NONE;
 
-	spin_lock_irqsave(&kcs_bmc->lock, flags);
+	spin_lock(&priv->lock);
 
-	status = kcs_bmc_read_status(kcs_bmc);
+	status = kcs_bmc_read_status(client->dev);
 	if (status & KCS_STATUS_IBF) {
-		if (!kcs_bmc->running)
-			kcs_bmc_ipmi_force_abort(kcs_bmc);
-		else if (status & KCS_STATUS_CMD_DAT)
-			kcs_bmc_ipmi_handle_cmd(kcs_bmc);
+		if (status & KCS_STATUS_CMD_DAT)
+			kcs_bmc_ipmi_handle_cmd(priv);
 		else
-			kcs_bmc_ipmi_handle_data(kcs_bmc);
+			kcs_bmc_ipmi_handle_data(priv);
 
 		ret = IRQ_HANDLED;
 	} else {
 		ret = IRQ_NONE;
 	}
 
-	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
+	spin_unlock(&priv->lock);
 
 	return ret;
 }
@@ -217,37 +290,29 @@ static const struct kcs_bmc_client_ops kcs_bmc_ipmi_client_ops = {
 	.event = kcs_bmc_ipmi_event,
 };
 
-static inline struct kcs_bmc *file_to_kcs_bmc(struct file *filp)
+static inline struct kcs_bmc_ipmi *to_kcs_bmc(struct file *filp)
 {
-	return container_of(filp->private_data, struct kcs_bmc, miscdev);
+	return container_of(filp->private_data, struct kcs_bmc_ipmi, miscdev);
 }
 
 static int kcs_bmc_ipmi_open(struct inode *inode, struct file *filp)
 {
-	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
-	int ret = 0;
+	struct kcs_bmc_ipmi *priv = to_kcs_bmc(filp);
 
-	spin_lock_irq(&kcs_bmc->lock);
-	if (!kcs_bmc->running)
-		kcs_bmc->running = 1;
-	else
-		ret = -EBUSY;
-	spin_unlock_irq(&kcs_bmc->lock);
-
-	return ret;
+	return kcs_bmc_enable_device(priv->client.dev, &priv->client);
 }
 
 static __poll_t kcs_bmc_ipmi_poll(struct file *filp, poll_table *wait)
 {
-	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
+	struct kcs_bmc_ipmi *priv = to_kcs_bmc(filp);
 	__poll_t mask = 0;
 
-	poll_wait(filp, &kcs_bmc->queue, wait);
+	poll_wait(filp, &priv->queue, wait);
 
-	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->data_in_avail)
+	spin_lock_irq(&priv->lock);
+	if (priv->data_in_avail)
 		mask |= EPOLLIN;
-	spin_unlock_irq(&kcs_bmc->lock);
+	spin_unlock_irq(&priv->lock);
 
 	return mask;
 }
@@ -255,24 +320,24 @@ static __poll_t kcs_bmc_ipmi_poll(struct file *filp, poll_table *wait)
 static ssize_t kcs_bmc_ipmi_read(struct file *filp, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
+	struct kcs_bmc_ipmi *priv = to_kcs_bmc(filp);
 	bool data_avail;
 	size_t data_len;
 	ssize_t ret;
 
 	if (!(filp->f_flags & O_NONBLOCK))
-		wait_event_interruptible(kcs_bmc->queue,
-					 kcs_bmc->data_in_avail);
+		wait_event_interruptible(priv->queue,
+					 priv->data_in_avail);
 
-	mutex_lock(&kcs_bmc->mutex);
+	mutex_lock(&priv->mutex);
 
-	spin_lock_irq(&kcs_bmc->lock);
-	data_avail = kcs_bmc->data_in_avail;
+	spin_lock_irq(&priv->lock);
+	data_avail = priv->data_in_avail;
 	if (data_avail) {
-		data_len = kcs_bmc->data_in_idx;
-		memcpy(kcs_bmc->kbuffer, kcs_bmc->data_in, data_len);
+		data_len = priv->data_in_idx;
+		memcpy(priv->kbuffer, priv->data_in, data_len);
 	}
-	spin_unlock_irq(&kcs_bmc->lock);
+	spin_unlock_irq(&priv->lock);
 
 	if (!data_avail) {
 		ret = -EAGAIN;
@@ -281,35 +346,35 @@ static ssize_t kcs_bmc_ipmi_read(struct file *filp, char __user *buf,
 
 	if (count < data_len) {
 		pr_err("channel=%u with too large data : %zu\n",
-			kcs_bmc->channel, data_len);
+			priv->client.dev->channel, data_len);
 
-		spin_lock_irq(&kcs_bmc->lock);
-		kcs_bmc_ipmi_force_abort(kcs_bmc);
-		spin_unlock_irq(&kcs_bmc->lock);
+		spin_lock_irq(&priv->lock);
+		kcs_bmc_ipmi_force_abort(priv);
+		spin_unlock_irq(&priv->lock);
 
 		ret = -EOVERFLOW;
 		goto out_unlock;
 	}
 
-	if (copy_to_user(buf, kcs_bmc->kbuffer, data_len)) {
+	if (copy_to_user(buf, priv->kbuffer, data_len)) {
 		ret = -EFAULT;
 		goto out_unlock;
 	}
 
 	ret = data_len;
 
-	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->phase == KCS_PHASE_WRITE_DONE) {
-		kcs_bmc->phase = KCS_PHASE_WAIT_READ;
-		kcs_bmc->data_in_avail = false;
-		kcs_bmc->data_in_idx = 0;
+	spin_lock_irq(&priv->lock);
+	if (priv->phase == KCS_PHASE_WRITE_DONE) {
+		priv->phase = KCS_PHASE_WAIT_READ;
+		priv->data_in_avail = false;
+		priv->data_in_idx = 0;
 	} else {
 		ret = -EAGAIN;
 	}
-	spin_unlock_irq(&kcs_bmc->lock);
+	spin_unlock_irq(&priv->lock);
 
 out_unlock:
-	mutex_unlock(&kcs_bmc->mutex);
+	mutex_unlock(&priv->mutex);
 
 	return ret;
 }
@@ -317,35 +382,35 @@ out_unlock:
 static ssize_t kcs_bmc_ipmi_write(struct file *filp, const char __user *buf,
 			     size_t count, loff_t *ppos)
 {
-	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
+	struct kcs_bmc_ipmi *priv = to_kcs_bmc(filp);
 	ssize_t ret;
 
 	/* a minimum response size '3' : netfn + cmd + ccode */
 	if (count < 3 || count > KCS_MSG_BUFSIZ)
 		return -EINVAL;
 
-	mutex_lock(&kcs_bmc->mutex);
+	mutex_lock(&priv->mutex);
 
-	if (copy_from_user(kcs_bmc->kbuffer, buf, count)) {
+	if (copy_from_user(priv->kbuffer, buf, count)) {
 		ret = -EFAULT;
 		goto out_unlock;
 	}
 
-	spin_lock_irq(&kcs_bmc->lock);
-	if (kcs_bmc->phase == KCS_PHASE_WAIT_READ) {
-		kcs_bmc->phase = KCS_PHASE_READ;
-		kcs_bmc->data_out_idx = 1;
-		kcs_bmc->data_out_len = count;
-		memcpy(kcs_bmc->data_out, kcs_bmc->kbuffer, count);
-		kcs_bmc_write_data(kcs_bmc, kcs_bmc->data_out[0]);
+	spin_lock_irq(&priv->lock);
+	if (priv->phase == KCS_PHASE_WAIT_READ) {
+		priv->phase = KCS_PHASE_READ;
+		priv->data_out_idx = 1;
+		priv->data_out_len = count;
+		memcpy(priv->data_out, priv->kbuffer, count);
+		kcs_bmc_write_data(priv->client.dev, priv->data_out[0]);
 		ret = count;
 	} else {
 		ret = -EINVAL;
 	}
-	spin_unlock_irq(&kcs_bmc->lock);
+	spin_unlock_irq(&priv->lock);
 
 out_unlock:
-	mutex_unlock(&kcs_bmc->mutex);
+	mutex_unlock(&priv->mutex);
 
 	return ret;
 }
@@ -353,22 +418,22 @@ out_unlock:
 static long kcs_bmc_ipmi_ioctl(struct file *filp, unsigned int cmd,
 			  unsigned long arg)
 {
-	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
+	struct kcs_bmc_ipmi *priv = to_kcs_bmc(filp);
 	long ret = 0;
 
-	spin_lock_irq(&kcs_bmc->lock);
+	spin_lock_irq(&priv->lock);
 
 	switch (cmd) {
 	case IPMI_BMC_IOCTL_SET_SMS_ATN:
-		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, KCS_STATUS_SMS_ATN);
+		kcs_bmc_update_status(priv->client.dev, KCS_STATUS_SMS_ATN, KCS_STATUS_SMS_ATN);
 		break;
 
 	case IPMI_BMC_IOCTL_CLEAR_SMS_ATN:
-		kcs_bmc_update_status(kcs_bmc, KCS_STATUS_SMS_ATN, 0);
+		kcs_bmc_update_status(priv->client.dev, KCS_STATUS_SMS_ATN, 0);
 		break;
 
 	case IPMI_BMC_IOCTL_FORCE_ABORT:
-		kcs_bmc_ipmi_force_abort(kcs_bmc);
+		kcs_bmc_ipmi_force_abort(priv);
 		break;
 
 	default:
@@ -376,19 +441,17 @@ static long kcs_bmc_ipmi_ioctl(struct file *filp, unsigned int cmd,
 		break;
 	}
 
-	spin_unlock_irq(&kcs_bmc->lock);
+	spin_unlock_irq(&priv->lock);
 
 	return ret;
 }
 
 static int kcs_bmc_ipmi_release(struct inode *inode, struct file *filp)
 {
-	struct kcs_bmc *kcs_bmc = file_to_kcs_bmc(filp);
+	struct kcs_bmc_ipmi *priv = to_kcs_bmc(filp);
 
-	spin_lock_irq(&kcs_bmc->lock);
-	kcs_bmc->running = 0;
-	kcs_bmc_ipmi_force_abort(kcs_bmc);
-	spin_unlock_irq(&kcs_bmc->lock);
+	kcs_bmc_ipmi_force_abort(priv);
+	kcs_bmc_disable_device(priv->client.dev, &priv->client);
 
 	return 0;
 }
@@ -403,56 +466,78 @@ static const struct file_operations kcs_bmc_ipmi_fops = {
 	.unlocked_ioctl = kcs_bmc_ipmi_ioctl,
 };
 
-int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_ipmi_add_device(struct kcs_bmc *kcs_bmc)
+static DEFINE_SPINLOCK(kcs_bmc_ipmi_instances_lock);
+static LIST_HEAD(kcs_bmc_ipmi_instances);
+
+int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc);
+int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc)
 {
+	struct kcs_bmc_ipmi *priv;
 	int rc;
 
-	spin_lock_init(&kcs_bmc->lock);
-	mutex_init(&kcs_bmc->mutex);
-	init_waitqueue_head(&kcs_bmc->queue);
-
-	kcs_bmc->client.dev = kcs_bmc;
-	kcs_bmc->client.ops = &kcs_bmc_ipmi_client_ops;
-	kcs_bmc->data_in = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-	kcs_bmc->data_out = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-	kcs_bmc->kbuffer = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
-
-	kcs_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
-	kcs_bmc->miscdev.name = devm_kasprintf(kcs_bmc->dev, GFP_KERNEL, "%s%u",
-					       DEVICE_NAME, kcs_bmc->channel);
-	if (!kcs_bmc->data_in || !kcs_bmc->data_out || !kcs_bmc->kbuffer ||
-	    !kcs_bmc->miscdev.name)
+	priv = devm_kzalloc(kcs_bmc->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
-	kcs_bmc->miscdev.fops = &kcs_bmc_ipmi_fops;
+	spin_lock_init(&priv->lock);
+	mutex_init(&priv->mutex);
 
-	rc = misc_register(&kcs_bmc->miscdev);
+	init_waitqueue_head(&priv->queue);
+
+	priv->client.dev = kcs_bmc;
+	priv->client.ops = &kcs_bmc_ipmi_client_ops;
+	priv->data_in = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	priv->data_out = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+	priv->kbuffer = devm_kmalloc(kcs_bmc->dev, KCS_MSG_BUFSIZ, GFP_KERNEL);
+
+	priv->miscdev.minor = MISC_DYNAMIC_MINOR;
+	priv->miscdev.name = devm_kasprintf(kcs_bmc->dev, GFP_KERNEL, "%s%u", DEVICE_NAME,
+					   kcs_bmc->channel);
+	if (!priv->data_in || !priv->data_out || !priv->kbuffer || !priv->miscdev.name)
+		return -EINVAL;
+
+	priv->miscdev.fops = &kcs_bmc_ipmi_fops;
+
+	rc = misc_register(&priv->miscdev);
 	if (rc) {
 		dev_err(kcs_bmc->dev, "Unable to register device: %d\n", rc);
 		return rc;
 	}
 
+	spin_lock_irq(&kcs_bmc_ipmi_instances_lock);
+	list_add(&priv->entry, &kcs_bmc_ipmi_instances);
+	spin_unlock_irq(&kcs_bmc_ipmi_instances_lock);
+
 	dev_info(kcs_bmc->dev, "Initialised IPMI client for channel %d", kcs_bmc->channel);
 
 	return 0;
 }
 EXPORT_SYMBOL(kcs_bmc_ipmi_add_device);
 
-int kcs_bmc_ipmi_remove_device(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_ipmi_remove_device(struct kcs_bmc *kcs_bmc)
+int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc);
+int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc)
 {
-	misc_deregister(&kcs_bmc->miscdev);
+	struct kcs_bmc_ipmi *priv = NULL, *pos;
 
-	spin_lock_irq(&kcs_bmc->lock);
-	kcs_bmc->running = 0;
-	kcs_bmc_ipmi_force_abort(kcs_bmc);
-	spin_unlock_irq(&kcs_bmc->lock);
+	spin_lock_irq(&kcs_bmc_ipmi_instances_lock);
+	list_for_each_entry(pos, &kcs_bmc_ipmi_instances, entry) {
+		if (pos->client.dev == kcs_bmc) {
+			priv = pos;
+			list_del(&pos->entry);
+			break;
+		}
+	}
+	spin_unlock_irq(&kcs_bmc_ipmi_instances_lock);
 
-	devm_kfree(kcs_bmc->dev, kcs_bmc->kbuffer);
-	devm_kfree(kcs_bmc->dev, kcs_bmc->data_out);
-	devm_kfree(kcs_bmc->dev, kcs_bmc->data_in);
-	devm_kfree(kcs_bmc->dev, kcs_bmc);
+	if (!priv)
+		return -ENODEV;
+
+	misc_deregister(&priv->miscdev);
+	kcs_bmc_disable_device(priv->client.dev, &priv->client);
+	devm_kfree(kcs_bmc->dev, priv->kbuffer);
+	devm_kfree(kcs_bmc->dev, priv->data_out);
+	devm_kfree(kcs_bmc->dev, priv->data_in);
+	devm_kfree(kcs_bmc->dev, priv);
 
 	return 0;
 }
@@ -460,4 +545,5 @@ EXPORT_SYMBOL(kcs_bmc_ipmi_remove_device);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
 MODULE_DESCRIPTION("KCS BMC to handle the IPMI request from system software");
diff --git a/drivers/char/ipmi/kcs_bmc_client.h b/drivers/char/ipmi/kcs_bmc_client.h
index dad8774aebce..a32fdc4a7707 100644
--- a/drivers/char/ipmi/kcs_bmc_client.h
+++ b/drivers/char/ipmi/kcs_bmc_client.h
@@ -6,22 +6,24 @@
 
 #include <linux/irqreturn.h>
 
-struct kcs_bmc;
-struct kcs_bmc_client_ops;
-
-struct kcs_bmc_client {
-	const struct kcs_bmc_client_ops *ops;
-
-	struct kcs_bmc *dev;
-};
+#include "kcs_bmc.h"
 
 struct kcs_bmc_client_ops {
 	irqreturn_t (*event)(struct kcs_bmc_client *client);
 };
 
-u8 kcs_bmc_read_data(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_write_data(struct kcs_bmc *kcs_bmc, u8 data);
-u8 kcs_bmc_read_status(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_write_status(struct kcs_bmc *kcs_bmc, u8 data);
-void kcs_bmc_update_status(struct kcs_bmc *kcs_bmc, u8 mask, u8 val);
+struct kcs_bmc_client {
+	const struct kcs_bmc_client_ops *ops;
+
+	struct kcs_bmc_device *dev;
+};
+
+int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client);
+void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client);
+
+u8 kcs_bmc_read_data(struct kcs_bmc_device *kcs_bmc);
+void kcs_bmc_write_data(struct kcs_bmc_device *kcs_bmc, u8 data);
+u8 kcs_bmc_read_status(struct kcs_bmc_device *kcs_bmc);
+void kcs_bmc_write_status(struct kcs_bmc_device *kcs_bmc, u8 data);
+void kcs_bmc_update_status(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 val);
 #endif
diff --git a/drivers/char/ipmi/kcs_bmc_device.h b/drivers/char/ipmi/kcs_bmc_device.h
index dd8bf1307ad2..c9a4237bbf40 100644
--- a/drivers/char/ipmi/kcs_bmc_device.h
+++ b/drivers/char/ipmi/kcs_bmc_device.h
@@ -4,16 +4,18 @@
 #ifndef __KCS_BMC_DEVICE_H__
 #define __KCS_BMC_DEVICE_H__
 
+#include <linux/irqreturn.h>
+
 #include "kcs_bmc.h"
 
 struct kcs_bmc_device_ops {
-	u8 (*io_inputb)(struct kcs_bmc *kcs_bmc, u32 reg);
-	void (*io_outputb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 b);
-	void (*io_updateb)(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 b);
+	u8 (*io_inputb)(struct kcs_bmc_device *kcs_bmc, u32 reg);
+	void (*io_outputb)(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 b);
+	void (*io_updateb)(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 mask, u8 b);
 };
 
-irqreturn_t kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc);
-int kcs_bmc_add_device(struct kcs_bmc *kcs_bmc);
-void kcs_bmc_remove_device(struct kcs_bmc *kcs_bmc);
+irqreturn_t kcs_bmc_handle_event(struct kcs_bmc_device *kcs_bmc);
+int kcs_bmc_add_device(struct kcs_bmc_device *kcs_bmc);
+void kcs_bmc_remove_device(struct kcs_bmc_device *kcs_bmc);
 
 #endif
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index ebb691af28c5..3b96486c25c3 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -65,7 +65,7 @@ struct npcm7xx_kcs_reg {
 };
 
 struct npcm7xx_kcs_bmc {
-	struct kcs_bmc kcs_bmc;
+	struct kcs_bmc_device kcs_bmc;
 
 	struct regmap *map;
 
@@ -78,12 +78,12 @@ static const struct npcm7xx_kcs_reg npcm7xx_kcs_reg_tbl[KCS_CHANNEL_MAX] = {
 	{ .sts = KCS3ST, .dob = KCS3DO, .dib = KCS3DI, .ctl = KCS3CTL, .ie = KCS3IE },
 };
 
-static inline struct npcm7xx_kcs_bmc *to_npcm7xx_kcs_bmc(struct kcs_bmc *kcs_bmc)
+static inline struct npcm7xx_kcs_bmc *to_npcm7xx_kcs_bmc(struct kcs_bmc_device *kcs_bmc)
 {
 	return container_of(kcs_bmc, struct npcm7xx_kcs_bmc, kcs_bmc);
 }
 
-static u8 npcm7xx_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
+static u8 npcm7xx_kcs_inb(struct kcs_bmc_device *kcs_bmc, u32 reg)
 {
 	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 	u32 val = 0;
@@ -95,7 +95,7 @@ static u8 npcm7xx_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
 	return rc == 0 ? (u8)val : 0;
 }
 
-static void npcm7xx_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
+static void npcm7xx_kcs_outb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 data)
 {
 	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 	int rc;
@@ -104,7 +104,7 @@ static void npcm7xx_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
 	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
 }
 
-static void npcm7xx_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 data)
+static void npcm7xx_kcs_updateb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 mask, u8 data)
 {
 	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 	int rc;
@@ -113,7 +113,7 @@ static void npcm7xx_kcs_updateb(struct kcs_bmc *kcs_bmc, u32 reg, u8 mask, u8 da
 	WARN(rc != 0, "regmap_update_bits() failed: %d\n", rc);
 }
 
-static void npcm7xx_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
+static void npcm7xx_kcs_enable_channel(struct kcs_bmc_device *kcs_bmc, bool enable)
 {
 	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 
@@ -126,12 +126,12 @@ static void npcm7xx_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
 
 static irqreturn_t npcm7xx_kcs_irq(int irq, void *arg)
 {
-	struct kcs_bmc *kcs_bmc = arg;
+	struct kcs_bmc_device *kcs_bmc = arg;
 
 	return kcs_bmc_handle_event(kcs_bmc);
 }
 
-static int npcm7xx_kcs_config_irq(struct kcs_bmc *kcs_bmc,
+static int npcm7xx_kcs_config_irq(struct kcs_bmc_device *kcs_bmc,
 				  struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -155,7 +155,7 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct npcm7xx_kcs_bmc *priv;
-	struct kcs_bmc *kcs_bmc;
+	struct kcs_bmc_device *kcs_bmc;
 	u32 chan;
 	int rc;
 
@@ -207,7 +207,7 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 static int npcm7xx_kcs_remove(struct platform_device *pdev)
 {
 	struct npcm7xx_kcs_bmc *priv = platform_get_drvdata(pdev);
-	struct kcs_bmc *kcs_bmc = &priv->kcs_bmc;
+	struct kcs_bmc_device *kcs_bmc = &priv->kcs_bmc;
 
 	kcs_bmc_remove_device(kcs_bmc);
 

From 7cafff991e32d4b97251982ab0665601b65f2736 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:49 +0930
Subject: [PATCH 2466/3292] ipmi: kcs_bmc: Decouple the IPMI chardev from the
 core

Now that we have untangled the data-structures, split the userspace
interface out into its own module. Userspace interfaces and drivers are
registered to the KCS BMC core to support arbitrary binding of either.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-Id: <20210608104757.582199-9-andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/Kconfig             | 13 +++++
 drivers/char/ipmi/Makefile            |  3 +-
 drivers/char/ipmi/kcs_bmc.c           | 80 +++++++++++++++++++++++++--
 drivers/char/ipmi/kcs_bmc_cdev_ipmi.c | 31 +++++++++--
 drivers/char/ipmi/kcs_bmc_client.h    | 14 +++++
 5 files changed, 128 insertions(+), 13 deletions(-)

diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 07847d9a459a..bc5f81899b62 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -124,6 +124,19 @@ config NPCM7XX_KCS_IPMI_BMC
 	  This support is also available as a module.  If so, the module
 	  will be called kcs_bmc_npcm7xx.
 
+config IPMI_KCS_BMC_CDEV_IPMI
+	depends on IPMI_KCS_BMC
+	tristate "IPMI character device interface for BMC KCS devices"
+	help
+	  Provides a BMC-side character device implementing IPMI
+	  semantics for KCS IPMI devices.
+
+	  Say YES if you wish to expose KCS devices on the BMC for IPMI
+	  purposes.
+
+	  This support is also available as a module. The module will be
+	  called kcs_bmc_cdev_ipmi.
+
 config ASPEED_BT_IPMI_BMC
 	depends on ARCH_ASPEED || COMPILE_TEST
 	depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index a302bc865370..fcfa676afddb 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -22,7 +22,8 @@ obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
-obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o kcs_bmc_cdev_ipmi.o
+obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
+obj-$(CONFIG_IPMI_KCS_BMC_CDEV_IPMI) += kcs_bmc_cdev_ipmi.o
 obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
 obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
 obj-$(CONFIG_NPCM7XX_KCS_IPMI_BMC) += kcs_bmc_npcm7xx.o
diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index 2ec934f0dba3..9860c7b75157 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -5,7 +5,9 @@
  */
 
 #include <linux/device.h>
+#include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 
 #include "kcs_bmc.h"
 
@@ -13,6 +15,11 @@
 #include "kcs_bmc_device.h"
 #include "kcs_bmc_client.h"
 
+/* Record registered devices and drivers */
+static DEFINE_MUTEX(kcs_bmc_lock);
+static LIST_HEAD(kcs_bmc_devices);
+static LIST_HEAD(kcs_bmc_drivers);
+
 /* Consumer data access */
 
 u8 kcs_bmc_read_data(struct kcs_bmc_device *kcs_bmc)
@@ -98,22 +105,83 @@ void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_clien
 }
 EXPORT_SYMBOL(kcs_bmc_disable_device);
 
-int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc);
 int kcs_bmc_add_device(struct kcs_bmc_device *kcs_bmc)
 {
-	return kcs_bmc_ipmi_add_device(kcs_bmc);
+	struct kcs_bmc_driver *drv;
+	int error = 0;
+	int rc;
+
+	spin_lock_init(&kcs_bmc->lock);
+	kcs_bmc->client = NULL;
+
+	mutex_lock(&kcs_bmc_lock);
+	list_add(&kcs_bmc->entry, &kcs_bmc_devices);
+	list_for_each_entry(drv, &kcs_bmc_drivers, entry) {
+		rc = drv->ops->add_device(kcs_bmc);
+		if (!rc)
+			continue;
+
+		dev_err(kcs_bmc->dev, "Failed to add chardev for KCS channel %d: %d",
+			kcs_bmc->channel, rc);
+		error = rc;
+	}
+	mutex_unlock(&kcs_bmc_lock);
+
+	return error;
 }
 EXPORT_SYMBOL(kcs_bmc_add_device);
 
-int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc);
 void kcs_bmc_remove_device(struct kcs_bmc_device *kcs_bmc)
 {
-	if (kcs_bmc_ipmi_remove_device(kcs_bmc))
-		pr_warn("Failed to remove device for KCS channel %d\n",
-			kcs_bmc->channel);
+	struct kcs_bmc_driver *drv;
+	int rc;
+
+	mutex_lock(&kcs_bmc_lock);
+	list_del(&kcs_bmc->entry);
+	list_for_each_entry(drv, &kcs_bmc_drivers, entry) {
+		rc = drv->ops->remove_device(kcs_bmc);
+		if (rc)
+			dev_err(kcs_bmc->dev, "Failed to remove chardev for KCS channel %d: %d",
+				kcs_bmc->channel, rc);
+	}
+	mutex_unlock(&kcs_bmc_lock);
 }
 EXPORT_SYMBOL(kcs_bmc_remove_device);
 
+void kcs_bmc_register_driver(struct kcs_bmc_driver *drv)
+{
+	struct kcs_bmc_device *kcs_bmc;
+	int rc;
+
+	mutex_lock(&kcs_bmc_lock);
+	list_add(&drv->entry, &kcs_bmc_drivers);
+	list_for_each_entry(kcs_bmc, &kcs_bmc_devices, entry) {
+		rc = drv->ops->add_device(kcs_bmc);
+		if (rc)
+			dev_err(kcs_bmc->dev, "Failed to add driver for KCS channel %d: %d",
+				kcs_bmc->channel, rc);
+	}
+	mutex_unlock(&kcs_bmc_lock);
+}
+EXPORT_SYMBOL(kcs_bmc_register_driver);
+
+void kcs_bmc_unregister_driver(struct kcs_bmc_driver *drv)
+{
+	struct kcs_bmc_device *kcs_bmc;
+	int rc;
+
+	mutex_lock(&kcs_bmc_lock);
+	list_del(&drv->entry);
+	list_for_each_entry(kcs_bmc, &kcs_bmc_devices, entry) {
+		rc = drv->ops->remove_device(kcs_bmc);
+		if (rc)
+			dev_err(kcs_bmc->dev, "Failed to remove driver for KCS channel %d: %d",
+				kcs_bmc->channel, rc);
+	}
+	mutex_unlock(&kcs_bmc_lock);
+}
+EXPORT_SYMBOL(kcs_bmc_unregister_driver);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
 MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
diff --git a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
index 865d8b93f3b7..486834a962c3 100644
--- a/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
+++ b/drivers/char/ipmi/kcs_bmc_cdev_ipmi.c
@@ -469,8 +469,7 @@ static const struct file_operations kcs_bmc_ipmi_fops = {
 static DEFINE_SPINLOCK(kcs_bmc_ipmi_instances_lock);
 static LIST_HEAD(kcs_bmc_ipmi_instances);
 
-int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc);
-int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc)
+static int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc)
 {
 	struct kcs_bmc_ipmi *priv;
 	int rc;
@@ -512,10 +511,8 @@ int kcs_bmc_ipmi_add_device(struct kcs_bmc_device *kcs_bmc)
 
 	return 0;
 }
-EXPORT_SYMBOL(kcs_bmc_ipmi_add_device);
 
-int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc);
-int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc)
+static int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc)
 {
 	struct kcs_bmc_ipmi *priv = NULL, *pos;
 
@@ -541,7 +538,29 @@ int kcs_bmc_ipmi_remove_device(struct kcs_bmc_device *kcs_bmc)
 
 	return 0;
 }
-EXPORT_SYMBOL(kcs_bmc_ipmi_remove_device);
+
+static const struct kcs_bmc_driver_ops kcs_bmc_ipmi_driver_ops = {
+	.add_device = kcs_bmc_ipmi_add_device,
+	.remove_device = kcs_bmc_ipmi_remove_device,
+};
+
+static struct kcs_bmc_driver kcs_bmc_ipmi_driver = {
+	.ops = &kcs_bmc_ipmi_driver_ops,
+};
+
+static int kcs_bmc_ipmi_init(void)
+{
+	kcs_bmc_register_driver(&kcs_bmc_ipmi_driver);
+
+	return 0;
+}
+module_init(kcs_bmc_ipmi_init);
+
+static void kcs_bmc_ipmi_exit(void)
+{
+	kcs_bmc_unregister_driver(&kcs_bmc_ipmi_driver);
+}
+module_exit(kcs_bmc_ipmi_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
diff --git a/drivers/char/ipmi/kcs_bmc_client.h b/drivers/char/ipmi/kcs_bmc_client.h
index a32fdc4a7707..814ad8e052ef 100644
--- a/drivers/char/ipmi/kcs_bmc_client.h
+++ b/drivers/char/ipmi/kcs_bmc_client.h
@@ -8,6 +8,17 @@
 
 #include "kcs_bmc.h"
 
+struct kcs_bmc_driver_ops {
+	int (*add_device)(struct kcs_bmc_device *kcs_bmc);
+	int (*remove_device)(struct kcs_bmc_device *kcs_bmc);
+};
+
+struct kcs_bmc_driver {
+	struct list_head entry;
+
+	const struct kcs_bmc_driver_ops *ops;
+};
+
 struct kcs_bmc_client_ops {
 	irqreturn_t (*event)(struct kcs_bmc_client *client);
 };
@@ -18,6 +29,9 @@ struct kcs_bmc_client {
 	struct kcs_bmc_device *dev;
 };
 
+void kcs_bmc_register_driver(struct kcs_bmc_driver *drv);
+void kcs_bmc_unregister_driver(struct kcs_bmc_driver *drv);
+
 int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client);
 void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client);
 

From 28651e6c4237f4aee5e0744ce37d3a50e7b1f36b Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:50 +0930
Subject: [PATCH 2467/3292] ipmi: kcs_bmc: Allow clients to control KCS IRQ
 state

Add a mechanism for controlling whether the client associated with a
KCS device will receive Input Buffer Full (IBF) and Output Buffer Empty
(OBE) events. This enables an abstract implementation of poll() for KCS
devices.

A wart in the implementation is that the ASPEED KCS devices don't
support an OBE interrupt for the BMC. Instead we pretend it has one by
polling the status register waiting for the Output Buffer Full (OBF) bit
to clear, and generating an event when OBE is observed.

Cc: CS20 KWLiu <KWLIU@nuvoton.com>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-10-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c         |   6 ++
 drivers/char/ipmi/kcs_bmc.h         |   3 +
 drivers/char/ipmi/kcs_bmc_aspeed.c  | 152 ++++++++++++++++++----------
 drivers/char/ipmi/kcs_bmc_client.h  |   2 +
 drivers/char/ipmi/kcs_bmc_device.h  |   1 +
 drivers/char/ipmi/kcs_bmc_npcm7xx.c |  26 ++++-
 6 files changed, 132 insertions(+), 58 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index 9860c7b75157..91eb43746cef 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -182,6 +182,12 @@ void kcs_bmc_unregister_driver(struct kcs_bmc_driver *drv)
 }
 EXPORT_SYMBOL(kcs_bmc_unregister_driver);
 
+void kcs_bmc_update_event_mask(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 events)
+{
+	kcs_bmc->ops->irq_mask_update(kcs_bmc, mask, events);
+}
+EXPORT_SYMBOL(kcs_bmc_update_event_mask);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
 MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
diff --git a/drivers/char/ipmi/kcs_bmc.h b/drivers/char/ipmi/kcs_bmc.h
index 3f0cce315b4f..fa408b802c79 100644
--- a/drivers/char/ipmi/kcs_bmc.h
+++ b/drivers/char/ipmi/kcs_bmc.h
@@ -8,6 +8,9 @@
 
 #include <linux/list.h>
 
+#define KCS_BMC_EVENT_TYPE_OBE	BIT(0)
+#define KCS_BMC_EVENT_TYPE_IBF	BIT(1)
+
 #define KCS_BMC_STR_OBF		BIT(0)
 #define KCS_BMC_STR_IBF		BIT(1)
 #define KCS_BMC_STR_CMD_DAT	BIT(3)
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 08d4290464d8..5bf8d013e4ae 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -60,10 +60,18 @@
 #define LPC_ODR4             0x118
 #define LPC_STR4             0x11C
 
+#define OBE_POLL_PERIOD	     (HZ / 2)
+
 struct aspeed_kcs_bmc {
 	struct kcs_bmc_device kcs_bmc;
 
 	struct regmap *map;
+
+	struct {
+		spinlock_t lock;
+		bool remove;
+		struct timer_list timer;
+	} obe;
 };
 
 struct aspeed_kcs_of_ops {
@@ -159,68 +167,89 @@ static void aspeed_kcs_enable_channel(struct kcs_bmc_device *kcs_bmc, bool enabl
 
 	switch (kcs_bmc->channel) {
 	case 1:
-		if (enable) {
-			regmap_update_bits(priv->map, LPC_HICR2,
-					LPC_HICR2_IBFIF1, LPC_HICR2_IBFIF1);
-			regmap_update_bits(priv->map, LPC_HICR0,
-					LPC_HICR0_LPC1E, LPC_HICR0_LPC1E);
-		} else {
-			regmap_update_bits(priv->map, LPC_HICR0,
-					LPC_HICR0_LPC1E, 0);
-			regmap_update_bits(priv->map, LPC_HICR2,
-					LPC_HICR2_IBFIF1, 0);
-		}
-		break;
-
+		regmap_update_bits(priv->map, LPC_HICR0, LPC_HICR0_LPC1E, enable * LPC_HICR0_LPC1E);
+		return;
 	case 2:
-		if (enable) {
-			regmap_update_bits(priv->map, LPC_HICR2,
-					LPC_HICR2_IBFIF2, LPC_HICR2_IBFIF2);
-			regmap_update_bits(priv->map, LPC_HICR0,
-					LPC_HICR0_LPC2E, LPC_HICR0_LPC2E);
-		} else {
-			regmap_update_bits(priv->map, LPC_HICR0,
-					LPC_HICR0_LPC2E, 0);
-			regmap_update_bits(priv->map, LPC_HICR2,
-					LPC_HICR2_IBFIF2, 0);
-		}
-		break;
-
+		regmap_update_bits(priv->map, LPC_HICR0, LPC_HICR0_LPC2E, enable * LPC_HICR0_LPC2E);
+		return;
 	case 3:
-		if (enable) {
-			regmap_update_bits(priv->map, LPC_HICR2,
-					LPC_HICR2_IBFIF3, LPC_HICR2_IBFIF3);
-			regmap_update_bits(priv->map, LPC_HICR0,
-					LPC_HICR0_LPC3E, LPC_HICR0_LPC3E);
-			regmap_update_bits(priv->map, LPC_HICR4,
-					LPC_HICR4_KCSENBL, LPC_HICR4_KCSENBL);
-		} else {
-			regmap_update_bits(priv->map, LPC_HICR0,
-					LPC_HICR0_LPC3E, 0);
-			regmap_update_bits(priv->map, LPC_HICR4,
-					LPC_HICR4_KCSENBL, 0);
-			regmap_update_bits(priv->map, LPC_HICR2,
-					LPC_HICR2_IBFIF3, 0);
-		}
-		break;
-
+		regmap_update_bits(priv->map, LPC_HICR0, LPC_HICR0_LPC3E, enable * LPC_HICR0_LPC3E);
+		regmap_update_bits(priv->map, LPC_HICR4,
+				   LPC_HICR4_KCSENBL, enable * LPC_HICR4_KCSENBL);
+		return;
 	case 4:
-		if (enable)
-			regmap_update_bits(priv->map, LPC_HICRB,
-					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E,
-					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E);
-		else
-			regmap_update_bits(priv->map, LPC_HICRB,
-					LPC_HICRB_IBFIF4 | LPC_HICRB_LPC4E,
-					0);
-		break;
-
+		regmap_update_bits(priv->map, LPC_HICRB, LPC_HICRB_LPC4E, enable * LPC_HICRB_LPC4E);
+		return;
 	default:
-		break;
+		pr_warn("%s: Unsupported channel: %d", __func__, kcs_bmc->channel);
+		return;
+	}
+}
+
+static void aspeed_kcs_check_obe(struct timer_list *timer)
+{
+	struct aspeed_kcs_bmc *priv = container_of(timer, struct aspeed_kcs_bmc, obe.timer);
+	unsigned long flags;
+	u8 str;
+
+	spin_lock_irqsave(&priv->obe.lock, flags);
+	if (priv->obe.remove) {
+		spin_unlock_irqrestore(&priv->obe.lock, flags);
+		return;
+	}
+
+	str = aspeed_kcs_inb(&priv->kcs_bmc, priv->kcs_bmc.ioreg.str);
+	if (str & KCS_BMC_STR_OBF) {
+		mod_timer(timer, jiffies + OBE_POLL_PERIOD);
+		spin_unlock_irqrestore(&priv->obe.lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&priv->obe.lock, flags);
+
+	kcs_bmc_handle_event(&priv->kcs_bmc);
+}
+
+static void aspeed_kcs_irq_mask_update(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 state)
+{
+	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
+
+	/* We don't have an OBE IRQ, emulate it */
+	if (mask & KCS_BMC_EVENT_TYPE_OBE) {
+		if (KCS_BMC_EVENT_TYPE_OBE & state)
+			mod_timer(&priv->obe.timer, jiffies + OBE_POLL_PERIOD);
+		else
+			del_timer(&priv->obe.timer);
+	}
+
+	if (mask & KCS_BMC_EVENT_TYPE_IBF) {
+		const bool enable = !!(state & KCS_BMC_EVENT_TYPE_IBF);
+
+		switch (kcs_bmc->channel) {
+		case 1:
+			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIF1,
+					   enable * LPC_HICR2_IBFIF1);
+			return;
+		case 2:
+			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIF2,
+					   enable * LPC_HICR2_IBFIF2);
+			return;
+		case 3:
+			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIF3,
+					   enable * LPC_HICR2_IBFIF3);
+			return;
+		case 4:
+			regmap_update_bits(priv->map, LPC_HICRB, LPC_HICRB_IBFIF4,
+					   enable * LPC_HICRB_IBFIF4);
+			return;
+		default:
+			pr_warn("%s: Unsupported channel: %d", __func__, kcs_bmc->channel);
+			return;
+		}
 	}
 }
 
 static const struct kcs_bmc_device_ops aspeed_kcs_ops = {
+	.irq_mask_update = aspeed_kcs_irq_mask_update,
 	.io_inputb = aspeed_kcs_inb,
 	.io_outputb = aspeed_kcs_outb,
 	.io_updateb = aspeed_kcs_updateb,
@@ -375,6 +404,10 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+	spin_lock_init(&priv->obe.lock);
+	priv->obe.remove = false;
+	timer_setup(&priv->obe.timer, aspeed_kcs_check_obe, 0);
+
 	aspeed_kcs_set_address(kcs_bmc, addr);
 
 	rc = aspeed_kcs_config_irq(kcs_bmc, pdev);
@@ -383,6 +416,8 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
+	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
+				   KCS_BMC_EVENT_TYPE_IBF);
 	aspeed_kcs_enable_channel(kcs_bmc, true);
 
 	rc = kcs_bmc_add_device(&priv->kcs_bmc);
@@ -403,6 +438,15 @@ static int aspeed_kcs_remove(struct platform_device *pdev)
 
 	kcs_bmc_remove_device(kcs_bmc);
 
+	aspeed_kcs_enable_channel(kcs_bmc, false);
+	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
+
+	/* Make sure it's proper dead */
+	spin_lock_irq(&priv->obe.lock);
+	priv->obe.remove = true;
+	spin_unlock_irq(&priv->obe.lock);
+	del_timer_sync(&priv->obe.timer);
+
 	return 0;
 }
 
diff --git a/drivers/char/ipmi/kcs_bmc_client.h b/drivers/char/ipmi/kcs_bmc_client.h
index 814ad8e052ef..6fdcde0a7169 100644
--- a/drivers/char/ipmi/kcs_bmc_client.h
+++ b/drivers/char/ipmi/kcs_bmc_client.h
@@ -35,6 +35,8 @@ void kcs_bmc_unregister_driver(struct kcs_bmc_driver *drv);
 int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client);
 void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client);
 
+void kcs_bmc_update_event_mask(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 events);
+
 u8 kcs_bmc_read_data(struct kcs_bmc_device *kcs_bmc);
 void kcs_bmc_write_data(struct kcs_bmc_device *kcs_bmc, u8 data);
 u8 kcs_bmc_read_status(struct kcs_bmc_device *kcs_bmc);
diff --git a/drivers/char/ipmi/kcs_bmc_device.h b/drivers/char/ipmi/kcs_bmc_device.h
index c9a4237bbf40..17c572f25c54 100644
--- a/drivers/char/ipmi/kcs_bmc_device.h
+++ b/drivers/char/ipmi/kcs_bmc_device.h
@@ -9,6 +9,7 @@
 #include "kcs_bmc.h"
 
 struct kcs_bmc_device_ops {
+	void (*irq_mask_update)(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 enable);
 	u8 (*io_inputb)(struct kcs_bmc_device *kcs_bmc, u32 reg);
 	void (*io_outputb)(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 b);
 	void (*io_updateb)(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 mask, u8 b);
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index 3b96486c25c3..ef81a715f88a 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -38,6 +38,7 @@
 #define KCS2CTL		0x2A
 #define KCS3CTL		0x3C
 #define    KCS_CTL_IBFIE	BIT(0)
+#define    KCS_CTL_OBEIE	BIT(1)
 
 #define KCS1IE		0x1C
 #define KCS2IE		0x2E
@@ -117,13 +118,23 @@ static void npcm7xx_kcs_enable_channel(struct kcs_bmc_device *kcs_bmc, bool enab
 {
 	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
 
-	regmap_update_bits(priv->map, priv->reg->ctl, KCS_CTL_IBFIE,
-			   enable ? KCS_CTL_IBFIE : 0);
-
 	regmap_update_bits(priv->map, priv->reg->ie, KCS_IE_IRQE | KCS_IE_HIRQE,
 			   enable ? KCS_IE_IRQE | KCS_IE_HIRQE : 0);
 }
 
+static void npcm7xx_kcs_irq_mask_update(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 state)
+{
+	struct npcm7xx_kcs_bmc *priv = to_npcm7xx_kcs_bmc(kcs_bmc);
+
+	if (mask & KCS_BMC_EVENT_TYPE_OBE)
+		regmap_update_bits(priv->map, priv->reg->ctl, KCS_CTL_OBEIE,
+				   !!(state & KCS_BMC_EVENT_TYPE_OBE) * KCS_CTL_OBEIE);
+
+	if (mask & KCS_BMC_EVENT_TYPE_IBF)
+		regmap_update_bits(priv->map, priv->reg->ctl, KCS_CTL_IBFIE,
+				   !!(state & KCS_BMC_EVENT_TYPE_IBF) * KCS_CTL_IBFIE);
+}
+
 static irqreturn_t npcm7xx_kcs_irq(int irq, void *arg)
 {
 	struct kcs_bmc_device *kcs_bmc = arg;
@@ -146,6 +157,7 @@ static int npcm7xx_kcs_config_irq(struct kcs_bmc_device *kcs_bmc,
 }
 
 static const struct kcs_bmc_device_ops npcm7xx_kcs_ops = {
+	.irq_mask_update = npcm7xx_kcs_irq_mask_update,
 	.io_inputb = npcm7xx_kcs_inb,
 	.io_outputb = npcm7xx_kcs_outb,
 	.io_updateb = npcm7xx_kcs_updateb,
@@ -186,11 +198,14 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
-	npcm7xx_kcs_enable_channel(kcs_bmc, true);
 	rc = npcm7xx_kcs_config_irq(kcs_bmc, pdev);
 	if (rc)
 		return rc;
 
+	npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
+				    KCS_BMC_EVENT_TYPE_IBF);
+	npcm7xx_kcs_enable_channel(kcs_bmc, true);
+
 	rc = kcs_bmc_add_device(kcs_bmc);
 	if (rc) {
 		dev_warn(&pdev->dev, "Failed to register channel %d: %d\n", kcs_bmc->channel, rc);
@@ -211,6 +226,9 @@ static int npcm7xx_kcs_remove(struct platform_device *pdev)
 
 	kcs_bmc_remove_device(kcs_bmc);
 
+	npcm7xx_kcs_enable_channel(kcs_bmc, false);
+	npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
+
 	return 0;
 }
 

From fb6379f524ff56314fa857bb5c84bd22eee41eb7 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:51 +0930
Subject: [PATCH 2468/3292] ipmi: kcs_bmc: Enable IBF on open

This way devices don't get delivered IRQs when no-one is interested.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-Id: <20210608104757.582199-11-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c         | 25 ++++++++++---------------
 drivers/char/ipmi/kcs_bmc_aspeed.c  |  3 +--
 drivers/char/ipmi/kcs_bmc_npcm7xx.c |  3 +--
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index 91eb43746cef..03d02a848f3a 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -55,24 +55,12 @@ EXPORT_SYMBOL(kcs_bmc_update_status);
 irqreturn_t kcs_bmc_handle_event(struct kcs_bmc_device *kcs_bmc)
 {
 	struct kcs_bmc_client *client;
-	irqreturn_t rc;
+	irqreturn_t rc = IRQ_NONE;
 
 	spin_lock(&kcs_bmc->lock);
 	client = kcs_bmc->client;
-	if (client) {
+	if (client)
 		rc = client->ops->event(client);
-	} else {
-		u8 status;
-
-		status = kcs_bmc_read_status(kcs_bmc);
-		if (status & KCS_BMC_STR_IBF) {
-			/* Ack the event by reading the data */
-			kcs_bmc_read_data(kcs_bmc);
-			rc = IRQ_HANDLED;
-		} else {
-			rc = IRQ_NONE;
-		}
-	}
 	spin_unlock(&kcs_bmc->lock);
 
 	return rc;
@@ -87,7 +75,10 @@ int kcs_bmc_enable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client
 	if (kcs_bmc->client) {
 		rc = -EBUSY;
 	} else {
+		u8 mask = KCS_BMC_EVENT_TYPE_IBF;
+
 		kcs_bmc->client = client;
+		kcs_bmc_update_event_mask(kcs_bmc, mask, mask);
 		rc = 0;
 	}
 	spin_unlock_irq(&kcs_bmc->lock);
@@ -99,8 +90,12 @@ EXPORT_SYMBOL(kcs_bmc_enable_device);
 void kcs_bmc_disable_device(struct kcs_bmc_device *kcs_bmc, struct kcs_bmc_client *client)
 {
 	spin_lock_irq(&kcs_bmc->lock);
-	if (client == kcs_bmc->client)
+	if (client == kcs_bmc->client) {
+		u8 mask = KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE;
+
+		kcs_bmc_update_event_mask(kcs_bmc, mask, 0);
 		kcs_bmc->client = NULL;
+	}
 	spin_unlock_irq(&kcs_bmc->lock);
 }
 EXPORT_SYMBOL(kcs_bmc_disable_device);
diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 5bf8d013e4ae..43994688dc8a 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -416,8 +416,7 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
-	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
-				   KCS_BMC_EVENT_TYPE_IBF);
+	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
 	aspeed_kcs_enable_channel(kcs_bmc, true);
 
 	rc = kcs_bmc_add_device(&priv->kcs_bmc);
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
index ef81a715f88a..7961fec56476 100644
--- a/drivers/char/ipmi/kcs_bmc_npcm7xx.c
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -202,8 +202,7 @@ static int npcm7xx_kcs_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE),
-				    KCS_BMC_EVENT_TYPE_IBF);
+	npcm7xx_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
 	npcm7xx_kcs_enable_channel(kcs_bmc, true);
 
 	rc = kcs_bmc_add_device(kcs_bmc);

From 3a3d2f6a4c64b9e709edc13b685ce46c8629060e Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:52 +0930
Subject: [PATCH 2469/3292] ipmi: kcs_bmc: Add serio adaptor

kcs_bmc_serio acts as a bridge between the KCS drivers in the IPMI
subsystem and the existing userspace interfaces available through the
serio subsystem. This is useful when userspace would like to make use of
the BMC KCS devices for purposes that aren't IPMI.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-Id: <20210608104757.582199-12-andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/Kconfig         |  14 +++
 drivers/char/ipmi/Makefile        |   1 +
 drivers/char/ipmi/kcs_bmc_serio.c | 157 ++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+)
 create mode 100644 drivers/char/ipmi/kcs_bmc_serio.c

diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index bc5f81899b62..249b31197eea 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -137,6 +137,20 @@ config IPMI_KCS_BMC_CDEV_IPMI
 	  This support is also available as a module. The module will be
 	  called kcs_bmc_cdev_ipmi.
 
+config IPMI_KCS_BMC_SERIO
+	depends on IPMI_KCS_BMC && SERIO
+	tristate "SerIO adaptor for BMC KCS devices"
+	help
+	  Adapts the BMC KCS device for the SerIO subsystem. This allows users
+	  to take advantage of userspace interfaces provided by SerIO where
+	  appropriate.
+
+	  Say YES if you wish to expose KCS devices on the BMC via SerIO
+	  interfaces.
+
+	  This support is also available as a module. The module will be
+	  called kcs_bmc_serio.
+
 config ASPEED_BT_IPMI_BMC
 	depends on ARCH_ASPEED || COMPILE_TEST
 	depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index fcfa676afddb..84f47d18007f 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
 obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
+obj-$(CONFIG_IPMI_KCS_BMC_SERIO) += kcs_bmc_serio.o
 obj-$(CONFIG_IPMI_KCS_BMC_CDEV_IPMI) += kcs_bmc_cdev_ipmi.o
 obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
 obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
diff --git a/drivers/char/ipmi/kcs_bmc_serio.c b/drivers/char/ipmi/kcs_bmc_serio.c
new file mode 100644
index 000000000000..7948cabde50b
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc_serio.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2021 IBM Corp. */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/sched/signal.h>
+#include <linux/serio.h>
+#include <linux/slab.h>
+
+#include "kcs_bmc_client.h"
+
+struct kcs_bmc_serio {
+	struct list_head entry;
+
+	struct kcs_bmc_client client;
+	struct serio *port;
+
+	spinlock_t lock;
+};
+
+static inline struct kcs_bmc_serio *client_to_kcs_bmc_serio(struct kcs_bmc_client *client)
+{
+	return container_of(client, struct kcs_bmc_serio, client);
+}
+
+static irqreturn_t kcs_bmc_serio_event(struct kcs_bmc_client *client)
+{
+	struct kcs_bmc_serio *priv;
+	u8 handled = IRQ_NONE;
+	u8 status;
+
+	priv = client_to_kcs_bmc_serio(client);
+
+	spin_lock(&priv->lock);
+
+	status = kcs_bmc_read_status(client->dev);
+
+	if (status & KCS_BMC_STR_IBF)
+		handled = serio_interrupt(priv->port, kcs_bmc_read_data(client->dev), 0);
+
+	spin_unlock(&priv->lock);
+
+	return handled;
+}
+
+static const struct kcs_bmc_client_ops kcs_bmc_serio_client_ops = {
+	.event = kcs_bmc_serio_event,
+};
+
+static int kcs_bmc_serio_open(struct serio *port)
+{
+	struct kcs_bmc_serio *priv = port->port_data;
+
+	return kcs_bmc_enable_device(priv->client.dev, &priv->client);
+}
+
+static void kcs_bmc_serio_close(struct serio *port)
+{
+	struct kcs_bmc_serio *priv = port->port_data;
+
+	kcs_bmc_disable_device(priv->client.dev, &priv->client);
+}
+
+static DEFINE_SPINLOCK(kcs_bmc_serio_instances_lock);
+static LIST_HEAD(kcs_bmc_serio_instances);
+
+static int kcs_bmc_serio_add_device(struct kcs_bmc_device *kcs_bmc)
+{
+	struct kcs_bmc_serio *priv;
+	struct serio *port;
+
+	priv = devm_kzalloc(kcs_bmc->dev, sizeof(*priv), GFP_KERNEL);
+
+	/* Use kzalloc() as the allocation is cleaned up with kfree() via serio_unregister_port() */
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!(priv && port))
+		return -ENOMEM;
+
+	port->id.type = SERIO_8042;
+	port->open = kcs_bmc_serio_open;
+	port->close = kcs_bmc_serio_close;
+	port->port_data = priv;
+	port->dev.parent = kcs_bmc->dev;
+
+	spin_lock_init(&priv->lock);
+	priv->port = port;
+	priv->client.dev = kcs_bmc;
+	priv->client.ops = &kcs_bmc_serio_client_ops;
+
+	spin_lock_irq(&kcs_bmc_serio_instances_lock);
+	list_add(&priv->entry, &kcs_bmc_serio_instances);
+	spin_unlock_irq(&kcs_bmc_serio_instances_lock);
+
+	serio_register_port(port);
+
+	dev_info(kcs_bmc->dev, "Initialised serio client for channel %d", kcs_bmc->channel);
+
+	return 0;
+}
+
+static int kcs_bmc_serio_remove_device(struct kcs_bmc_device *kcs_bmc)
+{
+	struct kcs_bmc_serio *priv = NULL, *pos;
+
+	spin_lock_irq(&kcs_bmc_serio_instances_lock);
+	list_for_each_entry(pos, &kcs_bmc_serio_instances, entry) {
+		if (pos->client.dev == kcs_bmc) {
+			priv = pos;
+			list_del(&pos->entry);
+			break;
+		}
+	}
+	spin_unlock_irq(&kcs_bmc_serio_instances_lock);
+
+	if (!priv)
+		return -ENODEV;
+
+	/* kfree()s priv->port via put_device() */
+	serio_unregister_port(priv->port);
+
+	/* Ensure the IBF IRQ is disabled if we were the active client */
+	kcs_bmc_disable_device(kcs_bmc, &priv->client);
+
+	devm_kfree(priv->client.dev->dev, priv);
+
+	return 0;
+}
+
+static const struct kcs_bmc_driver_ops kcs_bmc_serio_driver_ops = {
+	.add_device = kcs_bmc_serio_add_device,
+	.remove_device = kcs_bmc_serio_remove_device,
+};
+
+static struct kcs_bmc_driver kcs_bmc_serio_driver = {
+	.ops = &kcs_bmc_serio_driver_ops,
+};
+
+static int kcs_bmc_serio_init(void)
+{
+	kcs_bmc_register_driver(&kcs_bmc_serio_driver);
+
+	return 0;
+}
+module_init(kcs_bmc_serio_init);
+
+static void kcs_bmc_serio_exit(void)
+{
+	kcs_bmc_unregister_driver(&kcs_bmc_serio_driver);
+}
+module_exit(kcs_bmc_serio_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
+MODULE_DESCRIPTION("Adapter driver for serio access to BMC KCS devices");

From e880275ccfa120bf6235180ca76f01271b7b97ec Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:53 +0930
Subject: [PATCH 2470/3292] dt-bindings: ipmi: Convert ASPEED KCS binding to
 schema

Given the deprecated binding, improve the ability to detect issues in
the platform devicetrees. Further, a subsequent patch will introduce a
new interrupts property for specifying SerIRQ behaviour, so convert
before we do any further additions.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-13-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 .../bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml | 92 +++++++++++++++++++
 .../bindings/ipmi/aspeed-kcs-bmc.txt          | 33 -------
 2 files changed, 92 insertions(+), 33 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml
 delete mode 100644 Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt

diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml
new file mode 100644
index 000000000000..697ca575454f
--- /dev/null
+++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/ipmi/aspeed,ast2400-kcs-bmc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ASPEED BMC KCS Devices
+
+maintainers:
+  - Andrew Jeffery <andrew@aj.id.au>
+
+description: |
+  The Aspeed BMC SoCs typically use the Keyboard-Controller-Style (KCS)
+  interfaces on the LPC bus for in-band IPMI communication with their host.
+
+properties:
+  compatible:
+    oneOf:
+      - description: Channel ID derived from reg
+        items:
+          enum:
+            - aspeed,ast2400-kcs-bmc-v2
+            - aspeed,ast2500-kcs-bmc-v2
+            - aspeed,ast2600-kcs-bmc
+
+      - description: Old-style with explicit channel ID, no reg
+        deprecated: true
+        items:
+          enum:
+            - aspeed,ast2400-kcs-bmc
+            - aspeed,ast2500-kcs-bmc
+
+  interrupts:
+    maxItems: 1
+
+  reg:
+    # maxItems: 3
+    items:
+      - description: IDR register
+      - description: ODR register
+      - description: STR register
+
+  aspeed,lpc-io-reg:
+    $ref: '/schemas/types.yaml#/definitions/uint32-array'
+    minItems: 1
+    maxItems: 2
+    description: |
+      The host CPU LPC IO data and status addresses for the device. For most
+      channels the status address is derived from the data address, but the
+      status address may be optionally provided.
+
+  kcs_chan:
+    deprecated: true
+    $ref: '/schemas/types.yaml#/definitions/uint32'
+    description: The LPC channel number in the controller
+
+  kcs_addr:
+    deprecated: true
+    $ref: '/schemas/types.yaml#/definitions/uint32'
+    description: The host CPU IO map address
+
+required:
+  - compatible
+  - interrupts
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - aspeed,ast2400-kcs-bmc
+              - aspeed,ast2500-kcs-bmc
+    then:
+      required:
+        - kcs_chan
+        - kcs_addr
+    else:
+      required:
+        - reg
+        - aspeed,lpc-io-reg
+
+examples:
+  - |
+    kcs3: kcs@24 {
+        compatible = "aspeed,ast2600-kcs-bmc";
+        reg = <0x24 0x1>, <0x30 0x1>, <0x3c 0x1>;
+        aspeed,lpc-io-reg = <0xca2>;
+        interrupts = <8>;
+    };
diff --git a/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
deleted file mode 100644
index 193e71ca96b0..000000000000
--- a/Documentation/devicetree/bindings/ipmi/aspeed-kcs-bmc.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-# Aspeed KCS (Keyboard Controller Style) IPMI interface
-
-The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
-(Baseboard Management Controllers) and the KCS interface can be
-used to perform in-band IPMI communication with their host.
-
-## v1
-Required properties:
-- compatible : should be one of
-    "aspeed,ast2400-kcs-bmc"
-    "aspeed,ast2500-kcs-bmc"
-- interrupts : interrupt generated by the controller
-- kcs_chan : The LPC channel number in the controller
-- kcs_addr : The host CPU IO map address
-
-## v2
-Required properties:
-- compatible : should be one of
-    "aspeed,ast2400-kcs-bmc-v2"
-    "aspeed,ast2500-kcs-bmc-v2"
-- reg : The address and size of the IDR, ODR and STR registers
-- interrupts : interrupt generated by the controller
-- aspeed,lpc-io-reg : The host CPU LPC IO address for the device
-
-Example:
-
-    kcs3: kcs@24 {
-        compatible = "aspeed,ast2500-kcs-bmc-v2";
-        reg = <0x24 0x1>, <0x30 0x1>, <0x3c 0x1>;
-        aspeed,lpc-reg = <0xca2>;
-        interrupts = <8>;
-        status = "okay";
-    };

From a7fd43d95054fe03cac3878538dcf12caa854889 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:54 +0930
Subject: [PATCH 2471/3292] dt-bindings: ipmi: Add optional SerIRQ property to
 ASPEED KCS devices

Allocating IO and IRQ resources to LPC devices is in-theory an operation
for the host, however ASPEED don't appear to expose this capability
outside the BMC (e.g. SuperIO). Instead, we are left with BMC-internal
registers for managing these resources, so introduce a devicetree
property for KCS devices to describe SerIRQ properties.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Rob Herring <robh@kernel.org>
Message-Id: <20210608104757.582199-14-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 .../bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml      | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml
index 697ca575454f..4ff6fabfcb30 100644
--- a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml
+++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml
@@ -49,6 +49,18 @@ properties:
       channels the status address is derived from the data address, but the
       status address may be optionally provided.
 
+  aspeed,lpc-interrupts:
+    $ref: "/schemas/types.yaml#/definitions/uint32-array"
+    minItems: 2
+    maxItems: 2
+    description: |
+      A 2-cell property expressing the LPC SerIRQ number and the interrupt
+      level/sense encoding (specified in the standard fashion).
+
+      Note that the generated interrupt is issued from the BMC to the host, and
+      thus the target interrupt controller is not captured by the BMC's
+      devicetree.
+
   kcs_chan:
     deprecated: true
     $ref: '/schemas/types.yaml#/definitions/uint32'
@@ -84,9 +96,11 @@ allOf:
 
 examples:
   - |
+    #include <dt-bindings/interrupt-controller/irq.h>
     kcs3: kcs@24 {
         compatible = "aspeed,ast2600-kcs-bmc";
         reg = <0x24 0x1>, <0x30 0x1>, <0x3c 0x1>;
         aspeed,lpc-io-reg = <0xca2>;
+        aspeed,lpc-interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
         interrupts = <8>;
     };

From a5e4236d560579a20e2bc870f6e6f32e4e7c0354 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:55 +0930
Subject: [PATCH 2472/3292] ipmi: kcs_bmc_aspeed: Implement KCS SerIRQ
 configuration

Apply the SerIRQ ID and level/sense behaviours from the devicetree if
provided.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Message-Id: <20210608104757.582199-15-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc_aspeed.c | 182 ++++++++++++++++++++++++++++-
 1 file changed, 180 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 43994688dc8a..922fe41f6b05 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -28,6 +29,22 @@
 
 #define KCS_CHANNEL_MAX     4
 
+/*
+ * Field class descriptions
+ *
+ * LPCyE	Enable LPC channel y
+ * IBFIEy	Input Buffer Full IRQ Enable for LPC channel y
+ * IRQxEy	Assert SerIRQ x for LPC channel y (Deprecated, use IDyIRQX, IRQXEy)
+ * IDyIRQX	Use the specified 4-bit SerIRQ for LPC channel y
+ * SELyIRQX	SerIRQ polarity for LPC channel y (low: 0, high: 1)
+ * IRQXEy	Assert the SerIRQ specified in IDyIRQX for LPC channel y
+ */
+
+#define LPC_TYIRQX_LOW       0b00
+#define LPC_TYIRQX_HIGH      0b01
+#define LPC_TYIRQX_RSVD      0b10
+#define LPC_TYIRQX_RISING    0b11
+
 #define LPC_HICR0            0x000
 #define     LPC_HICR0_LPC3E          BIT(7)
 #define     LPC_HICR0_LPC2E          BIT(6)
@@ -39,6 +56,19 @@
 #define LPC_HICR4            0x010
 #define     LPC_HICR4_LADR12AS       BIT(7)
 #define     LPC_HICR4_KCSENBL        BIT(2)
+#define LPC_SIRQCR0	     0x070
+/* IRQ{12,1}E1 are deprecated as of AST2600 A3 but necessary for prior chips */
+#define     LPC_SIRQCR0_IRQ12E1	     BIT(1)
+#define     LPC_SIRQCR0_IRQ1E1	     BIT(0)
+#define LPC_HICR5	     0x080
+#define     LPC_HICR5_ID3IRQX_MASK   GENMASK(23, 20)
+#define     LPC_HICR5_ID3IRQX_SHIFT  20
+#define     LPC_HICR5_ID2IRQX_MASK   GENMASK(19, 16)
+#define     LPC_HICR5_ID2IRQX_SHIFT  16
+#define     LPC_HICR5_SEL3IRQX       BIT(15)
+#define     LPC_HICR5_IRQXE3         BIT(14)
+#define     LPC_HICR5_SEL2IRQX       BIT(13)
+#define     LPC_HICR5_IRQXE2         BIT(12)
 #define LPC_LADR3H           0x014
 #define LPC_LADR3L           0x018
 #define LPC_LADR12H          0x01C
@@ -55,6 +85,13 @@
 #define LPC_HICRB            0x100
 #define     LPC_HICRB_IBFIF4         BIT(1)
 #define     LPC_HICRB_LPC4E          BIT(0)
+#define LPC_HICRC            0x104
+#define     LPC_HICRC_ID4IRQX_MASK   GENMASK(7, 4)
+#define     LPC_HICRC_ID4IRQX_SHIFT  4
+#define     LPC_HICRC_TY4IRQX_MASK   GENMASK(3, 2)
+#define     LPC_HICRC_TY4IRQX_SHIFT  2
+#define     LPC_HICRC_OBF4_AUTO_CLR  BIT(1)
+#define     LPC_HICRC_IRQXE4         BIT(0)
 #define LPC_LADR4            0x110
 #define LPC_IDR4             0x114
 #define LPC_ODR4             0x118
@@ -62,11 +99,21 @@
 
 #define OBE_POLL_PERIOD	     (HZ / 2)
 
+enum aspeed_kcs_irq_mode {
+	aspeed_kcs_irq_none,
+	aspeed_kcs_irq_serirq,
+};
+
 struct aspeed_kcs_bmc {
 	struct kcs_bmc_device kcs_bmc;
 
 	struct regmap *map;
 
+	struct {
+		enum aspeed_kcs_irq_mode mode;
+		int id;
+	} upstream_irq;
+
 	struct {
 		spinlock_t lock;
 		bool remove;
@@ -103,6 +150,49 @@ static void aspeed_kcs_outb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 data)
 
 	rc = regmap_write(priv->map, reg, data);
 	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
+
+	/* Trigger the upstream IRQ on ODR writes, if enabled */
+
+	switch (reg) {
+	case LPC_ODR1:
+	case LPC_ODR2:
+	case LPC_ODR3:
+	case LPC_ODR4:
+		break;
+	default:
+		return;
+	}
+
+	if (priv->upstream_irq.mode != aspeed_kcs_irq_serirq)
+		return;
+
+	switch (kcs_bmc->channel) {
+	case 1:
+		switch (priv->upstream_irq.id) {
+		case 12:
+			regmap_update_bits(priv->map, LPC_SIRQCR0, LPC_SIRQCR0_IRQ12E1,
+					   LPC_SIRQCR0_IRQ12E1);
+			break;
+		case 1:
+			regmap_update_bits(priv->map, LPC_SIRQCR0, LPC_SIRQCR0_IRQ1E1,
+					   LPC_SIRQCR0_IRQ1E1);
+			break;
+		default:
+			break;
+		}
+		break;
+	case 2:
+		regmap_update_bits(priv->map, LPC_HICR5, LPC_HICR5_IRQXE2, LPC_HICR5_IRQXE2);
+		break;
+	case 3:
+		regmap_update_bits(priv->map, LPC_HICR5, LPC_HICR5_IRQXE3, LPC_HICR5_IRQXE3);
+		break;
+	case 4:
+		regmap_update_bits(priv->map, LPC_HICRC, LPC_HICRC_IRQXE4, LPC_HICRC_IRQXE4);
+		break;
+	default:
+		break;
+	}
 }
 
 static void aspeed_kcs_updateb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 mask, u8 val)
@@ -161,6 +251,73 @@ static void aspeed_kcs_set_address(struct kcs_bmc_device *kcs_bmc, u16 addr)
 	}
 }
 
+static inline int aspeed_kcs_map_serirq_type(u32 dt_type)
+{
+	switch (dt_type) {
+	case IRQ_TYPE_EDGE_RISING:
+		return LPC_TYIRQX_RISING;
+	case IRQ_TYPE_LEVEL_HIGH:
+		return LPC_TYIRQX_HIGH;
+	case IRQ_TYPE_LEVEL_LOW:
+		return LPC_TYIRQX_LOW;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int aspeed_kcs_config_upstream_irq(struct aspeed_kcs_bmc *priv, u32 id, u32 dt_type)
+{
+	unsigned int mask, val, hw_type;
+
+	if (id > 15)
+		return -EINVAL;
+
+	hw_type = aspeed_kcs_map_serirq_type(dt_type);
+	if (hw_type < 0)
+		return hw_type;
+
+	priv->upstream_irq.mode = aspeed_kcs_irq_serirq;
+	priv->upstream_irq.id = id;
+
+	switch (priv->kcs_bmc.channel) {
+	case 1:
+		/* Needs IRQxE1 rather than (ID1IRQX, SEL1IRQX, IRQXE1) before AST2600 A3 */
+		break;
+	case 2:
+		if (!(hw_type == LPC_TYIRQX_LOW || hw_type == LPC_TYIRQX_HIGH))
+			return -EINVAL;
+
+		mask = LPC_HICR5_SEL2IRQX | LPC_HICR5_ID2IRQX_MASK;
+		val = (id << LPC_HICR5_ID2IRQX_SHIFT);
+		val |= (hw_type == LPC_TYIRQX_HIGH) ? LPC_HICR5_SEL2IRQX : 0;
+		regmap_update_bits(priv->map, LPC_HICR5, mask, val);
+
+		break;
+	case 3:
+		if (!(hw_type == LPC_TYIRQX_LOW || hw_type == LPC_TYIRQX_HIGH))
+			return -EINVAL;
+
+		mask = LPC_HICR5_SEL3IRQX | LPC_HICR5_ID3IRQX_MASK;
+		val = (id << LPC_HICR5_ID3IRQX_SHIFT);
+		val |= (hw_type == LPC_TYIRQX_HIGH) ? LPC_HICR5_SEL3IRQX : 0;
+		regmap_update_bits(priv->map, LPC_HICR5, mask, val);
+
+		break;
+	case 4:
+		mask = LPC_HICRC_ID4IRQX_MASK | LPC_HICRC_TY4IRQX_MASK | LPC_HICRC_OBF4_AUTO_CLR;
+		val = (id << LPC_HICRC_ID4IRQX_SHIFT) | (hw_type << LPC_HICRC_TY4IRQX_SHIFT);
+		regmap_update_bits(priv->map, LPC_HICRC, mask, val);
+		break;
+	default:
+		dev_warn(priv->kcs_bmc.dev,
+			 "SerIRQ configuration not supported on KCS channel %d\n",
+			 priv->kcs_bmc.channel);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void aspeed_kcs_enable_channel(struct kcs_bmc_device *kcs_bmc, bool enable)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
@@ -262,7 +419,7 @@ static irqreturn_t aspeed_kcs_irq(int irq, void *arg)
 	return kcs_bmc_handle_event(kcs_bmc);
 }
 
-static int aspeed_kcs_config_irq(struct kcs_bmc_device *kcs_bmc,
+static int aspeed_kcs_config_downstream_irq(struct kcs_bmc_device *kcs_bmc,
 			struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -368,6 +525,8 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	struct aspeed_kcs_bmc *priv;
 	struct device_node *np;
 	int rc, channel, addr;
+	bool have_upstream_irq;
+	u32 upstream_irq[2];
 
 	np = pdev->dev.of_node->parent;
 	if (!of_device_is_compatible(np, "aspeed,ast2400-lpc-v2") &&
@@ -376,6 +535,7 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "unsupported LPC device binding\n");
 		return -ENODEV;
 	}
+
 	ops = of_device_get_match_data(&pdev->dev);
 	if (!ops)
 		return -EINVAL;
@@ -388,6 +548,13 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	if (addr < 0)
 		return addr;
 
+	np = pdev->dev.of_node;
+	rc = of_property_read_u32_array(np, "aspeed,lpc-interrupts", upstream_irq, 2);
+	if (rc && rc != -EINVAL)
+		return -EINVAL;
+
+	have_upstream_irq = !rc;
+
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -410,10 +577,20 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 
 	aspeed_kcs_set_address(kcs_bmc, addr);
 
-	rc = aspeed_kcs_config_irq(kcs_bmc, pdev);
+	/* Host to BMC IRQ */
+	rc = aspeed_kcs_config_downstream_irq(kcs_bmc, pdev);
 	if (rc)
 		return rc;
 
+	/* BMC to Host IRQ */
+	if (have_upstream_irq) {
+		rc = aspeed_kcs_config_upstream_irq(priv, upstream_irq[0], upstream_irq[1]);
+		if (rc < 0)
+			return rc;
+	} else {
+		priv->upstream_irq.mode = aspeed_kcs_irq_none;
+	}
+
 	platform_set_drvdata(pdev, priv);
 
 	aspeed_kcs_irq_mask_update(kcs_bmc, (KCS_BMC_EVENT_TYPE_IBF | KCS_BMC_EVENT_TYPE_OBE), 0);
@@ -480,4 +657,5 @@ module_platform_driver(ast_kcs_bmc_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
 MODULE_DESCRIPTION("Aspeed device interface to the KCS BMC device");

From 78ff5a1666cd4a1129ca53dad5c68700fe63ab6a Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:56 +0930
Subject: [PATCH 2473/3292] ipmi: kcs_bmc_aspeed: Fix IBFIE typo from datasheet

Input Buffer Full Interrupt Enable (IBFIE) is typoed as IBFIF for some
registers in the datasheet. Fix the driver to use the sensible acronym.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Zev Weiss <zweiss@equinix.com>
Message-Id: <20210608104757.582199-16-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc_aspeed.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 922fe41f6b05..461cb2c9cc7e 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -50,9 +50,9 @@
 #define     LPC_HICR0_LPC2E          BIT(6)
 #define     LPC_HICR0_LPC1E          BIT(5)
 #define LPC_HICR2            0x008
-#define     LPC_HICR2_IBFIF3         BIT(3)
-#define     LPC_HICR2_IBFIF2         BIT(2)
-#define     LPC_HICR2_IBFIF1         BIT(1)
+#define     LPC_HICR2_IBFIE3         BIT(3)
+#define     LPC_HICR2_IBFIE2         BIT(2)
+#define     LPC_HICR2_IBFIE1         BIT(1)
 #define LPC_HICR4            0x010
 #define     LPC_HICR4_LADR12AS       BIT(7)
 #define     LPC_HICR4_KCSENBL        BIT(2)
@@ -83,7 +83,7 @@
 #define LPC_STR2             0x040
 #define LPC_STR3             0x044
 #define LPC_HICRB            0x100
-#define     LPC_HICRB_IBFIF4         BIT(1)
+#define     LPC_HICRB_IBFIE4         BIT(1)
 #define     LPC_HICRB_LPC4E          BIT(0)
 #define LPC_HICRC            0x104
 #define     LPC_HICRC_ID4IRQX_MASK   GENMASK(7, 4)
@@ -383,20 +383,20 @@ static void aspeed_kcs_irq_mask_update(struct kcs_bmc_device *kcs_bmc, u8 mask,
 
 		switch (kcs_bmc->channel) {
 		case 1:
-			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIF1,
-					   enable * LPC_HICR2_IBFIF1);
+			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIE1,
+					   enable * LPC_HICR2_IBFIE1);
 			return;
 		case 2:
-			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIF2,
-					   enable * LPC_HICR2_IBFIF2);
+			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIE2,
+					   enable * LPC_HICR2_IBFIE2);
 			return;
 		case 3:
-			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIF3,
-					   enable * LPC_HICR2_IBFIF3);
+			regmap_update_bits(priv->map, LPC_HICR2, LPC_HICR2_IBFIE3,
+					   enable * LPC_HICR2_IBFIE3);
 			return;
 		case 4:
-			regmap_update_bits(priv->map, LPC_HICRB, LPC_HICRB_IBFIF4,
-					   enable * LPC_HICRB_IBFIF4);
+			regmap_update_bits(priv->map, LPC_HICRB, LPC_HICRB_IBFIE4,
+					   enable * LPC_HICRB_IBFIE4);
 			return;
 		default:
 			pr_warn("%s: Unsupported channel: %d", __func__, kcs_bmc->channel);

From d0ec795c8c275ceca2076861f7fe3cc5f18da348 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 8 Jun 2021 20:17:57 +0930
Subject: [PATCH 2474/3292] ipmi: kcs_bmc_aspeed: Optionally apply status
 address

Some Aspeed KCS devices can derive the status register address from the
address of the data register. As such, the address of the status
register can be implicit in the configuration if desired. On the other
hand, sometimes address schemes might be requested that are incompatible
with the default addressing scheme. Allow these requests where possible
if the devicetree specifies the status register address.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Chia-Wei Wang <chiawei_wang@aspeedtech.com>
Message-Id: <20210608104757.582199-17-andrew@aj.id.au>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc_aspeed.c | 116 +++++++++++++++++++++--------
 1 file changed, 83 insertions(+), 33 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 461cb2c9cc7e..0401089f8895 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -83,6 +83,8 @@
 #define LPC_STR2             0x040
 #define LPC_STR3             0x044
 #define LPC_HICRB            0x100
+#define     LPC_HICRB_EN16LADR2      BIT(5)
+#define     LPC_HICRB_EN16LADR1      BIT(4)
 #define     LPC_HICRB_IBFIE4         BIT(1)
 #define     LPC_HICRB_LPC4E          BIT(0)
 #define LPC_HICRC            0x104
@@ -96,6 +98,11 @@
 #define LPC_IDR4             0x114
 #define LPC_ODR4             0x118
 #define LPC_STR4             0x11C
+#define LPC_LSADR12	     0x120
+#define     LPC_LSADR12_LSADR2_MASK  GENMASK(31, 16)
+#define     LPC_LSADR12_LSADR2_SHIFT 16
+#define     LPC_LSADR12_LSADR1_MASK  GENMASK(15, 0)
+#define     LPC_LSADR12_LSADR1_SHIFT 0
 
 #define OBE_POLL_PERIOD	     (HZ / 2)
 
@@ -123,7 +130,7 @@ struct aspeed_kcs_bmc {
 
 struct aspeed_kcs_of_ops {
 	int (*get_channel)(struct platform_device *pdev);
-	int (*get_io_address)(struct platform_device *pdev);
+	int (*get_io_address)(struct platform_device *pdev, u32 addrs[2]);
 };
 
 static inline struct aspeed_kcs_bmc *to_aspeed_kcs_bmc(struct kcs_bmc_device *kcs_bmc)
@@ -217,38 +224,64 @@ static void aspeed_kcs_updateb(struct kcs_bmc_device *kcs_bmc, u32 reg, u8 mask,
  *     C. KCS4
  *        D / C : CA4h / CA5h
  */
-static void aspeed_kcs_set_address(struct kcs_bmc_device *kcs_bmc, u16 addr)
+static int aspeed_kcs_set_address(struct kcs_bmc_device *kcs_bmc, u32 addrs[2], int nr_addrs)
 {
 	struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc);
 
-	switch (kcs_bmc->channel) {
+	if (WARN_ON(nr_addrs < 1 || nr_addrs > 2))
+		return -EINVAL;
+
+	switch (priv->kcs_bmc.channel) {
 	case 1:
-		regmap_update_bits(priv->map, LPC_HICR4,
-				LPC_HICR4_LADR12AS, 0);
-		regmap_write(priv->map, LPC_LADR12H, addr >> 8);
-		regmap_write(priv->map, LPC_LADR12L, addr & 0xFF);
+		regmap_update_bits(priv->map, LPC_HICR4, LPC_HICR4_LADR12AS, 0);
+		regmap_write(priv->map, LPC_LADR12H, addrs[0] >> 8);
+		regmap_write(priv->map, LPC_LADR12L, addrs[0] & 0xFF);
+		if (nr_addrs == 2) {
+			regmap_update_bits(priv->map, LPC_LSADR12, LPC_LSADR12_LSADR1_MASK,
+					   addrs[1] << LPC_LSADR12_LSADR1_SHIFT);
+
+			regmap_update_bits(priv->map, LPC_HICRB, LPC_HICRB_EN16LADR1,
+					   LPC_HICRB_EN16LADR1);
+		}
 		break;
 
 	case 2:
-		regmap_update_bits(priv->map, LPC_HICR4,
-				LPC_HICR4_LADR12AS, LPC_HICR4_LADR12AS);
-		regmap_write(priv->map, LPC_LADR12H, addr >> 8);
-		regmap_write(priv->map, LPC_LADR12L, addr & 0xFF);
+		regmap_update_bits(priv->map, LPC_HICR4, LPC_HICR4_LADR12AS, LPC_HICR4_LADR12AS);
+		regmap_write(priv->map, LPC_LADR12H, addrs[0] >> 8);
+		regmap_write(priv->map, LPC_LADR12L, addrs[0] & 0xFF);
+		if (nr_addrs == 2) {
+			regmap_update_bits(priv->map, LPC_LSADR12, LPC_LSADR12_LSADR2_MASK,
+					   addrs[1] << LPC_LSADR12_LSADR2_SHIFT);
+
+			regmap_update_bits(priv->map, LPC_HICRB, LPC_HICRB_EN16LADR2,
+					   LPC_HICRB_EN16LADR2);
+		}
 		break;
 
 	case 3:
-		regmap_write(priv->map, LPC_LADR3H, addr >> 8);
-		regmap_write(priv->map, LPC_LADR3L, addr & 0xFF);
+		if (nr_addrs == 2) {
+			dev_err(priv->kcs_bmc.dev,
+				"Channel 3 only supports inferred status IO address\n");
+			return -EINVAL;
+		}
+
+		regmap_write(priv->map, LPC_LADR3H, addrs[0] >> 8);
+		regmap_write(priv->map, LPC_LADR3L, addrs[0] & 0xFF);
 		break;
 
 	case 4:
-		regmap_write(priv->map, LPC_LADR4, ((addr + 1) << 16) |
-			addr);
+		if (nr_addrs == 1)
+			regmap_write(priv->map, LPC_LADR4, ((addrs[0] + 1) << 16) | addrs[0]);
+		else
+			regmap_write(priv->map, LPC_LADR4, (addrs[1] << 16) | addrs[0]);
+
 		break;
 
 	default:
-		break;
+		return -EINVAL;
 	}
+
+	return 0;
 }
 
 static inline int aspeed_kcs_map_serirq_type(u32 dt_type)
@@ -457,18 +490,18 @@ static int aspeed_kcs_of_v1_get_channel(struct platform_device *pdev)
 	return channel;
 }
 
-static int aspeed_kcs_of_v1_get_io_address(struct platform_device *pdev)
+static int
+aspeed_kcs_of_v1_get_io_address(struct platform_device *pdev, u32 addrs[2])
 {
-	u32 slave;
 	int rc;
 
-	rc = of_property_read_u32(pdev->dev.of_node, "kcs_addr", &slave);
-	if (rc || slave > 0xffff) {
+	rc = of_property_read_u32(pdev->dev.of_node, "kcs_addr", addrs);
+	if (rc || addrs[0] > 0xffff) {
 		dev_err(&pdev->dev, "no valid 'kcs_addr' configured\n");
 		return -EINVAL;
 	}
 
-	return slave;
+	return 1;
 }
 
 static int aspeed_kcs_of_v2_get_channel(struct platform_device *pdev)
@@ -504,18 +537,30 @@ static int aspeed_kcs_of_v2_get_channel(struct platform_device *pdev)
 	return -EINVAL;
 }
 
-static int aspeed_kcs_of_v2_get_io_address(struct platform_device *pdev)
+static int
+aspeed_kcs_of_v2_get_io_address(struct platform_device *pdev, u32 addrs[2])
 {
-	uint32_t slave;
 	int rc;
 
-	rc = of_property_read_u32(pdev->dev.of_node, "aspeed,lpc-io-reg", &slave);
-	if (rc || slave > 0xffff) {
-		dev_err(&pdev->dev, "no valid 'aspeed,lpc-io-reg' configured\n");
+	rc = of_property_read_variable_u32_array(pdev->dev.of_node,
+						 "aspeed,lpc-io-reg",
+						 addrs, 1, 2);
+	if (rc < 0) {
+		dev_err(&pdev->dev, "No valid 'aspeed,lpc-io-reg' configured\n");
+		return rc;
+	}
+
+	if (addrs[0] > 0xffff) {
+		dev_err(&pdev->dev, "Invalid data address in 'aspeed,lpc-io-reg'\n");
 		return -EINVAL;
 	}
 
-	return slave;
+	if (rc == 2 && addrs[1] > 0xffff) {
+		dev_err(&pdev->dev, "Invalid status address in 'aspeed,lpc-io-reg'\n");
+		return -EINVAL;
+	}
+
+	return rc;
 }
 
 static int aspeed_kcs_probe(struct platform_device *pdev)
@@ -524,9 +569,11 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	struct kcs_bmc_device *kcs_bmc;
 	struct aspeed_kcs_bmc *priv;
 	struct device_node *np;
-	int rc, channel, addr;
 	bool have_upstream_irq;
 	u32 upstream_irq[2];
+	int rc, channel;
+	int nr_addrs;
+	u32 addrs[2];
 
 	np = pdev->dev.of_node->parent;
 	if (!of_device_is_compatible(np, "aspeed,ast2400-lpc-v2") &&
@@ -544,9 +591,9 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	if (channel < 0)
 		return channel;
 
-	addr = ops->get_io_address(pdev);
-	if (addr < 0)
-		return addr;
+	nr_addrs = ops->get_io_address(pdev, addrs);
+	if (nr_addrs < 0)
+		return nr_addrs;
 
 	np = pdev->dev.of_node;
 	rc = of_property_read_u32_array(np, "aspeed,lpc-interrupts", upstream_irq, 2);
@@ -575,7 +622,9 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 	priv->obe.remove = false;
 	timer_setup(&priv->obe.timer, aspeed_kcs_check_obe, 0);
 
-	aspeed_kcs_set_address(kcs_bmc, addr);
+	rc = aspeed_kcs_set_address(kcs_bmc, addrs, nr_addrs);
+	if (rc)
+		return rc;
 
 	/* Host to BMC IRQ */
 	rc = aspeed_kcs_config_downstream_irq(kcs_bmc, pdev);
@@ -602,7 +651,8 @@ static int aspeed_kcs_probe(struct platform_device *pdev)
 		return rc;
 	}
 
-	dev_info(&pdev->dev, "Initialised channel %d at 0x%x\n", kcs_bmc->channel, addr);
+	dev_info(&pdev->dev, "Initialised channel %d at 0x%x\n",
+			kcs_bmc->channel, addrs[0]);
 
 	return 0;
 }

From 5b32dd281ee0a269f39ecf6b48f0cd3f37264842 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 16 Jun 2021 17:29:13 +0100
Subject: [PATCH 2475/3292] ipmi: kcs_bmc_aspeed: Fix less than zero comparison
 of a unsigned int

The comparisons of the unsigned int hw_type to less than zero always
false because it is unsigned. Fix this by using an int for the
assignment and less than zero check.

Addresses-Coverity: ("Unsigned compared against 0")
Fixes: 9d2df9a0ad80 ("ipmi: kcs_bmc_aspeed: Implement KCS SerIRQ configuration")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Message-Id: <20210616162913.15259-1-colin.king@canonical.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc_aspeed.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c
index 0401089f8895..92a37b33494c 100644
--- a/drivers/char/ipmi/kcs_bmc_aspeed.c
+++ b/drivers/char/ipmi/kcs_bmc_aspeed.c
@@ -301,13 +301,15 @@ static inline int aspeed_kcs_map_serirq_type(u32 dt_type)
 static int aspeed_kcs_config_upstream_irq(struct aspeed_kcs_bmc *priv, u32 id, u32 dt_type)
 {
 	unsigned int mask, val, hw_type;
+	int ret;
 
 	if (id > 15)
 		return -EINVAL;
 
-	hw_type = aspeed_kcs_map_serirq_type(dt_type);
-	if (hw_type < 0)
-		return hw_type;
+	ret = aspeed_kcs_map_serirq_type(dt_type);
+	if (ret < 0)
+		return ret;
+	hw_type = ret;
 
 	priv->upstream_irq.mode = aspeed_kcs_irq_serirq;
 	priv->upstream_irq.id = id;

From fbcff33d4204cbcb8e60faa3465a2156fdda398a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 21 Jun 2021 14:44:46 -0700
Subject: [PATCH 2476/3292] cifs: Avoid field over-reading memcpy()

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally reading across neighboring fields.

Instead of using memcpy to read across multiple struct members, just
perform per-member assignments as already done for other members.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 31784e3fa96f..962826dc3316 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2892,7 +2892,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 #endif /* CIFS_DEBUG2 */
 
 	if (buf) {
-		memcpy(buf, &rsp->CreationTime, 32);
+		buf->CreationTime = rsp->CreationTime;
+		buf->LastAccessTime = rsp->LastAccessTime;
+		buf->LastWriteTime = rsp->LastWriteTime;
+		buf->ChangeTime = rsp->ChangeTime;
 		buf->AllocationSize = rsp->AllocationSize;
 		buf->EndOfFile = rsp->EndofFile;
 		buf->Attributes = rsp->FileAttributes;

From dd72fadf2186fc8a6018f97fe72f4d5ca05df440 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Tue, 22 Jun 2021 09:25:31 +0530
Subject: [PATCH 2477/3292] xfrm: Fix xfrm offload fallback fail case

In case of xfrm offload, if xdo_dev_state_add() of driver returns
-EOPNOTSUPP, xfrm offload fallback is failed.
In xfrm state_add() both xso->dev and xso->real_dev are initialized to
dev and when err(-EOPNOTSUPP) is returned only xso->dev is set to null.

So in this scenario the condition in func validate_xmit_xfrm(),
if ((x->xso.dev != dev) && (x->xso.real_dev == dev))
                return skb;
returns true, due to which skb is returned without calling esp_xmit()
below which has fallback code. Hence the CRYPTO_FALLBACK is failing.

So fixing this with by keeping x->xso.real_dev as NULL when err is
returned in func xfrm_dev_state_add().

Fixes: bdfd2d1fa79a ("bonding/xfrm: use real_dev instead of slave_dev")
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6d6917b68856..e843b0d9e2a6 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -268,6 +268,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		xso->num_exthdrs = 0;
 		xso->flags = 0;
 		xso->dev = NULL;
+		xso->real_dev = NULL;
 		dev_put(dev);
 
 		if (err != -EOPNOTSUPP)

From 766c268bc6d39b8124e50d075a36b8a3305bc8e2 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Thu, 17 Jun 2021 11:56:50 +0206
Subject: [PATCH 2478/3292] lib/dump_stack: move cpu lock to printk.c

dump_stack() implements its own cpu-reentrant spinning lock to
best-effort serialize stack traces in the printk log. However,
there are other functions (such as show_regs()) that can also
benefit from this serialization.

Move the cpu-reentrant spinning lock (cpu lock) into new helper
functions printk_cpu_lock_irqsave()/printk_cpu_unlock_irqrestore()
so that it is available for others as well. For !CONFIG_SMP the
cpu lock is a NOP.

Note that having multiple cpu locks in the system can easily
lead to deadlock. Code needing a cpu lock should use the
printk cpu lock, since the printk cpu lock could be acquired
from any code and any context.

Also note that it is not necessary for a cpu lock to disable
interrupts. However, in upcoming work this cpu lock will be used
for emergency tasks (for example, atomic consoles during kernel
crashes) and any interruptions while holding the cpu lock should
be avoided if possible.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
[pmladek@suse.com: Backported on top of 5.13-rc1.]
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210617095051.4808-2-john.ogness@linutronix.de
---
 include/linux/printk.h | 41 +++++++++++++++++++++++++
 kernel/printk/printk.c | 69 ++++++++++++++++++++++++++++++++++++++++++
 lib/dump_stack.c       | 38 ++---------------------
 3 files changed, 112 insertions(+), 36 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index fe7eb2351610..1790a5521fd9 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -282,6 +282,47 @@ static inline void printk_safe_flush_on_panic(void)
 }
 #endif
 
+#ifdef CONFIG_SMP
+extern int __printk_cpu_trylock(void);
+extern void __printk_wait_on_cpu_lock(void);
+extern void __printk_cpu_unlock(void);
+
+/**
+ * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+ *                             lock and disable interrupts.
+ * @flags: Stack-allocated storage for saving local interrupt state,
+ *         to be passed to printk_cpu_unlock_irqrestore().
+ *
+ * If the lock is owned by another CPU, spin until it becomes available.
+ * Interrupts are restored while spinning.
+ */
+#define printk_cpu_lock_irqsave(flags)		\
+	for (;;) {				\
+		local_irq_save(flags);		\
+		if (__printk_cpu_trylock())	\
+			break;			\
+		local_irq_restore(flags);	\
+		__printk_wait_on_cpu_lock();	\
+	}
+
+/**
+ * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
+ *                                  lock and restore interrupts.
+ * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ */
+#define printk_cpu_unlock_irqrestore(flags)	\
+	do {					\
+		__printk_cpu_unlock();		\
+		local_irq_restore(flags);	\
+	} while (0)				\
+
+#else
+
+#define printk_cpu_lock_irqsave(flags) ((void)flags)
+#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
+
+#endif /* CONFIG_SMP */
+
 extern int kptr_restrict;
 
 /**
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 421c35571797..9dfad0efb67f 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3531,3 +3531,72 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
 EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
 
 #endif
+
+#ifdef CONFIG_SMP
+static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
+static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
+
+/**
+ * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
+ *                               spinning lock is not owned by any CPU.
+ *
+ * Context: Any context.
+ */
+void __printk_wait_on_cpu_lock(void)
+{
+	do {
+		cpu_relax();
+	} while (atomic_read(&printk_cpulock_owner) != -1);
+}
+EXPORT_SYMBOL(__printk_wait_on_cpu_lock);
+
+/**
+ * __printk_cpu_trylock() - Try to acquire the printk cpu-reentrant
+ *                          spinning lock.
+ *
+ * If no processor has the lock, the calling processor takes the lock and
+ * becomes the owner. If the calling processor is already the owner of the
+ * lock, this function succeeds immediately.
+ *
+ * Context: Any context. Expects interrupts to be disabled.
+ * Return: 1 on success, otherwise 0.
+ */
+int __printk_cpu_trylock(void)
+{
+	int cpu;
+	int old;
+
+	cpu = smp_processor_id();
+
+	old = atomic_cmpxchg(&printk_cpulock_owner, -1, cpu);
+	if (old == -1) {
+		/* This CPU is now the owner. */
+		return 1;
+	} else if (old == cpu) {
+		/* This CPU is already the owner. */
+		atomic_inc(&printk_cpulock_nested);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__printk_cpu_trylock);
+
+/**
+ * __printk_cpu_unlock() - Release the printk cpu-reentrant spinning lock.
+ *
+ * The calling processor must be the owner of the lock.
+ *
+ * Context: Any context. Expects interrupts to be disabled.
+ */
+void __printk_cpu_unlock(void)
+{
+	if (atomic_read(&printk_cpulock_nested)) {
+		atomic_dec(&printk_cpulock_nested);
+		return;
+	}
+
+	atomic_set(&printk_cpulock_owner, -1);
+}
+EXPORT_SYMBOL(__printk_cpu_unlock);
+#endif /* CONFIG_SMP */
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index f5a33b6f773f..5ebf4375fa8c 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -84,50 +84,16 @@ static void __dump_stack(void)
  *
  * Architectures can override this implementation by implementing its own.
  */
-#ifdef CONFIG_SMP
-static atomic_t dump_lock = ATOMIC_INIT(-1);
-
 asmlinkage __visible void dump_stack(void)
 {
 	unsigned long flags;
-	int was_locked;
-	int old;
-	int cpu;
 
 	/*
 	 * Permit this cpu to perform nested stack dumps while serialising
 	 * against other CPUs
 	 */
-retry:
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	old = atomic_cmpxchg(&dump_lock, -1, cpu);
-	if (old == -1) {
-		was_locked = 0;
-	} else if (old == cpu) {
-		was_locked = 1;
-	} else {
-		local_irq_restore(flags);
-		/*
-		 * Wait for the lock to release before jumping to
-		 * atomic_cmpxchg() in order to mitigate the thundering herd
-		 * problem.
-		 */
-		do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
-		goto retry;
-	}
-
+	printk_cpu_lock_irqsave(flags);
 	__dump_stack();
-
-	if (!was_locked)
-		atomic_set(&dump_lock, -1);
-
-	local_irq_restore(flags);
+	printk_cpu_unlock_irqrestore(flags);
 }
-#else
-asmlinkage __visible void dump_stack(void)
-{
-	__dump_stack();
-}
-#endif
 EXPORT_SYMBOL(dump_stack);

From 3342aa8e6b4f6e3f1521e9b4cf5cfe50dbc37774 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Thu, 17 Jun 2021 11:56:51 +0206
Subject: [PATCH 2479/3292] printk: fix cpu lock ordering

The cpu lock implementation uses a full memory barrier to take
the lock, but no memory barriers when releasing the lock. This
means that changes performed by a lock owner may not be seen by
the next lock owner. This may have been "good enough" for use
by dump_stack() as a serialization mechanism, but it is not
enough to provide proper protection for a critical section.

Correct this problem by using acquire/release memory barriers
for lock/unlock, respectively.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210617095051.4808-3-john.ogness@linutronix.de
---
 kernel/printk/printk.c | 53 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9dfad0efb67f..142a58d124d9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3568,10 +3568,33 @@ int __printk_cpu_trylock(void)
 
 	cpu = smp_processor_id();
 
-	old = atomic_cmpxchg(&printk_cpulock_owner, -1, cpu);
+	/*
+	 * Guarantee loads and stores from this CPU when it is the lock owner
+	 * are _not_ visible to the previous lock owner. This pairs with
+	 * __printk_cpu_unlock:B.
+	 *
+	 * Memory barrier involvement:
+	 *
+	 * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, then
+	 * __printk_cpu_unlock:A can never read from __printk_cpu_trylock:B.
+	 *
+	 * Relies on:
+	 *
+	 * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B
+	 * of the previous CPU
+	 *    matching
+	 * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B
+	 * of this CPU
+	 */
+	old = atomic_cmpxchg_acquire(&printk_cpulock_owner, -1,
+				     cpu); /* LMM(__printk_cpu_trylock:A) */
 	if (old == -1) {
-		/* This CPU is now the owner. */
+		/*
+		 * This CPU is now the owner and begins loading/storing
+		 * data: LMM(__printk_cpu_trylock:B)
+		 */
 		return 1;
+
 	} else if (old == cpu) {
 		/* This CPU is already the owner. */
 		atomic_inc(&printk_cpulock_nested);
@@ -3596,7 +3619,31 @@ void __printk_cpu_unlock(void)
 		return;
 	}
 
-	atomic_set(&printk_cpulock_owner, -1);
+	/*
+	 * This CPU is finished loading/storing data:
+	 * LMM(__printk_cpu_unlock:A)
+	 */
+
+	/*
+	 * Guarantee loads and stores from this CPU when it was the
+	 * lock owner are visible to the next lock owner. This pairs
+	 * with __printk_cpu_trylock:A.
+	 *
+	 * Memory barrier involvement:
+	 *
+	 * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B,
+	 * then __printk_cpu_trylock:B reads from __printk_cpu_unlock:A.
+	 *
+	 * Relies on:
+	 *
+	 * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B
+	 * of this CPU
+	 *    matching
+	 * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B
+	 * of the next CPU
+	 */
+	atomic_set_release(&printk_cpulock_owner,
+			   -1); /* LMM(__printk_cpu_unlock:B) */
 }
 EXPORT_SYMBOL(__printk_cpu_unlock);
 #endif /* CONFIG_SMP */

From ce1b06c5f5e7fd2af8bc89323d9eb3102d8e3fe2 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Fri, 18 Jun 2021 09:47:20 +0800
Subject: [PATCH 2480/3292] quota: remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Remove it can help us save a bit of memory. After that, the static
function getdqbuf() becomes unnecessary, get rid of it and instead call
kmalloc() directly.

Link: https://lore.kernel.org/r/20210618014720.1977-2-thunder.leizhen@huawei.com
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/quota_tree.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index c5562c871c8b..d3e995e1046f 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -47,15 +47,6 @@ static int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
 	       / info->dqi_entry_size;
 }
 
-static char *getdqbuf(size_t size)
-{
-	char *buf = kmalloc(size, GFP_NOFS);
-	if (!buf)
-		printk(KERN_WARNING
-		       "VFS: Not enough memory for quota buffers.\n");
-	return buf;
-}
-
 static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
 {
 	struct super_block *sb = info->dqi_sb;
@@ -83,7 +74,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
 /* Remove empty block from list and return it */
 static int get_free_dqblk(struct qtree_mem_dqinfo *info)
 {
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
 	int ret, blk;
 
@@ -132,7 +123,7 @@ static int put_free_dqblk(struct qtree_mem_dqinfo *info, char *buf, uint blk)
 static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf,
 			       uint blk)
 {
-	char *tmpbuf = getdqbuf(info->dqi_usable_bs);
+	char *tmpbuf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
 	uint nextblk = le32_to_cpu(dh->dqdh_next_free);
 	uint prevblk = le32_to_cpu(dh->dqdh_prev_free);
@@ -179,7 +170,7 @@ out_buf:
 static int insert_free_dqentry(struct qtree_mem_dqinfo *info, char *buf,
 			       uint blk)
 {
-	char *tmpbuf = getdqbuf(info->dqi_usable_bs);
+	char *tmpbuf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
 	int err;
 
@@ -227,7 +218,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
 {
 	uint blk, i;
 	struct qt_disk_dqdbheader *dh;
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	char *ddquot;
 
 	*err = 0;
@@ -298,7 +289,7 @@ out_buf:
 static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 			  uint *treeblk, int depth)
 {
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	int ret = 0, newson = 0, newact = 0;
 	__le32 *ref;
 	uint newblk;
@@ -375,7 +366,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	int type = dquot->dq_id.type;
 	struct super_block *sb = dquot->dq_sb;
 	ssize_t ret;
-	char *ddquot = getdqbuf(info->dqi_entry_size);
+	char *ddquot = kmalloc(info->dqi_entry_size, GFP_NOFS);
 
 	if (!ddquot)
 		return -ENOMEM;
@@ -414,7 +405,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 			uint blk)
 {
 	struct qt_disk_dqdbheader *dh;
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	int ret = 0;
 
 	if (!buf)
@@ -474,7 +465,7 @@ out_buf:
 static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
 		       uint *blk, int depth)
 {
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	int ret = 0;
 	uint newblk;
 	__le32 *ref = (__le32 *)buf;
@@ -533,7 +524,7 @@ EXPORT_SYMBOL(qtree_delete_dquot);
 static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
 				 struct dquot *dquot, uint blk)
 {
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	loff_t ret = 0;
 	int i;
 	char *ddquot;
@@ -571,7 +562,7 @@ out_buf:
 static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
 				struct dquot *dquot, uint blk, int depth)
 {
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	loff_t ret = 0;
 	__le32 *ref = (__le32 *)buf;
 
@@ -635,7 +626,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 		}
 		dquot->dq_off = offset;
 	}
-	ddquot = getdqbuf(info->dqi_entry_size);
+	ddquot = kmalloc(info->dqi_entry_size, GFP_NOFS);
 	if (!ddquot)
 		return -ENOMEM;
 	ret = sb->s_op->quota_read(sb, type, ddquot, info->dqi_entry_size,
@@ -679,7 +670,7 @@ EXPORT_SYMBOL(qtree_release_dquot);
 static int find_next_id(struct qtree_mem_dqinfo *info, qid_t *id,
 			unsigned int blk, int depth)
 {
-	char *buf = getdqbuf(info->dqi_usable_bs);
+	char *buf = kmalloc(info->dqi_usable_bs, GFP_NOFS);
 	__le32 *ref = (__le32 *)buf;
 	ssize_t ret;
 	unsigned int epb = info->dqi_usable_bs >> 2;

From 3ece696c1acaa2ecac2e55143fc0c3ac413369c0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:09 +0300
Subject: [PATCH 2481/3292] platform/x86: Remove "default n" entries

Linus already once did that for PDx86, don't repeat our mistakes.
TL;DR: 'n' *is* the default 'default'.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210618125516.53510-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/Kconfig      | 1 -
 drivers/platform/x86/dell/Kconfig | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 4fd792f2a10a..ccb827b57f1f 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1092,7 +1092,6 @@ config TOUCHSCREEN_DMI
 
 config FW_ATTR_CLASS
 	tristate
-	default n
 
 config INTEL_IMR
 	bool "Intel Isolated Memory Region support"
diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
index 9b0a4d080f43..9e7314d90bea 100644
--- a/drivers/platform/x86/dell/Kconfig
+++ b/drivers/platform/x86/dell/Kconfig
@@ -5,7 +5,6 @@
 
 menuconfig X86_PLATFORM_DRIVERS_DELL
 	bool "Dell X86 Platform Specific Device Drivers"
-	default n
 	depends on X86_PLATFORM_DEVICES
 	help
 	  Say Y here to get to see options for device drivers for various

From a4310246430096a876a8d54ca7ad52d6603b9cda Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:10 +0300
Subject: [PATCH 2482/3292] platform/x86: intel_skl_int3472: Free ACPI device
 resources after use

We may free ACPI device resources immediately after use.
Refactor skl_int3472_parse_crs() accordingly.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Tested-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/20210618125516.53510-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../x86/intel-int3472/intel_skl_int3472_discrete.c  | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
index 8c18dbff1c43..48a00a1f4fb6 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
@@ -308,8 +308,10 @@ static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)
 	ret = acpi_dev_get_resources(int3472->adev, &resource_list,
 				     skl_int3472_handle_gpio_resources,
 				     int3472);
-	if (ret)
-		goto out_free_res_list;
+	if (ret < 0)
+		return ret;
+
+	acpi_dev_free_resource_list(&resource_list);
 
 	/*
 	 * If we find no clock enable GPIO pin then the privacy LED won't work.
@@ -319,7 +321,7 @@ static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)
 	if (int3472->clock.ena_gpio) {
 		ret = skl_int3472_register_clock(int3472);
 		if (ret)
-			goto out_free_res_list;
+			return ret;
 	} else {
 		if (int3472->clock.led_gpio)
 			dev_warn(int3472->dev,
@@ -329,10 +331,7 @@ static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)
 	int3472->gpios.dev_id = int3472->sensor_name;
 	gpiod_add_lookup_table(&int3472->gpios);
 
-out_free_res_list:
-	acpi_dev_free_resource_list(&resource_list);
-
-	return ret;
+	return 0;
 }
 
 int skl_int3472_discrete_probe(struct platform_device *pdev)

From a438dd11081a6ff1b8aa13cc96f07e2ca2f33a36 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:11 +0300
Subject: [PATCH 2483/3292] platform/x86: intel_skl_int3472: Fix dependencies
 (drop CLKDEV_LOOKUP)

Besides the fact that COMMON_CLK selects CLKDEV_LOOKUP, the latter
is going to be removed from clock framework.

Reviewed-by: Daniel Scally <djrscally@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210618125516.53510-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel-int3472/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/intel-int3472/Kconfig b/drivers/platform/x86/intel-int3472/Kconfig
index c112878e833b..62e5d4cf9ee5 100644
--- a/drivers/platform/x86/intel-int3472/Kconfig
+++ b/drivers/platform/x86/intel-int3472/Kconfig
@@ -1,7 +1,7 @@
 config INTEL_SKL_INT3472
 	tristate "Intel SkyLake ACPI INT3472 Driver"
 	depends on ACPI
-	depends on COMMON_CLK && CLKDEV_LOOKUP
+	depends on COMMON_CLK
 	depends on I2C
 	depends on GPIOLIB
 	depends on REGULATOR

From 719941878bc95af5e1368eca56fd4dcbd3633f10 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:12 +0300
Subject: [PATCH 2484/3292] platform/x86: intel_skl_int3472: Use ACPI GPIO
 resource directly

When we call acpi_gpio_get_io_resource(), the output will be
the pointer to the ACPI GPIO resource. Use it directly instead of
dereferencing the generic resource.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Tested-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/20210618125516.53510-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../intel_skl_int3472_clk_and_regulator.c     |  7 ++---
 .../intel-int3472/intel_skl_int3472_common.h  |  2 +-
 .../intel_skl_int3472_discrete.c              | 28 +++++++++----------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
index ceee860e2c07..49ea1e86c193 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
@@ -131,10 +131,10 @@ out_free_init_name:
 }
 
 int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
-				   struct acpi_resource *ares)
+				   struct acpi_resource_gpio *agpio)
 {
-	char *path = ares->data.gpio.resource_source.string_ptr;
 	const struct int3472_sensor_config *sensor_config;
+	char *path = agpio->resource_source.string_ptr;
 	struct regulator_consumer_supply supply_map;
 	struct regulator_init_data init_data = { };
 	struct regulator_config cfg = { };
@@ -168,8 +168,7 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
 						int3472->regulator.supply_name,
 						&int3472_gpio_regulator_ops);
 
-	int3472->regulator.gpio = acpi_get_and_request_gpiod(path,
-							     ares->data.gpio.pin_table[0],
+	int3472->regulator.gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0],
 							     "int3472,regulator");
 	if (IS_ERR(int3472->regulator.gpio)) {
 		dev_err(int3472->dev, "Failed to get regulator GPIO line\n");
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
index 6fdf78584219..765e01ec1604 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
@@ -113,6 +113,6 @@ union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
 int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
 int skl_int3472_register_clock(struct int3472_discrete_device *int3472);
 int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
-				   struct acpi_resource *ares);
+				   struct acpi_resource_gpio *agpio);
 
 #endif
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
index 48a00a1f4fb6..fd681d2a73fe 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
@@ -103,11 +103,11 @@ skl_int3472_get_sensor_module_config(struct int3472_discrete_device *int3472)
 }
 
 static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472,
-					  struct acpi_resource *ares,
+					  struct acpi_resource_gpio *agpio,
 					  const char *func, u32 polarity)
 {
-	char *path = ares->data.gpio.resource_source.string_ptr;
 	const struct int3472_sensor_config *sensor_config;
+	char *path = agpio->resource_source.string_ptr;
 	struct gpiod_lookup *table_entry;
 	struct acpi_device *adev;
 	acpi_handle handle;
@@ -145,7 +145,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
 
 	table_entry = &int3472->gpios.table[int3472->n_sensor_gpios];
 	table_entry->key = acpi_dev_name(adev);
-	table_entry->chip_hwnum = ares->data.gpio.pin_table[0];
+	table_entry->chip_hwnum = agpio->pin_table[0];
 	table_entry->con_id = func;
 	table_entry->idx = 0;
 	table_entry->flags = polarity;
@@ -156,23 +156,22 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347
 }
 
 static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472,
-				       struct acpi_resource *ares, u8 type)
+				       struct acpi_resource_gpio *agpio, u8 type)
 {
-	char *path = ares->data.gpio.resource_source.string_ptr;
+	char *path = agpio->resource_source.string_ptr;
+	u16 pin = agpio->pin_table[0];
 	struct gpio_desc *gpio;
 
 	switch (type) {
 	case INT3472_GPIO_TYPE_CLK_ENABLE:
-		gpio = acpi_get_and_request_gpiod(path, ares->data.gpio.pin_table[0],
-						  "int3472,clk-enable");
+		gpio = acpi_get_and_request_gpiod(path, pin, "int3472,clk-enable");
 		if (IS_ERR(gpio))
 			return (PTR_ERR(gpio));
 
 		int3472->clock.ena_gpio = gpio;
 		break;
 	case INT3472_GPIO_TYPE_PRIVACY_LED:
-		gpio = acpi_get_and_request_gpiod(path, ares->data.gpio.pin_table[0],
-						  "int3472,privacy-led");
+		gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led");
 		if (IS_ERR(gpio))
 			return (PTR_ERR(gpio));
 
@@ -242,7 +241,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 
 	if (!obj) {
 		dev_warn(int3472->dev, "No _DSM entry for GPIO pin %u\n",
-			 ares->data.gpio.pin_table[0]);
+			 agpio->pin_table[0]);
 		return 1;
 	}
 
@@ -250,15 +249,14 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 
 	switch (type) {
 	case INT3472_GPIO_TYPE_RESET:
-		ret = skl_int3472_map_gpio_to_sensor(int3472, ares, "reset",
+		ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, "reset",
 						     GPIO_ACTIVE_LOW);
 		if (ret)
 			err_msg = "Failed to map reset pin to sensor\n";
 
 		break;
 	case INT3472_GPIO_TYPE_POWERDOWN:
-		ret = skl_int3472_map_gpio_to_sensor(int3472, ares,
-						     "powerdown",
+		ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, "powerdown",
 						     GPIO_ACTIVE_LOW);
 		if (ret)
 			err_msg = "Failed to map powerdown pin to sensor\n";
@@ -266,13 +264,13 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 		break;
 	case INT3472_GPIO_TYPE_CLK_ENABLE:
 	case INT3472_GPIO_TYPE_PRIVACY_LED:
-		ret = skl_int3472_map_gpio_to_clk(int3472, ares, type);
+		ret = skl_int3472_map_gpio_to_clk(int3472, agpio, type);
 		if (ret)
 			err_msg = "Failed to map GPIO to clock\n";
 
 		break;
 	case INT3472_GPIO_TYPE_POWER_ENABLE:
-		ret = skl_int3472_register_regulator(int3472, ares);
+		ret = skl_int3472_register_regulator(int3472, agpio);
 		if (ret)
 			err_msg = "Failed to map regulator to sensor\n";
 

From 7b2baa407c3c9e6f74c7edfa181eeb001e75ed3e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:13 +0300
Subject: [PATCH 2485/3292] platform/x86: intel_skl_int3472: Provide
 skl_int3472_unregister_regulator()

For the sake of APIs to be properly layered provide
skl_int3472_unregister_regulator().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Tested-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/20210618125516.53510-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c | 6 ++++++
 .../platform/x86/intel-int3472/intel_skl_int3472_common.h   | 2 ++
 .../platform/x86/intel-int3472/intel_skl_int3472_discrete.c | 4 ++--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
index 49ea1e86c193..60c7128f44ee 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
@@ -193,3 +193,9 @@ err_free_gpio:
 
 	return ret;
 }
+
+void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472)
+{
+	regulator_unregister(int3472->regulator.rdev);
+	gpiod_put(int3472->regulator.gpio);
+}
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
index 765e01ec1604..50f73c6eab44 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
@@ -112,7 +112,9 @@ union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
 					       char *id);
 int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
 int skl_int3472_register_clock(struct int3472_discrete_device *int3472);
+
 int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
 				   struct acpi_resource_gpio *agpio);
+void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472);
 
 #endif
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
index fd681d2a73fe..2638d375e226 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
@@ -400,15 +400,15 @@ int skl_int3472_discrete_remove(struct platform_device *pdev)
 	struct int3472_discrete_device *int3472 = platform_get_drvdata(pdev);
 
 	gpiod_remove_lookup_table(&int3472->gpios);
-	regulator_unregister(int3472->regulator.rdev);
 	clk_unregister(int3472->clock.clk);
 
 	if (int3472->clock.cl)
 		clkdev_drop(int3472->clock.cl);
 
-	gpiod_put(int3472->regulator.gpio);
 	gpiod_put(int3472->clock.ena_gpio);
 	gpiod_put(int3472->clock.led_gpio);
 
+	skl_int3472_unregister_regulator(int3472);
+
 	return 0;
 }

From 7540599a5ef1cbe8d20993ca0c3202d8409338e1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:14 +0300
Subject: [PATCH 2486/3292] platform/x86: intel_skl_int3472: Provide
 skl_int3472_unregister_clock()

For the sake of APIs to be properly layered provide
skl_int3472_unregister_clock().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Tested-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/20210618125516.53510-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c | 6 ++++++
 .../platform/x86/intel-int3472/intel_skl_int3472_common.h   | 2 ++
 .../platform/x86/intel-int3472/intel_skl_int3472_discrete.c | 5 ++---
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
index 60c7128f44ee..1700e7557a82 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
@@ -130,6 +130,12 @@ out_free_init_name:
 	return ret;
 }
 
+void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472)
+{
+	clkdev_drop(int3472->clock.cl);
+	clk_unregister(int3472->clock.clk);
+}
+
 int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
 				   struct acpi_resource_gpio *agpio)
 {
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
index 50f73c6eab44..714fde73b524 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
@@ -111,7 +111,9 @@ int skl_int3472_tps68470_probe(struct i2c_client *client);
 union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev,
 					       char *id);
 int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb);
+
 int skl_int3472_register_clock(struct int3472_discrete_device *int3472);
+void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472);
 
 int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
 				   struct acpi_resource_gpio *agpio);
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
index 2638d375e226..17c6fe830765 100644
--- a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
+++ b/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
@@ -400,10 +400,9 @@ int skl_int3472_discrete_remove(struct platform_device *pdev)
 	struct int3472_discrete_device *int3472 = platform_get_drvdata(pdev);
 
 	gpiod_remove_lookup_table(&int3472->gpios);
-	clk_unregister(int3472->clock.clk);
 
-	if (int3472->clock.cl)
-		clkdev_drop(int3472->clock.cl);
+	if (int3472->clock.ena_gpio)
+		skl_int3472_unregister_clock(int3472);
 
 	gpiod_put(int3472->clock.ena_gpio);
 	gpiod_put(int3472->clock.led_gpio);

From 8bd836feb6cad6bd746da09a86bda0f5ee5c4b01 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:15 +0300
Subject: [PATCH 2487/3292] platform/x86: intel_skl_int3472: Move to intel/
 subfolder

Start collecting Intel x86 related drivers in its own subfolder.
Move intel_skl_int3472 first.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210618125516.53510-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS                                   |  2 +-
 drivers/platform/x86/Kconfig                  |  4 ++--
 drivers/platform/x86/Makefile                 |  3 ++-
 drivers/platform/x86/intel/Kconfig            | 21 +++++++++++++++++++
 drivers/platform/x86/intel/Makefile           |  7 +++++++
 .../{intel-int3472 => intel/int3472}/Kconfig  |  0
 .../{intel-int3472 => intel/int3472}/Makefile |  0
 .../intel_skl_int3472_clk_and_regulator.c     |  0
 .../int3472}/intel_skl_int3472_common.c       |  0
 .../int3472}/intel_skl_int3472_common.h       |  0
 .../int3472}/intel_skl_int3472_discrete.c     |  0
 .../int3472}/intel_skl_int3472_tps68470.c     |  0
 12 files changed, 33 insertions(+), 4 deletions(-)
 create mode 100644 drivers/platform/x86/intel/Kconfig
 create mode 100644 drivers/platform/x86/intel/Makefile
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/Kconfig (100%)
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/Makefile (100%)
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/intel_skl_int3472_clk_and_regulator.c (100%)
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/intel_skl_int3472_common.c (100%)
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/intel_skl_int3472_common.h (100%)
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/intel_skl_int3472_discrete.c (100%)
 rename drivers/platform/x86/{intel-int3472 => intel/int3472}/intel_skl_int3472_tps68470.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7da4c07364fa..5ca79321b9c5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9388,7 +9388,7 @@ F:	drivers/platform/x86/intel_scu_*
 INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER
 M:	Daniel Scally <djrscally@gmail.com>
 S:	Maintained
-F:	drivers/platform/x86/intel-int3472/
+F:	drivers/platform/x86/intel/int3472/
 
 INTEL SPEED SELECT TECHNOLOGY
 M:	Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index ccb827b57f1f..79d095c0ab61 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -651,6 +651,8 @@ config THINKPAD_LMI
 	  To compile this driver as a module, choose M here: the module will
 	  be called think-lmi.
 
+source "drivers/platform/x86/intel/Kconfig"
+
 config INTEL_ATOMISP2_LED
 	tristate "Intel AtomISP2 camera LED driver"
 	depends on GPIOLIB && LEDS_GPIO
@@ -709,8 +711,6 @@ config INTEL_CHT_INT33FE
 	  device and CONFIG_TYPEC_FUSB302=m and CONFIG_BATTERY_MAX17042=m
 	  for Type-C device.
 
-source "drivers/platform/x86/intel-int3472/Kconfig"
-
 config INTEL_HID_EVENT
 	tristate "INTEL HID Event"
 	depends on ACPI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index a1f64613af71..e03b59ce3f9f 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -66,6 +66,8 @@ obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
 obj-$(CONFIG_THINKPAD_LMI)	+= think-lmi.o
 
 # Intel
+obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL)		+= intel/
+
 obj-$(CONFIG_INTEL_ATOMISP2_LED)	+= intel_atomisp2_led.o
 obj-$(CONFIG_INTEL_ATOMISP2_PM)		+= intel_atomisp2_pm.o
 obj-$(CONFIG_INTEL_CHT_INT33FE)		+= intel_cht_int33fe.o
@@ -76,7 +78,6 @@ obj-$(CONFIG_INTEL_HID_EVENT)		+= intel-hid.o
 obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
 obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
 obj-$(CONFIG_INTEL_OAKTRAIL)		+= intel_oaktrail.o
-obj-$(CONFIG_INTEL_SKL_INT3472)		+= intel-int3472/
 obj-$(CONFIG_INTEL_VBTN)		+= intel-vbtn.o
 
 # MSI
diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig
new file mode 100644
index 000000000000..33f2dab03d3d
--- /dev/null
+++ b/drivers/platform/x86/intel/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel x86 Platform Specific Drivers
+#
+
+menuconfig X86_PLATFORM_DRIVERS_INTEL
+	bool "Intel x86 Platform Specific Device Drivers"
+	default y
+	help
+	  Say Y here to get to see options for device drivers for
+	  various Intel x86 platforms, including vendor-specific
+	  drivers. This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped
+	  and disabled.
+
+if X86_PLATFORM_DRIVERS_INTEL
+
+source "drivers/platform/x86/intel/int3472/Kconfig"
+
+endif # X86_PLATFORM_DRIVERS_INTEL
diff --git a/drivers/platform/x86/intel/Makefile b/drivers/platform/x86/intel/Makefile
new file mode 100644
index 000000000000..3ac795d810f1
--- /dev/null
+++ b/drivers/platform/x86/intel/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for drivers/platform/x86/intel
+# Intel x86 Platform-Specific Drivers
+#
+
+obj-$(CONFIG_INTEL_SKL_INT3472)		+= int3472/
diff --git a/drivers/platform/x86/intel-int3472/Kconfig b/drivers/platform/x86/intel/int3472/Kconfig
similarity index 100%
rename from drivers/platform/x86/intel-int3472/Kconfig
rename to drivers/platform/x86/intel/int3472/Kconfig
diff --git a/drivers/platform/x86/intel-int3472/Makefile b/drivers/platform/x86/intel/int3472/Makefile
similarity index 100%
rename from drivers/platform/x86/intel-int3472/Makefile
rename to drivers/platform/x86/intel/int3472/Makefile
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_clk_and_regulator.c
similarity index 100%
rename from drivers/platform/x86/intel-int3472/intel_skl_int3472_clk_and_regulator.c
rename to drivers/platform/x86/intel/int3472/intel_skl_int3472_clk_and_regulator.c
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.c
similarity index 100%
rename from drivers/platform/x86/intel-int3472/intel_skl_int3472_common.c
rename to drivers/platform/x86/intel/int3472/intel_skl_int3472_common.c
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h b/drivers/platform/x86/intel/int3472/intel_skl_int3472_common.h
similarity index 100%
rename from drivers/platform/x86/intel-int3472/intel_skl_int3472_common.h
rename to drivers/platform/x86/intel/int3472/intel_skl_int3472_common.h
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
similarity index 100%
rename from drivers/platform/x86/intel-int3472/intel_skl_int3472_discrete.c
rename to drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
diff --git a/drivers/platform/x86/intel-int3472/intel_skl_int3472_tps68470.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_tps68470.c
similarity index 100%
rename from drivers/platform/x86/intel-int3472/intel_skl_int3472_tps68470.c
rename to drivers/platform/x86/intel/int3472/intel_skl_int3472_tps68470.c

From 72fbcac2f40e690e1a5584358750e546a2678c2c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Jun 2021 15:55:16 +0300
Subject: [PATCH 2488/3292] platform/x86: intel_cht_int33fe: Move to its own
 subfolder

Since we have started collecting Intel x86 specific drivers in their own
folder, move intel_cht_int33fe to its own subfolder there.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210618125516.53510-8-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/Kconfig                  | 24 -------------------
 drivers/platform/x86/Makefile                 |  4 ----
 drivers/platform/x86/intel/Kconfig            |  1 +
 drivers/platform/x86/intel/Makefile           |  1 +
 drivers/platform/x86/intel/int33fe/Kconfig    | 24 +++++++++++++++++++
 drivers/platform/x86/intel/int33fe/Makefile   |  5 ++++
 .../int33fe}/intel_cht_int33fe_common.c       |  0
 .../int33fe}/intel_cht_int33fe_common.h       |  0
 .../int33fe}/intel_cht_int33fe_microb.c       |  0
 .../int33fe}/intel_cht_int33fe_typec.c        |  0
 10 files changed, 31 insertions(+), 28 deletions(-)
 create mode 100644 drivers/platform/x86/intel/int33fe/Kconfig
 create mode 100644 drivers/platform/x86/intel/int33fe/Makefile
 rename drivers/platform/x86/{ => intel/int33fe}/intel_cht_int33fe_common.c (100%)
 rename drivers/platform/x86/{ => intel/int33fe}/intel_cht_int33fe_common.h (100%)
 rename drivers/platform/x86/{ => intel/int33fe}/intel_cht_int33fe_microb.c (100%)
 rename drivers/platform/x86/{ => intel/int33fe}/intel_cht_int33fe_typec.c (100%)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 79d095c0ab61..7d385c3b2239 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -687,30 +687,6 @@ config INTEL_ATOMISP2_PM
 	  To compile this driver as a module, choose M here: the module
 	  will be called intel_atomisp2_pm.
 
-config INTEL_CHT_INT33FE
-	tristate "Intel Cherry Trail ACPI INT33FE Driver"
-	depends on X86 && ACPI && I2C && REGULATOR
-	depends on CHARGER_BQ24190=y || (CHARGER_BQ24190=m && m)
-	depends on USB_ROLES_INTEL_XHCI=y || (USB_ROLES_INTEL_XHCI=m && m)
-	depends on TYPEC_MUX_PI3USB30532=y || (TYPEC_MUX_PI3USB30532=m && m)
-	help
-	  This driver add support for the INT33FE ACPI device found on
-	  some Intel Cherry Trail devices.
-
-	  There are two kinds of INT33FE ACPI device possible: for hardware
-	  with USB Type-C and Micro-B connectors. This driver supports both.
-
-	  The INT33FE ACPI device has a CRS table with I2cSerialBusV2
-	  resources for Fuel Gauge Controller and (in the Type-C variant)
-	  FUSB302 USB Type-C Controller and PI3USB30532 USB switch.
-	  This driver instantiates i2c-clients for these, so that standard
-	  i2c drivers for these chips can bind to the them.
-
-	  If you enable this driver it is advised to also select
-	  CONFIG_BATTERY_BQ27XXX=m or CONFIG_BATTERY_BQ27XXX_I2C=m for Micro-B
-	  device and CONFIG_TYPEC_FUSB302=m and CONFIG_BATTERY_MAX17042=m
-	  for Type-C device.
-
 config INTEL_HID_EVENT
 	tristate "INTEL HID Event"
 	depends on ACPI
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index e03b59ce3f9f..7ee369aab10d 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -70,10 +70,6 @@ obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL)		+= intel/
 
 obj-$(CONFIG_INTEL_ATOMISP2_LED)	+= intel_atomisp2_led.o
 obj-$(CONFIG_INTEL_ATOMISP2_PM)		+= intel_atomisp2_pm.o
-obj-$(CONFIG_INTEL_CHT_INT33FE)		+= intel_cht_int33fe.o
-intel_cht_int33fe-objs			:= intel_cht_int33fe_common.o \
-					   intel_cht_int33fe_typec.o \
-					   intel_cht_int33fe_microb.o
 obj-$(CONFIG_INTEL_HID_EVENT)		+= intel-hid.o
 obj-$(CONFIG_INTEL_INT0002_VGPIO)	+= intel_int0002_vgpio.o
 obj-$(CONFIG_INTEL_MENLOW)		+= intel_menlow.o
diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig
index 33f2dab03d3d..f2eef337eb98 100644
--- a/drivers/platform/x86/intel/Kconfig
+++ b/drivers/platform/x86/intel/Kconfig
@@ -16,6 +16,7 @@ menuconfig X86_PLATFORM_DRIVERS_INTEL
 
 if X86_PLATFORM_DRIVERS_INTEL
 
+source "drivers/platform/x86/intel/int33fe/Kconfig"
 source "drivers/platform/x86/intel/int3472/Kconfig"
 
 endif # X86_PLATFORM_DRIVERS_INTEL
diff --git a/drivers/platform/x86/intel/Makefile b/drivers/platform/x86/intel/Makefile
index 3ac795d810f1..0653055942d5 100644
--- a/drivers/platform/x86/intel/Makefile
+++ b/drivers/platform/x86/intel/Makefile
@@ -4,4 +4,5 @@
 # Intel x86 Platform-Specific Drivers
 #
 
+obj-$(CONFIG_INTEL_CHT_INT33FE)		+= int33fe/
 obj-$(CONFIG_INTEL_SKL_INT3472)		+= int3472/
diff --git a/drivers/platform/x86/intel/int33fe/Kconfig b/drivers/platform/x86/intel/int33fe/Kconfig
new file mode 100644
index 000000000000..2f7329a2e399
--- /dev/null
+++ b/drivers/platform/x86/intel/int33fe/Kconfig
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INTEL_CHT_INT33FE
+	tristate "Intel Cherry Trail ACPI INT33FE Driver"
+	depends on X86 && ACPI && I2C && REGULATOR
+	depends on CHARGER_BQ24190=y || (CHARGER_BQ24190=m && m)
+	depends on USB_ROLES_INTEL_XHCI=y || (USB_ROLES_INTEL_XHCI=m && m)
+	depends on TYPEC_MUX_PI3USB30532=y || (TYPEC_MUX_PI3USB30532=m && m)
+	help
+	  This driver add support for the INT33FE ACPI device found on
+	  some Intel Cherry Trail devices.
+
+	  There are two kinds of INT33FE ACPI device possible: for hardware
+	  with USB Type-C and Micro-B connectors. This driver supports both.
+
+	  The INT33FE ACPI device has a CRS table with I2cSerialBusV2
+	  resources for Fuel Gauge Controller and (in the Type-C variant)
+	  FUSB302 USB Type-C Controller and PI3USB30532 USB switch.
+	  This driver instantiates i2c-clients for these, so that standard
+	  i2c drivers for these chips can bind to the them.
+
+	  If you enable this driver it is advised to also select
+	  CONFIG_BATTERY_BQ27XXX=m or CONFIG_BATTERY_BQ27XXX_I2C=m for Micro-B
+	  device and CONFIG_TYPEC_FUSB302=m and CONFIG_BATTERY_MAX17042=m
+	  for Type-C device.
diff --git a/drivers/platform/x86/intel/int33fe/Makefile b/drivers/platform/x86/intel/int33fe/Makefile
new file mode 100644
index 000000000000..cc11183ce179
--- /dev/null
+++ b/drivers/platform/x86/intel/int33fe/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_INTEL_CHT_INT33FE)		+= intel_cht_int33fe.o
+intel_cht_int33fe-objs			:= intel_cht_int33fe_common.o \
+					   intel_cht_int33fe_typec.o \
+					   intel_cht_int33fe_microb.o
diff --git a/drivers/platform/x86/intel_cht_int33fe_common.c b/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.c
similarity index 100%
rename from drivers/platform/x86/intel_cht_int33fe_common.c
rename to drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.c
diff --git a/drivers/platform/x86/intel_cht_int33fe_common.h b/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.h
similarity index 100%
rename from drivers/platform/x86/intel_cht_int33fe_common.h
rename to drivers/platform/x86/intel/int33fe/intel_cht_int33fe_common.h
diff --git a/drivers/platform/x86/intel_cht_int33fe_microb.c b/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_microb.c
similarity index 100%
rename from drivers/platform/x86/intel_cht_int33fe_microb.c
rename to drivers/platform/x86/intel/int33fe/intel_cht_int33fe_microb.c
diff --git a/drivers/platform/x86/intel_cht_int33fe_typec.c b/drivers/platform/x86/intel/int33fe/intel_cht_int33fe_typec.c
similarity index 100%
rename from drivers/platform/x86/intel_cht_int33fe_typec.c
rename to drivers/platform/x86/intel/int33fe/intel_cht_int33fe_typec.c

From 71d69e82f4168713afd89804d6e5f52d0e3848a2 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 21 Jun 2021 21:36:48 +0200
Subject: [PATCH 2489/3292] platform/x86: think-lmi: Return EINVAL when kbdlang
 gets set to a 0 length string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 0ddcf3a6b442 ("platform/x86: think-lmi: Avoid potential read before
start of the buffer") moved the length == 0 up to before stripping the '\n'
which typically gets added when users echo a value to a sysfs-attribute
from the shell.

This avoids a potential buffer-underrun, but it also causes a behavioral
change, prior to this change "echo > kbdlang", iow writing just a single
'\n' would result in an EINVAL error, but after the change this gets
accepted setting kbdlang to an empty string.

Fix this by replacing the manual '\n' check with using strchrnul() to get
the length till '\n' or terminating 0 in one go; and then do the
length != 0 check after this.

Fixes: 0ddcf3a6b442 ("platform/x86: think-lmi: Avoid potential read before start of the buffer")
Reported-by: Juha Leppänen <juha_efku@dnainternet.net>
Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210621193648.44138-1-hdegoede@redhat.com
---
 drivers/platform/x86/think-lmi.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index c6c9fbb8a53e..b57061079288 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -442,14 +442,9 @@ static ssize_t kbdlang_store(struct kobject *kobj,
 	struct tlmi_pwd_setting *setting = to_tlmi_pwd_setting(kobj);
 	int length;
 
-	length = strlen(buf);
-	if (!length)
-		return -EINVAL;
-
-	if (buf[length-1] == '\n')
-		length--;
-
-	if (length >= TLMI_LANG_MAXLEN)
+	/* Calculate length till '\n' or terminating 0 */
+	length = strchrnul(buf, '\n') - buf;
+	if (!length || length >= TLMI_LANG_MAXLEN)
 		return -EINVAL;
 
 	memcpy(setting->kbdlang, buf, length);

From 534799097a777e82910f77a4f9d289c815a9a64e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Jun 2021 11:45:11 +0200
Subject: [PATCH 2490/3292] netfilter: nf_tables: skip netlink portID
 validation if zero

nft_table_lookup() allows us to obtain the table object by the name and
the family. The netlink portID validation needs to be skipped for the
dump path, since the ownership only applies to commands to update the
given table. Skip validation if the specified netlink PortID is zero
when calling nft_table_lookup().

Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ca9ec8721e6c..1d62b1a83299 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -571,7 +571,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 		    table->family == family &&
 		    nft_active_genmask(table, genmask)) {
 			if (nft_table_has_owner(table) &&
-			    table->nlpid != nlpid)
+			    nlpid && table->nlpid != nlpid)
 				return ERR_PTR(-EPERM);
 
 			return table;

From e31f072ffab0397a328b31a9589dcf9733dc9c72 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 22 Jun 2021 12:10:49 +0200
Subject: [PATCH 2491/3292] netfilter: nf_tables: do not allow to delete table
 with owner by handle

nft_table_lookup_byhandle() also needs to validate the netlink PortID
owner when deleting a table by handle.

Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1d62b1a83299..fcb15b8904e8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -583,7 +583,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 
 static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 						   const struct nlattr *nla,
-						   u8 genmask)
+						   u8 genmask, u32 nlpid)
 {
 	struct nftables_pernet *nft_net;
 	struct nft_table *table;
@@ -591,8 +591,13 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 	nft_net = nft_pernet(net);
 	list_for_each_entry(table, &nft_net->tables, list) {
 		if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
-		    nft_active_genmask(table, genmask))
+		    nft_active_genmask(table, genmask)) {
+			if (nft_table_has_owner(table) &&
+			    nlpid && table->nlpid != nlpid)
+				return ERR_PTR(-EPERM);
+
 			return table;
+		}
 	}
 
 	return ERR_PTR(-ENOENT);
@@ -1279,7 +1284,8 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
 
 	if (nla[NFTA_TABLE_HANDLE]) {
 		attr = nla[NFTA_TABLE_HANDLE];
-		table = nft_table_lookup_byhandle(net, attr, genmask);
+		table = nft_table_lookup_byhandle(net, attr, genmask,
+						  NETLINK_CB(skb).portid);
 	} else {
 		attr = nla[NFTA_TABLE_NAME];
 		table = nft_table_lookup(net, attr, family, genmask,

From 7119f02b5d3449cea7736161590ae45289a57963 Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Sat, 12 Jun 2021 14:32:34 +0300
Subject: [PATCH 2492/3292] iwlwifi: mvm: support BIOS enable/disable for 11ax
 in Russia

Read the new BIOS DSM and Pass to FW if to disable\enable
11ax for Russia according to the BIOS key. This is
needed to enable OEMs to control enable/disable 11ax in Russia.
Also add support for future "enable 11ax in country X" features.

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.a705f7cedff8.I580f1021cabcc37e88f5ec5e9a6bbf00aae514b6@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 50 +++++++++++++++-----
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h |  9 ++++
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c  | 26 ++++++----
 3 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index e31bba836c6f..8cf7bc3aa09a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -163,6 +163,27 @@ int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func,
 }
 IWL_EXPORT_SYMBOL(iwl_acpi_get_dsm_u8);
 
+/*
+ * Evaluate a DSM with no arguments and a u32 return value,
+ */
+int iwl_acpi_get_dsm_u32(struct device *dev, int rev, int func,
+			 const guid_t *guid, u32 *value)
+{
+	int ret;
+	u64 val;
+
+	ret = iwl_acpi_get_dsm_integer(dev, rev, func,
+				       guid, &val, sizeof(u32));
+
+	if (ret < 0)
+		return ret;
+
+	/* cast val (u64) to be u32 */
+	*value = (u32)val;
+	return 0;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_dsm_u32);
+
 union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 					 union acpi_object *data,
 					 int data_size, int *tbl_rev)
@@ -734,30 +755,35 @@ out:
 
 __le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
 {
-	u32 ret;
+	int ret;
+	u8 value;
 	__le32 config_bitmap = 0;
 
 	/*
 	 ** Evaluate func 'DSM_FUNC_ENABLE_INDONESIA_5G2'
 	 */
-	ret = iwl_acpi_eval_dsm_func(fwrt->dev, DSM_FUNC_ENABLE_INDONESIA_5G2);
+	ret = iwl_acpi_get_dsm_u8(fwrt->dev, 0,
+				  DSM_FUNC_ENABLE_INDONESIA_5G2,
+				  &iwl_guid, &value);
 
-	if (ret == DSM_VALUE_INDONESIA_ENABLE)
+	if (!ret && value == DSM_VALUE_INDONESIA_ENABLE)
 		config_bitmap |=
 			cpu_to_le32(LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK);
 
 	/*
 	 ** Evaluate func 'DSM_FUNC_DISABLE_SRD'
 	 */
-	ret = iwl_acpi_eval_dsm_func(fwrt->dev, DSM_FUNC_DISABLE_SRD);
-
-	if (ret == DSM_VALUE_SRD_PASSIVE)
-		config_bitmap |=
-			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK);
-
-	else if (ret == DSM_VALUE_SRD_DISABLE)
-		config_bitmap |=
-			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
+	ret = iwl_acpi_get_dsm_u8(fwrt->dev, 0,
+				  DSM_FUNC_DISABLE_SRD,
+				  &iwl_guid, &value);
+	if (!ret) {
+		if (value == DSM_VALUE_SRD_PASSIVE)
+			config_bitmap |=
+				cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK);
+		else if (value == DSM_VALUE_SRD_DISABLE)
+			config_bitmap |=
+				cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
+	}
 
 	return config_bitmap;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index d16e6ec08c9f..9fe64476083d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -116,6 +116,9 @@ void *iwl_acpi_get_object(struct device *dev, acpi_string method);
 int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func,
 			const guid_t *guid, u8 *value);
 
+int iwl_acpi_get_dsm_u32(struct device *dev, int rev, int func,
+			 const guid_t *guid, u32 *value);
+
 union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 					 union acpi_object *data,
 					 int data_size, int *tbl_rev);
@@ -182,6 +185,12 @@ static inline int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func,
 	return -ENOENT;
 }
 
+static inline int iwl_acpi_get_dsm_u32(struct device *dev, int rev, int func,
+				       const guid_t *guid, u32 *value)
+{
+	return -ENOENT;
+}
+
 static inline union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 						       union acpi_object *data,
 						       int data_size,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 8aa5f1a2c58c..9f2a5dee59d8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1139,14 +1139,19 @@ static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
 
 static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 {
-	int cmd_ret;
+	int ret;
+	u32 value;
 	struct iwl_lari_config_change_cmd_v3 cmd = {};
 
 	cmd.config_bitmap = iwl_acpi_get_lari_config_bitmap(&mvm->fwrt);
 
+	ret = iwl_acpi_get_dsm_u32((&mvm->fwrt)->dev, 0, DSM_FUNC_11AX_ENABLEMENT,
+				   &iwl_guid, &value);
+	if (!ret)
+		cmd.oem_11ax_allow_bitmap = cpu_to_le32(value);
 	/* apply more config masks here */
 
-	if (cmd.config_bitmap) {
+	if (cmd.config_bitmap || cmd.oem_11ax_allow_bitmap) {
 		size_t cmd_size;
 		u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw,
 						   REGULATORY_AND_NVM_GROUP,
@@ -1159,16 +1164,17 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v1);
 
 		IWL_DEBUG_RADIO(mvm,
-				"sending LARI_CONFIG_CHANGE, config_bitmap=0x%x\n",
-				le32_to_cpu(cmd.config_bitmap));
-		cmd_ret = iwl_mvm_send_cmd_pdu(mvm,
-					       WIDE_ID(REGULATORY_AND_NVM_GROUP,
-						       LARI_CONFIG_CHANGE),
-					       0, cmd_size, &cmd);
-		if (cmd_ret < 0)
+				"sending LARI_CONFIG_CHANGE, config_bitmap=0x%x, oem_11ax_allow_bitmap=0x%x\n",
+				le32_to_cpu(cmd.config_bitmap),
+				le32_to_cpu(cmd.oem_11ax_allow_bitmap));
+		ret = iwl_mvm_send_cmd_pdu(mvm,
+					   WIDE_ID(REGULATORY_AND_NVM_GROUP,
+						   LARI_CONFIG_CHANGE),
+					   0, cmd_size, &cmd);
+		if (ret < 0)
 			IWL_DEBUG_RADIO(mvm,
 					"Failed to send LARI_CONFIG_CHANGE (%d)\n",
-					cmd_ret);
+					ret);
 	}
 }
 #else /* CONFIG_ACPI */

From c4ae8b9d0f3217308766e1ed3eaad14054b02467 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 12 Jun 2021 14:32:35 +0300
Subject: [PATCH 2493/3292] iwlwifi: mvm: pass the clock type to
 iwl_mvm_get_sync_time()

Allow the caller to pass the clock type to iwl_mvm_get_sync_time() so
callers with different needs can decide whether to use boottime or
realtime.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.093f6660e69b.Ifd2328ac2130269f729c9c1bceec44ba01d79e88@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c  |  4 ++--
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c    |  5 +++--
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h          |  5 ++++-
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c        | 11 ++++++++---
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 38d0bfb649cc..7d9faeffd154 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -460,7 +460,7 @@ static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file,
 	int pos = 0;
 
 	mutex_lock(&mvm->mutex);
-	iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os);
+	iwl_mvm_get_sync_time(mvm, CLOCK_BOOTTIME, &curr_gp2, &curr_os, NULL);
 	mutex_unlock(&mvm->mutex);
 
 	do_div(curr_os, NSEC_PER_USEC);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index a456b8a0ae58..a24e6c0490e9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/etherdevice.h>
 #include <linux/math64.h>
@@ -879,7 +879,8 @@ static u64 iwl_mvm_ftm_get_host_time(struct iwl_mvm *mvm, __le32 fw_gp2_ts)
 	u32 curr_gp2, diff;
 	u64 now_from_boot_ns;
 
-	iwl_mvm_get_sync_time(mvm, &curr_gp2, &now_from_boot_ns);
+	iwl_mvm_get_sync_time(mvm, CLOCK_BOOTTIME, &curr_gp2,
+			      &now_from_boot_ns, NULL);
 
 	if (curr_gp2 >= gp2_ts)
 		diff = curr_gp2 - gp2_ts;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 4d9d4d6892fc..b137f8130b6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -16,6 +16,8 @@
 #include <linux/thermal.h>
 #endif
 
+#include <linux/ktime.h>
+
 #include "iwl-op-mode.h"
 #include "iwl-trans.h"
 #include "fw/notif-wait.h"
@@ -1450,7 +1452,8 @@ u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac);
 void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm);
 u8 first_antenna(u8 mask);
 u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx);
-void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime);
+void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, int clock_type, u32 *gp2,
+			   u64 *boottime, ktime_t *realtime);
 u32 iwl_mvm_get_systime(struct iwl_mvm *mvm);
 
 /* Tx / Host Commands */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index c566be99a4c7..99105272139d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
  */
@@ -1398,7 +1398,8 @@ u32 iwl_mvm_get_systime(struct iwl_mvm *mvm)
 	return iwl_read_prph(mvm->trans, reg_addr);
 }
 
-void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
+void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, int clock_type,
+			   u32 *gp2, u64 *boottime, ktime_t *realtime)
 {
 	bool ps_disabled;
 
@@ -1412,7 +1413,11 @@ void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
 	}
 
 	*gp2 = iwl_mvm_get_systime(mvm);
-	*boottime = ktime_get_boottime_ns();
+
+	if (clock_type == CLOCK_BOOTTIME && boottime)
+		*boottime = ktime_get_boottime_ns();
+	else if (clock_type == CLOCK_REALTIME && realtime)
+		*realtime = ktime_get_real();
 
 	if (!ps_disabled) {
 		mvm->ps_disabled = ps_disabled;

From e348b8a62c147a2def03ebfa8218f1c8de157bf8 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 12 Jun 2021 14:32:36 +0300
Subject: [PATCH 2494/3292] iwlwifi: mvm: fix indentation in some scan
 functions

Two functions had indentation mistakes which were causing sparse
warnings.  Fix them.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.12f3b9fea57e.I42a7556d43de78ec6387e3a699eca10482b0485d@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 5a0696c44f6d..0368b7101222 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -2327,9 +2327,9 @@ static int iwl_mvm_scan_umac_v12(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 					     &scan_p->general_params,
 					     gen_flags);
 
-	 ret = iwl_mvm_fill_scan_sched_params(params,
-					      scan_p->periodic_params.schedule,
-					      &scan_p->periodic_params.delay);
+	ret = iwl_mvm_fill_scan_sched_params(params,
+					     scan_p->periodic_params.schedule,
+					     &scan_p->periodic_params.delay);
 	if (ret)
 		return ret;
 
@@ -2362,9 +2362,9 @@ static int iwl_mvm_scan_umac_v14(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 					     &scan_p->general_params,
 					     gen_flags);
 
-	 ret = iwl_mvm_fill_scan_sched_params(params,
-					      scan_p->periodic_params.schedule,
-					      &scan_p->periodic_params.delay);
+	ret = iwl_mvm_fill_scan_sched_params(params,
+					     scan_p->periodic_params.schedule,
+					     &scan_p->periodic_params.delay);
 	if (ret)
 		return ret;
 

From 7a9a44456d742bdf66a3394a6e718c6cece20f69 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 12 Jun 2021 14:32:37 +0300
Subject: [PATCH 2495/3292] iwlwifi: remove unused REMOTE_WAKE_CONFIG_CMD
 definitions

We don't use this command anymore and it is going to be removed from
the FW.  Remove all related definitions.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.549b282ae9a4.Iced05882d73b869e19f50e6a6e7bf9ce6cd7899b@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/fw/api/commands.h  |  5 --
 .../net/wireless/intel/iwlwifi/fw/api/d3.h    | 51 +------------------
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c  |  1 -
 3 files changed, 1 insertion(+), 56 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
index c625d319142e..ce060c3dfd7b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -534,11 +534,6 @@ enum iwl_legacy_cmds {
 	 */
 	OFFLOADS_QUERY_CMD = 0xd5,
 
-	/**
-	 * @REMOTE_WAKE_CONFIG_CMD: &struct iwl_wowlan_remote_wake_config
-	 */
-	REMOTE_WAKE_CONFIG_CMD = 0xd6,
-
 	/**
 	 * @D0I3_END_CMD: End D0i3/D3 state, no command data
 	 */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index 758639084e0c..6488c0f8b471 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
  */
@@ -683,55 +683,6 @@ static inline u8 iwlmvm_wowlan_gtk_idx(struct iwl_wowlan_gtk_status *gtk)
 	return gtk->key_flags & IWL_WOWLAN_GTK_IDX_MASK;
 }
 
-#define IWL_WOWLAN_TCP_MAX_PACKET_LEN		64
-#define IWL_WOWLAN_REMOTE_WAKE_MAX_PACKET_LEN	128
-#define IWL_WOWLAN_REMOTE_WAKE_MAX_TOKENS	2048
-
-struct iwl_tcp_packet_info {
-	__le16 tcp_pseudo_header_checksum;
-	__le16 tcp_payload_length;
-} __packed; /* TCP_PACKET_INFO_API_S_VER_2 */
-
-struct iwl_tcp_packet {
-	struct iwl_tcp_packet_info info;
-	u8 rx_mask[IWL_WOWLAN_MAX_PATTERN_LEN / 8];
-	u8 data[IWL_WOWLAN_TCP_MAX_PACKET_LEN];
-} __packed; /* TCP_PROTOCOL_PACKET_API_S_VER_1 */
-
-struct iwl_remote_wake_packet {
-	struct iwl_tcp_packet_info info;
-	u8 rx_mask[IWL_WOWLAN_MAX_PATTERN_LEN / 8];
-	u8 data[IWL_WOWLAN_REMOTE_WAKE_MAX_PACKET_LEN];
-} __packed; /* TCP_PROTOCOL_PACKET_API_S_VER_1 */
-
-struct iwl_wowlan_remote_wake_config {
-	__le32 connection_max_time; /* unused */
-	/* TCP_PROTOCOL_CONFIG_API_S_VER_1 */
-	u8 max_syn_retries;
-	u8 max_data_retries;
-	u8 tcp_syn_ack_timeout;
-	u8 tcp_ack_timeout;
-
-	struct iwl_tcp_packet syn_tx;
-	struct iwl_tcp_packet synack_rx;
-	struct iwl_tcp_packet keepalive_ack_rx;
-	struct iwl_tcp_packet fin_tx;
-
-	struct iwl_remote_wake_packet keepalive_tx;
-	struct iwl_remote_wake_packet wake_rx;
-
-	/* REMOTE_WAKE_OFFSET_INFO_API_S_VER_1 */
-	u8 sequence_number_offset;
-	u8 sequence_number_length;
-	u8 token_offset;
-	u8 token_length;
-	/* REMOTE_WAKE_PROTOCOL_PARAMS_API_S_VER_1 */
-	__le32 initial_sequence_number;
-	__le16 keepalive_interval;
-	__le16 num_tokens;
-	u8 tokens[IWL_WOWLAN_REMOTE_WAKE_MAX_TOKENS];
-} __packed; /* REMOTE_WAKE_CONFIG_API_S_VER_2 */
-
 /* TODO: NetDetect API */
 
 #endif /* __iwl_fw_api_d3_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index ebed82c590e5..af5688af9cfb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -445,7 +445,6 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = {
 	HCMD_NAME(D3_CONFIG_CMD),
 	HCMD_NAME(PROT_OFFLOAD_CONFIG_CMD),
 	HCMD_NAME(OFFLOADS_QUERY_CMD),
-	HCMD_NAME(REMOTE_WAKE_CONFIG_CMD),
 	HCMD_NAME(MATCH_FOUND_NOTIFICATION),
 	HCMD_NAME(DTS_MEASUREMENT_NOTIFICATION),
 	HCMD_NAME(WOWLAN_PATTERNS),

From 8835a64f74c46baebfc946cd5a2c861b866ebcee Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 14:32:38 +0300
Subject: [PATCH 2496/3292] iwlwifi: mvm: don't change band on bound PHY
 contexts

When we have a P2P Device active, we attempt to only change the
PHY context it uses when we get a new remain-on-channel, if the
P2P Device is the only user of the PHY context.

This is fine if we're switching within a band, but if we're
switching bands then the switch implies a removal and re-add
of the PHY context, which isn't permitted by the firmware while
it's bound to an interface.

Fix the code to skip the unbind/release/... cycle only if the
band doesn't change (or we have old devices that can switch the
band on the fly as well.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.e9ac313f70f3.I713b9d109957df7e7d9ed0861d5377ce3f8fccd3@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 607d5d564928..141d9fc299b0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3800,6 +3800,7 @@ static int iwl_mvm_roc(struct ieee80211_hw *hw,
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct cfg80211_chan_def chandef;
 	struct iwl_mvm_phy_ctxt *phy_ctxt;
+	bool band_change_removal;
 	int ret, i;
 
 	IWL_DEBUG_MAC80211(mvm, "enter (%d, %d, %d)\n", channel->hw_value,
@@ -3880,19 +3881,30 @@ static int iwl_mvm_roc(struct ieee80211_hw *hw,
 	cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT);
 
 	/*
-	 * Change the PHY context configuration as it is currently referenced
-	 * only by the P2P Device MAC
+	 * Check if the remain-on-channel is on a different band and that
+	 * requires context removal, see iwl_mvm_phy_ctxt_changed(). If
+	 * so, we'll need to release and then re-configure here, since we
+	 * must not remove a PHY context that's part of a binding.
 	 */
-	if (mvmvif->phy_ctxt->ref == 1) {
+	band_change_removal =
+		fw_has_capa(&mvm->fw->ucode_capa,
+			    IWL_UCODE_TLV_CAPA_BINDING_CDB_SUPPORT) &&
+		mvmvif->phy_ctxt->channel->band != chandef.chan->band;
+
+	if (mvmvif->phy_ctxt->ref == 1 && !band_change_removal) {
+		/*
+		 * Change the PHY context configuration as it is currently
+		 * referenced only by the P2P Device MAC (and we can modify it)
+		 */
 		ret = iwl_mvm_phy_ctxt_changed(mvm, mvmvif->phy_ctxt,
 					       &chandef, 1, 1);
 		if (ret)
 			goto out_unlock;
 	} else {
 		/*
-		 * The PHY context is shared with other MACs. Need to remove the
-		 * P2P Device from the binding, allocate an new PHY context and
-		 * create a new binding
+		 * The PHY context is shared with other MACs (or we're trying to
+		 * switch bands), so remove the P2P Device from the binding,
+		 * allocate an new PHY context and create a new binding.
 		 */
 		phy_ctxt = iwl_mvm_get_free_phy_ctxt(mvm);
 		if (!phy_ctxt) {

From f00c3f9e2cfc144d5f40803ea3cd0d0cb09745cc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 14:32:39 +0300
Subject: [PATCH 2497/3292] iwlwifi: pcie: handle pcim_iomap_table() failures
 better

pcim_iomap_table() might return NULL, so we shouldn't unconditionally
dereference the return value by taking the [0] entry.

Handle this better by checking for NULL first, and then separately
checking if the [0] entry is NULL.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.9aa4f0e3574a.I458b283f203d5f927f00be1bfbd4b8ebf11c5ae4@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 239bc177a3e5..1009e3d254cd 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -3413,6 +3413,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
 	struct iwl_trans *trans;
 	int ret, addr_size;
 	const struct iwl_trans_ops *ops = &trans_ops_pcie_gen2;
+	void __iomem * const *table;
 
 	if (!cfg_trans->gen2)
 		ops = &trans_ops_pcie;
@@ -3485,9 +3486,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
 		goto out_no_pci;
 	}
 
-	trans_pcie->hw_base = pcim_iomap_table(pdev)[0];
-	if (!trans_pcie->hw_base) {
+	table = pcim_iomap_table(pdev);
+	if (!table) {
 		dev_err(&pdev->dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto out_no_pci;
+	}
+
+	trans_pcie->hw_base = table[0];
+	if (!trans_pcie->hw_base) {
+		dev_err(&pdev->dev, "couldn't find IO mem in first BAR\n");
 		ret = -ENODEV;
 		goto out_no_pci;
 	}

From 5cc816ef9db1fe03f73e56e9d8f118add9c6efe4 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 12 Jun 2021 14:32:40 +0300
Subject: [PATCH 2498/3292] iwlwifi: increase PNVM load timeout

The FW has a watchdog of 200ms in the PNVM load flow, so the driver
should have a slightly higher timeout.  Change the timeout from 100ms
to 250ms.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Fixes: 70d3ca86b025 ("iwlwifi: mvm: ring the doorbell and wait for PNVM load completion")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.ba22aec1e2be.I36bfadc28c480f4fc57266c075a79e8ea4a6934f@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/pnvm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
index e4f91bce222d..61d3d4e0b7d9 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /******************************************************************************
  *
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
  *
  *****************************************************************************/
 
@@ -10,7 +10,7 @@
 
 #include "fw/notif-wait.h"
 
-#define MVM_UCODE_PNVM_TIMEOUT	(HZ / 10)
+#define MVM_UCODE_PNVM_TIMEOUT	(HZ / 4)
 
 int iwl_pnvm_load(struct iwl_trans *trans,
 		  struct iwl_notif_wait_data *notif_wait);

From 7e2c14372bd89ffe4cefd678b8b1743cac376f4c Mon Sep 17 00:00:00 2001
From: Matti Gottlieb <matti.gottlieb@intel.com>
Date: Sat, 12 Jun 2021 14:32:41 +0300
Subject: [PATCH 2499/3292] iwlwifi: pcie: Add support for AX231 radio module
 with Ma devices

Add support for AX231 radio modules, which we call Fm.
These modules can be used with the Ma family of devices
and above.

Signed-off-by: Matti Gottlieb <matti.gottlieb@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.c1fdd153d686.I7ee0485c52fb429de1fe171cb6dc0ae593a26788@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 12 ++++++++++++
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  3 +++
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   |  5 +++++
 3 files changed, 20 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index c2315dea9a23..0256d0042f71 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -47,6 +47,7 @@
 #define IWL_MA_A_GF_A_FW_PRE		"iwlwifi-ma-a0-gf-a0-"
 #define IWL_MA_A_GF4_A_FW_PRE		"iwlwifi-ma-a0-gf4-a0-"
 #define IWL_MA_A_MR_A_FW_PRE		"iwlwifi-ma-a0-mr-a0-"
+#define IWL_MA_A_FM_A_FW_PRE		"iwlwifi-ma-a0-fm-a0-"
 #define IWL_SNJ_A_MR_A_FW_PRE		"iwlwifi-SoSnj-a0-mr-a0-"
 #define IWL_BZ_A_HR_B_FW_PRE		"iwlwifi-bz-a0-hr-b0-"
 #define IWL_BZ_A_GF_A_FW_PRE		"iwlwifi-bz-a0-gf-a0-"
@@ -93,6 +94,8 @@
 	IWL_MA_A_GF4_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_MA_A_MR_A_FW_MODULE_FIRMWARE(api) \
 	IWL_MA_A_MR_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_MA_A_FM_A_FW_MODULE_FIRMWARE(api)		\
+	IWL_MA_A_FM_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_SNJ_A_MR_A_MODULE_FIRMWARE(api) \
 	IWL_SNJ_A_MR_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_BZ_A_HR_B_MODULE_FIRMWARE(api) \
@@ -389,6 +392,7 @@ const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz";
 const char iwl_ax203_name[] = "Intel(R) Wi-Fi 6 AX203";
 const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6E AX211 160MHz";
 const char iwl_ax221_name[] = "Intel(R) Wi-Fi 6E AX221 160MHz";
+const char iwl_ax231_name[] = "Intel(R) Wi-Fi 6E AX231 160MHz";
 const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6E AX411 160MHz";
 
 const char iwl_ax200_killer_1650w_name[] =
@@ -724,6 +728,13 @@ const struct iwl_cfg iwl_cfg_ma_a0_mr_a0 = {
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
+const struct iwl_cfg iwl_cfg_ma_a0_fm_a0 = {
+	.fw_name_pre = IWL_MA_A_FM_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
 const struct iwl_cfg iwl_cfg_snj_a0_mr_a0 = {
 	.fw_name_pre = IWL_SNJ_A_MR_A_FW_PRE,
 	.uhb_supported = true,
@@ -797,6 +808,7 @@ MODULE_FIRMWARE(IWL_MA_A_HR_B_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_MA_A_GF_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_MA_A_GF4_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_MA_A_MR_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_MA_A_FM_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_SNJ_A_MR_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_BZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_BZ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index b35ffdfdf14b..fc2ba1ce4370 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -426,6 +426,7 @@ struct iwl_cfg {
 #define IWL_CFG_RF_TYPE_HR1		0x10C
 #define IWL_CFG_RF_TYPE_GF		0x10D
 #define IWL_CFG_RF_TYPE_MR		0x110
+#define IWL_CFG_RF_TYPE_FM		0x112
 
 #define IWL_CFG_RF_ID_TH		0x1
 #define IWL_CFG_RF_ID_TH1		0x1
@@ -507,6 +508,7 @@ extern const char iwl_ax210_killer_1675w_name[];
 extern const char iwl_ax210_killer_1675x_name[];
 extern const char iwl_ax211_name[];
 extern const char iwl_ax221_name[];
+extern const char iwl_ax231_name[];
 extern const char iwl_ax411_name[];
 #if IS_ENABLED(CONFIG_IWLDVM)
 extern const struct iwl_cfg iwl5300_agn_cfg;
@@ -613,6 +615,7 @@ extern const struct iwl_cfg iwl_cfg_ma_a0_hr_b0;
 extern const struct iwl_cfg iwl_cfg_ma_a0_gf_a0;
 extern const struct iwl_cfg iwl_cfg_ma_a0_gf4_a0;
 extern const struct iwl_cfg iwl_cfg_ma_a0_mr_a0;
+extern const struct iwl_cfg iwl_cfg_ma_a0_fm_a0;
 extern const struct iwl_cfg iwl_cfg_snj_a0_mr_a0;
 extern const struct iwl_cfg iwl_cfg_so_a0_hr_a0;
 extern const struct iwl_cfg iwl_cfg_quz_a0_hr_b0;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index d94bd8d732e9..c0765bbd006f 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1029,6 +1029,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
 		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
 		      iwl_cfg_ma_a0_mr_a0, iwl_ax221_name),
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+		      iwl_cfg_ma_a0_fm_a0, iwl_ax231_name),
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
 		      IWL_CFG_MAC_TYPE_SNJ, IWL_CFG_ANY,
 		      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,

From 57e6492cf0fd2e39feaa7ac39c68383f44bde6ac Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 14:32:42 +0300
Subject: [PATCH 2500/3292] iwlwifi: pcie: print interrupt number, not index

Printing the interrupt index in our local array isn't very
useful in an error message, print the interrupt number (as
also shown in e.g. /proc/interrupts) instead.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.05bc5157e606.Ifb65b5ed2e5296fd8258c40c4287b5443b06d337@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 1009e3d254cd..5b40833932a0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1648,7 +1648,7 @@ static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans)
 		if (ret)
 			IWL_ERR(trans_pcie->trans,
 				"Failed to set affinity mask for IRQ %d\n",
-				i);
+				trans_pcie->msix_entries[i].vector);
 	}
 }
 

From 163c36150179503dae869f0f17355eedb32b7af4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 14:32:43 +0300
Subject: [PATCH 2501/3292] iwlwifi: pcie: remove CSR_HW_RF_ID_TYPE_CHIP_ID

This is duplicated with CSR_HW_RFID_TYPE so just use the latter
for less typing/shorter lines.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.00b220f4ba53.I1fe216a46e7d9c1316d681daa293064f16ff1899@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-csr.h  |  5 +----
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 12 ++++++------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index db312abd2e09..47e5a17c0f48 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2016 Intel Deutschland GmbH
  */
@@ -325,9 +325,6 @@ enum {
 #define CSR_HW_RF_ID_TYPE_GF		(0x0010D000)
 #define CSR_HW_RF_ID_TYPE_GF4		(0x0010E000)
 
-/* HW_RF CHIP ID  */
-#define CSR_HW_RF_ID_TYPE_CHIP_ID(_val) (((_val) >> 12) & 0xFFF)
-
 /* HW_RF CHIP STEP  */
 #define CSR_HW_RF_STEP(_val) (((_val) >> 8) & 0xF)
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index c0765bbd006f..a92c5f0044cd 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1214,14 +1214,14 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (cfg == &iwlax210_2ax_cfg_so_hr_a0) {
 		if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_TY) {
 			iwl_trans->cfg = &iwlax210_2ax_cfg_ty_gf_a0;
-		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_JF)) {
+		} else if (CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) ==
+			   CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF)) {
 			iwl_trans->cfg = &iwlax210_2ax_cfg_so_jf_b0;
-		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF)) {
+		} else if (CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) ==
+			   CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_GF)) {
 			iwl_trans->cfg = &iwlax211_2ax_cfg_so_gf_a0;
-		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF4)) {
+		} else if (CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) ==
+			   CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_GF4)) {
 			iwl_trans->cfg = &iwlax411_2ax_cfg_so_gf4_a0;
 		}
 	}

From 7e10d7ae960212f84972a2c59dd9a1a5e23fd4a4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 12 Jun 2021 14:32:44 +0300
Subject: [PATCH 2502/3292] iwlwifi: remove duplicate iwl_ax201_cfg_qu_hr
 declaration

This configuration struct is declared twice, remove one of the
declarations.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.a08c905ec25b.Iff706f9d5b7b666e306549c419d04dcd4d81e5fd@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index fc2ba1ce4370..3e4c6a809595 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -588,7 +588,6 @@ extern const struct iwl_cfg iwl_qu_b0_hr_b0;
 extern const struct iwl_cfg iwl_qu_c0_hr_b0;
 extern const struct iwl_cfg iwl_ax200_cfg_cc;
 extern const struct iwl_cfg iwl_ax201_cfg_qu_hr;
-extern const struct iwl_cfg iwl_ax201_cfg_qu_hr;
 extern const struct iwl_cfg iwl_ax201_cfg_qu_c0_hr_b0;
 extern const struct iwl_cfg iwl_ax201_cfg_quz_hr;
 extern const struct iwl_cfg iwl_ax1650i_cfg_quz_hr;

From a451b823074ca40bda686f3fb48875103e17d7da Mon Sep 17 00:00:00 2001
From: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
Date: Sat, 12 Jun 2021 14:32:45 +0300
Subject: [PATCH 2503/3292] iwlwifi: yoyo: support region TLV version 2

Region TLV version 2 now includes more data, but it is not
relevant for the driver.
In order to support this new version, just mask the new part out.

Signed-off-by: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210612142637.60dd4c60ab49.I44fe02af389d3ab089363bf9bde0d99a4c1ff383@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h |  3 ++-
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c         |  9 ++++++++-
 drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c    | 13 ++++++++++++-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
index 996d5cc5bd9a..5a2d9a1f7e73 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #ifndef __iwl_fw_dbg_tlv_h__
 #define __iwl_fw_dbg_tlv_h__
@@ -11,6 +11,7 @@
 #define IWL_FW_INI_MAX_NAME			32
 #define IWL_FW_INI_MAX_CFG_NAME			64
 #define IWL_FW_INI_DOMAIN_ALWAYS_ON		0
+#define IWL_FW_INI_REGION_V2_MASK		0x0000FFFF
 
 /**
  * struct iwl_fw_ini_hcmd
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index cc4e18ca9566..5a534d70f253 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1933,6 +1933,13 @@ static u32 iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, struct list_head *list,
 	u32 num_of_ranges, i, size;
 	void *range;
 
+	/*
+	 * The higher part of the ID in version 2 is irrelevant for
+	 * us, so mask it out.
+	 */
+	if (le32_to_cpu(reg->hdr.version) == 2)
+		id &= IWL_FW_INI_REGION_V2_MASK;
+
 	if (!ops->get_num_of_ranges || !ops->get_size || !ops->fill_mem_hdr ||
 	    !ops->fill_range)
 		return 0;
@@ -1957,7 +1964,7 @@ static u32 iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt, struct list_head *list,
 	num_of_ranges = ops->get_num_of_ranges(fwrt, reg_data);
 
 	header = (void *)tlv->data;
-	header->region_id = reg->id;
+	header->region_id = cpu_to_le32(id);
 	header->num_of_ranges = cpu_to_le32(num_of_ranges);
 	header->name_len = cpu_to_le32(IWL_FW_INI_MAX_NAME);
 	memcpy(header->name, reg->name, IWL_FW_INI_MAX_NAME);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 4cd8c39cc3e9..0ddd255a8cc1 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -57,7 +57,7 @@ dbg_ver_table[IWL_DBG_TLV_TYPE_NUM] = {
 	[IWL_DBG_TLV_TYPE_DEBUG_INFO]	= {.min_ver = 1, .max_ver = 1,},
 	[IWL_DBG_TLV_TYPE_BUF_ALLOC]	= {.min_ver = 1, .max_ver = 1,},
 	[IWL_DBG_TLV_TYPE_HCMD]		= {.min_ver = 1, .max_ver = 1,},
-	[IWL_DBG_TLV_TYPE_REGION]	= {.min_ver = 1, .max_ver = 1,},
+	[IWL_DBG_TLV_TYPE_REGION]	= {.min_ver = 1, .max_ver = 2,},
 	[IWL_DBG_TLV_TYPE_TRIGGER]	= {.min_ver = 1, .max_ver = 1,},
 };
 
@@ -178,9 +178,20 @@ static int iwl_dbg_tlv_alloc_region(struct iwl_trans *trans,
 	u32 type = le32_to_cpu(reg->type);
 	u32 tlv_len = sizeof(*tlv) + le32_to_cpu(tlv->length);
 
+	/*
+	 * The higher part of the ID in version 2 is irrelevant for
+	 * us, so mask it out.
+	 */
+	if (le32_to_cpu(reg->hdr.version) == 2)
+		id &= IWL_FW_INI_REGION_V2_MASK;
+
 	if (le32_to_cpu(tlv->length) < sizeof(*reg))
 		return -EINVAL;
 
+	/* for safe use of a string from FW, limit it to IWL_FW_INI_MAX_NAME */
+	IWL_DEBUG_FW(trans, "WRT: parsing region: %.*s\n",
+		     IWL_FW_INI_MAX_NAME, reg->name);
+
 	if (id >= IWL_FW_INI_MAX_REGION_ID) {
 		IWL_ERR(trans, "WRT: Invalid region id %u\n", id);
 		return -EINVAL;

From aa899e683fe537793eb81e06ee93ee8ec7cf3f78 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 10:08:41 +0300
Subject: [PATCH 2504/3292] iwlwifi: pcie: identify the RF module

Identify and print out the RF module to be able to identify
(from logs and through debugfs) which one (and version) is
present on the system.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.cd1ef97b2c04.Iad42a59902a87a50b45b9ce88705863686a83b54@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-prph.h |  4 +-
 .../wireless/intel/iwlwifi/pcie/internal.h    |  5 +-
 .../wireless/intel/iwlwifi/pcie/trans-gen2.c  | 71 +++++++++++++++++++
 .../net/wireless/intel/iwlwifi/pcie/trans.c   | 18 +++++
 4 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 3ce77e4eb7e3..9a9e714bf9af 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016 Intel Deutschland GmbH
  */
@@ -412,6 +412,8 @@ enum {
 #define UREG_DOORBELL_TO_ISR6_RESUME	BIT(19)
 #define UREG_DOORBELL_TO_ISR6_PNVM	BIT(20)
 
+#define CNVI_MBOX_C			0xA3400C
+
 #define FSEQ_ERROR_CODE			0xA340C8
 #define FSEQ_TOP_INIT_VERSION		0xA34038
 #define FSEQ_CNVIO_INIT_VERSION		0xA3403C
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 76a512cd2e5c..907781714680 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2003-2015, 2018-2020 Intel Corporation
+ * Copyright (C) 2003-2015, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -317,6 +317,7 @@ struct cont_rec {
  * @alloc_page_lock: spinlock for the page allocator
  * @alloc_page: allocated page to still use parts of
  * @alloc_page_used: how much of the allocated page was already used (bytes)
+ * @rf_name: name/version of the CRF, if any
  */
 struct iwl_trans_pcie {
 	struct iwl_rxq *rxq;
@@ -409,6 +410,8 @@ struct iwl_trans_pcie {
 	bool fw_reset_handshake;
 	bool fw_reset_done;
 	wait_queue_head_t fw_reset_waitq;
+
+	char rf_name[32];
 };
 
 static inline struct iwl_trans_pcie *
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 1bcd36e9e008..56162c4500d7 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -240,6 +240,75 @@ static int iwl_pcie_gen2_nic_init(struct iwl_trans *trans)
 	return 0;
 }
 
+static void iwl_pcie_get_rf_name(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	char *buf = trans_pcie->rf_name;
+	size_t buflen = sizeof(trans_pcie->rf_name);
+	size_t pos;
+	u32 version;
+
+	if (buf[0])
+		return;
+
+	switch (CSR_HW_RFID_TYPE(trans->hw_rf_id)) {
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF):
+		pos = scnprintf(buf, buflen, "JF");
+		break;
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_GF):
+		pos = scnprintf(buf, buflen, "GF");
+		break;
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_GF4):
+		pos = scnprintf(buf, buflen, "GF4");
+		break;
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HR):
+		pos = scnprintf(buf, buflen, "HR");
+		break;
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HR1):
+		pos = scnprintf(buf, buflen, "HR1");
+		break;
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HRCDB):
+		pos = scnprintf(buf, buflen, "HRCDB");
+		break;
+	default:
+		return;
+	}
+
+	switch (CSR_HW_RFID_TYPE(trans->hw_rf_id)) {
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HR):
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HR1):
+	case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HRCDB):
+		version = iwl_read_prph(trans, CNVI_MBOX_C);
+		switch (version) {
+		case 0x20000:
+			pos += scnprintf(buf + pos, buflen - pos, " B3");
+			break;
+		case 0x120000:
+			pos += scnprintf(buf + pos, buflen - pos, " B5");
+			break;
+		default:
+			pos += scnprintf(buf + pos, buflen - pos,
+					 " (0x%x)", version);
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	pos += scnprintf(buf + pos, buflen - pos, ", rfid=0x%x",
+			 trans->hw_rf_id);
+
+	IWL_INFO(trans, "Detected RF %s\n", buf);
+
+	/*
+	 * also add a \n for debugfs - need to do it after printing
+	 * since our IWL_INFO machinery wants to see a static \n at
+	 * the end of the string
+	 */
+	pos += scnprintf(buf + pos, buflen - pos, "\n");
+}
+
 void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans, u32 scd_addr)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -263,6 +332,8 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans, u32 scd_addr)
 	iwl_enable_interrupts(trans);
 	mutex_lock(&trans_pcie->mutex);
 	iwl_pcie_check_hw_rf_kill(trans);
+
+	iwl_pcie_get_rf_name(trans);
 	mutex_unlock(&trans_pcie->mutex);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 5b40833932a0..1331a6bfd767 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -2848,11 +2848,28 @@ static ssize_t iwl_dbgfs_monitor_data_read(struct file *file,
 	return bytes_copied;
 }
 
+static ssize_t iwl_dbgfs_rf_read(struct file *file,
+				 char __user *user_buf,
+				 size_t count, loff_t *ppos)
+{
+	struct iwl_trans *trans = file->private_data;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	if (!trans_pcie->rf_name[0])
+		return -ENODEV;
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       trans_pcie->rf_name,
+				       strlen(trans_pcie->rf_name));
+}
+
 DEBUGFS_READ_WRITE_FILE_OPS(interrupt);
 DEBUGFS_READ_FILE_OPS(fh_reg);
 DEBUGFS_READ_FILE_OPS(rx_queue);
 DEBUGFS_WRITE_FILE_OPS(csr);
 DEBUGFS_READ_WRITE_FILE_OPS(rfkill);
+DEBUGFS_READ_FILE_OPS(rf);
+
 static const struct file_operations iwl_dbgfs_tx_queue_ops = {
 	.owner = THIS_MODULE,
 	.open = iwl_dbgfs_tx_queue_open,
@@ -2879,6 +2896,7 @@ void iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
 	DEBUGFS_ADD_FILE(fh_reg, dir, 0400);
 	DEBUGFS_ADD_FILE(rfkill, dir, 0600);
 	DEBUGFS_ADD_FILE(monitor_data, dir, 0400);
+	DEBUGFS_ADD_FILE(rf, dir, 0400);
 }
 
 static void iwl_trans_pcie_debugfs_cleanup(struct iwl_trans *trans)

From 46d1da21d0cbf237d9f80ba66261fb1435ba2103 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 10:08:42 +0300
Subject: [PATCH 2505/3292] iwlwifi: mvm: don't request SMPS in AP mode

This is not valid (in the spec) and mac80211 will soon
warn on it, in addition to ignoring it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.d568df20e273.Id45ae38f9b16b3c56fa62266e3e89a1421ea07b0@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 99105272139d..e1e45eca09b5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -621,7 +621,7 @@ void iwl_mvm_update_smps(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 			 enum ieee80211_smps_mode smps_request)
 {
 	struct iwl_mvm_vif *mvmvif;
-	enum ieee80211_smps_mode smps_mode;
+	enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_AUTOMATIC;
 	int i;
 
 	lockdep_assert_held(&mvm->mutex);
@@ -630,10 +630,8 @@ void iwl_mvm_update_smps(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	if (num_of_ant(iwl_mvm_get_valid_rx_ant(mvm)) == 1)
 		return;
 
-	if (vif->type == NL80211_IFTYPE_AP)
-		smps_mode = IEEE80211_SMPS_OFF;
-	else
-		smps_mode = IEEE80211_SMPS_AUTOMATIC;
+	if (vif->type != NL80211_IFTYPE_STATION)
+		return;
 
 	mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	mvmvif->smps_requests[req_type] = smps_request;

From a171399fd687a7d2fa56a10c9a2d7084a647677d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 10:08:43 +0300
Subject: [PATCH 2506/3292] iwlwifi: mvm: apply RX diversity per PHY context

SMPS requests may differ per interfaces due to e.g. Bluetooth
only interfering on 2.4 GHz, so if that's the case we should,
in the case of multiple PHY contexts, still allow RX diversity
on PHY context that have no interfaces with SMPS requests.

Fix the code to pass through the PHY context in question and
skip interfaces with non-matching PHY context while iterating.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.123c6b05809d.I992e3d1c6a29850d02eeec01712b5b685b963a87@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  |  3 +-
 .../net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 15 ++++++----
 .../net/wireless/intel/iwlwifi/mvm/utils.c    | 28 ++++++++++++++-----
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index b137f8130b6d..0b8658c7d088 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1830,7 +1830,8 @@ int iwl_mvm_disable_beacon_filter(struct iwl_mvm *mvm,
 void iwl_mvm_update_smps(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				enum iwl_mvm_smps_type_request req_type,
 				enum ieee80211_smps_mode smps_request);
-bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm);
+bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm,
+				  struct iwl_mvm_phy_ctxt *ctxt);
 
 /* Low latency */
 int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
index 0fd51f6aa206..4ed2338027d1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2017 Intel Deutschland GmbH
  */
@@ -76,6 +76,7 @@ static void iwl_mvm_phy_ctxt_cmd_hdr(struct iwl_mvm_phy_ctxt *ctxt,
 }
 
 static void iwl_mvm_phy_ctxt_set_rxchain(struct iwl_mvm *mvm,
+					 struct iwl_mvm_phy_ctxt *ctxt,
 					 __le32 *rxchain_info,
 					 u8 chains_static,
 					 u8 chains_dynamic)
@@ -93,7 +94,7 @@ static void iwl_mvm_phy_ctxt_set_rxchain(struct iwl_mvm *mvm,
 	 * between the two antennas is sufficiently different to impact
 	 * performance.
 	 */
-	if (active_cnt == 1 && iwl_mvm_rx_diversity_allowed(mvm)) {
+	if (active_cnt == 1 && iwl_mvm_rx_diversity_allowed(mvm, ctxt)) {
 		idle_cnt = 2;
 		active_cnt = 2;
 	}
@@ -113,6 +114,7 @@ static void iwl_mvm_phy_ctxt_set_rxchain(struct iwl_mvm *mvm,
  * Add the phy configuration to the PHY context command
  */
 static void iwl_mvm_phy_ctxt_cmd_data_v1(struct iwl_mvm *mvm,
+					 struct iwl_mvm_phy_ctxt *ctxt,
 					 struct iwl_phy_context_cmd_v1 *cmd,
 					 struct cfg80211_chan_def *chandef,
 					 u8 chains_static, u8 chains_dynamic)
@@ -123,7 +125,7 @@ static void iwl_mvm_phy_ctxt_cmd_data_v1(struct iwl_mvm *mvm,
 	/* Set the channel info data */
 	iwl_mvm_set_chan_info_chandef(mvm, &cmd->ci, chandef);
 
-	iwl_mvm_phy_ctxt_set_rxchain(mvm, &tail->rxchain_info,
+	iwl_mvm_phy_ctxt_set_rxchain(mvm, ctxt, &tail->rxchain_info,
 				     chains_static, chains_dynamic);
 
 	tail->txchain_info = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm));
@@ -133,6 +135,7 @@ static void iwl_mvm_phy_ctxt_cmd_data_v1(struct iwl_mvm *mvm,
  * Add the phy configuration to the PHY context command
  */
 static void iwl_mvm_phy_ctxt_cmd_data(struct iwl_mvm *mvm,
+				      struct iwl_mvm_phy_ctxt *ctxt,
 				      struct iwl_phy_context_cmd *cmd,
 				      struct cfg80211_chan_def *chandef,
 				      u8 chains_static, u8 chains_dynamic)
@@ -143,7 +146,7 @@ static void iwl_mvm_phy_ctxt_cmd_data(struct iwl_mvm *mvm,
 	/* Set the channel info data */
 	iwl_mvm_set_chan_info_chandef(mvm, &cmd->ci, chandef);
 
-	iwl_mvm_phy_ctxt_set_rxchain(mvm, &cmd->rxchain_info,
+	iwl_mvm_phy_ctxt_set_rxchain(mvm, ctxt, &cmd->rxchain_info,
 				     chains_static, chains_dynamic);
 }
 
@@ -170,7 +173,7 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm,
 		iwl_mvm_phy_ctxt_cmd_hdr(ctxt, &cmd, action);
 
 		/* Set the command data */
-		iwl_mvm_phy_ctxt_cmd_data(mvm, &cmd, chandef,
+		iwl_mvm_phy_ctxt_cmd_data(mvm, ctxt, &cmd, chandef,
 					  chains_static,
 					  chains_dynamic);
 
@@ -186,7 +189,7 @@ static int iwl_mvm_phy_ctxt_apply(struct iwl_mvm *mvm,
 					 action);
 
 		/* Set the command data */
-		iwl_mvm_phy_ctxt_cmd_data_v1(mvm, &cmd, chandef,
+		iwl_mvm_phy_ctxt_cmd_data_v1(mvm, ctxt, &cmd, chandef,
 					     chains_static,
 					     chains_dynamic);
 		ret = iwl_mvm_send_cmd_pdu(mvm, PHY_CONTEXT_CMD,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index e1e45eca09b5..0e8ad798ab57 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -681,23 +681,37 @@ void iwl_mvm_accu_radio_stats(struct iwl_mvm *mvm)
 	mvm->accu_radio_stats.on_time_scan += mvm->radio_stats.on_time_scan;
 }
 
+struct iwl_mvm_diversity_iter_data {
+	struct iwl_mvm_phy_ctxt *ctxt;
+	bool result;
+};
+
 static void iwl_mvm_diversity_iter(void *_data, u8 *mac,
 				   struct ieee80211_vif *vif)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-	bool *result = _data;
+	struct iwl_mvm_diversity_iter_data *data = _data;
 	int i;
 
+	if (mvmvif->phy_ctxt != data->ctxt)
+		return;
+
 	for (i = 0; i < NUM_IWL_MVM_SMPS_REQ; i++) {
 		if (mvmvif->smps_requests[i] == IEEE80211_SMPS_STATIC ||
-		    mvmvif->smps_requests[i] == IEEE80211_SMPS_DYNAMIC)
-			*result = false;
+		    mvmvif->smps_requests[i] == IEEE80211_SMPS_DYNAMIC) {
+			data->result = false;
+			break;
+		}
 	}
 }
 
-bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm)
+bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm,
+				  struct iwl_mvm_phy_ctxt *ctxt)
 {
-	bool result = true;
+	struct iwl_mvm_diversity_iter_data data = {
+		.ctxt = ctxt,
+		.result = true,
+	};
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -709,9 +723,9 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm)
 
 	ieee80211_iterate_active_interfaces_atomic(
 			mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
-			iwl_mvm_diversity_iter, &result);
+			iwl_mvm_diversity_iter, &data);
 
-	return result;
+	return data.result;
 }
 
 void iwl_mvm_send_low_latency_cmd(struct iwl_mvm *mvm,

From 2a7ce54ccc23e6a6f2e619cfe657a587accb1a3e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 10:08:44 +0300
Subject: [PATCH 2507/3292] iwlwifi: mvm: honour firmware SMPS requests

The firmware can now request SMPS (due to thermal conditions), add
some code to honour such requests and bubble them up through the
stack, subject to our other SMPS constraints, e.g. from Bluetooth.

Then, if the firmware requests SMPS, then we know that it supports
a small extension to the PHY configuration API where a chain mask
of 0 means "use 1 but pick which one yourself", so in this case we
use that extension.

During firmware restart, we stay in the previous state, and the FW
will send us a notification at startup (only) if the temperature is
below the lower or above the high threshold, to sync the state.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.85656b7684b9.I7a661a0758d070a750d3a91874d1a0f5fab9febc@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/fw/api/datapath.h  | 26 +++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  |  4 ++
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c  | 39 +++++++++++++++++++
 .../net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 11 ++++++
 4 files changed, 80 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
index d299bba3aa54..985b0dc5b52a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
@@ -63,6 +63,12 @@ enum iwl_data_path_subcmd_ids {
 	 */
 	RX_NO_DATA_NOTIF = 0xF5,
 
+	/**
+	 * @THERMAL_DUAL_CHAIN_DISABLE_REQ: firmware request for SMPS mode,
+	 *	&struct iwl_thermal_dual_chain_request
+	 */
+	THERMAL_DUAL_CHAIN_REQUEST = 0xF6,
+
 	/**
 	 * @TLC_MNG_UPDATE_NOTIF: &struct iwl_tlc_update_notif
 	 */
@@ -169,4 +175,24 @@ struct iwl_datapath_monitor_notif {
 	u8 reserved[3];
 } __packed; /* MONITOR_NTF_API_S_VER_1 */
 
+/**
+ * enum iwl_thermal_dual_chain_req_events - firmware SMPS request event
+ * @THERMAL_DUAL_CHAIN_REQ_ENABLE: (re-)enable dual-chain operation
+ *	(subject to other constraints)
+ * @THERMAL_DUAL_CHAIN_REQ_DISABLE: disable dual-chain operation
+ *	(static SMPS)
+ */
+enum iwl_thermal_dual_chain_req_events {
+	THERMAL_DUAL_CHAIN_REQ_ENABLE,
+	THERMAL_DUAL_CHAIN_REQ_DISABLE,
+}; /* THERMAL_DUAL_CHAIN_DISABLE_STATE_API_E_VER_1 */
+
+/**
+ * struct iwl_thermal_dual_chain_request - SMPS request
+ * @event: the type of request, see &enum iwl_thermal_dual_chain_req_events
+ */
+struct iwl_thermal_dual_chain_request {
+	__le32 event;
+} __packed; /* THERMAL_DUAL_CHAIN_DISABLE_REQ_NTFY_API_S_VER_1 */
+
 #endif /* __iwl_fw_api_datapath_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 0b8658c7d088..d89c73ae2848 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -197,6 +197,7 @@ enum iwl_mvm_smps_type_request {
 	IWL_MVM_SMPS_REQ_BT_COEX,
 	IWL_MVM_SMPS_REQ_TT,
 	IWL_MVM_SMPS_REQ_PROT,
+	IWL_MVM_SMPS_REQ_FW,
 	NUM_IWL_MVM_SMPS_REQ,
 };
 
@@ -993,6 +994,8 @@ struct iwl_mvm {
 	 */
 	bool temperature_test;  /* Debug test temperature is enabled */
 
+	bool fw_static_smps_request;
+
 	unsigned long bt_coex_last_tcm_ts;
 	struct iwl_mvm_tcm tcm;
 
@@ -1832,6 +1835,7 @@ void iwl_mvm_update_smps(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				enum ieee80211_smps_mode smps_request);
 bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm,
 				  struct iwl_mvm_phy_ctxt *ctxt);
+void iwl_mvm_apply_fw_smps_request(struct ieee80211_vif *vif);
 
 /* Low latency */
 int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index af5688af9cfb..20e8d343a950 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -210,6 +210,39 @@ static void iwl_mvm_rx_monitor_notif(struct iwl_mvm *mvm,
 	ieee80211_disconnect(vif, true);
 }
 
+void iwl_mvm_apply_fw_smps_request(struct ieee80211_vif *vif)
+{
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	struct iwl_mvm *mvm = mvmvif->mvm;
+
+	iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_FW,
+			    mvm->fw_static_smps_request ?
+				IEEE80211_SMPS_STATIC :
+				IEEE80211_SMPS_AUTOMATIC);
+}
+
+static void iwl_mvm_intf_dual_chain_req(void *data, u8 *mac,
+					struct ieee80211_vif *vif)
+{
+	iwl_mvm_apply_fw_smps_request(vif);
+}
+
+static void iwl_mvm_rx_thermal_dual_chain_req(struct iwl_mvm *mvm,
+					      struct iwl_rx_cmd_buffer *rxb)
+{
+	struct iwl_rx_packet *pkt = rxb_addr(rxb);
+	struct iwl_thermal_dual_chain_request *req = (void *)pkt->data;
+
+	/*
+	 * We could pass it to the iterator data, but also need to remember
+	 * it for new interfaces that are added while in this state.
+	 */
+	mvm->fw_static_smps_request =
+		req->event == cpu_to_le32(THERMAL_DUAL_CHAIN_REQ_DISABLE);
+	ieee80211_iterate_interfaces(mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
+				     iwl_mvm_intf_dual_chain_req, NULL);
+}
+
 /**
  * enum iwl_rx_handler_context context for Rx handler
  * @RX_HANDLER_SYNC : this means that it will be called in the Rx path
@@ -358,6 +391,11 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
 	RX_HANDLER_GRP(DATA_PATH_GROUP, MONITOR_NOTIF,
 		       iwl_mvm_rx_monitor_notif, RX_HANDLER_ASYNC_LOCKED,
 		       struct iwl_datapath_monitor_notif),
+
+	RX_HANDLER_GRP(DATA_PATH_GROUP, THERMAL_DUAL_CHAIN_REQUEST,
+		       iwl_mvm_rx_thermal_dual_chain_req,
+		       RX_HANDLER_ASYNC_LOCKED,
+		       struct iwl_thermal_dual_chain_request),
 };
 #undef RX_HANDLER
 #undef RX_HANDLER_GRP
@@ -502,6 +540,7 @@ static const struct iwl_hcmd_names iwl_mvm_data_path_names[] = {
 	HCMD_NAME(TLC_MNG_CONFIG_CMD),
 	HCMD_NAME(CHEST_COLLECTOR_FILTER_CONFIG_CMD),
 	HCMD_NAME(MONITOR_NOTIF),
+	HCMD_NAME(THERMAL_DUAL_CHAIN_REQUEST),
 	HCMD_NAME(STA_PM_NOTIF),
 	HCMD_NAME(MU_GROUP_MGMT_NOTIF),
 	HCMD_NAME(RX_QUEUES_NOTIFICATION),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
index 4ed2338027d1..035336a9e755 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
@@ -99,6 +99,17 @@ static void iwl_mvm_phy_ctxt_set_rxchain(struct iwl_mvm *mvm,
 		active_cnt = 2;
 	}
 
+	/*
+	 * If the firmware requested it, then we know that it supports
+	 * getting zero for the values to indicate "use one, but pick
+	 * which one yourself", which means it can dynamically pick one
+	 * that e.g. has better RSSI.
+	 */
+	if (mvm->fw_static_smps_request && active_cnt == 1 && idle_cnt == 1) {
+		idle_cnt = 0;
+		active_cnt = 0;
+	}
+
 	*rxchain_info = cpu_to_le32(iwl_mvm_get_valid_rx_ant(mvm) <<
 					PHY_RX_CHAIN_VALID_POS);
 	*rxchain_info |= cpu_to_le32(idle_cnt << PHY_RX_CHAIN_CNT_POS);

From 976ac0af7ba2c5424bc305b926c0807d96fdcc83 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Thu, 17 Jun 2021 10:08:45 +0300
Subject: [PATCH 2508/3292] iwlwifi: mvm: fix error print when session
 protection ends

When the session protection ends and the Driver is not
associated or a beacon was not heard, the Driver
prints "No beacons heard...".
That's confusing for the case where not associated.
Change the print when not associated to "Not associated...".

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.41a5a5a894fa.I9eabb76e7a3a7f4abbed8f2ef918f1df8e825726@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 83342a6a6d5b..f19081a6f046 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -310,6 +310,8 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm,
 			 * and know the dtim period.
 			 */
 			iwl_mvm_te_check_disconnect(mvm, te_data->vif,
+				!te_data->vif->bss_conf.assoc ?
+				"Not associated and the time event is over already..." :
 				"No beacon heard and the time event is over already...");
 			break;
 		default:
@@ -808,6 +810,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
 			 * and know the dtim period.
 			 */
 			iwl_mvm_te_check_disconnect(mvm, vif,
+						    !vif->bss_conf.assoc ?
+						    "Not associated and the session protection is over already..." :
 						    "No beacon heard and the session protection is over already...");
 			spin_lock_bh(&mvm->time_event_lock);
 			iwl_mvm_te_clear_data(mvm, te_data);

From b26d4996c862864c5f74f858ee343002530473fb Mon Sep 17 00:00:00 2001
From: Harish Mitty <harish.mitty@intel.com>
Date: Thu, 17 Jun 2021 10:08:46 +0300
Subject: [PATCH 2509/3292] iwlwifi: mvm: Call NMI instead of REPLY_ERROR

For IWL_DEVICE_FAMILY_22000 & greater, driver will call
NMI instead of REPLY_ERROR as FW->Infra does not support
this command for this family onwards.

Signed-off-by: Harish Mitty <harish.mitty@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.597f4246c79d.Ia0a1bbc2e66b4e849174db685208fc2b8bd5732e@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 63d65018d098..95f883aba148 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1023,7 +1023,9 @@ static ssize_t iwl_dbgfs_fw_restart_write(struct iwl_mvm *mvm, char *buf,
 		mvm->fw_restart++;
 
 	/* take the return value to make compiler happy - it will fail anyway */
-	ret = iwl_mvm_send_cmd_pdu(mvm, REPLY_ERROR, 0, 0, NULL);
+	ret = iwl_mvm_send_cmd_pdu(mvm,
+				   WIDE_ID(LONG_GROUP, REPLY_ERROR),
+				   0, 0, NULL);
 
 	mutex_unlock(&mvm->mutex);
 

From 1381eb5c8ed5141bbf39325b80153072647186b6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 10:08:47 +0300
Subject: [PATCH 2510/3292] iwlwifi: correct HE capabilities

The (default) HE capabilities for our devices weren't handled
correctly, adjust them to match the correct capabilities of
the devices.

Since the device regulatory will not allow 160 MHz on 5 GHz,
don't advertise this capability by default; do it only if an
NVM file is being loaded that might change the regulatory
parameters.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.e8d0b02ec86b.Ia6ef8cc0480d38af25e6ac45fad9fb15bdfcbc2c@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/iwl-nvm-parse.c    | 112 ++++++++++--------
 1 file changed, 65 insertions(+), 47 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index fc75d049046d..bff6533b76a8 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -549,8 +549,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 				.mac_cap_info[2] =
 					IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP,
 				.mac_cap_info[3] =
-					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2,
+					IEEE80211_HE_MAC_CAP3_OMI_CONTROL,
 				.mac_cap_info[4] =
 					IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU |
 					IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39,
@@ -579,25 +578,20 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
 					IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8 |
 					IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8,
-				.phy_cap_info[5] =
-					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
-					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2,
 				.phy_cap_info[6] =
 					IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
 					IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB |
 					IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
 				.phy_cap_info[7] =
 					IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
-					IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
-					IEEE80211_HE_PHY_CAP7_MAX_NC_1,
+					IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI,
 				.phy_cap_info[8] =
 					IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI |
 					IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
 					IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
 					IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU |
-					IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996,
+					IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242,
 				.phy_cap_info[9] =
-					IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK |
 					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
 					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
 					IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED,
@@ -632,19 +626,11 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 				.mac_cap_info[1] =
 					IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
 					IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
-				.mac_cap_info[2] =
-					IEEE80211_HE_MAC_CAP2_BSR,
 				.mac_cap_info[3] =
-					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2,
-				.mac_cap_info[4] =
-					IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU,
-				.mac_cap_info[5] =
-					IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU,
+					IEEE80211_HE_MAC_CAP3_OMI_CONTROL,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G |
-					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
-					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G,
+					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G,
 				.phy_cap_info[1] =
 					IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
 				.phy_cap_info[2] =
@@ -654,27 +640,14 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 |
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM |
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1,
-				.phy_cap_info[4] =
-					IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
-					IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8 |
-					IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8,
-				.phy_cap_info[5] =
-					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
-					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2,
 				.phy_cap_info[6] =
 					IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
 				.phy_cap_info[7] =
-					IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
-					IEEE80211_HE_PHY_CAP7_MAX_NC_1,
+					IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI,
 				.phy_cap_info[8] =
 					IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI |
-					IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
-					IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
-					IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU |
-					IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996,
+					IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242,
 				.phy_cap_info[9] =
-					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
-					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
 					IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED,
 			},
 			/*
@@ -745,12 +718,66 @@ static void iwl_init_he_6ghz_capa(struct iwl_trans *trans,
 		iftype_data[i].he_6ghz_capa.capa = cpu_to_le16(he_6ghz_capa);
 }
 
+static void
+iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
+			 struct ieee80211_supported_band *sband,
+			 struct ieee80211_sband_iftype_data *iftype_data,
+			 u8 tx_chains, u8 rx_chains)
+{
+	bool is_ap = iftype_data->types_mask & BIT(NL80211_IFTYPE_AP);
+
+	/* Advertise an A-MPDU exponent extension based on
+	 * operating band
+	 */
+	if (sband->band != NL80211_BAND_2GHZ)
+		iftype_data->he_cap.he_cap_elem.mac_cap_info[3] |=
+			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1;
+	else
+		iftype_data->he_cap.he_cap_elem.mac_cap_info[3] |=
+			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3;
+
+	if (is_ap && iwlwifi_mod_params.nvm_file)
+		iftype_data->he_cap.he_cap_elem.phy_cap_info[0] |=
+			IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+	if ((tx_chains & rx_chains) == ANT_AB) {
+		iftype_data->he_cap.he_cap_elem.phy_cap_info[5] |=
+			IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
+			IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2;
+		if (!is_ap)
+			iftype_data->he_cap.he_cap_elem.phy_cap_info[7] |=
+				IEEE80211_HE_PHY_CAP7_MAX_NC_2;
+	} else if (!is_ap) {
+		/* If not 2x2, we need to indicate 1x1 in the
+		 * Midamble RX Max NSTS - but not for AP mode
+		 */
+		iftype_data->he_cap.he_cap_elem.phy_cap_info[1] &=
+			~IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS;
+		iftype_data->he_cap.he_cap_elem.phy_cap_info[2] &=
+			~IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS;
+		iftype_data->he_cap.he_cap_elem.phy_cap_info[7] |=
+			IEEE80211_HE_PHY_CAP7_MAX_NC_1;
+	}
+
+	switch (CSR_HW_RFID_TYPE(trans->hw_rf_id)) {
+	case IWL_CFG_RF_TYPE_GF:
+	case IWL_CFG_RF_TYPE_MR:
+		iftype_data->he_cap.he_cap_elem.phy_cap_info[9] |=
+			IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU;
+		if (!is_ap)
+			iftype_data->he_cap.he_cap_elem.phy_cap_info[9] |=
+				IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU;
+		break;
+	}
+}
+
 static void iwl_init_he_hw_capab(struct iwl_trans *trans,
 				 struct iwl_nvm_data *data,
 				 struct ieee80211_supported_band *sband,
 				 u8 tx_chains, u8 rx_chains)
 {
 	struct ieee80211_sband_iftype_data *iftype_data;
+	int i;
 
 	/* should only initialize once */
 	if (WARN_ON(sband->iftype_data))
@@ -777,19 +804,10 @@ static void iwl_init_he_hw_capab(struct iwl_trans *trans,
 	sband->iftype_data = iftype_data;
 	sband->n_iftype_data = ARRAY_SIZE(iwl_he_capa);
 
-	/* If not 2x2, we need to indicate 1x1 in the Midamble RX Max NSTS */
-	if ((tx_chains & rx_chains) != ANT_AB) {
-		int i;
+	for (i = 0; i < sband->n_iftype_data; i++)
+		iwl_nvm_fixup_sband_iftd(trans, sband, &iftype_data[i],
+					 tx_chains, rx_chains);
 
-		for (i = 0; i < sband->n_iftype_data; i++) {
-			iftype_data[i].he_cap.he_cap_elem.phy_cap_info[1] &=
-				~IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS;
-			iftype_data[i].he_cap.he_cap_elem.phy_cap_info[2] &=
-				~IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS;
-			iftype_data[i].he_cap.he_cap_elem.phy_cap_info[7] &=
-				~IEEE80211_HE_PHY_CAP7_MAX_NC_MASK;
-		}
-	}
 	iwl_init_he_6ghz_capa(trans, data, sband, tx_chains, rx_chains);
 }
 

From 5c1f09422e666a00f99c5f821a40b46df5f871c8 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Thu, 17 Jun 2021 10:08:48 +0300
Subject: [PATCH 2511/3292] iwlwifi: mvm: support LMR feedback

If the LMR feedback is set in the ranging request, set the
corresponding flag in the fw command.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.0c00dd724f5c.I8283b95c26f4226deaea42e7be35aa9d41eb7580@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index a24e6c0490e9..59cef0d89a6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -430,6 +430,10 @@ iwl_mvm_ftm_put_target_common(struct iwl_mvm *mvm,
 		FTM_PUT_FLAG(TB);
 	else if (peer->ftm.non_trigger_based)
 		FTM_PUT_FLAG(NON_TB);
+
+	if ((peer->ftm.trigger_based || peer->ftm.non_trigger_based) &&
+	    peer->ftm.lmr_feedback)
+		FTM_PUT_FLAG(LMR_FEEDBACK);
 }
 
 static int

From 03470ba71fde9698efcfe28fc36a5c3a05045c32 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Thu, 17 Jun 2021 10:08:49 +0300
Subject: [PATCH 2512/3292] iwlwifi: advertise broadcast TWT support

If the firmware supports broadcast TWT (know by TLV),
add the broadcast TWT HE MAC capability.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.80fee3171b53.Idfb69643f4044ec26865d023d0c2a1d6466694aa@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/file.h  |  3 +-
 .../wireless/intel/iwlwifi/iwl-nvm-parse.c    | 28 +++++++++++++------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index f9c5cf538ad1..d189e5de478b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2008-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2008-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -411,6 +411,7 @@ enum iwl_ucode_tlv_capa {
 	IWL_UCODE_TLV_CAPA_PROTECTED_TWT		= (__force iwl_ucode_tlv_capa_t)56,
 	IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE		= (__force iwl_ucode_tlv_capa_t)57,
 	IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN		= (__force iwl_ucode_tlv_capa_t)58,
+	IWL_UCODE_TLV_CAPA_BROADCAST_TWT		= (__force iwl_ucode_tlv_capa_t)60,
 
 	/* set 2 */
 	IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE		= (__force iwl_ucode_tlv_capa_t)64,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index bff6533b76a8..850648ebd61c 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -722,7 +722,8 @@ static void
 iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
 			 struct ieee80211_supported_band *sband,
 			 struct ieee80211_sband_iftype_data *iftype_data,
-			 u8 tx_chains, u8 rx_chains)
+			 u8 tx_chains, u8 rx_chains,
+			 const struct iwl_fw *fw)
 {
 	bool is_ap = iftype_data->types_mask & BIT(NL80211_IFTYPE_AP);
 
@@ -769,12 +770,17 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
 				IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU;
 		break;
 	}
+
+	if (fw_has_capa(&fw->ucode_capa, IWL_UCODE_TLV_CAPA_BROADCAST_TWT))
+		iftype_data->he_cap.he_cap_elem.mac_cap_info[2] |=
+			IEEE80211_HE_MAC_CAP2_BCAST_TWT;
 }
 
 static void iwl_init_he_hw_capab(struct iwl_trans *trans,
 				 struct iwl_nvm_data *data,
 				 struct ieee80211_supported_band *sband,
-				 u8 tx_chains, u8 rx_chains)
+				 u8 tx_chains, u8 rx_chains,
+				 const struct iwl_fw *fw)
 {
 	struct ieee80211_sband_iftype_data *iftype_data;
 	int i;
@@ -806,7 +812,7 @@ static void iwl_init_he_hw_capab(struct iwl_trans *trans,
 
 	for (i = 0; i < sband->n_iftype_data; i++)
 		iwl_nvm_fixup_sband_iftd(trans, sband, &iftype_data[i],
-					 tx_chains, rx_chains);
+					 tx_chains, rx_chains, fw);
 
 	iwl_init_he_6ghz_capa(trans, data, sband, tx_chains, rx_chains);
 }
@@ -814,7 +820,8 @@ static void iwl_init_he_hw_capab(struct iwl_trans *trans,
 static void iwl_init_sbands(struct iwl_trans *trans,
 			    struct iwl_nvm_data *data,
 			    const void *nvm_ch_flags, u8 tx_chains,
-			    u8 rx_chains, u32 sbands_flags, bool v4)
+			    u8 rx_chains, u32 sbands_flags, bool v4,
+			    const struct iwl_fw *fw)
 {
 	struct device *dev = trans->dev;
 	const struct iwl_cfg *cfg = trans->cfg;
@@ -834,7 +841,8 @@ static void iwl_init_sbands(struct iwl_trans *trans,
 			     tx_chains, rx_chains);
 
 	if (data->sku_cap_11ax_enable && !iwlwifi_mod_params.disable_11ax)
-		iwl_init_he_hw_capab(trans, data, sband, tx_chains, rx_chains);
+		iwl_init_he_hw_capab(trans, data, sband, tx_chains, rx_chains,
+				     fw);
 
 	sband = &data->bands[NL80211_BAND_5GHZ];
 	sband->band = NL80211_BAND_5GHZ;
@@ -849,7 +857,8 @@ static void iwl_init_sbands(struct iwl_trans *trans,
 				      tx_chains, rx_chains);
 
 	if (data->sku_cap_11ax_enable && !iwlwifi_mod_params.disable_11ax)
-		iwl_init_he_hw_capab(trans, data, sband, tx_chains, rx_chains);
+		iwl_init_he_hw_capab(trans, data, sband, tx_chains, rx_chains,
+				     fw);
 
 	/* 6GHz band. */
 	sband = &data->bands[NL80211_BAND_6GHZ];
@@ -861,7 +870,8 @@ static void iwl_init_sbands(struct iwl_trans *trans,
 					  NL80211_BAND_6GHZ);
 
 	if (data->sku_cap_11ax_enable && !iwlwifi_mod_params.disable_11ax)
-		iwl_init_he_hw_capab(trans, data, sband, tx_chains, rx_chains);
+		iwl_init_he_hw_capab(trans, data, sband, tx_chains, rx_chains,
+				     fw);
 	else
 		sband->n_channels = 0;
 	if (n_channels != n_used)
@@ -1172,7 +1182,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 		sbands_flags |= IWL_NVM_SBANDS_FLAGS_NO_WIDE_IN_5GHZ;
 
 	iwl_init_sbands(trans, data, ch_section, tx_chains, rx_chains,
-			sbands_flags, false);
+			sbands_flags, false, fw);
 	data->calib_version = 255;
 
 	return data;
@@ -1679,7 +1689,7 @@ struct iwl_nvm_data *iwl_get_nvm(struct iwl_trans *trans,
 			channel_profile,
 			nvm->valid_tx_ant & fw->valid_tx_ant,
 			nvm->valid_rx_ant & fw->valid_rx_ant,
-			sbands_flags, v4);
+			sbands_flags, v4, fw);
 
 	iwl_free_resp(&hcmd);
 	return nvm;

From 5dec6d96d12d33900ec315972c8e47a73bcc378d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Fri, 18 Jun 2021 03:55:26 -0700
Subject: [PATCH 2513/3292] bpf: Fix regression on BPF_OBJ_GET with non-O_RDWR
 flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit d37300ed1821 ("bpf: program: Refuse non-O_RDWR flags
in BPF_OBJ_GET"). It breaks Android userspace which expects to be able to
fetch programs with just read permissions.

See: https://cs.android.com/android/platform/superproject/+/master:frameworks/libs/net/common/native/bpf_syscall_wrappers/include/BpfSyscallWrappers.h;drc=7005c764be23d31fa1d69e826b4a2f6689a8c81e;l=124

Side-note: another option to fix it would be to extend bpf_prog_new_fd()
and to pass in used file mode flags in the same way as we do for maps via
bpf_map_new_fd(). Meaning, they'd end up in anon_inode_getfd() and thus
would be retained for prog fd operations with bpf() syscall. Right now
these flags are not checked with progs since they are immutable for their
lifetime (as opposed to maps which can be updated from user space). In
future this could potentially change with new features, but at that point
it's still fine to do the bpf_prog_new_fd() extension when needed. For a
simple stable fix, a revert is less churn.

Fixes: d37300ed1821 ("bpf: program: Refuse non-O_RDWR flags in BPF_OBJ_GET")
Signed-off-by: Maciej Żenczykowski <maze@google.com>
[ Daniel: added side-note to commit message ]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Acked-by: Greg Kroah-Hartman <gregkh@google.com>
Link: https://lore.kernel.org/bpf/20210618105526.265003-1-zenczykowski@gmail.com
---
 kernel/bpf/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index b4ebd60a6c16..80da1db47c68 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -543,7 +543,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 		return PTR_ERR(raw);
 
 	if (type == BPF_TYPE_PROG)
-		ret = (f_flags != O_RDWR) ? -EINVAL : bpf_prog_new_fd(raw);
+		ret = bpf_prog_new_fd(raw);
 	else if (type == BPF_TYPE_MAP)
 		ret = bpf_map_new_fd(raw, f_flags);
 	else if (type == BPF_TYPE_LINK)

From bef99c7d9177b268eb08b959eed28797eff6bdae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 10:08:50 +0300
Subject: [PATCH 2514/3292] iwlwifi: pcie: fix some kernel-doc comments

"ubd" is really called "used_bd", fix that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.2d4b46c656bb.Iff9ee6a7e65d439169202911dad2cbea626fb887@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 907781714680..1c740c382b9b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -109,8 +109,8 @@ struct iwl_rx_completion_desc {
  *	Address size is 32 bit in pre-9000 devices and 64 bit in 9000 devices.
  *	In AX210 devices it is a pointer to a list of iwl_rx_transfer_desc's
  * @bd_dma: bus address of buffer of receive buffer descriptors (rbd)
- * @ubd: driver's pointer to buffer of used receive buffer descriptors (rbd)
- * @ubd_dma: physical address of buffer of used receive buffer descriptors (rbd)
+ * @used_bd: driver's pointer to buffer of used receive buffer descriptors (rbd)
+ * @used_bd_dma: physical address of buffer of used receive buffer descriptors (rbd)
  * @tr_tail: driver's pointer to the transmission ring tail buffer
  * @tr_tail_dma: physical address of the buffer for the transmission ring tail
  * @cr_tail: driver's pointer to the completion ring tail buffer

From 8e08e191fc932b4fc2de014c358f8946a4af57e1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 17 Jun 2021 11:07:28 +0300
Subject: [PATCH 2515/3292] iwlwifi: pcie: remove TR/CR tail allocations

The TR/CR tail data are meant to be per-queue-arrays, however,
we allocate them completely wrong (we have a separate allocation
per queue).

Looking at this more closely, it turns out that the hardware
never uses these - we have a separate free list per RX queue
and maintain a write pointer for that in a register, and the
RX itself is indicated in the RB status (rb_stts) DMA region.

Despite nothing using the tail pointers, the hardware will
unconditionally access them to write updates, even when we aren't
using CRs/TRs.

Give it dummy values that we never use/update so it can do that
without causing trouble.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617110647.5f5764e04c46.I4d5de1929be048085767f1234a1e07b517ab6a2d@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../intel/iwlwifi/pcie/ctxt-info-gen3.c       | 28 +++++++--------
 .../wireless/intel/iwlwifi/pcie/internal.h    | 11 ------
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c  | 34 -------------------
 3 files changed, 14 insertions(+), 59 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index cecc32e7dbe8..49560e508b5e 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -138,8 +138,15 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 
 	/* Allocate prph information
 	 * currently we don't assign to the prph info anything, but it would get
-	 * assigned later */
-	prph_info = dma_alloc_coherent(trans->dev, sizeof(*prph_info),
+	 * assigned later
+	 *
+	 * We also use the second half of this page to give the device some
+	 * dummy TR/CR tail pointers - which shouldn't be necessary as we don't
+	 * use this, but the hardware still reads/writes there and we can't let
+	 * it go do that with a NULL pointer.
+	 */
+	BUILD_BUG_ON(sizeof(*prph_info) > PAGE_SIZE / 2);
+	prph_info = dma_alloc_coherent(trans->dev, PAGE_SIZE,
 				       &trans_pcie->prph_info_dma_addr,
 				       GFP_KERNEL);
 	if (!prph_info) {
@@ -166,13 +173,9 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 	ctxt_info_gen3->cr_head_idx_arr_base_addr =
 		cpu_to_le64(trans_pcie->rxq->rb_stts_dma);
 	ctxt_info_gen3->tr_tail_idx_arr_base_addr =
-		cpu_to_le64(trans_pcie->rxq->tr_tail_dma);
+		cpu_to_le64(trans_pcie->prph_info_dma_addr + PAGE_SIZE / 2);
 	ctxt_info_gen3->cr_tail_idx_arr_base_addr =
-		cpu_to_le64(trans_pcie->rxq->cr_tail_dma);
-	ctxt_info_gen3->cr_idx_arr_size =
-		cpu_to_le16(IWL_NUM_OF_COMPLETION_RINGS);
-	ctxt_info_gen3->tr_idx_arr_size =
-		cpu_to_le16(IWL_NUM_OF_TRANSFER_RINGS);
+		cpu_to_le64(trans_pcie->prph_info_dma_addr + 3 * PAGE_SIZE / 4);
 	ctxt_info_gen3->mtr_base_addr =
 		cpu_to_le64(trans->txqs.txq[trans->txqs.cmd.q_id]->dma_addr);
 	ctxt_info_gen3->mcr_base_addr =
@@ -216,10 +219,8 @@ err_free_ctxt_info:
 			  trans_pcie->ctxt_info_dma_addr);
 	trans_pcie->ctxt_info_gen3 = NULL;
 err_free_prph_info:
-	dma_free_coherent(trans->dev,
-			  sizeof(*prph_info),
-			prph_info,
-			trans_pcie->prph_info_dma_addr);
+	dma_free_coherent(trans->dev, PAGE_SIZE, prph_info,
+			  trans_pcie->prph_info_dma_addr);
 
 err_free_prph_scratch:
 	dma_free_coherent(trans->dev,
@@ -251,8 +252,7 @@ void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans)
 	trans_pcie->prph_scratch_dma_addr = 0;
 	trans_pcie->prph_scratch = NULL;
 
-	dma_free_coherent(trans->dev, sizeof(*trans_pcie->prph_info),
-			  trans_pcie->prph_info,
+	dma_free_coherent(trans->dev, PAGE_SIZE, trans_pcie->prph_info,
 			  trans_pcie->prph_info_dma_addr);
 	trans_pcie->prph_info_dma_addr = 0;
 	trans_pcie->prph_info = NULL;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 1c740c382b9b..292b972a25db 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -111,10 +111,6 @@ struct iwl_rx_completion_desc {
  * @bd_dma: bus address of buffer of receive buffer descriptors (rbd)
  * @used_bd: driver's pointer to buffer of used receive buffer descriptors (rbd)
  * @used_bd_dma: physical address of buffer of used receive buffer descriptors (rbd)
- * @tr_tail: driver's pointer to the transmission ring tail buffer
- * @tr_tail_dma: physical address of the buffer for the transmission ring tail
- * @cr_tail: driver's pointer to the completion ring tail buffer
- * @cr_tail_dma: physical address of the buffer for the completion ring tail
  * @read: Shared index to newest available Rx buffer
  * @write: Shared index to oldest written Rx packet
  * @free_count: Number of pre-allocated buffers in rx_free
@@ -142,10 +138,6 @@ struct iwl_rxq {
 		struct iwl_rx_completion_desc *cd;
 	};
 	dma_addr_t used_bd_dma;
-	__le16 *tr_tail;
-	dma_addr_t tr_tail_dma;
-	__le16 *cr_tail;
-	dma_addr_t cr_tail_dma;
 	u32 read;
 	u32 write;
 	u32 free_count;
@@ -533,9 +525,6 @@ static inline void _iwl_disable_interrupts(struct iwl_trans *trans)
 	IWL_DEBUG_ISR(trans, "Disabled interrupts\n");
 }
 
-#define IWL_NUM_OF_COMPLETION_RINGS	31
-#define IWL_NUM_OF_TRANSFER_RINGS	527
-
 static inline int iwl_pcie_get_num_sections(const struct fw_img *fw,
 					    int start)
 {
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index fb8491412be4..4f6f4b2720f0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -663,7 +663,6 @@ static int iwl_pcie_free_bd_size(struct iwl_trans *trans, bool use_rx_td)
 static void iwl_pcie_free_rxq_dma(struct iwl_trans *trans,
 				  struct iwl_rxq *rxq)
 {
-	struct device *dev = trans->dev;
 	bool use_rx_td = (trans->trans_cfg->device_family >=
 			  IWL_DEVICE_FAMILY_AX210);
 	int free_size = iwl_pcie_free_bd_size(trans, use_rx_td);
@@ -685,21 +684,6 @@ static void iwl_pcie_free_rxq_dma(struct iwl_trans *trans,
 				  rxq->used_bd, rxq->used_bd_dma);
 	rxq->used_bd_dma = 0;
 	rxq->used_bd = NULL;
-
-	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
-		return;
-
-	if (rxq->tr_tail)
-		dma_free_coherent(dev, sizeof(__le16),
-				  rxq->tr_tail, rxq->tr_tail_dma);
-	rxq->tr_tail_dma = 0;
-	rxq->tr_tail = NULL;
-
-	if (rxq->cr_tail)
-		dma_free_coherent(dev, sizeof(__le16),
-				  rxq->cr_tail, rxq->cr_tail_dma);
-	rxq->cr_tail_dma = 0;
-	rxq->cr_tail = NULL;
 }
 
 static int iwl_pcie_alloc_rxq_dma(struct iwl_trans *trans,
@@ -744,21 +728,6 @@ static int iwl_pcie_alloc_rxq_dma(struct iwl_trans *trans,
 	rxq->rb_stts_dma =
 		trans_pcie->base_rb_stts_dma + rxq->id * rb_stts_size;
 
-	if (!use_rx_td)
-		return 0;
-
-	/* Allocate the driver's pointer to TR tail */
-	rxq->tr_tail = dma_alloc_coherent(dev, sizeof(__le16),
-					  &rxq->tr_tail_dma, GFP_KERNEL);
-	if (!rxq->tr_tail)
-		goto err;
-
-	/* Allocate the driver's pointer to CR tail */
-	rxq->cr_tail = dma_alloc_coherent(dev, sizeof(__le16),
-					  &rxq->cr_tail_dma, GFP_KERNEL);
-	if (!rxq->cr_tail)
-		goto err;
-
 	return 0;
 
 err:
@@ -1590,9 +1559,6 @@ restart:
 out:
 	/* Backtrack one entry */
 	rxq->read = i;
-	/* update cr tail with the rxq read pointer */
-	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
-		*rxq->cr_tail = cpu_to_le16(r);
 	spin_unlock(&rxq->lock);
 
 	/*

From 54b4fda5a761f97b8918607dbb4cd3b9e711aab6 Mon Sep 17 00:00:00 2001
From: Abhishek Naik <abhishek.naik@intel.com>
Date: Thu, 17 Jun 2021 10:08:52 +0300
Subject: [PATCH 2516/3292] iwlwifi: mvm: Read acpi dsm to get unii4
 enable/disable bitmap.

Read the UNII4 setting from the ACPI table and use it in the
LARI_CONFIG_CHANGE_CMD accordingly.

This setting allows OEMs to enable or disable UNII4, bypassing the FW
defaults.

Signed-off-by: Abhishek Naik <abhishek.naik@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210617100544.223090c509c4.If03cb5393607ae494041b6187bcec134d6a1e06d@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h  |  1 +
 .../wireless/intel/iwlwifi/fw/api/nvm-reg.h   | 19 ++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c   | 20 ++++++++++++++++---
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 9fe64476083d..b858e998999c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -78,6 +78,7 @@ enum iwl_dsm_funcs_rev_0 {
 	DSM_FUNC_DISABLE_SRD = 1,
 	DSM_FUNC_ENABLE_INDONESIA_5G2 = 2,
 	DSM_FUNC_11AX_ENABLEMENT = 6,
+	DSM_FUNC_ENABLE_UNII4_CHAN = 7
 };
 
 enum iwl_dsm_values_srd {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index dc8f2777e944..cf48c6fa8f65 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -452,6 +452,25 @@ struct iwl_lari_config_change_cmd_v3 {
 	__le32 oem_11ax_allow_bitmap;
 } __packed; /* LARI_CHANGE_CONF_CMD_S_VER_3 */
 
+/**
+ * struct iwl_lari_config_change_cmd_v4 - change LARI configuration
+ * @config_bitmap: Bitmap of the config commands. Each bit will trigger a
+ *     different predefined FW config operation.
+ * @oem_uhb_allow_bitmap: Bitmap of UHB enabled MCC sets.
+ * @oem_11ax_allow_bitmap: Bitmap of 11ax allowed MCCs. There are two bits
+ *     per country, one to indicate whether to override and the other to
+ *     indicate the value to use.
+ * @oem_unii4_allow_bitmap: Bitmap of unii4 allowed MCCs.There are two bits
+ *     per country, one to indicate whether to override and the other to
+ *     indicate allow/disallow unii4 channels.
+ */
+struct iwl_lari_config_change_cmd_v4 {
+	__le32 config_bitmap;
+	__le32 oem_uhb_allow_bitmap;
+	__le32 oem_11ax_allow_bitmap;
+	__le32 oem_unii4_allow_bitmap;
+} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_4 */
+
 /**
  * struct iwl_pnvm_init_complete_ntfy - PNVM initialization complete
  * @status: PNVM image loading status
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 9f2a5dee59d8..38fd5886af2d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1141,7 +1141,7 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 {
 	int ret;
 	u32 value;
-	struct iwl_lari_config_change_cmd_v3 cmd = {};
+	struct iwl_lari_config_change_cmd_v4 cmd = {};
 
 	cmd.config_bitmap = iwl_acpi_get_lari_config_bitmap(&mvm->fwrt);
 
@@ -1151,12 +1151,22 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 		cmd.oem_11ax_allow_bitmap = cpu_to_le32(value);
 	/* apply more config masks here */
 
-	if (cmd.config_bitmap || cmd.oem_11ax_allow_bitmap) {
+	ret = iwl_acpi_get_dsm_u32((&mvm->fwrt)->dev, 0,
+				   DSM_FUNC_ENABLE_UNII4_CHAN,
+				   &iwl_guid, &value);
+	if (!ret)
+		cmd.oem_unii4_allow_bitmap = cpu_to_le32(value);
+
+	if (cmd.config_bitmap ||
+	    cmd.oem_11ax_allow_bitmap ||
+	    cmd.oem_unii4_allow_bitmap) {
 		size_t cmd_size;
 		u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw,
 						   REGULATORY_AND_NVM_GROUP,
 						   LARI_CONFIG_CHANGE, 1);
-		if (cmd_ver == 3)
+		if (cmd_ver == 4)
+			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v4);
+		else if (cmd_ver == 3)
 			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v3);
 		else if (cmd_ver == 2)
 			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v2);
@@ -1167,6 +1177,10 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 				"sending LARI_CONFIG_CHANGE, config_bitmap=0x%x, oem_11ax_allow_bitmap=0x%x\n",
 				le32_to_cpu(cmd.config_bitmap),
 				le32_to_cpu(cmd.oem_11ax_allow_bitmap));
+		IWL_DEBUG_RADIO(mvm,
+				"sending LARI_CONFIG_CHANGE, oem_unii4_allow_bitmap=0x%x, cmd_ver=%d\n",
+				le32_to_cpu(cmd.oem_unii4_allow_bitmap),
+				cmd_ver);
 		ret = iwl_mvm_send_cmd_pdu(mvm,
 					   WIDE_ID(REGULATORY_AND_NVM_GROUP,
 						   LARI_CONFIG_CHANGE),

From 7b3954a1d69a992a781e71036950f9254f8147f6 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 18 Jun 2021 11:01:12 +0300
Subject: [PATCH 2517/3292] iwlwifi: mvm: Explicitly stop session protection
 before unbinding

In case of unbinding, the FW would remove the session protection time
events without sending a notification, so explicitly cancel the
session protection, so future requests for mgd_prepare_tx() would not
assume that the session protection is running.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.7c30f85ed241.Ibc19fdbefca7135f2c4ea83d0aef6b81b5033dcd@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/mvm/mac80211.c |  1 -
 .../wireless/intel/iwlwifi/mvm/time-event.c   | 41 ++++++++++++++-----
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 141d9fc299b0..bafff5f2c638 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -4223,7 +4223,6 @@ static void __iwl_mvm_unassign_vif_chanctx(struct iwl_mvm *mvm,
 	struct ieee80211_vif *disabled_vif = NULL;
 
 	lockdep_assert_held(&mvm->mutex);
-
 	iwl_mvm_remove_time_event(mvm, mvmvif, &mvmvif->time_event_data);
 
 	switch (vif->type) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index f19081a6f046..d3307a11fcac 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -31,6 +31,13 @@ void iwl_mvm_te_clear_data(struct iwl_mvm *mvm,
 		return;
 
 	list_del(&te_data->list);
+
+	/*
+	 * the list is only used for AUX ROC events so make sure it is always
+	 * initialized
+	 */
+	INIT_LIST_HEAD(&te_data->list);
+
 	te_data->running = false;
 	te_data->uid = 0;
 	te_data->id = TE_MAX;
@@ -609,14 +616,15 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm,
 }
 
 static void iwl_mvm_cancel_session_protection(struct iwl_mvm *mvm,
-					      struct iwl_mvm_vif *mvmvif)
+					      struct iwl_mvm_vif *mvmvif,
+					      u32 id)
 {
 	struct iwl_mvm_session_prot_cmd cmd = {
 		.id_and_color =
 			cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
 							mvmvif->color)),
 		.action = cpu_to_le32(FW_CTXT_ACTION_REMOVE),
-		.conf_id = cpu_to_le32(mvmvif->time_event_data.id),
+		.conf_id = cpu_to_le32(id),
 	};
 	int ret;
 
@@ -634,6 +642,12 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
 {
 	u32 id;
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
+	enum nl80211_iftype iftype;
+
+	if (!te_data->vif)
+		return false;
+
+	iftype = te_data->vif->type;
 
 	/*
 	 * It is possible that by the time we got to this point the time
@@ -658,8 +672,8 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
 			IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) {
 		if (mvmvif && id < SESSION_PROTECT_CONF_MAX_ID) {
 			/* Session protection is still ongoing. Cancel it */
-			iwl_mvm_cancel_session_protection(mvm, mvmvif);
-			if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
+			iwl_mvm_cancel_session_protection(mvm, mvmvif, id);
+			if (iftype == NL80211_IFTYPE_P2P_DEVICE) {
 				set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
 				iwl_mvm_roc_finished(mvm);
 			}
@@ -740,11 +754,6 @@ void iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
 		IWL_ERR(mvm, "Couldn't remove the time event\n");
 }
 
-/*
- * When the firmware supports the session protection API,
- * this is not needed since it'll automatically remove the
- * session protection after association + beacon reception.
- */
 void iwl_mvm_stop_session_protection(struct iwl_mvm *mvm,
 				     struct ieee80211_vif *vif)
 {
@@ -758,7 +767,15 @@ void iwl_mvm_stop_session_protection(struct iwl_mvm *mvm,
 	id = te_data->id;
 	spin_unlock_bh(&mvm->time_event_lock);
 
-	if (id != TE_BSS_STA_AGGRESSIVE_ASSOC) {
+	if (fw_has_capa(&mvm->fw->ucode_capa,
+			IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) {
+		if (id != SESSION_PROTECT_CONF_ASSOC) {
+			IWL_DEBUG_TE(mvm,
+				     "don't remove session protection id=%u\n",
+				     id);
+			return;
+		}
+	} else if (id != TE_BSS_STA_AGGRESSIVE_ASSOC) {
 		IWL_DEBUG_TE(mvm,
 			     "don't remove TE with id=%u (not session protection)\n",
 			     id);
@@ -985,7 +1002,8 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 		mvmvif = iwl_mvm_vif_from_mac80211(vif);
 
 		if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
-			iwl_mvm_cancel_session_protection(mvm, mvmvif);
+			iwl_mvm_cancel_session_protection(mvm, mvmvif,
+							  mvmvif->time_event_data.id);
 			set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
 		} else {
 			iwl_mvm_remove_aux_roc_te(mvm, mvmvif,
@@ -1145,6 +1163,7 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm,
 
 	iwl_mvm_te_clear_data(mvm, te_data);
 	te_data->duration = le32_to_cpu(cmd.duration_tu);
+	te_data->vif = vif;
 	spin_unlock_bh(&mvm->time_event_lock);
 
 	IWL_DEBUG_TE(mvm, "Add new session protection, duration %d TU\n",

From b1c6cec04bbc1fe7e83cc7a1b054cc962feffb7e Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali.goldstein@intel.com>
Date: Fri, 18 Jun 2021 11:01:13 +0300
Subject: [PATCH 2518/3292] iwlwifi: mvm: don't request mac80211 to
 disable/enable sta's queues

When operating in AP mode with NICs supporting the AP_LINK_PS hw flag,
mac80211 doesn't need to start/stop queueing tx for connected stations
because the FW already handles that.

Signed-off-by: Naftali Goldstein <naftali.goldstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.64df994c8fbb.I0fa5cda3a5f893a396eef30a01522422be359e69@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index f618368eda83..9c45a64c5009 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -3794,8 +3794,12 @@ void iwl_mvm_sta_modify_disable_tx_ap(struct iwl_mvm *mvm,
 
 	mvm_sta->disable_tx = disable;
 
-	/* Tell mac80211 to start/stop queuing tx for this station */
-	ieee80211_sta_block_awake(mvm->hw, sta, disable);
+	/*
+	 * If sta PS state is handled by mac80211, tell it to start/stop
+	 * queuing tx for this station.
+	 */
+	if (!ieee80211_hw_check(mvm->hw, AP_LINK_PS))
+		ieee80211_sta_block_awake(mvm->hw, sta, disable);
 
 	iwl_mvm_sta_modify_disable_tx(mvm, mvm_sta, disable);
 

From 5b16565a7f9d82c6aa475ede72d62424b70f7726 Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali.goldstein@intel.com>
Date: Fri, 18 Jun 2021 11:01:14 +0300
Subject: [PATCH 2519/3292] iwlwifi: support ver 6 of WOWLAN_CONFIGURATION and
 ver 10 of WOWLAN_GET_STATUSES

These two version updates deprecate the need to set/get the nonqos sequence
counter during suspend/resume flow respectively; NICs supporting this
version maintain this counter internally and don't lose it during the
suspend/resume flow.

Note that this means that for such NICs the NON_QOS_TX_COUNTER_CMD is no
longer ever sent.

Signed-off-by: Naftali Goldstein <naftali.goldstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.dd25dd667798.I8db9adcdbb133304b58cf417f8698611138c83b4@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/d3.h    | 12 +++++----
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c   | 26 ++++++++++++-------
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index 6488c0f8b471..5373182c1364 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -339,9 +339,10 @@ enum iwl_wowlan_flags {
 };
 
 /**
- * struct iwl_wowlan_config_cmd - WoWLAN configuration
+ * struct iwl_wowlan_config_cmd - WoWLAN configuration (versions 5 and 6)
  * @wakeup_filter: filter from &enum iwl_wowlan_wakeup_filters
- * @non_qos_seq: non-QoS sequence counter to use next
+ * @non_qos_seq: non-QoS sequence counter to use next.
+ *               Reserved if the struct has version >= 6.
  * @qos_seq: QoS sequence counters to use next
  * @wowlan_ba_teardown_tids: bitmap of BA sessions to tear down
  * @is_11n_connection: indicates HT connection
@@ -604,12 +605,13 @@ struct iwl_wowlan_status_v7 {
 } __packed; /* WOWLAN_STATUSES_API_S_VER_7 */
 
 /**
- * struct iwl_wowlan_status_v9 - WoWLAN status (version 9)
+ * struct iwl_wowlan_status_v9 - WoWLAN status (versions 9 and 10)
  * @gtk: GTK data
  * @igtk: IGTK data
  * @replay_ctr: GTK rekey replay counter
  * @pattern_number: number of the matched pattern
- * @non_qos_seq_ctr: non-QoS sequence counter to use next
+ * @non_qos_seq_ctr: non-QoS sequence counter to use next.
+ *                   Reserved if the struct has version >= 10.
  * @qos_seq_ctr: QoS sequence counters to use next
  * @wakeup_reasons: wakeup reasons, see &enum iwl_wowlan_wakeup_reason
  * @num_of_gtk_rekeys: number of GTK rekeys
@@ -638,7 +640,7 @@ struct iwl_wowlan_status_v9 {
 	u8 tid_tear_down;
 	u8 reserved[3];
 	u8 wake_packet[]; /* can be truncated from _length to _bufsize */
-} __packed; /* WOWLAN_STATUSES_API_S_VER_9 */
+} __packed; /* WOWLAN_STATUSES_RSP_API_S_VER_9 */
 
 /**
  * struct iwl_wowlan_status - WoWLAN status
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 2e28cf299ef4..e86f0e949b86 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -636,7 +636,6 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
 			  struct ieee80211_vif *vif, struct iwl_mvm_vif *mvmvif,
 			  struct ieee80211_sta *ap_sta)
 {
-	int ret;
 	struct iwl_mvm_sta *mvm_ap_sta = iwl_mvm_sta_from_mac80211(ap_sta);
 
 	/* TODO: wowlan_config_cmd->wowlan_ba_teardown_tids */
@@ -646,12 +645,16 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
 	wowlan_config_cmd->flags = ENABLE_L3_FILTERING |
 		ENABLE_NBNS_FILTERING | ENABLE_DHCP_FILTERING;
 
-	/* Query the last used seqno and set it */
-	ret = iwl_mvm_get_last_nonqos_seq(mvm, vif);
-	if (ret < 0)
-		return ret;
+	if (iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+				  WOWLAN_CONFIGURATION, 0) < 6) {
+		/* Query the last used seqno and set it */
+		int ret = iwl_mvm_get_last_nonqos_seq(mvm, vif);
 
-	wowlan_config_cmd->non_qos_seq = cpu_to_le16(ret);
+		if (ret < 0)
+			return ret;
+
+		wowlan_config_cmd->non_qos_seq = cpu_to_le16(ret);
+	}
 
 	iwl_mvm_set_wowlan_qos_seq(mvm_ap_sta, wowlan_config_cmd);
 
@@ -1534,9 +1537,12 @@ static bool iwl_mvm_setup_connection_keep(struct iwl_mvm *mvm,
 	}
 
 out:
-	mvmvif->seqno_valid = true;
-	/* +0x10 because the set API expects next-to-use, not last-used */
-	mvmvif->seqno = le16_to_cpu(status->non_qos_seq_ctr) + 0x10;
+	if (iwl_fw_lookup_notif_ver(mvm->fw, LONG_GROUP,
+				    WOWLAN_GET_STATUSES, 0) < 10) {
+		mvmvif->seqno_valid = true;
+		/* +0x10 because the set API expects next-to-use, not last-used */
+		mvmvif->seqno = le16_to_cpu(status->non_qos_seq_ctr) + 0x10;
+	}
 
 	return true;
 }
@@ -1654,7 +1660,7 @@ struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm)
 
 		status->gtk[0] = v7->gtk[0];
 		status->igtk[0] = v7->igtk[0];
-	} else if (notif_ver == 9) {
+	} else if (notif_ver == 9 || notif_ver == 10) {
 		struct iwl_wowlan_status_v9 *v9 = (void *)cmd.resp_pkt->data;
 
 		status = iwl_mvm_parse_wowlan_status_common_v9(mvm,

From d65ab7c0e0b92056754185d3f6925d7318730e94 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 18 Jun 2021 11:01:15 +0300
Subject: [PATCH 2520/3292] iwlwifi: mvm: support LONG_GROUP for
 WOWLAN_GET_STATUSES version

It's been a while that the firmware uses LONG_GROUP by default
and not LEGACY_GROUP.
Until now the firmware wrongly advertise the WOWLAN_GET_STATUS
command's version with LEGACY_GROUP, but it is now being fixed.
In order to support both firmwares, first try to get the version
number of the command with the LONG_GROUP and if the firmware
didn't advertise the command version with LONG_GROUP, try to get
the command version with LEGACY_GROUP.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.cd6f4e421430.Iec07c746c8e65bc267e4750f38e4f74f2010ca45@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index e86f0e949b86..6617fe5a7ece 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1614,8 +1614,11 @@ struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm)
 	len = iwl_rx_packet_payload_len(cmd.resp_pkt);
 
 	/* default to 7 (when we have IWL_UCODE_TLV_API_WOWLAN_KEY_MATERIAL) */
-	notif_ver = iwl_fw_lookup_notif_ver(mvm->fw, LEGACY_GROUP,
-					    WOWLAN_GET_STATUSES, 7);
+	notif_ver = iwl_fw_lookup_notif_ver(mvm->fw, LONG_GROUP,
+					    WOWLAN_GET_STATUSES, 0);
+	if (!notif_ver)
+		notif_ver = iwl_fw_lookup_notif_ver(mvm->fw, LEGACY_GROUP,
+						    WOWLAN_GET_STATUSES, 7);
 
 	if (!fw_has_api(&mvm->fw->ucode_capa,
 			IWL_UCODE_TLV_API_WOWLAN_KEY_MATERIAL)) {

From 310f60f53a86eba680d9bc20a371e13b06a5f903 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 11:01:16 +0300
Subject: [PATCH 2521/3292] iwlwifi: pcie: free IML DMA memory allocation

In the case of gen3 devices with image loader (IML) support,
we were leaking the IML DMA allocation and never freeing it.
Fix that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.07e117dbedb7.I7bb9ebbe0617656986c2a598ea5e827b533bd3b9@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c  | 15 ++++++++++-----
 .../net/wireless/intel/iwlwifi/pcie/internal.h    |  3 +++
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index 49560e508b5e..c7b9ca264429 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -79,7 +79,6 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 	struct iwl_prph_scratch *prph_scratch;
 	struct iwl_prph_scratch_ctrl_cfg *prph_sc_ctrl;
 	struct iwl_prph_info *prph_info;
-	void *iml_img;
 	u32 control_flags = 0;
 	int ret;
 	int cmdq_size = max_t(u32, IWL_CMD_QUEUE_SIZE,
@@ -190,14 +189,15 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 	trans_pcie->prph_scratch = prph_scratch;
 
 	/* Allocate IML */
-	iml_img = dma_alloc_coherent(trans->dev, trans->iml_len,
-				     &trans_pcie->iml_dma_addr, GFP_KERNEL);
-	if (!iml_img) {
+	trans_pcie->iml = dma_alloc_coherent(trans->dev, trans->iml_len,
+					     &trans_pcie->iml_dma_addr,
+					     GFP_KERNEL);
+	if (!trans_pcie->iml) {
 		ret = -ENOMEM;
 		goto err_free_ctxt_info;
 	}
 
-	memcpy(iml_img, trans->iml, trans->iml_len);
+	memcpy(trans_pcie->iml, trans->iml, trans->iml_len);
 
 	iwl_enable_fw_load_int_ctx_info(trans);
 
@@ -244,6 +244,11 @@ void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans)
 	trans_pcie->ctxt_info_dma_addr = 0;
 	trans_pcie->ctxt_info_gen3 = NULL;
 
+	dma_free_coherent(trans->dev, trans->iml_len, trans_pcie->iml,
+			  trans_pcie->iml_dma_addr);
+	trans_pcie->iml_dma_addr = 0;
+	trans_pcie->iml = NULL;
+
 	iwl_pcie_ctxt_info_free_fw_img(trans);
 
 	dma_free_coherent(trans->dev, sizeof(*trans_pcie->prph_scratch),
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 292b972a25db..69289e9f8d7e 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -271,6 +271,8 @@ struct cont_rec {
  *	Context information addresses will be taken from here.
  *	This is driver's local copy for keeping track of size and
  *	count for allocating and freeing the memory.
+ * @iml: image loader image virtual address
+ * @iml_dma_addr: image loader image DMA address
  * @trans: pointer to the generic transport area
  * @scd_base_addr: scheduler sram base address in SRAM
  * @kw: keep warm address
@@ -322,6 +324,7 @@ struct iwl_trans_pcie {
 	};
 	struct iwl_prph_info *prph_info;
 	struct iwl_prph_scratch *prph_scratch;
+	void *iml;
 	dma_addr_t ctxt_info_dma_addr;
 	dma_addr_t prph_info_dma_addr;
 	dma_addr_t prph_scratch_dma_addr;

From 26d18c75a7496c4c52b0b6789e713dc76ebfbc87 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 11:01:17 +0300
Subject: [PATCH 2522/3292] iwlwifi: pcie: fix context info freeing

After firmware alive, iwl_trans_pcie_gen2_fw_alive() is called
to free the context info. However, on gen3 that will then free
the context info with the wrong size.

Since we free this allocation later, let it stick around until
the device is stopped for now, freeing some of it earlier is a
separate change.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.afb63fb8cbc1.If4968db8e09f4ce2a1d27a6d750bca3d132d7d70@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 56162c4500d7..93b957866beb 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -323,7 +323,8 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans, u32 scd_addr)
 	/* now that we got alive we can free the fw image & the context info.
 	 * paging memory cannot be freed included since FW will still use it
 	 */
-	iwl_pcie_ctxt_info_free(trans);
+	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
+		iwl_pcie_ctxt_info_free(trans);
 
 	/*
 	 * Re-enable all the interrupts, including the RF-Kill one, now that

From fa331068a591d9df5f345173c0c9c44234b61569 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 11:01:18 +0300
Subject: [PATCH 2523/3292] iwlwifi: mvm: fill phy_data.d1 for no-data RX

We don't fill in phy_data.d1 in no-data RX, and thus we
pretend some data is actually filled in radiotap when it
isn't or has default (zero) values.

Fill in phy_data.d1 appropriately, and while at it also
move the info_type initialization into the initializer.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.3d488885f77c.Ib97a2bc57c1e9fb98927dc6f802568db313abe3b@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 8e26422ca326..c0babb8d5b5c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
  */
@@ -2001,8 +2001,10 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
 	struct sk_buff *skb;
 	u8 channel, energy_a, energy_b;
 	struct iwl_mvm_rx_phy_data phy_data = {
+		.info_type = le32_get_bits(desc->phy_info[1],
+					   IWL_RX_PHY_DATA1_INFO_TYPE_MASK),
 		.d0 = desc->phy_info[0],
-		.info_type = IWL_RX_PHY_INFO_TYPE_NONE,
+		.d1 = desc->phy_info[1],
 	};
 
 	if (unlikely(iwl_rx_packet_payload_len(pkt) < sizeof(*desc)))
@@ -2015,10 +2017,6 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
 	energy_b = (rssi & RX_NO_DATA_CHAIN_B_MSK) >> RX_NO_DATA_CHAIN_B_POS;
 	channel = (rssi & RX_NO_DATA_CHANNEL_MSK) >> RX_NO_DATA_CHANNEL_POS;
 
-	phy_data.info_type =
-		le32_get_bits(desc->phy_info[1],
-			      IWL_RX_PHY_DATA1_INFO_TYPE_MASK);
-
 	/* Dont use dev_alloc_skb(), we'll have enough headroom once
 	 * ieee80211_hdr pulled.
 	 */

From 947689756352af9bd0486c1a19fffc7837ae0335 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 11:01:19 +0300
Subject: [PATCH 2524/3292] iwlwifi: pcie: free some DMA memory earlier

In gen3, after firmware is alive, we no longer need the
firmware and image loader images, only the context info
itself and PRPH info/scratch need to remain.

Call iwl_pcie_ctxt_info_gen3_free() appropriately in the
alive callback (iwl_trans_pcie_gen2_fw_alive()) with a new
argument indicating whether it can free everything or only
partially.

The context info and PRPH scratch are also not needed after
PNVM load, but we don't have a good hook for freeing after
that, so keep them for now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.8230d91a46c1.Ia7db71e5e6265ca87363f1481eac1bc3bbebb15c@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../intel/iwlwifi/iwl-context-info-gen3.h     |  4 ++--
 .../intel/iwlwifi/pcie/ctxt-info-gen3.c       | 23 ++++++++++++-------
 .../wireless/intel/iwlwifi/pcie/trans-gen2.c  |  6 +++--
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
index 2be605cc6fbf..518a1bc79584 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2018, 2020 Intel Corporation
+ * Copyright (C) 2018, 2020-2021 Intel Corporation
  */
 #ifndef __iwl_context_info_file_gen3_h__
 #define __iwl_context_info_file_gen3_h__
@@ -245,7 +245,7 @@ struct iwl_context_info_gen3 {
 
 int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 				 const struct fw_img *fw);
-void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans);
+void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans, bool alive);
 
 int iwl_trans_pcie_ctx_info_gen3_set_pnvm(struct iwl_trans *trans,
 					  const void *data, u32 len);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index c7b9ca264429..c69a1541e678 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -231,32 +231,39 @@ err_free_prph_scratch:
 
 }
 
-void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans)
+void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans, bool alive)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
+	if (trans_pcie->iml) {
+		dma_free_coherent(trans->dev, trans->iml_len, trans_pcie->iml,
+				  trans_pcie->iml_dma_addr);
+		trans_pcie->iml_dma_addr = 0;
+		trans_pcie->iml = NULL;
+	}
+
+	iwl_pcie_ctxt_info_free_fw_img(trans);
+
+	if (alive)
+		return;
+
 	if (!trans_pcie->ctxt_info_gen3)
 		return;
 
+	/* ctxt_info_gen3 and prph_scratch are still needed for PNVM load */
 	dma_free_coherent(trans->dev, sizeof(*trans_pcie->ctxt_info_gen3),
 			  trans_pcie->ctxt_info_gen3,
 			  trans_pcie->ctxt_info_dma_addr);
 	trans_pcie->ctxt_info_dma_addr = 0;
 	trans_pcie->ctxt_info_gen3 = NULL;
 
-	dma_free_coherent(trans->dev, trans->iml_len, trans_pcie->iml,
-			  trans_pcie->iml_dma_addr);
-	trans_pcie->iml_dma_addr = 0;
-	trans_pcie->iml = NULL;
-
-	iwl_pcie_ctxt_info_free_fw_img(trans);
-
 	dma_free_coherent(trans->dev, sizeof(*trans_pcie->prph_scratch),
 			  trans_pcie->prph_scratch,
 			  trans_pcie->prph_scratch_dma_addr);
 	trans_pcie->prph_scratch_dma_addr = 0;
 	trans_pcie->prph_scratch = NULL;
 
+	/* this is needed for the entire lifetime */
 	dma_free_coherent(trans->dev, PAGE_SIZE, trans_pcie->prph_info,
 			  trans_pcie->prph_info_dma_addr);
 	trans_pcie->prph_info_dma_addr = 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 93b957866beb..a34009357227 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -149,7 +149,7 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
 
 	iwl_pcie_ctxt_info_free_paging(trans);
 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
-		iwl_pcie_ctxt_info_gen3_free(trans);
+		iwl_pcie_ctxt_info_gen3_free(trans, false);
 	else
 		iwl_pcie_ctxt_info_free(trans);
 
@@ -323,7 +323,9 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans, u32 scd_addr)
 	/* now that we got alive we can free the fw image & the context info.
 	 * paging memory cannot be freed included since FW will still use it
 	 */
-	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
+	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+		iwl_pcie_ctxt_info_gen3_free(trans, true);
+	else
 		iwl_pcie_ctxt_info_free(trans);
 
 	/*

From 12236e9af903f7a36f24d24a9b70ba8f8e2859e4 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 18 Jun 2021 11:01:20 +0300
Subject: [PATCH 2525/3292] iwlwifi: fix NUM_IWL_UCODE_TLV_* definitions to
 avoid sparse errors

We were assigning these macros manually when sparse is running, but
with newer versions of sparse, it started causing other warnings.  Fix
it by making it a macro when sparse is running.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.dc658639e07f.I69ab6d59ff10c55c8517621eb20a52194dc4783a@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/file.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index d189e5de478b..ef1a24504c8b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -277,10 +277,11 @@ enum iwl_ucode_tlv_api {
 	IWL_UCODE_TLV_API_BAND_IN_RX_DATA	= (__force iwl_ucode_tlv_api_t)59,
 
 
-	NUM_IWL_UCODE_TLV_API
 #ifdef __CHECKER__
-		/* sparse says it cannot increment the previous enum member */
-		= 128
+	/* sparse says it cannot increment the previous enum member */
+#define NUM_IWL_UCODE_TLV_API 128
+#else
+	NUM_IWL_UCODE_TLV_API
 #endif
 };
 
@@ -447,10 +448,11 @@ enum iwl_ucode_tlv_capa {
 	IWL_UCODE_TLV_CAPA_BIGTK_SUPPORT		= (__force iwl_ucode_tlv_capa_t)100,
 	IWL_UCODE_TLV_CAPA_RFIM_SUPPORT			= (__force iwl_ucode_tlv_capa_t)102,
 
-	NUM_IWL_UCODE_TLV_CAPA
 #ifdef __CHECKER__
-		/* sparse says it cannot increment the previous enum member */
-		= 128
+	/* sparse says it cannot increment the previous enum member */
+#define NUM_IWL_UCODE_TLV_CAPA 128
+#else
+	NUM_IWL_UCODE_TLV_CAPA
 #endif
 };
 

From b60bc716ba26319205d570406187fd941a96bdf3 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 18 Jun 2021 11:01:21 +0300
Subject: [PATCH 2526/3292] iwlwifi: mvm: introduce iwl_proto_offload_cmd_v4

We need to pass the station id to tell the firmware
on which station we want to configure the protocol
offload.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618105614.c25913d2c08c.Ic0fefac81afb9a2fe396d73528e30e09a8c5eae0@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/d3.h    | 16 ++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c   | 11 +++++---
 .../wireless/intel/iwlwifi/mvm/offloading.c   | 26 ++++++++++++++-----
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index 5373182c1364..a9e8f30ef91d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -159,6 +159,22 @@ struct iwl_proto_offload_cmd_v3_large {
 	struct iwl_ns_config ns_config[IWL_PROTO_OFFLOAD_NUM_NS_CONFIG_V3L];
 } __packed; /* PROT_OFFLOAD_CONFIG_CMD_DB_S_VER_3 */
 
+/**
+ * struct iwl_proto_offload_cmd_v4 - ARP/NS offload configuration
+ * @sta_id: station id
+ * @common: common/IPv4 configuration
+ * @num_valid_ipv6_addrs: number of valid IPv6 addresses
+ * @targ_addrs: target IPv6 addresses
+ * @ns_config: NS offload configurations
+ */
+struct iwl_proto_offload_cmd_v4 {
+	__le32 sta_id;
+	struct iwl_proto_offload_cmd_common common;
+	__le32 num_valid_ipv6_addrs;
+	struct iwl_targ_addr targ_addrs[IWL_PROTO_OFFLOAD_NUM_IPV6_ADDRS_V3L];
+	struct iwl_ns_config ns_config[IWL_PROTO_OFFLOAD_NUM_NS_CONFIG_V3L];
+} __packed; /* PROT_OFFLOAD_CONFIG_CMD_DB_S_VER_4 */
+
 /*
  * WOWLAN_PATTERNS
  */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 6617fe5a7ece..7b13c4fc1b58 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1693,10 +1693,13 @@ iwl_mvm_get_wakeup_status(struct iwl_mvm *mvm)
 {
 	int ret;
 
-	/* only for tracing for now */
-	ret = iwl_mvm_send_cmd_pdu(mvm, OFFLOADS_QUERY_CMD, 0, 0, NULL);
-	if (ret)
-		IWL_ERR(mvm, "failed to query offload statistics (%d)\n", ret);
+	if (!mvm->net_detect) {
+		/* only for tracing for now */
+		int ret = iwl_mvm_send_cmd_pdu(mvm, OFFLOADS_QUERY_CMD, 0,
+					       0, NULL);
+		if (ret)
+			IWL_ERR(mvm, "failed to query offload statistics (%d)\n", ret);
+	}
 
 	return iwl_mvm_send_wowlan_get_status(mvm);
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/offloading.c b/drivers/net/wireless/intel/iwlwifi/mvm/offloading.c
index 1cc90e61367b..41880517e8bb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/offloading.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/offloading.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014 Intel Corporation
+ * Copyright (C) 2012-2014, 2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015 Intel Deutschland GmbH
  */
@@ -36,7 +36,7 @@ int iwl_mvm_send_proto_offload(struct iwl_mvm *mvm,
 		struct iwl_proto_offload_cmd_v1 v1;
 		struct iwl_proto_offload_cmd_v2 v2;
 		struct iwl_proto_offload_cmd_v3_small v3s;
-		struct iwl_proto_offload_cmd_v3_large v3l;
+		struct iwl_proto_offload_cmd_v4 v4;
 	} cmd = {};
 	struct iwl_host_cmd hcmd = {
 		.id = PROT_OFFLOAD_CONFIG_CMD,
@@ -47,6 +47,9 @@ int iwl_mvm_send_proto_offload(struct iwl_mvm *mvm,
 	struct iwl_proto_offload_cmd_common *common;
 	u32 enabled = 0, size;
 	u32 capa_flags = mvm->fw->ucode_capa.flags;
+	int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+					PROT_OFFLOAD_CONFIG_CMD, 0);
+
 #if IS_ENABLED(CONFIG_IPV6)
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	int i;
@@ -72,9 +75,9 @@ int iwl_mvm_send_proto_offload(struct iwl_mvm *mvm,
 			addrs = cmd.v3s.targ_addrs;
 			n_addrs = IWL_PROTO_OFFLOAD_NUM_IPV6_ADDRS_V3S;
 		} else {
-			nsc = cmd.v3l.ns_config;
+			nsc = cmd.v4.ns_config;
 			n_nsc = IWL_PROTO_OFFLOAD_NUM_NS_CONFIG_V3L;
-			addrs = cmd.v3l.targ_addrs;
+			addrs = cmd.v4.targ_addrs;
 			n_addrs = IWL_PROTO_OFFLOAD_NUM_IPV6_ADDRS_V3L;
 		}
 
@@ -116,7 +119,7 @@ int iwl_mvm_send_proto_offload(struct iwl_mvm *mvm,
 			cmd.v3s.num_valid_ipv6_addrs =
 				cpu_to_le32(i - num_skipped);
 		else
-			cmd.v3l.num_valid_ipv6_addrs =
+			cmd.v4.num_valid_ipv6_addrs =
 				cpu_to_le32(i - num_skipped);
 	} else if (capa_flags & IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS) {
 		bool found = false;
@@ -171,8 +174,17 @@ int iwl_mvm_send_proto_offload(struct iwl_mvm *mvm,
 		common = &cmd.v3s.common;
 		size = sizeof(cmd.v3s);
 	} else if (capa_flags & IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE) {
-		common = &cmd.v3l.common;
-		size = sizeof(cmd.v3l);
+		common = &cmd.v4.common;
+		size = sizeof(cmd.v4);
+		if (ver < 4) {
+			/*
+			 * This basically uses iwl_proto_offload_cmd_v3_large
+			 * which doesn't have the sta_id parameter before the
+			 * common part.
+			 */
+			size -= sizeof(cmd.v4.sta_id);
+			hcmd.data[0] = common;
+		}
 	} else if (capa_flags & IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS) {
 		common = &cmd.v2.common;
 		size = sizeof(cmd.v2);

From 0b35991a80762773078aa8ba044baf485b293e45 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 21 Jun 2021 10:37:32 +0300
Subject: [PATCH 2527/3292] iwlwifi: mvm: update iwl_wowlan_patterns_cmd

We need to pass the station id to tell the firmware
on which station we want to configure the patterns.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.73eceb822890.I37347afbc01497a8a9e4d4afe4fa9a965abd31ac@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/api/d3.h | 14 ++++++++++++--
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c    | 13 ++++++++++---
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index a9e8f30ef91d..4dbf24128a98 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -318,13 +318,23 @@ struct iwl_wowlan_patterns_cmd {
 	/**
 	 * @n_patterns: number of patterns
 	 */
-	__le32 n_patterns;
+	u8 n_patterns;
+
+	/**
+	 * @n_patterns: sta_id
+	 */
+	u8 sta_id;
+
+	/**
+	 * @reserved: reserved for alignment
+	 */
+	__le16 reserved;
 
 	/**
 	 * @patterns: the patterns, array length in @n_patterns
 	 */
 	struct iwl_wowlan_pattern_v2 patterns[];
-} __packed; /* WOWLAN_PATTERN_ARRAY_API_S_VER_2 */
+} __packed; /* WOWLAN_PATTERN_ARRAY_API_S_VER_3 */
 
 enum iwl_wowlan_wakeup_filters {
 	IWL_WOWLAN_WAKEUP_MAGIC_PACKET			= BIT(0),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 7b13c4fc1b58..8e5814a3b178 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -393,14 +393,19 @@ static int iwl_mvm_send_patterns_v1(struct iwl_mvm *mvm,
 }
 
 static int iwl_mvm_send_patterns(struct iwl_mvm *mvm,
+				 struct ieee80211_vif *vif,
 				 struct cfg80211_wowlan *wowlan)
 {
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_wowlan_patterns_cmd *pattern_cmd;
 	struct iwl_host_cmd cmd = {
 		.id = WOWLAN_PATTERNS,
 		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
 	};
 	int i, err;
+	int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+					WOWLAN_PATTERNS,
+					IWL_FW_CMD_VER_UNKNOWN);
 
 	if (!wowlan->n_patterns)
 		return 0;
@@ -408,11 +413,13 @@ static int iwl_mvm_send_patterns(struct iwl_mvm *mvm,
 	cmd.len[0] = sizeof(*pattern_cmd) +
 		wowlan->n_patterns * sizeof(struct iwl_wowlan_pattern_v2);
 
-	pattern_cmd = kmalloc(cmd.len[0], GFP_KERNEL);
+	pattern_cmd = kzalloc(cmd.len[0], GFP_KERNEL);
 	if (!pattern_cmd)
 		return -ENOMEM;
 
-	pattern_cmd->n_patterns = cpu_to_le32(wowlan->n_patterns);
+	pattern_cmd->n_patterns = wowlan->n_patterns;
+	if (ver >= 3)
+		pattern_cmd->sta_id = mvmvif->ap_sta_id;
 
 	for (i = 0; i < wowlan->n_patterns; i++) {
 		int mask_len = DIV_ROUND_UP(wowlan->patterns[i].pattern_len, 8);
@@ -887,7 +894,7 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
 
 	if (fw_has_api(&mvm->fw->ucode_capa,
 		       IWL_UCODE_TLV_API_WOWLAN_TCP_SYN_WAKE))
-		ret = iwl_mvm_send_patterns(mvm, wowlan);
+		ret = iwl_mvm_send_patterns(mvm, vif, wowlan);
 	else
 		ret = iwl_mvm_send_patterns_v1(mvm, wowlan);
 	if (ret)

From 80e6711919d4a13d00dfed185d850316b7f993ce Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 21 Jun 2021 10:37:33 +0300
Subject: [PATCH 2528/3292] iwlwifi: mvm: introduce
 iwl_wowlan_kek_kck_material_cmd_v4

We need to pass the station id to teach the firmware on which
station id we want to configure the key material.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.13417410e9ea.I140c16e70f8ac91cec7e8189e182e2f672c39258@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/d3.h    | 13 ++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c   | 30 +++++++++++++------
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index 4dbf24128a98..ea2bd34e32a3 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -483,6 +483,19 @@ struct iwl_wowlan_kek_kck_material_cmd_v3 {
 	__le32  bigtk_cipher;
 } __packed; /* KEK_KCK_MATERIAL_API_S_VER_3 */
 
+struct iwl_wowlan_kek_kck_material_cmd_v4 {
+	__le32  sta_id;
+	u8	kck[IWL_KCK_MAX_SIZE];
+	u8	kek[IWL_KEK_MAX_SIZE];
+	__le16	kck_len;
+	__le16	kek_len;
+	__le64	replay_ctr;
+	__le32  akm;
+	__le32  gtk_cipher;
+	__le32  igtk_cipher;
+	__le32  bigtk_cipher;
+} __packed; /* KEK_KCK_MATERIAL_API_S_VER_4 */
+
 #define RF_KILL_INDICATOR_FOR_WOWLAN	0x87
 
 enum iwl_wowlan_rekey_status {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 8e5814a3b178..0777a709740b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -104,7 +104,7 @@ static const u8 *iwl_mvm_find_max_pn(struct ieee80211_key_conf *key,
 struct wowlan_key_data {
 	struct iwl_wowlan_rsc_tsc_params_cmd *rsc_tsc;
 	struct iwl_wowlan_tkip_params_cmd *tkip;
-	struct iwl_wowlan_kek_kck_material_cmd_v3 *kek_kck_cmd;
+	struct iwl_wowlan_kek_kck_material_cmd_v4 *kek_kck_cmd;
 	bool error, use_rsc_tsc, use_tkip, configure_keys;
 	int wep_key_idx;
 };
@@ -716,7 +716,8 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
 					    struct ieee80211_vif *vif,
 					    u32 cmd_flags)
 {
-	struct iwl_wowlan_kek_kck_material_cmd_v3 kek_kck_cmd = {};
+	struct iwl_wowlan_kek_kck_material_cmd_v4 kek_kck_cmd = {};
+	struct iwl_wowlan_kek_kck_material_cmd_v4 *_kek_kck_cmd = &kek_kck_cmd;
 	struct iwl_wowlan_tkip_params_cmd tkip_cmd = {};
 	bool unified = fw_has_capa(&mvm->fw->ucode_capa,
 				   IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
@@ -725,7 +726,7 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
 		.use_rsc_tsc = false,
 		.tkip = &tkip_cmd,
 		.use_tkip = false,
-		.kek_kck_cmd = &kek_kck_cmd,
+		.kek_kck_cmd = _kek_kck_cmd,
 	};
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	int ret;
@@ -819,13 +820,9 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
 						IWL_ALWAYS_LONG_GROUP,
 						WOWLAN_KEK_KCK_MATERIAL,
 						IWL_FW_CMD_VER_UNKNOWN);
-		if (WARN_ON(cmd_ver != 2 && cmd_ver != 3 &&
+		if (WARN_ON(cmd_ver != 2 && cmd_ver != 3 && cmd_ver != 4 &&
 			    cmd_ver != IWL_FW_CMD_VER_UNKNOWN))
 			return -EINVAL;
-		if (cmd_ver == 3)
-			cmd_size = sizeof(struct iwl_wowlan_kek_kck_material_cmd_v3);
-		else
-			cmd_size = sizeof(struct iwl_wowlan_kek_kck_material_cmd_v2);
 
 		memcpy(kek_kck_cmd.kck, mvmvif->rekey_data.kck,
 		       mvmvif->rekey_data.kck_len);
@@ -835,6 +832,21 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
 		kek_kck_cmd.kek_len = cpu_to_le16(mvmvif->rekey_data.kek_len);
 		kek_kck_cmd.replay_ctr = mvmvif->rekey_data.replay_ctr;
 		kek_kck_cmd.akm = cpu_to_le32(mvmvif->rekey_data.akm);
+		kek_kck_cmd.sta_id = cpu_to_le32(mvmvif->ap_sta_id);
+
+		if (cmd_ver == 4) {
+			cmd_size = sizeof(struct iwl_wowlan_kek_kck_material_cmd_v4);
+		} else {
+			if (cmd_ver == 3)
+				cmd_size =
+					sizeof(struct iwl_wowlan_kek_kck_material_cmd_v3);
+			else
+				cmd_size =
+					sizeof(struct iwl_wowlan_kek_kck_material_cmd_v2);
+			/* skip the sta_id at the beginning */
+			_kek_kck_cmd = (void *)
+				((u8 *)_kek_kck_cmd) + sizeof(kek_kck_cmd.sta_id);
+		}
 
 		IWL_DEBUG_WOWLAN(mvm, "setting akm %d\n",
 				 mvmvif->rekey_data.akm);
@@ -842,7 +854,7 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
 		ret = iwl_mvm_send_cmd_pdu(mvm,
 					   WOWLAN_KEK_KCK_MATERIAL, cmd_flags,
 					   cmd_size,
-					   &kek_kck_cmd);
+					   _kek_kck_cmd);
 		if (ret)
 			goto out;
 	}

From 5c157941cda00e9a1127a7a909177900f9195e19 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 21 Jun 2021 10:37:34 +0300
Subject: [PATCH 2529/3292] iwlwifi: mvm: introduce iwl_wowlan_get_status_cmd

We need to pass the station id to teach the firmware on which
station id we want to get the status.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.45218d913d07.I61a086936508230d86b454636945ceb0b9ea09fd@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/d3.h    |  4 +++
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c   | 31 ++++++++++++++-----
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  |  1 -
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index ea2bd34e32a3..b2e7ef3ddc88 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -496,6 +496,10 @@ struct iwl_wowlan_kek_kck_material_cmd_v4 {
 	__le32  bigtk_cipher;
 } __packed; /* KEK_KCK_MATERIAL_API_S_VER_4 */
 
+struct iwl_wowlan_get_status_cmd {
+	__le32  sta_id;
+} __packed; /* WOWLAN_GET_STATUSES_CMD_API_S_VER_1 */
+
 #define RF_KILL_INDICATOR_FOR_WOWLAN	0x87
 
 enum iwl_wowlan_rekey_status {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 0777a709740b..6a259d867d90 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1612,15 +1612,27 @@ iwl_mvm_parse_wowlan_status_common(v6)
 iwl_mvm_parse_wowlan_status_common(v7)
 iwl_mvm_parse_wowlan_status_common(v9)
 
-struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm)
+static struct iwl_wowlan_status *
+iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm, u8 sta_id)
 {
 	struct iwl_wowlan_status *status;
+	struct iwl_wowlan_get_status_cmd get_status_cmd = {
+		.sta_id = cpu_to_le32(sta_id),
+	};
 	struct iwl_host_cmd cmd = {
 		.id = WOWLAN_GET_STATUSES,
 		.flags = CMD_WANT_SKB,
+		.data = { &get_status_cmd, },
+		.len = { sizeof(get_status_cmd), },
 	};
 	int ret, len;
 	u8 notif_ver;
+	u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+					   WOWLAN_GET_STATUSES,
+					   IWL_FW_CMD_VER_UNKNOWN);
+
+	if (cmd_ver == IWL_FW_CMD_VER_UNKNOWN)
+		cmd.len[0] = 0;
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -1708,32 +1720,37 @@ out_free_resp:
 }
 
 static struct iwl_wowlan_status *
-iwl_mvm_get_wakeup_status(struct iwl_mvm *mvm)
+iwl_mvm_get_wakeup_status(struct iwl_mvm *mvm, u8 sta_id)
 {
-	int ret;
+	u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+					   OFFLOADS_QUERY_CMD,
+					   IWL_FW_CMD_VER_UNKNOWN);
+	__le32 station_id = cpu_to_le32(sta_id);
+	u32 cmd_size = cmd_ver != IWL_FW_CMD_VER_UNKNOWN ? sizeof(station_id) : 0;
 
 	if (!mvm->net_detect) {
 		/* only for tracing for now */
 		int ret = iwl_mvm_send_cmd_pdu(mvm, OFFLOADS_QUERY_CMD, 0,
-					       0, NULL);
+					       cmd_size, &station_id);
 		if (ret)
 			IWL_ERR(mvm, "failed to query offload statistics (%d)\n", ret);
 	}
 
-	return iwl_mvm_send_wowlan_get_status(mvm);
+	return iwl_mvm_send_wowlan_get_status(mvm, sta_id);
 }
 
 /* releases the MVM mutex */
 static bool iwl_mvm_query_wakeup_reasons(struct iwl_mvm *mvm,
 					 struct ieee80211_vif *vif)
 {
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_wowlan_status_data status;
 	struct iwl_wowlan_status *fw_status;
 	int i;
 	bool keep;
 	struct iwl_mvm_sta *mvm_ap_sta;
 
-	fw_status = iwl_mvm_get_wakeup_status(mvm);
+	fw_status = iwl_mvm_get_wakeup_status(mvm, mvmvif->ap_sta_id);
 	if (IS_ERR_OR_NULL(fw_status))
 		goto out_unlock;
 
@@ -1911,7 +1928,7 @@ static void iwl_mvm_query_netdetect_reasons(struct iwl_mvm *mvm,
 	u32 reasons = 0;
 	int i, n_matches, ret;
 
-	fw_status = iwl_mvm_get_wakeup_status(mvm);
+	fw_status = iwl_mvm_get_wakeup_status(mvm, IWL_MVM_INVALID_STA);
 	if (!IS_ERR_OR_NULL(fw_status)) {
 		reasons = le32_to_cpu(fw_status->wakeup_reasons);
 		kfree(fw_status);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index d89c73ae2848..bf99eed23a9f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1775,7 +1775,6 @@ void iwl_mvm_ipv6_addr_change(struct ieee80211_hw *hw,
 void iwl_mvm_set_default_unicast_key(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif, int idx);
 extern const struct file_operations iwl_dbgfs_d3_test_ops;
-struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm);
 #ifdef CONFIG_PM
 void iwl_mvm_set_last_nonqos_seq(struct iwl_mvm *mvm,
 				 struct ieee80211_vif *vif);

From 84c3c9952afbf7df39937095aa0ad70b58703e91 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Mon, 21 Jun 2021 10:37:35 +0300
Subject: [PATCH 2530/3292] iwlwifi: move UEFI code to a separate file

We are going to read more variables from UEFI, so it's cleaner to have
all the code that handles UEFI variables in a separate file.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.c705ac86f2e9.Ia7421c17fe52929e4098b4f0cf070809ed3ef906@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/Makefile  |   1 +
 drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 105 +++++--------------
 drivers/net/wireless/intel/iwlwifi/fw/uefi.c |  64 +++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/uefi.h |  25 +++++
 4 files changed, 114 insertions(+), 81 deletions(-)
 create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/uefi.c
 create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/uefi.h

diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile
index 14b0db28143b..a8428c27286c 100644
--- a/drivers/net/wireless/intel/iwlwifi/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/Makefile
@@ -19,6 +19,7 @@ iwlwifi-objs		+= fw/img.o fw/notif-wait.o
 iwlwifi-objs		+= fw/dbg.o fw/pnvm.o
 iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o
 iwlwifi-$(CONFIG_ACPI) += fw/acpi.o
+iwlwifi-$(CONFIG_EFI)	+= fw/uefi.o
 iwlwifi-$(CONFIG_IWLWIFI_DEBUGFS) += fw/debugfs.o
 
 iwlwifi-objs += $(iwlwifi-m)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
index 40f2109a097f..565c19475155 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
@@ -10,13 +10,22 @@
 #include "fw/api/commands.h"
 #include "fw/api/nvm-reg.h"
 #include "fw/api/alive.h"
-#include <linux/efi.h>
+#include "fw/uefi.h"
 
 struct iwl_pnvm_section {
 	__le32 offset;
 	const u8 data[];
 } __packed;
 
+struct pnvm_sku_package {
+	u8 rev;
+	u8 reserved1[3];
+	u32 total_size;
+	u8 n_skus;
+	u8 reserved2[11];
+	u8 data[];
+};
+
 static bool iwl_pnvm_complete_fn(struct iwl_notif_wait_data *notif_wait,
 				 struct iwl_rx_packet *pkt, void *data)
 {
@@ -220,83 +229,6 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data,
 	return -ENOENT;
 }
 
-#if defined(CONFIG_EFI)
-
-#define IWL_EFI_VAR_GUID EFI_GUID(0x92daaf2f, 0xc02b, 0x455b,	\
-				  0xb2, 0xec, 0xf5, 0xa3,	\
-				  0x59, 0x4f, 0x4a, 0xea)
-
-#define IWL_UEFI_OEM_PNVM_NAME	L"UefiCnvWlanOemSignedPnvm"
-
-#define IWL_HARDCODED_PNVM_SIZE 4096
-
-struct pnvm_sku_package {
-	u8 rev;
-	u8 reserved1[3];
-	u32 total_size;
-	u8 n_skus;
-	u8 reserved2[11];
-	u8 data[];
-};
-
-static int iwl_pnvm_get_from_efi(struct iwl_trans *trans,
-				 u8 **data, size_t *len)
-{
-	struct efivar_entry *pnvm_efivar;
-	struct pnvm_sku_package *package;
-	unsigned long package_size;
-	int err;
-
-	pnvm_efivar = kzalloc(sizeof(*pnvm_efivar), GFP_KERNEL);
-	if (!pnvm_efivar)
-		return -ENOMEM;
-
-	memcpy(&pnvm_efivar->var.VariableName, IWL_UEFI_OEM_PNVM_NAME,
-	       sizeof(IWL_UEFI_OEM_PNVM_NAME));
-	pnvm_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
-
-	/*
-	 * TODO: we hardcode a maximum length here, because reading
-	 * from the UEFI is not working.  To implement this properly,
-	 * we have to call efivar_entry_size().
-	 */
-	package_size = IWL_HARDCODED_PNVM_SIZE;
-
-	package = kmalloc(package_size, GFP_KERNEL);
-	if (!package) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	err = efivar_entry_get(pnvm_efivar, NULL, &package_size, package);
-	if (err) {
-		IWL_DEBUG_FW(trans,
-			     "PNVM UEFI variable not found %d (len %lu)\n",
-			     err, package_size);
-		goto out;
-	}
-
-	IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %lu\n", package_size);
-
-	*data = kmemdup(package->data, *len, GFP_KERNEL);
-	if (!*data)
-		err = -ENOMEM;
-	*len = package_size - sizeof(*package);
-
-out:
-	kfree(package);
-	kfree(pnvm_efivar);
-
-	return err;
-}
-#else /* CONFIG_EFI */
-static inline int iwl_pnvm_get_from_efi(struct iwl_trans *trans,
-					u8 **data, size_t *len)
-{
-	return -EOPNOTSUPP;
-}
-#endif /* CONFIG_EFI */
-
 static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len)
 {
 	const struct firmware *pnvm;
@@ -335,6 +267,7 @@ int iwl_pnvm_load(struct iwl_trans *trans,
 {
 	u8 *data;
 	size_t len;
+	struct pnvm_sku_package *package;
 	struct iwl_notification_wait pnvm_wait;
 	static const u16 ntf_cmds[] = { WIDE_ID(REGULATORY_AND_NVM_GROUP,
 						PNVM_INIT_COMPLETE_NTFY) };
@@ -356,9 +289,19 @@ int iwl_pnvm_load(struct iwl_trans *trans,
 	}
 
 	/* First attempt to get the PNVM from BIOS */
-	ret = iwl_pnvm_get_from_efi(trans, &data, &len);
-	if (!ret)
-		goto parse;
+	package = iwl_uefi_get_pnvm(trans, &len);
+	if (!IS_ERR_OR_NULL(package)) {
+		data = kmemdup(package->data, len, GFP_KERNEL);
+
+		/* free package regardless of whether kmemdup succeeded */
+		kfree(package);
+
+		if (data) {
+			/* we need only the data size */
+			len -= sizeof(*package);
+			goto parse;
+		}
+	}
 
 	/* If it's not available, try from the filesystem */
 	ret = iwl_pnvm_get_from_fs(trans, &data, &len);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
new file mode 100644
index 000000000000..bdcdca178eda
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include "iwl-drv.h"
+#include "pnvm.h"
+#include "iwl-prph.h"
+#include "iwl-io.h"
+
+#include "fw/uefi.h"
+#include <linux/efi.h>
+
+#define IWL_EFI_VAR_GUID EFI_GUID(0x92daaf2f, 0xc02b, 0x455b,	\
+				  0xb2, 0xec, 0xf5, 0xa3,	\
+				  0x59, 0x4f, 0x4a, 0xea)
+
+void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
+{
+	struct efivar_entry *pnvm_efivar;
+	void *data;
+	unsigned long package_size;
+	int err;
+
+	pnvm_efivar = kzalloc(sizeof(*pnvm_efivar), GFP_KERNEL);
+	if (!pnvm_efivar)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&pnvm_efivar->var.VariableName, IWL_UEFI_OEM_PNVM_NAME,
+	       sizeof(IWL_UEFI_OEM_PNVM_NAME));
+	pnvm_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
+
+	/*
+	 * TODO: we hardcode a maximum length here, because reading
+	 * from the UEFI is not working.  To implement this properly,
+	 * we have to call efivar_entry_size().
+	 */
+	package_size = IWL_HARDCODED_PNVM_SIZE;
+
+	data = kmalloc(package_size, GFP_KERNEL);
+	if (!data) {
+		data = ERR_PTR(-ENOMEM);
+		*len = 0;
+		goto out;
+	}
+
+	err = efivar_entry_get(pnvm_efivar, NULL, &package_size, data);
+	if (err) {
+		IWL_DEBUG_FW(trans,
+			     "PNVM UEFI variable not found %d (len %zd)\n",
+			     err, package_size);
+		kfree(data);
+		data = ERR_PTR(err);
+		goto out;
+	}
+
+	IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %zd\n", package_size);
+	*len = package_size;
+
+out:
+	kfree(pnvm_efivar);
+
+	return data;
+}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
new file mode 100644
index 000000000000..48f1b54e3e76
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+
+#define IWL_UEFI_OEM_PNVM_NAME	L"UefiCnvWlanOemSignedPnvm"
+
+/*
+ * TODO: we have these hardcoded values that the caller must pass,
+ * because reading from the UEFI is not working.  To implement this
+ * properly, we have to change iwl_pnvm_get_from_uefi() to call
+ * efivar_entry_size() and return the value to the caller instead.
+ */
+#define IWL_HARDCODED_PNVM_SIZE 4096
+
+#ifdef CONFIG_EFI
+void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len);
+#else /* CONFIG_EFI */
+static inline
+void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif /* CONFIG_EFI */

From 9dad325f9d57508b154f0bebbc341a8528e5729c Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Mon, 21 Jun 2021 10:37:36 +0300
Subject: [PATCH 2531/3292] iwlwifi: support loading the reduced power table
 from UEFI

This new feature allows OEMs to set a special reduced power table in a
UEFI variable, which we use to tell the firmware to change the TX
power tables.

Read the variable and store it in a dram block to pass it to the
firmware.  We do this as part of the PNVM loading flow.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.259a33ba5074.I2e0bb142d2a9c412547cba89b62dd077b328fdc4@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/file.h  |   3 +-
 drivers/net/wireless/intel/iwlwifi/fw/pnvm.c  |  33 ++-
 drivers/net/wireless/intel/iwlwifi/fw/uefi.c  | 200 +++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/fw/uefi.h  |  21 +-
 .../intel/iwlwifi/iwl-context-info-gen3.h     |  16 +-
 .../net/wireless/intel/iwlwifi/iwl-trans.h    |  17 ++
 .../intel/iwlwifi/pcie/ctxt-info-gen3.c       |  34 +++
 .../wireless/intel/iwlwifi/pcie/internal.h    |   1 +
 .../net/wireless/intel/iwlwifi/pcie/trans.c   |   7 +
 9 files changed, 318 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index ef1a24504c8b..74e25a6ecc3d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -52,7 +52,8 @@ enum iwl_ucode_tlv_type {
 	IWL_UCODE_TLV_INIT_DATA		= 4,
 	IWL_UCODE_TLV_BOOT		= 5,
 	IWL_UCODE_TLV_PROBE_MAX_LEN	= 6, /* a u32 value */
-	IWL_UCODE_TLV_PAN		= 7,
+	IWL_UCODE_TLV_PAN		= 7, /* deprecated -- only used in DVM */
+	IWL_UCODE_TLV_MEM_DESC		= 7, /* replaces PAN in non-DVM */
 	IWL_UCODE_TLV_RUNT_EVTLOG_PTR	= 8,
 	IWL_UCODE_TLV_RUNT_EVTLOG_SIZE	= 9,
 	IWL_UCODE_TLV_RUNT_ERRLOG_PTR	= 10,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
index 565c19475155..2403490cbc26 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
@@ -17,15 +17,6 @@ struct iwl_pnvm_section {
 	const u8 data[];
 } __packed;
 
-struct pnvm_sku_package {
-	u8 rev;
-	u8 reserved1[3];
-	u32 total_size;
-	u8 n_skus;
-	u8 reserved2[11];
-	u8 data[];
-};
-
 static bool iwl_pnvm_complete_fn(struct iwl_notif_wait_data *notif_wait,
 				 struct iwl_rx_packet *pkt, void *data)
 {
@@ -322,6 +313,30 @@ parse:
 	kfree(data);
 
 skip_parse:
+	data = NULL;
+	/* now try to get the reduce power table, if not loaded yet */
+	if (!trans->reduce_power_loaded) {
+		data = iwl_uefi_get_reduced_power(trans, &len);
+		if (IS_ERR_OR_NULL(data)) {
+			/*
+			 * Pretend we've loaded it - at least we've tried and
+			 * couldn't load it at all, so there's no point in
+			 * trying again over and over.
+			 */
+			trans->reduce_power_loaded = true;
+
+			goto skip_reduce_power;
+		}
+	}
+
+	ret = iwl_trans_set_reduce_power(trans, data, len);
+	if (ret)
+		IWL_DEBUG_FW(trans,
+			     "Failed to set reduce power table %d\n",
+			     ret);
+	kfree(data);
+
+skip_reduce_power:
 	iwl_init_notification_wait(notif_wait, &pnvm_wait,
 				   ntf_cmds, ARRAY_SIZE(ntf_cmds),
 				   iwl_pnvm_complete_fn, trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
index bdcdca178eda..a7c79d814aa4 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.c
@@ -9,6 +9,7 @@
 #include "iwl-io.h"
 
 #include "fw/uefi.h"
+#include "fw/api/alive.h"
 #include <linux/efi.h>
 
 #define IWL_EFI_VAR_GUID EFI_GUID(0x92daaf2f, 0xc02b, 0x455b,	\
@@ -22,6 +23,8 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
 	unsigned long package_size;
 	int err;
 
+	*len = 0;
+
 	pnvm_efivar = kzalloc(sizeof(*pnvm_efivar), GFP_KERNEL);
 	if (!pnvm_efivar)
 		return ERR_PTR(-ENOMEM);
@@ -40,7 +43,6 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
 	data = kmalloc(package_size, GFP_KERNEL);
 	if (!data) {
 		data = ERR_PTR(-ENOMEM);
-		*len = 0;
 		goto out;
 	}
 
@@ -62,3 +64,199 @@ out:
 
 	return data;
 }
+
+static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
+					   const u8 *data, size_t len)
+{
+	struct iwl_ucode_tlv *tlv;
+	u8 *reduce_power_data = NULL, *tmp;
+	u32 size = 0;
+
+	IWL_DEBUG_FW(trans, "Handling REDUCE_POWER section\n");
+
+	while (len >= sizeof(*tlv)) {
+		u32 tlv_len, tlv_type;
+
+		len -= sizeof(*tlv);
+		tlv = (void *)data;
+
+		tlv_len = le32_to_cpu(tlv->length);
+		tlv_type = le32_to_cpu(tlv->type);
+
+		if (len < tlv_len) {
+			IWL_ERR(trans, "invalid TLV len: %zd/%u\n",
+				len, tlv_len);
+			reduce_power_data = ERR_PTR(-EINVAL);
+			goto out;
+		}
+
+		data += sizeof(*tlv);
+
+		switch (tlv_type) {
+		case IWL_UCODE_TLV_MEM_DESC: {
+			IWL_DEBUG_FW(trans,
+				     "Got IWL_UCODE_TLV_MEM_DESC len %d\n",
+				     tlv_len);
+
+			IWL_DEBUG_FW(trans, "Adding data (size %d)\n", tlv_len);
+
+			tmp = krealloc(reduce_power_data, size + tlv_len, GFP_KERNEL);
+			if (!tmp) {
+				IWL_DEBUG_FW(trans,
+					     "Couldn't allocate (more) reduce_power_data\n");
+
+				reduce_power_data = ERR_PTR(-ENOMEM);
+				goto out;
+			}
+
+			reduce_power_data = tmp;
+
+			memcpy(reduce_power_data + size, data, tlv_len);
+
+			size += tlv_len;
+
+			break;
+		}
+		case IWL_UCODE_TLV_PNVM_SKU:
+			IWL_DEBUG_FW(trans,
+				     "New REDUCE_POWER section started, stop parsing.\n");
+			goto done;
+		default:
+			IWL_DEBUG_FW(trans, "Found TLV 0x%0x, len %d\n",
+				     tlv_type, tlv_len);
+			break;
+		}
+
+		len -= ALIGN(tlv_len, 4);
+		data += ALIGN(tlv_len, 4);
+	}
+
+done:
+	if (!size) {
+		IWL_DEBUG_FW(trans, "Empty REDUCE_POWER, skipping.\n");
+		reduce_power_data = ERR_PTR(-ENOENT);
+		goto out;
+	}
+
+	IWL_INFO(trans, "loaded REDUCE_POWER\n");
+
+out:
+	return reduce_power_data;
+}
+
+static void *iwl_uefi_reduce_power_parse(struct iwl_trans *trans,
+					 const u8 *data, size_t len)
+{
+	struct iwl_ucode_tlv *tlv;
+	void *sec_data;
+
+	IWL_DEBUG_FW(trans, "Parsing REDUCE_POWER data\n");
+
+	while (len >= sizeof(*tlv)) {
+		u32 tlv_len, tlv_type;
+
+		len -= sizeof(*tlv);
+		tlv = (void *)data;
+
+		tlv_len = le32_to_cpu(tlv->length);
+		tlv_type = le32_to_cpu(tlv->type);
+
+		if (len < tlv_len) {
+			IWL_ERR(trans, "invalid TLV len: %zd/%u\n",
+				len, tlv_len);
+			return ERR_PTR(-EINVAL);
+		}
+
+		if (tlv_type == IWL_UCODE_TLV_PNVM_SKU) {
+			struct iwl_sku_id *sku_id =
+				(void *)(data + sizeof(*tlv));
+
+			IWL_DEBUG_FW(trans,
+				     "Got IWL_UCODE_TLV_PNVM_SKU len %d\n",
+				     tlv_len);
+			IWL_DEBUG_FW(trans, "sku_id 0x%0x 0x%0x 0x%0x\n",
+				     le32_to_cpu(sku_id->data[0]),
+				     le32_to_cpu(sku_id->data[1]),
+				     le32_to_cpu(sku_id->data[2]));
+
+			data += sizeof(*tlv) + ALIGN(tlv_len, 4);
+			len -= ALIGN(tlv_len, 4);
+
+			if (trans->sku_id[0] == le32_to_cpu(sku_id->data[0]) &&
+			    trans->sku_id[1] == le32_to_cpu(sku_id->data[1]) &&
+			    trans->sku_id[2] == le32_to_cpu(sku_id->data[2])) {
+				sec_data = iwl_uefi_reduce_power_section(trans,
+									 data,
+									 len);
+				if (!IS_ERR(sec_data))
+					return sec_data;
+			} else {
+				IWL_DEBUG_FW(trans, "SKU ID didn't match!\n");
+			}
+		} else {
+			data += sizeof(*tlv) + ALIGN(tlv_len, 4);
+			len -= ALIGN(tlv_len, 4);
+		}
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
+{
+	struct efivar_entry *reduce_power_efivar;
+	struct pnvm_sku_package *package;
+	void *data = NULL;
+	unsigned long package_size;
+	int err;
+
+	*len = 0;
+
+	reduce_power_efivar = kzalloc(sizeof(*reduce_power_efivar), GFP_KERNEL);
+	if (!reduce_power_efivar)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&reduce_power_efivar->var.VariableName, IWL_UEFI_REDUCED_POWER_NAME,
+	       sizeof(IWL_UEFI_REDUCED_POWER_NAME));
+	reduce_power_efivar->var.VendorGuid = IWL_EFI_VAR_GUID;
+
+	/*
+	 * TODO: we hardcode a maximum length here, because reading
+	 * from the UEFI is not working.  To implement this properly,
+	 * we have to call efivar_entry_size().
+	 */
+	package_size = IWL_HARDCODED_REDUCE_POWER_SIZE;
+
+	package = kmalloc(package_size, GFP_KERNEL);
+	if (!package) {
+		package = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	err = efivar_entry_get(reduce_power_efivar, NULL, &package_size, package);
+	if (err) {
+		IWL_DEBUG_FW(trans,
+			     "Reduced Power UEFI variable not found %d (len %lu)\n",
+			     err, package_size);
+		kfree(package);
+		data = ERR_PTR(err);
+		goto out;
+	}
+
+	IWL_DEBUG_FW(trans, "Read reduced power from UEFI with size %lu\n",
+		     package_size);
+	*len = package_size;
+
+	IWL_DEBUG_FW(trans, "rev %d, total_size %d, n_skus %d\n",
+		     package->rev, package->total_size, package->n_skus);
+
+	data = iwl_uefi_reduce_power_parse(trans, package->data,
+					   *len - sizeof(*package));
+
+	kfree(package);
+
+out:
+	kfree(reduce_power_efivar);
+
+	return data;
+}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
index 48f1b54e3e76..45d0b36d79b5 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/uefi.h
@@ -4,7 +4,8 @@
  */
 
 
-#define IWL_UEFI_OEM_PNVM_NAME	L"UefiCnvWlanOemSignedPnvm"
+#define IWL_UEFI_OEM_PNVM_NAME		L"UefiCnvWlanOemSignedPnvm"
+#define IWL_UEFI_REDUCED_POWER_NAME	L"UefiCnvWlanReducedPower"
 
 /*
  * TODO: we have these hardcoded values that the caller must pass,
@@ -12,14 +13,30 @@
  * properly, we have to change iwl_pnvm_get_from_uefi() to call
  * efivar_entry_size() and return the value to the caller instead.
  */
-#define IWL_HARDCODED_PNVM_SIZE 4096
+#define IWL_HARDCODED_PNVM_SIZE		4096
+#define IWL_HARDCODED_REDUCE_POWER_SIZE	32768
+
+struct pnvm_sku_package {
+	u8 rev;
+	u32 total_size;
+	u8 n_skus;
+	u32 reserved[2];
+	u8 data[];
+} __packed;
 
 #ifdef CONFIG_EFI
 void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len);
+void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len);
 #else /* CONFIG_EFI */
 static inline
 void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+
+static inline
+void *iwl_uefi_get_reduced_power(struct iwl_trans *trans, size_t *len)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_EFI */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
index 518a1bc79584..e1fec23ac07f 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
@@ -127,6 +127,17 @@ struct iwl_prph_scratch_rbd_cfg {
 	__le32 reserved;
 } __packed; /* PERIPH_SCRATCH_RBD_CFG_S */
 
+/*
+ * struct iwl_prph_scratch_uefi_cfg - prph scratch reduce power table
+ * @base_addr: reduce power table address
+ * @size: table size in dwords
+ */
+struct iwl_prph_scratch_uefi_cfg {
+	__le64 base_addr;
+	__le32 size;
+	__le32 reserved;
+} __packed; /* PERIPH_SCRATCH_UEFI_CFG_S */
+
 /*
  * struct iwl_prph_scratch_ctrl_cfg - prph scratch ctrl and config
  * @version: version information of context info and HW
@@ -141,6 +152,7 @@ struct iwl_prph_scratch_ctrl_cfg {
 	struct iwl_prph_scratch_pnvm_cfg pnvm_cfg;
 	struct iwl_prph_scratch_hwm_cfg hwm_cfg;
 	struct iwl_prph_scratch_rbd_cfg rbd_cfg;
+	struct iwl_prph_scratch_uefi_cfg reduce_power_cfg;
 } __packed; /* PERIPH_SCRATCH_CTRL_CFG_S */
 
 /*
@@ -151,7 +163,7 @@ struct iwl_prph_scratch_ctrl_cfg {
  */
 struct iwl_prph_scratch {
 	struct iwl_prph_scratch_ctrl_cfg ctrl_cfg;
-	__le32 reserved[16];
+	__le32 reserved[12];
 	struct iwl_context_info_dram dram;
 } __packed; /* PERIPH_SCRATCH_S */
 
@@ -249,5 +261,7 @@ void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans, bool alive);
 
 int iwl_trans_pcie_ctx_info_gen3_set_pnvm(struct iwl_trans *trans,
 					  const void *data, u32 len);
+int iwl_trans_pcie_ctx_info_gen3_set_reduce_power(struct iwl_trans *trans,
+						  const void *data, u32 len);
 
 #endif /* __iwl_context_info_file_gen3_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index bf569f856ad8..8d745e0c0394 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -589,6 +589,8 @@ struct iwl_trans_ops {
 	void (*debugfs_cleanup)(struct iwl_trans *trans);
 	void (*sync_nmi)(struct iwl_trans *trans);
 	int (*set_pnvm)(struct iwl_trans *trans, const void *data, u32 len);
+	int (*set_reduce_power)(struct iwl_trans *trans,
+				const void *data, u32 len);
 	void (*interrupts)(struct iwl_trans *trans, bool enable);
 };
 
@@ -957,6 +959,7 @@ struct iwl_trans {
 	bool pm_support;
 	bool ltr_enabled;
 	u8 pnvm_loaded:1;
+	u8 reduce_power_loaded:1;
 
 	const struct iwl_hcmd_arr *command_groups;
 	int command_groups_size;
@@ -1420,6 +1423,20 @@ static inline int iwl_trans_set_pnvm(struct iwl_trans *trans,
 	return 0;
 }
 
+static inline int iwl_trans_set_reduce_power(struct iwl_trans *trans,
+					     const void *data, u32 len)
+{
+	if (trans->ops->set_reduce_power) {
+		int ret = trans->ops->set_reduce_power(trans, data, len);
+
+		if (ret)
+			return ret;
+	}
+
+	trans->reduce_power_loaded = true;
+	return 0;
+}
+
 static inline bool iwl_trans_dbg_ini_valid(struct iwl_trans *trans)
 {
 	return trans->dbg.internal_ini_cfg != IWL_INI_CFG_STATE_NOT_LOADED ||
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index c69a1541e678..239a722cd79d 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -302,3 +302,37 @@ int iwl_trans_pcie_ctx_info_gen3_set_pnvm(struct iwl_trans *trans,
 
 	return 0;
 }
+
+int iwl_trans_pcie_ctx_info_gen3_set_reduce_power(struct iwl_trans *trans,
+						  const void *data, u32 len)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	struct iwl_prph_scratch_ctrl_cfg *prph_sc_ctrl =
+		&trans_pcie->prph_scratch->ctrl_cfg;
+	int ret;
+
+	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
+		return 0;
+
+	/* only allocate the DRAM if not allocated yet */
+	if (!trans->reduce_power_loaded) {
+		if (WARN_ON(prph_sc_ctrl->reduce_power_cfg.size))
+			return -EBUSY;
+
+		ret = iwl_pcie_ctxt_info_alloc_dma(trans, data, len,
+					   &trans_pcie->reduce_power_dram);
+		if (ret < 0) {
+			IWL_DEBUG_FW(trans,
+				     "Failed to allocate reduce power DMA %d.\n",
+				     ret);
+			return ret;
+		}
+	}
+
+	prph_sc_ctrl->reduce_power_cfg.base_addr =
+		cpu_to_le64(trans_pcie->reduce_power_dram.physical);
+	prph_sc_ctrl->reduce_power_cfg.size =
+		cpu_to_le32(trans_pcie->reduce_power_dram.size);
+
+	return 0;
+}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 69289e9f8d7e..cc550f6ef957 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -349,6 +349,7 @@ struct iwl_trans_pcie {
 	struct iwl_dma_ptr kw;
 
 	struct iwl_dram_data pnvm_dram;
+	struct iwl_dram_data reduce_power_dram;
 
 	struct iwl_txq *txq_memory;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 1331a6bfd767..bee6b4574226 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1943,6 +1943,12 @@ void iwl_trans_pcie_free(struct iwl_trans *trans)
 				  trans_pcie->pnvm_dram.block,
 				  trans_pcie->pnvm_dram.physical);
 
+	if (trans_pcie->reduce_power_dram.size)
+		dma_free_coherent(trans->dev,
+				  trans_pcie->reduce_power_dram.size,
+				  trans_pcie->reduce_power_dram.block,
+				  trans_pcie->reduce_power_dram.physical);
+
 	mutex_destroy(&trans_pcie->mutex);
 	iwl_trans_free(trans);
 }
@@ -3418,6 +3424,7 @@ static const struct iwl_trans_ops trans_ops_pcie_gen2 = {
 	.wait_txq_empty = iwl_trans_pcie_wait_txq_empty,
 	.rxq_dma_data = iwl_trans_pcie_rxq_dma_data,
 	.set_pnvm = iwl_trans_pcie_ctx_info_gen3_set_pnvm,
+	.set_reduce_power = iwl_trans_pcie_ctx_info_gen3_set_reduce_power,
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	.debugfs_cleanup = iwl_trans_pcie_debugfs_cleanup,
 #endif

From 4db7cf1e0108ce4376e111ac23693be12128e2f3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 21 Jun 2021 10:37:37 +0300
Subject: [PATCH 2532/3292] iwlwifi: move error dump to fw utils

Conceptually, this belongs more into the firmware utils
rather than the mvm opmode, so move the collection and
output there.

Note that this slightly changes the format of the Status
line.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.b82b60d81346.Ide3b688107f6a59c7fc7eb1d8f2002b0a5c1f2d2@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/Makefile   |   2 +-
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c   |  38 --
 drivers/net/wireless/intel/iwlwifi/fw/dbg.h   |   4 +-
 drivers/net/wireless/intel/iwlwifi/fw/dump.c  | 358 ++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  |   7 +-
 .../net/wireless/intel/iwlwifi/mvm/utils.c    | 310 ---------------
 6 files changed, 368 insertions(+), 351 deletions(-)
 create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/dump.c

diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile
index a8428c27286c..d86918d162aa 100644
--- a/drivers/net/wireless/intel/iwlwifi/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/Makefile
@@ -16,7 +16,7 @@ iwlwifi-objs		+= iwl-trans.o
 iwlwifi-objs		+= queue/tx.o
 
 iwlwifi-objs		+= fw/img.o fw/notif-wait.o
-iwlwifi-objs		+= fw/dbg.o fw/pnvm.o
+iwlwifi-objs		+= fw/dbg.o fw/pnvm.o fw/dump.o
 iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o
 iwlwifi-$(CONFIG_ACPI) += fw/acpi.o
 iwlwifi-$(CONFIG_EFI)	+= fw/uefi.o
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 5a534d70f253..df7c55e06f54 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2759,44 +2759,6 @@ void iwl_fw_dbg_stop_sync(struct iwl_fw_runtime *fwrt)
 }
 IWL_EXPORT_SYMBOL(iwl_fw_dbg_stop_sync);
 
-#define FSEQ_REG(x) { .addr = (x), .str = #x, }
-
-void iwl_fw_error_print_fseq_regs(struct iwl_fw_runtime *fwrt)
-{
-	struct iwl_trans *trans = fwrt->trans;
-	int i;
-	struct {
-		u32 addr;
-		const char *str;
-	} fseq_regs[] = {
-		FSEQ_REG(FSEQ_ERROR_CODE),
-		FSEQ_REG(FSEQ_TOP_INIT_VERSION),
-		FSEQ_REG(FSEQ_CNVIO_INIT_VERSION),
-		FSEQ_REG(FSEQ_OTP_VERSION),
-		FSEQ_REG(FSEQ_TOP_CONTENT_VERSION),
-		FSEQ_REG(FSEQ_ALIVE_TOKEN),
-		FSEQ_REG(FSEQ_CNVI_ID),
-		FSEQ_REG(FSEQ_CNVR_ID),
-		FSEQ_REG(CNVI_AUX_MISC_CHIP),
-		FSEQ_REG(CNVR_AUX_MISC_CHIP),
-		FSEQ_REG(CNVR_SCU_SD_REGS_SD_REG_DIG_DCDC_VTRIM),
-		FSEQ_REG(CNVR_SCU_SD_REGS_SD_REG_ACTIVE_VDIG_MIRROR),
-	};
-
-	if (!iwl_trans_grab_nic_access(trans))
-		return;
-
-	IWL_ERR(fwrt, "Fseq Registers:\n");
-
-	for (i = 0; i < ARRAY_SIZE(fseq_regs); i++)
-		IWL_ERR(fwrt, "0x%08X | %s\n",
-			iwl_read_prph_no_grab(trans, fseq_regs[i].addr),
-			fseq_regs[i].str);
-
-	iwl_trans_release_nic_access(trans);
-}
-IWL_EXPORT_SYMBOL(iwl_fw_error_print_fseq_regs);
-
 static int iwl_fw_dbg_suspend_resume_hcmd(struct iwl_trans *trans, bool suspend)
 {
 	struct iwl_dbg_suspend_resume_cmd cmd = {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 49fa2f5f8c7e..c0e84ef84f5d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2019 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2019, 2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
  */
@@ -321,4 +321,6 @@ static inline void iwl_fwrt_update_fw_versions(struct iwl_fw_runtime *fwrt,
 		fwrt->dump.fw_ver.umac_minor = le32_to_cpu(umac->umac_minor);
 	}
 }
+
+void iwl_fwrt_dump_error_logs(struct iwl_fw_runtime *fwrt);
 #endif  /* __iwl_fw_dbg_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c
new file mode 100644
index 000000000000..66f86d2a7cca
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
+ * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2015-2017 Intel Deutschland GmbH
+ */
+#include <linux/devcoredump.h>
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "dbg.h"
+#include "debugfs.h"
+#include "iwl-io.h"
+#include "iwl-prph.h"
+#include "iwl-csr.h"
+
+/*
+ * Note: This structure is read from the device with IO accesses,
+ * and the reading already does the endian conversion. As it is
+ * read with u32-sized accesses, any members with a different size
+ * need to be ordered correctly though!
+ */
+struct iwl_error_event_table_v1 {
+	u32 valid;		/* (nonzero) valid, (0) log is empty */
+	u32 error_id;		/* type of error */
+	u32 pc;			/* program counter */
+	u32 blink1;		/* branch link */
+	u32 blink2;		/* branch link */
+	u32 ilink1;		/* interrupt link */
+	u32 ilink2;		/* interrupt link */
+	u32 data1;		/* error-specific data */
+	u32 data2;		/* error-specific data */
+	u32 data3;		/* error-specific data */
+	u32 bcon_time;		/* beacon timer */
+	u32 tsf_low;		/* network timestamp function timer */
+	u32 tsf_hi;		/* network timestamp function timer */
+	u32 gp1;		/* GP1 timer register */
+	u32 gp2;		/* GP2 timer register */
+	u32 gp3;		/* GP3 timer register */
+	u32 ucode_ver;		/* uCode version */
+	u32 hw_ver;		/* HW Silicon version */
+	u32 brd_ver;		/* HW board version */
+	u32 log_pc;		/* log program counter */
+	u32 frame_ptr;		/* frame pointer */
+	u32 stack_ptr;		/* stack pointer */
+	u32 hcmd;		/* last host command header */
+	u32 isr0;		/* isr status register LMPM_NIC_ISR0:
+				 * rxtx_flag */
+	u32 isr1;		/* isr status register LMPM_NIC_ISR1:
+				 * host_flag */
+	u32 isr2;		/* isr status register LMPM_NIC_ISR2:
+				 * enc_flag */
+	u32 isr3;		/* isr status register LMPM_NIC_ISR3:
+				 * time_flag */
+	u32 isr4;		/* isr status register LMPM_NIC_ISR4:
+				 * wico interrupt */
+	u32 isr_pref;		/* isr status register LMPM_NIC_PREF_STAT */
+	u32 wait_event;		/* wait event() caller address */
+	u32 l2p_control;	/* L2pControlField */
+	u32 l2p_duration;	/* L2pDurationField */
+	u32 l2p_mhvalid;	/* L2pMhValidBits */
+	u32 l2p_addr_match;	/* L2pAddrMatchStat */
+	u32 lmpm_pmg_sel;	/* indicate which clocks are turned on
+				 * (LMPM_PMG_SEL) */
+	u32 u_timestamp;	/* indicate when the date and time of the
+				 * compilation */
+	u32 flow_handler;	/* FH read/write pointers, RX credit */
+} __packed /* LOG_ERROR_TABLE_API_S_VER_1 */;
+
+struct iwl_error_event_table {
+	u32 valid;		/* (nonzero) valid, (0) log is empty */
+	u32 error_id;		/* type of error */
+	u32 trm_hw_status0;	/* TRM HW status */
+	u32 trm_hw_status1;	/* TRM HW status */
+	u32 blink2;		/* branch link */
+	u32 ilink1;		/* interrupt link */
+	u32 ilink2;		/* interrupt link */
+	u32 data1;		/* error-specific data */
+	u32 data2;		/* error-specific data */
+	u32 data3;		/* error-specific data */
+	u32 bcon_time;		/* beacon timer */
+	u32 tsf_low;		/* network timestamp function timer */
+	u32 tsf_hi;		/* network timestamp function timer */
+	u32 gp1;		/* GP1 timer register */
+	u32 gp2;		/* GP2 timer register */
+	u32 fw_rev_type;	/* firmware revision type */
+	u32 major;		/* uCode version major */
+	u32 minor;		/* uCode version minor */
+	u32 hw_ver;		/* HW Silicon version */
+	u32 brd_ver;		/* HW board version */
+	u32 log_pc;		/* log program counter */
+	u32 frame_ptr;		/* frame pointer */
+	u32 stack_ptr;		/* stack pointer */
+	u32 hcmd;		/* last host command header */
+	u32 isr0;		/* isr status register LMPM_NIC_ISR0:
+				 * rxtx_flag */
+	u32 isr1;		/* isr status register LMPM_NIC_ISR1:
+				 * host_flag */
+	u32 isr2;		/* isr status register LMPM_NIC_ISR2:
+				 * enc_flag */
+	u32 isr3;		/* isr status register LMPM_NIC_ISR3:
+				 * time_flag */
+	u32 isr4;		/* isr status register LMPM_NIC_ISR4:
+				 * wico interrupt */
+	u32 last_cmd_id;	/* last HCMD id handled by the firmware */
+	u32 wait_event;		/* wait event() caller address */
+	u32 l2p_control;	/* L2pControlField */
+	u32 l2p_duration;	/* L2pDurationField */
+	u32 l2p_mhvalid;	/* L2pMhValidBits */
+	u32 l2p_addr_match;	/* L2pAddrMatchStat */
+	u32 lmpm_pmg_sel;	/* indicate which clocks are turned on
+				 * (LMPM_PMG_SEL) */
+	u32 u_timestamp;	/* indicate when the date and time of the
+				 * compilation */
+	u32 flow_handler;	/* FH read/write pointers, RX credit */
+} __packed /* LOG_ERROR_TABLE_API_S_VER_3 */;
+
+/*
+ * UMAC error struct - relevant starting from family 8000 chip.
+ * Note: This structure is read from the device with IO accesses,
+ * and the reading already does the endian conversion. As it is
+ * read with u32-sized accesses, any members with a different size
+ * need to be ordered correctly though!
+ */
+struct iwl_umac_error_event_table {
+	u32 valid;		/* (nonzero) valid, (0) log is empty */
+	u32 error_id;		/* type of error */
+	u32 blink1;		/* branch link */
+	u32 blink2;		/* branch link */
+	u32 ilink1;		/* interrupt link */
+	u32 ilink2;		/* interrupt link */
+	u32 data1;		/* error-specific data */
+	u32 data2;		/* error-specific data */
+	u32 data3;		/* error-specific data */
+	u32 umac_major;
+	u32 umac_minor;
+	u32 frame_pointer;	/* core register 27*/
+	u32 stack_pointer;	/* core register 28 */
+	u32 cmd_header;		/* latest host cmd sent to UMAC */
+	u32 nic_isr_pref;	/* ISR status register */
+} __packed;
+
+#define ERROR_START_OFFSET  (1 * sizeof(u32))
+#define ERROR_ELEM_SIZE     (7 * sizeof(u32))
+
+static void iwl_fwrt_dump_umac_error_log(struct iwl_fw_runtime *fwrt)
+{
+	struct iwl_trans *trans = fwrt->trans;
+	struct iwl_umac_error_event_table table = {};
+	u32 base = fwrt->trans->dbg.umac_error_event_table;
+
+	if (!base &&
+	    !(fwrt->trans->dbg.error_event_table_tlv_status &
+	      IWL_ERROR_EVENT_TABLE_UMAC))
+		return;
+
+	iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table));
+
+	if (table.valid)
+		fwrt->dump.umac_err_id = table.error_id;
+
+	if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) {
+		IWL_ERR(trans, "Start IWL Error Log Dump:\n");
+		IWL_ERR(trans, "Transport status: 0x%08lX, valid: %d\n",
+			fwrt->trans->status, table.valid);
+	}
+
+	IWL_ERR(fwrt, "0x%08X | %s\n", table.error_id,
+		iwl_fw_lookup_assert_desc(table.error_id));
+	IWL_ERR(fwrt, "0x%08X | umac branchlink1\n", table.blink1);
+	IWL_ERR(fwrt, "0x%08X | umac branchlink2\n", table.blink2);
+	IWL_ERR(fwrt, "0x%08X | umac interruptlink1\n", table.ilink1);
+	IWL_ERR(fwrt, "0x%08X | umac interruptlink2\n", table.ilink2);
+	IWL_ERR(fwrt, "0x%08X | umac data1\n", table.data1);
+	IWL_ERR(fwrt, "0x%08X | umac data2\n", table.data2);
+	IWL_ERR(fwrt, "0x%08X | umac data3\n", table.data3);
+	IWL_ERR(fwrt, "0x%08X | umac major\n", table.umac_major);
+	IWL_ERR(fwrt, "0x%08X | umac minor\n", table.umac_minor);
+	IWL_ERR(fwrt, "0x%08X | frame pointer\n", table.frame_pointer);
+	IWL_ERR(fwrt, "0x%08X | stack pointer\n", table.stack_pointer);
+	IWL_ERR(fwrt, "0x%08X | last host cmd\n", table.cmd_header);
+	IWL_ERR(fwrt, "0x%08X | isr status reg\n", table.nic_isr_pref);
+}
+
+static void iwl_fwrt_dump_lmac_error_log(struct iwl_fw_runtime *fwrt, u8 lmac_num)
+{
+	struct iwl_trans *trans = fwrt->trans;
+	struct iwl_error_event_table table = {};
+	u32 val, base = fwrt->trans->dbg.lmac_error_event_table[lmac_num];
+
+	if (fwrt->cur_fw_img == IWL_UCODE_INIT) {
+		if (!base)
+			base = fwrt->fw->init_errlog_ptr;
+	} else {
+		if (!base)
+			base = fwrt->fw->inst_errlog_ptr;
+	}
+
+	if (base < 0x400000) {
+		IWL_ERR(fwrt,
+			"Not valid error log pointer 0x%08X for %s uCode\n",
+			base,
+			(fwrt->cur_fw_img == IWL_UCODE_INIT)
+			? "Init" : "RT");
+		return;
+	}
+
+	/* check if there is a HW error */
+	val = iwl_trans_read_mem32(trans, base);
+	if (((val & ~0xf) == 0xa5a5a5a0) || ((val & ~0xf) == 0x5a5a5a50)) {
+		int err;
+
+		IWL_ERR(trans, "HW error, resetting before reading\n");
+
+		/* reset the device */
+		iwl_trans_sw_reset(trans);
+
+		err = iwl_finish_nic_init(trans, trans->trans_cfg);
+		if (err)
+			return;
+	}
+
+	iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table));
+
+	if (table.valid)
+		fwrt->dump.lmac_err_id[lmac_num] = table.error_id;
+
+	if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) {
+		IWL_ERR(trans, "Start IWL Error Log Dump:\n");
+		IWL_ERR(trans, "Transport status: 0x%08lX, valid: %d\n",
+			fwrt->trans->status, table.valid);
+	}
+
+	/* Do not change this output - scripts rely on it */
+
+	IWL_ERR(fwrt, "Loaded firmware version: %s\n", fwrt->fw->fw_version);
+
+	IWL_ERR(fwrt, "0x%08X | %-28s\n", table.error_id,
+		iwl_fw_lookup_assert_desc(table.error_id));
+	IWL_ERR(fwrt, "0x%08X | trm_hw_status0\n", table.trm_hw_status0);
+	IWL_ERR(fwrt, "0x%08X | trm_hw_status1\n", table.trm_hw_status1);
+	IWL_ERR(fwrt, "0x%08X | branchlink2\n", table.blink2);
+	IWL_ERR(fwrt, "0x%08X | interruptlink1\n", table.ilink1);
+	IWL_ERR(fwrt, "0x%08X | interruptlink2\n", table.ilink2);
+	IWL_ERR(fwrt, "0x%08X | data1\n", table.data1);
+	IWL_ERR(fwrt, "0x%08X | data2\n", table.data2);
+	IWL_ERR(fwrt, "0x%08X | data3\n", table.data3);
+	IWL_ERR(fwrt, "0x%08X | beacon time\n", table.bcon_time);
+	IWL_ERR(fwrt, "0x%08X | tsf low\n", table.tsf_low);
+	IWL_ERR(fwrt, "0x%08X | tsf hi\n", table.tsf_hi);
+	IWL_ERR(fwrt, "0x%08X | time gp1\n", table.gp1);
+	IWL_ERR(fwrt, "0x%08X | time gp2\n", table.gp2);
+	IWL_ERR(fwrt, "0x%08X | uCode revision type\n", table.fw_rev_type);
+	IWL_ERR(fwrt, "0x%08X | uCode version major\n", table.major);
+	IWL_ERR(fwrt, "0x%08X | uCode version minor\n", table.minor);
+	IWL_ERR(fwrt, "0x%08X | hw version\n", table.hw_ver);
+	IWL_ERR(fwrt, "0x%08X | board version\n", table.brd_ver);
+	IWL_ERR(fwrt, "0x%08X | hcmd\n", table.hcmd);
+	IWL_ERR(fwrt, "0x%08X | isr0\n", table.isr0);
+	IWL_ERR(fwrt, "0x%08X | isr1\n", table.isr1);
+	IWL_ERR(fwrt, "0x%08X | isr2\n", table.isr2);
+	IWL_ERR(fwrt, "0x%08X | isr3\n", table.isr3);
+	IWL_ERR(fwrt, "0x%08X | isr4\n", table.isr4);
+	IWL_ERR(fwrt, "0x%08X | last cmd Id\n", table.last_cmd_id);
+	IWL_ERR(fwrt, "0x%08X | wait_event\n", table.wait_event);
+	IWL_ERR(fwrt, "0x%08X | l2p_control\n", table.l2p_control);
+	IWL_ERR(fwrt, "0x%08X | l2p_duration\n", table.l2p_duration);
+	IWL_ERR(fwrt, "0x%08X | l2p_mhvalid\n", table.l2p_mhvalid);
+	IWL_ERR(fwrt, "0x%08X | l2p_addr_match\n", table.l2p_addr_match);
+	IWL_ERR(fwrt, "0x%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel);
+	IWL_ERR(fwrt, "0x%08X | timestamp\n", table.u_timestamp);
+	IWL_ERR(fwrt, "0x%08X | flow_handler\n", table.flow_handler);
+}
+
+static void iwl_fwrt_dump_iml_error_log(struct iwl_fw_runtime *fwrt)
+{
+	struct iwl_trans *trans = fwrt->trans;
+	u32 error, data1;
+
+	if (fwrt->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_22000) {
+		error = UMAG_SB_CPU_2_STATUS;
+		data1 = UMAG_SB_CPU_1_STATUS;
+	} else if (fwrt->trans->trans_cfg->device_family >=
+		   IWL_DEVICE_FAMILY_8000) {
+		error = SB_CPU_2_STATUS;
+		data1 = SB_CPU_1_STATUS;
+	} else {
+		return;
+	}
+
+	error = iwl_read_umac_prph(trans, UMAG_SB_CPU_2_STATUS);
+
+	IWL_ERR(trans, "IML/ROM dump:\n");
+
+	if (error & 0xFFFF0000)
+		IWL_ERR(trans, "0x%04X | IML/ROM SYSASSERT\n", error >> 16);
+
+	IWL_ERR(fwrt, "0x%08X | IML/ROM error/state\n", error);
+	IWL_ERR(fwrt, "0x%08X | IML/ROM data1\n",
+		iwl_read_umac_prph(trans, data1));
+
+	if (fwrt->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_22000)
+		IWL_ERR(fwrt, "0x%08X | IML/ROM WFPM_AUTH_KEY_0\n",
+			iwl_read_umac_prph(trans, SB_MODIFY_CFG_FLAG));
+}
+
+#define FSEQ_REG(x) { .addr = (x), .str = #x, }
+
+static void iwl_fwrt_dump_fseq_regs(struct iwl_fw_runtime *fwrt)
+{
+	struct iwl_trans *trans = fwrt->trans;
+	int i;
+	struct {
+		u32 addr;
+		const char *str;
+	} fseq_regs[] = {
+		FSEQ_REG(FSEQ_ERROR_CODE),
+		FSEQ_REG(FSEQ_TOP_INIT_VERSION),
+		FSEQ_REG(FSEQ_CNVIO_INIT_VERSION),
+		FSEQ_REG(FSEQ_OTP_VERSION),
+		FSEQ_REG(FSEQ_TOP_CONTENT_VERSION),
+		FSEQ_REG(FSEQ_ALIVE_TOKEN),
+		FSEQ_REG(FSEQ_CNVI_ID),
+		FSEQ_REG(FSEQ_CNVR_ID),
+		FSEQ_REG(CNVI_AUX_MISC_CHIP),
+		FSEQ_REG(CNVR_AUX_MISC_CHIP),
+		FSEQ_REG(CNVR_SCU_SD_REGS_SD_REG_DIG_DCDC_VTRIM),
+		FSEQ_REG(CNVR_SCU_SD_REGS_SD_REG_ACTIVE_VDIG_MIRROR),
+	};
+
+	if (!iwl_trans_grab_nic_access(trans))
+		return;
+
+	IWL_ERR(fwrt, "Fseq Registers:\n");
+
+	for (i = 0; i < ARRAY_SIZE(fseq_regs); i++)
+		IWL_ERR(fwrt, "0x%08X | %s\n",
+			iwl_read_prph_no_grab(trans, fseq_regs[i].addr),
+			fseq_regs[i].str);
+
+	iwl_trans_release_nic_access(trans);
+}
+
+void iwl_fwrt_dump_error_logs(struct iwl_fw_runtime *fwrt)
+{
+	if (!test_bit(STATUS_DEVICE_ENABLED, &fwrt->trans->status)) {
+		IWL_ERR(fwrt,
+			"DEVICE_ENABLED bit is not set. Aborting dump.\n");
+		return;
+	}
+
+	iwl_fwrt_dump_lmac_error_log(fwrt, 0);
+	if (fwrt->trans->dbg.lmac_error_event_table[1])
+		iwl_fwrt_dump_lmac_error_log(fwrt, 1);
+	iwl_fwrt_dump_umac_error_log(fwrt);
+	iwl_fwrt_dump_iml_error_log(fwrt);
+	iwl_fwrt_dump_fseq_regs(fwrt);
+}
+IWL_EXPORT_SYMBOL(iwl_fwrt_dump_error_logs);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index bf99eed23a9f..b50942f28bb7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1452,7 +1452,12 @@ void iwl_mvm_hwrate_to_tx_rate(u32 rate_n_flags,
 			       struct ieee80211_tx_rate *r);
 u8 iwl_mvm_mac80211_idx_to_hwrate(int rate_idx);
 u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac);
-void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm);
+
+static inline void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm)
+{
+	iwl_fwrt_dump_error_logs(&mvm->fwrt);
+}
+
 u8 first_antenna(u8 mask);
 u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx);
 void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, int clock_type, u32 *gp2,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 0e8ad798ab57..4a3d2971a98b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -238,316 +238,6 @@ u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx)
 	return last_idx;
 }
 
-/*
- * Note: This structure is read from the device with IO accesses,
- * and the reading already does the endian conversion. As it is
- * read with u32-sized accesses, any members with a different size
- * need to be ordered correctly though!
- */
-struct iwl_error_event_table_v1 {
-	u32 valid;		/* (nonzero) valid, (0) log is empty */
-	u32 error_id;		/* type of error */
-	u32 pc;			/* program counter */
-	u32 blink1;		/* branch link */
-	u32 blink2;		/* branch link */
-	u32 ilink1;		/* interrupt link */
-	u32 ilink2;		/* interrupt link */
-	u32 data1;		/* error-specific data */
-	u32 data2;		/* error-specific data */
-	u32 data3;		/* error-specific data */
-	u32 bcon_time;		/* beacon timer */
-	u32 tsf_low;		/* network timestamp function timer */
-	u32 tsf_hi;		/* network timestamp function timer */
-	u32 gp1;		/* GP1 timer register */
-	u32 gp2;		/* GP2 timer register */
-	u32 gp3;		/* GP3 timer register */
-	u32 ucode_ver;		/* uCode version */
-	u32 hw_ver;		/* HW Silicon version */
-	u32 brd_ver;		/* HW board version */
-	u32 log_pc;		/* log program counter */
-	u32 frame_ptr;		/* frame pointer */
-	u32 stack_ptr;		/* stack pointer */
-	u32 hcmd;		/* last host command header */
-	u32 isr0;		/* isr status register LMPM_NIC_ISR0:
-				 * rxtx_flag */
-	u32 isr1;		/* isr status register LMPM_NIC_ISR1:
-				 * host_flag */
-	u32 isr2;		/* isr status register LMPM_NIC_ISR2:
-				 * enc_flag */
-	u32 isr3;		/* isr status register LMPM_NIC_ISR3:
-				 * time_flag */
-	u32 isr4;		/* isr status register LMPM_NIC_ISR4:
-				 * wico interrupt */
-	u32 isr_pref;		/* isr status register LMPM_NIC_PREF_STAT */
-	u32 wait_event;		/* wait event() caller address */
-	u32 l2p_control;	/* L2pControlField */
-	u32 l2p_duration;	/* L2pDurationField */
-	u32 l2p_mhvalid;	/* L2pMhValidBits */
-	u32 l2p_addr_match;	/* L2pAddrMatchStat */
-	u32 lmpm_pmg_sel;	/* indicate which clocks are turned on
-				 * (LMPM_PMG_SEL) */
-	u32 u_timestamp;	/* indicate when the date and time of the
-				 * compilation */
-	u32 flow_handler;	/* FH read/write pointers, RX credit */
-} __packed /* LOG_ERROR_TABLE_API_S_VER_1 */;
-
-struct iwl_error_event_table {
-	u32 valid;		/* (nonzero) valid, (0) log is empty */
-	u32 error_id;		/* type of error */
-	u32 trm_hw_status0;	/* TRM HW status */
-	u32 trm_hw_status1;	/* TRM HW status */
-	u32 blink2;		/* branch link */
-	u32 ilink1;		/* interrupt link */
-	u32 ilink2;		/* interrupt link */
-	u32 data1;		/* error-specific data */
-	u32 data2;		/* error-specific data */
-	u32 data3;		/* error-specific data */
-	u32 bcon_time;		/* beacon timer */
-	u32 tsf_low;		/* network timestamp function timer */
-	u32 tsf_hi;		/* network timestamp function timer */
-	u32 gp1;		/* GP1 timer register */
-	u32 gp2;		/* GP2 timer register */
-	u32 fw_rev_type;	/* firmware revision type */
-	u32 major;		/* uCode version major */
-	u32 minor;		/* uCode version minor */
-	u32 hw_ver;		/* HW Silicon version */
-	u32 brd_ver;		/* HW board version */
-	u32 log_pc;		/* log program counter */
-	u32 frame_ptr;		/* frame pointer */
-	u32 stack_ptr;		/* stack pointer */
-	u32 hcmd;		/* last host command header */
-	u32 isr0;		/* isr status register LMPM_NIC_ISR0:
-				 * rxtx_flag */
-	u32 isr1;		/* isr status register LMPM_NIC_ISR1:
-				 * host_flag */
-	u32 isr2;		/* isr status register LMPM_NIC_ISR2:
-				 * enc_flag */
-	u32 isr3;		/* isr status register LMPM_NIC_ISR3:
-				 * time_flag */
-	u32 isr4;		/* isr status register LMPM_NIC_ISR4:
-				 * wico interrupt */
-	u32 last_cmd_id;	/* last HCMD id handled by the firmware */
-	u32 wait_event;		/* wait event() caller address */
-	u32 l2p_control;	/* L2pControlField */
-	u32 l2p_duration;	/* L2pDurationField */
-	u32 l2p_mhvalid;	/* L2pMhValidBits */
-	u32 l2p_addr_match;	/* L2pAddrMatchStat */
-	u32 lmpm_pmg_sel;	/* indicate which clocks are turned on
-				 * (LMPM_PMG_SEL) */
-	u32 u_timestamp;	/* indicate when the date and time of the
-				 * compilation */
-	u32 flow_handler;	/* FH read/write pointers, RX credit */
-} __packed /* LOG_ERROR_TABLE_API_S_VER_3 */;
-
-/*
- * UMAC error struct - relevant starting from family 8000 chip.
- * Note: This structure is read from the device with IO accesses,
- * and the reading already does the endian conversion. As it is
- * read with u32-sized accesses, any members with a different size
- * need to be ordered correctly though!
- */
-struct iwl_umac_error_event_table {
-	u32 valid;		/* (nonzero) valid, (0) log is empty */
-	u32 error_id;		/* type of error */
-	u32 blink1;		/* branch link */
-	u32 blink2;		/* branch link */
-	u32 ilink1;		/* interrupt link */
-	u32 ilink2;		/* interrupt link */
-	u32 data1;		/* error-specific data */
-	u32 data2;		/* error-specific data */
-	u32 data3;		/* error-specific data */
-	u32 umac_major;
-	u32 umac_minor;
-	u32 frame_pointer;	/* core register 27*/
-	u32 stack_pointer;	/* core register 28 */
-	u32 cmd_header;		/* latest host cmd sent to UMAC */
-	u32 nic_isr_pref;	/* ISR status register */
-} __packed;
-
-#define ERROR_START_OFFSET  (1 * sizeof(u32))
-#define ERROR_ELEM_SIZE     (7 * sizeof(u32))
-
-static void iwl_mvm_dump_umac_error_log(struct iwl_mvm *mvm)
-{
-	struct iwl_trans *trans = mvm->trans;
-	struct iwl_umac_error_event_table table = {};
-	u32 base = mvm->trans->dbg.umac_error_event_table;
-
-	if (!base &&
-	    !(mvm->trans->dbg.error_event_table_tlv_status &
-	      IWL_ERROR_EVENT_TABLE_UMAC))
-		return;
-
-	iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table));
-
-	if (table.valid)
-		mvm->fwrt.dump.umac_err_id = table.error_id;
-
-	if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) {
-		IWL_ERR(trans, "Start IWL Error Log Dump:\n");
-		IWL_ERR(trans, "Status: 0x%08lX, count: %d\n",
-			mvm->status, table.valid);
-	}
-
-	IWL_ERR(mvm, "0x%08X | %s\n", table.error_id,
-		iwl_fw_lookup_assert_desc(table.error_id));
-	IWL_ERR(mvm, "0x%08X | umac branchlink1\n", table.blink1);
-	IWL_ERR(mvm, "0x%08X | umac branchlink2\n", table.blink2);
-	IWL_ERR(mvm, "0x%08X | umac interruptlink1\n", table.ilink1);
-	IWL_ERR(mvm, "0x%08X | umac interruptlink2\n", table.ilink2);
-	IWL_ERR(mvm, "0x%08X | umac data1\n", table.data1);
-	IWL_ERR(mvm, "0x%08X | umac data2\n", table.data2);
-	IWL_ERR(mvm, "0x%08X | umac data3\n", table.data3);
-	IWL_ERR(mvm, "0x%08X | umac major\n", table.umac_major);
-	IWL_ERR(mvm, "0x%08X | umac minor\n", table.umac_minor);
-	IWL_ERR(mvm, "0x%08X | frame pointer\n", table.frame_pointer);
-	IWL_ERR(mvm, "0x%08X | stack pointer\n", table.stack_pointer);
-	IWL_ERR(mvm, "0x%08X | last host cmd\n", table.cmd_header);
-	IWL_ERR(mvm, "0x%08X | isr status reg\n", table.nic_isr_pref);
-}
-
-static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u8 lmac_num)
-{
-	struct iwl_trans *trans = mvm->trans;
-	struct iwl_error_event_table table = {};
-	u32 val, base = mvm->trans->dbg.lmac_error_event_table[lmac_num];
-
-	if (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) {
-		if (!base)
-			base = mvm->fw->init_errlog_ptr;
-	} else {
-		if (!base)
-			base = mvm->fw->inst_errlog_ptr;
-	}
-
-	if (base < 0x400000) {
-		IWL_ERR(mvm,
-			"Not valid error log pointer 0x%08X for %s uCode\n",
-			base,
-			(mvm->fwrt.cur_fw_img == IWL_UCODE_INIT)
-			? "Init" : "RT");
-		return;
-	}
-
-	/* check if there is a HW error */
-	val = iwl_trans_read_mem32(trans, base);
-	if (((val & ~0xf) == 0xa5a5a5a0) || ((val & ~0xf) == 0x5a5a5a50)) {
-		int err;
-
-		IWL_ERR(trans, "HW error, resetting before reading\n");
-
-		/* reset the device */
-		iwl_trans_sw_reset(trans);
-
-		err = iwl_finish_nic_init(trans, trans->trans_cfg);
-		if (err)
-			return;
-	}
-
-	iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table));
-
-	if (table.valid)
-		mvm->fwrt.dump.lmac_err_id[lmac_num] = table.error_id;
-
-	if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) {
-		IWL_ERR(trans, "Start IWL Error Log Dump:\n");
-		IWL_ERR(trans, "Status: 0x%08lX, count: %d\n",
-			mvm->status, table.valid);
-	}
-
-	/* Do not change this output - scripts rely on it */
-
-	IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version);
-
-	IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id,
-		iwl_fw_lookup_assert_desc(table.error_id));
-	IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0);
-	IWL_ERR(mvm, "0x%08X | trm_hw_status1\n", table.trm_hw_status1);
-	IWL_ERR(mvm, "0x%08X | branchlink2\n", table.blink2);
-	IWL_ERR(mvm, "0x%08X | interruptlink1\n", table.ilink1);
-	IWL_ERR(mvm, "0x%08X | interruptlink2\n", table.ilink2);
-	IWL_ERR(mvm, "0x%08X | data1\n", table.data1);
-	IWL_ERR(mvm, "0x%08X | data2\n", table.data2);
-	IWL_ERR(mvm, "0x%08X | data3\n", table.data3);
-	IWL_ERR(mvm, "0x%08X | beacon time\n", table.bcon_time);
-	IWL_ERR(mvm, "0x%08X | tsf low\n", table.tsf_low);
-	IWL_ERR(mvm, "0x%08X | tsf hi\n", table.tsf_hi);
-	IWL_ERR(mvm, "0x%08X | time gp1\n", table.gp1);
-	IWL_ERR(mvm, "0x%08X | time gp2\n", table.gp2);
-	IWL_ERR(mvm, "0x%08X | uCode revision type\n", table.fw_rev_type);
-	IWL_ERR(mvm, "0x%08X | uCode version major\n", table.major);
-	IWL_ERR(mvm, "0x%08X | uCode version minor\n", table.minor);
-	IWL_ERR(mvm, "0x%08X | hw version\n", table.hw_ver);
-	IWL_ERR(mvm, "0x%08X | board version\n", table.brd_ver);
-	IWL_ERR(mvm, "0x%08X | hcmd\n", table.hcmd);
-	IWL_ERR(mvm, "0x%08X | isr0\n", table.isr0);
-	IWL_ERR(mvm, "0x%08X | isr1\n", table.isr1);
-	IWL_ERR(mvm, "0x%08X | isr2\n", table.isr2);
-	IWL_ERR(mvm, "0x%08X | isr3\n", table.isr3);
-	IWL_ERR(mvm, "0x%08X | isr4\n", table.isr4);
-	IWL_ERR(mvm, "0x%08X | last cmd Id\n", table.last_cmd_id);
-	IWL_ERR(mvm, "0x%08X | wait_event\n", table.wait_event);
-	IWL_ERR(mvm, "0x%08X | l2p_control\n", table.l2p_control);
-	IWL_ERR(mvm, "0x%08X | l2p_duration\n", table.l2p_duration);
-	IWL_ERR(mvm, "0x%08X | l2p_mhvalid\n", table.l2p_mhvalid);
-	IWL_ERR(mvm, "0x%08X | l2p_addr_match\n", table.l2p_addr_match);
-	IWL_ERR(mvm, "0x%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel);
-	IWL_ERR(mvm, "0x%08X | timestamp\n", table.u_timestamp);
-	IWL_ERR(mvm, "0x%08X | flow_handler\n", table.flow_handler);
-}
-
-static void iwl_mvm_dump_iml_error_log(struct iwl_mvm *mvm)
-{
-	struct iwl_trans *trans = mvm->trans;
-	u32 error, data1;
-
-	if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_22000) {
-		error = UMAG_SB_CPU_2_STATUS;
-		data1 = UMAG_SB_CPU_1_STATUS;
-	} else if (mvm->trans->trans_cfg->device_family >=
-		   IWL_DEVICE_FAMILY_8000) {
-		error = SB_CPU_2_STATUS;
-		data1 = SB_CPU_1_STATUS;
-	} else {
-		return;
-	}
-
-	error = iwl_read_umac_prph(trans, UMAG_SB_CPU_2_STATUS);
-
-	IWL_ERR(trans, "IML/ROM dump:\n");
-
-	if (error & 0xFFFF0000)
-		IWL_ERR(trans, "0x%04X | IML/ROM SYSASSERT\n", error >> 16);
-
-	IWL_ERR(mvm, "0x%08X | IML/ROM error/state\n", error);
-	IWL_ERR(mvm, "0x%08X | IML/ROM data1\n",
-		iwl_read_umac_prph(trans, data1));
-
-	if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_22000)
-		IWL_ERR(mvm, "0x%08X | IML/ROM WFPM_AUTH_KEY_0\n",
-			iwl_read_umac_prph(trans, SB_MODIFY_CFG_FLAG));
-}
-
-void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm)
-{
-	if (!test_bit(STATUS_DEVICE_ENABLED, &mvm->trans->status)) {
-		IWL_ERR(mvm,
-			"DEVICE_ENABLED bit is not set. Aborting dump.\n");
-		return;
-	}
-
-	iwl_mvm_dump_lmac_error_log(mvm, 0);
-
-	if (mvm->trans->dbg.lmac_error_event_table[1])
-		iwl_mvm_dump_lmac_error_log(mvm, 1);
-
-	iwl_mvm_dump_umac_error_log(mvm);
-
-	iwl_mvm_dump_iml_error_log(mvm);
-
-	iwl_fw_error_print_fseq_regs(&mvm->fwrt);
-}
-
 int iwl_mvm_reconfig_scd(struct iwl_mvm *mvm, int queue, int fifo, int sta_id,
 			 int tid, int frame_limit, u16 ssn)
 {

From c863797b8198e1b34516023198708ddb0f9fd2b9 Mon Sep 17 00:00:00 2001
From: ybaruch <yaara.baruch@intel.com>
Date: Mon, 21 Jun 2021 10:37:38 +0300
Subject: [PATCH 2533/3292] iwlwifi: add 9560 killer device

add new killer devices configurations.

Signed-off-by: ybaruch <yaara.baruch@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.4179f7191531.I3d5ed6b2b39fcd42863a679e21bda23a6c14253e@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/9000.c   | 6 +++++-
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 ++
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index df1297358379..871533beff30 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/module.h>
 #include <linux/stringify.h>
@@ -171,8 +171,12 @@ const char iwl9260_killer_1550_name[] =
 	"Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW) 160MHz";
 const char iwl9560_killer_1550i_name[] =
 	"Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)";
+const char iwl9560_killer_1550i_160_name[] =
+	"Killer(R) Wireless-AC 1550i Wireless Network Adapter (9560NGW) 160MHz";
 const char iwl9560_killer_1550s_name[] =
 	"Killer (R) Wireless-AC 1550s Wireless Network Adapter (9560NGW)";
+const char iwl9560_killer_1550s_160_name[] =
+	"Killer(R) Wireless-AC 1550s Wireless Network Adapter (9560D2W) 160MHz";
 
 const struct iwl_cfg iwl9260_2ac_cfg = {
 	.fw_name_pre = IWL9260_FW_PRE,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 3e4c6a809595..bf6ee56d4d96 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -506,6 +506,8 @@ extern const char iwl_ax201_killer_1650s_name[];
 extern const char iwl_ax201_killer_1650i_name[];
 extern const char iwl_ax210_killer_1675w_name[];
 extern const char iwl_ax210_killer_1675x_name[];
+extern const char iwl9560_killer_1550i_160_name[];
+extern const char iwl9560_killer_1550s_160_name[];
 extern const char iwl_ax211_name[];
 extern const char iwl_ax221_name[];
 extern const char iwl_ax231_name[];
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index a92c5f0044cd..16baee3d52ae 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -532,6 +532,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0x31DC, 0x1552, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_name),
 	IWL_DEV_INFO(0xA370, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550s_name),
 	IWL_DEV_INFO(0xA370, 0x1552, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_name),
+	IWL_DEV_INFO(0x51F0, 0x1552, iwl9560_2ac_cfg_soc, iwl9560_killer_1550s_160_name),
+	IWL_DEV_INFO(0x51F0, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_160_name),
 
 	IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name),
 

From 48d0c8d5a0b9999f4111efc6a1afa85199f039ea Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 21 Jun 2021 10:37:39 +0300
Subject: [PATCH 2534/3292] iwlwifi: fw: dump TCM error table if present

If the TCM is present in the hardware (as advertised in the firmware
file TLV data), dump its error log table during firmware error dumps.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.2d2149f6654f.Id831f8fbca59900ba7efc623ffca0ca938b664d3@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/dump.c  | 60 +++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/file.h  |  5 ++
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c  | 11 ++++
 .../net/wireless/intel/iwlwifi/iwl-trans.h    |  3 +
 4 files changed, 79 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dump.c b/drivers/net/wireless/intel/iwlwifi/fw/dump.c
index 66f86d2a7cca..a1842205e86a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dump.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dump.c
@@ -271,6 +271,65 @@ static void iwl_fwrt_dump_lmac_error_log(struct iwl_fw_runtime *fwrt, u8 lmac_nu
 	IWL_ERR(fwrt, "0x%08X | flow_handler\n", table.flow_handler);
 }
 
+/*
+ * TCM error struct.
+ * Note: This structure is read from the device with IO accesses,
+ * and the reading already does the endian conversion. As it is
+ * read with u32-sized accesses, any members with a different size
+ * need to be ordered correctly though!
+ */
+struct iwl_tcm_error_event_table {
+	u32 valid;
+	u32 error_id;
+	u32 blink2;
+	u32 ilink1;
+	u32 ilink2;
+	u32 data1, data2, data3;
+	u32 logpc;
+	u32 frame_pointer;
+	u32 stack_pointer;
+	u32 msgid;
+	u32 isr;
+	u32 hw_status[5];
+	u32 sw_status[1];
+	u32 reserved[4];
+} __packed; /* TCM_LOG_ERROR_TABLE_API_S_VER_1 */
+
+static void iwl_fwrt_dump_tcm_error_log(struct iwl_fw_runtime *fwrt)
+{
+	struct iwl_trans *trans = fwrt->trans;
+	struct iwl_tcm_error_event_table table = {};
+	u32 base = fwrt->trans->dbg.tcm_error_event_table;
+	int i;
+
+	if (!base ||
+	    !(fwrt->trans->dbg.error_event_table_tlv_status &
+	      IWL_ERROR_EVENT_TABLE_TCM))
+		return;
+
+	iwl_trans_read_mem_bytes(trans, base, &table, sizeof(table));
+
+	IWL_ERR(fwrt, "TCM status:\n");
+	IWL_ERR(fwrt, "0x%08X | error ID\n", table.error_id);
+	IWL_ERR(fwrt, "0x%08X | tcm branchlink2\n", table.blink2);
+	IWL_ERR(fwrt, "0x%08X | tcm interruptlink1\n", table.ilink1);
+	IWL_ERR(fwrt, "0x%08X | tcm interruptlink2\n", table.ilink2);
+	IWL_ERR(fwrt, "0x%08X | tcm data1\n", table.data1);
+	IWL_ERR(fwrt, "0x%08X | tcm data2\n", table.data2);
+	IWL_ERR(fwrt, "0x%08X | tcm data3\n", table.data3);
+	IWL_ERR(fwrt, "0x%08X | tcm log PC\n", table.logpc);
+	IWL_ERR(fwrt, "0x%08X | tcm frame pointer\n", table.frame_pointer);
+	IWL_ERR(fwrt, "0x%08X | tcm stack pointer\n", table.stack_pointer);
+	IWL_ERR(fwrt, "0x%08X | tcm msg ID\n", table.msgid);
+	IWL_ERR(fwrt, "0x%08X | tcm ISR status\n", table.isr);
+	for (i = 0; i < ARRAY_SIZE(table.hw_status); i++)
+		IWL_ERR(fwrt, "0x%08X | tcm HW status[%d]\n",
+			table.hw_status[i], i);
+	for (i = 0; i < ARRAY_SIZE(table.sw_status); i++)
+		IWL_ERR(fwrt, "0x%08X | tcm SW status[%d]\n",
+			table.sw_status[i], i);
+}
+
 static void iwl_fwrt_dump_iml_error_log(struct iwl_fw_runtime *fwrt)
 {
 	struct iwl_trans *trans = fwrt->trans;
@@ -352,6 +411,7 @@ void iwl_fwrt_dump_error_logs(struct iwl_fw_runtime *fwrt)
 	if (fwrt->trans->dbg.lmac_error_event_table[1])
 		iwl_fwrt_dump_lmac_error_log(fwrt, 1);
 	iwl_fwrt_dump_umac_error_log(fwrt);
+	iwl_fwrt_dump_tcm_error_log(fwrt);
 	iwl_fwrt_dump_iml_error_log(fwrt);
 	iwl_fwrt_dump_fseq_regs(fwrt);
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 74e25a6ecc3d..9a8c7b7a0816 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -98,6 +98,7 @@ enum iwl_ucode_tlv_type {
 
 	IWL_UCODE_TLV_PNVM_VERSION		= 62,
 	IWL_UCODE_TLV_PNVM_SKU			= 64,
+	IWL_UCODE_TLV_TCM_DEBUG_ADDRS		= 65,
 
 	IWL_UCODE_TLV_FW_NUM_STATIONS		= IWL_UCODE_TLV_CONST_BASE + 0,
 
@@ -950,6 +951,10 @@ struct iwl_fw_cmd_version {
 	u8 notif_ver;
 } __packed;
 
+struct iwl_fw_tcm_error_addr {
+	__le32 addr;
+}; /* FW_TLV_TCM_ERROR_INFO_ADDRS_S */
+
 static inline size_t _iwl_tlv_array_len(const struct iwl_ucode_tlv *tlv,
 					size_t fixed_size, size_t var_size)
 {
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 884750bf7840..977dce686bdb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1117,6 +1117,17 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv,
 				IWL_ERROR_EVENT_TABLE_LMAC1;
 			break;
 			}
+		case IWL_UCODE_TLV_TCM_DEBUG_ADDRS: {
+			struct iwl_fw_tcm_error_addr *ptr = (void *)tlv_data;
+
+			if (tlv_len != sizeof(*ptr))
+				goto invalid_tlv_len;
+			drv->trans->dbg.tcm_error_event_table =
+				le32_to_cpu(ptr->addr) & ~FW_ADDR_CACHE_CONTROL;
+			drv->trans->dbg.error_event_table_tlv_status |=
+				IWL_ERROR_EVENT_TABLE_TCM;
+			break;
+			}
 		case IWL_UCODE_TLV_TYPE_DEBUG_INFO:
 		case IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION:
 		case IWL_UCODE_TLV_TYPE_HCMD:
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 8d745e0c0394..0199d7a5a648 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -193,6 +193,7 @@ enum iwl_error_event_table_status {
 	IWL_ERROR_EVENT_TABLE_LMAC1 = BIT(0),
 	IWL_ERROR_EVENT_TABLE_LMAC2 = BIT(1),
 	IWL_ERROR_EVENT_TABLE_UMAC = BIT(2),
+	IWL_ERROR_EVENT_TABLE_TCM = BIT(3),
 };
 
 /**
@@ -708,6 +709,7 @@ struct iwl_self_init_dram {
  * @trigger_tlv: array of pointers to triggers TLVs for debug
  * @lmac_error_event_table: addrs of lmacs error tables
  * @umac_error_event_table: addr of umac error table
+ * @tcm_error_event_table: address of TCM error table
  * @error_event_table_tlv_status: bitmap that indicates what error table
  *	pointers was recevied via TLV. uses enum &iwl_error_event_table_status
  * @internal_ini_cfg: internal debug cfg state. Uses &enum iwl_ini_cfg_state
@@ -734,6 +736,7 @@ struct iwl_trans_debug {
 
 	u32 lmac_error_event_table[2];
 	u32 umac_error_event_table;
+	u32 tcm_error_event_table;
 	unsigned int error_event_table_tlv_status;
 
 	enum iwl_ini_cfg_state internal_ini_cfg;

From 4c59eac6ac434e08b65edd3d4bef41adfa90f58e Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Mon, 21 Jun 2021 10:37:40 +0300
Subject: [PATCH 2535/3292] iwlwifi: bump FW API to 64 for AX devices

Start supporting API version 64 for AX devices.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210621103449.8144a5b7d9a7.Ibf77fd7daa7d22f7c46d1c4a572ab9441a761299@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 0256d0042f71..7f1faa9d97b4 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/module.h>
 #include <linux/stringify.h>
@@ -9,7 +9,7 @@
 #include "iwl-prph.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX	63
+#define IWL_22000_UCODE_API_MAX	64
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN	39

From 64ab7071254c178e81a6d0203354aad6521258ea Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Tue, 8 Jun 2021 10:43:05 +0800
Subject: [PATCH 2536/3292] clockevents: Add missing parameter documentation

Add the missing documentation for the @cpu parameter of
tick_cleanup_dead_cpu().

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210608024305.2750999-1-libaokun1@huawei.com
---
 kernel/time/clockevents.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 0056d2bed53e..bb9d2fe58480 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -629,6 +629,7 @@ void tick_offline_cpu(unsigned int cpu)
 
 /**
  * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
+ * @cpu:	The dead CPU
  */
 void tick_cleanup_dead_cpu(int cpu)
 {

From db3a34e17433de2390eb80d436970edcebd0ca3e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:19 -0700
Subject: [PATCH 2537/3292] clocksource: Retry clock read if long delays
 detected

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks.  Yes, interrupts are disabled
across those two reads, but there are no shortage of things that can delay
interrupts-disabled regions of code ranging from SMI handlers to vCPU
preemption.  It would be good to have some indication as to why the clock
was marked unstable.

Therefore, re-read the watchdog clock on either side of the read from the
clock under test.  If the watchdog clock shows an excessive time delta
between its pair of reads, the reads are retried.

The maximum number of retries is specified by a new kernel boot parameter
clocksource.max_cswd_read_retries, which defaults to three, that is, up to
four reads, one initial and up to three retries.  If more than one retry
was required, a message is printed on the console (the occasional single
retry is expected behavior, especially in guest OSes).  If the maximum
number of retries is exceeded, the clock under test will be marked
unstable.  However, the probability of this happening due to various sorts
of delays is quite small.  In addition, the reason (clock-read delays) for
the unstable marking will be apparent.

Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-1-paulmck@kernel.org
---
 .../admin-guide/kernel-parameters.txt         |  6 +++
 kernel/time/clocksource.c                     | 53 ++++++++++++++++---
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc46..995deccc28bc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -581,6 +581,12 @@
 			loops can be debugged more effectively on production
 			systems.
 
+	clocksource.max_cswd_read_retries= [KNL]
+			Number of clocksource_watchdog() retries due to
+			external delays before the clock will be marked
+			unstable.  Defaults to three retries, that is,
+			four attempts to read the clock under test.
+
 	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 2cd902592fc1..43243f2be98e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,13 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
+ */
+#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+
 static void clocksource_watchdog_work(struct work_struct *work)
 {
 	/*
@@ -184,12 +191,45 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
+static ulong max_cswd_read_retries = 3;
+module_param(max_cswd_read_retries, ulong, 0644);
+
+static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+{
+	unsigned int nretries;
+	u64 wd_end, wd_delta;
+	int64_t wd_delay;
+
+	for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
+		local_irq_disable();
+		*wdnow = watchdog->read(watchdog);
+		*csnow = cs->read(cs);
+		wd_end = watchdog->read(watchdog);
+		local_irq_enable();
+
+		wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+		wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
+					      watchdog->shift);
+		if (wd_delay <= WATCHDOG_MAX_SKEW) {
+			if (nretries > 1 || nretries >= max_cswd_read_retries) {
+				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+					smp_processor_id(), watchdog->name, nretries);
+			}
+			return true;
+		}
+	}
+
+	pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
+		smp_processor_id(), watchdog->name, wd_delay, nretries);
+	return false;
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
-	struct clocksource *cs;
 	u64 csnow, wdnow, cslast, wdlast, delta;
-	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
+	int64_t wd_nsec, cs_nsec;
+	struct clocksource *cs;
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
@@ -206,10 +246,11 @@ static void clocksource_watchdog(struct timer_list *unused)
 			continue;
 		}
 
-		local_irq_disable();
-		csnow = cs->read(cs);
-		wdnow = watchdog->read(watchdog);
-		local_irq_enable();
+		if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
+			/* Clock readout unreliable, so give it up. */
+			__clocksource_unstable(cs);
+			continue;
+		}
 
 		/* Clocksource initialized ? */
 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||

From 7560c02bdffb7c52d1457fa551b9e745d4b9e754 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:20 -0700
Subject: [PATCH 2538/3292] clocksource: Check per-CPU clock synchronization
 when marked unstable

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other.  However, this problem has purportedy been
solved in the past ten years.  Except that it is all too possible that the
problem has instead simply been made less likely, which might mean that
some of the occasional "Marking clocksource 'tsc' as unstable" messages
might be due to desynchronization.  How would anyone know?

Therefore apply CPU-to-CPU synchronization checking to newly unstable
clocksource that are marked with the new CLOCK_SOURCE_VERIFY_PERCPU flag.
Lists of desynchronized CPUs are printed, with the caveat that if it
is the reporting CPU that is itself desynchronized, it will appear that
all the other clocks are wrong.  Just like in real life.

Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-2-paulmck@kernel.org
---
 arch/x86/kernel/tsc.c       |  3 +-
 include/linux/clocksource.h |  2 +-
 kernel/time/clocksource.c   | 60 +++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 57ec01192180..6eb1b097e97e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1152,7 +1152,8 @@ static struct clocksource clocksource_tsc = {
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
 				  CLOCK_SOURCE_VALID_FOR_HRES |
-				  CLOCK_SOURCE_MUST_VERIFY,
+				  CLOCK_SOURCE_MUST_VERIFY |
+				  CLOCK_SOURCE_VERIFY_PERCPU,
 	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
 	.enable			= tsc_cs_enable,
 	.resume			= tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d6ab416ee2d2..7f83d51c0fd7 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -137,7 +137,7 @@ struct clocksource {
 #define CLOCK_SOURCE_UNSTABLE			0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 #define CLOCK_SOURCE_RESELECT			0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU		0x200
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 43243f2be98e..cb12225bf050 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -224,6 +224,60 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 	return false;
 }
 
+static u64 csnow_mid;
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+	struct clocksource *cs = (struct clocksource *)csin;
+
+	csnow_mid = cs->read(cs);
+}
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
+	u64 csnow_begin, csnow_end;
+	int cpu, testcpu;
+	s64 delta;
+
+	cpumask_clear(&cpus_ahead);
+	cpumask_clear(&cpus_behind);
+	preempt_disable();
+	testcpu = smp_processor_id();
+	pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
+	for_each_online_cpu(cpu) {
+		if (cpu == testcpu)
+			continue;
+		csnow_begin = cs->read(cs);
+		smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+		csnow_end = cs->read(cs);
+		delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+		if (delta < 0)
+			cpumask_set_cpu(cpu, &cpus_behind);
+		delta = (csnow_end - csnow_mid) & cs->mask;
+		if (delta < 0)
+			cpumask_set_cpu(cpu, &cpus_ahead);
+		delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+		if (cs_nsec > cs_nsec_max)
+			cs_nsec_max = cs_nsec;
+		if (cs_nsec < cs_nsec_min)
+			cs_nsec_min = cs_nsec;
+	}
+	preempt_enable();
+	if (!cpumask_empty(&cpus_ahead))
+		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
+	if (!cpumask_empty(&cpus_behind))
+		pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
+			cpumask_pr_args(&cpus_behind), testcpu, cs->name);
+	if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
+		pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+			testcpu, cs_nsec_min, cs_nsec_max, cs->name);
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
 	u64 csnow, wdnow, cslast, wdlast, delta;
@@ -448,6 +502,12 @@ static int __clocksource_watchdog_kthread(void)
 	unsigned long flags;
 	int select = 0;
 
+	/* Do any required per-CPU skew verification. */
+	if (curr_clocksource &&
+	    curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
+	    curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
+		clocksource_verify_percpu(curr_clocksource);
+
 	spin_lock_irqsave(&watchdog_lock, flags);
 	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {

From fa218f1cce6ba40069c8daab8821de7e6be1cdd0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:21 -0700
Subject: [PATCH 2539/3292] clocksource: Limit number of CPUs checked for clock
 synchronization

Currently, if skew is detected on a clock marked CLOCK_SOURCE_VERIFY_PERCPU,
that clock is checked on all CPUs.  This is thorough, but might not be
what you want on a system with a few tens of CPUs, let alone a few hundred
of them.

Therefore, by default check only up to eight randomly chosen CPUs.  Also
provide a new clocksource.verify_n_cpus kernel boot parameter.  A value of
-1 says to check all of the CPUs, and a non-negative value says to randomly
select that number of CPUs, without concern about selecting the same CPU
multiple times.  However, make use of a cpumask so that a given CPU will be
checked at most once.

Suggested-by: Thomas Gleixner <tglx@linutronix.de> # For verify_n_cpus=1.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-3-paulmck@kernel.org
---
 .../admin-guide/kernel-parameters.txt         | 10 +++
 kernel/time/clocksource.c                     | 74 ++++++++++++++++++-
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 995deccc28bc..9ec9ea1a51f2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -587,6 +587,16 @@
 			unstable.  Defaults to three retries, that is,
 			four attempts to read the clock under test.
 
+	clocksource.verify_n_cpus= [KNL]
+			Limit the number of CPUs checked for clocksources
+			marked with CLOCK_SOURCE_VERIFY_PERCPU that
+			are marked unstable due to excessive skew.
+			A negative value says to check all CPUs, while
+			zero says not to check any.  Values larger than
+			nr_cpu_ids are silently truncated to nr_cpu_ids.
+			The actual CPUs are chosen randomly, with
+			no replacement if the same CPU is chosen twice.
+
 	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cb12225bf050..e4beab21a1fa 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,8 @@
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
 #include <linux/kthread.h>
+#include <linux/prandom.h>
+#include <linux/cpu.h>
 
 #include "tick-internal.h"
 #include "timekeeping_internal.h"
@@ -193,6 +195,8 @@ void clocksource_mark_unstable(struct clocksource *cs)
 
 static ulong max_cswd_read_retries = 3;
 module_param(max_cswd_read_retries, ulong, 0644);
+static int verify_n_cpus = 8;
+module_param(verify_n_cpus, int, 0644);
 
 static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 {
@@ -227,6 +231,55 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 static u64 csnow_mid;
 static cpumask_t cpus_ahead;
 static cpumask_t cpus_behind;
+static cpumask_t cpus_chosen;
+
+static void clocksource_verify_choose_cpus(void)
+{
+	int cpu, i, n = verify_n_cpus;
+
+	if (n < 0) {
+		/* Check all of the CPUs. */
+		cpumask_copy(&cpus_chosen, cpu_online_mask);
+		cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
+		return;
+	}
+
+	/* If no checking desired, or no other CPU to check, leave. */
+	cpumask_clear(&cpus_chosen);
+	if (n == 0 || num_online_cpus() <= 1)
+		return;
+
+	/* Make sure to select at least one CPU other than the current CPU. */
+	cpu = cpumask_next(-1, cpu_online_mask);
+	if (cpu == smp_processor_id())
+		cpu = cpumask_next(cpu, cpu_online_mask);
+	if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
+		return;
+	cpumask_set_cpu(cpu, &cpus_chosen);
+
+	/* Force a sane value for the boot parameter. */
+	if (n > nr_cpu_ids)
+		n = nr_cpu_ids;
+
+	/*
+	 * Randomly select the specified number of CPUs.  If the same
+	 * CPU is selected multiple times, that CPU is checked only once,
+	 * and no replacement CPU is selected.  This gracefully handles
+	 * situations where verify_n_cpus is greater than the number of
+	 * CPUs that are currently online.
+	 */
+	for (i = 1; i < n; i++) {
+		cpu = prandom_u32() % nr_cpu_ids;
+		cpu = cpumask_next(cpu - 1, cpu_online_mask);
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_next(-1, cpu_online_mask);
+		if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
+			cpumask_set_cpu(cpu, &cpus_chosen);
+	}
+
+	/* Don't verify ourselves. */
+	cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
+}
 
 static void clocksource_verify_one_cpu(void *csin)
 {
@@ -242,12 +295,22 @@ static void clocksource_verify_percpu(struct clocksource *cs)
 	int cpu, testcpu;
 	s64 delta;
 
+	if (verify_n_cpus == 0)
+		return;
 	cpumask_clear(&cpus_ahead);
 	cpumask_clear(&cpus_behind);
+	get_online_cpus();
 	preempt_disable();
+	clocksource_verify_choose_cpus();
+	if (cpumask_weight(&cpus_chosen) == 0) {
+		preempt_enable();
+		put_online_cpus();
+		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
+		return;
+	}
 	testcpu = smp_processor_id();
-	pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
-	for_each_online_cpu(cpu) {
+	pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+	for_each_cpu(cpu, &cpus_chosen) {
 		if (cpu == testcpu)
 			continue;
 		csnow_begin = cs->read(cs);
@@ -267,6 +330,7 @@ static void clocksource_verify_percpu(struct clocksource *cs)
 			cs_nsec_min = cs_nsec;
 	}
 	preempt_enable();
+	put_online_cpus();
 	if (!cpumask_empty(&cpus_ahead))
 		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
 			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
@@ -337,6 +401,12 @@ static void clocksource_watchdog(struct timer_list *unused)
 				watchdog->name, wdnow, wdlast, watchdog->mask);
 			pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
 				cs->name, csnow, cslast, cs->mask);
+			if (curr_clocksource == cs)
+				pr_warn("                      '%s' is current clocksource.\n", cs->name);
+			else if (curr_clocksource)
+				pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
+			else
+				pr_warn("                      No current clocksource.\n");
 			__clocksource_unstable(cs);
 			continue;
 		}

From 2e27e793e280ff12cb5c202a1214c08b0d3a0f26 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:22 -0700
Subject: [PATCH 2540/3292] clocksource: Reduce clocksource-skew threshold

Currently, WATCHDOG_THRESHOLD is set to detect a 62.5-millisecond skew in
a 500-millisecond WATCHDOG_INTERVAL.  This requires that clocks be skewed
by more than 12.5% in order to be marked unstable.  Except that a clock
that is skewed by that much is probably destroying unsuspecting software
right and left.  And given that there are now checks for false-positive
skews due to delays between reading the two clocks, it should be possible
to greatly decrease WATCHDOG_THRESHOLD, at least for fine-grained clocks
such as TSC.

Therefore, add a new uncertainty_margin field to the clocksource structure
that contains the maximum uncertainty in nanoseconds for the corresponding
clock.  This field may be initialized manually, as it is for
clocksource_tsc_early and clocksource_jiffies, which is copied to
refined_jiffies.  If the field is not initialized manually, it will be
computed at clock-registry time as the period of the clock in question
based on the scale and freq parameters to __clocksource_update_freq_scale()
function.  If either of those two parameters are zero, the
tens-of-milliseconds WATCHDOG_THRESHOLD is used as a cowardly alternative
to dividing by zero.  No matter how the uncertainty_margin field is
calculated, it is bounded below by twice WATCHDOG_MAX_SKEW, that is, by 100
microseconds.

Note that manually initialized uncertainty_margin fields are not adjusted,
but there is a WARN_ON_ONCE() that triggers if any such field is less than
twice WATCHDOG_MAX_SKEW.  This WARN_ON_ONCE() is intended to discourage
production use of the one-nanosecond uncertainty_margin values that are
used to test the clock-skew code itself.

The actual clock-skew check uses the sum of the uncertainty_margin fields
of the two clocksource structures being compared.  Integer overflow is
avoided because the largest computed value of the uncertainty_margin
fields is one billion (10^9), and double that value fits into an
unsigned int.  However, if someone manually specifies (say) UINT_MAX,
they will get what they deserve.

Note that the refined_jiffies uncertainty_margin field is initialized to
TICK_NSEC, which means that skew checks involving this clocksource will
be sufficently forgiving.  In a similar vein, the clocksource_tsc_early
uncertainty_margin field is initialized to 32*NSEC_PER_MSEC, which
replicates the current behavior and allows custom setting if needed
in order to address the rare skews detected for this clocksource in
current mainline.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-4-paulmck@kernel.org
---
 arch/x86/kernel/tsc.c       |  1 +
 include/linux/clocksource.h |  3 +++
 kernel/time/clocksource.c   | 48 +++++++++++++++++++++++++++++--------
 kernel/time/jiffies.c       | 15 ++++++------
 4 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6eb1b097e97e..2e076a459a0c 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1128,6 +1128,7 @@ static int tsc_cs_enable(struct clocksource *cs)
 static struct clocksource clocksource_tsc_early = {
 	.name			= "tsc-early",
 	.rating			= 299,
+	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
 	.read			= read_tsc,
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7f83d51c0fd7..895203727cb5 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -43,6 +43,8 @@ struct module;
  * @shift:		Cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	Maximum idle time permitted by the clocksource (nsecs)
  * @maxadj:		Maximum adjustment value to mult (~11%)
+ * @uncertainty_margin:	Maximum uncertainty in nanoseconds per half second.
+ *			Zero says to use default WATCHDOG_THRESHOLD.
  * @archdata:		Optional arch-specific data
  * @max_cycles:		Maximum safe cycle value which won't overflow on
  *			multiplication
@@ -98,6 +100,7 @@ struct clocksource {
 	u32			shift;
 	u64			max_idle_ns;
 	u32			maxadj;
+	u32			uncertainty_margin;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e4beab21a1fa..9b27888a6e75 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -95,6 +95,20 @@ static char override_name[CS_NAME_LEN];
 static int finished_booting;
 static u64 suspend_start;
 
+/*
+ * Threshold: 0.0312s, when doubled: 0.0625s.
+ * Also a default for cs->uncertainty_margin when registering clocks.
+ */
+#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
+
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
+ * a lower bound for cs->uncertainty_margin values when registering clocks.
+ */
+#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC)
+
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
 static void clocksource_select(void);
@@ -121,17 +135,9 @@ static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
 
 /*
- * Interval: 0.5sec Threshold: 0.0625s
+ * Interval: 0.5sec.
  */
 #define WATCHDOG_INTERVAL (HZ >> 1)
-#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
-
-/*
- * Maximum permissible delay between two readouts of the watchdog
- * clocksource surrounding a read of the clocksource being validated.
- * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
- */
-#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
 
 static void clocksource_watchdog_work(struct work_struct *work)
 {
@@ -348,6 +354,7 @@ static void clocksource_watchdog(struct timer_list *unused)
 	int next_cpu, reset_pending;
 	int64_t wd_nsec, cs_nsec;
 	struct clocksource *cs;
+	u32 md;
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
@@ -394,7 +401,8 @@ static void clocksource_watchdog(struct timer_list *unused)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
+		if (abs(cs_nsec - wd_nsec) > md) {
 			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				smp_processor_id(), cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
@@ -1047,6 +1055,26 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
 		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
 				       NSEC_PER_SEC / scale, sec * scale);
 	}
+
+	/*
+	 * If the uncertainty margin is not specified, calculate it.
+	 * If both scale and freq are non-zero, calculate the clock
+	 * period, but bound below at 2*WATCHDOG_MAX_SKEW.  However,
+	 * if either of scale or freq is zero, be very conservative and
+	 * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
+	 * uncertainty margin.  Allow stupidly small uncertainty margins
+	 * to be specified by the caller for testing purposes, but warn
+	 * to discourage production use of this capability.
+	 */
+	if (scale && freq && !cs->uncertainty_margin) {
+		cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
+		if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
+			cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
+	} else if (!cs->uncertainty_margin) {
+		cs->uncertainty_margin = WATCHDOG_THRESHOLD;
+	}
+	WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
+
 	/*
 	 * Ensure clocksources that have large 'mult' values don't overflow
 	 * when adjusted.
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a492e4da69ba..01935aafdb46 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -49,13 +49,14 @@ static u64 jiffies_read(struct clocksource *cs)
  * for "tick-less" systems.
  */
 static struct clocksource clocksource_jiffies = {
-	.name		= "jiffies",
-	.rating		= 1, /* lowest valid rating*/
-	.read		= jiffies_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-	.mult		= TICK_NSEC << JIFFIES_SHIFT, /* details above */
-	.shift		= JIFFIES_SHIFT,
-	.max_cycles	= 10,
+	.name			= "jiffies",
+	.rating			= 1, /* lowest valid rating*/
+	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
+	.read			= jiffies_read,
+	.mask			= CLOCKSOURCE_MASK(32),
+	.mult			= TICK_NSEC << JIFFIES_SHIFT, /* details above */
+	.shift			= JIFFIES_SHIFT,
+	.max_cycles		= 10,
 };
 
 __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);

From 1253b9b87e42ab6a3d5c2cb27af2bdd67d7e50ff Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:23 -0700
Subject: [PATCH 2541/3292] clocksource: Provide kernel module to test
 clocksource watchdog

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks.  It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.

Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option.  This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel.  Very large systems
that boot slowly may need to increase the value of this module parameter.

This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications.  This module first verifies that the ->uncertainty_margin
field of the clocksource structures are set sanely.  It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed.  Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.

This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
---
 .../admin-guide/kernel-parameters.txt         |   6 +
 include/linux/clocksource.h                   |   3 +
 kernel/time/Makefile                          |   1 +
 kernel/time/clocksource-wdtest.c              | 202 ++++++++++++++++++
 kernel/time/clocksource.c                     |   6 +-
 lib/Kconfig.debug                             |  12 ++
 6 files changed, 228 insertions(+), 2 deletions(-)
 create mode 100644 kernel/time/clocksource-wdtest.c

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9ec9ea1a51f2..591048ed1365 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -597,6 +597,12 @@
 			The actual CPUs are chosen randomly, with
 			no replacement if the same CPU is chosen twice.
 
+	clocksource-wdtest.holdoff= [KNL]
+			Set the time in seconds that the clocksource
+			watchdog test waits before commencing its tests.
+			Defaults to zero when built as a module and to
+			10 seconds when built into the kernel.
+
 	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 895203727cb5..1d42d4b17327 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -291,4 +291,7 @@ static inline void timer_probe(void) {}
 #define TIMER_ACPI_DECLARE(name, table_id, fn)		\
 	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
 
+extern ulong max_cswd_read_retries;
+void clocksource_verify_percpu(struct clocksource *cs);
+
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 1fb1c1ef6a19..1ed85b25b096 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_HAVE_GENERIC_VDSO)			+= vsyscall.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
 obj-$(CONFIG_TIME_NS)				+= namespace.o
+obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG)		+= clocksource-wdtest.o
diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c
new file mode 100644
index 000000000000..01df12395c0e
--- /dev/null
+++ b/kernel/time/clocksource-wdtest.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Unit test for the clocksource watchdog.
+ *
+ * Copyright (C) 2021 Facebook, Inc.
+ *
+ * Author: Paul E. McKenney <paulmck@kernel.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
+#include <linux/tick.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/prandom.h>
+#include <linux/cpu.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
+
+static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0;
+module_param(holdoff, int, 0444);
+MODULE_PARM_DESC(holdoff, "Time to wait to start test (s).");
+
+/* Watchdog kthread's task_struct pointer for debug purposes. */
+static struct task_struct *wdtest_task;
+
+static u64 wdtest_jiffies_read(struct clocksource *cs)
+{
+	return (u64)jiffies;
+}
+
+/* Assume HZ > 100. */
+#define JIFFIES_SHIFT	8
+
+static struct clocksource clocksource_wdtest_jiffies = {
+	.name			= "wdtest-jiffies",
+	.rating			= 1, /* lowest valid rating*/
+	.uncertainty_margin	= TICK_NSEC,
+	.read			= wdtest_jiffies_read,
+	.mask			= CLOCKSOURCE_MASK(32),
+	.flags			= CLOCK_SOURCE_MUST_VERIFY,
+	.mult			= TICK_NSEC << JIFFIES_SHIFT, /* details above */
+	.shift			= JIFFIES_SHIFT,
+	.max_cycles		= 10,
+};
+
+static int wdtest_ktime_read_ndelays;
+static bool wdtest_ktime_read_fuzz;
+
+static u64 wdtest_ktime_read(struct clocksource *cs)
+{
+	int wkrn = READ_ONCE(wdtest_ktime_read_ndelays);
+	static int sign = 1;
+	u64 ret;
+
+	if (wkrn) {
+		udelay(cs->uncertainty_margin / 250);
+		WRITE_ONCE(wdtest_ktime_read_ndelays, wkrn - 1);
+	}
+	ret = ktime_get_real_fast_ns();
+	if (READ_ONCE(wdtest_ktime_read_fuzz)) {
+		sign = -sign;
+		ret = ret + sign * 100 * NSEC_PER_MSEC;
+	}
+	return ret;
+}
+
+static void wdtest_ktime_cs_mark_unstable(struct clocksource *cs)
+{
+	pr_info("--- Marking %s unstable due to clocksource watchdog.\n", cs->name);
+}
+
+#define KTIME_FLAGS (CLOCK_SOURCE_IS_CONTINUOUS | \
+		     CLOCK_SOURCE_VALID_FOR_HRES | \
+		     CLOCK_SOURCE_MUST_VERIFY | \
+		     CLOCK_SOURCE_VERIFY_PERCPU)
+
+static struct clocksource clocksource_wdtest_ktime = {
+	.name			= "wdtest-ktime",
+	.rating			= 300,
+	.read			= wdtest_ktime_read,
+	.mask			= CLOCKSOURCE_MASK(64),
+	.flags			= KTIME_FLAGS,
+	.mark_unstable		= wdtest_ktime_cs_mark_unstable,
+	.list			= LIST_HEAD_INIT(clocksource_wdtest_ktime.list),
+};
+
+/* Reset the clocksource if needed. */
+static void wdtest_ktime_clocksource_reset(void)
+{
+	if (clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE) {
+		clocksource_unregister(&clocksource_wdtest_ktime);
+		clocksource_wdtest_ktime.flags = KTIME_FLAGS;
+		schedule_timeout_uninterruptible(HZ / 10);
+		clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
+	}
+}
+
+/* Run the specified series of watchdog tests. */
+static int wdtest_func(void *arg)
+{
+	unsigned long j1, j2;
+	char *s;
+	int i;
+
+	schedule_timeout_uninterruptible(holdoff * HZ);
+
+	/*
+	 * Verify that jiffies-like clocksources get the manually
+	 * specified uncertainty margin.
+	 */
+	pr_info("--- Verify jiffies-like uncertainty margin.\n");
+	__clocksource_register(&clocksource_wdtest_jiffies);
+	WARN_ON_ONCE(clocksource_wdtest_jiffies.uncertainty_margin != TICK_NSEC);
+
+	j1 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
+	schedule_timeout_uninterruptible(HZ);
+	j2 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
+	WARN_ON_ONCE(j1 == j2);
+
+	clocksource_unregister(&clocksource_wdtest_jiffies);
+
+	/*
+	 * Verify that tsc-like clocksources are assigned a reasonable
+	 * uncertainty margin.
+	 */
+	pr_info("--- Verify tsc-like uncertainty margin.\n");
+	clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
+	WARN_ON_ONCE(clocksource_wdtest_ktime.uncertainty_margin < NSEC_PER_USEC);
+
+	j1 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
+	udelay(1);
+	j2 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
+	pr_info("--- tsc-like times: %lu - %lu = %lu.\n", j2, j1, j2 - j1);
+	WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC));
+
+	/* Verify tsc-like stability with various numbers of errors injected. */
+	for (i = 0; i <= max_cswd_read_retries + 1; i++) {
+		if (i <= 1 && i < max_cswd_read_retries)
+			s = "";
+		else if (i <= max_cswd_read_retries)
+			s = ", expect message";
+		else
+			s = ", expect clock skew";
+		pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s);
+		WRITE_ONCE(wdtest_ktime_read_ndelays, i);
+		schedule_timeout_uninterruptible(2 * HZ);
+		WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
+		WARN_ON_ONCE((i <= max_cswd_read_retries) !=
+			     !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
+		wdtest_ktime_clocksource_reset();
+	}
+
+	/* Verify tsc-like stability with clock-value-fuzz error injection. */
+	pr_info("--- Watchdog clock-value-fuzz error injection, expect clock skew and per-CPU mismatches.\n");
+	WRITE_ONCE(wdtest_ktime_read_fuzz, true);
+	schedule_timeout_uninterruptible(2 * HZ);
+	WARN_ON_ONCE(!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
+	clocksource_verify_percpu(&clocksource_wdtest_ktime);
+	WRITE_ONCE(wdtest_ktime_read_fuzz, false);
+
+	clocksource_unregister(&clocksource_wdtest_ktime);
+
+	pr_info("--- Done with test.\n");
+	return 0;
+}
+
+static void wdtest_print_module_parms(void)
+{
+	pr_alert("--- holdoff=%d\n", holdoff);
+}
+
+/* Cleanup function. */
+static void clocksource_wdtest_cleanup(void)
+{
+}
+
+static int __init clocksource_wdtest_init(void)
+{
+	int ret = 0;
+
+	wdtest_print_module_parms();
+
+	/* Create watchdog-test task. */
+	wdtest_task = kthread_run(wdtest_func, NULL, "wdtest");
+	if (IS_ERR(wdtest_task)) {
+		ret = PTR_ERR(wdtest_task);
+		pr_warn("%s: Failed to create wdtest kthread.\n", __func__);
+		wdtest_task = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+module_init(clocksource_wdtest_init);
+module_exit(clocksource_wdtest_cleanup);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 9b27888a6e75..74d6a234fd14 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -199,8 +199,9 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-static ulong max_cswd_read_retries = 3;
+ulong max_cswd_read_retries = 3;
 module_param(max_cswd_read_retries, ulong, 0644);
+EXPORT_SYMBOL_GPL(max_cswd_read_retries);
 static int verify_n_cpus = 8;
 module_param(verify_n_cpus, int, 0644);
 
@@ -294,7 +295,7 @@ static void clocksource_verify_one_cpu(void *csin)
 	csnow_mid = cs->read(cs);
 }
 
-static void clocksource_verify_percpu(struct clocksource *cs)
+void clocksource_verify_percpu(struct clocksource *cs)
 {
 	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
 	u64 csnow_begin, csnow_end;
@@ -347,6 +348,7 @@ static void clocksource_verify_percpu(struct clocksource *cs)
 		pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
 			testcpu, cs_nsec_min, cs_nsec_max, cs->name);
 }
+EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
 
 static void clocksource_watchdog(struct timer_list *unused)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 678c13967580..0a5a70c742e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2571,6 +2571,18 @@ config TEST_FPU
 
 	  If unsure, say N.
 
+config TEST_CLOCKSOURCE_WATCHDOG
+	tristate "Test clocksource watchdog in kernel space"
+	depends on CLOCKSOURCE_WATCHDOG
+	help
+	  Enable this option to create a kernel module that will trigger
+	  a test of the clocksource watchdog.  This module may be loaded
+	  via modprobe or insmod in which case it will run upon being
+	  loaded, or it may be built in, in which case it will run
+	  shortly after boot.
+
+	  If unsure, say N.
+
 endif # RUNTIME_TESTING_MENU
 
 config ARCH_USE_MEMTEST

From 22a22383371667962b46bd90d534cc57669537ac Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Thu, 27 May 2021 12:01:24 -0700
Subject: [PATCH 2542/3292] clocksource: Print deviation in nanoseconds when a
 clocksource becomes unstable

Currently when an unstable clocksource is detected, the raw counters of
that clocksource and watchdog will be printed, which can only be understood
after some math calculation.

So print the delta in nanoseconds as well to make it easier for humans to
check the results.

[ paulmck: Fix typo. ]

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210527190124.440372-6-paulmck@kernel.org
---
 kernel/time/clocksource.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 74d6a234fd14..b89c76e1c02c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -407,10 +407,10 @@ static void clocksource_watchdog(struct timer_list *unused)
 		if (abs(cs_nsec - wd_nsec) > md) {
 			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				smp_processor_id(), cs->name);
-			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
-				watchdog->name, wdnow, wdlast, watchdog->mask);
-			pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
-				cs->name, csnow, cslast, cs->mask);
+			pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
+				watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
+			pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
+				cs->name, cs_nsec, csnow, cslast, cs->mask);
 			if (curr_clocksource == cs)
 				pr_warn("                      '%s' is current clocksource.\n", cs->name);
 			else if (curr_clocksource)

From 0ae64fb6b645e0f976e08bc3c05e518856f19d00 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 19 Jun 2021 09:44:53 +0530
Subject: [PATCH 2543/3292] libbpf: Add request buffer type for netlink
 messages

Coverity complains about OOB writes to nlmsghdr. There is no OOB as we
write to the trailing buffer, but static analyzers and compilers may
rightfully be confused as the nlmsghdr pointer has subobject provenance
(and hence subobject bounds).

Fix this by using an explicit request structure containing the nlmsghdr,
struct tcmsg/ifinfomsg, and attribute buffer.

Also switch nh_tail (renamed to req_tail) to cast req * to char * so
that it can be understood as arithmetic on pointer to the representation
array (hence having same bound as request structure), which should
further appease analyzers.

As a bonus, callers don't have to pass sizeof(req) all the time now, as
size is implicitly obtained using the pointer. While at it, also reduce
the size of attribute buffer to 128 bytes (132 for ifinfomsg using
functions due to the padding).

Summary of problem:

  Even though C standard allows interconvertibility of pointer to first
  member and pointer to struct, for the purposes of alias analysis it
  would still consider the first as having pointer value "pointer to T"
  where T is type of first member hence having subobject bounds,
  allowing analyzers within reason to complain when object is accessed
  beyond the size of pointed to object.

  The only exception to this rule may be when a char * is formed to a
  member subobject. It is not possible for the compiler to be able to
  tell the intent of the programmer that it is a pointer to member
  object or the underlying representation array of the containing
  object, so such diagnosis is suppressed.

Fixes: 715c5ce454a6 ("libbpf: Add low level TC-BPF management API")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210619041454.417577-1-memxor@gmail.com
---
 tools/lib/bpf/netlink.c | 113 +++++++++++++++-------------------------
 tools/lib/bpf/nlattr.h  |  34 +++++++-----
 2 files changed, 65 insertions(+), 82 deletions(-)

diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index cf9381f03b16..bfaa9a5c50f3 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -154,7 +154,7 @@ done:
 	return ret;
 }
 
-static int libbpf_netlink_send_recv(struct nlmsghdr *nh,
+static int libbpf_netlink_send_recv(struct libbpf_nla_req *req,
 				    __dump_nlmsg_t parse_msg,
 				    libbpf_dump_nlmsg_t parse_attr,
 				    void *cookie)
@@ -166,15 +166,15 @@ static int libbpf_netlink_send_recv(struct nlmsghdr *nh,
 	if (sock < 0)
 		return sock;
 
-	nh->nlmsg_pid = 0;
-	nh->nlmsg_seq = time(NULL);
+	req->nh.nlmsg_pid = 0;
+	req->nh.nlmsg_seq = time(NULL);
 
-	if (send(sock, nh, nh->nlmsg_len, 0) < 0) {
+	if (send(sock, req, req->nh.nlmsg_len, 0) < 0) {
 		ret = -errno;
 		goto out;
 	}
 
-	ret = libbpf_netlink_recv(sock, nl_pid, nh->nlmsg_seq,
+	ret = libbpf_netlink_recv(sock, nl_pid, req->nh.nlmsg_seq,
 				  parse_msg, parse_attr, cookie);
 out:
 	libbpf_netlink_close(sock);
@@ -186,11 +186,7 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 {
 	struct nlattr *nla;
 	int ret;
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifinfo;
-		char             attrbuf[64];
-	} req;
+	struct libbpf_nla_req req;
 
 	memset(&req, 0, sizeof(req));
 	req.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg));
@@ -199,27 +195,26 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 	req.ifinfo.ifi_family = AF_UNSPEC;
 	req.ifinfo.ifi_index  = ifindex;
 
-	nla = nlattr_begin_nested(&req.nh, sizeof(req), IFLA_XDP);
+	nla = nlattr_begin_nested(&req, IFLA_XDP);
 	if (!nla)
 		return -EMSGSIZE;
-	ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FD, &fd, sizeof(fd));
+	ret = nlattr_add(&req, IFLA_XDP_FD, &fd, sizeof(fd));
 	if (ret < 0)
 		return ret;
 	if (flags) {
-		ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FLAGS, &flags,
-				 sizeof(flags));
+		ret = nlattr_add(&req, IFLA_XDP_FLAGS, &flags, sizeof(flags));
 		if (ret < 0)
 			return ret;
 	}
 	if (flags & XDP_FLAGS_REPLACE) {
-		ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_EXPECTED_FD,
-				 &old_fd, sizeof(old_fd));
+		ret = nlattr_add(&req, IFLA_XDP_EXPECTED_FD, &old_fd,
+				 sizeof(old_fd));
 		if (ret < 0)
 			return ret;
 	}
-	nlattr_end_nested(&req.nh, nla);
+	nlattr_end_nested(&req, nla);
 
-	return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
@@ -314,14 +309,11 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 	struct xdp_id_md xdp_id = {};
 	__u32 mask;
 	int ret;
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifm;
-	} req = {
-		.nh.nlmsg_len   = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
-		.nh.nlmsg_type  = RTM_GETLINK,
-		.nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
-		.ifm.ifi_family = AF_PACKET,
+	struct libbpf_nla_req req = {
+		.nh.nlmsg_len      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nh.nlmsg_type     = RTM_GETLINK,
+		.nh.nlmsg_flags    = NLM_F_DUMP | NLM_F_REQUEST,
+		.ifinfo.ifi_family = AF_PACKET,
 	};
 
 	if (flags & ~XDP_FLAGS_MASK || !info_size)
@@ -336,7 +328,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 	xdp_id.ifindex = ifindex;
 	xdp_id.flags = flags;
 
-	ret = libbpf_netlink_send_recv(&req.nh, __dump_link_nlmsg,
+	ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg,
 				       get_xdp_info, &xdp_id);
 	if (!ret) {
 		size_t sz = min(info_size, sizeof(xdp_id.info));
@@ -376,15 +368,14 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
 	return libbpf_err(ret);
 }
 
-typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t,
-			      size_t maxsz);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
 
-static int clsact_config(struct nlmsghdr *nh, struct tcmsg *t, size_t maxsz)
+static int clsact_config(struct libbpf_nla_req *req)
 {
-	t->tcm_parent = TC_H_CLSACT;
-	t->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
+	req->tc.tcm_parent = TC_H_CLSACT;
+	req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
 
-	return nlattr_add(nh, maxsz, TCA_KIND, "clsact", sizeof("clsact"));
+	return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
 }
 
 static int attach_point_to_config(struct bpf_tc_hook *hook,
@@ -431,11 +422,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 {
 	qdisc_config_t config;
 	int ret;
-	struct {
-		struct nlmsghdr nh;
-		struct tcmsg tc;
-		char buf[256];
-	} req;
+	struct libbpf_nla_req req;
 
 	ret = attach_point_to_config(hook, &config);
 	if (ret < 0)
@@ -448,11 +435,11 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 	req.tc.tcm_family  = AF_UNSPEC;
 	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
 
-	ret = config(&req.nh, &req.tc, sizeof(req));
+	ret = config(&req);
 	if (ret < 0)
 		return ret;
 
-	return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
@@ -544,7 +531,7 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
 	return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO);
 }
 
-static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
+static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
 {
 	struct bpf_prog_info info = {};
 	__u32 info_len = sizeof(info);
@@ -555,7 +542,7 @@ static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
 	if (ret < 0)
 		return ret;
 
-	ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd));
+	ret = nlattr_add(req, TCA_BPF_FD, &fd, sizeof(fd));
 	if (ret < 0)
 		return ret;
 	len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id);
@@ -563,7 +550,7 @@ static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd)
 		return -errno;
 	if (len >= sizeof(name))
 		return -ENAMETOOLONG;
-	return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1);
+	return nlattr_add(req, TCA_BPF_NAME, name, len + 1);
 }
 
 int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
@@ -571,12 +558,8 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 	__u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags;
 	int ret, ifindex, attach_point, prog_fd;
 	struct bpf_cb_ctx info = {};
+	struct libbpf_nla_req req;
 	struct nlattr *nla;
-	struct {
-		struct nlmsghdr nh;
-		struct tcmsg tc;
-		char buf[256];
-	} req;
 
 	if (!hook || !opts ||
 	    !OPTS_VALID(hook, bpf_tc_hook) ||
@@ -618,25 +601,24 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 		return libbpf_err(ret);
 	req.tc.tcm_parent = parent;
 
-	ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
+	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
 	if (ret < 0)
 		return libbpf_err(ret);
-	nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS);
+	nla = nlattr_begin_nested(&req, TCA_OPTIONS);
 	if (!nla)
 		return libbpf_err(-EMSGSIZE);
-	ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd);
+	ret = tc_add_fd_and_name(&req, prog_fd);
 	if (ret < 0)
 		return libbpf_err(ret);
 	bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
-	ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags,
-			 sizeof(bpf_flags));
+	ret = nlattr_add(&req, TCA_BPF_FLAGS, &bpf_flags, sizeof(bpf_flags));
 	if (ret < 0)
 		return libbpf_err(ret);
-	nlattr_end_nested(&req.nh, nla);
+	nlattr_end_nested(&req, nla);
 
 	info.opts = opts;
 
-	ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
+	ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
 	if (ret < 0)
 		return libbpf_err(ret);
 	if (!info.processed)
@@ -650,11 +632,7 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 {
 	__u32 protocol = 0, handle, priority, parent, prog_id, flags;
 	int ret, ifindex, attach_point, prog_fd;
-	struct {
-		struct nlmsghdr nh;
-		struct tcmsg tc;
-		char buf[256];
-	} req;
+	struct libbpf_nla_req req;
 
 	if (!hook ||
 	    !OPTS_VALID(hook, bpf_tc_hook) ||
@@ -701,13 +679,12 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 	req.tc.tcm_parent = parent;
 
 	if (!flush) {
-		ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND,
-				 "bpf", sizeof("bpf"));
+		ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
 		if (ret < 0)
 			return ret;
 	}
 
-	return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL);
+	return libbpf_netlink_send_recv(&req, NULL, NULL, NULL);
 }
 
 int bpf_tc_detach(const struct bpf_tc_hook *hook,
@@ -727,11 +704,7 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 	__u32 protocol, handle, priority, parent, prog_id, flags;
 	int ret, ifindex, attach_point, prog_fd;
 	struct bpf_cb_ctx info = {};
-	struct {
-		struct nlmsghdr nh;
-		struct tcmsg tc;
-		char buf[256];
-	} req;
+	struct libbpf_nla_req req;
 
 	if (!hook || !opts ||
 	    !OPTS_VALID(hook, bpf_tc_hook) ||
@@ -770,13 +743,13 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 		return libbpf_err(ret);
 	req.tc.tcm_parent = parent;
 
-	ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
+	ret = nlattr_add(&req, TCA_KIND, "bpf", sizeof("bpf"));
 	if (ret < 0)
 		return libbpf_err(ret);
 
 	info.opts = opts;
 
-	ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
+	ret = libbpf_netlink_send_recv(&req, get_tc_info, NULL, &info);
 	if (ret < 0)
 		return libbpf_err(ret);
 	if (!info.processed)
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 3c780ab6d022..76cbfeb21955 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -13,6 +13,7 @@
 #include <string.h>
 #include <errno.h>
 #include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 
 /* avoid multiple definition of netlink features */
 #define __LINUX_NETLINK_H
@@ -52,6 +53,15 @@ struct libbpf_nla_policy {
 	uint16_t	maxlen;
 };
 
+struct libbpf_nla_req {
+	struct nlmsghdr nh;
+	union {
+		struct ifinfomsg ifinfo;
+		struct tcmsg tc;
+	};
+	char buf[128];
+};
+
 /**
  * @ingroup attr
  * Iterate over a stream of attributes
@@ -111,44 +121,44 @@ static inline struct nlattr *nla_data(struct nlattr *nla)
 	return (struct nlattr *)((char *)nla + NLA_HDRLEN);
 }
 
-static inline struct nlattr *nh_tail(struct nlmsghdr *nh)
+static inline struct nlattr *req_tail(struct libbpf_nla_req *req)
 {
-	return (struct nlattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
+	return (struct nlattr *)((char *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
 }
 
-static inline int nlattr_add(struct nlmsghdr *nh, size_t maxsz, int type,
+static inline int nlattr_add(struct libbpf_nla_req *req, int type,
 			     const void *data, int len)
 {
 	struct nlattr *nla;
 
-	if (NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > maxsz)
+	if (NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > sizeof(*req))
 		return -EMSGSIZE;
 	if (!!data != !!len)
 		return -EINVAL;
 
-	nla = nh_tail(nh);
+	nla = req_tail(req);
 	nla->nla_type = type;
 	nla->nla_len = NLA_HDRLEN + len;
 	if (data)
 		memcpy(nla_data(nla), data, len);
-	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(nla->nla_len);
+	req->nh.nlmsg_len = NLMSG_ALIGN(req->nh.nlmsg_len) + NLA_ALIGN(nla->nla_len);
 	return 0;
 }
 
-static inline struct nlattr *nlattr_begin_nested(struct nlmsghdr *nh,
-						 size_t maxsz, int type)
+static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int type)
 {
 	struct nlattr *tail;
 
-	tail = nh_tail(nh);
-	if (nlattr_add(nh, maxsz, type | NLA_F_NESTED, NULL, 0))
+	tail = req_tail(req);
+	if (nlattr_add(req, type | NLA_F_NESTED, NULL, 0))
 		return NULL;
 	return tail;
 }
 
-static inline void nlattr_end_nested(struct nlmsghdr *nh, struct nlattr *tail)
+static inline void nlattr_end_nested(struct libbpf_nla_req *req,
+				     struct nlattr *tail)
 {
-	tail->nla_len = (char *)nh_tail(nh) - (char *)tail;
+	tail->nla_len = (char *)req_tail(req) - (char *)tail;
 }
 
 #endif /* __LIBBPF_NLATTR_H */

From ee62a5c6bb100b6fb07f3da3818c10a24d440e10 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 19 Jun 2021 09:44:54 +0530
Subject: [PATCH 2544/3292] libbpf: Switch to void * casting in netlink helpers

Netlink helpers I added in 8bbb77b7c7a2 ("libbpf: Add various netlink
helpers") used char * casts everywhere, and there were a few more that
existed from before.

Convert all of them to void * cast, as it is treated equivalently by
clang/gcc for the purposes of pointer arithmetic and to follow the
convention elsewhere in the kernel/libbpf.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210619041454.417577-2-memxor@gmail.com
---
 tools/lib/bpf/netlink.c | 2 +-
 tools/lib/bpf/nlattr.c  | 2 +-
 tools/lib/bpf/nlattr.h  | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index bfaa9a5c50f3..39f25e09b51e 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -524,7 +524,7 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
 	struct nlattr *tb[TCA_MAX + 1];
 
 	libbpf_nla_parse(tb, TCA_MAX,
-			 (struct nlattr *)((char *)tc + NLMSG_ALIGN(sizeof(*tc))),
+			 (struct nlattr *)((void *)tc + NLMSG_ALIGN(sizeof(*tc))),
 			 NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL);
 	if (!tb[TCA_KIND])
 		return NL_CONT;
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index b607fa9852b1..f57e77a6e40f 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -27,7 +27,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
 	int totlen = NLA_ALIGN(nla->nla_len);
 
 	*remaining -= totlen;
-	return (struct nlattr *) ((char *) nla + totlen);
+	return (struct nlattr *)((void *)nla + totlen);
 }
 
 static int nla_ok(const struct nlattr *nla, int remaining)
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 76cbfeb21955..4d15ae2ff812 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -81,7 +81,7 @@ struct libbpf_nla_req {
  */
 static inline void *libbpf_nla_data(const struct nlattr *nla)
 {
-	return (char *) nla + NLA_HDRLEN;
+	return (void *)nla + NLA_HDRLEN;
 }
 
 static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
@@ -118,12 +118,12 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
 
 static inline struct nlattr *nla_data(struct nlattr *nla)
 {
-	return (struct nlattr *)((char *)nla + NLA_HDRLEN);
+	return (struct nlattr *)((void *)nla + NLA_HDRLEN);
 }
 
 static inline struct nlattr *req_tail(struct libbpf_nla_req *req)
 {
-	return (struct nlattr *)((char *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
+	return (struct nlattr *)((void *)req + NLMSG_ALIGN(req->nh.nlmsg_len));
 }
 
 static inline int nlattr_add(struct libbpf_nla_req *req, int type,
@@ -158,7 +158,7 @@ static inline struct nlattr *nlattr_begin_nested(struct libbpf_nla_req *req, int
 static inline void nlattr_end_nested(struct libbpf_nla_req *req,
 				     struct nlattr *tail)
 {
-	tail->nla_len = (char *)req_tail(req) - (char *)tail;
+	tail->nla_len = (void *)req_tail(req) - (void *)tail;
 }
 
 #endif /* __LIBBPF_NLATTR_H */

From 4e82d2e20f3b11f253bc5c6e92f05ed3694a1ae3 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:02:42 +0800
Subject: [PATCH 2545/3292] clockevents: Use list_move() instead of
 list_del()/list_add()

Simplify the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210609070242.1322450-1-libaokun1@huawei.com
---
 kernel/time/clockevents.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index bb9d2fe58480..003ccf338d20 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -347,8 +347,7 @@ static void clockevents_notify_released(void)
 	while (!list_empty(&clockevents_released)) {
 		dev = list_entry(clockevents_released.next,
 				 struct clock_event_device, list);
-		list_del(&dev->list);
-		list_add(&dev->list, &clockevent_devices);
+		list_move(&dev->list, &clockevent_devices);
 		tick_check_new_device(dev);
 	}
 }
@@ -576,8 +575,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
 	if (old) {
 		module_put(old->owner);
 		clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED);
-		list_del(&old->list);
-		list_add(&old->list, &clockevents_released);
+		list_move(&old->list, &clockevents_released);
 	}
 
 	if (new) {

From cd96e22bc1da0a7ddbe0769f6e393022aa8be1f1 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Mon, 26 Apr 2021 09:32:50 +0800
Subject: [PATCH 2546/3292] rtw88: add beacon filter support

Adding this supports beacon filter and CQM.
Let firmware perform connection quality monitor and beacon processing.
This make host CPU wakeup less under power save mode.
To make mechanisms work as usual, fw will notify driver events such as
signal change and beacon loss.

This feature needs firmware 9.9.8 or newer to support it, and driver is
compatible with older firmware.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210426013252.5665-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c       | 91 +++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/fw.h       | 39 ++++++++
 drivers/net/wireless/realtek/rtw88/mac80211.c |  7 ++
 drivers/net/wireless/realtek/rtw88/main.c     |  4 +-
 drivers/net/wireless/realtek/rtw88/main.h     |  1 +
 5 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index ea2cd4db1d3c..797b08b2a494 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -127,6 +127,51 @@ static void rtw_fw_ra_report_handle(struct rtw_dev *rtwdev, u8 *payload,
 	rtw_iterate_stas_atomic(rtwdev, rtw_fw_ra_report_iter, &ra_data);
 }
 
+struct rtw_beacon_filter_iter_data {
+	struct rtw_dev *rtwdev;
+	u8 *payload;
+};
+
+static void rtw_fw_bcn_filter_notify_vif_iter(void *data, u8 *mac,
+					      struct ieee80211_vif *vif)
+{
+	struct rtw_beacon_filter_iter_data *iter_data = data;
+	struct rtw_dev *rtwdev = iter_data->rtwdev;
+	u8 *payload = iter_data->payload;
+	u8 type = GET_BCN_FILTER_NOTIFY_TYPE(payload);
+	u8 event = GET_BCN_FILTER_NOTIFY_EVENT(payload);
+	s8 sig = (s8)GET_BCN_FILTER_NOTIFY_RSSI(payload);
+
+	switch (type) {
+	case BCN_FILTER_NOTIFY_SIGNAL_CHANGE:
+		event = event ? NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH :
+			NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW;
+		ieee80211_cqm_rssi_notify(vif, event, sig, GFP_KERNEL);
+		break;
+	case BCN_FILTER_CONNECTION_LOSS:
+		ieee80211_connection_loss(vif);
+		break;
+	case BCN_FILTER_CONNECTED:
+		rtwdev->beacon_loss = false;
+		break;
+	case BCN_FILTER_NOTIFY_BEACON_LOSS:
+		rtwdev->beacon_loss = true;
+		rtw_leave_lps(rtwdev);
+		break;
+	}
+}
+
+static void rtw_fw_bcn_filter_notify(struct rtw_dev *rtwdev, u8 *payload,
+				     u8 length)
+{
+	struct rtw_beacon_filter_iter_data dev_iter_data;
+
+	dev_iter_data.rtwdev = rtwdev;
+	dev_iter_data.payload = payload;
+	rtw_iterate_vifs(rtwdev, rtw_fw_bcn_filter_notify_vif_iter,
+			 &dev_iter_data);
+}
+
 void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 {
 	struct rtw_c2h_cmd *c2h;
@@ -152,6 +197,9 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 	case C2H_WLAN_INFO:
 		rtw_coex_wl_fwdbginfo_notify(rtwdev, c2h->payload, len);
 		break;
+	case C2H_BCN_FILTER_NOTIFY:
+		rtw_fw_bcn_filter_notify(rtwdev, c2h->payload, len);
+		break;
 	case C2H_HALMAC:
 		rtw_fw_c2h_cmd_handle_ext(rtwdev, skb);
 		break;
@@ -527,6 +575,49 @@ void rtw_fw_update_wl_phy_info(struct rtw_dev *rtwdev)
 	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
 }
 
+void rtw_fw_beacon_filter_config(struct rtw_dev *rtwdev, bool connect,
+				 struct ieee80211_vif *vif)
+{
+	struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+	struct ieee80211_sta *sta = ieee80211_find_sta(vif, bss_conf->bssid);
+	static const u8 rssi_min = 0, rssi_max = 100, rssi_offset = 100;
+	struct rtw_sta_info *si =
+		sta ? (struct rtw_sta_info *)sta->drv_priv : NULL;
+	s32 threshold = bss_conf->cqm_rssi_thold + rssi_offset;
+	struct rtw_fw_state *fw = &rtwdev->fw;
+	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
+
+	if (!(fw->feature & FW_FEATURE_BCN_FILTER))
+		return;
+
+	if (!connect) {
+		SET_H2C_CMD_ID_CLASS(h2c_pkt, H2C_CMD_BCN_FILTER_OFFLOAD_P1);
+		SET_BCN_FILTER_OFFLOAD_P1_ENABLE(h2c_pkt, connect);
+		rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
+
+		return;
+	}
+	SET_H2C_CMD_ID_CLASS(h2c_pkt, H2C_CMD_BCN_FILTER_OFFLOAD_P0);
+	ether_addr_copy(&h2c_pkt[1], bss_conf->bssid);
+	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
+
+	memset(h2c_pkt, 0, sizeof(h2c_pkt));
+	threshold = clamp_t(s32, threshold, rssi_min, rssi_max);
+	SET_H2C_CMD_ID_CLASS(h2c_pkt, H2C_CMD_BCN_FILTER_OFFLOAD_P1);
+	SET_BCN_FILTER_OFFLOAD_P1_ENABLE(h2c_pkt, connect);
+	SET_BCN_FILTER_OFFLOAD_P1_OFFLOAD_MODE(h2c_pkt,
+					       BCN_FILTER_OFFLOAD_MODE_DEFAULT);
+	SET_BCN_FILTER_OFFLOAD_P1_THRESHOLD(h2c_pkt, (u8)threshold);
+	SET_BCN_FILTER_OFFLOAD_P1_BCN_LOSS_CNT(h2c_pkt, BCN_LOSS_CNT);
+	if (si)
+		SET_BCN_FILTER_OFFLOAD_P1_MACID(h2c_pkt, si->mac_id);
+	else
+		rtw_warn(rtwdev, "CQM config with station not found\n");
+	SET_BCN_FILTER_OFFLOAD_P1_HYST(h2c_pkt, bss_conf->cqm_rssi_hyst);
+	SET_BCN_FILTER_OFFLOAD_P1_BCN_INTERVAL(h2c_pkt, bss_conf->beacon_int);
+	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
+}
+
 void rtw_fw_set_pwr_mode(struct rtw_dev *rtwdev)
 {
 	struct rtw_lps_conf *conf = &rtwdev->lps_conf;
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index 7c5b1d75e26f..3bfee27b1e1a 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -24,6 +24,12 @@
 #define DLFW_BLK_SIZE_LEGACY		4
 #define FW_START_ADDR_LEGACY		0x1000
 
+#define BCN_LOSS_CNT			10
+#define BCN_FILTER_NOTIFY_SIGNAL_CHANGE	0
+#define BCN_FILTER_CONNECTION_LOSS	1
+#define BCN_FILTER_CONNECTED		2
+#define BCN_FILTER_NOTIFY_BEACON_LOSS	3
+
 enum rtw_c2h_cmd_id {
 	C2H_CCX_TX_RPT = 0x03,
 	C2H_BT_INFO = 0x09,
@@ -32,6 +38,7 @@ enum rtw_c2h_cmd_id {
 	C2H_HW_FEATURE_REPORT = 0x19,
 	C2H_WLAN_INFO = 0x27,
 	C2H_WLAN_RFON = 0x32,
+	C2H_BCN_FILTER_NOTIFY = 0x36,
 	C2H_HW_FEATURE_DUMP = 0xfd,
 	C2H_HALMAC = 0xff,
 };
@@ -78,9 +85,19 @@ enum rtw_fw_feature {
 	FW_FEATURE_LPS_C2H = BIT(1),
 	FW_FEATURE_LCLK = BIT(2),
 	FW_FEATURE_PG = BIT(3),
+	FW_FEATURE_BCN_FILTER = BIT(5),
 	FW_FEATURE_MAX = BIT(31),
 };
 
+enum rtw_beacon_filter_offload_mode {
+	BCN_FILTER_OFFLOAD_MODE_0 = 0,
+	BCN_FILTER_OFFLOAD_MODE_1,
+	BCN_FILTER_OFFLOAD_MODE_2,
+	BCN_FILTER_OFFLOAD_MODE_3,
+
+	BCN_FILTER_OFFLOAD_MODE_DEFAULT = BCN_FILTER_OFFLOAD_MODE_1,
+};
+
 struct rtw_coex_info_req {
 	u8 seq;
 	u8 op_code;
@@ -237,6 +254,10 @@ struct rtw_fw_hdr_legacy {
 #define GET_RA_REPORT_BW(c2h_payload)		(c2h_payload[6])
 #define GET_RA_REPORT_MACID(c2h_payload)	(c2h_payload[1])
 
+#define GET_BCN_FILTER_NOTIFY_TYPE(c2h_payload)	(c2h_payload[1] & 0xf)
+#define GET_BCN_FILTER_NOTIFY_EVENT(c2h_payload)	(c2h_payload[1] & 0x10)
+#define GET_BCN_FILTER_NOTIFY_RSSI(c2h_payload)	(c2h_payload[2] - 100)
+
 /* PKT H2C */
 #define H2C_PKT_CMD_ID 0xFF
 #define H2C_PKT_CATEGORY 0x01
@@ -345,6 +366,8 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 #define H2C_CMD_LPS_PG_INFO		0x2b
 #define H2C_CMD_RA_INFO			0x40
 #define H2C_CMD_RSSI_MONITOR		0x42
+#define H2C_CMD_BCN_FILTER_OFFLOAD_P0	0x56
+#define H2C_CMD_BCN_FILTER_OFFLOAD_P1	0x57
 #define H2C_CMD_WL_PHY_INFO		0x58
 
 #define H2C_CMD_COEX_TDMA_TYPE		0x60
@@ -381,6 +404,20 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(15, 8))
 #define SET_WL_PHY_INFO_RX_EVM(h2c_pkt, value)				       \
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(23, 16))
+#define SET_BCN_FILTER_OFFLOAD_P1_MACID(h2c_pkt, value)			       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(15, 8))
+#define SET_BCN_FILTER_OFFLOAD_P1_ENABLE(h2c_pkt, value)		       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, BIT(16))
+#define SET_BCN_FILTER_OFFLOAD_P1_HYST(h2c_pkt, value)			       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(20, 17))
+#define SET_BCN_FILTER_OFFLOAD_P1_OFFLOAD_MODE(h2c_pkt, value)		       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(23, 21))
+#define SET_BCN_FILTER_OFFLOAD_P1_THRESHOLD(h2c_pkt, value)		       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(31, 24))
+#define SET_BCN_FILTER_OFFLOAD_P1_BCN_LOSS_CNT(h2c_pkt, value)		       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(3, 0))
+#define SET_BCN_FILTER_OFFLOAD_P1_BCN_INTERVAL(h2c_pkt, value)		       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(13, 4))
 
 #define SET_PWR_MODE_SET_MODE(h2c_pkt, value)                                  \
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(14, 8))
@@ -577,6 +614,8 @@ void rtw_fw_send_rssi_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si);
 void rtw_fw_send_ra_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si);
 void rtw_fw_media_status_report(struct rtw_dev *rtwdev, u8 mac_id, bool conn);
 void rtw_fw_update_wl_phy_info(struct rtw_dev *rtwdev);
+void rtw_fw_beacon_filter_config(struct rtw_dev *rtwdev, bool connect,
+				 struct ieee80211_vif *vif);
 int rtw_fw_write_data_rsvd_page(struct rtw_dev *rtwdev, u16 pg_addr,
 				u8 *buf, u32 size);
 void rtw_remove_rsvd_page(struct rtw_dev *rtwdev,
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 333df6b38113..9087c5b1ea80 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -148,11 +148,15 @@ static int rtw_ops_add_interface(struct ieee80211_hw *hw,
 {
 	struct rtw_dev *rtwdev = hw->priv;
 	struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv;
+	struct rtw_fw_state *fw = &rtwdev->fw;
 	enum rtw_net_type net_type;
 	u32 config = 0;
 	u8 port = 0;
 	u8 bcn_ctrl = 0;
 
+	if (fw->feature & FW_FEATURE_BCN_FILTER)
+		vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER |
+				     IEEE80211_VIF_SUPPORTS_CQM_RSSI;
 	rtwvif->port = port;
 	rtwvif->stats.tx_unicast = 0;
 	rtwvif->stats.rx_unicast = 0;
@@ -399,6 +403,8 @@ static void rtw_ops_bss_info_changed(struct ieee80211_hw *hw,
 			rtw_write32_clr(rtwdev, REG_FWHW_TXQ_CTRL,
 					BIT_EN_BCNQ_DL);
 	}
+	if (changed & BSS_CHANGED_CQM)
+		rtw_fw_beacon_filter_config(rtwdev, true, vif);
 
 	if (changed & BSS_CHANGED_MU_GROUPS)
 		rtw_chip_set_gid_table(rtwdev, vif, conf);
@@ -450,6 +456,7 @@ static int rtw_ops_sta_remove(struct ieee80211_hw *hw,
 {
 	struct rtw_dev *rtwdev = hw->priv;
 
+	rtw_fw_beacon_filter_config(rtwdev, false, vif);
 	mutex_lock(&rtwdev->mutex);
 	rtw_sta_remove(rtwdev, sta, true);
 	mutex_unlock(&rtwdev->mutex);
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index f3a3a86fa9b5..94fadef5c131 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -239,7 +239,8 @@ static void rtw_watch_dog_work(struct work_struct *work)
 	 * get that vif and check if device is having traffic more than the
 	 * threshold.
 	 */
-	if (rtwdev->ps_enabled && data.rtwvif && !ps_active)
+	if (rtwdev->ps_enabled && data.rtwvif && !ps_active &&
+	    !rtwdev->beacon_loss)
 		rtw_enter_lps(rtwdev, data.rtwvif->port);
 
 	rtwdev->watch_dog_cnt++;
@@ -292,6 +293,7 @@ int rtw_sta_add(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 	rtw_fw_media_status_report(rtwdev, si->mac_id, true);
 
 	rtwdev->sta_cnt++;
+	rtwdev->beacon_loss = false;
 	rtw_info(rtwdev, "sta %pM joined with macid %d\n",
 		 sta->addr, si->mac_id);
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index dc3744847ba9..321667c03b16 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1837,6 +1837,7 @@ struct rtw_dev {
 	/* lps power state & handler work */
 	struct rtw_lps_conf lps_conf;
 	bool ps_enabled;
+	bool beacon_loss;
 	struct completion lps_leave_check;
 
 	struct dentry *debugfs;

From 1188301fd8ef370ef344a98fbbf04b8b07148294 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Mon, 26 Apr 2021 09:32:51 +0800
Subject: [PATCH 2547/3292] rtw88: add path diversity

This feature chooses to transmit with antenna that has better signal
strength periodically under 1ss rate.

It can benefit connection quality in the following cases:
1. User is far away from the AP.
2. The far-field pattern of the antenna showed significant signal
strength difference.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210426013252.5665-2-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/debug.h    |  1 +
 drivers/net/wireless/realtek/rtw88/main.h     | 15 ++++
 drivers/net/wireless/realtek/rtw88/phy.c      | 81 +++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/phy.h      |  1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 39 +++++++--
 5 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/debug.h b/drivers/net/wireless/realtek/rtw88/debug.h
index c8efd1900a34..0dd3f9a88c8d 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.h
+++ b/drivers/net/wireless/realtek/rtw88/debug.h
@@ -20,6 +20,7 @@ enum rtw_debug_mask {
 	RTW_DBG_BF		= 0x00000800,
 	RTW_DBG_WOW		= 0x00001000,
 	RTW_DBG_CFO		= 0x00002000,
+	RTW_DBG_PATH_DIV	= 0x00004000,
 
 	RTW_DBG_ALL		= 0xffffffff
 };
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 321667c03b16..02ad175055cb 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -841,6 +841,10 @@ struct rtw_chip_ops {
 			     u8 fixrate_en, u8 *new_rate);
 	void (*cfo_init)(struct rtw_dev *rtwdev);
 	void (*cfo_track)(struct rtw_dev *rtwdev);
+	void (*config_tx_path)(struct rtw_dev *rtwdev, u8 tx_path,
+			       enum rtw_bb_path tx_path_1ss,
+			       enum rtw_bb_path tx_path_cck,
+			       bool is_tx2_path);
 
 	/* for coex */
 	void (*coex_set_init)(struct rtw_dev *rtwdev);
@@ -1136,7 +1140,9 @@ struct rtw_chip_info {
 	u8 max_power_index;
 
 	u16 fw_fifo_addr[RTW_FW_FIFO_MAX];
+	u8 default_1ss_tx_path;
 
+	bool path_div_supported;
 	bool ht_supported;
 	bool vht_supported;
 	u8 lps_deep_mode_supported;
@@ -1781,6 +1787,14 @@ struct rtw_hal {
 		     [DESC_RATE_MAX];
 };
 
+struct rtw_path_div {
+	enum rtw_bb_path current_tx_path;
+	u32 path_a_sum;
+	u32 path_b_sum;
+	u16 path_a_cnt;
+	u16 path_b_cnt;
+};
+
 struct rtw_dev {
 	struct ieee80211_hw *hw;
 	struct device *dev;
@@ -1849,6 +1863,7 @@ struct rtw_dev {
 	DECLARE_BITMAP(flags, NUM_OF_RTW_FLAGS);
 
 	u8 mp_mode;
+	struct rtw_path_div dm_path_div;
 
 	struct rtw_fw_state wow_fw;
 	struct rtw_wow_param wow;
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 8146acaf1893..569dd3cfde35 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -127,6 +127,17 @@ static void rtw_phy_cfo_init(struct rtw_dev *rtwdev)
 		chip->ops->cfo_init(rtwdev);
 }
 
+static void rtw_phy_tx_path_div_init(struct rtw_dev *rtwdev)
+{
+	struct rtw_path_div *path_div = &rtwdev->dm_path_div;
+
+	path_div->current_tx_path = rtwdev->chip->default_1ss_tx_path;
+	path_div->path_a_cnt = 0;
+	path_div->path_a_sum = 0;
+	path_div->path_b_cnt = 0;
+	path_div->path_b_sum = 0;
+}
+
 void rtw_phy_init(struct rtw_dev *rtwdev)
 {
 	struct rtw_chip_info *chip = rtwdev->chip;
@@ -149,6 +160,7 @@ void rtw_phy_init(struct rtw_dev *rtwdev)
 
 	dm_info->iqk.done = false;
 	rtw_phy_cfo_init(rtwdev);
+	rtw_phy_tx_path_div_init(rtwdev);
 }
 EXPORT_SYMBOL(rtw_phy_init);
 
@@ -695,6 +707,7 @@ void rtw_phy_dynamic_mechanism(struct rtw_dev *rtwdev)
 	rtw_phy_dig(rtwdev);
 	rtw_phy_cck_pd(rtwdev);
 	rtw_phy_ra_track(rtwdev);
+	rtw_phy_tx_path_diversity(rtwdev);
 	rtw_phy_cfo_track(rtwdev);
 	rtw_phy_dpk_track(rtwdev);
 	rtw_phy_pwr_track(rtwdev);
@@ -2315,3 +2328,71 @@ bool rtw_phy_pwrtrack_need_iqk(struct rtw_dev *rtwdev)
 	return false;
 }
 EXPORT_SYMBOL(rtw_phy_pwrtrack_need_iqk);
+
+static void rtw_phy_set_tx_path_by_reg(struct rtw_dev *rtwdev,
+				       enum rtw_bb_path tx_path_sel_1ss)
+{
+	struct rtw_path_div *path_div = &rtwdev->dm_path_div;
+	enum rtw_bb_path tx_path_sel_cck = tx_path_sel_1ss;
+	struct rtw_chip_info *chip = rtwdev->chip;
+
+	if (tx_path_sel_1ss == path_div->current_tx_path)
+		return;
+
+	path_div->current_tx_path = tx_path_sel_1ss;
+	rtw_dbg(rtwdev, RTW_DBG_PATH_DIV, "Switch TX path=%s\n",
+		tx_path_sel_1ss == BB_PATH_A ? "A" : "B");
+	chip->ops->config_tx_path(rtwdev, rtwdev->hal.antenna_tx,
+				  tx_path_sel_1ss, tx_path_sel_cck, false);
+}
+
+static void rtw_phy_tx_path_div_select(struct rtw_dev *rtwdev)
+{
+	struct rtw_path_div *path_div = &rtwdev->dm_path_div;
+	enum rtw_bb_path path = path_div->current_tx_path;
+	s32 rssi_a = 0, rssi_b = 0;
+
+	if (path_div->path_a_cnt)
+		rssi_a = path_div->path_a_sum / path_div->path_a_cnt;
+	else
+		rssi_a = 0;
+	if (path_div->path_b_cnt)
+		rssi_b = path_div->path_b_sum / path_div->path_b_cnt;
+	else
+		rssi_b = 0;
+
+	if (rssi_a != rssi_b)
+		path = (rssi_a > rssi_b) ? BB_PATH_A : BB_PATH_B;
+
+	path_div->path_a_cnt = 0;
+	path_div->path_a_sum = 0;
+	path_div->path_b_cnt = 0;
+	path_div->path_b_sum = 0;
+	rtw_phy_set_tx_path_by_reg(rtwdev, path);
+}
+
+static void rtw_phy_tx_path_diversity_2ss(struct rtw_dev *rtwdev)
+{
+	if (rtwdev->hal.antenna_rx != BB_PATH_AB) {
+		rtw_dbg(rtwdev, RTW_DBG_PATH_DIV,
+			"[Return] tx_Path_en=%d, rx_Path_en=%d\n",
+			rtwdev->hal.antenna_tx, rtwdev->hal.antenna_rx);
+		return;
+	}
+	if (rtwdev->sta_cnt == 0) {
+		rtw_dbg(rtwdev, RTW_DBG_PATH_DIV, "No Link\n");
+		return;
+	}
+
+	rtw_phy_tx_path_div_select(rtwdev);
+}
+
+void rtw_phy_tx_path_diversity(struct rtw_dev *rtwdev)
+{
+	struct rtw_chip_info *chip = rtwdev->chip;
+
+	if (!chip->path_div_supported)
+		return;
+
+	rtw_phy_tx_path_diversity_2ss(rtwdev);
+}
diff --git a/drivers/net/wireless/realtek/rtw88/phy.h b/drivers/net/wireless/realtek/rtw88/phy.h
index 0b6f2fc8193c..112ed125970a 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.h
+++ b/drivers/net/wireless/realtek/rtw88/phy.h
@@ -61,6 +61,7 @@ void rtw_phy_config_swing_table(struct rtw_dev *rtwdev,
 				struct rtw_swing_table *swing_table);
 void rtw_phy_parsing_cfo(struct rtw_dev *rtwdev,
 			 struct rtw_rx_pkt_stat *pkt_stat);
+void rtw_phy_tx_path_diversity(struct rtw_dev *rtwdev);
 
 struct rtw_txpwr_lmt_cfg_pair {
 	u8 regd;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 6cb593cc33c2..b6b43654e5c6 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -80,6 +80,13 @@ static void rtw8822c_header_file_init(struct rtw_dev *rtwdev, bool pre)
 		rtw_write32_set(rtwdev, REG_ENCCK, BIT_CCK_OFDM_BLK_EN);
 }
 
+static void rtw8822c_bb_reset(struct rtw_dev *rtwdev)
+{
+	rtw_write16_set(rtwdev, REG_SYS_FUNC_EN, BIT_FEN_BB_RSTB);
+	rtw_write16_clr(rtwdev, REG_SYS_FUNC_EN, BIT_FEN_BB_RSTB);
+	rtw_write16_set(rtwdev, REG_SYS_FUNC_EN, BIT_FEN_BB_RSTB);
+}
+
 static void rtw8822c_dac_backup_reg(struct rtw_dev *rtwdev,
 				    struct rtw_backup_info *backup,
 				    struct rtw_backup_info *backup_rf)
@@ -2424,10 +2431,11 @@ static void rtw8822c_config_cck_tx_path(struct rtw_dev *rtwdev, u8 tx_path,
 		else
 			rtw_write32_mask(rtwdev, REG_RXCCKSEL, 0xf0000000, 0x8);
 	}
+	rtw8822c_bb_reset(rtwdev);
 }
 
 static void rtw8822c_config_ofdm_tx_path(struct rtw_dev *rtwdev, u8 tx_path,
-					 bool is_tx2_path)
+					 enum rtw_bb_path tx_path_sel_1ss)
 {
 	if (tx_path == BB_PATH_A) {
 		rtw_write32_mask(rtwdev, REG_ANTMAP0, 0xff, 0x11);
@@ -2436,21 +2444,28 @@ static void rtw8822c_config_ofdm_tx_path(struct rtw_dev *rtwdev, u8 tx_path,
 		rtw_write32_mask(rtwdev, REG_ANTMAP0, 0xff, 0x12);
 		rtw_write32_mask(rtwdev, REG_TXLGMAP, 0xff, 0x0);
 	} else {
-		if (is_tx2_path) {
+		if (tx_path_sel_1ss == BB_PATH_AB) {
 			rtw_write32_mask(rtwdev, REG_ANTMAP0, 0xff, 0x33);
 			rtw_write32_mask(rtwdev, REG_TXLGMAP, 0xffff, 0x0404);
-		} else {
+		} else if (tx_path_sel_1ss == BB_PATH_B) {
+			rtw_write32_mask(rtwdev, REG_ANTMAP0, 0xff, 0x32);
+			rtw_write32_mask(rtwdev, REG_TXLGMAP, 0xffff, 0x0400);
+		} else if (tx_path_sel_1ss == BB_PATH_A) {
 			rtw_write32_mask(rtwdev, REG_ANTMAP0, 0xff, 0x31);
 			rtw_write32_mask(rtwdev, REG_TXLGMAP, 0xffff, 0x0400);
 		}
 	}
+	rtw8822c_bb_reset(rtwdev);
 }
 
 static void rtw8822c_config_tx_path(struct rtw_dev *rtwdev, u8 tx_path,
+				    enum rtw_bb_path tx_path_sel_1ss,
+				    enum rtw_bb_path tx_path_cck,
 				    bool is_tx2_path)
 {
-	rtw8822c_config_cck_tx_path(rtwdev, tx_path, is_tx2_path);
-	rtw8822c_config_ofdm_tx_path(rtwdev, tx_path, is_tx2_path);
+	rtw8822c_config_cck_tx_path(rtwdev, tx_path_cck, is_tx2_path);
+	rtw8822c_config_ofdm_tx_path(rtwdev, tx_path, tx_path_sel_1ss);
+	rtw8822c_bb_reset(rtwdev);
 }
 
 static void rtw8822c_config_trx_mode(struct rtw_dev *rtwdev, u8 tx_path,
@@ -2466,7 +2481,8 @@ static void rtw8822c_config_trx_mode(struct rtw_dev *rtwdev, u8 tx_path,
 		rtw_write32_mask(rtwdev, REG_ORITXCODE2, MASK20BITS, 0x11111);
 
 	rtw8822c_config_rx_path(rtwdev, rx_path);
-	rtw8822c_config_tx_path(rtwdev, tx_path, is_tx2_path);
+	rtw8822c_config_tx_path(rtwdev, tx_path, BB_PATH_A, BB_PATH_A,
+				is_tx2_path);
 
 	rtw8822c_toggle_igi(rtwdev);
 }
@@ -2517,6 +2533,7 @@ static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status,
 static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status,
 				   struct rtw_rx_pkt_stat *pkt_stat)
 {
+	struct rtw_path_div *p_div = &rtwdev->dm_path_div;
 	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
 	u8 rxsc, bw;
 	s8 min_rx_power = -120;
@@ -2559,6 +2576,13 @@ static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status,
 	for (path = 0; path <= rtwdev->hal.rf_path_num; path++) {
 		rssi = rtw_phy_rf_power_2_rssi(&pkt_stat->rx_power[path], 1);
 		dm_info->rssi[path] = rssi;
+		if (path == RF_PATH_A) {
+			p_div->path_a_sum += rssi;
+			p_div->path_a_cnt++;
+		} else if (path == RF_PATH_B) {
+			p_div->path_b_sum += rssi;
+			p_div->path_b_cnt++;
+		}
 		dm_info->rx_snr[path] = pkt_stat->rx_snr[path] >> 1;
 		dm_info->cfo_tail[path] = (pkt_stat->cfo_tail[path] * 5) >> 1;
 
@@ -4851,6 +4875,7 @@ static struct rtw_chip_ops rtw8822c_ops = {
 	.cfg_csi_rate		= rtw_bf_cfg_csi_rate,
 	.cfo_init		= rtw8822c_cfo_init,
 	.cfo_track		= rtw8822c_cfo_track,
+	.config_tx_path		= rtw8822c_config_tx_path,
 
 	.coex_set_init		= rtw8822c_coex_cfg_init,
 	.coex_set_ant_switch	= NULL,
@@ -5192,6 +5217,8 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.band = RTW_BAND_2G | RTW_BAND_5G,
 	.page_size = 128,
 	.dig_min = 0x20,
+	.default_1ss_tx_path = BB_PATH_A,
+	.path_div_supported = true,
 	.ht_supported = true,
 	.vht_supported = true,
 	.lps_deep_mode_supported = BIT(LPS_DEEP_MODE_LCLK) | BIT(LPS_DEEP_MODE_PG),

From 05684fd583e1acc34dddea283838fbfbed4904a0 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Mon, 26 Apr 2021 09:32:52 +0800
Subject: [PATCH 2548/3292] rtw88: 8822c: fix lc calibration timing

Before this patch, we use value from 2 seconds ago to decide
whether we should do lc calibration.
Although this don't happen frequently, fix flow to the way it should be.

Fixes: 7ae7784ec2a8 ("rtw88: 8822c: add LC calibration for RTL8822C")
Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210426013252.5665-3-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index b6b43654e5c6..436347f3b60f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -4395,26 +4395,28 @@ static void rtw8822c_pwrtrack_set(struct rtw_dev *rtwdev, u8 rf_path)
 	}
 }
 
-static void rtw8822c_pwr_track_path(struct rtw_dev *rtwdev,
-				    struct rtw_swing_table *swing_table,
-				    u8 path)
+static void rtw8822c_pwr_track_stats(struct rtw_dev *rtwdev, u8 path)
 {
-	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
-	u8 thermal_value, delta;
+	u8 thermal_value;
 
 	if (rtwdev->efuse.thermal_meter[path] == 0xff)
 		return;
 
 	thermal_value = rtw_read_rf(rtwdev, path, RF_T_METER, 0x7e);
-
 	rtw_phy_pwrtrack_avg(rtwdev, thermal_value, path);
+}
+
+static void rtw8822c_pwr_track_path(struct rtw_dev *rtwdev,
+				    struct rtw_swing_table *swing_table,
+				    u8 path)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u8 delta;
 
 	delta = rtw_phy_pwrtrack_get_delta(rtwdev, path);
-
 	dm_info->delta_power_index[path] =
 		rtw_phy_pwrtrack_get_pwridx(rtwdev, swing_table, path, path,
 					    delta);
-
 	rtw8822c_pwrtrack_set(rtwdev, path);
 }
 
@@ -4425,12 +4427,12 @@ static void __rtw8822c_pwr_track(struct rtw_dev *rtwdev)
 
 	rtw_phy_config_swing_table(rtwdev, &swing_table);
 
+	for (i = 0; i < rtwdev->hal.rf_path_num; i++)
+		rtw8822c_pwr_track_stats(rtwdev, i);
 	if (rtw_phy_pwrtrack_need_lck(rtwdev))
 		rtw8822c_do_lck(rtwdev);
-
 	for (i = 0; i < rtwdev->hal.rf_path_num; i++)
 		rtw8822c_pwr_track_path(rtwdev, &swing_table, i);
-
 }
 
 static void rtw8822c_pwr_track(struct rtw_dev *rtwdev)

From 3eab8ca6b1756d551da42e958c6f48f68cf470d3 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Fri, 30 Apr 2021 10:49:50 +0800
Subject: [PATCH 2549/3292] rtw88: Remove duplicate include of coex.h

In commit fb8517f4fade4 ("rtw88: 8822c: add CFO tracking"),
"coex.h" was added here which caused the duplicate include.
Remove the later duplicate include.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210430024951.33406-1-wanjiabing@vivo.com
---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 436347f3b60f..1a6721611dc1 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -17,7 +17,6 @@
 #include "util.h"
 #include "bf.h"
 #include "efuse.h"
-#include "coex.h"
 
 #define IQK_DONE_8822C 0xaa
 

From 7a1baaaee6c866455c9c77bf9b0405941a3678c7 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Thu, 6 May 2021 16:36:43 +0800
Subject: [PATCH 2550/3292] rtw88: 8822c: update RF parameter tables to v62

Update RTL8822C devices' RF tables to v62.
This fixes higher than expected spur in 2400 MHz under CCK mask.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210506083643.18317-1-pkshih@realtek.com
---
 .../wireless/realtek/rtw88/rtw8822c_table.c   | 1008 ++++++++---------
 1 file changed, 504 insertions(+), 504 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
index 822f3da91f1b..f9e3d0779c59 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
@@ -16812,53 +16812,53 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00010E46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x0003D646,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -18762,53 +18762,53 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -18957,53 +18957,53 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -19152,53 +19152,53 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -19347,53 +19347,53 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00031E46,
+		0x03F, 0x0003D646,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -19610,21 +19610,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19633,21 +19633,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19656,21 +19656,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19679,21 +19679,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19702,21 +19702,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19725,21 +19725,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19748,21 +19748,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19771,21 +19771,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19794,21 +19794,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19817,21 +19817,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19840,21 +19840,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19863,21 +19863,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19886,21 +19886,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19909,21 +19909,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19932,21 +19932,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19955,21 +19955,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -19978,21 +19978,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20001,21 +20001,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20024,21 +20024,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20047,21 +20047,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20070,21 +20070,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20093,21 +20093,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20116,21 +20116,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -20139,21 +20139,21 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x000008C8,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x000008CB,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x000008CE,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x000008D1,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x000008D4,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000DD1,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0xA0000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000487,
@@ -38484,21 +38484,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38507,21 +38507,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38530,21 +38530,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38553,21 +38553,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38576,21 +38576,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000006,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38599,21 +38599,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38622,21 +38622,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38645,21 +38645,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38668,21 +38668,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38691,21 +38691,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38714,21 +38714,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38737,21 +38737,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38760,21 +38760,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000006,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38783,21 +38783,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38806,21 +38806,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38829,21 +38829,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000001,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38852,21 +38852,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38875,21 +38875,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38898,21 +38898,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38921,21 +38921,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38944,21 +38944,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000006,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38967,21 +38967,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -38990,21 +38990,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0x95000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
@@ -39013,21 +39013,21 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000062,
 		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000CC6,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000CC9,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000CCC,
 		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
+		0x03F, 0x00000CCF,
 		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
+		0x03F, 0x00000CD2,
 		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
+		0x03F, 0x00000CD5,
 		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
+		0x03F, 0x00000DD4,
 		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
+		0x03F, 0x00000DD7,
 	0xA0000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000487,

From 9a711831c4e71f29897b3489c3097081aea580c4 Mon Sep 17 00:00:00 2001
From: Chin-Yen Lee <timlee@realtek.com>
Date: Fri, 14 May 2021 15:55:16 +0800
Subject: [PATCH 2551/3292] rtw88: add rtw_fw_feature_check api

add api to check if a certain feature is supported.

Signed-off-by: Chin-Yen Lee <timlee@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210514075517.14216-2-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c       | 3 +--
 drivers/net/wireless/realtek/rtw88/fw.h       | 6 ++++++
 drivers/net/wireless/realtek/rtw88/mac80211.c | 3 +--
 drivers/net/wireless/realtek/rtw88/main.c     | 4 ++--
 drivers/net/wireless/realtek/rtw88/ps.c       | 4 ++--
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 797b08b2a494..00c56ab6306b 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -584,10 +584,9 @@ void rtw_fw_beacon_filter_config(struct rtw_dev *rtwdev, bool connect,
 	struct rtw_sta_info *si =
 		sta ? (struct rtw_sta_info *)sta->drv_priv : NULL;
 	s32 threshold = bss_conf->cqm_rssi_thold + rssi_offset;
-	struct rtw_fw_state *fw = &rtwdev->fw;
 	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
 
-	if (!(fw->feature & FW_FEATURE_BCN_FILTER))
+	if (!rtw_fw_feature_check(&rtwdev->fw, FW_FEATURE_BCN_FILTER))
 		return;
 
 	if (!connect) {
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index 3bfee27b1e1a..832ef2bfe5f9 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -591,6 +591,12 @@ static inline struct rtw_c2h_cmd *get_c2h_from_skb(struct sk_buff *skb)
 	return (struct rtw_c2h_cmd *)(skb->data + pkt_offset);
 }
 
+static inline bool rtw_fw_feature_check(struct rtw_fw_state *fw,
+					enum rtw_fw_feature feature)
+{
+	return !!(fw->feature & feature);
+}
+
 void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
 			       struct sk_buff *skb);
 void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb);
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 9087c5b1ea80..8f46b16c8d5d 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -148,13 +148,12 @@ static int rtw_ops_add_interface(struct ieee80211_hw *hw,
 {
 	struct rtw_dev *rtwdev = hw->priv;
 	struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv;
-	struct rtw_fw_state *fw = &rtwdev->fw;
 	enum rtw_net_type net_type;
 	u32 config = 0;
 	u8 port = 0;
 	u8 bcn_ctrl = 0;
 
-	if (fw->feature & FW_FEATURE_BCN_FILTER)
+	if (rtw_fw_feature_check(&rtwdev->fw, FW_FEATURE_BCN_FILTER))
 		vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER |
 				     IEEE80211_VIF_SUPPORTS_CQM_RSSI;
 	rtwvif->port = port;
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 94fadef5c131..df115bb7fdf7 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1111,11 +1111,11 @@ static enum rtw_lps_deep_mode rtw_update_lps_deep_mode(struct rtw_dev *rtwdev,
 		return LPS_DEEP_MODE_NONE;
 
 	if ((chip->lps_deep_mode_supported & BIT(LPS_DEEP_MODE_PG)) &&
-	    (fw->feature & FW_FEATURE_PG))
+	    rtw_fw_feature_check(fw, FW_FEATURE_PG))
 		return LPS_DEEP_MODE_PG;
 
 	if ((chip->lps_deep_mode_supported & BIT(LPS_DEEP_MODE_LCLK)) &&
-	    (fw->feature & FW_FEATURE_LCLK))
+	    rtw_fw_feature_check(fw, FW_FEATURE_LCLK))
 		return LPS_DEEP_MODE_LCLK;
 
 	return LPS_DEEP_MODE_NONE;
diff --git a/drivers/net/wireless/realtek/rtw88/ps.c b/drivers/net/wireless/realtek/rtw88/ps.c
index 3bead34c3d10..3f0ac33156d6 100644
--- a/drivers/net/wireless/realtek/rtw88/ps.c
+++ b/drivers/net/wireless/realtek/rtw88/ps.c
@@ -152,7 +152,7 @@ static void rtw_fw_leave_lps_check(struct rtw_dev *rtwdev)
 	else
 		fw = &rtwdev->fw;
 
-	if (fw->feature & FW_FEATURE_LPS_C2H)
+	if (rtw_fw_feature_check(fw, FW_FEATURE_LPS_C2H))
 		ret = __rtw_fw_leave_lps_check_c2h(rtwdev);
 	else
 		ret = __rtw_fw_leave_lps_check_reg(rtwdev);
@@ -172,7 +172,7 @@ static void rtw_fw_leave_lps_check_prepare(struct rtw_dev *rtwdev)
 	else
 		fw = &rtwdev->fw;
 
-	if (fw->feature & FW_FEATURE_LPS_C2H)
+	if (rtw_fw_feature_check(fw, FW_FEATURE_LPS_C2H))
 		reinit_completion(&rtwdev->lps_leave_check);
 }
 

From a853d234e179086040912a8bbb3341829c079495 Mon Sep 17 00:00:00 2001
From: Chin-Yen Lee <timlee@realtek.com>
Date: Fri, 14 May 2021 15:55:17 +0800
Subject: [PATCH 2552/3292] rtw88: notify fw when driver in scan-period to
 avoid potential problem

It is found that driver scan could be affected by dynamic mechanism
of firmware, so we notify firmware to stop it in the scan period.
Another, firmware will detect the background noise and report to
driver for further use.

Signed-off-by: Chin-Yen Lee <timlee@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210514075517.14216-3-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c       | 25 +++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/fw.h       | 10 +++++++-
 drivers/net/wireless/realtek/rtw88/mac80211.c |  3 +++
 drivers/net/wireless/realtek/rtw88/main.c     | 17 +++++++++++++
 drivers/net/wireless/realtek/rtw88/main.h     |  4 +++
 5 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 00c56ab6306b..58f4e47aa96a 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -172,6 +172,17 @@ static void rtw_fw_bcn_filter_notify(struct rtw_dev *rtwdev, u8 *payload,
 			 &dev_iter_data);
 }
 
+static void rtw_fw_scan_result(struct rtw_dev *rtwdev, u8 *payload,
+			       u8 length)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+
+	dm_info->scan_density = payload[0];
+
+	rtw_dbg(rtwdev, RTW_DBG_FW, "scan.density = %x\n",
+		dm_info->scan_density);
+}
+
 void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 {
 	struct rtw_c2h_cmd *c2h;
@@ -235,6 +246,10 @@ void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
 	case C2H_WLAN_RFON:
 		complete(&rtwdev->lps_leave_check);
 		break;
+	case C2H_SCAN_RESULT:
+		complete(&rtwdev->fw_scan_density);
+		rtw_fw_scan_result(rtwdev, c2h->payload, len);
+		break;
 	default:
 		/* pass offset for further operation */
 		*((u32 *)skb->cb) = pkt_offset;
@@ -1703,3 +1718,13 @@ void rtw_fw_channel_switch(struct rtw_dev *rtwdev, bool enable)
 
 	rtw_fw_send_h2c_packet(rtwdev, h2c_pkt);
 }
+
+void rtw_fw_scan_notify(struct rtw_dev *rtwdev, bool start)
+{
+	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
+
+	SET_H2C_CMD_ID_CLASS(h2c_pkt, H2C_CMD_SCAN);
+	SET_SCAN_START(h2c_pkt, start);
+
+	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
+}
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index 832ef2bfe5f9..a8a7162fbe64 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -30,6 +30,8 @@
 #define BCN_FILTER_CONNECTED		2
 #define BCN_FILTER_NOTIFY_BEACON_LOSS	3
 
+#define SCAN_NOTIFY_TIMEOUT  msecs_to_jiffies(10)
+
 enum rtw_c2h_cmd_id {
 	C2H_CCX_TX_RPT = 0x03,
 	C2H_BT_INFO = 0x09,
@@ -39,6 +41,7 @@ enum rtw_c2h_cmd_id {
 	C2H_WLAN_INFO = 0x27,
 	C2H_WLAN_RFON = 0x32,
 	C2H_BCN_FILTER_NOTIFY = 0x36,
+	C2H_SCAN_RESULT = 0x38,
 	C2H_HW_FEATURE_DUMP = 0xfd,
 	C2H_HALMAC = 0xff,
 };
@@ -86,6 +89,7 @@ enum rtw_fw_feature {
 	FW_FEATURE_LCLK = BIT(2),
 	FW_FEATURE_PG = BIT(3),
 	FW_FEATURE_BCN_FILTER = BIT(5),
+	FW_FEATURE_NOTIFY_SCAN = BIT(6),
 	FW_FEATURE_MAX = BIT(31),
 };
 
@@ -369,6 +373,7 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 #define H2C_CMD_BCN_FILTER_OFFLOAD_P0	0x56
 #define H2C_CMD_BCN_FILTER_OFFLOAD_P1	0x57
 #define H2C_CMD_WL_PHY_INFO		0x58
+#define H2C_CMD_SCAN			0x59
 
 #define H2C_CMD_COEX_TDMA_TYPE		0x60
 #define H2C_CMD_QUERY_BT_INFO		0x61
@@ -419,6 +424,9 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 #define SET_BCN_FILTER_OFFLOAD_P1_BCN_INTERVAL(h2c_pkt, value)		       \
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(13, 4))
 
+#define SET_SCAN_START(h2c_pkt, value)					       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, BIT(8))
+
 #define SET_PWR_MODE_SET_MODE(h2c_pkt, value)                                  \
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(14, 8))
 #define SET_PWR_MODE_SET_RLBM(h2c_pkt, value)                                  \
@@ -652,5 +660,5 @@ void rtw_fw_h2c_cmd_dbg(struct rtw_dev *rtwdev, u8 *h2c);
 void rtw_fw_c2h_cmd_isr(struct rtw_dev *rtwdev);
 int rtw_fw_dump_fifo(struct rtw_dev *rtwdev, u8 fifo_sel, u32 addr, u32 size,
 		     u32 *buffer);
-
+void rtw_fw_scan_notify(struct rtw_dev *rtwdev, bool start);
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 8f46b16c8d5d..8a180c95e7e6 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -605,6 +605,7 @@ static void rtw_ops_sw_scan_start(struct ieee80211_hw *hw,
 	rtw_vif_port_config(rtwdev, rtwvif, config);
 
 	rtw_coex_scan_notify(rtwdev, COEX_SCAN_START);
+	rtw_core_fw_scan_notify(rtwdev, true);
 
 	set_bit(RTW_FLAG_DIG_DISABLE, rtwdev->flags);
 	set_bit(RTW_FLAG_SCANNING, rtwdev->flags);
@@ -624,6 +625,8 @@ static void rtw_ops_sw_scan_complete(struct ieee80211_hw *hw,
 	clear_bit(RTW_FLAG_SCANNING, rtwdev->flags);
 	clear_bit(RTW_FLAG_DIG_DISABLE, rtwdev->flags);
 
+	rtw_core_fw_scan_notify(rtwdev, false);
+
 	ether_addr_copy(rtwvif->mac_addr, vif->addr);
 	config |= PORT_SET_MAC_ADDR;
 	rtw_vif_port_config(rtwdev, rtwvif, config);
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index df115bb7fdf7..47f4838d0c58 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1185,6 +1185,22 @@ err:
 	return ret;
 }
 
+void rtw_core_fw_scan_notify(struct rtw_dev *rtwdev, bool start)
+{
+	if (!rtw_fw_feature_check(&rtwdev->fw, FW_FEATURE_NOTIFY_SCAN))
+		return;
+
+	if (start) {
+		rtw_fw_scan_notify(rtwdev, true);
+	} else {
+		reinit_completion(&rtwdev->fw_scan_density);
+		rtw_fw_scan_notify(rtwdev, false);
+		if (!wait_for_completion_timeout(&rtwdev->fw_scan_density,
+						 SCAN_NOTIFY_TIMEOUT))
+			rtw_warn(rtwdev, "firmware failed to report density after scan\n");
+	}
+}
+
 int rtw_core_start(struct rtw_dev *rtwdev)
 {
 	int ret;
@@ -1763,6 +1779,7 @@ int rtw_core_init(struct rtw_dev *rtwdev)
 
 	init_waitqueue_head(&rtwdev->coex.wait);
 	init_completion(&rtwdev->lps_leave_check);
+	init_completion(&rtwdev->fw_scan_density);
 
 	rtwdev->sec.total_cam_num = 32;
 	rtwdev->hal.current_channel = 1;
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 02ad175055cb..20b20a6db9cc 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1620,6 +1620,8 @@ struct rtw_dm_info {
 	struct rtw_iqk_info iqk;
 	struct rtw_gapk_info gapk;
 	bool is_bt_iqk_timeout;
+
+	u8 scan_density;
 };
 
 struct rtw_efuse {
@@ -1869,6 +1871,7 @@ struct rtw_dev {
 	struct rtw_wow_param wow;
 
 	bool need_rfk;
+	struct completion fw_scan_density;
 
 	/* hci related data, must be last */
 	u8 priv[] __aligned(sizeof(void *));
@@ -1974,6 +1977,7 @@ int rtw_sta_add(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 void rtw_sta_remove(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 		    bool fw_exist);
 void rtw_fw_recovery(struct rtw_dev *rtwdev);
+void rtw_core_fw_scan_notify(struct rtw_dev *rtwdev, bool start);
 int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
 		const char *prefix_str);
 int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size,

From 7b80f3e48c4b0ff85ff91945a2537d0bbc3e3cc4 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 28 May 2021 11:29:00 +0800
Subject: [PATCH 2553/3292] rtw88: dump FW crash via devcoredump

Use device coredump framework instead of print_hex_dump to support
FW crash dump. Pass data to the framework if preparing and dumping
are successful. The framework will take the ownership of the data.
The data will be freed after the framework determines its lifetime
is over. A new coredump will not work if the previous one still
exists.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210528032901.12927-2-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/debug.c    |   7 +
 drivers/net/wireless/realtek/rtw88/main.c     | 170 ++++++++++++------
 drivers/net/wireless/realtek/rtw88/main.h     |  37 +++-
 drivers/net/wireless/realtek/rtw88/rtw8822c.c |  51 +++++-
 4 files changed, 201 insertions(+), 64 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c
index 18ab472ea46c..dfd52cff5d02 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.c
+++ b/drivers/net/wireless/realtek/rtw88/debug.c
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "phy.h"
 #include "reg.h"
+#include "ps.h"
 
 #ifdef CONFIG_RTW88_DEBUGFS
 
@@ -847,7 +848,13 @@ static ssize_t rtw_debugfs_set_fw_crash(struct file *filp,
 	if (!input)
 		return -EINVAL;
 
+	if (test_bit(RTW_FLAG_RESTARTING, rtwdev->flags))
+		return -EINPROGRESS;
+
+	mutex_lock(&rtwdev->mutex);
+	rtw_leave_lps_deep(rtwdev);
 	rtw_write8(rtwdev, REG_HRCV_MSG, 1);
+	mutex_unlock(&rtwdev->mutex);
 
 	return count;
 }
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 47f4838d0c58..4a9a8544e8ca 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -2,6 +2,8 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/devcoredump.h>
+
 #include "main.h"
 #include "regd.h"
 #include "fw.h"
@@ -320,59 +322,131 @@ void rtw_sta_remove(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 		 sta->addr, si->mac_id);
 }
 
-static bool rtw_fw_dump_crash_log(struct rtw_dev *rtwdev)
+struct rtw_fwcd_hdr {
+	u32 item;
+	u32 size;
+	u32 padding1;
+	u32 padding2;
+} __packed;
+
+static int rtw_fwcd_prep(struct rtw_dev *rtwdev)
+{
+	struct rtw_chip_info *chip = rtwdev->chip;
+	struct rtw_fwcd_desc *desc = &rtwdev->fw.fwcd_desc;
+	const struct rtw_fwcd_segs *segs = chip->fwcd_segs;
+	u32 prep_size = chip->fw_rxff_size + sizeof(struct rtw_fwcd_hdr);
+	u8 i;
+
+	if (segs) {
+		prep_size += segs->num * sizeof(struct rtw_fwcd_hdr);
+
+		for (i = 0; i < segs->num; i++)
+			prep_size += segs->segs[i];
+	}
+
+	desc->data = vmalloc(prep_size);
+	if (!desc->data)
+		return -ENOMEM;
+
+	desc->size = prep_size;
+	desc->next = desc->data;
+
+	return 0;
+}
+
+static u8 *rtw_fwcd_next(struct rtw_dev *rtwdev, u32 item, u32 size)
+{
+	struct rtw_fwcd_desc *desc = &rtwdev->fw.fwcd_desc;
+	struct rtw_fwcd_hdr *hdr;
+	u8 *next;
+
+	if (!desc->data) {
+		rtw_dbg(rtwdev, RTW_DBG_FW, "fwcd isn't prepared successfully\n");
+		return NULL;
+	}
+
+	next = desc->next + sizeof(struct rtw_fwcd_hdr);
+	if (next - desc->data + size > desc->size) {
+		rtw_dbg(rtwdev, RTW_DBG_FW, "fwcd isn't prepared enough\n");
+		return NULL;
+	}
+
+	hdr = (struct rtw_fwcd_hdr *)(desc->next);
+	hdr->item = item;
+	hdr->size = size;
+	hdr->padding1 = 0x01234567;
+	hdr->padding2 = 0x89abcdef;
+	desc->next = next + size;
+
+	return next;
+}
+
+static void rtw_fwcd_dump(struct rtw_dev *rtwdev)
+{
+	struct rtw_fwcd_desc *desc = &rtwdev->fw.fwcd_desc;
+
+	rtw_dbg(rtwdev, RTW_DBG_FW, "dump fwcd\n");
+
+	/* Data will be freed after lifetime of device coredump. After calling
+	 * dev_coredump, data is supposed to be handled by the device coredump
+	 * framework. Note that a new dump will be discarded if a previous one
+	 * hasn't been released yet.
+	 */
+	dev_coredumpv(rtwdev->dev, desc->data, desc->size, GFP_KERNEL);
+}
+
+static void rtw_fwcd_free(struct rtw_dev *rtwdev, bool free_self)
+{
+	struct rtw_fwcd_desc *desc = &rtwdev->fw.fwcd_desc;
+
+	if (free_self) {
+		rtw_dbg(rtwdev, RTW_DBG_FW, "free fwcd by self\n");
+		vfree(desc->data);
+	}
+
+	desc->data = NULL;
+	desc->next = NULL;
+}
+
+static int rtw_fw_dump_crash_log(struct rtw_dev *rtwdev)
 {
 	u32 size = rtwdev->chip->fw_rxff_size;
 	u32 *buf;
 	u8 seq;
-	bool ret = true;
 
-	buf = vmalloc(size);
+	buf = (u32 *)rtw_fwcd_next(rtwdev, RTW_FWCD_TLV, size);
 	if (!buf)
-		goto exit;
+		return -ENOMEM;
 
 	if (rtw_fw_dump_fifo(rtwdev, RTW_FW_FIFO_SEL_RXBUF_FW, 0, size, buf)) {
 		rtw_dbg(rtwdev, RTW_DBG_FW, "dump fw fifo fail\n");
-		goto free_buf;
+		return -EINVAL;
 	}
 
 	if (GET_FW_DUMP_LEN(buf) == 0) {
 		rtw_dbg(rtwdev, RTW_DBG_FW, "fw crash dump's length is 0\n");
-		goto free_buf;
+		return -EINVAL;
 	}
 
 	seq = GET_FW_DUMP_SEQ(buf);
-	if (seq > 0 && seq != (rtwdev->fw.prev_dump_seq + 1)) {
+	if (seq > 0) {
 		rtw_dbg(rtwdev, RTW_DBG_FW,
 			"fw crash dump's seq is wrong: %d\n", seq);
-		goto free_buf;
+		return -EINVAL;
 	}
 
-	print_hex_dump(KERN_ERR, "rtw88 fw dump: ", DUMP_PREFIX_OFFSET, 16, 1,
-		       buf, size, true);
-
-	if (GET_FW_DUMP_MORE(buf) == 1) {
-		rtwdev->fw.prev_dump_seq = seq;
-		ret = false;
-	}
-
-free_buf:
-	vfree(buf);
-exit:
-	rtw_write8(rtwdev, REG_MCU_TST_CFG, 0);
-
-	return ret;
+	return 0;
 }
 
 int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
-		const char *prefix_str)
+		u32 fwcd_item)
 {
 	u32 rxff = rtwdev->chip->fw_rxff_size;
 	u32 dump_size, done_size = 0;
 	u8 *buf;
 	int ret;
 
-	buf = vzalloc(size);
+	buf = rtw_fwcd_next(rtwdev, fwcd_item, size);
 	if (!buf)
 		return -ENOMEM;
 
@@ -385,7 +459,7 @@ int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
 			rtw_err(rtwdev,
 				"ddma fw 0x%x [+0x%x] to fw fifo fail\n",
 				ocp_src, done_size);
-			goto exit;
+			return ret;
 		}
 
 		ret = rtw_fw_dump_fifo(rtwdev, RTW_FW_FIFO_SEL_RXBUF_FW, 0,
@@ -394,24 +468,18 @@ int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
 			rtw_err(rtwdev,
 				"dump fw 0x%x [+0x%x] from fw fifo fail\n",
 				ocp_src, done_size);
-			goto exit;
+			return ret;
 		}
 
 		size -= dump_size;
 		done_size += dump_size;
 	}
 
-	print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 1,
-		       buf, done_size, true);
-
-exit:
-	vfree(buf);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(rtw_dump_fw);
 
-int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size,
-		 const char *prefix_str)
+int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size)
 {
 	u8 *buf;
 	u32 i;
@@ -421,17 +489,13 @@ int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size,
 		return -EINVAL;
 	}
 
-	buf = vzalloc(size);
+	buf = rtw_fwcd_next(rtwdev, RTW_FWCD_REG, size);
 	if (!buf)
 		return -ENOMEM;
 
 	for (i = 0; i < size; i += 4)
 		*(u32 *)(buf + i) = rtw_read32(rtwdev, addr + i);
 
-	print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf,
-		       size, true);
-
-	vfree(buf);
 	return 0;
 }
 EXPORT_SYMBOL(rtw_dump_reg);
@@ -489,20 +553,24 @@ void rtw_fw_recovery(struct rtw_dev *rtwdev)
 
 static void __fw_recovery_work(struct rtw_dev *rtwdev)
 {
-
-	/* rtw_fw_dump_crash_log() returns false indicates that there are
-	 * still more log to dump. Driver set 0x1cf[7:0] = 0x1 to tell firmware
-	 * to dump the remaining part of the log, and firmware will trigger an
-	 * IMR_C2HCMD interrupt to inform driver the log is ready.
-	 */
-	if (!rtw_fw_dump_crash_log(rtwdev)) {
-		rtw_write8(rtwdev, REG_HRCV_MSG, 1);
-		return;
-	}
-	rtwdev->fw.prev_dump_seq = 0;
+	int ret = 0;
 
 	set_bit(RTW_FLAG_RESTARTING, rtwdev->flags);
-	rtw_chip_dump_fw_crash(rtwdev);
+
+	ret = rtw_fwcd_prep(rtwdev);
+	if (ret)
+		goto free;
+	ret = rtw_fw_dump_crash_log(rtwdev);
+	if (ret)
+		goto free;
+	ret = rtw_chip_dump_fw_crash(rtwdev);
+	if (ret)
+		goto free;
+
+	rtw_fwcd_dump(rtwdev);
+free:
+	rtw_fwcd_free(rtwdev, !!ret);
+	rtw_write8(rtwdev, REG_MCU_TST_CFG, 0);
 
 	WARN(1, "firmware crash, start reset and recover\n");
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 20b20a6db9cc..e5af375b3dd0 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -806,7 +806,7 @@ struct rtw_regulatory {
 
 struct rtw_chip_ops {
 	int (*mac_init)(struct rtw_dev *rtwdev);
-	void (*dump_fw_crash)(struct rtw_dev *rtwdev);
+	int (*dump_fw_crash)(struct rtw_dev *rtwdev);
 	void (*shutdown)(struct rtw_dev *rtwdev);
 	int (*read_efuse)(struct rtw_dev *rtwdev, u8 *map);
 	void (*phy_set_param)(struct rtw_dev *rtwdev);
@@ -1112,6 +1112,15 @@ enum rtw_fw_fifo_sel {
 	RTW_FW_FIFO_MAX,
 };
 
+enum rtw_fwcd_item {
+	RTW_FWCD_TLV,
+	RTW_FWCD_REG,
+	RTW_FWCD_ROM,
+	RTW_FWCD_IMEM,
+	RTW_FWCD_DMEM,
+	RTW_FWCD_EMEM,
+};
+
 /* hardware configuration for each IC */
 struct rtw_chip_info {
 	struct rtw_chip_ops *ops;
@@ -1140,6 +1149,8 @@ struct rtw_chip_info {
 	u8 max_power_index;
 
 	u16 fw_fifo_addr[RTW_FW_FIFO_MAX];
+	const struct rtw_fwcd_segs *fwcd_segs;
+
 	u8 default_1ss_tx_path;
 
 	bool path_div_supported;
@@ -1725,6 +1736,17 @@ struct rtw_fifo_conf {
 	const struct rtw_rqpn *rqpn;
 };
 
+struct rtw_fwcd_desc {
+	u32 size;
+	u8 *next;
+	u8 *data;
+};
+
+struct rtw_fwcd_segs {
+	const u32 *segs;
+	u8 num;
+};
+
 #define FW_CD_TYPE 0xffff
 #define FW_CD_LEN 4
 #define FW_CD_VAL 0xaabbccdd
@@ -1732,11 +1754,11 @@ struct rtw_fw_state {
 	const struct firmware *firmware;
 	struct rtw_dev *rtwdev;
 	struct completion completion;
+	struct rtw_fwcd_desc fwcd_desc;
 	u16 version;
 	u8 sub_version;
 	u8 sub_index;
 	u16 h2c_version;
-	u8 prev_dump_seq;
 	u32 feature;
 };
 
@@ -1942,10 +1964,12 @@ static inline void rtw_release_macid(struct rtw_dev *rtwdev, u8 mac_id)
 	clear_bit(mac_id, rtwdev->mac_id_map);
 }
 
-static inline void rtw_chip_dump_fw_crash(struct rtw_dev *rtwdev)
+static inline int rtw_chip_dump_fw_crash(struct rtw_dev *rtwdev)
 {
 	if (rtwdev->chip->ops->dump_fw_crash)
-		rtwdev->chip->ops->dump_fw_crash(rtwdev);
+		return rtwdev->chip->ops->dump_fw_crash(rtwdev);
+
+	return 0;
 }
 
 void rtw_get_channel_params(struct cfg80211_chan_def *chandef,
@@ -1979,8 +2003,7 @@ void rtw_sta_remove(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 void rtw_fw_recovery(struct rtw_dev *rtwdev);
 void rtw_core_fw_scan_notify(struct rtw_dev *rtwdev, bool start);
 int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
-		const char *prefix_str);
-int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size,
-		 const char *prefix_str);
+		u32 fwcd_item);
+int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size);
 
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 1a6721611dc1..8bf3cd3a3678 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -2109,13 +2109,51 @@ static int rtw8822c_mac_init(struct rtw_dev *rtwdev)
 	return 0;
 }
 
-static void rtw8822c_dump_fw_crash(struct rtw_dev *rtwdev)
+#define FWCD_SIZE_REG_8822C 0x2000
+#define FWCD_SIZE_DMEM_8822C 0x10000
+#define FWCD_SIZE_IMEM_8822C 0x10000
+#define FWCD_SIZE_EMEM_8822C 0x20000
+#define FWCD_SIZE_ROM_8822C 0x10000
+
+static const u32 __fwcd_segs_8822c[] = {
+	FWCD_SIZE_REG_8822C,
+	FWCD_SIZE_DMEM_8822C,
+	FWCD_SIZE_IMEM_8822C,
+	FWCD_SIZE_EMEM_8822C,
+	FWCD_SIZE_ROM_8822C,
+};
+
+static const struct rtw_fwcd_segs rtw8822c_fwcd_segs = {
+	.segs = __fwcd_segs_8822c,
+	.num = ARRAY_SIZE(__fwcd_segs_8822c),
+};
+
+static int rtw8822c_dump_fw_crash(struct rtw_dev *rtwdev)
 {
-	rtw_dump_reg(rtwdev, 0x0, 0x2000, "rtw8822c reg_");
-	rtw_dump_fw(rtwdev, OCPBASE_DMEM_88XX, 0x10000, "rtw8822c DMEM_");
-	rtw_dump_fw(rtwdev, OCPBASE_IMEM_88XX, 0x10000, "rtw8822c IMEM_");
-	rtw_dump_fw(rtwdev, OCPBASE_EMEM_88XX, 0x20000, "rtw8822c EMEM_");
-	rtw_dump_fw(rtwdev, OCPBASE_ROM_88XX, 0x10000, "rtw8822c ROM_");
+#define __dump_fw_8822c(_dev, _mem) \
+	rtw_dump_fw(_dev, OCPBASE_ ## _mem ## _88XX, \
+		    FWCD_SIZE_ ## _mem ## _8822C, RTW_FWCD_ ## _mem)
+	int ret;
+
+	ret = rtw_dump_reg(rtwdev, 0x0, FWCD_SIZE_REG_8822C);
+	if (ret)
+		return ret;
+	ret = __dump_fw_8822c(rtwdev, DMEM);
+	if (ret)
+		return ret;
+	ret = __dump_fw_8822c(rtwdev, IMEM);
+	if (ret)
+		return ret;
+	ret = __dump_fw_8822c(rtwdev, EMEM);
+	if (ret)
+		return ret;
+	ret = __dump_fw_8822c(rtwdev, ROM);
+	if (ret)
+		return ret;
+
+	return 0;
+
+#undef __dump_fw_8822c
 }
 
 static void rtw8822c_rstb_3wire(struct rtw_dev *rtwdev, bool enable)
@@ -5287,6 +5325,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.coex_info_hw_regs = coex_info_hw_regs_8822c,
 
 	.fw_fifo_addr = {0x780, 0x700, 0x780, 0x660, 0x650, 0x680},
+	.fwcd_segs = &rtw8822c_fwcd_segs,
 };
 EXPORT_SYMBOL(rtw8822c_hw_spec);
 

From ae04f15b1a83e813f5c270f44692766dc3c5a6ce Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Fri, 28 May 2021 11:29:01 +0800
Subject: [PATCH 2554/3292] rtw88: refine unwanted h2c command

Don't send beacon filter h2c when there is no valid context.
Return early instead of printing out warning messages, so others
won't get confused.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210528032901.12927-3-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 58f4e47aa96a..176e8b67530e 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -601,7 +601,7 @@ void rtw_fw_beacon_filter_config(struct rtw_dev *rtwdev, bool connect,
 	s32 threshold = bss_conf->cqm_rssi_thold + rssi_offset;
 	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
 
-	if (!rtw_fw_feature_check(&rtwdev->fw, FW_FEATURE_BCN_FILTER))
+	if (!rtw_fw_feature_check(&rtwdev->fw, FW_FEATURE_BCN_FILTER) || !si)
 		return;
 
 	if (!connect) {
@@ -623,10 +623,7 @@ void rtw_fw_beacon_filter_config(struct rtw_dev *rtwdev, bool connect,
 					       BCN_FILTER_OFFLOAD_MODE_DEFAULT);
 	SET_BCN_FILTER_OFFLOAD_P1_THRESHOLD(h2c_pkt, (u8)threshold);
 	SET_BCN_FILTER_OFFLOAD_P1_BCN_LOSS_CNT(h2c_pkt, BCN_LOSS_CNT);
-	if (si)
-		SET_BCN_FILTER_OFFLOAD_P1_MACID(h2c_pkt, si->mac_id);
-	else
-		rtw_warn(rtwdev, "CQM config with station not found\n");
+	SET_BCN_FILTER_OFFLOAD_P1_MACID(h2c_pkt, si->mac_id);
 	SET_BCN_FILTER_OFFLOAD_P1_HYST(h2c_pkt, bss_conf->cqm_rssi_hyst);
 	SET_BCN_FILTER_OFFLOAD_P1_BCN_INTERVAL(h2c_pkt, bss_conf->beacon_int);
 	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);

From 956c6d4f20c5446727e0c912dd8f527f2dc7b779 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 7 Jun 2021 09:22:54 +0800
Subject: [PATCH 2555/3292] rtw88: add quirks to disable pci capabilities

8821CE with ASPM cannot work properly on Protempo Ltd L116HTN6SPW. Add a
quirk to disable the cap.

The reporter describes the symptom is that this module (driver) causes
frequent freezes, randomly but usually within a few minutes of running
(thus very soon after boot): screen display remains frozen, no response
to either keyboard or mouse input. All I can do is to hold the power
button to power off, then reboot.

Reported-by: Paul Szabo <psz2036@gmail.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210607012254.6306-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/pci.c | 32 ++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index f59a4c462e3b..e7d17ab8f113 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/dmi.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include "main.h"
@@ -1673,6 +1674,36 @@ static void rtw_pci_napi_deinit(struct rtw_dev *rtwdev)
 	netif_napi_del(&rtwpci->napi);
 }
 
+enum rtw88_quirk_dis_pci_caps {
+	QUIRK_DIS_PCI_CAP_MSI,
+	QUIRK_DIS_PCI_CAP_ASPM,
+};
+
+static int disable_pci_caps(const struct dmi_system_id *dmi)
+{
+	uintptr_t dis_caps = (uintptr_t)dmi->driver_data;
+
+	if (dis_caps & BIT(QUIRK_DIS_PCI_CAP_MSI))
+		rtw_disable_msi = true;
+	if (dis_caps & BIT(QUIRK_DIS_PCI_CAP_ASPM))
+		rtw_pci_disable_aspm = true;
+
+	return 1;
+}
+
+static const struct dmi_system_id rtw88_pci_quirks[] = {
+	{
+		.callback = disable_pci_caps,
+		.ident = "Protempo Ltd L116HTN6SPW",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Protempo Ltd"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "L116HTN6SPW"),
+		},
+		.driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
+	},
+	{}
+};
+
 int rtw_pci_probe(struct pci_dev *pdev,
 		  const struct pci_device_id *id)
 {
@@ -1723,6 +1754,7 @@ int rtw_pci_probe(struct pci_dev *pdev,
 		goto err_destroy_pci;
 	}
 
+	dmi_check_system(rtw88_pci_quirks);
 	rtw_pci_phy_cfg(rtwdev);
 
 	ret = rtw_register_hw(rtwdev, hw);

From b38678a73c4d8a3616ca14713154e062b4c4db63 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Thu, 20 May 2021 08:55:45 +0800
Subject: [PATCH 2556/3292] rtw88: coex: remove unnecessary variable and label

In some funciton, the variable ret just used as return value,and
out label just return ret,so ret and out label are unnecessary,
we should delete these and use return true/false to replace.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210520005545.31272-1-samirweng1979@163.com
---
 drivers/net/wireless/realtek/rtw88/coex.c | 40 ++++++-----------------
 1 file changed, 10 insertions(+), 30 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index cedbf3825848..103e87745be6 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -630,20 +630,16 @@ static bool rtw_coex_get_bt_scan_type(struct rtw_dev *rtwdev, u8 *scan_type)
 	struct rtw_coex_info_req req = {0};
 	struct sk_buff *skb;
 	u8 *payload;
-	bool ret = false;
 
 	req.op_code = BT_MP_INFO_OP_SCAN_TYPE;
 	skb = rtw_coex_info_request(rtwdev, &req);
 	if (!skb)
-		goto out;
+		return false;
 
 	payload = get_payload_from_coex_resp(skb);
 	*scan_type = GET_COEX_RESP_BT_SCAN_TYPE(payload);
 	dev_kfree_skb_any(skb);
-	ret = true;
-
-out:
-	return ret;
+	return true;
 }
 
 static bool rtw_coex_set_lna_constrain_level(struct rtw_dev *rtwdev,
@@ -651,19 +647,15 @@ static bool rtw_coex_set_lna_constrain_level(struct rtw_dev *rtwdev,
 {
 	struct rtw_coex_info_req req = {0};
 	struct sk_buff *skb;
-	bool ret = false;
 
 	req.op_code = BT_MP_INFO_OP_LNA_CONSTRAINT;
 	req.para1 = lna_constrain_level;
 	skb = rtw_coex_info_request(rtwdev, &req);
 	if (!skb)
-		goto out;
+		return false;
 
 	dev_kfree_skb_any(skb);
-	ret = true;
-
-out:
-	return ret;
+	return true;
 }
 
 #define case_BTSTATUS(src) \
@@ -3533,19 +3525,15 @@ static bool rtw_coex_get_bt_patch_version(struct rtw_dev *rtwdev,
 	struct rtw_coex_info_req req = {0};
 	struct sk_buff *skb;
 	u8 *payload;
-	bool ret = false;
 
 	req.op_code = BT_MP_INFO_OP_PATCH_VER;
 	skb = rtw_coex_info_request(rtwdev, &req);
 	if (!skb)
-		goto out;
+		return false;
 
 	payload = get_payload_from_coex_resp(skb);
 	*patch_version = GET_COEX_RESP_BT_PATCH_VER(payload);
-	ret = true;
-
-out:
-	return ret;
+	return true;
 }
 
 static bool rtw_coex_get_bt_supported_version(struct rtw_dev *rtwdev,
@@ -3554,19 +3542,15 @@ static bool rtw_coex_get_bt_supported_version(struct rtw_dev *rtwdev,
 	struct rtw_coex_info_req req = {0};
 	struct sk_buff *skb;
 	u8 *payload;
-	bool ret = false;
 
 	req.op_code = BT_MP_INFO_OP_SUPP_VER;
 	skb = rtw_coex_info_request(rtwdev, &req);
 	if (!skb)
-		goto out;
+		return false;
 
 	payload = get_payload_from_coex_resp(skb);
 	*supported_version = GET_COEX_RESP_BT_SUPP_VER(payload);
-	ret = true;
-
-out:
-	return ret;
+	return true;
 }
 
 static bool rtw_coex_get_bt_supported_feature(struct rtw_dev *rtwdev,
@@ -3575,19 +3559,15 @@ static bool rtw_coex_get_bt_supported_feature(struct rtw_dev *rtwdev,
 	struct rtw_coex_info_req req = {0};
 	struct sk_buff *skb;
 	u8 *payload;
-	bool ret = false;
 
 	req.op_code = BT_MP_INFO_OP_SUPP_FEAT;
 	skb = rtw_coex_info_request(rtwdev, &req);
 	if (!skb)
-		goto out;
+		return false;
 
 	payload = get_payload_from_coex_resp(skb);
 	*supported_feature = GET_COEX_RESP_BT_SUPP_FEAT(payload);
-	ret = true;
-
-out:
-	return ret;
+	return true;
 }
 
 struct rtw_coex_sta_stat_iter_data {

From 70ca8441ebfc4412dc9d3c56409e73dba959ab34 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 13:39:51 -0700
Subject: [PATCH 2557/3292] orinoco: Avoid field-overflowing memcpy()

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring array fields.

Validate the expected key size and introduce a wrapping structure
to use as the multi-field memcpy() destination so that overflows
can be correctly detected.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210616203952.1248910-1-keescook@chromium.org
---
 drivers/net/wireless/intersil/orinoco/hw.c   | 18 +++++++++++-------
 drivers/net/wireless/intersil/orinoco/hw.h   |  5 +++--
 drivers/net/wireless/intersil/orinoco/wext.c |  2 +-
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intersil/orinoco/hw.c b/drivers/net/wireless/intersil/orinoco/hw.c
index 2c7adb4be100..0aea35c9c11c 100644
--- a/drivers/net/wireless/intersil/orinoco/hw.c
+++ b/drivers/net/wireless/intersil/orinoco/hw.c
@@ -988,15 +988,18 @@ int __orinoco_hw_setup_enc(struct orinoco_private *priv)
  * tsc must be NULL or up to 8 bytes
  */
 int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
-			      int set_tx, const u8 *key, const u8 *rsc,
-			      size_t rsc_len, const u8 *tsc, size_t tsc_len)
+			      int set_tx, const u8 *key, size_t key_len,
+			      const u8 *rsc, size_t rsc_len,
+			      const u8 *tsc, size_t tsc_len)
 {
 	struct {
 		__le16 idx;
 		u8 rsc[ORINOCO_SEQ_LEN];
-		u8 key[TKIP_KEYLEN];
-		u8 tx_mic[MIC_KEYLEN];
-		u8 rx_mic[MIC_KEYLEN];
+		struct {
+			u8 key[TKIP_KEYLEN];
+			u8 tx_mic[MIC_KEYLEN];
+			u8 rx_mic[MIC_KEYLEN];
+		} tkip;
 		u8 tsc[ORINOCO_SEQ_LEN];
 	} __packed buf;
 	struct hermes *hw = &priv->hw;
@@ -1011,8 +1014,9 @@ int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
 		key_idx |= 0x8000;
 
 	buf.idx = cpu_to_le16(key_idx);
-	memcpy(buf.key, key,
-	       sizeof(buf.key) + sizeof(buf.tx_mic) + sizeof(buf.rx_mic));
+	if (key_len != sizeof(buf.tkip))
+		return -EINVAL;
+	memcpy(&buf.tkip, key, sizeof(buf.tkip));
 
 	if (rsc_len > sizeof(buf.rsc))
 		rsc_len = sizeof(buf.rsc);
diff --git a/drivers/net/wireless/intersil/orinoco/hw.h b/drivers/net/wireless/intersil/orinoco/hw.h
index 466d1ede76f1..da5804dbdf34 100644
--- a/drivers/net/wireless/intersil/orinoco/hw.h
+++ b/drivers/net/wireless/intersil/orinoco/hw.h
@@ -38,8 +38,9 @@ int __orinoco_hw_set_wap(struct orinoco_private *priv);
 int __orinoco_hw_setup_wepkeys(struct orinoco_private *priv);
 int __orinoco_hw_setup_enc(struct orinoco_private *priv);
 int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
-			      int set_tx, const u8 *key, const u8 *rsc,
-			      size_t rsc_len, const u8 *tsc, size_t tsc_len);
+			      int set_tx, const u8 *key, size_t key_len,
+			      const u8 *rsc, size_t rsc_len,
+			      const u8 *tsc, size_t tsc_len);
 int orinoco_clear_tkip_key(struct orinoco_private *priv, int key_idx);
 int __orinoco_hw_set_multicast_list(struct orinoco_private *priv,
 				    struct net_device *dev,
diff --git a/drivers/net/wireless/intersil/orinoco/wext.c b/drivers/net/wireless/intersil/orinoco/wext.c
index 7b6c4ae8ddb3..4a01260027bc 100644
--- a/drivers/net/wireless/intersil/orinoco/wext.c
+++ b/drivers/net/wireless/intersil/orinoco/wext.c
@@ -791,7 +791,7 @@ static int orinoco_ioctl_set_encodeext(struct net_device *dev,
 
 			err = __orinoco_hw_set_tkip_key(priv, idx,
 				 ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY,
-				 priv->keys[idx].key,
+				 priv->keys[idx].key, priv->keys[idx].key_len,
 				 tkip_iv, ORINOCO_SEQ_LEN, NULL, 0);
 			if (err)
 				printk(KERN_ERR "%s: Error %d setting TKIP key"

From 3f26f7665c5ddc880444e3daaecb3a46794ba3a4 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 21:14:31 -0700
Subject: [PATCH 2558/3292] mwl8k: Avoid memcpy() over-reading of mcs.rx_mask

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally reading across neighboring array fields. Use the
sub-structure address directly.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210617041431.2168953-1-keescook@chromium.org
---
 drivers/net/wireless/marvell/mwl8k.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c
index 84b32a5f01ee..3bf6571f4149 100644
--- a/drivers/net/wireless/marvell/mwl8k.c
+++ b/drivers/net/wireless/marvell/mwl8k.c
@@ -4552,7 +4552,7 @@ static int mwl8k_cmd_update_stadb_add(struct ieee80211_hw *hw,
 	else
 		rates = sta->supp_rates[NL80211_BAND_5GHZ] << 5;
 	legacy_rate_mask_to_array(p->legacy_rates, rates);
-	memcpy(p->ht_rates, sta->ht_cap.mcs.rx_mask, 16);
+	memcpy(p->ht_rates, &sta->ht_cap.mcs, 16);
 	p->interop = 1;
 	p->amsdu_enabled = 0;
 
@@ -5034,7 +5034,7 @@ mwl8k_bss_info_changed_sta(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			ap_legacy_rates =
 				ap->supp_rates[NL80211_BAND_5GHZ] << 5;
 		}
-		memcpy(ap_mcs_rates, ap->ht_cap.mcs.rx_mask, 16);
+		memcpy(ap_mcs_rates, &ap->ht_cap.mcs, 16);
 
 		rcu_read_unlock();
 

From 0d5e743db480642818401fb34bbc3f0da28abdfb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 10:13:17 -0700
Subject: [PATCH 2559/3292] rtlwifi: rtl8192de: Fully initialize curvecount_val

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring array fields.

The size argument to memset() is bytes, but the array element size
of curvecount_val is u32, so "CV_CURVE_CNT * 2" was only 1/4th of the
contents of curvecount_val. Adjust memset() to wipe full buffer size.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210617171317.3410722-1-keescook@chromium.org
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
index 68ec009ea157..76dd881ef9bb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
@@ -2574,7 +2574,7 @@ static void _rtl92d_phy_lc_calibrate_sw(struct ieee80211_hw *hw, bool is2t)
 			RTPRINT(rtlpriv, FINIT, INIT_IQK,
 				"path-B / 2.4G LCK\n");
 		}
-		memset(&curvecount_val[0], 0, CV_CURVE_CNT * 2);
+		memset(curvecount_val, 0, sizeof(curvecount_val));
 		/* Set LC calibration off */
 		rtl_set_rfreg(hw, (enum radio_path)index, RF_CHNLBW,
 			      0x08000, 0x0);

From 59c668d700be72bdf76932f5a7db0af947ee0539 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 10:15:22 -0700
Subject: [PATCH 2560/3292] mwifiex: Avoid memset() over-write of WEP
 key_material

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memset(), avoid intentionally writing across
neighboring array fields.

When preparing to call mwifiex_set_keyparamset_wep(), key_material is
treated very differently from its structure layout (which has only a
single struct mwifiex_ie_type_key_param_set). Instead, add a new type to
the union so memset() can correctly reason about the size of the
structure.

Note that the union ("params", 196 bytes) containing key_material was
not large enough to hold the target of this memset(): sizeof(struct
mwifiex_ie_type_key_param_set) == 60, NUM_WEP_KEYS = 4, so 240
bytes, or 44 bytes past the end of "params". The good news is that
it appears that the command buffer, as allocated, is 2048 bytes
(MWIFIEX_SIZE_OF_CMD_BUFFER), so no neighboring memory appears to be
getting clobbered.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210617171522.3410951-1-keescook@chromium.org
---
 drivers/net/wireless/marvell/mwifiex/fw.h      |  6 ++++++
 drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 11 ++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 470d669c7f14..2ff23ab259ab 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -995,6 +995,11 @@ struct host_cmd_ds_802_11_key_material {
 	struct mwifiex_ie_type_key_param_set key_param_set;
 } __packed;
 
+struct host_cmd_ds_802_11_key_material_wep {
+	__le16 action;
+	struct mwifiex_ie_type_key_param_set key_param_set[NUM_WEP_KEYS];
+} __packed;
+
 struct host_cmd_ds_gen {
 	__le16 command;
 	__le16 size;
@@ -2347,6 +2352,7 @@ struct host_cmd_ds_command {
 		struct host_cmd_ds_wmm_get_status get_wmm_status;
 		struct host_cmd_ds_802_11_key_material key_material;
 		struct host_cmd_ds_802_11_key_material_v2 key_material_v2;
+		struct host_cmd_ds_802_11_key_material_wep key_material_wep;
 		struct host_cmd_ds_version_ext verext;
 		struct host_cmd_ds_mgmt_frame_reg reg_mask;
 		struct host_cmd_ds_remain_on_chan roc_cfg;
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
index d3a968ef21ef..48ea00da1fc9 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
@@ -840,14 +840,15 @@ mwifiex_cmd_802_11_key_material_v1(struct mwifiex_private *priv,
 	}
 
 	if (!enc_key) {
-		memset(&key_material->key_param_set, 0,
-		       (NUM_WEP_KEYS *
-			sizeof(struct mwifiex_ie_type_key_param_set)));
+		struct host_cmd_ds_802_11_key_material_wep *key_material_wep =
+			(struct host_cmd_ds_802_11_key_material_wep *)key_material;
+		memset(key_material_wep->key_param_set, 0,
+		       sizeof(key_material_wep->key_param_set));
 		ret = mwifiex_set_keyparamset_wep(priv,
-						  &key_material->key_param_set,
+						  &key_material_wep->key_param_set[0],
 						  &key_param_len);
 		cmd->size = cpu_to_le16(key_param_len +
-				    sizeof(key_material->action) + S_DS_GEN);
+				    sizeof(key_material_wep->action) + S_DS_GEN);
 		return ret;
 	} else
 		memset(&key_material->key_param_set, 0,

From 829eea7c94e0bac804e65975639a2f2e5f147033 Mon Sep 17 00:00:00 2001
From: Wei Mingzhi <whistler@member.fsf.org>
Date: Sat, 19 Jun 2021 00:08:40 +0800
Subject: [PATCH 2561/3292] mt7601u: add USB device ID for some versions of
 XiaoDu WiFi Dongle.

USB device ID of some versions of XiaoDu WiFi Dongle is 2955:1003
instead of 2955:1001. Both are the same mt7601u hardware.

Signed-off-by: Wei Mingzhi <whistler@member.fsf.org>
Acked-by: Jakub Kicinski <kubakici@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210618160840.305024-1-whistler@member.fsf.org
---
 drivers/net/wireless/mediatek/mt7601u/usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
index 6bcc4a13ae6c..cc772045d526 100644
--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
+++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
@@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = {
 	{ USB_DEVICE(0x2717, 0x4106) },
 	{ USB_DEVICE(0x2955, 0x0001) },
 	{ USB_DEVICE(0x2955, 0x1001) },
+	{ USB_DEVICE(0x2955, 0x1003) },
 	{ USB_DEVICE(0x2a5f, 0x1000) },
 	{ USB_DEVICE(0x7392, 0x7710) },
 	{ 0, }

From c8bcd82a4efd053cdd5ce515a8b0003011a5f756 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 12:54:10 -0700
Subject: [PATCH 2562/3292] ath11k: Avoid memcpy() over-reading of he_cap

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring array fields.

Since peer_he_cap_{mac,phy}info and he_cap_elem.{mac,phy}_cap_info are not
the same sizes, memcpy() was reading beyond field boundaries. Instead,
correctly cap the copy length and pad out any difference in size
(peer_he_cap_macinfo is 8 bytes whereas mac_cap_info is 6, and
peer_he_cap_phyinfo is 12 bytes whereas phy_cap_info is 11).

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210616195410.1232119-1-keescook@chromium.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index eb52332dbe3f..e9b3689331ec 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -1314,10 +1314,16 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar,
 
 	arg->he_flag = true;
 
-	memcpy(&arg->peer_he_cap_macinfo, he_cap->he_cap_elem.mac_cap_info,
-	       sizeof(arg->peer_he_cap_macinfo));
-	memcpy(&arg->peer_he_cap_phyinfo, he_cap->he_cap_elem.phy_cap_info,
-	       sizeof(arg->peer_he_cap_phyinfo));
+	memcpy_and_pad(&arg->peer_he_cap_macinfo,
+		       sizeof(arg->peer_he_cap_macinfo),
+		       he_cap->he_cap_elem.mac_cap_info,
+		       sizeof(he_cap->he_cap_elem.mac_cap_info),
+		       0);
+	memcpy_and_pad(&arg->peer_he_cap_phyinfo,
+		       sizeof(arg->peer_he_cap_phyinfo),
+		       he_cap->he_cap_elem.phy_cap_info,
+		       sizeof(he_cap->he_cap_elem.phy_cap_info),
+		       0);
 	arg->peer_he_ops = vif->bss_conf.he_oper.params;
 
 	/* the top most byte is used to indicate BSS color info */

From d3a1a18ab034fcbec575d10f016b4ae02358cbde Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 10:10:58 -0700
Subject: [PATCH 2563/3292] wcn36xx: Avoid memset() beyond end of struct field

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memset(), avoid intentionally writing across
neighboring array fields.

Instead of writing past the end of the header to reach the rest of
the body, replace the redundant function with existing macro to wipe
struct contents and set field values. Additionally adjusts macro to add
missing parens.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210617171058.3410494-1-keescook@chromium.org
---
 drivers/net/wireless/ath/wcn36xx/smd.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index cf8e52cbdd9b..0e3be17d8cea 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -445,22 +445,12 @@ out:
 	return ret;
 }
 
-static void init_hal_msg(struct wcn36xx_hal_msg_header *hdr,
-			 enum wcn36xx_hal_host_msg_type msg_type,
-			 size_t msg_size)
-{
-	memset(hdr, 0, msg_size + sizeof(*hdr));
-	hdr->msg_type = msg_type;
-	hdr->msg_version = WCN36XX_HAL_MSG_VERSION0;
-	hdr->len = msg_size + sizeof(*hdr);
-}
-
 #define __INIT_HAL_MSG(msg_body, type, version) \
 	do {								\
-		memset(&msg_body, 0, sizeof(msg_body));			\
-		msg_body.header.msg_type = type;			\
-		msg_body.header.msg_version = version;			\
-		msg_body.header.len = sizeof(msg_body);			\
+		memset(&(msg_body), 0, sizeof(msg_body));		\
+		(msg_body).header.msg_type = type;			\
+		(msg_body).header.msg_version = version;		\
+		(msg_body).header.len = sizeof(msg_body);		\
 	} while (0)							\
 
 #define INIT_HAL_MSG(msg_body, type)	\
@@ -2729,8 +2719,7 @@ int wcn36xx_smd_set_mc_list(struct wcn36xx *wcn,
 
 	msg_body = (struct wcn36xx_hal_rcv_flt_pkt_set_mc_list_req_msg *)
 		   wcn->hal_buf;
-	init_hal_msg(&msg_body->header, WCN36XX_HAL_8023_MULTICAST_LIST_REQ,
-		     sizeof(msg_body->mc_addr_list));
+	INIT_HAL_MSG(*msg_body, WCN36XX_HAL_8023_MULTICAST_LIST_REQ);
 
 	/* An empty list means all mc traffic will be received */
 	if (fp)

From 85e8b032d6ebb0f698a34dd22c2f13443d905888 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Jun 2021 07:44:17 -0700
Subject: [PATCH 2564/3292] vxlan: add missing rcu_read_lock() in
 neigh_reduce()

syzbot complained in neigh_reduce(), because rcu_read_lock_bh()
is treated differently than rcu_read_lock()

WARNING: suspicious RCU usage
5.13.0-rc6-syzkaller #0 Not tainted
-----------------------------
include/net/addrconf.h:313 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
3 locks held by kworker/0:0/5:
 #0: ffff888011064d38 ((wq_completion)events){+.+.}-{0:0}, at: arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
 #0: ffff888011064d38 ((wq_completion)events){+.+.}-{0:0}, at: atomic64_set include/asm-generic/atomic-instrumented.h:856 [inline]
 #0: ffff888011064d38 ((wq_completion)events){+.+.}-{0:0}, at: atomic_long_set include/asm-generic/atomic-long.h:41 [inline]
 #0: ffff888011064d38 ((wq_completion)events){+.+.}-{0:0}, at: set_work_data kernel/workqueue.c:617 [inline]
 #0: ffff888011064d38 ((wq_completion)events){+.+.}-{0:0}, at: set_work_pool_and_clear_pending kernel/workqueue.c:644 [inline]
 #0: ffff888011064d38 ((wq_completion)events){+.+.}-{0:0}, at: process_one_work+0x871/0x1600 kernel/workqueue.c:2247
 #1: ffffc90000ca7da8 ((work_completion)(&port->wq)){+.+.}-{0:0}, at: process_one_work+0x8a5/0x1600 kernel/workqueue.c:2251
 #2: ffffffff8bf795c0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1da/0x3130 net/core/dev.c:4180

stack backtrace:
CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.13.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: events ipvlan_process_multicast
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x141/0x1d7 lib/dump_stack.c:120
 __in6_dev_get include/net/addrconf.h:313 [inline]
 __in6_dev_get include/net/addrconf.h:311 [inline]
 neigh_reduce drivers/net/vxlan.c:2167 [inline]
 vxlan_xmit+0x34d5/0x4c30 drivers/net/vxlan.c:2919
 __netdev_start_xmit include/linux/netdevice.h:4944 [inline]
 netdev_start_xmit include/linux/netdevice.h:4958 [inline]
 xmit_one net/core/dev.c:3654 [inline]
 dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3670
 __dev_queue_xmit+0x2133/0x3130 net/core/dev.c:4246
 ipvlan_process_multicast+0xa99/0xd70 drivers/net/ipvlan/ipvlan_core.c:287
 process_one_work+0x98d/0x1600 kernel/workqueue.c:2276
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2422
 kthread+0x3b1/0x4a0 kernel/kthread.c:313
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

Fixes: f564f45c4518 ("vxlan: add ipv6 proxy support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 02a14f1b938a..5a8df5a195cb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2164,6 +2164,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
 	struct neighbour *n;
 	struct nd_msg *msg;
 
+	rcu_read_lock();
 	in6_dev = __in6_dev_get(dev);
 	if (!in6_dev)
 		goto out;
@@ -2215,6 +2216,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
 	}
 
 out:
+	rcu_read_unlock();
 	consume_skb(skb);
 	return NETDEV_TX_OK;
 }

From 64295f0d01ae0661a2cea42c598070b1c87ca6e0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Jun 2021 07:53:48 -0700
Subject: [PATCH 2565/3292] virtio/vsock: avoid NULL deref in
 virtio_transport_seqpacket_allow()

Make sure the_virtio_vsock is not NULL before dereferencing it.

general protection fault, probably for non-canonical address 0xdffffc0000000071: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000388-0x000000000000038f]
CPU: 0 PID: 8452 Comm: syz-executor406 Not tainted 5.13.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:virtio_transport_seqpacket_allow+0xbf/0x210 net/vmw_vsock/virtio_transport.c:503
Code: e8 c6 d9 ab f8 84 db 0f 84 0f 01 00 00 e8 09 d3 ab f8 48 8d bd 88 03 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 06 0f 8e 2a 01 00 00 44 0f b6 a5 88 03 00 00
RSP: 0018:ffffc90003757c18 EFLAGS: 00010206
RAX: dffffc0000000000 RBX: 0000000000000001 RCX: 0000000000000000
RDX: 0000000000000071 RSI: ffffffff88c908e7 RDI: 0000000000000388
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff88c90a06 R11: 0000000000000000 R12: 0000000000000000
R13: ffffffff88c90840 R14: 0000000000000000 R15: 0000000000000001
FS:  0000000001bee300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020000082 CR3: 000000002847e000 CR4: 00000000001506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 vsock_assign_transport+0x575/0x700 net/vmw_vsock/af_vsock.c:490
 vsock_connect+0x200/0xc00 net/vmw_vsock/af_vsock.c:1337
 __sys_connect_file+0x155/0x1a0 net/socket.c:1824
 __sys_connect+0x161/0x190 net/socket.c:1841
 __do_sys_connect net/socket.c:1851 [inline]
 __se_sys_connect net/socket.c:1848 [inline]
 __x64_sys_connect+0x6f/0xb0 net/socket.c:1848
 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x43ee69
Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffd49e7c788 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000000400488 RCX: 000000000043ee69
RDX: 0000000000000010 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 0000000000402e50 R08: 0000000000000000 R09: 0000000000400488
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000402ee0
R13: 0000000000000000 R14: 00000000004ac018 R15: 0000000000400488

Fixes: 53efbba12cc7 ("virtio/vsock: enable SEQPACKET for transport")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Arseny Krasnov <arseny.krasnov@kaspersky.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e73ce652bf3c..ed1664e7bd88 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -498,9 +498,11 @@ static bool virtio_transport_seqpacket_allow(u32 remote_cid)
 	struct virtio_vsock *vsock;
 	bool seqpacket_allow;
 
+	seqpacket_allow = false;
 	rcu_read_lock();
 	vsock = rcu_dereference(the_virtio_vsock);
-	seqpacket_allow = vsock->seqpacket_allow;
+	if (vsock)
+		seqpacket_allow = vsock->seqpacket_allow;
 	rcu_read_unlock();
 
 	return seqpacket_allow;

From 45423cff1db66cf0993e8a9bd0ac93e740149e49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Mon, 21 Jun 2021 17:32:35 +0200
Subject: [PATCH 2566/3292] sfc: avoid double pci_remove of VFs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If pci_remove was called for a PF with VFs, the removal of the VFs was
called twice from efx_ef10_sriov_fini: one directly with pci_driver->remove
and another implicit by calling pci_disable_sriov, which also perform
the VFs remove. This was leading to crashing the kernel on the second
attempt.

Given that pci_disable_sriov already calls to pci remove function, get
rid of the direct call to pci_driver->remove from the driver.

2 different ways to trigger the bug:
- Create one or more VFs, then attach the PF to a virtual machine (at
  least with qemu/KVM)
- Create one or more VFs, then remove the PF with:
  echo 1 > /sys/bus/pci/devices/PF_PCI_ID/remove

Removing sfc module does not trigger the error, at least for me, because
it removes the VF first, and then the PF.

Example of a log with the error:
    list_del corruption, ffff967fd20a8ad0->next is LIST_POISON1 (dead000000000100)
    ------------[ cut here ]------------
    kernel BUG at lib/list_debug.c:47!
    [...trimmed...]
    RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x4c
    [...trimmed...]
    Call Trace:
    efx_dissociate+0x1f/0x140 [sfc]
    efx_pci_remove+0x27/0x150 [sfc]
    pci_device_remove+0x3b/0xc0
    device_release_driver_internal+0x103/0x1f0
    pci_stop_bus_device+0x69/0x90
    pci_stop_and_remove_bus_device+0xe/0x20
    pci_iov_remove_virtfn+0xba/0x120
    sriov_disable+0x2f/0xe0
    efx_ef10_pci_sriov_disable+0x52/0x80 [sfc]
    ? pcie_aer_is_native+0x12/0x40
    efx_ef10_sriov_fini+0x72/0x110 [sfc]
    efx_pci_remove+0x62/0x150 [sfc]
    pci_device_remove+0x3b/0xc0
    device_release_driver_internal+0x103/0x1f0
    unbind_store+0xf6/0x130
    kernfs_fop_write+0x116/0x190
    vfs_write+0xa5/0x1a0
    ksys_write+0x4f/0xb0
    do_syscall_64+0x5b/0x1a0
    entry_SYSCALL_64_after_hwframe+0x65/0xca

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10_sriov.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 21fa6c0e8873..a5d28b0f75ba 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -439,7 +439,6 @@ int efx_ef10_sriov_init(struct efx_nic *efx)
 void efx_ef10_sriov_fini(struct efx_nic *efx)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	unsigned int i;
 	int rc;
 
 	if (!nic_data->vf) {
@@ -449,14 +448,7 @@ void efx_ef10_sriov_fini(struct efx_nic *efx)
 		return;
 	}
 
-	/* Remove any VFs in the host */
-	for (i = 0; i < efx->vf_count; ++i) {
-		struct efx_nic *vf_efx = nic_data->vf[i].efx;
-
-		if (vf_efx)
-			vf_efx->pci_dev->driver->remove(vf_efx->pci_dev);
-	}
-
+	/* Disable SRIOV and remove any VFs in the host */
 	rc = efx_ef10_pci_sriov_disable(efx, true);
 	if (rc)
 		netif_dbg(efx, drv, efx->net_dev,

From 1ebe4feb8b442884f5a28d2437040096723dd1ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Mon, 21 Jun 2021 17:32:36 +0200
Subject: [PATCH 2567/3292] sfc: error code if SRIOV cannot be disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If SRIOV cannot be disabled during device removal or module unloading,
return error code so it can be logged properly in the calling function.

Note that this can only happen if any VF is currently attached to a
guest using Xen, but not with vfio/KVM. Despite that in that case the
VFs won't work properly with PF removed and/or the module unloaded, I
have let it as is because I don't know what side effects may have
changing it, and also it seems to be the same that other drivers are
doing in this situation.

In the case of being called during SRIOV reconfiguration, the behavior
hasn't changed because the function is called with force=false.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10_sriov.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index a5d28b0f75ba..84041cd587d7 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -402,12 +402,17 @@ fail1:
 	return rc;
 }
 
+/* Disable SRIOV and remove VFs
+ * If some VFs are attached to a guest (using Xen, only) nothing is
+ * done if force=false, and vports are freed if force=true (for the non
+ * attachedc ones, only) but SRIOV is not disabled and VFs are not
+ * removed in either case.
+ */
 static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
 {
 	struct pci_dev *dev = efx->pci_dev;
-	unsigned int vfs_assigned = 0;
-
-	vfs_assigned = pci_vfs_assigned(dev);
+	unsigned int vfs_assigned = pci_vfs_assigned(dev);
+	int rc = 0;
 
 	if (vfs_assigned && !force) {
 		netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; "
@@ -417,10 +422,12 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
 
 	if (!vfs_assigned)
 		pci_disable_sriov(dev);
+	else
+		rc = -EBUSY;
 
 	efx_ef10_sriov_free_vf_vswitching(efx);
 	efx->vf_count = 0;
-	return 0;
+	return rc;
 }
 
 int efx_ef10_sriov_configure(struct efx_nic *efx, int num_vfs)

From 9a022e76500e8ce86dbce83c7856cf59820dce24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Mon, 21 Jun 2021 17:32:37 +0200
Subject: [PATCH 2568/3292] sfc: explain that "attached" VFs only refer to Xen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During SRIOV disabling it is checked wether any VF is currently attached
to a guest, using pci_vfs_assigned function. However, this check only
works with VFs attached with Xen, not with vfio/KVM. Added comments
clarifying this point.

Also, replaced manual check of PCI_DEV_FLAGS_ASSIGNED flag and used the
helper function pci_is_dev_assigned instead.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 3 ++-
 drivers/net/ethernet/sfc/ef10_sriov.c | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index c3f35da1b82a..bea961013f7c 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1070,7 +1070,8 @@ static int efx_ef10_probe_vf(struct efx_nic *efx)
 
 	/* If the parent PF has no VF data structure, it doesn't know about this
 	 * VF so fail probe.  The VF needs to be re-created.  This can happen
-	 * if the PF driver is unloaded while the VF is assigned to a guest.
+	 * if the PF driver was unloaded while any VF was assigned to a guest
+	 * (using Xen, only).
 	 */
 	pci_dev_pf = efx->pci_dev->physfn;
 	if (pci_dev_pf) {
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 84041cd587d7..f8f8fbe51ef8 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -122,8 +122,7 @@ static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx)
 		struct ef10_vf *vf = nic_data->vf + i;
 
 		/* If VF is assigned, do not free the vport  */
-		if (vf->pci_dev &&
-		    vf->pci_dev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)
+		if (vf->pci_dev && pci_is_dev_assigned(vf->pci_dev))
 			continue;
 
 		if (vf->vport_assigned) {
@@ -449,7 +448,9 @@ void efx_ef10_sriov_fini(struct efx_nic *efx)
 	int rc;
 
 	if (!nic_data->vf) {
-		/* Remove any un-assigned orphaned VFs */
+		/* Remove any un-assigned orphaned VFs. This can happen if the PF driver
+		 * was unloaded while any VF was assigned to a guest (using Xen, only).
+		 */
 		if (pci_num_vf(efx->pci_dev) && !pci_vfs_assigned(efx->pci_dev))
 			pci_disable_sriov(efx->pci_dev);
 		return;

From 3ddd6e2f71092766b6040b9c33cf9906577b4025 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Mon, 21 Jun 2021 17:32:38 +0200
Subject: [PATCH 2569/3292] sfc: avoid duplicated code in ef10_sriov
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fail path of efx_ef10_sriov_alloc_vf_vswitching is identical to the
full content of efx_ef10_sriov_free_vf_vswitching, so replace it for a
single call to efx_ef10_sriov_free_vf_vswitching.

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10_sriov.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index f8f8fbe51ef8..752d6406f07e 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -206,9 +206,7 @@ static int efx_ef10_sriov_alloc_vf_vswitching(struct efx_nic *efx)
 
 	return 0;
 fail:
-	efx_ef10_sriov_free_vf_vports(efx);
-	kfree(nic_data->vf);
-	nic_data->vf = NULL;
+	efx_ef10_sriov_free_vf_vswitching(efx);
 	return rc;
 }
 

From 7c4d7ca8cce3c8167e10f52a5afb553851f2086b Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 21 Jun 2021 19:30:23 +0200
Subject: [PATCH 2570/3292] Documentation: ACPI: DSD: describe additional MAC
 configuration

Document additional MAC configuration modes which can be processed
by the existing fwnode_ phylink helpers:

* "managed" standard ACPI _DSD property [1]
* "fixed-link" data-only subnode linked in the _DSD package via
  generic mechanism of the hierarchical data extension [2]

[1] https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
[2] https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/firmware-guide/acpi/dsd/phy.rst | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst
index 0d49bad2ea9c..680ad179e5f9 100644
--- a/Documentation/firmware-guide/acpi/dsd/phy.rst
+++ b/Documentation/firmware-guide/acpi/dsd/phy.rst
@@ -50,6 +50,21 @@ phy-mode
 The "phy-mode" _DSD property is used to describe the connection to
 the PHY. The valid values for "phy-mode" are defined in [4].
 
+managed
+-------
+Optional property, which specifies the PHY management type.
+The valid values for "managed" are defined in [4].
+
+fixed-link
+----------
+The "fixed-link" is described by a data-only subnode of the
+MAC port, which is linked in the _DSD package via
+hierarchical data extension (UUID dbb8e3e6-5886-4ba6-8795-1319f52a966b
+in accordance with [5] "_DSD Implementation Guide" document).
+The subnode should comprise a required property ("speed") and
+possibly the optional ones - complete list of parameters and
+their values are specified in [4].
+
 The following ASL example illustrates the usage of these properties.
 
 DSDT entry for MDIO node
@@ -128,6 +143,48 @@ phy-mode and phy-handle are used as explained earlier.
 	  })
 	}
 
+MAC node example where "managed" property is specified.
+-------------------------------------------------------
+
+.. code-block:: none
+
+	Scope(\_SB.PP21.ETH0)
+	{
+	  Name (_DSD, Package () {
+	     ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+		 Package () {
+		     Package () {"phy-mode", "sgmii"},
+		     Package () {"managed", "in-band-status"}
+		 }
+	   })
+	}
+
+MAC node example with a "fixed-link" subnode.
+---------------------------------------------
+
+.. code-block:: none
+
+	Scope(\_SB.PP21.ETH1)
+	{
+	  Name (_DSD, Package () {
+	    ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+		 Package () {
+		     Package () {"phy-mode", "sgmii"},
+		 },
+	    ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
+		 Package () {
+		     Package () {"fixed-link", "LNK0"}
+		 }
+	  })
+	  Name (LNK0, Package(){ // Data-only subnode of port
+	    ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+		 Package () {
+		     Package () {"speed", 1000},
+		     Package () {"full-duplex", 1}
+		 }
+	  })
+	}
+
 References
 ==========
 
@@ -138,3 +195,5 @@ References
 [3] Documentation/firmware-guide/acpi/DSD-properties-rules.rst
 
 [4] Documentation/devicetree/bindings/net/ethernet-controller.yaml
+
+[5] https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf

From 62a6ef6a996f5eec73d30d079573a1fa8f95fcd9 Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 21 Jun 2021 19:30:24 +0200
Subject: [PATCH 2571/3292] net: mdiobus: Introduce fwnode_mdbiobus_register()

This patch introduces a new helper function that
wraps acpi_/of_ mdiobus_register() and allows its
usage via common fwnode_ interface.

Fall back to raw mdiobus_register() in case CONFIG_FWNODE_MDIO
is not enabled, in order to satisfy compatibility
in all future user drivers.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/fwnode_mdio.c | 22 ++++++++++++++++++++++
 include/linux/fwnode_mdio.h    | 12 ++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index 1becb1a731f6..ae0bf71a9932 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -7,8 +7,10 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/acpi_mdio.h>
 #include <linux/fwnode_mdio.h>
 #include <linux/of.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
 
 MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>");
@@ -142,3 +144,23 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
 	return 0;
 }
 EXPORT_SYMBOL(fwnode_mdiobus_register_phy);
+
+/**
+ * fwnode_mdiobus_register - bring up all the PHYs on a given MDIO bus and
+ *	attach them to it.
+ * @bus: Target MDIO bus.
+ * @fwnode: Pointer to fwnode of the MDIO controller.
+ *
+ * Return values are determined accordingly to acpi_/of_ mdiobus_register()
+ * operation.
+ */
+int fwnode_mdiobus_register(struct mii_bus *bus, struct fwnode_handle *fwnode)
+{
+	if (is_acpi_node(fwnode))
+		return acpi_mdiobus_register(bus, fwnode);
+	else if (is_of_node(fwnode))
+		return of_mdiobus_register(bus, to_of_node(fwnode));
+	else
+		return -EINVAL;
+}
+EXPORT_SYMBOL(fwnode_mdiobus_register);
diff --git a/include/linux/fwnode_mdio.h b/include/linux/fwnode_mdio.h
index faf603c48c86..13d4ae8fee0a 100644
--- a/include/linux/fwnode_mdio.h
+++ b/include/linux/fwnode_mdio.h
@@ -16,6 +16,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
 int fwnode_mdiobus_register_phy(struct mii_bus *bus,
 				struct fwnode_handle *child, u32 addr);
 
+int fwnode_mdiobus_register(struct mii_bus *bus, struct fwnode_handle *fwnode);
 #else /* CONFIG_FWNODE_MDIO */
 int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
 				       struct phy_device *phy,
@@ -30,6 +31,17 @@ static inline int fwnode_mdiobus_register_phy(struct mii_bus *bus,
 {
 	return -EINVAL;
 }
+
+static inline int fwnode_mdiobus_register(struct mii_bus *bus,
+					  struct fwnode_handle *fwnode)
+{
+	/*
+	 * Fall back to mdiobus_register() function to register a bus.
+	 * This way, we don't have to keep compat bits around in drivers.
+	 */
+
+	return mdiobus_register(mdio);
+}
 #endif
 
 #endif /* __LINUX_FWNODE_MDIO_H */

From 33fc11f0983b969f6da3a295567aa814e958980b Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 21 Jun 2021 19:30:25 +0200
Subject: [PATCH 2572/3292] net/fsl: switch to fwnode_mdiobus_register

Utilize the newly added helper routine
for registering the MDIO bus via fwnode_
interface.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/Kconfig      |  4 +---
 drivers/net/ethernet/freescale/xgmac_mdio.c | 11 ++---------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 2d1abdd58fab..92a390576b88 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -67,9 +67,7 @@ config FSL_PQ_MDIO
 
 config FSL_XGMAC_MDIO
 	tristate "Freescale XGMAC MDIO"
-	select PHYLIB
-	depends on OF
-	select OF_MDIO
+	depends on FWNODE_MDIO
 	help
 	  This driver supports the MDIO bus on the Fman 10G Ethernet MACs, and
 	  on the FMan mEMAC (which supports both Clauses 22 and 45)
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 0b68852379da..2d99edc8a647 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/acpi_mdio.h>
+#include <linux/fwnode_mdio.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mdio.h>
@@ -246,7 +246,6 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 
 static int xgmac_mdio_probe(struct platform_device *pdev)
 {
-	struct fwnode_handle *fwnode;
 	struct mdio_fsl_priv *priv;
 	struct resource *res;
 	struct mii_bus *bus;
@@ -291,13 +290,7 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 	priv->has_a011043 = device_property_read_bool(&pdev->dev,
 						      "fsl,erratum-a011043");
 
-	fwnode = pdev->dev.fwnode;
-	if (is_of_node(fwnode))
-		ret = of_mdiobus_register(bus, to_of_node(fwnode));
-	else if (is_acpi_node(fwnode))
-		ret = acpi_mdiobus_register(bus, fwnode);
-	else
-		ret = -EINVAL;
+	ret = fwnode_mdiobus_register(bus, pdev->dev.fwnode);
 	if (ret) {
 		dev_err(&pdev->dev, "cannot register MDIO bus\n");
 		goto err_registration;

From c54da4c1acb1d62b3aec36b18473c430675e26d4 Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 21 Jun 2021 19:30:26 +0200
Subject: [PATCH 2573/3292] net: mvmdio: add ACPI support

This patch introducing ACPI support for the mvmdio driver by adding
acpi_match_table with two entries:

* "MRVL0100" for the SMI operation
* "MRVL0101" for the XSMI mode

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvmdio.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index d14762d93640..7537ee3f6622 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -17,8 +17,10 @@
  * warranty of any kind, whether express or implied.
  */
 
+#include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/fwnode_mdio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -281,7 +283,7 @@ static int orion_mdio_probe(struct platform_device *pdev)
 	struct orion_mdio_dev *dev;
 	int i, ret;
 
-	type = (enum orion_mdio_bus_type)of_device_get_match_data(&pdev->dev);
+	type = (enum orion_mdio_bus_type)device_get_match_data(&pdev->dev);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!r) {
@@ -369,7 +371,7 @@ static int orion_mdio_probe(struct platform_device *pdev)
 		goto out_mdio;
 	}
 
-	ret = of_mdiobus_register(bus, pdev->dev.of_node);
+	ret = fwnode_mdiobus_register(bus, pdev->dev.fwnode);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
 		goto out_mdio;
@@ -421,12 +423,20 @@ static const struct of_device_id orion_mdio_match[] = {
 };
 MODULE_DEVICE_TABLE(of, orion_mdio_match);
 
+static const struct acpi_device_id orion_mdio_acpi_match[] = {
+	{ "MRVL0100", BUS_TYPE_SMI },
+	{ "MRVL0101", BUS_TYPE_XSMI },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match);
+
 static struct platform_driver orion_mdio_driver = {
 	.probe = orion_mdio_probe,
 	.remove = orion_mdio_remove,
 	.driver = {
 		.name = "orion-mdio",
 		.of_match_table = orion_mdio_match,
+		.acpi_match_table = ACPI_PTR(orion_mdio_acpi_match),
 	},
 };
 

From dfce1bab8fdc2a2603a896ed836905ba63f13384 Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 21 Jun 2021 19:30:27 +0200
Subject: [PATCH 2574/3292] net: mvpp2: enable using phylink with ACPI

Now that the MDIO and phylink are supported in the ACPI
world, enable to use them in the mvpp2 driver. Ensure a backward
compatibility with the firmware whose ACPI description does
not contain the necessary elements for the proper phy handling
and fall back to relying on the link interrupts instead.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 9bca8c8f9f8d..a66ed3194015 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4793,9 +4793,8 @@ static int mvpp2_open(struct net_device *dev)
 		goto err_cleanup_txqs;
 	}
 
-	/* Phylink isn't supported yet in ACPI mode */
-	if (port->of_node) {
-		err = phylink_of_phy_connect(port->phylink, port->of_node, 0);
+	if (port->phylink) {
+		err = phylink_fwnode_phy_connect(port->phylink, port->fwnode, 0);
 		if (err) {
 			netdev_err(port->dev, "could not attach PHY (%d)\n",
 				   err);
@@ -6703,6 +6702,19 @@ static void mvpp2_acpi_start(struct mvpp2_port *port)
 			  SPEED_UNKNOWN, DUPLEX_UNKNOWN, false, false);
 }
 
+/* In order to ensure backward compatibility for ACPI, check if the port
+ * firmware node comprises the necessary description allowing to use phylink.
+ */
+static bool mvpp2_use_acpi_compat_mode(struct fwnode_handle *port_fwnode)
+{
+	if (!is_acpi_node(port_fwnode))
+		return false;
+
+	return (!fwnode_property_present(port_fwnode, "phy-handle") &&
+		!fwnode_property_present(port_fwnode, "managed") &&
+		!fwnode_get_named_child_node(port_fwnode, "fixed-link"));
+}
+
 /* Ports initialization */
 static int mvpp2_port_probe(struct platform_device *pdev,
 			    struct fwnode_handle *port_fwnode,
@@ -6921,8 +6933,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
 	dev->dev.of_node = port_node;
 
-	/* Phylink isn't used w/ ACPI as of now */
-	if (port_node) {
+	if (!mvpp2_use_acpi_compat_mode(port_fwnode)) {
 		port->phylink_config.dev = &dev->dev;
 		port->phylink_config.type = PHYLINK_NETDEV;
 
@@ -6934,6 +6945,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		}
 		port->phylink = phylink;
 	} else {
+		dev_warn(&pdev->dev, "Use link irqs for port#%d. FW update required\n", port->id);
 		port->phylink = NULL;
 	}
 

From 8d909440ab3b118627b002e4e19e806b866371ba Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 21 Jun 2021 19:30:28 +0200
Subject: [PATCH 2575/3292] net: mvpp2: remove unused 'has_phy' field

The 'has_phy' field from struct mvpp2_port is no longer used.
Remove it.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2.h      | 3 ---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 -
 2 files changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 4a61c90003b5..b9fbc9f000f2 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -1197,9 +1197,6 @@ struct mvpp2_port {
 	/* Firmware node associated to the port */
 	struct fwnode_handle *fwnode;
 
-	/* Is a PHY always connected to the port */
-	bool has_phy;
-
 	/* Per-port registers' base address */
 	void __iomem *base;
 	void __iomem *stats_base;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index a66ed3194015..8362e64a3b28 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6790,7 +6790,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	port = netdev_priv(dev);
 	port->dev = dev;
 	port->fwnode = port_fwnode;
-	port->has_phy = !!of_find_property(port_node, "phy", NULL);
 	port->ntxqs = ntxqs;
 	port->nrxqs = nrxqs;
 	port->priv = priv;

From 8ce568ed06ce4ca38c0b67d8de9b8d75b731f90a Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 15:54:33 -0700
Subject: [PATCH 2576/3292] mptcp: drop tx skb cache

The mentioned cache was introduced to reduce the number of skb
allocation in atomic context, but the required complexity is
excessive.

This change remove the mentioned cache.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 91 +++-----------------------------------------
 net/mptcp/protocol.h |  2 -
 2 files changed, 5 insertions(+), 88 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b5f2f504b85b..77c90d6f04df 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -902,22 +902,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 		df->data_seq + df->data_len == msk->write_seq;
 }
 
-static int mptcp_wmem_with_overhead(struct sock *sk, int size)
+static int mptcp_wmem_with_overhead(int size)
 {
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	int ret, skbs;
-
-	ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
-	skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
-	if (skbs < msk->skb_tx_cache.qlen)
-		return ret;
-
-	return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
+	return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
 }
 
 static void __mptcp_wmem_reserve(struct sock *sk, int size)
 {
-	int amount = mptcp_wmem_with_overhead(sk, size);
+	int amount = mptcp_wmem_with_overhead(size);
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	WARN_ON_ONCE(msk->wmem_reserved);
@@ -1212,49 +1204,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	return NULL;
 }
 
-static bool mptcp_tx_cache_refill(struct sock *sk, int size,
-				  struct sk_buff_head *skbs, int *total_ts)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct sk_buff *skb;
-	int space_needed;
-
-	if (unlikely(tcp_under_memory_pressure(sk))) {
-		mptcp_mem_reclaim_partial(sk);
-
-		/* under pressure pre-allocate at most a single skb */
-		if (msk->skb_tx_cache.qlen)
-			return true;
-		space_needed = msk->size_goal_cache;
-	} else {
-		space_needed = msk->tx_pending_data + size -
-			       msk->skb_tx_cache.qlen * msk->size_goal_cache;
-	}
-
-	while (space_needed > 0) {
-		skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
-		if (unlikely(!skb)) {
-			/* under memory pressure, try to pass the caller a
-			 * single skb to allow forward progress
-			 */
-			while (skbs->qlen > 1) {
-				skb = __skb_dequeue_tail(skbs);
-				*total_ts -= skb->truesize;
-				__kfree_skb(skb);
-			}
-			return skbs->qlen > 0;
-		}
-
-		*total_ts += skb->truesize;
-		__skb_queue_tail(skbs, skb);
-		space_needed -= msk->size_goal_cache;
-	}
-	return true;
-}
-
 static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 {
-	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct sk_buff *skb;
 
 	if (ssk->sk_tx_skb_cache) {
@@ -1265,22 +1216,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 		return true;
 	}
 
-	skb = skb_peek(&msk->skb_tx_cache);
-	if (skb) {
-		if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
-			skb = __skb_dequeue(&msk->skb_tx_cache);
-			if (WARN_ON_ONCE(!skb))
-				return false;
-
-			mptcp_wmem_uncharge(sk, skb->truesize);
-			ssk->sk_tx_skb_cache = skb;
-			return true;
-		}
-
-		/* over memory limit, no point to try to allocate a new skb */
-		return false;
-	}
-
 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
 	if (!skb)
 		return false;
@@ -1296,7 +1231,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
 {
 	return !ssk->sk_tx_skb_cache &&
-	       !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
 	       tcp_under_memory_pressure(sk);
 }
 
@@ -1339,7 +1273,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	/* compute send limit */
 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
 	avail_size = info->size_goal;
-	msk->size_goal_cache = info->size_goal;
 	skb = tcp_write_queue_tail(ssk);
 	if (skb) {
 		/* Limit the write to the size available in the
@@ -1688,7 +1621,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	while (msg_data_left(msg)) {
 		int total_ts, frag_truesize = 0;
 		struct mptcp_data_frag *dfrag;
-		struct sk_buff_head skbs;
 		bool dfrag_collapsed;
 		size_t psize, offset;
 
@@ -1721,16 +1653,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		psize = pfrag->size - offset;
 		psize = min_t(size_t, psize, msg_data_left(msg));
 		total_ts = psize + frag_truesize;
-		__skb_queue_head_init(&skbs);
-		if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
+
+		if (!mptcp_wmem_alloc(sk, total_ts))
 			goto wait_for_memory;
 
-		if (!mptcp_wmem_alloc(sk, total_ts)) {
-			__skb_queue_purge(&skbs);
-			goto wait_for_memory;
-		}
-
-		skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
 		if (copy_page_from_iter(dfrag->page, offset, psize,
 					&msg->msg_iter) != psize) {
 			mptcp_wmem_uncharge(sk, psize + frag_truesize);
@@ -2462,13 +2388,11 @@ static int __mptcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&msk->rtx_queue);
 	INIT_WORK(&msk->work, mptcp_worker);
 	__skb_queue_head_init(&msk->receive_queue);
-	__skb_queue_head_init(&msk->skb_tx_cache);
 	msk->out_of_order_queue = RB_ROOT;
 	msk->first_pending = NULL;
 	msk->wmem_reserved = 0;
 	msk->rmem_released = 0;
 	msk->tx_pending_data = 0;
-	msk->size_goal_cache = TCP_BASE_MSS;
 
 	msk->ack_hint = NULL;
 	msk->first = NULL;
@@ -2525,15 +2449,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_data_frag *dtmp, *dfrag;
-	struct sk_buff *skb;
 
 	WRITE_ONCE(msk->first_pending, NULL);
 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
 		dfrag_clear(sk, dfrag);
-	while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
-		sk->sk_forward_alloc += skb->truesize;
-		kfree_skb(skb);
-	}
 }
 
 static void mptcp_cancel_work(struct sock *sk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 160d716ebc2b..160c2ab09f19 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -245,9 +245,7 @@ struct mptcp_sock {
 	struct sk_buff  *ooo_last_skb;
 	struct rb_root  out_of_order_queue;
 	struct sk_buff_head receive_queue;
-	struct sk_buff_head skb_tx_cache;	/* this is wmem accounted */
 	int		tx_pending_data;
-	int		size_goal_cache;
 	struct list_head conn_list;
 	struct list_head rtx_queue;
 	struct mptcp_data_frag *first_pending;

From 75e908c33615999abe1f3a8429d25dea30d28e4e Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 15:54:34 -0700
Subject: [PATCH 2577/3292] mptcp: use fast lock for subflows when possible

There are a bunch of callsite where the ssk socket
lock is acquired using the full-blown version eligible for
the fast variant. Let's move to the latter.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 10 ++++++----
 net/mptcp/protocol.c   | 15 +++++++++------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 09722598994d..d4732a4f223e 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -540,6 +540,7 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
 	if (subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow;
 
 		spin_unlock_bh(&msk->pm.lock);
 		pr_debug("send ack for %s%s%s",
@@ -547,9 +548,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 			 mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
 			 mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
 
-		lock_sock(ssk);
+		slow = lock_sock_fast(ssk);
 		tcp_send_ack(ssk);
-		release_sock(ssk);
+		unlock_sock_fast(ssk, slow);
 		spin_lock_bh(&msk->pm.lock);
 	}
 }
@@ -566,6 +567,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		struct sock *sk = (struct sock *)msk;
 		struct mptcp_addr_info local;
+		bool slow;
 
 		local_address((struct sock_common *)ssk, &local);
 		if (!addresses_equal(&local, addr, addr->port))
@@ -578,9 +580,9 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 
 		spin_unlock_bh(&msk->pm.lock);
 		pr_debug("send ack for mp_prio");
-		lock_sock(ssk);
+		slow = lock_sock_fast(ssk);
 		tcp_send_ack(ssk);
-		release_sock(ssk);
+		unlock_sock_fast(ssk, slow);
 		spin_lock_bh(&msk->pm.lock);
 
 		return 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 77c90d6f04df..c47ce074737d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -433,23 +433,25 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow;
 
-		lock_sock(ssk);
+		slow = lock_sock_fast(ssk);
 		if (tcp_can_send_ack(ssk))
 			tcp_send_ack(ssk);
-		release_sock(ssk);
+		unlock_sock_fast(ssk, slow);
 	}
 }
 
 static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
 {
+	bool slow;
 	int ret;
 
-	lock_sock(ssk);
+	slow = lock_sock_fast(ssk);
 	ret = tcp_can_send_ack(ssk);
 	if (ret)
 		tcp_cleanup_rbuf(ssk, 1);
-	release_sock(ssk);
+	unlock_sock_fast(ssk, slow);
 	return ret;
 }
 
@@ -2252,13 +2254,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 
 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
 		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+		bool slow;
 
-		lock_sock(tcp_sk);
+		slow = lock_sock_fast(tcp_sk);
 		if (tcp_sk->sk_state != TCP_CLOSE) {
 			tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
 			tcp_set_state(tcp_sk, TCP_CLOSE);
 		}
-		release_sock(tcp_sk);
+		unlock_sock_fast(tcp_sk, slow);
 	}
 
 	inet_sk_state_store(sk, TCP_CLOSE);

From 3c90e377a1e87a35a7f868ed1c53ea4d62379a8d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 15:54:35 -0700
Subject: [PATCH 2578/3292] mptcp: don't clear MPTCP_DATA_READY in
 sk_wait_event()

If we don't flush entirely the receive queue, we need set
again such bit later. We can simply avoid clearing it.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c47ce074737d..3e088e9d20fd 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1715,7 +1715,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
 	sk_wait_event(sk, timeo,
-		      test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
+		      test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
 
 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	remove_wait_queue(sk_sleep(sk), &wait);
@@ -2039,10 +2039,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		 */
 		if (unlikely(__mptcp_move_skbs(msk)))
 			set_bit(MPTCP_DATA_READY, &msk->flags);
-	} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
-		/* data to read but mptcp_wait_data() cleared DATA_READY */
-		set_bit(MPTCP_DATA_READY, &msk->flags);
 	}
+
 out_err:
 	if (cmsg_flags && copied >= 0) {
 		if (cmsg_flags & MPTCP_CMSG_TS)

From 8cfc47fc2eb0fd2d6eaa9e4b23b4bf6ef1bfaeef Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 15:54:36 -0700
Subject: [PATCH 2579/3292] mptcp: drop redundant test in move_skbs_to_msk()

Currently we check the msk state to avoid enqueuing new
skbs at msk shutdown time.

Such test is racy - as we can't acquire the msk socket lock -
and useless, as the caller already checked the subflow
field 'disposable', covering the same scenario in a race
free manner - read and updated under the ssk socket lock.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3e088e9d20fd..cf75be02eb00 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -686,9 +686,6 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	struct sock *sk = (struct sock *)msk;
 	unsigned int moved = 0;
 
-	if (inet_sk_state_load(sk) == TCP_CLOSE)
-		return false;
-
 	__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 	__mptcp_ofo_queue(msk);
 	if (unlikely(ssk->sk_err)) {

From 06285da96a1cdbad265a212f6729e19a515127a2 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 15:54:37 -0700
Subject: [PATCH 2580/3292] mptcp: add MIB counter for invalid mapping

Account this exceptional events for better introspection.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/mib.c     | 1 +
 net/mptcp/mib.h     | 1 +
 net/mptcp/subflow.c | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index e7e60bc1fb96..52ea2517e856 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
 	SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
 	SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+	SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
 	SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
 	SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
 	SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 92e56c0cfbdd..193466c9b549 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_JOINACKMAC,		/* HMAC was wrong on ACK + MP_JOIN */
 	MPTCP_MIB_DSSNOMATCH,		/* Received a new mapping that did not match the previous one */
 	MPTCP_MIB_INFINITEMAPRX,	/* Received an infinite mapping */
+	MPTCP_MIB_DSSTCPMISMATCH,	/* DSS-mapping did not map with TCP's sequence numbers */
 	MPTCP_MIB_DATACSUMERR,		/* The data checksum fail */
 	MPTCP_MIB_OFOQUEUETAIL,	/* Segments inserted into OoO queue tail */
 	MPTCP_MIB_OFOQUEUE,		/* Segments inserted into OoO queue */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8976ff586b87..585951e7e52f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1046,8 +1046,10 @@ validate_seq:
 	/* we revalidate valid mapping on new skb, because we must ensure
 	 * the current skb is completely covered by the available mapping
 	 */
-	if (!validate_mapping(ssk, skb))
+	if (!validate_mapping(ssk, skb)) {
+		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
 		return MAPPING_INVALID;
+	}
 
 	skb_ext_del(skb, SKB_EXT_MPTCP);
 

From a4debc4772f44737358ea4210f6fca1f19f5c783 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Mon, 21 Jun 2021 15:54:38 -0700
Subject: [PATCH 2581/3292] selftests: mptcp: display proper reason to abort
 tests

Without this modification, we were often displaying this error messages:

  FAIL: Could not even run loopback test

But $ret could have been set to a non 0 value in many different cases:

- net.mptcp.enabled=0 is not working as expected
- setsockopt(..., TCP_ULP, "mptcp", ...) is allowed
- ping between each netns are failing
- tests between ns1 as a receiver and ns>1 are failing
- other tests not involving ns1 as a receiver are failing

So not only for the loopback test.

Now a clearer message, including the time it took to run all tests, is
displayed.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/mptcp/mptcp_connect.sh      | 52 +++++++++++++------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 2484fb6a9a8d..559173a8e387 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -680,6 +680,25 @@ run_tests_peekmode()
 	run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
 }
 
+display_time()
+{
+	time_end=$(date +%s)
+	time_run=$((time_end-time_start))
+
+	echo "Time: ${time_run} seconds"
+}
+
+stop_if_error()
+{
+	local msg="$1"
+
+	if [ ${ret} -ne 0 ]; then
+		echo "FAIL: ${msg}" 1>&2
+		display_time
+		exit ${ret}
+	fi
+}
+
 make_file "$cin" "client"
 make_file "$sin" "server"
 
@@ -687,6 +706,8 @@ check_mptcp_disabled
 
 check_mptcp_ulp_setsockopt
 
+stop_if_error "The kernel configuration is not valid for MPTCP"
+
 echo "INFO: validating network environment with pings"
 for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
 	do_ping "$ns1" $sender 10.0.1.1
@@ -706,6 +727,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
 	do_ping "$ns4" $sender dead:beef:3::1
 done
 
+stop_if_error "Could not even run ping tests"
+
 [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
 echo -n "INFO: Using loss of $tc_loss "
 test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
@@ -733,18 +756,13 @@ echo "on ns3eth4"
 
 tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
 
-for sender in $ns1 $ns2 $ns3 $ns4;do
-	run_tests_lo "$ns1" "$sender" 10.0.1.1 1
-	if [ $ret -ne 0 ] ;then
-		echo "FAIL: Could not even run loopback test" 1>&2
-		exit $ret
-	fi
-	run_tests_lo "$ns1" $sender dead:beef:1::1 1
-	if [ $ret -ne 0 ] ;then
-		echo "FAIL: Could not even run loopback v6 test" 2>&1
-		exit $ret
-	fi
+run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
+stop_if_error "Could not even run loopback test"
 
+run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
+stop_if_error "Could not even run loopback v6 test"
+
+for sender in $ns1 $ns2 $ns3 $ns4;do
 	# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
 	# mptcp syncookie support.
 	if [ $sender = $ns1 ]; then
@@ -753,6 +771,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
 		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
 	fi
 
+	run_tests "$ns1" $sender 10.0.1.1
+	run_tests "$ns1" $sender dead:beef:1::1
+
 	run_tests "$ns2" $sender 10.0.1.2
 	run_tests "$ns2" $sender dead:beef:1::2
 	run_tests "$ns2" $sender 10.0.2.1
@@ -765,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
 
 	run_tests "$ns4" $sender 10.0.3.1
 	run_tests "$ns4" $sender dead:beef:3::1
+
+	stop_if_error "Tests with $sender as a sender have failed"
 done
 
 run_tests_peekmode "saveWithPeek"
 run_tests_peekmode "saveAfterPeek"
+stop_if_error "Tests with peek mode have failed"
 
-time_end=$(date +%s)
-time_run=$((time_end-time_start))
-
-echo "Time: ${time_run} seconds"
-
+display_time
 exit $ret

From f842f48891ad962c1dcac2c162f72862643fc221 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:51 +0300
Subject: [PATCH 2582/3292] wwan_hwsim: support network interface creation

Add support for networking interface creation via the WWAN core by
registering the WWAN netdev creation ops for each simulated WWAN device.
Implemented minimalistic netdev support where the xmit callback just
consumes all egress skbs.

This should help with WWAN network interfaces creation testing.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_hwsim.c | 48 +++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
index 472cae544a2b..c1e850b9c087 100644
--- a/drivers/net/wwan/wwan_hwsim.c
+++ b/drivers/net/wwan/wwan_hwsim.c
@@ -14,10 +14,13 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/skbuff.h>
+#include <linux/netdevice.h>
 #include <linux/wwan.h>
 #include <linux/debugfs.h>
 #include <linux/workqueue.h>
 
+#include <net/arp.h>
+
 static int wwan_hwsim_devsnum = 2;
 module_param_named(devices, wwan_hwsim_devsnum, int, 0444);
 MODULE_PARM_DESC(devices, "Number of simulated devices");
@@ -64,6 +67,38 @@ static const struct file_operations wwan_hwsim_debugfs_devdestroy_fops;
 static void wwan_hwsim_port_del_work(struct work_struct *work);
 static void wwan_hwsim_dev_del_work(struct work_struct *work);
 
+static netdev_tx_t wwan_hwsim_netdev_xmit(struct sk_buff *skb,
+					  struct net_device *ndev)
+{
+	ndev->stats.tx_packets++;
+	ndev->stats.tx_bytes += skb->len;
+	consume_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops wwan_hwsim_netdev_ops = {
+	.ndo_start_xmit = wwan_hwsim_netdev_xmit,
+};
+
+static void wwan_hwsim_netdev_setup(struct net_device *ndev)
+{
+	ndev->netdev_ops = &wwan_hwsim_netdev_ops;
+	ndev->needs_free_netdev = true;
+
+	ndev->mtu = ETH_DATA_LEN;
+	ndev->min_mtu = ETH_MIN_MTU;
+	ndev->max_mtu = ETH_MAX_MTU;
+
+	ndev->type = ARPHRD_NONE;
+	ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+}
+
+static const struct wwan_ops wwan_hwsim_wwan_rtnl_ops = {
+	.owner = THIS_MODULE,
+	.priv_size = 0,			/* No private data */
+	.setup = wwan_hwsim_netdev_setup,
+};
+
 static int wwan_hwsim_port_start(struct wwan_port *wport)
 {
 	struct wwan_hwsim_port *port = wwan_port_get_drvdata(wport);
@@ -254,6 +289,10 @@ static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void)
 
 	INIT_WORK(&dev->del_work, wwan_hwsim_dev_del_work);
 
+	err = wwan_register_ops(&dev->dev, &wwan_hwsim_wwan_rtnl_ops, dev);
+	if (err)
+		goto err_unreg_dev;
+
 	dev->debugfs_topdir = debugfs_create_dir(dev_name(&dev->dev),
 						 wwan_hwsim_debugfs_topdir);
 	debugfs_create_file("destroy", 0200, dev->debugfs_topdir, dev,
@@ -265,6 +304,12 @@ static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void)
 
 	return dev;
 
+err_unreg_dev:
+	device_unregister(&dev->dev);
+	/* Memory will be freed in the device release callback */
+
+	return ERR_PTR(err);
+
 err_free_dev:
 	kfree(dev);
 
@@ -290,6 +335,9 @@ static void wwan_hwsim_dev_del(struct wwan_hwsim_dev *dev)
 
 	debugfs_remove(dev->debugfs_topdir);
 
+	/* This will remove all child netdev(s) */
+	wwan_unregister_ops(&dev->dev);
+
 	/* Make sure that there is no pending deletion work */
 	if (current_work() != &dev->del_work)
 		cancel_work_sync(&dev->del_work);

From 355a4e7e0a231af80fc0f470235dc6747d2e0936 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:52 +0300
Subject: [PATCH 2583/3292] wwan: core: relocate ops registering code

It is unlikely that RTNL callbacks will call WWAN ops (un-)register
functions, but it is highly likely that the ops (un-)register functions
will use RTNL link create/destroy handlers. So move the WWAN network
interface ops (un-)register functions below the RTNL callbacks to be
able to call them without forward declarations.

No functional changes, just code relocation.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 142 +++++++++++++++++------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 165afec1dbd1..688a7278a396 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -789,77 +789,6 @@ static const struct file_operations wwan_port_fops = {
 	.llseek = noop_llseek,
 };
 
-/**
- * wwan_register_ops - register WWAN device ops
- * @parent: Device to use as parent and shared by all WWAN ports and
- *	created netdevs
- * @ops: operations to register
- * @ctxt: context to pass to operations
- *
- * Returns: 0 on success, a negative error code on failure
- */
-int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
-		      void *ctxt)
-{
-	struct wwan_device *wwandev;
-
-	if (WARN_ON(!parent || !ops))
-		return -EINVAL;
-
-	wwandev = wwan_create_dev(parent);
-	if (!wwandev)
-		return -ENOMEM;
-
-	if (WARN_ON(wwandev->ops)) {
-		wwan_remove_dev(wwandev);
-		return -EBUSY;
-	}
-
-	if (!try_module_get(ops->owner)) {
-		wwan_remove_dev(wwandev);
-		return -ENODEV;
-	}
-
-	wwandev->ops = ops;
-	wwandev->ops_ctxt = ctxt;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wwan_register_ops);
-
-/**
- * wwan_unregister_ops - remove WWAN device ops
- * @parent: Device to use as parent and shared by all WWAN ports and
- *	created netdevs
- */
-void wwan_unregister_ops(struct device *parent)
-{
-	struct wwan_device *wwandev = wwan_dev_get_by_parent(parent);
-	bool has_ops;
-
-	if (WARN_ON(IS_ERR(wwandev)))
-		return;
-
-	has_ops = wwandev->ops;
-
-	/* put the reference obtained by wwan_dev_get_by_parent(),
-	 * we should still have one (that the owner is giving back
-	 * now) due to the ops being assigned, check that below
-	 * and return if not.
-	 */
-	put_device(&wwandev->dev);
-
-	if (WARN_ON(!has_ops))
-		return;
-
-	module_put(wwandev->ops->owner);
-
-	wwandev->ops = NULL;
-	wwandev->ops_ctxt = NULL;
-	wwan_remove_dev(wwandev);
-}
-EXPORT_SYMBOL_GPL(wwan_unregister_ops);
-
 static int wwan_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
 			      struct netlink_ext_ack *extack)
 {
@@ -974,6 +903,77 @@ static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = {
 	.policy = wwan_rtnl_policy,
 };
 
+/**
+ * wwan_register_ops - register WWAN device ops
+ * @parent: Device to use as parent and shared by all WWAN ports and
+ *	created netdevs
+ * @ops: operations to register
+ * @ctxt: context to pass to operations
+ *
+ * Returns: 0 on success, a negative error code on failure
+ */
+int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
+		      void *ctxt)
+{
+	struct wwan_device *wwandev;
+
+	if (WARN_ON(!parent || !ops))
+		return -EINVAL;
+
+	wwandev = wwan_create_dev(parent);
+	if (!wwandev)
+		return -ENOMEM;
+
+	if (WARN_ON(wwandev->ops)) {
+		wwan_remove_dev(wwandev);
+		return -EBUSY;
+	}
+
+	if (!try_module_get(ops->owner)) {
+		wwan_remove_dev(wwandev);
+		return -ENODEV;
+	}
+
+	wwandev->ops = ops;
+	wwandev->ops_ctxt = ctxt;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wwan_register_ops);
+
+/**
+ * wwan_unregister_ops - remove WWAN device ops
+ * @parent: Device to use as parent and shared by all WWAN ports and
+ *	created netdevs
+ */
+void wwan_unregister_ops(struct device *parent)
+{
+	struct wwan_device *wwandev = wwan_dev_get_by_parent(parent);
+	bool has_ops;
+
+	if (WARN_ON(IS_ERR(wwandev)))
+		return;
+
+	has_ops = wwandev->ops;
+
+	/* put the reference obtained by wwan_dev_get_by_parent(),
+	 * we should still have one (that the owner is giving back
+	 * now) due to the ops being assigned, check that below
+	 * and return if not.
+	 */
+	put_device(&wwandev->dev);
+
+	if (WARN_ON(!has_ops))
+		return;
+
+	module_put(wwandev->ops->owner);
+
+	wwandev->ops = NULL;
+	wwandev->ops_ctxt = NULL;
+	wwan_remove_dev(wwandev);
+}
+EXPORT_SYMBOL_GPL(wwan_unregister_ops);
+
 static int __init wwan_init(void)
 {
 	int err;

From 58c3b421c62edd30b0b660e3e6711ad91842c271 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:53 +0300
Subject: [PATCH 2584/3292] wwan: core: require WWAN netdev setup callback
 existence

The setup callback will be unconditionally passed to the
alloc_netdev_mqs(), where the NULL pointer dereference will cause the
kernel panic. So refuse to register WWAN netdev ops with warning
generation if the setup callback is not provided.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 688a7278a396..1bd472195813 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -917,7 +917,7 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
 {
 	struct wwan_device *wwandev;
 
-	if (WARN_ON(!parent || !ops))
+	if (WARN_ON(!parent || !ops || !ops->setup))
 		return -EINVAL;
 
 	wwandev = wwan_create_dev(parent);

From f492fccf3d62ba8e8b4d75d3f2ab82af25b18ffa Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:54 +0300
Subject: [PATCH 2585/3292] wwan: core: multiple netdevs deletion support

Use unregister_netdevice_queue() instead of simple
unregister_netdevice() if the WWAN netdev ops does not provide a dellink
callback. This will help to accelerate deletion of multiple netdevs.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 1bd472195813..b6b9c52f617c 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -882,7 +882,7 @@ static void wwan_rtnl_dellink(struct net_device *dev, struct list_head *head)
 	if (wwandev->ops->dellink)
 		wwandev->ops->dellink(wwandev->ops_ctxt, dev, head);
 	else
-		unregister_netdevice(dev);
+		unregister_netdevice_queue(dev, head);
 
 out:
 	/* release the reference */

From 2f75238014f074daddd79ccc17fa1caf72ff3815 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:55 +0300
Subject: [PATCH 2586/3292] wwan: core: remove all netdevs on ops unregistering

We use the ops owner module hold to protect against ops memory
disappearing. But this approach does not protect us from a driver that
unregisters ops but forgets to remove netdev(s) that were created using
this ops. In such case, we are left with netdev(s), which can not be
removed since ops is gone. Moreover, batch netdevs removing on
deinitialization is a desireable option for WWAN drivers as it is a
quite common task.

Implement deletion of all created links on WWAN netdev ops unregistering
in the same way that RTNL removes all links on RTNL ops unregistering.
Simply remove all child netdevs of a device whose WWAN netdev ops is
unregistering. This way we protecting the kernel from buggy drivers and
make it easier to write a driver deinitialization code.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 40 ++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index b6b9c52f617c..ec6a69b23dd1 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -941,6 +941,17 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
 }
 EXPORT_SYMBOL_GPL(wwan_register_ops);
 
+/* Enqueue child netdev deletion */
+static int wwan_child_dellink(struct device *dev, void *data)
+{
+	struct list_head *kill_list = data;
+
+	if (dev->type == &wwan_type)
+		wwan_rtnl_dellink(to_net_dev(dev), kill_list);
+
+	return 0;
+}
+
 /**
  * wwan_unregister_ops - remove WWAN device ops
  * @parent: Device to use as parent and shared by all WWAN ports and
@@ -949,26 +960,37 @@ EXPORT_SYMBOL_GPL(wwan_register_ops);
 void wwan_unregister_ops(struct device *parent)
 {
 	struct wwan_device *wwandev = wwan_dev_get_by_parent(parent);
-	bool has_ops;
+	struct module *owner;
+	LIST_HEAD(kill_list);
 
 	if (WARN_ON(IS_ERR(wwandev)))
 		return;
-
-	has_ops = wwandev->ops;
+	if (WARN_ON(!wwandev->ops)) {
+		put_device(&wwandev->dev);
+		return;
+	}
 
 	/* put the reference obtained by wwan_dev_get_by_parent(),
 	 * we should still have one (that the owner is giving back
-	 * now) due to the ops being assigned, check that below
-	 * and return if not.
+	 * now) due to the ops being assigned.
 	 */
 	put_device(&wwandev->dev);
 
-	if (WARN_ON(!has_ops))
-		return;
+	owner = wwandev->ops->owner;	/* Preserve ops owner */
 
-	module_put(wwandev->ops->owner);
+	rtnl_lock();	/* Prevent concurent netdev(s) creation/destroying */
+
+	/* Remove all child netdev(s), using batch removing */
+	device_for_each_child(&wwandev->dev, &kill_list,
+			      wwan_child_dellink);
+	unregister_netdevice_many(&kill_list);
+
+	wwandev->ops = NULL;	/* Finally remove ops */
+
+	rtnl_unlock();
+
+	module_put(owner);
 
-	wwandev->ops = NULL;
 	wwandev->ops_ctxt = NULL;
 	wwan_remove_dev(wwandev);
 }

From 322a0ba99c50d6abadeda709f0552eb8dac6668c Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:56 +0300
Subject: [PATCH 2587/3292] net: iosm: drop custom netdev(s) removing

Since the last commit, the WWAN core will remove all our network
interfaces for us at the time of the WWAN netdev ops unregistering.
Therefore, we can safely drop the custom code that cleans the list of
created netdevs. Anyway it no longer removes any netdev, since all
netdevs were removed earlier in the wwan_unregister_ops() call.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: M Chetan Kumar <m.chetan.kumar@intel.com>
CC: M Chetan Kumar <m.chetan.kumar@intel.com>
CC: Intel Corporation <linuxwwan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_wwan.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
index 1711b79fc616..bee9b278223d 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -329,22 +329,9 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev)
 
 void ipc_wwan_deinit(struct iosm_wwan *ipc_wwan)
 {
-	int if_id;
-
+	/* This call will remove all child netdev(s) */
 	wwan_unregister_ops(ipc_wwan->dev);
 
-	for (if_id = 0; if_id < ARRAY_SIZE(ipc_wwan->sub_netlist); if_id++) {
-		struct iosm_netdev_priv *priv;
-
-		priv = rcu_access_pointer(ipc_wwan->sub_netlist[if_id]);
-		if (!priv)
-			continue;
-
-		rtnl_lock();
-		ipc_wwan_dellink(ipc_wwan, priv->netdev, NULL);
-		rtnl_unlock();
-	}
-
 	mutex_destroy(&ipc_wwan->if_mutex);
 
 	kfree(ipc_wwan);

From 9f0248ea476ee59d336d7c8bf1a5d0919d93d030 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:57 +0300
Subject: [PATCH 2588/3292] wwan: core: no more hold netdev ops owning module

The WWAN netdev ops owner holding was used to protect from the
unexpected memory disappear. This approach causes a dependency cycle
(driver -> core -> driver) and effectively prevents a WWAN driver
unloading. E.g. WWAN hwsim could not be unloaded until all simulated
devices are removed:

~# modprobe wwan_hwsim devices=2
~# lsmod | grep wwan
wwan_hwsim             16384  2
wwan                   20480  1 wwan_hwsim
~# rmmod wwan_hwsim
rmmod: ERROR: Module wwan_hwsim is in use
~# echo > /sys/kernel/debug/wwan_hwsim/hwsim0/destroy
~# echo > /sys/kernel/debug/wwan_hwsim/hwsim1/destroy
~# lsmod | grep wwan
wwan_hwsim             16384  0
wwan                   20480  1 wwan_hwsim
~# rmmod wwan_hwsim

For a real device driver this will cause an inability to unload module
until a served device is physically detached.

Since the last commit we are removing all child netdev(s) when a driver
unregister the netdev ops. This allows us to permit the driver
unloading, since any sane driver will call ops unregistering on a device
deinitialization. So, remove the holding of an ops owner to make it
easier to unload a driver module. The owner field has also beed removed
from the ops structure as there are no more users of this field.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/net.c         |  3 +--
 drivers/net/wwan/wwan_core.c  | 10 ----------
 drivers/net/wwan/wwan_hwsim.c |  1 -
 include/linux/wwan.h          |  2 --
 4 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index 6aa753387372..ffd1c01b3f35 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -383,7 +383,6 @@ static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
 }
 
 static const struct wwan_ops mhi_wwan_ops = {
-	.owner = THIS_MODULE,
 	.priv_size = sizeof(struct mhi_net_dev),
 	.setup = mhi_net_setup,
 	.newlink = mhi_net_newlink,
@@ -436,7 +435,7 @@ static void mhi_net_remove(struct mhi_device *mhi_dev)
 	struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
 	struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
 
-	/* rtnetlink takes care of removing remaining links */
+	/* WWAN core takes care of removing remaining links */
 	wwan_unregister_ops(&cntrl->mhi_dev->dev);
 
 	if (create_default_iface)
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index ec6a69b23dd1..b634a0ba1196 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -929,11 +929,6 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
 		return -EBUSY;
 	}
 
-	if (!try_module_get(ops->owner)) {
-		wwan_remove_dev(wwandev);
-		return -ENODEV;
-	}
-
 	wwandev->ops = ops;
 	wwandev->ops_ctxt = ctxt;
 
@@ -960,7 +955,6 @@ static int wwan_child_dellink(struct device *dev, void *data)
 void wwan_unregister_ops(struct device *parent)
 {
 	struct wwan_device *wwandev = wwan_dev_get_by_parent(parent);
-	struct module *owner;
 	LIST_HEAD(kill_list);
 
 	if (WARN_ON(IS_ERR(wwandev)))
@@ -976,8 +970,6 @@ void wwan_unregister_ops(struct device *parent)
 	 */
 	put_device(&wwandev->dev);
 
-	owner = wwandev->ops->owner;	/* Preserve ops owner */
-
 	rtnl_lock();	/* Prevent concurent netdev(s) creation/destroying */
 
 	/* Remove all child netdev(s), using batch removing */
@@ -989,8 +981,6 @@ void wwan_unregister_ops(struct device *parent)
 
 	rtnl_unlock();
 
-	module_put(owner);
-
 	wwandev->ops_ctxt = NULL;
 	wwan_remove_dev(wwandev);
 }
diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
index c1e850b9c087..a8582a58a385 100644
--- a/drivers/net/wwan/wwan_hwsim.c
+++ b/drivers/net/wwan/wwan_hwsim.c
@@ -94,7 +94,6 @@ static void wwan_hwsim_netdev_setup(struct net_device *ndev)
 }
 
 static const struct wwan_ops wwan_hwsim_wwan_rtnl_ops = {
-	.owner = THIS_MODULE,
 	.priv_size = 0,			/* No private data */
 	.setup = wwan_hwsim_netdev_setup,
 };
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 34222230360c..e1981ea3a2fd 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -128,14 +128,12 @@ void *wwan_port_get_drvdata(struct wwan_port *port);
 
 /**
  * struct wwan_ops - WWAN device ops
- * @owner: module owner of the WWAN ops
  * @priv_size: size of private netdev data area
  * @setup: set up a new netdev
  * @newlink: register the new netdev
  * @dellink: remove the given netdev
  */
 struct wwan_ops {
-	struct module *owner;
 	unsigned int priv_size;
 	void (*setup)(struct net_device *dev);
 	int (*newlink)(void *ctxt, struct net_device *dev,

From ca374290aaade741a4781ae5f6e1ba7515e4e5fa Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:58 +0300
Subject: [PATCH 2589/3292] wwan: core: support default netdev creation

Most, if not each WWAN device driver will create a netdev for the
default data channel. Therefore, add an option for the WWAN netdev ops
registration function to create a default netdev for the WWAN device.

A WWAN device driver should pass a default data channel link id to the
ops registering function to request the creation of a default netdev, or
a special value WWAN_NO_DEFAULT_LINK to inform the WWAN core that the
default netdev should not be created.

For now, only wwan_hwsim utilize the default link creation option. Other
drivers will be reworked next.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
CC: M Chetan Kumar <m.chetan.kumar@intel.com>
CC: Intel Corporation <linuxwwan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/net.c                 |  3 +-
 drivers/net/wwan/iosm/iosm_ipc_wwan.c |  3 +-
 drivers/net/wwan/wwan_core.c          | 75 ++++++++++++++++++++++++++-
 drivers/net/wwan/wwan_hwsim.c         |  2 +-
 include/linux/wwan.h                  |  8 ++-
 5 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index ffd1c01b3f35..f36ca5c0dfe9 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -397,7 +397,8 @@ static int mhi_net_probe(struct mhi_device *mhi_dev,
 	struct net_device *ndev;
 	int err;
 
-	err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev);
+	err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev,
+				WWAN_NO_DEFAULT_LINK);
 	if (err)
 		return err;
 
diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
index bee9b278223d..adb2bd40a404 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -317,7 +317,8 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev)
 	ipc_wwan->dev = dev;
 	ipc_wwan->ipc_imem = ipc_imem;
 
-	if (wwan_register_ops(ipc_wwan->dev, &iosm_wwan_ops, ipc_wwan)) {
+	if (wwan_register_ops(ipc_wwan->dev, &iosm_wwan_ops, ipc_wwan,
+			      WWAN_NO_DEFAULT_LINK)) {
 		kfree(ipc_wwan);
 		return NULL;
 	}
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index b634a0ba1196..ef6ec641d877 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -903,17 +903,81 @@ static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = {
 	.policy = wwan_rtnl_policy,
 };
 
+static void wwan_create_default_link(struct wwan_device *wwandev,
+				     u32 def_link_id)
+{
+	struct nlattr *tb[IFLA_MAX + 1], *linkinfo[IFLA_INFO_MAX + 1];
+	struct nlattr *data[IFLA_WWAN_MAX + 1];
+	struct net_device *dev;
+	struct nlmsghdr *nlh;
+	struct sk_buff *msg;
+
+	/* Forge attributes required to create a WWAN netdev. We first
+	 * build a netlink message and then parse it. This looks
+	 * odd, but such approach is less error prone.
+	 */
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (WARN_ON(!msg))
+		return;
+	nlh = nlmsg_put(msg, 0, 0, RTM_NEWLINK, 0, 0);
+	if (WARN_ON(!nlh))
+		goto free_attrs;
+
+	if (nla_put_string(msg, IFLA_PARENT_DEV_NAME, dev_name(&wwandev->dev)))
+		goto free_attrs;
+	tb[IFLA_LINKINFO] = nla_nest_start(msg, IFLA_LINKINFO);
+	if (!tb[IFLA_LINKINFO])
+		goto free_attrs;
+	linkinfo[IFLA_INFO_DATA] = nla_nest_start(msg, IFLA_INFO_DATA);
+	if (!linkinfo[IFLA_INFO_DATA])
+		goto free_attrs;
+	if (nla_put_u32(msg, IFLA_WWAN_LINK_ID, def_link_id))
+		goto free_attrs;
+	nla_nest_end(msg, linkinfo[IFLA_INFO_DATA]);
+	nla_nest_end(msg, tb[IFLA_LINKINFO]);
+
+	nlmsg_end(msg, nlh);
+
+	/* The next three parsing calls can not fail */
+	nlmsg_parse_deprecated(nlh, 0, tb, IFLA_MAX, NULL, NULL);
+	nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO],
+				    NULL, NULL);
+	nla_parse_nested_deprecated(data, IFLA_WWAN_MAX,
+				    linkinfo[IFLA_INFO_DATA], NULL, NULL);
+
+	rtnl_lock();
+
+	dev = rtnl_create_link(&init_net, "wwan%d", NET_NAME_ENUM,
+			       &wwan_rtnl_link_ops, tb, NULL);
+	if (WARN_ON(IS_ERR(dev)))
+		goto unlock;
+
+	if (WARN_ON(wwan_rtnl_newlink(&init_net, dev, tb, data, NULL))) {
+		free_netdev(dev);
+		goto unlock;
+	}
+
+unlock:
+	rtnl_unlock();
+
+free_attrs:
+	nlmsg_free(msg);
+}
+
 /**
  * wwan_register_ops - register WWAN device ops
  * @parent: Device to use as parent and shared by all WWAN ports and
  *	created netdevs
  * @ops: operations to register
  * @ctxt: context to pass to operations
+ * @def_link_id: id of the default link that will be automatically created by
+ *	the WWAN core for the WWAN device. The default link will not be created
+ *	if the passed value is WWAN_NO_DEFAULT_LINK.
  *
  * Returns: 0 on success, a negative error code on failure
  */
 int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
-		      void *ctxt)
+		      void *ctxt, u32 def_link_id)
 {
 	struct wwan_device *wwandev;
 
@@ -932,6 +996,15 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
 	wwandev->ops = ops;
 	wwandev->ops_ctxt = ctxt;
 
+	/* NB: we do not abort ops registration in case of default link
+	 * creation failure. Link ops is the management interface, while the
+	 * default link creation is a service option. And we should not prevent
+	 * a user from manually creating a link latter if service option failed
+	 * now.
+	 */
+	if (def_link_id != WWAN_NO_DEFAULT_LINK)
+		wwan_create_default_link(wwandev, def_link_id);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(wwan_register_ops);
diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c
index a8582a58a385..5b62cf3b3c42 100644
--- a/drivers/net/wwan/wwan_hwsim.c
+++ b/drivers/net/wwan/wwan_hwsim.c
@@ -288,7 +288,7 @@ static struct wwan_hwsim_dev *wwan_hwsim_dev_new(void)
 
 	INIT_WORK(&dev->del_work, wwan_hwsim_dev_del_work);
 
-	err = wwan_register_ops(&dev->dev, &wwan_hwsim_wwan_rtnl_ops, dev);
+	err = wwan_register_ops(&dev->dev, &wwan_hwsim_wwan_rtnl_ops, dev, 1);
 	if (err)
 		goto err_unreg_dev;
 
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index e1981ea3a2fd..91590db70a12 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -126,6 +126,12 @@ void wwan_port_txon(struct wwan_port *port);
  */
 void *wwan_port_get_drvdata(struct wwan_port *port);
 
+/*
+ * Used to indicate that the WWAN core should not create a default network
+ * link.
+ */
+#define WWAN_NO_DEFAULT_LINK		U32_MAX
+
 /**
  * struct wwan_ops - WWAN device ops
  * @priv_size: size of private netdev data area
@@ -143,7 +149,7 @@ struct wwan_ops {
 };
 
 int wwan_register_ops(struct device *parent, const struct wwan_ops *ops,
-		      void *ctxt);
+		      void *ctxt, u32 def_link_id);
 
 void wwan_unregister_ops(struct device *parent);
 

From 83068395bbfcd96db74af75c6dc3a87a4f952220 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:50:59 +0300
Subject: [PATCH 2590/3292] net: iosm: create default link via WWAN core

Utilize the just introduced WWAN core feature to create a default netdev
for the default data (IP MUX) channel.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
CC: M Chetan Kumar <m.chetan.kumar@intel.com>
CC: Intel Corporation <linuxwwan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/iosm/iosm_ipc_imem_ops.h | 3 +++
 drivers/net/wwan/iosm/iosm_ipc_wwan.c     | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
index 84087cf33329..fd356dafbdd6 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
@@ -30,6 +30,9 @@
 #define IP_MUX_SESSION_START 1
 #define IP_MUX_SESSION_END 8
 
+/* Default IP MUX channel */
+#define IP_MUX_SESSION_DEFAULT	1
+
 /**
  * ipc_imem_sys_port_open - Open a port link to CP.
  * @ipc_imem:	Imem instance.
diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
index adb2bd40a404..d3cb28107836 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -317,8 +317,9 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev)
 	ipc_wwan->dev = dev;
 	ipc_wwan->ipc_imem = ipc_imem;
 
+	/* WWAN core will create a netdev for the default IP MUX channel */
 	if (wwan_register_ops(ipc_wwan->dev, &iosm_wwan_ops, ipc_wwan,
-			      WWAN_NO_DEFAULT_LINK)) {
+			      IP_MUX_SESSION_DEFAULT)) {
 		kfree(ipc_wwan);
 		return NULL;
 	}

From 699409240389c2994e5fa1cb7d7599129bc7cfdf Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Tue, 22 Jun 2021 01:51:00 +0300
Subject: [PATCH 2591/3292] wwan: core: add WWAN common private data for netdev

The WWAN core not only multiplex the netdev configuration data, but
process it too, and needs some space to store its private data
associated with the netdev. Add a structure to keep common WWAN core
data. The structure will be stored inside the netdev private data before
WWAN driver private data and have a field to make it easier to access
the driver data. Also add a helper function that simplifies drivers
access to their data.

At the moment we use the common WWAN private data to store the WWAN data
link (channel) id at the time the link is created, and report it back to
user using the .fill_info() RTNL callback. This should help the user to
be aware which network interface is bound to which WWAN device data
channel.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
CC: M Chetan Kumar <m.chetan.kumar@intel.com>
CC: Intel Corporation <linuxwwan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/net.c                 | 12 +++++------
 drivers/net/mhi/proto_mbim.c          |  5 +++--
 drivers/net/wwan/iosm/iosm_ipc_wwan.c | 12 +++++------
 drivers/net/wwan/wwan_core.c          | 29 ++++++++++++++++++++++++++-
 include/linux/wwan.h                  | 18 +++++++++++++++++
 5 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index f36ca5c0dfe9..e60e38c1f09d 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -32,7 +32,7 @@ struct mhi_device_info {
 
 static int mhi_ndo_open(struct net_device *ndev)
 {
-	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+	struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
 
 	/* Feed the rx buffer pool */
 	schedule_delayed_work(&mhi_netdev->rx_refill, 0);
@@ -47,7 +47,7 @@ static int mhi_ndo_open(struct net_device *ndev)
 
 static int mhi_ndo_stop(struct net_device *ndev)
 {
-	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+	struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
 
 	netif_stop_queue(ndev);
 	netif_carrier_off(ndev);
@@ -58,7 +58,7 @@ static int mhi_ndo_stop(struct net_device *ndev)
 
 static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
-	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+	struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
 	const struct mhi_net_proto *proto = mhi_netdev->proto;
 	struct mhi_device *mdev = mhi_netdev->mdev;
 	int err;
@@ -93,7 +93,7 @@ exit_drop:
 static void mhi_ndo_get_stats64(struct net_device *ndev,
 				struct rtnl_link_stats64 *stats)
 {
-	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+	struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
 	unsigned int start;
 
 	do {
@@ -322,7 +322,7 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
 	if (dev_get_drvdata(&mhi_dev->dev))
 		return -EBUSY;
 
-	mhi_netdev = netdev_priv(ndev);
+	mhi_netdev = wwan_netdev_drvpriv(ndev);
 
 	dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
 	mhi_netdev->ndev = ndev;
@@ -367,7 +367,7 @@ out_err:
 static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
 			    struct list_head *head)
 {
-	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+	struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
 	struct mhi_device *mhi_dev = ctxt;
 
 	if (head)
diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
index fc72b3f6ec9e..bf1ad863237d 100644
--- a/drivers/net/mhi/proto_mbim.c
+++ b/drivers/net/mhi/proto_mbim.c
@@ -16,6 +16,7 @@
 #include <linux/ip.h>
 #include <linux/mii.h>
 #include <linux/netdevice.h>
+#include <linux/wwan.h>
 #include <linux/skbuff.h>
 #include <linux/usb.h>
 #include <linux/usb/cdc.h>
@@ -56,7 +57,7 @@ static void __mbim_errors_inc(struct mhi_net_dev *dev)
 
 static int mbim_rx_verify_nth16(struct sk_buff *skb)
 {
-	struct mhi_net_dev *dev = netdev_priv(skb->dev);
+	struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
 	struct mbim_context *ctx = dev->proto_data;
 	struct usb_cdc_ncm_nth16 *nth16;
 	int len;
@@ -102,7 +103,7 @@ static int mbim_rx_verify_nth16(struct sk_buff *skb)
 
 static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
 {
-	struct mhi_net_dev *dev = netdev_priv(skb->dev);
+	struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
 	int ret;
 
 	if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
index d3cb28107836..c999c64001f4 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c
@@ -20,7 +20,7 @@
 #define IOSM_IF_ID_PAYLOAD 2
 
 /**
- * struct iosm_netdev_priv - netdev private data
+ * struct iosm_netdev_priv - netdev WWAN driver specific private data
  * @ipc_wwan:	Pointer to iosm_wwan struct
  * @netdev:	Pointer to network interface device structure
  * @if_id:	Interface id for device.
@@ -51,7 +51,7 @@ struct iosm_wwan {
 /* Bring-up the wwan net link */
 static int ipc_wwan_link_open(struct net_device *netdev)
 {
-	struct iosm_netdev_priv *priv = netdev_priv(netdev);
+	struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev);
 	struct iosm_wwan *ipc_wwan = priv->ipc_wwan;
 	int if_id = priv->if_id;
 	int ret;
@@ -88,7 +88,7 @@ out:
 /* Bring-down the wwan net link */
 static int ipc_wwan_link_stop(struct net_device *netdev)
 {
-	struct iosm_netdev_priv *priv = netdev_priv(netdev);
+	struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev);
 
 	netif_stop_queue(netdev);
 
@@ -105,7 +105,7 @@ static int ipc_wwan_link_stop(struct net_device *netdev)
 static int ipc_wwan_link_transmit(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
-	struct iosm_netdev_priv *priv = netdev_priv(netdev);
+	struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev);
 	struct iosm_wwan *ipc_wwan = priv->ipc_wwan;
 	int if_id = priv->if_id;
 	int ret;
@@ -178,7 +178,7 @@ static int ipc_wwan_newlink(void *ctxt, struct net_device *dev,
 	    if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))
 		return -EINVAL;
 
-	priv = netdev_priv(dev);
+	priv = wwan_netdev_drvpriv(dev);
 	priv->if_id = if_id;
 	priv->netdev = dev;
 	priv->ipc_wwan = ipc_wwan;
@@ -208,8 +208,8 @@ out_unlock:
 static void ipc_wwan_dellink(void *ctxt, struct net_device *dev,
 			     struct list_head *head)
 {
+	struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(dev);
 	struct iosm_wwan *ipc_wwan = ctxt;
-	struct iosm_netdev_priv *priv = netdev_priv(dev);
 	int if_id = priv->if_id;
 
 	if (WARN_ON(if_id < IP_MUX_SESSION_START ||
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index ef6ec641d877..3e16c318e705 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -815,6 +815,7 @@ static struct net_device *wwan_rtnl_alloc(struct nlattr *tb[],
 	const char *devname = nla_data(tb[IFLA_PARENT_DEV_NAME]);
 	struct wwan_device *wwandev = wwan_dev_get_by_name(devname);
 	struct net_device *dev;
+	unsigned int priv_size;
 
 	if (IS_ERR(wwandev))
 		return ERR_CAST(wwandev);
@@ -825,7 +826,8 @@ static struct net_device *wwan_rtnl_alloc(struct nlattr *tb[],
 		goto out;
 	}
 
-	dev = alloc_netdev_mqs(wwandev->ops->priv_size, ifname, name_assign_type,
+	priv_size = sizeof(struct wwan_netdev_priv) + wwandev->ops->priv_size;
+	dev = alloc_netdev_mqs(priv_size, ifname, name_assign_type,
 			       wwandev->ops->setup, num_tx_queues, num_rx_queues);
 
 	if (dev) {
@@ -845,6 +847,7 @@ static int wwan_rtnl_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct wwan_device *wwandev = wwan_dev_get_by_parent(dev->dev.parent);
 	u32 link_id = nla_get_u32(data[IFLA_WWAN_LINK_ID]);
+	struct wwan_netdev_priv *priv = netdev_priv(dev);
 	int ret;
 
 	if (IS_ERR(wwandev))
@@ -856,6 +859,7 @@ static int wwan_rtnl_newlink(struct net *src_net, struct net_device *dev,
 		goto out;
 	}
 
+	priv->link_id = link_id;
 	if (wwandev->ops->newlink)
 		ret = wwandev->ops->newlink(wwandev->ops_ctxt, dev,
 					    link_id, extack);
@@ -889,6 +893,27 @@ out:
 	put_device(&wwandev->dev);
 }
 
+static size_t wwan_rtnl_get_size(const struct net_device *dev)
+{
+	return
+		nla_total_size(4) +	/* IFLA_WWAN_LINK_ID */
+		0;
+}
+
+static int wwan_rtnl_fill_info(struct sk_buff *skb,
+			       const struct net_device *dev)
+{
+	struct wwan_netdev_priv *priv = netdev_priv(dev);
+
+	if (nla_put_u32(skb, IFLA_WWAN_LINK_ID, priv->link_id))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static const struct nla_policy wwan_rtnl_policy[IFLA_WWAN_MAX + 1] = {
 	[IFLA_WWAN_LINK_ID] = { .type = NLA_U32 },
 };
@@ -900,6 +925,8 @@ static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = {
 	.validate = wwan_rtnl_validate,
 	.newlink = wwan_rtnl_newlink,
 	.dellink = wwan_rtnl_dellink,
+	.get_size = wwan_rtnl_get_size,
+	.fill_info = wwan_rtnl_fill_info,
 	.policy = wwan_rtnl_policy,
 };
 
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
index 91590db70a12..9fac819f92e3 100644
--- a/include/linux/wwan.h
+++ b/include/linux/wwan.h
@@ -9,6 +9,7 @@
 #include <linux/poll.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/netdevice.h>
 
 /**
  * enum wwan_port_type - WWAN port types
@@ -126,6 +127,23 @@ void wwan_port_txon(struct wwan_port *port);
  */
 void *wwan_port_get_drvdata(struct wwan_port *port);
 
+/**
+ * struct wwan_netdev_priv - WWAN core network device private data
+ * @link_id: WWAN device data link id
+ * @drv_priv: driver private data area, size is determined in &wwan_ops
+ */
+struct wwan_netdev_priv {
+	u32 link_id;
+
+	/* must be last */
+	u8 drv_priv[] __aligned(sizeof(void *));
+};
+
+static inline void *wwan_netdev_drvpriv(struct net_device *dev)
+{
+	return ((struct wwan_netdev_priv *)netdev_priv(dev))->drv_priv;
+}
+
 /*
  * Used to indicate that the WWAN core should not create a default network
  * link.

From 0555b221528e9cb11f5766dcdee19c809187e42e Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 21 Jun 2021 16:25:20 -0500
Subject: [PATCH 2592/3292] smbdirect: missing rc checks while waiting for rdma
 events

There were two places where we weren't checking for error
(e.g. ERESTARTSYS) while waiting for rdma resolution.

Addresses-Coverity: 1462165 ("Unchecked return value")
Reviewed-by: Tom Talpey <tom@talpey.com>
Reviewed-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smbdirect.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 10dfe5006792..31ef64eb7fbb 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -572,8 +572,13 @@ static struct rdma_cm_id *smbd_create_id(
 		log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
 		goto out;
 	}
-	wait_for_completion_interruptible_timeout(
+	rc = wait_for_completion_interruptible_timeout(
 		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
+	/* e.g. if interrupted returns -ERESTARTSYS */
+	if (rc < 0) {
+		log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
+		goto out;
+	}
 	rc = info->ri_rc;
 	if (rc) {
 		log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
@@ -586,8 +591,13 @@ static struct rdma_cm_id *smbd_create_id(
 		log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
 		goto out;
 	}
-	wait_for_completion_interruptible_timeout(
+	rc = wait_for_completion_interruptible_timeout(
 		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
+	/* e.g. if interrupted returns -ERESTARTSYS */
+	if (rc < 0)  {
+		log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
+		goto out;
+	}
 	rc = info->ri_rc;
 	if (rc) {
 		log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);

From 7dd5d437c258bbf4cc15b35229e5208b87b8b4e0 Mon Sep 17 00:00:00 2001
From: Bui Quang Minh <minhquangbui99@gmail.com>
Date: Sun, 13 Jun 2021 21:34:39 +0700
Subject: [PATCH 2593/3292] bpf: Fix integer overflow in argument calculation
 for bpf_map_area_alloc

In 32-bit architecture, the result of sizeof() is a 32-bit integer so
the expression becomes the multiplication between 2 32-bit integer which
can potentially leads to integer overflow. As a result,
bpf_map_area_alloc() allocates less memory than needed.

Fix this by casting 1 operand to u64.

Fixes: 0d2c4f964050 ("bpf: Eliminate rlimit-based memory accounting for sockmap and sockhash maps")
Fixes: 99c51064fb06 ("devmap: Use bpf_map_area_alloc() for allocating hash buckets")
Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210613143440.71975-1-minhquangbui99@gmail.com
---
 kernel/bpf/devmap.c | 4 ++--
 net/core/sock_map.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index aa516472ce46..3b45c23286c0 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -92,7 +92,7 @@ static struct hlist_head *dev_map_create_hash(unsigned int entries,
 	int i;
 	struct hlist_head *hash;
 
-	hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
+	hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node);
 	if (hash != NULL)
 		for (i = 0; i < entries; i++)
 			INIT_HLIST_HEAD(&hash[i]);
@@ -143,7 +143,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 
 		spin_lock_init(&dtab->index_lock);
 	} else {
-		dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+		dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries *
 						      sizeof(struct bpf_dtab_netdev *),
 						      dtab->map.numa_node);
 		if (!dtab->netdev_map)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 6f1b82b8ad49..60decd6420ca 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	bpf_map_init_from_attr(&stab->map, attr);
 	raw_spin_lock_init(&stab->lock);
 
-	stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+	stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
 				       sizeof(struct sock *),
 				       stab->map.numa_node);
 	if (!stab->sks) {

From ee8e7622e09af0675f4d7c1dec9702791591d02b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 21 Jun 2021 14:54:19 -0700
Subject: [PATCH 2594/3292] octeontx2-af: Avoid field-overflowing memcpy()

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

To avoid having memcpy() think a u64 "prof" is being written beyond,
adjust the prof member type by adding struct nix_bandprof_s to the union
to match the other structs. This silences the following future warning:

In file included from ./include/linux/string.h:253,
                 from ./include/linux/bitmap.h:10,
                 from ./include/linux/cpumask.h:12,
                 from ./arch/x86/include/asm/cpumask.h:5,
                 from ./arch/x86/include/asm/msr.h:11,
                 from ./arch/x86/include/asm/processor.h:22,
                 from ./arch/x86/include/asm/timex.h:5,
                 from ./include/linux/timex.h:65,
                 from ./include/linux/time32.h:13,
                 from ./include/linux/time.h:60,
                 from ./include/linux/stat.h:19,
                 from ./include/linux/module.h:13,
                 from drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c:11:
In function '__fortify_memcpy_chk',
    inlined from '__fortify_memcpy' at ./include/linux/fortify-string.h:310:2,
    inlined from 'rvu_nix_blk_aq_enq_inst' at drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c:910:5:
./include/linux/fortify-string.h:268:4: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); please use struct_group() [-Wattribute-warning]
  268 |    __write_overflow_field();
      |    ^~~~~~~~~~~~~~~~~~~~~~~~

drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c:
...
                        else if (req->ctype == NIX_AQ_CTYPE_BANDPROF)
                                memcpy(&rsp->prof, ctx,
                                       sizeof(struct nix_bandprof_s));
...

Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Subbaraya Sundeep<sbhatta@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 7d7dfa8d8a3f..770d86262838 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -746,7 +746,7 @@ struct nix_aq_enq_rsp {
 		struct nix_cq_ctx_s cq;
 		struct nix_rsse_s   rss;
 		struct nix_rx_mce_s mce;
-		u64 prof;
+		struct nix_bandprof_s prof;
 	};
 };
 

From 64a81b24487f0d2fba0f033029eec2abc7d82cee Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 21 Jun 2021 15:10:55 -0700
Subject: [PATCH 2595/3292] net: dsa: b53: Create default VLAN entry explicitly

In case CONFIG_VLAN_8021Q is not set, there will be no call down to the
b53 driver to ensure that the default PVID VLAN entry will be configured
with the appropriate untagged attribute towards the CPU port. We were
implicitly relying on dsa_slave_vlan_rx_add_vid() to do that for us,
instead make it explicit.

Reported-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 6e199454e41d..b23e3488695b 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -728,6 +728,13 @@ static u16 b53_default_pvid(struct b53_device *dev)
 		return 0;
 }
 
+static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port)
+{
+	struct b53_device *dev = ds->priv;
+
+	return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port);
+}
+
 int b53_configure_vlan(struct dsa_switch *ds)
 {
 	struct b53_device *dev = ds->priv;
@@ -748,9 +755,20 @@ int b53_configure_vlan(struct dsa_switch *ds)
 
 	b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering);
 
-	b53_for_each_port(dev, i)
+	/* Create an untagged VLAN entry for the default PVID in case
+	 * CONFIG_VLAN_8021Q is disabled and there are no calls to
+	 * dsa_slave_vlan_rx_add_vid() to create the default VLAN
+	 * entry. Do this only when the tagging protocol is not
+	 * DSA_TAG_PROTO_NONE
+	 */
+	b53_for_each_port(dev, i) {
+		v = &dev->vlans[def_vid];
+		v->members |= BIT(i);
+		if (!b53_vlan_port_needs_forced_tagged(ds, i))
+			v->untag = v->members;
 		b53_write16(dev, B53_VLAN_PAGE,
 			    B53_VLAN_PORT_DEF_TAG(i), def_vid);
+	}
 
 	/* Upon initial call we have not set-up any VLANs, but upon
 	 * system resume, we need to restore all VLAN entries.
@@ -1460,13 +1478,6 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port)
-{
-	struct b53_device *dev = ds->priv;
-
-	return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port);
-}
-
 int b53_vlan_add(struct dsa_switch *ds, int port,
 		 const struct switchdev_obj_port_vlan *vlan,
 		 struct netlink_ext_ack *extack)

From f2fcffe392c1fd8324f131bf33d7d350eff44bb6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 21 Jun 2021 15:21:12 -0700
Subject: [PATCH 2596/3292] hv_netvsc: Avoid field-overflowing memcpy()

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally writing across neighboring fields.

Add flexible array to represent start of buf_info, improving readability
and avoid future warning where memcpy() thinks it is writing past the
end of the structure.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h   | 1 +
 drivers/net/hyperv/rndis_filter.c | 6 ++----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 442c520ab8f3..9e5eee44f7d3 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -1163,6 +1163,7 @@ struct rndis_set_request {
 	u32 info_buflen;
 	u32 info_buf_offset;
 	u32 dev_vc_handle;
+	u8  info_buf[];
 };
 
 /* Response to NdisSetRequest */
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index c0e89e107d57..033ed6ed78c5 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1051,10 +1051,8 @@ static int rndis_filter_set_packet_filter(struct rndis_device *dev,
 	set = &request->request_msg.msg.set_req;
 	set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
 	set->info_buflen = sizeof(u32);
-	set->info_buf_offset = sizeof(struct rndis_set_request);
-
-	memcpy((void *)(unsigned long)set + sizeof(struct rndis_set_request),
-	       &new_filter, sizeof(u32));
+	set->info_buf_offset = offsetof(typeof(*set), info_buf);
+	memcpy(set->info_buf, &new_filter, sizeof(u32));
 
 	ret = rndis_filter_send_request(dev, request);
 	if (ret == 0) {

From 490274b47468793e3e157c2df6b2da0e646cc4a9 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 17:33:08 -0700
Subject: [PATCH 2597/3292] mptcp: avoid race on msk state changes

The msk socket state is currently updated in a few spots without
owning the msk socket lock itself.

Some of such operations are safe, as they happens before exposing
the msk socket to user-space and can't race with other changes.

A couple of them, at connect time, can actually race with close()
or shutdown(), leaving breaking the socket state machine.

This change addresses the issue moving such update under the msk
socket lock with the usual:

<acquire spinlock>
<check sk lock onwers>
<ev defer to release_cb>

scheme.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/56
Fixes: 8fd738049ac3 ("mptcp: fallback in case of simultaneous connect")
Fixes: c3c123d16c0e ("net: mptcp: don't hang in mptcp_sendmsg() after TCP fallback")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c |  5 +++++
 net/mptcp/protocol.h |  2 ++
 net/mptcp/subflow.c  | 30 ++++++++++++++++++++++--------
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 632350018fb6..8ead550df8b1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2946,6 +2946,11 @@ static void mptcp_release_cb(struct sock *sk)
 		spin_lock_bh(&sk->sk_lock.slock);
 	}
 
+	/* be sure to set the current sk state before tacking actions
+	 * depending on sk_state
+	 */
+	if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags))
+		__mptcp_set_connected(sk);
 	if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
 		__mptcp_clean_una_wakeup(sk);
 	if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 5d7c44028e47..7b634568f49c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -109,6 +109,7 @@
 #define MPTCP_ERROR_REPORT	8
 #define MPTCP_RETRANSMIT	9
 #define MPTCP_WORK_SYNC_SETSOCKOPT 10
+#define MPTCP_CONNECTED		11
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
@@ -579,6 +580,7 @@ void mptcp_get_options(const struct sk_buff *skb,
 		       struct mptcp_options_received *mp_opt);
 
 void mptcp_finish_connect(struct sock *sk);
+void __mptcp_set_connected(struct sock *sk);
 static inline bool mptcp_is_fully_established(struct sock *sk)
 {
 	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 037fba41e170..9f934603bfe8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -371,6 +371,24 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
 	return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
 }
 
+void __mptcp_set_connected(struct sock *sk)
+{
+	if (sk->sk_state == TCP_SYN_SENT) {
+		inet_sk_state_store(sk, TCP_ESTABLISHED);
+		sk->sk_state_change(sk);
+	}
+}
+
+static void mptcp_set_connected(struct sock *sk)
+{
+	mptcp_data_lock(sk);
+	if (!sock_owned_by_user(sk))
+		__mptcp_set_connected(sk);
+	else
+		set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+	mptcp_data_unlock(sk);
+}
+
 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -379,10 +397,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
 	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
 
-	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
-		inet_sk_state_store(parent, TCP_ESTABLISHED);
-		parent->sk_state_change(parent);
-	}
 
 	/* be sure no special action on any packet other than syn-ack */
 	if (subflow->conn_finished)
@@ -411,6 +425,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 			 subflow->remote_key);
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
 		mptcp_finish_connect(sk);
+		mptcp_set_connected(parent);
 	} else if (subflow->request_join) {
 		u8 hmac[SHA256_DIGEST_SIZE];
 
@@ -451,6 +466,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 	} else if (mptcp_check_fallback(sk)) {
 fallback:
 		mptcp_rcv_space_init(mptcp_sk(parent), sk);
+		mptcp_set_connected(parent);
 	}
 	return;
 
@@ -558,6 +574,7 @@ static void mptcp_sock_destruct(struct sock *sk)
 
 static void mptcp_force_close(struct sock *sk)
 {
+	/* the msk is not yet exposed to user-space */
 	inet_sk_state_store(sk, TCP_CLOSE);
 	sk_common_release(sk);
 }
@@ -1474,10 +1491,7 @@ static void subflow_state_change(struct sock *sk)
 		mptcp_rcv_space_init(mptcp_sk(parent), sk);
 		pr_fallback(mptcp_sk(parent));
 		subflow->conn_finished = 1;
-		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
-			inet_sk_state_store(parent, TCP_ESTABLISHED);
-			parent->sk_state_change(parent);
-		}
+		mptcp_set_connected(parent);
 	}
 
 	/* as recvmsg() does not acquire the subflow socket for ssk selection

From 597dbae77ee5a2347b1b800c25c89a9181dd8a57 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 21 Jun 2021 17:33:09 -0700
Subject: [PATCH 2598/3292] mptcp: drop duplicate mptcp_setsockopt()
 declaration

commit 7896248983ef ("mptcp: add skeleton to sync msk socket
options to subflows") introduced a duplicate declaration of
mptcp_setsockopt(), just drop it.

Reported-by: Florian Westphal <fw@strlen.de>
Fixes: 7896248983ef ("mptcp: add skeleton to sync msk socket options to subflows")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7b634568f49c..78ac28902f55 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -762,9 +762,6 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
 
-int mptcp_setsockopt(struct sock *sk, int level, int optname,
-		     sockptr_t optval, unsigned int optlen);
-
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
 void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
 

From a6e3f2985a80ef6a45a17d2d9d9151f17ea3ce07 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 21 Jun 2021 18:52:54 -0700
Subject: [PATCH 2599/3292] ip6_tunnel: fix GRE6 segmentation

Commit 6c11fbf97e69 ("ip6_tunnel: add MPLS transmit support")
moved assiging inner_ipproto down from ipxip6_tnl_xmit() to
its callee ip6_tnl_xmit(). The latter is also used by GRE.

Since commit 38720352412a ("gre: Use inner_proto to obtain inner
header protocol") GRE had been depending on skb->inner_protocol
during segmentation. It sets it in gre_build_header() and reads
it in gre_gso_segment(). Changes to ip6_tnl_xmit() overwrite
the protocol, resulting in GSO skbs getting dropped.

Note that inner_protocol is a union with inner_ipproto,
GRE uses the former while the change switched it to the latter
(always setting it to just IPPROTO_GRE).

Restore the original location of skb_set_inner_ipproto(),
it is unclear why it was moved in the first place.

Fixes: 6c11fbf97e69 ("ip6_tunnel: add MPLS transmit support")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 288bafded998..28ca70af014a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1239,8 +1239,6 @@ route_lookup:
 	if (max_headroom > dev->needed_headroom)
 		dev->needed_headroom = max_headroom;
 
-	skb_set_inner_ipproto(skb, proto);
-
 	err = ip6_tnl_encap(skb, t, &proto, fl6);
 	if (err)
 		return err;
@@ -1377,6 +1375,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
+	skb_set_inner_ipproto(skb, protocol);
+
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
 			   protocol);
 	if (err != 0) {

From 3c9ef511b9fa128a4c62e3aa0aac4c6b190f0d55 Mon Sep 17 00:00:00 2001
From: Di Zhu <zhudi21@huawei.com>
Date: Tue, 22 Jun 2021 11:09:29 +0800
Subject: [PATCH 2600/3292] bonding: avoid adding slave device with IFF_MASTER
 flag

The following steps will definitely cause the kernel to crash:
	ip link add vrf1 type vrf table 1
	modprobe bonding.ko max_bonds=1
	echo "+vrf1" >/sys/class/net/bond0/bonding/slaves
	rmmod bonding

The root cause is that: When the VRF is added to the slave device,
it will fail, and some cleaning work will be done. because VRF device
has IFF_MASTER flag, cleanup process  will not clear the IFF_BONDING flag.
Then, when we unload the bonding module, unregister_netdevice_notifier()
will treat the VRF device as a bond master device and treat netdev_priv()
as struct bonding{} which actually is struct net_vrf{}.

By analyzing the processing logic of bond_enslave(), it seems that
it is not allowed to add the slave device with the IFF_MASTER flag, so
we need to add a code check for this situation.

Signed-off-by: Di Zhu <zhudi21@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c5a646d06102..16840c9bc00d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1601,6 +1601,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	int link_reporting;
 	int res = 0, i;
 
+	if (slave_dev->flags & IFF_MASTER) {
+		netdev_err(bond_dev,
+			   "Error: Device with IFF_MASTER cannot be enslaved\n");
+		return -EPERM;
+	}
+
 	if (!bond->params.use_carrier &&
 	    slave_dev->ethtool_ops->get_link == NULL &&
 	    slave_ops->ndo_do_ioctl == NULL) {

From c69f114d09891adfa3e301a35d9e872b8b7b5a50 Mon Sep 17 00:00:00 2001
From: Miao Wang <shankerwangmiao@gmail.com>
Date: Tue, 22 Jun 2021 12:24:50 +0800
Subject: [PATCH 2601/3292] net/ipv4: swap flow ports when validating source

When doing source address validation, the flowi4 struct used for
fib_lookup should be in the reverse direction to the given skb.
fl4_dport and fl4_sport returned by fib4_rules_early_flow_dissect
should thus be swapped.

Fixes: 5a847a6e1477 ("net/ipv4: Initialize proto and ports in flow struct")
Signed-off-by: Miao Wang <shankerwangmiao@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 84bb707bd88d..647bceab56c2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -371,6 +371,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 		fl4.flowi4_proto = 0;
 		fl4.fl4_sport = 0;
 		fl4.fl4_dport = 0;
+	} else {
+		swap(fl4.fl4_sport, fl4.fl4_dport);
 	}
 
 	if (fib_lookup(net, &fl4, &res, 0))

From ddeacc4f6494e07cbb6f033627926623f3e7a9d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Tue, 22 Jun 2021 07:24:15 +0200
Subject: [PATCH 2602/3292] net: broadcom: bcm4908_enet: reset DMA rings sw
 indexes properly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resetting software indexes in bcm4908_dma_alloc_buf_descs() is not
enough as it's called during device probe only. Driver resets DMA on
every .ndo_open callback and it's required to reset indexes then.

This fixes inconsistent rings state and stalled traffic after interface
down & up sequence.

Fixes: 4feffeadbcb2 ("net: broadcom: bcm4908enet: add BCM4908 controller driver")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcm4908_enet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 60d908507f51..02a569500234 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -174,9 +174,6 @@ static int bcm4908_dma_alloc_buf_descs(struct bcm4908_enet *enet,
 	if (!ring->slots)
 		goto err_free_buf_descs;
 
-	ring->read_idx = 0;
-	ring->write_idx = 0;
-
 	return 0;
 
 err_free_buf_descs:
@@ -304,6 +301,9 @@ static void bcm4908_enet_dma_ring_init(struct bcm4908_enet *enet,
 
 	enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR,
 		   (uint32_t)ring->dma_addr);
+
+	ring->read_idx = 0;
+	ring->write_idx = 0;
 }
 
 static void bcm4908_enet_dma_uninit(struct bcm4908_enet *enet)

From 98534fce52efc76d961f5fe4188a97a5db93c7dd Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Mon, 21 Jun 2021 23:05:19 -0700
Subject: [PATCH 2603/3292] bridge: cfm: remove redundant return

Return statements are not needed in Void function.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_cfm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
index 001064f7583d..a3c755d0a09d 100644
--- a/net/bridge/br_cfm.c
+++ b/net/bridge/br_cfm.c
@@ -142,7 +142,7 @@ static void br_cfm_notify(int event, const struct net_bridge_port *port)
 {
 	u32 filter = RTEXT_FILTER_CFM_STATUS;
 
-	return br_info_notify(event, port->br, NULL, filter);
+	br_info_notify(event, port->br, NULL, filter);
 }
 
 static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep)

From 78c57f22e3c87ab0a2844d7c9a120eba51ae34f4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:46 +0300
Subject: [PATCH 2604/3292] ethtool: Use correct command name in title

The command is called 'ETHTOOL_MSG_MODULE_EEPROM_GET', not
'ETHTOOL_MSG_MODULE_EEPROM'.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 25131df3c2bd..c3600f9c8988 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1363,8 +1363,8 @@ in an implementation specific way.
 ``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP
 module parameters. This does not mean autonegotiation.
 
-MODULE_EEPROM
-=============
+MODULE_EEPROM_GET
+=================
 
 Fetch module EEPROM data dump.
 This interface is designed to allow dumps of at most 1/2 page at once. This

From 913d026fbfaf114ff87afcc77fa4e9309f87f114 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:47 +0300
Subject: [PATCH 2605/3292] ethtool: Document correct attribute type

'ETHTOOL_A_MODULE_EEPROM_DATA' is a binary attribute, not a nested one.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 2 +-
 include/uapi/linux/ethtool_netlink.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index c3600f9c8988..8ae644f800f0 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1388,7 +1388,7 @@ Kernel response contents:
  +---------------------------------------------+--------+---------------------+
  | ``ETHTOOL_A_MODULE_EEPROM_HEADER``          | nested | reply header        |
  +---------------------------------------------+--------+---------------------+
- | ``ETHTOOL_A_MODULE_EEPROM_DATA``            | nested | array of bytes from |
+ | ``ETHTOOL_A_MODULE_EEPROM_DATA``            | binary | array of bytes from |
  |                                             |        | module EEPROM       |
  +---------------------------------------------+--------+---------------------+
 
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 825cfda1c5d5..c7135c9c37a5 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -675,7 +675,7 @@ enum {
 	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8 */
 	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8 */
 	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8 */
-	ETHTOOL_A_MODULE_EEPROM_DATA,			/* nested */
+	ETHTOOL_A_MODULE_EEPROM_DATA,			/* binary */
 
 	__ETHTOOL_A_MODULE_EEPROM_CNT,
 	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)

From f5fe211d13af52077bb66e89a5410fa75f691fe8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:48 +0300
Subject: [PATCH 2606/3292] ethtool: Decrease size of module EEPROM get policy
 array

The 'ETHTOOL_A_MODULE_EEPROM_DATA' attribute is not part of the get
request.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 90b10966b16b..3e25a47fd482 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -380,7 +380,7 @@ extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_T
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
 extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
 extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
-extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1];
+extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
 extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);

From 37a025e83902903df658489665499a548a53423b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:49 +0300
Subject: [PATCH 2607/3292] ethtool: Document behavior when module EEPROM bank
 attribute is omitted

The kernel assumes bank 0 when 'ETHTOOL_MSG_MODULE_EEPROM_GET' is sent
without 'ETHTOOL_A_MODULE_EEPROM_BANK'.

Document it as part of the interface documentation.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 8ae644f800f0..6ea91e41593f 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1383,6 +1383,8 @@ Request contents:
   ``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS``  u8      page I2C address
   =======================================  ======  ==========================
 
+If ``ETHTOOL_A_MODULE_EEPROM_BANK`` is not specified, bank 0 is assumed.
+
 Kernel response contents:
 
  +---------------------------------------------+--------+---------------------+

From b8c48be23c2d03834fe01c3ea757d9df8b97013d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:50 +0300
Subject: [PATCH 2608/3292] ethtool: Use kernel data types for internal EEPROM
 struct

The struct is not visible to user space and therefore should not use the
user visible data types.

Instead, use internal data types like other structures in the file.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e030f7510cd3..29dbb603bc91 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -401,12 +401,12 @@ struct ethtool_rmon_stats {
  * required information to the driver.
  */
 struct ethtool_module_eeprom {
-	__u32	offset;
-	__u32	length;
-	__u8	page;
-	__u8	bank;
-	__u8	i2c_address;
-	__u8	*data;
+	u32	offset;
+	u32	length;
+	u8	page;
+	u8	bank;
+	u8	i2c_address;
+	u8	*data;
 };
 
 /**

From 0dc7dd02ba7ab5f623f5e3a36443ec441364285a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:51 +0300
Subject: [PATCH 2609/3292] ethtool: Validate module EEPROM length as part of
 policy

Validate the number of bytes to read from the module EEPROM as part of
the netlink policy and remove the corresponding check from the code.

This also makes it possible to query the length range from user space:

 $ genl ctrl policy name ethtool
 ...
 ID: 0x14  policy[32]:attr[3]: type=U32 range:[1,128]
 ...

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/eeprom.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 5d38e90895ac..1e75d9c1b154 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -159,9 +159,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr *
 	request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]);
 	request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]);
 
-	if (!request->length)
-		return -EINVAL;
-
 	/* The following set of conditions limit the API to only dump 1/2
 	 * EEPROM page without crossing low page boundary located at offset 128.
 	 * This means user may only request dumps of length limited to 128 from
@@ -237,7 +234,8 @@ const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
 const struct nla_policy ethnl_module_eeprom_get_policy[] = {
 	[ETHTOOL_A_MODULE_EEPROM_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
 	[ETHTOOL_A_MODULE_EEPROM_OFFSET]	= { .type = NLA_U32 },
-	[ETHTOOL_A_MODULE_EEPROM_LENGTH]	= { .type = NLA_U32 },
+	[ETHTOOL_A_MODULE_EEPROM_LENGTH]	=
+		NLA_POLICY_RANGE(NLA_U32, 1, ETH_MODULE_EEPROM_PAGE_LEN),
 	[ETHTOOL_A_MODULE_EEPROM_PAGE]		= { .type = NLA_U8 },
 	[ETHTOOL_A_MODULE_EEPROM_BANK]		= { .type = NLA_U8 },
 	[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]	=

From 88f9a87afeeec5dfdda3651f3db96d0006172d91 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 22 Jun 2021 09:50:52 +0300
Subject: [PATCH 2610/3292] ethtool: Validate module EEPROM offset as part of
 policy

Validate the offset to read from module EEPROM as part of the netlink
policy and remove the corresponding check from the code.

This also makes it possible to query the offset range from user space:

 $ genl ctrl policy name ethtool
 ...
 ID: 0x14  policy[32]:attr[2]: type=U32 range:[0,255]
 ...

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/eeprom.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 1e75d9c1b154..7e6b37a54add 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -177,10 +177,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr *
 		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
 				    "reading cross half page boundary is illegal");
 		return -EINVAL;
-	} else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) {
-		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET],
-				    "offset is out of bounds");
-		return -EINVAL;
 	} else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) {
 		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
 				    "reading cross page boundary is illegal");
@@ -233,7 +229,8 @@ const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
 
 const struct nla_policy ethnl_module_eeprom_get_policy[] = {
 	[ETHTOOL_A_MODULE_EEPROM_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
-	[ETHTOOL_A_MODULE_EEPROM_OFFSET]	= { .type = NLA_U32 },
+	[ETHTOOL_A_MODULE_EEPROM_OFFSET]	=
+		NLA_POLICY_MAX(NLA_U32, ETH_MODULE_EEPROM_PAGE_LEN * 2 - 1),
 	[ETHTOOL_A_MODULE_EEPROM_LENGTH]	=
 		NLA_POLICY_RANGE(NLA_U32, 1, ETH_MODULE_EEPROM_PAGE_LEN),
 	[ETHTOOL_A_MODULE_EEPROM_PAGE]		= { .type = NLA_U8 },

From b0e03950dd71315204c24a3dffb2d9fc477e82de Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 22 Jun 2021 14:51:43 +0300
Subject: [PATCH 2611/3292] stmmac: dwmac-loongson: fix uninitialized variable
 in loongson_dwmac_probe()

The "mdio" variable is never set to false.  Also it should be a bool
type instead of int.

Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
index 8cd4e2e8ec40..e108b0d2bd28 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
@@ -49,7 +49,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
 {
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_resources res;
-	int ret, i, mdio;
+	bool mdio = false;
+	int ret, i;
 	struct device_node *np;
 
 	np = dev_of_node(&pdev->dev);

From c4ab7b56be0f6f18f025ddc8d469cce54f82415a Mon Sep 17 00:00:00 2001
From: Aaron Conole <aconole@redhat.com>
Date: Tue, 22 Jun 2021 10:02:33 -0400
Subject: [PATCH 2612/3292] openvswitch: add trace points

This makes openvswitch module use the event tracing framework
to log the upcall interface and action execution pipeline.  When
using openvswitch as the packet forwarding engine, some types of
debugging are made possible simply by using the ovs-vswitchd's
ofproto/trace command.  However, such a command has some
limitations:

  1. When trying to trace packets that go through the CT action,
     the state of the packet can't be determined, and probably
     would be potentially wrong.

  2. Deducing problem packets can sometimes be difficult as well
     even if many of the flows are known

  3. It's possible to use the openvswitch module even without
     the ovs-vswitchd (although, not common use).

Introduce the event tracing points here to make it possible for
working through these problems in kernel space.  The style is
copied from the mac80211 driver-trace / trace code for
consistency - this creates some checkpatch splats, but the
official 'guide' for adding tracepoints, as well as the existing
examples all add the same splats so it seems acceptable.

Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/Makefile            |   3 +
 net/openvswitch/actions.c           |   4 +
 net/openvswitch/datapath.c          |   4 +
 net/openvswitch/openvswitch_trace.c |  10 ++
 net/openvswitch/openvswitch_trace.h | 158 ++++++++++++++++++++++++++++
 5 files changed, 179 insertions(+)
 create mode 100644 net/openvswitch/openvswitch_trace.c
 create mode 100644 net/openvswitch/openvswitch_trace.h

diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 41109c326f3a..28982630bef3 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -13,6 +13,7 @@ openvswitch-y := \
 	flow_netlink.o \
 	flow_table.o \
 	meter.o \
+	openvswitch_trace.o \
 	vport.o \
 	vport-internal_dev.o \
 	vport-netdev.o
@@ -24,3 +25,5 @@ endif
 obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
 obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
 obj-$(CONFIG_OPENVSWITCH_GRE)	+= vport-gre.o
+
+CFLAGS_openvswitch_trace.o = -I$(src)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 77d924ab8cdb..ef15d9eb4774 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -30,6 +30,7 @@
 #include "conntrack.h"
 #include "vport.h"
 #include "flow_netlink.h"
+#include "openvswitch_trace.h"
 
 struct deferred_action {
 	struct sk_buff *skb;
@@ -1242,6 +1243,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	     a = nla_next(a, &rem)) {
 		int err = 0;
 
+		if (trace_ovs_do_execute_action_enabled())
+			trace_ovs_do_execute_action(dp, skb, key, a, rem);
+
 		switch (nla_type(a)) {
 		case OVS_ACTION_ATTR_OUTPUT: {
 			int port = nla_get_u32(a);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d6ef6cb9b26..bc164b35e67d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -43,6 +43,7 @@
 #include "flow_table.h"
 #include "flow_netlink.h"
 #include "meter.h"
+#include "openvswitch_trace.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
@@ -275,6 +276,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 	struct dp_stats_percpu *stats;
 	int err;
 
+	if (trace_ovs_dp_upcall_enabled())
+		trace_ovs_dp_upcall(dp, skb, key, upcall_info);
+
 	if (upcall_info->portid == 0) {
 		err = -ENOTCONN;
 		goto err;
diff --git a/net/openvswitch/openvswitch_trace.c b/net/openvswitch/openvswitch_trace.c
new file mode 100644
index 000000000000..62c5f7d6f023
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* bug in tracepoint.h, it should include this */
+#include <linux/module.h>
+
+/* sparse isn't too happy with all macros... */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "openvswitch_trace.h"
+
+#endif
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
new file mode 100644
index 000000000000..3eb35d9eb700
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM openvswitch
+
+#if !defined(_TRACE_OPENVSWITCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OPENVSWITCH_H
+
+#include <linux/tracepoint.h>
+
+#include "datapath.h"
+
+TRACE_EVENT(ovs_do_execute_action,
+
+	TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *a, int rem),
+
+	TP_ARGS(dp, skb, key, a, rem),
+
+	TP_STRUCT__entry(
+		__field(	void *,		dpaddr			)
+		__string(	dp_name,	ovs_dp_name(dp)		)
+		__string(	dev_name,	skb->dev->name		)
+		__field(	void *,		skbaddr			)
+		__field(	unsigned int,	len			)
+		__field(	unsigned int,	data_len		)
+		__field(	unsigned int,	truesize		)
+		__field(	u8,		nr_frags		)
+		__field(	u16,		gso_size		)
+		__field(	u16,		gso_type		)
+		__field(	u32,		ovs_flow_hash		)
+		__field(	u32,		recirc_id		)
+		__field(	void *,		keyaddr			)
+		__field(	u16,		key_eth_type		)
+		__field(	u8,		key_ct_state		)
+		__field(	u8,		key_ct_orig_proto	)
+		__field(	u16,		key_ct_zone		)
+		__field(	unsigned int,	flow_key_valid		)
+		__field(	u8,		action_type		)
+		__field(	unsigned int,	action_len		)
+		__field(	void *,		action_data		)
+		__field(	u8,		is_last			)
+	),
+
+	TP_fast_assign(
+		__entry->dpaddr = dp;
+		__assign_str(dp_name, ovs_dp_name(dp));
+		__assign_str(dev_name, skb->dev->name);
+		__entry->skbaddr = skb;
+		__entry->len = skb->len;
+		__entry->data_len = skb->data_len;
+		__entry->truesize = skb->truesize;
+		__entry->nr_frags = skb_shinfo(skb)->nr_frags;
+		__entry->gso_size = skb_shinfo(skb)->gso_size;
+		__entry->gso_type = skb_shinfo(skb)->gso_type;
+		__entry->ovs_flow_hash = key->ovs_flow_hash;
+		__entry->recirc_id = key->recirc_id;
+		__entry->keyaddr = key;
+		__entry->key_eth_type = key->eth.type;
+		__entry->key_ct_state = key->ct_state;
+		__entry->key_ct_orig_proto = key->ct_orig_proto;
+		__entry->key_ct_zone = key->ct_zone;
+		__entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID);
+		__entry->action_type = nla_type(a);
+		__entry->action_len = nla_len(a);
+		__entry->action_data = nla_data(a);
+		__entry->is_last = nla_is_last(a, rem);
+	),
+
+	TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_Zone=%04x flow_key_valid=%d action_type=%u action_len=%u action_data=%p is_last=%d",
+		  __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+		  __entry->skbaddr, __entry->len, __entry->data_len,
+		  __entry->truesize, __entry->nr_frags, __entry->gso_size,
+		  __entry->gso_type, __entry->ovs_flow_hash,
+		  __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+		  __entry->key_ct_state, __entry->key_ct_orig_proto,
+		  __entry->key_ct_zone,
+		  __entry->flow_key_valid,
+		  __entry->action_type, __entry->action_len,
+		  __entry->action_data, __entry->is_last)
+);
+
+TRACE_EVENT(ovs_dp_upcall,
+
+	TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+		 const struct sw_flow_key *key,
+		 const struct dp_upcall_info *upcall_info),
+
+	TP_ARGS(dp, skb, key, upcall_info),
+
+	TP_STRUCT__entry(
+		__field(	void *,		dpaddr			)
+		__string(	dp_name,	ovs_dp_name(dp)		)
+		__string(	dev_name,	skb->dev->name		)
+		__field(	void *,		skbaddr			)
+		__field(	unsigned int,	len			)
+		__field(	unsigned int,	data_len		)
+		__field(	unsigned int,	truesize		)
+		__field(	u8,		nr_frags		)
+		__field(	u16,		gso_size		)
+		__field(	u16,		gso_type		)
+		__field(	u32,		ovs_flow_hash		)
+		__field(	u32,		recirc_id		)
+		__field(	const void *,	keyaddr			)
+		__field(	u16,		key_eth_type		)
+		__field(	u8,		key_ct_state		)
+		__field(	u8,		key_ct_orig_proto	)
+		__field(	u16,		key_ct_zone		)
+		__field(	unsigned int,	flow_key_valid		)
+		__field(	u8,		upcall_cmd		)
+		__field(	u32,		upcall_port		)
+		__field(	u16,		upcall_mru		)
+	),
+
+	TP_fast_assign(
+		__entry->dpaddr = dp;
+		__assign_str(dp_name, ovs_dp_name(dp));
+		__assign_str(dev_name, skb->dev->name);
+		__entry->skbaddr = skb;
+		__entry->len = skb->len;
+		__entry->data_len = skb->data_len;
+		__entry->truesize = skb->truesize;
+		__entry->nr_frags = skb_shinfo(skb)->nr_frags;
+		__entry->gso_size = skb_shinfo(skb)->gso_size;
+		__entry->gso_type = skb_shinfo(skb)->gso_type;
+		__entry->ovs_flow_hash = key->ovs_flow_hash;
+		__entry->recirc_id = key->recirc_id;
+		__entry->keyaddr = key;
+		__entry->key_eth_type = key->eth.type;
+		__entry->key_ct_state = key->ct_state;
+		__entry->key_ct_orig_proto = key->ct_orig_proto;
+		__entry->key_ct_zone = key->ct_zone;
+		__entry->flow_key_valid =  !(key->mac_proto & SW_FLOW_KEY_INVALID);
+		__entry->upcall_cmd = upcall_info->cmd;
+		__entry->upcall_port = upcall_info->portid;
+		__entry->upcall_mru = upcall_info->mru;
+	),
+
+	TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_zone=%04x flow_key_valid=%d upcall_cmd=%u upcall_port=%u upcall_mru=%u",
+		  __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+		  __entry->skbaddr, __entry->len, __entry->data_len,
+		  __entry->truesize, __entry->nr_frags, __entry->gso_size,
+		  __entry->gso_type, __entry->ovs_flow_hash,
+		  __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+		  __entry->key_ct_state, __entry->key_ct_orig_proto,
+		  __entry->key_ct_zone,
+		  __entry->flow_key_valid,
+		  __entry->upcall_cmd, __entry->upcall_port,
+		  __entry->upcall_mru)
+);
+
+#endif /* _TRACE_OPENVSWITCH_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE openvswitch_trace
+#include <trace/define_trace.h>

From 1b134d8d756a944deb5f8cc14e7ea6573730442f Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 22 Jun 2021 16:21:40 +0200
Subject: [PATCH 2613/3292] MAINTAINERS: network: add entry for WWAN

This patch adds maintainer info for drivers/net/wwan subdir, including
WWAN core and drivers. Adding Sergey and myself as maintainers and
Johannes as reviewer.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Acked-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 395b052635ca..cc375fda89d0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19803,6 +19803,16 @@ F:	Documentation/core-api/workqueue.rst
 F:	include/linux/workqueue.h
 F:	kernel/workqueue.c
 
+WWAN DRIVERS
+M:	Loic Poulain <loic.poulain@linaro.org>
+M:	Sergey Ryazanov <ryazanov.s.a@gmail.com>
+R:	Johannes Berg <johannes@sipsolutions.net>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/wwan/
+F:	include/linux/wwan.h
+F:	include/uapi/linux/wwan.h
+
 X-POWERS AXP288 PMIC DRIVERS
 M:	Hans de Goede <hdegoede@redhat.com>
 S:	Maintained

From ce8eb4c728ef40b554b4f3d8963f11ed44502e00 Mon Sep 17 00:00:00 2001
From: Vignesh Raghavendra <vigneshr@ti.com>
Date: Tue, 22 Jun 2021 20:08:57 +0530
Subject: [PATCH 2614/3292] net: ti: am65-cpsw-nuss: Fix crash when changing
 number of TX queues

When changing number of TX queues using ethtool:

	# ethtool -L eth0 tx 1
	[  135.301047] Unable to handle kernel paging request at virtual address 00000000af5d0000
	[...]
	[  135.525128] Call trace:
	[  135.525142]  dma_release_from_dev_coherent+0x2c/0xb0
	[  135.525148]  dma_free_attrs+0x54/0xe0
	[  135.525156]  k3_cppi_desc_pool_destroy+0x50/0xa0
	[  135.525164]  am65_cpsw_nuss_remove_tx_chns+0x88/0xdc
	[  135.525171]  am65_cpsw_set_channels+0x3c/0x70
	[...]

This is because k3_cppi_desc_pool_destroy() which is called after
k3_udma_glue_release_tx_chn() in am65_cpsw_nuss_remove_tx_chns()
references struct device that is unregistered at the end of
k3_udma_glue_release_tx_chn()

Therefore the right order is to call k3_cppi_desc_pool_destroy() and
destroy desc pool before calling k3_udma_glue_release_tx_chn().
Fix this throughout the driver.

Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver")
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 6a67b026df0b..718539cdd2f2 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1506,12 +1506,12 @@ static void am65_cpsw_nuss_free_tx_chns(void *data)
 	for (i = 0; i < common->tx_ch_num; i++) {
 		struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];
 
-		if (!IS_ERR_OR_NULL(tx_chn->tx_chn))
-			k3_udma_glue_release_tx_chn(tx_chn->tx_chn);
-
 		if (!IS_ERR_OR_NULL(tx_chn->desc_pool))
 			k3_cppi_desc_pool_destroy(tx_chn->desc_pool);
 
+		if (!IS_ERR_OR_NULL(tx_chn->tx_chn))
+			k3_udma_glue_release_tx_chn(tx_chn->tx_chn);
+
 		memset(tx_chn, 0, sizeof(*tx_chn));
 	}
 }
@@ -1531,12 +1531,12 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
 
 		netif_napi_del(&tx_chn->napi_tx);
 
-		if (!IS_ERR_OR_NULL(tx_chn->tx_chn))
-			k3_udma_glue_release_tx_chn(tx_chn->tx_chn);
-
 		if (!IS_ERR_OR_NULL(tx_chn->desc_pool))
 			k3_cppi_desc_pool_destroy(tx_chn->desc_pool);
 
+		if (!IS_ERR_OR_NULL(tx_chn->tx_chn))
+			k3_udma_glue_release_tx_chn(tx_chn->tx_chn);
+
 		memset(tx_chn, 0, sizeof(*tx_chn));
 	}
 }
@@ -1624,11 +1624,11 @@ static void am65_cpsw_nuss_free_rx_chns(void *data)
 
 	rx_chn = &common->rx_chns;
 
-	if (!IS_ERR_OR_NULL(rx_chn->rx_chn))
-		k3_udma_glue_release_rx_chn(rx_chn->rx_chn);
-
 	if (!IS_ERR_OR_NULL(rx_chn->desc_pool))
 		k3_cppi_desc_pool_destroy(rx_chn->desc_pool);
+
+	if (!IS_ERR_OR_NULL(rx_chn->rx_chn))
+		k3_udma_glue_release_rx_chn(rx_chn->rx_chn);
 }
 
 static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)

From b4fd096cbb871340be837491fa1795864a48b2d9 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Tue, 22 Jun 2021 12:05:00 -0300
Subject: [PATCH 2615/3292] tc-testing: fix list handling

python lists don't have an 'add' method, but 'append'.

Fixes: 14e5175e9e04 ("tc-testing: introduce scapyPlugin for basic traffic")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
index 229ee185b27e..a7b21658af9b 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -36,7 +36,7 @@ class SubPlugin(TdcPlugin):
         for k in scapy_keys:
             if k not in scapyinfo:
                 keyfail = True
-                missing_keys.add(k)
+                missing_keys.append(k)
         if keyfail:
             print('{}: Scapy block present in the test, but is missing info:'
                 .format(self.sub_class))

From 11f04de9021a14566f2ddee0fc9edd9c1783077c Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Tue, 22 Jun 2021 12:05:01 -0300
Subject: [PATCH 2616/3292] tc-testing: add support for sending various scapy
 packets

It can be worth sending different scapy packets on a given test, as in the
last patch of this series. For that, lets listify the scapy attribute and
simply iterate over it.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/plugin-lib/scapyPlugin.py      | 40 ++++++++++---------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
index a7b21658af9b..254136e3da5a 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin):
             return
 
         # Check for required fields
-        scapyinfo = self.args.caseinfo['scapy']
-        scapy_keys = ['iface', 'count', 'packet']
-        missing_keys = []
-        keyfail = False
-        for k in scapy_keys:
-            if k not in scapyinfo:
-                keyfail = True
-                missing_keys.append(k)
-        if keyfail:
-            print('{}: Scapy block present in the test, but is missing info:'
-                .format(self.sub_class))
-            print('{}'.format(missing_keys))
+        lscapyinfo = self.args.caseinfo['scapy']
+        if type(lscapyinfo) != list:
+            lscapyinfo = [ lscapyinfo, ]
 
-        pkt = eval(scapyinfo['packet'])
-        if '$' in scapyinfo['iface']:
-            tpl = Template(scapyinfo['iface'])
-            scapyinfo['iface'] = tpl.safe_substitute(NAMES)
-        for count in range(scapyinfo['count']):
-            sendp(pkt, iface=scapyinfo['iface'])
+        for scapyinfo in lscapyinfo:
+            scapy_keys = ['iface', 'count', 'packet']
+            missing_keys = []
+            keyfail = False
+            for k in scapy_keys:
+                if k not in scapyinfo:
+                    keyfail = True
+                    missing_keys.append(k)
+            if keyfail:
+                print('{}: Scapy block present in the test, but is missing info:'
+                    .format(self.sub_class))
+                print('{}'.format(missing_keys))
+
+            pkt = eval(scapyinfo['packet'])
+            if '$' in scapyinfo['iface']:
+                tpl = Template(scapyinfo['iface'])
+                scapyinfo['iface'] = tpl.safe_substitute(NAMES)
+            for count in range(scapyinfo['count']):
+                sendp(pkt, iface=scapyinfo['iface'])

From e46905641316d7ffed867651310f176b1434ef69 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Tue, 22 Jun 2021 12:05:02 -0300
Subject: [PATCH 2617/3292] tc-testing: add test for ct DNAT tuple collision

When this test fails, /proc/net/nf_conntrack gets only 1 entry:
ipv4     2 tcp      6 119 SYN_SENT src=10.0.0.10 dst=10.0.0.10 sport=5000 dport=10 [UNREPLIED] src=20.0.0.1 dst=10.0.0.10 sport=10 dport=5000 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2

When it works, it gets 2 entries:
ipv4     2 tcp      6 119 SYN_SENT src=10.0.0.10 dst=10.0.0.20 sport=5000 dport=10 [UNREPLIED] src=20.0.0.1 dst=10.0.0.10 sport=10 dport=58203 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2
ipv4     2 tcp      6 119 SYN_SENT src=10.0.0.10 dst=10.0.0.10 sport=5000 dport=10 [UNREPLIED] src=20.0.0.1 dst=10.0.0.10 sport=10 dport=5000 mark=0 secctx=system_u:object_r:unlabeled_t:s0 zone=0 use=2

The missing entry is because the 2nd packet hits a tuple collusion and the
conntrack entry doesn't get allocated.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/ct.json       | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 4202e95e27b9..bd843ab00a58 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -406,5 +406,50 @@
         "teardown": [
             "$TC actions flush action ct"
         ]
+    },
+    {
+        "id": "3992",
+        "name": "Add ct action triggering DNAT tuple conflict",
+        "category": [
+            "actions",
+            "ct",
+	    "scapy"
+        ],
+	"plugins": {
+		"requires": [
+			"nsPlugin",
+			"scapyPlugin"
+		]
+	},
+        "setup": [
+            [
+                "$TC qdisc del dev $DEV1 ingress",
+                0,
+                1,
+		2,
+                255
+            ],
+	    "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop",
+	"scapy": [
+	    {
+		"iface": "$DEV0",
+		"count": 1,
+		"packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)"
+	    },
+	    {
+		"iface": "$DEV0",
+		"count": 1,
+		"packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+	    }
+	],
+        "expExitCode": "0",
+        "verifyCmd": "cat /proc/net/nf_conntrack",
+        "matchPattern": "dst=10.0.0.20",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
     }
 ]

From aff0824dc4d6ef5ee164c0e98e64d91731ca585b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 22 Jun 2021 19:18:31 +0200
Subject: [PATCH 2618/3292] net: marvell: return csum computation result from
 mvneta_rx_csum/mvpp2_rx_csum

This is a preliminary patch to add hw csum hint support to
mvneta/mvpp2 xdp implementation

Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c         | 19 +++++++------------
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 14 +++++---------
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c15ce06427d0..88a755034c39 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1805,18 +1805,14 @@ static void mvneta_rx_error(struct mvneta_port *pp,
 }
 
 /* Handle RX checksum offload based on the descriptor's status */
-static void mvneta_rx_csum(struct mvneta_port *pp, u32 status,
-			   struct sk_buff *skb)
+static int mvneta_rx_csum(struct mvneta_port *pp, u32 status)
 {
 	if ((pp->dev->features & NETIF_F_RXCSUM) &&
 	    (status & MVNETA_RXD_L3_IP4) &&
-	    (status & MVNETA_RXD_L4_CSUM_OK)) {
-		skb->csum = 0;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		return;
-	}
+	    (status & MVNETA_RXD_L4_CSUM_OK))
+		return CHECKSUM_UNNECESSARY;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	return CHECKSUM_NONE;
 }
 
 /* Return tx queue pointer (find last set bit) according to <cause> returned
@@ -2335,7 +2331,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
 
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	skb_put(skb, xdp->data_end - xdp->data);
-	mvneta_rx_csum(pp, desc_status, skb);
+	skb->ip_summed = mvneta_rx_csum(pp, desc_status);
 
 	for (i = 0; i < num_frags; i++) {
 		skb_frag_t *frag = &sinfo->frags[i];
@@ -2535,7 +2531,7 @@ err_drop_frame:
 				     rx_bytes);
 
 			skb->protocol = eth_type_trans(skb, dev);
-			mvneta_rx_csum(pp, rx_status, skb);
+			skb->ip_summed = mvneta_rx_csum(pp, rx_status);
 			napi_gro_receive(napi, skb);
 
 			rcvd_pkts++;
@@ -2584,8 +2580,7 @@ err_drop_frame:
 		skb_put(skb, rx_bytes);
 
 		skb->protocol = eth_type_trans(skb, dev);
-
-		mvneta_rx_csum(pp, rx_status, skb);
+		skb->ip_summed = mvneta_rx_csum(pp, rx_status);
 
 		napi_gro_receive(napi, skb);
 	}
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 8362e64a3b28..3135220a8942 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3543,21 +3543,17 @@ static void mvpp2_rx_error(struct mvpp2_port *port,
 }
 
 /* Handle RX checksum offload */
-static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
-			  struct sk_buff *skb)
+static int mvpp2_rx_csum(struct mvpp2_port *port, u32 status)
 {
 	if (((status & MVPP2_RXD_L3_IP4) &&
 	     !(status & MVPP2_RXD_IP4_HEADER_ERR)) ||
 	    (status & MVPP2_RXD_L3_IP6))
 		if (((status & MVPP2_RXD_L4_UDP) ||
 		     (status & MVPP2_RXD_L4_TCP)) &&
-		     (status & MVPP2_RXD_L4_CSUM_OK)) {
-			skb->csum = 0;
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return;
-		}
+		     (status & MVPP2_RXD_L4_CSUM_OK))
+			return CHECKSUM_UNNECESSARY;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	return CHECKSUM_NONE;
 }
 
 /* Allocate a new skb and add it to BM pool */
@@ -4012,7 +4008,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 
 		skb_reserve(skb, MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM);
 		skb_put(skb, rx_bytes);
-		mvpp2_rx_csum(port, rx_status, skb);
+		skb->ip_summed = mvpp2_rx_csum(port, rx_status);
 		skb->protocol = eth_type_trans(skb, dev);
 
 		napi_gro_receive(napi, skb);

From 380d53c45ff21f66870ee965b62613137f9d010d Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 21 Jun 2021 16:18:20 -0700
Subject: [PATCH 2619/3292] compiler_attributes.h: define __no_profile, add to
 noinstr

noinstr implies that we would like the compiler to avoid instrumenting a
function.  Add support for the compiler attribute
no_profile_instrument_function to compiler_attributes.h, then add
__no_profile to the definition of noinstr.

Link: https://lore.kernel.org/lkml/20210614162018.GD68749@worktop.programming.kicks-ass.net/
Link: https://reviews.llvm.org/D104257
Link: https://reviews.llvm.org/D104475
Link: https://reviews.llvm.org/D104658
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80223
Reviewed-by: Fangrui Song <maskray@google.com>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210621231822.2848305-2-ndesaulniers@google.com
---
 include/linux/compiler_attributes.h | 13 +++++++++++++
 include/linux/compiler_types.h      |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index c043b8d2b17b..225511b17223 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -33,6 +33,7 @@
 # define __GCC4_has_attribute___externally_visible__  1
 # define __GCC4_has_attribute___no_caller_saved_registers__ 0
 # define __GCC4_has_attribute___noclone__             1
+# define __GCC4_has_attribute___no_profile_instrument_function__ 0
 # define __GCC4_has_attribute___nonstring__           0
 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
 # define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9)
@@ -237,6 +238,18 @@
 # define __nonstring
 #endif
 
+/*
+ * Optional: only supported since GCC >= 7.1, clang >= 13.0.
+ *
+ *      gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fprofile_005finstrument_005ffunction-function-attribute
+ *    clang: https://clang.llvm.org/docs/AttributeReference.html#no-profile-instrument-function
+ */
+#if __has_attribute(__no_profile_instrument_function__)
+# define __no_profile                  __attribute__((__no_profile_instrument_function__))
+#else
+# define __no_profile
+#endif
+
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
  * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index d29bda7f6ebd..d509169860f1 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -210,7 +210,7 @@ struct ftrace_likely_data {
 /* Section for code which can't be instrumented at all */
 #define noinstr								\
 	noinline notrace __attribute((__section__(".noinstr.text")))	\
-	__no_kcsan __no_sanitize_address
+	__no_kcsan __no_sanitize_address __no_profile
 
 #endif /* __KERNEL__ */
 

From ae4d682dfd3350d9836dafeed1fc5aa1e27c4963 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 21 Jun 2021 16:18:21 -0700
Subject: [PATCH 2620/3292] compiler_attributes.h: cleanups for GCC 4.9+

Since
commit 6ec4476ac825 ("Raise gcc version requirement to 4.9")
we no longer support building the kernel with GCC 4.8; drop the
preprocess checks for __GNUC_MINOR__ version. It's implied that if
__GNUC_MAJOR__ is 4, then the only supported version of __GNUC_MINOR__
left is 9.

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210621231822.2848305-3-ndesaulniers@google.com
---
 include/linux/compiler_attributes.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 225511b17223..84b1c970acb3 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -27,7 +27,7 @@
  */
 #ifndef __has_attribute
 # define __has_attribute(x) __GCC4_has_attribute_##x
-# define __GCC4_has_attribute___assume_aligned__      (__GNUC_MINOR__ >= 9)
+# define __GCC4_has_attribute___assume_aligned__      1
 # define __GCC4_has_attribute___copy__                0
 # define __GCC4_has_attribute___designated_init__     0
 # define __GCC4_has_attribute___externally_visible__  1
@@ -35,8 +35,8 @@
 # define __GCC4_has_attribute___noclone__             1
 # define __GCC4_has_attribute___no_profile_instrument_function__ 0
 # define __GCC4_has_attribute___nonstring__           0
-# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
-# define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9)
+# define __GCC4_has_attribute___no_sanitize_address__ 1
+# define __GCC4_has_attribute___no_sanitize_undefined__ 1
 # define __GCC4_has_attribute___fallthrough__         0
 #endif
 

From 51c2ee6d121ceb31ab8d35aff4ce53007aefb455 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 21 Jun 2021 16:18:22 -0700
Subject: [PATCH 2621/3292] Kconfig: Introduce ARCH_WANTS_NO_INSTR and
 CC_HAS_NO_PROFILE_FN_ATTR

We don't want compiler instrumentation to touch noinstr functions,
which are annotated with the no_profile_instrument_function function
attribute. Add a Kconfig test for this and make GCOV depend on it, and
in the future, PGO.

If an architecture is using noinstr, it should denote that via this
Kconfig value. That makes Kconfigs that depend on noinstr able to express
dependencies in an architecturally agnostic way.

Cc: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/lkml/YMTn9yjuemKFLbws@hirez.programming.kicks-ass.net/
Link: https://lore.kernel.org/lkml/YMcssV%2Fn5IBGv4f0@hirez.programming.kicks-ass.net/
Suggested-by: Nathan Chancellor <nathan@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210621231822.2848305-4-ndesaulniers@google.com
---
 arch/Kconfig        | 7 +++++++
 arch/arm64/Kconfig  | 1 +
 arch/s390/Kconfig   | 1 +
 arch/x86/Kconfig    | 1 +
 init/Kconfig        | 3 +++
 kernel/gcov/Kconfig | 1 +
 6 files changed, 14 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index c45b770d3579..129df498a8e1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -285,6 +285,13 @@ config ARCH_THREAD_STACK_ALLOCATOR
 config ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	bool
 
+config ARCH_WANTS_NO_INSTR
+	bool
+	help
+	  An architecture should select this if the noinstr macro is being used on
+	  functions to denote that the toolchain should avoid instrumenting such
+	  functions and is required for correctness.
+
 config ARCH_32BIT_OFF_T
 	bool
 	depends on !64BIT
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f1d8566bbf9..39bf982b06f8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -93,6 +93,7 @@ config ARM64
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 	select ARCH_WANT_LD_ORPHAN_WARN
+	select ARCH_WANTS_NO_INSTR
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b4c7c34069f8..bd60310f33b9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -117,6 +117,7 @@ config S390
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_TABLE_SORT
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 12fa0d7bfa64..268a5dec8b3f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -113,6 +113,7 @@ config X86
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_THP_SWAP		if X86_64
diff --git a/init/Kconfig b/init/Kconfig
index 1ea12c64e4c9..31397a7a45fb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -83,6 +83,9 @@ config TOOLS_SUPPORT_RELR
 config CC_HAS_ASM_INLINE
 	def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
 
+config CC_HAS_NO_PROFILE_FN_ATTR
+	def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror)
+
 config CONSTRUCTORS
 	bool
 
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 58f87a3092f3..053447183ac5 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -5,6 +5,7 @@ config GCOV_KERNEL
 	bool "Enable gcov-based kernel profiling"
 	depends on DEBUG_FS
 	depends on !CC_IS_CLANG || CLANG_VERSION >= 110000
+	depends on !ARCH_WANTS_NO_INSTR || CC_HAS_NO_PROFILE_FN_ATTR
 	select CONSTRUCTORS
 	default n
 	help

From 745a32117b5a0799ce1dd28d5a74dc2b7bf37692 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:47 -0400
Subject: [PATCH 2622/3292] sctp: add pad chunk and its make function and event
 table

This chunk is defined in rfc4820#section-3, and used to pad an
SCTP packet. The receiver must discard this chunk and continue
processing the rest of the chunks in the packet.

Add it now, as it will be bundled with a heartbeat chunk to probe
pmtu in the following patches.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h     |  7 +++++++
 include/net/sctp/sm.h    |  1 +
 net/sctp/sm_make_chunk.c | 26 ++++++++++++++++++++++++++
 net/sctp/sm_statetable.c | 23 +++++++++++++++++++++++
 4 files changed, 57 insertions(+)

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index bb1926589693..a86e852507b3 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -98,6 +98,7 @@ enum sctp_cid {
 	SCTP_CID_I_FWD_TSN		= 0xC2,
 	SCTP_CID_ASCONF_ACK		= 0x80,
 	SCTP_CID_RECONF			= 0x82,
+	SCTP_CID_PAD			= 0x84,
 }; /* enum */
 
 
@@ -410,6 +411,12 @@ struct sctp_heartbeat_chunk {
 };
 
 
+/* PAD chunk could be bundled with heartbeat chunk to probe pmtu */
+struct sctp_pad_chunk {
+	struct sctp_chunkhdr uh;
+};
+
+
 /* For the abort and shutdown ACK we must carry the init tag in the
  * common header. Just the common header is all that is needed with a
  * chunk descriptor.
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index fd223c94589a..09c59154634d 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -230,6 +230,7 @@ struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
 					   const struct sctp_chunk *chunk,
 					   const void *payload,
 					   const size_t paylen);
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len);
 struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
 				      const struct sctp_chunk *chunk,
 				      __be16 cause_code, const void *payload,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5b44d228b6ca..e5d470cd7c40 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1218,6 +1218,32 @@ nodata:
 	return retval;
 }
 
+/* RFC4820 3. Padding Chunk (PAD)
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Type = 0x84   |   Flags=0     |             Length            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                                                               |
+ * \                         Padding Data                          /
+ * /                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len)
+{
+	struct sctp_chunk *retval;
+
+	retval = sctp_make_control(asoc, SCTP_CID_PAD, 0, len, GFP_ATOMIC);
+	if (!retval)
+		return NULL;
+
+	skb_put_zero(retval->skb, len);
+	retval->chunk_hdr->length = htons(ntohs(retval->chunk_hdr->length) + len);
+	retval->chunk_end = skb_tail_pointer(retval->skb);
+
+	return retval;
+}
+
 /* Create an Operation Error chunk with the specified space reserved.
  * This routine can be used for containing multiple causes in the chunk.
  */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 88ea87f4f0e7..c82c4233ec6b 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -526,6 +526,26 @@ auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_AUTH,
 }; /*state_fn_t auth_chunk_event_table[][] */
 
+static const struct sctp_sm_table_entry
+pad_chunk_event_table[SCTP_STATE_NUM_STATES] = {
+	/* SCTP_STATE_CLOSED */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_COOKIE_WAIT */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_COOKIE_ECHOED */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_ESTABLISHED */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_SHUTDOWN_PENDING */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_SHUTDOWN_SENT */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */
+	TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+};	/* chunk pad */
+
 static const struct sctp_sm_table_entry
 chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 	/* SCTP_STATE_CLOSED */
@@ -992,6 +1012,9 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
 
 	case SCTP_CID_AUTH:
 		return &auth_chunk_event_table[0][state];
+
+	case SCTP_CID_PAD:
+		return &pad_chunk_event_table[state];
 	}
 
 	return &chunk_event_table_unknown[state];

From d1e462a7a5f359cbb9a0e8fbfafcfb6657034105 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:48 -0400
Subject: [PATCH 2623/3292] sctp: add probe_interval in sysctl and
 sock/asoc/transport

PLPMTUD can be enabled by doing 'sysctl -w net.sctp.probe_interval=n'.
'n' is the interval for PLPMTUD probe timer in milliseconds, and it
can't be less than 5000 if it's not 0.

All asoc/transport's PLPMTUD in a new socket will be enabled by default.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst |  8 ++++++
 include/net/netns/sctp.h               |  3 +++
 include/net/sctp/constants.h           |  2 ++
 include/net/sctp/structs.h             |  3 +++
 net/sctp/associola.c                   |  2 ++
 net/sctp/socket.c                      |  1 +
 net/sctp/sysctl.c                      | 35 ++++++++++++++++++++++++++
 7 files changed, 54 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b0436d3a4f11..8bff728b3a1e 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2834,6 +2834,14 @@ encap_port - INTEGER
 
 	Default: 0
 
+plpmtud_probe_interval - INTEGER
+        The time interval (in milliseconds) for sending PLPMTUD probe chunks.
+        These chunks are sent at the specified interval with a variable size
+        to probe the mtu of a given path between 2 endpoints. PLPMTUD will
+        be disabled when 0 is set, and other values for it must be >= 5000.
+
+	Default: 0
+
 
 ``/proc/sys/net/core/*``
 ========================
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index a0f315effa94..40240722cdca 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -84,6 +84,9 @@ struct netns_sctp {
 	/* HB.interval		    - 30 seconds  */
 	unsigned int hb_interval;
 
+	/* The interval for PLPMTUD probe timer */
+	unsigned int probe_interval;
+
 	/* Association.Max.Retrans  - 10 attempts
 	 * Path.Max.Retrans	    - 5	 attempts (per destination address)
 	 * Max.Init.Retransmits	    - 8	 attempts
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 14a0d22c9113..449cf9cb428b 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -424,4 +424,6 @@ enum {
  */
 #define SCTP_AUTH_RANDOM_LENGTH 32
 
+#define SCTP_PROBE_TIMER_MIN	5000
+
 #endif /* __sctp_constants_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 1aa585216f34..bf5d22deaefb 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -177,6 +177,7 @@ struct sctp_sock {
 	 * will be inherited by all new associations.
 	 */
 	__u32 hbinterval;
+	__u32 probe_interval;
 
 	__be16 udp_port;
 	__be16 encap_port;
@@ -858,6 +859,7 @@ struct sctp_transport {
 	 * the destination address every heartbeat interval.
 	 */
 	unsigned long hbinterval;
+	unsigned long probe_interval;
 
 	/* SACK delay timeout */
 	unsigned long sackdelay;
@@ -1795,6 +1797,7 @@ struct sctp_association {
 	 * will be inherited by all new transports.
 	 */
 	unsigned long hbinterval;
+	unsigned long probe_interval;
 
 	__be16 encap_port;
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 336df4b36655..e01895edd3a4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -98,6 +98,7 @@ static struct sctp_association *sctp_association_init(
 	 * sock configured value.
 	 */
 	asoc->hbinterval = msecs_to_jiffies(sp->hbinterval);
+	asoc->probe_interval = msecs_to_jiffies(sp->probe_interval);
 
 	asoc->encap_port = sp->encap_port;
 
@@ -625,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	 * association configured value.
 	 */
 	peer->hbinterval = asoc->hbinterval;
+	peer->probe_interval = asoc->probe_interval;
 
 	peer->encap_port = asoc->encap_port;
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a79d193ff872..d2960ab665a5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4989,6 +4989,7 @@ static int sctp_init_sock(struct sock *sk)
 	atomic_set(&sp->pd_mode, 0);
 	skb_queue_head_init(&sp->pd_lobby);
 	sp->frag_interleave = 0;
+	sp->probe_interval = net->sctp.probe_interval;
 
 	/* Create a per socket endpoint structure.  Even if we
 	 * change the data structure relationships, this may still
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 55871b277f47..b46a416787ec 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -55,6 +55,8 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
 				   void *buffer, size_t *lenp, loff_t *ppos);
 static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
 			     void *buffer, size_t *lenp, loff_t *ppos);
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+				       void *buffer, size_t *lenp, loff_t *ppos);
 
 static struct ctl_table sctp_table[] = {
 	{
@@ -293,6 +295,13 @@ static struct ctl_table sctp_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "plpmtud_probe_interval",
+		.data		= &init_net.sctp.probe_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_sctp_do_probe_interval,
+	},
 	{
 		.procname	= "udp_port",
 		.data		= &init_net.sctp.udp_port,
@@ -539,6 +548,32 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+				       void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct net *net = current->nsproxy->net_ns;
+	struct ctl_table tbl;
+	int ret, new_value;
+
+	memset(&tbl, 0, sizeof(struct ctl_table));
+	tbl.maxlen = sizeof(unsigned int);
+
+	if (write)
+		tbl.data = &new_value;
+	else
+		tbl.data = &net->sctp.probe_interval;
+
+	ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
+	if (write && ret == 0) {
+		if (new_value && new_value < SCTP_PROBE_TIMER_MIN)
+			return -EINVAL;
+
+		net->sctp.probe_interval = new_value;
+	}
+
+	return ret;
+}
+
 int sctp_sysctl_net_register(struct net *net)
 {
 	struct ctl_table *table;

From 3190b649b4d9391be7bde3edd8e924e451c5d2f6 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:49 -0400
Subject: [PATCH 2624/3292] sctp: add SCTP_PLPMTUD_PROBE_INTERVAL sockopt for
 sock/asoc/transport

With this socket option, users can change probe_interval for
a transport, asoc or sock after it's created.

Note that if the change is for an asoc, also apply the change
to each transport in this asoc.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |   8 +++
 net/sctp/socket.c         | 118 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index cb78e7a739da..c4ff1ebd8bcc 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -141,6 +141,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_EXPOSE_POTENTIALLY_FAILED_STATE	131
 #define SCTP_EXPOSE_PF_STATE	SCTP_EXPOSE_POTENTIALLY_FAILED_STATE
 #define SCTP_REMOTE_UDP_ENCAPS_PORT	132
+#define SCTP_PLPMTUD_PROBE_INTERVAL	133
 
 /* PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE	0x0000
@@ -1213,4 +1214,11 @@ enum sctp_sched_type {
 	SCTP_SS_MAX = SCTP_SS_RR
 };
 
+/* Probe Interval socket option */
+struct sctp_probeinterval {
+	sctp_assoc_t spi_assoc_id;
+	struct sockaddr_storage spi_address;
+	__u32 spi_interval;
+};
+
 #endif /* _UAPI_SCTP_H */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d2960ab665a5..aba576f53458 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4481,6 +4481,58 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
 	return 0;
 }
 
+static int sctp_setsockopt_probe_interval(struct sock *sk,
+					  struct sctp_probeinterval *params,
+					  unsigned int optlen)
+{
+	struct sctp_association *asoc;
+	struct sctp_transport *t;
+	__u32 probe_interval;
+
+	if (optlen != sizeof(*params))
+		return -EINVAL;
+
+	probe_interval = params->spi_interval;
+	if (probe_interval && probe_interval < SCTP_PROBE_TIMER_MIN)
+		return -EINVAL;
+
+	/* If an address other than INADDR_ANY is specified, and
+	 * no transport is found, then the request is invalid.
+	 */
+	if (!sctp_is_any(sk, (union sctp_addr *)&params->spi_address)) {
+		t = sctp_addr_id2transport(sk, &params->spi_address,
+					   params->spi_assoc_id);
+		if (!t)
+			return -EINVAL;
+
+		t->probe_interval = msecs_to_jiffies(probe_interval);
+		return 0;
+	}
+
+	/* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
+	 */
+	asoc = sctp_id2assoc(sk, params->spi_assoc_id);
+	if (!asoc && params->spi_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	/* If changes are for association, also apply probe_interval to
+	 * each transport.
+	 */
+	if (asoc) {
+		list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
+			t->probe_interval = msecs_to_jiffies(probe_interval);
+
+		asoc->probe_interval = msecs_to_jiffies(probe_interval);
+		return 0;
+	}
+
+	sctp_sk(sk)->probe_interval = probe_interval;
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4703,6 +4755,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_REMOTE_UDP_ENCAPS_PORT:
 		retval = sctp_setsockopt_encap_port(sk, kopt, optlen);
 		break;
+	case SCTP_PLPMTUD_PROBE_INTERVAL:
+		retval = sctp_setsockopt_probe_interval(sk, kopt, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7906,6 +7961,66 @@ out:
 	return 0;
 }
 
+static int sctp_getsockopt_probe_interval(struct sock *sk, int len,
+					  char __user *optval,
+					  int __user *optlen)
+{
+	struct sctp_probeinterval params;
+	struct sctp_association *asoc;
+	struct sctp_transport *t;
+	__u32 probe_interval;
+
+	if (len < sizeof(params))
+		return -EINVAL;
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		return -EFAULT;
+
+	/* If an address other than INADDR_ANY is specified, and
+	 * no transport is found, then the request is invalid.
+	 */
+	if (!sctp_is_any(sk, (union sctp_addr *)&params.spi_address)) {
+		t = sctp_addr_id2transport(sk, &params.spi_address,
+					   params.spi_assoc_id);
+		if (!t) {
+			pr_debug("%s: failed no transport\n", __func__);
+			return -EINVAL;
+		}
+
+		probe_interval = jiffies_to_msecs(t->probe_interval);
+		goto out;
+	}
+
+	/* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+	 * socket is a one to many style socket, and an association
+	 * was not found, then the id was invalid.
+	 */
+	asoc = sctp_id2assoc(sk, params.spi_assoc_id);
+	if (!asoc && params.spi_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		pr_debug("%s: failed no association\n", __func__);
+		return -EINVAL;
+	}
+
+	if (asoc) {
+		probe_interval = jiffies_to_msecs(asoc->probe_interval);
+		goto out;
+	}
+
+	probe_interval = sctp_sk(sk)->probe_interval;
+
+out:
+	params.spi_interval = probe_interval;
+	if (copy_to_user(optval, &params, len))
+		return -EFAULT;
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -8129,6 +8244,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_REMOTE_UDP_ENCAPS_PORT:
 		retval = sctp_getsockopt_encap_port(sk, len, optval, optlen);
 		break;
+	case SCTP_PLPMTUD_PROBE_INTERVAL:
+		retval = sctp_getsockopt_probe_interval(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;

From d9e2e410ae301d4b540e965daca51de0e65e8a26 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:50 -0400
Subject: [PATCH 2625/3292] sctp: add the constants/variables and states and
 some APIs for transport

These are 4 constants described in rfc8899#section-5.1.2:

  MAX_PROBES, MIN_PLPMTU, MAX_PLPMTU, BASE_PLPMTU;

And 2 variables described in rfc8899#section-5.1.3:

  PROBED_SIZE, PROBE_COUNT;

And 5 states described in rfc8899#section-5.2:

  DISABLED, BASE, SEARCH, SEARCH_COMPLETE, ERROR;

And these 4 APIs are used to reset/update PLPMTUD, check if PLPMTUD is
enabled, and calculate the additional headers length for a transport.

Note the member 'probe_high' in transport will be set to the probe
size when a probe fails with this probe size in the next patches.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 17 +++++++++++++
 include/net/sctp/sctp.h      | 48 +++++++++++++++++++++++++++++++++---
 include/net/sctp/structs.h   |  8 ++++++
 3 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 449cf9cb428b..85f6a105c59d 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -200,6 +200,23 @@ enum sctp_sock_state {
 	SCTP_SS_CLOSING        = TCP_CLOSE_WAIT,
 };
 
+enum sctp_plpmtud_state {
+	SCTP_PL_DISABLED,
+	SCTP_PL_BASE,
+	SCTP_PL_SEARCH,
+	SCTP_PL_COMPLETE,
+	SCTP_PL_ERROR,
+};
+
+#define	SCTP_BASE_PLPMTU	1200
+#define	SCTP_MAX_PLPMTU		9000
+#define	SCTP_MIN_PLPMTU		512
+
+#define	SCTP_MAX_PROBES		3
+
+#define SCTP_PL_BIG_STEP	32
+#define SCTP_PL_MIN_STEP	4
+
 /* These functions map various type to printable names.  */
 const char *sctp_cname(const union sctp_subtype id);	/* chunk types */
 const char *sctp_oname(const union sctp_subtype id);	/* other events */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 86f74f2fe6de..08347d3f004f 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -573,14 +573,15 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *
 /* Calculate max payload size given a MTU, or the total overhead if
  * given MTU is zero
  */
-static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
-				     __u32 mtu, __u32 extra)
+static inline __u32 __sctp_mtu_payload(const struct sctp_sock *sp,
+				       const struct sctp_transport *t,
+				       __u32 mtu, __u32 extra)
 {
 	__u32 overhead = sizeof(struct sctphdr) + extra;
 
 	if (sp) {
 		overhead += sp->pf->af->net_header_len;
-		if (sp->udp_port)
+		if (sp->udp_port && (!t || t->encap_port))
 			overhead += sizeof(struct udphdr);
 	} else {
 		overhead += sizeof(struct ipv6hdr);
@@ -592,6 +593,12 @@ static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
 	return mtu ? mtu - overhead : overhead;
 }
 
+static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp,
+				     __u32 mtu, __u32 extra)
+{
+	return __sctp_mtu_payload(sp, NULL, mtu, extra);
+}
+
 static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
 {
 	return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst),
@@ -615,6 +622,41 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
 	return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
 }
 
+static inline int sctp_transport_pl_hlen(struct sctp_transport *t)
+{
+	return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0);
+}
+
+static inline void sctp_transport_pl_reset(struct sctp_transport *t)
+{
+	if (t->probe_interval && (t->param_flags & SPP_PMTUD_ENABLE) &&
+	    (t->state == SCTP_ACTIVE || t->state == SCTP_UNKNOWN)) {
+		if (t->pl.state == SCTP_PL_DISABLED) {
+			t->pl.state = SCTP_PL_BASE;
+			t->pl.pmtu = SCTP_BASE_PLPMTU;
+			t->pl.probe_size = SCTP_BASE_PLPMTU;
+		}
+	} else {
+		if (t->pl.state != SCTP_PL_DISABLED)
+			t->pl.state = SCTP_PL_DISABLED;
+	}
+}
+
+static inline void sctp_transport_pl_update(struct sctp_transport *t)
+{
+	if (t->pl.state == SCTP_PL_DISABLED)
+		return;
+
+	t->pl.state = SCTP_PL_BASE;
+	t->pl.pmtu = SCTP_BASE_PLPMTU;
+	t->pl.probe_size = SCTP_BASE_PLPMTU;
+}
+
+static inline bool sctp_transport_pl_enabled(struct sctp_transport *t)
+{
+	return t->pl.state != SCTP_PL_DISABLED;
+}
+
 static inline bool sctp_newsk_ready(const struct sock *sk)
 {
 	return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index bf5d22deaefb..85d3566c2227 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -978,6 +978,14 @@ struct sctp_transport {
 		char cacc_saw_newack;
 	} cacc;
 
+	struct {
+		__u16 pmtu;
+		__u16 probe_size;
+		__u16 probe_high;
+		__u8 probe_count;
+		__u8 state;
+	} pl; /* plpmtud related */
+
 	/* 64-bit random number sent with heartbeat. */
 	__u64 hb_nonce;
 

From 92548ec2f1f92d0c0b60ce59592b645571672568 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:51 -0400
Subject: [PATCH 2626/3292] sctp: add the probe timer in transport for PLPMTUD

There are 3 timers described in rfc8899#section-5.1.1:

  PROBE_TIMER, PMTU_RAISE_TIMER, CONFIRMATION_TIMER

This patches adds a 'probe_timer' in transport, and it works as either
PROBE_TIMER or PMTU_RAISE_TIMER. At most time, it works as PROBE_TIMER
and expires every a 'probe_interval' time to send the HB probe packet.
When transport pl enters COMPLETE state, it works as PMTU_RAISE_TIMER
and expires in 'probe_interval * 30' time to go back to SEARCH state
and do searching again.

SCTP HB is an acknowledged packet, CONFIRMATION_TIMER is not needed.

The timer will start when transport pl enters BASE state and stop
when it enters DISABLED state.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h   |  1 +
 include/net/sctp/constants.h |  1 +
 include/net/sctp/sctp.h      |  9 ++++++++-
 include/net/sctp/sm.h        |  2 ++
 include/net/sctp/structs.h   |  4 ++++
 net/sctp/debug.c             |  1 +
 net/sctp/sm_sideeffect.c     | 37 ++++++++++++++++++++++++++++++++++++
 net/sctp/sm_statefuns.c      | 17 +++++++++++++++++
 net/sctp/sm_statetable.c     | 20 +++++++++++++++++++
 net/sctp/transport.c         | 18 ++++++++++++++++++
 10 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 5e848884ff61..2058fabffbf6 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -59,6 +59,7 @@ enum sctp_verb {
 	SCTP_CMD_HB_TIMERS_START,    /* Start the heartbeat timers. */
 	SCTP_CMD_HB_TIMER_UPDATE,    /* Update a heartbeat timers.  */
 	SCTP_CMD_HB_TIMERS_STOP,     /* Stop the heartbeat timers.  */
+	SCTP_CMD_PROBE_TIMER_UPDATE, /* Update a probe timer.  */
 	SCTP_CMD_TRANSPORT_HB_SENT,  /* Reset the status of a transport. */
 	SCTP_CMD_TRANSPORT_IDLE,     /* Do manipulations on idle transport */
 	SCTP_CMD_TRANSPORT_ON,       /* Mark the transport as active. */
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 85f6a105c59d..265fffa33dad 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -77,6 +77,7 @@ enum sctp_event_timeout {
 	SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
 	SCTP_EVENT_TIMEOUT_HEARTBEAT,
 	SCTP_EVENT_TIMEOUT_RECONF,
+	SCTP_EVENT_TIMEOUT_PROBE,
 	SCTP_EVENT_TIMEOUT_SACK,
 	SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 08347d3f004f..f7e083602c10 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -635,10 +635,14 @@ static inline void sctp_transport_pl_reset(struct sctp_transport *t)
 			t->pl.state = SCTP_PL_BASE;
 			t->pl.pmtu = SCTP_BASE_PLPMTU;
 			t->pl.probe_size = SCTP_BASE_PLPMTU;
+			sctp_transport_reset_probe_timer(t);
 		}
 	} else {
-		if (t->pl.state != SCTP_PL_DISABLED)
+		if (t->pl.state != SCTP_PL_DISABLED) {
+			if (del_timer(&t->probe_timer))
+				sctp_transport_put(t);
 			t->pl.state = SCTP_PL_DISABLED;
+		}
 	}
 }
 
@@ -647,6 +651,9 @@ static inline void sctp_transport_pl_update(struct sctp_transport *t)
 	if (t->pl.state == SCTP_PL_DISABLED)
 		return;
 
+	if (del_timer(&t->probe_timer))
+		sctp_transport_put(t);
+
 	t->pl.state = SCTP_PL_BASE;
 	t->pl.pmtu = SCTP_BASE_PLPMTU;
 	t->pl.probe_size = SCTP_BASE_PLPMTU;
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 09c59154634d..45542e2bac93 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -151,6 +151,7 @@ sctp_state_fn_t sctp_sf_cookie_wait_icmp_abort;
 /* Prototypes for timeout event state functions.  */
 sctp_state_fn_t sctp_sf_do_6_3_3_rtx;
 sctp_state_fn_t sctp_sf_send_reconf;
+sctp_state_fn_t sctp_sf_send_probe;
 sctp_state_fn_t sctp_sf_do_6_2_sack;
 sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 
@@ -311,6 +312,7 @@ int sctp_do_sm(struct net *net, enum sctp_event_type event_type,
 void sctp_generate_t3_rtx_event(struct timer_list *t);
 void sctp_generate_heartbeat_event(struct timer_list *t);
 void sctp_generate_reconf_event(struct timer_list *t);
+void sctp_generate_probe_event(struct timer_list *t);
 void sctp_generate_proto_unreach_event(struct timer_list *t);
 
 void sctp_ootb_pkt_free(struct sctp_packet *packet);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 85d3566c2227..a3772f8ee7f6 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -936,6 +936,9 @@ struct sctp_transport {
 	/* Timer to handler reconf chunk rtx */
 	struct timer_list reconf_timer;
 
+	/* Timer to send a probe HB packet for PLPMTUD */
+	struct timer_list probe_timer;
+
 	/* Since we're using per-destination retransmission timers
 	 * (see above), we're also using per-destination "transmitted"
 	 * queues.  This probably ought to be a private struct
@@ -1003,6 +1006,7 @@ void sctp_transport_free(struct sctp_transport *);
 void sctp_transport_reset_t3_rtx(struct sctp_transport *);
 void sctp_transport_reset_hb_timer(struct sctp_transport *);
 void sctp_transport_reset_reconf_timer(struct sctp_transport *transport);
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport);
 int sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index c4d9c7feffb9..ccd773e4c371 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -154,6 +154,7 @@ static const char *const sctp_timer_tbl[] = {
 	"TIMEOUT_T5_SHUTDOWN_GUARD",
 	"TIMEOUT_HEARTBEAT",
 	"TIMEOUT_RECONF",
+	"TIMEOUT_PROBE",
 	"TIMEOUT_SACK",
 	"TIMEOUT_AUTOCLOSE",
 };
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index ce15d590a615..b3815b568e8e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -471,6 +471,38 @@ out_unlock:
 	sctp_transport_put(transport);
 }
 
+/* Handle the timeout of the probe timer. */
+void sctp_generate_probe_event(struct timer_list *t)
+{
+	struct sctp_transport *transport = from_timer(transport, t, probe_timer);
+	struct sctp_association *asoc = transport->asoc;
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
+	int error = 0;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		pr_debug("%s: sock is busy\n", __func__);
+
+		/* Try again later.  */
+		if (!mod_timer(&transport->probe_timer, jiffies + (HZ / 20)))
+			sctp_transport_hold(transport);
+		goto out_unlock;
+	}
+
+	error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
+			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_PROBE),
+			   asoc->state, asoc->ep, asoc,
+			   transport, GFP_ATOMIC);
+
+	if (error)
+		sk->sk_err = -error;
+
+out_unlock:
+	bh_unlock_sock(sk);
+	sctp_transport_put(transport);
+}
+
 /* Inject a SACK Timeout event into the state machine.  */
 static void sctp_generate_sack_event(struct timer_list *t)
 {
@@ -1641,6 +1673,11 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
 			sctp_cmd_hb_timers_stop(commands, asoc);
 			break;
 
+		case SCTP_CMD_PROBE_TIMER_UPDATE:
+			t = cmd->obj.transport;
+			sctp_transport_reset_probe_timer(t);
+			break;
+
 		case SCTP_CMD_REPORT_ERROR:
 			error = cmd->obj.error;
 			break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4f30388a0dd0..3b99eda50618 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1095,6 +1095,23 @@ enum sctp_disposition sctp_sf_send_reconf(struct net *net,
 	return SCTP_DISPOSITION_CONSUME;
 }
 
+/* send hb chunk with padding for PLPMUTD.  */
+enum sctp_disposition sctp_sf_send_probe(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
+{
+	struct sctp_transport *transport = (struct sctp_transport *)arg;
+
+	/* The actual handling will be performed here in a later patch. */
+	sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
+			SCTP_TRANSPORT(transport));
+
+	return SCTP_DISPOSITION_CONSUME;
+}
+
 /*
  * Process an heartbeat request.
  *
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index c82c4233ec6b..1816a4410b2b 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -967,6 +967,25 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 }
 
+#define TYPE_SCTP_EVENT_TIMEOUT_PROBE { \
+	/* SCTP_STATE_CLOSED */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	/* SCTP_STATE_COOKIE_WAIT */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	/* SCTP_STATE_COOKIE_ECHOED */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	/* SCTP_STATE_ESTABLISHED */ \
+	TYPE_SCTP_FUNC(sctp_sf_send_probe), \
+	/* SCTP_STATE_SHUTDOWN_PENDING */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	/* SCTP_STATE_SHUTDOWN_SENT */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+}
+
 static const struct sctp_sm_table_entry
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_EVENT_TIMEOUT_NONE,
@@ -978,6 +997,7 @@ timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
 	TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT,
 	TYPE_SCTP_EVENT_TIMEOUT_RECONF,
+	TYPE_SCTP_EVENT_TIMEOUT_PROBE,
 	TYPE_SCTP_EVENT_TIMEOUT_SACK,
 	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index bf0ac467e757..ca3343c2c80e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,6 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
 	timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
 	timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
 	timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+	timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0);
 	timer_setup(&peer->proto_unreach_timer,
 		    sctp_generate_proto_unreach_event, 0);
 
@@ -131,6 +132,9 @@ void sctp_transport_free(struct sctp_transport *transport)
 	if (del_timer(&transport->reconf_timer))
 		sctp_transport_put(transport);
 
+	if (del_timer(&transport->probe_timer))
+		sctp_transport_put(transport);
+
 	/* Delete the ICMP proto unreachable timer if it's active. */
 	if (del_timer(&transport->proto_unreach_timer))
 		sctp_transport_put(transport);
@@ -207,6 +211,20 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
 			sctp_transport_hold(transport);
 }
 
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
+{
+	int scale = 1;
+
+	if (timer_pending(&transport->probe_timer))
+		return;
+	if (transport->pl.state == SCTP_PL_COMPLETE &&
+	    transport->pl.probe_count == 1)
+		scale = 30; /* works as PMTU_RAISE_TIMER */
+	if (!mod_timer(&transport->probe_timer,
+		       jiffies + transport->probe_interval * scale))
+		sctp_transport_hold(transport);
+}
+
 /* This transport has been assigned to an association.
  * Initialize fields from the association or from the sock itself.
  * Register the reference count in the association.

From fe59379b9ab7ddad157f5379fa47dbf84c9b5e09 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:52 -0400
Subject: [PATCH 2627/3292] sctp: do the basic send and recv for PLPMTUD probe

This patch does exactly what rfc8899#section-6.2.1.2 says:

   The SCTP sender needs to be able to determine the total size of a
   probe packet.  The HEARTBEAT chunk could carry a Heartbeat
   Information parameter that includes, besides the information
   suggested in [RFC4960], the probe size to help an implementation
   associate a HEARTBEAT ACK with the size of probe that was sent.  The
   sender could also use other methods, such as sending a nonce and
   verifying the information returned also contains the corresponding
   nonce.  The length of the PAD chunk is computed by reducing the
   probing size by the size of the SCTP common header and the HEARTBEAT
   chunk.

Note that HB ACK chunk will carry back whatever HB chunk carried, including
the probe_size we put it in; We also check hbinfo->probe_size in the HB ACK
against link->pl.probe_size to validate this HB ACK chunk.

v1->v2:
  - Remove the unused 'sp' and add static for sctp_packet_bundle_pad().

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h      |  3 ++-
 include/net/sctp/structs.h |  2 ++
 net/sctp/output.c          | 30 +++++++++++++++++++++++++++++-
 net/sctp/outqueue.c        | 13 +++++++++++--
 net/sctp/sm_make_chunk.c   |  5 ++++-
 net/sctp/sm_statefuns.c    | 20 ++++++++++++++++++--
 6 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 45542e2bac93..2eb6d7c2c931 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -226,7 +226,8 @@ struct sctp_chunk *sctp_make_new_encap_port(
 					const struct sctp_association *asoc,
 					const struct sctp_chunk *chunk);
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
-				       const struct sctp_transport *transport);
+				       const struct sctp_transport *transport,
+				       __u32 probe_size);
 struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
 					   const struct sctp_chunk *chunk,
 					   const void *payload,
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a3772f8ee7f6..f7b056f5af37 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -386,6 +386,7 @@ struct sctp_sender_hb_info {
 	union sctp_addr daddr;
 	unsigned long sent_at;
 	__u64 hb_nonce;
+	__u32 probe_size;
 };
 
 int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
@@ -657,6 +658,7 @@ struct sctp_chunk {
 		data_accepted:1,	/* At least 1 chunk accepted */
 		auth:1,			/* IN: was auth'ed | OUT: needs auth */
 		has_asconf:1,		/* IN: have seen an asconf before */
+		pmtu_probe:1,		/* Used by PLPMTUD, can be set in s HB chunk */
 		tsn_missing_report:2,	/* Data chunk missing counter. */
 		fast_retransmit:2;	/* Is this chunk fast retransmitted? */
 };
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a6aa17df09ef..ceefb0616d9d 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -211,6 +211,30 @@ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
 	return retval;
 }
 
+/* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUTD probe */
+static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk)
+{
+	struct sctp_transport *t = pkt->transport;
+	struct sctp_chunk *pad;
+	int overhead = 0;
+
+	if (!chunk->pmtu_probe)
+		return SCTP_XMIT_OK;
+
+	/* calculate the Padding Data size for the pad chunk */
+	overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+	overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk);
+	pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead);
+	if (!pad)
+		return SCTP_XMIT_DELAY;
+
+	list_add_tail(&pad->list, &pkt->chunk_list);
+	pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length));
+	chunk->transport = t;
+
+	return SCTP_XMIT_OK;
+}
+
 /* Try to bundle an auth chunk into the packet. */
 static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
 					      struct sctp_chunk *chunk)
@@ -382,6 +406,10 @@ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
 		goto finish;
 
 	retval = __sctp_packet_append_chunk(packet, chunk);
+	if (retval != SCTP_XMIT_OK)
+		goto finish;
+
+	retval = sctp_packet_bundle_pad(packet, chunk);
 
 finish:
 	return retval;
@@ -553,7 +581,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 	sk = chunk->skb->sk;
 
 	/* check gso */
-	if (packet->size > tp->pathmtu && !packet->ipfragok) {
+	if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) {
 		if (!sk_can_gso(sk)) {
 			pr_err_once("Trying to GSO but underlying device doesn't support it.");
 			goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 5cb1aa5f067b..ff47091c385e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -769,7 +769,11 @@ static int sctp_packet_singleton(struct sctp_transport *transport,
 
 	sctp_packet_init(&singleton, transport, sport, dport);
 	sctp_packet_config(&singleton, vtag, 0);
-	sctp_packet_append_chunk(&singleton, chunk);
+	if (sctp_packet_append_chunk(&singleton, chunk) != SCTP_XMIT_OK) {
+		list_del_init(&chunk->list);
+		sctp_chunk_free(chunk);
+		return -ENOMEM;
+	}
 	return sctp_packet_transmit(&singleton, gfp);
 }
 
@@ -929,8 +933,13 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
 			one_packet = 1;
 			fallthrough;
 
-		case SCTP_CID_SACK:
 		case SCTP_CID_HEARTBEAT:
+			if (chunk->pmtu_probe) {
+				sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+				break;
+			}
+			fallthrough;
+		case SCTP_CID_SACK:
 		case SCTP_CID_SHUTDOWN:
 		case SCTP_CID_ECN_ECNE:
 		case SCTP_CID_ASCONF:
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e5d470cd7c40..b0eaa93a9cc6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1160,7 +1160,8 @@ nodata:
 
 /* Make a HEARTBEAT chunk.  */
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
-				       const struct sctp_transport *transport)
+				       const struct sctp_transport *transport,
+				       __u32 probe_size)
 {
 	struct sctp_sender_hb_info hbinfo;
 	struct sctp_chunk *retval;
@@ -1176,6 +1177,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 	hbinfo.daddr = transport->ipaddr;
 	hbinfo.sent_at = jiffies;
 	hbinfo.hb_nonce = transport->hb_nonce;
+	hbinfo.probe_size = probe_size;
 
 	/* Cast away the 'const', as this is just telling the chunk
 	 * what transport it belongs to.
@@ -1183,6 +1185,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 	retval->transport = (struct sctp_transport *) transport;
 	retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo),
 						&hbinfo);
+	retval->pmtu_probe = !!probe_size;
 
 nodata:
 	return retval;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 3b99eda50618..8edb9186112a 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1004,7 +1004,7 @@ static enum sctp_disposition sctp_sf_heartbeat(
 	struct sctp_chunk *reply;
 
 	/* Send a heartbeat to our peer.  */
-	reply = sctp_make_heartbeat(asoc, transport);
+	reply = sctp_make_heartbeat(asoc, transport, 0);
 	if (!reply)
 		return SCTP_DISPOSITION_NOMEM;
 
@@ -1104,8 +1104,15 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net,
 					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *)arg;
+	struct sctp_chunk *reply;
 
-	/* The actual handling will be performed here in a later patch. */
+	if (!sctp_transport_pl_enabled(transport))
+		return SCTP_DISPOSITION_CONSUME;
+
+	reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+	if (!reply)
+		return SCTP_DISPOSITION_NOMEM;
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
 	sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
 			SCTP_TRANSPORT(transport));
 
@@ -1260,6 +1267,15 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
 	if (hbinfo->hb_nonce != link->hb_nonce)
 		return SCTP_DISPOSITION_DISCARD;
 
+	if (hbinfo->probe_size) {
+		if (hbinfo->probe_size != link->pl.probe_size ||
+		    !sctp_transport_pl_enabled(link))
+			return SCTP_DISPOSITION_DISCARD;
+
+		/* The actual handling will be performed here in a later patch. */
+		return SCTP_DISPOSITION_CONSUME;
+	}
+
 	max_interval = link->hbinterval + link->rto;
 
 	/* Check if the timestamp looks valid.  */

From 1dc68c194571acc4027de5f8378227d0c0ff7e13 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:53 -0400
Subject: [PATCH 2628/3292] sctp: do state transition when PROBE_COUNT ==
 MAX_PROBES on HB send path

The state transition is described in rfc8899#section-5.2,
PROBE_COUNT == MAX_PROBES means the probe fails for MAX times, and the
state transition includes:

  - Base -> Error, occurs when BASE_PLPMTU Confirmation Fails,
    pl.pmtu is set to SCTP_MIN_PLPMTU,
    probe_size is still SCTP_BASE_PLPMTU;

  - Search -> Base, occurs when Black Hole Detected,
    pl.pmtu is set to SCTP_BASE_PLPMTU,
    probe_size is set back to SCTP_BASE_PLPMTU;

  - Search Complete -> Base, occurs when Black Hole Detected
    pl.pmtu is set to SCTP_BASE_PLPMTU,
    probe_size is set back to SCTP_BASE_PLPMTU;

Note a black hole is encountered when a sender is unaware that packets
are not being delivered to the destination endpoint. So it includes the
probe failures with equal probe_size to pl.pmtu, and definitely not
include that with greater probe_size than pl.pmtu. The later one is the
normal probe failure where probe_size should decrease back to pl.pmtu
and pl.probe_high is set.  pl.probe_high would be used on HB ACK recv
path in the next patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 net/sctp/sm_statefuns.c    |  2 ++
 net/sctp/transport.c       | 44 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index f7b056f5af37..31165720b28a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1023,6 +1023,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
+void sctp_transport_pl_send(struct sctp_transport *t);
 
 
 /* This is the structure we use to queue packets as they come into
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8edb9186112a..66c409e5b47c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1109,6 +1109,8 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net,
 	if (!sctp_transport_pl_enabled(transport))
 		return SCTP_DISPOSITION_CONSUME;
 
+	sctp_transport_pl_send(transport);
+
 	reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
 	if (!reply)
 		return SCTP_DISPOSITION_NOMEM;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ca3343c2c80e..99620d86e317 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -261,6 +261,50 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
+void sctp_transport_pl_send(struct sctp_transport *t)
+{
+	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+	if (t->pl.probe_count < SCTP_MAX_PROBES) {
+		t->pl.probe_count++;
+		return;
+	}
+
+	if (t->pl.state == SCTP_PL_BASE) {
+		if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
+			t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+			t->pl.pmtu = SCTP_MIN_PLPMTU;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+			sctp_assoc_sync_pmtu(t->asoc);
+		}
+	} else if (t->pl.state == SCTP_PL_SEARCH) {
+		if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+			t->pl.state = SCTP_PL_BASE;  /* Search -> Base */
+			t->pl.probe_size = SCTP_BASE_PLPMTU;
+			t->pl.probe_high = 0;
+
+			t->pl.pmtu = SCTP_BASE_PLPMTU;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+			sctp_assoc_sync_pmtu(t->asoc);
+		} else { /* Normal probe failure. */
+			t->pl.probe_high = t->pl.probe_size;
+			t->pl.probe_size = t->pl.pmtu;
+		}
+	} else if (t->pl.state == SCTP_PL_COMPLETE) {
+		if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+			t->pl.state = SCTP_PL_BASE;  /* Search Complete -> Base */
+			t->pl.probe_size = SCTP_BASE_PLPMTU;
+
+			t->pl.pmtu = SCTP_BASE_PLPMTU;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+			sctp_assoc_sync_pmtu(t->asoc);
+		}
+	}
+	t->pl.probe_count = 1;
+}
+
 bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
 	struct dst_entry *dst = sctp_transport_dst_check(t);

From b87641aff9e772fda15d3386d159646eada2ceef Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:54 -0400
Subject: [PATCH 2629/3292] sctp: do state transition when a probe succeeds on
 HB ACK recv path

As described in rfc8899#section-5.2, when a probe succeeds, there might
be the following state transitions:

  - Base -> Search, occurs when probe succeeds with BASE_PLPMTU,
    pl.pmtu is not changing,
    pl.probe_size increases by SCTP_PL_BIG_STEP,

  - Error -> Search, occurs when probe succeeds with BASE_PLPMTU,
    pl.pmtu is changed from SCTP_MIN_PLPMTU to SCTP_BASE_PLPMTU,
    pl.probe_size increases by SCTP_PL_BIG_STEP.

  - Search -> Search Complete, occurs when probe succeeds with the probe
    size SCTP_MAX_PLPMTU less than pl.probe_high,
    pl.pmtu is not changing, but update *pathmtu* with it,
    pl.probe_size is set back to pl.pmtu to double check it.

  - Search Complete -> Search, occurs when probe succeeds with the probe
    size equal to pl.pmtu,
    pl.pmtu is not changing,
    pl.probe_size increases by SCTP_PL_MIN_STEP.

So search process can be described as:

 1. When it just enters 'Search' state, *pathmtu* is not updated with
    pl.pmtu, and probe_size increases by a big step (SCTP_PL_BIG_STEP)
    each round.

 2. Until pl.probe_high is set when a probe fails, and probe_size
    decreases back to pl.pmtu, as described in the last patch.

 3. When the probe with the new size succeeds, probe_size changes to
    increase by a small step (SCTP_PL_MIN_STEP) due to pl.probe_high
    is set.

 4. Until probe_size is next to pl.probe_high, the searching finishes and
    it goes to 'Complete' state and updates *pathmtu* with pl.pmtu, and
    then probe_size is set to pl.pmtu to confirm by once more probe.

 5. This probe occurs after "30 * probe_inteval", a much longer time than
    that in Search state. Once it is done it goes to 'Search' state again
    with probe_size increased by SCTP_PL_MIN_STEP.

As we can see above, during the searching, pl.pmtu changes while *pathmtu*
doesn't. *pathmtu* is only updated when the search finishes by which it
gets an optimal value for it. A big step is used at the beginning until
it gets close to the optimal value, then it changes to a small step until
it has this optimal value.

The small step is also used in 'Complete' until it goes to 'Search' state
again and the probe with 'pmtu + the small step' succeeds, which means a
higher size could be used. Then probe_size changes to increase by a big
step again until it gets close to the next optimal value.

Note that anytime when black hole is detected, it goes directly to 'Base'
state with pl.pmtu set to SCTP_BASE_PLPMTU, as described in the last patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 net/sctp/sm_statefuns.c    |  2 +-
 net/sctp/transport.c       | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 31165720b28a..9eaa701cda23 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1024,6 +1024,7 @@ void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
 void sctp_transport_pl_send(struct sctp_transport *t);
+void sctp_transport_pl_recv(struct sctp_transport *t);
 
 
 /* This is the structure we use to queue packets as they come into
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 66c409e5b47c..d29b579da904 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1274,7 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
 		    !sctp_transport_pl_enabled(link))
 			return SCTP_DISPOSITION_DISCARD;
 
-		/* The actual handling will be performed here in a later patch. */
+		sctp_transport_pl_recv(link);
 		return SCTP_DISPOSITION_CONSUME;
 	}
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 99620d86e317..79ff5ca6b472 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -305,6 +305,44 @@ void sctp_transport_pl_send(struct sctp_transport *t)
 	t->pl.probe_count = 1;
 }
 
+void sctp_transport_pl_recv(struct sctp_transport *t)
+{
+	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+	t->pl.pmtu = t->pl.probe_size;
+	t->pl.probe_count = 0;
+	if (t->pl.state == SCTP_PL_BASE) {
+		t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */
+		t->pl.probe_size += SCTP_PL_BIG_STEP;
+	} else if (t->pl.state == SCTP_PL_ERROR) {
+		t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */
+
+		t->pl.pmtu = t->pl.probe_size;
+		t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+		sctp_assoc_sync_pmtu(t->asoc);
+		t->pl.probe_size += SCTP_PL_BIG_STEP;
+	} else if (t->pl.state == SCTP_PL_SEARCH) {
+		if (!t->pl.probe_high) {
+			t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+					       SCTP_MAX_PLPMTU);
+			return;
+		}
+		t->pl.probe_size += SCTP_PL_MIN_STEP;
+		if (t->pl.probe_size >= t->pl.probe_high) {
+			t->pl.probe_high = 0;
+			t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
+
+			t->pl.probe_size = t->pl.pmtu;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+			sctp_assoc_sync_pmtu(t->asoc);
+		}
+	} else if (t->pl.state == SCTP_PL_COMPLETE) {
+		t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+		t->pl.probe_size += SCTP_PL_MIN_STEP;
+	}
+}
+
 bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
 	struct dst_entry *dst = sctp_transport_dst_check(t);

From 83696408317735d105ad86a5470b39879ad2ec4d Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:55 -0400
Subject: [PATCH 2630/3292] sctp: do state transition when receiving an icmp
 TOOBIG packet

PLPMTUD will short-circuit the old process for icmp TOOBIG packets.
This part is described in rfc8899#section-4.6.2 (PL_PTB_SIZE =
PTB_SIZE - other_headers_len). Note that from rfc8899#section-5.2
State Machine, each case below is for some specific states only:

  a) PL_PTB_SIZE < MIN_PLPMTU || PL_PTB_SIZE >= PROBED_SIZE,
     discard it, for any state

  b) MIN_PLPMTU < PL_PTB_SIZE < BASE_PLPMTU,
     Base -> Error, for Base state

  c) BASE_PLPMTU <= PL_PTB_SIZE < PLPMTU,
     Search -> Base or Complete -> Base, for Search and Complete states.

  d) PLPMTU < PL_PTB_SIZE < PROBED_SIZE,
     set pl.probe_size to PL_PTB_SIZE then verify it, for Search state.

The most important one is case d), which will help find the optimal
fast during searching. Like when pathmtu = 1392 for SCTP over IPv4,
the search will be (20 is iphdr_len):

  1. probe with 1200 - 20
  2. probe with 1232 - 20
  3. probe with 1264 - 20
  ...
  7. probe with 1388 - 20
  8. probe with 1420 - 20

When sending the probe with 1420 - 20, TOOBIG may come with PL_PTB_SIZE =
1392 - 20. Then it matches case d), and saves some rounds to try with the
1392 - 20 probe. But of course, PLPMTUD doesn't trust TOOBIG packets, and
it will go back to the common searching once the probe with the new size
can't be verified.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c     |  4 +++-
 net/sctp/transport.c | 51 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index d508f6f3dd08..9ffdbd6526e9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -385,7 +385,9 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 			   struct sctp_transport *t, __u32 pmtu)
 {
-	if (!t || (t->pathmtu <= pmtu))
+	if (!t ||
+	    (t->pathmtu <= pmtu &&
+	     t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu))
 		return;
 
 	if (sock_owned_by_user(sk)) {
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 79ff5ca6b472..5cefb4eab8a0 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -343,10 +343,55 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
 	}
 }
 
+static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+{
+	pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n",
+		 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu);
+
+	if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size)
+		return false;
+
+	if (t->pl.state == SCTP_PL_BASE) {
+		if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
+			t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+			t->pl.pmtu = SCTP_MIN_PLPMTU;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+		}
+	} else if (t->pl.state == SCTP_PL_SEARCH) {
+		if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+			t->pl.state = SCTP_PL_BASE;  /* Search -> Base */
+			t->pl.probe_size = SCTP_BASE_PLPMTU;
+			t->pl.probe_count = 0;
+
+			t->pl.probe_high = 0;
+			t->pl.pmtu = SCTP_BASE_PLPMTU;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+		} else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
+			t->pl.probe_size = pmtu;
+			t->pl.probe_count = 0;
+
+			return false;
+		}
+	} else if (t->pl.state == SCTP_PL_COMPLETE) {
+		if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+			t->pl.state = SCTP_PL_BASE;  /* Complete -> Base */
+			t->pl.probe_size = SCTP_BASE_PLPMTU;
+			t->pl.probe_count = 0;
+
+			t->pl.probe_high = 0;
+			t->pl.pmtu = SCTP_BASE_PLPMTU;
+			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+		}
+	}
+
+	return true;
+}
+
 bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
-	struct dst_entry *dst = sctp_transport_dst_check(t);
 	struct sock *sk = t->asoc->base.sk;
+	struct dst_entry *dst;
 	bool change = true;
 
 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
@@ -357,6 +402,10 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 	}
 	pmtu = SCTP_TRUNC4(pmtu);
 
+	if (sctp_transport_pl_enabled(t))
+		return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t));
+
+	dst = sctp_transport_dst_check(t);
 	if (dst) {
 		struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
 		union sctp_addr addr;

From 7307e4fa4d295f6dc017fe4b19467c486a1275d7 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:56 -0400
Subject: [PATCH 2631/3292] sctp: enable PLPMTUD when the transport is ready

sctp_transport_pl_reset() is called whenever any of these 3 members in
transport is changed:

  - probe_interval
  - param_flags & SPP_PMTUD_ENABLE
  - state == ACTIVE

If all are true, start the PLPMTUD when it's not yet started. If any of
these is false, stop the PLPMTUD when it's already running.

sctp_transport_pl_update() is called when the transport dst has changed.
It will restart the PLPMTUD probe. Again, the pathmtu won't change but
use the dst's mtu until the Search phase is done.

Note that after using PLPMTUD, the pathmtu is only initialized with the
dst mtu when the transport dst changes. At other time it is updated by
pl.pmtu. So sctp_transport_pmtu_check() will be called only when PLPMTUD
is disabled in sctp_packet_config().

After this patch, the PLPMTUD feature from RFC8899 will be activated
and can be used by users.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 4 ++++
 net/sctp/output.c    | 3 ++-
 net/sctp/socket.c    | 6 +++++-
 net/sctp/transport.c | 2 ++
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index e01895edd3a4..be29da09cc7a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -716,6 +716,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 		return NULL;
 	}
 
+	sctp_transport_pl_reset(peer);
+
 	/* Attach the remote transport to our asoc.  */
 	list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
 	asoc->peer.transport_count++;
@@ -814,6 +816,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 			spc_state = SCTP_ADDR_CONFIRMED;
 
 		transport->state = SCTP_ACTIVE;
+		sctp_transport_pl_reset(transport);
 		break;
 
 	case SCTP_TRANSPORT_DOWN:
@@ -823,6 +826,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 		 */
 		if (transport->state != SCTP_UNCONFIRMED) {
 			transport->state = SCTP_INACTIVE;
+			sctp_transport_pl_reset(transport);
 			spc_state = SCTP_ADDR_UNREACHABLE;
 		} else {
 			sctp_transport_dst_release(transport);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index ceefb0616d9d..9032ce60d50e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -103,7 +103,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 		sctp_transport_route(tp, NULL, sp);
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
 			sctp_assoc_sync_pmtu(asoc);
-	} else if (!sctp_transport_pmtu_check(tp)) {
+	} else if (!sctp_transport_pl_enabled(tp) &&
+		   !sctp_transport_pmtu_check(tp)) {
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
 			sctp_assoc_sync_pmtu(asoc);
 	}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aba576f53458..e64e01f61b11 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2496,6 +2496,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 				sctp_transport_pmtu(trans, sctp_opt2sk(sp));
 				sctp_assoc_sync_pmtu(asoc);
 			}
+			sctp_transport_pl_reset(trans);
 		} else if (asoc) {
 			asoc->param_flags =
 				(asoc->param_flags & ~SPP_PMTUD) | pmtud_change;
@@ -4506,6 +4507,7 @@ static int sctp_setsockopt_probe_interval(struct sock *sk,
 			return -EINVAL;
 
 		t->probe_interval = msecs_to_jiffies(probe_interval);
+		sctp_transport_pl_reset(t);
 		return 0;
 	}
 
@@ -4522,8 +4524,10 @@ static int sctp_setsockopt_probe_interval(struct sock *sk,
 	 * each transport.
 	 */
 	if (asoc) {
-		list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
+		list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
 			t->probe_interval = msecs_to_jiffies(probe_interval);
+			sctp_transport_pl_reset(t);
+		}
 
 		asoc->probe_interval = msecs_to_jiffies(probe_interval);
 		return 0;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 5cefb4eab8a0..f27b856ea8ce 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -259,6 +259,8 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = sctp_dst_mtu(transport->dst);
 	else
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+
+	sctp_transport_pl_update(transport);
 }
 
 void sctp_transport_pl_send(struct sctp_transport *t)

From 237a6a2e318c1ed7429e72f2640054bdda91646f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:57 -0400
Subject: [PATCH 2632/3292] sctp: remove the unessessary hold for idev in
 sctp_v6_err

Same as in tcp_v6_err() and __udp6_lib_err(), there's no need to
hold idev in sctp_v6_err(), so just call __in6_dev_get() instead.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bd08807c9e44..50ed4de18069 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -126,7 +126,6 @@ static struct notifier_block sctp_inet6addr_notifier = {
 static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			u8 type, u8 code, int offset, __be32 info)
 {
-	struct inet6_dev *idev;
 	struct sock *sk;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
@@ -135,8 +134,6 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	int err, ret = 0;
 	struct net *net = dev_net(skb->dev);
 
-	idev = in6_dev_get(skb->dev);
-
 	/* Fix up skb to look at the embedded net header. */
 	saveip	 = skb->network_header;
 	savesctp = skb->transport_header;
@@ -147,9 +144,8 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	skb->network_header   = saveip;
 	skb->transport_header = savesctp;
 	if (!sk) {
-		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
-		ret = -ENOENT;
-		goto out;
+		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		return -ENOENT;
 	}
 
 	/* Warning:  The sock lock is held.  Remember to call
@@ -185,10 +181,6 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 out_unlock:
 	sctp_err_finish(sk, transport);
-out:
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
-
 	return ret;
 }
 

From f6549bd37b927655c6fecad88428a731cd8a4a34 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:58 -0400
Subject: [PATCH 2633/3292] sctp: extract sctp_v6_err_handle function from
 sctp_v6_err

This patch is to extract sctp_v6_err_handle() from sctp_v6_err() to
only handle the icmp err after the sock lookup, and it also makes
the code clearer.

sctp_v6_err_handle() will be used in sctp over udp's err handling
in the following patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 84 ++++++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 50ed4de18069..6ad422f2d0d0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,17 +122,51 @@ static struct notifier_block sctp_inet6addr_notifier = {
 	.notifier_call = sctp_inet6addr_event,
 };
 
+static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+			       __u8 type, __u8 code, __u32 info)
+{
+	struct sctp_association *asoc = t->asoc;
+	struct sock *sk = asoc->base.sk;
+	struct ipv6_pinfo *np;
+	int err = 0;
+
+	switch (type) {
+	case ICMPV6_PKT_TOOBIG:
+		if (ip6_sk_accept_pmtu(sk))
+			sctp_icmp_frag_needed(sk, asoc, t, info);
+		return;
+	case ICMPV6_PARAMPROB:
+		if (ICMPV6_UNK_NEXTHDR == code) {
+			sctp_icmp_proto_unreachable(sk, asoc, t);
+			return;
+		}
+		break;
+	case NDISC_REDIRECT:
+		sctp_icmp_redirect(sk, t, skb);
+		return;
+	default:
+		break;
+	}
+
+	np = inet6_sk(sk);
+	icmpv6_err_convert(type, code, &err);
+	if (!sock_owned_by_user(sk) && np->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else {
+		sk->sk_err_soft = err;
+	}
+}
+
 /* ICMP error handler. */
 static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-			u8 type, u8 code, int offset, __be32 info)
+		       u8 type, u8 code, int offset, __be32 info)
 {
-	struct sock *sk;
-	struct sctp_association *asoc;
-	struct sctp_transport *transport;
-	struct ipv6_pinfo *np;
-	__u16 saveip, savesctp;
-	int err, ret = 0;
 	struct net *net = dev_net(skb->dev);
+	struct sctp_transport *transport;
+	struct sctp_association *asoc;
+	__u16 saveip, savesctp;
+	struct sock *sk;
 
 	/* Fix up skb to look at the embedded net header. */
 	saveip	 = skb->network_header;
@@ -148,40 +182,10 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		return -ENOENT;
 	}
 
-	/* Warning:  The sock lock is held.  Remember to call
-	 * sctp_err_finish!
-	 */
-
-	switch (type) {
-	case ICMPV6_PKT_TOOBIG:
-		if (ip6_sk_accept_pmtu(sk))
-			sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info));
-		goto out_unlock;
-	case ICMPV6_PARAMPROB:
-		if (ICMPV6_UNK_NEXTHDR == code) {
-			sctp_icmp_proto_unreachable(sk, asoc, transport);
-			goto out_unlock;
-		}
-		break;
-	case NDISC_REDIRECT:
-		sctp_icmp_redirect(sk, transport, skb);
-		goto out_unlock;
-	default:
-		break;
-	}
-
-	np = inet6_sk(sk);
-	icmpv6_err_convert(type, code, &err);
-	if (!sock_owned_by_user(sk) && np->recverr) {
-		sk->sk_err = err;
-		sk->sk_error_report(sk);
-	} else {  /* Only an error on timeout */
-		sk->sk_err_soft = err;
-	}
-
-out_unlock:
+	sctp_v6_err_handle(transport, skb, type, code, ntohl(info));
 	sctp_err_finish(sk, transport);
-	return ret;
+
+	return 0;
 }
 
 static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)

From d83060759a652ccb64d7486fe38c8347b4a64048 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:04:59 -0400
Subject: [PATCH 2634/3292] sctp: extract sctp_v4_err_handle function from
 sctp_v4_err

This patch is to extract sctp_v4_err_handle() from sctp_v4_err() to
only handle the icmp err after the sock lookup, and it also makes
the code clearer.

sctp_v4_err_handle() will be used in sctp over udp's err handling
in the following patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 108 ++++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 58 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 9ffdbd6526e9..83d58d42ea45 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -556,6 +556,49 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
 	sctp_transport_put(t);
 }
 
+static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+			       __u8 type, __u8 code, __u32 info)
+{
+	struct sctp_association *asoc = t->asoc;
+	struct sock *sk = asoc->base.sk;
+	int err = 0;
+
+	switch (type) {
+	case ICMP_PARAMETERPROB:
+		err = EPROTO;
+		break;
+	case ICMP_DEST_UNREACH:
+		if (code > NR_ICMP_UNREACH)
+			return;
+		if (code == ICMP_FRAG_NEEDED) {
+			sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info));
+			return;
+		}
+		if (code == ICMP_PROT_UNREACH) {
+			sctp_icmp_proto_unreachable(sk, asoc, t);
+			return;
+		}
+		err = icmp_err_convert[code].errno;
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code == ICMP_EXC_FRAGTIME)
+			return;
+
+		err = EHOSTUNREACH;
+		break;
+	case ICMP_REDIRECT:
+		sctp_icmp_redirect(sk, t, skb);
+	default:
+		return;
+	}
+	if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+		sk->sk_err = err;
+		sk->sk_error_report(sk);
+	} else {  /* Only an error on timeout */
+		sk->sk_err_soft = err;
+	}
+}
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -574,22 +617,19 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
 int sctp_v4_err(struct sk_buff *skb, __u32 info)
 {
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
-	const int ihlen = iph->ihl * 4;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
-	struct sock *sk;
-	struct sctp_association *asoc = NULL;
-	struct sctp_transport *transport;
-	struct inet_sock *inet;
-	__u16 saveip, savesctp;
-	int err;
 	struct net *net = dev_net(skb->dev);
+	struct sctp_transport *transport;
+	struct sctp_association *asoc;
+	__u16 saveip, savesctp;
+	struct sock *sk;
 
 	/* Fix up skb to look at the embedded net header. */
 	saveip = skb->network_header;
 	savesctp = skb->transport_header;
 	skb_reset_network_header(skb);
-	skb_set_transport_header(skb, ihlen);
+	skb_set_transport_header(skb, iph->ihl * 4);
 	sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original values. */
 	skb->network_header = saveip;
@@ -598,58 +638,10 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info)
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return -ENOENT;
 	}
-	/* Warning:  The sock lock is held.  Remember to call
-	 * sctp_err_finish!
-	 */
 
-	switch (type) {
-	case ICMP_PARAMETERPROB:
-		err = EPROTO;
-		break;
-	case ICMP_DEST_UNREACH:
-		if (code > NR_ICMP_UNREACH)
-			goto out_unlock;
-
-		/* PMTU discovery (RFC1191) */
-		if (ICMP_FRAG_NEEDED == code) {
-			sctp_icmp_frag_needed(sk, asoc, transport,
-					      SCTP_TRUNC4(info));
-			goto out_unlock;
-		} else {
-			if (ICMP_PROT_UNREACH == code) {
-				sctp_icmp_proto_unreachable(sk, asoc,
-							    transport);
-				goto out_unlock;
-			}
-		}
-		err = icmp_err_convert[code].errno;
-		break;
-	case ICMP_TIME_EXCEEDED:
-		/* Ignore any time exceeded errors due to fragment reassembly
-		 * timeouts.
-		 */
-		if (ICMP_EXC_FRAGTIME == code)
-			goto out_unlock;
-
-		err = EHOSTUNREACH;
-		break;
-	case ICMP_REDIRECT:
-		sctp_icmp_redirect(sk, transport, skb);
-		/* Fall through to out_unlock. */
-	default:
-		goto out_unlock;
-	}
-
-	inet = inet_sk(sk);
-	if (!sock_owned_by_user(sk) && inet->recverr) {
-		sk->sk_err = err;
-		sk->sk_error_report(sk);
-	} else {  /* Only an error on timeout */
-		sk->sk_err_soft = err;
-	}
-
-out_unlock:
+	sctp_v4_err_handle(transport, skb, type, code, info);
 	sctp_err_finish(sk, transport);
+
 	return 0;
 }
 

From 9e47df005cab63e545671dba8dfd6852fff1c2cf Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 22 Jun 2021 14:05:00 -0400
Subject: [PATCH 2635/3292] sctp: process sctp over udp icmp err on sctp side

Previously, sctp over udp was using udp tunnel's icmp err process, which
only does sk lookup on sctp side. However for sctp's icmp error process,
there are more things to do, like syncing assoc pmtu/retransmit packets
for toobig type err, and starting proto_unreach_timer for unreach type
err etc.

Now after adding PLPMTUD, which also requires to process toobig type err
on sctp side. This patch is to process icmp err on sctp side by parsing
the type/code/info in .encap_err_lookup and call sctp's icmp processing
functions. Note as the 'redirect' err process needs to know the outer
ip(v6) header's, we have to leave it to udp(v6)_err to handle it.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |  2 ++
 net/sctp/input.c        | 30 ++++++++++++++++++++++++++++++
 net/sctp/ipv6.c         | 30 ++++++++++++++++++++++++++++++
 net/sctp/protocol.c     | 21 ++-------------------
 4 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index f7e083602c10..69bab88ad66b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -145,6 +145,8 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
 			     struct sctphdr *, struct sctp_association **,
 			     struct sctp_transport **);
 void sctp_err_finish(struct sock *, struct sctp_transport *);
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb);
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb);
 void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
 			   struct sctp_transport *t, __u32 pmtu);
 void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 83d58d42ea45..fe6429cc012f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -645,6 +645,36 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info)
 	return 0;
 }
 
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb)
+{
+	struct net *net = dev_net(skb->dev);
+	struct sctp_association *asoc;
+	struct sctp_transport *t;
+	struct icmphdr *hdr;
+	__u32 info = 0;
+
+	skb->transport_header += sizeof(struct udphdr);
+	sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t);
+	if (!sk) {
+		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+		return -ENOENT;
+	}
+
+	skb->transport_header -= sizeof(struct udphdr);
+	hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr));
+	if (hdr->type == ICMP_REDIRECT) {
+		/* can't be handled without outer iphdr known, leave it to udp_err */
+		sctp_err_finish(sk, t);
+		return 0;
+	}
+	if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED)
+		info = ntohs(hdr->un.frag.mtu);
+	sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info);
+
+	sctp_err_finish(sk, t);
+	return 1;
+}
+
 /*
  * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
  *
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 6ad422f2d0d0..05f81a4d0ee7 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -188,6 +188,36 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return 0;
 }
 
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb)
+{
+	struct net *net = dev_net(skb->dev);
+	struct sctp_association *asoc;
+	struct sctp_transport *t;
+	struct icmp6hdr *hdr;
+	__u32 info = 0;
+
+	skb->transport_header += sizeof(struct udphdr);
+	sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t);
+	if (!sk) {
+		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+		return -ENOENT;
+	}
+
+	skb->transport_header -= sizeof(struct udphdr);
+	hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr));
+	if (hdr->icmp6_type == NDISC_REDIRECT) {
+		/* can't be handled without outer ip6hdr known, leave it to udpv6_err */
+		sctp_err_finish(sk, t);
+		return 0;
+	}
+	if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG)
+		info = ntohl(hdr->icmp6_mtu);
+	sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info);
+
+	sctp_err_finish(sk, t);
+	return 1;
+}
+
 static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
 {
 	struct dst_entry *dst = dst_clone(t->dst);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index baa4e770e4ba..bc5db0b404ce 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -850,23 +850,6 @@ static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb)
-{
-	struct sctp_association *asoc;
-	struct sctp_transport *t;
-	int family;
-
-	skb->transport_header += sizeof(struct udphdr);
-	family = (ip_hdr(skb)->version == 4) ? AF_INET : AF_INET6;
-	sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb),
-			     &asoc, &t);
-	if (!sk)
-		return -ENOENT;
-
-	sctp_err_finish(sk, t);
-	return 0;
-}
-
 int sctp_udp_sock_start(struct net *net)
 {
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
@@ -885,7 +868,7 @@ int sctp_udp_sock_start(struct net *net)
 
 	tuncfg.encap_type = 1;
 	tuncfg.encap_rcv = sctp_udp_rcv;
-	tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+	tuncfg.encap_err_lookup = sctp_udp_v4_err;
 	setup_udp_tunnel_sock(net, sock, &tuncfg);
 	net->sctp.udp4_sock = sock->sk;
 
@@ -907,7 +890,7 @@ int sctp_udp_sock_start(struct net *net)
 
 	tuncfg.encap_type = 1;
 	tuncfg.encap_rcv = sctp_udp_rcv;
-	tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+	tuncfg.encap_err_lookup = sctp_udp_v6_err;
 	setup_udp_tunnel_sock(net, sock, &tuncfg);
 	net->sctp.udp6_sock = sock->sk;
 #endif

From 28a5501c3383f0e6643012c187b7c2027ef42aea Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Wed, 16 Jun 2021 10:09:01 +0800
Subject: [PATCH 2636/3292] ieee802154: hwsim: Fix memory leak in hwsim_add_one

No matter from hwsim_remove or hwsim_del_radio_nl, hwsim_del fails to
remove the entry in the edges list. Take the example below, phy0, phy1
and e0 will be deleted, resulting in e1 not freed and accessed in the
future.

              hwsim_phys
                  |
    ------------------------------
    |                            |
phy0 (edges)                 phy1 (edges)
   ----> e1 (idx = 1)             ----> e0 (idx = 0)

Fix this by deleting and freeing all the entries in the edges list
between hwsim_edge_unsubscribe_me and list_del(&phy->list).

Reported-by: syzbot+b80c9959009a9325cdff@syzkaller.appspotmail.com
Fixes: 1c9f4a3fce77 ("ieee802154: hwsim: fix rcu handling")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20210616020901.2759466-1-mudongliangabcd@gmail.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 drivers/net/ieee802154/mac802154_hwsim.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index 366eaae3550a..baa7e21b7f4f 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -824,12 +824,17 @@ err_pib:
 static void hwsim_del(struct hwsim_phy *phy)
 {
 	struct hwsim_pib *pib;
+	struct hwsim_edge *e;
 
 	hwsim_edge_unsubscribe_me(phy);
 
 	list_del(&phy->list);
 
 	rcu_read_lock();
+	list_for_each_entry_rcu(e, &phy->edges, list) {
+		list_del_rcu(&e->list);
+		hwsim_free_edge(e);
+	}
 	pib = rcu_dereference(phy->pib);
 	rcu_read_unlock();
 

From 0303b30375dff5351a79cc2c3c87dfa4fda29bed Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 21 Jun 2021 11:02:44 -0700
Subject: [PATCH 2637/3292] ieee802154: hwsim: avoid possible crash in
 hwsim_del_edge_nl()

Both MAC802154_HWSIM_ATTR_RADIO_ID and MAC802154_HWSIM_ATTR_RADIO_EDGE
must be present to avoid a crash.

Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Stefan Schmidt <stefan@datenfreihafen.org>
Reported-by: syzbot <syzkaller@googlegroups.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20210621180244.882076-1-eric.dumazet@gmail.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 drivers/net/ieee802154/mac802154_hwsim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index baa7e21b7f4f..ebc976b7fcc2 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -480,7 +480,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info)
 	struct hwsim_edge *e;
 	u32 v0, v1;
 
-	if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] &&
+	if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] ||
 	    !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE])
 		return -EINVAL;
 

From d2f77960e5b03b2d373252b2ee150a4a14010f99 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 22 Jun 2021 12:25:18 -0700
Subject: [PATCH 2638/3292] mptcp: add sysctl allow_join_initial_addr_port

This patch added a new sysctl, named allow_join_initial_addr_port, to
control whether allow peers to send join requests to the IP address and
port number used by the initial subflow.

Suggested-by: Florian Westphal <fw@strlen.de>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/mptcp-sysctl.rst | 13 +++++++++++++
 net/mptcp/ctrl.c                          | 16 ++++++++++++++++
 net/mptcp/protocol.h                      |  1 +
 3 files changed, 30 insertions(+)

diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index ee06fd782465..76d939e688b8 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -32,3 +32,16 @@ checksum_enabled - BOOLEAN
 	per-namespace sysctl.
 
 	Default: 0
+
+allow_join_initial_addr_port - BOOLEAN
+	Allow peers to send join requests to the IP address and port number used
+	by the initial subflow if the value is 1. This controls a flag that is
+	sent to the peer at connection time, and whether such join requests are
+	accepted or denied.
+
+	Joins to addresses advertised with ADD_ADDR are not affected by this
+	value.
+
+	This is a per-namespace sysctl.
+
+	Default: 1
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 6c2639bb9c19..7d738bd06f2c 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -24,6 +24,7 @@ struct mptcp_pernet {
 	u8 mptcp_enabled;
 	unsigned int add_addr_timeout;
 	u8 checksum_enabled;
+	u8 allow_join_initial_addr_port;
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@@ -46,11 +47,17 @@ int mptcp_is_checksum_enabled(struct net *net)
 	return mptcp_get_pernet(net)->checksum_enabled;
 }
 
+int mptcp_allow_join_id0(struct net *net)
+{
+	return mptcp_get_pernet(net)->allow_join_initial_addr_port;
+}
+
 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
 	pernet->mptcp_enabled = 1;
 	pernet->add_addr_timeout = TCP_RTO_MAX;
 	pernet->checksum_enabled = 0;
+	pernet->allow_join_initial_addr_port = 1;
 }
 
 #ifdef CONFIG_SYSCTL
@@ -80,6 +87,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
 		.extra1       = SYSCTL_ZERO,
 		.extra2       = SYSCTL_ONE
 	},
+	{
+		.procname = "allow_join_initial_addr_port",
+		.maxlen = sizeof(u8),
+		.mode = 0644,
+		.proc_handler = proc_dou8vec_minmax,
+		.extra1       = SYSCTL_ZERO,
+		.extra2       = SYSCTL_ONE
+	},
 	{}
 };
 
@@ -98,6 +113,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 	table[0].data = &pernet->mptcp_enabled;
 	table[1].data = &pernet->add_addr_timeout;
 	table[2].data = &pernet->checksum_enabled;
+	table[3].data = &pernet->allow_join_initial_addr_port;
 
 	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
 	if (!hdr)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 160c2ab09f19..9aab5fb54716 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -540,6 +540,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
 int mptcp_is_enabled(struct net *net);
 unsigned int mptcp_get_add_addr_timeout(struct net *net);
 int mptcp_is_checksum_enabled(struct net *net);
+int mptcp_allow_join_id0(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool mptcp_subflow_data_available(struct sock *sk);

From bab6b88e056038f618b2fb977d95b05ad3da8d0c Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 22 Jun 2021 12:25:19 -0700
Subject: [PATCH 2639/3292] mptcp: add allow_join_id0 in mptcp_out_options

This patch defined a new flag MPTCP_CAP_DENY_JOIN_ID0 for the third bit,
labeled "C" of the MP_CAPABLE option.

Add a new flag allow_join_id0 in struct mptcp_out_options. If this flag is
set, send out the MP_CAPABLE option with the flag MPTCP_CAP_DENY_JOIN_ID0.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  | 3 ++-
 net/mptcp/options.c  | 6 ++++++
 net/mptcp/protocol.h | 6 ++++--
 net/mptcp/subflow.c  | 1 +
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index d61bbbf11979..cb580b06152f 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -67,7 +67,8 @@ struct mptcp_out_options {
 	u8 backup;
 	u8 reset_reason:4,
 	   reset_transient:1,
-	   csum_reqd:1;
+	   csum_reqd:1,
+	   allow_join_id0:1;
 	u32 nonce;
 	u64 thmac;
 	u32 token;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 25189595ed1d..7a4b6d0bf3f6 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -402,6 +402,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 	if (subflow->request_mptcp) {
 		opts->suboptions = OPTION_MPTCP_MPC_SYN;
 		opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
+		opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
 		*size = TCPOLEN_MPTCP_MPC_SYN;
 		return true;
 	} else if (subflow->request_join) {
@@ -490,6 +491,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 		opts->sndr_key = subflow->local_key;
 		opts->rcvr_key = subflow->remote_key;
 		opts->csum_reqd = READ_ONCE(msk->csum_enabled);
+		opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
 
 		/* Section 3.1.
 		 * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
@@ -827,6 +829,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
 		opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
 		opts->sndr_key = subflow_req->local_key;
 		opts->csum_reqd = subflow_req->csum_reqd;
+		opts->allow_join_id0 = subflow_req->allow_join_id0;
 		*size = TCPOLEN_MPTCP_MPC_SYNACK;
 		pr_debug("subflow_req=%p, local_key=%llu",
 			 subflow_req, subflow_req->local_key);
@@ -1201,6 +1204,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 		if (opts->csum_reqd)
 			flag |= MPTCP_CAP_CHECKSUM_REQD;
 
+		if (!opts->allow_join_id0)
+			flag |= MPTCP_CAP_DENY_JOIN_ID0;
+
 		*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
 				      MPTCP_SUPPORTED_VERSION,
 				      flag);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 9aab5fb54716..f2326f6074b9 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -79,8 +79,9 @@
 #define MPTCP_VERSION_MASK	(0x0F)
 #define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
 #define MPTCP_CAP_EXTENSIBILITY	BIT(6)
+#define MPTCP_CAP_DENY_JOIN_ID0	BIT(5)
 #define MPTCP_CAP_HMAC_SHA256	BIT(0)
-#define MPTCP_CAP_FLAG_MASK	(0x3F)
+#define MPTCP_CAP_FLAG_MASK	(0x1F)
 
 /* MPTCP DSS flags */
 #define MPTCP_DSS_DATA_FIN	BIT(4)
@@ -350,7 +351,8 @@ struct mptcp_subflow_request_sock {
 	u16	mp_capable : 1,
 		mp_join : 1,
 		backup : 1,
-		csum_reqd : 1;
+		csum_reqd : 1,
+		allow_join_id0 : 1;
 	u8	local_id;
 	u8	remote_id;
 	u64	local_key;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 585951e7e52f..e9e8ce862218 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -109,6 +109,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
 	subflow_req->mp_capable = 0;
 	subflow_req->mp_join = 0;
 	subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
+	subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
 	subflow_req->msk = NULL;
 	mptcp_token_init_request(req);
 }

From df377be38725ced628251c1a3b954ef932a5586e Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 22 Jun 2021 12:25:20 -0700
Subject: [PATCH 2640/3292] mptcp: add deny_join_id0 in mptcp_options_received

This patch added a new flag named deny_join_id0 in struct
mptcp_options_received. Set it when MP_CAPABLE with the flag
MPTCP_CAP_DENYJOIN_ID0 is received.

Also add a new flag remote_deny_join_id0 in struct mptcp_pm_data. When the
flag deny_join_id0 is set, set this remote_deny_join_id0 flag.

In mptcp_pm_create_subflow_or_signal_addr, if the remote_deny_join_id0 flag
is set, and the remote address id is zero, stop this connection.

Suggested-by: Florian Westphal <fw@strlen.de>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c    | 7 +++++++
 net/mptcp/pm.c         | 1 +
 net/mptcp/pm_netlink.c | 3 ++-
 net/mptcp/protocol.h   | 4 +++-
 net/mptcp/subflow.c    | 2 ++
 5 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 7a4b6d0bf3f6..a05270996613 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -83,6 +83,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		if (flags & MPTCP_CAP_CHECKSUM_REQD)
 			mp_opt->csum_reqd = 1;
 
+		if (flags & MPTCP_CAP_DENY_JOIN_ID0)
+			mp_opt->deny_join_id0 = 1;
+
 		mp_opt->mp_capable = 1;
 		if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
 			mp_opt->sndr_key = get_unaligned_be64(ptr);
@@ -360,6 +363,7 @@ void mptcp_get_options(const struct sock *sk,
 	mp_opt->mp_prio = 0;
 	mp_opt->reset = 0;
 	mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
+	mp_opt->deny_join_id0 = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -908,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
 		return false;
 	}
 
+	if (mp_opt->deny_join_id0)
+		WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+
 	if (unlikely(!READ_ONCE(msk->pm.server_side)))
 		pr_warn_once("bogus mpc option on established client sk");
 	mptcp_subflow_fully_established(subflow, mp_opt);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 9d00fa6d22e9..639271e09604 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	WRITE_ONCE(msk->pm.accept_addr, false);
 	WRITE_ONCE(msk->pm.accept_subflow, false);
+	WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
 	msk->pm.status = 0;
 
 	spin_lock_init(&msk->pm.lock);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index d4732a4f223e..d2591ebf01d9 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -451,7 +451,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 
 	/* check if should create a new subflow */
 	if (msk->pm.local_addr_used < local_addr_max &&
-	    msk->pm.subflows < subflows_max) {
+	    msk->pm.subflows < subflows_max &&
+	    !READ_ONCE(msk->pm.remote_deny_join_id0)) {
 		local = select_local_address(pernet, msk);
 		if (local) {
 			struct mptcp_addr_info remote = { 0 };
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f2326f6074b9..f4eaa5f57e3f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -138,7 +138,8 @@ struct mptcp_options_received {
 		mp_prio : 1,
 		echo : 1,
 		csum_reqd : 1,
-		backup : 1;
+		backup : 1,
+		deny_join_id0 : 1;
 	u32	token;
 	u32	nonce;
 	u64	thmac;
@@ -193,6 +194,7 @@ struct mptcp_pm_data {
 	bool		work_pending;
 	bool		accept_addr;
 	bool		accept_subflow;
+	bool		remote_deny_join_id0;
 	u8		add_addr_signaled;
 	u8		add_addr_accepted;
 	u8		local_addr_used;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e9e8ce862218..d55f4ef736a5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -408,6 +408,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
 		if (mp_opt.csum_reqd)
 			WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+		if (mp_opt.deny_join_id0)
+			WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
 		subflow->mp_capable = 1;
 		subflow->can_ack = 1;
 		subflow->remote_key = mp_opt.sndr_key;

From 0cddb4a6f4e3a008d0a62be31829bdde21cf0530 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 22 Jun 2021 12:25:21 -0700
Subject: [PATCH 2641/3292] selftests: mptcp: add deny_join_id0 testcases

This patch added a new argument '-d' for mptcp_join.sh script, to invoke
the testcases for the MP_CAPABLE 'C' flag.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/mptcp/mptcp_join.sh | 75 ++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 523c7797f30a..9a191c1a5de8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -139,6 +139,17 @@ reset_with_checksum()
 	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
 }
 
+reset_with_allow_join_id0()
+{
+	local ns1_enable=$1
+	local ns2_enable=$2
+
+	reset
+
+	ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
+	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+}
+
 ip -Version > /dev/null 2>&1
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without ip tool"
@@ -1462,6 +1473,63 @@ checksum_tests()
 	chk_csum_nr "checksum test 1 0"
 }
 
+deny_join_id0_tests()
+{
+	# subflow allow join id0 ns1
+	reset_with_allow_join_id0 1 0
+	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+
+	# subflow allow join id0 ns2
+	reset_with_allow_join_id0 0 1
+	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+
+	# signal address allow join id0 ns1
+	# ADD_ADDRs are not affected by allow_join_id0 value.
+	reset_with_allow_join_id0 1 0
+	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "signal address allow join id0 ns1" 1 1 1
+	chk_add_nr 1 1
+
+	# signal address allow join id0 ns2
+	# ADD_ADDRs are not affected by allow_join_id0 value.
+	reset_with_allow_join_id0 0 1
+	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "signal address allow join id0 ns2" 1 1 1
+	chk_add_nr 1 1
+
+	# subflow and address allow join id0 ns1
+	reset_with_allow_join_id0 1 0
+	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "subflow and address allow join id0 1" 2 2 2
+
+	# subflow and address allow join id0 ns2
+	reset_with_allow_join_id0 0 1
+	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "subflow and address allow join id0 2" 1 1 1
+}
+
 all_tests()
 {
 	subflows_tests
@@ -1476,6 +1544,7 @@ all_tests()
 	add_addr_ports_tests
 	syncookies_tests
 	checksum_tests
+	deny_join_id0_tests
 }
 
 usage()
@@ -1493,6 +1562,7 @@ usage()
 	echo "  -p add_addr_ports_tests"
 	echo "  -k syncookies_tests"
 	echo "  -S checksum_tests"
+	echo "  -d deny_join_id0_tests"
 	echo "  -c capture pcap files"
 	echo "  -C enable data checksum"
 	echo "  -h help"
@@ -1528,7 +1598,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fsltra64bpkchCS' opt; do
+while getopts 'fsltra64bpkdchCS' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -1566,6 +1636,9 @@ while getopts 'fsltra64bpkchCS' opt; do
 		S)
 			checksum_tests
 			;;
+		d)
+			deny_join_id0_tests
+			;;
 		c)
 			;;
 		C)

From d8e336f77e3bce440f78432d259c9ddedada7b79 Mon Sep 17 00:00:00 2001
From: Yonglong Li <liyonglong@chinatelecom.cn>
Date: Tue, 22 Jun 2021 12:25:22 -0700
Subject: [PATCH 2642/3292] selftests: mptcp: turn rp_filter off on each NIC

To turn rp_filter off we should:

  echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter

and

  echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter

before NIC created.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 3aeef3bcb101..fd63ebfe9a2b 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -60,6 +60,8 @@ setup()
 	for i in "$ns1" "$ns2" "$ns3";do
 		ip netns add $i || exit $ksft_skip
 		ip -net $i link set lo up
+		ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
+		ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
 	done
 
 	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
@@ -80,7 +82,6 @@ setup()
 
 	ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
 	ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
-	ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
 
 	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
 	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad

From fde56eea01f96b664eb63033990be0fd2a945da5 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 22 Jun 2021 12:25:23 -0700
Subject: [PATCH 2643/3292] mptcp: refine mptcp_cleanup_rbuf

The current cleanup rbuf tries a bit too hard to avoid acquiring
the subflow socket lock. We may end-up delaying the needed ack,
or skip acking a blocked subflow.

Address the above extending the conditions used to trigger the cleanup
to reflect more closely what TCP does and invoking tcp_cleanup_rbuf()
on all the active subflows.

Note that we can't replicate the exact tests implemented in
tcp_cleanup_rbuf(), as MPTCP lacks some of the required info - e.g.
ping-pong mode.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 56 ++++++++++++++++++--------------------------
 net/mptcp/protocol.h |  1 -
 2 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cf75be02eb00..ce0c45dfb79e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
 	}
 }
 
-static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
 {
 	bool slow;
-	int ret;
 
 	slow = lock_sock_fast(ssk);
-	ret = tcp_can_send_ack(ssk);
-	if (ret)
+	if (tcp_can_send_ack(ssk))
 		tcp_cleanup_rbuf(ssk, 1);
 	unlock_sock_fast(ssk, slow);
-	return ret;
+}
+
+static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
+{
+	const struct inet_connection_sock *icsk = inet_csk(ssk);
+	bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+	const struct tcp_sock *tp = tcp_sk(ssk);
+
+	return (ack_pending & ICSK_ACK_SCHED) &&
+		((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
+		  READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
+		 (rx_empty && ack_pending &
+			      (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
 }
 
 static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
 {
-	struct sock *ack_hint = READ_ONCE(msk->ack_hint);
 	int old_space = READ_ONCE(msk->old_wspace);
 	struct mptcp_subflow_context *subflow;
 	struct sock *sk = (struct sock *)msk;
-	bool cleanup;
+	int space =  __mptcp_space(sk);
+	bool cleanup, rx_empty;
 
-	/* this is a simple superset of what tcp_cleanup_rbuf() implements
-	 * so that we don't have to acquire the ssk socket lock most of the time
-	 * to do actually nothing
-	 */
-	cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
-	if (!cleanup)
-		return;
+	cleanup = (space > 0) && (space >= (old_space << 1));
+	rx_empty = !atomic_read(&sk->sk_rmem_alloc);
 
-	/* if the hinted ssk is still active, try to use it */
-	if (likely(ack_hint)) {
-		mptcp_for_each_subflow(msk, subflow) {
-			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
-			if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
-				return;
-		}
+		if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
+			mptcp_subflow_cleanup_rbuf(ssk);
 	}
-
-	/* otherwise pick the first active subflow */
-	mptcp_for_each_subflow(msk, subflow)
-		if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
-			return;
 }
 
 static bool mptcp_check_data_fin(struct sock *sk)
@@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 			break;
 		}
 	} while (more_data_avail);
-	WRITE_ONCE(msk->ack_hint, ssk);
 
 	*bytes += moved;
 	return done;
@@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 		__mptcp_update_rmem(sk);
 		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 		mptcp_data_unlock(sk);
-		tcp_cleanup_rbuf(ssk, moved);
 
 		if (unlikely(ssk->sk_err))
 			__mptcp_error_report(sk);
@@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 		ret |= __mptcp_ofo_queue(msk);
 		__mptcp_splice_receive_queue(sk);
 		mptcp_data_unlock(sk);
-		mptcp_cleanup_rbuf(msk);
 	}
 	if (ret)
 		mptcp_check_data_fin((struct sock *)msk);
@@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	if (ssk == msk->last_snd)
 		msk->last_snd = NULL;
 
-	if (ssk == msk->ack_hint)
-		msk->ack_hint = NULL;
-
 	if (ssk == msk->first)
 		msk->first = NULL;
 
@@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
 	msk->rmem_released = 0;
 	msk->tx_pending_data = 0;
 
-	msk->ack_hint = NULL;
 	msk->first = NULL;
 	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
 	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f4eaa5f57e3f..d8ad3270dfab 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -243,7 +243,6 @@ struct mptcp_sock {
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	bool		csum_enabled;
 	spinlock_t	join_list_lock;
-	struct sock	*ack_hint;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
 	struct rb_root  out_of_order_queue;

From 7506d211b932870155bcb39e3dd9e39fab45a7c7 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 16 Jun 2021 15:55:00 -0700
Subject: [PATCH 2644/3292] bpf: Fix null ptr deref with mixed tail calls and
 subprogs

The sub-programs prog->aux->poke_tab[] is populated in jit_subprogs() and
then used when emitting 'BPF_JMP|BPF_TAIL_CALL' insn->code from the
individual JITs. The poke_tab[] to use is stored in the insn->imm by
the code adding it to that array slot. The JIT then uses imm to find the
right entry for an individual instruction. In the x86 bpf_jit_comp.c
this is done by calling emit_bpf_tail_call_direct with the poke_tab[]
of the imm value.

However, we observed the below null-ptr-deref when mixing tail call
programs with subprog programs. For this to happen we just need to
mix bpf-2-bpf calls and tailcalls with some extra calls or instructions
that would be patched later by one of the fixup routines. So whats
happening?

Before the fixup_call_args() -- where the jit op is done -- various
code patching is done by do_misc_fixups(). This may increase the
insn count, for example when we patch map_lookup_up using map_gen_lookup
hook. This does two things. First, it means the instruction index,
insn_idx field, of a tail call instruction will move by a 'delta'.

In verifier code,

 struct bpf_jit_poke_descriptor desc = {
  .reason = BPF_POKE_REASON_TAIL_CALL,
  .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
  .tail_call.key = bpf_map_key_immediate(aux),
  .insn_idx = i + delta,
 };

Then subprog start values subprog_info[i].start will be updated
with the delta and any poke descriptor index will also be updated
with the delta in adjust_poke_desc(). If we look at the adjust
subprog starts though we see its only adjusted when the delta
occurs before the new instructions,

        /* NOTE: fake 'exit' subprog should be updated as well. */
        for (i = 0; i <= env->subprog_cnt; i++) {
                if (env->subprog_info[i].start <= off)
                        continue;

Earlier subprograms are not changed because their start values
are not moved. But, adjust_poke_desc() does the offset + delta
indiscriminately. The result is poke descriptors are potentially
corrupted.

Then in jit_subprogs() we only populate the poke_tab[]
when the above insn_idx is less than the next subprogram start. From
above we corrupted our insn_idx so we might incorrectly assume a
poke descriptor is not used in a subprogram omitting it from the
subprogram. And finally when the jit runs it does the deref of poke_tab
when emitting the instruction and crashes with below. Because earlier
step omitted the poke descriptor.

The fix is straight forward with above context. Simply move same logic
from adjust_subprog_starts() into adjust_poke_descs() and only adjust
insn_idx when needed.

[   82.396354] bpf_testmod: version magic '5.12.0-rc2alu+ SMP preempt mod_unload ' should be '5.12.0+ SMP preempt mod_unload '
[   82.623001] loop10: detected capacity change from 0 to 8
[   88.487424] ==================================================================
[   88.487438] BUG: KASAN: null-ptr-deref in do_jit+0x184a/0x3290
[   88.487455] Write of size 8 at addr 0000000000000008 by task test_progs/5295
[   88.487471] CPU: 7 PID: 5295 Comm: test_progs Tainted: G          I       5.12.0+ #386
[   88.487483] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019
[   88.487490] Call Trace:
[   88.487498]  dump_stack+0x93/0xc2
[   88.487515]  kasan_report.cold+0x5f/0xd8
[   88.487530]  ? do_jit+0x184a/0x3290
[   88.487542]  do_jit+0x184a/0x3290
 ...
[   88.487709]  bpf_int_jit_compile+0x248/0x810
 ...
[   88.487765]  bpf_check+0x3718/0x5140
 ...
[   88.487920]  bpf_prog_load+0xa22/0xf10

Fixes: a748c6975dea3 ("bpf: propagate poke descriptors to subprograms")
Reported-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6a27574242d..6e2ebcb0d66f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11459,7 +11459,7 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
 	}
 }
 
-static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
+static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
 {
 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
 	int i, sz = prog->aux->size_poke_tab;
@@ -11467,6 +11467,8 @@ static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
 
 	for (i = 0; i < sz; i++) {
 		desc = &tab[i];
+		if (desc->insn_idx <= off)
+			continue;
 		desc->insn_idx += len - 1;
 	}
 }
@@ -11487,7 +11489,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	if (adjust_insn_aux_data(env, new_prog, off, len))
 		return NULL;
 	adjust_subprog_starts(env, off, len);
-	adjust_poke_descs(new_prog, len);
+	adjust_poke_descs(new_prog, off, len);
 	return new_prog;
 }
 

From 9201ab5f55223760ce7e8890815b1beaf41e2e7a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 22 Jun 2021 14:09:21 +0800
Subject: [PATCH 2645/3292] net/mlx5: Fix missing error code in mlx5_init_fs()

The error code is missing in this code scenario, add the error code
'-ENOMEM' to the return value 'err'.

Eliminate the follow smatch warning:

drivers/net/ethernet/mellanox/mlx5/core/fs_core.c:2973 mlx5_init_fs()
warn: missing error code 'err'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: 4a98544d1827 ("net/mlx5: Move chains ft pool to be used by all firmware steering").
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2cd7aea5d329..b8617458a3fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2969,8 +2969,11 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 		return err;
 
 	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
-	if (!steering)
+	if (!steering) {
+		err = -ENOMEM;
 		goto err;
+	}
+
 	steering->dev = dev;
 	dev->priv.steering = steering;
 

From d4472a4b8c61a76a1fdcca8a03d6470d7565d87c Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 17 Jun 2021 17:03:59 -0700
Subject: [PATCH 2646/3292] net/mlx5: Use cpumask_available() in
 mlx5_eq_create_generic()

When CONFIG_CPUMASK_OFFSTACK is unset, cpumask_var_t is not a pointer
but a single element array, meaning its address in a structure cannot be
NULL as long as it is not the first element, which it is not. This
results in a clang warning:

drivers/net/ethernet/mellanox/mlx5/core/eq.c:715:14: warning: address of
array 'param->affinity' will always evaluate to 'true'
[-Wpointer-bool-conversion]
        if (!param->affinity)
            ~~~~~~~~^~~~~~~~
1 warning generated.

The helper cpumask_available was added in commit f7e30f01a9e2 ("cpumask:
Add helper cpumask_available()") to handle situations like this so use
it to keep the meaning of the code the same while resolving the warning.

Fixes: e4e3f24b822f ("net/mlx5: Provide cpumask at EQ creation phase")
Link: https://github.com/ClangBuiltLinux/linux/issues/1400
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c7efd177da1f..6e074cc457de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -712,7 +712,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev,
 	struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
 	int err;
 
-	if (!param->affinity)
+	if (!cpumask_available(param->affinity))
 		return ERR_PTR(-EINVAL);
 
 	if (!eq)

From 2cc7dad75da2bbbe2cd39caf295e4b3343f51dcb Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 16 Jun 2021 15:19:50 +0100
Subject: [PATCH 2647/3292] net/mlx5: Fix spelling mistake "enught" -> "enough"

There is a spelling mistake in a mlx5_core_err error message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 27de8da8edf7..b25f764daa08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -479,7 +479,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 	if (!mlx5_sf_max_functions(dev))
 		return 0;
 	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
-		mlx5_core_err(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
+		mlx5_core_err(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
 		return 0;
 	}
 

From 5bf3ee97f4669dc9353f5aaf1c9f1a38cfbcdb1c Mon Sep 17 00:00:00 2001
From: caihuoqing <caihuoqing@baidu.com>
Date: Thu, 17 Jun 2021 10:32:15 +0800
Subject: [PATCH 2648/3292] net/mlx5: remove "default n" from Kconfig

remove "default n" and "No" is default

Signed-off-by: caihuoqing <caihuoqing@baidu.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index d62f90aedade..e1a5a79e27c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -12,7 +12,6 @@ config MLX5_CORE
 	depends on MLXFW || !MLXFW
 	depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
 	depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
-	default n
 	help
 	  Core driver for low level functionality of the ConnectX-4 and
 	  Connect-IB cards by Mellanox Technologies.
@@ -36,7 +35,6 @@ config MLX5_CORE_EN
 	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
 	select PAGE_POOL
 	select DIMLIB
-	default n
 	help
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
 
@@ -141,7 +139,6 @@ config MLX5_CORE_EN_DCB
 config MLX5_CORE_IPOIB
 	bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support"
 	depends on MLX5_CORE_EN
-	default n
 	help
 	  MLX5 IPoIB offloads & acceleration support.
 
@@ -149,7 +146,6 @@ config MLX5_FPGA_IPSEC
 	bool "Mellanox Technologies IPsec Innova support"
 	depends on MLX5_CORE
 	depends on MLX5_FPGA
-	default n
 	help
 	Build IPsec support for the Innova family of network cards by Mellanox
 	Technologies. Innova network cards are comprised of a ConnectX chip
@@ -163,7 +159,6 @@ config MLX5_IPSEC
 	depends on XFRM_OFFLOAD
 	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
 	select MLX5_ACCEL
-	default n
 	help
 	Build IPsec support for the Connect-X family of network cards by Mellanox
 	Technologies.
@@ -176,7 +171,6 @@ config MLX5_EN_IPSEC
 	depends on XFRM_OFFLOAD
 	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
 	depends on MLX5_FPGA_IPSEC || MLX5_IPSEC
-	default n
 	help
 	  Build support for IPsec cryptography-offload acceleration in the NIC.
 	  Note: Support for hardware with this capability needs to be selected
@@ -189,7 +183,6 @@ config MLX5_FPGA_TLS
 	depends on MLX5_CORE_EN
 	depends on MLX5_FPGA
 	select MLX5_EN_TLS
-	default n
 	help
 	Build TLS support for the Innova family of network cards by Mellanox
 	Technologies. Innova network cards are comprised of a ConnectX chip
@@ -204,7 +197,6 @@ config MLX5_TLS
 	depends on MLX5_CORE_EN
 	select MLX5_ACCEL
 	select MLX5_EN_TLS
-	default n
 	help
 	Build TLS support for the Connect-X family of network cards by Mellanox
 	Technologies.
@@ -227,7 +219,6 @@ config MLX5_SW_STEERING
 config MLX5_SF
 	bool "Mellanox Technologies subfunction device support using auxiliary device"
 	depends on MLX5_CORE && MLX5_CORE_EN
-	default n
 	help
 	Build support for subfuction device in the NIC. A Mellanox subfunction
 	device can support RDMA, netdevice and vdpa device.

From dd7cf00f87dc6cba8dd87dd6c9a5f18f187976ff Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@nvidia.com>
Date: Mon, 14 Jun 2021 17:33:47 +0300
Subject: [PATCH 2649/3292] net/mlx5: Optimize mlx5e_feature_checks for non
 IPsec packet

mlx5e_ipsec_feature_check belongs to mlx5e_tunnel_features_check.
Also, IPsec is not the default configuration so it should be
checked at the end instead of the beginning of mlx5e_features_check.

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h      | 15 +++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  8 +++++---
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 3e80742a3caf..cfa98272e4a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -93,8 +93,8 @@ static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
 void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
 			       struct mlx5_wqe_eth_seg *eseg);
 
-static inline bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
-					     netdev_features_t features)
+static inline netdev_features_t
+mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
 {
 	struct sec_path *sp = skb_sec_path(skb);
 
@@ -102,9 +102,11 @@ static inline bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_dev
 		struct xfrm_state *x = sp->xvec[0];
 
 		if (x && x->xso.offload_handle)
-			return true;
+			return features;
 	}
-	return false;
+
+	/* Disable CSUM and GSO for software IPsec */
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
 #else
@@ -120,8 +122,9 @@ static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
 }
 
 static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; }
-static inline bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
-					     netdev_features_t features) { return false; }
+static inline netdev_features_t
+mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+{ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); }
 #endif /* CONFIG_MLX5_EN_IPSEC */
 
 #endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7d732fac09f0..414a73d16619 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4330,6 +4330,11 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 		/* Support Geneve offload for default UDP port */
 		if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
 			return features;
+#endif
+		break;
+#ifdef CONFIG_MLX5_EN_IPSEC
+	case IPPROTO_ESP:
+		return mlx5e_ipsec_feature_check(skb, features);
 #endif
 	}
 
@@ -4347,9 +4352,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 	features = vlan_features_check(skb, features);
 	features = vxlan_features_check(skb, features);
 
-	if (mlx5e_ipsec_feature_check(skb, netdev, features))
-		return features;
-
 	/* Validate if the tunneled packet is being offloaded by HW */
 	if (skb->encapsulation &&
 	    (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))

From fa4535238fb5f306f95de89371a993057b32b2a4 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@nvidia.com>
Date: Mon, 14 Jun 2021 17:33:48 +0300
Subject: [PATCH 2650/3292] net/xfrm: Add inner_ipproto into sec_path

The inner_ipproto saves the inner IP protocol of the plain
text packet. This allows vendor's IPsec feature making offload
decision at skb's features_check and configuring hardware at
ndo_start_xmit.

For example, ConnectX6-DX IPsec device needs the plaintext's
IP protocol to support partial checksum offload on
VXLAN/GENEVE packet over IPsec transport mode tunnel.

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/net/xfrm.h     |  1 +
 net/xfrm/xfrm_output.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c58a6d4eb610..1d803e890c76 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1024,6 +1024,7 @@ struct xfrm_offload {
 #define CRYPTO_INVALID_PROTOCOL			128
 
 	__u8			proto;
+	__u8			inner_ipproto;
 };
 
 struct sec_path {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4cb0ff4dcf4..e321fc63a2e9 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -565,6 +565,42 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
 	return 0;
 }
 
+/* For partial checksum offload, the outer header checksum is calculated
+ * by software and the inner header checksum is calculated by hardware.
+ * This requires hardware to know the inner packet type to calculate
+ * the inner header checksum. Save inner ip protocol here to avoid
+ * traversing the packet in the vendor's xmit code.
+ * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise,
+ * get the ip protocol from the IP header.
+ */
+static void xfrm_get_inner_ipproto(struct sk_buff *skb)
+{
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	const struct ethhdr *eth;
+
+	if (!xo)
+		return;
+
+	if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
+		xo->inner_ipproto = skb->inner_ipproto;
+		return;
+	}
+
+	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
+		return;
+
+	eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+	switch (ntohs(eth->h_proto)) {
+	case ETH_P_IPV6:
+		xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
+		break;
+	case ETH_P_IP:
+		xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
+		break;
+	}
+}
+
 int xfrm_output(struct sock *sk, struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
@@ -594,12 +630,15 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
 			kfree_skb(skb);
 			return -ENOMEM;
 		}
-		skb->encapsulation = 1;
 
 		sp->olen++;
 		sp->xvec[sp->len++] = x;
 		xfrm_state_hold(x);
 
+		if (skb->encapsulation)
+			xfrm_get_inner_ipproto(skb);
+		skb->encapsulation = 1;
+
 		if (skb_is_gso(skb)) {
 			if (skb->inner_protocol)
 				return xfrm_output_gso(net, sk, skb);

From f1267798c9809283ff45664bc2c4e465f1500a4b Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@nvidia.com>
Date: Mon, 14 Jun 2021 17:33:49 +0300
Subject: [PATCH 2651/3292] net/mlx5: Fix checksum issue of VXLAN and IPsec
 crypto offload

The packet is VXLAN packet over IPsec transport mode tunnel
which has the following format: [IP1 | ESP | UDP | VXLAN | IP2 | TCP]
NVIDIA ConnectX card cannot do checksum offload for two L4 headers.
The solution is using the checksum partial offload similar to
VXLAN | TCP packet. Hardware calculates IP1, IP2 and TCP checksums and
software calculates UDP checksum. However, unlike VXLAN | TCP case,
IPsec's mlx5 driver cannot access the inner plaintext IP protocol type.
Therefore, inner_ipproto is added in the sec_path structure
to provide this information. Also, utilize the skb's csum_start to
program L4 inner checksum offset.

While at it, remove the call to mlx5e_set_eseg_swp and setup software parser
fields directly in mlx5e_ipsec_set_swp. mlx5e_set_eseg_swp is not
needed as the two features (GENEVE and IPsec) are different and adding
this sharing layer creates unnecessary complexity and affect
performance.

For the case VXLAN packet over IPsec tunnel mode tunnel, checksum offload
is disabled because the hardware does not support checksum offload for
three L3 (IP) headers.

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c  | 65 ++++++++++++++-----
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h  | 24 ++++++-
 2 files changed, 70 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index a97e8d205094..33de8f0092a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -136,8 +136,6 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
 				struct mlx5_wqe_eth_seg *eseg, u8 mode,
 				struct xfrm_offload *xo)
 {
-	struct mlx5e_swp_spec swp_spec = {};
-
 	/* Tunnel Mode:
 	 * SWP:      OutL3       InL3  InL4
 	 * Pkt: MAC  IP     ESP  IP    L4
@@ -146,23 +144,58 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
 	 * SWP:      OutL3       InL4
 	 *           InL3
 	 * Pkt: MAC  IP     ESP  L4
+	 *
+	 * Tunnel(VXLAN TCP/UDP) over Transport Mode
+	 * SWP:      OutL3                   InL3  InL4
+	 * Pkt: MAC  IP     ESP  UDP  VXLAN  IP    L4
 	 */
-	swp_spec.l3_proto = skb->protocol;
-	swp_spec.is_tun = mode == XFRM_MODE_TUNNEL;
-	if (swp_spec.is_tun) {
-		if (xo->proto == IPPROTO_IPV6) {
-			swp_spec.tun_l3_proto = htons(ETH_P_IPV6);
-			swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr;
-		} else {
-			swp_spec.tun_l3_proto = htons(ETH_P_IP);
-			swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol;
-		}
-	} else {
-		swp_spec.tun_l3_proto = skb->protocol;
-		swp_spec.tun_l4_proto = xo->proto;
+
+	/* Shared settings */
+	eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+	if (skb->protocol == htons(ETH_P_IPV6))
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+	/* Tunnel mode */
+	if (mode == XFRM_MODE_TUNNEL) {
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+		if (xo->proto == IPPROTO_IPV6)
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP)
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+		return;
 	}
 
-	mlx5e_set_eseg_swp(skb, eseg, &swp_spec);
+	/* Transport mode */
+	if (mode != XFRM_MODE_TRANSPORT)
+		return;
+
+	if (!xo->inner_ipproto) {
+		eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+		if (skb->protocol == htons(ETH_P_IPV6))
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		if (xo->proto == IPPROTO_UDP)
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+		return;
+	}
+
+	/* Tunnel(VXLAN TCP/UDP) over Transport Mode */
+	switch (xo->inner_ipproto) {
+	case IPPROTO_UDP:
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+		fallthrough;
+	case IPPROTO_TCP:
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		eseg->swp_inner_l4_offset = (skb->csum_start + skb->head - skb->data) / 2;
+		if (skb->protocol == htons(ETH_P_IPV6))
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		break;
+	default:
+		break;
+	}
+
+	return;
 }
 
 void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index cfa98272e4a9..5120a59361e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -96,16 +96,34 @@ void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
 static inline netdev_features_t
 mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
 {
+	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct sec_path *sp = skb_sec_path(skb);
 
-	if (sp && sp->len) {
+	if (sp && sp->len && xo) {
 		struct xfrm_state *x = sp->xvec[0];
 
-		if (x && x->xso.offload_handle)
-			return features;
+		if (!x || !x->xso.offload_handle)
+			goto out_disable;
+
+		if (xo->inner_ipproto) {
+			/* Cannot support tunnel packet over IPsec tunnel mode
+			 * because we cannot offload three IP header csum
+			 */
+			if (x->props.mode == XFRM_MODE_TUNNEL)
+				goto out_disable;
+
+			/* Only support UDP or TCP L4 checksum */
+			if (xo->inner_ipproto != IPPROTO_UDP &&
+			    xo->inner_ipproto != IPPROTO_TCP)
+				goto out_disable;
+		}
+
+		return features;
+
 	}
 
 	/* Disable CSUM and GSO for software IPsec */
+out_disable:
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 

From 01d5d96542fd4e383da79593f8a3450995ce2257 Mon Sep 17 00:00:00 2001
From: Leah Rumancik <leah.rumancik@gmail.com>
Date: Tue, 18 May 2021 15:13:25 +0000
Subject: [PATCH 2652/3292] ext4: add discard/zeroout flags to journal flush

Add a flags argument to jbd2_journal_flush to enable discarding or
zero-filling the journal blocks while flushing the journal.

Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Link: https://lore.kernel.org/r/20210518151327.130198-1-leah.rumancik@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c      |   4 +-
 fs/ext4/ioctl.c      |   6 +--
 fs/ext4/super.c      |   6 +--
 fs/jbd2/journal.c    | 119 +++++++++++++++++++++++++++++++++++++++++--
 fs/ocfs2/alloc.c     |   2 +-
 fs/ocfs2/journal.c   |   8 +--
 include/linux/jbd2.h |   6 ++-
 7 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 211acfba3af7..e1ff4eb3ccba 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3223,7 +3223,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 		ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
 		journal = EXT4_JOURNAL(inode);
 		jbd2_journal_lock_updates(journal);
-		err = jbd2_journal_flush(journal);
+		err = jbd2_journal_flush(journal, 0);
 		jbd2_journal_unlock_updates(journal);
 
 		if (err)
@@ -6005,7 +6005,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 	if (val)
 		ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
 	else {
-		err = jbd2_journal_flush(journal);
+		err = jbd2_journal_flush(journal, 0);
 		if (err < 0) {
 			jbd2_journal_unlock_updates(journal);
 			percpu_up_write(&sbi->s_writepages_rwsem);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a96d6721cef9..93e9419825b8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -706,7 +706,7 @@ static long ext4_ioctl_group_add(struct file *file,
 	err = ext4_group_add(sb, input);
 	if (EXT4_SB(sb)->s_journal) {
 		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 	}
 	if (err == 0)
@@ -884,7 +884,7 @@ setversion_out:
 		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
 		if (EXT4_SB(sb)->s_journal) {
 			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
 			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 		}
 		if (err == 0)
@@ -1027,7 +1027,7 @@ mext_out:
 		if (EXT4_SB(sb)->s_journal) {
 			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
 			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
 			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 		}
 		if (err == 0)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3b6203543607..ad3919dbd49e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5653,7 +5653,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
 		return 0;
 	}
 	jbd2_journal_lock_updates(journal);
-	err = jbd2_journal_flush(journal);
+	err = jbd2_journal_flush(journal, 0);
 	if (err < 0)
 		goto out;
 
@@ -5795,7 +5795,7 @@ static int ext4_freeze(struct super_block *sb)
 		 * Don't clear the needs_recovery flag if we failed to
 		 * flush the journal.
 		 */
-		error = jbd2_journal_flush(journal);
+		error = jbd2_journal_flush(journal, 0);
 		if (error < 0)
 			goto out;
 
@@ -6389,7 +6389,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		 * otherwise be livelocked...
 		 */
 		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
 		if (err)
 			return err;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2dc944442802..3a2ed60ea8b7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1686,6 +1686,110 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 	write_unlock(&journal->j_state_lock);
 }
 
+/**
+ * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
+ * @journal: The journal to erase.
+ * @flags: A discard/zeroout request is sent for each physically contigous
+ *	region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
+ *	JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
+ *	to perform.
+ *
+ * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
+ * will be explicitly written if no hardware offload is available, see
+ * blkdev_issue_zeroout for more details.
+ */
+static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
+{
+	int err = 0;
+	unsigned long block, log_offset; /* logical */
+	unsigned long long phys_block, block_start, block_stop; /* physical */
+	loff_t byte_start, byte_stop, byte_count;
+	struct request_queue *q = bdev_get_queue(journal->j_dev);
+
+	/* flags must be set to either discard or zeroout */
+	if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
+			((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+			(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
+		return -EINVAL;
+
+	if (!q)
+		return -ENXIO;
+
+	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	/*
+	 * lookup block mapping and issue discard/zeroout for each
+	 * contiguous region
+	 */
+	log_offset = be32_to_cpu(journal->j_superblock->s_first);
+	block_start =  ~0ULL;
+	for (block = log_offset; block < journal->j_total_len; block++) {
+		err = jbd2_journal_bmap(journal, block, &phys_block);
+		if (err) {
+			pr_err("JBD2: bad block at offset %lu", block);
+			return err;
+		}
+
+		if (block_start == ~0ULL) {
+			block_start = phys_block;
+			block_stop = block_start - 1;
+		}
+
+		/*
+		 * last block not contiguous with current block,
+		 * process last contiguous region and return to this block on
+		 * next loop
+		 */
+		if (phys_block != block_stop + 1) {
+			block--;
+		} else {
+			block_stop++;
+			/*
+			 * if this isn't the last block of journal,
+			 * no need to process now because next block may also
+			 * be part of this contiguous region
+			 */
+			if (block != journal->j_total_len - 1)
+				continue;
+		}
+
+		/*
+		 * end of contiguous region or this is last block of journal,
+		 * take care of the region
+		 */
+		byte_start = block_start * journal->j_blocksize;
+		byte_stop = block_stop * journal->j_blocksize;
+		byte_count = (block_stop - block_start + 1) *
+				journal->j_blocksize;
+
+		truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
+				byte_start, byte_stop);
+
+		if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
+			err = blkdev_issue_discard(journal->j_dev,
+					byte_start >> SECTOR_SHIFT,
+					byte_count >> SECTOR_SHIFT,
+					GFP_NOFS, 0);
+		} else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
+			err = blkdev_issue_zeroout(journal->j_dev,
+					byte_start >> SECTOR_SHIFT,
+					byte_count >> SECTOR_SHIFT,
+					GFP_NOFS, 0);
+		}
+
+		if (unlikely(err != 0)) {
+			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
+					err, block_start, block_stop);
+			return err;
+		}
+
+		/* reset start and stop after processing a region */
+		block_start = ~0ULL;
+	}
+
+	return blkdev_issue_flush(journal->j_dev);
+}
 
 /**
  * jbd2_journal_update_sb_errno() - Update error in the journal.
@@ -2246,13 +2350,18 @@ EXPORT_SYMBOL(jbd2_journal_clear_features);
 /**
  * jbd2_journal_flush() - Flush journal
  * @journal: Journal to act on.
+ * @flags: optional operation on the journal blocks after the flush (see below)
  *
  * Flush all data for a given journal to disk and empty the journal.
  * Filesystems can use this when remounting readonly to ensure that
- * recovery does not need to happen on remount.
+ * recovery does not need to happen on remount. Optionally, a discard or zeroout
+ * can be issued on the journal blocks after flushing.
+ *
+ * flags:
+ *	JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
+ *	JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
  */
-
-int jbd2_journal_flush(journal_t *journal)
+int jbd2_journal_flush(journal_t *journal, unsigned int flags)
 {
 	int err = 0;
 	transaction_t *transaction = NULL;
@@ -2306,6 +2415,10 @@ int jbd2_journal_flush(journal_t *journal)
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
 	jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
+
+	if (flags)
+		err = __jbd2_journal_erase(journal, flags);
+
 	mutex_unlock(&journal->j_checkpoint_mutex);
 	write_lock(&journal->j_state_lock);
 	J_ASSERT(!journal->j_running_transaction);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index e032f2e2c2c5..f1cc8258d34a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6018,7 +6018,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	 * Then truncate log will be replayed resulting in cluster double free.
 	 */
 	jbd2_journal_lock_updates(journal->j_journal);
-	status = jbd2_journal_flush(journal->j_journal);
+	status = jbd2_journal_flush(journal->j_journal, 0);
 	jbd2_journal_unlock_updates(journal->j_journal);
 	if (status < 0) {
 		mlog_errno(status);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4e589ce2fce6..4f15750aac5d 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -308,7 +308,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
 	}
 
 	jbd2_journal_lock_updates(journal->j_journal);
-	status = jbd2_journal_flush(journal->j_journal);
+	status = jbd2_journal_flush(journal->j_journal, 0);
 	jbd2_journal_unlock_updates(journal->j_journal);
 	if (status < 0) {
 		up_write(&journal->j_trans_barrier);
@@ -1000,7 +1000,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 
 	if (ocfs2_mount_local(osb)) {
 		jbd2_journal_lock_updates(journal->j_journal);
-		status = jbd2_journal_flush(journal->j_journal);
+		status = jbd2_journal_flush(journal->j_journal, 0);
 		jbd2_journal_unlock_updates(journal->j_journal);
 		if (status < 0)
 			mlog_errno(status);
@@ -1070,7 +1070,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
 
 	if (replayed) {
 		jbd2_journal_lock_updates(journal->j_journal);
-		status = jbd2_journal_flush(journal->j_journal);
+		status = jbd2_journal_flush(journal->j_journal, 0);
 		jbd2_journal_unlock_updates(journal->j_journal);
 		if (status < 0)
 			mlog_errno(status);
@@ -1666,7 +1666,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 
 	/* wipe the journal */
 	jbd2_journal_lock_updates(journal);
-	status = jbd2_journal_flush(journal);
+	status = jbd2_journal_flush(journal, 0);
 	jbd2_journal_unlock_updates(journal);
 	if (status < 0)
 		mlog_errno(status);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index db0e1920cb12..8543233b0388 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1370,6 +1370,10 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,	FAST_COMMIT)
 						 * mode */
 #define JBD2_FAST_COMMIT_ONGOING	0x100	/* Fast commit is ongoing */
 #define JBD2_FULL_COMMIT_ONGOING	0x200	/* Full commit is ongoing */
+#define JBD2_JOURNAL_FLUSH_DISCARD	0x0001
+#define JBD2_JOURNAL_FLUSH_ZEROOUT	0x0002
+#define JBD2_JOURNAL_FLUSH_VALID	(JBD2_JOURNAL_FLUSH_DISCARD | \
+					JBD2_JOURNAL_FLUSH_ZEROOUT)
 
 /*
  * Function declarations for the journaling transaction and buffer
@@ -1500,7 +1504,7 @@ extern int	 jbd2_journal_invalidatepage(journal_t *,
 				struct page *, unsigned int, unsigned int);
 extern int	 jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
 extern int	 jbd2_journal_stop(handle_t *);
-extern int	 jbd2_journal_flush (journal_t *);
+extern int	 jbd2_journal_flush(journal_t *journal, unsigned int flags);
 extern void	 jbd2_journal_lock_updates (journal_t *);
 extern void	 jbd2_journal_unlock_updates (journal_t *);
 

From 351a0a3fbc3584a00036f05cfdb0cd3eb1dca92a Mon Sep 17 00:00:00 2001
From: Leah Rumancik <leah.rumancik@gmail.com>
Date: Tue, 18 May 2021 15:13:26 +0000
Subject: [PATCH 2653/3292] ext4: add ioctl EXT4_IOC_CHECKPOINT

ioctl EXT4_IOC_CHECKPOINT checkpoints and flushes the journal. This
includes forcing all the transactions to the log, checkpointing the
transactions, and flushing the log to disk. This ioctl takes u32 "flags"
as an argument. Three flags are supported. EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN
can be used to verify input to the ioctl. It returns error if there is any
invalid input, otherwise it returns success without performing
any checkpointing. The other two flags, EXT4_IOC_CHECKPOINT_FLAG_DISCARD
and EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT, can be used to issue requests to
discard or zeroout the journal logs blocks, respectively. At this
point, EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT is primarily added to enable
testing of this codepath on devices that don't support discard.
EXT4_IOC_CHECKPOINT_FLAG_DISCARD and EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT
cannot both be set.

Systems that wish to achieve content deletion SLO can set up a daemon
that calls this ioctl at a regular interval such that it matches with the
SLO requirement. Thus, with this patch, the ext4_dir_entry2 wipeout
patch[1], and the Ext4 "-o discard" mount option set, Ext4 can now
guarantee that all file contents, file metatdata, and filenames will not
be accessible through the filesystem and will have had discard or
zeroout requests issued for corresponding device blocks.

The __jbd2_journal_erase function could also be used to discard or
zero-fill the journal during journal load after recovery. This would
provide a potential solution to a journal replay bug reported earlier this
year[2]. After a successful journal recovery, e2fsck can call this ioctl to
discard the journal as well.

[1] https://lore.kernel.org/linux-ext4/YIHknqxngB1sUdie@mit.edu/
[2] https://lore.kernel.org/linux-ext4/YDZoaacIYStFQT8g@mit.edu/

Link: https://lore.kernel.org/r/20210518151327.130198-2-leah.rumancik@gmail.com
Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  |  9 ++++++++
 fs/ext4/ioctl.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2b51cbee3907..a646bfcbd0e8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -720,6 +720,7 @@ enum {
 #define EXT4_IOC_CLEAR_ES_CACHE		_IO('f', 40)
 #define EXT4_IOC_GETSTATE		_IOW('f', 41, __u32)
 #define EXT4_IOC_GET_ES_CACHE		_IOWR('f', 42, struct fiemap)
+#define EXT4_IOC_CHECKPOINT		_IOW('f', 43, __u32)
 
 #define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
 
@@ -741,6 +742,14 @@ enum {
 #define EXT4_STATE_FLAG_NEWENTRY	0x00000004
 #define EXT4_STATE_FLAG_DA_ALLOC_CLOSE	0x00000008
 
+/* flags for ioctl EXT4_IOC_CHECKPOINT */
+#define EXT4_IOC_CHECKPOINT_FLAG_DISCARD	0x1
+#define EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT	0x2
+#define EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN	0x4
+#define EXT4_IOC_CHECKPOINT_FLAG_VALID		(EXT4_IOC_CHECKPOINT_FLAG_DISCARD | \
+						EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \
+						EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
+
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 93e9419825b8..5730aeca563c 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -799,6 +799,57 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg)
 	return error;
 }
 
+static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
+{
+	int err = 0;
+	__u32 flags = 0;
+	unsigned int flush_flags = 0;
+	struct super_block *sb = file_inode(filp)->i_sb;
+	struct request_queue *q;
+
+	if (copy_from_user(&flags, (__u32 __user *)arg,
+				sizeof(__u32)))
+		return -EFAULT;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* check for invalid bits set */
+	if ((flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) ||
+				((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+				(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
+		return -EINVAL;
+
+	if (!EXT4_SB(sb)->s_journal)
+		return -ENODEV;
+
+	if (flags & ~JBD2_JOURNAL_FLUSH_VALID)
+		return -EINVAL;
+
+	q = bdev_get_queue(EXT4_SB(sb)->s_journal->j_dev);
+	if (!q)
+		return -ENXIO;
+	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
+		return 0;
+
+	if (flags & EXT4_IOC_CHECKPOINT_FLAG_DISCARD)
+		flush_flags |= JBD2_JOURNAL_FLUSH_DISCARD;
+
+	if (flags & EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT) {
+		flush_flags |= JBD2_JOURNAL_FLUSH_ZEROOUT;
+		pr_info_ratelimited("warning: checkpointing journal with EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT can be slow");
+	}
+
+	jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+	err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, flush_flags);
+	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+
+	return err;
+}
+
 static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -1210,6 +1261,9 @@ resizefs_out:
 		return fsverity_ioctl_read_metadata(filp,
 						    (const void __user *)arg);
 
+	case EXT4_IOC_CHECKPOINT:
+		return ext4_ioctl_checkpoint(filp, arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -1290,6 +1344,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case EXT4_IOC_CLEAR_ES_CACHE:
 	case EXT4_IOC_GETSTATE:
 	case EXT4_IOC_GET_ES_CACHE:
+	case EXT4_IOC_CHECKPOINT:
 		break;
 	default:
 		return -ENOIOCTLCMD;

From fd7b23be92059f14537cb9cac0f0894c3a9b1284 Mon Sep 17 00:00:00 2001
From: Leah Rumancik <leah.rumancik@gmail.com>
Date: Tue, 18 May 2021 15:13:27 +0000
Subject: [PATCH 2654/3292] ext4: update journal documentation

Add a section about journal checkpointing, including information about
the ioctl EXT4_IOC_CHECKPOINT which can be used to trigger a journal
checkpoint from userspace.

Also, update the journal allocation information to reflect that up to
10240000 blocks are used for the journal and that the journal is not
necessarily contiguous.

Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>

Changes in v5:
- clarify behavior of DRY_RUN flag
Link: https://lore.kernel.org/r/20210518151327.130198-3-leah.rumancik@gmail.com

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 Documentation/filesystems/ext4/journal.rst | 39 +++++++++++++++++-----
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst
index cdbfec473167..5fad38860f17 100644
--- a/Documentation/filesystems/ext4/journal.rst
+++ b/Documentation/filesystems/ext4/journal.rst
@@ -4,14 +4,14 @@ Journal (jbd2)
 --------------
 
 Introduced in ext3, the ext4 filesystem employs a journal to protect the
-filesystem against corruption in the case of a system crash. A small
-continuous region of disk (default 128MiB) is reserved inside the
-filesystem as a place to land “important” data writes on-disk as quickly
-as possible. Once the important data transaction is fully written to the
-disk and flushed from the disk write cache, a record of the data being
-committed is also written to the journal. At some later point in time,
-the journal code writes the transactions to their final locations on
-disk (this could involve a lot of seeking or a lot of small
+filesystem against metadata inconsistencies in the case of a system crash. Up
+to 10,240,000 file system blocks (see man mke2fs(8) for more details on journal
+size limits) can be reserved inside the filesystem as a place to land
+“important” data writes on-disk as quickly as possible. Once the important
+data transaction is fully written to the disk and flushed from the disk write
+cache, a record of the data being committed is also written to the journal. At
+some later point in time, the journal code writes the transactions to their
+final locations on disk (this could involve a lot of seeking or a lot of small
 read-write-erases) before erasing the commit record. Should the system
 crash during the second slow write, the journal can be replayed all the
 way to the latest commit record, guaranteeing the atomicity of whatever
@@ -731,3 +731,26 @@ point, the refcount for inode 11 is not reliable, but that gets fixed by the
 replay of last inode 11 tag. Thus, by converting a non-idempotent procedure
 into a series of idempotent outcomes, fast commits ensured idempotence during
 the replay.
+
+Journal Checkpoint
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Checkpointing the journal ensures all transactions and their associated buffers
+are submitted to the disk. In-progress transactions are waited upon and included
+in the checkpoint. Checkpointing is used internally during critical updates to
+the filesystem including journal recovery, filesystem resizing, and freeing of
+the journal_t structure.
+
+A journal checkpoint can be triggered from userspace via the ioctl
+EXT4_IOC_CHECKPOINT. This ioctl takes a single, u64 argument for flags.
+Currently, three flags are supported. First, EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN
+can be used to verify input to the ioctl. It returns error if there is any
+invalid input, otherwise it returns success without performing
+any checkpointing. This can be used to check whether the ioctl exists on a
+system and to verify there are no issues with arguments or flags. The
+other two flags are EXT4_IOC_CHECKPOINT_FLAG_DISCARD and
+EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT. These flags cause the journal blocks to be
+discarded or zero-filled, respectively, after the journal checkpoint is
+complete. EXT4_IOC_CHECKPOINT_FLAG_DISCARD and EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT
+cannot both be set. The ioctl may be useful when snapshotting a system or for
+complying with content deletion SLOs.

From b2d2e7573548295a14db999095fd1df40352c91a Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 20 May 2021 14:55:52 +0800
Subject: [PATCH 2655/3292] ext4: remove set but rewrite variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the ext4_dx_add_entry function, the at variable is assigned but will
reset just after “again:” label. So delete the unnecessary assignment.
this will not chang the logic.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Link: https://lore.kernel.org/r/1621493752-36890-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a4af26d4459a..5fd56f616cf0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2499,7 +2499,7 @@ again:
 
 			/* Which index block gets the new entry? */
 			if (at - entries >= icount1) {
-				frame->at = at = at - entries - icount1 + entries2;
+				frame->at = at - entries - icount1 + entries2;
 				frame->entries = entries = entries2;
 				swap(frame->bh, bh2);
 			}

From e5e7010e5444d923e4091cafff61d05f2d19cada Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Sat, 22 May 2021 18:30:44 +0800
Subject: [PATCH 2656/3292] ext4: remove check for zero nr_to_scan in
 ext4_es_scan()

After converting fs shrinkers to new scan/count API, we are no longer
pass zero nr_to_scan parameter to detect the number of objects to free,
just remove this check.

Fixes: 1ab6c4997e04 ("fs: convert fs shrinkers to new scan/count API")
Cc: stable@vger.kernel.org # 3.12+
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210522103045.690103-2-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/extents_status.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0a729027322d..db3cd70a72e4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1574,9 +1574,6 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
-	if (!nr_to_scan)
-		return ret;
-
 	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
 
 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);

From 4fb7c70a889ead2e91e184895ac6e5354b759135 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Sat, 22 May 2021 18:30:45 +0800
Subject: [PATCH 2657/3292] ext4: correct the cache_nr in tracepoint
 ext4_es_shrink_exit

The cache_cnt parameter of tracepoint ext4_es_shrink_exit means the
remaining cache count after shrink, but now it is the cache count before
shrink, fix it by read sbi->s_extent_cache_cnt again.

Fixes: 1ab6c4997e04 ("fs: convert fs shrinkers to new scan/count API")
Cc: stable@vger.kernel.org # 3.12+
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210522103045.690103-3-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/extents_status.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index db3cd70a72e4..9a3a8996aacf 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1576,6 +1576,7 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 
 	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
 
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
 	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
 }

From c89849cc0259f3d33624cc3bd127685c3c0fa25d Mon Sep 17 00:00:00 2001
From: Pan Dong <pandong.peter@bytedance.com>
Date: Tue, 25 May 2021 15:36:56 +0800
Subject: [PATCH 2658/3292] ext4: fix avefreec in find_group_orlov

The avefreec should be average free clusters instead
of average free blocks, otherwize Orlov's allocator
will not work properly when bigalloc enabled.

Cc: stable@kernel.org
Signed-off-by: Pan Dong <pandong.peter@bytedance.com>
Link: https://lore.kernel.org/r/20210525073656.31594-1-pandong.peter@bytedance.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ialloc.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9bab7fd4ccd5..e89fc0f770b0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -402,7 +402,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
  *
  * We always try to spread first-level directories.
  *
- * If there are blockgroups with both free inodes and free blocks counts
+ * If there are blockgroups with both free inodes and free clusters counts
  * not worse than average we return one with smallest directory count.
  * Otherwise we simply return a random group.
  *
@@ -411,7 +411,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
  * It's OK to put directory into a group unless
  * it has too many directories already (max_dirs) or
  * it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
+ * it has too few free clusters left (min_clusters) or
  * Parent's group is preferred, if it doesn't satisfy these
  * conditions we search cyclically through the rest. If none
  * of the groups look good we just look for a group with more
@@ -427,7 +427,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
 	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei, grp_free;
-	ext4_fsblk_t freeb, avefreec;
+	ext4_fsblk_t freec, avefreec;
 	unsigned int ndirs;
 	int max_dirs, min_inodes;
 	ext4_grpblk_t min_clusters;
@@ -446,9 +446,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 
 	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
 	avefreei = freei / ngroups;
-	freeb = EXT4_C2B(sbi,
-		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
-	avefreec = freeb;
+	freec = percpu_counter_read_positive(&sbi->s_freeclusters_counter);
+	avefreec = freec;
 	do_div(avefreec, ngroups);
 	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 

From f9505c72b2ee80cb68af95449a5215906130e3be Mon Sep 17 00:00:00 2001
From: chenyichong <chenyichong@uniontech.com>
Date: Wed, 26 May 2021 13:29:30 +0800
Subject: [PATCH 2659/3292] ext4: use local variable ei instead of EXT4_I()
 macro

Signed-off-by: chenyichong <chenyichong@uniontech.com>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/20210526052930.11278-1-chenyichong@uniontech.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e1ff4eb3ccba..c5cf700e2c8f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -374,7 +374,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
 	ei->i_reserved_data_blocks -= used;
 	percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
 
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	spin_unlock(&ei->i_block_reservation_lock);
 
 	/* Update quota subsystem for data blocks */
 	if (quota_claim)

From 4b9718b5a201eddcd00d9db6c36b18840125c7ee Mon Sep 17 00:00:00 2001
From: Ilya Maximets <i.maximets@ovn.org>
Date: Tue, 22 Jun 2021 20:56:47 +0200
Subject: [PATCH 2660/3292] docs, af_xdp: Consistent indentation in examples

Examples in this document use all kinds of indentation from 3 to 5
spaces and even mixed with tabs. Making them all even and equal to
4 spaces.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20210622185647.3705104-1-i.maximets@ovn.org
---
 Documentation/networking/af_xdp.rst | 32 ++++++++++++++---------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 2ccc5644cc98..42576880aa4a 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -290,19 +290,19 @@ round-robin example of distributing packets is shown below:
    #define MAX_SOCKS 16
 
    struct {
-        __uint(type, BPF_MAP_TYPE_XSKMAP);
-        __uint(max_entries, MAX_SOCKS);
-        __uint(key_size, sizeof(int));
-        __uint(value_size, sizeof(int));
+       __uint(type, BPF_MAP_TYPE_XSKMAP);
+       __uint(max_entries, MAX_SOCKS);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
    } xsks_map SEC(".maps");
 
    static unsigned int rr;
 
    SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
    {
-	rr = (rr + 1) & (MAX_SOCKS - 1);
+       rr = (rr + 1) & (MAX_SOCKS - 1);
 
-	return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
+       return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
    }
 
 Note, that since there is only a single set of FILL and COMPLETION
@@ -379,7 +379,7 @@ would look like this for the TX path:
 .. code-block:: c
 
    if (xsk_ring_prod__needs_wakeup(&my_tx_ring))
-      sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0);
+       sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0);
 
 I.e., only use the syscall if the flag is set.
 
@@ -442,9 +442,9 @@ purposes. The supported statistics are shown below:
 .. code-block:: c
 
    struct xdp_statistics {
-	  __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
-	  __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
-	  __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+       __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+       __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+       __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
    };
 
 XDP_OPTIONS getsockopt
@@ -483,15 +483,15 @@ like this:
 .. code-block:: c
 
     // struct xdp_rxtx_ring {
-    // 	__u32 *producer;
-    // 	__u32 *consumer;
-    // 	struct xdp_desc *desc;
+    //     __u32 *producer;
+    //     __u32 *consumer;
+    //     struct xdp_desc *desc;
     // };
 
     // struct xdp_umem_ring {
-    // 	__u32 *producer;
-    // 	__u32 *consumer;
-    // 	__u64 *desc;
+    //     __u32 *producer;
+    //     __u32 *consumer;
+    //     __u64 *desc;
     // };
 
     // typedef struct xdp_rxtx_ring RING;

From e93bdd78406da9ed01554c51e38b2a02c8ef8025 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 22 Apr 2021 15:00:32 -0500
Subject: [PATCH 2661/3292] wireless: wext-spy: Fix out-of-bounds warning

Fix the following out-of-bounds warning:

net/wireless/wext-spy.c:178:2: warning: 'memcpy' offset [25, 28] from the object at 'threshold' is out of the bounds of referenced subobject 'low' with type 'struct iw_quality' at offset 20 [-Warray-bounds]

The problem is that the original code is trying to copy data into a
couple of struct members adjacent to each other in a single call to
memcpy(). This causes a legitimate compiler warning because memcpy()
overruns the length of &threshold.low and &spydata->spy_thr_low. As
these are just a couple of struct members, fix this by using direct
assignments, instead of memcpy().

This helps with the ongoing efforts to globally enable -Warray-bounds
and get us closer to being able to tighten the FORTIFY_SOURCE routines
on memcpy().

Link: https://github.com/KSPP/linux/issues/109
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210422200032.GA168995@embeddedor
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/wext-spy.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
index 33bef22e44e9..b379a0371653 100644
--- a/net/wireless/wext-spy.c
+++ b/net/wireless/wext-spy.c
@@ -120,8 +120,8 @@ int iw_handler_set_thrspy(struct net_device *	dev,
 		return -EOPNOTSUPP;
 
 	/* Just do it */
-	memcpy(&(spydata->spy_thr_low), &(threshold->low),
-	       2 * sizeof(struct iw_quality));
+	spydata->spy_thr_low = threshold->low;
+	spydata->spy_thr_high = threshold->high;
 
 	/* Clear flag */
 	memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
@@ -147,8 +147,8 @@ int iw_handler_get_thrspy(struct net_device *	dev,
 		return -EOPNOTSUPP;
 
 	/* Just do it */
-	memcpy(&(threshold->low), &(spydata->spy_thr_low),
-	       2 * sizeof(struct iw_quality));
+	threshold->low = spydata->spy_thr_low;
+	threshold->high = spydata->spy_thr_high;
 
 	return 0;
 }
@@ -173,10 +173,10 @@ static void iw_send_thrspy_event(struct net_device *	dev,
 	memcpy(threshold.addr.sa_data, address, ETH_ALEN);
 	threshold.addr.sa_family = ARPHRD_ETHER;
 	/* Copy stats */
-	memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+	threshold.qual = *wstats;
 	/* Copy also thresholds */
-	memcpy(&(threshold.low), &(spydata->spy_thr_low),
-	       2 * sizeof(struct iw_quality));
+	threshold.low = spydata->spy_thr_low;
+	threshold.high = spydata->spy_thr_high;
 
 	/* Send event to user space */
 	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);

From c2a8637c055e4ea86cd414bbf02034d1449685cc Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 28 Apr 2021 17:59:05 +0800
Subject: [PATCH 2662/3292] net: wireless: wext_compat.c: Remove redundant
 assignment to ps

Variable 'ps' is set to wdev->ps but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

net/wireless/wext-compat.c:1170:7: warning: Value stored to 'ps' during
its initialization is never read [clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619603945-116891-1-git-send-email-yang.lee@linux.alibaba.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/wext-compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a8320dc59af7..50a2330de236 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1167,7 +1167,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-	bool ps = wdev->ps;
+	bool ps;
 	int timeout = wdev->ps_timeout;
 	int err;
 

From 5eae2705004895a9aa917f5df6c1a2da8eeb4fd5 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 30 Apr 2021 17:21:23 +0800
Subject: [PATCH 2663/3292] mac80211: Remove redundant assignment to ret

Variable 'ret' is set to -ENODEV but this value is never read as it
is overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Clean up the following clang-analyzer warning:

net/mac80211/debugfs_netdev.c:60:2: warning: Value stored to 'ret' is
never read [clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619774483-116805-1-git-send-email-yang.lee@linux.alibaba.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs_netdev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 0ad3860852ff..f7aac8955681 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -57,7 +57,6 @@ static ssize_t ieee80211_if_write(
 		return -EFAULT;
 	buf[count] = '\0';
 
-	ret = -ENODEV;
 	rtnl_lock();
 	ret = (*write)(sdata, buf, count);
 	rtnl_unlock();

From 21b7805434f6598eaf70329f78cf3da0bd4aa3e9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 6 May 2021 22:12:00 +0200
Subject: [PATCH 2664/3292] cfg80211: remove
 CFG80211_MAX_NUM_DIFFERENT_CHANNELS

We no longer need to put any limits here, hardware will and
mac80211-hwsim can do whatever it likes. The reason we had
this was some accounting code (still mentioned in the comment)
but that code was deleted in commit c781944b71f8 ("cfg80211:
Remove unused cfg80211_can_use_iftype_chan()").

Link: https://lore.kernel.org/r/20210506221159.d1d61db1d31c.Iac4da68d54b9f1fdc18a03586bbe06aeb9515425@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 5 -----
 include/net/cfg80211.h                | 2 --
 net/wireless/core.c                   | 8 --------
 3 files changed, 15 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 7a6fd46d0c6e..9574afc0cdbf 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3796,11 +3796,6 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
 		return -EINVAL;
 	}
 
-	if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) {
-		GENL_SET_ERR_MSG(info, "too many channels specified");
-		return -EINVAL;
-	}
-
 	if (info->attrs[HWSIM_ATTR_NO_VIF])
 		param.no_vif = true;
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 58c2cd417e89..60325b62daae 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1245,8 +1245,6 @@ struct cfg80211_csa_settings {
 	u8 count;
 };
 
-#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
-
 /**
  * struct iface_combination_params - input parameters for interface combinations
  *
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 8d0883e81093..47f551301592 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -589,14 +589,6 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
 		if (WARN_ON(!c->num_different_channels))
 			return -EINVAL;
 
-		/*
-		 * Put a sane limit on maximum number of different
-		 * channels to simplify channel accounting code.
-		 */
-		if (WARN_ON(c->num_different_channels >
-				CFG80211_MAX_NUM_DIFFERENT_CHANNELS))
-			return -EINVAL;
-
 		/* DFS only works on one channel. */
 		if (WARN_ON(c->radar_detect_widths &&
 			    (c->num_different_channels > 1)))

From 5b5c9f3bd5f3d726d07ab8e4776d241863963a6e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 8 May 2021 16:03:51 +0300
Subject: [PATCH 2665/3292] cfg80211: clean up variable use in
 cfg80211_parse_colocated_ap()

The "ap_info->tbtt_info_len" and "length" variables are the same value
but it is confusing how the names are mixed up.  Let's use "length"
everywhere for consistency.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/YJaMNzZENkYFAYQX@mwanda
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4f06c1825029..a3941b19b516 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -618,7 +618,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
 
 		freq = ieee80211_channel_to_frequency(ap_info->channel, band);
 
-		if (end - pos < count * ap_info->tbtt_info_len)
+		if (end - pos < count * length)
 			break;
 
 		/*
@@ -630,7 +630,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
 		if (band != NL80211_BAND_6GHZ ||
 		    (length != IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM &&
 		     length < IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM)) {
-			pos += count * ap_info->tbtt_info_len;
+			pos += count * length;
 			continue;
 		}
 
@@ -653,7 +653,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
 				kfree(entry);
 			}
 
-			pos += ap_info->tbtt_info_len;
+			pos += length;
 		}
 	}
 

From 0edab4ff84b67fc585bb47aba37833da18f5a9dc Mon Sep 17 00:00:00 2001
From: Philipp Borgers <borgers@mi.fu-berlin.de>
Date: Mon, 17 May 2021 14:01:45 +0200
Subject: [PATCH 2666/3292] mac80211: minstrel_ht: ignore frame that was sent
 with noAck flag

QoS Data Frames that were sent with a No Ack policy should be ignored by
the minstrel statistics. There will never be an Ack for these frames so
there is no way to draw conclusions about the success of the transmission.

Signed-off-by: Philipp Borgers <borgers@mi.fu-berlin.de>
Link: https://lore.kernel.org/r/20210517120145.132814-1-borgers@mi.fu-berlin.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index a6f3fb4a9197..bc261d086410 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1211,6 +1211,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
 	bool last, update = false;
 	int i;
 
+	/* Ignore packet that was sent with noAck flag */
+	if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+		return;
+
 	/* This packet was aggregated but doesn't carry status info */
 	if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
 	    !(info->flags & IEEE80211_TX_STAT_AMPDU))

From 0044cc177f23aff1f66589f87c5f1172e9f09fdc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 23:07:54 +0200
Subject: [PATCH 2667/3292] mac80211: unify queueing SKB to iface

We have a bunch of places that open-code the same to queue
an SKB to an interface, unify that.

Link: https://lore.kernel.org/r/20210517230754.113b65febd5a.Ie0e1d58a2885e75f242cb6e06f3b9660117fef93@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index af0ef456eb0f..ab9207e32f07 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -214,6 +214,16 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 	return len;
 }
 
+static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+					 struct sta_info *sta,
+					 struct sk_buff *skb)
+{
+	skb_queue_tail(&sdata->skb_queue, skb);
+	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+	if (sta)
+		sta->rx_stats.packets++;
+}
+
 static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
 					 struct sk_buff *skb,
 					 int rtap_space)
@@ -254,8 +264,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
 	if (!skb)
 		return;
 
-	skb_queue_tail(&sdata->skb_queue, skb);
-	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+	ieee80211_queue_skb_to_iface(sdata, NULL, skb);
 }
 
 /*
@@ -1339,7 +1348,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 				       struct sk_buff_head *frames)
 {
 	struct sk_buff *skb = rx->skb;
-	struct ieee80211_local *local = rx->local;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct sta_info *sta = rx->sta;
 	struct tid_ampdu_rx *tid_agg_rx;
@@ -1391,8 +1399,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	/* if this mpdu is fragmented - terminate rx aggregation session */
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	if (sc & IEEE80211_SCTL_FRAG) {
-		skb_queue_tail(&rx->sdata->skb_queue, skb);
-		ieee80211_queue_work(&local->hw, &rx->sdata->work);
+		ieee80211_queue_skb_to_iface(rx->sdata, NULL, skb);
 		return;
 	}
 
@@ -3493,10 +3500,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 
  queue:
-	skb_queue_tail(&sdata->skb_queue, rx->skb);
-	ieee80211_queue_work(&local->hw, &sdata->work);
-	if (rx->sta)
-		rx->sta->rx_stats.packets++;
+	ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
 	return RX_QUEUED;
 }
 
@@ -3644,10 +3648,7 @@ ieee80211_rx_h_ext(struct ieee80211_rx_data *rx)
 		return RX_DROP_MONITOR;
 
 	/* for now only beacons are ext, so queue them */
-	skb_queue_tail(&sdata->skb_queue, rx->skb);
-	ieee80211_queue_work(&rx->local->hw, &sdata->work);
-	if (rx->sta)
-		rx->sta->rx_stats.packets++;
+	ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
 
 	return RX_QUEUED;
 }
@@ -3704,11 +3705,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
 		return RX_DROP_MONITOR;
 	}
 
-	/* queue up frame and kick off work to process it */
-	skb_queue_tail(&sdata->skb_queue, rx->skb);
-	ieee80211_queue_work(&rx->local->hw, &sdata->work);
-	if (rx->sta)
-		rx->sta->rx_stats.packets++;
+	ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
 
 	return RX_QUEUED;
 }

From 07bd1c79c9fbf038483c50031b0f302613a54eb6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 23:07:55 +0200
Subject: [PATCH 2668/3292] mac80211: refactor SKB queue processing a bit

This is a very long loop body, move it into its own function
instead, keeping only the kcov and free outside in the loop
body.

Link: https://lore.kernel.org/r/20210517230754.6bc6cdd68570.I28a86ebdb19601ca1965c4dc654cc49fc1064efa@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 228 ++++++++++++++++++++++---------------------
 1 file changed, 119 insertions(+), 109 deletions(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 137fa4c50e07..30dd3b3778f6 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1318,13 +1318,130 @@ static void ieee80211_if_setup_no_queue(struct net_device *dev)
 	dev->priv_flags |= IFF_NO_QUEUE;
 }
 
+static void ieee80211_iface_process_skb(struct ieee80211_local *local,
+					struct ieee80211_sub_if_data *sdata,
+					struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+	if (ieee80211_is_action(mgmt->frame_control) &&
+	    mgmt->u.action.category == WLAN_CATEGORY_BACK) {
+		struct sta_info *sta;
+		int len = skb->len;
+
+		mutex_lock(&local->sta_mtx);
+		sta = sta_info_get_bss(sdata, mgmt->sa);
+		if (sta) {
+			switch (mgmt->u.action.u.addba_req.action_code) {
+			case WLAN_ACTION_ADDBA_REQ:
+				ieee80211_process_addba_request(local, sta,
+								mgmt, len);
+				break;
+			case WLAN_ACTION_ADDBA_RESP:
+				ieee80211_process_addba_resp(local, sta,
+							     mgmt, len);
+				break;
+			case WLAN_ACTION_DELBA:
+				ieee80211_process_delba(sdata, sta,
+							mgmt, len);
+				break;
+			default:
+				WARN_ON(1);
+				break;
+			}
+		}
+		mutex_unlock(&local->sta_mtx);
+	} else if (ieee80211_is_action(mgmt->frame_control) &&
+		   mgmt->u.action.category == WLAN_CATEGORY_VHT) {
+		switch (mgmt->u.action.u.vht_group_notif.action_code) {
+		case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+			struct ieee80211_rx_status *status;
+			enum nl80211_band band;
+			struct sta_info *sta;
+			u8 opmode;
+
+			status = IEEE80211_SKB_RXCB(skb);
+			band = status->band;
+			opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+			mutex_lock(&local->sta_mtx);
+			sta = sta_info_get_bss(sdata, mgmt->sa);
+
+			if (sta)
+				ieee80211_vht_handle_opmode(sdata, sta, opmode,
+							    band);
+
+			mutex_unlock(&local->sta_mtx);
+			break;
+		}
+		case WLAN_VHT_ACTION_GROUPID_MGMT:
+			ieee80211_process_mu_groups(sdata, mgmt);
+			break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+	} else if (ieee80211_is_ext(mgmt->frame_control)) {
+		if (sdata->vif.type == NL80211_IFTYPE_STATION)
+			ieee80211_sta_rx_queued_ext(sdata, skb);
+		else
+			WARN_ON(1);
+	} else if (ieee80211_is_data_qos(mgmt->frame_control)) {
+		struct ieee80211_hdr *hdr = (void *)mgmt;
+		struct sta_info *sta;
+
+		/*
+		 * So the frame isn't mgmt, but frame_control
+		 * is at the right place anyway, of course, so
+		 * the if statement is correct.
+		 *
+		 * Warn if we have other data frame types here,
+		 * they must not get here.
+		 */
+		WARN_ON(hdr->frame_control &
+				cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
+		WARN_ON(!(hdr->seq_ctrl &
+				cpu_to_le16(IEEE80211_SCTL_FRAG)));
+		/*
+		 * This was a fragment of a frame, received while
+		 * a block-ack session was active. That cannot be
+		 * right, so terminate the session.
+		 */
+		mutex_lock(&local->sta_mtx);
+		sta = sta_info_get_bss(sdata, mgmt->sa);
+		if (sta) {
+			u16 tid = ieee80211_get_tid(hdr);
+
+			__ieee80211_stop_rx_ba_session(
+				sta, tid, WLAN_BACK_RECIPIENT,
+				WLAN_REASON_QSTA_REQUIRE_SETUP,
+				true);
+		}
+		mutex_unlock(&local->sta_mtx);
+	} else switch (sdata->vif.type) {
+	case NL80211_IFTYPE_STATION:
+		ieee80211_sta_rx_queued_mgmt(sdata, skb);
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		ieee80211_ibss_rx_queued_mgmt(sdata, skb);
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+		if (!ieee80211_vif_is_mesh(&sdata->vif))
+			break;
+		ieee80211_mesh_rx_queued_mgmt(sdata, skb);
+		break;
+	default:
+		WARN(1, "frame for unexpected interface type");
+		break;
+	}
+}
+
 static void ieee80211_iface_work(struct work_struct *work)
 {
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data, work);
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
-	struct sta_info *sta;
 
 	if (!ieee80211_sdata_running(sdata))
 		return;
@@ -1337,116 +1454,9 @@ static void ieee80211_iface_work(struct work_struct *work)
 
 	/* first process frames */
 	while ((skb = skb_dequeue(&sdata->skb_queue))) {
-		struct ieee80211_mgmt *mgmt = (void *)skb->data;
-
 		kcov_remote_start_common(skb_get_kcov_handle(skb));
-		if (ieee80211_is_action(mgmt->frame_control) &&
-		    mgmt->u.action.category == WLAN_CATEGORY_BACK) {
-			int len = skb->len;
 
-			mutex_lock(&local->sta_mtx);
-			sta = sta_info_get_bss(sdata, mgmt->sa);
-			if (sta) {
-				switch (mgmt->u.action.u.addba_req.action_code) {
-				case WLAN_ACTION_ADDBA_REQ:
-					ieee80211_process_addba_request(
-							local, sta, mgmt, len);
-					break;
-				case WLAN_ACTION_ADDBA_RESP:
-					ieee80211_process_addba_resp(local, sta,
-								     mgmt, len);
-					break;
-				case WLAN_ACTION_DELBA:
-					ieee80211_process_delba(sdata, sta,
-								mgmt, len);
-					break;
-				default:
-					WARN_ON(1);
-					break;
-				}
-			}
-			mutex_unlock(&local->sta_mtx);
-		} else if (ieee80211_is_action(mgmt->frame_control) &&
-			   mgmt->u.action.category == WLAN_CATEGORY_VHT) {
-			switch (mgmt->u.action.u.vht_group_notif.action_code) {
-			case WLAN_VHT_ACTION_OPMODE_NOTIF: {
-				struct ieee80211_rx_status *status;
-				enum nl80211_band band;
-				u8 opmode;
-
-				status = IEEE80211_SKB_RXCB(skb);
-				band = status->band;
-				opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
-
-				mutex_lock(&local->sta_mtx);
-				sta = sta_info_get_bss(sdata, mgmt->sa);
-
-				if (sta)
-					ieee80211_vht_handle_opmode(sdata, sta,
-								    opmode,
-								    band);
-
-				mutex_unlock(&local->sta_mtx);
-				break;
-			}
-			case WLAN_VHT_ACTION_GROUPID_MGMT:
-				ieee80211_process_mu_groups(sdata, mgmt);
-				break;
-			default:
-				WARN_ON(1);
-				break;
-			}
-		} else if (ieee80211_is_ext(mgmt->frame_control)) {
-			if (sdata->vif.type == NL80211_IFTYPE_STATION)
-				ieee80211_sta_rx_queued_ext(sdata, skb);
-			else
-				WARN_ON(1);
-		} else if (ieee80211_is_data_qos(mgmt->frame_control)) {
-			struct ieee80211_hdr *hdr = (void *)mgmt;
-			/*
-			 * So the frame isn't mgmt, but frame_control
-			 * is at the right place anyway, of course, so
-			 * the if statement is correct.
-			 *
-			 * Warn if we have other data frame types here,
-			 * they must not get here.
-			 */
-			WARN_ON(hdr->frame_control &
-					cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
-			WARN_ON(!(hdr->seq_ctrl &
-					cpu_to_le16(IEEE80211_SCTL_FRAG)));
-			/*
-			 * This was a fragment of a frame, received while
-			 * a block-ack session was active. That cannot be
-			 * right, so terminate the session.
-			 */
-			mutex_lock(&local->sta_mtx);
-			sta = sta_info_get_bss(sdata, mgmt->sa);
-			if (sta) {
-				u16 tid = ieee80211_get_tid(hdr);
-
-				__ieee80211_stop_rx_ba_session(
-					sta, tid, WLAN_BACK_RECIPIENT,
-					WLAN_REASON_QSTA_REQUIRE_SETUP,
-					true);
-			}
-			mutex_unlock(&local->sta_mtx);
-		} else switch (sdata->vif.type) {
-		case NL80211_IFTYPE_STATION:
-			ieee80211_sta_rx_queued_mgmt(sdata, skb);
-			break;
-		case NL80211_IFTYPE_ADHOC:
-			ieee80211_ibss_rx_queued_mgmt(sdata, skb);
-			break;
-		case NL80211_IFTYPE_MESH_POINT:
-			if (!ieee80211_vif_is_mesh(&sdata->vif))
-				break;
-			ieee80211_mesh_rx_queued_mgmt(sdata, skb);
-			break;
-		default:
-			WARN(1, "frame for unexpected interface type");
-			break;
-		}
+		ieee80211_iface_process_skb(local, sdata, skb);
 
 		kfree_skb(skb);
 		kcov_remote_stop();

From f057d1403689309c6277961d5c348d4841959a9c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 23:07:56 +0200
Subject: [PATCH 2669/3292] mac80211: use sdata->skb_queue for TDLS

We need to differentiate these frames since the ones we
currently put on the skb_queue_tdls_chsw have already
been converted to ethernet format, but now that we've
got a single place to enqueue to the sdata->skb_queue
this isn't hard. Just differentiate based on protocol
and adjust the code to queue the SKBs appropriately.

Link: https://lore.kernel.org/r/20210517230754.17034990abef.I5342f2183c0d246b18d36c511eb3b6be298a6572@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 10 +++++-----
 net/mac80211/iface.c       |  5 ++++-
 net/mac80211/main.c        |  5 -----
 net/mac80211/rx.c          | 21 +++++++++++++--------
 net/mac80211/tdls.c        | 28 +---------------------------
 5 files changed, 23 insertions(+), 46 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 648696b49f89..b995777566e1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1414,10 +1414,6 @@ struct ieee80211_local {
 
 	/* extended capabilities provided by mac80211 */
 	u8 ext_capa[8];
-
-	/* TDLS channel switch */
-	struct work_struct tdls_chsw_work;
-	struct sk_buff_head skb_queue_tdls_chsw;
 };
 
 static inline struct ieee80211_sub_if_data *
@@ -2287,9 +2283,13 @@ void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
 					  struct net_device *dev,
 					  const u8 *addr);
 void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
-void ieee80211_tdls_chsw_work(struct work_struct *wk);
 void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
 				      const u8 *peer, u16 reason);
+void
+ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
+				      struct sk_buff *skb);
+
+
 const char *ieee80211_get_reason_code_string(u16 reason_code);
 u16 ieee80211_encode_usf(int val);
 u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 30dd3b3778f6..68375ef56b4a 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1456,7 +1456,10 @@ static void ieee80211_iface_work(struct work_struct *work)
 	while ((skb = skb_dequeue(&sdata->skb_queue))) {
 		kcov_remote_start_common(skb_get_kcov_handle(skb));
 
-		ieee80211_iface_process_skb(local, sdata, skb);
+		if (skb->protocol == cpu_to_be16(ETH_P_TDLS))
+			ieee80211_process_tdls_channel_switch(sdata, skb);
+		else
+			ieee80211_iface_process_skb(local, sdata, skb);
 
 		kfree_skb(skb);
 		kcov_remote_stop();
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f33a3acd7f96..822ff388410e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -739,8 +739,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	INIT_WORK(&local->sched_scan_stopped_work,
 		  ieee80211_sched_scan_stopped_work);
 
-	INIT_WORK(&local->tdls_chsw_work, ieee80211_tdls_chsw_work);
-
 	spin_lock_init(&local->ack_status_lock);
 	idr_init(&local->ack_status_frames);
 
@@ -757,7 +755,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 
 	skb_queue_head_init(&local->skb_queue);
 	skb_queue_head_init(&local->skb_queue_unreliable);
-	skb_queue_head_init(&local->skb_queue_tdls_chsw);
 
 	ieee80211_alloc_led_names(local);
 
@@ -1389,7 +1386,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	cancel_delayed_work_sync(&local->roc_work);
 	cancel_work_sync(&local->restart_work);
 	cancel_work_sync(&local->reconfig_filter);
-	cancel_work_sync(&local->tdls_chsw_work);
 	flush_work(&local->sched_scan_stopped_work);
 	flush_work(&local->radar_detected_work);
 
@@ -1401,7 +1397,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 		wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
 	skb_queue_purge(&local->skb_queue);
 	skb_queue_purge(&local->skb_queue_unreliable);
-	skb_queue_purge(&local->skb_queue_tdls_chsw);
 
 	wiphy_unregister(local->hw.wiphy);
 	destroy_workqueue(local->workqueue);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index ab9207e32f07..a6400adf08bf 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -214,9 +214,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 	return len;
 }
 
-static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
-					 struct sta_info *sta,
-					 struct sk_buff *skb)
+static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+					   struct sta_info *sta,
+					   struct sk_buff *skb)
 {
 	skb_queue_tail(&sdata->skb_queue, skb);
 	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
@@ -224,6 +224,14 @@ static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
 		sta->rx_stats.packets++;
 }
 
+static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+					 struct sta_info *sta,
+					 struct sk_buff *skb)
+{
+	skb->protocol = 0;
+	__ieee80211_queue_skb_to_iface(sdata, sta, skb);
+}
+
 static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
 					 struct sk_buff *skb,
 					 int rtap_space)
@@ -3016,11 +3024,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 		    tf->category == WLAN_CATEGORY_TDLS &&
 		    (tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_REQUEST ||
 		     tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_RESPONSE)) {
-			skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb);
-			schedule_work(&local->tdls_chsw_work);
-			if (rx->sta)
-				rx->sta->rx_stats.packets++;
-
+			rx->skb->protocol = cpu_to_be16(ETH_P_TDLS);
+			__ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
 			return RX_QUEUED;
 		}
 	}
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f91d02b81b92..45e532ad1215 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1920,7 +1920,7 @@ out:
 	return ret;
 }
 
-static void
+void
 ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
 				      struct sk_buff *skb)
 {
@@ -1971,32 +1971,6 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
 	rcu_read_unlock();
 }
 
-void ieee80211_tdls_chsw_work(struct work_struct *wk)
-{
-	struct ieee80211_local *local =
-		container_of(wk, struct ieee80211_local, tdls_chsw_work);
-	struct ieee80211_sub_if_data *sdata;
-	struct sk_buff *skb;
-	struct ieee80211_tdls_data *tf;
-
-	wiphy_lock(local->hw.wiphy);
-	while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) {
-		tf = (struct ieee80211_tdls_data *)skb->data;
-		list_for_each_entry(sdata, &local->interfaces, list) {
-			if (!ieee80211_sdata_running(sdata) ||
-			    sdata->vif.type != NL80211_IFTYPE_STATION ||
-			    !ether_addr_equal(tf->da, sdata->vif.addr))
-				continue;
-
-			ieee80211_process_tdls_channel_switch(sdata, skb);
-			break;
-		}
-
-		kfree_skb(skb);
-	}
-	wiphy_unlock(local->hw.wiphy);
-}
-
 void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
 				      const u8 *peer, u16 reason)
 {

From 4ebdce1dcbd44099b0e68db859b21d97b051492c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 23:07:57 +0200
Subject: [PATCH 2670/3292] mac80211: simplify ieee80211_add_station()

There's no need to do some kind of weird err and RCU dance
just use sta_info_insert() directly.

Link: https://lore.kernel.org/r/20210517230754.55abd10056c0.I6f5a3b7b23347b2cdaf64e6d5ce1d9e904059654@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7a99892e5aba..0d29a9d1f910 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1693,15 +1693,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	    test_sta_flag(sta, WLAN_STA_ASSOC))
 		rate_control_rate_init(sta);
 
-	err = sta_info_insert_rcu(sta);
-	if (err) {
-		rcu_read_unlock();
-		return err;
-	}
-
-	rcu_read_unlock();
-
-	return 0;
+	return sta_info_insert(sta);
 }
 
 static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,

From cff7b5ca25353bef5909e357a9912f3d44b32af5 Mon Sep 17 00:00:00 2001
From: Philipp Borgers <borgers@mi.fu-berlin.de>
Date: Wed, 19 May 2021 14:20:17 +0200
Subject: [PATCH 2671/3292] mac80211: add ieee80211_is_tx_data helper function

Add a helper function that checks if a frame is a data frame. Frames
with hardware encapsulation enabled are data frames.

Signed-off-by: Philipp Borgers <borgers@mi.fu-berlin.de>
Link: https://lore.kernel.org/r/20210519122019.92359-2-borgers@mi.fu-berlin.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e89530d0d9c6..4e876e4598e3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6752,4 +6752,22 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw,
 struct sk_buff *
 ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
 					  struct ieee80211_vif *vif);
+
+/**
+ * ieee80211_is_tx_data - check if frame is a data frame
+ *
+ * The function is used to check if a frame is a data frame. Frames with
+ * hardware encapsulation enabled are data frames.
+ *
+ * @skb: the frame to be transmitted.
+ */
+static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (void *) skb->data;
+
+	return info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP ||
+	       ieee80211_is_data(hdr->frame_control);
+}
+
 #endif /* MAC80211_H */

From d333322361e7a099dc74df2498d3fa5fde5c4fa7 Mon Sep 17 00:00:00 2001
From: Philipp Borgers <borgers@mi.fu-berlin.de>
Date: Wed, 19 May 2021 14:20:18 +0200
Subject: [PATCH 2672/3292] mac80211: do not use low data rates for data frames
 with no ack flag

Data Frames with no ack flag set should be handled by the rate
controler. Make sure we reach the rate controler by returning early
from rate_control_send_low if the frame is a data frame with no ack
flag.

Signed-off-by: Philipp Borgers <borgers@mi.fu-berlin.de>
Link: https://lore.kernel.org/r/20210519122019.92359-3-borgers@mi.fu-berlin.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rate.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 63652c39c8e0..01d6407b0279 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -396,6 +396,10 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
 	int mcast_rate;
 	bool use_basicrate = false;
 
+	if (ieee80211_is_tx_data(txrc->skb) &&
+	    info->flags & IEEE80211_TX_CTL_NO_ACK)
+		return false;
+
 	if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
 		__rate_control_send_low(txrc->hw, sband, pubsta, info,
 					txrc->rate_idx_mask);

From 4e6c78bdcfbc3aad01a527e46b89e7ab70e0c332 Mon Sep 17 00:00:00 2001
From: Philipp Borgers <borgers@mi.fu-berlin.de>
Date: Wed, 19 May 2021 14:20:19 +0200
Subject: [PATCH 2673/3292] mac80211: refactor rc_no_data_or_no_ack_use_min
 function

Use newly introduced helper function ieee80211_is_tx_data to check if
frame is a data frame. Takes into account that hardware encapsulation
can be enabled for a frame and therefore no ieee80211 header is present.

Signed-off-by: Philipp Borgers <borgers@mi.fu-berlin.de>
Link: https://lore.kernel.org/r/20210519122019.92359-4-borgers@mi.fu-berlin.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rate.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 01d6407b0279..9418daa52f68 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -297,15 +297,11 @@ void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata)
 static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
 {
 	struct sk_buff *skb = txrc->skb;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	__le16 fc;
-
-	fc = hdr->frame_control;
 
 	return (info->flags & (IEEE80211_TX_CTL_NO_ACK |
 			       IEEE80211_TX_CTL_USE_MINRATE)) ||
-		!ieee80211_is_data(fc);
+		!ieee80211_is_tx_data(skb);
 }
 
 static void rc_send_low_basicrate(struct ieee80211_tx_rate *rate,

From 057e377af24a4f48f9d8340029e765df0345f048 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Fri, 28 May 2021 18:07:04 +0800
Subject: [PATCH 2674/3292] mac80211: remove the repeated declaration

Function 'ieee80211_sta_set_rx_nss' is declared twice, so remove the
repeated declaration.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1622196424-62403-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b995777566e1..a7ce59150322 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1875,7 +1875,6 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);
 enum ieee80211_sta_rx_bandwidth
 ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
 enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta);
-void ieee80211_sta_set_rx_nss(struct sta_info *sta);
 void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
 				 struct ieee80211_mgmt *mgmt);
 u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,

From 9df66d5b9f45c39b3925d16e8947cc10009b186d Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 9 Jun 2021 15:59:44 +0800
Subject: [PATCH 2675/3292] cfg80211: fix default HE tx bitrate mask in 2G band

In 2G band, a HE sta can only supports HT and HE, but not supports VHT.
In this case, default HE tx bitrate mask isn't filled, when we use iw to
set bitrates without any parameter.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Link: https://lore.kernel.org/r/20210609075944.51130-1-pkshih@realtek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fc9286afe3c9..912977bf3ec8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4781,11 +4781,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 		       sband->ht_cap.mcs.rx_mask,
 		       sizeof(mask->control[i].ht_mcs));
 
-		if (!sband->vht_cap.vht_supported)
-			continue;
-
-		vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
-		vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+		if (sband->vht_cap.vht_supported) {
+			vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+			vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+		}
 
 		he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
 		if (!he_cap)

From d656a4c6ead6c3f252b2f2532bc9735598f7e317 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 10 Jun 2021 22:08:16 +0200
Subject: [PATCH 2676/3292] mac80211: consider per-CPU statistics if present

If we have been keeping per-CPU statistics, consider them
regardless of USES_RSS, because we may not actually fill
those, for example in non-fast-RX cases when the connection
is not compatible with fast-RX. If we didn't fill them, the
additional data will be zero and not affect anything, and
if we did fill them then it's more correct to consider them.

This fixes an issue in mesh mode where some statistics are
not updated due to USES_RSS being set, but fast-RX isn't
used.

Reported-by: Thiraviyam Mariyappan <tmariyap@codeaurora.org>
Link: https://lore.kernel.org/r/20210610220814.13b35f5797c5.I511e9b33c5694e0d6cef4b6ae755c873d7c22124@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f2fb69da9b6e..641a6657d0c9 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2093,10 +2093,9 @@ static struct ieee80211_sta_rx_stats *
 sta_get_last_rx_stats(struct sta_info *sta)
 {
 	struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
-	struct ieee80211_local *local = sta->local;
 	int cpu;
 
-	if (!ieee80211_hw_check(&local->hw, USES_RSS))
+	if (!sta->pcpu_rx_stats)
 		return stats;
 
 	for_each_possible_cpu(cpu) {
@@ -2196,9 +2195,7 @@ static void sta_set_tidstats(struct sta_info *sta,
 	int cpu;
 
 	if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
-		if (!ieee80211_hw_check(&local->hw, USES_RSS))
-			tidstats->rx_msdu +=
-				sta_get_tidstats_msdu(&sta->rx_stats, tid);
+		tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid);
 
 		if (sta->pcpu_rx_stats) {
 			for_each_possible_cpu(cpu) {
@@ -2277,7 +2274,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 		sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal;
 
 	drv_sta_statistics(local, sdata, &sta->sta, sinfo);
-
 	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
 			 BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
 			 BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
@@ -2312,8 +2308,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
 	if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
 			       BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
-		if (!ieee80211_hw_check(&local->hw, USES_RSS))
-			sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
+		sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
 
 		if (sta->pcpu_rx_stats) {
 			for_each_possible_cpu(cpu) {

From 3f9d9725cb7daf7e9a834aa4f24d88b049c3c1f5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 16 Jun 2021 21:28:05 +0200
Subject: [PATCH 2677/3292] mac80211: don't open-code LED manipulations

We shouldn't open-code led_trigger_blink() or led_trigger_event(),
use them instead of badly open-coding them. This also fixes the
locking, led_trigger_blink() and led_trigger_event() now use
read_lock_irqsave().

Link: https://lore.kernel.org/r/20210616212804.b19ba1c60353.I8ea1b4defd5e12fc20ef281291e602feeec336a6@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/led.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index b275c8853074..6de8d0ad5497 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -259,7 +259,6 @@ static void tpt_trig_timer(struct timer_list *t)
 {
 	struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer);
 	struct ieee80211_local *local = tpt_trig->local;
-	struct led_classdev *led_cdev;
 	unsigned long on, off, tpt;
 	int i;
 
@@ -283,10 +282,7 @@ static void tpt_trig_timer(struct timer_list *t)
 		}
 	}
 
-	read_lock(&local->tpt_led.leddev_list_lock);
-	list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
-		led_blink_set(led_cdev, &on, &off);
-	read_unlock(&local->tpt_led.leddev_list_lock);
+	led_trigger_blink(&local->tpt_led, &on, &off);
 }
 
 const char *
@@ -341,7 +337,6 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
 static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
 {
 	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
-	struct led_classdev *led_cdev;
 
 	if (!tpt_trig->running)
 		return;
@@ -349,10 +344,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
 	tpt_trig->running = false;
 	del_timer_sync(&tpt_trig->timer);
 
-	read_lock(&local->tpt_led.leddev_list_lock);
-	list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
-		led_set_brightness(led_cdev, LED_OFF);
-	read_unlock(&local->tpt_led.leddev_list_lock);
+	led_trigger_event(&local->tpt_led, LED_OFF);
 }
 
 void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,

From 358ae88881adc3ac1544104272eb7e9408f80b39 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 16 Jun 2021 23:28:26 +0300
Subject: [PATCH 2678/3292] cfg80211: expose the rfkill device to the low level
 driver

This will allow the low level driver to query the rfkill
state.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20210616202826.9833-1-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h     |  9 ++++++++-
 net/wireless/core.c        | 34 +++++++++++++---------------------
 net/wireless/core.h        |  3 +--
 net/wireless/nl80211.c     |  4 ++--
 net/wireless/wext-compat.c |  6 +++---
 5 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 60325b62daae..b3bc58ec9098 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -22,6 +22,7 @@
 #include <linux/if_ether.h>
 #include <linux/ieee80211.h>
 #include <linux/net.h>
+#include <linux/rfkill.h>
 #include <net/regulatory.h>
 
 /**
@@ -4943,6 +4944,7 @@ struct wiphy_iftype_akm_suites {
  *	configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and
  *	%NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes
  * @sar_capa: SAR control capabilities
+ * @rfkill: a pointer to the rfkill structure
  */
 struct wiphy {
 	struct mutex mtx;
@@ -5085,6 +5087,8 @@ struct wiphy {
 
 	const struct cfg80211_sar_capa *sar_capa;
 
+	struct rfkill *rfkill;
+
 	char priv[] __aligned(NETDEV_ALIGN);
 };
 
@@ -6659,7 +6663,10 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy);
  * wiphy_rfkill_stop_polling - stop polling rfkill
  * @wiphy: the wiphy
  */
-void wiphy_rfkill_stop_polling(struct wiphy *wiphy);
+static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
+{
+	rfkill_pause_polling(wiphy->rfkill);
+}
 
 /**
  * DOC: Vendor commands
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 47f551301592..41c15cc7791f 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -532,11 +532,11 @@ use_default_name:
 	wiphy_net_set(&rdev->wiphy, &init_net);
 
 	rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block;
-	rdev->rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev),
-				   &rdev->wiphy.dev, RFKILL_TYPE_WLAN,
-				   &rdev->rfkill_ops, rdev);
+	rdev->wiphy.rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev),
+					  &rdev->wiphy.dev, RFKILL_TYPE_WLAN,
+					  &rdev->rfkill_ops, rdev);
 
-	if (!rdev->rfkill) {
+	if (!rdev->wiphy.rfkill) {
 		wiphy_free(&rdev->wiphy);
 		return NULL;
 	}
@@ -985,10 +985,10 @@ int wiphy_register(struct wiphy *wiphy)
 	rdev->wiphy.registered = true;
 	rtnl_unlock();
 
-	res = rfkill_register(rdev->rfkill);
+	res = rfkill_register(rdev->wiphy.rfkill);
 	if (res) {
-		rfkill_destroy(rdev->rfkill);
-		rdev->rfkill = NULL;
+		rfkill_destroy(rdev->wiphy.rfkill);
+		rdev->wiphy.rfkill = NULL;
 		wiphy_unregister(&rdev->wiphy);
 		return res;
 	}
@@ -1004,18 +1004,10 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy)
 	if (!rdev->ops->rfkill_poll)
 		return;
 	rdev->rfkill_ops.poll = cfg80211_rfkill_poll;
-	rfkill_resume_polling(rdev->rfkill);
+	rfkill_resume_polling(wiphy->rfkill);
 }
 EXPORT_SYMBOL(wiphy_rfkill_start_polling);
 
-void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
-{
-	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
-
-	rfkill_pause_polling(rdev->rfkill);
-}
-EXPORT_SYMBOL(wiphy_rfkill_stop_polling);
-
 void wiphy_unregister(struct wiphy *wiphy)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1027,8 +1019,8 @@ void wiphy_unregister(struct wiphy *wiphy)
 		wiphy_unlock(&rdev->wiphy);
 		__count == 0; }));
 
-	if (rdev->rfkill)
-		rfkill_unregister(rdev->rfkill);
+	if (rdev->wiphy.rfkill)
+		rfkill_unregister(rdev->wiphy.rfkill);
 
 	rtnl_lock();
 	wiphy_lock(&rdev->wiphy);
@@ -1080,7 +1072,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
 {
 	struct cfg80211_internal_bss *scan, *tmp;
 	struct cfg80211_beacon_registration *reg, *treg;
-	rfkill_destroy(rdev->rfkill);
+	rfkill_destroy(rdev->wiphy.rfkill);
 	list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
 		list_del(&reg->list);
 		kfree(reg);
@@ -1102,7 +1094,7 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
-	if (rfkill_set_hw_state_reason(rdev->rfkill, blocked, reason))
+	if (rfkill_set_hw_state_reason(wiphy->rfkill, blocked, reason))
 		schedule_work(&rdev->rfkill_block);
 }
 EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
@@ -1495,7 +1487,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 					     wdev->use_4addr, 0))
 			return notifier_from_errno(-EOPNOTSUPP);
 
-		if (rfkill_blocked(rdev->rfkill))
+		if (rfkill_blocked(rdev->wiphy.rfkill))
 			return notifier_from_errno(-ERFKILL);
 		break;
 	default:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a7d19b4b40ac..b35d0db12f1d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
  * Wireless configuration interface internals.
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #ifndef __NET_WIRELESS_CORE_H
 #define __NET_WIRELESS_CORE_H
@@ -27,7 +27,6 @@ struct cfg80211_registered_device {
 
 	/* rfkill support */
 	struct rfkill_ops rfkill_ops;
-	struct rfkill *rfkill;
 	struct work_struct rfkill_block;
 
 	/* ISO / IEC 3166 alpha2 for which this device is receiving
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 912977bf3ec8..c62d61d8aa02 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13041,7 +13041,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
 	if (wdev_running(wdev))
 		return 0;
 
-	if (rfkill_blocked(rdev->rfkill))
+	if (rfkill_blocked(rdev->wiphy.rfkill))
 		return -ERFKILL;
 
 	err = rdev_start_p2p_device(rdev, wdev);
@@ -13083,7 +13083,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
 	if (wdev_running(wdev))
 		return -EEXIST;
 
-	if (rfkill_blocked(rdev->rfkill))
+	if (rfkill_blocked(rdev->wiphy.rfkill))
 		return -ERFKILL;
 
 	if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 50a2330de236..a32065d600a1 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -902,7 +902,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
 
 	/* only change when not disabling */
 	if (!data->txpower.disabled) {
-		rfkill_set_sw_state(rdev->rfkill, false);
+		rfkill_set_sw_state(rdev->wiphy.rfkill, false);
 
 		if (data->txpower.fixed) {
 			/*
@@ -927,7 +927,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
 			}
 		}
 	} else {
-		if (rfkill_set_sw_state(rdev->rfkill, true))
+		if (rfkill_set_sw_state(rdev->wiphy.rfkill, true))
 			schedule_work(&rdev->rfkill_block);
 		return 0;
 	}
@@ -963,7 +963,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
 
 	/* well... oh well */
 	data->txpower.fixed = 1;
-	data->txpower.disabled = rfkill_blocked(rdev->rfkill);
+	data->txpower.disabled = rfkill_blocked(rdev->wiphy.rfkill);
 	data->txpower.value = val;
 	data->txpower.flags = IW_TXPOW_DBM;
 

From 08a46c6420013c4ecb61262b4869fdd7e82f918a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 17 Jun 2021 18:31:11 +0200
Subject: [PATCH 2679/3292] mac80211: move A-MPDU session check from
 minstrel_ht to mac80211

This avoids calling back into tx handlers from within the rate control module.
Preparation for deferring rate control until tx dequeue

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20210617163113.75815-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h             |  5 +++++
 net/mac80211/rc80211_minstrel_ht.c | 28 +---------------------------
 net/mac80211/tx.c                  | 27 +++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4e876e4598e3..c09cd0e4a6b3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6184,6 +6184,11 @@ enum rate_control_capabilities {
 	 * otherwise the NSS difference doesn't bother us.
 	 */
 	RATE_CTRL_CAPA_VHT_EXT_NSS_BW = BIT(0),
+	/**
+	 * @RATE_CTRL_CAPA_AMPDU_TRIGGER:
+	 * mac80211 should start A-MPDU sessions on tx
+	 */
+	RATE_CTRL_CAPA_AMPDU_TRIGGER = BIT(1),
 };
 
 struct rate_control_ops {
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index bc261d086410..20f2e0bef96b 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1175,29 +1175,6 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
 	}
 }
 
-static void
-minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
-{
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
-	u16 tid;
-
-	if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
-		return;
-
-	if (unlikely(!ieee80211_is_data_qos(hdr->frame_control)))
-		return;
-
-	if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
-		return;
-
-	tid = ieee80211_get_tid(hdr);
-	if (likely(sta->ampdu_mlme.tid_tx[tid]))
-		return;
-
-	ieee80211_start_tx_ba_session(pubsta, tid, 0);
-}
-
 static void
 minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
                       void *priv_sta, struct ieee80211_tx_status *st)
@@ -1502,10 +1479,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
 	struct minstrel_priv *mp = priv;
 	u16 sample_idx;
 
-	if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&
-	    !minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_prob_rate)))
-		minstrel_aggr_check(sta, txrc->skb);
-
 	info->flags |= mi->tx_flags;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
@@ -1911,6 +1884,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
 
 static const struct rate_control_ops mac80211_minstrel_ht = {
 	.name = "minstrel_ht",
+	.capa = RATE_CTRL_CAPA_AMPDU_TRIGGER,
 	.tx_status_ext = minstrel_ht_tx_status,
 	.get_rate = minstrel_ht_get_rate,
 	.rate_init = minstrel_ht_rate_init,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2651498d05e8..b5d05eccfae8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3949,6 +3949,29 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+		     struct sta_info *sta,
+		     struct sk_buff *skb)
+{
+	struct rate_control_ref *ref = sdata->local->rate_ctrl;
+	u16 tid;
+
+	if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+		return;
+
+	if (!sta || !sta->sta.ht_cap.ht_supported ||
+	    !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+	    skb->protocol == sdata->control_port_protocol)
+		return;
+
+	tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+	if (likely(sta->ampdu_mlme.tid_tx[tid]))
+		return;
+
+	ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags,
@@ -3979,6 +4002,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 		skb_get_hash(skb);
 	}
 
+	ieee80211_aggr_check(sdata, sta, skb);
+
 	if (sta) {
 		struct ieee80211_fast_tx *fast_tx;
 
@@ -4242,6 +4267,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
 
 	memset(info, 0, sizeof(*info));
 
+	ieee80211_aggr_check(sdata, sta, skb);
+
 	tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 	tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
 	if (tid_tx) {

From 03c3911d2d67a43ad4ffd15b534a5905d6ce5c59 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 17 Jun 2021 18:31:12 +0200
Subject: [PATCH 2680/3292] mac80211: call ieee80211_tx_h_rate_ctrl() when
 dequeue

Make ieee80211_tx_h_rate_ctrl() get called on dequeue to improve
performance since it reduces the turnaround time for rate control.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Link: https://lore.kernel.org/r/20210617163113.75815-2-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 52 ++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b5d05eccfae8..b9ff455ee01c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1768,8 +1768,6 @@ static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
 	CALL_TXH(ieee80211_tx_h_ps_buf);
 	CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
 	CALL_TXH(ieee80211_tx_h_select_key);
-	if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
-		CALL_TXH(ieee80211_tx_h_rate_ctrl);
 
  txh_done:
 	if (unlikely(res == TX_DROP)) {
@@ -1802,6 +1800,9 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
 		goto txh_done;
 	}
 
+	if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
+		CALL_TXH(ieee80211_tx_h_rate_ctrl);
+
 	CALL_TXH(ieee80211_tx_h_michael_mic_add);
 	CALL_TXH(ieee80211_tx_h_sequence);
 	CALL_TXH(ieee80211_tx_h_fragment);
@@ -3389,15 +3390,21 @@ out:
  * Can be called while the sta lock is held. Anything that can cause packets to
  * be generated will cause deadlock!
  */
-static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
-				       struct sta_info *sta, u8 pn_offs,
-				       struct ieee80211_key *key,
-				       struct sk_buff *skb)
+static ieee80211_tx_result
+ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+			   struct sta_info *sta, u8 pn_offs,
+			   struct ieee80211_key *key,
+			   struct ieee80211_tx_data *tx)
 {
+	struct sk_buff *skb = tx->skb;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	u8 tid = IEEE80211_NUM_TIDS;
 
+	if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL) &&
+	    ieee80211_tx_h_rate_ctrl(tx) != TX_CONTINUE)
+		return TX_DROP;
+
 	if (key)
 		info->control.hw_key = &key->conf;
 
@@ -3446,6 +3453,8 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
 			break;
 		}
 	}
+
+	return TX_CONTINUE;
 }
 
 static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
@@ -3549,24 +3558,17 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
 	tx.sta = sta;
 	tx.key = fast_tx->key;
 
-	if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
-		tx.skb = skb;
-		r = ieee80211_tx_h_rate_ctrl(&tx);
-		skb = tx.skb;
-		tx.skb = NULL;
-
-		if (r != TX_CONTINUE) {
-			if (r != TX_QUEUED)
-				kfree_skb(skb);
-			return true;
-		}
-	}
-
 	if (ieee80211_queue_skb(local, sdata, sta, skb))
 		return true;
 
-	ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
-				   fast_tx->key, skb);
+	tx.skb = skb;
+	r = ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+				       fast_tx->key, &tx);
+	tx.skb = NULL;
+	if (r == TX_DROP) {
+		kfree_skb(skb);
+		return true;
+	}
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss,
@@ -3683,8 +3685,12 @@ begin:
 		    (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
 			pn_offs = ieee80211_hdrlen(hdr->frame_control);
 
-		ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
-					   tx.key, skb);
+		r = ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+					       tx.key, &tx);
+		if (r != TX_CONTINUE) {
+			ieee80211_free_txskb(&local->hw, skb);
+			goto begin;
+		}
 	} else {
 		if (invoke_tx_handlers_late(&tx))
 			goto begin;

From 3187ba0cea77c8a4cdaed44fbff02c6e63e509aa Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 17 Jun 2021 18:31:13 +0200
Subject: [PATCH 2681/3292] mac80211: add rate control support for encap
 offload

The software rate control cannot deal with encap offload, so fix it.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Link: https://lore.kernel.org/r/20210617163113.75815-3-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rate.c |  3 +--
 net/mac80211/tx.c   | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 9418daa52f68..e5935e3d7a07 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -870,7 +870,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
 			    int max_rates)
 {
 	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_supported_band *sband;
 
@@ -882,7 +881,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
 	sdata = vif_to_sdata(vif);
 	sband = sdata->local->hw.wiphy->bands[info->band];
 
-	if (ieee80211_is_data(hdr->frame_control))
+	if (ieee80211_is_tx_data(skb))
 		rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
 
 	if (dest[0].idx < 0)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b9ff455ee01c..ac506245bb94 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -666,6 +666,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	u32 len;
 	struct ieee80211_tx_rate_control txrc;
 	struct ieee80211_sta_rates *ratetbl = NULL;
+	bool encap = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
 	bool assoc = false;
 
 	memset(&txrc, 0, sizeof(txrc));
@@ -707,7 +708,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	 * just wants a probe response.
 	 */
 	if (tx->sdata->vif.bss_conf.use_short_preamble &&
-	    (ieee80211_is_data(hdr->frame_control) ||
+	    (ieee80211_is_tx_data(tx->skb) ||
 	     (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE))))
 		txrc.short_preamble = true;
 
@@ -729,7 +730,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		 "%s: Dropped data frame as no usable bitrate found while "
 		 "scanning and associated. Target station: "
 		 "%pM on %d GHz band\n",
-		 tx->sdata->name, hdr->addr1,
+		 tx->sdata->name,
+		 encap ? ((struct ethhdr *)hdr)->h_dest : hdr->addr1,
 		 info->band ? 5 : 2))
 		return TX_DROP;
 
@@ -763,7 +765,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 
 	if (txrc.reported_rate.idx < 0) {
 		txrc.reported_rate = tx->rate;
-		if (tx->sta && ieee80211_is_data(hdr->frame_control))
+		if (tx->sta && ieee80211_is_tx_data(tx->skb))
 			tx->sta->tx_stats.last_rate = txrc.reported_rate;
 	} else if (tx->sta)
 		tx->sta->tx_stats.last_rate = txrc.reported_rate;
@@ -3673,8 +3675,16 @@ begin:
 	else
 		info->flags &= ~IEEE80211_TX_CTL_AMPDU;
 
-	if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)
+	if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
+		if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+			r = ieee80211_tx_h_rate_ctrl(&tx);
+			if (r != TX_CONTINUE) {
+				ieee80211_free_txskb(&local->hw, skb);
+				goto begin;
+			}
+		}
 		goto encap_out;
+	}
 
 	if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
 		struct sta_info *sta = container_of(txq->sta, struct sta_info,

From 4f2e3eb6c985cc8c2b88d5089b1920b831bc5ed9 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 18 Jun 2021 04:38:59 +0800
Subject: [PATCH 2682/3292] mac80211: check per vif offload_flags in Tx path

offload_flags has been introduced to indicate encap status of each interface.
An interface can encap offload at runtime, or if it has some extra limitations
it can simply override the flags, so it's more flexible to check offload_flags
in Tx path.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Link: https://lore.kernel.org/r/177785418cf407808bf3a44760302d0647076990.1623961575.git.ryder.lee@mediatek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ac506245bb94..caa7caa89ab9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3287,6 +3287,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
 		return false;
 
+	if (sdata->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED)
+		return false;
+
 	if (skb_is_gso(skb))
 		return false;
 

From e6ed929b4140d293bf8523f0376ed9bbdce29c47 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Sun, 23 May 2021 23:36:24 -0400
Subject: [PATCH 2683/3292] wireless: add check of field VHT Extended NSS BW
 Capable for 160/80+80 MHz setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Table 9-251—Supported VHT-MCS and NSS Set subfields, it has subfield VHT Extended
NSS BW Capable, its definition is:
Indicates whether the STA is capable of interpreting the Extended NSS BW
Support subfield of the VHT Capabilities Information field.

This patch is to add check for the subfield.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Link: https://lore.kernel.org/r/20210524033624.16993-1-wgong@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/chan.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 285b8076054b..472c895823a4 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -942,7 +942,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 	struct ieee80211_sta_vht_cap *vht_cap;
 	struct ieee80211_edmg *edmg_cap;
 	u32 width, control_freq, cap;
-	bool support_80_80 = false;
+	bool ext_nss_cap, support_80_80 = false;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
 		return false;
@@ -950,6 +950,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 	ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap;
 	vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap;
 	edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap;
+	ext_nss_cap = __le16_to_cpu(vht_cap->vht_mcs.tx_highest) &
+			IEEE80211_VHT_EXT_NSS_BW_CAPABLE;
 
 	if (edmg_cap->channels &&
 	    !cfg80211_edmg_usable(wiphy,
@@ -1015,7 +1017,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 			(cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) ||
 			(cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
 			 cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) ||
-			u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1;
+			(ext_nss_cap &&
+			 u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1);
 		if (chandef->chan->band != NL80211_BAND_6GHZ && !support_80_80)
 			return false;
 		fallthrough;
@@ -1037,7 +1040,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
 		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
 		    cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ &&
-		    !(vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))
+		    !(ext_nss_cap &&
+		      (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)))
 			return false;
 		break;
 	default:

From 626c30f9e77354301ff9162c3bdddaf92d9b5cf3 Mon Sep 17 00:00:00 2001
From: Weilun Du <wdu@google.com>
Date: Thu, 6 May 2021 11:05:29 -0700
Subject: [PATCH 2684/3292] mac80211_hwsim: add concurrent channels scanning
 support over virtio

This fixed the crash when setting channels to 2 or more when
communicating over virtio.

Signed-off-by: Weilun Du <wdu@google.com>
Link: https://lore.kernel.org/r/20210506180530.3418576-1-wdu@google.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 48 +++++++++++++++++++++------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9574afc0cdbf..7a5764003f81 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -626,6 +626,7 @@ struct mac80211_hwsim_data {
 	u32 ciphers[ARRAY_SIZE(hwsim_ciphers)];
 
 	struct mac_address addresses[2];
+	struct ieee80211_chanctx_conf *chanctx;
 	int channels, idx;
 	bool use_chanctx;
 	bool destroy_on_close;
@@ -1257,7 +1258,8 @@ static inline u16 trans_tx_rate_flags_ieee2hwsim(struct ieee80211_tx_rate *rate)
 
 static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
 				       struct sk_buff *my_skb,
-				       int dst_portid)
+				       int dst_portid,
+				       struct ieee80211_channel *channel)
 {
 	struct sk_buff *skb;
 	struct mac80211_hwsim_data *data = hw->priv;
@@ -1312,7 +1314,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
 	if (nla_put_u32(skb, HWSIM_ATTR_FLAGS, hwsim_flags))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, HWSIM_ATTR_FREQ, data->channel->center_freq))
+	if (nla_put_u32(skb, HWSIM_ATTR_FREQ, channel->center_freq))
 		goto nla_put_failure;
 
 	/* We get the tx control (rate and retries) info*/
@@ -1659,7 +1661,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
 	_portid = READ_ONCE(data->wmediumd);
 
 	if (_portid || hwsim_virtio_enabled)
-		return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
+		return mac80211_hwsim_tx_frame_nl(hw, skb, _portid, channel);
 
 	/* NO wmediumd detected, perfect medium simulation */
 	data->tx_pkts++;
@@ -1775,7 +1777,7 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 	mac80211_hwsim_monitor_rx(hw, skb, chan);
 
 	if (_pid || hwsim_virtio_enabled)
-		return mac80211_hwsim_tx_frame_nl(hw, skb, _pid);
+		return mac80211_hwsim_tx_frame_nl(hw, skb, _pid, chan);
 
 	mac80211_hwsim_tx_frame_no_nl(hw, skb, chan);
 	dev_kfree_skb(skb);
@@ -2514,6 +2516,11 @@ static int mac80211_hwsim_croc(struct ieee80211_hw *hw,
 static int mac80211_hwsim_add_chanctx(struct ieee80211_hw *hw,
 				      struct ieee80211_chanctx_conf *ctx)
 {
+	struct mac80211_hwsim_data *hwsim = hw->priv;
+
+	mutex_lock(&hwsim->mutex);
+	hwsim->chanctx = ctx;
+	mutex_unlock(&hwsim->mutex);
 	hwsim_set_chanctx_magic(ctx);
 	wiphy_dbg(hw->wiphy,
 		  "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
@@ -2525,6 +2532,11 @@ static int mac80211_hwsim_add_chanctx(struct ieee80211_hw *hw,
 static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw,
 					  struct ieee80211_chanctx_conf *ctx)
 {
+	struct mac80211_hwsim_data *hwsim = hw->priv;
+
+	mutex_lock(&hwsim->mutex);
+	hwsim->chanctx = NULL;
+	mutex_unlock(&hwsim->mutex);
 	wiphy_dbg(hw->wiphy,
 		  "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
 		  ctx->def.chan->center_freq, ctx->def.width,
@@ -2537,6 +2549,11 @@ static void mac80211_hwsim_change_chanctx(struct ieee80211_hw *hw,
 					  struct ieee80211_chanctx_conf *ctx,
 					  u32 changed)
 {
+	struct mac80211_hwsim_data *hwsim = hw->priv;
+
+	mutex_lock(&hwsim->mutex);
+	hwsim->chanctx = ctx;
+	mutex_unlock(&hwsim->mutex);
 	hwsim_check_chanctx_magic(ctx);
 	wiphy_dbg(hw->wiphy,
 		  "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
@@ -3129,6 +3146,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 		hw->wiphy->max_remain_on_channel_duration = 1000;
 		data->if_combination.radar_detect_widths = 0;
 		data->if_combination.num_different_channels = data->channels;
+		data->chanctx = NULL;
 	} else {
 		data->if_combination.num_different_channels = 1;
 		data->if_combination.radar_detect_widths =
@@ -3638,6 +3656,7 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 	int frame_data_len;
 	void *frame_data;
 	struct sk_buff *skb = NULL;
+	struct ieee80211_channel *channel = NULL;
 
 	if (!info->attrs[HWSIM_ATTR_ADDR_RECEIVER] ||
 	    !info->attrs[HWSIM_ATTR_FRAME] ||
@@ -3664,6 +3683,17 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 	if (!data2)
 		goto out;
 
+	if (data2->use_chanctx) {
+		if (data2->tmp_chan)
+			channel = data2->tmp_chan;
+		else if (data2->chanctx)
+			channel = data2->chanctx->def.chan;
+	} else {
+		channel = data2->channel;
+	}
+	if (!channel)
+		goto out;
+
 	if (!hwsim_virtio_enabled) {
 		if (hwsim_net_get_netgroup(genl_info_net(info)) !=
 		    data2->netgroup)
@@ -3675,7 +3705,7 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 
 	/* check if radio is configured properly */
 
-	if (data2->idle || !data2->started)
+	if ((data2->idle && !data2->tmp_chan) || !data2->started)
 		goto out;
 
 	/* A frame is received from user space */
@@ -3688,18 +3718,16 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
 		mutex_lock(&data2->mutex);
 		rx_status.freq = nla_get_u32(info->attrs[HWSIM_ATTR_FREQ]);
 
-		if (rx_status.freq != data2->channel->center_freq &&
-		    (!data2->tmp_chan ||
-		     rx_status.freq != data2->tmp_chan->center_freq)) {
+		if (rx_status.freq != channel->center_freq) {
 			mutex_unlock(&data2->mutex);
 			goto out;
 		}
 		mutex_unlock(&data2->mutex);
 	} else {
-		rx_status.freq = data2->channel->center_freq;
+		rx_status.freq = channel->center_freq;
 	}
 
-	rx_status.band = data2->channel->band;
+	rx_status.band = channel->band;
 	rx_status.rate_idx = nla_get_u32(info->attrs[HWSIM_ATTR_RX_RATE]);
 	rx_status.signal = nla_get_u32(info->attrs[HWSIM_ATTR_SIGNAL]);
 

From 93efeeea0fe6b3352e492e855a7262bc5645af14 Mon Sep 17 00:00:00 2001
From: Nguyen Dinh Phi <phind.uet@gmail.com>
Date: Tue, 22 Jun 2021 23:24:29 +0800
Subject: [PATCH 2685/3292] mac80211_hwsim: record stats in non-netlink path

The data go through mac80211_hwsim_tx_frame_no_nl() wasn't counted.

Signed-off-by: Nguyen Dinh Phi <phind.uet@gmail.com>
Link: https://lore.kernel.org/r/20210622152429.881230-1-phind.uet@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 7a5764003f81..ffa894f7312a 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1779,6 +1779,8 @@ static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
 	if (_pid || hwsim_virtio_enabled)
 		return mac80211_hwsim_tx_frame_nl(hw, skb, _pid, chan);
 
+	data->tx_pkts++;
+	data->tx_bytes += skb->len;
 	mac80211_hwsim_tx_frame_no_nl(hw, skb, chan);
 	dev_kfree_skb(skb);
 }

From 1806239dec0dacde373f0b53f076319f6c6d95cb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 19 Jun 2021 15:36:30 +0200
Subject: [PATCH 2686/3292] ieee80211: add the value for Category '6' in
 "rtw_ieee80211_category"

Preparation work for removing the "enum rtw_ieee80211_category" in
"drivers/staging/rtl8188eu/include/ieee80211.h" and
"drivers/staging/rtl8723bs/include/ieee80211.h".

This enum is similar to "enum ieee80211_category" from
"include/linux/ieee80211.h". However it defines the value '6' as
RTW_WLAN_CATEGORY_FT.

So add a corresponding value in "ieee80211_category"

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/66be0187869bd7dae1c0b0785a32db695ee9872e.1624108556.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 2967437f1b11..67f3e51e7ecc 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2933,6 +2933,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_BACK = 3,
 	WLAN_CATEGORY_PUBLIC = 4,
 	WLAN_CATEGORY_RADIO_MEASUREMENT = 5,
+	WLAN_CATEGORY_FAST_BBS_TRANSITION = 6,
 	WLAN_CATEGORY_HT = 7,
 	WLAN_CATEGORY_SA_QUERY = 8,
 	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,

From e41eb3e408de27982a5f8f50b2dd8002bed96908 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 19 Jun 2021 12:15:17 +0200
Subject: [PATCH 2687/3292] mac80211: remove iwlwifi specific workaround that
 broke sta NDP tx

Sending nulldata packets is important for sw AP link probing and detecting
4-address mode links. The checks that dropped these packets were apparently
added to work around an iwlwifi firmware bug with multi-TID aggregation.

Fixes: 41cbb0f5a295 ("mac80211: add support for HE")
Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20210619101517.90806-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +++
 net/mac80211/mlme.c                         | 9 ---------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 1ad621d13ad3..0a13c2bda2ee 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1032,6 +1032,9 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
 	if (WARN_ON_ONCE(mvmsta->sta_id == IWL_MVM_INVALID_STA))
 		return -1;
 
+	if (unlikely(ieee80211_is_any_nullfunc(fc)) && sta->he_cap.has_he)
+		return -1;
+
 	if (unlikely(ieee80211_is_probe_resp(fc)))
 		iwl_mvm_probe_resp_set_noa(mvm, skb);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3f2aad2e7436..b1c44fa63a06 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1094,11 +1094,6 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	struct ieee80211_hdr_3addr *nullfunc;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
-	/* Don't send NDPs when STA is connected HE */
-	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    !(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
-		return;
-
 	skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
 		!ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
 	if (!skb)
@@ -1130,10 +1125,6 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
 	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
 		return;
 
-	/* Don't send NDPs when connected HE */
-	if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
-		return;
-
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30);
 	if (!skb)
 		return;

From 10a35c222bc6fdd71421e800003b4c4c02d41bba Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:32 +0300
Subject: [PATCH 2688/3292] mac80211: allow SMPS requests only in client mode

The code currently allows this for AP mode, but then ignores
it. Clarify that since the spec doesn't allow it in AP mode.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.0465f8bcbe32.Iba39fc559ecfa887be00a5f3beabd881e5c86e54@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ht.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3d62a80b5790..2eb7641f5556 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright 2017	Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -555,17 +555,15 @@ void ieee80211_request_smps(struct ieee80211_vif *vif,
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 
-	if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION &&
-			 vif->type != NL80211_IFTYPE_AP))
+	if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION))
 		return;
 
-	if (vif->type == NL80211_IFTYPE_STATION) {
-		if (sdata->u.mgd.driver_smps_mode == smps_mode)
-			return;
-		sdata->u.mgd.driver_smps_mode = smps_mode;
-		ieee80211_queue_work(&sdata->local->hw,
-				     &sdata->u.mgd.request_smps_work);
-	}
+	if (sdata->u.mgd.driver_smps_mode == smps_mode)
+		return;
+
+	sdata->u.mgd.driver_smps_mode = smps_mode;
+	ieee80211_queue_work(&sdata->local->hw,
+			     &sdata->u.mgd.request_smps_work);
 }
 /* this might change ... don't want non-open drivers using it */
 EXPORT_SYMBOL_GPL(ieee80211_request_smps);

From 79ea0a5fad749dabfd7b8a1b73dd6662383762d1 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Fri, 18 Jun 2021 13:41:34 +0300
Subject: [PATCH 2689/3292] mac80211: move SMPS mode setting after
 ieee80211_prep_connection

ieee80211_mgd_assoc calls ieee80211_prep_connection which
might call ieee80211_prep_channel and set smps_mode to OFF.
That will override the previous setting of smps_mode in
ieee80211_mgd_assoc and HT SMPS will be set to "disabled"
in the association request frame.

Move the setting of smps_mode in ieee80211_mgd_assoc to
after the call to ieee80211_prep_connection.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.d8e5cc4b527f.Icf3a67fffbdd8c408c0cadfe43f8f4cffdc90acb@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b1c44fa63a06..b33b155f3573 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5646,15 +5646,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		       2 * FILS_NONCE_LEN);
 
 	assoc_data->bss = req->bss;
-
-	if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
-		if (ifmgd->powersave)
-			sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
-		else
-			sdata->smps_mode = IEEE80211_SMPS_OFF;
-	} else
-		sdata->smps_mode = ifmgd->req_smps;
-
 	assoc_data->capability = req->bss->capability;
 	assoc_data->supp_rates = bss->supp_rates;
 	assoc_data->supp_rates_len = bss->supp_rates_len;
@@ -5761,6 +5752,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	if (err)
 		goto err_clear;
 
+	if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
+		if (ifmgd->powersave)
+			sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
+		else
+			sdata->smps_mode = IEEE80211_SMPS_OFF;
+	} else {
+		sdata->smps_mode = ifmgd->req_smps;
+	}
+
 	rcu_read_lock();
 	beacon_ies = rcu_dereference(req->bss->beacon_ies);
 

From 7d7b00759e56bd2c0ff8b1155cb00f452dfc1c5d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:33 +0300
Subject: [PATCH 2690/3292] mac80211: free skb in WEP error case

If, for some strange reason, ieee80211_wep_encrypt() fails in
ieee80211_send_auth() free the SKB instead of sending out the
useless frame, in addition to the warning. This can't really
happen since the SKB was freshly allocated.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.98f058d7a8b2.Ie605e6a10e72eae02f5734032826af48b85b6d11@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 060059ef9668..ee5410bfe9ec 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1693,7 +1693,10 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 	if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
 		mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
 		err = ieee80211_wep_encrypt(local, skb, key, key_len, key_idx);
-		WARN_ON(err);
+		if (WARN_ON(err)) {
+			kfree_skb(skb);
+			return;
+		}
 	}
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |

From d6c375095ade4ea4d20ada1f020c821bf0bfe7fa Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 18 Jun 2021 13:41:30 +0300
Subject: [PATCH 2691/3292] mac80211: handle rate control (RC) racing with
 chanctx definition

chanctx represents the current phy configuration and rate scale uses
it for achieving max throughput, so if phy changes bandwidth to narrow
bandwidth, RC should be _first_ updated to avoid using the wider bandwidth
before updating the phy, and vice versa.

We assume in the patch that station interface is always updated before
updating phy context by calling ieee80211_vif_update_chandef.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.fc4e24496aa2.Ic40ea947c2f65739ea4b5fe3babd0a544240ced6@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/chan.c | 110 ++++++++++++++++++++++++++++----------------
 net/mac80211/mlme.c |  89 +----------------------------------
 2 files changed, 72 insertions(+), 127 deletions(-)

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 907bb1f748a1..76fc36a68750 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * mac80211 - channel management
+ * Copyright 2020 - 2021 Intel Corporation
  */
 
 #include <linux/nl80211.h>
@@ -308,8 +309,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
  * the max of min required widths of all the interfaces bound to this
  * channel context.
  */
-void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
-				      struct ieee80211_chanctx *ctx)
+static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+					     struct ieee80211_chanctx *ctx)
 {
 	enum nl80211_chan_width max_bw;
 	struct cfg80211_chan_def min_def;
@@ -326,7 +327,7 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
 	    ctx->conf.def.width == NL80211_CHAN_WIDTH_16 ||
 	    ctx->conf.radar_enabled) {
 		ctx->conf.min_def = ctx->conf.def;
-		return;
+		return 0;
 	}
 
 	max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf);
@@ -337,17 +338,21 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
 		ieee80211_chandef_downgrade(&min_def);
 
 	if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def))
-		return;
+		return 0;
 
 	ctx->conf.min_def = min_def;
 	if (!ctx->driver_present)
-		return;
+		return 0;
 
-	drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH);
+	return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH;
 }
 
+/* calling this function is assuming that station vif is updated to
+ * lates changes by calling ieee80211_vif_update_chandef
+ */
 static void ieee80211_chan_bw_change(struct ieee80211_local *local,
-				     struct ieee80211_chanctx *ctx)
+				     struct ieee80211_chanctx *ctx,
+				     bool narrowed)
 {
 	struct sta_info *sta;
 	struct ieee80211_supported_band *sband =
@@ -366,9 +371,16 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
 			continue;
 
 		new_sta_bw = ieee80211_sta_cur_vht_bw(sta);
+
+		/* nothing change */
 		if (new_sta_bw == sta->sta.bandwidth)
 			continue;
 
+		/* vif changed to narrow BW and narrow BW for station wasn't
+		 * requested or vise versa */
+		if ((new_sta_bw < sta->sta.bandwidth) == !narrowed)
+			continue;
+
 		sta->sta.bandwidth = new_sta_bw;
 		rate_control_rate_update(local, sband, sta,
 					 IEEE80211_RC_BW_CHANGED);
@@ -376,21 +388,34 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
 	rcu_read_unlock();
 }
 
+/*
+ * recalc the min required chan width of the channel context, which is
+ * the max of min required widths of all the interfaces bound to this
+ * channel context.
+ */
+void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+				      struct ieee80211_chanctx *ctx)
+{
+	u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx);
+
+	if (!changed)
+		return;
+
+	/* check is BW narrowed */
+	ieee80211_chan_bw_change(local, ctx, true);
+
+	drv_change_chanctx(local, ctx, changed);
+
+	/* check is BW wider */
+	ieee80211_chan_bw_change(local, ctx, false);
+}
+
 static void ieee80211_change_chanctx(struct ieee80211_local *local,
 				     struct ieee80211_chanctx *ctx,
+				     struct ieee80211_chanctx *old_ctx,
 				     const struct cfg80211_chan_def *chandef)
 {
-	enum nl80211_chan_width width;
-
-	if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
-		ieee80211_recalc_chanctx_min_def(local, ctx);
-		return;
-	}
-
-	WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
-
-	width = ctx->conf.def.width;
-	ctx->conf.def = *chandef;
+	u32 changed;
 
 	/* expected to handle only 20/40/80/160 channel widths */
 	switch (chandef->width) {
@@ -405,19 +430,33 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
 		WARN_ON(1);
 	}
 
-	if (chandef->width < width)
-		ieee80211_chan_bw_change(local, ctx);
+	/* Check maybe BW narrowed - we do this _before_ calling recalc_chanctx_min_def
+	 * due to maybe not returning from it, e.g in case new context was added
+	 * first time with all parameters up to date.
+	 */
+	ieee80211_chan_bw_change(local, old_ctx, true);
 
-	drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH);
-	ieee80211_recalc_chanctx_min_def(local, ctx);
+	if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
+		ieee80211_recalc_chanctx_min_def(local, ctx);
+		return;
+	}
+
+	WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+
+	ctx->conf.def = *chandef;
+
+	/* check if min chanctx also changed */
+	changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
+		  _ieee80211_recalc_chanctx_min_def(local, ctx);
+	drv_change_chanctx(local, ctx, changed);
 
 	if (!local->use_chanctx) {
 		local->_oper_chandef = *chandef;
 		ieee80211_hw_config(local, 0);
 	}
 
-	if (chandef->width > width)
-		ieee80211_chan_bw_change(local, ctx);
+	/* check is BW wider */
+	ieee80211_chan_bw_change(local, old_ctx, false);
 }
 
 static struct ieee80211_chanctx *
@@ -450,7 +489,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
 		if (!compat)
 			continue;
 
-		ieee80211_change_chanctx(local, ctx, compat);
+		ieee80211_change_chanctx(local, ctx, ctx, compat);
 
 		return ctx;
 	}
@@ -679,7 +718,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
 	if (!compat)
 		return;
 
-	ieee80211_change_chanctx(local, ctx, compat);
+	ieee80211_change_chanctx(local, ctx, ctx, compat);
 }
 
 static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
@@ -1107,13 +1146,12 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
 	if (WARN_ON(!chandef))
 		return -EINVAL;
 
-	if (old_ctx->conf.def.width > new_ctx->conf.def.width)
-		ieee80211_chan_bw_change(local, new_ctx);
+	if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+		changed = BSS_CHANGED_BANDWIDTH;
 
-	ieee80211_change_chanctx(local, new_ctx, chandef);
+	ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
 
-	if (old_ctx->conf.def.width < new_ctx->conf.def.width)
-		ieee80211_chan_bw_change(local, new_ctx);
+	ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef);
 
 	vif_chsw[0].vif = &sdata->vif;
 	vif_chsw[0].old_ctx = &old_ctx->conf;
@@ -1142,14 +1180,9 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
 	if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
 		ieee80211_free_chanctx(local, old_ctx);
 
-	if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
-		changed = BSS_CHANGED_BANDWIDTH;
-
-	ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
-
+	ieee80211_recalc_chanctx_min_def(local, new_ctx);
 	ieee80211_recalc_smps_chanctx(local, new_ctx);
 	ieee80211_recalc_radar_chanctx(local, new_ctx);
-	ieee80211_recalc_chanctx_min_def(local, new_ctx);
 
 	if (changed)
 		ieee80211_bss_info_change_notify(sdata, changed);
@@ -1188,7 +1221,7 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
 	if (WARN_ON(!chandef))
 		return -EINVAL;
 
-	ieee80211_change_chanctx(local, new_ctx, chandef);
+	ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef);
 
 	list_del(&sdata->reserved_chanctx_list);
 	sdata->reserved_chanctx = NULL;
@@ -1505,7 +1538,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
 		ieee80211_recalc_smps_chanctx(local, ctx);
 		ieee80211_recalc_radar_chanctx(local, ctx);
 		ieee80211_recalc_chanctx_min_def(local, ctx);
-		ieee80211_chan_bw_change(local, ctx);
 
 		list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
 					 reserved_chanctx_list) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b33b155f3573..97efc20b9825 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -371,7 +371,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 	struct cfg80211_chan_def chandef;
 	u16 ht_opmode;
 	u32 flags;
-	enum ieee80211_sta_rx_bandwidth new_sta_bw;
 	u32 vht_cap_info = 0;
 	int ret;
 
@@ -450,35 +449,8 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 		return -EINVAL;
 	}
 
-	switch (chandef.width) {
-	case NL80211_CHAN_WIDTH_20_NOHT:
-	case NL80211_CHAN_WIDTH_20:
-		new_sta_bw = IEEE80211_STA_RX_BW_20;
-		break;
-	case NL80211_CHAN_WIDTH_40:
-		new_sta_bw = IEEE80211_STA_RX_BW_40;
-		break;
-	case NL80211_CHAN_WIDTH_80:
-		new_sta_bw = IEEE80211_STA_RX_BW_80;
-		break;
-	case NL80211_CHAN_WIDTH_80P80:
-	case NL80211_CHAN_WIDTH_160:
-		new_sta_bw = IEEE80211_STA_RX_BW_160;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (new_sta_bw > sta->cur_max_bandwidth)
-		new_sta_bw = sta->cur_max_bandwidth;
-
-	if (new_sta_bw < sta->sta.bandwidth) {
-		sta->sta.bandwidth = new_sta_bw;
-		rate_control_rate_update(local, sband, sta,
-					 IEEE80211_RC_BW_CHANGED);
-	}
-
 	ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed);
+
 	if (ret) {
 		sdata_info(sdata,
 			   "AP %pM changed bandwidth to incompatible one - disconnect\n",
@@ -486,12 +458,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 		return ret;
 	}
 
-	if (new_sta_bw > sta->sta.bandwidth) {
-		sta->sta.bandwidth = new_sta_bw;
-		rate_control_rate_update(local, sband, sta,
-					 IEEE80211_RC_BW_CHANGED);
-	}
-
 	return 0;
 }
 
@@ -1174,10 +1140,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	 */
 
 	if (sdata->reserved_chanctx) {
-		struct ieee80211_supported_band *sband = NULL;
-		struct sta_info *mgd_sta = NULL;
-		enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20;
-
 		/*
 		 * with multi-vif csa driver may call ieee80211_csa_finish()
 		 * many times while waiting for other interfaces to use their
@@ -1186,48 +1148,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 		if (sdata->reserved_ready)
 			goto out;
 
-		if (sdata->vif.bss_conf.chandef.width !=
-		    sdata->csa_chandef.width) {
-			/*
-			 * For managed interface, we need to also update the AP
-			 * station bandwidth and align the rate scale algorithm
-			 * on the bandwidth change. Here we only consider the
-			 * bandwidth of the new channel definition (as channel
-			 * switch flow does not have the full HT/VHT/HE
-			 * information), assuming that if additional changes are
-			 * required they would be done as part of the processing
-			 * of the next beacon from the AP.
-			 */
-			switch (sdata->csa_chandef.width) {
-			case NL80211_CHAN_WIDTH_20_NOHT:
-			case NL80211_CHAN_WIDTH_20:
-			default:
-				bw = IEEE80211_STA_RX_BW_20;
-				break;
-			case NL80211_CHAN_WIDTH_40:
-				bw = IEEE80211_STA_RX_BW_40;
-				break;
-			case NL80211_CHAN_WIDTH_80:
-				bw = IEEE80211_STA_RX_BW_80;
-				break;
-			case NL80211_CHAN_WIDTH_80P80:
-			case NL80211_CHAN_WIDTH_160:
-				bw = IEEE80211_STA_RX_BW_160;
-				break;
-			}
-
-			mgd_sta = sta_info_get(sdata, ifmgd->bssid);
-			sband =
-				local->hw.wiphy->bands[sdata->csa_chandef.chan->band];
-		}
-
-		if (sdata->vif.bss_conf.chandef.width >
-		    sdata->csa_chandef.width) {
-			mgd_sta->sta.bandwidth = bw;
-			rate_control_rate_update(local, sband, mgd_sta,
-						 IEEE80211_RC_BW_CHANGED);
-		}
-
 		ret = ieee80211_vif_use_reserved_context(sdata);
 		if (ret) {
 			sdata_info(sdata,
@@ -1238,13 +1158,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 			goto out;
 		}
 
-		if (sdata->vif.bss_conf.chandef.width <
-		    sdata->csa_chandef.width) {
-			mgd_sta->sta.bandwidth = bw;
-			rate_control_rate_update(local, sband, mgd_sta,
-						 IEEE80211_RC_BW_CHANGED);
-		}
-
 		goto out;
 	}
 

From 7da70d6cdf0dbc2c62e4a5759db9b63ef8d90c32 Mon Sep 17 00:00:00 2001
From: Krishnanand Prabhu <krishnanand.prabhu@intel.com>
Date: Fri, 18 Jun 2021 13:41:28 +0300
Subject: [PATCH 2692/3292] ieee80211: define timing measurement in extended
 capabilities IE

Define the bit used for timing measurement support in extended
capabilities IE, used for time synchronization.

Signed-off-by: Krishnanand Prabhu <krishnanand.prabhu@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.b75f40765538.I92b50e43e29272c97d17ed5f37f216f4caf0f205@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 67f3e51e7ecc..0a0aaa2d5d9e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -9,7 +9,7 @@
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (c) 2018 - 2020 Intel Corporation
+ * Copyright (c) 2018 - 2021 Intel Corporation
  */
 
 #ifndef LINUX_IEEE80211_H
@@ -3111,6 +3111,11 @@ enum ieee80211_tdls_actioncode {
  */
 #define WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT	BIT(6)
 
+/* Timing Measurement protocol for time sync is set in the 7th bit of 3rd byte
+ * of the @WLAN_EID_EXT_CAPABILITY information element
+ */
+#define WLAN_EXT_CAPA3_TIMING_MEASUREMENT_SUPPORT	BIT(7)
+
 /* TDLS capabilities in the 4th byte of @WLAN_EID_EXT_CAPABILITY */
 #define WLAN_EXT_CAPA4_TDLS_BUFFER_STA		BIT(4)
 #define WLAN_EXT_CAPA4_TDLS_PEER_PSM		BIT(5)

From d8b261548dcf1058646cc48159c88d42d4b9a3b6 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Fri, 18 Jun 2021 13:41:35 +0300
Subject: [PATCH 2693/3292] mac80211: add to bss_conf if broadcast TWT is
 supported

Add to struct ieee80211_bss_conf a twt_broadcast field.
Set it to true if both STA and AP support broadcast TWT.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.f7c105237541.I50b302044e2b35e5ed4d3fb8bc7bd3d8bb89b1e1@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  4 +++-
 net/mac80211/mlme.c    | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c09cd0e4a6b3..5d3ce1bd5753 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
  */
 
 #ifndef MAC80211_H
@@ -526,6 +526,7 @@ struct ieee80211_fils_discovery {
  * @twt_responder: does this BSS support TWT requester (relevant for managed
  *	mode only, set if the AP advertises TWT responder role)
  * @twt_protected: does this BSS support protected TWT frames
+ * @twt_broadcast: does this BSS support broadcast TWT
  * @assoc: association status
  * @ibss_joined: indicates whether this station is part of an IBSS
  *	or not
@@ -642,6 +643,7 @@ struct ieee80211_bss_conf {
 	bool twt_requester;
 	bool twt_responder;
 	bool twt_protected;
+	bool twt_broadcast;
 	/* association related data */
 	bool assoc, ibss_joined;
 	bool ibss_creator;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 97efc20b9825..5e9f79beb8f3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3218,6 +3218,21 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
+static bool ieee80211_twt_bcast_support(struct ieee80211_bss_conf *bss_conf,
+					struct ieee80211_supported_band *sband,
+					struct sta_info *sta)
+{
+	const struct ieee80211_sta_he_cap *own_he_cap =
+		ieee80211_get_he_sta_cap(sband);
+
+	return bss_conf->he_support &&
+		(sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+			IEEE80211_HE_MAC_CAP2_BCAST_TWT) &&
+		own_he_cap &&
+		(own_he_cap->he_cap_elem.mac_cap_info[2] &
+			IEEE80211_HE_MAC_CAP2_BCAST_TWT);
+}
+
 static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 				    struct cfg80211_bss *cbss,
 				    struct ieee80211_mgmt *mgmt, size_t len,
@@ -3433,6 +3448,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		bss_conf->twt_protected = false;
 	}
 
+	bss_conf->twt_broadcast =
+		ieee80211_twt_bcast_support(bss_conf, sband, sta);
+
 	if (bss_conf->he_support) {
 		bss_conf->he_bss_color.color =
 			le32_get_bits(elems->he_operation->he_oper_params,

From dd3e4fc75b4ab8186a133cfe9d49666a2f8186e0 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 18 Jun 2021 13:41:36 +0300
Subject: [PATCH 2694/3292] nl80211/cfg80211: add BSS color to NDP ranging
 parameters

In NDP ranging, the initiator need to set the BSS color in the NDP
to the BSS color of the responder. Add the BSS color as a parameter
for NDP ranging.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.f097a6144b59.I27dec8b994df52e691925ea61be4dd4fa6d396c0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  8 ++++++--
 include/uapi/linux/nl80211.h |  6 +++++-
 net/wireless/pmsr.c          | 12 ++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b3bc58ec9098..c6812945b4b8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014 Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #include <linux/ethtool.h>
@@ -3521,7 +3521,10 @@ struct cfg80211_pmsr_result {
  *		 If neither @trigger_based nor @non_trigger_based is set,
  *		 EDCA based ranging will be used.
  * @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either
- *	@trigger_based or @non_trigger_based is set.
+ *		 @trigger_based or @non_trigger_based is set.
+ * @bss_color: the bss color of the responder. Optional. Set to zero to
+ *	indicate the driver should set the BSS color. Only valid if
+ *	@non_trigger_based or @trigger_based is set.
  *
  * See also nl80211 for the respective attribute documentation.
  */
@@ -3539,6 +3542,7 @@ struct cfg80211_pmsr_ftm_request_peer {
 	u8 burst_duration;
 	u8 ftms_per_burst;
 	u8 ftmr_retries;
+	u8 bss_color;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f962c06e9818..771f238ccff1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -11,7 +11,7 @@
  * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
  * Copyright 2008 Colin McCabe <colin@cozybit.com>
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -6912,6 +6912,9 @@ enum nl80211_peer_measurement_ftm_capa {
  * @NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK: negotiate for LMR feedback. Only
  *	valid if either %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED or
  *	%NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set.
+ * @NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR: optional. The BSS color of the
+ *	responder. Only valid if %NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED
+ *	or %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED is set.
  *
  * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal
  * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number
@@ -6931,6 +6934,7 @@ enum nl80211_peer_measurement_ftm_req {
 	NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED,
 	NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED,
 	NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK,
+	NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR,
 
 	/* keep last */
 	NUM_NL80211_PMSR_FTM_REQ_ATTR,
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index d245968b74cb..328cf54bda82 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -168,6 +168,18 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
 		return -EINVAL;
 	}
 
+	if (tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]) {
+		if (!out->ftm.non_trigger_based && !out->ftm.trigger_based) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR],
+					    "FTM: BSS color set for EDCA based ranging");
+			return -EINVAL;
+		}
+
+		out->ftm.bss_color =
+			nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]);
+	}
+
 	return 0;
 }
 

From 45daaa1318410794de956fb8e9d06aed2dbb23d0 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 18 Jun 2021 13:41:37 +0300
Subject: [PATCH 2695/3292] mac80211: Properly WARN on HW scan before restart

The following race was possible:

1. The device driver requests HW restart.
2. A scan is requested from user space and is propagated
   to the driver. During this flow HW_SCANNING flag is set.
3. The thread that handles the HW restart is scheduled,
   and before starting the actual reconfiguration it
   checks that HW_SCANNING is not set. The flow does so
   without acquiring any lock, and thus the WARN fires.

Fix this by checking that HW_SCANNING is on only after RTNL is
acquired, i.e., user space scan request handling is no longer
in transit.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.8238ab3e19ab.I2693c581c70251472b4f9089e37e06fb2c18268f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 822ff388410e..cde142fa8cb3 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -257,14 +257,13 @@ static void ieee80211_restart_work(struct work_struct *work)
 	/* wait for scan work complete */
 	flush_workqueue(local->workqueue);
 	flush_work(&local->sched_scan_stopped_work);
+	flush_work(&local->radar_detected_work);
+
+	rtnl_lock();
 
 	WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
 	     "%s called with hardware scan in progress\n", __func__);
 
-	flush_work(&local->radar_detected_work);
-	/* we might do interface manipulations, so need both */
-	rtnl_lock();
-	wiphy_lock(local->hw.wiphy);
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		/*
 		 * XXX: there may be more work for other vif types and even

From 9c7c637050b42b6e368bb39b8d0edff728268341 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:38 +0300
Subject: [PATCH 2696/3292] ieee80211: add defines for HE PHY cap byte 10

One bit out of the previously completely reserved byte 10 in
the PHY capabilities is used since 802.11ax D7.0, add a new
define for it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.c026feb3873d.I380f52a05ddb4153bc77ff7f276a3484819f69b2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 0a0aaa2d5d9e..a6730072d13a 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2179,6 +2179,8 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED		0xc0
 #define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK			0xc0
 
+#define IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF			0x01
+
 /* 802.11ax HE TX/RX MCS NSS Support  */
 #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS			(3)
 #define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS			(6)

From c74025f47ac855344d1188a4224a7af216843b22 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:44 +0300
Subject: [PATCH 2697/3292] mac80211: rearrange struct txq_info for fewer holes

We can slightly decrease the size of struct txq_info by
rearranging some fields for fewer holes, so do that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.1bf019a1fe2e.Ib54622b8d6dc1a9a7dc484e573c073119450538b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a7ce59150322..17068408b27d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -5,7 +5,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #ifndef IEEE80211_I_H
@@ -839,9 +839,12 @@ struct txq_info {
 	struct fq_tin tin;
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
-	struct sk_buff_head frags;
-	struct list_head schedule_order;
+
 	u16 schedule_round;
+	struct list_head schedule_order;
+
+	struct sk_buff_head frags;
+
 	unsigned long flags;
 
 	/* keep last! */

From 6516ee22f2a99efca7211ff61f23f778c988bfd4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:47 +0300
Subject: [PATCH 2698/3292] mac80211: improve AP disconnect message

If the AP changes capability/bandwidth in some fashion, the
message might be somewhat misleading and we don't know what
really changed. Modify the message to speak about "caps/bw"
instead of just "bandwidth", and print out the flags.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.dc22c48985fa.I4bf5fbc17ec783c21d4b50c8c35b1de390896ccd@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5e9f79beb8f3..6a599077e362 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -444,8 +444,8 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 				      IEEE80211_STA_DISABLE_160MHZ)) ||
 	    !cfg80211_chandef_valid(&chandef)) {
 		sdata_info(sdata,
-			   "AP %pM changed bandwidth in a way we can't support - disconnect\n",
-			   ifmgd->bssid);
+			   "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n",
+			   ifmgd->bssid, flags, ifmgd->flags);
 		return -EINVAL;
 	}
 

From 64a8747238291c7c497517ab2590c473f708d9be Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:48 +0300
Subject: [PATCH 2699/3292] cfg80211: trace more information in assoc trace
 event

Add more information to the assoc trace event so we can see
more precisely what's going on and what options were used.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.86c58fca486d.Iabd8f036d2ef1d770fd20ed3ccd149f32154f430@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/rdev-ops.h | 12 +++++++++++-
 net/wireless/trace.h    | 36 ++++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 8b1358d04ca2..b1d37f582dc6 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -464,8 +464,18 @@ static inline int rdev_assoc(struct cfg80211_registered_device *rdev,
 			     struct net_device *dev,
 			     struct cfg80211_assoc_request *req)
 {
+	const struct cfg80211_bss_ies *bss_ies;
 	int ret;
-	trace_rdev_assoc(&rdev->wiphy, dev, req);
+
+	/*
+	 * Note: we might trace not exactly the data that's processed,
+	 * due to races and the driver/mac80211 getting a newer copy.
+	 */
+	rcu_read_lock();
+	bss_ies = rcu_dereference(req->bss->ies);
+	trace_rdev_assoc(&rdev->wiphy, dev, req, bss_ies);
+	rcu_read_unlock();
+
 	ret = rdev->ops->assoc(&rdev->wiphy, dev, req);
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 76b777d5903f..440bce5f0274 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1195,8 +1195,9 @@ TRACE_EVENT(rdev_auth,
 
 TRACE_EVENT(rdev_assoc,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
-		 struct cfg80211_assoc_request *req),
-	TP_ARGS(wiphy, netdev, req),
+		 struct cfg80211_assoc_request *req,
+		 const struct cfg80211_bss_ies *bss_ies),
+	TP_ARGS(wiphy, netdev, req, bss_ies),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -1204,6 +1205,17 @@ TRACE_EVENT(rdev_assoc,
 		MAC_ENTRY(prev_bssid)
 		__field(bool, use_mfp)
 		__field(u32, flags)
+		__dynamic_array(u8, bss_elements, bss_ies->len)
+		__field(bool, bss_elements_bcon)
+		__field(u64, bss_elements_tsf)
+		__dynamic_array(u8, elements, req->ie_len)
+		__array(u8, ht_capa, sizeof(struct ieee80211_ht_cap))
+		__array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap))
+		__array(u8, vht_capa, sizeof(struct ieee80211_vht_cap))
+		__array(u8, vht_capa_mask, sizeof(struct ieee80211_vht_cap))
+		__dynamic_array(u8, fils_kek, req->fils_kek_len)
+		__dynamic_array(u8, fils_nonces,
+				req->fils_nonces ? 2 * FILS_NONCE_LEN : 0)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
@@ -1215,6 +1227,26 @@ TRACE_EVENT(rdev_assoc,
 		MAC_ASSIGN(prev_bssid, req->prev_bssid);
 		__entry->use_mfp = req->use_mfp;
 		__entry->flags = req->flags;
+		if (bss_ies->len)
+			memcpy(__get_dynamic_array(bss_elements),
+			       bss_ies->data, bss_ies->len);
+		__entry->bss_elements_bcon = bss_ies->from_beacon;
+		__entry->bss_elements_tsf = bss_ies->tsf;
+		if (req->ie)
+			memcpy(__get_dynamic_array(elements),
+			       req->ie, req->ie_len);
+		memcpy(__entry->ht_capa, &req->ht_capa, sizeof(req->ht_capa));
+		memcpy(__entry->ht_capa_mask, &req->ht_capa_mask,
+		       sizeof(req->ht_capa_mask));
+		memcpy(__entry->vht_capa, &req->vht_capa, sizeof(req->vht_capa));
+		memcpy(__entry->vht_capa_mask, &req->vht_capa_mask,
+		       sizeof(req->vht_capa_mask));
+		if (req->fils_kek)
+			memcpy(__get_dynamic_array(fils_kek),
+			       req->fils_kek, req->fils_kek_len);
+		if (req->fils_nonces)
+			memcpy(__get_dynamic_array(fils_nonces),
+			       req->fils_nonces, 2 * FILS_NONCE_LEN);
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
 		  ", previous bssid: " MAC_PR_FMT ", use mfp: %s, flags: %u",

From bac2fd3d753430032043098dd55543037e3f7a60 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:50 +0300
Subject: [PATCH 2700/3292] mac80211: remove use of ieee80211_get_he_sta_cap()

All uses of ieee80211_get_he_sta_cap() were actually wrong,
in net/mac80211/mlme.c they were wrong because that code is
also used for P2P (which is a different interface type), in
net/mac80211/main.c that should check all interface types.
Fix all that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.ede114bc8b46.Ibcd9a5d98430e936344eb6d242ef8a65c2f59b74@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/he.c   |  4 +++-
 net/mac80211/main.c |  9 +++++++--
 net/mac80211/mlme.c | 26 +++++++++++++++++---------
 net/mac80211/util.c |  3 ++-
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 0c0b970835ce..5984a9dac0bc 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -120,7 +120,9 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 
 	memset(he_cap, 0, sizeof(*he_cap));
 
-	if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband))
+	if (!he_cap_ie ||
+	    !ieee80211_get_he_iftype_cap(sband,
+					 ieee80211_vif_type_p2p(&sdata->vif)))
 		return;
 
 	/* Make sure size is OK */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index cde142fa8cb3..95a8300da2d0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1010,8 +1010,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		supp_ht = supp_ht || sband->ht_cap.ht_supported;
 		supp_vht = supp_vht || sband->vht_cap.vht_supported;
 
-		if (!supp_he)
-			supp_he = !!ieee80211_get_he_sta_cap(sband);
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			const struct ieee80211_sband_iftype_data *iftd;
+
+			iftd = &sband->iftype_data[i];
+
+			supp_he = supp_he || (iftd && iftd->he_cap.has_he);
+		}
 
 		/* HT, VHT, HE require QoS, thus >= 4 queues */
 		if (WARN_ON(local->hw.queues < IEEE80211_NUM_ACS &&
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6a599077e362..9b9f6ab3b722 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -384,7 +384,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 
 	/* don't check HE if we associated as non-HE station */
 	if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
-	    !ieee80211_get_he_sta_cap(sband))
+	    !ieee80211_get_he_iftype_cap(sband,
+					 ieee80211_vif_type_p2p(&sdata->vif)))
+
 		he_oper = NULL;
 
 	if (WARN_ON_ONCE(!sta))
@@ -642,7 +644,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
 
 	rcu_read_unlock();
 
-	he_cap = ieee80211_get_he_sta_cap(sband);
+	he_cap = ieee80211_get_he_iftype_cap(sband,
+					     ieee80211_vif_type_p2p(&sdata->vif));
 	if (!he_cap || !reg_cap)
 		return;
 
@@ -3218,12 +3221,14 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
-static bool ieee80211_twt_bcast_support(struct ieee80211_bss_conf *bss_conf,
+static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_bss_conf *bss_conf,
 					struct ieee80211_supported_band *sband,
 					struct sta_info *sta)
 {
 	const struct ieee80211_sta_he_cap *own_he_cap =
-		ieee80211_get_he_sta_cap(sband);
+		ieee80211_get_he_iftype_cap(sband,
+					    ieee80211_vif_type_p2p(&sdata->vif));
 
 	return bss_conf->he_support &&
 		(sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
@@ -3449,7 +3454,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	}
 
 	bss_conf->twt_broadcast =
-		ieee80211_twt_bcast_support(bss_conf, sband, sta);
+		ieee80211_twt_bcast_support(sdata, bss_conf, sband, sta);
 
 	if (bss_conf->he_support) {
 		bss_conf->he_bss_color.color =
@@ -4851,11 +4856,13 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
 }
 
 static bool
-ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband,
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_supported_band *sband,
 				    const struct ieee80211_he_operation *he_op)
 {
 	const struct ieee80211_sta_he_cap *sta_he_cap =
-		ieee80211_get_he_sta_cap(sband);
+		ieee80211_get_he_iftype_cap(sband,
+					    ieee80211_vif_type_p2p(&sdata->vif));
 	u16 ap_min_req_set;
 	int i;
 
@@ -4949,7 +4956,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 	}
 
-	if (!ieee80211_get_he_sta_cap(sband))
+	if (!ieee80211_get_he_iftype_cap(sband,
+					 ieee80211_vif_type_p2p(&sdata->vif)))
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 
 	rcu_read_lock();
@@ -5007,7 +5015,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		else
 			he_oper = NULL;
 
-		if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper))
+		if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
 			ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 	}
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ee5410bfe9ec..b352d1d87bf0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1937,7 +1937,8 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
 		*offset = noffset;
 	}
 
-	he_cap = ieee80211_get_he_sta_cap(sband);
+	he_cap = ieee80211_get_he_iftype_cap(sband,
+					     ieee80211_vif_type_p2p(&sdata->vif));
 	if (he_cap) {
 		pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
 		if (!pos)

From f253683e602996b250db7a3a7b77e0e908c9dbbc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:51 +0300
Subject: [PATCH 2701/3292] cfg80211: remove ieee80211_get_he_sta_cap()

This function turned out to be too easy to misuse since it
doesn't consider the interface type. Remove it now that we
no longer use it in mac80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.8c9c72f914b0.I68e9c0626dc77a0f67f238a05ae16a0b77b09895@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c6812945b4b8..7b4ef45d49b0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -534,18 +534,6 @@ ieee80211_get_he_iftype_cap(const struct ieee80211_supported_band *sband,
 	return NULL;
 }
 
-/**
- * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
- * @sband: the sband to search for the STA on
- *
- * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found
- */
-static inline const struct ieee80211_sta_he_cap *
-ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
-{
-	return ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_STATION);
-}
-
 /**
  * ieee80211_get_he_6ghz_capa - return HE 6 GHz capabilities
  * @sband: the sband to search for the STA on

From ab4040df6efb87f92c7ec5bd65b5a093654d6a85 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 7 Jun 2021 23:00:47 +0800
Subject: [PATCH 2702/3292] mac80211: fix some spelling mistakes

Fix some spelling mistakes in comments:
freeed  ==> freed
addreses  ==> addresses
containging  ==> containing
capablity  ==> capability
sucess  ==> success
atleast  ==> at least

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Link: https://lore.kernel.org/r/20210607150047.2855962-1-zhengyongjun3@huawei.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh.h                | 2 +-
 net/mac80211/mesh_hwmp.c           | 2 +-
 net/mac80211/mesh_pathtbl.c        | 2 +-
 net/mac80211/mesh_plink.c          | 2 +-
 net/mac80211/mlme.c                | 2 +-
 net/mac80211/rc80211_minstrel_ht.c | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 40492d1bd8fd..77080b4f87b8 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -134,7 +134,7 @@ struct mesh_path {
  * gate's mpath may or may not be resolved and active.
  * @gates_lock: protects updates to known_gates
  * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
- * @walk_head: linked list containging all mesh_path objects
+ * @walk_head: linked list containing all mesh_path objects
  * @walk_lock: lock protecting walk_head
  * @entries: number of entries in the table
  */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 3db514c4c63a..a05b615deb51 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1124,7 +1124,7 @@ enddiscovery:
  * forwarding information is found.
  *
  * Returns: 0 if the next hop was found and -ENOENT if the frame was queued.
- * skb is freeed here if no mpath could be allocated.
+ * skb is freed here if no mpath could be allocated.
  */
 int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
 			 struct sk_buff *skb)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 620ecf922408..efbefcbac3ac 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -122,7 +122,7 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
 		hdr = (struct ieee80211_hdr *) skb->data;
 
 		/* we preserve the previous mesh header and only add
-		 * the new addreses */
+		 * the new addresses */
 		mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
 		mshdr->flags = MESH_FLAGS_AE_A5_A6;
 		memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index aca26df7587d..a6915847d78a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -150,7 +150,7 @@ out:
  * mesh STA in a MBSS. Three HT protection modes are supported for now, non-HT
  * mixed mode, 20MHz-protection and no-protection mode. non-HT mixed mode is
  * selected if any non-HT peers are present in our MBSS.  20MHz-protection mode
- * is selected if all peers in our 20/40MHz MBSS support HT and atleast one
+ * is selected if all peers in our 20/40MHz MBSS support HT and at least one
  * HT20 peer is present. Otherwise no-protection mode is selected.
  */
 static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9b9f6ab3b722..b3123e00f118 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -585,7 +585,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
 		cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
 
 	/*
-	 * If some other vif is using the MU-MIMO capablity we cannot associate
+	 * If some other vif is using the MU-MIMO capability we cannot associate
 	 * using MU-MIMO - this will lead to contradictions in the group-id
 	 * mechanism.
 	 * Ownership is defined since association request, in order to avoid
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 20f2e0bef96b..72b44d4c42d0 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -434,7 +434,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
 	unsigned int nsecs = 0, overhead = mi->overhead;
 	unsigned int ampdu_len = 1;
 
-	/* do not account throughput if sucess prob is below 10% */
+	/* do not account throughput if success prob is below 10% */
 	if (prob_avg < MINSTREL_FRAC(10, 100))
 		return 0;
 

From aeddc05fa9cff35402fc569cc6e7fca6ee36bac1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sosth=C3=A8ne=20Gu=C3=A9don?= <sosthene@guedon.gdn>
Date: Thu, 3 Jun 2021 19:39:39 +0200
Subject: [PATCH 2703/3292] nl80211: Fix typo pmsr->pmsr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was mis-spelled in the policy, fix that.

Signed-off-by: Sosthène Guédon <sosthene@guedon.gdn>
Link: https://lore.kernel.org/r/YLkT27RG0DaWLUot@arch.localdomain
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c62d61d8aa02..4c61cf66fe05 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -330,7 +330,7 @@ nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
 };
 
 static const struct nla_policy
-nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
+nl80211_pmsr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
 	[NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR,
 	[NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy),
 	[NL80211_PMSR_PEER_ATTR_REQ] =
@@ -345,7 +345,7 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
 	[NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT },
 	[NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT },
 	[NL80211_PMSR_ATTR_PEERS] =
-		NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
+		NLA_POLICY_NESTED_ARRAY(nl80211_pmsr_peer_attr_policy),
 };
 
 static const struct nla_policy

From b767ecdaf98a999ef710f4f290bdd89257a90db0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:56 +0300
Subject: [PATCH 2704/3292] cfg80211: reg: improve bad regulatory warning

There's a WARN_ON here but it says nothing, and the later
dump of the regdomain aren't usually printed. As a first
step, include the regdomain code in the WARN_ON message,
just like in other similar instances.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.853ffdd6c62b.I63e37b2ab184ee3653686e4df4dd23eb303687d2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0406ce7334fa..2f654a4fc53b 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3975,7 +3975,9 @@ static int __regulatory_set_wiphy_regd(struct wiphy *wiphy,
 		 "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n"))
 		return -EPERM;
 
-	if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) {
+	if (WARN(!is_valid_rd(rd),
+		 "Invalid regulatory domain detected: %c%c\n",
+		 rd->alpha2[0], rd->alpha2[1])) {
 		print_regdomain_info(rd);
 		return -EINVAL;
 	}

From be989891e4f2ff5649bf22ab05a7cdd3a287e34b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:39 +0300
Subject: [PATCH 2705/3292] cfg80211: add cfg80211_any_usable_channels()

This helper function checks if there are any usable channels on
any of the given bands with the given properties (as expressed
by disallowed channel flags).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.2b613addaa85.Idaf8b859089490537878a7de5c7453a873a3f638@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 11 +++++++++++
 net/wireless/chan.c    | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7b4ef45d49b0..481e4e24800f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -894,6 +894,17 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef)
 	return chandef->chan->max_power;
 }
 
+/**
+ * cfg80211_any_usable_channels - check for usable channels
+ * @wiphy: the wiphy to check for
+ * @band_mask: which bands to check on
+ * @prohibited_flags: which channels to not consider usable,
+ *	%IEEE80211_CHAN_DISABLED is always taken into account
+ */
+bool cfg80211_any_usable_channels(struct wiphy *wiphy,
+				  unsigned long band_mask,
+				  u32 prohibited_flags);
+
 /**
  * enum survey_info_flags - survey information flags
  *
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 472c895823a4..869c43d4414c 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
  *
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2018-2020	Intel Corporation
+ * Copyright 2018-2021	Intel Corporation
  */
 
 #include <linux/export.h>
@@ -1339,3 +1339,34 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 		WARN_ON(1);
 	}
 }
+
+bool cfg80211_any_usable_channels(struct wiphy *wiphy,
+				  unsigned long sband_mask,
+				  u32 prohibited_flags)
+{
+	int idx;
+
+	prohibited_flags |= IEEE80211_CHAN_DISABLED;
+
+	for_each_set_bit(idx, &sband_mask, NUM_NL80211_BANDS) {
+		struct ieee80211_supported_band *sband = wiphy->bands[idx];
+		int chanidx;
+
+		if (!sband)
+			continue;
+
+		for (chanidx = 0; chanidx < sband->n_channels; chanidx++) {
+			struct ieee80211_channel *chan;
+
+			chan = &sband->channels[chanidx];
+
+			if (chan->flags & prohibited_flags)
+				continue;
+
+			return true;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(cfg80211_any_usable_channels);

From 0bc47057b54b73e5f6d36bfc7c5c96e15be1f221 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:40 +0300
Subject: [PATCH 2706/3292] mac80211: conditionally advertise HE in probe
 requests

While building probe requests, only enable HE capability if
there are actually any channels in the band with HE enabled,
otherwise we're not really capable. We're doing the same in
association requests, so doing it here makes it consistent.

This also makes HE not appear available if it isn't due to
regulatory constraints.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.b5513f2af335.Ic01862678712ae4238cea43ad2185928865efad2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b352d1d87bf0..fab4bb1948e3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1939,7 +1939,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
 
 	he_cap = ieee80211_get_he_iftype_cap(sband,
 					     ieee80211_vif_type_p2p(&sdata->vif));
-	if (he_cap) {
+	if (he_cap &&
+	    cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+					 IEEE80211_CHAN_NO_HE)) {
 		pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
 		if (!pos)
 			goto out_err;

From 1b7b3ac8ff3317cdcf07a1c413de9bdb68019c2b Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Fri, 18 Jun 2021 13:41:46 +0300
Subject: [PATCH 2707/3292] cfg80211: set custom regdomain after wiphy
 registration

We used to set regulatory info before the registration of
the device and then the regulatory info didn't get set, because
the device isn't registered so there isn't a device to set the
regulatory info for. So set the regulatory info after the device
registration.
Call reg_process_self_managed_hints() once again after the device
registration because it does nothing before it.

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.c96eadcffe80.I86799c2c866b5610b4cf91115c21d8ceb525c5aa@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 8 ++++----
 net/wireless/reg.c  | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 41c15cc7791f..03323121ca50 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -928,9 +928,6 @@ int wiphy_register(struct wiphy *wiphy)
 		return res;
 	}
 
-	/* set up regulatory info */
-	wiphy_regulatory_register(wiphy);
-
 	list_add_rcu(&rdev->list, &cfg80211_rdev_list);
 	cfg80211_rdev_list_generation++;
 
@@ -941,6 +938,9 @@ int wiphy_register(struct wiphy *wiphy)
 	cfg80211_debugfs_rdev_add(rdev);
 	nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
 
+	/* set up regulatory info */
+	wiphy_regulatory_register(wiphy);
+
 	if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
 		struct regulatory_request request;
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2f654a4fc53b..c2d0ff7f089f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4051,6 +4051,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
 
 	wiphy_update_regulatory(wiphy, lr->initiator);
 	wiphy_all_share_dfs_chan_state(wiphy);
+	reg_process_self_managed_hints();
 }
 
 void wiphy_regulatory_deregister(struct wiphy *wiphy)

From f4f8650588d35deafaa4a4e28cceb3557a71e711 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:52 +0300
Subject: [PATCH 2708/3292] cfg80211: allow advertising vendor-specific
 capabilities

There may be cases where vendor-specific elements need to be
used over the air. Rather than have driver or firmware add
them and possibly cause problems that way, add them to the
iftype-data band capabilities. This way we can advertise to
userspace first, and use them in mac80211 next.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.e8c4f0347276.Iee5964682b3e9ec51fc1cd57a7c62383eaf6ddd7@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 7 +++++++
 include/uapi/linux/nl80211.h | 3 +++
 net/wireless/nl80211.c       | 5 +++++
 3 files changed, 15 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 481e4e24800f..c93a2cd77920 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -371,11 +371,18 @@ struct ieee80211_sta_he_cap {
  * @he_cap: holds the HE capabilities
  * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a
  *	6 GHz band channel (and 0 may be valid value).
+ * @vendor_elems: vendor element(s) to advertise
+ * @vendor_elems.data: vendor element(s) data
+ * @vendor_elems.len: vendor element(s) length
  */
 struct ieee80211_sband_iftype_data {
 	u16 types_mask;
 	struct ieee80211_sta_he_cap he_cap;
 	struct ieee80211_he_6ghz_capa he_6ghz_capa;
+	struct {
+		const u8 *data;
+		unsigned int len;
+	} vendor_elems;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 771f238ccff1..db474994fa73 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3654,6 +3654,8 @@ enum nl80211_mpath_info {
  *     defined
  * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16),
  *	given for all 6 GHz band channels
+ * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are
+ *	advertised on this band/for this iftype (binary)
  * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
  */
 enum nl80211_band_iftype_attr {
@@ -3665,6 +3667,7 @@ enum nl80211_band_iftype_attr {
 	NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
 	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
 	NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA,
+	NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS,
 
 	/* keep last */
 	__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4c61cf66fe05..50eb405b0690 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1731,6 +1731,11 @@ nl80211_send_iftype_data(struct sk_buff *msg,
 		    &iftdata->he_6ghz_capa))
 		return -ENOBUFS;
 
+	if (iftdata->vendor_elems.data && iftdata->vendor_elems.len &&
+	    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS,
+		    iftdata->vendor_elems.len, iftdata->vendor_elems.data))
+		return -ENOBUFS;
+
 	return 0;
 }
 

From 9bd6a83e53a7a4d82f95b354856b64f4359cdddc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:53 +0300
Subject: [PATCH 2709/3292] mac80211: add vendor-specific capabilities to assoc
 request

When sending an association request, add any vendor specific
capabilities at the end of the frame. This way, mac80211 is
still completely in charge of building the frame, but drivers
can determine what should be added depending on the band and
interface type.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.80d716d69a5f.I28097ff19be6b22aebdc33a72795d2662755d41f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b3123e00f118..0559c6b6ee71 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
  */
 
 #include <linux/delay.h>
@@ -681,6 +681,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	u32 rates = 0;
 	__le16 listen_int;
 	struct element *ext_capa = NULL;
+	enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+	const struct ieee80211_sband_iftype_data *iftd;
 
 	/* we know it's writable, cast away the const */
 	if (assoc_data->ie_len)
@@ -725,6 +727,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		}
 	}
 
+	iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+
 	skb = alloc_skb(local->hw.extra_tx_headroom +
 			sizeof(*mgmt) + /* bit too much but doesn't matter */
 			2 + assoc_data->ssid_len + /* SSID */
@@ -739,7 +743,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
 			assoc_data->ie_len + /* extra IEs */
 			(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
-			9, /* WMM */
+			9 + /* WMM */
+			(iftd ? iftd->vendor_elems.len : 0),
 			GFP_KERNEL);
 	if (!skb)
 		return;
@@ -1012,6 +1017,9 @@ skip_rates:
 		ieee80211_add_s1g_capab_ie(sdata, &sband->s1g_cap, skb);
 	}
 
+	if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
+		skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
+
 	/* add any remaining custom (i.e. vendor specific here) IEs */
 	if (assoc_data->ie_len) {
 		noffset = assoc_data->ie_len;

From 52bb205213a8169cc40e1eba96483a9e488c17d3 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 18 Jun 2021 13:41:41 +0300
Subject: [PATCH 2710/3292] cfg80211: Support hidden AP discovery over 6GHz
 band

To discover a hidden AP on the 6GHz band, the probe request
sent to the AP needs to include the AP's SSID, as some APs
would not respond with a probe response based only on short
SSID match.

To support hidden AP discovery over the 6GHz band,
when constructing the specific 6GHz band scan also include
SSIDs that were part of the original scan request, so these
can be used in the probe requests transmitted during scan.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.218df9d3203c.Ice0f7a2f6a65f1f9710b7898591481baeefaf490@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index a3941b19b516..f03c7ac8e184 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
  * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2016	Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -757,7 +757,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
 	}
 
 	request = kzalloc(struct_size(request, channels, n_channels) +
-			  sizeof(*request->scan_6ghz_params) * count,
+			  sizeof(*request->scan_6ghz_params) * count +
+			  sizeof(*request->ssids) * rdev_req->n_ssids,
 			  GFP_KERNEL);
 	if (!request) {
 		cfg80211_free_coloc_ap_list(&coloc_ap_list);
@@ -848,9 +849,18 @@ skip:
 
 	if (request->n_channels) {
 		struct cfg80211_scan_request *old = rdev->int_scan_req;
-
 		rdev->int_scan_req = request;
 
+		/*
+		 * Add the ssids from the parent scan request to the new scan
+		 * request, so the driver would be able to use them in its
+		 * probe requests to discover hidden APs on PSC channels.
+		 */
+		request->ssids = (void *)&request->channels[request->n_channels];
+		request->n_ssids = rdev_req->n_ssids;
+		memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
+		       request->n_ssids);
+
 		/*
 		 * If this scan follows a previous scan, save the scan start
 		 * info from the first part of the scan

From 7d29bc50b30e58102dd0e7a6beb1a72cc41029c5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:42 +0300
Subject: [PATCH 2711/3292] mac80211: always include HE 6GHz capability in
 probe request

If HE/6GHz is available (thus we consider dot11HE6GOptionImplemented
to be true), then always include the corresponding capability in the
probe request as required by the spec.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.25ee4a54a7d0.I8cebd799c85524c8123a11941a104dbdefc03762@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index fab4bb1948e3..d1ecac00bddb 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1945,8 +1945,18 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
 		pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
 		if (!pos)
 			goto out_err;
+	}
 
-		if (sband->band == NL80211_BAND_6GHZ) {
+	if (cfg80211_any_usable_channels(local->hw.wiphy,
+					 BIT(NL80211_BAND_6GHZ),
+					 IEEE80211_CHAN_NO_HE)) {
+		struct ieee80211_supported_band *sband6;
+
+		sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+		he_cap = ieee80211_get_he_iftype_cap(sband6,
+				ieee80211_vif_type_p2p(&sdata->vif));
+
+		if (he_cap) {
 			enum nl80211_iftype iftype =
 				ieee80211_vif_type_p2p(&sdata->vif);
 			__le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype);

From 15fae3410f1d879b18e08fe8ef293d538549dfcb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:55 +0300
Subject: [PATCH 2712/3292] mac80211: notify driver on mgd TX completion

We have mgd_prepare_tx(), but sometimes drivers may want/need
to take action when the exchange finishes, whether successfully
or not.

Add a notification to the driver on completion, i.e. call the
new method mgd_complete_tx().

To unify the two scenarios, and to add more information, make
both of them take a struct that has the duration (prepare only),
subtype (both) and success (complete only).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.5d94e78f6230.I6dc979606b6f28701b740d7aab725f7853a5a155@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath9k/main.c         |  2 +-
 .../net/wireless/intel/iwlwifi/mvm/mac80211.c |  6 +-
 drivers/net/wireless/realtek/rtw88/mac80211.c |  2 +-
 include/net/mac80211.h                        | 28 +++++++-
 net/mac80211/driver-ops.h                     | 26 +++++--
 net/mac80211/mlme.c                           | 71 ++++++++++++++-----
 net/mac80211/trace.h                          | 33 +++++++--
 7 files changed, 133 insertions(+), 35 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 97c3a53f9cef..139831539da3 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2654,7 +2654,7 @@ static void ath9k_unassign_vif_chanctx(struct ieee80211_hw *hw,
 
 static void ath9k_mgd_prepare_tx(struct ieee80211_hw *hw,
 				 struct ieee80211_vif *vif,
-				 u16 duration)
+				 struct ieee80211_prep_tx_info *info)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 607d5d564928..cc78f306ac1a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3306,14 +3306,14 @@ static int iwl_mvm_mac_conf_tx(struct ieee80211_hw *hw,
 
 static void iwl_mvm_mac_mgd_prepare_tx(struct ieee80211_hw *hw,
 				       struct ieee80211_vif *vif,
-				       u16 req_duration)
+				       struct ieee80211_prep_tx_info *info)
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 	u32 duration = IWL_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS;
 	u32 min_duration = IWL_MVM_TE_SESSION_PROTECTION_MIN_TIME_MS;
 
-	if (req_duration > duration)
-		duration = req_duration;
+	if (info->duration > duration)
+		duration = info->duration;
 
 	mutex_lock(&mvm->mutex);
 	/* Try really hard to protect the session and hear a beacon
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 333df6b38113..d8718b253f0b 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -629,7 +629,7 @@ static void rtw_ops_sw_scan_complete(struct ieee80211_hw *hw,
 
 static void rtw_ops_mgd_prepare_tx(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif,
-				   u16 duration)
+				   struct ieee80211_prep_tx_info *info)
 {
 	struct rtw_dev *rtwdev = hw->priv;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5d3ce1bd5753..9afbcac61c2a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3346,6 +3346,21 @@ enum ieee80211_reconfig_type {
 	IEEE80211_RECONFIG_TYPE_SUSPEND,
 };
 
+/**
+ * struct ieee80211_prep_tx_info - prepare TX information
+ * @duration: if non-zero, hint about the required duration,
+ *	only used with the mgd_prepare_tx() method.
+ * @subtype: frame subtype (auth, (re)assoc, deauth, disassoc)
+ * @success: whether the frame exchange was successful, only
+ *	used with the mgd_complete_tx() method, and then only
+ *	valid for auth and (re)assoc.
+ */
+struct ieee80211_prep_tx_info {
+	u16 duration;
+	u16 subtype;
+	u8 success:1;
+};
+
 /**
  * struct ieee80211_ops - callbacks from mac80211 to the driver
  *
@@ -3758,9 +3773,13 @@ enum ieee80211_reconfig_type {
  *	frame in case that no beacon was heard from the AP/P2P GO.
  *	The callback will be called before each transmission and upon return
  *	mac80211 will transmit the frame right away.
- *      If duration is greater than zero, mac80211 hints to the driver the
- *      duration for which the operation is requested.
+ *	Additional information is passed in the &struct ieee80211_prep_tx_info
+ *	data. If duration there is greater than zero, mac80211 hints to the
+ *	driver the duration for which the operation is requested.
  *	The callback is optional and can (should!) sleep.
+ * @mgd_complete_tx: Notify the driver that the response frame for a previously
+ *	transmitted frame announced with @mgd_prepare_tx was received, the data
+ *	is filled similarly to @mgd_prepare_tx though the duration is not used.
  *
  * @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending
  *	a TDLS discovery-request, we expect a reply to arrive on the AP's
@@ -4111,7 +4130,10 @@ struct ieee80211_ops {
 
 	void	(*mgd_prepare_tx)(struct ieee80211_hw *hw,
 				  struct ieee80211_vif *vif,
-				  u16 duration);
+				  struct ieee80211_prep_tx_info *info);
+	void	(*mgd_complete_tx)(struct ieee80211_hw *hw,
+				   struct ieee80211_vif *vif,
+				   struct ieee80211_prep_tx_info *info);
 
 	void	(*mgd_protect_tdls_discover)(struct ieee80211_hw *hw,
 					     struct ieee80211_vif *vif);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 604ca59937f0..bcb7cc06db3d 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
 /*
 * Portions of this file
 * Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2019 Intel Corporation
+* Copyright (C) 2018 - 2019, 2021 Intel Corporation
 */
 
 #ifndef __MAC80211_DRIVER_OPS
@@ -821,7 +821,7 @@ drv_allow_buffered_frames(struct ieee80211_local *local,
 
 static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
 				      struct ieee80211_sub_if_data *sdata,
-				      u16 duration)
+				      struct ieee80211_prep_tx_info *info)
 {
 	might_sleep();
 
@@ -829,9 +829,27 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
 		return;
 	WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
 
-	trace_drv_mgd_prepare_tx(local, sdata, duration);
+	trace_drv_mgd_prepare_tx(local, sdata, info->duration,
+				 info->subtype, info->success);
 	if (local->ops->mgd_prepare_tx)
-		local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, duration);
+		local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, info);
+	trace_drv_return_void(local);
+}
+
+static inline void drv_mgd_complete_tx(struct ieee80211_local *local,
+				       struct ieee80211_sub_if_data *sdata,
+				       struct ieee80211_prep_tx_info *info)
+{
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+	WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+
+	trace_drv_mgd_complete_tx(local, sdata, info->duration,
+				  info->subtype, info->success);
+	if (local->ops->mgd_complete_tx)
+		local->ops->mgd_complete_tx(&local->hw, &sdata->vif, info);
 	trace_drv_return_void(local);
 }
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0559c6b6ee71..a0572ce99826 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -683,6 +683,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	struct element *ext_capa = NULL;
 	enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
 	const struct ieee80211_sband_iftype_data *iftd;
+	struct ieee80211_prep_tx_info info = {};
 
 	/* we know it's writable, cast away the const */
 	if (assoc_data->ie_len)
@@ -784,12 +785,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		mgmt->u.reassoc_req.listen_interval = listen_int;
 		memcpy(mgmt->u.reassoc_req.current_ap, assoc_data->prev_bssid,
 		       ETH_ALEN);
+		info.subtype = IEEE80211_STYPE_REASSOC_REQ;
 	} else {
 		skb_put(skb, 4);
 		mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 						  IEEE80211_STYPE_ASSOC_REQ);
 		mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
 		mgmt->u.assoc_req.listen_interval = listen_int;
+		info.subtype = IEEE80211_STYPE_ASSOC_REQ;
 	}
 
 	/* SSID */
@@ -1037,7 +1040,7 @@ skip_rates:
 	ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
 	ifmgd->assoc_req_ies_len = pos - ie_start;
 
-	drv_mgd_prepare_tx(local, sdata, 0);
+	drv_mgd_prepare_tx(local, sdata, &info);
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
 	if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
@@ -2256,6 +2259,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_local *local = sdata->local;
 	u32 changed = 0;
+	struct ieee80211_prep_tx_info info = {
+		.subtype = stype,
+	};
 
 	sdata_assert_lock(sdata);
 
@@ -2305,8 +2311,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 		 * driver requested so.
 		 */
 		if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
-		    !ifmgd->have_beacon)
-			drv_mgd_prepare_tx(sdata->local, sdata, 0);
+		    !ifmgd->have_beacon) {
+			drv_mgd_prepare_tx(sdata->local, sdata, &info);
+		}
 
 		ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid,
 					       ifmgd->bssid, stype, reason,
@@ -2317,6 +2324,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	if (tx)
 		ieee80211_flush_queues(local, sdata, false);
 
+	drv_mgd_complete_tx(sdata->local, sdata, &info);
+
 	/* clear bssid only after building the needed mgmt frames */
 	eth_zero_addr(ifmgd->bssid);
 
@@ -2867,6 +2876,9 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 	u8 *pos;
 	struct ieee802_11_elems elems;
 	u32 tx_flags = 0;
+	struct ieee80211_prep_tx_info info = {
+		.subtype = IEEE80211_STYPE_AUTH,
+	};
 
 	pos = mgmt->u.auth.variable;
 	ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
@@ -2874,7 +2886,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 	if (!elems.challenge)
 		return;
 	auth_data->expected_transaction = 4;
-	drv_mgd_prepare_tx(sdata->local, sdata, 0);
+	drv_mgd_prepare_tx(sdata->local, sdata, &info);
 	if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
 		tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
 			   IEEE80211_TX_INTFL_MLME_CONN_TX;
@@ -2927,6 +2939,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		.type = MLME_EVENT,
 		.u.mlme.data = AUTH_EVENT,
 	};
+	struct ieee80211_prep_tx_info info = {
+		.subtype = IEEE80211_STYPE_AUTH,
+	};
 
 	sdata_assert_lock(sdata);
 
@@ -2955,7 +2970,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 			   mgmt->sa, auth_alg, ifmgd->auth_data->algorithm,
 			   auth_transaction,
 			   ifmgd->auth_data->expected_transaction);
-		return;
+		goto notify_driver;
 	}
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
@@ -2966,7 +2981,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		     (auth_transaction == 1 &&
 		      (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
 		       status_code == WLAN_STATUS_SAE_PK))))
-			return;
+			goto notify_driver;
 
 		sdata_info(sdata, "%pM denied authentication (status %d)\n",
 			   mgmt->sa, status_code);
@@ -2974,7 +2989,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 		event.u.mlme.status = MLME_DENIED;
 		event.u.mlme.reason = status_code;
 		drv_event_callback(sdata->local, sdata, &event);
-		return;
+		goto notify_driver;
 	}
 
 	switch (ifmgd->auth_data->algorithm) {
@@ -2996,10 +3011,11 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	default:
 		WARN_ONCE(1, "invalid auth alg %d",
 			  ifmgd->auth_data->algorithm);
-		return;
+		goto notify_driver;
 	}
 
 	event.u.mlme.status = MLME_SUCCESS;
+	info.success = 1;
 	drv_event_callback(sdata->local, sdata, &event);
 	if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
 	    (auth_transaction == 2 &&
@@ -3013,6 +3029,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	}
 
 	cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+notify_driver:
+	drv_mgd_complete_tx(sdata->local, sdata, &info);
 }
 
 #define case_WLAN(type) \
@@ -3634,6 +3652,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		.type = MLME_EVENT,
 		.u.mlme.data = ASSOC_EVENT,
 	};
+	struct ieee80211_prep_tx_info info = {};
 
 	sdata_assert_lock(sdata);
 
@@ -3663,6 +3682,15 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		aid = 0; /* TODO */
 	}
 
+	/*
+	 * Note: this may not be perfect, AP might misbehave - if
+	 * anyone needs to rely on perfect complete notification
+	 * with the exact right subtype, then we need to track what
+	 * we actually transmitted.
+	 */
+	info.subtype = reassoc ? IEEE80211_STYPE_REASSOC_REQ :
+				 IEEE80211_STYPE_ASSOC_REQ;
+
 	sdata_info(sdata,
 		   "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n",
 		   reassoc ? "Rea" : "A", mgmt->sa,
@@ -3688,7 +3716,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		assoc_data->timeout_started = true;
 		if (ms > IEEE80211_ASSOC_TIMEOUT)
 			run_again(sdata, assoc_data->timeout);
-		return;
+		goto notify_driver;
 	}
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
@@ -3703,7 +3731,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 			/* oops -- internal error -- send timeout for now */
 			ieee80211_destroy_assoc_data(sdata, false, false);
 			cfg80211_assoc_timeout(sdata->dev, cbss);
-			return;
+			goto notify_driver;
 		}
 		event.u.mlme.status = MLME_SUCCESS;
 		drv_event_callback(sdata->local, sdata, &event);
@@ -3721,10 +3749,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
 			if (sdata->tx_conf[ac].uapsd)
 				uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
+
+		info.success = 1;
 	}
 
 	cfg80211_rx_assoc_resp(sdata->dev, cbss, (u8 *)mgmt, len, uapsd_queues,
 			       ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
+notify_driver:
+	drv_mgd_complete_tx(sdata->local, sdata, &info);
 }
 
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -4343,7 +4375,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
 	u32 tx_flags = 0;
 	u16 trans = 1;
 	u16 status = 0;
-	u16 prepare_tx_duration = 0;
+	struct ieee80211_prep_tx_info info = {
+		.subtype = IEEE80211_STYPE_AUTH,
+	};
 
 	sdata_assert_lock(sdata);
 
@@ -4366,10 +4400,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
 	}
 
 	if (auth_data->algorithm == WLAN_AUTH_SAE)
-		prepare_tx_duration =
-			jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
+		info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
 
-	drv_mgd_prepare_tx(local, sdata, prepare_tx_duration);
+	drv_mgd_prepare_tx(local, sdata, &info);
 
 	sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
 		   auth_data->bss->bssid, auth_data->tries,
@@ -5792,6 +5825,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 	bool tx = !req->local_state_change;
+	struct ieee80211_prep_tx_info info = {
+		.subtype = IEEE80211_STYPE_DEAUTH,
+	};
 
 	if (ifmgd->auth_data &&
 	    ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) {
@@ -5800,7 +5836,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 			   req->bssid, req->reason_code,
 			   ieee80211_get_reason_code_string(req->reason_code));
 
-		drv_mgd_prepare_tx(sdata->local, sdata, 0);
+		drv_mgd_prepare_tx(sdata->local, sdata, &info);
 		ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
 					       IEEE80211_STYPE_DEAUTH,
 					       req->reason_code, tx,
@@ -5809,7 +5845,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 		ieee80211_report_disconnect(sdata, frame_buf,
 					    sizeof(frame_buf), true,
 					    req->reason_code, false);
-
+		drv_mgd_complete_tx(sdata->local, sdata, &info);
 		return 0;
 	}
 
@@ -5820,7 +5856,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 			   req->bssid, req->reason_code,
 			   ieee80211_get_reason_code_string(req->reason_code));
 
-		drv_mgd_prepare_tx(sdata->local, sdata, 0);
+		drv_mgd_prepare_tx(sdata->local, sdata, &info);
 		ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
 					       IEEE80211_STYPE_DEAUTH,
 					       req->reason_code, tx,
@@ -5844,6 +5880,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 		ieee80211_report_disconnect(sdata, frame_buf,
 					    sizeof(frame_buf), true,
 					    req->reason_code, false);
+		drv_mgd_complete_tx(sdata->local, sdata, &info);
 		return 0;
 	}
 
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 8fcc39056402..f6ef15366938 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2,7 +2,7 @@
 /*
 * Portions of this file
 * Copyright(c) 2016-2017 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2020 Intel Corporation
+* Copyright (C) 2018 - 2021 Intel Corporation
 */
 
 #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -1461,31 +1461,52 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames,
 	TP_ARGS(local, sta, tids, num_frames, reason, more_data)
 );
 
-TRACE_EVENT(drv_mgd_prepare_tx,
+DECLARE_EVENT_CLASS(mgd_prepare_complete_tx_evt,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
-		 u16 duration),
+		 u16 duration, u16 subtype, bool success),
 
-	TP_ARGS(local, sdata, duration),
+	TP_ARGS(local, sdata, duration, subtype, success),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
 		VIF_ENTRY
 		__field(u32, duration)
+		__field(u16, subtype)
+		__field(u8, success)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
 		VIF_ASSIGN;
 		__entry->duration = duration;
+		__entry->subtype = subtype;
+		__entry->success = success;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT " duration: %u",
-		LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration
+		LOCAL_PR_FMT VIF_PR_FMT " duration: %u, subtype:0x%x, success:%d",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration,
+		__entry->subtype, __entry->success
 	)
 );
 
+DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_prepare_tx,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 u16 duration, u16 subtype, bool success),
+
+	TP_ARGS(local, sdata, duration, subtype, success)
+);
+
+DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_complete_tx,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 u16 duration, u16 subtype, bool success),
+
+	TP_ARGS(local, sdata, duration, subtype, success)
+);
+
 DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata),

From 65be6aa36ded2d2e3bf5058f4d3385b5a2a7ef2e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 23 Jun 2021 13:05:47 +0200
Subject: [PATCH 2713/3292] mac80211: add HE 6 GHz capability only if supported

The HE 6 GHz capability should only be included if there
are actually available channels on 6 GHz, and if that's
the case we need to get it from the 6 GHz band data, not
whatever other band we're on now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.290bf5c87030.I178aff1c3a6e32456d4ac9238e4a2eb47d209ccd@changeid
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.05e935e8dd98.I83ff7eb2ae8ebdf2e30c4fa2461344d9e569f599@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d1ecac00bddb..05e96212b104 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  *
  * utilities for mac80211
  */
@@ -2960,12 +2960,15 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
 	u8 *pos;
 	u16 cap;
 
-	sband = ieee80211_get_sband(sdata);
-	if (!sband)
+	if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy,
+					  BIT(NL80211_BAND_6GHZ),
+					  IEEE80211_CHAN_NO_HE))
 		return;
 
+	sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+
 	iftd = ieee80211_get_sband_iftype_data(sband, iftype);
-	if (WARN_ON(!iftd))
+	if (!iftd)
 		return;
 
 	/* Check for device HE 6 GHz capability before adding element */

From 6e899fa027addf2dd069714184c58a7c8c4b3030 Mon Sep 17 00:00:00 2001
From: Bassem Dawood <bassem@morsemicro.com>
Date: Sat, 27 Feb 2021 16:58:15 +1100
Subject: [PATCH 2714/3292] mac80211: Enable power save after receiving NULL
 packet ACK

Trigger dynamic_ps_timer to re-evaluate power saving once a null
function packet (with PM = 1) is ACKed, otherwise dynamic PS is
not enabled at that point.

Signed-off-by: Bassem Dawood <bassem@morsemicro.com>
Link: https://lore.kernel.org/r/20210227055815.14838-1-bassem@morsemicro.com
[reformatting]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/status.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 9baf185ee4c7..b6ef96a25eac 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -1006,12 +1006,11 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
 	    ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
 	    !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
 	    local->ps_sdata && !(local->scanning)) {
-		if (info->flags & IEEE80211_TX_STAT_ACK) {
+		if (info->flags & IEEE80211_TX_STAT_ACK)
 			local->ps_sdata->u.mgd.flags |=
 					IEEE80211_STA_NULLFUNC_ACKED;
-		} else
-			mod_timer(&local->dynamic_ps_timer, jiffies +
-					msecs_to_jiffies(10));
+		mod_timer(&local->dynamic_ps_timer,
+			  jiffies + msecs_to_jiffies(10));
 	}
 
 	ieee80211_report_used_skb(local, skb, false);

From 7937bd532fe6f6342206b3e002bd791bf25085e0 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 29 Apr 2021 15:39:39 +0800
Subject: [PATCH 2715/3292] tpm_crb: Use IOMEM_ERR_PTR when function returns
 iomem

This is to simplify the code, and IOMEM_ERR_PTR(err) is same with
(__force void __iomem *)ERR_PTR(err).

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_crb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index a9dcf31eadd2..18606651d1aa 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -464,7 +464,7 @@ static void __iomem *crb_map_res(struct device *dev, struct resource *iores,
 
 	/* Detect a 64 bit address on a 32 bit system */
 	if (start != new_res.start)
-		return (void __iomem *) ERR_PTR(-EINVAL);
+		return IOMEM_ERR_PTR(-EINVAL);
 
 	if (!iores)
 		return devm_ioremap_resource(dev, &new_res);

From 114e43371c58992c3ceece219cc359f16314b2c9 Mon Sep 17 00:00:00 2001
From: Liguang Zhang <zhangliguang@linux.alibaba.com>
Date: Fri, 7 May 2021 22:52:55 +0800
Subject: [PATCH 2716/3292] tpm_tis_spi: set default probe function if device
 id not match

In DSDT table, TPM _CID was SMO0768, and no _HID definition. After a
kernel upgrade from 4.19 to 5.10, TPM probe function was changed which
causes device probe fails. In order to make newer kernel to be
compatible with the older acpi definition, it would be best set default
probe function.

Signed-off-by: Liguang Zhang <zhangliguang@linux.alibaba.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_spi_main.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index 3856f6ebcb34..da632a582621 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -240,10 +240,14 @@ static int tpm_tis_spi_driver_probe(struct spi_device *spi)
 	tpm_tis_spi_probe_func probe_func;
 
 	probe_func = of_device_get_match_data(&spi->dev);
-	if (!probe_func && spi_dev_id)
-		probe_func = (tpm_tis_spi_probe_func)spi_dev_id->driver_data;
-	if (!probe_func)
-		return -ENODEV;
+	if (!probe_func) {
+		if (spi_dev_id) {
+			probe_func = (tpm_tis_spi_probe_func)spi_dev_id->driver_data;
+			if (!probe_func)
+				return -ENODEV;
+		} else
+			probe_func = tpm_tis_spi_probe;
+	}
 
 	return probe_func(spi);
 }

From 446cd6f0f3045dd971845e2082ff7b5dbd235743 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Mon, 17 May 2021 09:18:44 +0800
Subject: [PATCH 2717/3292] char: tpm: move to use request_irq by
 IRQF_NO_AUTOEN flag

disable_irq() after request_irq() still has a time gap in which
interrupts can come. request_irq() with IRQF_NO_AUTOEN flag will
disable IRQ auto-enable because of requesting.

Link: https://lore.kernel.org/patchwork/patch/1388765/
Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_i2c_cr50.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_i2c_cr50.c b/drivers/char/tpm/tpm_tis_i2c_cr50.c
index f19c227d20f4..44dde2fbe2fb 100644
--- a/drivers/char/tpm/tpm_tis_i2c_cr50.c
+++ b/drivers/char/tpm/tpm_tis_i2c_cr50.c
@@ -706,14 +706,14 @@ static int tpm_cr50_i2c_probe(struct i2c_client *client,
 
 	if (client->irq > 0) {
 		rc = devm_request_irq(dev, client->irq, tpm_cr50_i2c_int_handler,
-				      IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+				      IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+				      IRQF_NO_AUTOEN,
 				      dev->driver->name, chip);
 		if (rc < 0) {
 			dev_err(dev, "Failed to probe IRQ %d\n", client->irq);
 			return rc;
 		}
 
-		disable_irq(client->irq);
 		priv->irq = client->irq;
 	} else {
 		dev_warn(dev, "No IRQ, will use %ums delay for TPM ready\n",

From 5317677db4290366c95f4209de387d6c9b48707f Mon Sep 17 00:00:00 2001
From: Amir Mizinski <amirmizi6@gmail.com>
Date: Tue, 25 May 2021 14:13:25 +0300
Subject: [PATCH 2718/3292] tpm: add longer timeout for
 TPM2_CC_VERIFY_SIGNATURE

While running a TPM2_CC_VERIFY_SIGNATURE operation with RSA 3072-bit
keys the TPM driver fails with the following error:

"kernel: [ 2416.187522] tpm tpm0: Operation Timed out"

Since the TPM PC Client specification does not specify a number for
verify signature operation timeout, and the duration of
TPM2_CC_VERIFY_SIGNATURE with RSA 3072-bit keys exceeds the current timeout
of TPM_LONG (2 seconds), it is preferable to pick the longest timeout
possible.

Therefore, set the duration for TPM2_CC_VERIFY_SIGNATUE to TPM_LONG_LONG
(5 minutes).

[jarkko@kernel.org: mangled the short summary a bit]

Link: https://trustedcomputinggroup.org/resource/pc-client-specific-platform-firmware-profile-specification/
Signed-off-by: Amir Mizinski <amirmizi6@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm2-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index c84d23951219..a25815a6f625 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -87,7 +87,7 @@ static u8 tpm2_ordinal_duration_index(u32 ordinal)
 		return TPM_MEDIUM;
 
 	case TPM2_CC_VERIFY_SIGNATURE:        /* 177 */
-		return TPM_LONG;
+		return TPM_LONG_LONG;
 
 	case TPM2_CC_PCR_EXTEND:              /* 182 */
 		return TPM_MEDIUM;

From c46ed2281bbe4b84e6f3d4bdfb0e4e9ab813fa9d Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Thu, 27 May 2021 17:23:52 +0200
Subject: [PATCH 2719/3292] tpm_tis_spi: add missing SPI device ID entries

The SPI core always reports a "MODALIAS=spi:<foo>", even if the device was
registered via OF. This means that this module won't auto-load if a DT has
for example has a node with a compatible "infineon,slb9670" string.

In that case kmod will expect a "MODALIAS=of:N*T*Cinfineon,slb9670" uevent
but instead will get a "MODALIAS=spi:slb9670", which is not present in the
kernel module aliases:

$ modinfo drivers/char/tpm/tpm_tis_spi.ko | grep alias
alias:          of:N*T*Cgoogle,cr50C*
alias:          of:N*T*Cgoogle,cr50
alias:          of:N*T*Ctcg,tpm_tis-spiC*
alias:          of:N*T*Ctcg,tpm_tis-spi
alias:          of:N*T*Cinfineon,slb9670C*
alias:          of:N*T*Cinfineon,slb9670
alias:          of:N*T*Cst,st33htpm-spiC*
alias:          of:N*T*Cst,st33htpm-spi
alias:          spi:cr50
alias:          spi:tpm_tis_spi
alias:          acpi*:SMO0768:*

To workaround this issue, add in the SPI device ID table all the entries
that are present in the OF device ID table.

Reported-by: Alexander Wellbrock <a.wellbrock@mailbox.org>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Tested-by: Peter Robinson <pbrobinson@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_spi_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index da632a582621..54584b4b00d1 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -264,6 +264,8 @@ static int tpm_tis_spi_remove(struct spi_device *dev)
 }
 
 static const struct spi_device_id tpm_tis_spi_id[] = {
+	{ "st33htpm-spi", (unsigned long)tpm_tis_spi_probe },
+	{ "slb9670", (unsigned long)tpm_tis_spi_probe },
 	{ "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe },
 	{ "cr50", (unsigned long)cr50_spi_probe },
 	{}

From 6e0fe58b08e747c73b848de92ccec944f31dddce Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 1 Jun 2021 20:22:30 +0800
Subject: [PATCH 2720/3292] tpm: fix some doc warnings in tpm1-cmd.c

Fix the following make W=1 warnings:

  drivers/char/tpm/tpm1-cmd.c:325: warning: expecting prototype for tpm_startup(). Prototype was for tpm1_startup() instead
  drivers/char/tpm/tpm1-cmd.c:621: warning: expecting prototype for tpm_continue_selftest(). Prototype was for tpm1_continue_selftest() instead

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm1-cmd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index ca7158fa6e6c..f7dc986fa4a0 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -312,7 +312,7 @@ unsigned long tpm1_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal)
 #define TPM_ST_CLEAR 1
 
 /**
- * tpm_startup() - turn on the TPM
+ * tpm1_startup() - turn on the TPM
  * @chip: TPM chip to use
  *
  * Normally the firmware should start the TPM. This function is provided as a
@@ -611,7 +611,7 @@ out:
 
 #define TPM_ORD_CONTINUE_SELFTEST 83
 /**
- * tpm_continue_selftest() - run TPM's selftest
+ * tpm1_continue_selftest() - run TPM's selftest
  * @chip: TPM chip to use
  *
  * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing

From 5a118a39ec9207f9b8cddb013ad270c80bc84a1c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Thu, 3 Jun 2021 15:49:55 +0800
Subject: [PATCH 2721/3292] tpm_tis: Use DEFINE_RES_MEM() to simplify code

Use DEFINE_RES_MEM() to save a couple of lines of code, which is simpler
and more readable. The start address does not need to appear twice.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 4ed6e660273a..d3f2e5364c27 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -363,11 +363,7 @@ static int tpm_tis_force_device(void)
 {
 	struct platform_device *pdev;
 	static const struct resource x86_resources[] = {
-		{
-			.start = 0xFED40000,
-			.end = 0xFED40000 + TIS_MEM_LEN - 1,
-			.flags = IORESOURCE_MEM,
-		},
+		DEFINE_RES_MEM(0xFED40000, TIS_MEM_LEN)
 	};
 
 	if (!force)

From 0178f9d0f60ba07e09bab57381a3ef18e2c1fd7f Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Wed, 9 Jun 2021 16:26:19 +0300
Subject: [PATCH 2722/3292] tpm: Replace WARN_ONCE() with dev_err_once() in
 tpm_tis_status()

Do not tear down the system when getting invalid status from a TPM chip.
This can happen when panic-on-warn is used.

Instead, introduce TPM_TIS_INVALID_STATUS bitflag and use it to trigger
once the error reporting per chip. In addition, print out the value of
TPM_STS for improved forensics.

Link: https://lore.kernel.org/keyrings/YKzlTR1AzUigShtZ@kroah.com/
Fixes: 55707d531af6 ("tpm_tis: Add a check for invalid status")
Cc: stable@vger.kernel.org
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/tpm/tpm_tis_core.c | 25 ++++++++++++++++++-------
 drivers/char/tpm/tpm_tis_core.h |  3 ++-
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 55b9d3965ae1..69579efb247b 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -196,13 +196,24 @@ static u8 tpm_tis_status(struct tpm_chip *chip)
 		return 0;
 
 	if (unlikely((status & TPM_STS_READ_ZERO) != 0)) {
-		/*
-		 * If this trips, the chances are the read is
-		 * returning 0xff because the locality hasn't been
-		 * acquired.  Usually because tpm_try_get_ops() hasn't
-		 * been called before doing a TPM operation.
-		 */
-		WARN_ONCE(1, "TPM returned invalid status\n");
+		if  (!test_and_set_bit(TPM_TIS_INVALID_STATUS, &priv->flags)) {
+			/*
+			 * If this trips, the chances are the read is
+			 * returning 0xff because the locality hasn't been
+			 * acquired.  Usually because tpm_try_get_ops() hasn't
+			 * been called before doing a TPM operation.
+			 */
+			dev_err(&chip->dev, "invalid TPM_STS.x 0x%02x, dumping stack for forensics\n",
+				status);
+
+			/*
+			 * Dump stack for forensics, as invalid TPM_STS.x could be
+			 * potentially triggered by impaired tpm_try_get_ops() or
+			 * tpm_find_get_ops().
+			 */
+			dump_stack();
+		}
+
 		return 0;
 	}
 
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 9b2d32a59f67..b2a3c6c72882 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -83,6 +83,7 @@ enum tis_defaults {
 
 enum tpm_tis_flags {
 	TPM_TIS_ITPM_WORKAROUND		= BIT(0),
+	TPM_TIS_INVALID_STATUS		= BIT(1),
 };
 
 struct tpm_tis_data {
@@ -90,7 +91,7 @@ struct tpm_tis_data {
 	int locality;
 	int irq;
 	bool irq_tested;
-	unsigned int flags;
+	unsigned long flags;
 	void __iomem *ilb_base_addr;
 	u16 clkrun_enabled;
 	wait_queue_head_t int_queue;

From 95f83ee8d857f006813755e89a126f1048b001e8 Mon Sep 17 00:00:00 2001
From: Abinaya Kalaiselvan <akalaise@codeaurora.org>
Date: Wed, 23 Jun 2021 20:10:44 +0530
Subject: [PATCH 2723/3292] mac80211: fix NULL ptr dereference during mesh peer
 connection for non HE devices

"sband->iftype_data" is not assigned with any value for non HE supported
devices, which causes NULL pointer access during mesh peer connection
in those devices. Fix this by accessing the pointer after HE
capabilities condition check.

Cc: stable@vger.kernel.org
Fixes: 7f7aa94bcaf0 (mac80211: reduce peer HE MCS/NSS to own capabilities)
Signed-off-by: Abinaya Kalaiselvan <akalaise@codeaurora.org>
Link: https://lore.kernel.org/r/1624459244-4497-1-git-send-email-akalaise@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/he.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 5984a9dac0bc..c05af7018f79 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -111,7 +111,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 				  struct sta_info *sta)
 {
 	struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
-	struct ieee80211_sta_he_cap own_he_cap = sband->iftype_data->he_cap;
+	struct ieee80211_sta_he_cap own_he_cap;
 	struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
 	u8 he_ppe_size;
 	u8 mcs_nss_size;
@@ -125,6 +125,8 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 					 ieee80211_vif_type_p2p(&sdata->vif)))
 		return;
 
+	own_he_cap = sband->iftype_data->he_cap;
+
 	/* Make sure size is OK */
 	mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
 	he_ppe_size =

From 744757e46bf13ec3a7b3507d17ab3faab9516d43 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 23 Jun 2021 21:48:25 +0800
Subject: [PATCH 2724/3292] mac80211: remove iwlwifi specific workaround NDPs
 of null_response

Remove the remaining workaround that is not removed by the
commit e41eb3e408de ("mac80211: remove iwlwifi specific workaround
that broke sta NDP tx")

Fixes: 41cbb0f5a295 ("mac80211: add support for HE")
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Link: https://lore.kernel.org/r/20210623134826.10318-1-pkshih@realtek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 641a6657d0c9..e18c3855f616 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1398,11 +1398,6 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 
-	/* Don't send NDPs when STA is connected HE */
-	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
-		return;
-
 	if (qos) {
 		fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
 				 IEEE80211_STYPE_QOS_NULLFUNC |

From 2832943c789aa6a89eb3d1cf1a466e817ae451a7 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 23 Jun 2021 21:48:26 +0800
Subject: [PATCH 2725/3292] Revert "mac80211: HE STA disassoc due to QOS NULL
 not sent"

This reverts commit f39b07fdfb68 ("mac80211: HE STA disassoc
due to QOS NULL not sent")

Since iwlwifi specific workaround, which blocks to send NDP,
is removed, we can revert this commit.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Link: https://lore.kernel.org/r/20210623134826.10318-2-pkshih@realtek.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a0572ce99826..a00f11a33699 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2541,10 +2541,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 
 	if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
 		ifmgd->nullfunc_failed = false;
-		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
-			ifmgd->probe_send_count--;
-		else
-			ieee80211_send_nullfunc(sdata->local, sdata, false);
+		ieee80211_send_nullfunc(sdata->local, sdata, false);
 	} else {
 		int ssid_len;
 

From 2433647bc8d983a543e7d31b41ca2de1c7e2c198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 23 Jun 2021 15:47:55 +0200
Subject: [PATCH 2726/3292] mac80211: Switch to a virtual time-based airtime
 scheduler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This switches the airtime scheduler in mac80211 to use a virtual
time-based scheduler instead of the round-robin scheduler used before.
This has a couple of advantages:

- No need to sync up the round-robin scheduler in firmware/hardware with
  the round-robin airtime scheduler.

- If several stations are eligible for transmission we can schedule both
  of them; no need to hard-block the scheduling rotation until the head
  of the queue has used up its quantum.

- The check of whether a station is eligible for transmission becomes
  simpler (in ieee80211_txq_may_transmit()).

The drawback is that scheduling becomes slightly more expensive, as we
need to maintain an rbtree of TXQs sorted by virtual time. This means
that ieee80211_register_airtime() becomes O(logN) in the number of
currently scheduled TXQs because it can change the order of the
scheduled stations. We mitigate this overhead by only resorting when a
station changes position in the tree, and hopefully N rarely grows too
big (it's only TXQs currently backlogged, not all associated stations),
so it shouldn't be too big of an issue.

To prevent divisions in the fast path, we maintain both station sums and
pre-computed reciprocals of the sums. This turns the fast-path operation
into a multiplication, with divisions only happening as the number of
active stations change (to re-compute the current sum of all active
station weights). To prevent this re-computation of the reciprocal from
happening too frequently, we use a time-based notion of station
activity, instead of updating the weight every time a station gets
scheduled or de-scheduled. As queues can oscillate between empty and
occupied quite frequently, this can significantly cut down on the number
of re-computations. It also has the added benefit of making the station
airtime calculation independent on whether the queue happened to have
drained at the time an airtime value was accounted.

Co-developed-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Yibo Zhao <yiboz@codeaurora.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20210623134755.235545-1-toke@redhat.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h        |  17 +-
 net/mac80211/cfg.c            |  35 +++-
 net/mac80211/debugfs.c        |  70 +++++--
 net/mac80211/debugfs_netdev.c |  32 ++-
 net/mac80211/debugfs_sta.c    |  24 +--
 net/mac80211/ieee80211_i.h    | 182 ++++++++++++++--
 net/mac80211/iface.c          |   3 +
 net/mac80211/main.c           |  11 +-
 net/mac80211/rx.c             |   6 +-
 net/mac80211/sta_info.c       |  69 +++++--
 net/mac80211/sta_info.h       |  11 +-
 net/mac80211/status.c         |  19 ++
 net/mac80211/tx.c             | 378 ++++++++++++++++++++++++----------
 13 files changed, 660 insertions(+), 197 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9afbcac61c2a..d8a1d09a2141 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6605,9 +6605,6 @@ static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 {
 }
 
-void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
-			      struct ieee80211_txq *txq, bool force);
-
 /**
  * ieee80211_schedule_txq - schedule a TXQ for transmission
  *
@@ -6620,11 +6617,7 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
  * The driver may call this function if it has buffered packets for
  * this TXQ internally.
  */
-static inline void
-ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
-{
-	__ieee80211_schedule_txq(hw, txq, true);
-}
+void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
 
 /**
  * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
@@ -6636,12 +6629,8 @@ ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
  * The driver may set force=true if it has buffered packets for this TXQ
  * internally.
  */
-static inline void
-ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
-		     bool force)
-{
-	__ieee80211_schedule_txq(hw, txq, force);
-}
+void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
+			  bool force);
 
 /**
  * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0d29a9d1f910..84cc7733ea66 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1442,6 +1442,38 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
 #endif
 }
 
+static void sta_apply_airtime_params(struct ieee80211_local *local,
+				     struct sta_info *sta,
+				     struct station_parameters *params)
+{
+	u8 ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		struct airtime_sched_info *air_sched = &local->airtime[ac];
+		struct airtime_info *air_info = &sta->airtime[ac];
+		struct txq_info *txqi;
+		u8 tid;
+
+		spin_lock_bh(&air_sched->lock);
+		for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+			if (air_info->weight == params->airtime_weight ||
+			    !sta->sta.txq[tid] ||
+			    ac != ieee80211_ac_from_tid(tid))
+				continue;
+
+			airtime_weight_set(air_info, params->airtime_weight);
+
+			txqi = to_txq_info(sta->sta.txq[tid]);
+			if (RB_EMPTY_NODE(&txqi->schedule_order))
+				continue;
+
+			ieee80211_update_airtime_weight(local, air_sched,
+							0, true);
+		}
+		spin_unlock_bh(&air_sched->lock);
+	}
+}
+
 static int sta_apply_parameters(struct ieee80211_local *local,
 				struct sta_info *sta,
 				struct station_parameters *params)
@@ -1629,7 +1661,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 		sta_apply_mesh_params(local, sta, params);
 
 	if (params->airtime_weight)
-		sta->airtime_weight = params->airtime_weight;
+		sta_apply_airtime_params(local, sta, params);
+
 
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index fc34ae2b604c..8dbfe325ee66 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -216,14 +216,14 @@ static ssize_t aql_txq_limit_read(struct file *file,
 			"VI	%u		%u\n"
 			"BE	%u		%u\n"
 			"BK	%u		%u\n",
-			local->aql_txq_limit_low[IEEE80211_AC_VO],
-			local->aql_txq_limit_high[IEEE80211_AC_VO],
-			local->aql_txq_limit_low[IEEE80211_AC_VI],
-			local->aql_txq_limit_high[IEEE80211_AC_VI],
-			local->aql_txq_limit_low[IEEE80211_AC_BE],
-			local->aql_txq_limit_high[IEEE80211_AC_BE],
-			local->aql_txq_limit_low[IEEE80211_AC_BK],
-			local->aql_txq_limit_high[IEEE80211_AC_BK]);
+			local->airtime[IEEE80211_AC_VO].aql_txq_limit_low,
+			local->airtime[IEEE80211_AC_VO].aql_txq_limit_high,
+			local->airtime[IEEE80211_AC_VI].aql_txq_limit_low,
+			local->airtime[IEEE80211_AC_VI].aql_txq_limit_high,
+			local->airtime[IEEE80211_AC_BE].aql_txq_limit_low,
+			local->airtime[IEEE80211_AC_BE].aql_txq_limit_high,
+			local->airtime[IEEE80211_AC_BK].aql_txq_limit_low,
+			local->airtime[IEEE80211_AC_BK].aql_txq_limit_high);
 	return simple_read_from_buffer(user_buf, count, ppos,
 				       buf, len);
 }
@@ -255,11 +255,11 @@ static ssize_t aql_txq_limit_write(struct file *file,
 	if (ac >= IEEE80211_NUM_ACS)
 		return -EINVAL;
 
-	q_limit_low_old = local->aql_txq_limit_low[ac];
-	q_limit_high_old = local->aql_txq_limit_high[ac];
+	q_limit_low_old = local->airtime[ac].aql_txq_limit_low;
+	q_limit_high_old = local->airtime[ac].aql_txq_limit_high;
 
-	local->aql_txq_limit_low[ac] = q_limit_low;
-	local->aql_txq_limit_high[ac] = q_limit_high;
+	local->airtime[ac].aql_txq_limit_low = q_limit_low;
+	local->airtime[ac].aql_txq_limit_high = q_limit_high;
 
 	mutex_lock(&local->sta_mtx);
 	list_for_each_entry(sta, &local->sta_list, list) {
@@ -382,6 +382,46 @@ static const struct file_operations force_tx_status_ops = {
 	.llseek = default_llseek,
 };
 
+static ssize_t airtime_read(struct file *file,
+			    char __user *user_buf,
+			    size_t count,
+			    loff_t *ppos)
+{
+	struct ieee80211_local *local = file->private_data;
+	char buf[200];
+	u64 v_t[IEEE80211_NUM_ACS];
+	u64 wt[IEEE80211_NUM_ACS];
+	int len = 0, ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->airtime[ac].lock);
+		v_t[ac] = local->airtime[ac].v_t;
+		wt[ac] = local->airtime[ac].weight_sum;
+		spin_unlock_bh(&local->airtime[ac].lock);
+	}
+	len = scnprintf(buf, sizeof(buf),
+			"\tVO         VI         BE         BK\n"
+			"Virt-t\t%-10llu %-10llu %-10llu %-10llu\n"
+			"Weight\t%-10llu %-10llu %-10llu %-10llu\n",
+			v_t[0],
+			v_t[1],
+			v_t[2],
+			v_t[3],
+			wt[0],
+			wt[1],
+			wt[2],
+			wt[3]);
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       buf, len);
+}
+
+static const struct file_operations airtime_ops = {
+	.read = airtime_read,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 #ifdef CONFIG_PM
 static ssize_t reset_write(struct file *file, const char __user *user_buf,
 			   size_t count, loff_t *ppos)
@@ -632,7 +672,11 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD_MODE(aqm, 0600);
 
-	DEBUGFS_ADD_MODE(airtime_flags, 0600);
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+		DEBUGFS_ADD_MODE(airtime, 0600);
+		DEBUGFS_ADD_MODE(airtime_flags, 0600);
+	}
 
 	DEBUGFS_ADD(aql_txq_limit);
 	debugfs_create_u32("aql_threshold", 0600,
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index f7aac8955681..db724fc10a5f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -512,6 +512,34 @@ static ssize_t ieee80211_if_fmt_aqm(
 }
 IEEE80211_IF_FILE_R(aqm);
 
+static ssize_t ieee80211_if_fmt_airtime(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_txq *txq = sdata->vif.txq;
+	struct airtime_info *air_info;
+	int len;
+
+	if (!txq)
+		return 0;
+
+	spin_lock_bh(&local->airtime[txq->ac].lock);
+	air_info = to_airtime_info(txq);
+	len = scnprintf(buf,
+			buflen,
+			"RX: %llu us\nTX: %llu us\nWeight: %u\n"
+			"Virt-T: %lld us\n",
+			air_info->rx_airtime,
+			air_info->tx_airtime,
+			air_info->weight,
+			air_info->v_t);
+	spin_unlock_bh(&local->airtime[txq->ac].lock);
+
+	return len;
+}
+
+IEEE80211_IF_FILE_R(airtime);
+
 IEEE80211_IF_FILE(multicast_to_unicast, u.ap.multicast_to_unicast, HEX);
 
 /* IBSS attributes */
@@ -657,8 +685,10 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata)
 
 	if (sdata->local->ops->wake_tx_queue &&
 	    sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
-	    sdata->vif.type != NL80211_IFTYPE_NAN)
+	    sdata->vif.type != NL80211_IFTYPE_NAN) {
 		DEBUGFS_ADD(aqm);
+		DEBUGFS_ADD(airtime);
+	}
 }
 
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 936c9dfa86c8..8be28cfd6f64 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -202,7 +202,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 	size_t bufsz = 400;
 	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
 	u64 rx_airtime = 0, tx_airtime = 0;
-	s64 deficit[IEEE80211_NUM_ACS];
+	u64 v_t[IEEE80211_NUM_ACS];
 	ssize_t rv;
 	int ac;
 
@@ -210,18 +210,18 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
 		return -ENOMEM;
 
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
-		spin_lock_bh(&local->active_txq_lock[ac]);
+		spin_lock_bh(&local->airtime[ac].lock);
 		rx_airtime += sta->airtime[ac].rx_airtime;
 		tx_airtime += sta->airtime[ac].tx_airtime;
-		deficit[ac] = sta->airtime[ac].deficit;
-		spin_unlock_bh(&local->active_txq_lock[ac]);
+		v_t[ac] = sta->airtime[ac].v_t;
+		spin_unlock_bh(&local->airtime[ac].lock);
 	}
 
 	p += scnprintf(p, bufsz + buf - p,
 		"RX: %llu us\nTX: %llu us\nWeight: %u\n"
-		"Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
-		rx_airtime, tx_airtime, sta->airtime_weight,
-		deficit[0], deficit[1], deficit[2], deficit[3]);
+		"Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+		rx_airtime, tx_airtime, sta->airtime[0].weight,
+		v_t[0], v_t[1], v_t[2], v_t[3]);
 
 	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 	kfree(buf);
@@ -236,11 +236,11 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
 	int ac;
 
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
-		spin_lock_bh(&local->active_txq_lock[ac]);
+		spin_lock_bh(&local->airtime[ac].lock);
 		sta->airtime[ac].rx_airtime = 0;
 		sta->airtime[ac].tx_airtime = 0;
-		sta->airtime[ac].deficit = sta->airtime_weight;
-		spin_unlock_bh(&local->active_txq_lock[ac]);
+		sta->airtime[ac].v_t = 0;
+		spin_unlock_bh(&local->airtime[ac].lock);
 	}
 
 	return count;
@@ -263,10 +263,10 @@ static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
 		return -ENOMEM;
 
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
-		spin_lock_bh(&local->active_txq_lock[ac]);
+		spin_lock_bh(&local->airtime[ac].lock);
 		q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
 		q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
-		spin_unlock_bh(&local->active_txq_lock[ac]);
+		spin_unlock_bh(&local->airtime[ac].lock);
 		q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
 	}
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 17068408b27d..22549b95d1aa 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -831,20 +831,16 @@ enum txq_info_flags {
  * @def_flow: used as a fallback flow when a packet destined to @tin hashes to
  *	a fq_flow which is already owned by a different tin
  * @def_cvars: codel vars for @def_flow
- * @frags: used to keep fragments created after dequeue
  * @schedule_order: used with ieee80211_local->active_txqs
- * @schedule_round: counter to prevent infinite loops on TXQ scheduling
+ * @frags: used to keep fragments created after dequeue
  */
 struct txq_info {
 	struct fq_tin tin;
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
-
-	u16 schedule_round;
-	struct list_head schedule_order;
+	struct rb_node schedule_order;
 
 	struct sk_buff_head frags;
-
 	unsigned long flags;
 
 	/* keep last! */
@@ -921,6 +917,8 @@ struct ieee80211_sub_if_data {
 	struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
 	struct mac80211_qos_map __rcu *qos_map;
 
+	struct airtime_info airtime[IEEE80211_NUM_ACS];
+
 	struct work_struct csa_finalize_work;
 	bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
 	struct cfg80211_chan_def csa_chandef;
@@ -1133,6 +1131,44 @@ enum mac80211_scan_state {
 	SCAN_ABORT,
 };
 
+/**
+ * struct airtime_sched_info - state used for airtime scheduling and AQL
+ *
+ * @lock: spinlock that protects all the fields in this struct
+ * @active_txqs: rbtree of currently backlogged queues, sorted by virtual time
+ * @schedule_pos: the current position maintained while a driver walks the tree
+ *                with ieee80211_next_txq()
+ * @active_list: list of struct airtime_info structs that were active within
+ *               the last AIRTIME_ACTIVE_DURATION (100 ms), used to compute
+ *               weight_sum
+ * @last_weight_update: used for rate limiting walking active_list
+ * @last_schedule_time: tracks the last time a transmission was scheduled; used
+ *                      for catching up v_t if no stations are eligible for
+ *                      transmission.
+ * @v_t: global virtual time; queues with v_t < this are eligible for
+ *       transmission
+ * @weight_sum: total sum of all active stations used for dividing airtime
+ * @weight_sum_reciprocal: reciprocal of weight_sum (to avoid divisions in fast
+ *                         path - see comment above
+ *                         IEEE80211_RECIPROCAL_DIVISOR_64)
+ * @aql_txq_limit_low: AQL limit when total outstanding airtime
+ *                     is < IEEE80211_AQL_THRESHOLD
+ * @aql_txq_limit_high: AQL limit when total outstanding airtime
+ *                      is > IEEE80211_AQL_THRESHOLD
+ */
+struct airtime_sched_info {
+	spinlock_t lock;
+	struct rb_root_cached active_txqs;
+	struct rb_node *schedule_pos;
+	struct list_head active_list;
+	u64 last_weight_update;
+	u64 last_schedule_activity;
+	u64 v_t;
+	u64 weight_sum;
+	u64 weight_sum_reciprocal;
+	u32 aql_txq_limit_low;
+	u32 aql_txq_limit_high;
+};
 DECLARE_STATIC_KEY_FALSE(aql_disable);
 
 struct ieee80211_local {
@@ -1146,13 +1182,8 @@ struct ieee80211_local {
 	struct codel_params cparams;
 
 	/* protects active_txqs and txqi->schedule_order */
-	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
-	struct list_head active_txqs[IEEE80211_NUM_ACS];
-	u16 schedule_round[IEEE80211_NUM_ACS];
-
+	struct airtime_sched_info airtime[IEEE80211_NUM_ACS];
 	u16 airtime_flags;
-	u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
-	u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
 	u32 aql_threshold;
 	atomic_t aql_total_pending_airtime;
 
@@ -1566,6 +1597,125 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
 	return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
 }
 
+static inline struct airtime_info *to_airtime_info(struct ieee80211_txq *txq)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+
+	if (txq->sta) {
+		sta = container_of(txq->sta, struct sta_info, sta);
+		return &sta->airtime[txq->ac];
+	}
+
+	sdata = vif_to_sdata(txq->vif);
+	return &sdata->airtime[txq->ac];
+}
+
+/* To avoid divisions in the fast path, we keep pre-computed reciprocals for
+ * airtime weight calculations. There are two different weights to keep track
+ * of: The per-station weight and the sum of weights per phy.
+ *
+ * For the per-station weights (kept in airtime_info below), we use 32-bit
+ * reciprocals with a devisor of 2^19. This lets us keep the multiplications and
+ * divisions for the station weights as 32-bit operations at the cost of a bit
+ * of rounding error for high weights; but the choice of divisor keeps rounding
+ * errors <10% for weights <2^15, assuming no more than 8ms of airtime is
+ * reported at a time.
+ *
+ * For the per-phy sum of weights the values can get higher, so we use 64-bit
+ * operations for those with a 32-bit divisor, which should avoid any
+ * significant rounding errors.
+ */
+#define IEEE80211_RECIPROCAL_DIVISOR_64 0x100000000ULL
+#define IEEE80211_RECIPROCAL_SHIFT_64 32
+#define IEEE80211_RECIPROCAL_DIVISOR_32 0x80000U
+#define IEEE80211_RECIPROCAL_SHIFT_32 19
+
+static inline void airtime_weight_set(struct airtime_info *air_info, u16 weight)
+{
+	if (air_info->weight == weight)
+		return;
+
+	air_info->weight = weight;
+	if (weight) {
+		air_info->weight_reciprocal =
+			IEEE80211_RECIPROCAL_DIVISOR_32 / weight;
+	} else {
+		air_info->weight_reciprocal = 0;
+	}
+}
+
+static inline void airtime_weight_sum_set(struct airtime_sched_info *air_sched,
+					  int weight_sum)
+{
+	if (air_sched->weight_sum == weight_sum)
+		return;
+
+	air_sched->weight_sum = weight_sum;
+	if (air_sched->weight_sum) {
+		air_sched->weight_sum_reciprocal = IEEE80211_RECIPROCAL_DIVISOR_64;
+		do_div(air_sched->weight_sum_reciprocal, air_sched->weight_sum);
+	} else {
+		air_sched->weight_sum_reciprocal = 0;
+	}
+}
+
+/* A problem when trying to enforce airtime fairness is that we want to divide
+ * the airtime between the currently *active* stations. However, basing this on
+ * the instantaneous queue state of stations doesn't work, as queues tend to
+ * oscillate very quickly between empty and occupied, leading to the scheduler
+ * thinking only a single station is active when deciding whether to allow
+ * transmission (and thus not throttling correctly).
+ *
+ * To fix this we use a timer-based notion of activity: a station is considered
+ * active if it has been scheduled within the last 100 ms; we keep a separate
+ * list of all the stations considered active in this manner, and lazily update
+ * the total weight of active stations from this list (filtering the stations in
+ * the list by their 'last active' time).
+ *
+ * We add one additional safeguard to guard against stations that manage to get
+ * scheduled every 100 ms but don't transmit a lot of data, and thus don't use
+ * up any airtime. Such stations would be able to get priority for an extended
+ * period of time if they do start transmitting at full capacity again, and so
+ * we add an explicit maximum for how far behind a station is allowed to fall in
+ * the virtual airtime domain. This limit is set to a relatively high value of
+ * 20 ms because the main mechanism for catching up idle stations is the active
+ * state as described above; i.e., the hard limit should only be hit in
+ * pathological cases.
+ */
+#define AIRTIME_ACTIVE_DURATION (100 * NSEC_PER_MSEC)
+#define AIRTIME_MAX_BEHIND 20000 /* 20 ms */
+
+static inline bool airtime_is_active(struct airtime_info *air_info, u64 now)
+{
+	return air_info->last_scheduled >= now - AIRTIME_ACTIVE_DURATION;
+}
+
+static inline void airtime_set_active(struct airtime_sched_info *air_sched,
+				      struct airtime_info *air_info, u64 now)
+{
+	air_info->last_scheduled = now;
+	air_sched->last_schedule_activity = now;
+	list_move_tail(&air_info->list, &air_sched->active_list);
+}
+
+static inline bool airtime_catchup_v_t(struct airtime_sched_info *air_sched,
+				       u64 v_t, u64 now)
+{
+	air_sched->v_t = v_t;
+	return true;
+}
+
+static inline void init_airtime_info(struct airtime_info *air_info,
+				     struct airtime_sched_info *air_sched)
+{
+	atomic_set(&air_info->aql_tx_pending, 0);
+	air_info->aql_limit_low = air_sched->aql_txq_limit_low;
+	air_info->aql_limit_high = air_sched->aql_txq_limit_high;
+	airtime_weight_set(air_info, IEEE80211_DEFAULT_AIRTIME_WEIGHT);
+	INIT_LIST_HEAD(&air_info->list);
+}
+
 static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 {
 	return ether_addr_equal(raddr, addr) ||
@@ -1808,6 +1958,14 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 			      u64 *cookie);
 int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len);
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq);
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+			      struct ieee80211_txq *txq,
+			      bool purge);
+void ieee80211_update_airtime_weight(struct ieee80211_local *local,
+				     struct airtime_sched_info *air_sched,
+				     u64 now, bool force);
 
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 68375ef56b4a..1e5e9fc45523 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1977,6 +1977,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		}
 	}
 
+	for (i = 0; i < IEEE80211_NUM_ACS; i++)
+		init_airtime_info(&sdata->airtime[i], &local->airtime[i]);
+
 	ieee80211_set_default_queues(sdata);
 
 	sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 95a8300da2d0..05f4c3c72619 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -705,10 +705,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	spin_lock_init(&local->queue_stop_reason_lock);
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
-		INIT_LIST_HEAD(&local->active_txqs[i]);
-		spin_lock_init(&local->active_txq_lock[i]);
-		local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
-		local->aql_txq_limit_high[i] =
+		struct airtime_sched_info *air_sched = &local->airtime[i];
+
+		air_sched->active_txqs = RB_ROOT_CACHED;
+		INIT_LIST_HEAD(&air_sched->active_list);
+		spin_lock_init(&air_sched->lock);
+		air_sched->aql_txq_limit_low = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
+		air_sched->aql_txq_limit_high =
 			IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
 	}
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a6400adf08bf..771921c057e8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1578,12 +1578,8 @@ static void sta_ps_start(struct sta_info *sta)
 
 	for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
 		struct ieee80211_txq *txq = sta->sta.txq[tid];
-		struct txq_info *txqi = to_txq_info(txq);
 
-		spin_lock(&local->active_txq_lock[txq->ac]);
-		if (!list_empty(&txqi->schedule_order))
-			list_del_init(&txqi->schedule_order);
-		spin_unlock(&local->active_txq_lock[txq->ac]);
+		ieee80211_unschedule_txq(&local->hw, txq, false);
 
 		if (txq_has_queue(txq))
 			set_bit(tid, &sta->txq_buffered_tids);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index e18c3855f616..a5505ee51229 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -425,15 +425,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (sta_prepare_rate_control(local, sta, gfp))
 		goto free_txq;
 
-	sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
-		sta->airtime[i].deficit = sta->airtime_weight;
-		atomic_set(&sta->airtime[i].aql_tx_pending, 0);
-		sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i];
-		sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i];
+		init_airtime_info(&sta->airtime[i], &local->airtime[i]);
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1892,24 +1888,59 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 }
 EXPORT_SYMBOL(ieee80211_sta_set_buffered);
 
+void ieee80211_register_airtime(struct ieee80211_txq *txq,
+				u32 tx_airtime, u32 rx_airtime)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
+	struct ieee80211_local *local = sdata->local;
+	u64 weight_sum, weight_sum_reciprocal;
+	struct airtime_sched_info *air_sched;
+	struct airtime_info *air_info;
+	u32 airtime = 0;
+
+	air_sched = &local->airtime[txq->ac];
+	air_info = to_airtime_info(txq);
+
+	if (local->airtime_flags & AIRTIME_USE_TX)
+		airtime += tx_airtime;
+	if (local->airtime_flags & AIRTIME_USE_RX)
+		airtime += rx_airtime;
+
+	/* Weights scale so the unit weight is 256 */
+	airtime <<= 8;
+
+	spin_lock_bh(&air_sched->lock);
+
+	air_info->tx_airtime += tx_airtime;
+	air_info->rx_airtime += rx_airtime;
+
+	if (air_sched->weight_sum) {
+		weight_sum = air_sched->weight_sum;
+		weight_sum_reciprocal = air_sched->weight_sum_reciprocal;
+	} else {
+		weight_sum = air_info->weight;
+		weight_sum_reciprocal = air_info->weight_reciprocal;
+	}
+
+	/* Round the calculation of global vt */
+	air_sched->v_t += (u64)((airtime + (weight_sum >> 1)) *
+				weight_sum_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_64;
+	air_info->v_t += (u32)((airtime + (air_info->weight >> 1)) *
+			       air_info->weight_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_32;
+	ieee80211_resort_txq(&local->hw, txq);
+
+	spin_unlock_bh(&air_sched->lock);
+}
+
 void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
 				    u32 tx_airtime, u32 rx_airtime)
 {
-	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
-	struct ieee80211_local *local = sta->sdata->local;
-	u8 ac = ieee80211_ac_from_tid(tid);
-	u32 airtime = 0;
+	struct ieee80211_txq *txq = pubsta->txq[tid];
 
-	if (sta->local->airtime_flags & AIRTIME_USE_TX)
-		airtime += tx_airtime;
-	if (sta->local->airtime_flags & AIRTIME_USE_RX)
-		airtime += rx_airtime;
+	if (!txq)
+		return;
 
-	spin_lock_bh(&local->active_txq_lock[ac]);
-	sta->airtime[ac].tx_airtime += tx_airtime;
-	sta->airtime[ac].rx_airtime += rx_airtime;
-	sta->airtime[ac].deficit -= airtime;
-	spin_unlock_bh(&local->active_txq_lock[ac]);
+	ieee80211_register_airtime(txq, tx_airtime, rx_airtime);
 }
 EXPORT_SYMBOL(ieee80211_sta_register_airtime);
 
@@ -2353,7 +2384,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 	}
 
 	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
-		sinfo->airtime_weight = sta->airtime_weight;
+		sinfo->airtime_weight = sta->airtime[0].weight;
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
 	}
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 0333072ebd98..ba2796782008 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -135,18 +135,25 @@ enum ieee80211_agg_stop_reason {
 #define AIRTIME_USE_TX		BIT(0)
 #define AIRTIME_USE_RX		BIT(1)
 
+
 struct airtime_info {
 	u64 rx_airtime;
 	u64 tx_airtime;
-	s64 deficit;
+	u64 v_t;
+	u64 last_scheduled;
+	struct list_head list;
 	atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
 	u32 aql_limit_low;
 	u32 aql_limit_high;
+	u32 weight_reciprocal;
+	u16 weight;
 };
 
 void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
 					  struct sta_info *sta, u8 ac,
 					  u16 tx_airtime, bool tx_completed);
+void ieee80211_register_airtime(struct ieee80211_txq *txq,
+				u32 tx_airtime, u32 rx_airtime);
 
 struct sta_info;
 
@@ -515,7 +522,6 @@ struct ieee80211_fragment_cache {
  * @tid_seq: per-TID sequence numbers for sending to this STA
  * @airtime: per-AC struct airtime_info describing airtime statistics for this
  *	station
- * @airtime_weight: station weight for airtime fairness calculation purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
  * @debugfs_dir: debug filesystem directory dentry
@@ -646,7 +652,6 @@ struct sta_info {
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
 	struct airtime_info airtime[IEEE80211_NUM_ACS];
-	u16 airtime_weight;
 
 	/*
 	 * Aggregation information, locked with lock.
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index b6ef96a25eac..bae321ff77f6 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -970,6 +970,25 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
 		if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked)
 			ieee80211_frame_acked(sta, skb);
 
+	} else if (wiphy_ext_feature_isset(local->hw.wiphy,
+					   NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+		struct ieee80211_sub_if_data *sdata;
+		struct ieee80211_txq *txq;
+		u32 airtime;
+
+		/* Account airtime to multicast queue */
+		sdata = ieee80211_sdata_from_skb(local, skb);
+
+		if (sdata && (txq = sdata->vif.txq)) {
+			airtime = info->status.tx_time ?:
+				ieee80211_calc_expected_tx_airtime(hw,
+								   &sdata->vif,
+								   NULL,
+								   skb->len,
+								   false);
+
+			ieee80211_register_airtime(txq, airtime, 0);
+		}
 	}
 
 	/* SNMP counters
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index caa7caa89ab9..e96981144358 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -18,6 +18,7 @@
 #include <linux/bitmap.h>
 #include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/timekeeping.h>
 #include <net/net_namespace.h>
 #include <net/ieee80211_radiotap.h>
 #include <net/cfg80211.h>
@@ -1449,7 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
-	INIT_LIST_HEAD(&txqi->schedule_order);
+	RB_CLEAR_NODE(&txqi->schedule_order);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -1493,9 +1494,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
 	spin_unlock_bh(&fq->lock);
 
-	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
-	list_del_init(&txqi->schedule_order);
-	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	ieee80211_unschedule_txq(&local->hw, &txqi->txq, true);
 }
 
 void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -3783,102 +3782,259 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
 struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct airtime_sched_info *air_sched;
+	u64 now = ktime_get_boottime_ns();
 	struct ieee80211_txq *ret = NULL;
-	struct txq_info *txqi = NULL, *head = NULL;
-	bool found_eligible_txq = false;
+	struct airtime_info *air_info;
+	struct txq_info *txqi = NULL;
+	struct rb_node *node;
+	bool first = false;
 
-	spin_lock_bh(&local->active_txq_lock[ac]);
+	air_sched = &local->airtime[ac];
+	spin_lock_bh(&air_sched->lock);
 
- begin:
-	txqi = list_first_entry_or_null(&local->active_txqs[ac],
-					struct txq_info,
-					schedule_order);
-	if (!txqi)
-		goto out;
+	node = air_sched->schedule_pos;
 
-	if (txqi == head) {
-		if (!found_eligible_txq)
-			goto out;
-		else
-			found_eligible_txq = false;
+begin:
+	if (!node) {
+		node = rb_first_cached(&air_sched->active_txqs);
+		first = true;
+	} else {
+		node = rb_next(node);
 	}
 
-	if (!head)
-		head = txqi;
-
-	if (txqi->txq.sta) {
-		struct sta_info *sta = container_of(txqi->txq.sta,
-						    struct sta_info, sta);
-		bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
-		s64 deficit = sta->airtime[txqi->txq.ac].deficit;
-
-		if (aql_check)
-			found_eligible_txq = true;
-
-		if (deficit < 0)
-			sta->airtime[txqi->txq.ac].deficit +=
-				sta->airtime_weight;
-
-		if (deficit < 0 || !aql_check) {
-			list_move_tail(&txqi->schedule_order,
-				       &local->active_txqs[txqi->txq.ac]);
-			goto begin;
-		}
-	}
-
-
-	if (txqi->schedule_round == local->schedule_round[ac])
+	if (!node)
 		goto out;
 
-	list_del_init(&txqi->schedule_order);
-	txqi->schedule_round = local->schedule_round[ac];
+	txqi = container_of(node, struct txq_info, schedule_order);
+	air_info = to_airtime_info(&txqi->txq);
+
+	if (air_info->v_t > air_sched->v_t &&
+	    (!first || !airtime_catchup_v_t(air_sched, air_info->v_t, now)))
+		goto out;
+
+	if (!ieee80211_txq_airtime_check(hw, &txqi->txq)) {
+		first = false;
+		goto begin;
+	}
+
+	air_sched->schedule_pos = node;
+	air_sched->last_schedule_activity = now;
 	ret = &txqi->txq;
-
 out:
-	spin_unlock_bh(&local->active_txq_lock[ac]);
+	spin_unlock_bh(&air_sched->lock);
 	return ret;
 }
 EXPORT_SYMBOL(ieee80211_next_txq);
 
-void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+static void __ieee80211_insert_txq(struct rb_root_cached *root,
+				   struct txq_info *txqi)
+{
+	struct rb_node **new = &root->rb_root.rb_node;
+	struct airtime_info *old_air, *new_air;
+	struct rb_node *parent = NULL;
+	struct txq_info *__txqi;
+	bool leftmost = true;
+
+	while (*new) {
+		parent = *new;
+		__txqi = rb_entry(parent, struct txq_info, schedule_order);
+		old_air = to_airtime_info(&__txqi->txq);
+		new_air = to_airtime_info(&txqi->txq);
+
+		if (new_air->v_t <= old_air->v_t) {
+			new = &parent->rb_left;
+		} else {
+			new = &parent->rb_right;
+			leftmost = false;
+		}
+	}
+
+	rb_link_node(&txqi->schedule_order, parent, new);
+	rb_insert_color_cached(&txqi->schedule_order, root, leftmost);
+}
+
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq)
+{
+	struct airtime_info *air_info = to_airtime_info(txq);
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	struct airtime_sched_info *air_sched;
+
+	air_sched = &local->airtime[txq->ac];
+
+	lockdep_assert_held(&air_sched->lock);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+		struct airtime_info *a_prev = NULL, *a_next = NULL;
+		struct txq_info *t_prev, *t_next;
+		struct rb_node *n_prev, *n_next;
+
+		/* Erasing a node can cause an expensive rebalancing operation,
+		 * so we check the previous and next nodes first and only remove
+		 * and re-insert if the current node is not already in the
+		 * correct position.
+		 */
+		if ((n_prev = rb_prev(&txqi->schedule_order)) != NULL) {
+			t_prev = container_of(n_prev, struct txq_info,
+					      schedule_order);
+			a_prev = to_airtime_info(&t_prev->txq);
+		}
+
+		if ((n_next = rb_next(&txqi->schedule_order)) != NULL) {
+			t_next = container_of(n_next, struct txq_info,
+					      schedule_order);
+			a_next = to_airtime_info(&t_next->txq);
+		}
+
+		if ((!a_prev || a_prev->v_t <= air_info->v_t) &&
+		    (!a_next || a_next->v_t > air_info->v_t))
+			return;
+
+		if (air_sched->schedule_pos == &txqi->schedule_order)
+			air_sched->schedule_pos = n_prev;
+
+		rb_erase_cached(&txqi->schedule_order,
+				&air_sched->active_txqs);
+		RB_CLEAR_NODE(&txqi->schedule_order);
+		__ieee80211_insert_txq(&air_sched->active_txqs, txqi);
+	}
+}
+
+void ieee80211_update_airtime_weight(struct ieee80211_local *local,
+				     struct airtime_sched_info *air_sched,
+				     u64 now, bool force)
+{
+	struct airtime_info *air_info, *tmp;
+	u64 weight_sum = 0;
+
+	if (unlikely(!now))
+		now = ktime_get_boottime_ns();
+
+	lockdep_assert_held(&air_sched->lock);
+
+	if (!force && (air_sched->last_weight_update <
+		       now - AIRTIME_ACTIVE_DURATION))
+		return;
+
+	list_for_each_entry_safe(air_info, tmp,
+				 &air_sched->active_list, list) {
+		if (airtime_is_active(air_info, now))
+			weight_sum += air_info->weight;
+		else
+			list_del_init(&air_info->list);
+	}
+	airtime_weight_sum_set(air_sched, weight_sum);
+	air_sched->last_weight_update = now;
+}
+
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq)
+	__acquires(txq_lock) __releases(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	struct airtime_sched_info *air_sched;
+	u64 now = ktime_get_boottime_ns();
+	struct airtime_info *air_info;
+	u8 ac = txq->ac;
+	bool was_active;
+
+	air_sched = &local->airtime[ac];
+	air_info = to_airtime_info(txq);
+
+	spin_lock_bh(&air_sched->lock);
+	was_active = airtime_is_active(air_info, now);
+	airtime_set_active(air_sched, air_info, now);
+
+	if (!RB_EMPTY_NODE(&txqi->schedule_order))
+		goto out;
+
+	/* If the station has been inactive for a while, catch up its v_t so it
+	 * doesn't get indefinite priority; see comment above the definition of
+	 * AIRTIME_MAX_BEHIND.
+	 */
+	if ((!was_active && air_info->v_t < air_sched->v_t) ||
+	    air_info->v_t < air_sched->v_t - AIRTIME_MAX_BEHIND)
+		air_info->v_t = air_sched->v_t;
+
+	ieee80211_update_airtime_weight(local, air_sched, now, !was_active);
+	__ieee80211_insert_txq(&air_sched->active_txqs, txqi);
+
+out:
+	spin_unlock_bh(&air_sched->lock);
+}
+EXPORT_SYMBOL(ieee80211_schedule_txq);
+
+static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+				       struct ieee80211_txq *txq,
+				       bool purge)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+	struct airtime_sched_info *air_sched;
+	struct airtime_info *air_info;
+
+	air_sched = &local->airtime[txq->ac];
+	air_info = to_airtime_info(&txqi->txq);
+
+	lockdep_assert_held(&air_sched->lock);
+
+	if (purge) {
+		list_del_init(&air_info->list);
+		ieee80211_update_airtime_weight(local, air_sched, 0, true);
+	}
+
+	if (RB_EMPTY_NODE(&txqi->schedule_order))
+		return;
+
+	if (air_sched->schedule_pos == &txqi->schedule_order)
+		air_sched->schedule_pos = rb_prev(&txqi->schedule_order);
+
+	if (!purge)
+		airtime_set_active(air_sched, air_info,
+				   ktime_get_boottime_ns());
+
+	rb_erase_cached(&txqi->schedule_order,
+			&air_sched->active_txqs);
+	RB_CLEAR_NODE(&txqi->schedule_order);
+}
+
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq,
-			      bool force)
+			      bool purge)
+	__acquires(txq_lock) __releases(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	spin_lock_bh(&local->airtime[txq->ac].lock);
+	__ieee80211_unschedule_txq(hw, txq, purge);
+	spin_unlock_bh(&local->airtime[txq->ac].lock);
+}
+
+void ieee80211_return_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq, bool force)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct txq_info *txqi = to_txq_info(txq);
 
-	spin_lock_bh(&local->active_txq_lock[txq->ac]);
+	spin_lock_bh(&local->airtime[txq->ac].lock);
 
-	if (list_empty(&txqi->schedule_order) &&
-	    (force || !skb_queue_empty(&txqi->frags) ||
-	     txqi->tin.backlog_packets)) {
-		/* If airtime accounting is active, always enqueue STAs at the
-		 * head of the list to ensure that they only get moved to the
-		 * back by the airtime DRR scheduler once they have a negative
-		 * deficit. A station that already has a negative deficit will
-		 * get immediately moved to the back of the list on the next
-		 * call to ieee80211_next_txq().
-		 */
-		if (txqi->txq.sta && local->airtime_flags &&
-		    wiphy_ext_feature_isset(local->hw.wiphy,
-					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
-			list_add(&txqi->schedule_order,
-				 &local->active_txqs[txq->ac]);
-		else
-			list_add_tail(&txqi->schedule_order,
-				      &local->active_txqs[txq->ac]);
-	}
+	if (!RB_EMPTY_NODE(&txqi->schedule_order) && !force &&
+	    !txq_has_queue(txq))
+		__ieee80211_unschedule_txq(hw, txq, false);
 
-	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
+	spin_unlock_bh(&local->airtime[txq->ac].lock);
 }
-EXPORT_SYMBOL(__ieee80211_schedule_txq);
+EXPORT_SYMBOL(ieee80211_return_txq);
 
 DEFINE_STATIC_KEY_FALSE(aql_disable);
 
 bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
 				 struct ieee80211_txq *txq)
 {
-	struct sta_info *sta;
+	struct airtime_info *air_info = to_airtime_info(txq);
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
@@ -3893,15 +4049,12 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
 	if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
 		return true;
 
-	sta = container_of(txq->sta, struct sta_info, sta);
-	if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
-	    sta->airtime[txq->ac].aql_limit_low)
+	if (atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_low)
 		return true;
 
 	if (atomic_read(&local->aql_total_pending_airtime) <
 	    local->aql_threshold &&
-	    atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
-	    sta->airtime[txq->ac].aql_limit_high)
+	    atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_high)
 		return true;
 
 	return false;
@@ -3911,60 +4064,59 @@ EXPORT_SYMBOL(ieee80211_txq_airtime_check);
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
+	struct txq_info *first_txqi = NULL, *txqi = to_txq_info(txq);
 	struct ieee80211_local *local = hw_to_local(hw);
-	struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
-	struct sta_info *sta;
-	u8 ac = txq->ac;
+	struct airtime_sched_info *air_sched;
+	struct airtime_info *air_info;
+	struct rb_node *node = NULL;
+	bool ret = false;
+	u64 now;
 
-	spin_lock_bh(&local->active_txq_lock[ac]);
 
-	if (!txqi->txq.sta)
+	if (!ieee80211_txq_airtime_check(hw, txq))
+		return false;
+
+	air_sched = &local->airtime[txq->ac];
+	spin_lock_bh(&air_sched->lock);
+
+	if (RB_EMPTY_NODE(&txqi->schedule_order))
 		goto out;
 
-	if (list_empty(&txqi->schedule_order))
-		goto out;
+	now = ktime_get_boottime_ns();
 
-	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
-				 schedule_order) {
-		if (iter == txqi)
-			break;
+	/* Like in ieee80211_next_txq(), make sure the first station in the
+	 * scheduling order is eligible for transmission to avoid starvation.
+	 */
+	node = rb_first_cached(&air_sched->active_txqs);
+	if (node) {
+		first_txqi = container_of(node, struct txq_info,
+					  schedule_order);
+		air_info = to_airtime_info(&first_txqi->txq);
 
-		if (!iter->txq.sta) {
-			list_move_tail(&iter->schedule_order,
-				       &local->active_txqs[ac]);
-			continue;
-		}
-		sta = container_of(iter->txq.sta, struct sta_info, sta);
-		if (sta->airtime[ac].deficit < 0)
-			sta->airtime[ac].deficit += sta->airtime_weight;
-		list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
+		if (air_sched->v_t < air_info->v_t)
+			airtime_catchup_v_t(air_sched, air_info->v_t, now);
 	}
 
-	sta = container_of(txqi->txq.sta, struct sta_info, sta);
-	if (sta->airtime[ac].deficit >= 0)
-		goto out;
+	air_info = to_airtime_info(&txqi->txq);
+	if (air_info->v_t <= air_sched->v_t) {
+		air_sched->last_schedule_activity = now;
+		ret = true;
+	}
 
-	sta->airtime[ac].deficit += sta->airtime_weight;
-	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
-	spin_unlock_bh(&local->active_txq_lock[ac]);
-
-	return false;
 out:
-	if (!list_empty(&txqi->schedule_order))
-		list_del_init(&txqi->schedule_order);
-	spin_unlock_bh(&local->active_txq_lock[ac]);
-
-	return true;
+	spin_unlock_bh(&air_sched->lock);
+	return ret;
 }
 EXPORT_SYMBOL(ieee80211_txq_may_transmit);
 
 void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct airtime_sched_info *air_sched = &local->airtime[ac];
 
-	spin_lock_bh(&local->active_txq_lock[ac]);
-	local->schedule_round[ac]++;
-	spin_unlock_bh(&local->active_txq_lock[ac]);
+	spin_lock_bh(&air_sched->lock);
+	air_sched->schedule_pos = NULL;
+	spin_unlock_bh(&air_sched->lock);
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 

From f37ccf8fce155d08ae2a4fb3db677911ced0c21a Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 22 Jun 2021 18:38:01 -0700
Subject: [PATCH 2727/3292] ACPI: bgrt: Fix CFI violation

clang's Control Flow Integrity requires that every indirect call has a
valid target, which is based on the type of the function pointer. The
*_show() functions in this file are written as if they will be called
from dev_attr_show(); however, they will be called from
sysfs_kf_seq_show() because the files were created by
sysfs_create_group() and the sysfs ops are based on kobj_sysfs_ops
because of kobject_add_and_create(). Because the *_show() functions do
not match the type of the show() member in struct kobj_attribute, there
is a CFI violation.

$ cat /sys/firmware/acpi/bgrt/{status,type,version,{x,y}offset}}
1
0
1
522
307

$ dmesg | grep "CFI failure"
[  267.761825] CFI failure (target: type_show.d5e1ad21498a5fd14edbc5c320906598.cfi_jt+0x0/0x8):
[  267.762246] CFI failure (target: xoffset_show.d5e1ad21498a5fd14edbc5c320906598.cfi_jt+0x0/0x8):
[  267.762584] CFI failure (target: status_show.d5e1ad21498a5fd14edbc5c320906598.cfi_jt+0x0/0x8):
[  267.762973] CFI failure (target: yoffset_show.d5e1ad21498a5fd14edbc5c320906598.cfi_jt+0x0/0x8):
[  267.763330] CFI failure (target: version_show.d5e1ad21498a5fd14edbc5c320906598.cfi_jt+0x0/0x8):

Convert these functions to the type of the show() member in struct
kobj_attribute so that there is no more CFI violation. Because these
functions are all so similar, combine them into a macro.

Fixes: d1ff4b1cdbab ("ACPI: Add support for exposing BGRT data")
Link: https://github.com/ClangBuiltLinux/linux/issues/1406
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bgrt.c | 55 ++++++++++++++-------------------------------
 1 file changed, 17 insertions(+), 38 deletions(-)

diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index 19bb7f870204..e0d14017706e 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c
@@ -15,40 +15,19 @@
 static void *bgrt_image;
 static struct kobject *bgrt_kobj;
 
-static ssize_t version_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.version);
-}
-static DEVICE_ATTR_RO(version);
+#define BGRT_SHOW(_name, _member) \
+	static ssize_t _name##_show(struct kobject *kobj,			\
+				    struct kobj_attribute *attr, char *buf)	\
+	{									\
+		return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab._member);	\
+	}									\
+	struct kobj_attribute bgrt_attr_##_name = __ATTR_RO(_name)
 
-static ssize_t status_show(struct device *dev,
-			   struct device_attribute *attr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.status);
-}
-static DEVICE_ATTR_RO(status);
-
-static ssize_t type_show(struct device *dev,
-			 struct device_attribute *attr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.image_type);
-}
-static DEVICE_ATTR_RO(type);
-
-static ssize_t xoffset_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.image_offset_x);
-}
-static DEVICE_ATTR_RO(xoffset);
-
-static ssize_t yoffset_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.image_offset_y);
-}
-static DEVICE_ATTR_RO(yoffset);
+BGRT_SHOW(version, version);
+BGRT_SHOW(status, status);
+BGRT_SHOW(type, image_type);
+BGRT_SHOW(xoffset, image_offset_x);
+BGRT_SHOW(yoffset, image_offset_y);
 
 static ssize_t image_read(struct file *file, struct kobject *kobj,
 	       struct bin_attribute *attr, char *buf, loff_t off, size_t count)
@@ -60,11 +39,11 @@ static ssize_t image_read(struct file *file, struct kobject *kobj,
 static BIN_ATTR_RO(image, 0);	/* size gets filled in later */
 
 static struct attribute *bgrt_attributes[] = {
-	&dev_attr_version.attr,
-	&dev_attr_status.attr,
-	&dev_attr_type.attr,
-	&dev_attr_xoffset.attr,
-	&dev_attr_yoffset.attr,
+	&bgrt_attr_version.attr,
+	&bgrt_attr_status.attr,
+	&bgrt_attr_type.attr,
+	&bgrt_attr_xoffset.attr,
+	&bgrt_attr_yoffset.attr,
 	NULL,
 };
 

From 6554ca9cc8c7502895f9c20b3e5e9d81c5edc986 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 22 Jun 2021 18:38:02 -0700
Subject: [PATCH 2728/3292] ACPI: bgrt: Use sysfs_emit

sysfs_emit is preferred to snprintf for emitting values after
commit 2efc459d06f1 ("sysfs: Add sysfs_emit and sysfs_emit_at to format
sysfs output").

Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bgrt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index e0d14017706e..02d208732f9a 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c
@@ -19,7 +19,7 @@ static struct kobject *bgrt_kobj;
 	static ssize_t _name##_show(struct kobject *kobj,			\
 				    struct kobj_attribute *attr, char *buf)	\
 	{									\
-		return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab._member);	\
+		return sysfs_emit(buf, "%d\n", bgrt_tab._member);		\
 	}									\
 	struct kobj_attribute bgrt_attr_##_name = __ATTR_RO(_name)
 

From d1059c1b1146870c52f3dac12cb7b6cbf39ed27f Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 21 Jun 2021 16:24:33 +0100
Subject: [PATCH 2729/3292] ACPI: tables: Add custom DSDT file as makefile
 prerequisite

A custom DSDT file is mostly used during development or debugging,
and in that case it is quite likely to want to rebuild the kernel
after changing ONLY the content of the DSDT.

This patch adds the custom DSDT as a prerequisite to tables.o
to ensure a rebuild if the DSDT file is updated. Make will merge
the prerequisites from multiple rules for the same target.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 700b41adf2db..9aa82d527272 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -8,6 +8,11 @@ ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
 #
 # ACPI Boot-Time Table Parsing
 #
+ifeq ($(CONFIG_ACPI_CUSTOM_DSDT),y)
+tables.o: $(src)/../../include/$(subst $\",,$(CONFIG_ACPI_CUSTOM_DSDT_FILE)) ;
+
+endif
+
 obj-$(CONFIG_ACPI)		+= tables.o
 obj-$(CONFIG_X86)		+= blacklist.o
 

From f7599be2bb7694d94b65a57a74aba75f2c101c28 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 22 Jun 2021 14:39:39 -0700
Subject: [PATCH 2730/3292] ACPI: PM: postpone bringing devices to D0 unless we
 need them

Currently ACPI power domain brings devices into D0 state in the "resume
early" phase. Normally this does not cause any issues, as powering up
happens quickly. However there are peripherals that have certain timing
requirements for powering on, for example some models of Elan
touchscreens need 300msec after powering up/releasing reset line before
they can accept commands from the host. Such devices will dominate
the time spent in early resume phase and cause increase in overall
resume time as we wait for early resume to complete before we can
proceed to the normal resume stage.

There are ways for a driver to indicate that it can tolerate device
being in the low power mode and that it knows how to power the device
back up when resuming, bit that requires changes to individual drivers
that may not really care about details of ACPI controlled power
management.

This change attempts to solve this issue at ACPI power domain level, by
postponing powering up device until we get to the normal resume stage,
unless there is early resume handler defined for the device, or device
does not declare any resume handlers, in which case we continue powering
up such devices early. This allows us to shave off several hundred
milliseconds of resume time on affected systems.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 9d2d3b9bb8b5..675a69de516f 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1134,19 +1134,48 @@ static int acpi_subsys_resume_noirq(struct device *dev)
  *
  * Use ACPI to put the given device into the full-power state and carry out the
  * generic early resume procedure for it during system transition into the
- * working state.
+ * working state, but only do that if device either defines early resume
+ * handler, or does not define power operations at all. Otherwise powering up
+ * of the device is postponed to the normal resume phase.
  */
 static int acpi_subsys_resume_early(struct device *dev)
 {
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 	int ret;
 
 	if (dev_pm_skip_resume(dev))
 		return 0;
 
+	if (pm && !pm->resume_early) {
+		dev_dbg(dev, "postponing D0 transition to normal resume stage\n");
+		return 0;
+	}
+
 	ret = acpi_dev_resume(dev);
 	return ret ? ret : pm_generic_resume_early(dev);
 }
 
+/**
+ * acpi_subsys_resume - Resume device using ACPI.
+ * @dev: Device to Resume.
+ *
+ * Use ACPI to put the given device into the full-power state if it has not been
+ * powered up during early resume phase, and carry out the generic resume
+ * procedure for it during system transition into the working state.
+ */
+static int acpi_subsys_resume(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int ret = 0;
+
+	if (!dev_pm_skip_resume(dev) && pm && !pm->resume_early) {
+		dev_dbg(dev, "executing postponed D0 transition\n");
+		ret = acpi_dev_resume(dev);
+	}
+
+	return ret ? ret : pm_generic_resume(dev);
+}
+
 /**
  * acpi_subsys_freeze - Run the device driver's freeze callback.
  * @dev: Device to handle.
@@ -1240,6 +1269,7 @@ static struct dev_pm_domain acpi_general_pm_domain = {
 		.prepare = acpi_subsys_prepare,
 		.complete = acpi_subsys_complete,
 		.suspend = acpi_subsys_suspend,
+		.resume = acpi_subsys_resume,
 		.suspend_late = acpi_subsys_suspend_late,
 		.suspend_noirq = acpi_subsys_suspend_noirq,
 		.resume_noirq = acpi_subsys_resume_noirq,

From 3b7180573c250eb6e2a7eec54ae91f27472332ea Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Jun 2021 21:11:39 +0200
Subject: [PATCH 2731/3292] cpufreq: Make cpufreq_online() call
 driver->offline() on errors

In the CPU removal path the ->offline() callback provided by the
driver is always invoked before ->exit(), but in the cpufreq_online()
error path it is not, so ->exit() is expected to somehow know the
context in which it has been called and act accordingly.

That is less than straightforward, so make cpufreq_online() invoke
the driver's ->offline() callback, if present, on errors before
->exit() too.

This only potentially affects intel_pstate.

Fixes: 91a12e91dc39 ("cpufreq: Allow light-weight tear down and bring up of CPUs")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 802abc925b2a..cbab834c37a0 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1367,9 +1367,14 @@ static int cpufreq_online(unsigned int cpu)
 			goto out_free_policy;
 		}
 
+		/*
+		 * The initialization has succeeded and the policy is online.
+		 * If there is a problem with its frequency table, take it
+		 * offline and drop it.
+		 */
 		ret = cpufreq_table_validate_and_sort(policy);
 		if (ret)
-			goto out_exit_policy;
+			goto out_offline_policy;
 
 		/* related_cpus should at least include policy->cpus. */
 		cpumask_copy(policy->related_cpus, policy->cpus);
@@ -1515,6 +1520,10 @@ out_destroy_policy:
 
 	up_write(&policy->rwsem);
 
+out_offline_policy:
+	if (cpufreq_driver->offline)
+		cpufreq_driver->offline(policy);
+
 out_exit_policy:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);

From 761025b51c540ae1fc9516b5dafa55cd109e4871 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 12 May 2021 00:15:48 +0300
Subject: [PATCH 2732/3292] cfg80211: Add wiphy_info_once()

Add wiphy_info_once() helper that prints info message only once.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511211549.30571-1-digetx@gmail.com
---
 include/net/cfg80211.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 58c2cd417e89..1e0bf249b601 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8154,6 +8154,8 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
 	dev_notice(&(wiphy)->dev, format, ##args)
 #define wiphy_info(wiphy, format, args...)			\
 	dev_info(&(wiphy)->dev, format, ##args)
+#define wiphy_info_once(wiphy, format, args...)			\
+	dev_info_once(&(wiphy)->dev, format, ##args)
 
 #define wiphy_err_ratelimited(wiphy, format, args...)		\
 	dev_err_ratelimited(&(wiphy)->dev, format, ##args)

From 78f0a64f66d4f582987bbe45433374b61c21500f Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 12 May 2021 00:15:49 +0300
Subject: [PATCH 2733/3292] brcmfmac: Silence error messages about unsupported
 firmware features

KMSG is flooded with error messages about unsupported firmware
features of BCM4329 chip. The GET_ASSOCLIST error became especially
noisy with a newer NetworkManager version of Ubuntu 21.04. Turn the
noisy error messages into info messages and print them out only once.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210511211549.30571-2-digetx@gmail.com
---
 .../broadcom/brcm80211/brcmfmac/cfg80211.c       | 16 +++++++++++++---
 .../wireless/broadcom/brcm80211/brcmfmac/core.c  | 11 ++++++++---
 .../wireless/broadcom/brcm80211/brcmfmac/debug.h |  4 ++++
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 65fb038d88e7..cedba56fc448 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2895,8 +2895,13 @@ brcmf_cfg80211_dump_station(struct wiphy *wiphy, struct net_device *ndev,
 					     &cfg->assoclist,
 					     sizeof(cfg->assoclist));
 		if (err) {
-			bphy_err(drvr, "BRCMF_C_GET_ASSOCLIST unsupported, err=%d\n",
-				 err);
+			/* GET_ASSOCLIST unsupported by firmware of older chips */
+			if (err == -EBADE)
+				bphy_info_once(drvr, "BRCMF_C_GET_ASSOCLIST unsupported\n");
+			else
+				bphy_err(drvr, "BRCMF_C_GET_ASSOCLIST failed, err=%d\n",
+					 err);
+
 			cfg->assoclist.count = 0;
 			return -EOPNOTSUPP;
 		}
@@ -6851,7 +6856,12 @@ static int brcmf_setup_wiphybands(struct brcmf_cfg80211_info *cfg)
 
 	err = brcmf_fil_iovar_int_get(ifp, "rxchain", &rxchain);
 	if (err) {
-		bphy_err(drvr, "rxchain error (%d)\n", err);
+		/* rxchain unsupported by firmware of older chips */
+		if (err == -EBADE)
+			bphy_info_once(drvr, "rxchain unsupported\n");
+		else
+			bphy_err(drvr, "rxchain error (%d)\n", err);
+
 		nchain = 1;
 	} else {
 		for (nchain = 0; rxchain; nchain++)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index cee1682d2333..db5f8535fdb5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -188,9 +188,14 @@ static void _brcmf_set_multicast_list(struct work_struct *work)
 	/*Finally, pick up the PROMISC flag */
 	cmd_value = (ndev->flags & IFF_PROMISC) ? true : false;
 	err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_PROMISC, cmd_value);
-	if (err < 0)
-		bphy_err(drvr, "Setting BRCMF_C_SET_PROMISC failed, %d\n",
-			 err);
+	if (err < 0) {
+		/* PROMISC unsupported by firmware of older chips */
+		if (err == -EBADE)
+			bphy_info_once(drvr, "BRCMF_C_SET_PROMISC unsupported\n");
+		else
+			bphy_err(drvr, "Setting BRCMF_C_SET_PROMISC failed, err=%d\n",
+				 err);
+	}
 	brcmf_configure_arp_nd_offload(ifp, !cmd_value);
 }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
index 44ba6f389fa9..9bb5f709d41a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
@@ -60,6 +60,10 @@ void __brcmf_err(struct brcmf_bus *bus, const char *func, const char *fmt, ...);
 				  ##__VA_ARGS__);			\
 	} while (0)
 
+#define bphy_info_once(drvr, fmt, ...)					\
+	wiphy_info_once((drvr)->wiphy, "%s: " fmt, __func__,		\
+			##__VA_ARGS__)
+
 #if defined(DEBUG) || defined(CONFIG_BRCM_TRACING)
 
 /* For debug/tracing purposes treat info messages as errors */

From 1689b0b554defd5a7863bbd434ae911b8869504a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 22 Jun 2021 13:59:55 +0300
Subject: [PATCH 2734/3292] cifs: fix NULL dereference in smb2_check_message()

This code sets "ses" to NULL which will lead to a NULL dereference on
the second iteration through the loop.

Fixes: 85346c17e425 ("cifs: convert list_for_each to entry variant in smb2misc.c")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2misc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index c6bb2ea1983b..668f77108831 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -158,11 +158,10 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
 		list_for_each_entry(ses, &srvr->smb_ses_list, smb_ses_list) {
 			if (ses->Suid == thdr->SessionId)
 				break;
-
-			ses = NULL;
 		}
 		spin_unlock(&cifs_tcp_ses_lock);
-		if (ses == NULL) {
+		if (list_entry_is_head(ses, &srvr->smb_ses_list,
+				       smb_ses_list)) {
 			cifs_dbg(VFS, "no decryption - session id not found\n");
 			return 1;
 		}

From 2438c0bd598b5547333e0e3c1581da078569f130 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 22 Jun 2021 13:56:20 -0500
Subject: [PATCH 2735/3292] SMB3: Add new info level for query directory

The recently updated MS-SMB2 (June 2021) added protocol definitions
for a new level 60 for query directory (FileIdExtdDirectoryInformation).

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/readdir.c |  2 +-
 fs/cifs/smb2pdu.h | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 00b6b953d13c..bfee176b901d 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -309,7 +309,7 @@ static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
 {
 	__dir_info_to_fattr(fattr, info);
 
-	/* See MS-FSCC 2.4.18 FileIdFullDirectoryInformation */
+	/* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */
 	if (fattr->cf_cifsattrs & ATTR_REPARSE)
 		fattr->cf_cifstag = le32_to_cpu(info->EaSize);
 	cifs_fill_common_info(fattr, cifs_sb);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 653486243cdf..a5c48b85549a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -264,7 +264,7 @@ struct share_redirect_error_context_rsp {
 	__le32 NotificationType;
 	__le32 ResourceNameOffset;
 	__le32 ResourceNameLength;
-	__le16 Flags;
+	__le16 Reserved;
 	__le16 TargetType;
 	__le32 IPAddrCount;
 	struct move_dst_ipaddr IpAddrMoveList[];
@@ -1448,6 +1448,22 @@ struct smb2_echo_rsp {
 
 #define SMB2_QUERY_DIRECTORY_IOV_SIZE 2
 
+/*
+ * Valid FileInformation classes.
+ *
+ * Note that these are a subset of the (file) QUERY_INFO levels defined
+ * later in this file (but since QUERY_DIRECTORY uses equivalent numbers
+ * we do not redefine them here)
+ *
+ * FileDirectoryInfomation		0x01
+ * FileFullDirectoryInformation		0x02
+ * FileIdFullDirectoryInformation	0x26
+ * FileBothDirectoryInformation		0x03
+ * FileIdBothDirectoryInformation	0x25
+ * FileNamesInformation			0x0C
+ * FileIdExtdDirectoryInformation	0x3C
+ */
+
 struct smb2_query_directory_req {
 	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 33 */
@@ -1684,6 +1700,7 @@ struct smb3_fs_vol_info {
 #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50
 #define FILE_STANDARD_LINK_INFORMATION	54
 #define FILE_ID_INFORMATION		59
+#define FILE_ID_EXTD_DIRECTORY_INFORMATION 60
 
 struct smb2_file_internal_info {
 	__le64 IndexNumber;
@@ -1764,13 +1781,31 @@ struct smb2_file_network_open_info {
 	__le32 Reserved;
 } __packed; /* level 34 Query also similar returned in close rsp and open rsp */
 
-/* See MS-FSCC 2.4.43 */
+/* See MS-FSCC 2.4.21 */
 struct smb2_file_id_information {
 	__le64	VolumeSerialNumber;
 	__u64  PersistentFileId; /* opaque endianness */
 	__u64  VolatileFileId; /* opaque endianness */
 } __packed; /* level 59 */
 
+/* See MS-FSCC 2.4.18 */
+struct smb2_file_id_extd_directory_info {
+	__le32 NextEntryOffset;
+	__u32 FileIndex;
+	__le64 CreationTime;
+	__le64 LastAccessTime;
+	__le64 LastWriteTime;
+	__le64 ChangeTime;
+	__le64 EndOfFile;
+	__le64 AllocationSize;
+	__le32 FileAttributes;
+	__le32 FileNameLength;
+	__le32 EaSize; /* EA size */
+	__le32 ReparsePointTag; /* valid if FILE_ATTR_REPARSE_POINT set in FileAttributes */
+	__le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit */
+	char FileName[1];
+} __packed; /* level 60 */
+
 extern char smb2_padding[7];
 
 /* equivalent of the contents of SMB3.1.1 POSIX open context response */

From 4c4a525ae590623eb3ba667151194a024e643ffd Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 22 Jun 2021 14:07:36 -0500
Subject: [PATCH 2736/3292] cifs: remove two cases where rc is set
 unnecessarily in sid_to_id

In both these cases sid_to_id unconditionally returned success, and
used the default uid/gid for the mount, so setting rc is confusing
and simply gets overwritten (set to 0) later in the function.

Addresses-Coverity: 1491672 ("Unused value")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsacl.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 3898a9e6d3c6..5ec5d9d24032 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -397,7 +397,6 @@ try_upcall_to_get_id:
 	saved_cred = override_creds(root_cred);
 	sidkey = request_key(&cifs_idmap_key_type, sidstr, "");
 	if (IS_ERR(sidkey)) {
-		rc = -EINVAL;
 		cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n",
 			 __func__, sidstr, sidtype == SIDOWNER ? 'u' : 'g');
 		goto out_revert_creds;
@@ -410,7 +409,6 @@ try_upcall_to_get_id:
 	 */
 	BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
 	if (sidkey->datalen != sizeof(uid_t)) {
-		rc = -EIO;
 		cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu)\n",
 			 __func__, sidkey->datalen);
 		key_invalidate(sidkey);

From dd25296afaf60b5140ddfa9e3d8e5d9df7076754 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 22 Jun 2021 14:49:55 +0800
Subject: [PATCH 2737/3292] net: sched: avoid unnecessary seqcount operation
 for lockless qdisc

qdisc->running seqcount operation is mainly used to do heuristic
locking on q->busylock for locked qdisc, see qdisc_is_running()
and __dev_xmit_skb().

So avoid doing seqcount operation for qdisc with TCQ_F_NOLOCK
flag.

Acked-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com> # flexcan
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1e625519ae96..3ed6bcc4be72 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -188,6 +188,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 
 nolock_empty:
 		WRITE_ONCE(qdisc->empty, false);
+		return true;
 	} else if (qdisc_is_running(qdisc)) {
 		return false;
 	}
@@ -201,7 +202,6 @@ nolock_empty:
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	write_seqcount_end(&qdisc->running);
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		spin_unlock(&qdisc->seqlock);
 
@@ -210,6 +210,8 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
 			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
 			__netif_schedule(qdisc);
 		}
+	} else {
+		write_seqcount_end(&qdisc->running);
 	}
 }
 

From c4fef01ba4793a85b2d38a472bddd1e3b56d9585 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 22 Jun 2021 14:49:56 +0800
Subject: [PATCH 2738/3292] net: sched: implement TCQ_F_CAN_BYPASS for lockless
 qdisc

Currently pfifo_fast has both TCQ_F_CAN_BYPASS and TCQ_F_NOLOCK
flag set, but queue discipline by-pass does not work for lockless
qdisc because skb is always enqueued to qdisc even when the qdisc
is empty, see __dev_xmit_skb().

This patch calls sch_direct_xmit() to transmit the skb directly
to the driver for empty lockless qdisc, which aviod enqueuing
and dequeuing operation.

As qdisc->empty is not reliable to indicate a empty qdisc because
there is a time window between enqueuing and setting qdisc->empty.
So we use the MISSED state added in commit a90c57f2cedd ("net:
sched: fix packet stuck problem for lockless qdisc"), which
indicate there is lock contention, suggesting that it is better
not to do the qdisc bypass in order to avoid packet out of order
problem.

In order to make MISSED state reliable to indicate a empty qdisc,
we need to ensure that testing and clearing of MISSED state is
within the protection of qdisc->seqlock, only setting MISSED state
can be done without the protection of qdisc->seqlock. A MISSED
state testing is added without the protection of qdisc->seqlock to
aviod doing unnecessary spin_trylock() for contention case.

As the enqueuing is not within the protection of qdisc->seqlock,
there is still a potential data race as mentioned by Jakub [1]:

      thread1               thread2             thread3
qdisc_run_begin() # true
                        qdisc_run_begin(q)
                             set(MISSED)
pfifo_fast_dequeue
  clear(MISSED)
  # recheck the queue
qdisc_run_end()
                            enqueue skb1
                                             qdisc empty # true
                                          qdisc_run_begin() # true
                                          sch_direct_xmit() # skb2
                         qdisc_run_begin()
                            set(MISSED)

When above happens, skb1 enqueued by thread2 is transmited after
skb2 is transmited by thread3 because MISSED state setting and
enqueuing is not under the qdisc->seqlock. If qdisc bypass is
disabled, skb1 has better chance to be transmited quicker than
skb2.

This patch does not take care of the above data race, because we
view this as similar as below:
Even at the same time CPU1 and CPU2 write the skb to two socket
which both heading to the same qdisc, there is no guarantee that
which skb will hit the qdisc first, because there is a lot of
factor like interrupt/softirq/cache miss/scheduling afffecting
that.

There are below cases that need special handling:
1. When MISSED state is cleared before another round of dequeuing
   in pfifo_fast_dequeue(), and __qdisc_run() might not be able to
   dequeue all skb in one round and call __netif_schedule(), which
   might result in a non-empty qdisc without MISSED set. In order
   to avoid this, the MISSED state is set for lockless qdisc and
   __netif_schedule() will be called at the end of qdisc_run_end.

2. The MISSED state also need to be set for lockless qdisc instead
   of calling __netif_schedule() directly when requeuing a skb for
   a similar reason.

3. For netdev queue stopped case, the MISSED case need clearing
   while the netdev queue is stopped, otherwise there may be
   unnecessary __netif_schedule() calling. So a new DRAINING state
   is added to indicate this case, which also indicate a non-empty
   qdisc.

4. As there is already netif_xmit_frozen_or_stopped() checking in
   dequeue_skb() and sch_direct_xmit(), which are both within the
   protection of qdisc->seqlock, but the same checking in
   __dev_xmit_skb() is without the protection, which might cause
   empty indication of a lockless qdisc to be not reliable. So
   remove the checking in __dev_xmit_skb(), and the checking in
   the protection of qdisc->seqlock seems enough to avoid the cpu
   consumption problem for netdev queue stopped case.

1. https://lkml.org/lkml/2021/5/29/215

Acked-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com> # flexcan
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 16 +++++++++++++---
 net/core/dev.c            | 29 ++++++++++++++++++++++++++---
 net/sched/sch_generic.c   | 20 ++++++++++++++++----
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3ed6bcc4be72..177f240d59d4 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -37,8 +37,15 @@ enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
 	__QDISC_STATE_MISSED,
+	__QDISC_STATE_DRAINING,
 };
 
+#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
+#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
+
+#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
+					QDISC_STATE_DRAINING)
+
 struct qdisc_size_table {
 	struct rcu_head		rcu;
 	struct list_head	list;
@@ -145,6 +152,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
 	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }
 
+static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+{
+	return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
+}
+
 static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 {
 	return q->flags & TCQ_F_CPUSTATS;
@@ -206,10 +218,8 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
 		spin_unlock(&qdisc->seqlock);
 
 		if (unlikely(test_bit(__QDISC_STATE_MISSED,
-				      &qdisc->state))) {
-			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+				      &qdisc->state)))
 			__netif_schedule(qdisc);
-		}
 	} else {
 		write_seqcount_end(&qdisc->running);
 	}
diff --git a/net/core/dev.c b/net/core/dev.c
index 50531a2d0b20..991d09b67bd9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	qdisc_calculate_pkt_len(skb, q);
 
 	if (q->flags & TCQ_F_NOLOCK) {
-		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-		if (likely(!netif_xmit_frozen_or_stopped(txq)))
-			qdisc_run(q);
+		if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+		    qdisc_run_begin(q)) {
+			/* Retest nolock_qdisc_is_empty() within the protection
+			 * of q->seqlock to protect from racing with requeuing.
+			 */
+			if (unlikely(!nolock_qdisc_is_empty(q))) {
+				rc = q->enqueue(skb, q, &to_free) &
+					NET_XMIT_MASK;
+				__qdisc_run(q);
+				qdisc_run_end(q);
 
+				goto no_lock_out;
+			}
+
+			qdisc_bstats_cpu_update(q, skb);
+			if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+			    !nolock_qdisc_is_empty(q))
+				__qdisc_run(q);
+
+			qdisc_run_end(q);
+			return NET_XMIT_SUCCESS;
+		}
+
+		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+		qdisc_run(q);
+
+no_lock_out:
 		if (unlikely(to_free))
 			kfree_skb_list(to_free);
 		return rc;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e9c0afc8becc..9984ccc45946 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
 	 */
 	if (!netif_xmit_frozen_or_stopped(txq))
 		set_bit(__QDISC_STATE_MISSED, &q->state);
+	else
+		set_bit(__QDISC_STATE_DRAINING, &q->state);
 }
 
 /* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 		skb = next;
 	}
-	if (lock)
+
+	if (lock) {
 		spin_unlock(lock);
-	__netif_schedule(q);
+		set_bit(__QDISC_STATE_MISSED, &q->state);
+	} else {
+		__netif_schedule(q);
+	}
 }
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
 	while (qdisc_restart(q, &packets)) {
 		quota -= packets;
 		if (quota <= 0) {
-			__netif_schedule(q);
+			if (q->flags & TCQ_F_NOLOCK)
+				set_bit(__QDISC_STATE_MISSED, &q->state);
+			else
+				__netif_schedule(q);
+
 			break;
 		}
 	}
@@ -698,13 +708,14 @@ retry:
 	if (likely(skb)) {
 		qdisc_update_stats_at_dequeue(qdisc, skb);
 	} else if (need_retry &&
-		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+		   READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
 		/* Delay clearing the STATE_MISSED here to reduce
 		 * the overhead of the second spin_trylock() in
 		 * qdisc_run_begin() and __netif_schedule() calling
 		 * in qdisc_run_end().
 		 */
 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
 
 		/* Make sure dequeuing happens after clearing
 		 * STATE_MISSED.
@@ -1222,6 +1233,7 @@ static void dev_reset_queue(struct net_device *dev,
 	spin_unlock_bh(qdisc_lock(qdisc));
 	if (nolock) {
 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
 		spin_unlock_bh(&qdisc->seqlock);
 	}
 }

From d3e0f57501bde8a9585aff79afcffd99e6a5d91c Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 22 Jun 2021 14:49:57 +0800
Subject: [PATCH 2739/3292] net: sched: remove qdisc->empty for lockless qdisc

As MISSED and DRAINING state are used to indicate a non-empty
qdisc, qdisc->empty is not longer needed, so remove it.

Acked-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com> # flexcan
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 13 +++----------
 net/sched/sch_generic.c   |  3 ---
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 177f240d59d4..c99ffe9cc88f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -117,8 +117,6 @@ struct Qdisc {
 	spinlock_t		busylock ____cacheline_aligned_in_smp;
 	spinlock_t		seqlock;
 
-	/* for NOLOCK qdisc, true if there are no enqueued skbs */
-	bool			empty;
 	struct rcu_head		rcu;
 
 	/* private data */
@@ -165,7 +163,7 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 {
 	if (qdisc_is_percpu_stats(qdisc))
-		return READ_ONCE(qdisc->empty);
+		return nolock_qdisc_is_empty(qdisc);
 	return !READ_ONCE(qdisc->q.qlen);
 }
 
@@ -173,7 +171,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		if (spin_trylock(&qdisc->seqlock))
-			goto nolock_empty;
+			return true;
 
 		/* If the MISSED flag is set, it means other thread has
 		 * set the MISSED flag before second spin_trylock(), so
@@ -195,12 +193,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		/* Retry again in case other CPU may not see the new flag
 		 * after it releases the lock at the end of qdisc_run_end().
 		 */
-		if (!spin_trylock(&qdisc->seqlock))
-			return false;
-
-nolock_empty:
-		WRITE_ONCE(qdisc->empty, false);
-		return true;
+		return spin_trylock(&qdisc->seqlock);
 	} else if (qdisc_is_running(qdisc)) {
 		return false;
 	}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9984ccc45946..d9ac60ffe927 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -725,8 +725,6 @@ retry:
 		need_retry = false;
 
 		goto retry;
-	} else {
-		WRITE_ONCE(qdisc->empty, true);
 	}
 
 	return skb;
@@ -927,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
-	sch->empty = true;
 	dev_hold(dev);
 	refcount_set(&sch->refcnt, 1);
 

From 0a36a75c6818800c67057458376e99b5f77c2a1f Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Tue, 22 Jun 2021 09:46:48 +0200
Subject: [PATCH 2740/3292] selftests: icmp_redirect: support expected failures

According to a comment in commit 99513cfa16c6 ("selftest: Fixes for
icmp_redirect test") the test "IPv6: mtu exception plus redirect" is
expected to fail, because of a bug in the IPv6 logic that hasn't been
fixed yet apparently.

We should probably consider this failure as an "expected failure",
therefore change the script to return XFAIL for that particular test and
also report the total amount of expected failures at the end of the run.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/icmp_redirect.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index bf361f30d6ef..c19ecc6a8614 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -63,10 +63,14 @@ log_test()
 	local rc=$1
 	local expected=$2
 	local msg="$3"
+	local xfail=$4
 
 	if [ ${rc} -eq ${expected} ]; then
 		printf "TEST: %-60s  [ OK ]\n" "${msg}"
 		nsuccess=$((nsuccess+1))
+	elif [ ${rc} -eq ${xfail} ]; then
+		printf "TEST: %-60s  [XFAIL]\n" "${msg}"
+		nxfail=$((nxfail+1))
 	else
 		ret=1
 		nfail=$((nfail+1))
@@ -322,7 +326,7 @@ check_exception()
 		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
 		grep -v "mtu" | grep -q "${R1_LLADDR}"
 	fi
-	log_test $? 0 "IPv6: ${desc}"
+	log_test $? 0 "IPv6: ${desc}" 1
 }
 
 run_ping()
@@ -488,6 +492,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 ret=0
 nsuccess=0
 nfail=0
+nxfail=0
 
 while getopts :pv o
 do
@@ -532,5 +537,6 @@ fi
 
 printf "\nTests passed: %3d\n" ${nsuccess}
 printf "Tests failed: %3d\n"   ${nfail}
+printf "Tests xfailed: %3d\n"  ${nxfail}
 
 exit $ret

From 7525de2516fba8661bdea409718708c711c5d5e1 Mon Sep 17 00:00:00 2001
From: David Wilder <dwilder@us.ibm.com>
Date: Tue, 22 Jun 2021 14:52:15 -0700
Subject: [PATCH 2741/3292] ibmveth: Set CHECKSUM_PARTIAL if NULL TCP CSUM.

TCP checksums on received packets may be set to NULL by the sender if CSO
is enabled. The hypervisor flags these packets as check-sum-ok and the
skb is then flagged CHECKSUM_UNNECESSARY. If these packets are then
forwarded the sender will not request CSO due to the CHECKSUM_UNNECESSARY
flag. The result is a TCP packet sent with a bad checksum. This change
sets up CHECKSUM_PARTIAL on these packets causing the sender to correctly
request CSUM offload.

Signed-off-by: David Wilder <dwilder@us.ibm.com>
Reviewed-by: Pradeep Satyanarayana <pradeeps@linux.vnet.ibm.com>
Tested-by: Cristobal Forno <cforno12@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmveth.c | 51 ++++++++++++++++--------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index bc67a7ee872b..737ba85e409f 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1285,36 +1285,41 @@ static void ibmveth_rx_csum_helper(struct sk_buff *skb,
 		iph_proto = iph6->nexthdr;
 	}
 
-	/* In OVS environment, when a flow is not cached, specifically for a
-	 * new TCP connection, the first packet information is passed up
+	/* When CSO is enabled the TCP checksum may have be set to NULL by
+	 * the sender given that we zeroed out TCP checksum field in
+	 * transmit path (refer ibmveth_start_xmit routine). In this case set
+	 * up CHECKSUM_PARTIAL. If the packet is forwarded, the checksum will
+	 * then be recalculated by the destination NIC (CSO must be enabled
+	 * on the destination NIC).
+	 *
+	 * In an OVS environment, when a flow is not cached, specifically for a
+	 * new TCP connection, the first packet information is passed up to
 	 * the user space for finding a flow. During this process, OVS computes
 	 * checksum on the first packet when CHECKSUM_PARTIAL flag is set.
 	 *
-	 * Given that we zeroed out TCP checksum field in transmit path
-	 * (refer ibmveth_start_xmit routine) as we set "no checksum bit",
-	 * OVS computed checksum will be incorrect w/o TCP pseudo checksum
-	 * in the packet. This leads to OVS dropping the packet and hence
-	 * TCP retransmissions are seen.
-	 *
-	 * So, re-compute TCP pseudo header checksum.
+	 * So, re-compute TCP pseudo header checksum when configured for
+	 * trunk mode.
 	 */
-	if (iph_proto == IPPROTO_TCP && adapter->is_active_trunk) {
+	if (iph_proto == IPPROTO_TCP) {
 		struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
-
-		tcphdrlen = skb->len - iphlen;
-
-		/* Recompute TCP pseudo header checksum */
-		if (skb_proto == ETH_P_IP)
-			tcph->check = ~csum_tcpudp_magic(iph->saddr,
+		if (tcph->check == 0x0000) {
+			/* Recompute TCP pseudo header checksum  */
+			if (adapter->is_active_trunk) {
+				tcphdrlen = skb->len - iphlen;
+				if (skb_proto == ETH_P_IP)
+					tcph->check =
+					 ~csum_tcpudp_magic(iph->saddr,
 					iph->daddr, tcphdrlen, iph_proto, 0);
-		else if (skb_proto == ETH_P_IPV6)
-			tcph->check = ~csum_ipv6_magic(&iph6->saddr,
+				else if (skb_proto == ETH_P_IPV6)
+					tcph->check =
+					 ~csum_ipv6_magic(&iph6->saddr,
 					&iph6->daddr, tcphdrlen, iph_proto, 0);
-
-		/* Setup SKB fields for checksum offload */
-		skb_partial_csum_set(skb, iphlen,
-				     offsetof(struct tcphdr, check));
-		skb_reset_network_header(skb);
+			}
+			/* Setup SKB fields for checksum offload */
+			skb_partial_csum_set(skb, iphlen,
+					     offsetof(struct tcphdr, check));
+			skb_reset_network_header(skb);
+		}
 	}
 }
 

From 55d444b310c64b084dcc62ba3e4dc3862269fb96 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Wed, 23 Jun 2021 08:35:29 +0900
Subject: [PATCH 2742/3292] tcp: Add stats for socket migration.

This commit adds two stats for the socket migration feature to evaluate the
effectiveness: LINUX_MIB_TCPMIGRATEREQ(SUCCESS|FAILURE).

If the migration fails because of the own_req race in receiving ACK and
sending SYN+ACK paths, we do not increment the failure stat. Then another
CPU is responsible for the req.

Link: https://lore.kernel.org/bpf/CAK6E8=cgFKuGecTzSCSQ8z3YJ_163C0uwO9yRvfDSE7vOe9mJA@mail.gmail.com/
Suggested-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h       |  2 ++
 net/core/sock_reuseport.c       | 15 +++++++++++----
 net/ipv4/inet_connection_sock.c | 15 +++++++++++++--
 net/ipv4/proc.c                 |  2 ++
 net/ipv4/tcp_minisocks.c        |  3 +++
 5 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 26fc60ce9298..904909d020e2 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -290,6 +290,8 @@ enum
 	LINUX_MIB_TCPDUPLICATEDATAREHASH,	/* TCPDuplicateDataRehash */
 	LINUX_MIB_TCPDSACKRECVSEGS,		/* TCPDSACKRecvSegs */
 	LINUX_MIB_TCPDSACKIGNOREDDUBIOUS,	/* TCPDSACKIgnoredDubious */
+	LINUX_MIB_TCPMIGRATEREQSUCCESS,		/* TCPMigrateReqSuccess */
+	LINUX_MIB_TCPMIGRATEREQFAILURE,		/* TCPMigrateReqFailure */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index de5ee3ae86d5..3f00a28fe762 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -6,6 +6,7 @@
  * selecting the socket index from the array of available sockets.
  */
 
+#include <net/ip.h>
 #include <net/sock_reuseport.h>
 #include <linux/bpf.h>
 #include <linux/idr.h>
@@ -536,7 +537,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
 
 	socks = READ_ONCE(reuse->num_socks);
 	if (unlikely(!socks))
-		goto out;
+		goto failure;
 
 	/* paired with smp_wmb() in __reuseport_add_sock() */
 	smp_rmb();
@@ -546,13 +547,13 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
 	if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
 		if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
 			goto select_by_hash;
-		goto out;
+		goto failure;
 	}
 
 	if (!skb) {
 		skb = alloc_skb(0, GFP_ATOMIC);
 		if (!skb)
-			goto out;
+			goto failure;
 		allocated = true;
 	}
 
@@ -565,12 +566,18 @@ select_by_hash:
 	if (!nsk)
 		nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
 
-	if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+	if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
 		nsk = NULL;
+		goto failure;
+	}
 
 out:
 	rcu_read_unlock();
 	return nsk;
+
+failure:
+	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+	goto out;
 }
 EXPORT_SYMBOL(reuseport_migrate_sock);
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0eea878edc30..754013fa393b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -703,6 +703,8 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
 
 	nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
 	if (!nreq) {
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
 		/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
 		sock_put(sk);
 		return NULL;
@@ -876,9 +878,10 @@ static void reqsk_timer_handler(struct timer_list *t)
 		if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
 			/* delete timer */
 			inet_csk_reqsk_queue_drop(sk_listener, nreq);
-			goto drop;
+			goto no_ownership;
 		}
 
+		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
 		reqsk_migrate_reset(oreq);
 		reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
 		reqsk_put(oreq);
@@ -887,17 +890,19 @@ static void reqsk_timer_handler(struct timer_list *t)
 		return;
 	}
 
-drop:
 	/* Even if we can clone the req, we may need not retransmit any more
 	 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
 	 * CPU may win the "own_req" race so that inet_ehash_insert() fails.
 	 */
 	if (nreq) {
+		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
+no_ownership:
 		reqsk_migrate_reset(nreq);
 		reqsk_queue_removed(queue, nreq);
 		__reqsk_free(nreq);
 	}
 
+drop:
 	inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
 }
 
@@ -1135,11 +1140,13 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
 
 			refcount_set(&nreq->rsk_refcnt, 1);
 			if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+				__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
 				reqsk_migrate_reset(req);
 				reqsk_put(req);
 				return child;
 			}
 
+			__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
 			reqsk_migrate_reset(nreq);
 			__reqsk_free(nreq);
 		} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
@@ -1188,8 +1195,12 @@ void inet_csk_listen_stop(struct sock *sk)
 				refcount_set(&nreq->rsk_refcnt, 1);
 
 				if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+					__NET_INC_STATS(sock_net(nsk),
+							LINUX_MIB_TCPMIGRATEREQSUCCESS);
 					reqsk_migrate_reset(req);
 				} else {
+					__NET_INC_STATS(sock_net(nsk),
+							LINUX_MIB_TCPMIGRATEREQFAILURE);
 					reqsk_migrate_reset(nreq);
 					__reqsk_free(nreq);
 				}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6d46297a99f8..b0d3a09dc84e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -295,6 +295,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
 	SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
 	SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
+	SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
+	SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f258a4c0da71..0a4f3f16140a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -786,6 +786,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
+	if (sk != req->rsk_listener)
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
 	if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
 		inet_rsk(req)->acked = 1;
 		return NULL;

From 4d293fe1c69c157c15ac06918a805e5fef036682 Mon Sep 17 00:00:00 2001
From: Di Zhu <zhudi21@huawei.com>
Date: Wed, 23 Jun 2021 11:21:08 +0800
Subject: [PATCH 2743/3292] bonding: allow nesting of bonding device

The commit 3c9ef511b9fa ("bonding: avoid adding slave device with
IFF_MASTER flag") fix a crash when add slave device with IFF_MASTER,
but it rejects the scenario of nested bonding device.

As Eric Dumazet described: since there indeed is a usage scenario about
nesting bonding, we should not break it.

So we add a new judgment condition to allow nesting of bonding device.

Fixes: 3c9ef511b9fa ("bonding: avoid adding slave device with IFF_MASTER flag")
Suggested-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: Di Zhu <zhudi21@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 16840c9bc00d..03b1a93d7fea 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1601,7 +1601,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 	int link_reporting;
 	int res = 0, i;
 
-	if (slave_dev->flags & IFF_MASTER) {
+	if (slave_dev->flags & IFF_MASTER &&
+	    !netif_is_bond_master(slave_dev)) {
+		NL_SET_ERR_MSG(extack, "Device with IFF_MASTER cannot be enslaved");
 		netdev_err(bond_dev,
 			   "Error: Device with IFF_MASTER cannot be enslaved\n");
 		return -EPERM;

From b2ac9800cfe0f8da16abc4e74e003440361c112e Mon Sep 17 00:00:00 2001
From: Jian-Hong Pan <jhp@endlessos.org>
Date: Wed, 23 Jun 2021 11:28:03 +0800
Subject: [PATCH 2744/3292] net: bcmgenet: Fix attaching to PYH failed on RPi
 4B

The Broadcom UniMAC MDIO bus from mdio-bcm-unimac module comes too late.
So, GENET cannot find the ethernet PHY on UniMAC MDIO bus. This leads
GENET fail to attach the PHY as following log:

bcmgenet fd580000.ethernet: GENET 5.0 EPHY: 0x0000
...
could not attach to PHY
bcmgenet fd580000.ethernet eth0: failed to connect to PHY
uart-pl011 fe201000.serial: no DMA platform data
libphy: bcmgenet MII bus: probed
...
unimac-mdio unimac-mdio.-19: Broadcom UniMAC MDIO bus

This patch adds the soft dependency to load mdio-bcm-unimac module
before genet module to avoid the issue.

Fixes: 9a4e79697009 ("net: bcmgenet: utilize generic Broadcom UniMAC MDIO controller driver")
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=213485
Signed-off-by: Jian-Hong Pan <jhp@endlessos.org>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index fcca023f22e5..41f7f078cd27 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4296,3 +4296,4 @@ MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Broadcom GENET Ethernet controller driver");
 MODULE_ALIAS("platform:bcmgenet");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: mdio-bcm-unimac");

From 10ed7ce42b13790ba85f8e10110d89a2bce58807 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Wed, 23 Jun 2021 15:06:34 +0900
Subject: [PATCH 2745/3292] net/tls: Remove the __TLS_DEC_STATS() macro.

The commit d26b698dd3cd ("net/tls: add skeleton of MIB statistics")
introduced __TLS_DEC_STATS(), but it is not used and __SNMP_DEC_STATS() is
not defined also. Let's remove it.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 8341a8d1e807..8d398a5de3ee 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -79,8 +79,6 @@
 	__SNMP_INC_STATS((net)->mib.tls_statistics, field)
 #define TLS_INC_STATS(net, field)				\
 	SNMP_INC_STATS((net)->mib.tls_statistics, field)
-#define __TLS_DEC_STATS(net, field)				\
-	__SNMP_DEC_STATS((net)->mib.tls_statistics, field)
 #define TLS_DEC_STATS(net, field)				\
 	SNMP_DEC_STATS((net)->mib.tls_statistics, field)
 

From a2f7dc00ea51a9dbb7c5b4ca8e508acb24f7ca8c Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting_tian@126.com>
Date: Wed, 23 Jun 2021 11:16:22 -0400
Subject: [PATCH 2746/3292] virtio_net: Use virtio_find_vqs_ctx() helper

virtio_find_vqs_ctx() is defined but never be called currently,
it is the right place to use it.

Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0416a7e00914..b0b81458ca94 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2847,8 +2847,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 			ctx[rxq2vq(i)] = true;
 	}
 
-	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
-					 names, ctx, NULL);
+	ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
+				  names, ctx, NULL);
 	if (ret)
 		goto err_find;
 

From bcc3f2a829b9edbe3da5fb117ee5a63686d31834 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 23 Jun 2021 08:27:00 -0700
Subject: [PATCH 2747/3292] ipv6: exthdrs: do not blindly use init_net

I see no reason why max_dst_opts_cnt and max_hbh_opts_cnt
are fetched from the initial net namespace.

The other sysctls (max_dst_opts_len & max_hbh_opts_len)
are in fact already using the current ns.

Note: it is not clear why ipv6_destopt_rcv() use two ways to
get to the netns :

 1) dev_net(dst->dev)
    Originally used to increment IPSTATS_MIB_INHDRERRORS

 2) dev_net(skb->dev)
     Tom used this variant in his patch.

Maybe this calls to use ipv6_skb_net() instead ?

Fixes: 47d3d7ac656a ("ipv6: Implement limits on Hop-by-Hop and Destination options")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@quantonium.net>
Cc: Coco Li <lixiaoyan@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 56e479d158b7..6f7da8f3e2e5 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -306,7 +306,7 @@ fail_and_free:
 #endif
 
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
-			  init_net.ipv6.sysctl.max_dst_opts_cnt)) {
+			  net->ipv6.sysctl.max_dst_opts_cnt)) {
 		skb->transport_header += extlen;
 		opt = IP6CB(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -1037,7 +1037,7 @@ fail_and_free:
 
 	opt->flags |= IP6SKB_HOPBYHOP;
 	if (ip6_parse_tlv(tlvprochopopt_lst, skb,
-			  init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
+			  net->ipv6.sysctl.max_hbh_opts_cnt)) {
 		skb->transport_header += extlen;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);

From 1321ed5e76488cfd7a5d3ee83254be9b7c1cc581 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 23 Jun 2021 16:43:13 +0300
Subject: [PATCH 2748/3292] devlink: Decrease refcnt of parent rate object on
 leaf destroy

Port functions, like SFs, can be deleted by the user when its leaf rate
object has parent node. In such case node refcnt won't be decreased
which blocks the node from deletion later.
Do simple refcnt decrease, since driver in cleanup stage. This:
1) assumes that driver took proper internal parent unset action;
2) allows to avoid nested callbacks call and deadlock.

Fixes: d75559845078 ("devlink: Allow setting parent node of rate objects")
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 566ddd147633..ba27395d8fb3 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -9275,6 +9275,8 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
 
 	mutex_lock(&devlink->lock);
 	devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+	if (devlink_rate->parent)
+		refcount_dec(&devlink_rate->parent->refcnt);
 	list_del(&devlink_rate->list);
 	devlink_port->devlink_rate = NULL;
 	mutex_unlock(&devlink->lock);

From ff99324ded0176d28c3d8de7cac44580cf79d52a Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 23 Jun 2021 16:43:14 +0300
Subject: [PATCH 2749/3292] devlink: Remove eswitch mode check for mode set
 call

When eswitch is disabled, querying its current mode results in error.
Due to this when trying to set the eswitch mode for mlx5 devices, it
fails to set the eswitch switchdev mode.
Hence remove such check.

Fixes: a8ecb93ef03d ("devlink: Introduce rate nodes")
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index ba27395d8fb3..153d432f6daf 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2709,17 +2709,6 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 				    struct netlink_ext_ack *extack)
 {
 	struct devlink_rate *devlink_rate;
-	u16 old_mode;
-	int err;
-
-	if (!devlink->ops->eswitch_mode_get)
-		return -EOPNOTSUPP;
-	err = devlink->ops->eswitch_mode_get(devlink, &old_mode);
-	if (err)
-		return err;
-
-	if (old_mode == mode)
-		return 0;
 
 	list_for_each_entry(devlink_rate, &devlink->rate_list, list)
 		if (devlink_rate_is_node(devlink_rate)) {

From a3e5e5797faad0db319d106afaa31b9020fac44f Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Wed, 23 Jun 2021 16:43:15 +0300
Subject: [PATCH 2750/3292] devlink: Protect rate list with lock while
 switching modes

Devlink eswitch set command doesn't hold devlink->lock, which makes
possible race condition between rate list traversing and others devlink
rate KAPI calls, like devlink_rate_nodes_destroy().
Hold devlink lock while traversing the list.

Fixes: a8ecb93ef03d ("devlink: Introduce rate nodes")
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 153d432f6daf..8fdd04f00fd7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2710,11 +2710,15 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
 {
 	struct devlink_rate *devlink_rate;
 
+	/* Take the lock to sync with devlink_rate_nodes_destroy() */
+	mutex_lock(&devlink->lock);
 	list_for_each_entry(devlink_rate, &devlink->rate_list, list)
 		if (devlink_rate_is_node(devlink_rate)) {
+			mutex_unlock(&devlink->lock);
 			NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
 			return -EBUSY;
 		}
+	mutex_unlock(&devlink->lock);
 	return 0;
 }
 

From 0917310b984c1ac24da2be0ebe992539b9ba94ed Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 22 Jun 2021 19:53:08 -0500
Subject: [PATCH 2751/3292] cifs: missing null check for newinode pointer

in cifs_do_create we check if newinode is valid before referencing it
but are missing the check in one place in fs/cifs/dir.c

Addresses-Coverity: 1357292 ("Dereference after null check")
Acked-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/dir.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 912333082b18..79402ca0ddfa 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -384,10 +384,11 @@ cifs_create_set_dentry:
 		goto out_err;
 	}
 
-	if (S_ISDIR(newinode->i_mode)) {
-		rc = -EISDIR;
-		goto out_err;
-	}
+	if (newinode)
+		if (S_ISDIR(newinode->i_mode)) {
+			rc = -EISDIR;
+			goto out_err;
+		}
 
 	d_drop(direntry);
 	d_add(direntry, newinode);

From a5628263a9f8d47d9a1548fe9d5d75ba4423a735 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 22 Jun 2021 17:54:50 -0500
Subject: [PATCH 2752/3292] smb3: fix possible access to uninitialized pointer
 to DACL

dacl_ptr can be null so we must check for it everywhere it is
used in build_sec_desc.

Addresses-Coverity: 1475598 ("Explicit null dereference")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsacl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 5ec5d9d24032..388eb536cff1 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1294,7 +1294,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 		ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
 		ndacl_ptr->revision =
 			dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
-		ndacl_ptr->num_aces = dacl_ptr->num_aces;
+		ndacl_ptr->num_aces = dacl_ptr ? dacl_ptr->num_aces : 0;
 
 		if (uid_valid(uid)) { /* chown */
 			uid_t id;

From 162004a2f7ef5c77600e364dc4e9315b0e6ca386 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 23 Jun 2021 19:32:24 -0500
Subject: [PATCH 2753/3292] cifs: missing null pointer check in cifs_mount

We weren't checking if tcon is null before setting dfs path,
although we check for null tcon in an earlier assignment statement.

Addresses-Coverity: 1476411 ("Dereference after null check")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/connect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d95607a9312..c8079376d294 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3451,7 +3451,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 			rc = -ELOOP;
 	} while (rc == -EREMOTE);
 
-	if (rc)
+	if (rc || !tcon)
 		goto error;
 
 	kfree(ref_path);

From 8c37d01e1a86073d15ea7084390fba58d9a1665f Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 17 Jun 2021 15:05:43 +0900
Subject: [PATCH 2754/3292] PM / devfreq: passive: Fix get_target_freq when not
 using required-opp

The 86ad9a24f21e ("PM / devfreq: Add required OPPs support to passive governor")
supported the required-opp property for using devfreq passive governor.
But, 86ad9a24f21e has caused the problem on use-case when required-opp
is not used such as exynos-bus.c devfreq driver. So that fix the
get_target_freq of passive governor for supporting the case of when
required-opp is not used.

Fixes: 86ad9a24f21e ("PM / devfreq: Add required OPPs support to passive governor")
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/governor_passive.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index b094132bd20b..fc09324a03e0 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -65,7 +65,7 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
 		dev_pm_opp_put(p_opp);
 
 		if (IS_ERR(opp))
-			return PTR_ERR(opp);
+			goto no_required_opp;
 
 		*freq = dev_pm_opp_get_freq(opp);
 		dev_pm_opp_put(opp);
@@ -73,6 +73,7 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
 		return 0;
 	}
 
+no_required_opp:
 	/*
 	 * Get the OPP table's index of decided frequency by governor
 	 * of parent device.

From 997135017716c33f3405e86cca5da9567b40a08e Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Wed, 23 Jun 2021 11:50:11 -0700
Subject: [PATCH 2755/3292] io_uring: Fix race condition when sqp thread goes
 to sleep

If an asynchronous completion happens before the task is preparing
itself to wait and set its state to TASK_INTERRUPTIBLE, the completion
will not wake up the sqp thread.

Cc: stable@vger.kernel.org
Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d1419dc32ec6a97b453bee34dc03fa6a02797142.1624473200.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fc8637f591a6..7c545fa66f31 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6902,7 +6902,7 @@ static int io_sq_thread(void *data)
 		}
 
 		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
-		if (!io_sqd_events_pending(sqd)) {
+		if (!io_sqd_events_pending(sqd) && !io_run_task_work()) {
 			needs_sched = true;
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 				io_ring_set_wakeup_flag(ctx);

From 4ce8ad95f0afe927e9a29e7ad491274ebe3a8a7b Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Wed, 23 Jun 2021 11:50:18 -0700
Subject: [PATCH 2756/3292] io_uring: Create define to modify a SQPOLL
 parameter

The magic number used to cap the number of entries extracted from an
io_uring instance SQ before moving to the other instances is an
interesting parameter to experiment with.

A define has been created to make it easy to change its value from a
single location.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/b401640063e77ad3e9f921e09c9b3ac10a8bb923.1624473200.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7c545fa66f31..e7997f9bf879 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -89,6 +89,7 @@
 
 #define IORING_MAX_ENTRIES	32768
 #define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
+#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
 
 /*
  * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
@@ -6797,8 +6798,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 
 	to_submit = io_sqring_entries(ctx);
 	/* if we're handling multiple rings, cap submit size for fairness */
-	if (cap_entries && to_submit > 8)
-		to_submit = 8;
+	if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
+		to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;
 
 	if (!list_empty(&ctx->iopoll_list) || to_submit) {
 		unsigned nr_events = 0;

From aaf473d0100f64abc88560e2bea905805bcf2a8e Mon Sep 17 00:00:00 2001
From: Norbert Slusarek <nslusarek@gmx.net>
Date: Sun, 20 Jun 2021 14:38:42 +0200
Subject: [PATCH 2757/3292] can: j1939: j1939_sk_setsockopt(): prevent
 allocation of j1939 filter for optlen == 0

If optval != NULL and optlen == 0 are specified for SO_J1939_FILTER in
j1939_sk_setsockopt(), memdup_sockptr() will return ZERO_PTR for 0
size allocation. The new filter will be mistakenly assigned ZERO_PTR.
This patch checks for optlen != 0 and filter will be assigned NULL in
case of optlen == 0.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Link: https://lore.kernel.org/r/20210620123842.117975-1-nslusarek@gmx.net
Signed-off-by: Norbert Slusarek <nslusarek@gmx.net>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index fce8bc8afeb7..e1a399821238 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -676,7 +676,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
 
 	switch (optname) {
 	case SO_J1939_FILTER:
-		if (!sockptr_is_null(optval)) {
+		if (!sockptr_is_null(optval) && optlen != 0) {
 			struct j1939_filter *f;
 			int c;
 

From b17233d385d0b6b43ecf81d43008cb1bbb008166 Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Wed, 23 Jun 2021 16:26:00 +0200
Subject: [PATCH 2758/3292] can: peak_pciefd: pucan_handle_status(): fix a
 potential starvation issue in TX path

Rather than just indicating that transmission can start, this patch
requires the explicit flushing of the network TX queue when the driver
is informed by the device that it can transmit, next to its
configuration.

In this way, if frames have already been written by the application,
they will actually be transmitted.

Fixes: ffd137f7043c ("can: peak/pcie_fd: remove useless code when interface starts")
Link: https://lore.kernel.org/r/20210623142600.149904-1-s.grosjean@peak-system.com
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/peak_canfd/peak_canfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index 00847cbaf7b6..d08718e98e11 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -351,8 +351,8 @@ static int pucan_handle_status(struct peak_canfd_priv *priv,
 				return err;
 		}
 
-		/* start network queue (echo_skb array is empty) */
-		netif_start_queue(ndev);
+		/* wake network queue up (echo_skb array is empty) */
+		netif_wake_queue(ndev);
 
 		return 0;
 	}

From 276010551664f73b6f1616dde471d6f0d63a73ba Mon Sep 17 00:00:00 2001
From: Cassio Neri <cassio.neri@gmail.com>
Date: Tue, 22 Jun 2021 22:36:16 +0100
Subject: [PATCH 2759/3292] time: Improve performance of time64_to_tm()

The current implementation of time64_to_tm() contains unnecessary loops,
branches and look-up tables. The new one uses an arithmetic-based algorithm
appeared in [1] and is approximately 3x faster (YMMV).

The drawback is that the new code isn't intuitive and contains many 'magic
numbers' (not unusual for this type of algorithm). However, [1] justifies
all those numbers and, given this function's history, the code is unlikely
to need much maintenance, if any at all.

Add a KUnit test for it which checks every day in a 160,000 years interval
centered at 1970-01-01 against the expected result.

[1] Neri, Schneider, "Euclidean Affine Functions and Applications to
Calendar Algorithms". https://arxiv.org/abs/2102.06959

Signed-off-by: Cassio Neri <cassio.neri@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210622213616.313046-1-cassio.neri@gmail.com
---
 kernel/time/Kconfig     |   9 ++++
 kernel/time/Makefile    |   1 +
 kernel/time/time_test.c |  98 +++++++++++++++++++++++++++++++++
 kernel/time/timeconv.c  | 116 ++++++++++++++++++++++------------------
 4 files changed, 172 insertions(+), 52 deletions(-)
 create mode 100644 kernel/time/time_test.c

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 83e158d016ba..3610b1bef142 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -64,6 +64,15 @@ config LEGACY_TIMER_TICK
 	  lack support for the generic clockevent framework.
 	  New platforms should use generic clockevents instead.
 
+config TIME_KUNIT_TEST
+	tristate "KUnit test for kernel/time functions" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Enable this option to test RTC library functions.
+
+	  If unsure, say N.
+
 if GENERIC_CLOCKEVENTS
 menu "Timers subsystem"
 
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 1ed85b25b096..7e875e63ff3b 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
 obj-$(CONFIG_TIME_NS)				+= namespace.o
 obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG)		+= clocksource-wdtest.o
+obj-$(CONFIG_TIME_KUNIT_TEST)			+= time_test.o
diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c
new file mode 100644
index 000000000000..341ebfad5e99
--- /dev/null
+++ b/kernel/time/time_test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: LGPL-2.1+
+
+#include <kunit/test.h>
+#include <linux/time.h>
+
+/*
+ * Traditional implementation of leap year evaluation.
+ */
+static bool is_leap(long year)
+{
+	return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
+}
+
+/*
+ * Gets the last day of a month.
+ */
+static int last_day_of_month(long year, int month)
+{
+	if (month == 2)
+		return 28 + is_leap(year);
+	if (month == 4 || month == 6 || month == 9 || month == 11)
+		return 30;
+	return 31;
+}
+
+/*
+ * Advances a date by one day.
+ */
+static void advance_date(long *year, int *month, int *mday, int *yday)
+{
+	if (*mday != last_day_of_month(*year, *month)) {
+		++*mday;
+		++*yday;
+		return;
+	}
+
+	*mday = 1;
+	if (*month != 12) {
+		++*month;
+		++*yday;
+		return;
+	}
+
+	*month = 1;
+	*yday  = 0;
+	++*year;
+}
+
+/*
+ * Checks every day in a 160000 years interval centered at 1970-01-01
+ * against the expected result.
+ */
+static void time64_to_tm_test_date_range(struct kunit *test)
+{
+	/*
+	 * 80000 years	= (80000 / 400) * 400 years
+	 *		= (80000 / 400) * 146097 days
+	 *		= (80000 / 400) * 146097 * 86400 seconds
+	 */
+	time64_t total_secs = ((time64_t) 80000) / 400 * 146097 * 86400;
+	long year = 1970 - 80000;
+	int month = 1;
+	int mdday = 1;
+	int yday = 0;
+
+	struct tm result;
+	time64_t secs;
+	s64 days;
+
+	for (secs = -total_secs; secs <= total_secs; secs += 86400) {
+
+		time64_to_tm(secs, 0, &result);
+
+		days = div_s64(secs, 86400);
+
+		#define FAIL_MSG "%05ld/%02d/%02d (%2d) : %ld", \
+			year, month, mdday, yday, days
+
+		KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, mdday, result.tm_mday, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG);
+
+		advance_date(&year, &month, &mdday, &yday);
+	}
+}
+
+static struct kunit_case time_test_cases[] = {
+	KUNIT_CASE(time64_to_tm_test_date_range),
+	{}
+};
+
+static struct kunit_suite time_test_suite = {
+	.name = "time_test_cases",
+	.test_cases = time_test_cases,
+};
+
+kunit_test_suite(time_test_suite);
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
index 62e3b46717a6..59b922c826e7 100644
--- a/kernel/time/timeconv.c
+++ b/kernel/time/timeconv.c
@@ -22,47 +22,16 @@
 
 /*
  * Converts the calendar time to broken-down time representation
- * Based on code from glibc-2.6
  *
  * 2009-7-14:
  *   Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
+ * 2021-06-02:
+ *   Reimplemented by Cassio Neri <cassio.neri@gmail.com>
  */
 
 #include <linux/time.h>
 #include <linux/module.h>
-
-/*
- * Nonzero if YEAR is a leap year (every 4 years,
- * except every 100th isn't, and every 400th is).
- */
-static int __isleap(long year)
-{
-	return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
-}
-
-/* do a mathdiv for long type */
-static long math_div(long a, long b)
-{
-	return a / b - (a % b < 0);
-}
-
-/* How many leap years between y1 and y2, y1 must less or equal to y2 */
-static long leaps_between(long y1, long y2)
-{
-	long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
-		+ math_div(y1 - 1, 400);
-	long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
-		+ math_div(y2 - 1, 400);
-	return leaps2 - leaps1;
-}
-
-/* How many days come before each month (0-12). */
-static const unsigned short __mon_yday[2][13] = {
-	/* Normal years. */
-	{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
-	/* Leap years. */
-	{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
-};
+#include <linux/kernel.h>
 
 #define SECS_PER_HOUR	(60 * 60)
 #define SECS_PER_DAY	(SECS_PER_HOUR * 24)
@@ -77,9 +46,11 @@ static const unsigned short __mon_yday[2][13] = {
  */
 void time64_to_tm(time64_t totalsecs, int offset, struct tm *result)
 {
-	long days, rem, y;
+	u32 u32tmp, day_of_century, year_of_century, day_of_year, month, day;
+	u64 u64tmp, udays, century, year;
+	bool is_Jan_or_Feb, is_leap_year;
+	long days, rem;
 	int remainder;
-	const unsigned short *ip;
 
 	days = div_s64_rem(totalsecs, SECS_PER_DAY, &remainder);
 	rem = remainder;
@@ -103,27 +74,68 @@ void time64_to_tm(time64_t totalsecs, int offset, struct tm *result)
 	if (result->tm_wday < 0)
 		result->tm_wday += 7;
 
-	y = 1970;
+	/*
+	 * The following algorithm is, basically, Proposition 6.3 of Neri
+	 * and Schneider [1]. In a few words: it works on the computational
+	 * (fictitious) calendar where the year starts in March, month = 2
+	 * (*), and finishes in February, month = 13. This calendar is
+	 * mathematically convenient because the day of the year does not
+	 * depend on whether the year is leap or not. For instance:
+	 *
+	 * March 1st		0-th day of the year;
+	 * ...
+	 * April 1st		31-st day of the year;
+	 * ...
+	 * January 1st		306-th day of the year; (Important!)
+	 * ...
+	 * February 28th	364-th day of the year;
+	 * February 29th	365-th day of the year (if it exists).
+	 *
+	 * After having worked out the date in the computational calendar
+	 * (using just arithmetics) it's easy to convert it to the
+	 * corresponding date in the Gregorian calendar.
+	 *
+	 * [1] "Euclidean Affine Functions and Applications to Calendar
+	 * Algorithms". https://arxiv.org/abs/2102.06959
+	 *
+	 * (*) The numbering of months follows tm more closely and thus,
+	 * is slightly different from [1].
+	 */
 
-	while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
-		/* Guess a corrected year, assuming 365 days per year. */
-		long yg = y + math_div(days, 365);
+	udays	= ((u64) days) + 2305843009213814918ULL;
 
-		/* Adjust DAYS and Y to match the guessed year. */
-		days -= (yg - y) * 365 + leaps_between(y, yg);
-		y = yg;
-	}
+	u64tmp		= 4 * udays + 3;
+	century		= div64_u64_rem(u64tmp, 146097, &u64tmp);
+	day_of_century	= (u32) (u64tmp / 4);
 
-	result->tm_year = y - 1900;
+	u32tmp		= 4 * day_of_century + 3;
+	u64tmp		= 2939745ULL * u32tmp;
+	year_of_century	= upper_32_bits(u64tmp);
+	day_of_year	= lower_32_bits(u64tmp) / 2939745 / 4;
 
-	result->tm_yday = days;
+	year		= 100 * century + year_of_century;
+	is_leap_year	= year_of_century ? !(year_of_century % 4) : !(century % 4);
 
-	ip = __mon_yday[__isleap(y)];
-	for (y = 11; days < ip[y]; y--)
-		continue;
-	days -= ip[y];
+	u32tmp		= 2141 * day_of_year + 132377;
+	month		= u32tmp >> 16;
+	day		= ((u16) u32tmp) / 2141;
 
-	result->tm_mon = y;
-	result->tm_mday = days + 1;
+	/*
+	 * Recall that January 1st is the 306-th day of the year in the
+	 * computational (not Gregorian) calendar.
+	 */
+	is_Jan_or_Feb	= day_of_year >= 306;
+
+	/* Convert to the Gregorian calendar and adjust to Unix time. */
+	year		= year + is_Jan_or_Feb - 6313183731940000ULL;
+	month		= is_Jan_or_Feb ? month - 12 : month;
+	day		= day + 1;
+	day_of_year	+= is_Jan_or_Feb ? -306 : 31 + 28 + is_leap_year;
+
+	/* Convert to tm's format. */
+	result->tm_year = (long) (year - 1900);
+	result->tm_mon  = (int) month;
+	result->tm_mday = (int) day;
+	result->tm_yday = (int) day_of_year;
 }
 EXPORT_SYMBOL(time64_to_tm);

From ced50fc49f3bde2892c3d7fad7b3b6bfbc6ef90e Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 23 Jun 2021 13:25:04 +0200
Subject: [PATCH 2760/3292] bpf, x86: Remove unused cnt increase from EMIT
 macro

Removing unused cnt increase from EMIT macro together with cnt declarations.
This was introduced in commit [1] to ensure proper code generation. But that
code was removed in commit [2] and this extra code was left in.

  [1] b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper")
  [2] ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT")

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210623112504.709856-1-jolsa@kernel.org
---
 arch/x86/net/bpf_jit_comp.c | 44 ++++++++++---------------------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2a2e290fa5d8..db1e83813db5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -31,7 +31,7 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 }
 
 #define EMIT(bytes, len) \
-	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
+	do { prog = emit_code(prog, bytes, len); } while (0)
 
 #define EMIT1(b1)		EMIT(b1, 1)
 #define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
@@ -239,7 +239,6 @@ struct jit_context {
 static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	if (callee_regs_used[0])
 		EMIT1(0x53);         /* push rbx */
@@ -255,7 +254,6 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	if (callee_regs_used[3])
 		EMIT2(0x41, 0x5F);   /* pop r15 */
@@ -277,13 +275,12 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 			  bool tail_call_reachable, bool is_subprog)
 {
 	u8 *prog = *pprog;
-	int cnt = X86_PATCH_SIZE;
 
 	/* BPF trampoline can be made to work without these nops,
 	 * but let's waste 5 bytes for now and optimize later
 	 */
-	memcpy(prog, x86_nops[5], cnt);
-	prog += cnt;
+	memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
+	prog += X86_PATCH_SIZE;
 	if (!ebpf_from_cbpf) {
 		if (tail_call_reachable && !is_subprog)
 			EMIT2(0x31, 0xC0); /* xor eax, eax */
@@ -303,7 +300,6 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 	s64 offset;
 
 	offset = func - (ip + X86_PATCH_SIZE);
@@ -423,7 +419,6 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	int off1 = 42;
 	int off2 = 31;
 	int off3 = 9;
-	int cnt = 0;
 
 	/* count the additional bytes used for popping callee regs from stack
 	 * that need to be taken into account for each of the offsets that
@@ -513,7 +508,6 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	int pop_bytes = 0;
 	int off1 = 20;
 	int poke_off;
-	int cnt = 0;
 
 	/* count the additional bytes used for popping callee regs to stack
 	 * that need to be taken into account for jump offset that is used for
@@ -615,7 +609,6 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
 {
 	u8 *prog = *pprog;
 	u8 b1, b2, b3;
-	int cnt = 0;
 
 	/*
 	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
@@ -655,7 +648,6 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
 			   const u32 imm32_hi, const u32 imm32_lo)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
 		/*
@@ -678,7 +670,6 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
 static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	if (is64) {
 		/* mov dst, src */
@@ -697,7 +688,6 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
 static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	if (is_imm8(off)) {
 		/* 1-byte signed displacement.
@@ -720,7 +710,6 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
 static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	if (is64)
 		EMIT1(add_2mod(0x48, dst_reg, src_reg));
@@ -733,7 +722,6 @@ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64)
 static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	switch (size) {
 	case BPF_B:
@@ -764,7 +752,6 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	switch (size) {
 	case BPF_B:
@@ -799,7 +786,6 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 		       u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 
 	EMIT1(0xF0); /* lock prefix */
 
@@ -869,10 +855,10 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
 	}
 }
 
-static int emit_nops(u8 **pprog, int len)
+static void emit_nops(u8 **pprog, int len)
 {
 	u8 *prog = *pprog;
-	int i, noplen, cnt = 0;
+	int i, noplen;
 
 	while (len > 0) {
 		noplen = len;
@@ -886,8 +872,6 @@ static int emit_nops(u8 **pprog, int len)
 	}
 
 	*pprog = prog;
-
-	return cnt;
 }
 
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
@@ -902,7 +886,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	bool tail_call_seen = false;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
-	int i, cnt = 0, excnt = 0;
+	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
 	int err;
@@ -1576,7 +1560,7 @@ emit_cond_jmp:		/* Convert BPF opcode to x86 */
 						       nops);
 						return -EFAULT;
 					}
-					cnt += emit_nops(&prog, nops);
+					emit_nops(&prog, nops);
 				}
 				EMIT2(jmp_cond, jmp_offset);
 			} else if (is_simm32(jmp_offset)) {
@@ -1622,7 +1606,7 @@ emit_cond_jmp:		/* Convert BPF opcode to x86 */
 						       nops);
 						return -EFAULT;
 					}
-					cnt += emit_nops(&prog, nops);
+					emit_nops(&prog, nops);
 				}
 				break;
 			}
@@ -1647,7 +1631,7 @@ emit_jmp:
 						       nops);
 						return -EFAULT;
 					}
-					cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
+					emit_nops(&prog, INSN_SZ_DIFF - 2);
 				}
 				EMIT2(0xEB, jmp_offset);
 			} else if (is_simm32(jmp_offset)) {
@@ -1754,7 +1738,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
-	int cnt = 0;
 
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
@@ -1822,7 +1805,6 @@ static void emit_align(u8 **pprog, u32 align)
 static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
 	s64 offset;
 
 	offset = func - (ip + 2 + 4);
@@ -1854,7 +1836,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 			      u8 **branches)
 {
 	u8 *prog = *pprog;
-	int i, cnt = 0;
+	int i;
 
 	/* The first fmod_ret program will receive a garbage return value.
 	 * Set this to 0 to avoid confusing the program.
@@ -1950,7 +1932,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 				struct bpf_tramp_progs *tprogs,
 				void *orig_call)
 {
-	int ret, i, cnt = 0, nr_args = m->nr_args;
+	int ret, i, nr_args = m->nr_args;
 	int stack_size = nr_args * 8;
 	struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
@@ -2095,8 +2077,6 @@ static int emit_fallback_jump(u8 **pprog)
 	 */
 	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
 #else
-	int cnt = 0;
-
 	EMIT2(0xFF, 0xE2);	/* jmp rdx */
 #endif
 	*pprog = prog;
@@ -2106,7 +2086,7 @@ static int emit_fallback_jump(u8 **pprog)
 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 {
 	u8 *jg_reloc, *prog = *pprog;
-	int pivot, err, jg_bytes = 1, cnt = 0;
+	int pivot, err, jg_bytes = 1;
 	s64 jg_offset;
 
 	if (a == b) {

From f264481ad614dfd9aae59eeefa5fc664cdf173ae Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Fri, 18 Jun 2021 13:48:36 +0530
Subject: [PATCH 2761/3292] HID: amd_sfh: Extend driver capabilities for
 multi-generation support

Initial driver support only covered the first generation of SFH
platforms. In order to support the future generations introduce
ops selection to distinguish the different platforms.

Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Reviewed-by: Nehal Shah <nehal-bakulchandra.shah@amd.com>
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/amd-sfh-hid/amd_sfh_client.c |  4 +-
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.c   | 80 ++++++++++++++++++++++--
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.h   | 27 ++++++++
 3 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
index 3589d9945da1..d3d5dcec7cf1 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
@@ -202,7 +202,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 		rc = amdtp_hid_probe(cl_data->cur_hid_dev, cl_data);
 		if (rc)
 			return rc;
-		amd_start_sensor(privdata, info);
+		privdata->mp2_ops->start(privdata, info);
 		cl_data->sensor_sts[i] = 1;
 	}
 	privdata->cl_data = cl_data;
@@ -230,7 +230,7 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
 	int i;
 
 	for (i = 0; i < cl_data->num_hid_devices; i++)
-		amd_stop_sensor(privdata, i);
+		privdata->mp2_ops->stop(privdata, i);
 
 	cancel_delayed_work_sync(&cl_data->work);
 	cancel_delayed_work_sync(&cl_data->work_buffer);
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
index ddecc84fd6f0..48ff54dc5d70 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
@@ -30,6 +30,48 @@ static int sensor_mask_override = -1;
 module_param_named(sensor_mask, sensor_mask_override, int, 0444);
 MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask");
 
+static void amd_start_sensor_v2(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info)
+{
+	union sfh_cmd_base cmd_base;
+
+	cmd_base.ul = 0;
+	cmd_base.cmd_v2.cmd_id = ENABLE_SENSOR;
+	cmd_base.cmd_v2.period = info.period;
+	cmd_base.cmd_v2.sensor_id = info.sensor_idx;
+	cmd_base.cmd_v2.length = 16;
+
+	if (info.sensor_idx == als_idx)
+		cmd_base.cmd_v2.mem_type = USE_C2P_REG;
+
+	writeq(info.dma_address, privdata->mmio + AMD_C2P_MSG1);
+	writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
+}
+
+static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx)
+{
+	union sfh_cmd_base cmd_base;
+
+	cmd_base.ul = 0;
+	cmd_base.cmd_v2.cmd_id = DISABLE_SENSOR;
+	cmd_base.cmd_v2.period = 0;
+	cmd_base.cmd_v2.sensor_id = sensor_idx;
+	cmd_base.cmd_v2.length  = 16;
+
+	writeq(0x0, privdata->mmio + AMD_C2P_MSG2);
+	writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
+}
+
+static void amd_stop_all_sensor_v2(struct amd_mp2_dev *privdata)
+{
+	union sfh_cmd_base cmd_base;
+
+	cmd_base.cmd_v2.cmd_id = STOP_ALL_SENSORS;
+	cmd_base.cmd_v2.period = 0;
+	cmd_base.cmd_v2.sensor_id = 0;
+
+	writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
+}
+
 void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info)
 {
 	union sfh_cmd_param cmd_param;
@@ -98,7 +140,6 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id)
 {
 	int activestatus, num_of_sensors = 0;
 	const struct dmi_system_id *dmi_id;
-	u32 activecontrolstatus;
 
 	if (sensor_mask_override == -1) {
 		dmi_id = dmi_first_match(dmi_sensor_mask_overrides);
@@ -109,8 +150,7 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id)
 	if (sensor_mask_override >= 0) {
 		activestatus = sensor_mask_override;
 	} else {
-		activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3);
-		activestatus = activecontrolstatus >> 4;
+		activestatus = privdata->mp2_acs >> 4;
 	}
 
 	if (ACEL_EN  & activestatus)
@@ -130,8 +170,38 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id)
 
 static void amd_mp2_pci_remove(void *privdata)
 {
+	struct amd_mp2_dev *mp2 = privdata;
 	amd_sfh_hid_client_deinit(privdata);
-	amd_stop_all_sensors(privdata);
+	mp2->mp2_ops->stop_all(mp2);
+}
+
+static const struct amd_mp2_ops amd_sfh_ops_v2 = {
+	.start = amd_start_sensor_v2,
+	.stop = amd_stop_sensor_v2,
+	.stop_all = amd_stop_all_sensor_v2,
+};
+
+static const struct amd_mp2_ops amd_sfh_ops = {
+	.start = amd_start_sensor,
+	.stop = amd_stop_sensor,
+	.stop_all = amd_stop_all_sensors,
+};
+
+static void mp2_select_ops(struct amd_mp2_dev *privdata)
+{
+	u8 acs;
+
+	privdata->mp2_acs = readl(privdata->mmio + AMD_P2C_MSG3);
+	acs = privdata->mp2_acs & GENMASK(3, 0);
+
+	switch (acs) {
+	case V2_STATUS:
+		privdata->mp2_ops = &amd_sfh_ops_v2;
+		break;
+	default:
+		privdata->mp2_ops = &amd_sfh_ops;
+		break;
+	}
 }
 
 static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -164,6 +234,8 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
 	if (rc)
 		return rc;
 
+	mp2_select_ops(privdata);
+
 	return amd_sfh_hid_client_init(privdata);
 }
 
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
index 489415f7c22c..6297d78c7aef 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
@@ -22,9 +22,13 @@
 #define AMD_C2P_MSG1	0x10504
 #define AMD_C2P_MSG2	0x10508
 
+#define AMD_C2P_MSG(regno) (0x10500 + ((regno) * 4))
+
 /* MP2 P2C Message Registers */
 #define AMD_P2C_MSG3	0x1068C /* Supported Sensors info */
 
+#define V2_STATUS	0x2
+
 /* SFH Command register */
 union sfh_cmd_base {
 	u32 ul;
@@ -33,6 +37,15 @@ union sfh_cmd_base {
 		u32 sensor_id : 8;
 		u32 period : 16;
 	} s;
+	struct {
+		u32 cmd_id : 4;
+		u32 intr_enable : 1;
+		u32 rsvd1 : 3;
+		u32 length : 7;
+		u32 mem_type : 1;
+		u32 sensor_id : 8;
+		u32 period : 8;
+	} cmd_v2;
 };
 
 union sfh_cmd_param {
@@ -61,6 +74,9 @@ struct amd_mp2_dev {
 	struct pci_dev *pdev;
 	struct amdtp_cl_data *cl_data;
 	void __iomem *mmio;
+	const struct amd_mp2_ops *mp2_ops;
+	/* mp2 active control status */
+	u32 mp2_acs;
 };
 
 struct amd_mp2_sensor_info {
@@ -69,10 +85,21 @@ struct amd_mp2_sensor_info {
 	dma_addr_t dma_address;
 };
 
+enum mem_use_type {
+	USE_DRAM,
+	USE_C2P_REG,
+};
+
 void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info);
 void amd_stop_sensor(struct amd_mp2_dev *privdata, u16 sensor_idx);
 void amd_stop_all_sensors(struct amd_mp2_dev *privdata);
 int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id);
 int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata);
 int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata);
+
+struct amd_mp2_ops {
+	 void (*start)(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info);
+	 void (*stop)(struct amd_mp2_dev *privdata, u16 sensor_idx);
+	 void (*stop_all)(struct amd_mp2_dev *privdata);
+};
 #endif

From 0aad9c95eb9a2b086322e28ae2e58ad25598604e Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Fri, 18 Jun 2021 13:48:37 +0530
Subject: [PATCH 2762/3292] HID: amd_sfh: Extend ALS support for newer AMD
 platform

Extend ALS support for AMD next generation SoC's like Renoir, Cezanne.
AMD next generation platforms use C2P message register to read ALS
sensor data instead of DRAM address.

Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Reviewed-by: Nehal Shah <nehal-bakulchandra.shah@amd.com>
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/amd-sfh-hid/amd_sfh_client.c      | 39 +++++++++----------
 drivers/hid/amd-sfh-hid/amd_sfh_hid.h         | 10 ++++-
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.c        |  5 +++
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.h        |  2 +
 .../hid_descriptor/amd_sfh_hid_desc.c         | 14 ++++++-
 .../hid_descriptor/amd_sfh_hid_desc.h         |  1 -
 6 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
index d3d5dcec7cf1..efb849411d25 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
@@ -77,6 +77,7 @@ int amd_sfh_get_report(struct hid_device *hid, int report_id, int report_type)
 static void amd_sfh_work(struct work_struct *work)
 {
 	struct amdtp_cl_data *cli_data = container_of(work, struct amdtp_cl_data, work.work);
+	struct amd_input_data *in_data = cli_data->in_data;
 	struct request_list *req_node;
 	u8 current_index, sensor_index;
 	u8 report_id, node_type;
@@ -101,13 +102,11 @@ static void amd_sfh_work(struct work_struct *work)
 			pr_err("AMDSFH: Invalid report size\n");
 
 	} else if (node_type == HID_INPUT_REPORT) {
-		report_size = get_input_report(sensor_index, report_id,
-					       cli_data->input_report[current_index],
-					       cli_data->sensor_virt_addr[current_index]);
+		report_size = get_input_report(current_index, sensor_index, report_id, in_data);
 		if (report_size)
 			hid_input_report(cli_data->hid_sensor_hubs[current_index],
 					 cli_data->report_type[current_index],
-					 cli_data->input_report[current_index], report_size, 0);
+					 in_data->input_report[current_index], report_size, 0);
 		else
 			pr_err("AMDSFH: Invalid report size\n");
 	}
@@ -119,21 +118,22 @@ static void amd_sfh_work(struct work_struct *work)
 static void amd_sfh_work_buffer(struct work_struct *work)
 {
 	struct amdtp_cl_data *cli_data = container_of(work, struct amdtp_cl_data, work_buffer.work);
+	struct amd_input_data *in_data = cli_data->in_data;
 	u8 report_size;
 	int i;
 
 	for (i = 0; i < cli_data->num_hid_devices; i++) {
-		report_size = get_input_report(cli_data->sensor_idx[i], cli_data->report_id[i],
-					       cli_data->input_report[i],
-					       cli_data->sensor_virt_addr[i]);
+		report_size = get_input_report(i, cli_data->sensor_idx[i], cli_data->report_id[i],
+					       in_data);
 		hid_input_report(cli_data->hid_sensor_hubs[i], HID_INPUT_REPORT,
-				 cli_data->input_report[i], report_size, 0);
+				 in_data->input_report[i], report_size, 0);
 	}
 	schedule_delayed_work(&cli_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
 }
 
 int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 {
+	struct amd_input_data *in_data = &privdata->in_data;
 	struct amdtp_cl_data *cl_data = privdata->cl_data;
 	struct amd_mp2_sensor_info info;
 	struct device *dev;
@@ -143,18 +143,16 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 	int rc, i;
 
 	dev = &privdata->pdev->dev;
-	cl_data = devm_kzalloc(dev, sizeof(*cl_data), GFP_KERNEL);
-	if (!cl_data)
-		return -ENOMEM;
 
 	cl_data->num_hid_devices = amd_mp2_get_sensor_num(privdata, &cl_data->sensor_idx[0]);
 
 	INIT_DELAYED_WORK(&cl_data->work, amd_sfh_work);
 	INIT_DELAYED_WORK(&cl_data->work_buffer, amd_sfh_work_buffer);
 	INIT_LIST_HEAD(&req_list.list);
+	cl_data->in_data = in_data;
 
 	for (i = 0; i < cl_data->num_hid_devices; i++) {
-		cl_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8,
+		in_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8,
 								  &cl_data->sensor_dma_addr[i],
 								  GFP_KERNEL);
 		cl_data->sensor_sts[i] = 0;
@@ -181,8 +179,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 			rc = -ENOMEM;
 			goto cleanup;
 		}
-		cl_data->input_report[i] = devm_kzalloc(dev, input_report_size, GFP_KERNEL);
-		if (!cl_data->input_report[i]) {
+		in_data->input_report[i] = devm_kzalloc(dev, input_report_size, GFP_KERNEL);
+		if (!in_data->input_report[i]) {
 			rc = -ENOMEM;
 			goto cleanup;
 		}
@@ -205,28 +203,27 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 		privdata->mp2_ops->start(privdata, info);
 		cl_data->sensor_sts[i] = 1;
 	}
-	privdata->cl_data = cl_data;
 	schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP));
 	return 0;
 
 cleanup:
 	for (i = 0; i < cl_data->num_hid_devices; i++) {
-		if (cl_data->sensor_virt_addr[i]) {
+		if (in_data->sensor_virt_addr[i]) {
 			dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int),
-					  cl_data->sensor_virt_addr[i],
+					  in_data->sensor_virt_addr[i],
 					  cl_data->sensor_dma_addr[i]);
 		}
 		devm_kfree(dev, cl_data->feature_report[i]);
-		devm_kfree(dev, cl_data->input_report[i]);
+		devm_kfree(dev, in_data->input_report[i]);
 		devm_kfree(dev, cl_data->report_descr[i]);
 	}
-	devm_kfree(dev, cl_data);
 	return rc;
 }
 
 int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
 {
 	struct amdtp_cl_data *cl_data = privdata->cl_data;
+	struct amd_input_data *in_data = cl_data->in_data;
 	int i;
 
 	for (i = 0; i < cl_data->num_hid_devices; i++)
@@ -237,9 +234,9 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
 	amdtp_hid_remove(cl_data);
 
 	for (i = 0; i < cl_data->num_hid_devices; i++) {
-		if (cl_data->sensor_virt_addr[i]) {
+		if (in_data->sensor_virt_addr[i]) {
 			dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int),
-					  cl_data->sensor_virt_addr[i],
+					  in_data->sensor_virt_addr[i],
 					  cl_data->sensor_dma_addr[i]);
 		}
 	}
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
index d7eac1728e31..359c5de96af8 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
@@ -14,6 +14,11 @@
 #define AMD_SFH_HID_VENDOR	0x1022
 #define AMD_SFH_HID_PRODUCT	0x0001
 
+struct amd_input_data {
+	u32 *sensor_virt_addr[MAX_HID_DEVICES];
+	u8 *input_report[MAX_HID_DEVICES];
+};
+
 struct amdtp_cl_data {
 	u8 init_done;
 	u32 cur_hid_dev;
@@ -26,7 +31,6 @@ struct amdtp_cl_data {
 	u8 *hid_descr[MAX_HID_DEVICES];
 	int hid_descr_size[MAX_HID_DEVICES];
 	phys_addr_t phys_addr_base;
-	u32 *sensor_virt_addr[MAX_HID_DEVICES];
 	dma_addr_t sensor_dma_addr[MAX_HID_DEVICES];
 	u32 sensor_sts[MAX_HID_DEVICES];
 	u32 sensor_requested_cnt[MAX_HID_DEVICES];
@@ -34,8 +38,8 @@ struct amdtp_cl_data {
 	u8 report_id[MAX_HID_DEVICES];
 	u8 sensor_idx[MAX_HID_DEVICES];
 	u8 *feature_report[MAX_HID_DEVICES];
-	u8 *input_report[MAX_HID_DEVICES];
 	u8 request_done[MAX_HID_DEVICES];
+	struct amd_input_data *in_data;
 	struct delayed_work work;
 	struct delayed_work work_buffer;
 };
@@ -64,4 +68,6 @@ void amdtp_hid_remove(struct amdtp_cl_data *cli_data);
 int amd_sfh_get_report(struct hid_device *hid, int report_id, int report_type);
 void amd_sfh_set_report(struct hid_device *hid, int report_id, int report_type);
 void amdtp_hid_wakeup(struct hid_device *hid);
+u8 get_input_report(u8 current_index, int sensor_idx, int report_id,
+		    struct amd_input_data *in_data);
 #endif
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
index 48ff54dc5d70..ff131f450bdc 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
@@ -230,6 +230,11 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		return rc;
 	}
+
+	privdata->cl_data = devm_kzalloc(&pdev->dev, sizeof(struct amdtp_cl_data), GFP_KERNEL);
+	if (!privdata->cl_data)
+		return -ENOMEM;
+
 	rc = devm_add_action_or_reset(&pdev->dev, amd_mp2_pci_remove, privdata);
 	if (rc)
 		return rc;
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
index 6297d78c7aef..0886b2ad033e 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
@@ -10,6 +10,7 @@
 #define PCIE_MP2_AMD_H
 
 #include <linux/pci.h>
+#include "amd_sfh_hid.h"
 
 #define PCI_DEVICE_ID_AMD_MP2	0x15E4
 
@@ -75,6 +76,7 @@ struct amd_mp2_dev {
 	struct amdtp_cl_data *cl_data;
 	void __iomem *mmio;
 	const struct amd_mp2_ops *mp2_ops;
+	struct amd_input_data in_data;
 	/* mp2 active control status */
 	u32 mp2_acs;
 };
diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
index 6e3ad66e57a4..cdc0a8d32249 100644
--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
+++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
@@ -12,6 +12,7 @@
 #include "amd_sfh_pcie.h"
 #include "amd_sfh_hid_desc.h"
 #include "amd_sfh_hid_report_desc.h"
+#include "amd_sfh_hid.h"
 
 #define	AMD_SFH_FW_MULTIPLIER (1000)
 #define HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM	0x41
@@ -174,8 +175,12 @@ static void get_common_inputs(struct common_input_property *common, int report_i
 	common->event_type = HID_USAGE_SENSOR_EVENT_DATA_UPDATED_ENUM;
 }
 
-u8 get_input_report(int sensor_idx, int report_id, u8 *input_report, u32 *sensor_virt_addr)
+u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct amd_input_data *in_data)
 {
+	struct amd_mp2_dev *privdata = container_of(in_data, struct amd_mp2_dev, in_data);
+	u32 *sensor_virt_addr = in_data->sensor_virt_addr[current_index];
+	u8 *input_report = in_data->input_report[current_index];
+	u8 supported_input = privdata->mp2_acs & GENMASK(3, 0);
 	struct accel3_input_report acc_input;
 	struct gyro_input_report gyro_input;
 	struct magno_input_report magno_input;
@@ -213,7 +218,12 @@ u8 get_input_report(int sensor_idx, int report_id, u8 *input_report, u32 *sensor
 		break;
 	case als_idx: /* Als */
 		get_common_inputs(&als_input.common_property, report_id);
-		als_input.illuminance_value =  (int)sensor_virt_addr[0] / AMD_SFH_FW_MULTIPLIER;
+		/* For ALS ,V2 Platforms uses C2P_MSG5 register instead of DRAM access method */
+		if (supported_input == V2_STATUS)
+			als_input.illuminance_value = (int)readl(privdata->mmio + AMD_C2P_MSG(5));
+		else
+			als_input.illuminance_value =
+				(int)sensor_virt_addr[0] / AMD_SFH_FW_MULTIPLIER;
 		report_size = sizeof(als_input);
 		memcpy(input_report, &als_input, sizeof(als_input));
 		break;
diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h
index 095c471d8fd6..a23c1046627f 100644
--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h
+++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h
@@ -103,5 +103,4 @@ struct als_input_report {
 int get_report_descriptor(int sensor_idx, u8 rep_desc[]);
 u32 get_descr_sz(int sensor_idx, int descriptor_name);
 u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report);
-u8 get_input_report(int sensor_idx, int report_id, u8 *input_report, u32 *sensor_virt_addr);
 #endif

From 24a31ea94922d391a96a9dd0a9a830de65423817 Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Fri, 18 Jun 2021 13:48:38 +0530
Subject: [PATCH 2763/3292] HID: amd_sfh: Add initial support for HPD sensor

Add Human Presence Detection (HPD) sensors support
on AMD next generation HPD supported platforms.

Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Reviewed-by: Nehal Shah <nehal-bakulchandra.shah@amd.com>
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/amd-sfh-hid/amd_sfh_hid.h         |   2 +-
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.c        |   4 +
 drivers/hid/amd-sfh-hid/amd_sfh_pcie.h        |  14 +++
 .../hid_descriptor/amd_sfh_hid_desc.c         |  34 +++++-
 .../hid_descriptor/amd_sfh_hid_desc.h         |  10 ++
 .../hid_descriptor/amd_sfh_hid_report_desc.h  | 112 ++++++++++++++++++
 6 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
index 359c5de96af8..ae2ac9191ba7 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h
@@ -9,7 +9,7 @@
 #ifndef AMDSFH_HID_H
 #define AMDSFH_HID_H
 
-#define MAX_HID_DEVICES		4
+#define MAX_HID_DEVICES		5
 #define BUS_AMD_AMDTP		0x20
 #define AMD_SFH_HID_VENDOR	0x1022
 #define AMD_SFH_HID_PRODUCT	0x0001
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
index ff131f450bdc..96e2577fa37e 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
@@ -24,6 +24,7 @@
 #define ACEL_EN		BIT(0)
 #define GYRO_EN		BIT(1)
 #define MAGNO_EN	BIT(2)
+#define HPD_EN		BIT(16)
 #define ALS_EN		BIT(19)
 
 static int sensor_mask_override = -1;
@@ -165,6 +166,9 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id)
 	if (ALS_EN & activestatus)
 		sensor_id[num_of_sensors++] = als_idx;
 
+	if (HPD_EN & activestatus)
+		sensor_id[num_of_sensors++] = HPD_IDX;
+
 	return num_of_sensors;
 }
 
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
index 0886b2ad033e..2d5c57e3782d 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h
@@ -30,6 +30,8 @@
 
 #define V2_STATUS	0x2
 
+#define HPD_IDX		16
+
 /* SFH Command register */
 union sfh_cmd_base {
 	u32 ul;
@@ -92,6 +94,18 @@ enum mem_use_type {
 	USE_C2P_REG,
 };
 
+struct hpd_status {
+	union {
+		struct {
+			u32 human_presence_report : 4;
+			u32 human_presence_actual : 4;
+			u32 probablity		  : 8;
+			u32 object_distance       : 16;
+		} shpd;
+		u32 val;
+	};
+};
+
 void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info);
 void amd_stop_sensor(struct amd_mp2_dev *privdata, u16 sensor_idx);
 void amd_stop_all_sensors(struct amd_mp2_dev *privdata);
diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
index cdc0a8d32249..0c3697219382 100644
--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
+++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c
@@ -50,6 +50,11 @@ int get_report_descriptor(int sensor_idx, u8 *rep_desc)
 		memcpy(rep_desc, als_report_descriptor,
 		       sizeof(als_report_descriptor));
 		break;
+	case HPD_IDX: /* HPD sensor */
+		memset(rep_desc, 0, sizeof(hpd_report_descriptor));
+		memcpy(rep_desc, hpd_report_descriptor,
+		       sizeof(hpd_report_descriptor));
+		break;
 	default:
 		break;
 	}
@@ -99,6 +104,17 @@ u32 get_descr_sz(int sensor_idx, int descriptor_name)
 			return sizeof(struct als_feature_report);
 		}
 		break;
+	case HPD_IDX:
+		switch (descriptor_name) {
+		case descr_size:
+			return sizeof(hpd_report_descriptor);
+		case input_size:
+			return sizeof(struct hpd_input_report);
+		case feature_size:
+			return sizeof(struct hpd_feature_report);
+		}
+		break;
+
 	default:
 		break;
 	}
@@ -120,6 +136,7 @@ u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report)
 	struct accel3_feature_report acc_feature;
 	struct gyro_feature_report gyro_feature;
 	struct magno_feature_report magno_feature;
+	struct hpd_feature_report hpd_feature;
 	struct als_feature_report als_feature;
 	u8 report_size = 0;
 
@@ -162,6 +179,12 @@ u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report)
 		memcpy(feature_report, &als_feature, sizeof(als_feature));
 		report_size = sizeof(als_feature);
 		break;
+	case HPD_IDX:  /* human presence detection sensor */
+		get_common_features(&hpd_feature.common_property, report_id);
+		memcpy(feature_report, &hpd_feature, sizeof(hpd_feature));
+		report_size = sizeof(hpd_feature);
+		break;
+
 	default:
 		break;
 	}
@@ -181,10 +204,12 @@ u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct amd_
 	u32 *sensor_virt_addr = in_data->sensor_virt_addr[current_index];
 	u8 *input_report = in_data->input_report[current_index];
 	u8 supported_input = privdata->mp2_acs & GENMASK(3, 0);
+	struct magno_input_report magno_input;
 	struct accel3_input_report acc_input;
 	struct gyro_input_report gyro_input;
-	struct magno_input_report magno_input;
+	struct hpd_input_report hpd_input;
 	struct als_input_report als_input;
+	struct hpd_status hpdstatus;
 	u8 report_size = 0;
 
 	if (!sensor_virt_addr || !input_report)
@@ -227,6 +252,13 @@ u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct amd_
 		report_size = sizeof(als_input);
 		memcpy(input_report, &als_input, sizeof(als_input));
 		break;
+	case HPD_IDX: /* hpd */
+		get_common_inputs(&hpd_input.common_property, report_id);
+		hpdstatus.val = readl(privdata->mmio + AMD_C2P_MSG(4));
+		hpd_input.human_presence = hpdstatus.shpd.human_presence_actual;
+		report_size = sizeof(hpd_input);
+		memcpy(input_report, &hpd_input, sizeof(hpd_input));
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h
index a23c1046627f..16f563d1823b 100644
--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h
+++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h
@@ -100,6 +100,16 @@ struct als_input_report {
 	int illuminance_value;
 } __packed;
 
+struct hpd_feature_report {
+	struct common_feature_property common_property;
+} __packed;
+
+struct hpd_input_report {
+	struct common_input_property common_property;
+	 /* values specific to human presence sensor */
+	u8 human_presence;
+} __packed;
+
 int get_report_descriptor(int sensor_idx, u8 rep_desc[]);
 u32 get_descr_sz(int sensor_idx, int descriptor_name);
 u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report);
diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h
index 44271d39b322..66d6b26e4708 100644
--- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h
+++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h
@@ -642,4 +642,116 @@ const u8 als_report_descriptor[] = {
 0X81, 0x02,		/* HID Input (Data_Arr_Abs) */
 0xC0			/* HID end collection */
 };
+
+/* BIOMETRIC PRESENCE*/
+static const u8 hpd_report_descriptor[] = {
+0x05, 0x20,          /* Usage page */
+0x09, 0x11,          /* BIOMETRIC PRESENCE  */
+0xA1, 0x00,          /* HID Collection (Physical) */
+
+//feature reports(xmit/receive)
+0x85, 5,           /* HID  Report ID */
+0x05, 0x20,	   /* HID usage page sensor */
+0x0A, 0x09, 0x03,  /* Sensor property and sensor connection type */
+0x15, 0,           /* HID logical MIN_8(0) */
+0x25, 2,	   /* HID logical MAX_8(2) */
+0x75, 8,	   /* HID report size(8) */
+0x95, 1,	   /* HID report count(1) */
+0xA1, 0x02,	   /* HID collection (logical) */
+0x0A, 0x30, 0x08, /* Sensor property connection type intergated sel*/
+0x0A, 0x31, 0x08, /* Sensor property connection type attached sel */
+0x0A, 0x32, 0x08, /* Sensor property connection type external sel */
+0xB1, 0x00,       /* HID feature (Data_Arr_Abs) */
+0xC0,		  /* HID end collection */
+0x0A, 0x16, 0x03, /* HID usage sensor property reporting state */
+0x15, 0,          /* HID logical Min_8(0) */
+0x25, 5,	  /* HID logical Max_8(5) */
+0x75, 8,	  /* HID report size(8) */
+0x95, 1,          /* HID report count(1) */
+0xA1, 0x02,	  /* HID collection(logical) */
+0x0A, 0x40, 0x08, /* Sensor property report state no events sel */
+0x0A, 0x41, 0x08, /* Sensor property report state all events sel */
+0x0A, 0x42, 0x08, /* Sensor property report state threshold events sel */
+0x0A, 0x43, 0x08, /* Sensor property report state no events wake sel */
+0x0A, 0x44, 0x08, /* Sensor property report state all events wake sel */
+0x0A, 0x45, 0x08, /* Sensor property report state threshold events wake sel */
+0xB1, 0x00,	  /* HID feature (Data_Arr_Abs) */
+0xC0,		  /* HID end collection */
+0x0A, 0x19, 0x03, /* HID usage sensor property power state */
+0x15, 0,	  /* HID logical Min_8(0) */
+0x25, 5,	  /* HID logical Max_8(5) */
+0x75, 8,	  /* HID report size(8) */
+0x95, 1,	  /* HID report count(1) */
+0xA1, 0x02,	  /* HID collection(logical) */
+0x0A, 0x50, 0x08, /* Sensor property power state undefined sel */
+0x0A, 0x51, 0x08, /* Sensor property power state D0 full power  sel */
+0x0A, 0x52, 0x08, /* Sensor property power state D1 low power sel */
+0x0A, 0x53, 0x08, /* Sensor property power state D2 standby with wake sel */
+0x0A, 0x54, 0x08, /* Sensor property power state D3 sleep with wake  sel */
+0x0A, 0x55, 0x08, /* Sensor property power state D4 power off sel */
+0xB1, 0x00,       /* HID feature (Data_Arr_Abs) */
+0xC0,		  /* HID end collection */
+0x0A, 0x01, 0x02, /* HID usage sensor state */
+0x15, 0,	  /* HID logical Min_8(0) */
+0x25, 6,	  /* HID logical Max_8(6) */
+0x75, 8,	  /* HID report size(8) */
+0x95, 1,	  /* HID report count(1) */
+0xA1, 0x02,	  /* HID collection(logical) */
+0x0A, 0x00, 0x08, /* HID usage sensor state unknown sel */
+0x0A, 0x01, 0x08, /* HID usage sensor state ready sel */
+0x0A, 0x02, 0x08, /* HID usage sensor state not available sel */
+0x0A, 0x03, 0x08, /* HID usage sensor state no data sel */
+0x0A, 0x04, 0x08, /* HID usage sensor state initializing sel */
+0x0A, 0x05, 0x08, /* HID usage sensor state access denied sel */
+0x0A, 0x06, 0x08, /* HID usage sensor state error sel */
+0xB1, 0x00,	  /* HID feature (Data_Arr_Abs) */
+0xC0,		  /* HID end collection */
+0x0A, 0x0E, 0x03, /* HID usage sensor property report interval */
+0x15, 0,	  /* HID logical Min_8(0) */
+0x27, 0xFF, 0xFF, 0xFF, 0xFF, /* HID logical Max_32 */
+
+0x75, 32,	  /* HID report size(32) */
+0x95, 1,	  /* HID report count(1) */
+0x55, 0,	  /* HID unit exponent(0) */
+0xB1, 0x02,	  /* HID feature (Data_Var_Abs) */
+
+//input report (transmit)
+0x05, 0x20,		 /* HID usage page sensors */
+0x0A, 0x01, 0x02,	 /* HID usage sensor state */
+0x15, 0,		 /* HID logical Min_8(0) */
+0x25, 6,		 /* HID logical Max_8(6) */
+0x75, 8,		 /* HID report size(8) */
+0x95, 1,		 /* HID report count (1) */
+0xA1, 0x02,		 /* HID end collection (logical) */
+0x0A, 0x00, 0x08,	 /* HID usage sensor state unknown sel */
+0x0A, 0x01, 0x08,	 /* HID usage sensor state ready sel */
+0x0A, 0x02, 0x08,	 /* HID usage sensor state not available sel */
+0x0A, 0x03, 0x08,	 /* HID usage sensor state no data sel */
+0x0A, 0x04, 0x08,	 /* HID usage sensor state initializing sel */
+0x0A, 0x05, 0x08,	 /* HID usage sensor state access denied sel */
+0x0A, 0x06, 0x08,	 /* HID usage sensor state error sel */
+0X81, 0x00,		 /* HID Input (Data_Arr_Abs) */
+0xC0,			 /* HID end collection */
+0x0A, 0x02, 0x02,	 /* HID usage sensor event */
+0x15, 0,		 /* HID logical Min_8(0) */
+0x25, 5,		 /* HID logical Max_8(5) */
+0x75, 8,		 /* HID report size(8) */
+0x95, 1,		 /* HID report count (1) */
+0xA1, 0x02,		 /* HID end collection (logical) */
+0x0A, 0x10, 0x08,	 /* HID usage sensor event unknown sel */
+0x0A, 0x11, 0x08,	 /* HID usage sensor event state changed sel */
+0x0A, 0x12, 0x08,	 /* HID usage sensor event property changed sel */
+0x0A, 0x13, 0x08,	 /* HID usage sensor event data updated sel */
+0x0A, 0x14, 0x08,	 /* HID usage sensor event poll response sel */
+0x0A, 0x15, 0x08,	 /* HID usage sensor event change sensitivity sel */
+0X81, 0x00,		 /* HID Input (Data_Arr_Abs) */
+0xC0,			 /* HID end collection */
+0x0A, 0xB1, 0x04,	 /* HID usage sensor data BIOMETRIC HUMAN PRESENCE */
+0x15, 0,		 /* HID logical Min_8(0) */
+0x25, 1,		 /* HID logical Max_8(1) */
+0x75, 8,		 /* HID report size(8) */
+0x95, 1,		 /* HID report count (1) */
+0X81, 0x02,		 /* HID Input (Data_Var_Abs) */
+0xC0			 /* HID end collection */
+};
 #endif

From 647d446d66e493d23ca1047fa8492b0269674530 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Wed, 23 Jun 2021 22:37:54 +0100
Subject: [PATCH 2764/3292] media, bpf: Do not copy more entries than user
 space requested

The syscall bpf(BPF_PROG_QUERY, &attr) should use the prog_cnt field to
see how many entries user space provided and return ENOSPC if there are
more programs than that. Before this patch, this is not checked and
ENOSPC is never returned.

Note that one lirc device is limited to 64 bpf programs, and user space
I'm aware of -- ir-keytable -- always gives enough space for 64 entries
already. However, we should not copy program ids than are requested.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210623213754.632-1-sean@mess.org
---
 drivers/media/rc/bpf-lirc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 3fe3edd80876..afae0afe3f81 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -326,7 +326,8 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
 	}
 
 	if (attr->query.prog_cnt != 0 && prog_ids && cnt)
-		ret = bpf_prog_array_copy_to_user(progs, prog_ids, cnt);
+		ret = bpf_prog_array_copy_to_user(progs, prog_ids,
+						  attr->query.prog_cnt);
 
 unlock:
 	mutex_unlock(&ir_raw_handler_lock);

From 424d8237945c6c448c8b3f23885d464fb5685c97 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Wed, 23 Jun 2021 09:58:09 -0700
Subject: [PATCH 2765/3292] HID: wacom: Correct base usage for capacitive
 ExpressKey status bits

The capacitive status of ExpressKeys is reported with usages beginning
at 0x940, not 0x950. Bring our driver into alignment with reality.

Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 71c886245dbf..8f16654eca09 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -122,7 +122,7 @@
 #define WACOM_HID_WD_TOUCHONOFF         (WACOM_HID_UP_WACOMDIGITIZER | 0x0454)
 #define WACOM_HID_WD_BATTERY_LEVEL      (WACOM_HID_UP_WACOMDIGITIZER | 0x043b)
 #define WACOM_HID_WD_EXPRESSKEY00       (WACOM_HID_UP_WACOMDIGITIZER | 0x0910)
-#define WACOM_HID_WD_EXPRESSKEYCAP00    (WACOM_HID_UP_WACOMDIGITIZER | 0x0950)
+#define WACOM_HID_WD_EXPRESSKEYCAP00    (WACOM_HID_UP_WACOMDIGITIZER | 0x0940)
 #define WACOM_HID_WD_MODE_CHANGE        (WACOM_HID_UP_WACOMDIGITIZER | 0x0980)
 #define WACOM_HID_WD_MUTE_DEVICE        (WACOM_HID_UP_WACOMDIGITIZER | 0x0981)
 #define WACOM_HID_WD_CONTROLPANEL       (WACOM_HID_UP_WACOMDIGITIZER | 0x0982)

From ba47396e1c042619f1c038ad19493aef737677f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Wed, 16 Jun 2021 17:09:50 -0700
Subject: [PATCH 2766/3292] Revert "bpf: Check for BPF_F_ADJ_ROOM_FIXED_GSO
 when bpf_skb_change_proto"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit fa7b83bf3b156c767f3e4a25bbf3817b08f3ff8e.

See the followup commit for the reasoning why I believe the appropriate
approach is to simply make this change without a flag, but it can basically
be summarized as using this helper without the flag is bug-prone or outright
buggy, and thus the default should be this new behaviour.

As this commit has only made it into net-next/master, but not into
any real release, such a backwards incompatible change is still ok.

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Dongseok Yi <dseok.yi@samsung.com>
Cc: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/20210617000953.2787453-1-zenczykowski@gmail.com
---
 net/core/filter.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 0b13d8157a8f..243abf519efd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3235,7 +3235,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
 	return ret;
 }
 
-static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
 	u32 off = skb_mac_header_len(skb);
@@ -3264,9 +3264,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
 		}
 
 		/* Due to IPv6 header, MSS needs to be downgraded. */
-		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
-			skb_decrease_gso_size(shinfo, len_diff);
-
+		skb_decrease_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3278,7 +3276,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u64 flags)
 	return 0;
 }
 
-static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 {
 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
 	u32 off = skb_mac_header_len(skb);
@@ -3307,9 +3305,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
 		}
 
 		/* Due to IPv4 header, MSS can be upgraded. */
-		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
-			skb_increase_gso_size(shinfo, len_diff);
-
+		skb_increase_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3321,17 +3317,17 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u64 flags)
 	return 0;
 }
 
-static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u64 flags)
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
 {
 	__be16 from_proto = skb->protocol;
 
 	if (from_proto == htons(ETH_P_IP) &&
 	      to_proto == htons(ETH_P_IPV6))
-		return bpf_skb_proto_4_to_6(skb, flags);
+		return bpf_skb_proto_4_to_6(skb);
 
 	if (from_proto == htons(ETH_P_IPV6) &&
 	      to_proto == htons(ETH_P_IP))
-		return bpf_skb_proto_6_to_4(skb, flags);
+		return bpf_skb_proto_6_to_4(skb);
 
 	return -ENOTSUPP;
 }
@@ -3341,7 +3337,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 {
 	int ret;
 
-	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO)))
+	if (unlikely(flags))
 		return -EINVAL;
 
 	/* General idea is that this helper does the basic groundwork
@@ -3361,7 +3357,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 	 * that. For offloads, we mark packet as dodgy, so that headers
 	 * need to be verified first.
 	 */
-	ret = bpf_skb_proto_xlat(skb, proto, flags);
+	ret = bpf_skb_proto_xlat(skb, proto);
 	bpf_compute_data_pointers(skb);
 	return ret;
 }

From bcfa8d14570d85c998a9b706b074ab151b286edf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 15 Jun 2021 23:41:03 +0200
Subject: [PATCH 2767/3292] HID: input: Add support for Programmable Buttons
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Map them to KEY_MACRO# event codes.

These buttons are defined by HID as follows:
"The user defines the function of these buttons to control software applications or GUI objects."

This matches the semantics of the KEY_MACRO# input event codes that Linux supports.

Also add support for HID "Named Array" collections.
Also add hid-debug support for KEY_MACRO#.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c | 11 +++++++++++
 drivers/hid/hid-input.c | 22 ++++++++++++++++++++++
 include/linux/hid.h     |  1 +
 3 files changed, 34 insertions(+)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 59f8d716d78f..0e76d9b4530a 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -122,6 +122,7 @@ static const struct hid_usage_entry hid_usage_table[] = {
   {  9, 0, "Button" },
   { 10, 0, "Ordinal" },
   { 12, 0, "Consumer" },
+      {0, 0x003, "ProgrammableButtons"},
       {0, 0x238, "HorizontalWheel"},
   { 13, 0, "Digitizers" },
     {0, 0x01, "Digitizer"},
@@ -939,6 +940,16 @@ static const char *keys[KEY_MAX + 1] = {
 	[KEY_KBDINPUTASSIST_NEXTGROUP] = "KbdInputAssistNextGroup",
 	[KEY_KBDINPUTASSIST_ACCEPT] = "KbdInputAssistAccept",
 	[KEY_KBDINPUTASSIST_CANCEL] = "KbdInputAssistCancel",
+	[KEY_MACRO1] = "Macro1", [KEY_MACRO2] = "Macro2", [KEY_MACRO3] = "Macro3",
+	[KEY_MACRO4] = "Macro4", [KEY_MACRO5] = "Macro5", [KEY_MACRO6] = "Macro6",
+	[KEY_MACRO7] = "Macro7", [KEY_MACRO8] = "Macro8", [KEY_MACRO9] = "Macro9",
+	[KEY_MACRO10] = "Macro10", [KEY_MACRO11] = "Macro11", [KEY_MACRO12] = "Macro12",
+	[KEY_MACRO13] = "Macro13", [KEY_MACRO14] = "Macro14", [KEY_MACRO15] = "Macro15",
+	[KEY_MACRO16] = "Macro16", [KEY_MACRO17] = "Macro17", [KEY_MACRO18] = "Macro18",
+	[KEY_MACRO19] = "Macro19", [KEY_MACRO20] = "Macro20", [KEY_MACRO21] = "Macro21",
+	[KEY_MACRO22] = "Macro22", [KEY_MACRO23] = "Macro23", [KEY_MACRO24] = "Macro24",
+	[KEY_MACRO25] = "Macro25", [KEY_MACRO26] = "Macro26", [KEY_MACRO27] = "Macro27",
+	[KEY_MACRO28] = "Macro28", [KEY_MACRO29] = "Macro29", [KEY_MACRO30] = "Macro30",
 };
 
 static const char *relatives[REL_MAX + 1] = {
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index c62c6a9b2132..56bdd55fbfb3 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -567,6 +567,16 @@ static void hidinput_update_battery(struct hid_device *dev, int value)
 }
 #endif	/* CONFIG_HID_BATTERY_STRENGTH */
 
+static bool hidinput_field_in_collection(struct hid_device *device, struct hid_field *field,
+					 unsigned int type, unsigned int usage)
+{
+	struct hid_collection *collection;
+
+	collection = &device->collection[field->usage->collection_index];
+
+	return collection->type == type && collection->usage == usage;
+}
+
 static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_field *field,
 				     struct hid_usage *usage)
 {
@@ -632,6 +642,18 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 				else
 					code += BTN_TRIGGER_HAPPY - 0x10;
 				break;
+		case HID_CP_CONSUMER_CONTROL:
+				if (hidinput_field_in_collection(device, field,
+								 HID_COLLECTION_NAMED_ARRAY,
+								 HID_CP_PROGRAMMABLEBUTTONS)) {
+					if (code <= 0x1d)
+						code += KEY_MACRO1;
+					else
+						code += BTN_TRIGGER_HAPPY - 0x1e;
+				} else {
+					goto ignore;
+				}
+				break;
 		default:
 			switch (field->physical) {
 			case HID_GD_MOUSE:
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 271021e20a3f..fb0e4dde6175 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -102,6 +102,7 @@ struct hid_item {
 #define HID_COLLECTION_PHYSICAL		0
 #define HID_COLLECTION_APPLICATION	1
 #define HID_COLLECTION_LOGICAL		2
+#define HID_COLLECTION_NAMED_ARRAY	4
 
 /*
  * HID report descriptor global item tags

From 6d2424a84533d3563ef525cb6e19cfda13abc472 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 27 May 2021 16:55:57 -0700
Subject: [PATCH 2768/3292] ext4: fix comment for s_hash_unsigned

Fix the comment for s_hash_unsigned to not be the opposite of what it
actually is.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20210527235557.2377525-1-ebiggers@kernel.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a646bfcbd0e8..8ff4ae3b5715 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1486,7 +1486,7 @@ struct ext4_sb_info {
 	unsigned int s_inode_goal;
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
-	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
+	int s_hash_unsigned;	/* 3 if hash should be unsigned, 0 if not */
 	struct percpu_counter s_freeclusters_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;

From 364745fbe981a4370f50274475da4675661104df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Wed, 16 Jun 2021 17:09:51 -0700
Subject: [PATCH 2769/3292] bpf: Do not change gso_size during
 bpf_skb_change_proto()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is technically a backwards incompatible change in behaviour, but I'm
going to argue that it is very unlikely to break things, and likely to fix
*far* more then it breaks.

In no particular order, various reasons follow:

(a) I've long had a bug assigned to myself to debug a super rare kernel crash
on Android Pixel phones which can (per stacktrace) be traced back to BPF clat
IPv6 to IPv4 protocol conversion causing some sort of ugly failure much later
on during transmit deep in the GSO engine, AFAICT precisely because of this
change to gso_size, though I've never been able to manually reproduce it. I
believe it may be related to the particular network offload support of attached
USB ethernet dongle being used for tethering off of an IPv6-only cellular
connection. The reason might be we end up with more segments than max permitted,
or with a GSO packet with only one segment... (either way we break some
assumption and hit a BUG_ON)

(b) There is no check that the gso_size is > 20 when reducing it by 20, so we
might end up with a negative (or underflowing) gso_size or a gso_size of 0.
This can't possibly be good. Indeed this is probably somehow exploitable (or
at least can result in a kernel crash) by delivering crafted packets and perhaps
triggering an infinite loop or a divide by zero... As a reminder: gso_size (MSS)
is related to MTU, but not directly derived from it: gso_size/MSS may be
significantly smaller then one would get by deriving from local MTU. And on
some NICs (which do loose MTU checking on receive, it may even potentially be
larger, for example my work pc with 1500 MTU can receive 1520 byte frames [and
sometimes does due to bugs in a vendor plat46 implementation]). Indeed even just
going from 21 to 1 is potentially problematic because it increases the number
of segments by a factor of 21 (think DoS, or some other crash due to too many
segments).

(c) It's always safe to not increase the gso_size, because it doesn't result in
the max packet size increasing.  So the skb_increase_gso_size() call was always
unnecessary for correctness (and outright undesirable, see later). As such the
only part which is potentially dangerous (ie. could cause backwards compatibility
issues) is the removal of the skb_decrease_gso_size() call.

(d) If the packets are ultimately destined to the local device, then there is
absolutely no benefit to playing around with gso_size. It only matters if the
packets will egress the device. ie. we're either forwarding, or transmitting
from the device.

(e) This logic only triggers for packets which are GSO. It does not trigger for
skbs which are not GSO. It will not convert a non-GSO MTU sized packet into a
GSO packet (and you don't even know what the MTU is, so you can't even fix it).
As such your transmit path must *already* be able to handle an MTU 20 bytes
larger then your receive path (for IPv4 to IPv6 translation) - and indeed 28
bytes larger due to IPv4 fragments. Thus removing the skb_decrease_gso_size()
call doesn't actually increase the size of the packets your transmit side must
be able to handle. ie. to handle non-GSO max-MTU packets, the IPv4/IPv6 device/
route MTUs must already be set correctly. Since for example with an IPv4 egress
MTU of 1500, IPv4 to IPv6 translation will already build 1520 byte IPv6 frames,
so you need a 1520 byte device MTU. This means if your IPv6 device's egress
MTU is 1280, your IPv4 route must be 1260 (and actually 1252, because of the
need to handle fragments). This is to handle normal non-GSO packets. Thus the
reduction is simply not needed for GSO packets, because when they're correctly
built, they will already be the right size.

(f) TSO/GSO should be able to exactly undo GRO: the number of packets (TCP
segments) should not be modified, so that TCP's MSS counting works correctly
(this matters for congestion control). If protocol conversion changes the
gso_size, then the number of TCP segments may increase or decrease. Packet loss
after protocol conversion can result in partial loss of MSS segments that the
sender sent. How's the sending TCP stack going to react to receiving ACKs/SACKs
in the middle of the segments it sent?

(g) skb_{decrease,increase}_gso_size() are already no-ops for GSO_BY_FRAGS
case (besides triggering WARN_ON_ONCE). This means you already cannot guarantee
that gso_size (and thus resulting packet MTU) is changed. ie. you must assume
it won't be changed.

(h) changing gso_size is outright buggy for UDP GSO packets, where framing
matters (I believe that's also the case for SCTP, but it's already excluded
by [g]).  So the only remaining case is TCP, which also doesn't want it
(see [f]).

(i) see also the reasoning on the previous attempt at fixing this
(commit fa7b83bf3b156c767f3e4a25bbf3817b08f3ff8e) which shows that the current
behaviour causes TCP packet loss:

  In the forwarding path GRO -> BPF 6 to 4 -> GSO for TCP traffic, the
  coalesced packet payload can be > MSS, but < MSS + 20.

  bpf_skb_proto_6_to_4() will upgrade the MSS and it can be > the payload
  length. After then tcp_gso_segment checks for the payload length if it
  is <= MSS. The condition is causing the packet to be dropped.

  tcp_gso_segment():
    [...]
    mss = skb_shinfo(skb)->gso_size;
    if (unlikely(skb->len <= mss)) goto out;
    [...]

Thus changing the gso_size is simply a very bad idea. Increasing is unnecessary
and buggy, and decreasing can go negative.

Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Dongseok Yi <dseok.yi@samsung.com>
Cc: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/CANP3RGfjLikQ6dg=YpBU0OeHvyv7JOki7CyOUS9modaXAi-9vQ@mail.gmail.com
Link: https://lore.kernel.org/bpf/20210617000953.2787453-2-zenczykowski@gmail.com
---
 net/core/filter.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 243abf519efd..ae92a8bada0f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3263,8 +3263,6 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 			shinfo->gso_type |=  SKB_GSO_TCPV6;
 		}
 
-		/* Due to IPv6 header, MSS needs to be downgraded. */
-		skb_decrease_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;
@@ -3304,8 +3302,6 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 			shinfo->gso_type |=  SKB_GSO_TCPV4;
 		}
 
-		/* Due to IPv4 header, MSS can be upgraded. */
-		skb_increase_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= SKB_GSO_DODGY;
 		shinfo->gso_segs = 0;

From ee00d6b3c7aa65f97ace382ddf59739e65f5e8dd Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Mon, 31 May 2021 09:19:08 +0530
Subject: [PATCH 2770/3292] ext4: fsmap: fix the block/inode bitmap comment

While debugging fstest ext4/027 failure, found below comment to be wrong and
confusing. Hence fix it while we are at it.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/e79134132db7ea42f15747b5c669ee91cc1aacdf.1622432690.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fsmap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/fsmap.h b/fs/ext4/fsmap.h
index 68c8001fee85..ac642be2302e 100644
--- a/fs/ext4/fsmap.h
+++ b/fs/ext4/fsmap.h
@@ -50,7 +50,7 @@ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head,
 #define EXT4_FMR_OWN_INODES	FMR_OWNER('X', 5) /* inodes */
 #define EXT4_FMR_OWN_GDT	FMR_OWNER('f', 1) /* group descriptors */
 #define EXT4_FMR_OWN_RESV_GDT	FMR_OWNER('f', 2) /* reserved gdt blocks */
-#define EXT4_FMR_OWN_BLKBM	FMR_OWNER('f', 3) /* inode bitmap */
-#define EXT4_FMR_OWN_INOBM	FMR_OWNER('f', 4) /* block bitmap */
+#define EXT4_FMR_OWN_BLKBM	FMR_OWNER('f', 3) /* block bitmap */
+#define EXT4_FMR_OWN_INOBM	FMR_OWNER('f', 4) /* inode bitmap */
 
 #endif /* __EXT4_FSMAP_H__ */

From 0bc919d3e0b8149a60d2444c6a8e2b5974556522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Wed, 16 Jun 2021 17:09:52 -0700
Subject: [PATCH 2771/3292] bpf: Support all gso types in
 bpf_skb_change_proto()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we no longer modify gso_size, it is now theoretically
safe to not set SKB_GSO_DODGY and reset gso_segs to zero.

This also means the skb_is_gso_tcp() check should no longer
be necessary.

Unfortunately we cannot remove the skb_{decrease,increase}_gso_size()
helpers, as they are still used elsewhere:

  bpf_skb_net_grow() without BPF_F_ADJ_ROOM_FIXED_GSO
  bpf_skb_net_shrink() without BPF_F_ADJ_ROOM_FIXED_GSO
  net/core/lwt_bpf.c's handle_gso_type()

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Dongseok Yi <dseok.yi@samsung.com>
Cc: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/20210617000953.2787453-3-zenczykowski@gmail.com
---
 net/core/filter.c | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index ae92a8bada0f..d062053994c7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3241,9 +3241,6 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-		return -ENOTSUPP;
-
 	ret = skb_cow(skb, len_diff);
 	if (unlikely(ret < 0))
 		return ret;
@@ -3255,17 +3252,11 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-		/* SKB_GSO_TCPV4 needs to be changed into
-		 * SKB_GSO_TCPV6.
-		 */
+		/* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
 		if (shinfo->gso_type & SKB_GSO_TCPV4) {
 			shinfo->gso_type &= ~SKB_GSO_TCPV4;
 			shinfo->gso_type |=  SKB_GSO_TCPV6;
 		}
-
-		/* Header must be checked, and gso_segs recomputed. */
-		shinfo->gso_type |= SKB_GSO_DODGY;
-		shinfo->gso_segs = 0;
 	}
 
 	skb->protocol = htons(ETH_P_IPV6);
@@ -3280,9 +3271,6 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
-		return -ENOTSUPP;
-
 	ret = skb_unclone(skb, GFP_ATOMIC);
 	if (unlikely(ret < 0))
 		return ret;
@@ -3294,17 +3282,11 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-		/* SKB_GSO_TCPV6 needs to be changed into
-		 * SKB_GSO_TCPV4.
-		 */
+		/* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
 		if (shinfo->gso_type & SKB_GSO_TCPV6) {
 			shinfo->gso_type &= ~SKB_GSO_TCPV6;
 			shinfo->gso_type |=  SKB_GSO_TCPV4;
 		}
-
-		/* Header must be checked, and gso_segs recomputed. */
-		shinfo->gso_type |= SKB_GSO_DODGY;
-		shinfo->gso_segs = 0;
 	}
 
 	skb->protocol = htons(ETH_P_IP);

From 310c097c2bdbea253d6ee4e064f3e65580ef93ac Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Thu, 3 Jun 2021 07:33:02 +0530
Subject: [PATCH 2772/3292] ext4: remove duplicate definition of
 ext4_xattr_ibody_inline_set()

ext4_xattr_ibody_inline_set() & ext4_xattr_ibody_set() have the exact
same definition.  Hence remove ext4_xattr_ibody_inline_set() and all
its call references. Convert the callers of it to call
ext4_xattr_ibody_set() instead.

[ Modified to preserve ext4_xattr_ibody_set() and remove
  ext4_xattr_ibody_inline_set() instead. -- TYT ]

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/fd566b799bbbbe9b668eb5eecde5b5e319e3694f.1622685482.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inline.c | 11 +++++------
 fs/ext4/xattr.c  | 26 +-------------------------
 fs/ext4/xattr.h  |  6 +++---
 3 files changed, 9 insertions(+), 34 deletions(-)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 3cf01629010d..70cb64db33f7 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -204,7 +204,7 @@ out:
 /*
  * write the buffer to the inline inode.
  * If 'create' is set, we don't need to do the extra copy in the xattr
- * value since it is already handled by ext4_xattr_ibody_inline_set.
+ * value since it is already handled by ext4_xattr_ibody_set.
  * That saves us one memcpy.
  */
 static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
@@ -286,7 +286,7 @@ static int ext4_create_inline_data(handle_t *handle,
 
 	BUG_ON(!is.s.not_found);
 
-	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
 	if (error) {
 		if (error == -ENOSPC)
 			ext4_clear_inode_state(inode,
@@ -358,7 +358,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
 	i.value = value;
 	i.value_len = len;
 
-	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
 	if (error)
 		goto out;
 
@@ -431,7 +431,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
 	if (error)
 		goto out;
 
-	error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
 	if (error)
 		goto out;
 
@@ -1925,8 +1925,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
 			i.value = value;
 			i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
 					i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
-			err = ext4_xattr_ibody_inline_set(handle, inode,
-							  &i, &is);
+			err = ext4_xattr_ibody_set(handle, inode, &i, &is);
 			if (err)
 				goto out_error;
 		}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 10ba4b24a0aa..6dd5c05c444a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2190,31 +2190,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 	return 0;
 }
 
-int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
-				struct ext4_xattr_info *i,
-				struct ext4_xattr_ibody_find *is)
-{
-	struct ext4_xattr_ibody_header *header;
-	struct ext4_xattr_search *s = &is->s;
-	int error;
-
-	if (EXT4_I(inode)->i_extra_isize == 0)
-		return -ENOSPC;
-	error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
-	if (error)
-		return error;
-	header = IHDR(inode, ext4_raw_inode(&is->iloc));
-	if (!IS_LAST_ENTRY(s->first)) {
-		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
-		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
-	} else {
-		header->h_magic = cpu_to_le32(0);
-		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
-	}
-	return 0;
-}
-
-static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
 				struct ext4_xattr_info *i,
 				struct ext4_xattr_ibody_find *is)
 {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 730b91fa0dd7..77efb9a627ad 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -186,9 +186,9 @@ extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
 				const char *name,
 				void *buffer, size_t buffer_size);
-extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
-				       struct ext4_xattr_info *i,
-				       struct ext4_xattr_ibody_find *is);
+extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+				struct ext4_xattr_info *i,
+				struct ext4_xattr_ibody_find *is);
 
 extern struct mb_cache *ext4_xattr_create_cache(void);
 extern void ext4_xattr_destroy_cache(struct mb_cache *);

From e9f9f61d0cdcb7f0b0b5feb2d84aa1c5894751f3 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Mon, 7 Jun 2021 12:15:08 -0700
Subject: [PATCH 2773/3292] ext4: consolidate checks for resize of bigalloc
 into ext4_resize_begin

Two different places checked for attempts to resize a filesystem with
the bigalloc feature. Move the check into ext4_resize_begin, which both
places already call.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: https://lore.kernel.org/r/bee03303d999225ecb3bfa5be8576b2f4c6edbe6.1623093259.git.josh@joshtriplett.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ioctl.c  | 14 --------------
 fs/ext4/resize.c |  5 +++++
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5730aeca563c..e27f34bceb8d 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -692,13 +692,6 @@ static long ext4_ioctl_group_add(struct file *file,
 	if (err)
 		return err;
 
-	if (ext4_has_feature_bigalloc(sb)) {
-		ext4_msg(sb, KERN_ERR,
-			 "Online resizing not supported with bigalloc");
-		err = -EOPNOTSUPP;
-		goto group_add_out;
-	}
-
 	err = mnt_want_write_file(file);
 	if (err)
 		goto group_add_out;
@@ -921,13 +914,6 @@ setversion_out:
 			goto group_extend_out;
 		}
 
-		if (ext4_has_feature_bigalloc(sb)) {
-			ext4_msg(sb, KERN_ERR,
-				 "Online resizing not supported with bigalloc");
-			err = -EOPNOTSUPP;
-			goto group_extend_out;
-		}
-
 		err = mnt_want_write_file(filp);
 		if (err)
 			goto group_extend_out;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bd0d185654f3..d13bb9e76482 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -74,6 +74,11 @@ int ext4_resize_begin(struct super_block *sb)
 		return -EPERM;
 	}
 
+	if (ext4_has_feature_bigalloc(sb)) {
+		ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc");
+		return -EOPNOTSUPP;
+	}
+
 	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
 				  &EXT4_SB(sb)->s_ext4_flags))
 		ret = -EBUSY;

From b1489186cc8391e0c1e342f9fbc3eedf6b944c61 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Mon, 7 Jun 2021 12:15:24 -0700
Subject: [PATCH 2774/3292] ext4: add check to prevent attempting to resize an
 fs with sparse_super2

The in-kernel ext4 resize code doesn't support filesystem with the
sparse_super2 feature. It fails with errors like this and doesn't finish
the resize:
EXT4-fs (loop0): resizing filesystem from 16640 to 7864320 blocks
EXT4-fs warning (device loop0): verify_reserved_gdb:760: reserved GDT 2 missing grp 1 (32770)
EXT4-fs warning (device loop0): ext4_resize_fs:2111: error (-22) occurred during file system resize
EXT4-fs (loop0): resized filesystem to 2097152

To reproduce:
mkfs.ext4 -b 4096 -I 256 -J size=32 -E resize=$((256*1024*1024)) -O sparse_super2 ext4.img 65M
truncate -s 30G ext4.img
mount ext4.img /mnt
python3 -c 'import fcntl, os, struct ; fd = os.open("/mnt", os.O_RDONLY | os.O_DIRECTORY) ; fcntl.ioctl(fd, 0x40086610, struct.pack("Q", 30 * 1024 * 1024 * 1024 // 4096), False) ; os.close(fd)'
dmesg | tail
e2fsck ext4.img

The userspace resize2fs tool has a check for this case: it checks if the
filesystem has sparse_super2 set and if the kernel provides
/sys/fs/ext4/features/sparse_super2. However, the former check requires
manually reading and parsing the filesystem superblock.

Detect this case in ext4_resize_begin and error out early with a clear
error message.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: https://lore.kernel.org/r/74b8ae78405270211943cd7393e65586c5faeed1.1623093259.git.josh@joshtriplett.org
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/resize.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d13bb9e76482..fc885914c88a 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -78,6 +78,10 @@ int ext4_resize_begin(struct super_block *sb)
 		ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc");
 		return -EOPNOTSUPP;
 	}
+	if (ext4_has_feature_sparse_super2(sb)) {
+		ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2");
+		return -EOPNOTSUPP;
+	}
 
 	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
 				  &EXT4_SB(sb)->s_ext4_flags))

From d07621d9b9b8231187cc6e2121c927b3b8016789 Mon Sep 17 00:00:00 2001
From: yangerkun <yangerkun@huawei.com>
Date: Tue, 8 Jun 2021 22:12:36 +0800
Subject: [PATCH 2775/3292] jbd2: clean up misleading comments for
 jbd2_fc_release_bufs

This comments was for jbd2_fc_wait_bufs, not for jbd2_fc_release_bufs.
Remove this misleading comments.

Signed-off-by: yangerkun <yangerkun@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210608141236.459441-1-yangerkun@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/journal.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 3a2ed60ea8b7..f88895b4920c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -934,10 +934,6 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
 }
 EXPORT_SYMBOL(jbd2_fc_wait_bufs);
 
-/*
- * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
- * for completion.
- */
 int jbd2_fc_release_bufs(journal_t *journal)
 {
 	struct buffer_head *bh;
@@ -945,10 +941,6 @@ int jbd2_fc_release_bufs(journal_t *journal)
 
 	j_fc_off = journal->j_fc_off;
 
-	/*
-	 * Wait in reverse order to minimize chances of us being woken up before
-	 * all IOs have completed
-	 */
 	for (i = j_fc_off - 1; i >= 0; i--) {
 		bh = journal->j_fc_wbuf[i];
 		if (!bh)

From 0caaefbaf2a429c256c7469cb603ca8918e96fb0 Mon Sep 17 00:00:00 2001
From: yangerkun <yangerkun@huawei.com>
Date: Wed, 9 Jun 2021 15:55:45 +0800
Subject: [PATCH 2776/3292] ext4: no need to verify new add extent block

ext4_ext_grow_indepth will add a new extent block which has init the
expected content. We can mark this buffer as verified so to stop a
useless check in __read_extent_tree_block.

Signed-off-by: yangerkun <yangerkun@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210609075545.1442160-1-yangerkun@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/extents.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1293de50c8d4..92ad64b89d9b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1309,6 +1309,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	neh->eh_magic = EXT4_EXT_MAGIC;
 	ext4_extent_block_csum_set(inode, neh);
 	set_buffer_uptodate(bh);
+	set_buffer_verified(bh);
 	unlock_buffer(bh);
 
 	err = ext4_handle_dirty_metadata(handle, inode, bh);

From 1866cba842437f3e7a5a8ee5b558744d9ae844d0 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:33 +0800
Subject: [PATCH 2777/3292] jbd2: remove the out label in
 __jbd2_journal_remove_checkpoint()

The 'out' lable just return the 'ret' value and seems not required, so
remove this label and switch to return appropriate value immediately.
This patch also do some minor cleanup, no logical change.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-2-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 63b526d44886..bf5511d19ac5 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -564,13 +564,13 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	struct transaction_chp_stats_s *stats;
 	transaction_t *transaction;
 	journal_t *journal;
-	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
-	if ((transaction = jh->b_cp_transaction) == NULL) {
+	transaction = jh->b_cp_transaction;
+	if (!transaction) {
 		JBUFFER_TRACE(jh, "not on transaction");
-		goto out;
+		return 0;
 	}
 	journal = transaction->t_journal;
 
@@ -579,9 +579,9 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	jh->b_cp_transaction = NULL;
 	jbd2_journal_put_journal_head(jh);
 
-	if (transaction->t_checkpoint_list != NULL ||
-	    transaction->t_checkpoint_io_list != NULL)
-		goto out;
+	/* Is this transaction empty? */
+	if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+		return 0;
 
 	/*
 	 * There is one special case to worry about: if we have just pulled the
@@ -593,10 +593,12 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	 * See the comment at the end of jbd2_journal_commit_transaction().
 	 */
 	if (transaction->t_state != T_FINISHED)
-		goto out;
+		return 0;
 
-	/* OK, that was the last buffer for the transaction: we can now
-	   safely remove this transaction from the log */
+	/*
+	 * OK, that was the last buffer for the transaction, we can now
+	 * safely remove this transaction from the log.
+	 */
 	stats = &transaction->t_chp_stats;
 	if (stats->cs_chp_time)
 		stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
@@ -606,9 +608,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 
 	__jbd2_journal_drop_transaction(journal, transaction);
 	jbd2_journal_free_transaction(transaction);
-	ret = 1;
-out:
-	return ret;
+	return 1;
 }
 
 /*

From fcf37549ae19e904bc6a5eadf5c25eca36100c5e Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:34 +0800
Subject: [PATCH 2778/3292] jbd2: ensure abort the journal if detect IO error
 when writing original buffer back

Although we merged c044f3d8360 ("jbd2: abort journal if free a async
write error metadata buffer"), there is a race between
jbd2_journal_try_to_free_buffers() and jbd2_journal_destroy(), so the
jbd2_log_do_checkpoint() may still fail to detect the buffer write
io error flag which may lead to filesystem inconsistency.

jbd2_journal_try_to_free_buffers()     ext4_put_super()
                                        jbd2_journal_destroy()
  __jbd2_journal_remove_checkpoint()
  detect buffer write error              jbd2_log_do_checkpoint()
                                         jbd2_cleanup_journal_tail()
                                           <--- lead to inconsistency
  jbd2_journal_abort()

Fix this issue by introducing a new atomic flag which only have one
JBD2_CHECKPOINT_IO_ERROR bit now, and set it in
__jbd2_journal_remove_checkpoint() when freeing a checkpoint buffer
which has write_io_error flag. Then jbd2_journal_destroy() will detect
this mark and abort the journal to prevent updating log tail.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-3-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c | 12 ++++++++++++
 fs/jbd2/journal.c    | 14 ++++++++++++++
 include/linux/jbd2.h | 11 +++++++++++
 3 files changed, 37 insertions(+)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index bf5511d19ac5..d27c10f4502f 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -564,6 +564,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	struct transaction_chp_stats_s *stats;
 	transaction_t *transaction;
 	journal_t *journal;
+	struct buffer_head *bh = jh2bh(jh);
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -575,6 +576,17 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	JBUFFER_TRACE(jh, "removing from transaction");
+
+	/*
+	 * If we have failed to write the buffer out to disk, the filesystem
+	 * may become inconsistent. We cannot abort the journal here since
+	 * we hold j_list_lock and we have to be careful about races with
+	 * jbd2_journal_destroy(). So mark the writeback IO error in the
+	 * journal here and we abort the journal later from a better context.
+	 */
+	if (buffer_write_io_error(bh))
+		set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
+
 	__buffer_unlink(jh);
 	jh->b_cp_transaction = NULL;
 	jbd2_journal_put_journal_head(jh);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f88895b4920c..8b3f5bbd65f9 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1610,6 +1610,10 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 
 	if (is_journal_aborted(journal))
 		return -EIO;
+	if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) {
+		jbd2_journal_abort(journal, -EIO);
+		return -EIO;
+	}
 
 	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@@ -2091,6 +2095,16 @@ int jbd2_journal_destroy(journal_t *journal)
 	J_ASSERT(journal->j_checkpoint_transactions == NULL);
 	spin_unlock(&journal->j_list_lock);
 
+	/*
+	 * OK, all checkpoint transactions have been checked, now check the
+	 * write out io error flag and abort the journal if some buffer failed
+	 * to write back to the original location, otherwise the filesystem
+	 * may become inconsistent.
+	 */
+	if (!is_journal_aborted(journal) &&
+	    test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags))
+		jbd2_journal_abort(journal, -EIO);
+
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
 			mutex_lock_io(&journal->j_checkpoint_mutex);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8543233b0388..d5db408ae064 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -779,6 +779,11 @@ struct journal_s
 	 */
 	unsigned long		j_flags;
 
+	/**
+	 * @j_atomic_flags: Atomic journaling state flags.
+	 */
+	unsigned long		j_atomic_flags;
+
 	/**
 	 * @j_errno:
 	 *
@@ -1375,6 +1380,12 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,	FAST_COMMIT)
 #define JBD2_JOURNAL_FLUSH_VALID	(JBD2_JOURNAL_FLUSH_DISCARD | \
 					JBD2_JOURNAL_FLUSH_ZEROOUT)
 
+/*
+ * Journal atomic flag definitions
+ */
+#define JBD2_CHECKPOINT_IO_ERROR	0x001	/* Detect io error while writing
+						 * buffer back to disk */
+
 /*
  * Function declarations for the journaling transaction and buffer
  * management

From 235d68069cbd158cb00835d434e9e9accf9a6dd4 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:35 +0800
Subject: [PATCH 2779/3292] jbd2: don't abort the journal when freeing buffers

Now that we can be sure the journal is aborted once a buffer has failed
to be written back to disk, we can remove the journal abort logic in
jbd2_journal_try_to_free_buffers() which was introduced in
commit c044f3d8360d ("jbd2: abort journal if free a async write error
metadata buffer"), because it may cost and propably is not safe.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-4-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e8fc45fd751f..8804e126805f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2123,7 +2123,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
-	bool has_write_io_error = false;
 	int ret = 0;
 
 	J_ASSERT(PageLocked(page));
@@ -2148,26 +2147,10 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
 		jbd2_journal_put_journal_head(jh);
 		if (buffer_jbd(bh))
 			goto busy;
-
-		/*
-		 * If we free a metadata buffer which has been failed to
-		 * write out, the jbd2 checkpoint procedure will not detect
-		 * this failure and may lead to filesystem inconsistency
-		 * after cleanup journal tail.
-		 */
-		if (buffer_write_io_error(bh)) {
-			pr_err("JBD2: Error while async write back metadata bh %llu.",
-			       (unsigned long long)bh->b_blocknr);
-			has_write_io_error = true;
-		}
 	} while ((bh = bh->b_this_page) != head);
 
 	ret = try_to_free_buffers(page);
-
 busy:
-	if (has_write_io_error)
-		jbd2_journal_abort(journal, -EIO);
-
 	return ret;
 }
 

From 214eb5a4d8a2032fb9f0711d1b202eb88ee02920 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:36 +0800
Subject: [PATCH 2780/3292] jbd2: remove redundant buffer io error checks

Now that __jbd2_journal_remove_checkpoint() can detect buffer io error
and mark journal checkpoint error, then we abort the journal later
before updating log tail to ensure the filesystem works consistently.
So we could remove other redundant buffer io error checkes.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-5-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index d27c10f4502f..75a4f622afaf 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -91,8 +91,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 	int ret = 0;
 	struct buffer_head *bh = jh2bh(jh);
 
-	if (jh->b_transaction == NULL && !buffer_locked(bh) &&
-	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+	if (!jh->b_transaction && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 	}
@@ -228,7 +227,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
 	 * OK, we need to start writing disk blocks.  Take one transaction
 	 * and write it.
 	 */
-	result = 0;
 	spin_lock(&journal->j_list_lock);
 	if (!journal->j_checkpoint_transactions)
 		goto out;
@@ -295,8 +293,6 @@ restart:
 			goto restart;
 		}
 		if (!buffer_dirty(bh)) {
-			if (unlikely(buffer_write_io_error(bh)) && !result)
-				result = -EIO;
 			BUFFER_TRACE(bh, "remove from checkpoint");
 			if (__jbd2_journal_remove_checkpoint(jh))
 				/* The transaction was released; we're done */
@@ -356,8 +352,6 @@ restart2:
 			spin_lock(&journal->j_list_lock);
 			goto restart2;
 		}
-		if (unlikely(buffer_write_io_error(bh)) && !result)
-			result = -EIO;
 
 		/*
 		 * Now in whatever state the buffer currently is, we
@@ -369,10 +363,7 @@ restart2:
 	}
 out:
 	spin_unlock(&journal->j_list_lock);
-	if (result < 0)
-		jbd2_journal_abort(journal, result);
-	else
-		result = jbd2_cleanup_journal_tail(journal);
+	result = jbd2_cleanup_journal_tail(journal);
 
 	return (result < 0) ? result : 0;
 }

From 4ba3fcdde7e36af93610ceb3cc38365b14539865 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:37 +0800
Subject: [PATCH 2781/3292] jbd2,ext4: add a shrinker to release checkpointed
 buffers

Current metadata buffer release logic in bdev_try_to_free_page() have
a lot of use-after-free issues when umount filesystem concurrently, and
it is difficult to fix directly because ext4 is the only user of
s_op->bdev_try_to_free_page callback and we may have to add more special
refcount or lock that is only used by ext4 into the common vfs layer,
which is unacceptable.

One better solution is remove the bdev_try_to_free_page callback, but
the real problem is we cannot easily release journal_head on the
checkpointed buffer, so try_to_free_buffers() cannot release buffers and
page under memory pressure, which is more likely to trigger
out-of-memory. So we cannot remove the callback directly before we find
another way to release journal_head.

This patch introduce a shrinker to free journal_head on the checkpointed
transaction. After the journal_head got freed, try_to_free_buffers()
could free buffer properly.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-6-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c             |   8 ++
 fs/jbd2/checkpoint.c        | 147 ++++++++++++++++++++++++++++++++++++
 fs/jbd2/journal.c           |  87 +++++++++++++++++++++
 include/linux/jbd2.h        |  26 +++++++
 include/trace/events/jbd2.h | 101 +++++++++++++++++++++++++
 5 files changed, 369 insertions(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ad3919dbd49e..7ee2e21537e0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1174,6 +1174,7 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_unregister_sysfs(sb);
 
 	if (sbi->s_journal) {
+		jbd2_journal_unregister_shrinker(sbi->s_journal);
 		aborted = is_journal_aborted(sbi->s_journal);
 		err = jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
@@ -5186,6 +5187,7 @@ failed_mount_wq:
 	sbi->s_ea_block_cache = NULL;
 
 	if (sbi->s_journal) {
+		jbd2_journal_unregister_shrinker(sbi->s_journal);
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
@@ -5511,6 +5513,12 @@ static int ext4_load_journal(struct super_block *sb,
 		ext4_commit_super(sb);
 	}
 
+	err = jbd2_journal_register_shrinker(journal);
+	if (err) {
+		EXT4_SB(sb)->s_journal = NULL;
+		goto err_out;
+	}
+
 	return 0;
 
 err_out:
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 75a4f622afaf..1abdae44a3d8 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -79,6 +79,18 @@ static inline void __buffer_relink_io(struct journal_head *jh)
 	transaction->t_checkpoint_io_list = jh;
 }
 
+/*
+ * Check a checkpoint buffer could be release or not.
+ *
+ * Requires j_list_lock
+ */
+static inline bool __cp_buffer_busy(struct journal_head *jh)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
+}
+
 /*
  * Try to release a checkpointed buffer from its transaction.
  * Returns 1 if we released it and 2 if we also released the
@@ -458,6 +470,137 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 	return 0;
 }
 
+/*
+ * journal_shrink_one_cp_list
+ *
+ * Find 'nr_to_scan' written-back checkpoint buffers in the given list
+ * and try to release them. If the whole transaction is released, set
+ * the 'released' parameter. Return the number of released checkpointed
+ * buffers.
+ *
+ * Called with j_list_lock held.
+ */
+static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+						unsigned long *nr_to_scan,
+						bool *released)
+{
+	struct journal_head *last_jh;
+	struct journal_head *next_jh = jh;
+	unsigned long nr_freed = 0;
+	int ret;
+
+	if (!jh || *nr_to_scan == 0)
+		return 0;
+
+	last_jh = jh->b_cpprev;
+	do {
+		jh = next_jh;
+		next_jh = jh->b_cpnext;
+
+		(*nr_to_scan)--;
+		if (__cp_buffer_busy(jh))
+			continue;
+
+		nr_freed++;
+		ret = __jbd2_journal_remove_checkpoint(jh);
+		if (ret) {
+			*released = true;
+			break;
+		}
+
+		if (need_resched())
+			break;
+	} while (jh != last_jh && *nr_to_scan);
+
+	return nr_freed;
+}
+
+/*
+ * jbd2_journal_shrink_checkpoint_list
+ *
+ * Find 'nr_to_scan' written-back checkpoint buffers in the journal
+ * and try to release them. Return the number of released checkpointed
+ * buffers.
+ *
+ * Called with j_list_lock held.
+ */
+unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
+						  unsigned long *nr_to_scan)
+{
+	transaction_t *transaction, *last_transaction, *next_transaction;
+	bool released;
+	tid_t first_tid = 0, last_tid = 0, next_tid = 0;
+	tid_t tid = 0;
+	unsigned long nr_freed = 0;
+	unsigned long nr_scanned = *nr_to_scan;
+
+again:
+	spin_lock(&journal->j_list_lock);
+	if (!journal->j_checkpoint_transactions) {
+		spin_unlock(&journal->j_list_lock);
+		goto out;
+	}
+
+	/*
+	 * Get next shrink transaction, resume previous scan or start
+	 * over again. If some others do checkpoint and drop transaction
+	 * from the checkpoint list, we ignore saved j_shrink_transaction
+	 * and start over unconditionally.
+	 */
+	if (journal->j_shrink_transaction)
+		transaction = journal->j_shrink_transaction;
+	else
+		transaction = journal->j_checkpoint_transactions;
+
+	if (!first_tid)
+		first_tid = transaction->t_tid;
+	last_transaction = journal->j_checkpoint_transactions->t_cpprev;
+	next_transaction = transaction;
+	last_tid = last_transaction->t_tid;
+	do {
+		transaction = next_transaction;
+		next_transaction = transaction->t_cpnext;
+		tid = transaction->t_tid;
+		released = false;
+
+		nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+						       nr_to_scan, &released);
+		if (*nr_to_scan == 0)
+			break;
+		if (need_resched() || spin_needbreak(&journal->j_list_lock))
+			break;
+		if (released)
+			continue;
+
+		nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
+						       nr_to_scan, &released);
+		if (*nr_to_scan == 0)
+			break;
+		if (need_resched() || spin_needbreak(&journal->j_list_lock))
+			break;
+	} while (transaction != last_transaction);
+
+	if (transaction != last_transaction) {
+		journal->j_shrink_transaction = next_transaction;
+		next_tid = next_transaction->t_tid;
+	} else {
+		journal->j_shrink_transaction = NULL;
+		next_tid = 0;
+	}
+
+	spin_unlock(&journal->j_list_lock);
+	cond_resched();
+
+	if (*nr_to_scan && next_tid)
+		goto again;
+out:
+	nr_scanned -= *nr_to_scan;
+	trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
+					  nr_freed, nr_scanned, next_tid);
+
+	return nr_freed;
+}
+
 /*
  * journal_clean_checkpoint_list
  *
@@ -580,6 +723,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 
 	__buffer_unlink(jh);
 	jh->b_cp_transaction = NULL;
+	percpu_counter_dec(&journal->j_jh_shrink_count);
 	jbd2_journal_put_journal_head(jh);
 
 	/* Is this transaction empty? */
@@ -642,6 +786,7 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 		jh->b_cpnext->b_cpprev = jh;
 	}
 	transaction->t_checkpoint_list = jh;
+	percpu_counter_inc(&transaction->t_journal->j_jh_shrink_count);
 }
 
 /*
@@ -657,6 +802,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 {
 	assert_spin_locked(&journal->j_list_lock);
+
+	journal->j_shrink_transaction = NULL;
 	if (transaction->t_cpnext) {
 		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
 		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8b3f5bbd65f9..7c52feb6f753 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2050,6 +2050,91 @@ recovery_error:
 	return -EIO;
 }
 
+/**
+ * jbd2_journal_shrink_scan()
+ *
+ * Scan the checkpointed buffer on the checkpoint list and release the
+ * journal_head.
+ */
+static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
+					      struct shrink_control *sc)
+{
+	journal_t *journal = container_of(shrink, journal_t, j_shrinker);
+	unsigned long nr_to_scan = sc->nr_to_scan;
+	unsigned long nr_shrunk;
+	unsigned long count;
+
+	count = percpu_counter_read_positive(&journal->j_jh_shrink_count);
+	trace_jbd2_shrink_scan_enter(journal, sc->nr_to_scan, count);
+
+	nr_shrunk = jbd2_journal_shrink_checkpoint_list(journal, &nr_to_scan);
+
+	count = percpu_counter_read_positive(&journal->j_jh_shrink_count);
+	trace_jbd2_shrink_scan_exit(journal, nr_to_scan, nr_shrunk, count);
+
+	return nr_shrunk;
+}
+
+/**
+ * jbd2_journal_shrink_count()
+ *
+ * Count the number of checkpoint buffers on the checkpoint list.
+ */
+static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
+					       struct shrink_control *sc)
+{
+	journal_t *journal = container_of(shrink, journal_t, j_shrinker);
+	unsigned long count;
+
+	count = percpu_counter_read_positive(&journal->j_jh_shrink_count);
+	trace_jbd2_shrink_count(journal, sc->nr_to_scan, count);
+
+	return count;
+}
+
+/**
+ * jbd2_journal_register_shrinker()
+ * @journal: Journal to act on.
+ *
+ * Init a percpu counter to record the checkpointed buffers on the checkpoint
+ * list and register a shrinker to release their journal_head.
+ */
+int jbd2_journal_register_shrinker(journal_t *journal)
+{
+	int err;
+
+	journal->j_shrink_transaction = NULL;
+
+	err = percpu_counter_init(&journal->j_jh_shrink_count, 0, GFP_KERNEL);
+	if (err)
+		return err;
+
+	journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
+	journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
+	journal->j_shrinker.seeks = DEFAULT_SEEKS;
+	journal->j_shrinker.batch = journal->j_max_transaction_buffers;
+
+	err = register_shrinker(&journal->j_shrinker);
+	if (err) {
+		percpu_counter_destroy(&journal->j_jh_shrink_count);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * jbd2_journal_unregister_shrinker()
+ * @journal: Journal to act on.
+ *
+ * Unregister the checkpointed buffer shrinker and destroy the percpu counter.
+ */
+void jbd2_journal_unregister_shrinker(journal_t *journal)
+{
+	percpu_counter_destroy(&journal->j_jh_shrink_count);
+	unregister_shrinker(&journal->j_shrinker);
+}
+
 /**
  * jbd2_journal_destroy() - Release a journal_t structure.
  * @journal: Journal to act on.
@@ -2122,6 +2207,8 @@ int jbd2_journal_destroy(journal_t *journal)
 		brelse(journal->j_sb_buffer);
 	}
 
+	jbd2_journal_unregister_shrinker(journal);
+
 	if (journal->j_proc_entry)
 		jbd2_stats_proc_exit(journal);
 	iput(journal->j_inode);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d5db408ae064..6cc035321562 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -909,6 +909,29 @@ struct journal_s
 	 */
 	struct buffer_head	*j_chkpt_bhs[JBD2_NR_BATCH];
 
+	/**
+	 * @j_shrinker:
+	 *
+	 * Journal head shrinker, reclaim buffer's journal head which
+	 * has been written back.
+	 */
+	struct shrinker		j_shrinker;
+
+	/**
+	 * @j_jh_shrink_count:
+	 *
+	 * Number of journal buffers on the checkpoint list. [j_list_lock]
+	 */
+	struct percpu_counter	j_jh_shrink_count;
+
+	/**
+	 * @j_shrink_transaction:
+	 *
+	 * Record next transaction will shrink on the checkpoint list.
+	 * [j_list_lock]
+	 */
+	transaction_t		*j_shrink_transaction;
+
 	/**
 	 * @j_head:
 	 *
@@ -1422,6 +1445,7 @@ extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
+unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void jbd2_journal_destroy_checkpoint(journal_t *journal);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
@@ -1532,6 +1556,8 @@ extern int	   jbd2_journal_set_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern void	   jbd2_journal_clear_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
+extern int	   jbd2_journal_register_shrinker(journal_t *journal);
+extern void	   jbd2_journal_unregister_shrinker(journal_t *journal);
 extern int	   jbd2_journal_load       (journal_t *journal);
 extern int	   jbd2_journal_destroy    (journal_t *);
 extern int	   jbd2_journal_recover    (journal_t *journal);
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index d16a32867f3a..a4dfe005983d 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -394,6 +394,107 @@ TRACE_EVENT(jbd2_lock_buffer_stall,
 		__entry->stall_ms)
 );
 
+DECLARE_EVENT_CLASS(jbd2_journal_shrink,
+
+	TP_PROTO(journal_t *journal, unsigned long nr_to_scan,
+		 unsigned long count),
+
+	TP_ARGS(journal, nr_to_scan, count),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long, nr_to_scan)
+		__field(unsigned long, count)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->nr_to_scan	= nr_to_scan;
+		__entry->count		= count;
+	),
+
+	TP_printk("dev %d,%d nr_to_scan %lu count %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->nr_to_scan, __entry->count)
+);
+
+DEFINE_EVENT(jbd2_journal_shrink, jbd2_shrink_count,
+
+	TP_PROTO(journal_t *journal, unsigned long nr_to_scan, unsigned long count),
+
+	TP_ARGS(journal, nr_to_scan, count)
+);
+
+DEFINE_EVENT(jbd2_journal_shrink, jbd2_shrink_scan_enter,
+
+	TP_PROTO(journal_t *journal, unsigned long nr_to_scan, unsigned long count),
+
+	TP_ARGS(journal, nr_to_scan, count)
+);
+
+TRACE_EVENT(jbd2_shrink_scan_exit,
+
+	TP_PROTO(journal_t *journal, unsigned long nr_to_scan,
+		 unsigned long nr_shrunk, unsigned long count),
+
+	TP_ARGS(journal, nr_to_scan, nr_shrunk, count),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long, nr_to_scan)
+		__field(unsigned long, nr_shrunk)
+		__field(unsigned long, count)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->nr_to_scan	= nr_to_scan;
+		__entry->nr_shrunk	= nr_shrunk;
+		__entry->count		= count;
+	),
+
+	TP_printk("dev %d,%d nr_to_scan %lu nr_shrunk %lu count %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->nr_to_scan, __entry->nr_shrunk,
+		  __entry->count)
+);
+
+TRACE_EVENT(jbd2_shrink_checkpoint_list,
+
+	TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid,
+		 unsigned long nr_freed, unsigned long nr_scanned,
+		 tid_t next_tid),
+
+	TP_ARGS(journal, first_tid, tid, last_tid, nr_freed,
+		nr_scanned, next_tid),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(tid_t, first_tid)
+		__field(tid_t, tid)
+		__field(tid_t, last_tid)
+		__field(unsigned long, nr_freed)
+		__field(unsigned long, nr_scanned)
+		__field(tid_t, next_tid)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->first_tid	= first_tid;
+		__entry->tid		= tid;
+		__entry->last_tid	= last_tid;
+		__entry->nr_freed	= nr_freed;
+		__entry->nr_scanned	= nr_scanned;
+		__entry->next_tid	= next_tid;
+	),
+
+	TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu "
+		  "scanned %lu next transaction %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->first_tid, __entry->tid, __entry->last_tid,
+		  __entry->nr_freed, __entry->nr_scanned, __entry->next_tid)
+);
+
 #endif /* _TRACE_JBD2_H */
 
 /* This part must be outside protection */

From dbf2bab7935b65689f3b39178cf87374f0334ead Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:38 +0800
Subject: [PATCH 2782/3292] jbd2: simplify journal_clean_one_cp_list()

Now that __try_to_free_cp_buf() remove checkpointed buffer or transaction
when the buffer is not 'busy', which is only called by
journal_clean_one_cp_list(). This patch simplify this function by remove
__try_to_free_cp_buf() and invoke __cp_buffer_busy() directly.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-7-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1abdae44a3d8..51d1eb2ffeb9 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -91,25 +91,6 @@ static inline bool __cp_buffer_busy(struct journal_head *jh)
 	return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
 }
 
-/*
- * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it and 2 if we also released the
- * whole transaction.
- *
- * Requires j_list_lock
- */
-static int __try_to_free_cp_buf(struct journal_head *jh)
-{
-	int ret = 0;
-	struct buffer_head *bh = jh2bh(jh);
-
-	if (!jh->b_transaction && !buffer_locked(bh) && !buffer_dirty(bh)) {
-		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
-	}
-	return ret;
-}
-
 /*
  * __jbd2_log_wait_for_space: wait until there is space in the journal.
  *
@@ -440,7 +421,6 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 {
 	struct journal_head *last_jh;
 	struct journal_head *next_jh = jh;
-	int ret;
 
 	if (!jh)
 		return 0;
@@ -449,13 +429,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 	do {
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
-		if (!destroy)
-			ret = __try_to_free_cp_buf(jh);
-		else
-			ret = __jbd2_journal_remove_checkpoint(jh) + 1;
-		if (!ret)
+
+		if (!destroy && __cp_buffer_busy(jh))
 			return 0;
-		if (ret == 2)
+
+		if (__jbd2_journal_remove_checkpoint(jh))
 			return 1;
 		/*
 		 * This function only frees up some memory

From 3b672e3aedffc9f092e7e7eae0050a97a8ca508e Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:39 +0800
Subject: [PATCH 2783/3292] ext4: remove bdev_try_to_free_page() callback

After we introduce a jbd2 shrinker to release checkpointed buffer's
journal head, we could free buffer without bdev_try_to_free_page()
under memory pressure. So this patch remove the whole
bdev_try_to_free_page() callback directly. It also remove many
use-after-free issues relate to it together.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-8-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7ee2e21537e0..9e0fb798b807 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1442,26 +1442,6 @@ static int ext4_nfs_commit_metadata(struct inode *inode)
 	return ext4_write_inode(inode, &wbc);
 }
 
-/*
- * Try to release metadata pages (indirect blocks, directories) which are
- * mapped via the block device.  Since these pages could have journal heads
- * which would prevent try_to_free_buffers() from freeing them, we must use
- * jbd2 layer's try_to_free_buffers() function to release them.
- */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
-				 gfp_t wait)
-{
-	journal_t *journal = EXT4_SB(sb)->s_journal;
-
-	WARN_ON(PageChecked(page));
-	if (!page_has_buffers(page))
-		return 0;
-	if (journal)
-		return jbd2_journal_try_to_free_buffers(journal, page);
-
-	return try_to_free_buffers(page);
-}
-
 #ifdef CONFIG_FS_ENCRYPTION
 static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
 {
@@ -1656,7 +1636,6 @@ static const struct super_operations ext4_sops = {
 	.quota_write	= ext4_quota_write,
 	.get_dquots	= ext4_get_dquots,
 #endif
-	.bdev_try_to_free_page = bdev_try_to_free_page,
 };
 
 static const struct export_operations ext4_export_ops = {

From acc6100d3ffa24bdd2add8ea85fb66811bcce5d4 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Thu, 10 Jun 2021 19:24:40 +0800
Subject: [PATCH 2784/3292] fs: remove bdev_try_to_free_page callback

After remove the unique user of sop->bdev_try_to_free_page() callback,
we could remove the callback and the corresponding blkdev_releasepage()
at all.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210610112440.3438139-9-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/block_dev.c     | 15 ---------------
 include/linux/fs.h |  1 -
 2 files changed, 16 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6cc4d4cfe0c2..e215da6d49b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1733,20 +1733,6 @@ ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 }
 EXPORT_SYMBOL_GPL(blkdev_read_iter);
 
-/*
- * Try to release a page associated with block device when the system
- * is under memory pressure.
- */
-static int blkdev_releasepage(struct page *page, gfp_t wait)
-{
-	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
-
-	if (super && super->s_op->bdev_try_to_free_page)
-		return super->s_op->bdev_try_to_free_page(super, page, wait);
-
-	return try_to_free_buffers(page);
-}
-
 static int blkdev_writepages(struct address_space *mapping,
 			     struct writeback_control *wbc)
 {
@@ -1760,7 +1746,6 @@ static const struct address_space_operations def_blk_aops = {
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
 	.writepages	= blkdev_writepages,
-	.releasepage	= blkdev_releasepage,
 	.direct_IO	= blkdev_direct_IO,
 	.migratepage	= buffer_migrate_page_norefs,
 	.is_dirty_writeback = buffer_check_dirty_writeback,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..c3277b445f96 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2171,7 +2171,6 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 	struct dquot **(*get_dquots)(struct inode *);
 #endif
-	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
 	long (*nr_cached_objects)(struct super_block *,
 				  struct shrink_control *);
 	long (*free_cached_objects)(struct super_block *,

From 60b6a7e6a0f4382cd689f9afdac816964fec2921 Mon Sep 17 00:00:00 2001
From: Edward Hsieh <edwardh@synology.com>
Date: Thu, 24 Jun 2021 20:30:30 +0800
Subject: [PATCH 2785/3292] block: fix trace completion for chained bio

For chained bio, trace_block_bio_complete in bio_endio is currently called
only by the parent bio once upon all chained bio completed.
However, the sector and size for the parent bio are modified in bio_split.
Therefore, the size and sector of the complete events might not match the
queue events in blktrace.

The original fix of bio completion trace <fbbaf700e7b1> ("block: trace
completion of all bios.") wants multiple complete events to correspond
to one queue event but missed this.

The issue can be reproduced by md/raid5 read with bio cross chunks.

To fix, move trace completion into the loop for every chained bio to call.

Fixes: fbbaf700e7b1 ("block: trace completion of all bios.")
Reviewed-by: Wade Liang <wadel@synology.com>
Reviewed-by: BingJing Chang <bingjingc@synology.com>
Signed-off-by: Edward Hsieh <edwardh@synology.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210624123030.27014-1-edwardh@synology.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 44205dfb6b60..1fab762e079b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1375,8 +1375,7 @@ static inline bool bio_remaining_done(struct bio *bio)
  *
  *   bio_endio() can be called several times on a bio that has been chained
  *   using bio_chain().  The ->bi_end_io() function will only be called the
- *   last time.  At this point the BLK_TA_COMPLETE tracing event will be
- *   generated if BIO_TRACE_COMPLETION is set.
+ *   last time.
  **/
 void bio_endio(struct bio *bio)
 {
@@ -1389,6 +1388,11 @@ again:
 	if (bio->bi_bdev)
 		rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
 
+	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+		trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
+		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
+	}
+
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
 	 * various corner cases will break (like stacking block devices that
@@ -1402,11 +1406,6 @@ again:
 		goto again;
 	}
 
-	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-		trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
-		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
-	}
-
 	blk_throtl_bio_endio(bio);
 	/* release cgroup info */
 	bio_uninit(bio);

From 1a3ac5c651a0c859bdea64ed964fc93c2ba980d3 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Sat, 17 Apr 2021 15:54:28 +0800
Subject: [PATCH 2786/3292] brcmfmac: support parse country code map from DT

With any regulatory domain requests coming from either user space or
802.11 IE (Information Element), the country is coded in ISO3166
standard.  It needs to be translated to firmware country code and
revision with the mapping info in settings->country_codes table.
Support populate country_codes table by parsing the mapping from DT.

The BRCMF_BUSTYPE_SDIO bus_type check gets separated from general DT
validation, so that country code can be handled as general part rather
than SDIO bus specific one.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210417075428.2671-1-shawn.guo@linaro.org
---
 .../wireless/broadcom/brcm80211/brcmfmac/of.c | 57 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
index a7554265f95f..2f7bc3a70c65 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
@@ -12,12 +12,59 @@
 #include "common.h"
 #include "of.h"
 
+static int brcmf_of_get_country_codes(struct device *dev,
+				      struct brcmf_mp_device *settings)
+{
+	struct device_node *np = dev->of_node;
+	struct brcmfmac_pd_cc_entry *cce;
+	struct brcmfmac_pd_cc *cc;
+	int count;
+	int i;
+
+	count = of_property_count_strings(np, "brcm,ccode-map");
+	if (count < 0) {
+		/* The property is optional, so return success if it doesn't
+		 * exist. Otherwise propagate the error code.
+		 */
+		return (count == -EINVAL) ? 0 : count;
+	}
+
+	cc = devm_kzalloc(dev, sizeof(*cc) + count * sizeof(*cce), GFP_KERNEL);
+	if (!cc)
+		return -ENOMEM;
+
+	cc->table_size = count;
+
+	for (i = 0; i < count; i++) {
+		const char *map;
+
+		cce = &cc->table[i];
+
+		if (of_property_read_string_index(np, "brcm,ccode-map",
+						  i, &map))
+			continue;
+
+		/* String format e.g. US-Q2-86 */
+		if (sscanf(map, "%2c-%2c-%d", cce->iso3166, cce->cc,
+			   &cce->rev) != 3)
+			brcmf_err("failed to read country map %s\n", map);
+		else
+			brcmf_dbg(INFO, "%s-%s-%d\n", cce->iso3166, cce->cc,
+				  cce->rev);
+	}
+
+	settings->country_codes = cc;
+
+	return 0;
+}
+
 void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
 		    struct brcmf_mp_device *settings)
 {
 	struct brcmfmac_sdio_pd *sdio = &settings->bus.sdio;
 	struct device_node *root, *np = dev->of_node;
 	int irq;
+	int err;
 	u32 irqf;
 	u32 val;
 
@@ -43,8 +90,14 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
 		of_node_put(root);
 	}
 
-	if (!np || bus_type != BRCMF_BUSTYPE_SDIO ||
-	    !of_device_is_compatible(np, "brcm,bcm4329-fmac"))
+	if (!np || !of_device_is_compatible(np, "brcm,bcm4329-fmac"))
+		return;
+
+	err = brcmf_of_get_country_codes(dev, settings);
+	if (err)
+		brcmf_err("failed to get OF country code map (err=%d)\n", err);
+
+	if (bus_type != BRCMF_BUSTYPE_SDIO)
 		return;
 
 	if (of_property_read_u32(np, "brcm,drive-strength", &val) == 0)

From 1d8820d5462dcdd34f3eb7ef4893536c439e476d Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Thu, 24 Jun 2021 10:34:59 +0800
Subject: [PATCH 2787/3292] rtw88: fix c2h memory leak

Fix erroneous code that leads to unreferenced objects. During H2C
operations, some functions returned without freeing the memory that only
the function have access to. Release these objects when they're no longer
needed to avoid potentially memory leaks.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210624023459.10294-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/coex.c | 11 ++++++++++-
 drivers/net/wireless/realtek/rtw88/fw.c   |  2 ++
 drivers/net/wireless/realtek/rtw88/main.c |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index 103e87745be6..2551e228b581 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -591,8 +591,10 @@ void rtw_coex_info_response(struct rtw_dev *rtwdev, struct sk_buff *skb)
 	struct rtw_coex *coex = &rtwdev->coex;
 	u8 *payload = get_payload_from_coex_resp(skb);
 
-	if (payload[0] != COEX_RESP_ACK_BY_WL_FW)
+	if (payload[0] != COEX_RESP_ACK_BY_WL_FW) {
+		dev_kfree_skb_any(skb);
 		return;
+	}
 
 	skb_queue_tail(&coex->queue, skb);
 	wake_up(&coex->wait);
@@ -3515,6 +3517,7 @@ static bool rtw_coex_get_bt_reg(struct rtw_dev *rtwdev,
 
 	payload = get_payload_from_coex_resp(skb);
 	*val = GET_COEX_RESP_BT_REG_VAL(payload);
+	dev_kfree_skb_any(skb);
 
 	return true;
 }
@@ -3533,6 +3536,8 @@ static bool rtw_coex_get_bt_patch_version(struct rtw_dev *rtwdev,
 
 	payload = get_payload_from_coex_resp(skb);
 	*patch_version = GET_COEX_RESP_BT_PATCH_VER(payload);
+	dev_kfree_skb_any(skb);
+
 	return true;
 }
 
@@ -3550,6 +3555,8 @@ static bool rtw_coex_get_bt_supported_version(struct rtw_dev *rtwdev,
 
 	payload = get_payload_from_coex_resp(skb);
 	*supported_version = GET_COEX_RESP_BT_SUPP_VER(payload);
+	dev_kfree_skb_any(skb);
+
 	return true;
 }
 
@@ -3567,6 +3574,8 @@ static bool rtw_coex_get_bt_supported_feature(struct rtw_dev *rtwdev,
 
 	payload = get_payload_from_coex_resp(skb);
 	*supported_feature = GET_COEX_RESP_BT_SUPP_FEAT(payload);
+	dev_kfree_skb_any(skb);
+
 	return true;
 }
 
diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 176e8b67530e..3bfa5ecc0053 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -245,10 +245,12 @@ void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
 		break;
 	case C2H_WLAN_RFON:
 		complete(&rtwdev->lps_leave_check);
+		dev_kfree_skb_any(skb);
 		break;
 	case C2H_SCAN_RESULT:
 		complete(&rtwdev->fw_scan_density);
 		rtw_fw_scan_result(rtwdev, c2h->payload, len);
+		dev_kfree_skb_any(skb);
 		break;
 	default:
 		/* pass offset for further operation */
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 4a9a8544e8ca..c6364837e83b 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1899,6 +1899,7 @@ void rtw_core_deinit(struct rtw_dev *rtwdev)
 	destroy_workqueue(rtwdev->tx_wq);
 	spin_lock_irqsave(&rtwdev->tx_report.q_lock, flags);
 	skb_queue_purge(&rtwdev->tx_report.queue);
+	skb_queue_purge(&rtwdev->coex.queue);
 	spin_unlock_irqrestore(&rtwdev->tx_report.q_lock, flags);
 
 	list_for_each_entry_safe(rsvd_pkt, tmp, &rtwdev->rsvd_page_list,

From c2a3823dad4988943c0b0f61af9336301e30d4e5 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Thu, 24 Jun 2021 08:29:18 +0300
Subject: [PATCH 2788/3292] iwlwifi: acpi: remove unused function
 iwl_acpi_eval_dsm_func()

Stephen reported a warning:

drivers/net/wireless/intel/iwlwifi/fw/acpi.c:720:12: warning: 'iwl_acpi_eval_dsm_func' defined but not used [-Wunused-function]

The warning is correct and the function is not used anywhere, so let's
just remove it.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 7119f02b5d34 ("iwlwifi: mvm: support BIOS enable/disable for 11ax in Russia")
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Acked-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210624052918.4946-1-kvalo@codeaurora.org
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 36 --------------------
 1 file changed, 36 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 8cf7bc3aa09a..34933f133a0a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -717,42 +717,6 @@ int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 }
 IWL_EXPORT_SYMBOL(iwl_sar_geo_init);
 
-static u32 iwl_acpi_eval_dsm_func(struct device *dev, enum iwl_dsm_funcs_rev_0 eval_func)
-{
-	union acpi_object *obj;
-	u32 ret;
-
-	obj = iwl_acpi_get_dsm_object(dev, 0,
-				      eval_func, NULL,
-				      &iwl_guid);
-
-	if (IS_ERR(obj)) {
-		IWL_DEBUG_DEV_RADIO(dev,
-				    "ACPI: DSM func '%d': Got Error in obj = %ld\n",
-				    eval_func,
-				    PTR_ERR(obj));
-		return 0;
-	}
-
-	if (obj->type != ACPI_TYPE_INTEGER) {
-		IWL_DEBUG_DEV_RADIO(dev,
-				    "ACPI: DSM func '%d' did not return a valid object, type=%d\n",
-				    eval_func,
-				    obj->type);
-		ret = 0;
-		goto out;
-	}
-
-	ret = obj->integer.value;
-	IWL_DEBUG_DEV_RADIO(dev,
-			    "ACPI: DSM method evaluated: func='%d', ret=%d\n",
-			    eval_func,
-			    ret);
-out:
-	ACPI_FREE(obj);
-	return ret;
-}
-
 __le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
 {
 	int ret;

From 9c04cfcd4aad232e36306cdc5c74cd9fc9148a7e Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Sun, 28 Feb 2021 19:50:58 +0800
Subject: [PATCH 2789/3292] i40e: Fix error handling in i40e_vsi_open

When vsi->type == I40E_VSI_FDIR, we have caught the return value of
i40e_vsi_request_irq() but without further handling. Check and execute
memory clean on failure just like the other i40e_vsi_request_irq().

Fixes: 8a9eb7d3cbcab ("i40e: rework fdir setup and teardown")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 704e474879c5..526fa0a791ea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8703,6 +8703,8 @@ int i40e_vsi_open(struct i40e_vsi *vsi)
 			 dev_driver_string(&pf->pdev->dev),
 			 dev_name(&pf->pdev->dev));
 		err = i40e_vsi_request_irq(vsi, int_name);
+		if (err)
+			goto err_setup_rx;
 
 	} else {
 		err = -EINVAL;

From 9262793e59f0423437166a879a73d056b1fe6f9a Mon Sep 17 00:00:00 2001
From: Mateusz Palczewski <mateusz.palczewski@intel.com>
Date: Wed, 10 Mar 2021 11:12:54 +0000
Subject: [PATCH 2790/3292] i40e: Fix autoneg disabling for non-10GBaseT links

Disabling autonegotiation was allowed only for 10GBaseT PHY.
The condition was changed to check if link media type is BaseT.

Fixes: 3ce12ee9d8f9 ("i40e: Fix order of checks when enabling/disabling autoneg in ethtool")
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Karen Sornek <karen.sornek@intel.com>
Signed-off-by: Dawid Lukwinski <dawid.lukwinski@intel.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index ccd5b9486ea9..3e822bad4851 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1262,8 +1262,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 			if (ethtool_link_ksettings_test_link_mode(&safe_ks,
 								  supported,
 								  Autoneg) &&
-			    hw->phy.link_info.phy_type !=
-			    I40E_PHY_TYPE_10GBASE_T) {
+			    hw->phy.media_type != I40E_MEDIA_TYPE_BASET) {
 				netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
 				err = -EINVAL;
 				goto done;

From b9964ce74544ea6cbc4eabd2c89a531adf7f291d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 24 Jun 2021 18:05:51 +0200
Subject: [PATCH 2791/3292] rcu: Create an unrcu_pointer() to remove __rcu from
 a pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xchg() and cmpxchg() functions are sometimes used to carry out RCU
updates.  Unfortunately, this can result in sparse warnings for both
the old-value and new-value arguments, as well as for the return value.
The arguments can be dealt with using RCU_INITIALIZER():

        old_p = xchg(&p, RCU_INITIALIZER(new_p));

But a sparse warning still remains due to assigning the __rcu pointer
returned from xchg to the (most likely) non-__rcu pointer old_p.

This commit therefore provides an unrcu_pointer() macro that strips
the __rcu.  This macro can be used as follows:

        old_p = unrcu_pointer(xchg(&p, RCU_INITIALIZER(new_p)));

Reported-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210624160609.292325-2-toke@redhat.com
---
 include/linux/rcupdate.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9455476c5ba2..d7895b81264e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -363,6 +363,20 @@ static inline void rcu_preempt_sleep_check(void) { }
 #define rcu_check_sparse(p, space)
 #endif /* #else #ifdef __CHECKER__ */
 
+/**
+ * unrcu_pointer - mark a pointer as not being RCU protected
+ * @p: pointer needing to lose its __rcu property
+ *
+ * Converts @p from an __rcu pointer to a __kernel pointer.
+ * This allows an __rcu pointer to be used with xchg() and friends.
+ */
+#define unrcu_pointer(p)						\
+({									\
+	typeof(*p) *_________p1 = (typeof(*p) *__force)(p);		\
+	rcu_check_sparse(p, __rcu); 					\
+	((typeof(*p) __force __kernel *)(_________p1)); 		\
+})
+
 #define __rcu_access_pointer(p, space) \
 ({ \
 	typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \

From 9a145c04a293933002ec288a4d6b4f370b59e4d1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 24 Jun 2021 18:05:52 +0200
Subject: [PATCH 2792/3292] doc: Clarify and expand RCU updaters and
 corresponding readers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit clarifies which primitives readers can use given that the
corresponding updaters have made a specific choice.  This commit also adds
this information for the various RCU Tasks flavors.  While in the area, it
removes a paragraph that no longer applies in any straightforward manner.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210624160609.292325-3-toke@redhat.com
---
 Documentation/RCU/checklist.rst | 46 +++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 1030119294d0..07f6cb8f674d 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -211,27 +211,33 @@ over a rather long period of time, but improvements are always welcome!
 	of the system, especially to real-time workloads running on
 	the rest of the system.
 
-7.	As of v4.20, a given kernel implements only one RCU flavor,
-	which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
-	If the updater uses call_rcu() or synchronize_rcu(),
-	then the corresponding readers may use rcu_read_lock() and
-	rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
-	or any pair of primitives that disables and re-enables preemption,
-	for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
-	If the updater uses synchronize_srcu() or call_srcu(),
-	then the corresponding readers must use srcu_read_lock() and
-	srcu_read_unlock(), and with the same srcu_struct.  The rules for
-	the expedited primitives are the same as for their non-expedited
-	counterparts.  Mixing things up will result in confusion and
-	broken kernels, and has even resulted in an exploitable security
-	issue.
+7.	As of v4.20, a given kernel implements only one RCU flavor, which
+	is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
+	If the updater uses call_rcu() or synchronize_rcu(), then
+	the corresponding readers may use:  (1) rcu_read_lock() and
+	rcu_read_unlock(), (2) any pair of primitives that disables
+	and re-enables softirq, for example, rcu_read_lock_bh() and
+	rcu_read_unlock_bh(), or (3) any pair of primitives that disables
+	and re-enables preemption, for example, rcu_read_lock_sched() and
+	rcu_read_unlock_sched().  If the updater uses synchronize_srcu()
+	or call_srcu(), then the corresponding readers must use
+	srcu_read_lock() and srcu_read_unlock(), and with the same
+	srcu_struct.  The rules for the expedited RCU grace-period-wait
+	primitives are the same as for their non-expedited counterparts.
 
-	One exception to this rule: rcu_read_lock() and rcu_read_unlock()
-	may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
-	in cases where local bottom halves are already known to be
-	disabled, for example, in irq or softirq context.  Commenting
-	such cases is a must, of course!  And the jury is still out on
-	whether the increased speed is worth it.
+	If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(),
+	then the readers must refrain from executing voluntary
+	context switches, that is, from blocking.  If the updater uses
+	call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then
+	the corresponding readers must use rcu_read_lock_trace() and
+	rcu_read_unlock_trace().  If an updater uses call_rcu_tasks_rude()
+	or synchronize_rcu_tasks_rude(), then the corresponding readers
+	must use anything that disables interrupts.
+
+	Mixing things up will result in confusion and broken kernels, and
+	has even resulted in an exploitable security issue.  Therefore,
+	when using non-obvious pairs of primitives, commenting is of
+	course a must.
 
 8.	Although synchronize_rcu() is slower than is call_rcu(), it
 	usually results in simpler code.  So, unless update performance is

From e74c74f9e51deb725e72d129084ba8252d47222d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:53 +0200
Subject: [PATCH 2793/3292] doc: Give XDP as example of non-obvious RCU
 reader/updater pairing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit gives an example of non-obvious RCU reader/updater pairing
in the guise of the XDP feature in networking, which calls BPF programs
from network-driver NAPI (softirq) context.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210624160609.292325-4-toke@redhat.com
---
 Documentation/RCU/checklist.rst | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 07f6cb8f674d..01cc21f17f7b 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -236,8 +236,15 @@ over a rather long period of time, but improvements are always welcome!
 
 	Mixing things up will result in confusion and broken kernels, and
 	has even resulted in an exploitable security issue.  Therefore,
-	when using non-obvious pairs of primitives, commenting is of
-	course a must.
+	when using non-obvious pairs of primitives, commenting is
+	of course a must.  One example of non-obvious pairing is
+	the XDP feature in networking, which calls BPF programs from
+	network-driver NAPI (softirq) context.	BPF relies heavily on RCU
+	protection for its data structures, but because the BPF program
+	invocation happens entirely within a single local_bh_disable()
+	section in a NAPI poll cycle, this usage is safe.  The reason
+	that this usage is safe is that readers can use anything that
+	disables BH when updaters use call_rcu() or synchronize_rcu().
 
 8.	Although synchronize_rcu() is slower than is call_rcu(), it
 	usually results in simpler code.  So, unless update performance is

From 694cea395fded425008e93cd90cfdf7a451674af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:54 +0200
Subject: [PATCH 2794/3292] bpf: Allow RCU-protected lookups to happen from bh
 context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

XDP programs are called from a NAPI poll context, which means the RCU
reference liveness is ensured by local_bh_disable(). Add
rcu_read_lock_bh_held() as a condition to the RCU checks for map lookups so
lockdep understands that the dereferences are safe from inside *either* an
rcu_read_lock() section *or* a local_bh_disable() section. While both
bh_disabled and rcu_read_lock() provide RCU protection, they are
semantically distinct, so we need both conditions to prevent lockdep
complaints.

This change is done in preparation for removing the redundant
rcu_read_lock()s from drivers.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-5-toke@redhat.com
---
 kernel/bpf/hashtab.c  | 21 ++++++++++++++-------
 kernel/bpf/helpers.c  |  6 +++---
 kernel/bpf/lpm_trie.c |  6 ++++--
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6f6681b07364..72c58cc516a3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -596,7 +596,8 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	u32 hash, key_size;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
@@ -989,7 +990,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
@@ -1082,7 +1084,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
@@ -1148,7 +1151,8 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
@@ -1202,7 +1206,8 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
@@ -1276,7 +1281,8 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	u32 hash, key_size;
 	int ret;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
@@ -1311,7 +1317,8 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	u32 hash, key_size;
 	int ret;
 
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+		     !rcu_read_lock_bh_held());
 
 	key_size = map->key_size;
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a2f1f15ce432..62cf00383910 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -29,7 +29,7 @@
  */
 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
 {
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 	return (unsigned long) map->ops->map_lookup_elem(map, key);
 }
 
@@ -45,7 +45,7 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
 	   void *, value, u64, flags)
 {
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 	return map->ops->map_update_elem(map, key, value, flags);
 }
 
@@ -62,7 +62,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
 
 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
 {
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 	return map->ops->map_delete_elem(map, key);
 }
 
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1b7b8a6f34ee..423549d2c52e 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -232,7 +232,8 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
 
 	/* Start walking the trie from the root node ... */
 
-	for (node = rcu_dereference(trie->root); node;) {
+	for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held());
+	     node;) {
 		unsigned int next_bit;
 		size_t matchlen;
 
@@ -264,7 +265,8 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
 		 * traverse down.
 		 */
 		next_bit = extract_bit(key->data, node->prefixlen);
-		node = rcu_dereference(node->child[next_bit]);
+		node = rcu_dereference_check(node->child[next_bit],
+					     rcu_read_lock_bh_held());
 	}
 
 	if (!found)

From 782347b6bcad07ddb574422e01e22c92e05928c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:55 +0200
Subject: [PATCH 2795/3292] xdp: Add proper __rcu annotations to redirect map
 entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

XDP_REDIRECT works by a three-step process: the bpf_redirect() and
bpf_redirect_map() helpers will lookup the target of the redirect and store
it (along with some other metadata) in a per-CPU struct bpf_redirect_info.
Next, when the program returns the XDP_REDIRECT return code, the driver
will call xdp_do_redirect() which will use the information thus stored to
actually enqueue the frame into a bulk queue structure (that differs
slightly by map type, but shares the same principle). Finally, before
exiting its NAPI poll loop, the driver will call xdp_do_flush(), which will
flush all the different bulk queues, thus completing the redirect.

Pointers to the map entries will be kept around for this whole sequence of
steps, protected by RCU. However, there is no top-level rcu_read_lock() in
the core code; instead drivers add their own rcu_read_lock() around the XDP
portions of the code, but somewhat inconsistently as Martin discovered[0].
However, things still work because everything happens inside a single NAPI
poll sequence, which means it's between a pair of calls to
local_bh_disable()/local_bh_enable(). So Paul suggested[1] that we could
document this intention by using rcu_dereference_check() with
rcu_read_lock_bh_held() as a second parameter, thus allowing sparse and
lockdep to verify that everything is done correctly.

This patch does just that: we add an __rcu annotation to the map entry
pointers and remove the various comments explaining the NAPI poll assurance
strewn through devmap.c in favour of a longer explanation in filter.c. The
goal is to have one coherent documentation of the entire flow, and rely on
the RCU annotations as a "standard" way of communicating the flow in the
map code (which can additionally be understood by sparse and lockdep).

The RCU annotation replacements result in a fairly straight-forward
replacement where READ_ONCE() becomes rcu_dereference_check(), WRITE_ONCE()
becomes rcu_assign_pointer() and xchg() and cmpxchg() gets wrapped in the
proper constructs to cast the pointer back and forth between __rcu and
__kernel address space (for the benefit of sparse). The one complication is
that xskmap has a few constructions where double-pointers are passed back
and forth; these simply all gain __rcu annotations, and only the final
reference/dereference to the inner-most pointer gets changed.

With this, everything can be run through sparse without eliciting
complaints, and lockdep can verify correctness even without the use of
rcu_read_lock() in the drivers. Subsequent patches will clean these up from
the drivers.

[0] https://lore.kernel.org/bpf/20210415173551.7ma4slcbqeyiba2r@kafai-mbp.dhcp.thefacebook.com/
[1] https://lore.kernel.org/bpf/20210419165837.GA975577@paulmck-ThinkPad-P17-Gen-1/

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210624160609.292325-6-toke@redhat.com
---
 include/linux/filter.h |  8 +++----
 include/net/xdp_sock.h |  2 +-
 kernel/bpf/cpumap.c    | 13 +++++++----
 kernel/bpf/devmap.c    | 49 ++++++++++++++++++------------------------
 net/core/filter.c      | 28 ++++++++++++++++++++++++
 net/xdp/xsk.c          |  4 ++--
 net/xdp/xsk.h          |  4 ++--
 net/xdp/xskmap.c       | 29 ++++++++++++++-----------
 8 files changed, 83 insertions(+), 54 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 688856e0b28a..472f97074da0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -763,11 +763,9 @@ DECLARE_BPF_DISPATCHER(xdp)
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 					    struct xdp_buff *xdp)
 {
-	/* Caller needs to hold rcu_read_lock() (!), otherwise program
-	 * can be released while still running, or map elements could be
-	 * freed early while still having concurrent users. XDP fastpath
-	 * already takes rcu_read_lock() when fetching the program, so
-	 * it's not necessary here anymore.
+	/* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
+	 * under local_bh_disable(), which provides the needed RCU protection
+	 * for accessing map entries.
 	 */
 	return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
 }
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9c0722c6d7ac..fff069d2ed1b 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -37,7 +37,7 @@ struct xdp_umem {
 struct xsk_map {
 	struct bpf_map map;
 	spinlock_t lock; /* Synchronize map updates */
-	struct xdp_sock *xsk_map[];
+	struct xdp_sock __rcu *xsk_map[];
 };
 
 struct xdp_sock {
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a1a0c4e791c6..480e936c54d0 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -74,7 +74,7 @@ struct bpf_cpu_map_entry {
 struct bpf_cpu_map {
 	struct bpf_map map;
 	/* Below members specific for map type */
-	struct bpf_cpu_map_entry **cpu_map;
+	struct bpf_cpu_map_entry __rcu **cpu_map;
 };
 
 static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
@@ -469,7 +469,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
 {
 	struct bpf_cpu_map_entry *old_rcpu;
 
-	old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
+	old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu)));
 	if (old_rcpu) {
 		call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
 		INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
@@ -551,7 +551,7 @@ static void cpu_map_free(struct bpf_map *map)
 	for (i = 0; i < cmap->map.max_entries; i++) {
 		struct bpf_cpu_map_entry *rcpu;
 
-		rcpu = READ_ONCE(cmap->cpu_map[i]);
+		rcpu = rcu_dereference_raw(cmap->cpu_map[i]);
 		if (!rcpu)
 			continue;
 
@@ -562,6 +562,10 @@ static void cpu_map_free(struct bpf_map *map)
 	kfree(cmap);
 }
 
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
 static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
@@ -570,7 +574,8 @@ static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 	if (key >= map->max_entries)
 		return NULL;
 
-	rcpu = READ_ONCE(cmap->cpu_map[key]);
+	rcpu = rcu_dereference_check(cmap->cpu_map[key],
+				     rcu_read_lock_bh_held());
 	return rcpu;
 }
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2a75e6c2d27d..2f6bd75cd682 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -73,7 +73,7 @@ struct bpf_dtab_netdev {
 
 struct bpf_dtab {
 	struct bpf_map map;
-	struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
+	struct bpf_dtab_netdev __rcu **netdev_map; /* DEVMAP type only */
 	struct list_head list;
 
 	/* these are only used for DEVMAP_HASH type maps */
@@ -226,7 +226,7 @@ static void dev_map_free(struct bpf_map *map)
 		for (i = 0; i < dtab->map.max_entries; i++) {
 			struct bpf_dtab_netdev *dev;
 
-			dev = dtab->netdev_map[i];
+			dev = rcu_dereference_raw(dtab->netdev_map[i]);
 			if (!dev)
 				continue;
 
@@ -259,6 +259,10 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
 static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -410,15 +414,9 @@ out:
 	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err);
 }
 
-/* __dev_flush is called from xdp_do_flush() which _must_ be signaled
- * from the driver before returning from its napi->poll() routine. The poll()
- * routine is called either from busy_poll context or net_rx_action signaled
- * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
- * net device can be torn down. On devmap tear down we ensure the flush list
- * is empty before completing to ensure all flush operations have completed.
- * When drivers update the bpf program they may need to ensure any flush ops
- * are also complete. Using synchronize_rcu or call_rcu will suffice for this
- * because both wait for napi context to exit.
+/* __dev_flush is called from xdp_do_flush() which _must_ be signalled from the
+ * driver before returning from its napi->poll() routine. See the comment above
+ * xdp_do_flush() in filter.c.
  */
 void __dev_flush(void)
 {
@@ -433,9 +431,9 @@ void __dev_flush(void)
 	}
 }
 
-/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
- * update happens in parallel here a dev_put won't happen until after reading
- * the ifindex.
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
  */
 static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -445,12 +443,14 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 	if (key >= map->max_entries)
 		return NULL;
 
-	obj = READ_ONCE(dtab->netdev_map[key]);
+	obj = rcu_dereference_check(dtab->netdev_map[key],
+				    rcu_read_lock_bh_held());
 	return obj;
 }
 
-/* Runs under RCU-read-side, plus in softirq under NAPI protection.
- * Thus, safe percpu variable access.
+/* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu
+ * variable access, and map elements stick around. See comment above
+ * xdp_do_flush() in filter.c.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
@@ -735,14 +735,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	if (k >= map->max_entries)
 		return -EINVAL;
 
-	/* Use call_rcu() here to ensure any rcu critical sections have
-	 * completed as well as any flush operations because call_rcu
-	 * will wait for preempt-disable region to complete, NAPI in this
-	 * context.  And additionally, the driver tear down ensures all
-	 * soft irqs are complete before removing the net device in the
-	 * case of dev_put equals zero.
-	 */
-	old_dev = xchg(&dtab->netdev_map[k], NULL);
+	old_dev = unrcu_pointer(xchg(&dtab->netdev_map[k], NULL));
 	if (old_dev)
 		call_rcu(&old_dev->rcu, __dev_map_entry_free);
 	return 0;
@@ -851,7 +844,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 	 * Remembering the driver side flush operation will happen before the
 	 * net device is removed.
 	 */
-	old_dev = xchg(&dtab->netdev_map[i], dev);
+	old_dev = unrcu_pointer(xchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev)));
 	if (old_dev)
 		call_rcu(&old_dev->rcu, __dev_map_entry_free);
 
@@ -1031,10 +1024,10 @@ static int dev_map_notification(struct notifier_block *notifier,
 			for (i = 0; i < dtab->map.max_entries; i++) {
 				struct bpf_dtab_netdev *dev, *odev;
 
-				dev = READ_ONCE(dtab->netdev_map[i]);
+				dev = rcu_dereference(dtab->netdev_map[i]);
 				if (!dev || netdev != dev->dev)
 					continue;
-				odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
+				odev = unrcu_pointer(cmpxchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev), NULL));
 				if (dev == odev)
 					call_rcu(&dev->rcu,
 						 __dev_map_entry_free);
diff --git a/net/core/filter.c b/net/core/filter.c
index d062053994c7..d22895caa164 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3897,6 +3897,34 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+/* XDP_REDIRECT works by a three-step process, implemented in the functions
+ * below:
+ *
+ * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
+ *    of the redirect and store it (along with some other metadata) in a per-CPU
+ *    struct bpf_redirect_info.
+ *
+ * 2. When the program returns the XDP_REDIRECT return code, the driver will
+ *    call xdp_do_redirect() which will use the information in struct
+ *    bpf_redirect_info to actually enqueue the frame into a map type-specific
+ *    bulk queue structure.
+ *
+ * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
+ *    which will flush all the different bulk queues, thus completing the
+ *    redirect.
+ *
+ * Pointers to the map entries will be kept around for this whole sequence of
+ * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
+ * the core code; instead, the RCU protection relies on everything happening
+ * inside a single NAPI poll sequence, which means it's between a pair of calls
+ * to local_bh_disable()/local_bh_enable().
+ *
+ * The map entries are marked as __rcu and the map code makes sure to
+ * dereference those pointers with rcu_dereference_check() in a way that works
+ * for both sections that to hold an rcu_read_lock() and sections that are
+ * called from NAPI without a separate rcu_read_lock(). The code below does not
+ * use RCU annotations, but relies on those in the map code.
+ */
 void xdp_do_flush(void)
 {
 	__dev_flush();
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cd62d4ba87a9..996da915f520 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -749,7 +749,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
 }
 
 static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
-					      struct xdp_sock ***map_entry)
+					      struct xdp_sock __rcu ***map_entry)
 {
 	struct xsk_map *map = NULL;
 	struct xsk_map_node *node;
@@ -785,7 +785,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
 	 * might be updates to the map between
 	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
 	 */
-	struct xdp_sock **map_entry = NULL;
+	struct xdp_sock __rcu **map_entry = NULL;
 	struct xsk_map *map;
 
 	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index edcf249ad1f1..a4bc4749faac 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -31,7 +31,7 @@ struct xdp_mmap_offsets_v1 {
 struct xsk_map_node {
 	struct list_head node;
 	struct xsk_map *map;
-	struct xdp_sock **map_entry;
+	struct xdp_sock __rcu **map_entry;
 };
 
 static inline struct xdp_sock *xdp_sk(struct sock *sk)
@@ -40,7 +40,7 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
 }
 
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
-			     struct xdp_sock **map_entry);
+			     struct xdp_sock __rcu **map_entry);
 void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
 int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
 			u16 queue_id);
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 9df75ea4a567..2e48d0e094d9 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -12,7 +12,7 @@
 #include "xsk.h"
 
 static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
-					       struct xdp_sock **map_entry)
+					       struct xdp_sock __rcu **map_entry)
 {
 	struct xsk_map_node *node;
 
@@ -42,7 +42,7 @@ static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
 }
 
 static void xsk_map_sock_delete(struct xdp_sock *xs,
-				struct xdp_sock **map_entry)
+				struct xdp_sock __rcu **map_entry)
 {
 	struct xsk_map_node *n, *tmp;
 
@@ -124,6 +124,10 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	return insn - insn_buf;
 }
 
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
 static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
@@ -131,12 +135,11 @@ static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
 	if (key >= map->max_entries)
 		return NULL;
 
-	return READ_ONCE(m->xsk_map[key]);
+	return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
 }
 
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	WARN_ON_ONCE(!rcu_read_lock_held());
 	return __xsk_map_lookup_elem(map, *(u32 *)key);
 }
 
@@ -149,7 +152,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 			       u64 map_flags)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *xs, *old_xs, **map_entry;
+	struct xdp_sock __rcu **map_entry;
+	struct xdp_sock *xs, *old_xs;
 	u32 i = *(u32 *)key, fd = *(u32 *)value;
 	struct xsk_map_node *node;
 	struct socket *sock;
@@ -179,7 +183,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 
 	spin_lock_bh(&m->lock);
-	old_xs = READ_ONCE(*map_entry);
+	old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
 	if (old_xs == xs) {
 		err = 0;
 		goto out;
@@ -191,7 +195,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto out;
 	}
 	xsk_map_sock_add(xs, node);
-	WRITE_ONCE(*map_entry, xs);
+	rcu_assign_pointer(*map_entry, xs);
 	if (old_xs)
 		xsk_map_sock_delete(old_xs, map_entry);
 	spin_unlock_bh(&m->lock);
@@ -208,7 +212,8 @@ out:
 static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *old_xs, **map_entry;
+	struct xdp_sock __rcu **map_entry;
+	struct xdp_sock *old_xs;
 	int k = *(u32 *)key;
 
 	if (k >= map->max_entries)
@@ -216,7 +221,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 
 	spin_lock_bh(&m->lock);
 	map_entry = &m->xsk_map[k];
-	old_xs = xchg(map_entry, NULL);
+	old_xs = unrcu_pointer(xchg(map_entry, NULL));
 	if (old_xs)
 		xsk_map_sock_delete(old_xs, map_entry);
 	spin_unlock_bh(&m->lock);
@@ -231,11 +236,11 @@ static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 }
 
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
-			     struct xdp_sock **map_entry)
+			     struct xdp_sock __rcu **map_entry)
 {
 	spin_lock_bh(&map->lock);
-	if (READ_ONCE(*map_entry) == xs) {
-		WRITE_ONCE(*map_entry, NULL);
+	if (rcu_access_pointer(*map_entry) == xs) {
+		rcu_assign_pointer(*map_entry, NULL);
 		xsk_map_sock_delete(xs, map_entry);
 	}
 	spin_unlock_bh(&map->lock);

From 77151ccf10659d4066074f278402032f3265f0cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:56 +0200
Subject: [PATCH 2796/3292] bpf, sched: Remove unneeded rcu_read_lock() around
 BPF program invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rcu_read_lock() call in cls_bpf and act_bpf are redundant: on the TX
side, there's already a call to rcu_read_lock_bh() in __dev_queue_xmit(),
and on RX there's a covering rcu_read_lock() in
netif_receive_skb{,_list}_internal().

With the previous patches we also amended the lockdep checks in the map
code to not require any particular RCU flavour, so we can just get rid of
the rcu_read_lock()s.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210624160609.292325-7-toke@redhat.com
---
 net/sched/act_bpf.c | 2 --
 net/sched/cls_bpf.c | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index e48e980c3b93..e409a0005717 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -43,7 +43,6 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
 	tcf_lastuse_update(&prog->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
 
-	rcu_read_lock();
 	filter = rcu_dereference(prog->filter);
 	if (at_ingress) {
 		__skb_push(skb, skb->mac_len);
@@ -56,7 +55,6 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
 	}
 	if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
 		skb_orphan(skb);
-	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
 	 * Similarly as in cls_bpf, if filter_res == -1 we use the
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6e3e63db0e01..fa739efa59f4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -85,8 +85,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct cls_bpf_prog *prog;
 	int ret = -1;
 
-	/* Needed here for accessing maps. */
-	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
 		int filter_res;
 
@@ -131,7 +129,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 		break;
 	}
-	rcu_read_unlock();
 
 	return ret;
 }

From 0939e0537896e421e391fa4b1a0b052907808e0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:57 +0200
Subject: [PATCH 2797/3292] ena: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ena driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Saeed Bishara <saeedb@amazon.com>
Cc: Guy Tzalik <gtzalik@amazon.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-8-toke@redhat.com
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 3bb0e66b2c7e..44ef6b88f715 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -382,7 +382,6 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 	struct xdp_frame *xdpf;
 	u64 *xdp_stat;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
 
 	if (!xdp_prog)
@@ -439,8 +438,6 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 
 	ena_increase_stat(xdp_stat, 1, &rx_ring->syncp);
 out:
-	rcu_read_unlock();
-
 	return verdict;
 }
 

From 158c1399fc45c5178a3f2b8b68ff2faa2e36a52d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:58 +0200
Subject: [PATCH 2798/3292] bnxt: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bnxt driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Michael Chan <michael.chan@broadcom.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-9-toke@redhat.com
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index ec9564e584e0..bee6e091a997 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -138,9 +138,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	xdp_prepare_buff(&xdp, *data_ptr - offset, offset, *len, false);
 	orig_data = xdp.data;
 
-	rcu_read_lock();
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
-	rcu_read_unlock();
 
 	tx_avail = bnxt_tx_avail(bp, txr);
 	/* If the tx ring is not full, we must not update the rx producer yet

From 36baafe347a85a9d85f61aac0a9b53c53635829e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:05:59 +0200
Subject: [PATCH 2799/3292] thunderx: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The thunderx driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Sunil Goutham <sgoutham@marvell.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/bpf/20210624160609.292325-10-toke@redhat.com
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index c33b4e837515..e2b290135fd9 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -555,9 +555,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false);
 	orig_data = xdp.data;
 
-	rcu_read_lock();
 	action = bpf_prog_run_xdp(prog, &xdp);
-	rcu_read_unlock();
 
 	len = xdp.data_end - xdp.data;
 	/* Check if XDP program has changed headers */

From 547aabcac3251c40e4cd09d79dba70f7eab8cca2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:00 +0200
Subject: [PATCH 2800/3292] freescale: Remove rcu_read_lock() around XDP
 program invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dpaa and dpaa2 drivers have rcu_read_lock()/rcu_read_unlock() pairs
around XDP program invocations. However, the actual lifetime of the objects
referred by the XDP program invocation is longer, all the way through to
the call to xdp_do_flush(), making the scope of the rcu_read_lock() too
small. This turns out to be harmless because it all happens in a single
NAPI poll cycle (and thus under local_bh_disable()), but it makes the
rcu_read_lock() misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Camelia Groza <camelia.groza@nxp.com>
Cc: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Cc: Madalin Bucur <madalin.bucur@nxp.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-11-toke@redhat.com
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c   | 8 +-------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 3 ---
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 177c020bf34a..e6826561cf11 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2558,13 +2558,9 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr,
 	u32 xdp_act;
 	int err;
 
-	rcu_read_lock();
-
 	xdp_prog = READ_ONCE(priv->xdp_prog);
-	if (!xdp_prog) {
-		rcu_read_unlock();
+	if (!xdp_prog)
 		return XDP_PASS;
-	}
 
 	xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE,
 		      &dpaa_fq->xdp_rxq);
@@ -2638,8 +2634,6 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr,
 		break;
 	}
 
-	rcu_read_unlock();
-
 	return xdp_act;
 }
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 8433aa730c42..973352393bd4 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -352,8 +352,6 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
 	u32 xdp_act = XDP_PASS;
 	int err, offset;
 
-	rcu_read_lock();
-
 	xdp_prog = READ_ONCE(ch->xdp.prog);
 	if (!xdp_prog)
 		goto out;
@@ -414,7 +412,6 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
 
 	ch->xdp.res |= xdp_act;
 out:
-	rcu_read_unlock();
 	return xdp_act;
 }
 

From 49589b23d5a92dff4a7cb705608dff7dd13ef709 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:01 +0200
Subject: [PATCH 2801/3292] intel: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Intel drivers all have rcu_read_lock()/rcu_read_unlock() pairs around
XDP program invocations. However, the actual lifetime of the objects
referred by the XDP program invocation is longer, all the way through to
the call to xdp_do_flush(), making the scope of the rcu_read_lock() too
small. This turns out to be harmless because it all happens in a single
NAPI poll cycle (and thus under local_bh_disable()), but it makes the
rcu_read_lock() misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Jesper Dangaard Brouer <brouer@redhat.com> # i40e
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20210624160609.292325-12-toke@redhat.com
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c       | 2 --
 drivers/net/ethernet/intel/i40e/i40e_xsk.c        | 3 ---
 drivers/net/ethernet/intel/ice/ice_txrx.c         | 6 +-----
 drivers/net/ethernet/intel/ice/ice_xsk.c          | 3 ---
 drivers/net/ethernet/intel/igb/igb_main.c         | 2 --
 drivers/net/ethernet/intel/igc/igc_main.c         | 7 ++-----
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     | 2 --
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c      | 3 ---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 --
 9 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b883ab809df3..38eb8151ee9a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2298,7 +2298,6 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 
 	if (!xdp_prog)
@@ -2334,7 +2333,6 @@ out_failure:
 		break;
 	}
 xdp_out:
-	rcu_read_unlock();
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 68f177a86403..e7e778ca074c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -153,7 +153,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
-	rcu_read_lock();
 	/* NB! xdp_prog will always be !NULL, due to the fact that
 	 * this path is enabled by setting an XDP program.
 	 */
@@ -164,7 +163,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
 		if (err)
 			goto out_failure;
-		rcu_read_unlock();
 		return I40E_XDP_REDIR;
 	}
 
@@ -188,7 +186,6 @@ out_failure:
 		result = I40E_XDP_CONSUMED;
 		break;
 	}
-	rcu_read_unlock();
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 917eba7fdd0c..dd791ca34fab 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1135,15 +1135,11 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
 #endif
 
-		rcu_read_lock();
 		xdp_prog = READ_ONCE(rx_ring->xdp_prog);
-		if (!xdp_prog) {
-			rcu_read_unlock();
+		if (!xdp_prog)
 			goto construct_skb;
-		}
 
 		xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
-		rcu_read_unlock();
 		if (!xdp_res)
 			goto construct_skb;
 		if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 239b9bf10794..8a093368f631 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -466,7 +466,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 	struct ice_ring *xdp_ring;
 	u32 act;
 
-	rcu_read_lock();
 	/* ZC patch is enabled only when XDP program is set,
 	 * so here it can not be NULL
 	 */
@@ -478,7 +477,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
 		if (err)
 			goto out_failure;
-		rcu_read_unlock();
 		return ICE_XDP_REDIR;
 	}
 
@@ -503,7 +501,6 @@ out_failure:
 		break;
 	}
 
-	rcu_read_unlock();
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 5db303d64d14..7e6435dc7e80 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8381,7 +8381,6 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 
 	if (!xdp_prog)
@@ -8416,7 +8415,6 @@ out_failure:
 		break;
 	}
 xdp_out:
-	rcu_read_unlock();
 	return ERR_PTR(-result);
 }
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 3f6b6d4543a8..95323095094d 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2240,18 +2240,15 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 	struct bpf_prog *prog;
 	int res;
 
-	rcu_read_lock();
-
 	prog = READ_ONCE(adapter->xdp_prog);
 	if (!prog) {
 		res = IGC_XDP_PASS;
-		goto unlock;
+		goto out;
 	}
 
 	res = __igc_xdp_run_prog(adapter, prog, xdp);
 
-unlock:
-	rcu_read_unlock();
+out:
 	return ERR_PTR(-res);
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2ac5b82676f3..ffff69efd78a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2199,7 +2199,6 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 	struct xdp_frame *xdpf;
 	u32 act;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 
 	if (!xdp_prog)
@@ -2237,7 +2236,6 @@ out_failure:
 		break;
 	}
 xdp_out:
-	rcu_read_unlock();
 	return ERR_PTR(-result);
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index f72d2978263b..96dd1a4f956a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -100,7 +100,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 	struct xdp_frame *xdpf;
 	u32 act;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
@@ -108,7 +107,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
 		if (err)
 			goto out_failure;
-		rcu_read_unlock();
 		return IXGBE_XDP_REDIR;
 	}
 
@@ -134,7 +132,6 @@ out_failure:
 		result = IXGBE_XDP_CONSUMED;
 		break;
 	}
-	rcu_read_unlock();
 	return result;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index dc56931fc1dc..c714e1ecd308 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1054,7 +1054,6 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 
 	if (!xdp_prog)
@@ -1082,7 +1081,6 @@ out_failure:
 		break;
 	}
 xdp_out:
-	rcu_read_unlock();
 	return ERR_PTR(-result);
 }
 

From 959ad7ec066d9a61557ad6aedf77ea9b54c82df0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:02 +0200
Subject: [PATCH 2802/3292] marvell: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mvneta and mvpp2 drivers have rcu_read_lock()/rcu_read_unlock() pairs
around XDP program invocations. However, the actual lifetime of the objects
referred by the XDP program invocation is longer, all the way through to
the call to xdp_do_flush(), making the scope of the rcu_read_lock() too
small. This turns out to be harmless because it all happens in a single
NAPI poll cycle (and thus under local_bh_disable()), but it makes the
rcu_read_lock() misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Marcin Wojtas <mw@semihalf.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-13-toke@redhat.com
---
 drivers/net/ethernet/marvell/mvneta.c           | 2 --
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c15ce06427d0..ada4e26a5492 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2373,7 +2373,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 	/* Get number of received packets */
 	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(pp->xdp_prog);
 
 	/* Fairness NAPI loop */
@@ -2451,7 +2450,6 @@ next:
 		xdp_buf.data_hard_start = NULL;
 		sinfo.nr_frags = 0;
 	}
-	rcu_read_unlock();
 
 	if (xdp_buf.data_hard_start)
 		mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 9bca8c8f9f8d..c31677527a02 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3881,8 +3881,6 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 	int rx_done = 0;
 	u32 xdp_ret = 0;
 
-	rcu_read_lock();
-
 	xdp_prog = READ_ONCE(port->xdp_prog);
 
 	/* Get number of received packets and clamp the to-do */
@@ -4028,8 +4026,6 @@ err_drop_frame:
 			mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 	}
 
-	rcu_read_unlock();
-
 	if (xdp_ret & MVPP2_XDP_REDIR)
 		xdp_do_flush_map();
 

From c4411b371c104e65efb531ebd4d8892c568e3a29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:03 +0200
Subject: [PATCH 2803/3292] mlx4: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mlx4 driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around. Also switch the RCU
dereferences in the driver loop itself to the _bh variants.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-14-toke@redhat.com
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index cea62b8f554c..442991d91c15 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -679,9 +679,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 	ring = priv->rx_ring[cq_ring];
 
-	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(ring->xdp_prog);
+	xdp_prog = rcu_dereference_bh(ring->xdp_prog);
 	xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
 	doorbell_pending = false;
 
@@ -744,7 +742,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 				/* Drop the packet, since HW loopback-ed it */
 				mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
 				bucket = &priv->mac_hash[mac_hash];
-				hlist_for_each_entry_rcu(entry, bucket, hlist) {
+				hlist_for_each_entry_rcu_bh(entry, bucket, hlist) {
 					if (ether_addr_equal_64bits(entry->mac,
 								    ethh->h_source))
 						goto next;
@@ -899,8 +897,6 @@ next:
 			break;
 	}
 
-	rcu_read_unlock();
-
 	if (likely(polled)) {
 		if (doorbell_pending) {
 			priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true;

From d5789621b658369b21bd13446bab8102cf75df65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:04 +0200
Subject: [PATCH 2804/3292] nfp: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nfp driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small.

While this is not actually an issue for the nfp driver because it doesn't
support XDP_REDIRECT (and thus doesn't call xdp_do_flush()), the
rcu_read_lock() is still unneeded. And With the addition of RCU annotations
to the XDP_REDIRECT map types that take bh execution into account, lockdep
even understands this to be safe, so there's really no reason to keep it
around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Cc: oss-drivers@netronome.com
Link: https://lore.kernel.org/bpf/20210624160609.292325-15-toke@redhat.com
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index eeb30680b4dc..5dfa4799c34f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1819,7 +1819,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 	struct xdp_buff xdp;
 	int idx;
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(dp->xdp_prog);
 	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
 	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
@@ -2036,7 +2035,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 			if (!nfp_net_xdp_complete(tx_ring))
 				pkts_polled = budget;
 	}
-	rcu_read_unlock();
 
 	return pkts_polled;
 }

From 4415db6ca85ae57830a83290388f2b9dfa5f237f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:05 +0200
Subject: [PATCH 2805/3292] qede: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The qede driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Ariel Elior <aelior@marvell.com>
Cc: gr-everest-linux-l2@marvell.com
Link: https://lore.kernel.org/bpf/20210624160609.292325-16-toke@redhat.com
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 8e150dd4f899..065e9004598e 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1089,13 +1089,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 	xdp_prepare_buff(&xdp, page_address(bd->data), *data_offset,
 			 *len, false);
 
-	/* Queues always have a full reset currently, so for the time
-	 * being until there's atomic program replace just mark read
-	 * side for map helpers.
-	 */
-	rcu_read_lock();
 	act = bpf_prog_run_xdp(prog, &xdp);
-	rcu_read_unlock();
 
 	/* Recalculate, as XDP might have changed the headers */
 	*data_offset = xdp.data - xdp.data_hard_start;

From 4eb14e3fc6197b7205069ed4e2b31eafa11a0697 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:06 +0200
Subject: [PATCH 2806/3292] sfc: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sfc driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Cc: Martin Habets <habetsm.xilinx@gmail.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-17-toke@redhat.com
---
 drivers/net/ethernet/sfc/rx.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 17b8119c48e5..606750938b89 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -260,18 +260,14 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 	s16 offset;
 	int err;
 
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(efx->xdp_prog);
-	if (!xdp_prog) {
-		rcu_read_unlock();
+	xdp_prog = rcu_dereference_bh(efx->xdp_prog);
+	if (!xdp_prog)
 		return true;
-	}
 
 	rx_queue = efx_channel_get_rx_queue(channel);
 
 	if (unlikely(channel->rx_pkt_n_frags > 1)) {
 		/* We can't do XDP on fragmented packets - drop. */
-		rcu_read_unlock();
 		efx_free_rx_buffers(rx_queue, rx_buf,
 				    channel->rx_pkt_n_frags);
 		if (net_ratelimit())
@@ -296,7 +292,6 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 			 rx_buf->len, false);
 
 	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
-	rcu_read_unlock();
 
 	offset = (u8 *)xdp.data - *ehp;
 

From 7b6ee873ff20c22af355661b241defa7f6ed7582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:07 +0200
Subject: [PATCH 2807/3292] netsec: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The netsec driver has a rcu_read_lock()/rcu_read_unlock() pair around the
full RX loop, covering everything up to and including xdp_do_flush(). This
is actually the correct behaviour, but because it all happens in a single
NAPI poll cycle (and thus under local_bh_disable()), it is also technically
redundant.

With the addition of RCU annotations to the XDP_REDIRECT map types that
take bh execution into account, lockdep even understands this to be safe,
so there's really no reason to keep the rcu_read_lock() around anymore, so
let's just remove it.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Cc: Jassi Brar <jaswinder.singh@linaro.org>
Link: https://lore.kernel.org/bpf/20210624160609.292325-18-toke@redhat.com
---
 drivers/net/ethernet/socionext/netsec.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index dfc85cc68173..20d148c019d8 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -958,7 +958,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 
 	xdp_init_buff(&xdp, PAGE_SIZE, &dring->xdp_rxq);
 
-	rcu_read_lock();
 	xdp_prog = READ_ONCE(priv->xdp_prog);
 	dma_dir = page_pool_get_dma_dir(dring->page_pool);
 
@@ -1069,8 +1068,6 @@ next:
 	}
 	netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
 
-	rcu_read_unlock();
-
 	return done;
 }
 

From 2f1e432d339c5fed435adf521cae392755721050 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:08 +0200
Subject: [PATCH 2808/3292] stmmac: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The stmmac driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Jose Abreu <joabreu@synopsys.com>
Link: https://lore.kernel.org/bpf/20210624160609.292325-19-toke@redhat.com
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 16820873b01d..219535ab2c0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4651,7 +4651,6 @@ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
 	return res;
 }
 
-/* This function assumes rcu_read_lock() is held by the caller. */
 static int __stmmac_xdp_run_prog(struct stmmac_priv *priv,
 				 struct bpf_prog *prog,
 				 struct xdp_buff *xdp)
@@ -4693,17 +4692,14 @@ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
 	struct bpf_prog *prog;
 	int res;
 
-	rcu_read_lock();
-
 	prog = READ_ONCE(priv->xdp_prog);
 	if (!prog) {
 		res = STMMAC_XDP_PASS;
-		goto unlock;
+		goto out;
 	}
 
 	res = __stmmac_xdp_run_prog(priv, prog, xdp);
-unlock:
-	rcu_read_unlock();
+out:
 	return ERR_PTR(-res);
 }
 
@@ -4973,10 +4969,8 @@ read_again:
 		buf->xdp->data_end = buf->xdp->data + buf1_len;
 		xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool);
 
-		rcu_read_lock();
 		prog = READ_ONCE(priv->xdp_prog);
 		res = __stmmac_xdp_run_prog(priv, prog, buf->xdp);
-		rcu_read_unlock();
 
 		switch (res) {
 		case STMMAC_XDP_PASS:

From 0cc84b9a6003fa7f6ef5d19e7c8532a01cd41776 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 24 Jun 2021 18:06:09 +0200
Subject: [PATCH 2809/3292] ti: Remove rcu_read_lock() around XDP program
 invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cpsw driver has rcu_read_lock()/rcu_read_unlock() pairs around XDP
program invocations. However, the actual lifetime of the objects referred
by the XDP program invocation is longer, all the way through to the call to
xdp_do_flush(), making the scope of the rcu_read_lock() too small. This
turns out to be harmless because it all happens in a single NAPI poll
cycle (and thus under local_bh_disable()), but it makes the rcu_read_lock()
misleading.

Rather than extend the scope of the rcu_read_lock(), just get rid of it
entirely. With the addition of RCU annotations to the XDP_REDIRECT map
types that take bh execution into account, lockdep even understands this to
be safe, so there's really no reason to keep it around.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: linux-omap@vger.kernel.org
Link: https://lore.kernel.org/bpf/20210624160609.292325-20-toke@redhat.com
---
 drivers/net/ethernet/ti/cpsw_priv.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 5862f0a4a975..ecc2a6b7e28f 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1328,13 +1328,9 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
 	struct bpf_prog *prog;
 	u32 act;
 
-	rcu_read_lock();
-
 	prog = READ_ONCE(priv->xdp_prog);
-	if (!prog) {
-		ret = CPSW_XDP_PASS;
-		goto out;
-	}
+	if (!prog)
+		return CPSW_XDP_PASS;
 
 	act = bpf_prog_run_xdp(prog, xdp);
 	/* XDP prog might have changed packet data and boundaries */
@@ -1378,10 +1374,8 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
 	ndev->stats.rx_bytes += *len;
 	ndev->stats.rx_packets++;
 out:
-	rcu_read_unlock();
 	return ret;
 drop:
-	rcu_read_unlock();
 	page_pool_recycle_direct(cpsw->page_pool[ch], page);
 	return ret;
 }

From d5870edfa3afc4608231267ea3b8e4beb3eab1ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Jun 2021 09:38:42 +0200
Subject: [PATCH 2810/3292] block: move the disk events code to a separate file

Move the code for handling disk events from genhd.c into a new file
as it isn't very related to the rest of the file while at the same
time requiring lots of forward declarations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20210624073843.251178-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile      |   3 +-
 block/blk.h         |   5 +
 block/disk-events.c | 484 +++++++++++++++++++++++++++++++++++++++++++
 block/genhd.c       | 492 --------------------------------------------
 4 files changed, 491 insertions(+), 493 deletions(-)
 create mode 100644 block/disk-events.c

diff --git a/block/Makefile b/block/Makefile
index b9db5d4edfc8..bfbe4e13ca1e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
 			blk-exec.o blk-merge.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
-			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o
+			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
+			disk-events.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
diff --git a/block/blk.h b/block/blk.h
index d3fa47af3607..f8d726429906 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -360,4 +360,9 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 
 struct request_queue *blk_alloc_queue(int node_id);
 
+void disk_alloc_events(struct gendisk *disk);
+void disk_add_events(struct gendisk *disk);
+void disk_del_events(struct gendisk *disk);
+void disk_release_events(struct gendisk *disk);
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/disk-events.c b/block/disk-events.c
new file mode 100644
index 000000000000..1bc5dcb75e4e
--- /dev/null
+++ b/block/disk-events.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Disk events - monitor disk events like media change and eject request.
+ */
+#include <linux/export.h>
+#include <linux/moduleparam.h>
+#include <linux/genhd.h>
+#include "blk.h"
+
+struct disk_events {
+	struct list_head	node;		/* all disk_event's */
+	struct gendisk		*disk;		/* the associated disk */
+	spinlock_t		lock;
+
+	struct mutex		block_mutex;	/* protects blocking */
+	int			block;		/* event blocking depth */
+	unsigned int		pending;	/* events already sent out */
+	unsigned int		clearing;	/* events being cleared */
+
+	long			poll_msecs;	/* interval, -1 for default */
+	struct delayed_work	dwork;
+};
+
+static const char *disk_events_strs[] = {
+	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "media_change",
+	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "eject_request",
+};
+
+static char *disk_uevents[] = {
+	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "DISK_MEDIA_CHANGE=1",
+	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "DISK_EJECT_REQUEST=1",
+};
+
+/* list of all disk_events */
+static DEFINE_MUTEX(disk_events_mutex);
+static LIST_HEAD(disk_events);
+
+/* disable in-kernel polling by default */
+static unsigned long disk_events_dfl_poll_msecs;
+
+static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
+{
+	struct disk_events *ev = disk->ev;
+	long intv_msecs = 0;
+
+	/*
+	 * If device-specific poll interval is set, always use it.  If
+	 * the default is being used, poll if the POLL flag is set.
+	 */
+	if (ev->poll_msecs >= 0)
+		intv_msecs = ev->poll_msecs;
+	else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
+		intv_msecs = disk_events_dfl_poll_msecs;
+
+	return msecs_to_jiffies(intv_msecs);
+}
+
+/**
+ * disk_block_events - block and flush disk event checking
+ * @disk: disk to block events for
+ *
+ * On return from this function, it is guaranteed that event checking
+ * isn't in progress and won't happen until unblocked by
+ * disk_unblock_events().  Events blocking is counted and the actual
+ * unblocking happens after the matching number of unblocks are done.
+ *
+ * Note that this intentionally does not block event checking from
+ * disk_clear_events().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void disk_block_events(struct gendisk *disk)
+{
+	struct disk_events *ev = disk->ev;
+	unsigned long flags;
+	bool cancel;
+
+	if (!ev)
+		return;
+
+	/*
+	 * Outer mutex ensures that the first blocker completes canceling
+	 * the event work before further blockers are allowed to finish.
+	 */
+	mutex_lock(&ev->block_mutex);
+
+	spin_lock_irqsave(&ev->lock, flags);
+	cancel = !ev->block++;
+	spin_unlock_irqrestore(&ev->lock, flags);
+
+	if (cancel)
+		cancel_delayed_work_sync(&disk->ev->dwork);
+
+	mutex_unlock(&ev->block_mutex);
+}
+
+static void __disk_unblock_events(struct gendisk *disk, bool check_now)
+{
+	struct disk_events *ev = disk->ev;
+	unsigned long intv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ev->lock, flags);
+
+	if (WARN_ON_ONCE(ev->block <= 0))
+		goto out_unlock;
+
+	if (--ev->block)
+		goto out_unlock;
+
+	intv = disk_events_poll_jiffies(disk);
+	if (check_now)
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				&ev->dwork, 0);
+	else if (intv)
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				&ev->dwork, intv);
+out_unlock:
+	spin_unlock_irqrestore(&ev->lock, flags);
+}
+
+/**
+ * disk_unblock_events - unblock disk event checking
+ * @disk: disk to unblock events for
+ *
+ * Undo disk_block_events().  When the block count reaches zero, it
+ * starts events polling if configured.
+ *
+ * CONTEXT:
+ * Don't care.  Safe to call from irq context.
+ */
+void disk_unblock_events(struct gendisk *disk)
+{
+	if (disk->ev)
+		__disk_unblock_events(disk, false);
+}
+
+/**
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
+ *
+ * Schedule immediate event checking on @disk if not blocked.  Events in
+ * @mask are scheduled to be cleared from the driver.  Note that this
+ * doesn't clear the events from @disk->ev.
+ *
+ * CONTEXT:
+ * If @mask is non-zero must be called with disk->open_mutex held.
+ */
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
+{
+	struct disk_events *ev = disk->ev;
+
+	if (!ev)
+		return;
+
+	spin_lock_irq(&ev->lock);
+	ev->clearing |= mask;
+	if (!ev->block)
+		mod_delayed_work(system_freezable_power_efficient_wq,
+				&ev->dwork, 0);
+	spin_unlock_irq(&ev->lock);
+}
+
+static void disk_check_events(struct disk_events *ev,
+			      unsigned int *clearing_ptr)
+{
+	struct gendisk *disk = ev->disk;
+	char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
+	unsigned int clearing = *clearing_ptr;
+	unsigned int events;
+	unsigned long intv;
+	int nr_events = 0, i;
+
+	/* check events */
+	events = disk->fops->check_events(disk, clearing);
+
+	/* accumulate pending events and schedule next poll if necessary */
+	spin_lock_irq(&ev->lock);
+
+	events &= ~ev->pending;
+	ev->pending |= events;
+	*clearing_ptr &= ~clearing;
+
+	intv = disk_events_poll_jiffies(disk);
+	if (!ev->block && intv)
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				&ev->dwork, intv);
+
+	spin_unlock_irq(&ev->lock);
+
+	/*
+	 * Tell userland about new events.  Only the events listed in
+	 * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
+	 * is set. Otherwise, events are processed internally but never
+	 * get reported to userland.
+	 */
+	for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
+		if ((events & disk->events & (1 << i)) &&
+		    (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
+			envp[nr_events++] = disk_uevents[i];
+
+	if (nr_events)
+		kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+}
+
+/**
+ * disk_clear_events - synchronously check, clear and return pending events
+ * @disk: disk to fetch and clear events from
+ * @mask: mask of events to be fetched and cleared
+ *
+ * Disk events are synchronously checked and pending events in @mask
+ * are cleared and returned.  This ignores the block count.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
+{
+	struct disk_events *ev = disk->ev;
+	unsigned int pending;
+	unsigned int clearing = mask;
+
+	if (!ev)
+		return 0;
+
+	disk_block_events(disk);
+
+	/*
+	 * store the union of mask and ev->clearing on the stack so that the
+	 * race with disk_flush_events does not cause ambiguity (ev->clearing
+	 * can still be modified even if events are blocked).
+	 */
+	spin_lock_irq(&ev->lock);
+	clearing |= ev->clearing;
+	ev->clearing = 0;
+	spin_unlock_irq(&ev->lock);
+
+	disk_check_events(ev, &clearing);
+	/*
+	 * if ev->clearing is not 0, the disk_flush_events got called in the
+	 * middle of this function, so we want to run the workfn without delay.
+	 */
+	__disk_unblock_events(disk, ev->clearing ? true : false);
+
+	/* then, fetch and clear pending events */
+	spin_lock_irq(&ev->lock);
+	pending = ev->pending & mask;
+	ev->pending &= ~mask;
+	spin_unlock_irq(&ev->lock);
+	WARN_ON_ONCE(clearing & mask);
+
+	return pending;
+}
+
+/**
+ * bdev_check_media_change - check if a removable media has been changed
+ * @bdev: block device to check
+ *
+ * Check whether a removable media has been changed, and attempt to free all
+ * dentries and inodes and invalidates all block device page cache entries in
+ * that case.
+ *
+ * Returns %true if the block device changed, or %false if not.
+ */
+bool bdev_check_media_change(struct block_device *bdev)
+{
+	unsigned int events;
+
+	events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
+				   DISK_EVENT_EJECT_REQUEST);
+	if (!(events & DISK_EVENT_MEDIA_CHANGE))
+		return false;
+
+	if (__invalidate_device(bdev, true))
+		pr_warn("VFS: busy inodes on changed media %s\n",
+			bdev->bd_disk->disk_name);
+	set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+	return true;
+}
+EXPORT_SYMBOL(bdev_check_media_change);
+
+/*
+ * Separate this part out so that a different pointer for clearing_ptr can be
+ * passed in for disk_clear_events.
+ */
+static void disk_events_workfn(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
+
+	disk_check_events(ev, &ev->clearing);
+}
+
+/*
+ * A disk events enabled device has the following sysfs nodes under
+ * its /sys/block/X/ directory.
+ *
+ * events		: list of all supported events
+ * events_async		: list of events which can be detected w/o polling
+ *			  (always empty, only for backwards compatibility)
+ * events_poll_msecs	: polling interval, 0: disable, -1: system default
+ */
+static ssize_t __disk_events_show(unsigned int events, char *buf)
+{
+	const char *delim = "";
+	ssize_t pos = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
+		if (events & (1 << i)) {
+			pos += sprintf(buf + pos, "%s%s",
+				       delim, disk_events_strs[i]);
+			delim = " ";
+		}
+	if (pos)
+		pos += sprintf(buf + pos, "\n");
+	return pos;
+}
+
+static ssize_t disk_events_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
+		return 0;
+	return __disk_events_show(disk->events, buf);
+}
+
+static ssize_t disk_events_async_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	return 0;
+}
+
+static ssize_t disk_events_poll_msecs_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (!disk->ev)
+		return sprintf(buf, "-1\n");
+	return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
+}
+
+static ssize_t disk_events_poll_msecs_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	long intv;
+
+	if (!count || !sscanf(buf, "%ld", &intv))
+		return -EINVAL;
+
+	if (intv < 0 && intv != -1)
+		return -EINVAL;
+
+	if (!disk->ev)
+		return -ENODEV;
+
+	disk_block_events(disk);
+	disk->ev->poll_msecs = intv;
+	__disk_unblock_events(disk, true);
+	return count;
+}
+
+static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, 0644,
+			 disk_events_poll_msecs_show,
+			 disk_events_poll_msecs_store);
+
+static const struct attribute *disk_events_attrs[] = {
+	&dev_attr_events.attr,
+	&dev_attr_events_async.attr,
+	&dev_attr_events_poll_msecs.attr,
+	NULL,
+};
+
+/*
+ * The default polling interval can be specified by the kernel
+ * parameter block.events_dfl_poll_msecs which defaults to 0
+ * (disable).  This can also be modified runtime by writing to
+ * /sys/module/block/parameters/events_dfl_poll_msecs.
+ */
+static int disk_events_set_dfl_poll_msecs(const char *val,
+					  const struct kernel_param *kp)
+{
+	struct disk_events *ev;
+	int ret;
+
+	ret = param_set_ulong(val, kp);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&disk_events_mutex);
+	list_for_each_entry(ev, &disk_events, node)
+		disk_flush_events(ev->disk, 0);
+	mutex_unlock(&disk_events_mutex);
+	return 0;
+}
+
+static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
+	.set	= disk_events_set_dfl_poll_msecs,
+	.get	= param_get_ulong,
+};
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX	"block."
+
+module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
+		&disk_events_dfl_poll_msecs, 0644);
+
+/*
+ * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
+ */
+void disk_alloc_events(struct gendisk *disk)
+{
+	struct disk_events *ev;
+
+	if (!disk->fops->check_events || !disk->events)
+		return;
+
+	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+	if (!ev) {
+		pr_warn("%s: failed to initialize events\n", disk->disk_name);
+		return;
+	}
+
+	INIT_LIST_HEAD(&ev->node);
+	ev->disk = disk;
+	spin_lock_init(&ev->lock);
+	mutex_init(&ev->block_mutex);
+	ev->block = 1;
+	ev->poll_msecs = -1;
+	INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
+
+	disk->ev = ev;
+}
+
+void disk_add_events(struct gendisk *disk)
+{
+	/* FIXME: error handling */
+	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
+		pr_warn("%s: failed to create sysfs files for events\n",
+			disk->disk_name);
+
+	if (!disk->ev)
+		return;
+
+	mutex_lock(&disk_events_mutex);
+	list_add_tail(&disk->ev->node, &disk_events);
+	mutex_unlock(&disk_events_mutex);
+
+	/*
+	 * Block count is initialized to 1 and the following initial
+	 * unblock kicks it into action.
+	 */
+	__disk_unblock_events(disk, true);
+}
+
+void disk_del_events(struct gendisk *disk)
+{
+	if (disk->ev) {
+		disk_block_events(disk);
+
+		mutex_lock(&disk_events_mutex);
+		list_del_init(&disk->ev->node);
+		mutex_unlock(&disk_events_mutex);
+	}
+
+	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
+}
+
+void disk_release_events(struct gendisk *disk)
+{
+	/* the block count should be 1 from disk_del_events() */
+	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
+	kfree(disk->ev);
+}
diff --git a/block/genhd.c b/block/genhd.c
index 5f5628216295..4f879deede9a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -33,13 +33,6 @@ static struct kobject *block_depr;
 #define NR_EXT_DEVT		(1 << MINORBITS)
 static DEFINE_IDA(ext_devt_ida);
 
-static void disk_check_events(struct disk_events *ev,
-			      unsigned int *clearing_ptr);
-static void disk_alloc_events(struct gendisk *disk);
-static void disk_add_events(struct gendisk *disk);
-static void disk_del_events(struct gendisk *disk);
-static void disk_release_events(struct gendisk *disk);
-
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
 	struct block_device *bdev = disk->part0;
@@ -1367,488 +1360,3 @@ int bdev_read_only(struct block_device *bdev)
 	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
 }
 EXPORT_SYMBOL(bdev_read_only);
-
-/*
- * Disk events - monitor disk events like media change and eject request.
- */
-struct disk_events {
-	struct list_head	node;		/* all disk_event's */
-	struct gendisk		*disk;		/* the associated disk */
-	spinlock_t		lock;
-
-	struct mutex		block_mutex;	/* protects blocking */
-	int			block;		/* event blocking depth */
-	unsigned int		pending;	/* events already sent out */
-	unsigned int		clearing;	/* events being cleared */
-
-	long			poll_msecs;	/* interval, -1 for default */
-	struct delayed_work	dwork;
-};
-
-static const char *disk_events_strs[] = {
-	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "media_change",
-	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "eject_request",
-};
-
-static char *disk_uevents[] = {
-	[ilog2(DISK_EVENT_MEDIA_CHANGE)]	= "DISK_MEDIA_CHANGE=1",
-	[ilog2(DISK_EVENT_EJECT_REQUEST)]	= "DISK_EJECT_REQUEST=1",
-};
-
-/* list of all disk_events */
-static DEFINE_MUTEX(disk_events_mutex);
-static LIST_HEAD(disk_events);
-
-/* disable in-kernel polling by default */
-static unsigned long disk_events_dfl_poll_msecs;
-
-static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
-{
-	struct disk_events *ev = disk->ev;
-	long intv_msecs = 0;
-
-	/*
-	 * If device-specific poll interval is set, always use it.  If
-	 * the default is being used, poll if the POLL flag is set.
-	 */
-	if (ev->poll_msecs >= 0)
-		intv_msecs = ev->poll_msecs;
-	else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
-		intv_msecs = disk_events_dfl_poll_msecs;
-
-	return msecs_to_jiffies(intv_msecs);
-}
-
-/**
- * disk_block_events - block and flush disk event checking
- * @disk: disk to block events for
- *
- * On return from this function, it is guaranteed that event checking
- * isn't in progress and won't happen until unblocked by
- * disk_unblock_events().  Events blocking is counted and the actual
- * unblocking happens after the matching number of unblocks are done.
- *
- * Note that this intentionally does not block event checking from
- * disk_clear_events().
- *
- * CONTEXT:
- * Might sleep.
- */
-void disk_block_events(struct gendisk *disk)
-{
-	struct disk_events *ev = disk->ev;
-	unsigned long flags;
-	bool cancel;
-
-	if (!ev)
-		return;
-
-	/*
-	 * Outer mutex ensures that the first blocker completes canceling
-	 * the event work before further blockers are allowed to finish.
-	 */
-	mutex_lock(&ev->block_mutex);
-
-	spin_lock_irqsave(&ev->lock, flags);
-	cancel = !ev->block++;
-	spin_unlock_irqrestore(&ev->lock, flags);
-
-	if (cancel)
-		cancel_delayed_work_sync(&disk->ev->dwork);
-
-	mutex_unlock(&ev->block_mutex);
-}
-
-static void __disk_unblock_events(struct gendisk *disk, bool check_now)
-{
-	struct disk_events *ev = disk->ev;
-	unsigned long intv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ev->lock, flags);
-
-	if (WARN_ON_ONCE(ev->block <= 0))
-		goto out_unlock;
-
-	if (--ev->block)
-		goto out_unlock;
-
-	intv = disk_events_poll_jiffies(disk);
-	if (check_now)
-		queue_delayed_work(system_freezable_power_efficient_wq,
-				&ev->dwork, 0);
-	else if (intv)
-		queue_delayed_work(system_freezable_power_efficient_wq,
-				&ev->dwork, intv);
-out_unlock:
-	spin_unlock_irqrestore(&ev->lock, flags);
-}
-
-/**
- * disk_unblock_events - unblock disk event checking
- * @disk: disk to unblock events for
- *
- * Undo disk_block_events().  When the block count reaches zero, it
- * starts events polling if configured.
- *
- * CONTEXT:
- * Don't care.  Safe to call from irq context.
- */
-void disk_unblock_events(struct gendisk *disk)
-{
-	if (disk->ev)
-		__disk_unblock_events(disk, false);
-}
-
-/**
- * disk_flush_events - schedule immediate event checking and flushing
- * @disk: disk to check and flush events for
- * @mask: events to flush
- *
- * Schedule immediate event checking on @disk if not blocked.  Events in
- * @mask are scheduled to be cleared from the driver.  Note that this
- * doesn't clear the events from @disk->ev.
- *
- * CONTEXT:
- * If @mask is non-zero must be called with disk->open_mutex held.
- */
-void disk_flush_events(struct gendisk *disk, unsigned int mask)
-{
-	struct disk_events *ev = disk->ev;
-
-	if (!ev)
-		return;
-
-	spin_lock_irq(&ev->lock);
-	ev->clearing |= mask;
-	if (!ev->block)
-		mod_delayed_work(system_freezable_power_efficient_wq,
-				&ev->dwork, 0);
-	spin_unlock_irq(&ev->lock);
-}
-
-/**
- * disk_clear_events - synchronously check, clear and return pending events
- * @disk: disk to fetch and clear events from
- * @mask: mask of events to be fetched and cleared
- *
- * Disk events are synchronously checked and pending events in @mask
- * are cleared and returned.  This ignores the block count.
- *
- * CONTEXT:
- * Might sleep.
- */
-static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
-{
-	struct disk_events *ev = disk->ev;
-	unsigned int pending;
-	unsigned int clearing = mask;
-
-	if (!ev)
-		return 0;
-
-	disk_block_events(disk);
-
-	/*
-	 * store the union of mask and ev->clearing on the stack so that the
-	 * race with disk_flush_events does not cause ambiguity (ev->clearing
-	 * can still be modified even if events are blocked).
-	 */
-	spin_lock_irq(&ev->lock);
-	clearing |= ev->clearing;
-	ev->clearing = 0;
-	spin_unlock_irq(&ev->lock);
-
-	disk_check_events(ev, &clearing);
-	/*
-	 * if ev->clearing is not 0, the disk_flush_events got called in the
-	 * middle of this function, so we want to run the workfn without delay.
-	 */
-	__disk_unblock_events(disk, ev->clearing ? true : false);
-
-	/* then, fetch and clear pending events */
-	spin_lock_irq(&ev->lock);
-	pending = ev->pending & mask;
-	ev->pending &= ~mask;
-	spin_unlock_irq(&ev->lock);
-	WARN_ON_ONCE(clearing & mask);
-
-	return pending;
-}
-
-/**
- * bdev_check_media_change - check if a removable media has been changed
- * @bdev: block device to check
- *
- * Check whether a removable media has been changed, and attempt to free all
- * dentries and inodes and invalidates all block device page cache entries in
- * that case.
- *
- * Returns %true if the block device changed, or %false if not.
- */
-bool bdev_check_media_change(struct block_device *bdev)
-{
-	unsigned int events;
-
-	events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
-				   DISK_EVENT_EJECT_REQUEST);
-	if (!(events & DISK_EVENT_MEDIA_CHANGE))
-		return false;
-
-	if (__invalidate_device(bdev, true))
-		pr_warn("VFS: busy inodes on changed media %s\n",
-			bdev->bd_disk->disk_name);
-	set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
-	return true;
-}
-EXPORT_SYMBOL(bdev_check_media_change);
-
-/*
- * Separate this part out so that a different pointer for clearing_ptr can be
- * passed in for disk_clear_events.
- */
-static void disk_events_workfn(struct work_struct *work)
-{
-	struct delayed_work *dwork = to_delayed_work(work);
-	struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
-
-	disk_check_events(ev, &ev->clearing);
-}
-
-static void disk_check_events(struct disk_events *ev,
-			      unsigned int *clearing_ptr)
-{
-	struct gendisk *disk = ev->disk;
-	char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
-	unsigned int clearing = *clearing_ptr;
-	unsigned int events;
-	unsigned long intv;
-	int nr_events = 0, i;
-
-	/* check events */
-	events = disk->fops->check_events(disk, clearing);
-
-	/* accumulate pending events and schedule next poll if necessary */
-	spin_lock_irq(&ev->lock);
-
-	events &= ~ev->pending;
-	ev->pending |= events;
-	*clearing_ptr &= ~clearing;
-
-	intv = disk_events_poll_jiffies(disk);
-	if (!ev->block && intv)
-		queue_delayed_work(system_freezable_power_efficient_wq,
-				&ev->dwork, intv);
-
-	spin_unlock_irq(&ev->lock);
-
-	/*
-	 * Tell userland about new events.  Only the events listed in
-	 * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
-	 * is set. Otherwise, events are processed internally but never
-	 * get reported to userland.
-	 */
-	for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
-		if ((events & disk->events & (1 << i)) &&
-		    (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
-			envp[nr_events++] = disk_uevents[i];
-
-	if (nr_events)
-		kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
-}
-
-/*
- * A disk events enabled device has the following sysfs nodes under
- * its /sys/block/X/ directory.
- *
- * events		: list of all supported events
- * events_async		: list of events which can be detected w/o polling
- *			  (always empty, only for backwards compatibility)
- * events_poll_msecs	: polling interval, 0: disable, -1: system default
- */
-static ssize_t __disk_events_show(unsigned int events, char *buf)
-{
-	const char *delim = "";
-	ssize_t pos = 0;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
-		if (events & (1 << i)) {
-			pos += sprintf(buf + pos, "%s%s",
-				       delim, disk_events_strs[i]);
-			delim = " ";
-		}
-	if (pos)
-		pos += sprintf(buf + pos, "\n");
-	return pos;
-}
-
-static ssize_t disk_events_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
-		return 0;
-
-	return __disk_events_show(disk->events, buf);
-}
-
-static ssize_t disk_events_async_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
-{
-	return 0;
-}
-
-static ssize_t disk_events_poll_msecs_show(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	if (!disk->ev)
-		return sprintf(buf, "-1\n");
-
-	return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
-}
-
-static ssize_t disk_events_poll_msecs_store(struct device *dev,
-					    struct device_attribute *attr,
-					    const char *buf, size_t count)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-	long intv;
-
-	if (!count || !sscanf(buf, "%ld", &intv))
-		return -EINVAL;
-
-	if (intv < 0 && intv != -1)
-		return -EINVAL;
-
-	if (!disk->ev)
-		return -ENODEV;
-
-	disk_block_events(disk);
-	disk->ev->poll_msecs = intv;
-	__disk_unblock_events(disk, true);
-
-	return count;
-}
-
-static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, 0644,
-			 disk_events_poll_msecs_show,
-			 disk_events_poll_msecs_store);
-
-static const struct attribute *disk_events_attrs[] = {
-	&dev_attr_events.attr,
-	&dev_attr_events_async.attr,
-	&dev_attr_events_poll_msecs.attr,
-	NULL,
-};
-
-/*
- * The default polling interval can be specified by the kernel
- * parameter block.events_dfl_poll_msecs which defaults to 0
- * (disable).  This can also be modified runtime by writing to
- * /sys/module/block/parameters/events_dfl_poll_msecs.
- */
-static int disk_events_set_dfl_poll_msecs(const char *val,
-					  const struct kernel_param *kp)
-{
-	struct disk_events *ev;
-	int ret;
-
-	ret = param_set_ulong(val, kp);
-	if (ret < 0)
-		return ret;
-
-	mutex_lock(&disk_events_mutex);
-
-	list_for_each_entry(ev, &disk_events, node)
-		disk_flush_events(ev->disk, 0);
-
-	mutex_unlock(&disk_events_mutex);
-
-	return 0;
-}
-
-static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
-	.set	= disk_events_set_dfl_poll_msecs,
-	.get	= param_get_ulong,
-};
-
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX	"block."
-
-module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
-		&disk_events_dfl_poll_msecs, 0644);
-
-/*
- * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
- */
-static void disk_alloc_events(struct gendisk *disk)
-{
-	struct disk_events *ev;
-
-	if (!disk->fops->check_events || !disk->events)
-		return;
-
-	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
-	if (!ev) {
-		pr_warn("%s: failed to initialize events\n", disk->disk_name);
-		return;
-	}
-
-	INIT_LIST_HEAD(&ev->node);
-	ev->disk = disk;
-	spin_lock_init(&ev->lock);
-	mutex_init(&ev->block_mutex);
-	ev->block = 1;
-	ev->poll_msecs = -1;
-	INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
-
-	disk->ev = ev;
-}
-
-static void disk_add_events(struct gendisk *disk)
-{
-	/* FIXME: error handling */
-	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
-		pr_warn("%s: failed to create sysfs files for events\n",
-			disk->disk_name);
-
-	if (!disk->ev)
-		return;
-
-	mutex_lock(&disk_events_mutex);
-	list_add_tail(&disk->ev->node, &disk_events);
-	mutex_unlock(&disk_events_mutex);
-
-	/*
-	 * Block count is initialized to 1 and the following initial
-	 * unblock kicks it into action.
-	 */
-	__disk_unblock_events(disk, true);
-}
-
-static void disk_del_events(struct gendisk *disk)
-{
-	if (disk->ev) {
-		disk_block_events(disk);
-
-		mutex_lock(&disk_events_mutex);
-		list_del_init(&disk->ev->node);
-		mutex_unlock(&disk_events_mutex);
-	}
-
-	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
-}
-
-static void disk_release_events(struct gendisk *disk)
-{
-	/* the block count should be 1 from disk_del_events() */
-	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
-	kfree(disk->ev);
-}

From 2bc8cda5ea4b42ff78be1b11011092d57b424d37 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Jun 2021 09:38:43 +0200
Subject: [PATCH 2811/3292] block: add the events* attributes to disk_attrs

Add the events attributes to the disk_attrs array, which ensures they are
added by the driver core when the device is created rather than adding
them after the device has been added, which is racy versus uevents and
requires more boilerplate code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20210624073843.251178-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h         |  3 +++
 block/disk-events.c | 23 ++++-------------------
 block/genhd.c       |  3 +++
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index f8d726429906..4fcd7a032377 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -364,5 +364,8 @@ void disk_alloc_events(struct gendisk *disk);
 void disk_add_events(struct gendisk *disk);
 void disk_del_events(struct gendisk *disk);
 void disk_release_events(struct gendisk *disk);
+extern struct device_attribute dev_attr_events;
+extern struct device_attribute dev_attr_events_async;
+extern struct device_attribute dev_attr_events_poll_msecs;
 
 #endif /* BLK_INTERNAL_H */
diff --git a/block/disk-events.c b/block/disk-events.c
index 1bc5dcb75e4e..a75931ff5da4 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -368,18 +368,10 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
 	return count;
 }
 
-static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, 0644,
-			 disk_events_poll_msecs_show,
-			 disk_events_poll_msecs_store);
-
-static const struct attribute *disk_events_attrs[] = {
-	&dev_attr_events.attr,
-	&dev_attr_events_async.attr,
-	&dev_attr_events_poll_msecs.attr,
-	NULL,
-};
+DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+DEVICE_ATTR(events_poll_msecs, 0644, disk_events_poll_msecs_show,
+	    disk_events_poll_msecs_store);
 
 /*
  * The default polling interval can be specified by the kernel
@@ -444,11 +436,6 @@ void disk_alloc_events(struct gendisk *disk)
 
 void disk_add_events(struct gendisk *disk)
 {
-	/* FIXME: error handling */
-	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
-		pr_warn("%s: failed to create sysfs files for events\n",
-			disk->disk_name);
-
 	if (!disk->ev)
 		return;
 
@@ -472,8 +459,6 @@ void disk_del_events(struct gendisk *disk)
 		list_del_init(&disk->ev->node);
 		mutex_unlock(&disk_events_mutex);
 	}
-
-	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
 }
 
 void disk_release_events(struct gendisk *disk)
diff --git a/block/genhd.c b/block/genhd.c
index 4f879deede9a..79aa40b4c39c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1022,6 +1022,9 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
 	&dev_attr_badblocks.attr,
+	&dev_attr_events.attr,
+	&dev_attr_events_async.attr,
+	&dev_attr_events_poll_msecs.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif

From 630161cfdf5cdc696a82b59410d1ff00b23d946e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Jun 2021 14:32:39 +0200
Subject: [PATCH 2812/3292] block: move bdev_disk_changed

Move bdev_disk_changed to block/partitions/core.c, together with the
rest of the partition scanning code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210624123240.441814-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/core.c | 55 ++++++++++++++++++++++++++++++++++++++++-
 fs/block_dev.c          | 53 ---------------------------------------
 include/linux/genhd.h   |  1 -
 3 files changed, 54 insertions(+), 55 deletions(-)

diff --git a/block/partitions/core.c b/block/partitions/core.c
index 186d4fbd9f09..b79785f7027c 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -596,7 +596,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
 	return true;
 }
 
-int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
+static int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
 {
 	struct parsed_partitions *state;
 	int ret = -EAGAIN, p;
@@ -657,6 +657,59 @@ out_free_state:
 	return ret;
 }
 
+int bdev_disk_changed(struct block_device *bdev, bool invalidate)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	int ret = 0;
+
+	lockdep_assert_held(&disk->open_mutex);
+
+	if (!(disk->flags & GENHD_FL_UP))
+		return -ENXIO;
+
+rescan:
+	if (disk->open_partitions)
+		return -EBUSY;
+	sync_blockdev(bdev);
+	invalidate_bdev(bdev);
+	blk_drop_partitions(disk);
+
+	clear_bit(GD_NEED_PART_SCAN, &disk->state);
+
+	/*
+	 * Historically we only set the capacity to zero for devices that
+	 * support partitions (independ of actually having partitions created).
+	 * Doing that is rather inconsistent, but changing it broke legacy
+	 * udisks polling for legacy ide-cdrom devices.  Use the crude check
+	 * below to get the sane behavior for most device while not breaking
+	 * userspace for this particular setup.
+	 */
+	if (invalidate) {
+		if (disk_part_scan_enabled(disk) ||
+		    !(disk->flags & GENHD_FL_REMOVABLE))
+			set_capacity(disk, 0);
+	}
+
+	if (get_capacity(disk)) {
+		ret = blk_add_partitions(disk, bdev);
+		if (ret == -EAGAIN)
+			goto rescan;
+	} else if (invalidate) {
+		/*
+		 * Tell userspace that the media / partition table may have
+		 * changed.
+		 */
+		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+	}
+
+	return ret;
+}
+/*
+ * Only exported for loop and dasd for historic reasons.  Don't use in new
+ * code!
+ */
+EXPORT_SYMBOL_GPL(bdev_disk_changed);
+
 void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
 {
 	struct address_space *mapping = state->bdev->bd_inode->i_mapping;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ac9b3c158a77..5b3a73ecb696 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1242,59 +1242,6 @@ static void blkdev_flush_mapping(struct block_device *bdev)
 	bdev_write_inode(bdev);
 }
 
-int bdev_disk_changed(struct block_device *bdev, bool invalidate)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	int ret = 0;
-
-	lockdep_assert_held(&disk->open_mutex);
-
-	if (!(disk->flags & GENHD_FL_UP))
-		return -ENXIO;
-
-rescan:
-	if (disk->open_partitions)
-		return -EBUSY;
-	sync_blockdev(bdev);
-	invalidate_bdev(bdev);
-	blk_drop_partitions(disk);
-
-	clear_bit(GD_NEED_PART_SCAN, &disk->state);
-
-	/*
-	 * Historically we only set the capacity to zero for devices that
-	 * support partitions (independ of actually having partitions created).
-	 * Doing that is rather inconsistent, but changing it broke legacy
-	 * udisks polling for legacy ide-cdrom devices.  Use the crude check
-	 * below to get the sane behavior for most device while not breaking
-	 * userspace for this particular setup.
-	 */
-	if (invalidate) {
-		if (disk_part_scan_enabled(disk) ||
-		    !(disk->flags & GENHD_FL_REMOVABLE))
-			set_capacity(disk, 0);
-	}
-
-	if (get_capacity(disk)) {
-		ret = blk_add_partitions(disk, bdev);
-		if (ret == -EAGAIN)
-			goto rescan;
-	} else if (invalidate) {
-		/*
-		 * Tell userspace that the media / partition table may have
-		 * changed.
-		 */
-		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
-	}
-
-	return ret;
-}
-/*
- * Only exported for loop and dasd for historic reasons.  Don't use in new
- * code!
- */
-EXPORT_SYMBOL_GPL(bdev_disk_changed);
-
 static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 03d684f0498f..f5f0c9bdf1d2 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -257,7 +257,6 @@ static inline sector_t get_capacity(struct gendisk *disk)
 }
 
 int bdev_disk_changed(struct block_device *bdev, bool invalidate);
-int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
 void blk_drop_partitions(struct gendisk *disk);
 
 extern struct gendisk *__alloc_disk_node(int minors, int node_id);

From 0384264ea8a39bd98c9a3158060565f650c056a6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Jun 2021 14:32:40 +0200
Subject: [PATCH 2813/3292] block: pass a gendisk to bdev_disk_changed

bdev_disk_changed can only operate on whole devices.  Make that clear
by passing a gendisk instead of the struct block_device.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210624123240.441814-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/core.c         | 22 ++++++++++------------
 drivers/block/loop.c            | 21 ++++++++++-----------
 drivers/s390/block/dasd_genhd.c |  4 ++--
 fs/block_dev.c                  |  4 ++--
 include/linux/genhd.h           |  2 +-
 5 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/block/partitions/core.c b/block/partitions/core.c
index b79785f7027c..347c56a51d87 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -120,8 +120,7 @@ static void free_partitions(struct parsed_partitions *state)
 	kfree(state);
 }
 
-static struct parsed_partitions *check_partition(struct gendisk *hd,
-		struct block_device *bdev)
+static struct parsed_partitions *check_partition(struct gendisk *hd)
 {
 	struct parsed_partitions *state;
 	int i, res, err;
@@ -136,7 +135,7 @@ static struct parsed_partitions *check_partition(struct gendisk *hd,
 	}
 	state->pp_buf[0] = '\0';
 
-	state->bdev = bdev;
+	state->bdev = hd->part0;
 	disk_name(hd, 0, state->name);
 	snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
 	if (isdigit(state->name[strlen(state->name)-1]))
@@ -546,7 +545,7 @@ void blk_drop_partitions(struct gendisk *disk)
 	}
 }
 
-static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
+static bool blk_add_partition(struct gendisk *disk,
 		struct parsed_partitions *state, int p)
 {
 	sector_t size = state->parts[p].size;
@@ -596,7 +595,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
 	return true;
 }
 
-static int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
+static int blk_add_partitions(struct gendisk *disk)
 {
 	struct parsed_partitions *state;
 	int ret = -EAGAIN, p;
@@ -604,7 +603,7 @@ static int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (!disk_part_scan_enabled(disk))
 		return 0;
 
-	state = check_partition(disk, bdev);
+	state = check_partition(disk);
 	if (!state)
 		return 0;
 	if (IS_ERR(state)) {
@@ -648,7 +647,7 @@ static int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
 	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 
 	for (p = 1; p < state->limit; p++)
-		if (!blk_add_partition(disk, bdev, state, p))
+		if (!blk_add_partition(disk, state, p))
 			goto out_free_state;
 
 	ret = 0;
@@ -657,9 +656,8 @@ out_free_state:
 	return ret;
 }
 
-int bdev_disk_changed(struct block_device *bdev, bool invalidate)
+int bdev_disk_changed(struct gendisk *disk, bool invalidate)
 {
-	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
 	lockdep_assert_held(&disk->open_mutex);
@@ -670,8 +668,8 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 rescan:
 	if (disk->open_partitions)
 		return -EBUSY;
-	sync_blockdev(bdev);
-	invalidate_bdev(bdev);
+	sync_blockdev(disk->part0);
+	invalidate_bdev(disk->part0);
 	blk_drop_partitions(disk);
 
 	clear_bit(GD_NEED_PART_SCAN, &disk->state);
@@ -691,7 +689,7 @@ rescan:
 	}
 
 	if (get_capacity(disk)) {
-		ret = blk_add_partitions(disk, bdev);
+		ret = blk_add_partitions(disk);
 		if (ret == -EAGAIN)
 			goto rescan;
 	} else if (invalidate) {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e90f7d349816..4fb1f9530d5a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -647,14 +647,13 @@ static inline void loop_update_dio(struct loop_device *lo)
 				lo->use_dio);
 }
 
-static void loop_reread_partitions(struct loop_device *lo,
-				   struct block_device *bdev)
+static void loop_reread_partitions(struct loop_device *lo)
 {
 	int rc;
 
-	mutex_lock(&bdev->bd_disk->open_mutex);
-	rc = bdev_disk_changed(bdev, false);
-	mutex_unlock(&bdev->bd_disk->open_mutex);
+	mutex_lock(&lo->lo_disk->open_mutex);
+	rc = bdev_disk_changed(lo->lo_disk, false);
+	mutex_unlock(&lo->lo_disk->open_mutex);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -752,7 +751,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	 */
 	fput(old_file);
 	if (partscan)
-		loop_reread_partitions(lo, bdev);
+		loop_reread_partitions(lo);
 	return 0;
 
 out_err:
@@ -1174,7 +1173,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	bdgrab(bdev);
 	mutex_unlock(&lo->lo_mutex);
 	if (partscan)
-		loop_reread_partitions(lo, bdev);
+		loop_reread_partitions(lo);
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
 	return 0;
@@ -1268,10 +1267,10 @@ out_unlock:
 		 * current holder is released.
 		 */
 		if (!release)
-			mutex_lock(&bdev->bd_disk->open_mutex);
-		err = bdev_disk_changed(bdev, false);
+			mutex_lock(&lo->lo_disk->open_mutex);
+		err = bdev_disk_changed(lo->lo_disk, false);
 		if (!release)
-			mutex_unlock(&bdev->bd_disk->open_mutex);
+			mutex_unlock(&lo->lo_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo_number, err);
@@ -1416,7 +1415,7 @@ out_unfreeze:
 out_unlock:
 	mutex_unlock(&lo->lo_mutex);
 	if (partscan)
-		loop_reread_partitions(lo, bdev);
+		loop_reread_partitions(lo);
 
 	return err;
 }
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index bf2082d461c7..493e8469893c 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -110,7 +110,7 @@ int dasd_scan_partitions(struct dasd_block *block)
 	}
 
 	mutex_lock(&block->gdp->open_mutex);
-	rc = bdev_disk_changed(bdev, false);
+	rc = bdev_disk_changed(block->gdp, false);
 	mutex_unlock(&block->gdp->open_mutex);
 	if (rc)
 		DBF_DEV_EVENT(DBF_ERR, block->base,
@@ -146,7 +146,7 @@ void dasd_destroy_partitions(struct dasd_block *block)
 	block->bdev = NULL;
 
 	mutex_lock(&bdev->bd_disk->open_mutex);
-	bdev_disk_changed(bdev, true);
+	bdev_disk_changed(bdev->bd_disk, true);
 	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5b3a73ecb696..34253d155f5c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1253,7 +1253,7 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 			/* avoid ghost partitions on a removed medium */
 			if (ret == -ENOMEDIUM &&
 			     test_bit(GD_NEED_PART_SCAN, &disk->state))
-				bdev_disk_changed(bdev, true);
+				bdev_disk_changed(disk, true);
 			return ret;
 		}
 	}
@@ -1264,7 +1264,7 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
 	}
 	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
-		bdev_disk_changed(bdev, false);
+		bdev_disk_changed(disk, false);
 	bdev->bd_openers++;
 	return 0;;
 }
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index f5f0c9bdf1d2..13b34177cc85 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -256,7 +256,7 @@ static inline sector_t get_capacity(struct gendisk *disk)
 	return bdev_nr_sectors(disk->part0);
 }
 
-int bdev_disk_changed(struct block_device *bdev, bool invalidate);
+int bdev_disk_changed(struct gendisk *disk, bool invalidate);
 void blk_drop_partitions(struct gendisk *disk);
 
 extern struct gendisk *__alloc_disk_node(int minors, int node_id);

From 26b0ce8dd3dd704393dbace4dc416adfeffe531f Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 7 May 2021 11:56:25 -0700
Subject: [PATCH 2814/3292] i40e: fix PTP on 5Gb links

As reported by Alex Sergeev, the i40e driver is incrementing the PTP
clock at 40Gb speeds when linked at 5Gb. Fix this bug by making
sure that the right multiplier is selected when linked at 5Gb.

Fixes: 3dbdd6c2f70a ("i40e: Add support for 5Gbps cards")
Cc: stable@vger.kernel.org
Reported-by: Alex Sergeev <asergeev@carbonrobotics.com>
Suggested-by: Alex Sergeev <asergeev@carbonrobotics.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ptp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index f1f6fc3744e9..7b971b205d36 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -11,13 +11,14 @@
  * operate with the nanosecond field directly without fear of overflow.
  *
  * Much like the 82599, the update period is dependent upon the link speed:
- * At 40Gb link or no link, the period is 1.6ns.
- * At 10Gb link, the period is multiplied by 2. (3.2ns)
+ * At 40Gb, 25Gb, or no link, the period is 1.6ns.
+ * At 10Gb or 5Gb link, the period is multiplied by 2. (3.2ns)
  * At 1Gb link, the period is multiplied by 20. (32ns)
  * 1588 functionality is not supported at 100Mbps.
  */
 #define I40E_PTP_40GB_INCVAL		0x0199999999ULL
 #define I40E_PTP_10GB_INCVAL_MULT	2
+#define I40E_PTP_5GB_INCVAL_MULT	2
 #define I40E_PTP_1GB_INCVAL_MULT	20
 
 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1  BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
@@ -465,6 +466,9 @@ void i40e_ptp_set_increment(struct i40e_pf *pf)
 	case I40E_LINK_SPEED_10GB:
 		mult = I40E_PTP_10GB_INCVAL_MULT;
 		break;
+	case I40E_LINK_SPEED_5GB:
+		mult = I40E_PTP_5GB_INCVAL_MULT;
+		break;
 	case I40E_LINK_SPEED_1GB:
 		mult = I40E_PTP_1GB_INCVAL_MULT;
 		break;

From 956e759d5f8e0859e86b951a8779c60af633aafd Mon Sep 17 00:00:00 2001
From: Jan Sokolowski <jan.sokolowski@intel.com>
Date: Fri, 11 Jun 2021 12:01:41 +0200
Subject: [PATCH 2815/3292] i40e: Fix missing rtnl locking when setting up pf
 switch

A recent change that made i40e use new udp_tunnel infrastructure
uses a method that expects to be called under rtnl lock.

However, not all codepaths do the lock prior to calling
i40e_setup_pf_switch.

Fix that by adding additional rtnl locking and unlocking.

Fixes: 40a98cb6f01f ("i40e: convert to new udp_tunnel infrastructure")
Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 526fa0a791ea..f9fe500d4ec4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -32,7 +32,7 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
 static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired);
 static int i40e_add_vsi(struct i40e_vsi *vsi);
 static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi);
-static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit);
+static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired);
 static int i40e_setup_misc_vector(struct i40e_pf *pf);
 static void i40e_determine_queue_usage(struct i40e_pf *pf);
 static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
@@ -10571,7 +10571,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 #endif /* CONFIG_I40E_DCB */
 	if (!lock_acquired)
 		rtnl_lock();
-	ret = i40e_setup_pf_switch(pf, reinit);
+	ret = i40e_setup_pf_switch(pf, reinit, true);
 	if (ret)
 		goto end_unlock;
 
@@ -14629,10 +14629,11 @@ int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
  * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
  * @pf: board private structure
  * @reinit: if the Main VSI needs to re-initialized.
+ * @lock_acquired: indicates whether or not the lock has been acquired
  *
  * Returns 0 on success, negative value on failure
  **/
-static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
+static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 {
 	u16 flags = 0;
 	int ret;
@@ -14734,9 +14735,15 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 
 	i40e_ptp_init(pf);
 
+	if (!lock_acquired)
+		rtnl_lock();
+
 	/* repopulate tunnel port filters */
 	udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev);
 
+	if (!lock_acquired)
+		rtnl_unlock();
+
 	return ret;
 }
 
@@ -15530,7 +15537,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
 	}
 #endif
-	err = i40e_setup_pf_switch(pf, false);
+	err = i40e_setup_pf_switch(pf, false, false);
 	if (err) {
 		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
 		goto err_vsis;

From e8b9eab99232c4e62ada9d7976c80fd5e8118289 Mon Sep 17 00:00:00 2001
From: Martynas Pumputis <m@lambda.lt>
Date: Wed, 23 Jun 2021 15:56:45 +0200
Subject: [PATCH 2816/3292] net: retrieve netns cookie via getsocketopt

It's getting more common to run nested container environments for
testing cloud software. One of such examples is Kind [1] which runs a
Kubernetes cluster in Docker containers on a single host. Each container
acts as a Kubernetes node, and thus can run any Pod (aka container)
inside the former. This approach simplifies testing a lot, as it
eliminates complicated VM setups.

Unfortunately, such a setup breaks some functionality when cgroupv2 BPF
programs are used for load-balancing. The load-balancer BPF program
needs to detect whether a request originates from the host netns or a
container netns in order to allow some access, e.g. to a service via a
loopback IP address. Typically, the programs detect this by comparing
netns cookies with the one of the init ns via a call to
bpf_get_netns_cookie(NULL). However, in nested environments the latter
cannot be used given the Kubernetes node's netns is outside the init ns.
To fix this, we need to pass the Kubernetes node netns cookie to the
program in a different way: by extending getsockopt() with a
SO_NETNS_COOKIE option, the orchestrator which runs in the Kubernetes
node netns can retrieve the cookie and pass it to the program instead.

Thus, this is following up on Eric's commit 3d368ab87cf6 ("net:
initialize net->net_cookie at netns setup") to allow retrieval via
SO_NETNS_COOKIE.  This is also in line in how we retrieve socket cookie
via SO_COOKIE.

  [1] https://kind.sigs.k8s.io/

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Martynas Pumputis <m@lambda.lt>
Cc: Eric Dumazet <edumazet@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  | 2 ++
 arch/mips/include/uapi/asm/socket.h   | 2 ++
 arch/parisc/include/uapi/asm/socket.h | 2 ++
 arch/sparc/include/uapi/asm/socket.h  | 2 ++
 include/uapi/asm-generic/socket.h     | 2 ++
 net/core/sock.c                       | 7 +++++++
 6 files changed, 17 insertions(+)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 57420356ce4c..6b3daba60987 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -127,6 +127,8 @@
 #define SO_PREFER_BUSY_POLL	69
 #define SO_BUSY_POLL_BUDGET	70
 
+#define SO_NETNS_COOKIE		71
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 2d949969313b..cdf404a831b2 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -138,6 +138,8 @@
 #define SO_PREFER_BUSY_POLL	69
 #define SO_BUSY_POLL_BUDGET	70
 
+#define SO_NETNS_COOKIE		71
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index f60904329bbc..5b5351cdcb33 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -119,6 +119,8 @@
 #define SO_PREFER_BUSY_POLL	0x4043
 #define SO_BUSY_POLL_BUDGET	0x4044
 
+#define SO_NETNS_COOKIE		0x4045
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 848a22fbac20..92675dc380fa 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -120,6 +120,8 @@
 #define SO_PREFER_BUSY_POLL	 0x0048
 #define SO_BUSY_POLL_BUDGET	 0x0049
 
+#define SO_NETNS_COOKIE          0x0050
+
 #if !defined(__KERNEL__)
 
 
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 4dcd13d097a9..d588c244ec2f 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -122,6 +122,8 @@
 #define SO_PREFER_BUSY_POLL	69
 #define SO_BUSY_POLL_BUDGET	70
 
+#define SO_NETNS_COOKIE		71
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/net/core/sock.c b/net/core/sock.c
index ddfa88082a2b..a2337b37eba6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1635,6 +1635,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_bound_dev_if;
 		break;
 
+	case SO_NETNS_COOKIE:
+		lv = sizeof(u64);
+		if (len != lv)
+			return -EINVAL;
+		v.val64 = sock_net(sk)->net_cookie;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).

From ae24bab257bb2043b53c80e65cdd8b507ace06c4 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 23 Jun 2021 15:56:46 +0200
Subject: [PATCH 2817/3292] tools/testing: add a selftest for SO_NETNS_COOKIE

Make sure that SO_NETNS_COOKIE returns a non-zero value, and
that sockets from different namespaces have a distinct cookie
value.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/.gitignore        |  1 +
 tools/testing/selftests/net/Makefile          |  2 +-
 tools/testing/selftests/net/config            |  1 +
 tools/testing/selftests/net/so_netns_cookie.c | 61 +++++++++++++++++++
 4 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/so_netns_cookie.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 61ae899cfc17..19deb9cdf72f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -30,3 +30,4 @@ hwtstamp_config
 rxtimestamp
 timestamping
 txtimestamp
+so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3915bb7bfc39..79c9eb0034d5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -30,7 +30,7 @@ TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
 TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_FILES += fin_ack_lat
 TEST_GEN_FILES += reuseaddr_ports_exhausted
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 614d5477365a..6f905b53904f 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,4 +1,5 @@
 CONFIG_USER_NS=y
+CONFIG_NET_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
 CONFIG_NUMA=y
diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c
new file mode 100644
index 000000000000..b39e87e967cd
--- /dev/null
+++ b/tools/testing/selftests/net/so_netns_cookie.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define pr_err(fmt, ...) \
+	({ \
+		fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+			__func__, __LINE__, ##__VA_ARGS__); \
+		1; \
+	})
+
+int main(int argc, char *argvp[])
+{
+	uint64_t cookie1, cookie2;
+	socklen_t vallen;
+	int sock1, sock2;
+
+	sock1 = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock1 < 0)
+		return pr_err("Unable to create TCP socket");
+
+	vallen = sizeof(cookie1);
+	if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0)
+		return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+	if (!cookie1)
+		return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+	if (unshare(CLONE_NEWNET))
+		return pr_err("unshare");
+
+	sock2 = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock2 < 0)
+		return pr_err("Unable to create TCP socket");
+
+	vallen = sizeof(cookie2);
+	if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0)
+		return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+	if (!cookie2)
+		return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+	if (cookie1 == cookie2)
+		return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns");
+
+	close(sock1);
+	close(sock2);
+	return 0;
+}

From 6d123b81ac615072a8525c13c6c41b695270a15d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 23 Jun 2021 14:44:38 -0700
Subject: [PATCH 2818/3292] net: ip: avoid OOM kills with large UDP sends over
 loopback

Dave observed number of machines hitting OOM on the UDP send
path. The workload seems to be sending large UDP packets over
loopback. Since loopback has MTU of 64k kernel will try to
allocate an skb with up to 64k of head space. This has a good
chance of failing under memory pressure. What's worse if
the message length is <32k the allocation may trigger an
OOM killer.

This is entirely avoidable, we can use an skb with page frags.

af_unix solves a similar problem by limiting the head
length to SKB_MAX_ALLOC. This seems like a good and simple
approach. It means that UDP messages > 16kB will now
use fragments if underlying device supports SG, if extra
allocator pressure causes regressions in real workloads
we can switch to trying the large allocation first and
falling back.

v4: pre-calculate all the additions to alloclen so
    we can be sure it won't go over order-2

Reported-by: Dave Jones <dsj@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 36 ++++++++++++++++++++----------------
 net/ipv6/ip6_output.c | 32 +++++++++++++++++---------------
 2 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c3efc7d658f6..8d8a8da3ae7e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1054,7 +1054,7 @@ static int __ip_append_data(struct sock *sk,
 			unsigned int datalen;
 			unsigned int fraglen;
 			unsigned int fraggap;
-			unsigned int alloclen;
+			unsigned int alloclen, alloc_extra;
 			unsigned int pagedlen;
 			struct sk_buff *skb_prev;
 alloc_new_skb:
@@ -1074,17 +1074,8 @@ alloc_new_skb:
 			fraglen = datalen + fragheaderlen;
 			pagedlen = 0;
 
-			if ((flags & MSG_MORE) &&
-			    !(rt->dst.dev->features&NETIF_F_SG))
-				alloclen = mtu;
-			else if (!paged)
-				alloclen = fraglen;
-			else {
-				alloclen = min_t(int, fraglen, MAX_HEADER);
-				pagedlen = fraglen - alloclen;
-			}
-
-			alloclen += exthdrlen;
+			alloc_extra = hh_len + 15;
+			alloc_extra += exthdrlen;
 
 			/* The last fragment gets additional space at tail.
 			 * Note, with MSG_MORE we overallocate on fragments,
@@ -1092,17 +1083,30 @@ alloc_new_skb:
 			 * the last.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->dst.trailer_len;
+				alloc_extra += rt->dst.trailer_len;
+
+			if ((flags & MSG_MORE) &&
+			    !(rt->dst.dev->features&NETIF_F_SG))
+				alloclen = mtu;
+			else if (!paged &&
+				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+				  !(rt->dst.dev->features & NETIF_F_SG)))
+				alloclen = fraglen;
+			else {
+				alloclen = min_t(int, fraglen, MAX_HEADER);
+				pagedlen = fraglen - alloclen;
+			}
+
+			alloclen += alloc_extra;
 
 			if (transhdrlen) {
-				skb = sock_alloc_send_skb(sk,
-						alloclen + hh_len + 15,
+				skb = sock_alloc_send_skb(sk, alloclen,
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
 				    2 * sk->sk_sndbuf)
-					skb = alloc_skb(alloclen + hh_len + 15,
+					skb = alloc_skb(alloclen,
 							sk->sk_allocation);
 				if (unlikely(!skb))
 					err = -ENOBUFS;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ff4f9ebcf7f6..497974b4372a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1555,7 +1555,7 @@ emsgsize:
 			unsigned int datalen;
 			unsigned int fraglen;
 			unsigned int fraggap;
-			unsigned int alloclen;
+			unsigned int alloclen, alloc_extra;
 			unsigned int pagedlen;
 alloc_new_skb:
 			/* There's no room in the current skb */
@@ -1582,17 +1582,28 @@ alloc_new_skb:
 			fraglen = datalen + fragheaderlen;
 			pagedlen = 0;
 
+			alloc_extra = hh_len;
+			alloc_extra += dst_exthdrlen;
+			alloc_extra += rt->dst.trailer_len;
+
+			/* We just reserve space for fragment header.
+			 * Note: this may be overallocation if the message
+			 * (without MSG_MORE) fits into the MTU.
+			 */
+			alloc_extra += sizeof(struct frag_hdr);
+
 			if ((flags & MSG_MORE) &&
 			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
-			else if (!paged)
+			else if (!paged &&
+				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+				  !(rt->dst.dev->features & NETIF_F_SG)))
 				alloclen = fraglen;
 			else {
 				alloclen = min_t(int, fraglen, MAX_HEADER);
 				pagedlen = fraglen - alloclen;
 			}
-
-			alloclen += dst_exthdrlen;
+			alloclen += alloc_extra;
 
 			if (datalen != length + fraggap) {
 				/*
@@ -1602,30 +1613,21 @@ alloc_new_skb:
 				datalen += rt->dst.trailer_len;
 			}
 
-			alloclen += rt->dst.trailer_len;
 			fraglen = datalen + fragheaderlen;
 
-			/*
-			 * We just reserve space for fragment header.
-			 * Note: this may be overallocation if the message
-			 * (without MSG_MORE) fits into the MTU.
-			 */
-			alloclen += sizeof(struct frag_hdr);
-
 			copy = datalen - transhdrlen - fraggap - pagedlen;
 			if (copy < 0) {
 				err = -EINVAL;
 				goto error;
 			}
 			if (transhdrlen) {
-				skb = sock_alloc_send_skb(sk,
-						alloclen + hh_len,
+				skb = sock_alloc_send_skb(sk, alloclen,
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
 				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
 				    2 * sk->sk_sndbuf)
-					skb = alloc_skb(alloclen + hh_len,
+					skb = alloc_skb(alloclen,
 							sk->sk_allocation);
 				if (unlikely(!skb))
 					err = -ENOBUFS;

From c88c192dc3ea209694cc08f4ccf51f920d26bdae Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Thu, 24 Jun 2021 02:51:51 +0200
Subject: [PATCH 2819/3292] net: mdiobus: fix fwnode_mdbiobus_register()
 fallback case

The fallback case of fwnode_mdbiobus_register()
(relevant for !CONFIG_FWNODE_MDIO) was defined with wrong
argument name, causing a compilation error. Fix that.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fwnode_mdio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fwnode_mdio.h b/include/linux/fwnode_mdio.h
index 13d4ae8fee0a..f62817c23137 100644
--- a/include/linux/fwnode_mdio.h
+++ b/include/linux/fwnode_mdio.h
@@ -40,7 +40,7 @@ static inline int fwnode_mdiobus_register(struct mii_bus *bus,
 	 * This way, we don't have to keep compat bits around in drivers.
 	 */
 
-	return mdiobus_register(mdio);
+	return mdiobus_register(bus);
 }
 #endif
 

From 0ec13aff058a82426c8d44b688c804cc4a5a0a3d Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:10 -0700
Subject: [PATCH 2820/3292] Revert "ibmvnic: simplify reset_long_term_buff
 function"

This reverts commit 1c7d45e7b2c29080bf6c8cd0e213cc3cbb62a054.

We tried to optimize the number of hcalls we send and skipped sending
the REQUEST_MAP calls for some maps. However during resets, we need to
resend all the maps to the VIOS since the VIOS does not remember the
old values. In fact we may have failed over to a new VIOS which will
not have any of the mappings.

When we send packets with map ids the VIOS does not know about, it
triggers a FATAL reset. While the client does recover from the FATAL
error reset, we are seeing a large number of such resets. Handling
FATAL resets is lot more unnecessary work than issuing a few more
hcalls so revert the commit and resend the maps to the VIOS.

Fixes: 1c7d45e7b2c ("ibmvnic: simplify reset_long_term_buff function")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 46 ++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5788bb956d73..4b4eccc496a8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -257,12 +257,40 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
 }
 
-static int reset_long_term_buff(struct ibmvnic_long_term_buff *ltb)
+static int reset_long_term_buff(struct ibmvnic_adapter *adapter,
+				struct ibmvnic_long_term_buff *ltb)
 {
-	if (!ltb->buff)
-		return -EINVAL;
+	struct device *dev = &adapter->vdev->dev;
+	int rc;
 
 	memset(ltb->buff, 0, ltb->size);
+
+	mutex_lock(&adapter->fw_lock);
+	adapter->fw_done_rc = 0;
+
+	reinit_completion(&adapter->fw_done);
+	rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+	if (rc) {
+		mutex_unlock(&adapter->fw_lock);
+		return rc;
+	}
+
+	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
+	if (rc) {
+		dev_info(dev,
+			 "Reset failed, long term map request timed out or aborted\n");
+		mutex_unlock(&adapter->fw_lock);
+		return rc;
+	}
+
+	if (adapter->fw_done_rc) {
+		dev_info(dev,
+			 "Reset failed, attempting to free and reallocate buffer\n");
+		free_long_term_buff(adapter, ltb);
+		mutex_unlock(&adapter->fw_lock);
+		return alloc_long_term_buff(adapter, ltb, ltb->size);
+	}
+	mutex_unlock(&adapter->fw_lock);
 	return 0;
 }
 
@@ -484,7 +512,8 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 						  rx_pool->size *
 						  rx_pool->buff_size);
 		} else {
-			rc = reset_long_term_buff(&rx_pool->long_term_buff);
+			rc = reset_long_term_buff(adapter,
+						  &rx_pool->long_term_buff);
 		}
 
 		if (rc)
@@ -607,11 +636,12 @@ static int init_rx_pools(struct net_device *netdev)
 	return 0;
 }
 
-static int reset_one_tx_pool(struct ibmvnic_tx_pool *tx_pool)
+static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
+			     struct ibmvnic_tx_pool *tx_pool)
 {
 	int rc, i;
 
-	rc = reset_long_term_buff(&tx_pool->long_term_buff);
+	rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
 	if (rc)
 		return rc;
 
@@ -638,10 +668,10 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 
 	tx_scrqs = adapter->num_active_tx_pools;
 	for (i = 0; i < tx_scrqs; i++) {
-		rc = reset_one_tx_pool(&adapter->tso_pool[i]);
+		rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
 		if (rc)
 			return rc;
-		rc = reset_one_tx_pool(&adapter->tx_pool[i]);
+		rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
 		if (rc)
 			return rc;
 	}

From 2ca220f92878470c6ba03f9946e412323093cc94 Mon Sep 17 00:00:00 2001
From: Dany Madden <drt@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:11 -0700
Subject: [PATCH 2821/3292] Revert "ibmvnic: remove duplicate napi_schedule
 call in open function"

This reverts commit 7c451f3ef676c805a4b77a743a01a5c21a250a73.

When a vnic interface is taken down and then up, connectivity is not
restored. We bisected it to this commit. Reverting this commit until
we can fully investigate the issue/benefit of the change.

Fixes: 7c451f3ef676 ("ibmvnic: remove duplicate napi_schedule call in open function")
Reported-by: Cristobal Forno <cforno12@linux.ibm.com>
Reported-by: Abdul Haleem <abdhalee@in.ibm.com>
Signed-off-by: Dany Madden <drt@linux.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4b4eccc496a8..8b2f6eb8eb21 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1210,6 +1210,11 @@ static int __ibmvnic_open(struct net_device *netdev)
 
 	netif_tx_start_all_queues(netdev);
 
+	if (prev_state == VNIC_CLOSED) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			napi_schedule(&adapter->napi[i]);
+	}
+
 	adapter->state = VNIC_OPEN;
 	return rc;
 }

From 65d6470d139a6c1655fccb5cbacbeaba8e8ad2f8 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:12 -0700
Subject: [PATCH 2822/3292] ibmvnic: clean pending indirect buffs during reset

We batch subordinate command response queue (scrq) descriptors that we
need to send to the VIOS using an "indirect" buffer. If after we queue
one or more scrqs in the indirect buffer encounter an error (say fail
to allocate an skb), we leave the queued scrq descriptors in the
indirect buffer until the next call to ibmvnic_xmit().

On the next call to ibmvnic_xmit(), it is possible that the adapter is
going through a reset and it is possible that the long term  buffers
have been unmapped on the VIOS side. If we proceed to flush (send) the
packets that are in the indirect buffer, we will end up using the old
map ids and this can cause the VIOS to trigger an unnecessary FATAL
error reset.

Instead of flushing packets remaining on the indirect_buff, discard
(clean) them instead.

Fixes: 0d973388185d4 ("ibmvnic: Introduce xmit_more support using batched subCRQ hcalls")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 8b2f6eb8eb21..2d15b446ceb3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -106,6 +106,8 @@ static void release_crq_queue(struct ibmvnic_adapter *);
 static int __ibmvnic_set_mac(struct net_device *, u8 *);
 static int init_crq_queue(struct ibmvnic_adapter *adapter);
 static int send_query_phys_parms(struct ibmvnic_adapter *adapter);
+static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
+					 struct ibmvnic_sub_crq_queue *tx_scrq);
 
 struct ibmvnic_stat {
 	char name[ETH_GSTRING_LEN];
@@ -668,6 +670,7 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 
 	tx_scrqs = adapter->num_active_tx_pools;
 	for (i = 0; i < tx_scrqs; i++) {
+		ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]);
 		rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
 		if (rc)
 			return rc;
@@ -1618,7 +1621,8 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 	ind_bufp->index = 0;
 	if (atomic_sub_return(entries, &tx_scrq->used) <=
 	    (adapter->req_tx_entries_per_subcrq / 2) &&
-	    __netif_subqueue_stopped(adapter->netdev, queue_num)) {
+	    __netif_subqueue_stopped(adapter->netdev, queue_num) &&
+	    !test_bit(0, &adapter->resetting)) {
 		netif_wake_subqueue(adapter->netdev, queue_num);
 		netdev_dbg(adapter->netdev, "Started queue %d\n",
 			   queue_num);
@@ -1711,7 +1715,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_send_failed++;
 		tx_dropped++;
 		ret = NETDEV_TX_OK;
-		ibmvnic_tx_scrq_flush(adapter, tx_scrq);
 		goto out;
 	}
 
@@ -3175,6 +3178,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 
 			netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n",
 				   i);
+			ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]);
 			if (adapter->tx_scrq[i]->irq) {
 				free_irq(adapter->tx_scrq[i]->irq,
 					 adapter->tx_scrq[i]);

From 72368f8b2b9e4106072a2728bed3367d54641c22 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:13 -0700
Subject: [PATCH 2823/3292] ibmvnic: account for bufs already saved in
 indir_buf

This fixes a crash in replenish_rx_pool() when called from ibmvnic_poll()
after a previous call to replenish_rx_pool() encountered an error when
allocating a socket buffer.

Thanks to Rick Lindsley and Dany Madden for helping debug the crash.

Fixes: 4f0b6812e9b9 ("ibmvnic: Introduce batched RX buffer descriptor transmission")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2d15b446ceb3..779de81a54a6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -328,7 +328,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 
 	rx_scrq = adapter->rx_scrq[pool->index];
 	ind_bufp = &rx_scrq->ind_buf;
-	for (i = 0; i < count; ++i) {
+
+	/* netdev_skb_alloc() could have failed after we saved a few skbs
+	 * in the indir_buf and we would not have sent them to VIOS yet.
+	 * To account for them, start the loop at ind_bufp->index rather
+	 * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will
+	 * be 0.
+	 */
+	for (i = ind_bufp->index; i < count; ++i) {
 		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
 		if (!skb) {
 			dev_err(dev, "Couldn't replenish rx buff\n");

From 552a33729f1a7cc5115d0752064fe9abd6e3e336 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:14 -0700
Subject: [PATCH 2824/3292] ibmvnic: set ltb->buff to NULL after freeing

free_long_term_buff() checks ltb->buff to decide whether we have a long
term buffer to free. So set ltb->buff to NULL afer freeing. While here,
also clear ->map_id, fix up some coding style and log an error.

Fixes: 9c4eaabd1bb39 ("Check CRQ command return codes")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 779de81a54a6..b8bdab0b2701 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -211,12 +211,11 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
 	mutex_lock(&adapter->fw_lock);
 	adapter->fw_done_rc = 0;
 	reinit_completion(&adapter->fw_done);
-	rc = send_request_map(adapter, ltb->addr,
-			      ltb->size, ltb->map_id);
+
+	rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
 	if (rc) {
-		dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
-		mutex_unlock(&adapter->fw_lock);
-		return rc;
+		dev_err(dev, "send_request_map failed, rc = %d\n", rc);
+		goto out;
 	}
 
 	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
@@ -224,20 +223,23 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
 		dev_err(dev,
 			"Long term map request aborted or timed out,rc = %d\n",
 			rc);
-		dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
-		mutex_unlock(&adapter->fw_lock);
-		return rc;
+		goto out;
 	}
 
 	if (adapter->fw_done_rc) {
 		dev_err(dev, "Couldn't map long term buffer,rc = %d\n",
 			adapter->fw_done_rc);
+		rc = -1;
+		goto out;
+	}
+	rc = 0;
+out:
+	if (rc) {
 		dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
-		mutex_unlock(&adapter->fw_lock);
-		return -1;
+		ltb->buff = NULL;
 	}
 	mutex_unlock(&adapter->fw_lock);
-	return 0;
+	return rc;
 }
 
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
@@ -257,6 +259,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 	    adapter->reset_reason != VNIC_RESET_TIMEOUT)
 		send_request_unmap(adapter, ltb->map_id);
 	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+	ltb->buff = NULL;
+	ltb->map_id = 0;
 }
 
 static int reset_long_term_buff(struct ibmvnic_adapter *adapter,

From f6ebca8efa52e4ae770f0325d618e7bcf08ada0c Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:15 -0700
Subject: [PATCH 2825/3292] ibmvnic: free tx_pool if tso_pool alloc fails

Free tx_pool and clear it, if allocation of tso_pool fails.

release_tx_pools() assumes we have both tx and tso_pools if ->tx_pool is
non-NULL. If allocation of tso_pool fails in init_tx_pools(), the assumption
will not be true and we would end up dereferencing ->tx_buff, ->free_map
fields from a NULL pointer.

Fixes: 3205306c6b8d ("ibmvnic: Update TX pool initialization routine")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b8bdab0b2701..ede65b32f821 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -778,8 +778,11 @@ static int init_tx_pools(struct net_device *netdev)
 
 	adapter->tso_pool = kcalloc(tx_subcrqs,
 				    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
-	if (!adapter->tso_pool)
+	if (!adapter->tso_pool) {
+		kfree(adapter->tx_pool);
+		adapter->tx_pool = NULL;
 		return -1;
+	}
 
 	adapter->num_active_tx_pools = tx_subcrqs;
 

From 154b3b2a6ffca445379063ef49f71895104d5a5e Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Date: Wed, 23 Jun 2021 21:13:16 -0700
Subject: [PATCH 2826/3292] ibmvnic: parenthesize a check

Parenthesize a check to be more explicit and to fix a sparse warning
seen on some distros.

Fixes: 91dc5d2553fbf ("ibmvnic: fix miscellaneous checks")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ede65b32f821..1c572491441c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3266,7 +3266,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
 		/* H_EOI would fail with rc = H_FUNCTION when running
 		 * in XIVE mode which is expected, but not an error.
 		 */
-		if (rc && rc != H_FUNCTION)
+		if (rc && (rc != H_FUNCTION))
 			dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
 				val, rc);
 	}

From f8c63088a98bac8926cb40ecf46ebd71dc1232c4 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:49 +0200
Subject: [PATCH 2827/3292] dt-bindings: net: sparx5: Add sparx5-switch
 bindings

Document the Sparx5 switch device driver bindings

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/microchip,sparx5-switch.yaml | 226 ++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml

diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
new file mode 100644
index 000000000000..347b912a46bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/microchip,sparx5-switch.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip Sparx5 Ethernet switch controller
+
+maintainers:
+  - Steen Hegelund <steen.hegelund@microchip.com>
+  - Lars Povlsen <lars.povlsen@microchip.com>
+
+description: |
+  The SparX-5 Enterprise Ethernet switch family provides a rich set of
+  Enterprise switching features such as advanced TCAM-based VLAN and
+  QoS processing enabling delivery of differentiated services, and
+  security through TCAM-based frame processing using versatile content
+  aware processor (VCAP).
+
+  IPv4/IPv6 Layer 3 (L3) unicast and multicast routing is supported
+  with up to 18K IPv4/9K IPv6 unicast LPM entries and up to 9K IPv4/3K
+  IPv6 (S,G) multicast groups.
+
+  L3 security features include source guard and reverse path
+  forwarding (uRPF) tasks. Additional L3 features include VRF-Lite and
+  IP tunnels (IP over GRE/IP).
+
+  The SparX-5 switch family targets managed Layer 2 and Layer 3
+  equipment in SMB, SME, and Enterprise where high port count
+  1G/2.5G/5G/10G switching with 10G/25G aggregation links is required.
+
+properties:
+  $nodename:
+    pattern: "^switch@[0-9a-f]+$"
+
+  compatible:
+    const: microchip,sparx5-switch
+
+  reg:
+    items:
+      - description: cpu target
+      - description: devices target
+      - description: general control block target
+
+  reg-names:
+    items:
+      - const: cpu
+      - const: devices
+      - const: gcb
+
+  interrupts:
+    minItems: 1
+    items:
+      - description: register based extraction
+      - description: frame dma based extraction
+
+  interrupt-names:
+    minItems: 1
+    items:
+      - const: xtr
+      - const: fdma
+
+  resets:
+    items:
+      - description: Reset controller used for switch core reset (soft reset)
+
+  reset-names:
+    items:
+      - const: switch
+
+  mac-address: true
+
+  ethernet-ports:
+    type: object
+    patternProperties:
+      "^port@[0-9a-f]+$":
+        type: object
+
+        properties:
+          '#address-cells':
+            const: 1
+          '#size-cells':
+            const: 0
+
+          reg:
+            description: Switch port number
+
+          phys:
+            maxItems: 1
+            description:
+              phandle of a Ethernet SerDes PHY.  This defines which SerDes
+              instance will handle the Ethernet traffic.
+
+          phy-mode:
+            description:
+              This specifies the interface used by the Ethernet SerDes towards
+              the PHY or SFP.
+
+          microchip,bandwidth:
+            description: Specifies bandwidth in Mbit/s allocated to the port.
+            $ref: "/schemas/types.yaml#/definitions/uint32"
+            maximum: 25000
+
+          phy-handle:
+            description:
+              phandle of a Ethernet PHY.  This is optional and if provided it
+              points to the cuPHY used by the Ethernet SerDes.
+
+          sfp:
+            description:
+              phandle of an SFP.  This is optional and used when not specifying
+              a cuPHY.  It points to the SFP node that describes the SFP used by
+              the Ethernet SerDes.
+
+          managed: true
+
+          microchip,sd-sgpio:
+            description:
+              Index of the ports Signal Detect SGPIO in the set of 384 SGPIOs
+              This is optional, and only needed if the default used index is
+              is not correct.
+            $ref: "/schemas/types.yaml#/definitions/uint32"
+            minimum: 0
+            maximum: 383
+
+        required:
+          - reg
+          - phys
+          - phy-mode
+          - microchip,bandwidth
+
+        oneOf:
+          - required:
+              - phy-handle
+          - required:
+              - sfp
+              - managed
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-names
+  - resets
+  - reset-names
+  - ethernet-ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    switch: switch@600000000 {
+      compatible = "microchip,sparx5-switch";
+      reg =  <0 0x401000>,
+             <0x10004000 0x7fc000>,
+             <0x11010000 0xaf0000>;
+      reg-names = "cpu", "devices", "gcb";
+      interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "xtr";
+      resets = <&reset 0>;
+      reset-names = "switch";
+      ethernet-ports {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        port0: port@0 {
+          reg = <0>;
+          microchip,bandwidth = <1000>;
+          phys = <&serdes 13>;
+          phy-handle = <&phy0>;
+          phy-mode = "qsgmii";
+        };
+        /* ... */
+        /* Then the 25G interfaces */
+        port60: port@60 {
+          reg = <60>;
+          microchip,bandwidth = <25000>;
+          phys = <&serdes 29>;
+          phy-mode = "10gbase-r";
+          sfp = <&sfp_eth60>;
+          managed = "in-band-status";
+          microchip,sd-sgpio = <365>;
+        };
+        port61: port@61 {
+          reg = <61>;
+          microchip,bandwidth = <25000>;
+          phys = <&serdes 30>;
+          phy-mode = "10gbase-r";
+          sfp = <&sfp_eth61>;
+          managed = "in-band-status";
+          microchip,sd-sgpio = <369>;
+        };
+        port62: port@62 {
+          reg = <62>;
+          microchip,bandwidth = <25000>;
+          phys = <&serdes 31>;
+          phy-mode = "10gbase-r";
+          sfp = <&sfp_eth62>;
+          managed = "in-band-status";
+          microchip,sd-sgpio = <373>;
+        };
+        port63: port@63 {
+          reg = <63>;
+          microchip,bandwidth = <25000>;
+          phys = <&serdes 32>;
+          phy-mode = "10gbase-r";
+          sfp = <&sfp_eth63>;
+          managed = "in-band-status";
+          microchip,sd-sgpio = <377>;
+        };
+        /* Finally the Management interface */
+        port64: port@64 {
+          reg = <64>;
+          microchip,bandwidth = <1000>;
+          phys = <&serdes 0>;
+          phy-handle = <&phy64>;
+          phy-mode = "sgmii";
+          mac-address = [ 00 00 00 01 02 03 ];
+        };
+      };
+    };
+
+...
+#  vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml :

From 3cfa11bac9bbede3066e15bcd5fb26c755c3da45 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:50 +0200
Subject: [PATCH 2828/3292] net: sparx5: add the basic sparx5 driver

This adds the Sparx5 basic SwitchDev driver framework with IO range
mapping, switch device detection and core clock configuration.

Support for ports, phylink, netdev, mactable etc. are in the following
patches.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/Kconfig        |    2 +
 drivers/net/ethernet/microchip/Makefile       |    2 +
 drivers/net/ethernet/microchip/sparx5/Kconfig |    9 +
 .../net/ethernet/microchip/sparx5/Makefile    |    8 +
 .../ethernet/microchip/sparx5/sparx5_main.c   |  744 +++
 .../ethernet/microchip/sparx5/sparx5_main.h   |  273 +
 .../microchip/sparx5/sparx5_main_regs.h       | 4642 +++++++++++++++++
 7 files changed, 5680 insertions(+)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/Kconfig
 create mode 100644 drivers/net/ethernet/microchip/sparx5/Makefile
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_main.c
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_main.h
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h

diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig
index d0f6dfe0dcf3..d54aa164c4e9 100644
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -54,4 +54,6 @@ config LAN743X
 	  To compile this driver as a module, choose M here. The module will be
 	  called lan743x.
 
+source "drivers/net/ethernet/microchip/sparx5/Kconfig"
+
 endif # NET_VENDOR_MICROCHIP
diff --git a/drivers/net/ethernet/microchip/Makefile b/drivers/net/ethernet/microchip/Makefile
index da603540ca57..c77dc0379bfd 100644
--- a/drivers/net/ethernet/microchip/Makefile
+++ b/drivers/net/ethernet/microchip/Makefile
@@ -8,3 +8,5 @@ obj-$(CONFIG_ENCX24J600) += encx24j600.o encx24j600-regmap.o
 obj-$(CONFIG_LAN743X) += lan743x.o
 
 lan743x-objs := lan743x_main.o lan743x_ethtool.o lan743x_ptp.o
+
+obj-$(CONFIG_SPARX5_SWITCH) += sparx5/
diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig
new file mode 100644
index 000000000000..a80419d8d4b5
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/Kconfig
@@ -0,0 +1,9 @@
+config SPARX5_SWITCH
+	tristate "Sparx5 switch driver"
+	depends on NET_SWITCHDEV
+	depends on HAS_IOMEM
+	select PHYLINK
+	select PHY_SPARX5_SERDES
+	select RESET_CONTROLLER
+	help
+	  This driver supports the Sparx5 network switch device.
diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
new file mode 100644
index 000000000000..41a31843d86f
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Microchip Sparx5 network device drivers.
+#
+
+obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
+
+sparx5-switch-objs  := sparx5_main.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
new file mode 100644
index 000000000000..0ba255d547b0
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ *
+ * The Sparx5 Chip Register Model can be browsed at this location:
+ * https://github.com/microchip-ung/sparx-5_reginfo
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <net/switchdev.h>
+#include <linux/etherdevice.h>
+#include <linux/io.h>
+#include <linux/printk.h>
+#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/types.h>
+#include <linux/reset.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+#define QLIM_WM(fraction) \
+	((SPX5_BUFFER_MEMORY / SPX5_BUFFER_CELL_SZ - 100) * (fraction) / 100)
+#define IO_RANGES 3
+
+struct initial_port_config {
+	u32 portno;
+	struct device_node *node;
+	struct sparx5_port_config conf;
+	struct phy *serdes;
+};
+
+struct sparx5_ram_config {
+	void __iomem *init_reg;
+	u32 init_val;
+};
+
+struct sparx5_main_io_resource {
+	enum sparx5_target id;
+	phys_addr_t offset;
+	int range;
+};
+
+static const struct sparx5_main_io_resource sparx5_main_iomap[] =  {
+	{ TARGET_CPU,                         0, 0 }, /* 0x600000000 */
+	{ TARGET_FDMA,                  0x80000, 0 }, /* 0x600080000 */
+	{ TARGET_PCEP,                 0x400000, 0 }, /* 0x600400000 */
+	{ TARGET_DEV2G5,             0x10004000, 1 }, /* 0x610004000 */
+	{ TARGET_DEV5G,              0x10008000, 1 }, /* 0x610008000 */
+	{ TARGET_PCS5G_BR,           0x1000c000, 1 }, /* 0x61000c000 */
+	{ TARGET_DEV2G5 +  1,        0x10010000, 1 }, /* 0x610010000 */
+	{ TARGET_DEV5G +  1,         0x10014000, 1 }, /* 0x610014000 */
+	{ TARGET_PCS5G_BR +  1,      0x10018000, 1 }, /* 0x610018000 */
+	{ TARGET_DEV2G5 +  2,        0x1001c000, 1 }, /* 0x61001c000 */
+	{ TARGET_DEV5G +  2,         0x10020000, 1 }, /* 0x610020000 */
+	{ TARGET_PCS5G_BR +  2,      0x10024000, 1 }, /* 0x610024000 */
+	{ TARGET_DEV2G5 +  6,        0x10028000, 1 }, /* 0x610028000 */
+	{ TARGET_DEV5G +  6,         0x1002c000, 1 }, /* 0x61002c000 */
+	{ TARGET_PCS5G_BR +  6,      0x10030000, 1 }, /* 0x610030000 */
+	{ TARGET_DEV2G5 +  7,        0x10034000, 1 }, /* 0x610034000 */
+	{ TARGET_DEV5G +  7,         0x10038000, 1 }, /* 0x610038000 */
+	{ TARGET_PCS5G_BR +  7,      0x1003c000, 1 }, /* 0x61003c000 */
+	{ TARGET_DEV2G5 +  8,        0x10040000, 1 }, /* 0x610040000 */
+	{ TARGET_DEV5G +  8,         0x10044000, 1 }, /* 0x610044000 */
+	{ TARGET_PCS5G_BR +  8,      0x10048000, 1 }, /* 0x610048000 */
+	{ TARGET_DEV2G5 +  9,        0x1004c000, 1 }, /* 0x61004c000 */
+	{ TARGET_DEV5G +  9,         0x10050000, 1 }, /* 0x610050000 */
+	{ TARGET_PCS5G_BR +  9,      0x10054000, 1 }, /* 0x610054000 */
+	{ TARGET_DEV2G5 + 10,        0x10058000, 1 }, /* 0x610058000 */
+	{ TARGET_DEV5G + 10,         0x1005c000, 1 }, /* 0x61005c000 */
+	{ TARGET_PCS5G_BR + 10,      0x10060000, 1 }, /* 0x610060000 */
+	{ TARGET_DEV2G5 + 11,        0x10064000, 1 }, /* 0x610064000 */
+	{ TARGET_DEV5G + 11,         0x10068000, 1 }, /* 0x610068000 */
+	{ TARGET_PCS5G_BR + 11,      0x1006c000, 1 }, /* 0x61006c000 */
+	{ TARGET_DEV2G5 + 12,        0x10070000, 1 }, /* 0x610070000 */
+	{ TARGET_DEV10G,             0x10074000, 1 }, /* 0x610074000 */
+	{ TARGET_PCS10G_BR,          0x10078000, 1 }, /* 0x610078000 */
+	{ TARGET_DEV2G5 + 14,        0x1007c000, 1 }, /* 0x61007c000 */
+	{ TARGET_DEV10G +  2,        0x10080000, 1 }, /* 0x610080000 */
+	{ TARGET_PCS10G_BR +  2,     0x10084000, 1 }, /* 0x610084000 */
+	{ TARGET_DEV2G5 + 15,        0x10088000, 1 }, /* 0x610088000 */
+	{ TARGET_DEV10G +  3,        0x1008c000, 1 }, /* 0x61008c000 */
+	{ TARGET_PCS10G_BR +  3,     0x10090000, 1 }, /* 0x610090000 */
+	{ TARGET_DEV2G5 + 16,        0x10094000, 1 }, /* 0x610094000 */
+	{ TARGET_DEV2G5 + 17,        0x10098000, 1 }, /* 0x610098000 */
+	{ TARGET_DEV2G5 + 18,        0x1009c000, 1 }, /* 0x61009c000 */
+	{ TARGET_DEV2G5 + 19,        0x100a0000, 1 }, /* 0x6100a0000 */
+	{ TARGET_DEV2G5 + 20,        0x100a4000, 1 }, /* 0x6100a4000 */
+	{ TARGET_DEV2G5 + 21,        0x100a8000, 1 }, /* 0x6100a8000 */
+	{ TARGET_DEV2G5 + 22,        0x100ac000, 1 }, /* 0x6100ac000 */
+	{ TARGET_DEV2G5 + 23,        0x100b0000, 1 }, /* 0x6100b0000 */
+	{ TARGET_DEV2G5 + 32,        0x100b4000, 1 }, /* 0x6100b4000 */
+	{ TARGET_DEV2G5 + 33,        0x100b8000, 1 }, /* 0x6100b8000 */
+	{ TARGET_DEV2G5 + 34,        0x100bc000, 1 }, /* 0x6100bc000 */
+	{ TARGET_DEV2G5 + 35,        0x100c0000, 1 }, /* 0x6100c0000 */
+	{ TARGET_DEV2G5 + 36,        0x100c4000, 1 }, /* 0x6100c4000 */
+	{ TARGET_DEV2G5 + 37,        0x100c8000, 1 }, /* 0x6100c8000 */
+	{ TARGET_DEV2G5 + 38,        0x100cc000, 1 }, /* 0x6100cc000 */
+	{ TARGET_DEV2G5 + 39,        0x100d0000, 1 }, /* 0x6100d0000 */
+	{ TARGET_DEV2G5 + 40,        0x100d4000, 1 }, /* 0x6100d4000 */
+	{ TARGET_DEV2G5 + 41,        0x100d8000, 1 }, /* 0x6100d8000 */
+	{ TARGET_DEV2G5 + 42,        0x100dc000, 1 }, /* 0x6100dc000 */
+	{ TARGET_DEV2G5 + 43,        0x100e0000, 1 }, /* 0x6100e0000 */
+	{ TARGET_DEV2G5 + 44,        0x100e4000, 1 }, /* 0x6100e4000 */
+	{ TARGET_DEV2G5 + 45,        0x100e8000, 1 }, /* 0x6100e8000 */
+	{ TARGET_DEV2G5 + 46,        0x100ec000, 1 }, /* 0x6100ec000 */
+	{ TARGET_DEV2G5 + 47,        0x100f0000, 1 }, /* 0x6100f0000 */
+	{ TARGET_DEV2G5 + 57,        0x100f4000, 1 }, /* 0x6100f4000 */
+	{ TARGET_DEV25G +  1,        0x100f8000, 1 }, /* 0x6100f8000 */
+	{ TARGET_PCS25G_BR +  1,     0x100fc000, 1 }, /* 0x6100fc000 */
+	{ TARGET_DEV2G5 + 59,        0x10104000, 1 }, /* 0x610104000 */
+	{ TARGET_DEV25G +  3,        0x10108000, 1 }, /* 0x610108000 */
+	{ TARGET_PCS25G_BR +  3,     0x1010c000, 1 }, /* 0x61010c000 */
+	{ TARGET_DEV2G5 + 60,        0x10114000, 1 }, /* 0x610114000 */
+	{ TARGET_DEV25G +  4,        0x10118000, 1 }, /* 0x610118000 */
+	{ TARGET_PCS25G_BR +  4,     0x1011c000, 1 }, /* 0x61011c000 */
+	{ TARGET_DEV2G5 + 64,        0x10124000, 1 }, /* 0x610124000 */
+	{ TARGET_DEV5G + 12,         0x10128000, 1 }, /* 0x610128000 */
+	{ TARGET_PCS5G_BR + 12,      0x1012c000, 1 }, /* 0x61012c000 */
+	{ TARGET_PORT_CONF,          0x10130000, 1 }, /* 0x610130000 */
+	{ TARGET_DEV2G5 +  3,        0x10404000, 1 }, /* 0x610404000 */
+	{ TARGET_DEV5G +  3,         0x10408000, 1 }, /* 0x610408000 */
+	{ TARGET_PCS5G_BR +  3,      0x1040c000, 1 }, /* 0x61040c000 */
+	{ TARGET_DEV2G5 +  4,        0x10410000, 1 }, /* 0x610410000 */
+	{ TARGET_DEV5G +  4,         0x10414000, 1 }, /* 0x610414000 */
+	{ TARGET_PCS5G_BR +  4,      0x10418000, 1 }, /* 0x610418000 */
+	{ TARGET_DEV2G5 +  5,        0x1041c000, 1 }, /* 0x61041c000 */
+	{ TARGET_DEV5G +  5,         0x10420000, 1 }, /* 0x610420000 */
+	{ TARGET_PCS5G_BR +  5,      0x10424000, 1 }, /* 0x610424000 */
+	{ TARGET_DEV2G5 + 13,        0x10428000, 1 }, /* 0x610428000 */
+	{ TARGET_DEV10G +  1,        0x1042c000, 1 }, /* 0x61042c000 */
+	{ TARGET_PCS10G_BR +  1,     0x10430000, 1 }, /* 0x610430000 */
+	{ TARGET_DEV2G5 + 24,        0x10434000, 1 }, /* 0x610434000 */
+	{ TARGET_DEV2G5 + 25,        0x10438000, 1 }, /* 0x610438000 */
+	{ TARGET_DEV2G5 + 26,        0x1043c000, 1 }, /* 0x61043c000 */
+	{ TARGET_DEV2G5 + 27,        0x10440000, 1 }, /* 0x610440000 */
+	{ TARGET_DEV2G5 + 28,        0x10444000, 1 }, /* 0x610444000 */
+	{ TARGET_DEV2G5 + 29,        0x10448000, 1 }, /* 0x610448000 */
+	{ TARGET_DEV2G5 + 30,        0x1044c000, 1 }, /* 0x61044c000 */
+	{ TARGET_DEV2G5 + 31,        0x10450000, 1 }, /* 0x610450000 */
+	{ TARGET_DEV2G5 + 48,        0x10454000, 1 }, /* 0x610454000 */
+	{ TARGET_DEV10G +  4,        0x10458000, 1 }, /* 0x610458000 */
+	{ TARGET_PCS10G_BR +  4,     0x1045c000, 1 }, /* 0x61045c000 */
+	{ TARGET_DEV2G5 + 49,        0x10460000, 1 }, /* 0x610460000 */
+	{ TARGET_DEV10G +  5,        0x10464000, 1 }, /* 0x610464000 */
+	{ TARGET_PCS10G_BR +  5,     0x10468000, 1 }, /* 0x610468000 */
+	{ TARGET_DEV2G5 + 50,        0x1046c000, 1 }, /* 0x61046c000 */
+	{ TARGET_DEV10G +  6,        0x10470000, 1 }, /* 0x610470000 */
+	{ TARGET_PCS10G_BR +  6,     0x10474000, 1 }, /* 0x610474000 */
+	{ TARGET_DEV2G5 + 51,        0x10478000, 1 }, /* 0x610478000 */
+	{ TARGET_DEV10G +  7,        0x1047c000, 1 }, /* 0x61047c000 */
+	{ TARGET_PCS10G_BR +  7,     0x10480000, 1 }, /* 0x610480000 */
+	{ TARGET_DEV2G5 + 52,        0x10484000, 1 }, /* 0x610484000 */
+	{ TARGET_DEV10G +  8,        0x10488000, 1 }, /* 0x610488000 */
+	{ TARGET_PCS10G_BR +  8,     0x1048c000, 1 }, /* 0x61048c000 */
+	{ TARGET_DEV2G5 + 53,        0x10490000, 1 }, /* 0x610490000 */
+	{ TARGET_DEV10G +  9,        0x10494000, 1 }, /* 0x610494000 */
+	{ TARGET_PCS10G_BR +  9,     0x10498000, 1 }, /* 0x610498000 */
+	{ TARGET_DEV2G5 + 54,        0x1049c000, 1 }, /* 0x61049c000 */
+	{ TARGET_DEV10G + 10,        0x104a0000, 1 }, /* 0x6104a0000 */
+	{ TARGET_PCS10G_BR + 10,     0x104a4000, 1 }, /* 0x6104a4000 */
+	{ TARGET_DEV2G5 + 55,        0x104a8000, 1 }, /* 0x6104a8000 */
+	{ TARGET_DEV10G + 11,        0x104ac000, 1 }, /* 0x6104ac000 */
+	{ TARGET_PCS10G_BR + 11,     0x104b0000, 1 }, /* 0x6104b0000 */
+	{ TARGET_DEV2G5 + 56,        0x104b4000, 1 }, /* 0x6104b4000 */
+	{ TARGET_DEV25G,             0x104b8000, 1 }, /* 0x6104b8000 */
+	{ TARGET_PCS25G_BR,          0x104bc000, 1 }, /* 0x6104bc000 */
+	{ TARGET_DEV2G5 + 58,        0x104c4000, 1 }, /* 0x6104c4000 */
+	{ TARGET_DEV25G +  2,        0x104c8000, 1 }, /* 0x6104c8000 */
+	{ TARGET_PCS25G_BR +  2,     0x104cc000, 1 }, /* 0x6104cc000 */
+	{ TARGET_DEV2G5 + 61,        0x104d4000, 1 }, /* 0x6104d4000 */
+	{ TARGET_DEV25G +  5,        0x104d8000, 1 }, /* 0x6104d8000 */
+	{ TARGET_PCS25G_BR +  5,     0x104dc000, 1 }, /* 0x6104dc000 */
+	{ TARGET_DEV2G5 + 62,        0x104e4000, 1 }, /* 0x6104e4000 */
+	{ TARGET_DEV25G +  6,        0x104e8000, 1 }, /* 0x6104e8000 */
+	{ TARGET_PCS25G_BR +  6,     0x104ec000, 1 }, /* 0x6104ec000 */
+	{ TARGET_DEV2G5 + 63,        0x104f4000, 1 }, /* 0x6104f4000 */
+	{ TARGET_DEV25G +  7,        0x104f8000, 1 }, /* 0x6104f8000 */
+	{ TARGET_PCS25G_BR +  7,     0x104fc000, 1 }, /* 0x6104fc000 */
+	{ TARGET_DSM,                0x10504000, 1 }, /* 0x610504000 */
+	{ TARGET_ASM,                0x10600000, 1 }, /* 0x610600000 */
+	{ TARGET_GCB,                0x11010000, 2 }, /* 0x611010000 */
+	{ TARGET_QS,                 0x11030000, 2 }, /* 0x611030000 */
+	{ TARGET_ANA_ACL,            0x11050000, 2 }, /* 0x611050000 */
+	{ TARGET_LRN,                0x11060000, 2 }, /* 0x611060000 */
+	{ TARGET_VCAP_SUPER,         0x11080000, 2 }, /* 0x611080000 */
+	{ TARGET_QSYS,               0x110a0000, 2 }, /* 0x6110a0000 */
+	{ TARGET_QFWD,               0x110b0000, 2 }, /* 0x6110b0000 */
+	{ TARGET_XQS,                0x110c0000, 2 }, /* 0x6110c0000 */
+	{ TARGET_CLKGEN,             0x11100000, 2 }, /* 0x611100000 */
+	{ TARGET_ANA_AC_POL,         0x11200000, 2 }, /* 0x611200000 */
+	{ TARGET_QRES,               0x11280000, 2 }, /* 0x611280000 */
+	{ TARGET_EACL,               0x112c0000, 2 }, /* 0x6112c0000 */
+	{ TARGET_ANA_CL,             0x11400000, 2 }, /* 0x611400000 */
+	{ TARGET_ANA_L3,             0x11480000, 2 }, /* 0x611480000 */
+	{ TARGET_HSCH,               0x11580000, 2 }, /* 0x611580000 */
+	{ TARGET_REW,                0x11600000, 2 }, /* 0x611600000 */
+	{ TARGET_ANA_L2,             0x11800000, 2 }, /* 0x611800000 */
+	{ TARGET_ANA_AC,             0x11900000, 2 }, /* 0x611900000 */
+	{ TARGET_VOP,                0x11a00000, 2 }, /* 0x611a00000 */
+};
+
+static int sparx5_create_targets(struct sparx5 *sparx5)
+{
+	struct resource *iores[IO_RANGES];
+	void __iomem *iomem[IO_RANGES];
+	void __iomem *begin[IO_RANGES];
+	int range_id[IO_RANGES];
+	int idx, jdx;
+
+	for (idx = 0, jdx = 0; jdx < ARRAY_SIZE(sparx5_main_iomap); jdx++) {
+		const struct sparx5_main_io_resource *iomap = &sparx5_main_iomap[jdx];
+
+		if (idx == iomap->range) {
+			range_id[idx] = jdx;
+			idx++;
+		}
+	}
+	for (idx = 0; idx < IO_RANGES; idx++) {
+		iores[idx] = platform_get_resource(sparx5->pdev, IORESOURCE_MEM,
+						   idx);
+		iomem[idx] = devm_ioremap(sparx5->dev,
+					  iores[idx]->start,
+					  iores[idx]->end - iores[idx]->start
+					  + 1);
+		if (IS_ERR(iomem[idx])) {
+			dev_err(sparx5->dev, "Unable to get switch registers: %s\n",
+				iores[idx]->name);
+			return PTR_ERR(iomem[idx]);
+		}
+		begin[idx] = iomem[idx] - sparx5_main_iomap[range_id[idx]].offset;
+	}
+	for (jdx = 0; jdx < ARRAY_SIZE(sparx5_main_iomap); jdx++) {
+		const struct sparx5_main_io_resource *iomap = &sparx5_main_iomap[jdx];
+
+		sparx5->regs[iomap->id] = begin[iomap->range] + iomap->offset;
+	}
+	return 0;
+}
+
+static int sparx5_create_port(struct sparx5 *sparx5,
+			      struct initial_port_config *config)
+{
+	struct sparx5_port *spx5_port;
+
+	/* netdev creation to be added in later patches */
+	spx5_port = devm_kzalloc(sparx5->dev, sizeof(*spx5_port), GFP_KERNEL);
+	spx5_port->of_node = config->node;
+	spx5_port->serdes = config->serdes;
+	spx5_port->pvid = NULL_VID;
+	spx5_port->signd_internal = true;
+	spx5_port->signd_active_high = true;
+	spx5_port->signd_enable = true;
+	spx5_port->max_vlan_tags = SPX5_PORT_MAX_TAGS_NONE;
+	spx5_port->vlan_type = SPX5_VLAN_PORT_TYPE_UNAWARE;
+	spx5_port->custom_etype = 0x8880; /* Vitesse */
+
+	/* PHYLINK support to be added in later patches */
+
+	return 0;
+}
+
+static int sparx5_init_ram(struct sparx5 *s5)
+{
+	const struct sparx5_ram_config spx5_ram_cfg[] = {
+		{spx5_reg_get(s5, ANA_AC_STAT_RESET), ANA_AC_STAT_RESET_RESET},
+		{spx5_reg_get(s5, ASM_STAT_CFG), ASM_STAT_CFG_STAT_CNT_CLR_SHOT},
+		{spx5_reg_get(s5, QSYS_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, REW_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, VOP_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, ANA_AC_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, ASM_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, EACL_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, VCAP_SUPER_RAM_INIT), QSYS_RAM_INIT_RAM_INIT},
+		{spx5_reg_get(s5, DSM_RAM_INIT), QSYS_RAM_INIT_RAM_INIT}
+	};
+	const struct sparx5_ram_config *cfg;
+	u32 value, pending, jdx, idx;
+
+	for (jdx = 0; jdx < 10; jdx++) {
+		pending = ARRAY_SIZE(spx5_ram_cfg);
+		for (idx = 0; idx < ARRAY_SIZE(spx5_ram_cfg); idx++) {
+			cfg = &spx5_ram_cfg[idx];
+			if (jdx == 0) {
+				writel(cfg->init_val, cfg->init_reg);
+			} else {
+				value = readl(cfg->init_reg);
+				if ((value & cfg->init_val) != cfg->init_val)
+					pending--;
+			}
+		}
+		if (!pending)
+			break;
+		usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
+	}
+
+	if (pending > 0) {
+		/* Still initializing, should be complete in
+		 * less than 1ms
+		 */
+		dev_err(s5->dev, "Memory initialization error\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int sparx5_init_switchcore(struct sparx5 *sparx5)
+{
+	u32 value;
+	int err = 0;
+
+	spx5_rmw(EACL_POL_EACL_CFG_EACL_FORCE_INIT_SET(1),
+		 EACL_POL_EACL_CFG_EACL_FORCE_INIT,
+		 sparx5,
+		 EACL_POL_EACL_CFG);
+
+	spx5_rmw(EACL_POL_EACL_CFG_EACL_FORCE_INIT_SET(0),
+		 EACL_POL_EACL_CFG_EACL_FORCE_INIT,
+		 sparx5,
+		 EACL_POL_EACL_CFG);
+
+	/* Initialize memories, if not done already */
+	value = spx5_rd(sparx5, HSCH_RESET_CFG);
+	if (!(value & HSCH_RESET_CFG_CORE_ENA)) {
+		err = sparx5_init_ram(sparx5);
+		if (err)
+			return err;
+	}
+
+	/* Reset counters */
+	spx5_wr(ANA_AC_STAT_RESET_RESET_SET(1), sparx5, ANA_AC_STAT_RESET);
+	spx5_wr(ASM_STAT_CFG_STAT_CNT_CLR_SHOT_SET(1), sparx5, ASM_STAT_CFG);
+
+	/* Enable switch-core and queue system */
+	spx5_wr(HSCH_RESET_CFG_CORE_ENA_SET(1), sparx5, HSCH_RESET_CFG);
+
+	return 0;
+}
+
+static int sparx5_init_coreclock(struct sparx5 *sparx5)
+{
+	enum sparx5_core_clockfreq freq = sparx5->coreclock;
+	u32 clk_div, clk_period, pol_upd_int, idx;
+
+	/* Verify if core clock frequency is supported on target.
+	 * If 'VTSS_CORE_CLOCK_DEFAULT' then the highest supported
+	 * freq. is used
+	 */
+	switch (sparx5->target_ct) {
+	case SPX5_TARGET_CT_7546:
+		if (sparx5->coreclock == SPX5_CORE_CLOCK_DEFAULT)
+			freq = SPX5_CORE_CLOCK_250MHZ;
+		else if (sparx5->coreclock != SPX5_CORE_CLOCK_250MHZ)
+			freq = 0; /* Not supported */
+		break;
+	case SPX5_TARGET_CT_7549:
+	case SPX5_TARGET_CT_7552:
+	case SPX5_TARGET_CT_7556:
+		if (sparx5->coreclock == SPX5_CORE_CLOCK_DEFAULT)
+			freq = SPX5_CORE_CLOCK_500MHZ;
+		else if (sparx5->coreclock != SPX5_CORE_CLOCK_500MHZ)
+			freq = 0; /* Not supported */
+		break;
+	case SPX5_TARGET_CT_7558:
+	case SPX5_TARGET_CT_7558TSN:
+		if (sparx5->coreclock == SPX5_CORE_CLOCK_DEFAULT)
+			freq = SPX5_CORE_CLOCK_625MHZ;
+		else if (sparx5->coreclock != SPX5_CORE_CLOCK_625MHZ)
+			freq = 0; /* Not supported */
+		break;
+	case SPX5_TARGET_CT_7546TSN:
+		if (sparx5->coreclock == SPX5_CORE_CLOCK_DEFAULT)
+			freq = SPX5_CORE_CLOCK_625MHZ;
+		break;
+	case SPX5_TARGET_CT_7549TSN:
+	case SPX5_TARGET_CT_7552TSN:
+	case SPX5_TARGET_CT_7556TSN:
+		if (sparx5->coreclock == SPX5_CORE_CLOCK_DEFAULT)
+			freq = SPX5_CORE_CLOCK_625MHZ;
+		else if (sparx5->coreclock == SPX5_CORE_CLOCK_250MHZ)
+			freq = 0; /* Not supported */
+		break;
+	default:
+		dev_err(sparx5->dev, "Target (%#04x) not supported\n",
+			sparx5->target_ct);
+		return -ENODEV;
+	}
+
+	switch (freq) {
+	case SPX5_CORE_CLOCK_250MHZ:
+		clk_div = 10;
+		pol_upd_int = 312;
+		break;
+	case SPX5_CORE_CLOCK_500MHZ:
+		clk_div = 5;
+		pol_upd_int = 624;
+		break;
+	case SPX5_CORE_CLOCK_625MHZ:
+		clk_div = 4;
+		pol_upd_int = 780;
+		break;
+	default:
+		dev_err(sparx5->dev, "%d coreclock not supported on (%#04x)\n",
+			sparx5->coreclock, sparx5->target_ct);
+		return -EINVAL;
+	}
+
+	/* Update state with chosen frequency */
+	sparx5->coreclock = freq;
+
+	/* Configure the LCPLL */
+	spx5_rmw(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV_SET(clk_div) |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV_SET(0) |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR_SET(0) |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL_SET(0) |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA_SET(0) |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA_SET(1),
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA |
+		 CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA,
+		 sparx5,
+		 CLKGEN_LCPLL1_CORE_CLK_CFG);
+
+	clk_period = sparx5_clk_period(freq);
+
+	spx5_rmw(HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS_SET(clk_period / 100),
+		 HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS,
+		 sparx5,
+		 HSCH_SYS_CLK_PER);
+
+	spx5_rmw(ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS_SET(clk_period / 100),
+		 ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS,
+		 sparx5,
+		 ANA_AC_POL_BDLB_DLB_CTRL);
+
+	spx5_rmw(ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS_SET(clk_period / 100),
+		 ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS,
+		 sparx5,
+		 ANA_AC_POL_SLB_DLB_CTRL);
+
+	spx5_rmw(LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS_SET(clk_period / 100),
+		 LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS,
+		 sparx5,
+		 LRN_AUTOAGE_CFG_1);
+
+	for (idx = 0; idx < 3; idx++)
+		spx5_rmw(GCB_SIO_CLOCK_SYS_CLK_PERIOD_SET(clk_period / 100),
+			 GCB_SIO_CLOCK_SYS_CLK_PERIOD,
+			 sparx5,
+			 GCB_SIO_CLOCK(idx));
+
+	spx5_rmw(HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY_SET
+		 ((256 * 1000) / clk_period),
+		 HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY,
+		 sparx5,
+		 HSCH_TAS_STATEMACHINE_CFG);
+
+	spx5_rmw(ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT_SET(pol_upd_int),
+		 ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT,
+		 sparx5,
+		 ANA_AC_POL_POL_UPD_INT_CFG);
+
+	return 0;
+}
+
+static int sparx5_qlim_set(struct sparx5 *sparx5)
+{
+	u32 res, dp, prio;
+
+	for (res = 0; res < 2; res++) {
+		for (prio = 0; prio < 8; prio++)
+			spx5_wr(0xFFF, sparx5,
+				QRES_RES_CFG(prio + 630 + res * 1024));
+
+		for (dp = 0; dp < 4; dp++)
+			spx5_wr(0xFFF, sparx5,
+				QRES_RES_CFG(dp + 638 + res * 1024));
+	}
+
+	/* Set 80,90,95,100% of memory size for top watermarks */
+	spx5_wr(QLIM_WM(80), sparx5, XQS_QLIMIT_SHR_QLIM_CFG(0));
+	spx5_wr(QLIM_WM(90), sparx5, XQS_QLIMIT_SHR_CTOP_CFG(0));
+	spx5_wr(QLIM_WM(95), sparx5, XQS_QLIMIT_SHR_ATOP_CFG(0));
+	spx5_wr(QLIM_WM(100), sparx5, XQS_QLIMIT_SHR_TOP_CFG(0));
+
+	return 0;
+}
+
+/* Some boards needs to map the SGPIO for signal detect explicitly to the
+ * port module
+ */
+static void sparx5_board_init(struct sparx5 *sparx5)
+{
+	int idx;
+
+	if (!sparx5->sd_sgpio_remapping)
+		return;
+
+	/* Enable SGPIO Signal Detect remapping */
+	spx5_rmw(GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL,
+		 GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL,
+		 sparx5,
+		 GCB_HW_SGPIO_SD_CFG);
+
+	/* Refer to LOS SGPIO */
+	for (idx = 0; idx < SPX5_PORTS; idx++)
+		if (sparx5->ports[idx])
+			if (sparx5->ports[idx]->conf.sd_sgpio != ~0)
+				spx5_wr(sparx5->ports[idx]->conf.sd_sgpio,
+					sparx5,
+					GCB_HW_SGPIO_TO_SD_MAP_CFG(idx));
+}
+
+static int sparx5_start(struct sparx5 *sparx5)
+{
+	u32 idx;
+
+	/* Setup own UPSIDs */
+	for (idx = 0; idx < 3; idx++) {
+		spx5_wr(idx, sparx5, ANA_AC_OWN_UPSID(idx));
+		spx5_wr(idx, sparx5, ANA_CL_OWN_UPSID(idx));
+		spx5_wr(idx, sparx5, ANA_L2_OWN_UPSID(idx));
+		spx5_wr(idx, sparx5, REW_OWN_UPSID(idx));
+	}
+
+	/* Enable CPU ports */
+	for (idx = SPX5_PORTS; idx < SPX5_PORTS_ALL; idx++)
+		spx5_rmw(QFWD_SWITCH_PORT_MODE_PORT_ENA_SET(1),
+			 QFWD_SWITCH_PORT_MODE_PORT_ENA,
+			 sparx5,
+			 QFWD_SWITCH_PORT_MODE(idx));
+
+	/* Forwarding masks to be added in later patches */
+	/* CPU copy CPU pgids */
+	spx5_wr(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1),
+		sparx5, ANA_AC_PGID_MISC_CFG(PGID_CPU));
+	spx5_wr(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1),
+		sparx5, ANA_AC_PGID_MISC_CFG(PGID_BCAST));
+
+	/* Recalc injected frame FCS */
+	for (idx = SPX5_PORT_CPU_0; idx <= SPX5_PORT_CPU_1; idx++)
+		spx5_rmw(ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA_SET(1),
+			 ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA,
+			 sparx5, ANA_CL_FILTER_CTRL(idx));
+
+	/* MAC/VLAN support to be added in later patches */
+	/* Enable queue limitation watermarks */
+	sparx5_qlim_set(sparx5);
+
+	/* netdev and resource calendar support to be added in later patches */
+
+	sparx5_board_init(sparx5);
+
+	/* Injection/Extraction config to be added in later patches */
+
+	return 0;
+}
+
+static int mchp_sparx5_probe(struct platform_device *pdev)
+{
+	struct initial_port_config *configs, *config;
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *ports, *portnp;
+	struct reset_control *reset;
+	struct sparx5 *sparx5;
+	int idx = 0, err = 0;
+	u8 *mac_addr;
+
+	if (!np && !pdev->dev.platform_data)
+		return -ENODEV;
+
+	sparx5 = devm_kzalloc(&pdev->dev, sizeof(*sparx5), GFP_KERNEL);
+	if (!sparx5)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, sparx5);
+	sparx5->pdev = pdev;
+	sparx5->dev = &pdev->dev;
+
+	/* Do switch core reset if available */
+	reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch");
+	if (IS_ERR(reset))
+		return dev_err_probe(&pdev->dev, PTR_ERR(reset),
+				     "Failed to get switch reset controller.\n");
+	reset_control_reset(reset);
+
+	/* Default values, some from DT */
+	sparx5->coreclock = SPX5_CORE_CLOCK_DEFAULT;
+
+	ports = of_get_child_by_name(np, "ethernet-ports");
+	if (!ports) {
+		dev_err(sparx5->dev, "no ethernet-ports child node found\n");
+		return -ENODEV;
+	}
+	sparx5->port_count = of_get_child_count(ports);
+
+	configs = kcalloc(sparx5->port_count,
+			  sizeof(struct initial_port_config), GFP_KERNEL);
+	if (!configs) {
+		err = -ENOMEM;
+		goto cleanup_pnode;
+	}
+
+	for_each_available_child_of_node(ports, portnp) {
+		struct sparx5_port_config *conf;
+		struct phy *serdes;
+		u32 portno;
+
+		err = of_property_read_u32(portnp, "reg", &portno);
+		if (err) {
+			dev_err(sparx5->dev, "port reg property error\n");
+			continue;
+		}
+		config = &configs[idx];
+		conf = &config->conf;
+		conf->speed = SPEED_UNKNOWN;
+		conf->bandwidth = SPEED_UNKNOWN;
+		err = of_get_phy_mode(portnp, &conf->phy_mode);
+		if (err) {
+			dev_err(sparx5->dev, "port %u: missing phy-mode\n",
+				portno);
+			continue;
+		}
+		err = of_property_read_u32(portnp, "microchip,bandwidth",
+					   &conf->bandwidth);
+		if (err) {
+			dev_err(sparx5->dev, "port %u: missing bandwidth\n",
+				portno);
+			continue;
+		}
+		err = of_property_read_u32(portnp, "microchip,sd-sgpio", &conf->sd_sgpio);
+		if (err)
+			conf->sd_sgpio = ~0;
+		else
+			sparx5->sd_sgpio_remapping = true;
+		serdes = devm_of_phy_get(sparx5->dev, portnp, NULL);
+		if (IS_ERR(serdes)) {
+			err = dev_err_probe(sparx5->dev, PTR_ERR(serdes),
+					    "port %u: missing serdes\n",
+					    portno);
+			goto cleanup_config;
+		}
+		config->portno = portno;
+		config->node = portnp;
+		config->serdes = serdes;
+
+		conf->media = PHY_MEDIA_DAC;
+		conf->serdes_reset = true;
+		conf->portmode = conf->phy_mode;
+		conf->power_down = true;
+		idx++;
+	}
+
+	err = sparx5_create_targets(sparx5);
+	if (err)
+		goto cleanup_config;
+
+	if (of_get_mac_address(np, mac_addr)) {
+		dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n");
+		eth_random_addr(sparx5->base_mac);
+		sparx5->base_mac[5] = 0;
+	} else {
+		ether_addr_copy(sparx5->base_mac, mac_addr);
+	}
+
+	/* Inj/Xtr IRQ support to be added in later patches */
+	/* Read chip ID to check CPU interface */
+	sparx5->chip_id = spx5_rd(sparx5, GCB_CHIP_ID);
+
+	sparx5->target_ct = (enum spx5_target_chiptype)
+		GCB_CHIP_ID_PART_ID_GET(sparx5->chip_id);
+
+	/* Initialize Switchcore and internal RAMs */
+	err = sparx5_init_switchcore(sparx5);
+	if (err) {
+		dev_err(sparx5->dev, "Switchcore initialization error\n");
+		goto cleanup_config;
+	}
+
+	/* Initialize the LC-PLL (core clock) and set affected registers */
+	err = sparx5_init_coreclock(sparx5);
+	if (err) {
+		dev_err(sparx5->dev, "LC-PLL initialization error\n");
+		goto cleanup_config;
+	}
+
+	for (idx = 0; idx < sparx5->port_count; ++idx) {
+		config = &configs[idx];
+		if (!config->node)
+			continue;
+
+		err = sparx5_create_port(sparx5, config);
+		if (err) {
+			dev_err(sparx5->dev, "port create error\n");
+			goto cleanup_ports;
+		}
+	}
+
+	err = sparx5_start(sparx5);
+	if (err) {
+		dev_err(sparx5->dev, "Start failed\n");
+		goto cleanup_ports;
+	}
+	goto cleanup_config;
+
+cleanup_ports:
+	/* Port cleanup to be added in later patches */
+cleanup_config:
+	kfree(configs);
+cleanup_pnode:
+	of_node_put(ports);
+	return err;
+}
+
+static const struct of_device_id mchp_sparx5_match[] = {
+	{ .compatible = "microchip,sparx5-switch" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mchp_sparx5_match);
+
+static struct platform_driver mchp_sparx5_driver = {
+	.probe = mchp_sparx5_probe,
+	.driver = {
+		.name = "sparx5-switch",
+		.of_match_table = mchp_sparx5_match,
+	},
+};
+
+module_platform_driver(mchp_sparx5_driver);
+
+MODULE_DESCRIPTION("Microchip Sparx5 switch driver");
+MODULE_AUTHOR("Steen Hegelund <steen.hegelund@microchip.com>");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
new file mode 100644
index 000000000000..705b5c80228d
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#ifndef __SPARX5_MAIN_H__
+#define __SPARX5_MAIN_H__
+
+#include <linux/types.h>
+#include <linux/phy/phy.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/if_vlan.h>
+#include <linux/bitmap.h>
+#include <linux/phylink.h>
+
+/* Target chip type */
+enum spx5_target_chiptype {
+	SPX5_TARGET_CT_7546    = 0x7546,  /* SparX-5-64  Enterprise */
+	SPX5_TARGET_CT_7549    = 0x7549,  /* SparX-5-90  Enterprise */
+	SPX5_TARGET_CT_7552    = 0x7552,  /* SparX-5-128 Enterprise */
+	SPX5_TARGET_CT_7556    = 0x7556,  /* SparX-5-160 Enterprise */
+	SPX5_TARGET_CT_7558    = 0x7558,  /* SparX-5-200 Enterprise */
+	SPX5_TARGET_CT_7546TSN = 0x47546, /* SparX-5-64i Industrial */
+	SPX5_TARGET_CT_7549TSN = 0x47549, /* SparX-5-90i Industrial */
+	SPX5_TARGET_CT_7552TSN = 0x47552, /* SparX-5-128i Industrial */
+	SPX5_TARGET_CT_7556TSN = 0x47556, /* SparX-5-160i Industrial */
+	SPX5_TARGET_CT_7558TSN = 0x47558, /* SparX-5-200i Industrial */
+};
+
+enum sparx5_port_max_tags {
+	SPX5_PORT_MAX_TAGS_NONE,  /* No extra tags allowed */
+	SPX5_PORT_MAX_TAGS_ONE,   /* Single tag allowed */
+	SPX5_PORT_MAX_TAGS_TWO    /* Single and double tag allowed */
+};
+
+enum sparx5_vlan_port_type {
+	SPX5_VLAN_PORT_TYPE_UNAWARE, /* VLAN unaware port */
+	SPX5_VLAN_PORT_TYPE_C,       /* C-port */
+	SPX5_VLAN_PORT_TYPE_S,       /* S-port */
+	SPX5_VLAN_PORT_TYPE_S_CUSTOM /* S-port using custom type */
+};
+
+#define SPX5_PORTS             65
+#define SPX5_PORT_CPU          (SPX5_PORTS)  /* Next port is CPU port */
+#define SPX5_PORT_CPU_0        (SPX5_PORT_CPU + 0) /* CPU Port 65 */
+#define SPX5_PORT_CPU_1        (SPX5_PORT_CPU + 1) /* CPU Port 66 */
+#define SPX5_PORT_VD0          (SPX5_PORT_CPU + 2) /* VD0/Port 67 used for IPMC */
+#define SPX5_PORT_VD1          (SPX5_PORT_CPU + 3) /* VD1/Port 68 used for AFI/OAM */
+#define SPX5_PORT_VD2          (SPX5_PORT_CPU + 4) /* VD2/Port 69 used for IPinIP*/
+#define SPX5_PORTS_ALL         (SPX5_PORT_CPU + 5) /* Total number of ports */
+
+#define PGID_BASE              SPX5_PORTS /* Starts after port PGIDs */
+#define PGID_UC_FLOOD          (PGID_BASE + 0)
+#define PGID_MC_FLOOD          (PGID_BASE + 1)
+#define PGID_IPV4_MC_DATA      (PGID_BASE + 2)
+#define PGID_IPV4_MC_CTRL      (PGID_BASE + 3)
+#define PGID_IPV6_MC_DATA      (PGID_BASE + 4)
+#define PGID_IPV6_MC_CTRL      (PGID_BASE + 5)
+#define PGID_BCAST	       (PGID_BASE + 6)
+#define PGID_CPU	       (PGID_BASE + 7)
+
+#define IFH_LEN                9 /* 36 bytes */
+#define NULL_VID               0
+#define SPX5_MACT_PULL_DELAY   (2 * HZ)
+#define SPX5_STATS_CHECK_DELAY (1 * HZ)
+#define SPX5_PRIOS             8     /* Number of priority queues */
+#define SPX5_BUFFER_CELL_SZ    184   /* Cell size  */
+#define SPX5_BUFFER_MEMORY     4194280 /* 22795 words * 184 bytes */
+
+struct sparx5;
+
+struct sparx5_port_config {
+	phy_interface_t portmode;
+	u32 bandwidth;
+	int speed;
+	int duplex;
+	enum phy_media media;
+	bool inband;
+	bool power_down;
+	bool autoneg;
+	bool serdes_reset;
+	u32 pause;
+	u32 pause_adv;
+	phy_interface_t phy_mode;
+	u32 sd_sgpio;
+};
+
+struct sparx5_port {
+	struct net_device *ndev;
+	struct sparx5 *sparx5;
+	struct device_node *of_node;
+	struct phy *serdes;
+	struct sparx5_port_config conf;
+	u16 portno;
+	/* Ingress default VLAN (pvid) */
+	u16 pvid;
+	/* Egress default VLAN (vid) */
+	u16 vid;
+	bool signd_internal;
+	bool signd_active_high;
+	bool signd_enable;
+	bool flow_control;
+	enum sparx5_port_max_tags max_vlan_tags;
+	enum sparx5_vlan_port_type vlan_type;
+	u32 custom_etype;
+	u32 ifh[IFH_LEN];
+	bool vlan_aware;
+};
+
+enum sparx5_core_clockfreq {
+	SPX5_CORE_CLOCK_DEFAULT,  /* Defaults to the highest supported frequency */
+	SPX5_CORE_CLOCK_250MHZ,   /* 250MHZ core clock frequency */
+	SPX5_CORE_CLOCK_500MHZ,   /* 500MHZ core clock frequency */
+	SPX5_CORE_CLOCK_625MHZ,   /* 625MHZ core clock frequency */
+};
+
+struct sparx5 {
+	struct platform_device *pdev;
+	struct device *dev;
+	u32 chip_id;
+	enum spx5_target_chiptype target_ct;
+	void __iomem *regs[NUM_TARGETS];
+	int port_count;
+	struct mutex lock; /* MAC reg lock */
+	/* port structures are in net device */
+	struct sparx5_port *ports[SPX5_PORTS];
+	enum sparx5_core_clockfreq coreclock;
+	u8 base_mac[ETH_ALEN];
+	/* Board specifics */
+	bool sd_sgpio_remapping;
+};
+
+/* Clock period in picoseconds */
+static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock)
+{
+	switch (cclock) {
+	case SPX5_CORE_CLOCK_250MHZ:
+		return 4000;
+	case SPX5_CORE_CLOCK_500MHZ:
+		return 2000;
+	case SPX5_CORE_CLOCK_625MHZ:
+	default:
+		return 1600;
+	}
+}
+
+/* Calculate raw offset */
+static inline __pure int spx5_offset(int id, int tinst, int tcnt,
+				     int gbase, int ginst,
+				     int gcnt, int gwidth,
+				     int raddr, int rinst,
+				     int rcnt, int rwidth)
+{
+	WARN_ON((tinst) >= tcnt);
+	WARN_ON((ginst) >= gcnt);
+	WARN_ON((rinst) >= rcnt);
+	return gbase + ((ginst) * gwidth) +
+		raddr + ((rinst) * rwidth);
+}
+
+/* Read, Write and modify registers content.
+ * The register definition macros start at the id
+ */
+static inline void __iomem *spx5_addr(void __iomem *base[],
+				      int id, int tinst, int tcnt,
+				      int gbase, int ginst,
+				      int gcnt, int gwidth,
+				      int raddr, int rinst,
+				      int rcnt, int rwidth)
+{
+	WARN_ON((tinst) >= tcnt);
+	WARN_ON((ginst) >= gcnt);
+	WARN_ON((rinst) >= rcnt);
+	return base[id + (tinst)] +
+		gbase + ((ginst) * gwidth) +
+		raddr + ((rinst) * rwidth);
+}
+
+static inline void __iomem *spx5_inst_addr(void __iomem *base,
+					   int gbase, int ginst,
+					   int gcnt, int gwidth,
+					   int raddr, int rinst,
+					   int rcnt, int rwidth)
+{
+	WARN_ON((ginst) >= gcnt);
+	WARN_ON((rinst) >= rcnt);
+	return base +
+		gbase + ((ginst) * gwidth) +
+		raddr + ((rinst) * rwidth);
+}
+
+static inline u32 spx5_rd(struct sparx5 *sparx5, int id, int tinst, int tcnt,
+			  int gbase, int ginst, int gcnt, int gwidth,
+			  int raddr, int rinst, int rcnt, int rwidth)
+{
+	return readl(spx5_addr(sparx5->regs, id, tinst, tcnt, gbase, ginst,
+			       gcnt, gwidth, raddr, rinst, rcnt, rwidth));
+}
+
+static inline u32 spx5_inst_rd(void __iomem *iomem, int id, int tinst, int tcnt,
+			       int gbase, int ginst, int gcnt, int gwidth,
+			       int raddr, int rinst, int rcnt, int rwidth)
+{
+	return readl(spx5_inst_addr(iomem, gbase, ginst,
+				     gcnt, gwidth, raddr, rinst, rcnt, rwidth));
+}
+
+static inline void spx5_wr(u32 val, struct sparx5 *sparx5,
+			   int id, int tinst, int tcnt,
+			   int gbase, int ginst, int gcnt, int gwidth,
+			   int raddr, int rinst, int rcnt, int rwidth)
+{
+	writel(val, spx5_addr(sparx5->regs, id, tinst, tcnt,
+			      gbase, ginst, gcnt, gwidth,
+			      raddr, rinst, rcnt, rwidth));
+}
+
+static inline void spx5_inst_wr(u32 val, void __iomem *iomem,
+				int id, int tinst, int tcnt,
+				int gbase, int ginst, int gcnt, int gwidth,
+				int raddr, int rinst, int rcnt, int rwidth)
+{
+	writel(val, spx5_inst_addr(iomem,
+				   gbase, ginst, gcnt, gwidth,
+				   raddr, rinst, rcnt, rwidth));
+}
+
+static inline void spx5_rmw(u32 val, u32 mask, struct sparx5 *sparx5,
+			    int id, int tinst, int tcnt,
+			    int gbase, int ginst, int gcnt, int gwidth,
+			    int raddr, int rinst, int rcnt, int rwidth)
+{
+	u32 nval;
+
+	nval = readl(spx5_addr(sparx5->regs, id, tinst, tcnt, gbase, ginst,
+			       gcnt, gwidth, raddr, rinst, rcnt, rwidth));
+	nval = (nval & ~mask) | (val & mask);
+	writel(nval, spx5_addr(sparx5->regs, id, tinst, tcnt, gbase, ginst,
+			       gcnt, gwidth, raddr, rinst, rcnt, rwidth));
+}
+
+static inline void spx5_inst_rmw(u32 val, u32 mask, void __iomem *iomem,
+				 int id, int tinst, int tcnt,
+				 int gbase, int ginst, int gcnt, int gwidth,
+				 int raddr, int rinst, int rcnt, int rwidth)
+{
+	u32 nval;
+
+	nval = readl(spx5_inst_addr(iomem, gbase, ginst, gcnt, gwidth, raddr,
+				    rinst, rcnt, rwidth));
+	nval = (nval & ~mask) | (val & mask);
+	writel(nval, spx5_inst_addr(iomem, gbase, ginst, gcnt, gwidth, raddr,
+				    rinst, rcnt, rwidth));
+}
+
+static inline void __iomem *spx5_inst_get(struct sparx5 *sparx5, int id, int tinst)
+{
+	return sparx5->regs[id + tinst];
+}
+
+static inline void __iomem *spx5_reg_get(struct sparx5 *sparx5,
+					 int id, int tinst, int tcnt,
+					 int gbase, int ginst, int gcnt, int gwidth,
+					 int raddr, int rinst, int rcnt, int rwidth)
+{
+	return spx5_addr(sparx5->regs, id, tinst, tcnt,
+			 gbase, ginst, gcnt, gwidth,
+			 raddr, rinst, rcnt, rwidth);
+}
+
+#endif	/* __SPARX5_MAIN_H__ */
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h
new file mode 100644
index 000000000000..5ab2373a7178
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h
@@ -0,0 +1,4642 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ * Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc.
+ */
+
+/* This file is autogenerated by cml-utils 2021-05-06 13:06:37 +0200.
+ * Commit ID: 9ae4ec441e25e4b9003f4e514df5cb12a36b84d3
+ */
+
+#ifndef _SPARX5_MAIN_REGS_H_
+#define _SPARX5_MAIN_REGS_H_
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+
+enum sparx5_target {
+	TARGET_ANA_AC = 1,
+	TARGET_ANA_ACL = 2,
+	TARGET_ANA_AC_POL = 4,
+	TARGET_ANA_CL = 6,
+	TARGET_ANA_L2 = 7,
+	TARGET_ANA_L3 = 8,
+	TARGET_ASM = 9,
+	TARGET_CLKGEN = 11,
+	TARGET_CPU = 12,
+	TARGET_DEV10G = 17,
+	TARGET_DEV25G = 29,
+	TARGET_DEV2G5 = 37,
+	TARGET_DEV5G = 102,
+	TARGET_DSM = 115,
+	TARGET_EACL = 116,
+	TARGET_FDMA = 117,
+	TARGET_GCB = 118,
+	TARGET_HSCH = 119,
+	TARGET_LRN = 122,
+	TARGET_PCEP = 129,
+	TARGET_PCS10G_BR = 132,
+	TARGET_PCS25G_BR = 144,
+	TARGET_PCS5G_BR = 160,
+	TARGET_PORT_CONF = 173,
+	TARGET_QFWD = 175,
+	TARGET_QRES = 176,
+	TARGET_QS = 177,
+	TARGET_QSYS = 178,
+	TARGET_REW = 179,
+	TARGET_VCAP_SUPER = 326,
+	TARGET_VOP = 327,
+	TARGET_XQS = 331,
+	NUM_TARGETS = 332
+};
+
+#define __REG(...)    __VA_ARGS__
+
+/*      ANA_AC:RAM_CTRL:RAM_INIT */
+#define ANA_AC_RAM_INIT           __REG(TARGET_ANA_AC, 0, 1, 839108, 0, 1, 4, 0, 0, 1, 4)
+
+#define ANA_AC_RAM_INIT_RAM_INIT                 BIT(1)
+#define ANA_AC_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(ANA_AC_RAM_INIT_RAM_INIT, x)
+#define ANA_AC_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(ANA_AC_RAM_INIT_RAM_INIT, x)
+
+#define ANA_AC_RAM_INIT_RAM_CFG_HOOK             BIT(0)
+#define ANA_AC_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(ANA_AC_RAM_INIT_RAM_CFG_HOOK, x)
+#define ANA_AC_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(ANA_AC_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      ANA_AC:PS_COMMON:OWN_UPSID */
+#define ANA_AC_OWN_UPSID(r)       __REG(TARGET_ANA_AC, 0, 1, 894472, 0, 1, 352, 52, r, 3, 4)
+
+#define ANA_AC_OWN_UPSID_OWN_UPSID               GENMASK(4, 0)
+#define ANA_AC_OWN_UPSID_OWN_UPSID_SET(x)\
+	FIELD_PREP(ANA_AC_OWN_UPSID_OWN_UPSID, x)
+#define ANA_AC_OWN_UPSID_OWN_UPSID_GET(x)\
+	FIELD_GET(ANA_AC_OWN_UPSID_OWN_UPSID, x)
+
+/*      ANA_AC:SRC:SRC_CFG */
+#define ANA_AC_SRC_CFG(g)         __REG(TARGET_ANA_AC, 0, 1, 849920, g, 102, 16, 0, 0, 1, 4)
+
+/*      ANA_AC:SRC:SRC_CFG1 */
+#define ANA_AC_SRC_CFG1(g)        __REG(TARGET_ANA_AC, 0, 1, 849920, g, 102, 16, 4, 0, 1, 4)
+
+/*      ANA_AC:SRC:SRC_CFG2 */
+#define ANA_AC_SRC_CFG2(g)        __REG(TARGET_ANA_AC, 0, 1, 849920, g, 102, 16, 8, 0, 1, 4)
+
+#define ANA_AC_SRC_CFG2_PORT_MASK2               BIT(0)
+#define ANA_AC_SRC_CFG2_PORT_MASK2_SET(x)\
+	FIELD_PREP(ANA_AC_SRC_CFG2_PORT_MASK2, x)
+#define ANA_AC_SRC_CFG2_PORT_MASK2_GET(x)\
+	FIELD_GET(ANA_AC_SRC_CFG2_PORT_MASK2, x)
+
+/*      ANA_AC:PGID:PGID_CFG */
+#define ANA_AC_PGID_CFG(g)        __REG(TARGET_ANA_AC, 0, 1, 786432, g, 3290, 16, 0, 0, 1, 4)
+
+/*      ANA_AC:PGID:PGID_CFG1 */
+#define ANA_AC_PGID_CFG1(g)       __REG(TARGET_ANA_AC, 0, 1, 786432, g, 3290, 16, 4, 0, 1, 4)
+
+/*      ANA_AC:PGID:PGID_CFG2 */
+#define ANA_AC_PGID_CFG2(g)       __REG(TARGET_ANA_AC, 0, 1, 786432, g, 3290, 16, 8, 0, 1, 4)
+
+#define ANA_AC_PGID_CFG2_PORT_MASK2              BIT(0)
+#define ANA_AC_PGID_CFG2_PORT_MASK2_SET(x)\
+	FIELD_PREP(ANA_AC_PGID_CFG2_PORT_MASK2, x)
+#define ANA_AC_PGID_CFG2_PORT_MASK2_GET(x)\
+	FIELD_GET(ANA_AC_PGID_CFG2_PORT_MASK2, x)
+
+/*      ANA_AC:PGID:PGID_MISC_CFG */
+#define ANA_AC_PGID_MISC_CFG(g)   __REG(TARGET_ANA_AC, 0, 1, 786432, g, 3290, 16, 12, 0, 1, 4)
+
+#define ANA_AC_PGID_MISC_CFG_PGID_CPU_QU         GENMASK(6, 4)
+#define ANA_AC_PGID_MISC_CFG_PGID_CPU_QU_SET(x)\
+	FIELD_PREP(ANA_AC_PGID_MISC_CFG_PGID_CPU_QU, x)
+#define ANA_AC_PGID_MISC_CFG_PGID_CPU_QU_GET(x)\
+	FIELD_GET(ANA_AC_PGID_MISC_CFG_PGID_CPU_QU, x)
+
+#define ANA_AC_PGID_MISC_CFG_STACK_TYPE_ENA      BIT(1)
+#define ANA_AC_PGID_MISC_CFG_STACK_TYPE_ENA_SET(x)\
+	FIELD_PREP(ANA_AC_PGID_MISC_CFG_STACK_TYPE_ENA, x)
+#define ANA_AC_PGID_MISC_CFG_STACK_TYPE_ENA_GET(x)\
+	FIELD_GET(ANA_AC_PGID_MISC_CFG_STACK_TYPE_ENA, x)
+
+#define ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA   BIT(0)
+#define ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(x)\
+	FIELD_PREP(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, x)
+#define ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_GET(x)\
+	FIELD_GET(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA, x)
+
+/*      ANA_AC:STAT_GLOBAL_CFG_PORT:STAT_GLOBAL_EVENT_MASK */
+#define ANA_AC_PORT_SGE_CFG(r)    __REG(TARGET_ANA_AC, 0, 1, 851552, 0, 1, 20, 0, r, 4, 4)
+
+#define ANA_AC_PORT_SGE_CFG_MASK                 GENMASK(15, 0)
+#define ANA_AC_PORT_SGE_CFG_MASK_SET(x)\
+	FIELD_PREP(ANA_AC_PORT_SGE_CFG_MASK, x)
+#define ANA_AC_PORT_SGE_CFG_MASK_GET(x)\
+	FIELD_GET(ANA_AC_PORT_SGE_CFG_MASK, x)
+
+/*      ANA_AC:STAT_GLOBAL_CFG_PORT:STAT_RESET */
+#define ANA_AC_STAT_RESET         __REG(TARGET_ANA_AC, 0, 1, 851552, 0, 1, 20, 16, 0, 1, 4)
+
+#define ANA_AC_STAT_RESET_RESET                  BIT(0)
+#define ANA_AC_STAT_RESET_RESET_SET(x)\
+	FIELD_PREP(ANA_AC_STAT_RESET_RESET, x)
+#define ANA_AC_STAT_RESET_RESET_GET(x)\
+	FIELD_GET(ANA_AC_STAT_RESET_RESET, x)
+
+/*      ANA_AC:STAT_CNT_CFG_PORT:STAT_CFG */
+#define ANA_AC_PORT_STAT_CFG(g, r) __REG(TARGET_ANA_AC, 0, 1, 843776, g, 70, 64, 4, r, 4, 4)
+
+#define ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK       GENMASK(11, 4)
+#define ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK_SET(x)\
+	FIELD_PREP(ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK, x)
+#define ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK_GET(x)\
+	FIELD_GET(ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK, x)
+
+#define ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE    GENMASK(3, 1)
+#define ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE_SET(x)\
+	FIELD_PREP(ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE, x)
+#define ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE_GET(x)\
+	FIELD_GET(ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE, x)
+
+#define ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE        BIT(0)
+#define ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE_SET(x)\
+	FIELD_PREP(ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE, x)
+#define ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE_GET(x)\
+	FIELD_GET(ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE, x)
+
+/*      ANA_AC:STAT_CNT_CFG_PORT:STAT_LSB_CNT */
+#define ANA_AC_PORT_STAT_LSB_CNT(g, r) __REG(TARGET_ANA_AC, 0, 1, 843776, g, 70, 64, 20, r, 4, 4)
+
+/*      ANA_ACL:COMMON:OWN_UPSID */
+#define ANA_ACL_OWN_UPSID(r)      __REG(TARGET_ANA_ACL, 0, 1, 32768, 0, 1, 592, 580, r, 3, 4)
+
+#define ANA_ACL_OWN_UPSID_OWN_UPSID              GENMASK(4, 0)
+#define ANA_ACL_OWN_UPSID_OWN_UPSID_SET(x)\
+	FIELD_PREP(ANA_ACL_OWN_UPSID_OWN_UPSID, x)
+#define ANA_ACL_OWN_UPSID_OWN_UPSID_GET(x)\
+	FIELD_GET(ANA_ACL_OWN_UPSID_OWN_UPSID, x)
+
+/*      ANA_AC_POL:POL_ALL_CFG:POL_UPD_INT_CFG */
+#define ANA_AC_POL_POL_UPD_INT_CFG __REG(TARGET_ANA_AC_POL, 0, 1, 75968, 0, 1, 1160, 1148, 0, 1, 4)
+
+#define ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT   GENMASK(9, 0)
+#define ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT_SET(x)\
+	FIELD_PREP(ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT, x)
+#define ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT_GET(x)\
+	FIELD_GET(ANA_AC_POL_POL_UPD_INT_CFG_POL_UPD_INT, x)
+
+/*      ANA_AC_POL:COMMON_BDLB:DLB_CTRL */
+#define ANA_AC_POL_BDLB_DLB_CTRL  __REG(TARGET_ANA_AC_POL, 0, 1, 79048, 0, 1, 8, 0, 0, 1, 4)
+
+#define ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS GENMASK(26, 19)
+#define ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS_SET(x)\
+	FIELD_PREP(ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS, x)
+#define ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS_GET(x)\
+	FIELD_GET(ANA_AC_POL_BDLB_DLB_CTRL_CLK_PERIOD_01NS, x)
+
+#define ANA_AC_POL_BDLB_DLB_CTRL_BASE_TICK_CNT   GENMASK(18, 4)
+#define ANA_AC_POL_BDLB_DLB_CTRL_BASE_TICK_CNT_SET(x)\
+	FIELD_PREP(ANA_AC_POL_BDLB_DLB_CTRL_BASE_TICK_CNT, x)
+#define ANA_AC_POL_BDLB_DLB_CTRL_BASE_TICK_CNT_GET(x)\
+	FIELD_GET(ANA_AC_POL_BDLB_DLB_CTRL_BASE_TICK_CNT, x)
+
+#define ANA_AC_POL_BDLB_DLB_CTRL_LEAK_ENA        BIT(1)
+#define ANA_AC_POL_BDLB_DLB_CTRL_LEAK_ENA_SET(x)\
+	FIELD_PREP(ANA_AC_POL_BDLB_DLB_CTRL_LEAK_ENA, x)
+#define ANA_AC_POL_BDLB_DLB_CTRL_LEAK_ENA_GET(x)\
+	FIELD_GET(ANA_AC_POL_BDLB_DLB_CTRL_LEAK_ENA, x)
+
+#define ANA_AC_POL_BDLB_DLB_CTRL_DLB_ADD_ENA     BIT(0)
+#define ANA_AC_POL_BDLB_DLB_CTRL_DLB_ADD_ENA_SET(x)\
+	FIELD_PREP(ANA_AC_POL_BDLB_DLB_CTRL_DLB_ADD_ENA, x)
+#define ANA_AC_POL_BDLB_DLB_CTRL_DLB_ADD_ENA_GET(x)\
+	FIELD_GET(ANA_AC_POL_BDLB_DLB_CTRL_DLB_ADD_ENA, x)
+
+/*      ANA_AC_POL:COMMON_BUM_SLB:DLB_CTRL */
+#define ANA_AC_POL_SLB_DLB_CTRL   __REG(TARGET_ANA_AC_POL, 0, 1, 79056, 0, 1, 20, 0, 0, 1, 4)
+
+#define ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS  GENMASK(26, 19)
+#define ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS_SET(x)\
+	FIELD_PREP(ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS, x)
+#define ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS_GET(x)\
+	FIELD_GET(ANA_AC_POL_SLB_DLB_CTRL_CLK_PERIOD_01NS, x)
+
+#define ANA_AC_POL_SLB_DLB_CTRL_BASE_TICK_CNT    GENMASK(18, 4)
+#define ANA_AC_POL_SLB_DLB_CTRL_BASE_TICK_CNT_SET(x)\
+	FIELD_PREP(ANA_AC_POL_SLB_DLB_CTRL_BASE_TICK_CNT, x)
+#define ANA_AC_POL_SLB_DLB_CTRL_BASE_TICK_CNT_GET(x)\
+	FIELD_GET(ANA_AC_POL_SLB_DLB_CTRL_BASE_TICK_CNT, x)
+
+#define ANA_AC_POL_SLB_DLB_CTRL_LEAK_ENA         BIT(1)
+#define ANA_AC_POL_SLB_DLB_CTRL_LEAK_ENA_SET(x)\
+	FIELD_PREP(ANA_AC_POL_SLB_DLB_CTRL_LEAK_ENA, x)
+#define ANA_AC_POL_SLB_DLB_CTRL_LEAK_ENA_GET(x)\
+	FIELD_GET(ANA_AC_POL_SLB_DLB_CTRL_LEAK_ENA, x)
+
+#define ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA      BIT(0)
+#define ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA_SET(x)\
+	FIELD_PREP(ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA, x)
+#define ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA_GET(x)\
+	FIELD_GET(ANA_AC_POL_SLB_DLB_CTRL_DLB_ADD_ENA, x)
+
+/*      ANA_CL:PORT:FILTER_CTRL */
+#define ANA_CL_FILTER_CTRL(g)     __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 4, 0, 1, 4)
+
+#define ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS    BIT(2)
+#define ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS, x)
+#define ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS_GET(x)\
+	FIELD_GET(ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS, x)
+
+#define ANA_CL_FILTER_CTRL_FILTER_NULL_MAC_DIS   BIT(1)
+#define ANA_CL_FILTER_CTRL_FILTER_NULL_MAC_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_FILTER_CTRL_FILTER_NULL_MAC_DIS, x)
+#define ANA_CL_FILTER_CTRL_FILTER_NULL_MAC_DIS_GET(x)\
+	FIELD_GET(ANA_CL_FILTER_CTRL_FILTER_NULL_MAC_DIS, x)
+
+#define ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA  BIT(0)
+#define ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA_SET(x)\
+	FIELD_PREP(ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA, x)
+#define ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA_GET(x)\
+	FIELD_GET(ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA, x)
+
+/*      ANA_CL:PORT:VLAN_FILTER_CTRL */
+#define ANA_CL_VLAN_FILTER_CTRL(g, r) __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 8, r, 3, 4)
+
+#define ANA_CL_VLAN_FILTER_CTRL_TAG_REQUIRED_ENA BIT(10)
+#define ANA_CL_VLAN_FILTER_CTRL_TAG_REQUIRED_ENA_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_TAG_REQUIRED_ENA, x)
+#define ANA_CL_VLAN_FILTER_CTRL_TAG_REQUIRED_ENA_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_TAG_REQUIRED_ENA, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CTAG_DIS    BIT(9)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CTAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_PRIO_CTAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CTAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_PRIO_CTAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_CTAG_DIS         BIT(8)
+#define ANA_CL_VLAN_FILTER_CTRL_CTAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_CTAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_CTAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_CTAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_STAG_DIS    BIT(7)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_PRIO_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_PRIO_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST1_STAG_DIS BIT(6)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST1_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST1_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST1_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST1_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST2_STAG_DIS BIT(5)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST2_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST2_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST2_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST2_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST3_STAG_DIS BIT(4)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST3_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST3_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST3_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_PRIO_CUST3_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_STAG_DIS         BIT(3)
+#define ANA_CL_VLAN_FILTER_CTRL_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_CUST1_STAG_DIS   BIT(2)
+#define ANA_CL_VLAN_FILTER_CTRL_CUST1_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_CUST1_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_CUST1_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_CUST1_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_CUST2_STAG_DIS   BIT(1)
+#define ANA_CL_VLAN_FILTER_CTRL_CUST2_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_CUST2_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_CUST2_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_CUST2_STAG_DIS, x)
+
+#define ANA_CL_VLAN_FILTER_CTRL_CUST3_STAG_DIS   BIT(0)
+#define ANA_CL_VLAN_FILTER_CTRL_CUST3_STAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_FILTER_CTRL_CUST3_STAG_DIS, x)
+#define ANA_CL_VLAN_FILTER_CTRL_CUST3_STAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_FILTER_CTRL_CUST3_STAG_DIS, x)
+
+/*      ANA_CL:PORT:ETAG_FILTER_CTRL */
+#define ANA_CL_ETAG_FILTER_CTRL(g) __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 20, 0, 1, 4)
+
+#define ANA_CL_ETAG_FILTER_CTRL_ETAG_REQUIRED_ENA BIT(1)
+#define ANA_CL_ETAG_FILTER_CTRL_ETAG_REQUIRED_ENA_SET(x)\
+	FIELD_PREP(ANA_CL_ETAG_FILTER_CTRL_ETAG_REQUIRED_ENA, x)
+#define ANA_CL_ETAG_FILTER_CTRL_ETAG_REQUIRED_ENA_GET(x)\
+	FIELD_GET(ANA_CL_ETAG_FILTER_CTRL_ETAG_REQUIRED_ENA, x)
+
+#define ANA_CL_ETAG_FILTER_CTRL_ETAG_DIS         BIT(0)
+#define ANA_CL_ETAG_FILTER_CTRL_ETAG_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_ETAG_FILTER_CTRL_ETAG_DIS, x)
+#define ANA_CL_ETAG_FILTER_CTRL_ETAG_DIS_GET(x)\
+	FIELD_GET(ANA_CL_ETAG_FILTER_CTRL_ETAG_DIS, x)
+
+/*      ANA_CL:PORT:VLAN_CTRL */
+#define ANA_CL_VLAN_CTRL(g)       __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 32, 0, 1, 4)
+
+#define ANA_CL_VLAN_CTRL_PORT_VOE_TPID_AWARE_DIS GENMASK(30, 26)
+#define ANA_CL_VLAN_CTRL_PORT_VOE_TPID_AWARE_DIS_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_VOE_TPID_AWARE_DIS, x)
+#define ANA_CL_VLAN_CTRL_PORT_VOE_TPID_AWARE_DIS_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_VOE_TPID_AWARE_DIS, x)
+
+#define ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_PCP    GENMASK(25, 23)
+#define ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_PCP_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_PCP, x)
+#define ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_PCP_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_PCP, x)
+
+#define ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_DEI    BIT(22)
+#define ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_DEI_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_DEI, x)
+#define ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_DEI_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_VOE_DEFAULT_DEI, x)
+
+#define ANA_CL_VLAN_CTRL_VLAN_PCP_DEI_TRANS_ENA  BIT(21)
+#define ANA_CL_VLAN_CTRL_VLAN_PCP_DEI_TRANS_ENA_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_VLAN_PCP_DEI_TRANS_ENA, x)
+#define ANA_CL_VLAN_CTRL_VLAN_PCP_DEI_TRANS_ENA_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_VLAN_PCP_DEI_TRANS_ENA, x)
+
+#define ANA_CL_VLAN_CTRL_VLAN_TAG_SEL            BIT(20)
+#define ANA_CL_VLAN_CTRL_VLAN_TAG_SEL_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_VLAN_TAG_SEL, x)
+#define ANA_CL_VLAN_CTRL_VLAN_TAG_SEL_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_VLAN_TAG_SEL, x)
+
+#define ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA          BIT(19)
+#define ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA, x)
+#define ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA, x)
+
+#define ANA_CL_VLAN_CTRL_VLAN_POP_CNT            GENMASK(18, 17)
+#define ANA_CL_VLAN_CTRL_VLAN_POP_CNT_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_VLAN_POP_CNT, x)
+#define ANA_CL_VLAN_CTRL_VLAN_POP_CNT_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_VLAN_POP_CNT, x)
+
+#define ANA_CL_VLAN_CTRL_PORT_TAG_TYPE           BIT(16)
+#define ANA_CL_VLAN_CTRL_PORT_TAG_TYPE_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_TAG_TYPE, x)
+#define ANA_CL_VLAN_CTRL_PORT_TAG_TYPE_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_TAG_TYPE, x)
+
+#define ANA_CL_VLAN_CTRL_PORT_PCP                GENMASK(15, 13)
+#define ANA_CL_VLAN_CTRL_PORT_PCP_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_PCP, x)
+#define ANA_CL_VLAN_CTRL_PORT_PCP_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_PCP, x)
+
+#define ANA_CL_VLAN_CTRL_PORT_DEI                BIT(12)
+#define ANA_CL_VLAN_CTRL_PORT_DEI_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_DEI, x)
+#define ANA_CL_VLAN_CTRL_PORT_DEI_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_DEI, x)
+
+#define ANA_CL_VLAN_CTRL_PORT_VID                GENMASK(11, 0)
+#define ANA_CL_VLAN_CTRL_PORT_VID_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_PORT_VID, x)
+#define ANA_CL_VLAN_CTRL_PORT_VID_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_PORT_VID, x)
+
+/*      ANA_CL:PORT:VLAN_CTRL_2 */
+#define ANA_CL_VLAN_CTRL_2(g)     __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 36, 0, 1, 4)
+
+#define ANA_CL_VLAN_CTRL_2_VLAN_PUSH_CNT         GENMASK(1, 0)
+#define ANA_CL_VLAN_CTRL_2_VLAN_PUSH_CNT_SET(x)\
+	FIELD_PREP(ANA_CL_VLAN_CTRL_2_VLAN_PUSH_CNT, x)
+#define ANA_CL_VLAN_CTRL_2_VLAN_PUSH_CNT_GET(x)\
+	FIELD_GET(ANA_CL_VLAN_CTRL_2_VLAN_PUSH_CNT, x)
+
+/*      ANA_CL:PORT:CAPTURE_BPDU_CFG */
+#define ANA_CL_CAPTURE_BPDU_CFG(g) __REG(TARGET_ANA_CL, 0, 1, 131072, g, 70, 512, 196, 0, 1, 4)
+
+/*      ANA_CL:COMMON:OWN_UPSID */
+#define ANA_CL_OWN_UPSID(r)       __REG(TARGET_ANA_CL, 0, 1, 166912, 0, 1, 756, 0, r, 3, 4)
+
+#define ANA_CL_OWN_UPSID_OWN_UPSID               GENMASK(4, 0)
+#define ANA_CL_OWN_UPSID_OWN_UPSID_SET(x)\
+	FIELD_PREP(ANA_CL_OWN_UPSID_OWN_UPSID, x)
+#define ANA_CL_OWN_UPSID_OWN_UPSID_GET(x)\
+	FIELD_GET(ANA_CL_OWN_UPSID_OWN_UPSID, x)
+
+/*      ANA_L2:COMMON:AUTO_LRN_CFG */
+#define ANA_L2_AUTO_LRN_CFG       __REG(TARGET_ANA_L2, 0, 1, 566024, 0, 1, 700, 24, 0, 1, 4)
+
+/*      ANA_L2:COMMON:AUTO_LRN_CFG1 */
+#define ANA_L2_AUTO_LRN_CFG1      __REG(TARGET_ANA_L2, 0, 1, 566024, 0, 1, 700, 28, 0, 1, 4)
+
+/*      ANA_L2:COMMON:AUTO_LRN_CFG2 */
+#define ANA_L2_AUTO_LRN_CFG2      __REG(TARGET_ANA_L2, 0, 1, 566024, 0, 1, 700, 32, 0, 1, 4)
+
+#define ANA_L2_AUTO_LRN_CFG2_AUTO_LRN_ENA2       BIT(0)
+#define ANA_L2_AUTO_LRN_CFG2_AUTO_LRN_ENA2_SET(x)\
+	FIELD_PREP(ANA_L2_AUTO_LRN_CFG2_AUTO_LRN_ENA2, x)
+#define ANA_L2_AUTO_LRN_CFG2_AUTO_LRN_ENA2_GET(x)\
+	FIELD_GET(ANA_L2_AUTO_LRN_CFG2_AUTO_LRN_ENA2, x)
+
+/*      ANA_L2:COMMON:OWN_UPSID */
+#define ANA_L2_OWN_UPSID(r)       __REG(TARGET_ANA_L2, 0, 1, 566024, 0, 1, 700, 672, r, 3, 4)
+
+#define ANA_L2_OWN_UPSID_OWN_UPSID               GENMASK(4, 0)
+#define ANA_L2_OWN_UPSID_OWN_UPSID_SET(x)\
+	FIELD_PREP(ANA_L2_OWN_UPSID_OWN_UPSID, x)
+#define ANA_L2_OWN_UPSID_OWN_UPSID_GET(x)\
+	FIELD_GET(ANA_L2_OWN_UPSID_OWN_UPSID, x)
+
+/*      ANA_L3:COMMON:VLAN_CTRL */
+#define ANA_L3_VLAN_CTRL          __REG(TARGET_ANA_L3, 0, 1, 493632, 0, 1, 184, 4, 0, 1, 4)
+
+#define ANA_L3_VLAN_CTRL_VLAN_ENA                BIT(0)
+#define ANA_L3_VLAN_CTRL_VLAN_ENA_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CTRL_VLAN_ENA, x)
+#define ANA_L3_VLAN_CTRL_VLAN_ENA_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CTRL_VLAN_ENA, x)
+
+/*      ANA_L3:VLAN:VLAN_CFG */
+#define ANA_L3_VLAN_CFG(g)        __REG(TARGET_ANA_L3, 0, 1, 0, g, 5120, 64, 8, 0, 1, 4)
+
+#define ANA_L3_VLAN_CFG_VLAN_MSTP_PTR            GENMASK(30, 24)
+#define ANA_L3_VLAN_CFG_VLAN_MSTP_PTR_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_MSTP_PTR, x)
+#define ANA_L3_VLAN_CFG_VLAN_MSTP_PTR_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_MSTP_PTR, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_FID                 GENMASK(20, 8)
+#define ANA_L3_VLAN_CFG_VLAN_FID_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_FID, x)
+#define ANA_L3_VLAN_CFG_VLAN_FID_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_FID, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_IGR_FILTER_ENA      BIT(6)
+#define ANA_L3_VLAN_CFG_VLAN_IGR_FILTER_ENA_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_IGR_FILTER_ENA, x)
+#define ANA_L3_VLAN_CFG_VLAN_IGR_FILTER_ENA_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_IGR_FILTER_ENA, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_SEC_FWD_ENA         BIT(5)
+#define ANA_L3_VLAN_CFG_VLAN_SEC_FWD_ENA_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_SEC_FWD_ENA, x)
+#define ANA_L3_VLAN_CFG_VLAN_SEC_FWD_ENA_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_SEC_FWD_ENA, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_FLOOD_DIS           BIT(4)
+#define ANA_L3_VLAN_CFG_VLAN_FLOOD_DIS_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_FLOOD_DIS, x)
+#define ANA_L3_VLAN_CFG_VLAN_FLOOD_DIS_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_FLOOD_DIS, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_LRN_DIS             BIT(3)
+#define ANA_L3_VLAN_CFG_VLAN_LRN_DIS_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_LRN_DIS, x)
+#define ANA_L3_VLAN_CFG_VLAN_LRN_DIS_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_LRN_DIS, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_RLEG_ENA            BIT(2)
+#define ANA_L3_VLAN_CFG_VLAN_RLEG_ENA_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_RLEG_ENA, x)
+#define ANA_L3_VLAN_CFG_VLAN_RLEG_ENA_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_RLEG_ENA, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_PRIVATE_ENA         BIT(1)
+#define ANA_L3_VLAN_CFG_VLAN_PRIVATE_ENA_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_PRIVATE_ENA, x)
+#define ANA_L3_VLAN_CFG_VLAN_PRIVATE_ENA_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_PRIVATE_ENA, x)
+
+#define ANA_L3_VLAN_CFG_VLAN_MIRROR_ENA          BIT(0)
+#define ANA_L3_VLAN_CFG_VLAN_MIRROR_ENA_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_CFG_VLAN_MIRROR_ENA, x)
+#define ANA_L3_VLAN_CFG_VLAN_MIRROR_ENA_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_CFG_VLAN_MIRROR_ENA, x)
+
+/*      ANA_L3:VLAN:VLAN_MASK_CFG */
+#define ANA_L3_VLAN_MASK_CFG(g)   __REG(TARGET_ANA_L3, 0, 1, 0, g, 5120, 64, 16, 0, 1, 4)
+
+/*      ANA_L3:VLAN:VLAN_MASK_CFG1 */
+#define ANA_L3_VLAN_MASK_CFG1(g)  __REG(TARGET_ANA_L3, 0, 1, 0, g, 5120, 64, 20, 0, 1, 4)
+
+/*      ANA_L3:VLAN:VLAN_MASK_CFG2 */
+#define ANA_L3_VLAN_MASK_CFG2(g)  __REG(TARGET_ANA_L3, 0, 1, 0, g, 5120, 64, 24, 0, 1, 4)
+
+#define ANA_L3_VLAN_MASK_CFG2_VLAN_PORT_MASK2    BIT(0)
+#define ANA_L3_VLAN_MASK_CFG2_VLAN_PORT_MASK2_SET(x)\
+	FIELD_PREP(ANA_L3_VLAN_MASK_CFG2_VLAN_PORT_MASK2, x)
+#define ANA_L3_VLAN_MASK_CFG2_VLAN_PORT_MASK2_GET(x)\
+	FIELD_GET(ANA_L3_VLAN_MASK_CFG2_VLAN_PORT_MASK2, x)
+
+/*      ASM:DEV_STATISTICS:RX_IN_BYTES_CNT */
+#define ASM_RX_IN_BYTES_CNT(g)    __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 0, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SYMBOL_ERR_CNT */
+#define ASM_RX_SYMBOL_ERR_CNT(g)  __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 4, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_PAUSE_CNT */
+#define ASM_RX_PAUSE_CNT(g)       __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 8, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_UNSUP_OPCODE_CNT */
+#define ASM_RX_UNSUP_OPCODE_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 12, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_OK_BYTES_CNT */
+#define ASM_RX_OK_BYTES_CNT(g)    __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 16, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_BAD_BYTES_CNT */
+#define ASM_RX_BAD_BYTES_CNT(g)   __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 20, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_UC_CNT */
+#define ASM_RX_UC_CNT(g)          __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 24, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_MC_CNT */
+#define ASM_RX_MC_CNT(g)          __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 28, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_BC_CNT */
+#define ASM_RX_BC_CNT(g)          __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 32, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_CRC_ERR_CNT */
+#define ASM_RX_CRC_ERR_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 36, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_UNDERSIZE_CNT */
+#define ASM_RX_UNDERSIZE_CNT(g)   __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 40, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_FRAGMENTS_CNT */
+#define ASM_RX_FRAGMENTS_CNT(g)   __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 44, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_IN_RANGE_LEN_ERR_CNT */
+#define ASM_RX_IN_RANGE_LEN_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 48, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_OUT_OF_RANGE_LEN_ERR_CNT */
+#define ASM_RX_OUT_OF_RANGE_LEN_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 52, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_OVERSIZE_CNT */
+#define ASM_RX_OVERSIZE_CNT(g)    __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 56, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_JABBERS_CNT */
+#define ASM_RX_JABBERS_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 60, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE64_CNT */
+#define ASM_RX_SIZE64_CNT(g)      __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 64, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE65TO127_CNT */
+#define ASM_RX_SIZE65TO127_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 68, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE128TO255_CNT */
+#define ASM_RX_SIZE128TO255_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 72, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE256TO511_CNT */
+#define ASM_RX_SIZE256TO511_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 76, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE512TO1023_CNT */
+#define ASM_RX_SIZE512TO1023_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 80, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE1024TO1518_CNT */
+#define ASM_RX_SIZE1024TO1518_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 84, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_SIZE1519TOMAX_CNT */
+#define ASM_RX_SIZE1519TOMAX_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 88, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_IPG_SHRINK_CNT */
+#define ASM_RX_IPG_SHRINK_CNT(g)  __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 92, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_OUT_BYTES_CNT */
+#define ASM_TX_OUT_BYTES_CNT(g)   __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 96, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_PAUSE_CNT */
+#define ASM_TX_PAUSE_CNT(g)       __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 100, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_OK_BYTES_CNT */
+#define ASM_TX_OK_BYTES_CNT(g)    __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 104, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_UC_CNT */
+#define ASM_TX_UC_CNT(g)          __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 108, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_MC_CNT */
+#define ASM_TX_MC_CNT(g)          __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 112, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_BC_CNT */
+#define ASM_TX_BC_CNT(g)          __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 116, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE64_CNT */
+#define ASM_TX_SIZE64_CNT(g)      __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 120, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE65TO127_CNT */
+#define ASM_TX_SIZE65TO127_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 124, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE128TO255_CNT */
+#define ASM_TX_SIZE128TO255_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 128, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE256TO511_CNT */
+#define ASM_TX_SIZE256TO511_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 132, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE512TO1023_CNT */
+#define ASM_TX_SIZE512TO1023_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 136, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE1024TO1518_CNT */
+#define ASM_TX_SIZE1024TO1518_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 140, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_SIZE1519TOMAX_CNT */
+#define ASM_TX_SIZE1519TOMAX_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 144, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_ALIGNMENT_LOST_CNT */
+#define ASM_RX_ALIGNMENT_LOST_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 148, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_TAGGED_FRMS_CNT */
+#define ASM_RX_TAGGED_FRMS_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 152, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_UNTAGGED_FRMS_CNT */
+#define ASM_RX_UNTAGGED_FRMS_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 156, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_TAGGED_FRMS_CNT */
+#define ASM_TX_TAGGED_FRMS_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 160, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_UNTAGGED_FRMS_CNT */
+#define ASM_TX_UNTAGGED_FRMS_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 164, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SYMBOL_ERR_CNT */
+#define ASM_PMAC_RX_SYMBOL_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 168, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_PAUSE_CNT */
+#define ASM_PMAC_RX_PAUSE_CNT(g)  __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 172, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_UNSUP_OPCODE_CNT */
+#define ASM_PMAC_RX_UNSUP_OPCODE_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 176, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_OK_BYTES_CNT */
+#define ASM_PMAC_RX_OK_BYTES_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 180, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_BAD_BYTES_CNT */
+#define ASM_PMAC_RX_BAD_BYTES_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 184, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_UC_CNT */
+#define ASM_PMAC_RX_UC_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 188, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_MC_CNT */
+#define ASM_PMAC_RX_MC_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 192, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_BC_CNT */
+#define ASM_PMAC_RX_BC_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 196, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_CRC_ERR_CNT */
+#define ASM_PMAC_RX_CRC_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 200, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_UNDERSIZE_CNT */
+#define ASM_PMAC_RX_UNDERSIZE_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 204, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_FRAGMENTS_CNT */
+#define ASM_PMAC_RX_FRAGMENTS_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 208, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_IN_RANGE_LEN_ERR_CNT */
+#define ASM_PMAC_RX_IN_RANGE_LEN_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 212, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_OUT_OF_RANGE_LEN_ERR_CNT */
+#define ASM_PMAC_RX_OUT_OF_RANGE_LEN_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 216, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_OVERSIZE_CNT */
+#define ASM_PMAC_RX_OVERSIZE_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 220, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_JABBERS_CNT */
+#define ASM_PMAC_RX_JABBERS_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 224, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE64_CNT */
+#define ASM_PMAC_RX_SIZE64_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 228, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE65TO127_CNT */
+#define ASM_PMAC_RX_SIZE65TO127_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 232, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE128TO255_CNT */
+#define ASM_PMAC_RX_SIZE128TO255_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 236, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE256TO511_CNT */
+#define ASM_PMAC_RX_SIZE256TO511_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 240, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE512TO1023_CNT */
+#define ASM_PMAC_RX_SIZE512TO1023_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 244, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE1024TO1518_CNT */
+#define ASM_PMAC_RX_SIZE1024TO1518_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 248, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_SIZE1519TOMAX_CNT */
+#define ASM_PMAC_RX_SIZE1519TOMAX_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 252, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_PAUSE_CNT */
+#define ASM_PMAC_TX_PAUSE_CNT(g)  __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 256, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_OK_BYTES_CNT */
+#define ASM_PMAC_TX_OK_BYTES_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 260, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_UC_CNT */
+#define ASM_PMAC_TX_UC_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 264, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_MC_CNT */
+#define ASM_PMAC_TX_MC_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 268, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_BC_CNT */
+#define ASM_PMAC_TX_BC_CNT(g)     __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 272, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE64_CNT */
+#define ASM_PMAC_TX_SIZE64_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 276, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE65TO127_CNT */
+#define ASM_PMAC_TX_SIZE65TO127_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 280, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE128TO255_CNT */
+#define ASM_PMAC_TX_SIZE128TO255_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 284, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE256TO511_CNT */
+#define ASM_PMAC_TX_SIZE256TO511_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 288, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE512TO1023_CNT */
+#define ASM_PMAC_TX_SIZE512TO1023_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 292, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE1024TO1518_CNT */
+#define ASM_PMAC_TX_SIZE1024TO1518_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 296, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_SIZE1519TOMAX_CNT */
+#define ASM_PMAC_TX_SIZE1519TOMAX_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 300, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_ALIGNMENT_LOST_CNT */
+#define ASM_PMAC_RX_ALIGNMENT_LOST_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 304, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:MM_RX_ASSEMBLY_ERR_CNT */
+#define ASM_MM_RX_ASSEMBLY_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 308, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:MM_RX_SMD_ERR_CNT */
+#define ASM_MM_RX_SMD_ERR_CNT(g)  __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 312, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:MM_RX_ASSEMBLY_OK_CNT */
+#define ASM_MM_RX_ASSEMBLY_OK_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 316, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:MM_RX_MERGE_FRAG_CNT */
+#define ASM_MM_RX_MERGE_FRAG_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 320, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:MM_TX_PFRAGMENT_CNT */
+#define ASM_MM_TX_PFRAGMENT_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 324, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_MULTI_COLL_CNT */
+#define ASM_TX_MULTI_COLL_CNT(g)  __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 328, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_LATE_COLL_CNT */
+#define ASM_TX_LATE_COLL_CNT(g)   __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 332, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_XCOLL_CNT */
+#define ASM_TX_XCOLL_CNT(g)       __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 336, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_DEFER_CNT */
+#define ASM_TX_DEFER_CNT(g)       __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 340, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_XDEFER_CNT */
+#define ASM_TX_XDEFER_CNT(g)      __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 344, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_BACKOFF1_CNT */
+#define ASM_TX_BACKOFF1_CNT(g)    __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 348, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:TX_CSENSE_CNT */
+#define ASM_TX_CSENSE_CNT(g)      __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 352, 0, 1, 4)
+
+/*      ASM:DEV_STATISTICS:RX_IN_BYTES_MSB_CNT */
+#define ASM_RX_IN_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 356, 0, 1, 4)
+
+#define ASM_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT, x)
+#define ASM_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:RX_OK_BYTES_MSB_CNT */
+#define ASM_RX_OK_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 360, 0, 1, 4)
+
+#define ASM_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT, x)
+#define ASM_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_OK_BYTES_MSB_CNT */
+#define ASM_PMAC_RX_OK_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 364, 0, 1, 4)
+
+#define ASM_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT, x)
+#define ASM_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:RX_BAD_BYTES_MSB_CNT */
+#define ASM_RX_BAD_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 368, 0, 1, 4)
+
+#define ASM_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT, x)
+#define ASM_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:PMAC_RX_BAD_BYTES_MSB_CNT */
+#define ASM_PMAC_RX_BAD_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 372, 0, 1, 4)
+
+#define ASM_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT, x)
+#define ASM_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:TX_OUT_BYTES_MSB_CNT */
+#define ASM_TX_OUT_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 376, 0, 1, 4)
+
+#define ASM_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT, x)
+#define ASM_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:TX_OK_BYTES_MSB_CNT */
+#define ASM_TX_OK_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 380, 0, 1, 4)
+
+#define ASM_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT, x)
+#define ASM_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:PMAC_TX_OK_BYTES_MSB_CNT */
+#define ASM_PMAC_TX_OK_BYTES_MSB_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 384, 0, 1, 4)
+
+#define ASM_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT GENMASK(3, 0)
+#define ASM_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(ASM_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT, x)
+#define ASM_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(ASM_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT, x)
+
+/*      ASM:DEV_STATISTICS:RX_SYNC_LOST_ERR_CNT */
+#define ASM_RX_SYNC_LOST_ERR_CNT(g) __REG(TARGET_ASM, 0, 1, 0, g, 65, 512, 388, 0, 1, 4)
+
+/*      ASM:CFG:STAT_CFG */
+#define ASM_STAT_CFG              __REG(TARGET_ASM, 0, 1, 33280, 0, 1, 1088, 0, 0, 1, 4)
+
+#define ASM_STAT_CFG_STAT_CNT_CLR_SHOT           BIT(0)
+#define ASM_STAT_CFG_STAT_CNT_CLR_SHOT_SET(x)\
+	FIELD_PREP(ASM_STAT_CFG_STAT_CNT_CLR_SHOT, x)
+#define ASM_STAT_CFG_STAT_CNT_CLR_SHOT_GET(x)\
+	FIELD_GET(ASM_STAT_CFG_STAT_CNT_CLR_SHOT, x)
+
+/*      ASM:CFG:PORT_CFG */
+#define ASM_PORT_CFG(r)           __REG(TARGET_ASM, 0, 1, 33280, 0, 1, 1088, 540, r, 67, 4)
+
+#define ASM_PORT_CFG_CSC_STAT_DIS                BIT(12)
+#define ASM_PORT_CFG_CSC_STAT_DIS_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_CSC_STAT_DIS, x)
+#define ASM_PORT_CFG_CSC_STAT_DIS_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_CSC_STAT_DIS, x)
+
+#define ASM_PORT_CFG_HIH_AFTER_PREAMBLE_ENA      BIT(11)
+#define ASM_PORT_CFG_HIH_AFTER_PREAMBLE_ENA_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_HIH_AFTER_PREAMBLE_ENA, x)
+#define ASM_PORT_CFG_HIH_AFTER_PREAMBLE_ENA_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_HIH_AFTER_PREAMBLE_ENA, x)
+
+#define ASM_PORT_CFG_IGN_TAXI_ABORT_ENA          BIT(10)
+#define ASM_PORT_CFG_IGN_TAXI_ABORT_ENA_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_IGN_TAXI_ABORT_ENA, x)
+#define ASM_PORT_CFG_IGN_TAXI_ABORT_ENA_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_IGN_TAXI_ABORT_ENA, x)
+
+#define ASM_PORT_CFG_NO_PREAMBLE_ENA             BIT(9)
+#define ASM_PORT_CFG_NO_PREAMBLE_ENA_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_NO_PREAMBLE_ENA, x)
+#define ASM_PORT_CFG_NO_PREAMBLE_ENA_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_NO_PREAMBLE_ENA, x)
+
+#define ASM_PORT_CFG_SKIP_PREAMBLE_ENA           BIT(8)
+#define ASM_PORT_CFG_SKIP_PREAMBLE_ENA_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_SKIP_PREAMBLE_ENA, x)
+#define ASM_PORT_CFG_SKIP_PREAMBLE_ENA_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_SKIP_PREAMBLE_ENA, x)
+
+#define ASM_PORT_CFG_FRM_AGING_DIS               BIT(7)
+#define ASM_PORT_CFG_FRM_AGING_DIS_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_FRM_AGING_DIS, x)
+#define ASM_PORT_CFG_FRM_AGING_DIS_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_FRM_AGING_DIS, x)
+
+#define ASM_PORT_CFG_PAD_ENA                     BIT(6)
+#define ASM_PORT_CFG_PAD_ENA_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_PAD_ENA, x)
+#define ASM_PORT_CFG_PAD_ENA_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_PAD_ENA, x)
+
+#define ASM_PORT_CFG_INJ_DISCARD_CFG             GENMASK(5, 4)
+#define ASM_PORT_CFG_INJ_DISCARD_CFG_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_INJ_DISCARD_CFG, x)
+#define ASM_PORT_CFG_INJ_DISCARD_CFG_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_INJ_DISCARD_CFG, x)
+
+#define ASM_PORT_CFG_INJ_FORMAT_CFG              GENMASK(3, 2)
+#define ASM_PORT_CFG_INJ_FORMAT_CFG_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_INJ_FORMAT_CFG, x)
+#define ASM_PORT_CFG_INJ_FORMAT_CFG_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_INJ_FORMAT_CFG, x)
+
+#define ASM_PORT_CFG_VSTAX2_AWR_ENA              BIT(1)
+#define ASM_PORT_CFG_VSTAX2_AWR_ENA_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_VSTAX2_AWR_ENA, x)
+#define ASM_PORT_CFG_VSTAX2_AWR_ENA_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_VSTAX2_AWR_ENA, x)
+
+#define ASM_PORT_CFG_PFRM_FLUSH                  BIT(0)
+#define ASM_PORT_CFG_PFRM_FLUSH_SET(x)\
+	FIELD_PREP(ASM_PORT_CFG_PFRM_FLUSH, x)
+#define ASM_PORT_CFG_PFRM_FLUSH_GET(x)\
+	FIELD_GET(ASM_PORT_CFG_PFRM_FLUSH, x)
+
+/*      ASM:RAM_CTRL:RAM_INIT */
+#define ASM_RAM_INIT              __REG(TARGET_ASM, 0, 1, 34832, 0, 1, 4, 0, 0, 1, 4)
+
+#define ASM_RAM_INIT_RAM_INIT                    BIT(1)
+#define ASM_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(ASM_RAM_INIT_RAM_INIT, x)
+#define ASM_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(ASM_RAM_INIT_RAM_INIT, x)
+
+#define ASM_RAM_INIT_RAM_CFG_HOOK                BIT(0)
+#define ASM_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(ASM_RAM_INIT_RAM_CFG_HOOK, x)
+#define ASM_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(ASM_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      CLKGEN:LCPLL1:LCPLL1_CORE_CLK_CFG */
+#define CLKGEN_LCPLL1_CORE_CLK_CFG __REG(TARGET_CLKGEN, 0, 1, 12, 0, 1, 36, 0, 0, 1, 4)
+
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV  GENMASK(7, 0)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV_SET(x)\
+	FIELD_PREP(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV, x)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV_GET(x)\
+	FIELD_GET(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_DIV, x)
+
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV  GENMASK(10, 8)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV_SET(x)\
+	FIELD_PREP(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV, x)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV_GET(x)\
+	FIELD_GET(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_PRE_DIV, x)
+
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR  BIT(11)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR_SET(x)\
+	FIELD_PREP(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR, x)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR_GET(x)\
+	FIELD_GET(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_DIR, x)
+
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL  GENMASK(13, 12)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL_SET(x)\
+	FIELD_PREP(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL, x)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL_GET(x)\
+	FIELD_GET(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_SEL, x)
+
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA  BIT(14)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA_SET(x)\
+	FIELD_PREP(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA, x)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA_GET(x)\
+	FIELD_GET(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_ROT_ENA, x)
+
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA  BIT(15)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA_SET(x)\
+	FIELD_PREP(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA, x)
+#define CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA_GET(x)\
+	FIELD_GET(CLKGEN_LCPLL1_CORE_CLK_CFG_CORE_CLK_ENA, x)
+
+/*      CPU:CPU_REGS:PROC_CTRL */
+#define CPU_PROC_CTRL             __REG(TARGET_CPU, 0, 1, 0, 0, 1, 204, 176, 0, 1, 4)
+
+#define CPU_PROC_CTRL_AARCH64_MODE_ENA           BIT(12)
+#define CPU_PROC_CTRL_AARCH64_MODE_ENA_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_AARCH64_MODE_ENA, x)
+#define CPU_PROC_CTRL_AARCH64_MODE_ENA_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_AARCH64_MODE_ENA, x)
+
+#define CPU_PROC_CTRL_L2_RST_INVALIDATE_DIS      BIT(11)
+#define CPU_PROC_CTRL_L2_RST_INVALIDATE_DIS_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_L2_RST_INVALIDATE_DIS, x)
+#define CPU_PROC_CTRL_L2_RST_INVALIDATE_DIS_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_L2_RST_INVALIDATE_DIS, x)
+
+#define CPU_PROC_CTRL_L1_RST_INVALIDATE_DIS      BIT(10)
+#define CPU_PROC_CTRL_L1_RST_INVALIDATE_DIS_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_L1_RST_INVALIDATE_DIS, x)
+#define CPU_PROC_CTRL_L1_RST_INVALIDATE_DIS_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_L1_RST_INVALIDATE_DIS, x)
+
+#define CPU_PROC_CTRL_BE_EXCEP_MODE              BIT(9)
+#define CPU_PROC_CTRL_BE_EXCEP_MODE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_BE_EXCEP_MODE, x)
+#define CPU_PROC_CTRL_BE_EXCEP_MODE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_BE_EXCEP_MODE, x)
+
+#define CPU_PROC_CTRL_VINITHI                    BIT(8)
+#define CPU_PROC_CTRL_VINITHI_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_VINITHI, x)
+#define CPU_PROC_CTRL_VINITHI_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_VINITHI, x)
+
+#define CPU_PROC_CTRL_CFGTE                      BIT(7)
+#define CPU_PROC_CTRL_CFGTE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_CFGTE, x)
+#define CPU_PROC_CTRL_CFGTE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_CFGTE, x)
+
+#define CPU_PROC_CTRL_CP15S_DISABLE              BIT(6)
+#define CPU_PROC_CTRL_CP15S_DISABLE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_CP15S_DISABLE, x)
+#define CPU_PROC_CTRL_CP15S_DISABLE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_CP15S_DISABLE, x)
+
+#define CPU_PROC_CTRL_PROC_CRYPTO_DISABLE        BIT(5)
+#define CPU_PROC_CTRL_PROC_CRYPTO_DISABLE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_PROC_CRYPTO_DISABLE, x)
+#define CPU_PROC_CTRL_PROC_CRYPTO_DISABLE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_PROC_CRYPTO_DISABLE, x)
+
+#define CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA        BIT(4)
+#define CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA, x)
+#define CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA, x)
+
+#define CPU_PROC_CTRL_ACP_AWCACHE                BIT(3)
+#define CPU_PROC_CTRL_ACP_AWCACHE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_ACP_AWCACHE, x)
+#define CPU_PROC_CTRL_ACP_AWCACHE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_ACP_AWCACHE, x)
+
+#define CPU_PROC_CTRL_ACP_ARCACHE                BIT(2)
+#define CPU_PROC_CTRL_ACP_ARCACHE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_ACP_ARCACHE, x)
+#define CPU_PROC_CTRL_ACP_ARCACHE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_ACP_ARCACHE, x)
+
+#define CPU_PROC_CTRL_L2_FLUSH_REQ               BIT(1)
+#define CPU_PROC_CTRL_L2_FLUSH_REQ_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_L2_FLUSH_REQ, x)
+#define CPU_PROC_CTRL_L2_FLUSH_REQ_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_L2_FLUSH_REQ, x)
+
+#define CPU_PROC_CTRL_ACP_DISABLE                BIT(0)
+#define CPU_PROC_CTRL_ACP_DISABLE_SET(x)\
+	FIELD_PREP(CPU_PROC_CTRL_ACP_DISABLE, x)
+#define CPU_PROC_CTRL_ACP_DISABLE_GET(x)\
+	FIELD_GET(CPU_PROC_CTRL_ACP_DISABLE, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_ENA_CFG */
+#define DEV10G_MAC_ENA_CFG(t)     __REG(TARGET_DEV10G, t, 12, 0, 0, 1, 60, 0, 0, 1, 4)
+
+#define DEV10G_MAC_ENA_CFG_RX_ENA                BIT(4)
+#define DEV10G_MAC_ENA_CFG_RX_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ENA_CFG_RX_ENA, x)
+#define DEV10G_MAC_ENA_CFG_RX_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ENA_CFG_RX_ENA, x)
+
+#define DEV10G_MAC_ENA_CFG_TX_ENA                BIT(0)
+#define DEV10G_MAC_ENA_CFG_TX_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ENA_CFG_TX_ENA, x)
+#define DEV10G_MAC_ENA_CFG_TX_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ENA_CFG_TX_ENA, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_MAXLEN_CFG */
+#define DEV10G_MAC_MAXLEN_CFG(t)  __REG(TARGET_DEV10G, t, 12, 0, 0, 1, 60, 8, 0, 1, 4)
+
+#define DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK    BIT(16)
+#define DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_SET(x)\
+	FIELD_PREP(DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK, x)
+#define DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_GET(x)\
+	FIELD_GET(DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK, x)
+
+#define DEV10G_MAC_MAXLEN_CFG_MAX_LEN            GENMASK(15, 0)
+#define DEV10G_MAC_MAXLEN_CFG_MAX_LEN_SET(x)\
+	FIELD_PREP(DEV10G_MAC_MAXLEN_CFG_MAX_LEN, x)
+#define DEV10G_MAC_MAXLEN_CFG_MAX_LEN_GET(x)\
+	FIELD_GET(DEV10G_MAC_MAXLEN_CFG_MAX_LEN, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_NUM_TAGS_CFG */
+#define DEV10G_MAC_NUM_TAGS_CFG(t) __REG(TARGET_DEV10G, t, 12, 0, 0, 1, 60, 12, 0, 1, 4)
+
+#define DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS         GENMASK(1, 0)
+#define DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS_SET(x)\
+	FIELD_PREP(DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS, x)
+#define DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS_GET(x)\
+	FIELD_GET(DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_TAGS_CFG */
+#define DEV10G_MAC_TAGS_CFG(t, r) __REG(TARGET_DEV10G, t, 12, 0, 0, 1, 60, 16, r, 3, 4)
+
+#define DEV10G_MAC_TAGS_CFG_TAG_ID               GENMASK(31, 16)
+#define DEV10G_MAC_TAGS_CFG_TAG_ID_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TAGS_CFG_TAG_ID, x)
+#define DEV10G_MAC_TAGS_CFG_TAG_ID_GET(x)\
+	FIELD_GET(DEV10G_MAC_TAGS_CFG_TAG_ID, x)
+
+#define DEV10G_MAC_TAGS_CFG_TAG_ENA              BIT(4)
+#define DEV10G_MAC_TAGS_CFG_TAG_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TAGS_CFG_TAG_ENA, x)
+#define DEV10G_MAC_TAGS_CFG_TAG_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_TAGS_CFG_TAG_ENA, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_ADV_CHK_CFG */
+#define DEV10G_MAC_ADV_CHK_CFG(t) __REG(TARGET_DEV10G, t, 12, 0, 0, 1, 60, 28, 0, 1, 4)
+
+#define DEV10G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA   BIT(24)
+#define DEV10G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA, x)
+#define DEV10G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA, x)
+
+#define DEV10G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA   BIT(20)
+#define DEV10G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA, x)
+#define DEV10G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA, x)
+
+#define DEV10G_MAC_ADV_CHK_CFG_SFD_CHK_ENA       BIT(16)
+#define DEV10G_MAC_ADV_CHK_CFG_SFD_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_SFD_CHK_ENA, x)
+#define DEV10G_MAC_ADV_CHK_CFG_SFD_CHK_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_SFD_CHK_ENA, x)
+
+#define DEV10G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS   BIT(12)
+#define DEV10G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS, x)
+#define DEV10G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS, x)
+
+#define DEV10G_MAC_ADV_CHK_CFG_PRM_CHK_ENA       BIT(8)
+#define DEV10G_MAC_ADV_CHK_CFG_PRM_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_PRM_CHK_ENA, x)
+#define DEV10G_MAC_ADV_CHK_CFG_PRM_CHK_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_PRM_CHK_ENA, x)
+
+#define DEV10G_MAC_ADV_CHK_CFG_OOR_ERR_ENA       BIT(4)
+#define DEV10G_MAC_ADV_CHK_CFG_OOR_ERR_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_OOR_ERR_ENA, x)
+#define DEV10G_MAC_ADV_CHK_CFG_OOR_ERR_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_OOR_ERR_ENA, x)
+
+#define DEV10G_MAC_ADV_CHK_CFG_INR_ERR_ENA       BIT(0)
+#define DEV10G_MAC_ADV_CHK_CFG_INR_ERR_ENA_SET(x)\
+	FIELD_PREP(DEV10G_MAC_ADV_CHK_CFG_INR_ERR_ENA, x)
+#define DEV10G_MAC_ADV_CHK_CFG_INR_ERR_ENA_GET(x)\
+	FIELD_GET(DEV10G_MAC_ADV_CHK_CFG_INR_ERR_ENA, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_TX_MONITOR_STICKY */
+#define DEV10G_MAC_TX_MONITOR_STICKY(t) __REG(TARGET_DEV10G, t, 12, 0, 0, 1, 60, 48, 0, 1, 4)
+
+#define DEV10G_MAC_TX_MONITOR_STICKY_LOCAL_ERR_STATE_STICKY BIT(4)
+#define DEV10G_MAC_TX_MONITOR_STICKY_LOCAL_ERR_STATE_STICKY_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TX_MONITOR_STICKY_LOCAL_ERR_STATE_STICKY, x)
+#define DEV10G_MAC_TX_MONITOR_STICKY_LOCAL_ERR_STATE_STICKY_GET(x)\
+	FIELD_GET(DEV10G_MAC_TX_MONITOR_STICKY_LOCAL_ERR_STATE_STICKY, x)
+
+#define DEV10G_MAC_TX_MONITOR_STICKY_REMOTE_ERR_STATE_STICKY BIT(3)
+#define DEV10G_MAC_TX_MONITOR_STICKY_REMOTE_ERR_STATE_STICKY_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TX_MONITOR_STICKY_REMOTE_ERR_STATE_STICKY, x)
+#define DEV10G_MAC_TX_MONITOR_STICKY_REMOTE_ERR_STATE_STICKY_GET(x)\
+	FIELD_GET(DEV10G_MAC_TX_MONITOR_STICKY_REMOTE_ERR_STATE_STICKY, x)
+
+#define DEV10G_MAC_TX_MONITOR_STICKY_LINK_INTERRUPTION_STATE_STICKY BIT(2)
+#define DEV10G_MAC_TX_MONITOR_STICKY_LINK_INTERRUPTION_STATE_STICKY_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TX_MONITOR_STICKY_LINK_INTERRUPTION_STATE_STICKY, x)
+#define DEV10G_MAC_TX_MONITOR_STICKY_LINK_INTERRUPTION_STATE_STICKY_GET(x)\
+	FIELD_GET(DEV10G_MAC_TX_MONITOR_STICKY_LINK_INTERRUPTION_STATE_STICKY, x)
+
+#define DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY BIT(1)
+#define DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY, x)
+#define DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY_GET(x)\
+	FIELD_GET(DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY, x)
+
+#define DEV10G_MAC_TX_MONITOR_STICKY_DIS_STATE_STICKY BIT(0)
+#define DEV10G_MAC_TX_MONITOR_STICKY_DIS_STATE_STICKY_SET(x)\
+	FIELD_PREP(DEV10G_MAC_TX_MONITOR_STICKY_DIS_STATE_STICKY, x)
+#define DEV10G_MAC_TX_MONITOR_STICKY_DIS_STATE_STICKY_GET(x)\
+	FIELD_GET(DEV10G_MAC_TX_MONITOR_STICKY_DIS_STATE_STICKY, x)
+
+/*      DEV10G:DEV_CFG_STATUS:DEV_RST_CTRL */
+#define DEV10G_DEV_RST_CTRL(t)    __REG(TARGET_DEV10G, t, 12, 436, 0, 1, 52, 0, 0, 1, 4)
+
+#define DEV10G_DEV_RST_CTRL_PARDET_MODE_ENA      BIT(28)
+#define DEV10G_DEV_RST_CTRL_PARDET_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_PARDET_MODE_ENA, x)
+#define DEV10G_DEV_RST_CTRL_PARDET_MODE_ENA_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_PARDET_MODE_ENA, x)
+
+#define DEV10G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS BIT(27)
+#define DEV10G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+#define DEV10G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+
+#define DEV10G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS GENMASK(26, 25)
+#define DEV10G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS, x)
+#define DEV10G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS, x)
+
+#define DEV10G_DEV_RST_CTRL_SERDES_SPEED_SEL     GENMASK(24, 23)
+#define DEV10G_DEV_RST_CTRL_SERDES_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_SERDES_SPEED_SEL, x)
+#define DEV10G_DEV_RST_CTRL_SERDES_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_SERDES_SPEED_SEL, x)
+
+#define DEV10G_DEV_RST_CTRL_SPEED_SEL            GENMASK(22, 20)
+#define DEV10G_DEV_RST_CTRL_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_SPEED_SEL, x)
+#define DEV10G_DEV_RST_CTRL_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_SPEED_SEL, x)
+
+#define DEV10G_DEV_RST_CTRL_PCS_TX_RST           BIT(12)
+#define DEV10G_DEV_RST_CTRL_PCS_TX_RST_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_PCS_TX_RST, x)
+#define DEV10G_DEV_RST_CTRL_PCS_TX_RST_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_PCS_TX_RST, x)
+
+#define DEV10G_DEV_RST_CTRL_PCS_RX_RST           BIT(8)
+#define DEV10G_DEV_RST_CTRL_PCS_RX_RST_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_PCS_RX_RST, x)
+#define DEV10G_DEV_RST_CTRL_PCS_RX_RST_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_PCS_RX_RST, x)
+
+#define DEV10G_DEV_RST_CTRL_MAC_TX_RST           BIT(4)
+#define DEV10G_DEV_RST_CTRL_MAC_TX_RST_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_MAC_TX_RST, x)
+#define DEV10G_DEV_RST_CTRL_MAC_TX_RST_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_MAC_TX_RST, x)
+
+#define DEV10G_DEV_RST_CTRL_MAC_RX_RST           BIT(0)
+#define DEV10G_DEV_RST_CTRL_MAC_RX_RST_SET(x)\
+	FIELD_PREP(DEV10G_DEV_RST_CTRL_MAC_RX_RST, x)
+#define DEV10G_DEV_RST_CTRL_MAC_RX_RST_GET(x)\
+	FIELD_GET(DEV10G_DEV_RST_CTRL_MAC_RX_RST, x)
+
+/*      DEV10G:PCS25G_CFG_STATUS:PCS25G_CFG */
+#define DEV10G_PCS25G_CFG(t)      __REG(TARGET_DEV10G, t, 12, 488, 0, 1, 32, 0, 0, 1, 4)
+
+#define DEV10G_PCS25G_CFG_PCS25G_ENA             BIT(0)
+#define DEV10G_PCS25G_CFG_PCS25G_ENA_SET(x)\
+	FIELD_PREP(DEV10G_PCS25G_CFG_PCS25G_ENA, x)
+#define DEV10G_PCS25G_CFG_PCS25G_ENA_GET(x)\
+	FIELD_GET(DEV10G_PCS25G_CFG_PCS25G_ENA, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_ENA_CFG */
+#define DEV25G_MAC_ENA_CFG(t)     __REG(TARGET_DEV25G, t, 8, 0, 0, 1, 60, 0, 0, 1, 4)
+
+#define DEV25G_MAC_ENA_CFG_RX_ENA                BIT(4)
+#define DEV25G_MAC_ENA_CFG_RX_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ENA_CFG_RX_ENA, x)
+#define DEV25G_MAC_ENA_CFG_RX_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ENA_CFG_RX_ENA, x)
+
+#define DEV25G_MAC_ENA_CFG_TX_ENA                BIT(0)
+#define DEV25G_MAC_ENA_CFG_TX_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ENA_CFG_TX_ENA, x)
+#define DEV25G_MAC_ENA_CFG_TX_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ENA_CFG_TX_ENA, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_MAXLEN_CFG */
+#define DEV25G_MAC_MAXLEN_CFG(t)  __REG(TARGET_DEV25G, t, 8, 0, 0, 1, 60, 8, 0, 1, 4)
+
+#define DEV25G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK    BIT(16)
+#define DEV25G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_SET(x)\
+	FIELD_PREP(DEV25G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK, x)
+#define DEV25G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_GET(x)\
+	FIELD_GET(DEV25G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK, x)
+
+#define DEV25G_MAC_MAXLEN_CFG_MAX_LEN            GENMASK(15, 0)
+#define DEV25G_MAC_MAXLEN_CFG_MAX_LEN_SET(x)\
+	FIELD_PREP(DEV25G_MAC_MAXLEN_CFG_MAX_LEN, x)
+#define DEV25G_MAC_MAXLEN_CFG_MAX_LEN_GET(x)\
+	FIELD_GET(DEV25G_MAC_MAXLEN_CFG_MAX_LEN, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_ADV_CHK_CFG */
+#define DEV25G_MAC_ADV_CHK_CFG(t) __REG(TARGET_DEV25G, t, 8, 0, 0, 1, 60, 28, 0, 1, 4)
+
+#define DEV25G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA   BIT(24)
+#define DEV25G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA, x)
+#define DEV25G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA, x)
+
+#define DEV25G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA   BIT(20)
+#define DEV25G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA, x)
+#define DEV25G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA, x)
+
+#define DEV25G_MAC_ADV_CHK_CFG_SFD_CHK_ENA       BIT(16)
+#define DEV25G_MAC_ADV_CHK_CFG_SFD_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_SFD_CHK_ENA, x)
+#define DEV25G_MAC_ADV_CHK_CFG_SFD_CHK_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_SFD_CHK_ENA, x)
+
+#define DEV25G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS   BIT(12)
+#define DEV25G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS, x)
+#define DEV25G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS, x)
+
+#define DEV25G_MAC_ADV_CHK_CFG_PRM_CHK_ENA       BIT(8)
+#define DEV25G_MAC_ADV_CHK_CFG_PRM_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_PRM_CHK_ENA, x)
+#define DEV25G_MAC_ADV_CHK_CFG_PRM_CHK_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_PRM_CHK_ENA, x)
+
+#define DEV25G_MAC_ADV_CHK_CFG_OOR_ERR_ENA       BIT(4)
+#define DEV25G_MAC_ADV_CHK_CFG_OOR_ERR_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_OOR_ERR_ENA, x)
+#define DEV25G_MAC_ADV_CHK_CFG_OOR_ERR_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_OOR_ERR_ENA, x)
+
+#define DEV25G_MAC_ADV_CHK_CFG_INR_ERR_ENA       BIT(0)
+#define DEV25G_MAC_ADV_CHK_CFG_INR_ERR_ENA_SET(x)\
+	FIELD_PREP(DEV25G_MAC_ADV_CHK_CFG_INR_ERR_ENA, x)
+#define DEV25G_MAC_ADV_CHK_CFG_INR_ERR_ENA_GET(x)\
+	FIELD_GET(DEV25G_MAC_ADV_CHK_CFG_INR_ERR_ENA, x)
+
+/*      DEV10G:DEV_CFG_STATUS:DEV_RST_CTRL */
+#define DEV25G_DEV_RST_CTRL(t)    __REG(TARGET_DEV25G, t, 8, 436, 0, 1, 52, 0, 0, 1, 4)
+
+#define DEV25G_DEV_RST_CTRL_PARDET_MODE_ENA      BIT(28)
+#define DEV25G_DEV_RST_CTRL_PARDET_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_PARDET_MODE_ENA, x)
+#define DEV25G_DEV_RST_CTRL_PARDET_MODE_ENA_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_PARDET_MODE_ENA, x)
+
+#define DEV25G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS BIT(27)
+#define DEV25G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+#define DEV25G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+
+#define DEV25G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS GENMASK(26, 25)
+#define DEV25G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS, x)
+#define DEV25G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS, x)
+
+#define DEV25G_DEV_RST_CTRL_SERDES_SPEED_SEL     GENMASK(24, 23)
+#define DEV25G_DEV_RST_CTRL_SERDES_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_SERDES_SPEED_SEL, x)
+#define DEV25G_DEV_RST_CTRL_SERDES_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_SERDES_SPEED_SEL, x)
+
+#define DEV25G_DEV_RST_CTRL_SPEED_SEL            GENMASK(22, 20)
+#define DEV25G_DEV_RST_CTRL_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_SPEED_SEL, x)
+#define DEV25G_DEV_RST_CTRL_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_SPEED_SEL, x)
+
+#define DEV25G_DEV_RST_CTRL_PCS_TX_RST           BIT(12)
+#define DEV25G_DEV_RST_CTRL_PCS_TX_RST_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_PCS_TX_RST, x)
+#define DEV25G_DEV_RST_CTRL_PCS_TX_RST_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_PCS_TX_RST, x)
+
+#define DEV25G_DEV_RST_CTRL_PCS_RX_RST           BIT(8)
+#define DEV25G_DEV_RST_CTRL_PCS_RX_RST_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_PCS_RX_RST, x)
+#define DEV25G_DEV_RST_CTRL_PCS_RX_RST_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_PCS_RX_RST, x)
+
+#define DEV25G_DEV_RST_CTRL_MAC_TX_RST           BIT(4)
+#define DEV25G_DEV_RST_CTRL_MAC_TX_RST_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_MAC_TX_RST, x)
+#define DEV25G_DEV_RST_CTRL_MAC_TX_RST_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_MAC_TX_RST, x)
+
+#define DEV25G_DEV_RST_CTRL_MAC_RX_RST           BIT(0)
+#define DEV25G_DEV_RST_CTRL_MAC_RX_RST_SET(x)\
+	FIELD_PREP(DEV25G_DEV_RST_CTRL_MAC_RX_RST, x)
+#define DEV25G_DEV_RST_CTRL_MAC_RX_RST_GET(x)\
+	FIELD_GET(DEV25G_DEV_RST_CTRL_MAC_RX_RST, x)
+
+/*      DEV10G:PCS25G_CFG_STATUS:PCS25G_CFG */
+#define DEV25G_PCS25G_CFG(t)      __REG(TARGET_DEV25G, t, 8, 488, 0, 1, 32, 0, 0, 1, 4)
+
+#define DEV25G_PCS25G_CFG_PCS25G_ENA             BIT(0)
+#define DEV25G_PCS25G_CFG_PCS25G_ENA_SET(x)\
+	FIELD_PREP(DEV25G_PCS25G_CFG_PCS25G_ENA, x)
+#define DEV25G_PCS25G_CFG_PCS25G_ENA_GET(x)\
+	FIELD_GET(DEV25G_PCS25G_CFG_PCS25G_ENA, x)
+
+/*      DEV10G:PCS25G_CFG_STATUS:PCS25G_SD_CFG */
+#define DEV25G_PCS25G_SD_CFG(t)   __REG(TARGET_DEV25G, t, 8, 488, 0, 1, 32, 4, 0, 1, 4)
+
+#define DEV25G_PCS25G_SD_CFG_SD_SEL              BIT(8)
+#define DEV25G_PCS25G_SD_CFG_SD_SEL_SET(x)\
+	FIELD_PREP(DEV25G_PCS25G_SD_CFG_SD_SEL, x)
+#define DEV25G_PCS25G_SD_CFG_SD_SEL_GET(x)\
+	FIELD_GET(DEV25G_PCS25G_SD_CFG_SD_SEL, x)
+
+#define DEV25G_PCS25G_SD_CFG_SD_POL              BIT(4)
+#define DEV25G_PCS25G_SD_CFG_SD_POL_SET(x)\
+	FIELD_PREP(DEV25G_PCS25G_SD_CFG_SD_POL, x)
+#define DEV25G_PCS25G_SD_CFG_SD_POL_GET(x)\
+	FIELD_GET(DEV25G_PCS25G_SD_CFG_SD_POL, x)
+
+#define DEV25G_PCS25G_SD_CFG_SD_ENA              BIT(0)
+#define DEV25G_PCS25G_SD_CFG_SD_ENA_SET(x)\
+	FIELD_PREP(DEV25G_PCS25G_SD_CFG_SD_ENA, x)
+#define DEV25G_PCS25G_SD_CFG_SD_ENA_GET(x)\
+	FIELD_GET(DEV25G_PCS25G_SD_CFG_SD_ENA, x)
+
+/*      DEV1G:DEV_CFG_STATUS:DEV_RST_CTRL */
+#define DEV2G5_DEV_RST_CTRL(t)    __REG(TARGET_DEV2G5, t, 65, 0, 0, 1, 36, 0, 0, 1, 4)
+
+#define DEV2G5_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS BIT(23)
+#define DEV2G5_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+#define DEV2G5_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+
+#define DEV2G5_DEV_RST_CTRL_SPEED_SEL            GENMASK(22, 20)
+#define DEV2G5_DEV_RST_CTRL_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_SPEED_SEL, x)
+#define DEV2G5_DEV_RST_CTRL_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_SPEED_SEL, x)
+
+#define DEV2G5_DEV_RST_CTRL_USX_PCS_TX_RST       BIT(17)
+#define DEV2G5_DEV_RST_CTRL_USX_PCS_TX_RST_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_USX_PCS_TX_RST, x)
+#define DEV2G5_DEV_RST_CTRL_USX_PCS_TX_RST_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_USX_PCS_TX_RST, x)
+
+#define DEV2G5_DEV_RST_CTRL_USX_PCS_RX_RST       BIT(16)
+#define DEV2G5_DEV_RST_CTRL_USX_PCS_RX_RST_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_USX_PCS_RX_RST, x)
+#define DEV2G5_DEV_RST_CTRL_USX_PCS_RX_RST_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_USX_PCS_RX_RST, x)
+
+#define DEV2G5_DEV_RST_CTRL_PCS_TX_RST           BIT(12)
+#define DEV2G5_DEV_RST_CTRL_PCS_TX_RST_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_PCS_TX_RST, x)
+#define DEV2G5_DEV_RST_CTRL_PCS_TX_RST_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_PCS_TX_RST, x)
+
+#define DEV2G5_DEV_RST_CTRL_PCS_RX_RST           BIT(8)
+#define DEV2G5_DEV_RST_CTRL_PCS_RX_RST_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_PCS_RX_RST, x)
+#define DEV2G5_DEV_RST_CTRL_PCS_RX_RST_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_PCS_RX_RST, x)
+
+#define DEV2G5_DEV_RST_CTRL_MAC_TX_RST           BIT(4)
+#define DEV2G5_DEV_RST_CTRL_MAC_TX_RST_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_MAC_TX_RST, x)
+#define DEV2G5_DEV_RST_CTRL_MAC_TX_RST_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_MAC_TX_RST, x)
+
+#define DEV2G5_DEV_RST_CTRL_MAC_RX_RST           BIT(0)
+#define DEV2G5_DEV_RST_CTRL_MAC_RX_RST_SET(x)\
+	FIELD_PREP(DEV2G5_DEV_RST_CTRL_MAC_RX_RST, x)
+#define DEV2G5_DEV_RST_CTRL_MAC_RX_RST_GET(x)\
+	FIELD_GET(DEV2G5_DEV_RST_CTRL_MAC_RX_RST, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_ENA_CFG */
+#define DEV2G5_MAC_ENA_CFG(t)     __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 0, 0, 1, 4)
+
+#define DEV2G5_MAC_ENA_CFG_RX_ENA                BIT(4)
+#define DEV2G5_MAC_ENA_CFG_RX_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_ENA_CFG_RX_ENA, x)
+#define DEV2G5_MAC_ENA_CFG_RX_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_ENA_CFG_RX_ENA, x)
+
+#define DEV2G5_MAC_ENA_CFG_TX_ENA                BIT(0)
+#define DEV2G5_MAC_ENA_CFG_TX_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_ENA_CFG_TX_ENA, x)
+#define DEV2G5_MAC_ENA_CFG_TX_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_ENA_CFG_TX_ENA, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_MODE_CFG */
+#define DEV2G5_MAC_MODE_CFG(t)    __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 4, 0, 1, 4)
+
+#define DEV2G5_MAC_MODE_CFG_FC_WORD_SYNC_ENA     BIT(8)
+#define DEV2G5_MAC_MODE_CFG_FC_WORD_SYNC_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_MODE_CFG_FC_WORD_SYNC_ENA, x)
+#define DEV2G5_MAC_MODE_CFG_FC_WORD_SYNC_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_MODE_CFG_FC_WORD_SYNC_ENA, x)
+
+#define DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA        BIT(4)
+#define DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA, x)
+#define DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA, x)
+
+#define DEV2G5_MAC_MODE_CFG_FDX_ENA              BIT(0)
+#define DEV2G5_MAC_MODE_CFG_FDX_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_MODE_CFG_FDX_ENA, x)
+#define DEV2G5_MAC_MODE_CFG_FDX_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_MODE_CFG_FDX_ENA, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_MAXLEN_CFG */
+#define DEV2G5_MAC_MAXLEN_CFG(t)  __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 8, 0, 1, 4)
+
+#define DEV2G5_MAC_MAXLEN_CFG_MAX_LEN            GENMASK(15, 0)
+#define DEV2G5_MAC_MAXLEN_CFG_MAX_LEN_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_MAXLEN_CFG_MAX_LEN, x)
+#define DEV2G5_MAC_MAXLEN_CFG_MAX_LEN_GET(x)\
+	FIELD_GET(DEV2G5_MAC_MAXLEN_CFG_MAX_LEN, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_TAGS_CFG */
+#define DEV2G5_MAC_TAGS_CFG(t)    __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 12, 0, 1, 4)
+
+#define DEV2G5_MAC_TAGS_CFG_TAG_ID               GENMASK(31, 16)
+#define DEV2G5_MAC_TAGS_CFG_TAG_ID_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_TAGS_CFG_TAG_ID, x)
+#define DEV2G5_MAC_TAGS_CFG_TAG_ID_GET(x)\
+	FIELD_GET(DEV2G5_MAC_TAGS_CFG_TAG_ID, x)
+
+#define DEV2G5_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA     BIT(3)
+#define DEV2G5_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA, x)
+#define DEV2G5_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA, x)
+
+#define DEV2G5_MAC_TAGS_CFG_PB_ENA               GENMASK(2, 1)
+#define DEV2G5_MAC_TAGS_CFG_PB_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_TAGS_CFG_PB_ENA, x)
+#define DEV2G5_MAC_TAGS_CFG_PB_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_TAGS_CFG_PB_ENA, x)
+
+#define DEV2G5_MAC_TAGS_CFG_VLAN_AWR_ENA         BIT(0)
+#define DEV2G5_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_TAGS_CFG_VLAN_AWR_ENA, x)
+#define DEV2G5_MAC_TAGS_CFG_VLAN_AWR_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_TAGS_CFG_VLAN_AWR_ENA, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_TAGS_CFG2 */
+#define DEV2G5_MAC_TAGS_CFG2(t)   __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 16, 0, 1, 4)
+
+#define DEV2G5_MAC_TAGS_CFG2_TAG_ID3             GENMASK(31, 16)
+#define DEV2G5_MAC_TAGS_CFG2_TAG_ID3_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_TAGS_CFG2_TAG_ID3, x)
+#define DEV2G5_MAC_TAGS_CFG2_TAG_ID3_GET(x)\
+	FIELD_GET(DEV2G5_MAC_TAGS_CFG2_TAG_ID3, x)
+
+#define DEV2G5_MAC_TAGS_CFG2_TAG_ID2             GENMASK(15, 0)
+#define DEV2G5_MAC_TAGS_CFG2_TAG_ID2_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_TAGS_CFG2_TAG_ID2, x)
+#define DEV2G5_MAC_TAGS_CFG2_TAG_ID2_GET(x)\
+	FIELD_GET(DEV2G5_MAC_TAGS_CFG2_TAG_ID2, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_ADV_CHK_CFG */
+#define DEV2G5_MAC_ADV_CHK_CFG(t) __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 20, 0, 1, 4)
+
+#define DEV2G5_MAC_ADV_CHK_CFG_LEN_DROP_ENA      BIT(0)
+#define DEV2G5_MAC_ADV_CHK_CFG_LEN_DROP_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_ADV_CHK_CFG_LEN_DROP_ENA, x)
+#define DEV2G5_MAC_ADV_CHK_CFG_LEN_DROP_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_ADV_CHK_CFG_LEN_DROP_ENA, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_IFG_CFG */
+#define DEV2G5_MAC_IFG_CFG(t)     __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 24, 0, 1, 4)
+
+#define DEV2G5_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK BIT(17)
+#define DEV2G5_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK, x)
+#define DEV2G5_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK_GET(x)\
+	FIELD_GET(DEV2G5_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK, x)
+
+#define DEV2G5_MAC_IFG_CFG_TX_IFG                GENMASK(12, 8)
+#define DEV2G5_MAC_IFG_CFG_TX_IFG_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_IFG_CFG_TX_IFG, x)
+#define DEV2G5_MAC_IFG_CFG_TX_IFG_GET(x)\
+	FIELD_GET(DEV2G5_MAC_IFG_CFG_TX_IFG, x)
+
+#define DEV2G5_MAC_IFG_CFG_RX_IFG2               GENMASK(7, 4)
+#define DEV2G5_MAC_IFG_CFG_RX_IFG2_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_IFG_CFG_RX_IFG2, x)
+#define DEV2G5_MAC_IFG_CFG_RX_IFG2_GET(x)\
+	FIELD_GET(DEV2G5_MAC_IFG_CFG_RX_IFG2, x)
+
+#define DEV2G5_MAC_IFG_CFG_RX_IFG1               GENMASK(3, 0)
+#define DEV2G5_MAC_IFG_CFG_RX_IFG1_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_IFG_CFG_RX_IFG1, x)
+#define DEV2G5_MAC_IFG_CFG_RX_IFG1_GET(x)\
+	FIELD_GET(DEV2G5_MAC_IFG_CFG_RX_IFG1, x)
+
+/*      DEV1G:MAC_CFG_STATUS:MAC_HDX_CFG */
+#define DEV2G5_MAC_HDX_CFG(t)     __REG(TARGET_DEV2G5, t, 65, 52, 0, 1, 36, 28, 0, 1, 4)
+
+#define DEV2G5_MAC_HDX_CFG_BYPASS_COL_SYNC       BIT(26)
+#define DEV2G5_MAC_HDX_CFG_BYPASS_COL_SYNC_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_HDX_CFG_BYPASS_COL_SYNC, x)
+#define DEV2G5_MAC_HDX_CFG_BYPASS_COL_SYNC_GET(x)\
+	FIELD_GET(DEV2G5_MAC_HDX_CFG_BYPASS_COL_SYNC, x)
+
+#define DEV2G5_MAC_HDX_CFG_SEED                  GENMASK(23, 16)
+#define DEV2G5_MAC_HDX_CFG_SEED_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_HDX_CFG_SEED, x)
+#define DEV2G5_MAC_HDX_CFG_SEED_GET(x)\
+	FIELD_GET(DEV2G5_MAC_HDX_CFG_SEED, x)
+
+#define DEV2G5_MAC_HDX_CFG_SEED_LOAD             BIT(12)
+#define DEV2G5_MAC_HDX_CFG_SEED_LOAD_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_HDX_CFG_SEED_LOAD, x)
+#define DEV2G5_MAC_HDX_CFG_SEED_LOAD_GET(x)\
+	FIELD_GET(DEV2G5_MAC_HDX_CFG_SEED_LOAD, x)
+
+#define DEV2G5_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA BIT(8)
+#define DEV2G5_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA, x)
+#define DEV2G5_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA_GET(x)\
+	FIELD_GET(DEV2G5_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA, x)
+
+#define DEV2G5_MAC_HDX_CFG_LATE_COL_POS          GENMASK(6, 0)
+#define DEV2G5_MAC_HDX_CFG_LATE_COL_POS_SET(x)\
+	FIELD_PREP(DEV2G5_MAC_HDX_CFG_LATE_COL_POS, x)
+#define DEV2G5_MAC_HDX_CFG_LATE_COL_POS_GET(x)\
+	FIELD_GET(DEV2G5_MAC_HDX_CFG_LATE_COL_POS, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_CFG */
+#define DEV2G5_PCS1G_CFG(t)       __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 0, 0, 1, 4)
+
+#define DEV2G5_PCS1G_CFG_LINK_STATUS_TYPE        BIT(4)
+#define DEV2G5_PCS1G_CFG_LINK_STATUS_TYPE_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_CFG_LINK_STATUS_TYPE, x)
+#define DEV2G5_PCS1G_CFG_LINK_STATUS_TYPE_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_CFG_LINK_STATUS_TYPE, x)
+
+#define DEV2G5_PCS1G_CFG_AN_LINK_CTRL_ENA        BIT(1)
+#define DEV2G5_PCS1G_CFG_AN_LINK_CTRL_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_CFG_AN_LINK_CTRL_ENA, x)
+#define DEV2G5_PCS1G_CFG_AN_LINK_CTRL_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_CFG_AN_LINK_CTRL_ENA, x)
+
+#define DEV2G5_PCS1G_CFG_PCS_ENA                 BIT(0)
+#define DEV2G5_PCS1G_CFG_PCS_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_CFG_PCS_ENA, x)
+#define DEV2G5_PCS1G_CFG_PCS_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_CFG_PCS_ENA, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_MODE_CFG */
+#define DEV2G5_PCS1G_MODE_CFG(t)  __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 4, 0, 1, 4)
+
+#define DEV2G5_PCS1G_MODE_CFG_UNIDIR_MODE_ENA    BIT(4)
+#define DEV2G5_PCS1G_MODE_CFG_UNIDIR_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_MODE_CFG_UNIDIR_MODE_ENA, x)
+#define DEV2G5_PCS1G_MODE_CFG_UNIDIR_MODE_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_MODE_CFG_UNIDIR_MODE_ENA, x)
+
+#define DEV2G5_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA  BIT(1)
+#define DEV2G5_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA, x)
+#define DEV2G5_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_MODE_CFG_SAVE_PREAMBLE_ENA, x)
+
+#define DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA     BIT(0)
+#define DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA, x)
+#define DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_SD_CFG */
+#define DEV2G5_PCS1G_SD_CFG(t)    __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 8, 0, 1, 4)
+
+#define DEV2G5_PCS1G_SD_CFG_SD_SEL               BIT(8)
+#define DEV2G5_PCS1G_SD_CFG_SD_SEL_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_SD_CFG_SD_SEL, x)
+#define DEV2G5_PCS1G_SD_CFG_SD_SEL_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_SD_CFG_SD_SEL, x)
+
+#define DEV2G5_PCS1G_SD_CFG_SD_POL               BIT(4)
+#define DEV2G5_PCS1G_SD_CFG_SD_POL_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_SD_CFG_SD_POL, x)
+#define DEV2G5_PCS1G_SD_CFG_SD_POL_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_SD_CFG_SD_POL, x)
+
+#define DEV2G5_PCS1G_SD_CFG_SD_ENA               BIT(0)
+#define DEV2G5_PCS1G_SD_CFG_SD_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_SD_CFG_SD_ENA, x)
+#define DEV2G5_PCS1G_SD_CFG_SD_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_SD_CFG_SD_ENA, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_ANEG_CFG */
+#define DEV2G5_PCS1G_ANEG_CFG(t)  __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 12, 0, 1, 4)
+
+#define DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY        GENMASK(31, 16)
+#define DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY, x)
+#define DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY, x)
+
+#define DEV2G5_PCS1G_ANEG_CFG_SW_RESOLVE_ENA     BIT(8)
+#define DEV2G5_PCS1G_ANEG_CFG_SW_RESOLVE_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_CFG_SW_RESOLVE_ENA, x)
+#define DEV2G5_PCS1G_ANEG_CFG_SW_RESOLVE_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_CFG_SW_RESOLVE_ENA, x)
+
+#define DEV2G5_PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT BIT(1)
+#define DEV2G5_PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT, x)
+#define DEV2G5_PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT, x)
+
+#define DEV2G5_PCS1G_ANEG_CFG_ANEG_ENA           BIT(0)
+#define DEV2G5_PCS1G_ANEG_CFG_ANEG_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_CFG_ANEG_ENA, x)
+#define DEV2G5_PCS1G_ANEG_CFG_ANEG_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_CFG_ANEG_ENA, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_LB_CFG */
+#define DEV2G5_PCS1G_LB_CFG(t)    __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 20, 0, 1, 4)
+
+#define DEV2G5_PCS1G_LB_CFG_RA_ENA               BIT(4)
+#define DEV2G5_PCS1G_LB_CFG_RA_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LB_CFG_RA_ENA, x)
+#define DEV2G5_PCS1G_LB_CFG_RA_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LB_CFG_RA_ENA, x)
+
+#define DEV2G5_PCS1G_LB_CFG_GMII_PHY_LB_ENA      BIT(1)
+#define DEV2G5_PCS1G_LB_CFG_GMII_PHY_LB_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LB_CFG_GMII_PHY_LB_ENA, x)
+#define DEV2G5_PCS1G_LB_CFG_GMII_PHY_LB_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LB_CFG_GMII_PHY_LB_ENA, x)
+
+#define DEV2G5_PCS1G_LB_CFG_TBI_HOST_LB_ENA      BIT(0)
+#define DEV2G5_PCS1G_LB_CFG_TBI_HOST_LB_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LB_CFG_TBI_HOST_LB_ENA, x)
+#define DEV2G5_PCS1G_LB_CFG_TBI_HOST_LB_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LB_CFG_TBI_HOST_LB_ENA, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_ANEG_STATUS */
+#define DEV2G5_PCS1G_ANEG_STATUS(t) __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 32, 0, 1, 4)
+
+#define DEV2G5_PCS1G_ANEG_STATUS_LP_ADV_ABILITY  GENMASK(31, 16)
+#define DEV2G5_PCS1G_ANEG_STATUS_LP_ADV_ABILITY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_STATUS_LP_ADV_ABILITY, x)
+#define DEV2G5_PCS1G_ANEG_STATUS_LP_ADV_ABILITY_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_STATUS_LP_ADV_ABILITY, x)
+
+#define DEV2G5_PCS1G_ANEG_STATUS_PR              BIT(4)
+#define DEV2G5_PCS1G_ANEG_STATUS_PR_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_STATUS_PR, x)
+#define DEV2G5_PCS1G_ANEG_STATUS_PR_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_STATUS_PR, x)
+
+#define DEV2G5_PCS1G_ANEG_STATUS_PAGE_RX_STICKY  BIT(3)
+#define DEV2G5_PCS1G_ANEG_STATUS_PAGE_RX_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_STATUS_PAGE_RX_STICKY, x)
+#define DEV2G5_PCS1G_ANEG_STATUS_PAGE_RX_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_STATUS_PAGE_RX_STICKY, x)
+
+#define DEV2G5_PCS1G_ANEG_STATUS_ANEG_COMPLETE   BIT(0)
+#define DEV2G5_PCS1G_ANEG_STATUS_ANEG_COMPLETE_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_ANEG_STATUS_ANEG_COMPLETE, x)
+#define DEV2G5_PCS1G_ANEG_STATUS_ANEG_COMPLETE_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_ANEG_STATUS_ANEG_COMPLETE, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_LINK_STATUS */
+#define DEV2G5_PCS1G_LINK_STATUS(t) __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 40, 0, 1, 4)
+
+#define DEV2G5_PCS1G_LINK_STATUS_DELAY_VAR       GENMASK(15, 12)
+#define DEV2G5_PCS1G_LINK_STATUS_DELAY_VAR_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LINK_STATUS_DELAY_VAR, x)
+#define DEV2G5_PCS1G_LINK_STATUS_DELAY_VAR_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LINK_STATUS_DELAY_VAR, x)
+
+#define DEV2G5_PCS1G_LINK_STATUS_SIGNAL_DETECT   BIT(8)
+#define DEV2G5_PCS1G_LINK_STATUS_SIGNAL_DETECT_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LINK_STATUS_SIGNAL_DETECT, x)
+#define DEV2G5_PCS1G_LINK_STATUS_SIGNAL_DETECT_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LINK_STATUS_SIGNAL_DETECT, x)
+
+#define DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS     BIT(4)
+#define DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS, x)
+#define DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS, x)
+
+#define DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS     BIT(0)
+#define DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS, x)
+#define DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS, x)
+
+/*      DEV1G:PCS1G_CFG_STATUS:PCS1G_STICKY */
+#define DEV2G5_PCS1G_STICKY(t)    __REG(TARGET_DEV2G5, t, 65, 88, 0, 1, 68, 48, 0, 1, 4)
+
+#define DEV2G5_PCS1G_STICKY_LINK_DOWN_STICKY     BIT(4)
+#define DEV2G5_PCS1G_STICKY_LINK_DOWN_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_STICKY_LINK_DOWN_STICKY, x)
+#define DEV2G5_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_STICKY_LINK_DOWN_STICKY, x)
+
+#define DEV2G5_PCS1G_STICKY_OUT_OF_SYNC_STICKY   BIT(0)
+#define DEV2G5_PCS1G_STICKY_OUT_OF_SYNC_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS1G_STICKY_OUT_OF_SYNC_STICKY, x)
+#define DEV2G5_PCS1G_STICKY_OUT_OF_SYNC_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS1G_STICKY_OUT_OF_SYNC_STICKY, x)
+
+/*      DEV1G:PCS_FX100_CONFIGURATION:PCS_FX100_CFG */
+#define DEV2G5_PCS_FX100_CFG(t)   __REG(TARGET_DEV2G5, t, 65, 164, 0, 1, 4, 0, 0, 1, 4)
+
+#define DEV2G5_PCS_FX100_CFG_SD_SEL              BIT(26)
+#define DEV2G5_PCS_FX100_CFG_SD_SEL_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_SD_SEL, x)
+#define DEV2G5_PCS_FX100_CFG_SD_SEL_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_SD_SEL, x)
+
+#define DEV2G5_PCS_FX100_CFG_SD_POL              BIT(25)
+#define DEV2G5_PCS_FX100_CFG_SD_POL_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_SD_POL, x)
+#define DEV2G5_PCS_FX100_CFG_SD_POL_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_SD_POL, x)
+
+#define DEV2G5_PCS_FX100_CFG_SD_ENA              BIT(24)
+#define DEV2G5_PCS_FX100_CFG_SD_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_SD_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_SD_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_SD_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_LOOPBACK_ENA        BIT(20)
+#define DEV2G5_PCS_FX100_CFG_LOOPBACK_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_LOOPBACK_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_LOOPBACK_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_LOOPBACK_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_SWAP_MII_ENA        BIT(16)
+#define DEV2G5_PCS_FX100_CFG_SWAP_MII_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_SWAP_MII_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_SWAP_MII_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_SWAP_MII_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_RXBITSEL            GENMASK(15, 12)
+#define DEV2G5_PCS_FX100_CFG_RXBITSEL_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_RXBITSEL, x)
+#define DEV2G5_PCS_FX100_CFG_RXBITSEL_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_RXBITSEL, x)
+
+#define DEV2G5_PCS_FX100_CFG_SIGDET_CFG          GENMASK(10, 9)
+#define DEV2G5_PCS_FX100_CFG_SIGDET_CFG_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_SIGDET_CFG, x)
+#define DEV2G5_PCS_FX100_CFG_SIGDET_CFG_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_SIGDET_CFG, x)
+
+#define DEV2G5_PCS_FX100_CFG_LINKHYST_TM_ENA     BIT(8)
+#define DEV2G5_PCS_FX100_CFG_LINKHYST_TM_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_LINKHYST_TM_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_LINKHYST_TM_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_LINKHYST_TM_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_LINKHYSTTIMER       GENMASK(7, 4)
+#define DEV2G5_PCS_FX100_CFG_LINKHYSTTIMER_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_LINKHYSTTIMER, x)
+#define DEV2G5_PCS_FX100_CFG_LINKHYSTTIMER_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_LINKHYSTTIMER, x)
+
+#define DEV2G5_PCS_FX100_CFG_UNIDIR_MODE_ENA     BIT(3)
+#define DEV2G5_PCS_FX100_CFG_UNIDIR_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_UNIDIR_MODE_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_UNIDIR_MODE_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_UNIDIR_MODE_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_FEFCHK_ENA          BIT(2)
+#define DEV2G5_PCS_FX100_CFG_FEFCHK_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_FEFCHK_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_FEFCHK_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_FEFCHK_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_FEFGEN_ENA          BIT(1)
+#define DEV2G5_PCS_FX100_CFG_FEFGEN_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_FEFGEN_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_FEFGEN_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_FEFGEN_ENA, x)
+
+#define DEV2G5_PCS_FX100_CFG_PCS_ENA             BIT(0)
+#define DEV2G5_PCS_FX100_CFG_PCS_ENA_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_CFG_PCS_ENA, x)
+#define DEV2G5_PCS_FX100_CFG_PCS_ENA_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_CFG_PCS_ENA, x)
+
+/*      DEV1G:PCS_FX100_STATUS:PCS_FX100_STATUS */
+#define DEV2G5_PCS_FX100_STATUS(t) __REG(TARGET_DEV2G5, t, 65, 168, 0, 1, 4, 0, 0, 1, 4)
+
+#define DEV2G5_PCS_FX100_STATUS_EDGE_POS_PTP     GENMASK(11, 8)
+#define DEV2G5_PCS_FX100_STATUS_EDGE_POS_PTP_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_EDGE_POS_PTP, x)
+#define DEV2G5_PCS_FX100_STATUS_EDGE_POS_PTP_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_EDGE_POS_PTP, x)
+
+#define DEV2G5_PCS_FX100_STATUS_PCS_ERROR_STICKY BIT(7)
+#define DEV2G5_PCS_FX100_STATUS_PCS_ERROR_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_PCS_ERROR_STICKY, x)
+#define DEV2G5_PCS_FX100_STATUS_PCS_ERROR_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_PCS_ERROR_STICKY, x)
+
+#define DEV2G5_PCS_FX100_STATUS_FEF_FOUND_STICKY BIT(6)
+#define DEV2G5_PCS_FX100_STATUS_FEF_FOUND_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_FEF_FOUND_STICKY, x)
+#define DEV2G5_PCS_FX100_STATUS_FEF_FOUND_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_FEF_FOUND_STICKY, x)
+
+#define DEV2G5_PCS_FX100_STATUS_SSD_ERROR_STICKY BIT(5)
+#define DEV2G5_PCS_FX100_STATUS_SSD_ERROR_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_SSD_ERROR_STICKY, x)
+#define DEV2G5_PCS_FX100_STATUS_SSD_ERROR_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_SSD_ERROR_STICKY, x)
+
+#define DEV2G5_PCS_FX100_STATUS_SYNC_LOST_STICKY BIT(4)
+#define DEV2G5_PCS_FX100_STATUS_SYNC_LOST_STICKY_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_SYNC_LOST_STICKY, x)
+#define DEV2G5_PCS_FX100_STATUS_SYNC_LOST_STICKY_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_SYNC_LOST_STICKY, x)
+
+#define DEV2G5_PCS_FX100_STATUS_FEF_STATUS       BIT(2)
+#define DEV2G5_PCS_FX100_STATUS_FEF_STATUS_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_FEF_STATUS, x)
+#define DEV2G5_PCS_FX100_STATUS_FEF_STATUS_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_FEF_STATUS, x)
+
+#define DEV2G5_PCS_FX100_STATUS_SIGNAL_DETECT    BIT(1)
+#define DEV2G5_PCS_FX100_STATUS_SIGNAL_DETECT_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_SIGNAL_DETECT, x)
+#define DEV2G5_PCS_FX100_STATUS_SIGNAL_DETECT_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_SIGNAL_DETECT, x)
+
+#define DEV2G5_PCS_FX100_STATUS_SYNC_STATUS      BIT(0)
+#define DEV2G5_PCS_FX100_STATUS_SYNC_STATUS_SET(x)\
+	FIELD_PREP(DEV2G5_PCS_FX100_STATUS_SYNC_STATUS, x)
+#define DEV2G5_PCS_FX100_STATUS_SYNC_STATUS_GET(x)\
+	FIELD_GET(DEV2G5_PCS_FX100_STATUS_SYNC_STATUS, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_ENA_CFG */
+#define DEV5G_MAC_ENA_CFG(t)      __REG(TARGET_DEV5G, t, 13, 0, 0, 1, 60, 0, 0, 1, 4)
+
+#define DEV5G_MAC_ENA_CFG_RX_ENA                 BIT(4)
+#define DEV5G_MAC_ENA_CFG_RX_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ENA_CFG_RX_ENA, x)
+#define DEV5G_MAC_ENA_CFG_RX_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ENA_CFG_RX_ENA, x)
+
+#define DEV5G_MAC_ENA_CFG_TX_ENA                 BIT(0)
+#define DEV5G_MAC_ENA_CFG_TX_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ENA_CFG_TX_ENA, x)
+#define DEV5G_MAC_ENA_CFG_TX_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ENA_CFG_TX_ENA, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_MAXLEN_CFG */
+#define DEV5G_MAC_MAXLEN_CFG(t)   __REG(TARGET_DEV5G, t, 13, 0, 0, 1, 60, 8, 0, 1, 4)
+
+#define DEV5G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK     BIT(16)
+#define DEV5G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_SET(x)\
+	FIELD_PREP(DEV5G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK, x)
+#define DEV5G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_GET(x)\
+	FIELD_GET(DEV5G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK, x)
+
+#define DEV5G_MAC_MAXLEN_CFG_MAX_LEN             GENMASK(15, 0)
+#define DEV5G_MAC_MAXLEN_CFG_MAX_LEN_SET(x)\
+	FIELD_PREP(DEV5G_MAC_MAXLEN_CFG_MAX_LEN, x)
+#define DEV5G_MAC_MAXLEN_CFG_MAX_LEN_GET(x)\
+	FIELD_GET(DEV5G_MAC_MAXLEN_CFG_MAX_LEN, x)
+
+/*      DEV10G:MAC_CFG_STATUS:MAC_ADV_CHK_CFG */
+#define DEV5G_MAC_ADV_CHK_CFG(t)  __REG(TARGET_DEV5G, t, 13, 0, 0, 1, 60, 28, 0, 1, 4)
+
+#define DEV5G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA    BIT(24)
+#define DEV5G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA, x)
+#define DEV5G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_EXT_EOP_CHK_ENA, x)
+
+#define DEV5G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA    BIT(20)
+#define DEV5G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA, x)
+#define DEV5G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_EXT_SOP_CHK_ENA, x)
+
+#define DEV5G_MAC_ADV_CHK_CFG_SFD_CHK_ENA        BIT(16)
+#define DEV5G_MAC_ADV_CHK_CFG_SFD_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_SFD_CHK_ENA, x)
+#define DEV5G_MAC_ADV_CHK_CFG_SFD_CHK_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_SFD_CHK_ENA, x)
+
+#define DEV5G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS    BIT(12)
+#define DEV5G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS, x)
+#define DEV5G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_PRM_SHK_CHK_DIS, x)
+
+#define DEV5G_MAC_ADV_CHK_CFG_PRM_CHK_ENA        BIT(8)
+#define DEV5G_MAC_ADV_CHK_CFG_PRM_CHK_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_PRM_CHK_ENA, x)
+#define DEV5G_MAC_ADV_CHK_CFG_PRM_CHK_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_PRM_CHK_ENA, x)
+
+#define DEV5G_MAC_ADV_CHK_CFG_OOR_ERR_ENA        BIT(4)
+#define DEV5G_MAC_ADV_CHK_CFG_OOR_ERR_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_OOR_ERR_ENA, x)
+#define DEV5G_MAC_ADV_CHK_CFG_OOR_ERR_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_OOR_ERR_ENA, x)
+
+#define DEV5G_MAC_ADV_CHK_CFG_INR_ERR_ENA        BIT(0)
+#define DEV5G_MAC_ADV_CHK_CFG_INR_ERR_ENA_SET(x)\
+	FIELD_PREP(DEV5G_MAC_ADV_CHK_CFG_INR_ERR_ENA, x)
+#define DEV5G_MAC_ADV_CHK_CFG_INR_ERR_ENA_GET(x)\
+	FIELD_GET(DEV5G_MAC_ADV_CHK_CFG_INR_ERR_ENA, x)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SYMBOL_ERR_CNT */
+#define DEV5G_RX_SYMBOL_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 0, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_PAUSE_CNT */
+#define DEV5G_RX_PAUSE_CNT(t)     __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 4, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_UNSUP_OPCODE_CNT */
+#define DEV5G_RX_UNSUP_OPCODE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 8, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_UC_CNT */
+#define DEV5G_RX_UC_CNT(t)        __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 12, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_MC_CNT */
+#define DEV5G_RX_MC_CNT(t)        __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 16, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_BC_CNT */
+#define DEV5G_RX_BC_CNT(t)        __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 20, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_CRC_ERR_CNT */
+#define DEV5G_RX_CRC_ERR_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 24, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_UNDERSIZE_CNT */
+#define DEV5G_RX_UNDERSIZE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 28, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_FRAGMENTS_CNT */
+#define DEV5G_RX_FRAGMENTS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 32, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_IN_RANGE_LEN_ERR_CNT */
+#define DEV5G_RX_IN_RANGE_LEN_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 36, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_OUT_OF_RANGE_LEN_ERR_CNT */
+#define DEV5G_RX_OUT_OF_RANGE_LEN_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 40, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_OVERSIZE_CNT */
+#define DEV5G_RX_OVERSIZE_CNT(t)  __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 44, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_JABBERS_CNT */
+#define DEV5G_RX_JABBERS_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 48, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE64_CNT */
+#define DEV5G_RX_SIZE64_CNT(t)    __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 52, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE65TO127_CNT */
+#define DEV5G_RX_SIZE65TO127_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 56, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE128TO255_CNT */
+#define DEV5G_RX_SIZE128TO255_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 60, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE256TO511_CNT */
+#define DEV5G_RX_SIZE256TO511_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 64, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE512TO1023_CNT */
+#define DEV5G_RX_SIZE512TO1023_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 68, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE1024TO1518_CNT */
+#define DEV5G_RX_SIZE1024TO1518_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 72, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_SIZE1519TOMAX_CNT */
+#define DEV5G_RX_SIZE1519TOMAX_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 76, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_IPG_SHRINK_CNT */
+#define DEV5G_RX_IPG_SHRINK_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 80, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_PAUSE_CNT */
+#define DEV5G_TX_PAUSE_CNT(t)     __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 84, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_UC_CNT */
+#define DEV5G_TX_UC_CNT(t)        __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 88, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_MC_CNT */
+#define DEV5G_TX_MC_CNT(t)        __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 92, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_BC_CNT */
+#define DEV5G_TX_BC_CNT(t)        __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 96, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE64_CNT */
+#define DEV5G_TX_SIZE64_CNT(t)    __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 100, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE65TO127_CNT */
+#define DEV5G_TX_SIZE65TO127_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 104, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE128TO255_CNT */
+#define DEV5G_TX_SIZE128TO255_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 108, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE256TO511_CNT */
+#define DEV5G_TX_SIZE256TO511_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 112, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE512TO1023_CNT */
+#define DEV5G_TX_SIZE512TO1023_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 116, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE1024TO1518_CNT */
+#define DEV5G_TX_SIZE1024TO1518_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 120, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_SIZE1519TOMAX_CNT */
+#define DEV5G_TX_SIZE1519TOMAX_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 124, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_ALIGNMENT_LOST_CNT */
+#define DEV5G_RX_ALIGNMENT_LOST_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 128, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_TAGGED_FRMS_CNT */
+#define DEV5G_RX_TAGGED_FRMS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 132, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_UNTAGGED_FRMS_CNT */
+#define DEV5G_RX_UNTAGGED_FRMS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 136, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_TAGGED_FRMS_CNT */
+#define DEV5G_TX_TAGGED_FRMS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 140, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:TX_UNTAGGED_FRMS_CNT */
+#define DEV5G_TX_UNTAGGED_FRMS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 144, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SYMBOL_ERR_CNT */
+#define DEV5G_PMAC_RX_SYMBOL_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 148, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_PAUSE_CNT */
+#define DEV5G_PMAC_RX_PAUSE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 152, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_UNSUP_OPCODE_CNT */
+#define DEV5G_PMAC_RX_UNSUP_OPCODE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 156, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_UC_CNT */
+#define DEV5G_PMAC_RX_UC_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 160, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_MC_CNT */
+#define DEV5G_PMAC_RX_MC_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 164, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_BC_CNT */
+#define DEV5G_PMAC_RX_BC_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 168, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_CRC_ERR_CNT */
+#define DEV5G_PMAC_RX_CRC_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 172, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_UNDERSIZE_CNT */
+#define DEV5G_PMAC_RX_UNDERSIZE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 176, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_FRAGMENTS_CNT */
+#define DEV5G_PMAC_RX_FRAGMENTS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 180, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_IN_RANGE_LEN_ERR_CNT */
+#define DEV5G_PMAC_RX_IN_RANGE_LEN_ERR_CNT(t) __REG(TARGET_DEV5G,\
+					t, 13, 60, 0, 1, 312, 184, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_OUT_OF_RANGE_LEN_ERR_CNT */
+#define DEV5G_PMAC_RX_OUT_OF_RANGE_LEN_ERR_CNT(t) __REG(TARGET_DEV5G,\
+					t, 13, 60, 0, 1, 312, 188, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_OVERSIZE_CNT */
+#define DEV5G_PMAC_RX_OVERSIZE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 192, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_JABBERS_CNT */
+#define DEV5G_PMAC_RX_JABBERS_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 196, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE64_CNT */
+#define DEV5G_PMAC_RX_SIZE64_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 200, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE65TO127_CNT */
+#define DEV5G_PMAC_RX_SIZE65TO127_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 204, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE128TO255_CNT */
+#define DEV5G_PMAC_RX_SIZE128TO255_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 208, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE256TO511_CNT */
+#define DEV5G_PMAC_RX_SIZE256TO511_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 212, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE512TO1023_CNT */
+#define DEV5G_PMAC_RX_SIZE512TO1023_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 216, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE1024TO1518_CNT */
+#define DEV5G_PMAC_RX_SIZE1024TO1518_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 220, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_SIZE1519TOMAX_CNT */
+#define DEV5G_PMAC_RX_SIZE1519TOMAX_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 224, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_PAUSE_CNT */
+#define DEV5G_PMAC_TX_PAUSE_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 228, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_UC_CNT */
+#define DEV5G_PMAC_TX_UC_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 232, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_MC_CNT */
+#define DEV5G_PMAC_TX_MC_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 236, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_BC_CNT */
+#define DEV5G_PMAC_TX_BC_CNT(t)   __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 240, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE64_CNT */
+#define DEV5G_PMAC_TX_SIZE64_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 244, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE65TO127_CNT */
+#define DEV5G_PMAC_TX_SIZE65TO127_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 248, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE128TO255_CNT */
+#define DEV5G_PMAC_TX_SIZE128TO255_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 252, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE256TO511_CNT */
+#define DEV5G_PMAC_TX_SIZE256TO511_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 256, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE512TO1023_CNT */
+#define DEV5G_PMAC_TX_SIZE512TO1023_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 260, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE1024TO1518_CNT */
+#define DEV5G_PMAC_TX_SIZE1024TO1518_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 264, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_TX_SIZE1519TOMAX_CNT */
+#define DEV5G_PMAC_TX_SIZE1519TOMAX_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 268, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_ALIGNMENT_LOST_CNT */
+#define DEV5G_PMAC_RX_ALIGNMENT_LOST_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 272, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:MM_RX_ASSEMBLY_ERR_CNT */
+#define DEV5G_MM_RX_ASSEMBLY_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 276, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:MM_RX_SMD_ERR_CNT */
+#define DEV5G_MM_RX_SMD_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 280, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:MM_RX_ASSEMBLY_OK_CNT */
+#define DEV5G_MM_RX_ASSEMBLY_OK_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 284, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:MM_RX_MERGE_FRAG_CNT */
+#define DEV5G_MM_RX_MERGE_FRAG_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 288, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:MM_TX_PFRAGMENT_CNT */
+#define DEV5G_MM_TX_PFRAGMENT_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 292, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_HIH_CKSM_ERR_CNT */
+#define DEV5G_RX_HIH_CKSM_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 296, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:RX_XGMII_PROT_ERR_CNT */
+#define DEV5G_RX_XGMII_PROT_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 300, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_HIH_CKSM_ERR_CNT */
+#define DEV5G_PMAC_RX_HIH_CKSM_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 304, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_32BIT:PMAC_RX_XGMII_PROT_ERR_CNT */
+#define DEV5G_PMAC_RX_XGMII_PROT_ERR_CNT(t) __REG(TARGET_DEV5G, t, 13, 60, 0, 1, 312, 308, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:RX_IN_BYTES_CNT */
+#define DEV5G_RX_IN_BYTES_CNT(t)  __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 0, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:RX_IN_BYTES_MSB_CNT */
+#define DEV5G_RX_IN_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 4, 0, 1, 4)
+
+#define DEV5G_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT, x)
+#define DEV5G_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_RX_IN_BYTES_MSB_CNT_RX_IN_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:RX_OK_BYTES_CNT */
+#define DEV5G_RX_OK_BYTES_CNT(t)  __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 8, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:RX_OK_BYTES_MSB_CNT */
+#define DEV5G_RX_OK_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 12, 0, 1, 4)
+
+#define DEV5G_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT, x)
+#define DEV5G_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_RX_OK_BYTES_MSB_CNT_RX_OK_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:RX_BAD_BYTES_CNT */
+#define DEV5G_RX_BAD_BYTES_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 16, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:RX_BAD_BYTES_MSB_CNT */
+#define DEV5G_RX_BAD_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 20, 0, 1, 4)
+
+#define DEV5G_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT, x)
+#define DEV5G_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_RX_BAD_BYTES_MSB_CNT_RX_BAD_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:TX_OUT_BYTES_CNT */
+#define DEV5G_TX_OUT_BYTES_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 24, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:TX_OUT_BYTES_MSB_CNT */
+#define DEV5G_TX_OUT_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 28, 0, 1, 4)
+
+#define DEV5G_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT, x)
+#define DEV5G_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_TX_OUT_BYTES_MSB_CNT_TX_OUT_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:TX_OK_BYTES_CNT */
+#define DEV5G_TX_OK_BYTES_CNT(t)  __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 32, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:TX_OK_BYTES_MSB_CNT */
+#define DEV5G_TX_OK_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 36, 0, 1, 4)
+
+#define DEV5G_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT, x)
+#define DEV5G_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_TX_OK_BYTES_MSB_CNT_TX_OK_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:PMAC_RX_OK_BYTES_CNT */
+#define DEV5G_PMAC_RX_OK_BYTES_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 40, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:PMAC_RX_OK_BYTES_MSB_CNT */
+#define DEV5G_PMAC_RX_OK_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 44, 0, 1, 4)
+
+#define DEV5G_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT, x)
+#define DEV5G_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_PMAC_RX_OK_BYTES_MSB_CNT_PMAC_RX_OK_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:PMAC_RX_BAD_BYTES_CNT */
+#define DEV5G_PMAC_RX_BAD_BYTES_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 48, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:PMAC_RX_BAD_BYTES_MSB_CNT */
+#define DEV5G_PMAC_RX_BAD_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 52, 0, 1, 4)
+
+#define DEV5G_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT, x)
+#define DEV5G_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_PMAC_RX_BAD_BYTES_MSB_CNT_PMAC_RX_BAD_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:PMAC_TX_OK_BYTES_CNT */
+#define DEV5G_PMAC_TX_OK_BYTES_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 56, 0, 1, 4)
+
+/*      DEV10G:DEV_STATISTICS_40BIT:PMAC_TX_OK_BYTES_MSB_CNT */
+#define DEV5G_PMAC_TX_OK_BYTES_MSB_CNT(t) __REG(TARGET_DEV5G, t, 13, 372, 0, 1, 64, 60, 0, 1, 4)
+
+#define DEV5G_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT GENMASK(7, 0)
+#define DEV5G_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT_SET(x)\
+	FIELD_PREP(DEV5G_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT, x)
+#define DEV5G_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT_GET(x)\
+	FIELD_GET(DEV5G_PMAC_TX_OK_BYTES_MSB_CNT_PMAC_TX_OK_BYTES_MSB_CNT, x)
+
+/*      DEV10G:DEV_CFG_STATUS:DEV_RST_CTRL */
+#define DEV5G_DEV_RST_CTRL(t)     __REG(TARGET_DEV5G, t, 13, 436, 0, 1, 52, 0, 0, 1, 4)
+
+#define DEV5G_DEV_RST_CTRL_PARDET_MODE_ENA       BIT(28)
+#define DEV5G_DEV_RST_CTRL_PARDET_MODE_ENA_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_PARDET_MODE_ENA, x)
+#define DEV5G_DEV_RST_CTRL_PARDET_MODE_ENA_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_PARDET_MODE_ENA, x)
+
+#define DEV5G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS BIT(27)
+#define DEV5G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+#define DEV5G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_USXGMII_OSET_FILTER_DIS, x)
+
+#define DEV5G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS GENMASK(26, 25)
+#define DEV5G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS, x)
+#define DEV5G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_MUXED_USXGMII_NETWORK_PORTS, x)
+
+#define DEV5G_DEV_RST_CTRL_SERDES_SPEED_SEL      GENMASK(24, 23)
+#define DEV5G_DEV_RST_CTRL_SERDES_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_SERDES_SPEED_SEL, x)
+#define DEV5G_DEV_RST_CTRL_SERDES_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_SERDES_SPEED_SEL, x)
+
+#define DEV5G_DEV_RST_CTRL_SPEED_SEL             GENMASK(22, 20)
+#define DEV5G_DEV_RST_CTRL_SPEED_SEL_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_SPEED_SEL, x)
+#define DEV5G_DEV_RST_CTRL_SPEED_SEL_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_SPEED_SEL, x)
+
+#define DEV5G_DEV_RST_CTRL_PCS_TX_RST            BIT(12)
+#define DEV5G_DEV_RST_CTRL_PCS_TX_RST_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_PCS_TX_RST, x)
+#define DEV5G_DEV_RST_CTRL_PCS_TX_RST_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_PCS_TX_RST, x)
+
+#define DEV5G_DEV_RST_CTRL_PCS_RX_RST            BIT(8)
+#define DEV5G_DEV_RST_CTRL_PCS_RX_RST_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_PCS_RX_RST, x)
+#define DEV5G_DEV_RST_CTRL_PCS_RX_RST_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_PCS_RX_RST, x)
+
+#define DEV5G_DEV_RST_CTRL_MAC_TX_RST            BIT(4)
+#define DEV5G_DEV_RST_CTRL_MAC_TX_RST_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_MAC_TX_RST, x)
+#define DEV5G_DEV_RST_CTRL_MAC_TX_RST_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_MAC_TX_RST, x)
+
+#define DEV5G_DEV_RST_CTRL_MAC_RX_RST            BIT(0)
+#define DEV5G_DEV_RST_CTRL_MAC_RX_RST_SET(x)\
+	FIELD_PREP(DEV5G_DEV_RST_CTRL_MAC_RX_RST, x)
+#define DEV5G_DEV_RST_CTRL_MAC_RX_RST_GET(x)\
+	FIELD_GET(DEV5G_DEV_RST_CTRL_MAC_RX_RST, x)
+
+/*      DSM:RAM_CTRL:RAM_INIT */
+#define DSM_RAM_INIT              __REG(TARGET_DSM, 0, 1, 0, 0, 1, 4, 0, 0, 1, 4)
+
+#define DSM_RAM_INIT_RAM_INIT                    BIT(1)
+#define DSM_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(DSM_RAM_INIT_RAM_INIT, x)
+#define DSM_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(DSM_RAM_INIT_RAM_INIT, x)
+
+#define DSM_RAM_INIT_RAM_CFG_HOOK                BIT(0)
+#define DSM_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(DSM_RAM_INIT_RAM_CFG_HOOK, x)
+#define DSM_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(DSM_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      DSM:CFG:BUF_CFG */
+#define DSM_BUF_CFG(r)            __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 0, r, 67, 4)
+
+#define DSM_BUF_CFG_CSC_STAT_DIS                 BIT(13)
+#define DSM_BUF_CFG_CSC_STAT_DIS_SET(x)\
+	FIELD_PREP(DSM_BUF_CFG_CSC_STAT_DIS, x)
+#define DSM_BUF_CFG_CSC_STAT_DIS_GET(x)\
+	FIELD_GET(DSM_BUF_CFG_CSC_STAT_DIS, x)
+
+#define DSM_BUF_CFG_AGING_ENA                    BIT(12)
+#define DSM_BUF_CFG_AGING_ENA_SET(x)\
+	FIELD_PREP(DSM_BUF_CFG_AGING_ENA, x)
+#define DSM_BUF_CFG_AGING_ENA_GET(x)\
+	FIELD_GET(DSM_BUF_CFG_AGING_ENA, x)
+
+#define DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS       BIT(11)
+#define DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS_SET(x)\
+	FIELD_PREP(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS, x)
+#define DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS_GET(x)\
+	FIELD_GET(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS, x)
+
+#define DSM_BUF_CFG_UNDERFLOW_WATCHDOG_TIMEOUT   GENMASK(10, 0)
+#define DSM_BUF_CFG_UNDERFLOW_WATCHDOG_TIMEOUT_SET(x)\
+	FIELD_PREP(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_TIMEOUT, x)
+#define DSM_BUF_CFG_UNDERFLOW_WATCHDOG_TIMEOUT_GET(x)\
+	FIELD_GET(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_TIMEOUT, x)
+
+/*      DSM:CFG:DEV_TX_STOP_WM_CFG */
+#define DSM_DEV_TX_STOP_WM_CFG(r) __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 1360, r, 67, 4)
+
+#define DSM_DEV_TX_STOP_WM_CFG_FAST_STARTUP_ENA  BIT(9)
+#define DSM_DEV_TX_STOP_WM_CFG_FAST_STARTUP_ENA_SET(x)\
+	FIELD_PREP(DSM_DEV_TX_STOP_WM_CFG_FAST_STARTUP_ENA, x)
+#define DSM_DEV_TX_STOP_WM_CFG_FAST_STARTUP_ENA_GET(x)\
+	FIELD_GET(DSM_DEV_TX_STOP_WM_CFG_FAST_STARTUP_ENA, x)
+
+#define DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA BIT(8)
+#define DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA_SET(x)\
+	FIELD_PREP(DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA, x)
+#define DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA_GET(x)\
+	FIELD_GET(DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA, x)
+
+#define DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM    GENMASK(7, 1)
+#define DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM_SET(x)\
+	FIELD_PREP(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM, x)
+#define DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM_GET(x)\
+	FIELD_GET(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM, x)
+
+#define DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR    BIT(0)
+#define DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR_SET(x)\
+	FIELD_PREP(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR, x)
+#define DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR_GET(x)\
+	FIELD_GET(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR, x)
+
+/*      DSM:CFG:RX_PAUSE_CFG */
+#define DSM_RX_PAUSE_CFG(r)       __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 1628, r, 67, 4)
+
+#define DSM_RX_PAUSE_CFG_RX_PAUSE_EN             BIT(1)
+#define DSM_RX_PAUSE_CFG_RX_PAUSE_EN_SET(x)\
+	FIELD_PREP(DSM_RX_PAUSE_CFG_RX_PAUSE_EN, x)
+#define DSM_RX_PAUSE_CFG_RX_PAUSE_EN_GET(x)\
+	FIELD_GET(DSM_RX_PAUSE_CFG_RX_PAUSE_EN, x)
+
+#define DSM_RX_PAUSE_CFG_FC_OBEY_LOCAL           BIT(0)
+#define DSM_RX_PAUSE_CFG_FC_OBEY_LOCAL_SET(x)\
+	FIELD_PREP(DSM_RX_PAUSE_CFG_FC_OBEY_LOCAL, x)
+#define DSM_RX_PAUSE_CFG_FC_OBEY_LOCAL_GET(x)\
+	FIELD_GET(DSM_RX_PAUSE_CFG_FC_OBEY_LOCAL, x)
+
+/*      DSM:CFG:MAC_CFG */
+#define DSM_MAC_CFG(r)            __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 2432, r, 67, 4)
+
+#define DSM_MAC_CFG_TX_PAUSE_VAL                 GENMASK(31, 16)
+#define DSM_MAC_CFG_TX_PAUSE_VAL_SET(x)\
+	FIELD_PREP(DSM_MAC_CFG_TX_PAUSE_VAL, x)
+#define DSM_MAC_CFG_TX_PAUSE_VAL_GET(x)\
+	FIELD_GET(DSM_MAC_CFG_TX_PAUSE_VAL, x)
+
+#define DSM_MAC_CFG_HDX_BACKPREASSURE            BIT(2)
+#define DSM_MAC_CFG_HDX_BACKPREASSURE_SET(x)\
+	FIELD_PREP(DSM_MAC_CFG_HDX_BACKPREASSURE, x)
+#define DSM_MAC_CFG_HDX_BACKPREASSURE_GET(x)\
+	FIELD_GET(DSM_MAC_CFG_HDX_BACKPREASSURE, x)
+
+#define DSM_MAC_CFG_SEND_PAUSE_FRM_TWICE         BIT(1)
+#define DSM_MAC_CFG_SEND_PAUSE_FRM_TWICE_SET(x)\
+	FIELD_PREP(DSM_MAC_CFG_SEND_PAUSE_FRM_TWICE, x)
+#define DSM_MAC_CFG_SEND_PAUSE_FRM_TWICE_GET(x)\
+	FIELD_GET(DSM_MAC_CFG_SEND_PAUSE_FRM_TWICE, x)
+
+#define DSM_MAC_CFG_TX_PAUSE_XON_XOFF            BIT(0)
+#define DSM_MAC_CFG_TX_PAUSE_XON_XOFF_SET(x)\
+	FIELD_PREP(DSM_MAC_CFG_TX_PAUSE_XON_XOFF, x)
+#define DSM_MAC_CFG_TX_PAUSE_XON_XOFF_GET(x)\
+	FIELD_GET(DSM_MAC_CFG_TX_PAUSE_XON_XOFF, x)
+
+/*      DSM:CFG:MAC_ADDR_BASE_HIGH_CFG */
+#define DSM_MAC_ADDR_BASE_HIGH_CFG(r) __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 2700, r, 65, 4)
+
+#define DSM_MAC_ADDR_BASE_HIGH_CFG_MAC_ADDR_HIGH GENMASK(23, 0)
+#define DSM_MAC_ADDR_BASE_HIGH_CFG_MAC_ADDR_HIGH_SET(x)\
+	FIELD_PREP(DSM_MAC_ADDR_BASE_HIGH_CFG_MAC_ADDR_HIGH, x)
+#define DSM_MAC_ADDR_BASE_HIGH_CFG_MAC_ADDR_HIGH_GET(x)\
+	FIELD_GET(DSM_MAC_ADDR_BASE_HIGH_CFG_MAC_ADDR_HIGH, x)
+
+/*      DSM:CFG:MAC_ADDR_BASE_LOW_CFG */
+#define DSM_MAC_ADDR_BASE_LOW_CFG(r) __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 2960, r, 65, 4)
+
+#define DSM_MAC_ADDR_BASE_LOW_CFG_MAC_ADDR_LOW   GENMASK(23, 0)
+#define DSM_MAC_ADDR_BASE_LOW_CFG_MAC_ADDR_LOW_SET(x)\
+	FIELD_PREP(DSM_MAC_ADDR_BASE_LOW_CFG_MAC_ADDR_LOW, x)
+#define DSM_MAC_ADDR_BASE_LOW_CFG_MAC_ADDR_LOW_GET(x)\
+	FIELD_GET(DSM_MAC_ADDR_BASE_LOW_CFG_MAC_ADDR_LOW, x)
+
+/*      DSM:CFG:TAXI_CAL_CFG */
+#define DSM_TAXI_CAL_CFG(r)       __REG(TARGET_DSM, 0, 1, 20, 0, 1, 3528, 3224, r, 9, 4)
+
+#define DSM_TAXI_CAL_CFG_CAL_IDX                 GENMASK(20, 15)
+#define DSM_TAXI_CAL_CFG_CAL_IDX_SET(x)\
+	FIELD_PREP(DSM_TAXI_CAL_CFG_CAL_IDX, x)
+#define DSM_TAXI_CAL_CFG_CAL_IDX_GET(x)\
+	FIELD_GET(DSM_TAXI_CAL_CFG_CAL_IDX, x)
+
+#define DSM_TAXI_CAL_CFG_CAL_CUR_LEN             GENMASK(14, 9)
+#define DSM_TAXI_CAL_CFG_CAL_CUR_LEN_SET(x)\
+	FIELD_PREP(DSM_TAXI_CAL_CFG_CAL_CUR_LEN, x)
+#define DSM_TAXI_CAL_CFG_CAL_CUR_LEN_GET(x)\
+	FIELD_GET(DSM_TAXI_CAL_CFG_CAL_CUR_LEN, x)
+
+#define DSM_TAXI_CAL_CFG_CAL_CUR_VAL             GENMASK(8, 5)
+#define DSM_TAXI_CAL_CFG_CAL_CUR_VAL_SET(x)\
+	FIELD_PREP(DSM_TAXI_CAL_CFG_CAL_CUR_VAL, x)
+#define DSM_TAXI_CAL_CFG_CAL_CUR_VAL_GET(x)\
+	FIELD_GET(DSM_TAXI_CAL_CFG_CAL_CUR_VAL, x)
+
+#define DSM_TAXI_CAL_CFG_CAL_PGM_VAL             GENMASK(4, 1)
+#define DSM_TAXI_CAL_CFG_CAL_PGM_VAL_SET(x)\
+	FIELD_PREP(DSM_TAXI_CAL_CFG_CAL_PGM_VAL, x)
+#define DSM_TAXI_CAL_CFG_CAL_PGM_VAL_GET(x)\
+	FIELD_GET(DSM_TAXI_CAL_CFG_CAL_PGM_VAL, x)
+
+#define DSM_TAXI_CAL_CFG_CAL_PGM_ENA             BIT(0)
+#define DSM_TAXI_CAL_CFG_CAL_PGM_ENA_SET(x)\
+	FIELD_PREP(DSM_TAXI_CAL_CFG_CAL_PGM_ENA, x)
+#define DSM_TAXI_CAL_CFG_CAL_PGM_ENA_GET(x)\
+	FIELD_GET(DSM_TAXI_CAL_CFG_CAL_PGM_ENA, x)
+
+/*      EACL:POL_CFG:POL_EACL_CFG */
+#define EACL_POL_EACL_CFG         __REG(TARGET_EACL, 0, 1, 150608, 0, 1, 780, 768, 0, 1, 4)
+
+#define EACL_POL_EACL_CFG_EACL_CNT_MARKED_AS_DROPPED BIT(5)
+#define EACL_POL_EACL_CFG_EACL_CNT_MARKED_AS_DROPPED_SET(x)\
+	FIELD_PREP(EACL_POL_EACL_CFG_EACL_CNT_MARKED_AS_DROPPED, x)
+#define EACL_POL_EACL_CFG_EACL_CNT_MARKED_AS_DROPPED_GET(x)\
+	FIELD_GET(EACL_POL_EACL_CFG_EACL_CNT_MARKED_AS_DROPPED, x)
+
+#define EACL_POL_EACL_CFG_EACL_ALLOW_FP_COPY     BIT(4)
+#define EACL_POL_EACL_CFG_EACL_ALLOW_FP_COPY_SET(x)\
+	FIELD_PREP(EACL_POL_EACL_CFG_EACL_ALLOW_FP_COPY, x)
+#define EACL_POL_EACL_CFG_EACL_ALLOW_FP_COPY_GET(x)\
+	FIELD_GET(EACL_POL_EACL_CFG_EACL_ALLOW_FP_COPY, x)
+
+#define EACL_POL_EACL_CFG_EACL_ALLOW_CPU_COPY    BIT(3)
+#define EACL_POL_EACL_CFG_EACL_ALLOW_CPU_COPY_SET(x)\
+	FIELD_PREP(EACL_POL_EACL_CFG_EACL_ALLOW_CPU_COPY, x)
+#define EACL_POL_EACL_CFG_EACL_ALLOW_CPU_COPY_GET(x)\
+	FIELD_GET(EACL_POL_EACL_CFG_EACL_ALLOW_CPU_COPY, x)
+
+#define EACL_POL_EACL_CFG_EACL_FORCE_CLOSE       BIT(2)
+#define EACL_POL_EACL_CFG_EACL_FORCE_CLOSE_SET(x)\
+	FIELD_PREP(EACL_POL_EACL_CFG_EACL_FORCE_CLOSE, x)
+#define EACL_POL_EACL_CFG_EACL_FORCE_CLOSE_GET(x)\
+	FIELD_GET(EACL_POL_EACL_CFG_EACL_FORCE_CLOSE, x)
+
+#define EACL_POL_EACL_CFG_EACL_FORCE_OPEN        BIT(1)
+#define EACL_POL_EACL_CFG_EACL_FORCE_OPEN_SET(x)\
+	FIELD_PREP(EACL_POL_EACL_CFG_EACL_FORCE_OPEN, x)
+#define EACL_POL_EACL_CFG_EACL_FORCE_OPEN_GET(x)\
+	FIELD_GET(EACL_POL_EACL_CFG_EACL_FORCE_OPEN, x)
+
+#define EACL_POL_EACL_CFG_EACL_FORCE_INIT        BIT(0)
+#define EACL_POL_EACL_CFG_EACL_FORCE_INIT_SET(x)\
+	FIELD_PREP(EACL_POL_EACL_CFG_EACL_FORCE_INIT, x)
+#define EACL_POL_EACL_CFG_EACL_FORCE_INIT_GET(x)\
+	FIELD_GET(EACL_POL_EACL_CFG_EACL_FORCE_INIT, x)
+
+/*      EACL:RAM_CTRL:RAM_INIT */
+#define EACL_RAM_INIT             __REG(TARGET_EACL, 0, 1, 118736, 0, 1, 4, 0, 0, 1, 4)
+
+#define EACL_RAM_INIT_RAM_INIT                   BIT(1)
+#define EACL_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(EACL_RAM_INIT_RAM_INIT, x)
+#define EACL_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(EACL_RAM_INIT_RAM_INIT, x)
+
+#define EACL_RAM_INIT_RAM_CFG_HOOK               BIT(0)
+#define EACL_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(EACL_RAM_INIT_RAM_CFG_HOOK, x)
+#define EACL_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(EACL_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      FDMA:FDMA:FDMA_CH_ACTIVATE */
+#define FDMA_CH_ACTIVATE          __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 0, 0, 1, 4)
+
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE             GENMASK(7, 0)
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(x)\
+	FIELD_PREP(FDMA_CH_ACTIVATE_CH_ACTIVATE, x)
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE_GET(x)\
+	FIELD_GET(FDMA_CH_ACTIVATE_CH_ACTIVATE, x)
+
+/*      FDMA:FDMA:FDMA_CH_RELOAD */
+#define FDMA_CH_RELOAD            __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 4, 0, 1, 4)
+
+#define FDMA_CH_RELOAD_CH_RELOAD                 GENMASK(7, 0)
+#define FDMA_CH_RELOAD_CH_RELOAD_SET(x)\
+	FIELD_PREP(FDMA_CH_RELOAD_CH_RELOAD, x)
+#define FDMA_CH_RELOAD_CH_RELOAD_GET(x)\
+	FIELD_GET(FDMA_CH_RELOAD_CH_RELOAD, x)
+
+/*      FDMA:FDMA:FDMA_CH_DISABLE */
+#define FDMA_CH_DISABLE           __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 8, 0, 1, 4)
+
+#define FDMA_CH_DISABLE_CH_DISABLE               GENMASK(7, 0)
+#define FDMA_CH_DISABLE_CH_DISABLE_SET(x)\
+	FIELD_PREP(FDMA_CH_DISABLE_CH_DISABLE, x)
+#define FDMA_CH_DISABLE_CH_DISABLE_GET(x)\
+	FIELD_GET(FDMA_CH_DISABLE_CH_DISABLE, x)
+
+/*      FDMA:FDMA:FDMA_DCB_LLP */
+#define FDMA_DCB_LLP(r)           __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 52, r, 8, 4)
+
+/*      FDMA:FDMA:FDMA_DCB_LLP1 */
+#define FDMA_DCB_LLP1(r)          __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 84, r, 8, 4)
+
+/*      FDMA:FDMA:FDMA_DCB_LLP_PREV */
+#define FDMA_DCB_LLP_PREV(r)      __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 116, r, 8, 4)
+
+/*      FDMA:FDMA:FDMA_DCB_LLP_PREV1 */
+#define FDMA_DCB_LLP_PREV1(r)     __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 148, r, 8, 4)
+
+/*      FDMA:FDMA:FDMA_CH_CFG */
+#define FDMA_CH_CFG(r)            __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 224, r, 8, 4)
+
+#define FDMA_CH_CFG_CH_XTR_STATUS_MODE           BIT(7)
+#define FDMA_CH_CFG_CH_XTR_STATUS_MODE_SET(x)\
+	FIELD_PREP(FDMA_CH_CFG_CH_XTR_STATUS_MODE, x)
+#define FDMA_CH_CFG_CH_XTR_STATUS_MODE_GET(x)\
+	FIELD_GET(FDMA_CH_CFG_CH_XTR_STATUS_MODE, x)
+
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY          BIT(6)
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(x)\
+	FIELD_PREP(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x)
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_GET(x)\
+	FIELD_GET(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x)
+
+#define FDMA_CH_CFG_CH_INJ_PORT                  BIT(5)
+#define FDMA_CH_CFG_CH_INJ_PORT_SET(x)\
+	FIELD_PREP(FDMA_CH_CFG_CH_INJ_PORT, x)
+#define FDMA_CH_CFG_CH_INJ_PORT_GET(x)\
+	FIELD_GET(FDMA_CH_CFG_CH_INJ_PORT, x)
+
+#define FDMA_CH_CFG_CH_DCB_DB_CNT                GENMASK(4, 1)
+#define FDMA_CH_CFG_CH_DCB_DB_CNT_SET(x)\
+	FIELD_PREP(FDMA_CH_CFG_CH_DCB_DB_CNT, x)
+#define FDMA_CH_CFG_CH_DCB_DB_CNT_GET(x)\
+	FIELD_GET(FDMA_CH_CFG_CH_DCB_DB_CNT, x)
+
+#define FDMA_CH_CFG_CH_MEM                       BIT(0)
+#define FDMA_CH_CFG_CH_MEM_SET(x)\
+	FIELD_PREP(FDMA_CH_CFG_CH_MEM, x)
+#define FDMA_CH_CFG_CH_MEM_GET(x)\
+	FIELD_GET(FDMA_CH_CFG_CH_MEM, x)
+
+/*      FDMA:FDMA:FDMA_CH_TRANSLATE */
+#define FDMA_CH_TRANSLATE(r)      __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 256, r, 8, 4)
+
+#define FDMA_CH_TRANSLATE_OFFSET                 GENMASK(15, 0)
+#define FDMA_CH_TRANSLATE_OFFSET_SET(x)\
+	FIELD_PREP(FDMA_CH_TRANSLATE_OFFSET, x)
+#define FDMA_CH_TRANSLATE_OFFSET_GET(x)\
+	FIELD_GET(FDMA_CH_TRANSLATE_OFFSET, x)
+
+/*      FDMA:FDMA:FDMA_XTR_CFG */
+#define FDMA_XTR_CFG              __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 364, 0, 1, 4)
+
+#define FDMA_XTR_CFG_XTR_FIFO_WM                 GENMASK(15, 11)
+#define FDMA_XTR_CFG_XTR_FIFO_WM_SET(x)\
+	FIELD_PREP(FDMA_XTR_CFG_XTR_FIFO_WM, x)
+#define FDMA_XTR_CFG_XTR_FIFO_WM_GET(x)\
+	FIELD_GET(FDMA_XTR_CFG_XTR_FIFO_WM, x)
+
+#define FDMA_XTR_CFG_XTR_ARB_SAT                 GENMASK(10, 0)
+#define FDMA_XTR_CFG_XTR_ARB_SAT_SET(x)\
+	FIELD_PREP(FDMA_XTR_CFG_XTR_ARB_SAT, x)
+#define FDMA_XTR_CFG_XTR_ARB_SAT_GET(x)\
+	FIELD_GET(FDMA_XTR_CFG_XTR_ARB_SAT, x)
+
+/*      FDMA:FDMA:FDMA_PORT_CTRL */
+#define FDMA_PORT_CTRL(r)         __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 376, r, 2, 4)
+
+#define FDMA_PORT_CTRL_INJ_STOP                  BIT(4)
+#define FDMA_PORT_CTRL_INJ_STOP_SET(x)\
+	FIELD_PREP(FDMA_PORT_CTRL_INJ_STOP, x)
+#define FDMA_PORT_CTRL_INJ_STOP_GET(x)\
+	FIELD_GET(FDMA_PORT_CTRL_INJ_STOP, x)
+
+#define FDMA_PORT_CTRL_INJ_STOP_FORCE            BIT(3)
+#define FDMA_PORT_CTRL_INJ_STOP_FORCE_SET(x)\
+	FIELD_PREP(FDMA_PORT_CTRL_INJ_STOP_FORCE, x)
+#define FDMA_PORT_CTRL_INJ_STOP_FORCE_GET(x)\
+	FIELD_GET(FDMA_PORT_CTRL_INJ_STOP_FORCE, x)
+
+#define FDMA_PORT_CTRL_XTR_STOP                  BIT(2)
+#define FDMA_PORT_CTRL_XTR_STOP_SET(x)\
+	FIELD_PREP(FDMA_PORT_CTRL_XTR_STOP, x)
+#define FDMA_PORT_CTRL_XTR_STOP_GET(x)\
+	FIELD_GET(FDMA_PORT_CTRL_XTR_STOP, x)
+
+#define FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY          BIT(1)
+#define FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY_SET(x)\
+	FIELD_PREP(FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY, x)
+#define FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY_GET(x)\
+	FIELD_GET(FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY, x)
+
+#define FDMA_PORT_CTRL_XTR_BUF_RST               BIT(0)
+#define FDMA_PORT_CTRL_XTR_BUF_RST_SET(x)\
+	FIELD_PREP(FDMA_PORT_CTRL_XTR_BUF_RST, x)
+#define FDMA_PORT_CTRL_XTR_BUF_RST_GET(x)\
+	FIELD_GET(FDMA_PORT_CTRL_XTR_BUF_RST, x)
+
+/*      FDMA:FDMA:FDMA_INTR_DCB */
+#define FDMA_INTR_DCB             __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 384, 0, 1, 4)
+
+#define FDMA_INTR_DCB_INTR_DCB                   GENMASK(7, 0)
+#define FDMA_INTR_DCB_INTR_DCB_SET(x)\
+	FIELD_PREP(FDMA_INTR_DCB_INTR_DCB, x)
+#define FDMA_INTR_DCB_INTR_DCB_GET(x)\
+	FIELD_GET(FDMA_INTR_DCB_INTR_DCB, x)
+
+/*      FDMA:FDMA:FDMA_INTR_DCB_ENA */
+#define FDMA_INTR_DCB_ENA         __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 388, 0, 1, 4)
+
+#define FDMA_INTR_DCB_ENA_INTR_DCB_ENA           GENMASK(7, 0)
+#define FDMA_INTR_DCB_ENA_INTR_DCB_ENA_SET(x)\
+	FIELD_PREP(FDMA_INTR_DCB_ENA_INTR_DCB_ENA, x)
+#define FDMA_INTR_DCB_ENA_INTR_DCB_ENA_GET(x)\
+	FIELD_GET(FDMA_INTR_DCB_ENA_INTR_DCB_ENA, x)
+
+/*      FDMA:FDMA:FDMA_INTR_DB */
+#define FDMA_INTR_DB              __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 392, 0, 1, 4)
+
+#define FDMA_INTR_DB_INTR_DB                     GENMASK(7, 0)
+#define FDMA_INTR_DB_INTR_DB_SET(x)\
+	FIELD_PREP(FDMA_INTR_DB_INTR_DB, x)
+#define FDMA_INTR_DB_INTR_DB_GET(x)\
+	FIELD_GET(FDMA_INTR_DB_INTR_DB, x)
+
+/*      FDMA:FDMA:FDMA_INTR_DB_ENA */
+#define FDMA_INTR_DB_ENA          __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 396, 0, 1, 4)
+
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA             GENMASK(7, 0)
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(x)\
+	FIELD_PREP(FDMA_INTR_DB_ENA_INTR_DB_ENA, x)
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(x)\
+	FIELD_GET(FDMA_INTR_DB_ENA_INTR_DB_ENA, x)
+
+/*      FDMA:FDMA:FDMA_INTR_ERR */
+#define FDMA_INTR_ERR             __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 400, 0, 1, 4)
+
+#define FDMA_INTR_ERR_INTR_PORT_ERR              GENMASK(9, 8)
+#define FDMA_INTR_ERR_INTR_PORT_ERR_SET(x)\
+	FIELD_PREP(FDMA_INTR_ERR_INTR_PORT_ERR, x)
+#define FDMA_INTR_ERR_INTR_PORT_ERR_GET(x)\
+	FIELD_GET(FDMA_INTR_ERR_INTR_PORT_ERR, x)
+
+#define FDMA_INTR_ERR_INTR_CH_ERR                GENMASK(7, 0)
+#define FDMA_INTR_ERR_INTR_CH_ERR_SET(x)\
+	FIELD_PREP(FDMA_INTR_ERR_INTR_CH_ERR, x)
+#define FDMA_INTR_ERR_INTR_CH_ERR_GET(x)\
+	FIELD_GET(FDMA_INTR_ERR_INTR_CH_ERR, x)
+
+/*      FDMA:FDMA:FDMA_ERRORS */
+#define FDMA_ERRORS               __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 412, 0, 1, 4)
+
+#define FDMA_ERRORS_ERR_XTR_WR                   GENMASK(31, 30)
+#define FDMA_ERRORS_ERR_XTR_WR_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_XTR_WR, x)
+#define FDMA_ERRORS_ERR_XTR_WR_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_XTR_WR, x)
+
+#define FDMA_ERRORS_ERR_XTR_OVF                  GENMASK(29, 28)
+#define FDMA_ERRORS_ERR_XTR_OVF_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_XTR_OVF, x)
+#define FDMA_ERRORS_ERR_XTR_OVF_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_XTR_OVF, x)
+
+#define FDMA_ERRORS_ERR_XTR_TAXI32_OVF           GENMASK(27, 26)
+#define FDMA_ERRORS_ERR_XTR_TAXI32_OVF_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_XTR_TAXI32_OVF, x)
+#define FDMA_ERRORS_ERR_XTR_TAXI32_OVF_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_XTR_TAXI32_OVF, x)
+
+#define FDMA_ERRORS_ERR_DCB_XTR_DATAL            GENMASK(25, 24)
+#define FDMA_ERRORS_ERR_DCB_XTR_DATAL_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_DCB_XTR_DATAL, x)
+#define FDMA_ERRORS_ERR_DCB_XTR_DATAL_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_DCB_XTR_DATAL, x)
+
+#define FDMA_ERRORS_ERR_DCB_RD                   GENMASK(23, 16)
+#define FDMA_ERRORS_ERR_DCB_RD_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_DCB_RD, x)
+#define FDMA_ERRORS_ERR_DCB_RD_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_DCB_RD, x)
+
+#define FDMA_ERRORS_ERR_INJ_RD                   GENMASK(15, 10)
+#define FDMA_ERRORS_ERR_INJ_RD_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_INJ_RD, x)
+#define FDMA_ERRORS_ERR_INJ_RD_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_INJ_RD, x)
+
+#define FDMA_ERRORS_ERR_INJ_OUT_OF_SYNC          GENMASK(9, 8)
+#define FDMA_ERRORS_ERR_INJ_OUT_OF_SYNC_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_INJ_OUT_OF_SYNC, x)
+#define FDMA_ERRORS_ERR_INJ_OUT_OF_SYNC_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_INJ_OUT_OF_SYNC, x)
+
+#define FDMA_ERRORS_ERR_CH_WR                    GENMASK(7, 0)
+#define FDMA_ERRORS_ERR_CH_WR_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_ERR_CH_WR, x)
+#define FDMA_ERRORS_ERR_CH_WR_GET(x)\
+	FIELD_GET(FDMA_ERRORS_ERR_CH_WR, x)
+
+/*      FDMA:FDMA:FDMA_ERRORS_2 */
+#define FDMA_ERRORS_2             __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 416, 0, 1, 4)
+
+#define FDMA_ERRORS_2_ERR_XTR_FRAG               GENMASK(1, 0)
+#define FDMA_ERRORS_2_ERR_XTR_FRAG_SET(x)\
+	FIELD_PREP(FDMA_ERRORS_2_ERR_XTR_FRAG, x)
+#define FDMA_ERRORS_2_ERR_XTR_FRAG_GET(x)\
+	FIELD_GET(FDMA_ERRORS_2_ERR_XTR_FRAG, x)
+
+/*      FDMA:FDMA:FDMA_CTRL */
+#define FDMA_CTRL                 __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 424, 0, 1, 4)
+
+#define FDMA_CTRL_NRESET                         BIT(0)
+#define FDMA_CTRL_NRESET_SET(x)\
+	FIELD_PREP(FDMA_CTRL_NRESET, x)
+#define FDMA_CTRL_NRESET_GET(x)\
+	FIELD_GET(FDMA_CTRL_NRESET, x)
+
+/*      DEVCPU_GCB:CHIP_REGS:CHIP_ID */
+#define GCB_CHIP_ID               __REG(TARGET_GCB, 0, 1, 0, 0, 1, 424, 0, 0, 1, 4)
+
+#define GCB_CHIP_ID_REV_ID                       GENMASK(31, 28)
+#define GCB_CHIP_ID_REV_ID_SET(x)\
+	FIELD_PREP(GCB_CHIP_ID_REV_ID, x)
+#define GCB_CHIP_ID_REV_ID_GET(x)\
+	FIELD_GET(GCB_CHIP_ID_REV_ID, x)
+
+#define GCB_CHIP_ID_PART_ID                      GENMASK(27, 12)
+#define GCB_CHIP_ID_PART_ID_SET(x)\
+	FIELD_PREP(GCB_CHIP_ID_PART_ID, x)
+#define GCB_CHIP_ID_PART_ID_GET(x)\
+	FIELD_GET(GCB_CHIP_ID_PART_ID, x)
+
+#define GCB_CHIP_ID_MFG_ID                       GENMASK(11, 1)
+#define GCB_CHIP_ID_MFG_ID_SET(x)\
+	FIELD_PREP(GCB_CHIP_ID_MFG_ID, x)
+#define GCB_CHIP_ID_MFG_ID_GET(x)\
+	FIELD_GET(GCB_CHIP_ID_MFG_ID, x)
+
+#define GCB_CHIP_ID_ONE                          BIT(0)
+#define GCB_CHIP_ID_ONE_SET(x)\
+	FIELD_PREP(GCB_CHIP_ID_ONE, x)
+#define GCB_CHIP_ID_ONE_GET(x)\
+	FIELD_GET(GCB_CHIP_ID_ONE, x)
+
+/*      DEVCPU_GCB:CHIP_REGS:SOFT_RST */
+#define GCB_SOFT_RST              __REG(TARGET_GCB, 0, 1, 0, 0, 1, 424, 8, 0, 1, 4)
+
+#define GCB_SOFT_RST_SOFT_NON_CFG_RST            BIT(2)
+#define GCB_SOFT_RST_SOFT_NON_CFG_RST_SET(x)\
+	FIELD_PREP(GCB_SOFT_RST_SOFT_NON_CFG_RST, x)
+#define GCB_SOFT_RST_SOFT_NON_CFG_RST_GET(x)\
+	FIELD_GET(GCB_SOFT_RST_SOFT_NON_CFG_RST, x)
+
+#define GCB_SOFT_RST_SOFT_SWC_RST                BIT(1)
+#define GCB_SOFT_RST_SOFT_SWC_RST_SET(x)\
+	FIELD_PREP(GCB_SOFT_RST_SOFT_SWC_RST, x)
+#define GCB_SOFT_RST_SOFT_SWC_RST_GET(x)\
+	FIELD_GET(GCB_SOFT_RST_SOFT_SWC_RST, x)
+
+#define GCB_SOFT_RST_SOFT_CHIP_RST               BIT(0)
+#define GCB_SOFT_RST_SOFT_CHIP_RST_SET(x)\
+	FIELD_PREP(GCB_SOFT_RST_SOFT_CHIP_RST, x)
+#define GCB_SOFT_RST_SOFT_CHIP_RST_GET(x)\
+	FIELD_GET(GCB_SOFT_RST_SOFT_CHIP_RST, x)
+
+/*      DEVCPU_GCB:CHIP_REGS:HW_SGPIO_SD_CFG */
+#define GCB_HW_SGPIO_SD_CFG       __REG(TARGET_GCB, 0, 1, 0, 0, 1, 424, 20, 0, 1, 4)
+
+#define GCB_HW_SGPIO_SD_CFG_SD_HIGH_ENA          BIT(1)
+#define GCB_HW_SGPIO_SD_CFG_SD_HIGH_ENA_SET(x)\
+	FIELD_PREP(GCB_HW_SGPIO_SD_CFG_SD_HIGH_ENA, x)
+#define GCB_HW_SGPIO_SD_CFG_SD_HIGH_ENA_GET(x)\
+	FIELD_GET(GCB_HW_SGPIO_SD_CFG_SD_HIGH_ENA, x)
+
+#define GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL           BIT(0)
+#define GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL_SET(x)\
+	FIELD_PREP(GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL, x)
+#define GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL_GET(x)\
+	FIELD_GET(GCB_HW_SGPIO_SD_CFG_SD_MAP_SEL, x)
+
+/*      DEVCPU_GCB:CHIP_REGS:HW_SGPIO_TO_SD_MAP_CFG */
+#define GCB_HW_SGPIO_TO_SD_MAP_CFG(r) __REG(TARGET_GCB, 0, 1, 0, 0, 1, 424, 24, r, 65, 4)
+
+#define GCB_HW_SGPIO_TO_SD_MAP_CFG_SGPIO_TO_SD_SEL GENMASK(8, 0)
+#define GCB_HW_SGPIO_TO_SD_MAP_CFG_SGPIO_TO_SD_SEL_SET(x)\
+	FIELD_PREP(GCB_HW_SGPIO_TO_SD_MAP_CFG_SGPIO_TO_SD_SEL, x)
+#define GCB_HW_SGPIO_TO_SD_MAP_CFG_SGPIO_TO_SD_SEL_GET(x)\
+	FIELD_GET(GCB_HW_SGPIO_TO_SD_MAP_CFG_SGPIO_TO_SD_SEL, x)
+
+/*      DEVCPU_GCB:SIO_CTRL:SIO_CLOCK */
+#define GCB_SIO_CLOCK(g)          __REG(TARGET_GCB, 0, 1, 876, g, 3, 280, 20, 0, 1, 4)
+
+#define GCB_SIO_CLOCK_SIO_CLK_FREQ               GENMASK(19, 8)
+#define GCB_SIO_CLOCK_SIO_CLK_FREQ_SET(x)\
+	FIELD_PREP(GCB_SIO_CLOCK_SIO_CLK_FREQ, x)
+#define GCB_SIO_CLOCK_SIO_CLK_FREQ_GET(x)\
+	FIELD_GET(GCB_SIO_CLOCK_SIO_CLK_FREQ, x)
+
+#define GCB_SIO_CLOCK_SYS_CLK_PERIOD             GENMASK(7, 0)
+#define GCB_SIO_CLOCK_SYS_CLK_PERIOD_SET(x)\
+	FIELD_PREP(GCB_SIO_CLOCK_SYS_CLK_PERIOD, x)
+#define GCB_SIO_CLOCK_SYS_CLK_PERIOD_GET(x)\
+	FIELD_GET(GCB_SIO_CLOCK_SYS_CLK_PERIOD, x)
+
+/*      HSCH:HSCH_MISC:SYS_CLK_PER */
+#define HSCH_SYS_CLK_PER          __REG(TARGET_HSCH, 0, 1, 163104, 0, 1, 648, 640, 0, 1, 4)
+
+#define HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS       GENMASK(7, 0)
+#define HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS_SET(x)\
+	FIELD_PREP(HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS, x)
+#define HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS_GET(x)\
+	FIELD_GET(HSCH_SYS_CLK_PER_SYS_CLK_PER_100PS, x)
+
+/*      HSCH:SYSTEM:FLUSH_CTRL */
+#define HSCH_FLUSH_CTRL           __REG(TARGET_HSCH, 0, 1, 184000, 0, 1, 312, 4, 0, 1, 4)
+
+#define HSCH_FLUSH_CTRL_FLUSH_ENA                BIT(27)
+#define HSCH_FLUSH_CTRL_FLUSH_ENA_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_ENA, x)
+#define HSCH_FLUSH_CTRL_FLUSH_ENA_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_ENA, x)
+
+#define HSCH_FLUSH_CTRL_FLUSH_SRC                BIT(26)
+#define HSCH_FLUSH_CTRL_FLUSH_SRC_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_SRC, x)
+#define HSCH_FLUSH_CTRL_FLUSH_SRC_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_SRC, x)
+
+#define HSCH_FLUSH_CTRL_FLUSH_DST                BIT(25)
+#define HSCH_FLUSH_CTRL_FLUSH_DST_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_DST, x)
+#define HSCH_FLUSH_CTRL_FLUSH_DST_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_DST, x)
+
+#define HSCH_FLUSH_CTRL_FLUSH_PORT               GENMASK(24, 18)
+#define HSCH_FLUSH_CTRL_FLUSH_PORT_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_PORT, x)
+#define HSCH_FLUSH_CTRL_FLUSH_PORT_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_PORT, x)
+
+#define HSCH_FLUSH_CTRL_FLUSH_QUEUE              BIT(17)
+#define HSCH_FLUSH_CTRL_FLUSH_QUEUE_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_QUEUE, x)
+#define HSCH_FLUSH_CTRL_FLUSH_QUEUE_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_QUEUE, x)
+
+#define HSCH_FLUSH_CTRL_FLUSH_SE                 BIT(16)
+#define HSCH_FLUSH_CTRL_FLUSH_SE_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_SE, x)
+#define HSCH_FLUSH_CTRL_FLUSH_SE_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_SE, x)
+
+#define HSCH_FLUSH_CTRL_FLUSH_HIER               GENMASK(15, 0)
+#define HSCH_FLUSH_CTRL_FLUSH_HIER_SET(x)\
+	FIELD_PREP(HSCH_FLUSH_CTRL_FLUSH_HIER, x)
+#define HSCH_FLUSH_CTRL_FLUSH_HIER_GET(x)\
+	FIELD_GET(HSCH_FLUSH_CTRL_FLUSH_HIER, x)
+
+/*      HSCH:SYSTEM:PORT_MODE */
+#define HSCH_PORT_MODE(r)         __REG(TARGET_HSCH, 0, 1, 184000, 0, 1, 312, 8, r, 70, 4)
+
+#define HSCH_PORT_MODE_DEQUEUE_DIS               BIT(4)
+#define HSCH_PORT_MODE_DEQUEUE_DIS_SET(x)\
+	FIELD_PREP(HSCH_PORT_MODE_DEQUEUE_DIS, x)
+#define HSCH_PORT_MODE_DEQUEUE_DIS_GET(x)\
+	FIELD_GET(HSCH_PORT_MODE_DEQUEUE_DIS, x)
+
+#define HSCH_PORT_MODE_AGE_DIS                   BIT(3)
+#define HSCH_PORT_MODE_AGE_DIS_SET(x)\
+	FIELD_PREP(HSCH_PORT_MODE_AGE_DIS, x)
+#define HSCH_PORT_MODE_AGE_DIS_GET(x)\
+	FIELD_GET(HSCH_PORT_MODE_AGE_DIS, x)
+
+#define HSCH_PORT_MODE_TRUNC_ENA                 BIT(2)
+#define HSCH_PORT_MODE_TRUNC_ENA_SET(x)\
+	FIELD_PREP(HSCH_PORT_MODE_TRUNC_ENA, x)
+#define HSCH_PORT_MODE_TRUNC_ENA_GET(x)\
+	FIELD_GET(HSCH_PORT_MODE_TRUNC_ENA, x)
+
+#define HSCH_PORT_MODE_EIR_REMARK_ENA            BIT(1)
+#define HSCH_PORT_MODE_EIR_REMARK_ENA_SET(x)\
+	FIELD_PREP(HSCH_PORT_MODE_EIR_REMARK_ENA, x)
+#define HSCH_PORT_MODE_EIR_REMARK_ENA_GET(x)\
+	FIELD_GET(HSCH_PORT_MODE_EIR_REMARK_ENA, x)
+
+#define HSCH_PORT_MODE_CPU_PRIO_MODE             BIT(0)
+#define HSCH_PORT_MODE_CPU_PRIO_MODE_SET(x)\
+	FIELD_PREP(HSCH_PORT_MODE_CPU_PRIO_MODE, x)
+#define HSCH_PORT_MODE_CPU_PRIO_MODE_GET(x)\
+	FIELD_GET(HSCH_PORT_MODE_CPU_PRIO_MODE, x)
+
+/*      HSCH:SYSTEM:OUTB_SHARE_ENA */
+#define HSCH_OUTB_SHARE_ENA(r)    __REG(TARGET_HSCH, 0, 1, 184000, 0, 1, 312, 288, r, 5, 4)
+
+#define HSCH_OUTB_SHARE_ENA_OUTB_SHARE_ENA       GENMASK(7, 0)
+#define HSCH_OUTB_SHARE_ENA_OUTB_SHARE_ENA_SET(x)\
+	FIELD_PREP(HSCH_OUTB_SHARE_ENA_OUTB_SHARE_ENA, x)
+#define HSCH_OUTB_SHARE_ENA_OUTB_SHARE_ENA_GET(x)\
+	FIELD_GET(HSCH_OUTB_SHARE_ENA_OUTB_SHARE_ENA, x)
+
+/*      HSCH:MMGT:RESET_CFG */
+#define HSCH_RESET_CFG            __REG(TARGET_HSCH, 0, 1, 162368, 0, 1, 16, 8, 0, 1, 4)
+
+#define HSCH_RESET_CFG_CORE_ENA                  BIT(0)
+#define HSCH_RESET_CFG_CORE_ENA_SET(x)\
+	FIELD_PREP(HSCH_RESET_CFG_CORE_ENA, x)
+#define HSCH_RESET_CFG_CORE_ENA_GET(x)\
+	FIELD_GET(HSCH_RESET_CFG_CORE_ENA, x)
+
+/*      HSCH:TAS_CONFIG:TAS_STATEMACHINE_CFG */
+#define HSCH_TAS_STATEMACHINE_CFG __REG(TARGET_HSCH, 0, 1, 162384, 0, 1, 12, 8, 0, 1, 4)
+
+#define HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY    GENMASK(7, 0)
+#define HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY_SET(x)\
+	FIELD_PREP(HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY, x)
+#define HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY_GET(x)\
+	FIELD_GET(HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY, x)
+
+/*      LRN:COMMON:COMMON_ACCESS_CTRL */
+#define LRN_COMMON_ACCESS_CTRL    __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 0, 0, 1, 4)
+
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_COL GENMASK(21, 20)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_COL_SET(x)\
+	FIELD_PREP(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_COL, x)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_COL_GET(x)\
+	FIELD_GET(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_COL, x)
+
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_TYPE BIT(19)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_TYPE_SET(x)\
+	FIELD_PREP(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_TYPE, x)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_TYPE_GET(x)\
+	FIELD_GET(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_TYPE, x)
+
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_ROW GENMASK(18, 5)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_ROW_SET(x)\
+	FIELD_PREP(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_ROW, x)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_ROW_GET(x)\
+	FIELD_GET(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_DIRECT_ROW, x)
+
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD    GENMASK(4, 1)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET(x)\
+	FIELD_PREP(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD, x)
+#define LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_GET(x)\
+	FIELD_GET(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD, x)
+
+#define LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT BIT(0)
+#define LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(x)\
+	FIELD_PREP(LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT, x)
+#define LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_GET(x)\
+	FIELD_GET(LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT, x)
+
+/*      LRN:COMMON:MAC_ACCESS_CFG_0 */
+#define LRN_MAC_ACCESS_CFG_0      __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 4, 0, 1, 4)
+
+#define LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_FID       GENMASK(28, 16)
+#define LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_FID_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_FID, x)
+#define LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_FID_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_FID, x)
+
+#define LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_MAC_MSB   GENMASK(15, 0)
+#define LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_MAC_MSB_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_MAC_MSB, x)
+#define LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_MAC_MSB_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_0_MAC_ENTRY_MAC_MSB, x)
+
+/*      LRN:COMMON:MAC_ACCESS_CFG_1 */
+#define LRN_MAC_ACCESS_CFG_1      __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 8, 0, 1, 4)
+
+/*      LRN:COMMON:MAC_ACCESS_CFG_2 */
+#define LRN_MAC_ACCESS_CFG_2      __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 12, 0, 1, 4)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_SRC_KILL_FWD BIT(28)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_SRC_KILL_FWD_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_SRC_KILL_FWD, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_SRC_KILL_FWD_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_SRC_KILL_FWD, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_NXT_LRN_ALL BIT(27)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_NXT_LRN_ALL_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_NXT_LRN_ALL, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_NXT_LRN_ALL_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_NXT_LRN_ALL, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_QU    GENMASK(26, 24)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_QU_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_QU, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_QU_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_QU, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_COPY  BIT(23)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_COPY_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_COPY, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_COPY_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_CPU_COPY, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLAN_IGNORE BIT(22)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLAN_IGNORE_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLAN_IGNORE, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLAN_IGNORE_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLAN_IGNORE, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_MIRROR    BIT(21)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_MIRROR_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_MIRROR, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_MIRROR_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_MIRROR, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_FLAG  GENMASK(20, 19)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_FLAG_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_FLAG, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_FLAG_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_FLAG, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_INTERVAL GENMASK(18, 17)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_INTERVAL_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_INTERVAL, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_INTERVAL_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_AGE_INTERVAL, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_LOCKED    BIT(16)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_LOCKED_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_LOCKED, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_LOCKED_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_LOCKED, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD       BIT(15)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE GENMASK(14, 12)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE, x)
+
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR      GENMASK(11, 0)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR, x)
+#define LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR, x)
+
+/*      LRN:COMMON:MAC_ACCESS_CFG_3 */
+#define LRN_MAC_ACCESS_CFG_3      __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 16, 0, 1, 4)
+
+#define LRN_MAC_ACCESS_CFG_3_MAC_ENTRY_ISDX_LIMIT_IDX GENMASK(10, 0)
+#define LRN_MAC_ACCESS_CFG_3_MAC_ENTRY_ISDX_LIMIT_IDX_SET(x)\
+	FIELD_PREP(LRN_MAC_ACCESS_CFG_3_MAC_ENTRY_ISDX_LIMIT_IDX, x)
+#define LRN_MAC_ACCESS_CFG_3_MAC_ENTRY_ISDX_LIMIT_IDX_GET(x)\
+	FIELD_GET(LRN_MAC_ACCESS_CFG_3_MAC_ENTRY_ISDX_LIMIT_IDX, x)
+
+/*      LRN:COMMON:SCAN_NEXT_CFG */
+#define LRN_SCAN_NEXT_CFG         __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 20, 0, 1, 4)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_FLAG_UPDATE_SEL GENMASK(21, 19)
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_FLAG_UPDATE_SEL_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_AGE_FLAG_UPDATE_SEL, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_FLAG_UPDATE_SEL_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_AGE_FLAG_UPDATE_SEL, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NXT_LRN_ALL_UPDATE_SEL GENMASK(18, 17)
+#define LRN_SCAN_NEXT_CFG_SCAN_NXT_LRN_ALL_UPDATE_SEL_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NXT_LRN_ALL_UPDATE_SEL, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NXT_LRN_ALL_UPDATE_SEL_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NXT_LRN_ALL_UPDATE_SEL, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_FILTER_SEL    GENMASK(16, 15)
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_FILTER_SEL_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_AGE_FILTER_SEL, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_FILTER_SEL_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_AGE_FILTER_SEL, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_MOVE_FOUND_ENA BIT(14)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_MOVE_FOUND_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NEXT_MOVE_FOUND_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_MOVE_FOUND_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NEXT_MOVE_FOUND_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_NXT_LRN_ALL_FILTER_ENA BIT(13)
+#define LRN_SCAN_NEXT_CFG_NXT_LRN_ALL_FILTER_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_NXT_LRN_ALL_FILTER_ENA, x)
+#define LRN_SCAN_NEXT_CFG_NXT_LRN_ALL_FILTER_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_NXT_LRN_ALL_FILTER_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_USE_PORT_FILTER_ENA BIT(12)
+#define LRN_SCAN_NEXT_CFG_SCAN_USE_PORT_FILTER_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_USE_PORT_FILTER_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_USE_PORT_FILTER_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_USE_PORT_FILTER_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_REMOVE_FOUND_ENA BIT(11)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_REMOVE_FOUND_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NEXT_REMOVE_FOUND_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_REMOVE_FOUND_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NEXT_REMOVE_FOUND_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA BIT(10)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_INC_AGE_BITS_ENA BIT(9)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_INC_AGE_BITS_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NEXT_INC_AGE_BITS_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_INC_AGE_BITS_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NEXT_INC_AGE_BITS_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_AGED_ONLY_ENA BIT(8)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_AGED_ONLY_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NEXT_AGED_ONLY_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_AGED_ONLY_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NEXT_AGED_ONLY_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_IGNORE_LOCKED_ENA BIT(7)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_IGNORE_LOCKED_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_NEXT_IGNORE_LOCKED_ENA, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_NEXT_IGNORE_LOCKED_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_NEXT_IGNORE_LOCKED_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_INTERVAL_MASK GENMASK(6, 3)
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_INTERVAL_MASK_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_SCAN_AGE_INTERVAL_MASK, x)
+#define LRN_SCAN_NEXT_CFG_SCAN_AGE_INTERVAL_MASK_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_SCAN_AGE_INTERVAL_MASK, x)
+
+#define LRN_SCAN_NEXT_CFG_ISDX_LIMIT_IDX_FILTER_ENA BIT(2)
+#define LRN_SCAN_NEXT_CFG_ISDX_LIMIT_IDX_FILTER_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_ISDX_LIMIT_IDX_FILTER_ENA, x)
+#define LRN_SCAN_NEXT_CFG_ISDX_LIMIT_IDX_FILTER_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_ISDX_LIMIT_IDX_FILTER_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_FID_FILTER_ENA         BIT(1)
+#define LRN_SCAN_NEXT_CFG_FID_FILTER_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_FID_FILTER_ENA, x)
+#define LRN_SCAN_NEXT_CFG_FID_FILTER_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_FID_FILTER_ENA, x)
+
+#define LRN_SCAN_NEXT_CFG_ADDR_FILTER_ENA        BIT(0)
+#define LRN_SCAN_NEXT_CFG_ADDR_FILTER_ENA_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_ADDR_FILTER_ENA, x)
+#define LRN_SCAN_NEXT_CFG_ADDR_FILTER_ENA_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_ADDR_FILTER_ENA, x)
+
+/*      LRN:COMMON:SCAN_NEXT_CFG_1 */
+#define LRN_SCAN_NEXT_CFG_1       __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 24, 0, 1, 4)
+
+#define LRN_SCAN_NEXT_CFG_1_PORT_MOVE_NEW_ADDR   GENMASK(30, 16)
+#define LRN_SCAN_NEXT_CFG_1_PORT_MOVE_NEW_ADDR_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_1_PORT_MOVE_NEW_ADDR, x)
+#define LRN_SCAN_NEXT_CFG_1_PORT_MOVE_NEW_ADDR_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_1_PORT_MOVE_NEW_ADDR, x)
+
+#define LRN_SCAN_NEXT_CFG_1_SCAN_ENTRY_ADDR_MASK GENMASK(14, 0)
+#define LRN_SCAN_NEXT_CFG_1_SCAN_ENTRY_ADDR_MASK_SET(x)\
+	FIELD_PREP(LRN_SCAN_NEXT_CFG_1_SCAN_ENTRY_ADDR_MASK, x)
+#define LRN_SCAN_NEXT_CFG_1_SCAN_ENTRY_ADDR_MASK_GET(x)\
+	FIELD_GET(LRN_SCAN_NEXT_CFG_1_SCAN_ENTRY_ADDR_MASK, x)
+
+/*      LRN:COMMON:AUTOAGE_CFG */
+#define LRN_AUTOAGE_CFG(r)        __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 36, r, 4, 4)
+
+#define LRN_AUTOAGE_CFG_UNIT_SIZE                GENMASK(29, 28)
+#define LRN_AUTOAGE_CFG_UNIT_SIZE_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_UNIT_SIZE, x)
+#define LRN_AUTOAGE_CFG_UNIT_SIZE_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_UNIT_SIZE, x)
+
+#define LRN_AUTOAGE_CFG_PERIOD_VAL               GENMASK(27, 0)
+#define LRN_AUTOAGE_CFG_PERIOD_VAL_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_PERIOD_VAL, x)
+#define LRN_AUTOAGE_CFG_PERIOD_VAL_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_PERIOD_VAL, x)
+
+/*      LRN:COMMON:AUTOAGE_CFG_1 */
+#define LRN_AUTOAGE_CFG_1         __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 52, 0, 1, 4)
+
+#define LRN_AUTOAGE_CFG_1_PAUSE_AUTO_AGE_ENA     BIT(25)
+#define LRN_AUTOAGE_CFG_1_PAUSE_AUTO_AGE_ENA_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_PAUSE_AUTO_AGE_ENA, x)
+#define LRN_AUTOAGE_CFG_1_PAUSE_AUTO_AGE_ENA_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_PAUSE_AUTO_AGE_ENA, x)
+
+#define LRN_AUTOAGE_CFG_1_CELLS_BETWEEN_ENTRY_SCAN GENMASK(24, 15)
+#define LRN_AUTOAGE_CFG_1_CELLS_BETWEEN_ENTRY_SCAN_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_CELLS_BETWEEN_ENTRY_SCAN, x)
+#define LRN_AUTOAGE_CFG_1_CELLS_BETWEEN_ENTRY_SCAN_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_CELLS_BETWEEN_ENTRY_SCAN, x)
+
+#define LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS        GENMASK(14, 7)
+#define LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS, x)
+#define LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_CLK_PERIOD_01NS, x)
+
+#define LRN_AUTOAGE_CFG_1_USE_PORT_FILTER_ENA    BIT(6)
+#define LRN_AUTOAGE_CFG_1_USE_PORT_FILTER_ENA_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_USE_PORT_FILTER_ENA, x)
+#define LRN_AUTOAGE_CFG_1_USE_PORT_FILTER_ENA_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_USE_PORT_FILTER_ENA, x)
+
+#define LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_SHOT     GENMASK(5, 2)
+#define LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_SHOT_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_SHOT, x)
+#define LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_SHOT_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_SHOT, x)
+
+#define LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_STOP_SHOT BIT(1)
+#define LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_STOP_SHOT_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_STOP_SHOT, x)
+#define LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_STOP_SHOT_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_FORCE_HW_SCAN_STOP_SHOT, x)
+
+#define LRN_AUTOAGE_CFG_1_FORCE_IDLE_ENA         BIT(0)
+#define LRN_AUTOAGE_CFG_1_FORCE_IDLE_ENA_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_1_FORCE_IDLE_ENA, x)
+#define LRN_AUTOAGE_CFG_1_FORCE_IDLE_ENA_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_1_FORCE_IDLE_ENA, x)
+
+/*      LRN:COMMON:AUTOAGE_CFG_2 */
+#define LRN_AUTOAGE_CFG_2         __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 56, 0, 1, 4)
+
+#define LRN_AUTOAGE_CFG_2_NEXT_ROW               GENMASK(17, 4)
+#define LRN_AUTOAGE_CFG_2_NEXT_ROW_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_2_NEXT_ROW, x)
+#define LRN_AUTOAGE_CFG_2_NEXT_ROW_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_2_NEXT_ROW, x)
+
+#define LRN_AUTOAGE_CFG_2_SCAN_ONGOING_STATUS    GENMASK(3, 0)
+#define LRN_AUTOAGE_CFG_2_SCAN_ONGOING_STATUS_SET(x)\
+	FIELD_PREP(LRN_AUTOAGE_CFG_2_SCAN_ONGOING_STATUS, x)
+#define LRN_AUTOAGE_CFG_2_SCAN_ONGOING_STATUS_GET(x)\
+	FIELD_GET(LRN_AUTOAGE_CFG_2_SCAN_ONGOING_STATUS, x)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_REGION_CTRL_2_OFF_OUTBOUND_0 */
+#define PCEP_RCTRL_2_OUT_0        __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 4, 0, 1, 4)
+
+#define PCEP_RCTRL_2_OUT_0_MSG_CODE              GENMASK(7, 0)
+#define PCEP_RCTRL_2_OUT_0_MSG_CODE_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_MSG_CODE, x)
+#define PCEP_RCTRL_2_OUT_0_MSG_CODE_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_MSG_CODE, x)
+
+#define PCEP_RCTRL_2_OUT_0_TAG                   GENMASK(15, 8)
+#define PCEP_RCTRL_2_OUT_0_TAG_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_TAG, x)
+#define PCEP_RCTRL_2_OUT_0_TAG_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_TAG, x)
+
+#define PCEP_RCTRL_2_OUT_0_TAG_SUBSTITUTE_EN     BIT(16)
+#define PCEP_RCTRL_2_OUT_0_TAG_SUBSTITUTE_EN_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_TAG_SUBSTITUTE_EN, x)
+#define PCEP_RCTRL_2_OUT_0_TAG_SUBSTITUTE_EN_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_TAG_SUBSTITUTE_EN, x)
+
+#define PCEP_RCTRL_2_OUT_0_FUNC_BYPASS           BIT(19)
+#define PCEP_RCTRL_2_OUT_0_FUNC_BYPASS_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_FUNC_BYPASS, x)
+#define PCEP_RCTRL_2_OUT_0_FUNC_BYPASS_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_FUNC_BYPASS, x)
+
+#define PCEP_RCTRL_2_OUT_0_SNP                   BIT(20)
+#define PCEP_RCTRL_2_OUT_0_SNP_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_SNP, x)
+#define PCEP_RCTRL_2_OUT_0_SNP_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_SNP, x)
+
+#define PCEP_RCTRL_2_OUT_0_INHIBIT_PAYLOAD       BIT(22)
+#define PCEP_RCTRL_2_OUT_0_INHIBIT_PAYLOAD_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_INHIBIT_PAYLOAD, x)
+#define PCEP_RCTRL_2_OUT_0_INHIBIT_PAYLOAD_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_INHIBIT_PAYLOAD, x)
+
+#define PCEP_RCTRL_2_OUT_0_HEADER_SUBSTITUTE_EN  BIT(23)
+#define PCEP_RCTRL_2_OUT_0_HEADER_SUBSTITUTE_EN_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_HEADER_SUBSTITUTE_EN, x)
+#define PCEP_RCTRL_2_OUT_0_HEADER_SUBSTITUTE_EN_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_HEADER_SUBSTITUTE_EN, x)
+
+#define PCEP_RCTRL_2_OUT_0_CFG_SHIFT_MODE        BIT(28)
+#define PCEP_RCTRL_2_OUT_0_CFG_SHIFT_MODE_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_CFG_SHIFT_MODE, x)
+#define PCEP_RCTRL_2_OUT_0_CFG_SHIFT_MODE_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_CFG_SHIFT_MODE, x)
+
+#define PCEP_RCTRL_2_OUT_0_INVERT_MODE           BIT(29)
+#define PCEP_RCTRL_2_OUT_0_INVERT_MODE_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_INVERT_MODE, x)
+#define PCEP_RCTRL_2_OUT_0_INVERT_MODE_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_INVERT_MODE, x)
+
+#define PCEP_RCTRL_2_OUT_0_REGION_EN             BIT(31)
+#define PCEP_RCTRL_2_OUT_0_REGION_EN_SET(x)\
+	FIELD_PREP(PCEP_RCTRL_2_OUT_0_REGION_EN, x)
+#define PCEP_RCTRL_2_OUT_0_REGION_EN_GET(x)\
+	FIELD_GET(PCEP_RCTRL_2_OUT_0_REGION_EN, x)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_LWR_BASE_ADDR_OFF_OUTBOUND_0 */
+#define PCEP_ADDR_LWR_OUT_0       __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 8, 0, 1, 4)
+
+#define PCEP_ADDR_LWR_OUT_0_LWR_BASE_HW          GENMASK(15, 0)
+#define PCEP_ADDR_LWR_OUT_0_LWR_BASE_HW_SET(x)\
+	FIELD_PREP(PCEP_ADDR_LWR_OUT_0_LWR_BASE_HW, x)
+#define PCEP_ADDR_LWR_OUT_0_LWR_BASE_HW_GET(x)\
+	FIELD_GET(PCEP_ADDR_LWR_OUT_0_LWR_BASE_HW, x)
+
+#define PCEP_ADDR_LWR_OUT_0_LWR_BASE_RW          GENMASK(31, 16)
+#define PCEP_ADDR_LWR_OUT_0_LWR_BASE_RW_SET(x)\
+	FIELD_PREP(PCEP_ADDR_LWR_OUT_0_LWR_BASE_RW, x)
+#define PCEP_ADDR_LWR_OUT_0_LWR_BASE_RW_GET(x)\
+	FIELD_GET(PCEP_ADDR_LWR_OUT_0_LWR_BASE_RW, x)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_UPPER_BASE_ADDR_OFF_OUTBOUND_0 */
+#define PCEP_ADDR_UPR_OUT_0       __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 12, 0, 1, 4)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_LIMIT_ADDR_OFF_OUTBOUND_0 */
+#define PCEP_ADDR_LIM_OUT_0       __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 16, 0, 1, 4)
+
+#define PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_HW        GENMASK(15, 0)
+#define PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_HW_SET(x)\
+	FIELD_PREP(PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_HW, x)
+#define PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_HW_GET(x)\
+	FIELD_GET(PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_HW, x)
+
+#define PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_RW        GENMASK(31, 16)
+#define PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_RW_SET(x)\
+	FIELD_PREP(PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_RW, x)
+#define PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_RW_GET(x)\
+	FIELD_GET(PCEP_ADDR_LIM_OUT_0_LIMIT_ADDR_RW, x)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_LWR_TARGET_ADDR_OFF_OUTBOUND_0 */
+#define PCEP_ADDR_LWR_TGT_OUT_0   __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 20, 0, 1, 4)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_UPPER_TARGET_ADDR_OFF_OUTBOUND_0 */
+#define PCEP_ADDR_UPR_TGT_OUT_0   __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 24, 0, 1, 4)
+
+/*      PCIE_DM_EP:PF0_ATU_CAP:IATU_UPPR_LIMIT_ADDR_OFF_OUTBOUND_0 */
+#define PCEP_ADDR_UPR_LIM_OUT_0   __REG(TARGET_PCEP, 0, 1, 3145728, 0, 1, 130852, 32, 0, 1, 4)
+
+#define PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_RW GENMASK(1, 0)
+#define PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_RW_SET(x)\
+	FIELD_PREP(PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_RW, x)
+#define PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_RW_GET(x)\
+	FIELD_GET(PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_RW, x)
+
+#define PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_HW GENMASK(31, 2)
+#define PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_HW_SET(x)\
+	FIELD_PREP(PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_HW, x)
+#define PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_HW_GET(x)\
+	FIELD_GET(PCEP_ADDR_UPR_LIM_OUT_0_UPPR_LIMIT_ADDR_HW, x)
+
+/*      PCS_10GBASE_R:PCS_10GBR_CFG:PCS_CFG */
+#define PCS10G_BR_PCS_CFG(t)      __REG(TARGET_PCS10G_BR, t, 12, 0, 0, 1, 56, 0, 0, 1, 4)
+
+#define PCS10G_BR_PCS_CFG_PCS_ENA                BIT(31)
+#define PCS10G_BR_PCS_CFG_PCS_ENA_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_PCS_ENA, x)
+#define PCS10G_BR_PCS_CFG_PCS_ENA_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_PCS_ENA, x)
+
+#define PCS10G_BR_PCS_CFG_PMA_LOOPBACK_ENA       BIT(30)
+#define PCS10G_BR_PCS_CFG_PMA_LOOPBACK_ENA_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_PMA_LOOPBACK_ENA, x)
+#define PCS10G_BR_PCS_CFG_PMA_LOOPBACK_ENA_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_PMA_LOOPBACK_ENA, x)
+
+#define PCS10G_BR_PCS_CFG_SH_CNT_MAX             GENMASK(29, 24)
+#define PCS10G_BR_PCS_CFG_SH_CNT_MAX_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_SH_CNT_MAX, x)
+#define PCS10G_BR_PCS_CFG_SH_CNT_MAX_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_SH_CNT_MAX, x)
+
+#define PCS10G_BR_PCS_CFG_RX_DATA_FLIP           BIT(18)
+#define PCS10G_BR_PCS_CFG_RX_DATA_FLIP_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_RX_DATA_FLIP, x)
+#define PCS10G_BR_PCS_CFG_RX_DATA_FLIP_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_RX_DATA_FLIP, x)
+
+#define PCS10G_BR_PCS_CFG_RESYNC_ENA             BIT(15)
+#define PCS10G_BR_PCS_CFG_RESYNC_ENA_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_RESYNC_ENA, x)
+#define PCS10G_BR_PCS_CFG_RESYNC_ENA_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_RESYNC_ENA, x)
+
+#define PCS10G_BR_PCS_CFG_LF_GEN_DIS             BIT(14)
+#define PCS10G_BR_PCS_CFG_LF_GEN_DIS_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_LF_GEN_DIS, x)
+#define PCS10G_BR_PCS_CFG_LF_GEN_DIS_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_LF_GEN_DIS, x)
+
+#define PCS10G_BR_PCS_CFG_RX_TEST_MODE           BIT(13)
+#define PCS10G_BR_PCS_CFG_RX_TEST_MODE_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_RX_TEST_MODE, x)
+#define PCS10G_BR_PCS_CFG_RX_TEST_MODE_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_RX_TEST_MODE, x)
+
+#define PCS10G_BR_PCS_CFG_RX_SCR_DISABLE         BIT(12)
+#define PCS10G_BR_PCS_CFG_RX_SCR_DISABLE_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_RX_SCR_DISABLE, x)
+#define PCS10G_BR_PCS_CFG_RX_SCR_DISABLE_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_RX_SCR_DISABLE, x)
+
+#define PCS10G_BR_PCS_CFG_TX_DATA_FLIP           BIT(7)
+#define PCS10G_BR_PCS_CFG_TX_DATA_FLIP_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_TX_DATA_FLIP, x)
+#define PCS10G_BR_PCS_CFG_TX_DATA_FLIP_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_TX_DATA_FLIP, x)
+
+#define PCS10G_BR_PCS_CFG_AN_LINK_CTRL_ENA       BIT(6)
+#define PCS10G_BR_PCS_CFG_AN_LINK_CTRL_ENA_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_AN_LINK_CTRL_ENA, x)
+#define PCS10G_BR_PCS_CFG_AN_LINK_CTRL_ENA_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_AN_LINK_CTRL_ENA, x)
+
+#define PCS10G_BR_PCS_CFG_TX_TEST_MODE           BIT(4)
+#define PCS10G_BR_PCS_CFG_TX_TEST_MODE_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_TX_TEST_MODE, x)
+#define PCS10G_BR_PCS_CFG_TX_TEST_MODE_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_TX_TEST_MODE, x)
+
+#define PCS10G_BR_PCS_CFG_TX_SCR_DISABLE         BIT(3)
+#define PCS10G_BR_PCS_CFG_TX_SCR_DISABLE_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_CFG_TX_SCR_DISABLE, x)
+#define PCS10G_BR_PCS_CFG_TX_SCR_DISABLE_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_CFG_TX_SCR_DISABLE, x)
+
+/*      PCS_10GBASE_R:PCS_10GBR_CFG:PCS_SD_CFG */
+#define PCS10G_BR_PCS_SD_CFG(t)   __REG(TARGET_PCS10G_BR, t, 12, 0, 0, 1, 56, 4, 0, 1, 4)
+
+#define PCS10G_BR_PCS_SD_CFG_SD_SEL              BIT(8)
+#define PCS10G_BR_PCS_SD_CFG_SD_SEL_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_SD_CFG_SD_SEL, x)
+#define PCS10G_BR_PCS_SD_CFG_SD_SEL_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_SD_CFG_SD_SEL, x)
+
+#define PCS10G_BR_PCS_SD_CFG_SD_POL              BIT(4)
+#define PCS10G_BR_PCS_SD_CFG_SD_POL_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_SD_CFG_SD_POL, x)
+#define PCS10G_BR_PCS_SD_CFG_SD_POL_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_SD_CFG_SD_POL, x)
+
+#define PCS10G_BR_PCS_SD_CFG_SD_ENA              BIT(0)
+#define PCS10G_BR_PCS_SD_CFG_SD_ENA_SET(x)\
+	FIELD_PREP(PCS10G_BR_PCS_SD_CFG_SD_ENA, x)
+#define PCS10G_BR_PCS_SD_CFG_SD_ENA_GET(x)\
+	FIELD_GET(PCS10G_BR_PCS_SD_CFG_SD_ENA, x)
+
+/*      PCS_10GBASE_R:PCS_10GBR_CFG:PCS_CFG */
+#define PCS25G_BR_PCS_CFG(t)      __REG(TARGET_PCS25G_BR, t, 8, 0, 0, 1, 56, 0, 0, 1, 4)
+
+#define PCS25G_BR_PCS_CFG_PCS_ENA                BIT(31)
+#define PCS25G_BR_PCS_CFG_PCS_ENA_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_PCS_ENA, x)
+#define PCS25G_BR_PCS_CFG_PCS_ENA_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_PCS_ENA, x)
+
+#define PCS25G_BR_PCS_CFG_PMA_LOOPBACK_ENA       BIT(30)
+#define PCS25G_BR_PCS_CFG_PMA_LOOPBACK_ENA_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_PMA_LOOPBACK_ENA, x)
+#define PCS25G_BR_PCS_CFG_PMA_LOOPBACK_ENA_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_PMA_LOOPBACK_ENA, x)
+
+#define PCS25G_BR_PCS_CFG_SH_CNT_MAX             GENMASK(29, 24)
+#define PCS25G_BR_PCS_CFG_SH_CNT_MAX_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_SH_CNT_MAX, x)
+#define PCS25G_BR_PCS_CFG_SH_CNT_MAX_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_SH_CNT_MAX, x)
+
+#define PCS25G_BR_PCS_CFG_RX_DATA_FLIP           BIT(18)
+#define PCS25G_BR_PCS_CFG_RX_DATA_FLIP_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_RX_DATA_FLIP, x)
+#define PCS25G_BR_PCS_CFG_RX_DATA_FLIP_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_RX_DATA_FLIP, x)
+
+#define PCS25G_BR_PCS_CFG_RESYNC_ENA             BIT(15)
+#define PCS25G_BR_PCS_CFG_RESYNC_ENA_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_RESYNC_ENA, x)
+#define PCS25G_BR_PCS_CFG_RESYNC_ENA_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_RESYNC_ENA, x)
+
+#define PCS25G_BR_PCS_CFG_LF_GEN_DIS             BIT(14)
+#define PCS25G_BR_PCS_CFG_LF_GEN_DIS_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_LF_GEN_DIS, x)
+#define PCS25G_BR_PCS_CFG_LF_GEN_DIS_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_LF_GEN_DIS, x)
+
+#define PCS25G_BR_PCS_CFG_RX_TEST_MODE           BIT(13)
+#define PCS25G_BR_PCS_CFG_RX_TEST_MODE_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_RX_TEST_MODE, x)
+#define PCS25G_BR_PCS_CFG_RX_TEST_MODE_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_RX_TEST_MODE, x)
+
+#define PCS25G_BR_PCS_CFG_RX_SCR_DISABLE         BIT(12)
+#define PCS25G_BR_PCS_CFG_RX_SCR_DISABLE_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_RX_SCR_DISABLE, x)
+#define PCS25G_BR_PCS_CFG_RX_SCR_DISABLE_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_RX_SCR_DISABLE, x)
+
+#define PCS25G_BR_PCS_CFG_TX_DATA_FLIP           BIT(7)
+#define PCS25G_BR_PCS_CFG_TX_DATA_FLIP_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_TX_DATA_FLIP, x)
+#define PCS25G_BR_PCS_CFG_TX_DATA_FLIP_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_TX_DATA_FLIP, x)
+
+#define PCS25G_BR_PCS_CFG_AN_LINK_CTRL_ENA       BIT(6)
+#define PCS25G_BR_PCS_CFG_AN_LINK_CTRL_ENA_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_AN_LINK_CTRL_ENA, x)
+#define PCS25G_BR_PCS_CFG_AN_LINK_CTRL_ENA_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_AN_LINK_CTRL_ENA, x)
+
+#define PCS25G_BR_PCS_CFG_TX_TEST_MODE           BIT(4)
+#define PCS25G_BR_PCS_CFG_TX_TEST_MODE_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_TX_TEST_MODE, x)
+#define PCS25G_BR_PCS_CFG_TX_TEST_MODE_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_TX_TEST_MODE, x)
+
+#define PCS25G_BR_PCS_CFG_TX_SCR_DISABLE         BIT(3)
+#define PCS25G_BR_PCS_CFG_TX_SCR_DISABLE_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_CFG_TX_SCR_DISABLE, x)
+#define PCS25G_BR_PCS_CFG_TX_SCR_DISABLE_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_CFG_TX_SCR_DISABLE, x)
+
+/*      PCS_10GBASE_R:PCS_10GBR_CFG:PCS_SD_CFG */
+#define PCS25G_BR_PCS_SD_CFG(t)   __REG(TARGET_PCS25G_BR, t, 8, 0, 0, 1, 56, 4, 0, 1, 4)
+
+#define PCS25G_BR_PCS_SD_CFG_SD_SEL              BIT(8)
+#define PCS25G_BR_PCS_SD_CFG_SD_SEL_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_SD_CFG_SD_SEL, x)
+#define PCS25G_BR_PCS_SD_CFG_SD_SEL_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_SD_CFG_SD_SEL, x)
+
+#define PCS25G_BR_PCS_SD_CFG_SD_POL              BIT(4)
+#define PCS25G_BR_PCS_SD_CFG_SD_POL_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_SD_CFG_SD_POL, x)
+#define PCS25G_BR_PCS_SD_CFG_SD_POL_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_SD_CFG_SD_POL, x)
+
+#define PCS25G_BR_PCS_SD_CFG_SD_ENA              BIT(0)
+#define PCS25G_BR_PCS_SD_CFG_SD_ENA_SET(x)\
+	FIELD_PREP(PCS25G_BR_PCS_SD_CFG_SD_ENA, x)
+#define PCS25G_BR_PCS_SD_CFG_SD_ENA_GET(x)\
+	FIELD_GET(PCS25G_BR_PCS_SD_CFG_SD_ENA, x)
+
+/*      PCS_10GBASE_R:PCS_10GBR_CFG:PCS_CFG */
+#define PCS5G_BR_PCS_CFG(t)       __REG(TARGET_PCS5G_BR, t, 13, 0, 0, 1, 56, 0, 0, 1, 4)
+
+#define PCS5G_BR_PCS_CFG_PCS_ENA                 BIT(31)
+#define PCS5G_BR_PCS_CFG_PCS_ENA_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_PCS_ENA, x)
+#define PCS5G_BR_PCS_CFG_PCS_ENA_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_PCS_ENA, x)
+
+#define PCS5G_BR_PCS_CFG_PMA_LOOPBACK_ENA        BIT(30)
+#define PCS5G_BR_PCS_CFG_PMA_LOOPBACK_ENA_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_PMA_LOOPBACK_ENA, x)
+#define PCS5G_BR_PCS_CFG_PMA_LOOPBACK_ENA_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_PMA_LOOPBACK_ENA, x)
+
+#define PCS5G_BR_PCS_CFG_SH_CNT_MAX              GENMASK(29, 24)
+#define PCS5G_BR_PCS_CFG_SH_CNT_MAX_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_SH_CNT_MAX, x)
+#define PCS5G_BR_PCS_CFG_SH_CNT_MAX_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_SH_CNT_MAX, x)
+
+#define PCS5G_BR_PCS_CFG_RX_DATA_FLIP            BIT(18)
+#define PCS5G_BR_PCS_CFG_RX_DATA_FLIP_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_RX_DATA_FLIP, x)
+#define PCS5G_BR_PCS_CFG_RX_DATA_FLIP_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_RX_DATA_FLIP, x)
+
+#define PCS5G_BR_PCS_CFG_RESYNC_ENA              BIT(15)
+#define PCS5G_BR_PCS_CFG_RESYNC_ENA_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_RESYNC_ENA, x)
+#define PCS5G_BR_PCS_CFG_RESYNC_ENA_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_RESYNC_ENA, x)
+
+#define PCS5G_BR_PCS_CFG_LF_GEN_DIS              BIT(14)
+#define PCS5G_BR_PCS_CFG_LF_GEN_DIS_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_LF_GEN_DIS, x)
+#define PCS5G_BR_PCS_CFG_LF_GEN_DIS_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_LF_GEN_DIS, x)
+
+#define PCS5G_BR_PCS_CFG_RX_TEST_MODE            BIT(13)
+#define PCS5G_BR_PCS_CFG_RX_TEST_MODE_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_RX_TEST_MODE, x)
+#define PCS5G_BR_PCS_CFG_RX_TEST_MODE_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_RX_TEST_MODE, x)
+
+#define PCS5G_BR_PCS_CFG_RX_SCR_DISABLE          BIT(12)
+#define PCS5G_BR_PCS_CFG_RX_SCR_DISABLE_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_RX_SCR_DISABLE, x)
+#define PCS5G_BR_PCS_CFG_RX_SCR_DISABLE_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_RX_SCR_DISABLE, x)
+
+#define PCS5G_BR_PCS_CFG_TX_DATA_FLIP            BIT(7)
+#define PCS5G_BR_PCS_CFG_TX_DATA_FLIP_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_TX_DATA_FLIP, x)
+#define PCS5G_BR_PCS_CFG_TX_DATA_FLIP_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_TX_DATA_FLIP, x)
+
+#define PCS5G_BR_PCS_CFG_AN_LINK_CTRL_ENA        BIT(6)
+#define PCS5G_BR_PCS_CFG_AN_LINK_CTRL_ENA_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_AN_LINK_CTRL_ENA, x)
+#define PCS5G_BR_PCS_CFG_AN_LINK_CTRL_ENA_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_AN_LINK_CTRL_ENA, x)
+
+#define PCS5G_BR_PCS_CFG_TX_TEST_MODE            BIT(4)
+#define PCS5G_BR_PCS_CFG_TX_TEST_MODE_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_TX_TEST_MODE, x)
+#define PCS5G_BR_PCS_CFG_TX_TEST_MODE_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_TX_TEST_MODE, x)
+
+#define PCS5G_BR_PCS_CFG_TX_SCR_DISABLE          BIT(3)
+#define PCS5G_BR_PCS_CFG_TX_SCR_DISABLE_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_CFG_TX_SCR_DISABLE, x)
+#define PCS5G_BR_PCS_CFG_TX_SCR_DISABLE_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_CFG_TX_SCR_DISABLE, x)
+
+/*      PCS_10GBASE_R:PCS_10GBR_CFG:PCS_SD_CFG */
+#define PCS5G_BR_PCS_SD_CFG(t)    __REG(TARGET_PCS5G_BR, t, 13, 0, 0, 1, 56, 4, 0, 1, 4)
+
+#define PCS5G_BR_PCS_SD_CFG_SD_SEL               BIT(8)
+#define PCS5G_BR_PCS_SD_CFG_SD_SEL_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_SD_CFG_SD_SEL, x)
+#define PCS5G_BR_PCS_SD_CFG_SD_SEL_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_SD_CFG_SD_SEL, x)
+
+#define PCS5G_BR_PCS_SD_CFG_SD_POL               BIT(4)
+#define PCS5G_BR_PCS_SD_CFG_SD_POL_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_SD_CFG_SD_POL, x)
+#define PCS5G_BR_PCS_SD_CFG_SD_POL_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_SD_CFG_SD_POL, x)
+
+#define PCS5G_BR_PCS_SD_CFG_SD_ENA               BIT(0)
+#define PCS5G_BR_PCS_SD_CFG_SD_ENA_SET(x)\
+	FIELD_PREP(PCS5G_BR_PCS_SD_CFG_SD_ENA, x)
+#define PCS5G_BR_PCS_SD_CFG_SD_ENA_GET(x)\
+	FIELD_GET(PCS5G_BR_PCS_SD_CFG_SD_ENA, x)
+
+/*      PORT_CONF:HW_CFG:DEV5G_MODES */
+#define PORT_CONF_DEV5G_MODES     __REG(TARGET_PORT_CONF, 0, 1, 0, 0, 1, 24, 0, 0, 1, 4)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D0_MODE      BIT(0)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D0_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D0_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D0_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D0_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D1_MODE      BIT(1)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D1_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D1_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D1_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D1_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D2_MODE      BIT(2)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D2_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D2_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D2_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D2_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D3_MODE      BIT(3)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D3_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D3_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D3_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D3_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D4_MODE      BIT(4)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D4_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D4_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D4_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D4_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D5_MODE      BIT(5)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D5_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D5_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D5_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D5_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D6_MODE      BIT(6)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D6_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D6_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D6_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D6_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D7_MODE      BIT(7)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D7_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D7_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D7_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D7_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D8_MODE      BIT(8)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D8_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D8_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D8_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D8_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D9_MODE      BIT(9)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D9_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D9_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D9_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D9_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D10_MODE     BIT(10)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D10_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D10_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D10_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D10_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D11_MODE     BIT(11)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D11_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D11_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D11_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D11_MODE, x)
+
+#define PORT_CONF_DEV5G_MODES_DEV5G_D64_MODE     BIT(12)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D64_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV5G_MODES_DEV5G_D64_MODE, x)
+#define PORT_CONF_DEV5G_MODES_DEV5G_D64_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV5G_MODES_DEV5G_D64_MODE, x)
+
+/*      PORT_CONF:HW_CFG:DEV10G_MODES */
+#define PORT_CONF_DEV10G_MODES    __REG(TARGET_PORT_CONF, 0, 1, 0, 0, 1, 24, 4, 0, 1, 4)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D12_MODE   BIT(0)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D12_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D12_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D12_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D12_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D13_MODE   BIT(1)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D13_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D13_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D13_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D13_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D14_MODE   BIT(2)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D14_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D14_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D14_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D14_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D15_MODE   BIT(3)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D15_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D15_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D15_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D15_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D48_MODE   BIT(4)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D48_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D48_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D48_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D48_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D49_MODE   BIT(5)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D49_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D49_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D49_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D49_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D50_MODE   BIT(6)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D50_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D50_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D50_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D50_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D51_MODE   BIT(7)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D51_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D51_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D51_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D51_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D52_MODE   BIT(8)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D52_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D52_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D52_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D52_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D53_MODE   BIT(9)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D53_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D53_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D53_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D53_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D54_MODE   BIT(10)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D54_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D54_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D54_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D54_MODE, x)
+
+#define PORT_CONF_DEV10G_MODES_DEV10G_D55_MODE   BIT(11)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D55_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV10G_MODES_DEV10G_D55_MODE, x)
+#define PORT_CONF_DEV10G_MODES_DEV10G_D55_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV10G_MODES_DEV10G_D55_MODE, x)
+
+/*      PORT_CONF:HW_CFG:DEV25G_MODES */
+#define PORT_CONF_DEV25G_MODES    __REG(TARGET_PORT_CONF, 0, 1, 0, 0, 1, 24, 8, 0, 1, 4)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D56_MODE   BIT(0)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D56_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D56_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D56_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D56_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D57_MODE   BIT(1)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D57_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D57_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D57_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D57_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D58_MODE   BIT(2)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D58_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D58_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D58_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D58_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D59_MODE   BIT(3)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D59_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D59_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D59_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D59_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D60_MODE   BIT(4)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D60_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D60_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D60_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D60_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D61_MODE   BIT(5)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D61_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D61_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D61_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D61_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D62_MODE   BIT(6)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D62_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D62_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D62_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D62_MODE, x)
+
+#define PORT_CONF_DEV25G_MODES_DEV25G_D63_MODE   BIT(7)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D63_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_DEV25G_MODES_DEV25G_D63_MODE, x)
+#define PORT_CONF_DEV25G_MODES_DEV25G_D63_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_DEV25G_MODES_DEV25G_D63_MODE, x)
+
+/*      PORT_CONF:HW_CFG:QSGMII_ENA */
+#define PORT_CONF_QSGMII_ENA      __REG(TARGET_PORT_CONF, 0, 1, 0, 0, 1, 24, 12, 0, 1, 4)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_0        BIT(0)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_0_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_0, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_0_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_0, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_1        BIT(1)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_1_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_1, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_1_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_1, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_2        BIT(2)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_2_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_2, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_2_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_2, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_3        BIT(3)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_3_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_3, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_3_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_3, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_4        BIT(4)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_4_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_4, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_4_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_4, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_5        BIT(5)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_5_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_5, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_5_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_5, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_6        BIT(6)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_6_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_6, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_6_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_6, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_7        BIT(7)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_7_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_7, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_7_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_7, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_8        BIT(8)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_8_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_8, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_8_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_8, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_9        BIT(9)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_9_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_9, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_9_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_9, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_10       BIT(10)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_10_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_10, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_10_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_10, x)
+
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_11       BIT(11)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_11_SET(x)\
+	FIELD_PREP(PORT_CONF_QSGMII_ENA_QSGMII_ENA_11, x)
+#define PORT_CONF_QSGMII_ENA_QSGMII_ENA_11_GET(x)\
+	FIELD_GET(PORT_CONF_QSGMII_ENA_QSGMII_ENA_11, x)
+
+/*      PORT_CONF:USGMII_CFG_STAT:USGMII_CFG */
+#define PORT_CONF_USGMII_CFG(g)   __REG(TARGET_PORT_CONF, 0, 1, 72, g, 6, 8, 0, 0, 1, 4)
+
+#define PORT_CONF_USGMII_CFG_BYPASS_SCRAM        BIT(9)
+#define PORT_CONF_USGMII_CFG_BYPASS_SCRAM_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_BYPASS_SCRAM, x)
+#define PORT_CONF_USGMII_CFG_BYPASS_SCRAM_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_BYPASS_SCRAM, x)
+
+#define PORT_CONF_USGMII_CFG_BYPASS_DESCRAM      BIT(8)
+#define PORT_CONF_USGMII_CFG_BYPASS_DESCRAM_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_BYPASS_DESCRAM, x)
+#define PORT_CONF_USGMII_CFG_BYPASS_DESCRAM_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_BYPASS_DESCRAM, x)
+
+#define PORT_CONF_USGMII_CFG_FLIP_LANES          BIT(7)
+#define PORT_CONF_USGMII_CFG_FLIP_LANES_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_FLIP_LANES, x)
+#define PORT_CONF_USGMII_CFG_FLIP_LANES_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_FLIP_LANES, x)
+
+#define PORT_CONF_USGMII_CFG_SHYST_DIS           BIT(6)
+#define PORT_CONF_USGMII_CFG_SHYST_DIS_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_SHYST_DIS, x)
+#define PORT_CONF_USGMII_CFG_SHYST_DIS_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_SHYST_DIS, x)
+
+#define PORT_CONF_USGMII_CFG_E_DET_ENA           BIT(5)
+#define PORT_CONF_USGMII_CFG_E_DET_ENA_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_E_DET_ENA, x)
+#define PORT_CONF_USGMII_CFG_E_DET_ENA_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_E_DET_ENA, x)
+
+#define PORT_CONF_USGMII_CFG_USE_I1_ENA          BIT(4)
+#define PORT_CONF_USGMII_CFG_USE_I1_ENA_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_USE_I1_ENA, x)
+#define PORT_CONF_USGMII_CFG_USE_I1_ENA_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_USE_I1_ENA, x)
+
+#define PORT_CONF_USGMII_CFG_QUAD_MODE           BIT(1)
+#define PORT_CONF_USGMII_CFG_QUAD_MODE_SET(x)\
+	FIELD_PREP(PORT_CONF_USGMII_CFG_QUAD_MODE, x)
+#define PORT_CONF_USGMII_CFG_QUAD_MODE_GET(x)\
+	FIELD_GET(PORT_CONF_USGMII_CFG_QUAD_MODE, x)
+
+/*      QFWD:SYSTEM:SWITCH_PORT_MODE */
+#define QFWD_SWITCH_PORT_MODE(r)  __REG(TARGET_QFWD, 0, 1, 0, 0, 1, 340, 0, r, 70, 4)
+
+#define QFWD_SWITCH_PORT_MODE_PORT_ENA           BIT(19)
+#define QFWD_SWITCH_PORT_MODE_PORT_ENA_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_PORT_ENA, x)
+#define QFWD_SWITCH_PORT_MODE_PORT_ENA_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_PORT_ENA, x)
+
+#define QFWD_SWITCH_PORT_MODE_FWD_URGENCY        GENMASK(18, 10)
+#define QFWD_SWITCH_PORT_MODE_FWD_URGENCY_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_FWD_URGENCY, x)
+#define QFWD_SWITCH_PORT_MODE_FWD_URGENCY_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_FWD_URGENCY, x)
+
+#define QFWD_SWITCH_PORT_MODE_YEL_RSRVD          GENMASK(9, 6)
+#define QFWD_SWITCH_PORT_MODE_YEL_RSRVD_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_YEL_RSRVD, x)
+#define QFWD_SWITCH_PORT_MODE_YEL_RSRVD_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_YEL_RSRVD, x)
+
+#define QFWD_SWITCH_PORT_MODE_INGRESS_DROP_MODE  BIT(5)
+#define QFWD_SWITCH_PORT_MODE_INGRESS_DROP_MODE_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_INGRESS_DROP_MODE, x)
+#define QFWD_SWITCH_PORT_MODE_INGRESS_DROP_MODE_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_INGRESS_DROP_MODE, x)
+
+#define QFWD_SWITCH_PORT_MODE_IGR_NO_SHARING     BIT(4)
+#define QFWD_SWITCH_PORT_MODE_IGR_NO_SHARING_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_IGR_NO_SHARING, x)
+#define QFWD_SWITCH_PORT_MODE_IGR_NO_SHARING_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_IGR_NO_SHARING, x)
+
+#define QFWD_SWITCH_PORT_MODE_EGR_NO_SHARING     BIT(3)
+#define QFWD_SWITCH_PORT_MODE_EGR_NO_SHARING_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_EGR_NO_SHARING, x)
+#define QFWD_SWITCH_PORT_MODE_EGR_NO_SHARING_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_EGR_NO_SHARING, x)
+
+#define QFWD_SWITCH_PORT_MODE_EGRESS_DROP_MODE   BIT(2)
+#define QFWD_SWITCH_PORT_MODE_EGRESS_DROP_MODE_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_EGRESS_DROP_MODE, x)
+#define QFWD_SWITCH_PORT_MODE_EGRESS_DROP_MODE_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_EGRESS_DROP_MODE, x)
+
+#define QFWD_SWITCH_PORT_MODE_EGRESS_RSRV_DIS    BIT(1)
+#define QFWD_SWITCH_PORT_MODE_EGRESS_RSRV_DIS_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_EGRESS_RSRV_DIS, x)
+#define QFWD_SWITCH_PORT_MODE_EGRESS_RSRV_DIS_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_EGRESS_RSRV_DIS, x)
+
+#define QFWD_SWITCH_PORT_MODE_LEARNALL_MORE      BIT(0)
+#define QFWD_SWITCH_PORT_MODE_LEARNALL_MORE_SET(x)\
+	FIELD_PREP(QFWD_SWITCH_PORT_MODE_LEARNALL_MORE, x)
+#define QFWD_SWITCH_PORT_MODE_LEARNALL_MORE_GET(x)\
+	FIELD_GET(QFWD_SWITCH_PORT_MODE_LEARNALL_MORE, x)
+
+/*      QRES:RES_CTRL:RES_CFG */
+#define QRES_RES_CFG(g)           __REG(TARGET_QRES, 0, 1, 0, g, 5120, 16, 0, 0, 1, 4)
+
+#define QRES_RES_CFG_WM_HIGH                     GENMASK(11, 0)
+#define QRES_RES_CFG_WM_HIGH_SET(x)\
+	FIELD_PREP(QRES_RES_CFG_WM_HIGH, x)
+#define QRES_RES_CFG_WM_HIGH_GET(x)\
+	FIELD_GET(QRES_RES_CFG_WM_HIGH, x)
+
+/*      QRES:RES_CTRL:RES_STAT */
+#define QRES_RES_STAT(g)          __REG(TARGET_QRES, 0, 1, 0, g, 5120, 16, 4, 0, 1, 4)
+
+#define QRES_RES_STAT_MAXUSE                     GENMASK(20, 0)
+#define QRES_RES_STAT_MAXUSE_SET(x)\
+	FIELD_PREP(QRES_RES_STAT_MAXUSE, x)
+#define QRES_RES_STAT_MAXUSE_GET(x)\
+	FIELD_GET(QRES_RES_STAT_MAXUSE, x)
+
+/*      QRES:RES_CTRL:RES_STAT_CUR */
+#define QRES_RES_STAT_CUR(g)      __REG(TARGET_QRES, 0, 1, 0, g, 5120, 16, 8, 0, 1, 4)
+
+#define QRES_RES_STAT_CUR_INUSE                  GENMASK(20, 0)
+#define QRES_RES_STAT_CUR_INUSE_SET(x)\
+	FIELD_PREP(QRES_RES_STAT_CUR_INUSE, x)
+#define QRES_RES_STAT_CUR_INUSE_GET(x)\
+	FIELD_GET(QRES_RES_STAT_CUR_INUSE, x)
+
+/*      DEVCPU_QS:XTR:XTR_GRP_CFG */
+#define QS_XTR_GRP_CFG(r)         __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 0, r, 2, 4)
+
+#define QS_XTR_GRP_CFG_MODE                      GENMASK(3, 2)
+#define QS_XTR_GRP_CFG_MODE_SET(x)\
+	FIELD_PREP(QS_XTR_GRP_CFG_MODE, x)
+#define QS_XTR_GRP_CFG_MODE_GET(x)\
+	FIELD_GET(QS_XTR_GRP_CFG_MODE, x)
+
+#define QS_XTR_GRP_CFG_STATUS_WORD_POS           BIT(1)
+#define QS_XTR_GRP_CFG_STATUS_WORD_POS_SET(x)\
+	FIELD_PREP(QS_XTR_GRP_CFG_STATUS_WORD_POS, x)
+#define QS_XTR_GRP_CFG_STATUS_WORD_POS_GET(x)\
+	FIELD_GET(QS_XTR_GRP_CFG_STATUS_WORD_POS, x)
+
+#define QS_XTR_GRP_CFG_BYTE_SWAP                 BIT(0)
+#define QS_XTR_GRP_CFG_BYTE_SWAP_SET(x)\
+	FIELD_PREP(QS_XTR_GRP_CFG_BYTE_SWAP, x)
+#define QS_XTR_GRP_CFG_BYTE_SWAP_GET(x)\
+	FIELD_GET(QS_XTR_GRP_CFG_BYTE_SWAP, x)
+
+/*      DEVCPU_QS:XTR:XTR_RD */
+#define QS_XTR_RD(r)              __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 8, r, 2, 4)
+
+/*      DEVCPU_QS:XTR:XTR_FLUSH */
+#define QS_XTR_FLUSH              __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 24, 0, 1, 4)
+
+#define QS_XTR_FLUSH_FLUSH                       GENMASK(1, 0)
+#define QS_XTR_FLUSH_FLUSH_SET(x)\
+	FIELD_PREP(QS_XTR_FLUSH_FLUSH, x)
+#define QS_XTR_FLUSH_FLUSH_GET(x)\
+	FIELD_GET(QS_XTR_FLUSH_FLUSH, x)
+
+/*      DEVCPU_QS:XTR:XTR_DATA_PRESENT */
+#define QS_XTR_DATA_PRESENT       __REG(TARGET_QS, 0, 1, 0, 0, 1, 36, 28, 0, 1, 4)
+
+#define QS_XTR_DATA_PRESENT_DATA_PRESENT         GENMASK(1, 0)
+#define QS_XTR_DATA_PRESENT_DATA_PRESENT_SET(x)\
+	FIELD_PREP(QS_XTR_DATA_PRESENT_DATA_PRESENT, x)
+#define QS_XTR_DATA_PRESENT_DATA_PRESENT_GET(x)\
+	FIELD_GET(QS_XTR_DATA_PRESENT_DATA_PRESENT, x)
+
+/*      DEVCPU_QS:INJ:INJ_GRP_CFG */
+#define QS_INJ_GRP_CFG(r)         __REG(TARGET_QS, 0, 1, 36, 0, 1, 40, 0, r, 2, 4)
+
+#define QS_INJ_GRP_CFG_MODE                      GENMASK(3, 2)
+#define QS_INJ_GRP_CFG_MODE_SET(x)\
+	FIELD_PREP(QS_INJ_GRP_CFG_MODE, x)
+#define QS_INJ_GRP_CFG_MODE_GET(x)\
+	FIELD_GET(QS_INJ_GRP_CFG_MODE, x)
+
+#define QS_INJ_GRP_CFG_BYTE_SWAP                 BIT(0)
+#define QS_INJ_GRP_CFG_BYTE_SWAP_SET(x)\
+	FIELD_PREP(QS_INJ_GRP_CFG_BYTE_SWAP, x)
+#define QS_INJ_GRP_CFG_BYTE_SWAP_GET(x)\
+	FIELD_GET(QS_INJ_GRP_CFG_BYTE_SWAP, x)
+
+/*      DEVCPU_QS:INJ:INJ_WR */
+#define QS_INJ_WR(r)              __REG(TARGET_QS, 0, 1, 36, 0, 1, 40, 8, r, 2, 4)
+
+/*      DEVCPU_QS:INJ:INJ_CTRL */
+#define QS_INJ_CTRL(r)            __REG(TARGET_QS, 0, 1, 36, 0, 1, 40, 16, r, 2, 4)
+
+#define QS_INJ_CTRL_GAP_SIZE                     GENMASK(24, 21)
+#define QS_INJ_CTRL_GAP_SIZE_SET(x)\
+	FIELD_PREP(QS_INJ_CTRL_GAP_SIZE, x)
+#define QS_INJ_CTRL_GAP_SIZE_GET(x)\
+	FIELD_GET(QS_INJ_CTRL_GAP_SIZE, x)
+
+#define QS_INJ_CTRL_ABORT                        BIT(20)
+#define QS_INJ_CTRL_ABORT_SET(x)\
+	FIELD_PREP(QS_INJ_CTRL_ABORT, x)
+#define QS_INJ_CTRL_ABORT_GET(x)\
+	FIELD_GET(QS_INJ_CTRL_ABORT, x)
+
+#define QS_INJ_CTRL_EOF                          BIT(19)
+#define QS_INJ_CTRL_EOF_SET(x)\
+	FIELD_PREP(QS_INJ_CTRL_EOF, x)
+#define QS_INJ_CTRL_EOF_GET(x)\
+	FIELD_GET(QS_INJ_CTRL_EOF, x)
+
+#define QS_INJ_CTRL_SOF                          BIT(18)
+#define QS_INJ_CTRL_SOF_SET(x)\
+	FIELD_PREP(QS_INJ_CTRL_SOF, x)
+#define QS_INJ_CTRL_SOF_GET(x)\
+	FIELD_GET(QS_INJ_CTRL_SOF, x)
+
+#define QS_INJ_CTRL_VLD_BYTES                    GENMASK(17, 16)
+#define QS_INJ_CTRL_VLD_BYTES_SET(x)\
+	FIELD_PREP(QS_INJ_CTRL_VLD_BYTES, x)
+#define QS_INJ_CTRL_VLD_BYTES_GET(x)\
+	FIELD_GET(QS_INJ_CTRL_VLD_BYTES, x)
+
+/*      DEVCPU_QS:INJ:INJ_STATUS */
+#define QS_INJ_STATUS             __REG(TARGET_QS, 0, 1, 36, 0, 1, 40, 24, 0, 1, 4)
+
+#define QS_INJ_STATUS_WMARK_REACHED              GENMASK(5, 4)
+#define QS_INJ_STATUS_WMARK_REACHED_SET(x)\
+	FIELD_PREP(QS_INJ_STATUS_WMARK_REACHED, x)
+#define QS_INJ_STATUS_WMARK_REACHED_GET(x)\
+	FIELD_GET(QS_INJ_STATUS_WMARK_REACHED, x)
+
+#define QS_INJ_STATUS_FIFO_RDY                   GENMASK(3, 2)
+#define QS_INJ_STATUS_FIFO_RDY_SET(x)\
+	FIELD_PREP(QS_INJ_STATUS_FIFO_RDY, x)
+#define QS_INJ_STATUS_FIFO_RDY_GET(x)\
+	FIELD_GET(QS_INJ_STATUS_FIFO_RDY, x)
+
+#define QS_INJ_STATUS_INJ_IN_PROGRESS            GENMASK(1, 0)
+#define QS_INJ_STATUS_INJ_IN_PROGRESS_SET(x)\
+	FIELD_PREP(QS_INJ_STATUS_INJ_IN_PROGRESS, x)
+#define QS_INJ_STATUS_INJ_IN_PROGRESS_GET(x)\
+	FIELD_GET(QS_INJ_STATUS_INJ_IN_PROGRESS, x)
+
+/*      QSYS:PAUSE_CFG:PAUSE_CFG */
+#define QSYS_PAUSE_CFG(r)         __REG(TARGET_QSYS, 0, 1, 544, 0, 1, 1128, 0, r, 70, 4)
+
+#define QSYS_PAUSE_CFG_PAUSE_START               GENMASK(25, 14)
+#define QSYS_PAUSE_CFG_PAUSE_START_SET(x)\
+	FIELD_PREP(QSYS_PAUSE_CFG_PAUSE_START, x)
+#define QSYS_PAUSE_CFG_PAUSE_START_GET(x)\
+	FIELD_GET(QSYS_PAUSE_CFG_PAUSE_START, x)
+
+#define QSYS_PAUSE_CFG_PAUSE_STOP                GENMASK(13, 2)
+#define QSYS_PAUSE_CFG_PAUSE_STOP_SET(x)\
+	FIELD_PREP(QSYS_PAUSE_CFG_PAUSE_STOP, x)
+#define QSYS_PAUSE_CFG_PAUSE_STOP_GET(x)\
+	FIELD_GET(QSYS_PAUSE_CFG_PAUSE_STOP, x)
+
+#define QSYS_PAUSE_CFG_PAUSE_ENA                 BIT(1)
+#define QSYS_PAUSE_CFG_PAUSE_ENA_SET(x)\
+	FIELD_PREP(QSYS_PAUSE_CFG_PAUSE_ENA, x)
+#define QSYS_PAUSE_CFG_PAUSE_ENA_GET(x)\
+	FIELD_GET(QSYS_PAUSE_CFG_PAUSE_ENA, x)
+
+#define QSYS_PAUSE_CFG_AGGRESSIVE_TAILDROP_ENA   BIT(0)
+#define QSYS_PAUSE_CFG_AGGRESSIVE_TAILDROP_ENA_SET(x)\
+	FIELD_PREP(QSYS_PAUSE_CFG_AGGRESSIVE_TAILDROP_ENA, x)
+#define QSYS_PAUSE_CFG_AGGRESSIVE_TAILDROP_ENA_GET(x)\
+	FIELD_GET(QSYS_PAUSE_CFG_AGGRESSIVE_TAILDROP_ENA, x)
+
+/*      QSYS:PAUSE_CFG:ATOP */
+#define QSYS_ATOP(r)              __REG(TARGET_QSYS, 0, 1, 544, 0, 1, 1128, 284, r, 70, 4)
+
+#define QSYS_ATOP_ATOP                           GENMASK(11, 0)
+#define QSYS_ATOP_ATOP_SET(x)\
+	FIELD_PREP(QSYS_ATOP_ATOP, x)
+#define QSYS_ATOP_ATOP_GET(x)\
+	FIELD_GET(QSYS_ATOP_ATOP, x)
+
+/*      QSYS:PAUSE_CFG:FWD_PRESSURE */
+#define QSYS_FWD_PRESSURE(r)      __REG(TARGET_QSYS, 0, 1, 544, 0, 1, 1128, 564, r, 70, 4)
+
+#define QSYS_FWD_PRESSURE_FWD_PRESSURE           GENMASK(11, 1)
+#define QSYS_FWD_PRESSURE_FWD_PRESSURE_SET(x)\
+	FIELD_PREP(QSYS_FWD_PRESSURE_FWD_PRESSURE, x)
+#define QSYS_FWD_PRESSURE_FWD_PRESSURE_GET(x)\
+	FIELD_GET(QSYS_FWD_PRESSURE_FWD_PRESSURE, x)
+
+#define QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS       BIT(0)
+#define QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS_SET(x)\
+	FIELD_PREP(QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS, x)
+#define QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS_GET(x)\
+	FIELD_GET(QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS, x)
+
+/*      QSYS:PAUSE_CFG:ATOP_TOT_CFG */
+#define QSYS_ATOP_TOT_CFG         __REG(TARGET_QSYS, 0, 1, 544, 0, 1, 1128, 844, 0, 1, 4)
+
+#define QSYS_ATOP_TOT_CFG_ATOP_TOT               GENMASK(11, 0)
+#define QSYS_ATOP_TOT_CFG_ATOP_TOT_SET(x)\
+	FIELD_PREP(QSYS_ATOP_TOT_CFG_ATOP_TOT, x)
+#define QSYS_ATOP_TOT_CFG_ATOP_TOT_GET(x)\
+	FIELD_GET(QSYS_ATOP_TOT_CFG_ATOP_TOT, x)
+
+/*      QSYS:CALCFG:CAL_AUTO */
+#define QSYS_CAL_AUTO(r)          __REG(TARGET_QSYS, 0, 1, 2304, 0, 1, 40, 0, r, 7, 4)
+
+#define QSYS_CAL_AUTO_CAL_AUTO                   GENMASK(29, 0)
+#define QSYS_CAL_AUTO_CAL_AUTO_SET(x)\
+	FIELD_PREP(QSYS_CAL_AUTO_CAL_AUTO, x)
+#define QSYS_CAL_AUTO_CAL_AUTO_GET(x)\
+	FIELD_GET(QSYS_CAL_AUTO_CAL_AUTO, x)
+
+/*      QSYS:CALCFG:CAL_CTRL */
+#define QSYS_CAL_CTRL             __REG(TARGET_QSYS, 0, 1, 2304, 0, 1, 40, 36, 0, 1, 4)
+
+#define QSYS_CAL_CTRL_CAL_MODE                   GENMASK(14, 11)
+#define QSYS_CAL_CTRL_CAL_MODE_SET(x)\
+	FIELD_PREP(QSYS_CAL_CTRL_CAL_MODE, x)
+#define QSYS_CAL_CTRL_CAL_MODE_GET(x)\
+	FIELD_GET(QSYS_CAL_CTRL_CAL_MODE, x)
+
+#define QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE        GENMASK(10, 1)
+#define QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE_SET(x)\
+	FIELD_PREP(QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE, x)
+#define QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE_GET(x)\
+	FIELD_GET(QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE, x)
+
+#define QSYS_CAL_CTRL_CAL_AUTO_ERROR             BIT(0)
+#define QSYS_CAL_CTRL_CAL_AUTO_ERROR_SET(x)\
+	FIELD_PREP(QSYS_CAL_CTRL_CAL_AUTO_ERROR, x)
+#define QSYS_CAL_CTRL_CAL_AUTO_ERROR_GET(x)\
+	FIELD_GET(QSYS_CAL_CTRL_CAL_AUTO_ERROR, x)
+
+/*      QSYS:RAM_CTRL:RAM_INIT */
+#define QSYS_RAM_INIT             __REG(TARGET_QSYS, 0, 1, 2344, 0, 1, 4, 0, 0, 1, 4)
+
+#define QSYS_RAM_INIT_RAM_INIT                   BIT(1)
+#define QSYS_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(QSYS_RAM_INIT_RAM_INIT, x)
+#define QSYS_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(QSYS_RAM_INIT_RAM_INIT, x)
+
+#define QSYS_RAM_INIT_RAM_CFG_HOOK               BIT(0)
+#define QSYS_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(QSYS_RAM_INIT_RAM_CFG_HOOK, x)
+#define QSYS_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(QSYS_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      REW:COMMON:OWN_UPSID */
+#define REW_OWN_UPSID(r)          __REG(TARGET_REW, 0, 1, 387264, 0, 1, 1232, 0, r, 3, 4)
+
+#define REW_OWN_UPSID_OWN_UPSID                  GENMASK(4, 0)
+#define REW_OWN_UPSID_OWN_UPSID_SET(x)\
+	FIELD_PREP(REW_OWN_UPSID_OWN_UPSID, x)
+#define REW_OWN_UPSID_OWN_UPSID_GET(x)\
+	FIELD_GET(REW_OWN_UPSID_OWN_UPSID, x)
+
+/*      REW:PORT:PORT_VLAN_CFG */
+#define REW_PORT_VLAN_CFG(g)      __REG(TARGET_REW, 0, 1, 360448, g, 70, 256, 0, 0, 1, 4)
+
+#define REW_PORT_VLAN_CFG_PORT_PCP               GENMASK(15, 13)
+#define REW_PORT_VLAN_CFG_PORT_PCP_SET(x)\
+	FIELD_PREP(REW_PORT_VLAN_CFG_PORT_PCP, x)
+#define REW_PORT_VLAN_CFG_PORT_PCP_GET(x)\
+	FIELD_GET(REW_PORT_VLAN_CFG_PORT_PCP, x)
+
+#define REW_PORT_VLAN_CFG_PORT_DEI               BIT(12)
+#define REW_PORT_VLAN_CFG_PORT_DEI_SET(x)\
+	FIELD_PREP(REW_PORT_VLAN_CFG_PORT_DEI, x)
+#define REW_PORT_VLAN_CFG_PORT_DEI_GET(x)\
+	FIELD_GET(REW_PORT_VLAN_CFG_PORT_DEI, x)
+
+#define REW_PORT_VLAN_CFG_PORT_VID               GENMASK(11, 0)
+#define REW_PORT_VLAN_CFG_PORT_VID_SET(x)\
+	FIELD_PREP(REW_PORT_VLAN_CFG_PORT_VID, x)
+#define REW_PORT_VLAN_CFG_PORT_VID_GET(x)\
+	FIELD_GET(REW_PORT_VLAN_CFG_PORT_VID, x)
+
+/*      REW:PORT:TAG_CTRL */
+#define REW_TAG_CTRL(g)           __REG(TARGET_REW, 0, 1, 360448, g, 70, 256, 132, 0, 1, 4)
+
+#define REW_TAG_CTRL_TAG_CFG_OBEY_WAS_TAGGED     BIT(13)
+#define REW_TAG_CTRL_TAG_CFG_OBEY_WAS_TAGGED_SET(x)\
+	FIELD_PREP(REW_TAG_CTRL_TAG_CFG_OBEY_WAS_TAGGED, x)
+#define REW_TAG_CTRL_TAG_CFG_OBEY_WAS_TAGGED_GET(x)\
+	FIELD_GET(REW_TAG_CTRL_TAG_CFG_OBEY_WAS_TAGGED, x)
+
+#define REW_TAG_CTRL_TAG_CFG                     GENMASK(12, 11)
+#define REW_TAG_CTRL_TAG_CFG_SET(x)\
+	FIELD_PREP(REW_TAG_CTRL_TAG_CFG, x)
+#define REW_TAG_CTRL_TAG_CFG_GET(x)\
+	FIELD_GET(REW_TAG_CTRL_TAG_CFG, x)
+
+#define REW_TAG_CTRL_TAG_TPID_CFG                GENMASK(10, 8)
+#define REW_TAG_CTRL_TAG_TPID_CFG_SET(x)\
+	FIELD_PREP(REW_TAG_CTRL_TAG_TPID_CFG, x)
+#define REW_TAG_CTRL_TAG_TPID_CFG_GET(x)\
+	FIELD_GET(REW_TAG_CTRL_TAG_TPID_CFG, x)
+
+#define REW_TAG_CTRL_TAG_VID_CFG                 GENMASK(7, 6)
+#define REW_TAG_CTRL_TAG_VID_CFG_SET(x)\
+	FIELD_PREP(REW_TAG_CTRL_TAG_VID_CFG, x)
+#define REW_TAG_CTRL_TAG_VID_CFG_GET(x)\
+	FIELD_GET(REW_TAG_CTRL_TAG_VID_CFG, x)
+
+#define REW_TAG_CTRL_TAG_PCP_CFG                 GENMASK(5, 3)
+#define REW_TAG_CTRL_TAG_PCP_CFG_SET(x)\
+	FIELD_PREP(REW_TAG_CTRL_TAG_PCP_CFG, x)
+#define REW_TAG_CTRL_TAG_PCP_CFG_GET(x)\
+	FIELD_GET(REW_TAG_CTRL_TAG_PCP_CFG, x)
+
+#define REW_TAG_CTRL_TAG_DEI_CFG                 GENMASK(2, 0)
+#define REW_TAG_CTRL_TAG_DEI_CFG_SET(x)\
+	FIELD_PREP(REW_TAG_CTRL_TAG_DEI_CFG, x)
+#define REW_TAG_CTRL_TAG_DEI_CFG_GET(x)\
+	FIELD_GET(REW_TAG_CTRL_TAG_DEI_CFG, x)
+
+/*      REW:RAM_CTRL:RAM_INIT */
+#define REW_RAM_INIT              __REG(TARGET_REW, 0, 1, 378696, 0, 1, 4, 0, 0, 1, 4)
+
+#define REW_RAM_INIT_RAM_INIT                    BIT(1)
+#define REW_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(REW_RAM_INIT_RAM_INIT, x)
+#define REW_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(REW_RAM_INIT_RAM_INIT, x)
+
+#define REW_RAM_INIT_RAM_CFG_HOOK                BIT(0)
+#define REW_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(REW_RAM_INIT_RAM_CFG_HOOK, x)
+#define REW_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(REW_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      VCAP_SUPER:RAM_CTRL:RAM_INIT */
+#define VCAP_SUPER_RAM_INIT       __REG(TARGET_VCAP_SUPER, 0, 1, 1120, 0, 1, 4, 0, 0, 1, 4)
+
+#define VCAP_SUPER_RAM_INIT_RAM_INIT             BIT(1)
+#define VCAP_SUPER_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(VCAP_SUPER_RAM_INIT_RAM_INIT, x)
+#define VCAP_SUPER_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(VCAP_SUPER_RAM_INIT_RAM_INIT, x)
+
+#define VCAP_SUPER_RAM_INIT_RAM_CFG_HOOK         BIT(0)
+#define VCAP_SUPER_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(VCAP_SUPER_RAM_INIT_RAM_CFG_HOOK, x)
+#define VCAP_SUPER_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(VCAP_SUPER_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      VOP:RAM_CTRL:RAM_INIT */
+#define VOP_RAM_INIT              __REG(TARGET_VOP, 0, 1, 279176, 0, 1, 4, 0, 0, 1, 4)
+
+#define VOP_RAM_INIT_RAM_INIT                    BIT(1)
+#define VOP_RAM_INIT_RAM_INIT_SET(x)\
+	FIELD_PREP(VOP_RAM_INIT_RAM_INIT, x)
+#define VOP_RAM_INIT_RAM_INIT_GET(x)\
+	FIELD_GET(VOP_RAM_INIT_RAM_INIT, x)
+
+#define VOP_RAM_INIT_RAM_CFG_HOOK                BIT(0)
+#define VOP_RAM_INIT_RAM_CFG_HOOK_SET(x)\
+	FIELD_PREP(VOP_RAM_INIT_RAM_CFG_HOOK, x)
+#define VOP_RAM_INIT_RAM_CFG_HOOK_GET(x)\
+	FIELD_GET(VOP_RAM_INIT_RAM_CFG_HOOK, x)
+
+/*      XQS:SYSTEM:STAT_CFG */
+#define XQS_STAT_CFG              __REG(TARGET_XQS, 0, 1, 6768, 0, 1, 872, 860, 0, 1, 4)
+
+#define XQS_STAT_CFG_STAT_CLEAR_SHOT             GENMASK(21, 18)
+#define XQS_STAT_CFG_STAT_CLEAR_SHOT_SET(x)\
+	FIELD_PREP(XQS_STAT_CFG_STAT_CLEAR_SHOT, x)
+#define XQS_STAT_CFG_STAT_CLEAR_SHOT_GET(x)\
+	FIELD_GET(XQS_STAT_CFG_STAT_CLEAR_SHOT, x)
+
+#define XQS_STAT_CFG_STAT_VIEW                   GENMASK(17, 5)
+#define XQS_STAT_CFG_STAT_VIEW_SET(x)\
+	FIELD_PREP(XQS_STAT_CFG_STAT_VIEW, x)
+#define XQS_STAT_CFG_STAT_VIEW_GET(x)\
+	FIELD_GET(XQS_STAT_CFG_STAT_VIEW, x)
+
+#define XQS_STAT_CFG_STAT_SRV_PKT_ONLY           BIT(4)
+#define XQS_STAT_CFG_STAT_SRV_PKT_ONLY_SET(x)\
+	FIELD_PREP(XQS_STAT_CFG_STAT_SRV_PKT_ONLY, x)
+#define XQS_STAT_CFG_STAT_SRV_PKT_ONLY_GET(x)\
+	FIELD_GET(XQS_STAT_CFG_STAT_SRV_PKT_ONLY, x)
+
+#define XQS_STAT_CFG_STAT_WRAP_DIS               GENMASK(3, 0)
+#define XQS_STAT_CFG_STAT_WRAP_DIS_SET(x)\
+	FIELD_PREP(XQS_STAT_CFG_STAT_WRAP_DIS, x)
+#define XQS_STAT_CFG_STAT_WRAP_DIS_GET(x)\
+	FIELD_GET(XQS_STAT_CFG_STAT_WRAP_DIS, x)
+
+/*      XQS:QLIMIT_SHR:QLIMIT_SHR_TOP_CFG */
+#define XQS_QLIMIT_SHR_TOP_CFG(g) __REG(TARGET_XQS, 0, 1, 7936, g, 4, 48, 0, 0, 1, 4)
+
+#define XQS_QLIMIT_SHR_TOP_CFG_QLIMIT_SHR_TOP    GENMASK(14, 0)
+#define XQS_QLIMIT_SHR_TOP_CFG_QLIMIT_SHR_TOP_SET(x)\
+	FIELD_PREP(XQS_QLIMIT_SHR_TOP_CFG_QLIMIT_SHR_TOP, x)
+#define XQS_QLIMIT_SHR_TOP_CFG_QLIMIT_SHR_TOP_GET(x)\
+	FIELD_GET(XQS_QLIMIT_SHR_TOP_CFG_QLIMIT_SHR_TOP, x)
+
+/*      XQS:QLIMIT_SHR:QLIMIT_SHR_ATOP_CFG */
+#define XQS_QLIMIT_SHR_ATOP_CFG(g) __REG(TARGET_XQS, 0, 1, 7936, g, 4, 48, 4, 0, 1, 4)
+
+#define XQS_QLIMIT_SHR_ATOP_CFG_QLIMIT_SHR_ATOP  GENMASK(14, 0)
+#define XQS_QLIMIT_SHR_ATOP_CFG_QLIMIT_SHR_ATOP_SET(x)\
+	FIELD_PREP(XQS_QLIMIT_SHR_ATOP_CFG_QLIMIT_SHR_ATOP, x)
+#define XQS_QLIMIT_SHR_ATOP_CFG_QLIMIT_SHR_ATOP_GET(x)\
+	FIELD_GET(XQS_QLIMIT_SHR_ATOP_CFG_QLIMIT_SHR_ATOP, x)
+
+/*      XQS:QLIMIT_SHR:QLIMIT_SHR_CTOP_CFG */
+#define XQS_QLIMIT_SHR_CTOP_CFG(g) __REG(TARGET_XQS, 0, 1, 7936, g, 4, 48, 8, 0, 1, 4)
+
+#define XQS_QLIMIT_SHR_CTOP_CFG_QLIMIT_SHR_CTOP  GENMASK(14, 0)
+#define XQS_QLIMIT_SHR_CTOP_CFG_QLIMIT_SHR_CTOP_SET(x)\
+	FIELD_PREP(XQS_QLIMIT_SHR_CTOP_CFG_QLIMIT_SHR_CTOP, x)
+#define XQS_QLIMIT_SHR_CTOP_CFG_QLIMIT_SHR_CTOP_GET(x)\
+	FIELD_GET(XQS_QLIMIT_SHR_CTOP_CFG_QLIMIT_SHR_CTOP, x)
+
+/*      XQS:QLIMIT_SHR:QLIMIT_SHR_QLIM_CFG */
+#define XQS_QLIMIT_SHR_QLIM_CFG(g) __REG(TARGET_XQS, 0, 1, 7936, g, 4, 48, 12, 0, 1, 4)
+
+#define XQS_QLIMIT_SHR_QLIM_CFG_QLIMIT_SHR_QLIM  GENMASK(14, 0)
+#define XQS_QLIMIT_SHR_QLIM_CFG_QLIMIT_SHR_QLIM_SET(x)\
+	FIELD_PREP(XQS_QLIMIT_SHR_QLIM_CFG_QLIMIT_SHR_QLIM, x)
+#define XQS_QLIMIT_SHR_QLIM_CFG_QLIMIT_SHR_QLIM_GET(x)\
+	FIELD_GET(XQS_QLIMIT_SHR_QLIM_CFG_QLIMIT_SHR_QLIM, x)
+
+/*      XQS:STAT:CNT */
+#define XQS_CNT(g)                __REG(TARGET_XQS, 0, 1, 0, g, 1024, 4, 0, 0, 1, 4)
+
+#endif /* _SPARX5_MAIN_REGS_H_ */

From f3cad2611a77f0229dc16aa7bd2ef63e35ea9fb6 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:51 +0200
Subject: [PATCH 2829/3292] net: sparx5: add hostmode with phylink support

This patch adds netdevs and phylink support for the ports in the switch.
It also adds register based injection and extraction for these ports.

Frame DMA support for injection and extraction will be added in a later
series.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |   3 +-
 .../ethernet/microchip/sparx5/sparx5_main.c   |  79 ++++-
 .../ethernet/microchip/sparx5/sparx5_main.h   |  33 ++
 .../ethernet/microchip/sparx5/sparx5_netdev.c | 221 ++++++++++++
 .../ethernet/microchip/sparx5/sparx5_packet.c | 314 ++++++++++++++++++
 .../microchip/sparx5/sparx5_phylink.c         | 199 +++++++++++
 6 files changed, 840 insertions(+), 9 deletions(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index 41a31843d86f..48a7c110040a 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -5,4 +5,5 @@
 
 obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
-sparx5-switch-objs  := sparx5_main.o
+sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
+ sparx5_netdev.o sparx5_phylink.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 0ba255d547b0..c295d814f75f 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -250,9 +250,16 @@ static int sparx5_create_port(struct sparx5 *sparx5,
 			      struct initial_port_config *config)
 {
 	struct sparx5_port *spx5_port;
+	struct net_device *ndev;
+	struct phylink *phylink;
 
-	/* netdev creation to be added in later patches */
-	spx5_port = devm_kzalloc(sparx5->dev, sizeof(*spx5_port), GFP_KERNEL);
+	ndev = sparx5_create_netdev(sparx5, config->portno);
+	if (IS_ERR(ndev)) {
+		dev_err(sparx5->dev, "Could not create net device: %02u\n",
+			config->portno);
+		return PTR_ERR(ndev);
+	}
+	spx5_port = netdev_priv(ndev);
 	spx5_port->of_node = config->node;
 	spx5_port->serdes = config->serdes;
 	spx5_port->pvid = NULL_VID;
@@ -262,8 +269,28 @@ static int sparx5_create_port(struct sparx5 *sparx5,
 	spx5_port->max_vlan_tags = SPX5_PORT_MAX_TAGS_NONE;
 	spx5_port->vlan_type = SPX5_VLAN_PORT_TYPE_UNAWARE;
 	spx5_port->custom_etype = 0x8880; /* Vitesse */
+	spx5_port->phylink_pcs.poll = true;
+	spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops;
+	sparx5->ports[config->portno] = spx5_port;
 
-	/* PHYLINK support to be added in later patches */
+	spx5_port->conf = config->conf;
+
+	/* VLAN setup to be added in later patches */
+
+	/* Create a phylink for PHY management.  Also handles SFPs */
+	spx5_port->phylink_config.dev = &spx5_port->ndev->dev;
+	spx5_port->phylink_config.type = PHYLINK_NETDEV;
+	spx5_port->phylink_config.pcs_poll = true;
+
+	phylink = phylink_create(&spx5_port->phylink_config,
+				 of_fwnode_handle(config->node),
+				 config->conf.phy_mode,
+				 &sparx5_phylink_mac_ops);
+	if (IS_ERR(phylink))
+		return PTR_ERR(phylink);
+
+	spx5_port->phylink = phylink;
+	phylink_set_pcs(phylink, &spx5_port->phylink_pcs);
 
 	return 0;
 }
@@ -525,6 +552,7 @@ static void sparx5_board_init(struct sparx5 *sparx5)
 static int sparx5_start(struct sparx5 *sparx5)
 {
 	u32 idx;
+	int err;
 
 	/* Setup own UPSIDs */
 	for (idx = 0; idx < 3; idx++) {
@@ -558,13 +586,34 @@ static int sparx5_start(struct sparx5 *sparx5)
 	/* Enable queue limitation watermarks */
 	sparx5_qlim_set(sparx5);
 
-	/* netdev and resource calendar support to be added in later patches */
+	/* Resource calendar support to be added in later patches */
+
+	err = sparx5_register_netdevs(sparx5);
+	if (err)
+		return err;
 
 	sparx5_board_init(sparx5);
 
-	/* Injection/Extraction config to be added in later patches */
+	/* Start register based INJ/XTR */
+	err = -ENXIO;
+	if (err && sparx5->xtr_irq >= 0) {
+		err = devm_request_irq(sparx5->dev, sparx5->xtr_irq,
+				       sparx5_xtr_handler, IRQF_SHARED,
+				       "sparx5-xtr", sparx5);
+		if (!err)
+			err = sparx5_manual_injection_mode(sparx5);
+		if (err)
+			sparx5->xtr_irq = -ENXIO;
+	} else {
+		sparx5->xtr_irq = -ENXIO;
+	}
+	return err;
+}
 
-	return 0;
+static void sparx5_cleanup_ports(struct sparx5 *sparx5)
+{
+	sparx5_unregister_netdevs(sparx5);
+	sparx5_destroy_netdevs(sparx5);
 }
 
 static int mchp_sparx5_probe(struct platform_device *pdev)
@@ -674,7 +723,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
 		ether_addr_copy(sparx5->base_mac, mac_addr);
 	}
 
-	/* Inj/Xtr IRQ support to be added in later patches */
+	sparx5->xtr_irq = platform_get_irq_byname(sparx5->pdev, "xtr");
+
 	/* Read chip ID to check CPU interface */
 	sparx5->chip_id = spx5_rd(sparx5, GCB_CHIP_ID);
 
@@ -715,7 +765,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
 	goto cleanup_config;
 
 cleanup_ports:
-	/* Port cleanup to be added in later patches */
+	sparx5_cleanup_ports(sparx5);
 cleanup_config:
 	kfree(configs);
 cleanup_pnode:
@@ -723,6 +773,18 @@ cleanup_pnode:
 	return err;
 }
 
+static int mchp_sparx5_remove(struct platform_device *pdev)
+{
+	struct sparx5 *sparx5 = platform_get_drvdata(pdev);
+
+	if (sparx5->xtr_irq) {
+		disable_irq(sparx5->xtr_irq);
+		sparx5->xtr_irq = -ENXIO;
+	}
+	sparx5_cleanup_ports(sparx5);
+	return 0;
+}
+
 static const struct of_device_id mchp_sparx5_match[] = {
 	{ .compatible = "microchip,sparx5-switch" },
 	{ }
@@ -731,6 +793,7 @@ MODULE_DEVICE_TABLE(of, mchp_sparx5_match);
 
 static struct platform_driver mchp_sparx5_driver = {
 	.probe = mchp_sparx5_probe,
+	.remove = mchp_sparx5_remove,
 	.driver = {
 		.name = "sparx5-switch",
 		.of_match_table = mchp_sparx5_match,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 705b5c80228d..b0fb577ca6df 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -14,6 +14,7 @@
 #include <linux/if_vlan.h>
 #include <linux/bitmap.h>
 #include <linux/phylink.h>
+#include <linux/hrtimer.h>
 
 /* Target chip type */
 enum spx5_target_chiptype {
@@ -69,6 +70,9 @@ enum sparx5_vlan_port_type {
 #define SPX5_BUFFER_CELL_SZ    184   /* Cell size  */
 #define SPX5_BUFFER_MEMORY     4194280 /* 22795 words * 184 bytes */
 
+#define XTR_QUEUE     0
+#define INJ_QUEUE     0
+
 struct sparx5;
 
 struct sparx5_port_config {
@@ -93,6 +97,9 @@ struct sparx5_port {
 	struct device_node *of_node;
 	struct phy *serdes;
 	struct sparx5_port_config conf;
+	struct phylink_config phylink_config;
+	struct phylink *phylink;
+	struct phylink_pcs phylink_pcs;
 	u16 portno;
 	/* Ingress default VLAN (pvid) */
 	u16 pvid;
@@ -107,6 +114,7 @@ struct sparx5_port {
 	u32 custom_etype;
 	u32 ifh[IFH_LEN];
 	bool vlan_aware;
+	struct hrtimer inj_timer;
 };
 
 enum sparx5_core_clockfreq {
@@ -130,8 +138,23 @@ struct sparx5 {
 	u8 base_mac[ETH_ALEN];
 	/* Board specifics */
 	bool sd_sgpio_remapping;
+	/* Register based inj/xtr */
+	int xtr_irq;
 };
 
+/* sparx5_packet.c */
+irqreturn_t sparx5_xtr_handler(int irq, void *_priv);
+int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev);
+int sparx5_manual_injection_mode(struct sparx5 *sparx5);
+void sparx5_port_inj_timer_setup(struct sparx5_port *port);
+
+/* sparx5_netdev.c */
+bool sparx5_netdevice_check(const struct net_device *dev);
+struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno);
+int sparx5_register_netdevs(struct sparx5 *sparx5);
+void sparx5_destroy_netdevs(struct sparx5 *sparx5);
+void sparx5_unregister_netdevs(struct sparx5 *sparx5);
+
 /* Clock period in picoseconds */
 static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock)
 {
@@ -146,6 +169,16 @@ static inline u32 sparx5_clk_period(enum sparx5_core_clockfreq cclock)
 	}
 }
 
+static inline bool sparx5_is_baser(phy_interface_t interface)
+{
+	return interface == PHY_INTERFACE_MODE_5GBASER ||
+		   interface == PHY_INTERFACE_MODE_10GBASER ||
+		   interface == PHY_INTERFACE_MODE_25GBASER;
+}
+
+extern const struct phylink_mac_ops sparx5_phylink_mac_ops;
+extern const struct phylink_pcs_ops sparx5_phylink_pcs_ops;
+
 /* Calculate raw offset */
 static inline __pure int spx5_offset(int id, int tinst, int tcnt,
 				     int gbase, int ginst,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
new file mode 100644
index 000000000000..b6df6e8b80d5
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+/* The IFH bit position of the first VSTAX bit. This is because the
+ * VSTAX bit positions in Data sheet is starting from zero.
+ */
+#define VSTAX 73
+
+static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
+{
+	u8 *ifh_hdr = ifh;
+	/* Calculate the Start IFH byte position of this IFH bit position */
+	u32 byte = (35 - (pos / 8));
+	/* Calculate the Start bit position in the Start IFH byte */
+	u32 bit  = (pos % 8);
+	u64 encode = GENMASK(bit + width - 1, bit) & (value << bit);
+
+	/* Max width is 5 bytes - 40 bits. In worst case this will
+	 * spread over 6 bytes - 48 bits
+	 */
+	compiletime_assert(width <= 40, "Unsupported width, must be <= 40");
+
+	/* The b0-b7 goes into the start IFH byte */
+	if (encode & 0xFF)
+		ifh_hdr[byte] |= (u8)((encode & 0xFF));
+	/* The b8-b15 goes into the next IFH byte */
+	if (encode & 0xFF00)
+		ifh_hdr[byte - 1] |= (u8)((encode & 0xFF00) >> 8);
+	/* The b16-b23 goes into the next IFH byte */
+	if (encode & 0xFF0000)
+		ifh_hdr[byte - 2] |= (u8)((encode & 0xFF0000) >> 16);
+	/* The b24-b31 goes into the next IFH byte */
+	if (encode & 0xFF000000)
+		ifh_hdr[byte - 3] |= (u8)((encode & 0xFF000000) >> 24);
+	/* The b32-b39 goes into the next IFH byte */
+	if (encode & 0xFF00000000)
+		ifh_hdr[byte - 4] |= (u8)((encode & 0xFF00000000) >> 32);
+	/* The b40-b47 goes into the next IFH byte */
+	if (encode & 0xFF0000000000)
+		ifh_hdr[byte - 5] |= (u8)((encode & 0xFF0000000000) >> 40);
+}
+
+static void sparx5_set_port_ifh(void *ifh_hdr, u16 portno)
+{
+	/* VSTAX.RSV = 1. MSBit must be 1 */
+	ifh_encode_bitfield(ifh_hdr, 1, VSTAX + 79,  1);
+	/* VSTAX.INGR_DROP_MODE = Enable. Don't make head-of-line blocking */
+	ifh_encode_bitfield(ifh_hdr, 1, VSTAX + 55,  1);
+	/* MISC.CPU_MASK/DPORT = Destination port */
+	ifh_encode_bitfield(ifh_hdr, portno,   29, 8);
+	/* MISC.PIPELINE_PT */
+	ifh_encode_bitfield(ifh_hdr, 16,       37, 5);
+	/* MISC.PIPELINE_ACT */
+	ifh_encode_bitfield(ifh_hdr, 1,        42, 3);
+	/* FWD.SRC_PORT = CPU */
+	ifh_encode_bitfield(ifh_hdr, SPX5_PORT_CPU, 46, 7);
+	/* FWD.SFLOW_ID (disable SFlow sampling) */
+	ifh_encode_bitfield(ifh_hdr, 124,      57, 7);
+	/* FWD.UPDATE_FCS = Enable. Enforce update of FCS. */
+	ifh_encode_bitfield(ifh_hdr, 1,        67, 1);
+}
+
+static int sparx5_port_open(struct net_device *ndev)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	int err = 0;
+
+	err = phylink_of_phy_connect(port->phylink, port->of_node, 0);
+	if (err) {
+		netdev_err(ndev, "Could not attach to PHY\n");
+		return err;
+	}
+
+	phylink_start(port->phylink);
+
+	if (!ndev->phydev) {
+		/* power up serdes */
+		port->conf.power_down = false;
+		err = phy_power_on(port->serdes);
+		if (err)
+			netdev_err(ndev, "%s failed\n", __func__);
+	}
+
+	return err;
+}
+
+static int sparx5_port_stop(struct net_device *ndev)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	int err = 0;
+
+	phylink_stop(port->phylink);
+	phylink_disconnect_phy(port->phylink);
+
+	if (!ndev->phydev) {
+		port->conf.power_down = true;
+		err = phy_power_off(port->serdes);
+		if (err)
+			netdev_err(ndev, "%s failed\n", __func__);
+	}
+	return 0;
+}
+
+static int sparx5_port_get_phys_port_name(struct net_device *dev,
+					  char *buf, size_t len)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	int ret;
+
+	ret = snprintf(buf, len, "p%d", port->portno);
+	if (ret >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sparx5_set_mac_address(struct net_device *dev, void *p)
+{
+	const struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	/* Record the address */
+	ether_addr_copy(dev->dev_addr, addr->sa_data);
+
+	return 0;
+}
+
+static const struct net_device_ops sparx5_port_netdev_ops = {
+	.ndo_open               = sparx5_port_open,
+	.ndo_stop               = sparx5_port_stop,
+	.ndo_start_xmit         = sparx5_port_xmit_impl,
+	.ndo_get_phys_port_name = sparx5_port_get_phys_port_name,
+	.ndo_set_mac_address    = sparx5_set_mac_address,
+	.ndo_validate_addr      = eth_validate_addr,
+};
+
+bool sparx5_netdevice_check(const struct net_device *dev)
+{
+	return dev && (dev->netdev_ops == &sparx5_port_netdev_ops);
+}
+
+struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno)
+{
+	struct sparx5_port *spx5_port;
+	struct net_device *ndev;
+	u64 val;
+
+	ndev = devm_alloc_etherdev(sparx5->dev, sizeof(struct sparx5_port));
+	if (!ndev)
+		return ERR_PTR(-ENOMEM);
+
+	SET_NETDEV_DEV(ndev, sparx5->dev);
+	spx5_port = netdev_priv(ndev);
+	spx5_port->ndev = ndev;
+	spx5_port->sparx5 = sparx5;
+	spx5_port->portno = portno;
+	sparx5_set_port_ifh(spx5_port->ifh, portno);
+
+	ndev->netdev_ops = &sparx5_port_netdev_ops;
+
+	val = ether_addr_to_u64(sparx5->base_mac) + portno + 1;
+	u64_to_ether_addr(val, ndev->dev_addr);
+
+	return ndev;
+}
+
+int sparx5_register_netdevs(struct sparx5 *sparx5)
+{
+	int portno;
+	int err;
+
+	for (portno = 0; portno < SPX5_PORTS; portno++)
+		if (sparx5->ports[portno]) {
+			err = register_netdev(sparx5->ports[portno]->ndev);
+			if (err) {
+				dev_err(sparx5->dev,
+					"port: %02u: netdev registration failed\n",
+					portno);
+				return err;
+			}
+			sparx5_port_inj_timer_setup(sparx5->ports[portno]);
+		}
+	return 0;
+}
+
+void sparx5_destroy_netdevs(struct sparx5 *sparx5)
+{
+	struct sparx5_port *port;
+	int portno;
+
+	for (portno = 0; portno < SPX5_PORTS; portno++) {
+		port = sparx5->ports[portno];
+		if (port && port->phylink) {
+			/* Disconnect the phy */
+			rtnl_lock();
+			sparx5_port_stop(port->ndev);
+			phylink_disconnect_phy(port->phylink);
+			rtnl_unlock();
+			phylink_destroy(port->phylink);
+			port->phylink = NULL;
+		}
+	}
+}
+
+void sparx5_unregister_netdevs(struct sparx5 *sparx5)
+{
+	int portno;
+
+	for (portno = 0; portno < SPX5_PORTS; portno++)
+		if (sparx5->ports[portno])
+			unregister_netdev(sparx5->ports[portno]->ndev);
+}
+
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
new file mode 100644
index 000000000000..05f4f3f9b6e2
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+#define XTR_EOF_0     ntohl((__force __be32)0x80000000u)
+#define XTR_EOF_1     ntohl((__force __be32)0x80000001u)
+#define XTR_EOF_2     ntohl((__force __be32)0x80000002u)
+#define XTR_EOF_3     ntohl((__force __be32)0x80000003u)
+#define XTR_PRUNED    ntohl((__force __be32)0x80000004u)
+#define XTR_ABORT     ntohl((__force __be32)0x80000005u)
+#define XTR_ESCAPE    ntohl((__force __be32)0x80000006u)
+#define XTR_NOT_READY ntohl((__force __be32)0x80000007u)
+
+#define XTR_VALID_BYTES(x)      (4 - ((x) & 3))
+
+#define INJ_TIMEOUT_NS 50000
+
+struct frame_info {
+	int src_port;
+};
+
+static void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
+{
+	/* Start flush */
+	spx5_wr(QS_XTR_FLUSH_FLUSH_SET(BIT(grp)), sparx5, QS_XTR_FLUSH);
+
+	/* Allow to drain */
+	mdelay(1);
+
+	/* All Queues normal */
+	spx5_wr(0, sparx5, QS_XTR_FLUSH);
+}
+
+static void sparx5_ifh_parse(u32 *ifh, struct frame_info *info)
+{
+	u8 *xtr_hdr = (u8 *)ifh;
+
+	/* FWD is bit 45-72 (28 bits), but we only read the 27 LSB for now */
+	u32 fwd =
+		((u32)xtr_hdr[27] << 24) |
+		((u32)xtr_hdr[28] << 16) |
+		((u32)xtr_hdr[29] <<  8) |
+		((u32)xtr_hdr[30] <<  0);
+	fwd = (fwd >> 5);
+	info->src_port = FIELD_GET(GENMASK(7, 1), fwd);
+}
+
+static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap)
+{
+	bool eof_flag = false, pruned_flag = false, abort_flag = false;
+	struct net_device *netdev;
+	struct sparx5_port *port;
+	struct frame_info fi;
+	int i, byte_cnt = 0;
+	struct sk_buff *skb;
+	u32 ifh[IFH_LEN];
+	u32 *rxbuf;
+
+	/* Get IFH */
+	for (i = 0; i < IFH_LEN; i++)
+		ifh[i] = spx5_rd(sparx5, QS_XTR_RD(grp));
+
+	/* Decode IFH (whats needed) */
+	sparx5_ifh_parse(ifh, &fi);
+
+	/* Map to port netdev */
+	port = fi.src_port < SPX5_PORTS ?
+		sparx5->ports[fi.src_port] : NULL;
+	if (!port || !port->ndev) {
+		dev_err(sparx5->dev, "Data on inactive port %d\n", fi.src_port);
+		sparx5_xtr_flush(sparx5, grp);
+		return;
+	}
+
+	/* Have netdev, get skb */
+	netdev = port->ndev;
+	skb = netdev_alloc_skb(netdev, netdev->mtu + ETH_HLEN);
+	if (!skb) {
+		sparx5_xtr_flush(sparx5, grp);
+		dev_err(sparx5->dev, "No skb allocated\n");
+		netdev->stats.rx_dropped++;
+		return;
+	}
+	rxbuf = (u32 *)skb->data;
+
+	/* Now, pull frame data */
+	while (!eof_flag) {
+		u32 val = spx5_rd(sparx5, QS_XTR_RD(grp));
+		u32 cmp = val;
+
+		if (byte_swap)
+			cmp = ntohl((__force __be32)val);
+
+		switch (cmp) {
+		case XTR_NOT_READY:
+			break;
+		case XTR_ABORT:
+			/* No accompanying data */
+			abort_flag = true;
+			eof_flag = true;
+			break;
+		case XTR_EOF_0:
+		case XTR_EOF_1:
+		case XTR_EOF_2:
+		case XTR_EOF_3:
+			/* This assumes STATUS_WORD_POS == 1, Status
+			 * just after last data
+			 */
+			byte_cnt -= (4 - XTR_VALID_BYTES(val));
+			eof_flag = true;
+			break;
+		case XTR_PRUNED:
+			/* But get the last 4 bytes as well */
+			eof_flag = true;
+			pruned_flag = true;
+			fallthrough;
+		case XTR_ESCAPE:
+			*rxbuf = spx5_rd(sparx5, QS_XTR_RD(grp));
+			byte_cnt += 4;
+			rxbuf++;
+			break;
+		default:
+			*rxbuf = val;
+			byte_cnt += 4;
+			rxbuf++;
+		}
+	}
+
+	if (abort_flag || pruned_flag || !eof_flag) {
+		netdev_err(netdev, "Discarded frame: abort:%d pruned:%d eof:%d\n",
+			   abort_flag, pruned_flag, eof_flag);
+		kfree_skb(skb);
+		netdev->stats.rx_dropped++;
+		return;
+	}
+
+	/* Finish up skb */
+	skb_put(skb, byte_cnt - ETH_FCS_LEN);
+	eth_skb_pad(skb);
+	skb->protocol = eth_type_trans(skb, netdev);
+	netif_rx(skb);
+	netdev->stats.rx_bytes += skb->len;
+	netdev->stats.rx_packets++;
+}
+
+static int sparx5_inject(struct sparx5 *sparx5,
+			 u32 *ifh,
+			 struct sk_buff *skb,
+			 struct net_device *ndev)
+{
+	int grp = INJ_QUEUE;
+	u32 val, w, count;
+	u8 *buf;
+
+	val = spx5_rd(sparx5, QS_INJ_STATUS);
+	if (!(QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp))) {
+		pr_err_ratelimited("Injection: Queue not ready: 0x%lx\n",
+				   QS_INJ_STATUS_FIFO_RDY_GET(val));
+		return -EBUSY;
+	}
+
+	/* Indicate SOF */
+	spx5_wr(QS_INJ_CTRL_SOF_SET(1) |
+		QS_INJ_CTRL_GAP_SIZE_SET(1),
+		sparx5, QS_INJ_CTRL(grp));
+
+	/* Write the IFH to the chip. */
+	for (w = 0; w < IFH_LEN; w++)
+		spx5_wr(ifh[w], sparx5, QS_INJ_WR(grp));
+
+	/* Write words, round up */
+	count = DIV_ROUND_UP(skb->len, 4);
+	buf = skb->data;
+	for (w = 0; w < count; w++, buf += 4) {
+		val = get_unaligned((const u32 *)buf);
+		spx5_wr(val, sparx5, QS_INJ_WR(grp));
+	}
+
+	/* Add padding */
+	while (w < (60 / 4)) {
+		spx5_wr(0, sparx5, QS_INJ_WR(grp));
+		w++;
+	}
+
+	/* Indicate EOF and valid bytes in last word */
+	spx5_wr(QS_INJ_CTRL_GAP_SIZE_SET(1) |
+		QS_INJ_CTRL_VLD_BYTES_SET(skb->len < 60 ? 0 : skb->len % 4) |
+		QS_INJ_CTRL_EOF_SET(1),
+		sparx5, QS_INJ_CTRL(grp));
+
+	/* Add dummy CRC */
+	spx5_wr(0, sparx5, QS_INJ_WR(grp));
+	w++;
+
+	val = spx5_rd(sparx5, QS_INJ_STATUS);
+	if (QS_INJ_STATUS_WMARK_REACHED_GET(val) & BIT(grp)) {
+		struct sparx5_port *port = netdev_priv(ndev);
+
+		pr_err_ratelimited("Injection: Watermark reached: 0x%lx\n",
+				   QS_INJ_STATUS_WMARK_REACHED_GET(val));
+		netif_stop_queue(ndev);
+		hrtimer_start(&port->inj_timer, INJ_TIMEOUT_NS,
+			      HRTIMER_MODE_REL);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device_stats *stats = &dev->stats;
+	struct sparx5_port *port = netdev_priv(dev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int ret;
+
+	ret = sparx5_inject(sparx5, port->ifh, skb, dev);
+
+	if (ret == NETDEV_TX_OK) {
+		stats->tx_bytes += skb->len;
+		stats->tx_packets++;
+		skb_tx_timestamp(skb);
+		dev_kfree_skb_any(skb);
+	} else {
+		stats->tx_dropped++;
+	}
+	return ret;
+}
+
+static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr)
+{
+	struct sparx5_port *port = container_of(tmr, struct sparx5_port,
+						inj_timer);
+	int grp = INJ_QUEUE;
+	u32 val;
+
+	val = spx5_rd(port->sparx5, QS_INJ_STATUS);
+	if (QS_INJ_STATUS_WMARK_REACHED_GET(val) & BIT(grp)) {
+		pr_err_ratelimited("Injection: Reset watermark count\n");
+		/* Reset Watermark count to restart */
+		spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR_SET(1),
+			 DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR,
+			 port->sparx5,
+			 DSM_DEV_TX_STOP_WM_CFG(port->portno));
+	}
+	netif_wake_queue(port->ndev);
+	return HRTIMER_NORESTART;
+}
+
+int sparx5_manual_injection_mode(struct sparx5 *sparx5)
+{
+	const int byte_swap = 1;
+	int portno;
+
+	/* Change mode to manual extraction and injection */
+	spx5_wr(QS_XTR_GRP_CFG_MODE_SET(1) |
+		QS_XTR_GRP_CFG_STATUS_WORD_POS_SET(1) |
+		QS_XTR_GRP_CFG_BYTE_SWAP_SET(byte_swap),
+		sparx5, QS_XTR_GRP_CFG(XTR_QUEUE));
+	spx5_wr(QS_INJ_GRP_CFG_MODE_SET(1) |
+		QS_INJ_GRP_CFG_BYTE_SWAP_SET(byte_swap),
+		sparx5, QS_INJ_GRP_CFG(INJ_QUEUE));
+
+	/* CPU ports capture setup */
+	for (portno = SPX5_PORT_CPU_0; portno <= SPX5_PORT_CPU_1; portno++) {
+		/* ASM CPU port: No preamble, IFH, enable padding */
+		spx5_wr(ASM_PORT_CFG_PAD_ENA_SET(1) |
+			ASM_PORT_CFG_NO_PREAMBLE_ENA_SET(1) |
+			ASM_PORT_CFG_INJ_FORMAT_CFG_SET(1), /* 1 = IFH */
+			sparx5, ASM_PORT_CFG(portno));
+
+		/* Reset WM cnt to unclog queued frames */
+		spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR_SET(1),
+			 DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR,
+			 sparx5,
+			 DSM_DEV_TX_STOP_WM_CFG(portno));
+
+		/* Set Disassembler Stop Watermark level */
+		spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM_SET(0),
+			 DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM,
+			 sparx5,
+			 DSM_DEV_TX_STOP_WM_CFG(portno));
+
+		/* Enable Disassembler buffer underrun watchdog
+		 */
+		spx5_rmw(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS_SET(0),
+			 DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS,
+			 sparx5,
+			 DSM_BUF_CFG(portno));
+	}
+	return 0;
+}
+
+irqreturn_t sparx5_xtr_handler(int irq, void *_sparx5)
+{
+	struct sparx5 *s5 = _sparx5;
+	int poll = 64;
+
+	/* Check data in queue */
+	while (spx5_rd(s5, QS_XTR_DATA_PRESENT) & BIT(XTR_QUEUE) && poll-- > 0)
+		sparx5_xtr_grp(s5, XTR_QUEUE, false);
+
+	return IRQ_HANDLED;
+}
+
+void sparx5_port_inj_timer_setup(struct sparx5_port *port)
+{
+	hrtimer_init(&port->inj_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	port->inj_timer.function = sparx5_injection_timeout;
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
new file mode 100644
index 000000000000..7fedef52d4aa
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include <linux/module.h>
+#include <linux/phylink.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/sfp.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+static bool port_conf_has_changed(struct sparx5_port_config *a, struct sparx5_port_config *b)
+{
+	if (a->speed != b->speed ||
+	    a->portmode != b->portmode ||
+	    a->autoneg != b->autoneg ||
+	    a->pause_adv != b->pause_adv ||
+	    a->power_down != b->power_down ||
+	    a->media != b->media)
+		return true;
+	return false;
+}
+
+static void sparx5_phylink_validate(struct phylink_config *config,
+				    unsigned long *supported,
+				    struct phylink_link_state *state)
+{
+	struct sparx5_port *port = netdev_priv(to_net_dev(config->dev));
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_5GBASER:
+	case PHY_INTERFACE_MODE_10GBASER:
+	case PHY_INTERFACE_MODE_25GBASER:
+	case PHY_INTERFACE_MODE_NA:
+		if (port->conf.bandwidth == SPEED_5000)
+			phylink_set(mask, 5000baseT_Full);
+		if (port->conf.bandwidth == SPEED_10000) {
+			phylink_set(mask, 5000baseT_Full);
+			phylink_set(mask, 10000baseT_Full);
+			phylink_set(mask, 10000baseCR_Full);
+			phylink_set(mask, 10000baseSR_Full);
+			phylink_set(mask, 10000baseLR_Full);
+			phylink_set(mask, 10000baseLRM_Full);
+			phylink_set(mask, 10000baseER_Full);
+		}
+		if (port->conf.bandwidth == SPEED_25000) {
+			phylink_set(mask, 5000baseT_Full);
+			phylink_set(mask, 10000baseT_Full);
+			phylink_set(mask, 10000baseCR_Full);
+			phylink_set(mask, 10000baseSR_Full);
+			phylink_set(mask, 10000baseLR_Full);
+			phylink_set(mask, 10000baseLRM_Full);
+			phylink_set(mask, 10000baseER_Full);
+			phylink_set(mask, 25000baseCR_Full);
+			phylink_set(mask, 25000baseSR_Full);
+		}
+		if (state->interface != PHY_INTERFACE_MODE_NA)
+			break;
+		fallthrough;
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_QSGMII:
+		phylink_set(mask, 10baseT_Half);
+		phylink_set(mask, 10baseT_Full);
+		phylink_set(mask, 100baseT_Half);
+		phylink_set(mask, 100baseT_Full);
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseX_Full);
+		if (state->interface != PHY_INTERFACE_MODE_NA)
+			break;
+		fallthrough;
+	case PHY_INTERFACE_MODE_1000BASEX:
+	case PHY_INTERFACE_MODE_2500BASEX:
+		if (state->interface != PHY_INTERFACE_MODE_2500BASEX) {
+			phylink_set(mask, 1000baseT_Full);
+			phylink_set(mask, 1000baseX_Full);
+		}
+		if (state->interface == PHY_INTERFACE_MODE_2500BASEX ||
+		    state->interface == PHY_INTERFACE_MODE_NA) {
+			phylink_set(mask, 2500baseT_Full);
+			phylink_set(mask, 2500baseX_Full);
+		}
+		break;
+	default:
+		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		return;
+	}
+	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void sparx5_phylink_mac_config(struct phylink_config *config,
+				      unsigned int mode,
+				      const struct phylink_link_state *state)
+{
+	/* Currently not used */
+}
+
+static void sparx5_phylink_mac_link_up(struct phylink_config *config,
+				       struct phy_device *phy,
+				       unsigned int mode,
+				       phy_interface_t interface,
+				       int speed, int duplex,
+				       bool tx_pause, bool rx_pause)
+{
+	struct sparx5_port *port = netdev_priv(to_net_dev(config->dev));
+	struct sparx5_port_config conf;
+
+	conf = port->conf;
+	conf.duplex = duplex;
+	conf.pause = 0;
+	conf.pause |= tx_pause ? MLO_PAUSE_TX : 0;
+	conf.pause |= rx_pause ? MLO_PAUSE_RX : 0;
+	conf.speed = speed;
+	/* Port configuration to be added later */
+}
+
+static void sparx5_phylink_mac_link_down(struct phylink_config *config,
+					 unsigned int mode,
+					 phy_interface_t interface)
+{
+	/* Currently not used */
+}
+
+static struct sparx5_port *sparx5_pcs_to_port(struct phylink_pcs *pcs)
+{
+	return container_of(pcs, struct sparx5_port, phylink_pcs);
+}
+
+static void sparx5_pcs_get_state(struct phylink_pcs *pcs,
+				 struct phylink_link_state *state)
+{
+	/* Getting port status to be added later, just defaults now */
+	state->link = true;
+	state->an_complete = true;
+	state->speed = SPEED_1000;
+	state->duplex = true;
+	state->pause = MLO_PAUSE_AN;
+}
+
+static int sparx5_pcs_config(struct phylink_pcs *pcs,
+			     unsigned int mode,
+			     phy_interface_t interface,
+			     const unsigned long *advertising,
+			     bool permit_pause_to_mac)
+{
+	struct sparx5_port *port = sparx5_pcs_to_port(pcs);
+	struct sparx5_port_config conf;
+	int ret = 0;
+
+	conf = port->conf;
+	conf.power_down = false;
+	conf.portmode = interface;
+	conf.inband = phylink_autoneg_inband(mode);
+	conf.autoneg = phylink_test(advertising, Autoneg);
+	conf.pause_adv = 0;
+	if (phylink_test(advertising, Pause))
+		conf.pause_adv |= ADVERTISE_1000XPAUSE;
+	if (phylink_test(advertising, Asym_Pause))
+		conf.pause_adv |= ADVERTISE_1000XPSE_ASYM;
+	if (sparx5_is_baser(interface)) {
+		if (phylink_test(advertising, FIBRE))
+			conf.media = PHY_MEDIA_SR;
+		else
+			conf.media = PHY_MEDIA_DAC;
+	}
+	if (!port_conf_has_changed(&port->conf, &conf))
+		return ret;
+	/* PCS configuration added later */
+	return ret;
+}
+
+static void sparx5_pcs_aneg_restart(struct phylink_pcs *pcs)
+{
+	/* Currently not used */
+}
+
+const struct phylink_pcs_ops sparx5_phylink_pcs_ops = {
+	.pcs_get_state = sparx5_pcs_get_state,
+	.pcs_config = sparx5_pcs_config,
+	.pcs_an_restart = sparx5_pcs_aneg_restart,
+};
+
+const struct phylink_mac_ops sparx5_phylink_mac_ops = {
+	.validate = sparx5_phylink_validate,
+	.mac_config = sparx5_phylink_mac_config,
+	.mac_link_down = sparx5_phylink_mac_link_down,
+	.mac_link_up = sparx5_phylink_mac_link_up,
+};

From 946e7fd5053a218d3907268cd85c9a519561d5d8 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:52 +0200
Subject: [PATCH 2830/3292] net: sparx5: add port module support

This add configuration of the Sparx5 port module instances.

Sparx5 has in total 65 logical ports (denoted D0 to D64) and 33
physical SerDes connections (S0 to S32).  The 65th port (D64) is fixed
allocated to SerDes0 (S0). The remaining 64 ports can in various
multiplexing scenarios be connected to the remaining 32 SerDes using
QSGMII, or USGMII or USXGMII extenders. 32 of the ports can have a 1:1
mapping to the 32 SerDes.

Some additional ports (D65 to D69) are internal to the device and do not
connect to port modules or SerDes macros. For example, internal ports are
used for frame injection and extraction to the CPU queues.

The 65 logical ports are split up into the following blocks.

- 13 x 5G ports (D0-D11, D64)
- 32 x 2G5 ports (D16-D47)
- 12 x 10G ports (D12-D15, D48-D55)
- 8 x 25G ports (D56-D63)

Each logical port supports different line speeds, and depending on the
speeds supported, different port modules (MAC+PCS) are needed. A port
supporting 5 Gbps, 10 Gbps, or 25 Gbps as maximum line speed, will have a
DEV5G, DEV10G, or DEV25G module to support the 5 Gbps, 10 Gbps (incl 5
Gbps), or 25 Gbps (including 10 Gbps and 5 Gbps) speeds. As well as, it
will have a shadow DEV2G5 port module to support the lower speeds
(10/100/1000/2500Mbps). When a port needs to operate at lower speed and the
shadow DEV2G5 needs to be connected to its corresponding SerDes

Not all interface modes are supported in this series, but will be added at
a later stage.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |    2 +-
 .../ethernet/microchip/sparx5/sparx5_main.c   |    9 +-
 .../ethernet/microchip/sparx5/sparx5_netdev.c |   14 +-
 .../microchip/sparx5/sparx5_phylink.c         |   27 +-
 .../ethernet/microchip/sparx5/sparx5_port.c   | 1146 +++++++++++++++++
 .../ethernet/microchip/sparx5/sparx5_port.h   |   93 ++
 6 files changed, 1279 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_port.c
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_port.h

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index 48a7c110040a..25f115d2a2d9 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -6,4 +6,4 @@
 obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
- sparx5_netdev.o sparx5_phylink.o
+ sparx5_netdev.o sparx5_phylink.o sparx5_port.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index c295d814f75f..107da841ccc4 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -26,6 +26,7 @@
 
 #include "sparx5_main_regs.h"
 #include "sparx5_main.h"
+#include "sparx5_port.h"
 
 #define QLIM_WM(fraction) \
 	((SPX5_BUFFER_MEMORY / SPX5_BUFFER_CELL_SZ - 100) * (fraction) / 100)
@@ -252,6 +253,7 @@ static int sparx5_create_port(struct sparx5 *sparx5,
 	struct sparx5_port *spx5_port;
 	struct net_device *ndev;
 	struct phylink *phylink;
+	int err;
 
 	ndev = sparx5_create_netdev(sparx5, config->portno);
 	if (IS_ERR(ndev)) {
@@ -273,9 +275,14 @@ static int sparx5_create_port(struct sparx5 *sparx5,
 	spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops;
 	sparx5->ports[config->portno] = spx5_port;
 
+	err = sparx5_port_init(sparx5, spx5_port, &config->conf);
+	if (err) {
+		dev_err(sparx5->dev, "port init failed\n");
+		return err;
+	}
 	spx5_port->conf = config->conf;
 
-	/* VLAN setup to be added in later patches */
+	/* VLAN support to be added in later patches */
 
 	/* Create a phylink for PHY management.  Also handles SFPs */
 	spx5_port->phylink_config.dev = &spx5_port->ndev->dev;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index b6df6e8b80d5..4c2280345ecc 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -6,6 +6,7 @@
 
 #include "sparx5_main_regs.h"
 #include "sparx5_main.h"
+#include "sparx5_port.h"
 
 /* The IFH bit position of the first VSTAX bit. This is because the
  * VSTAX bit positions in Data sheet is starting from zero.
@@ -71,6 +72,7 @@ static int sparx5_port_open(struct net_device *ndev)
 	struct sparx5_port *port = netdev_priv(ndev);
 	int err = 0;
 
+	sparx5_port_enable(port, true);
 	err = phylink_of_phy_connect(port->phylink, port->of_node, 0);
 	if (err) {
 		netdev_err(ndev, "Could not attach to PHY\n");
@@ -82,7 +84,10 @@ static int sparx5_port_open(struct net_device *ndev)
 	if (!ndev->phydev) {
 		/* power up serdes */
 		port->conf.power_down = false;
-		err = phy_power_on(port->serdes);
+		if (port->conf.serdes_reset)
+			err = sparx5_serdes_set(port->sparx5, port, &port->conf);
+		else
+			err = phy_power_on(port->serdes);
 		if (err)
 			netdev_err(ndev, "%s failed\n", __func__);
 	}
@@ -95,12 +100,17 @@ static int sparx5_port_stop(struct net_device *ndev)
 	struct sparx5_port *port = netdev_priv(ndev);
 	int err = 0;
 
+	sparx5_port_enable(port, false);
 	phylink_stop(port->phylink);
 	phylink_disconnect_phy(port->phylink);
 
 	if (!ndev->phydev) {
+		/* power down serdes */
 		port->conf.power_down = true;
-		err = phy_power_off(port->serdes);
+		if (port->conf.serdes_reset)
+			err = sparx5_serdes_set(port->sparx5, port, &port->conf);
+		else
+			err = phy_power_off(port->serdes);
 		if (err)
 			netdev_err(ndev, "%s failed\n", __func__);
 	}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
index 7fedef52d4aa..af70e2795125 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c
@@ -12,6 +12,7 @@
 
 #include "sparx5_main_regs.h"
 #include "sparx5_main.h"
+#include "sparx5_port.h"
 
 static bool port_conf_has_changed(struct sparx5_port_config *a, struct sparx5_port_config *b)
 {
@@ -115,6 +116,7 @@ static void sparx5_phylink_mac_link_up(struct phylink_config *config,
 {
 	struct sparx5_port *port = netdev_priv(to_net_dev(config->dev));
 	struct sparx5_port_config conf;
+	int err;
 
 	conf = port->conf;
 	conf.duplex = duplex;
@@ -122,7 +124,10 @@ static void sparx5_phylink_mac_link_up(struct phylink_config *config,
 	conf.pause |= tx_pause ? MLO_PAUSE_TX : 0;
 	conf.pause |= rx_pause ? MLO_PAUSE_RX : 0;
 	conf.speed = speed;
-	/* Port configuration to be added later */
+	/* Configure the port to speed/duplex/pause */
+	err = sparx5_port_config(port->sparx5, port, &conf);
+	if (err)
+		netdev_err(port->ndev, "port config failed: %d\n", err);
 }
 
 static void sparx5_phylink_mac_link_down(struct phylink_config *config,
@@ -140,12 +145,15 @@ static struct sparx5_port *sparx5_pcs_to_port(struct phylink_pcs *pcs)
 static void sparx5_pcs_get_state(struct phylink_pcs *pcs,
 				 struct phylink_link_state *state)
 {
-	/* Getting port status to be added later, just defaults now */
-	state->link = true;
-	state->an_complete = true;
-	state->speed = SPEED_1000;
-	state->duplex = true;
-	state->pause = MLO_PAUSE_AN;
+	struct sparx5_port *port = sparx5_pcs_to_port(pcs);
+	struct sparx5_port_status status;
+
+	sparx5_get_port_status(port->sparx5, port, &status);
+	state->link = status.link && !status.link_down;
+	state->an_complete = status.an_complete;
+	state->speed = status.speed;
+	state->duplex = status.duplex;
+	state->pause = status.pause;
 }
 
 static int sparx5_pcs_config(struct phylink_pcs *pcs,
@@ -176,7 +184,10 @@ static int sparx5_pcs_config(struct phylink_pcs *pcs,
 	}
 	if (!port_conf_has_changed(&port->conf, &conf))
 		return ret;
-	/* PCS configuration added later */
+	/* Enable the PCS matching this interface type */
+	ret = sparx5_port_pcs_set(port->sparx5, port, &conf);
+	if (ret)
+		netdev_err(port->ndev, "port PCS config failed: %d\n", ret);
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
new file mode 100644
index 000000000000..d2e3250928bf
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -0,0 +1,1146 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include <linux/module.h>
+#include <linux/phy/phy.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+#include "sparx5_port.h"
+
+#define SPX5_ETYPE_TAG_C     0x8100
+#define SPX5_ETYPE_TAG_S     0x88a8
+
+#define SPX5_WAIT_US         1000
+#define SPX5_WAIT_MAX_US     2000
+
+enum port_error {
+	SPX5_PERR_SPEED,
+	SPX5_PERR_IFTYPE,
+};
+
+#define PAUSE_DISCARD        0xC
+#define ETH_MAXLEN           (ETH_DATA_LEN + ETH_HLEN + ETH_FCS_LEN)
+
+static void decode_sgmii_word(u16 lp_abil, struct sparx5_port_status *status)
+{
+	status->an_complete = true;
+	if (!(lp_abil & LPA_SGMII_LINK)) {
+		status->link = false;
+		return;
+	}
+
+	switch (lp_abil & LPA_SGMII_SPD_MASK) {
+	case LPA_SGMII_10:
+		status->speed = SPEED_10;
+		break;
+	case LPA_SGMII_100:
+		status->speed = SPEED_100;
+		break;
+	case LPA_SGMII_1000:
+		status->speed = SPEED_1000;
+		break;
+	default:
+		status->link = false;
+		return;
+	}
+	if (lp_abil & LPA_SGMII_FULL_DUPLEX)
+		status->duplex = DUPLEX_FULL;
+	else
+		status->duplex = DUPLEX_HALF;
+}
+
+static void decode_cl37_word(u16 lp_abil, uint16_t ld_abil, struct sparx5_port_status *status)
+{
+	status->link = !(lp_abil & ADVERTISE_RFAULT) && status->link;
+	status->an_complete = true;
+	status->duplex = (ADVERTISE_1000XFULL & lp_abil) ?
+		DUPLEX_FULL : DUPLEX_UNKNOWN; // 1G HDX not supported
+
+	if ((ld_abil & ADVERTISE_1000XPAUSE) &&
+	    (lp_abil & ADVERTISE_1000XPAUSE)) {
+		status->pause = MLO_PAUSE_RX | MLO_PAUSE_TX;
+	} else if ((ld_abil & ADVERTISE_1000XPSE_ASYM) &&
+		   (lp_abil & ADVERTISE_1000XPSE_ASYM)) {
+		status->pause |= (lp_abil & ADVERTISE_1000XPAUSE) ?
+			MLO_PAUSE_TX : 0;
+		status->pause |= (ld_abil & ADVERTISE_1000XPAUSE) ?
+			MLO_PAUSE_RX : 0;
+	} else {
+		status->pause = MLO_PAUSE_NONE;
+	}
+}
+
+static int sparx5_get_dev2g5_status(struct sparx5 *sparx5,
+				    struct sparx5_port *port,
+				    struct sparx5_port_status *status)
+{
+	u32 portno = port->portno;
+	u16 lp_adv, ld_adv;
+	u32 value;
+
+	/* Get PCS Link down sticky */
+	value = spx5_rd(sparx5, DEV2G5_PCS1G_STICKY(portno));
+	status->link_down = DEV2G5_PCS1G_STICKY_LINK_DOWN_STICKY_GET(value);
+	if (status->link_down)	/* Clear the sticky */
+		spx5_wr(value, sparx5, DEV2G5_PCS1G_STICKY(portno));
+
+	/* Get both current Link and Sync status */
+	value = spx5_rd(sparx5, DEV2G5_PCS1G_LINK_STATUS(portno));
+	status->link = DEV2G5_PCS1G_LINK_STATUS_LINK_STATUS_GET(value) &&
+		       DEV2G5_PCS1G_LINK_STATUS_SYNC_STATUS_GET(value);
+
+	if (port->conf.portmode == PHY_INTERFACE_MODE_1000BASEX)
+		status->speed = SPEED_1000;
+	else if (port->conf.portmode == PHY_INTERFACE_MODE_2500BASEX)
+		status->speed = SPEED_2500;
+
+	status->duplex = DUPLEX_FULL;
+
+	/* Get PCS ANEG status register */
+	value = spx5_rd(sparx5, DEV2G5_PCS1G_ANEG_STATUS(portno));
+
+	/* Aneg complete provides more information  */
+	if (DEV2G5_PCS1G_ANEG_STATUS_ANEG_COMPLETE_GET(value)) {
+		lp_adv = DEV2G5_PCS1G_ANEG_STATUS_LP_ADV_ABILITY_GET(value);
+		if (port->conf.portmode == PHY_INTERFACE_MODE_SGMII) {
+			decode_sgmii_word(lp_adv, status);
+		} else {
+			value = spx5_rd(sparx5, DEV2G5_PCS1G_ANEG_CFG(portno));
+			ld_adv = DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY_GET(value);
+			decode_cl37_word(lp_adv, ld_adv, status);
+		}
+	}
+	return 0;
+}
+
+static int sparx5_get_sfi_status(struct sparx5 *sparx5,
+				 struct sparx5_port *port,
+				 struct sparx5_port_status *status)
+{
+	bool high_speed_dev = sparx5_is_baser(port->conf.portmode);
+	u32 portno = port->portno;
+	u32 value, dev, tinst;
+	void __iomem *inst;
+
+	if (!high_speed_dev) {
+		netdev_err(port->ndev, "error: low speed and SFI mode\n");
+		return -EINVAL;
+	}
+
+	dev = sparx5_to_high_dev(portno);
+	tinst = sparx5_port_dev_index(portno);
+	inst = spx5_inst_get(sparx5, dev, tinst);
+
+	value = spx5_inst_rd(inst, DEV10G_MAC_TX_MONITOR_STICKY(0));
+	if (value != DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY) {
+		/* The link is or has been down. Clear the sticky bit */
+		status->link_down = 1;
+		spx5_inst_wr(0xffffffff, inst, DEV10G_MAC_TX_MONITOR_STICKY(0));
+		value = spx5_inst_rd(inst, DEV10G_MAC_TX_MONITOR_STICKY(0));
+	}
+	status->link = (value == DEV10G_MAC_TX_MONITOR_STICKY_IDLE_STATE_STICKY);
+	status->duplex = DUPLEX_FULL;
+	if (port->conf.portmode == PHY_INTERFACE_MODE_5GBASER)
+		status->speed = SPEED_5000;
+	else if (port->conf.portmode == PHY_INTERFACE_MODE_10GBASER)
+		status->speed = SPEED_10000;
+	else
+		status->speed = SPEED_25000;
+
+	return 0;
+}
+
+/* Get link status of 1000Base-X/in-band and SFI ports.
+ */
+int sparx5_get_port_status(struct sparx5 *sparx5,
+			   struct sparx5_port *port,
+			   struct sparx5_port_status *status)
+{
+	memset(status, 0, sizeof(*status));
+	status->speed = port->conf.speed;
+	if (port->conf.power_down) {
+		status->link = false;
+		return 0;
+	}
+	switch (port->conf.portmode) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_QSGMII:
+	case PHY_INTERFACE_MODE_1000BASEX:
+	case PHY_INTERFACE_MODE_2500BASEX:
+		return sparx5_get_dev2g5_status(sparx5, port, status);
+	case PHY_INTERFACE_MODE_5GBASER:
+	case PHY_INTERFACE_MODE_10GBASER:
+	case PHY_INTERFACE_MODE_25GBASER:
+		return sparx5_get_sfi_status(sparx5, port, status);
+	case PHY_INTERFACE_MODE_NA:
+		return 0;
+	default:
+		netdev_err(port->ndev, "Status not supported");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int sparx5_port_error(struct sparx5_port *port,
+			     struct sparx5_port_config *conf,
+			     enum port_error errtype)
+{
+	switch (errtype) {
+	case SPX5_PERR_SPEED:
+		netdev_err(port->ndev,
+			   "Interface does not support speed: %u: for %s\n",
+			   conf->speed, phy_modes(conf->portmode));
+		break;
+	case SPX5_PERR_IFTYPE:
+		netdev_err(port->ndev,
+			   "Switch port does not support interface type: %s\n",
+			   phy_modes(conf->portmode));
+		break;
+	default:
+		netdev_err(port->ndev,
+			   "Interface configuration error\n");
+	}
+
+	return -EINVAL;
+}
+
+static int sparx5_port_verify_speed(struct sparx5 *sparx5,
+				    struct sparx5_port *port,
+				    struct sparx5_port_config *conf)
+{
+	if ((sparx5_port_is_2g5(port->portno) &&
+	     conf->speed > SPEED_2500) ||
+	    (sparx5_port_is_5g(port->portno)  &&
+	     conf->speed > SPEED_5000) ||
+	    (sparx5_port_is_10g(port->portno) &&
+	     conf->speed > SPEED_10000))
+		return sparx5_port_error(port, conf, SPX5_PERR_SPEED);
+
+	switch (conf->portmode) {
+	case PHY_INTERFACE_MODE_NA:
+		return -EINVAL;
+	case PHY_INTERFACE_MODE_1000BASEX:
+		if (conf->speed != SPEED_1000 ||
+		    sparx5_port_is_2g5(port->portno))
+			return sparx5_port_error(port, conf, SPX5_PERR_SPEED);
+		if (sparx5_port_is_2g5(port->portno))
+			return sparx5_port_error(port, conf, SPX5_PERR_IFTYPE);
+		break;
+	case PHY_INTERFACE_MODE_2500BASEX:
+		if (conf->speed != SPEED_2500 ||
+		    sparx5_port_is_2g5(port->portno))
+			return sparx5_port_error(port, conf, SPX5_PERR_SPEED);
+		break;
+	case PHY_INTERFACE_MODE_QSGMII:
+		if (port->portno > 47)
+			return sparx5_port_error(port, conf, SPX5_PERR_IFTYPE);
+		fallthrough;
+	case PHY_INTERFACE_MODE_SGMII:
+		if (conf->speed != SPEED_1000 &&
+		    conf->speed != SPEED_100 &&
+		    conf->speed != SPEED_10 &&
+		    conf->speed != SPEED_2500)
+			return sparx5_port_error(port, conf, SPX5_PERR_SPEED);
+		break;
+	case PHY_INTERFACE_MODE_5GBASER:
+	case PHY_INTERFACE_MODE_10GBASER:
+	case PHY_INTERFACE_MODE_25GBASER:
+		if ((conf->speed != SPEED_5000 &&
+		     conf->speed != SPEED_10000 &&
+		     conf->speed != SPEED_25000))
+			return sparx5_port_error(port, conf, SPX5_PERR_SPEED);
+		break;
+	default:
+		return sparx5_port_error(port, conf, SPX5_PERR_IFTYPE);
+	}
+	return 0;
+}
+
+static bool sparx5_dev_change(struct sparx5 *sparx5,
+			      struct sparx5_port *port,
+			      struct sparx5_port_config *conf)
+{
+	return sparx5_is_baser(port->conf.portmode) ^
+		sparx5_is_baser(conf->portmode);
+}
+
+static int sparx5_port_flush_poll(struct sparx5 *sparx5, u32 portno)
+{
+	u32  value, resource, prio, delay_cnt = 0;
+	bool poll_src = true;
+	char *mem = "";
+
+	/* Resource == 0: Memory tracked per source (SRC-MEM)
+	 * Resource == 1: Frame references tracked per source (SRC-REF)
+	 * Resource == 2: Memory tracked per destination (DST-MEM)
+	 * Resource == 3: Frame references tracked per destination. (DST-REF)
+	 */
+	while (1) {
+		bool empty = true;
+
+		for (resource = 0; resource < (poll_src ? 2 : 1); resource++) {
+			u32 base;
+
+			base = (resource == 0 ? 2048 : 0) + SPX5_PRIOS * portno;
+			for (prio = 0; prio < SPX5_PRIOS; prio++) {
+				value = spx5_rd(sparx5,
+						QRES_RES_STAT(base + prio));
+				if (value) {
+					mem = resource == 0 ?
+						"DST-MEM" : "SRC-MEM";
+					empty = false;
+				}
+			}
+		}
+
+		if (empty)
+			break;
+
+		if (delay_cnt++ == 2000) {
+			dev_err(sparx5->dev,
+				"Flush timeout port %u. %s queue not empty\n",
+				portno, mem);
+			return -EINVAL;
+		}
+
+		usleep_range(SPX5_WAIT_US, SPX5_WAIT_MAX_US);
+	}
+	return 0;
+}
+
+static int sparx5_port_disable(struct sparx5 *sparx5, struct sparx5_port *port, bool high_spd_dev)
+{
+	u32 tinst = high_spd_dev ?
+		    sparx5_port_dev_index(port->portno) : port->portno;
+	u32 dev = high_spd_dev ?
+		  sparx5_to_high_dev(port->portno) : TARGET_DEV2G5;
+	void __iomem *devinst = spx5_inst_get(sparx5, dev, tinst);
+	u32 spd = port->conf.speed;
+	u32 spd_prm;
+	int err;
+
+	if (high_spd_dev) {
+		/* 1: Reset the PCS Rx clock domain  */
+		spx5_inst_rmw(DEV10G_DEV_RST_CTRL_PCS_RX_RST,
+			      DEV10G_DEV_RST_CTRL_PCS_RX_RST,
+			      devinst,
+			      DEV10G_DEV_RST_CTRL(0));
+
+		/* 2: Disable MAC frame reception */
+		spx5_inst_rmw(0,
+			      DEV10G_MAC_ENA_CFG_RX_ENA,
+			      devinst,
+			      DEV10G_MAC_ENA_CFG(0));
+	} else {
+		/* 1: Reset the PCS Rx clock domain  */
+		spx5_inst_rmw(DEV2G5_DEV_RST_CTRL_PCS_RX_RST,
+			      DEV2G5_DEV_RST_CTRL_PCS_RX_RST,
+			      devinst,
+			      DEV2G5_DEV_RST_CTRL(0));
+		/* 2: Disable MAC frame reception */
+		spx5_inst_rmw(0,
+			      DEV2G5_MAC_ENA_CFG_RX_ENA,
+			      devinst,
+			      DEV2G5_MAC_ENA_CFG(0));
+	}
+	/* 3: Disable traffic being sent to or from switch port->portno */
+	spx5_rmw(0,
+		 QFWD_SWITCH_PORT_MODE_PORT_ENA,
+		 sparx5,
+		 QFWD_SWITCH_PORT_MODE(port->portno));
+
+	/* 4: Disable dequeuing from the egress queues  */
+	spx5_rmw(HSCH_PORT_MODE_DEQUEUE_DIS,
+		 HSCH_PORT_MODE_DEQUEUE_DIS,
+		 sparx5,
+		 HSCH_PORT_MODE(port->portno));
+
+	/* 5: Disable Flowcontrol */
+	spx5_rmw(QSYS_PAUSE_CFG_PAUSE_STOP_SET(0xFFF - 1),
+		 QSYS_PAUSE_CFG_PAUSE_STOP,
+		 sparx5,
+		 QSYS_PAUSE_CFG(port->portno));
+
+	spd_prm = spd == SPEED_10 ? 1000 : spd == SPEED_100 ? 100 : 10;
+	/* 6: Wait while the last frame is exiting the queues */
+	usleep_range(8 * spd_prm, 10 * spd_prm);
+
+	/* 7: Flush the queues accociated with the port->portno */
+	spx5_rmw(HSCH_FLUSH_CTRL_FLUSH_PORT_SET(port->portno) |
+		 HSCH_FLUSH_CTRL_FLUSH_DST_SET(1) |
+		 HSCH_FLUSH_CTRL_FLUSH_SRC_SET(1) |
+		 HSCH_FLUSH_CTRL_FLUSH_ENA_SET(1),
+		 HSCH_FLUSH_CTRL_FLUSH_PORT |
+		 HSCH_FLUSH_CTRL_FLUSH_DST |
+		 HSCH_FLUSH_CTRL_FLUSH_SRC |
+		 HSCH_FLUSH_CTRL_FLUSH_ENA,
+		 sparx5,
+		 HSCH_FLUSH_CTRL);
+
+	/* 8: Enable dequeuing from the egress queues */
+	spx5_rmw(0,
+		 HSCH_PORT_MODE_DEQUEUE_DIS,
+		 sparx5,
+		 HSCH_PORT_MODE(port->portno));
+
+	/* 9: Wait until flushing is complete */
+	err = sparx5_port_flush_poll(sparx5, port->portno);
+	if (err)
+		return err;
+
+	/* 10: Reset the  MAC clock domain */
+	if (high_spd_dev) {
+		spx5_inst_rmw(DEV10G_DEV_RST_CTRL_PCS_TX_RST_SET(1) |
+			      DEV10G_DEV_RST_CTRL_MAC_RX_RST_SET(1) |
+			      DEV10G_DEV_RST_CTRL_MAC_TX_RST_SET(1),
+			      DEV10G_DEV_RST_CTRL_PCS_TX_RST |
+			      DEV10G_DEV_RST_CTRL_MAC_RX_RST |
+			      DEV10G_DEV_RST_CTRL_MAC_TX_RST,
+			      devinst,
+			      DEV10G_DEV_RST_CTRL(0));
+
+	} else {
+		spx5_inst_rmw(DEV2G5_DEV_RST_CTRL_SPEED_SEL_SET(3) |
+			      DEV2G5_DEV_RST_CTRL_PCS_TX_RST_SET(1) |
+			      DEV2G5_DEV_RST_CTRL_PCS_RX_RST_SET(1) |
+			      DEV2G5_DEV_RST_CTRL_MAC_TX_RST_SET(1) |
+			      DEV2G5_DEV_RST_CTRL_MAC_RX_RST_SET(1),
+			      DEV2G5_DEV_RST_CTRL_SPEED_SEL |
+			      DEV2G5_DEV_RST_CTRL_PCS_TX_RST |
+			      DEV2G5_DEV_RST_CTRL_PCS_RX_RST |
+			      DEV2G5_DEV_RST_CTRL_MAC_TX_RST |
+			      DEV2G5_DEV_RST_CTRL_MAC_RX_RST,
+			      devinst,
+			      DEV2G5_DEV_RST_CTRL(0));
+	}
+	/* 11: Clear flushing */
+	spx5_rmw(HSCH_FLUSH_CTRL_FLUSH_PORT_SET(port->portno) |
+		 HSCH_FLUSH_CTRL_FLUSH_ENA_SET(0),
+		 HSCH_FLUSH_CTRL_FLUSH_PORT |
+		 HSCH_FLUSH_CTRL_FLUSH_ENA,
+		 sparx5,
+		 HSCH_FLUSH_CTRL);
+
+	if (high_spd_dev) {
+		u32 pcs = sparx5_to_pcs_dev(port->portno);
+		void __iomem *pcsinst = spx5_inst_get(sparx5, pcs, tinst);
+
+		/* 12: Disable 5G/10G/25 BaseR PCS */
+		spx5_inst_rmw(PCS10G_BR_PCS_CFG_PCS_ENA_SET(0),
+			      PCS10G_BR_PCS_CFG_PCS_ENA,
+			      pcsinst,
+			      PCS10G_BR_PCS_CFG(0));
+
+		if (sparx5_port_is_25g(port->portno))
+			/* Disable 25G PCS */
+			spx5_rmw(DEV25G_PCS25G_CFG_PCS25G_ENA_SET(0),
+				 DEV25G_PCS25G_CFG_PCS25G_ENA,
+				 sparx5,
+				 DEV25G_PCS25G_CFG(tinst));
+	} else {
+		/* 12: Disable 1G PCS */
+		spx5_rmw(DEV2G5_PCS1G_CFG_PCS_ENA_SET(0),
+			 DEV2G5_PCS1G_CFG_PCS_ENA,
+			 sparx5,
+			 DEV2G5_PCS1G_CFG(port->portno));
+	}
+
+	/* The port is now flushed and disabled  */
+	return 0;
+}
+
+static int sparx5_port_fifo_sz(struct sparx5 *sparx5,
+			       u32 portno, u32 speed)
+{
+	u32 sys_clk = sparx5_clk_period(sparx5->coreclock);
+	const u32 taxi_dist[SPX5_PORTS_ALL] = {
+		6, 8, 10, 6, 8, 10, 6, 8, 10, 6, 8, 10,
+		4, 4, 4, 4,
+		11, 12, 13, 14, 15, 16, 17, 18,
+		11, 12, 13, 14, 15, 16, 17, 18,
+		11, 12, 13, 14, 15, 16, 17, 18,
+		11, 12, 13, 14, 15, 16, 17, 18,
+		4, 6, 8, 4, 6, 8, 6, 8,
+		2, 2, 2, 2, 2, 2, 2, 4, 2
+	};
+	u32 mac_per    = 6400, tmp1, tmp2, tmp3;
+	u32 fifo_width = 16;
+	u32 mac_width  = 8;
+	u32 addition   = 0;
+
+	switch (speed) {
+	case SPEED_25000:
+		return 0;
+	case SPEED_10000:
+		mac_per = 6400;
+		mac_width = 8;
+		addition = 1;
+		break;
+	case SPEED_5000:
+		mac_per = 12800;
+		mac_width = 8;
+		addition = 0;
+		break;
+	case SPEED_2500:
+		mac_per = 3200;
+		mac_width = 1;
+		addition = 0;
+		break;
+	case SPEED_1000:
+		mac_per =  8000;
+		mac_width = 1;
+		addition = 0;
+		break;
+	case SPEED_100:
+	case SPEED_10:
+		return 1;
+	default:
+		break;
+	}
+
+	tmp1 = 1000 * mac_width / fifo_width;
+	tmp2 = 3000 + ((12000 + 2 * taxi_dist[portno] * 1000)
+		       * sys_clk / mac_per);
+	tmp3 = tmp1 * tmp2 / 1000;
+	return  (tmp3 + 2000 + 999) / 1000 + addition;
+}
+
+/* Configure port muxing:
+ * QSGMII:     4x2G5 devices
+ */
+static int sparx5_port_mux_set(struct sparx5 *sparx5,
+			       struct sparx5_port *port,
+			       struct sparx5_port_config *conf)
+{
+	u32 portno = port->portno;
+	u32 inst;
+
+	if (port->conf.portmode == conf->portmode)
+		return 0; /* Nothing to do */
+
+	switch (conf->portmode) {
+	case PHY_INTERFACE_MODE_QSGMII: /* QSGMII: 4x2G5 devices. Mode Q'  */
+		inst = (portno - portno % 4) / 4;
+		spx5_rmw(BIT(inst),
+			 BIT(inst),
+			 sparx5,
+			 PORT_CONF_QSGMII_ENA);
+
+		if ((portno / 4 % 2) == 0) {
+			/* Affects d0-d3,d8-d11..d40-d43 */
+			spx5_rmw(PORT_CONF_USGMII_CFG_BYPASS_SCRAM_SET(1) |
+				 PORT_CONF_USGMII_CFG_BYPASS_DESCRAM_SET(1) |
+				 PORT_CONF_USGMII_CFG_QUAD_MODE_SET(1),
+				 PORT_CONF_USGMII_CFG_BYPASS_SCRAM |
+				 PORT_CONF_USGMII_CFG_BYPASS_DESCRAM |
+				 PORT_CONF_USGMII_CFG_QUAD_MODE,
+				 sparx5,
+				 PORT_CONF_USGMII_CFG((portno / 8)));
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int sparx5_port_max_tags_set(struct sparx5 *sparx5,
+				    struct sparx5_port *port)
+{
+	enum sparx5_port_max_tags max_tags    = port->max_vlan_tags;
+	int tag_ct          = max_tags == SPX5_PORT_MAX_TAGS_ONE ? 1 :
+			      max_tags == SPX5_PORT_MAX_TAGS_TWO ? 2 : 0;
+	bool dtag           = max_tags == SPX5_PORT_MAX_TAGS_TWO;
+	enum sparx5_vlan_port_type vlan_type  = port->vlan_type;
+	bool dotag          = max_tags != SPX5_PORT_MAX_TAGS_NONE;
+	u32 dev             = sparx5_to_high_dev(port->portno);
+	u32 tinst           = sparx5_port_dev_index(port->portno);
+	void __iomem *inst  = spx5_inst_get(sparx5, dev, tinst);
+	u32 etype;
+
+	etype = (vlan_type == SPX5_VLAN_PORT_TYPE_S_CUSTOM ?
+		 port->custom_etype :
+		 vlan_type == SPX5_VLAN_PORT_TYPE_C ?
+		 SPX5_ETYPE_TAG_C : SPX5_ETYPE_TAG_S);
+
+	spx5_wr(DEV2G5_MAC_TAGS_CFG_TAG_ID_SET(etype) |
+		DEV2G5_MAC_TAGS_CFG_PB_ENA_SET(dtag) |
+		DEV2G5_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(dotag) |
+		DEV2G5_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA_SET(dotag),
+		sparx5,
+		DEV2G5_MAC_TAGS_CFG(port->portno));
+
+	if (sparx5_port_is_2g5(port->portno))
+		return 0;
+
+	spx5_inst_rmw(DEV10G_MAC_TAGS_CFG_TAG_ID_SET(etype) |
+		      DEV10G_MAC_TAGS_CFG_TAG_ENA_SET(dotag),
+		      DEV10G_MAC_TAGS_CFG_TAG_ID |
+		      DEV10G_MAC_TAGS_CFG_TAG_ENA,
+		      inst,
+		      DEV10G_MAC_TAGS_CFG(0, 0));
+
+	spx5_inst_rmw(DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS_SET(tag_ct),
+		      DEV10G_MAC_NUM_TAGS_CFG_NUM_TAGS,
+		      inst,
+		      DEV10G_MAC_NUM_TAGS_CFG(0));
+
+	spx5_inst_rmw(DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK_SET(dotag),
+		      DEV10G_MAC_MAXLEN_CFG_MAX_LEN_TAG_CHK,
+		      inst,
+		      DEV10G_MAC_MAXLEN_CFG(0));
+	return 0;
+}
+
+static int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed)
+{
+	u32 clk_period_ps = 1600; /* 625Mhz for now */
+	u32 urg = 672000;
+
+	switch (speed) {
+	case SPEED_10:
+	case SPEED_100:
+	case SPEED_1000:
+		urg = 672000;
+		break;
+	case SPEED_2500:
+		urg = 270000;
+		break;
+	case SPEED_5000:
+		urg = 135000;
+		break;
+	case SPEED_10000:
+		urg = 67200;
+		break;
+	case SPEED_25000:
+		urg = 27000;
+		break;
+	}
+	return urg / clk_period_ps - 1;
+}
+
+static u16 sparx5_wm_enc(u16 value)
+{
+	if (value >= 2048)
+		return 2048 + value / 16;
+
+	return value;
+}
+
+static int sparx5_port_fc_setup(struct sparx5 *sparx5,
+				struct sparx5_port *port,
+				struct sparx5_port_config *conf)
+{
+	bool fc_obey = conf->pause & MLO_PAUSE_RX ? 1 : 0;
+	u32 pause_stop = 0xFFF - 1; /* FC gen disabled */
+
+	if (conf->pause & MLO_PAUSE_TX)
+		pause_stop = sparx5_wm_enc(4  * (ETH_MAXLEN /
+						 SPX5_BUFFER_CELL_SZ));
+
+	/* Set HDX flowcontrol */
+	spx5_rmw(DSM_MAC_CFG_HDX_BACKPREASSURE_SET(conf->duplex == DUPLEX_HALF),
+		 DSM_MAC_CFG_HDX_BACKPREASSURE,
+		 sparx5,
+		 DSM_MAC_CFG(port->portno));
+
+	/* Obey flowcontrol  */
+	spx5_rmw(DSM_RX_PAUSE_CFG_RX_PAUSE_EN_SET(fc_obey),
+		 DSM_RX_PAUSE_CFG_RX_PAUSE_EN,
+		 sparx5,
+		 DSM_RX_PAUSE_CFG(port->portno));
+
+	/* Disable forward pressure */
+	spx5_rmw(QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS_SET(fc_obey),
+		 QSYS_FWD_PRESSURE_FWD_PRESSURE_DIS,
+		 sparx5,
+		 QSYS_FWD_PRESSURE(port->portno));
+
+	/* Generate pause frames */
+	spx5_rmw(QSYS_PAUSE_CFG_PAUSE_STOP_SET(pause_stop),
+		 QSYS_PAUSE_CFG_PAUSE_STOP,
+		 sparx5,
+		 QSYS_PAUSE_CFG(port->portno));
+
+	return 0;
+}
+
+static u16 sparx5_get_aneg_word(struct sparx5_port_config *conf)
+{
+	if (conf->portmode == PHY_INTERFACE_MODE_1000BASEX) /* cl-37 aneg */
+		return (conf->pause_adv | ADVERTISE_LPACK | ADVERTISE_1000XFULL);
+	else
+		return 1; /* Enable SGMII Aneg */
+}
+
+int sparx5_serdes_set(struct sparx5 *sparx5,
+		      struct sparx5_port *port,
+		      struct sparx5_port_config *conf)
+{
+	int portmode, err, speed = conf->speed;
+
+	if (conf->portmode == PHY_INTERFACE_MODE_QSGMII &&
+	    ((port->portno % 4) != 0)) {
+		return 0;
+	}
+	if (sparx5_is_baser(conf->portmode)) {
+		if (conf->portmode == PHY_INTERFACE_MODE_25GBASER)
+			speed = SPEED_25000;
+		else if (conf->portmode == PHY_INTERFACE_MODE_10GBASER)
+			speed = SPEED_10000;
+		else
+			speed = SPEED_5000;
+	}
+
+	err = phy_set_media(port->serdes, conf->media);
+	if (err)
+		return err;
+	if (speed > 0) {
+		err = phy_set_speed(port->serdes, speed);
+		if (err)
+			return err;
+	}
+	if (conf->serdes_reset) {
+		err = phy_reset(port->serdes);
+		if (err)
+			return err;
+	}
+
+	/* Configure SerDes with port parameters
+	 * For BaseR, the serdes driver supports 10GGBASE-R and speed 5G/10G/25G
+	 */
+	portmode = conf->portmode;
+	if (sparx5_is_baser(conf->portmode))
+		portmode = PHY_INTERFACE_MODE_10GBASER;
+	err = phy_set_mode_ext(port->serdes, PHY_MODE_ETHERNET, portmode);
+	if (err)
+		return err;
+	conf->serdes_reset = false;
+	return err;
+}
+
+static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
+				   struct sparx5_port *port,
+				   struct sparx5_port_config *conf)
+{
+	bool sgmii = false, inband_aneg = false;
+	int err;
+
+	if (port->conf.inband) {
+		if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
+		    conf->portmode == PHY_INTERFACE_MODE_QSGMII)
+			inband_aneg = true; /* Cisco-SGMII in-band-aneg */
+		else if (conf->portmode == PHY_INTERFACE_MODE_1000BASEX &&
+			 conf->autoneg)
+			inband_aneg = true; /* Clause-37 in-band-aneg */
+
+		err = sparx5_serdes_set(sparx5, port, conf);
+		if (err)
+			return -EINVAL;
+	} else {
+		sgmii = true; /* Phy is connnected to the MAC */
+	}
+
+	/* Choose SGMII or 1000BaseX/2500BaseX PCS mode */
+	spx5_rmw(DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA_SET(sgmii),
+		 DEV2G5_PCS1G_MODE_CFG_SGMII_MODE_ENA,
+		 sparx5,
+		 DEV2G5_PCS1G_MODE_CFG(port->portno));
+
+	/* Enable PCS */
+	spx5_wr(DEV2G5_PCS1G_CFG_PCS_ENA_SET(1),
+		sparx5,
+		DEV2G5_PCS1G_CFG(port->portno));
+
+	if (inband_aneg) {
+		u16 abil = sparx5_get_aneg_word(conf);
+
+		/* Enable in-band aneg */
+		spx5_wr(DEV2G5_PCS1G_ANEG_CFG_ADV_ABILITY_SET(abil) |
+			DEV2G5_PCS1G_ANEG_CFG_SW_RESOLVE_ENA_SET(1) |
+			DEV2G5_PCS1G_ANEG_CFG_ANEG_ENA_SET(1) |
+			DEV2G5_PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT_SET(1),
+			sparx5,
+			DEV2G5_PCS1G_ANEG_CFG(port->portno));
+	} else {
+		spx5_wr(0, sparx5, DEV2G5_PCS1G_ANEG_CFG(port->portno));
+	}
+
+	/* Take PCS out of reset */
+	spx5_rmw(DEV2G5_DEV_RST_CTRL_SPEED_SEL_SET(2) |
+		 DEV2G5_DEV_RST_CTRL_PCS_TX_RST_SET(0) |
+		 DEV2G5_DEV_RST_CTRL_PCS_RX_RST_SET(0),
+		 DEV2G5_DEV_RST_CTRL_SPEED_SEL |
+		 DEV2G5_DEV_RST_CTRL_PCS_TX_RST |
+		 DEV2G5_DEV_RST_CTRL_PCS_RX_RST,
+		 sparx5,
+		 DEV2G5_DEV_RST_CTRL(port->portno));
+
+	return 0;
+}
+
+static int sparx5_port_pcs_high_set(struct sparx5 *sparx5,
+				    struct sparx5_port *port,
+				    struct sparx5_port_config *conf)
+{
+	u32 clk_spd = conf->portmode == PHY_INTERFACE_MODE_5GBASER ? 1 : 0;
+	u32 pix = sparx5_port_dev_index(port->portno);
+	u32 dev = sparx5_to_high_dev(port->portno);
+	u32 pcs = sparx5_to_pcs_dev(port->portno);
+	void __iomem *devinst;
+	void __iomem *pcsinst;
+	int err;
+
+	devinst = spx5_inst_get(sparx5, dev, pix);
+	pcsinst = spx5_inst_get(sparx5, pcs, pix);
+
+	/*  SFI : No in-band-aneg. Speeds 5G/10G/25G */
+	err = sparx5_serdes_set(sparx5, port, conf);
+	if (err)
+		return -EINVAL;
+	if (conf->portmode == PHY_INTERFACE_MODE_25GBASER) {
+		/* Enable PCS for 25G device, speed 25G */
+		spx5_rmw(DEV25G_PCS25G_CFG_PCS25G_ENA_SET(1),
+			 DEV25G_PCS25G_CFG_PCS25G_ENA,
+			 sparx5,
+			 DEV25G_PCS25G_CFG(pix));
+	} else {
+		/* Enable PCS for 5G/10G/25G devices, speed 5G/10G */
+		spx5_inst_rmw(PCS10G_BR_PCS_CFG_PCS_ENA_SET(1),
+			      PCS10G_BR_PCS_CFG_PCS_ENA,
+			      pcsinst,
+			      PCS10G_BR_PCS_CFG(0));
+	}
+
+	/* Enable 5G/10G/25G MAC module */
+	spx5_inst_wr(DEV10G_MAC_ENA_CFG_RX_ENA_SET(1) |
+		     DEV10G_MAC_ENA_CFG_TX_ENA_SET(1),
+		     devinst,
+		     DEV10G_MAC_ENA_CFG(0));
+
+	/* Take the device out of reset */
+	spx5_inst_rmw(DEV10G_DEV_RST_CTRL_PCS_RX_RST_SET(0) |
+		      DEV10G_DEV_RST_CTRL_PCS_TX_RST_SET(0) |
+		      DEV10G_DEV_RST_CTRL_MAC_RX_RST_SET(0) |
+		      DEV10G_DEV_RST_CTRL_MAC_TX_RST_SET(0) |
+		      DEV10G_DEV_RST_CTRL_SPEED_SEL_SET(clk_spd),
+		      DEV10G_DEV_RST_CTRL_PCS_RX_RST |
+		      DEV10G_DEV_RST_CTRL_PCS_TX_RST |
+		      DEV10G_DEV_RST_CTRL_MAC_RX_RST |
+		      DEV10G_DEV_RST_CTRL_MAC_TX_RST |
+		      DEV10G_DEV_RST_CTRL_SPEED_SEL,
+		      devinst,
+		      DEV10G_DEV_RST_CTRL(0));
+
+	return 0;
+}
+
+/* Switch between 1G/2500 and 5G/10G/25G devices */
+static void sparx5_dev_switch(struct sparx5 *sparx5, int port, bool hsd)
+{
+	int bt_indx = BIT(sparx5_port_dev_index(port));
+
+	if (sparx5_port_is_5g(port)) {
+		spx5_rmw(hsd ? 0 : bt_indx,
+			 bt_indx,
+			 sparx5,
+			 PORT_CONF_DEV5G_MODES);
+	} else if (sparx5_port_is_10g(port)) {
+		spx5_rmw(hsd ? 0 : bt_indx,
+			 bt_indx,
+			 sparx5,
+			 PORT_CONF_DEV10G_MODES);
+	} else if (sparx5_port_is_25g(port)) {
+		spx5_rmw(hsd ? 0 : bt_indx,
+			 bt_indx,
+			 sparx5,
+			 PORT_CONF_DEV25G_MODES);
+	}
+}
+
+/* Configure speed/duplex dependent registers */
+static int sparx5_port_config_low_set(struct sparx5 *sparx5,
+				      struct sparx5_port *port,
+				      struct sparx5_port_config *conf)
+{
+	u32 clk_spd, gig_mode, tx_gap, hdx_gap_1, hdx_gap_2;
+	bool fdx = conf->duplex == DUPLEX_FULL;
+	int spd = conf->speed;
+
+	clk_spd = spd == SPEED_10 ? 0 : spd == SPEED_100 ? 1 : 2;
+	gig_mode = spd == SPEED_1000 || spd == SPEED_2500;
+	tx_gap = spd == SPEED_1000 ? 4 : fdx ? 6 : 5;
+	hdx_gap_1 = spd == SPEED_1000 ? 0 : spd == SPEED_100 ? 1 : 2;
+	hdx_gap_2 = spd == SPEED_1000 ? 0 : spd == SPEED_100 ? 4 : 1;
+
+	/* GIG/FDX mode */
+	spx5_rmw(DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA_SET(gig_mode) |
+		 DEV2G5_MAC_MODE_CFG_FDX_ENA_SET(fdx),
+		 DEV2G5_MAC_MODE_CFG_GIGA_MODE_ENA |
+		 DEV2G5_MAC_MODE_CFG_FDX_ENA,
+		 sparx5,
+		 DEV2G5_MAC_MODE_CFG(port->portno));
+
+	/* Set MAC IFG Gaps */
+	spx5_wr(DEV2G5_MAC_IFG_CFG_TX_IFG_SET(tx_gap) |
+		DEV2G5_MAC_IFG_CFG_RX_IFG1_SET(hdx_gap_1) |
+		DEV2G5_MAC_IFG_CFG_RX_IFG2_SET(hdx_gap_2),
+		sparx5,
+		DEV2G5_MAC_IFG_CFG(port->portno));
+
+	/* Disabling frame aging when in HDX (due to HDX issue) */
+	spx5_rmw(HSCH_PORT_MODE_AGE_DIS_SET(fdx == 0),
+		 HSCH_PORT_MODE_AGE_DIS,
+		 sparx5,
+		 HSCH_PORT_MODE(port->portno));
+
+	/* Enable MAC module */
+	spx5_wr(DEV2G5_MAC_ENA_CFG_RX_ENA |
+		DEV2G5_MAC_ENA_CFG_TX_ENA,
+		sparx5,
+		DEV2G5_MAC_ENA_CFG(port->portno));
+
+	/* Select speed and take MAC out of reset */
+	spx5_rmw(DEV2G5_DEV_RST_CTRL_SPEED_SEL_SET(clk_spd) |
+		 DEV2G5_DEV_RST_CTRL_MAC_TX_RST_SET(0) |
+		 DEV2G5_DEV_RST_CTRL_MAC_RX_RST_SET(0),
+		 DEV2G5_DEV_RST_CTRL_SPEED_SEL |
+		 DEV2G5_DEV_RST_CTRL_MAC_TX_RST |
+		 DEV2G5_DEV_RST_CTRL_MAC_RX_RST,
+		 sparx5,
+		 DEV2G5_DEV_RST_CTRL(port->portno));
+
+	return 0;
+}
+
+int sparx5_port_pcs_set(struct sparx5 *sparx5,
+			struct sparx5_port *port,
+			struct sparx5_port_config *conf)
+
+{
+	bool high_speed_dev = sparx5_is_baser(conf->portmode);
+	int err;
+
+	if (sparx5_dev_change(sparx5, port, conf)) {
+		/* switch device */
+		sparx5_dev_switch(sparx5, port->portno, high_speed_dev);
+
+		/* Disable the not-in-use device */
+		err = sparx5_port_disable(sparx5, port, !high_speed_dev);
+		if (err)
+			return err;
+	}
+	/* Disable the port before re-configuring */
+	err = sparx5_port_disable(sparx5, port, high_speed_dev);
+	if (err)
+		return -EINVAL;
+
+	if (high_speed_dev)
+		err = sparx5_port_pcs_high_set(sparx5, port, conf);
+	else
+		err = sparx5_port_pcs_low_set(sparx5, port, conf);
+
+	if (err)
+		return -EINVAL;
+
+	if (port->conf.inband) {
+		/* Enable/disable 1G counters in ASM */
+		spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
+			 ASM_PORT_CFG_CSC_STAT_DIS,
+			 sparx5,
+			 ASM_PORT_CFG(port->portno));
+
+		/* Enable/disable 1G counters in DSM */
+		spx5_rmw(DSM_BUF_CFG_CSC_STAT_DIS_SET(high_speed_dev),
+			 DSM_BUF_CFG_CSC_STAT_DIS,
+			 sparx5,
+			 DSM_BUF_CFG(port->portno));
+	}
+
+	port->conf = *conf;
+
+	return 0;
+}
+
+int sparx5_port_config(struct sparx5 *sparx5,
+		       struct sparx5_port *port,
+		       struct sparx5_port_config *conf)
+{
+	bool high_speed_dev = sparx5_is_baser(conf->portmode);
+	int err, urgency, stop_wm;
+
+	err = sparx5_port_verify_speed(sparx5, port, conf);
+	if (err)
+		return err;
+
+	/* high speed device is already configured */
+	if (!high_speed_dev)
+		sparx5_port_config_low_set(sparx5, port, conf);
+
+	/* Configure flow control */
+	err = sparx5_port_fc_setup(sparx5, port, conf);
+	if (err)
+		return err;
+
+	/* Set the DSM stop watermark */
+	stop_wm = sparx5_port_fifo_sz(sparx5, port->portno, conf->speed);
+	spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM_SET(stop_wm),
+		 DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM,
+		 sparx5,
+		 DSM_DEV_TX_STOP_WM_CFG(port->portno));
+
+	/* Enable port in queue system */
+	urgency = sparx5_port_fwd_urg(sparx5, conf->speed);
+	spx5_rmw(QFWD_SWITCH_PORT_MODE_PORT_ENA_SET(1) |
+		 QFWD_SWITCH_PORT_MODE_FWD_URGENCY_SET(urgency),
+		 QFWD_SWITCH_PORT_MODE_PORT_ENA |
+		 QFWD_SWITCH_PORT_MODE_FWD_URGENCY,
+		 sparx5,
+		 QFWD_SWITCH_PORT_MODE(port->portno));
+
+	/* Save the new values */
+	port->conf = *conf;
+
+	return 0;
+}
+
+/* Initialize port config to default */
+int sparx5_port_init(struct sparx5 *sparx5,
+		     struct sparx5_port *port,
+		     struct sparx5_port_config *conf)
+{
+	u32 pause_start = sparx5_wm_enc(6  * (ETH_MAXLEN / SPX5_BUFFER_CELL_SZ));
+	u32 atop = sparx5_wm_enc(20 * (ETH_MAXLEN / SPX5_BUFFER_CELL_SZ));
+	u32 devhigh = sparx5_to_high_dev(port->portno);
+	u32 pix = sparx5_port_dev_index(port->portno);
+	u32 pcs = sparx5_to_pcs_dev(port->portno);
+	bool sd_pol = port->signd_active_high;
+	bool sd_sel = !port->signd_internal;
+	bool sd_ena = port->signd_enable;
+	u32 pause_stop = 0xFFF - 1; /* FC generate disabled */
+	void __iomem *devinst;
+	void __iomem *pcsinst;
+	int err;
+
+	devinst = spx5_inst_get(sparx5, devhigh, pix);
+	pcsinst = spx5_inst_get(sparx5, pcs, pix);
+
+	/* Set the mux port mode  */
+	err = sparx5_port_mux_set(sparx5, port, conf);
+	if (err)
+		return err;
+
+	/* Configure MAC vlan awareness */
+	err = sparx5_port_max_tags_set(sparx5, port);
+	if (err)
+		return err;
+
+	/* Set Max Length */
+	spx5_rmw(DEV2G5_MAC_MAXLEN_CFG_MAX_LEN_SET(ETH_MAXLEN),
+		 DEV2G5_MAC_MAXLEN_CFG_MAX_LEN,
+		 sparx5,
+		 DEV2G5_MAC_MAXLEN_CFG(port->portno));
+
+	/* 1G/2G5: Signal Detect configuration */
+	spx5_wr(DEV2G5_PCS1G_SD_CFG_SD_POL_SET(sd_pol) |
+		DEV2G5_PCS1G_SD_CFG_SD_SEL_SET(sd_sel) |
+		DEV2G5_PCS1G_SD_CFG_SD_ENA_SET(sd_ena),
+		sparx5,
+		DEV2G5_PCS1G_SD_CFG(port->portno));
+
+	/* Set Pause WM hysteresis */
+	spx5_rmw(QSYS_PAUSE_CFG_PAUSE_START_SET(pause_start) |
+		 QSYS_PAUSE_CFG_PAUSE_STOP_SET(pause_stop) |
+		 QSYS_PAUSE_CFG_PAUSE_ENA_SET(1),
+		 QSYS_PAUSE_CFG_PAUSE_START |
+		 QSYS_PAUSE_CFG_PAUSE_STOP |
+		 QSYS_PAUSE_CFG_PAUSE_ENA,
+		 sparx5,
+		 QSYS_PAUSE_CFG(port->portno));
+
+	/* Port ATOP. Frames are tail dropped when this WM is hit */
+	spx5_wr(QSYS_ATOP_ATOP_SET(atop),
+		sparx5,
+		QSYS_ATOP(port->portno));
+
+	/* Discard pause frame 01-80-C2-00-00-01 */
+	spx5_wr(PAUSE_DISCARD, sparx5, ANA_CL_CAPTURE_BPDU_CFG(port->portno));
+
+	if (conf->portmode == PHY_INTERFACE_MODE_QSGMII ||
+	    conf->portmode == PHY_INTERFACE_MODE_SGMII) {
+		err = sparx5_serdes_set(sparx5, port, conf);
+		if (err)
+			return err;
+
+		if (!sparx5_port_is_2g5(port->portno))
+			/* Enable shadow device */
+			spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA_SET(1),
+				 DSM_DEV_TX_STOP_WM_CFG_DEV10G_SHADOW_ENA,
+				 sparx5,
+				 DSM_DEV_TX_STOP_WM_CFG(port->portno));
+
+		sparx5_dev_switch(sparx5, port->portno, false);
+	}
+	if (conf->portmode == PHY_INTERFACE_MODE_QSGMII) {
+		// All ports must be PCS enabled in QSGMII mode
+		spx5_rmw(DEV2G5_DEV_RST_CTRL_PCS_TX_RST_SET(0),
+			 DEV2G5_DEV_RST_CTRL_PCS_TX_RST,
+			 sparx5,
+			 DEV2G5_DEV_RST_CTRL(port->portno));
+	}
+	/* Default IFGs for 1G */
+	spx5_wr(DEV2G5_MAC_IFG_CFG_TX_IFG_SET(6) |
+		DEV2G5_MAC_IFG_CFG_RX_IFG1_SET(0) |
+		DEV2G5_MAC_IFG_CFG_RX_IFG2_SET(0),
+		sparx5,
+		DEV2G5_MAC_IFG_CFG(port->portno));
+
+	if (sparx5_port_is_2g5(port->portno))
+		return 0; /* Low speed device only - return */
+
+	/* Now setup the high speed device */
+	if (conf->portmode == PHY_INTERFACE_MODE_NA)
+		conf->portmode = PHY_INTERFACE_MODE_10GBASER;
+
+	if (sparx5_is_baser(conf->portmode))
+		sparx5_dev_switch(sparx5, port->portno, true);
+
+	/* Set Max Length */
+	spx5_inst_rmw(DEV10G_MAC_MAXLEN_CFG_MAX_LEN_SET(ETH_MAXLEN),
+		      DEV10G_MAC_MAXLEN_CFG_MAX_LEN,
+		      devinst,
+		      DEV10G_MAC_ENA_CFG(0));
+
+	/* Handle Signal Detect in 10G PCS */
+	spx5_inst_wr(PCS10G_BR_PCS_SD_CFG_SD_POL_SET(sd_pol) |
+		     PCS10G_BR_PCS_SD_CFG_SD_SEL_SET(sd_sel) |
+		     PCS10G_BR_PCS_SD_CFG_SD_ENA_SET(sd_ena),
+		     pcsinst,
+		     PCS10G_BR_PCS_SD_CFG(0));
+
+	if (sparx5_port_is_25g(port->portno)) {
+		/* Handle Signal Detect in 25G PCS */
+		spx5_wr(DEV25G_PCS25G_SD_CFG_SD_POL_SET(sd_pol) |
+			DEV25G_PCS25G_SD_CFG_SD_SEL_SET(sd_sel) |
+			DEV25G_PCS25G_SD_CFG_SD_ENA_SET(sd_ena),
+			sparx5,
+			DEV25G_PCS25G_SD_CFG(pix));
+	}
+
+	return 0;
+}
+
+void sparx5_port_enable(struct sparx5_port *port, bool enable)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+
+	/* Enable port for frame transfer? */
+	spx5_rmw(QFWD_SWITCH_PORT_MODE_PORT_ENA_SET(enable),
+		 QFWD_SWITCH_PORT_MODE_PORT_ENA,
+		 sparx5,
+		 QFWD_SWITCH_PORT_MODE(port->portno));
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.h b/drivers/net/ethernet/microchip/sparx5/sparx5_port.h
new file mode 100644
index 000000000000..fd05ab6436d1
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#ifndef __SPARX5_PORT_H__
+#define __SPARX5_PORT_H__
+
+#include "sparx5_main.h"
+
+static inline bool sparx5_port_is_2g5(int portno)
+{
+	return portno >= 16 && portno <= 47;
+}
+
+static inline bool sparx5_port_is_5g(int portno)
+{
+	return portno <= 11 || portno == 64;
+}
+
+static inline bool sparx5_port_is_10g(int portno)
+{
+	return (portno >= 12 && portno <= 15) || (portno >= 48 && portno <= 55);
+}
+
+static inline bool sparx5_port_is_25g(int portno)
+{
+	return portno >= 56 && portno <= 63;
+}
+
+static inline u32 sparx5_to_high_dev(int port)
+{
+	if (sparx5_port_is_5g(port))
+		return TARGET_DEV5G;
+	if (sparx5_port_is_10g(port))
+		return TARGET_DEV10G;
+	return TARGET_DEV25G;
+}
+
+static inline u32 sparx5_to_pcs_dev(int port)
+{
+	if (sparx5_port_is_5g(port))
+		return TARGET_PCS5G_BR;
+	if (sparx5_port_is_10g(port))
+		return TARGET_PCS10G_BR;
+	return TARGET_PCS25G_BR;
+}
+
+static inline int sparx5_port_dev_index(int port)
+{
+	if (sparx5_port_is_2g5(port))
+		return port;
+	if (sparx5_port_is_5g(port))
+		return (port <= 11 ? port : 12);
+	if (sparx5_port_is_10g(port))
+		return (port >= 12 && port <= 15) ?
+			port - 12 : port - 44;
+	return (port - 56);
+}
+
+int sparx5_port_init(struct sparx5 *sparx5,
+		     struct sparx5_port *spx5_port,
+		     struct sparx5_port_config *conf);
+
+int sparx5_port_config(struct sparx5 *sparx5,
+		       struct sparx5_port *spx5_port,
+		       struct sparx5_port_config *conf);
+
+int sparx5_port_pcs_set(struct sparx5 *sparx5,
+			struct sparx5_port *port,
+			struct sparx5_port_config *conf);
+
+int sparx5_serdes_set(struct sparx5 *sparx5,
+		      struct sparx5_port *spx5_port,
+		      struct sparx5_port_config *conf);
+
+struct sparx5_port_status {
+	bool link;
+	bool link_down;
+	int  speed;
+	bool an_complete;
+	int  duplex;
+	int  pause;
+};
+
+int sparx5_get_port_status(struct sparx5 *sparx5,
+			   struct sparx5_port *port,
+			   struct sparx5_port_status *status);
+
+void sparx5_port_enable(struct sparx5_port *port, bool enable);
+
+#endif	/* __SPARX5_PORT_H__ */

From b37a1bae742f92cc9b1f777d54e04ee3d86bbfc2 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:53 +0200
Subject: [PATCH 2831/3292] net: sparx5: add mactable support

This adds the Sparx5 MAC tables: listening for MAC table updates and
updating on request.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |   2 +-
 .../microchip/sparx5/sparx5_mactable.c        | 497 ++++++++++++++++++
 .../ethernet/microchip/sparx5/sparx5_main.c   |  21 +-
 .../ethernet/microchip/sparx5/sparx5_main.h   |  26 +
 .../ethernet/microchip/sparx5/sparx5_netdev.c |  21 +
 5 files changed, 565 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index 25f115d2a2d9..13876a9c5dbf 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -6,4 +6,4 @@
 obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
- sparx5_netdev.o sparx5_phylink.o sparx5_port.o
+ sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
new file mode 100644
index 000000000000..6c5e04eccaa3
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include <net/switchdev.h>
+#include <linux/if_bridge.h>
+#include <linux/iopoll.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+/* Commands for Mac Table Command register */
+#define MAC_CMD_LEARN         0 /* Insert (Learn) 1 entry */
+#define MAC_CMD_UNLEARN       1 /* Unlearn (Forget) 1 entry */
+#define MAC_CMD_LOOKUP        2 /* Look up 1 entry */
+#define MAC_CMD_READ          3 /* Read entry at Mac Table Index */
+#define MAC_CMD_WRITE         4 /* Write entry at Mac Table Index */
+#define MAC_CMD_SCAN          5 /* Scan (Age or find next) */
+#define MAC_CMD_FIND_SMALLEST 6 /* Get next entry */
+#define MAC_CMD_CLEAR_ALL     7 /* Delete all entries in table */
+
+/* Commands for MAC_ENTRY_ADDR_TYPE */
+#define  MAC_ENTRY_ADDR_TYPE_UPSID_PN         0
+#define  MAC_ENTRY_ADDR_TYPE_UPSID_CPU_OR_INT 1
+#define  MAC_ENTRY_ADDR_TYPE_GLAG             2
+#define  MAC_ENTRY_ADDR_TYPE_MC_IDX           3
+
+#define TABLE_UPDATE_SLEEP_US 10
+#define TABLE_UPDATE_TIMEOUT_US 100000
+
+struct sparx5_mact_entry {
+	struct list_head list;
+	unsigned char mac[ETH_ALEN];
+	u32 flags;
+#define MAC_ENT_ALIVE	BIT(0)
+#define MAC_ENT_MOVED	BIT(1)
+#define MAC_ENT_LOCK	BIT(2)
+	u16 vid;
+	u16 port;
+};
+
+static int sparx5_mact_get_status(struct sparx5 *sparx5)
+{
+	return spx5_rd(sparx5, LRN_COMMON_ACCESS_CTRL);
+}
+
+static int sparx5_mact_wait_for_completion(struct sparx5 *sparx5)
+{
+	u32 val;
+
+	return readx_poll_timeout(sparx5_mact_get_status,
+		sparx5, val,
+		LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_GET(val) == 0,
+		TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
+}
+
+static void sparx5_mact_select(struct sparx5 *sparx5,
+			       const unsigned char mac[ETH_ALEN],
+			       u16 vid)
+{
+	u32 macl = 0, mach = 0;
+
+	/* Set the MAC address to handle and the vlan associated in a format
+	 * understood by the hardware.
+	 */
+	mach |= vid    << 16;
+	mach |= mac[0] << 8;
+	mach |= mac[1] << 0;
+	macl |= mac[2] << 24;
+	macl |= mac[3] << 16;
+	macl |= mac[4] << 8;
+	macl |= mac[5] << 0;
+
+	spx5_wr(mach, sparx5, LRN_MAC_ACCESS_CFG_0);
+	spx5_wr(macl, sparx5, LRN_MAC_ACCESS_CFG_1);
+}
+
+int sparx5_mact_learn(struct sparx5 *sparx5, int pgid,
+		      const unsigned char mac[ETH_ALEN], u16 vid)
+{
+	int addr, type, ret;
+
+	if (pgid < SPX5_PORTS) {
+		type = MAC_ENTRY_ADDR_TYPE_UPSID_PN;
+		addr = pgid % 32;
+		addr += (pgid / 32) << 5; /* Add upsid */
+	} else {
+		type = MAC_ENTRY_ADDR_TYPE_MC_IDX;
+		addr = pgid - SPX5_PORTS;
+	}
+
+	mutex_lock(&sparx5->lock);
+
+	sparx5_mact_select(sparx5, mac, vid);
+
+	/* MAC entry properties */
+	spx5_wr(LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_SET(addr) |
+		LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE_SET(type) |
+		LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD_SET(1) |
+		LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_LOCKED_SET(1),
+		sparx5, LRN_MAC_ACCESS_CFG_2);
+	spx5_wr(0, sparx5, LRN_MAC_ACCESS_CFG_3);
+
+	/*  Insert/learn new entry */
+	spx5_wr(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET(MAC_CMD_LEARN) |
+		LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(1),
+		sparx5, LRN_COMMON_ACCESS_CTRL);
+
+	ret = sparx5_mact_wait_for_completion(sparx5);
+
+	mutex_unlock(&sparx5->lock);
+
+	return ret;
+}
+
+int sparx5_mc_unsync(struct net_device *dev, const unsigned char *addr)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	struct sparx5 *sparx5 = port->sparx5;
+
+	return sparx5_mact_forget(sparx5, addr, port->pvid);
+}
+
+int sparx5_mc_sync(struct net_device *dev, const unsigned char *addr)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	struct sparx5 *sparx5 = port->sparx5;
+
+	return sparx5_mact_learn(sparx5, PGID_CPU, addr, port->pvid);
+}
+
+static int sparx5_mact_get(struct sparx5 *sparx5,
+			   unsigned char mac[ETH_ALEN],
+			   u16 *vid, u32 *pcfg2)
+{
+	u32 mach, macl, cfg2;
+	int ret = -ENOENT;
+
+	cfg2 = spx5_rd(sparx5, LRN_MAC_ACCESS_CFG_2);
+	if (LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD_GET(cfg2)) {
+		mach = spx5_rd(sparx5, LRN_MAC_ACCESS_CFG_0);
+		macl = spx5_rd(sparx5, LRN_MAC_ACCESS_CFG_1);
+		mac[0] = ((mach >> 8)  & 0xff);
+		mac[1] = ((mach >> 0)  & 0xff);
+		mac[2] = ((macl >> 24) & 0xff);
+		mac[3] = ((macl >> 16) & 0xff);
+		mac[4] = ((macl >> 8)  & 0xff);
+		mac[5] = ((macl >> 0)  & 0xff);
+		*vid = mach >> 16;
+		*pcfg2 = cfg2;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+bool sparx5_mact_getnext(struct sparx5 *sparx5,
+			 unsigned char mac[ETH_ALEN], u16 *vid, u32 *pcfg2)
+{
+	u32 cfg2;
+	int ret;
+
+	mutex_lock(&sparx5->lock);
+
+	sparx5_mact_select(sparx5, mac, *vid);
+
+	spx5_wr(LRN_SCAN_NEXT_CFG_SCAN_NEXT_IGNORE_LOCKED_ENA_SET(1) |
+		LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA_SET(1),
+		sparx5, LRN_SCAN_NEXT_CFG);
+	spx5_wr(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET
+		(MAC_CMD_FIND_SMALLEST) |
+		LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(1),
+		sparx5, LRN_COMMON_ACCESS_CTRL);
+
+	ret = sparx5_mact_wait_for_completion(sparx5);
+	if (ret == 0) {
+		ret = sparx5_mact_get(sparx5, mac, vid, &cfg2);
+		if (ret == 0)
+			*pcfg2 = cfg2;
+	}
+
+	mutex_unlock(&sparx5->lock);
+
+	return ret == 0;
+}
+
+static int sparx5_mact_lookup(struct sparx5 *sparx5,
+			      const unsigned char mac[ETH_ALEN],
+			      u16 vid)
+{
+	int ret;
+
+	mutex_lock(&sparx5->lock);
+
+	sparx5_mact_select(sparx5, mac, vid);
+
+	/* Issue a lookup command */
+	spx5_wr(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET(MAC_CMD_LOOKUP) |
+		LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(1),
+		sparx5, LRN_COMMON_ACCESS_CTRL);
+
+	ret = sparx5_mact_wait_for_completion(sparx5);
+	if (ret)
+		goto out;
+
+	ret = LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_VLD_GET
+		(spx5_rd(sparx5, LRN_MAC_ACCESS_CFG_2));
+
+out:
+	mutex_unlock(&sparx5->lock);
+
+	return ret;
+}
+
+int sparx5_mact_forget(struct sparx5 *sparx5,
+		       const unsigned char mac[ETH_ALEN], u16 vid)
+{
+	int ret;
+
+	mutex_lock(&sparx5->lock);
+
+	sparx5_mact_select(sparx5, mac, vid);
+
+	/* Issue an unlearn command */
+	spx5_wr(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET(MAC_CMD_UNLEARN) |
+		LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(1),
+		sparx5, LRN_COMMON_ACCESS_CTRL);
+
+	ret = sparx5_mact_wait_for_completion(sparx5);
+
+	mutex_unlock(&sparx5->lock);
+
+	return ret;
+}
+
+static struct sparx5_mact_entry *alloc_mact_entry(struct sparx5 *sparx5,
+						  const unsigned char *mac,
+						  u16 vid, u16 port_index)
+{
+	struct sparx5_mact_entry *mact_entry;
+
+	mact_entry = devm_kzalloc(sparx5->dev,
+				  sizeof(*mact_entry), GFP_ATOMIC);
+	if (!mact_entry)
+		return NULL;
+
+	memcpy(mact_entry->mac, mac, ETH_ALEN);
+	mact_entry->vid = vid;
+	mact_entry->port = port_index;
+	return mact_entry;
+}
+
+static struct sparx5_mact_entry *find_mact_entry(struct sparx5 *sparx5,
+						 const unsigned char *mac,
+						 u16 vid, u16 port_index)
+{
+	struct sparx5_mact_entry *mact_entry;
+	struct sparx5_mact_entry *res = NULL;
+
+	mutex_lock(&sparx5->mact_lock);
+	list_for_each_entry(mact_entry, &sparx5->mact_entries, list) {
+		if (mact_entry->vid == vid &&
+		    ether_addr_equal(mac, mact_entry->mac) &&
+		    mact_entry->port == port_index) {
+			res = mact_entry;
+			break;
+		}
+	}
+	mutex_unlock(&sparx5->mact_lock);
+
+	return res;
+}
+
+static void sparx5_fdb_call_notifiers(enum switchdev_notifier_type type,
+				      const char *mac, u16 vid,
+				      struct net_device *dev, bool offloaded)
+{
+	struct switchdev_notifier_fdb_info info;
+
+	info.addr = mac;
+	info.vid = vid;
+	info.offloaded = offloaded;
+	call_switchdev_notifiers(type, dev, &info.info, NULL);
+}
+
+int sparx5_add_mact_entry(struct sparx5 *sparx5,
+			  struct sparx5_port *port,
+			  const unsigned char *addr, u16 vid)
+{
+	struct sparx5_mact_entry *mact_entry;
+	int ret;
+
+	ret = sparx5_mact_lookup(sparx5, addr, vid);
+	if (ret)
+		return 0;
+
+	/* In case the entry already exists, don't add it again to SW,
+	 * just update HW, but we need to look in the actual HW because
+	 * it is possible for an entry to be learn by HW and before the
+	 * mact thread to start the frame will reach CPU and the CPU will
+	 * add the entry but without the extern_learn flag.
+	 */
+	mact_entry = find_mact_entry(sparx5, addr, vid, port->portno);
+	if (mact_entry)
+		goto update_hw;
+
+	/* Add the entry in SW MAC table not to get the notification when
+	 * SW is pulling again
+	 */
+	mact_entry = alloc_mact_entry(sparx5, addr, vid, port->portno);
+	if (!mact_entry)
+		return -ENOMEM;
+
+	mutex_lock(&sparx5->mact_lock);
+	list_add_tail(&mact_entry->list, &sparx5->mact_entries);
+	mutex_unlock(&sparx5->mact_lock);
+
+update_hw:
+	ret = sparx5_mact_learn(sparx5, port->portno, addr, vid);
+
+	/* New entry? */
+	if (mact_entry->flags == 0) {
+		mact_entry->flags |= MAC_ENT_LOCK; /* Don't age this */
+		sparx5_fdb_call_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, addr, vid,
+					  port->ndev, true);
+	}
+
+	return ret;
+}
+
+int sparx5_del_mact_entry(struct sparx5 *sparx5,
+			  const unsigned char *addr,
+			  u16 vid)
+{
+	struct sparx5_mact_entry *mact_entry, *tmp;
+
+	/* Delete the entry in SW MAC table not to get the notification when
+	 * SW is pulling again
+	 */
+	mutex_lock(&sparx5->mact_lock);
+	list_for_each_entry_safe(mact_entry, tmp, &sparx5->mact_entries,
+				 list) {
+		if ((vid == 0 || mact_entry->vid == vid) &&
+		    ether_addr_equal(addr, mact_entry->mac)) {
+			list_del(&mact_entry->list);
+			devm_kfree(sparx5->dev, mact_entry);
+
+			sparx5_mact_forget(sparx5, addr, mact_entry->vid);
+		}
+	}
+	mutex_unlock(&sparx5->mact_lock);
+
+	return 0;
+}
+
+static void sparx5_mact_handle_entry(struct sparx5 *sparx5,
+				     unsigned char mac[ETH_ALEN],
+				     u16 vid, u32 cfg2)
+{
+	struct sparx5_mact_entry *mact_entry;
+	bool found = false;
+	u16 port;
+
+	if (LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_TYPE_GET(cfg2) !=
+	    MAC_ENTRY_ADDR_TYPE_UPSID_PN)
+		return;
+
+	port = LRN_MAC_ACCESS_CFG_2_MAC_ENTRY_ADDR_GET(cfg2);
+	if (port >= SPX5_PORTS)
+		return;
+
+	mutex_lock(&sparx5->mact_lock);
+	list_for_each_entry(mact_entry, &sparx5->mact_entries, list) {
+		if (mact_entry->vid == vid &&
+		    ether_addr_equal(mac, mact_entry->mac)) {
+			found = true;
+			mact_entry->flags |= MAC_ENT_ALIVE;
+			if (mact_entry->port != port) {
+				dev_warn(sparx5->dev, "Entry move: %d -> %d\n",
+					 mact_entry->port, port);
+				mact_entry->port = port;
+				mact_entry->flags |= MAC_ENT_MOVED;
+			}
+			/* Entry handled */
+			break;
+		}
+	}
+	mutex_unlock(&sparx5->mact_lock);
+
+	if (found && !(mact_entry->flags & MAC_ENT_MOVED))
+		/* Present, not moved */
+		return;
+
+	if (!found) {
+		/* Entry not found - now add */
+		mact_entry = alloc_mact_entry(sparx5, mac, vid, port);
+		if (!mact_entry)
+			return;
+
+		mact_entry->flags |= MAC_ENT_ALIVE;
+		mutex_lock(&sparx5->mact_lock);
+		list_add_tail(&mact_entry->list, &sparx5->mact_entries);
+		mutex_unlock(&sparx5->mact_lock);
+	}
+
+	/* New or moved entry - notify bridge */
+	sparx5_fdb_call_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
+				  mac, vid, sparx5->ports[port]->ndev,
+				  true);
+}
+
+void sparx5_mact_pull_work(struct work_struct *work)
+{
+	struct delayed_work *del_work = to_delayed_work(work);
+	struct sparx5 *sparx5 = container_of(del_work, struct sparx5,
+					     mact_work);
+	struct sparx5_mact_entry *mact_entry, *tmp;
+	unsigned char mac[ETH_ALEN];
+	u32 cfg2;
+	u16 vid;
+	int ret;
+
+	/* Reset MAC entry flags */
+	mutex_lock(&sparx5->mact_lock);
+	list_for_each_entry(mact_entry, &sparx5->mact_entries, list)
+		mact_entry->flags &= MAC_ENT_LOCK;
+	mutex_unlock(&sparx5->mact_lock);
+
+	/* MAIN mac address processing loop */
+	vid = 0;
+	memset(mac, 0, sizeof(mac));
+	do {
+		mutex_lock(&sparx5->lock);
+		sparx5_mact_select(sparx5, mac, vid);
+		spx5_wr(LRN_SCAN_NEXT_CFG_SCAN_NEXT_UNTIL_FOUND_ENA_SET(1),
+			sparx5, LRN_SCAN_NEXT_CFG);
+		spx5_wr(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET
+			(MAC_CMD_FIND_SMALLEST) |
+			LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(1),
+			sparx5, LRN_COMMON_ACCESS_CTRL);
+		ret = sparx5_mact_wait_for_completion(sparx5);
+		if (ret == 0)
+			ret = sparx5_mact_get(sparx5, mac, &vid, &cfg2);
+		mutex_unlock(&sparx5->lock);
+		if (ret == 0)
+			sparx5_mact_handle_entry(sparx5, mac, vid, cfg2);
+	} while (ret == 0);
+
+	mutex_lock(&sparx5->mact_lock);
+	list_for_each_entry_safe(mact_entry, tmp, &sparx5->mact_entries,
+				 list) {
+		/* If the entry is in HW or permanent, then skip */
+		if (mact_entry->flags & (MAC_ENT_ALIVE | MAC_ENT_LOCK))
+			continue;
+
+		sparx5_fdb_call_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+					  mact_entry->mac, mact_entry->vid,
+					  sparx5->ports[mact_entry->port]->ndev,
+					  true);
+
+		list_del(&mact_entry->list);
+		devm_kfree(sparx5->dev, mact_entry);
+	}
+	mutex_unlock(&sparx5->mact_lock);
+
+	queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work,
+			   SPX5_MACT_PULL_DELAY);
+}
+
+void sparx5_set_ageing(struct sparx5 *sparx5, int msecs)
+{
+	int value = max(1, msecs / 10); /* unit 10 ms */
+
+	spx5_rmw(LRN_AUTOAGE_CFG_UNIT_SIZE_SET(2) | /* 10 ms */
+		 LRN_AUTOAGE_CFG_PERIOD_VAL_SET(value / 2), /* one bit ageing */
+		 LRN_AUTOAGE_CFG_UNIT_SIZE |
+		 LRN_AUTOAGE_CFG_PERIOD_VAL,
+		 sparx5,
+		 LRN_AUTOAGE_CFG(0));
+}
+
+void sparx5_mact_init(struct sparx5 *sparx5)
+{
+	mutex_init(&sparx5->lock);
+
+	/*  Flush MAC table */
+	spx5_wr(LRN_COMMON_ACCESS_CTRL_CPU_ACCESS_CMD_SET(MAC_CMD_CLEAR_ALL) |
+		LRN_COMMON_ACCESS_CTRL_MAC_TABLE_ACCESS_SHOT_SET(1),
+		sparx5, LRN_COMMON_ACCESS_CTRL);
+
+	if (sparx5_mact_wait_for_completion(sparx5) != 0)
+		dev_warn(sparx5->dev, "MAC flush error\n");
+
+	sparx5_set_ageing(sparx5, BR_DEFAULT_AGEING_TIME / HZ * 1000);
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 107da841ccc4..c5f8f4a10475 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -558,6 +558,8 @@ static void sparx5_board_init(struct sparx5 *sparx5)
 
 static int sparx5_start(struct sparx5 *sparx5)
 {
+	u8 broadcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	char queue_name[32];
 	u32 idx;
 	int err;
 
@@ -589,12 +591,29 @@ static int sparx5_start(struct sparx5 *sparx5)
 			 ANA_CL_FILTER_CTRL_FORCE_FCS_UPDATE_ENA,
 			 sparx5, ANA_CL_FILTER_CTRL(idx));
 
-	/* MAC/VLAN support to be added in later patches */
+	/* Init MAC table, ageing */
+	sparx5_mact_init(sparx5);
+
+	/* VLAN support to be added in later patches */
+
+	/* Add host mode BC address (points only to CPU) */
+	sparx5_mact_learn(sparx5, PGID_CPU, broadcast, NULL_VID);
+
 	/* Enable queue limitation watermarks */
 	sparx5_qlim_set(sparx5);
 
 	/* Resource calendar support to be added in later patches */
 
+	/* Init mact_sw struct */
+	mutex_init(&sparx5->mact_lock);
+	INIT_LIST_HEAD(&sparx5->mact_entries);
+	snprintf(queue_name, sizeof(queue_name), "%s-mact",
+		 dev_name(sparx5->dev));
+	sparx5->mact_queue = create_singlethread_workqueue(queue_name);
+	INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work);
+	queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work,
+			   SPX5_MACT_PULL_DELAY);
+
 	err = sparx5_register_netdevs(sparx5);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index b0fb577ca6df..e313611c2942 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -135,7 +135,14 @@ struct sparx5 {
 	/* port structures are in net device */
 	struct sparx5_port *ports[SPX5_PORTS];
 	enum sparx5_core_clockfreq coreclock;
+	/* Switch state */
 	u8 base_mac[ETH_ALEN];
+	/* SW MAC table */
+	struct list_head mact_entries;
+	/* mac table list (mact_entries) mutex */
+	struct mutex mact_lock;
+	struct delayed_work mact_work;
+	struct workqueue_struct *mact_queue;
 	/* Board specifics */
 	bool sd_sgpio_remapping;
 	/* Register based inj/xtr */
@@ -148,6 +155,25 @@ int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev);
 int sparx5_manual_injection_mode(struct sparx5 *sparx5);
 void sparx5_port_inj_timer_setup(struct sparx5_port *port);
 
+/* sparx5_mactable.c */
+void sparx5_mact_pull_work(struct work_struct *work);
+int sparx5_mact_learn(struct sparx5 *sparx5, int port,
+		      const unsigned char mac[ETH_ALEN], u16 vid);
+bool sparx5_mact_getnext(struct sparx5 *sparx5,
+			 unsigned char mac[ETH_ALEN], u16 *vid, u32 *pcfg2);
+int sparx5_mact_forget(struct sparx5 *sparx5,
+		       const unsigned char mac[ETH_ALEN], u16 vid);
+int sparx5_add_mact_entry(struct sparx5 *sparx5,
+			  struct sparx5_port *port,
+			  const unsigned char *addr, u16 vid);
+int sparx5_del_mact_entry(struct sparx5 *sparx5,
+			  const unsigned char *addr,
+			  u16 vid);
+int sparx5_mc_sync(struct net_device *dev, const unsigned char *addr);
+int sparx5_mc_unsync(struct net_device *dev, const unsigned char *addr);
+void sparx5_set_ageing(struct sparx5 *sparx5, int msecs);
+void sparx5_mact_init(struct sparx5 *sparx5);
+
 /* sparx5_netdev.c */
 bool sparx5_netdevice_check(const struct net_device *dev);
 struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index 4c2280345ecc..8a3008c86534 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -132,17 +132,37 @@ static int sparx5_port_get_phys_port_name(struct net_device *dev,
 
 static int sparx5_set_mac_address(struct net_device *dev, void *p)
 {
+	struct sparx5_port *port = netdev_priv(dev);
+	struct sparx5 *sparx5 = port->sparx5;
 	const struct sockaddr *addr = p;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
+	/* Remove current */
+	sparx5_mact_forget(sparx5, dev->dev_addr,  port->pvid);
+
+	/* Add new */
+	sparx5_mact_learn(sparx5, PGID_CPU, addr->sa_data, port->pvid);
+
 	/* Record the address */
 	ether_addr_copy(dev->dev_addr, addr->sa_data);
 
 	return 0;
 }
 
+static int sparx5_get_port_parent_id(struct net_device *dev,
+				     struct netdev_phys_item_id *ppid)
+{
+	struct sparx5_port *sparx5_port = netdev_priv(dev);
+	struct sparx5 *sparx5 = sparx5_port->sparx5;
+
+	ppid->id_len = sizeof(sparx5->base_mac);
+	memcpy(&ppid->id, &sparx5->base_mac, ppid->id_len);
+
+	return 0;
+}
+
 static const struct net_device_ops sparx5_port_netdev_ops = {
 	.ndo_open               = sparx5_port_open,
 	.ndo_stop               = sparx5_port_stop,
@@ -150,6 +170,7 @@ static const struct net_device_ops sparx5_port_netdev_ops = {
 	.ndo_get_phys_port_name = sparx5_port_get_phys_port_name,
 	.ndo_set_mac_address    = sparx5_set_mac_address,
 	.ndo_validate_addr      = eth_validate_addr,
+	.ndo_get_port_parent_id = sparx5_get_port_parent_id,
 };
 
 bool sparx5_netdevice_check(const struct net_device *dev)

From 78eab33bb68b565176917b24a0b72a2f5b938d84 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:54 +0200
Subject: [PATCH 2832/3292] net: sparx5: add vlan support

This adds Sparx5 VLAN support.

Sparx5 has more VLAN features than provided here, but these will be added
in later series. For now we only add the basic L2 features.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |   2 +-
 .../ethernet/microchip/sparx5/sparx5_main.c   |  10 +-
 .../ethernet/microchip/sparx5/sparx5_main.h   |  14 ++
 .../ethernet/microchip/sparx5/sparx5_vlan.c   | 224 ++++++++++++++++++
 4 files changed, 246 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index 13876a9c5dbf..bd4793367844 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -6,4 +6,4 @@
 obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
- sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o
+ sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index c5f8f4a10475..857f6454b574 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -282,7 +282,8 @@ static int sparx5_create_port(struct sparx5 *sparx5,
 	}
 	spx5_port->conf = config->conf;
 
-	/* VLAN support to be added in later patches */
+	/* Setup VLAN */
+	sparx5_vlan_port_setup(sparx5, spx5_port->portno);
 
 	/* Create a phylink for PHY management.  Also handles SFPs */
 	spx5_port->phylink_config.dev = &spx5_port->ndev->dev;
@@ -578,7 +579,9 @@ static int sparx5_start(struct sparx5 *sparx5)
 			 sparx5,
 			 QFWD_SWITCH_PORT_MODE(idx));
 
-	/* Forwarding masks to be added in later patches */
+	/* Init masks */
+	sparx5_update_fwd(sparx5);
+
 	/* CPU copy CPU pgids */
 	spx5_wr(ANA_AC_PGID_MISC_CFG_PGID_CPU_COPY_ENA_SET(1),
 		sparx5, ANA_AC_PGID_MISC_CFG(PGID_CPU));
@@ -594,7 +597,8 @@ static int sparx5_start(struct sparx5 *sparx5)
 	/* Init MAC table, ageing */
 	sparx5_mact_init(sparx5);
 
-	/* VLAN support to be added in later patches */
+	/* Setup VLANs */
+	sparx5_vlan_init(sparx5);
 
 	/* Add host mode BC address (points only to CPU) */
 	sparx5_mact_learn(sparx5, PGID_CPU, broadcast, NULL_VID);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index e313611c2942..65b80033de9e 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -137,6 +137,10 @@ struct sparx5 {
 	enum sparx5_core_clockfreq coreclock;
 	/* Switch state */
 	u8 base_mac[ETH_ALEN];
+	/* Bridged interfaces */
+	DECLARE_BITMAP(bridge_fwd_mask, SPX5_PORTS);
+	DECLARE_BITMAP(bridge_lrn_mask, SPX5_PORTS);
+	DECLARE_BITMAP(vlan_mask[VLAN_N_VID], SPX5_PORTS);
 	/* SW MAC table */
 	struct list_head mact_entries;
 	/* mac table list (mact_entries) mutex */
@@ -174,6 +178,16 @@ int sparx5_mc_unsync(struct net_device *dev, const unsigned char *addr);
 void sparx5_set_ageing(struct sparx5 *sparx5, int msecs);
 void sparx5_mact_init(struct sparx5 *sparx5);
 
+/* sparx5_vlan.c */
+void sparx5_pgid_update_mask(struct sparx5_port *port, int pgid, bool enable);
+void sparx5_update_fwd(struct sparx5 *sparx5);
+void sparx5_vlan_init(struct sparx5 *sparx5);
+void sparx5_vlan_port_setup(struct sparx5 *sparx5, int portno);
+int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
+			bool untagged);
+int sparx5_vlan_vid_del(struct sparx5_port *port, u16 vid);
+void sparx5_vlan_port_apply(struct sparx5 *sparx5, struct sparx5_port *port);
+
 /* sparx5_netdev.c */
 bool sparx5_netdevice_check(const struct net_device *dev);
 struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
new file mode 100644
index 000000000000..4ce490a25f33
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vlan.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+static int sparx5_vlant_set_mask(struct sparx5 *sparx5, u16 vid)
+{
+	u32 mask[3];
+
+	/* Divide up mask in 32 bit words */
+	bitmap_to_arr32(mask, sparx5->vlan_mask[vid], SPX5_PORTS);
+
+	/* Output mask to respective registers */
+	spx5_wr(mask[0], sparx5, ANA_L3_VLAN_MASK_CFG(vid));
+	spx5_wr(mask[1], sparx5, ANA_L3_VLAN_MASK_CFG1(vid));
+	spx5_wr(mask[2], sparx5, ANA_L3_VLAN_MASK_CFG2(vid));
+
+	return 0;
+}
+
+void sparx5_vlan_init(struct sparx5 *sparx5)
+{
+	u16 vid;
+
+	spx5_rmw(ANA_L3_VLAN_CTRL_VLAN_ENA_SET(1),
+		 ANA_L3_VLAN_CTRL_VLAN_ENA,
+		 sparx5,
+		 ANA_L3_VLAN_CTRL);
+
+	/* Map VLAN = FID */
+	for (vid = NULL_VID; vid < VLAN_N_VID; vid++)
+		spx5_rmw(ANA_L3_VLAN_CFG_VLAN_FID_SET(vid),
+			 ANA_L3_VLAN_CFG_VLAN_FID,
+			 sparx5,
+			 ANA_L3_VLAN_CFG(vid));
+}
+
+void sparx5_vlan_port_setup(struct sparx5 *sparx5, int portno)
+{
+	struct sparx5_port *port = sparx5->ports[portno];
+
+	/* Configure PVID */
+	spx5_rmw(ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA_SET(0) |
+		 ANA_CL_VLAN_CTRL_PORT_VID_SET(port->pvid),
+		 ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA |
+		 ANA_CL_VLAN_CTRL_PORT_VID,
+		 sparx5,
+		 ANA_CL_VLAN_CTRL(port->portno));
+}
+
+int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
+			bool untagged)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+	int ret;
+
+	/* Make the port a member of the VLAN */
+	set_bit(port->portno, sparx5->vlan_mask[vid]);
+	ret = sparx5_vlant_set_mask(sparx5, vid);
+	if (ret)
+		return ret;
+
+	/* Default ingress vlan classification */
+	if (pvid)
+		port->pvid = vid;
+
+	/* Untagged egress vlan classification */
+	if (untagged && port->vid != vid) {
+		if (port->vid) {
+			netdev_err(port->ndev,
+				   "Port already has a native VLAN: %d\n",
+				   port->vid);
+			return -EBUSY;
+		}
+		port->vid = vid;
+	}
+
+	sparx5_vlan_port_apply(sparx5, port);
+
+	return 0;
+}
+
+int sparx5_vlan_vid_del(struct sparx5_port *port, u16 vid)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+	int ret;
+
+	/* 8021q removes VID 0 on module unload for all interfaces
+	 * with VLAN filtering feature. We need to keep it to receive
+	 * untagged traffic.
+	 */
+	if (vid == 0)
+		return 0;
+
+	/* Stop the port from being a member of the vlan */
+	clear_bit(port->portno, sparx5->vlan_mask[vid]);
+	ret = sparx5_vlant_set_mask(sparx5, vid);
+	if (ret)
+		return ret;
+
+	/* Ingress */
+	if (port->pvid == vid)
+		port->pvid = 0;
+
+	/* Egress */
+	if (port->vid == vid)
+		port->vid = 0;
+
+	sparx5_vlan_port_apply(sparx5, port);
+
+	return 0;
+}
+
+void sparx5_pgid_update_mask(struct sparx5_port *port, int pgid, bool enable)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+	u32 val, mask;
+
+	/* mask is spread across 3 registers x 32 bit */
+	if (port->portno < 32) {
+		mask = BIT(port->portno);
+		val = enable ? mask : 0;
+		spx5_rmw(val, mask, sparx5, ANA_AC_PGID_CFG(pgid));
+	} else if (port->portno < 64) {
+		mask = BIT(port->portno - 32);
+		val = enable ? mask : 0;
+		spx5_rmw(val, mask, sparx5, ANA_AC_PGID_CFG1(pgid));
+	} else if (port->portno < SPX5_PORTS) {
+		mask = BIT(port->portno - 64);
+		val = enable ? mask : 0;
+		spx5_rmw(val, mask, sparx5, ANA_AC_PGID_CFG2(pgid));
+	} else {
+		netdev_err(port->ndev, "Invalid port no: %d\n", port->portno);
+	}
+}
+
+void sparx5_update_fwd(struct sparx5 *sparx5)
+{
+	DECLARE_BITMAP(workmask, SPX5_PORTS);
+	u32 mask[3];
+	int port;
+
+	/* Divide up fwd mask in 32 bit words */
+	bitmap_to_arr32(mask, sparx5->bridge_fwd_mask, SPX5_PORTS);
+
+	/* Update flood masks */
+	for (port = PGID_UC_FLOOD; port <= PGID_BCAST; port++) {
+		spx5_wr(mask[0], sparx5, ANA_AC_PGID_CFG(port));
+		spx5_wr(mask[1], sparx5, ANA_AC_PGID_CFG1(port));
+		spx5_wr(mask[2], sparx5, ANA_AC_PGID_CFG2(port));
+	}
+
+	/* Update SRC masks */
+	for (port = 0; port < SPX5_PORTS; port++) {
+		if (test_bit(port, sparx5->bridge_fwd_mask)) {
+			/* Allow to send to all bridged but self */
+			bitmap_copy(workmask, sparx5->bridge_fwd_mask, SPX5_PORTS);
+			clear_bit(port, workmask);
+			bitmap_to_arr32(mask, workmask, SPX5_PORTS);
+			spx5_wr(mask[0], sparx5, ANA_AC_SRC_CFG(port));
+			spx5_wr(mask[1], sparx5, ANA_AC_SRC_CFG1(port));
+			spx5_wr(mask[2], sparx5, ANA_AC_SRC_CFG2(port));
+		} else {
+			spx5_wr(0, sparx5, ANA_AC_SRC_CFG(port));
+			spx5_wr(0, sparx5, ANA_AC_SRC_CFG1(port));
+			spx5_wr(0, sparx5, ANA_AC_SRC_CFG2(port));
+		}
+	}
+
+	/* Learning enabled only for bridged ports */
+	bitmap_and(workmask, sparx5->bridge_fwd_mask,
+		   sparx5->bridge_lrn_mask, SPX5_PORTS);
+	bitmap_to_arr32(mask, workmask, SPX5_PORTS);
+
+	/* Apply learning mask */
+	spx5_wr(mask[0], sparx5, ANA_L2_AUTO_LRN_CFG);
+	spx5_wr(mask[1], sparx5, ANA_L2_AUTO_LRN_CFG1);
+	spx5_wr(mask[2], sparx5, ANA_L2_AUTO_LRN_CFG2);
+}
+
+void sparx5_vlan_port_apply(struct sparx5 *sparx5,
+			    struct sparx5_port *port)
+
+{
+	u32 val;
+
+	/* Configure PVID, vlan aware */
+	val = ANA_CL_VLAN_CTRL_VLAN_AWARE_ENA_SET(port->vlan_aware) |
+		ANA_CL_VLAN_CTRL_VLAN_POP_CNT_SET(port->vlan_aware) |
+		ANA_CL_VLAN_CTRL_PORT_VID_SET(port->pvid);
+	spx5_wr(val, sparx5, ANA_CL_VLAN_CTRL(port->portno));
+
+	val = 0;
+	if (port->vlan_aware && !port->pvid)
+		/* If port is vlan-aware and tagged, drop untagged and
+		 * priority tagged frames.
+		 */
+		val = ANA_CL_VLAN_FILTER_CTRL_TAG_REQUIRED_ENA_SET(1) |
+			ANA_CL_VLAN_FILTER_CTRL_PRIO_CTAG_DIS_SET(1) |
+			ANA_CL_VLAN_FILTER_CTRL_PRIO_STAG_DIS_SET(1);
+	spx5_wr(val, sparx5,
+		ANA_CL_VLAN_FILTER_CTRL(port->portno, 0));
+
+	/* Egress configuration (REW_TAG_CFG): VLAN tag type to 8021Q */
+	val = REW_TAG_CTRL_TAG_TPID_CFG_SET(0);
+	if (port->vlan_aware) {
+		if (port->vid)
+			/* Tag all frames except when VID == DEFAULT_VLAN */
+			val |= REW_TAG_CTRL_TAG_CFG_SET(1);
+		else
+			val |= REW_TAG_CTRL_TAG_CFG_SET(3);
+	}
+	spx5_wr(val, sparx5, REW_TAG_CTRL(port->portno));
+
+	/* Egress VID */
+	spx5_rmw(REW_PORT_VLAN_CFG_PORT_VID_SET(port->vid),
+		 REW_PORT_VLAN_CFG_PORT_VID,
+		 sparx5,
+		 REW_PORT_VLAN_CFG(port->portno));
+}

From d6fce5141929697a27f029c633433d487f6f62cb Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:55 +0200
Subject: [PATCH 2833/3292] net: sparx5: add switching support

This adds SwitchDev support by hardware offloading the
software bridge.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |   3 +-
 .../microchip/sparx5/sparx5_mactable.c        |   3 +
 .../ethernet/microchip/sparx5/sparx5_main.c   |   4 +
 .../ethernet/microchip/sparx5/sparx5_main.h   |  11 +
 .../ethernet/microchip/sparx5/sparx5_netdev.c |  10 +
 .../ethernet/microchip/sparx5/sparx5_packet.c |   6 +
 .../microchip/sparx5/sparx5_switchdev.c       | 508 ++++++++++++++++++
 7 files changed, 544 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index bd4793367844..5bf46b2fbdee 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -6,4 +6,5 @@
 obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
- sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o
+ sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \
+ sparx5_switchdev.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
index 6c5e04eccaa3..0443f66b5550 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c
@@ -371,6 +371,9 @@ static void sparx5_mact_handle_entry(struct sparx5 *sparx5,
 	if (port >= SPX5_PORTS)
 		return;
 
+	if (!test_bit(port, sparx5->bridge_mask))
+		return;
+
 	mutex_lock(&sparx5->mact_lock);
 	list_for_each_entry(mact_entry, &sparx5->mact_entries, list) {
 		if (mact_entry->vid == vid &&
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 857f6454b574..549ec2d7bd5a 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -623,6 +623,7 @@ static int sparx5_start(struct sparx5 *sparx5)
 		return err;
 
 	sparx5_board_init(sparx5);
+	err = sparx5_register_notifier_blocks(sparx5);
 
 	/* Start register based INJ/XTR */
 	err = -ENXIO;
@@ -812,6 +813,9 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
 		sparx5->xtr_irq = -ENXIO;
 	}
 	sparx5_cleanup_ports(sparx5);
+	/* Unregister netdevs */
+	sparx5_unregister_notifier_blocks(sparx5);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 65b80033de9e..67bad6878e98 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -135,9 +135,16 @@ struct sparx5 {
 	/* port structures are in net device */
 	struct sparx5_port *ports[SPX5_PORTS];
 	enum sparx5_core_clockfreq coreclock;
+	/* Notifiers */
+	struct notifier_block netdevice_nb;
+	struct notifier_block switchdev_nb;
+	struct notifier_block switchdev_blocking_nb;
 	/* Switch state */
 	u8 base_mac[ETH_ALEN];
+	/* Associated bridge device (when bridged) */
+	struct net_device *hw_bridge_dev;
 	/* Bridged interfaces */
+	DECLARE_BITMAP(bridge_mask, SPX5_PORTS);
 	DECLARE_BITMAP(bridge_fwd_mask, SPX5_PORTS);
 	DECLARE_BITMAP(bridge_lrn_mask, SPX5_PORTS);
 	DECLARE_BITMAP(vlan_mask[VLAN_N_VID], SPX5_PORTS);
@@ -153,6 +160,10 @@ struct sparx5 {
 	int xtr_irq;
 };
 
+/* sparx5_switchdev.c */
+int sparx5_register_notifier_blocks(struct sparx5 *sparx5);
+void sparx5_unregister_notifier_blocks(struct sparx5 *sparx5);
+
 /* sparx5_packet.c */
 irqreturn_t sparx5_xtr_handler(int irq, void *_priv);
 int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index 8a3008c86534..569a7f7ef0bb 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -117,6 +117,15 @@ static int sparx5_port_stop(struct net_device *ndev)
 	return 0;
 }
 
+static void sparx5_set_rx_mode(struct net_device *dev)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	struct sparx5 *sparx5 = port->sparx5;
+
+	if (!test_bit(port->portno, sparx5->bridge_mask))
+		__dev_mc_sync(dev, sparx5_mc_sync, sparx5_mc_unsync);
+}
+
 static int sparx5_port_get_phys_port_name(struct net_device *dev,
 					  char *buf, size_t len)
 {
@@ -167,6 +176,7 @@ static const struct net_device_ops sparx5_port_netdev_ops = {
 	.ndo_open               = sparx5_port_open,
 	.ndo_stop               = sparx5_port_stop,
 	.ndo_start_xmit         = sparx5_port_xmit_impl,
+	.ndo_set_rx_mode        = sparx5_set_rx_mode,
 	.ndo_get_phys_port_name = sparx5_port_get_phys_port_name,
 	.ndo_set_mac_address    = sparx5_set_mac_address,
 	.ndo_validate_addr      = eth_validate_addr,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
index 05f4f3f9b6e2..09ca7a3bafdc 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -139,6 +139,12 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap)
 		return;
 	}
 
+	/* Everything we see on an interface that is in the HW bridge
+	 * has already been forwarded
+	 */
+	if (test_bit(port->portno, sparx5->bridge_mask))
+		skb->offload_fwd_mark = 1;
+
 	/* Finish up skb */
 	skb_put(skb, byte_cnt - ETH_FCS_LEN);
 	eth_skb_pad(skb);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
new file mode 100644
index 000000000000..19c7cb795b4b
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include <linux/if_bridge.h>
+#include <net/switchdev.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+static struct workqueue_struct *sparx5_owq;
+
+struct sparx5_switchdev_event_work {
+	struct work_struct work;
+	struct switchdev_notifier_fdb_info fdb_info;
+	struct net_device *dev;
+	unsigned long event;
+};
+
+static void sparx5_port_attr_bridge_flags(struct sparx5_port *port,
+					  struct switchdev_brport_flags flags)
+{
+	if (flags.mask & BR_MCAST_FLOOD)
+		sparx5_pgid_update_mask(port, PGID_MC_FLOOD, true);
+}
+
+static void sparx5_attr_stp_state_set(struct sparx5_port *port,
+				      u8 state)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+
+	if (!test_bit(port->portno, sparx5->bridge_mask)) {
+		netdev_err(port->ndev,
+			   "Controlling non-bridged port %d?\n", port->portno);
+		return;
+	}
+
+	switch (state) {
+	case BR_STATE_FORWARDING:
+		set_bit(port->portno, sparx5->bridge_fwd_mask);
+		fallthrough;
+	case BR_STATE_LEARNING:
+		set_bit(port->portno, sparx5->bridge_lrn_mask);
+		break;
+
+	default:
+		/* All other states treated as blocking */
+		clear_bit(port->portno, sparx5->bridge_fwd_mask);
+		clear_bit(port->portno, sparx5->bridge_lrn_mask);
+		break;
+	}
+
+	/* apply the bridge_fwd_mask to all the ports */
+	sparx5_update_fwd(sparx5);
+}
+
+static void sparx5_port_attr_ageing_set(struct sparx5_port *port,
+					unsigned long ageing_clock_t)
+{
+	unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t);
+	u32 ageing_time = jiffies_to_msecs(ageing_jiffies);
+
+	sparx5_set_ageing(port->sparx5, ageing_time);
+}
+
+static int sparx5_port_attr_set(struct net_device *dev,
+				const struct switchdev_attr *attr,
+				struct netlink_ext_ack *extack)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		sparx5_port_attr_bridge_flags(port, attr->u.brport_flags);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+		sparx5_attr_stp_state_set(port, attr->u.stp_state);
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+		sparx5_port_attr_ageing_set(port, attr->u.ageing_time);
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		port->vlan_aware = attr->u.vlan_filtering;
+		sparx5_vlan_port_apply(port->sparx5, port);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int sparx5_port_bridge_join(struct sparx5_port *port,
+				   struct net_device *bridge)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+
+	if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS))
+		/* First bridged port */
+		sparx5->hw_bridge_dev = bridge;
+	else
+		if (sparx5->hw_bridge_dev != bridge)
+			/* This is adding the port to a second bridge, this is
+			 * unsupported
+			 */
+			return -ENODEV;
+
+	set_bit(port->portno, sparx5->bridge_mask);
+
+	/* Port enters in bridge mode therefor don't need to copy to CPU
+	 * frames for multicast in case the bridge is not requesting them
+	 */
+	__dev_mc_unsync(port->ndev, sparx5_mc_unsync);
+
+	return 0;
+}
+
+static void sparx5_port_bridge_leave(struct sparx5_port *port,
+				     struct net_device *bridge)
+{
+	struct sparx5 *sparx5 = port->sparx5;
+
+	clear_bit(port->portno, sparx5->bridge_mask);
+	if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS))
+		sparx5->hw_bridge_dev = NULL;
+
+	/* Clear bridge vlan settings before updating the port settings */
+	port->vlan_aware = 0;
+	port->pvid = NULL_VID;
+	port->vid = NULL_VID;
+
+	/* Port enters in host more therefore restore mc list */
+	__dev_mc_sync(port->ndev, sparx5_mc_sync, sparx5_mc_unsync);
+}
+
+static int sparx5_port_changeupper(struct net_device *dev,
+				   struct netdev_notifier_changeupper_info *info)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	int err = 0;
+
+	if (netif_is_bridge_master(info->upper_dev)) {
+		if (info->linking)
+			err = sparx5_port_bridge_join(port, info->upper_dev);
+		else
+			sparx5_port_bridge_leave(port, info->upper_dev);
+
+		sparx5_vlan_port_apply(port->sparx5, port);
+	}
+
+	return err;
+}
+
+static int sparx5_port_add_addr(struct net_device *dev, bool up)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	struct sparx5 *sparx5 = port->sparx5;
+	u16 vid = port->pvid;
+
+	if (up)
+		sparx5_mact_learn(sparx5, PGID_CPU, port->ndev->dev_addr, vid);
+	else
+		sparx5_mact_forget(sparx5, port->ndev->dev_addr, vid);
+
+	return 0;
+}
+
+static int sparx5_netdevice_port_event(struct net_device *dev,
+				       struct notifier_block *nb,
+				       unsigned long event, void *ptr)
+{
+	int err = 0;
+
+	if (!sparx5_netdevice_check(dev))
+		return 0;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		err = sparx5_port_changeupper(dev, ptr);
+		break;
+	case NETDEV_PRE_UP:
+		err = sparx5_port_add_addr(dev, true);
+		break;
+	case NETDEV_DOWN:
+		err = sparx5_port_add_addr(dev, false);
+		break;
+	}
+
+	return err;
+}
+
+static int sparx5_netdevice_event(struct notifier_block *nb,
+				  unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	int ret = 0;
+
+	ret = sparx5_netdevice_port_event(dev, nb, event, ptr);
+
+	return notifier_from_errno(ret);
+}
+
+static void sparx5_switchdev_bridge_fdb_event_work(struct work_struct *work)
+{
+	struct sparx5_switchdev_event_work *switchdev_work =
+		container_of(work, struct sparx5_switchdev_event_work, work);
+	struct net_device *dev = switchdev_work->dev;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct sparx5_port *port;
+	struct sparx5 *sparx5;
+
+	rtnl_lock();
+	if (!sparx5_netdevice_check(dev))
+		goto out;
+
+	port = netdev_priv(dev);
+	sparx5 = port->sparx5;
+
+	fdb_info = &switchdev_work->fdb_info;
+
+	switch (switchdev_work->event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		if (!fdb_info->added_by_user)
+			break;
+		sparx5_add_mact_entry(sparx5, port, fdb_info->addr,
+				      fdb_info->vid);
+		break;
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		if (!fdb_info->added_by_user)
+			break;
+		sparx5_del_mact_entry(sparx5, fdb_info->addr, fdb_info->vid);
+		break;
+	}
+
+out:
+	rtnl_unlock();
+	kfree(switchdev_work->fdb_info.addr);
+	kfree(switchdev_work);
+	dev_put(dev);
+}
+
+static void sparx5_schedule_work(struct work_struct *work)
+{
+	queue_work(sparx5_owq, work);
+}
+
+static int sparx5_switchdev_event(struct notifier_block *unused,
+				  unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct sparx5_switchdev_event_work *switchdev_work;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct switchdev_notifier_info *info = ptr;
+	int err;
+
+	switch (event) {
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     sparx5_netdevice_check,
+						     sparx5_port_attr_set);
+		return notifier_from_errno(err);
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		fallthrough;
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+		if (!switchdev_work)
+			return NOTIFY_BAD;
+
+		switchdev_work->dev = dev;
+		switchdev_work->event = event;
+
+		fdb_info = container_of(info,
+					struct switchdev_notifier_fdb_info,
+					info);
+		INIT_WORK(&switchdev_work->work,
+			  sparx5_switchdev_bridge_fdb_event_work);
+		memcpy(&switchdev_work->fdb_info, ptr,
+		       sizeof(switchdev_work->fdb_info));
+		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+		if (!switchdev_work->fdb_info.addr)
+			goto err_addr_alloc;
+
+		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+				fdb_info->addr);
+		dev_hold(dev);
+
+		sparx5_schedule_work(&switchdev_work->work);
+		break;
+	}
+
+	return NOTIFY_DONE;
+err_addr_alloc:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
+}
+
+static void sparx5_sync_port_dev_addr(struct sparx5 *sparx5,
+				      struct sparx5_port *port,
+				      u16 vid, bool add)
+{
+	if (!port ||
+	    !test_bit(port->portno, sparx5->bridge_mask))
+		return; /* Skip null/host interfaces */
+
+	/* Bridge connects to vid? */
+	if (add) {
+		/* Add port MAC address from the VLAN */
+		sparx5_mact_learn(sparx5, PGID_CPU,
+				  port->ndev->dev_addr, vid);
+	} else {
+		/* Control port addr visibility depending on
+		 * port VLAN connectivity.
+		 */
+		if (test_bit(port->portno, sparx5->vlan_mask[vid]))
+			sparx5_mact_learn(sparx5, PGID_CPU,
+					  port->ndev->dev_addr, vid);
+		else
+			sparx5_mact_forget(sparx5,
+					   port->ndev->dev_addr, vid);
+	}
+}
+
+static void sparx5_sync_bridge_dev_addr(struct net_device *dev,
+					struct sparx5 *sparx5,
+					u16 vid, bool add)
+{
+	int i;
+
+	/* First, handle bridge address'es */
+	if (add) {
+		sparx5_mact_learn(sparx5, PGID_CPU, dev->dev_addr,
+				  vid);
+		sparx5_mact_learn(sparx5, PGID_BCAST, dev->broadcast,
+				  vid);
+	} else {
+		sparx5_mact_forget(sparx5, dev->dev_addr, vid);
+		sparx5_mact_forget(sparx5, dev->broadcast, vid);
+	}
+
+	/* Now look at bridged ports */
+	for (i = 0; i < SPX5_PORTS; i++)
+		sparx5_sync_port_dev_addr(sparx5, sparx5->ports[i], vid, add);
+}
+
+static int sparx5_handle_port_vlan_add(struct net_device *dev,
+				       struct notifier_block *nb,
+				       const struct switchdev_obj_port_vlan *v)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+
+	if (netif_is_bridge_master(dev)) {
+		if (v->flags & BRIDGE_VLAN_INFO_BRENTRY) {
+			struct sparx5 *sparx5 =
+				container_of(nb, struct sparx5,
+					     switchdev_blocking_nb);
+
+			sparx5_sync_bridge_dev_addr(dev, sparx5, v->vid, true);
+		}
+		return 0;
+	}
+
+	if (!sparx5_netdevice_check(dev))
+		return -EOPNOTSUPP;
+
+	return sparx5_vlan_vid_add(port, v->vid,
+				  v->flags & BRIDGE_VLAN_INFO_PVID,
+				  v->flags & BRIDGE_VLAN_INFO_UNTAGGED);
+}
+
+static int sparx5_handle_port_obj_add(struct net_device *dev,
+				      struct notifier_block *nb,
+				      struct switchdev_notifier_port_obj_info *info)
+{
+	const struct switchdev_obj *obj = info->obj;
+	int err;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = sparx5_handle_port_vlan_add(dev, nb,
+						  SWITCHDEV_OBJ_PORT_VLAN(obj));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	info->handled = true;
+	return err;
+}
+
+static int sparx5_handle_port_vlan_del(struct net_device *dev,
+				       struct notifier_block *nb,
+				       u16 vid)
+{
+	struct sparx5_port *port = netdev_priv(dev);
+	int ret;
+
+	/* Master bridge? */
+	if (netif_is_bridge_master(dev)) {
+		struct sparx5 *sparx5 =
+			container_of(nb, struct sparx5,
+				     switchdev_blocking_nb);
+
+		sparx5_sync_bridge_dev_addr(dev, sparx5, vid, false);
+		return 0;
+	}
+
+	if (!sparx5_netdevice_check(dev))
+		return -EOPNOTSUPP;
+
+	ret = sparx5_vlan_vid_del(port, vid);
+	if (ret)
+		return ret;
+
+	/* Delete the port MAC address with the matching VLAN information */
+	sparx5_mact_forget(port->sparx5, port->ndev->dev_addr, vid);
+
+	return 0;
+}
+
+static int sparx5_handle_port_obj_del(struct net_device *dev,
+				      struct notifier_block *nb,
+				      struct switchdev_notifier_port_obj_info *info)
+{
+	const struct switchdev_obj *obj = info->obj;
+	int err;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = sparx5_handle_port_vlan_del(dev, nb,
+						  SWITCHDEV_OBJ_PORT_VLAN(obj)->vid);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	info->handled = true;
+	return err;
+}
+
+static int sparx5_switchdev_blocking_event(struct notifier_block *nb,
+					   unsigned long event,
+					   void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = sparx5_handle_port_obj_add(dev, nb, ptr);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = sparx5_handle_port_obj_del(dev, nb, ptr);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     sparx5_netdevice_check,
+						     sparx5_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	return NOTIFY_DONE;
+}
+
+int sparx5_register_notifier_blocks(struct sparx5 *s5)
+{
+	int err;
+
+	s5->netdevice_nb.notifier_call = sparx5_netdevice_event;
+	err = register_netdevice_notifier(&s5->netdevice_nb);
+	if (err)
+		return err;
+
+	s5->switchdev_nb.notifier_call = sparx5_switchdev_event;
+	err = register_switchdev_notifier(&s5->switchdev_nb);
+	if (err)
+		goto err_switchdev_nb;
+
+	s5->switchdev_blocking_nb.notifier_call = sparx5_switchdev_blocking_event;
+	err = register_switchdev_blocking_notifier(&s5->switchdev_blocking_nb);
+	if (err)
+		goto err_switchdev_blocking_nb;
+
+	sparx5_owq = alloc_ordered_workqueue("sparx5_order", 0);
+	if (!sparx5_owq)
+		goto err_switchdev_blocking_nb;
+
+	return 0;
+
+err_switchdev_blocking_nb:
+	unregister_switchdev_notifier(&s5->switchdev_nb);
+err_switchdev_nb:
+	unregister_netdevice_notifier(&s5->netdevice_nb);
+
+	return err;
+}
+
+void sparx5_unregister_notifier_blocks(struct sparx5 *s5)
+{
+	destroy_workqueue(sparx5_owq);
+
+	unregister_switchdev_blocking_notifier(&s5->switchdev_blocking_nb);
+	unregister_switchdev_notifier(&s5->switchdev_nb);
+	unregister_netdevice_notifier(&s5->netdevice_nb);
+}

From 0a9d48ad0d09ce869ea44e405e066959aa5d5371 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:56 +0200
Subject: [PATCH 2834/3292] net: sparx5: add calendar bandwidth allocation
 support

This configures the Sparx5 calendars according to the bandwidth
requested in the Device Tree nodes.
It also checks if the total requested bandwidth is within the
specs of the detected Sparx5 models limits.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |   2 +-
 .../microchip/sparx5/sparx5_calendar.c        | 596 ++++++++++++++++++
 .../ethernet/microchip/sparx5/sparx5_main.c   |   9 +-
 .../ethernet/microchip/sparx5/sparx5_main.h   |   4 +
 4 files changed, 609 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index 5bf46b2fbdee..7f0035aefcdd 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
  sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \
- sparx5_switchdev.o
+ sparx5_switchdev.o sparx5_calendar.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c b/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c
new file mode 100644
index 000000000000..76a8bb596aec
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_calendar.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+
+/* QSYS calendar information */
+#define SPX5_PORTS_PER_CALREG          10  /* Ports mapped in a calendar register */
+#define SPX5_CALBITS_PER_PORT          3   /* Bit per port in calendar register */
+
+/* DSM calendar information */
+#define SPX5_DSM_CAL_LEN               64
+#define SPX5_DSM_CAL_EMPTY             0xFFFF
+#define SPX5_DSM_CAL_MAX_DEVS_PER_TAXI 13
+#define SPX5_DSM_CAL_TAXIS             8
+#define SPX5_DSM_CAL_BW_LOSS           553
+
+#define SPX5_TAXI_PORT_MAX             70
+
+#define SPEED_12500                    12500
+
+/* Maps from taxis to port numbers */
+static u32 sparx5_taxi_ports[SPX5_DSM_CAL_TAXIS][SPX5_DSM_CAL_MAX_DEVS_PER_TAXI] = {
+	{57, 12, 0, 1, 2, 16, 17, 18, 19, 20, 21, 22, 23},
+	{58, 13, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31},
+	{59, 14, 6, 7, 8, 32, 33, 34, 35, 36, 37, 38, 39},
+	{60, 15, 9, 10, 11, 40, 41, 42, 43, 44, 45, 46, 47},
+	{61, 48, 49, 50, 99, 99, 99, 99, 99, 99, 99, 99, 99},
+	{62, 51, 52, 53, 99, 99, 99, 99, 99, 99, 99, 99, 99},
+	{56, 63, 54, 55, 99, 99, 99, 99, 99, 99, 99, 99, 99},
+	{64, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99},
+};
+
+struct sparx5_calendar_data {
+	u32 schedule[SPX5_DSM_CAL_LEN];
+	u32 avg_dist[SPX5_DSM_CAL_MAX_DEVS_PER_TAXI];
+	u32 taxi_ports[SPX5_DSM_CAL_MAX_DEVS_PER_TAXI];
+	u32 taxi_speeds[SPX5_DSM_CAL_MAX_DEVS_PER_TAXI];
+	u32 dev_slots[SPX5_DSM_CAL_MAX_DEVS_PER_TAXI];
+	u32 new_slots[SPX5_DSM_CAL_LEN];
+	u32 temp_sched[SPX5_DSM_CAL_LEN];
+	u32 indices[SPX5_DSM_CAL_LEN];
+	u32 short_list[SPX5_DSM_CAL_LEN];
+	u32 long_list[SPX5_DSM_CAL_LEN];
+};
+
+static u32 sparx5_target_bandwidth(struct sparx5 *sparx5)
+{
+	switch (sparx5->target_ct) {
+	case SPX5_TARGET_CT_7546:
+	case SPX5_TARGET_CT_7546TSN:
+		return 65000;
+	case SPX5_TARGET_CT_7549:
+	case SPX5_TARGET_CT_7549TSN:
+		return 91000;
+	case SPX5_TARGET_CT_7552:
+	case SPX5_TARGET_CT_7552TSN:
+		return 129000;
+	case SPX5_TARGET_CT_7556:
+	case SPX5_TARGET_CT_7556TSN:
+		return 161000;
+	case SPX5_TARGET_CT_7558:
+	case SPX5_TARGET_CT_7558TSN:
+		return 201000;
+	default:
+		return 0;
+	}
+}
+
+/* This is used in calendar configuration */
+enum sparx5_cal_bw {
+	SPX5_CAL_SPEED_NONE = 0,
+	SPX5_CAL_SPEED_1G   = 1,
+	SPX5_CAL_SPEED_2G5  = 2,
+	SPX5_CAL_SPEED_5G   = 3,
+	SPX5_CAL_SPEED_10G  = 4,
+	SPX5_CAL_SPEED_25G  = 5,
+	SPX5_CAL_SPEED_0G5  = 6,
+	SPX5_CAL_SPEED_12G5 = 7
+};
+
+static u32 sparx5_clk_to_bandwidth(enum sparx5_core_clockfreq cclock)
+{
+	switch (cclock) {
+	case SPX5_CORE_CLOCK_250MHZ: return 83000; /* 250000 / 3 */
+	case SPX5_CORE_CLOCK_500MHZ: return 166000; /* 500000 / 3 */
+	case SPX5_CORE_CLOCK_625MHZ: return  208000; /* 625000 / 3 */
+	default: return 0;
+	}
+	return 0;
+}
+
+static u32 sparx5_cal_speed_to_value(enum sparx5_cal_bw speed)
+{
+	switch (speed) {
+	case SPX5_CAL_SPEED_1G:   return 1000;
+	case SPX5_CAL_SPEED_2G5:  return 2500;
+	case SPX5_CAL_SPEED_5G:   return 5000;
+	case SPX5_CAL_SPEED_10G:  return 10000;
+	case SPX5_CAL_SPEED_25G:  return 25000;
+	case SPX5_CAL_SPEED_0G5:  return 500;
+	case SPX5_CAL_SPEED_12G5: return 12500;
+	default: return 0;
+	}
+}
+
+static u32 sparx5_bandwidth_to_calendar(u32 bw)
+{
+	switch (bw) {
+	case SPEED_10:      return SPX5_CAL_SPEED_0G5;
+	case SPEED_100:     return SPX5_CAL_SPEED_0G5;
+	case SPEED_1000:    return SPX5_CAL_SPEED_1G;
+	case SPEED_2500:    return SPX5_CAL_SPEED_2G5;
+	case SPEED_5000:    return SPX5_CAL_SPEED_5G;
+	case SPEED_10000:   return SPX5_CAL_SPEED_10G;
+	case SPEED_12500:   return SPX5_CAL_SPEED_12G5;
+	case SPEED_25000:   return SPX5_CAL_SPEED_25G;
+	case SPEED_UNKNOWN: return SPX5_CAL_SPEED_1G;
+	default:            return SPX5_CAL_SPEED_NONE;
+	}
+}
+
+static enum sparx5_cal_bw sparx5_get_port_cal_speed(struct sparx5 *sparx5,
+						    u32 portno)
+{
+	struct sparx5_port *port;
+
+	if (portno >= SPX5_PORTS) {
+		/* Internal ports */
+		if (portno == SPX5_PORT_CPU_0 || portno == SPX5_PORT_CPU_1) {
+			/* Equals 1.25G */
+			return SPX5_CAL_SPEED_2G5;
+		} else if (portno == SPX5_PORT_VD0) {
+			/* IPMC only idle BW */
+			return SPX5_CAL_SPEED_NONE;
+		} else if (portno == SPX5_PORT_VD1) {
+			/* OAM only idle BW */
+			return SPX5_CAL_SPEED_NONE;
+		} else if (portno == SPX5_PORT_VD2) {
+			/* IPinIP gets only idle BW */
+			return SPX5_CAL_SPEED_NONE;
+		}
+		/* not in port map */
+		return SPX5_CAL_SPEED_NONE;
+	}
+	/* Front ports - may be used */
+	port = sparx5->ports[portno];
+	if (!port)
+		return SPX5_CAL_SPEED_NONE;
+	return sparx5_bandwidth_to_calendar(port->conf.bandwidth);
+}
+
+/* Auto configure the QSYS calendar based on port configuration */
+int sparx5_config_auto_calendar(struct sparx5 *sparx5)
+{
+	u32 cal[7], value, idx, portno;
+	u32 max_core_bw;
+	u32 total_bw = 0, used_port_bw = 0;
+	int err = 0;
+	enum sparx5_cal_bw spd;
+
+	memset(cal, 0, sizeof(cal));
+
+	max_core_bw = sparx5_clk_to_bandwidth(sparx5->coreclock);
+	if (max_core_bw == 0) {
+		dev_err(sparx5->dev, "Core clock not supported");
+		return -EINVAL;
+	}
+
+	/* Setup the calendar with the bandwidth to each port */
+	for (portno = 0; portno < SPX5_PORTS_ALL; portno++) {
+		u64 reg, offset, this_bw;
+
+		spd = sparx5_get_port_cal_speed(sparx5, portno);
+		if (spd == SPX5_CAL_SPEED_NONE)
+			continue;
+
+		this_bw = sparx5_cal_speed_to_value(spd);
+		if (portno < SPX5_PORTS)
+			used_port_bw += this_bw;
+		else
+			/* Internal ports are granted half the value */
+			this_bw = this_bw / 2;
+		total_bw += this_bw;
+		reg = portno;
+		offset = do_div(reg, SPX5_PORTS_PER_CALREG);
+		cal[reg] |= spd << (offset * SPX5_CALBITS_PER_PORT);
+	}
+
+	if (used_port_bw > sparx5_target_bandwidth(sparx5)) {
+		dev_err(sparx5->dev,
+			"Port BW %u above target BW %u\n",
+			used_port_bw, sparx5_target_bandwidth(sparx5));
+		return -EINVAL;
+	}
+
+	if (total_bw > max_core_bw) {
+		dev_err(sparx5->dev,
+			"Total BW %u above switch core BW %u\n",
+			total_bw, max_core_bw);
+		return -EINVAL;
+	}
+
+	/* Halt the calendar while changing it */
+	spx5_rmw(QSYS_CAL_CTRL_CAL_MODE_SET(10),
+		 QSYS_CAL_CTRL_CAL_MODE,
+		 sparx5, QSYS_CAL_CTRL);
+
+	/* Assign port bandwidth to auto calendar */
+	for (idx = 0; idx < ARRAY_SIZE(cal); idx++)
+		spx5_wr(cal[idx], sparx5, QSYS_CAL_AUTO(idx));
+
+	/* Increase grant rate of all ports to account for
+	 * core clock ppm deviations
+	 */
+	spx5_rmw(QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE_SET(671), /* 672->671 */
+		 QSYS_CAL_CTRL_CAL_AUTO_GRANT_RATE,
+		 sparx5,
+		 QSYS_CAL_CTRL);
+
+	/* Grant idle usage to VD 0-2 */
+	for (idx = 2; idx < 5; idx++)
+		spx5_wr(HSCH_OUTB_SHARE_ENA_OUTB_SHARE_ENA_SET(12),
+			sparx5,
+			HSCH_OUTB_SHARE_ENA(idx));
+
+	/* Enable Auto mode */
+	spx5_rmw(QSYS_CAL_CTRL_CAL_MODE_SET(8),
+		 QSYS_CAL_CTRL_CAL_MODE,
+		 sparx5, QSYS_CAL_CTRL);
+
+	/* Verify successful calendar config */
+	value = spx5_rd(sparx5, QSYS_CAL_CTRL);
+	if (QSYS_CAL_CTRL_CAL_AUTO_ERROR_GET(value)) {
+		dev_err(sparx5->dev, "QSYS calendar error\n");
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static u32 sparx5_dsm_exb_gcd(u32 a, u32 b)
+{
+	if (b == 0)
+		return a;
+	return sparx5_dsm_exb_gcd(b, a % b);
+}
+
+static u32 sparx5_dsm_cal_len(u32 *cal)
+{
+	u32 idx = 0, len = 0;
+
+	while (idx < SPX5_DSM_CAL_LEN) {
+		if (cal[idx] != SPX5_DSM_CAL_EMPTY)
+			len++;
+		idx++;
+	}
+	return len;
+}
+
+static u32 sparx5_dsm_cp_cal(u32 *sched)
+{
+	u32 idx = 0, tmp;
+
+	while (idx < SPX5_DSM_CAL_LEN) {
+		if (sched[idx] != SPX5_DSM_CAL_EMPTY) {
+			tmp = sched[idx];
+			sched[idx] = SPX5_DSM_CAL_EMPTY;
+			return tmp;
+		}
+		idx++;
+	}
+	return SPX5_DSM_CAL_EMPTY;
+}
+
+static int sparx5_dsm_calendar_calc(struct sparx5 *sparx5, u32 taxi,
+				    struct sparx5_calendar_data *data)
+{
+	bool slow_mode;
+	u32 gcd, idx, sum, min, factor;
+	u32 num_of_slots, slot_spd, empty_slots;
+	u32 taxi_bw, clk_period_ps;
+
+	clk_period_ps = sparx5_clk_period(sparx5->coreclock);
+	taxi_bw = 128 * 1000000 / clk_period_ps;
+	slow_mode = !!(clk_period_ps > 2000);
+	memcpy(data->taxi_ports, &sparx5_taxi_ports[taxi],
+	       sizeof(data->taxi_ports));
+
+	for (idx = 0; idx < SPX5_DSM_CAL_LEN; idx++) {
+		data->new_slots[idx] = SPX5_DSM_CAL_EMPTY;
+		data->schedule[idx] = SPX5_DSM_CAL_EMPTY;
+		data->temp_sched[idx] = SPX5_DSM_CAL_EMPTY;
+	}
+	/* Default empty calendar */
+	data->schedule[0] = SPX5_DSM_CAL_MAX_DEVS_PER_TAXI;
+
+	/* Map ports to taxi positions */
+	for (idx = 0; idx < SPX5_DSM_CAL_MAX_DEVS_PER_TAXI; idx++) {
+		u32 portno = data->taxi_ports[idx];
+
+		if (portno < SPX5_TAXI_PORT_MAX) {
+			data->taxi_speeds[idx] = sparx5_cal_speed_to_value
+				(sparx5_get_port_cal_speed(sparx5, portno));
+		} else {
+			data->taxi_speeds[idx] = 0;
+		}
+	}
+
+	sum = 0;
+	min = 25000;
+	for (idx = 0; idx < ARRAY_SIZE(data->taxi_speeds); idx++) {
+		u32 jdx;
+
+		sum += data->taxi_speeds[idx];
+		if (data->taxi_speeds[idx] && data->taxi_speeds[idx] < min)
+			min = data->taxi_speeds[idx];
+		gcd = min;
+		for (jdx = 0; jdx < ARRAY_SIZE(data->taxi_speeds); jdx++)
+			gcd = sparx5_dsm_exb_gcd(gcd, data->taxi_speeds[jdx]);
+	}
+	if (sum == 0) /* Empty calendar */
+		return 0;
+	/* Make room for overhead traffic */
+	factor = 100 * 100 * 1000 / (100 * 100 - SPX5_DSM_CAL_BW_LOSS);
+
+	if (sum * factor > (taxi_bw * 1000)) {
+		dev_err(sparx5->dev,
+			"Taxi %u, Requested BW %u above available BW %u\n",
+			taxi, sum, taxi_bw);
+		return -EINVAL;
+	}
+	for (idx = 0; idx < 4; idx++) {
+		u32 raw_spd;
+
+		if (idx == 0)
+			raw_spd = gcd / 5;
+		else if (idx == 1)
+			raw_spd = gcd / 2;
+		else if (idx == 2)
+			raw_spd = gcd;
+		else
+			raw_spd = min;
+		slot_spd = raw_spd * factor / 1000;
+		num_of_slots = taxi_bw / slot_spd;
+		if (num_of_slots <= 64)
+			break;
+	}
+
+	num_of_slots = num_of_slots > 64 ? 64 : num_of_slots;
+	slot_spd = taxi_bw / num_of_slots;
+
+	sum = 0;
+	for (idx = 0; idx < ARRAY_SIZE(data->taxi_speeds); idx++) {
+		u32 spd = data->taxi_speeds[idx];
+		u32 adjusted_speed = data->taxi_speeds[idx] * factor / 1000;
+
+		if (adjusted_speed > 0) {
+			data->avg_dist[idx] = (128 * 1000000 * 10) /
+				(adjusted_speed * clk_period_ps);
+		} else {
+			data->avg_dist[idx] = -1;
+		}
+		data->dev_slots[idx] = ((spd * factor / slot_spd) + 999) / 1000;
+		if (spd != 25000 && (spd != 10000 || !slow_mode)) {
+			if (num_of_slots < (5 * data->dev_slots[idx])) {
+				dev_err(sparx5->dev,
+					"Taxi %u, speed %u, Low slot sep.\n",
+					taxi, spd);
+				return -EINVAL;
+			}
+		}
+		sum += data->dev_slots[idx];
+		if (sum > num_of_slots) {
+			dev_err(sparx5->dev,
+				"Taxi %u with overhead factor %u\n",
+				taxi, factor);
+			return -EINVAL;
+		}
+	}
+
+	empty_slots = num_of_slots - sum;
+
+	for (idx = 0; idx < empty_slots; idx++)
+		data->schedule[idx] = SPX5_DSM_CAL_MAX_DEVS_PER_TAXI;
+
+	for (idx = 1; idx < num_of_slots; idx++) {
+		u32 indices_len = 0;
+		u32 slot, jdx, kdx, ts;
+		s32 cnt;
+		u32 num_of_old_slots, num_of_new_slots, tgt_score;
+
+		for (slot = 0; slot < ARRAY_SIZE(data->dev_slots); slot++) {
+			if (data->dev_slots[slot] == idx) {
+				data->indices[indices_len] = slot;
+				indices_len++;
+			}
+		}
+		if (indices_len == 0)
+			continue;
+		kdx = 0;
+		for (slot = 0; slot < idx; slot++) {
+			for (jdx = 0; jdx < indices_len; jdx++, kdx++)
+				data->new_slots[kdx] = data->indices[jdx];
+		}
+
+		for (slot = 0; slot < SPX5_DSM_CAL_LEN; slot++) {
+			if (data->schedule[slot] == SPX5_DSM_CAL_EMPTY)
+				break;
+		}
+
+		num_of_old_slots =  slot;
+		num_of_new_slots =  kdx;
+		cnt = 0;
+		ts = 0;
+
+		if (num_of_new_slots > num_of_old_slots) {
+			memcpy(data->short_list, data->schedule,
+			       sizeof(data->short_list));
+			memcpy(data->long_list, data->new_slots,
+			       sizeof(data->long_list));
+			tgt_score = 100000 * num_of_old_slots /
+				num_of_new_slots;
+		} else {
+			memcpy(data->short_list, data->new_slots,
+			       sizeof(data->short_list));
+			memcpy(data->long_list, data->schedule,
+			       sizeof(data->long_list));
+			tgt_score = 100000 * num_of_new_slots /
+				num_of_old_slots;
+		}
+
+		while (sparx5_dsm_cal_len(data->short_list) > 0 ||
+		       sparx5_dsm_cal_len(data->long_list) > 0) {
+			u32 act = 0;
+
+			if (sparx5_dsm_cal_len(data->short_list) > 0) {
+				data->temp_sched[ts] =
+					sparx5_dsm_cp_cal(data->short_list);
+				ts++;
+				cnt += 100000;
+				act = 1;
+			}
+			while (sparx5_dsm_cal_len(data->long_list) > 0 &&
+			       cnt > 0) {
+				data->temp_sched[ts] =
+					sparx5_dsm_cp_cal(data->long_list);
+				ts++;
+				cnt -= tgt_score;
+				act = 1;
+			}
+			if (act == 0) {
+				dev_err(sparx5->dev,
+					"Error in DSM calendar calculation\n");
+				return -EINVAL;
+			}
+		}
+
+		for (slot = 0; slot < SPX5_DSM_CAL_LEN; slot++) {
+			if (data->temp_sched[slot] == SPX5_DSM_CAL_EMPTY)
+				break;
+		}
+		for (slot = 0; slot < SPX5_DSM_CAL_LEN; slot++) {
+			data->schedule[slot] = data->temp_sched[slot];
+			data->temp_sched[slot] = SPX5_DSM_CAL_EMPTY;
+			data->new_slots[slot] = SPX5_DSM_CAL_EMPTY;
+		}
+	}
+	return 0;
+}
+
+static int sparx5_dsm_calendar_check(struct sparx5 *sparx5,
+				     struct sparx5_calendar_data *data)
+{
+	u32 num_of_slots, idx, port;
+	int cnt, max_dist;
+	u32 slot_indices[SPX5_DSM_CAL_LEN], distances[SPX5_DSM_CAL_LEN];
+	u32 cal_length = sparx5_dsm_cal_len(data->schedule);
+
+	for (port = 0; port < SPX5_DSM_CAL_MAX_DEVS_PER_TAXI; port++) {
+		num_of_slots = 0;
+		max_dist = data->avg_dist[port];
+		for (idx = 0; idx < SPX5_DSM_CAL_LEN; idx++) {
+			slot_indices[idx] = SPX5_DSM_CAL_EMPTY;
+			distances[idx] = SPX5_DSM_CAL_EMPTY;
+		}
+
+		for (idx = 0; idx < cal_length; idx++) {
+			if (data->schedule[idx] == port) {
+				slot_indices[num_of_slots] = idx;
+				num_of_slots++;
+			}
+		}
+
+		slot_indices[num_of_slots] = slot_indices[0] + cal_length;
+
+		for (idx = 0; idx < num_of_slots; idx++) {
+			distances[idx] = (slot_indices[idx + 1] -
+					  slot_indices[idx]) * 10;
+		}
+
+		for (idx = 0; idx < num_of_slots; idx++) {
+			u32 jdx, kdx;
+
+			cnt = distances[idx] - max_dist;
+			if (cnt < 0)
+				cnt = -cnt;
+			kdx = 0;
+			for (jdx = (idx + 1) % num_of_slots;
+			     jdx != idx;
+			     jdx = (jdx + 1) % num_of_slots, kdx++) {
+				cnt =  cnt + distances[jdx] - max_dist;
+				if (cnt < 0)
+					cnt = -cnt;
+				if (cnt > max_dist)
+					goto check_err;
+			}
+		}
+	}
+	return 0;
+check_err:
+	dev_err(sparx5->dev,
+		"Port %u: distance %u above limit %d\n",
+		port, cnt, max_dist);
+	return -EINVAL;
+}
+
+static int sparx5_dsm_calendar_update(struct sparx5 *sparx5, u32 taxi,
+				      struct sparx5_calendar_data *data)
+{
+	u32 idx;
+	u32 cal_len = sparx5_dsm_cal_len(data->schedule), len;
+
+	spx5_wr(DSM_TAXI_CAL_CFG_CAL_PGM_ENA_SET(1),
+		sparx5,
+		DSM_TAXI_CAL_CFG(taxi));
+	for (idx = 0; idx < cal_len; idx++) {
+		spx5_rmw(DSM_TAXI_CAL_CFG_CAL_IDX_SET(idx),
+			 DSM_TAXI_CAL_CFG_CAL_IDX,
+			 sparx5,
+			 DSM_TAXI_CAL_CFG(taxi));
+		spx5_rmw(DSM_TAXI_CAL_CFG_CAL_PGM_VAL_SET(data->schedule[idx]),
+			 DSM_TAXI_CAL_CFG_CAL_PGM_VAL,
+			 sparx5,
+			 DSM_TAXI_CAL_CFG(taxi));
+	}
+	spx5_wr(DSM_TAXI_CAL_CFG_CAL_PGM_ENA_SET(0),
+		sparx5,
+		DSM_TAXI_CAL_CFG(taxi));
+	len = DSM_TAXI_CAL_CFG_CAL_CUR_LEN_GET(spx5_rd(sparx5,
+						       DSM_TAXI_CAL_CFG(taxi)));
+	if (len != cal_len - 1)
+		goto update_err;
+	return 0;
+update_err:
+	dev_err(sparx5->dev, "Incorrect calendar length: %u\n", len);
+	return -EINVAL;
+}
+
+/* Configure the DSM calendar based on port configuration */
+int sparx5_config_dsm_calendar(struct sparx5 *sparx5)
+{
+	int taxi;
+	struct sparx5_calendar_data *data;
+	int err = 0;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	for (taxi = 0; taxi < SPX5_DSM_CAL_TAXIS; ++taxi) {
+		err = sparx5_dsm_calendar_calc(sparx5, taxi, data);
+		if (err) {
+			dev_err(sparx5->dev, "DSM calendar calculation failed\n");
+			goto cal_out;
+		}
+		err = sparx5_dsm_calendar_check(sparx5, data);
+		if (err) {
+			dev_err(sparx5->dev, "DSM calendar check failed\n");
+			goto cal_out;
+		}
+		err = sparx5_dsm_calendar_update(sparx5, taxi, data);
+		if (err) {
+			dev_err(sparx5->dev, "DSM calendar update failed\n");
+			goto cal_out;
+		}
+	}
+cal_out:
+	kfree(data);
+	return err;
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 549ec2d7bd5a..ffd761b8f975 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -606,7 +606,14 @@ static int sparx5_start(struct sparx5 *sparx5)
 	/* Enable queue limitation watermarks */
 	sparx5_qlim_set(sparx5);
 
-	/* Resource calendar support to be added in later patches */
+	err = sparx5_config_auto_calendar(sparx5);
+	if (err)
+		return err;
+
+	err = sparx5_config_dsm_calendar(sparx5);
+	if (err)
+		return err;
+
 
 	/* Init mact_sw struct */
 	mutex_init(&sparx5->mact_lock);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 67bad6878e98..4d7a024fedc0 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -199,6 +199,10 @@ int sparx5_vlan_vid_add(struct sparx5_port *port, u16 vid, bool pvid,
 int sparx5_vlan_vid_del(struct sparx5_port *port, u16 vid);
 void sparx5_vlan_port_apply(struct sparx5 *sparx5, struct sparx5_port *port);
 
+/* sparx5_calendar.c */
+int sparx5_config_auto_calendar(struct sparx5 *sparx5);
+int sparx5_config_dsm_calendar(struct sparx5 *sparx5);
+
 /* sparx5_netdev.c */
 bool sparx5_netdevice_check(const struct net_device *dev);
 struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno);

From af4b11022e2d39865e390dd50946c4e8703ec3c7 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:57 +0200
Subject: [PATCH 2835/3292] net: sparx5: add ethtool configuration and
 statistics support

This adds statistic counters for the network interfaces provided
by the driver.  It also adds CPU port counters (which are not
exposed by ethtool).
This also adds support for configuring the network interface
parameters via ethtool: speed, duplex, aneg etc.

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/microchip/sparx5/Makefile    |    2 +-
 .../microchip/sparx5/sparx5_ethtool.c         | 1227 +++++++++++++++++
 .../ethernet/microchip/sparx5/sparx5_main.c   |    4 +
 .../ethernet/microchip/sparx5/sparx5_main.h   |   14 +
 .../ethernet/microchip/sparx5/sparx5_netdev.c |    2 +
 5 files changed, 1248 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c

diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile
index 7f0035aefcdd..faa8f07a6b75 100644
--- a/drivers/net/ethernet/microchip/sparx5/Makefile
+++ b/drivers/net/ethernet/microchip/sparx5/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
  sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \
- sparx5_switchdev.o sparx5_calendar.o
+ sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
new file mode 100644
index 000000000000..59783fc46a7b
--- /dev/null
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c
@@ -0,0 +1,1227 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ */
+
+#include <linux/ethtool.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+#include "sparx5_port.h"
+
+/* Index of ANA_AC port counters */
+#define SPX5_PORT_POLICER_DROPS 0
+
+/* Add a potentially wrapping 32 bit value to a 64 bit counter */
+static void sparx5_update_counter(u64 *cnt, u32 val)
+{
+	if (val < (*cnt & U32_MAX))
+		*cnt += (u64)1 << 32; /* value has wrapped */
+	*cnt = (*cnt & ~(u64)U32_MAX) + val;
+}
+
+enum sparx5_stats_entry {
+	spx5_stats_rx_symbol_err_cnt = 0,
+	spx5_stats_pmac_rx_symbol_err_cnt = 1,
+	spx5_stats_tx_uc_cnt = 2,
+	spx5_stats_pmac_tx_uc_cnt = 3,
+	spx5_stats_tx_mc_cnt = 4,
+	spx5_stats_tx_bc_cnt = 5,
+	spx5_stats_tx_backoff1_cnt = 6,
+	spx5_stats_tx_multi_coll_cnt = 7,
+	spx5_stats_rx_uc_cnt = 8,
+	spx5_stats_pmac_rx_uc_cnt = 9,
+	spx5_stats_rx_mc_cnt = 10,
+	spx5_stats_rx_bc_cnt = 11,
+	spx5_stats_rx_crc_err_cnt = 12,
+	spx5_stats_pmac_rx_crc_err_cnt = 13,
+	spx5_stats_rx_alignment_lost_cnt = 14,
+	spx5_stats_pmac_rx_alignment_lost_cnt = 15,
+	spx5_stats_tx_ok_bytes_cnt = 16,
+	spx5_stats_pmac_tx_ok_bytes_cnt = 17,
+	spx5_stats_tx_defer_cnt = 18,
+	spx5_stats_tx_late_coll_cnt = 19,
+	spx5_stats_tx_xcoll_cnt = 20,
+	spx5_stats_tx_csense_cnt = 21,
+	spx5_stats_rx_ok_bytes_cnt = 22,
+	spx5_stats_pmac_rx_ok_bytes_cnt = 23,
+	spx5_stats_pmac_tx_mc_cnt = 24,
+	spx5_stats_pmac_tx_bc_cnt = 25,
+	spx5_stats_tx_xdefer_cnt = 26,
+	spx5_stats_pmac_rx_mc_cnt = 27,
+	spx5_stats_pmac_rx_bc_cnt = 28,
+	spx5_stats_rx_in_range_len_err_cnt = 29,
+	spx5_stats_pmac_rx_in_range_len_err_cnt = 30,
+	spx5_stats_rx_out_of_range_len_err_cnt = 31,
+	spx5_stats_pmac_rx_out_of_range_len_err_cnt = 32,
+	spx5_stats_rx_oversize_cnt = 33,
+	spx5_stats_pmac_rx_oversize_cnt = 34,
+	spx5_stats_tx_pause_cnt = 35,
+	spx5_stats_pmac_tx_pause_cnt = 36,
+	spx5_stats_rx_pause_cnt = 37,
+	spx5_stats_pmac_rx_pause_cnt = 38,
+	spx5_stats_rx_unsup_opcode_cnt = 39,
+	spx5_stats_pmac_rx_unsup_opcode_cnt = 40,
+	spx5_stats_rx_undersize_cnt = 41,
+	spx5_stats_pmac_rx_undersize_cnt = 42,
+	spx5_stats_rx_fragments_cnt = 43,
+	spx5_stats_pmac_rx_fragments_cnt = 44,
+	spx5_stats_rx_jabbers_cnt = 45,
+	spx5_stats_pmac_rx_jabbers_cnt = 46,
+	spx5_stats_rx_size64_cnt = 47,
+	spx5_stats_pmac_rx_size64_cnt = 48,
+	spx5_stats_rx_size65to127_cnt = 49,
+	spx5_stats_pmac_rx_size65to127_cnt = 50,
+	spx5_stats_rx_size128to255_cnt = 51,
+	spx5_stats_pmac_rx_size128to255_cnt = 52,
+	spx5_stats_rx_size256to511_cnt = 53,
+	spx5_stats_pmac_rx_size256to511_cnt = 54,
+	spx5_stats_rx_size512to1023_cnt = 55,
+	spx5_stats_pmac_rx_size512to1023_cnt = 56,
+	spx5_stats_rx_size1024to1518_cnt = 57,
+	spx5_stats_pmac_rx_size1024to1518_cnt = 58,
+	spx5_stats_rx_size1519tomax_cnt = 59,
+	spx5_stats_pmac_rx_size1519tomax_cnt = 60,
+	spx5_stats_tx_size64_cnt = 61,
+	spx5_stats_pmac_tx_size64_cnt = 62,
+	spx5_stats_tx_size65to127_cnt = 63,
+	spx5_stats_pmac_tx_size65to127_cnt = 64,
+	spx5_stats_tx_size128to255_cnt = 65,
+	spx5_stats_pmac_tx_size128to255_cnt = 66,
+	spx5_stats_tx_size256to511_cnt = 67,
+	spx5_stats_pmac_tx_size256to511_cnt = 68,
+	spx5_stats_tx_size512to1023_cnt = 69,
+	spx5_stats_pmac_tx_size512to1023_cnt = 70,
+	spx5_stats_tx_size1024to1518_cnt = 71,
+	spx5_stats_pmac_tx_size1024to1518_cnt = 72,
+	spx5_stats_tx_size1519tomax_cnt = 73,
+	spx5_stats_pmac_tx_size1519tomax_cnt = 74,
+	spx5_stats_mm_rx_assembly_err_cnt = 75,
+	spx5_stats_mm_rx_assembly_ok_cnt = 76,
+	spx5_stats_mm_rx_merge_frag_cnt = 77,
+	spx5_stats_mm_rx_smd_err_cnt = 78,
+	spx5_stats_mm_tx_pfragment_cnt = 79,
+	spx5_stats_rx_bad_bytes_cnt = 80,
+	spx5_stats_pmac_rx_bad_bytes_cnt = 81,
+	spx5_stats_rx_in_bytes_cnt = 82,
+	spx5_stats_rx_ipg_shrink_cnt = 83,
+	spx5_stats_rx_sync_lost_err_cnt = 84,
+	spx5_stats_rx_tagged_frms_cnt = 85,
+	spx5_stats_rx_untagged_frms_cnt = 86,
+	spx5_stats_tx_out_bytes_cnt = 87,
+	spx5_stats_tx_tagged_frms_cnt = 88,
+	spx5_stats_tx_untagged_frms_cnt = 89,
+	spx5_stats_rx_hih_cksm_err_cnt = 90,
+	spx5_stats_pmac_rx_hih_cksm_err_cnt = 91,
+	spx5_stats_rx_xgmii_prot_err_cnt = 92,
+	spx5_stats_pmac_rx_xgmii_prot_err_cnt = 93,
+	spx5_stats_ana_ac_port_stat_lsb_cnt = 94,
+	spx5_stats_green_p0_rx_fwd = 95,
+	spx5_stats_green_p0_rx_port_drop = 111,
+	spx5_stats_green_p0_tx_port = 127,
+	spx5_stats_rx_local_drop = 143,
+	spx5_stats_tx_local_drop = 144,
+	spx5_stats_count = 145,
+};
+
+static const char *const sparx5_stats_layout[] = {
+	"mm_rx_assembly_err_cnt",
+	"mm_rx_assembly_ok_cnt",
+	"mm_rx_merge_frag_cnt",
+	"mm_rx_smd_err_cnt",
+	"mm_tx_pfragment_cnt",
+	"rx_bad_bytes_cnt",
+	"pmac_rx_bad_bytes_cnt",
+	"rx_in_bytes_cnt",
+	"rx_ipg_shrink_cnt",
+	"rx_sync_lost_err_cnt",
+	"rx_tagged_frms_cnt",
+	"rx_untagged_frms_cnt",
+	"tx_out_bytes_cnt",
+	"tx_tagged_frms_cnt",
+	"tx_untagged_frms_cnt",
+	"rx_hih_cksm_err_cnt",
+	"pmac_rx_hih_cksm_err_cnt",
+	"rx_xgmii_prot_err_cnt",
+	"pmac_rx_xgmii_prot_err_cnt",
+	"rx_port_policer_drop",
+	"rx_fwd_green_p0",
+	"rx_fwd_green_p1",
+	"rx_fwd_green_p2",
+	"rx_fwd_green_p3",
+	"rx_fwd_green_p4",
+	"rx_fwd_green_p5",
+	"rx_fwd_green_p6",
+	"rx_fwd_green_p7",
+	"rx_fwd_yellow_p0",
+	"rx_fwd_yellow_p1",
+	"rx_fwd_yellow_p2",
+	"rx_fwd_yellow_p3",
+	"rx_fwd_yellow_p4",
+	"rx_fwd_yellow_p5",
+	"rx_fwd_yellow_p6",
+	"rx_fwd_yellow_p7",
+	"rx_port_drop_green_p0",
+	"rx_port_drop_green_p1",
+	"rx_port_drop_green_p2",
+	"rx_port_drop_green_p3",
+	"rx_port_drop_green_p4",
+	"rx_port_drop_green_p5",
+	"rx_port_drop_green_p6",
+	"rx_port_drop_green_p7",
+	"rx_port_drop_yellow_p0",
+	"rx_port_drop_yellow_p1",
+	"rx_port_drop_yellow_p2",
+	"rx_port_drop_yellow_p3",
+	"rx_port_drop_yellow_p4",
+	"rx_port_drop_yellow_p5",
+	"rx_port_drop_yellow_p6",
+	"rx_port_drop_yellow_p7",
+	"tx_port_green_p0",
+	"tx_port_green_p1",
+	"tx_port_green_p2",
+	"tx_port_green_p3",
+	"tx_port_green_p4",
+	"tx_port_green_p5",
+	"tx_port_green_p6",
+	"tx_port_green_p7",
+	"tx_port_yellow_p0",
+	"tx_port_yellow_p1",
+	"tx_port_yellow_p2",
+	"tx_port_yellow_p3",
+	"tx_port_yellow_p4",
+	"tx_port_yellow_p5",
+	"tx_port_yellow_p6",
+	"tx_port_yellow_p7",
+	"rx_local_drop",
+	"tx_local_drop",
+};
+
+static void sparx5_get_queue_sys_stats(struct sparx5 *sparx5, int portno)
+{
+	u64 *portstats;
+	u64 *stats;
+	u32 addr;
+	int idx;
+
+	portstats = &sparx5->stats[portno * sparx5->num_stats];
+	mutex_lock(&sparx5->queue_stats_lock);
+	spx5_wr(XQS_STAT_CFG_STAT_VIEW_SET(portno), sparx5, XQS_STAT_CFG);
+	addr = 0;
+	stats = &portstats[spx5_stats_green_p0_rx_fwd];
+	for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++addr, ++stats)
+		sparx5_update_counter(stats, spx5_rd(sparx5, XQS_CNT(addr)));
+	addr = 16;
+	stats = &portstats[spx5_stats_green_p0_rx_port_drop];
+	for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++addr, ++stats)
+		sparx5_update_counter(stats, spx5_rd(sparx5, XQS_CNT(addr)));
+	addr = 256;
+	stats = &portstats[spx5_stats_green_p0_tx_port];
+	for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++addr, ++stats)
+		sparx5_update_counter(stats, spx5_rd(sparx5, XQS_CNT(addr)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_local_drop],
+			      spx5_rd(sparx5, XQS_CNT(32)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_local_drop],
+			      spx5_rd(sparx5, XQS_CNT(272)));
+	mutex_unlock(&sparx5->queue_stats_lock);
+}
+
+static void sparx5_get_ana_ac_stats_stats(struct sparx5 *sparx5, int portno)
+{
+	u64 *portstats = &sparx5->stats[portno * sparx5->num_stats];
+
+	sparx5_update_counter(&portstats[spx5_stats_ana_ac_port_stat_lsb_cnt],
+			      spx5_rd(sparx5, ANA_AC_PORT_STAT_LSB_CNT(portno,
+								       SPX5_PORT_POLICER_DROPS)));
+}
+
+static void sparx5_get_dev_phy_stats(u64 *portstats, void __iomem *inst, u32
+				     tinst)
+{
+	sparx5_update_counter(&portstats[spx5_stats_rx_symbol_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SYMBOL_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_symbol_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SYMBOL_ERR_CNT(tinst)));
+}
+
+static void sparx5_get_dev_mac_stats(u64 *portstats, void __iomem *inst, u32
+				     tinst)
+{
+	sparx5_update_counter(&portstats[spx5_stats_tx_uc_cnt],
+			      spx5_inst_rd(inst, DEV5G_TX_UC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_uc_cnt],
+			      spx5_inst_rd(inst, DEV5G_PMAC_TX_UC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_mc_cnt],
+			      spx5_inst_rd(inst, DEV5G_TX_MC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_bc_cnt],
+			      spx5_inst_rd(inst, DEV5G_TX_BC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_uc_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_UC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_uc_cnt],
+			      spx5_inst_rd(inst, DEV5G_PMAC_RX_UC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_mc_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_MC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_bc_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_BC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_crc_err_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_CRC_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_crc_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_CRC_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_alignment_lost_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_ALIGNMENT_LOST_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_alignment_lost_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_ALIGNMENT_LOST_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_ok_bytes_cnt],
+			      spx5_inst_rd(inst, DEV5G_TX_OK_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_ok_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_OK_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_ok_bytes_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_OK_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_ok_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_OK_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_mc_cnt],
+			      spx5_inst_rd(inst, DEV5G_PMAC_TX_MC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_bc_cnt],
+			      spx5_inst_rd(inst, DEV5G_PMAC_TX_BC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_mc_cnt],
+			      spx5_inst_rd(inst, DEV5G_PMAC_RX_MC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_bc_cnt],
+			      spx5_inst_rd(inst, DEV5G_PMAC_RX_BC_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_in_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_IN_RANGE_LEN_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_in_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_IN_RANGE_LEN_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_out_of_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_OUT_OF_RANGE_LEN_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_out_of_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_OUT_OF_RANGE_LEN_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_oversize_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_OVERSIZE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_oversize_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_OVERSIZE_CNT(tinst)));
+}
+
+static void sparx5_get_dev_mac_ctrl_stats(u64 *portstats, void __iomem *inst,
+					  u32 tinst)
+{
+	sparx5_update_counter(&portstats[spx5_stats_tx_pause_cnt],
+			      spx5_inst_rd(inst, DEV5G_TX_PAUSE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_pause_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_PAUSE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_pause_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_PAUSE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_pause_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_PAUSE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_unsup_opcode_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_UNSUP_OPCODE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_unsup_opcode_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_UNSUP_OPCODE_CNT(tinst)));
+}
+
+static void sparx5_get_dev_rmon_stats(u64 *portstats, void __iomem *inst, u32
+				      tinst)
+{
+	sparx5_update_counter(&portstats[spx5_stats_rx_undersize_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_UNDERSIZE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_undersize_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_UNDERSIZE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_oversize_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_OVERSIZE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_oversize_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_OVERSIZE_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_fragments_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_FRAGMENTS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_fragments_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_FRAGMENTS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_jabbers_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_JABBERS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_jabbers_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_JABBERS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size64_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_SIZE64_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size64_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE64_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SIZE65TO127_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE65TO127_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SIZE128TO255_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE128TO255_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SIZE256TO511_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE256TO511_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SIZE512TO1023_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE512TO1023_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SIZE1024TO1518_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE1024TO1518_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_SIZE1519TOMAX_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_SIZE1519TOMAX_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size64_cnt],
+			      spx5_inst_rd(inst, DEV5G_TX_SIZE64_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size64_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE64_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_SIZE65TO127_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE65TO127_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_SIZE128TO255_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE128TO255_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_SIZE256TO511_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE256TO511_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_SIZE512TO1023_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE512TO1023_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_SIZE1024TO1518_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE1024TO1518_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_SIZE1519TOMAX_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_TX_SIZE1519TOMAX_CNT(tinst)));
+}
+
+static void sparx5_get_dev_misc_stats(u64 *portstats, void __iomem *inst, u32
+				      tinst)
+{
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_assembly_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_MM_RX_ASSEMBLY_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_assembly_ok_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_MM_RX_ASSEMBLY_OK_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_merge_frag_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_MM_RX_MERGE_FRAG_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_smd_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_MM_RX_SMD_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_tx_pfragment_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_MM_TX_PFRAGMENT_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_bad_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_BAD_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_bad_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_BAD_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_in_bytes_cnt],
+			      spx5_inst_rd(inst, DEV5G_RX_IN_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_ipg_shrink_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_IPG_SHRINK_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_tagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_TAGGED_FRMS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_untagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_UNTAGGED_FRMS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_out_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_OUT_BYTES_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_tagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_TAGGED_FRMS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_untagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_TX_UNTAGGED_FRMS_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_hih_cksm_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_HIH_CKSM_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_hih_cksm_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_HIH_CKSM_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_xgmii_prot_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_RX_XGMII_PROT_ERR_CNT(tinst)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_xgmii_prot_err_cnt],
+			      spx5_inst_rd(inst,
+					   DEV5G_PMAC_RX_XGMII_PROT_ERR_CNT(tinst)));
+}
+
+static void sparx5_get_device_stats(struct sparx5 *sparx5, int portno)
+{
+	u64 *portstats = &sparx5->stats[portno * sparx5->num_stats];
+	u32 tinst = sparx5_port_dev_index(portno);
+	u32 dev = sparx5_to_high_dev(portno);
+	void __iomem *inst;
+
+	inst = spx5_inst_get(sparx5, dev, tinst);
+	sparx5_get_dev_phy_stats(portstats, inst, tinst);
+	sparx5_get_dev_mac_stats(portstats, inst, tinst);
+	sparx5_get_dev_mac_ctrl_stats(portstats, inst, tinst);
+	sparx5_get_dev_rmon_stats(portstats, inst, tinst);
+	sparx5_get_dev_misc_stats(portstats, inst, tinst);
+}
+
+static void sparx5_get_asm_phy_stats(u64 *portstats, void __iomem *inst, int
+				     portno)
+{
+	sparx5_update_counter(&portstats[spx5_stats_rx_symbol_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SYMBOL_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_symbol_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SYMBOL_ERR_CNT(portno)));
+}
+
+static void sparx5_get_asm_mac_stats(u64 *portstats, void __iomem *inst, int
+				     portno)
+{
+	sparx5_update_counter(&portstats[spx5_stats_tx_uc_cnt],
+			      spx5_inst_rd(inst, ASM_TX_UC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_uc_cnt],
+			      spx5_inst_rd(inst, ASM_PMAC_TX_UC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_mc_cnt],
+			      spx5_inst_rd(inst, ASM_TX_MC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_bc_cnt],
+			      spx5_inst_rd(inst, ASM_TX_BC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_backoff1_cnt],
+			      spx5_inst_rd(inst, ASM_TX_BACKOFF1_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_multi_coll_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_MULTI_COLL_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_uc_cnt],
+			      spx5_inst_rd(inst, ASM_RX_UC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_uc_cnt],
+			      spx5_inst_rd(inst, ASM_PMAC_RX_UC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_mc_cnt],
+			      spx5_inst_rd(inst, ASM_RX_MC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_bc_cnt],
+			      spx5_inst_rd(inst, ASM_RX_BC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_crc_err_cnt],
+			      spx5_inst_rd(inst, ASM_RX_CRC_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_crc_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_CRC_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_alignment_lost_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_ALIGNMENT_LOST_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_alignment_lost_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_ALIGNMENT_LOST_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_ok_bytes_cnt],
+			      spx5_inst_rd(inst, ASM_TX_OK_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_ok_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_OK_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_defer_cnt],
+			      spx5_inst_rd(inst, ASM_TX_DEFER_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_late_coll_cnt],
+			      spx5_inst_rd(inst, ASM_TX_LATE_COLL_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_xcoll_cnt],
+			      spx5_inst_rd(inst, ASM_TX_XCOLL_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_csense_cnt],
+			      spx5_inst_rd(inst, ASM_TX_CSENSE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_ok_bytes_cnt],
+			      spx5_inst_rd(inst, ASM_RX_OK_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_ok_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_OK_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_mc_cnt],
+			      spx5_inst_rd(inst, ASM_PMAC_TX_MC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_bc_cnt],
+			      spx5_inst_rd(inst, ASM_PMAC_TX_BC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_xdefer_cnt],
+			      spx5_inst_rd(inst, ASM_TX_XDEFER_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_mc_cnt],
+			      spx5_inst_rd(inst, ASM_PMAC_RX_MC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_bc_cnt],
+			      spx5_inst_rd(inst, ASM_PMAC_RX_BC_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_in_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_IN_RANGE_LEN_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_in_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_IN_RANGE_LEN_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_out_of_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_OUT_OF_RANGE_LEN_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_out_of_range_len_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_OUT_OF_RANGE_LEN_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_oversize_cnt],
+			      spx5_inst_rd(inst, ASM_RX_OVERSIZE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_oversize_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_OVERSIZE_CNT(portno)));
+}
+
+static void sparx5_get_asm_mac_ctrl_stats(u64 *portstats, void __iomem *inst,
+					  int portno)
+{
+	sparx5_update_counter(&portstats[spx5_stats_tx_pause_cnt],
+			      spx5_inst_rd(inst, ASM_TX_PAUSE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_pause_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_PAUSE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_pause_cnt],
+			      spx5_inst_rd(inst, ASM_RX_PAUSE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_pause_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_PAUSE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_unsup_opcode_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_UNSUP_OPCODE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_unsup_opcode_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_UNSUP_OPCODE_CNT(portno)));
+}
+
+static void sparx5_get_asm_rmon_stats(u64 *portstats, void __iomem *inst, int
+				      portno)
+{
+	sparx5_update_counter(&portstats[spx5_stats_rx_undersize_cnt],
+			      spx5_inst_rd(inst, ASM_RX_UNDERSIZE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_undersize_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_UNDERSIZE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_oversize_cnt],
+			      spx5_inst_rd(inst, ASM_RX_OVERSIZE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_oversize_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_OVERSIZE_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_fragments_cnt],
+			      spx5_inst_rd(inst, ASM_RX_FRAGMENTS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_fragments_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_FRAGMENTS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_jabbers_cnt],
+			      spx5_inst_rd(inst, ASM_RX_JABBERS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_jabbers_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_JABBERS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size64_cnt],
+			      spx5_inst_rd(inst, ASM_RX_SIZE64_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size64_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE64_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SIZE65TO127_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE65TO127_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SIZE128TO255_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE128TO255_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SIZE256TO511_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE256TO511_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SIZE512TO1023_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE512TO1023_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SIZE1024TO1518_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE1024TO1518_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SIZE1519TOMAX_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_SIZE1519TOMAX_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size64_cnt],
+			      spx5_inst_rd(inst, ASM_TX_SIZE64_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size64_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE64_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_SIZE65TO127_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size65to127_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE65TO127_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_SIZE128TO255_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size128to255_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE128TO255_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_SIZE256TO511_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size256to511_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE256TO511_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_SIZE512TO1023_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size512to1023_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE512TO1023_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_SIZE1024TO1518_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size1024to1518_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE1024TO1518_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_SIZE1519TOMAX_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_tx_size1519tomax_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_TX_SIZE1519TOMAX_CNT(portno)));
+}
+
+static void sparx5_get_asm_misc_stats(u64 *portstats, void __iomem *inst, int
+				      portno)
+{
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_assembly_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_MM_RX_ASSEMBLY_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_assembly_ok_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_MM_RX_ASSEMBLY_OK_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_merge_frag_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_MM_RX_MERGE_FRAG_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_rx_smd_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_MM_RX_SMD_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_mm_tx_pfragment_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_MM_TX_PFRAGMENT_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_bad_bytes_cnt],
+			      spx5_inst_rd(inst, ASM_RX_BAD_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_pmac_rx_bad_bytes_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_PMAC_RX_BAD_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_in_bytes_cnt],
+			      spx5_inst_rd(inst, ASM_RX_IN_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_ipg_shrink_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_IPG_SHRINK_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_sync_lost_err_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_SYNC_LOST_ERR_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_tagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_TAGGED_FRMS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_rx_untagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_RX_UNTAGGED_FRMS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_out_bytes_cnt],
+			      spx5_inst_rd(inst, ASM_TX_OUT_BYTES_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_tagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_TAGGED_FRMS_CNT(portno)));
+	sparx5_update_counter(&portstats[spx5_stats_tx_untagged_frms_cnt],
+			      spx5_inst_rd(inst,
+					   ASM_TX_UNTAGGED_FRMS_CNT(portno)));
+}
+
+static void sparx5_get_asm_stats(struct sparx5 *sparx5, int portno)
+{
+	u64 *portstats = &sparx5->stats[portno * sparx5->num_stats];
+	void __iomem *inst = spx5_inst_get(sparx5, TARGET_ASM, 0);
+
+	sparx5_get_asm_phy_stats(portstats, inst, portno);
+	sparx5_get_asm_mac_stats(portstats, inst, portno);
+	sparx5_get_asm_mac_ctrl_stats(portstats, inst, portno);
+	sparx5_get_asm_rmon_stats(portstats, inst, portno);
+	sparx5_get_asm_misc_stats(portstats, inst, portno);
+}
+
+static const struct ethtool_rmon_hist_range sparx5_rmon_ranges[] = {
+	{    0,    64 },
+	{   65,   127 },
+	{  128,   255 },
+	{  256,   511 },
+	{  512,  1023 },
+	{ 1024,  1518 },
+	{ 1519, 10239 },
+	{}
+};
+
+static void sparx5_get_eth_phy_stats(struct net_device *ndev,
+				     struct ethtool_eth_phy_stats *phy_stats)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+	void __iomem *inst;
+	u64 *portstats;
+
+	portstats = &sparx5->stats[portno * sparx5->num_stats];
+	if (sparx5_is_baser(port->conf.portmode)) {
+		u32 tinst = sparx5_port_dev_index(portno);
+		u32 dev = sparx5_to_high_dev(portno);
+
+		inst = spx5_inst_get(sparx5, dev, tinst);
+		sparx5_get_dev_phy_stats(portstats, inst, tinst);
+	} else {
+		inst = spx5_inst_get(sparx5, TARGET_ASM, 0);
+		sparx5_get_asm_phy_stats(portstats, inst, portno);
+	}
+	phy_stats->SymbolErrorDuringCarrier =
+		portstats[spx5_stats_rx_symbol_err_cnt] +
+		portstats[spx5_stats_pmac_rx_symbol_err_cnt];
+}
+
+static void sparx5_get_eth_mac_stats(struct net_device *ndev,
+				     struct ethtool_eth_mac_stats *mac_stats)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+	void __iomem *inst;
+	u64 *portstats;
+
+	portstats = &sparx5->stats[portno * sparx5->num_stats];
+	if (sparx5_is_baser(port->conf.portmode)) {
+		u32 tinst = sparx5_port_dev_index(portno);
+		u32 dev = sparx5_to_high_dev(portno);
+
+		inst = spx5_inst_get(sparx5, dev, tinst);
+		sparx5_get_dev_mac_stats(portstats, inst, tinst);
+	} else {
+		inst = spx5_inst_get(sparx5, TARGET_ASM, 0);
+		sparx5_get_asm_mac_stats(portstats, inst, portno);
+	}
+	mac_stats->FramesTransmittedOK = portstats[spx5_stats_tx_uc_cnt] +
+		portstats[spx5_stats_pmac_tx_uc_cnt] +
+		portstats[spx5_stats_tx_mc_cnt] +
+		portstats[spx5_stats_tx_bc_cnt];
+	mac_stats->SingleCollisionFrames =
+		portstats[spx5_stats_tx_backoff1_cnt];
+	mac_stats->MultipleCollisionFrames =
+		portstats[spx5_stats_tx_multi_coll_cnt];
+	mac_stats->FramesReceivedOK = portstats[spx5_stats_rx_uc_cnt] +
+		portstats[spx5_stats_pmac_rx_uc_cnt] +
+		portstats[spx5_stats_rx_mc_cnt] +
+		portstats[spx5_stats_rx_bc_cnt];
+	mac_stats->FrameCheckSequenceErrors =
+		portstats[spx5_stats_rx_crc_err_cnt] +
+		portstats[spx5_stats_pmac_rx_crc_err_cnt];
+	mac_stats->AlignmentErrors = portstats[spx5_stats_rx_alignment_lost_cnt]
+		+ portstats[spx5_stats_pmac_rx_alignment_lost_cnt];
+	mac_stats->OctetsTransmittedOK = portstats[spx5_stats_tx_ok_bytes_cnt] +
+		portstats[spx5_stats_pmac_tx_ok_bytes_cnt];
+	mac_stats->FramesWithDeferredXmissions =
+		portstats[spx5_stats_tx_defer_cnt];
+	mac_stats->LateCollisions =
+		portstats[spx5_stats_tx_late_coll_cnt];
+	mac_stats->FramesAbortedDueToXSColls =
+		portstats[spx5_stats_tx_xcoll_cnt];
+	mac_stats->CarrierSenseErrors = portstats[spx5_stats_tx_csense_cnt];
+	mac_stats->OctetsReceivedOK = portstats[spx5_stats_rx_ok_bytes_cnt] +
+		portstats[spx5_stats_pmac_rx_ok_bytes_cnt];
+	mac_stats->MulticastFramesXmittedOK = portstats[spx5_stats_tx_mc_cnt] +
+		portstats[spx5_stats_pmac_tx_mc_cnt];
+	mac_stats->BroadcastFramesXmittedOK = portstats[spx5_stats_tx_bc_cnt] +
+		portstats[spx5_stats_pmac_tx_bc_cnt];
+	mac_stats->FramesWithExcessiveDeferral =
+		portstats[spx5_stats_tx_xdefer_cnt];
+	mac_stats->MulticastFramesReceivedOK = portstats[spx5_stats_rx_mc_cnt] +
+		portstats[spx5_stats_pmac_rx_mc_cnt];
+	mac_stats->BroadcastFramesReceivedOK = portstats[spx5_stats_rx_bc_cnt] +
+		portstats[spx5_stats_pmac_rx_bc_cnt];
+	mac_stats->InRangeLengthErrors =
+		portstats[spx5_stats_rx_in_range_len_err_cnt] +
+		portstats[spx5_stats_pmac_rx_in_range_len_err_cnt];
+	mac_stats->OutOfRangeLengthField =
+		portstats[spx5_stats_rx_out_of_range_len_err_cnt] +
+		portstats[spx5_stats_pmac_rx_out_of_range_len_err_cnt];
+	mac_stats->FrameTooLongErrors = portstats[spx5_stats_rx_oversize_cnt] +
+		portstats[spx5_stats_pmac_rx_oversize_cnt];
+}
+
+static void sparx5_get_eth_mac_ctrl_stats(struct net_device *ndev,
+					  struct ethtool_eth_ctrl_stats *mac_ctrl_stats)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+	void __iomem *inst;
+	u64 *portstats;
+
+	portstats = &sparx5->stats[portno * sparx5->num_stats];
+	if (sparx5_is_baser(port->conf.portmode)) {
+		u32 tinst = sparx5_port_dev_index(portno);
+		u32 dev = sparx5_to_high_dev(portno);
+
+		inst = spx5_inst_get(sparx5, dev, tinst);
+		sparx5_get_dev_mac_ctrl_stats(portstats, inst, tinst);
+	} else {
+		inst = spx5_inst_get(sparx5, TARGET_ASM, 0);
+		sparx5_get_asm_mac_ctrl_stats(portstats, inst, portno);
+	}
+	mac_ctrl_stats->MACControlFramesTransmitted =
+		portstats[spx5_stats_tx_pause_cnt] +
+		portstats[spx5_stats_pmac_tx_pause_cnt];
+	mac_ctrl_stats->MACControlFramesReceived =
+		portstats[spx5_stats_rx_pause_cnt] +
+		portstats[spx5_stats_pmac_rx_pause_cnt];
+	mac_ctrl_stats->UnsupportedOpcodesReceived =
+		portstats[spx5_stats_rx_unsup_opcode_cnt] +
+		portstats[spx5_stats_pmac_rx_unsup_opcode_cnt];
+}
+
+static void sparx5_get_eth_rmon_stats(struct net_device *ndev,
+				      struct ethtool_rmon_stats *rmon_stats,
+				      const struct ethtool_rmon_hist_range **ranges)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+	void __iomem *inst;
+	u64 *portstats;
+
+	portstats = &sparx5->stats[portno * sparx5->num_stats];
+	if (sparx5_is_baser(port->conf.portmode)) {
+		u32 tinst = sparx5_port_dev_index(portno);
+		u32 dev = sparx5_to_high_dev(portno);
+
+		inst = spx5_inst_get(sparx5, dev, tinst);
+		sparx5_get_dev_rmon_stats(portstats, inst, tinst);
+	} else {
+		inst = spx5_inst_get(sparx5, TARGET_ASM, 0);
+		sparx5_get_asm_rmon_stats(portstats, inst, portno);
+	}
+	rmon_stats->undersize_pkts = portstats[spx5_stats_rx_undersize_cnt] +
+		portstats[spx5_stats_pmac_rx_undersize_cnt];
+	rmon_stats->oversize_pkts = portstats[spx5_stats_rx_oversize_cnt] +
+		portstats[spx5_stats_pmac_rx_oversize_cnt];
+	rmon_stats->fragments = portstats[spx5_stats_rx_fragments_cnt] +
+		portstats[spx5_stats_pmac_rx_fragments_cnt];
+	rmon_stats->jabbers = portstats[spx5_stats_rx_jabbers_cnt] +
+		portstats[spx5_stats_pmac_rx_jabbers_cnt];
+	rmon_stats->hist[0] = portstats[spx5_stats_rx_size64_cnt] +
+		portstats[spx5_stats_pmac_rx_size64_cnt];
+	rmon_stats->hist[1] = portstats[spx5_stats_rx_size65to127_cnt] +
+		portstats[spx5_stats_pmac_rx_size65to127_cnt];
+	rmon_stats->hist[2] = portstats[spx5_stats_rx_size128to255_cnt] +
+		portstats[spx5_stats_pmac_rx_size128to255_cnt];
+	rmon_stats->hist[3] = portstats[spx5_stats_rx_size256to511_cnt] +
+		portstats[spx5_stats_pmac_rx_size256to511_cnt];
+	rmon_stats->hist[4] = portstats[spx5_stats_rx_size512to1023_cnt] +
+		portstats[spx5_stats_pmac_rx_size512to1023_cnt];
+	rmon_stats->hist[5] = portstats[spx5_stats_rx_size1024to1518_cnt] +
+		portstats[spx5_stats_pmac_rx_size1024to1518_cnt];
+	rmon_stats->hist[6] = portstats[spx5_stats_rx_size1519tomax_cnt] +
+		portstats[spx5_stats_pmac_rx_size1519tomax_cnt];
+	rmon_stats->hist_tx[0] = portstats[spx5_stats_tx_size64_cnt] +
+		portstats[spx5_stats_pmac_tx_size64_cnt];
+	rmon_stats->hist_tx[1] = portstats[spx5_stats_tx_size65to127_cnt] +
+		portstats[spx5_stats_pmac_tx_size65to127_cnt];
+	rmon_stats->hist_tx[2] = portstats[spx5_stats_tx_size128to255_cnt] +
+		portstats[spx5_stats_pmac_tx_size128to255_cnt];
+	rmon_stats->hist_tx[3] = portstats[spx5_stats_tx_size256to511_cnt] +
+		portstats[spx5_stats_pmac_tx_size256to511_cnt];
+	rmon_stats->hist_tx[4] = portstats[spx5_stats_tx_size512to1023_cnt] +
+		portstats[spx5_stats_pmac_tx_size512to1023_cnt];
+	rmon_stats->hist_tx[5] = portstats[spx5_stats_tx_size1024to1518_cnt] +
+		portstats[spx5_stats_pmac_tx_size1024to1518_cnt];
+	rmon_stats->hist_tx[6] = portstats[spx5_stats_tx_size1519tomax_cnt] +
+		portstats[spx5_stats_pmac_tx_size1519tomax_cnt];
+	*ranges = sparx5_rmon_ranges;
+}
+
+static int sparx5_get_sset_count(struct net_device *ndev, int sset)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5  *sparx5 = port->sparx5;
+
+	if (sset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+	return sparx5->num_ethtool_stats;
+}
+
+static void sparx5_get_sset_strings(struct net_device *ndev, u32 sset, u8 *data)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5  *sparx5 = port->sparx5;
+	int idx;
+
+	if (sset != ETH_SS_STATS)
+		return;
+
+	for (idx = 0; idx < sparx5->num_ethtool_stats; idx++)
+		strncpy(data + idx * ETH_GSTRING_LEN,
+			sparx5->stats_layout[idx], ETH_GSTRING_LEN);
+}
+
+static void sparx5_get_sset_data(struct net_device *ndev,
+				 struct ethtool_stats *stats, u64 *data)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+	void __iomem *inst;
+	u64 *portstats;
+	int idx;
+
+	portstats = &sparx5->stats[portno * sparx5->num_stats];
+	if (sparx5_is_baser(port->conf.portmode)) {
+		u32 tinst = sparx5_port_dev_index(portno);
+		u32 dev = sparx5_to_high_dev(portno);
+
+		inst = spx5_inst_get(sparx5, dev, tinst);
+		sparx5_get_dev_misc_stats(portstats, inst, tinst);
+	} else {
+		inst = spx5_inst_get(sparx5, TARGET_ASM, 0);
+		sparx5_get_asm_misc_stats(portstats, inst, portno);
+	}
+	sparx5_get_ana_ac_stats_stats(sparx5, portno);
+	sparx5_get_queue_sys_stats(sparx5, portno);
+	/* Copy port counters to the ethtool buffer */
+	for (idx = spx5_stats_mm_rx_assembly_err_cnt;
+	     idx < spx5_stats_mm_rx_assembly_err_cnt +
+	     sparx5->num_ethtool_stats; idx++)
+		*data++ = portstats[idx];
+}
+
+void sparx5_get_stats64(struct net_device *ndev,
+			struct rtnl_link_stats64 *stats)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	u64 *portstats;
+	int idx;
+
+	if (!sparx5->stats)
+		return; /* Not initialized yet */
+
+	portstats = &sparx5->stats[port->portno * sparx5->num_stats];
+
+	stats->rx_packets = portstats[spx5_stats_rx_uc_cnt] +
+		portstats[spx5_stats_pmac_rx_uc_cnt] +
+		portstats[spx5_stats_rx_mc_cnt] +
+		portstats[spx5_stats_rx_bc_cnt];
+	stats->tx_packets = portstats[spx5_stats_tx_uc_cnt] +
+		portstats[spx5_stats_pmac_tx_uc_cnt] +
+		portstats[spx5_stats_tx_mc_cnt] +
+		portstats[spx5_stats_tx_bc_cnt];
+	stats->rx_bytes = portstats[spx5_stats_rx_ok_bytes_cnt] +
+		portstats[spx5_stats_pmac_rx_ok_bytes_cnt];
+	stats->tx_bytes = portstats[spx5_stats_tx_ok_bytes_cnt] +
+		portstats[spx5_stats_pmac_tx_ok_bytes_cnt];
+	stats->rx_errors = portstats[spx5_stats_rx_in_range_len_err_cnt] +
+		portstats[spx5_stats_pmac_rx_in_range_len_err_cnt] +
+		portstats[spx5_stats_rx_out_of_range_len_err_cnt] +
+		portstats[spx5_stats_pmac_rx_out_of_range_len_err_cnt] +
+		portstats[spx5_stats_rx_oversize_cnt] +
+		portstats[spx5_stats_pmac_rx_oversize_cnt] +
+		portstats[spx5_stats_rx_crc_err_cnt] +
+		portstats[spx5_stats_pmac_rx_crc_err_cnt] +
+		portstats[spx5_stats_rx_alignment_lost_cnt] +
+		portstats[spx5_stats_pmac_rx_alignment_lost_cnt];
+	stats->tx_errors = portstats[spx5_stats_tx_xcoll_cnt] +
+		portstats[spx5_stats_tx_csense_cnt] +
+		portstats[spx5_stats_tx_late_coll_cnt];
+	stats->multicast = portstats[spx5_stats_rx_mc_cnt] +
+		portstats[spx5_stats_pmac_rx_mc_cnt];
+	stats->collisions = portstats[spx5_stats_tx_late_coll_cnt] +
+		portstats[spx5_stats_tx_xcoll_cnt] +
+		portstats[spx5_stats_tx_backoff1_cnt];
+	stats->rx_length_errors = portstats[spx5_stats_rx_in_range_len_err_cnt] +
+		portstats[spx5_stats_pmac_rx_in_range_len_err_cnt] +
+		portstats[spx5_stats_rx_out_of_range_len_err_cnt] +
+		portstats[spx5_stats_pmac_rx_out_of_range_len_err_cnt] +
+		portstats[spx5_stats_rx_oversize_cnt] +
+		portstats[spx5_stats_pmac_rx_oversize_cnt];
+	stats->rx_crc_errors = portstats[spx5_stats_rx_crc_err_cnt] +
+		portstats[spx5_stats_pmac_rx_crc_err_cnt];
+	stats->rx_frame_errors = portstats[spx5_stats_rx_alignment_lost_cnt] +
+		portstats[spx5_stats_pmac_rx_alignment_lost_cnt];
+	stats->tx_aborted_errors = portstats[spx5_stats_tx_xcoll_cnt];
+	stats->tx_carrier_errors = portstats[spx5_stats_tx_csense_cnt];
+	stats->tx_window_errors = portstats[spx5_stats_tx_late_coll_cnt];
+	stats->rx_dropped = portstats[spx5_stats_ana_ac_port_stat_lsb_cnt];
+	for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++stats)
+		stats->rx_dropped += portstats[spx5_stats_green_p0_rx_port_drop
+					       + idx];
+	stats->tx_dropped = portstats[spx5_stats_tx_local_drop];
+}
+
+static void sparx5_update_port_stats(struct sparx5 *sparx5, int portno)
+{
+	if (sparx5_is_baser(sparx5->ports[portno]->conf.portmode))
+		sparx5_get_device_stats(sparx5, portno);
+	else
+		sparx5_get_asm_stats(sparx5, portno);
+	sparx5_get_ana_ac_stats_stats(sparx5, portno);
+	sparx5_get_queue_sys_stats(sparx5, portno);
+}
+
+static void sparx5_update_stats(struct sparx5 *sparx5)
+{
+	int idx;
+
+	for (idx = 0; idx < SPX5_PORTS; idx++)
+		if (sparx5->ports[idx])
+			sparx5_update_port_stats(sparx5, idx);
+}
+
+static void sparx5_check_stats_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct sparx5 *sparx5 = container_of(dwork,
+					     struct sparx5,
+					     stats_work);
+
+	sparx5_update_stats(sparx5);
+
+	queue_delayed_work(sparx5->stats_queue, &sparx5->stats_work,
+			   SPX5_STATS_CHECK_DELAY);
+}
+
+static int sparx5_get_link_settings(struct net_device *ndev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_get(port->phylink, cmd);
+}
+
+static int sparx5_set_link_settings(struct net_device *ndev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_set(port->phylink, cmd);
+}
+
+static void sparx5_config_stats(struct sparx5 *sparx5)
+{
+	/* Enable global events for port policer drops */
+	spx5_rmw(ANA_AC_PORT_SGE_CFG_MASK_SET(0xf0f0),
+		 ANA_AC_PORT_SGE_CFG_MASK,
+		 sparx5,
+		 ANA_AC_PORT_SGE_CFG(SPX5_PORT_POLICER_DROPS));
+}
+
+static void sparx5_config_port_stats(struct sparx5 *sparx5, int portno)
+{
+	/* Clear Queue System counters */
+	spx5_wr(XQS_STAT_CFG_STAT_VIEW_SET(portno) |
+		XQS_STAT_CFG_STAT_CLEAR_SHOT_SET(3), sparx5,
+		XQS_STAT_CFG);
+
+	/* Use counter for port policer drop count */
+	spx5_rmw(ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE_SET(1) |
+		 ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE_SET(0) |
+		 ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK_SET(0xff),
+		 ANA_AC_PORT_STAT_CFG_CFG_CNT_FRM_TYPE |
+		 ANA_AC_PORT_STAT_CFG_CFG_CNT_BYTE |
+		 ANA_AC_PORT_STAT_CFG_CFG_PRIO_MASK,
+		 sparx5, ANA_AC_PORT_STAT_CFG(portno, SPX5_PORT_POLICER_DROPS));
+}
+
+const struct ethtool_ops sparx5_ethtool_ops = {
+	.get_sset_count         = sparx5_get_sset_count,
+	.get_strings            = sparx5_get_sset_strings,
+	.get_ethtool_stats      = sparx5_get_sset_data,
+	.get_link_ksettings	= sparx5_get_link_settings,
+	.set_link_ksettings	= sparx5_set_link_settings,
+	.get_link               = ethtool_op_get_link,
+	.get_eth_phy_stats      = sparx5_get_eth_phy_stats,
+	.get_eth_mac_stats      = sparx5_get_eth_mac_stats,
+	.get_eth_ctrl_stats     = sparx5_get_eth_mac_ctrl_stats,
+	.get_rmon_stats         = sparx5_get_eth_rmon_stats,
+};
+
+int sparx_stats_init(struct sparx5 *sparx5)
+{
+	char queue_name[32];
+	int portno;
+
+	sparx5->stats_layout = sparx5_stats_layout;
+	sparx5->num_stats = spx5_stats_count;
+	sparx5->num_ethtool_stats = ARRAY_SIZE(sparx5_stats_layout);
+	sparx5->stats = devm_kcalloc(sparx5->dev,
+				     SPX5_PORTS_ALL * sparx5->num_stats,
+				     sizeof(u64), GFP_KERNEL);
+	if (!sparx5->stats)
+		return -ENOMEM;
+
+	mutex_init(&sparx5->queue_stats_lock);
+	sparx5_config_stats(sparx5);
+	for (portno = 0; portno < SPX5_PORTS; portno++)
+		if (sparx5->ports[portno])
+			sparx5_config_port_stats(sparx5, portno);
+
+	snprintf(queue_name, sizeof(queue_name), "%s-stats",
+		 dev_name(sparx5->dev));
+	sparx5->stats_queue = create_singlethread_workqueue(queue_name);
+	INIT_DELAYED_WORK(&sparx5->stats_work, sparx5_check_stats_work);
+	queue_delayed_work(sparx5->stats_queue, &sparx5->stats_work,
+			   SPX5_STATS_CHECK_DELAY);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index ffd761b8f975..a325f7c05a07 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -614,6 +614,10 @@ static int sparx5_start(struct sparx5 *sparx5)
 	if (err)
 		return err;
 
+	/* Init stats */
+	err = sparx_stats_init(sparx5);
+	if (err)
+		return err;
 
 	/* Init mact_sw struct */
 	mutex_init(&sparx5->mact_lock);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 4d7a024fedc0..4d5f44c3a421 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -135,6 +135,15 @@ struct sparx5 {
 	/* port structures are in net device */
 	struct sparx5_port *ports[SPX5_PORTS];
 	enum sparx5_core_clockfreq coreclock;
+	/* Statistics */
+	u32 num_stats;
+	u32 num_ethtool_stats;
+	const char * const *stats_layout;
+	u64 *stats;
+	/* Workqueue for reading stats */
+	struct mutex queue_stats_lock;
+	struct delayed_work stats_work;
+	struct workqueue_struct *stats_queue;
 	/* Notifiers */
 	struct notifier_block netdevice_nb;
 	struct notifier_block switchdev_nb;
@@ -203,6 +212,10 @@ void sparx5_vlan_port_apply(struct sparx5 *sparx5, struct sparx5_port *port);
 int sparx5_config_auto_calendar(struct sparx5 *sparx5);
 int sparx5_config_dsm_calendar(struct sparx5 *sparx5);
 
+/* sparx5_ethtool.c */
+void sparx5_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats);
+int sparx_stats_init(struct sparx5 *sparx5);
+
 /* sparx5_netdev.c */
 bool sparx5_netdevice_check(const struct net_device *dev);
 struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno);
@@ -233,6 +246,7 @@ static inline bool sparx5_is_baser(phy_interface_t interface)
 
 extern const struct phylink_mac_ops sparx5_phylink_mac_ops;
 extern const struct phylink_pcs_ops sparx5_phylink_pcs_ops;
+extern const struct ethtool_ops sparx5_ethtool_ops;
 
 /* Calculate raw offset */
 static inline __pure int spx5_offset(int id, int tinst, int tcnt,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index 569a7f7ef0bb..9d485a9d1f1f 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -180,6 +180,7 @@ static const struct net_device_ops sparx5_port_netdev_ops = {
 	.ndo_get_phys_port_name = sparx5_port_get_phys_port_name,
 	.ndo_set_mac_address    = sparx5_set_mac_address,
 	.ndo_validate_addr      = eth_validate_addr,
+	.ndo_get_stats64        = sparx5_get_stats64,
 	.ndo_get_port_parent_id = sparx5_get_port_parent_id,
 };
 
@@ -206,6 +207,7 @@ struct net_device *sparx5_create_netdev(struct sparx5 *sparx5, u32 portno)
 	sparx5_set_port_ifh(spx5_port->ifh, portno);
 
 	ndev->netdev_ops = &sparx5_port_netdev_ops;
+	ndev->ethtool_ops = &sparx5_ethtool_ops;
 
 	val = ether_addr_to_u64(sparx5->base_mac) + portno + 1;
 	u64_to_ether_addr(val, ndev->dev_addr);

From d0f482bb06f9447d44d2cae0386a0bd768c3cc16 Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Thu, 24 Jun 2021 09:07:58 +0200
Subject: [PATCH 2836/3292] arm64: dts: sparx5: Add the Sparx5 switch node

This provides the configuration for the currently available evaluation
boards PCB134 and PCB135.

The series depends on the following series currently on its way
into the kernel:

- Sparx5 Reset Driver
  Link: https://lore.kernel.org/r/20210416084054.2922327-1-steen.hegelund@microchip.com/

Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Signed-off-by: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/microchip/sparx5.dtsi     |  94 ++-
 .../dts/microchip/sparx5_pcb134_board.dtsi    | 481 ++++++++++++--
 .../dts/microchip/sparx5_pcb135_board.dtsi    | 621 +++++++++++++++++-
 3 files changed, 1112 insertions(+), 84 deletions(-)

diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi
index d64621d1213b..ad07fff40544 100644
--- a/arch/arm64/boot/dts/microchip/sparx5.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi
@@ -135,9 +135,12 @@
 			};
 		};
 
-		reset@611010008 {
-			compatible = "microchip,sparx5-chip-reset";
+		reset: reset-controller@611010008 {
+			compatible = "microchip,sparx5-switch-reset";
 			reg = <0x6 0x11010008 0x4>;
+			reg-names = "gcb";
+			#reset-cells = <1>;
+			cpu-syscon = <&cpu_ctrl>;
 		};
 
 		uart0: serial@600100000 {
@@ -275,6 +278,21 @@
 					"GPIO_46", "GPIO_47";
 				function = "emmc";
 			};
+
+			miim1_pins: miim1-pins {
+				pins = "GPIO_56", "GPIO_57";
+				function = "miim";
+			};
+
+			miim2_pins: miim2-pins {
+				pins = "GPIO_58", "GPIO_59";
+				function = "miim";
+			};
+
+			miim3_pins: miim3-pins {
+				pins = "GPIO_52", "GPIO_53";
+				function = "miim";
+			};
 		};
 
 		sgpio0: gpio@61101036c {
@@ -285,6 +303,8 @@
 			clocks = <&sys_clk>;
 			pinctrl-0 = <&sgpio0_pins>;
 			pinctrl-names = "default";
+			resets = <&reset 0>;
+			reset-names = "switch";
 			reg = <0x6 0x1101036c 0x100>;
 			sgpio_in0: gpio@0 {
 				compatible = "microchip,sparx5-sgpio-bank";
@@ -292,6 +312,9 @@
 				gpio-controller;
 				#gpio-cells = <3>;
 				ngpios = <96>;
+				interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <3>;
 			};
 			sgpio_out0: gpio@1 {
 				compatible = "microchip,sparx5-sgpio-bank";
@@ -310,6 +333,8 @@
 			clocks = <&sys_clk>;
 			pinctrl-0 = <&sgpio1_pins>;
 			pinctrl-names = "default";
+			resets = <&reset 0>;
+			reset-names = "switch";
 			reg = <0x6 0x11010484 0x100>;
 			sgpio_in1: gpio@0 {
 				compatible = "microchip,sparx5-sgpio-bank";
@@ -317,6 +342,9 @@
 				gpio-controller;
 				#gpio-cells = <3>;
 				ngpios = <96>;
+				interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <3>;
 			};
 			sgpio_out1: gpio@1 {
 				compatible = "microchip,sparx5-sgpio-bank";
@@ -335,6 +363,8 @@
 			clocks = <&sys_clk>;
 			pinctrl-0 = <&sgpio2_pins>;
 			pinctrl-names = "default";
+			resets = <&reset 0>;
+			reset-names = "switch";
 			reg = <0x6 0x1101059c 0x100>;
 			sgpio_in2: gpio@0 {
 				reg = <0>;
@@ -342,6 +372,9 @@
 				gpio-controller;
 				#gpio-cells = <3>;
 				ngpios = <96>;
+				interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-controller;
+				#interrupt-cells = <3>;
 			};
 			sgpio_out2: gpio@1 {
 				compatible = "microchip,sparx5-sgpio-bank";
@@ -386,5 +419,62 @@
 			#thermal-sensor-cells = <0>;
 			clocks = <&ahb_clk>;
 		};
+
+		mdio0: mdio@6110102b0 {
+			compatible = "mscc,ocelot-miim";
+			status = "disabled";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x6 0x110102b0 0x24>;
+		};
+
+		mdio1: mdio@6110102d4 {
+			compatible = "mscc,ocelot-miim";
+			status = "disabled";
+			pinctrl-0 = <&miim1_pins>;
+			pinctrl-names = "default";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x6 0x110102d4 0x24>;
+		};
+
+		mdio2: mdio@6110102f8 {
+			compatible = "mscc,ocelot-miim";
+			status = "disabled";
+			pinctrl-0 = <&miim2_pins>;
+			pinctrl-names = "default";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x6 0x110102d4 0x24>;
+		};
+
+		mdio3: mdio@61101031c {
+			compatible = "mscc,ocelot-miim";
+			status = "disabled";
+			pinctrl-0 = <&miim3_pins>;
+			pinctrl-names = "default";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x6 0x1101031c 0x24>;
+		};
+
+		serdes: serdes@10808000 {
+			compatible = "microchip,sparx5-serdes";
+			#phy-cells = <1>;
+			clocks = <&sys_clk>;
+			reg = <0x6 0x10808000 0x5d0000>;
+		};
+
+		switch: switch@0x600000000 {
+			compatible = "microchip,sparx5-switch";
+			reg =	<0x6 0 0x401000>,
+				<0x6 0x10004000 0x7fc000>,
+				<0x6 0x11010000 0xaf0000>;
+			reg-names = "cpu", "dev", "gcb";
+			interrupt-names = "xtr";
+			interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+			resets = <&reset 0>;
+			reset-names = "switch";
+		};
 	};
 };
diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
index f0c915160990..33faf1f3264f 100644
--- a/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
@@ -7,30 +7,6 @@
 #include "sparx5_pcb_common.dtsi"
 
 /{
-	aliases {
-	    i2c0   = &i2c0;
-	    i2c100 = &i2c100;
-	    i2c101 = &i2c101;
-	    i2c102 = &i2c102;
-	    i2c103 = &i2c103;
-	    i2c104 = &i2c104;
-	    i2c105 = &i2c105;
-	    i2c106 = &i2c106;
-	    i2c107 = &i2c107;
-	    i2c108 = &i2c108;
-	    i2c109 = &i2c109;
-	    i2c110 = &i2c110;
-	    i2c111 = &i2c111;
-	    i2c112 = &i2c112;
-	    i2c113 = &i2c113;
-	    i2c114 = &i2c114;
-	    i2c115 = &i2c115;
-	    i2c116 = &i2c116;
-	    i2c117 = &i2c117;
-	    i2c118 = &i2c118;
-	    i2c119 = &i2c119;
-	};
-
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpio 37 GPIO_ACTIVE_LOW>;
@@ -298,17 +274,10 @@
 
 &spi0 {
 	status = "okay";
-	spi@0 {
-		compatible = "spi-mux";
-		mux-controls = <&mux>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0>;	/* CS0 */
-		spi-flash@9 {
-			compatible = "jedec,spi-nor";
-			spi-max-frequency = <8000000>;
-			reg = <0x9>;	/* SPI */
-		};
+	spi-flash@0 {
+		compatible = "jedec,spi-nor";
+		spi-max-frequency = <8000000>;
+		reg = <0>;
 	};
 };
 
@@ -328,6 +297,33 @@
 	};
 };
 
+&sgpio0 {
+	status = "okay";
+	microchip,sgpio-port-ranges = <8 15>;
+	gpio@0 {
+		ngpios = <64>;
+	};
+	gpio@1 {
+		ngpios = <64>;
+	};
+};
+
+&sgpio1 {
+	status = "okay";
+	microchip,sgpio-port-ranges = <24 31>;
+	gpio@0 {
+		ngpios = <64>;
+	};
+	gpio@1 {
+		ngpios = <64>;
+	};
+};
+
+&sgpio2 {
+	status = "okay";
+	microchip,sgpio-port-ranges = <0 0>, <11 31>;
+};
+
 &gpio {
 	i2cmux_pins_i: i2cmux-pins-i {
 	       pins = "GPIO_16", "GPIO_17", "GPIO_18", "GPIO_19",
@@ -415,9 +411,9 @@
 
 &i2c0_imux {
 	pinctrl-names =
-		"i2c100", "i2c101", "i2c102", "i2c103",
-		"i2c104", "i2c105", "i2c106", "i2c107",
-		"i2c108", "i2c109", "i2c110", "i2c111", "idle";
+		"i2c_sfp1", "i2c_sfp2", "i2c_sfp3", "i2c_sfp4",
+		"i2c_sfp5", "i2c_sfp6", "i2c_sfp7", "i2c_sfp8",
+		"i2c_sfp9", "i2c_sfp10", "i2c_sfp11", "i2c_sfp12", "idle";
 	pinctrl-0 = <&i2cmux_0>;
 	pinctrl-1 = <&i2cmux_1>;
 	pinctrl-2 = <&i2cmux_2>;
@@ -431,62 +427,62 @@
 	pinctrl-10 = <&i2cmux_10>;
 	pinctrl-11 = <&i2cmux_11>;
 	pinctrl-12 = <&i2cmux_pins_i>;
-	i2c100: i2c_sfp1 {
+	i2c_sfp1: i2c_sfp1 {
 		reg = <0x0>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c101: i2c_sfp2 {
+	i2c_sfp2: i2c_sfp2 {
 		reg = <0x1>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c102: i2c_sfp3 {
+	i2c_sfp3: i2c_sfp3 {
 		reg = <0x2>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c103: i2c_sfp4 {
+	i2c_sfp4: i2c_sfp4 {
 		reg = <0x3>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c104: i2c_sfp5 {
+	i2c_sfp5: i2c_sfp5 {
 		reg = <0x4>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c105: i2c_sfp6 {
+	i2c_sfp6: i2c_sfp6 {
 		reg = <0x5>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c106: i2c_sfp7 {
+	i2c_sfp7: i2c_sfp7 {
 		reg = <0x6>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c107: i2c_sfp8 {
+	i2c_sfp8: i2c_sfp8 {
 		reg = <0x7>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c108: i2c_sfp9 {
+	i2c_sfp9: i2c_sfp9 {
 		reg = <0x8>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c109: i2c_sfp10 {
+	i2c_sfp10: i2c_sfp10 {
 		reg = <0x9>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c110: i2c_sfp11 {
+	i2c_sfp11: i2c_sfp11 {
 		reg = <0xa>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c111: i2c_sfp12 {
+	i2c_sfp12: i2c_sfp12 {
 		reg = <0xb>;
 		#address-cells = <1>;
 		#size-cells = <0>;
@@ -499,44 +495,413 @@
 		     &gpio 61 GPIO_ACTIVE_HIGH
 		     &gpio 54 GPIO_ACTIVE_HIGH>;
 	idle-state = <0x8>;
-	i2c112: i2c_sfp13 {
+	i2c_sfp13: i2c_sfp13 {
 		reg = <0x0>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c113: i2c_sfp14 {
+	i2c_sfp14: i2c_sfp14 {
 		reg = <0x1>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c114: i2c_sfp15 {
+	i2c_sfp15: i2c_sfp15 {
 		reg = <0x2>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c115: i2c_sfp16 {
+	i2c_sfp16: i2c_sfp16 {
 		reg = <0x3>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c116: i2c_sfp17 {
+	i2c_sfp17: i2c_sfp17 {
 		reg = <0x4>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c117: i2c_sfp18 {
+	i2c_sfp18: i2c_sfp18 {
 		reg = <0x5>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c118: i2c_sfp19 {
+	i2c_sfp19: i2c_sfp19 {
 		reg = <0x6>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c119: i2c_sfp20 {
+	i2c_sfp20: i2c_sfp20 {
 		reg = <0x7>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
 };
+
+&mdio3 {
+	status = "ok";
+	phy64: ethernet-phy@64 {
+		reg = <28>;
+	};
+};
+
+&axi {
+	sfp_eth12: sfp-eth12 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp1>;
+		tx-disable-gpios = <&sgpio_out2 11 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 11 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 11 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 12 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth13: sfp-eth13 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp2>;
+		tx-disable-gpios = <&sgpio_out2 12 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 12 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 12 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 13 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth14: sfp-eth14 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp3>;
+		tx-disable-gpios = <&sgpio_out2 13 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 13 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 13 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 14 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth15: sfp-eth15 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp4>;
+		tx-disable-gpios = <&sgpio_out2 14 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 14 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 14 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 15 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth48: sfp-eth48 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp5>;
+		tx-disable-gpios = <&sgpio_out2 15 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 15 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 15 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 16 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth49: sfp-eth49 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp6>;
+		tx-disable-gpios = <&sgpio_out2 16 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 16 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 16 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 17 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth50: sfp-eth50 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp7>;
+		tx-disable-gpios = <&sgpio_out2 17 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 17 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 17 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 18 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth51: sfp-eth51 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp8>;
+		tx-disable-gpios = <&sgpio_out2 18 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 18 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 18 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 19 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth52: sfp-eth52 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp9>;
+		tx-disable-gpios = <&sgpio_out2 19 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 19 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 19 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 20 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth53: sfp-eth53 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp10>;
+		tx-disable-gpios = <&sgpio_out2 20 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 20 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 20 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 21 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth54: sfp-eth54 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp11>;
+		tx-disable-gpios = <&sgpio_out2 21 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 21 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 21 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 22 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth55: sfp-eth55 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp12>;
+		tx-disable-gpios = <&sgpio_out2 22 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 22 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 22 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 23 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth56: sfp-eth56 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp13>;
+		tx-disable-gpios = <&sgpio_out2 23 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 23 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 23 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 24 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth57: sfp-eth57 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp14>;
+		tx-disable-gpios = <&sgpio_out2 24 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 24 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 24 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 25 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth58: sfp-eth58 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp15>;
+		tx-disable-gpios = <&sgpio_out2 25 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 25 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 25 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 26 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth59: sfp-eth59 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp16>;
+		tx-disable-gpios = <&sgpio_out2 26 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 26 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 26 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 27 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth60: sfp-eth60 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp17>;
+		tx-disable-gpios = <&sgpio_out2 27 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 27 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 27 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 28 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth61: sfp-eth61 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp18>;
+		tx-disable-gpios = <&sgpio_out2 28 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 28 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 28 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 29 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth62: sfp-eth62 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp19>;
+		tx-disable-gpios = <&sgpio_out2 29 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 29 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 29 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 30 0 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth63: sfp-eth63 {
+		compatible       = "sff,sfp";
+		i2c-bus          = <&i2c_sfp20>;
+		tx-disable-gpios = <&sgpio_out2 30 1 GPIO_ACTIVE_LOW>;
+		los-gpios        = <&sgpio_in2 30 1 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios   = <&sgpio_in2 30 2 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios   = <&sgpio_in2 31 0 GPIO_ACTIVE_HIGH>;
+	};
+};
+
+&switch {
+	ethernet-ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		/* 10G SFPs */
+		port12: port@12 {
+			reg = <12>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 13>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth12>;
+			microchip,sd-sgpio = <301>;
+			managed = "in-band-status";
+		};
+		port13: port@13 {
+			reg = <13>;
+			/* Example: CU SFP, 1G speed */
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 14>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth13>;
+			microchip,sd-sgpio = <305>;
+			managed = "in-band-status";
+		};
+		port14: port@14 {
+			reg = <14>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 15>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth14>;
+			microchip,sd-sgpio = <309>;
+			managed = "in-band-status";
+		};
+		port15: port@15 {
+			reg = <15>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 16>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth15>;
+			microchip,sd-sgpio = <313>;
+			managed = "in-band-status";
+		};
+		port48: port@48 {
+			reg = <48>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 17>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth48>;
+			microchip,sd-sgpio = <317>;
+			managed = "in-band-status";
+		};
+		port49: port@49 {
+			reg = <49>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 18>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth49>;
+			microchip,sd-sgpio = <321>;
+			managed = "in-band-status";
+		};
+		port50: port@50 {
+			reg = <50>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 19>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth50>;
+			microchip,sd-sgpio = <325>;
+			managed = "in-band-status";
+		};
+		port51: port@51 {
+			reg = <51>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 20>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth51>;
+			microchip,sd-sgpio = <329>;
+			managed = "in-band-status";
+		};
+		port52: port@52 {
+			reg = <52>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 21>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth52>;
+			microchip,sd-sgpio = <333>;
+			managed = "in-band-status";
+		};
+		port53: port@53 {
+			reg = <53>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 22>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth53>;
+			microchip,sd-sgpio = <337>;
+			managed = "in-band-status";
+		};
+		port54: port@54 {
+			reg = <54>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 23>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth54>;
+			microchip,sd-sgpio = <341>;
+			managed = "in-band-status";
+		};
+		port55: port@55 {
+			reg = <55>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 24>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth55>;
+			microchip,sd-sgpio = <345>;
+			managed = "in-band-status";
+		};
+		/* 25G SFPs */
+		port56: port@56 {
+			reg = <56>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 25>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth56>;
+			microchip,sd-sgpio = <349>;
+			managed = "in-band-status";
+		};
+		port57: port@57 {
+			reg = <57>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 26>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth57>;
+			microchip,sd-sgpio = <353>;
+			managed = "in-band-status";
+		};
+		port58: port@58 {
+			reg = <58>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 27>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth58>;
+			microchip,sd-sgpio = <357>;
+			managed = "in-band-status";
+		};
+		port59: port@59 {
+			reg = <59>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 28>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth59>;
+			microchip,sd-sgpio = <361>;
+			managed = "in-band-status";
+		};
+		port60: port@60 {
+			reg = <60>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 29>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth60>;
+			microchip,sd-sgpio = <365>;
+			managed = "in-band-status";
+		};
+		port61: port@61 {
+			reg = <61>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 30>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth61>;
+			microchip,sd-sgpio = <369>;
+			managed = "in-band-status";
+		};
+		port62: port@62 {
+			reg = <62>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 31>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth62>;
+			microchip,sd-sgpio = <373>;
+			managed = "in-band-status";
+		};
+		port63: port@63 {
+			reg = <63>;
+			microchip,bandwidth = <10000>;
+			phys = <&serdes 32>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth63>;
+			microchip,sd-sgpio = <377>;
+			managed = "in-band-status";
+		};
+		/* Finally the Management interface */
+		port64: port@64 {
+			reg = <64>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 0>;
+			phy-handle = <&phy64>;
+			phy-mode = "sgmii";
+		};
+	};
+};
diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
index e28c6dd16377..ef96e6d8c6b3 100644
--- a/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
@@ -7,14 +7,6 @@
 #include "sparx5_pcb_common.dtsi"
 
 /{
-	aliases {
-	    i2c0   = &i2c0;
-	    i2c152 = &i2c152;
-	    i2c153 = &i2c153;
-	    i2c154 = &i2c154;
-	    i2c155 = &i2c155;
-	};
-
 	gpio-restart {
 		compatible = "gpio-restart";
 		gpios = <&gpio 37 GPIO_ACTIVE_LOW>;
@@ -97,17 +89,10 @@
 
 &spi0 {
 	status = "okay";
-	spi@0 {
-		compatible = "spi-mux";
-		mux-controls = <&mux>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0>; /* CS0 */
-		spi-flash@9 {
-			compatible = "jedec,spi-nor";
-			spi-max-frequency = <8000000>;
-			reg = <0x9>; /* SPI */
-		};
+	spi-flash@0 {
+		compatible = "jedec,spi-nor";
+		spi-max-frequency = <8000000>;
+		reg = <0>;
 	};
 };
 
@@ -138,6 +123,11 @@
 	};
 };
 
+&sgpio2 {
+	status = "okay";
+	microchip,sgpio-port-ranges = <0 0>, <16 18>, <28 31>;
+};
+
 &axi {
 	i2c0_imux: i2c0-imux@0 {
 		compatible = "i2c-mux-pinctrl";
@@ -149,31 +139,614 @@
 
 &i2c0_imux {
 	pinctrl-names =
-		"i2c152", "i2c153", "i2c154", "i2c155",
+		"i2c_sfp1", "i2c_sfp2", "i2c_sfp3", "i2c_sfp4",
 		"idle";
 	pinctrl-0 = <&i2cmux_s29>;
 	pinctrl-1 = <&i2cmux_s30>;
 	pinctrl-2 = <&i2cmux_s31>;
 	pinctrl-3 = <&i2cmux_s32>;
 	pinctrl-4 = <&i2cmux_pins_i>;
-	i2c152: i2c_sfp1 {
+	i2c_sfp1: i2c_sfp1 {
 		reg = <0x0>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c153: i2c_sfp2 {
+	i2c_sfp2: i2c_sfp2 {
 		reg = <0x1>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c154: i2c_sfp3 {
+	i2c_sfp3: i2c_sfp3 {
 		reg = <0x2>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
-	i2c155: i2c_sfp4 {
+	i2c_sfp4: i2c_sfp4 {
 		reg = <0x3>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 	};
 };
+
+&axi {
+	sfp_eth60: sfp-eth60 {
+		compatible	   = "sff,sfp";
+		i2c-bus            = <&i2c_sfp1>;
+		tx-disable-gpios   = <&sgpio_out2 28 0 GPIO_ACTIVE_LOW>;
+		rate-select0-gpios = <&sgpio_out2 28 1 GPIO_ACTIVE_HIGH>;
+		los-gpios          = <&sgpio_in2 28 0 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios     = <&sgpio_in2 28 1 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios     = <&sgpio_in2 28 2 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth61: sfp-eth61 {
+		compatible         = "sff,sfp";
+		i2c-bus            = <&i2c_sfp2>;
+		tx-disable-gpios   = <&sgpio_out2 29 0 GPIO_ACTIVE_LOW>;
+		rate-select0-gpios = <&sgpio_out2 29 1 GPIO_ACTIVE_HIGH>;
+		los-gpios          = <&sgpio_in2 29 0 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios     = <&sgpio_in2 29 1 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios     = <&sgpio_in2 29 2 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth62: sfp-eth62 {
+		compatible         = "sff,sfp";
+		i2c-bus            = <&i2c_sfp3>;
+		tx-disable-gpios   = <&sgpio_out2 30 0 GPIO_ACTIVE_LOW>;
+		rate-select0-gpios = <&sgpio_out2 30 1 GPIO_ACTIVE_HIGH>;
+		los-gpios          = <&sgpio_in2 30 0 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios     = <&sgpio_in2 30 1 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios     = <&sgpio_in2 30 2 GPIO_ACTIVE_HIGH>;
+	};
+	sfp_eth63: sfp-eth63 {
+		compatible         = "sff,sfp";
+		i2c-bus            = <&i2c_sfp4>;
+		tx-disable-gpios   = <&sgpio_out2 31 0 GPIO_ACTIVE_LOW>;
+		rate-select0-gpios = <&sgpio_out2 31 1 GPIO_ACTIVE_HIGH>;
+		los-gpios          = <&sgpio_in2 31 0 GPIO_ACTIVE_HIGH>;
+		mod-def0-gpios     = <&sgpio_in2 31 1 GPIO_ACTIVE_LOW>;
+		tx-fault-gpios     = <&sgpio_in2 31 2 GPIO_ACTIVE_HIGH>;
+	};
+};
+
+&mdio0 {
+	status = "ok";
+	phy0: ethernet-phy@0 {
+		reg = <0>;
+	};
+	phy1: ethernet-phy@1 {
+		reg = <1>;
+	};
+	phy2: ethernet-phy@2 {
+		reg = <2>;
+	};
+	phy3: ethernet-phy@3 {
+		reg = <3>;
+	};
+	phy4: ethernet-phy@4 {
+		reg = <4>;
+	};
+	phy5: ethernet-phy@5 {
+		reg = <5>;
+	};
+	phy6: ethernet-phy@6 {
+		reg = <6>;
+	};
+	phy7: ethernet-phy@7 {
+		reg = <7>;
+	};
+	phy8: ethernet-phy@8 {
+		reg = <8>;
+	};
+	phy9: ethernet-phy@9 {
+		reg = <9>;
+	};
+	phy10: ethernet-phy@10 {
+		reg = <10>;
+	};
+	phy11: ethernet-phy@11 {
+		reg = <11>;
+	};
+	phy12: ethernet-phy@12 {
+		reg = <12>;
+	};
+	phy13: ethernet-phy@13 {
+		reg = <13>;
+	};
+	phy14: ethernet-phy@14 {
+		reg = <14>;
+	};
+	phy15: ethernet-phy@15 {
+		reg = <15>;
+	};
+	phy16: ethernet-phy@16 {
+		reg = <16>;
+	};
+	phy17: ethernet-phy@17 {
+		reg = <17>;
+	};
+	phy18: ethernet-phy@18 {
+		reg = <18>;
+	};
+	phy19: ethernet-phy@19 {
+		reg = <19>;
+	};
+	phy20: ethernet-phy@20 {
+		reg = <20>;
+	};
+	phy21: ethernet-phy@21 {
+		reg = <21>;
+	};
+	phy22: ethernet-phy@22 {
+		reg = <22>;
+	};
+	phy23: ethernet-phy@23 {
+		reg = <23>;
+	};
+};
+
+&mdio1 {
+	status = "ok";
+	phy24: ethernet-phy@24 {
+		reg = <0>;
+	};
+	phy25: ethernet-phy@25 {
+		reg = <1>;
+	};
+	phy26: ethernet-phy@26 {
+		reg = <2>;
+	};
+	phy27: ethernet-phy@27 {
+		reg = <3>;
+	};
+	phy28: ethernet-phy@28 {
+		reg = <4>;
+	};
+	phy29: ethernet-phy@29 {
+		reg = <5>;
+	};
+	phy30: ethernet-phy@30 {
+		reg = <6>;
+	};
+	phy31: ethernet-phy@31 {
+		reg = <7>;
+	};
+	phy32: ethernet-phy@32 {
+		reg = <8>;
+	};
+	phy33: ethernet-phy@33 {
+		reg = <9>;
+	};
+	phy34: ethernet-phy@34 {
+		reg = <10>;
+	};
+	phy35: ethernet-phy@35 {
+		reg = <11>;
+	};
+	phy36: ethernet-phy@36 {
+		reg = <12>;
+	};
+	phy37: ethernet-phy@37 {
+		reg = <13>;
+	};
+	phy38: ethernet-phy@38 {
+		reg = <14>;
+	};
+	phy39: ethernet-phy@39 {
+		reg = <15>;
+	};
+	phy40: ethernet-phy@40 {
+		reg = <16>;
+	};
+	phy41: ethernet-phy@41 {
+		reg = <17>;
+	};
+	phy42: ethernet-phy@42 {
+		reg = <18>;
+	};
+	phy43: ethernet-phy@43 {
+		reg = <19>;
+	};
+	phy44: ethernet-phy@44 {
+		reg = <20>;
+	};
+	phy45: ethernet-phy@45 {
+		reg = <21>;
+	};
+	phy46: ethernet-phy@46 {
+		reg = <22>;
+	};
+	phy47: ethernet-phy@47 {
+		reg = <23>;
+	};
+};
+
+&mdio3 {
+	status = "ok";
+	phy64: ethernet-phy@64 {
+		reg = <28>;
+	};
+};
+
+&switch {
+	ethernet-ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port0: port@0 {
+			reg = <0>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 13>;
+			phy-handle = <&phy0>;
+			phy-mode = "qsgmii";
+		};
+		port1: port@1 {
+			reg = <1>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 13>;
+			phy-handle = <&phy1>;
+			phy-mode = "qsgmii";
+		};
+		port2: port@2 {
+			reg = <2>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 13>;
+			phy-handle = <&phy2>;
+			phy-mode = "qsgmii";
+		};
+		port3: port@3 {
+			reg = <3>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 13>;
+			phy-handle = <&phy3>;
+			phy-mode = "qsgmii";
+		};
+		port4: port@4 {
+			reg = <4>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 14>;
+			phy-handle = <&phy4>;
+			phy-mode = "qsgmii";
+		};
+		port5: port@5 {
+			reg = <5>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 14>;
+			phy-handle = <&phy5>;
+			phy-mode = "qsgmii";
+		};
+		port6: port@6 {
+			reg = <6>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 14>;
+			phy-handle = <&phy6>;
+			phy-mode = "qsgmii";
+		};
+		port7: port@7 {
+			reg = <7>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 14>;
+			phy-handle = <&phy7>;
+			phy-mode = "qsgmii";
+		};
+		port8: port@8 {
+			reg = <8>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 15>;
+			phy-handle = <&phy8>;
+			phy-mode = "qsgmii";
+		};
+		port9: port@9 {
+			reg = <9>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 15>;
+			phy-handle = <&phy9>;
+			phy-mode = "qsgmii";
+		};
+		port10: port@10 {
+			reg = <10>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 15>;
+			phy-handle = <&phy10>;
+			phy-mode = "qsgmii";
+		};
+		port11: port@11 {
+			reg = <11>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 15>;
+			phy-handle = <&phy11>;
+			phy-mode = "qsgmii";
+		};
+		port12: port@12 {
+			reg = <12>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 16>;
+			phy-handle = <&phy12>;
+			phy-mode = "qsgmii";
+		};
+		port13: port@13 {
+			reg = <13>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 16>;
+			phy-handle = <&phy13>;
+			phy-mode = "qsgmii";
+		};
+		port14: port@14 {
+			reg = <14>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 16>;
+			phy-handle = <&phy14>;
+			phy-mode = "qsgmii";
+		};
+		port15: port@15 {
+			reg = <15>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 16>;
+			phy-handle = <&phy15>;
+			phy-mode = "qsgmii";
+		};
+		port16: port@16 {
+			reg = <16>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 17>;
+			phy-handle = <&phy16>;
+			phy-mode = "qsgmii";
+		};
+		port17: port@17 {
+			reg = <17>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 17>;
+			phy-handle = <&phy17>;
+			phy-mode = "qsgmii";
+		};
+		port18: port@18 {
+			reg = <18>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 17>;
+			phy-handle = <&phy18>;
+			phy-mode = "qsgmii";
+		};
+		port19: port@19 {
+			reg = <19>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 17>;
+			phy-handle = <&phy19>;
+			phy-mode = "qsgmii";
+		};
+		port20: port@20 {
+			reg = <20>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 18>;
+			phy-handle = <&phy20>;
+			phy-mode = "qsgmii";
+		};
+		port21: port@21 {
+			reg = <21>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 18>;
+			phy-handle = <&phy21>;
+			phy-mode = "qsgmii";
+		};
+		port22: port@22 {
+			reg = <22>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 18>;
+			phy-handle = <&phy22>;
+			phy-mode = "qsgmii";
+		};
+		port23: port@23 {
+			reg = <23>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 18>;
+			phy-handle = <&phy23>;
+			phy-mode = "qsgmii";
+		};
+		port24: port@24 {
+			reg = <24>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 19>;
+			phy-handle = <&phy24>;
+			phy-mode = "qsgmii";
+		};
+		port25: port@25 {
+			reg = <25>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 19>;
+			phy-handle = <&phy25>;
+			phy-mode = "qsgmii";
+		};
+		port26: port@26 {
+			reg = <26>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 19>;
+			phy-handle = <&phy26>;
+			phy-mode = "qsgmii";
+		};
+		port27: port@27 {
+			reg = <27>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 19>;
+			phy-handle = <&phy27>;
+			phy-mode = "qsgmii";
+		};
+		port28: port@28 {
+			reg = <28>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 20>;
+			phy-handle = <&phy28>;
+			phy-mode = "qsgmii";
+		};
+		port29: port@29 {
+			reg = <29>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 20>;
+			phy-handle = <&phy29>;
+			phy-mode = "qsgmii";
+		};
+		port30: port@30 {
+			reg = <30>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 20>;
+			phy-handle = <&phy30>;
+			phy-mode = "qsgmii";
+		};
+		port31: port@31 {
+			reg = <31>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 20>;
+			phy-handle = <&phy31>;
+			phy-mode = "qsgmii";
+		};
+		port32: port@32 {
+			reg = <32>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 21>;
+			phy-handle = <&phy32>;
+			phy-mode = "qsgmii";
+		};
+		port33: port@33 {
+			reg = <33>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 21>;
+			phy-handle = <&phy33>;
+			phy-mode = "qsgmii";
+		};
+		port34: port@34 {
+			reg = <34>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 21>;
+			phy-handle = <&phy34>;
+			phy-mode = "qsgmii";
+		};
+		port35: port@35 {
+			reg = <35>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 21>;
+			phy-handle = <&phy35>;
+			phy-mode = "qsgmii";
+		};
+		port36: port@36 {
+			reg = <36>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 22>;
+			phy-handle = <&phy36>;
+			phy-mode = "qsgmii";
+		};
+		port37: port@37 {
+			reg = <37>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 22>;
+			phy-handle = <&phy37>;
+			phy-mode = "qsgmii";
+		};
+		port38: port@38 {
+			reg = <38>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 22>;
+			phy-handle = <&phy38>;
+			phy-mode = "qsgmii";
+		};
+		port39: port@39 {
+			reg = <39>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 22>;
+			phy-handle = <&phy39>;
+			phy-mode = "qsgmii";
+		};
+		port40: port@40 {
+			reg = <40>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 23>;
+			phy-handle = <&phy40>;
+			phy-mode = "qsgmii";
+		};
+		port41: port@41 {
+			reg = <41>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 23>;
+			phy-handle = <&phy41>;
+			phy-mode = "qsgmii";
+		};
+		port42: port@42 {
+			reg = <42>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 23>;
+			phy-handle = <&phy42>;
+			phy-mode = "qsgmii";
+		};
+		port43: port@43 {
+			reg = <43>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 23>;
+			phy-handle = <&phy43>;
+			phy-mode = "qsgmii";
+		};
+		port44: port@44 {
+			reg = <44>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 24>;
+			phy-handle = <&phy44>;
+			phy-mode = "qsgmii";
+		};
+		port45: port@45 {
+			reg = <45>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 24>;
+			phy-handle = <&phy45>;
+			phy-mode = "qsgmii";
+		};
+		port46: port@46 {
+			reg = <46>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 24>;
+			phy-handle = <&phy46>;
+			phy-mode = "qsgmii";
+		};
+		port47: port@47 {
+			reg = <47>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 24>;
+			phy-handle = <&phy47>;
+			phy-mode = "qsgmii";
+		};
+		/* Then the 25G interfaces */
+		port60: port@60 {
+			reg = <60>;
+			microchip,bandwidth = <25000>;
+			phys = <&serdes 29>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth60>;
+			managed = "in-band-status";
+		};
+		port61: port@61 {
+			reg = <61>;
+			microchip,bandwidth = <25000>;
+			phys = <&serdes 30>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth61>;
+			managed = "in-band-status";
+		};
+		port62: port@62 {
+			reg = <62>;
+			microchip,bandwidth = <25000>;
+			phys = <&serdes 31>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth62>;
+			managed = "in-band-status";
+		};
+		port63: port@63 {
+			reg = <63>;
+			microchip,bandwidth = <25000>;
+			phys = <&serdes 32>;
+			phy-mode = "10gbase-r";
+			sfp = <&sfp_eth63>;
+			managed = "in-band-status";
+		};
+		/* Finally the Management interface */
+		port64: port@64 {
+			reg = <64>;
+			microchip,bandwidth = <1000>;
+			phys = <&serdes 0>;
+			phy-handle = <&phy64>;
+			phy-mode = "sgmii";
+		};
+	};
+};

From 478890682ff7124bf4eaa6f0af382e9d2c937f73 Mon Sep 17 00:00:00 2001
From: Yajun Deng <yajun.deng@linux.dev>
Date: Thu, 24 Jun 2021 15:35:08 +0800
Subject: [PATCH 2837/3292] usbnet: add usbnet_event_names[] for kevent

Modify the netdev_dbg content from int to char * in usbnet_defer_kevent(),
this looks more readable.

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 57a5a025255c..470e1c1e6353 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -74,6 +74,23 @@ MODULE_PARM_DESC (msg_level, "Override default message level");
 
 /*-------------------------------------------------------------------------*/
 
+static const char * const usbnet_event_names[] = {
+	[EVENT_TX_HALT]		   = "EVENT_TX_HALT",
+	[EVENT_RX_HALT]		   = "EVENT_RX_HALT",
+	[EVENT_RX_MEMORY]	   = "EVENT_RX_MEMORY",
+	[EVENT_STS_SPLIT]	   = "EVENT_STS_SPLIT",
+	[EVENT_LINK_RESET]	   = "EVENT_LINK_RESET",
+	[EVENT_RX_PAUSED]	   = "EVENT_RX_PAUSED",
+	[EVENT_DEV_ASLEEP]	   = "EVENT_DEV_ASLEEP",
+	[EVENT_DEV_OPEN]	   = "EVENT_DEV_OPEN",
+	[EVENT_DEVICE_REPORT_IDLE] = "EVENT_DEVICE_REPORT_IDLE",
+	[EVENT_NO_RUNTIME_PM]	   = "EVENT_NO_RUNTIME_PM",
+	[EVENT_RX_KILL]		   = "EVENT_RX_KILL",
+	[EVENT_LINK_CHANGE]	   = "EVENT_LINK_CHANGE",
+	[EVENT_SET_RX_MODE]	   = "EVENT_SET_RX_MODE",
+	[EVENT_NO_IP_ALIGN]	   = "EVENT_NO_IP_ALIGN",
+};
+
 /* handles CDC Ethernet and many other network "bulk data" interfaces */
 int usbnet_get_endpoints(struct usbnet *dev, struct usb_interface *intf)
 {
@@ -452,9 +469,9 @@ void usbnet_defer_kevent (struct usbnet *dev, int work)
 {
 	set_bit (work, &dev->flags);
 	if (!schedule_work (&dev->kevent))
-		netdev_dbg(dev->net, "kevent %d may have been dropped\n", work);
+		netdev_dbg(dev->net, "kevent %s may have been dropped\n", usbnet_event_names[work]);
 	else
-		netdev_dbg(dev->net, "kevent %d scheduled\n", work);
+		netdev_dbg(dev->net, "kevent %s scheduled\n", usbnet_event_names[work]);
 }
 EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
 

From 1f7fe5121127e037b86592ba42ce36515ea0e3f7 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 24 Jun 2021 11:38:28 +0200
Subject: [PATCH 2838/3292] net: macsec: fix the length used to copy the key
 for offloading

The key length used when offloading macsec to Ethernet or PHY drivers
was set to MACSEC_KEYID_LEN (16), which is an issue as:
- This was never meant to be the key length.
- The key length can be > 16.

Fix this by using MACSEC_MAX_KEY_LEN to store the key (the max length
accepted in uAPI) and secy->key_len to copy it.

Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure")
Reported-by: Lior Nahmanson <liorna@nvidia.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macsec.c | 4 ++--
 include/net/macsec.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 92425e1fd70c..93dc48b9b4f2 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1819,7 +1819,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
 		ctx.sa.rx_sa = rx_sa;
 		ctx.secy = secy;
 		memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
-		       MACSEC_KEYID_LEN);
+		       secy->key_len);
 
 		err = macsec_offload(ops->mdo_add_rxsa, &ctx);
 		if (err)
@@ -2061,7 +2061,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
 		ctx.sa.tx_sa = tx_sa;
 		ctx.secy = secy;
 		memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
-		       MACSEC_KEYID_LEN);
+		       secy->key_len);
 
 		err = macsec_offload(ops->mdo_add_txsa, &ctx);
 		if (err)
diff --git a/include/net/macsec.h b/include/net/macsec.h
index 52874cdfe226..d6fa6b97f6ef 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -241,7 +241,7 @@ struct macsec_context {
 	struct macsec_rx_sc *rx_sc;
 	struct {
 		unsigned char assoc_num;
-		u8 key[MACSEC_KEYID_LEN];
+		u8 key[MACSEC_MAX_KEY_LEN];
 		union {
 			struct macsec_rx_sa *rx_sa;
 			struct macsec_tx_sa *tx_sa;

From c309217f91f2d2097c2a0a832d9bff50b88c81dc Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 24 Jun 2021 11:38:29 +0200
Subject: [PATCH 2839/3292] net: phy: mscc: fix macsec key length

The key length used to store the macsec key was set to MACSEC_KEYID_LEN
(16), which is an issue as:
- This was never meant to be the key length.
- The key length can be > 16.

Fix this by using MACSEC_MAX_KEY_LEN instead (the max length accepted in
uAPI).

Fixes: 28c5107aa904 ("net: phy: mscc: macsec support")
Reported-by: Lior Nahmanson <liorna@nvidia.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc_macsec.c | 2 +-
 drivers/net/phy/mscc/mscc_macsec.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c
index 10be266e48e8..b7b2521c73fb 100644
--- a/drivers/net/phy/mscc/mscc_macsec.c
+++ b/drivers/net/phy/mscc/mscc_macsec.c
@@ -501,7 +501,7 @@ static u32 vsc8584_macsec_flow_context_id(struct macsec_flow *flow)
 }
 
 /* Derive the AES key to get a key for the hash autentication */
-static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN],
+static int vsc8584_macsec_derive_key(const u8 key[MACSEC_MAX_KEY_LEN],
 				     u16 key_len, u8 hkey[16])
 {
 	const u8 input[AES_BLOCK_SIZE] = {0};
diff --git a/drivers/net/phy/mscc/mscc_macsec.h b/drivers/net/phy/mscc/mscc_macsec.h
index 9c6d25e36de2..453304bae778 100644
--- a/drivers/net/phy/mscc/mscc_macsec.h
+++ b/drivers/net/phy/mscc/mscc_macsec.h
@@ -81,7 +81,7 @@ struct macsec_flow {
 	/* Highest takes precedence [0..15] */
 	u8 priority;
 
-	u8 key[MACSEC_KEYID_LEN];
+	u8 key[MACSEC_MAX_KEY_LEN];
 
 	union {
 		struct macsec_rx_sa *rx_sa;

From d67fb4772d9a6cfd10f1109f0e7b1e6eb58c8e16 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 24 Jun 2021 11:38:30 +0200
Subject: [PATCH 2840/3292] net: atlantic: fix the macsec key length

The key length used to store the macsec key was set to MACSEC_KEYID_LEN
(16), which is an issue as:
- This was never meant to be the key length.
- The key length can be > 16.

Fix this by using MACSEC_MAX_KEY_LEN instead (the max length accepted in
uAPI).

Fixes: 27736563ce32 ("net: atlantic: MACSec egress offload implementation")
Fixes: 9ff40a751a6f ("net: atlantic: MACSec ingress offload implementation")
Reported-by: Lior Nahmanson <liorna@nvidia.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_macsec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h
index f5fba8b8cdea..a47e2710487e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.h
@@ -91,7 +91,7 @@ struct aq_macsec_txsc {
 	u32 hw_sc_idx;
 	unsigned long tx_sa_idx_busy;
 	const struct macsec_secy *sw_secy;
-	u8 tx_sa_key[MACSEC_NUM_AN][MACSEC_KEYID_LEN];
+	u8 tx_sa_key[MACSEC_NUM_AN][MACSEC_MAX_KEY_LEN];
 	struct aq_macsec_tx_sc_stats stats;
 	struct aq_macsec_tx_sa_stats tx_sa_stats[MACSEC_NUM_AN];
 };
@@ -101,7 +101,7 @@ struct aq_macsec_rxsc {
 	unsigned long rx_sa_idx_busy;
 	const struct macsec_secy *sw_secy;
 	const struct macsec_rx_sc *sw_rxsc;
-	u8 rx_sa_key[MACSEC_NUM_AN][MACSEC_KEYID_LEN];
+	u8 rx_sa_key[MACSEC_NUM_AN][MACSEC_MAX_KEY_LEN];
 	struct aq_macsec_rx_sa_stats rx_sa_stats[MACSEC_NUM_AN];
 };
 

From 624085a31c1ad6a80b1e53f686bf6ee92abbf6e8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 24 Jun 2021 03:07:20 -0700
Subject: [PATCH 2841/3292] ipv6: fix out-of-bound access in ip6_parse_tlv()

First problem is that optlen is fetched without checking
there is more than one byte to parse.

Fix this by taking care of IPV6_TLV_PAD1 before
fetching optlen (under appropriate sanity checks against len)

Second problem is that IPV6_TLV_PADN checks of zero
padding are performed before the check of remaining length.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Fixes: c1412fce7ecc ("net/ipv6/exthdrs.c: Strict PadN option checking")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6f7da8f3e2e5..26882e165c9e 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -135,18 +135,23 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = nh[off + 1] + 2;
-		int i;
+		int optlen, i;
 
-		switch (nh[off]) {
-		case IPV6_TLV_PAD1:
-			optlen = 1;
+		if (nh[off] == IPV6_TLV_PAD1) {
 			padlen++;
 			if (padlen > 7)
 				goto bad;
-			break;
+			off++;
+			len--;
+			continue;
+		}
+		if (len < 2)
+			goto bad;
+		optlen = nh[off + 1] + 2;
+		if (optlen > len)
+			goto bad;
 
-		case IPV6_TLV_PADN:
+		if (nh[off] == IPV6_TLV_PADN) {
 			/* RFC 2460 states that the purpose of PadN is
 			 * to align the containing header to multiples
 			 * of 8. 7 is therefore the highest valid value.
@@ -163,12 +168,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
 				if (nh[off + i] != 0)
 					goto bad;
 			}
-			break;
-
-		default: /* Other TLV code so scan list */
-			if (optlen > len)
-				goto bad;
-
+		} else {
 			tlv_count++;
 			if (tlv_count > max_count)
 				goto bad;
@@ -188,7 +188,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
 				return false;
 
 			padlen = 0;
-			break;
 		}
 		off += optlen;
 		len -= optlen;

From c6a7ed77ee6334f3a85a0f3db74ca80101e25304 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:17 -0700
Subject: [PATCH 2842/3292] gve: Update GVE documentation to describe DQO

DQO is a new descriptor format for our next generation virtual NIC.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/google/gve.rst    | 53 +++++++++++++++++--
 1 file changed, 48 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/google/gve.rst b/Documentation/networking/device_drivers/ethernet/google/gve.rst
index 793693cef6e3..6d73ee78f3d7 100644
--- a/Documentation/networking/device_drivers/ethernet/google/gve.rst
+++ b/Documentation/networking/device_drivers/ethernet/google/gve.rst
@@ -47,13 +47,24 @@ The driver interacts with the device in the following ways:
  - Transmit and Receive Queues
     - See description below
 
+Descriptor Formats
+------------------
+GVE supports two descriptor formats: GQI and DQO. These two formats have
+entirely different descriptors, which will be described below.
+
 Registers
 ---------
-All registers are MMIO and big endian.
+All registers are MMIO.
 
 The registers are used for initializing and configuring the device as well as
 querying device status in response to management interrupts.
 
+Endianness
+----------
+- Admin Queue messages and registers are all Big Endian.
+- GQI descriptors and datapath registers are Big Endian.
+- DQO descriptors and datapath registers are Little Endian.
+
 Admin Queue (AQ)
 ----------------
 The Admin Queue is a PAGE_SIZE memory block, treated as an array of AQ
@@ -97,10 +108,10 @@ the queues associated with that interrupt.
 The handler for these irqs schedule the napi for that block to run
 and poll the queues.
 
-Traffic Queues
---------------
-gVNIC's queues are composed of a descriptor ring and a buffer and are
-assigned to a notification block.
+GQI Traffic Queues
+------------------
+GQI queues are composed of a descriptor ring and a buffer and are assigned to a
+notification block.
 
 The descriptor rings are power-of-two-sized ring buffers consisting of
 fixed-size descriptors. They advance their head pointer using a __be32
@@ -121,3 +132,35 @@ Receive
 The buffers for receive rings are put into a data ring that is the same
 length as the descriptor ring and the head and tail pointers advance over
 the rings together.
+
+DQO Traffic Queues
+------------------
+- Every TX and RX queue is assigned a notification block.
+
+- TX and RX buffers queues, which send descriptors to the device, use MMIO
+  doorbells to notify the device of new descriptors.
+
+- RX and TX completion queues, which receive descriptors from the device, use a
+  "generation bit" to know when a descriptor was populated by the device. The
+  driver initializes all bits with the "current generation". The device will
+  populate received descriptors with the "next generation" which is inverted
+  from the current generation. When the ring wraps, the current/next generation
+  are swapped.
+
+- It's the driver's responsibility to ensure that the RX and TX completion
+  queues are not overrun. This can be accomplished by limiting the number of
+  descriptors posted to HW.
+
+- TX packets have a 16 bit completion_tag and RX buffers have a 16 bit
+  buffer_id. These will be returned on the TX completion and RX queues
+  respectively to let the driver know which packet/buffer was completed.
+
+Transmit
+~~~~~~~~
+A packet's buffers are DMA mapped for the device to access before transmission.
+After the packet was successfully transmitted, the buffers are unmapped.
+
+Receive
+~~~~~~~
+The driver posts fixed sized buffers to HW on the RX buffer queue. The packet
+received on the associated RX queue may span multiple descriptors.

From dbdaa6754051280d929514a1722b9d5bc7c65a61 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:18 -0700
Subject: [PATCH 2843/3292] gve: Move some static functions to a common file

These functions will be shared by the GQI and DQO variants of the GVNIC
driver as of follow-up patches in this series.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/Makefile    |  2 +-
 drivers/net/ethernet/google/gve/gve_rx.c    | 42 +------------
 drivers/net/ethernet/google/gve/gve_tx.c    | 21 +------
 drivers/net/ethernet/google/gve/gve_utils.c | 65 +++++++++++++++++++++
 drivers/net/ethernet/google/gve/gve_utils.h | 24 ++++++++
 5 files changed, 94 insertions(+), 60 deletions(-)
 create mode 100644 drivers/net/ethernet/google/gve/gve_utils.c
 create mode 100644 drivers/net/ethernet/google/gve/gve_utils.h

diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
index 3354ce40eb97..0143f4471e42 100644
--- a/drivers/net/ethernet/google/gve/Makefile
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -1,4 +1,4 @@
 # Makefile for the Google virtual Ethernet (gve) driver
 
 obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o
+gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o gve_utils.o
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index bf123fe524c4..2cfedf4bf5d8 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -1,21 +1,14 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #include "gve.h"
 #include "gve_adminq.h"
+#include "gve_utils.h"
 #include <linux/etherdevice.h>
 
-static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
-{
-	struct gve_notify_block *block =
-			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];
-
-	block->rx = NULL;
-}
-
 static void gve_rx_free_buffer(struct device *dev,
 			       struct gve_rx_slot_page_info *page_info,
 			       union gve_rx_data_slot *data_slot)
@@ -137,16 +130,6 @@ alloc_err:
 	return err;
 }
 
-static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
-{
-	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
-	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
-	struct gve_rx_ring *rx = &priv->rx[queue_idx];
-
-	block->rx = rx;
-	rx->ntfy_id = ntfy_idx;
-}
-
 static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
 {
 	struct gve_rx_ring *rx = &priv->rx[idx];
@@ -279,27 +262,6 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
 	return PKT_HASH_TYPE_L2;
 }
 
-static struct sk_buff *gve_rx_copy(struct net_device *dev,
-				   struct napi_struct *napi,
-				   struct gve_rx_slot_page_info *page_info,
-				   u16 len)
-{
-	struct sk_buff *skb = napi_alloc_skb(napi, len);
-	void *va = page_info->page_address + GVE_RX_PAD +
-		   (page_info->page_offset ? PAGE_SIZE / 2 : 0);
-
-	if (unlikely(!skb))
-		return NULL;
-
-	__skb_put(skb, len);
-
-	skb_copy_to_linear_data(skb, va, len);
-
-	skb->protocol = eth_type_trans(skb, dev);
-
-	return skb;
-}
-
 static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
 					struct gve_rx_slot_page_info *page_info,
 					u16 len)
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 3e04a3973d68..6866f6e0139d 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -1,11 +1,12 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #include "gve.h"
 #include "gve_adminq.h"
+#include "gve_utils.h"
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/vmalloc.h>
@@ -131,14 +132,6 @@ static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
 	atomic_add(bytes, &fifo->available);
 }
 
-static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
-{
-	struct gve_notify_block *block =
-			&priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];
-
-	block->tx = NULL;
-}
-
 static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
 			     u32 to_do, bool try_to_wake);
 
@@ -174,16 +167,6 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
 	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
 }
 
-static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
-{
-	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
-	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
-	struct gve_tx_ring *tx = &priv->tx[queue_idx];
-
-	block->tx = tx;
-	tx->ntfy_id = ntfy_idx;
-}
-
 static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 {
 	struct gve_tx_ring *tx = &priv->tx[idx];
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
new file mode 100644
index 000000000000..2bfff0f75519
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+
+void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
+{
+	struct gve_notify_block *block =
+			&priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];
+
+	block->tx = NULL;
+}
+
+void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
+{
+	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
+	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+	struct gve_tx_ring *tx = &priv->tx[queue_idx];
+
+	block->tx = tx;
+	tx->ntfy_id = ntfy_idx;
+}
+
+void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
+{
+	struct gve_notify_block *block =
+			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];
+
+	block->rx = NULL;
+}
+
+void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
+{
+	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
+	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+	struct gve_rx_ring *rx = &priv->rx[queue_idx];
+
+	block->rx = rx;
+	rx->ntfy_id = ntfy_idx;
+}
+
+struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
+			    struct gve_rx_slot_page_info *page_info, u16 len)
+{
+	struct sk_buff *skb = napi_alloc_skb(napi, len);
+	void *va = page_info->page_address + GVE_RX_PAD +
+		   (page_info->page_offset ? PAGE_SIZE / 2 : 0);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	__skb_put(skb, len);
+
+	skb_copy_to_linear_data(skb, va, len);
+
+	skb->protocol = eth_type_trans(skb, dev);
+
+	return skb;
+}
+
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
new file mode 100644
index 000000000000..76540374a083
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#ifndef _GVE_UTILS_H
+#define _GVE_UTILS_H
+
+#include <linux/etherdevice.h>
+
+#include "gve.h"
+
+void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx);
+void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx);
+
+void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
+void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
+
+struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
+			    struct gve_rx_slot_page_info *page_info, u16 len);
+
+#endif /* _GVE_UTILS_H */
+

From 35f9b2f43f8e0aee52421640912edc1ec309fd2e Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:19 -0700
Subject: [PATCH 2844/3292] gve: gve_rx_copy: Move padding to an argument

Future use cases will have a different padding value.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_rx.c    | 4 ++--
 drivers/net/ethernet/google/gve/gve_utils.c | 5 +++--
 drivers/net/ethernet/google/gve/gve_utils.h | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 2cfedf4bf5d8..c51578c1e2b2 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -350,7 +350,7 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev,
 			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
 		}
 	} else {
-		skb = gve_rx_copy(netdev, napi, page_info, len);
+		skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
 		if (skb) {
 			u64_stats_update_begin(&rx->statss);
 			rx->rx_copied_pkt++;
@@ -392,7 +392,7 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
 
 	if (len <= priv->rx_copybreak) {
 		/* Just copy small packets */
-		skb = gve_rx_copy(dev, napi, page_info, len);
+		skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
 		u64_stats_update_begin(&rx->statss);
 		rx->rx_copied_pkt++;
 		rx->rx_copybreak_pkt++;
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 2bfff0f75519..eb3d67c8b3ac 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -45,10 +45,11 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
 }
 
 struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
-			    struct gve_rx_slot_page_info *page_info, u16 len)
+			    struct gve_rx_slot_page_info *page_info, u16 len,
+			    u16 pad)
 {
 	struct sk_buff *skb = napi_alloc_skb(napi, len);
-	void *va = page_info->page_address + GVE_RX_PAD +
+	void *va = page_info->page_address + pad +
 		   (page_info->page_offset ? PAGE_SIZE / 2 : 0);
 
 	if (unlikely(!skb))
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 76540374a083..8fb39b990bbc 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -18,7 +18,8 @@ void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
 void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
 
 struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
-			    struct gve_rx_slot_page_info *page_info, u16 len);
+			    struct gve_rx_slot_page_info *page_info, u16 len,
+			    u16 pad);
 
 #endif /* _GVE_UTILS_H */
 

From 920fb45193551dc0e6cd8fa89e2487906f1867f6 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:20 -0700
Subject: [PATCH 2845/3292] gve: Make gve_rx_slot_page_info.page_offset an
 absolute offset

Using `page_offset` like a boolean means a page may only be split into
two sections. With page sizes larger than 4k, this can be very wasteful.
Future commits in this patchset use `struct gve_rx_slot_page_info` in a
way which supports a fixed buffer size and a variable page size.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h       | 4 ++--
 drivers/net/ethernet/google/gve/gve_rx.c    | 4 ++--
 drivers/net/ethernet/google/gve/gve_utils.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index daf07c0f790b..5467c74d379e 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0 OR MIT)
  * Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #ifndef _GVE_H_
@@ -51,7 +51,7 @@ struct gve_rx_desc_queue {
 struct gve_rx_slot_page_info {
 	struct page *page;
 	void *page_address;
-	u8 page_offset; /* flipped to second half? */
+	u32 page_offset; /* offset to write to in page */
 	u8 can_flip;
 };
 
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index c51578c1e2b2..e14509614287 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -272,7 +272,7 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
 		return NULL;
 
 	skb_add_rx_frag(skb, 0, page_info->page,
-			(page_info->page_offset ? PAGE_SIZE / 2 : 0) +
+			page_info->page_offset +
 			GVE_RX_PAD, len, PAGE_SIZE / 2);
 
 	return skb;
@@ -283,7 +283,7 @@ static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *sl
 	const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);
 
 	/* "flip" to other packet buffer on this page */
-	page_info->page_offset ^= 0x1;
+	page_info->page_offset ^= PAGE_SIZE / 2;
 	*(slot_addr) ^= offset;
 }
 
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index eb3d67c8b3ac..a0607a824ab9 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -50,7 +50,7 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
 {
 	struct sk_buff *skb = napi_alloc_skb(napi, len);
 	void *va = page_info->page_address + pad +
-		   (page_info->page_offset ? PAGE_SIZE / 2 : 0);
+		   page_info->page_offset;
 
 	if (unlikely(!skb))
 		return NULL;

From 8a39d3e0dadfe27a50019fa83dc57c5158e42ed1 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:21 -0700
Subject: [PATCH 2846/3292] gve: Introduce a new model for device options

The current model uses an integer ID and a fixed size struct for the
parameters of each device option.

The new model allows the device option structs to grow in size over
time. A driver may assume that changes to device option structs will
always be appended.

New device options will also generally have a
`supported_features_mask` so that the driver knows which fields within a
particular device option are enabled.

`gve_device_option.feat_mask` is changed to `required_features_mask`,
and it is a bitmask which must match the value expected by the driver.
This gives the device the ability to break backwards compatibility with
old drivers for certain features by blocking the old drivers from trying
to use the feature.

We maintain ABI compatibility with the old model for
GVE_DEV_OPT_ID_RAW_ADDRESSING in case a driver is using a device which
does not support the new model.

This patch introduces some new terminology:

RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
      mapped and read/updated by the device.

QPL - Queue Page Lists - Driver uses bounce buffers which are DMA mapped
      with the device for read/write and data is copied from/to SKBs.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_adminq.c | 172 +++++++++++++++----
 drivers/net/ethernet/google/gve/gve_adminq.h |  50 +++++-
 2 files changed, 179 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 53864f200599..1c2a4ccaefe5 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #include <linux/etherdevice.h>
@@ -18,6 +18,8 @@
 "Expected: length=%d, feature_mask=%x.\n" \
 "Actual: length=%d, feature_mask=%x.\n"
 
+#define GVE_DEVICE_OPTION_TOO_BIG_FMT "Length of %s option larger than expected. Possible older version of guest driver.\n"
+
 static
 struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *descriptor,
 					      struct gve_device_option *option)
@@ -33,28 +35,81 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc
 static
 void gve_parse_device_option(struct gve_priv *priv,
 			     struct gve_device_descriptor *device_descriptor,
-			     struct gve_device_option *option)
+			     struct gve_device_option *option,
+			     struct gve_device_option_gqi_rda **dev_op_gqi_rda,
+			     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
+			     struct gve_device_option_dqo_rda **dev_op_dqo_rda)
 {
+	u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
 	u16 option_length = be16_to_cpu(option->option_length);
 	u16 option_id = be16_to_cpu(option->option_id);
 
+	/* If the length or feature mask doesn't match, continue without
+	 * enabling the feature.
+	 */
 	switch (option_id) {
-	case GVE_DEV_OPT_ID_RAW_ADDRESSING:
-		/* If the length or feature mask doesn't match,
-		 * continue without enabling the feature.
-		 */
-		if (option_length != GVE_DEV_OPT_LEN_RAW_ADDRESSING ||
-		    option->feat_mask != cpu_to_be32(GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING)) {
-			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, "Raw Addressing",
-				 GVE_DEV_OPT_LEN_RAW_ADDRESSING,
-				 cpu_to_be32(GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING),
-				 option_length, option->feat_mask);
-			priv->raw_addressing = 0;
-		} else {
-			dev_info(&priv->pdev->dev,
-				 "Raw addressing device option enabled.\n");
-			priv->raw_addressing = 1;
+	case GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING:
+		if (option_length != GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING) {
+			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+				 "Raw Addressing",
+				 GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING,
+				 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING,
+				 option_length, req_feat_mask);
+			break;
 		}
+
+		dev_info(&priv->pdev->dev,
+			 "Gqi raw addressing device option enabled.\n");
+		priv->raw_addressing = 1;
+		break;
+	case GVE_DEV_OPT_ID_GQI_RDA:
+		if (option_length < sizeof(**dev_op_gqi_rda) ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA) {
+			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+				 "GQI RDA", (int)sizeof(**dev_op_gqi_rda),
+				 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA,
+				 option_length, req_feat_mask);
+			break;
+		}
+
+		if (option_length > sizeof(**dev_op_gqi_rda)) {
+			dev_warn(&priv->pdev->dev,
+				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI RDA");
+		}
+		*dev_op_gqi_rda = (void *)(option + 1);
+		break;
+	case GVE_DEV_OPT_ID_GQI_QPL:
+		if (option_length < sizeof(**dev_op_gqi_qpl) ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL) {
+			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+				 "GQI QPL", (int)sizeof(**dev_op_gqi_qpl),
+				 GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL,
+				 option_length, req_feat_mask);
+			break;
+		}
+
+		if (option_length > sizeof(**dev_op_gqi_qpl)) {
+			dev_warn(&priv->pdev->dev,
+				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI QPL");
+		}
+		*dev_op_gqi_qpl = (void *)(option + 1);
+		break;
+	case GVE_DEV_OPT_ID_DQO_RDA:
+		if (option_length < sizeof(**dev_op_dqo_rda) ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) {
+			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+				 "DQO RDA", (int)sizeof(**dev_op_dqo_rda),
+				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA,
+				 option_length, req_feat_mask);
+			break;
+		}
+
+		if (option_length > sizeof(**dev_op_dqo_rda)) {
+			dev_warn(&priv->pdev->dev,
+				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO RDA");
+		}
+		*dev_op_dqo_rda = (void *)(option + 1);
 		break;
 	default:
 		/* If we don't recognize the option just continue
@@ -65,6 +120,39 @@ void gve_parse_device_option(struct gve_priv *priv,
 	}
 }
 
+/* Process all device options for a given describe device call. */
+static int
+gve_process_device_options(struct gve_priv *priv,
+			   struct gve_device_descriptor *descriptor,
+			   struct gve_device_option_gqi_rda **dev_op_gqi_rda,
+			   struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
+			   struct gve_device_option_dqo_rda **dev_op_dqo_rda)
+{
+	const int num_options = be16_to_cpu(descriptor->num_device_options);
+	struct gve_device_option *dev_opt;
+	int i;
+
+	/* The options struct directly follows the device descriptor. */
+	dev_opt = (void *)(descriptor + 1);
+	for (i = 0; i < num_options; i++) {
+		struct gve_device_option *next_opt;
+
+		next_opt = gve_get_next_option(descriptor, dev_opt);
+		if (!next_opt) {
+			dev_err(&priv->dev->dev,
+				"options exceed device_descriptor's total length.\n");
+			return -EINVAL;
+		}
+
+		gve_parse_device_option(priv, descriptor, dev_opt,
+					dev_op_gqi_rda, dev_op_gqi_qpl,
+					dev_op_dqo_rda);
+		dev_opt = next_opt;
+	}
+
+	return 0;
+}
+
 int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
 {
 	priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE,
@@ -514,15 +602,15 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
 
 int gve_adminq_describe_device(struct gve_priv *priv)
 {
+	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
+	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
+	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
 	struct gve_device_descriptor *descriptor;
-	struct gve_device_option *dev_opt;
 	union gve_adminq_command cmd;
 	dma_addr_t descriptor_bus;
-	u16 num_options;
 	int err = 0;
 	u8 *mac;
 	u16 mtu;
-	int i;
 
 	memset(&cmd, 0, sizeof(cmd));
 	descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE,
@@ -540,6 +628,31 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	if (err)
 		goto free_device_descriptor;
 
+	priv->raw_addressing = 0;
+	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
+					 &dev_op_gqi_qpl, &dev_op_dqo_rda);
+	if (err)
+		goto free_device_descriptor;
+
+	/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
+	 * is not set to GqiRda, choose the queue format in a priority order:
+	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
+	 */
+	if (priv->raw_addressing == 1) {
+		dev_info(&priv->pdev->dev,
+			 "Driver is running with GQI RDA queue format.\n");
+	} else if (dev_op_dqo_rda) {
+		dev_info(&priv->pdev->dev,
+			 "Driver is running with DQO RDA queue format.\n");
+	} else if (dev_op_gqi_rda) {
+		dev_info(&priv->pdev->dev,
+			 "Driver is running with GQI RDA queue format.\n");
+		priv->raw_addressing = 1;
+	} else {
+		dev_info(&priv->pdev->dev,
+			 "Driver is running with GQI QPL queue format.\n");
+	}
+
 	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
 	if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
 		dev_err(&priv->pdev->dev, "Tx desc count %d too low\n", priv->tx_desc_cnt);
@@ -576,26 +689,9 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 		priv->rx_desc_cnt = priv->rx_data_slot_cnt;
 	}
 	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
-	dev_opt = (void *)(descriptor + 1);
-
-	num_options = be16_to_cpu(descriptor->num_device_options);
-	for (i = 0; i < num_options; i++) {
-		struct gve_device_option *next_opt;
-
-		next_opt = gve_get_next_option(descriptor, dev_opt);
-		if (!next_opt) {
-			dev_err(&priv->dev->dev,
-				"options exceed device_descriptor's total length.\n");
-			err = -EINVAL;
-			goto free_device_descriptor;
-		}
-
-		gve_parse_device_option(priv, descriptor, dev_opt);
-		dev_opt = next_opt;
-	}
 
 free_device_descriptor:
-	dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
+	dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
 			  descriptor_bus);
 	return err;
 }
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index d320c2ffd87c..4b1485b11a7b 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0 OR MIT)
  * Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #ifndef _GVE_ADMINQ_H
@@ -82,14 +82,54 @@ static_assert(sizeof(struct gve_device_descriptor) == 40);
 struct gve_device_option {
 	__be16 option_id;
 	__be16 option_length;
-	__be32 feat_mask;
+	__be32 required_features_mask;
 };
 
 static_assert(sizeof(struct gve_device_option) == 8);
 
-#define GVE_DEV_OPT_ID_RAW_ADDRESSING 0x1
-#define GVE_DEV_OPT_LEN_RAW_ADDRESSING 0x0
-#define GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING 0x0
+struct gve_device_option_gqi_rda {
+	__be32 supported_features_mask;
+};
+
+static_assert(sizeof(struct gve_device_option_gqi_rda) == 4);
+
+struct gve_device_option_gqi_qpl {
+	__be32 supported_features_mask;
+};
+
+static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4);
+
+struct gve_device_option_dqo_rda {
+	__be32 supported_features_mask;
+	__be16 tx_comp_ring_entries;
+	__be16 rx_buff_ring_entries;
+};
+
+static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
+
+/* Terminology:
+ *
+ * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
+ *       mapped and read/updated by the device.
+ *
+ * QPL - Queue Page Lists - Driver uses bounce buffers which are DMA mapped with
+ *       the device for read/write and data is copied from/to SKBs.
+ */
+enum gve_dev_opt_id {
+	GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
+	GVE_DEV_OPT_ID_GQI_RDA = 0x2,
+	GVE_DEV_OPT_ID_GQI_QPL = 0x3,
+	GVE_DEV_OPT_ID_DQO_RDA = 0x4,
+};
+
+enum gve_dev_opt_req_feat_mask {
+	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
+	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
+	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
+	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
+};
+
+#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
 
 struct gve_adminq_configure_device_resources {
 	__be64 counter_array;

From a5886ef4f4bfc305bfd9efdf6166621b5c602fca Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:22 -0700
Subject: [PATCH 2847/3292] gve: Introduce per netdev `enum gve_queue_format`

The currently supported queue formats are:
- GQI_RDA - GQI with raw DMA addressing
- GQI_QPL - GQI with queue page list
- DQO_RDA - DQO with raw DMA addressing

The old `gve_priv.raw_addressing` value is only used for GQI_RDA, so we
remove it in favor of just checking against GQI_RDA

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h        | 24 +++++++++++++++++---
 drivers/net/ethernet/google/gve/gve_adminq.c | 15 +++++++-----
 drivers/net/ethernet/google/gve/gve_main.c   |  9 ++++----
 drivers/net/ethernet/google/gve/gve_rx.c     |  2 +-
 drivers/net/ethernet/google/gve/gve_tx.c     |  2 +-
 5 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 5467c74d379e..9cb9b8f3e66e 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -194,6 +194,17 @@ struct gve_qpl_config {
 	unsigned long *qpl_id_map; /* bitmap of used qpl ids */
 };
 
+/* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
+ * when the entire configure_device_resources command is zeroed out and the
+ * queue_format is not specified.
+ */
+enum gve_queue_format {
+	GVE_QUEUE_FORMAT_UNSPECIFIED	= 0x0,
+	GVE_GQI_RDA_FORMAT		= 0x1,
+	GVE_GQI_QPL_FORMAT		= 0x2,
+	GVE_DQO_RDA_FORMAT		= 0x3,
+};
+
 struct gve_priv {
 	struct net_device *dev;
 	struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
@@ -216,7 +227,6 @@ struct gve_priv {
 	u64 num_registered_pages; /* num pages registered with NIC */
 	u32 rx_copybreak; /* copy packets smaller than this */
 	u16 default_num_queues; /* default num queues to set up */
-	u8 raw_addressing; /* 1 if this dev supports raw addressing, 0 otherwise */
 
 	struct gve_queue_config tx_cfg;
 	struct gve_queue_config rx_cfg;
@@ -275,6 +285,8 @@ struct gve_priv {
 
 	/* Gvnic device link speed from hypervisor. */
 	u64 link_speed;
+
+	enum gve_queue_format queue_format;
 };
 
 enum gve_service_task_flags_bit {
@@ -454,14 +466,20 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
  */
 static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
 {
-	return priv->raw_addressing ? 0 : priv->tx_cfg.num_queues;
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+		return 0;
+
+	return priv->tx_cfg.num_queues;
 }
 
 /* Returns the number of rx queue page lists
  */
 static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
 {
-	return priv->raw_addressing ? 0 : priv->rx_cfg.num_queues;
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+		return 0;
+
+	return priv->rx_cfg.num_queues;
 }
 
 /* Returns a pointer to the next available tx qpl in the list of qpls
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 1c2a4ccaefe5..9dfce9af60bc 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -61,7 +61,7 @@ void gve_parse_device_option(struct gve_priv *priv,
 
 		dev_info(&priv->pdev->dev,
 			 "Gqi raw addressing device option enabled.\n");
-		priv->raw_addressing = 1;
+		priv->queue_format = GVE_GQI_RDA_FORMAT;
 		break;
 	case GVE_DEV_OPT_ID_GQI_RDA:
 		if (option_length < sizeof(**dev_op_gqi_rda) ||
@@ -460,7 +460,8 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
 	u32 qpl_id;
 	int err;
 
-	qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID : tx->tx_fifo.qpl->id;
+	qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
+		 GVE_RAW_ADDRESSING_QPL_ID : tx->tx_fifo.qpl->id;
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
 	cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
@@ -501,7 +502,8 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
 	u32 qpl_id;
 	int err;
 
-	qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
+	qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
+		 GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
 	cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
@@ -628,7 +630,6 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	if (err)
 		goto free_device_descriptor;
 
-	priv->raw_addressing = 0;
 	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
 					 &dev_op_gqi_qpl, &dev_op_dqo_rda);
 	if (err)
@@ -638,17 +639,19 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	 * is not set to GqiRda, choose the queue format in a priority order:
 	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
 	 */
-	if (priv->raw_addressing == 1) {
+	if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with GQI RDA queue format.\n");
 	} else if (dev_op_dqo_rda) {
+		priv->queue_format = GVE_DQO_RDA_FORMAT;
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with DQO RDA queue format.\n");
 	} else if (dev_op_gqi_rda) {
+		priv->queue_format = GVE_GQI_RDA_FORMAT;
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with GQI RDA queue format.\n");
-		priv->raw_addressing = 1;
 	} else {
+		priv->queue_format = GVE_GQI_QPL_FORMAT;
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with GQI QPL queue format.\n");
 	}
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index bbc423e93122..aa0bff03c6c8 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #include <linux/cpumask.h>
@@ -681,7 +681,7 @@ static int gve_alloc_qpls(struct gve_priv *priv)
 	int err;
 
 	/* Raw addressing means no QPLs */
-	if (priv->raw_addressing)
+	if (priv->queue_format == GVE_GQI_RDA_FORMAT)
 		return 0;
 
 	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
@@ -725,7 +725,7 @@ static void gve_free_qpls(struct gve_priv *priv)
 	int i;
 
 	/* Raw addressing means no QPLs */
-	if (priv->raw_addressing)
+	if (priv->queue_format == GVE_GQI_RDA_FORMAT)
 		return;
 
 	kvfree(priv->qpl_cfg.qpl_id_map);
@@ -1088,7 +1088,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 	if (skip_describe_device)
 		goto setup_device;
 
-	priv->raw_addressing = false;
+	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
 	/* Get the initial information we need from the device */
 	err = gve_adminq_describe_device(priv);
 	if (err) {
@@ -1352,6 +1352,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto abort_with_wq;
 
 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
+	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
 	gve_clear_probe_in_progress(priv);
 	queue_work(priv->gve_wq, &priv->service_task);
 	return 0;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index e14509614287..15a64e40004d 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -148,7 +148,7 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
 
 	slots = priv->rx_data_slot_cnt;
 	rx->mask = slots - 1;
-	rx->data.raw_addressing = priv->raw_addressing;
+	rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
 
 	/* alloc rx data ring */
 	bytes = sizeof(*rx->data.data_ring) * slots;
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 6866f6e0139d..75930bb64eb9 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -191,7 +191,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 	if (!tx->desc)
 		goto abort_with_info;
 
-	tx->raw_addressing = priv->raw_addressing;
+	tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
 	tx->dev = &priv->pdev->dev;
 	if (!tx->raw_addressing) {
 		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);

From 5ca2265eefc0bdfc80d4cbe9f70a81c40c41ae60 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:23 -0700
Subject: [PATCH 2848/3292] gve: adminq: DQO specific device descriptor logic

- In addition to TX and RX queues, DQO has TX completion and RX buffer
  queues.
  - TX completions are received when the device has completed sending a
    packet on the wire.
  - RX buffers are posted on a separate queue form the RX completions.
- DQO descriptor rings are allowed to be smaller than PAGE_SIZE.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h        | 13 +++++
 drivers/net/ethernet/google/gve/gve_adminq.c | 59 ++++++++++++++------
 2 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 9cb9b8f3e66e..9045b86279cb 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -194,6 +194,11 @@ struct gve_qpl_config {
 	unsigned long *qpl_id_map; /* bitmap of used qpl ids */
 };
 
+struct gve_options_dqo_rda {
+	u16 tx_comp_ring_entries; /* number of tx_comp descriptors */
+	u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
+};
+
 /* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
  * when the entire configure_device_resources command is zeroed out and the
  * queue_format is not specified.
@@ -286,6 +291,8 @@ struct gve_priv {
 	/* Gvnic device link speed from hypervisor. */
 	u64 link_speed;
 
+	struct gve_options_dqo_rda options_dqo_rda;
+
 	enum gve_queue_format queue_format;
 };
 
@@ -533,6 +540,12 @@ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
 		return DMA_FROM_DEVICE;
 }
 
+static inline bool gve_is_gqi(struct gve_priv *priv)
+{
+	return priv->queue_format == GVE_GQI_RDA_FORMAT ||
+		priv->queue_format == GVE_GQI_QPL_FORMAT;
+}
+
 /* buffers */
 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 		   struct page **page, dma_addr_t *dma,
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 9dfce9af60bc..9efa60ce34e0 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -602,6 +602,40 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
 	return gve_adminq_kick_and_wait(priv);
 }
 
+static int gve_set_desc_cnt(struct gve_priv *priv,
+			    struct gve_device_descriptor *descriptor)
+{
+	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+	if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
+		dev_err(&priv->pdev->dev, "Tx desc count %d too low\n",
+			priv->tx_desc_cnt);
+		return -EINVAL;
+	}
+	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+	if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0])
+	    < PAGE_SIZE) {
+		dev_err(&priv->pdev->dev, "Rx desc count %d too low\n",
+			priv->rx_desc_cnt);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int
+gve_set_desc_cnt_dqo(struct gve_priv *priv,
+		     const struct gve_device_descriptor *descriptor,
+		     const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
+{
+	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+	priv->options_dqo_rda.tx_comp_ring_entries =
+		be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
+	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+	priv->options_dqo_rda.rx_buff_ring_entries =
+		be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
+
+	return 0;
+}
+
 int gve_adminq_describe_device(struct gve_priv *priv)
 {
 	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
@@ -655,22 +689,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with GQI QPL queue format.\n");
 	}
+	if (gve_is_gqi(priv)) {
+		err = gve_set_desc_cnt(priv, descriptor);
+	} else {
+		err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
+	}
+	if (err)
+		goto free_device_descriptor;
 
-	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
-	if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) {
-		dev_err(&priv->pdev->dev, "Tx desc count %d too low\n", priv->tx_desc_cnt);
-		err = -EINVAL;
-		goto free_device_descriptor;
-	}
-	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
-	if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0])
-	    < PAGE_SIZE ||
-	    priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0])
-	    < PAGE_SIZE) {
-		dev_err(&priv->pdev->dev, "Rx desc count %d too low\n", priv->rx_desc_cnt);
-		err = -EINVAL;
-		goto free_device_descriptor;
-	}
 	priv->max_registered_pages =
 				be64_to_cpu(descriptor->max_registered_pages);
 	mtu = be16_to_cpu(descriptor->mtu);
@@ -686,7 +712,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
 	priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
 	priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
-	if (priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
+
+	if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
 		dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
 			priv->rx_data_slot_cnt);
 		priv->rx_desc_cnt = priv->rx_data_slot_cnt;

From c4b87ac87635879ecf0d26807dc00df9bb7eb508 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:24 -0700
Subject: [PATCH 2849/3292] gve: Add support for DQO RX PTYPE map

Unlike GQI, DQO RX descriptors do not contain the L3 and L4 type of the
packet. L3 and L4 types are necessary in order to set the hash and csum
on RX SKBs correctly.

DQO RX descriptors instead contain a 10 bit PTYPE index. The PTYPE map
enables the device to tell the driver how to map from PTYPE index to
L3/L4 type.

The device doesn't provide any guarantees about the range of possible
PTYPEs, so we just use a 1024 entry array to implement a fast mapping
structure.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h        | 15 +++++++
 drivers/net/ethernet/google/gve/gve_adminq.c | 45 +++++++++++++++++++-
 drivers/net/ethernet/google/gve/gve_adminq.h | 44 ++++++++++++++++++-
 drivers/net/ethernet/google/gve/gve_main.c   | 25 +++++++++++
 4 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 9045b86279cb..e32730f50bf9 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/u64_stats_sync.h>
+
 #include "gve_desc.h"
 
 #ifndef PCI_VENDOR_ID_GOOGLE
@@ -40,6 +41,9 @@
 
 #define GVE_DATA_SLOT_ADDR_PAGE_MASK (~(PAGE_SIZE - 1))
 
+/* PTYPEs are always 10 bits. */
+#define GVE_NUM_PTYPES	1024
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -199,6 +203,15 @@ struct gve_options_dqo_rda {
 	u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
 };
 
+struct gve_ptype {
+	u8 l3_type;  /* `gve_l3_type` in gve_adminq.h */
+	u8 l4_type;  /* `gve_l4_type` in gve_adminq.h */
+};
+
+struct gve_ptype_lut {
+	struct gve_ptype ptypes[GVE_NUM_PTYPES];
+};
+
 /* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
  * when the entire configure_device_resources command is zeroed out and the
  * queue_format is not specified.
@@ -266,6 +279,7 @@ struct gve_priv {
 	u32 adminq_set_driver_parameter_cnt;
 	u32 adminq_report_stats_cnt;
 	u32 adminq_report_link_speed_cnt;
+	u32 adminq_get_ptype_map_cnt;
 
 	/* Global stats */
 	u32 interface_up_cnt; /* count of times interface turned up since last reset */
@@ -292,6 +306,7 @@ struct gve_priv {
 	u64 link_speed;
 
 	struct gve_options_dqo_rda options_dqo_rda;
+	struct gve_ptype_lut *ptype_lut_dqo;
 
 	enum gve_queue_format queue_format;
 };
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 9efa60ce34e0..7d8d354f67e2 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -176,6 +176,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
 	priv->adminq_set_driver_parameter_cnt = 0;
 	priv->adminq_report_stats_cnt = 0;
 	priv->adminq_report_link_speed_cnt = 0;
+	priv->adminq_get_ptype_map_cnt = 0;
 
 	/* Setup Admin queue with the device */
 	iowrite32be(priv->adminq_bus_addr / PAGE_SIZE,
@@ -381,6 +382,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
 	case GVE_ADMINQ_REPORT_LINK_SPEED:
 		priv->adminq_report_link_speed_cnt++;
 		break;
+	case GVE_ADMINQ_GET_PTYPE_MAP:
+		priv->adminq_get_ptype_map_cnt++;
+		break;
 	default:
 		dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
 	}
@@ -393,7 +397,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
  * The caller is also responsible for making sure there are no commands
  * waiting to be executed.
  */
-static int gve_adminq_execute_cmd(struct gve_priv *priv, union gve_adminq_command *cmd_orig)
+static int gve_adminq_execute_cmd(struct gve_priv *priv,
+				  union gve_adminq_command *cmd_orig)
 {
 	u32 tail, head;
 	int err;
@@ -827,3 +832,41 @@ int gve_adminq_report_link_speed(struct gve_priv *priv)
 			  link_speed_region_bus);
 	return err;
 }
+
+int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
+				 struct gve_ptype_lut *ptype_lut)
+{
+	struct gve_ptype_map *ptype_map;
+	union gve_adminq_command cmd;
+	dma_addr_t ptype_map_bus;
+	int err = 0;
+	int i;
+
+	memset(&cmd, 0, sizeof(cmd));
+	ptype_map = dma_alloc_coherent(&priv->pdev->dev, sizeof(*ptype_map),
+				       &ptype_map_bus, GFP_KERNEL);
+	if (!ptype_map)
+		return -ENOMEM;
+
+	cmd.opcode = cpu_to_be32(GVE_ADMINQ_GET_PTYPE_MAP);
+	cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) {
+		.ptype_map_len = cpu_to_be64(sizeof(*ptype_map)),
+		.ptype_map_addr = cpu_to_be64(ptype_map_bus),
+	};
+
+	err = gve_adminq_execute_cmd(priv, &cmd);
+	if (err)
+		goto err;
+
+	/* Populate ptype_lut. */
+	for (i = 0; i < GVE_NUM_PTYPES; i++) {
+		ptype_lut->ptypes[i].l3_type =
+			ptype_map->ptypes[i].l3_type;
+		ptype_lut->ptypes[i].l4_type =
+			ptype_map->ptypes[i].l4_type;
+	}
+err:
+	dma_free_coherent(&priv->pdev->dev, sizeof(*ptype_map), ptype_map,
+			  ptype_map_bus);
+	return err;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 4b1485b11a7b..62a7e96af715 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -22,7 +22,8 @@ enum gve_adminq_opcodes {
 	GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES	= 0x9,
 	GVE_ADMINQ_SET_DRIVER_PARAMETER		= 0xB,
 	GVE_ADMINQ_REPORT_STATS			= 0xC,
-	GVE_ADMINQ_REPORT_LINK_SPEED	= 0xD
+	GVE_ADMINQ_REPORT_LINK_SPEED		= 0xD,
+	GVE_ADMINQ_GET_PTYPE_MAP		= 0xE,
 };
 
 /* Admin queue status codes */
@@ -266,6 +267,41 @@ enum gve_stat_names {
 	RX_DROPS_INVALID_CHECKSUM	= 68,
 };
 
+enum gve_l3_type {
+	/* Must be zero so zero initialized LUT is unknown. */
+	GVE_L3_TYPE_UNKNOWN = 0,
+	GVE_L3_TYPE_OTHER,
+	GVE_L3_TYPE_IPV4,
+	GVE_L3_TYPE_IPV6,
+};
+
+enum gve_l4_type {
+	/* Must be zero so zero initialized LUT is unknown. */
+	GVE_L4_TYPE_UNKNOWN = 0,
+	GVE_L4_TYPE_OTHER,
+	GVE_L4_TYPE_TCP,
+	GVE_L4_TYPE_UDP,
+	GVE_L4_TYPE_ICMP,
+	GVE_L4_TYPE_SCTP,
+};
+
+/* These are control path types for PTYPE which are the same as the data path
+ * types.
+ */
+struct gve_ptype_entry {
+	u8 l3_type;
+	u8 l4_type;
+};
+
+struct gve_ptype_map {
+	struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */
+};
+
+struct gve_adminq_get_ptype_map {
+	__be64 ptype_map_len;
+	__be64 ptype_map_addr;
+};
+
 union gve_adminq_command {
 	struct {
 		__be32 opcode;
@@ -283,6 +319,7 @@ union gve_adminq_command {
 			struct gve_adminq_set_driver_parameter set_driver_param;
 			struct gve_adminq_report_stats report_stats;
 			struct gve_adminq_report_link_speed report_link_speed;
+			struct gve_adminq_get_ptype_map get_ptype_map;
 		};
 	};
 	u8 reserved[64];
@@ -311,4 +348,9 @@ int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
 int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
 			    dma_addr_t stats_report_addr, u64 interval);
 int gve_adminq_report_link_speed(struct gve_priv *priv);
+
+struct gve_ptype_lut;
+int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
+				 struct gve_ptype_lut *ptype_lut);
+
 #endif /* _GVE_ADMINQ_H */
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index aa0bff03c6c8..8cc0ac061c93 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -346,6 +346,22 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 		err = -ENXIO;
 		goto abort_with_stats_report;
 	}
+
+	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
+					       GFP_KERNEL);
+		if (!priv->ptype_lut_dqo) {
+			err = -ENOMEM;
+			goto abort_with_stats_report;
+		}
+		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
+		if (err) {
+			dev_err(&priv->pdev->dev,
+				"Failed to get ptype map: err=%d\n", err);
+			goto abort_with_ptype_lut;
+		}
+	}
+
 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
 				      priv->stats_report_bus,
 				      GVE_STATS_REPORT_TIMER_PERIOD);
@@ -354,12 +370,17 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 			"Failed to report stats: err=%d\n", err);
 	gve_set_device_resources_ok(priv);
 	return 0;
+
+abort_with_ptype_lut:
+	kvfree(priv->ptype_lut_dqo);
+	priv->ptype_lut_dqo = NULL;
 abort_with_stats_report:
 	gve_free_stats_report(priv);
 abort_with_ntfy_blocks:
 	gve_free_notify_blocks(priv);
 abort_with_counter:
 	gve_free_counter_array(priv);
+
 	return err;
 }
 
@@ -386,6 +407,10 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
 			gve_trigger_reset(priv);
 		}
 	}
+
+	kvfree(priv->ptype_lut_dqo);
+	priv->ptype_lut_dqo = NULL;
+
 	gve_free_counter_array(priv);
 	gve_free_notify_blocks(priv);
 	gve_free_stats_report(priv);

From 223198183ff1fc099184081f997bf1f710f1ef72 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:25 -0700
Subject: [PATCH 2850/3292] gve: Add dqo descriptors

General description of rings and descriptors:

TX ring is used for sending TX packet buffers to the NIC. It has the
following descriptors:
- `gve_tx_pkt_desc_dqo` - Data buffer descriptor
- `gve_tx_tso_context_desc_dqo` - TSO context descriptor
- `gve_tx_general_context_desc_dqo` - Generic metadata descriptor

Metadata is a collection of 12 bytes. We define `gve_tx_metadata_dqo`
which represents the logical interpetation of the metadata bytes. It's
helpful to define this structure because the metadata bytes exist in
multiple descriptor types (including `gve_tx_tso_context_desc_dqo`),
and the device requires same field has the same value in all
descriptors.

The TX completion ring is used to receive completions from the NIC.
Having a separate ring allows for completions to be out of order. The
completion descriptor `gve_tx_compl_desc` has several different types,
most important are packet and descriptor completions. Descriptor
completions are used to notify the driver when descriptors sent on the
TX ring are done being consumed. The descriptor completion is only used
to signal that space is cleared in the TX ring. A packet completion will
be received when a packet transmitted on the TX queue is done being
transmitted.

In addition there are "miss" and "reinjection" completions. The device
implements a "flow-miss model". Most packets will simply receive a
packet completion. The flow-miss system may choose to process a packet
based on its contents. A TX packet which experiences a flow miss would
receive a miss completion followed by a later reinjection completion.
The miss-completion is received when the packet starts to be processed
by the flow-miss system and the reinjection completion is received when
the flow-miss system completes processing the packet and sends it on the
wire.

The RX buffer ring is used to send buffers to HW via the
`gve_rx_desc_dqo` descriptor.

Received packets are put into the RX queue by the device, which
populates the `gve_rx_compl_desc_dqo` descriptor. The RX descriptors
refer to buffers posted by the buffer queue. Received buffers may be
returned out of order, such as when HW LRO is enabled.

Important concepts:
- "TX" and "RX buffer" queues, which send descriptors to the device, use
  MMIO doorbells to notify the device of new descriptors.

- "RX" and "TX completion" queues, which receive descriptors from the
  device, use a "generation bit" to know when a descriptor was populated
  by the device. The driver initializes all bits with the "current
  generation". The device will populate received descriptors with the
  "next generation" which is inverted from the current generation. When
  the ring wraps, the current/next generation are swapped.

- It's the driver's responsibility to ensure that the RX and TX
  completion queues are not overrun. This can be accomplished by
  limiting the number of descriptors posted to HW.

- TX packets have a 16 bit completion_tag and RX buffers have a 16 bit
  buffer_id. These will be returned on the TX completion and RX queues
  respectively to let the driver know which packet/buffer was completed.

Bitfields are used to describe descriptor fields. This notation is more
concise and readable than shift-and-mask. It is possible because the
driver is restricted to little endian platforms.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/Kconfig           |   2 +-
 .../net/ethernet/google/gve/gve_desc_dqo.h    | 256 ++++++++++++++++++
 2 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/google/gve/gve_desc_dqo.h

diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
index b8f04d052fda..8641a00f8e63 100644
--- a/drivers/net/ethernet/google/Kconfig
+++ b/drivers/net/ethernet/google/Kconfig
@@ -17,7 +17,7 @@ if NET_VENDOR_GOOGLE
 
 config GVE
 	tristate "Google Virtual NIC (gVNIC) support"
-	depends on PCI_MSI
+	depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN))
 	help
 	  This driver supports Google Virtual NIC (gVNIC)"
 
diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h
new file mode 100644
index 000000000000..e8fe9adef7f2
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+/* GVE DQO Descriptor formats */
+
+#ifndef _GVE_DESC_DQO_H_
+#define _GVE_DESC_DQO_H_
+
+#include <linux/build_bug.h>
+
+#define GVE_TX_MAX_HDR_SIZE_DQO 255
+#define GVE_TX_MIN_TSO_MSS_DQO 88
+
+#ifndef __LITTLE_ENDIAN_BITFIELD
+#error "Only little endian supported"
+#endif
+
+/* Basic TX descriptor (DTYPE 0x0C) */
+struct gve_tx_pkt_desc_dqo {
+	__le64 buf_addr;
+
+	/* Must be GVE_TX_PKT_DESC_DTYPE_DQO (0xc) */
+	u8 dtype: 5;
+
+	/* Denotes the last descriptor of a packet. */
+	u8 end_of_packet: 1;
+	u8 checksum_offload_enable: 1;
+
+	/* If set, will generate a descriptor completion for this descriptor. */
+	u8 report_event: 1;
+	u8 reserved0;
+	__le16 reserved1;
+
+	/* The TX completion associated with this packet will contain this tag.
+	 */
+	__le16 compl_tag;
+	u16 buf_size: 14;
+	u16 reserved2: 2;
+} __packed;
+static_assert(sizeof(struct gve_tx_pkt_desc_dqo) == 16);
+
+#define GVE_TX_PKT_DESC_DTYPE_DQO 0xc
+#define GVE_TX_MAX_BUF_SIZE_DQO ((16 * 1024) - 1)
+
+/* Maximum number of data descriptors allowed per packet, or per-TSO segment. */
+#define GVE_TX_MAX_DATA_DESCS 10
+
+/* Min gap between tail and head to avoid cacheline overlap */
+#define GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP 4
+
+/* "report_event" on TX packet descriptors may only be reported on the last
+ * descriptor of a TX packet, and they must be spaced apart with at least this
+ * value.
+ */
+#define GVE_TX_MIN_RE_INTERVAL 32
+
+struct gve_tx_context_cmd_dtype {
+	u8 dtype: 5;
+	u8 tso: 1;
+	u8 reserved1: 2;
+
+	u8 reserved2;
+};
+
+static_assert(sizeof(struct gve_tx_context_cmd_dtype) == 2);
+
+/* TX Native TSO Context DTYPE (0x05)
+ *
+ * "flex" fields allow the driver to send additional packet context to HW.
+ */
+struct gve_tx_tso_context_desc_dqo {
+	/* The L4 payload bytes that should be segmented. */
+	u32 tso_total_len: 24;
+	u32 flex10: 8;
+
+	/* Max segment size in TSO excluding headers. */
+	u16 mss: 14;
+	u16 reserved: 2;
+
+	u8 header_len; /* Header length to use for TSO offload */
+	u8 flex11;
+	struct gve_tx_context_cmd_dtype cmd_dtype;
+	u8 flex0;
+	u8 flex5;
+	u8 flex6;
+	u8 flex7;
+	u8 flex8;
+	u8 flex9;
+} __packed;
+static_assert(sizeof(struct gve_tx_tso_context_desc_dqo) == 16);
+
+#define GVE_TX_TSO_CTX_DESC_DTYPE_DQO 0x5
+
+/* General context descriptor for sending metadata. */
+struct gve_tx_general_context_desc_dqo {
+	u8 flex4;
+	u8 flex5;
+	u8 flex6;
+	u8 flex7;
+	u8 flex8;
+	u8 flex9;
+	u8 flex10;
+	u8 flex11;
+	struct gve_tx_context_cmd_dtype cmd_dtype;
+	u16 reserved;
+	u8 flex0;
+	u8 flex1;
+	u8 flex2;
+	u8 flex3;
+} __packed;
+static_assert(sizeof(struct gve_tx_general_context_desc_dqo) == 16);
+
+#define GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO 0x4
+
+/* Logical structure of metadata which is packed into context descriptor flex
+ * fields.
+ */
+struct gve_tx_metadata_dqo {
+	union {
+		struct {
+			u8 version;
+
+			/* If `skb->l4_hash` is set, this value should be
+			 * derived from `skb->hash`.
+			 *
+			 * A zero value means no l4_hash was associated with the
+			 * skb.
+			 */
+			u16 path_hash: 15;
+
+			/* Should be set to 1 if the flow associated with the
+			 * skb had a rehash from the TCP stack.
+			 */
+			u16 rehash_event: 1;
+		}  __packed;
+		u8 bytes[12];
+	};
+}  __packed;
+static_assert(sizeof(struct gve_tx_metadata_dqo) == 12);
+
+#define GVE_TX_METADATA_VERSION_DQO 0
+
+/* TX completion descriptor */
+struct gve_tx_compl_desc {
+	/* For types 0-4 this is the TX queue ID associated with this
+	 * completion.
+	 */
+	u16 id: 11;
+
+	/* See: GVE_COMPL_TYPE_DQO* */
+	u16 type: 3;
+	u16 reserved0: 1;
+
+	/* Flipped by HW to notify the descriptor is populated. */
+	u16 generation: 1;
+	union {
+		/* For descriptor completions, this is the last index fetched
+		 * by HW + 1.
+		 */
+		__le16 tx_head;
+
+		/* For packet completions, this is the completion tag set on the
+		 * TX packet descriptors.
+		 */
+		__le16 completion_tag;
+	};
+	__le32 reserved1;
+} __packed;
+static_assert(sizeof(struct gve_tx_compl_desc) == 8);
+
+#define GVE_COMPL_TYPE_DQO_PKT 0x2 /* Packet completion */
+#define GVE_COMPL_TYPE_DQO_DESC 0x4 /* Descriptor completion */
+#define GVE_COMPL_TYPE_DQO_MISS 0x1 /* Miss path completion */
+#define GVE_COMPL_TYPE_DQO_REINJECTION 0x3 /* Re-injection completion */
+
+/* Descriptor to post buffers to HW on buffer queue. */
+struct gve_rx_desc_dqo {
+	__le16 buf_id; /* ID returned in Rx completion descriptor */
+	__le16 reserved0;
+	__le32 reserved1;
+	__le64 buf_addr; /* DMA address of the buffer */
+	__le64 header_buf_addr;
+	__le64 reserved2;
+} __packed;
+static_assert(sizeof(struct gve_rx_desc_dqo) == 32);
+
+/* Descriptor for HW to notify SW of new packets received on RX queue. */
+struct gve_rx_compl_desc_dqo {
+	/* Must be 1 */
+	u8 rxdid: 4;
+	u8 reserved0: 4;
+
+	/* Packet originated from this system rather than the network. */
+	u8 loopback: 1;
+	/* Set when IPv6 packet contains a destination options header or routing
+	 * header.
+	 */
+	u8 ipv6_ex_add: 1;
+	/* Invalid packet was received. */
+	u8 rx_error: 1;
+	u8 reserved1: 5;
+
+	u16 packet_type: 10;
+	u16 ip_hdr_err: 1;
+	u16 udp_len_err: 1;
+	u16 raw_cs_invalid: 1;
+	u16 reserved2: 3;
+
+	u16 packet_len: 14;
+	/* Flipped by HW to notify the descriptor is populated. */
+	u16 generation: 1;
+	/* Should be zero. */
+	u16 buffer_queue_id: 1;
+
+	u16 header_len: 10;
+	u16 rsc: 1;
+	u16 split_header: 1;
+	u16 reserved3: 4;
+
+	u8 descriptor_done: 1;
+	u8 end_of_packet: 1;
+	u8 header_buffer_overflow: 1;
+	u8 l3_l4_processed: 1;
+	u8 csum_ip_err: 1;
+	u8 csum_l4_err: 1;
+	u8 csum_external_ip_err: 1;
+	u8 csum_external_udp_err: 1;
+
+	u8 status_error1;
+
+	__le16 reserved5;
+	__le16 buf_id; /* Buffer ID which was sent on the buffer queue. */
+
+	union {
+		/* Packet checksum. */
+		__le16 raw_cs;
+		/* Segment length for RSC packets. */
+		__le16 rsc_seg_len;
+	};
+	__le32 hash;
+	__le32 reserved6;
+	__le64 reserved7;
+} __packed;
+
+static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32);
+
+/* Ringing the doorbell too often can hurt performance.
+ *
+ * HW requires this value to be at least 8.
+ */
+#define GVE_RX_BUF_THRESH_DQO 32
+
+#endif /* _GVE_DESC_DQO_H_ */

From a4aa1f1e69df5612bcc0d7cf2ca23b9fae79941b Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:26 -0700
Subject: [PATCH 2851/3292] gve: Add DQO fields for core data structures

- Add new DQO datapath structures:
  - `gve_rx_buf_queue_dqo`
  - `gve_rx_compl_queue_dqo`
  - `gve_rx_buf_state_dqo`
  - `gve_tx_desc_dqo`
  - `gve_tx_pending_packet_dqo`

- Incorporate these into the existing ring data structures:
  - `gve_rx_ring`
  - `gve_tx_ring`

Noteworthy mentions:

- `gve_rx_buf_state` represents an RX buffer which was posted to HW.
  Each RX queue has an array of these objects and the index into the
  array is used as the buffer_id when posted to HW.

- `gve_tx_pending_packet_dqo` is treated similarly for TX queues. The
  completion_tag is the index into the array.

- These two structures have links for linked lists which are represented
  by 16b indexes into a contiguous array of these structures.
  This reduces memory footprint compared to 64b pointers.

- We use unions for the writeable datapath structures to reduce cache
  footprint. GQI specific members will renamed like DQO members in a
  future patch.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h | 262 ++++++++++++++++++++++++--
 1 file changed, 251 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index e32730f50bf9..5bfab1ac20d1 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -13,6 +13,7 @@
 #include <linux/u64_stats_sync.h>
 
 #include "gve_desc.h"
+#include "gve_desc_dqo.h"
 
 #ifndef PCI_VENDOR_ID_GOOGLE
 #define PCI_VENDOR_ID_GOOGLE	0x1ae0
@@ -80,17 +81,117 @@ struct gve_rx_data_queue {
 
 struct gve_priv;
 
-/* An RX ring that contains a power-of-two sized desc and data ring. */
+/* RX buffer queue for posting buffers to HW.
+ * Each RX (completion) queue has a corresponding buffer queue.
+ */
+struct gve_rx_buf_queue_dqo {
+	struct gve_rx_desc_dqo *desc_ring;
+	dma_addr_t bus;
+	u32 head; /* Pointer to start cleaning buffers at. */
+	u32 tail; /* Last posted buffer index + 1 */
+	u32 mask; /* Mask for indices to the size of the ring */
+};
+
+/* RX completion queue to receive packets from HW. */
+struct gve_rx_compl_queue_dqo {
+	struct gve_rx_compl_desc_dqo *desc_ring;
+	dma_addr_t bus;
+
+	/* Number of slots which did not have a buffer posted yet. We should not
+	 * post more buffers than the queue size to avoid HW overrunning the
+	 * queue.
+	 */
+	int num_free_slots;
+
+	/* HW uses a "generation bit" to notify SW of new descriptors. When a
+	 * descriptor's generation bit is different from the current generation,
+	 * that descriptor is ready to be consumed by SW.
+	 */
+	u8 cur_gen_bit;
+
+	/* Pointer into desc_ring where the next completion descriptor will be
+	 * received.
+	 */
+	u32 head;
+	u32 mask; /* Mask for indices to the size of the ring */
+};
+
+/* Stores state for tracking buffers posted to HW */
+struct gve_rx_buf_state_dqo {
+	/* The page posted to HW. */
+	struct gve_rx_slot_page_info page_info;
+
+	/* The DMA address corresponding to `page_info`. */
+	dma_addr_t addr;
+
+	/* Last offset into the page when it only had a single reference, at
+	 * which point every other offset is free to be reused.
+	 */
+	u32 last_single_ref_offset;
+
+	/* Linked list index to next element in the list, or -1 if none */
+	s16 next;
+};
+
+/* `head` and `tail` are indices into an array, or -1 if empty. */
+struct gve_index_list {
+	s16 head;
+	s16 tail;
+};
+
+/* Contains datapath state used to represent an RX queue. */
 struct gve_rx_ring {
 	struct gve_priv *gve;
-	struct gve_rx_desc_queue desc;
-	struct gve_rx_data_queue data;
+	union {
+		/* GQI fields */
+		struct {
+			struct gve_rx_desc_queue desc;
+			struct gve_rx_data_queue data;
+
+			/* threshold for posting new buffs and descs */
+			u32 db_threshold;
+		};
+
+		/* DQO fields. */
+		struct {
+			struct gve_rx_buf_queue_dqo bufq;
+			struct gve_rx_compl_queue_dqo complq;
+
+			struct gve_rx_buf_state_dqo *buf_states;
+			u16 num_buf_states;
+
+			/* Linked list of gve_rx_buf_state_dqo. Index into
+			 * buf_states, or -1 if empty.
+			 */
+			s16 free_buf_states;
+
+			/* Linked list of gve_rx_buf_state_dqo. Indexes into
+			 * buf_states, or -1 if empty.
+			 *
+			 * This list contains buf_states which are pointing to
+			 * valid buffers.
+			 *
+			 * We use a FIFO here in order to increase the
+			 * probability that buffers can be reused by increasing
+			 * the time between usages.
+			 */
+			struct gve_index_list recycled_buf_states;
+
+			/* Linked list of gve_rx_buf_state_dqo. Indexes into
+			 * buf_states, or -1 if empty.
+			 *
+			 * This list contains buf_states which have buffers
+			 * which cannot be reused yet.
+			 */
+			struct gve_index_list used_buf_states;
+		} dqo;
+	};
+
 	u64 rbytes; /* free-running bytes received */
 	u64 rpackets; /* free-running packets received */
 	u32 cnt; /* free-running total number of completed packets */
 	u32 fill_cnt; /* free-running total number of descs and buffs posted */
 	u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
-	u32 db_threshold; /* threshold for posting new buffs and descs */
 	u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
 	u64 rx_copied_pkt; /* free-running total number of copied packets */
 	u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
@@ -141,23 +242,161 @@ struct gve_tx_fifo {
 	struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */
 };
 
-/* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */
+/* TX descriptor for DQO format */
+union gve_tx_desc_dqo {
+	struct gve_tx_pkt_desc_dqo pkt;
+	struct gve_tx_tso_context_desc_dqo tso_ctx;
+	struct gve_tx_general_context_desc_dqo general_ctx;
+};
+
+enum gve_packet_state {
+	/* Packet is in free list, available to be allocated.
+	 * This should always be zero since state is not explicitly initialized.
+	 */
+	GVE_PACKET_STATE_UNALLOCATED,
+	/* Packet is expecting a regular data completion or miss completion */
+	GVE_PACKET_STATE_PENDING_DATA_COMPL,
+	/* Packet has received a miss completion and is expecting a
+	 * re-injection completion.
+	 */
+	GVE_PACKET_STATE_PENDING_REINJECT_COMPL,
+	/* No valid completion received within the specified timeout. */
+	GVE_PACKET_STATE_TIMED_OUT_COMPL,
+};
+
+struct gve_tx_pending_packet_dqo {
+	struct sk_buff *skb; /* skb for this packet */
+
+	/* 0th element corresponds to the linear portion of `skb`, should be
+	 * unmapped with `dma_unmap_single`.
+	 *
+	 * All others correspond to `skb`'s frags and should be unmapped with
+	 * `dma_unmap_page`.
+	 */
+	struct gve_tx_dma_buf bufs[MAX_SKB_FRAGS + 1];
+	u16 num_bufs;
+
+	/* Linked list index to next element in the list, or -1 if none */
+	s16 next;
+
+	/* Linked list index to prev element in the list, or -1 if none.
+	 * Used for tracking either outstanding miss completions or prematurely
+	 * freed packets.
+	 */
+	s16 prev;
+
+	/* Identifies the current state of the packet as defined in
+	 * `enum gve_packet_state`.
+	 */
+	u8 state;
+
+	/* If packet is an outstanding miss completion, then the packet is
+	 * freed if the corresponding re-injection completion is not received
+	 * before kernel jiffies exceeds timeout_jiffies.
+	 */
+	unsigned long timeout_jiffies;
+};
+
+/* Contains datapath state used to represent a TX queue. */
 struct gve_tx_ring {
 	/* Cacheline 0 -- Accessed & dirtied during transmit */
-	struct gve_tx_fifo tx_fifo;
-	u32 req; /* driver tracked head pointer */
-	u32 done; /* driver tracked tail pointer */
+	union {
+		/* GQI fields */
+		struct {
+			struct gve_tx_fifo tx_fifo;
+			u32 req; /* driver tracked head pointer */
+			u32 done; /* driver tracked tail pointer */
+		};
+
+		/* DQO fields. */
+		struct {
+			/* Linked list of gve_tx_pending_packet_dqo. Index into
+			 * pending_packets, or -1 if empty.
+			 *
+			 * This is a consumer list owned by the TX path. When it
+			 * runs out, the producer list is stolen from the
+			 * completion handling path
+			 * (dqo_compl.free_pending_packets).
+			 */
+			s16 free_pending_packets;
+
+			/* Cached value of `dqo_compl.hw_tx_head` */
+			u32 head;
+			u32 tail; /* Last posted buffer index + 1 */
+
+			/* Index of the last descriptor with "report event" bit
+			 * set.
+			 */
+			u32 last_re_idx;
+		} dqo_tx;
+	};
 
 	/* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */
-	__be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
+	union {
+		/* GQI fields */
+		struct {
+			/* NIC tail pointer */
+			__be32 last_nic_done;
+		};
+
+		/* DQO fields. */
+		struct {
+			u32 head; /* Last read on compl_desc */
+
+			/* Tracks the current gen bit of compl_q */
+			u8 cur_gen_bit;
+
+			/* Linked list of gve_tx_pending_packet_dqo. Index into
+			 * pending_packets, or -1 if empty.
+			 *
+			 * This is the producer list, owned by the completion
+			 * handling path. When the consumer list
+			 * (dqo_tx.free_pending_packets) is runs out, this list
+			 * will be stolen.
+			 */
+			atomic_t free_pending_packets;
+
+			/* Last TX ring index fetched by HW */
+			atomic_t hw_tx_head;
+
+			/* List to track pending packets which received a miss
+			 * completion but not a corresponding reinjection.
+			 */
+			struct gve_index_list miss_completions;
+
+			/* List to track pending packets that were completed
+			 * before receiving a valid completion because they
+			 * reached a specified timeout.
+			 */
+			struct gve_index_list timed_out_completions;
+		} dqo_compl;
+	} ____cacheline_aligned;
 	u64 pkt_done; /* free-running - total packets completed */
 	u64 bytes_done; /* free-running - total bytes completed */
 	u64 dropped_pkt; /* free-running - total packets dropped */
 	u64 dma_mapping_error; /* count of dma mapping errors */
 
 	/* Cacheline 2 -- Read-mostly fields */
-	union gve_tx_desc *desc ____cacheline_aligned;
-	struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
+	union {
+		/* GQI fields */
+		struct {
+			union gve_tx_desc *desc;
+
+			/* Maps 1:1 to a desc */
+			struct gve_tx_buffer_state *info;
+		};
+
+		/* DQO fields. */
+		struct {
+			union gve_tx_desc_dqo *tx_ring;
+			struct gve_tx_compl_desc *compl_ring;
+
+			struct gve_tx_pending_packet_dqo *pending_packets;
+			s16 num_pending_packets;
+
+			u32 complq_mask; /* complq size is complq_mask + 1 */
+		} dqo;
+	} ____cacheline_aligned;
 	struct netdev_queue *netdev_txq;
 	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
 	struct device *dev;
@@ -171,6 +410,7 @@ struct gve_tx_ring {
 	u32 ntfy_id; /* notification block index */
 	dma_addr_t bus; /* dma address of the descr ring */
 	dma_addr_t q_resources_bus; /* dma address of the queue resources */
+	dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
 	struct u64_stats_sync statss; /* sync stats for 32bit archs */
 } ____cacheline_aligned;
 

From 1f6228e459f8bcfcda2f6a157bbd1ceb57b566f4 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:27 -0700
Subject: [PATCH 2852/3292] gve: Update adminq commands to support DQO queues

DQO queue creation requires additional parameters:
- TX completion/RX buffer queue size
- TX completion/RX buffer queue address
- TX/RX queue size
- RX buffer size

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h         |  3 +
 drivers/net/ethernet/google/gve/gve_adminq.c  | 63 ++++++++++++-------
 drivers/net/ethernet/google/gve/gve_adminq.h  | 18 ++++--
 drivers/net/ethernet/google/gve/gve_ethtool.c |  9 ++-
 4 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 5bfab1ac20d1..8a2a8d125090 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -548,6 +548,9 @@ struct gve_priv {
 	struct gve_options_dqo_rda options_dqo_rda;
 	struct gve_ptype_lut *ptype_lut_dqo;
 
+	/* Must be a power of two. */
+	int data_buffer_size_dqo;
+
 	enum gve_queue_format queue_format;
 };
 
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 7d8d354f67e2..cf017a499119 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -443,6 +443,7 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
 		.irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
 		.ntfy_blk_msix_base_idx =
 					cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
+		.queue_format = priv->queue_format,
 	};
 
 	return gve_adminq_execute_cmd(priv, &cmd);
@@ -462,28 +463,32 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
 {
 	struct gve_tx_ring *tx = &priv->tx[queue_index];
 	union gve_adminq_command cmd;
-	u32 qpl_id;
-	int err;
 
-	qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
-		 GVE_RAW_ADDRESSING_QPL_ID : tx->tx_fifo.qpl->id;
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
 	cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
 		.queue_id = cpu_to_be32(queue_index),
-		.reserved = 0,
 		.queue_resources_addr =
 			cpu_to_be64(tx->q_resources_bus),
 		.tx_ring_addr = cpu_to_be64(tx->bus),
-		.queue_page_list_id = cpu_to_be32(qpl_id),
 		.ntfy_id = cpu_to_be32(tx->ntfy_id),
 	};
 
-	err = gve_adminq_issue_cmd(priv, &cmd);
-	if (err)
-		return err;
+	if (gve_is_gqi(priv)) {
+		u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
+			GVE_RAW_ADDRESSING_QPL_ID : tx->tx_fifo.qpl->id;
 
-	return 0;
+		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
+	} else {
+		cmd.create_tx_queue.tx_ring_size =
+			cpu_to_be16(priv->tx_desc_cnt);
+		cmd.create_tx_queue.tx_comp_ring_addr =
+			cpu_to_be64(tx->complq_bus_dqo);
+		cmd.create_tx_queue.tx_comp_ring_size =
+			cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
+	}
+
+	return gve_adminq_issue_cmd(priv, &cmd);
 }
 
 int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
@@ -504,29 +509,41 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
 {
 	struct gve_rx_ring *rx = &priv->rx[queue_index];
 	union gve_adminq_command cmd;
-	u32 qpl_id;
-	int err;
 
-	qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
-		 GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
 	cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
 		.queue_id = cpu_to_be32(queue_index),
-		.index = cpu_to_be32(queue_index),
-		.reserved = 0,
 		.ntfy_id = cpu_to_be32(rx->ntfy_id),
 		.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
-		.rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
-		.rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
-		.queue_page_list_id = cpu_to_be32(qpl_id),
 	};
 
-	err = gve_adminq_issue_cmd(priv, &cmd);
-	if (err)
-		return err;
+	if (gve_is_gqi(priv)) {
+		u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
+			GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
 
-	return 0;
+		cmd.create_rx_queue.rx_desc_ring_addr =
+			cpu_to_be64(rx->desc.bus),
+		cmd.create_rx_queue.rx_data_ring_addr =
+			cpu_to_be64(rx->data.data_bus),
+		cmd.create_rx_queue.index = cpu_to_be32(queue_index);
+		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
+	} else {
+		cmd.create_rx_queue.rx_ring_size =
+			cpu_to_be16(priv->rx_desc_cnt);
+		cmd.create_rx_queue.rx_desc_ring_addr =
+			cpu_to_be64(rx->dqo.complq.bus);
+		cmd.create_rx_queue.rx_data_ring_addr =
+			cpu_to_be64(rx->dqo.bufq.bus);
+		cmd.create_rx_queue.packet_buffer_size =
+			cpu_to_be16(priv->data_buffer_size_dqo);
+		cmd.create_rx_queue.rx_buff_ring_size =
+			cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
+		cmd.create_rx_queue.enable_rsc =
+			!!(priv->dev->features & NETIF_F_LRO);
+	}
+
+	return gve_adminq_issue_cmd(priv, &cmd);
 }
 
 int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 62a7e96af715..47c3d8f313fc 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -139,9 +139,11 @@ struct gve_adminq_configure_device_resources {
 	__be32 num_irq_dbs;
 	__be32 irq_db_stride;
 	__be32 ntfy_blk_msix_base_idx;
+	u8 queue_format;
+	u8 padding[7];
 };
 
-static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32);
+static_assert(sizeof(struct gve_adminq_configure_device_resources) == 40);
 
 struct gve_adminq_register_page_list {
 	__be32 page_list_id;
@@ -166,9 +168,13 @@ struct gve_adminq_create_tx_queue {
 	__be64 tx_ring_addr;
 	__be32 queue_page_list_id;
 	__be32 ntfy_id;
+	__be64 tx_comp_ring_addr;
+	__be16 tx_ring_size;
+	__be16 tx_comp_ring_size;
+	u8 padding[4];
 };
 
-static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32);
+static_assert(sizeof(struct gve_adminq_create_tx_queue) == 48);
 
 struct gve_adminq_create_rx_queue {
 	__be32 queue_id;
@@ -179,10 +185,14 @@ struct gve_adminq_create_rx_queue {
 	__be64 rx_desc_ring_addr;
 	__be64 rx_data_ring_addr;
 	__be32 queue_page_list_id;
-	u8 padding[4];
+	__be16 rx_ring_size;
+	__be16 packet_buffer_size;
+	__be16 rx_buff_ring_size;
+	u8 enable_rsc;
+	u8 padding[5];
 };
 
-static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48);
+static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56);
 
 /* Queue resources that are shared with the device */
 struct gve_queue_resources {
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 5fb05cf36b49..ccaf68562312 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2019 Google, Inc.
+ * Copyright (C) 2015-2021 Google, Inc.
  */
 
 #include <linux/ethtool.h>
@@ -453,11 +453,16 @@ static int gve_set_tunable(struct net_device *netdev,
 
 	switch (etuna->id) {
 	case ETHTOOL_RX_COPYBREAK:
+	{
+		u32 max_copybreak = gve_is_gqi(priv) ?
+			(PAGE_SIZE / 2) : priv->data_buffer_size_dqo;
+
 		len = *(u32 *)value;
-		if (len > PAGE_SIZE / 2)
+		if (len > max_copybreak)
 			return -EINVAL;
 		priv->rx_copybreak = len;
 		return 0;
+	}
 	default:
 		return -EOPNOTSUPP;
 	}

From 5e8c5adf95f8a537ec08e8e3798d8bc3464deee8 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:28 -0700
Subject: [PATCH 2853/3292] gve: DQO: Add core netdev features

Add napi netdev device registration, interrupt handling and initial tx
and rx polling stubs. The stubs will be filled in follow-on patches.

Also:
- LRO feature advertisement and handling
- Also update ethtool logic

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/Makefile      |   2 +-
 drivers/net/ethernet/google/gve/gve.h         |   2 +
 drivers/net/ethernet/google/gve/gve_adminq.c  |   2 +
 drivers/net/ethernet/google/gve/gve_dqo.h     |  32 +++
 drivers/net/ethernet/google/gve/gve_ethtool.c |  12 +-
 drivers/net/ethernet/google/gve/gve_main.c    | 188 ++++++++++++++++--
 drivers/net/ethernet/google/gve/gve_rx_dqo.c  |  24 +++
 drivers/net/ethernet/google/gve/gve_tx_dqo.c  |  23 +++
 8 files changed, 260 insertions(+), 25 deletions(-)
 create mode 100644 drivers/net/ethernet/google/gve/gve_dqo.h
 create mode 100644 drivers/net/ethernet/google/gve/gve_rx_dqo.c
 create mode 100644 drivers/net/ethernet/google/gve/gve_tx_dqo.c

diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
index 0143f4471e42..b9a6be76531b 100644
--- a/drivers/net/ethernet/google/gve/Makefile
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -1,4 +1,4 @@
 # Makefile for the Google virtual Ethernet (gve) driver
 
 obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o gve_utils.o
+gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 8a2a8d125090..d6bf0466ae8b 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -45,6 +45,8 @@
 /* PTYPEs are always 10 bits. */
 #define GVE_NUM_PTYPES	1024
 
+#define GVE_RX_BUFFER_SIZE_DQO 2048
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index cf017a499119..5bb56b454541 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -714,6 +714,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	if (gve_is_gqi(priv)) {
 		err = gve_set_desc_cnt(priv, descriptor);
 	} else {
+		/* DQO supports LRO. */
+		priv->dev->hw_features |= NETIF_F_LRO;
 		err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
 	}
 	if (err)
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
new file mode 100644
index 000000000000..cff4e6ef7bb6
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#ifndef _GVE_DQO_H_
+#define _GVE_DQO_H_
+
+#include "gve_adminq.h"
+
+#define GVE_ITR_ENABLE_BIT_DQO BIT(0)
+#define GVE_ITR_CLEAR_PBA_BIT_DQO BIT(1)
+#define GVE_ITR_NO_UPDATE_DQO (3 << 3)
+
+#define GVE_TX_IRQ_RATELIMIT_US_DQO 50
+#define GVE_RX_IRQ_RATELIMIT_US_DQO 20
+
+netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
+bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
+int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
+
+static inline void
+gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
+			   const struct gve_notify_block *block, u32 val)
+{
+	u32 index = be32_to_cpu(block->irq_db_index);
+
+	iowrite32(val, &priv->db_bar2[index]);
+}
+
+#endif /* _GVE_DQO_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index ccaf68562312..716e6240305d 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -311,8 +311,16 @@ gve_get_ethtool_stats(struct net_device *netdev,
 		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
 			struct gve_tx_ring *tx = &priv->tx[ring];
 
-			data[i++] = tx->req;
-			data[i++] = tx->done;
+			if (gve_is_gqi(priv)) {
+				data[i++] = tx->req;
+				data[i++] = tx->done;
+			} else {
+				/* DQO doesn't currently support
+				 * posted/completed descriptor counts;
+				 */
+				data[i++] = 0;
+				data[i++] = 0;
+			}
 			do {
 				start =
 				  u64_stats_fetch_begin(&priv->tx[ring].statss);
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 8cc0ac061c93..579f867cf148 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -14,6 +14,7 @@
 #include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include "gve.h"
+#include "gve_dqo.h"
 #include "gve_adminq.h"
 #include "gve_register.h"
 
@@ -26,6 +27,16 @@
 const char gve_version_str[] = GVE_VERSION;
 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
 
+static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	if (gve_is_gqi(priv))
+		return gve_tx(skb, dev);
+	else
+		return gve_tx_dqo(skb, dev);
+}
+
 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
 {
 	struct gve_priv *priv = netdev_priv(dev);
@@ -155,6 +166,15 @@ static irqreturn_t gve_intr(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t gve_intr_dqo(int irq, void *arg)
+{
+	struct gve_notify_block *block = arg;
+
+	/* Interrupts are automatically masked */
+	napi_schedule_irqoff(&block->napi);
+	return IRQ_HANDLED;
+}
+
 static int gve_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct gve_notify_block *block;
@@ -191,6 +211,54 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
 	return 0;
 }
 
+static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
+{
+	struct gve_notify_block *block =
+		container_of(napi, struct gve_notify_block, napi);
+	struct gve_priv *priv = block->priv;
+	bool reschedule = false;
+	int work_done = 0;
+
+	/* Clear PCI MSI-X Pending Bit Array (PBA)
+	 *
+	 * This bit is set if an interrupt event occurs while the vector is
+	 * masked. If this bit is set and we reenable the interrupt, it will
+	 * fire again. Since we're just about to poll the queue state, we don't
+	 * need it to fire again.
+	 *
+	 * Under high softirq load, it's possible that the interrupt condition
+	 * is triggered twice before we got the chance to process it.
+	 */
+	gve_write_irq_doorbell_dqo(priv, block,
+				   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
+
+	if (block->tx)
+		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+
+	if (block->rx) {
+		work_done = gve_rx_poll_dqo(block, budget);
+		reschedule |= work_done == budget;
+	}
+
+	if (reschedule)
+		return budget;
+
+	if (likely(napi_complete_done(napi, work_done))) {
+		/* Enable interrupts again.
+		 *
+		 * We don't need to repoll afterwards because HW supports the
+		 * PCI MSI-X PBA feature.
+		 *
+		 * Another interrupt would be triggered if a new event came in
+		 * since the last one.
+		 */
+		gve_write_irq_doorbell_dqo(priv, block,
+					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
+	}
+
+	return work_done;
+}
+
 static int gve_alloc_notify_blocks(struct gve_priv *priv)
 {
 	int num_vecs_requested = priv->num_ntfy_blks + 1;
@@ -264,7 +332,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 			 name, i);
 		block->priv = priv;
 		err = request_irq(priv->msix_vectors[msix_idx].vector,
-				  gve_intr, 0, block->name, block);
+				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
+				  0, block->name, block);
 		if (err) {
 			dev_err(&priv->pdev->dev,
 				"Failed to receive msix vector %d\n", i);
@@ -417,11 +486,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
 	gve_clear_device_resources_ok(priv);
 }
 
-static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+			 int (*gve_poll)(struct napi_struct *, int))
 {
 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
-	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+	netif_napi_add(priv->dev, &block->napi, gve_poll,
 		       NAPI_POLL_WEIGHT);
 }
 
@@ -512,11 +582,33 @@ static int gve_create_rings(struct gve_priv *priv)
 	return 0;
 }
 
+static void add_napi_init_sync_stats(struct gve_priv *priv,
+				     int (*napi_poll)(struct napi_struct *napi,
+						      int budget))
+{
+	int i;
+
+	/* Add tx napi & init sync stats*/
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+
+		u64_stats_init(&priv->tx[i].statss);
+		priv->tx[i].ntfy_id = ntfy_idx;
+		gve_add_napi(priv, ntfy_idx, napi_poll);
+	}
+	/* Add rx napi  & init sync stats*/
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+
+		u64_stats_init(&priv->rx[i].statss);
+		priv->rx[i].ntfy_id = ntfy_idx;
+		gve_add_napi(priv, ntfy_idx, napi_poll);
+	}
+}
+
 static int gve_alloc_rings(struct gve_priv *priv)
 {
-	int ntfy_idx;
 	int err;
-	int i;
 
 	/* Setup tx rings */
 	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
@@ -536,18 +628,11 @@ static int gve_alloc_rings(struct gve_priv *priv)
 	err = gve_rx_alloc_rings(priv);
 	if (err)
 		goto free_rx;
-	/* Add tx napi & init sync stats*/
-	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
-		u64_stats_init(&priv->tx[i].statss);
-		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
-		gve_add_napi(priv, ntfy_idx);
-	}
-	/* Add rx napi  & init sync stats*/
-	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
-		u64_stats_init(&priv->rx[i].statss);
-		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
-		gve_add_napi(priv, ntfy_idx);
-	}
+
+	if (gve_is_gqi(priv))
+		add_napi_init_sync_stats(priv, gve_napi_poll);
+	else
+		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
 
 	return 0;
 
@@ -798,9 +883,17 @@ static int gve_open(struct net_device *dev)
 	err = gve_register_qpls(priv);
 	if (err)
 		goto reset;
+
+	if (!gve_is_gqi(priv)) {
+		/* Hard code this for now. This may be tuned in the future for
+		 * performance.
+		 */
+		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
+	}
 	err = gve_create_rings(priv);
 	if (err)
 		goto reset;
+
 	gve_set_device_rings_ok(priv);
 
 	if (gve_get_report_stats(priv))
@@ -970,12 +1063,49 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
 	priv->tx_timeo_cnt++;
 }
 
+static int gve_set_features(struct net_device *netdev,
+			    netdev_features_t features)
+{
+	const netdev_features_t orig_features = netdev->features;
+	struct gve_priv *priv = netdev_priv(netdev);
+	int err;
+
+	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
+		netdev->features ^= NETIF_F_LRO;
+		if (netif_carrier_ok(netdev)) {
+			/* To make this process as simple as possible we
+			 * teardown the device, set the new configuration,
+			 * and then bring the device up again.
+			 */
+			err = gve_close(netdev);
+			/* We have already tried to reset in close, just fail
+			 * at this point.
+			 */
+			if (err)
+				goto err;
+
+			err = gve_open(netdev);
+			if (err)
+				goto err;
+		}
+	}
+
+	return 0;
+err:
+	/* Reverts the change on error. */
+	netdev->features = orig_features;
+	netif_err(priv, drv, netdev,
+		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
+	return err;
+}
+
 static const struct net_device_ops gve_netdev_ops = {
-	.ndo_start_xmit		=	gve_tx,
+	.ndo_start_xmit		=	gve_start_xmit,
 	.ndo_open		=	gve_open,
 	.ndo_stop		=	gve_close,
 	.ndo_get_stats64	=	gve_get_stats,
 	.ndo_tx_timeout         =       gve_tx_timeout,
+	.ndo_set_features	=	gve_set_features,
 };
 
 static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -1019,6 +1149,15 @@ void gve_handle_report_stats(struct gve_priv *priv)
 	/* tx stats */
 	if (priv->tx) {
 		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+			u32 last_completion = 0;
+			u32 tx_frames = 0;
+
+			/* DQO doesn't currently support these metrics. */
+			if (gve_is_gqi(priv)) {
+				last_completion = priv->tx[idx].done;
+				tx_frames = priv->tx[idx].req;
+			}
+
 			do {
 				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
 				tx_bytes = priv->tx[idx].bytes_done;
@@ -1035,7 +1174,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
 			};
 			stats[stats_idx++] = (struct stats) {
 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
-				.value = cpu_to_be64(priv->tx[idx].req),
+				.value = cpu_to_be64(tx_frames),
 				.queue_id = cpu_to_be32(idx),
 			};
 			stats[stats_idx++] = (struct stats) {
@@ -1045,7 +1184,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
 			};
 			stats[stats_idx++] = (struct stats) {
 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
-				.value = cpu_to_be64(priv->tx[idx].done),
+				.value = cpu_to_be64(last_completion),
 				.queue_id = cpu_to_be32(idx),
 			};
 		}
@@ -1121,7 +1260,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 			"Could not get device information: err=%d\n", err);
 		goto err;
 	}
-	if (priv->dev->max_mtu > PAGE_SIZE) {
+	if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
 		priv->dev->max_mtu = PAGE_SIZE;
 		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
 		if (err) {
@@ -1332,7 +1471,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, dev);
 	dev->ethtool_ops = &gve_ethtool_ops;
 	dev->netdev_ops = &gve_netdev_ops;
-	/* advertise features */
+
+	/* Set default and supported features.
+	 *
+	 * Features might be set in other locations as well (such as
+	 * `gve_adminq_describe_device`).
+	 */
 	dev->hw_features = NETIF_F_HIGHDMA;
 	dev->hw_features |= NETIF_F_SG;
 	dev->hw_features |= NETIF_F_HW_CSUM;
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
new file mode 100644
index 000000000000..808e09741ecc
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_dqo.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ip6_checksum.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
+{
+	u32 work_done = 0;
+
+	return work_done;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
new file mode 100644
index 000000000000..4b3319a1b299
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+#include "gve_dqo.h"
+#include <linux/tcp.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+
+netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
+{
+	return NETDEV_TX_OK;
+}
+
+bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
+{
+	return false;
+}

From 9c1a59a2f4bcf9926ac5814ae0ce53e449ee0b95 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:29 -0700
Subject: [PATCH 2854/3292] gve: DQO: Add ring allocation and initialization

Allocate the buffer and completion ring structures. Do not populate the
rings yet. That will happen in the respective rx and tx datapath
follow-on patches

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h        |   8 +-
 drivers/net/ethernet/google/gve/gve_dqo.h    |  18 ++
 drivers/net/ethernet/google/gve/gve_main.c   |  53 ++++-
 drivers/net/ethernet/google/gve/gve_rx.c     |   2 +-
 drivers/net/ethernet/google/gve/gve_rx_dqo.c | 157 +++++++++++++++
 drivers/net/ethernet/google/gve/gve_tx.c     |   2 +-
 drivers/net/ethernet/google/gve/gve_tx_dqo.c | 193 +++++++++++++++++++
 7 files changed, 420 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index d6bf0466ae8b..30978a15e37d 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -204,6 +204,10 @@ struct gve_rx_ring {
 	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
 	dma_addr_t q_resources_bus; /* dma address for the queue resources */
 	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+
+	/* head and tail of skb chain for the current packet or NULL if none */
+	struct sk_buff *skb_head;
+	struct sk_buff *skb_tail;
 };
 
 /* A TX desc ring entry */
@@ -816,14 +820,14 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
 bool gve_tx_poll(struct gve_notify_block *block, int budget);
 int gve_tx_alloc_rings(struct gve_priv *priv);
-void gve_tx_free_rings(struct gve_priv *priv);
+void gve_tx_free_rings_gqi(struct gve_priv *priv);
 __be32 gve_tx_load_event_counter(struct gve_priv *priv,
 				 struct gve_tx_ring *tx);
 /* rx handling */
 void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
 bool gve_rx_poll(struct gve_notify_block *block, int budget);
 int gve_rx_alloc_rings(struct gve_priv *priv);
-void gve_rx_free_rings(struct gve_priv *priv);
+void gve_rx_free_rings_gqi(struct gve_priv *priv);
 bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
 		       netdev_features_t feat);
 /* Reset */
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index cff4e6ef7bb6..9877a33ec068 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -19,6 +19,24 @@
 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
 bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
 int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings_dqo(struct gve_priv *priv);
+void gve_tx_free_rings_dqo(struct gve_priv *priv);
+int gve_rx_alloc_rings_dqo(struct gve_priv *priv);
+void gve_rx_free_rings_dqo(struct gve_priv *priv);
+int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+			  struct napi_struct *napi);
+void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx);
+void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx);
+
+static inline void
+gve_tx_put_doorbell_dqo(const struct gve_priv *priv,
+			const struct gve_queue_resources *q_resources, u32 val)
+{
+	u64 index;
+
+	index = be32_to_cpu(q_resources->db_index);
+	iowrite32(val, &priv->db_bar2[index]);
+}
 
 static inline void
 gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 579f867cf148..cddf19c8cf0b 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -571,13 +571,21 @@ static int gve_create_rings(struct gve_priv *priv)
 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
 		  priv->rx_cfg.num_queues);
 
-	/* Rx data ring has been prefilled with packet buffers at queue
-	 * allocation time.
-	 * Write the doorbell to provide descriptor slots and packet buffers
-	 * to the NIC.
-	 */
-	for (i = 0; i < priv->rx_cfg.num_queues; i++)
-		gve_rx_write_doorbell(priv, &priv->rx[i]);
+	if (gve_is_gqi(priv)) {
+		/* Rx data ring has been prefilled with packet buffers at queue
+		 * allocation time.
+		 *
+		 * Write the doorbell to provide descriptor slots and packet
+		 * buffers to the NIC.
+		 */
+		for (i = 0; i < priv->rx_cfg.num_queues; i++)
+			gve_rx_write_doorbell(priv, &priv->rx[i]);
+	} else {
+		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+			/* Post buffers and ring doorbell. */
+			gve_rx_post_buffers_dqo(&priv->rx[i]);
+		}
+	}
 
 	return 0;
 }
@@ -606,6 +614,15 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
 	}
 }
 
+static void gve_tx_free_rings(struct gve_priv *priv)
+{
+	if (gve_is_gqi(priv)) {
+		gve_tx_free_rings_gqi(priv);
+	} else {
+		gve_tx_free_rings_dqo(priv);
+	}
+}
+
 static int gve_alloc_rings(struct gve_priv *priv)
 {
 	int err;
@@ -615,9 +632,14 @@ static int gve_alloc_rings(struct gve_priv *priv)
 			    GFP_KERNEL);
 	if (!priv->tx)
 		return -ENOMEM;
-	err = gve_tx_alloc_rings(priv);
+
+	if (gve_is_gqi(priv))
+		err = gve_tx_alloc_rings(priv);
+	else
+		err = gve_tx_alloc_rings_dqo(priv);
 	if (err)
 		goto free_tx;
+
 	/* Setup rx rings */
 	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
 			    GFP_KERNEL);
@@ -625,7 +647,11 @@ static int gve_alloc_rings(struct gve_priv *priv)
 		err = -ENOMEM;
 		goto free_tx_queue;
 	}
-	err = gve_rx_alloc_rings(priv);
+
+	if (gve_is_gqi(priv))
+		err = gve_rx_alloc_rings(priv);
+	else
+		err = gve_rx_alloc_rings_dqo(priv);
 	if (err)
 		goto free_rx;
 
@@ -670,6 +696,14 @@ static int gve_destroy_rings(struct gve_priv *priv)
 	return 0;
 }
 
+static inline void gve_rx_free_rings(struct gve_priv *priv)
+{
+	if (gve_is_gqi(priv))
+		gve_rx_free_rings_gqi(priv);
+	else
+		gve_rx_free_rings_dqo(priv);
+}
+
 static void gve_free_rings(struct gve_priv *priv)
 {
 	int ntfy_idx;
@@ -869,6 +903,7 @@ static int gve_open(struct net_device *dev)
 	err = gve_alloc_qpls(priv);
 	if (err)
 		return err;
+
 	err = gve_alloc_rings(priv);
 	if (err)
 		goto free_qpls;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 15a64e40004d..bb8261368250 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -238,7 +238,7 @@ int gve_rx_alloc_rings(struct gve_priv *priv)
 	return err;
 }
 
-void gve_rx_free_rings(struct gve_priv *priv)
+void gve_rx_free_rings_gqi(struct gve_priv *priv)
 {
 	int i;
 
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 808e09741ecc..1073a820767d 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -16,6 +16,163 @@
 #include <net/ipv6.h>
 #include <net/tcp.h>
 
+static void gve_free_page_dqo(struct gve_priv *priv,
+			      struct gve_rx_buf_state_dqo *bs)
+{
+}
+
+static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
+{
+	struct gve_rx_ring *rx = &priv->rx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	size_t completion_queue_slots;
+	size_t buffer_queue_slots;
+	size_t size;
+	int i;
+
+	completion_queue_slots = rx->dqo.complq.mask + 1;
+	buffer_queue_slots = rx->dqo.bufq.mask + 1;
+
+	gve_rx_remove_from_block(priv, idx);
+
+	if (rx->q_resources) {
+		dma_free_coherent(hdev, sizeof(*rx->q_resources),
+				  rx->q_resources, rx->q_resources_bus);
+		rx->q_resources = NULL;
+	}
+
+	for (i = 0; i < rx->dqo.num_buf_states; i++) {
+		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
+
+		if (bs->page_info.page)
+			gve_free_page_dqo(priv, bs);
+	}
+
+	if (rx->dqo.bufq.desc_ring) {
+		size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
+		dma_free_coherent(hdev, size, rx->dqo.bufq.desc_ring,
+				  rx->dqo.bufq.bus);
+		rx->dqo.bufq.desc_ring = NULL;
+	}
+
+	if (rx->dqo.complq.desc_ring) {
+		size = sizeof(rx->dqo.complq.desc_ring[0]) *
+			completion_queue_slots;
+		dma_free_coherent(hdev, size, rx->dqo.complq.desc_ring,
+				  rx->dqo.complq.bus);
+		rx->dqo.complq.desc_ring = NULL;
+	}
+
+	kvfree(rx->dqo.buf_states);
+	rx->dqo.buf_states = NULL;
+
+	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
+}
+
+static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
+{
+	struct gve_rx_ring *rx = &priv->rx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	size_t size;
+	int i;
+
+	const u32 buffer_queue_slots =
+		priv->options_dqo_rda.rx_buff_ring_entries;
+	const u32 completion_queue_slots = priv->rx_desc_cnt;
+
+	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
+
+	memset(rx, 0, sizeof(*rx));
+	rx->gve = priv;
+	rx->q_num = idx;
+	rx->dqo.bufq.mask = buffer_queue_slots - 1;
+	rx->dqo.complq.num_free_slots = completion_queue_slots;
+	rx->dqo.complq.mask = completion_queue_slots - 1;
+	rx->skb_head = NULL;
+	rx->skb_tail = NULL;
+
+	rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4);
+	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
+				      sizeof(rx->dqo.buf_states[0]),
+				      GFP_KERNEL);
+	if (!rx->dqo.buf_states)
+		return -ENOMEM;
+
+	/* Set up linked list of buffer IDs */
+	for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
+		rx->dqo.buf_states[i].next = i + 1;
+
+	rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
+	rx->dqo.recycled_buf_states.head = -1;
+	rx->dqo.recycled_buf_states.tail = -1;
+	rx->dqo.used_buf_states.head = -1;
+	rx->dqo.used_buf_states.tail = -1;
+
+	/* Allocate RX completion queue */
+	size = sizeof(rx->dqo.complq.desc_ring[0]) *
+		completion_queue_slots;
+	rx->dqo.complq.desc_ring =
+		dma_alloc_coherent(hdev, size, &rx->dqo.complq.bus, GFP_KERNEL);
+	if (!rx->dqo.complq.desc_ring)
+		goto err;
+
+	/* Allocate RX buffer queue */
+	size = sizeof(rx->dqo.bufq.desc_ring[0]) * buffer_queue_slots;
+	rx->dqo.bufq.desc_ring =
+		dma_alloc_coherent(hdev, size, &rx->dqo.bufq.bus, GFP_KERNEL);
+	if (!rx->dqo.bufq.desc_ring)
+		goto err;
+
+	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
+					     &rx->q_resources_bus, GFP_KERNEL);
+	if (!rx->q_resources)
+		goto err;
+
+	gve_rx_add_to_block(priv, idx);
+
+	return 0;
+
+err:
+	gve_rx_free_ring_dqo(priv, idx);
+	return -ENOMEM;
+}
+
+int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		err = gve_rx_alloc_ring_dqo(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to alloc rx ring=%d: err=%d\n",
+				  i, err);
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		gve_rx_free_ring_dqo(priv, i);
+
+	return err;
+}
+
+void gve_rx_free_rings_dqo(struct gve_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++)
+		gve_rx_free_ring_dqo(priv, i);
+}
+
+void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
+{
+}
+
 int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
 {
 	u32 work_done = 0;
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 75930bb64eb9..665ac795a1ad 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -256,7 +256,7 @@ int gve_tx_alloc_rings(struct gve_priv *priv)
 	return err;
 }
 
-void gve_tx_free_rings(struct gve_priv *priv)
+void gve_tx_free_rings_gqi(struct gve_priv *priv)
 {
 	int i;
 
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index 4b3319a1b299..bde8f90ac8bd 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -12,11 +12,204 @@
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 
+/* gve_tx_free_desc - Cleans up all pending tx requests and buffers.
+ */
+static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
+{
+	int i;
+
+	for (i = 0; i < tx->dqo.num_pending_packets; i++) {
+		struct gve_tx_pending_packet_dqo *cur_state =
+			&tx->dqo.pending_packets[i];
+		int j;
+
+		for (j = 0; j < cur_state->num_bufs; j++) {
+			struct gve_tx_dma_buf *buf = &cur_state->bufs[j];
+
+			if (j == 0) {
+				dma_unmap_single(tx->dev,
+						 dma_unmap_addr(buf, dma),
+						 dma_unmap_len(buf, len),
+						 DMA_TO_DEVICE);
+			} else {
+				dma_unmap_page(tx->dev,
+					       dma_unmap_addr(buf, dma),
+					       dma_unmap_len(buf, len),
+					       DMA_TO_DEVICE);
+			}
+		}
+		if (cur_state->skb) {
+			dev_consume_skb_any(cur_state->skb);
+			cur_state->skb = NULL;
+		}
+	}
+}
+
+static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
+{
+	struct gve_tx_ring *tx = &priv->tx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	size_t bytes;
+
+	gve_tx_remove_from_block(priv, idx);
+
+	if (tx->q_resources) {
+		dma_free_coherent(hdev, sizeof(*tx->q_resources),
+				  tx->q_resources, tx->q_resources_bus);
+		tx->q_resources = NULL;
+	}
+
+	if (tx->dqo.compl_ring) {
+		bytes = sizeof(tx->dqo.compl_ring[0]) *
+			(tx->dqo.complq_mask + 1);
+		dma_free_coherent(hdev, bytes, tx->dqo.compl_ring,
+				  tx->complq_bus_dqo);
+		tx->dqo.compl_ring = NULL;
+	}
+
+	if (tx->dqo.tx_ring) {
+		bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1);
+		dma_free_coherent(hdev, bytes, tx->dqo.tx_ring, tx->bus);
+		tx->dqo.tx_ring = NULL;
+	}
+
+	kvfree(tx->dqo.pending_packets);
+	tx->dqo.pending_packets = NULL;
+
+	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
+}
+
+static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
+{
+	struct gve_tx_ring *tx = &priv->tx[idx];
+	struct device *hdev = &priv->pdev->dev;
+	int num_pending_packets;
+	size_t bytes;
+	int i;
+
+	memset(tx, 0, sizeof(*tx));
+	tx->q_num = idx;
+	tx->dev = &priv->pdev->dev;
+	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+	atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);
+
+	/* Queue sizes must be a power of 2 */
+	tx->mask = priv->tx_desc_cnt - 1;
+	tx->dqo.complq_mask = priv->options_dqo_rda.tx_comp_ring_entries - 1;
+
+	/* The max number of pending packets determines the maximum number of
+	 * descriptors which maybe written to the completion queue.
+	 *
+	 * We must set the number small enough to make sure we never overrun the
+	 * completion queue.
+	 */
+	num_pending_packets = tx->dqo.complq_mask + 1;
+
+	/* Reserve space for descriptor completions, which will be reported at
+	 * most every GVE_TX_MIN_RE_INTERVAL packets.
+	 */
+	num_pending_packets -=
+		(tx->dqo.complq_mask + 1) / GVE_TX_MIN_RE_INTERVAL;
+
+	/* Each packet may have at most 2 buffer completions if it receives both
+	 * a miss and reinjection completion.
+	 */
+	num_pending_packets /= 2;
+
+	tx->dqo.num_pending_packets = min_t(int, num_pending_packets, S16_MAX);
+	tx->dqo.pending_packets = kvcalloc(tx->dqo.num_pending_packets,
+					   sizeof(tx->dqo.pending_packets[0]),
+					   GFP_KERNEL);
+	if (!tx->dqo.pending_packets)
+		goto err;
+
+	/* Set up linked list of pending packets */
+	for (i = 0; i < tx->dqo.num_pending_packets - 1; i++)
+		tx->dqo.pending_packets[i].next = i + 1;
+
+	tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1;
+	atomic_set_release(&tx->dqo_compl.free_pending_packets, -1);
+	tx->dqo_compl.miss_completions.head = -1;
+	tx->dqo_compl.miss_completions.tail = -1;
+	tx->dqo_compl.timed_out_completions.head = -1;
+	tx->dqo_compl.timed_out_completions.tail = -1;
+
+	bytes = sizeof(tx->dqo.tx_ring[0]) * (tx->mask + 1);
+	tx->dqo.tx_ring = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
+	if (!tx->dqo.tx_ring)
+		goto err;
+
+	bytes = sizeof(tx->dqo.compl_ring[0]) * (tx->dqo.complq_mask + 1);
+	tx->dqo.compl_ring = dma_alloc_coherent(hdev, bytes,
+						&tx->complq_bus_dqo,
+						GFP_KERNEL);
+	if (!tx->dqo.compl_ring)
+		goto err;
+
+	tx->q_resources = dma_alloc_coherent(hdev, sizeof(*tx->q_resources),
+					     &tx->q_resources_bus, GFP_KERNEL);
+	if (!tx->q_resources)
+		goto err;
+
+	gve_tx_add_to_block(priv, idx);
+
+	return 0;
+
+err:
+	gve_tx_free_ring_dqo(priv, idx);
+	return -ENOMEM;
+}
+
+int gve_tx_alloc_rings_dqo(struct gve_priv *priv)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		err = gve_tx_alloc_ring_dqo(priv, i);
+		if (err) {
+			netif_err(priv, drv, priv->dev,
+				  "Failed to alloc tx ring=%d: err=%d\n",
+				  i, err);
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		gve_tx_free_ring_dqo(priv, i);
+
+	return err;
+}
+
+void gve_tx_free_rings_dqo(struct gve_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+		struct gve_tx_ring *tx = &priv->tx[i];
+
+		gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
+		netdev_tx_reset_queue(tx->netdev_txq);
+		gve_tx_clean_pending_packets(tx);
+
+		gve_tx_free_ring_dqo(priv, i);
+	}
+}
+
 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
 {
 	return NETDEV_TX_OK;
 }
 
+int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+			  struct napi_struct *napi)
+{
+	return 0;
+}
+
 bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
 {
 	return false;

From 0dcc144a7994007e50afe4806e0e4ef860f58013 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:30 -0700
Subject: [PATCH 2855/3292] gve: DQO: Configure interrupts on device up

When interrupts are first enabled, we also set the ratelimits, which
will be static for the entire usage of the device.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_dqo.h  | 19 +++++++++++++++++++
 drivers/net/ethernet/google/gve/gve_main.c | 16 ++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index 9877a33ec068..3b300223ea15 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -13,6 +13,9 @@
 #define GVE_ITR_CLEAR_PBA_BIT_DQO BIT(1)
 #define GVE_ITR_NO_UPDATE_DQO (3 << 3)
 
+#define GVE_ITR_INTERVAL_DQO_SHIFT 5
+#define GVE_ITR_INTERVAL_DQO_MASK ((1 << 12) - 1)
+
 #define GVE_TX_IRQ_RATELIMIT_US_DQO 50
 #define GVE_RX_IRQ_RATELIMIT_US_DQO 20
 
@@ -38,6 +41,22 @@ gve_tx_put_doorbell_dqo(const struct gve_priv *priv,
 	iowrite32(val, &priv->db_bar2[index]);
 }
 
+/* Builds register value to write to DQO IRQ doorbell to enable with specified
+ * ratelimit.
+ */
+static inline u32 gve_set_itr_ratelimit_dqo(u32 ratelimit_us)
+{
+	u32 result = GVE_ITR_ENABLE_BIT_DQO;
+
+	/* Interval has 2us granularity. */
+	ratelimit_us >>= 1;
+
+	ratelimit_us &= GVE_ITR_INTERVAL_DQO_MASK;
+	result |= (ratelimit_us << GVE_ITR_INTERVAL_DQO_SHIFT);
+
+	return result;
+}
+
 static inline void
 gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
 			   const struct gve_notify_block *block, u32 val)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index cddf19c8cf0b..1bf446836724 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1077,14 +1077,26 @@ static void gve_turnup(struct gve_priv *priv)
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
 		napi_enable(&block->napi);
-		iowrite32be(0, gve_irq_doorbell(priv, block));
+		if (gve_is_gqi(priv)) {
+			iowrite32be(0, gve_irq_doorbell(priv, block));
+		} else {
+			u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO);
+
+			gve_write_irq_doorbell_dqo(priv, block, val);
+		}
 	}
 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
 		napi_enable(&block->napi);
-		iowrite32be(0, gve_irq_doorbell(priv, block));
+		if (gve_is_gqi(priv)) {
+			iowrite32be(0, gve_irq_doorbell(priv, block));
+		} else {
+			u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO);
+
+			gve_write_irq_doorbell_dqo(priv, block, val);
+		}
 	}
 
 	gve_set_napi_enabled(priv);

From a57e5de476be0b4b7f42beb6a21c19ad9c577aa3 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:31 -0700
Subject: [PATCH 2856/3292] gve: DQO: Add TX path

TX SKBs will have their buffers DMA mapped with the device. Each buffer
will have at least one TX descriptor associated. Each SKB will also have
a metadata descriptor.

Each TX queue maintains an array of `gve_tx_pending_packet_dqo` objects.
Every TX SKB will have an associated pending_packet object. A TX SKB's
descriptors will use its pending_packet's index as the completion tag,
which will be returned on the TX completion queue.

The device implements a "flow-miss model". Most packets will simply
receive a packet completion. The flow-miss system may choose to process
a packet based on its contents. A TX packet which experiences a flow
miss would receive a miss completion followed by a later reinjection
completion. The miss-completion is received when the packet starts to be
processed by the flow-miss system and the reinjection completion is
received when the flow-miss system completes processing the packet and
sends it on the wire.

Notable mentions:

- Buffers may be freed after receiving the miss-completion, but in order
  to avoid packet reordering, we do not complete the SKB until receiving
  the reinjection completion.

- The driver must robustly handle the unlikely scenario where a miss
  completion does not have an associated reinjection completion. This is
  accomplished by maintaining a list of packets which have a pending
  reinjection completion. After a short timeout (5 seconds), the
  SKB and buffers are released and the pending_packet is moved to a
  second list which has a longer timeout (60 seconds), where the
  pending_packet will not be reused. When the longer timeout elapses,
  the driver may assume the reinjection completion would never be
  received and the pending_packet may be reused.

- Completion handling is triggered by an interrupt and is done in the
  NAPI poll function. Because the TX path and completion exist in
  different threading contexts they maintain their own lists for free
  pending_packet objects. The TX path uses a lock-free approach to steal
  the list from the completion path.

- Both the TSO context and general context descriptors have metadata
  bytes. The device requires that if multiple descriptors contain the
  same field, each descriptor must have the same value set for that
  field.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_dqo.h    |  12 +
 drivers/net/ethernet/google/gve/gve_tx_dqo.c | 819 ++++++++++++++++++-
 2 files changed, 829 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index 3b300223ea15..836042364124 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -19,6 +19,18 @@
 #define GVE_TX_IRQ_RATELIMIT_US_DQO 50
 #define GVE_RX_IRQ_RATELIMIT_US_DQO 20
 
+/* Timeout in seconds to wait for a reinjection completion after receiving
+ * its corresponding miss completion.
+ */
+#define GVE_REINJECT_COMPL_TIMEOUT 1
+
+/* Timeout in seconds to deallocate the completion tag for a packet that was
+ * prematurely freed for not receiving a valid completion. This should be large
+ * enough to rule out the possibility of receiving the corresponding valid
+ * completion after this interval.
+ */
+#define GVE_DEALLOCATE_COMPL_TIMEOUT 60
+
 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
 bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
 int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index bde8f90ac8bd..a4906b9df540 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -12,6 +12,67 @@
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 
+/* Returns true if a gve_tx_pending_packet_dqo object is available. */
+static bool gve_has_pending_packet(struct gve_tx_ring *tx)
+{
+	/* Check TX path's list. */
+	if (tx->dqo_tx.free_pending_packets != -1)
+		return true;
+
+	/* Check completion handler's list. */
+	if (atomic_read_acquire(&tx->dqo_compl.free_pending_packets) != -1)
+		return true;
+
+	return false;
+}
+
+static struct gve_tx_pending_packet_dqo *
+gve_alloc_pending_packet(struct gve_tx_ring *tx)
+{
+	struct gve_tx_pending_packet_dqo *pending_packet;
+	s16 index;
+
+	index = tx->dqo_tx.free_pending_packets;
+
+	/* No pending_packets available, try to steal the list from the
+	 * completion handler.
+	 */
+	if (unlikely(index == -1)) {
+		tx->dqo_tx.free_pending_packets =
+			atomic_xchg(&tx->dqo_compl.free_pending_packets, -1);
+		index = tx->dqo_tx.free_pending_packets;
+
+		if (unlikely(index == -1))
+			return NULL;
+	}
+
+	pending_packet = &tx->dqo.pending_packets[index];
+
+	/* Remove pending_packet from free list */
+	tx->dqo_tx.free_pending_packets = pending_packet->next;
+	pending_packet->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;
+
+	return pending_packet;
+}
+
+static void
+gve_free_pending_packet(struct gve_tx_ring *tx,
+			struct gve_tx_pending_packet_dqo *pending_packet)
+{
+	s16 index = pending_packet - tx->dqo.pending_packets;
+
+	pending_packet->state = GVE_PACKET_STATE_UNALLOCATED;
+	while (true) {
+		s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_pending_packets);
+
+		pending_packet->next = old_head;
+		if (atomic_cmpxchg(&tx->dqo_compl.free_pending_packets,
+				   old_head, index) == old_head) {
+			break;
+		}
+	}
+}
+
 /* gve_tx_free_desc - Cleans up all pending tx requests and buffers.
  */
 static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
@@ -199,18 +260,772 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv)
 	}
 }
 
+/* Returns the number of slots available in the ring */
+static inline u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
+{
+	u32 num_used = (tx->dqo_tx.tail - tx->dqo_tx.head) & tx->mask;
+
+	return tx->mask - num_used;
+}
+
+/* Stops the queue if available descriptors is less than 'count'.
+ * Return: 0 if stop is not required.
+ */
+static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count)
+{
+	if (likely(gve_has_pending_packet(tx) &&
+		   num_avail_tx_slots(tx) >= count))
+		return 0;
+
+	/* Update cached TX head pointer */
+	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
+
+	if (likely(gve_has_pending_packet(tx) &&
+		   num_avail_tx_slots(tx) >= count))
+		return 0;
+
+	/* No space, so stop the queue */
+	tx->stop_queue++;
+	netif_tx_stop_queue(tx->netdev_txq);
+
+	/* Sync with restarting queue in `gve_tx_poll_dqo()` */
+	mb();
+
+	/* After stopping queue, check if we can transmit again in order to
+	 * avoid TOCTOU bug.
+	 */
+	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
+
+	if (likely(!gve_has_pending_packet(tx) ||
+		   num_avail_tx_slots(tx) < count))
+		return -EBUSY;
+
+	netif_tx_start_queue(tx->netdev_txq);
+	tx->wake_queue++;
+	return 0;
+}
+
+static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb,
+					struct gve_tx_metadata_dqo *metadata)
+{
+	memset(metadata, 0, sizeof(*metadata));
+	metadata->version = GVE_TX_METADATA_VERSION_DQO;
+
+	if (skb->l4_hash) {
+		u16 path_hash = skb->hash ^ (skb->hash >> 16);
+
+		path_hash &= (1 << 15) - 1;
+		if (unlikely(path_hash == 0))
+			path_hash = ~path_hash;
+
+		metadata->path_hash = path_hash;
+	}
+}
+
+static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx,
+				     struct sk_buff *skb, u32 len, u64 addr,
+				     s16 compl_tag, bool eop, bool is_gso)
+{
+	const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL;
+
+	while (len > 0) {
+		struct gve_tx_pkt_desc_dqo *desc =
+			&tx->dqo.tx_ring[*desc_idx].pkt;
+		u32 cur_len = min_t(u32, len, GVE_TX_MAX_BUF_SIZE_DQO);
+		bool cur_eop = eop && cur_len == len;
+
+		*desc = (struct gve_tx_pkt_desc_dqo){
+			.buf_addr = cpu_to_le64(addr),
+			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
+			.end_of_packet = cur_eop,
+			.checksum_offload_enable = checksum_offload_en,
+			.compl_tag = cpu_to_le16(compl_tag),
+			.buf_size = cur_len,
+		};
+
+		addr += cur_len;
+		len -= cur_len;
+		*desc_idx = (*desc_idx + 1) & tx->mask;
+	}
+}
+
+/* Validates and prepares `skb` for TSO.
+ *
+ * Returns header length, or < 0 if invalid.
+ */
+static int gve_prep_tso(struct sk_buff *skb)
+{
+	struct tcphdr *tcp;
+	int header_len;
+	u32 paylen;
+	int err;
+
+	/* Note: HW requires MSS (gso_size) to be <= 9728 and the total length
+	 * of the TSO to be <= 262143.
+	 *
+	 * However, we don't validate these because:
+	 * - Hypervisor enforces a limit of 9K MTU
+	 * - Kernel will not produce a TSO larger than 64k
+	 */
+
+	if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO))
+		return -1;
+
+	/* Needed because we will modify header. */
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	tcp = tcp_hdr(skb);
+
+	/* Remove payload length from checksum. */
+	paylen = skb->len - skb_transport_offset(skb);
+
+	switch (skb_shinfo(skb)->gso_type) {
+	case SKB_GSO_TCPV4:
+	case SKB_GSO_TCPV6:
+		csum_replace_by_diff(&tcp->check,
+				     (__force __wsum)htonl(paylen));
+
+		/* Compute length of segmentation header. */
+		header_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO))
+		return -EINVAL;
+
+	return header_len;
+}
+
+static void gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
+				     const struct sk_buff *skb,
+				     const struct gve_tx_metadata_dqo *metadata,
+				     int header_len)
+{
+	*desc = (struct gve_tx_tso_context_desc_dqo){
+		.header_len = header_len,
+		.cmd_dtype = {
+			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
+			.tso = 1,
+		},
+		.flex0 = metadata->bytes[0],
+		.flex5 = metadata->bytes[5],
+		.flex6 = metadata->bytes[6],
+		.flex7 = metadata->bytes[7],
+		.flex8 = metadata->bytes[8],
+		.flex9 = metadata->bytes[9],
+		.flex10 = metadata->bytes[10],
+		.flex11 = metadata->bytes[11],
+	};
+	desc->tso_total_len = skb->len - header_len;
+	desc->mss = skb_shinfo(skb)->gso_size;
+}
+
+static void
+gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
+			     const struct gve_tx_metadata_dqo *metadata)
+{
+	*desc = (struct gve_tx_general_context_desc_dqo){
+		.flex0 = metadata->bytes[0],
+		.flex1 = metadata->bytes[1],
+		.flex2 = metadata->bytes[2],
+		.flex3 = metadata->bytes[3],
+		.flex4 = metadata->bytes[4],
+		.flex5 = metadata->bytes[5],
+		.flex6 = metadata->bytes[6],
+		.flex7 = metadata->bytes[7],
+		.flex8 = metadata->bytes[8],
+		.flex9 = metadata->bytes[9],
+		.flex10 = metadata->bytes[10],
+		.flex11 = metadata->bytes[11],
+		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
+	};
+}
+
+/* Returns 0 on success, or < 0 on error.
+ *
+ * Before this function is called, the caller must ensure
+ * gve_has_pending_packet(tx) returns true.
+ */
+static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
+				      struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	const bool is_gso = skb_is_gso(skb);
+	u32 desc_idx = tx->dqo_tx.tail;
+
+	struct gve_tx_pending_packet_dqo *pending_packet;
+	struct gve_tx_metadata_dqo metadata;
+	s16 completion_tag;
+	int i;
+
+	pending_packet = gve_alloc_pending_packet(tx);
+	pending_packet->skb = skb;
+	pending_packet->num_bufs = 0;
+	completion_tag = pending_packet - tx->dqo.pending_packets;
+
+	gve_extract_tx_metadata_dqo(skb, &metadata);
+	if (is_gso) {
+		int header_len = gve_prep_tso(skb);
+
+		if (unlikely(header_len < 0))
+			goto err;
+
+		gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
+					 skb, &metadata, header_len);
+		desc_idx = (desc_idx + 1) & tx->mask;
+	}
+
+	gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
+				     &metadata);
+	desc_idx = (desc_idx + 1) & tx->mask;
+
+	/* Note: HW requires that the size of a non-TSO packet be within the
+	 * range of [17, 9728].
+	 *
+	 * We don't double check because
+	 * - We limited `netdev->min_mtu` to ETH_MIN_MTU.
+	 * - Hypervisor won't allow MTU larger than 9216.
+	 */
+
+	/* Map the linear portion of skb */
+	{
+		struct gve_tx_dma_buf *buf =
+			&pending_packet->bufs[pending_packet->num_bufs];
+		u32 len = skb_headlen(skb);
+		dma_addr_t addr;
+
+		addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(tx->dev, addr)))
+			goto err;
+
+		dma_unmap_len_set(buf, len, len);
+		dma_unmap_addr_set(buf, dma, addr);
+		++pending_packet->num_bufs;
+
+		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+					 completion_tag,
+					 /*eop=*/shinfo->nr_frags == 0, is_gso);
+	}
+
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		struct gve_tx_dma_buf *buf =
+			&pending_packet->bufs[pending_packet->num_bufs];
+		const skb_frag_t *frag = &shinfo->frags[i];
+		bool is_eop = i == (shinfo->nr_frags - 1);
+		u32 len = skb_frag_size(frag);
+		dma_addr_t addr;
+
+		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(tx->dev, addr)))
+			goto err;
+
+		dma_unmap_len_set(buf, len, len);
+		dma_unmap_addr_set(buf, dma, addr);
+		++pending_packet->num_bufs;
+
+		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+					 completion_tag, is_eop, is_gso);
+	}
+
+	/* Commit the changes to our state */
+	tx->dqo_tx.tail = desc_idx;
+
+	/* Request a descriptor completion on the last descriptor of the
+	 * packet if we are allowed to by the HW enforced interval.
+	 */
+	{
+		u32 last_desc_idx = (desc_idx - 1) & tx->mask;
+		u32 last_report_event_interval =
+			(last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask;
+
+		if (unlikely(last_report_event_interval >=
+			     GVE_TX_MIN_RE_INTERVAL)) {
+			tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true;
+			tx->dqo_tx.last_re_idx = last_desc_idx;
+		}
+	}
+
+	return 0;
+
+err:
+	for (i = 0; i < pending_packet->num_bufs; i++) {
+		struct gve_tx_dma_buf *buf = &pending_packet->bufs[i];
+
+		if (i == 0) {
+			dma_unmap_single(tx->dev, dma_unmap_addr(buf, dma),
+					 dma_unmap_len(buf, len),
+					 DMA_TO_DEVICE);
+		} else {
+			dma_unmap_page(tx->dev, dma_unmap_addr(buf, dma),
+				       dma_unmap_len(buf, len), DMA_TO_DEVICE);
+		}
+	}
+
+	pending_packet->skb = NULL;
+	pending_packet->num_bufs = 0;
+	gve_free_pending_packet(tx, pending_packet);
+
+	return -1;
+}
+
+static int gve_num_descs_per_buf(size_t size)
+{
+	return DIV_ROUND_UP(size, GVE_TX_MAX_BUF_SIZE_DQO);
+}
+
+static int gve_num_buffer_descs_needed(const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	int num_descs;
+	int i;
+
+	num_descs = gve_num_descs_per_buf(skb_headlen(skb));
+
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		unsigned int frag_size = skb_frag_size(&shinfo->frags[i]);
+
+		num_descs += gve_num_descs_per_buf(frag_size);
+	}
+
+	return num_descs;
+}
+
+/* Returns true if HW is capable of sending TSO represented by `skb`.
+ *
+ * Each segment must not span more than GVE_TX_MAX_DATA_DESCS buffers.
+ * - The header is counted as one buffer for every single segment.
+ * - A buffer which is split between two segments is counted for both.
+ * - If a buffer contains both header and payload, it is counted as two buffers.
+ */
+static bool gve_can_send_tso(const struct sk_buff *skb)
+{
+	const int header_len = skb_checksum_start_offset(skb) + tcp_hdrlen(skb);
+	const int max_bufs_per_seg = GVE_TX_MAX_DATA_DESCS - 1;
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	const int gso_size = shinfo->gso_size;
+	int cur_seg_num_bufs;
+	int cur_seg_size;
+	int i;
+
+	cur_seg_size = skb_headlen(skb) - header_len;
+	cur_seg_num_bufs = cur_seg_size > 0;
+
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		if (cur_seg_size >= gso_size) {
+			cur_seg_size %= gso_size;
+			cur_seg_num_bufs = cur_seg_size > 0;
+		}
+
+		if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg))
+			return false;
+
+		cur_seg_size += skb_frag_size(&shinfo->frags[i]);
+	}
+
+	return true;
+}
+
+/* Attempt to transmit specified SKB.
+ *
+ * Returns 0 if the SKB was transmitted or dropped.
+ * Returns -1 if there is not currently enough space to transmit the SKB.
+ */
+static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
+			  struct sk_buff *skb)
+{
+	int num_buffer_descs;
+	int total_num_descs;
+
+	if (skb_is_gso(skb)) {
+		/* If TSO doesn't meet HW requirements, attempt to linearize the
+		 * packet.
+		 */
+		if (unlikely(!gve_can_send_tso(skb) &&
+			     skb_linearize(skb) < 0)) {
+			net_err_ratelimited("%s: Failed to transmit TSO packet\n",
+					    priv->dev->name);
+			goto drop;
+		}
+
+		num_buffer_descs = gve_num_buffer_descs_needed(skb);
+	} else {
+		num_buffer_descs = gve_num_buffer_descs_needed(skb);
+
+		if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
+			if (unlikely(skb_linearize(skb) < 0))
+				goto drop;
+
+			num_buffer_descs = 1;
+		}
+	}
+
+	/* Metadata + (optional TSO) + data descriptors. */
+	total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs;
+	if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs +
+			GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP))) {
+		return -1;
+	}
+
+	if (unlikely(gve_tx_add_skb_no_copy_dqo(tx, skb) < 0))
+		goto drop;
+
+	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
+	skb_tx_timestamp(skb);
+	return 0;
+
+drop:
+	tx->dropped_pkt++;
+	dev_kfree_skb_any(skb);
+	return 0;
+}
+
+/* Transmit a given skb and ring the doorbell. */
 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
 {
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_tx_ring *tx;
+
+	tx = &priv->tx[skb_get_queue_mapping(skb)];
+	if (unlikely(gve_try_tx_skb(priv, tx, skb) < 0)) {
+		/* We need to ring the txq doorbell -- we have stopped the Tx
+		 * queue for want of resources, but prior calls to gve_tx()
+		 * may have added descriptors without ringing the doorbell.
+		 */
+		gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
+		return NETDEV_TX_OK;
+
+	gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
 	return NETDEV_TX_OK;
 }
 
+static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
+			struct gve_tx_pending_packet_dqo *pending_packet)
+{
+	s16 old_tail, index;
+
+	index = pending_packet - tx->dqo.pending_packets;
+	old_tail = list->tail;
+	list->tail = index;
+	if (old_tail == -1)
+		list->head = index;
+	else
+		tx->dqo.pending_packets[old_tail].next = index;
+
+	pending_packet->next = -1;
+	pending_packet->prev = old_tail;
+}
+
+static void remove_from_list(struct gve_tx_ring *tx,
+			     struct gve_index_list *list,
+			     struct gve_tx_pending_packet_dqo *pending_packet)
+{
+	s16 index, prev_index, next_index;
+
+	index = pending_packet - tx->dqo.pending_packets;
+	prev_index = pending_packet->prev;
+	next_index = pending_packet->next;
+
+	if (prev_index == -1) {
+		/* Node is head */
+		list->head = next_index;
+	} else {
+		tx->dqo.pending_packets[prev_index].next = next_index;
+	}
+	if (next_index == -1) {
+		/* Node is tail */
+		list->tail = prev_index;
+	} else {
+		tx->dqo.pending_packets[next_index].prev = prev_index;
+	}
+}
+
+static void gve_unmap_packet(struct device *dev,
+			     struct gve_tx_pending_packet_dqo *pending_packet)
+{
+	struct gve_tx_dma_buf *buf;
+	int i;
+
+	/* SKB linear portion is guaranteed to be mapped */
+	buf = &pending_packet->bufs[0];
+	dma_unmap_single(dev, dma_unmap_addr(buf, dma),
+			 dma_unmap_len(buf, len), DMA_TO_DEVICE);
+	for (i = 1; i < pending_packet->num_bufs; i++) {
+		buf = &pending_packet->bufs[i];
+		dma_unmap_page(dev, dma_unmap_addr(buf, dma),
+			       dma_unmap_len(buf, len), DMA_TO_DEVICE);
+	}
+	pending_packet->num_bufs = 0;
+}
+
+/* Completion types and expected behavior:
+ * No Miss compl + Packet compl = Packet completed normally.
+ * Miss compl + Re-inject compl = Packet completed normally.
+ * No Miss compl + Re-inject compl = Skipped i.e. packet not completed.
+ * Miss compl + Packet compl = Skipped i.e. packet not completed.
+ */
+static void gve_handle_packet_completion(struct gve_priv *priv,
+					 struct gve_tx_ring *tx, bool is_napi,
+					 u16 compl_tag, u64 *bytes, u64 *pkts,
+					 bool is_reinjection)
+{
+	struct gve_tx_pending_packet_dqo *pending_packet;
+
+	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
+		net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
+				    priv->dev->name, (int)compl_tag);
+		return;
+	}
+
+	pending_packet = &tx->dqo.pending_packets[compl_tag];
+
+	if (unlikely(is_reinjection)) {
+		if (unlikely(pending_packet->state ==
+			     GVE_PACKET_STATE_TIMED_OUT_COMPL)) {
+			net_err_ratelimited("%s: Re-injection completion: %d received after timeout.\n",
+					    priv->dev->name, (int)compl_tag);
+			/* Packet was already completed as a result of timeout,
+			 * so just remove from list and free pending packet.
+			 */
+			remove_from_list(tx,
+					 &tx->dqo_compl.timed_out_completions,
+					 pending_packet);
+			gve_free_pending_packet(tx, pending_packet);
+			return;
+		}
+		if (unlikely(pending_packet->state !=
+			     GVE_PACKET_STATE_PENDING_REINJECT_COMPL)) {
+			/* No outstanding miss completion but packet allocated
+			 * implies packet receives a re-injection completion
+			 * without a a prior miss completion. Return without
+			 * completing the packet.
+			 */
+			net_err_ratelimited("%s: Re-injection completion received without corresponding miss completion: %d\n",
+					    priv->dev->name, (int)compl_tag);
+			return;
+		}
+		remove_from_list(tx, &tx->dqo_compl.miss_completions,
+				 pending_packet);
+	} else {
+		/* Packet is allocated but not a pending data completion. */
+		if (unlikely(pending_packet->state !=
+			     GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
+			net_err_ratelimited("%s: No pending data completion: %d\n",
+					    priv->dev->name, (int)compl_tag);
+			return;
+		}
+	}
+	gve_unmap_packet(tx->dev, pending_packet);
+
+	*bytes += pending_packet->skb->len;
+	(*pkts)++;
+	napi_consume_skb(pending_packet->skb, is_napi);
+	pending_packet->skb = NULL;
+	gve_free_pending_packet(tx, pending_packet);
+}
+
+static void gve_handle_miss_completion(struct gve_priv *priv,
+				       struct gve_tx_ring *tx, u16 compl_tag,
+				       u64 *bytes, u64 *pkts)
+{
+	struct gve_tx_pending_packet_dqo *pending_packet;
+
+	if (unlikely(compl_tag >= tx->dqo.num_pending_packets)) {
+		net_err_ratelimited("%s: Invalid TX completion tag: %d\n",
+				    priv->dev->name, (int)compl_tag);
+		return;
+	}
+
+	pending_packet = &tx->dqo.pending_packets[compl_tag];
+	if (unlikely(pending_packet->state !=
+				GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
+		net_err_ratelimited("%s: Unexpected packet state: %d for completion tag : %d\n",
+				    priv->dev->name, (int)pending_packet->state,
+				    (int)compl_tag);
+		return;
+	}
+
+	pending_packet->state = GVE_PACKET_STATE_PENDING_REINJECT_COMPL;
+	/* jiffies can wraparound but time comparisons can handle overflows. */
+	pending_packet->timeout_jiffies =
+			jiffies +
+			msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT *
+					 MSEC_PER_SEC);
+	add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet);
+
+	*bytes += pending_packet->skb->len;
+	(*pkts)++;
+}
+
+static void remove_miss_completions(struct gve_priv *priv,
+				    struct gve_tx_ring *tx)
+{
+	struct gve_tx_pending_packet_dqo *pending_packet;
+	s16 next_index;
+
+	next_index = tx->dqo_compl.miss_completions.head;
+	while (next_index != -1) {
+		pending_packet = &tx->dqo.pending_packets[next_index];
+		next_index = pending_packet->next;
+		/* Break early because packets should timeout in order. */
+		if (time_is_after_jiffies(pending_packet->timeout_jiffies))
+			break;
+
+		remove_from_list(tx, &tx->dqo_compl.miss_completions,
+				 pending_packet);
+		/* Unmap buffers and free skb but do not unallocate packet i.e.
+		 * the completion tag is not freed to ensure that the driver
+		 * can take appropriate action if a corresponding valid
+		 * completion is received later.
+		 */
+		gve_unmap_packet(tx->dev, pending_packet);
+		/* This indicates the packet was dropped. */
+		dev_kfree_skb_any(pending_packet->skb);
+		pending_packet->skb = NULL;
+		tx->dropped_pkt++;
+		net_err_ratelimited("%s: No reinjection completion was received for: %ld.\n",
+				    priv->dev->name,
+				    (pending_packet - tx->dqo.pending_packets));
+
+		pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
+		pending_packet->timeout_jiffies =
+				jiffies +
+				msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT *
+						 MSEC_PER_SEC);
+		/* Maintain pending packet in another list so the packet can be
+		 * unallocated at a later time.
+		 */
+		add_to_list(tx, &tx->dqo_compl.timed_out_completions,
+			    pending_packet);
+	}
+}
+
+static void remove_timed_out_completions(struct gve_priv *priv,
+					 struct gve_tx_ring *tx)
+{
+	struct gve_tx_pending_packet_dqo *pending_packet;
+	s16 next_index;
+
+	next_index = tx->dqo_compl.timed_out_completions.head;
+	while (next_index != -1) {
+		pending_packet = &tx->dqo.pending_packets[next_index];
+		next_index = pending_packet->next;
+		/* Break early because packets should timeout in order. */
+		if (time_is_after_jiffies(pending_packet->timeout_jiffies))
+			break;
+
+		remove_from_list(tx, &tx->dqo_compl.timed_out_completions,
+				 pending_packet);
+		gve_free_pending_packet(tx, pending_packet);
+	}
+}
+
 int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
 			  struct napi_struct *napi)
 {
-	return 0;
+	u64 reinject_compl_bytes = 0;
+	u64 reinject_compl_pkts = 0;
+	int num_descs_cleaned = 0;
+	u64 miss_compl_bytes = 0;
+	u64 miss_compl_pkts = 0;
+	u64 pkt_compl_bytes = 0;
+	u64 pkt_compl_pkts = 0;
+
+	/* Limit in order to avoid blocking for too long */
+	while (!napi || pkt_compl_pkts < napi->weight) {
+		struct gve_tx_compl_desc *compl_desc =
+			&tx->dqo.compl_ring[tx->dqo_compl.head];
+		u16 type;
+
+		if (compl_desc->generation == tx->dqo_compl.cur_gen_bit)
+			break;
+
+		/* Prefetch the next descriptor. */
+		prefetch(&tx->dqo.compl_ring[(tx->dqo_compl.head + 1) &
+				tx->dqo.complq_mask]);
+
+		/* Do not read data until we own the descriptor */
+		dma_rmb();
+		type = compl_desc->type;
+
+		if (type == GVE_COMPL_TYPE_DQO_DESC) {
+			/* This is the last descriptor fetched by HW plus one */
+			u16 tx_head = le16_to_cpu(compl_desc->tx_head);
+
+			atomic_set_release(&tx->dqo_compl.hw_tx_head, tx_head);
+		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
+			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);
+
+			gve_handle_packet_completion(priv, tx, !!napi,
+						     compl_tag,
+						     &pkt_compl_bytes,
+						     &pkt_compl_pkts,
+						     /*is_reinjection=*/false);
+		} else if (type == GVE_COMPL_TYPE_DQO_MISS) {
+			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);
+
+			gve_handle_miss_completion(priv, tx, compl_tag,
+						   &miss_compl_bytes,
+						   &miss_compl_pkts);
+		} else if (type == GVE_COMPL_TYPE_DQO_REINJECTION) {
+			u16 compl_tag = le16_to_cpu(compl_desc->completion_tag);
+
+			gve_handle_packet_completion(priv, tx, !!napi,
+						     compl_tag,
+						     &reinject_compl_bytes,
+						     &reinject_compl_pkts,
+						     /*is_reinjection=*/true);
+		}
+
+		tx->dqo_compl.head =
+			(tx->dqo_compl.head + 1) & tx->dqo.complq_mask;
+		/* Flip the generation bit when we wrap around */
+		tx->dqo_compl.cur_gen_bit ^= tx->dqo_compl.head == 0;
+		num_descs_cleaned++;
+	}
+
+	netdev_tx_completed_queue(tx->netdev_txq,
+				  pkt_compl_pkts + miss_compl_pkts,
+				  pkt_compl_bytes + miss_compl_bytes);
+
+	remove_miss_completions(priv, tx);
+	remove_timed_out_completions(priv, tx);
+
+	u64_stats_update_begin(&tx->statss);
+	tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
+	tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
+	u64_stats_update_end(&tx->statss);
+	return num_descs_cleaned;
 }
 
 bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
 {
-	return false;
+	struct gve_tx_compl_desc *compl_desc;
+	struct gve_tx_ring *tx = block->tx;
+	struct gve_priv *priv = block->priv;
+
+	if (do_clean) {
+		int num_descs_cleaned = gve_clean_tx_done_dqo(priv, tx,
+							      &block->napi);
+
+		/* Sync with queue being stopped in `gve_maybe_stop_tx_dqo()` */
+		mb();
+
+		if (netif_tx_queue_stopped(tx->netdev_txq) &&
+		    num_descs_cleaned > 0) {
+			tx->wake_queue++;
+			netif_tx_wake_queue(tx->netdev_txq);
+		}
+	}
+
+	/* Return true if we still have work. */
+	compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
+	return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
 }

From 9b8dd5e5ea48bbb7532d20c4093a79d8283e4029 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 11:06:32 -0700
Subject: [PATCH 2857/3292] gve: DQO: Add RX path

The RX queue has an array of `gve_rx_buf_state_dqo` objects. All
allocated pages have an associated buf_state object. When a buffer is
posted on the RX buffer queue, the buffer ID will be the buf_state's
index into the RX queue's array.

On packet reception, the RX queue will have one descriptor for each
buffer associated with a received packet. Each RX descriptor will have
a buffer_id that was posted on the buffer queue.

Notable mentions:

- We use a default buffer size of 2048 bytes. Based on page size, we
  may post separate sections of a single page as separate buffers.

- The driver holds an extra reference on pages passed up the receive
  path with an skb and keeps these pages on a list. When posting new
  buffers to the NIC, we check if any of these pages has only our
  reference, or another buffer sized segment of the page has no
  references. If so, it is free to reuse. This page recycling approach
  is a common netdev optimization that reduces page alloc/free calls.

- Pages in the free list have a page_count bias in order to avoid an
  atomic increment of pagecount every time we attempt to reuse a page.
  # references = page_count() - bias

- In order to track when a page is safe to reuse, we keep track of the
  last offset which had a single SKB reference. When this occurs, it
  implies that every single other offset is reusable. Otherwise, we
  don't know if offsets can be safely reused.

- We maintain two free lists of pages. List #1 (recycled_buf_states)
  contains pages we know can be reused right away. List #2
  (used_buf_states) contains pages which cannot be used right away. We
  only attempt to get pages from list #2 when list #1 is empty. We only
  attempt to use a small fixed number pages from list #2 before giving
  up and allocating a new page. Both lists are FIFOs in hope that by the
  time we attempt to reuse a page, the references were dropped.

Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve.h        |   1 +
 drivers/net/ethernet/google/gve/gve_rx_dqo.c | 582 +++++++++++++++++++
 drivers/net/ethernet/google/gve/gve_utils.c  |  15 +
 drivers/net/ethernet/google/gve/gve_utils.h  |   3 +
 4 files changed, 601 insertions(+)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 30978a15e37d..1d3188e8e3b3 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -59,6 +59,7 @@ struct gve_rx_slot_page_info {
 	struct page *page;
 	void *page_address;
 	u32 page_offset; /* offset to write to in page */
+	int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
 	u8 can_flip;
 };
 
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 1073a820767d..8738db020061 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -16,9 +16,161 @@
 #include <net/ipv6.h>
 #include <net/tcp.h>
 
+static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
+{
+	return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
+}
+
 static void gve_free_page_dqo(struct gve_priv *priv,
 			      struct gve_rx_buf_state_dqo *bs)
 {
+	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
+	gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
+		      DMA_FROM_DEVICE);
+	bs->page_info.page = NULL;
+}
+
+static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
+{
+	struct gve_rx_buf_state_dqo *buf_state;
+	s16 buffer_id;
+
+	buffer_id = rx->dqo.free_buf_states;
+	if (unlikely(buffer_id == -1))
+		return NULL;
+
+	buf_state = &rx->dqo.buf_states[buffer_id];
+
+	/* Remove buf_state from free list */
+	rx->dqo.free_buf_states = buf_state->next;
+
+	/* Point buf_state to itself to mark it as allocated */
+	buf_state->next = buffer_id;
+
+	return buf_state;
+}
+
+static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
+				       struct gve_rx_buf_state_dqo *buf_state)
+{
+	s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+	return buf_state->next == buffer_id;
+}
+
+static void gve_free_buf_state(struct gve_rx_ring *rx,
+			       struct gve_rx_buf_state_dqo *buf_state)
+{
+	s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+	buf_state->next = rx->dqo.free_buf_states;
+	rx->dqo.free_buf_states = buffer_id;
+}
+
+static struct gve_rx_buf_state_dqo *
+gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
+{
+	struct gve_rx_buf_state_dqo *buf_state;
+	s16 buffer_id;
+
+	buffer_id = list->head;
+	if (unlikely(buffer_id == -1))
+		return NULL;
+
+	buf_state = &rx->dqo.buf_states[buffer_id];
+
+	/* Remove buf_state from list */
+	list->head = buf_state->next;
+	if (buf_state->next == -1)
+		list->tail = -1;
+
+	/* Point buf_state to itself to mark it as allocated */
+	buf_state->next = buffer_id;
+
+	return buf_state;
+}
+
+static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
+				  struct gve_index_list *list,
+				  struct gve_rx_buf_state_dqo *buf_state)
+{
+	s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+	buf_state->next = -1;
+
+	if (list->head == -1) {
+		list->head = buffer_id;
+		list->tail = buffer_id;
+	} else {
+		int tail = list->tail;
+
+		rx->dqo.buf_states[tail].next = buffer_id;
+		list->tail = buffer_id;
+	}
+}
+
+static struct gve_rx_buf_state_dqo *
+gve_get_recycled_buf_state(struct gve_rx_ring *rx)
+{
+	struct gve_rx_buf_state_dqo *buf_state;
+	int i;
+
+	/* Recycled buf states are immediately usable. */
+	buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
+	if (likely(buf_state))
+		return buf_state;
+
+	if (unlikely(rx->dqo.used_buf_states.head == -1))
+		return NULL;
+
+	/* Used buf states are only usable when ref count reaches 0, which means
+	 * no SKBs refer to them.
+	 *
+	 * Search a limited number before giving up.
+	 */
+	for (i = 0; i < 5; i++) {
+		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
+		if (gve_buf_ref_cnt(buf_state) == 0)
+			return buf_state;
+
+		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+	}
+
+	/* If there are no free buf states discard an entry from
+	 * `used_buf_states` so it can be used.
+	 */
+	if (unlikely(rx->dqo.free_buf_states == -1)) {
+		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
+		if (gve_buf_ref_cnt(buf_state) == 0)
+			return buf_state;
+
+		gve_free_page_dqo(rx->gve, buf_state);
+		gve_free_buf_state(rx, buf_state);
+	}
+
+	return NULL;
+}
+
+static int gve_alloc_page_dqo(struct gve_priv *priv,
+			      struct gve_rx_buf_state_dqo *buf_state)
+{
+	int err;
+
+	err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
+			     &buf_state->addr, DMA_FROM_DEVICE);
+	if (err)
+		return err;
+
+	buf_state->page_info.page_offset = 0;
+	buf_state->page_info.page_address =
+		page_address(buf_state->page_info.page);
+	buf_state->last_single_ref_offset = 0;
+
+	/* The page already has 1 ref. */
+	page_ref_add(buf_state->page_info.page, INT_MAX - 1);
+	buf_state->page_info.pagecnt_bias = INT_MAX;
+
+	return 0;
 }
 
 static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
@@ -137,6 +289,14 @@ err:
 	return -ENOMEM;
 }
 
+void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
+{
+	const struct gve_rx_ring *rx = &priv->rx[queue_idx];
+	u64 index = be32_to_cpu(rx->q_resources->db_index);
+
+	iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
+}
+
 int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
 {
 	int err = 0;
@@ -171,11 +331,433 @@ void gve_rx_free_rings_dqo(struct gve_priv *priv)
 
 void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
 {
+	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
+	struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
+	struct gve_priv *priv = rx->gve;
+	u32 num_avail_slots;
+	u32 num_full_slots;
+	u32 num_posted = 0;
+
+	num_full_slots = (bufq->tail - bufq->head) & bufq->mask;
+	num_avail_slots = bufq->mask - num_full_slots;
+
+	num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
+	while (num_posted < num_avail_slots) {
+		struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
+		struct gve_rx_buf_state_dqo *buf_state;
+
+		buf_state = gve_get_recycled_buf_state(rx);
+		if (unlikely(!buf_state)) {
+			buf_state = gve_alloc_buf_state(rx);
+			if (unlikely(!buf_state))
+				break;
+
+			if (unlikely(gve_alloc_page_dqo(priv, buf_state))) {
+				u64_stats_update_begin(&rx->statss);
+				rx->rx_buf_alloc_fail++;
+				u64_stats_update_end(&rx->statss);
+				gve_free_buf_state(rx, buf_state);
+				break;
+			}
+		}
+
+		desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
+		desc->buf_addr = cpu_to_le64(buf_state->addr +
+					     buf_state->page_info.page_offset);
+
+		bufq->tail = (bufq->tail + 1) & bufq->mask;
+		complq->num_free_slots--;
+		num_posted++;
+
+		if ((bufq->tail & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
+			gve_rx_write_doorbell_dqo(priv, rx->q_num);
+	}
+
+	rx->fill_cnt += num_posted;
+}
+
+static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
+				struct gve_rx_buf_state_dqo *buf_state)
+{
+	const int data_buffer_size = priv->data_buffer_size_dqo;
+	int pagecount;
+
+	/* Can't reuse if we only fit one buffer per page */
+	if (data_buffer_size * 2 > PAGE_SIZE)
+		goto mark_used;
+
+	pagecount = gve_buf_ref_cnt(buf_state);
+
+	/* Record the offset when we have a single remaining reference.
+	 *
+	 * When this happens, we know all of the other offsets of the page are
+	 * usable.
+	 */
+	if (pagecount == 1) {
+		buf_state->last_single_ref_offset =
+			buf_state->page_info.page_offset;
+	}
+
+	/* Use the next buffer sized chunk in the page. */
+	buf_state->page_info.page_offset += data_buffer_size;
+	buf_state->page_info.page_offset &= (PAGE_SIZE - 1);
+
+	/* If we wrap around to the same offset without ever dropping to 1
+	 * reference, then we don't know if this offset was ever freed.
+	 */
+	if (buf_state->page_info.page_offset ==
+	    buf_state->last_single_ref_offset) {
+		goto mark_used;
+	}
+
+	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+	return;
+
+mark_used:
+	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+}
+
+static void gve_rx_skb_csum(struct sk_buff *skb,
+			    const struct gve_rx_compl_desc_dqo *desc,
+			    struct gve_ptype ptype)
+{
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* HW did not identify and process L3 and L4 headers. */
+	if (unlikely(!desc->l3_l4_processed))
+		return;
+
+	if (ptype.l3_type == GVE_L3_TYPE_IPV4) {
+		if (unlikely(desc->csum_ip_err || desc->csum_external_ip_err))
+			return;
+	} else if (ptype.l3_type == GVE_L3_TYPE_IPV6) {
+		/* Checksum should be skipped if this flag is set. */
+		if (unlikely(desc->ipv6_ex_add))
+			return;
+	}
+
+	if (unlikely(desc->csum_l4_err))
+		return;
+
+	switch (ptype.l4_type) {
+	case GVE_L4_TYPE_TCP:
+	case GVE_L4_TYPE_UDP:
+	case GVE_L4_TYPE_ICMP:
+	case GVE_L4_TYPE_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
+	default:
+		break;
+	}
+}
+
+static void gve_rx_skb_hash(struct sk_buff *skb,
+			    const struct gve_rx_compl_desc_dqo *compl_desc,
+			    struct gve_ptype ptype)
+{
+	enum pkt_hash_types hash_type = PKT_HASH_TYPE_L2;
+
+	if (ptype.l4_type != GVE_L4_TYPE_UNKNOWN)
+		hash_type = PKT_HASH_TYPE_L4;
+	else if (ptype.l3_type != GVE_L3_TYPE_UNKNOWN)
+		hash_type = PKT_HASH_TYPE_L3;
+
+	skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
+}
+
+static void gve_rx_free_skb(struct gve_rx_ring *rx)
+{
+	if (!rx->skb_head)
+		return;
+
+	dev_kfree_skb_any(rx->skb_head);
+	rx->skb_head = NULL;
+	rx->skb_tail = NULL;
+}
+
+/* Chains multi skbs for single rx packet.
+ * Returns 0 if buffer is appended, -1 otherwise.
+ */
+static int gve_rx_append_frags(struct napi_struct *napi,
+			       struct gve_rx_buf_state_dqo *buf_state,
+			       u16 buf_len, struct gve_rx_ring *rx,
+			       struct gve_priv *priv)
+{
+	int num_frags = skb_shinfo(rx->skb_tail)->nr_frags;
+
+	if (unlikely(num_frags == MAX_SKB_FRAGS)) {
+		struct sk_buff *skb;
+
+		skb = napi_alloc_skb(napi, 0);
+		if (!skb)
+			return -1;
+
+		skb_shinfo(rx->skb_tail)->frag_list = skb;
+		rx->skb_tail = skb;
+		num_frags = 0;
+	}
+	if (rx->skb_tail != rx->skb_head) {
+		rx->skb_head->len += buf_len;
+		rx->skb_head->data_len += buf_len;
+		rx->skb_head->truesize += priv->data_buffer_size_dqo;
+	}
+
+	skb_add_rx_frag(rx->skb_tail, num_frags,
+			buf_state->page_info.page,
+			buf_state->page_info.page_offset,
+			buf_len, priv->data_buffer_size_dqo);
+	gve_dec_pagecnt_bias(&buf_state->page_info);
+
+	return 0;
+}
+
+/* Returns 0 if descriptor is completed successfully.
+ * Returns -EINVAL if descriptor is invalid.
+ * Returns -ENOMEM if data cannot be copied to skb.
+ */
+static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
+		      const struct gve_rx_compl_desc_dqo *compl_desc,
+		      int queue_idx)
+{
+	const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
+	const bool eop = compl_desc->end_of_packet != 0;
+	struct gve_rx_buf_state_dqo *buf_state;
+	struct gve_priv *priv = rx->gve;
+	u16 buf_len;
+
+	if (unlikely(buffer_id > rx->dqo.num_buf_states)) {
+		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
+				    priv->dev->name, buffer_id);
+		return -EINVAL;
+	}
+	buf_state = &rx->dqo.buf_states[buffer_id];
+	if (unlikely(!gve_buf_state_is_allocated(rx, buf_state))) {
+		net_err_ratelimited("%s: RX buffer_id is not allocated: %u\n",
+				    priv->dev->name, buffer_id);
+		return -EINVAL;
+	}
+
+	if (unlikely(compl_desc->rx_error)) {
+		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
+				      buf_state);
+		return -EINVAL;
+	}
+
+	buf_len = compl_desc->packet_len;
+
+	/* Page might have not been used for awhile and was likely last written
+	 * by a different thread.
+	 */
+	prefetch(buf_state->page_info.page);
+
+	/* Sync the portion of dma buffer for CPU to read. */
+	dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
+				      buf_state->page_info.page_offset,
+				      buf_len, DMA_FROM_DEVICE);
+
+	/* Append to current skb if one exists. */
+	if (rx->skb_head) {
+		if (unlikely(gve_rx_append_frags(napi, buf_state, buf_len, rx,
+						 priv)) != 0) {
+			goto error;
+		}
+
+		gve_try_recycle_buf(priv, rx, buf_state);
+		return 0;
+	}
+
+	/* Prefetch the payload header. */
+	prefetch((char *)buf_state->addr + buf_state->page_info.page_offset);
+#if L1_CACHE_BYTES < 128
+	prefetch((char *)buf_state->addr + buf_state->page_info.page_offset +
+		 L1_CACHE_BYTES);
+#endif
+
+	if (eop && buf_len <= priv->rx_copybreak) {
+		rx->skb_head = gve_rx_copy(priv->dev, napi,
+					   &buf_state->page_info, buf_len, 0);
+		if (unlikely(!rx->skb_head))
+			goto error;
+		rx->skb_tail = rx->skb_head;
+
+		u64_stats_update_begin(&rx->statss);
+		rx->rx_copied_pkt++;
+		rx->rx_copybreak_pkt++;
+		u64_stats_update_end(&rx->statss);
+
+		gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
+				      buf_state);
+		return 0;
+	}
+
+	rx->skb_head = napi_get_frags(napi);
+	if (unlikely(!rx->skb_head))
+		goto error;
+	rx->skb_tail = rx->skb_head;
+
+	skb_add_rx_frag(rx->skb_head, 0, buf_state->page_info.page,
+			buf_state->page_info.page_offset, buf_len,
+			priv->data_buffer_size_dqo);
+	gve_dec_pagecnt_bias(&buf_state->page_info);
+
+	gve_try_recycle_buf(priv, rx, buf_state);
+	return 0;
+
+error:
+	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+	return -ENOMEM;
+}
+
+static int gve_rx_complete_rsc(struct sk_buff *skb,
+			       const struct gve_rx_compl_desc_dqo *desc,
+			       struct gve_ptype ptype)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	/* Only TCP is supported right now. */
+	if (ptype.l4_type != GVE_L4_TYPE_TCP)
+		return -EINVAL;
+
+	switch (ptype.l3_type) {
+	case GVE_L3_TYPE_IPV4:
+		shinfo->gso_type = SKB_GSO_TCPV4;
+		break;
+	case GVE_L3_TYPE_IPV6:
+		shinfo->gso_type = SKB_GSO_TCPV6;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	shinfo->gso_size = le16_to_cpu(desc->rsc_seg_len);
+	return 0;
+}
+
+/* Returns 0 if skb is completed successfully, -1 otherwise. */
+static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
+			       const struct gve_rx_compl_desc_dqo *desc,
+			       netdev_features_t feat)
+{
+	struct gve_ptype ptype =
+		rx->gve->ptype_lut_dqo->ptypes[desc->packet_type];
+	int err;
+
+	skb_record_rx_queue(rx->skb_head, rx->q_num);
+
+	if (feat & NETIF_F_RXHASH)
+		gve_rx_skb_hash(rx->skb_head, desc, ptype);
+
+	if (feat & NETIF_F_RXCSUM)
+		gve_rx_skb_csum(rx->skb_head, desc, ptype);
+
+	/* RSC packets must set gso_size otherwise the TCP stack will complain
+	 * that packets are larger than MTU.
+	 */
+	if (desc->rsc) {
+		err = gve_rx_complete_rsc(rx->skb_head, desc, ptype);
+		if (err < 0)
+			return err;
+	}
+
+	if (skb_headlen(rx->skb_head) == 0)
+		napi_gro_frags(napi);
+	else
+		napi_gro_receive(napi, rx->skb_head);
+
+	return 0;
 }
 
 int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
 {
+	struct napi_struct *napi = &block->napi;
+	netdev_features_t feat = napi->dev->features;
+
+	struct gve_rx_ring *rx = block->rx;
+	struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
+
 	u32 work_done = 0;
+	u64 bytes = 0;
+	int err;
+
+	while (work_done < budget) {
+		struct gve_rx_compl_desc_dqo *compl_desc =
+			&complq->desc_ring[complq->head];
+		u32 pkt_bytes;
+
+		/* No more new packets */
+		if (compl_desc->generation == complq->cur_gen_bit)
+			break;
+
+		/* Prefetch the next two descriptors. */
+		prefetch(&complq->desc_ring[(complq->head + 1) & complq->mask]);
+		prefetch(&complq->desc_ring[(complq->head + 2) & complq->mask]);
+
+		/* Do not read data until we own the descriptor */
+		dma_rmb();
+
+		err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
+		if (err < 0) {
+			gve_rx_free_skb(rx);
+			u64_stats_update_begin(&rx->statss);
+			if (err == -ENOMEM)
+				rx->rx_skb_alloc_fail++;
+			else if (err == -EINVAL)
+				rx->rx_desc_err_dropped_pkt++;
+			u64_stats_update_end(&rx->statss);
+		}
+
+		complq->head = (complq->head + 1) & complq->mask;
+		complq->num_free_slots++;
+
+		/* When the ring wraps, the generation bit is flipped. */
+		complq->cur_gen_bit ^= (complq->head == 0);
+
+		/* Receiving a completion means we have space to post another
+		 * buffer on the buffer queue.
+		 */
+		{
+			struct gve_rx_buf_queue_dqo *bufq = &rx->dqo.bufq;
+
+			bufq->head = (bufq->head + 1) & bufq->mask;
+		}
+
+		/* Free running counter of completed descriptors */
+		rx->cnt++;
+
+		if (!rx->skb_head)
+			continue;
+
+		if (!compl_desc->end_of_packet)
+			continue;
+
+		work_done++;
+		pkt_bytes = rx->skb_head->len;
+		/* The ethernet header (first ETH_HLEN bytes) is snipped off
+		 * by eth_type_trans.
+		 */
+		if (skb_headlen(rx->skb_head))
+			pkt_bytes += ETH_HLEN;
+
+		/* gve_rx_complete_skb() will consume skb if successful */
+		if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
+			gve_rx_free_skb(rx);
+			u64_stats_update_begin(&rx->statss);
+			rx->rx_desc_err_dropped_pkt++;
+			u64_stats_update_end(&rx->statss);
+			continue;
+		}
+
+		bytes += pkt_bytes;
+		rx->skb_head = NULL;
+		rx->skb_tail = NULL;
+	}
+
+	gve_rx_post_buffers_dqo(rx);
+
+	u64_stats_update_begin(&rx->statss);
+	rx->rpackets += work_done;
+	rx->rbytes += bytes;
+	u64_stats_update_end(&rx->statss);
 
 	return work_done;
 }
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index a0607a824ab9..93f3dcbeeea9 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -64,3 +64,18 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
 	return skb;
 }
 
+void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
+{
+	page_info->pagecnt_bias--;
+	if (page_info->pagecnt_bias == 0) {
+		int pagecount = page_count(page_info->page);
+
+		/* If we have run out of bias - set it back up to INT_MAX
+		 * minus the existing refs.
+		 */
+		page_info->pagecnt_bias = INT_MAX - pagecount;
+
+		/* Set pagecount back up to max. */
+		page_ref_add(page_info->page, INT_MAX - pagecount);
+	}
+}
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 8fb39b990bbc..79595940b351 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -21,5 +21,8 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
 			    struct gve_rx_slot_page_info *page_info, u16 len,
 			    u16 pad);
 
+/* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */
+void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);
+
 #endif /* _GVE_UTILS_H */
 

From 44531076338fc9d9556685d3e7efc2526185760d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 24 Jun 2021 17:55:23 +0300
Subject: [PATCH 2858/3292] Documentation: net: dsa: add details about SJA1110

Denote that the new switch generation is supported, detail its pin
strapping options (with differences compared to SJA1105) and explain how
MDIO access to the internal 100base-T1 and 100base-TX PHYs is performed.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/sja1105.rst | 61 ++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index 7395a33baaf9..da4057ba37f1 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -5,7 +5,7 @@ NXP SJA1105 switch driver
 Overview
 ========
 
-The NXP SJA1105 is a family of 6 devices:
+The NXP SJA1105 is a family of 10 SPI-managed automotive switches:
 
 - SJA1105E: First generation, no TTEthernet
 - SJA1105T: First generation, TTEthernet
@@ -13,9 +13,11 @@ The NXP SJA1105 is a family of 6 devices:
 - SJA1105Q: Second generation, TTEthernet, no SGMII
 - SJA1105R: Second generation, no TTEthernet, SGMII
 - SJA1105S: Second generation, TTEthernet, SGMII
-
-These are SPI-managed automotive switches, with all ports being gigabit
-capable, and supporting MII/RMII/RGMII and optionally SGMII on one port.
+- SJA1110A: Third generation, TTEthernet, SGMII, integrated 100base-T1 and
+  100base-TX PHYs
+- SJA1110B: Third generation, TTEthernet, SGMII, 100base-T1, 100base-TX
+- SJA1110C: Third generation, TTEthernet, SGMII, 100base-T1, 100base-TX
+- SJA1110D: Third generation, TTEthernet, SGMII, 100base-T1
 
 Being automotive parts, their configuration interface is geared towards
 set-and-forget use, with minimal dynamic interaction at runtime. They
@@ -579,3 +581,54 @@ A board would need to hook up the PHYs connected to the switch to any other
 MDIO bus available to Linux within the system (e.g. to the DSA master's MDIO
 bus). Link state management then works by the driver manually keeping in sync
 (over SPI commands) the MAC link speed with the settings negotiated by the PHY.
+
+By comparison, the SJA1110 supports an MDIO slave access point over which its
+internal 100base-T1 PHYs can be accessed from the host. This is, however, not
+used by the driver, instead the internal 100base-T1 and 100base-TX PHYs are
+accessed through SPI commands, modeled in Linux as virtual MDIO buses.
+
+The microcontroller attached to the SJA1110 port 0 also has an MDIO controller
+operating in master mode, however the driver does not support this either,
+since the microcontroller gets disabled when the Linux driver operates.
+Discrete PHYs connected to the switch ports should have their MDIO interface
+attached to an MDIO controller from the host system and not to the switch,
+similar to SJA1105.
+
+Port compatibility matrix
+-------------------------
+
+The SJA1105 port compatibility matrix is:
+
+===== ============== ============== ==============
+Port   SJA1105E/T     SJA1105P/Q     SJA1105R/S
+===== ============== ============== ==============
+0      xMII           xMII           xMII
+1      xMII           xMII           xMII
+2      xMII           xMII           xMII
+3      xMII           xMII           xMII
+4      xMII           xMII           SGMII
+===== ============== ============== ==============
+
+
+The SJA1110 port compatibility matrix is:
+
+===== ============== ============== ============== ==============
+Port   SJA1110A       SJA1110B       SJA1110C       SJA1110D
+===== ============== ============== ============== ==============
+0      RevMII (uC)    RevMII (uC)    RevMII (uC)    RevMII (uC)
+1      100base-TX     100base-TX     100base-TX
+       or SGMII                                     SGMII
+2      xMII           xMII           xMII           xMII
+       or SGMII                                     or SGMII
+3      xMII           xMII           xMII
+       or SGMII       or SGMII                      SGMII
+       or 2500base-X  or 2500base-X                 or 2500base-X
+4      SGMII          SGMII          SGMII          SGMII
+       or 2500base-X  or 2500base-X  or 2500base-X  or 2500base-X
+5      100base-T1     100base-T1     100base-T1     100base-T1
+6      100base-T1     100base-T1     100base-T1     100base-T1
+7      100base-T1     100base-T1     100base-T1     100base-T1
+8      100base-T1     100base-T1     n/a            n/a
+9      100base-T1     100base-T1     n/a            n/a
+10     100base-T1     n/a            n/a            n/a
+===== ============== ============== ============== ==============

From 75e994709f8a7c34ae826731b50ec1a0c4d99404 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 24 Jun 2021 17:55:24 +0300
Subject: [PATCH 2859/3292] net: dsa: sja1105: document the SJA1110 in the
 Kconfig

Mention support for the SJA1110 in menuconfig.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/Kconfig | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig
index 8383cd6d2178..b29d41e5e1e7 100644
--- a/drivers/net/dsa/sja1105/Kconfig
+++ b/drivers/net/dsa/sja1105/Kconfig
@@ -7,8 +7,8 @@ tristate "NXP SJA1105 Ethernet switch family support"
 	select PACKING
 	select CRC32
 	help
-	  This is the driver for the NXP SJA1105 automotive Ethernet switch
-	  family. These are 5-port devices and are managed over an SPI
+	  This is the driver for the NXP SJA1105 (5-port) and SJA1110 (10-port)
+	  automotive Ethernet switch family. These are managed over an SPI
 	  interface. Probing is handled based on OF bindings and so is the
 	  linkage to PHYLINK. The driver supports the following revisions:
 	    - SJA1105E (Gen. 1, No TT-Ethernet)
@@ -17,6 +17,10 @@ tristate "NXP SJA1105 Ethernet switch family support"
 	    - SJA1105Q (Gen. 2, No SGMII, TT-Ethernet)
 	    - SJA1105R (Gen. 2, SGMII, No TT-Ethernet)
 	    - SJA1105S (Gen. 2, SGMII, TT-Ethernet)
+	    - SJA1110A (Gen. 3, SGMII, TT-Ethernet, 100base-TX PHY, 10 ports)
+	    - SJA1110B (Gen. 3, SGMII, TT-Ethernet, 100base-TX PHY, 9 ports)
+	    - SJA1110C (Gen. 3, SGMII, TT-Ethernet, 100base-TX PHY, 7 ports)
+	    - SJA1110D (Gen. 3, SGMII, TT-Ethernet, no 100base-TX PHY, 7 ports)
 
 config NET_DSA_SJA1105_PTP
 	bool "Support for the PTP clock on the NXP SJA1105 Ethernet switch"

From 0dac127c05579854405ef14480936b32371ddaed Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 24 Jun 2021 11:48:08 -0400
Subject: [PATCH 2860/3292] sctp: do black hole detection in search complete
 state

Currently the PLPMUTD probe will stop for a long period (interval * 30)
after it enters search complete state. If there's a pmtu change on the
route path, it takes a long time to be aware if the ICMP TooBig packet
is lost or filtered.

As it says in rfc8899#section-4.3:

  "A DPLPMTUD method MUST NOT rely solely on this method."
  (ICMP PTB message).

This patch is to enable the other method for search complete state:

  "A PL can use the DPLPMTUD probing mechanism to periodically
   generate probe packets of the size of the current PLPMTU."

With this patch, the probe will continue with the current pmtu every
'interval' until the PMTU_RAISE_TIMER 'timeout', which we implement
by adding raise_count to raise the probe size when it counts to 30
and removing the SCTP_PL_COMPLETE check for PMTU_RAISE_TIMER.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  3 ++-
 net/sctp/transport.c       | 11 ++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 9eaa701cda23..c4a4c1754be8 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -987,7 +987,8 @@ struct sctp_transport {
 		__u16 pmtu;
 		__u16 probe_size;
 		__u16 probe_high;
-		__u8 probe_count;
+		__u8 probe_count:3;
+		__u8 raise_count:5;
 		__u8 state;
 	} pl; /* plpmtud related */
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f27b856ea8ce..5f23804f21c7 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -213,15 +213,10 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
 
 void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
 {
-	int scale = 1;
-
 	if (timer_pending(&transport->probe_timer))
 		return;
-	if (transport->pl.state == SCTP_PL_COMPLETE &&
-	    transport->pl.probe_count == 1)
-		scale = 30; /* works as PMTU_RAISE_TIMER */
 	if (!mod_timer(&transport->probe_timer,
-		       jiffies + transport->probe_interval * scale))
+		       jiffies + transport->probe_interval))
 		sctp_transport_hold(transport);
 }
 
@@ -333,13 +328,15 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
 		t->pl.probe_size += SCTP_PL_MIN_STEP;
 		if (t->pl.probe_size >= t->pl.probe_high) {
 			t->pl.probe_high = 0;
+			t->pl.raise_count = 0;
 			t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
 
 			t->pl.probe_size = t->pl.pmtu;
 			t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
 			sctp_assoc_sync_pmtu(t->asoc);
 		}
-	} else if (t->pl.state == SCTP_PL_COMPLETE) {
+	} else if (t->pl.state == SCTP_PL_COMPLETE && ++t->pl.raise_count == 30) {
+		/* Raise probe_size again after 30 * interval in Search Complete */
 		t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
 		t->pl.probe_size += SCTP_PL_MIN_STEP;
 	}

From fea1d5b17f821b78abbdadb9cb6f28fe433b635e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 24 Jun 2021 11:48:09 -0400
Subject: [PATCH 2861/3292] sctp: send the next probe immediately once the last
 one is acked

These is no need to wait for 'interval' period for the next probe
if the last probe is already acked in search state. The 'interval'
period waiting should be only for probe failure timeout and the
current pmtu check when it's in search complete state.

This change will shorten the probe time a lot in search state, and
also fix the document accordingly.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst | 12 ++++++++----
 net/sctp/sm_statefuns.c                |  5 ++++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 8bff728b3a1e..b3fa522e4cd9 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2835,10 +2835,14 @@ encap_port - INTEGER
 	Default: 0
 
 plpmtud_probe_interval - INTEGER
-        The time interval (in milliseconds) for sending PLPMTUD probe chunks.
-        These chunks are sent at the specified interval with a variable size
-        to probe the mtu of a given path between 2 endpoints. PLPMTUD will
-        be disabled when 0 is set, and other values for it must be >= 5000.
+        The time interval (in milliseconds) for the PLPMTUD probe timer,
+        which is configured to expire after this period to receive an
+        acknowledgment to a probe packet. This is also the time interval
+        between the probes for the current pmtu when the probe search
+        is done.
+
+        PLPMTUD will be disabled when 0 is set, and other values for it
+        must be >= 5000.
 
 	Default: 0
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d29b579da904..09a8f23ec709 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1275,7 +1275,10 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
 			return SCTP_DISPOSITION_DISCARD;
 
 		sctp_transport_pl_recv(link);
-		return SCTP_DISPOSITION_CONSUME;
+		if (link->pl.state == SCTP_PL_COMPLETE)
+			return SCTP_DISPOSITION_CONSUME;
+
+		return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
 	}
 
 	max_interval = link->hbinterval + link->rto;

From 0060a4f28a9ef45ae8163c0805e944a2b1546762 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 24 Jun 2021 15:28:04 -0500
Subject: [PATCH 2862/3292] cifs: fix missing spinlock around update to
 ses->status

In the other places where we update ses->status we protect the
updates via GlobalMid_Lock. So to be consistent add the same
locking around it in cifs_put_smb_ses where it was missing.

Addresses-Coverity: 1268904 ("Data race condition")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h | 3 ++-
 fs/cifs/connect.c  | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 988346904fd0..fc6b08e5ebbc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -887,7 +887,7 @@ struct cifs_ses {
 	struct mutex session_mutex;
 	struct TCP_Server_Info *server;	/* pointer to server info */
 	int ses_count;		/* reference counter */
-	enum statusEnum status;
+	enum statusEnum status;  /* updates protected by GlobalMid_Lock */
 	unsigned overrideSecFlg;  /* if non-zero override global sec flags */
 	char *serverOS;		/* name of operating system underlying server */
 	char *serverNOS;	/* name of network operating system of server */
@@ -1785,6 +1785,7 @@ require use of the stronger protocol */
  *	list operations on pending_mid_q and oplockQ
  *      updates to XID counters, multiplex id  and SMB sequence numbers
  *      list operations on global DnotifyReqList
+ *      updates to ses->status
  *  tcp_ses_lock protects:
  *	list operations on tcp and SMB session lists
  *  tcon->open_file_lock protects the list of open files hanging off the tcon
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c8079376d294..5d269f583dac 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1598,9 +1598,12 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
 		spin_unlock(&cifs_tcp_ses_lock);
 		return;
 	}
+	spin_unlock(&cifs_tcp_ses_lock);
+
+	spin_lock(&GlobalMid_Lock);
 	if (ses->status == CifsGood)
 		ses->status = CifsExiting;
-	spin_unlock(&cifs_tcp_ses_lock);
+	spin_unlock(&GlobalMid_Lock);
 
 	cifs_free_ipc(ses);
 

From 2e7256f12cdb16eaa2515b6231d665044a07c51a Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Thu, 24 Jun 2021 12:02:48 -0700
Subject: [PATCH 2863/3292] e1000e: Check the PCIm state

Complete to commit def4ec6dce393e ("e1000e: PCIm function state support")
Check the PCIm state only on CSME systems. There is no point to do this
check on non CSME systems.
This patch fixes a generation a false-positive warning:
"Error in exiting dmoff"

Fixes: def4ec6dce39 ("e1000e: PCIm function state support")
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 88e9035b75cf..dc0ded7e5e61 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5223,18 +5223,20 @@ static void e1000_watchdog_task(struct work_struct *work)
 			pm_runtime_resume(netdev->dev.parent);
 
 			/* Checking if MAC is in DMoff state*/
-			pcim_state = er32(STATUS);
-			while (pcim_state & E1000_STATUS_PCIM_STATE) {
-				if (tries++ == dmoff_exit_timeout) {
-					e_dbg("Error in exiting dmoff\n");
-					break;
-				}
-				usleep_range(10000, 20000);
+			if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
 				pcim_state = er32(STATUS);
+				while (pcim_state & E1000_STATUS_PCIM_STATE) {
+					if (tries++ == dmoff_exit_timeout) {
+						e_dbg("Error in exiting dmoff\n");
+						break;
+					}
+					usleep_range(10000, 20000);
+					pcim_state = er32(STATUS);
 
-				/* Checking if MAC exited DMoff state */
-				if (!(pcim_state & E1000_STATUS_PCIM_STATE))
-					e1000_phy_hw_reset(&adapter->hw);
+					/* Checking if MAC exited DMoff state */
+					if (!(pcim_state & E1000_STATUS_PCIM_STATE))
+						e1000_phy_hw_reset(&adapter->hw);
+				}
 			}
 
 			/* update snapshot of PHY registers on LSC */

From e8192476de58f044576adb2179dd1a05e3a9e903 Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 15:08:52 -0700
Subject: [PATCH 2864/3292] gve: Fix warnings reported for DQO patchset

https://patchwork.kernel.org/project/netdevbpf/list/?series=506637&state=*

- Remove unused variable
- Use correct integer type for string formatting.
- Remove `inline` in C files

Fixes: 9c1a59a2f4bc ("gve: DQO: Add ring allocation and initialization")
Fixes: a57e5de476be ("gve: DQO: Add TX path")
Signed-off-by: Bailey Forrest <bcf@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c   | 2 +-
 drivers/net/ethernet/google/gve/gve_tx_dqo.c | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 1bf446836724..ac4819c25aca 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -696,7 +696,7 @@ static int gve_destroy_rings(struct gve_priv *priv)
 	return 0;
 }
 
-static inline void gve_rx_free_rings(struct gve_priv *priv)
+static void gve_rx_free_rings(struct gve_priv *priv)
 {
 	if (gve_is_gqi(priv))
 		gve_rx_free_rings_gqi(priv);
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index a4906b9df540..05ddb6a75c38 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -261,7 +261,7 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv)
 }
 
 /* Returns the number of slots available in the ring */
-static inline u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
+static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
 {
 	u32 num_used = (tx->dqo_tx.tail - tx->dqo_tx.head) & tx->mask;
 
@@ -727,9 +727,8 @@ static void remove_from_list(struct gve_tx_ring *tx,
 			     struct gve_index_list *list,
 			     struct gve_tx_pending_packet_dqo *pending_packet)
 {
-	s16 index, prev_index, next_index;
+	s16 prev_index, next_index;
 
-	index = pending_packet - tx->dqo.pending_packets;
 	prev_index = pending_packet->prev;
 	next_index = pending_packet->next;
 
@@ -890,9 +889,9 @@ static void remove_miss_completions(struct gve_priv *priv,
 		dev_kfree_skb_any(pending_packet->skb);
 		pending_packet->skb = NULL;
 		tx->dropped_pkt++;
-		net_err_ratelimited("%s: No reinjection completion was received for: %ld.\n",
+		net_err_ratelimited("%s: No reinjection completion was received for: %d.\n",
 				    priv->dev->name,
-				    (pending_packet - tx->dqo.pending_packets));
+				    (int)(pending_packet - tx->dqo.pending_packets));
 
 		pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
 		pending_packet->timeout_jiffies =

From 911bd1b1f08f31b74607c3e989955b97da588279 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 24 Jun 2021 22:47:24 +0300
Subject: [PATCH 2865/3292] mlxsw: core_env: Avoid unnecessary memcpy()s

Simply get a pointer to the data in the register payload instead of
copying it to a temporary buffer.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core_env.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 4a0dbdb6730b..3713c45cfa1e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -26,8 +26,8 @@ struct mlxsw_env {
 static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
 					  bool *qsfp, bool *cmis)
 {
-	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 	char mcia_pl[MLXSW_REG_MCIA_LEN];
+	char *eeprom_tmp;
 	u8 ident;
 	int err;
 
@@ -36,7 +36,7 @@ static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
 	err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
 	if (err)
 		return err;
-	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+	eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl);
 	ident = eeprom_tmp[0];
 	*cmis = false;
 	switch (ident) {
@@ -64,8 +64,8 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
 			      u16 offset, u16 size, void *data,
 			      bool qsfp, unsigned int *p_read_size)
 {
-	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 	char mcia_pl[MLXSW_REG_MCIA_LEN];
+	char *eeprom_tmp;
 	u16 i2c_addr;
 	u8 page = 0;
 	int status;
@@ -116,7 +116,7 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
 	if (status)
 		return -EIO;
 
-	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+	eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl);
 	memcpy(data, eeprom_tmp, size);
 	*p_read_size = size;
 
@@ -127,13 +127,13 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 					 int off, int *temp)
 {
 	unsigned int module_temp, module_crit, module_emerg;
-	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 	union {
 		u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE];
 		u16 temp;
 	} temp_thresh;
 	char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	char *eeprom_tmp;
 	bool qsfp, cmis;
 	int page;
 	int err;
@@ -195,7 +195,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
 	if (err)
 		return err;
 
-	mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+	eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl);
 	memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE);
 	*temp = temp_thresh.temp * 1000;
 
@@ -357,8 +357,8 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
 	device_addr = page->offset;
 
 	while (bytes_read < page->length) {
-		char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 		char mcia_pl[MLXSW_REG_MCIA_LEN];
+		char *eeprom_tmp;
 		u8 size;
 		int err;
 
@@ -380,7 +380,7 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
 		if (err)
 			return err;
 
-		mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+		eeprom_tmp = mlxsw_reg_mcia_eeprom_data(mcia_pl);
 		memcpy(page->data + bytes_read, eeprom_tmp, size);
 		bytes_read += size;
 	}

From be7f62eebaff2f86c1467a2d33930a0a7a87675b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 24 Jun 2021 18:52:07 +0300
Subject: [PATCH 2866/3292] net: dsa: sja1105: fix NULL pointer dereference in
 sja1105_reload_cbs()

priv->cbs is an array of priv->info->num_cbs_shapers elements of type
struct sja1105_cbs_entry which only get allocated if CONFIG_NET_SCH_CBS
is enabled.

However, sja1105_reload_cbs() is called from sja1105_static_config_reload()
which in turn is called for any of the items in sja1105_reset_reasons,
therefore during the normal runtime of the driver and not just from a
code path which can be triggered by the tc-cbs offload.

The sja1105_reload_cbs() function does not contain a check whether the
priv->cbs array is NULL or not, it just assumes it isn't and proceeds to
iterate through the credit-based shaper elements. This leads to a NULL
pointer dereference.

The solution is to return success if the priv->cbs array has not been
allocated, since sja1105_reload_cbs() has nothing to do.

Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index b88d9ef45a1f..ebe4d33cda27 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1798,6 +1798,12 @@ static int sja1105_reload_cbs(struct sja1105_private *priv)
 {
 	int rc = 0, i;
 
+	/* The credit based shapers are only allocated if
+	 * CONFIG_NET_SCH_CBS is enabled.
+	 */
+	if (!priv->cbs)
+		return 0;
+
 	for (i = 0; i < priv->info->num_cbs_shapers; i++) {
 		struct sja1105_cbs_entry *cbs = &priv->cbs[i];
 

From c305b9e6d553f73b8434dd781736d180d63b1d64 Mon Sep 17 00:00:00 2001
From: zhang kai <zhangkaiheb@126.com>
Date: Thu, 24 Jun 2021 11:09:14 +0800
Subject: [PATCH 2867/3292] ipv6: delete useless dst check in
 ip6_dst_lookup_tail

parameter dst always points to null.

Signed-off-by: zhang kai <zhangkaiheb@126.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 497974b4372a..984050f35c61 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 	 * ip6_route_output will fail given src=any saddr, though, so
 	 * that's why we try it again later.
 	 */
-	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+	if (ipv6_addr_any(&fl6->saddr)) {
 		struct fib6_info *from;
 		struct rt6_info *rt;
-		bool had_dst = *dst != NULL;
 
-		if (!had_dst)
-			*dst = ip6_route_output(net, sk, fl6);
+		*dst = ip6_route_output(net, sk, fl6);
 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
 
 		rcu_read_lock();
@@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 		 * never existed and let the SA-enabled version take
 		 * over.
 		 */
-		if (!had_dst && (*dst)->error) {
+		if ((*dst)->error) {
 			dst_release(*dst);
 			*dst = NULL;
 		}

From 19938bafa7ae8fc0a4a2c1c1430abb1a04668da1 Mon Sep 17 00:00:00 2001
From: Jian-Hong Pan <jhp@endlessos.org>
Date: Thu, 24 Jun 2021 11:22:41 +0800
Subject: [PATCH 2868/3292] net: bcmgenet: Add mdio-bcm-unimac soft dependency

The Broadcom UniMAC MDIO bus from mdio-bcm-unimac module comes too late.
So, GENET cannot find the ethernet PHY on UniMAC MDIO bus. This leads
GENET fail to attach the PHY as following log:

bcmgenet fd580000.ethernet: GENET 5.0 EPHY: 0x0000
...
could not attach to PHY
bcmgenet fd580000.ethernet eth0: failed to connect to PHY
uart-pl011 fe201000.serial: no DMA platform data
libphy: bcmgenet MII bus: probed
...
unimac-mdio unimac-mdio.-19: Broadcom UniMAC MDIO bus

It is not just coming too late, there is also no way for the module
loader to figure out the dependency between GENET and its MDIO bus
driver unless we provide this MODULE_SOFTDEP hint.

This patch adds the soft dependency to load mdio-bcm-unimac module
before genet module to fix this issue.

Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=213485
Fixes: 9a4e79697009 ("net: bcmgenet: utilize generic Broadcom UniMAC MDIO controller driver")
Signed-off-by: Jian-Hong Pan <jhp@endlessos.org>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index fcca023f22e5..41f7f078cd27 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4296,3 +4296,4 @@ MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Broadcom GENET Ethernet controller driver");
 MODULE_ALIAS("platform:bcmgenet");
 MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: mdio-bcm-unimac");

From a921c655f2033dd1ce1379128efe881dda23ea37 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 23 Jun 2021 11:36:33 +0200
Subject: [PATCH 2869/3292] bfq: Remove merged request already in
 bfq_requests_merged()

Currently, bfq does very little in bfq_requests_merged() and handles all
the request cleanup in bfq_finish_requeue_request() called from
blk_mq_free_request(). That is currently safe only because
blk_mq_free_request() is called shortly after bfq_requests_merged()
while bfqd->lock is still held. However to fix a lock inversion between
bfqd->lock and ioc->lock, we need to call blk_mq_free_request() after
dropping bfqd->lock. That would mean that already merged request could
be seen by other processes inside bfq queues and possibly dispatched to
the device which is wrong. So move cleanup of the request from
bfq_finish_requeue_request() to bfq_requests_merged().

Acked-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210623093634.27879-2-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 41 +++++++++++++----------------------------
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index fedb0a8fd388..9433d38e486c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2433,7 +2433,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 		*next_bfqq = bfq_init_rq(next);
 
 	if (!bfqq)
-		return;
+		goto remove;
 
 	/*
 	 * If next and rq belong to the same bfq_queue and next is older
@@ -2456,6 +2456,14 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 		bfqq->next_rq = rq;
 
 	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+remove:
+	/* Merged request may be in the IO scheduler. Remove it. */
+	if (!RB_EMPTY_NODE(&next->rb_node)) {
+		bfq_remove_request(next->q, next);
+		if (next_bfqq)
+			bfqg_stats_update_io_remove(bfqq_group(next_bfqq),
+						    next->cmd_flags);
+	}
 }
 
 /* Must be called with bfqq != NULL */
@@ -6414,6 +6422,7 @@ static void bfq_finish_requeue_request(struct request *rq)
 {
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd;
+	unsigned long flags;
 
 	/*
 	 * rq either is not associated with any icq, or is an already
@@ -6431,39 +6440,15 @@ static void bfq_finish_requeue_request(struct request *rq)
 					     rq->io_start_time_ns,
 					     rq->cmd_flags);
 
+	spin_lock_irqsave(&bfqd->lock, flags);
 	if (likely(rq->rq_flags & RQF_STARTED)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&bfqd->lock, flags);
-
 		if (rq == bfqd->waited_rq)
 			bfq_update_inject_limit(bfqd, bfqq);
 
 		bfq_completed_request(bfqq, bfqd);
-		bfq_finish_requeue_request_body(bfqq);
-
-		spin_unlock_irqrestore(&bfqd->lock, flags);
-	} else {
-		/*
-		 * Request rq may be still/already in the scheduler,
-		 * in which case we need to remove it (this should
-		 * never happen in case of requeue). And we cannot
-		 * defer such a check and removal, to avoid
-		 * inconsistencies in the time interval from the end
-		 * of this function to the start of the deferred work.
-		 * This situation seems to occur only in process
-		 * context, as a consequence of a merge. In the
-		 * current version of the code, this implies that the
-		 * lock is held.
-		 */
-
-		if (!RB_EMPTY_NODE(&rq->rb_node)) {
-			bfq_remove_request(rq->q, rq);
-			bfqg_stats_update_io_remove(bfqq_group(bfqq),
-						    rq->cmd_flags);
-		}
-		bfq_finish_requeue_request_body(bfqq);
 	}
+	bfq_finish_requeue_request_body(bfqq);
+	spin_unlock_irqrestore(&bfqd->lock, flags);
 
 	/*
 	 * Reset private fields. In case of a requeue, this allows

From fd2ef39cc9a6b9c4c41864ac506906c52f94b06a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 23 Jun 2021 11:36:34 +0200
Subject: [PATCH 2870/3292] blk: Fix lock inversion between ioc lock and bfqd
 lock

Lockdep complains about lock inversion between ioc->lock and bfqd->lock:

bfqd -> ioc:
 put_io_context+0x33/0x90 -> ioc->lock grabbed
 blk_mq_free_request+0x51/0x140
 blk_put_request+0xe/0x10
 blk_attempt_req_merge+0x1d/0x30
 elv_attempt_insert_merge+0x56/0xa0
 blk_mq_sched_try_insert_merge+0x4b/0x60
 bfq_insert_requests+0x9e/0x18c0 -> bfqd->lock grabbed
 blk_mq_sched_insert_requests+0xd6/0x2b0
 blk_mq_flush_plug_list+0x154/0x280
 blk_finish_plug+0x40/0x60
 ext4_writepages+0x696/0x1320
 do_writepages+0x1c/0x80
 __filemap_fdatawrite_range+0xd7/0x120
 sync_file_range+0xac/0xf0

ioc->bfqd:
 bfq_exit_icq+0xa3/0xe0 -> bfqd->lock grabbed
 put_io_context_active+0x78/0xb0 -> ioc->lock grabbed
 exit_io_context+0x48/0x50
 do_exit+0x7e9/0xdd0
 do_group_exit+0x54/0xc0

To avoid this inversion we change blk_mq_sched_try_insert_merge() to not
free the merged request but rather leave that upto the caller similarly
to blk_mq_sched_try_merge(). And in bfq_insert_requests() we make sure
to free all the merged requests after dropping bfqd->lock.

Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler")
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210623093634.27879-3-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c      |  6 ++++--
 block/blk-merge.c        | 19 ++++++++-----------
 block/blk-mq-sched.c     |  5 +++--
 block/blk-mq-sched.h     |  3 ++-
 block/blk-mq.h           | 11 +++++++++++
 block/blk.h              |  2 +-
 block/elevator.c         | 11 ++++++++---
 block/mq-deadline-main.c |  5 ++++-
 include/linux/elevator.h |  3 ++-
 9 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 9433d38e486c..727955918563 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2345,9 +2345,9 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 
 	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
 
+	spin_unlock_irq(&bfqd->lock);
 	if (free)
 		blk_mq_free_request(free);
-	spin_unlock_irq(&bfqd->lock);
 
 	return ret;
 }
@@ -5969,14 +5969,16 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct bfq_queue *bfqq;
 	bool idle_timer_disabled = false;
 	unsigned int cmd_flags;
+	LIST_HEAD(free);
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
 		bfqg_stats_update_legacy_io(q, rq);
 #endif
 	spin_lock_irq(&bfqd->lock);
-	if (blk_mq_sched_try_insert_merge(q, rq)) {
+	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
 		spin_unlock_irq(&bfqd->lock);
+		blk_mq_free_requests(&free);
 		return;
 	}
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4d97fb6dd226..1398b52a24b4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -846,18 +846,15 @@ static struct request *attempt_front_merge(struct request_queue *q,
 	return NULL;
 }
 
-int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
-			  struct request *next)
+/*
+ * Try to merge 'next' into 'rq'. Return true if the merge happened, false
+ * otherwise. The caller is responsible for freeing 'next' if the merge
+ * happened.
+ */
+bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+			   struct request *next)
 {
-	struct request *free;
-
-	free = attempt_merge(q, rq, next);
-	if (free) {
-		blk_put_request(free);
-		return 1;
-	}
-
-	return 0;
+	return attempt_merge(q, rq, next);
 }
 
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 2403a5c2b053..c838d81ac058 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -399,9 +399,10 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 	return ret;
 }
 
-bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
+bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
+				   struct list_head *free)
 {
-	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
+	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index aff037cfd8e7..5246ae040704 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -13,7 +13,8 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs, struct request **merged_request);
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs);
-bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
+bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
+				   struct list_head *free);
 void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx);
 void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 4b1ca7b7bbeb..d08779f77a26 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -302,6 +302,17 @@ static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
 	return NULL;
 }
 
+/* Free all requests on the list */
+static inline void blk_mq_free_requests(struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct request *rq = list_entry_rq(list->next);
+
+		list_del_init(&rq->queuelist);
+		blk_mq_free_request(rq);
+	}
+}
+
 /*
  * For shared tag users, we track the number of currently active users
  * and attempt to provide a fair share of the tag depth for each of them.
diff --git a/block/blk.h b/block/blk.h
index 4fcd7a032377..4b885c0f6708 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -224,7 +224,7 @@ ssize_t part_timeout_store(struct device *, struct device_attribute *,
 void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
 int ll_back_merge_fn(struct request *req, struct bio *bio,
 		unsigned int nr_segs);
-int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 				struct request *next);
 unsigned int blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
diff --git a/block/elevator.c b/block/elevator.c
index 85d0d4adbb64..52ada14cfe45 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -350,9 +350,11 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req,
  * we can append 'rq' to an existing request, so we can throw 'rq' away
  * afterwards.
  *
- * Returns true if we merged, false otherwise
+ * Returns true if we merged, false otherwise. 'free' will contain all
+ * requests that need to be freed.
  */
-bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
+bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
+			      struct list_head *free)
 {
 	struct request *__rq;
 	bool ret;
@@ -363,8 +365,10 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
 	/*
 	 * First try one-hit cache.
 	 */
-	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
+	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
+		list_add(&rq->queuelist, free);
 		return true;
+	}
 
 	if (blk_queue_noxmerges(q))
 		return false;
@@ -378,6 +382,7 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
 			break;
 
+		list_add(&rq->queuelist, free);
 		/* The merged request could be merged with others, try again */
 		ret = true;
 		rq = __rq;
diff --git a/block/mq-deadline-main.c b/block/mq-deadline-main.c
index 4815e536091f..9db6da9ef4c6 100644
--- a/block/mq-deadline-main.c
+++ b/block/mq-deadline-main.c
@@ -719,6 +719,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
 	struct dd_blkcg *blkcg;
+	LIST_HEAD(free);
 
 	lockdep_assert_held(&dd->lock);
 
@@ -742,8 +743,10 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	WARN_ON_ONCE(rq->elv.priv[0]);
 	rq->elv.priv[0] = blkcg;
 
-	if (blk_mq_sched_try_insert_merge(q, rq))
+	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
+		blk_mq_free_requests(&free);
 		return;
+	}
 
 	trace_block_rq_insert(rq);
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 783ecb3cb77a..ef9ceead3db1 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -117,7 +117,8 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *,
 		enum elv_merge);
-extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
+extern bool elv_attempt_insert_merge(struct request_queue *, struct request *,
+				     struct list_head *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
 void elevator_init_mq(struct request_queue *q);

From a196fa78a26571359740f701cf30d774eb8a72cb Mon Sep 17 00:00:00 2001
From: Gary Lin <glin@suse.com>
Date: Wed, 23 Jun 2021 12:09:18 +0800
Subject: [PATCH 2871/3292] bpfilter: Specify the log level for the kmsg
 message

Per the kmsg document [0], if we don't specify the log level with a
prefix "<N>" in the message string, the default log level will be
applied to the message. Since the default level could be warning(4),
this would make the log utility such as journalctl treat the message,
"Started bpfilter", as a warning. To avoid confusion, this commit
adds the prefix "<5>" to make the message always a notice.

  [0] https://www.kernel.org/doc/Documentation/ABI/testing/dev-kmsg

Fixes: 36c4357c63f3 ("net: bpfilter: print umh messages to /dev/kmsg")
Reported-by: Martin Loviska <mloviska@suse.com>
Signed-off-by: Gary Lin <glin@suse.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Link: https://lore.kernel.org/bpf/20210623040918.8683-1-glin@suse.com
---
 net/bpfilter/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index 05e1cfc1e5cd..291a92546246 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -57,7 +57,7 @@ int main(void)
 {
 	debug_f = fopen("/dev/kmsg", "w");
 	setvbuf(debug_f, 0, _IOLBF, 0);
-	fprintf(debug_f, "Started bpfilter\n");
+	fprintf(debug_f, "<5>Started bpfilter\n");
 	loop();
 	fclose(debug_f);
 	return 0;

From 24e166f43e93de0e9b0a460ecfe4bab1f12212d7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 29 May 2021 17:14:21 +0200
Subject: [PATCH 2872/3292] HID: core: Add hid_hw_may_wakeup() function

Add a hid_hw_may_wakeup() function, which is the equivalent of
device_may_wakeup() for hid devices.

In most cases this just returns device_may_wakeup(hdev->dev.parent), but for
some ll-drivers this is not correct. E.g. usb_hid_driver instantiated hid
devices have their parent set to the usb-interface to which the usb_hid_driver
is bound, but the power/wakeup* sysfs attributes are part of the usb-device,
which is the usb-interface's parent.

For these special cases a new may_wakeup callback is added to
hid_ll_driver, so that ll-drivers can override the default behavior.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 10e922cee4eb..51a4dad3565e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -800,6 +800,7 @@ struct hid_driver {
  * @raw_request: send raw report request to device (e.g. feature report)
  * @output_report: send output report to device
  * @idle: send idle request to device
+ * @may_wakeup: return if device may act as a wakeup source during system-suspend
  */
 struct hid_ll_driver {
 	int (*start)(struct hid_device *hdev);
@@ -824,6 +825,7 @@ struct hid_ll_driver {
 	int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len);
 
 	int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype);
+	bool (*may_wakeup)(struct hid_device *hdev);
 };
 
 extern struct hid_ll_driver i2c_hid_ll_driver;
@@ -1149,6 +1151,22 @@ static inline int hid_hw_idle(struct hid_device *hdev, int report, int idle,
 	return 0;
 }
 
+/**
+ * hid_may_wakeup - return if the hid device may act as a wakeup source during system-suspend
+ *
+ * @hdev: hid device
+ */
+static inline bool hid_hw_may_wakeup(struct hid_device *hdev)
+{
+	if (hdev->ll_driver->may_wakeup)
+		return hdev->ll_driver->may_wakeup(hdev);
+
+	if (hdev->dev.parent)
+		return device_may_wakeup(hdev->dev.parent);
+
+	return false;
+}
+
 /**
  * hid_hw_wait - wait for buffered io to complete
  *

From 978e786c5e9bf538da0a4807539608a52d9be16b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 29 May 2021 17:14:22 +0200
Subject: [PATCH 2873/3292] HID: usbhid: Implement may_wakeup ll-driver
 callback

Without a ll-driver callback hid_hw_may_wakeup() will return:
device_may_wakeup(hdev->dev.parent), usb_hid_driver instantiated hid
devices have their parent set to the usb-interface to which the
usb_hid_driver is bound, but the power/wakeup* sysfs attributes are
part of the usb-device.

Add a may_wakeup ll-driver callback which calls device_may_wakeup()
on the usb-device instead.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hid-core.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 4e9077363c96..06130dc431a0 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1304,6 +1304,13 @@ static int usbhid_idle(struct hid_device *hid, int report, int idle,
 	return hid_set_idle(dev, ifnum, report, idle);
 }
 
+static bool usbhid_may_wakeup(struct hid_device *hid)
+{
+	struct usb_device *dev = hid_to_usb_dev(hid);
+
+	return device_may_wakeup(&dev->dev);
+}
+
 struct hid_ll_driver usb_hid_driver = {
 	.parse = usbhid_parse,
 	.start = usbhid_start,
@@ -1316,6 +1323,7 @@ struct hid_ll_driver usb_hid_driver = {
 	.raw_request = usbhid_raw_request,
 	.output_report = usbhid_output_report,
 	.idle = usbhid_idle,
+	.may_wakeup = usbhid_may_wakeup,
 };
 EXPORT_SYMBOL_GPL(usb_hid_driver);
 

From 622d97cf7f2b4efb36bec3c85b5c1db5e3dfd586 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 29 May 2021 17:14:23 +0200
Subject: [PATCH 2874/3292] HID: logitech-dj: Implement may_wakeup ll-driver
 callback

Without a ll-driver callback hid_hw_may_wakeup() will return:
device_may_wakeup(hdev->dev.parent), but for the hid devices
instantiated by logitech-dj for devices behind the receiver the
logitech-dj hid(pp)-device is the parent.

Add a logi_dj_ll_may_wakeup() callback which calls hid_hw_may_wakeup()
on the logitech-dj hid(pp) parent-hid-device.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-logitech-dj.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index fa835d565982..f96349faa5f4 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -1489,6 +1489,13 @@ static void logi_dj_ll_stop(struct hid_device *hid)
 	dbg_hid("%s\n", __func__);
 }
 
+static bool logi_dj_ll_may_wakeup(struct hid_device *hid)
+{
+	struct dj_device *djdev = hid->driver_data;
+	struct dj_receiver_dev *djrcv_dev = djdev->dj_receiver_dev;
+
+	return hid_hw_may_wakeup(djrcv_dev->hidpp);
+}
 
 static struct hid_ll_driver logi_dj_ll_driver = {
 	.parse = logi_dj_ll_parse,
@@ -1497,6 +1504,7 @@ static struct hid_ll_driver logi_dj_ll_driver = {
 	.open = logi_dj_ll_open,
 	.close = logi_dj_ll_close,
 	.raw_request = logi_dj_ll_raw_request,
+	.may_wakeup = logi_dj_ll_may_wakeup,
 };
 
 static int logi_dj_dj_event(struct hid_device *hdev,

From 498d0ddc6ae931e4e79a57c56b6dd4576aa435b6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 29 May 2021 17:14:24 +0200
Subject: [PATCH 2875/3292] HID: multitouch: Disable event reporting on suspend
 when the device is not a wakeup-source

Disable event reporting on suspend when the hid device is not
a wakeup-source. This should help save some extra power in this case.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-multitouch.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 2e4fb76c45f3..3ea7cb1cda84 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1768,7 +1768,8 @@ static int mt_suspend(struct hid_device *hdev, pm_message_t state)
 	struct mt_device *td = hid_get_drvdata(hdev);
 
 	/* High latency is desirable for power savings during S3/S0ix */
-	if (td->mtclass.quirks & MT_QUIRK_DISABLE_WAKEUP)
+	if ((td->mtclass.quirks & MT_QUIRK_DISABLE_WAKEUP) ||
+	    !hid_hw_may_wakeup(hdev))
 		mt_set_modes(hdev, HID_LATENCY_HIGH, false, false);
 	else
 		mt_set_modes(hdev, HID_LATENCY_HIGH, true, true);

From 3b770932eefb7c0c6319d332023efee87eb12913 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 11 May 2021 19:30:33 +0800
Subject: [PATCH 2876/3292] HID: thrustmaster: Switch to kmemdup() when
 allocate change_request

Use kmemdup() helper instead of open-coding to
simplify the code when allocate change_request.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-thrustmaster.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index f643b1cb112d..cdc7d82ae9ed 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -311,12 +311,13 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i
 		goto error4;
 	}
 
-	tm_wheel->change_request = kzalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL);
+	tm_wheel->change_request = kmemdup(&change_request,
+					   sizeof(struct usb_ctrlrequest),
+					   GFP_KERNEL);
 	if (!tm_wheel->change_request) {
 		ret = -ENOMEM;
 		goto error5;
 	}
-	memcpy(tm_wheel->change_request, &change_request, sizeof(struct usb_ctrlrequest));
 
 	tm_wheel->usb_dev = interface_to_usbdev(to_usb_interface(hdev->dev.parent));
 	hid_set_drvdata(hdev, tm_wheel);

From 3089cf6d3caa1eb344aac05c875f4aeaf891552d Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 8 Jun 2021 16:35:17 -0700
Subject: [PATCH 2877/3292] ice: add tracepoints

This patch is modeled after one by Scott Peterson for i40e.

Add tracepoints to the driver, via a new file ice_trace.h and some new
trace calls added in interesting places in the driver. Add some tracing
for DIMLIB to help debug interrupt moderation problems.

Performance should not be affected, and this can be very useful
for debugging and adding new trace events to paths in the future.

Note eBPF programs can attach to these events, as well as perf
can count them since we're attaching to the events subsystem
in the kernel.

Co-developed-by: Ben Shelton <benjamin.h.shelton@intel.com>
Signed-off-by: Ben Shelton <benjamin.h.shelton@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c  |   8 +
 drivers/net/ethernet/intel/ice/ice_trace.h | 232 +++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_txrx.c  |   9 +
 3 files changed, 249 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_trace.h

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5c3ea504770a..b72ab9e97e79 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -13,6 +13,12 @@
 #include "ice_dcb_lib.h"
 #include "ice_dcb_nl.h"
 #include "ice_devlink.h"
+/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
+ * ice tracepoint functions. This must be done exactly once across the
+ * ice driver.
+ */
+#define CREATE_TRACE_POINTS
+#include "ice_trace.h"
 
 #define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
 static const char ice_driver_string[] = DRV_SUMMARY;
@@ -5477,6 +5483,7 @@ static void ice_tx_dim_work(struct work_struct *work)
 	itr = tx_profile[dim->profile_ix].itr;
 	intrl = tx_profile[dim->profile_ix].intrl;
 
+	ice_trace(tx_dim_work, q_vector, dim);
 	ice_write_itr(rc, itr);
 	ice_write_intrl(q_vector, intrl);
 
@@ -5501,6 +5508,7 @@ static void ice_rx_dim_work(struct work_struct *work)
 	itr = rx_profile[dim->profile_ix].itr;
 	intrl = rx_profile[dim->profile_ix].intrl;
 
+	ice_trace(rx_dim_work, q_vector, dim);
 	ice_write_itr(rc, itr);
 	ice_write_intrl(q_vector, intrl);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h
new file mode 100644
index 000000000000..9bc0b8fdfc77
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_trace.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021 Intel Corporation. */
+
+/* Modeled on trace-events-sample.h */
+
+/* The trace subsystem name for ice will be "ice".
+ *
+ * This file is named ice_trace.h.
+ *
+ * Since this include file's name is different from the trace
+ * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
+ * of this file.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ice
+
+/* See trace-events-sample.h for a detailed description of why this
+ * guard clause is different from most normal include files.
+ */
+#if !defined(_ICE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _ICE_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+/* ice_trace() macro enables shared code to refer to trace points
+ * like:
+ *
+ * trace_ice_example(args...)
+ *
+ * ... as:
+ *
+ * ice_trace(example, args...)
+ *
+ * ... to resolve to the PF version of the tracepoint without
+ * ifdefs, and to allow tracepoints to be disabled entirely at build
+ * time.
+ *
+ * Trace point should always be referred to in the driver via this
+ * macro.
+ *
+ * Similarly, ice_trace_enabled(trace_name) wraps references to
+ * trace_ice_<trace_name>_enabled() functions.
+ * @trace_name: name of tracepoint
+ */
+#define _ICE_TRACE_NAME(trace_name) (trace_##ice##_##trace_name)
+#define ICE_TRACE_NAME(trace_name) _ICE_TRACE_NAME(trace_name)
+
+#define ice_trace(trace_name, args...) ICE_TRACE_NAME(trace_name)(args)
+
+#define ice_trace_enabled(trace_name) ICE_TRACE_NAME(trace_name##_enabled)()
+
+/* This is for events common to PF. Corresponding versions will be named
+ * trace_ice_*. The ice_trace() macro above will select the right trace point
+ * name for the driver.
+ */
+
+/* Begin tracepoints */
+
+/* Global tracepoints */
+
+/* Events related to DIM, q_vectors and ring containers */
+DECLARE_EVENT_CLASS(ice_rx_dim_template,
+		    TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+		    TP_ARGS(q_vector, dim),
+		    TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector)
+				     __field(struct dim *, dim)
+				     __string(devname, q_vector->rx.ring->netdev->name)),
+
+		    TP_fast_assign(__entry->q_vector = q_vector;
+				   __entry->dim = dim;
+				   __assign_str(devname, q_vector->rx.ring->netdev->name);),
+
+		    TP_printk("netdev: %s Rx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d",
+			      __get_str(devname),
+			      __entry->q_vector->rx.ring->q_index,
+			      __entry->dim->state,
+			      __entry->dim->profile_ix,
+			      __entry->dim->tune_state,
+			      __entry->dim->steps_right,
+			      __entry->dim->steps_left,
+			      __entry->dim->tired)
+);
+
+DEFINE_EVENT(ice_rx_dim_template, ice_rx_dim_work,
+	     TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+	     TP_ARGS(q_vector, dim)
+);
+
+DECLARE_EVENT_CLASS(ice_tx_dim_template,
+		    TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+		    TP_ARGS(q_vector, dim),
+		    TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector)
+				     __field(struct dim *, dim)
+				     __string(devname, q_vector->tx.ring->netdev->name)),
+
+		    TP_fast_assign(__entry->q_vector = q_vector;
+				   __entry->dim = dim;
+				   __assign_str(devname, q_vector->tx.ring->netdev->name);),
+
+		    TP_printk("netdev: %s Tx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d",
+			      __get_str(devname),
+			      __entry->q_vector->tx.ring->q_index,
+			      __entry->dim->state,
+			      __entry->dim->profile_ix,
+			      __entry->dim->tune_state,
+			      __entry->dim->steps_right,
+			      __entry->dim->steps_left,
+			      __entry->dim->tired)
+);
+
+DEFINE_EVENT(ice_tx_dim_template, ice_tx_dim_work,
+	     TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+	     TP_ARGS(q_vector, dim)
+);
+
+/* Events related to a vsi & ring */
+DECLARE_EVENT_CLASS(ice_tx_template,
+		    TP_PROTO(struct ice_ring *ring, struct ice_tx_desc *desc,
+			     struct ice_tx_buf *buf),
+
+		    TP_ARGS(ring, desc, buf),
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, desc)
+				     __field(void *, buf)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->desc = desc;
+				   __entry->buf = buf;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname),
+			      __entry->ring, __entry->desc, __entry->buf)
+);
+
+#define DEFINE_TX_TEMPLATE_OP_EVENT(name) \
+DEFINE_EVENT(ice_tx_template, name, \
+	     TP_PROTO(struct ice_ring *ring, \
+		      struct ice_tx_desc *desc, \
+		      struct ice_tx_buf *buf), \
+	     TP_ARGS(ring, desc, buf))
+
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq);
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap);
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap_eop);
+
+DECLARE_EVENT_CLASS(ice_rx_template,
+		    TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc),
+
+		    TP_ARGS(ring, desc),
+
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, desc)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->desc = desc;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname),
+			      __entry->ring, __entry->desc)
+);
+DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq,
+	     TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc),
+	     TP_ARGS(ring, desc)
+);
+
+DECLARE_EVENT_CLASS(ice_rx_indicate_template,
+		    TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc,
+			     struct sk_buff *skb),
+
+		    TP_ARGS(ring, desc, skb),
+
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, desc)
+				     __field(void *, skb)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->desc = desc;
+				   __entry->skb = skb;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname),
+			      __entry->ring, __entry->desc, __entry->skb)
+);
+
+DEFINE_EVENT(ice_rx_indicate_template, ice_clean_rx_irq_indicate,
+	     TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc,
+		      struct sk_buff *skb),
+	     TP_ARGS(ring, desc, skb)
+);
+
+DECLARE_EVENT_CLASS(ice_xmit_template,
+		    TP_PROTO(struct ice_ring *ring, struct sk_buff *skb),
+
+		    TP_ARGS(ring, skb),
+
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, skb)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->skb = skb;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname),
+			      __entry->skb, __entry->ring)
+);
+
+#define DEFINE_XMIT_TEMPLATE_OP_EVENT(name) \
+DEFINE_EVENT(ice_xmit_template, name, \
+	     TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), \
+	     TP_ARGS(ring, skb))
+
+DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring);
+DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring_drop);
+
+/* End tracepoints */
+
+#endif /* _ICE_TRACE_H_ */
+/* This must be outside ifdef _ICE_TRACE_H */
+
+/* This trace include file is not located in the .../include/trace
+ * with the kernel tracepoint definitions, because we're a loadable
+ * module.
+ */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE ../../drivers/net/ethernet/intel/ice/ice_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index e9e9edb32c6f..a63d5916ebb0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -10,6 +10,7 @@
 #include "ice_txrx_lib.h"
 #include "ice_lib.h"
 #include "ice.h"
+#include "ice_trace.h"
 #include "ice_dcb_lib.h"
 #include "ice_xsk.h"
 
@@ -224,6 +225,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 
 		smp_rmb();	/* prevent any other reads prior to eop_desc */
 
+		ice_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
 		/* if the descriptor isn't done, no work yet to do */
 		if (!(eop_desc->cmd_type_offset_bsz &
 		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
@@ -254,6 +256,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 
 		/* unmap remaining buffers */
 		while (tx_desc != eop_desc) {
+			ice_trace(clean_tx_irq_unmap, tx_ring, tx_desc, tx_buf);
 			tx_buf++;
 			tx_desc++;
 			i++;
@@ -272,6 +275,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 				dma_unmap_len_set(tx_buf, len, 0);
 			}
 		}
+		ice_trace(clean_tx_irq_unmap_eop, tx_ring, tx_desc, tx_buf);
 
 		/* move us one more past the eop_desc for start of next pkt */
 		tx_buf++;
@@ -1102,6 +1106,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
+		ice_trace(clean_rx_irq, rx_ring, rx_desc);
 		if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
 			struct ice_vsi *ctrl_vsi = rx_ring->vsi;
 
@@ -1207,6 +1212,7 @@ construct_skb:
 
 		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
+		ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
 		/* send completed skb up the stack */
 		ice_receive_skb(rx_ring, skb, vlan_tag);
 		skb = NULL;
@@ -2188,6 +2194,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 	unsigned int count;
 	int tso, csum;
 
+	ice_trace(xmit_frame_ring, tx_ring, skb);
+
 	count = ice_xmit_desc_count(skb);
 	if (ice_chk_linearize(skb, count)) {
 		if (__skb_linearize(skb))
@@ -2262,6 +2270,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 	return NETDEV_TX_OK;
 
 out_drop:
+	ice_trace(xmit_frame_ring_drop, tx_ring, skb);
 	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }

From cb9516be7708a2a18ec0a19fe3a225b5b3bc92c7 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 25 Jun 2021 10:02:48 +0800
Subject: [PATCH 2878/3292] blk-mq: update hctx->dispatch_busy in case of real
 scheduler

Commit 6e6fcbc27e77 ("blk-mq: support batching dispatch in case of io")
starts to support io batching submission by using hctx->dispatch_busy.

However, blk_mq_update_dispatch_busy() isn't changed to update hctx->dispatch_busy
in that commit, so fix the issue by updating hctx->dispatch_busy in case
of real scheduler.

Reported-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Fixes: 6e6fcbc27e77 ("blk-mq: support batching dispatch in case of io")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210625020248.1630497-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3115ea2d0990..c2f3550337f7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1224,9 +1224,6 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
 {
 	unsigned int ewma;
 
-	if (hctx->queue->elevator)
-		return;
-
 	ewma = hctx->dispatch_busy;
 
 	if (!ewma && !busy)

From ff70202b2d1ad522275c6aadc8c53519b6a22c57 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 24 Jun 2021 10:05:05 +0200
Subject: [PATCH 2879/3292] dev_forward_skb: do not scrub skb mark within the
 same name space

The goal is to keep the mark during a bpf_redirect(), like it is done for
legacy encapsulation / decapsulation, when there is no x-netns.
This was initially done in commit 213dd74aee76 ("skbuff: Do not scrub skb
mark within the same name space").

When the call to skb_scrub_packet() was added in dev_forward_skb() (commit
8b27f27797ca ("skb: allow skb_scrub_packet() to be used by tunnels")), the
second argument (xnet) was set to true to force a call to skb_orphan(). At
this time, the mark was always cleanned up by skb_scrub_packet(), whatever
xnet value was.
This call to skb_orphan() was removed later in commit
9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb.").
But this 'true' stayed here without any real reason.

Let's correctly set xnet in ____dev_forward_skb(), this function has access
to the previous interface and to the new interface.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5cbc950b34df..5ab2d1917ca1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4114,7 +4114,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
 		return NET_RX_DROP;
 	}
 
-	skb_scrub_packet(skb, true);
+	skb_scrub_packet(skb, !net_eq(dev_net(dev), dev_net(skb->dev)));
 	skb->priority = 0;
 	return 0;
 }

From f92e1869d74e1acc6551256eb084a1c14a054e19 Mon Sep 17 00:00:00 2001
From: David Thompson <davthompson@nvidia.com>
Date: Thu, 24 Jun 2021 21:11:46 -0400
Subject: [PATCH 2880/3292] Add Mellanox BlueField Gigabit Ethernet driver

This patch adds build and driver logic for the "mlxbf_gige"
Ethernet driver from Mellanox Technologies. The second
generation BlueField SoC from Mellanox supports an
out-of-band GigaBit Ethernet management port to the Arm
subsystem.  This driver supports TCP/IP network connectivity
for that port, and provides back-end routines to handle
basic ethtool requests.

The driver interfaces to the Gigabit Ethernet block of
BlueField SoC via MMIO accesses to registers, which contain
control information or pointers describing transmit and
receive resources.  There is a single transmit queue, and
the port supports transmit ring sizes of 4 to 256 entries.
There is a single receive queue, and the port supports
receive ring sizes of 32 to 32K entries. The transmit and
receive rings are allocated from DMA coherent memory. There
is a 16-bit producer and consumer index per ring to denote
software ownership and hardware ownership, respectively.

The main driver logic such as probe(), remove(), and netdev
ops are in "mlxbf_gige_main.c".  Logic in "mlxbf_gige_rx.c"
and "mlxbf_gige_tx.c" handles the packet processing for
receive and transmit respectively.

The logic in "mlxbf_gige_ethtool.c" supports the handling
of some basic ethtool requests: get driver info, get ring
parameters, get registers, and get statistics.

The logic in "mlxbf_gige_mdio.c" is the driver controlling
the Mellanox BlueField hardware that interacts with a PHY
device via MDIO/MDC pins.  This driver does the following:
  - At driver probe time, it configures several BlueField MDIO
    parameters such as sample rate, full drive, voltage and MDC
  - It defines functions to read and write MDIO registers and
    registers the MDIO bus.
  - It defines the phy interrupt handler reporting a
    link up/down status change
  - This driver's probe is invoked from the main driver logic
    while the phy interrupt handler is registered in ndo_open.

Driver limitations
  - Only supports 1Gbps speed
  - Only supports GMII protocol
  - Supports maximum packet size of 2KB
  - Does not support scatter-gather buffering

Testing
  - Successful build of kernel for ARM64, ARM32, X86_64
  - Tested ARM64 build on FastModels & Palladium
  - Tested ARM64 build on several Mellanox boards that are built with
    the BlueField-2 SoC.  The testing includes coverage in the areas
    of networking (e.g. ping, iperf, ifconfig, route), file transfers
    (e.g. SCP), and various ethtool options relevant to this driver.

Signed-off-by: David Thompson <davthompson@nvidia.com>
Signed-off-by: Asmaa Mnebhi <asmaa@nvidia.com>
Reviewed-by: Liming Sun <limings@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/Kconfig         |   1 +
 drivers/net/ethernet/mellanox/Makefile        |   1 +
 .../net/ethernet/mellanox/mlxbf_gige/Kconfig  |  13 +
 .../net/ethernet/mellanox/mlxbf_gige/Makefile |  11 +
 .../ethernet/mellanox/mlxbf_gige/mlxbf_gige.h | 190 ++++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_ethtool.c  | 137 ++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_gpio.c     | 212 ++++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_intr.c     | 142 ++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_main.c     | 452 ++++++++++++++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_mdio.c     | 187 ++++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_regs.h     |  78 +++
 .../mellanox/mlxbf_gige/mlxbf_gige_rx.c       | 320 +++++++++++++
 .../mellanox/mlxbf_gige/mlxbf_gige_tx.c       | 284 +++++++++++
 13 files changed, 2028 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/Makefile
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_gpio.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c

diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
index ff6613a5cdd3..b4f66eb9ddb9 100644
--- a/drivers/net/ethernet/mellanox/Kconfig
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -22,5 +22,6 @@ source "drivers/net/ethernet/mellanox/mlx4/Kconfig"
 source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig"
 source "drivers/net/ethernet/mellanox/mlxsw/Kconfig"
 source "drivers/net/ethernet/mellanox/mlxfw/Kconfig"
+source "drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig"
 
 endif # NET_VENDOR_MELLANOX
diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile
index 79773ac331ee..d4b5f547a727 100644
--- a/drivers/net/ethernet/mellanox/Makefile
+++ b/drivers/net/ethernet/mellanox/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_MLX4_CORE) += mlx4/
 obj-$(CONFIG_MLX5_CORE) += mlx5/core/
 obj-$(CONFIG_MLXSW_CORE) += mlxsw/
 obj-$(CONFIG_MLXFW) += mlxfw/
+obj-$(CONFIG_MLXBF_GIGE) += mlxbf_gige/
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig b/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig
new file mode 100644
index 000000000000..4cdebafaf222
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+#
+# Mellanox GigE driver configuration
+#
+
+config MLXBF_GIGE
+	tristate "Mellanox Technologies BlueField Gigabit Ethernet support"
+	depends on (ARM64 && ACPI) || COMPILE_TEST
+	select PHYLIB
+	help
+	  The second generation BlueField SoC from Mellanox Technologies
+	  supports an out-of-band Gigabit Ethernet management port to the
+	  Arm subsystem.
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile b/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile
new file mode 100644
index 000000000000..e57c1375f236
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+obj-$(CONFIG_MLXBF_GIGE) += mlxbf_gige.o
+
+mlxbf_gige-y := mlxbf_gige_ethtool.o \
+		mlxbf_gige_gpio.o \
+		mlxbf_gige_intr.o \
+		mlxbf_gige_main.o \
+		mlxbf_gige_mdio.o \
+		mlxbf_gige_rx.o   \
+		mlxbf_gige_tx.o
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
new file mode 100644
index 000000000000..e3509e69ed1c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
+
+/* Header file for Gigabit Ethernet driver for Mellanox BlueField SoC
+ * - this file contains software data structures and any chip-specific
+ *   data structures (e.g. TX WQE format) that are memory resident.
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#ifndef __MLXBF_GIGE_H__
+#define __MLXBF_GIGE_H__
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/irqreturn.h>
+#include <linux/netdevice.h>
+#include <linux/irq.h>
+
+/* The silicon design supports a maximum RX ring size of
+ * 32K entries. Based on current testing this maximum size
+ * is not required to be supported.  Instead the RX ring
+ * will be capped at a realistic value of 1024 entries.
+ */
+#define MLXBF_GIGE_MIN_RXQ_SZ     32
+#define MLXBF_GIGE_MAX_RXQ_SZ     1024
+#define MLXBF_GIGE_DEFAULT_RXQ_SZ 128
+
+#define MLXBF_GIGE_MIN_TXQ_SZ     4
+#define MLXBF_GIGE_MAX_TXQ_SZ     256
+#define MLXBF_GIGE_DEFAULT_TXQ_SZ 128
+
+#define MLXBF_GIGE_DEFAULT_BUF_SZ 2048
+
+#define MLXBF_GIGE_DMA_PAGE_SZ    4096
+#define MLXBF_GIGE_DMA_PAGE_SHIFT 12
+
+/* There are four individual MAC RX filters. Currently
+ * two of them are being used: one for the broadcast MAC
+ * (index 0) and one for local MAC (index 1)
+ */
+#define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0
+#define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1
+
+/* Define for broadcast MAC literal */
+#define BCAST_MAC_ADDR 0xFFFFFFFFFFFF
+
+/* There are three individual interrupts:
+ *   1) Errors, "OOB" interrupt line
+ *   2) Receive Packet, "OOB_LLU" interrupt line
+ *   3) LLU and PLU Events, "OOB_PLU" interrupt line
+ */
+#define MLXBF_GIGE_ERROR_INTR_IDX       0
+#define MLXBF_GIGE_RECEIVE_PKT_INTR_IDX 1
+#define MLXBF_GIGE_LLU_PLU_INTR_IDX     2
+#define MLXBF_GIGE_PHY_INT_N            3
+
+#define MLXBF_GIGE_MDIO_DEFAULT_PHY_ADDR 0x3
+
+#define MLXBF_GIGE_DEFAULT_PHY_INT_GPIO 12
+
+struct mlxbf_gige_stats {
+	u64 hw_access_errors;
+	u64 tx_invalid_checksums;
+	u64 tx_small_frames;
+	u64 tx_index_errors;
+	u64 sw_config_errors;
+	u64 sw_access_errors;
+	u64 rx_truncate_errors;
+	u64 rx_mac_errors;
+	u64 rx_din_dropped_pkts;
+	u64 tx_fifo_full;
+	u64 rx_filter_passed_pkts;
+	u64 rx_filter_discard_pkts;
+};
+
+struct mlxbf_gige {
+	void __iomem *base;
+	void __iomem *llu_base;
+	void __iomem *plu_base;
+	struct device *dev;
+	struct net_device *netdev;
+	struct platform_device *pdev;
+	void __iomem *mdio_io;
+	struct mii_bus *mdiobus;
+	void __iomem *gpio_io;
+	struct irq_domain *irqdomain;
+	u32 phy_int_gpio_mask;
+	spinlock_t lock;      /* for packet processing indices */
+	spinlock_t gpio_lock; /* for GPIO bus access */
+	u16 rx_q_entries;
+	u16 tx_q_entries;
+	u64 *tx_wqe_base;
+	dma_addr_t tx_wqe_base_dma;
+	u64 *tx_wqe_next;
+	u64 *tx_cc;
+	dma_addr_t tx_cc_dma;
+	dma_addr_t *rx_wqe_base;
+	dma_addr_t rx_wqe_base_dma;
+	u64 *rx_cqe_base;
+	dma_addr_t rx_cqe_base_dma;
+	u16 tx_pi;
+	u16 prev_tx_ci;
+	u64 error_intr_count;
+	u64 rx_intr_count;
+	u64 llu_plu_intr_count;
+	struct sk_buff *rx_skb[MLXBF_GIGE_MAX_RXQ_SZ];
+	struct sk_buff *tx_skb[MLXBF_GIGE_MAX_TXQ_SZ];
+	int error_irq;
+	int rx_irq;
+	int llu_plu_irq;
+	int phy_irq;
+	int hw_phy_irq;
+	bool promisc_enabled;
+	u8 valid_polarity;
+	struct napi_struct napi;
+	struct mlxbf_gige_stats stats;
+};
+
+/* Rx Work Queue Element definitions */
+#define MLXBF_GIGE_RX_WQE_SZ                   8
+
+/* Rx Completion Queue Element definitions */
+#define MLXBF_GIGE_RX_CQE_SZ                   8
+#define MLXBF_GIGE_RX_CQE_PKT_LEN_MASK         GENMASK(10, 0)
+#define MLXBF_GIGE_RX_CQE_VALID_MASK           GENMASK(11, 11)
+#define MLXBF_GIGE_RX_CQE_PKT_STATUS_MASK      GENMASK(15, 12)
+#define MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR   GENMASK(12, 12)
+#define MLXBF_GIGE_RX_CQE_PKT_STATUS_TRUNCATED GENMASK(13, 13)
+#define MLXBF_GIGE_RX_CQE_CHKSUM_MASK          GENMASK(31, 16)
+
+/* Tx Work Queue Element definitions */
+#define MLXBF_GIGE_TX_WQE_SZ_QWORDS            2
+#define MLXBF_GIGE_TX_WQE_SZ                   16
+#define MLXBF_GIGE_TX_WQE_PKT_LEN_MASK         GENMASK(10, 0)
+#define MLXBF_GIGE_TX_WQE_UPDATE_MASK          GENMASK(31, 31)
+#define MLXBF_GIGE_TX_WQE_CHKSUM_LEN_MASK      GENMASK(42, 32)
+#define MLXBF_GIGE_TX_WQE_CHKSUM_START_MASK    GENMASK(55, 48)
+#define MLXBF_GIGE_TX_WQE_CHKSUM_OFFSET_MASK   GENMASK(63, 56)
+
+/* Macro to return packet length of specified TX WQE */
+#define MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr) \
+	(*((tx_wqe_addr) + 1) & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK)
+
+/* Tx Completion Count */
+#define MLXBF_GIGE_TX_CC_SZ                    8
+
+/* List of resources in ACPI table */
+enum mlxbf_gige_res {
+	MLXBF_GIGE_RES_MAC,
+	MLXBF_GIGE_RES_MDIO9,
+	MLXBF_GIGE_RES_GPIO0,
+	MLXBF_GIGE_RES_LLU,
+	MLXBF_GIGE_RES_PLU
+};
+
+/* Version of register data returned by mlxbf_gige_get_regs() */
+#define MLXBF_GIGE_REGS_VERSION 1
+
+int mlxbf_gige_mdio_probe(struct platform_device *pdev,
+			  struct mlxbf_gige *priv);
+void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv);
+irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(int irq, void *dev_id);
+void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv);
+
+void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+				  unsigned int index, u64 dmac);
+void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+				  unsigned int index, u64 *dmac);
+void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv);
+void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv);
+int mlxbf_gige_rx_init(struct mlxbf_gige *priv);
+void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv);
+int mlxbf_gige_tx_init(struct mlxbf_gige *priv);
+void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv);
+bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv);
+netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
+				  struct net_device *netdev);
+struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv,
+				     unsigned int map_len,
+				     dma_addr_t *buf_dma,
+				     enum dma_data_direction dir);
+int mlxbf_gige_request_irqs(struct mlxbf_gige *priv);
+void mlxbf_gige_free_irqs(struct mlxbf_gige *priv);
+int mlxbf_gige_poll(struct napi_struct *napi, int budget);
+extern const struct ethtool_ops mlxbf_gige_ethtool_ops;
+void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv);
+
+int mlxbf_gige_gpio_init(struct platform_device *pdev, struct mlxbf_gige *priv);
+void mlxbf_gige_gpio_free(struct mlxbf_gige *priv);
+
+#endif /* !defined(__MLXBF_GIGE_H__) */
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
new file mode 100644
index 000000000000..92b798f8e73a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* Ethtool support for Mellanox Gigabit Ethernet driver
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/phy.h>
+
+#include "mlxbf_gige.h"
+#include "mlxbf_gige_regs.h"
+
+/* Start of struct ethtool_ops functions */
+static int mlxbf_gige_get_regs_len(struct net_device *netdev)
+{
+	return MLXBF_GIGE_MMIO_REG_SZ;
+}
+
+static void mlxbf_gige_get_regs(struct net_device *netdev,
+				struct ethtool_regs *regs, void *p)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+
+	regs->version = MLXBF_GIGE_REGS_VERSION;
+
+	/* Read entire MMIO register space and store results
+	 * into the provided buffer. Each 64-bit word is converted
+	 * to big-endian to make the output more readable.
+	 *
+	 * NOTE: by design, a read to an offset without an existing
+	 *       register will be acknowledged and return zero.
+	 */
+	memcpy_fromio(p, priv->base, MLXBF_GIGE_MMIO_REG_SZ);
+}
+
+static void mlxbf_gige_get_ringparam(struct net_device *netdev,
+				     struct ethtool_ringparam *ering)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+
+	ering->rx_max_pending = MLXBF_GIGE_MAX_RXQ_SZ;
+	ering->tx_max_pending = MLXBF_GIGE_MAX_TXQ_SZ;
+	ering->rx_pending = priv->rx_q_entries;
+	ering->tx_pending = priv->tx_q_entries;
+}
+
+static const struct {
+	const char string[ETH_GSTRING_LEN];
+} mlxbf_gige_ethtool_stats_keys[] = {
+	{ "hw_access_errors" },
+	{ "tx_invalid_checksums" },
+	{ "tx_small_frames" },
+	{ "tx_index_errors" },
+	{ "sw_config_errors" },
+	{ "sw_access_errors" },
+	{ "rx_truncate_errors" },
+	{ "rx_mac_errors" },
+	{ "rx_din_dropped_pkts" },
+	{ "tx_fifo_full" },
+	{ "rx_filter_passed_pkts" },
+	{ "rx_filter_discard_pkts" },
+};
+
+static int mlxbf_gige_get_sset_count(struct net_device *netdev, int stringset)
+{
+	if (stringset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+	return ARRAY_SIZE(mlxbf_gige_ethtool_stats_keys);
+}
+
+static void mlxbf_gige_get_strings(struct net_device *netdev, u32 stringset,
+				   u8 *buf)
+{
+	if (stringset != ETH_SS_STATS)
+		return;
+	memcpy(buf, &mlxbf_gige_ethtool_stats_keys,
+	       sizeof(mlxbf_gige_ethtool_stats_keys));
+}
+
+static void mlxbf_gige_get_ethtool_stats(struct net_device *netdev,
+					 struct ethtool_stats *estats,
+					 u64 *data)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+
+	/* Fill data array with interface statistics
+	 *
+	 * NOTE: the data writes must be in
+	 *       sync with the strings shown in
+	 *       the mlxbf_gige_ethtool_stats_keys[] array
+	 *
+	 * NOTE2: certain statistics below are zeroed upon
+	 *        port disable, so the calculation below
+	 *        must include the "cached" value of the stat
+	 *        plus the value read directly from hardware.
+	 *        Cached statistics are currently:
+	 *          rx_din_dropped_pkts
+	 *          rx_filter_passed_pkts
+	 *          rx_filter_discard_pkts
+	 */
+	*data++ = priv->stats.hw_access_errors;
+	*data++ = priv->stats.tx_invalid_checksums;
+	*data++ = priv->stats.tx_small_frames;
+	*data++ = priv->stats.tx_index_errors;
+	*data++ = priv->stats.sw_config_errors;
+	*data++ = priv->stats.sw_access_errors;
+	*data++ = priv->stats.rx_truncate_errors;
+	*data++ = priv->stats.rx_mac_errors;
+	*data++ = (priv->stats.rx_din_dropped_pkts +
+		   readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER));
+	*data++ = priv->stats.tx_fifo_full;
+	*data++ = (priv->stats.rx_filter_passed_pkts +
+		   readq(priv->base + MLXBF_GIGE_RX_PASS_COUNTER_ALL));
+	*data++ = (priv->stats.rx_filter_discard_pkts +
+		   readq(priv->base + MLXBF_GIGE_RX_DISC_COUNTER_ALL));
+}
+
+static void mlxbf_gige_get_pauseparam(struct net_device *netdev,
+				      struct ethtool_pauseparam *pause)
+{
+	pause->autoneg = AUTONEG_DISABLE;
+	pause->rx_pause = 1;
+	pause->tx_pause = 1;
+}
+
+const struct ethtool_ops mlxbf_gige_ethtool_ops = {
+	.get_link		= ethtool_op_get_link,
+	.get_ringparam		= mlxbf_gige_get_ringparam,
+	.get_regs_len           = mlxbf_gige_get_regs_len,
+	.get_regs               = mlxbf_gige_get_regs,
+	.get_strings            = mlxbf_gige_get_strings,
+	.get_sset_count         = mlxbf_gige_get_sset_count,
+	.get_ethtool_stats      = mlxbf_gige_get_ethtool_stats,
+	.nway_reset		= phy_ethtool_nway_reset,
+	.get_pauseparam		= mlxbf_gige_get_pauseparam,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_gpio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_gpio.c
new file mode 100644
index 000000000000..a8d966db5715
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_gpio.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* Initialize and handle GPIO interrupt triggered by INT_N PHY signal.
+ * This GPIO interrupt triggers the PHY state machine to bring the link
+ * up/down.
+ *
+ * Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/irqreturn.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+#include "mlxbf_gige.h"
+#include "mlxbf_gige_regs.h"
+
+#define MLXBF_GIGE_GPIO_CAUSE_FALL_EN		0x48
+#define MLXBF_GIGE_GPIO_CAUSE_OR_CAUSE_EVTEN0	0x80
+#define MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0		0x94
+#define MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE	0x98
+
+static void mlxbf_gige_gpio_enable(struct mlxbf_gige *priv)
+{
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&priv->gpio_lock, flags);
+	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
+	val |= priv->phy_int_gpio_mask;
+	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
+
+	/* The INT_N interrupt level is active low.
+	 * So enable cause fall bit to detect when GPIO
+	 * state goes low.
+	 */
+	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_FALL_EN);
+	val |= priv->phy_int_gpio_mask;
+	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_FALL_EN);
+
+	/* Enable PHY interrupt by setting the priority level */
+	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
+	val |= priv->phy_int_gpio_mask;
+	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
+	spin_unlock_irqrestore(&priv->gpio_lock, flags);
+}
+
+static void mlxbf_gige_gpio_disable(struct mlxbf_gige *priv)
+{
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&priv->gpio_lock, flags);
+	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
+	val &= ~priv->phy_int_gpio_mask;
+	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
+	spin_unlock_irqrestore(&priv->gpio_lock, flags);
+}
+
+static irqreturn_t mlxbf_gige_gpio_handler(int irq, void *ptr)
+{
+	struct mlxbf_gige *priv;
+	u32 val;
+
+	priv = ptr;
+
+	/* Check if this interrupt is from PHY device.
+	 * Return if it is not.
+	 */
+	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_CAUSE_EVTEN0);
+	if (!(val & priv->phy_int_gpio_mask))
+		return IRQ_NONE;
+
+	/* Clear interrupt when done, otherwise, no further interrupt
+	 * will be triggered.
+	 */
+	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
+	val |= priv->phy_int_gpio_mask;
+	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
+
+	generic_handle_irq(priv->phy_irq);
+
+	return IRQ_HANDLED;
+}
+
+static void mlxbf_gige_gpio_mask(struct irq_data *irqd)
+{
+	struct mlxbf_gige *priv = irq_data_get_irq_chip_data(irqd);
+
+	mlxbf_gige_gpio_disable(priv);
+}
+
+static void mlxbf_gige_gpio_unmask(struct irq_data *irqd)
+{
+	struct mlxbf_gige *priv = irq_data_get_irq_chip_data(irqd);
+
+	mlxbf_gige_gpio_enable(priv);
+}
+
+static struct irq_chip mlxbf_gige_gpio_chip = {
+	.name			= "mlxbf_gige_phy",
+	.irq_mask		= mlxbf_gige_gpio_mask,
+	.irq_unmask		= mlxbf_gige_gpio_unmask,
+};
+
+static int mlxbf_gige_gpio_domain_map(struct irq_domain *d,
+				      unsigned int irq,
+				      irq_hw_number_t hwirq)
+{
+	irq_set_chip_data(irq, d->host_data);
+	irq_set_chip_and_handler(irq, &mlxbf_gige_gpio_chip, handle_simple_irq);
+	irq_set_noprobe(irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mlxbf_gige_gpio_domain_ops = {
+	.map    = mlxbf_gige_gpio_domain_map,
+	.xlate	= irq_domain_xlate_twocell,
+};
+
+#ifdef CONFIG_ACPI
+static int mlxbf_gige_gpio_resources(struct acpi_resource *ares,
+				     void *data)
+{
+	struct acpi_resource_gpio *gpio;
+	u32 *phy_int_gpio = data;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_GPIO) {
+		gpio = &ares->data.gpio;
+		*phy_int_gpio = gpio->pin_table[0];
+	}
+
+	return 1;
+}
+#endif
+
+void mlxbf_gige_gpio_free(struct mlxbf_gige *priv)
+{
+	irq_dispose_mapping(priv->phy_irq);
+	irq_domain_remove(priv->irqdomain);
+}
+
+int mlxbf_gige_gpio_init(struct platform_device *pdev,
+			 struct mlxbf_gige *priv)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	u32 phy_int_gpio = 0;
+	int ret;
+
+	LIST_HEAD(resources);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_GPIO0);
+	if (!res)
+		return -ENODEV;
+
+	priv->gpio_io = devm_ioremap(dev, res->start, resource_size(res));
+	if (!priv->gpio_io)
+		return -ENOMEM;
+
+#ifdef CONFIG_ACPI
+	ret = acpi_dev_get_resources(ACPI_COMPANION(dev),
+				     &resources, mlxbf_gige_gpio_resources,
+				     &phy_int_gpio);
+	acpi_dev_free_resource_list(&resources);
+	if (ret < 0 || !phy_int_gpio) {
+		dev_err(dev, "Error retrieving the gpio phy pin");
+		return -EINVAL;
+	}
+#endif
+
+	priv->phy_int_gpio_mask = BIT(phy_int_gpio);
+
+	mlxbf_gige_gpio_disable(priv);
+
+	priv->hw_phy_irq = platform_get_irq(pdev, MLXBF_GIGE_PHY_INT_N);
+
+	priv->irqdomain = irq_domain_add_simple(NULL, 1, 0,
+						&mlxbf_gige_gpio_domain_ops,
+						priv);
+	if (!priv->irqdomain) {
+		dev_err(dev, "Failed to add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	priv->phy_irq = irq_create_mapping(priv->irqdomain, 0);
+	if (!priv->phy_irq) {
+		irq_domain_remove(priv->irqdomain);
+		priv->irqdomain = NULL;
+		dev_err(dev, "Error mapping PHY IRQ\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(dev, priv->hw_phy_irq, mlxbf_gige_gpio_handler,
+			       IRQF_ONESHOT | IRQF_SHARED, "mlxbf_gige_phy", priv);
+	if (ret) {
+		dev_err(dev, "Failed to request PHY IRQ");
+		mlxbf_gige_gpio_free(priv);
+		return ret;
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c
new file mode 100644
index 000000000000..c38795be04a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* Interrupt related logic for Mellanox Gigabit Ethernet driver
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/interrupt.h>
+
+#include "mlxbf_gige.h"
+#include "mlxbf_gige_regs.h"
+
+static irqreturn_t mlxbf_gige_error_intr(int irq, void *dev_id)
+{
+	struct mlxbf_gige *priv;
+	u64 int_status;
+
+	priv = dev_id;
+
+	priv->error_intr_count++;
+
+	int_status = readq(priv->base + MLXBF_GIGE_INT_STATUS);
+
+	if (int_status & MLXBF_GIGE_INT_STATUS_HW_ACCESS_ERROR)
+		priv->stats.hw_access_errors++;
+
+	if (int_status & MLXBF_GIGE_INT_STATUS_TX_CHECKSUM_INPUTS) {
+		priv->stats.tx_invalid_checksums++;
+		/* This error condition is latched into MLXBF_GIGE_INT_STATUS
+		 * when the GigE silicon operates on the offending
+		 * TX WQE. The write to MLXBF_GIGE_INT_STATUS at the bottom
+		 * of this routine clears this error condition.
+		 */
+	}
+
+	if (int_status & MLXBF_GIGE_INT_STATUS_TX_SMALL_FRAME_SIZE) {
+		priv->stats.tx_small_frames++;
+		/* This condition happens when the networking stack invokes
+		 * this driver's "start_xmit()" method with a packet whose
+		 * size < 60 bytes.  The GigE silicon will automatically pad
+		 * this small frame up to a minimum-sized frame before it is
+		 * sent. The "tx_small_frame" condition is latched into the
+		 * MLXBF_GIGE_INT_STATUS register when the GigE silicon
+		 * operates on the offending TX WQE. The write to
+		 * MLXBF_GIGE_INT_STATUS at the bottom of this routine
+		 * clears this condition.
+		 */
+	}
+
+	if (int_status & MLXBF_GIGE_INT_STATUS_TX_PI_CI_EXCEED_WQ_SIZE)
+		priv->stats.tx_index_errors++;
+
+	if (int_status & MLXBF_GIGE_INT_STATUS_SW_CONFIG_ERROR)
+		priv->stats.sw_config_errors++;
+
+	if (int_status & MLXBF_GIGE_INT_STATUS_SW_ACCESS_ERROR)
+		priv->stats.sw_access_errors++;
+
+	/* Clear all error interrupts by writing '1' back to
+	 * all the asserted bits in INT_STATUS.  Do not write
+	 * '1' back to 'receive packet' bit, since that is
+	 * managed separately.
+	 */
+
+	int_status &= ~MLXBF_GIGE_INT_STATUS_RX_RECEIVE_PACKET;
+
+	writeq(int_status, priv->base + MLXBF_GIGE_INT_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mlxbf_gige_rx_intr(int irq, void *dev_id)
+{
+	struct mlxbf_gige *priv;
+
+	priv = dev_id;
+
+	priv->rx_intr_count++;
+
+	/* NOTE: GigE silicon automatically disables "packet rx" interrupt by
+	 *       setting MLXBF_GIGE_INT_MASK bit0 upon triggering the interrupt
+	 *       to the ARM cores.  Software needs to re-enable "packet rx"
+	 *       interrupts by clearing MLXBF_GIGE_INT_MASK bit0.
+	 */
+
+	napi_schedule(&priv->napi);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mlxbf_gige_llu_plu_intr(int irq, void *dev_id)
+{
+	struct mlxbf_gige *priv;
+
+	priv = dev_id;
+	priv->llu_plu_intr_count++;
+
+	return IRQ_HANDLED;
+}
+
+int mlxbf_gige_request_irqs(struct mlxbf_gige *priv)
+{
+	int err;
+
+	err = request_irq(priv->error_irq, mlxbf_gige_error_intr, 0,
+			  "mlxbf_gige_error", priv);
+	if (err) {
+		dev_err(priv->dev, "Request error_irq failure\n");
+		return err;
+	}
+
+	err = request_irq(priv->rx_irq, mlxbf_gige_rx_intr, 0,
+			  "mlxbf_gige_rx", priv);
+	if (err) {
+		dev_err(priv->dev, "Request rx_irq failure\n");
+		goto free_error_irq;
+	}
+
+	err = request_irq(priv->llu_plu_irq, mlxbf_gige_llu_plu_intr, 0,
+			  "mlxbf_gige_llu_plu", priv);
+	if (err) {
+		dev_err(priv->dev, "Request llu_plu_irq failure\n");
+		goto free_rx_irq;
+	}
+
+	return 0;
+
+free_rx_irq:
+	free_irq(priv->rx_irq, priv);
+
+free_error_irq:
+	free_irq(priv->error_irq, priv);
+
+	return err;
+}
+
+void mlxbf_gige_free_irqs(struct mlxbf_gige *priv)
+{
+	free_irq(priv->error_irq, priv);
+	free_irq(priv->rx_irq, priv);
+	free_irq(priv->llu_plu_irq, priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
new file mode 100644
index 000000000000..a0a059e0154f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* Gigabit Ethernet driver for Mellanox BlueField SoC
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+
+#include "mlxbf_gige.h"
+#include "mlxbf_gige_regs.h"
+
+#define DRV_NAME    "mlxbf_gige"
+
+/* Allocate SKB whose payload pointer aligns with the Bluefield
+ * hardware DMA limitation, i.e. DMA operation can't cross
+ * a 4KB boundary.  A maximum packet size of 2KB is assumed in the
+ * alignment formula.  The alignment logic overallocates an SKB,
+ * and then adjusts the headroom so that the SKB data pointer is
+ * naturally aligned to a 2KB boundary.
+ */
+struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv,
+				     unsigned int map_len,
+				     dma_addr_t *buf_dma,
+				     enum dma_data_direction dir)
+{
+	struct sk_buff *skb;
+	u64 addr, offset;
+
+	/* Overallocate the SKB so that any headroom adjustment (to
+	 * provide 2KB natural alignment) does not exceed payload area
+	 */
+	skb = netdev_alloc_skb(priv->netdev, MLXBF_GIGE_DEFAULT_BUF_SZ * 2);
+	if (!skb)
+		return NULL;
+
+	/* Adjust the headroom so that skb->data is naturally aligned to
+	 * a 2KB boundary, which is the maximum packet size supported.
+	 */
+	addr = (long)skb->data;
+	offset = (addr + MLXBF_GIGE_DEFAULT_BUF_SZ - 1) &
+		~(MLXBF_GIGE_DEFAULT_BUF_SZ - 1);
+	offset -= addr;
+	if (offset)
+		skb_reserve(skb, offset);
+
+	/* Return streaming DMA mapping to caller */
+	*buf_dma = dma_map_single(priv->dev, skb->data, map_len, dir);
+	if (dma_mapping_error(priv->dev, *buf_dma)) {
+		dev_kfree_skb(skb);
+		*buf_dma = (dma_addr_t)0;
+		return NULL;
+	}
+
+	return skb;
+}
+
+static void mlxbf_gige_initial_mac(struct mlxbf_gige *priv)
+{
+	u8 mac[ETH_ALEN];
+	u64 local_mac;
+
+	memset(mac, 0, ETH_ALEN);
+	mlxbf_gige_get_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX,
+				     &local_mac);
+	u64_to_ether_addr(local_mac, mac);
+
+	if (is_valid_ether_addr(mac)) {
+		ether_addr_copy(priv->netdev->dev_addr, mac);
+	} else {
+		/* Provide a random MAC if for some reason the device has
+		 * not been configured with a valid MAC address already.
+		 */
+		eth_hw_addr_random(priv->netdev);
+	}
+
+	local_mac = ether_addr_to_u64(priv->netdev->dev_addr);
+	mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX,
+				     local_mac);
+}
+
+static void mlxbf_gige_cache_stats(struct mlxbf_gige *priv)
+{
+	struct mlxbf_gige_stats *p;
+
+	/* Cache stats that will be cleared by clean port operation */
+	p = &priv->stats;
+	p->rx_din_dropped_pkts += readq(priv->base +
+					MLXBF_GIGE_RX_DIN_DROP_COUNTER);
+	p->rx_filter_passed_pkts += readq(priv->base +
+					  MLXBF_GIGE_RX_PASS_COUNTER_ALL);
+	p->rx_filter_discard_pkts += readq(priv->base +
+					   MLXBF_GIGE_RX_DISC_COUNTER_ALL);
+}
+
+static int mlxbf_gige_clean_port(struct mlxbf_gige *priv)
+{
+	u64 control;
+	u64 temp;
+	int err;
+
+	/* Set the CLEAN_PORT_EN bit to trigger SW reset */
+	control = readq(priv->base + MLXBF_GIGE_CONTROL);
+	control |= MLXBF_GIGE_CONTROL_CLEAN_PORT_EN;
+	writeq(control, priv->base + MLXBF_GIGE_CONTROL);
+
+	/* Ensure completion of "clean port" write before polling status */
+	mb();
+
+	err = readq_poll_timeout_atomic(priv->base + MLXBF_GIGE_STATUS, temp,
+					(temp & MLXBF_GIGE_STATUS_READY),
+					100, 100000);
+
+	/* Clear the CLEAN_PORT_EN bit at end of this loop */
+	control = readq(priv->base + MLXBF_GIGE_CONTROL);
+	control &= ~MLXBF_GIGE_CONTROL_CLEAN_PORT_EN;
+	writeq(control, priv->base + MLXBF_GIGE_CONTROL);
+
+	return err;
+}
+
+static int mlxbf_gige_open(struct net_device *netdev)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+	struct phy_device *phydev = netdev->phydev;
+	u64 int_en;
+	int err;
+
+	err = mlxbf_gige_request_irqs(priv);
+	if (err)
+		return err;
+	mlxbf_gige_cache_stats(priv);
+	err = mlxbf_gige_clean_port(priv);
+	if (err)
+		goto free_irqs;
+	err = mlxbf_gige_rx_init(priv);
+	if (err)
+		goto free_irqs;
+	err = mlxbf_gige_tx_init(priv);
+	if (err)
+		goto rx_deinit;
+
+	phy_start(phydev);
+
+	netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll, NAPI_POLL_WEIGHT);
+	napi_enable(&priv->napi);
+	netif_start_queue(netdev);
+
+	/* Set bits in INT_EN that we care about */
+	int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
+		 MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
+		 MLXBF_GIGE_INT_EN_TX_SMALL_FRAME_SIZE |
+		 MLXBF_GIGE_INT_EN_TX_PI_CI_EXCEED_WQ_SIZE |
+		 MLXBF_GIGE_INT_EN_SW_CONFIG_ERROR |
+		 MLXBF_GIGE_INT_EN_SW_ACCESS_ERROR |
+		 MLXBF_GIGE_INT_EN_RX_RECEIVE_PACKET;
+
+	/* Ensure completion of all initialization before enabling interrupts */
+	mb();
+
+	writeq(int_en, priv->base + MLXBF_GIGE_INT_EN);
+
+	return 0;
+
+rx_deinit:
+	mlxbf_gige_rx_deinit(priv);
+
+free_irqs:
+	mlxbf_gige_free_irqs(priv);
+	return err;
+}
+
+static int mlxbf_gige_stop(struct net_device *netdev)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+
+	writeq(0, priv->base + MLXBF_GIGE_INT_EN);
+	netif_stop_queue(netdev);
+	napi_disable(&priv->napi);
+	netif_napi_del(&priv->napi);
+	mlxbf_gige_free_irqs(priv);
+
+	phy_stop(netdev->phydev);
+
+	mlxbf_gige_rx_deinit(priv);
+	mlxbf_gige_tx_deinit(priv);
+	mlxbf_gige_cache_stats(priv);
+	mlxbf_gige_clean_port(priv);
+
+	return 0;
+}
+
+static int mlxbf_gige_do_ioctl(struct net_device *netdev,
+			       struct ifreq *ifr, int cmd)
+{
+	if (!(netif_running(netdev)))
+		return -EINVAL;
+
+	return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+}
+
+static void mlxbf_gige_set_rx_mode(struct net_device *netdev)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+	bool new_promisc_enabled;
+
+	new_promisc_enabled = netdev->flags & IFF_PROMISC;
+
+	/* Only write to the hardware registers if the new setting
+	 * of promiscuous mode is different from the current one.
+	 */
+	if (new_promisc_enabled != priv->promisc_enabled) {
+		priv->promisc_enabled = new_promisc_enabled;
+
+		if (new_promisc_enabled)
+			mlxbf_gige_enable_promisc(priv);
+		else
+			mlxbf_gige_disable_promisc(priv);
+	}
+}
+
+static void mlxbf_gige_get_stats64(struct net_device *netdev,
+				   struct rtnl_link_stats64 *stats)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+
+	netdev_stats_to_stats64(stats, &netdev->stats);
+
+	stats->rx_length_errors = priv->stats.rx_truncate_errors;
+	stats->rx_fifo_errors = priv->stats.rx_din_dropped_pkts +
+				readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER);
+	stats->rx_crc_errors = priv->stats.rx_mac_errors;
+	stats->rx_errors = stats->rx_length_errors +
+			   stats->rx_fifo_errors +
+			   stats->rx_crc_errors;
+
+	stats->tx_fifo_errors = priv->stats.tx_fifo_full;
+	stats->tx_errors = stats->tx_fifo_errors;
+}
+
+static const struct net_device_ops mlxbf_gige_netdev_ops = {
+	.ndo_open		= mlxbf_gige_open,
+	.ndo_stop		= mlxbf_gige_stop,
+	.ndo_start_xmit		= mlxbf_gige_start_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_do_ioctl		= mlxbf_gige_do_ioctl,
+	.ndo_set_rx_mode        = mlxbf_gige_set_rx_mode,
+	.ndo_get_stats64        = mlxbf_gige_get_stats64,
+};
+
+static void mlxbf_gige_adjust_link(struct net_device *netdev)
+{
+	struct phy_device *phydev = netdev->phydev;
+
+	phy_print_status(phydev);
+}
+
+static int mlxbf_gige_probe(struct platform_device *pdev)
+{
+	struct phy_device *phydev;
+	struct net_device *netdev;
+	struct resource *mac_res;
+	struct resource *llu_res;
+	struct resource *plu_res;
+	struct mlxbf_gige *priv;
+	void __iomem *llu_base;
+	void __iomem *plu_base;
+	void __iomem *base;
+	u64 control;
+	int addr;
+	int err;
+
+	mac_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MAC);
+	if (!mac_res)
+		return -ENXIO;
+
+	base = devm_ioremap_resource(&pdev->dev, mac_res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	llu_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_LLU);
+	if (!llu_res)
+		return -ENXIO;
+
+	llu_base = devm_ioremap_resource(&pdev->dev, llu_res);
+	if (IS_ERR(llu_base))
+		return PTR_ERR(llu_base);
+
+	plu_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_PLU);
+	if (!plu_res)
+		return -ENXIO;
+
+	plu_base = devm_ioremap_resource(&pdev->dev, plu_res);
+	if (IS_ERR(plu_base))
+		return PTR_ERR(plu_base);
+
+	/* Perform general init of GigE block */
+	control = readq(base + MLXBF_GIGE_CONTROL);
+	control |= MLXBF_GIGE_CONTROL_PORT_EN;
+	writeq(control, base + MLXBF_GIGE_CONTROL);
+
+	netdev = devm_alloc_etherdev(&pdev->dev, sizeof(*priv));
+	if (!netdev)
+		return -ENOMEM;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+	netdev->netdev_ops = &mlxbf_gige_netdev_ops;
+	netdev->ethtool_ops = &mlxbf_gige_ethtool_ops;
+	priv = netdev_priv(netdev);
+	priv->netdev = netdev;
+
+	platform_set_drvdata(pdev, priv);
+	priv->dev = &pdev->dev;
+	priv->pdev = pdev;
+
+	spin_lock_init(&priv->lock);
+	spin_lock_init(&priv->gpio_lock);
+
+	/* Attach MDIO device */
+	err = mlxbf_gige_mdio_probe(pdev, priv);
+	if (err)
+		return err;
+
+	err = mlxbf_gige_gpio_init(pdev, priv);
+	if (err) {
+		dev_err(&pdev->dev, "PHY IRQ initialization failed\n");
+		mlxbf_gige_mdio_remove(priv);
+		return -ENODEV;
+	}
+
+	priv->base = base;
+	priv->llu_base = llu_base;
+	priv->plu_base = plu_base;
+
+	priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ;
+	priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ;
+
+	/* Write initial MAC address to hardware */
+	mlxbf_gige_initial_mac(priv);
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err);
+		goto out;
+	}
+
+	priv->error_irq = platform_get_irq(pdev, MLXBF_GIGE_ERROR_INTR_IDX);
+	priv->rx_irq = platform_get_irq(pdev, MLXBF_GIGE_RECEIVE_PKT_INTR_IDX);
+	priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX);
+
+	phydev = phy_find_first(priv->mdiobus);
+	if (!phydev) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	addr = phydev->mdio.addr;
+	priv->mdiobus->irq[addr] = priv->phy_irq;
+	phydev->irq = priv->phy_irq;
+
+	err = phy_connect_direct(netdev, phydev,
+				 mlxbf_gige_adjust_link,
+				 PHY_INTERFACE_MODE_GMII);
+	if (err) {
+		dev_err(&pdev->dev, "Could not attach to PHY\n");
+		goto out;
+	}
+
+	/* MAC only supports 1000T full duplex mode */
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+
+	/* Only symmetric pause with flow control enabled is supported so no
+	 * need to negotiate pause.
+	 */
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->advertising);
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->advertising);
+
+	/* Display information about attached PHY device */
+	phy_attached_info(phydev);
+
+	err = register_netdev(netdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register netdev\n");
+		phy_disconnect(phydev);
+		goto out;
+	}
+
+	return 0;
+
+out:
+	mlxbf_gige_gpio_free(priv);
+	mlxbf_gige_mdio_remove(priv);
+	return err;
+}
+
+static int mlxbf_gige_remove(struct platform_device *pdev)
+{
+	struct mlxbf_gige *priv = platform_get_drvdata(pdev);
+
+	unregister_netdev(priv->netdev);
+	phy_disconnect(priv->netdev->phydev);
+	mlxbf_gige_gpio_free(priv);
+	mlxbf_gige_mdio_remove(priv);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static void mlxbf_gige_shutdown(struct platform_device *pdev)
+{
+	struct mlxbf_gige *priv = platform_get_drvdata(pdev);
+
+	writeq(0, priv->base + MLXBF_GIGE_INT_EN);
+	mlxbf_gige_clean_port(priv);
+}
+
+static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
+	{ "MLNXBF17", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_gige_acpi_match);
+
+static struct platform_driver mlxbf_gige_driver = {
+	.probe = mlxbf_gige_probe,
+	.remove = mlxbf_gige_remove,
+	.shutdown = mlxbf_gige_shutdown,
+	.driver = {
+		.name = DRV_NAME,
+		.acpi_match_table = ACPI_PTR(mlxbf_gige_acpi_match),
+	},
+};
+
+module_platform_driver(mlxbf_gige_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC Gigabit Ethernet Driver");
+MODULE_AUTHOR("David Thompson <davthompson@nvidia.com>");
+MODULE_AUTHOR("Asmaa Mnebhi <asmaa@nvidia.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
new file mode 100644
index 000000000000..e32dd34fdcc0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* MDIO support for Mellanox Gigabit Ethernet driver
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/ioport.h>
+#include <linux/irqreturn.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+#include "mlxbf_gige.h"
+
+#define MLXBF_GIGE_MDIO_GW_OFFSET	0x0
+#define MLXBF_GIGE_MDIO_CFG_OFFSET	0x4
+
+/* Support clause 22 */
+#define MLXBF_GIGE_MDIO_CL22_ST1	0x1
+#define MLXBF_GIGE_MDIO_CL22_WRITE	0x1
+#define MLXBF_GIGE_MDIO_CL22_READ	0x2
+
+/* Busy bit is set by software and cleared by hardware */
+#define MLXBF_GIGE_MDIO_SET_BUSY	0x1
+
+/* MDIO GW register bits */
+#define MLXBF_GIGE_MDIO_GW_AD_MASK	GENMASK(15, 0)
+#define MLXBF_GIGE_MDIO_GW_DEVAD_MASK	GENMASK(20, 16)
+#define MLXBF_GIGE_MDIO_GW_PARTAD_MASK	GENMASK(25, 21)
+#define MLXBF_GIGE_MDIO_GW_OPCODE_MASK	GENMASK(27, 26)
+#define MLXBF_GIGE_MDIO_GW_ST1_MASK	GENMASK(28, 28)
+#define MLXBF_GIGE_MDIO_GW_BUSY_MASK	GENMASK(30, 30)
+
+/* MDIO config register bits */
+#define MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK		GENMASK(1, 0)
+#define MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK		GENMASK(2, 2)
+#define MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK	GENMASK(4, 4)
+#define MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK		GENMASK(15, 8)
+#define MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK		GENMASK(23, 16)
+#define MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK		GENMASK(31, 24)
+
+/* Formula for encoding the MDIO period. The encoded value is
+ * passed to the MDIO config register.
+ *
+ * mdc_clk = 2*(val + 1)*i1clk
+ *
+ * 400 ns = 2*(val + 1)*(((1/430)*1000) ns)
+ *
+ * val = (((400 * 430 / 1000) / 2) - 1)
+ */
+#define MLXBF_GIGE_I1CLK_MHZ		430
+#define MLXBF_GIGE_MDC_CLK_NS		400
+
+#define MLXBF_GIGE_MDIO_PERIOD	(((MLXBF_GIGE_MDC_CLK_NS * MLXBF_GIGE_I1CLK_MHZ / 1000) / 2) - 1)
+
+#define MLXBF_GIGE_MDIO_CFG_VAL (FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_MODE_MASK, 1) | \
+				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \
+				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \
+				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, \
+					    MLXBF_GIGE_MDIO_PERIOD) | \
+				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \
+				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13))
+
+static u32 mlxbf_gige_mdio_create_cmd(u16 data, int phy_add,
+				      int phy_reg, u32 opcode)
+{
+	u32 gw_reg = 0;
+
+	gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_AD_MASK, data);
+	gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_DEVAD_MASK, phy_reg);
+	gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_PARTAD_MASK, phy_add);
+	gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_OPCODE_MASK, opcode);
+	gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_ST1_MASK,
+			     MLXBF_GIGE_MDIO_CL22_ST1);
+	gw_reg |= FIELD_PREP(MLXBF_GIGE_MDIO_GW_BUSY_MASK,
+			     MLXBF_GIGE_MDIO_SET_BUSY);
+
+	return gw_reg;
+}
+
+static int mlxbf_gige_mdio_read(struct mii_bus *bus, int phy_add, int phy_reg)
+{
+	struct mlxbf_gige *priv = bus->priv;
+	u32 cmd;
+	int ret;
+	u32 val;
+
+	if (phy_reg & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	/* Send mdio read request */
+	cmd = mlxbf_gige_mdio_create_cmd(0, phy_add, phy_reg, MLXBF_GIGE_MDIO_CL22_READ);
+
+	writel(cmd, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
+
+	ret = readl_poll_timeout_atomic(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET,
+					val, !(val & MLXBF_GIGE_MDIO_GW_BUSY_MASK), 100, 1000000);
+
+	if (ret) {
+		writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
+		return ret;
+	}
+
+	ret = readl(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
+	/* Only return ad bits of the gw register */
+	ret &= MLXBF_GIGE_MDIO_GW_AD_MASK;
+
+	return ret;
+}
+
+static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add,
+				 int phy_reg, u16 val)
+{
+	struct mlxbf_gige *priv = bus->priv;
+	u32 cmd;
+	int ret;
+	u32 temp;
+
+	if (phy_reg & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	/* Send mdio write request */
+	cmd = mlxbf_gige_mdio_create_cmd(val, phy_add, phy_reg,
+					 MLXBF_GIGE_MDIO_CL22_WRITE);
+	writel(cmd, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
+
+	/* If the poll timed out, drop the request */
+	ret = readl_poll_timeout_atomic(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET,
+					temp, !(temp & MLXBF_GIGE_MDIO_GW_BUSY_MASK), 100, 1000000);
+
+	return ret;
+}
+
+int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MDIO9);
+	if (!res)
+		return -ENODEV;
+
+	priv->mdio_io = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->mdio_io))
+		return PTR_ERR(priv->mdio_io);
+
+	/* Configure mdio parameters */
+	writel(MLXBF_GIGE_MDIO_CFG_VAL,
+	       priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET);
+
+	priv->mdiobus = devm_mdiobus_alloc(dev);
+	if (!priv->mdiobus) {
+		dev_err(dev, "Failed to alloc MDIO bus\n");
+		return -ENOMEM;
+	}
+
+	priv->mdiobus->name = "mlxbf-mdio";
+	priv->mdiobus->read = mlxbf_gige_mdio_read;
+	priv->mdiobus->write = mlxbf_gige_mdio_write;
+	priv->mdiobus->parent = dev;
+	priv->mdiobus->priv = priv;
+	snprintf(priv->mdiobus->id, MII_BUS_ID_SIZE, "%s",
+		 dev_name(dev));
+
+	ret = mdiobus_register(priv->mdiobus);
+	if (ret)
+		dev_err(dev, "Failed to register MDIO bus\n");
+
+	return ret;
+}
+
+void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv)
+{
+	mdiobus_unregister(priv->mdiobus);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
new file mode 100644
index 000000000000..5fb33c9294bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
+
+/* Header file for Mellanox BlueField GigE register defines
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#ifndef __MLXBF_GIGE_REGS_H__
+#define __MLXBF_GIGE_REGS_H__
+
+#define MLXBF_GIGE_STATUS                             0x0010
+#define MLXBF_GIGE_STATUS_READY                       BIT(0)
+#define MLXBF_GIGE_INT_STATUS                         0x0028
+#define MLXBF_GIGE_INT_STATUS_RX_RECEIVE_PACKET       BIT(0)
+#define MLXBF_GIGE_INT_STATUS_RX_MAC_ERROR            BIT(1)
+#define MLXBF_GIGE_INT_STATUS_RX_TRN_ERROR            BIT(2)
+#define MLXBF_GIGE_INT_STATUS_SW_ACCESS_ERROR         BIT(3)
+#define MLXBF_GIGE_INT_STATUS_SW_CONFIG_ERROR         BIT(4)
+#define MLXBF_GIGE_INT_STATUS_TX_PI_CI_EXCEED_WQ_SIZE BIT(5)
+#define MLXBF_GIGE_INT_STATUS_TX_SMALL_FRAME_SIZE     BIT(6)
+#define MLXBF_GIGE_INT_STATUS_TX_CHECKSUM_INPUTS      BIT(7)
+#define MLXBF_GIGE_INT_STATUS_HW_ACCESS_ERROR         BIT(8)
+#define MLXBF_GIGE_INT_EN                             0x0030
+#define MLXBF_GIGE_INT_EN_RX_RECEIVE_PACKET           BIT(0)
+#define MLXBF_GIGE_INT_EN_RX_MAC_ERROR                BIT(1)
+#define MLXBF_GIGE_INT_EN_RX_TRN_ERROR                BIT(2)
+#define MLXBF_GIGE_INT_EN_SW_ACCESS_ERROR             BIT(3)
+#define MLXBF_GIGE_INT_EN_SW_CONFIG_ERROR             BIT(4)
+#define MLXBF_GIGE_INT_EN_TX_PI_CI_EXCEED_WQ_SIZE     BIT(5)
+#define MLXBF_GIGE_INT_EN_TX_SMALL_FRAME_SIZE         BIT(6)
+#define MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS          BIT(7)
+#define MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR             BIT(8)
+#define MLXBF_GIGE_INT_MASK                           0x0038
+#define MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET         BIT(0)
+#define MLXBF_GIGE_CONTROL                            0x0040
+#define MLXBF_GIGE_CONTROL_PORT_EN                    BIT(0)
+#define MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN            BIT(1)
+#define MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC            BIT(4)
+#define MLXBF_GIGE_CONTROL_CLEAN_PORT_EN              BIT(31)
+#define MLXBF_GIGE_RX_WQ_BASE                         0x0200
+#define MLXBF_GIGE_RX_WQE_SIZE_LOG2                   0x0208
+#define MLXBF_GIGE_RX_WQE_SIZE_LOG2_RESET_VAL         7
+#define MLXBF_GIGE_RX_CQ_BASE                         0x0210
+#define MLXBF_GIGE_TX_WQ_BASE                         0x0218
+#define MLXBF_GIGE_TX_WQ_SIZE_LOG2                    0x0220
+#define MLXBF_GIGE_TX_WQ_SIZE_LOG2_RESET_VAL          7
+#define MLXBF_GIGE_TX_CI_UPDATE_ADDRESS               0x0228
+#define MLXBF_GIGE_RX_WQE_PI                          0x0230
+#define MLXBF_GIGE_TX_PRODUCER_INDEX                  0x0238
+#define MLXBF_GIGE_RX_MAC_FILTER                      0x0240
+#define MLXBF_GIGE_RX_MAC_FILTER_STRIDE               0x0008
+#define MLXBF_GIGE_RX_DIN_DROP_COUNTER                0x0260
+#define MLXBF_GIGE_TX_CONSUMER_INDEX                  0x0310
+#define MLXBF_GIGE_TX_CONTROL                         0x0318
+#define MLXBF_GIGE_TX_CONTROL_GRACEFUL_STOP           BIT(0)
+#define MLXBF_GIGE_TX_STATUS                          0x0388
+#define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL           BIT(1)
+#define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START     0x0520
+#define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END       0x0528
+#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC           0x0540
+#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN        BIT(0)
+#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS           0x0548
+#define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN        BIT(0)
+#define MLXBF_GIGE_RX_PASS_COUNTER_ALL                0x0550
+#define MLXBF_GIGE_RX_DISC_COUNTER_ALL                0x0560
+#define MLXBF_GIGE_RX                                 0x0578
+#define MLXBF_GIGE_RX_STRIP_CRC_EN                    BIT(1)
+#define MLXBF_GIGE_RX_DMA                             0x0580
+#define MLXBF_GIGE_RX_DMA_EN                          BIT(0)
+#define MLXBF_GIGE_RX_CQE_PACKET_CI                   0x05b0
+#define MLXBF_GIGE_MAC_CFG                            0x05e8
+
+/* NOTE: MLXBF_GIGE_MAC_CFG is the last defined register offset,
+ * so use that plus size of single register to derive total size
+ */
+#define MLXBF_GIGE_MMIO_REG_SZ                        (MLXBF_GIGE_MAC_CFG + 8)
+
+#endif /* !defined(__MLXBF_GIGE_REGS_H__) */
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
new file mode 100644
index 000000000000..afa3b92a6905
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* Packet receive logic for Mellanox Gigabit Ethernet driver
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+
+#include "mlxbf_gige.h"
+#include "mlxbf_gige_regs.h"
+
+void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
+				  unsigned int index, u64 dmac)
+{
+	void __iomem *base = priv->base;
+	u64 control;
+
+	/* Write destination MAC to specified MAC RX filter */
+	writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
+	       (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
+
+	/* Enable MAC receive filter mask for specified index */
+	control = readq(base + MLXBF_GIGE_CONTROL);
+	control |= (MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index);
+	writeq(control, base + MLXBF_GIGE_CONTROL);
+}
+
+void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
+				  unsigned int index, u64 *dmac)
+{
+	void __iomem *base = priv->base;
+
+	/* Read destination MAC from specified MAC RX filter */
+	*dmac = readq(base + MLXBF_GIGE_RX_MAC_FILTER +
+		      (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
+}
+
+void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv)
+{
+	void __iomem *base = priv->base;
+	u64 control;
+	u64 end_mac;
+
+	/* Enable MAC_ID_RANGE match functionality */
+	control = readq(base + MLXBF_GIGE_CONTROL);
+	control |= MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN;
+	writeq(control, base + MLXBF_GIGE_CONTROL);
+
+	/* Set start of destination MAC range check to 0 */
+	writeq(0, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START);
+
+	/* Set end of destination MAC range check to all FFs */
+	end_mac = BCAST_MAC_ADDR;
+	writeq(end_mac, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END);
+}
+
+void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv)
+{
+	void __iomem *base = priv->base;
+	u64 control;
+
+	/* Disable MAC_ID_RANGE match functionality */
+	control = readq(base + MLXBF_GIGE_CONTROL);
+	control &= ~MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN;
+	writeq(control, base + MLXBF_GIGE_CONTROL);
+
+	/* NOTE: no need to change DMAC_RANGE_START or END;
+	 * those values are ignored since MAC_ID_RANGE_EN=0
+	 */
+}
+
+/* Receive Initialization
+ * 1) Configures RX MAC filters via MMIO registers
+ * 2) Allocates RX WQE array using coherent DMA mapping
+ * 3) Initializes each element of RX WQE array with a receive
+ *    buffer pointer (also using coherent DMA mapping)
+ * 4) Allocates RX CQE array using coherent DMA mapping
+ * 5) Completes other misc receive initialization
+ */
+int mlxbf_gige_rx_init(struct mlxbf_gige *priv)
+{
+	size_t wq_size, cq_size;
+	dma_addr_t *rx_wqe_ptr;
+	dma_addr_t rx_buf_dma;
+	u64 data;
+	int i, j;
+
+	/* Configure MAC RX filter #0 to allow RX of broadcast pkts */
+	mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX,
+				     BCAST_MAC_ADDR);
+
+	wq_size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries;
+	priv->rx_wqe_base = dma_alloc_coherent(priv->dev, wq_size,
+					       &priv->rx_wqe_base_dma,
+					       GFP_KERNEL);
+	if (!priv->rx_wqe_base)
+		return -ENOMEM;
+
+	/* Initialize 'rx_wqe_ptr' to point to first RX WQE in array
+	 * Each RX WQE is simply a receive buffer pointer, so walk
+	 * the entire array, allocating a 2KB buffer for each element
+	 */
+	rx_wqe_ptr = priv->rx_wqe_base;
+
+	for (i = 0; i < priv->rx_q_entries; i++) {
+		priv->rx_skb[i] = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ,
+						       &rx_buf_dma, DMA_FROM_DEVICE);
+		if (!priv->rx_skb[i])
+			goto free_wqe_and_skb;
+		*rx_wqe_ptr++ = rx_buf_dma;
+	}
+
+	/* Write RX WQE base address into MMIO reg */
+	writeq(priv->rx_wqe_base_dma, priv->base + MLXBF_GIGE_RX_WQ_BASE);
+
+	cq_size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries;
+	priv->rx_cqe_base = dma_alloc_coherent(priv->dev, cq_size,
+					       &priv->rx_cqe_base_dma,
+					       GFP_KERNEL);
+	if (!priv->rx_cqe_base)
+		goto free_wqe_and_skb;
+
+	for (i = 0; i < priv->rx_q_entries; i++)
+		priv->rx_cqe_base[i] |= MLXBF_GIGE_RX_CQE_VALID_MASK;
+
+	/* Write RX CQE base address into MMIO reg */
+	writeq(priv->rx_cqe_base_dma, priv->base + MLXBF_GIGE_RX_CQ_BASE);
+
+	/* Write RX_WQE_PI with current number of replenished buffers */
+	writeq(priv->rx_q_entries, priv->base + MLXBF_GIGE_RX_WQE_PI);
+
+	/* Enable removal of CRC during RX */
+	data = readq(priv->base + MLXBF_GIGE_RX);
+	data |= MLXBF_GIGE_RX_STRIP_CRC_EN;
+	writeq(data, priv->base + MLXBF_GIGE_RX);
+
+	/* Enable RX MAC filter pass and discard counters */
+	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN,
+	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC);
+	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN,
+	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS);
+
+	/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
+	 * indicate readiness to receive interrupts
+	 */
+	data = readq(priv->base + MLXBF_GIGE_INT_MASK);
+	data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET;
+	writeq(data, priv->base + MLXBF_GIGE_INT_MASK);
+
+	/* Enable RX DMA to write new packets to memory */
+	data = readq(priv->base + MLXBF_GIGE_RX_DMA);
+	data |= MLXBF_GIGE_RX_DMA_EN;
+	writeq(data, priv->base + MLXBF_GIGE_RX_DMA);
+
+	writeq(ilog2(priv->rx_q_entries),
+	       priv->base + MLXBF_GIGE_RX_WQE_SIZE_LOG2);
+
+	return 0;
+
+free_wqe_and_skb:
+	rx_wqe_ptr = priv->rx_wqe_base;
+	for (j = 0; j < i; j++) {
+		dma_unmap_single(priv->dev, *rx_wqe_ptr,
+				 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
+		dev_kfree_skb(priv->rx_skb[j]);
+		rx_wqe_ptr++;
+	}
+	dma_free_coherent(priv->dev, wq_size,
+			  priv->rx_wqe_base, priv->rx_wqe_base_dma);
+	return -ENOMEM;
+}
+
+/* Receive Deinitialization
+ * This routine will free allocations done by mlxbf_gige_rx_init(),
+ * namely the RX WQE and RX CQE arrays, as well as all RX buffers
+ */
+void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv)
+{
+	dma_addr_t *rx_wqe_ptr;
+	size_t size;
+	u64 data;
+	int i;
+
+	/* Disable RX DMA to prevent packet transfers to memory */
+	data = readq(priv->base + MLXBF_GIGE_RX_DMA);
+	data &= ~MLXBF_GIGE_RX_DMA_EN;
+	writeq(data, priv->base + MLXBF_GIGE_RX_DMA);
+
+	rx_wqe_ptr = priv->rx_wqe_base;
+
+	for (i = 0; i < priv->rx_q_entries; i++) {
+		dma_unmap_single(priv->dev, *rx_wqe_ptr, MLXBF_GIGE_DEFAULT_BUF_SZ,
+				 DMA_FROM_DEVICE);
+		dev_kfree_skb(priv->rx_skb[i]);
+		rx_wqe_ptr++;
+	}
+
+	size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries;
+	dma_free_coherent(priv->dev, size,
+			  priv->rx_wqe_base, priv->rx_wqe_base_dma);
+
+	size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries;
+	dma_free_coherent(priv->dev, size,
+			  priv->rx_cqe_base, priv->rx_cqe_base_dma);
+
+	priv->rx_wqe_base = NULL;
+	priv->rx_wqe_base_dma = 0;
+	priv->rx_cqe_base = NULL;
+	priv->rx_cqe_base_dma = 0;
+	writeq(0, priv->base + MLXBF_GIGE_RX_WQ_BASE);
+	writeq(0, priv->base + MLXBF_GIGE_RX_CQ_BASE);
+}
+
+static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
+{
+	struct net_device *netdev = priv->netdev;
+	struct sk_buff *skb = NULL, *rx_skb;
+	u16 rx_pi_rem, rx_ci_rem;
+	dma_addr_t *rx_wqe_addr;
+	dma_addr_t rx_buf_dma;
+	u64 *rx_cqe_addr;
+	u64 datalen;
+	u64 rx_cqe;
+	u16 rx_ci;
+	u16 rx_pi;
+
+	/* Index into RX buffer array is rx_pi w/wrap based on RX_CQE_SIZE */
+	rx_pi = readq(priv->base + MLXBF_GIGE_RX_WQE_PI);
+	rx_pi_rem = rx_pi % priv->rx_q_entries;
+
+	rx_wqe_addr = priv->rx_wqe_base + rx_pi_rem;
+	rx_cqe_addr = priv->rx_cqe_base + rx_pi_rem;
+	rx_cqe = *rx_cqe_addr;
+
+	if ((!!(rx_cqe & MLXBF_GIGE_RX_CQE_VALID_MASK)) != priv->valid_polarity)
+		return false;
+
+	if ((rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MASK) == 0) {
+		/* Packet is OK, increment stats */
+		datalen = rx_cqe & MLXBF_GIGE_RX_CQE_PKT_LEN_MASK;
+		netdev->stats.rx_packets++;
+		netdev->stats.rx_bytes += datalen;
+
+		skb = priv->rx_skb[rx_pi_rem];
+
+		skb_put(skb, datalen);
+
+		skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
+
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		/* Alloc another RX SKB for this same index */
+		rx_skb = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ,
+					      &rx_buf_dma, DMA_FROM_DEVICE);
+		if (!rx_skb)
+			return false;
+		priv->rx_skb[rx_pi_rem] = rx_skb;
+		dma_unmap_single(priv->dev, *rx_wqe_addr,
+				 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
+		*rx_wqe_addr = rx_buf_dma;
+	} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) {
+		priv->stats.rx_mac_errors++;
+	} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_TRUNCATED) {
+		priv->stats.rx_truncate_errors++;
+	}
+
+	/* Let hardware know we've replenished one buffer */
+	rx_pi++;
+
+	/* Ensure completion of all writes before notifying HW of replenish */
+	wmb();
+	writeq(rx_pi, priv->base + MLXBF_GIGE_RX_WQE_PI);
+
+	(*rx_pkts)++;
+
+	rx_pi_rem = rx_pi % priv->rx_q_entries;
+	if (rx_pi_rem == 0)
+		priv->valid_polarity ^= 1;
+	rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI);
+	rx_ci_rem = rx_ci % priv->rx_q_entries;
+
+	if (skb)
+		netif_receive_skb(skb);
+
+	return rx_pi_rem != rx_ci_rem;
+}
+
+/* Driver poll() function called by NAPI infrastructure */
+int mlxbf_gige_poll(struct napi_struct *napi, int budget)
+{
+	struct mlxbf_gige *priv;
+	bool remaining_pkts;
+	int work_done = 0;
+	u64 data;
+
+	priv = container_of(napi, struct mlxbf_gige, napi);
+
+	mlxbf_gige_handle_tx_complete(priv);
+
+	do {
+		remaining_pkts = mlxbf_gige_rx_packet(priv, &work_done);
+	} while (remaining_pkts && work_done < budget);
+
+	/* If amount of work done < budget, turn off NAPI polling
+	 * via napi_complete_done(napi, work_done) and then
+	 * re-enable interrupts.
+	 */
+	if (work_done < budget && napi_complete_done(napi, work_done)) {
+		/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
+		 * indicate receive readiness
+		 */
+		data = readq(priv->base + MLXBF_GIGE_INT_MASK);
+		data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET;
+		writeq(data, priv->base + MLXBF_GIGE_INT_MASK);
+	}
+
+	return work_done;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c
new file mode 100644
index 000000000000..04982e888c63
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
+
+/* Packet transmit logic for Mellanox Gigabit Ethernet driver
+ *
+ * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/skbuff.h>
+
+#include "mlxbf_gige.h"
+#include "mlxbf_gige_regs.h"
+
+/* Transmit Initialization
+ * 1) Allocates TX WQE array using coherent DMA mapping
+ * 2) Allocates TX completion counter using coherent DMA mapping
+ */
+int mlxbf_gige_tx_init(struct mlxbf_gige *priv)
+{
+	size_t size;
+
+	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
+	priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size,
+					       &priv->tx_wqe_base_dma,
+					       GFP_KERNEL);
+	if (!priv->tx_wqe_base)
+		return -ENOMEM;
+
+	priv->tx_wqe_next = priv->tx_wqe_base;
+
+	/* Write TX WQE base address into MMIO reg */
+	writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE);
+
+	/* Allocate address for TX completion count */
+	priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
+					 &priv->tx_cc_dma, GFP_KERNEL);
+	if (!priv->tx_cc) {
+		dma_free_coherent(priv->dev, size,
+				  priv->tx_wqe_base, priv->tx_wqe_base_dma);
+		return -ENOMEM;
+	}
+
+	/* Write TX CC base address into MMIO reg */
+	writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
+
+	writeq(ilog2(priv->tx_q_entries),
+	       priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2);
+
+	priv->prev_tx_ci = 0;
+	priv->tx_pi = 0;
+
+	return 0;
+}
+
+/* Transmit Deinitialization
+ * This routine will free allocations done by mlxbf_gige_tx_init(),
+ * namely the TX WQE array and the TX completion counter
+ */
+void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv)
+{
+	u64 *tx_wqe_addr;
+	size_t size;
+	int i;
+
+	tx_wqe_addr = priv->tx_wqe_base;
+
+	for (i = 0; i < priv->tx_q_entries; i++) {
+		if (priv->tx_skb[i]) {
+			dma_unmap_single(priv->dev, *tx_wqe_addr,
+					 priv->tx_skb[i]->len, DMA_TO_DEVICE);
+			dev_kfree_skb(priv->tx_skb[i]);
+			priv->tx_skb[i] = NULL;
+		}
+		tx_wqe_addr += 2;
+	}
+
+	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
+	dma_free_coherent(priv->dev, size,
+			  priv->tx_wqe_base, priv->tx_wqe_base_dma);
+
+	dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
+			  priv->tx_cc, priv->tx_cc_dma);
+
+	priv->tx_wqe_base = NULL;
+	priv->tx_wqe_base_dma = 0;
+	priv->tx_cc = NULL;
+	priv->tx_cc_dma = 0;
+	priv->tx_wqe_next = NULL;
+	writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE);
+	writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
+}
+
+/* Function that returns status of TX ring:
+ *          0: TX ring is full, i.e. there are no
+ *             available un-used entries in TX ring.
+ *   non-null: TX ring is not full, i.e. there are
+ *             some available entries in TX ring.
+ *             The non-null value is a measure of
+ *             how many TX entries are available, but
+ *             it is not the exact number of available
+ *             entries (see below).
+ *
+ * The algorithm makes the assumption that if
+ * (prev_tx_ci == tx_pi) then the TX ring is empty.
+ * An empty ring actually has (tx_q_entries-1)
+ * entries, which allows the algorithm to differentiate
+ * the case of an empty ring vs. a full ring.
+ */
+static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv)
+{
+	unsigned long flags;
+	u16 avail;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (priv->prev_tx_ci == priv->tx_pi)
+		avail = priv->tx_q_entries - 1;
+	else
+		avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi)
+			  % priv->tx_q_entries) - 1;
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return avail;
+}
+
+bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv)
+{
+	struct net_device_stats *stats;
+	u16 tx_wqe_index;
+	u64 *tx_wqe_addr;
+	u64 tx_status;
+	u16 tx_ci;
+
+	tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS);
+	if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL)
+		priv->stats.tx_fifo_full++;
+	tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX);
+	stats = &priv->netdev->stats;
+
+	/* Transmit completion logic needs to loop until the completion
+	 * index (in SW) equals TX consumer index (from HW).  These
+	 * parameters are unsigned 16-bit values and the wrap case needs
+	 * to be supported, that is TX consumer index wrapped from 0xFFFF
+	 * to 0 while TX completion index is still < 0xFFFF.
+	 */
+	for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) {
+		tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries;
+		/* Each TX WQE is 16 bytes. The 8 MSB store the 2KB TX
+		 * buffer address and the 8 LSB contain information
+		 * about the TX WQE.
+		 */
+		tx_wqe_addr = priv->tx_wqe_base +
+			       (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS);
+
+		stats->tx_packets++;
+		stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr);
+
+		dma_unmap_single(priv->dev, *tx_wqe_addr,
+				 priv->tx_skb[tx_wqe_index]->len, DMA_TO_DEVICE);
+		dev_consume_skb_any(priv->tx_skb[tx_wqe_index]);
+		priv->tx_skb[tx_wqe_index] = NULL;
+
+		/* Ensure completion of updates across all cores */
+		mb();
+	}
+
+	/* Since the TX ring was likely just drained, check if TX queue
+	 * had previously been stopped and now that there are TX buffers
+	 * available the TX queue can be awakened.
+	 */
+	if (netif_queue_stopped(priv->netdev) &&
+	    mlxbf_gige_tx_buffs_avail(priv))
+		netif_wake_queue(priv->netdev);
+
+	return true;
+}
+
+/* Function to advance the tx_wqe_next pointer to next TX WQE */
+void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv)
+{
+	/* Advance tx_wqe_next pointer */
+	priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS;
+
+	/* Check if 'next' pointer is beyond end of TX ring */
+	/* If so, set 'next' back to 'base' pointer of ring */
+	if (priv->tx_wqe_next == (priv->tx_wqe_base +
+				  (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS)))
+		priv->tx_wqe_next = priv->tx_wqe_base;
+}
+
+netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct mlxbf_gige *priv = netdev_priv(netdev);
+	long buff_addr, start_dma_page, end_dma_page;
+	struct sk_buff *tx_skb;
+	dma_addr_t tx_buf_dma;
+	unsigned long flags;
+	u64 *tx_wqe_addr;
+	u64 word2;
+
+	/* If needed, linearize TX SKB as hardware DMA expects this */
+	if (skb->len > MLXBF_GIGE_DEFAULT_BUF_SZ || skb_linearize(skb)) {
+		dev_kfree_skb(skb);
+		netdev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	buff_addr = (long)skb->data;
+	start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
+	end_dma_page   = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;
+
+	/* Verify that payload pointer and data length of SKB to be
+	 * transmitted does not violate the hardware DMA limitation.
+	 */
+	if (start_dma_page != end_dma_page) {
+		/* DMA operation would fail as-is, alloc new aligned SKB */
+		tx_skb = mlxbf_gige_alloc_skb(priv, skb->len,
+					      &tx_buf_dma, DMA_TO_DEVICE);
+		if (!tx_skb) {
+			/* Free original skb, could not alloc new aligned SKB */
+			dev_kfree_skb(skb);
+			netdev->stats.tx_dropped++;
+			return NETDEV_TX_OK;
+		}
+
+		skb_put_data(tx_skb, skb->data, skb->len);
+
+		/* Free the original SKB */
+		dev_kfree_skb(skb);
+	} else {
+		tx_skb = skb;
+		tx_buf_dma = dma_map_single(priv->dev, skb->data,
+					    skb->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, tx_buf_dma)) {
+			dev_kfree_skb(skb);
+			netdev->stats.tx_dropped++;
+			return NETDEV_TX_OK;
+		}
+	}
+
+	/* Get address of TX WQE */
+	tx_wqe_addr = priv->tx_wqe_next;
+
+	mlxbf_gige_update_tx_wqe_next(priv);
+
+	/* Put PA of buffer address into first 64-bit word of TX WQE */
+	*tx_wqe_addr = tx_buf_dma;
+
+	/* Set TX WQE pkt_len appropriately
+	 * NOTE: GigE silicon will automatically pad up to
+	 *       minimum packet length if needed.
+	 */
+	word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK;
+
+	/* Write entire 2nd word of TX WQE */
+	*(tx_wqe_addr + 1) = word2;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb;
+	priv->tx_pi++;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	if (!netdev_xmit_more()) {
+		/* Create memory barrier before write to TX PI */
+		wmb();
+		writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX);
+	}
+
+	/* Check if the last TX entry was just used */
+	if (!mlxbf_gige_tx_buffs_avail(priv)) {
+		/* TX ring is full, inform stack */
+		netif_stop_queue(netdev);
+
+		/* Since there is no separate "TX complete" interrupt, need
+		 * to explicitly schedule NAPI poll.  This will trigger logic
+		 * which processes TX completions, and will hopefully drain
+		 * the TX ring allowing the TX queue to be awakened.
+		 */
+		napi_schedule(&priv->napi);
+	}
+
+	return NETDEV_TX_OK;
+}

From 1db1a862a08f85edc36aad091236ac9b818e949e Mon Sep 17 00:00:00 2001
From: Bailey Forrest <bcf@google.com>
Date: Thu, 24 Jun 2021 19:55:41 -0700
Subject: [PATCH 2881/3292] gve: Fix swapped vars when fetching max queues

Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC")
Signed-off-by: Bailey Forrest <bcf@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index bbc423e93122..79cefe85a799 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1295,8 +1295,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	gve_write_version(&reg_bar->driver_version);
 	/* Get max queues to alloc etherdev */
-	max_rx_queues = ioread32be(&reg_bar->max_tx_queues);
-	max_tx_queues = ioread32be(&reg_bar->max_rx_queues);
+	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
+	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
 	/* Alloc and setup the netdev and priv */
 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
 	if (!dev) {

From 172db5f91d5f7b91670c68a7547798b0b5374158 Mon Sep 17 00:00:00 2001
From: Maciej Machnikowski <maciej.machnikowski@intel.com>
Date: Wed, 16 Jun 2021 09:35:22 -0700
Subject: [PATCH 2882/3292] ice: add support for auxiliary input/output pins

The E810 device supports programmable pins for enabling both input and
output events related to the PTP hardware clock. This includes both
output signals with programmable period, as well as timestamping of
events on input pins.

Add support for enabling these using the CONFIG_PTP_1588_CLOCK
interface.

This allows programming the software defined pins to take advantage of
the hardware clock features.

Signed-off-by: Maciej Machnikowski <maciej.machnikowski@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  18 ++
 drivers/net/ethernet/intel/ice/ice_main.c     |  12 +
 drivers/net/ethernet/intel/ice/ice_ptp.c      | 293 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_ptp.h      |  43 +++
 4 files changed, 366 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 6989a76c42a7..76021d977b60 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -138,6 +138,10 @@
 #define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S		4
 #define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M		ICE_M(0x3, 4)
 #define GLGEN_CLKSTAT_SRC			0x000B826C
+#define GLGEN_GPIO_CTL(_i)			(0x000880C8 + ((_i) * 4))
+#define GLGEN_GPIO_CTL_PIN_DIR_M		BIT(4)
+#define GLGEN_GPIO_CTL_PIN_FUNC_S		8
+#define GLGEN_GPIO_CTL_PIN_FUNC_M		ICE_M(0xF, 8)
 #define GLGEN_RSTAT				0x000B8188
 #define GLGEN_RSTAT_DEVSTATE_M			ICE_M(0x3, 0)
 #define GLGEN_RSTCTL				0x000B8180
@@ -203,6 +207,7 @@
 #define PFINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
 #define PFINT_OICR				0x0016CA00
 #define PFINT_OICR_TSYN_TX_M			BIT(11)
+#define PFINT_OICR_TSYN_EVNT_M			BIT(12)
 #define PFINT_OICR_ECC_ERR_M			BIT(16)
 #define PFINT_OICR_MAL_DETECT_M			BIT(19)
 #define PFINT_OICR_GRST_M			BIT(20)
@@ -434,10 +439,18 @@
 #define GLV_UPRCL(_i)				(0x003B2000 + ((_i) * 8))
 #define GLV_UPTCL(_i)				(0x0030A000 + ((_i) * 8))
 #define PRTRPB_RDPC				0x000AC260
+#define GLTSYN_AUX_IN_0(_i)			(0x000889D8 + ((_i) * 4))
+#define GLTSYN_AUX_IN_0_INT_ENA_M		BIT(4)
+#define GLTSYN_AUX_OUT_0(_i)			(0x00088998 + ((_i) * 4))
+#define GLTSYN_AUX_OUT_0_OUT_ENA_M		BIT(0)
+#define GLTSYN_AUX_OUT_0_OUTMOD_M		ICE_M(0x3, 1)
+#define GLTSYN_CLKO_0(_i)			(0x000889B8 + ((_i) * 4))
 #define GLTSYN_CMD				0x00088810
 #define GLTSYN_CMD_SYNC				0x00088814
 #define GLTSYN_ENA(_i)				(0x00088808 + ((_i) * 4))
 #define GLTSYN_ENA_TSYN_ENA_M			BIT(0)
+#define GLTSYN_EVNT_H_0(_i)			(0x00088970 + ((_i) * 4))
+#define GLTSYN_EVNT_L_0(_i)			(0x00088968 + ((_i) * 4))
 #define GLTSYN_INCVAL_H(_i)			(0x00088920 + ((_i) * 4))
 #define GLTSYN_INCVAL_L(_i)			(0x00088918 + ((_i) * 4))
 #define GLTSYN_SHADJ_H(_i)			(0x00088910 + ((_i) * 4))
@@ -446,7 +459,12 @@
 #define GLTSYN_SHTIME_H(_i)			(0x000888F0 + ((_i) * 4))
 #define GLTSYN_SHTIME_L(_i)			(0x000888E8 + ((_i) * 4))
 #define GLTSYN_STAT(_i)				(0x000888C0 + ((_i) * 4))
+#define GLTSYN_STAT_EVENT0_M			BIT(0)
+#define GLTSYN_STAT_EVENT1_M			BIT(1)
+#define GLTSYN_STAT_EVENT2_M			BIT(2)
 #define GLTSYN_SYNC_DLAY			0x00088818
+#define GLTSYN_TGT_H_0(_i)			(0x00088930 + ((_i) * 4))
+#define GLTSYN_TGT_L_0(_i)			(0x00088928 + ((_i) * 4))
 #define GLTSYN_TIME_H(_i)			(0x000888D8 + ((_i) * 4))
 #define GLTSYN_TIME_L(_i)			(0x000888D0 + ((_i) * 4))
 #define PFTSYN_SEM				0x00088880
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index b72ab9e97e79..ef8d1815af56 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2817,6 +2817,18 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		ice_ptp_process_ts(pf);
 	}
 
+	if (oicr & PFINT_OICR_TSYN_EVNT_M) {
+		u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+		u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
+
+		/* Save EVENTs from GTSYN register */
+		pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M |
+						     GLTSYN_STAT_EVENT1_M |
+						     GLTSYN_STAT_EVENT2_M);
+		ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
+		kthread_queue_work(pf->ptp.kworker, &pf->ptp.extts_work);
+	}
+
 #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
 	if (oicr & ICE_AUX_CRIT_ERR) {
 		struct iidc_event *event;
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 609f433a4b96..5d5207b56ca9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -4,6 +4,8 @@
 #include "ice.h"
 #include "ice_lib.h"
 
+#define E810_OUT_PROP_DELAY_NS 1
+
 /**
  * ice_set_tx_tstamp - Enable or disable Tx timestamping
  * @pf: The PF pointer to search in
@@ -483,6 +485,255 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
 	return 0;
 }
 
+/**
+ * ice_ptp_extts_work - Workqueue task function
+ * @work: external timestamp work structure
+ *
+ * Service for PTP external clock event
+ */
+static void ice_ptp_extts_work(struct kthread_work *work)
+{
+	struct ice_ptp *ptp = container_of(work, struct ice_ptp, extts_work);
+	struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+	struct ptp_clock_event event;
+	struct ice_hw *hw = &pf->hw;
+	u8 chan, tmr_idx;
+	u32 hi, lo;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	/* Event time is captured by one of the two matched registers
+	 *      GLTSYN_EVNT_L: 32 LSB of sampled time event
+	 *      GLTSYN_EVNT_H: 32 MSB of sampled time event
+	 * Event is defined in GLTSYN_EVNT_0 register
+	 */
+	for (chan = 0; chan < GLTSYN_EVNT_H_IDX_MAX; chan++) {
+		/* Check if channel is enabled */
+		if (pf->ptp.ext_ts_irq & (1 << chan)) {
+			lo = rd32(hw, GLTSYN_EVNT_L(chan, tmr_idx));
+			hi = rd32(hw, GLTSYN_EVNT_H(chan, tmr_idx));
+			event.timestamp = (((u64)hi) << 32) | lo;
+			event.type = PTP_CLOCK_EXTTS;
+			event.index = chan;
+
+			/* Fire event */
+			ptp_clock_event(pf->ptp.clock, &event);
+			pf->ptp.ext_ts_irq &= ~(1 << chan);
+		}
+	}
+}
+
+/**
+ * ice_ptp_cfg_extts - Configure EXTTS pin and channel
+ * @pf: Board private structure
+ * @ena: true to enable; false to disable
+ * @chan: GPIO channel (0-3)
+ * @gpio_pin: GPIO pin
+ * @extts_flags: request flags from the ptp_extts_request.flags
+ */
+static int
+ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
+		  unsigned int extts_flags)
+{
+	u32 func, aux_reg, gpio_reg, irq_reg;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+
+	if (chan > (unsigned int)pf->ptp.info.n_ext_ts)
+		return -EINVAL;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	irq_reg = rd32(hw, PFINT_OICR_ENA);
+
+	if (ena) {
+		/* Enable the interrupt */
+		irq_reg |= PFINT_OICR_TSYN_EVNT_M;
+		aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M;
+
+#define GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE	BIT(0)
+#define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE	BIT(1)
+
+		/* set event level to requested edge */
+		if (extts_flags & PTP_FALLING_EDGE)
+			aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE;
+		if (extts_flags & PTP_RISING_EDGE)
+			aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE;
+
+		/* Write GPIO CTL reg.
+		 * 0x1 is input sampled by EVENT register(channel)
+		 * + num_in_channels * tmr_idx
+		 */
+		func = 1 + chan + (tmr_idx * 3);
+		gpio_reg = ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) &
+			    GLGEN_GPIO_CTL_PIN_FUNC_M);
+		pf->ptp.ext_ts_chan |= (1 << chan);
+	} else {
+		/* clear the values we set to reset defaults */
+		aux_reg = 0;
+		gpio_reg = 0;
+		pf->ptp.ext_ts_chan &= ~(1 << chan);
+		if (!pf->ptp.ext_ts_chan)
+			irq_reg &= ~PFINT_OICR_TSYN_EVNT_M;
+	}
+
+	wr32(hw, PFINT_OICR_ENA, irq_reg);
+	wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg);
+	wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_cfg_clkout - Configure clock to generate periodic wave
+ * @pf: Board private structure
+ * @chan: GPIO channel (0-3)
+ * @config: desired periodic clk configuration. NULL will disable channel
+ * @store: If set to true the values will be stored
+ *
+ * Configure the internal clock generator modules to generate the clock wave of
+ * specified period.
+ */
+static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan,
+			      struct ice_perout_channel *config, bool store)
+{
+	u64 current_time, period, start_time, phase;
+	struct ice_hw *hw = &pf->hw;
+	u32 func, val, gpio_pin;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* 0. Reset mode & out_en in AUX_OUT */
+	wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0);
+
+	/* If we're disabling the output, clear out CLKO and TGT and keep
+	 * output level low
+	 */
+	if (!config || !config->ena) {
+		wr32(hw, GLTSYN_CLKO(chan, tmr_idx), 0);
+		wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), 0);
+		wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), 0);
+
+		val = GLGEN_GPIO_CTL_PIN_DIR_M;
+		gpio_pin = pf->ptp.perout_channels[chan].gpio_pin;
+		wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val);
+
+		/* Store the value if requested */
+		if (store)
+			memset(&pf->ptp.perout_channels[chan], 0,
+			       sizeof(struct ice_perout_channel));
+
+		return 0;
+	}
+	period = config->period;
+	start_time = config->start_time;
+	div64_u64_rem(start_time, period, &phase);
+	gpio_pin = config->gpio_pin;
+
+	/* 1. Write clkout with half of required period value */
+	if (period & 0x1) {
+		dev_err(ice_pf_to_dev(pf), "CLK Period must be an even value\n");
+		goto err;
+	}
+
+	period >>= 1;
+
+	/* For proper operation, the GLTSYN_CLKO must be larger than clock tick
+	 */
+#define MIN_PULSE 3
+	if (period <= MIN_PULSE || period > U32_MAX) {
+		dev_err(ice_pf_to_dev(pf), "CLK Period must be > %d && < 2^33",
+			MIN_PULSE * 2);
+		goto err;
+	}
+
+	wr32(hw, GLTSYN_CLKO(chan, tmr_idx), lower_32_bits(period));
+
+	/* Allow time for programming before start_time is hit */
+	current_time = ice_ptp_read_src_clk_reg(pf, NULL);
+
+	/* if start time is in the past start the timer at the nearest second
+	 * maintaining phase
+	 */
+	if (start_time < current_time)
+		start_time = div64_u64(current_time + NSEC_PER_MSEC - 1,
+				       NSEC_PER_SEC) * NSEC_PER_SEC + phase;
+
+	start_time -= E810_OUT_PROP_DELAY_NS;
+
+	/* 2. Write TARGET time */
+	wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), lower_32_bits(start_time));
+	wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), upper_32_bits(start_time));
+
+	/* 3. Write AUX_OUT register */
+	val = GLTSYN_AUX_OUT_0_OUT_ENA_M | GLTSYN_AUX_OUT_0_OUTMOD_M;
+	wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), val);
+
+	/* 4. write GPIO CTL reg */
+	func = 8 + chan + (tmr_idx * 4);
+	val = GLGEN_GPIO_CTL_PIN_DIR_M |
+	      ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & GLGEN_GPIO_CTL_PIN_FUNC_M);
+	wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val);
+
+	/* Store the value if requested */
+	if (store) {
+		memcpy(&pf->ptp.perout_channels[chan], config,
+		       sizeof(struct ice_perout_channel));
+		pf->ptp.perout_channels[chan].start_time = phase;
+	}
+
+	return 0;
+err:
+	dev_err(ice_pf_to_dev(pf), "PTP failed to cfg per_clk\n");
+	return -EFAULT;
+}
+
+/**
+ * ice_ptp_gpio_enable_e810 - Enable/disable ancillary features of PHC
+ * @info: the driver's PTP info structure
+ * @rq: The requested feature to change
+ * @on: Enable/disable flag
+ */
+static int
+ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
+			 struct ptp_clock_request *rq, int on)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_perout_channel clk_cfg = {0};
+	unsigned int chan;
+	u32 gpio_pin;
+	int err;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PEROUT:
+		chan = rq->perout.index;
+		if (chan == PPS_CLK_GEN_CHAN)
+			clk_cfg.gpio_pin = PPS_PIN_INDEX;
+		else
+			clk_cfg.gpio_pin = chan;
+
+		clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) +
+				   rq->perout.period.nsec);
+		clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) +
+				       rq->perout.start.nsec);
+		clk_cfg.ena = !!on;
+
+		err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
+		break;
+	case PTP_CLK_REQ_EXTTS:
+		chan = rq->extts.index;
+		gpio_pin = chan;
+
+		err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin,
+					rq->extts.flags);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
 /**
  * ice_ptp_gettimex64 - Get the time of the clock
  * @info: the driver's PTP info structure
@@ -740,6 +991,34 @@ ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring,
 	}
 }
 
+/**
+ * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs
+ * @info: PTP clock capabilities
+ */
+static void ice_ptp_setup_pins_e810(struct ptp_clock_info *info)
+{
+	info->n_per_out = E810_N_PER_OUT;
+	info->n_ext_ts = E810_N_EXT_TS;
+}
+
+/**
+ * ice_ptp_set_funcs_e810 - Set specialized functions for E810 support
+ * @pf: Board private structure
+ * @info: PTP info to fill
+ *
+ * Assign functions to the PTP capabiltiies structure for E810 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for e810
+ * devices.
+ */
+static void
+ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	info->enable = ice_ptp_gpio_enable_e810;
+
+	ice_ptp_setup_pins_e810(info);
+}
+
 /**
  * ice_ptp_set_caps - Set PTP capabilities
  * @pf: Board private structure
@@ -757,6 +1036,8 @@ static void ice_ptp_set_caps(struct ice_pf *pf)
 	info->adjfine = ice_ptp_adjfine;
 	info->gettimex64 = ice_ptp_gettimex64;
 	info->settime64 = ice_ptp_settime64;
+
+	ice_ptp_set_funcs_e810(pf, info);
 }
 
 /**
@@ -783,6 +1064,17 @@ static long ice_ptp_create_clock(struct ice_pf *pf)
 	info = &pf->ptp.info;
 	dev = ice_pf_to_dev(pf);
 
+	/* Allocate memory for kernel pins interface */
+	if (info->n_pins) {
+		info->pin_config = devm_kcalloc(dev, info->n_pins,
+						sizeof(*info->pin_config),
+						GFP_KERNEL);
+		if (!info->pin_config) {
+			info->n_pins = 0;
+			return -ENOMEM;
+		}
+	}
+
 	/* Attempt to register the clock before enabling the hardware. */
 	clock = ptp_clock_register(info, dev);
 	if (IS_ERR(clock))
@@ -1203,6 +1495,7 @@ void ice_ptp_init(struct ice_pf *pf)
 
 	/* Initialize work functions */
 	kthread_init_delayed_work(&pf->ptp.work, ice_ptp_periodic_work);
+	kthread_init_work(&pf->ptp.extts_work, ice_ptp_extts_work);
 
 	/* Allocate a kworker for handling work required for the ports
 	 * connected to the PTP hardware clock.
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index d01507eba036..e1c787bd5b96 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -9,6 +9,21 @@
 
 #include "ice_ptp_hw.h"
 
+enum ice_ptp_pin {
+	GPIO_20 = 0,
+	GPIO_21,
+	GPIO_22,
+	GPIO_23,
+	NUM_ICE_PTP_PIN
+};
+
+struct ice_perout_channel {
+	bool ena;
+	u32 gpio_pin;
+	u64 period;
+	u64 start_time;
+};
+
 /* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp
  * is stored in a buffer of registers. Depending on the specific hardware,
  * this buffer might be shared across multiple PHY ports.
@@ -82,12 +97,18 @@ struct ice_ptp_port {
 	struct ice_ptp_tx tx;
 };
 
+#define GLTSYN_TGT_H_IDX_MAX		4
+
 /**
  * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
  * @port: data for the PHY port initialization procedure
  * @work: delayed work function for periodic tasks
+ * @extts_work: work function for handling external Tx timestamps
  * @cached_phc_time: a cached copy of the PHC time for timestamp extension
+ * @ext_ts_chan: the external timestamp channel in use
+ * @ext_ts_irq: the external timestamp IRQ in use
  * @kworker: kwork thread for handling periodic work
+ * @perout_channels: periodic output data
  * @info: structure defining PTP hardware capabilities
  * @clock: pointer to registered PTP clock device
  * @tstamp_config: hardware timestamping configuration
@@ -95,8 +116,12 @@ struct ice_ptp_port {
 struct ice_ptp {
 	struct ice_ptp_port port;
 	struct kthread_delayed_work work;
+	struct kthread_work extts_work;
 	u64 cached_phc_time;
+	u8 ext_ts_chan;
+	u8 ext_ts_irq;
 	struct kthread_worker *kworker;
+	struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX];
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
 	struct hwtstamp_config tstamp_config;
@@ -115,6 +140,24 @@ struct ice_ptp {
 #define PTP_SHARED_CLK_IDX_VALID	BIT(31)
 #define ICE_PTP_TS_VALID		BIT(0)
 
+/* Per-channel register definitions */
+#define GLTSYN_AUX_OUT(_chan, _idx)	(GLTSYN_AUX_OUT_0(_idx) + ((_chan) * 8))
+#define GLTSYN_AUX_IN(_chan, _idx)	(GLTSYN_AUX_IN_0(_idx) + ((_chan) * 8))
+#define GLTSYN_CLKO(_chan, _idx)	(GLTSYN_CLKO_0(_idx) + ((_chan) * 8))
+#define GLTSYN_TGT_L(_chan, _idx)	(GLTSYN_TGT_L_0(_idx) + ((_chan) * 16))
+#define GLTSYN_TGT_H(_chan, _idx)	(GLTSYN_TGT_H_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_L(_chan, _idx)	(GLTSYN_EVNT_L_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_H(_chan, _idx)	(GLTSYN_EVNT_H_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_H_IDX_MAX		3
+
+/* Pin definitions for PTP PPS out */
+#define PPS_CLK_GEN_CHAN		3
+#define PPS_CLK_SRC_CHAN		2
+#define PPS_PIN_INDEX			5
+#define TIME_SYNC_PIN_INDEX		4
+#define E810_N_EXT_TS			3
+#define E810_N_PER_OUT			4
+
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 struct ice_pf;
 int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);

From 37c592062b16d349dc2344936ee6100265d327a0 Mon Sep 17 00:00:00 2001
From: Victor Raj <victor.raj@intel.com>
Date: Mon, 14 Jun 2021 14:46:06 -0700
Subject: [PATCH 2883/3292] ice: remove the VSI info from previous agg

Remove the VSI info from previous aggregator after moving the VSI to a
new aggregator.

Signed-off-by: Victor Raj <victor.raj@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_sched.c | 24 ++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index a17e24e54cf3..9f07b6641705 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -2745,8 +2745,8 @@ static enum ice_status
 ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
 			   u16 vsi_handle, unsigned long *tc_bitmap)
 {
-	struct ice_sched_agg_vsi_info *agg_vsi_info;
-	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_agg_vsi_info *agg_vsi_info, *old_agg_vsi_info = NULL;
+	struct ice_sched_agg_info *agg_info, *old_agg_info;
 	enum ice_status status = 0;
 	struct ice_hw *hw = pi->hw;
 	u8 tc;
@@ -2756,6 +2756,20 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
 	agg_info = ice_get_agg_info(hw, agg_id);
 	if (!agg_info)
 		return ICE_ERR_PARAM;
+	/* If the VSI is already part of another aggregator then update
+	 * its VSI info list
+	 */
+	old_agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+	if (old_agg_info && old_agg_info != agg_info) {
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(old_agg_vsi_info, vtmp,
+					 &old_agg_info->agg_vsi_list,
+					 list_entry)
+			if (old_agg_vsi_info->vsi_handle == vsi_handle)
+				break;
+	}
+
 	/* check if entry already exist */
 	agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
 	if (!agg_vsi_info) {
@@ -2780,6 +2794,12 @@ ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
 			break;
 
 		set_bit(tc, agg_vsi_info->tc_bitmap);
+		if (old_agg_vsi_info)
+			clear_bit(tc, old_agg_vsi_info->tc_bitmap);
+	}
+	if (old_agg_vsi_info && !old_agg_vsi_info->tc_bitmap[0]) {
+		list_del(&old_agg_vsi_info->list_entry);
+		devm_kfree(ice_hw_to_dev(pi->hw), old_agg_vsi_info);
 	}
 	return status;
 }

From 70fa0a078099881c1e0553a7c351a28a575afcfc Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Mon, 14 Jun 2021 14:46:07 -0700
Subject: [PATCH 2884/3292] ice: remove unnecessary VSI assignment

ice_get_vf_vsi() is being called twice for the same VSI. Remove the
unnecessary call/assignment.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 6392e0b31b90..2826570dab51 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1689,7 +1689,6 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 		else
 			promisc_m = ICE_UCAST_PROMISC_BITS;
 
-		vsi = ice_get_vf_vsi(vf);
 		if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
 			dev_err(dev, "disabling promiscuous mode failed\n");
 	}

From b81c191c468bb9f9e63cb19cdf090732e3218dce Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 20 Jun 2021 15:28:06 +0200
Subject: [PATCH 2885/3292] ice: Fix a memory leak in an error handling path in
 'ice_pf_dcb_cfg()'

If this 'kzalloc()' fails we must free some resources as in all the other
error handling paths of this function.

Fixes: 348048e724a0 ("ice: Implement iidc operations")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 857dc62da7a8..926cf748c5ec 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -316,8 +316,10 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 
 	/* Notify AUX drivers about impending change to TCs */
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
-	if (!event)
-		return -ENOMEM;
+	if (!event) {
+		ret = -ENOMEM;
+		goto free_cfg;
+	}
 
 	set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type);
 	ice_send_event_to_aux(pf, event);

From d6765985a42a660f078896d5c5b27f97c580a490 Mon Sep 17 00:00:00 2001
From: Petr Oros <poros@redhat.com>
Date: Fri, 25 Jun 2021 10:27:45 +0200
Subject: [PATCH 2886/3292] Revert "be2net: disable bh with spin_lock in
 be_process_mcc"

Patch was based on wrong presumption that be_poll can be called only
from bh context. It reintroducing old regression (also reverted) and
causing deadlock when we use netconsole with benet in bonding.

Old revert: commit 072a9c486004 ("netpoll: revert 6bdb7fe3104 and fix
be_poll() instead")

[  331.269715] bond0: (slave enp0s7f0): Releasing backup interface
[  331.270121] CPU: 4 PID: 1479 Comm: ifenslave Not tainted 5.13.0-rc7+ #2
[  331.270122] Call Trace:
[  331.270122] [c00000001789f200] [c0000000008c505c] dump_stack+0x100/0x174 (unreliable)
[  331.270124] [c00000001789f240] [c008000001238b9c] be_poll+0x64/0xe90 [be2net]
[  331.270125] [c00000001789f330] [c000000000d1e6e4] netpoll_poll_dev+0x174/0x3d0
[  331.270127] [c00000001789f400] [c008000001bc167c] bond_poll_controller+0xb4/0x130 [bonding]
[  331.270128] [c00000001789f450] [c000000000d1e624] netpoll_poll_dev+0xb4/0x3d0
[  331.270129] [c00000001789f520] [c000000000d1ed88] netpoll_send_skb+0x448/0x470
[  331.270130] [c00000001789f5d0] [c0080000011f14f8] write_msg+0x180/0x1b0 [netconsole]
[  331.270131] [c00000001789f640] [c000000000230c0c] console_unlock+0x54c/0x790
[  331.270132] [c00000001789f7b0] [c000000000233098] vprintk_emit+0x2d8/0x450
[  331.270133] [c00000001789f810] [c000000000234758] vprintk+0xc8/0x270
[  331.270134] [c00000001789f850] [c000000000233c28] printk+0x40/0x54
[  331.270135] [c00000001789f870] [c000000000ccf908] __netdev_printk+0x150/0x198
[  331.270136] [c00000001789f910] [c000000000ccfdb4] netdev_info+0x68/0x94
[  331.270137] [c00000001789f950] [c008000001bcbd70] __bond_release_one+0x188/0x6b0 [bonding]
[  331.270138] [c00000001789faa0] [c008000001bcc6f4] bond_do_ioctl+0x42c/0x490 [bonding]
[  331.270139] [c00000001789fb60] [c000000000d0d17c] dev_ifsioc+0x17c/0x400
[  331.270140] [c00000001789fbc0] [c000000000d0db70] dev_ioctl+0x390/0x890
[  331.270141] [c00000001789fc10] [c000000000c7c76c] sock_do_ioctl+0xac/0x1b0
[  331.270142] [c00000001789fc90] [c000000000c7ffac] sock_ioctl+0x31c/0x6e0
[  331.270143] [c00000001789fd60] [c0000000005b9728] sys_ioctl+0xf8/0x150
[  331.270145] [c00000001789fdb0] [c0000000000336c0] system_call_exception+0x160/0x2f0
[  331.270146] [c00000001789fe10] [c00000000000d35c] system_call_common+0xec/0x278
[  331.270147] --- interrupt: c00 at 0x7fffa6c6ec00
[  331.270147] NIP:  00007fffa6c6ec00 LR: 0000000105c4185c CTR: 0000000000000000
[  331.270148] REGS: c00000001789fe80 TRAP: 0c00   Not tainted  (5.13.0-rc7+)
[  331.270148] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28000428  XER: 00000000
[  331.270155] IRQMASK: 0
[  331.270156] GPR00: 0000000000000036 00007fffd494d5b0 00007fffa6d57100 0000000000000003
[  331.270158] GPR04: 0000000000008991 00007fffd494d6d0 0000000000000008 00007fffd494f28c
[  331.270161] GPR08: 0000000000000003 0000000000000000 0000000000000000 0000000000000000
[  331.270164] GPR12: 0000000000000000 00007fffa6dfa220 0000000000000000 0000000000000000
[  331.270167] GPR16: 0000000105c44880 0000000000000000 0000000105c60088 0000000105c60318
[  331.270170] GPR20: 0000000105c602c0 0000000105c44560 0000000000000000 0000000000000000
[  331.270172] GPR24: 00007fffd494dc50 00007fffd494d6a8 0000000105c60008 00007fffd494d6d0
[  331.270175] GPR28: 00007fffd494f27e 0000000105c6026c 00007fffd494f284 0000000000000000
[  331.270178] NIP [00007fffa6c6ec00] 0x7fffa6c6ec00
[  331.270178] LR [0000000105c4185c] 0x105c4185c
[  331.270179] --- interrupt: c00

This reverts commit d0d006a43e9a7a796f6f178839c92fcc222c564d.

Fixes: d0d006a43e9a7a ("be2net: disable bh with spin_lock in be_process_mcc")
Signed-off-by: Petr Oros <poros@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 6 ++++--
 drivers/net/ethernet/emulex/benet/be_main.c | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 701c12c9e033..649c5c429bd7 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -550,7 +550,7 @@ int be_process_mcc(struct be_adapter *adapter)
 	int num = 0, status = 0;
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
-	spin_lock_bh(&adapter->mcc_cq_lock);
+	spin_lock(&adapter->mcc_cq_lock);
 
 	while ((compl = be_mcc_compl_get(adapter))) {
 		if (compl->flags & CQE_FLAGS_ASYNC_MASK) {
@@ -566,7 +566,7 @@ int be_process_mcc(struct be_adapter *adapter)
 	if (num)
 		be_cq_notify(adapter, mcc_obj->cq.id, mcc_obj->rearm_cq, num);
 
-	spin_unlock_bh(&adapter->mcc_cq_lock);
+	spin_unlock(&adapter->mcc_cq_lock);
 	return status;
 }
 
@@ -581,7 +581,9 @@ static int be_mcc_wait_compl(struct be_adapter *adapter)
 		if (be_check_error(adapter, BE_ERROR_ANY))
 			return -EIO;
 
+		local_bh_disable();
 		status = be_process_mcc(adapter);
+		local_bh_enable();
 
 		if (atomic_read(&mcc_obj->q.used) == 0)
 			break;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 7968568bbe21..361c1c87c183 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5501,7 +5501,9 @@ static void be_worker(struct work_struct *work)
 	 * mcc completions
 	 */
 	if (!netif_running(adapter->netdev)) {
+		local_bh_disable();
 		be_process_mcc(adapter);
+		local_bh_enable();
 		goto reschedule;
 	}
 

From ac53c26433b51f1835ce5a935970e427d83e3ec5 Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Fri, 25 Jun 2021 12:38:53 +0200
Subject: [PATCH 2887/3292] net: mdiobus: withdraw fwnode_mdbiobus_register

The newly implemented fwnode_mdbiobus_register turned out to be
problematic - in case the fwnode_/of_/acpi_mdio are built as
modules, a dependency cycle can be observed during the depmod phase of
modules_install, eg.:

depmod: ERROR: Cycle detected: fwnode_mdio -> of_mdio -> fwnode_mdio
depmod: ERROR: Found 2 modules in dependency cycles!

OR:

depmod: ERROR: Cycle detected: acpi_mdio -> fwnode_mdio -> acpi_mdio
depmod: ERROR: Found 2 modules in dependency cycles!

A possible solution could be to rework fwnode_mdiobus_register,
so that to merge the contents of acpi_mdiobus_register and
of_mdiobus_register. However feasible, such change would
be very intrusive and affect huge amount of the of_mdiobus_register
users.

Since there are currently 2 users of ACPI and MDIO
(xgmac_mdio and mvmdio), withdraw the fwnode_mdbiobus_register
and roll back to a simple 'if' condition in affected drivers.

Fixes: 62a6ef6a996f ("net: mdiobus: Introduce fwnode_mdbiobus_register()")
Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/Kconfig      |  4 +++-
 drivers/net/ethernet/freescale/xgmac_mdio.c | 11 +++++++++--
 drivers/net/ethernet/marvell/mvmdio.c       | 10 ++++++++--
 drivers/net/mdio/fwnode_mdio.c              | 22 ---------------------
 include/linux/fwnode_mdio.h                 | 12 -----------
 5 files changed, 20 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 92a390576b88..2d1abdd58fab 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -67,7 +67,9 @@ config FSL_PQ_MDIO
 
 config FSL_XGMAC_MDIO
 	tristate "Freescale XGMAC MDIO"
-	depends on FWNODE_MDIO
+	select PHYLIB
+	depends on OF
+	select OF_MDIO
 	help
 	  This driver supports the MDIO bus on the Fman 10G Ethernet MACs, and
 	  on the FMan mEMAC (which supports both Clauses 22 and 45)
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 2d99edc8a647..0b68852379da 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/fwnode_mdio.h>
+#include <linux/acpi_mdio.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mdio.h>
@@ -246,6 +246,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 
 static int xgmac_mdio_probe(struct platform_device *pdev)
 {
+	struct fwnode_handle *fwnode;
 	struct mdio_fsl_priv *priv;
 	struct resource *res;
 	struct mii_bus *bus;
@@ -290,7 +291,13 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 	priv->has_a011043 = device_property_read_bool(&pdev->dev,
 						      "fsl,erratum-a011043");
 
-	ret = fwnode_mdiobus_register(bus, pdev->dev.fwnode);
+	fwnode = pdev->dev.fwnode;
+	if (is_of_node(fwnode))
+		ret = of_mdiobus_register(bus, to_of_node(fwnode));
+	else if (is_acpi_node(fwnode))
+		ret = acpi_mdiobus_register(bus, fwnode);
+	else
+		ret = -EINVAL;
 	if (ret) {
 		dev_err(&pdev->dev, "cannot register MDIO bus\n");
 		goto err_registration;
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 7537ee3f6622..62a97c46fba0 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -18,9 +18,9 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/acpi_mdio.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/fwnode_mdio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -371,7 +371,13 @@ static int orion_mdio_probe(struct platform_device *pdev)
 		goto out_mdio;
 	}
 
-	ret = fwnode_mdiobus_register(bus, pdev->dev.fwnode);
+	/* For the platforms not supporting DT/ACPI fall-back
+	 * to mdiobus_register via of_mdiobus_register.
+	 */
+	if (is_acpi_node(pdev->dev.fwnode))
+		ret = acpi_mdiobus_register(bus, pdev->dev.fwnode);
+	else
+		ret = of_mdiobus_register(bus, pdev->dev.of_node);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
 		goto out_mdio;
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index ae0bf71a9932..1becb1a731f6 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -7,10 +7,8 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/acpi_mdio.h>
 #include <linux/fwnode_mdio.h>
 #include <linux/of.h>
-#include <linux/of_mdio.h>
 #include <linux/phy.h>
 
 MODULE_AUTHOR("Calvin Johnson <calvin.johnson@oss.nxp.com>");
@@ -144,23 +142,3 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
 	return 0;
 }
 EXPORT_SYMBOL(fwnode_mdiobus_register_phy);
-
-/**
- * fwnode_mdiobus_register - bring up all the PHYs on a given MDIO bus and
- *	attach them to it.
- * @bus: Target MDIO bus.
- * @fwnode: Pointer to fwnode of the MDIO controller.
- *
- * Return values are determined accordingly to acpi_/of_ mdiobus_register()
- * operation.
- */
-int fwnode_mdiobus_register(struct mii_bus *bus, struct fwnode_handle *fwnode)
-{
-	if (is_acpi_node(fwnode))
-		return acpi_mdiobus_register(bus, fwnode);
-	else if (is_of_node(fwnode))
-		return of_mdiobus_register(bus, to_of_node(fwnode));
-	else
-		return -EINVAL;
-}
-EXPORT_SYMBOL(fwnode_mdiobus_register);
diff --git a/include/linux/fwnode_mdio.h b/include/linux/fwnode_mdio.h
index f62817c23137..faf603c48c86 100644
--- a/include/linux/fwnode_mdio.h
+++ b/include/linux/fwnode_mdio.h
@@ -16,7 +16,6 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
 int fwnode_mdiobus_register_phy(struct mii_bus *bus,
 				struct fwnode_handle *child, u32 addr);
 
-int fwnode_mdiobus_register(struct mii_bus *bus, struct fwnode_handle *fwnode);
 #else /* CONFIG_FWNODE_MDIO */
 int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
 				       struct phy_device *phy,
@@ -31,17 +30,6 @@ static inline int fwnode_mdiobus_register_phy(struct mii_bus *bus,
 {
 	return -EINVAL;
 }
-
-static inline int fwnode_mdiobus_register(struct mii_bus *bus,
-					  struct fwnode_handle *fwnode)
-{
-	/*
-	 * Fall back to mdiobus_register() function to register a bus.
-	 * This way, we don't have to keep compat bits around in drivers.
-	 */
-
-	return mdiobus_register(bus);
-}
 #endif
 
 #endif /* __LINUX_FWNODE_MDIO_H */

From 17081633e22d83be928a779fd7acd04b247dec90 Mon Sep 17 00:00:00 2001
From: Guvenc Gulce <guvenc@linux.ibm.com>
Date: Fri, 25 Jun 2021 17:11:02 +0200
Subject: [PATCH 2888/3292] net/smc: Ensure correct state of the socket in send
 path

When smc_sendmsg() is called before the SMC socket initialization has
completed, smc_tx_sendmsg() will access un-initialized fields of the
SMC socket which results in a null-pointer dereference.
Fix this by checking the socket state first in smc_tx_sendmsg().

Fixes: e0e4b8fa5338 ("net/smc: Add SMC statistics support")
Reported-by: syzbot+5dda108b672b54141857@syzkaller.appspotmail.com
Reviewed-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_tx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 075c4f4b41cf..289025cd545a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -154,6 +154,9 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 		goto out_err;
 	}
 
+	if (sk->sk_state == SMC_INIT)
+		return -ENOTCONN;
+
 	if (len > conn->sndbuf_desc->len)
 		SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk);
 
@@ -164,8 +167,6 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 		SMC_STAT_INC(smc, urg_data_cnt);
 
 	while (msg_data_left(msg)) {
-		if (sk->sk_state == SMC_INIT)
-			return -ENOTCONN;
 		if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
 		    (smc->sk.sk_err == ECONNABORTED) ||
 		    conn->killed)

From 0fa757b5d3ea6e3d3d59f0e0d34c8214b8643b8f Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 25 Jun 2021 13:54:32 -0500
Subject: [PATCH 2889/3292] smb3: prevent races updating CurrentMid

There was one place where we weren't locking CurrentMid, and although
likely to be safe since even without the lock since it is during
negotiate protocol, it is more consistent to lock it in this last remaining
place, and avoids confusing Coverity warning.

Addresses-Coverity: 1486665 ("Data race condition")
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h | 3 ++-
 fs/cifs/smb2ops.c  | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index fc6b08e5ebbc..3100f8b66e60 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -621,7 +621,7 @@ struct TCP_Server_Info {
 	/* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
 	unsigned int capabilities; /* selective disabling of caps by smb sess */
 	int timeAdj;  /* Adjust for difference in server time zone in sec */
-	__u64 CurrentMid;         /* multiplex id - rotating counter */
+	__u64 CurrentMid;         /* multiplex id - rotating counter, protected by GlobalMid_Lock */
 	char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
 	/* 16th byte of RFC1001 workstation name is always null */
 	char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -1786,6 +1786,7 @@ require use of the stronger protocol */
  *      updates to XID counters, multiplex id  and SMB sequence numbers
  *      list operations on global DnotifyReqList
  *      updates to ses->status
+ *      updates to server->CurrentMid
  *  tcp_ses_lock protects:
  *	list operations on tcp and SMB session lists
  *  tcon->open_file_lock protects the list of open files hanging off the tcon
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 903de7449aa3..e4c8f603dd58 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -388,7 +388,9 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 {
 	int rc;
 
+	spin_lock(&GlobalMid_Lock);
 	cifs_ses_server(ses)->CurrentMid = 0;
+	spin_unlock(&GlobalMid_Lock);
 	rc = SMB2_negotiate(xid, ses);
 	/* BB we probably don't need to retry with modern servers */
 	if (rc == -EAGAIN)

From cd039afa0ad86e1f01921cc5abf7f80d2449543a Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 25 Jun 2021 15:18:59 -0400
Subject: [PATCH 2890/3292] dm writecache: add "cleaner" and "max_age" to
 Documentation

Backfill missing Documentation.

Fixes: 93de44eb3fc8 ("dm writecache: implement the "cleaner" policy")
Fixes: 3923d4854e18 ("dm writecache: implement gradual cleanup")
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 .../admin-guide/device-mapper/writecache.rst    | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index dce0184e07ca..c181f26af769 100644
--- a/Documentation/admin-guide/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -53,6 +53,21 @@ Constructor parameters:
 
 		- some underlying devices perform better with fua, some
 		  with nofua. The user should test it
+	cleaner
+		when this option is activated (either in the constructor
+		arguments or by a message), the cache will not promote
+		new writes (however, writes to already cached blocks are
+		promoted, to avoid data corruption due to misordered
+		writes) and it will gradually writeback any cached
+		data. The userspace can then monitor the cleaning
+		process with "dmsetup status". When the number of cached
+		blocks drops to zero, userspace can unload the
+		dm-writecache target and replace it with dm-linear or
+		other targets.
+	max_age n
+		specifies the maximum age of a block in milliseconds. If
+		a block is stored in the cache for too long, it will be
+		written to the underlying device and cleaned up.
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number
@@ -77,3 +92,5 @@ Messages:
 		5. resume the device, so that it will use the linear
 		   target
 		6. the cache device is now inactive and it can be deleted
+	cleaner
+		See above "cleaner" constructor documentation.

From 611c3e168b1c5b6cf81e6deb8f6b4eb83f6b53fd Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 21 Jun 2021 05:22:21 -0400
Subject: [PATCH 2891/3292] dm writecache: add optional "metadata_only"
 parameter

Add a "metadata_only" parameter that when present: only metadata is
promoted to the cache. This option improves performance for heavier
REQ_META workloads (e.g. device-mapper-test-suite's "git clone and
checkout" benchmark improves from 341s to 312s).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 .../admin-guide/device-mapper/writecache.rst     |  3 +++
 drivers/md/dm-writecache.c                       | 16 ++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index c181f26af769..977f82b5a811 100644
--- a/Documentation/admin-guide/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -68,6 +68,9 @@ Constructor parameters:
 		specifies the maximum age of a block in milliseconds. If
 		a block is stored in the cache for too long, it will be
 		written to the underlying device and cleaned up.
+	metadata_only
+		only metadata is promoted to the cache. This option
+		improves performance for heavier REQ_META workloads.
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 558d39764e6d..2eb7d7bcdfb1 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -171,6 +171,7 @@ struct dm_writecache {
 	bool flush_on_suspend:1;
 	bool cleaner:1;
 	bool cleaner_set:1;
+	bool metadata_only:1;
 
 	unsigned high_wm_percent_value;
 	unsigned low_wm_percent_value;
@@ -1301,7 +1302,7 @@ static int writecache_map(struct dm_target *ti, struct bio *bio)
 			writecache_flush(wc);
 			if (writecache_has_error(wc))
 				goto unlock_error;
-			if (unlikely(wc->cleaner))
+			if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
 				goto unlock_remap_origin;
 			goto unlock_submit;
 		} else {
@@ -1380,7 +1381,8 @@ read_next_block:
 				}
 				found_entry = true;
 			} else {
-				if (unlikely(wc->cleaner))
+				if (unlikely(wc->cleaner) ||
+				    (wc->metadata_only && !(bio->bi_opf & REQ_META)))
 					goto direct_write;
 			}
 			e = writecache_pop_from_freelist(wc, (sector_t)-1);
@@ -2094,7 +2096,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	struct wc_memory_superblock s;
 
 	static struct dm_arg _args[] = {
-		{0, 16, "Invalid number of feature args"},
+		{0, 17, "Invalid number of feature args"},
 	};
 
 	as.argc = argc;
@@ -2321,6 +2323,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 				wc->writeback_fua = false;
 				wc->writeback_fua_set = true;
 			} else goto invalid_optional;
+		} else if (!strcasecmp(string, "metadata_only")) {
+			wc->metadata_only = true;
 		} else {
 invalid_optional:
 			r = -EINVAL;
@@ -2544,6 +2548,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
 			extra_args++;
 		if (wc->writeback_fua_set)
 			extra_args++;
+		if (wc->metadata_only)
+			extra_args++;
 
 		DMEMIT("%u", extra_args);
 		if (wc->start_sector_set)
@@ -2564,13 +2570,15 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" cleaner");
 		if (wc->writeback_fua_set)
 			DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
+		if (wc->metadata_only)
+			DMEMIT(" metadata_only");
 		break;
 	}
 }
 
 static struct target_type writecache_target = {
 	.name			= "writecache",
-	.version		= {1, 4, 0},
+	.version		= {1, 5, 0},
 	.module			= THIS_MODULE,
 	.ctr			= writecache_ctr,
 	.dtr			= writecache_dtr,

From 326dbde2e0a77be107c9ddd04899fd9ee27ffc94 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 16 Jun 2021 14:29:53 +0100
Subject: [PATCH 2892/3292] dm ps io affinity: remove redundant continue
 statement

The continue statement at the end of a for-loop has no effect,
remove it.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-ps-io-affinity.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index 077655cd4fae..cb8e83bfb1a7 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -91,7 +91,6 @@ static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
 		cpumask_set_cpu(cpu, s->path_mask);
 		s->path_map[cpu] = pi;
 		refcount_inc(&pi->refcount);
-		continue;
 	}
 
 	if (refcount_dec_and_test(&pi->refcount)) {

From 28436ba34b7d1b6af2a898d37ee678a1eb643db4 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Sat, 19 Jun 2021 08:15:19 +0900
Subject: [PATCH 2893/3292] dm zone: fix dm_revalidate_zones() memory
 allocation

Make sure that the zone write pointer offset array is allocated with a
vmalloc in dm_zone_revalidate_cb() by passing GFP_KERNEL gfp flag to
kvcalloc(). However, since we do not want to trigger IOs while
revalidating zones, change dm_revalidate_zones() to have the zone scan
done in GFP_NOIO context using memalloc_noio_save/restore calls.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: bb37d77239af ("dm: introduce zone append emulation")
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-zone.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index c2f26949f5ee..6d82a34438c8 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -205,7 +205,7 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
 		if (!md->zwp_offset) {
 			md->zwp_offset =
 				kvcalloc(q->nr_zones, sizeof(unsigned int),
-					 GFP_NOIO);
+					 GFP_KERNEL);
 			if (!md->zwp_offset)
 				return -ENOMEM;
 		}
@@ -230,6 +230,7 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
 static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
 {
 	struct request_queue *q = md->queue;
+	unsigned int noio_flag;
 	int ret;
 
 	/*
@@ -241,9 +242,14 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
 	if (md->nr_zones)
 		return 0;
 
-	/* Scan all zones to initialize everything */
+	/*
+	 * Scan all zones to initialize everything. Ensure that all vmalloc
+	 * operations in this context are done as if GFP_NOIO was specified.
+	 */
+	noio_flag = memalloc_noio_save();
 	ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones,
 				     dm_zone_revalidate_cb, md);
+	memalloc_noio_restore(noio_flag);
 	if (ret < 0)
 		goto err;
 	if (ret != q->nr_zones) {

From b6e58b5466b2959f83034bead2e2e1395cca8aeb Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Thu, 17 Jun 2021 15:45:47 +0800
Subject: [PATCH 2894/3292] dm btree remove: assign new_root only when removal
 succeeds

remove_raw() in dm_btree_remove() may fail due to IO read error
(e.g. read the content of origin block fails during shadowing),
and the value of shadow_spine::root is uninitialized, but
the uninitialized value is still assign to new_root in the
end of dm_btree_remove().

For dm-thin, the value of pmd->details_root or pmd->root will become
an uninitialized value, so if trying to read details_info tree again
out-of-bound memory may occur as showed below:

  general protection fault, probably for non-canonical address 0x3fdcb14c8d7520
  CPU: 4 PID: 515 Comm: dmsetup Not tainted 5.13.0-rc6
  Hardware name: QEMU Standard PC
  RIP: 0010:metadata_ll_load_ie+0x14/0x30
  Call Trace:
   sm_metadata_count_is_more_than_one+0xb9/0xe0
   dm_tm_shadow_block+0x52/0x1c0
   shadow_step+0x59/0xf0
   remove_raw+0xb2/0x170
   dm_btree_remove+0xf4/0x1c0
   dm_pool_delete_thin_device+0xc3/0x140
   pool_message+0x218/0x2b0
   target_message+0x251/0x290
   ctl_ioctl+0x1c4/0x4d0
   dm_ctl_ioctl+0xe/0x20
   __x64_sys_ioctl+0x7b/0xb0
   do_syscall_64+0x40/0xb0
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixing it by only assign new_root when removal succeeds

Signed-off-by: Hou Tao <houtao1@huawei.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/persistent-data/dm-btree-remove.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index b34af195bf2a..70532335c7c7 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -549,7 +549,8 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 		delete_at(n, index);
 	}
 
-	*new_root = shadow_root(&spine);
+	if (!r)
+		*new_root = shadow_root(&spine);
 	exit_shadow_spine(&spine);
 
 	return r;

From dc4fa29fe445933f51e08674f3b325547ba52de1 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 25 Jun 2021 12:10:26 -0400
Subject: [PATCH 2895/3292] dm io tracker: factor out IO tracker

Allow other code to use dm_io_tracker.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c | 82 +++---------------------------------
 drivers/md/dm-io-tracker.h   | 69 ++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 76 deletions(-)
 create mode 100644 drivers/md/dm-io-tracker.h

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 6ab01ff25747..8e4ced5a2516 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -8,6 +8,7 @@
 #include "dm-bio-prison-v2.h"
 #include "dm-bio-record.h"
 #include "dm-cache-metadata.h"
+#include "dm-io-tracker.h"
 
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
@@ -39,77 +40,6 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
 
 /*----------------------------------------------------------------*/
 
-struct io_tracker {
-	spinlock_t lock;
-
-	/*
-	 * Sectors of in-flight IO.
-	 */
-	sector_t in_flight;
-
-	/*
-	 * The time, in jiffies, when this device became idle (if it is
-	 * indeed idle).
-	 */
-	unsigned long idle_time;
-	unsigned long last_update_time;
-};
-
-static void iot_init(struct io_tracker *iot)
-{
-	spin_lock_init(&iot->lock);
-	iot->in_flight = 0ul;
-	iot->idle_time = 0ul;
-	iot->last_update_time = jiffies;
-}
-
-static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
-{
-	if (iot->in_flight)
-		return false;
-
-	return time_after(jiffies, iot->idle_time + jifs);
-}
-
-static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
-{
-	bool r;
-
-	spin_lock_irq(&iot->lock);
-	r = __iot_idle_for(iot, jifs);
-	spin_unlock_irq(&iot->lock);
-
-	return r;
-}
-
-static void iot_io_begin(struct io_tracker *iot, sector_t len)
-{
-	spin_lock_irq(&iot->lock);
-	iot->in_flight += len;
-	spin_unlock_irq(&iot->lock);
-}
-
-static void __iot_io_end(struct io_tracker *iot, sector_t len)
-{
-	if (!len)
-		return;
-
-	iot->in_flight -= len;
-	if (!iot->in_flight)
-		iot->idle_time = jiffies;
-}
-
-static void iot_io_end(struct io_tracker *iot, sector_t len)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&iot->lock, flags);
-	__iot_io_end(iot, len);
-	spin_unlock_irqrestore(&iot->lock, flags);
-}
-
-/*----------------------------------------------------------------*/
-
 /*
  * Represents a chunk of future work.  'input' allows continuations to pass
  * values between themselves, typically error values.
@@ -470,7 +400,7 @@ struct cache {
 	struct batcher committer;
 	struct work_struct commit_ws;
 
-	struct io_tracker tracker;
+	struct dm_io_tracker tracker;
 
 	mempool_t migration_pool;
 
@@ -866,7 +796,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
 	if (accountable_bio(cache, bio)) {
 		pb = get_per_bio_data(bio);
 		pb->len = bio_sectors(bio);
-		iot_io_begin(&cache->tracker, pb->len);
+		dm_iot_io_begin(&cache->tracker, pb->len);
 	}
 }
 
@@ -874,7 +804,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
 {
 	struct per_bio_data *pb = get_per_bio_data(bio);
 
-	iot_io_end(&cache->tracker, pb->len);
+	dm_iot_io_end(&cache->tracker, pb->len);
 }
 
 static void accounted_request(struct cache *cache, struct bio *bio)
@@ -1642,7 +1572,7 @@ enum busy {
 
 static enum busy spare_migration_bandwidth(struct cache *cache)
 {
-	bool idle = iot_idle_for(&cache->tracker, HZ);
+	bool idle = dm_iot_idle_for(&cache->tracker, HZ);
 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
 		cache->sectors_per_block;
 
@@ -2603,7 +2533,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	batcher_init(&cache->committer, commit_op, cache,
 		     issue_op, cache, cache->wq);
-	iot_init(&cache->tracker);
+	dm_iot_init(&cache->tracker);
 
 	init_rwsem(&cache->background_work_lock);
 	prevent_background_work(cache);
diff --git a/drivers/md/dm-io-tracker.h b/drivers/md/dm-io-tracker.h
new file mode 100644
index 000000000000..1dcf01f9f066
--- /dev/null
+++ b/drivers/md/dm-io-tracker.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2021 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_IO_TRACKER_H
+#define DM_IO_TRACKER_H
+
+#include <linux/jiffies.h>
+
+struct dm_io_tracker {
+	spinlock_t lock;
+
+	/*
+	 * Sectors of in-flight IO.
+	 */
+	sector_t in_flight;
+
+	/*
+	 * The time, in jiffies, when this device became idle
+	 * (if it is indeed idle).
+	 */
+	unsigned long idle_time;
+	unsigned long last_update_time;
+};
+
+static inline void dm_iot_init(struct dm_io_tracker *iot)
+{
+	spin_lock_init(&iot->lock);
+	iot->in_flight = 0ul;
+	iot->idle_time = 0ul;
+	iot->last_update_time = jiffies;
+}
+
+static inline bool dm_iot_idle_for(struct dm_io_tracker *iot, unsigned long j)
+{
+	bool r = false;
+
+	spin_lock_irq(&iot->lock);
+	if (!iot->in_flight)
+		r = time_after(jiffies, iot->idle_time + j);
+	spin_unlock_irq(&iot->lock);
+
+	return r;
+}
+
+static inline void dm_iot_io_begin(struct dm_io_tracker *iot, sector_t len)
+{
+	spin_lock_irq(&iot->lock);
+	iot->in_flight += len;
+	spin_unlock_irq(&iot->lock);
+}
+
+static inline void dm_iot_io_end(struct dm_io_tracker *iot, sector_t len)
+{
+	unsigned long flags;
+
+	if (!len)
+		return;
+
+	spin_lock_irqsave(&iot->lock, flags);
+	iot->in_flight -= len;
+	if (!iot->in_flight)
+		iot->idle_time = jiffies;
+	spin_unlock_irqrestore(&iot->lock, flags);
+}
+
+#endif

From 95b88f4d71cb953e02206be3c757083601391a0f Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 25 Jun 2021 15:33:02 -0400
Subject: [PATCH 2896/3292] dm writecache: pause writeback if cache full and
 origin being written directly

Implementation reuses dm_io_tracker, that until now was only used by
dm-cache, to track if any writes were issued directly to the origin
(due to cache being full) within the last second. If so writeback is
paused for a second.

This change improves performance for when the cache is full and IO is
issued directly to the origin device (rather than through the cache).

Depends-on: d53f1fafec9d ("dm writecache: do direct write if the cache is full")
Suggested-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-writecache.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2eb7d7bcdfb1..d70342c9003a 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -15,6 +15,8 @@
 #include <linux/dax.h>
 #include <linux/pfn_t.h>
 #include <linux/libnvdimm.h>
+#include <linux/delay.h>
+#include "dm-io-tracker.h"
 
 #define DM_MSG_PREFIX "writecache"
 
@@ -183,6 +185,8 @@ struct dm_writecache {
 	struct work_struct writeback_work;
 	struct work_struct flush_work;
 
+	struct dm_io_tracker iot;
+
 	struct dm_io_client *dm_io;
 
 	raw_spinlock_t endio_list_lock;
@@ -1466,6 +1470,10 @@ bio_copy:
 	}
 
 unlock_remap_origin:
+	if (bio_data_dir(bio) != READ) {
+		dm_iot_io_begin(&wc->iot, 1);
+		bio->bi_private = (void *)2;
+	}
 	bio_set_dev(bio, wc->dev->bdev);
 	wc_unlock(wc);
 	return DM_MAPIO_REMAPPED;
@@ -1496,11 +1504,13 @@ static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t
 {
 	struct dm_writecache *wc = ti->private;
 
-	if (bio->bi_private != NULL) {
+	if (bio->bi_private == (void *)1) {
 		int dir = bio_data_dir(bio);
 		if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
 			if (unlikely(waitqueue_active(&wc->bio_in_progress_wait[dir])))
 				wake_up(&wc->bio_in_progress_wait[dir]);
+	} else if (bio->bi_private == (void *)2) {
+		dm_iot_io_end(&wc->iot, 1);
 	}
 	return 0;
 }
@@ -1827,6 +1837,13 @@ static void writecache_writeback(struct work_struct *work)
 		dm_kcopyd_client_flush(wc->dm_kcopyd);
 	}
 
+	if (!wc->writeback_all && !dm_suspended(wc->ti)) {
+		while (!dm_iot_idle_for(&wc->iot, HZ)) {
+			cond_resched();
+			msleep(1000);
+		}
+	}
+
 	wc_lock(wc);
 restart:
 	if (writecache_has_error(wc)) {
@@ -2140,6 +2157,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	INIT_WORK(&wc->writeback_work, writecache_writeback);
 	INIT_WORK(&wc->flush_work, writecache_flush_work);
 
+	dm_iot_init(&wc->iot);
+
 	raw_spin_lock_init(&wc->endio_list_lock);
 	INIT_LIST_HEAD(&wc->endio_list);
 	wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio");

From c469c9c9733cc92bef6d4bf2c0f5bea0550abf4d Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 2 Apr 2021 18:46:29 +0800
Subject: [PATCH 2897/3292] Bluetooth: 6lowpan: delete unneeded variable
 initialization

Delete unneeded variable initialization.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/6lowpan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 97617d02c8f9..d5befa061aa2 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -691,7 +691,7 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
 static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
 {
 	struct net_device *netdev;
-	int err = 0;
+	int err;
 
 	netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)),
 			      IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN,

From 07d85dbe411a1194eef5b70f1a5d070ee1e226a5 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Sat, 10 Apr 2021 10:19:35 +0800
Subject: [PATCH 2898/3292] Bluetooth: use flexible-array member instead of
 zero-length array

Fix the following coccicheck warning:

net/bluetooth/msft.c:37:6-13: WARNING use flexible-array member instead
net/bluetooth/msft.c:42:6-10: WARNING use flexible-array member instead
net/bluetooth/msft.c:52:6-10: WARNING use flexible-array member instead

Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/msft.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index e28f15439ce4..37a394786a94 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -34,12 +34,12 @@ struct msft_le_monitor_advertisement_pattern {
 	__u8 length;
 	__u8 data_type;
 	__u8 start_byte;
-	__u8 pattern[0];
+	__u8 pattern[];
 };
 
 struct msft_le_monitor_advertisement_pattern_data {
 	__u8 count;
-	__u8 data[0];
+	__u8 data[];
 };
 
 struct msft_cp_le_monitor_advertisement {
@@ -49,7 +49,7 @@ struct msft_cp_le_monitor_advertisement {
 	__u8 rssi_low_interval;
 	__u8 rssi_sampling_period;
 	__u8 cond_type;
-	__u8 data[0];
+	__u8 data[];
 } __packed;
 
 struct msft_rp_le_monitor_advertisement {

From 1c6ed31b1696d9b5462ba5ce15b83f5ea955600c Mon Sep 17 00:00:00 2001
From: Yu Liu <yudiliu@google.com>
Date: Fri, 9 Apr 2021 15:04:06 -0700
Subject: [PATCH 2899/3292] Bluetooth: Return whether a connection is outbound

When an MGMT_EV_DEVICE_CONNECTED event is reported back to the user
space we will set the flags to tell if the established connection is
outbound or not. This is useful for the user space to log better metrics
and error messages.

Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Yu Liu <yudiliu@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 include/net/bluetooth/mgmt.h     | 1 +
 net/bluetooth/hci_event.c        | 8 ++++----
 net/bluetooth/l2cap_core.c       | 2 +-
 net/bluetooth/mgmt.c             | 6 +++++-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c73ac52af186..8f5f390363f5 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1768,7 +1768,7 @@ void __mgmt_power_off(struct hci_dev *hdev);
 void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
 		       bool persistent);
 void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
-			   u32 flags, u8 *name, u8 name_len);
+			   u8 *name, u8 name_len);
 void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
 			      u8 link_type, u8 addr_type, u8 reason,
 			      bool mgmt_connected);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index a7cffb069565..a03c62b1dc2f 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -939,6 +939,7 @@ struct mgmt_ev_auth_failed {
 #define MGMT_DEV_FOUND_CONFIRM_NAME    0x01
 #define MGMT_DEV_FOUND_LEGACY_PAIRING  0x02
 #define MGMT_DEV_FOUND_NOT_CONNECTABLE 0x04
+#define MGMT_DEV_FOUND_INITIATED_CONN  0x08
 
 #define MGMT_EV_DEVICE_FOUND		0x0012
 struct mgmt_ev_device_found {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ea06b010ccad..59c5329354e1 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2069,7 +2069,7 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
 	if (conn &&
 	    (conn->state == BT_CONFIG || conn->state == BT_CONNECTED) &&
 	    !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
-		mgmt_device_connected(hdev, conn, 0, name, name_len);
+		mgmt_device_connected(hdev, conn, name, name_len);
 
 	if (discov->state == DISCOVERY_STOPPED)
 		return;
@@ -3256,7 +3256,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
 		cp.pscan_rep_mode = 0x02;
 		hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
 	} else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
-		mgmt_device_connected(hdev, conn, 0, NULL, 0);
+		mgmt_device_connected(hdev, conn, NULL, 0);
 
 	if (!hci_outgoing_auth_needed(hdev, conn)) {
 		conn->state = BT_CONNECTED;
@@ -4330,7 +4330,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
 		cp.pscan_rep_mode = 0x02;
 		hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
 	} else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
-		mgmt_device_connected(hdev, conn, 0, NULL, 0);
+		mgmt_device_connected(hdev, conn, NULL, 0);
 
 	if (!hci_outgoing_auth_needed(hdev, conn)) {
 		conn->state = BT_CONNECTED;
@@ -5204,7 +5204,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 	}
 
 	if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
-		mgmt_device_connected(hdev, conn, 0, NULL, 0);
+		mgmt_device_connected(hdev, conn, NULL, 0);
 
 	conn->sec_level = BT_SECURITY_LOW;
 	conn->handle = handle;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b6a88b8256c7..7d975cf98c20 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4237,7 +4237,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
 	hci_dev_lock(hdev);
 	if (hci_dev_test_flag(hdev, HCI_MGMT) &&
 	    !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
-		mgmt_device_connected(hdev, hcon, 0, NULL, 0);
+		mgmt_device_connected(hdev, hcon, NULL, 0);
 	hci_dev_unlock(hdev);
 
 	l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f290d0c54d32..f6e510d06bec 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -8767,15 +8767,19 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr,
 }
 
 void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
-			   u32 flags, u8 *name, u8 name_len)
+			   u8 *name, u8 name_len)
 {
 	char buf[512];
 	struct mgmt_ev_device_connected *ev = (void *) buf;
 	u16 eir_len = 0;
+	u32 flags = 0;
 
 	bacpy(&ev->addr.bdaddr, &conn->dst);
 	ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type);
 
+	if (conn->out)
+		flags |= MGMT_DEV_FOUND_INITIATED_CONN;
+
 	ev->flags = __cpu_to_le32(flags);
 
 	/* We must ensure that the EIR Data fields are ordered and

From 1cb027f2f803d0a7abe9c291f0625e6bccd25999 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Apr 2021 17:53:14 +0100
Subject: [PATCH 2900/3292] Bluetooth: virtio_bt: add missing null pointer
 check on alloc_skb call return

The call to alloc_skb with the GFP_KERNEL flag can return a null sk_buff
pointer, so add a null check to avoid any null pointer deference issues.

Addresses-Coverity: ("Dereference null return value")
Fixes: afd2daa26c7a ("Bluetooth: Add support for virtio transport driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/virtio_bt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c
index c804db7e90f8..57908ce4fae8 100644
--- a/drivers/bluetooth/virtio_bt.c
+++ b/drivers/bluetooth/virtio_bt.c
@@ -34,6 +34,9 @@ static int virtbt_add_inbuf(struct virtio_bluetooth *vbt)
 	int err;
 
 	skb = alloc_skb(1000, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
 	sg_init_one(sg, skb->data, 1000);
 
 	err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL);

From 8454ed9ff9647e31e061fb5eb2e39ce79bc5e960 Mon Sep 17 00:00:00 2001
From: "mark-yw.chen" <mark-yw.chen@mediatek.com>
Date: Mon, 12 Apr 2021 23:06:26 +0800
Subject: [PATCH 2901/3292] Bluetooth: btusb: Fixed too many in-token issue for
 Mediatek Chip.

This patch reduce in-token during download patch procedure.
Don't submit urb for polling event before sending hci command.

Signed-off-by: mark-yw.chen <mark-yw.chen@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 7f6ba2c975ed..99fd88f7653d 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -3312,11 +3312,6 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev,
 	struct btmtk_wmt_hdr *hdr;
 	int err;
 
-	/* Submit control IN URB on demand to process the WMT event */
-	err = btusb_mtk_submit_wmt_recv_urb(hdev);
-	if (err < 0)
-		return err;
-
 	/* Send the WMT command and wait until the WMT event returns */
 	hlen = sizeof(*hdr) + wmt_params->dlen;
 	if (hlen > 255)
@@ -3342,6 +3337,11 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev,
 		goto err_free_wc;
 	}
 
+	/* Submit control IN URB on demand to process the WMT event */
+	err = btusb_mtk_submit_wmt_recv_urb(hdev);
+	if (err < 0)
+		return err;
+
 	/* The vendor specific WMT commands are all answered by a vendor
 	 * specific event and will have the Command Status or Command
 	 * Complete as with usual HCI command flow control.

From 393dc5d19c825906f955210f10ee3befc39854f7 Mon Sep 17 00:00:00 2001
From: "mark-yw.chen" <mark-yw.chen@mediatek.com>
Date: Mon, 12 Apr 2021 23:06:27 +0800
Subject: [PATCH 2902/3292] Bluetooth: btusb: Add support for Lite-On Mediatek
 Chip

Add support for Lite-On Mediatek Chip (MT7921)
Lite On VID = 04CA.

* /sys/kernel/debug/usb/devices
T:  Bus=01 Lev=03 Prnt=04 Port=01 Cnt=02 Dev#=  8 Spd=480  MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=04ca ProdID=3802 Rev= 1.00
S:  Manufacturer=MediaTek Inc.
S:  Product=Wireless_Device
S:  SerialNumber=000000000
C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=100mA
A:  FirstIf#= 0 IfCount= 3 Cls=e0(wlcon) Sub=01 Prot=01
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=125us
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
I:  If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  63 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  63 Ivl=1ms
I:* If#= 2 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none)
E:  Ad=8a(I) Atr=03(Int.) MxPS=  64 Ivl=125us
E:  Ad=0a(O) Atr=03(Int.) MxPS=  64 Ivl=125us
I:  If#= 2 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=(none)
E:  Ad=8a(I) Atr=03(Int.) MxPS=  64 Ivl=125us
E:  Ad=0a(O) Atr=03(Int.) MxPS=  64 Ivl=125us

Signed-off-by: mark-yw.chen <mark-yw.chen@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 99fd88f7653d..cb18d63a948d 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -408,6 +408,11 @@ static const struct usb_device_id blacklist_table[] = {
 	/* Additional MediaTek MT7615E Bluetooth devices */
 	{ USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK},
 
+	/* Additional MediaTek MT7921 Bluetooth devices */
+	{ USB_DEVICE(0x04ca, 0x3802), .driver_info = BTUSB_MEDIATEK |
+						     BTUSB_WIDEBAND_SPEECH |
+						     BTUSB_VALID_LE_STATES },
+
 	/* Additional Realtek 8723AE Bluetooth devices */
 	{ USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3394), .driver_info = BTUSB_REALTEK },

From 44e936d7459cac598d9fe0b6d23cc9d12b648d5e Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <musamaanjum@gmail.com>
Date: Tue, 13 Apr 2021 22:52:08 +0500
Subject: [PATCH 2903/3292] Bluetooth: btusb: fix memory leak

If btusb_mtk_submit_wmt_recv_urb returns error, wc should be freed and
then error should be returned to prevent memory leak.

Addresses-Coverity: ("Prevent memory leak")
Fixes: 4cbb375e997d ("Bluetooth: btusb: Fixed too many in-token issue for Mediatek Chip.")
Signed-off-by: Muhammad Usama Anjum <musamaanjum@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index cb18d63a948d..dd48543b65ce 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -3345,7 +3345,7 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev,
 	/* Submit control IN URB on demand to process the WMT event */
 	err = btusb_mtk_submit_wmt_recv_urb(hdev);
 	if (err < 0)
-		return err;
+		goto err_free_wc;
 
 	/* The vendor specific WMT commands are all answered by a vendor
 	 * specific event and will have the Command Status or Command

From 3cfdf8fcaafa62a4123f92eb0f4a72650da3a479 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Tue, 13 Apr 2021 13:21:03 -0300
Subject: [PATCH 2904/3292] Bluetooth: cmtp: fix file refcount when
 cmtp_attach_device fails

When cmtp_attach_device fails, cmtp_add_connection returns the error value
which leads to the caller to doing fput through sockfd_put. But
cmtp_session kthread, which is stopped in this path will also call fput,
leading to a potential refcount underflow or a use-after-free.

Add a refcount before we signal the kthread to stop. The kthread will try
to grab the cmtp_session_sem mutex before doing the fput, which is held
when get_file is called, so there should be no races there.

Reported-by: Ryota Shiga
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/cmtp/core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 07cfa3249f83..0a2d78e811cf 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -392,6 +392,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 	if (!(session->flags & BIT(CMTP_LOOPBACK))) {
 		err = cmtp_attach_device(session);
 		if (err < 0) {
+			/* Caller will call fput in case of failure, and so
+			 * will cmtp_session kthread.
+			 */
+			get_file(session->sock->file);
+
 			atomic_inc(&session->terminate);
 			wake_up_interruptible(sk_sleep(session->sock->sk));
 			up_write(&cmtp_session_sem);

From 4ef36a52b0e47c80bbfd69c0cce61c7ae9f541ed Mon Sep 17 00:00:00 2001
From: Yu Liu <yudiliu@google.com>
Date: Mon, 19 Apr 2021 16:53:30 -0700
Subject: [PATCH 2905/3292] Bluetooth: Fix the HCI to MGMT status conversion
 table

0x2B, 0x31 and 0x33 are reserved for future use but were not present in
the HCI to MGMT conversion table, this caused the conversion to be
incorrect for the HCI status code greater than 0x2A.

Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Yu Liu <yudiliu@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/mgmt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f6e510d06bec..a81cf1b8b2e8 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -252,12 +252,15 @@ static const u8 mgmt_status_table[] = {
 	MGMT_STATUS_TIMEOUT,		/* Instant Passed */
 	MGMT_STATUS_NOT_SUPPORTED,	/* Pairing Not Supported */
 	MGMT_STATUS_FAILED,		/* Transaction Collision */
+	MGMT_STATUS_FAILED,		/* Reserved for future use */
 	MGMT_STATUS_INVALID_PARAMS,	/* Unacceptable Parameter */
 	MGMT_STATUS_REJECTED,		/* QoS Rejected */
 	MGMT_STATUS_NOT_SUPPORTED,	/* Classification Not Supported */
 	MGMT_STATUS_REJECTED,		/* Insufficient Security */
 	MGMT_STATUS_INVALID_PARAMS,	/* Parameter Out Of Range */
+	MGMT_STATUS_FAILED,		/* Reserved for future use */
 	MGMT_STATUS_BUSY,		/* Role Switch Pending */
+	MGMT_STATUS_FAILED,		/* Reserved for future use */
 	MGMT_STATUS_FAILED,		/* Slot Violation */
 	MGMT_STATUS_FAILED,		/* Role Switch Failed */
 	MGMT_STATUS_INVALID_PARAMS,	/* EIR Too Large */

From 59f90f1351282ea2dbd0c59098fd9bb2634e920e Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 3 May 2021 13:06:05 +0300
Subject: [PATCH 2906/3292] Bluetooth: hci_qca: fix potential GPF

In qca_power_shutdown() qcadev local variable is
initialized by hu->serdev.dev private data, but
hu->serdev can be NULL and there is a check for it.

Since, qcadev is not used before

	if (!hu->serdev)
		return;

we can move its initialization after this "if" to
prevent GPF.

Fixes: 5559904ccc08 ("Bluetooth: hci_qca: Add QCA Rome power off support to the qca_power_shutdown()")
Cc: stable@vger.kernel.org # v5.6+
Cc: Rocky Liao <rjliao@codeaurora.org>
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 0a0056912d51..dc6551d65912 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1835,8 +1835,6 @@ static void qca_power_shutdown(struct hci_uart *hu)
 	unsigned long flags;
 	enum qca_btsoc_type soc_type = qca_soc_type(hu);
 
-	qcadev = serdev_device_get_drvdata(hu->serdev);
-
 	/* From this point we go into power off state. But serial port is
 	 * still open, stop queueing the IBS data and flush all the buffered
 	 * data in skb's.
@@ -1852,6 +1850,8 @@ static void qca_power_shutdown(struct hci_uart *hu)
 	if (!hu->serdev)
 		return;
 
+	qcadev = serdev_device_get_drvdata(hu->serdev);
+
 	if (qca_is_wcn399x(soc_type)) {
 		host_set_baudrate(hu, 2400);
 		qca_send_power_pulse(hu, false);

From de75cd0d9b2f3250d5f25846bb5632ccce6275f4 Mon Sep 17 00:00:00 2001
From: Manish Mandlik <mmandlik@google.com>
Date: Thu, 29 Apr 2021 10:24:22 -0700
Subject: [PATCH 2907/3292] Bluetooth: Add ncmd=0 recovery handling

During command status or command complete event, the controller may set
ncmd=0 indicating that it is not accepting any more commands. In such a
case, host holds off sending any more commands to the controller. If the
controller doesn't recover from such condition, host will wait forever,
until the user decides that the Bluetooth is broken and may power cycles
the Bluetooth.

This patch triggers the hardware error to reset the controller and
driver when it gets into such state as there is no other wat out.

Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Manish Mandlik <mmandlik@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  1 +
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 22 ++++++++++++++++++++++
 net/bluetooth/hci_event.c        | 29 +++++++++++++++++++----------
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ea4ae551c426..c4b0650fb9ae 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -339,6 +339,7 @@ enum {
 #define HCI_PAIRING_TIMEOUT	msecs_to_jiffies(60000)	/* 60 seconds */
 #define HCI_INIT_TIMEOUT	msecs_to_jiffies(10000)	/* 10 seconds */
 #define HCI_CMD_TIMEOUT		msecs_to_jiffies(2000)	/* 2 seconds */
+#define HCI_NCMD_TIMEOUT	msecs_to_jiffies(4000)	/* 4 seconds */
 #define HCI_ACL_TX_TIMEOUT	msecs_to_jiffies(45000)	/* 45 seconds */
 #define HCI_AUTO_OFF_TIMEOUT	msecs_to_jiffies(2000)	/* 2 seconds */
 #define HCI_POWER_OFF_TIMEOUT	msecs_to_jiffies(5000)	/* 5 seconds */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 8f5f390363f5..43b08bebae74 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -470,6 +470,7 @@ struct hci_dev {
 	struct delayed_work	service_cache;
 
 	struct delayed_work	cmd_timer;
+	struct delayed_work	ncmd_timer;
 
 	struct work_struct	rx_work;
 	struct work_struct	cmd_work;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 25484bb0773d..572f2362ddb7 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1730,6 +1730,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
 	}
 
 	cancel_delayed_work(&hdev->power_off);
+	cancel_delayed_work(&hdev->ncmd_timer);
 
 	hci_request_cancel_all(hdev);
 	hci_req_sync_lock(hdev);
@@ -2777,6 +2778,24 @@ static void hci_cmd_timeout(struct work_struct *work)
 	queue_work(hdev->workqueue, &hdev->cmd_work);
 }
 
+/* HCI ncmd timer function */
+static void hci_ncmd_timeout(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev,
+					    ncmd_timer.work);
+
+	bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0");
+
+	/* During HCI_INIT phase no events can be injected if the ncmd timer
+	 * triggers since the procedure has its own timeout handling.
+	 */
+	if (test_bit(HCI_INIT, &hdev->flags))
+		return;
+
+	/* This is an irrecoverable state, inject hardware error event */
+	hci_reset_dev(hdev);
+}
+
 struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
 					  bdaddr_t *bdaddr, u8 bdaddr_type)
 {
@@ -3841,6 +3860,7 @@ struct hci_dev *hci_alloc_dev(void)
 	init_waitqueue_head(&hdev->suspend_wait_q);
 
 	INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
+	INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout);
 
 	hci_request_setup(hdev);
 
@@ -4078,6 +4098,8 @@ int hci_reset_dev(struct hci_dev *hdev)
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 	skb_put_data(skb, hw_err, 3);
 
+	bt_dev_err(hdev, "Injecting HCI hardware error event");
+
 	/* Send Hardware Error to upper stack */
 	return hci_recv_frame(hdev, skb);
 }
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 59c5329354e1..18339ebc5959 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3268,6 +3268,23 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
+static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev,
+					    u16 opcode, u8 ncmd)
+{
+	if (opcode != HCI_OP_NOP)
+		cancel_delayed_work(&hdev->cmd_timer);
+
+	if (!test_bit(HCI_RESET, &hdev->flags)) {
+		if (ncmd) {
+			cancel_delayed_work(&hdev->ncmd_timer);
+			atomic_set(&hdev->cmd_cnt, 1);
+		} else {
+			schedule_delayed_work(&hdev->ncmd_timer,
+					      HCI_NCMD_TIMEOUT);
+		}
+	}
+}
+
 static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 				 u16 *opcode, u8 *status,
 				 hci_req_complete_t *req_complete,
@@ -3630,11 +3647,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		break;
 	}
 
-	if (*opcode != HCI_OP_NOP)
-		cancel_delayed_work(&hdev->cmd_timer);
-
-	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
-		atomic_set(&hdev->cmd_cnt, 1);
+	handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
 
 	hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
 			     req_complete_skb);
@@ -3735,11 +3748,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		break;
 	}
 
-	if (*opcode != HCI_OP_NOP)
-		cancel_delayed_work(&hdev->cmd_timer);
-
-	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
-		atomic_set(&hdev->cmd_cnt, 1);
+	handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
 
 	/* Indicate request completion if the command failed. Also, if
 	 * we're not waiting for a special event and we get a success

From b0e56db78744000a26b03fb442d6f944f68a8386 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 15 Apr 2021 16:49:28 +0800
Subject: [PATCH 2908/3292] Bluetooth: 6lowpan: remove unused function

Fix the following clang warning:

net/bluetooth/6lowpan.c:913:20: warning: unused function 'bdaddr_type'
[-Wunused-function].

net/bluetooth/6lowpan.c:106:35: warning: unused function
'peer_lookup_ba' [-Wunused-function].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/6lowpan.c | 36 ------------------------------------
 1 file changed, 36 deletions(-)

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d5befa061aa2..29c96bc5733f 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -103,34 +103,6 @@ static inline bool peer_del(struct lowpan_btle_dev *dev,
 	return false;
 }
 
-static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_btle_dev *dev,
-						 bdaddr_t *ba, __u8 type)
-{
-	struct lowpan_peer *peer;
-
-	BT_DBG("peers %d addr %pMR type %d", atomic_read(&dev->peer_count),
-	       ba, type);
-
-	rcu_read_lock();
-
-	list_for_each_entry_rcu(peer, &dev->peers, list) {
-		BT_DBG("dst addr %pMR dst type %d",
-		       &peer->chan->dst, peer->chan->dst_type);
-
-		if (bacmp(&peer->chan->dst, ba))
-			continue;
-
-		if (type == peer->chan->dst_type) {
-			rcu_read_unlock();
-			return peer;
-		}
-	}
-
-	rcu_read_unlock();
-
-	return NULL;
-}
-
 static inline struct lowpan_peer *
 __peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan)
 {
@@ -907,14 +879,6 @@ static const struct l2cap_ops bt_6lowpan_chan_ops = {
 	.set_shutdown		= l2cap_chan_no_set_shutdown,
 };
 
-static inline __u8 bdaddr_type(__u8 type)
-{
-	if (type == ADDR_LE_DEV_PUBLIC)
-		return BDADDR_LE_PUBLIC;
-	else
-		return BDADDR_LE_RANDOM;
-}
-
 static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type)
 {
 	struct l2cap_chan *chan;

From 06d213d8a89a6f55b708422c3dda2b22add10748 Mon Sep 17 00:00:00 2001
From: Kiran K <kiran.k@intel.com>
Date: Thu, 8 Apr 2021 22:31:59 +0530
Subject: [PATCH 2909/3292] Bluetooth: Fix alt settings for incoming SCO with
 transparent coding format

For incoming SCO connection with transparent coding format, alt setting
of CVSD is getting applied instead of Transparent.

Before fix:
< HCI Command: Accept Synchron.. (0x01|0x0029) plen 21  #2196 [hci0] 321.342548
        Address: 1C:CC:D6:E2:EA:80 (Xiaomi Communications Co Ltd)
        Transmit bandwidth: 8000
        Receive bandwidth: 8000
        Max latency: 13
        Setting: 0x0003
          Input Coding: Linear
          Input Data Format: 1's complement
          Input Sample Size: 8-bit
          # of bits padding at MSB: 0
          Air Coding Format: Transparent Data
        Retransmission effort: Optimize for link quality (0x02)
        Packet type: 0x003f
          HV1 may be used
          HV2 may be used
          HV3 may be used
          EV3 may be used
          EV4 may be used
          EV5 may be used
> HCI Event: Command Status (0x0f) plen 4               #2197 [hci0] 321.343585
      Accept Synchronous Connection Request (0x01|0x0029) ncmd 1
        Status: Success (0x00)
> HCI Event: Synchronous Connect Comp.. (0x2c) plen 17  #2198 [hci0] 321.351666
        Status: Success (0x00)
        Handle: 257
        Address: 1C:CC:D6:E2:EA:80 (Xiaomi Communications Co Ltd)
        Link type: eSCO (0x02)
        Transmission interval: 0x0c
        Retransmission window: 0x04
        RX packet length: 60
        TX packet length: 60
        Air mode: Transparent (0x03)
........
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2336 [hci0] 321.383655
< SCO Data TX: Handle 257 flags 0x00 dlen 60            #2337 [hci0] 321.389558
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2338 [hci0] 321.393615
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2339 [hci0] 321.393618
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2340 [hci0] 321.393618
< SCO Data TX: Handle 257 flags 0x00 dlen 60            #2341 [hci0] 321.397070
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2342 [hci0] 321.403622
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2343 [hci0] 321.403625
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2344 [hci0] 321.403625
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2345 [hci0] 321.403625
< SCO Data TX: Handle 257 flags 0x00 dlen 60            #2346 [hci0] 321.404569
< SCO Data TX: Handle 257 flags 0x00 dlen 60            #2347 [hci0] 321.412091
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2348 [hci0] 321.413626
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2349 [hci0] 321.413630
> SCO Data RX: Handle 257 flags 0x00 dlen 48            #2350 [hci0] 321.413630
< SCO Data TX: Handle 257 flags 0x00 dlen 60            #2351 [hci0] 321.419674

After fix:

< HCI Command: Accept Synchronou.. (0x01|0x0029) plen 21  #309 [hci0] 49.439693
        Address: 1C:CC:D6:E2:EA:80 (Xiaomi Communications Co Ltd)
        Transmit bandwidth: 8000
        Receive bandwidth: 8000
        Max latency: 13
        Setting: 0x0003
          Input Coding: Linear
          Input Data Format: 1's complement
          Input Sample Size: 8-bit
          # of bits padding at MSB: 0
          Air Coding Format: Transparent Data
        Retransmission effort: Optimize for link quality (0x02)
        Packet type: 0x003f
          HV1 may be used
          HV2 may be used
          HV3 may be used
          EV3 may be used
          EV4 may be used
          EV5 may be used
> HCI Event: Command Status (0x0f) plen 4                 #310 [hci0] 49.440308
      Accept Synchronous Connection Request (0x01|0x0029) ncmd 1
        Status: Success (0x00)
> HCI Event: Synchronous Connect Complete (0x2c) plen 17  #311 [hci0] 49.449308
        Status: Success (0x00)
        Handle: 257
        Address: 1C:CC:D6:E2:EA:80 (Xiaomi Communications Co Ltd)
        Link type: eSCO (0x02)
        Transmission interval: 0x0c
        Retransmission window: 0x04
        RX packet length: 60
        TX packet length: 60
        Air mode: Transparent (0x03)
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #312 [hci0] 49.450421
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #313 [hci0] 49.457927
> HCI Event: Max Slots Change (0x1b) plen 3               #314 [hci0] 49.460345
        Handle: 256
        Max slots: 5
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #315 [hci0] 49.465453
> SCO Data RX: Handle 257 flags 0x00 dlen 60              #316 [hci0] 49.470502
> SCO Data RX: Handle 257 flags 0x00 dlen 60              #317 [hci0] 49.470519
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #318 [hci0] 49.472996
> SCO Data RX: Handle 257 flags 0x00 dlen 60              #319 [hci0] 49.480412
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #320 [hci0] 49.480492
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #321 [hci0] 49.487989
> SCO Data RX: Handle 257 flags 0x00 dlen 60              #322 [hci0] 49.490303
< SCO Data TX: Handle 257 flags 0x00 dlen 60              #323 [hci0] 49.495496
> SCO Data RX: Handle 257 flags 0x00 dlen 60              #324 [hci0] 49.500304
> SCO Data RX: Handle 257 flags 0x00 dlen 60              #325 [hci0] 49.500311

Signed-off-by: Kiran K <kiran.k@intel.com>
Signed-off-by: Lokendra Singh <lokendra.singh@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 18339ebc5959..d73950441938 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4413,12 +4413,12 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
 
 	bt_dev_dbg(hdev, "SCO connected with air mode: %02x", ev->air_mode);
 
-	switch (conn->setting & SCO_AIRMODE_MASK) {
-	case SCO_AIRMODE_CVSD:
+	switch (ev->air_mode) {
+	case 0x02:
 		if (hdev->notify)
 			hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD);
 		break;
-	case SCO_AIRMODE_TRANSP:
+	case 0x03:
 		if (hdev->notify)
 			hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_TRANSP);
 		break;

From b43ca511178ed0ab6fd2405df28cf9e100273020 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 7 May 2021 14:27:33 +0200
Subject: [PATCH 2910/3292] Bluetooth: btqca: Don't modify firmware contents
 in-place

struct firmware::data is marked const, and when the firmware is
compressed with xz (default at least with Fedora) it's mapped read-only
which results in a crash:

BUG: unable to handle page fault for address: ffffae57c0ca5047
PGD 100000067 P4D 100000067 PUD 1001ce067 PMD 10165a067 PTE 8000000112bba161
Oops: 0003 [#1] SMP NOPTI
CPU: 3 PID: 204 Comm: kworker/u17:0 Not tainted 5.12.1-test+ #1
Hardware name: Dell Inc. XPS 13 9310/0F7M4C, BIOS 1.2.5 12/10/2020
Workqueue: hci0 hci_power_on [bluetooth]
RIP: 0010:qca_download_firmware+0x27c/0x4e0 [btqca]
Code: 1b 75 04 80 48 0c 01 0f b7 c6 8d 54 02 0c 41 39 d7 0f 8e 62 fe ff ff 48 63 c2 4c 01 e8 0f b7 38 0f b7 70 02 66 83 ff 11 75 d3 <80> 48 0c 80 41 83 fc 03 7e 6e 88 58 0d eb ce 41 0f b6 45 0e 48 8b
RSP: 0018:ffffae57c08dfc68 EFLAGS: 00010246
RAX: ffffae57c0ca503b RBX: 000000000000000e RCX: 0000000000000000
RDX: 0000000000000037 RSI: 0000000000000006 RDI: 0000000000000011
RBP: ffff978d9949e000 R08: ffff978d84ed7540 R09: ffffae57c0ca5000
R10: 000000000010cd00 R11: 0000000000000001 R12: 0000000000000005
R13: ffffae57c0ca5004 R14: ffff978d98ca8680 R15: 00000000000016a9
FS:  0000000000000000(0000) GS:ffff9794ef6c0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffae57c0ca5047 CR3: 0000000113d5a004 CR4: 0000000000770ee0
PKRU: 55555554
Call Trace:
 qca_uart_setup+0x2cb/0x1390 [btqca]
 ? qca_read_soc_version+0x136/0x220 [btqca]
 qca_setup+0x288/0xab0 [hci_uart]
 hci_dev_do_open+0x1f3/0x780 [bluetooth]
 ? try_to_wake_up+0x1c1/0x4f0
 hci_power_on+0x3f/0x200 [bluetooth]
 process_one_work+0x1ec/0x380
 worker_thread+0x53/0x3e0
 ? process_one_work+0x380/0x380
 kthread+0x11b/0x140
 ? kthread_associate_blkcg+0xa0/0xa0
 ret_from_fork+0x1f/0x30
Modules linked in: llc ip_set nf_tables nfnetlink snd_soc_skl_hda_dsp(+) ip6table_filter snd_soc_hdac_hdmi ip6_tables qrtr_mhi iptable_filter snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic s>
 dell_wmi_sysman(+) dell_smbios snd dcdbas mhi vfat videobuf2_vmalloc i2c_i801 videobuf2_memops videobuf2_v4l2 dell_wmi_descriptor fat wmi_bmof soundcore i2c_smbus videobuf2_common libarc4 mei_me mei hid_se>
 i2c_hid_acpi i2c_hid video pinctrl_tigerlake fuse
CR2: ffffae57c0ca5047

This also seems to fix a failure to suspend due to the firmware
download on bootup getting interrupted by the crash:

Bluetooth: hci0: SSR or FW download time out
PM: dpm_run_callback(): acpi_subsys_suspend+0x0/0x60 returns -110
PM: Device serial0-0 failed to suspend: error -110
PM: Some devices failed to suspend, or early wake event detected

Fixes: 83e8196 ("Bluetooth: btqca: Introduce generic QCA ROME support")
Cc: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Cc: stable@vger.kernel.org
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btqca.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 25114f0d1319..bd71dfc9c974 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -183,7 +183,7 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
 EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd);
 
 static void qca_tlv_check_data(struct qca_fw_config *config,
-		const struct firmware *fw, enum qca_btsoc_type soc_type)
+		u8 *fw_data, enum qca_btsoc_type soc_type)
 {
 	const u8 *data;
 	u32 type_len;
@@ -194,7 +194,7 @@ static void qca_tlv_check_data(struct qca_fw_config *config,
 	struct tlv_type_nvm *tlv_nvm;
 	uint8_t nvm_baud_rate = config->user_baud_rate;
 
-	tlv = (struct tlv_type_hdr *)fw->data;
+	tlv = (struct tlv_type_hdr *)fw_data;
 
 	type_len = le32_to_cpu(tlv->type_len);
 	length = (type_len >> 8) & 0x00ffffff;
@@ -390,8 +390,9 @@ static int qca_download_firmware(struct hci_dev *hdev,
 				 enum qca_btsoc_type soc_type)
 {
 	const struct firmware *fw;
+	u8 *data;
 	const u8 *segment;
-	int ret, remain, i = 0;
+	int ret, size, remain, i = 0;
 
 	bt_dev_info(hdev, "QCA Downloading %s", config->fwname);
 
@@ -402,10 +403,22 @@ static int qca_download_firmware(struct hci_dev *hdev,
 		return ret;
 	}
 
-	qca_tlv_check_data(config, fw, soc_type);
+	size = fw->size;
+	data = vmalloc(fw->size);
+	if (!data) {
+		bt_dev_err(hdev, "QCA Failed to allocate memory for file: %s",
+			   config->fwname);
+		release_firmware(fw);
+		return -ENOMEM;
+	}
 
-	segment = fw->data;
-	remain = fw->size;
+	memcpy(data, fw->data, size);
+	release_firmware(fw);
+
+	qca_tlv_check_data(config, data, soc_type);
+
+	segment = data;
+	remain = size;
 	while (remain > 0) {
 		int segsize = min(MAX_SIZE_PER_TLV_SEGMENT, remain);
 
@@ -435,7 +448,7 @@ static int qca_download_firmware(struct hci_dev *hdev,
 		ret = qca_inject_cmd_complete_event(hdev);
 
 out:
-	release_firmware(fw);
+	vfree(data);
 
 	return ret;
 }

From 27f4d1f214ae4a3364623f212ea2d45f772d35b1 Mon Sep 17 00:00:00 2001
From: Mikhail Rudenko <mike.rudenko@gmail.com>
Date: Mon, 10 May 2021 02:28:37 +0300
Subject: [PATCH 2911/3292] Bluetooth: btbcm: Add entry for BCM43430B0 UART
 Bluetooth

This patch adds the device ID for the BCM43430B0 module, found e.g. in
certain revisions of AMPAK AP6212 chip. The required firmware file is
named 'BCM43430B0.hcd'.

Signed-off-by: Mikhail Rudenko <mike.rudenko@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 1b9743b7f2ef..e5d706ed55ea 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -404,6 +404,7 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
 	{ 0x4217, "BCM4329B1"   },	/* 002.002.023 */
 	{ 0x6106, "BCM4359C0"	},	/* 003.001.006 */
 	{ 0x4106, "BCM4335A0"	},	/* 002.001.006 */
+	{ 0x410c, "BCM43430B0"	},	/* 002.001.012 */
 	{ }
 };
 

From 0ea9fd001a14ebc294f112b0361a4e601551d508 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 14 May 2021 15:14:52 +0800
Subject: [PATCH 2912/3292] Bluetooth: Shutdown controller after workqueues are
 flushed or cancelled

Rfkill block and unblock Intel USB Bluetooth [8087:0026] may make it
stops working:
[  509.691509] Bluetooth: hci0: HCI reset during shutdown failed
[  514.897584] Bluetooth: hci0: MSFT filter_enable is already on
[  530.044751] usb 3-10: reset full-speed USB device number 5 using xhci_hcd
[  545.660350] usb 3-10: device descriptor read/64, error -110
[  561.283530] usb 3-10: device descriptor read/64, error -110
[  561.519682] usb 3-10: reset full-speed USB device number 5 using xhci_hcd
[  566.686650] Bluetooth: hci0: unexpected event for opcode 0x0500
[  568.752452] Bluetooth: hci0: urb 0000000096cd309b failed to resubmit (113)
[  578.797955] Bluetooth: hci0: Failed to read MSFT supported features (-110)
[  586.286565] Bluetooth: hci0: urb 00000000c522f633 failed to resubmit (113)
[  596.215302] Bluetooth: hci0: Failed to read MSFT supported features (-110)

Or kernel panics because other workqueues already freed skb:
[ 2048.663763] BUG: kernel NULL pointer dereference, address: 0000000000000000
[ 2048.663775] #PF: supervisor read access in kernel mode
[ 2048.663779] #PF: error_code(0x0000) - not-present page
[ 2048.663782] PGD 0 P4D 0
[ 2048.663787] Oops: 0000 [#1] SMP NOPTI
[ 2048.663793] CPU: 3 PID: 4491 Comm: rfkill Tainted: G        W         5.13.0-rc1-next-20210510+ #20
[ 2048.663799] Hardware name: HP HP EliteBook 850 G8 Notebook PC/8846, BIOS T76 Ver. 01.01.04 12/02/2020
[ 2048.663801] RIP: 0010:__skb_ext_put+0x6/0x50
[ 2048.663814] Code: 8b 1b 48 85 db 75 db 5b 41 5c 5d c3 be 01 00 00 00 e8 de 13 c0 ff eb e7 be 02 00 00 00 e8 d2 13 c0 ff eb db 0f 1f 44 00 00 55 <8b> 07 48 89 e5 83 f8 01 74 14 b8 ff ff ff ff f0 0f c1
07 83 f8 01
[ 2048.663819] RSP: 0018:ffffc1d105b6fd80 EFLAGS: 00010286
[ 2048.663824] RAX: 0000000000000000 RBX: ffff9d9ac5649000 RCX: 0000000000000000
[ 2048.663827] RDX: ffffffffc0d1daf6 RSI: 0000000000000206 RDI: 0000000000000000
[ 2048.663830] RBP: ffffc1d105b6fd98 R08: 0000000000000001 R09: ffff9d9ace8ceac0
[ 2048.663834] R10: ffff9d9ace8ceac0 R11: 0000000000000001 R12: ffff9d9ac5649000
[ 2048.663838] R13: 0000000000000000 R14: 00007ffe0354d650 R15: 0000000000000000
[ 2048.663843] FS:  00007fe02ab19740(0000) GS:ffff9d9e5f8c0000(0000) knlGS:0000000000000000
[ 2048.663849] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2048.663853] CR2: 0000000000000000 CR3: 0000000111a52004 CR4: 0000000000770ee0
[ 2048.663856] PKRU: 55555554
[ 2048.663859] Call Trace:
[ 2048.663865]  ? skb_release_head_state+0x5e/0x80
[ 2048.663873]  kfree_skb+0x2f/0xb0
[ 2048.663881]  btusb_shutdown_intel_new+0x36/0x60 [btusb]
[ 2048.663905]  hci_dev_do_close+0x48c/0x5e0 [bluetooth]
[ 2048.663954]  ? __cond_resched+0x1a/0x50
[ 2048.663962]  hci_rfkill_set_block+0x56/0xa0 [bluetooth]
[ 2048.664007]  rfkill_set_block+0x98/0x170
[ 2048.664016]  rfkill_fop_write+0x136/0x1e0
[ 2048.664022]  vfs_write+0xc7/0x260
[ 2048.664030]  ksys_write+0xb1/0xe0
[ 2048.664035]  ? exit_to_user_mode_prepare+0x37/0x1c0
[ 2048.664042]  __x64_sys_write+0x1a/0x20
[ 2048.664048]  do_syscall_64+0x40/0xb0
[ 2048.664055]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 2048.664060] RIP: 0033:0x7fe02ac23c27
[ 2048.664066] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[ 2048.664070] RSP: 002b:00007ffe0354d638 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 2048.664075] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe02ac23c27
[ 2048.664078] RDX: 0000000000000008 RSI: 00007ffe0354d650 RDI: 0000000000000003
[ 2048.664081] RBP: 0000000000000000 R08: 0000559b05998440 R09: 0000559b05998440
[ 2048.664084] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
[ 2048.664086] R13: 0000000000000000 R14: ffffffff00000000 R15: 00000000ffffffff

So move the shutdown callback to a place where workqueues are either
flushed or cancelled to resolve the issue.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_core.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 572f2362ddb7..cdf147899c50 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1721,14 +1721,6 @@ int hci_dev_do_close(struct hci_dev *hdev)
 
 	BT_DBG("%s %p", hdev->name, hdev);
 
-	if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
-	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
-	    test_bit(HCI_UP, &hdev->flags)) {
-		/* Execute vendor specific shutdown routine */
-		if (hdev->shutdown)
-			hdev->shutdown(hdev);
-	}
-
 	cancel_delayed_work(&hdev->power_off);
 	cancel_delayed_work(&hdev->ncmd_timer);
 
@@ -1806,6 +1798,14 @@ int hci_dev_do_close(struct hci_dev *hdev)
 		clear_bit(HCI_INIT, &hdev->flags);
 	}
 
+	if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+	    test_bit(HCI_UP, &hdev->flags)) {
+		/* Execute vendor specific shutdown routine */
+		if (hdev->shutdown)
+			hdev->shutdown(hdev);
+	}
+
 	/* flush cmd  work */
 	flush_work(&hdev->cmd_work);
 

From d8f97da1b92d2fe89d51c673ecf80c4016119e5c Mon Sep 17 00:00:00 2001
From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Date: Tue, 18 May 2021 22:04:42 +0530
Subject: [PATCH 2913/3292] Bluetooth: hci_qca: Add support for QTI Bluetooth
 chip wcn6750

Added regulators,GPIOs and changes required to power on/off wcn6750.
Added support for firmware download for wcn6750.

Changes done in detail:
1. Added regulators and corresponding current values.
2. Added sw_ctrl GPIO pin which is output from BT SoC and indicates
   status of clock supply to BT SoC.
3. Added inline function to check if the SoC type is wcn6750.
4. Modified the function qca_wcn3990_init() to support wcn6750 and
   renamed it to qca_regulator_init().

Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btqca.c   |  13 +++-
 drivers/bluetooth/btqca.h   |  11 ++++
 drivers/bluetooth/hci_qca.c | 114 +++++++++++++++++++++++++++++-------
 3 files changed, 115 insertions(+), 23 deletions(-)

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index bd71dfc9c974..95742494dc40 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -530,6 +530,11 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 			    (soc_ver & 0x0000000f);
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/htbtfw%02x.tlv", rom_ver);
+	} else if (soc_type == QCA_WCN6750) {
+		rom_ver = ((soc_ver & 0x00000f00) >> 0x04) |
+			    (soc_ver & 0x0000000f);
+		snprintf(config.fwname, sizeof(config.fwname),
+			 "qca/msbtfw%02x.tlv", rom_ver);
 	} else {
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/rampatch_%08x.bin", soc_ver);
@@ -561,6 +566,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 	else if (soc_type == QCA_QCA6390)
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/htnv%02x.bin", rom_ver);
+	else if (soc_type == QCA_WCN6750)
+		snprintf(config.fwname, sizeof(config.fwname),
+			 "qca/msnv%02x.bin", rom_ver);
 	else
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/nvm_%08x.bin", soc_ver);
@@ -577,13 +585,14 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 			return err;
 	}
 
-	/* WCN399x supports the Microsoft vendor extension with 0xFD70 as the
+	/* WCN399x and WCN6750 supports the Microsoft vendor extension with 0xFD70 as the
 	 * VsMsftOpCode.
 	 */
 	switch (soc_type) {
 	case QCA_WCN3990:
 	case QCA_WCN3991:
 	case QCA_WCN3998:
+	case QCA_WCN6750:
 		hci_set_msft_opcode(hdev, 0xFD70);
 		break;
 	default:
@@ -597,7 +606,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 		return err;
 	}
 
-	if (soc_type == QCA_WCN3991) {
+	if (soc_type == QCA_WCN3991 || soc_type == QCA_WCN6750) {
 		/* get fw build info */
 		err = qca_read_fw_build_info(hdev);
 		if (err < 0)
diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
index b19add7675a4..6ecd329bad41 100644
--- a/drivers/bluetooth/btqca.h
+++ b/drivers/bluetooth/btqca.h
@@ -143,6 +143,7 @@ enum qca_btsoc_type {
 	QCA_WCN3998,
 	QCA_WCN3991,
 	QCA_QCA6390,
+	QCA_WCN6750,
 };
 
 #if IS_ENABLED(CONFIG_BT_QCA)
@@ -160,6 +161,11 @@ static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
 	return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3991 ||
 	       soc_type == QCA_WCN3998;
 }
+static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type)
+{
+	return soc_type == QCA_WCN6750;
+}
+
 #else
 
 static inline int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr)
@@ -192,6 +198,11 @@ static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type)
 	return false;
 }
 
+static inline bool qca_is_wcn6750(enum qca_btsoc_type soc_type)
+{
+	return false;
+}
+
 static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
 {
 	return -EOPNOTSUPP;
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index dc6551d65912..53deea2eb7b4 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -218,6 +218,7 @@ struct qca_power {
 struct qca_serdev {
 	struct hci_uart	 serdev_hu;
 	struct gpio_desc *bt_en;
+	struct gpio_desc *sw_ctrl;
 	struct clk	 *susclk;
 	enum qca_btsoc_type btsoc_type;
 	struct qca_power *bt_power;
@@ -604,7 +605,8 @@ static int qca_open(struct hci_uart *hu)
 	if (hu->serdev) {
 		qcadev = serdev_device_get_drvdata(hu->serdev);
 
-		if (qca_is_wcn399x(qcadev->btsoc_type))
+		if (qca_is_wcn399x(qcadev->btsoc_type) ||
+		    qca_is_wcn6750(qcadev->btsoc_type))
 			hu->init_speed = qcadev->init_speed;
 
 		if (qcadev->oper_speed)
@@ -1308,7 +1310,8 @@ static int qca_set_baudrate(struct hci_dev *hdev, uint8_t baudrate)
 		      msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS));
 
 	/* Give the controller time to process the request */
-	if (qca_is_wcn399x(qca_soc_type(hu)))
+	if (qca_is_wcn399x(qca_soc_type(hu)) ||
+	    qca_is_wcn6750(qca_soc_type(hu)))
 		usleep_range(1000, 10000);
 	else
 		msleep(300);
@@ -1384,7 +1387,8 @@ static unsigned int qca_get_speed(struct hci_uart *hu,
 
 static int qca_check_speeds(struct hci_uart *hu)
 {
-	if (qca_is_wcn399x(qca_soc_type(hu))) {
+	if (qca_is_wcn399x(qca_soc_type(hu)) ||
+	    qca_is_wcn6750(qca_soc_type(hu))) {
 		if (!qca_get_speed(hu, QCA_INIT_SPEED) &&
 		    !qca_get_speed(hu, QCA_OPER_SPEED))
 			return -EINVAL;
@@ -1417,7 +1421,8 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
 		/* Disable flow control for wcn3990 to deassert RTS while
 		 * changing the baudrate of chip and host.
 		 */
-		if (qca_is_wcn399x(soc_type))
+		if (qca_is_wcn399x(soc_type) ||
+		    qca_is_wcn6750(soc_type))
 			hci_uart_set_flow_control(hu, true);
 
 		if (soc_type == QCA_WCN3990) {
@@ -1434,7 +1439,8 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
 		host_set_baudrate(hu, speed);
 
 error:
-		if (qca_is_wcn399x(soc_type))
+		if (qca_is_wcn399x(soc_type) ||
+		    qca_is_wcn6750(soc_type))
 			hci_uart_set_flow_control(hu, false);
 
 		if (soc_type == QCA_WCN3990) {
@@ -1585,10 +1591,12 @@ static bool qca_prevent_wake(struct hci_dev *hdev)
 	return !wakeup;
 }
 
-static int qca_wcn3990_init(struct hci_uart *hu)
+static int qca_regulator_init(struct hci_uart *hu)
 {
+	enum qca_btsoc_type soc_type = qca_soc_type(hu);
 	struct qca_serdev *qcadev;
 	int ret;
+	bool sw_ctrl_state;
 
 	/* Check for vregs status, may be hci down has turned
 	 * off the voltage regulator.
@@ -1607,16 +1615,33 @@ static int qca_wcn3990_init(struct hci_uart *hu)
 		}
 	}
 
-	/* Forcefully enable wcn3990 to enter in to boot mode. */
-	host_set_baudrate(hu, 2400);
-	ret = qca_send_power_pulse(hu, false);
-	if (ret)
-		return ret;
+	if (qca_is_wcn399x(soc_type)) {
+		/* Forcefully enable wcn399x to enter in to boot mode. */
+		host_set_baudrate(hu, 2400);
+		ret = qca_send_power_pulse(hu, false);
+		if (ret)
+			return ret;
+	}
+
+	/* For wcn6750 need to enable gpio bt_en */
+	if (qcadev->bt_en) {
+		gpiod_set_value_cansleep(qcadev->bt_en, 0);
+		msleep(50);
+		gpiod_set_value_cansleep(qcadev->bt_en, 1);
+		msleep(50);
+		if (qcadev->sw_ctrl) {
+			sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl);
+			bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state);
+		}
+	}
 
 	qca_set_speed(hu, QCA_INIT_SPEED);
-	ret = qca_send_power_pulse(hu, true);
-	if (ret)
-		return ret;
+
+	if (qca_is_wcn399x(soc_type)) {
+		ret = qca_send_power_pulse(hu, true);
+		if (ret)
+			return ret;
+	}
 
 	/* Now the device is in ready state to communicate with host.
 	 * To sync host with device we need to reopen port.
@@ -1649,8 +1674,9 @@ static int qca_power_on(struct hci_dev *hdev)
 	if (!hu->serdev)
 		return 0;
 
-	if (qca_is_wcn399x(soc_type)) {
-		ret = qca_wcn3990_init(hu);
+	if (qca_is_wcn399x(soc_type) ||
+	    qca_is_wcn6750(soc_type)) {
+		ret = qca_regulator_init(hu);
 	} else {
 		qcadev = serdev_device_get_drvdata(hu->serdev);
 		if (qcadev->bt_en) {
@@ -1689,7 +1715,8 @@ static int qca_setup(struct hci_uart *hu)
 	set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
 
 	bt_dev_info(hdev, "setting up %s",
-		qca_is_wcn399x(soc_type) ? "wcn399x" : "ROME/QCA6390");
+		qca_is_wcn399x(soc_type) ? "wcn399x" :
+		(soc_type == QCA_WCN6750) ? "wcn6750" : "ROME/QCA6390");
 
 	qca->memdump_state = QCA_MEMDUMP_IDLE;
 
@@ -1700,7 +1727,8 @@ retry:
 
 	clear_bit(QCA_SSR_TRIGGERED, &qca->flags);
 
-	if (qca_is_wcn399x(soc_type)) {
+	if (qca_is_wcn399x(soc_type) ||
+	    qca_is_wcn6750(soc_type)) {
 		set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
 
 		ret = qca_read_soc_version(hdev, &ver, soc_type);
@@ -1720,7 +1748,8 @@ retry:
 		qca_baudrate = qca_get_baudrate_value(speed);
 	}
 
-	if (!qca_is_wcn399x(soc_type)) {
+	if (!(qca_is_wcn399x(soc_type) ||
+	     qca_is_wcn6750(soc_type))) {
 		/* Get QCA version information */
 		ret = qca_read_soc_version(hdev, &ver, soc_type);
 		if (ret)
@@ -1828,12 +1857,30 @@ static const struct qca_device_data qca_soc_data_qca6390 = {
 	.num_vregs = 0,
 };
 
+static const struct qca_device_data qca_soc_data_wcn6750 = {
+	.soc_type = QCA_WCN6750,
+	.vregs = (struct qca_vreg []) {
+		{ "vddio", 5000 },
+		{ "vddaon", 26000 },
+		{ "vddbtcxmx", 126000 },
+		{ "vddrfacmn", 12500 },
+		{ "vddrfa0p8", 102000 },
+		{ "vddrfa1p7", 302000 },
+		{ "vddrfa1p2", 257000 },
+		{ "vddrfa2p2", 1700000 },
+		{ "vddasd", 200 },
+	},
+	.num_vregs = 9,
+	.capabilities = QCA_CAP_WIDEBAND_SPEECH | QCA_CAP_VALID_LE_STATES,
+};
+
 static void qca_power_shutdown(struct hci_uart *hu)
 {
 	struct qca_serdev *qcadev;
 	struct qca_data *qca = hu->priv;
 	unsigned long flags;
 	enum qca_btsoc_type soc_type = qca_soc_type(hu);
+	bool sw_ctrl_state;
 
 	/* From this point we go into power off state. But serial port is
 	 * still open, stop queueing the IBS data and flush all the buffered
@@ -1856,6 +1903,14 @@ static void qca_power_shutdown(struct hci_uart *hu)
 		host_set_baudrate(hu, 2400);
 		qca_send_power_pulse(hu, false);
 		qca_regulator_disable(qcadev);
+	} else if (soc_type == QCA_WCN6750) {
+		gpiod_set_value_cansleep(qcadev->bt_en, 0);
+		msleep(100);
+		qca_regulator_disable(qcadev);
+		if (qcadev->sw_ctrl) {
+			sw_ctrl_state = gpiod_get_value_cansleep(qcadev->sw_ctrl);
+			bt_dev_dbg(hu->hdev, "SW_CTRL is %d", sw_ctrl_state);
+		}
 	} else if (qcadev->bt_en) {
 		gpiod_set_value_cansleep(qcadev->bt_en, 0);
 	}
@@ -1978,7 +2033,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 	if (!qcadev->oper_speed)
 		BT_DBG("UART will pick default operating speed");
 
-	if (data && qca_is_wcn399x(data->soc_type)) {
+	if (data &&
+	    (qca_is_wcn399x(data->soc_type) ||
+	    qca_is_wcn6750(data->soc_type))) {
 		qcadev->btsoc_type = data->soc_type;
 		qcadev->bt_power = devm_kzalloc(&serdev->dev,
 						sizeof(struct qca_power),
@@ -1996,6 +2053,18 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 
 		qcadev->bt_power->vregs_on = false;
 
+		qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
+					       GPIOD_OUT_LOW);
+		if (!qcadev->bt_en && data->soc_type == QCA_WCN6750) {
+			dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
+			power_ctrl_enabled = false;
+		}
+
+		qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
+					       GPIOD_IN);
+		if (!qcadev->sw_ctrl && data->soc_type == QCA_WCN6750)
+			dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+
 		qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
 		if (IS_ERR(qcadev->susclk)) {
 			dev_err(&serdev->dev, "failed to acquire clk\n");
@@ -2068,7 +2137,9 @@ static void qca_serdev_remove(struct serdev_device *serdev)
 	struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
 	struct qca_power *power = qcadev->bt_power;
 
-	if (qca_is_wcn399x(qcadev->btsoc_type) && power->vregs_on)
+	if ((qca_is_wcn399x(qcadev->btsoc_type) ||
+	     qca_is_wcn6750(qcadev->btsoc_type)) &&
+	     power->vregs_on)
 		qca_power_shutdown(&qcadev->serdev_hu);
 	else if (qcadev->susclk)
 		clk_disable_unprepare(qcadev->susclk);
@@ -2244,6 +2315,7 @@ static const struct of_device_id qca_bluetooth_of_match[] = {
 	{ .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990},
 	{ .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991},
 	{ .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998},
+	{ .compatible = "qcom,wcn6750-bt", .data = &qca_soc_data_wcn6750},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match);

From ecf6b2d9566606cd78bdc0af36e5a938624b13d1 Mon Sep 17 00:00:00 2001
From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Date: Tue, 18 May 2021 22:04:43 +0530
Subject: [PATCH 2914/3292] Bluetooth: btqca: Add support for firmware image
 with mbn type for WCN6750

1. Added support to download firmware image with mbn type for wcn6750
   as it supports mbn type image.
2. If mbn type image is not present then check for tlv type image.
3. Added debug logs for mbn type image.

Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btqca.c | 69 +++++++++++++++++++++++++++++----------
 drivers/bluetooth/btqca.h |  3 +-
 2 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 95742494dc40..3210283eb08f 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -182,7 +182,8 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
 }
 EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd);
 
-static void qca_tlv_check_data(struct qca_fw_config *config,
+static void qca_tlv_check_data(struct hci_dev *hdev,
+			       struct qca_fw_config *config,
 		u8 *fw_data, enum qca_btsoc_type soc_type)
 {
 	const u8 *data;
@@ -194,19 +195,21 @@ static void qca_tlv_check_data(struct qca_fw_config *config,
 	struct tlv_type_nvm *tlv_nvm;
 	uint8_t nvm_baud_rate = config->user_baud_rate;
 
-	tlv = (struct tlv_type_hdr *)fw_data;
-
-	type_len = le32_to_cpu(tlv->type_len);
-	length = (type_len >> 8) & 0x00ffffff;
-
-	BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff);
-	BT_DBG("Length\t\t : %d bytes", length);
-
 	config->dnld_mode = QCA_SKIP_EVT_NONE;
 	config->dnld_type = QCA_SKIP_EVT_NONE;
 
 	switch (config->type) {
+	case ELF_TYPE_PATCH:
+		config->dnld_mode = QCA_SKIP_EVT_VSE_CC;
+		config->dnld_type = QCA_SKIP_EVT_VSE_CC;
+
+		bt_dev_dbg(hdev, "File Class        : 0x%x", fw_data[4]);
+		bt_dev_dbg(hdev, "Data Encoding     : 0x%x", fw_data[5]);
+		bt_dev_dbg(hdev, "File version      : 0x%x", fw_data[6]);
+		break;
 	case TLV_TYPE_PATCH:
+		tlv = (struct tlv_type_hdr *)fw_data;
+		type_len = le32_to_cpu(tlv->type_len);
 		tlv_patch = (struct tlv_type_patch *)tlv->data;
 
 		/* For Rome version 1.1 to 3.1, all segment commands
@@ -218,6 +221,7 @@ static void qca_tlv_check_data(struct qca_fw_config *config,
 		config->dnld_mode = tlv_patch->download_mode;
 		config->dnld_type = config->dnld_mode;
 
+		BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff);
 		BT_DBG("Total Length           : %d bytes",
 		       le32_to_cpu(tlv_patch->total_size));
 		BT_DBG("Patch Data Length      : %d bytes",
@@ -243,6 +247,14 @@ static void qca_tlv_check_data(struct qca_fw_config *config,
 		break;
 
 	case TLV_TYPE_NVM:
+		tlv = (struct tlv_type_hdr *)fw_data;
+
+		type_len = le32_to_cpu(tlv->type_len);
+		length = (type_len >> 8) & 0x00ffffff;
+
+		BT_DBG("TLV Type\t\t : 0x%x", type_len & 0x000000ff);
+		BT_DBG("Length\t\t : %d bytes", length);
+
 		idx = 0;
 		data = tlv->data;
 		while (idx < length) {
@@ -387,7 +399,8 @@ static int qca_inject_cmd_complete_event(struct hci_dev *hdev)
 
 static int qca_download_firmware(struct hci_dev *hdev,
 				 struct qca_fw_config *config,
-				 enum qca_btsoc_type soc_type)
+				 enum qca_btsoc_type soc_type,
+				 u8 rom_ver)
 {
 	const struct firmware *fw;
 	u8 *data;
@@ -398,9 +411,27 @@ static int qca_download_firmware(struct hci_dev *hdev,
 
 	ret = request_firmware(&fw, config->fwname, &hdev->dev);
 	if (ret) {
-		bt_dev_err(hdev, "QCA Failed to request file: %s (%d)",
-			   config->fwname, ret);
-		return ret;
+		/* For WCN6750, if mbn file is not present then check for
+		 * tlv file.
+		 */
+		if (soc_type == QCA_WCN6750 && config->type == ELF_TYPE_PATCH) {
+			bt_dev_dbg(hdev, "QCA Failed to request file: %s (%d)",
+				   config->fwname, ret);
+			config->type = TLV_TYPE_PATCH;
+			snprintf(config->fwname, sizeof(config->fwname),
+				 "qca/msbtfw%02x.tlv", rom_ver);
+			bt_dev_info(hdev, "QCA Downloading %s", config->fwname);
+			ret = request_firmware(&fw, config->fwname, &hdev->dev);
+			if (ret) {
+				bt_dev_err(hdev, "QCA Failed to request file: %s (%d)",
+					   config->fwname, ret);
+				return ret;
+			}
+		} else {
+			bt_dev_err(hdev, "QCA Failed to request file: %s (%d)",
+				   config->fwname, ret);
+			return ret;
+		}
 	}
 
 	size = fw->size;
@@ -415,7 +446,7 @@ static int qca_download_firmware(struct hci_dev *hdev,
 	memcpy(data, fw->data, size);
 	release_firmware(fw);
 
-	qca_tlv_check_data(config, data, soc_type);
+	qca_tlv_check_data(hdev, config, data, soc_type);
 
 	segment = data;
 	remain = size;
@@ -533,14 +564,18 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 	} else if (soc_type == QCA_WCN6750) {
 		rom_ver = ((soc_ver & 0x00000f00) >> 0x04) |
 			    (soc_ver & 0x0000000f);
+		/* Choose mbn file by default.If mbn file is not found
+		 * then choose tlv file
+		 */
+		config.type = ELF_TYPE_PATCH;
 		snprintf(config.fwname, sizeof(config.fwname),
-			 "qca/msbtfw%02x.tlv", rom_ver);
+			 "qca/msbtfw%02x.mbn", rom_ver);
 	} else {
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/rampatch_%08x.bin", soc_ver);
 	}
 
-	err = qca_download_firmware(hdev, &config, soc_type);
+	err = qca_download_firmware(hdev, &config, soc_type, rom_ver);
 	if (err < 0) {
 		bt_dev_err(hdev, "QCA Failed to download patch (%d)", err);
 		return err;
@@ -573,7 +608,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/nvm_%08x.bin", soc_ver);
 
-	err = qca_download_firmware(hdev, &config, soc_type);
+	err = qca_download_firmware(hdev, &config, soc_type, rom_ver);
 	if (err < 0) {
 		bt_dev_err(hdev, "QCA Failed to download NVM (%d)", err);
 		return err;
diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
index 6ecd329bad41..30afa7703afd 100644
--- a/drivers/bluetooth/btqca.h
+++ b/drivers/bluetooth/btqca.h
@@ -80,7 +80,8 @@ enum qca_tlv_dnld_mode {
 
 enum qca_tlv_type {
 	TLV_TYPE_PATCH = 1,
-	TLV_TYPE_NVM
+	TLV_TYPE_NVM,
+	ELF_TYPE_PATCH,
 };
 
 struct qca_fw_config {

From 99fba8e3f1d1fd60042187a90ba2381efc1833f7 Mon Sep 17 00:00:00 2001
From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Date: Tue, 18 May 2021 22:04:44 +0530
Subject: [PATCH 2915/3292] Bluetooth: btqca: Moved extracting rom version info
 to common place

Moved extracting rom version info to common place as this code is
common in all if else ladder in qca_uart_setup.

Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btqca.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 3210283eb08f..be04d74037d2 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -546,24 +546,20 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 
 	config.user_baud_rate = baudrate;
 
+	/* Firmware files to download are based on ROM version.
+	 * ROM version is derived from last two bytes of soc_ver.
+	 */
+	rom_ver = ((soc_ver & 0x00000f00) >> 0x04) | (soc_ver & 0x0000000f);
+
 	/* Download rampatch file */
 	config.type = TLV_TYPE_PATCH;
 	if (qca_is_wcn399x(soc_type)) {
-		/* Firmware files to download are based on ROM version.
-		 * ROM version is derived from last two bytes of soc_ver.
-		 */
-		rom_ver = ((soc_ver & 0x00000f00) >> 0x04) |
-			    (soc_ver & 0x0000000f);
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/crbtfw%02x.tlv", rom_ver);
 	} else if (soc_type == QCA_QCA6390) {
-		rom_ver = ((soc_ver & 0x00000f00) >> 0x04) |
-			    (soc_ver & 0x0000000f);
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/htbtfw%02x.tlv", rom_ver);
 	} else if (soc_type == QCA_WCN6750) {
-		rom_ver = ((soc_ver & 0x00000f00) >> 0x04) |
-			    (soc_ver & 0x0000000f);
 		/* Choose mbn file by default.If mbn file is not found
 		 * then choose tlv file
 		 */

From d88c6de4f8b6e6f1b6c3e3a85d39106c83553bc9 Mon Sep 17 00:00:00 2001
From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Date: Tue, 18 May 2021 22:04:45 +0530
Subject: [PATCH 2916/3292] dt-bindings: net: bluetooth: Convert Qualcomm BT
 binding to DT schema

Converted Qualcomm Bluetooth binidings to DT schema.

Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../bindings/net/qualcomm-bluetooth.txt       |  69 -----------
 .../bindings/net/qualcomm-bluetooth.yaml      | 112 ++++++++++++++++++
 2 files changed, 112 insertions(+), 69 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
 create mode 100644 Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml

diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
deleted file mode 100644
index 709ca6d51650..000000000000
--- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-Qualcomm Bluetooth Chips
----------------------
-
-This documents the binding structure and common properties for serial
-attached Qualcomm devices.
-
-Serial attached Qualcomm devices shall be a child node of the host UART
-device the slave device is attached to.
-
-Required properties:
- - compatible: should contain one of the following:
-   * "qcom,qca6174-bt"
-   * "qcom,qca9377-bt"
-   * "qcom,wcn3990-bt"
-   * "qcom,wcn3991-bt"
-   * "qcom,wcn3998-bt"
-   * "qcom,qca6390-bt"
-
-Optional properties for compatible string qcom,qca6174-bt:
-
- - enable-gpios: gpio specifier used to enable chip
- - clocks: clock provided to the controller (SUSCLK_32KHZ)
- - firmware-name: specify the name of nvm firmware to load
-
-Optional properties for compatible string qcom,qca9377-bt:
-
- - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml
-
-Required properties for compatible string qcom,wcn399x-bt:
-
- - vddio-supply: VDD_IO supply regulator handle.
- - vddxo-supply: VDD_XO supply regulator handle.
- - vddrf-supply: VDD_RF supply regulator handle.
- - vddch0-supply: VDD_CH0 supply regulator handle.
-
-Optional properties for compatible string qcom,wcn399x-bt:
-
- - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml
- - firmware-name: specify the name of nvm firmware to load
- - clocks: clock provided to the controller
-
-Examples:
-
-serial@7570000 {
-	label = "BT-UART";
-	status = "okay";
-
-	bluetooth {
-		compatible = "qcom,qca6174-bt";
-
-		enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
-		clocks = <&divclk4>;
-		firmware-name = "nvm_00440302.bin";
-	};
-};
-
-serial@898000 {
-	bluetooth {
-		compatible = "qcom,wcn3990-bt";
-
-		vddio-supply = <&vreg_s4a_1p8>;
-		vddxo-supply = <&vreg_l7a_1p8>;
-		vddrf-supply = <&vreg_l17a_1p3>;
-		vddch0-supply = <&vreg_l25a_3p3>;
-		max-speed = <3200000>;
-		firmware-name = "crnv21.bin";
-		clocks = <&rpmhcc RPMH_RF_CLK2>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
new file mode 100644
index 000000000000..772689bf50c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qualcomm-bluetooth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Bluetooth Chips
+
+maintainers:
+  - Balakrishna Godavarthi <bgodavar@codeaurora.org>
+  - Rocky Liao <rjliao@codeaurora.org>
+
+description:
+  This binding describes Qualcomm UART-attached bluetooth chips.
+
+properties:
+  compatible:
+    enum:
+      - qcom,qca6174-bt
+      - qcom,qca9377-bt
+      - qcom,wcn3990-bt
+      - qcom,wcn3991-bt
+      - qcom,wcn3998-bt
+      - qcom,qca6390-bt
+
+  enable-gpios:
+    maxItems: 1
+    description: gpio specifier used to enable chip
+
+  clocks:
+    maxItems: 1
+    description: clock provided to the controller (SUSCLK_32KHZ)
+
+  vddio-supply:
+    description: VDD_IO supply regulator handle
+
+  vddxo-supply:
+    description: VDD_XO supply regulator handle
+
+  vddrf-supply:
+    description: VDD_RF supply regulator handle
+
+  vddch0-supply:
+    description: VDD_CH0 supply regulator handle
+
+  max-speed:
+    description: see Documentation/devicetree/bindings/serial/serial.yaml
+
+  firmware-name:
+    description: specify the name of nvm firmware to load
+
+  local-bd-address:
+    description: see Documentation/devicetree/bindings/net/bluetooth.txt
+
+
+required:
+  - compatible
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,qca6174-bt
+    then:
+      required:
+        - enable-gpios
+        - clocks
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,wcn3990-bt
+              - qcom,wcn3991-bt
+              - qcom,wcn3998-bt
+    then:
+      required:
+        - vddio-supply
+        - vddxo-supply
+        - vddrf-supply
+        - vddch0-supply
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    serial {
+
+        bluetooth {
+            compatible = "qcom,qca6174-bt";
+            enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
+            clocks = <&divclk4>;
+            firmware-name = "nvm_00440302.bin";
+        };
+    };
+  - |
+    serial {
+
+        bluetooth {
+            compatible = "qcom,wcn3990-bt";
+            vddio-supply = <&vreg_s4a_1p8>;
+            vddxo-supply = <&vreg_l7a_1p8>;
+            vddrf-supply = <&vreg_l17a_1p3>;
+            vddch0-supply = <&vreg_l25a_3p3>;
+            max-speed = <3200000>;
+            firmware-name = "crnv21.bin";
+        };
+    };

From 7a4cb1635a4b879f8d118ec7c6586aef913819f3 Mon Sep 17 00:00:00 2001
From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Date: Tue, 18 May 2021 22:04:46 +0530
Subject: [PATCH 2917/3292] dt-bindings: net: bluetooth: Add device tree
 bindings for QTI chip wcn6750

This patch enables regulators and gpios for the Qualcomm Bluetooth wcn6750
controller.

Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../bindings/net/qualcomm-bluetooth.yaml      | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
index 772689bf50c1..f93c6e7a1b59 100644
--- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
@@ -22,11 +22,17 @@ properties:
       - qcom,wcn3991-bt
       - qcom,wcn3998-bt
       - qcom,qca6390-bt
+      - qcom,wcn6750-bt
 
   enable-gpios:
     maxItems: 1
     description: gpio specifier used to enable chip
 
+  swctrl-gpios:
+    maxItems: 1
+    description: gpio specifier is used to find status
+                 of clock supply to SoC
+
   clocks:
     maxItems: 1
     description: clock provided to the controller (SUSCLK_32KHZ)
@@ -43,6 +49,30 @@ properties:
   vddch0-supply:
     description: VDD_CH0 supply regulator handle
 
+  vddaon-supply:
+    description: VDD_AON supply regulator handle
+
+  vddbtcxmx-supply:
+    description: VDD_BT_CXMX supply regulator handle
+
+  vddrfacmn-supply:
+    description: VDD_RFA_CMN supply regulator handle
+
+  vddrfa0p8-supply:
+    description: VDD_RFA_0P8 suppply regulator handle
+
+  vddrfa1p7-supply:
+    description: VDD_RFA_1P7 supply regulator handle
+
+  vddrfa1p2-supply:
+    description: VDD_RFA_1P2 supply regulator handle
+
+  vddrfa2p2-supply:
+    description: VDD_RFA_2P2 supply regulator handle
+
+  vddasd-supply:
+    description: VDD_ASD supply regulator handle
+
   max-speed:
     description: see Documentation/devicetree/bindings/serial/serial.yaml
 
@@ -85,6 +115,25 @@ allOf:
         - vddrf-supply
         - vddch0-supply
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,wcn6750-bt
+    then:
+      required:
+        - enable-gpios
+        - swctrl-gpios
+        - vddio-supply
+        - vddaon-supply
+        - vddbtcxmx-supply
+        - vddrfacmn-supply
+        - vddrfa0p8-supply
+        - vddrfa1p7-supply
+        - vddrfa1p2-supply
+        - vddasd-supply
+
 examples:
   - |
     #include <dt-bindings/gpio/gpio.h>
@@ -110,3 +159,25 @@ examples:
             firmware-name = "crnv21.bin";
         };
     };
+  - |
+    serial {
+
+        bluetooth {
+            compatible = "qcom,wcn6750-bt";
+            pinctrl-names = "default";
+            pinctrl-0 = <&bt_en_default>;
+            enable-gpios = <&tlmm 85 GPIO_ACTIVE_HIGH>;
+            swctrl-gpios = <&tlmm 86 GPIO_ACTIVE_HIGH>;
+            vddio-supply = <&vreg_l19b_1p8>;
+            vddaon-supply = <&vreg_s7b_0p9>;
+            vddbtcxmx-supply = <&vreg_s7b_0p9>;
+            vddrfacmn-supply = <&vreg_s7b_0p9>;
+            vddrfa0p8-supply = <&vreg_s7b_0p9>;
+            vddrfa1p7-supply = <&vreg_s1b_1p8>;
+            vddrfa1p2-supply = <&vreg_s8b_1p2>;
+            vddrfa2p2-supply = <&vreg_s1c_2p2>;
+            vddasd-supply = <&vreg_l11c_2p8>;
+            max-speed = <3200000>;
+            firmware-name = "msnv11.bin";
+        };
+    };

From 0324d19cb99804d99e42c990b8b1e191575a091b Mon Sep 17 00:00:00 2001
From: Daniel Lenski <dlenski@gmail.com>
Date: Tue, 18 May 2021 10:25:46 -0700
Subject: [PATCH 2918/3292] Bluetooth: btusb: Add a new QCA_ROME device
 (0cf3:e500)

This patch adds the 0cf3:e500 Bluetooth device (from a QCA9377 board) as a
QCA_ROME device.  It appears to be functionally identical to another device
ID, also from a QCA9377 board, which was previously marked as QCA_ROME in
0a03f98b98c201191e3ba15a0e33f46d8660e1fd
("Bluetooth: Add a new 04ca:3015 QCA_ROME device").

Without this patch, the WiFi side of the QCA9377 board is slow or unusable
when the Bluetooth side is in use.

See https://askubuntu.com/a/1137852 for another report of QCA_ROME fixing
this issue for this device ID.

/sys/kernel/debug/usb/devices:

T:  Bus=05 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12   MxCh= 0
D:  Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=0cf3 ProdID=e500 Rev= 0.01
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms

Signed-off-by: Daniel Lenski <dlenski@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index dd48543b65ce..ff41fb0fb557 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -270,6 +270,8 @@ static const struct usb_device_id blacklist_table[] = {
 						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME |
 						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x0cf3, 0xe500), .driver_info = BTUSB_QCA_ROME |
+						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME |
 						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x0489, 0xe09f), .driver_info = BTUSB_QCA_ROME |

From 1c58e933aba23f68c0d3f192f7cc6eed8fabd694 Mon Sep 17 00:00:00 2001
From: Szymon Janc <szymon.janc@codecoup.pl>
Date: Tue, 18 May 2021 16:54:36 +0200
Subject: [PATCH 2919/3292] Bluetooth: Remove spurious error message

Even with rate limited reporting this is very spammy and since
it is remote device that is providing bogus data there is no
need to report this as error.

Since real_len variable was used only to allow conditional error
message it is now also removed.

[72454.143336] bt_err_ratelimited: 10 callbacks suppressed
[72454.143337] Bluetooth: hci0: advertising data len corrected
[72454.296314] Bluetooth: hci0: advertising data len corrected
[72454.892329] Bluetooth: hci0: advertising data len corrected
[72455.051319] Bluetooth: hci0: advertising data len corrected
[72455.357326] Bluetooth: hci0: advertising data len corrected
[72455.663295] Bluetooth: hci0: advertising data len corrected
[72455.787278] Bluetooth: hci0: advertising data len corrected
[72455.942278] Bluetooth: hci0: advertising data len corrected
[72456.094276] Bluetooth: hci0: advertising data len corrected
[72456.249137] Bluetooth: hci0: advertising data len corrected
[72459.416333] bt_err_ratelimited: 13 callbacks suppressed
[72459.416334] Bluetooth: hci0: advertising data len corrected
[72459.721334] Bluetooth: hci0: advertising data len corrected
[72460.011317] Bluetooth: hci0: advertising data len corrected
[72460.327171] Bluetooth: hci0: advertising data len corrected
[72460.638294] Bluetooth: hci0: advertising data len corrected
[72460.946350] Bluetooth: hci0: advertising data len corrected
[72461.225320] Bluetooth: hci0: advertising data len corrected
[72461.690322] Bluetooth: hci0: advertising data len corrected
[72462.118318] Bluetooth: hci0: advertising data len corrected
[72462.427319] Bluetooth: hci0: advertising data len corrected
[72464.546319] bt_err_ratelimited: 7 callbacks suppressed
[72464.546319] Bluetooth: hci0: advertising data len corrected
[72464.857318] Bluetooth: hci0: advertising data len corrected
[72465.163332] Bluetooth: hci0: advertising data len corrected
[72465.278331] Bluetooth: hci0: advertising data len corrected
[72465.432323] Bluetooth: hci0: advertising data len corrected
[72465.891334] Bluetooth: hci0: advertising data len corrected
[72466.045334] Bluetooth: hci0: advertising data len corrected
[72466.197321] Bluetooth: hci0: advertising data len corrected
[72466.340318] Bluetooth: hci0: advertising data len corrected
[72466.498335] Bluetooth: hci0: advertising data len corrected
[72469.803299] bt_err_ratelimited: 10 callbacks suppressed

Signed-off-by: Szymon Janc <szymon.janc@codecoup.pl>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=203753
Cc: stable@vger.kernel.org
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d73950441938..04b2509b7cb5 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5450,7 +5450,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 	struct hci_conn *conn;
 	bool match;
 	u32 flags;
-	u8 *ptr, real_len;
+	u8 *ptr;
 
 	switch (type) {
 	case LE_ADV_IND:
@@ -5481,14 +5481,10 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 			break;
 	}
 
-	real_len = ptr - data;
-
-	/* Adjust for actual length */
-	if (len != real_len) {
-		bt_dev_err_ratelimited(hdev, "advertising data len corrected %u -> %u",
-				       len, real_len);
-		len = real_len;
-	}
+	/* Adjust for actual length. This handles the case when remote
+	 * device is advertising with incorrect data length.
+	 */
+	len = ptr - data;
 
 	/* If the direct address is present, then this report is from
 	 * a LE Direct Advertising Report event. In that case it is

From 1fa20d7d4aad02206e84b74915819fbe9f81dab3 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 19 May 2021 13:41:50 -0700
Subject: [PATCH 2920/3292] Bluetooth: L2CAP: Fix invalid access if ECRED
 Reconfigure fails

The use of l2cap_chan_del is not safe under a loop using
list_for_each_entry.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7d975cf98c20..f3b70fa348ab 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6248,7 +6248,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn,
 					 struct l2cap_cmd_hdr *cmd, u16 cmd_len,
 					 u8 *data)
 {
-	struct l2cap_chan *chan;
+	struct l2cap_chan *chan, *tmp;
 	struct l2cap_ecred_conn_rsp *rsp = (void *) data;
 	u16 result;
 
@@ -6262,7 +6262,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn,
 	if (!result)
 		return 0;
 
-	list_for_each_entry(chan, &conn->chan_l, list) {
+	list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
 		if (chan->ident != cmd->ident)
 			continue;
 

From de895b43932cb47e69480540be7eca289af24f23 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 19 May 2021 13:41:51 -0700
Subject: [PATCH 2921/3292] Bluetooth: L2CAP: Fix invalid access on ECRED
 Connection response

The use of l2cap_chan_del is not safe under a loop using
list_for_each_entry.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index f3b70fa348ab..9ebb85df4db4 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6066,7 +6066,7 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn,
 	struct l2cap_ecred_conn_rsp *rsp = (void *) data;
 	struct hci_conn *hcon = conn->hcon;
 	u16 mtu, mps, credits, result;
-	struct l2cap_chan *chan;
+	struct l2cap_chan *chan, *tmp;
 	int err = 0, sec_level;
 	int i = 0;
 
@@ -6085,7 +6085,7 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn,
 
 	cmd_len -= sizeof(*rsp);
 
-	list_for_each_entry(chan, &conn->chan_l, list) {
+	list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
 		u16 dcid;
 
 		if (chan->ident != cmd->ident ||

From e848dbd364aca44c9d23c04bef964fab79e2b34f Mon Sep 17 00:00:00 2001
From: Hilda Wu <hildawu@realtek.com>
Date: Fri, 14 May 2021 11:19:01 +0800
Subject: [PATCH 2922/3292] Bluetooth: btusb: Add support USB ALT 3 for WBS

Because mSBC frames do not need to be aligned to the SCO packet
boundary. Using USB ALT 3 let HCI payload >= 60 bytes, let mSBC
data satisfy 60 Bytes avoid payload unaligned situation and fixed
some headset no voise issue.

USB Alt 3 supported also need HFP support transparent MTU in 72 Bytes.

Signed-off-by: Hilda Wu <hildawu@realtek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index ff41fb0fb557..f7131059469e 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -1756,6 +1756,13 @@ static void btusb_work(struct work_struct *work)
 			 * which work with WBS at all.
 			 */
 			new_alts = btusb_find_altsetting(data, 6) ? 6 : 1;
+			/* Because mSBC frames do not need to be aligned to the
+			 * SCO packet boundary. If support the Alt 3, use the
+			 * Alt 3 for HCI payload >= 60 Bytes let air packet
+			 * data satisfy 60 bytes.
+			 */
+			if (new_alts == 1 && btusb_find_altsetting(data, 3))
+				new_alts = 3;
 		}
 
 		if (btusb_switch_alt_setting(hdev, new_alts) < 0)

From c615943ef0525fdaea631ca42ded446e11389062 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 May 2021 21:32:35 +0800
Subject: [PATCH 2923/3292] Bluetooth: RFCOMM: Use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/rfcomm/tty.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index a58584949a95..8cb53e10a985 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -198,20 +198,22 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev)
 	hci_dev_put(hdev);
 }
 
-static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf)
+static ssize_t address_show(struct device *tty_dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
 	return sprintf(buf, "%pMR\n", &dev->dst);
 }
 
-static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf)
+static ssize_t channel_show(struct device *tty_dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
 	return sprintf(buf, "%d\n", dev->channel);
 }
 
-static DEVICE_ATTR(address, 0444, show_address, NULL);
-static DEVICE_ATTR(channel, 0444, show_channel, NULL);
+static DEVICE_ATTR_RO(address);
+static DEVICE_ATTR_RO(channel);
 
 static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
 					   struct rfcomm_dlc *dlc)

From 79699a7056ff784524d1baa387f30ddf98e14a1c Mon Sep 17 00:00:00 2001
From: Sathish Narasimman <sathish.narasimman@intel.com>
Date: Thu, 20 May 2021 17:12:01 +0530
Subject: [PATCH 2924/3292] Bluetooth: Translate additional address type during
 le_conn_comp

When using controller based address resolution, then the destination
address type during le_conn_complete uses 0x02 & 0x03 if controller
resolves the destination address(RPA).
These address types need to be converted back into either 0x00 0r 0x01

Signed-off-by: Sathish Narasimman <sathish.narasimman@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 04b2509b7cb5..7c9482449228 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5196,6 +5196,23 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 		conn->dst_type = irk->addr_type;
 	}
 
+	/* When using controller based address resolution, then the new
+	 * address types 0x02 and 0x03 are used. These types need to be
+	 * converted back into either public address or random address type
+	 */
+	if (use_ll_privacy(hdev) &&
+	    hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
+	    hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) {
+		switch (conn->dst_type) {
+		case ADDR_LE_DEV_PUBLIC_RESOLVED:
+			conn->dst_type = ADDR_LE_DEV_PUBLIC;
+			break;
+		case ADDR_LE_DEV_RANDOM_RESOLVED:
+			conn->dst_type = ADDR_LE_DEV_RANDOM;
+			break;
+		}
+	}
+
 	if (status) {
 		hci_le_conn_failed(conn, status);
 		goto unlock;

From c32d624640fd2254ec40e76e4a176e75de77ee09 Mon Sep 17 00:00:00 2001
From: Yun-Hao Chung <howardchung@chromium.org>
Date: Thu, 20 May 2021 13:12:09 +0800
Subject: [PATCH 2925/3292] Bluetooth: disable filter dup when scan for adv
 monitor

Disable duplicates filter when scanning for advertisement monitor for
the following reasons. The scanning includes active scan and passive
scan.

For HW pattern filtering (ex. MSFT), Realtek and Qualcomm controllers
ignore RSSI_Sampling_Period when the duplicates filter is enabled.

For SW pattern filtering, when we're not doing interleaved scanning, it
is necessary to disable duplicates filter, otherwise hosts can only
receive one advertisement and it's impossible to know if a peer is still
in range.

Signed-off-by: Yun-Hao Chung <howardchung@chromium.org>
Reviewed-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Manish Mandlik <mmandlik@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 46 +++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index fa9125b782f8..3465862429fb 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -932,7 +932,7 @@ static bool scan_use_rpa(struct hci_dev *hdev)
 
 static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
 			       u16 window, u8 own_addr_type, u8 filter_policy,
-			       bool addr_resolv)
+			       bool filter_dup, bool addr_resolv)
 {
 	struct hci_dev *hdev = req->hdev;
 
@@ -997,7 +997,7 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
 
 		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
 		ext_enable_cp.enable = LE_SCAN_ENABLE;
-		ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+		ext_enable_cp.filter_dup = filter_dup;
 
 		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
 			    sizeof(ext_enable_cp), &ext_enable_cp);
@@ -1016,7 +1016,7 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
 
 		memset(&enable_cp, 0, sizeof(enable_cp));
 		enable_cp.enable = LE_SCAN_ENABLE;
-		enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+		enable_cp.filter_dup = filter_dup;
 		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
 			    &enable_cp);
 	}
@@ -1053,6 +1053,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 	u8 own_addr_type;
 	u8 filter_policy;
 	u16 window, interval;
+	/* Default is to enable duplicates filter */
+	u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
 	/* Background scanning should run with address resolution */
 	bool addr_resolv = true;
 
@@ -1106,6 +1108,20 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 	} else if (hci_is_adv_monitoring(hdev)) {
 		window = hdev->le_scan_window_adv_monitor;
 		interval = hdev->le_scan_int_adv_monitor;
+
+		/* Disable duplicates filter when scanning for advertisement
+		 * monitor for the following reasons.
+		 *
+		 * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm
+		 * controllers ignore RSSI_Sampling_Period when the duplicates
+		 * filter is enabled.
+		 *
+		 * For SW pattern filtering, when we're not doing interleaved
+		 * scanning, it is necessary to disable duplicates filter,
+		 * otherwise hosts can only receive one advertisement and it's
+		 * impossible to know if a peer is still in range.
+		 */
+		filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
 	} else {
 		window = hdev->le_scan_window;
 		interval = hdev->le_scan_interval;
@@ -1113,7 +1129,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 
 	bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy);
 	hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window,
-			   own_addr_type, filter_policy, addr_resolv);
+			   own_addr_type, filter_policy, filter_dup,
+			   addr_resolv);
 }
 
 static bool adv_instance_is_scannable(struct hci_dev *hdev, u8 instance)
@@ -3135,6 +3152,8 @@ static int active_scan(struct hci_request *req, unsigned long opt)
 	u8 own_addr_type;
 	/* White list is not used for discovery */
 	u8 filter_policy = 0x00;
+	/* Default is to enable duplicates filter */
+	u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
 	/* Discovery doesn't require controller address resolution */
 	bool addr_resolv = false;
 	int err;
@@ -3159,9 +3178,26 @@ static int active_scan(struct hci_request *req, unsigned long opt)
 	if (err < 0)
 		own_addr_type = ADDR_LE_DEV_PUBLIC;
 
+	if (hci_is_adv_monitoring(hdev)) {
+		/* Duplicate filter should be disabled when some advertisement
+		 * monitor is activated, otherwise AdvMon can only receive one
+		 * advertisement for one peer(*) during active scanning, and
+		 * might report loss to these peers.
+		 *
+		 * Note that different controllers have different meanings of
+		 * |duplicate|. Some of them consider packets with the same
+		 * address as duplicate, and others consider packets with the
+		 * same address and the same RSSI as duplicate. Although in the
+		 * latter case we don't need to disable duplicate filter, but
+		 * it is common to have active scanning for a short period of
+		 * time, the power impact should be neglectable.
+		 */
+		filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
+	}
+
 	hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
 			   hdev->le_scan_window_discovery, own_addr_type,
-			   filter_policy, addr_resolv);
+			   filter_policy, filter_dup, addr_resolv);
 	return 0;
 }
 

From 02ce2c2c24024aade65a8d91d6a596651eaf2d0a Mon Sep 17 00:00:00 2001
From: Tedd Ho-Jeong An <tedd.an@intel.com>
Date: Wed, 26 May 2021 10:36:22 -0700
Subject: [PATCH 2926/3292] Bluetooth: mgmt: Fix the command returns garbage
 parameter value

When the Get Device Flags command fails, it returns the error status
with the parameters filled with the garbage values. Although the
parameters are not used, it is better to fill with zero than the random
values.

Signed-off-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/mgmt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index a81cf1b8b2e8..5616e8afb22e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4061,6 +4061,8 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hci_dev_lock(hdev);
 
+	memset(&rp, 0, sizeof(rp));
+
 	if (cp->addr.type == BDADDR_BREDR) {
 		br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
 							      &cp->addr.bdaddr,

From 3011faa29bc6f45d1388b8588cb9c5a5154927e7 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Thu, 27 May 2021 15:19:54 +0800
Subject: [PATCH 2927/3292] Bluetooth: hci_h5: Add RTL8822CS capabilities

RTL8822 chipset supports WBS, and this information is conveyed in
btusb.c. However, the UART driver doesn't have this information just
yet.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btrtl.c  | 26 ++++++++++++++++----------
 drivers/bluetooth/btrtl.h  |  7 +++++++
 drivers/bluetooth/hci_h5.c |  5 +----
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index e7fe5fb22753..988a09860c6b 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -719,17 +719,8 @@ int btrtl_download_firmware(struct hci_dev *hdev,
 }
 EXPORT_SYMBOL_GPL(btrtl_download_firmware);
 
-int btrtl_setup_realtek(struct hci_dev *hdev)
+void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev)
 {
-	struct btrtl_device_info *btrtl_dev;
-	int ret;
-
-	btrtl_dev = btrtl_initialize(hdev, NULL);
-	if (IS_ERR(btrtl_dev))
-		return PTR_ERR(btrtl_dev);
-
-	ret = btrtl_download_firmware(hdev, btrtl_dev);
-
 	/* Enable controller to do both LE scan and BR/EDR inquiry
 	 * simultaneously.
 	 */
@@ -750,6 +741,21 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 		rtl_dev_dbg(hdev, "WBS supported not enabled.");
 		break;
 	}
+}
+EXPORT_SYMBOL_GPL(btrtl_set_quirks);
+
+int btrtl_setup_realtek(struct hci_dev *hdev)
+{
+	struct btrtl_device_info *btrtl_dev;
+	int ret;
+
+	btrtl_dev = btrtl_initialize(hdev, NULL);
+	if (IS_ERR(btrtl_dev))
+		return PTR_ERR(btrtl_dev);
+
+	ret = btrtl_download_firmware(hdev, btrtl_dev);
+
+	btrtl_set_quirks(hdev, btrtl_dev);
 
 	btrtl_free(btrtl_dev);
 	return ret;
diff --git a/drivers/bluetooth/btrtl.h b/drivers/bluetooth/btrtl.h
index 2a582682136d..2c441bda390a 100644
--- a/drivers/bluetooth/btrtl.h
+++ b/drivers/bluetooth/btrtl.h
@@ -54,6 +54,8 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev,
 void btrtl_free(struct btrtl_device_info *btrtl_dev);
 int btrtl_download_firmware(struct hci_dev *hdev,
 			    struct btrtl_device_info *btrtl_dev);
+void btrtl_set_quirks(struct hci_dev *hdev,
+		      struct btrtl_device_info *btrtl_dev);
 int btrtl_setup_realtek(struct hci_dev *hdev);
 int btrtl_shutdown_realtek(struct hci_dev *hdev);
 int btrtl_get_uart_settings(struct hci_dev *hdev,
@@ -79,6 +81,11 @@ static inline int btrtl_download_firmware(struct hci_dev *hdev,
 	return -EOPNOTSUPP;
 }
 
+static inline void btrtl_set_quirks(struct hci_dev *hdev,
+				    struct btrtl_device_info *btrtl_dev)
+{
+}
+
 static inline int btrtl_setup_realtek(struct hci_dev *hdev)
 {
 	return -EOPNOTSUPP;
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index 27e96681d583..e0520639f4ba 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -906,10 +906,7 @@ static int h5_btrtl_setup(struct h5 *h5)
 	/* Give the device some time before the hci-core sends it a reset */
 	usleep_range(10000, 20000);
 
-	/* Enable controller to do both LE scan and BR/EDR inquiry
-	 * simultaneously.
-	 */
-	set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &h5->hu->hdev->quirks);
+	btrtl_set_quirks(h5->hu->hdev, btrtl_dev);
 
 out_free:
 	btrtl_free(btrtl_dev);

From 33404381c5e875cbd57eec6d9bbacd3b13b404c9 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Date: Fri, 28 May 2021 17:26:45 +0200
Subject: [PATCH 2928/3292] Bluetooth: btusb: Add 0x0b05:0x190e Realtek 8761BU
 (ASUS BT500) device.

T:  Bus=01 Lev=01 Prnt=01 Port=08 Cnt=04 Dev#= 18 Spd=12   MxCh= 0
D:  Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=0b05 ProdID=190e Rev= 2.00
S:  Manufacturer=Realtek
S:  Product=ASUS USB-BT500
S:  SerialNumber=xxxxxxxx
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index f7131059469e..b5813aa209ed 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -434,6 +434,10 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0bda, 0xb009), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8761BU Bluetooth devices */
+	{ USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK |
+	  					     BTUSB_WIDEBAND_SPEECH },
+
 	/* Additional Realtek 8821AE Bluetooth devices */
 	{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },

From 9fd2e2949b43dea869f7fce0f8f51df44f635d59 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Date: Fri, 28 May 2021 17:26:44 +0200
Subject: [PATCH 2929/3292] Bluetooth: btrtl: rename USB fw for RTL8761

According Realteks own BT drivers firmware RTL8761B is for UART
and RTL8761BU is for USB.

Change existing 8761B to UART and add an 8761BU entry for USB

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btrtl.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 988a09860c6b..cce0125ec4fd 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -132,12 +132,19 @@ static const struct id_table ic_id_table[] = {
 	  .cfg_name = "rtl_bt/rtl8761a_config" },
 
 	/* 8761B */
-	{ IC_INFO(RTL_ROM_LMP_8761A, 0xb, 0xa, HCI_USB),
+	{ IC_INFO(RTL_ROM_LMP_8761A, 0xb, 0xa, HCI_UART),
 	  .config_needed = false,
 	  .has_rom_version = true,
 	  .fw_name  = "rtl_bt/rtl8761b_fw.bin",
 	  .cfg_name = "rtl_bt/rtl8761b_config" },
 
+	/* 8761BU */
+	{ IC_INFO(RTL_ROM_LMP_8761A, 0xb, 0xa, HCI_USB),
+	  .config_needed = false,
+	  .has_rom_version = true,
+	  .fw_name  = "rtl_bt/rtl8761bu_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8761bu_config" },
+
 	/* 8822C with UART interface */
 	{ IC_INFO(RTL_ROM_LMP_8822B, 0xc, 0xa, HCI_UART),
 	  .config_needed = true,

From 799acb9347915bfe4eac0ff2345b468f0a1ca207 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 28 May 2021 11:45:02 -0700
Subject: [PATCH 2930/3292] Bluetooth: mgmt: Fix slab-out-of-bounds in
 tlv_data_is_valid

This fixes parsing of LTV entries when the length is 0.

Found with:

tools/mgmt-tester -s "Add Advertising - Success (ScRsp only)"

Add Advertising - Success (ScRsp only) - run
  Sending Add Advertising (0x003e)
  Test condition added, total 1
[   11.004577] ==================================================================
[   11.005292] BUG: KASAN: slab-out-of-bounds in tlv_data_is_valid+0x87/0xe0
[   11.005984] Read of size 1 at addr ffff888002c695b0 by task mgmt-tester/87
[   11.006711]
[   11.007176]
[   11.007429] Allocated by task 87:
[   11.008151]
[   11.008438] The buggy address belongs to the object at ffff888002c69580
[   11.008438]  which belongs to the cache kmalloc-64 of size 64
[   11.010526] The buggy address is located 48 bytes inside of
[   11.010526]  64-byte region [ffff888002c69580, ffff888002c695c0)
[   11.012423] The buggy address belongs to the page:
[   11.013291]
[   11.013544] Memory state around the buggy address:
[   11.014359]  ffff888002c69480: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
[   11.015453]  ffff888002c69500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
[   11.016232] >ffff888002c69580: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc
[   11.017010]                                      ^
[   11.017547]  ffff888002c69600: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc
[   11.018296]  ffff888002c69680: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
[   11.019116] ==================================================================

Fixes: 2bb36870e8cb2 ("Bluetooth: Unify advertising instance flags check")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/mgmt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 5616e8afb22e..896f0c482912 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7590,6 +7590,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
 	for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) {
 		cur_len = data[i];
 
+		if (!cur_len)
+			continue;
+
 		if (data[i + 1] == EIR_FLAGS &&
 		    (!is_adv_data || flags_managed(adv_flags)))
 			return false;

From 76c185a51505262fe19b5a2cd5dd70199d21949b Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 31 May 2021 16:37:21 +0800
Subject: [PATCH 2931/3292] Bluetooth: use inclusive language in hci_core.h

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
master -> central
slave  -> peripheral

These attributes are not used elsewhere in the code.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 44 ++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index c4b0650fb9ae..4eea590cd432 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1839,23 +1839,23 @@ struct hci_rp_le_read_iso_tx_sync {
 #define HCI_OP_LE_SET_CIG_PARAMS		0x2062
 struct hci_cis_params {
 	__u8    cis_id;
-	__le16  m_sdu;
-	__le16  s_sdu;
-	__u8    m_phy;
-	__u8    s_phy;
-	__u8    m_rtn;
-	__u8    s_rtn;
+	__le16  c_sdu;
+	__le16  p_pdu;
+	__u8    c_phy;
+	__u8    p_phy;
+	__u8    c_rtn;
+	__u8    p_rtn;
 } __packed;
 
 struct hci_cp_le_set_cig_params {
 	__u8    cig_id;
-	__u8    m_interval[3];
-	__u8    s_interval[3];
-	__u8    sca;
+	__u8    c_interval[3];
+	__u8    p_interval[3];
+	__u8    wc_sca;
 	__u8    packing;
 	__u8    framing;
-	__le16  m_latency;
-	__le16  s_latency;
+	__le16  c_latency;
+	__le16  p_latency;
 	__u8    num_cis;
 	struct hci_cis_params cis[];
 } __packed;
@@ -2260,7 +2260,7 @@ struct hci_ev_sync_train_complete {
 	__u8	status;
 } __packed;
 
-#define HCI_EV_SLAVE_PAGE_RESP_TIMEOUT	0x54
+#define HCI_EV_PERIPHERAL_PAGE_RESP_TIMEOUT	0x54
 
 #define HCI_EV_LE_CONN_COMPLETE		0x01
 struct hci_ev_le_conn_complete {
@@ -2418,17 +2418,17 @@ struct hci_evt_le_cis_established {
 	__le16 handle;
 	__u8  cig_sync_delay[3];
 	__u8  cis_sync_delay[3];
-	__u8  m_latency[3];
-	__u8  s_latency[3];
-	__u8  m_phy;
-	__u8  s_phy;
+	__u8  c_latency[3];
+	__u8  p_latency[3];
+	__u8  c_phy;
+	__u8  p_phy;
 	__u8  nse;
-	__u8  m_bn;
-	__u8  s_bn;
-	__u8  m_ft;
-	__u8  s_ft;
-	__le16 m_mtu;
-	__le16 s_mtu;
+	__u8  c_bn;
+	__u8  p_bn;
+	__u8  c_ft;
+	__u8  p_ft;
+	__le16 c_mtu;
+	__le16 p_mtu;
 	__le16 interval;
 } __packed;
 

From 6397729bb74df3918187c5e96fb0f63c5f5292d9 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 31 May 2021 16:37:22 +0800
Subject: [PATCH 2932/3292] Bluetooth: use inclusive language to describe CPB

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced when describing the
connectionless peripheral broadcast feature:
master -> central
slave  -> peripheral

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 26 +++++++++++++-------------
 include/net/bluetooth/hci_core.h |  4 ++--
 net/bluetooth/hci_conn.c         |  2 +-
 net/bluetooth/hci_core.c         | 16 ++++++++--------
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4eea590cd432..ece96ccc42de 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -36,7 +36,7 @@
 
 #define HCI_MAX_AMP_ASSOC_SIZE	672
 
-#define HCI_MAX_CSB_DATA_SIZE	252
+#define HCI_MAX_CPB_DATA_SIZE	252
 
 /* HCI dev events */
 #define HCI_DEV_REG			1
@@ -472,10 +472,10 @@ enum {
 #define LMP_EXTFEATURES	0x80
 
 /* Extended LMP features */
-#define LMP_CSB_MASTER	0x01
-#define LMP_CSB_SLAVE	0x02
-#define LMP_SYNC_TRAIN	0x04
-#define LMP_SYNC_SCAN	0x08
+#define LMP_CPB_CENTRAL		0x01
+#define LMP_CPB_PERIPHERAL	0x02
+#define LMP_SYNC_TRAIN		0x04
+#define LMP_SYNC_SCAN		0x08
 
 #define LMP_SC		0x01
 #define LMP_PING	0x02
@@ -877,17 +877,17 @@ struct hci_rp_logical_link_cancel {
 	__u8     flow_spec_id;
 } __packed;
 
-#define HCI_OP_SET_CSB			0x0441
-struct hci_cp_set_csb {
+#define HCI_OP_SET_CPB			0x0441
+struct hci_cp_set_cpb {
 	__u8	enable;
 	__u8	lt_addr;
 	__u8	lpo_allowed;
 	__le16	packet_type;
 	__le16	interval_min;
 	__le16	interval_max;
-	__le16	csb_sv_tout;
+	__le16	cpb_sv_tout;
 } __packed;
-struct hci_rp_set_csb {
+struct hci_rp_set_cpb {
 	__u8	status;
 	__u8	lt_addr;
 	__le16	interval;
@@ -1184,14 +1184,14 @@ struct hci_rp_delete_reserved_lt_addr {
 	__u8	lt_addr;
 } __packed;
 
-#define HCI_OP_SET_CSB_DATA		0x0c76
-struct hci_cp_set_csb_data {
+#define HCI_OP_SET_CPB_DATA		0x0c76
+struct hci_cp_set_cpb_data {
 	__u8	lt_addr;
 	__u8	fragment;
 	__u8	data_length;
-	__u8	data[HCI_MAX_CSB_DATA_SIZE];
+	__u8	data[HCI_MAX_CPB_DATA_SIZE];
 } __packed;
-struct hci_rp_set_csb_data {
+struct hci_rp_set_cpb_data {
 	__u8	status;
 	__u8	lt_addr;
 } __packed;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 43b08bebae74..c9ec06997e1c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1394,8 +1394,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT)
 
 /* ----- Extended LMP capabilities ----- */
-#define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER)
-#define lmp_csb_slave_capable(dev)  ((dev)->features[2][0] & LMP_CSB_SLAVE)
+#define lmp_cpb_central_capable(dev) ((dev)->features[2][0] & LMP_CPB_CENTRAL)
+#define lmp_cpb_peripheral_capable(dev) ((dev)->features[2][0] & LMP_CPB_PERIPHERAL)
 #define lmp_sync_train_capable(dev) ((dev)->features[2][0] & LMP_SYNC_TRAIN)
 #define lmp_sync_scan_capable(dev)  ((dev)->features[2][0] & LMP_SYNC_SCAN)
 #define lmp_sc_capable(dev)         ((dev)->features[2][1] & LMP_SC)
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 0ceb72d32208..ea0f9cdaa6b1 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1842,7 +1842,7 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
 
 	/* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471:
 	 * Table 6.2: Packets defined for synchronous, asynchronous, and
-	 * CSB logical transport types.
+	 * CPB logical transport types.
 	 */
 	switch (conn->type) {
 	case SCO_LINK:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index cdf147899c50..5735171e2e23 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -545,24 +545,24 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
 	u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
 	bool changed = false;
 
-	/* If Connectionless Slave Broadcast master role is supported
+	/* If Connectionless Peripheral Broadcast central role is supported
 	 * enable all necessary events for it.
 	 */
-	if (lmp_csb_master_capable(hdev)) {
+	if (lmp_cpb_central_capable(hdev)) {
 		events[1] |= 0x40;	/* Triggered Clock Capture */
 		events[1] |= 0x80;	/* Synchronization Train Complete */
-		events[2] |= 0x10;	/* Slave Page Response Timeout */
-		events[2] |= 0x20;	/* CSB Channel Map Change */
+		events[2] |= 0x10;	/* Peripheral Page Response Timeout */
+		events[2] |= 0x20;	/* CPB Channel Map Change */
 		changed = true;
 	}
 
-	/* If Connectionless Slave Broadcast slave role is supported
+	/* If Connectionless Peripheral Broadcast peripheral role is supported
 	 * enable all necessary events for it.
 	 */
-	if (lmp_csb_slave_capable(hdev)) {
+	if (lmp_cpb_peripheral_capable(hdev)) {
 		events[2] |= 0x01;	/* Synchronization Train Received */
-		events[2] |= 0x02;	/* CSB Receive */
-		events[2] |= 0x04;	/* CSB Timeout */
+		events[2] |= 0x02;	/* CPB Receive */
+		events[2] |= 0x04;	/* CPB Timeout */
 		events[2] |= 0x08;	/* Truncated Page Complete */
 		changed = true;
 	}

From ef365da1803de7891589c75304c8c36bb7cf4b98 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 31 May 2021 16:37:23 +0800
Subject: [PATCH 2933/3292] Bluetooth: use inclusive language in HCI LE
 features

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
master -> central
slave  -> peripheral

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  6 +++---
 net/bluetooth/hci_event.c   | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ece96ccc42de..3abd6273a189 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -489,7 +489,7 @@ enum {
 /* LE features */
 #define HCI_LE_ENCRYPTION		0x01
 #define HCI_LE_CONN_PARAM_REQ_PROC	0x02
-#define HCI_LE_SLAVE_FEATURES		0x08
+#define HCI_LE_PERIPHERAL_FEATURES	0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
 #define HCI_LE_LL_PRIVACY		0x40
@@ -498,8 +498,8 @@ enum {
 #define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_CHAN_SEL_ALG2		0x40
-#define HCI_LE_CIS_MASTER		0x10
-#define HCI_LE_CIS_SLAVE		0x20
+#define HCI_LE_CIS_CENTRAL		0x10
+#define HCI_LE_CIS_PERIPHERAL		0x20
 
 /* Connection modes */
 #define HCI_CM_ACTIVE	0x0000
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 7c9482449228..e289187075b9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5243,17 +5243,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 	hci_debugfs_create_conn(conn);
 	hci_conn_add_sysfs(conn);
 
-	/* The remote features procedure is defined for master
+	/* The remote features procedure is defined for central
 	 * role only. So only in case of an initiated connection
 	 * request the remote features.
 	 *
-	 * If the local controller supports slave-initiated features
-	 * exchange, then requesting the remote features in slave
+	 * If the local controller supports peripheral-initiated features
+	 * exchange, then requesting the remote features in peripheral
 	 * role is possible. Otherwise just transition into the
 	 * connected state without requesting the remote features.
 	 */
 	if (conn->out ||
-	    (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
+	    (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) {
 		struct hci_cp_le_read_remote_features cp;
 
 		cp.handle = __cpu_to_le16(conn->handle);
@@ -5774,7 +5774,7 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev,
 		if (conn->state == BT_CONFIG) {
 			__u8 status;
 
-			/* If the local controller supports slave-initiated
+			/* If the local controller supports peripheral-initiated
 			 * features exchange, but the remote controller does
 			 * not, then it is possible that the error code 0x1a
 			 * for unsupported remote feature gets returned.
@@ -5783,8 +5783,8 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev,
 			 * transition into connected state and mark it as
 			 * successful.
 			 */
-			if ((hdev->le_features[0] & HCI_LE_SLAVE_FEATURES) &&
-			    !conn->out && ev->status == 0x1a)
+			if (!conn->out && ev->status == 0x1a &&
+			    (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES))
 				status = 0x00;
 			else
 				status = ev->status;

From fad646e16d3cafd67d3cfff8e66f77401190957e Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 31 May 2021 16:37:25 +0800
Subject: [PATCH 2934/3292] Bluetooth: use inclusive language in SMP

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
master -> initiator
slave  -> responder

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h |  2 +-
 net/bluetooth/mgmt.c         | 10 +++---
 net/bluetooth/smp.c          | 66 +++++++++++++++++++-----------------
 net/bluetooth/smp.h          |  6 ++--
 4 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index a03c62b1dc2f..23a0524061b7 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -202,7 +202,7 @@ struct mgmt_cp_load_link_keys {
 struct mgmt_ltk_info {
 	struct mgmt_addr_info addr;
 	__u8	type;
-	__u8	master;
+	__u8	initiator;
 	__u8	enc_size;
 	__le16	ediv;
 	__le64	rand;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 896f0c482912..fac972415d68 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6169,7 +6169,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 
 static bool ltk_is_valid(struct mgmt_ltk_info *key)
 {
-	if (key->master != 0x00 && key->master != 0x01)
+	if (key->initiator != 0x00 && key->initiator != 0x01)
 		return false;
 
 	switch (key->addr.type) {
@@ -6247,11 +6247,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 		switch (key->type) {
 		case MGMT_LTK_UNAUTHENTICATED:
 			authenticated = 0x00;
-			type = key->master ? SMP_LTK : SMP_LTK_SLAVE;
+			type = key->initiator ? SMP_LTK : SMP_LTK_RESPONDER;
 			break;
 		case MGMT_LTK_AUTHENTICATED:
 			authenticated = 0x01;
-			type = key->master ? SMP_LTK : SMP_LTK_SLAVE;
+			type = key->initiator ? SMP_LTK : SMP_LTK_RESPONDER;
 			break;
 		case MGMT_LTK_P256_UNAUTH:
 			authenticated = 0x00;
@@ -8649,7 +8649,7 @@ static u8 mgmt_ltk_type(struct smp_ltk *ltk)
 {
 	switch (ltk->type) {
 	case SMP_LTK:
-	case SMP_LTK_SLAVE:
+	case SMP_LTK_RESPONDER:
 		if (ltk->authenticated)
 			return MGMT_LTK_AUTHENTICATED;
 		return MGMT_LTK_UNAUTHENTICATED;
@@ -8695,7 +8695,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
 	ev.key.rand = key->rand;
 
 	if (key->type == SMP_LTK)
-		ev.key.master = 1;
+		ev.key.initiator = 1;
 
 	/* Make sure we copy only the significant bytes based on the
 	 * encryption key size, and set the rest of the value to zeroes.
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 4d93c6c32a71..6197f8ae53ab 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -111,9 +111,9 @@ struct smp_chan {
 	u8		id_addr_type;
 	u8		irk[16];
 	struct smp_csrk	*csrk;
-	struct smp_csrk	*slave_csrk;
+	struct smp_csrk	*responder_csrk;
 	struct smp_ltk	*ltk;
-	struct smp_ltk	*slave_ltk;
+	struct smp_ltk	*responder_ltk;
 	struct smp_irk	*remote_irk;
 	u8		*link_key;
 	unsigned long	flags;
@@ -753,7 +753,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
 	mgmt_smp_complete(hcon, complete);
 
 	kfree_sensitive(smp->csrk);
-	kfree_sensitive(smp->slave_csrk);
+	kfree_sensitive(smp->responder_csrk);
 	kfree_sensitive(smp->link_key);
 
 	crypto_free_shash(smp->tfm_cmac);
@@ -776,9 +776,9 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
 			kfree_rcu(smp->ltk, rcu);
 		}
 
-		if (smp->slave_ltk) {
-			list_del_rcu(&smp->slave_ltk->list);
-			kfree_rcu(smp->slave_ltk, rcu);
+		if (smp->responder_ltk) {
+			list_del_rcu(&smp->responder_ltk->list);
+			kfree_rcu(smp->responder_ltk, rcu);
 		}
 
 		if (smp->remote_irk) {
@@ -979,7 +979,7 @@ static u8 smp_random(struct smp_chan *smp)
 	int ret;
 
 	bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn,
-		   conn->hcon->out ? "master" : "slave");
+		   conn->hcon->out ? "initiator" : "responder");
 
 	ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp,
 		     hcon->init_addr_type, &hcon->init_addr,
@@ -1021,8 +1021,8 @@ static u8 smp_random(struct smp_chan *smp)
 		else
 			auth = 0;
 
-		/* Even though there's no _SLAVE suffix this is the
-		 * slave STK we're adding for later lookup (the master
+		/* Even though there's no _RESPONDER suffix this is the
+		 * responder STK we're adding for later lookup (the initiator
 		 * STK never needs to be stored).
 		 */
 		hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
@@ -1077,10 +1077,10 @@ static void smp_notify_keys(struct l2cap_conn *conn)
 		mgmt_new_csrk(hdev, smp->csrk, persistent);
 	}
 
-	if (smp->slave_csrk) {
-		smp->slave_csrk->bdaddr_type = hcon->dst_type;
-		bacpy(&smp->slave_csrk->bdaddr, &hcon->dst);
-		mgmt_new_csrk(hdev, smp->slave_csrk, persistent);
+	if (smp->responder_csrk) {
+		smp->responder_csrk->bdaddr_type = hcon->dst_type;
+		bacpy(&smp->responder_csrk->bdaddr, &hcon->dst);
+		mgmt_new_csrk(hdev, smp->responder_csrk, persistent);
 	}
 
 	if (smp->ltk) {
@@ -1089,10 +1089,10 @@ static void smp_notify_keys(struct l2cap_conn *conn)
 		mgmt_new_ltk(hdev, smp->ltk, persistent);
 	}
 
-	if (smp->slave_ltk) {
-		smp->slave_ltk->bdaddr_type = hcon->dst_type;
-		bacpy(&smp->slave_ltk->bdaddr, &hcon->dst);
-		mgmt_new_ltk(hdev, smp->slave_ltk, persistent);
+	if (smp->responder_ltk) {
+		smp->responder_ltk->bdaddr_type = hcon->dst_type;
+		bacpy(&smp->responder_ltk->bdaddr, &hcon->dst);
+		mgmt_new_ltk(hdev, smp->responder_ltk, persistent);
 	}
 
 	if (smp->link_key) {
@@ -1272,7 +1272,7 @@ static void smp_distribute_keys(struct smp_chan *smp)
 
 	if (*keydist & SMP_DIST_ENC_KEY) {
 		struct smp_cmd_encrypt_info enc;
-		struct smp_cmd_master_ident ident;
+		struct smp_cmd_initiator_ident ident;
 		struct smp_ltk *ltk;
 		u8 authenticated;
 		__le16 ediv;
@@ -1293,14 +1293,15 @@ static void smp_distribute_keys(struct smp_chan *smp)
 
 		authenticated = hcon->sec_level == BT_SECURITY_HIGH;
 		ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type,
-				  SMP_LTK_SLAVE, authenticated, enc.ltk,
+				  SMP_LTK_RESPONDER, authenticated, enc.ltk,
 				  smp->enc_key_size, ediv, rand);
-		smp->slave_ltk = ltk;
+		smp->responder_ltk = ltk;
 
 		ident.ediv = ediv;
 		ident.rand = rand;
 
-		smp_send_cmd(conn, SMP_CMD_MASTER_IDENT, sizeof(ident), &ident);
+		smp_send_cmd(conn, SMP_CMD_INITIATOR_IDENT, sizeof(ident),
+			     &ident);
 
 		*keydist &= ~SMP_DIST_ENC_KEY;
 	}
@@ -1343,7 +1344,7 @@ static void smp_distribute_keys(struct smp_chan *smp)
 				csrk->type = MGMT_CSRK_LOCAL_UNAUTHENTICATED;
 			memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
 		}
-		smp->slave_csrk = csrk;
+		smp->responder_csrk = csrk;
 
 		smp_send_cmd(conn, SMP_CMD_SIGN_INFO, sizeof(sign), &sign);
 
@@ -2048,7 +2049,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
 	struct smp_cmd_pairing *req, *rsp;
 	u8 auth;
 
-	/* The issue is only observed when we're in slave role */
+	/* The issue is only observed when we're in responder role */
 	if (hcon->out)
 		return SMP_UNSPECIFIED;
 
@@ -2084,7 +2085,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct hci_conn *hcon = conn->hcon;
 	struct hci_dev *hdev = hcon->hdev;
 
-	bt_dev_dbg(hdev, "conn %p %s", conn, hcon->out ? "master" : "slave");
+	bt_dev_dbg(hdev, "conn %p %s", conn,
+		   hcon->out ? "initiator" : "responder");
 
 	if (skb->len < sizeof(smp->pcnf))
 		return SMP_INVALID_PARAMS;
@@ -2251,7 +2253,7 @@ static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
 	hci_le_start_enc(hcon, key->ediv, key->rand, key->val, key->enc_size);
 	hcon->enc_key_size = key->enc_size;
 
-	/* We never store STKs for master role, so clear this flag */
+	/* We never store STKs for initiator role, so clear this flag */
 	clear_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags);
 
 	return true;
@@ -2467,7 +2469,7 @@ int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr,
 		/* Set keys to NULL to make sure smp_failure() does not try to
 		 * remove and free already invalidated rcu list entries. */
 		smp->ltk = NULL;
-		smp->slave_ltk = NULL;
+		smp->responder_ltk = NULL;
 		smp->remote_irk = NULL;
 
 		if (test_bit(SMP_FLAG_COMPLETE, &smp->flags))
@@ -2503,7 +2505,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
 		return SMP_INVALID_PARAMS;
 	}
 
-	SMP_ALLOW_CMD(smp, SMP_CMD_MASTER_IDENT);
+	SMP_ALLOW_CMD(smp, SMP_CMD_INITIATOR_IDENT);
 
 	skb_pull(skb, sizeof(*rp));
 
@@ -2512,9 +2514,9 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
 	return 0;
 }
 
-static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
+static int smp_cmd_initiator_ident(struct l2cap_conn *conn, struct sk_buff *skb)
 {
-	struct smp_cmd_master_ident *rp = (void *) skb->data;
+	struct smp_cmd_initiator_ident *rp = (void *)skb->data;
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
 	struct hci_dev *hdev = conn->hcon->hdev;
@@ -2913,7 +2915,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
 			return 0;
 		}
 
-		/* Slave sends DHKey check as response to master */
+		/* Responder sends DHKey check as response to initiator */
 		sc_dhkey_check(smp);
 	}
 
@@ -3000,8 +3002,8 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
 		reason = smp_cmd_encrypt_info(conn, skb);
 		break;
 
-	case SMP_CMD_MASTER_IDENT:
-		reason = smp_cmd_master_ident(conn, skb);
+	case SMP_CMD_INITIATOR_IDENT:
+		reason = smp_cmd_initiator_ident(conn, skb);
 		break;
 
 	case SMP_CMD_IDENT_INFO:
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index fc35a8bf358e..87a59ec2c9f0 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -79,8 +79,8 @@ struct smp_cmd_encrypt_info {
 	__u8	ltk[16];
 } __packed;
 
-#define SMP_CMD_MASTER_IDENT	0x07
-struct smp_cmd_master_ident {
+#define SMP_CMD_INITIATOR_IDENT	0x07
+struct smp_cmd_initiator_ident {
 	__le16	ediv;
 	__le64	rand;
 } __packed;
@@ -146,7 +146,7 @@ struct smp_cmd_keypress_notify {
 enum {
 	SMP_STK,
 	SMP_LTK,
-	SMP_LTK_SLAVE,
+	SMP_LTK_RESPONDER,
 	SMP_LTK_P256,
 	SMP_LTK_P256_DEBUG,
 };

From 67ffb1857a182d90c0e7db16752b556d6cf3944f Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 31 May 2021 16:37:28 +0800
Subject: [PATCH 2935/3292] Bluetooth: use inclusive language in comments

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
slave       -> peripheral
blacklisted -> blocked

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 6 +++---
 net/bluetooth/hidp/core.c | 2 +-
 net/bluetooth/mgmt.c      | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index e289187075b9..43c324c46c0b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5404,7 +5404,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 		case HCI_AUTO_CONN_DIRECT:
 			/* Only devices advertising with ADV_DIRECT_IND are
 			 * triggering a connection attempt. This is allowing
-			 * incoming connections from slave devices.
+			 * incoming connections from peripheral devices.
 			 */
 			if (adv_type != LE_ADV_DIRECT_IND)
 				return NULL;
@@ -5412,8 +5412,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 		case HCI_AUTO_CONN_ALWAYS:
 			/* Devices advertising with ADV_IND or ADV_DIRECT_IND
 			 * are triggering a connection attempt. This means
-			 * that incoming connections from slave device are
-			 * accepted and also outgoing connections to slave
+			 * that incoming connections from peripheral device are
+			 * accepted and also outgoing connections to peripheral
 			 * devices are established when found.
 			 */
 			break;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0db48c812662..96fedef14723 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -794,7 +794,7 @@ static int hidp_setup_hid(struct hidp_session *session,
 	hid->dev.parent = &session->conn->hcon->dev;
 	hid->ll_driver = &hidp_hid_driver;
 
-	/* True if device is blacklisted in drivers/hid/hid-quirks.c */
+	/* True if device is blocked in drivers/hid/hid-quirks.c */
 	if (hid_ignore(hid)) {
 		hid_destroy_device(session->hid);
 		session->hid = NULL;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index fac972415d68..80ec35c67ea5 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2959,7 +2959,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 		/* When pairing a new device, it is expected to remember
 		 * this device for future connections. Adding the connection
 		 * parameter information ahead of time allows tracking
-		 * of the slave preferred values and will speed up any
+		 * of the peripheral preferred values and will speed up any
 		 * further connection establishment.
 		 *
 		 * If connection parameters already exist, then they

From a1b2fdf97f3659948d83ff491abbab73e591c982 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Mon, 31 May 2021 22:24:49 +0800
Subject: [PATCH 2936/3292] Bluetooth: btmtkuart: using
 pm_runtime_resume_and_get instead of pm_runtime_get_sync

Using pm_runtime_resume_and_get is more appropriate
for simplifing code

Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmtkuart.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c
index 6c40bc75fb5b..e9d91d7c0db4 100644
--- a/drivers/bluetooth/btmtkuart.c
+++ b/drivers/bluetooth/btmtkuart.c
@@ -581,11 +581,9 @@ static int btmtkuart_open(struct hci_dev *hdev)
 
 	/* Enable the power domain and clock the device requires */
 	pm_runtime_enable(dev);
-	err = pm_runtime_get_sync(dev);
-	if (err < 0) {
-		pm_runtime_put_noidle(dev);
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0)
 		goto err_disable_rpm;
-	}
 
 	err = clk_prepare_enable(bdev->clk);
 	if (err < 0)

From ca17a5cccf8b6d35dab4729bea8f4350bc0b4caf Mon Sep 17 00:00:00 2001
From: Tim Jiang <tjiang@codeaurora.org>
Date: Tue, 1 Jun 2021 17:57:25 +0800
Subject: [PATCH 2937/3292] Bluetooth: btusb: use default nvm if boardID is 0
 for wcn6855.

if boardID is 0, will use the default nvm file without surfix.

Signed-off-by: Tim Jiang <tjiang@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index b5813aa209ed..ad07dbedc284 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -4172,9 +4172,15 @@ static int btusb_setup_qca_load_nvm(struct hci_dev *hdev,
 	int err;
 
 	if (((ver->flag >> 8) & 0xff) == QCA_FLAG_MULTI_NVM) {
-		snprintf(fwname, sizeof(fwname), "qca/nvm_usb_%08x_%04x.bin",
-			 le32_to_cpu(ver->rom_version),
-			 le16_to_cpu(ver->board_id));
+		/* if boardid equal 0, use default nvm without surfix */
+		if (le16_to_cpu(ver->board_id) == 0x0) {
+			snprintf(fwname, sizeof(fwname), "qca/nvm_usb_%08x.bin",
+				 le32_to_cpu(ver->rom_version));
+		} else {
+			snprintf(fwname, sizeof(fwname), "qca/nvm_usb_%08x_%04x.bin",
+				le32_to_cpu(ver->rom_version),
+				le16_to_cpu(ver->board_id));
+		}
 	} else {
 		snprintf(fwname, sizeof(fwname), "qca/nvm_usb_%08x.bin",
 			 le32_to_cpu(ver->rom_version));

From 4f00bfb372674d586c4a261bfc595cbce101fbb6 Mon Sep 17 00:00:00 2001
From: Tim Jiang <tjiang@codeaurora.org>
Date: Tue, 1 Jun 2021 17:57:10 +0800
Subject: [PATCH 2938/3292] Bluetooth: btusb: fix bt fiwmare downloading
 failure issue for qca btsoc.

This is btsoc timing issue, after host start to downloading bt firmware,
ep2 need time to switch from function acl to function dfu, so host add
20ms delay as workaround.

Signed-off-by: Tim Jiang <tjiang@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index ad07dbedc284..a9855a2dd561 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -4080,6 +4080,11 @@ static int btusb_setup_qca_download_fw(struct hci_dev *hdev,
 	sent += size;
 	count -= size;
 
+	/* ep2 need time to switch from function acl to function dfu,
+	 * so we add 20ms delay here.
+	 */
+	msleep(20);
+
 	while (count) {
 		size = min_t(size_t, count, QCA_DFU_PACKET_LEN);
 

From 8c8ca05d3291d5e77eccf8f87106506a90aa82a2 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:40:54 +0800
Subject: [PATCH 2939/3292] Bluetooth: bnep: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/bnep/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 43c284158f63..72f47b372705 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -126,8 +126,8 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len
 			f[i].start = get_unaligned_be16(data++);
 			f[i].end   = get_unaligned_be16(data++);
 
-			BT_DBG("proto filter start %d end %d",
-				f[i].start, f[i].end);
+			BT_DBG("proto filter start %u end %u",
+			       f[i].start, f[i].end);
 		}
 
 		if (i < BNEP_MAX_PROTO_FILTERS)
@@ -266,7 +266,7 @@ static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb)
 			break;
 		}
 
-		BT_DBG("type 0x%x len %d", h->type, h->len);
+		BT_DBG("type 0x%x len %u", h->type, h->len);
 
 		switch (h->type & BNEP_TYPE_MASK) {
 		case BNEP_EXT_CONTROL:
@@ -424,7 +424,7 @@ static int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb)
 	int len = 0, il = 0;
 	u8 type = 0;
 
-	BT_DBG("skb %p dev %p type %d", skb, skb->dev, skb->pkt_type);
+	BT_DBG("skb %p dev %p type %u", skb, skb->dev, skb->pkt_type);
 
 	if (!skb->dev) {
 		/* Control frame sent by us */

From b442a8533b02b44bafa81b67a3571b2b106fcc88 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:40:55 +0800
Subject: [PATCH 2940/3292] Bluetooth: cmtp: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/cmtp/capi.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index eb41556002e3..f3bedc3b613a 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -74,7 +74,7 @@ static struct cmtp_application *cmtp_application_add(struct cmtp_session *sessio
 {
 	struct cmtp_application *app = kzalloc(sizeof(*app), GFP_KERNEL);
 
-	BT_DBG("session %p application %p appl %d", session, app, appl);
+	BT_DBG("session %p application %p appl %u", session, app, appl);
 
 	if (!app)
 		return NULL;
@@ -135,7 +135,7 @@ static void cmtp_send_capimsg(struct cmtp_session *session, struct sk_buff *skb)
 {
 	struct cmtp_scb *scb = (void *) skb->cb;
 
-	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
 
 	scb->id = -1;
 	scb->data = (CAPIMSG_COMMAND(skb->data) == CAPI_DATA_B3);
@@ -152,7 +152,7 @@ static void cmtp_send_interopmsg(struct cmtp_session *session,
 	struct sk_buff *skb;
 	unsigned char *s;
 
-	BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum);
+	BT_DBG("session %p subcmd 0x%02x appl %u msgnum %u", session, subcmd, appl, msgnum);
 
 	skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC);
 	if (!skb) {
@@ -188,7 +188,7 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
 	__u16 appl, msgnum, func, info;
 	__u32 controller;
 
-	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
 
 	switch (CAPIMSG_SUBCOMMAND(skb->data)) {
 	case CAPI_CONF:
@@ -321,7 +321,7 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
 	__u16 appl;
 	__u32 contr;
 
-	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
 
 	if (skb->len < CAPI_MSG_BASELEN)
 		return;
@@ -344,7 +344,7 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
 		appl = application->appl;
 		CAPIMSG_SETAPPID(skb->data, appl);
 	} else {
-		BT_ERR("Can't find application with id %d", appl);
+		BT_ERR("Can't find application with id %u", appl);
 		kfree_skb(skb);
 		return;
 	}
@@ -385,8 +385,8 @@ static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_
 	unsigned char buf[8];
 	int err = 0, nconn, want = rp->level3cnt;
 
-	BT_DBG("ctrl %p appl %d level3cnt %d datablkcnt %d datablklen %d",
-		ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen);
+	BT_DBG("ctrl %p appl %u level3cnt %u datablkcnt %u datablklen %u",
+	       ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen);
 
 	application = cmtp_application_add(session, appl);
 	if (!application) {
@@ -450,7 +450,7 @@ static void cmtp_release_appl(struct capi_ctr *ctrl, __u16 appl)
 	struct cmtp_session *session = ctrl->driverdata;
 	struct cmtp_application *application;
 
-	BT_DBG("ctrl %p appl %d", ctrl, appl);
+	BT_DBG("ctrl %p appl %u", ctrl, appl);
 
 	application = cmtp_application_get(session, CMTP_APPLID, appl);
 	if (!application) {
@@ -483,7 +483,7 @@ static u16 cmtp_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 
 	application = cmtp_application_get(session, CMTP_APPLID, appl);
 	if ((!application) || (application->state != BT_CONNECTED)) {
-		BT_ERR("Can't find application with id %d", appl);
+		BT_ERR("Can't find application with id %u", appl);
 		return CAPI_ILLAPPNR;
 	}
 
@@ -515,7 +515,7 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, "ctrl %d\n", session->num);
 
 	list_for_each_entry(app, &session->applications, list) {
-		seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping);
+		seq_printf(m, "appl %u -> %u\n", app->appl, app->mapping);
 	}
 
 	return 0;

From 093dabb4f1aff982f7ef1cebf4e24be3fe47bcdb Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:40:56 +0800
Subject: [PATCH 2941/3292] Bluetooth: hidp: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hidp/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 96fedef14723..80848dfc01db 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -508,7 +508,7 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
 				unsigned char param)
 {
 	int done_with_skb = 1;
-	BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param);
+	BT_DBG("session %p skb %p len %u param 0x%02x", session, skb, skb->len, param);
 
 	switch (param) {
 	case HIDP_DATA_RTYPE_INPUT:
@@ -553,7 +553,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
 	unsigned char hdr, type, param;
 	int free_skb = 1;
 
-	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
 
 	hdr = skb->data[0];
 	skb_pull(skb, 1);
@@ -589,7 +589,7 @@ static void hidp_recv_intr_frame(struct hidp_session *session,
 {
 	unsigned char hdr;
 
-	BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+	BT_DBG("session %p skb %p len %u", session, skb, skb->len);
 
 	hdr = skb->data[0];
 	skb_pull(skb, 1);

From 658d5d8080b5ec6184402d3cf37c2070e4d9b6db Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:40:58 +0800
Subject: [PATCH 2942/3292] Bluetooth: 6lowpan: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/6lowpan.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 29c96bc5733f..fd164a248569 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -167,7 +167,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(peer, &dev->peers, list) {
-		BT_DBG("dst addr %pMR dst type %d ip %pI6c",
+		BT_DBG("dst addr %pMR dst type %u ip %pI6c",
 		       &peer->chan->dst, peer->chan->dst_type,
 		       &peer->peer_addr);
 
@@ -478,7 +478,7 @@ static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
 
 			local_skb = skb_clone(skb, GFP_ATOMIC);
 
-			BT_DBG("xmit %s to %pMR type %d IP %pI6c chan %p",
+			BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p",
 			       netdev->name,
 			       &pentry->chan->dst, pentry->chan->dst_type,
 			       &pentry->peer_addr, pentry->chan);
@@ -521,7 +521,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	if (err) {
 		if (lowpan_cb(skb)->chan) {
-			BT_DBG("xmit %s to %pMR type %d IP %pI6c chan %p",
+			BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p",
 			       netdev->name, &addr, addr_type,
 			       &lowpan_cb(skb)->addr, lowpan_cb(skb)->chan);
 			err = send_pkt(lowpan_cb(skb)->chan, skb, netdev);
@@ -790,7 +790,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
 
 			BT_DBG("dev %p removing %speer %p", dev,
 			       last ? "last " : "1 ", peer);
-			BT_DBG("chan %p orig refcnt %d", chan,
+			BT_DBG("chan %p orig refcnt %u", chan,
 			       kref_read(&chan->kref));
 
 			l2cap_chan_put(chan);
@@ -904,7 +904,7 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type)
 {
 	struct lowpan_peer *peer;
 
-	BT_DBG("conn %p dst type %d", conn, dst_type);
+	BT_DBG("conn %p dst type %u", conn, dst_type);
 
 	peer = lookup_peer(conn);
 	if (!peer)
@@ -936,7 +936,7 @@ static struct l2cap_chan *bt_6lowpan_listen(void)
 
 	atomic_set(&chan->nesting, L2CAP_NESTING_PARENT);
 
-	BT_DBG("chan %p src type %d", chan, chan->src_type);
+	BT_DBG("chan %p src type %u", chan, chan->src_type);
 
 	err = l2cap_add_psm(chan, addr, cpu_to_le16(L2CAP_PSM_IPSP));
 	if (err) {
@@ -977,7 +977,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
 
 	*conn = (struct l2cap_conn *)hcon->l2cap_data;
 
-	BT_DBG("conn %p dst %pMR type %d", *conn, &hcon->dst, hcon->dst_type);
+	BT_DBG("conn %p dst %pMR type %u", *conn, &hcon->dst, hcon->dst_type);
 
 	return 0;
 }
@@ -1119,7 +1119,7 @@ static ssize_t lowpan_control_write(struct file *fp,
 				return -EALREADY;
 			}
 
-			BT_DBG("conn %p dst %pMR type %d user %d", conn,
+			BT_DBG("conn %p dst %pMR type %d user %u", conn,
 			       &conn->hcon->dst, conn->hcon->dst_type,
 			       addr_type);
 		}

From fad48d848cf64d4673474c9ebcb9f6fbf66aa3b8 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:40:59 +0800
Subject: [PATCH 2943/3292] Bluetooth: a2mp: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/a2mp.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 463bad58478b..1fcc482397c3 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -120,7 +120,7 @@ static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (le16_to_cpu(hdr->len) < sizeof(*rej))
 		return -EINVAL;
 
-	BT_DBG("ident %d reason %d", hdr->ident, le16_to_cpu(rej->reason));
+	BT_DBG("ident %u reason %d", hdr->ident, le16_to_cpu(rej->reason));
 
 	skb_pull(skb, sizeof(*rej));
 
@@ -219,7 +219,7 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 
 	cl = (void *) skb->data;
 	while (len >= sizeof(*cl)) {
-		BT_DBG("Remote AMP id %d type %d status %d", cl->id, cl->type,
+		BT_DBG("Remote AMP id %u type %u status %u", cl->id, cl->type,
 		       cl->status);
 
 		if (cl->id != AMP_ID_BREDR && cl->type != AMP_TYPE_BREDR) {
@@ -273,7 +273,7 @@ static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb,
 	struct a2mp_cl *cl = (void *) skb->data;
 
 	while (skb->len >= sizeof(*cl)) {
-		BT_DBG("Controller id %d type %d status %d", cl->id, cl->type,
+		BT_DBG("Controller id %u type %u status %u", cl->id, cl->type,
 		       cl->status);
 		cl = skb_pull(skb, sizeof(*cl));
 	}
@@ -302,7 +302,7 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (le16_to_cpu(hdr->len) < sizeof(*req))
 		return -EINVAL;
 
-	BT_DBG("id %d", req->id);
+	BT_DBG("id %u", req->id);
 
 	hdev = hci_dev_get(req->id);
 	if (!hdev || hdev->dev_type != HCI_AMP) {
@@ -344,7 +344,7 @@ static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (le16_to_cpu(hdr->len) < sizeof(*rsp))
 		return -EINVAL;
 
-	BT_DBG("id %d status 0x%2.2x", rsp->id, rsp->status);
+	BT_DBG("id %u status 0x%2.2x", rsp->id, rsp->status);
 
 	if (rsp->status)
 		return -EINVAL;
@@ -373,7 +373,7 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (le16_to_cpu(hdr->len) < sizeof(*req))
 		return -EINVAL;
 
-	BT_DBG("id %d", req->id);
+	BT_DBG("id %u", req->id);
 
 	/* Make sure that other request is not processed */
 	tmp = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC);
@@ -423,7 +423,7 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 
 	assoc_len = len - sizeof(*rsp);
 
-	BT_DBG("id %d status 0x%2.2x assoc len %zu", rsp->id, rsp->status,
+	BT_DBG("id %u status 0x%2.2x assoc len %zu", rsp->id, rsp->status,
 	       assoc_len);
 
 	if (rsp->status)
@@ -457,7 +457,7 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (!hcon)
 		goto done;
 
-	BT_DBG("Created hcon %p: loc:%d -> rem:%d", hcon, hdev->id, rsp->id);
+	BT_DBG("Created hcon %p: loc:%u -> rem:%u", hcon, hdev->id, rsp->id);
 
 	mgr->bredr_chan->remote_amp_id = rsp->id;
 
@@ -481,7 +481,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (le16_to_cpu(hdr->len) < sizeof(*req))
 		return -EINVAL;
 
-	BT_DBG("local_id %d, remote_id %d", req->local_id, req->remote_id);
+	BT_DBG("local_id %u, remote_id %u", req->local_id, req->remote_id);
 
 	memset(&rsp, 0, sizeof(rsp));
 
@@ -562,7 +562,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (le16_to_cpu(hdr->len) < sizeof(*req))
 		return -EINVAL;
 
-	BT_DBG("local_id %d remote_id %d", req->local_id, req->remote_id);
+	BT_DBG("local_id %u remote_id %u", req->local_id, req->remote_id);
 
 	memset(&rsp, 0, sizeof(rsp));
 
@@ -599,7 +599,7 @@ send_rsp:
 static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 			       struct a2mp_cmd *hdr)
 {
-	BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code);
+	BT_DBG("ident %u code 0x%2.2x", hdr->ident, hdr->code);
 
 	skb_pull(skb, le16_to_cpu(hdr->len));
 	return 0;
@@ -620,7 +620,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
 		hdr = (void *) skb->data;
 		len = le16_to_cpu(hdr->len);
 
-		BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len);
+		BT_DBG("code 0x%2.2x id %u len %u", hdr->code, hdr->ident, len);
 
 		skb_pull(skb, sizeof(*hdr));
 

From 610850bebc5baaf92d113247387b9fcab187259f Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:41:00 +0800
Subject: [PATCH 2944/3292] Bluetooth: amp: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/amp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index be2d469d6369..2134f92bd7ac 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -78,7 +78,7 @@ struct amp_ctrl *amp_ctrl_lookup(struct amp_mgr *mgr, u8 id)
 {
 	struct amp_ctrl *ctrl;
 
-	BT_DBG("mgr %p id %d", mgr, id);
+	BT_DBG("mgr %p id %u", mgr, id);
 
 	mutex_lock(&mgr->amp_ctrls_lock);
 	list_for_each_entry(ctrl, &mgr->amp_ctrls, list) {
@@ -179,7 +179,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
 
 	/* Legacy key */
 	if (conn->key_type < 3) {
-		bt_dev_err(hdev, "legacy key type %d", conn->key_type);
+		bt_dev_err(hdev, "legacy key type %u", conn->key_type);
 		return -EACCES;
 	}
 
@@ -257,7 +257,7 @@ void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
 	struct hci_request req;
 	int err;
 
-	BT_DBG("%s handle %d", hdev->name, phy_handle);
+	BT_DBG("%s handle %u", hdev->name, phy_handle);
 
 	cp.phy_handle = phy_handle;
 	cp.max_len = cpu_to_le16(hdev->amp_assoc_size);

From 85d6728421c9b2797dea3a20f213dd44d9f8d7cd Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:41:02 +0800
Subject: [PATCH 2945/3292] Bluetooth: mgmt: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/mgmt.c        | 16 ++++++++--------
 net/bluetooth/mgmt_config.c |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 80ec35c67ea5..22f9f52c5ae6 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4279,7 +4279,7 @@ int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
 
 done:
 	hci_dev_unlock(hdev);
-	bt_dev_dbg(hdev, "add monitor %d complete, status %d",
+	bt_dev_dbg(hdev, "add monitor %d complete, status %u",
 		   rp.monitor_handle, status);
 
 	return err;
@@ -4504,7 +4504,7 @@ int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status)
 
 done:
 	hci_dev_unlock(hdev);
-	bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
+	bt_dev_dbg(hdev, "remove monitor %d complete, status %u",
 		   rp.monitor_handle, status);
 
 	return err;
@@ -4834,7 +4834,7 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	bt_dev_dbg(hdev, "status %d", status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	hci_dev_lock(hdev);
 
@@ -5090,7 +5090,7 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	bt_dev_dbg(hdev, "status %d", status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	hci_dev_lock(hdev);
 
@@ -5303,7 +5303,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
 static void enable_advertising_instance(struct hci_dev *hdev, u8 status,
 					u16 opcode)
 {
-	bt_dev_dbg(hdev, "status %d", status);
+	bt_dev_dbg(hdev, "status %u", status);
 }
 
 static void set_advertising_complete(struct hci_dev *hdev, u8 status,
@@ -6347,7 +6347,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
 	handle = __le16_to_cpu(cp->handle);
 	conn = hci_conn_hash_lookup_handle(hdev, handle);
 	if (!conn) {
-		bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
+		bt_dev_err(hdev, "unknown handle (%u) in conn_info response",
 			   handle);
 		goto unlock;
 	}
@@ -7654,7 +7654,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
 	struct adv_info *adv_instance, *n;
 	u8 instance;
 
-	bt_dev_dbg(hdev, "status %d", status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	hci_dev_lock(hdev);
 
@@ -8184,7 +8184,7 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
 	struct mgmt_cp_remove_advertising *cp;
 	struct mgmt_rp_remove_advertising rp;
 
-	bt_dev_dbg(hdev, "status %d", status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	hci_dev_lock(hdev);
 
diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
index 1deb0ca7a929..6ef701c27da4 100644
--- a/net/bluetooth/mgmt_config.c
+++ b/net/bluetooth/mgmt_config.c
@@ -146,7 +146,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
 		const u16 type = le16_to_cpu(TO_TLV(buffer)->type);
 
 		if (buffer_left < exp_len) {
-			bt_dev_warn(hdev, "invalid len left %d, exp >= %d",
+			bt_dev_warn(hdev, "invalid len left %u, exp >= %u",
 				    buffer_left, exp_len);
 
 			return mgmt_cmd_status(sk, hdev->id,
@@ -198,7 +198,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
 		}
 
 		if (exp_type_len && len != exp_type_len) {
-			bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d",
+			bt_dev_warn(hdev, "invalid length %d, exp %zu for type %u",
 				    len, exp_type_len, type);
 
 			return mgmt_cmd_status(sk, hdev->id,

From 496bdeeeda09e84f469f47e66f6d38d3735f802f Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:41:03 +0800
Subject: [PATCH 2946/3292] Bluetooth: msft: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/msft.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 37a394786a94..b4bfae41e8a5 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -311,7 +311,7 @@ static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
 
 	monitor = idr_find(&hdev->adv_monitors_idr, msft->pending_add_handle);
 	if (!monitor) {
-		bt_dev_err(hdev, "msft add advmon: monitor %d is not found!",
+		bt_dev_err(hdev, "msft add advmon: monitor %u is not found!",
 			   msft->pending_add_handle);
 		status = HCI_ERROR_UNSPECIFIED;
 		goto unlock;

From 79dbeafe5ef162ede87c916054755a987e93e542 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:41:04 +0800
Subject: [PATCH 2947/3292] Bluetooth: sco: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/sco.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3bd41563f118..d9a4e88dacbb 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -310,7 +310,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
 	if (!sk)
 		goto drop;
 
-	BT_DBG("sk %p len %d", sk, skb->len);
+	BT_DBG("sk %p len %u", sk, skb->len);
 
 	if (sk->sk_state != BT_CONNECTED)
 		goto drop;
@@ -905,7 +905,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
 
 		opts.mtu = sco_pi(sk)->conn->mtu;
 
-		BT_DBG("mtu %d", opts.mtu);
+		BT_DBG("mtu %u", opts.mtu);
 
 		len = min_t(unsigned int, len, sizeof(opts));
 		if (copy_to_user(optval, (char *)&opts, len))
@@ -1167,7 +1167,7 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
 	if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
 		return;
 
-	BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
+	BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status);
 
 	if (!status) {
 		struct sco_conn *conn;
@@ -1196,7 +1196,7 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
 	if (!conn)
 		goto drop;
 
-	BT_DBG("conn %p len %d", conn, skb->len);
+	BT_DBG("conn %p len %u", conn, skb->len);
 
 	if (skb->len) {
 		sco_recv_frame(conn, skb);

From 83b4b19551411c83bbcf677718ab5d9f60d982f6 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Thu, 3 Jun 2021 15:41:05 +0800
Subject: [PATCH 2948/3292] Bluetooth: smp: Use the correct print format

According to Documentation/core-api/printk-formats.rst,
Use the correct print format. Printing an unsigned int value should use %u
instead of %d. Otherwise printk() might end up displaying negative numbers.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6197f8ae53ab..b9413a78993a 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -859,7 +859,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 	memset(smp->tk, 0, sizeof(smp->tk));
 	clear_bit(SMP_FLAG_TK_VALID, &smp->flags);
 
-	bt_dev_dbg(hcon->hdev, "auth:%d lcl:%d rem:%d", auth, local_io,
+	bt_dev_dbg(hcon->hdev, "auth:%u lcl:%u rem:%u", auth, local_io,
 		   remote_io);
 
 	/* If neither side wants MITM, either "just" confirm an incoming
@@ -925,7 +925,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 		get_random_bytes(&passkey, sizeof(passkey));
 		passkey %= 1000000;
 		put_unaligned_le32(passkey, smp->tk);
-		bt_dev_dbg(hcon->hdev, "PassKey: %d", passkey);
+		bt_dev_dbg(hcon->hdev, "PassKey: %u", passkey);
 		set_bit(SMP_FLAG_TK_VALID, &smp->flags);
 	}
 
@@ -1655,7 +1655,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
 	case MGMT_OP_USER_PASSKEY_REPLY:
 		value = le32_to_cpu(passkey);
 		memset(smp->tk, 0, sizeof(smp->tk));
-		bt_dev_dbg(conn->hcon->hdev, "PassKey: %d", value);
+		bt_dev_dbg(conn->hcon->hdev, "PassKey: %u", value);
 		put_unaligned_le32(value, smp->tk);
 		fallthrough;
 	case MGMT_OP_USER_CONFIRM_REPLY:

From 74be523ce6bed0531e4f31c3e1387909589e9bfe Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Fri, 4 Jun 2021 16:26:25 +0800
Subject: [PATCH 2949/3292] Bluetooth: use inclusive language in HCI role
 comments

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
master -> initiator (for smp) or central (everything else)
slave  -> responder (for smp) or peripheral (everything else)

The #define preprocessor terms are unchanged for now to not disturb
dependent APIs.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c   | 8 ++++----
 net/bluetooth/hci_event.c  | 6 +++---
 net/bluetooth/l2cap_core.c | 2 +-
 net/bluetooth/smp.c        | 6 +++---
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ea0f9cdaa6b1..2b5059a56cda 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -257,7 +257,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
 {
 	BT_DBG("hcon %p", conn);
 
-	/* When we are master of an established connection and it enters
+	/* When we are central of an established connection and it enters
 	 * the disconnect timeout, then go ahead and try to read the
 	 * current clock offset.  Processing of the result is done
 	 * within the event handling and hci_clock_offset_evt function.
@@ -1109,9 +1109,9 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 
 	hci_req_init(&req, hdev);
 
-	/* Disable advertising if we're active. For master role
+	/* Disable advertising if we're active. For central role
 	 * connections most controllers will refuse to connect if
-	 * advertising is enabled, and for slave role connections we
+	 * advertising is enabled, and for peripheral role connections we
 	 * anyway have to disable it in order to start directed
 	 * advertising. Any registered advertisements will be
 	 * re-enabled after the connection attempt is finished.
@@ -1119,7 +1119,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		__hci_req_pause_adv_instances(&req);
 
-	/* If requested to connect as slave use directed advertising */
+	/* If requested to connect as peripheral use directed advertising */
 	if (conn->role == HCI_ROLE_SLAVE) {
 		/* If we're active scanning most controllers are unable
 		 * to initiate advertising. Simply reject the attempt.
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 43c324c46c0b..da013d485f14 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2795,9 +2795,9 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		bacpy(&cp.bdaddr, &ev->bdaddr);
 
 		if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
-			cp.role = 0x00; /* Become master */
+			cp.role = 0x00; /* Become central */
 		else
-			cp.role = 0x01; /* Remain slave */
+			cp.role = 0x01; /* Remain peripheral */
 
 		hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
 	} else if (!(flags & HCI_PROTO_DEFER)) {
@@ -5131,7 +5131,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 		conn->dst_type = bdaddr_type;
 
 		/* If we didn't have a hci_conn object previously
-		 * but we're in master role this must be something
+		 * but we're in central role this must be something
 		 * initiated using a white list. Since white list based
 		 * connections are not "first class citizens" we don't
 		 * have full tracking of them. Therefore, we go ahead
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9ebb85df4db4..b76c5d00b082 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1691,7 +1691,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
 	if (hcon->out)
 		smp_conn_security(hcon, hcon->pending_sec_level);
 
-	/* For LE slave connections, make sure the connection interval
+	/* For LE peripheral connections, make sure the connection interval
 	 * is in the range of the minimum and maximum interval that has
 	 * been configured for this connection. If not, then trigger
 	 * the connection update procedure.
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index b9413a78993a..11f853d0500f 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -909,8 +909,8 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 			hcon->pending_sec_level = BT_SECURITY_HIGH;
 	}
 
-	/* If both devices have Keyoard-Display I/O, the master
-	 * Confirms and the slave Enters the passkey.
+	/* If both devices have Keyboard-Display I/O, the initiator
+	 * Confirms and the responder Enters the passkey.
 	 */
 	if (smp->method == OVERLAP) {
 		if (hcon->role == HCI_ROLE_MASTER)
@@ -3083,7 +3083,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 	if (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
 		return;
 
-	/* Only master may initiate SMP over BR/EDR */
+	/* Only initiator may initiate SMP over BR/EDR */
 	if (hcon->role != HCI_ROLE_MASTER)
 		return;
 

From 39bc74ca0119025e3cc24b97ebd964b5c605aa83 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Fri, 4 Jun 2021 16:26:26 +0800
Subject: [PATCH 2950/3292] Bluetooth: use inclusive language when tracking
 connections

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
master -> central
slave  -> peripheral

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  6 +++---
 net/bluetooth/hci_event.c        |  4 ++--
 net/bluetooth/hci_request.c      | 17 +++++++++--------
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c9ec06997e1c..fe5f3a9d9924 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -122,7 +122,7 @@ struct hci_conn_hash {
 	unsigned int     amp_num;
 	unsigned int     sco_num;
 	unsigned int     le_num;
-	unsigned int     le_num_slave;
+	unsigned int     le_num_peripheral;
 };
 
 struct bdaddr_list {
@@ -894,7 +894,7 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c)
 	case LE_LINK:
 		h->le_num++;
 		if (c->role == HCI_ROLE_SLAVE)
-			h->le_num_slave++;
+			h->le_num_peripheral++;
 		break;
 	case SCO_LINK:
 	case ESCO_LINK:
@@ -920,7 +920,7 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c)
 	case LE_LINK:
 		h->le_num--;
 		if (c->role == HCI_ROLE_SLAVE)
-			h->le_num_slave--;
+			h->le_num_peripheral--;
 		break;
 	case SCO_LINK:
 	case ESCO_LINK:
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index da013d485f14..e479dc44e572 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5384,9 +5384,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 		return NULL;
 
 	/* Most controller will fail if we try to create new connections
-	 * while we have an existing one in slave role.
+	 * while we have an existing one in peripheral role.
 	 */
-	if (hdev->conn_hash.le_num_slave > 0 &&
+	if (hdev->conn_hash.le_num_peripheral > 0 &&
 	    (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) ||
 	     !(hdev->le_states[3] & 0x10)))
 		return NULL;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3465862429fb..a5d55175176e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1519,13 +1519,14 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
 	if (hci_conn_num(hdev, LE_LINK) == 0)
 		return true;
 
-	/* Check le_states if there is any connection in slave role. */
-	if (hdev->conn_hash.le_num_slave > 0) {
-		/* Slave connection state and non connectable mode bit 20. */
+	/* Check le_states if there is any connection in peripheral role. */
+	if (hdev->conn_hash.le_num_peripheral > 0) {
+		/* Peripheral connection state and non connectable mode bit 20.
+		 */
 		if (!connectable && !(hdev->le_states[2] & 0x10))
 			return false;
 
-		/* Slave connection state and connectable mode bit 38
+		/* Peripheral connection state and connectable mode bit 38
 		 * and scannable bit 21.
 		 */
 		if (connectable && (!(hdev->le_states[4] & 0x40) ||
@@ -1533,13 +1534,13 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
 			return false;
 	}
 
-	/* Check le_states if there is any connection in master role. */
-	if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_slave) {
-		/* Master connection state and non connectable mode bit 18. */
+	/* Check le_states if there is any connection in central role. */
+	if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_peripheral) {
+		/* Central connection state and non connectable mode bit 18. */
 		if (!connectable && !(hdev->le_states[2] & 0x02))
 			return false;
 
-		/* Master connection state and connectable mode bit 35 and
+		/* Central connection state and connectable mode bit 35 and
 		 * scannable 19.
 		 */
 		if (connectable && (!(hdev->le_states[4] & 0x08) ||

From 3d4f9c00492b4e21641e5140a5e78cb50b58d60b Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Fri, 4 Jun 2021 16:26:27 +0800
Subject: [PATCH 2951/3292] Bluetooth: use inclusive language when filtering
 devices

This patch replaces some non-inclusive terms based on the appropriate
language mapping table compiled by the Bluetooth SIG:
https://specificationrefs.bluetooth.com/language-mapping/Appropriate_Language_Mapping_Table.pdf

Specifically, these terms are replaced:
blacklist -> reject list
whitelist -> accept list

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 16 +++---
 include/net/bluetooth/hci_core.h |  8 +--
 net/bluetooth/hci_core.c         | 24 ++++----
 net/bluetooth/hci_debugfs.c      |  8 +--
 net/bluetooth/hci_event.c        | 94 ++++++++++++++++----------------
 net/bluetooth/hci_request.c      | 89 +++++++++++++++---------------
 net/bluetooth/hci_sock.c         | 12 ++--
 net/bluetooth/l2cap_core.c       |  4 +-
 net/bluetooth/mgmt.c             | 14 ++---
 9 files changed, 135 insertions(+), 134 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3abd6273a189..2dc947341502 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1505,7 +1505,7 @@ struct hci_cp_le_set_scan_enable {
 } __packed;
 
 #define HCI_LE_USE_PEER_ADDR		0x00
-#define HCI_LE_USE_WHITELIST		0x01
+#define HCI_LE_USE_ACCEPT_LIST		0x01
 
 #define HCI_OP_LE_CREATE_CONN		0x200d
 struct hci_cp_le_create_conn {
@@ -1525,22 +1525,22 @@ struct hci_cp_le_create_conn {
 
 #define HCI_OP_LE_CREATE_CONN_CANCEL	0x200e
 
-#define HCI_OP_LE_READ_WHITE_LIST_SIZE	0x200f
-struct hci_rp_le_read_white_list_size {
+#define HCI_OP_LE_READ_ACCEPT_LIST_SIZE	0x200f
+struct hci_rp_le_read_accept_list_size {
 	__u8	status;
 	__u8	size;
 } __packed;
 
-#define HCI_OP_LE_CLEAR_WHITE_LIST	0x2010
+#define HCI_OP_LE_CLEAR_ACCEPT_LIST	0x2010
 
-#define HCI_OP_LE_ADD_TO_WHITE_LIST	0x2011
-struct hci_cp_le_add_to_white_list {
+#define HCI_OP_LE_ADD_TO_ACCEPT_LIST	0x2011
+struct hci_cp_le_add_to_accept_list {
 	__u8     bdaddr_type;
 	bdaddr_t bdaddr;
 } __packed;
 
-#define HCI_OP_LE_DEL_FROM_WHITE_LIST	0x2012
-struct hci_cp_le_del_from_white_list {
+#define HCI_OP_LE_DEL_FROM_ACCEPT_LIST	0x2012
+struct hci_cp_le_del_from_accept_list {
 	__u8     bdaddr_type;
 	bdaddr_t bdaddr;
 } __packed;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index fe5f3a9d9924..212f46806ce7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -327,7 +327,7 @@ struct hci_dev {
 	__u8		max_page;
 	__u8		features[HCI_MAX_PAGES][8];
 	__u8		le_features[8];
-	__u8		le_white_list_size;
+	__u8		le_accept_list_size;
 	__u8		le_resolv_list_size;
 	__u8		le_num_of_adv_sets;
 	__u8		le_states[8];
@@ -522,14 +522,14 @@ struct hci_dev {
 	struct hci_conn_hash	conn_hash;
 
 	struct list_head	mgmt_pending;
-	struct list_head	blacklist;
-	struct list_head	whitelist;
+	struct list_head	reject_list;
+	struct list_head	accept_list;
 	struct list_head	uuids;
 	struct list_head	link_keys;
 	struct list_head	long_term_keys;
 	struct list_head	identity_resolving_keys;
 	struct list_head	remote_oob_data;
-	struct list_head	le_white_list;
+	struct list_head	le_accept_list;
 	struct list_head	le_resolv_list;
 	struct list_head	le_conn_params;
 	struct list_head	pend_le_conns;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5735171e2e23..2560ed2f144d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -749,14 +749,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 		}
 
 		if (hdev->commands[26] & 0x40) {
-			/* Read LE White List Size */
-			hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE,
+			/* Read LE Accept List Size */
+			hci_req_add(req, HCI_OP_LE_READ_ACCEPT_LIST_SIZE,
 				    0, NULL);
 		}
 
 		if (hdev->commands[26] & 0x80) {
-			/* Clear LE White List */
-			hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
+			/* Clear LE Accept List */
+			hci_req_add(req, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL);
 		}
 
 		if (hdev->commands[34] & 0x40) {
@@ -3713,13 +3713,13 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
 		/* Suspend consists of two actions:
 		 *  - First, disconnect everything and make the controller not
 		 *    connectable (disabling scanning)
-		 *  - Second, program event filter/whitelist and enable scan
+		 *  - Second, program event filter/accept list and enable scan
 		 */
 		ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
 		if (!ret)
 			state = BT_SUSPEND_DISCONNECT;
 
-		/* Only configure whitelist if disconnect succeeded and wake
+		/* Only configure accept list if disconnect succeeded and wake
 		 * isn't being prevented.
 		 */
 		if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) {
@@ -3827,14 +3827,14 @@ struct hci_dev *hci_alloc_dev(void)
 	mutex_init(&hdev->req_lock);
 
 	INIT_LIST_HEAD(&hdev->mgmt_pending);
-	INIT_LIST_HEAD(&hdev->blacklist);
-	INIT_LIST_HEAD(&hdev->whitelist);
+	INIT_LIST_HEAD(&hdev->reject_list);
+	INIT_LIST_HEAD(&hdev->accept_list);
 	INIT_LIST_HEAD(&hdev->uuids);
 	INIT_LIST_HEAD(&hdev->link_keys);
 	INIT_LIST_HEAD(&hdev->long_term_keys);
 	INIT_LIST_HEAD(&hdev->identity_resolving_keys);
 	INIT_LIST_HEAD(&hdev->remote_oob_data);
-	INIT_LIST_HEAD(&hdev->le_white_list);
+	INIT_LIST_HEAD(&hdev->le_accept_list);
 	INIT_LIST_HEAD(&hdev->le_resolv_list);
 	INIT_LIST_HEAD(&hdev->le_conn_params);
 	INIT_LIST_HEAD(&hdev->pend_le_conns);
@@ -4047,8 +4047,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	destroy_workqueue(hdev->req_workqueue);
 
 	hci_dev_lock(hdev);
-	hci_bdaddr_list_clear(&hdev->blacklist);
-	hci_bdaddr_list_clear(&hdev->whitelist);
+	hci_bdaddr_list_clear(&hdev->reject_list);
+	hci_bdaddr_list_clear(&hdev->accept_list);
 	hci_uuids_clear(hdev);
 	hci_link_keys_clear(hdev);
 	hci_smp_ltks_clear(hdev);
@@ -4056,7 +4056,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	hci_remote_oob_data_clear(hdev);
 	hci_adv_instances_clear(hdev);
 	hci_adv_monitors_clear(hdev);
-	hci_bdaddr_list_clear(&hdev->le_white_list);
+	hci_bdaddr_list_clear(&hdev->le_accept_list);
 	hci_bdaddr_list_clear(&hdev->le_resolv_list);
 	hci_conn_params_clear_all(hdev);
 	hci_discovery_filter_clear(hdev);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 47f4f21fbc1a..841393389f7b 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -125,7 +125,7 @@ static int device_list_show(struct seq_file *f, void *ptr)
 	struct bdaddr_list *b;
 
 	hci_dev_lock(hdev);
-	list_for_each_entry(b, &hdev->whitelist, list)
+	list_for_each_entry(b, &hdev->accept_list, list)
 		seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
 	list_for_each_entry(p, &hdev->le_conn_params, list) {
 		seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type,
@@ -144,7 +144,7 @@ static int blacklist_show(struct seq_file *f, void *p)
 	struct bdaddr_list *b;
 
 	hci_dev_lock(hdev);
-	list_for_each_entry(b, &hdev->blacklist, list)
+	list_for_each_entry(b, &hdev->reject_list, list)
 		seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
 	hci_dev_unlock(hdev);
 
@@ -784,7 +784,7 @@ static int white_list_show(struct seq_file *f, void *ptr)
 	struct bdaddr_list *b;
 
 	hci_dev_lock(hdev);
-	list_for_each_entry(b, &hdev->le_white_list, list)
+	list_for_each_entry(b, &hdev->le_accept_list, list)
 		seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
 	hci_dev_unlock(hdev);
 
@@ -1195,7 +1195,7 @@ void hci_debugfs_create_le(struct hci_dev *hdev)
 				    &force_static_address_fops);
 
 	debugfs_create_u8("white_list_size", 0444, hdev->debugfs,
-			  &hdev->le_white_list_size);
+			  &hdev->le_accept_list_size);
 	debugfs_create_file("white_list", 0444, hdev->debugfs, hdev,
 			    &white_list_fops);
 	debugfs_create_u8("resolv_list_size", 0444, hdev->debugfs,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index e479dc44e572..98ec486743ba 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -236,7 +236,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hdev->ssp_debug_mode = 0;
 
-	hci_bdaddr_list_clear(&hdev->le_white_list);
+	hci_bdaddr_list_clear(&hdev->le_accept_list);
 	hci_bdaddr_list_clear(&hdev->le_resolv_list);
 }
 
@@ -1492,36 +1492,22 @@ static void hci_cc_le_read_num_adv_sets(struct hci_dev *hdev,
 	hdev->le_num_of_adv_sets = rp->num_of_sets;
 }
 
-static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
-					   struct sk_buff *skb)
+static void hci_cc_le_read_accept_list_size(struct hci_dev *hdev,
+					    struct sk_buff *skb)
 {
-	struct hci_rp_le_read_white_list_size *rp = (void *) skb->data;
+	struct hci_rp_le_read_accept_list_size *rp = (void *)skb->data;
 
 	BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size);
 
 	if (rp->status)
 		return;
 
-	hdev->le_white_list_size = rp->size;
+	hdev->le_accept_list_size = rp->size;
 }
 
-static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
-				       struct sk_buff *skb)
-{
-	__u8 status = *((__u8 *) skb->data);
-
-	BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
-	if (status)
-		return;
-
-	hci_bdaddr_list_clear(&hdev->le_white_list);
-}
-
-static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
+static void hci_cc_le_clear_accept_list(struct hci_dev *hdev,
 					struct sk_buff *skb)
 {
-	struct hci_cp_le_add_to_white_list *sent;
 	__u8 status = *((__u8 *) skb->data);
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1529,18 +1515,13 @@ static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
 	if (status)
 		return;
 
-	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_WHITE_LIST);
-	if (!sent)
-		return;
-
-	hci_bdaddr_list_add(&hdev->le_white_list, &sent->bdaddr,
-			   sent->bdaddr_type);
+	hci_bdaddr_list_clear(&hdev->le_accept_list);
 }
 
-static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
-					  struct sk_buff *skb)
+static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev,
+					 struct sk_buff *skb)
 {
-	struct hci_cp_le_del_from_white_list *sent;
+	struct hci_cp_le_add_to_accept_list *sent;
 	__u8 status = *((__u8 *) skb->data);
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1548,11 +1529,30 @@ static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
 	if (status)
 		return;
 
-	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_WHITE_LIST);
+	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST);
 	if (!sent)
 		return;
 
-	hci_bdaddr_list_del(&hdev->le_white_list, &sent->bdaddr,
+	hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
+			    sent->bdaddr_type);
+}
+
+static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev,
+					   struct sk_buff *skb)
+{
+	struct hci_cp_le_del_from_accept_list *sent;
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_ACCEPT_LIST);
+	if (!sent)
+		return;
+
+	hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
 			    sent->bdaddr_type);
 }
 
@@ -2367,7 +2367,7 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr,
 	/* We don't want the connection attempt to stick around
 	 * indefinitely since LE doesn't have a page timeout concept
 	 * like BR/EDR. Set a timer for any connection that doesn't use
-	 * the white list for connecting.
+	 * the accept list for connecting.
 	 */
 	if (filter_policy == HCI_LE_USE_PEER_ADDR)
 		queue_delayed_work(conn->hdev->workqueue,
@@ -2623,7 +2623,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		 * only used during suspend.
 		 */
 		if (ev->link_type == ACL_LINK &&
-		    hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+		    hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
 						      &ev->bdaddr,
 						      BDADDR_BREDR)) {
 			conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
@@ -2745,19 +2745,19 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 	}
 
-	if (hci_bdaddr_list_lookup(&hdev->blacklist, &ev->bdaddr,
+	if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
 				   BDADDR_BREDR)) {
 		hci_reject_conn(hdev, &ev->bdaddr);
 		return;
 	}
 
-	/* Require HCI_CONNECTABLE or a whitelist entry to accept the
+	/* Require HCI_CONNECTABLE or an accept list entry to accept the
 	 * connection. These features are only touched through mgmt so
 	 * only do the checks if HCI_MGMT is set.
 	 */
 	if (hci_dev_test_flag(hdev, HCI_MGMT) &&
 	    !hci_dev_test_flag(hdev, HCI_CONNECTABLE) &&
-	    !hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &ev->bdaddr,
+	    !hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
 					       BDADDR_BREDR)) {
 		hci_reject_conn(hdev, &ev->bdaddr);
 		return;
@@ -3538,20 +3538,20 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_set_scan_enable(hdev, skb);
 		break;
 
-	case HCI_OP_LE_READ_WHITE_LIST_SIZE:
-		hci_cc_le_read_white_list_size(hdev, skb);
+	case HCI_OP_LE_READ_ACCEPT_LIST_SIZE:
+		hci_cc_le_read_accept_list_size(hdev, skb);
 		break;
 
-	case HCI_OP_LE_CLEAR_WHITE_LIST:
-		hci_cc_le_clear_white_list(hdev, skb);
+	case HCI_OP_LE_CLEAR_ACCEPT_LIST:
+		hci_cc_le_clear_accept_list(hdev, skb);
 		break;
 
-	case HCI_OP_LE_ADD_TO_WHITE_LIST:
-		hci_cc_le_add_to_white_list(hdev, skb);
+	case HCI_OP_LE_ADD_TO_ACCEPT_LIST:
+		hci_cc_le_add_to_accept_list(hdev, skb);
 		break;
 
-	case HCI_OP_LE_DEL_FROM_WHITE_LIST:
-		hci_cc_le_del_from_white_list(hdev, skb);
+	case HCI_OP_LE_DEL_FROM_ACCEPT_LIST:
+		hci_cc_le_del_from_accept_list(hdev, skb);
 		break;
 
 	case HCI_OP_LE_READ_SUPPORTED_STATES:
@@ -5132,7 +5132,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 
 		/* If we didn't have a hci_conn object previously
 		 * but we're in central role this must be something
-		 * initiated using a white list. Since white list based
+		 * initiated using an accept list. Since accept list based
 		 * connections are not "first class citizens" we don't
 		 * have full tracking of them. Therefore, we go ahead
 		 * with a "best effort" approach of determining the
@@ -5224,7 +5224,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 		addr_type = BDADDR_LE_RANDOM;
 
 	/* Drop the connection if the device is blocked */
-	if (hci_bdaddr_list_lookup(&hdev->blacklist, &conn->dst, addr_type)) {
+	if (hci_bdaddr_list_lookup(&hdev->reject_list, &conn->dst, addr_type)) {
 		hci_conn_drop(conn);
 		goto unlock;
 	}
@@ -5380,7 +5380,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 		return NULL;
 
 	/* Ignore if the device is blocked */
-	if (hci_bdaddr_list_lookup(&hdev->blacklist, addr, addr_type))
+	if (hci_bdaddr_list_lookup(&hdev->reject_list, addr, addr_type))
 		return NULL;
 
 	/* Most controller will fail if we try to create new connections
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index a5d55175176e..f7a9d97f3e84 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -745,17 +745,17 @@ void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn)
 	}
 }
 
-static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
-				u8 bdaddr_type)
+static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr,
+				 u8 bdaddr_type)
 {
-	struct hci_cp_le_del_from_white_list cp;
+	struct hci_cp_le_del_from_accept_list cp;
 
 	cp.bdaddr_type = bdaddr_type;
 	bacpy(&cp.bdaddr, bdaddr);
 
-	bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr,
+	bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr,
 		   cp.bdaddr_type);
-	hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp);
+	hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp);
 
 	if (use_ll_privacy(req->hdev) &&
 	    hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) {
@@ -774,31 +774,31 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
 	}
 }
 
-/* Adds connection to white list if needed. On error, returns -1. */
-static int add_to_white_list(struct hci_request *req,
-			     struct hci_conn_params *params, u8 *num_entries,
-			     bool allow_rpa)
+/* Adds connection to accept list if needed. On error, returns -1. */
+static int add_to_accept_list(struct hci_request *req,
+			      struct hci_conn_params *params, u8 *num_entries,
+			      bool allow_rpa)
 {
-	struct hci_cp_le_add_to_white_list cp;
+	struct hci_cp_le_add_to_accept_list cp;
 	struct hci_dev *hdev = req->hdev;
 
-	/* Already in white list */
-	if (hci_bdaddr_list_lookup(&hdev->le_white_list, &params->addr,
+	/* Already in accept list */
+	if (hci_bdaddr_list_lookup(&hdev->le_accept_list, &params->addr,
 				   params->addr_type))
 		return 0;
 
 	/* Select filter policy to accept all advertising */
-	if (*num_entries >= hdev->le_white_list_size)
+	if (*num_entries >= hdev->le_accept_list_size)
 		return -1;
 
-	/* White list can not be used with RPAs */
+	/* Accept list can not be used with RPAs */
 	if (!allow_rpa &&
 	    !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
 	    hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
 		return -1;
 	}
 
-	/* During suspend, only wakeable devices can be in whitelist */
+	/* During suspend, only wakeable devices can be in accept list */
 	if (hdev->suspended && !hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
 						   params->current_flags))
 		return 0;
@@ -807,9 +807,9 @@ static int add_to_white_list(struct hci_request *req,
 	cp.bdaddr_type = params->addr_type;
 	bacpy(&cp.bdaddr, &params->addr);
 
-	bt_dev_dbg(hdev, "Add %pMR (0x%x) to whitelist", &cp.bdaddr,
+	bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr,
 		   cp.bdaddr_type);
-	hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp);
+	hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp);
 
 	if (use_ll_privacy(hdev) &&
 	    hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) {
@@ -837,15 +837,15 @@ static int add_to_white_list(struct hci_request *req,
 	return 0;
 }
 
-static u8 update_white_list(struct hci_request *req)
+static u8 update_accept_list(struct hci_request *req)
 {
 	struct hci_dev *hdev = req->hdev;
 	struct hci_conn_params *params;
 	struct bdaddr_list *b;
 	u8 num_entries = 0;
 	bool pend_conn, pend_report;
-	/* We allow whitelisting even with RPAs in suspend. In the worst case,
-	 * we won't be able to wake from devices that use the privacy1.2
+	/* We allow usage of accept list even with RPAs in suspend. In the worst
+	 * case, we won't be able to wake from devices that use the privacy1.2
 	 * features. Additionally, once we support privacy1.2 and IRK
 	 * offloading, we can update this to also check for those conditions.
 	 */
@@ -855,13 +855,13 @@ static u8 update_white_list(struct hci_request *req)
 	    hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
 		allow_rpa = true;
 
-	/* Go through the current white list programmed into the
+	/* Go through the current accept list programmed into the
 	 * controller one by one and check if that address is still
 	 * in the list of pending connections or list of devices to
 	 * report. If not present in either list, then queue the
 	 * command to remove it from the controller.
 	 */
-	list_for_each_entry(b, &hdev->le_white_list, list) {
+	list_for_each_entry(b, &hdev->le_accept_list, list) {
 		pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
 						      &b->bdaddr,
 						      b->bdaddr_type);
@@ -870,14 +870,14 @@ static u8 update_white_list(struct hci_request *req)
 							b->bdaddr_type);
 
 		/* If the device is not likely to connect or report,
-		 * remove it from the whitelist.
+		 * remove it from the accept list.
 		 */
 		if (!pend_conn && !pend_report) {
-			del_from_white_list(req, &b->bdaddr, b->bdaddr_type);
+			del_from_accept_list(req, &b->bdaddr, b->bdaddr_type);
 			continue;
 		}
 
-		/* White list can not be used with RPAs */
+		/* Accept list can not be used with RPAs */
 		if (!allow_rpa &&
 		    !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
 		    hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
@@ -887,27 +887,27 @@ static u8 update_white_list(struct hci_request *req)
 		num_entries++;
 	}
 
-	/* Since all no longer valid white list entries have been
+	/* Since all no longer valid accept list entries have been
 	 * removed, walk through the list of pending connections
 	 * and ensure that any new device gets programmed into
 	 * the controller.
 	 *
 	 * If the list of the devices is larger than the list of
-	 * available white list entries in the controller, then
+	 * available accept list entries in the controller, then
 	 * just abort and return filer policy value to not use the
-	 * white list.
+	 * accept list.
 	 */
 	list_for_each_entry(params, &hdev->pend_le_conns, action) {
-		if (add_to_white_list(req, params, &num_entries, allow_rpa))
+		if (add_to_accept_list(req, params, &num_entries, allow_rpa))
 			return 0x00;
 	}
 
 	/* After adding all new pending connections, walk through
 	 * the list of pending reports and also add these to the
-	 * white list if there is still space. Abort if space runs out.
+	 * accept list if there is still space. Abort if space runs out.
 	 */
 	list_for_each_entry(params, &hdev->pend_le_reports, action) {
-		if (add_to_white_list(req, params, &num_entries, allow_rpa))
+		if (add_to_accept_list(req, params, &num_entries, allow_rpa))
 			return 0x00;
 	}
 
@@ -921,7 +921,7 @@ static u8 update_white_list(struct hci_request *req)
 	    hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST)
 		return 0x00;
 
-	/* Select filter policy to use white list */
+	/* Select filter policy to use accept list */
 	return 0x01;
 }
 
@@ -1078,20 +1078,20 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 		return;
 
 	bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state);
-	/* Adding or removing entries from the white list must
+	/* Adding or removing entries from the accept list must
 	 * happen before enabling scanning. The controller does
-	 * not allow white list modification while scanning.
+	 * not allow accept list modification while scanning.
 	 */
-	filter_policy = update_white_list(req);
+	filter_policy = update_accept_list(req);
 
 	/* When the controller is using random resolvable addresses and
 	 * with that having LE privacy enabled, then controllers with
 	 * Extended Scanner Filter Policies support can now enable support
 	 * for handling directed advertising.
 	 *
-	 * So instead of using filter polices 0x00 (no whitelist)
-	 * and 0x01 (whitelist enabled) use the new filter policies
-	 * 0x02 (no whitelist) and 0x03 (whitelist enabled).
+	 * So instead of using filter polices 0x00 (no accept list)
+	 * and 0x01 (accept list enabled) use the new filter policies
+	 * 0x02 (no accept list) and 0x03 (accept list enabled).
 	 */
 	if (hci_dev_test_flag(hdev, HCI_PRIVACY) &&
 	    (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY))
@@ -1127,7 +1127,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 		interval = hdev->le_scan_interval;
 	}
 
-	bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy);
+	bt_dev_dbg(hdev, "LE passive scan with accept list = %d",
+		   filter_policy);
 	hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window,
 			   own_addr_type, filter_policy, filter_dup,
 			   addr_resolv);
@@ -1180,7 +1181,7 @@ static void hci_req_set_event_filter(struct hci_request *req)
 	/* Always clear event filter when starting */
 	hci_req_clear_event_filter(req);
 
-	list_for_each_entry(b, &hdev->whitelist, list) {
+	list_for_each_entry(b, &hdev->accept_list, list) {
 		if (!hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
 					b->current_flags))
 			continue;
@@ -2623,11 +2624,11 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 	return 0;
 }
 
-static bool disconnected_whitelist_entries(struct hci_dev *hdev)
+static bool disconnected_accept_list_entries(struct hci_dev *hdev)
 {
 	struct bdaddr_list *b;
 
-	list_for_each_entry(b, &hdev->whitelist, list) {
+	list_for_each_entry(b, &hdev->accept_list, list) {
 		struct hci_conn *conn;
 
 		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr);
@@ -2659,7 +2660,7 @@ void __hci_req_update_scan(struct hci_request *req)
 		return;
 
 	if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) ||
-	    disconnected_whitelist_entries(hdev))
+	    disconnected_accept_list_entries(hdev))
 		scan = SCAN_PAGE;
 	else
 		scan = SCAN_DISABLED;
@@ -3151,7 +3152,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
 	uint16_t interval = opt;
 	struct hci_dev *hdev = req->hdev;
 	u8 own_addr_type;
-	/* White list is not used for discovery */
+	/* Accept list is not used for discovery */
 	u8 filter_policy = 0x00;
 	/* Default is to enable duplicates filter */
 	u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index e8d53af7c6a6..b04a5a02ecf3 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -892,7 +892,7 @@ static int hci_sock_release(struct socket *sock)
 	return 0;
 }
 
-static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
+static int hci_sock_reject_list_add(struct hci_dev *hdev, void __user *arg)
 {
 	bdaddr_t bdaddr;
 	int err;
@@ -902,14 +902,14 @@ static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
 
 	hci_dev_lock(hdev);
 
-	err = hci_bdaddr_list_add(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
+	err = hci_bdaddr_list_add(&hdev->reject_list, &bdaddr, BDADDR_BREDR);
 
 	hci_dev_unlock(hdev);
 
 	return err;
 }
 
-static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
+static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg)
 {
 	bdaddr_t bdaddr;
 	int err;
@@ -919,7 +919,7 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
 
 	hci_dev_lock(hdev);
 
-	err = hci_bdaddr_list_del(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
+	err = hci_bdaddr_list_del(&hdev->reject_list, &bdaddr, BDADDR_BREDR);
 
 	hci_dev_unlock(hdev);
 
@@ -959,12 +959,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
 	case HCIBLOCKADDR:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		return hci_sock_blacklist_add(hdev, (void __user *)arg);
+		return hci_sock_reject_list_add(hdev, (void __user *)arg);
 
 	case HCIUNBLOCKADDR:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		return hci_sock_blacklist_del(hdev, (void __user *)arg);
+		return hci_sock_reject_list_del(hdev, (void __user *)arg);
 	}
 
 	return -ENOIOCTLCMD;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b76c5d00b082..77ba68209dbd 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7662,7 +7662,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 	 * at least ensure that we ignore incoming data from them.
 	 */
 	if (hcon->type == LE_LINK &&
-	    hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst,
+	    hci_bdaddr_list_lookup(&hcon->hdev->reject_list, &hcon->dst,
 				   bdaddr_dst_type(hcon))) {
 		kfree_skb(skb);
 		return;
@@ -8119,7 +8119,7 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 	dst_type = bdaddr_dst_type(hcon);
 
 	/* If device is blocked, do not create channels for it */
-	if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type))
+	if (hci_bdaddr_list_lookup(&hdev->reject_list, &hcon->dst, dst_type))
 		return;
 
 	/* Find fixed channels and notify them of the new connection. We
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 22f9f52c5ae6..d1bf5a55ff85 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4064,7 +4064,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
 	memset(&rp, 0, sizeof(rp));
 
 	if (cp->addr.type == BDADDR_BREDR) {
-		br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+		br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
 							      &cp->addr.bdaddr,
 							      cp->addr.type);
 		if (!br_params)
@@ -4132,7 +4132,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
 	hci_dev_lock(hdev);
 
 	if (cp->addr.type == BDADDR_BREDR) {
-		br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+		br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
 							      &cp->addr.bdaddr,
 							      cp->addr.type);
 
@@ -5209,7 +5209,7 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hci_dev_lock(hdev);
 
-	err = hci_bdaddr_list_add(&hdev->blacklist, &cp->addr.bdaddr,
+	err = hci_bdaddr_list_add(&hdev->reject_list, &cp->addr.bdaddr,
 				  cp->addr.type);
 	if (err < 0) {
 		status = MGMT_STATUS_FAILED;
@@ -5245,7 +5245,7 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hci_dev_lock(hdev);
 
-	err = hci_bdaddr_list_del(&hdev->blacklist, &cp->addr.bdaddr,
+	err = hci_bdaddr_list_del(&hdev->reject_list, &cp->addr.bdaddr,
 				  cp->addr.type);
 	if (err < 0) {
 		status = MGMT_STATUS_INVALID_PARAMS;
@@ -6736,7 +6736,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
 			goto unlock;
 		}
 
-		err = hci_bdaddr_list_add_with_flags(&hdev->whitelist,
+		err = hci_bdaddr_list_add_with_flags(&hdev->accept_list,
 						     &cp->addr.bdaddr,
 						     cp->addr.type, 0);
 		if (err)
@@ -6834,7 +6834,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
 		}
 
 		if (cp->addr.type == BDADDR_BREDR) {
-			err = hci_bdaddr_list_del(&hdev->whitelist,
+			err = hci_bdaddr_list_del(&hdev->accept_list,
 						  &cp->addr.bdaddr,
 						  cp->addr.type);
 			if (err) {
@@ -6905,7 +6905,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
 			goto unlock;
 		}
 
-		list_for_each_entry_safe(b, btmp, &hdev->whitelist, list) {
+		list_for_each_entry_safe(b, btmp, &hdev->accept_list, list) {
 			device_removed(sk, hdev, &b->bdaddr, b->bdaddr_type);
 			list_del(&b->list);
 			kfree(b);

From c9ed0a7077306f9d41d74fb006ab5dbada8349c5 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 9 Jun 2021 11:09:27 -0700
Subject: [PATCH 2952/3292] Bluetooth: Fix Set Extended (Scan Response) Data

These command do have variable length and the length can go up to 251,
so this changes the struct to not use a fixed size and then when
creating the PDU only the actual length of the data send to the
controller.

Fixes: a0fb3726ba551 ("Bluetooth: Use Set ext adv/scan rsp data if controller supports")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  6 ++--
 include/net/bluetooth/hci_core.h |  8 ++---
 net/bluetooth/hci_request.c      | 51 ++++++++++++++++++--------------
 3 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 2dc947341502..b80415011dcd 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1775,13 +1775,15 @@ struct hci_cp_ext_adv_set {
 	__u8  max_events;
 } __packed;
 
+#define HCI_MAX_EXT_AD_LENGTH	251
+
 #define HCI_OP_LE_SET_EXT_ADV_DATA		0x2037
 struct hci_cp_le_set_ext_adv_data {
 	__u8  handle;
 	__u8  operation;
 	__u8  frag_pref;
 	__u8  length;
-	__u8  data[HCI_MAX_AD_LENGTH];
+	__u8  data[];
 } __packed;
 
 #define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA		0x2038
@@ -1790,7 +1792,7 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 	__u8  operation;
 	__u8  frag_pref;
 	__u8  length;
-	__u8  data[HCI_MAX_AD_LENGTH];
+	__u8  data[];
 } __packed;
 
 #define LE_SET_ADV_DATA_OP_COMPLETE	0x03
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 212f46806ce7..a53e94459ecd 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -228,9 +228,9 @@ struct adv_info {
 	__u16	remaining_time;
 	__u16	duration;
 	__u16	adv_data_len;
-	__u8	adv_data[HCI_MAX_AD_LENGTH];
+	__u8	adv_data[HCI_MAX_EXT_AD_LENGTH];
 	__u16	scan_rsp_len;
-	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
+	__u8	scan_rsp_data[HCI_MAX_EXT_AD_LENGTH];
 	__s8	tx_power;
 	__u32   min_interval;
 	__u32   max_interval;
@@ -551,9 +551,9 @@ struct hci_dev {
 	DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS);
 
 	__s8			adv_tx_power;
-	__u8			adv_data[HCI_MAX_AD_LENGTH];
+	__u8			adv_data[HCI_MAX_EXT_AD_LENGTH];
 	__u8			adv_data_len;
-	__u8			scan_rsp_data[HCI_MAX_AD_LENGTH];
+	__u8			scan_rsp_data[HCI_MAX_EXT_AD_LENGTH];
 	__u8			scan_rsp_data_len;
 
 	struct list_head	adv_instances;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f7a9d97f3e84..1d14adc023e9 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1716,30 +1716,33 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
 		return;
 
 	if (ext_adv_capable(hdev)) {
-		struct hci_cp_le_set_ext_scan_rsp_data cp;
+		struct {
+			struct hci_cp_le_set_ext_scan_rsp_data cp;
+			u8 data[HCI_MAX_EXT_AD_LENGTH];
+		} pdu;
 
-		memset(&cp, 0, sizeof(cp));
+		memset(&pdu, 0, sizeof(pdu));
 
 		if (instance)
 			len = create_instance_scan_rsp_data(hdev, instance,
-							    cp.data);
+							    pdu.data);
 		else
-			len = create_default_scan_rsp_data(hdev, cp.data);
+			len = create_default_scan_rsp_data(hdev, pdu.data);
 
 		if (hdev->scan_rsp_data_len == len &&
-		    !memcmp(cp.data, hdev->scan_rsp_data, len))
+		    !memcmp(pdu.data, hdev->scan_rsp_data, len))
 			return;
 
-		memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
+		memcpy(hdev->scan_rsp_data, pdu.data, len);
 		hdev->scan_rsp_data_len = len;
 
-		cp.handle = instance;
-		cp.length = len;
-		cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
-		cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+		pdu.cp.handle = instance;
+		pdu.cp.length = len;
+		pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+		pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
 
-		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, sizeof(cp),
-			    &cp);
+		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
+			    sizeof(pdu.cp) + len, &pdu.cp);
 	} else {
 		struct hci_cp_le_set_scan_rsp_data cp;
 
@@ -1862,26 +1865,30 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
 		return;
 
 	if (ext_adv_capable(hdev)) {
-		struct hci_cp_le_set_ext_adv_data cp;
+		struct {
+			struct hci_cp_le_set_ext_adv_data cp;
+			u8 data[HCI_MAX_EXT_AD_LENGTH];
+		} pdu;
 
-		memset(&cp, 0, sizeof(cp));
+		memset(&pdu, 0, sizeof(pdu));
 
-		len = create_instance_adv_data(hdev, instance, cp.data);
+		len = create_instance_adv_data(hdev, instance, pdu.data);
 
 		/* There's nothing to do if the data hasn't changed */
 		if (hdev->adv_data_len == len &&
-		    memcmp(cp.data, hdev->adv_data, len) == 0)
+		    memcmp(pdu.data, hdev->adv_data, len) == 0)
 			return;
 
-		memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+		memcpy(hdev->adv_data, pdu.data, len);
 		hdev->adv_data_len = len;
 
-		cp.length = len;
-		cp.handle = instance;
-		cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
-		cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+		pdu.cp.length = len;
+		pdu.cp.handle = instance;
+		pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+		pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
 
-		hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA, sizeof(cp), &cp);
+		hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA,
+			    sizeof(pdu.cp) + len, &pdu.cp);
 	} else {
 		struct hci_cp_le_set_adv_data cp;
 

From dd912f43bbda87ed37099a9287e4fbb7c85af706 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 16 Jun 2021 14:07:57 +0100
Subject: [PATCH 2953/3292] Bluetooth: btmrvl: remove redundant continue
 statement

The continue statement in the for-loop has no effect,
remove it.

Addresses-Coverity: ("Continue has no effect")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmrvl_sdio.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 33d58b30c5ac..cddd350beba3 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -1461,9 +1461,7 @@ static void btmrvl_sdio_coredump(struct device *dev)
 					BT_ERR("Allocated buffer not enough");
 			}
 
-			if (stat != RDWR_STATUS_DONE) {
-				continue;
-			} else {
+			if (stat == RDWR_STATUS_DONE) {
 				BT_INFO("%s done: size=0x%tx",
 					entry->mem_name,
 					dbg_ptr - entry->mem_ptr);

From 43e59cb7e6077110c4622e61a188e7703e8c7e36 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 15 Jun 2021 21:23:35 +0200
Subject: [PATCH 2954/3292] Bluetooth: Increment management interface revision

Increment the mgmt revision due to recent changes.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/mgmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d1bf5a55ff85..3663f880df11 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -40,7 +40,7 @@
 #include "msft.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	20
+#define MGMT_REVISION	21
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,

From 23837a6d7a1a61818ed94a6b8af552d6cf7d32d5 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 22 Jun 2021 20:59:02 -0700
Subject: [PATCH 2955/3292] Bluetooth: Fix handling of
 HCI_LE_Advertising_Set_Terminated event

Error status of this event means that it has ended due reasons other
than a connection:

 'If advertising has terminated as a result of the advertising duration
 elapsing, the Status parameter shall be set to the error code
 Advertising Timeout (0x3C).'

 'If advertising has terminated because the
 Max_Extended_Advertising_Events was reached, the Status parameter
 shall be set to the error code Limit Reached (0x43).'

Fixes: acf0aeae431a0 ("Bluetooth: Handle ADv set terminated event")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 98ec486743ba..1c3018202564 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5322,8 +5322,19 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
 
-	if (ev->status)
+	if (ev->status) {
+		struct adv_info *adv;
+
+		adv = hci_find_adv_instance(hdev, ev->handle);
+		if (!adv)
+			return;
+
+		/* Remove advertising as it has been terminated */
+		hci_remove_adv_instance(hdev, ev->handle);
+		mgmt_advertising_removed(NULL, hdev, ev->handle);
+
 		return;
+	}
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle));
 	if (conn) {

From 1f0536139cb8e8175ca034e12706b86f77f9061e Mon Sep 17 00:00:00 2001
From: Nigel Christian <nigel.l.christian@gmail.com>
Date: Wed, 16 Jun 2021 15:45:02 -0400
Subject: [PATCH 2956/3292] Bluetooth: hci_uart: Remove redundant assignment to
 fw_ptr

The variable fw_ptr is assigned a value that is not read and the same value
is assigned in the patch goto. The assignment is redundant and can be
removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Nigel Christian <nigel.l.christian@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_ag6xx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c
index 1f55df93e4ce..2d40302409ff 100644
--- a/drivers/bluetooth/hci_ag6xx.c
+++ b/drivers/bluetooth/hci_ag6xx.c
@@ -199,7 +199,6 @@ static int ag6xx_setup(struct hci_uart *hu)
 			   fwname, err);
 		goto patch;
 	}
-	fw_ptr = fw->data;
 
 	bt_dev_info(hdev, "Applying bddata (%s)", fwname);
 

From 6f8515568e69d97ddcbd9bfca10f57e9f4591d59 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Mon, 19 Apr 2021 16:32:16 +0300
Subject: [PATCH 2957/3292] net/mlx5: Compare sampler flow destination ID in
 fs_core

When comparing sampler flow destinations,
in fs_core, consider sampler ID as well.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b8617458a3fd..d7bf0a3e4a52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1504,7 +1504,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
 		     d1->tir_num == d2->tir_num) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
-		     d1->ft_num == d2->ft_num))
+		     d1->ft_num == d2->ft_num) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER &&
+		     d1->sampler_id == d2->sampler_id))
 			return true;
 	}
 

From 1ab6dc35e9148e3cb4a837fdd08f1ca56b55eda0 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Mon, 19 Apr 2021 16:23:49 +0300
Subject: [PATCH 2958/3292] net/mlx5: DR, Add support for flow sampler offload

Add SW steering support for sFlow / flow sampler action.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_action.c   | 55 +++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_cmd.c      | 33 +++++++++++
 .../mellanox/mlx5/core/steering/dr_types.h    | 14 +++++
 .../mellanox/mlx5/core/steering/fs_dr.c       | 17 +++++-
 .../mellanox/mlx5/core/steering/mlx5dr.h      |  3 +
 include/linux/mlx5/mlx5_ifc.h                 |  5 ++
 6 files changed, 124 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index de68c0ec2143..6475ba35cf6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -31,6 +31,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
@@ -45,6 +46,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -57,6 +59,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
 		},
@@ -64,6 +67,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -74,6 +78,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
@@ -86,6 +91,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_TAG]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
@@ -104,6 +110,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_NO_ACTION] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
@@ -114,11 +121,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_ENCAP] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
@@ -128,6 +137,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -137,6 +147,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
@@ -152,6 +163,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_NO_ACTION] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
@@ -166,6 +178,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_DECAP,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -178,11 +191,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_QP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
 		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -192,6 +207,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_POP_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
@@ -203,6 +219,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_TNL_L2_TO_L2]	= DR_ACTION_STATE_DECAP,
 			[DR_ACTION_TYP_TNL_L3_TO_L2]	= DR_ACTION_STATE_DECAP,
@@ -221,6 +238,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_NO_ACTION] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -233,11 +251,13 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_ENCAP,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_VPORT]		= DR_ACTION_STATE_TERM,
 		},
 		[DR_ACTION_STATE_MODIFY_HDR] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
 			[DR_ACTION_TYP_L2_TO_TNL_L3]	= DR_ACTION_STATE_ENCAP,
@@ -248,6 +268,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_MODIFY_VLAN] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_PUSH_VLAN]	= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_MODIFY_VLAN,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -258,6 +279,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
 		[DR_ACTION_STATE_NON_TERM] = {
 			[DR_ACTION_TYP_DROP]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_FT]		= DR_ACTION_STATE_TERM,
+			[DR_ACTION_TYP_SAMPLER]		= DR_ACTION_STATE_TERM,
 			[DR_ACTION_TYP_CTR]		= DR_ACTION_STATE_NON_TERM,
 			[DR_ACTION_TYP_MODIFY_HDR]	= DR_ACTION_STATE_MODIFY_HDR,
 			[DR_ACTION_TYP_L2_TO_TNL_L2]	= DR_ACTION_STATE_ENCAP,
@@ -519,6 +541,10 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 			attr.reformat.size = action->reformat->size;
 			attr.reformat.id = action->reformat->id;
 			break;
+		case DR_ACTION_TYP_SAMPLER:
+			attr.final_icm_addr = rx_rule ? action->sampler->rx_icm_addr :
+							action->sampler->tx_icm_addr;
+			break;
 		case DR_ACTION_TYP_VPORT:
 			attr.hit_gvmi = action->vport->caps->vhca_gvmi;
 			dest_action = action;
@@ -612,6 +638,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = {
 	[DR_ACTION_TYP_VPORT]        = sizeof(struct mlx5dr_action_vport),
 	[DR_ACTION_TYP_PUSH_VLAN]    = sizeof(struct mlx5dr_action_push_vlan),
 	[DR_ACTION_TYP_INSERT_HDR]   = sizeof(struct mlx5dr_action_reformat),
+	[DR_ACTION_TYP_SAMPLER]      = sizeof(struct mlx5dr_action_sampler),
 };
 
 static struct mlx5dr_action *
@@ -824,6 +851,31 @@ struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
 	return action;
 }
 
+struct mlx5dr_action *
+mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id)
+{
+	struct mlx5dr_action *action;
+	u64 icm_rx, icm_tx;
+	int ret;
+
+	ret = mlx5dr_cmd_query_flow_sampler(dmn->mdev, sampler_id,
+					    &icm_rx, &icm_tx);
+	if (ret)
+		return NULL;
+
+	action = dr_action_create_generic(DR_ACTION_TYP_SAMPLER);
+	if (!action)
+		return NULL;
+
+	action->sampler->dmn = dmn;
+	action->sampler->sampler_id = sampler_id;
+	action->sampler->rx_icm_addr = icm_rx;
+	action->sampler->tx_icm_addr = icm_tx;
+
+	refcount_inc(&dmn->refcount);
+	return action;
+}
+
 static int
 dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
 				 struct mlx5dr_domain *dmn,
@@ -1624,6 +1676,9 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
 		kfree(action->rewrite->data);
 		refcount_dec(&action->rewrite->dmn->refcount);
 		break;
+	case DR_ACTION_TYP_SAMPLER:
+		refcount_dec(&action->sampler->dmn->refcount);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 6314f50efbd4..54e1f5438bbe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -228,6 +228,36 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
 	return 0;
 }
 
+int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev,
+				  u32 sampler_id,
+				  u64 *rx_icm_addr,
+				  u64 *tx_icm_addr)
+{
+	u32 out[MLX5_ST_SZ_DW(query_sampler_obj_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	void *attr;
+	int ret;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	attr = MLX5_ADDR_OF(query_sampler_obj_out, out, sampler_object);
+
+	*rx_icm_addr = MLX5_GET64(sampler_obj, attr,
+				  sw_steering_icm_address_rx);
+	*tx_icm_addr = MLX5_GET64(sampler_obj, attr,
+				  sw_steering_icm_address_tx);
+
+	return 0;
+}
+
 int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev)
 {
 	u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {};
@@ -711,6 +741,9 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
 						 fte->dest_arr[i].vport.reformat_id);
 				}
 				break;
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+				id = fte->dest_arr[i].sampler_id;
+				break;
 			default:
 				id = fte->dest_arr[i].tir_num;
 			}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 60b8c04e165e..f5e93fa87aff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -124,6 +124,7 @@ enum mlx5dr_action_type {
 	DR_ACTION_TYP_POP_VLAN,
 	DR_ACTION_TYP_PUSH_VLAN,
 	DR_ACTION_TYP_INSERT_HDR,
+	DR_ACTION_TYP_SAMPLER,
 	DR_ACTION_TYP_MAX,
 };
 
@@ -919,6 +920,13 @@ struct mlx5dr_action_reformat {
 	u8 param_1;
 };
 
+struct mlx5dr_action_sampler {
+	struct mlx5dr_domain *dmn;
+	u64 rx_icm_addr;
+	u64 tx_icm_addr;
+	u32 sampler_id;
+};
+
 struct mlx5dr_action_dest_tbl {
 	u8 is_fw_tbl:1;
 	union {
@@ -962,6 +970,7 @@ struct mlx5dr_action {
 		void *data;
 		struct mlx5dr_action_rewrite *rewrite;
 		struct mlx5dr_action_reformat *reformat;
+		struct mlx5dr_action_sampler *sampler;
 		struct mlx5dr_action_dest_tbl *dest_tbl;
 		struct mlx5dr_action_ctr *ctr;
 		struct mlx5dr_action_vport *vport;
@@ -1116,6 +1125,10 @@ int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev,
 			  bool other_vport, u16 vport_number, u16 *gvmi);
 int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
 			      struct mlx5dr_esw_caps *caps);
+int mlx5dr_cmd_query_flow_sampler(struct mlx5_core_dev *dev,
+				  u32 sampler_id,
+				  u64 *rx_icm_addr,
+				  u64 *tx_icm_addr);
 int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev);
 int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
 					u32 table_type,
@@ -1303,6 +1316,7 @@ struct mlx5dr_cmd_flow_destination_hw_info {
 		u32 ft_num;
 		u32 ft_id;
 		u32 counter_id;
+		u32 sampler_id;
 		struct {
 			u16 num;
 			u16 vhca_id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 00b4c753cae2..d5926dd7e972 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -387,7 +387,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
 	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		list_for_each_entry(dst, &fte->node.children, node.list) {
 			enum mlx5_flow_destination_type type = dst->dest_attr.type;
-			u32 ft_id;
+			u32 id;
 
 			if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
 			    num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) {
@@ -425,9 +425,20 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
 				num_term_actions++;
 				break;
 			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
-				ft_id = dst->dest_attr.ft_num;
+				id = dst->dest_attr.ft_num;
 				tmp_action = mlx5dr_action_create_dest_table_num(domain,
-										 ft_id);
+										 id);
+				if (!tmp_action) {
+					err = -ENOMEM;
+					goto free_actions;
+				}
+				fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+				term_actions[num_term_actions++].dest = tmp_action;
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+				id = dst->dest_attr.sampler_id;
+				tmp_action = mlx5dr_action_create_flow_sampler(domain,
+									       id);
 				if (!tmp_action) {
 					err = -ENOMEM;
 					goto free_actions;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index b2aa6c93c3a1..bbfe101d4e57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -100,6 +100,9 @@ struct mlx5dr_action *mlx5dr_action_create_drop(void);
 
 struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value);
 
+struct mlx5dr_action *
+mlx5dr_action_create_flow_sampler(struct mlx5dr_domain *dmn, u32 sampler_id);
+
 struct mlx5dr_action *
 mlx5dr_action_create_flow_counter(u32 counter_id);
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2d1ed78289ff..e32a0d61929b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -11083,6 +11083,11 @@ struct mlx5_ifc_create_sampler_obj_in_bits {
 	struct mlx5_ifc_sampler_obj_bits sampler_object;
 };
 
+struct mlx5_ifc_query_sampler_obj_out_bits {
+	struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
+	struct mlx5_ifc_sampler_obj_bits sampler_object;
+};
+
 enum {
 	MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128 = 0x0,
 	MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256 = 0x1,

From 6cdc686aa3163192ebce8ea72efee806729172c2 Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@nvidia.com>
Date: Wed, 2 Dec 2020 21:46:04 +0200
Subject: [PATCH 2959/3292] net/mlx5: Increase hairpin buffer size

The max packet size a hairpin queue is able to handle
is determined by the total hairpin buffer size divided
by 4.

Currently the buffer size is set to 32KB which makes
the max packet size to be 8KB and doesn't support
jumbo frames of size 9KB.

This change increases the buffer size to 64KB to increase
the max frame size and support 9KB frames.

Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 8d84d0712c20..629a61e8022f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -846,7 +846,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 		 hash_hairpin_info(peer_id, match_prio));
 	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 
-	params.log_data_size = 15;
+	params.log_data_size = 16;
 	params.log_data_size = min_t(u8, params.log_data_size,
 				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
 	params.log_data_size = max_t(u8, params.log_data_size,

From 5bd8cee2b9c5aa31d58ed97caca433f0bf74c574 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Mon, 26 Apr 2021 09:58:55 +0300
Subject: [PATCH 2960/3292] net/mlx5: SF, Improve performance in SF allocation

Avoid second traversal on the SF table by recording the first free entry
and using it in case the looked up entry was not found in the table.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/sf/hw_table.c | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index 500c71fb6f6d..d9c69123c1ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -73,26 +73,29 @@ static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 control
 				     u32 usr_sfnum)
 {
 	struct mlx5_sf_hwc_table *hwc;
+	int free_idx = -1;
 	int i;
 
 	hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
 	if (!hwc->sfs)
 		return -ENOSPC;
 
-	/* Check if sf with same sfnum already exists or not. */
 	for (i = 0; i < hwc->max_fn; i++) {
+		if (!hwc->sfs[i].allocated && free_idx == -1) {
+			free_idx = i;
+			continue;
+		}
+
 		if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum)
 			return -EEXIST;
 	}
-	/* Find the free entry and allocate the entry from the array */
-	for (i = 0; i < hwc->max_fn; i++) {
-		if (!hwc->sfs[i].allocated) {
-			hwc->sfs[i].usr_sfnum = usr_sfnum;
-			hwc->sfs[i].allocated = true;
-			return i;
-		}
-	}
-	return -ENOSPC;
+
+	if (free_idx == -1)
+		return -ENOSPC;
+
+	hwc->sfs[free_idx].usr_sfnum = usr_sfnum;
+	hwc->sfs[free_idx].allocated = true;
+	return free_idx;
 }
 
 static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id)

From e8c8276145309890e9e6b7ccf37ed731375a6a96 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Mon, 7 Sep 2020 15:11:06 +0300
Subject: [PATCH 2961/3292] net/mlx5e: kTLS, Add stats for number of deleted
 kTLS TX offloaded connections

Expose ethtool SW counter for the number of kTLS device-offloaded
TX connections that are finished and deleted.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c   | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h       | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 2c0a9344338a..9ad3459fb63a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -138,6 +138,7 @@ void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx)
 	priv = netdev_priv(netdev);
 	mdev = priv->mdev;
 
+	atomic64_inc(&priv_tx->sw_stats->tx_tls_del);
 	mlx5e_destroy_tis(mdev, priv_tx->tisn);
 	mlx5_ktls_destroy_key(mdev, priv_tx->key_id);
 	kfree(priv_tx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index 3fd6fd69bbd0..62ecf14bf86a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -42,6 +42,7 @@
 
 struct mlx5e_tls_sw_stats {
 	atomic64_t tx_tls_ctx;
+	atomic64_t tx_tls_del;
 	atomic64_t tx_tls_drop_metadata;
 	atomic64_t tx_tls_drop_resync_alloc;
 	atomic64_t tx_tls_drop_no_sync_data;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
index ffc84f9b41b0..56e7b2aee85f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
@@ -47,6 +47,7 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
 
 static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) },
 };

From 5589b8f1a2c74670cbca9ea98756dbb8f92569b8 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@nvidia.com>
Date: Fri, 19 Jun 2020 09:42:07 -0500
Subject: [PATCH 2962/3292] net/mlx5e: Add IPsec support to uplink representor

Add the xfrm xdo and ipsec_init/cleanup to uplink representor to
support IPsec in SRIOV switchdev mode.

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c         | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 26f7fab109d9..7cab08a2f715 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -428,7 +428,6 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 	spin_lock_init(&ipsec->sadb_rx_lock);
 	ida_init(&ipsec->halloc);
 	ipsec->en_priv = priv;
-	ipsec->en_priv->ipsec = ipsec;
 	ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
 			       MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
 	ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
@@ -438,6 +437,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 		return -ENOMEM;
 	}
 
+	priv->ipsec = ipsec;
 	mlx5e_accel_ipsec_fs_init(priv);
 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 2d2cc5f3b03f..bf94bcb6fa5d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -51,6 +51,7 @@
 #include "lib/mlx5.h"
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
+#include "en_accel/ipsec.h"
 
 #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
 	max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
@@ -630,6 +631,11 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
 			     struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	int err;
+
+	err = mlx5e_ipsec_init(priv);
+	if (err)
+		mlx5_core_err(mdev, "Uplink rep IPsec initialization failed, %d\n", err);
 
 	mlx5e_vxlan_set_netdev_info(priv);
 	return mlx5e_init_rep(mdev, netdev);
@@ -637,6 +643,7 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
 
 static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
 {
+	mlx5e_ipsec_cleanup(priv);
 }
 
 static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)

From 9f0752355b211c98ccf2c678e8fddda8d325501a Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 17:45:21 +0800
Subject: [PATCH 2963/3292] mailbox: arm_mhu: Remove redundant error printing
 in mhu_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/arm_mhu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/mailbox/arm_mhu.c b/drivers/mailbox/arm_mhu.c
index b7fbf276eb62..22243cabe056 100644
--- a/drivers/mailbox/arm_mhu.c
+++ b/drivers/mailbox/arm_mhu.c
@@ -122,10 +122,8 @@ static int mhu_probe(struct amba_device *adev, const struct amba_id *id)
 		return -ENOMEM;
 
 	mhu->base = devm_ioremap_resource(dev, &adev->res);
-	if (IS_ERR(mhu->base)) {
-		dev_err(dev, "ioremap failed\n");
+	if (IS_ERR(mhu->base))
 		return PTR_ERR(mhu->base);
-	}
 
 	for (i = 0; i < MHU_CHANS; i++) {
 		mhu->chan[i].con_priv = &mhu->mlink[i];

From 3edf083f901fb0ff26b2eb107eb15c93d3e62545 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 19:56:54 +0800
Subject: [PATCH 2964/3292] mailbox: bcm-pdc: Remove redundant error printing
 in pdc_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/bcm-pdc-mailbox.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c
index 5b375985f7b8..8d3a4c1fe761 100644
--- a/drivers/mailbox/bcm-pdc-mailbox.c
+++ b/drivers/mailbox/bcm-pdc-mailbox.c
@@ -1577,7 +1577,6 @@ static int pdc_probe(struct platform_device *pdev)
 	pdcs->pdc_reg_vbase = devm_ioremap_resource(&pdev->dev, pdc_regs);
 	if (IS_ERR(pdcs->pdc_reg_vbase)) {
 		err = PTR_ERR(pdcs->pdc_reg_vbase);
-		dev_err(&pdev->dev, "Failed to map registers: %d\n", err);
 		goto cleanup_ring_pool;
 	}
 

From ff16cae3a31a2ff9dbfd3c053f252128e66c8aba Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 19:35:58 +0800
Subject: [PATCH 2965/3292] mailbox: mediatek: Remove redundant error printing
 in cmdq_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/mtk-cmdq-mailbox.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 5665b6ea8119..3900ea63b28d 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -519,10 +519,8 @@ static int cmdq_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	cmdq->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(cmdq->base)) {
-		dev_err(dev, "failed to ioremap gce\n");
+	if (IS_ERR(cmdq->base))
 		return PTR_ERR(cmdq->base);
-	}
 
 	cmdq->irq = platform_get_irq(pdev, 0);
 	if (cmdq->irq < 0)

From d9605fefe37811f8eaf03689bc41fdb43ee6d589 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 31 May 2021 15:17:30 -0700
Subject: [PATCH 2966/3292] MAINTAINERS: Add dt-bindings to mailbox entry

The MAINTAINER entry for the MAILBOX framework does not cover the
dt-bindings and as such Jassi is not among the recipients for such
patches.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8c5ee008301a..2a2285bbaf13 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10860,6 +10860,7 @@ S:	Maintained
 F:	drivers/mailbox/
 F:	include/linux/mailbox_client.h
 F:	include/linux/mailbox_controller.h
+F:	include/dt-bindings/mailbox/
 F:	Documentation/devicetree/bindings/mailbox/
 
 MAILBOX ARM MHUv2

From c7701684eef8aab8b612812d179dfb2467176a6f Mon Sep 17 00:00:00 2001
From: Hao Fang <fanghao11@huawei.com>
Date: Sat, 22 May 2021 18:12:54 +0800
Subject: [PATCH 2967/3292] mailbox: hisilicon: Use the correct HiSilicon
 copyright

s/Hisilicon/HiSilicon/.
It should use capital S, according to
https://www.hisilicon.com/en.

Signed-off-by: Hao Fang <fanghao11@huawei.com>
Reviewed-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/hi3660-mailbox.c | 2 +-
 drivers/mailbox/hi6220-mailbox.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mailbox/hi3660-mailbox.c b/drivers/mailbox/hi3660-mailbox.c
index 53f4bc2488c5..395ddc250828 100644
--- a/drivers/mailbox/hi3660-mailbox.c
+++ b/drivers/mailbox/hi3660-mailbox.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2017-2018 Hisilicon Limited.
+// Copyright (c) 2017-2018 HiSilicon Limited.
 // Copyright (c) 2017-2018 Linaro Limited.
 
 #include <linux/bitops.h>
diff --git a/drivers/mailbox/hi6220-mailbox.c b/drivers/mailbox/hi6220-mailbox.c
index cc236ac7a0b5..560cd09538b1 100644
--- a/drivers/mailbox/hi6220-mailbox.c
+++ b/drivers/mailbox/hi6220-mailbox.c
@@ -2,7 +2,7 @@
 /*
  * Hisilicon's Hi6220 mailbox driver
  *
- * Copyright (c) 2015 Hisilicon Limited.
+ * Copyright (c) 2015 HiSilicon Limited.
  * Copyright (c) 2015 Linaro Limited.
  *
  * Author: Leo Yan <leo.yan@linaro.org>

From 8339642c930500140fe27621d783630b002a6342 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 28 May 2021 18:06:00 +0800
Subject: [PATCH 2968/3292] dt-bindings: mailbox: imx-mu: add i.MX8ULP MU
 support

The register layout and bits definition of i.MX8ULP MU is different
compared with others, let's add the compatible for the new MU.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 Documentation/devicetree/bindings/mailbox/fsl,mu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml b/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml
index 1a3dff277e2b..675ad9de15bb 100644
--- a/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml
+++ b/Documentation/devicetree/bindings/mailbox/fsl,mu.yaml
@@ -26,6 +26,7 @@ properties:
     oneOf:
       - const: fsl,imx6sx-mu
       - const: fsl,imx7ulp-mu
+      - const: fsl,imx8ulp-mu
       - const: fsl,imx8-mu-scu
       - items:
           - enum:

From 32f7443d4139208927bc9c3fda8e2a77ec24fe14 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 28 May 2021 18:06:01 +0800
Subject: [PATCH 2969/3292] mailbox: imx: replace the xTR/xRR array with single
 register

The xTR/xRR registers are using 4 bytes stride and continuous.
Considering we will support more TR and RR registers, use base + idx * 4
method to calculate register address, not hardcoding in driver.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/imx-mailbox.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
index 2543c7b6948b..bd7758c32a80 100644
--- a/drivers/mailbox/imx-mailbox.c
+++ b/drivers/mailbox/imx-mailbox.c
@@ -76,8 +76,8 @@ struct imx_mu_dcfg {
 	int (*tx)(struct imx_mu_priv *priv, struct imx_mu_con_priv *cp, void *data);
 	int (*rx)(struct imx_mu_priv *priv, struct imx_mu_con_priv *cp);
 	void (*init)(struct imx_mu_priv *priv);
-	u32	xTR[4];		/* Transmit Registers */
-	u32	xRR[4];		/* Receive Registers */
+	u32	xTR;		/* Transmit Register0 */
+	u32	xRR;		/* Receive Register0 */
 	u32	xSR;		/* Status Register */
 	u32	xCR;		/* Control Register */
 };
@@ -120,7 +120,7 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv,
 
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
-		imx_mu_write(priv, *arg, priv->dcfg->xTR[cp->idx]);
+		imx_mu_write(priv, *arg, priv->dcfg->xTR + cp->idx * 4);
 		imx_mu_xcr_rmw(priv, IMX_MU_xCR_TIEn(cp->idx), 0);
 		break;
 	case IMX_MU_TYPE_TXDB:
@@ -140,7 +140,7 @@ static int imx_mu_generic_rx(struct imx_mu_priv *priv,
 {
 	u32 dat;
 
-	dat = imx_mu_read(priv, priv->dcfg->xRR[cp->idx]);
+	dat = imx_mu_read(priv, priv->dcfg->xRR + (cp->idx) * 4);
 	mbox_chan_received_data(cp->chan, (void *)&dat);
 
 	return 0;
@@ -172,7 +172,7 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 		}
 
 		for (i = 0; i < 4 && i < msg->hdr.size; i++)
-			imx_mu_write(priv, *arg++, priv->dcfg->xTR[i % 4]);
+			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
 		for (; i < msg->hdr.size; i++) {
 			ret = readl_poll_timeout(priv->base + priv->dcfg->xSR,
 						 xsr,
@@ -182,7 +182,7 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 				dev_err(priv->dev, "Send data index: %d timeout\n", i);
 				return ret;
 			}
-			imx_mu_write(priv, *arg++, priv->dcfg->xTR[i % 4]);
+			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
 		}
 
 		imx_mu_xcr_rmw(priv, IMX_MU_xCR_TIEn(cp->idx), 0);
@@ -204,7 +204,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 	u32 xsr;
 
 	imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_RIEn(0));
-	*data++ = imx_mu_read(priv, priv->dcfg->xRR[0]);
+	*data++ = imx_mu_read(priv, priv->dcfg->xRR);
 
 	if (msg.hdr.size > sizeof(msg) / 4) {
 		dev_err(priv->dev, "Maximal message size (%zu bytes) exceeded on RX; got: %i bytes\n", sizeof(msg), msg.hdr.size << 2);
@@ -218,7 +218,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 			dev_err(priv->dev, "timeout read idx %d\n", i);
 			return ret;
 		}
-		*data++ = imx_mu_read(priv, priv->dcfg->xRR[i % 4]);
+		*data++ = imx_mu_read(priv, priv->dcfg->xRR + (i % 4) * 4);
 	}
 
 	imx_mu_xcr_rmw(priv, IMX_MU_xCR_RIEn(0), 0);
@@ -564,8 +564,8 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx6sx = {
 	.tx	= imx_mu_generic_tx,
 	.rx	= imx_mu_generic_rx,
 	.init	= imx_mu_init_generic,
-	.xTR	= {0x0, 0x4, 0x8, 0xc},
-	.xRR	= {0x10, 0x14, 0x18, 0x1c},
+	.xTR	= 0x0,
+	.xRR	= 0x10,
 	.xSR	= 0x20,
 	.xCR	= 0x24,
 };
@@ -574,8 +574,8 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx7ulp = {
 	.tx	= imx_mu_generic_tx,
 	.rx	= imx_mu_generic_rx,
 	.init	= imx_mu_init_generic,
-	.xTR	= {0x20, 0x24, 0x28, 0x2c},
-	.xRR	= {0x40, 0x44, 0x48, 0x4c},
+	.xTR	= 0x20,
+	.xRR	= 0x40,
 	.xSR	= 0x60,
 	.xCR	= 0x64,
 };
@@ -584,8 +584,8 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx8_scu = {
 	.tx	= imx_mu_scu_tx,
 	.rx	= imx_mu_scu_rx,
 	.init	= imx_mu_init_scu,
-	.xTR	= {0x0, 0x4, 0x8, 0xc},
-	.xRR	= {0x10, 0x14, 0x18, 0x1c},
+	.xTR	= 0x0
+	.xRR	= 0x10
 	.xSR	= 0x20,
 	.xCR	= 0x24,
 };

From f689a7cf75975680eb2993d7360dbe6dd7617e17 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 28 May 2021 18:06:02 +0800
Subject: [PATCH 2970/3292] mailbox: imx: add xSR/xCR register array

We are going to add a new platform which has 4 status registers(SR, TSR,
RSR, GSR) and 4 control registers(CR, TCR, RCR, GCR), so extend xSR
and xCR to register array and adapt code to use it.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/imx-mailbox.c | 96 ++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 35 deletions(-)

diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
index bd7758c32a80..f5dadcc827aa 100644
--- a/drivers/mailbox/imx-mailbox.c
+++ b/drivers/mailbox/imx-mailbox.c
@@ -41,6 +41,21 @@ enum imx_mu_chan_type {
 	IMX_MU_TYPE_RXDB,	/* Rx doorbell */
 };
 
+enum imx_mu_xcr {
+	IMX_MU_CR,
+	IMX_MU_GCR,
+	IMX_MU_TCR,
+	IMX_MU_RCR,
+	IMX_MU_xCR_MAX,
+};
+
+enum imx_mu_xsr {
+	IMX_MU_SR,
+	IMX_MU_GSR,
+	IMX_MU_TSR,
+	IMX_MU_RSR,
+};
+
 struct imx_sc_rpc_msg_max {
 	struct imx_sc_rpc_msg hdr;
 	u32 data[7];
@@ -67,7 +82,7 @@ struct imx_mu_priv {
 	struct clk		*clk;
 	int			irq;
 
-	u32 xcr;
+	u32 xcr[4];
 
 	bool			side_b;
 };
@@ -78,8 +93,8 @@ struct imx_mu_dcfg {
 	void (*init)(struct imx_mu_priv *priv);
 	u32	xTR;		/* Transmit Register0 */
 	u32	xRR;		/* Receive Register0 */
-	u32	xSR;		/* Status Register */
-	u32	xCR;		/* Control Register */
+	u32	xSR[4];		/* Status Registers */
+	u32	xCR[4];		/* Control Registers */
 };
 
 static struct imx_mu_priv *to_imx_mu_priv(struct mbox_controller *mbox)
@@ -97,16 +112,16 @@ static u32 imx_mu_read(struct imx_mu_priv *priv, u32 offs)
 	return ioread32(priv->base + offs);
 }
 
-static u32 imx_mu_xcr_rmw(struct imx_mu_priv *priv, u32 set, u32 clr)
+static u32 imx_mu_xcr_rmw(struct imx_mu_priv *priv, enum imx_mu_xcr type, u32 set, u32 clr)
 {
 	unsigned long flags;
 	u32 val;
 
 	spin_lock_irqsave(&priv->xcr_lock, flags);
-	val = imx_mu_read(priv, priv->dcfg->xCR);
+	val = imx_mu_read(priv, priv->dcfg->xCR[type]);
 	val &= ~clr;
 	val |= set;
-	imx_mu_write(priv, val, priv->dcfg->xCR);
+	imx_mu_write(priv, val, priv->dcfg->xCR[type]);
 	spin_unlock_irqrestore(&priv->xcr_lock, flags);
 
 	return val;
@@ -121,10 +136,10 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv,
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
 		imx_mu_write(priv, *arg, priv->dcfg->xTR + cp->idx * 4);
-		imx_mu_xcr_rmw(priv, IMX_MU_xCR_TIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(cp->idx), 0);
 		break;
 	case IMX_MU_TYPE_TXDB:
-		imx_mu_xcr_rmw(priv, IMX_MU_xCR_GIRn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_GCR, IMX_MU_xCR_GIRn(cp->idx), 0);
 		tasklet_schedule(&cp->txdb_tasklet);
 		break;
 	default:
@@ -174,7 +189,7 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 		for (i = 0; i < 4 && i < msg->hdr.size; i++)
 			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
 		for (; i < msg->hdr.size; i++) {
-			ret = readl_poll_timeout(priv->base + priv->dcfg->xSR,
+			ret = readl_poll_timeout(priv->base + priv->dcfg->xSR[IMX_MU_TSR],
 						 xsr,
 						 xsr & IMX_MU_xSR_TEn(i % 4),
 						 0, 100);
@@ -185,7 +200,7 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
 		}
 
-		imx_mu_xcr_rmw(priv, IMX_MU_xCR_TIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(cp->idx), 0);
 		break;
 	default:
 		dev_warn_ratelimited(priv->dev, "Send data on wrong channel type: %d\n", cp->type);
@@ -203,7 +218,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 	int i, ret;
 	u32 xsr;
 
-	imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_RIEn(0));
+	imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(0));
 	*data++ = imx_mu_read(priv, priv->dcfg->xRR);
 
 	if (msg.hdr.size > sizeof(msg) / 4) {
@@ -212,7 +227,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 	}
 
 	for (i = 1; i < msg.hdr.size; i++) {
-		ret = readl_poll_timeout(priv->base + priv->dcfg->xSR, xsr,
+		ret = readl_poll_timeout(priv->base + priv->dcfg->xSR[IMX_MU_RSR], xsr,
 					 xsr & IMX_MU_xSR_RFn(i % 4), 0, 100);
 		if (ret) {
 			dev_err(priv->dev, "timeout read idx %d\n", i);
@@ -221,7 +236,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 		*data++ = imx_mu_read(priv, priv->dcfg->xRR + (i % 4) * 4);
 	}
 
-	imx_mu_xcr_rmw(priv, IMX_MU_xCR_RIEn(0), 0);
+	imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(0), 0);
 	mbox_chan_received_data(cp->chan, (void *)&msg);
 
 	return 0;
@@ -241,19 +256,22 @@ static irqreturn_t imx_mu_isr(int irq, void *p)
 	struct imx_mu_con_priv *cp = chan->con_priv;
 	u32 val, ctrl;
 
-	ctrl = imx_mu_read(priv, priv->dcfg->xCR);
-	val = imx_mu_read(priv, priv->dcfg->xSR);
-
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
+		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_TCR]);
+		val = imx_mu_read(priv, priv->dcfg->xSR[IMX_MU_TSR]);
 		val &= IMX_MU_xSR_TEn(cp->idx) &
 			(ctrl & IMX_MU_xCR_TIEn(cp->idx));
 		break;
 	case IMX_MU_TYPE_RX:
+		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_RCR]);
+		val = imx_mu_read(priv, priv->dcfg->xSR[IMX_MU_RSR]);
 		val &= IMX_MU_xSR_RFn(cp->idx) &
 			(ctrl & IMX_MU_xCR_RIEn(cp->idx));
 		break;
 	case IMX_MU_TYPE_RXDB:
+		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_GCR]);
+		val = imx_mu_read(priv, priv->dcfg->xSR[IMX_MU_GSR]);
 		val &= IMX_MU_xSR_GIPn(cp->idx) &
 			(ctrl & IMX_MU_xCR_GIEn(cp->idx));
 		break;
@@ -265,12 +283,12 @@ static irqreturn_t imx_mu_isr(int irq, void *p)
 		return IRQ_NONE;
 
 	if (val == IMX_MU_xSR_TEn(cp->idx)) {
-		imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_TIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, 0, IMX_MU_xCR_TIEn(cp->idx));
 		mbox_chan_txdone(chan, 0);
 	} else if (val == IMX_MU_xSR_RFn(cp->idx)) {
 		priv->dcfg->rx(priv, cp);
 	} else if (val == IMX_MU_xSR_GIPn(cp->idx)) {
-		imx_mu_write(priv, IMX_MU_xSR_GIPn(cp->idx), priv->dcfg->xSR);
+		imx_mu_write(priv, IMX_MU_xSR_GIPn(cp->idx), priv->dcfg->xSR[IMX_MU_GSR]);
 		mbox_chan_received_data(chan, NULL);
 	} else {
 		dev_warn_ratelimited(priv->dev, "Not handled interrupt\n");
@@ -317,10 +335,10 @@ static int imx_mu_startup(struct mbox_chan *chan)
 
 	switch (cp->type) {
 	case IMX_MU_TYPE_RX:
-		imx_mu_xcr_rmw(priv, IMX_MU_xCR_RIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(cp->idx), 0);
 		break;
 	case IMX_MU_TYPE_RXDB:
-		imx_mu_xcr_rmw(priv, IMX_MU_xCR_GIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_GCR, IMX_MU_xCR_GIEn(cp->idx), 0);
 		break;
 	default:
 		break;
@@ -342,13 +360,13 @@ static void imx_mu_shutdown(struct mbox_chan *chan)
 
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
-		imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_TIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, 0, IMX_MU_xCR_TIEn(cp->idx));
 		break;
 	case IMX_MU_TYPE_RX:
-		imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_RIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(cp->idx));
 		break;
 	case IMX_MU_TYPE_RXDB:
-		imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_GIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_GCR, 0, IMX_MU_xCR_GIEn(cp->idx));
 		break;
 	default:
 		break;
@@ -444,7 +462,8 @@ static void imx_mu_init_generic(struct imx_mu_priv *priv)
 		return;
 
 	/* Set default MU configuration */
-	imx_mu_write(priv, 0, priv->dcfg->xCR);
+	for (i = 0; i < IMX_MU_xCR_MAX; i++)
+		imx_mu_write(priv, 0, priv->dcfg->xCR[i]);
 }
 
 static void imx_mu_init_scu(struct imx_mu_priv *priv)
@@ -466,7 +485,8 @@ static void imx_mu_init_scu(struct imx_mu_priv *priv)
 	priv->mbox.of_xlate = imx_mu_scu_xlate;
 
 	/* Set default MU configuration */
-	imx_mu_write(priv, 0, priv->dcfg->xCR);
+	for (i = 0; i < IMX_MU_xCR_MAX; i++)
+		imx_mu_write(priv, 0, priv->dcfg->xCR[i]);
 }
 
 static int imx_mu_probe(struct platform_device *pdev)
@@ -566,8 +586,8 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx6sx = {
 	.init	= imx_mu_init_generic,
 	.xTR	= 0x0,
 	.xRR	= 0x10,
-	.xSR	= 0x20,
-	.xCR	= 0x24,
+	.xSR	= {0x20, 0x20, 0x20, 0x20},
+	.xCR	= {0x24, 0x24, 0x24, 0x24},
 };
 
 static const struct imx_mu_dcfg imx_mu_cfg_imx7ulp = {
@@ -576,8 +596,8 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx7ulp = {
 	.init	= imx_mu_init_generic,
 	.xTR	= 0x20,
 	.xRR	= 0x40,
-	.xSR	= 0x60,
-	.xCR	= 0x64,
+	.xSR	= {0x60, 0x60, 0x60, 0x60},
+	.xCR	= {0x64, 0x64, 0x64, 0x64},
 };
 
 static const struct imx_mu_dcfg imx_mu_cfg_imx8_scu = {
@@ -586,8 +606,8 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx8_scu = {
 	.init	= imx_mu_init_scu,
 	.xTR	= 0x0
 	.xRR	= 0x10
-	.xSR	= 0x20,
-	.xCR	= 0x24,
+	.xSR	= {0x20, 0x20, 0x20, 0x20},
+	.xCR	= {0x24, 0x24, 0x24, 0x24},
 };
 
 static const struct of_device_id imx_mu_dt_ids[] = {
@@ -601,9 +621,12 @@ MODULE_DEVICE_TABLE(of, imx_mu_dt_ids);
 static int __maybe_unused imx_mu_suspend_noirq(struct device *dev)
 {
 	struct imx_mu_priv *priv = dev_get_drvdata(dev);
+	int i;
 
-	if (!priv->clk)
-		priv->xcr = imx_mu_read(priv, priv->dcfg->xCR);
+	if (!priv->clk) {
+		for (i = 0; i < IMX_MU_xCR_MAX; i++)
+			priv->xcr[i] = imx_mu_read(priv, priv->dcfg->xCR[i]);
+	}
 
 	return 0;
 }
@@ -611,6 +634,7 @@ static int __maybe_unused imx_mu_suspend_noirq(struct device *dev)
 static int __maybe_unused imx_mu_resume_noirq(struct device *dev)
 {
 	struct imx_mu_priv *priv = dev_get_drvdata(dev);
+	int i;
 
 	/*
 	 * ONLY restore MU when context lost, the TIE could
@@ -620,8 +644,10 @@ static int __maybe_unused imx_mu_resume_noirq(struct device *dev)
 	 * send failed, may lead to system freeze. This issue
 	 * is observed by testing freeze mode suspend.
 	 */
-	if (!imx_mu_read(priv, priv->dcfg->xCR) && !priv->clk)
-		imx_mu_write(priv, priv->xcr, priv->dcfg->xCR);
+	if (!imx_mu_read(priv, priv->dcfg->xCR[0]) && !priv->clk) {
+		for (i = 0; i < IMX_MU_xCR_MAX; i++)
+			imx_mu_write(priv, priv->xcr[i], priv->dcfg->xCR[i]);
+	}
 
 	return 0;
 }

From 4f0b776ef5831700fe47567f6d986be410d7b9e4 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 28 May 2021 18:06:03 +0800
Subject: [PATCH 2971/3292] mailbox: imx-mailbox: support i.MX8ULP MU

i.MX8ULP MU has different register layout and bit layout compared
with i.MX6SX/7ULP/8.

So add enum imx_mu_type to show it is IMX_MU_V2 or IMX_MU_V1.

For IMX_MU_V2 mu hardware, check it when calculating bit offset to get
the correct offset.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/imx-mailbox.c | 104 ++++++++++++++++++++--------------
 1 file changed, 63 insertions(+), 41 deletions(-)

diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
index f5dadcc827aa..bc51bd3c1999 100644
--- a/drivers/mailbox/imx-mailbox.c
+++ b/drivers/mailbox/imx-mailbox.c
@@ -15,20 +15,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 
-#define IMX_MU_xSR_GIPn(x)	BIT(28 + (3 - (x)))
-#define IMX_MU_xSR_RFn(x)	BIT(24 + (3 - (x)))
-#define IMX_MU_xSR_TEn(x)	BIT(20 + (3 - (x)))
-#define IMX_MU_xSR_BRDIP	BIT(9)
-
-/* General Purpose Interrupt Enable */
-#define IMX_MU_xCR_GIEn(x)	BIT(28 + (3 - (x)))
-/* Receive Interrupt Enable */
-#define IMX_MU_xCR_RIEn(x)	BIT(24 + (3 - (x)))
-/* Transmit Interrupt Enable */
-#define IMX_MU_xCR_TIEn(x)	BIT(20 + (3 - (x)))
-/* General Purpose Interrupt Request */
-#define IMX_MU_xCR_GIRn(x)	BIT(16 + (3 - (x)))
-
 #define IMX_MU_CHANS		16
 /* TX0/RX0/RXDB[0-3] */
 #define IMX_MU_SCU_CHANS	6
@@ -42,7 +28,7 @@ enum imx_mu_chan_type {
 };
 
 enum imx_mu_xcr {
-	IMX_MU_CR,
+	IMX_MU_GIER,
 	IMX_MU_GCR,
 	IMX_MU_TCR,
 	IMX_MU_RCR,
@@ -87,16 +73,36 @@ struct imx_mu_priv {
 	bool			side_b;
 };
 
+enum imx_mu_type {
+	IMX_MU_V1,
+	IMX_MU_V2,
+};
+
 struct imx_mu_dcfg {
 	int (*tx)(struct imx_mu_priv *priv, struct imx_mu_con_priv *cp, void *data);
 	int (*rx)(struct imx_mu_priv *priv, struct imx_mu_con_priv *cp);
 	void (*init)(struct imx_mu_priv *priv);
+	enum imx_mu_type type;
 	u32	xTR;		/* Transmit Register0 */
 	u32	xRR;		/* Receive Register0 */
 	u32	xSR[4];		/* Status Registers */
 	u32	xCR[4];		/* Control Registers */
 };
 
+#define IMX_MU_xSR_GIPn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(28 + (3 - (x))))
+#define IMX_MU_xSR_RFn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
+#define IMX_MU_xSR_TEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(20 + (3 - (x))))
+
+/* General Purpose Interrupt Enable */
+#define IMX_MU_xCR_GIEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(28 + (3 - (x))))
+/* Receive Interrupt Enable */
+#define IMX_MU_xCR_RIEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
+/* Transmit Interrupt Enable */
+#define IMX_MU_xCR_TIEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(20 + (3 - (x))))
+/* General Purpose Interrupt Request */
+#define IMX_MU_xCR_GIRn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(16 + (3 - (x))))
+
+
 static struct imx_mu_priv *to_imx_mu_priv(struct mbox_controller *mbox)
 {
 	return container_of(mbox, struct imx_mu_priv, mbox);
@@ -136,10 +142,10 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv,
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
 		imx_mu_write(priv, *arg, priv->dcfg->xTR + cp->idx * 4);
-		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(priv->dcfg->type, cp->idx), 0);
 		break;
 	case IMX_MU_TYPE_TXDB:
-		imx_mu_xcr_rmw(priv, IMX_MU_GCR, IMX_MU_xCR_GIRn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_GCR, IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx), 0);
 		tasklet_schedule(&cp->txdb_tasklet);
 		break;
 	default:
@@ -191,7 +197,7 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 		for (; i < msg->hdr.size; i++) {
 			ret = readl_poll_timeout(priv->base + priv->dcfg->xSR[IMX_MU_TSR],
 						 xsr,
-						 xsr & IMX_MU_xSR_TEn(i % 4),
+						 xsr & IMX_MU_xSR_TEn(priv->dcfg->type, i % 4),
 						 0, 100);
 			if (ret) {
 				dev_err(priv->dev, "Send data index: %d timeout\n", i);
@@ -200,7 +206,7 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
 		}
 
-		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(priv->dcfg->type, cp->idx), 0);
 		break;
 	default:
 		dev_warn_ratelimited(priv->dev, "Send data on wrong channel type: %d\n", cp->type);
@@ -218,7 +224,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 	int i, ret;
 	u32 xsr;
 
-	imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(0));
+	imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(priv->dcfg->type, 0));
 	*data++ = imx_mu_read(priv, priv->dcfg->xRR);
 
 	if (msg.hdr.size > sizeof(msg) / 4) {
@@ -228,7 +234,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 
 	for (i = 1; i < msg.hdr.size; i++) {
 		ret = readl_poll_timeout(priv->base + priv->dcfg->xSR[IMX_MU_RSR], xsr,
-					 xsr & IMX_MU_xSR_RFn(i % 4), 0, 100);
+					 xsr & IMX_MU_xSR_RFn(priv->dcfg->type, i % 4), 0, 100);
 		if (ret) {
 			dev_err(priv->dev, "timeout read idx %d\n", i);
 			return ret;
@@ -236,7 +242,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 		*data++ = imx_mu_read(priv, priv->dcfg->xRR + (i % 4) * 4);
 	}
 
-	imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(0), 0);
+	imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(priv->dcfg->type, 0), 0);
 	mbox_chan_received_data(cp->chan, (void *)&msg);
 
 	return 0;
@@ -260,20 +266,20 @@ static irqreturn_t imx_mu_isr(int irq, void *p)
 	case IMX_MU_TYPE_TX:
 		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_TCR]);
 		val = imx_mu_read(priv, priv->dcfg->xSR[IMX_MU_TSR]);
-		val &= IMX_MU_xSR_TEn(cp->idx) &
-			(ctrl & IMX_MU_xCR_TIEn(cp->idx));
+		val &= IMX_MU_xSR_TEn(priv->dcfg->type, cp->idx) &
+			(ctrl & IMX_MU_xCR_TIEn(priv->dcfg->type, cp->idx));
 		break;
 	case IMX_MU_TYPE_RX:
 		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_RCR]);
 		val = imx_mu_read(priv, priv->dcfg->xSR[IMX_MU_RSR]);
-		val &= IMX_MU_xSR_RFn(cp->idx) &
-			(ctrl & IMX_MU_xCR_RIEn(cp->idx));
+		val &= IMX_MU_xSR_RFn(priv->dcfg->type, cp->idx) &
+			(ctrl & IMX_MU_xCR_RIEn(priv->dcfg->type, cp->idx));
 		break;
 	case IMX_MU_TYPE_RXDB:
-		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_GCR]);
+		ctrl = imx_mu_read(priv, priv->dcfg->xCR[IMX_MU_GIER]);
 		val = imx_mu_read(priv, priv->dcfg->xSR[IMX_MU_GSR]);
-		val &= IMX_MU_xSR_GIPn(cp->idx) &
-			(ctrl & IMX_MU_xCR_GIEn(cp->idx));
+		val &= IMX_MU_xSR_GIPn(priv->dcfg->type, cp->idx) &
+			(ctrl & IMX_MU_xCR_GIEn(priv->dcfg->type, cp->idx));
 		break;
 	default:
 		break;
@@ -282,13 +288,17 @@ static irqreturn_t imx_mu_isr(int irq, void *p)
 	if (!val)
 		return IRQ_NONE;
 
-	if (val == IMX_MU_xSR_TEn(cp->idx)) {
-		imx_mu_xcr_rmw(priv, IMX_MU_TCR, 0, IMX_MU_xCR_TIEn(cp->idx));
+	if ((val == IMX_MU_xSR_TEn(priv->dcfg->type, cp->idx)) &&
+	    (cp->type == IMX_MU_TYPE_TX)) {
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, 0, IMX_MU_xCR_TIEn(priv->dcfg->type, cp->idx));
 		mbox_chan_txdone(chan, 0);
-	} else if (val == IMX_MU_xSR_RFn(cp->idx)) {
+	} else if ((val == IMX_MU_xSR_RFn(priv->dcfg->type, cp->idx)) &&
+		   (cp->type == IMX_MU_TYPE_RX)) {
 		priv->dcfg->rx(priv, cp);
-	} else if (val == IMX_MU_xSR_GIPn(cp->idx)) {
-		imx_mu_write(priv, IMX_MU_xSR_GIPn(cp->idx), priv->dcfg->xSR[IMX_MU_GSR]);
+	} else if ((val == IMX_MU_xSR_GIPn(priv->dcfg->type, cp->idx)) &&
+		   (cp->type == IMX_MU_TYPE_RXDB)) {
+		imx_mu_write(priv, IMX_MU_xSR_GIPn(priv->dcfg->type, cp->idx),
+			     priv->dcfg->xSR[IMX_MU_GSR]);
 		mbox_chan_received_data(chan, NULL);
 	} else {
 		dev_warn_ratelimited(priv->dev, "Not handled interrupt\n");
@@ -335,10 +345,10 @@ static int imx_mu_startup(struct mbox_chan *chan)
 
 	switch (cp->type) {
 	case IMX_MU_TYPE_RX:
-		imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(priv->dcfg->type, cp->idx), 0);
 		break;
 	case IMX_MU_TYPE_RXDB:
-		imx_mu_xcr_rmw(priv, IMX_MU_GCR, IMX_MU_xCR_GIEn(cp->idx), 0);
+		imx_mu_xcr_rmw(priv, IMX_MU_GIER, IMX_MU_xCR_GIEn(priv->dcfg->type, cp->idx), 0);
 		break;
 	default:
 		break;
@@ -360,13 +370,13 @@ static void imx_mu_shutdown(struct mbox_chan *chan)
 
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
-		imx_mu_xcr_rmw(priv, IMX_MU_TCR, 0, IMX_MU_xCR_TIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_TCR, 0, IMX_MU_xCR_TIEn(priv->dcfg->type, cp->idx));
 		break;
 	case IMX_MU_TYPE_RX:
-		imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(priv->dcfg->type, cp->idx));
 		break;
 	case IMX_MU_TYPE_RXDB:
-		imx_mu_xcr_rmw(priv, IMX_MU_GCR, 0, IMX_MU_xCR_GIEn(cp->idx));
+		imx_mu_xcr_rmw(priv, IMX_MU_GIER, 0, IMX_MU_xCR_GIEn(priv->dcfg->type, cp->idx));
 		break;
 	default:
 		break;
@@ -600,12 +610,23 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx7ulp = {
 	.xCR	= {0x64, 0x64, 0x64, 0x64},
 };
 
+static const struct imx_mu_dcfg imx_mu_cfg_imx8ulp = {
+	.tx	= imx_mu_generic_tx,
+	.rx	= imx_mu_generic_rx,
+	.init	= imx_mu_init_generic,
+	.type	= IMX_MU_V2,
+	.xTR	= 0x200,
+	.xRR	= 0x280,
+	.xSR	= {0xC, 0x118, 0x124, 0x12C},
+	.xCR	= {0x110, 0x114, 0x120, 0x128},
+};
+
 static const struct imx_mu_dcfg imx_mu_cfg_imx8_scu = {
 	.tx	= imx_mu_scu_tx,
 	.rx	= imx_mu_scu_rx,
 	.init	= imx_mu_init_scu,
-	.xTR	= 0x0
-	.xRR	= 0x10
+	.xTR	= 0x0,
+	.xRR	= 0x10,
 	.xSR	= {0x20, 0x20, 0x20, 0x20},
 	.xCR	= {0x24, 0x24, 0x24, 0x24},
 };
@@ -613,6 +634,7 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx8_scu = {
 static const struct of_device_id imx_mu_dt_ids[] = {
 	{ .compatible = "fsl,imx7ulp-mu", .data = &imx_mu_cfg_imx7ulp },
 	{ .compatible = "fsl,imx6sx-mu", .data = &imx_mu_cfg_imx6sx },
+	{ .compatible = "fsl,imx8ulp-mu", .data = &imx_mu_cfg_imx8ulp },
 	{ .compatible = "fsl,imx8-mu-scu", .data = &imx_mu_cfg_imx8_scu },
 	{ },
 };

From b3c0d72b092e52ae7369b52fb97f63eb2ea7f16a Mon Sep 17 00:00:00 2001
From: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Date: Mon, 15 Mar 2021 07:33:21 +0800
Subject: [PATCH 2972/3292] mailbox: mtk-cmdq: Remove cmdq_cb_status

cmdq_cb_status is an error status. Use the standard error number
instead of cmdq_cb_status to prevent status duplication.

Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Reviewed-by: Yongqiang Niu <yongqiang.niu@mediatek.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/mtk-cmdq-mailbox.c       | 10 +++++-----
 include/linux/mailbox/mtk-cmdq-mailbox.h |  7 +------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 3900ea63b28d..1f2edc9521c1 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -180,7 +180,7 @@ static bool cmdq_thread_is_in_wfe(struct cmdq_thread *thread)
 	return readl(thread->base + CMDQ_THR_WAIT_TOKEN) & CMDQ_THR_IS_WAITING;
 }
 
-static void cmdq_task_exec_done(struct cmdq_task *task, enum cmdq_cb_status sta)
+static void cmdq_task_exec_done(struct cmdq_task *task, int sta)
 {
 	struct cmdq_task_cb *cb = &task->pkt->async_cb;
 	struct cmdq_cb_data data;
@@ -244,10 +244,10 @@ static void cmdq_thread_irq_handler(struct cmdq *cmdq,
 			curr_task = task;
 
 		if (!curr_task || curr_pa == task_end_pa - CMDQ_INST_SIZE) {
-			cmdq_task_exec_done(task, CMDQ_CB_NORMAL);
+			cmdq_task_exec_done(task, 0);
 			kfree(task);
 		} else if (err) {
-			cmdq_task_exec_done(task, CMDQ_CB_ERROR);
+			cmdq_task_exec_done(task, -ENOEXEC);
 			cmdq_task_handle_error(curr_task);
 			kfree(task);
 		}
@@ -415,7 +415,7 @@ static void cmdq_mbox_shutdown(struct mbox_chan *chan)
 
 	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
 				 list_entry) {
-		cmdq_task_exec_done(task, CMDQ_CB_ERROR);
+		cmdq_task_exec_done(task, -ECONNABORTED);
 		kfree(task);
 	}
 
@@ -453,7 +453,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 				 list_entry) {
 		cb = &task->pkt->async_cb;
 		if (cb->cb) {
-			data.sta = CMDQ_CB_ERROR;
+			data.sta = -ECONNABORTED;
 			data.data = cb->data;
 			cb->cb(data);
 		}
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
index d5a983d65f05..2f7d9a37d611 100644
--- a/include/linux/mailbox/mtk-cmdq-mailbox.h
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -65,13 +65,8 @@ enum cmdq_code {
 	CMDQ_CODE_LOGIC = 0xa0,
 };
 
-enum cmdq_cb_status {
-	CMDQ_CB_NORMAL = 0,
-	CMDQ_CB_ERROR
-};
-
 struct cmdq_cb_data {
-	enum cmdq_cb_status	sta;
+	int			sta;
 	void			*data;
 };
 

From 1b6b0ce2240e717bd5839cc106a0bf6cdbac9abc Mon Sep 17 00:00:00 2001
From: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Date: Mon, 15 Mar 2021 07:33:22 +0800
Subject: [PATCH 2973/3292] mailbox: mtk-cmdq: Use mailbox rx_callback

rx_callback is a standard mailbox callback mechanism and could cover the
function of proprietary cmdq_task_cb, so use the standard one instead of
the proprietary one. But the client driver has already used cmdq_task_cb,
so keep cmdq_task_cb until all client driver use rx_callback instead of
cmdq_task_cb.

Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Reviewed-by: Yongqiang Niu <yongqiang.niu@mediatek.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/mtk-cmdq-mailbox.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 1f2edc9521c1..7dfd0c9d7bbd 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -188,7 +188,10 @@ static void cmdq_task_exec_done(struct cmdq_task *task, int sta)
 	WARN_ON(cb->cb == (cmdq_async_flush_cb)NULL);
 	data.sta = sta;
 	data.data = cb->data;
-	cb->cb(data);
+	if (cb->cb)
+		cb->cb(data);
+
+	mbox_chan_received_data(task->thread->chan, &data);
 
 	list_del(&task->list_entry);
 }
@@ -451,12 +454,13 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 
 	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
 				 list_entry) {
+		data.sta = -ECONNABORTED;
+		data.data = cb->data;
 		cb = &task->pkt->async_cb;
-		if (cb->cb) {
-			data.sta = -ECONNABORTED;
-			data.data = cb->data;
+		if (cb->cb)
 			cb->cb(data);
-		}
+
+		mbox_chan_received_data(task->thread->chan, &data);
 		list_del(&task->list_entry);
 		kfree(task);
 	}

From 8ebc3b5aa4cfafd8b9d58e2595a12f0715594619 Mon Sep 17 00:00:00 2001
From: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Date: Mon, 15 Mar 2021 07:33:23 +0800
Subject: [PATCH 2974/3292] mailbox: mtk-cmdq: Add struct cmdq_pkt in struct
 cmdq_cb_data

Current client use 'struct cmdq_pkt' as callback data, so
change 'void *data' to 'struct cmdq_pkt *pkt'. Keep data
until client use pkt instead of data.

Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Reviewed-by: Yongqiang Niu <yongqiang.niu@mediatek.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/mtk-cmdq-mailbox.c       | 2 ++
 include/linux/mailbox/mtk-cmdq-mailbox.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 7dfd0c9d7bbd..301e65b9527a 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -188,6 +188,7 @@ static void cmdq_task_exec_done(struct cmdq_task *task, int sta)
 	WARN_ON(cb->cb == (cmdq_async_flush_cb)NULL);
 	data.sta = sta;
 	data.data = cb->data;
+	data.pkt = task->pkt;
 	if (cb->cb)
 		cb->cb(data);
 
@@ -456,6 +457,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 				 list_entry) {
 		data.sta = -ECONNABORTED;
 		data.data = cb->data;
+		data.pkt = task->pkt;
 		cb = &task->pkt->async_cb;
 		if (cb->cb)
 			cb->cb(data);
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
index 2f7d9a37d611..44365aab043c 100644
--- a/include/linux/mailbox/mtk-cmdq-mailbox.h
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -68,6 +68,7 @@ enum cmdq_code {
 struct cmdq_cb_data {
 	int			sta;
 	void			*data;
+	struct cmdq_pkt		*pkt;
 };
 
 typedef void (*cmdq_async_flush_cb)(struct cmdq_cb_data data);

From d6fbfdbc12745ce24bcd348dbf7e652353b3e59c Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibis@codeaurora.org>
Date: Wed, 16 Jun 2021 23:12:58 +0530
Subject: [PATCH 2975/3292] mailbox: qcom-ipcc: Fix IPCC mbox channel
 exhaustion

Fix IPCC (Inter-Processor Communication Controller) channel exhaustion by
setting the channel private data to NULL on mbox shutdown.

Err Logs:
remoteproc: MBA booted without debug policy, loading mpss
remoteproc: glink-edge: failed to acquire IPC channel
remoteproc: failed to probe subdevices for remoteproc: -16

Fixes: fa74a0257f45 ("mailbox: Add support for Qualcomm IPCC")
Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
Cc: stable@vger.kernel.org
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/qcom-ipcc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c
index 2d13c72944c6..584700cd1585 100644
--- a/drivers/mailbox/qcom-ipcc.c
+++ b/drivers/mailbox/qcom-ipcc.c
@@ -155,6 +155,11 @@ static int qcom_ipcc_mbox_send_data(struct mbox_chan *chan, void *data)
 	return 0;
 }
 
+static void qcom_ipcc_mbox_shutdown(struct mbox_chan *chan)
+{
+	chan->con_priv = NULL;
+}
+
 static struct mbox_chan *qcom_ipcc_mbox_xlate(struct mbox_controller *mbox,
 					const struct of_phandle_args *ph)
 {
@@ -184,6 +189,7 @@ static struct mbox_chan *qcom_ipcc_mbox_xlate(struct mbox_controller *mbox,
 
 static const struct mbox_chan_ops ipcc_mbox_chan_ops = {
 	.send_data = qcom_ipcc_mbox_send_data,
+	.shutdown = qcom_ipcc_mbox_shutdown,
 };
 
 static int qcom_ipcc_setup_mbox(struct qcom_ipcc *ipcc)

From 01c59166b4a00026b2a5b072b3149d5009a33e7b Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Tue, 1 Jun 2021 16:24:54 +0800
Subject: [PATCH 2976/3292] mailbox: bcm2835: Remove redundant dev_err call in
 bcm2835_mbox_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/bcm2835-mailbox.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mailbox/bcm2835-mailbox.c b/drivers/mailbox/bcm2835-mailbox.c
index 39761d190545..86b7ce3549c5 100644
--- a/drivers/mailbox/bcm2835-mailbox.c
+++ b/drivers/mailbox/bcm2835-mailbox.c
@@ -157,7 +157,6 @@ static int bcm2835_mbox_probe(struct platform_device *pdev)
 	mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
 	if (IS_ERR(mbox->regs)) {
 		ret = PTR_ERR(mbox->regs);
-		dev_err(&pdev->dev, "Failed to remap mailbox regs: %d\n", ret);
 		return ret;
 	}
 

From c0d580cefa46f28873202b7e3df9313a570633bd Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Tue, 1 Jun 2021 16:28:34 +0800
Subject: [PATCH 2977/3292] mailbox: bcm-flexrm-mailbox: Remove redundant
 dev_err call in flexrm_mbox_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/bcm-flexrm-mailbox.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c
index b4f33dc399a0..78073ad1f2f1 100644
--- a/drivers/mailbox/bcm-flexrm-mailbox.c
+++ b/drivers/mailbox/bcm-flexrm-mailbox.c
@@ -1523,7 +1523,6 @@ static int flexrm_mbox_probe(struct platform_device *pdev)
 	mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
 	if (IS_ERR(mbox->regs)) {
 		ret = PTR_ERR(mbox->regs);
-		dev_err(&pdev->dev, "Failed to remap mailbox regs: %d\n", ret);
 		goto fail;
 	}
 	regs_end = mbox->regs + resource_size(iomem);

From cc3eb51814d0f31671d57e85d4912403a11506e9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 22 Jun 2021 16:08:52 +0300
Subject: [PATCH 2978/3292] mailbox: mtk-cmdq: Fix uninitialized variable in
 cmdq_mbox_flush()

The "cb" pointer needs to be initialized before can assign
"data.data = cb->data;".

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/mtk-cmdq-mailbox.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 301e65b9527a..67a42b514429 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -455,10 +455,10 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 
 	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
 				 list_entry) {
+		cb = &task->pkt->async_cb;
 		data.sta = -ECONNABORTED;
 		data.data = cb->data;
 		data.pkt = task->pkt;
-		cb = &task->pkt->async_cb;
 		if (cb->cb)
 			cb->cb(data);
 

From 4649d722c37bec95fd818b6e46179d31c8b9807b Mon Sep 17 00:00:00 2001
From: Martin Botka <martin.botka@somainline.org>
Date: Sat, 12 Jun 2021 11:46:29 +0200
Subject: [PATCH 2979/3292] dt-bindings: mailbox: Add binding for sm6125

This patch adds the binding for sm6125

Signed-off-by: Martin Botka <martin.botka@somainline.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 .../devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml      | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
index 5dc1173d03fd..9f4fbc6a141c 100644
--- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
+++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
@@ -27,6 +27,7 @@ properties:
       - qcom,sc8180x-apss-shared
       - qcom,sdm660-apcs-hmss-global
       - qcom,sdm845-apss-shared
+      - qcom,sm6125-apcs-hmss-global
       - qcom,sm8150-apss-shared
 
   reg:
@@ -75,6 +76,7 @@ allOf:
             - qcom,sc7180-apss-shared
             - qcom,sdm660-apcs-hmss-global
             - qcom,sdm845-apss-shared
+            - qcom,sm6125-apcs-hmss-global
             - qcom,sm8150-apss-shared
     then:
       properties:

From 72648436b55f310749352e005a508ede082f63d6 Mon Sep 17 00:00:00 2001
From: Martin Botka <martin.botka@somainline.org>
Date: Sat, 12 Jun 2021 11:46:30 +0200
Subject: [PATCH 2980/3292] mailbox: qcom-apcs: Add SM6125 compatible

This commit adds compatible for the SM6125 SoC

Signed-off-by: Martin Botka <martin.botka@somainline.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/qcom-apcs-ipc-mailbox.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/mailbox/qcom-apcs-ipc-mailbox.c b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
index f25324d03842..f24c5ad8d658 100644
--- a/drivers/mailbox/qcom-apcs-ipc-mailbox.c
+++ b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
@@ -57,6 +57,10 @@ static const struct qcom_apcs_ipc_data sdm660_apcs_data = {
 	.offset = 8, .clk_name = NULL
 };
 
+static const struct qcom_apcs_ipc_data sm6125_apcs_data = {
+	.offset = 8, .clk_name = NULL
+};
+
 static const struct qcom_apcs_ipc_data apps_shared_apcs_data = {
 	.offset = 12, .clk_name = NULL
 };
@@ -166,6 +170,7 @@ static const struct of_device_id qcom_apcs_ipc_of_match[] = {
 	{ .compatible = "qcom,sc8180x-apss-shared", .data = &apps_shared_apcs_data },
 	{ .compatible = "qcom,sdm660-apcs-hmss-global", .data = &sdm660_apcs_data },
 	{ .compatible = "qcom,sdm845-apss-shared", .data = &apps_shared_apcs_data },
+	{ .compatible = "qcom,sm6125-apcs-hmss-global", .data = &sm6125_apcs_data },
 	{ .compatible = "qcom,sm8150-apss-shared", .data = &apps_shared_apcs_data },
 	{ .compatible = "qcom,sdx55-apcs-gcc", .data = &sdx55_apcs_data },
 	{}

From 2ef6123182face5df85e585dfddff1e013659ee9 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 22 Jun 2021 08:39:17 +0800
Subject: [PATCH 2981/3292] dt-bindings: mailbox: qcom: Add MSM8939 APCS
 compatible

Add compatible for the Qualcomm MSM8939 APCS block to the Qualcomm APCS
bindings.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 .../devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
index 9f4fbc6a141c..8878ec00820e 100644
--- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
+++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
@@ -19,6 +19,7 @@ properties:
       - qcom,ipq6018-apcs-apps-global
       - qcom,ipq8074-apcs-apps-global
       - qcom,msm8916-apcs-kpss-global
+      - qcom,msm8939-apcs-kpss-global
       - qcom,msm8994-apcs-kpss-global
       - qcom,msm8996-apcs-hmss-global
       - qcom,msm8998-apcs-hmss-global

From 96e39e95c01283ff5695dafe659df88ada802159 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 22 Jun 2021 08:39:18 +0800
Subject: [PATCH 2982/3292] mailbox: qcom: Use PLATFORM_DEVID_AUTO to register
 platform device

In adding APCS clock support for MSM8939, the second clock registration
fails due to duplicate device name like below.

[    0.519657] sysfs: cannot create duplicate filename '/bus/platform/devices/qcom-apcs-msm8916-clk'
...
[    0.661158] qcom_apcs_ipc b111000.mailbox: failed to register APCS clk

This is because MSM8939 has 3 APCS instances for Cluster0 (little cores),
Cluster1 (big cores) and CCI (Cache Coherent Interconnect).  Although
only APCS of Cluster0 and Cluster1 have IPC bits, each of 3 APCS has
A53PLL clock control bits.  That said, 3 'qcom-apcs-msm8916-clk' devices
need to be registered to instantiate all 3 clocks.  Use PLATFORM_DEVID_AUTO
rather than PLATFORM_DEVID_NONE for platform_device_register_data() call
to fix the issue above.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/qcom-apcs-ipc-mailbox.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mailbox/qcom-apcs-ipc-mailbox.c b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
index f24c5ad8d658..16d107139573 100644
--- a/drivers/mailbox/qcom-apcs-ipc-mailbox.c
+++ b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
@@ -136,7 +136,7 @@ static int qcom_apcs_ipc_probe(struct platform_device *pdev)
 	if (apcs_data->clk_name) {
 		apcs->clk = platform_device_register_data(&pdev->dev,
 							  apcs_data->clk_name,
-							  PLATFORM_DEVID_NONE,
+							  PLATFORM_DEVID_AUTO,
 							  NULL, 0);
 		if (IS_ERR(apcs->clk))
 			dev_err(&pdev->dev, "failed to register APCS clk\n");

From 8a7cdb108d3020e221944fbd52b9e047f3f51594 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 22 Jun 2021 08:39:19 +0800
Subject: [PATCH 2983/3292] mailbox: qcom: Add MSM8939 APCS support

Enable MSM8939 APCS support by adding the compatible.  It reuses
msm8916_apcs_data.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/qcom-apcs-ipc-mailbox.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mailbox/qcom-apcs-ipc-mailbox.c b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
index 16d107139573..03bdc96dc457 100644
--- a/drivers/mailbox/qcom-apcs-ipc-mailbox.c
+++ b/drivers/mailbox/qcom-apcs-ipc-mailbox.c
@@ -162,6 +162,7 @@ static const struct of_device_id qcom_apcs_ipc_of_match[] = {
 	{ .compatible = "qcom,ipq6018-apcs-apps-global", .data = &ipq6018_apcs_data },
 	{ .compatible = "qcom,ipq8074-apcs-apps-global", .data = &ipq8074_apcs_data },
 	{ .compatible = "qcom,msm8916-apcs-kpss-global", .data = &msm8916_apcs_data },
+	{ .compatible = "qcom,msm8939-apcs-kpss-global", .data = &msm8916_apcs_data },
 	{ .compatible = "qcom,msm8994-apcs-kpss-global", .data = &msm8994_apcs_data },
 	{ .compatible = "qcom,msm8996-apcs-hmss-global", .data = &msm8996_apcs_data },
 	{ .compatible = "qcom,msm8998-apcs-hmss-global", .data = &msm8998_apcs_data },

From e80a7e7eafcd5e75bf8c73164cae954b3f0addbc Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 21 Jun 2021 11:56:45 -0700
Subject: [PATCH 2984/3292] mailbox: imx: Avoid using val uninitialized in
 imx_mu_isr()

Clang warns:

drivers/mailbox/imx-mailbox.c:284:2: warning: variable 'val' is used
uninitialized whenever switch default is taken
[-Wsometimes-uninitialized]
        default:
        ^~~~~~~
drivers/mailbox/imx-mailbox.c:288:7: note: uninitialized use occurs here
        if (!val)
             ^~~
drivers/mailbox/imx-mailbox.c:263:9: note: initialize the variable 'val'
to silence this warning
        u32 val, ctrl;
               ^
                = 0
1 warning generated.

Prior to commit 91c8c1fbe498 ("mailbox: imx: add xSR/xCR register
array"), val was always initialized in imx_mu_isr() but now, it is not
initialized in the default case. Return IRQ_NONE like the statement
below does and add a message that there is an unhandled type for this
switch statement so that it can be updated.

Fixes: 91c8c1fbe498 ("mailbox: imx: add xSR/xCR register array")
Link: https://github.com/ClangBuiltLinux/linux/issues/1404
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/imx-mailbox.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
index bc51bd3c1999..0ce75c6b36b6 100644
--- a/drivers/mailbox/imx-mailbox.c
+++ b/drivers/mailbox/imx-mailbox.c
@@ -282,7 +282,9 @@ static irqreturn_t imx_mu_isr(int irq, void *p)
 			(ctrl & IMX_MU_xCR_GIEn(priv->dcfg->type, cp->idx));
 		break;
 	default:
-		break;
+		dev_warn_ratelimited(priv->dev, "Unhandled channel type %d\n",
+				     cp->type);
+		return IRQ_NONE;
 	}
 
 	if (!val)

From ed9543d6f2c444457b1936026f67cb8d3bf70bc7 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Thu, 24 Jun 2021 16:00:48 +0100
Subject: [PATCH 2985/3292] dt-bindings: add bindings for polarfire soc mailbox

Add device tree bindings for the MSS system controller mailbox on
the Microchip PolarFire SoC.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 .../microchip,polarfire-soc-mailbox.yaml      | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml

diff --git a/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml
new file mode 100644
index 000000000000..bbb173ea483c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/microchip,polarfire-soc-mailbox.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/mailbox/microchip,polarfire-soc-mailbox.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) mailbox controller
+
+maintainers:
+  - Conor Dooley <conor.dooley@microchip.com>
+
+properties:
+  compatible:
+    const: microchip,polarfire-soc-mailbox
+
+  reg:
+    items:
+      - description: mailbox data registers
+      - description: mailbox interrupt registers
+
+  interrupts:
+    maxItems: 1
+
+  "#mbox-cells":
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - "#mbox-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+      mbox: mailbox@37020000 {
+        compatible = "microchip,polarfire-soc-mailbox";
+        reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318c 0x0 0x40>;
+        interrupt-parent = <&L1>;
+        interrupts = <96>;
+        #mbox-cells = <1>;
+      };
+    };

From 83d7b1560810e038e1d07ca6bff41edaeae29725 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Thu, 24 Jun 2021 16:00:55 +0100
Subject: [PATCH 2986/3292] mbox: add polarfire soc system controller mailbox

This driver adds support for the single mailbox channel of the MSS
system controller on the Microchip PolarFire SoC.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/Kconfig        |  12 ++
 drivers/mailbox/Makefile       |   2 +
 drivers/mailbox/mailbox-mpfs.c | 251 +++++++++++++++++++++++++++++++++
 include/soc/microchip/mpfs.h   |  43 ++++++
 4 files changed, 308 insertions(+)
 create mode 100644 drivers/mailbox/mailbox-mpfs.c
 create mode 100644 include/soc/microchip/mpfs.h

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 68de2c6af727..b4b780ea2ac8 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -160,6 +160,18 @@ config MAILBOX_TEST
 	  Test client to help with testing new Controller driver
 	  implementations.
 
+config POLARFIRE_SOC_MAILBOX
+	tristate "PolarFire SoC (MPFS) Mailbox"
+	depends on HAS_IOMEM
+	depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
+	help
+	  This driver adds support for the PolarFire SoC (MPFS) mailbox controller.
+
+	  To compile this driver as a module, choose M here. the
+	  module will be called mailbox-mpfs.
+
+	  If unsure, say N.
+
 config QCOM_APCS_IPC
 	tristate "Qualcomm APCS IPC driver"
 	depends on ARCH_QCOM || COMPILE_TEST
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 7194fa92c787..c2089f04887e 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -41,6 +41,8 @@ obj-$(CONFIG_BCM_PDC_MBOX)	+= bcm-pdc-mailbox.o
 
 obj-$(CONFIG_BCM_FLEXRM_MBOX)	+= bcm-flexrm-mailbox.o
 
+obj-$(CONFIG_POLARFIRE_SOC_MAILBOX)	+= mailbox-mpfs.o
+
 obj-$(CONFIG_QCOM_APCS_IPC)	+= qcom-apcs-ipc-mailbox.o
 
 obj-$(CONFIG_TEGRA_HSP_MBOX)	+= tegra-hsp.o
diff --git a/drivers/mailbox/mailbox-mpfs.c b/drivers/mailbox/mailbox-mpfs.c
new file mode 100644
index 000000000000..0d6e2231a2c7
--- /dev/null
+++ b/drivers/mailbox/mailbox-mpfs.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip PolarFire SoC (MPFS) system controller/mailbox controller driver
+ *
+ * Copyright (c) 2020 Microchip Corporation. All rights reserved.
+ *
+ * Author: Conor Dooley <conor.dooley@microchip.com>
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mailbox_controller.h>
+#include <soc/microchip/mpfs.h>
+
+#define SERVICES_CR_OFFSET		0x50u
+#define SERVICES_SR_OFFSET		0x54u
+#define MAILBOX_REG_OFFSET		0x800u
+#define MSS_SYS_MAILBOX_DATA_OFFSET	0u
+#define SCB_MASK_WIDTH			16u
+
+/* SCBCTRL service control register */
+
+#define SCB_CTRL_REQ (0)
+#define SCB_CTRL_REQ_MASK BIT(SCB_CTRL_REQ)
+
+#define SCB_CTRL_BUSY (1)
+#define SCB_CTRL_BUSY_MASK BIT(SCB_CTRL_BUSY)
+
+#define SCB_CTRL_ABORT (2)
+#define SCB_CTRL_ABORT_MASK BIT(SCB_CTRL_ABORT)
+
+#define SCB_CTRL_NOTIFY (3)
+#define SCB_CTRL_NOTIFY_MASK BIT(SCB_CTRL_NOTIFY)
+
+#define SCB_CTRL_POS (16)
+#define SCB_CTRL_MASK GENMASK_ULL(SCB_CTRL_POS + SCB_MASK_WIDTH, SCB_CTRL_POS)
+
+/* SCBCTRL service status register */
+
+#define SCB_STATUS_REQ (0)
+#define SCB_STATUS_REQ_MASK BIT(SCB_STATUS_REQ)
+
+#define SCB_STATUS_BUSY (1)
+#define SCB_STATUS_BUSY_MASK BIT(SCB_STATUS_BUSY)
+
+#define SCB_STATUS_ABORT (2)
+#define SCB_STATUS_ABORT_MASK BIT(SCB_STATUS_ABORT)
+
+#define SCB_STATUS_NOTIFY (3)
+#define SCB_STATUS_NOTIFY_MASK BIT(SCB_STATUS_NOTIFY)
+
+#define SCB_STATUS_POS (16)
+#define SCB_STATUS_MASK GENMASK_ULL(SCB_STATUS_POS + SCB_MASK_WIDTH, SCB_STATUS_POS)
+
+struct mpfs_mbox {
+	struct mbox_controller controller;
+	struct device *dev;
+	int irq;
+	void __iomem *mbox_base;
+	void __iomem *int_reg;
+	struct mbox_chan chans[1];
+	struct mpfs_mss_response *response;
+	u16 resp_offset;
+};
+
+static bool mpfs_mbox_busy(struct mpfs_mbox *mbox)
+{
+	u32 status;
+
+	status = readl_relaxed(mbox->mbox_base + SERVICES_SR_OFFSET);
+
+	return status & SCB_STATUS_BUSY_MASK;
+}
+
+static int mpfs_mbox_send_data(struct mbox_chan *chan, void *data)
+{
+	struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+	struct mpfs_mss_msg *msg = data;
+	u32 tx_trigger;
+	u16 opt_sel;
+	u32 val = 0u;
+
+	mbox->response = msg->response;
+	mbox->resp_offset = msg->resp_offset;
+
+	if (mpfs_mbox_busy(mbox))
+		return -EBUSY;
+
+	if (msg->cmd_data_size) {
+		u32 index;
+		u8 extra_bits = msg->cmd_data_size & 3;
+		u32 *word_buf = (u32 *)msg->cmd_data;
+
+		for (index = 0; index < (msg->cmd_data_size / 4); index++)
+			writel_relaxed(word_buf[index],
+				       mbox->mbox_base + MAILBOX_REG_OFFSET + index * 0x4);
+		if (extra_bits) {
+			u8 i;
+			u8 byte_off = ALIGN_DOWN(msg->cmd_data_size, 4);
+			u8 *byte_buf = msg->cmd_data + byte_off;
+
+			val = readl_relaxed(mbox->mbox_base +
+					    MAILBOX_REG_OFFSET + index * 0x4);
+
+			for (i = 0u; i < extra_bits; i++) {
+				val &= ~(0xffu << (i * 8u));
+				val |= (byte_buf[i] << (i * 8u));
+			}
+
+			writel_relaxed(val,
+				       mbox->mbox_base + MAILBOX_REG_OFFSET + index * 0x4);
+		}
+	}
+
+	opt_sel = ((msg->mbox_offset << 7u) | (msg->cmd_opcode & 0x7fu));
+	tx_trigger = (opt_sel << SCB_CTRL_POS) & SCB_CTRL_MASK;
+	tx_trigger |= SCB_CTRL_REQ_MASK | SCB_STATUS_NOTIFY_MASK;
+	writel_relaxed(tx_trigger, mbox->mbox_base + SERVICES_CR_OFFSET);
+
+	return 0;
+}
+
+static void mpfs_mbox_rx_data(struct mbox_chan *chan)
+{
+	struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+	struct mpfs_mss_response *response = mbox->response;
+	u16 num_words = ALIGN((response->resp_size), (4)) / 4U;
+	u32 i;
+
+	if (!response->resp_msg) {
+		dev_err(mbox->dev, "failed to assign memory for response %d\n", -ENOMEM);
+		return;
+	}
+
+	if (!mpfs_mbox_busy(mbox)) {
+		for (i = 0; i < num_words; i++) {
+			response->resp_msg[i] =
+				readl_relaxed(mbox->mbox_base + MAILBOX_REG_OFFSET
+					      + mbox->resp_offset + i * 0x4);
+		}
+	}
+
+	mbox_chan_received_data(chan, response);
+}
+
+static irqreturn_t mpfs_mbox_inbox_isr(int irq, void *data)
+{
+	struct mbox_chan *chan = data;
+	struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+
+	writel_relaxed(0, mbox->int_reg);
+
+	mpfs_mbox_rx_data(chan);
+
+	mbox_chan_txdone(chan, 0);
+	return IRQ_HANDLED;
+}
+
+static int mpfs_mbox_startup(struct mbox_chan *chan)
+{
+	struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+	int ret = 0;
+
+	if (!mbox)
+		return -EINVAL;
+
+	ret = devm_request_irq(mbox->dev, mbox->irq, mpfs_mbox_inbox_isr, 0, "mpfs-mailbox", chan);
+	if (ret)
+		dev_err(mbox->dev, "failed to register mailbox interrupt:%d\n", ret);
+
+	return ret;
+}
+
+static void mpfs_mbox_shutdown(struct mbox_chan *chan)
+{
+	struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv;
+
+	devm_free_irq(mbox->dev, mbox->irq, chan);
+}
+
+static const struct mbox_chan_ops mpfs_mbox_ops = {
+	.send_data = mpfs_mbox_send_data,
+	.startup = mpfs_mbox_startup,
+	.shutdown = mpfs_mbox_shutdown,
+};
+
+static int mpfs_mbox_probe(struct platform_device *pdev)
+{
+	struct mpfs_mbox *mbox;
+	struct resource *regs;
+	int ret;
+
+	mbox = devm_kzalloc(&pdev->dev, sizeof(*mbox), GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+
+	mbox->mbox_base = devm_platform_get_and_ioremap_resource(pdev, 0, &regs);
+	if (IS_ERR(mbox->mbox_base))
+		return PTR_ERR(mbox->mbox_base);
+
+	mbox->int_reg = devm_platform_get_and_ioremap_resource(pdev, 1, &regs);
+	if (IS_ERR(mbox->int_reg))
+		return PTR_ERR(mbox->int_reg);
+
+	mbox->irq = platform_get_irq(pdev, 0);
+	if (mbox->irq < 0)
+		return mbox->irq;
+
+	mbox->dev = &pdev->dev;
+
+	mbox->chans[0].con_priv = mbox;
+	mbox->controller.dev = mbox->dev;
+	mbox->controller.num_chans = 1;
+	mbox->controller.chans = mbox->chans;
+	mbox->controller.ops = &mpfs_mbox_ops;
+	mbox->controller.txdone_irq = true;
+
+	ret = devm_mbox_controller_register(&pdev->dev, &mbox->controller);
+	if (ret) {
+		dev_err(&pdev->dev, "Registering MPFS mailbox controller failed\n");
+		return ret;
+	}
+	dev_info(&pdev->dev, "Registered MPFS mailbox controller driver\n");
+
+	return 0;
+}
+
+static const struct of_device_id mpfs_mbox_of_match[] = {
+	{.compatible = "microchip,polarfire-soc-mailbox", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mpfs_mbox_of_match);
+
+static struct platform_driver mpfs_mbox_driver = {
+	.driver = {
+		.name = "mpfs-mailbox",
+		.of_match_table = mpfs_mbox_of_match,
+	},
+	.probe = mpfs_mbox_probe,
+};
+module_platform_driver(mpfs_mbox_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
+MODULE_DESCRIPTION("MPFS mailbox controller driver");
diff --git a/include/soc/microchip/mpfs.h b/include/soc/microchip/mpfs.h
new file mode 100644
index 000000000000..2b64c95f3be5
--- /dev/null
+++ b/include/soc/microchip/mpfs.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * Microchip PolarFire SoC (MPFS)
+ *
+ * Copyright (c) 2020 Microchip Corporation. All rights reserved.
+ *
+ * Author: Conor Dooley <conor.dooley@microchip.com>
+ *
+ */
+
+#ifndef __SOC_MPFS_H__
+#define __SOC_MPFS_H__
+
+#include <linux/types.h>
+#include <linux/of_device.h>
+
+struct mpfs_sys_controller;
+
+struct mpfs_mss_msg {
+	u8 cmd_opcode;
+	u16 cmd_data_size;
+	struct mpfs_mss_response *response;
+	u8 *cmd_data;
+	u16 mbox_offset;
+	u16 resp_offset;
+};
+
+struct mpfs_mss_response {
+	u32 resp_status;
+	u32 *resp_msg;
+	u16 resp_size;
+};
+
+#if IS_ENABLED(CONFIG_POLARFIRE_SOC_SYS_CTRL)
+
+int mpfs_blocking_transaction(struct mpfs_sys_controller *mpfs_client, void *msg);
+
+struct mpfs_sys_controller *mpfs_sys_controller_get(struct device_node *mailbox_node);
+
+#endif /* if IS_ENABLED(CONFIG_POLARFIRE_SOC_SYS_CTRL) */
+
+#endif /* __SOC_MPFS_H__ */

From c317ae30afc26112e64e832253dc780e32c7b734 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Thu, 24 Jun 2021 16:01:39 +0100
Subject: [PATCH 2987/3292] dt-bindings: add bindings for polarfire soc system
 controller

Add device tree bindings for the MSS system controller on
the Microchip PolarFire SoC.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 ...icrochip,polarfire-soc-sys-controller.yaml | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml

diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml
new file mode 100644
index 000000000000..2cd3bc6bd8d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/microchip/microchip,polarfire-soc-sys-controller.yaml
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/soc/microchip/microchip,polarfire-soc-sys-controller.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller
+
+maintainers:
+  - Conor Dooley <conor.dooley@microchip.com>
+
+description: |
+  The PolarFire SoC system controller is communicated with via a mailbox.
+  This document describes the bindings for the client portion of that mailbox.
+
+
+properties:
+  mboxes:
+    maxItems: 1
+
+  compatible:
+    const: microchip,polarfire-soc-sys-controller
+
+required:
+  - compatible
+  - mboxes
+
+additionalProperties: false
+
+examples:
+  - |
+    syscontroller: syscontroller {
+      compatible = "microchip,polarfire-soc-sys-controller";
+      mboxes = <&mbox 0>;
+    };

From 4f197188da668180d5ea7d808ae6221ce66cfe33 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Thu, 24 Jun 2021 16:01:52 +0100
Subject: [PATCH 2988/3292] MAINTAINERS: add entry for polarfire soc mailbox

Add Lewis Hanly as a maintainer for the Microchip SoC
directory and the system services mailbox driver

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2a2285bbaf13..48ba9c93972f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15706,6 +15706,14 @@ F:	arch/riscv/
 N:	riscv
 K:	riscv
 
+RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
+M:	Lewis Hanly <lewis.hanly@microchip.com>
+L:	linux-riscv@lists.infradead.org
+S:	Supported
+F:	drivers/mailbox/mailbox-mpfs.c
+F:	drivers/soc/microchip/
+F:	include/soc/microchip/mpfs.h
+
 RNBD BLOCK DRIVERS
 M:	Md. Haris Iqbal <haris.iqbal@ionos.com>
 M:	Jack Wang <jinpu.wang@ionos.com>

From c06bc5a3fb42304d815a2dc41e324b5a97c9f7da Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Sun, 27 Jun 2021 14:11:12 -0700
Subject: [PATCH 2989/3292] block/mq-deadline: Remove a WARN_ON_ONCE() call

The purpose of the WARN_ON_ONCE() statement in dd_insert_request() is to
verify that dd_prepare_request() cleared rq->elv.priv[0]. Since
dd_prepare_request() is called during request initialization but not if a
request is requeued, a warning is triggered if a request is requeued. Fix
this by removing the WARN_ON_ONCE() statement. This patch suppresses the
following kernel warning:

WARNING: CPU: 28 PID: 432 at block/mq-deadline-main.c:740 dd_insert_request+0x4d4/0x5b0
Workqueue: kblockd blk_mq_requeue_work
Call Trace:
 dd_insert_requests+0xfa/0x130
 blk_mq_sched_insert_request+0x22c/0x240
 blk_mq_requeue_work+0x21c/0x2d0
 process_one_work+0x4c2/0xa70
 worker_thread+0x2e5/0x6d0
 kthread+0x21c/0x250
 ret_from_fork+0x1f/0x30

Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Fixes: 08a9ad8bf607 ("block/mq-deadline: Add cgroup support")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210627211112.12720-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline-main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/mq-deadline-main.c b/block/mq-deadline-main.c
index 9db6da9ef4c6..6f612e6dc82b 100644
--- a/block/mq-deadline-main.c
+++ b/block/mq-deadline-main.c
@@ -740,7 +740,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	dd_count(dd, inserted, prio);
 	blkcg = dd_blkcg_from_bio(rq->bio);
 	ddcg_count(blkcg, inserted, ioprio_class);
-	WARN_ON_ONCE(rq->elv.priv[0]);
 	rq->elv.priv[0] = blkcg;
 
 	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {

From 2d0a9eb23ccfdf11308bec6db0bc007585d919d2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 26 Jun 2021 22:44:11 +0200
Subject: [PATCH 2990/3292] time/kunit: Add missing MODULE_LICENSE()

[ mingo: MODULE_LICENSE() takes a string. ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/time_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c
index 341ebfad5e99..831e8e779ace 100644
--- a/kernel/time/time_test.c
+++ b/kernel/time/time_test.c
@@ -96,3 +96,4 @@ static struct kunit_suite time_test_suite = {
 };
 
 kunit_test_suite(time_test_suite);
+MODULE_LICENSE("GPL");

From 1bcad8e510b27ad843315ab2c27ccf459e3acded Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 22 Jun 2021 15:07:54 -0500
Subject: [PATCH 2991/3292] platform/x86: think-lmi: Fix issues with duplicate
 attributes

On an AMD based Lenovo T14, I find that the module doesn't work at
all, and instead has a traceback with messages like:

```
sysfs: cannot create duplicate filename '/devices/virtual/firmware-attributes/thinklmi/attributes/Reserved'
```

Duplicate and reserved values showing up appear to be a firmware bug,
but they shouldn't make the driver explode.  So catch them and skip
them.

Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20210622200755.12379-2-mario.limonciello@amd.com
[hdegoede@redhat.com: Add missing kfree(tlmi_priv.setting[i])]
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index b57061079288..bbfcaf23d420 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -691,6 +691,17 @@ static int tlmi_sysfs_init(void)
 		if (!tlmi_priv.setting[i])
 			continue;
 
+		/* check for duplicate or reserved values */
+		if (kset_find_obj(tlmi_priv.attribute_kset, tlmi_priv.setting[i]->display_name) ||
+		    !strcmp(tlmi_priv.setting[i]->display_name, "Reserved")) {
+			pr_debug("duplicate or reserved attribute name found - %s\n",
+				tlmi_priv.setting[i]->display_name);
+			kfree(tlmi_priv.setting[i]->possible_values);
+			kfree(tlmi_priv.setting[i]);
+			tlmi_priv.setting[i] = NULL;
+			continue;
+		}
+
 		/* Build attribute */
 		tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
 		ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,

From 0fdf10e5fc964c315cf131a2eaab9cc531a9f40f Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 22 Jun 2021 15:07:55 -0500
Subject: [PATCH 2992/3292] platform/x86: think-lmi: Split current_value to
 reflect only the value

Currently attributes will show things like:
`BootOrderLock,Disable`
rather than just
`Disable`.

Of course this works, but the attribute is intended to be read by
userspace tools and not require further processing.  That is a userspace
tool can display a drop down of `possible_values` and `current_value` is
one of them from the list.

This also aligns `think-lmi` with how `dell-wmi-sysman` works.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20210622200755.12379-3-mario.limonciello@amd.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index bbfcaf23d420..4cab341a3538 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -492,14 +492,19 @@ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *at
 static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 {
 	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
-	char *item;
+	char *item, *value;
 	int ret;
 
 	ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID);
 	if (ret)
 		return ret;
 
-	ret = sysfs_emit(buf, "%s\n", item);
+	/* validate and split from `item,value` -> `value` */
+	value = strpbrk(item, ",");
+	if (!value || value == item || !strlen(value + 1))
+		return -EINVAL;
+
+	ret = sysfs_emit(buf, "%s\n", value + 1);
 	kfree(item);
 	return ret;
 }

From 23dcd7497c227a16acdda5e44f141fdc1e660f94 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 28 Jun 2021 11:36:00 +0200
Subject: [PATCH 2993/3292] platform/x86: think-lmi: Move
 kfree(setting->possible_values) to tlmi_attr_setting_release()

We must not free the possible_values string before we have called
sysfs_remove_group(kobj, &tlmi_attr_group) otherwise there is a race
where a sysfs read of possible_values could reference the free-ed
memory.

Move the kfree(setting->possible_values) together with the free of the
actual tlmi_attr_setting struct to avoid this race.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 4cab341a3538..3671b5d20613 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -626,6 +626,7 @@ static void tlmi_attr_setting_release(struct kobject *kobj)
 {
 	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
 
+	kfree(setting->possible_values);
 	kfree(setting);
 }
 
@@ -654,7 +655,6 @@ static void tlmi_release_attr(void)
 	/* Attribute structures */
 	for (i = 0; i < TLMI_SETTINGS_COUNT; i++) {
 		if (tlmi_priv.setting[i]) {
-			kfree(tlmi_priv.setting[i]->possible_values);
 			sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group);
 			kobject_put(&tlmi_priv.setting[i]->kobj);
 		}

From caf23895ce96e90d8667328144344263ff0e7f1f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 25 Jun 2021 16:01:04 +0300
Subject: [PATCH 2994/3292] platform/x86: intel_skl_int3472: Uninitialized
 variable in skl_int3472_handle_gpio_resources()

This function returns negative error codes, zero (to indicate that
everything has been completed successfully) and one (to indicate that
more resources need to be handled still).

This code prints an uninitialized error message when the function
returns one which potentially leads to an Oops.

Fixes: 5de691bffe57 ("platform/x86: Add intel_skl_int3472 driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Link: https://lore.kernel.org/r/YNXTkLNtiTDlFlZa@mwanda
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../platform/x86/intel/int3472/intel_skl_int3472_discrete.c   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c b/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
index 17c6fe830765..9fe0a2527e1c 100644
--- a/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
+++ b/drivers/platform/x86/intel/int3472/intel_skl_int3472_discrete.c
@@ -286,10 +286,10 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
 	int3472->ngpios++;
 	ACPI_FREE(obj);
 
-	if (ret)
+	if (ret < 0)
 		return dev_err_probe(int3472->dev, ret, err_msg);
 
-	return 0;
+	return ret;
 }
 
 static int skl_int3472_parse_crs(struct int3472_discrete_device *int3472)

From 0e695c3f7f66c66e0a1da90cc5378198a656d494 Mon Sep 17 00:00:00 2001
From: Prasanth KSR <prasanth.ksr@dell.com>
Date: Mon, 28 Jun 2021 14:19:06 +0530
Subject: [PATCH 2995/3292] platform/x86: dell-wmi-sysman: Change user
 experience when Admin/System Password is modified

Whenever user has changed an Admin/System Password using the sysfs,
then we are automatically copying the new password to existing
password field.

Co-developed-by: Divya Bharathi <divya.bharathi@dell.com>
Signed-off-by: Divya Bharathi <divya.bharathi@dell.com>
Signed-off-by: Prasanth KSR <prasanth.ksr@dell.com>
Link: https://lore.kernel.org/r/20210628084906.4233-1-prasanth.ksr@dell.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 .../x86/dell/dell-wmi-sysman/passwordattr-interface.c         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
index 339a082d6c18..86ec962aace9 100644
--- a/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
+++ b/drivers/platform/x86/dell/dell-wmi-sysman/passwordattr-interface.c
@@ -95,9 +95,9 @@ int set_new_password(const char *password_type, const char *new)
 
 	print_hex_dump_bytes("set new password data: ", DUMP_PREFIX_NONE, buffer, buffer_size);
 	ret = call_password_interface(wmi_priv.password_attr_wdev, buffer, buffer_size);
-	/* clear current_password here and use user input from wmi_priv.current_password */
+	/* on success copy the new password to current password */
 	if (!ret)
-		memset(current_password, 0, MAX_BUFF);
+		strscpy(current_password, new, MAX_BUFF);
 	/* explain to user the detailed failure reason */
 	else if (ret == -EOPNOTSUPP)
 		dev_err(&wmi_priv.password_attr_wdev->dev, "admin password must be configured\n");

From 0f1616f6df294fe460432630850c0e0a8c30a192 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 20 May 2021 14:52:46 +0200
Subject: [PATCH 2996/3292] gfs2: Fix do_gfs2_set_flags description

Commit 88b631cbfbeb ("gfs2: convert to fileattr") changed the argument list
without updating the description.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 493a83e3f590..7b757195fd97 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -210,7 +210,7 @@ void gfs2_set_inode_flags(struct inode *inode)
 
 /**
  * do_gfs2_set_flags - set flags on an inode
- * @filp: file pointer
+ * @inode: The inode
  * @reqflags: The flags to set
  * @mask: Indicates which flags are valid
  * @fsflags: The FS_* inode flags passed in

From 38a618dbf47f837f11df01052977dcaf31c5c2a8 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Tue, 8 Jun 2021 11:12:44 +0800
Subject: [PATCH 2997/3292] gfs2: Use list_move_tail instead of
 list_del/list_add_tail

Using list_move_tail() instead of list_del() + list_add_tail().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d9cb261f55b0..1f3902ecdded 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -212,8 +212,7 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
 
 	spin_lock(&lru_lock);
 
-	list_del(&gl->gl_lru);
-	list_add_tail(&gl->gl_lru, &lru_list);
+	list_move_tail(&gl->gl_lru, &lru_list);
 
 	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
 		set_bit(GLF_LRU, &gl->gl_flags);

From d3c51c55cb9274dd43c156f1f26b5eb4d5f2d58c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 21 Jun 2021 22:28:50 +0200
Subject: [PATCH 2998/3292] gfs2: Fix underflow in gfs2_page_mkwrite

On filesystems with a block size smaller than PAGE_SIZE and non-empty
files smaller then PAGE_SIZE, gfs2_page_mkwrite could end up allocating
excess blocks beyond the end of the file, similar to fallocate.  This
doesn't make sense; fix it.

Reported-by: Bob Peterson <rpeterso@redhat.com>
Fixes: 184b4e60853d ("gfs2: Fix end-of-file handling in gfs2_page_mkwrite")
Cc: stable@vger.kernel.org # v5.5+
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7b757195fd97..37041873bb95 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -450,8 +450,8 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 	file_update_time(vmf->vma->vm_file);
 
 	/* page is wholly or partially inside EOF */
-	if (offset > size - PAGE_SIZE)
-		length = offset_in_page(size);
+	if (size - offset < PAGE_SIZE)
+		length = size - offset;
 	else
 		length = PAGE_SIZE;
 

From 5d49d3508b3c67201bd3e1bf7f4ef049111b7051 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 28 Jun 2021 19:14:50 +0800
Subject: [PATCH 2999/3292] gfs2: Fix error handling in init_statfs

On an error path, init_statfs calls iput(pn) after pn has already been put.
Fix that by setting pn to NULL after the initial iput.

Fixes: 97fd734ba17e ("gfs2: lookup local statfs inodes prior to journal recovery")
Cc: stable@vger.kernel.org # v5.10+
Reported-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/ops_fstype.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 826f77d9cff5..5f4504dd0875 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -687,6 +687,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
 	}
 
 	iput(pn);
+	pn = NULL;
 	ip = GFS2_I(sdp->sd_sc_inode);
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
 				   &sdp->sd_sc_gh);

From 328aac5ecd119ede3633f7d17969b1ff34ccc784 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Tue, 22 Jun 2021 16:30:26 +0530
Subject: [PATCH 3000/3292] bpf, x86: Fix extable offset calculation

Commit 4c5de127598e1 ("bpf: Emit explicit NULL pointer checks for PROBE_LDX
instructions.") is emitting a couple of instructions before the actual load.
Consider those additional instructions while calculating extable offset.

Fixes: 4c5de127598e1 ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.")
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210622110026.1157847-1-ravi.bangoria@linux.ibm.com
---
 arch/x86/net/bpf_jit_comp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index db1e83813db5..e835164189f1 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1281,7 +1281,7 @@ st:			if (is_imm8(insn->off))
 			emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
 				struct exception_table_entry *ex;
-				u8 *_insn = image + proglen;
+				u8 *_insn = image + proglen + (start_of_ldx - temp);
 				s64 delta;
 
 				/* populate jmp_offset for JMP above */

From 1c35b07e6d3986474e5635be566e7bc79d97c64d Mon Sep 17 00:00:00 2001
From: Odin Ugedal <odin@uged.al>
Date: Thu, 24 Jun 2021 13:18:15 +0200
Subject: [PATCH 3001/3292] sched/fair: Ensure _sum and _avg values stay
 consistent

The _sum and _avg values are in general sync together with the PELT
divider. They are however not always completely in perfect sync,
resulting in situations where _sum gets to zero while _avg stays
positive. Such situations are undesirable.

This comes from the fact that PELT will increase period_contrib, also
increasing the PELT divider, without updating _sum and _avg values to
stay in perfect sync where (_sum == _avg * divider). However, such PELT
change will never lower _sum, making it impossible to end up in a
situation where _sum is zero and _avg is not.

Therefore, we need to ensure that when subtracting load outside PELT,
that when _sum is zero, _avg is also set to zero. This occurs when
(_sum < _avg * divider), and the subtracted (_avg * divider) is bigger
or equal to the current _sum, while the subtracted _avg is smaller than
the current _avg.

Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Link: https://lore.kernel.org/r/20210624111815.57937-1-odin@uged.al
---
 kernel/sched/fair.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4a3e61a88acc..45edf61eed73 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3657,15 +3657,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
 		r = removed_load;
 		sub_positive(&sa->load_avg, r);
-		sub_positive(&sa->load_sum, r * divider);
+		sa->load_sum = sa->load_avg * divider;
 
 		r = removed_util;
 		sub_positive(&sa->util_avg, r);
-		sub_positive(&sa->util_sum, r * divider);
+		sa->util_sum = sa->util_avg * divider;
 
 		r = removed_runnable;
 		sub_positive(&sa->runnable_avg, r);
-		sub_positive(&sa->runnable_sum, r * divider);
+		sa->runnable_sum = sa->runnable_avg * divider;
 
 		/*
 		 * removed_runnable is the unweighted version of removed_load so we

From 95b861a7935bf75f647959073093ab8058b88c26 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Sun, 27 Jun 2021 08:36:27 -0700
Subject: [PATCH 3002/3292] bpf: Allow bpf_get_current_ancestor_cgroup_id for
 tracing

Allow the helper to be called from tracing programs. This is needed to
handle cgroup hiererachies in the program.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210627153627.824198-1-namhyung@kernel.org
---
 kernel/trace/bpf_trace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7a52bc172841..64bd2d84367f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1017,6 +1017,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
+	case BPF_FUNC_get_current_ancestor_cgroup_id:
+		return &bpf_get_current_ancestor_cgroup_id_proto;
 #endif
 	case BPF_FUNC_send_signal:
 		return &bpf_send_signal_proto;

From ccff81e1d028bbbf8573d3364a87542386c707bf Mon Sep 17 00:00:00 2001
From: Rustam Kovhaev <rkovhaev@gmail.com>
Date: Sat, 26 Jun 2021 11:11:56 -0700
Subject: [PATCH 3003/3292] bpf: Fix false positive kmemleak report in
 bpf_ringbuf_area_alloc()

kmemleak scans struct page, but it does not scan the page content. If we
allocate some memory with kmalloc(), then allocate page with alloc_page(),
and if we put kmalloc pointer somewhere inside that page, kmemleak will
report kmalloc pointer as a false positive.

We can instruct kmemleak to scan the memory area by calling kmemleak_alloc()
and kmemleak_free(), but part of struct bpf_ringbuf is mmaped to user space,
and if struct bpf_ringbuf changes we would have to revisit and review size
argument in kmemleak_alloc(), because we do not want kmemleak to scan the
user space memory. Let's simplify things and use kmemleak_not_leak() here.

For posterity, also adding additional prior analysis from Andrii:

  I think either kmemleak or syzbot are misreporting this. I've added a
  bunch of printks around all allocations performed by BPF ringbuf. [...]
  On repro side I get these two warnings:

  [vmuser@archvm bpf]$ sudo ./repro
  BUG: memory leak
  unreferenced object 0xffff88810d538c00 (size 64):
    comm "repro", pid 2140, jiffies 4294692933 (age 14.540s)
    hex dump (first 32 bytes):
      00 af 19 04 00 ea ff ff c0 ae 19 04 00 ea ff ff  ................
      80 ae 19 04 00 ea ff ff c0 29 2e 04 00 ea ff ff  .........)......
    backtrace:
      [<0000000077bfbfbd>] __bpf_map_area_alloc+0x31/0xc0
      [<00000000587fa522>] ringbuf_map_alloc.cold.4+0x48/0x218
      [<0000000044d49e96>] __do_sys_bpf+0x359/0x1d90
      [<00000000f601d565>] do_syscall_64+0x2d/0x40
      [<0000000043d3112a>] entry_SYSCALL_64_after_hwframe+0x44/0xae

  BUG: memory leak
  unreferenced object 0xffff88810d538c80 (size 64):
    comm "repro", pid 2143, jiffies 4294699025 (age 8.448s)
    hex dump (first 32 bytes):
      80 aa 19 04 00 ea ff ff 00 ab 19 04 00 ea ff ff  ................
      c0 ab 19 04 00 ea ff ff 80 44 28 04 00 ea ff ff  .........D(.....
    backtrace:
      [<0000000077bfbfbd>] __bpf_map_area_alloc+0x31/0xc0
      [<00000000587fa522>] ringbuf_map_alloc.cold.4+0x48/0x218
      [<0000000044d49e96>] __do_sys_bpf+0x359/0x1d90
      [<00000000f601d565>] do_syscall_64+0x2d/0x40
      [<0000000043d3112a>] entry_SYSCALL_64_after_hwframe+0x44/0xae

  Note that both reported leaks (ffff88810d538c80 and ffff88810d538c00)
  correspond to pages array bpf_ringbuf is allocating and tracking properly
  internally. Note also that syzbot repro doesn't close FD of created BPF
  ringbufs, and even when ./repro itself exits with error, there are still
  two forked processes hanging around in my system. So clearly ringbuf maps
  are alive at that point. So reporting any memory leak looks weird at that
  point, because that memory is being used by active referenced BPF ringbuf.

  It's also a question why repro doesn't clean up its forks. But if I do a
  `pkill repro`, I do see that all the allocated memory is /properly/ cleaned
  up [and the] "leaks" are deallocated properly.

  BTW, if I add close() right after bpf() syscall in syzbot repro, I see that
  everything is immediately deallocated, like designed. And no memory leak
  is reported. So I don't think the problem is anywhere in bpf_ringbuf code,
  rather in the leak detection and/or repro itself.

Reported-by: syzbot+5d895828587f49e7fe9b@syzkaller.appspotmail.com
Signed-off-by: Rustam Kovhaev <rkovhaev@gmail.com>
[ Daniel: also included analysis from Andrii to the commit log ]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: syzbot+5d895828587f49e7fe9b@syzkaller.appspotmail.com
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/CAEf4BzYk+dqs+jwu6VKXP-RttcTEGFe+ySTGWT9CRNkagDiJVA@mail.gmail.com
Link: https://lore.kernel.org/lkml/YNTAqiE7CWJhOK2M@nuc10
Link: https://lore.kernel.org/lkml/20210615101515.GC26027@arm.com
Link: https://syzkaller.appspot.com/bug?extid=5d895828587f49e7fe9b
Link: https://lore.kernel.org/bpf/20210626181156.1873604-1-rkovhaev@gmail.com
---
 kernel/bpf/ringbuf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 84b3b35fc0d0..9e0c10c6892a 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -8,6 +8,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/poll.h>
+#include <linux/kmemleak.h>
 #include <uapi/linux/btf.h>
 
 #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
@@ -105,6 +106,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
 	rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
 		  VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
 	if (rb) {
+		kmemleak_not_leak(pages);
 		rb->pages = pages;
 		rb->nr_pages = nr_pages;
 		return rb;

From 8b0ed8443ae6458786580d36b7d5f8125535c5d4 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Fri, 2 Apr 2021 17:11:45 +0800
Subject: [PATCH 3004/3292] writeback: fix obtain a reference to a freeing
 memcg css

The caller of wb_get_create() should pin the memcg, because
wb_get_create() relies on this guarantee. The rcu read lock
only can guarantee that the memcg css returned by css_from_id()
cannot be released, but the reference of the memcg can be zero.

  rcu_read_lock()
  memcg_css = css_from_id()
  wb_get_create(memcg_css)
      cgwb_create(memcg_css)
          // css_get can change the ref counter from 0 back to 1
          css_get(memcg_css)
  rcu_read_unlock()

Fix it by holding a reference to the css before calling
wb_get_create(). This is not a problem I encountered in the
real world. Just the result of a code review.

Fixes: 682aa8e1a6a1 ("writeback: implement unlocked_inode_to_wb transaction and use it for stat updates")
Link: https://lore.kernel.org/r/20210402091145.80635-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e91980f49388..d52c291d7bcd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -508,9 +508,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	/* find and pin the new wb */
 	rcu_read_lock();
 	memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
-	if (memcg_css)
-		isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+	if (memcg_css && !css_tryget(memcg_css))
+		memcg_css = NULL;
 	rcu_read_unlock();
+	if (!memcg_css)
+		goto out_free;
+
+	isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+	css_put(memcg_css);
 	if (!isw->new_wb)
 		goto out_free;
 

From fade56410c22cacafb1be9f911a0afd3701d8366 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Fri, 25 Jun 2021 19:21:39 +0300
Subject: [PATCH 3005/3292] net: lwtunnel: handle MTU calculation in forwading

Commit 14972cbd34ff ("net: lwtunnel: Handle fragmentation") moved
fragmentation logic away from lwtunnel by carry encap headroom and
use it in output MTU calculation. But the forwarding part was not
covered and created difference in MTU for output and forwarding and
further to silent drops on ipv4 forwarding path. Fix it by taking
into account lwtunnel encap headroom.

The same commit also introduced difference in how to treat RTAX_MTU
in IPv4 and IPv6 where latter explicitly removes lwtunnel encap
headroom from route MTU. Make IPv4 version do the same.

Fixes: 14972cbd34ff ("net: lwtunnel: Handle fragmentation")
Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h        | 12 ++++++++----
 include/net/ip6_route.h | 16 ++++++++++++----
 net/ipv4/route.c        |  3 ++-
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index e20874059f82..d9683bef8684 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -31,6 +31,7 @@
 #include <net/flow.h>
 #include <net/flow_dissector.h>
 #include <net/netns/hash.h>
+#include <net/lwtunnel.h>
 
 #define IPV4_MAX_PMTU		65535U		/* RFC 2675, Section 5.1 */
 #define IPV4_MIN_MTU		68			/* RFC 791 */
@@ -445,22 +446,25 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 
 	/* 'forwarding = true' case should always honour route mtu */
 	mtu = dst_metric_raw(dst, RTAX_MTU);
-	if (mtu)
-		return mtu;
+	if (!mtu)
+		mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
 
-	return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
 					  const struct sk_buff *skb)
 {
+	unsigned int mtu;
+
 	if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
 		bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
 
 		return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
 	}
 
-	return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+	mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+	return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
 }
 
 struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index f51a118bfce8..f14149df5a65 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -265,11 +265,18 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
+	int mtu;
+
 	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 				inet6_sk(skb->sk) : NULL;
 
-	return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
-	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+	if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+		mtu = READ_ONCE(skb_dst(skb)->dev->mtu);
+		mtu -= lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
+	} else
+		mtu = dst_mtu(skb_dst(skb));
+
+	return mtu;
 }
 
 static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
@@ -317,7 +324,7 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 	if (dst_metric_locked(dst, RTAX_MTU)) {
 		mtu = dst_metric_raw(dst, RTAX_MTU);
 		if (mtu)
-			return mtu;
+			goto out;
 	}
 
 	mtu = IPV6_MIN_MTU;
@@ -327,7 +334,8 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 		mtu = idev->cnf.mtu6;
 	rcu_read_unlock();
 
-	return mtu;
+out:
+	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 u32 ip6_mtu_from_fib6(const struct fib6_result *res,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a36ac98476f..78d1e5afc452 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1306,7 +1306,7 @@ INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
 		mtu = dst_metric_raw(dst, RTAX_MTU);
 
 	if (mtu)
-		return mtu;
+		goto out;
 
 	mtu = READ_ONCE(dst->dev->mtu);
 
@@ -1315,6 +1315,7 @@ INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
 			mtu = 576;
 	}
 
+out:
 	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
 
 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);

From 99c8719b79814cab3fd43519591dcc41c978a48c Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 25 Jun 2021 15:33:01 +0200
Subject: [PATCH 3006/3292] bareudp: allow redirecting bareudp packets to eth
 devices

Even though bareudp transports L3 data (typically IP or MPLS), it needs
to reset the mac_header pointer, so that other parts of the stack don't
mistakenly access the outer header after the packet has been
decapsulated.

This allows to push an Ethernet header to bareudp packets and redirect
them to an Ethernet device:

  $ tc filter add dev bareudp0 ingress matchall      \
      action vlan push_eth dst_mac 00:00:5e:00:53:01 \
                           src_mac 00:00:5e:00:53:00 \
      action mirred egress redirect dev eth0

Without this patch, push_eth refuses to add an ethernet header because
the skb appears to already have a MAC header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bareudp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index edfad93e7b68..a7ee0af1af90 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -133,6 +133,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	skb->dev = bareudp->dev;
 	oiph = skb_network_header(skb);
 	skb_reset_network_header(skb);
+	skb_reset_mac_header(skb);
 
 	if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET)
 		err = IP_ECN_decapsulate(oiph, skb);

From 7ad136fd288c0e0177eb29e04ec289e1b873b270 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 25 Jun 2021 15:33:04 +0200
Subject: [PATCH 3007/3292] ipip: allow redirecting ipip and mplsip packets to
 eth devices

Even though ipip transports IPv4 or MPLS packets, it needs to reset the
mac_header pointer, so that other parts of the stack don't mistakenly
access the outer header after the packet has been decapsulated.

This allows to push an Ethernet header to ipip or mplsip packets and
redirect them to an Ethernet device:

  $ tc filter add dev ipip0 ingress matchall         \
      action vlan push_eth dst_mac 00:00:5e:00:53:01 \
                           src_mac 00:00:5e:00:53:00 \
      action mirred egress redirect dev eth0

Without this patch, push_eth refuses to add an ethernet header because
the skb appears to already have a MAC header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipip.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index d5bfa087c23a..266c65577ba6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 			if (!tun_dst)
 				return 0;
 		}
+		skb_reset_mac_header(skb);
+
 		return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 	}
 

From 730eed2772e740c30229d03e3d578cc00a5ae304 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 25 Jun 2021 15:33:08 +0200
Subject: [PATCH 3008/3292] sit: allow redirecting ip6ip, ipip and mplsip
 packets to eth devices

Even though sit transports L3 data (IPv6, IPv4 or MPLS) packets, it
needs to reset the mac_header pointer, so that other parts of the stack
don't mistakenly access the outer header after the packet has been
decapsulated. There are two rx handlers to modify: ipip6_rcv() for the
ip6ip mode and sit_tunnel_rcv() which is used to re-implement the ipip
and mplsip modes of ipip.ko.

This allows to push an Ethernet header to sit packets and redirect
them to an Ethernet device:

  $ tc filter add dev sit0 ingress matchall          \
      action vlan push_eth dst_mac 00:00:5e:00:53:01 \
                           src_mac 00:00:5e:00:53:00 \
      action mirred egress redirect dev eth0

Without this patch, push_eth refuses to add an ethernet header because
the skb appears to already have a MAC header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e0a39b0bb4c1..df5bea818410 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb)
 		 * old iph is no longer valid
 		 */
 		iph = (const struct iphdr *)skb_mac_header(skb);
+		skb_reset_mac_header(skb);
+
 		err = IP_ECN_decapsulate(iph, skb);
 		if (unlikely(err)) {
 			if (log_ecn_error)
@@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 			tpi = &ipip_tpi;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
+		skb_reset_mac_header(skb);
+
 		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 	}
 

From aab1e898c26c3e4289c62b6d6482948672fab939 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 25 Jun 2021 15:33:13 +0200
Subject: [PATCH 3009/3292] gre: let mac_header point to outer header only when
 necessary

Commit e271c7b4420d ("gre: do not keep the GRE header around in collect
medata mode") did reset the mac_header for the collect_md case. Let's
extend this behaviour to classical gre devices as well.

ipgre_header_parse() seems to be the only case that requires mac_header
to point to the outer header. We can detect this case accurately by
checking ->header_ops. For all other cases, we can reset mac_header.

This allows to push an Ethernet header to ipgre packets and redirect
them to an Ethernet device:

  $ tc filter add dev gre0 ingress matchall          \
      action vlan push_eth dst_mac 00:00:5e:00:53:01 \
                           src_mac 00:00:5e:00:53:00 \
      action mirred egress redirect dev eth0

Before this patch, this worked only for collect_md gre devices.
Now this works for regular gre devices as well. Only the special case
of gre devices that use ipgre_header_ops isn't supported.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a68bf4c6fe9b..12dca0c85f3c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
+static const struct header_ops ipgre_header_ops;
+
 static int ipgre_tunnel_init(struct net_device *dev);
 static void erspan_build_header(struct sk_buff *skb,
 				u32 id, u32 index,
@@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 					   raw_proto, false) < 0)
 			goto drop;
 
-		if (tunnel->dev->type != ARPHRD_NONE)
+		/* Special case for ipgre_header_parse(), which expects the
+		 * mac_header to point to the outer IP header.
+		 */
+		if (tunnel->dev->header_ops == &ipgre_header_ops)
 			skb_pop_mac_header(skb);
 		else
 			skb_reset_mac_header(skb);

From da5a2e49f064a86a3b102b20c545f855a7298394 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 25 Jun 2021 15:33:17 +0200
Subject: [PATCH 3010/3292] ip6_tunnel: allow redirecting ip6gre and ipxip6
 packets to eth devices

Reset the mac_header pointer even when the tunnel transports only L3
data (in the ARPHRD_ETHER case, this is already done by eth_type_trans).
This prevents other parts of the stack from mistakenly accessing the
outer header after the packet has been decapsulated.

In practice, this allows to push an Ethernet header to ipip6, ip6ip6,
mplsip6 or ip6gre packets and redirect them to an Ethernet device:

  $ tc filter add dev ip6tnl0 ingress matchall       \
      action vlan push_eth dst_mac 00:00:5e:00:53:01 \
                           src_mac 00:00:5e:00:53:00 \
      action mirred egress redirect dev eth0

Without this patch, push_eth refuses to add an ethernet header because
the skb appears to already have a MAC header.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 288bafded998..0b8a38687ce4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 	} else {
 		skb->dev = tunnel->dev;
+		skb_reset_mac_header(skb);
 	}
 
 	skb_reset_network_header(skb);

From b2d898c8a523f44ee7b3eea608e81a6e2264579f Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 25 Jun 2021 15:33:23 +0200
Subject: [PATCH 3011/3292] gtp: reset mac_header after decap

For consistency with other L3 tunnel devices, reset the mac_header
pointer after decapsulation. This makes the mac_header 0 bytes long,
thus making it clear that this skb has no mac_header.

Compile tested only.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 1c9023d47e00..30e0a10595a1 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -201,6 +201,7 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
 	 * calculate the transport header.
 	 */
 	skb_reset_network_header(skb);
+	skb_reset_mac_header(skb);
 
 	skb->dev = pctx->dev;
 

From 3f2db250099f46988088800052cdf2332c7aba61 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 25 Jun 2021 23:23:48 +0300
Subject: [PATCH 3012/3292] net: sched: fix warning in
 tcindex_alloc_perfect_hash

Syzbot reported warning in tcindex_alloc_perfect_hash. The problem
was in too big cp->hash, which triggers warning in kmalloc. Since
cp->hash comes from userspace, there is no need to warn if value
is not correct

Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()")
Reported-and-tested-by: syzbot+1071ad60cd7df39fdadb@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_tcindex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index c4007b9cd16d..5b274534264c 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -304,7 +304,7 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
 	int i, err = 0;
 
 	cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
-			      GFP_KERNEL);
+			      GFP_KERNEL | __GFP_NOWARN);
 	if (!cp->perfect)
 		return -ENOMEM;
 

From c4512c63b1193c73b3f09c598a6d0a7f88da1dd8 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Fri, 25 Jun 2021 14:25:22 -0700
Subject: [PATCH 3013/3292] mptcp: fix 'masking a bool' warning

Dan Carpenter reported an issue introduced in
commit fde56eea01f9 ("mptcp: refine mptcp_cleanup_rbuf") where a new
boolean (ack_pending) is masked with 0x9.

This is not the intention to ignore values by using a boolean. This
variable should not have a 'bool' type: we should keep the 'u8' to allow
this comparison.

Fixes: fde56eea01f9 ("mptcp: refine mptcp_cleanup_rbuf")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ce0c45dfb79e..7bb82424e551 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -455,7 +455,7 @@ static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
 static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
 {
 	const struct inet_connection_sock *icsk = inet_csk(ssk);
-	bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+	u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending);
 	const struct tcp_sock *tp = tcp_sk(ssk);
 
 	return (ack_pending & ICSK_ACK_SCHED) &&

From 5c0de3d72f8c05678ed769bea24e98128f7ab570 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 28 Jun 2021 09:59:37 -0400
Subject: [PATCH 3014/3292] dm writecache: make writeback pause configurable

Commit 95b88f4d71cb953e02206be3c757083601391a0f ("dm writecache: pause
writeback if cache full and origin being written directly") introduced a
code that pauses cache flushing if we are issuing writes directly to the
origin.

Improve that initial commit by making the timeout code configurable
(via the option "pause_writeback"). Also change the default from 1s to
3s because it performed better.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 .../admin-guide/device-mapper/writecache.rst  |  5 +-
 drivers/md/dm-io-tracker.h                    | 12 +++++
 drivers/md/dm-writecache.c                    | 48 +++++++++++++++----
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst
index 977f82b5a811..65427d8dfca6 100644
--- a/Documentation/admin-guide/device-mapper/writecache.rst
+++ b/Documentation/admin-guide/device-mapper/writecache.rst
@@ -12,7 +12,6 @@ first sector should contain valid superblock from previous invocation.
 Constructor parameters:
 
 1. type of the cache device - "p" or "s"
-
 	- p - persistent memory
 	- s - SSD
 2. the underlying device that will be cached
@@ -21,7 +20,6 @@ Constructor parameters:
    size)
 5. the number of optional parameters (the parameters with an argument
    count as two)
-
 	start_sector n		(default: 0)
 		offset from the start of cache device in 512-byte sectors
 	high_watermark n	(default: 50)
@@ -71,6 +69,9 @@ Constructor parameters:
 	metadata_only
 		only metadata is promoted to the cache. This option
 		improves performance for heavier REQ_META workloads.
+	pause_writeback n	(default: 3000)
+		pause writeback if there was some write I/O redirected to
+		the origin volume in the last n milliseconds
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number
diff --git a/drivers/md/dm-io-tracker.h b/drivers/md/dm-io-tracker.h
index 1dcf01f9f066..bdcc6273ebf0 100644
--- a/drivers/md/dm-io-tracker.h
+++ b/drivers/md/dm-io-tracker.h
@@ -45,6 +45,18 @@ static inline bool dm_iot_idle_for(struct dm_io_tracker *iot, unsigned long j)
 	return r;
 }
 
+static inline unsigned long dm_iot_idle_time(struct dm_io_tracker *iot)
+{
+	unsigned long r = 0;
+
+	spin_lock_irq(&iot->lock);
+	if (!iot->in_flight)
+		r = jiffies - iot->idle_time;
+	spin_unlock_irq(&iot->lock);
+
+	return r;
+}
+
 static inline void dm_iot_io_begin(struct dm_io_tracker *iot, sector_t len)
 {
 	spin_lock_irq(&iot->lock);
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index d70342c9003a..e21e29e81bbf 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -30,6 +30,7 @@
 #define AUTOCOMMIT_MSEC			1000
 #define MAX_AGE_DIV			16
 #define MAX_AGE_UNSPECIFIED		-1UL
+#define PAUSE_WRITEBACK			(HZ * 3)
 
 #define BITMAP_GRANULARITY	65536
 #if BITMAP_GRANULARITY < PAGE_SIZE
@@ -125,6 +126,7 @@ struct dm_writecache {
 	size_t freelist_high_watermark;
 	size_t freelist_low_watermark;
 	unsigned long max_age;
+	unsigned long pause;
 
 	unsigned uncommitted_blocks;
 	unsigned autocommit_blocks;
@@ -174,11 +176,13 @@ struct dm_writecache {
 	bool cleaner:1;
 	bool cleaner_set:1;
 	bool metadata_only:1;
+	bool pause_set:1;
 
 	unsigned high_wm_percent_value;
 	unsigned low_wm_percent_value;
 	unsigned autocommit_time_value;
 	unsigned max_age_value;
+	unsigned pause_value;
 
 	unsigned writeback_all;
 	struct workqueue_struct *writeback_wq;
@@ -1470,9 +1474,11 @@ bio_copy:
 	}
 
 unlock_remap_origin:
-	if (bio_data_dir(bio) != READ) {
-		dm_iot_io_begin(&wc->iot, 1);
-		bio->bi_private = (void *)2;
+	if (likely(wc->pause != 0)) {
+		 if (bio_op(bio) == REQ_OP_WRITE) {
+			dm_iot_io_begin(&wc->iot, 1);
+			bio->bi_private = (void *)2;
+		}
 	}
 	bio_set_dev(bio, wc->dev->bdev);
 	wc_unlock(wc);
@@ -1837,10 +1843,19 @@ static void writecache_writeback(struct work_struct *work)
 		dm_kcopyd_client_flush(wc->dm_kcopyd);
 	}
 
-	if (!wc->writeback_all && !dm_suspended(wc->ti)) {
-		while (!dm_iot_idle_for(&wc->iot, HZ)) {
-			cond_resched();
-			msleep(1000);
+	if (likely(wc->pause != 0)) {
+		while (1) {
+			unsigned long idle;
+			if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) ||
+			    unlikely(dm_suspended(wc->ti)))
+				break;
+			idle = dm_iot_idle_time(&wc->iot);
+			if (idle >= wc->pause)
+				break;
+			idle = wc->pause - idle;
+			if (idle > HZ)
+				idle = HZ;
+			schedule_timeout_idle(idle);
 		}
 	}
 
@@ -2113,7 +2128,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	struct wc_memory_superblock s;
 
 	static struct dm_arg _args[] = {
-		{0, 17, "Invalid number of feature args"},
+		{0, 18, "Invalid number of feature args"},
 	};
 
 	as.argc = argc;
@@ -2206,6 +2221,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			goto bad;
 		}
 	} else {
+		wc->pause = PAUSE_WRITEBACK;
 		r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
 		if (r) {
 			ti->error = "Could not allocate mempool";
@@ -2344,6 +2360,18 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			} else goto invalid_optional;
 		} else if (!strcasecmp(string, "metadata_only")) {
 			wc->metadata_only = true;
+		} else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
+			unsigned pause_msecs;
+			if (WC_MODE_PMEM(wc))
+				goto invalid_optional;
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1)
+				goto invalid_optional;
+			if (pause_msecs > 60000)
+				goto invalid_optional;
+			wc->pause = msecs_to_jiffies(pause_msecs);
+			wc->pause_set = true;
+			wc->pause_value = pause_msecs;
 		} else {
 invalid_optional:
 			r = -EINVAL;
@@ -2569,6 +2597,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
 			extra_args++;
 		if (wc->metadata_only)
 			extra_args++;
+		if (wc->pause_set)
+			extra_args += 2;
 
 		DMEMIT("%u", extra_args);
 		if (wc->start_sector_set)
@@ -2591,6 +2621,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
 		if (wc->metadata_only)
 			DMEMIT(" metadata_only");
+		if (wc->pause_set)
+			DMEMIT(" pause_writeback %u", wc->pause_value);
 		break;
 	}
 }

From 0c6de0c943dbb42831bf7502eb5c007f71e752d2 Mon Sep 17 00:00:00 2001
From: Menglong Dong <dong.menglong@zte.com.cn>
Date: Sun, 27 Jun 2021 23:37:44 -0700
Subject: [PATCH 3015/3292] net: tipc: fix FB_MTU eat two pages

FB_MTU is used in 'tipc_msg_build()' to alloc smaller skb when memory
allocation fails, which can avoid unnecessary sending failures.

The value of FB_MTU now is 3744, and the data size will be:

  (3744 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \
    SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + 3))

which is larger than one page(4096), and two pages will be allocated.

To avoid it, replace '3744' with a calculation:

  (PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) - \
    SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

What's more, alloc_skb_fclone() will call SKB_DATA_ALIGN for data size,
and it's not necessary to make alignment for buf_size in
tipc_buf_acquire(). So, just remove it.

Fixes: 4c94cc2d3d57 ("tipc: fall back to smaller MTU if allocation of local send skb fails")
Signed-off-by: Menglong Dong <dong.menglong@zte.com.cn>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c |  2 +-
 net/tipc/msg.c   | 17 ++++++++---------
 net/tipc/msg.h   |  3 ++-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d4beca895992..593846d25214 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net)
 	spin_lock_init(&tipc_net(net)->bclock);
 
 	if (!tipc_link_bc_create(net, 0, 0, NULL,
-				 FB_MTU,
+				 one_page_mtu,
 				 BCLINK_WIN_DEFAULT,
 				 BCLINK_WIN_DEFAULT,
 				 0,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ce6ab54822d8..7053c22e393e 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -44,12 +44,15 @@
 #define MAX_FORWARD_SIZE 1024
 #ifdef CONFIG_TIPC_CRYPTO
 #define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
-#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE)
+#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
 #else
 #define BUF_HEADROOM (LL_MAX_HEADER + 48)
-#define BUF_TAILROOM 16
+#define BUF_OVERHEAD BUF_HEADROOM
 #endif
 
+const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
+			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 static unsigned int align(unsigned int i)
 {
 	return (i + 3) & ~3u;
@@ -69,13 +72,8 @@ static unsigned int align(unsigned int i)
 struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
 {
 	struct sk_buff *skb;
-#ifdef CONFIG_TIPC_CRYPTO
-	unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u;
-#else
-	unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-#endif
 
-	skb = alloc_skb_fclone(buf_size, gfp);
+	skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
 	if (skb) {
 		skb_reserve(skb, BUF_HEADROOM);
 		skb_put(skb, size);
@@ -395,7 +393,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
 		if (unlikely(!skb)) {
 			if (pktmax != MAX_MSG_SIZE)
 				return -ENOMEM;
-			rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+			rc = tipc_msg_build(mhdr, m, offset, dsz,
+					    one_page_mtu, list);
 			if (rc != dsz)
 				return rc;
 			if (tipc_msg_assemble(list))
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5d64596ba987..64ae4c4c44f8 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -99,9 +99,10 @@ struct plist;
 #define MAX_H_SIZE                60	/* Largest possible TIPC header size */
 
 #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define FB_MTU                  3744
 #define TIPC_MEDIA_INFO_OFFSET	5
 
+extern const int one_page_mtu;
+
 struct tipc_skb_cb {
 	union {
 		struct {

From d4cfb7fe5713521280925019e7a7857b373aa627 Mon Sep 17 00:00:00 2001
From: Menglong Dong <dong.menglong@zte.com.cn>
Date: Sun, 27 Jun 2021 23:37:45 -0700
Subject: [PATCH 3016/3292] net: tipc: replace align() with ALIGN in msg.c

The function align() which is defined in msg.c is redundant, replace it
with ALIGN() and introduce a BUF_ALIGN().

Signed-off-by: Menglong Dong <dong.menglong@zte.com.cn>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 7053c22e393e..5c9fd4791c4b 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,6 +41,7 @@
 #include "name_table.h"
 #include "crypto.h"
 
+#define BUF_ALIGN(x) ALIGN(x, 4)
 #define MAX_FORWARD_SIZE 1024
 #ifdef CONFIG_TIPC_CRYPTO
 #define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
@@ -53,11 +54,6 @@
 const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
 			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-static unsigned int align(unsigned int i)
-{
-	return (i + 3) & ~3u;
-}
-
 /**
  * tipc_buf_acquire - creates a TIPC message buffer
  * @size: message size (including TIPC header)
@@ -489,7 +485,7 @@ static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
 
 	msz = msg_size(msg);
 	bsz = msg_size(bmsg);
-	offset = align(bsz);
+	offset = BUF_ALIGN(bsz);
 	pad = offset - bsz;
 
 	if (unlikely(skb_tailroom(bskb) < (pad + msz)))
@@ -546,7 +542,7 @@ bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
 
 	/* Make a new bundle of the two messages if possible */
 	tsz = msg_size(buf_msg(tskb));
-	if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+	if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg)))
 		return true;
 	if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
 				      GFP_ATOMIC)))
@@ -605,7 +601,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
 	if (unlikely(!tipc_msg_validate(iskb)))
 		goto none;
 
-	*pos += align(imsz);
+	*pos += BUF_ALIGN(imsz);
 	return true;
 none:
 	kfree_skb(skb);

From 03a92fe8cedb6f619df416d38d0b57fd55070cd7 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 26 Jun 2021 09:00:16 +0800
Subject: [PATCH 3017/3292] net: hns3: add support for FD counter in debugfs

Previously, the flow director counter is not enabled. To improve the
maintainability for chechking whether flow director hit or not, enable
flow director counter for each function, and add debugfs query inerface
to query the counters for each function.

The debugfs command is below:
cat fd_counter
func_id    hit_times
pf         0
vf0        0
vf1        0

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  7 ++++
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         |  9 +++++
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 37 +++++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 10 ++++-
 5 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0b202f4def83..a6ef67e47c8a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -290,6 +290,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_RX_QUEUE_INFO,
 	HNAE3_DBG_CMD_TX_QUEUE_INFO,
 	HNAE3_DBG_CMD_FD_TCAM,
+	HNAE3_DBG_CMD_FD_COUNTER,
 	HNAE3_DBG_CMD_MAC_TNL_STATUS,
 	HNAE3_DBG_CMD_SERV_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 34b6cd904a1a..b72fdb94df63 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -323,6 +323,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "fd_counter",
+		.cmd = HNAE3_DBG_CMD_FD_COUNTER,
+		.dentry = HNS3_DBG_DENTRY_FD,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index a322dfeba5cf..18bde77ef944 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -248,6 +248,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_FD_KEY_CONFIG		= 0x1202,
 	HCLGE_OPC_FD_TCAM_OP		= 0x1203,
 	HCLGE_OPC_FD_AD_OP		= 0x1204,
+	HCLGE_OPC_FD_CNT_OP		= 0x1205,
 	HCLGE_OPC_FD_USER_DEF_OP	= 0x1207,
 
 	/* MDIO command */
@@ -1109,6 +1110,14 @@ struct hclge_fd_ad_config_cmd {
 	u8 rsv2[8];
 };
 
+struct hclge_fd_ad_cnt_read_cmd {
+	u8 rsv0[4];
+	__le16 index;
+	u8 rsv1[2];
+	__le64 cnt;
+	u8 rsv2[8];
+};
+
 #define HCLGE_FD_USER_DEF_OFT_S		0
 #define HCLGE_FD_USER_DEF_OFT_M		GENMASK(14, 0)
 #define HCLGE_FD_USER_DEF_EN_B		15
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 6fc50d09b9db..b69c54d365a7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1549,6 +1549,39 @@ out:
 	return ret;
 }
 
+static int hclge_dbg_dump_fd_counter(struct hclge_dev *hdev, char *buf, int len)
+{
+	u8 func_num = pci_num_vf(hdev->pdev) + 1; /* pf and enabled vf num */
+	struct hclge_fd_ad_cnt_read_cmd *req;
+	char str_id[HCLGE_DBG_ID_LEN];
+	struct hclge_desc desc;
+	int pos = 0;
+	int ret;
+	u64 cnt;
+	u8 i;
+
+	pos += scnprintf(buf + pos, len - pos,
+			 "func_id\thit_times\n");
+
+	for (i = 0; i < func_num; i++) {
+		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_CNT_OP, true);
+		req = (struct hclge_fd_ad_cnt_read_cmd *)desc.data;
+		req->index = cpu_to_le16(i);
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev, "failed to get fd counter, ret = %d\n",
+				ret);
+			return ret;
+		}
+		cnt = le64_to_cpu(req->cnt);
+		hclge_dbg_get_func_id_str(str_id, i);
+		pos += scnprintf(buf + pos, len - pos,
+				 "%s\t%llu\n", str_id, cnt);
+	}
+
+	return 0;
+}
+
 int hclge_dbg_dump_rst_info(struct hclge_dev *hdev, char *buf, int len)
 {
 	int pos = 0;
@@ -2375,6 +2408,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_VLAN_CONFIG,
 		.dbg_dump = hclge_dbg_dump_vlan_config,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_FD_COUNTER,
+		.dbg_dump = hclge_dbg_dump_fd_counter,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f3e482ab3c71..dd3354a57c62 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6000,8 +6000,14 @@ static int hclge_config_action(struct hclge_dev *hdev, u8 stage,
 		ad_data.queue_id = rule->queue_id;
 	}
 
-	ad_data.use_counter = false;
-	ad_data.counter_id = 0;
+	if (hdev->fd_cfg.cnt_num[HCLGE_FD_STAGE_1]) {
+		ad_data.use_counter = true;
+		ad_data.counter_id = rule->vf_id %
+				     hdev->fd_cfg.cnt_num[HCLGE_FD_STAGE_1];
+	} else {
+		ad_data.use_counter = false;
+		ad_data.counter_id = 0;
+	}
 
 	ad_data.use_next_stage = false;
 	ad_data.next_input_key = 0;

From d59daf6a4ceedf342f349e94f1300e1598213252 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 26 Jun 2021 09:00:17 +0800
Subject: [PATCH 3018/3292] net: hns3: add support for dumping MAC umv counter
 in debugfs

This patch adds support of dumping MAC umv counter in debugfs,
which will be helpful for debugging.

The display style is below:
$ cat umv_info
num_alloc_vport  : 2
max_umv_size     : 256
wanted_umv_size  : 256
priv_umv_size    : 85
share_umv_size   : 86
vport(0) used_umv_num : 1
vport(1) used_umv_num : 1

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../ethernet/hisilicon/hns3/hns3_debugfs.c    |  7 ++++
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     | 34 +++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a6ef67e47c8a..e0b7c3c44e7b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -293,6 +293,7 @@ enum hnae3_dbg_cmd {
 	HNAE3_DBG_CMD_FD_COUNTER,
 	HNAE3_DBG_CMD_MAC_TNL_STATUS,
 	HNAE3_DBG_CMD_SERV_INFO,
+	HNAE3_DBG_CMD_UMV_INFO,
 	HNAE3_DBG_CMD_UNKNOWN,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index b72fdb94df63..532523069d74 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -330,6 +330,13 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
 		.buf_len = HNS3_DBG_READ_LEN,
 		.init = hns3_dbg_common_file_init,
 	},
+	{
+		.name = "umv_info",
+		.cmd = HNAE3_DBG_CMD_UMV_INFO,
+		.dentry = HNS3_DBG_DENTRY_COMMON,
+		.buf_len = HNS3_DBG_READ_LEN,
+		.init = hns3_dbg_common_file_init,
+	},
 };
 
 static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index b69c54d365a7..288788186ecc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1927,6 +1927,36 @@ static void hclge_dbg_dump_mac_list(struct hclge_dev *hdev, char *buf, int len,
 	}
 }
 
+static int hclge_dbg_dump_umv_info(struct hclge_dev *hdev, char *buf, int len)
+{
+	u8 func_num = pci_num_vf(hdev->pdev) + 1;
+	struct hclge_vport *vport;
+	int pos = 0;
+	u8 i;
+
+	pos += scnprintf(buf, len, "num_alloc_vport   : %u\n",
+			  hdev->num_alloc_vport);
+	pos += scnprintf(buf + pos, len - pos, "max_umv_size     : %u\n",
+			 hdev->max_umv_size);
+	pos += scnprintf(buf + pos, len - pos, "wanted_umv_size  : %u\n",
+			 hdev->wanted_umv_size);
+	pos += scnprintf(buf + pos, len - pos, "priv_umv_size    : %u\n",
+			 hdev->priv_umv_size);
+
+	mutex_lock(&hdev->vport_lock);
+	pos += scnprintf(buf + pos, len - pos, "share_umv_size   : %u\n",
+			 hdev->share_umv_size);
+	for (i = 0; i < func_num; i++) {
+		vport = &hdev->vport[i];
+		pos += scnprintf(buf + pos, len - pos,
+				 "vport(%u) used_umv_num : %u\n",
+				 i, vport->used_umv_num);
+	}
+	mutex_unlock(&hdev->vport_lock);
+
+	return 0;
+}
+
 static int hclge_get_vlan_rx_offload_cfg(struct hclge_dev *hdev, u8 vf_id,
 					 struct hclge_dbg_vlan_cfg *vlan_cfg)
 {
@@ -2412,6 +2442,10 @@ static const struct hclge_dbg_func hclge_dbg_cmd_func[] = {
 		.cmd = HNAE3_DBG_CMD_FD_COUNTER,
 		.dbg_dump = hclge_dbg_dump_fd_counter,
 	},
+	{
+		.cmd = HNAE3_DBG_CMD_UMV_INFO,
+		.dbg_dump = hclge_dbg_dump_umv_info,
+	},
 };
 
 int hclge_dbg_read_cmd(struct hnae3_handle *handle, enum hnae3_dbg_cmd cmd,

From 78eeadb8fea6d1a37d5060fe2ea0a0b45f8d8860 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:44 -0400
Subject: [PATCH 3019/3292] bnxt_en: Update firmware interface to 1.10.2.47

Adding the PTP related firmware interface is the main change.

There is also a name change for admin_mtu, requiring code fixup.

Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 667 ++++++++++++++++--
 .../net/ethernet/broadcom/bnxt/bnxt_sriov.c   |   4 +-
 2 files changed, 629 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 6199f125bc13..3fc6781c5b98 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -189,6 +189,8 @@ struct cmd_nums {
 	#define HWRM_QUEUE_VLANPRI_QCAPS                  0x83UL
 	#define HWRM_QUEUE_VLANPRI2PRI_QCFG               0x84UL
 	#define HWRM_QUEUE_VLANPRI2PRI_CFG                0x85UL
+	#define HWRM_QUEUE_GLOBAL_CFG                     0x86UL
+	#define HWRM_QUEUE_GLOBAL_QCFG                    0x87UL
 	#define HWRM_CFA_L2_FILTER_ALLOC                  0x90UL
 	#define HWRM_CFA_L2_FILTER_FREE                   0x91UL
 	#define HWRM_CFA_L2_FILTER_CFG                    0x92UL
@@ -250,6 +252,8 @@ struct cmd_nums {
 	#define HWRM_PORT_SFP_SIDEBAND_QCFG               0xd7UL
 	#define HWRM_FW_STATE_UNQUIESCE                   0xd8UL
 	#define HWRM_PORT_DSC_DUMP                        0xd9UL
+	#define HWRM_PORT_EP_TX_QCFG                      0xdaUL
+	#define HWRM_PORT_EP_TX_CFG                       0xdbUL
 	#define HWRM_TEMP_MONITOR_QUERY                   0xe0UL
 	#define HWRM_REG_POWER_QUERY                      0xe1UL
 	#define HWRM_CORE_FREQUENCY_QUERY                 0xe2UL
@@ -305,6 +309,8 @@ struct cmd_nums {
 	#define HWRM_CFA_EEM_OP                           0x123UL
 	#define HWRM_CFA_ADV_FLOW_MGNT_QCAPS              0x124UL
 	#define HWRM_CFA_TFLIB                            0x125UL
+	#define HWRM_CFA_LAG_GROUP_MEMBER_RGTR            0x126UL
+	#define HWRM_CFA_LAG_GROUP_MEMBER_UNRGTR          0x127UL
 	#define HWRM_ENGINE_CKV_STATUS                    0x12eUL
 	#define HWRM_ENGINE_CKV_CKEK_ADD                  0x12fUL
 	#define HWRM_ENGINE_CKV_CKEK_DELETE               0x130UL
@@ -356,6 +362,12 @@ struct cmd_nums {
 	#define HWRM_STAT_EXT_CTX_QUERY                   0x199UL
 	#define HWRM_FUNC_SPD_CFG                         0x19aUL
 	#define HWRM_FUNC_SPD_QCFG                        0x19bUL
+	#define HWRM_FUNC_PTP_PIN_QCFG                    0x19cUL
+	#define HWRM_FUNC_PTP_PIN_CFG                     0x19dUL
+	#define HWRM_FUNC_PTP_CFG                         0x19eUL
+	#define HWRM_FUNC_PTP_TS_QUERY                    0x19fUL
+	#define HWRM_FUNC_PTP_EXT_CFG                     0x1a0UL
+	#define HWRM_FUNC_PTP_EXT_QCFG                    0x1a1UL
 	#define HWRM_SELFTEST_QLIST                       0x200UL
 	#define HWRM_SELFTEST_EXEC                        0x201UL
 	#define HWRM_SELFTEST_IRQ                         0x202UL
@@ -373,6 +385,10 @@ struct cmd_nums {
 	#define HWRM_MFG_PARAM_SEEPROM_SYNC               0x20eUL
 	#define HWRM_MFG_PARAM_SEEPROM_READ               0x20fUL
 	#define HWRM_MFG_PARAM_SEEPROM_HEALTH             0x210UL
+	#define HWRM_MFG_PRVSN_EXPORT_CSR                 0x211UL
+	#define HWRM_MFG_PRVSN_IMPORT_CERT                0x212UL
+	#define HWRM_MFG_PRVSN_GET_STATE                  0x213UL
+	#define HWRM_MFG_GET_NVM_MEASUREMENT              0x214UL
 	#define HWRM_TF                                   0x2bcUL
 	#define HWRM_TF_VERSION_GET                       0x2bdUL
 	#define HWRM_TF_SESSION_OPEN                      0x2c6UL
@@ -385,6 +401,7 @@ struct cmd_nums {
 	#define HWRM_TF_SESSION_RESC_ALLOC                0x2cdUL
 	#define HWRM_TF_SESSION_RESC_FREE                 0x2ceUL
 	#define HWRM_TF_SESSION_RESC_FLUSH                0x2cfUL
+	#define HWRM_TF_SESSION_RESC_INFO                 0x2d0UL
 	#define HWRM_TF_TBL_TYPE_GET                      0x2daUL
 	#define HWRM_TF_TBL_TYPE_SET                      0x2dbUL
 	#define HWRM_TF_TBL_TYPE_BULK_GET                 0x2dcUL
@@ -399,6 +416,7 @@ struct cmd_nums {
 	#define HWRM_TF_EM_INSERT                         0x2eaUL
 	#define HWRM_TF_EM_DELETE                         0x2ebUL
 	#define HWRM_TF_EM_HASH_INSERT                    0x2ecUL
+	#define HWRM_TF_EM_MOVE                           0x2edUL
 	#define HWRM_TF_TCAM_SET                          0x2f8UL
 	#define HWRM_TF_TCAM_GET                          0x2f9UL
 	#define HWRM_TF_TCAM_MOVE                         0x2faUL
@@ -427,6 +445,16 @@ struct cmd_nums {
 	#define HWRM_DBG_QCAPS                            0xff20UL
 	#define HWRM_DBG_QCFG                             0xff21UL
 	#define HWRM_DBG_CRASHDUMP_MEDIUM_CFG             0xff22UL
+	#define HWRM_DBG_USEQ_ALLOC                       0xff23UL
+	#define HWRM_DBG_USEQ_FREE                        0xff24UL
+	#define HWRM_DBG_USEQ_FLUSH                       0xff25UL
+	#define HWRM_DBG_USEQ_QCAPS                       0xff26UL
+	#define HWRM_DBG_USEQ_CW_CFG                      0xff27UL
+	#define HWRM_DBG_USEQ_SCHED_CFG                   0xff28UL
+	#define HWRM_DBG_USEQ_RUN                         0xff29UL
+	#define HWRM_DBG_USEQ_DELIVERY_REQ                0xff2aUL
+	#define HWRM_DBG_USEQ_RESP_HDR                    0xff2bUL
+	#define HWRM_NVM_DEFRAG                           0xffecUL
 	#define HWRM_NVM_REQ_ARBITRATION                  0xffedUL
 	#define HWRM_NVM_FACTORY_DEFAULTS                 0xffeeUL
 	#define HWRM_NVM_VALIDATE_OPTION                  0xffefUL
@@ -471,6 +499,7 @@ struct ret_codes {
 	#define HWRM_ERR_CODE_HWRM_ERROR                   0xfUL
 	#define HWRM_ERR_CODE_BUSY                         0x10UL
 	#define HWRM_ERR_CODE_RESOURCE_LOCKED              0x11UL
+	#define HWRM_ERR_CODE_PF_UNAVAILABLE               0x12UL
 	#define HWRM_ERR_CODE_TLV_ENCAPSULATED_RESPONSE    0x8000UL
 	#define HWRM_ERR_CODE_UNKNOWN_ERR                  0xfffeUL
 	#define HWRM_ERR_CODE_CMD_NOT_SUPPORTED            0xffffUL
@@ -502,8 +531,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 2
-#define HWRM_VERSION_RSVD 16
-#define HWRM_VERSION_STR "1.10.2.16"
+#define HWRM_VERSION_RSVD 47
+#define HWRM_VERSION_STR "1.10.2.47"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -604,7 +633,8 @@ struct hwrm_ver_get_output {
 	__le16	roce_fw_build;
 	__le16	roce_fw_patch;
 	__le16	max_ext_req_len;
-	u8	unused_1[5];
+	__le16	max_req_timeout;
+	u8	unused_1[3];
 	u8	valid;
 };
 
@@ -725,7 +755,10 @@ struct hwrm_async_event_cmpl {
 	#define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE          0x40UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE    0x41UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST               0x42UL
-	#define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x43UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PHC_MASTER                 0x43UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP              0x44UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT               0x45UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x46UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -919,6 +952,8 @@ struct hwrm_async_event_cmpl_vf_cfg_change {
 	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL
 	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_LAST         ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE
 	__le32	event_data2;
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA2_VF_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA2_VF_ID_SFT 0
 	u8	opaque_v;
 	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V          0x1UL
 	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_MASK 0xfeUL
@@ -1074,6 +1109,223 @@ struct hwrm_async_event_cmpl_echo_request {
 	__le32	event_data1;
 };
 
+/* hwrm_async_event_cmpl_phc_master (size:128b/16B) */
+struct hwrm_async_event_cmpl_phc_master {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_LAST             ASYNC_EVENT_CMPL_PHC_MASTER_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER 0x43UL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_LAST      ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_ID_PHC_MASTER
+	__le32	event_data2;
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_MASTER_FID_SFT 0
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_MASK   0xffff0000UL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA2_PHC_SEC_FID_SFT    16
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_V          0x1UL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_MASK         0xfUL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_SFT          0
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_MASTER     0x1UL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_SECONDARY  0x2UL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER   0x3UL
+	#define ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_LAST          ASYNC_EVENT_CMPL_PHC_MASTER_EVENT_DATA1_FLAGS_PHC_FAILOVER
+};
+
+/* hwrm_async_event_cmpl_pps_timestamp (size:128b/16B) */
+struct hwrm_async_event_cmpl_pps_timestamp {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_LAST             ASYNC_EVENT_CMPL_PPS_TIMESTAMP_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_ID_PPS_TIMESTAMP 0x44UL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_ID_LAST         ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_ID_PPS_TIMESTAMP
+	__le32	event_data2;
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE              0x1UL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_INTERNAL       0x0UL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL       0x1UL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_LAST          ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_MASK         0xeUL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_SFT          1
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_MASK 0xffff0UL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_SFT 4
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_V          0x1UL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_MASK 0xffffffffUL
+	#define ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_SFT 0
+};
+
+/* hwrm_async_event_cmpl_error_report (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_ID_ERROR_REPORT 0x45UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_ID_ERROR_REPORT
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_V          0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_EVENT_DATA1_ERROR_TYPE_SFT 0
+};
+
+/* hwrm_async_event_cmpl_hwrm_error (size:128b/16B) */
+struct hwrm_async_event_cmpl_hwrm_error {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_LAST             ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_LAST      ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR
+	__le32	event_data2;
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK    0xffUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT     0
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING   0x0UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL  0x1UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL     0x2UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST     ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_V          0x1UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP     0x1UL
+};
+
+/* hwrm_async_event_cmpl_error_report_base (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_base {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_ID_ERROR_REPORT 0x45UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_ID_ERROR_REPORT
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_V          0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK          0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT           0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED        0x0UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM     0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL  0x2UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM             0x3UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST           ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM
+};
+
+/* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_pause_storm {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_ID_ERROR_REPORT 0x45UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_ID_ERROR_REPORT
+	__le32	event_data2;
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_V          0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_MASK       0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_SFT        0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM  0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_PAUSE_STORM_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM
+};
+
+/* hwrm_async_event_cmpl_error_report_invalid_signal (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_invalid_signal {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_ID_ERROR_REPORT 0x45UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_ID_ERROR_REPORT
+	__le32	event_data2;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK 0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT 0
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_V          0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_MASK          0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_SFT           0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL  0x2UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_LAST           ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL
+};
+
+/* hwrm_async_event_cmpl_error_report_nvm (size:128b/16B) */
+struct hwrm_async_event_cmpl_error_report_nvm {
+	__le16	type;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_MASK            0x3fUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_SFT             0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_HWRM_ASYNC_EVENT  0x2eUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_LAST             ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_TYPE_HWRM_ASYNC_EVENT
+	__le16	event_id;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_ID_ERROR_REPORT 0x45UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_ID_LAST        ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_ID_ERROR_REPORT
+	__le32	event_data2;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_MASK 0xffffffffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA2_ERR_ADDR_SFT 0
+	u8	opaque_v;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_V          0x1UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_OPAQUE_SFT 1
+	u8	timestamp_lo;
+	__le16	timestamp_hi;
+	__le32	event_data1;
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_MASK     0xffUL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_SFT      0
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_NVM_ERROR  0x3UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_LAST      ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_ERROR_TYPE_NVM_ERROR
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_MASK   0xff00UL
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_SFT    8
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_WRITE    (0x1UL << 8)
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE    (0x2UL << 8)
+	#define ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_LAST    ASYNC_EVENT_CMPL_ERROR_REPORT_NVM_EVENT_DATA1_NVM_ERR_TYPE_ERASE
+};
+
 /* hwrm_func_reset_input (size:192b/24B) */
 struct hwrm_func_reset_input {
 	__le16	req_type;
@@ -1302,7 +1554,7 @@ struct hwrm_func_qcaps_output {
 	__le32	max_flow_id;
 	__le32	max_hw_ring_grps;
 	__le16	max_sp_tx_rings;
-	u8	unused_0[2];
+	__le16	max_msix_vfs;
 	__le32	flags_ext;
 	#define FUNC_QCAPS_RESP_FLAGS_EXT_ECN_MARK_SUPPORTED                     0x1UL
 	#define FUNC_QCAPS_RESP_FLAGS_EXT_ECN_STATS_SUPPORTED                    0x2UL
@@ -1320,6 +1572,14 @@ struct hwrm_func_qcaps_output {
 	#define FUNC_QCAPS_RESP_FLAGS_EXT_NVM_OPTION_ACTION_SUPPORTED            0x2000UL
 	#define FUNC_QCAPS_RESP_FLAGS_EXT_BD_METADATA_SUPPORTED                  0x4000UL
 	#define FUNC_QCAPS_RESP_FLAGS_EXT_ECHO_REQUEST_SUPPORTED                 0x8000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_NPAR_1_2_SUPPORTED                     0x10000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PTM_SUPPORTED                      0x20000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED                      0x40000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_VF_CFG_ASYNC_FOR_PF_SUPPORTED          0x80000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_PARTITION_BW_SUPPORTED                 0x100000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_DFLT_VLAN_TPID_PCP_SUPPORTED           0x200000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_KTLS_SUPPORTED                         0x400000UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_EP_RATE_CONTROL                        0x800000UL
 	u8	max_schqs;
 	u8	mpc_chnls_cap;
 	#define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE         0x1UL
@@ -1342,7 +1602,7 @@ struct hwrm_func_qcfg_input {
 	u8	unused_0[6];
 };
 
-/* hwrm_func_qcfg_output (size:768b/96B) */
+/* hwrm_func_qcfg_output (size:832b/104B) */
 struct hwrm_func_qcfg_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -1366,6 +1626,7 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED         0x800UL
 	#define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED           0x1000UL
 	#define FUNC_QCFG_RESP_FLAGS_MULTI_ROOT                   0x2000UL
+	#define FUNC_QCFG_RESP_FLAGS_ENABLE_RDMA_SRIOV            0x4000UL
 	u8	mac_address[6];
 	__le16	pci_id;
 	__le16	alloc_rsscos_ctx;
@@ -1374,7 +1635,7 @@ struct hwrm_func_qcfg_output {
 	__le16	alloc_rx_rings;
 	__le16	alloc_l2_ctx;
 	__le16	alloc_vnics;
-	__le16	mtu;
+	__le16	admin_mtu;
 	__le16	mru;
 	__le16	stat_ctx_id;
 	u8	port_partition_type;
@@ -1383,6 +1644,7 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 0x2UL
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 0x3UL
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 0x4UL
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_2 0x5UL
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN 0xffUL
 	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_LAST   FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN
 	u8	port_pf_cnt;
@@ -1463,11 +1725,35 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_MPC_CHNLS_TE_CFA_ENABLED      0x4UL
 	#define FUNC_QCFG_RESP_MPC_CHNLS_RE_CFA_ENABLED      0x8UL
 	#define FUNC_QCFG_RESP_MPC_CHNLS_PRIMATE_ENABLED     0x10UL
-	u8	unused_2[6];
+	u8	unused_2[3];
+	__le32	partition_min_bw;
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_MASK             0xfffffffUL
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_SFT              0
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE                     0x10000000UL
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_BITS                  (0x0UL << 28)
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_BYTES                 (0x1UL << 28)
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_LAST                 FUNC_QCFG_RESP_PARTITION_MIN_BW_SCALE_BYTES
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_MASK        0xe0000000UL
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_SFT         29
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_LAST         FUNC_QCFG_RESP_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100
+	__le32	partition_max_bw;
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_MASK             0xfffffffUL
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_SFT              0
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE                     0x10000000UL
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_BITS                  (0x0UL << 28)
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_BYTES                 (0x1UL << 28)
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_LAST                 FUNC_QCFG_RESP_PARTITION_MAX_BW_SCALE_BYTES
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_MASK        0xe0000000UL
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_SFT         29
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST         FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100
+	__le16	host_mtu;
+	u8	unused_3;
 	u8	valid;
 };
 
-/* hwrm_func_cfg_input (size:768b/96B) */
+/* hwrm_func_cfg_input (size:832b/104B) */
 struct hwrm_func_cfg_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -1504,7 +1790,7 @@ struct hwrm_func_cfg_input {
 	#define FUNC_CFG_REQ_FLAGS_BD_METADATA_ENABLE             0x20000000UL
 	#define FUNC_CFG_REQ_FLAGS_BD_METADATA_DISABLE            0x40000000UL
 	__le32	enables;
-	#define FUNC_CFG_REQ_ENABLES_MTU                      0x1UL
+	#define FUNC_CFG_REQ_ENABLES_ADMIN_MTU                0x1UL
 	#define FUNC_CFG_REQ_ENABLES_MRU                      0x2UL
 	#define FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS          0x4UL
 	#define FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS           0x8UL
@@ -1530,7 +1816,11 @@ struct hwrm_func_cfg_input {
 	#define FUNC_CFG_REQ_ENABLES_HOT_RESET_IF_SUPPORT     0x800000UL
 	#define FUNC_CFG_REQ_ENABLES_SCHQ_ID                  0x1000000UL
 	#define FUNC_CFG_REQ_ENABLES_MPC_CHNLS                0x2000000UL
-	__le16	mtu;
+	#define FUNC_CFG_REQ_ENABLES_PARTITION_MIN_BW         0x4000000UL
+	#define FUNC_CFG_REQ_ENABLES_PARTITION_MAX_BW         0x8000000UL
+	#define FUNC_CFG_REQ_ENABLES_TPID                     0x10000000UL
+	#define FUNC_CFG_REQ_ENABLES_HOST_MTU                 0x20000000UL
+	__le16	admin_mtu;
 	__le16	mru;
 	__le16	num_rsscos_ctxs;
 	__le16	num_cmpl_rings;
@@ -1615,7 +1905,30 @@ struct hwrm_func_cfg_input {
 	#define FUNC_CFG_REQ_MPC_CHNLS_RE_CFA_DISABLE      0x80UL
 	#define FUNC_CFG_REQ_MPC_CHNLS_PRIMATE_ENABLE      0x100UL
 	#define FUNC_CFG_REQ_MPC_CHNLS_PRIMATE_DISABLE     0x200UL
-	u8	unused_0[4];
+	__le32	partition_min_bw;
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_MASK             0xfffffffUL
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_SFT              0
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE                     0x10000000UL
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_BITS                  (0x0UL << 28)
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_BYTES                 (0x1UL << 28)
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_LAST                 FUNC_CFG_REQ_PARTITION_MIN_BW_SCALE_BYTES
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_MASK        0xe0000000UL
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_SFT         29
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_LAST         FUNC_CFG_REQ_PARTITION_MIN_BW_BW_VALUE_UNIT_PERCENT1_100
+	__le32	partition_max_bw;
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_MASK             0xfffffffUL
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_SFT              0
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE                     0x10000000UL
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_BITS                  (0x0UL << 28)
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_BYTES                 (0x1UL << 28)
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_LAST                 FUNC_CFG_REQ_PARTITION_MAX_BW_SCALE_BYTES
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_MASK        0xe0000000UL
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_SFT         29
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST         FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100
+	__be16	tpid;
+	__le16	host_mtu;
 };
 
 /* hwrm_func_cfg_output (size:128b/16B) */
@@ -1777,14 +2090,15 @@ struct hwrm_func_drv_rgtr_input {
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE               0x1UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE              0x2UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE             0x4UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE     0x8UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT          0x10UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT     0x20UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT             0x40UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT         0x80UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE                     0x1UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE                    0x2UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE                   0x4UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE           0x8UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT                0x10UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT           0x20UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT                   0x40UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT               0x80UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_RSS_STRICT_HASH_TYPE_SUPPORT     0x100UL
 	__le32	enables;
 	#define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE             0x1UL
 	#define FUNC_DRV_RGTR_REQ_ENABLES_VER                 0x2UL
@@ -2047,7 +2361,7 @@ struct hwrm_func_backing_store_qcaps_input {
 	__le64	resp_addr;
 };
 
-/* hwrm_func_backing_store_qcaps_output (size:704b/88B) */
+/* hwrm_func_backing_store_qcaps_output (size:832b/104B) */
 struct hwrm_func_backing_store_qcaps_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -2085,6 +2399,8 @@ struct hwrm_func_backing_store_qcaps_output {
 	#define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_VNIC     0x8UL
 	#define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_STAT     0x10UL
 	#define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_MRAV     0x20UL
+	#define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_TKC      0x40UL
+	#define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_RKC      0x80UL
 	u8	qp_init_offset;
 	u8	srq_init_offset;
 	u8	cq_init_offset;
@@ -2093,7 +2409,13 @@ struct hwrm_func_backing_store_qcaps_output {
 	u8	stat_init_offset;
 	u8	mrav_init_offset;
 	u8	tqm_fp_rings_count_ext;
-	u8	rsvd[5];
+	u8	tkc_init_offset;
+	u8	rkc_init_offset;
+	__le16	tkc_entry_size;
+	__le16	rkc_entry_size;
+	__le32	tkc_max_entries;
+	__le32	rkc_max_entries;
+	u8	rsvd[7];
 	u8	valid;
 };
 
@@ -2120,7 +2442,7 @@ struct tqm_fp_ring_cfg {
 	__le64	tqm_ring_page_dir;
 };
 
-/* hwrm_func_backing_store_cfg_input (size:2432b/304B) */
+/* hwrm_func_backing_store_cfg_input (size:2688b/336B) */
 struct hwrm_func_backing_store_cfg_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -2150,6 +2472,8 @@ struct hwrm_func_backing_store_cfg_input {
 	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING8      0x10000UL
 	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING9      0x20000UL
 	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING10     0x40000UL
+	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TKC            0x80000UL
+	#define FUNC_BACKING_STORE_CFG_REQ_ENABLES_RKC            0x100000UL
 	u8	qpc_pg_size_qpc_lvl;
 	#define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_MASK      0xfUL
 	#define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_SFT       0
@@ -2508,6 +2832,45 @@ struct hwrm_func_backing_store_cfg_input {
 	u8	ring10_unused[3];
 	__le32	tqm_ring10_num_entries;
 	__le64	tqm_ring10_page_dir;
+	__le32	tkc_num_entries;
+	__le32	rkc_num_entries;
+	__le64	tkc_page_dir;
+	__le64	rkc_page_dir;
+	__le16	tkc_entry_size;
+	__le16	rkc_entry_size;
+	u8	tkc_pg_size_tkc_lvl;
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_MASK      0xfUL
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_SFT       0
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_0       0x0UL
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_1       0x1UL
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_2       0x2UL
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LAST       FUNC_BACKING_STORE_CFG_REQ_TKC_LVL_LVL_2
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_MASK  0xf0UL
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_SFT   4
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_4K   (0x0UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_8K   (0x1UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_64K  (0x2UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_2M   (0x3UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_8M   (0x4UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_1G   (0x5UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_LAST   FUNC_BACKING_STORE_CFG_REQ_TKC_PG_SIZE_PG_1G
+	u8	rkc_pg_size_rkc_lvl;
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_MASK      0xfUL
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_SFT       0
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_0       0x0UL
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_1       0x1UL
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_2       0x2UL
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LAST       FUNC_BACKING_STORE_CFG_REQ_RKC_LVL_LVL_2
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_MASK  0xf0UL
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_SFT   4
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_4K   (0x0UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_8K   (0x1UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_64K  (0x2UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_2M   (0x3UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_8M   (0x4UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_1G   (0x5UL << 4)
+	#define FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_LAST   FUNC_BACKING_STORE_CFG_REQ_RKC_PG_SIZE_PG_1G
+	u8	rsvd[2];
 };
 
 /* hwrm_func_backing_store_cfg_output (size:128b/16B) */
@@ -2634,6 +2997,212 @@ struct hwrm_func_echo_response_output {
 	u8	valid;
 };
 
+/* hwrm_func_ptp_pin_qcfg_input (size:192b/24B) */
+struct hwrm_func_ptp_pin_qcfg_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	u8	unused_0[8];
+};
+
+/* hwrm_func_ptp_pin_qcfg_output (size:128b/16B) */
+struct hwrm_func_ptp_pin_qcfg_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	num_pins;
+	u8	state;
+	#define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN0_ENABLED     0x1UL
+	#define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN1_ENABLED     0x2UL
+	#define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN2_ENABLED     0x4UL
+	#define FUNC_PTP_PIN_QCFG_RESP_STATE_PIN3_ENABLED     0x8UL
+	u8	pin0_usage;
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_LAST    FUNC_PTP_PIN_QCFG_RESP_PIN0_USAGE_SYNC_OUT
+	u8	pin1_usage;
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_LAST    FUNC_PTP_PIN_QCFG_RESP_PIN1_USAGE_SYNC_OUT
+	u8	pin2_usage;
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_LAST    FUNC_PTP_PIN_QCFG_RESP_PIN2_USAGE_SYNC_OUT
+	u8	pin3_usage;
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_LAST    FUNC_PTP_PIN_QCFG_RESP_PIN3_USAGE_SYNC_OUT
+	u8	unused_0;
+	u8	valid;
+};
+
+/* hwrm_func_ptp_pin_cfg_input (size:256b/32B) */
+struct hwrm_func_ptp_pin_cfg_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	enables;
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_STATE     0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_USAGE     0x2UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN1_STATE     0x4UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN1_USAGE     0x8UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN2_STATE     0x10UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN2_USAGE     0x20UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN3_STATE     0x40UL
+	#define FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN3_USAGE     0x80UL
+	u8	pin0_state;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_DISABLED 0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_ENABLED  0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN0_STATE_ENABLED
+	u8	pin0_usage;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN0_USAGE_SYNC_OUT
+	u8	pin1_state;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_DISABLED 0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_ENABLED  0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN1_STATE_ENABLED
+	u8	pin1_usage;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN1_USAGE_SYNC_OUT
+	u8	pin2_state;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_DISABLED 0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_ENABLED  0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN2_STATE_ENABLED
+	u8	pin2_usage;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN2_USAGE_SYNC_OUT
+	u8	pin3_state;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_DISABLED 0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_ENABLED  0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN3_STATE_ENABLED
+	u8	pin3_usage;
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_NONE     0x0UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_PPS_IN   0x1UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_PPS_OUT  0x2UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNC_IN  0x3UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNC_OUT 0x4UL
+	#define FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_LAST    FUNC_PTP_PIN_CFG_REQ_PIN3_USAGE_SYNC_OUT
+	u8	unused_0[4];
+};
+
+/* hwrm_func_ptp_pin_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_pin_cfg_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	unused_0[7];
+	u8	valid;
+};
+
+/* hwrm_func_ptp_cfg_input (size:320b/40B) */
+struct hwrm_func_ptp_cfg_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le16	enables;
+	#define FUNC_PTP_CFG_REQ_ENABLES_PTP_PPS_EVENT               0x1UL
+	#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_DLL_SOURCE     0x2UL
+	#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_DLL_PHASE      0x4UL
+	#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD     0x8UL
+	#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP         0x10UL
+	#define FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE      0x20UL
+	u8	ptp_pps_event;
+	#define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_INTERNAL     0x1UL
+	#define FUNC_PTP_CFG_REQ_PTP_PPS_EVENT_EXTERNAL     0x2UL
+	u8	ptp_freq_adj_dll_source;
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_NONE    0x0UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_0  0x1UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_1  0x2UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_2  0x3UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_TSIO_3  0x4UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_0  0x5UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_1  0x6UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_2  0x7UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_PORT_3  0x8UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_INVALID 0xffUL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_LAST   FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_SOURCE_INVALID
+	u8	ptp_freq_adj_dll_phase;
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_NONE 0x0UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_4K   0x1UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_8K   0x2UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_10M  0x3UL
+	#define FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_LAST FUNC_PTP_CFG_REQ_PTP_FREQ_ADJ_DLL_PHASE_10M
+	u8	unused_0[3];
+	__le32	ptp_freq_adj_ext_period;
+	__le32	ptp_freq_adj_ext_up;
+	__le32	ptp_freq_adj_ext_phase_lower;
+	__le32	ptp_freq_adj_ext_phase_upper;
+};
+
+/* hwrm_func_ptp_cfg_output (size:128b/16B) */
+struct hwrm_func_ptp_cfg_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	u8	unused_0[7];
+	u8	valid;
+};
+
+/* hwrm_func_ptp_ts_query_input (size:192b/24B) */
+struct hwrm_func_ptp_ts_query_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le32	flags;
+	#define FUNC_PTP_TS_QUERY_REQ_FLAGS_PPS_TIME     0x1UL
+	#define FUNC_PTP_TS_QUERY_REQ_FLAGS_PTM_TIME     0x2UL
+	u8	unused_0[4];
+};
+
+/* hwrm_func_ptp_ts_query_output (size:320b/40B) */
+struct hwrm_func_ptp_ts_query_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le64	pps_event_ts;
+	__le64	ptm_res_local_ts;
+	__le64	ptm_pmstr_ts;
+	__le32	ptm_mstr_prop_dly;
+	u8	unused_0[3];
+	u8	valid;
+};
+
 /* hwrm_func_drv_if_change_input (size:192b/24B) */
 struct hwrm_func_drv_if_change_input {
 	__le16	req_type;
@@ -3156,6 +3725,7 @@ struct hwrm_port_mac_cfg_input {
 	#define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE     0x80UL
 	#define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG                  0x100UL
 	#define PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB               0x200UL
+	#define PORT_MAC_CFG_REQ_ENABLES_PTP_ADJ_PHASE                  0x400UL
 	__le16	port_id;
 	u8	ipg;
 	u8	lpbk;
@@ -3188,8 +3758,8 @@ struct hwrm_port_mac_cfg_input {
 	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK          0xe0UL
 	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT           5
 	u8	unused_0[3];
-	__s32	ptp_freq_adj_ppb;
-	u8	unused_1[4];
+	__le32	ptp_freq_adj_ppb;
+	__le32	ptp_adj_phase;
 };
 
 /* hwrm_port_mac_cfg_output (size:128b/16B) */
@@ -3221,16 +3791,17 @@ struct hwrm_port_mac_ptp_qcfg_input {
 	u8	unused_0[6];
 };
 
-/* hwrm_port_mac_ptp_qcfg_output (size:640b/80B) */
+/* hwrm_port_mac_ptp_qcfg_output (size:704b/88B) */
 struct hwrm_port_mac_ptp_qcfg_output {
 	__le16	error_code;
 	__le16	req_type;
 	__le16	seq_id;
 	__le16	resp_len;
 	u8	flags;
-	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS      0x1UL
-	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS     0x4UL
-	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS        0x8UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS                       0x1UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_ONE_STEP_TX_TS                      0x4UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS                         0x8UL
+	#define PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK     0x10UL
 	u8	unused_0[3];
 	__le32	rx_ts_reg_off_lower;
 	__le32	rx_ts_reg_off_upper;
@@ -3247,6 +3818,8 @@ struct hwrm_port_mac_ptp_qcfg_output {
 	__le32	tx_ts_reg_off_seq_id;
 	__le32	tx_ts_reg_off_fifo;
 	__le32	tx_ts_reg_off_granularity;
+	__le32	ts_ref_clock_reg_lower;
+	__le32	ts_ref_clock_reg_upper;
 	u8	unused_1[7];
 	u8	valid;
 };
@@ -3647,7 +4220,7 @@ struct hwrm_port_lpbk_clr_stats_output {
 	u8	valid;
 };
 
-/* hwrm_port_ts_query_input (size:192b/24B) */
+/* hwrm_port_ts_query_input (size:256b/32B) */
 struct hwrm_port_ts_query_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -3662,6 +4235,11 @@ struct hwrm_port_ts_query_input {
 	#define PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME     0x2UL
 	__le16	port_id;
 	u8	unused_0[2];
+	__le16	enables;
+	#define PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT     0x1UL
+	#define PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID         0x2UL
+	__le16	ts_req_timeout;
+	__le32	ptp_seq_id;
 };
 
 /* hwrm_port_ts_query_output (size:192b/24B) */
@@ -4215,7 +4793,8 @@ struct hwrm_queue_qportcfg_output {
 	u8	max_configurable_lossless_queues;
 	u8	queue_cfg_allowed;
 	u8	queue_cfg_info;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG     0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG             0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_USE_PROFILE_TYPE     0x2UL
 	u8	queue_pfcenable_cfg_allowed;
 	u8	queue_pri2cos_cfg_allowed;
 	u8	queue_cos2bw_cfg_allowed;
@@ -5467,6 +6046,7 @@ struct hwrm_vnic_qcaps_output {
 	#define VNIC_QCAPS_RESP_FLAGS_VNIC_STATE_CAP                      0x400UL
 	#define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP           0x800UL
 	#define VNIC_QCAPS_RESP_FLAGS_METADATA_FORMAT_CAP                 0x1000UL
+	#define VNIC_QCAPS_RESP_FLAGS_RSS_STRICT_HASH_TYPE_CAP            0x2000UL
 	__le16	max_aggs_supported;
 	u8	unused_1[5];
 	u8	valid;
@@ -7224,6 +7804,7 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
 	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED        0x4000UL
 	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE                              0x8000UL
 	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED     0x10000UL
+	#define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_LAG_SUPPORTED                                0x20000UL
 	u8	unused_0[3];
 	u8	valid;
 };
@@ -7914,11 +8495,14 @@ struct hwrm_temp_monitor_query_output {
 	u8	phy_temp;
 	u8	om_temp;
 	u8	flags;
-	#define TEMP_MONITOR_QUERY_RESP_FLAGS_TEMP_NOT_AVAILABLE         0x1UL
-	#define TEMP_MONITOR_QUERY_RESP_FLAGS_PHY_TEMP_NOT_AVAILABLE     0x2UL
-	#define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_NOT_PRESENT             0x4UL
-	#define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_TEMP_NOT_AVAILABLE      0x8UL
-	u8	unused_0[3];
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_TEMP_NOT_AVAILABLE            0x1UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_PHY_TEMP_NOT_AVAILABLE        0x2UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_NOT_PRESENT                0x4UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_OM_TEMP_NOT_AVAILABLE         0x8UL
+	#define TEMP_MONITOR_QUERY_RESP_FLAGS_EXT_TEMP_FIELDS_AVAILABLE     0x10UL
+	u8	temp2;
+	u8	phy_temp2;
+	u8	om_temp2;
 	u8	valid;
 };
 
@@ -8109,6 +8693,7 @@ struct hwrm_dbg_qcaps_output {
 	#define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_NVM          0x1UL
 	#define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR     0x2UL
 	#define DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR      0x4UL
+	#define DBG_QCAPS_RESP_FLAGS_USEQ                   0x8UL
 	u8	unused_1[3];
 	u8	valid;
 };
@@ -8632,10 +9217,11 @@ struct hwrm_nvm_install_update_output {
 /* hwrm_nvm_install_update_cmd_err (size:64b/8B) */
 struct hwrm_nvm_install_update_cmd_err {
 	u8	code;
-	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN  0x0UL
-	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR 0x1UL
-	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE 0x2UL
-	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST    NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE
+	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_UNKNOWN       0x0UL
+	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR      0x1UL
+	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE      0x2UL
+	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK 0x3UL
+	#define NVM_INSTALL_UPDATE_CMD_ERR_CODE_LAST         NVM_INSTALL_UPDATE_CMD_ERR_CODE_ANTI_ROLLBACK
 	u8	unused_0[7];
 };
 
@@ -8876,6 +9462,7 @@ struct fw_status_reg {
 	#define FW_STATUS_REG_CRASHDUMP_COMPLETE     0x80000UL
 	#define FW_STATUS_REG_SHUTDOWN               0x100000UL
 	#define FW_STATUS_REG_CRASHED_NO_MASTER      0x200000UL
+	#define FW_STATUS_REG_RECOVERING             0x400000UL
 };
 
 /* hcomm_status (size:64b/8B) */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index eb00a219aa51..7fa881e1cd80 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -632,7 +632,7 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
 	vf_vnics = (hw_resc->max_vnics - bp->nr_vnics) / num_vfs;
 	vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
 
-	req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_MTU |
+	req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ADMIN_MTU |
 				  FUNC_CFG_REQ_ENABLES_MRU |
 				  FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
 				  FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
@@ -645,7 +645,7 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
 
 	mtu = bp->dev->mtu + ETH_HLEN + VLAN_HLEN;
 	req.mru = cpu_to_le16(mtu);
-	req.mtu = cpu_to_le16(mtu);
+	req.admin_mtu = cpu_to_le16(mtu);
 
 	req.num_rsscos_ctxs = cpu_to_le16(1);
 	req.num_cmpl_rings = cpu_to_le16(vf_cp_rings);

From ae5c42f0b92ca0abefe2e3930a14fc2e716c81a2 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:45 -0400
Subject: [PATCH 3020/3292] bnxt_en: Get PTP hardware capability from firmware

Store PTP hardware info in a structure if hardware and firmware support PTP.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 59 +++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  5 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 49 +++++++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aef3fccc27a9..081cdcb02b48 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -49,6 +49,8 @@
 #include <linux/log2.h>
 #include <linux/aer.h>
 #include <linux/bitmap.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
 #include <linux/cpu_rmap.h>
 #include <linux/cpumask.h>
 #include <net/pkt_cls.h>
@@ -63,6 +65,7 @@
 #include "bnxt_ethtool.h"
 #include "bnxt_dcb.h"
 #include "bnxt_xdp.h"
+#include "bnxt_ptp.h"
 #include "bnxt_vfr.h"
 #include "bnxt_tc.h"
 #include "bnxt_devlink.h"
@@ -7391,6 +7394,56 @@ hwrm_func_resc_qcaps_exit:
 	return rc;
 }
 
+/* bp->hwrm_cmd_lock already held. */
+static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
+{
+	struct hwrm_port_mac_ptp_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_port_mac_ptp_qcfg_input req = {0};
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	u8 flags;
+	int rc;
+
+	if (bp->hwrm_spec_code < 0x10801) {
+		rc = -ENODEV;
+		goto no_ptp;
+	}
+
+	req.port_id = cpu_to_le16(bp->pf.port_id);
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_PTP_QCFG, -1, -1);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		goto no_ptp;
+
+	flags = resp->flags;
+	if (!(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) {
+		rc = -ENODEV;
+		goto no_ptp;
+	}
+	if (!ptp) {
+		ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
+		if (!ptp)
+			return -ENOMEM;
+		ptp->bp = bp;
+		bp->ptp_cfg = ptp;
+	}
+	if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) {
+		ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower);
+		ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper);
+	} else if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER;
+		ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER;
+	} else {
+		rc = -ENODEV;
+		goto no_ptp;
+	}
+	return 0;
+
+no_ptp:
+	kfree(ptp);
+	bp->ptp_cfg = NULL;
+	return rc;
+}
+
 static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 {
 	int rc = 0;
@@ -7462,6 +7515,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 		bp->flags &= ~BNXT_FLAG_WOL_CAP;
 		if (flags & FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED)
 			bp->flags |= BNXT_FLAG_WOL_CAP;
+		if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED)
+			__bnxt_hwrm_ptp_qcfg(bp);
 	} else {
 #ifdef CONFIG_BNXT_SRIOV
 		struct bnxt_vf_info *vf = &bp->vf;
@@ -12571,6 +12626,8 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	bnxt_dcb_free(bp);
 	kfree(bp->edev);
 	bp->edev = NULL;
+	kfree(bp->ptp_cfg);
+	bp->ptp_cfg = NULL;
 	kfree(bp->fw_health);
 	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);
@@ -13161,6 +13218,8 @@ init_err_pci_clean:
 	bnxt_free_hwrm_short_cmd_req(bp);
 	bnxt_free_hwrm_resources(bp);
 	bnxt_ethtool_free(bp);
+	kfree(bp->ptp_cfg);
+	bp->ptp_cfg = NULL;
 	kfree(bp->fw_health);
 	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 30e47ea343f9..696163559b64 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1362,6 +1362,9 @@ struct bnxt_test_info {
 #define BNXT_GRC_REG_CHIP_NUM			0x48
 #define BNXT_GRC_REG_BASE			0x260000
 
+#define BNXT_TS_REG_TIMESYNC_TS0_LOWER		0x640180c
+#define BNXT_TS_REG_TIMESYNC_TS0_UPPER		0x6401810
+
 #define BNXT_GRC_BASE_MASK			0xfffff000
 #define BNXT_GRC_OFFSET_MASK			0x00000ffc
 
@@ -2042,6 +2045,8 @@ struct bnxt {
 
 	struct bpf_prog		*xdp_prog;
 
+	struct bnxt_ptp_cfg	*ptp_cfg;
+
 	/* devlink interface and vf-rep structs */
 	struct devlink		*dl;
 	struct devlink_port	dl_port;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
new file mode 100644
index 000000000000..603f0fdb71c2
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -0,0 +1,49 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2021 Broadcom Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_PTP_H
+#define BNXT_PTP_H
+
+struct bnxt_ptp_cfg {
+	struct ptp_clock_info	ptp_info;
+	struct ptp_clock	*ptp_clock;
+	struct cyclecounter	cc;
+	struct timecounter	tc;
+	/* serialize timecounter access */
+	spinlock_t		ptp_lock;
+	struct sk_buff		*tx_skb;
+	u64			current_time;
+	u64			old_time;
+	unsigned long		next_period;
+	u16			tx_seqid;
+	struct bnxt		*bp;
+	atomic_t		tx_avail;
+#define BNXT_MAX_TX_TS	1
+	u16			rxctl;
+#define BNXT_PTP_MSG_SYNC			(1 << 0)
+#define BNXT_PTP_MSG_DELAY_REQ			(1 << 1)
+#define BNXT_PTP_MSG_PDELAY_REQ			(1 << 2)
+#define BNXT_PTP_MSG_PDELAY_RESP		(1 << 3)
+#define BNXT_PTP_MSG_FOLLOW_UP			(1 << 8)
+#define BNXT_PTP_MSG_DELAY_RESP			(1 << 9)
+#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	(1 << 10)
+#define BNXT_PTP_MSG_ANNOUNCE			(1 << 11)
+#define BNXT_PTP_MSG_SIGNALING			(1 << 12)
+#define BNXT_PTP_MSG_MANAGEMENT			(1 << 13)
+#define BNXT_PTP_MSG_EVENTS		(BNXT_PTP_MSG_SYNC |		\
+					 BNXT_PTP_MSG_DELAY_REQ |	\
+					 BNXT_PTP_MSG_PDELAY_REQ |	\
+					 BNXT_PTP_MSG_PDELAY_RESP)
+	u8			tx_tstamp_en:1;
+	int			rx_filter;
+
+	u32			refclk_regs[2];
+	u32			refclk_mapped_regs[2];
+};
+#endif

From 118612d519d83b98ead11195a5c818f5e8904654 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:46 -0400
Subject: [PATCH 3021/3292] bnxt_en: Add PTP clock APIs, ioctls, and ethtool
 methods

Add the clock APIs to set/get/adjust the hw clock, and the related
ioctls and ethtool methods.

v2: Propagate error code from ptp_clock_register().
    Add spinlock to serialize access to the timecounter.  The
    timecounter is accessed in process context and the RX datapath.
    Read the PHC using direct registers.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/Kconfig         |   1 +
 drivers/net/ethernet/broadcom/bnxt/Makefile   |   2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |   6 +
 .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c |  34 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 324 ++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h |  16 +
 6 files changed, 382 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index cb88ffb8f12f..1a02ca600b71 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -206,6 +206,7 @@ config SYSTEMPORT
 config BNXT
 	tristate "Broadcom NetXtreme-C/E support"
 	depends on PCI
+	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select LIBCRC32C
 	select NET_DEVLINK
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index cb97ec56fdec..2b8ae687b3c1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
 bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
 bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 081cdcb02b48..1250a5b50b50 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10390,6 +10390,12 @@ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return bnxt_hwrm_port_phy_write(bp, mdio->phy_id, mdio->reg_num,
 						mdio->val_in);
 
+	case SIOCSHWTSTAMP:
+		return bnxt_hwtstamp_set(dev, ifr);
+
+	case SIOCGHWTSTAMP:
+		return bnxt_hwtstamp_get(dev, ifr);
+
 	default:
 		/* do nothing */
 		break;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index c664ec52ebcf..786ca51e669b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -19,9 +19,13 @@
 #include <linux/firmware.h>
 #include <linux/utsname.h>
 #include <linux/time.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+#include <linux/timecounter.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_xdp.h"
+#include "bnxt_ptp.h"
 #include "bnxt_ethtool.h"
 #include "bnxt_nvm_defs.h"	/* NVRAM content constant and structure defs */
 #include "bnxt_fw_hdr.h"	/* Firmware hdr constant and structure defs */
@@ -3926,6 +3930,35 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump,
 	return 0;
 }
 
+static int bnxt_get_ts_info(struct net_device *dev,
+			    struct ethtool_ts_info *info)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_ptp_cfg *ptp;
+
+	ptp = bp->ptp_cfg;
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE;
+
+	info->phc_index = -1;
+	if (!ptp)
+		return 0;
+
+	info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE |
+				 SOF_TIMESTAMPING_RX_HARDWARE |
+				 SOF_TIMESTAMPING_RAW_HARDWARE;
+	if (ptp->ptp_clock)
+		info->phc_index = ptp_clock_index(ptp->ptp_clock);
+
+	info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
+
+	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			   (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			   (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
+	return 0;
+}
+
 void bnxt_ethtool_init(struct bnxt *bp)
 {
 	struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr;
@@ -4172,6 +4205,7 @@ const struct ethtool_ops bnxt_ethtool_ops = {
 	.nway_reset		= bnxt_nway_reset,
 	.set_phys_id		= bnxt_set_phys_id,
 	.self_test		= bnxt_self_test,
+	.get_ts_info		= bnxt_get_ts_info,
 	.reset			= bnxt_reset,
 	.set_dump		= bnxt_set_dump,
 	.get_dump_flag		= bnxt_get_dump_flag,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
new file mode 100644
index 000000000000..47f1f9c3380c
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -0,0 +1,324 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2021 Broadcom Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+#include <linux/timecounter.h>
+#include <linux/timekeeping.h>
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_ptp.h"
+
+static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
+			    const struct timespec64 *ts)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+						ptp_info);
+	u64 ns = timespec64_to_ns(ts);
+
+	spin_lock_bh(&ptp->ptp_lock);
+	timecounter_init(&ptp->tc, &ptp->cc, ns);
+	spin_unlock_bh(&ptp->ptp_lock);
+	return 0;
+}
+
+/* Caller holds ptp_lock */
+static u64 bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	u64 ns;
+
+	ptp_read_system_prets(sts);
+	ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+	ptp_read_system_postts(sts);
+	ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
+	return ns;
+}
+
+static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
+			     struct timespec64 *ts,
+			     struct ptp_system_timestamp *sts)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+						ptp_info);
+	u64 ns, cycles;
+
+	spin_lock_bh(&ptp->ptp_lock);
+	cycles = bnxt_refclk_read(ptp->bp, sts);
+	ns = timecounter_cyc2time(&ptp->tc, cycles);
+	spin_unlock_bh(&ptp->ptp_lock);
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+						ptp_info);
+
+	spin_lock_bh(&ptp->ptp_lock);
+	timecounter_adjtime(&ptp->tc, delta);
+	spin_unlock_bh(&ptp->ptp_lock);
+	return 0;
+}
+
+static int bnxt_ptp_adjfreq(struct ptp_clock_info *ptp_info, s32 ppb)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+						ptp_info);
+	struct hwrm_port_mac_cfg_input req = {0};
+	struct bnxt *bp = ptp->bp;
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
+	req.ptp_freq_adj_ppb = cpu_to_le32(ppb);
+	req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		netdev_err(ptp->bp->dev,
+			   "ptp adjfreq failed. rc = %d\n", rc);
+	return rc;
+}
+
+static int bnxt_ptp_enable(struct ptp_clock_info *ptp,
+			   struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
+{
+	struct hwrm_port_mac_cfg_input req = {0};
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	u32 flags = 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
+	if (ptp->rx_filter)
+		flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE;
+	else
+		flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_DISABLE;
+	if (ptp->tx_tstamp_en)
+		flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_ENABLE;
+	else
+		flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE;
+	req.flags = cpu_to_le32(flags);
+	req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
+	req.rx_ts_capture_ptp_msg_type = cpu_to_le16(ptp->rxctl);
+
+	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+}
+
+int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct hwtstamp_config stmpconf;
+	struct bnxt_ptp_cfg *ptp;
+	u16 old_rxctl;
+	int old_rx_filter, rc;
+	u8 old_tx_tstamp_en;
+
+	ptp = bp->ptp_cfg;
+	if (!ptp)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&stmpconf, ifr->ifr_data, sizeof(stmpconf)))
+		return -EFAULT;
+
+	if (stmpconf.flags)
+		return -EINVAL;
+
+	if (stmpconf.tx_type != HWTSTAMP_TX_ON &&
+	    stmpconf.tx_type != HWTSTAMP_TX_OFF)
+		return -ERANGE;
+
+	old_rx_filter = ptp->rx_filter;
+	old_rxctl = ptp->rxctl;
+	old_tx_tstamp_en = ptp->tx_tstamp_en;
+	switch (stmpconf.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		ptp->rxctl = 0;
+		ptp->rx_filter = HWTSTAMP_FILTER_NONE;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+		ptp->rxctl = BNXT_PTP_MSG_EVENTS;
+		ptp->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		ptp->rxctl = BNXT_PTP_MSG_SYNC;
+		ptp->rx_filter = HWTSTAMP_FILTER_PTP_V2_SYNC;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		ptp->rxctl = BNXT_PTP_MSG_DELAY_REQ;
+		ptp->rx_filter = HWTSTAMP_FILTER_PTP_V2_DELAY_REQ;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (stmpconf.tx_type == HWTSTAMP_TX_ON)
+		ptp->tx_tstamp_en = 1;
+	else
+		ptp->tx_tstamp_en = 0;
+
+	rc = bnxt_hwrm_ptp_cfg(bp);
+	if (rc)
+		goto ts_set_err;
+
+	stmpconf.rx_filter = ptp->rx_filter;
+	return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ?
+		-EFAULT : 0;
+
+ts_set_err:
+	ptp->rx_filter = old_rx_filter;
+	ptp->rxctl = old_rxctl;
+	ptp->tx_tstamp_en = old_tx_tstamp_en;
+	return rc;
+}
+
+int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct hwtstamp_config stmpconf;
+	struct bnxt_ptp_cfg *ptp;
+
+	ptp = bp->ptp_cfg;
+	if (!ptp)
+		return -EOPNOTSUPP;
+
+	stmpconf.flags = 0;
+	stmpconf.tx_type = ptp->tx_tstamp_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+
+	stmpconf.rx_filter = ptp->rx_filter;
+	return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ?
+		-EFAULT : 0;
+}
+
+static int bnxt_map_regs(struct bnxt *bp, u32 *reg_arr, int count, int reg_win)
+{
+	u32 reg_base = *reg_arr & BNXT_GRC_BASE_MASK;
+	u32 win_off;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if ((reg_arr[i] & BNXT_GRC_BASE_MASK) != reg_base)
+			return -ERANGE;
+	}
+	win_off = BNXT_GRCPF_REG_WINDOW_BASE_OUT + (reg_win - 1) * 4;
+	writel(reg_base, bp->bar0 + win_off);
+	return 0;
+}
+
+static int bnxt_map_ptp_regs(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	u32 *reg_arr;
+	int rc, i;
+
+	reg_arr = ptp->refclk_regs;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		rc = bnxt_map_regs(bp, reg_arr, 2, BNXT_PTP_GRC_WIN);
+		if (rc)
+			return rc;
+		for (i = 0; i < 2; i++)
+			ptp->refclk_mapped_regs[i] = BNXT_PTP_GRC_WIN_BASE +
+				(ptp->refclk_regs[i] & BNXT_GRC_OFFSET_MASK);
+		return 0;
+	}
+	return -ENODEV;
+}
+
+static void bnxt_unmap_ptp_regs(struct bnxt *bp)
+{
+	writel(0, bp->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT +
+		  (BNXT_PTP_GRC_WIN - 1) * 4);
+}
+
+static u64 bnxt_cc_read(const struct cyclecounter *cc)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(cc, struct bnxt_ptp_cfg, cc);
+
+	return bnxt_refclk_read(ptp->bp, NULL);
+}
+
+static const struct ptp_clock_info bnxt_ptp_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "bnxt clock",
+	.max_adj	= BNXT_MAX_PHC_DRIFT,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfreq	= bnxt_ptp_adjfreq,
+	.adjtime	= bnxt_ptp_adjtime,
+	.gettimex64	= bnxt_ptp_gettimex,
+	.settime64	= bnxt_ptp_settime,
+	.enable		= bnxt_ptp_enable,
+};
+
+int bnxt_ptp_init(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	int rc;
+
+	if (!ptp)
+		return 0;
+
+	rc = bnxt_map_ptp_regs(bp);
+	if (rc)
+		return rc;
+
+	atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
+	spin_lock_init(&ptp->ptp_lock);
+
+	memset(&ptp->cc, 0, sizeof(ptp->cc));
+	ptp->cc.read = bnxt_cc_read;
+	ptp->cc.mask = CYCLECOUNTER_MASK(48);
+	ptp->cc.shift = 0;
+	ptp->cc.mult = 1;
+
+	timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
+
+	ptp->ptp_info = bnxt_ptp_caps;
+	ptp->ptp_clock = ptp_clock_register(&ptp->ptp_info, &bp->pdev->dev);
+	if (IS_ERR(ptp->ptp_clock)) {
+		int err = PTR_ERR(ptp->ptp_clock);
+
+		ptp->ptp_clock = NULL;
+		bnxt_unmap_ptp_regs(bp);
+		return err;
+	}
+
+	return 0;
+}
+
+void bnxt_ptp_clear(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+	if (!ptp)
+		return;
+
+	if (ptp->ptp_clock)
+		ptp_clock_unregister(ptp->ptp_clock);
+
+	ptp->ptp_clock = NULL;
+	bnxt_unmap_ptp_regs(bp);
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 603f0fdb71c2..93a9921a8b46 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -10,6 +10,17 @@
 #ifndef BNXT_PTP_H
 #define BNXT_PTP_H
 
+#define BNXT_PTP_GRC_WIN	5
+#define BNXT_PTP_GRC_WIN_BASE	0x5000
+
+#define BNXT_MAX_PHC_DRIFT	31000000
+#define BNXT_LO_TIMER_MASK	0x0000ffffffffUL
+#define BNXT_HI_TIMER_MASK	0xffff00000000UL
+
+#define BNXT_PTP_QTS_TIMEOUT	1000
+#define BNXT_PTP_QTS_TX_ENABLES	(PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID |	\
+				 PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT)
+
 struct bnxt_ptp_cfg {
 	struct ptp_clock_info	ptp_info;
 	struct ptp_clock	*ptp_clock;
@@ -46,4 +57,9 @@ struct bnxt_ptp_cfg {
 	u32			refclk_regs[2];
 	u32			refclk_mapped_regs[2];
 };
+
+int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
+int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
+int bnxt_ptp_init(struct bnxt *bp);
+void bnxt_ptp_clear(struct bnxt *bp);
 #endif

From 390862f45c85b8ebbf9c5c09192bf413a8fb72f8 Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:47 -0400
Subject: [PATCH 3022/3292] bnxt_en: Get the full 48-bit hardware timestamp
 periodically

From the bnxt_timer(), read the 48-bit hardware running clock
periodically and store it in ptp->current_time.  The previous snapshot
of the clock will be stored in ptp->old_time.  The old_time snapshot
will be used in the next patches to compute the RX packet timestamps.

v2: Use .do_aux_work() to read the timer periodically.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 39 +++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h |  1 +
 3 files changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 1250a5b50b50..23eddde7bf12 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10075,6 +10075,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 		}
 	}
 
+	bnxt_ptp_start(bp);
 	rc = bnxt_init_nic(bp, irq_re_init);
 	if (rc) {
 		netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 47f1f9c3380c..b0563c7761ff 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -45,6 +45,18 @@ static u64 bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts)
 	return ns;
 }
 
+static void bnxt_ptp_get_current_time(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+	if (!ptp)
+		return;
+	spin_lock_bh(&ptp->ptp_lock);
+	WRITE_ONCE(ptp->old_time, ptp->current_time);
+	ptp->current_time = bnxt_refclk_read(bp, NULL);
+	spin_unlock_bh(&ptp->ptp_lock);
+}
+
 static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
 			     struct timespec64 *ts,
 			     struct ptp_system_timestamp *sts)
@@ -257,6 +269,32 @@ static u64 bnxt_cc_read(const struct cyclecounter *cc)
 	return bnxt_refclk_read(ptp->bp, NULL);
 }
 
+static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
+{
+	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+						ptp_info);
+	struct bnxt *bp = ptp->bp;
+
+	bnxt_ptp_get_current_time(bp);
+	return HZ;
+}
+
+void bnxt_ptp_start(struct bnxt *bp)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+	if (!ptp)
+		return;
+
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		spin_lock_bh(&ptp->ptp_lock);
+		ptp->current_time = bnxt_refclk_read(bp, NULL);
+		WRITE_ONCE(ptp->old_time, ptp->current_time);
+		spin_unlock_bh(&ptp->ptp_lock);
+		ptp_schedule_worker(ptp->ptp_clock, 0);
+	}
+}
+
 static const struct ptp_clock_info bnxt_ptp_caps = {
 	.owner		= THIS_MODULE,
 	.name		= "bnxt clock",
@@ -268,6 +306,7 @@ static const struct ptp_clock_info bnxt_ptp_caps = {
 	.pps		= 0,
 	.adjfreq	= bnxt_ptp_adjfreq,
 	.adjtime	= bnxt_ptp_adjtime,
+	.do_aux_work	= bnxt_ptp_ts_aux_work,
 	.gettimex64	= bnxt_ptp_gettimex,
 	.settime64	= bnxt_ptp_settime,
 	.enable		= bnxt_ptp_enable,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 93a9921a8b46..61a67055c812 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -60,6 +60,7 @@ struct bnxt_ptp_cfg {
 
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
+void bnxt_ptp_start(struct bnxt *bp);
 int bnxt_ptp_init(struct bnxt *bp);
 void bnxt_ptp_clear(struct bnxt *bp);
 #endif

From 7f5515d19cd7aa02a866fd86622a022f12e06f0f Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:48 -0400
Subject: [PATCH 3023/3292] bnxt_en: Get the RX packet timestamp

If the RX packet is timestamped by the hardware, the RX completion
record will contain the lower 32-bit of the timestamp.  This needs
to be combined with the upper 16-bit of the periodic timestamp that
we get from the timer.  The previous snapshot in ptp->old_timer is
used to make sure that the snapshot is not ahead of the RX timestamp
and we adjust for wrap-around if needed.

v2: Make ptp->old_time read access safe on 32-bit CPUs.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 23 +++++++++++++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  3 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 16 +++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 13 +++++++++++
 4 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 23eddde7bf12..5132f07a5f43 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1709,9 +1709,9 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 	u8 *data_ptr, agg_bufs, cmp_type;
 	dma_addr_t dma_addr;
 	struct sk_buff *skb;
+	u32 flags, misc;
 	void *data;
 	int rc = 0;
-	u32 misc;
 
 	rxcmp = (struct rx_cmp *)
 			&cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
@@ -1809,7 +1809,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 		goto next_rx_no_len;
 	}
 
-	len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT;
+	flags = le32_to_cpu(rxcmp->rx_cmp_len_flags_type);
+	len = flags >> RX_CMP_LEN_SHIFT;
 	dma_addr = rx_buf->mapping;
 
 	if (bnxt_rx_xdp(bp, rxr, cons, data, &data_ptr, &len, event)) {
@@ -1886,6 +1887,24 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 		}
 	}
 
+	if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) ==
+		     RX_CMP_FLAGS_ITYPE_PTP_W_TS)) {
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp);
+			u64 ns, ts;
+
+			if (!bnxt_get_rx_ts_p5(bp, &ts, cmpl_ts)) {
+				struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+				spin_lock_bh(&ptp->ptp_lock);
+				ns = timecounter_cyc2time(&ptp->tc, ts);
+				spin_unlock_bh(&ptp->ptp_lock);
+				memset(skb_hwtstamps(skb), 0,
+				       sizeof(*skb_hwtstamps(skb)));
+				skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);
+			}
+		}
+	}
 	bnxt_deliver_skb(bp, bnapi, skb);
 	rc = 1;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 696163559b64..94a612e8cd42 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -159,6 +159,7 @@ struct rx_cmp {
 	#define RX_CMP_FLAGS_RSS_VALID				(1 << 10)
 	#define RX_CMP_FLAGS_UNUSED				(1 << 11)
 	 #define RX_CMP_FLAGS_ITYPES_SHIFT			 12
+	 #define RX_CMP_FLAGS_ITYPES_MASK			 0xf000
 	 #define RX_CMP_FLAGS_ITYPE_UNKNOWN			 (0 << 12)
 	 #define RX_CMP_FLAGS_ITYPE_IP				 (1 << 12)
 	 #define RX_CMP_FLAGS_ITYPE_TCP				 (2 << 12)
@@ -240,7 +241,7 @@ struct rx_cmp_ext {
 	#define RX_CMPL_CFA_CODE_MASK				(0xffff << 16)
 	 #define RX_CMPL_CFA_CODE_SFT				 16
 
-	__le32 rx_cmp_unused3;
+	__le32 rx_cmp_timestamp;
 };
 
 #define RX_CMP_L2_ERRORS						\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index b0563c7761ff..cea7220f3d1b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -279,6 +279,22 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
 	return HZ;
 }
 
+int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	u64 time;
+
+	if (!ptp)
+		return -ENODEV;
+
+	BNXT_READ_TIME64(ptp, time, ptp->old_time);
+	*ts = (time & BNXT_HI_TIMER_MASK) | pkt_ts;
+	if (pkt_ts < (time & BNXT_LO_TIMER_MASK))
+		*ts += BNXT_LO_TIMER_MASK + 1;
+
+	return 0;
+}
+
 void bnxt_ptp_start(struct bnxt *bp)
 {
 	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 61a67055c812..4f2c62f5a78e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -58,8 +58,21 @@ struct bnxt_ptp_cfg {
 	u32			refclk_mapped_regs[2];
 };
 
+#if BITS_PER_LONG == 32
+#define BNXT_READ_TIME64(ptp, dst, src)		\
+do {						\
+	spin_lock_bh(&(ptp)->ptp_lock);		\
+	(dst) = (src);				\
+	spin_unlock_bh(&(ptp)->ptp_lock);	\
+} while (0)
+#else
+#define BNXT_READ_TIME64(ptp, dst, src)		\
+	((dst) = READ_ONCE(src))
+#endif
+
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
+int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
 void bnxt_ptp_start(struct bnxt *bp);
 int bnxt_ptp_init(struct bnxt *bp);
 void bnxt_ptp_clear(struct bnxt *bp);

From 83bb623c968e7351aee5111547693f95f330dc5a Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:49 -0400
Subject: [PATCH 3024/3292] bnxt_en: Transmit and retrieve packet timestamps

Setup the TXBD to enable TX timestamp if requested.  At TX packet DMA
completion, if we requested TX timestamp on that packet, we defer to
.do_aux_work() to obtain the TX timestamp from the firmware before we
free the TX SKB.

v2: Use .do_aux_work() to get the TX timestamp from firmware.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 38 +++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  2 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 94 +++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h |  2 +
 4 files changed, 131 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5132f07a5f43..e198e1426551 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -421,12 +421,25 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
 	}
 
-	if (unlikely(skb->no_fcs)) {
-		lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
-		goto normal_tx;
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+		if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb) &&
+		    atomic_dec_if_positive(&ptp->tx_avail) >= 0) {
+			if (!bnxt_ptp_parse(skb, &ptp->tx_seqid)) {
+				lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP);
+				skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			} else {
+				atomic_inc(&bp->ptp_cfg->tx_avail);
+			}
+		}
 	}
 
-	if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
+	if (unlikely(skb->no_fcs))
+		lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
+
+	if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh &&
+	    !lflags) {
 		struct tx_push_buffer *tx_push_buf = txr->tx_push;
 		struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
 		struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
@@ -593,6 +606,8 @@ normal_tx:
 
 	netdev_tx_sent_queue(txq, skb->len);
 
+	skb_tx_timestamp(skb);
+
 	/* Sync BD data before updating doorbell */
 	wmb();
 
@@ -622,6 +637,9 @@ tx_done:
 	return NETDEV_TX_OK;
 
 tx_dma_error:
+	if (BNXT_TX_PTP_IS_SET(lflags))
+		atomic_inc(&bp->ptp_cfg->tx_avail);
+
 	last_frag = i;
 
 	/* start back at beginning and unmap skb */
@@ -656,6 +674,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 
 	for (i = 0; i < nr_pkts; i++) {
 		struct bnxt_sw_tx_bd *tx_buf;
+		bool compl_deferred = false;
 		struct sk_buff *skb;
 		int j, last;
 
@@ -682,12 +701,21 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
 				skb_frag_size(&skb_shinfo(skb)->frags[j]),
 				PCI_DMA_TODEVICE);
 		}
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+			if (bp->flags & BNXT_FLAG_CHIP_P5) {
+				if (!bnxt_get_tx_ts_p5(bp, skb))
+					compl_deferred = true;
+				else
+					atomic_inc(&bp->ptp_cfg->tx_avail);
+			}
+		}
 
 next_tx_int:
 		cons = NEXT_TX(cons);
 
 		tx_bytes += skb->len;
-		dev_kfree_skb_any(skb);
+		if (!compl_deferred)
+			dev_kfree_skb_any(skb);
 	}
 
 	netdev_tx_completed_queue(txq, nr_pkts, tx_bytes);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 94a612e8cd42..bcf8d00b8c80 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -89,6 +89,8 @@ struct tx_bd_ext {
 	#define TX_BD_CFA_META_KEY_VLAN                         (1 << 28)
 };
 
+#define BNXT_TX_PTP_IS_SET(lflags) ((lflags) & cpu_to_le32(TX_BD_FLAGS_STAMP))
+
 struct rx_bd {
 	__le32 rx_bd_len_flags_type;
 	#define RX_BD_TYPE					(0x3f << 0)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index cea7220f3d1b..f698b6bd4ff8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -15,10 +15,32 @@
 #include <linux/net_tstamp.h>
 #include <linux/timecounter.h>
 #include <linux/timekeeping.h>
+#include <linux/ptp_classify.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_ptp.h"
 
+int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id)
+{
+	unsigned int ptp_class;
+	struct ptp_header *hdr;
+
+	ptp_class = ptp_classify_raw(skb);
+
+	switch (ptp_class & PTP_CLASS_VMASK) {
+	case PTP_CLASS_V1:
+	case PTP_CLASS_V2:
+		hdr = ptp_parse_header(skb, ptp_class);
+		if (!hdr)
+			return -EINVAL;
+
+		*seq_id	 = ntohs(hdr->sequence_id);
+		return 0;
+	default:
+		return -ERANGE;
+	}
+}
+
 static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
 			    const struct timespec64 *ts)
 {
@@ -57,6 +79,28 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp)
 	spin_unlock_bh(&ptp->ptp_lock);
 }
 
+static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts)
+{
+	struct hwrm_port_ts_query_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_port_ts_query_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_TS_QUERY, -1, -1);
+	req.flags = cpu_to_le32(flags);
+	if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) ==
+	    PORT_TS_QUERY_REQ_FLAGS_PATH_TX) {
+		req.enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
+		req.ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
+		req.ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT);
+	}
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc)
+		*ts = le64_to_cpu(resp->ptp_msg_ts);
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
 static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
 			     struct timespec64 *ts,
 			     struct ptp_system_timestamp *sts)
@@ -269,16 +313,62 @@ static u64 bnxt_cc_read(const struct cyclecounter *cc)
 	return bnxt_refclk_read(ptp->bp, NULL);
 }
 
+static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	struct skb_shared_hwtstamps timestamp;
+	u64 ts = 0, ns = 0;
+	int rc;
+
+	rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts);
+	if (!rc) {
+		memset(&timestamp, 0, sizeof(timestamp));
+		spin_lock_bh(&ptp->ptp_lock);
+		ns = timecounter_cyc2time(&ptp->tc, ts);
+		spin_unlock_bh(&ptp->ptp_lock);
+		timestamp.hwtstamp = ns_to_ktime(ns);
+		skb_tstamp_tx(ptp->tx_skb, &timestamp);
+	} else {
+		netdev_err(bp->dev, "TS query for TX timer failed rc = %x\n",
+			   rc);
+	}
+
+	dev_kfree_skb_any(ptp->tx_skb);
+	ptp->tx_skb = NULL;
+	atomic_inc(&ptp->tx_avail);
+}
+
 static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
 {
 	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
 						ptp_info);
+	unsigned long now = jiffies;
 	struct bnxt *bp = ptp->bp;
 
+	if (ptp->tx_skb)
+		bnxt_stamp_tx_skb(bp, ptp->tx_skb);
+
+	if (!time_after_eq(now, ptp->next_period))
+		return ptp->next_period - now;
+
 	bnxt_ptp_get_current_time(bp);
+	ptp->next_period = now + HZ;
 	return HZ;
 }
 
+int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb)
+{
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+	if (ptp->tx_skb) {
+		netdev_err(bp->dev, "deferring skb:one SKB is still outstanding\n");
+		return -EBUSY;
+	}
+	ptp->tx_skb = skb;
+	ptp_schedule_worker(ptp->ptp_clock, 0);
+	return 0;
+}
+
 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts)
 {
 	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
@@ -375,5 +465,9 @@ void bnxt_ptp_clear(struct bnxt *bp)
 		ptp_clock_unregister(ptp->ptp_clock);
 
 	ptp->ptp_clock = NULL;
+	if (ptp->tx_skb) {
+		dev_kfree_skb_any(ptp->tx_skb);
+		ptp->tx_skb = NULL;
+	}
 	bnxt_unmap_ptp_regs(bp);
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
index 4f2c62f5a78e..6b6245750e20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
@@ -70,8 +70,10 @@ do {						\
 	((dst) = READ_ONCE(src))
 #endif
 
+int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id);
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
+int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
 int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts);
 void bnxt_ptp_start(struct bnxt *bp);
 int bnxt_ptp_init(struct bnxt *bp);

From 93cb62d98e9c3d8c94cc09a15b9ab1faf342c392 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 27 Jun 2021 13:19:50 -0400
Subject: [PATCH 3025/3292] bnxt_en: Enable hardware PTP support

Call bnxt_ptp_init() to initialize and register with the clock driver
to enable PTP support.  Call bnxt_ptp_free() to unregister and clean
up during shutdown.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e198e1426551..8f185a4883d2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -12660,6 +12660,8 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 
 	if (BNXT_PF(bp))
 		devlink_port_type_clear(&bp->dl_port);
+
+	bnxt_ptp_clear(bp);
 	pci_disable_pcie_error_reporting(pdev);
 	unregister_netdev(dev);
 	clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
@@ -13243,6 +13245,11 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 				   rc);
 	}
 
+	if (bnxt_ptp_init(bp)) {
+		netdev_warn(dev, "PTP initialization failed.\n");
+		kfree(bp->ptp_cfg);
+		bp->ptp_cfg = NULL;
+	}
 	bnxt_inv_fw_health_reg(bp);
 	bnxt_dl_register(bp);
 

From d2343cb8d154fe20c4499711bb3a9af2095b2b4b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 28 Jun 2021 21:55:16 +0200
Subject: [PATCH 3026/3292] sched/core: Disable CONFIG_SCHED_CORE by default

This option at minimum adds extra code to the scheduler - even if
it's default unused - and most users wouldn't want it.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/Kconfig.preempt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bd7c4147b9a8..5876e30c5740 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -102,7 +102,6 @@ config PREEMPT_DYNAMIC
 
 config SCHED_CORE
 	bool "Core Scheduling for SMT"
-	default y
 	depends on SCHED_SMT
 	help
 	  This option permits Core Scheduling, a means of coordinated task
@@ -115,7 +114,8 @@ config SCHED_CORE
 	   - mitigation of some (not all) SMT side channels;
 	   - limiting SMT interference to improve determinism and/or performance.
 
-	  SCHED_CORE is default enabled when SCHED_SMT is enabled -- when
-	  unused there should be no impact on performance.
+	  SCHED_CORE is default disabled. When it is enabled and unused,
+	  which is the likely usage by Linux distributions, there should
+	  be no measurable impact on performance.
 
 

From a78cae2476812cecaa4a33d0086bbb53986906bc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 25 Jun 2021 15:16:12 -0700
Subject: [PATCH 3027/3292] xdp: Move the rxq_info.mem clearing to
 unreg_mem_model()

xdp_rxq_info_unreg() implicitly calls xdp_rxq_info_unreg_mem_model().
This may well be confusing to the driver authors, and lead to double free
if they call xdp_rxq_info_unreg_mem_model() before xdp_rxq_info_unreg()
(when mem model type == MEM_TYPE_PAGE_POOL).

In fact error path of mvpp2_rxq_init() seems to currently do exactly that.

The double free will result in refcount underflow in page_pool_destroy().
Make the interface a little more programmer friendly by clearing type and
id so that xdp_rxq_info_unreg_mem_model() can be called multiple times.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210625221612.2637086-1-kuba@kernel.org
---
 net/core/xdp.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/core/xdp.c b/net/core/xdp.c
index 725d20f1b100..cc92ccb38432 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -113,8 +113,13 @@ static void mem_allocator_disconnect(void *allocator)
 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 {
 	struct xdp_mem_allocator *xa;
+	int type = xdp_rxq->mem.type;
 	int id = xdp_rxq->mem.id;
 
+	/* Reset mem info to defaults */
+	xdp_rxq->mem.id = 0;
+	xdp_rxq->mem.type = 0;
+
 	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
 		WARN(1, "Missing register, driver bug");
 		return;
@@ -123,7 +128,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 	if (id == 0)
 		return;
 
-	if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
+	if (type == MEM_TYPE_PAGE_POOL) {
 		rcu_read_lock();
 		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
 		page_pool_destroy(xa->page_pool);
@@ -144,10 +149,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 
 	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
 	xdp_rxq->dev = NULL;
-
-	/* Reset mem info to defaults */
-	xdp_rxq->mem.id = 0;
-	xdp_rxq->mem.type = 0;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
 

From e887b2df62513505ac6f6db2cb59ee6234ab042b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:22 +0300
Subject: [PATCH 3028/3292] net: bridge: include the is_local bit in
 br_fdb_replay

Since commit 2c4eca3ef716 ("net: bridge: switchdev: include local flag
in FDB notifications"), the bridge emits SWITCHDEV_FDB_ADD_TO_DEVICE
events with the is_local flag populated (but we ignore it nonetheless).

We would like DSA to start treating this bit, but it is still not
populated by the replay helper, so add it there too.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 698b79747d32..b8d3ddfe5853 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -737,6 +737,7 @@ static int br_fdb_replay_one(struct notifier_block *nb,
 	item.vid = fdb->key.vlan_id;
 	item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
 	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+	item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
 	item.info.dev = dev;
 
 	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);

From 97558e880f63f372f72cf7cd24dfc4befac13c28 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:23 +0300
Subject: [PATCH 3029/3292] net: ocelot: delete call to br_fdb_replay

Not using this driver, I did not realize it doesn't react to
SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE notifications, but it implements just
the bridge bypass operations (.ndo_fdb_{add,del}). So the call to
br_fdb_replay just produces notifications that are ignored, delete it
for now.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_net.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index aad33d22c33f..4fc74ee4aaab 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1165,10 +1165,6 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_fdb_replay(bridge_dev, brport_dev, &ocelot_switchdev_nb);
-	if (err)
-		return err;
-
 	err = br_vlan_replay(bridge_dev, brport_dev,
 			     &ocelot_switchdev_blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)

From 69bfac968a06aab5927160f8736485f85c3e8ee8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:24 +0300
Subject: [PATCH 3030/3292] net: switchdev: add a context void pointer to
 struct switchdev_notifier_info

In the case where the driver asks for a replay of a certain type of
event (port object or attribute) for a bridge port that is a LAG, it may
do so because this port has just joined the LAG.

But there might already be other switchdev ports in that LAG, and it is
preferable that those preexisting switchdev ports do not act upon the
replayed event.

The solution is to add a context to switchdev events, which is NULL most
of the time (when the bridge layer initiates the call) but which can be
set to a value controlled by the switchdev driver when a replay is
requested. The driver can then check the context to figure out if all
ports within the LAG should act upon the switchdev event, or just the
ones that match the context.

We have to modify all switchdev_handle_* helper functions as well as the
prototypes in the drivers that use these helpers too, because these
helpers hide the underlying struct switchdev_notifier_info from us and
there is no way to retrieve the context otherwise.

The context structure will be populated and used in later patches.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/freescale/dpaa2/dpaa2-switch.c   |  2 +-
 .../marvell/prestera/prestera_switchdev.c     |  6 ++---
 .../mellanox/mlx5/core/en/rep/bridge.c        |  3 +++
 .../mellanox/mlxsw/spectrum_switchdev.c       |  6 ++---
 .../microchip/sparx5/sparx5_switchdev.c       |  2 +-
 drivers/net/ethernet/mscc/ocelot_net.c        |  6 ++---
 drivers/net/ethernet/ti/am65-cpsw-switchdev.c |  6 ++---
 drivers/net/ethernet/ti/cpsw_switchdev.c      |  6 ++---
 include/net/switchdev.h                       | 13 +++++-----
 net/dsa/slave.c                               |  6 ++---
 net/switchdev/switchdev.c                     | 25 +++++++++++--------
 11 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 05de37c3b64c..f3d12d0714fb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1625,7 +1625,7 @@ static int dpaa2_switch_port_bridge_flags(struct net_device *netdev,
 	return 0;
 }
 
-static int dpaa2_switch_port_attr_set(struct net_device *netdev,
+static int dpaa2_switch_port_attr_set(struct net_device *netdev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack)
 {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 74b81b4fbb97..0b3e8f2db294 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -708,7 +708,7 @@ err_port_stp_set:
 	return err;
 }
 
-static int prestera_port_obj_attr_set(struct net_device *dev,
+static int prestera_port_obj_attr_set(struct net_device *dev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack)
 {
@@ -1040,7 +1040,7 @@ static int prestera_port_vlans_add(struct prestera_port *port,
 					     flag_pvid, extack);
 }
 
-static int prestera_port_obj_add(struct net_device *dev,
+static int prestera_port_obj_add(struct net_device *dev, const void *ctx,
 				 const struct switchdev_obj *obj,
 				 struct netlink_ext_ack *extack)
 {
@@ -1078,7 +1078,7 @@ static int prestera_port_vlans_del(struct prestera_port *port,
 	return 0;
 }
 
-static int prestera_port_obj_del(struct net_device *dev,
+static int prestera_port_obj_del(struct net_device *dev, const void *ctx,
 				 const struct switchdev_obj *obj)
 {
 	struct prestera_port *port = netdev_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 7f5efc1b4392..3c0032c9647c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -76,6 +76,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
 }
 
 static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+					const void *ctx,
 					const struct switchdev_obj *obj,
 					struct netlink_ext_ack *extack)
 {
@@ -107,6 +108,7 @@ static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
 }
 
 static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+					const void *ctx,
 					const struct switchdev_obj *obj)
 {
 	const struct switchdev_obj_port_vlan *vlan;
@@ -136,6 +138,7 @@ static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
 }
 
 static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+					     const void *ctx,
 					     const struct switchdev_attr *attr,
 					     struct netlink_ext_ack *extack)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 0cfba2986841..c5ef9aa64efe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -898,7 +898,7 @@ mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 }
 
-static int mlxsw_sp_port_attr_set(struct net_device *dev,
+static int mlxsw_sp_port_attr_set(struct net_device *dev, const void *ctx,
 				  const struct switchdev_attr *attr,
 				  struct netlink_ext_ack *extack)
 {
@@ -1766,7 +1766,7 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
-static int mlxsw_sp_port_obj_add(struct net_device *dev,
+static int mlxsw_sp_port_obj_add(struct net_device *dev, const void *ctx,
 				 const struct switchdev_obj *obj,
 				 struct netlink_ext_ack *extack)
 {
@@ -1916,7 +1916,7 @@ mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
-static int mlxsw_sp_port_obj_del(struct net_device *dev,
+static int mlxsw_sp_port_obj_del(struct net_device *dev, const void *ctx,
 				 const struct switchdev_obj *obj)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
index 19c7cb795b4b..246eba711f15 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
@@ -65,7 +65,7 @@ static void sparx5_port_attr_ageing_set(struct sparx5_port *port,
 	sparx5_set_ageing(port->sparx5, ageing_time);
 }
 
-static int sparx5_port_attr_set(struct net_device *dev,
+static int sparx5_port_attr_set(struct net_device *dev, const void *ctx,
 				const struct switchdev_attr *attr,
 				struct netlink_ext_ack *extack)
 {
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 4fc74ee4aaab..456541640feb 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -939,7 +939,7 @@ static void ocelot_port_attr_mc_set(struct ocelot *ocelot, int port, bool mc)
 		       ANA_PORT_CPU_FWD_CFG, port);
 }
 
-static int ocelot_port_attr_set(struct net_device *dev,
+static int ocelot_port_attr_set(struct net_device *dev, const void *ctx,
 				const struct switchdev_attr *attr,
 				struct netlink_ext_ack *extack)
 {
@@ -1058,7 +1058,7 @@ ocelot_port_obj_mrp_del_ring_role(struct net_device *dev,
 	return ocelot_mrp_del_ring_role(ocelot, port, mrp);
 }
 
-static int ocelot_port_obj_add(struct net_device *dev,
+static int ocelot_port_obj_add(struct net_device *dev, const void *ctx,
 			       const struct switchdev_obj *obj,
 			       struct netlink_ext_ack *extack)
 {
@@ -1086,7 +1086,7 @@ static int ocelot_port_obj_add(struct net_device *dev,
 	return ret;
 }
 
-static int ocelot_port_obj_del(struct net_device *dev,
+static int ocelot_port_obj_del(struct net_device *dev, const void *ctx,
 			       const struct switchdev_obj *obj)
 {
 	int ret = 0;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
index 23cfb91e9c4d..9c29b363e9ae 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
@@ -84,7 +84,7 @@ static int am65_cpsw_port_attr_br_flags_pre_set(struct net_device *netdev,
 	return 0;
 }
 
-static int am65_cpsw_port_attr_set(struct net_device *ndev,
+static int am65_cpsw_port_attr_set(struct net_device *ndev, const void *ctx,
 				   const struct switchdev_attr *attr,
 				   struct netlink_ext_ack *extack)
 {
@@ -302,7 +302,7 @@ static int am65_cpsw_port_mdb_del(struct am65_cpsw_port *port,
 	return 0;
 }
 
-static int am65_cpsw_port_obj_add(struct net_device *ndev,
+static int am65_cpsw_port_obj_add(struct net_device *ndev, const void *ctx,
 				  const struct switchdev_obj *obj,
 				  struct netlink_ext_ack *extack)
 {
@@ -329,7 +329,7 @@ static int am65_cpsw_port_obj_add(struct net_device *ndev,
 	return err;
 }
 
-static int am65_cpsw_port_obj_del(struct net_device *ndev,
+static int am65_cpsw_port_obj_del(struct net_device *ndev, const void *ctx,
 				  const struct switchdev_obj *obj)
 {
 	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c
index 05a64fb7a04f..f7fb6e17dadd 100644
--- a/drivers/net/ethernet/ti/cpsw_switchdev.c
+++ b/drivers/net/ethernet/ti/cpsw_switchdev.c
@@ -86,7 +86,7 @@ static int cpsw_port_attr_br_flags_pre_set(struct net_device *netdev,
 	return 0;
 }
 
-static int cpsw_port_attr_set(struct net_device *ndev,
+static int cpsw_port_attr_set(struct net_device *ndev, const void *ctx,
 			      const struct switchdev_attr *attr,
 			      struct netlink_ext_ack *extack)
 {
@@ -310,7 +310,7 @@ static int cpsw_port_mdb_del(struct cpsw_priv *priv,
 	return err;
 }
 
-static int cpsw_port_obj_add(struct net_device *ndev,
+static int cpsw_port_obj_add(struct net_device *ndev, const void *ctx,
 			     const struct switchdev_obj *obj,
 			     struct netlink_ext_ack *extack)
 {
@@ -338,7 +338,7 @@ static int cpsw_port_obj_add(struct net_device *ndev,
 	return err;
 }
 
-static int cpsw_port_obj_del(struct net_device *ndev,
+static int cpsw_port_obj_del(struct net_device *ndev, const void *ctx,
 			     const struct switchdev_obj *obj)
 {
 	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index f1a5a9a3634d..e4cac9218ce1 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -202,6 +202,7 @@ enum switchdev_notifier_type {
 struct switchdev_notifier_info {
 	struct net_device *dev;
 	struct netlink_ext_ack *extack;
+	const void *ctx;
 };
 
 struct switchdev_notifier_fdb_info {
@@ -268,19 +269,19 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 int switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*add_cb)(struct net_device *dev,
+			int (*add_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj,
 				      struct netlink_ext_ack *extack));
 int switchdev_handle_port_obj_del(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*del_cb)(struct net_device *dev,
+			int (*del_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj));
 
 int switchdev_handle_port_attr_set(struct net_device *dev,
 			struct switchdev_notifier_port_attr_info *port_attr_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*set_cb)(struct net_device *dev,
+			int (*set_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack));
 #else
@@ -352,7 +353,7 @@ static inline int
 switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*add_cb)(struct net_device *dev,
+			int (*add_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj,
 				      struct netlink_ext_ack *extack))
 {
@@ -363,7 +364,7 @@ static inline int
 switchdev_handle_port_obj_del(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*del_cb)(struct net_device *dev,
+			int (*del_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj))
 {
 	return 0;
@@ -373,7 +374,7 @@ static inline int
 switchdev_handle_port_attr_set(struct net_device *dev,
 			struct switchdev_notifier_port_attr_info *port_attr_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*set_cb)(struct net_device *dev,
+			int (*set_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack))
 {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5e668e529575..3692259a025f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -271,7 +271,7 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return phylink_mii_ioctl(p->dp->pl, ifr, cmd);
 }
 
-static int dsa_slave_port_attr_set(struct net_device *dev,
+static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 				   const struct switchdev_attr *attr,
 				   struct netlink_ext_ack *extack)
 {
@@ -394,7 +394,7 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid);
 }
 
-static int dsa_slave_port_obj_add(struct net_device *dev,
+static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
 				  const struct switchdev_obj *obj,
 				  struct netlink_ext_ack *extack)
 {
@@ -469,7 +469,7 @@ static int dsa_slave_vlan_del(struct net_device *dev,
 	return 0;
 }
 
-static int dsa_slave_port_obj_del(struct net_device *dev,
+static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
 				  const struct switchdev_obj *obj)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 89a36db47ab4..070698dd19bc 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -381,19 +381,20 @@ EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
 static int __switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*add_cb)(struct net_device *dev,
+			int (*add_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj,
 				      struct netlink_ext_ack *extack))
 {
+	struct switchdev_notifier_info *info = &port_obj_info->info;
 	struct netlink_ext_ack *extack;
 	struct net_device *lower_dev;
 	struct list_head *iter;
 	int err = -EOPNOTSUPP;
 
-	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+	extack = switchdev_notifier_info_to_extack(info);
 
 	if (check_cb(dev)) {
-		err = add_cb(dev, port_obj_info->obj, extack);
+		err = add_cb(dev, info->ctx, port_obj_info->obj, extack);
 		if (err != -EOPNOTSUPP)
 			port_obj_info->handled = true;
 		return err;
@@ -422,7 +423,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
 int switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*add_cb)(struct net_device *dev,
+			int (*add_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj,
 				      struct netlink_ext_ack *extack))
 {
@@ -439,15 +440,16 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
 static int __switchdev_handle_port_obj_del(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*del_cb)(struct net_device *dev,
+			int (*del_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj))
 {
+	struct switchdev_notifier_info *info = &port_obj_info->info;
 	struct net_device *lower_dev;
 	struct list_head *iter;
 	int err = -EOPNOTSUPP;
 
 	if (check_cb(dev)) {
-		err = del_cb(dev, port_obj_info->obj);
+		err = del_cb(dev, info->ctx, port_obj_info->obj);
 		if (err != -EOPNOTSUPP)
 			port_obj_info->handled = true;
 		return err;
@@ -476,7 +478,7 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev,
 int switchdev_handle_port_obj_del(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*del_cb)(struct net_device *dev,
+			int (*del_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_obj *obj))
 {
 	int err;
@@ -492,19 +494,20 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
 static int __switchdev_handle_port_attr_set(struct net_device *dev,
 			struct switchdev_notifier_port_attr_info *port_attr_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*set_cb)(struct net_device *dev,
+			int (*set_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack))
 {
+	struct switchdev_notifier_info *info = &port_attr_info->info;
 	struct netlink_ext_ack *extack;
 	struct net_device *lower_dev;
 	struct list_head *iter;
 	int err = -EOPNOTSUPP;
 
-	extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+	extack = switchdev_notifier_info_to_extack(info);
 
 	if (check_cb(dev)) {
-		err = set_cb(dev, port_attr_info->attr, extack);
+		err = set_cb(dev, info->ctx, port_attr_info->attr, extack);
 		if (err != -EOPNOTSUPP)
 			port_attr_info->handled = true;
 		return err;
@@ -533,7 +536,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev,
 int switchdev_handle_port_attr_set(struct net_device *dev,
 			struct switchdev_notifier_port_attr_info *port_attr_info,
 			bool (*check_cb)(const struct net_device *dev),
-			int (*set_cb)(struct net_device *dev,
+			int (*set_cb)(struct net_device *dev, const void *ctx,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack))
 {

From 0d2cfbd41c4a5a0ca5598d1874b1081138cd64c6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:25 +0300
Subject: [PATCH 3031/3292] net: bridge: ignore switchdev events for LAG ports
 which didn't request replay

There is a slight inconvenience in the switchdev replay helpers added
recently, and this is when:

ip link add br0 type bridge
ip link add bond0 type bond
ip link set bond0 master br0
bridge vlan add dev bond0 vid 100
ip link set swp0 master bond0
ip link set swp1 master bond0

Since the underlying driver (currently only DSA) asks for a replay of
VLANs when swp0 and swp1 join the LAG because it is bridged, what will
happen is that DSA will try to react twice on the VLAN event for swp0.
This is not really a huge problem right now, because most drivers accept
duplicates since the bridge itself does, but it will become a problem
when we add support for replaying switchdev object deletions.

Let's fix this by adding a blank void *ctx in the replay helpers, which
will be passed on by the bridge in the switchdev notifications. If the
context is NULL, everything is the same as before. But if the context is
populated with a valid pointer, the underlying switchdev driver
(currently DSA) can use the pointer to 'see through' the bridge port
(which in the example above is bond0) and 'know' that the event is only
for a particular physical port offloading that bridge port, and not for
all of them.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_net.c | 19 +++++++++++++++++--
 include/linux/if_bridge.h              | 14 ++++++++------
 net/bridge/br_fdb.c                    |  7 ++++---
 net/bridge/br_mdb.c                    |  8 +++++---
 net/bridge/br_vlan.c                   |  8 +++++---
 net/dsa/port.c                         |  6 +++---
 net/dsa/slave.c                        |  9 +++++++++
 7 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 456541640feb..166d851962d2 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -948,6 +948,9 @@ static int ocelot_port_attr_set(struct net_device *dev, const void *ctx,
 	int port = priv->chip_port;
 	int err = 0;
 
+	if (ctx && ctx != priv)
+		return 0;
+
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
 		ocelot_port_attr_stp_state_set(ocelot, port, attr->u.stp_state);
@@ -1062,8 +1065,12 @@ static int ocelot_port_obj_add(struct net_device *dev, const void *ctx,
 			       const struct switchdev_obj *obj,
 			       struct netlink_ext_ack *extack)
 {
+	struct ocelot_port_private *priv = netdev_priv(dev);
 	int ret = 0;
 
+	if (ctx && ctx != priv)
+		return 0;
+
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		ret = ocelot_port_obj_add_vlan(dev,
@@ -1089,8 +1096,12 @@ static int ocelot_port_obj_add(struct net_device *dev, const void *ctx,
 static int ocelot_port_obj_del(struct net_device *dev, const void *ctx,
 			       const struct switchdev_obj *obj)
 {
+	struct ocelot_port_private *priv = netdev_priv(dev);
 	int ret = 0;
 
+	if (ctx && ctx != priv)
+		return 0;
+
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		ret = ocelot_vlan_vid_del(dev,
@@ -1143,10 +1154,14 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
 				 struct net_device *bridge_dev,
 				 struct netlink_ext_ack *extack)
 {
+	struct ocelot_port *ocelot_port = ocelot->ports[port];
+	struct ocelot_port_private *priv;
 	clock_t ageing_time;
 	u8 stp_state;
 	int err;
 
+	priv = container_of(ocelot_port, struct ocelot_port_private, port);
+
 	ocelot_inherit_brport_flags(ocelot, port, brport_dev);
 
 	stp_state = br_port_get_stp_state(brport_dev);
@@ -1160,12 +1175,12 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
 	ageing_time = br_get_ageing_time(bridge_dev);
 	ocelot_port_attr_ageing_set(ocelot, port, ageing_time);
 
-	err = br_mdb_replay(bridge_dev, brport_dev,
+	err = br_mdb_replay(bridge_dev, brport_dev, priv,
 			    &ocelot_switchdev_blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_vlan_replay(bridge_dev, brport_dev,
+	err = br_vlan_replay(bridge_dev, brport_dev, priv,
 			     &ocelot_switchdev_blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 12e9a32dbca0..57df761b6f4a 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -71,7 +71,8 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
 bool br_multicast_router(const struct net_device *dev);
 int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-		  struct notifier_block *nb, struct netlink_ext_ack *extack);
+		  const void *ctx, struct notifier_block *nb,
+		  struct netlink_ext_ack *extack);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
 					     struct list_head *br_ip_list)
@@ -104,7 +105,7 @@ static inline bool br_multicast_router(const struct net_device *dev)
 	return false;
 }
 static inline int br_mdb_replay(struct net_device *br_dev,
-				struct net_device *dev,
+				struct net_device *dev, const void *ctx,
 				struct notifier_block *nb,
 				struct netlink_ext_ack *extack)
 {
@@ -120,7 +121,8 @@ int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo);
 int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-		   struct notifier_block *nb, struct netlink_ext_ack *extack);
+		   const void *ctx, struct notifier_block *nb,
+		   struct netlink_ext_ack *extack);
 #else
 static inline bool br_vlan_enabled(const struct net_device *dev)
 {
@@ -149,7 +151,7 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
 }
 
 static inline int br_vlan_replay(struct net_device *br_dev,
-				 struct net_device *dev,
+				 struct net_device *dev, const void *ctx,
 				 struct notifier_block *nb,
 				 struct netlink_ext_ack *extack)
 {
@@ -166,7 +168,7 @@ bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
 u8 br_port_get_stp_state(const struct net_device *dev);
 clock_t br_get_ageing_time(struct net_device *br_dev);
 int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
-		  struct notifier_block *nb);
+		  const void *ctx, struct notifier_block *nb);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -197,7 +199,7 @@ static inline clock_t br_get_ageing_time(struct net_device *br_dev)
 }
 
 static inline int br_fdb_replay(struct net_device *br_dev,
-				struct net_device *dev,
+				struct net_device *dev, const void *ctx,
 				struct notifier_block *nb)
 {
 	return -EOPNOTSUPP;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b8d3ddfe5853..9d164a518e38 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -728,7 +728,7 @@ static inline size_t fdb_nlmsg_size(void)
 
 static int br_fdb_replay_one(struct notifier_block *nb,
 			     struct net_bridge_fdb_entry *fdb,
-			     struct net_device *dev)
+			     struct net_device *dev, const void *ctx)
 {
 	struct switchdev_notifier_fdb_info item;
 	int err;
@@ -739,13 +739,14 @@ static int br_fdb_replay_one(struct notifier_block *nb,
 	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
 	item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
 	item.info.dev = dev;
+	item.info.ctx = ctx;
 
 	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
 	return notifier_to_errno(err);
 }
 
 int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
-		  struct notifier_block *nb)
+		  const void *ctx, struct notifier_block *nb)
 {
 	struct net_bridge_fdb_entry *fdb;
 	struct net_bridge *br;
@@ -766,7 +767,7 @@ int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
 		if (dst_dev != br_dev && dst_dev != dev)
 			continue;
 
-		err = br_fdb_replay_one(nb, fdb, dst_dev);
+		err = br_fdb_replay_one(nb, fdb, dst_dev, ctx);
 		if (err)
 			break;
 	}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 3f839a8cc9fb..8bc6afca5e8c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -568,12 +568,13 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
 
 static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
 			     struct switchdev_obj_port_mdb *mdb,
-			     struct netlink_ext_ack *extack)
+			     const void *ctx, struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_port_obj_info obj_info = {
 		.info = {
 			.dev = dev,
 			.extack = extack,
+			.ctx = ctx,
 		},
 		.obj = &mdb->obj,
 	};
@@ -603,7 +604,8 @@ static int br_mdb_queue_one(struct list_head *mdb_list,
 }
 
 int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-		  struct notifier_block *nb, struct netlink_ext_ack *extack)
+		  const void *ctx, struct notifier_block *nb,
+		  struct netlink_ext_ack *extack)
 {
 	struct net_bridge_mdb_entry *mp;
 	struct switchdev_obj *obj, *tmp;
@@ -664,7 +666,7 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
 
 	list_for_each_entry(obj, &mdb_list, list) {
 		err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
-					extack);
+					ctx, extack);
 		if (err)
 			goto out_free_mdb;
 	}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8789a57af543..2bfa2a00e193 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1807,12 +1807,13 @@ out_kfree:
 static int br_vlan_replay_one(struct notifier_block *nb,
 			      struct net_device *dev,
 			      struct switchdev_obj_port_vlan *vlan,
-			      struct netlink_ext_ack *extack)
+			      const void *ctx, struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_port_obj_info obj_info = {
 		.info = {
 			.dev = dev,
 			.extack = extack,
+			.ctx = ctx,
 		},
 		.obj = &vlan->obj,
 	};
@@ -1823,7 +1824,8 @@ static int br_vlan_replay_one(struct notifier_block *nb,
 }
 
 int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-		   struct notifier_block *nb, struct netlink_ext_ack *extack)
+		   const void *ctx, struct notifier_block *nb,
+		   struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *v;
@@ -1868,7 +1870,7 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
 		if (!br_vlan_should_use(v))
 			continue;
 
-		err = br_vlan_replay_one(nb, dev, &vlan, extack);
+		err = br_vlan_replay_one(nb, dev, &vlan, ctx, extack);
 		if (err)
 			return err;
 	}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 5c93f1e1a03d..339781c98de1 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -194,17 +194,17 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_mdb_replay(br, brport_dev,
+	err = br_mdb_replay(br, brport_dev, dp,
 			    &dsa_slave_switchdev_blocking_notifier,
 			    extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_fdb_replay(br, brport_dev, &dsa_slave_switchdev_notifier);
+	err = br_fdb_replay(br, brport_dev, dp, &dsa_slave_switchdev_notifier);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_vlan_replay(br, brport_dev,
+	err = br_vlan_replay(br, brport_dev, dp,
 			     &dsa_slave_switchdev_blocking_notifier,
 			     extack);
 	if (err && err != -EOPNOTSUPP)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3692259a025f..2f0d0a6b1f9c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -278,6 +278,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int ret;
 
+	if (ctx && ctx != dp)
+		return 0;
+
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
 		if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
@@ -401,6 +404,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err;
 
+	if (ctx && ctx != dp)
+		return 0;
+
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -475,6 +481,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err;
 
+	if (ctx && ctx != dp)
+		return 0;
+
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))

From bdf123b455ce596aec6e410ec36fe3687b6a2140 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:26 +0300
Subject: [PATCH 3032/3292] net: bridge: constify variables in the replay
 helpers

Some of the arguments and local variables for the newly added switchdev
replay helpers can be const, so let's make them so.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 14 +++++++-------
 net/bridge/br_fdb.c       |  6 +++---
 net/bridge/br_mdb.c       |  8 ++++----
 net/bridge/br_stp.c       |  4 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 57df761b6f4a..6b54da2c65ba 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -104,8 +104,8 @@ static inline bool br_multicast_router(const struct net_device *dev)
 {
 	return false;
 }
-static inline int br_mdb_replay(struct net_device *br_dev,
-				struct net_device *dev, const void *ctx,
+static inline int br_mdb_replay(const struct net_device *br_dev,
+				const struct net_device *dev, const void *ctx,
 				struct notifier_block *nb,
 				struct netlink_ext_ack *extack)
 {
@@ -166,8 +166,8 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
 void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
 u8 br_port_get_stp_state(const struct net_device *dev);
-clock_t br_get_ageing_time(struct net_device *br_dev);
-int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+clock_t br_get_ageing_time(const struct net_device *br_dev);
+int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
 		  const void *ctx, struct notifier_block *nb);
 #else
 static inline struct net_device *
@@ -193,13 +193,13 @@ static inline u8 br_port_get_stp_state(const struct net_device *dev)
 	return BR_STATE_DISABLED;
 }
 
-static inline clock_t br_get_ageing_time(struct net_device *br_dev)
+static inline clock_t br_get_ageing_time(const struct net_device *br_dev)
 {
 	return 0;
 }
 
-static inline int br_fdb_replay(struct net_device *br_dev,
-				struct net_device *dev, const void *ctx,
+static inline int br_fdb_replay(const struct net_device *br_dev,
+				const struct net_device *dev, const void *ctx,
 				struct notifier_block *nb)
 {
 	return -EOPNOTSUPP;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9d164a518e38..2e777c8b0921 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -727,7 +727,7 @@ static inline size_t fdb_nlmsg_size(void)
 }
 
 static int br_fdb_replay_one(struct notifier_block *nb,
-			     struct net_bridge_fdb_entry *fdb,
+			     const struct net_bridge_fdb_entry *fdb,
 			     struct net_device *dev, const void *ctx)
 {
 	struct switchdev_notifier_fdb_info item;
@@ -745,7 +745,7 @@ static int br_fdb_replay_one(struct notifier_block *nb,
 	return notifier_to_errno(err);
 }
 
-int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
 		  const void *ctx, struct notifier_block *nb)
 {
 	struct net_bridge_fdb_entry *fdb;
@@ -760,7 +760,7 @@ int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
-		struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+		const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
 		struct net_device *dst_dev;
 
 		dst_dev = dst ? dst->dev : br->dev;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 8bc6afca5e8c..cebdbff17b54 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -567,7 +567,7 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
 }
 
 static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
-			     struct switchdev_obj_port_mdb *mdb,
+			     const struct switchdev_obj_port_mdb *mdb,
 			     const void *ctx, struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_port_obj_info obj_info = {
@@ -607,7 +607,7 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
 		  const void *ctx, struct notifier_block *nb,
 		  struct netlink_ext_ack *extack)
 {
-	struct net_bridge_mdb_entry *mp;
+	const struct net_bridge_mdb_entry *mp;
 	struct switchdev_obj *obj, *tmp;
 	struct net_bridge *br;
 	LIST_HEAD(mdb_list);
@@ -634,8 +634,8 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
-		struct net_bridge_port_group __rcu **pp;
-		struct net_bridge_port_group *p;
+		struct net_bridge_port_group __rcu * const *pp;
+		const struct net_bridge_port_group *p;
 
 		if (mp->host_joined) {
 			err = br_mdb_queue_one(&mdb_list,
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3dafb6143cff..1d80f34a139c 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -639,9 +639,9 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
 	return 0;
 }
 
-clock_t br_get_ageing_time(struct net_device *br_dev)
+clock_t br_get_ageing_time(const struct net_device *br_dev)
 {
-	struct net_bridge *br;
+	const struct net_bridge *br;
 
 	if (!netif_is_bridge_master(br_dev))
 		return 0;

From 7e8c18586daf7c1653c4b43a8119bc9662ed8fa6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:27 +0300
Subject: [PATCH 3033/3292] net: bridge: allow the switchdev replay functions
 to be called for deletion

When a switchdev port leaves a LAG that is a bridge port, the switchdev
objects and port attributes offloaded to that port are not removed:

ip link add br0 type bridge
ip link add bond0 type bond mode 802.3ad
ip link set swp0 master bond0
ip link set bond0 master br0
bridge vlan add dev bond0 vid 100
ip link set swp0 nomaster

VLAN 100 will remain installed on swp0 despite it going into standalone
mode, because as far as the bridge is concerned, nothing ever happened
to its bridge port.

Let's extend the bridge vlan, fdb and mdb replay functions to take a
'bool adding' argument, and make DSA and ocelot call the replay
functions with 'adding' as false from the switchdev unsync path, for the
switch port that leaves the bridge.

Note that this patch in itself does not salvage anything, because in the
current pull mode of operation, DSA still needs to call the replay
helpers with adding=false. This will be done in another patch.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_net.c |  4 ++--
 include/linux/if_bridge.h              | 12 ++++++------
 net/bridge/br_fdb.c                    | 15 +++++++++++----
 net/bridge/br_mdb.c                    | 15 +++++++++++----
 net/bridge/br_vlan.c                   | 15 +++++++++++----
 net/dsa/port.c                         | 13 ++++++-------
 6 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 166d851962d2..3e89e34f86d5 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1175,12 +1175,12 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
 	ageing_time = br_get_ageing_time(bridge_dev);
 	ocelot_port_attr_ageing_set(ocelot, port, ageing_time);
 
-	err = br_mdb_replay(bridge_dev, brport_dev, priv,
+	err = br_mdb_replay(bridge_dev, brport_dev, priv, true,
 			    &ocelot_switchdev_blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_vlan_replay(bridge_dev, brport_dev, priv,
+	err = br_vlan_replay(bridge_dev, brport_dev, priv, true,
 			     &ocelot_switchdev_blocking_nb, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 6b54da2c65ba..b651c5e32a28 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -71,7 +71,7 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
 bool br_multicast_router(const struct net_device *dev);
 int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-		  const void *ctx, struct notifier_block *nb,
+		  const void *ctx, bool adding, struct notifier_block *nb,
 		  struct netlink_ext_ack *extack);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
@@ -106,7 +106,7 @@ static inline bool br_multicast_router(const struct net_device *dev)
 }
 static inline int br_mdb_replay(const struct net_device *br_dev,
 				const struct net_device *dev, const void *ctx,
-				struct notifier_block *nb,
+				bool adding, struct notifier_block *nb,
 				struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
@@ -121,7 +121,7 @@ int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo);
 int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-		   const void *ctx, struct notifier_block *nb,
+		   const void *ctx, bool adding, struct notifier_block *nb,
 		   struct netlink_ext_ack *extack);
 #else
 static inline bool br_vlan_enabled(const struct net_device *dev)
@@ -152,7 +152,7 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
 
 static inline int br_vlan_replay(struct net_device *br_dev,
 				 struct net_device *dev, const void *ctx,
-				 struct notifier_block *nb,
+				 bool adding, struct notifier_block *nb,
 				 struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
@@ -168,7 +168,7 @@ bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
 u8 br_port_get_stp_state(const struct net_device *dev);
 clock_t br_get_ageing_time(const struct net_device *br_dev);
 int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
-		  const void *ctx, struct notifier_block *nb);
+		  const void *ctx, bool adding, struct notifier_block *nb);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -200,7 +200,7 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev)
 
 static inline int br_fdb_replay(const struct net_device *br_dev,
 				const struct net_device *dev, const void *ctx,
-				struct notifier_block *nb)
+				bool adding, struct notifier_block *nb)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 2e777c8b0921..16f9434fdb5d 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -728,7 +728,8 @@ static inline size_t fdb_nlmsg_size(void)
 
 static int br_fdb_replay_one(struct notifier_block *nb,
 			     const struct net_bridge_fdb_entry *fdb,
-			     struct net_device *dev, const void *ctx)
+			     struct net_device *dev, unsigned long action,
+			     const void *ctx)
 {
 	struct switchdev_notifier_fdb_info item;
 	int err;
@@ -741,15 +742,16 @@ static int br_fdb_replay_one(struct notifier_block *nb,
 	item.info.dev = dev;
 	item.info.ctx = ctx;
 
-	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
+	err = nb->notifier_call(nb, action, &item);
 	return notifier_to_errno(err);
 }
 
 int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
-		  const void *ctx, struct notifier_block *nb)
+		  const void *ctx, bool adding, struct notifier_block *nb)
 {
 	struct net_bridge_fdb_entry *fdb;
 	struct net_bridge *br;
+	unsigned long action;
 	int err = 0;
 
 	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
@@ -757,6 +759,11 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
 
 	br = netdev_priv(br_dev);
 
+	if (adding)
+		action = SWITCHDEV_FDB_ADD_TO_DEVICE;
+	else
+		action = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
 	rcu_read_lock();
 
 	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
@@ -767,7 +774,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
 		if (dst_dev != br_dev && dst_dev != dev)
 			continue;
 
-		err = br_fdb_replay_one(nb, fdb, dst_dev, ctx);
+		err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
 		if (err)
 			break;
 	}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index cebdbff17b54..17a720b4473f 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -568,7 +568,8 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
 
 static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
 			     const struct switchdev_obj_port_mdb *mdb,
-			     const void *ctx, struct netlink_ext_ack *extack)
+			     unsigned long action, const void *ctx,
+			     struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_port_obj_info obj_info = {
 		.info = {
@@ -580,7 +581,7 @@ static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
 	};
 	int err;
 
-	err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+	err = nb->notifier_call(nb, action, &obj_info);
 	return notifier_to_errno(err);
 }
 
@@ -604,12 +605,13 @@ static int br_mdb_queue_one(struct list_head *mdb_list,
 }
 
 int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-		  const void *ctx, struct notifier_block *nb,
+		  const void *ctx, bool adding, struct notifier_block *nb,
 		  struct netlink_ext_ack *extack)
 {
 	const struct net_bridge_mdb_entry *mp;
 	struct switchdev_obj *obj, *tmp;
 	struct net_bridge *br;
+	unsigned long action;
 	LIST_HEAD(mdb_list);
 	int err = 0;
 
@@ -664,9 +666,14 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
 
 	rcu_read_unlock();
 
+	if (adding)
+		action = SWITCHDEV_PORT_OBJ_ADD;
+	else
+		action = SWITCHDEV_PORT_OBJ_DEL;
+
 	list_for_each_entry(obj, &mdb_list, list) {
 		err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
-					ctx, extack);
+					action, ctx, extack);
 		if (err)
 			goto out_free_mdb;
 	}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 2bfa2a00e193..a08e9f193009 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1807,7 +1807,8 @@ out_kfree:
 static int br_vlan_replay_one(struct notifier_block *nb,
 			      struct net_device *dev,
 			      struct switchdev_obj_port_vlan *vlan,
-			      const void *ctx, struct netlink_ext_ack *extack)
+			      const void *ctx, unsigned long action,
+			      struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_port_obj_info obj_info = {
 		.info = {
@@ -1819,18 +1820,19 @@ static int br_vlan_replay_one(struct notifier_block *nb,
 	};
 	int err;
 
-	err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+	err = nb->notifier_call(nb, action, &obj_info);
 	return notifier_to_errno(err);
 }
 
 int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-		   const void *ctx, struct notifier_block *nb,
+		   const void *ctx, bool adding, struct notifier_block *nb,
 		   struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *v;
 	struct net_bridge_port *p;
 	struct net_bridge *br;
+	unsigned long action;
 	int err = 0;
 	u16 pvid;
 
@@ -1857,6 +1859,11 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
 	if (!vg)
 		return 0;
 
+	if (adding)
+		action = SWITCHDEV_PORT_OBJ_ADD;
+	else
+		action = SWITCHDEV_PORT_OBJ_DEL;
+
 	pvid = br_get_pvid(vg);
 
 	list_for_each_entry(v, &vg->vlan_list, vlist) {
@@ -1870,7 +1877,7 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
 		if (!br_vlan_should_use(v))
 			continue;
 
-		err = br_vlan_replay_one(nb, dev, &vlan, ctx, extack);
+		err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack);
 		if (err)
 			return err;
 	}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 339781c98de1..4e58d07ececd 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -194,19 +194,18 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_mdb_replay(br, brport_dev, dp,
-			    &dsa_slave_switchdev_blocking_notifier,
-			    extack);
+	err = br_mdb_replay(br, brport_dev, dp, true,
+			    &dsa_slave_switchdev_blocking_notifier, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_fdb_replay(br, brport_dev, dp, &dsa_slave_switchdev_notifier);
+	err = br_fdb_replay(br, brport_dev, dp, true,
+			    &dsa_slave_switchdev_notifier);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
-	err = br_vlan_replay(br, brport_dev, dp,
-			     &dsa_slave_switchdev_blocking_notifier,
-			     extack);
+	err = br_vlan_replay(br, brport_dev, dp, true,
+			     &dsa_slave_switchdev_blocking_notifier, extack);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 

From 4ede74e73b5b540b2a20bb6d5ad4d69348ba51fc Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:28 +0300
Subject: [PATCH 3034/3292] net: dsa: refactor the prechangeupper sanity checks
 into a dedicated function

We need to add more logic to the DSA NETDEV_PRECHANGEUPPER event
handler, more exactly we need to request an unsync of switchdev objects.
In order to fit more code, refactor the existing logic into a helper.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2f0d0a6b1f9c..20d8466d78f2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2166,6 +2166,32 @@ dsa_slave_check_8021q_upper(struct net_device *dev,
 	return NOTIFY_DONE;
 }
 
+static int
+dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
+				      struct netdev_notifier_changeupper_info *info)
+{
+	struct dsa_switch *ds;
+	struct dsa_port *dp;
+	int err;
+
+	if (!dsa_slave_dev_check(dev))
+		return dsa_prevent_bridging_8021q_upper(dev, info);
+
+	dp = dsa_slave_to_port(dev);
+	ds = dp->ds;
+
+	if (ds->ops->port_prechangeupper) {
+		err = ds->ops->port_prechangeupper(ds, dp->index, info);
+		if (err)
+			return notifier_from_errno(err);
+	}
+
+	if (is_vlan_dev(info->upper_dev))
+		return dsa_slave_check_8021q_upper(dev, info);
+
+	return NOTIFY_DONE;
+}
+
 static int dsa_slave_netdevice_event(struct notifier_block *nb,
 				     unsigned long event, void *ptr)
 {
@@ -2174,24 +2200,12 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER: {
 		struct netdev_notifier_changeupper_info *info = ptr;
-		struct dsa_switch *ds;
-		struct dsa_port *dp;
 		int err;
 
-		if (!dsa_slave_dev_check(dev))
-			return dsa_prevent_bridging_8021q_upper(dev, ptr);
+		err = dsa_slave_prechangeupper_sanity_check(dev, info);
+		if (err != NOTIFY_DONE)
+			return err;
 
-		dp = dsa_slave_to_port(dev);
-		ds = dp->ds;
-
-		if (ds->ops->port_prechangeupper) {
-			err = ds->ops->port_prechangeupper(ds, dp->index, info);
-			if (err)
-				return notifier_from_errno(err);
-		}
-
-		if (is_vlan_dev(info->upper_dev))
-			return dsa_slave_check_8021q_upper(dev, ptr);
 		break;
 	}
 	case NETDEV_CHANGEUPPER:

From 7491894532341cff11babd1fe3bd68537166bcc4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 14:54:29 +0300
Subject: [PATCH 3035/3292] net: dsa: replay a deletion of switchdev objects
 for ports leaving a bridged LAG

When a DSA switch port leaves a bonding interface that is under a
bridge, there might be dangling switchdev objects on that port left
behind, because the bridge is not aware that its lower interface (the
bond) changed state in any way.

Call the bridge replay helpers with adding=false before changing
dp->bridge_dev to NULL, because we need to simulate to
dsa_slave_port_obj_del() that these notifications were emitted by the
bridge.

We add this hook to the NETDEV_PRECHANGEUPPER event handler, because
we are calling into switchdev (and the __switchdev_handle_port_obj_del
fanout helpers expect the upper/lower adjacency lists to still be valid)
and PRECHANGEUPPER is the last moment in time when they still are.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h |  4 ++++
 net/dsa/port.c     | 45 +++++++++++++++++++++++++++++++++++--
 net/dsa/slave.c    | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index b0811253d101..c8712942002f 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -188,12 +188,16 @@ void dsa_port_disable_rt(struct dsa_port *dp);
 void dsa_port_disable(struct dsa_port *dp);
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
 			 struct netlink_ext_ack *extack);
+int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
+			      struct netlink_ext_ack *extack);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_lag_change(struct dsa_port *dp,
 			struct netdev_lag_lower_state_info *linfo);
 int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
 		      struct netdev_lag_upper_info *uinfo,
 		      struct netlink_ext_ack *extack);
+int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev,
+			   struct netlink_ext_ack *extack);
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 			    struct netlink_ext_ack *extack);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 4e58d07ececd..46089dd2b2ec 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -212,7 +212,33 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
 	return 0;
 }
 
-static void dsa_port_switchdev_unsync(struct dsa_port *dp)
+static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp,
+					  struct net_device *br,
+					  struct netlink_ext_ack *extack)
+{
+	struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+	int err;
+
+	/* Delete the switchdev objects left on this port */
+	err = br_mdb_replay(br, brport_dev, dp, false,
+			    &dsa_slave_switchdev_blocking_notifier, extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_fdb_replay(br, brport_dev, dp, false,
+			    &dsa_slave_switchdev_notifier);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_vlan_replay(br, brport_dev, dp, false,
+			     &dsa_slave_switchdev_blocking_notifier, extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
 {
 	/* Configure the port for standalone mode (no address learning,
 	 * flood everything).
@@ -278,6 +304,12 @@ out_rollback:
 	return err;
 }
 
+int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
+			      struct netlink_ext_ack *extack)
+{
+	return dsa_port_switchdev_unsync_objs(dp, br, extack);
+}
+
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 {
 	struct dsa_notifier_bridge_info info = {
@@ -297,7 +329,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 	if (err)
 		pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
-	dsa_port_switchdev_unsync(dp);
+	dsa_port_switchdev_unsync_attrs(dp);
 }
 
 int dsa_port_lag_change(struct dsa_port *dp,
@@ -365,6 +397,15 @@ err_lag_join:
 	return err;
 }
 
+int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag,
+			   struct netlink_ext_ack *extack)
+{
+	if (dp->bridge_dev)
+		return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack);
+
+	return 0;
+}
+
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
 {
 	struct dsa_notifier_lag_info info = {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 20d8466d78f2..898ed9cf756f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2077,6 +2077,26 @@ static int dsa_slave_changeupper(struct net_device *dev,
 	return err;
 }
 
+static int dsa_slave_prechangeupper(struct net_device *dev,
+				    struct netdev_notifier_changeupper_info *info)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct netlink_ext_ack *extack;
+	int err = 0;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	if (netif_is_bridge_master(info->upper_dev) && !info->linking)
+		err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack);
+	else if (netif_is_lag_master(info->upper_dev) && !info->linking)
+		err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack);
+	/* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
+	 * meaningfully enslaved to a bridge yet
+	 */
+
+	return notifier_from_errno(err);
+}
+
 static int
 dsa_slave_lag_changeupper(struct net_device *dev,
 			  struct netdev_notifier_changeupper_info *info)
@@ -2103,6 +2123,35 @@ dsa_slave_lag_changeupper(struct net_device *dev,
 	return err;
 }
 
+/* Same as dsa_slave_lag_changeupper() except that it calls
+ * dsa_slave_prechangeupper()
+ */
+static int
+dsa_slave_lag_prechangeupper(struct net_device *dev,
+			     struct netdev_notifier_changeupper_info *info)
+{
+	struct net_device *lower;
+	struct list_head *iter;
+	int err = NOTIFY_DONE;
+	struct dsa_port *dp;
+
+	netdev_for_each_lower_dev(dev, lower, iter) {
+		if (!dsa_slave_dev_check(lower))
+			continue;
+
+		dp = dsa_slave_to_port(lower);
+		if (!dp->lag_dev)
+			/* Software LAG */
+			continue;
+
+		err = dsa_slave_prechangeupper(lower, info);
+		if (notifier_to_errno(err))
+			break;
+	}
+
+	return err;
+}
+
 static int
 dsa_prevent_bridging_8021q_upper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
@@ -2206,6 +2255,12 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 		if (err != NOTIFY_DONE)
 			return err;
 
+		if (dsa_slave_dev_check(dev))
+			return dsa_slave_prechangeupper(dev, ptr);
+
+		if (netif_is_lag_master(dev))
+			return dsa_slave_lag_prechangeupper(dev, ptr);
+
 		break;
 	}
 	case NETDEV_CHANGEUPPER:

From f00af5cc58ec5aae83ce4860e4912d61fb143d5e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 26 Jun 2021 12:44:18 +0800
Subject: [PATCH 3036/3292] net: sparx5: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref if platform_get_resource() returns NULL,
we need check the return value.

Fixes: 3cfa11bac9bb ("net: sparx5: add the basic sparx5 driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index a325f7c05a07..665e20ccb404 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -228,6 +228,10 @@ static int sparx5_create_targets(struct sparx5 *sparx5)
 	for (idx = 0; idx < IO_RANGES; idx++) {
 		iores[idx] = platform_get_resource(sparx5->pdev, IORESOURCE_MEM,
 						   idx);
+		if (!iores[idx]) {
+			dev_err(sparx5->dev, "Invalid resource\n");
+			return -EINVAL;
+		}
 		iomem[idx] = devm_ioremap(sparx5->dev,
 					  iores[idx]->start,
 					  iores[idx]->end - iores[idx]->start

From 8f4c38f7588650690ffa08f7784c831e0a8c38e7 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 26 Jun 2021 12:44:19 +0800
Subject: [PATCH 3037/3292] net: sparx5: fix return value check in
 sparx5_create_targets()

In case of error, the function devm_ioremap() returns NULL pointer
not ERR_PTR(). The IS_ERR() test in the return value check should
be replaced with NULL test.

Fixes: 3cfa11bac9bb ("net: sparx5: add the basic sparx5 driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 665e20ccb404..abaa086ce345 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -236,10 +236,10 @@ static int sparx5_create_targets(struct sparx5 *sparx5)
 					  iores[idx]->start,
 					  iores[idx]->end - iores[idx]->start
 					  + 1);
-		if (IS_ERR(iomem[idx])) {
+		if (!iomem[idx]) {
 			dev_err(sparx5->dev, "Unable to get switch registers: %s\n",
 				iores[idx]->name);
-			return PTR_ERR(iomem[idx]);
+			return -ENOMEM;
 		}
 		begin[idx] = iomem[idx] - sparx5_main_iomap[range_id[idx]].offset;
 	}

From 83300c69e797343c560086b1a4be56443c0d7f5e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 26 Jun 2021 12:44:20 +0800
Subject: [PATCH 3038/3292] net: sparx5: fix error return code in
 sparx5_register_notifier_blocks()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: d6fce5141929 ("net: sparx5: add switching support")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
index 246eba711f15..a72e3b3b596e 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
@@ -485,8 +485,10 @@ int sparx5_register_notifier_blocks(struct sparx5 *s5)
 		goto err_switchdev_blocking_nb;
 
 	sparx5_owq = alloc_ordered_workqueue("sparx5_order", 0);
-	if (!sparx5_owq)
+	if (!sparx5_owq) {
+		err = -ENOMEM;
 		goto err_switchdev_blocking_nb;
+	}
 
 	return 0;
 

From 9ea3e52c5bc8bb4a084938dc1e3160643438927a Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Sat, 26 Jun 2021 04:56:06 -0700
Subject: [PATCH 3039/3292] flow_offload: action should not be NULL when it is
 referenced

"action" should not be NULL when it is referenced.

Signed-off-by: gushengxian <13145886936@163.com>
Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index dc5c1e69cd9f..69c9eabf8325 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -319,12 +319,14 @@ flow_action_mixed_hw_stats_check(const struct flow_action *action,
 	if (flow_offload_has_one_action(action))
 		return true;
 
-	flow_action_for_each(i, action_entry, action) {
-		if (i && action_entry->hw_stats != last_hw_stats) {
-			NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported");
-			return false;
+	if (action) {
+		flow_action_for_each(i, action_entry, action) {
+			if (i && action_entry->hw_stats != last_hw_stats) {
+				NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported");
+				return false;
+			}
+			last_hw_stats = action_entry->hw_stats;
 		}
-		last_hw_stats = action_entry->hw_stats;
 	}
 	return true;
 }

From 1fd07f33c3ea2b4aa77426f13e8cb91d4f55af8f Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Sat, 26 Jun 2021 09:07:46 -0500
Subject: [PATCH 3040/3292] ipv6: ICMPV6: add response to ICMPV6 RFC 8335 PROBE
 messages

This patch builds off of commit 2b246b2569cd2ac6ff700d0dce56b8bae29b1842
and adds functionality to respond to ICMPV6 PROBE requests.

Add icmp_build_probe function to construct PROBE requests for both
ICMPV4 and ICMPV6.

Modify icmpv6_rcv to detect ICMPV6 PROBE messages and call the
icmpv6_echo_reply handler.

Modify icmpv6_echo_reply to build a PROBE response message based on the
queried interface.

This patch has been tested using a branch of the iputils git repo which can
be found here: https://github.com/Juniper-Clinic-2020/iputils/tree/probe-request

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h |  1 +
 net/ipv4/icmp.c    | 63 ++++++++++++++++++++++++++++++----------------
 net/ipv6/icmp.c    | 21 +++++++++++++---
 3 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index fd84adc47963..caddf4a59ad1 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -57,5 +57,6 @@ int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
 void icmp_out_count(struct net *net, unsigned char type);
+bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr);
 
 #endif	/* _ICMP_H */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0a57f1892e7e..c695d294a5df 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -993,14 +993,8 @@ static bool icmp_redirect(struct sk_buff *skb)
 
 static bool icmp_echo(struct sk_buff *skb)
 {
-	struct icmp_ext_hdr *ext_hdr, _ext_hdr;
-	struct icmp_ext_echo_iio *iio, _iio;
 	struct icmp_bxm icmp_param;
-	struct net_device *dev;
-	char buff[IFNAMSIZ];
 	struct net *net;
-	u16 ident_len;
-	u8 status;
 
 	net = dev_net(skb_dst(skb)->dev);
 	/* should there be an ICMP stat for ignored echos? */
@@ -1013,20 +1007,46 @@ static bool icmp_echo(struct sk_buff *skb)
 	icmp_param.data_len	   = skb->len;
 	icmp_param.head_len	   = sizeof(struct icmphdr);
 
-	if (icmp_param.data.icmph.type == ICMP_ECHO) {
+	if (icmp_param.data.icmph.type == ICMP_ECHO)
 		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
-		goto send_reply;
-	}
-	if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+	else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
 		return true;
+
+	icmp_reply(&icmp_param, skb);
+	return true;
+}
+
+/*	Helper for icmp_echo and icmpv6_echo_reply.
+ *	Searches for net_device that matches PROBE interface identifier
+ *		and builds PROBE reply message in icmphdr.
+ *
+ *	Returns false if PROBE responses are disabled via sysctl
+ */
+
+bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
+{
+	struct icmp_ext_hdr *ext_hdr, _ext_hdr;
+	struct icmp_ext_echo_iio *iio, _iio;
+	struct net *net = dev_net(skb->dev);
+	struct net_device *dev;
+	char buff[IFNAMSIZ];
+	u16 ident_len;
+	u8 status;
+
+	if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+		return false;
+
 	/* We currently only support probing interfaces on the proxy node
 	 * Check to ensure L-bit is set
 	 */
-	if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1))
-		return true;
+	if (!(ntohs(icmphdr->un.echo.sequence) & 1))
+		return false;
 	/* Clear status bits in reply message */
-	icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
-	icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
+	icmphdr->un.echo.sequence &= htons(0xFF00);
+	if (icmphdr->type == ICMP_EXT_ECHO)
+		icmphdr->type = ICMP_EXT_ECHOREPLY;
+	else
+		icmphdr->type = ICMPV6_EXT_ECHO_REPLY;
 	ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr);
 	/* Size of iio is class_type dependent.
 	 * Only check header here and assign length based on ctype in the switch statement
@@ -1087,8 +1107,8 @@ static bool icmp_echo(struct sk_buff *skb)
 		goto send_mal_query;
 	}
 	if (!dev) {
-		icmp_param.data.icmph.code = ICMP_EXT_CODE_NO_IF;
-		goto send_reply;
+		icmphdr->code = ICMP_EXT_CODE_NO_IF;
+		return true;
 	}
 	/* Fill bits in reply message */
 	if (dev->flags & IFF_UP)
@@ -1098,14 +1118,13 @@ static bool icmp_echo(struct sk_buff *skb)
 	if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
 		status |= ICMP_EXT_ECHOREPLY_IPV6;
 	dev_put(dev);
-	icmp_param.data.icmph.un.echo.sequence |= htons(status);
-send_reply:
-	icmp_reply(&icmp_param, skb);
-		return true;
+	icmphdr->un.echo.sequence |= htons(status);
+	return true;
 send_mal_query:
-	icmp_param.data.icmph.code = ICMP_EXT_CODE_MAL_QUERY;
-	goto send_reply;
+	icmphdr->code = ICMP_EXT_CODE_MAL_QUERY;
+	return true;
 }
+EXPORT_SYMBOL_GPL(icmp_build_probe);
 
 /*
  *	Handle ICMP Timestamp requests.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e8398ffb5e35..a7c31ab67c5d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -725,6 +725,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct ipcm6_cookie ipc6;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 	bool acast;
+	u8 type;
 
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
 	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
@@ -740,8 +741,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
 		saddr = NULL;
 
+	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
+		type = ICMPV6_EXT_ECHO_REPLY;
+	else
+		type = ICMPV6_ECHO_REPLY;
+
 	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
-	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+	tmp_hdr.icmp6_type = type;
 
 	memset(&fl6, 0, sizeof(fl6));
 	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
@@ -752,7 +758,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	if (saddr)
 		fl6.saddr = *saddr;
 	fl6.flowi6_oif = icmp6_iif(skb);
-	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+	fl6.fl6_icmp_type = type;
 	fl6.flowi6_mark = mark;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
@@ -783,13 +789,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	msg.skb = skb;
 	msg.offset = 0;
-	msg.type = ICMPV6_ECHO_REPLY;
+	msg.type = type;
 
 	ipcm6_init_sk(&ipc6, np);
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 	ipc6.sockc.mark = mark;
 
+	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
+		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
+			goto out_dst_release;
+
 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 			    skb->len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
@@ -911,6 +921,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 			icmpv6_echo_reply(skb);
 		break;
+	case ICMPV6_EXT_ECHO_REQUEST:
+		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
+		    net->ipv4.sysctl_icmp_echo_enable_probe)
+			icmpv6_echo_reply(skb);
+		break;
 
 	case ICMPV6_ECHO_REPLY:
 		success = ping_rcv(skb);

From 0c5dc070ff3d6246d22ddd931f23a6266249e3db Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 28 Jun 2021 16:13:41 -0300
Subject: [PATCH 3041/3292] sctp: validate from_addr_param return

Ilja reported that, simply putting it, nothing was validating that
from_addr_param functions were operating on initialized memory. That is,
the parameter itself was being validated by sctp_walk_params, but it
doesn't check for types and their specific sizes and it could be a 0-length
one, causing from_addr_param to potentially work over the next parameter or
even uninitialized memory.

The fix here is to, in all calls to from_addr_param, check if enough space
is there for the wanted IP address type.

Reported-by: Ilja Van Sprundel <ivansprundel@ioactive.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  2 +-
 net/sctp/bind_addr.c       | 19 +++++++++++--------
 net/sctp/input.c           |  6 ++++--
 net/sctp/ipv6.c            |  7 ++++++-
 net/sctp/protocol.c        |  7 ++++++-
 net/sctp/sm_make_chunk.c   | 29 ++++++++++++++++-------------
 6 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 1aa585216f34..d49593c72a55 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -461,7 +461,7 @@ struct sctp_af {
 					 int saddr);
 	void		(*from_sk)	(union sctp_addr *,
 					 struct sock *sk);
-	void		(*from_addr_param) (union sctp_addr *,
+	bool		(*from_addr_param) (union sctp_addr *,
 					    union sctp_addr_param *,
 					    __be16 port, int iif);
 	int		(*to_addr_param) (const union sctp_addr *,
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 53e5ed79f63f..59e653b528b1 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -270,22 +270,19 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
 		rawaddr = (union sctp_addr_param *)raw_addr_list;
 
 		af = sctp_get_af_specific(param_type2af(param->type));
-		if (unlikely(!af)) {
+		if (unlikely(!af) ||
+		    !af->from_addr_param(&addr, rawaddr, htons(port), 0)) {
 			retval = -EINVAL;
-			sctp_bind_addr_clean(bp);
-			break;
+			goto out_err;
 		}
 
-		af->from_addr_param(&addr, rawaddr, htons(port), 0);
 		if (sctp_bind_addr_state(bp, &addr) != -1)
 			goto next;
 		retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
 					    SCTP_ADDR_SRC, gfp);
-		if (retval) {
+		if (retval)
 			/* Can't finish building the list, clean up. */
-			sctp_bind_addr_clean(bp);
-			break;
-		}
+			goto out_err;
 
 next:
 		len = ntohs(param->length);
@@ -294,6 +291,12 @@ next:
 	}
 
 	return retval;
+
+out_err:
+	if (retval)
+		sctp_bind_addr_clean(bp);
+
+	return retval;
 }
 
 /********************************************************************
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d508f6f3dd08..8924e2e142c8 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1131,7 +1131,8 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
 		if (!af)
 			continue;
 
-		af->from_addr_param(paddr, params.addr, sh->source, 0);
+		if (!af->from_addr_param(paddr, params.addr, sh->source, 0))
+			continue;
 
 		asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
 		if (asoc)
@@ -1174,7 +1175,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 	if (unlikely(!af))
 		return NULL;
 
-	af->from_addr_param(&paddr, param, peer_port, 0);
+	if (af->from_addr_param(&paddr, param, peer_port, 0))
+		return NULL;
 
 	return __sctp_lookup_association(net, laddr, &paddr, transportp);
 }
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bd08807c9e44..5c6f5ced9cfa 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -551,15 +551,20 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 }
 
 /* Initialize a sctp_addr from an address parameter. */
-static void sctp_v6_from_addr_param(union sctp_addr *addr,
+static bool sctp_v6_from_addr_param(union sctp_addr *addr,
 				    union sctp_addr_param *param,
 				    __be16 port, int iif)
 {
+	if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param))
+		return false;
+
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = port;
 	addr->v6.sin6_flowinfo = 0; /* BUG */
 	addr->v6.sin6_addr = param->v6.addr;
 	addr->v6.sin6_scope_id = iif;
+
+	return true;
 }
 
 /* Initialize an address parameter from a sctp_addr and return the length
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6f2bbfeec3a4..25192b378e2e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -254,14 +254,19 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 }
 
 /* Initialize a sctp_addr from an address parameter. */
-static void sctp_v4_from_addr_param(union sctp_addr *addr,
+static bool sctp_v4_from_addr_param(union sctp_addr *addr,
 				    union sctp_addr_param *param,
 				    __be16 port, int iif)
 {
+	if (ntohs(param->v4.param_hdr.length) < sizeof(struct sctp_ipv4addr_param))
+		return false;
+
 	addr->v4.sin_family = AF_INET;
 	addr->v4.sin_port = port;
 	addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
 	memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
+
+	return true;
 }
 
 /* Initialize an address parameter from a sctp_addr and return the length
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5b44d228b6ca..f33a870b483d 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2346,11 +2346,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 
 	/* Process the initialization parameters.  */
 	sctp_walk_params(param, peer_init, init_hdr.params) {
-		if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
-		    param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
+		if (!src_match &&
+		    (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+		     param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
 			af = sctp_get_af_specific(param_type2af(param.p->type));
-			af->from_addr_param(&addr, param.addr,
-					    chunk->sctp_hdr->source, 0);
+			if (!af->from_addr_param(&addr, param.addr,
+						 chunk->sctp_hdr->source, 0))
+				continue;
 			if (sctp_cmp_addr_exact(sctp_source(chunk), &addr))
 				src_match = 1;
 		}
@@ -2531,7 +2533,8 @@ static int sctp_process_param(struct sctp_association *asoc,
 			break;
 do_addr_param:
 		af = sctp_get_af_specific(param_type2af(param.p->type));
-		af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
+		if (!af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0))
+			break;
 		scope = sctp_scope(peer_addr);
 		if (sctp_in_scope(net, &addr, scope))
 			if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
@@ -2632,15 +2635,13 @@ do_addr_param:
 		addr_param = param.v + sizeof(struct sctp_addip_param);
 
 		af = sctp_get_af_specific(param_type2af(addr_param->p.type));
-		if (af == NULL)
+		if (!af)
 			break;
 
-		af->from_addr_param(&addr, addr_param,
-				    htons(asoc->peer.port), 0);
+		if (!af->from_addr_param(&addr, addr_param,
+					 htons(asoc->peer.port), 0))
+			break;
 
-		/* if the address is invalid, we can't process it.
-		 * XXX: see spec for what to do.
-		 */
 		if (!af->addr_valid(&addr, NULL, NULL))
 			break;
 
@@ -3054,7 +3055,8 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 	if (unlikely(!af))
 		return SCTP_ERROR_DNS_FAILED;
 
-	af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
+	if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0))
+		return SCTP_ERROR_DNS_FAILED;
 
 	/* ADDIP 4.2.1  This parameter MUST NOT contain a broadcast
 	 * or multicast address.
@@ -3331,7 +3333,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
 
 	/* We have checked the packet before, so we do not check again.	*/
 	af = sctp_get_af_specific(param_type2af(addr_param->p.type));
-	af->from_addr_param(&addr, addr_param, htons(bp->port), 0);
+	if (!af->from_addr_param(&addr, addr_param, htons(bp->port), 0))
+		return;
 
 	switch (asconf_param->param_hdr.type) {
 	case SCTP_PARAM_ADD_IP:

From 50619dbf8db77e98d821d615af4f634d08e22698 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 28 Jun 2021 16:13:42 -0300
Subject: [PATCH 3042/3292] sctp: add size validation when walking chunks

The first chunk in a packet is ensured to be present at the beginning of
sctp_rcv(), as a packet needs to have at least 1 chunk. But the second
one, may not be completely available and ch->length can be over
uninitialized memory.

Fix here is by only trying to walk on the next chunk if there is enough to
hold at least the header, and then proceed with the ch->length validation
that is already there.

Reported-by: Ilja Van Sprundel <ivansprundel@ioactive.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 8924e2e142c8..f72bff93745c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1247,7 +1247,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 
 		ch = (struct sctp_chunkhdr *)ch_end;
 		chunk_num++;
-	} while (ch_end < skb_tail_pointer(skb));
+	} while (ch_end + sizeof(*ch) < skb_tail_pointer(skb));
 
 	return asoc;
 }

From b6ffe7671b24689c09faa5675dd58f93758a97ae Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 28 Jun 2021 16:13:43 -0300
Subject: [PATCH 3043/3292] sctp: validate chunk size in __rcv_asconf_lookup

In one of the fallbacks that SCTP has for identifying an association for an
incoming packet, it looks for AddIp chunk (from ASCONF) and take a peek.
Thing is, at this stage nothing was validating that the chunk actually had
enough content for that, allowing the peek to happen over uninitialized
memory.

Similar check already exists in actual asconf handling in
sctp_verify_asconf().

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index f72bff93745c..96dea8097dbe 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1168,6 +1168,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 	union sctp_addr_param *param;
 	union sctp_addr paddr;
 
+	if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr))
+		return NULL;
+
 	/* Skip over the ADDIP header and find the Address parameter */
 	param = (union sctp_addr_param *)(asconf + 1);
 

From ef6c8d6ccf0c1dccdda092ebe8782777cd7803c9 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 28 Jun 2021 16:13:44 -0300
Subject: [PATCH 3044/3292] sctp: add param size validation for
 SCTP_PARAM_SET_PRIMARY

When SCTP handles an INIT chunk, it calls for example:
sctp_sf_do_5_1B_init
  sctp_verify_init
    sctp_verify_param
  sctp_process_init
    sctp_process_param
      handling of SCTP_PARAM_SET_PRIMARY

sctp_verify_init() wasn't doing proper size validation and neither the
later handling, allowing it to work over the chunk itself, possibly being
uninitialized memory.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f33a870b483d..587fb3cb88e2 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2166,9 +2166,16 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (ep->asconf_enable)
-			break;
-		goto unhandled;
+		if (!ep->asconf_enable)
+			goto unhandled;
+
+		if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) +
+					     sizeof(struct sctp_paramhdr)) {
+			sctp_process_inv_paramlength(asoc, param.p,
+						     chunk, err_chunk);
+			retval = SCTP_IERROR_ABORT;
+		}
+		break;
 
 	case SCTP_PARAM_HOST_NAME_ADDRESS:
 		/* Tell the peer, we won't support this param.  */

From f7458934b0791c39a001e4d902fc3bf697b439b5 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sat, 26 Jun 2021 22:18:04 +0200
Subject: [PATCH 3045/3292] net: bridge: mrp: Update the Test frames for MRA

According to the standard IEC 62439-2, in case the node behaves as MRA
and needs to send Test frames on ring ports, then these Test frames need
to have an Option TLV and a Sub-Option TLV which has the type AUTO_MGR.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp.c         | 27 +++++++++++++++++++++++++++
 net/bridge/br_private_mrp.h | 11 +++++++++++
 2 files changed, 38 insertions(+)

diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index f7012b7d7ce4..fd2de35ffb3c 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -204,6 +204,33 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
 	hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies));
 
 	br_mrp_skb_common(skb, mrp);
+
+	/* In case the node behaves as MRA then the Test frame needs to have
+	 * an Option TLV which includes eventually a sub-option TLV that has
+	 * the type AUTO_MGR
+	 */
+	if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) {
+		struct br_mrp_sub_option1_hdr *sub_opt = NULL;
+		struct br_mrp_tlv_hdr *sub_tlv = NULL;
+		struct br_mrp_oui_hdr *oui = NULL;
+		u8 length;
+
+		length = sizeof(*sub_opt) + sizeof(*sub_tlv) + sizeof(oui) +
+			MRP_OPT_PADDING;
+		br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_OPTION, length);
+
+		oui = skb_put(skb, sizeof(*oui));
+		memset(oui, 0x0, sizeof(*oui));
+		sub_opt = skb_put(skb, sizeof(*sub_opt));
+		memset(sub_opt, 0x0, sizeof(*sub_opt));
+
+		sub_tlv = skb_put(skb, sizeof(*sub_tlv));
+		sub_tlv->type = BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR;
+
+		/* 32 bit alligment shall be ensured therefore add 2 bytes */
+		skb_put(skb, MRP_OPT_PADDING);
+	}
+
 	br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0);
 
 	return skb;
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 9559aa2750fb..bda8e1896712 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -6,6 +6,8 @@
 #include "br_private.h"
 #include <uapi/linux/mrp_bridge.h>
 
+#define MRP_OPT_PADDING		0x2
+
 struct br_mrp {
 	/* list of mrp instances */
 	struct hlist_node		list;
@@ -134,4 +136,13 @@ struct br_mrp_in_test_hdr {
 	__be32 timestamp;
 } __attribute__((__packed__));
 
+struct br_mrp_oui_hdr {
+	__u8 oui[MRP_OUI_LENGTH];
+};
+
+struct br_mrp_sub_option1_hdr {
+	__u8 type;
+	__u8 data[MRP_MANUFACTURE_DATA_LENGTH];
+};
+
 #endif /* _BR_PRIVATE_MRP_H */

From b856150c8098f12996ee81c3ab2a65adbaeeb3ec Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Sun, 27 Jun 2021 12:16:07 +0200
Subject: [PATCH 3046/3292] net: phy: at803x: mask 1000 Base-X link mode

AR8031/AR8033 have different status registers for copper
and fiber operation. However, the extended status register
is the same for both operation modes.

As a result of that, ESTATUS_1000_XFULL is set to 1 even when
operating in copper TP mode.

Remove this mode from the supported link modes, as this driver
currently only supports copper operation.

Signed-off-by: David Bauer <mail@david-bauer.net>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 32af52dd5aed..d797c2c9ae3f 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -610,6 +610,34 @@ static void at803x_remove(struct phy_device *phydev)
 		regulator_disable(priv->vddio);
 }
 
+static int at803x_get_features(struct phy_device *phydev)
+{
+	int err;
+
+	err = genphy_read_abilities(phydev);
+	if (err)
+		return err;
+
+	if (!at803x_match_phy_id(phydev, ATH8031_PHY_ID))
+		return 0;
+
+	/* AR8031/AR8033 have different status registers
+	 * for copper and fiber operation. However, the
+	 * extended status register is the same for both
+	 * operation modes.
+	 *
+	 * As a result of that, ESTATUS_1000_XFULL is set
+	 * to 1 even when operating in copper TP mode.
+	 *
+	 * Remove this mode from the supported link modes,
+	 * as this driver currently only supports copper
+	 * operation.
+	 */
+	linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+			   phydev->supported);
+	return 0;
+}
+
 static int at803x_smarteee_config(struct phy_device *phydev)
 {
 	struct at803x_priv *priv = phydev->priv;
@@ -1225,7 +1253,7 @@ static struct phy_driver at803x_driver[] = {
 	.resume			= at803x_resume,
 	.read_page		= at803x_read_page,
 	.write_page		= at803x_write_page,
-	/* PHY_GBIT_FEATURES */
+	.get_features		= at803x_get_features,
 	.read_status		= at803x_read_status,
 	.config_intr		= &at803x_config_intr,
 	.handle_interrupt	= at803x_handle_interrupt,

From 74e7feff0e22f054839c18b29658d33e7b2d8512 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 27 Jun 2021 17:27:08 +0300
Subject: [PATCH 3047/3292] net: dsa: sja1105: fix dynamic access to L2 Address
 Lookup table for SJA1110

The SJA1105P/Q/R/S and SJA1110 may have the same layout for the command
to read/write/search for L2 Address Lookup entries, but as explained in
the comments at the beginning of the sja1105_dynamic_config.c file, the
command portion of the buffer is at the end, and we need to obtain a
pointer to it by adding the length of the entry to the buffer.

Alas, the length of an L2 Address Lookup entry is larger in SJA1110 than
it is for SJA1105P/Q/R/S, so we need to create a common helper to access
the command buffer, and this receives as argument the length of the
entry buffer.

Fixes: 3e77e59bf8cf ("net: dsa: sja1105: add support for the SJA1110 switch family")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/dsa/sja1105/sja1105_dynamic_config.c  | 26 ++++++++++++++++---
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 4c4c04f04269..56fead68ea9f 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -258,11 +258,11 @@ sja1110_vl_policing_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 }
 
 static void
-sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
-				  enum packing_op op)
+sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+					 enum packing_op op, int entry_size)
 {
-	u8 *p = buf + SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
 	const int size = SJA1105_SIZE_DYN_CMD;
+	u8 *p = buf + entry_size;
 	u64 hostcmd;
 
 	sja1105_packing(p, &cmd->valid,    31, 31, size, op);
@@ -317,6 +317,24 @@ sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 			SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op);
 }
 
+static void
+sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				  enum packing_op op)
+{
+	int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+
+	return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+}
+
+static void
+sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+			      enum packing_op op)
+{
+	int size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+
+	return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+}
+
 /* The switch is so retarded that it makes our command/entry abstraction
  * crumble apart.
  *
@@ -1055,7 +1073,7 @@ const struct sja1105_dynamic_table_ops sja1110_dyn_ops[BLK_IDX_MAX_DYN] = {
 	},
 	[BLK_IDX_L2_LOOKUP] = {
 		.entry_packing = sja1110_dyn_l2_lookup_entry_packing,
-		.cmd_packing = sja1105pqrs_l2_lookup_cmd_packing,
+		.cmd_packing = sja1110_l2_lookup_cmd_packing,
 		.access = (OP_READ | OP_WRITE | OP_DEL | OP_SEARCH),
 		.max_entry_count = SJA1105_MAX_L2_LOOKUP_COUNT,
 		.packed_size = SJA1110_SIZE_L2_LOOKUP_DYN_CMD,

From b74ef9f9cb91fc86c642af965b7598c4df1c9922 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Sun, 27 Jun 2021 11:45:43 -0700
Subject: [PATCH 3048/3292] net: sparx5: Do not use mac_addr uninitialized in
 mchp_sparx5_probe()

Clang warns:

drivers/net/ethernet/microchip/sparx5/sparx5_main.c:760:29: warning:
variable 'mac_addr' is uninitialized when used here [-Wuninitialized]
        if (of_get_mac_address(np, mac_addr)) {
                                   ^~~~~~~~
drivers/net/ethernet/microchip/sparx5/sparx5_main.c:669:14: note:
initialize the variable 'mac_addr' to silence this warning
        u8 *mac_addr;
                    ^
                     = NULL
1 warning generated.

mac_addr is only used to store the value retrieved from
of_get_mac_address(), which is then copied into the base_mac member of
the sparx5 struct using ether_addr_copy(). It is easier to just use the
base_mac address directly, which avoids the warning and the extra copy.

Fixes: 3cfa11bac9bb ("net: sparx5: add the basic sparx5 driver")
Link: https://github.com/ClangBuiltLinux/linux/issues/1413
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index abaa086ce345..f666133a15de 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -670,7 +670,6 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
 	struct reset_control *reset;
 	struct sparx5 *sparx5;
 	int idx = 0, err = 0;
-	u8 *mac_addr;
 
 	if (!np && !pdev->dev.platform_data)
 		return -ENODEV;
@@ -761,12 +760,10 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
 	if (err)
 		goto cleanup_config;
 
-	if (of_get_mac_address(np, mac_addr)) {
+	if (!of_get_mac_address(np, sparx5->base_mac)) {
 		dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n");
 		eth_random_addr(sparx5->base_mac);
 		sparx5->base_mac[5] = 0;
-	} else {
-		ether_addr_copy(sparx5->base_mac, mac_addr);
 	}
 
 	sparx5->xtr_irq = platform_get_irq_byname(sparx5->pdev, "xtr");

From a358f40600b3b39ae3906b6118625b99c0aa7a34 Mon Sep 17 00:00:00 2001
From: Tanner Love <tannerlove@google.com>
Date: Mon, 28 Jun 2021 09:50:06 -0400
Subject: [PATCH 3049/3292] once: implement DO_ONCE_LITE for non-fast-path "do
 once" functionality

Certain uses of "do once" functionality reside outside of fast path,
and so do not require jump label patching via static keys, making
existing DO_ONCE undesirable in such cases.

Replace uses of __section(".data.once") with DO_ONCE_LITE(_IF)?

This patch changes the return values of xfs_printk_once, printk_once,
and printk_deferred_once. Before, they returned whether the print was
performed, but now, they always return true. This is okay because the
return values of the following macros are entirely ignored throughout
the kernel:
- xfs_printk_once
- xfs_warn_once
- xfs_notice_once
- xfs_info_once
- printk_once
- pr_emerg_once
- pr_alert_once
- pr_crit_once
- pr_err_once
- pr_warn_once
- pr_notice_once
- pr_info_once
- pr_devel_once
- pr_debug_once
- printk_deferred_once
- orc_warn

Changes
v3:
  - Expand commit message to explain why changing return values of
    xfs_printk_once, printk_once, printk_deferred_once is benign
v2:
  - Fix i386 build warnings

Signed-off-by: Tanner Love <tannerlove@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/xfs/xfs_message.h      | 13 +++----------
 include/asm-generic/bug.h | 37 +++++++------------------------------
 include/linux/once_lite.h | 24 ++++++++++++++++++++++++
 include/linux/printk.h    | 23 +++--------------------
 kernel/trace/trace.h      | 13 +++----------
 5 files changed, 40 insertions(+), 70 deletions(-)
 create mode 100644 include/linux/once_lite.h

diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 7ec1a9207517..bb9860ec9a93 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -2,6 +2,8 @@
 #ifndef __XFS_MESSAGE_H
 #define __XFS_MESSAGE_H 1
 
+#include <linux/once_lite.h>
+
 struct xfs_mount;
 
 extern __printf(2, 3)
@@ -41,16 +43,7 @@ do {									\
 } while (0)
 
 #define xfs_printk_once(func, dev, fmt, ...)			\
-({								\
-	static bool __section(".data.once") __print_once;	\
-	bool __ret_print_once = !__print_once; 			\
-								\
-	if (!__print_once) {					\
-		__print_once = true;				\
-		func(dev, fmt, ##__VA_ARGS__);			\
-	}							\
-	unlikely(__ret_print_once);				\
-})
+	DO_ONCE_LITE(func, dev, fmt, ##__VA_ARGS__)
 
 #define xfs_emerg_ratelimited(dev, fmt, ...)				\
 	xfs_printk_ratelimited(xfs_emerg, dev, fmt, ##__VA_ARGS__)
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index b402494883b6..bafc51f483c4 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -4,6 +4,7 @@
 
 #include <linux/compiler.h>
 #include <linux/instrumentation.h>
+#include <linux/once_lite.h>
 
 #define CUT_HERE		"------------[ cut here ]------------\n"
 
@@ -140,39 +141,15 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 })
 
 #ifndef WARN_ON_ONCE
-#define WARN_ON_ONCE(condition)	({				\
-	static bool __section(".data.once") __warned;		\
-	int __ret_warn_once = !!(condition);			\
-								\
-	if (unlikely(__ret_warn_once && !__warned)) {		\
-		__warned = true;				\
-		WARN_ON(1);					\
-	}							\
-	unlikely(__ret_warn_once);				\
-})
+#define WARN_ON_ONCE(condition)					\
+	DO_ONCE_LITE_IF(condition, WARN_ON, 1)
 #endif
 
-#define WARN_ONCE(condition, format...)	({			\
-	static bool __section(".data.once") __warned;		\
-	int __ret_warn_once = !!(condition);			\
-								\
-	if (unlikely(__ret_warn_once && !__warned)) {		\
-		__warned = true;				\
-		WARN(1, format);				\
-	}							\
-	unlikely(__ret_warn_once);				\
-})
+#define WARN_ONCE(condition, format...)				\
+	DO_ONCE_LITE_IF(condition, WARN, 1, format)
 
-#define WARN_TAINT_ONCE(condition, taint, format...)	({	\
-	static bool __section(".data.once") __warned;		\
-	int __ret_warn_once = !!(condition);			\
-								\
-	if (unlikely(__ret_warn_once && !__warned)) {		\
-		__warned = true;				\
-		WARN_TAINT(1, taint, format);			\
-	}							\
-	unlikely(__ret_warn_once);				\
-})
+#define WARN_TAINT_ONCE(condition, taint, format...)		\
+	DO_ONCE_LITE_IF(condition, WARN_TAINT, 1, taint, format)
 
 #else /* !CONFIG_BUG */
 #ifndef HAVE_ARCH_BUG
diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h
new file mode 100644
index 000000000000..861e606b820f
--- /dev/null
+++ b/include/linux/once_lite.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ONCE_LITE_H
+#define _LINUX_ONCE_LITE_H
+
+#include <linux/types.h>
+
+/* Call a function once. Similar to DO_ONCE(), but does not use jump label
+ * patching via static keys.
+ */
+#define DO_ONCE_LITE(func, ...)						\
+	DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__)
+#define DO_ONCE_LITE_IF(condition, func, ...)				\
+	({								\
+		static bool __section(".data.once") __already_done;	\
+		bool __ret_do_once = !!(condition);			\
+									\
+		if (unlikely(__ret_do_once && !__already_done)) {	\
+			__already_done = true;				\
+			func(__VA_ARGS__);				\
+		}							\
+		unlikely(__ret_do_once);				\
+	})
+
+#endif /* _LINUX_ONCE_LITE_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index fe7eb2351610..885379a1c9a1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <linux/cache.h>
 #include <linux/ratelimit_types.h>
+#include <linux/once_lite.h>
 
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
@@ -436,27 +437,9 @@ extern int kptr_restrict;
 
 #ifdef CONFIG_PRINTK
 #define printk_once(fmt, ...)					\
-({								\
-	static bool __section(".data.once") __print_once;	\
-	bool __ret_print_once = !__print_once;			\
-								\
-	if (!__print_once) {					\
-		__print_once = true;				\
-		printk(fmt, ##__VA_ARGS__);			\
-	}							\
-	unlikely(__ret_print_once);				\
-})
+	DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__)
 #define printk_deferred_once(fmt, ...)				\
-({								\
-	static bool __section(".data.once") __print_once;	\
-	bool __ret_print_once = !__print_once;			\
-								\
-	if (!__print_once) {					\
-		__print_once = true;				\
-		printk_deferred(fmt, ##__VA_ARGS__);		\
-	}							\
-	unlikely(__ret_print_once);				\
-})
+	DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__)
 #else
 #define printk_once(fmt, ...)					\
 	no_printk(fmt, ##__VA_ARGS__)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cd80d046c7a5..d5d8c088a55d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,6 +20,7 @@
 #include <linux/irq_work.h>
 #include <linux/workqueue.h>
 #include <linux/ctype.h>
+#include <linux/once_lite.h>
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 #include <asm/unistd.h>		/* For NR_SYSCALLS	     */
@@ -99,16 +100,8 @@ enum trace_type {
 #include "trace_entries.h"
 
 /* Use this for memory failure errors */
-#define MEM_FAIL(condition, fmt, ...) ({			\
-	static bool __section(".data.once") __warned;		\
-	int __ret_warn_once = !!(condition);			\
-								\
-	if (unlikely(__ret_warn_once && !__warned)) {		\
-		__warned = true;				\
-		pr_err("ERROR: " fmt, ##__VA_ARGS__);		\
-	}							\
-	unlikely(__ret_warn_once);				\
-})
+#define MEM_FAIL(condition, fmt, ...)					\
+	DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
 
 /*
  * syscalls are special, and need special handling, this is why

From 127d7355abb355b05ff4b42d6e18cc97aa9d1d11 Mon Sep 17 00:00:00 2001
From: Tanner Love <tannerlove@google.com>
Date: Mon, 28 Jun 2021 09:50:07 -0400
Subject: [PATCH 3050/3292] net: update netdev_rx_csum_fault() print dump only
 once

Printing this stack dump multiple times does not provide additional
useful information, and consumes time in the data path. Printing once
is sufficient.

Changes
  v2: Format indentation properly

Signed-off-by: Tanner Love <tannerlove@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 991d09b67bd9..d609366da95c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -148,6 +148,7 @@
 #include <net/devlink.h>
 #include <linux/pm_runtime.h>
 #include <linux/prandom.h>
+#include <linux/once_lite.h>
 
 #include "net-sysfs.h"
 
@@ -3487,13 +3488,16 @@ EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
+static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
+{
+	pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+	skb_dump(KERN_ERR, skb, true);
+	dump_stack();
+}
+
 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
 {
-	if (net_ratelimit()) {
-		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
-		skb_dump(KERN_ERR, skb, true);
-		dump_stack();
-	}
+	DO_ONCE_LITE(do_netdev_rx_csum_fault, dev, skb);
 }
 EXPORT_SYMBOL(netdev_rx_csum_fault);
 #endif

From a118ff661889ecee3ca90f8125bad8fb5bbc07d5 Mon Sep 17 00:00:00 2001
From: Paolo Pisati <paolo.pisati@canonical.com>
Date: Mon, 28 Jun 2021 16:54:24 +0200
Subject: [PATCH 3051/3292] selftests: net: devlink_port_split: check devlink
 returned an element before dereferencing it

And thus avoid a Python stacktrace:

~/linux/tools/testing/selftests/net$ ./devlink_port_split.py
Traceback (most recent call last):
  File "/home/linux/tools/testing/selftests/net/./devlink_port_split.py",
line 277, in <module> main()
  File "/home/linux/tools/testing/selftests/net/./devlink_port_split.py",
line 242, in main
    dev = list(devs.keys())[0]
IndexError: list index out of range

Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/devlink_port_split.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 834066d465fc..d162915311fd 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -239,6 +239,9 @@ def main(cmdline=None):
         assert stderr == ""
 
         devs = json.loads(stdout)['dev']
+        if len(devs.keys()) == 0:
+            print("no devlink device found")
+            sys.exit(1)
         dev = list(devs.keys())[0]
 
     cmd = "devlink dev show %s" % dev

From 0fc3bcd6b6e34281254658bef27c45ec8c19e50c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 13 May 2021 14:30:31 +0200
Subject: [PATCH 3052/3292] gfs2: Clean up the error handling in
 gfs2_page_mkwrite

We're setting an error number so that block_page_mkwrite_return
translates it into the corresponding VM_FAULT_* code in several places,
but this is getting confusing, so set the VM_FAULT_* codes directly
instead.  (No change in functionality.)

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 63 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 37041873bb95..cf16d61f46ec 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -427,22 +427,25 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
 	u64 offset = page_offset(page);
 	unsigned int data_blocks, ind_blocks, rblocks;
+	vm_fault_t ret = VM_FAULT_LOCKED;
 	struct gfs2_holder gh;
 	unsigned int length;
 	loff_t size;
-	int ret;
+	int err;
 
 	sb_start_pagefault(inode->i_sb);
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	ret = gfs2_glock_nq(&gh);
-	if (ret)
+	err = gfs2_glock_nq(&gh);
+	if (err) {
+		ret = block_page_mkwrite_return(err);
 		goto out_uninit;
+	}
 
 	/* Check page index against inode size */
 	size = i_size_read(inode);
 	if (offset >= size) {
-		ret = -EINVAL;
+		ret = VM_FAULT_SIGBUS;
 		goto out_unlock;
 	}
 
@@ -469,24 +472,30 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 	    !gfs2_write_alloc_required(ip, offset, length)) {
 		lock_page(page);
 		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
-			ret = -EAGAIN;
+			ret = VM_FAULT_NOPAGE;
 			unlock_page(page);
 		}
 		goto out_unlock;
 	}
 
-	ret = gfs2_rindex_update(sdp);
-	if (ret)
+	err = gfs2_rindex_update(sdp);
+	if (err) {
+		ret = block_page_mkwrite_return(err);
 		goto out_unlock;
+	}
 
 	gfs2_write_calc_reserv(ip, length, &data_blocks, &ind_blocks);
 	ap.target = data_blocks + ind_blocks;
-	ret = gfs2_quota_lock_check(ip, &ap);
-	if (ret)
+	err = gfs2_quota_lock_check(ip, &ap);
+	if (err) {
+		ret = block_page_mkwrite_return(err);
 		goto out_unlock;
-	ret = gfs2_inplace_reserve(ip, &ap);
-	if (ret)
+	}
+	err = gfs2_inplace_reserve(ip, &ap);
+	if (err) {
+		ret = block_page_mkwrite_return(err);
 		goto out_quota_unlock;
+	}
 
 	rblocks = RES_DINODE + ind_blocks;
 	if (gfs2_is_jdata(ip))
@@ -495,27 +504,35 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 		rblocks += RES_STATFS + RES_QUOTA;
 		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
 	}
-	ret = gfs2_trans_begin(sdp, rblocks, 0);
-	if (ret)
+	err = gfs2_trans_begin(sdp, rblocks, 0);
+	if (err) {
+		ret = block_page_mkwrite_return(err);
 		goto out_trans_fail;
+	}
 
 	lock_page(page);
-	ret = -EAGAIN;
 	/* If truncated, we must retry the operation, we may have raced
 	 * with the glock demotion code.
 	 */
-	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
+	if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
+		ret = VM_FAULT_NOPAGE;
 		goto out_trans_end;
+	}
 
 	/* Unstuff, if required, and allocate backing blocks for page */
-	ret = 0;
-	if (gfs2_is_stuffed(ip))
-		ret = gfs2_unstuff_dinode(ip, page);
-	if (ret == 0)
-		ret = gfs2_allocate_page_backing(page, length);
+	if (gfs2_is_stuffed(ip)) {
+		err = gfs2_unstuff_dinode(ip, page);
+		if (err) {
+			ret = block_page_mkwrite_return(err);
+			goto out_trans_end;
+		}
+	}
+	err = gfs2_allocate_page_backing(page, length);
+	if (err)
+		ret = block_page_mkwrite_return(err);
 
 out_trans_end:
-	if (ret)
+	if (ret != VM_FAULT_LOCKED)
 		unlock_page(page);
 	gfs2_trans_end(sdp);
 out_trans_fail:
@@ -526,12 +543,12 @@ out_unlock:
 	gfs2_glock_dq(&gh);
 out_uninit:
 	gfs2_holder_uninit(&gh);
-	if (ret == 0) {
+	if (ret == VM_FAULT_LOCKED) {
 		set_page_dirty(page);
 		wait_for_stable_page(page);
 	}
 	sb_end_pagefault(inode->i_sb);
-	return block_page_mkwrite_return(ret);
+	return ret;
 }
 
 static vm_fault_t gfs2_fault(struct vm_fault *vmf)

From 64090cbe4bff240e9a5266e956fcaf449ea6b327 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 17 Jun 2021 23:04:14 +0200
Subject: [PATCH 3053/3292] gfs2: Unstuff before locking page in
 gfs2_page_mkwrite

In gfs2_page_mkwrite, unstuff inodes before locking the page.  That
way, we won't have to pass in the locked page to gfs2_unstuff_inode,
and gfs2_unstuff_inode can look up and lock the page itself.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index cf16d61f46ec..4a14616b2c0a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -510,30 +510,32 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 		goto out_trans_fail;
 	}
 
+	/* Unstuff, if required, and allocate backing blocks for page */
+	if (gfs2_is_stuffed(ip)) {
+		err = gfs2_unstuff_dinode(ip, NULL);
+		if (err) {
+			ret = block_page_mkwrite_return(err);
+			goto out_trans_end;
+		}
+	}
+
 	lock_page(page);
 	/* If truncated, we must retry the operation, we may have raced
 	 * with the glock demotion code.
 	 */
 	if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
 		ret = VM_FAULT_NOPAGE;
-		goto out_trans_end;
+		goto out_page_locked;
 	}
 
-	/* Unstuff, if required, and allocate backing blocks for page */
-	if (gfs2_is_stuffed(ip)) {
-		err = gfs2_unstuff_dinode(ip, page);
-		if (err) {
-			ret = block_page_mkwrite_return(err);
-			goto out_trans_end;
-		}
-	}
 	err = gfs2_allocate_page_backing(page, length);
 	if (err)
 		ret = block_page_mkwrite_return(err);
 
-out_trans_end:
+out_page_locked:
 	if (ret != VM_FAULT_LOCKED)
 		unlock_page(page);
+out_trans_end:
 	gfs2_trans_end(sdp);
 out_trans_fail:
 	gfs2_inplace_release(ip);

From 7a607a41cdc6c6f27b8e234cb44ce57070513dac Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 17 Jun 2021 21:36:50 +0200
Subject: [PATCH 3054/3292] gfs2: Clean up gfs2_unstuff_dinode

Split __gfs2_unstuff_inode off from gfs2_unstuff_dinode and clean up the
code a little.  All remaining callers now pass NULL as the page argument
of gfs2_unstuff_dinode, so remove that argument.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c  | 62 ++++++++++++++++++++++++-------------------------
 fs/gfs2/bmap.h  |  2 +-
 fs/gfs2/dir.c   |  2 +-
 fs/gfs2/file.c  |  4 ++--
 fs/gfs2/quota.c |  2 +-
 5 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0bcf11a9987b..ed8b67b21718 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -56,14 +56,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 			       u64 block, struct page *page)
 {
 	struct inode *inode = &ip->i_inode;
-	int release = 0;
-
-	if (!page || page->index) {
-		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
-		if (!page)
-			return -ENOMEM;
-		release = 1;
-	}
 
 	if (!PageUptodate(page)) {
 		void *kaddr = kmap(page);
@@ -97,26 +89,10 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 		gfs2_ordered_add_inode(ip);
 	}
 
-	if (release) {
-		unlock_page(page);
-		put_page(page);
-	}
-
 	return 0;
 }
 
-/**
- * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
- * @ip: The GFS2 inode to unstuff
- * @page: The (optional) page. This is looked up if the @page is NULL
- *
- * This routine unstuffs a dinode and returns it to a "normal" state such
- * that the height can be grown in the traditional way.
- *
- * Returns: errno
- */
-
-int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
+static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct page *page)
 {
 	struct buffer_head *bh, *dibh;
 	struct gfs2_dinode *di;
@@ -124,11 +100,9 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	int isdir = gfs2_is_dir(ip);
 	int error;
 
-	down_write(&ip->i_rw_mutex);
-
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
-		goto out;
+		return error;
 
 	if (i_size_read(&ip->i_inode)) {
 		/* Get a free block, fill it with the stuffed data,
@@ -170,12 +144,38 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 
 out_brelse:
 	brelse(dibh);
+	return error;
+}
+
+/**
+ * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
+ * @ip: The GFS2 inode to unstuff
+ *
+ * This routine unstuffs a dinode and returns it to a "normal" state such
+ * that the height can be grown in the traditional way.
+ *
+ * Returns: errno
+ */
+
+int gfs2_unstuff_dinode(struct gfs2_inode *ip)
+{
+	struct inode *inode = &ip->i_inode;
+	struct page *page;
+	int error;
+
+	down_write(&ip->i_rw_mutex);
+	page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
+	error = -ENOMEM;
+	if (!page)
+		goto out;
+	error = __gfs2_unstuff_inode(ip, page);
+	unlock_page(page);
+	put_page(page);
 out:
 	up_write(&ip->i_rw_mutex);
 	return error;
 }
 
-
 /**
  * find_metapath - Find path through the metadata tree
  * @sdp: The superblock
@@ -1079,7 +1079,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 			goto out_trans_fail;
 
 		if (unstuff) {
-			ret = gfs2_unstuff_dinode(ip, NULL);
+			ret = gfs2_unstuff_dinode(ip);
 			if (ret)
 				goto out_trans_end;
 			release_metapath(mp);
@@ -2143,7 +2143,7 @@ static int do_grow(struct inode *inode, u64 size)
 		goto do_grow_release;
 
 	if (unstuff) {
-		error = gfs2_unstuff_dinode(ip, NULL);
+		error = gfs2_unstuff_dinode(ip);
 		if (error)
 			goto do_end_trans;
 	}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 6676d863faef..53cce6c08e81 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -46,7 +46,7 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
 extern const struct iomap_ops gfs2_iomap_ops;
 extern const struct iomap_writeback_ops gfs2_writeback_ops;
 
-extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
+extern int gfs2_unstuff_dinode(struct gfs2_inode *ip);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
 extern int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 18f67b37d6f8..42b7dfffb5e7 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -172,7 +172,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
 		return -EINVAL;
 
 	if (gfs2_is_stuffed(ip)) {
-		error = gfs2_unstuff_dinode(ip, NULL);
+		error = gfs2_unstuff_dinode(ip);
 		if (error)
 			return error;
 	}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4a14616b2c0a..84ec053d43b4 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -512,7 +512,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 
 	/* Unstuff, if required, and allocate backing blocks for page */
 	if (gfs2_is_stuffed(ip)) {
-		err = gfs2_unstuff_dinode(ip, NULL);
+		err = gfs2_unstuff_dinode(ip);
 		if (err) {
 			ret = block_page_mkwrite_return(err);
 			goto out_trans_end;
@@ -981,7 +981,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 
 	if (gfs2_is_stuffed(ip)) {
-		error = gfs2_unstuff_dinode(ip, NULL);
+		error = gfs2_unstuff_dinode(ip);
 		if (unlikely(error))
 			goto out;
 	}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 94637c307cc8..be0997e24d60 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -825,7 +825,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	u64 size;
 
 	if (gfs2_is_stuffed(ip)) {
-		err = gfs2_unstuff_dinode(ip, NULL);
+		err = gfs2_unstuff_dinode(ip);
 		if (err)
 			return err;
 	}

From 2705dfb2094777e405e065105e307074af8965c1 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 28 Jun 2021 10:33:12 +0800
Subject: [PATCH 3055/3292] block: fix discard request merge

ll_new_hw_segment() is reached only in case of single range discard
merge, and we don't have max discard segment size limit actually, so
it is wrong to run the following check:

if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))

it may be always false since req->nr_phys_segments is initialized as
one, and bio's segment count is still 1, blk_rq_get_max_segments(reg)
is 1 too.

Fix the issue by not doing the check and bypassing the calculation of
discard request's nr_phys_segments.

Based on analysis from Wang Shanker.

Cc: Christoph Hellwig <hch@lst.de>
Reported-by: Wang Shanker <shankerwangmiao@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210628023312.1903255-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1398b52a24b4..a11b3b53717e 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -559,10 +559,14 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
 		unsigned int nr_phys_segs)
 {
-	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
+	if (blk_integrity_merge_bio(req->q, req, bio) == false)
 		goto no_merge;
 
-	if (blk_integrity_merge_bio(req->q, req, bio) == false)
+	/* discard request merge won't add new segment */
+	if (req_op(req) == REQ_OP_DISCARD)
+		return 1;
+
+	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
 		goto no_merge;
 
 	/*

From 3e19ae7c6fd62978ae518b17ae0e30ab8d17ed07 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:44 +0300
Subject: [PATCH 3056/3292] net: bridge: use READ_ONCE() and WRITE_ONCE()
 compiler barriers for fdb->dst

Annotate the writer side of fdb->dst:

- fdb_create()
- br_fdb_update()
- fdb_add_entry()
- br_fdb_external_learn_add()

with WRITE_ONCE() and the reader side:

- br_fdb_test_addr()
- br_fdb_update()
- fdb_fill_info()
- fdb_add_entry()
- fdb_delete_by_addr_and_port()
- br_fdb_external_learn_add()
- br_switchdev_fdb_notify()

with compiler barriers such that the readers do not attempt to reload
fdb->dst multiple times, leading to potentially different destination
ports when the fdb entry is updated concurrently.

This is especially important in read-side sections where fdb->dst is
used more than once, but let's convert all accesses for the sake of
uniformity.

Suggested-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c       | 28 +++++++++++++++++-----------
 net/bridge/br_switchdev.c |  7 ++++---
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 16f9434fdb5d..dc3ecf2d5637 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -440,9 +440,14 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 	if (!port)
 		ret = 0;
 	else {
+		const struct net_bridge_port *dst = NULL;
+
 		fdb = br_fdb_find_rcu(port->br, addr, 0);
-		ret = fdb && fdb->dst && fdb->dst->dev != dev &&
-			fdb->dst->state == BR_STATE_FORWARDING;
+		if (fdb)
+			dst = READ_ONCE(fdb->dst);
+
+		ret = dst && dst->dev != dev &&
+		      dst->state == BR_STATE_FORWARDING;
 	}
 	rcu_read_unlock();
 
@@ -509,7 +514,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
 	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
 	if (fdb) {
 		memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
-		fdb->dst = source;
+		WRITE_ONCE(fdb->dst, source);
 		fdb->key.vlan_id = vid;
 		fdb->flags = flags;
 		fdb->updated = fdb->used = jiffies;
@@ -600,10 +605,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			}
 
 			/* fastpath: update of existing entry */
-			if (unlikely(source != fdb->dst &&
+			if (unlikely(source != READ_ONCE(fdb->dst) &&
 				     !test_bit(BR_FDB_STICKY, &fdb->flags))) {
 				br_switchdev_fdb_notify(fdb, RTM_DELNEIGH);
-				fdb->dst = source;
+				WRITE_ONCE(fdb->dst, source);
 				fdb_modified = true;
 				/* Take over HW learned entry */
 				if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN,
@@ -650,6 +655,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 			 const struct net_bridge_fdb_entry *fdb,
 			 u32 portid, u32 seq, int type, unsigned int flags)
 {
+	const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
 	unsigned long now = jiffies;
 	struct nda_cacheinfo ci;
 	struct nlmsghdr *nlh;
@@ -665,7 +671,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 	ndm->ndm_pad2    = 0;
 	ndm->ndm_flags	 = 0;
 	ndm->ndm_type	 = 0;
-	ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
+	ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
 	ndm->ndm_state   = fdb_to_nud(br, fdb);
 
 	if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
@@ -964,8 +970,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 		if (flags & NLM_F_EXCL)
 			return -EEXIST;
 
-		if (fdb->dst != source) {
-			fdb->dst = source;
+		if (READ_ONCE(fdb->dst) != source) {
+			WRITE_ONCE(fdb->dst, source);
 			modified = true;
 		}
 	}
@@ -1132,7 +1138,7 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br,
 	struct net_bridge_fdb_entry *fdb;
 
 	fdb = br_fdb_find(br, addr, vlan);
-	if (!fdb || fdb->dst != p)
+	if (!fdb || READ_ONCE(fdb->dst) != p)
 		return -ENOENT;
 
 	fdb_delete(br, fdb, true);
@@ -1281,8 +1287,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 	} else {
 		fdb->updated = jiffies;
 
-		if (fdb->dst != p) {
-			fdb->dst = p;
+		if (READ_ONCE(fdb->dst) != p) {
+			WRITE_ONCE(fdb->dst, p);
 			modified = true;
 		}
 
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index a5e601e41cb9..192293fe37fd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -110,6 +110,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 void
 br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
+	const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
 	struct switchdev_notifier_fdb_info info = {
 		.addr = fdb->key.addr.addr,
 		.vid = fdb->key.vlan_id,
@@ -118,17 +119,17 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 		.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
 	};
 
-	if (!fdb->dst)
+	if (!dst)
 		return;
 
 	switch (type) {
 	case RTM_DELNEIGH:
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
-					 fdb->dst->dev, &info.info, NULL);
+					 dst->dev, &info.info, NULL);
 		break;
 	case RTM_NEWNEIGH:
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
-					 fdb->dst->dev, &info.info, NULL);
+					 dst->dev, &info.info, NULL);
 		break;
 	}
 }

From 6eb38bf8eb90748dbf4191f6c4940ae76223b0a4 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 29 Jun 2021 17:06:45 +0300
Subject: [PATCH 3057/3292] net: bridge: switchdev: send FDB notifications for
 host addresses

Treat addresses added to the bridge itself in the same way as regular
ports and send out a notification so that drivers may sync it down to
the hardware FDB.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c       |  4 ++--
 net/bridge/br_private.h   |  7 ++++---
 net/bridge/br_switchdev.c | 11 +++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index dc3ecf2d5637..bad7e84d76af 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -607,7 +607,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			/* fastpath: update of existing entry */
 			if (unlikely(source != READ_ONCE(fdb->dst) &&
 				     !test_bit(BR_FDB_STICKY, &fdb->flags))) {
-				br_switchdev_fdb_notify(fdb, RTM_DELNEIGH);
+				br_switchdev_fdb_notify(br, fdb, RTM_DELNEIGH);
 				WRITE_ONCE(fdb->dst, source);
 				fdb_modified = true;
 				/* Take over HW learned entry */
@@ -800,7 +800,7 @@ static void fdb_notify(struct net_bridge *br,
 	int err = -ENOBUFS;
 
 	if (swdev_notify)
-		br_switchdev_fdb_notify(fdb, type);
+		br_switchdev_fdb_notify(br, fdb, type);
 
 	skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a684d0cfc58c..2b48b204205e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1654,8 +1654,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 			       unsigned long flags,
 			       unsigned long mask,
 			       struct netlink_ext_ack *extack);
-void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
-			     int type);
+void br_switchdev_fdb_notify(struct net_bridge *br,
+			     const struct net_bridge_fdb_entry *fdb, int type);
 int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
 			       struct netlink_ext_ack *extack);
 int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
@@ -1702,7 +1702,8 @@ static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
 }
 
 static inline void
-br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+br_switchdev_fdb_notify(struct net_bridge *br,
+			const struct net_bridge_fdb_entry *fdb, int type)
 {
 }
 
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 192293fe37fd..d3adee0f91f9 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -108,9 +108,11 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 }
 
 void
-br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+br_switchdev_fdb_notify(struct net_bridge *br,
+			const struct net_bridge_fdb_entry *fdb, int type)
 {
 	const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+	struct net_device *dev = dst ? dst->dev : br->dev;
 	struct switchdev_notifier_fdb_info info = {
 		.addr = fdb->key.addr.addr,
 		.vid = fdb->key.vlan_id,
@@ -119,17 +121,14 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 		.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
 	};
 
-	if (!dst)
-		return;
-
 	switch (type) {
 	case RTM_DELNEIGH:
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
-					 dst->dev, &info.info, NULL);
+					 dev, &info.info, NULL);
 		break;
 	case RTM_NEWNEIGH:
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
-					 dst->dev, &info.info, NULL);
+					 dev, &info.info, NULL);
 		break;
 	}
 }

From f851a721a638316a8257459db8359f2930d4b473 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:46 +0300
Subject: [PATCH 3058/3292] net: bridge: allow br_fdb_replay to be called for
 the bridge device

When a port joins a bridge which already has local FDB entries pointing
to the bridge device itself, we would like to offload those, so allow
the "dev" argument to be equal to the bridge too. The code already does
what we need in that case.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index bad7e84d76af..2b862cffc03a 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -760,7 +760,10 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
 	unsigned long action;
 	int err = 0;
 
-	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+	if (!netif_is_bridge_master(br_dev))
+		return -EINVAL;
+
+	if (!netif_is_bridge_port(dev) && !netif_is_bridge_master(dev))
 		return -EINVAL;
 
 	br = netdev_priv(br_dev);

From b117e1e8a86d363fc1ad53df8d2c47884d2c0048 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:47 +0300
Subject: [PATCH 3059/3292] net: dsa: delete dsa_legacy_fdb_add and
 dsa_legacy_fdb_del

We want to add reference counting for FDB entries in cross-chip
topologies, and in order for that to have any chance of working and not
be unbalanced (leading to entries which are never deleted), we need to
ensure that higher layers are sane, because if they aren't, it's garbage
in, garbage out.

For example, if we add a bridge FDB entry twice, the bridge properly
errors out:

$ bridge fdb add dev swp0 00:01:02:03:04:07 master static
$ bridge fdb add dev swp0 00:01:02:03:04:07 master static
RTNETLINK answers: File exists

However, the same thing cannot be said about the bridge bypass
operations:

$ bridge fdb add dev swp0 00:01:02:03:04:07
$ bridge fdb add dev swp0 00:01:02:03:04:07
$ bridge fdb add dev swp0 00:01:02:03:04:07
$ bridge fdb add dev swp0 00:01:02:03:04:07
$ echo $?
0

But one 'bridge fdb del' is enough to remove the entry, no matter how
many times it was added.

The bridge bypass operations are impossible to maintain in these
circumstances and lack of support for reference counting the cross-chip
notifiers is holding us back from making further progress, so just drop
support for them. The only way left for users to install static bridge
FDB entries is the proper one, using the "master static" flags.

With this change, rtnl_fdb_add() falls back to calling
ndo_dflt_fdb_add() which uses the duplicate-exclusive variant of
dev_uc_add(): dev_uc_add_excl(). Because DSA does not (yet) declare
IFF_UNICAST_FLT, this results in us going to promiscuous mode:

$ bridge fdb add dev swp0 00:01:02:03:04:05
[   28.206743] device swp0 entered promiscuous mode
$ bridge fdb add dev swp0 00:01:02:03:04:05
RTNETLINK answers: File exists

So even if it does not completely fail, there is at least some indication
that it is behaving differently from before, and closer to user space
expectations, I would argue (the lack of a "local|static" specifier
defaults to "local", or "host-only", so dev_uc_add() is a reasonable
default implementation). If the generic implementation of .ndo_fdb_add
provided by Vlad Yasevich is a proof of anything, it only proves that
the implementation provided by DSA was always wrong, by not looking at
"ndm->ndm_state & NUD_NOARP" (the "static" flag which means that the FDB
entry points outwards) and "ndm->ndm_state & NUD_PERMANENT" (the "local"
flag which means that the FDB entry points towards the host). It all
used to mean the same thing to DSA.

Update the documentation so that the users are not confused about what's
going on.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/dsa/configuration.rst          | 68 +++++++++++++++++++
 net/dsa/slave.c                               | 23 -------
 2 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst
index 774f0e76c746..2b08f1a772d3 100644
--- a/Documentation/networking/dsa/configuration.rst
+++ b/Documentation/networking/dsa/configuration.rst
@@ -292,3 +292,71 @@ configuration.
 
     # bring up the bridge devices
     ip link set br0 up
+
+Forwarding database (FDB) management
+------------------------------------
+
+The existing DSA switches do not have the necessary hardware support to keep
+the software FDB of the bridge in sync with the hardware tables, so the two
+tables are managed separately (``bridge fdb show`` queries both, and depending
+on whether the ``self`` or ``master`` flags are being used, a ``bridge fdb
+add`` or ``bridge fdb del`` command acts upon entries from one or both tables).
+
+Up until kernel v4.14, DSA only supported user space management of bridge FDB
+entries using the bridge bypass operations (which do not update the software
+FDB, just the hardware one) using the ``self`` flag (which is optional and can
+be omitted).
+
+  .. code-block:: sh
+
+    bridge fdb add dev swp0 00:01:02:03:04:05 self static
+    # or shorthand
+    bridge fdb add dev swp0 00:01:02:03:04:05 static
+
+Due to a bug, the bridge bypass FDB implementation provided by DSA did not
+distinguish between ``static`` and ``local`` FDB entries (``static`` are meant
+to be forwarded, while ``local`` are meant to be locally terminated, i.e. sent
+to the host port). Instead, all FDB entries with the ``self`` flag (implicit or
+explicit) are treated by DSA as ``static`` even if they are ``local``.
+
+  .. code-block:: sh
+
+    # This command:
+    bridge fdb add dev swp0 00:01:02:03:04:05 static
+    # behaves the same for DSA as this command:
+    bridge fdb add dev swp0 00:01:02:03:04:05 local
+    # or shorthand, because the 'local' flag is implicit if 'static' is not
+    # specified, it also behaves the same as:
+    bridge fdb add dev swp0 00:01:02:03:04:05
+
+The last command is an incorrect way of adding a static bridge FDB entry to a
+DSA switch using the bridge bypass operations, and works by mistake. Other
+drivers will treat an FDB entry added by the same command as ``local`` and as
+such, will not forward it, as opposed to DSA.
+
+Between kernel v4.14 and v5.14, DSA has supported in parallel two modes of
+adding a bridge FDB entry to the switch: the bridge bypass discussed above, as
+well as a new mode using the ``master`` flag which installs FDB entries in the
+software bridge too.
+
+  .. code-block:: sh
+
+    bridge fdb add dev swp0 00:01:02:03:04:05 master static
+
+Since kernel v5.14, DSA has gained stronger integration with the bridge's
+software FDB, and the support for its bridge bypass FDB implementation (using
+the ``self`` flag) has been removed. This results in the following changes:
+
+  .. code-block:: sh
+
+    # This is the only valid way of adding an FDB entry that is supported,
+    # compatible with v4.14 kernels and later:
+    bridge fdb add dev swp0 00:01:02:03:04:05 master static
+    # This command is no longer buggy and the entry is properly treated as
+    # 'local' instead of being forwarded:
+    bridge fdb add dev swp0 00:01:02:03:04:05
+    # This command no longer installs a static FDB entry to hardware:
+    bridge fdb add dev swp0 00:01:02:03:04:05 static
+
+Script writers are therefore encouraged to use the ``master static`` set of
+flags when working with bridge FDB entries on DSA switch interfaces.
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 898ed9cf756f..64acb1e11cd7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1651,27 +1651,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.self_test		= dsa_slave_net_selftest,
 };
 
-/* legacy way, bypassing the bridge *****************************************/
-static int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-			      struct net_device *dev,
-			      const unsigned char *addr, u16 vid,
-			      u16 flags,
-			      struct netlink_ext_ack *extack)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-
-	return dsa_port_fdb_add(dp, addr, vid);
-}
-
-static int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-			      struct net_device *dev,
-			      const unsigned char *addr, u16 vid)
-{
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-
-	return dsa_port_fdb_del(dp, addr, vid);
-}
-
 static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -1713,8 +1692,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
-	.ndo_fdb_add		= dsa_legacy_fdb_add,
-	.ndo_fdb_del		= dsa_legacy_fdb_del,
 	.ndo_fdb_dump		= dsa_slave_fdb_dump,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 	.ndo_get_iflink		= dsa_slave_get_iflink,

From 63609c8fac40810b0b14c9512d47b11965cea37f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:48 +0300
Subject: [PATCH 3060/3292] net: dsa: introduce dsa_is_upstream_port and
 dsa_switch_is_upstream_of

In preparation for the new cross-chip notifiers for host addresses,
let's introduce some more topology helpers which we are going to use to
discern switches that are in our path towards the dedicated CPU port
from switches that aren't.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index ea47783d5695..5f632cfd33c7 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -491,6 +491,32 @@ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port)
 	return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index);
 }
 
+/* Return true if this is the local port used to reach the CPU port */
+static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port)
+{
+	if (dsa_is_unused_port(ds, port))
+		return false;
+
+	return port == dsa_upstream_port(ds, port);
+}
+
+/* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning
+ * that the routing port from @downstream_ds to @upstream_ds is also the port
+ * which @downstream_ds uses to reach its dedicated CPU.
+ */
+static inline bool dsa_switch_is_upstream_of(struct dsa_switch *upstream_ds,
+					     struct dsa_switch *downstream_ds)
+{
+	int routing_port;
+
+	if (upstream_ds == downstream_ds)
+		return true;
+
+	routing_port = dsa_routing_port(downstream_ds, upstream_ds->index);
+
+	return dsa_is_upstream_port(downstream_ds, routing_port);
+}
+
 static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
 {
 	const struct dsa_switch *ds = dp->ds;

From b8e997c490036f38d48687415fd1367e00e98fb9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:49 +0300
Subject: [PATCH 3061/3292] net: dsa: introduce a separate cross-chip notifier
 type for host MDBs

Commit abd49535c380 ("net: dsa: execute dsa_switch_mdb_add only for
routing port in cross-chip topologies") does a surprisingly good job
even for the SWITCHDEV_OBJ_ID_HOST_MDB use case, where DSA simply
translates a switchdev object received on dp into a cross-chip notifier
for dp->cpu_dp.

To visualize how that works, imagine the daisy chain topology below and
consider a SWITCHDEV_OBJ_ID_HOST_MDB object emitted on sw2p0. How does
the cross-chip notifier know to match on all the right ports (sw0p4, the
dedicated CPU port, sw1p4, an upstream DSA link, and sw2p4, another
upstream DSA link)?

                                                |
       sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
    [  user ] [  user ] [  user ] [  dsa  ] [  cpu  ]
    [       ] [       ] [       ] [       ] [   x   ]
                                      |
                                      +---------+
                                                |
       sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
    [  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
    [       ] [       ] [       ] [       ] [   x   ]
                                      |
                                      +---------+
                                                |
       sw2p0     sw2p1     sw2p2     sw2p3     sw2p4
    [  user ] [  user ] [  user ] [  user ] [  dsa  ]
    [       ] [       ] [       ] [       ] [   x   ]

The answer is simple: the dedicated CPU port of sw2p0 is sw0p4, and
dsa_routing_port returns the upstream port for all switches.

That is fine, but there are other topologies where this does not work as
well. There are trees with "H" topologies in the wild, where there are 2
or more switches with DSA links between them, but every switch has its
dedicated CPU port. For these topologies, it seems stupid for the neighbor
switches to install an MDB entry on the routing port, since these
multicast addresses are fundamentally different than the usual ones we
support (and that is the justification for this patch, to introduce the
concept of a termination plane multicast MAC address, as opposed to a
forwarding plane multicast MAC address).

For example, when a SWITCHDEV_OBJ_ID_HOST_MDB would get added to sw0p0,
without this patch, it would get treated as a regular port MDB on sw0p2
and it would match on the ports below (including the sw1p3 routing port).

                         |                                  |
    sw0p0     sw0p1     sw0p2     sw0p3          sw1p3     sw1p2     sw1p1     sw1p0
 [  user ] [  user ] [  cpu  ] [  dsa  ]      [  dsa  ] [  cpu  ] [  user ] [  user ]
 [       ] [       ] [   x   ] [       ] ---- [   x   ] [       ] [       ] [       ]

With the patch, the host MDB notifier on sw0p0 matches only on the local
switch, which is what we want for a termination plane address.

                         |                                  |
    sw0p0     sw0p1     sw0p2     sw0p3          sw1p3     sw1p2     sw1p1     sw1p0
 [  user ] [  user ] [  cpu  ] [  dsa  ]      [  dsa  ] [  cpu  ] [  user ] [  user ]
 [       ] [       ] [   x   ] [       ] ---- [       ] [       ] [       ] [       ]

Name this new matching function "dsa_switch_host_address_match" since we
will be reusing it soon for host FDB entries as well.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h |  6 +++++
 net/dsa/port.c     | 24 +++++++++++++++++++
 net/dsa/slave.c    | 10 ++------
 net/dsa/switch.c   | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+), 8 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c8712942002f..cd65933d269b 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -27,6 +27,8 @@ enum {
 	DSA_NOTIFIER_LAG_LEAVE,
 	DSA_NOTIFIER_MDB_ADD,
 	DSA_NOTIFIER_MDB_DEL,
+	DSA_NOTIFIER_HOST_MDB_ADD,
+	DSA_NOTIFIER_HOST_MDB_DEL,
 	DSA_NOTIFIER_VLAN_ADD,
 	DSA_NOTIFIER_VLAN_DEL,
 	DSA_NOTIFIER_MTU,
@@ -214,6 +216,10 @@ int dsa_port_mdb_add(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
 int dsa_port_mdb_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_host_mdb_add(const struct dsa_port *dp,
+			  const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_host_mdb_del(const struct dsa_port *dp,
+			  const struct switchdev_obj_port_mdb *mdb);
 int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
 			      struct switchdev_brport_flags flags,
 			      struct netlink_ext_ack *extack);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 46089dd2b2ec..47f45f795f44 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -681,6 +681,30 @@ int dsa_port_mdb_del(const struct dsa_port *dp,
 	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
 }
 
+int dsa_port_host_mdb_add(const struct dsa_port *dp,
+			  const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_notifier_mdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mdb = mdb,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
+}
+
+int dsa_port_host_mdb_del(const struct dsa_port *dp,
+			  const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_notifier_mdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.mdb = mdb,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
+}
+
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct netlink_ext_ack *extack)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 64acb1e11cd7..4b1d738bc3bc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -418,10 +418,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
 		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
-		/* DSA can directly translate this to a normal MDB add,
-		 * but on the CPU port.
-		 */
-		err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+		err = dsa_port_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -495,10 +492,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
 		if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
 			return -EOPNOTSUPP;
 
-		/* DSA can directly translate this to a normal MDB add,
-		 * but on the CPU port.
-		 */
-		err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+		err = dsa_port_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index c1e5afafe633..c40afd622331 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -154,6 +154,27 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
 	return 0;
 }
 
+/* Matches for all upstream-facing ports (the CPU port and all upstream-facing
+ * DSA links) that sit between the targeted port on which the notifier was
+ * emitted and its dedicated CPU port.
+ */
+static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port,
+					  int info_sw_index, int info_port)
+{
+	struct dsa_port *targeted_dp, *cpu_dp;
+	struct dsa_switch *targeted_ds;
+
+	targeted_ds = dsa_switch_find(ds->dst->index, info_sw_index);
+	targeted_dp = dsa_to_port(targeted_ds, info_port);
+	cpu_dp = targeted_dp->cpu_dp;
+
+	if (dsa_switch_is_upstream_of(ds, targeted_ds))
+		return port == dsa_towards_port(ds, cpu_dp->ds->index,
+						cpu_dp->index);
+
+	return false;
+}
+
 static int dsa_switch_fdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_fdb_info *info)
 {
@@ -258,6 +279,39 @@ static int dsa_switch_mdb_del(struct dsa_switch *ds,
 	return 0;
 }
 
+static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
+				   struct dsa_notifier_mdb_info *info)
+{
+	int err = 0;
+	int port;
+
+	if (!ds->ops->port_mdb_add)
+		return -EOPNOTSUPP;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_host_address_match(ds, port, info->sw_index,
+						  info->port)) {
+			err = ds->ops->port_mdb_add(ds, port, info->mdb);
+			if (err)
+				break;
+		}
+	}
+
+	return err;
+}
+
+static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
+				   struct dsa_notifier_mdb_info *info)
+{
+	if (!ds->ops->port_mdb_del)
+		return -EOPNOTSUPP;
+
+	if (ds->index == info->sw_index)
+		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
+
+	return 0;
+}
+
 static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
 				  struct dsa_notifier_vlan_info *info)
 {
@@ -441,6 +495,12 @@ static int dsa_switch_event(struct notifier_block *nb,
 	case DSA_NOTIFIER_MDB_DEL:
 		err = dsa_switch_mdb_del(ds, info);
 		break;
+	case DSA_NOTIFIER_HOST_MDB_ADD:
+		err = dsa_switch_host_mdb_add(ds, info);
+		break;
+	case DSA_NOTIFIER_HOST_MDB_DEL:
+		err = dsa_switch_host_mdb_del(ds, info);
+		break;
 	case DSA_NOTIFIER_VLAN_ADD:
 		err = dsa_switch_vlan_add(ds, info);
 		break;

From 161ca59d39e909d37eeeaf14bc1165b114790d00 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:50 +0300
Subject: [PATCH 3062/3292] net: dsa: reference count the MDB entries at the
 cross-chip notifier level

Ever since the cross-chip notifiers were introduced, the design was
meant to be simplistic and just get the job done without worrying too
much about dangling resources left behind.

For example, somebody installs an MDB entry on sw0p0 in this daisy chain
topology. It gets installed using ds->ops->port_mdb_add() on sw0p0,
sw1p4 and sw2p4.

                                                    |
           sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
        [  user ] [  user ] [  user ] [  dsa  ] [  cpu  ]
        [   x   ] [       ] [       ] [       ] [       ]
                                          |
                                          +---------+
                                                    |
           sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
        [  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
        [       ] [       ] [       ] [       ] [   x   ]
                                          |
                                          +---------+
                                                    |
           sw2p0     sw2p1     sw2p2     sw2p3     sw2p4
        [  user ] [  user ] [  user ] [  user ] [  dsa  ]
        [       ] [       ] [       ] [       ] [   x   ]

Then the same person deletes that MDB entry. The cross-chip notifier for
deletion only matches sw0p0:

                                                    |
           sw0p0     sw0p1     sw0p2     sw0p3     sw0p4
        [  user ] [  user ] [  user ] [  dsa  ] [  cpu  ]
        [   x   ] [       ] [       ] [       ] [       ]
                                          |
                                          +---------+
                                                    |
           sw1p0     sw1p1     sw1p2     sw1p3     sw1p4
        [  user ] [  user ] [  user ] [  dsa  ] [  dsa  ]
        [       ] [       ] [       ] [       ] [       ]
                                          |
                                          +---------+
                                                    |
           sw2p0     sw2p1     sw2p2     sw2p3     sw2p4
        [  user ] [  user ] [  user ] [  user ] [  dsa  ]
        [       ] [       ] [       ] [       ] [       ]

Why?

Because the DSA links are 'trunk' ports, if we just go ahead and delete
the MDB from sw1p4 and sw2p4 directly, we might delete those multicast
entries when they are still needed. Just consider the fact that somebody
does:

- add a multicast MAC address towards sw0p0 [ via the cross-chip
  notifiers it gets installed on the DSA links too ]
- add the same multicast MAC address towards sw0p1 (another port of that
  same switch)
- delete the same multicast MAC address from sw0p0.

At this point, if we deleted the MAC address from the DSA links, it
would be flooded, even though there is still an entry on switch 0 which
needs it not to.

So that is why deletions only match the targeted source port and nothing
on DSA links. Of course, dangling resources means that the hardware
tables will eventually run out given enough additions/removals, but hey,
at least it's simple.

But there is a bigger concern which needs to be addressed, and that is
our support for SWITCHDEV_OBJ_ID_HOST_MDB. DSA simply translates such an
object into a dsa_port_host_mdb_add() which ends up as ds->ops->port_mdb_add()
on the upstream port, and a similar thing happens on deletion:
dsa_port_host_mdb_del() will trigger ds->ops->port_mdb_del() on the
upstream port.

When there are 2 VLAN-unaware bridges spanning the same switch (which is
a use case DSA proudly supports), each bridge will install its own
SWITCHDEV_OBJ_ID_HOST_MDB entries. But upon deletion, DSA goes ahead and
emits a DSA_NOTIFIER_MDB_DEL for dp->cpu_dp, which is shared between the
user ports enslaved to br0 and the user ports enslaved to br1. Not good.
The host-trapped multicast addresses installed by br1 will be deleted
when any state changes in br0 (IGMP timers expire, or ports leave, etc).

To avoid this, we could of course go the route of the zero-sum game and
delete the DSA_NOTIFIER_MDB_DEL call for dp->cpu_dp. But the better
design is to just admit that on shared ports like DSA links and CPU
ports, we should be reference counting calls, even if this consumes some
dynamic memory which DSA has traditionally avoided. On the flip side,
the hardware tables of switches are limited in size, so it would be good
if the OS managed them properly instead of having them eventually
overflow.

To address the memory usage concern, we only apply the refcounting of
MDB entries on ports that are really shared (CPU ports and DSA links)
and not on user ports. In a typical single-switch setup, this means only
the CPU port (and the host MDB entries are not that many, really).

The name of the newly introduced data structures (dsa_mac_addr) is
chosen in such a way that will be reusable for host FDB entries (next
patch).

With this change, we can finally have the same matching logic for the
MDB additions and deletions, as well as for their host-trapped variants.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  12 ++++++
 net/dsa/dsa2.c    |   8 ++++
 net/dsa/switch.c  | 104 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 5f632cfd33c7..2c50546f9667 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -285,6 +285,11 @@ struct dsa_port {
 	 */
 	const struct dsa_netdevice_ops *netdev_ops;
 
+	/* List of MAC addresses that must be forwarded on this port.
+	 * These are only valid on CPU ports and DSA links.
+	 */
+	struct list_head	mdbs;
+
 	bool setup;
 };
 
@@ -299,6 +304,13 @@ struct dsa_link {
 	struct list_head list;
 };
 
+struct dsa_mac_addr {
+	unsigned char addr[ETH_ALEN];
+	u16 vid;
+	refcount_t refcount;
+	struct list_head list;
+};
+
 struct dsa_switch {
 	bool setup;
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 9000a8c84baf..2035d132682f 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -348,6 +348,8 @@ static int dsa_port_setup(struct dsa_port *dp)
 	if (dp->setup)
 		return 0;
 
+	INIT_LIST_HEAD(&dp->mdbs);
+
 	switch (dp->type) {
 	case DSA_PORT_TYPE_UNUSED:
 		dsa_port_disable(dp);
@@ -443,6 +445,7 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
 static void dsa_port_teardown(struct dsa_port *dp)
 {
 	struct devlink_port *dlp = &dp->devlink_port;
+	struct dsa_mac_addr *a, *tmp;
 
 	if (!dp->setup)
 		return;
@@ -468,6 +471,11 @@ static void dsa_port_teardown(struct dsa_port *dp)
 		break;
 	}
 
+	list_for_each_entry_safe(a, tmp, &dp->mdbs, list) {
+		list_del(&a->list);
+		kfree(a);
+	}
+
 	dp->setup = false;
 }
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index c40afd622331..5439de029485 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -175,6 +175,84 @@ static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port,
 	return false;
 }
 
+static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list,
+					      const unsigned char *addr,
+					      u16 vid)
+{
+	struct dsa_mac_addr *a;
+
+	list_for_each_entry(a, addr_list, list)
+		if (ether_addr_equal(a->addr, addr) && a->vid == vid)
+			return a;
+
+	return NULL;
+}
+
+static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port,
+				 const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct dsa_mac_addr *a;
+	int err;
+
+	/* No need to bother with refcounting for user ports */
+	if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+		return ds->ops->port_mdb_add(ds, port, mdb);
+
+	a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+	if (a) {
+		refcount_inc(&a->refcount);
+		return 0;
+	}
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	err = ds->ops->port_mdb_add(ds, port, mdb);
+	if (err) {
+		kfree(a);
+		return err;
+	}
+
+	ether_addr_copy(a->addr, mdb->addr);
+	a->vid = mdb->vid;
+	refcount_set(&a->refcount, 1);
+	list_add_tail(&a->list, &dp->mdbs);
+
+	return 0;
+}
+
+static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port,
+				 const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct dsa_mac_addr *a;
+	int err;
+
+	/* No need to bother with refcounting for user ports */
+	if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+		return ds->ops->port_mdb_del(ds, port, mdb);
+
+	a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+	if (!a)
+		return -ENOENT;
+
+	if (!refcount_dec_and_test(&a->refcount))
+		return 0;
+
+	err = ds->ops->port_mdb_del(ds, port, mdb);
+	if (err) {
+		refcount_inc(&a->refcount);
+		return err;
+	}
+
+	list_del(&a->list);
+	kfree(a);
+
+	return 0;
+}
+
 static int dsa_switch_fdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_fdb_info *info)
 {
@@ -264,19 +342,18 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 	if (!ds->ops->port_mdb_add)
 		return -EOPNOTSUPP;
 
-	return ds->ops->port_mdb_add(ds, port, info->mdb);
+	return dsa_switch_do_mdb_add(ds, port, info->mdb);
 }
 
 static int dsa_switch_mdb_del(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
+	int port = dsa_towards_port(ds, info->sw_index, info->port);
+
 	if (!ds->ops->port_mdb_del)
 		return -EOPNOTSUPP;
 
-	if (ds->index == info->sw_index)
-		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
-
-	return 0;
+	return dsa_switch_do_mdb_del(ds, port, info->mdb);
 }
 
 static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
@@ -291,7 +368,7 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
 	for (port = 0; port < ds->num_ports; port++) {
 		if (dsa_switch_host_address_match(ds, port, info->sw_index,
 						  info->port)) {
-			err = ds->ops->port_mdb_add(ds, port, info->mdb);
+			err = dsa_switch_do_mdb_add(ds, port, info->mdb);
 			if (err)
 				break;
 		}
@@ -303,13 +380,22 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
 static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
 				   struct dsa_notifier_mdb_info *info)
 {
+	int err = 0;
+	int port;
+
 	if (!ds->ops->port_mdb_del)
 		return -EOPNOTSUPP;
 
-	if (ds->index == info->sw_index)
-		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_host_address_match(ds, port, info->sw_index,
+						  info->port)) {
+			err = dsa_switch_do_mdb_del(ds, port, info->mdb);
+			if (err)
+				break;
+		}
+	}
 
-	return 0;
+	return err;
 }
 
 static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,

From 3dc80afc509831ec436e14d8ae74de330b37636d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:51 +0300
Subject: [PATCH 3063/3292] net: dsa: introduce a separate cross-chip notifier
 type for host FDBs

DSA treats some bridge FDB entries by trapping them to the CPU port.
Currently, the only class of such entries are FDB addresses learnt by
the software bridge on a foreign interface. However there are many more
to be added:

- FDB entries with the is_local flag (for termination) added by the
  bridge on the user ports (typically containing the MAC address of the
  bridge port)
- FDB entries pointing towards the bridge net device (for termination).
  Typically these contain the MAC address of the bridge net device.
- Static FDB entries installed on a foreign interface that is in the
  same bridge with a DSA user port.

The reason why a separate cross-chip notifier for host FDBs is justified
compared to normal FDBs is the same as in the case of host MDBs: the
cross-chip notifier matching function in switch.c should avoid
installing these entries on routing ports that route towards the
targeted switch, but not towards the CPU. This is required in order to
have proper support for H-like multi-chip topologies.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h |  7 +++++++
 net/dsa/port.c     | 26 ++++++++++++++++++++++++++
 net/dsa/slave.c    | 21 ++++++++++++++++-----
 net/dsa/switch.c   | 41 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index cd65933d269b..36e667ea94db 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -20,6 +20,8 @@ enum {
 	DSA_NOTIFIER_BRIDGE_LEAVE,
 	DSA_NOTIFIER_FDB_ADD,
 	DSA_NOTIFIER_FDB_DEL,
+	DSA_NOTIFIER_HOST_FDB_ADD,
+	DSA_NOTIFIER_HOST_FDB_DEL,
 	DSA_NOTIFIER_HSR_JOIN,
 	DSA_NOTIFIER_HSR_LEAVE,
 	DSA_NOTIFIER_LAG_CHANGE,
@@ -121,6 +123,7 @@ struct dsa_switchdev_event_work {
 	 */
 	unsigned char addr[ETH_ALEN];
 	u16 vid;
+	bool host_addr;
 };
 
 /* DSA_NOTIFIER_HSR_* */
@@ -211,6 +214,10 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid);
 int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid);
+int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+			  u16 vid);
+int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+			  u16 vid);
 int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
 int dsa_port_mdb_add(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 47f45f795f44..1b80e0fbdfaa 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -646,6 +646,32 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
 	return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
 }
 
+int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+			  u16 vid)
+{
+	struct dsa_notifier_fdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.addr = addr,
+		.vid = vid,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
+}
+
+int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+			  u16 vid)
+{
+	struct dsa_notifier_fdb_info info = {
+		.sw_index = dp->ds->index,
+		.port = dp->index,
+		.addr = addr,
+		.vid = vid,
+	};
+
+	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
+}
+
 int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct dsa_switch *ds = dp->ds;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 4b1d738bc3bc..ac7f4f200ab1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2315,8 +2315,12 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 	rtnl_lock();
 	switch (switchdev_work->event) {
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
-		err = dsa_port_fdb_add(dp, switchdev_work->addr,
-				       switchdev_work->vid);
+		if (switchdev_work->host_addr)
+			err = dsa_port_host_fdb_add(dp, switchdev_work->addr,
+						    switchdev_work->vid);
+		else
+			err = dsa_port_fdb_add(dp, switchdev_work->addr,
+					       switchdev_work->vid);
 		if (err) {
 			dev_err(ds->dev,
 				"port %d failed to add %pM vid %d to fdb: %d\n",
@@ -2328,8 +2332,12 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 		break;
 
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		err = dsa_port_fdb_del(dp, switchdev_work->addr,
-				       switchdev_work->vid);
+		if (switchdev_work->host_addr)
+			err = dsa_port_host_fdb_del(dp, switchdev_work->addr,
+						    switchdev_work->vid);
+		else
+			err = dsa_port_fdb_del(dp, switchdev_work->addr,
+					       switchdev_work->vid);
 		if (err) {
 			dev_err(ds->dev,
 				"port %d failed to delete %pM vid %d from fdb: %d\n",
@@ -2375,6 +2383,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 	const struct switchdev_notifier_fdb_info *fdb_info;
 	struct dsa_switchdev_event_work *switchdev_work;
+	bool host_addr = false;
 	struct dsa_port *dp;
 	int err;
 
@@ -2412,7 +2421,8 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 			if (!p)
 				return NOTIFY_DONE;
 
-			dp = p->dp->cpu_dp;
+			dp = p->dp;
+			host_addr = true;
 
 			if (!dp->ds->assisted_learning_on_cpu_port)
 				return NOTIFY_DONE;
@@ -2442,6 +2452,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 		ether_addr_copy(switchdev_work->addr,
 				fdb_info->addr);
 		switchdev_work->vid = fdb_info->vid;
+		switchdev_work->host_addr = host_addr;
 
 		/* Hold a reference on the slave for dsa_fdb_offload_notify */
 		if (dsa_is_user_port(dp->ds, dp->index))
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 5439de029485..219fc9baaa1c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -253,6 +253,41 @@ static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port,
 	return 0;
 }
 
+static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
+				   struct dsa_notifier_fdb_info *info)
+{
+	int err = 0;
+	int port;
+
+	if (!ds->ops->port_fdb_add)
+		return -EOPNOTSUPP;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_host_address_match(ds, port, info->sw_index,
+						  info->port)) {
+			err = ds->ops->port_fdb_add(ds, port, info->addr,
+						    info->vid);
+			if (err)
+				break;
+		}
+	}
+
+	return err;
+}
+
+static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
+				   struct dsa_notifier_fdb_info *info)
+{
+	if (!ds->ops->port_fdb_del)
+		return -EOPNOTSUPP;
+
+	if (ds->index == info->sw_index)
+		return ds->ops->port_fdb_del(ds, info->port, info->addr,
+					     info->vid);
+
+	return 0;
+}
+
 static int dsa_switch_fdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_fdb_info *info)
 {
@@ -560,6 +595,12 @@ static int dsa_switch_event(struct notifier_block *nb,
 	case DSA_NOTIFIER_FDB_DEL:
 		err = dsa_switch_fdb_del(ds, info);
 		break;
+	case DSA_NOTIFIER_HOST_FDB_ADD:
+		err = dsa_switch_host_fdb_add(ds, info);
+		break;
+	case DSA_NOTIFIER_HOST_FDB_DEL:
+		err = dsa_switch_host_fdb_del(ds, info);
+		break;
 	case DSA_NOTIFIER_HSR_JOIN:
 		err = dsa_switch_hsr_join(ds, info);
 		break;

From 3f6e32f92a027e91f001070ec324dd3b534d948c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:52 +0300
Subject: [PATCH 3064/3292] net: dsa: reference count the FDB addresses at the
 cross-chip notifier level

The same concerns expressed for host MDB entries are valid for host FDBs
just as well:

- in the case of multiple bridges spanning the same switch chip, deleting
  a host FDB entry that belongs to one bridge will result in breakage to
  the other bridge
- not deleting FDB entries across DSA links means that the switch's
  hardware tables will eventually run out, given enough wear&tear

So do the same thing and introduce reference counting for CPU ports and
DSA links using the same data structures as we have for MDB entries.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  1 +
 net/dsa/dsa2.c    |  6 ++++
 net/dsa/switch.c  | 88 +++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 88 insertions(+), 7 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2c50546f9667..33f40c1ec379 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -288,6 +288,7 @@ struct dsa_port {
 	/* List of MAC addresses that must be forwarded on this port.
 	 * These are only valid on CPU ports and DSA links.
 	 */
+	struct list_head	fdbs;
 	struct list_head	mdbs;
 
 	bool setup;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 2035d132682f..185629f27f80 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -348,6 +348,7 @@ static int dsa_port_setup(struct dsa_port *dp)
 	if (dp->setup)
 		return 0;
 
+	INIT_LIST_HEAD(&dp->fdbs);
 	INIT_LIST_HEAD(&dp->mdbs);
 
 	switch (dp->type) {
@@ -471,6 +472,11 @@ static void dsa_port_teardown(struct dsa_port *dp)
 		break;
 	}
 
+	list_for_each_entry_safe(a, tmp, &dp->fdbs, list) {
+		list_del(&a->list);
+		kfree(a);
+	}
+
 	list_for_each_entry_safe(a, tmp, &dp->mdbs, list) {
 		list_del(&a->list);
 		kfree(a);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 219fc9baaa1c..af71b8638098 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -253,6 +253,71 @@ static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port,
 	return 0;
 }
 
+static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port,
+				 const unsigned char *addr, u16 vid)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct dsa_mac_addr *a;
+	int err;
+
+	/* No need to bother with refcounting for user ports */
+	if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+		return ds->ops->port_fdb_add(ds, port, addr, vid);
+
+	a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+	if (a) {
+		refcount_inc(&a->refcount);
+		return 0;
+	}
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	err = ds->ops->port_fdb_add(ds, port, addr, vid);
+	if (err) {
+		kfree(a);
+		return err;
+	}
+
+	ether_addr_copy(a->addr, addr);
+	a->vid = vid;
+	refcount_set(&a->refcount, 1);
+	list_add_tail(&a->list, &dp->fdbs);
+
+	return 0;
+}
+
+static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port,
+				 const unsigned char *addr, u16 vid)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct dsa_mac_addr *a;
+	int err;
+
+	/* No need to bother with refcounting for user ports */
+	if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+		return ds->ops->port_fdb_del(ds, port, addr, vid);
+
+	a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+	if (!a)
+		return -ENOENT;
+
+	if (!refcount_dec_and_test(&a->refcount))
+		return 0;
+
+	err = ds->ops->port_fdb_del(ds, port, addr, vid);
+	if (err) {
+		refcount_inc(&a->refcount);
+		return err;
+	}
+
+	list_del(&a->list);
+	kfree(a);
+
+	return 0;
+}
+
 static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
 				   struct dsa_notifier_fdb_info *info)
 {
@@ -265,7 +330,7 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
 	for (port = 0; port < ds->num_ports; port++) {
 		if (dsa_switch_host_address_match(ds, port, info->sw_index,
 						  info->port)) {
-			err = ds->ops->port_fdb_add(ds, port, info->addr,
+			err = dsa_switch_do_fdb_add(ds, port, info->addr,
 						    info->vid);
 			if (err)
 				break;
@@ -278,14 +343,23 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
 static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
 				   struct dsa_notifier_fdb_info *info)
 {
+	int err = 0;
+	int port;
+
 	if (!ds->ops->port_fdb_del)
 		return -EOPNOTSUPP;
 
-	if (ds->index == info->sw_index)
-		return ds->ops->port_fdb_del(ds, info->port, info->addr,
-					     info->vid);
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_host_address_match(ds, port, info->sw_index,
+						  info->port)) {
+			err = dsa_switch_do_fdb_del(ds, port, info->addr,
+						    info->vid);
+			if (err)
+				break;
+		}
+	}
 
-	return 0;
+	return err;
 }
 
 static int dsa_switch_fdb_add(struct dsa_switch *ds,
@@ -296,7 +370,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
 	if (!ds->ops->port_fdb_add)
 		return -EOPNOTSUPP;
 
-	return ds->ops->port_fdb_add(ds, port, info->addr, info->vid);
+	return dsa_switch_do_fdb_add(ds, port, info->addr, info->vid);
 }
 
 static int dsa_switch_fdb_del(struct dsa_switch *ds,
@@ -307,7 +381,7 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	if (!ds->ops->port_fdb_del)
 		return -EOPNOTSUPP;
 
-	return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
+	return dsa_switch_do_fdb_del(ds, port, info->addr, info->vid);
 }
 
 static int dsa_switch_hsr_join(struct dsa_switch *ds,

From 26ee7b06a4d3086a3751b69c14663ba6c6bbfe7f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:53 +0300
Subject: [PATCH 3065/3292] net: dsa: install the host MDB and FDB entries in
 the master's RX filter

If the DSA master implements strict address filtering, then the unicast
and multicast addresses kept by the DSA CPU ports should be synchronized
with the address lists of the DSA master.

Note that we want the synchronization of the master's address lists even
if the DSA switch doesn't support unicast/multicast database operations,
on the premises that the packets will be flooded to the CPU in that
case, and we should still instruct the master to receive them. This is
why we do the dev_uc_add() etc first, even if dsa_port_notify() returns
-EOPNOTSUPP. In turn, dev_uc_add() and friends return error only if
memory allocation fails, so it is probably ok to check and propagate
that error code and not just ignore it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/port.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index 1b80e0fbdfaa..778b0dc2bb39 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -655,6 +655,12 @@ int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 		.addr = addr,
 		.vid = vid,
 	};
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+	int err;
+
+	err = dev_uc_add(cpu_dp->master, addr);
+	if (err)
+		return err;
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
 }
@@ -668,6 +674,12 @@ int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
 		.addr = addr,
 		.vid = vid,
 	};
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+	int err;
+
+	err = dev_uc_del(cpu_dp->master, addr);
+	if (err)
+		return err;
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
 }
@@ -715,6 +727,12 @@ int dsa_port_host_mdb_add(const struct dsa_port *dp,
 		.port = dp->index,
 		.mdb = mdb,
 	};
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+	int err;
+
+	err = dev_mc_add(cpu_dp->master, mdb->addr);
+	if (err)
+		return err;
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
 }
@@ -727,6 +745,12 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp,
 		.port = dp->index,
 		.mdb = mdb,
 	};
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+	int err;
+
+	err = dev_mc_del(cpu_dp->master, mdb->addr);
+	if (err)
+		return err;
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
 }

From 3068d466a67ec96a6972f248f5c7a7b6763dbeb1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:54 +0300
Subject: [PATCH 3066/3292] net: dsa: sync static FDB entries on foreign
 interfaces to hardware

DSA is able to install FDB entries towards the CPU port for addresses
which were dynamically learnt by the software bridge on foreign
interfaces that are in the same bridge with a DSA switch interface.
Since this behavior is opportunistic, it is guarded by the
"assisted_learning_on_cpu_port" property which can be enabled by drivers
and is not done automatically (since certain switches may support
address learning of packets coming from the CPU port).

But if those FDB entries added on the foreign interfaces are static
(added by the user) instead of dynamically learnt, currently DSA does
not do anything (and arguably it should).

Because static FDB entries are not supposed to move on their own, there
is no downside in reusing the "assisted_learning_on_cpu_port" logic to
sync static FDB entries to the DSA CPU port unconditionally, even if
assisted_learning_on_cpu_port is not requested by the driver.

For example, this situation:

   br0
   / \
swp0 dummy0

$ bridge fdb add 02:00:de:ad:00:01 dev dummy0 vlan 1 master static

Results in DSA adding an entry in the hardware FDB, pointing this
address towards the CPU port.

The same is true for entries added to the bridge itself, e.g:

$ bridge fdb add 02:00:de:ad:00:01 dev br0 vlan 1 self local

(except that right now, DSA still ignores 'local' FDB entries, this will
be changed in a later patch)

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ac7f4f200ab1..ea9a7c1ce83e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2403,9 +2403,12 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 
 			dp = dsa_slave_to_port(dev);
 		} else {
-			/* Snoop addresses learnt on foreign interfaces
-			 * bridged with us, for switches that don't
-			 * automatically learn SA from CPU-injected traffic
+			/* Snoop addresses added to foreign interfaces
+			 * bridged with us, or the bridge
+			 * itself. Dynamically learned addresses can
+			 * also be added for switches that don't
+			 * automatically learn SA from CPU-injected
+			 * traffic.
 			 */
 			struct net_device *br_dev;
 			struct dsa_slave_priv *p;
@@ -2424,7 +2427,8 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 			dp = p->dp;
 			host_addr = true;
 
-			if (!dp->ds->assisted_learning_on_cpu_port)
+			if (!fdb_info->added_by_user &&
+			    !dp->ds->assisted_learning_on_cpu_port)
 				return NOTIFY_DONE;
 
 			/* When the bridge learns an address on an offloaded

From 10fae4ac89ce5c2ead6c6c35fd09651b5f97ae05 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 29 Jun 2021 17:06:55 +0300
Subject: [PATCH 3067/3292] net: dsa: include bridge addresses which are local
 in the host fdb list

The bridge automatically creates local (not forwarded) fdb entries
pointing towards physical ports with their interface MAC addresses.
For switchdev, the significance of these fdb entries is the exact
opposite of that of non-local entries: instead of sending these frame
outwards, we must send them inwards (towards the host).

NOTE: The bridge's own MAC address is also "local". If that address is
not shared with any port, the bridge's MAC is not be added by this
functionality - but the following commit takes care of that case.

NOTE 2: We mark these addresses as host-filtered regardless of the value
of ds->assisted_learning_on_cpu_port. This is because, as opposed to the
speculative logic done for dynamic address learning on foreign
interfaces, the local FDB entries are rather fixed, so there isn't any
risk of them migrating from one bridge port to another.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ea9a7c1ce83e..d006bd04f84a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2398,10 +2398,12 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 		fdb_info = ptr;
 
 		if (dsa_slave_dev_check(dev)) {
-			if (!fdb_info->added_by_user || fdb_info->is_local)
-				return NOTIFY_OK;
-
 			dp = dsa_slave_to_port(dev);
+
+			if (fdb_info->is_local)
+				host_addr = true;
+			else if (!fdb_info->added_by_user)
+				return NOTIFY_OK;
 		} else {
 			/* Snoop addresses added to foreign interfaces
 			 * bridged with us, or the bridge
@@ -2425,9 +2427,15 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 				return NOTIFY_DONE;
 
 			dp = p->dp;
-			host_addr = true;
+			host_addr = fdb_info->is_local;
 
-			if (!fdb_info->added_by_user &&
+			/* FDB entries learned by the software bridge should
+			 * be installed as host addresses only if the driver
+			 * requests assisted learning.
+			 * On the other hand, FDB entries for local termination
+			 * should always be installed.
+			 */
+			if (!fdb_info->added_by_user && !fdb_info->is_local &&
 			    !dp->ds->assisted_learning_on_cpu_port)
 				return NOTIFY_DONE;
 

From 81a619f787593daf6224068c6dc8022ece591844 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:56 +0300
Subject: [PATCH 3068/3292] net: dsa: include fdb entries pointing to bridge in
 the host fdb list

The bridge supports a legacy way of adding local (non-forwarded) FDB
entries, which works on an individual port basis:

bridge fdb add dev swp0 00:01:02:03:04:05 master local

As well as a new way, added by Roopa Prabhu in commit 3741873b4f73
("bridge: allow adding of fdb entries pointing to the bridge device"):

bridge fdb add dev br0 00:01:02:03:04:05 self local

The two commands are functionally equivalent, except that the first one
produces an entry with fdb->dst == swp0, and the other an entry with
fdb->dst == NULL. The confusing part, though, is that even if fdb->dst
is swp0 for the 'local on port' entry, that destination is not used.

Nonetheless, the idea is that the bridge has reference counting for
local entries, and local entries pointing towards the bridge are still
'as local' as local entries for a port.

The bridge adds the MAC addresses of the interfaces automatically as
FDB entries with is_local=1. For the MAC address of the ports, fdb->dst
will be equal to the port, and for the MAC address of the bridge,
fdb->dst will point towards the bridge (i.e. be NULL). Therefore, if the
MAC address of the bridge is not inherited from either of the physical
ports, then we must explicitly catch local FDB entries emitted towards
the br0, otherwise we'll miss the MAC address of the bridge (and, of
course, any entry with 'bridge add dev br0 ... self local').

Co-developed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d006bd04f84a..a7b5d2a41472 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2415,7 +2415,11 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 			struct net_device *br_dev;
 			struct dsa_slave_priv *p;
 
-			br_dev = netdev_master_upper_dev_get_rcu(dev);
+			if (netif_is_bridge_master(dev))
+				br_dev = dev;
+			else
+				br_dev = netdev_master_upper_dev_get_rcu(dev);
+
 			if (!br_dev)
 				return NOTIFY_DONE;
 
@@ -2443,8 +2447,13 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 			 * LAG we don't want to send traffic to the CPU, the
 			 * other ports bridged with the LAG should be able to
 			 * autonomously forward towards it.
+			 * On the other hand, if the address is local
+			 * (therefore not learned) then we want to trap it to
+			 * the CPU regardless of whether the interface it
+			 * belongs to is offloaded or not.
 			 */
-			if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev))
+			if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev) &&
+			    !fdb_info->is_local)
 				return NOTIFY_DONE;
 		}
 

From 4bed397c3e65638e9118956bda85d2a9bcac3668 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:57 +0300
Subject: [PATCH 3069/3292] net: dsa: ensure during dsa_fdb_offload_notify that
 dev_hold and dev_put are on the same dev

When
(a) "dev" is a bridge port which the DSA switch tree offloads, but is
    otherwise not a dsa slave (such as a LAG netdev), or
(b) "dev" is the bridge net device itself

then strange things happen to the dev_hold/dev_put pair:
dsa_schedule_work() will still be called with a DSA port that offloads
that netdev, but dev_hold() will be called on the non-DSA netdev.
Then the "if" condition in dsa_slave_switchdev_event_work() does not
pass, because "dev" is not a DSA netdev, so dev_put() is not called.

This results in the simple fact that we have a reference counting
mismatch on the "dev" net device.

This can be seen when we add support for host addresses installed on the
bridge net device.

ip link add br1 type bridge
ip link set br1 address 00:01:02:03:04:05
ip link set swp0 master br1
ip link del br1
[  968.512278] unregister_netdevice: waiting for br1 to become free. Usage count = 5

It seems foolish to do penny pinching and not add the net_device pointer
in the dsa_switchdev_event_work structure, so let's finally do that.
As an added bonus, when we start offloading local entries pointing
towards the bridge, these will now properly appear as 'offloaded' in
'bridge fdb' (this was not possible before, because 'dev' was assumed to
only be a DSA net device):

00:01:02:03:04:05 dev br0 vlan 1 offload master br0 permanent
00:01:02:03:04:05 dev br0 offload master br0 permanent

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h | 1 +
 net/dsa/slave.c    | 9 ++++-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 36e667ea94db..f201c33980bf 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -116,6 +116,7 @@ struct dsa_notifier_mrp_ring_role_info {
 struct dsa_switchdev_event_work {
 	struct dsa_switch *ds;
 	int port;
+	struct net_device *dev;
 	struct work_struct work;
 	unsigned long event;
 	/* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a7b5d2a41472..ffbba1e71551 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2349,9 +2349,8 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 	}
 	rtnl_unlock();
 
+	dev_put(switchdev_work->dev);
 	kfree(switchdev_work);
-	if (dsa_is_user_port(ds, dp->index))
-		dev_put(dp->slave);
 }
 
 static int dsa_lower_dev_walk(struct net_device *lower_dev,
@@ -2469,15 +2468,15 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 		switchdev_work->ds = dp->ds;
 		switchdev_work->port = dp->index;
 		switchdev_work->event = event;
+		switchdev_work->dev = dev;
 
 		ether_addr_copy(switchdev_work->addr,
 				fdb_info->addr);
 		switchdev_work->vid = fdb_info->vid;
 		switchdev_work->host_addr = host_addr;
 
-		/* Hold a reference on the slave for dsa_fdb_offload_notify */
-		if (dsa_is_user_port(dp->ds, dp->index))
-			dev_hold(dev);
+		/* Hold a reference for dsa_fdb_offload_notify */
+		dev_hold(dev);
 		dsa_schedule_work(&switchdev_work->work);
 		break;
 	default:

From 63c51453c82cddc27556233ff41041ea9fc49fe0 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 17:06:58 +0300
Subject: [PATCH 3070/3292] net: dsa: replay the local bridge FDB entries
 pointing to the bridge dev too

When we join a bridge that already has some local addresses pointing to
itself, we do not get those notifications. Similarly, when we leave that
bridge, we do not get notifications for the deletion of those entries.
The only switchdev notifications we get are those of entries added while
the DSA port is enslaved to the bridge.

This makes use cases such as the following work properly (with the
number of additions and removals properly balanced):

ip link add br0 type bridge
ip link add br1 type bridge
ip link set br0 address 00:01:02:03:04:05
ip link set br1 address 00:01:02:03:04:05
ip link set swp0 up
ip link set swp1 up
ip link set swp0 master br0
ip link set swp1 master br1
ip link set br0 up
ip link set br1 up
ip link del br1 # 00:01:02:03:04:05 still installed on the CPU port
ip link del br0 # 00:01:02:03:04:05 finally removed from the CPU port

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/port.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index 778b0dc2bb39..28b45b7e66df 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -199,11 +199,17 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
+	/* Forwarding and termination FDB entries on the port */
 	err = br_fdb_replay(br, brport_dev, dp, true,
 			    &dsa_slave_switchdev_notifier);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
+	/* Termination FDB entries on the bridge itself */
+	err = br_fdb_replay(br, br, dp, true, &dsa_slave_switchdev_notifier);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
 	err = br_vlan_replay(br, brport_dev, dp, true,
 			     &dsa_slave_switchdev_blocking_notifier, extack);
 	if (err && err != -EOPNOTSUPP)
@@ -225,11 +231,17 @@ static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp,
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
+	/* Forwarding and termination FDB entries on the port */
 	err = br_fdb_replay(br, brport_dev, dp, false,
 			    &dsa_slave_switchdev_notifier);
 	if (err && err != -EOPNOTSUPP)
 		return err;
 
+	/* Termination FDB entries on the bridge itself */
+	err = br_fdb_replay(br, br, dp, false, &dsa_slave_switchdev_notifier);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
 	err = br_vlan_replay(br, brport_dev, dp, false,
 			     &dsa_slave_switchdev_blocking_notifier, extack);
 	if (err && err != -EOPNOTSUPP)

From c24d37322548a6ec3caec67100d28b9c1f89f60a Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 28 Jun 2021 19:33:23 -0700
Subject: [PATCH 3071/3292] mm/gup: fix try_grab_compound_head() race with
 split_huge_page()

try_grab_compound_head() is used to grab a reference to a page from
get_user_pages_fast(), which is only protected against concurrent freeing
of page tables (via local_irq_save()), but not against concurrent TLB
flushes, freeing of data pages, or splitting of compound pages.

Because no reference is held to the page when try_grab_compound_head() is
called, the page may have been freed and reallocated by the time its
refcount has been elevated; therefore, once we're holding a stable
reference to the page, the caller re-checks whether the PTE still points
to the same page (with the same access rights).

The problem is that try_grab_compound_head() has to grab a reference on
the head page; but between the time we look up what the head page is and
the time we actually grab a reference on the head page, the compound page
may have been split up (either explicitly through split_huge_page() or by
freeing the compound page to the buddy allocator and then allocating its
individual order-0 pages).  If that happens, get_user_pages_fast() may end
up returning the right page but lifting the refcount on a now-unrelated
page, leading to use-after-free of pages.

To fix it: Re-check whether the pages still belong together after lifting
the refcount on the head page.  Move anything else that checks
compound_head(page) below the refcount increment.

This can't actually happen on bare-metal x86 (because there, disabling
IRQs locks out remote TLB flushes), but it can happen on virtualized x86
(e.g.  under KVM) and probably also on arm64.  The race window is pretty
narrow, and constantly allocating and shattering hugepages isn't exactly
fast; for now I've only managed to reproduce this in an x86 KVM guest with
an artificially widened timing window (by adding a loop that repeatedly
calls `inl(0x3f8 + 5)` in `try_get_compound_head()` to force VM exits, so
that PV TLB flushes are used instead of IPIs).

As requested on the list, also replace the existing VM_BUG_ON_PAGE() with
a warning and bailout.  Since the existing code only performed the BUG_ON
check on DEBUG_VM kernels, ensure that the new code also only performs the
check under that configuration - I don't want to mix two logically
separate changes together too much.  The macro VM_WARN_ON_ONCE_PAGE()
doesn't return a value on !DEBUG_VM, so wrap the whole check in an #ifdef
block.  An alternative would be to change the VM_WARN_ON_ONCE_PAGE()
definition for !DEBUG_VM such that it always returns false, but since that
would differ from the behavior of the normal WARN macros, it might be too
confusing for readers.

Link: https://lkml.kernel.org/r/20210615012014.1100672-1-jannh@google.com
Fixes: 7aef4172c795 ("mm: handle PTE-mapped tail pages in gerneric fast gup implementaiton")
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Jan Kara <jack@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c | 58 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 15 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 3ded6a5f26b2..90262e448552 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -44,6 +44,23 @@ static void hpage_pincount_sub(struct page *page, int refs)
 	atomic_sub(refs, compound_pincount_ptr(page));
 }
 
+/* Equivalent to calling put_page() @refs times. */
+static void put_page_refs(struct page *page, int refs)
+{
+#ifdef CONFIG_DEBUG_VM
+	if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
+		return;
+#endif
+
+	/*
+	 * Calling put_page() for each ref is unnecessarily slow. Only the last
+	 * ref needs a put_page().
+	 */
+	if (refs > 1)
+		page_ref_sub(page, refs - 1);
+	put_page(page);
+}
+
 /*
  * Return the compound head page with ref appropriately incremented,
  * or NULL if that failed.
@@ -56,6 +73,21 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
 		return NULL;
 	if (unlikely(!page_cache_add_speculative(head, refs)))
 		return NULL;
+
+	/*
+	 * At this point we have a stable reference to the head page; but it
+	 * could be that between the compound_head() lookup and the refcount
+	 * increment, the compound page was split, in which case we'd end up
+	 * holding a reference on a page that has nothing to do with the page
+	 * we were given anymore.
+	 * So now that the head page is stable, recheck that the pages still
+	 * belong together.
+	 */
+	if (unlikely(compound_head(page) != head)) {
+		put_page_refs(head, refs);
+		return NULL;
+	}
+
 	return head;
 }
 
@@ -95,6 +127,14 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
 			     !is_pinnable_page(page)))
 			return NULL;
 
+		/*
+		 * CAUTION: Don't use compound_head() on the page before this
+		 * point, the result won't be stable.
+		 */
+		page = try_get_compound_head(page, refs);
+		if (!page)
+			return NULL;
+
 		/*
 		 * When pinning a compound page of order > 1 (which is what
 		 * hpage_pincount_available() checks for), use an exact count to
@@ -103,15 +143,10 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
 		 * However, be sure to *also* increment the normal page refcount
 		 * field at least once, so that the page really is pinned.
 		 */
-		if (!hpage_pincount_available(page))
-			refs *= GUP_PIN_COUNTING_BIAS;
-
-		page = try_get_compound_head(page, refs);
-		if (!page)
-			return NULL;
-
 		if (hpage_pincount_available(page))
 			hpage_pincount_add(page, refs);
+		else
+			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
 
 		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
 				    orig_refs);
@@ -135,14 +170,7 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
 			refs *= GUP_PIN_COUNTING_BIAS;
 	}
 
-	VM_BUG_ON_PAGE(page_ref_count(page) < refs, page);
-	/*
-	 * Calling put_page() for each ref is unnecessarily slow. Only the last
-	 * ref needs a put_page().
-	 */
-	if (refs > 1)
-		page_ref_sub(page, refs - 1);
-	put_page(page);
+	put_page_refs(page, refs);
 }
 
 /**

From 122e093c1734361dedb64f65c99b93e28e4624f4 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:33:26 -0700
Subject: [PATCH 3072/3292] mm/page_alloc: fix memory map initialization for
 descending nodes

On systems with memory nodes sorted in descending order, for instance Dell
Precision WorkStation T5500, the struct pages for higher PFNs and
respectively lower nodes, could be overwritten by the initialization of
struct pages corresponding to the holes in the memory sections.

For example for the below memory layout

[    0.245624] Early memory node ranges
[    0.248496]   node   1: [mem 0x0000000000001000-0x0000000000090fff]
[    0.251376]   node   1: [mem 0x0000000000100000-0x00000000dbdf8fff]
[    0.254256]   node   1: [mem 0x0000000100000000-0x0000001423ffffff]
[    0.257144]   node   0: [mem 0x0000001424000000-0x0000002023ffffff]

the range 0x1424000000 - 0x1428000000 in the beginning of node 0 starts in
the middle of a section and will be considered as a hole during the
initialization of the last section in node 1.

The wrong initialization of the memory map causes panic on boot when
CONFIG_DEBUG_VM is enabled.

Reorder loop order of the memory map initialization so that the outer loop
will always iterate over populated memory regions in the ascending order
and the inner loop will select the zone corresponding to the PFN range.

This way initialization of the struct pages for the memory holes will be
always done for the ranges that are actually not populated.

[akpm@linux-foundation.org: coding style fixes]

Link: https://lkml.kernel.org/r/YNXlMqBbL+tBG7yq@kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213073
Link: https://lkml.kernel.org/r/20210624062305.10940-1-rppt@kernel.org
Fixes: 0740a50b9baa ("mm/page_alloc.c: refactor initialization of struct page for holes in memory layout")
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Boris Petkov <bp@alien8.de>
Cc: Robert Shteynfeld <robert.shteynfeld@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 -
 mm/page_alloc.c    | 94 ++++++++++++++++++++++++++++------------------
 2 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8ae31622deef..9afb8998e7e5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2474,7 +2474,6 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_range(unsigned long, int, unsigned long,
 		unsigned long, unsigned long, enum meminit_context,
 		struct vmem_altmap *, int migratetype);
-extern void memmap_init_zone(struct zone *zone);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef2265f86b91..5b5c9f5813b9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6400,7 +6400,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
 		return;
 
 	/*
-	 * The call to memmap_init_zone should have already taken care
+	 * The call to memmap_init should have already taken care
 	 * of the pages reserved for the memmap, so we can just jump to
 	 * the end of that region and start processing the device pages.
 	 */
@@ -6465,7 +6465,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 /*
  * Only struct pages that correspond to ranges defined by memblock.memory
  * are zeroed and initialized by going through __init_single_page() during
- * memmap_init_zone().
+ * memmap_init_zone_range().
  *
  * But, there could be struct pages that correspond to holes in
  * memblock.memory. This can happen because of the following reasons:
@@ -6484,9 +6484,9 @@ static void __meminit zone_init_free_lists(struct zone *zone)
  *   zone/node above the hole except for the trailing pages in the last
  *   section that will be appended to the zone/node below.
  */
-static u64 __meminit init_unavailable_range(unsigned long spfn,
-					    unsigned long epfn,
-					    int zone, int node)
+static void __init init_unavailable_range(unsigned long spfn,
+					  unsigned long epfn,
+					  int zone, int node)
 {
 	unsigned long pfn;
 	u64 pgcnt = 0;
@@ -6502,56 +6502,77 @@ static u64 __meminit init_unavailable_range(unsigned long spfn,
 		pgcnt++;
 	}
 
-	return pgcnt;
+	if (pgcnt)
+		pr_info("On node %d, zone %s: %lld pages in unavailable ranges",
+			node, zone_names[zone], pgcnt);
 }
 #else
-static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
-					 int zone, int node)
+static inline void init_unavailable_range(unsigned long spfn,
+					  unsigned long epfn,
+					  int zone, int node)
 {
-	return 0;
 }
 #endif
 
-void __meminit __weak memmap_init_zone(struct zone *zone)
+static void __init memmap_init_zone_range(struct zone *zone,
+					  unsigned long start_pfn,
+					  unsigned long end_pfn,
+					  unsigned long *hole_pfn)
 {
 	unsigned long zone_start_pfn = zone->zone_start_pfn;
 	unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
-	int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
-	static unsigned long hole_pfn;
+	int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
+
+	start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
+	end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
+
+	if (start_pfn >= end_pfn)
+		return;
+
+	memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
+			  zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+
+	if (*hole_pfn < start_pfn)
+		init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
+
+	*hole_pfn = end_pfn;
+}
+
+static void __init memmap_init(void)
+{
 	unsigned long start_pfn, end_pfn;
-	u64 pgcnt = 0;
+	unsigned long hole_pfn = 0;
+	int i, j, zone_id, nid;
 
-	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
-		start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
-		end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		struct pglist_data *node = NODE_DATA(nid);
 
-		if (end_pfn > start_pfn)
-			memmap_init_range(end_pfn - start_pfn, nid,
-					zone_id, start_pfn, zone_end_pfn,
-					MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+		for (j = 0; j < MAX_NR_ZONES; j++) {
+			struct zone *zone = node->node_zones + j;
 
-		if (hole_pfn < start_pfn)
-			pgcnt += init_unavailable_range(hole_pfn, start_pfn,
-							zone_id, nid);
-		hole_pfn = end_pfn;
+			if (!populated_zone(zone))
+				continue;
+
+			memmap_init_zone_range(zone, start_pfn, end_pfn,
+					       &hole_pfn);
+			zone_id = j;
+		}
 	}
 
 #ifdef CONFIG_SPARSEMEM
 	/*
-	 * Initialize the hole in the range [zone_end_pfn, section_end].
-	 * If zone boundary falls in the middle of a section, this hole
-	 * will be re-initialized during the call to this function for the
-	 * higher zone.
+	 * Initialize the memory map for hole in the range [memory_end,
+	 * section_end].
+	 * Append the pages in this hole to the highest zone in the last
+	 * node.
+	 * The call to init_unavailable_range() is outside the ifdef to
+	 * silence the compiler warining about zone_id set but not used;
+	 * for FLATMEM it is a nop anyway
 	 */
-	end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION);
+	end_pfn = round_up(end_pfn, PAGES_PER_SECTION);
 	if (hole_pfn < end_pfn)
-		pgcnt += init_unavailable_range(hole_pfn, end_pfn,
-						zone_id, nid);
 #endif
-
-	if (pgcnt)
-		pr_info("  %s zone: %llu pages in unavailable ranges\n",
-			zone->name, pgcnt);
+		init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
 }
 
 static int zone_batchsize(struct zone *zone)
@@ -7254,7 +7275,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 		set_pageblock_order();
 		setup_usemap(zone);
 		init_currently_empty_zone(zone, zone->zone_start_pfn, size);
-		memmap_init_zone(zone);
 	}
 }
 
@@ -7780,6 +7800,8 @@ void __init free_area_init(unsigned long *max_zone_pfn)
 			node_set_state(nid, N_MEMORY);
 		check_for_memory(pgdat, nid);
 	}
+
+	memmap_init();
 }
 
 static int __init cmdline_parse_core(char *p, unsigned long *core,

From ff4b2b4014cbffb3d32b22629252f4dc8616b0fe Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:33:29 -0700
Subject: [PATCH 3073/3292] mm/page_alloc: correct return value of populated
 elements if bulk array is populated

Dave Jones reported the following

	This made it into 5.13 final, and completely breaks NFSD for me
	(Serving tcp v3 mounts).  Existing mounts on clients hang, as do
	new mounts from new clients.  Rebooting the server back to rc7
	everything recovers.

The commit b3b64ebd3822 ("mm/page_alloc: do bulk array bounds check after
checking populated elements") returns the wrong value if the array is
already populated which is interpreted as an allocation failure.  Dave
reported this fixes his problem and it also passed a test running dbench
over NFS.

Link: https://lkml.kernel.org/r/20210628150219.GC3840@techsingularity.net
Fixes: b3b64ebd3822 ("mm/page_alloc: do bulk array bounds check after checking populated elements")
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Tested-by: Dave Jones <davej@codemonkey.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org> [5.13+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b5c9f5813b9..2bf03c76504b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5058,7 +5058,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 
 	/* Already populated array? */
 	if (unlikely(page_array && nr_pages - nr_populated == 0))
-		return 0;
+		return nr_populated;
 
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)

From 20ce0c2d5a303c41c0e02ceb596837868e290dcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Mon, 28 Jun 2021 19:33:32 -0700
Subject: [PATCH 3074/3292] kthread: switch to new kerneldoc syntax for named
 variable macro argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The syntax without dots is available since commit 43756e347f21
("scripts/kernel-doc: Add support for named variable macro arguments").

The same HTML output is produced with and without this patch.

Link: https://lkml.kernel.org/r/20210513161702.1721039-1-j.neuschaefer@gmx.net
Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2484ed97e72f..db3eafea168f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -18,7 +18,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  * @threadfn: the function to run in the thread
  * @data: data pointer for @threadfn()
  * @namefmt: printf-style format string for the thread name
- * @arg...: arguments for @namefmt.
+ * @arg: arguments for @namefmt.
  *
  * This macro will create a kthread on the current node, leaving it in
  * the stopped state.  This is just a helper for kthread_create_on_node();

From d71ba1649fa3c464c51ec7163e4b817345bff2c7 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Mon, 28 Jun 2021 19:33:35 -0700
Subject: [PATCH 3075/3292] kthread_worker: fix return value when
 kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()

kthread_mod_delayed_work() might race with
kthread_cancel_delayed_work_sync() or another kthread_mod_delayed_work()
call.  The function lets the other operation win when it sees
work->canceling counter set.  And it returns @false.

But it should return @true as it is done by the related workqueue API, see
mod_delayed_work_on().

The reason is that the return value might be used for reference counting.
It has to distinguish the case when the number of queued works has changed
or stayed the same.

The change is safe.  kthread_mod_delayed_work() return value is not
checked anywhere at the moment.

Link: https://lore.kernel.org/r/20210521163526.GA17916@redhat.com
Link: https://lkml.kernel.org/r/20210610133051.15337-4-pmladek@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Minchan Kim <minchan@google.com>
Cc: <jenhaochen@google.com>
Cc: Martin Liu <liumartin@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kthread.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 0fccf7d0c6a1..86ae5f2e6db8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1156,14 +1156,14 @@ static bool __kthread_cancel_work(struct kthread_work *work)
  * modify @dwork's timer so that it expires after @delay. If @delay is zero,
  * @work is guaranteed to be queued immediately.
  *
- * Return: %true if @dwork was pending and its timer was modified,
- * %false otherwise.
+ * Return: %false if @dwork was idle and queued, %true otherwise.
  *
  * A special case is when the work is being canceled in parallel.
  * It might be caused either by the real kthread_cancel_delayed_work_sync()
  * or yet another kthread_mod_delayed_work() call. We let the other command
- * win and return %false here. The caller is supposed to synchronize these
- * operations a reasonable way.
+ * win and return %true here. The return value can be used for reference
+ * counting and the number of queued works stays the same. Anyway, the caller
+ * is supposed to synchronize these operations a reasonable way.
  *
  * This function is safe to call from any context including IRQ handler.
  * See __kthread_cancel_work() and kthread_delayed_work_timer_fn()
@@ -1175,13 +1175,15 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 {
 	struct kthread_work *work = &dwork->work;
 	unsigned long flags;
-	int ret = false;
+	int ret;
 
 	raw_spin_lock_irqsave(&worker->lock, flags);
 
 	/* Do not bother with canceling when never queued. */
-	if (!work->worker)
+	if (!work->worker) {
+		ret = false;
 		goto fast_queue;
+	}
 
 	/* Work must not be used with >1 worker, see kthread_queue_work() */
 	WARN_ON_ONCE(work->worker != worker);
@@ -1199,8 +1201,11 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 	 * be used for reference counting.
 	 */
 	kthread_cancel_delayed_work_timer(work, &flags);
-	if (work->canceling)
+	if (work->canceling) {
+		/* The number of works in the queue does not change. */
+		ret = true;
 		goto out;
+	}
 	ret = __kthread_cancel_work(work);
 
 fast_queue:

From f589c67ff08c82405f3e69603ac159ea76933a50 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 28 Jun 2021 19:33:38 -0700
Subject: [PATCH 3076/3292] ia64: headers: drop duplicated words

Delete the repeated words "to" and "the".

Link: https://lkml.kernel.org/r/20210507184837.10754-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/pal.h       | 2 +-
 arch/ia64/include/asm/spinlock.h  | 2 +-
 arch/ia64/include/asm/uv/uv_hub.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h
index b1d87955e8cc..5c51fceedaf9 100644
--- a/arch/ia64/include/asm/pal.h
+++ b/arch/ia64/include/asm/pal.h
@@ -1086,7 +1086,7 @@ static inline long ia64_pal_freq_base(unsigned long *platform_base_freq)
 
 /*
  * Get the ratios for processor frequency, bus frequency and interval timer to
- * to base frequency of the platform
+ * the base frequency of the platform
  */
 static inline s64
 ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio,
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 5f620e66384e..864775970c50 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -26,7 +26,7 @@
  * the queue, and the other indicating the current tail. The lock is acquired
  * by atomically noting the tail and incrementing it by one (thus adding
  * ourself to the queue and noting our position), then waiting until the head
- * becomes equal to the the initial value of the tail.
+ * becomes equal to the initial value of the tail.
  * The pad bits in the middle are used to prevent the next_ticket number
  * overflowing into the now_serving number.
  *
diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h
index 2a88c7204e52..809ddb6896db 100644
--- a/arch/ia64/include/asm/uv/uv_hub.h
+++ b/arch/ia64/include/asm/uv/uv_hub.h
@@ -257,7 +257,7 @@ static inline int uv_numa_blade_id(void)
 	return 0;
 }
 
-/* Convert a cpu number to the the UV blade number */
+/* Convert a cpu number to the UV blade number */
 static inline int uv_cpu_to_blade_id(int cpu)
 {
 	return 0;

From c5f320ff8a79501bb59338278336ec43acb9d7e2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 28 Jun 2021 19:33:41 -0700
Subject: [PATCH 3077/3292] ia64: mca_drv: fix incorrect array size calculation

gcc points out a mistake in the mca driver that goes back to before the
git history:

arch/ia64/kernel/mca_drv.c: In function 'init_record_index_pools':
arch/ia64/kernel/mca_drv.c:346:54: error: expression does not compute the number of elements in this array; element typ
e is 'int', not 'size_t' {aka 'long unsigned int'} [-Werror=sizeof-array-div]
  346 |         for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++)
      |                                                      ^

This is the same as sizeof(size_t), which is two shorter than the actual
array.  Use the ARRAY_SIZE() macro to get the correct calculation instead.

Link: https://lkml.kernel.org/r/20210514214123.875971-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/mca_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 36a69b4e6169..5bfc79be4cef 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -343,7 +343,7 @@ init_record_index_pools(void)
 
 	/* - 2 - */
 	sect_min_size = sal_log_sect_min_sizes[0];
-	for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++)
+	for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++)
 		if (sect_min_size > sal_log_sect_min_sizes[i])
 			sect_min_size = sal_log_sect_min_sizes[i];
 

From b83c8ba40cebcee1d07cb852c23d616acf8988b7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 28 Jun 2021 19:33:44 -0700
Subject: [PATCH 3078/3292] streamline_config.pl: make spacing consistent

Patch series "streamline_config.pl: Fix Perl spacing".

Talking with John Hawley about how vim and emacs deal with Perl files with
respect to tabs and spaces, I found that some of my Perl code in the
kernel had inconsistent spacing.  The way emacs handles Perl by default is
to use 4 spaces per indent, but make all 8 spaces into a single tab.  Vim
does not do this by default.  But if you add the vim variable control:

 # vim: softtabstop=4

to a perl file, it makes vim behave the same way as emacs.

The first patch is to change all 8 spaces into a single tab (mostly from
people editing the file with vim).  The next patch adds the softtabstop
variable to make vim act like emacs by default.

This patch (of 2):

As Perl code tends to have 4 space indentation, but uses tabs for every 8
spaces, make that consistent in the streamline_config.pl code.  Replace
all 8 spaces with a single tab.

Link: https://lkml.kernel.org/r/20210322214032.133596267@goodmis.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: "John (Warthog9) Hawley" <warthog9@kernel.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/kconfig/streamline_config.pl | 74 ++++++++++++++--------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index 911c72a2dbc4..2e65aa9edf97 100755
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -601,12 +601,12 @@ if (defined($ENV{'LMC_KEEP'})) {
 sub in_preserved_kconfigs {
     my $kconfig = $config2kfile{$_[0]};
     if (!defined($kconfig)) {
-        return 0;
+	return 0;
     }
     foreach my $excl (@preserved_kconfigs) {
-        if($kconfig =~ /^$excl/) {
-            return 1;
-        }
+	if($kconfig =~ /^$excl/) {
+	    return 1;
+	}
     }
     return 0;
 }
@@ -629,52 +629,52 @@ foreach my $line (@config_file) {
     }
 
     if (/CONFIG_MODULE_SIG_KEY="(.+)"/) {
-        my $orig_cert = $1;
-        my $default_cert = "certs/signing_key.pem";
+	my $orig_cert = $1;
+	my $default_cert = "certs/signing_key.pem";
 
-        # Check that the logic in this script still matches the one in Kconfig
-        if (!defined($depends{"MODULE_SIG_KEY"}) ||
-            $depends{"MODULE_SIG_KEY"} !~ /"\Q$default_cert\E"/) {
-            print STDERR "WARNING: MODULE_SIG_KEY assertion failure, ",
-                "update needed to ", __FILE__, " line ", __LINE__, "\n";
-            print;
-        } elsif ($orig_cert ne $default_cert && ! -f $orig_cert) {
-            print STDERR "Module signature verification enabled but ",
-                "module signing key \"$orig_cert\" not found. Resetting ",
-                "signing key to default value.\n";
-            print "CONFIG_MODULE_SIG_KEY=\"$default_cert\"\n";
-        } else {
-            print;
-        }
-        next;
+	# Check that the logic in this script still matches the one in Kconfig
+	if (!defined($depends{"MODULE_SIG_KEY"}) ||
+	    $depends{"MODULE_SIG_KEY"} !~ /"\Q$default_cert\E"/) {
+	    print STDERR "WARNING: MODULE_SIG_KEY assertion failure, ",
+		"update needed to ", __FILE__, " line ", __LINE__, "\n";
+	    print;
+	} elsif ($orig_cert ne $default_cert && ! -f $orig_cert) {
+	    print STDERR "Module signature verification enabled but ",
+		"module signing key \"$orig_cert\" not found. Resetting ",
+		"signing key to default value.\n";
+	    print "CONFIG_MODULE_SIG_KEY=\"$default_cert\"\n";
+	} else {
+	    print;
+	}
+	next;
     }
 
     if (/CONFIG_SYSTEM_TRUSTED_KEYS="(.+)"/) {
-        my $orig_keys = $1;
+	my $orig_keys = $1;
 
-        if (! -f $orig_keys) {
-            print STDERR "System keyring enabled but keys \"$orig_keys\" ",
-                "not found. Resetting keys to default value.\n";
-            print "CONFIG_SYSTEM_TRUSTED_KEYS=\"\"\n";
-        } else {
-            print;
-        }
-        next;
+	if (! -f $orig_keys) {
+	    print STDERR "System keyring enabled but keys \"$orig_keys\" ",
+		"not found. Resetting keys to default value.\n";
+	    print "CONFIG_SYSTEM_TRUSTED_KEYS=\"\"\n";
+	} else {
+	    print;
+	}
+	next;
     }
 
     if (/^(CONFIG.*)=(m|y)/) {
-        if (in_preserved_kconfigs($1)) {
-            dprint "Preserve config $1";
-            print;
-            next;
-        }
+	if (in_preserved_kconfigs($1)) {
+	    dprint "Preserve config $1";
+	    print;
+	    next;
+	}
 	if (defined($configs{$1})) {
 	    if ($localyesconfig) {
-	        $setconfigs{$1} = 'y';
+		$setconfigs{$1} = 'y';
 		print "$1=y\n";
 		next;
 	    } else {
-	        $setconfigs{$1} = $2;
+		$setconfigs{$1} = $2;
 	    }
 	} elsif ($2 eq "m") {
 	    print "# $1 is not set\n";

From d1b1f1e627c0085fb2e2b5690929a3d53879cc67 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 28 Jun 2021 19:33:47 -0700
Subject: [PATCH 3079/3292] streamline_config.pl: add softtabstop=4 for vim
 users

The tab stop for Perl files is by default (at least in emacs) to be 4
spaces, where a tab is used for all 8 spaces.  Add a local variable
comment to make vim do the same by default, and this will help keep the
file consistent in the future when others edit it via vim and not emacs.

Link: https://lkml.kernel.org/r/20210322214032.293992979@goodmis.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: "John (Warthog9) Hawley" <warthog9@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/kconfig/streamline_config.pl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index 2e65aa9edf97..1a5fea0519eb 100755
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -702,3 +702,5 @@ foreach my $module (keys(%modules)) {
 	print STDERR "\n";
     }
 }
+
+# vim: softtabstop=4

From c1c9142004e7e21d6d3d2cd6a339845771ce6a27 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 28 Jun 2021 19:33:50 -0700
Subject: [PATCH 3080/3292] scripts/spelling.txt: add more spellings to
 spelling.txt

Here are some of the more common spelling mistakes and typos that I've
found while fixing up spelling mistakes in the kernel in the past few
months.

Link: https://lkml.kernel.org/r/20210514093655.8829-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/spelling.txt | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 7b6a01291598..17fdc620d548 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -22,6 +22,7 @@ absolut||absolute
 absoulte||absolute
 acccess||access
 acceess||access
+accelaration||acceleration
 acceleratoin||acceleration
 accelleration||acceleration
 accesing||accessing
@@ -264,6 +265,7 @@ calucate||calculate
 calulate||calculate
 cancelation||cancellation
 cancle||cancel
+canot||cannot
 capabilites||capabilities
 capabilties||capabilities
 capabilty||capability
@@ -494,7 +496,10 @@ digial||digital
 dimention||dimension
 dimesions||dimensions
 diconnected||disconnected
+disabed||disabled
+disble||disable
 disgest||digest
+disired||desired
 dispalying||displaying
 diplay||display
 directon||direction
@@ -710,6 +715,7 @@ havind||having
 heirarchically||hierarchically
 heirarchy||hierarchy
 helpfull||helpful
+hearbeat||heartbeat
 heterogenous||heterogeneous
 hexdecimal||hexadecimal
 hybernate||hibernate
@@ -989,6 +995,7 @@ notications||notifications
 notifcations||notifications
 notifed||notified
 notity||notify
+nubmer||number
 numebr||number
 numner||number
 obtaion||obtain
@@ -1014,8 +1021,10 @@ ommiting||omitting
 ommitted||omitted
 onself||oneself
 ony||only
+openning||opening
 operatione||operation
 opertaions||operations
+opportunies||opportunities
 optionnal||optional
 optmizations||optimizations
 orientatied||orientated
@@ -1111,6 +1120,7 @@ prefitler||prefilter
 preform||perform
 premption||preemption
 prepaired||prepared
+prepate||prepare
 preperation||preparation
 preprare||prepare
 pressre||pressure
@@ -1123,6 +1133,7 @@ privilaged||privileged
 privilage||privilege
 priviledge||privilege
 priviledges||privileges
+privleges||privileges
 probaly||probably
 procceed||proceed
 proccesors||processors
@@ -1167,6 +1178,7 @@ promixity||proximity
 psudo||pseudo
 psuedo||pseudo
 psychadelic||psychedelic
+purgable||purgeable
 pwoer||power
 queing||queuing
 quering||querying
@@ -1180,6 +1192,7 @@ receieve||receive
 recepient||recipient
 recevied||received
 receving||receiving
+recievd||received
 recieved||received
 recieve||receive
 reciever||receiver
@@ -1228,6 +1241,7 @@ reponse||response
 representaion||representation
 reqeust||request
 reqister||register
+requed||requeued
 requestied||requested
 requiere||require
 requirment||requirement
@@ -1332,6 +1346,7 @@ singal||signal
 singed||signed
 sleeped||slept
 sliped||slipped
+softwade||software
 softwares||software
 soley||solely
 souce||source
@@ -1510,6 +1525,7 @@ unintialized||uninitialized
 unitialized||uninitialized
 unkmown||unknown
 unknonw||unknown
+unknouwn||unknown
 unknow||unknown
 unkown||unknown
 unamed||unnamed

From d98e4d95411bbde2220a7afa38dcc9c14d71acbe Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Mon, 28 Jun 2021 19:33:52 -0700
Subject: [PATCH 3081/3292] ntfs: fix validity check for file name attribute

When checking the file name attribute, we want to ensure that it fits
within the bounds of ATTR_RECORD.  To do this, we should check that (attr
record + file name offset + file name length) < (attr record + attr record
length).

However, the original check did not include the file name offset in the
calculation.  This means that corrupted on-disk metadata might not caught
by the incorrect file name check, and lead to an invalid memory access.

An example can be seen in the crash report of a memory corruption error
found by Syzbot:
https://syzkaller.appspot.com/bug?id=a1a1e379b225812688566745c3e2f7242bffc246

Adding the file name offset to the validity check fixes this error and
passes the Syzbot reproducer test.

Link: https://lkml.kernel.org/r/20210614050540.289494-1-desmondcheongzx@gmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Reported-by: syzbot+213ac8bb98f7f4420840@syzkaller.appspotmail.com
Tested-by: syzbot+213ac8bb98f7f4420840@syzkaller.appspotmail.com
Acked-by: Anton Altaparmakov <anton@tuxera.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ntfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index f5c058b3192c..4474adb393ca 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -477,7 +477,7 @@ err_corrupt_attr:
 		}
 		file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
 				le16_to_cpu(attr->data.resident.value_offset));
-		p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length);
+		p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
 		if (p2 < (u8*)attr || p2 > p)
 			goto err_corrupt_attr;
 		/* This attribute is ok, but is it in the $Extend directory? */

From 10dde05b89980ef147f590d2735d7dd53aa39c88 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Mon, 28 Jun 2021 19:33:55 -0700
Subject: [PATCH 3082/3292] squashfs: add option to panic on errors

Add an errors=panic mount option to make squashfs trigger a panic when
errors are encountered, similar to several other filesystems.  This allows
a kernel dump to be saved using which the corruption can be analysed and
debugged.

Inspired by a pre-fs_context patch by Anton Eliasson.

Link: https://lkml.kernel.org/r/20210527125019.14511-1-vincent.whitchurch@axis.com
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/squashfs/block.c          |  5 ++-
 fs/squashfs/squashfs_fs_sb.h |  1 +
 fs/squashfs/super.c          | 86 ++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index b9e87ebb1060..855f0e87066d 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -226,8 +226,11 @@ out_free_bio:
 	bio_free_pages(bio);
 	bio_put(bio);
 out:
-	if (res < 0)
+	if (res < 0) {
 		ERROR("Failed to read block 0x%llx: %d\n", index, res);
+		if (msblk->panic_on_errors)
+			panic("squashfs read failed");
+	}
 
 	return res;
 }
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 166e98806265..1e90c2575f9b 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -65,5 +65,6 @@ struct squashfs_sb_info {
 	unsigned int				fragments;
 	int					xattr_ids;
 	unsigned int				ids;
+	bool					panic_on_errors;
 };
 #endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 88cc94be1076..60d6951915f4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -18,9 +18,11 @@
 
 #include <linux/fs.h>
 #include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/seq_file.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -37,6 +39,51 @@
 static struct file_system_type squashfs_fs_type;
 static const struct super_operations squashfs_super_ops;
 
+enum Opt_errors {
+	Opt_errors_continue,
+	Opt_errors_panic,
+};
+
+enum squashfs_param {
+	Opt_errors,
+};
+
+struct squashfs_mount_opts {
+	enum Opt_errors errors;
+};
+
+static const struct constant_table squashfs_param_errors[] = {
+	{"continue",   Opt_errors_continue },
+	{"panic",      Opt_errors_panic },
+	{}
+};
+
+static const struct fs_parameter_spec squashfs_fs_parameters[] = {
+	fsparam_enum("errors", Opt_errors, squashfs_param_errors),
+	{}
+};
+
+static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct squashfs_mount_opts *opts = fc->fs_private;
+	struct fs_parse_result result;
+	int opt;
+
+	opt = fs_parse(fc, squashfs_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_errors:
+		opts->errors = result.uint_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct squashfs_decompressor *supported_squashfs_filesystem(
 	struct fs_context *fc,
 	short major, short minor, short id)
@@ -67,6 +114,7 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(
 
 static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
 {
+	struct squashfs_mount_opts *opts = fc->fs_private;
 	struct squashfs_sb_info *msblk;
 	struct squashfs_super_block *sblk = NULL;
 	struct inode *root;
@@ -85,6 +133,8 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	}
 	msblk = sb->s_fs_info;
 
+	msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
+
 	msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
 	msblk->devblksize_log2 = ffz(~msblk->devblksize);
 
@@ -350,18 +400,52 @@ static int squashfs_get_tree(struct fs_context *fc)
 
 static int squashfs_reconfigure(struct fs_context *fc)
 {
+	struct super_block *sb = fc->root->d_sb;
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	struct squashfs_mount_opts *opts = fc->fs_private;
+
 	sync_filesystem(fc->root->d_sb);
 	fc->sb_flags |= SB_RDONLY;
+
+	msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
+
 	return 0;
 }
 
+static void squashfs_free_fs_context(struct fs_context *fc)
+{
+	kfree(fc->fs_private);
+}
+
 static const struct fs_context_operations squashfs_context_ops = {
 	.get_tree	= squashfs_get_tree,
+	.free		= squashfs_free_fs_context,
+	.parse_param	= squashfs_parse_param,
 	.reconfigure	= squashfs_reconfigure,
 };
 
+static int squashfs_show_options(struct seq_file *s, struct dentry *root)
+{
+	struct super_block *sb = root->d_sb;
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+
+	if (msblk->panic_on_errors)
+		seq_puts(s, ",errors=panic");
+	else
+		seq_puts(s, ",errors=continue");
+
+	return 0;
+}
+
 static int squashfs_init_fs_context(struct fs_context *fc)
 {
+	struct squashfs_mount_opts *opts;
+
+	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	fc->fs_private = opts;
 	fc->ops = &squashfs_context_ops;
 	return 0;
 }
@@ -481,6 +565,7 @@ static struct file_system_type squashfs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "squashfs",
 	.init_fs_context = squashfs_init_fs_context,
+	.parameters = squashfs_fs_parameters,
 	.kill_sb = kill_block_super,
 	.fs_flags = FS_REQUIRES_DEV
 };
@@ -491,6 +576,7 @@ static const struct super_operations squashfs_super_ops = {
 	.free_inode = squashfs_free_inode,
 	.statfs = squashfs_statfs,
 	.put_super = squashfs_put_super,
+	.show_options = squashfs_show_options,
 };
 
 module_init(init_squashfs_fs);

From 74ef829e41be8ada93e1d1dfa681c11be338c8d5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 28 Jun 2021 19:33:58 -0700
Subject: [PATCH 3083/3292] ocfs2: remove unnecessary INIT_LIST_HEAD()

The list_head o2hb_node_events is initialized statically.  It is
unnecessary to initialize by INIT_LIST_HEAD().

Link: https://lkml.kernel.org/r/20210511115847.3817395-1-yangyingliang@huawei.com
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reported-by: Hulk Robot <hulkci@huawei.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/cluster/heartbeat.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index e829c2595543..1169c8dc9106 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1442,8 +1442,6 @@ void o2hb_init(void)
 	for (i = 0; i < ARRAY_SIZE(o2hb_live_slots); i++)
 		INIT_LIST_HEAD(&o2hb_live_slots[i]);
 
-	INIT_LIST_HEAD(&o2hb_node_events);
-
 	memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
 	memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
 	memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));

From 54e948c60cc843b6e84dc44496edc91f51d2a28e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 28 Jun 2021 19:34:01 -0700
Subject: [PATCH 3084/3292] ocfs2: fix snprintf() checking

The snprintf() function returns the number of bytes which would have been
printed if the buffer was large enough.  In other words it can return ">=
remain" but this code assumes it returns "== remain".

The run time impact of this bug is not very severe.  The next iteration
through the loop would trigger a WARN() when we pass a negative limit to
snprintf().  We would then return success instead of -E2BIG.

The kernel implementation of snprintf() will never return negatives so
there is no need to check and I have deleted that dead code.

Link: https://lkml.kernel.org/r/20210511135350.GV1955@kadam
Fixes: a860f6eb4c6a ("ocfs2: sysfile interfaces for online file check")
Fixes: 74ae4e104dfc ("ocfs2: Create stack glue sysfs files.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/filecheck.c | 6 +-----
 fs/ocfs2/stackglue.c | 8 ++------
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 90b8d300c1ee..de56e6231af8 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -326,11 +326,7 @@ static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
 		ret = snprintf(buf + total, remain, "%lu\t\t%u\t%s\n",
 			       p->fe_ino, p->fe_done,
 			       ocfs2_filecheck_error(p->fe_status));
-		if (ret < 0) {
-			total = ret;
-			break;
-		}
-		if (ret == remain) {
+		if (ret >= remain) {
 			/* snprintf() didn't fit */
 			total = -E2BIG;
 			break;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d50e8b8dfea4..16f1bfc407f2 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -500,11 +500,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
 	list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
 		ret = snprintf(buf, remain, "%s\n",
 			       p->sp_name);
-		if (ret < 0) {
-			total = ret;
-			break;
-		}
-		if (ret == remain) {
+		if (ret >= remain) {
 			/* snprintf() didn't fit */
 			total = -E2BIG;
 			break;
@@ -531,7 +527,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
 	if (active_stack) {
 		ret = snprintf(buf, PAGE_SIZE, "%s\n",
 			       active_stack->sp_name);
-		if (ret == PAGE_SIZE)
+		if (ret >= PAGE_SIZE)
 			ret = -E2BIG;
 	}
 	spin_unlock(&ocfs2_stack_lock);

From ca49b6d856ebde1e795e8bee37c461bff9939e02 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 28 Jun 2021 19:34:05 -0700
Subject: [PATCH 3085/3292] ocfs2: remove redundant assignment to pointer queue

The pointer queue is being initialized with a value that is never read and
it is being updated later with a new value.  The initialization is
redundant and can be removed.

Addresses-Coverity: ("Unused value")
Link: https://lkml.kernel.org/r/20210513113957.57539-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4960a6de768d..9b88219febb5 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2977,7 +2977,7 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
 				    struct dlm_lock_resource *res)
 {
 	enum dlm_lockres_list idx;
-	struct list_head *queue = &res->granted;
+	struct list_head *queue;
 	struct dlm_lock *lock;
 	int noderef;
 	u8 nodenum = O2NM_MAX_NODES;

From 01f01399136ca290e20cb21839c32a52ce626d16 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Mon, 28 Jun 2021 19:34:08 -0700
Subject: [PATCH 3086/3292] ocfs2: remove repeated uptodate check for buffer

In commit 60f91826ca62 ("buffer: Avoid setting buffer bits that are
already set"), function set_buffer_##name was added a test_bit() to check
buffer, which is the same as function buffer_##name.  The
!buffer_uptodate(bh) here is a repeated check.  Remove it.

Link: https://lkml.kernel.org/r/20210425025702.13628-1-wanjiabing@vivo.com
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/aops.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1294925ac94a..e1c6fa5bd0e7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -632,8 +632,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
 		}
 
 		if (PageUptodate(page)) {
-			if (!buffer_uptodate(bh))
-				set_buffer_uptodate(bh);
+			set_buffer_uptodate(bh);
 		} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
 			   !buffer_new(bh) &&
 			   ocfs2_should_read_blk(inode, page, block_start) &&

From f0f798db0586b34c552997f8c9d923a5db21fe5e Mon Sep 17 00:00:00 2001
From: Chen Huang <chenhuang5@huawei.com>
Date: Mon, 28 Jun 2021 19:34:11 -0700
Subject: [PATCH 3087/3292] ocfs2: replace simple_strtoull() with kstrtoull()

simple_strtoull() is deprecated in some situation since it does not check
for the range overflow, use kstrtoull() instead.

Link: https://lkml.kernel.org/r/20210526092020.554341-3-chenhuang5@huawei.com
Signed-off-by: Chen Huang <chenhuang5@huawei.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/cluster/heartbeat.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 1169c8dc9106..f89ffcbd585f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1596,12 +1596,13 @@ static ssize_t o2hb_region_start_block_store(struct config_item *item,
 	struct o2hb_region *reg = to_o2hb_region(item);
 	unsigned long long tmp;
 	char *p = (char *)page;
+	ssize_t ret;
 
 	if (reg->hr_bdev)
 		return -EINVAL;
 
-	tmp = simple_strtoull(p, &p, 0);
-	if (!p || (*p && (*p != '\n')))
+	ret = kstrtoull(p, 0, &tmp);
+	if (ret)
 		return -EINVAL;
 
 	reg->hr_start_block = tmp;

From 7ed6d4e418d98e78c9f2b895d76cdaed7a7ccbdb Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 28 Jun 2021 19:34:14 -0700
Subject: [PATCH 3088/3292] ocfs2: remove redundant initialization of variable
 ret

The variable ret is being initialized with a value that is never read, the
assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Link: https://lkml.kernel.org/r/20210613135148.74658-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/cluster/nodemanager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index bb82e6b1ff4e..625c92521416 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -824,7 +824,7 @@ static void __exit exit_o2nm(void)
 
 static int __init init_o2nm(void)
 {
-	int ret = -1;
+	int ret;
 
 	o2hb_init();
 

From b124ac45bda0338f2aa3969e7c135139267f8987 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Mon, 28 Jun 2021 19:34:17 -0700
Subject: [PATCH 3089/3292] kernel: watchdog: modify the explanation related to
 watchdog thread

The watchdog thread has been replaced by cpu_stop_work, modify the
explanation related.

Link: https://lkml.kernel.org/r/1619687073-24686-2-git-send-email-wangqing@vivo.com
Signed-off-by: Wang Qing <wangqing@vivo.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Joe Perches <joe@perches.com>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@canonical.com>
Cc: Qais Yousef <qais.yousef@arm.com>
Cc: Santosh Sivaraj <santosh@fossix.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 92d3bcc5a5e0..ad912511a0c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -92,7 +92,7 @@ __setup("nmi_watchdog=", hardlockup_panic_setup);
  * own hardlockup detector.
  *
  * watchdog_nmi_enable/disable can be implemented to start and stop when
- * softlockup watchdog threads start and stop. The arch must select the
+ * softlockup watchdog start and stop. The arch must select the
  * SOFTLOCKUP_DETECTOR Kconfig.
  */
 int __weak watchdog_nmi_enable(unsigned int cpu)
@@ -335,7 +335,7 @@ static DEFINE_PER_CPU(struct completion, softlockup_completion);
 static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 
 /*
- * The watchdog thread function - touches the timestamp.
+ * The watchdog feed function - touches the timestamp.
  *
  * It only runs once every sample_period seconds (4 seconds by
  * default) to reset the softlockup timestamp. If this gets delayed
@@ -558,11 +558,7 @@ static void lockup_detector_reconfigure(void)
 }
 
 /*
- * Create the watchdog thread infrastructure and configure the detector(s).
- *
- * The threads are not unparked as watchdog_allowed_mask is empty.  When
- * the threads are successfully initialized, take the proper locks and
- * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
+ * Create the watchdog infrastructure and configure the detector(s).
  */
 static __init void lockup_detector_setup(void)
 {
@@ -628,7 +624,7 @@ void lockup_detector_soft_poweroff(void)
 
 #ifdef CONFIG_SYSCTL
 
-/* Propagate any changes to the watchdog threads */
+/* Propagate any changes to the watchdog infrastructure */
 static void proc_watchdog_update(void)
 {
 	/* Remove impossible cpus to keep sysctl output clean. */

From e55fda8cdcba2cb3d5d46ae5fcd5f243f8b70d6e Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Mon, 28 Jun 2021 19:34:20 -0700
Subject: [PATCH 3090/3292] doc: watchdog: modify the explanation related to
 watchdog thread

"watchdog/%u" threads has be replaced by cpu_stop_work.  The current
description is extremely misleading.

Link: https://lkml.kernel.org/r/1619687073-24686-4-git-send-email-wangqing@vivo.com
Signed-off-by: Wang Qing <wangqing@vivo.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: "Guilherme G. Piccoli" <gpiccoli@canonical.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Qais Yousef <qais.yousef@arm.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Santosh Sivaraj <santosh@fossix.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/lockup-watchdogs.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/lockup-watchdogs.rst b/Documentation/admin-guide/lockup-watchdogs.rst
index 290840c160af..3e09284a8b9b 100644
--- a/Documentation/admin-guide/lockup-watchdogs.rst
+++ b/Documentation/admin-guide/lockup-watchdogs.rst
@@ -39,7 +39,7 @@ in principle, they should work in any architecture where these
 subsystems are present.
 
 A periodic hrtimer runs to generate interrupts and kick the watchdog
-task. An NMI perf event is generated every "watchdog_thresh"
+job. An NMI perf event is generated every "watchdog_thresh"
 (compile-time initialized to 10 and configurable through sysctl of the
 same name) seconds to check for hardlockups. If any CPU in the system
 does not receive any hrtimer interrupt during that time the
@@ -47,7 +47,7 @@ does not receive any hrtimer interrupt during that time the
 generate a kernel warning or call panic, depending on the
 configuration.
 
-The watchdog task is a high priority kernel thread that updates a
+The watchdog job runs in a stop scheduling thread that updates a
 timestamp every time it is scheduled. If that timestamp is not updated
 for 2*watchdog_thresh seconds (the softlockup threshold) the
 'softlockup detector' (coded inside the hrtimer callback function)

From 256f7a6791e8f19bafa1d702f69a6a6ba16250e3 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Mon, 28 Jun 2021 19:34:24 -0700
Subject: [PATCH 3091/3292] doc: watchdog: modify the doc related to
 "watchdog/%u"

"watchdog/%u" threads has be replaced by cpu_stop_work.  The current
description is extremely misleading.

Link: https://lkml.kernel.org/r/1619687073-24686-5-git-send-email-wangqing@vivo.com
Signed-off-by: Wang Qing <wangqing@vivo.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: "Guilherme G. Piccoli" <gpiccoli@canonical.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Cc: Qais Yousef <qais.yousef@arm.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Santosh Sivaraj <santosh@fossix.org>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/sysctl/kernel.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 68b21395a743..04c79017814b 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1283,11 +1283,11 @@ This parameter can be used to control the soft lockup detector.
 = =================================
 
 The soft lockup detector monitors CPUs for threads that are hogging the CPUs
-without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
-from running. The mechanism depends on the CPUs ability to respond to timer
-interrupts which are needed for the 'watchdog/N' threads to be woken up by
-the watchdog timer function, otherwise the NMI watchdog — if enabled — can
-detect a hard lockup condition.
+without rescheduling voluntarily, and thus prevent the 'migration/N' threads
+from running, causing the watchdog work fail to execute. The mechanism depends
+on the CPUs ability to respond to timer interrupts which are needed for the
+watchdog work to be queued by the watchdog timer function, otherwise the NMI
+watchdog — if enabled — can detect a hard lockup condition.
 
 
 stack_erasing

From 4acaa7d5045e21d5469232d0e6e79cdaf6755754 Mon Sep 17 00:00:00 2001
From: gumingtao <gumingtao1225@gmail.com>
Date: Mon, 28 Jun 2021 19:34:27 -0700
Subject: [PATCH 3092/3292] slab: use __func__ to trace function name

It is better to use __func__ to trace function name.

Link: https://lkml.kernel.org/r/31fdbad5c45cd1e26be9ff37be321b8586b80fee.1624355507.git.gumingtao@xiaomi.com
Signed-off-by: gumingtao <gumingtao@xiaomi.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab_common.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 7cab77655f11..1ded52592b56 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -377,11 +377,11 @@ out_unlock:
 
 	if (err) {
 		if (flags & SLAB_PANIC)
-			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
-				name, err);
+			panic("%s: Failed to create slab '%s'. Error %d\n",
+				__func__, name, err);
 		else {
-			pr_warn("kmem_cache_create(%s) failed with error %d\n",
-				name, err);
+			pr_warn("%s(%s) failed with error %d\n",
+				__func__, name, err);
 			dump_stack();
 		}
 		return NULL;
@@ -508,8 +508,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
 
 	err = shutdown_cache(s);
 	if (err) {
-		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
-		       s->name);
+		pr_err("%s %s: Slab cache still has objects\n",
+		       __func__, s->name);
 		dump_stack();
 	}
 out_unlock:

From 26c6cb7cf830349c6518a7efe1c32ac796cd192e Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 28 Jun 2021 19:34:30 -0700
Subject: [PATCH 3093/3292] kunit: make test->lock irq safe

The upcoming SLUB kunit test will be calling kunit_find_named_resource()
from a context with disabled interrupts.  That means kunit's test->lock
needs to be IRQ safe to avoid potential deadlocks and lockdep splats.

This patch therefore changes the test->lock usage to spin_lock_irqsave()
and spin_unlock_irqrestore().

Link: https://lkml.kernel.org/r/20210511150734.3492-1-glittao@gmail.com
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Oliver Glitta <glittao@gmail.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Latypov <dlatypov@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Marco Elver <elver@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/kunit/test.h |  5 +++--
 lib/kunit/test.c     | 18 +++++++++++-------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 49601c4b98b8..524d4789af22 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -515,8 +515,9 @@ kunit_find_resource(struct kunit *test,
 		    void *match_data)
 {
 	struct kunit_resource *res, *found = NULL;
+	unsigned long flags;
 
-	spin_lock(&test->lock);
+	spin_lock_irqsave(&test->lock, flags);
 
 	list_for_each_entry_reverse(res, &test->resources, node) {
 		if (match(test, res, (void *)match_data)) {
@@ -526,7 +527,7 @@ kunit_find_resource(struct kunit *test,
 		}
 	}
 
-	spin_unlock(&test->lock);
+	spin_unlock_irqrestore(&test->lock, flags);
 
 	return found;
 }
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 2f6cc0123232..45f068864d76 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -475,6 +475,7 @@ int kunit_add_resource(struct kunit *test,
 		       void *data)
 {
 	int ret = 0;
+	unsigned long flags;
 
 	res->free = free;
 	kref_init(&res->refcount);
@@ -487,10 +488,10 @@ int kunit_add_resource(struct kunit *test,
 		res->data = data;
 	}
 
-	spin_lock(&test->lock);
+	spin_lock_irqsave(&test->lock, flags);
 	list_add_tail(&res->node, &test->resources);
 	/* refcount for list is established by kref_init() */
-	spin_unlock(&test->lock);
+	spin_unlock_irqrestore(&test->lock, flags);
 
 	return ret;
 }
@@ -548,9 +549,11 @@ EXPORT_SYMBOL_GPL(kunit_alloc_and_get_resource);
 
 void kunit_remove_resource(struct kunit *test, struct kunit_resource *res)
 {
-	spin_lock(&test->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&test->lock, flags);
 	list_del(&res->node);
-	spin_unlock(&test->lock);
+	spin_unlock_irqrestore(&test->lock, flags);
 	kunit_put_resource(res);
 }
 EXPORT_SYMBOL_GPL(kunit_remove_resource);
@@ -630,6 +633,7 @@ EXPORT_SYMBOL_GPL(kunit_kfree);
 void kunit_cleanup(struct kunit *test)
 {
 	struct kunit_resource *res;
+	unsigned long flags;
 
 	/*
 	 * test->resources is a stack - each allocation must be freed in the
@@ -641,9 +645,9 @@ void kunit_cleanup(struct kunit *test)
 	 * protect against the current node being deleted, not the next.
 	 */
 	while (true) {
-		spin_lock(&test->lock);
+		spin_lock_irqsave(&test->lock, flags);
 		if (list_empty(&test->resources)) {
-			spin_unlock(&test->lock);
+			spin_unlock_irqrestore(&test->lock, flags);
 			break;
 		}
 		res = list_last_entry(&test->resources,
@@ -654,7 +658,7 @@ void kunit_cleanup(struct kunit *test)
 		 * resource, and this can't happen if the test->lock
 		 * is held.
 		 */
-		spin_unlock(&test->lock);
+		spin_unlock_irqrestore(&test->lock, flags);
 		kunit_remove_resource(test, res);
 	}
 	current->kunit_test = NULL;

From 1f9f78b1b376f82cdd8ed73cc0abdb74d0453d43 Mon Sep 17 00:00:00 2001
From: Oliver Glitta <glittao@gmail.com>
Date: Mon, 28 Jun 2021 19:34:33 -0700
Subject: [PATCH 3094/3292] mm/slub, kunit: add a KUnit test for SLUB debugging
 functionality

SLUB has resiliency_test() function which is hidden behind #ifdef
SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody runs it.
KUnit should be a proper replacement for it.

Try changing byte in redzone after allocation and changing pointer to next
free node, first byte, 50th byte and redzone byte.  Check if validation
finds errors.

There are several differences from the original resiliency test: Tests
create own caches with known state instead of corrupting shared kmalloc
caches.

The corruption of freepointer uses correct offset, the original resiliency
test got broken with freepointer changes.

Scratch changing random byte test, because it does not have meaning in
this form where we need deterministic results.

Add new option CONFIG_SLUB_KUNIT_TEST in Kconfig.  Tests next_pointer,
first_word and clobber_50th_byte do not run with KASAN option on.  Because
the test deliberately modifies non-allocated objects.

Use kunit_resource to count errors in cache and silence bug reports.
Count error whenever slab_bug() or slab_fix() is called or when the count
of pages is wrong.

[glittao@gmail.com: remove unused function test_exit(), from SLUB KUnit test]
  Link: https://lkml.kernel.org/r/20210512140656.12083-1-glittao@gmail.com
[akpm@linux-foundation.org: export kasan_enable/disable_current to modules]

Link: https://lkml.kernel.org/r/20210511150734.3492-2-glittao@gmail.com
Signed-off-by: Oliver Glitta <glittao@gmail.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Daniel Latypov <dlatypov@google.com>
Acked-by: Marco Elver <elver@google.com>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug |  12 ++++
 lib/Makefile      |   1 +
 lib/slub_kunit.c  | 152 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/kasan/common.c |   3 +
 mm/slab.h         |   1 +
 mm/slub.c         |  46 +++++++++++++-
 6 files changed, 212 insertions(+), 3 deletions(-)
 create mode 100644 lib/slub_kunit.c

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 678c13967580..7723f58a9394 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2429,6 +2429,18 @@ config BITS_TEST
 
 	  If unsure, say N.
 
+config SLUB_KUNIT_TEST
+	tristate "KUnit test for SLUB cache error detection" if !KUNIT_ALL_TESTS
+	depends on SLUB_DEBUG && KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  This builds SLUB allocator unit test.
+	  Tests SLUB cache debugging functionality.
+	  For more information on KUnit and unit tests in general please refer
+	  to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+	  If unsure, say N.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 2cc359ec1fdd..6d5ea8f5b52c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -354,5 +354,6 @@ obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
 obj-$(CONFIG_BITS_TEST) += test_bits.o
 obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
+obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
 
 obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
new file mode 100644
index 000000000000..8662dc6cb509
--- /dev/null
+++ b/lib/slub_kunit.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/test.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "../mm/slab.h"
+
+static struct kunit_resource resource;
+static int slab_errors;
+
+static void test_clobber_zone(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
+				SLAB_RED_ZONE, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kasan_disable_current();
+	p[64] = 0x12;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+	kasan_enable_current();
+	kmem_cache_free(s, p);
+	kmem_cache_destroy(s);
+}
+
+#ifndef CONFIG_KASAN
+static void test_next_pointer(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
+				SLAB_POISON, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+	unsigned long tmp;
+	unsigned long *ptr_addr;
+
+	kmem_cache_free(s, p);
+
+	ptr_addr = (unsigned long *)(p + s->offset);
+	tmp = *ptr_addr;
+	p[s->offset] = 0x12;
+
+	/*
+	 * Expecting three errors.
+	 * One for the corrupted freechain and the other one for the wrong
+	 * count of objects in use. The third error is fixing broken cache.
+	 */
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 3, slab_errors);
+
+	/*
+	 * Try to repair corrupted freepointer.
+	 * Still expecting two errors. The first for the wrong count
+	 * of objects in use.
+	 * The second error is for fixing broken cache.
+	 */
+	*ptr_addr = tmp;
+	slab_errors = 0;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+	/*
+	 * Previous validation repaired the count of objects in use.
+	 * Now expecting no error.
+	 */
+	slab_errors = 0;
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+
+	kmem_cache_destroy(s);
+}
+
+static void test_first_word(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
+				SLAB_POISON, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kmem_cache_free(s, p);
+	*p = 0x78;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+	kmem_cache_destroy(s);
+}
+
+static void test_clobber_50th_byte(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
+				SLAB_POISON, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kmem_cache_free(s, p);
+	p[50] = 0x9a;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+	kmem_cache_destroy(s);
+}
+#endif
+
+static void test_clobber_redzone_free(struct kunit *test)
+{
+	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
+				SLAB_RED_ZONE, NULL);
+	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
+
+	kasan_disable_current();
+	kmem_cache_free(s, p);
+	p[64] = 0xab;
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+	kasan_enable_current();
+	kmem_cache_destroy(s);
+}
+
+static int test_init(struct kunit *test)
+{
+	slab_errors = 0;
+
+	kunit_add_named_resource(test, NULL, NULL, &resource,
+					"slab_errors", &slab_errors);
+	return 0;
+}
+
+static struct kunit_case test_cases[] = {
+	KUNIT_CASE(test_clobber_zone),
+
+#ifndef CONFIG_KASAN
+	KUNIT_CASE(test_next_pointer),
+	KUNIT_CASE(test_first_word),
+	KUNIT_CASE(test_clobber_50th_byte),
+#endif
+
+	KUNIT_CASE(test_clobber_redzone_free),
+	{}
+};
+
+static struct kunit_suite test_suite = {
+	.name = "slub_test",
+	.init = test_init,
+	.test_cases = test_cases,
+};
+kunit_test_suite(test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 6bb87f2acd4e..2586d3718600 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -51,11 +51,14 @@ void kasan_enable_current(void)
 {
 	current->kasan_depth++;
 }
+EXPORT_SYMBOL(kasan_enable_current);
 
 void kasan_disable_current(void)
 {
 	current->kasan_depth--;
 }
+EXPORT_SYMBOL(kasan_disable_current);
+
 #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
 void __kasan_unpoison_range(const void *address, size_t size)
diff --git a/mm/slab.h b/mm/slab.h
index 18c1927cd196..9b690fa44cae 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -215,6 +215,7 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
 DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 extern void print_tracking(struct kmem_cache *s, void *object);
+long validate_slab_cache(struct kmem_cache *s);
 #else
 static inline void print_tracking(struct kmem_cache *s, void *object)
 {
diff --git a/mm/slub.c b/mm/slub.c
index 61bd40e3eb9a..290444ab932b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -36,6 +36,7 @@
 #include <linux/prefetch.h>
 #include <linux/memcontrol.h>
 #include <linux/random.h>
+#include <kunit/test.h>
 
 #include <trace/events/kmem.h>
 
@@ -449,6 +450,26 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
 static DEFINE_SPINLOCK(object_map_lock);
 
+#if IS_ENABLED(CONFIG_KUNIT)
+static bool slab_add_kunit_errors(void)
+{
+	struct kunit_resource *resource;
+
+	if (likely(!current->kunit_test))
+		return false;
+
+	resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
+	if (!resource)
+		return false;
+
+	(*(int *)resource->data)++;
+	kunit_put_resource(resource);
+	return true;
+}
+#else
+static inline bool slab_add_kunit_errors(void) { return false; }
+#endif
+
 /*
  * Determine a map of object in use on a page.
  *
@@ -679,6 +700,9 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 	struct va_format vaf;
 	va_list args;
 
+	if (slab_add_kunit_errors())
+		return;
+
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
@@ -742,6 +766,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 void object_err(struct kmem_cache *s, struct page *page,
 			u8 *object, char *reason)
 {
+	if (slab_add_kunit_errors())
+		return;
+
 	slab_bug(s, "%s", reason);
 	print_trailer(s, page, object);
 }
@@ -752,6 +779,9 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 	va_list args;
 	char buf[100];
 
+	if (slab_add_kunit_errors())
+		return;
+
 	va_start(args, fmt);
 	vsnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
@@ -801,12 +831,16 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 	while (end > fault && end[-1] == value)
 		end--;
 
+	if (slab_add_kunit_errors())
+		goto skip_bug_print;
+
 	slab_bug(s, "%s overwritten", what);
 	pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
 					fault, end - 1, fault - addr,
 					fault[0], value);
 	print_trailer(s, page, object);
 
+skip_bug_print:
 	restore_bytes(s, what, value, fault, end);
 	return 0;
 }
@@ -4649,9 +4683,11 @@ static int validate_slab_node(struct kmem_cache *s,
 		validate_slab(s, page);
 		count++;
 	}
-	if (count != n->nr_partial)
+	if (count != n->nr_partial) {
 		pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
 		       s->name, count, n->nr_partial);
+		slab_add_kunit_errors();
+	}
 
 	if (!(s->flags & SLAB_STORE_USER))
 		goto out;
@@ -4660,16 +4696,18 @@ static int validate_slab_node(struct kmem_cache *s,
 		validate_slab(s, page);
 		count++;
 	}
-	if (count != atomic_long_read(&n->nr_slabs))
+	if (count != atomic_long_read(&n->nr_slabs)) {
 		pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
 		       s->name, count, atomic_long_read(&n->nr_slabs));
+		slab_add_kunit_errors();
+	}
 
 out:
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	return count;
 }
 
-static long validate_slab_cache(struct kmem_cache *s)
+long validate_slab_cache(struct kmem_cache *s)
 {
 	int node;
 	unsigned long count = 0;
@@ -4681,6 +4719,8 @@ static long validate_slab_cache(struct kmem_cache *s)
 
 	return count;
 }
+EXPORT_SYMBOL(validate_slab_cache);
+
 /*
  * Generate lists of code addresses where slabcache objects are allocated
  * and freed.

From 3d8e374c6d46a648333b9ef87983bc726f8e56bc Mon Sep 17 00:00:00 2001
From: Oliver Glitta <glittao@gmail.com>
Date: Mon, 28 Jun 2021 19:34:36 -0700
Subject: [PATCH 3095/3292] slub: remove resiliency_test() function

Function resiliency_test() is hidden behind #ifdef SLUB_RESILIENCY_TEST
that is not part of Kconfig, so nobody runs it.

This function is replaced with KUnit test for SLUB added by the previous
patch "selftests: add a KUnit test for SLUB debugging functionality".

Link: https://lkml.kernel.org/r/20210511150734.3492-3-glittao@gmail.com
Signed-off-by: Oliver Glitta <glittao@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Oliver Glitta <glittao@gmail.com>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Daniel Latypov <dlatypov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 64 -------------------------------------------------------
 1 file changed, 64 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 290444ab932b..13857910055b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -155,9 +155,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
  * - Variable sizing of the per node arrays
  */
 
-/* Enable to test recovery from slab corruption on boot */
-#undef SLUB_RESILIENCY_TEST
-
 /* Enable to log cmpxchg failures */
 #undef SLUB_DEBUG_CMPXCHG
 
@@ -4938,66 +4935,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
 }
 #endif	/* CONFIG_SLUB_DEBUG */
 
-#ifdef SLUB_RESILIENCY_TEST
-static void __init resiliency_test(void)
-{
-	u8 *p;
-	int type = KMALLOC_NORMAL;
-
-	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
-
-	pr_err("SLUB resiliency testing\n");
-	pr_err("-----------------------\n");
-	pr_err("A. Corruption after allocation\n");
-
-	p = kzalloc(16, GFP_KERNEL);
-	p[16] = 0x12;
-	pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
-	       p + 16);
-
-	validate_slab_cache(kmalloc_caches[type][4]);
-
-	/* Hmmm... The next two are dangerous */
-	p = kzalloc(32, GFP_KERNEL);
-	p[32 + sizeof(void *)] = 0x34;
-	pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
-	       p);
-	pr_err("If allocated object is overwritten then not detectable\n\n");
-
-	validate_slab_cache(kmalloc_caches[type][5]);
-	p = kzalloc(64, GFP_KERNEL);
-	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
-	*p = 0x56;
-	pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
-	       p);
-	pr_err("If allocated object is overwritten then not detectable\n\n");
-	validate_slab_cache(kmalloc_caches[type][6]);
-
-	pr_err("\nB. Corruption after free\n");
-	p = kzalloc(128, GFP_KERNEL);
-	kfree(p);
-	*p = 0x78;
-	pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
-	validate_slab_cache(kmalloc_caches[type][7]);
-
-	p = kzalloc(256, GFP_KERNEL);
-	kfree(p);
-	p[50] = 0x9a;
-	pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
-	validate_slab_cache(kmalloc_caches[type][8]);
-
-	p = kzalloc(512, GFP_KERNEL);
-	kfree(p);
-	p[512] = 0xab;
-	pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
-	validate_slab_cache(kmalloc_caches[type][9]);
-}
-#else
-#ifdef CONFIG_SYSFS
-static void resiliency_test(void) {};
-#endif
-#endif	/* SLUB_RESILIENCY_TEST */
-
 #ifdef CONFIG_SYSFS
 enum slab_stat_type {
 	SL_ALL,			/* All slabs */
@@ -5846,7 +5783,6 @@ static int __init slab_sysfs_init(void)
 	}
 
 	mutex_unlock(&slab_mutex);
-	resiliency_test();
 	return 0;
 }
 

From 588c7fa022d7b2361500ead5660d9a1a2ecd9b7d Mon Sep 17 00:00:00 2001
From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Date: Mon, 28 Jun 2021 19:34:39 -0700
Subject: [PATCH 3096/3292] mm, slub: change run-time assertion in
 kmalloc_index() to compile-time

Currently when size is not supported by kmalloc_index, compiler will
generate a run-time BUG() while compile-time error is also possible, and
better.  So change BUG to BUILD_BUG_ON_MSG to make compile-time check
possible.

Also remove code that allocates more than 32MB because current
implementation supports only up to 32MB.

[42.hyeyoo@gmail.com: fix support for clang 10]
  Link: https://lkml.kernel.org/r/20210518181247.GA10062@hyeyoo
[vbabka@suse.cz: fix false-positive assert in kernel/bpf/local_storage.c]
  Link: https://lkml.kernel.org/r/bea97388-01df-8eac-091b-a3c89b4a4a09@suse.czLink: https://lkml.kernel.org/r/20210511173448.GA54466@hyeyoo
[elver@google.com: kfence fix]
  Link: https://lkml.kernel.org/r/20210512195227.245000695c9014242e9a00e5@linux-foundation.org

Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Marco Elver <elver@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h    | 17 ++++++++++++++---
 mm/kfence/kfence_test.c |  5 +++--
 mm/slab_common.c        |  7 +++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0c97d788762c..bc9ab3a5a017 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -346,8 +346,14 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
  * 1 =  65 .. 96 bytes
  * 2 = 129 .. 192 bytes
  * n = 2^(n-1)+1 .. 2^n
+ *
+ * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized;
+ * typical usage is via kmalloc_index() and therefore evaluated at compile-time.
+ * Callers where !size_is_constant should only be test modules, where runtime
+ * overheads of __kmalloc_index() can be tolerated.  Also see kmalloc_slab().
  */
-static __always_inline unsigned int kmalloc_index(size_t size)
+static __always_inline unsigned int __kmalloc_index(size_t size,
+						    bool size_is_constant)
 {
 	if (!size)
 		return 0;
@@ -382,12 +388,17 @@ static __always_inline unsigned int kmalloc_index(size_t size)
 	if (size <=  8 * 1024 * 1024) return 23;
 	if (size <=  16 * 1024 * 1024) return 24;
 	if (size <=  32 * 1024 * 1024) return 25;
-	if (size <=  64 * 1024 * 1024) return 26;
-	BUG();
+
+	if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000)
+	    && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
+		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
+	else
+		BUG();
 
 	/* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
+#define kmalloc_index(s) __kmalloc_index(s, true)
 #endif /* !CONFIG_SLOB */
 
 void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 4acf4251ee04..7f24b9bcb2ec 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -197,7 +197,7 @@ static void test_cache_destroy(void)
 
 static inline size_t kmalloc_cache_alignment(size_t size)
 {
-	return kmalloc_caches[kmalloc_type(GFP_KERNEL)][kmalloc_index(size)]->align;
+	return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align;
 }
 
 /* Must always inline to match stack trace against caller. */
@@ -267,7 +267,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
 
 		if (is_kfence_address(alloc)) {
 			struct page *page = virt_to_head_page(alloc);
-			struct kmem_cache *s = test_cache ?: kmalloc_caches[kmalloc_type(GFP_KERNEL)][kmalloc_index(size)];
+			struct kmem_cache *s = test_cache ?:
+					kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
 
 			/*
 			 * Verify that various helpers return the right values
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1ded52592b56..b97b6fa8a7c6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -754,8 +754,8 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 
 /*
  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
- * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
- * kmalloc-67108864.
+ * kmalloc_index() supports up to 2^25=32MB, so the final entry of the table is
+ * kmalloc-32M.
  */
 const struct kmalloc_info_struct kmalloc_info[] __initconst = {
 	INIT_KMALLOC_INFO(0, 0),
@@ -783,8 +783,7 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = {
 	INIT_KMALLOC_INFO(4194304, 4M),
 	INIT_KMALLOC_INFO(8388608, 8M),
 	INIT_KMALLOC_INFO(16777216, 16M),
-	INIT_KMALLOC_INFO(33554432, 32M),
-	INIT_KMALLOC_INFO(67108864, 64M)
+	INIT_KMALLOC_INFO(33554432, 32M)
 };
 
 /*

From 02ac47d0cdd48c0c859a6ac7a6fad49c8e413ce1 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 28 Jun 2021 19:34:43 -0700
Subject: [PATCH 3097/3292] slub: restore slub_debug=- behavior

Petch series "slub: Print non-hashed pointers in slub debugging", v3.

I was doing some debugging recently and noticed that my pointers were
being hashed while slub_debug was on the kernel commandline.  Let's force
on the no hash pointer option when slub_debug is on the kernel commandline
so that the prints are more meaningful.

The first two patches are something else I noticed while looking at the
code.  The message argument is never used so the debugging messages are
not as clear as they could be and the slub_debug=- behavior seems to be
busted.  Then there's a printf fixup from Joe and the final patch is the
one that force disables pointer hashing.

This patch (of 4):

Passing slub_debug=- on the kernel commandline is supposed to disable slub
debugging.  This is especially useful with CONFIG_SLUB_DEBUG_ON where the
default is to have slub debugging enabled in the build.  Due to some code
reorganization this behavior was dropped, but the code to make it work
mostly stuck around.  Restore the previous behavior by disabling the
static key when we parse the commandline and see that we're trying to
disable slub debugging.

Link: https://lkml.kernel.org/r/20210601182202.3011020-1-swboyd@chromium.org
Link: https://lkml.kernel.org/r/20210601182202.3011020-2-swboyd@chromium.org
Fixes: ca0cab65ea2b ("mm, slub: introduce static key for slub_debug()")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index 13857910055b..1de695200efe 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1429,6 +1429,8 @@ static int __init setup_slub_debug(char *str)
 out:
 	if (slub_debug != 0 || slub_debug_string)
 		static_branch_enable(&slub_debug_enabled);
+	else
+		static_branch_disable(&slub_debug_enabled);
 	if ((static_branch_unlikely(&init_on_alloc) ||
 	     static_branch_unlikely(&init_on_free)) &&
 	    (slub_debug & SLAB_POISON))

From 1a88ef87f861e10611e9162c4c701704bfdeed85 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 28 Jun 2021 19:34:46 -0700
Subject: [PATCH 3098/3292] slub: actually use 'message' in restore_bytes()

The message argument isn't used here.  Let's pass the string to the printk
message so that the developer can figure out what's happening, instead of
guessing that a redzone is being restored, etc.

Link: https://lkml.kernel.org/r/20210601182202.3011020-3-swboyd@chromium.org
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Joe Perches <joe@perches.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 1de695200efe..89a4045b917d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -806,7 +806,7 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 						void *from, void *to)
 {
-	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
+	slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x\n", message, from, to - 1, data);
 	memset(from, data, to - from);
 }
 

From 582d1212edc73e6459d5219a24f312799877b61e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 28 Jun 2021 19:34:49 -0700
Subject: [PATCH 3099/3292] slub: indicate slab_fix() uses printf formats

Ideally, slab_fix() would be marked with __printf and the format here
would not use \n as that's emitted by the slab_fix().  Make these changes.

Link: https://lkml.kernel.org/r/20210601182202.3011020-4-swboyd@chromium.org
Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 89a4045b917d..f8e4d37c4641 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -692,6 +692,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 }
 
+__printf(2, 3)
 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 {
 	struct va_format vaf;
@@ -806,7 +807,7 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 						void *from, void *to)
 {
-	slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x\n", message, from, to - 1, data);
+	slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
 	memset(from, data, to - from);
 }
 
@@ -1059,13 +1060,13 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 		slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
 			 page->objects, max_objects);
 		page->objects = max_objects;
-		slab_fix(s, "Number of objects adjusted.");
+		slab_fix(s, "Number of objects adjusted");
 	}
 	if (page->inuse != page->objects - nr) {
 		slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
 			 page->inuse, page->objects - nr);
 		page->inuse = page->objects - nr;
-		slab_fix(s, "Object count adjusted.");
+		slab_fix(s, "Object count adjusted");
 	}
 	return search == NULL;
 }

From 792702911f581f7793962fbeb99d5c3a1b28f4c3 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 28 Jun 2021 19:34:52 -0700
Subject: [PATCH 3100/3292] slub: force on no_hash_pointers when slub_debug is
 enabled

Obscuring the pointers that slub shows when debugging makes for some
confusing slub debug messages:

 Padding overwritten. 0x0000000079f0674a-0x000000000d4dce17

Those addresses are hashed for kernel security reasons.  If we're trying
to be secure with slub_debug on the commandline we have some big problems
given that we dump whole chunks of kernel memory to the kernel logs.
Let's force on the no_hash_pointers commandline flag when slub_debug is on
the commandline.  This makes slub debug messages more meaningful and if by
chance a kernel address is in some slub debug object dump we will have a
better chance of figuring out what went wrong.

Note that we don't use %px in the slub code because we want to reduce the
number of places that %px is used in the kernel.  This also nicely prints
a big fat warning at kernel boot if slub_debug is on the commandline so
that we know that this kernel shouldn't be used on production systems.

[akpm@linux-foundation.org: fix build with CONFIG_SLUB_DEBUG=n]

Link: https://lkml.kernel.org/r/20210601182202.3011020-5-swboyd@chromium.org
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Petr Mladek <pmladek@suse.com>
Cc: Joe Perches <joe@perches.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h |  2 ++
 lib/vsprintf.c         |  2 +-
 mm/slub.c              | 20 +++++++++++++++++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 15d8bad3d2f2..bf950621febf 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -357,6 +357,8 @@ int sscanf(const char *, const char *, ...);
 extern __scanf(2, 0)
 int vsscanf(const char *, const char *, va_list);
 
+extern int no_hash_pointers_enable(char *str);
+
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index f0c35d9b65bf..cc281f5895f9 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -2186,7 +2186,7 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode,
 bool no_hash_pointers __ro_after_init;
 EXPORT_SYMBOL_GPL(no_hash_pointers);
 
-static int __init no_hash_pointers_enable(char *str)
+int __init no_hash_pointers_enable(char *str)
 {
 	if (no_hash_pointers)
 		return 0;
diff --git a/mm/slub.c b/mm/slub.c
index f8e4d37c4641..4b2ba9c099c9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -118,12 +118,26 @@
  */
 
 #ifdef CONFIG_SLUB_DEBUG
+
 #ifdef CONFIG_SLUB_DEBUG_ON
 DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
 #else
 DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
-#endif
+
+static inline bool __slub_debug_enabled(void)
+{
+	return static_branch_unlikely(&slub_debug_enabled);
+}
+
+#else		/* CONFIG_SLUB_DEBUG */
+
+static inline bool __slub_debug_enabled(void)
+{
+	return false;
+}
+
+#endif		/* CONFIG_SLUB_DEBUG */
 
 static inline bool kmem_cache_debug(struct kmem_cache *s)
 {
@@ -4487,6 +4501,10 @@ void __init kmem_cache_init(void)
 	if (debug_guardpage_minorder())
 		slub_max_order = 0;
 
+	/* Print slub debugging pointers without hashing */
+	if (__slub_debug_enabled())
+		no_hash_pointers_enable(NULL);
+
 	kmem_cache_node = &boot_kmem_cache_node;
 	kmem_cache = &boot_kmem_cache;
 

From 64dd68497be76ab4e237cca06f5324e220d0f050 Mon Sep 17 00:00:00 2001
From: Faiyaz Mohammed <faiyazm@codeaurora.org>
Date: Mon, 28 Jun 2021 19:34:55 -0700
Subject: [PATCH 3101/3292] mm: slub: move sysfs slab alloc/free interfaces to
 debugfs

alloc_calls and free_calls implementation in sysfs have two issues, one is
PAGE_SIZE limitation of sysfs and other is it does not adhere to "one
value per file" rule.

To overcome this issues, move the alloc_calls and free_calls
implementation to debugfs.

Debugfs cache will be created if SLAB_STORE_USER flag is set.

Rename the alloc_calls/free_calls to alloc_traces/free_traces, to be
inline with what it does.

[faiyazm@codeaurora.org: fix the leak of alloc/free traces debugfs interface]
  Link: https://lkml.kernel.org/r/1624248060-30286-1-git-send-email-faiyazm@codeaurora.org

Link: https://lkml.kernel.org/r/1623438200-19361-1-git-send-email-faiyazm@codeaurora.org
Signed-off-by: Faiyaz Mohammed <faiyazm@codeaurora.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab.h        |   6 ++
 mm/slab_common.c |   2 +
 mm/slub.c        | 274 +++++++++++++++++++++++++++++++----------------
 3 files changed, 189 insertions(+), 93 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 9b690fa44cae..7f9b4bd9fc65 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -631,6 +631,12 @@ static inline bool slab_want_init_on_free(struct kmem_cache *c)
 	return false;
 }
 
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+void debugfs_slab_release(struct kmem_cache *);
+#else
+static inline void debugfs_slab_release(struct kmem_cache *s) { }
+#endif
+
 #ifdef CONFIG_PRINTK
 #define KS_ADDRS_COUNT 16
 struct kmem_obj_info {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index b97b6fa8a7c6..6c0db9f9bd8a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -448,6 +448,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 	rcu_barrier();
 
 	list_for_each_entry_safe(s, s2, &to_destroy, list) {
+		debugfs_slab_release(s);
 		kfence_shutdown_cache(s);
 #ifdef SLAB_SUPPORTS_SYSFS
 		sysfs_slab_release(s);
@@ -475,6 +476,7 @@ static int shutdown_cache(struct kmem_cache *s)
 		schedule_work(&slab_caches_to_rcu_destroy_work);
 	} else {
 		kfence_shutdown_cache(s);
+		debugfs_slab_release(s);
 #ifdef SLAB_SUPPORTS_SYSFS
 		sysfs_slab_unlink(s);
 		sysfs_slab_release(s);
diff --git a/mm/slub.c b/mm/slub.c
index 4b2ba9c099c9..70bb844a44d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -38,6 +38,7 @@
 #include <linux/random.h>
 #include <kunit/test.h>
 
+#include <linux/debugfs.h>
 #include <trace/events/kmem.h>
 
 #include "internal.h"
@@ -238,6 +239,12 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
 #endif
 
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+static void debugfs_slab_add(struct kmem_cache *);
+#else
+static inline void debugfs_slab_add(struct kmem_cache *s) { }
+#endif
+
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
 #ifdef CONFIG_SLUB_STATS
@@ -4593,6 +4600,9 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
 	if (err)
 		__kmem_cache_release(s);
 
+	if (s->flags & SLAB_STORE_USER)
+		debugfs_slab_add(s);
+
 	return err;
 }
 
@@ -4739,6 +4749,7 @@ long validate_slab_cache(struct kmem_cache *s)
 }
 EXPORT_SYMBOL(validate_slab_cache);
 
+#ifdef CONFIG_DEBUG_FS
 /*
  * Generate lists of code addresses where slabcache objects are allocated
  * and freed.
@@ -4762,6 +4773,8 @@ struct loc_track {
 	struct location *loc;
 };
 
+static struct dentry *slab_debugfs_root;
+
 static void free_loc_track(struct loc_track *t)
 {
 	if (t->max)
@@ -4878,82 +4891,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
 			add_location(t, s, get_track(s, p, alloc));
 	put_map(map);
 }
-
-static int list_locations(struct kmem_cache *s, char *buf,
-			  enum track_item alloc)
-{
-	int len = 0;
-	unsigned long i;
-	struct loc_track t = { 0, 0, NULL };
-	int node;
-	struct kmem_cache_node *n;
-
-	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
-			     GFP_KERNEL)) {
-		return sysfs_emit(buf, "Out of memory\n");
-	}
-	/* Push back cpu slabs */
-	flush_all(s);
-
-	for_each_kmem_cache_node(s, node, n) {
-		unsigned long flags;
-		struct page *page;
-
-		if (!atomic_long_read(&n->nr_slabs))
-			continue;
-
-		spin_lock_irqsave(&n->list_lock, flags);
-		list_for_each_entry(page, &n->partial, slab_list)
-			process_slab(&t, s, page, alloc);
-		list_for_each_entry(page, &n->full, slab_list)
-			process_slab(&t, s, page, alloc);
-		spin_unlock_irqrestore(&n->list_lock, flags);
-	}
-
-	for (i = 0; i < t.count; i++) {
-		struct location *l = &t.loc[i];
-
-		len += sysfs_emit_at(buf, len, "%7ld ", l->count);
-
-		if (l->addr)
-			len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
-		else
-			len += sysfs_emit_at(buf, len, "<not-available>");
-
-		if (l->sum_time != l->min_time)
-			len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
-					     l->min_time,
-					     (long)div_u64(l->sum_time,
-							   l->count),
-					     l->max_time);
-		else
-			len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
-
-		if (l->min_pid != l->max_pid)
-			len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
-					     l->min_pid, l->max_pid);
-		else
-			len += sysfs_emit_at(buf, len, " pid=%ld",
-					     l->min_pid);
-
-		if (num_online_cpus() > 1 &&
-		    !cpumask_empty(to_cpumask(l->cpus)))
-			len += sysfs_emit_at(buf, len, " cpus=%*pbl",
-					     cpumask_pr_args(to_cpumask(l->cpus)));
-
-		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
-			len += sysfs_emit_at(buf, len, " nodes=%*pbl",
-					     nodemask_pr_args(&l->nodes));
-
-		len += sysfs_emit_at(buf, len, "\n");
-	}
-
-	free_loc_track(&t);
-	if (!t.count)
-		len += sysfs_emit_at(buf, len, "No data\n");
-
-	return len;
-}
+#endif  /* CONFIG_DEBUG_FS   */
 #endif	/* CONFIG_SLUB_DEBUG */
 
 #ifdef CONFIG_SYSFS
@@ -5343,21 +5281,6 @@ static ssize_t validate_store(struct kmem_cache *s,
 }
 SLAB_ATTR(validate);
 
-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
-{
-	if (!(s->flags & SLAB_STORE_USER))
-		return -ENOSYS;
-	return list_locations(s, buf, TRACK_ALLOC);
-}
-SLAB_ATTR_RO(alloc_calls);
-
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
-{
-	if (!(s->flags & SLAB_STORE_USER))
-		return -ENOSYS;
-	return list_locations(s, buf, TRACK_FREE);
-}
-SLAB_ATTR_RO(free_calls);
 #endif /* CONFIG_SLUB_DEBUG */
 
 #ifdef CONFIG_FAILSLAB
@@ -5521,8 +5444,6 @@ static struct attribute *slab_attrs[] = {
 	&poison_attr.attr,
 	&store_user_attr.attr,
 	&validate_attr.attr,
-	&alloc_calls_attr.attr,
-	&free_calls_attr.attr,
 #endif
 #ifdef CONFIG_ZONE_DMA
 	&cache_dma_attr.attr,
@@ -5810,6 +5731,173 @@ static int __init slab_sysfs_init(void)
 __initcall(slab_sysfs_init);
 #endif /* CONFIG_SYSFS */
 
+#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
+static int slab_debugfs_show(struct seq_file *seq, void *v)
+{
+
+	struct location *l;
+	unsigned int idx = *(unsigned int *)v;
+	struct loc_track *t = seq->private;
+
+	if (idx < t->count) {
+		l = &t->loc[idx];
+
+		seq_printf(seq, "%7ld ", l->count);
+
+		if (l->addr)
+			seq_printf(seq, "%pS", (void *)l->addr);
+		else
+			seq_puts(seq, "<not-available>");
+
+		if (l->sum_time != l->min_time) {
+			seq_printf(seq, " age=%ld/%llu/%ld",
+				l->min_time, div_u64(l->sum_time, l->count),
+				l->max_time);
+		} else
+			seq_printf(seq, " age=%ld", l->min_time);
+
+		if (l->min_pid != l->max_pid)
+			seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
+		else
+			seq_printf(seq, " pid=%ld",
+				l->min_pid);
+
+		if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
+			seq_printf(seq, " cpus=%*pbl",
+				 cpumask_pr_args(to_cpumask(l->cpus)));
+
+		if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
+			seq_printf(seq, " nodes=%*pbl",
+				 nodemask_pr_args(&l->nodes));
+
+		seq_puts(seq, "\n");
+	}
+
+	if (!idx && !t->count)
+		seq_puts(seq, "No data\n");
+
+	return 0;
+}
+
+static void slab_debugfs_stop(struct seq_file *seq, void *v)
+{
+}
+
+static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+	struct loc_track *t = seq->private;
+
+	v = ppos;
+	++*ppos;
+	if (*ppos <= t->count)
+		return v;
+
+	return NULL;
+}
+
+static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
+{
+	return ppos;
+}
+
+static const struct seq_operations slab_debugfs_sops = {
+	.start  = slab_debugfs_start,
+	.next   = slab_debugfs_next,
+	.stop   = slab_debugfs_stop,
+	.show   = slab_debugfs_show,
+};
+
+static int slab_debug_trace_open(struct inode *inode, struct file *filep)
+{
+
+	struct kmem_cache_node *n;
+	enum track_item alloc;
+	int node;
+	struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
+						sizeof(struct loc_track));
+	struct kmem_cache *s = file_inode(filep)->i_private;
+
+	if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
+		alloc = TRACK_ALLOC;
+	else
+		alloc = TRACK_FREE;
+
+	if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL))
+		return -ENOMEM;
+
+	/* Push back cpu slabs */
+	flush_all(s);
+
+	for_each_kmem_cache_node(s, node, n) {
+		unsigned long flags;
+		struct page *page;
+
+		if (!atomic_long_read(&n->nr_slabs))
+			continue;
+
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(page, &n->partial, slab_list)
+			process_slab(t, s, page, alloc);
+		list_for_each_entry(page, &n->full, slab_list)
+			process_slab(t, s, page, alloc);
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+
+	return 0;
+}
+
+static int slab_debug_trace_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct loc_track *t = seq->private;
+
+	free_loc_track(t);
+	return seq_release_private(inode, file);
+}
+
+static const struct file_operations slab_debugfs_fops = {
+	.open    = slab_debug_trace_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = slab_debug_trace_release,
+};
+
+static void debugfs_slab_add(struct kmem_cache *s)
+{
+	struct dentry *slab_cache_dir;
+
+	if (unlikely(!slab_debugfs_root))
+		return;
+
+	slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
+
+	debugfs_create_file("alloc_traces", 0400,
+		slab_cache_dir, s, &slab_debugfs_fops);
+
+	debugfs_create_file("free_traces", 0400,
+		slab_cache_dir, s, &slab_debugfs_fops);
+}
+
+void debugfs_slab_release(struct kmem_cache *s)
+{
+	debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
+}
+
+static int __init slab_debugfs_init(void)
+{
+	struct kmem_cache *s;
+
+	slab_debugfs_root = debugfs_create_dir("slab", NULL);
+
+	list_for_each_entry(s, &slab_caches, list)
+		if (s->flags & SLAB_STORE_USER)
+			debugfs_slab_add(s);
+
+	return 0;
+
+}
+__initcall(slab_debugfs_init);
+#endif
 /*
  * The /proc/slabinfo ABI
  */

From 65ebdeef103fd70988fdd0ffef1d4fecb0cb97ed Mon Sep 17 00:00:00 2001
From: Georgi Djakov <quic_c_gdjako@quicinc.com>
Date: Mon, 28 Jun 2021 19:34:58 -0700
Subject: [PATCH 3102/3292] mm/slub: add taint after the errors are printed

When running the kernel with panic_on_taint, the usual slub debug error
messages are not being printed when object corruption happens.  That's
because we panic in add_taint(), which is called before printing the
additional information.  This is a bit unfortunate as the error messages
are actually very useful, especially before a panic.  Let's fix this by
moving add_taint() after the errors are printed on the console.

Link: https://lkml.kernel.org/r/1623860738-146761-1-git-send-email-quic_c_gdjako@quicinc.com
Signed-off-by: Georgi Djakov <quic_c_gdjako@quicinc.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 70bb844a44d2..3bc8b940c933 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -708,8 +708,6 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	pr_err("=============================================================================\n");
 	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
 	pr_err("-----------------------------------------------------------------------------\n\n");
-
-	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 	va_end(args);
 }
 
@@ -790,6 +788,7 @@ void object_err(struct kmem_cache *s, struct page *page,
 
 	slab_bug(s, "%s", reason);
 	print_trailer(s, page, object);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
@@ -807,6 +806,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 	slab_bug(s, "%s", buf);
 	print_page_info(page);
 	dump_stack();
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static void init_object(struct kmem_cache *s, void *object, u8 val)
@@ -858,6 +858,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 					fault, end - 1, fault - addr,
 					fault[0], value);
 	print_trailer(s, page, object);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 
 skip_bug_print:
 	restore_bytes(s, what, value, fault, end);

From 54dd200c5a251b5db9f6f0f72a251c28e0d7da43 Mon Sep 17 00:00:00 2001
From: Yanfei Xu <yanfei.xu@windriver.com>
Date: Mon, 28 Jun 2021 19:35:01 -0700
Subject: [PATCH 3103/3292] mm/kmemleak: fix possible wrong memory scanning
 period

This commit contains 3 modifications:

1. Convert the type of jiffies_scan_wait to "unsigned long".

2. Use READ/WRITE_ONCE() for accessing "jiffies_scan_wait".

3. Fix the possible wrong memory scanning period.  If you set a large
   memory scanning period like blow, then the "secs" variable will be
   non-zero, however the value of "jiffies_scan_wait" will be zero.

    echo "scan=0x10000000" > /sys/kernel/debug/kmemleak

It is because the type of the msecs_to_jiffies()'s parameter is "unsigned
int", and the "secs * 1000" is larger than its max value.  This in turn
leads a unexpected jiffies_scan_wait, maybe zero.  We corret it by
replacing kstrtoul() with kstrtouint(), and check the msecs to prevent it
larger than UINT_MAX.

Link: https://lkml.kernel.org/r/20210613174022.23044-1-yanfei.xu@windriver.com
Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kmemleak.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 92a2d4885808..228a2fbe0657 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -219,7 +219,7 @@ static struct task_struct *scan_thread;
 static unsigned long jiffies_min_age;
 static unsigned long jiffies_last_scan;
 /* delay between automatic memory scannings */
-static signed long jiffies_scan_wait;
+static unsigned long jiffies_scan_wait;
 /* enables or disables the task stacks scanning */
 static int kmemleak_stack_scan = 1;
 /* protects the memory scanning, parameters and debug/kmemleak file access */
@@ -1567,7 +1567,7 @@ static int kmemleak_scan_thread(void *arg)
 	}
 
 	while (!kthread_should_stop()) {
-		signed long timeout = jiffies_scan_wait;
+		signed long timeout = READ_ONCE(jiffies_scan_wait);
 
 		mutex_lock(&scan_mutex);
 		kmemleak_scan();
@@ -1807,14 +1807,20 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
 	else if (strncmp(buf, "scan=off", 8) == 0)
 		stop_scan_thread();
 	else if (strncmp(buf, "scan=", 5) == 0) {
-		unsigned long secs;
+		unsigned secs;
+		unsigned long msecs;
 
-		ret = kstrtoul(buf + 5, 0, &secs);
+		ret = kstrtouint(buf + 5, 0, &secs);
 		if (ret < 0)
 			goto out;
+
+		msecs = secs * MSEC_PER_SEC;
+		if (msecs > UINT_MAX)
+			msecs = UINT_MAX;
+
 		stop_scan_thread();
-		if (secs) {
-			jiffies_scan_wait = msecs_to_jiffies(secs * 1000);
+		if (msecs) {
+			WRITE_ONCE(jiffies_scan_wait, msecs_to_jiffies(msecs));
 			start_scan_thread();
 		}
 	} else if (strncmp(buf, "scan", 4) == 0)

From 1a14e3779dd58c16b30e56558146e5cc850ba8b0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 28 Jun 2021 19:35:04 -0700
Subject: [PATCH 3104/3292] dax: fix ENOMEM handling in grab_mapping_entry()

grab_mapping_entry() has a bug in handling of ENOMEM condition.  Suppose
we have a PMD entry at index i which we are downgrading to a PTE entry.
grab_mapping_entry() will set pmd_downgrade to true, lock the entry, clear
the entry in xarray, and decrement mapping->nrpages.  The it will call:

	entry = dax_make_entry(pfn_to_pfn_t(0), flags);
	dax_lock_entry(xas, entry);

which inserts new PTE entry into xarray.  However this may fail allocating
the new node.  We handle this by:

	if (xas_nomem(xas, mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM))
		goto retry;

however pmd_downgrade stays set to true even though 'entry' returned from
get_unlocked_entry() will be NULL now.  And we will go again through the
downgrade branch.  This is mostly harmless except that mapping->nrpages is
decremented again and we temporarily have an invalid entry stored in
xarray.  Fix the problem by setting pmd_downgrade to false each time we
lookup the entry we work with so that it matches the entry we found.

Link: https://lkml.kernel.org/r/20210622160015.18004-1-jack@suse.cz
Fixes: b15cd800682f ("dax: Convert page fault handlers to XArray")
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 62352cbcf0f4..da41f9363568 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -488,10 +488,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
 		struct address_space *mapping, unsigned int order)
 {
 	unsigned long index = xas->xa_index;
-	bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
+	bool pmd_downgrade;	/* splitting PMD entry into PTE entries? */
 	void *entry;
 
 retry:
+	pmd_downgrade = false;
 	xas_lock_irq(xas);
 	entry = get_unlocked_entry(xas, order);
 

From 85f29cd6a12d430706c39247e7d0207590f581df Mon Sep 17 00:00:00 2001
From: Tang Bin <tangbin@cmss.chinamobile.com>
Date: Mon, 28 Jun 2021 19:35:07 -0700
Subject: [PATCH 3105/3292] tools/vm/page_owner_sort.c: check malloc() return

Link: https://lkml.kernel.org/r/20210506131402.10416-1-tangbin@cmss.chinamobile.com
Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Tang Bin <tangbin@cmss.chinamobile.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/vm/page_owner_sort.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index 85eb65ea16d3..0e75f22c9475 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -132,6 +132,10 @@ int main(int argc, char **argv)
 	qsort(list, list_size, sizeof(list[0]), compare_txt);
 
 	list2 = malloc(sizeof(*list) * list_size);
+	if (!list2) {
+		printf("Out of memory\n");
+		exit(1);
+	}
 
 	printf("culling\n");
 

From 65ac1a60a57e2c55f2ac37f27095f6b012295e81 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 28 Jun 2021 19:35:10 -0700
Subject: [PATCH 3106/3292] mm/debug_vm_pgtable: ensure THP availability via
 has_transparent_hugepage()

On certain platforms, THP support could not just be validated via the
build option CONFIG_TRANSPARENT_HUGEPAGE.  Instead
has_transparent_hugepage() also needs to be called upon to verify THP
runtime support.  Otherwise the debug test will just run into unusable THP
helpers like in the case of a 4K hash config on powerpc platform [1].
This just moves all pfn_pmd() and pfn_pud() after THP runtime validation
with has_transparent_hugepage() which prevents the mentioned problem.

[1] https://bugzilla.kernel.org/show_bug.cgi?id=213069

Link: https://lkml.kernel.org/r/1621397588-19211-1-git-send-email-anshuman.khandual@arm.com
Fixes: 787d563b8642 ("mm/debug_vm_pgtable: fix kernel crash by checking for THP support")
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/debug_vm_pgtable.c | 63 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 12 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 297d1b349c19..92bfc37300df 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -146,13 +146,14 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 static void __init pmd_basic_tests(unsigned long pfn, int idx)
 {
 	pgprot_t prot = protection_map[idx];
-	pmd_t pmd = pfn_pmd(pfn, prot);
 	unsigned long val = idx, *ptr = &val;
+	pmd_t pmd;
 
 	if (!has_transparent_hugepage())
 		return;
 
 	pr_debug("Validating PMD basic (%pGv)\n", ptr);
+	pmd = pfn_pmd(pfn, prot);
 
 	/*
 	 * This test needs to be executed after the given page table entry
@@ -185,7 +186,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 				      unsigned long pfn, unsigned long vaddr,
 				      pgprot_t prot, pgtable_t pgtable)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
 
 	if (!has_transparent_hugepage())
 		return;
@@ -232,9 +233,14 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 
 static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
+
+	if (!has_transparent_hugepage())
+		return;
 
 	pr_debug("Validating PMD leaf\n");
+	pmd = pfn_pmd(pfn, prot);
+
 	/*
 	 * PMD based THP is a leaf entry.
 	 */
@@ -267,12 +273,16 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
 
 static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
 
+	if (!has_transparent_hugepage())
+		return;
+
 	pr_debug("Validating PMD saved write\n");
+	pmd = pfn_pmd(pfn, prot);
 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
 	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
 }
@@ -281,13 +291,14 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx)
 {
 	pgprot_t prot = protection_map[idx];
-	pud_t pud = pfn_pud(pfn, prot);
 	unsigned long val = idx, *ptr = &val;
+	pud_t pud;
 
 	if (!has_transparent_hugepage())
 		return;
 
 	pr_debug("Validating PUD basic (%pGv)\n", ptr);
+	pud = pfn_pud(pfn, prot);
 
 	/*
 	 * This test needs to be executed after the given page table entry
@@ -323,7 +334,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 				      unsigned long pfn, unsigned long vaddr,
 				      pgprot_t prot)
 {
-	pud_t pud = pfn_pud(pfn, prot);
+	pud_t pud;
 
 	if (!has_transparent_hugepage())
 		return;
@@ -332,6 +343,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 	/* Align the address wrt HPAGE_PUD_SIZE */
 	vaddr &= HPAGE_PUD_MASK;
 
+	pud = pfn_pud(pfn, prot);
 	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_set_wrprotect(mm, vaddr, pudp);
 	pud = READ_ONCE(*pudp);
@@ -370,9 +382,13 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 
 static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
 {
-	pud_t pud = pfn_pud(pfn, prot);
+	pud_t pud;
+
+	if (!has_transparent_hugepage())
+		return;
 
 	pr_debug("Validating PUD leaf\n");
+	pud = pfn_pud(pfn, prot);
 	/*
 	 * PUD based THP is a leaf entry.
 	 */
@@ -654,12 +670,16 @@ static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
+	pmd_t pmd;
 
 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
 		return;
 
+	if (!has_transparent_hugepage())
+		return;
+
 	pr_debug("Validating PMD protnone\n");
+	pmd = pmd_mkhuge(pfn_pmd(pfn, prot));
 	WARN_ON(!pmd_protnone(pmd));
 	WARN_ON(!pmd_present(pmd));
 }
@@ -679,18 +699,26 @@ static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
+
+	if (!has_transparent_hugepage())
+		return;
 
 	pr_debug("Validating PMD devmap\n");
+	pmd = pfn_pmd(pfn, prot);
 	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
 {
-	pud_t pud = pfn_pud(pfn, prot);
+	pud_t pud;
+
+	if (!has_transparent_hugepage())
+		return;
 
 	pr_debug("Validating PUD devmap\n");
+	pud = pfn_pud(pfn, prot);
 	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
 }
 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
@@ -733,25 +761,33 @@ static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
 		return;
 
+	if (!has_transparent_hugepage())
+		return;
+
 	pr_debug("Validating PMD soft dirty\n");
+	pmd = pfn_pmd(pfn, prot);
 	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
 	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
 }
 
 static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
 {
-	pmd_t pmd = pfn_pmd(pfn, prot);
+	pmd_t pmd;
 
 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
 		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
 		return;
 
+	if (!has_transparent_hugepage())
+		return;
+
 	pr_debug("Validating PMD swap soft dirty\n");
+	pmd = pfn_pmd(pfn, prot);
 	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
 	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
 }
@@ -780,6 +816,9 @@ static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
 	swp_entry_t swp;
 	pmd_t pmd;
 
+	if (!has_transparent_hugepage())
+		return;
+
 	pr_debug("Validating PMD swap\n");
 	pmd = pfn_pmd(pfn, prot);
 	swp = __pmd_to_swp_entry(pmd);

From 832b50725373e8c46781b7d4db104ec9cf564a6b Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Date: Mon, 28 Jun 2021 19:35:13 -0700
Subject: [PATCH 3107/3292] mm: mmap_lock: use local locks instead of disabling
 preemption

mmap_lock will explicitly disable/enable preemption upon manipulating its
local CPU variables.  This is to be expected, but in this case, it doesn't
play well with PREEMPT_RT.  The preemption disabled code section also
takes a spin-lock.  Spin-locks in RT systems will try to schedule, which
is exactly what we're trying to avoid.

To mitigate this, convert the explicit preemption handling to local_locks.
Which are RT aware, and will disable migration instead of preemption when
PREEMPT_RT=y.

The faulty call trace looks like the following:
    __mmap_lock_do_trace_*()
      preempt_disable()
      get_mm_memcg_path()
        cgroup_path()
          kernfs_path_from_node()
            spin_lock_irqsave() /* Scheduling while atomic! */

Link: https://lkml.kernel.org/r/20210604163506.2103900-1-nsaenzju@redhat.com
Fixes: 2b5067a8143e3 ("mm: mmap_lock: add tracepoints around lock acquisition ")
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Tested-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap_lock.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index dcdde4f722a4..2ae3f33b85b1 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -11,6 +11,7 @@
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <linux/trace_events.h>
+#include <linux/local_lock.h>
 
 EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
 EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
@@ -39,21 +40,30 @@ static int reg_refcount; /* Protected by reg_lock. */
  */
 #define CONTEXT_COUNT 4
 
-static DEFINE_PER_CPU(char __rcu *, memcg_path_buf);
+struct memcg_path {
+	local_lock_t lock;
+	char __rcu *buf;
+	local_t buf_idx;
+};
+static DEFINE_PER_CPU(struct memcg_path, memcg_paths) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+	.buf_idx = LOCAL_INIT(0),
+};
+
 static char **tmp_bufs;
-static DEFINE_PER_CPU(int, memcg_path_buf_idx);
 
 /* Called with reg_lock held. */
 static void free_memcg_path_bufs(void)
 {
+	struct memcg_path *memcg_path;
 	int cpu;
 	char **old = tmp_bufs;
 
 	for_each_possible_cpu(cpu) {
-		*(old++) = rcu_dereference_protected(
-			per_cpu(memcg_path_buf, cpu),
+		memcg_path = per_cpu_ptr(&memcg_paths, cpu);
+		*(old++) = rcu_dereference_protected(memcg_path->buf,
 			lockdep_is_held(&reg_lock));
-		rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), NULL);
+		rcu_assign_pointer(memcg_path->buf, NULL);
 	}
 
 	/* Wait for inflight memcg_path_buf users to finish. */
@@ -88,7 +98,7 @@ int trace_mmap_lock_reg(void)
 		new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
 		if (new == NULL)
 			goto out_fail_free;
-		rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), new);
+		rcu_assign_pointer(per_cpu_ptr(&memcg_paths, cpu)->buf, new);
 		/* Don't need to wait for inflights, they'd have gotten NULL. */
 	}
 
@@ -122,23 +132,24 @@ out:
 
 static inline char *get_memcg_path_buf(void)
 {
+	struct memcg_path *memcg_path = this_cpu_ptr(&memcg_paths);
 	char *buf;
 	int idx;
 
 	rcu_read_lock();
-	buf = rcu_dereference(*this_cpu_ptr(&memcg_path_buf));
+	buf = rcu_dereference(memcg_path->buf);
 	if (buf == NULL) {
 		rcu_read_unlock();
 		return NULL;
 	}
-	idx = this_cpu_add_return(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE) -
+	idx = local_add_return(MEMCG_PATH_BUF_SIZE, &memcg_path->buf_idx) -
 	      MEMCG_PATH_BUF_SIZE;
 	return &buf[idx];
 }
 
 static inline void put_memcg_path_buf(void)
 {
-	this_cpu_sub(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE);
+	local_sub(MEMCG_PATH_BUF_SIZE, &this_cpu_ptr(&memcg_paths)->buf_idx);
 	rcu_read_unlock();
 }
 
@@ -179,14 +190,14 @@ out:
 #define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
 	do {                                                                   \
 		const char *memcg_path;                                        \
-		preempt_disable();                                             \
+		local_lock(&memcg_paths.lock);				       \
 		memcg_path = get_mm_memcg_path(mm);                            \
 		trace_mmap_lock_##type(mm,                                     \
 				       memcg_path != NULL ? memcg_path : "",   \
 				       ##__VA_ARGS__);                         \
 		if (likely(memcg_path != NULL))                                \
 			put_memcg_path_buf();                                  \
-		preempt_enable();                                              \
+		local_unlock(&memcg_paths.lock);			       \
 	} while (0)
 
 #else /* !CONFIG_MEMCG */

From 5631de543acb5c7a740534e727f7432e45a9e6dd Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 28 Jun 2021 19:35:16 -0700
Subject: [PATCH 3108/3292] mm/page_reporting: fix code style in
 __page_reporting_request()

Patch series "mm/page_reporting: Make page reporting work on arm64 with 64KB page size", v4.

The page reporting threshold is currently equal to @pageblock_order, which
is 13 and 512MB on arm64 with 64KB base page size selected.  The page
reporting won't be triggered if the freeing page can't come up with a free
area like that huge.  The condition is hard to be met, especially when the
system memory becomes fragmented.

This series intends to solve the issue by having page reporting threshold
as 5 (2MB) on arm64 with 64KB base page size.  The patches are organized
as:

   PATCH[1/4] Fix some coding style in __page_reporting_request().
   PATCH[2/4] Represents page reporting order with variable so that it can
              be exported as module parameter.
   PATCH[3/4] Allows the device driver (e.g. virtio_balloon) to specify
              the page reporting order when the device info is registered.
   PATCH[4/4] Specifies the page reporting order to 5, corresponding to
              2MB in size on ARM64 when 64KB base page size is used.

This patch (of 4):

The lines of comments would be starting with one, instead two space.  This
corrects the style.

Link: https://lkml.kernel.org/r/20210625014710.42954-1-gshan@redhat.com
Link: https://lkml.kernel.org/r/20210625014710.42954-2-gshan@redhat.com
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_reporting.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index c50d93ffa252..df9c5054e1b4 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -31,8 +31,8 @@ __page_reporting_request(struct page_reporting_dev_info *prdev)
 		return;
 
 	/*
-	 *  If reporting is already active there is nothing we need to do.
-	 *  Test against 0 as that represents PAGE_REPORTING_IDLE.
+	 * If reporting is already active there is nothing we need to do.
+	 * Test against 0 as that represents PAGE_REPORTING_IDLE.
 	 */
 	state = atomic_xchg(&prdev->state, PAGE_REPORTING_REQUESTED);
 	if (state != PAGE_REPORTING_IDLE)

From f58780a8e3851edae5bafb7d3af19425308a37f5 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 28 Jun 2021 19:35:19 -0700
Subject: [PATCH 3109/3292] mm/page_reporting: export reporting order as module
 parameter

The macro PAGE_REPORTING_MIN_ORDER is defined as the page reporting
threshold.  It can't be adjusted at runtime.

This introduces a variable (@page_reporting_order) to replace the marcro
(PAGE_REPORTING_MIN_ORDER).  MAX_ORDER is assigned to it initially,
meaning the page reporting is disabled.  It will be specified by driver if
valid one is provided.  Otherwise, it will fall back to @pageblock_order.
It's also exported so that the page reporting order can be adjusted at
runtime.

Link: https://lkml.kernel.org/r/20210625014710.42954-3-gshan@redhat.com
Signed-off-by: Gavin Shan <gshan@redhat.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
 mm/page_reporting.c                             | 9 +++++++--
 mm/page_reporting.h                             | 5 ++---
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc46..566c4b9af3cd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3566,6 +3566,12 @@
 			off: turn off poisoning (default)
 			on: turn on poisoning
 
+	page_reporting.page_reporting_order=
+			[KNL] Minimal page reporting order
+			Format: <integer>
+			Adjust the minimal page reporting order. The page
+			reporting is disabled when it exceeds (MAX_ORDER-1).
+
 	panic=		[KNL] Kernel behaviour on panic: delay <timeout>
 			timeout > 0: seconds before rebooting
 			timeout = 0: wait forever
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index df9c5054e1b4..34bf4d26c2c4 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -4,12 +4,17 @@
 #include <linux/page_reporting.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
+#include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 
 #include "page_reporting.h"
 #include "internal.h"
 
+unsigned int page_reporting_order = MAX_ORDER;
+module_param(page_reporting_order, uint, 0644);
+MODULE_PARM_DESC(page_reporting_order, "Set page reporting order");
+
 #define PAGE_REPORTING_DELAY	(2 * HZ)
 static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
 
@@ -229,7 +234,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,
 
 	/* Generate minimum watermark to be able to guarantee progress */
 	watermark = low_wmark_pages(zone) +
-		    (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER);
+		    (PAGE_REPORTING_CAPACITY << page_reporting_order);
 
 	/*
 	 * Cancel request if insufficient free memory or if we failed
@@ -239,7 +244,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,
 		return err;
 
 	/* Process each free list starting from lowest order/mt */
-	for (order = PAGE_REPORTING_MIN_ORDER; order < MAX_ORDER; order++) {
+	for (order = page_reporting_order; order < MAX_ORDER; order++) {
 		for (mt = 0; mt < MIGRATE_TYPES; mt++) {
 			/* We do not pull pages from the isolate free list */
 			if (is_migrate_isolate(mt))
diff --git a/mm/page_reporting.h b/mm/page_reporting.h
index 2c385dd4ddbd..c51dbc228b94 100644
--- a/mm/page_reporting.h
+++ b/mm/page_reporting.h
@@ -10,10 +10,9 @@
 #include <linux/pgtable.h>
 #include <linux/scatterlist.h>
 
-#define PAGE_REPORTING_MIN_ORDER	pageblock_order
-
 #ifdef CONFIG_PAGE_REPORTING
 DECLARE_STATIC_KEY_FALSE(page_reporting_enabled);
+extern unsigned int page_reporting_order;
 void __page_reporting_notify(void);
 
 static inline bool page_reported(struct page *page)
@@ -38,7 +37,7 @@ static inline void page_reporting_notify_free(unsigned int order)
 		return;
 
 	/* Determine if we have crossed reporting threshold */
-	if (order < PAGE_REPORTING_MIN_ORDER)
+	if (order < page_reporting_order)
 		return;
 
 	/* This will add a few cycles, but should be called infrequently */

From 9f849c6f9572d8cef407f55928d3dc68fc42ad3e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 28 Jun 2021 19:35:22 -0700
Subject: [PATCH 3110/3292] mm/page_reporting: allow driver to specify
 reporting order

The page reporting order (threshold) is sticky to @pageblock_order by
default.  The page reporting can never be triggered because the freeing
page can't come up with a free area like that huge.  The situation becomes
worse when the system memory becomes heavily fragmented.

For example, the following configurations are used on ARM64 when 64KB base
page size is enabled.  In this specific case, the page reporting won't be
triggered until the freeing page comes up with a 512MB free area.  That's
hard to be met, especially when the system memory becomes heavily
fragmented.

   PAGE_SIZE:          64KB
   HPAGE_SIZE:         512MB
   pageblock_order:    13       (512MB)
   MAX_ORDER:          14

This allows the drivers to specify the page reporting order when the page
reporting device is registered.  It falls back to @pageblock_order if it's
not specified by the driver.  The existing users (hv_balloon and
virtio_balloon) don't specify it and @pageblock_order is still taken as
their page reporting order.  So this shouldn't introduce any functional
changes.

Link: https://lkml.kernel.org/r/20210625014710.42954-4-gshan@redhat.com
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_reporting.h | 3 +++
 mm/page_reporting.c            | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h
index 3b99e0ec24f2..fe648dfa3a7c 100644
--- a/include/linux/page_reporting.h
+++ b/include/linux/page_reporting.h
@@ -18,6 +18,9 @@ struct page_reporting_dev_info {
 
 	/* Current state of page reporting */
 	atomic_t state;
+
+	/* Minimal order of page reporting */
+	unsigned int order;
 };
 
 /* Tear-down and bring-up for page reporting devices */
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index 34bf4d26c2c4..382958eef8a9 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -329,6 +329,12 @@ int page_reporting_register(struct page_reporting_dev_info *prdev)
 		goto err_out;
 	}
 
+	/*
+	 * Update the page reporting order if it's specified by driver.
+	 * Otherwise, it falls back to @pageblock_order.
+	 */
+	page_reporting_order = prdev->order ? : pageblock_order;
+
 	/* initialize state and work structures */
 	atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
 	INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);

From f8af4d0892cbb84fc3913de75ba5da374147a691 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 28 Jun 2021 19:35:25 -0700
Subject: [PATCH 3111/3292] virtio_balloon: specify page reporting order if
 needed

The page reporting won't be triggered if the freeing page can't come up
with a free area, whose size is equal or bigger than the threshold (page
reporting order).  The default page reporting order, equal to
@pageblock_order, is too huge on some architectures to trigger page
reporting.  One example is ARM64 when 64KB base page size is used.

      PAGE_SIZE:          64KB
      pageblock_order:    13       (512MB)
      MAX_ORDER:          14

This specifies the page reporting order to 5 (2MB) for this specific case
so that page reporting can be triggered.

Link: https://lkml.kernel.org/r/20210625014710.42954-5-gshan@redhat.com
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/virtio/virtio_balloon.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 510e9318854d..47dce91f788c 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -993,6 +993,23 @@ static int virtballoon_probe(struct virtio_device *vdev)
 			goto out_unregister_oom;
 		}
 
+		/*
+		 * The default page reporting order is @pageblock_order, which
+		 * corresponds to 512MB in size on ARM64 when 64KB base page
+		 * size is used. The page reporting won't be triggered if the
+		 * freeing page can't come up with a free area like that huge.
+		 * So we specify the page reporting order to 5, corresponding
+		 * to 2MB. It helps to avoid THP splitting if 4KB base page
+		 * size is used by host.
+		 *
+		 * Ideally, the page reporting order is selected based on the
+		 * host's base page size. However, it needs more work to report
+		 * that value. The hard-coded order would be fine currently.
+		 */
+#if defined(CONFIG_ARM64) && defined(CONFIG_ARM64_64K_PAGES)
+		vb->pr_dev_info.order = 5;
+#endif
+
 		err = page_reporting_register(&vb->pr_dev_info);
 		if (err)
 			goto out_unregister_oom;

From 5defd497ed78fdc2bad115b0b4316c0c0de8b485 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 28 Jun 2021 19:35:28 -0700
Subject: [PATCH 3112/3292] mm: page-writeback: kill get_writeback_state()
 comments

The get_writeback_state() has gone since 2006, kill related comments.

Link: https://lkml.kernel.org/r/20210508125026.56600-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0062d5c57d41..1bbe185a6524 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1869,10 +1869,9 @@ DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
  * which was newly dirtied.  The function will periodically check the system's
  * dirty state and will initiate writeback if needed.
  *
- * On really big machines, get_writeback_state is expensive, so try to avoid
- * calling it too often (ratelimiting).  But once we're over the dirty memory
- * limit we decrease the ratelimiting by a lot, to prevent individual processes
- * from overshooting the limit by (ratelimit_pages) each.
+ * Once we're over the dirty memory limit we decrease the ratelimiting
+ * by a lot, to prevent individual processes from overshooting the limit
+ * by (ratelimit_pages) each.
  */
 void balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
@@ -2045,8 +2044,6 @@ void laptop_sync_completion(void)
 /*
  * If ratelimit_pages is too high then we can get into dirty-data overload
  * if a large number of processes all perform writes at the same time.
- * If it is too low then SMP machines will call the (expensive)
- * get_writeback_state too often.
  *
  * Here we set ratelimit_pages to a level which ensures that when all CPUs are
  * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory

From ab19939a6a5010cba4e9cb04dd8bee03c72edcbd Mon Sep 17 00:00:00 2001
From: Chi Wu <wuchi.zero@gmail.com>
Date: Mon, 28 Jun 2021 19:35:31 -0700
Subject: [PATCH 3113/3292] mm/page-writeback: Fix performance when BDI's share
 of ratio is 0.

Fix performance when BDI's share of ratio is 0.

The issue is similar to commit 74d369443325 ("writeback: Fix
performance regression in wb_over_bg_thresh()").

Balance_dirty_pages and the writeback worker will also disagree on
whether writeback when a BDI uses BDI_CAP_STRICTLIMIT and BDI's share
of the thresh ratio is zero.

For example, A thread on cpu0 writes 32 pages and then
balance_dirty_pages, it will wake up background writeback and pauses
because wb_dirty > wb->wb_thresh = 0 (share of thresh ratio is zero).
A thread may runs on cpu0 again because scheduler prefers pre_cpu.
Then writeback worker may runs on other cpus(1,2..) which causes the
value of wb_stat(wb, WB_RECLAIMABLE) in wb_over_bg_thresh is 0 and does
not writeback and returns.

Thus, balance_dirty_pages keeps looping, sleeping and then waking up the
worker who will do nothing. It remains stuck in this state until the
writeback worker hit the right dirty cpu or the dirty pages expire.

The fix that we should get the wb_stat_sum radically when thresh is low.

Link: https://lkml.kernel.org/r/20210428225046.16301-1-wuchi.zero@gmail.com
Signed-off-by: Chi Wu <wuchi.zero@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Jens Axboe <axboe@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1bbe185a6524..aff3205dfde5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1944,6 +1944,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 	struct dirty_throttle_control * const gdtc = &gdtc_stor;
 	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
 						     &mdtc_stor : NULL;
+	unsigned long reclaimable;
+	unsigned long thresh;
 
 	/*
 	 * Similar to balance_dirty_pages() but ignores pages being written
@@ -1956,8 +1958,13 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 	if (gdtc->dirty > gdtc->bg_thresh)
 		return true;
 
-	if (wb_stat(wb, WB_RECLAIMABLE) >
-	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
+	thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
+	if (thresh < 2 * wb_stat_error())
+		reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
+	else
+		reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+
+	if (reclaimable > thresh)
 		return true;
 
 	if (mdtc) {
@@ -1971,8 +1978,13 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 		if (mdtc->dirty > mdtc->bg_thresh)
 			return true;
 
-		if (wb_stat(wb, WB_RECLAIMABLE) >
-		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
+		thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
+		if (thresh < 2 * wb_stat_error())
+			reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
+		else
+			reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+
+		if (reclaimable > thresh)
 			return true;
 	}
 

From 0323155437870dbbae6e30fb659d7514c9f649da Mon Sep 17 00:00:00 2001
From: Chi Wu <wuchi.zero@gmail.com>
Date: Mon, 28 Jun 2021 19:35:34 -0700
Subject: [PATCH 3114/3292] mm/page-writeback: update the comment of Dirty
 position control

As the value of pos_ratio_polynom() clamp between 0 and 2LL <<
RATELIMIT_CALC_SHIFT, the global control line should be consistent with
it.

Link: https://lkml.kernel.org/r/20210511103606.3732-1-wuchi.zero@gmail.com
Signed-off-by: Chi Wu <wuchi.zero@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Cc: Howard Cochran <hcochran@kernelspring.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index aff3205dfde5..d0f090d682f5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -845,7 +845,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
  *     ^ pos_ratio
  *     |
  *     |            |<===== global dirty control scope ======>|
- * 2.0 .............*
+ * 2.0  * * * * * * *
  *     |            .*
  *     |            . *
  *     |            .   *

From 87e3789749750d83aa085f04f74242087de0154b Mon Sep 17 00:00:00 2001
From: Chi Wu <wuchi.zero@gmail.com>
Date: Mon, 28 Jun 2021 19:35:37 -0700
Subject: [PATCH 3115/3292] mm/page-writeback: use __this_cpu_inc() in
 account_page_dirtied()

As account_page_dirtied() was always protected by xa_lock_irqsave(), so
using __this_cpu_inc() is better.

Link: https://lkml.kernel.org/r/20210512144742.4764-1-wuchi.zero@gmail.com
Signed-off-by: Chi Wu <wuchi.zero@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Howard Cochran <hcochran@kernelspring.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d0f090d682f5..c2a849d653a9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2445,7 +2445,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		inc_wb_stat(wb, WB_DIRTIED);
 		task_io_account_write(PAGE_SIZE);
 		current->nr_dirtied++;
-		this_cpu_inc(bdp_ratelimits);
+		__this_cpu_inc(bdp_ratelimits);
 
 		mem_cgroup_track_foreign_dirty(page, wb);
 	}

From 4ade5867b4b878b00a4526b8621442f9442536ce Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:41 -0700
Subject: [PATCH 3116/3292] writeback, cgroup: do not switch inodes with
 I_WILL_FREE flag

Patch series "cgroup, blkcg: prevent dirty inodes to pin dying memory cgroups", v9.

When an inode is getting dirty for the first time it's associated with a
wb structure (see __inode_attach_wb()).  It can later be switched to
another wb (if e.g.  some other cgroup is writing a lot of data to the
same inode), but otherwise stays attached to the original wb until being
reclaimed.

The problem is that the wb structure holds a reference to the original
memory and blkcg cgroups.  So if an inode has been dirty once and later is
actively used in read-only mode, it has a good chance to pin down the
original memory and blkcg cgroups forever.  This is often the case with
services bringing data for other services, e.g.  updating some rpm
packages.

In the real life it becomes a problem due to a large size of the memcg
structure, which can easily be 1000x larger than an inode.  Also a really
large number of dying cgroups can raise different scalability issues, e.g.
making the memory reclaim costly and less effective.

To solve the problem inodes should be eventually detached from the
corresponding writeback structure.  It's inefficient to do it after every
writeback completion.  Instead it can be done whenever the original memory
cgroup is offlined and writeback structure is getting killed.  Scanning
over a (potentially long) list of inodes and detach them from the
writeback structure can take quite some time.  To avoid scanning all
inodes, attached inodes are kept on a new list (b_attached).  To make it
less noticeable to a user, the scanning and switching is performed from a
work context.

Big thanks to Jan Kara, Dennis Zhou, Hillf Danton and Tejun Heo for their
ideas and contribution to this patchset.

This patch (of 8):

If an inode's state has I_WILL_FREE flag set, the inode will be freed
soon, so there is no point in trying to switch the inode to a different
cgwb.

I_WILL_FREE was ignored since the introduction of the inode switching, so
it looks like it doesn't lead to any noticeable issues for a user.  This
is why the patch is not intended for a stable backport.

Link: https://lkml.kernel.org/r/20210608230225.2078447-1-guro@fb.com
Link: https://lkml.kernel.org/r/20210608230225.2078447-2-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Jan Kara <jack@suse.cz>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e91980f49388..bd99890599e0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -389,10 +389,10 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	xa_lock_irq(&mapping->i_pages);
 
 	/*
-	 * Once I_FREEING is visible under i_lock, the eviction path owns
-	 * the inode and we shouldn't modify ->i_io_list.
+	 * Once I_FREEING or I_WILL_FREE are visible under i_lock, the eviction
+	 * path owns the inode and we shouldn't modify ->i_io_list.
 	 */
-	if (unlikely(inode->i_state & I_FREEING))
+	if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE)))
 		goto skip_switch;
 
 	trace_inode_switch_wbs(inode, old_wb, new_wb);
@@ -517,7 +517,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	/* while holding I_WB_SWITCH, no one else can update the association */
 	spin_lock(&inode->i_lock);
 	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
-	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+	    inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) ||
 	    inode_to_wb(inode) == isw->new_wb) {
 		spin_unlock(&inode->i_lock);
 		goto out_free;

From 592fa002180af3425ba962b8e74edd680f0ec77b Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:44 -0700
Subject: [PATCH 3117/3292] writeback, cgroup: add smp_mb() to
 cgroup_writeback_umount()

A full memory barrier is required between clearing SB_ACTIVE flag in
generic_shutdown_super() and checking isw_nr_in_flight in
cgroup_writeback_umount(), otherwise a new switch operation might be
scheduled after atomic_read(&isw_nr_in_flight) returned 0.  This would
result in a non-flushed isw_wq, and a potential crash.

The problem hasn't yet been seen in the real life and was discovered by
Jan Kara by looking into the code.

Link: https://lkml.kernel.org/r/20210608230225.2078447-3-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Jan Kara <jack@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index bd99890599e0..3564efcc4b78 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1000,6 +1000,12 @@ out_bdi_put:
  */
 void cgroup_writeback_umount(void)
 {
+	/*
+	 * SB_ACTIVE should be reliably cleared before checking
+	 * isw_nr_in_flight, see generic_shutdown_super().
+	 */
+	smp_mb();
+
 	if (atomic_read(&isw_nr_in_flight)) {
 		/*
 		 * Use rcu_barrier() to wait for all pending callbacks to

From 8826ee4fe75051f8cbfa5d4a9aa70565938e724c Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:47 -0700
Subject: [PATCH 3118/3292] writeback, cgroup: increment isw_nr_in_flight
 before grabbing an inode

isw_nr_in_flight is used to determine whether the inode switch queue
should be flushed from the umount path.  Currently it's increased after
grabbing an inode and even scheduling the switch work.  It means the
umount path can walk past cleanup_offline_cgwb() with active inode
references, which can result in a "Busy inodes after unmount." message and
use-after-free issues (with inode->i_sb which gets freed).

Fix it by incrementing isw_nr_in_flight before doing anything with the
inode and decrementing in the case when switching wasn't scheduled.

The problem hasn't yet been seen in the real life and was discovered by
Jan Kara by looking into the code.

Link: https://lkml.kernel.org/r/20210608230225.2078447-4-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Jan Kara <jack@suse.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3564efcc4b78..e2cc860a001b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -505,6 +505,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	if (!isw)
 		return;
 
+	atomic_inc(&isw_nr_in_flight);
+
 	/* find and pin the new wb */
 	rcu_read_lock();
 	memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
@@ -535,11 +537,10 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
 	 */
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
-
-	atomic_inc(&isw_nr_in_flight);
 	return;
 
 out_free:
+	atomic_dec(&isw_nr_in_flight);
 	if (isw->new_wb)
 		wb_put(isw->new_wb);
 	kfree(isw);

From 29264d92a0f157f3147129066d912718b99fc6b0 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:50 -0700
Subject: [PATCH 3119/3292] writeback, cgroup: switch to rcu_work API in
 inode_switch_wbs()

Inode's wb switching requires two steps divided by an RCU grace period.
It's currently implemented as an RCU callback inode_switch_wbs_rcu_fn(),
which schedules inode_switch_wbs_work_fn() as a work.

Switching to the rcu_work API allows to do the same in a cleaner and
slightly shorter form.

Link: https://lkml.kernel.org/r/20210608230225.2078447-5-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e2cc860a001b..96974e13a203 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -335,8 +335,7 @@ struct inode_switch_wbs_context {
 	struct inode		*inode;
 	struct bdi_writeback	*new_wb;
 
-	struct rcu_head		rcu_head;
-	struct work_struct	work;
+	struct rcu_work		work;
 };
 
 static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
@@ -352,7 +351,7 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
 static void inode_switch_wbs_work_fn(struct work_struct *work)
 {
 	struct inode_switch_wbs_context *isw =
-		container_of(work, struct inode_switch_wbs_context, work);
+		container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
 	struct inode *inode = isw->inode;
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	struct address_space *mapping = inode->i_mapping;
@@ -469,16 +468,6 @@ skip_switch:
 	atomic_dec(&isw_nr_in_flight);
 }
 
-static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
-{
-	struct inode_switch_wbs_context *isw = container_of(rcu_head,
-				struct inode_switch_wbs_context, rcu_head);
-
-	/* needs to grab bh-unsafe locks, bounce to work item */
-	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
-	queue_work(isw_wq, &isw->work);
-}
-
 /**
  * inode_switch_wbs - change the wb association of an inode
  * @inode: target inode
@@ -536,7 +525,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	 * lock so that stat transfer can synchronize against them.
 	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
 	 */
-	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
+	INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
+	queue_rcu_work(isw_wq, &isw->work);
 	return;
 
 out_free:

From f3b6a6df38aa514d97e8c6fcc748be1d4142bec9 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:53 -0700
Subject: [PATCH 3120/3292] writeback, cgroup: keep list of inodes attached to
 bdi_writeback

Currently there is no way to iterate over inodes attached to a specific
cgwb structure.  It limits the ability to efficiently reclaim the
writeback structure itself and associated memory and block cgroup
structures without scanning all inodes belonging to a sb, which can be
prohibitively expensive.

While dirty/in-active-writeback an inode belongs to one of the
bdi_writeback's io lists: b_dirty, b_io, b_more_io and b_dirty_time.  Once
cleaned up, it's removed from all io lists.  So the inode->i_io_list can
be reused to maintain the list of inodes, attached to a bdi_writeback
structure.

This patch introduces a new wb->b_attached list, which contains all inodes
which were dirty at least once and are attached to the given cgwb.  Inodes
attached to the root bdi_writeback structures are never placed on such
list.  The following patch will use this list to try to release cgwbs
structures more efficiently.

Link: https://lkml.kernel.org/r/20210608230225.2078447-6-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c                | 93 ++++++++++++++++++++------------
 include/linux/backing-dev-defs.h |  1 +
 mm/backing-dev.c                 |  2 +
 3 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 96974e13a203..87b305ee5348 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode,
 	return false;
 }
 
-/**
- * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
- * @inode: inode to be removed
- * @wb: bdi_writeback @inode is being removed from
- *
- * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
- * clear %WB_has_dirty_io if all are empty afterwards.
- */
-static void inode_io_list_del_locked(struct inode *inode,
-				     struct bdi_writeback *wb)
-{
-	assert_spin_locked(&wb->list_lock);
-	assert_spin_locked(&inode->i_lock);
-
-	inode->i_state &= ~I_SYNC_QUEUED;
-	list_del_init(&inode->i_io_list);
-	wb_io_lists_depopulated(wb);
-}
-
 static void wb_wakeup(struct bdi_writeback *wb)
 {
 	spin_lock_bh(&wb->work_lock);
@@ -278,6 +259,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
 }
 EXPORT_SYMBOL_GPL(__inode_attach_wb);
 
+/**
+ * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
+ * @inode: inode of interest with i_lock held
+ * @wb: target bdi_writeback
+ *
+ * Remove the inode from wb's io lists and if necessarily put onto b_attached
+ * list.  Only inodes attached to cgwb's are kept on this list.
+ */
+static void inode_cgwb_move_to_attached(struct inode *inode,
+					struct bdi_writeback *wb)
+{
+	assert_spin_locked(&wb->list_lock);
+	assert_spin_locked(&inode->i_lock);
+
+	inode->i_state &= ~I_SYNC_QUEUED;
+	if (wb != &wb->bdi->wb)
+		list_move(&inode->i_io_list, &wb->b_attached);
+	else
+		list_del_init(&inode->i_io_list);
+	wb_io_lists_depopulated(wb);
+}
+
 /**
  * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
  * @inode: inode of interest with i_lock held
@@ -418,21 +421,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	wb_get(new_wb);
 
 	/*
-	 * Transfer to @new_wb's IO list if necessary.  The specific list
-	 * @inode was on is ignored and the inode is put on ->b_dirty which
-	 * is always correct including from ->b_dirty_time.  The transfer
-	 * preserves @inode->dirtied_when ordering.
+	 * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+	 * the specific list @inode was on is ignored and the @inode is put on
+	 * ->b_dirty which is always correct including from ->b_dirty_time.
+	 * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+	 * was clean, it means it was on the b_attached list, so move it onto
+	 * the b_attached list of @new_wb.
 	 */
 	if (!list_empty(&inode->i_io_list)) {
-		struct inode *pos;
-
-		inode_io_list_del_locked(inode, old_wb);
 		inode->i_wb = new_wb;
-		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
-			if (time_after_eq(inode->dirtied_when,
-					  pos->dirtied_when))
-				break;
-		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
+
+		if (inode->i_state & I_DIRTY_ALL) {
+			struct inode *pos;
+
+			list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+				if (time_after_eq(inode->dirtied_when,
+						  pos->dirtied_when))
+					break;
+			inode_io_list_move_locked(inode, new_wb,
+						  pos->i_io_list.prev);
+		} else {
+			inode_cgwb_move_to_attached(inode, new_wb);
+		}
 	} else {
 		inode->i_wb = new_wb;
 	}
@@ -1021,6 +1031,17 @@ fs_initcall(cgroup_writeback_init);
 static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
 static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
 
+static void inode_cgwb_move_to_attached(struct inode *inode,
+					struct bdi_writeback *wb)
+{
+	assert_spin_locked(&wb->list_lock);
+	assert_spin_locked(&inode->i_lock);
+
+	inode->i_state &= ~I_SYNC_QUEUED;
+	list_del_init(&inode->i_io_list);
+	wb_io_lists_depopulated(wb);
+}
+
 static struct bdi_writeback *
 locked_inode_to_wb_and_lock_list(struct inode *inode)
 	__releases(&inode->i_lock)
@@ -1121,7 +1142,11 @@ void inode_io_list_del(struct inode *inode)
 
 	wb = inode_to_wb_and_lock_list(inode);
 	spin_lock(&inode->i_lock);
-	inode_io_list_del_locked(inode, wb);
+
+	inode->i_state &= ~I_SYNC_QUEUED;
+	list_del_init(&inode->i_io_list);
+	wb_io_lists_depopulated(wb);
+
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&wb->list_lock);
 }
@@ -1434,7 +1459,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		inode->i_state &= ~I_SYNC_QUEUED;
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
-		inode_io_list_del_locked(inode, wb);
+		inode_cgwb_move_to_attached(inode, wb);
 	}
 }
 
@@ -1586,7 +1611,7 @@ static int writeback_single_inode(struct inode *inode,
 	 * responsible for the writeback lists.
 	 */
 	if (!(inode->i_state & I_DIRTY_ALL))
-		inode_io_list_del_locked(inode, wb);
+		inode_cgwb_move_to_attached(inode, wb);
 	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
 out:
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index fff9367a6348..e5dc238ebe4f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -154,6 +154,7 @@ struct bdi_writeback {
 	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
 	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
+	struct list_head b_attached;	/* attached inodes, protected by list_lock */
 
 	union {
 		struct work_struct release_work;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 576220acd686..54c5dc4b8c24 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -396,6 +396,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
+	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);
 }
 
@@ -472,6 +473,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 
 	wb->memcg_css = memcg_css;
 	wb->blkcg_css = blkcg_css;
+	INIT_LIST_HEAD(&wb->b_attached);
 	INIT_WORK(&wb->release_work, cgwb_release_workfn);
 	set_bit(WB_registered, &wb->state);
 

From 72d4512e9cb14d790e361c0e085186a7ef2d2431 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:56 -0700
Subject: [PATCH 3121/3292] writeback, cgroup: split out the functional part of
 inode_switch_wbs_work_fn()

Split out the functional part of the inode_switch_wbs_work_fn() function
as inode_do switch_wbs() to reuse it later for switching inodes attached
to dying cgwbs.

This commit doesn't bring any functional changes.

Link: https://lkml.kernel.org/r/20210608230225.2078447-7-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 87b305ee5348..5520a6b5cc4d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -351,15 +351,12 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
 	up_write(&bdi->wb_switch_rwsem);
 }
 
-static void inode_switch_wbs_work_fn(struct work_struct *work)
+static void inode_do_switch_wbs(struct inode *inode,
+				struct bdi_writeback *new_wb)
 {
-	struct inode_switch_wbs_context *isw =
-		container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
-	struct inode *inode = isw->inode;
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	struct address_space *mapping = inode->i_mapping;
 	struct bdi_writeback *old_wb = inode->i_wb;
-	struct bdi_writeback *new_wb = isw->new_wb;
 	XA_STATE(xas, &mapping->i_pages, 0);
 	struct page *page;
 	bool switched = false;
@@ -470,11 +467,17 @@ skip_switch:
 		wb_wakeup(new_wb);
 		wb_put(old_wb);
 	}
-	wb_put(new_wb);
+}
 
-	iput(inode);
+static void inode_switch_wbs_work_fn(struct work_struct *work)
+{
+	struct inode_switch_wbs_context *isw =
+		container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
+
+	inode_do_switch_wbs(isw->inode, isw->new_wb);
+	wb_put(isw->new_wb);
+	iput(isw->inode);
 	kfree(isw);
-
 	atomic_dec(&isw_nr_in_flight);
 }
 

From f5fbe6b7ad6ef1fbdf8074a6ca9fdab739bf86d4 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:35:59 -0700
Subject: [PATCH 3122/3292] writeback, cgroup: support switching multiple
 inodes at once

Currently only a single inode can be switched to another writeback
structure at once.  That means to switch an inode a separate
inode_switch_wbs_context structure must be allocated, and a separate rcu
callback and work must be scheduled.

It's fine for the existing ad-hoc switching, which is not happening that
often, but sub-optimal for massive switching required in order to release
a writeback structure.  To prepare for it, let's add a support for
switching multiple inodes at once.

Instead of containing a single inode pointer, inode_switch_wbs_context
will contain a NULL-terminated array of inode pointers.
inode_do_switch_wbs() will be called for each inode.

To optimize the locking bdi->wb_switch_rwsem, old_wb's and new_wb's
list_locks will be acquired and released only once altogether for all
inodes.  wb_wakeup() will be also be called only once.  Instead of calling
wb_put(old_wb) after each successful switch, wb_put_many() is introduced
and used.

Link: https://lkml.kernel.org/r/20210608230225.2078447-8-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c                | 106 +++++++++++++++++++------------
 include/linux/backing-dev-defs.h |  18 +++++-
 2 files changed, 80 insertions(+), 44 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5520a6b5cc4d..737ac27adb77 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -335,10 +335,18 @@ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
 }
 
 struct inode_switch_wbs_context {
-	struct inode		*inode;
-	struct bdi_writeback	*new_wb;
-
 	struct rcu_work		work;
+
+	/*
+	 * Multiple inodes can be switched at once.  The switching procedure
+	 * consists of two parts, separated by a RCU grace period.  To make
+	 * sure that the second part is executed for each inode gone through
+	 * the first part, all inode pointers are placed into a NULL-terminated
+	 * array embedded into struct inode_switch_wbs_context.  Otherwise
+	 * an inode could be left in a non-consistent state.
+	 */
+	struct bdi_writeback	*new_wb;
+	struct inode		*inodes[];
 };
 
 static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
@@ -351,39 +359,15 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
 	up_write(&bdi->wb_switch_rwsem);
 }
 
-static void inode_do_switch_wbs(struct inode *inode,
+static bool inode_do_switch_wbs(struct inode *inode,
+				struct bdi_writeback *old_wb,
 				struct bdi_writeback *new_wb)
 {
-	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	struct address_space *mapping = inode->i_mapping;
-	struct bdi_writeback *old_wb = inode->i_wb;
 	XA_STATE(xas, &mapping->i_pages, 0);
 	struct page *page;
 	bool switched = false;
 
-	/*
-	 * If @inode switches cgwb membership while sync_inodes_sb() is
-	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
-	 */
-	down_read(&bdi->wb_switch_rwsem);
-
-	/*
-	 * By the time control reaches here, RCU grace period has passed
-	 * since I_WB_SWITCH assertion and all wb stat update transactions
-	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
-	 * synchronizing against the i_pages lock.
-	 *
-	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
-	 * gives us exclusion against all wb related operations on @inode
-	 * including IO list manipulations and stat updates.
-	 */
-	if (old_wb < new_wb) {
-		spin_lock(&old_wb->list_lock);
-		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
-	} else {
-		spin_lock(&new_wb->list_lock);
-		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
-	}
 	spin_lock(&inode->i_lock);
 	xa_lock_irq(&mapping->i_pages);
 
@@ -458,25 +442,63 @@ skip_switch:
 
 	xa_unlock_irq(&mapping->i_pages);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&new_wb->list_lock);
-	spin_unlock(&old_wb->list_lock);
 
-	up_read(&bdi->wb_switch_rwsem);
-
-	if (switched) {
-		wb_wakeup(new_wb);
-		wb_put(old_wb);
-	}
+	return switched;
 }
 
 static void inode_switch_wbs_work_fn(struct work_struct *work)
 {
 	struct inode_switch_wbs_context *isw =
 		container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
+	struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]);
+	struct bdi_writeback *old_wb = isw->inodes[0]->i_wb;
+	struct bdi_writeback *new_wb = isw->new_wb;
+	unsigned long nr_switched = 0;
+	struct inode **inodep;
 
-	inode_do_switch_wbs(isw->inode, isw->new_wb);
-	wb_put(isw->new_wb);
-	iput(isw->inode);
+	/*
+	 * If @inode switches cgwb membership while sync_inodes_sb() is
+	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
+	 */
+	down_read(&bdi->wb_switch_rwsem);
+
+	/*
+	 * By the time control reaches here, RCU grace period has passed
+	 * since I_WB_SWITCH assertion and all wb stat update transactions
+	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
+	 * synchronizing against the i_pages lock.
+	 *
+	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
+	 * gives us exclusion against all wb related operations on @inode
+	 * including IO list manipulations and stat updates.
+	 */
+	if (old_wb < new_wb) {
+		spin_lock(&old_wb->list_lock);
+		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+	} else {
+		spin_lock(&new_wb->list_lock);
+		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+	}
+
+	for (inodep = isw->inodes; *inodep; inodep++) {
+		WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+		if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+			nr_switched++;
+	}
+
+	spin_unlock(&new_wb->list_lock);
+	spin_unlock(&old_wb->list_lock);
+
+	up_read(&bdi->wb_switch_rwsem);
+
+	if (nr_switched) {
+		wb_wakeup(new_wb);
+		wb_put_many(old_wb, nr_switched);
+	}
+
+	for (inodep = isw->inodes; *inodep; inodep++)
+		iput(*inodep);
+	wb_put(new_wb);
 	kfree(isw);
 	atomic_dec(&isw_nr_in_flight);
 }
@@ -503,7 +525,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
 		return;
 
-	isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
+	isw = kzalloc(sizeof(*isw) + 2 * sizeof(struct inode *), GFP_ATOMIC);
 	if (!isw)
 		return;
 
@@ -530,7 +552,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	__iget(inode);
 	spin_unlock(&inode->i_lock);
 
-	isw->inode = inode;
+	isw->inodes[0] = inode;
 
 	/*
 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index e5dc238ebe4f..63f52ad2ce7a 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -240,8 +240,9 @@ static inline void wb_get(struct bdi_writeback *wb)
 /**
  * wb_put - decrement a wb's refcount
  * @wb: bdi_writeback to put
+ * @nr: number of references to put
  */
-static inline void wb_put(struct bdi_writeback *wb)
+static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
 {
 	if (WARN_ON_ONCE(!wb->bdi)) {
 		/*
@@ -252,7 +253,16 @@ static inline void wb_put(struct bdi_writeback *wb)
 	}
 
 	if (wb != &wb->bdi->wb)
-		percpu_ref_put(&wb->refcnt);
+		percpu_ref_put_many(&wb->refcnt, nr);
+}
+
+/**
+ * wb_put - decrement a wb's refcount
+ * @wb: bdi_writeback to put
+ */
+static inline void wb_put(struct bdi_writeback *wb)
+{
+	wb_put_many(wb, 1);
 }
 
 /**
@@ -281,6 +291,10 @@ static inline void wb_put(struct bdi_writeback *wb)
 {
 }
 
+static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
+{
+}
+
 static inline bool wb_dying(struct bdi_writeback *wb)
 {
 	return false;

From c22d70a162d3cc177282c4487be4d54876ca55c8 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Mon, 28 Jun 2021 19:36:03 -0700
Subject: [PATCH 3123/3292] writeback, cgroup: release dying cgwbs by switching
 attached inodes

Asynchronously try to release dying cgwbs by switching attached inodes to
the nearest living ancestor wb.  It helps to get rid of per-cgroup
writeback structures themselves and of pinned memory and block cgroups,
which are significantly larger structures (mostly due to large per-cpu
statistics data).  This prevents memory waste and helps to avoid different
scalability problems caused by large piles of dying cgroups.

Reuse the existing mechanism of inode switching used for foreign inode
detection.  To speed things up batch up to 115 inode switching in a single
operation (the maximum number is selected so that the resulting struct
inode_switch_wbs_context can fit into 1024 bytes).  Because every
switching consists of two steps divided by an RCU grace period, it would
be too slow without batching.  Please note that the whole batch counts as
a single operation (when increasing/decreasing isw_nr_in_flight).  This
allows to keep umounting working (flush the switching queue), however
prevents cleanups from consuming the whole switching quota and effectively
blocking the frn switching.

A cgwb cleanup operation can fail due to different reasons (e.g.  not
enough memory, the cgwb has an in-flight/pending io, an attached inode in
a wrong state, etc).  In this case the next scheduled cleanup will make a
new attempt.  An attempt is made each time a new cgwb is offlined (in
other words a memcg and/or a blkcg is deleted by a user).  In the future
an additional attempt scheduled by a timer can be implemented.

[guro@fb.com: replace open-coded "115" with arithmetic]
  Link: https://lkml.kernel.org/r/YMEcSBcq/VXMiPPO@carbon.dhcp.thefacebook.com
[guro@fb.com: add smp_mb() to inode_prepare_wbs_switch()]
  Link: https://lkml.kernel.org/r/YMFa+guFw7OFjf3X@carbon.dhcp.thefacebook.com
[willy@infradead.org: fix documentation]
  Link: https://lkml.kernel.org/r/20210615200242.1716568-2-willy@infradead.org

Link: https://lkml.kernel.org/r/20210608230225.2078447-9-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c                | 111 ++++++++++++++++++++++++++++---
 include/linux/backing-dev-defs.h |   1 +
 include/linux/writeback.h        |   1 +
 mm/backing-dev.c                 |  64 +++++++++++++++++-
 4 files changed, 165 insertions(+), 12 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 737ac27adb77..62193106683d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -225,6 +225,13 @@ void wb_wait_for_completion(struct wb_completion *done)
 					/* one round can affect upto 5 slots */
 #define WB_FRN_MAX_IN_FLIGHT	1024	/* don't queue too many concurrently */
 
+/*
+ * Maximum inodes per isw.  A specific value has been chosen to make
+ * struct inode_switch_wbs_context fit into 1024 bytes kmalloc.
+ */
+#define WB_MAX_INODES_PER_ISW  ((1024UL - sizeof(struct inode_switch_wbs_context)) \
+                                / sizeof(struct inode *))
+
 static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
 static struct workqueue_struct *isw_wq;
 
@@ -503,6 +510,32 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	atomic_dec(&isw_nr_in_flight);
 }
 
+static bool inode_prepare_wbs_switch(struct inode *inode,
+				     struct bdi_writeback *new_wb)
+{
+	/*
+	 * Paired with smp_mb() in cgroup_writeback_umount().
+	 * isw_nr_in_flight must be increased before checking SB_ACTIVE and
+	 * grabbing an inode, otherwise isw_nr_in_flight can be observed as 0
+	 * in cgroup_writeback_umount() and the isw_wq will be not flushed.
+	 */
+	smp_mb();
+
+	/* while holding I_WB_SWITCH, no one else can update the association */
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
+	    inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) ||
+	    inode_to_wb(inode) == new_wb) {
+		spin_unlock(&inode->i_lock);
+		return false;
+	}
+	inode->i_state |= I_WB_SWITCH;
+	__iget(inode);
+	spin_unlock(&inode->i_lock);
+
+	return true;
+}
+
 /**
  * inode_switch_wbs - change the wb association of an inode
  * @inode: target inode
@@ -540,17 +573,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 	if (!isw->new_wb)
 		goto out_free;
 
-	/* while holding I_WB_SWITCH, no one else can update the association */
-	spin_lock(&inode->i_lock);
-	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
-	    inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) ||
-	    inode_to_wb(inode) == isw->new_wb) {
-		spin_unlock(&inode->i_lock);
+	if (!inode_prepare_wbs_switch(inode, isw->new_wb))
 		goto out_free;
-	}
-	inode->i_state |= I_WB_SWITCH;
-	__iget(inode);
-	spin_unlock(&inode->i_lock);
 
 	isw->inodes[0] = inode;
 
@@ -571,6 +595,73 @@ out_free:
 	kfree(isw);
 }
 
+/**
+ * cleanup_offline_cgwb - detach associated inodes
+ * @wb: target wb
+ *
+ * Switch all inodes attached to @wb to a nearest living ancestor's wb in order
+ * to eventually release the dying @wb.  Returns %true if not all inodes were
+ * switched and the function has to be restarted.
+ */
+bool cleanup_offline_cgwb(struct bdi_writeback *wb)
+{
+	struct cgroup_subsys_state *memcg_css;
+	struct inode_switch_wbs_context *isw;
+	struct inode *inode;
+	int nr;
+	bool restart = false;
+
+	isw = kzalloc(sizeof(*isw) + WB_MAX_INODES_PER_ISW *
+		      sizeof(struct inode *), GFP_KERNEL);
+	if (!isw)
+		return restart;
+
+	atomic_inc(&isw_nr_in_flight);
+
+	for (memcg_css = wb->memcg_css->parent; memcg_css;
+	     memcg_css = memcg_css->parent) {
+		isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL);
+		if (isw->new_wb)
+			break;
+	}
+	if (unlikely(!isw->new_wb))
+		isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */
+
+	nr = 0;
+	spin_lock(&wb->list_lock);
+	list_for_each_entry(inode, &wb->b_attached, i_io_list) {
+		if (!inode_prepare_wbs_switch(inode, isw->new_wb))
+			continue;
+
+		isw->inodes[nr++] = inode;
+
+		if (nr >= WB_MAX_INODES_PER_ISW - 1) {
+			restart = true;
+			break;
+		}
+	}
+	spin_unlock(&wb->list_lock);
+
+	/* no attached inodes? bail out */
+	if (nr == 0) {
+		atomic_dec(&isw_nr_in_flight);
+		wb_put(isw->new_wb);
+		kfree(isw);
+		return restart;
+	}
+
+	/*
+	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
+	 * the RCU protected stat update paths to grab the i_page
+	 * lock so that stat transfer can synchronize against them.
+	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
+	 */
+	INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
+	queue_rcu_work(isw_wq, &isw->work);
+
+	return restart;
+}
+
 /**
  * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
  * @wbc: writeback_control of interest
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 63f52ad2ce7a..1d7edad9914f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -155,6 +155,7 @@ struct bdi_writeback {
 	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
 	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
 	struct list_head b_attached;	/* attached inodes, protected by list_lock */
+	struct list_head offline_node;	/* anchored at offline_cgwbs */
 
 	union {
 		struct work_struct release_work;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8e5c5bb16e2d..95de51c10248 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -221,6 +221,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
 			   enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
+bool cleanup_offline_cgwb(struct bdi_writeback *wb);
 
 /**
  * inode_attach_wb - associate an inode with its wb
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 54c5dc4b8c24..271f2ca862c8 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -371,12 +371,16 @@ static void wb_exit(struct bdi_writeback *wb)
 #include <linux/memcontrol.h>
 
 /*
- * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, and memcg->cgwb_list.
- * bdi->cgwb_tree is also RCU protected.
+ * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, offline_cgwbs and
+ * memcg->cgwb_list.  bdi->cgwb_tree is also RCU protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
 static struct workqueue_struct *cgwb_release_wq;
 
+static LIST_HEAD(offline_cgwbs);
+static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
+static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+
 static void cgwb_release_workfn(struct work_struct *work)
 {
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -395,6 +399,11 @@ static void cgwb_release_workfn(struct work_struct *work)
 
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
+
+	spin_lock_irq(&cgwb_lock);
+	list_del(&wb->offline_node);
+	spin_unlock_irq(&cgwb_lock);
+
 	wb_exit(wb);
 	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);
@@ -414,6 +423,7 @@ static void cgwb_kill(struct bdi_writeback *wb)
 	WARN_ON(!radix_tree_delete(&wb->bdi->cgwb_tree, wb->memcg_css->id));
 	list_del(&wb->memcg_node);
 	list_del(&wb->blkcg_node);
+	list_add(&wb->offline_node, &offline_cgwbs);
 	percpu_ref_kill(&wb->refcnt);
 }
 
@@ -635,6 +645,54 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 	mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
+/*
+ * cleanup_offline_cgwbs_workfn - try to release dying cgwbs
+ *
+ * Try to release dying cgwbs by switching attached inodes to the nearest
+ * living ancestor's writeback. Processed wbs are placed at the end
+ * of the list to guarantee the forward progress.
+ */
+static void cleanup_offline_cgwbs_workfn(struct work_struct *work)
+{
+	struct bdi_writeback *wb;
+	LIST_HEAD(processed);
+
+	spin_lock_irq(&cgwb_lock);
+
+	while (!list_empty(&offline_cgwbs)) {
+		wb = list_first_entry(&offline_cgwbs, struct bdi_writeback,
+				      offline_node);
+		list_move(&wb->offline_node, &processed);
+
+		/*
+		 * If wb is dirty, cleaning up the writeback by switching
+		 * attached inodes will result in an effective removal of any
+		 * bandwidth restrictions, which isn't the goal.  Instead,
+		 * it can be postponed until the next time, when all io
+		 * will be likely completed.  If in the meantime some inodes
+		 * will get re-dirtied, they should be eventually switched to
+		 * a new cgwb.
+		 */
+		if (wb_has_dirty_io(wb))
+			continue;
+
+		if (!wb_tryget(wb))
+			continue;
+
+		spin_unlock_irq(&cgwb_lock);
+		while (cleanup_offline_cgwb(wb))
+			cond_resched();
+		spin_lock_irq(&cgwb_lock);
+
+		wb_put(wb);
+	}
+
+	if (!list_empty(&processed))
+		list_splice_tail(&processed, &offline_cgwbs);
+
+	spin_unlock_irq(&cgwb_lock);
+}
+
 /**
  * wb_memcg_offline - kill all wb's associated with a memcg being offlined
  * @memcg: memcg being offlined
@@ -651,6 +709,8 @@ void wb_memcg_offline(struct mem_cgroup *memcg)
 		cgwb_kill(wb);
 	memcg_cgwb_list->next = NULL;	/* prevent new wb's */
 	spin_unlock_irq(&cgwb_lock);
+
+	queue_work(system_unbound_wq, &cleanup_offline_cgwbs_work);
 }
 
 /**

From 34ebcce793245e64db3b40f24486c59668e1f059 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Jun 2021 19:36:06 -0700
Subject: [PATCH 3124/3292] fs: unexport __set_page_dirty

Patch series "remove the implicit .set_page_dirty default".

This series cleans up a few lose ends around ->set_page_dirty, most
importantly removes the default to the buffer head based on if no method
is wired up.

This patch (of 3):

__set_page_dirty is only used by built-in code.

Link: https://lkml.kernel.org/r/20210614061512.3966143-1-hch@lst.de
Link: https://lkml.kernel.org/r/20210614061512.3966143-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index ea48c01fb76b..3d18831c7ad8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -611,7 +611,6 @@ void __set_page_dirty(struct page *page, struct address_space *mapping,
 	}
 	xa_unlock_irqrestore(&mapping->i_pages, flags);
 }
-EXPORT_SYMBOL_GPL(__set_page_dirty);
 
 /*
  * Add a page to the dirty page list.

From c1e3dbe9818e3caa4e467255a348df56912ca549 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Jun 2021 19:36:09 -0700
Subject: [PATCH 3125/3292] fs: move ramfs_aops to libfs

Move the ramfs aops to libfs and reuse them for kernfs and configfs.
Thosw two did not wire up ->set_page_dirty before and now get
__set_page_dirty_no_writeback, which is the right one for no-writeback
address_space usage.

Drop the now unused exports of the libfs helpers only used for ramfs-style
pagecache usage.

Link: https://lkml.kernel.org/r/20210614061512.3966143-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/configfs/inode.c |  8 +-------
 fs/kernfs/inode.c   |  8 +-------
 fs/libfs.c          | 17 +++++++++++++----
 fs/ramfs/inode.c    |  9 +--------
 include/linux/fs.h  |  5 +----
 5 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index eb5ec3e46283..b601610e9907 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -28,12 +28,6 @@
 static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
 #endif
 
-static const struct address_space_operations configfs_aops = {
-	.readpage	= simple_readpage,
-	.write_begin	= simple_write_begin,
-	.write_end	= simple_write_end,
-};
-
 static const struct inode_operations configfs_inode_operations ={
 	.setattr	= configfs_setattr,
 };
@@ -114,7 +108,7 @@ struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
 	struct inode * inode = new_inode(s);
 	if (inode) {
 		inode->i_ino = get_next_ino();
-		inode->i_mapping->a_ops = &configfs_aops;
+		inode->i_mapping->a_ops = &ram_aops;
 		inode->i_op = &configfs_inode_operations;
 
 		if (sd->s_iattr) {
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index d73950fc3d57..26f2aa3586f9 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -17,12 +17,6 @@
 
 #include "kernfs-internal.h"
 
-static const struct address_space_operations kernfs_aops = {
-	.readpage	= simple_readpage,
-	.write_begin	= simple_write_begin,
-	.write_end	= simple_write_end,
-};
-
 static const struct inode_operations kernfs_iops = {
 	.permission	= kernfs_iop_permission,
 	.setattr	= kernfs_iop_setattr,
@@ -203,7 +197,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
 {
 	kernfs_get(kn);
 	inode->i_private = kn;
-	inode->i_mapping->a_ops = &kernfs_aops;
+	inode->i_mapping->a_ops = &ram_aops;
 	inode->i_op = &kernfs_iops;
 	inode->i_generation = kernfs_gen(kn);
 
diff --git a/fs/libfs.c b/fs/libfs.c
index e9b29c6ffccb..2d7f086b93d6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -512,7 +512,7 @@ int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 }
 EXPORT_SYMBOL(simple_setattr);
 
-int simple_readpage(struct file *file, struct page *page)
+static int simple_readpage(struct file *file, struct page *page)
 {
 	clear_highpage(page);
 	flush_dcache_page(page);
@@ -520,7 +520,6 @@ int simple_readpage(struct file *file, struct page *page)
 	unlock_page(page);
 	return 0;
 }
-EXPORT_SYMBOL(simple_readpage);
 
 int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
@@ -568,7 +567,7 @@ EXPORT_SYMBOL(simple_write_begin);
  *
  * Use *ONLY* with simple_readpage()
  */
-int simple_write_end(struct file *file, struct address_space *mapping,
+static int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
 {
@@ -597,7 +596,17 @@ int simple_write_end(struct file *file, struct address_space *mapping,
 
 	return copied;
 }
-EXPORT_SYMBOL(simple_write_end);
+
+/*
+ * Provides ramfs-style behavior: data in the pagecache, but no writeback.
+ */
+const struct address_space_operations ram_aops = {
+	.readpage	= simple_readpage,
+	.write_begin	= simple_write_begin,
+	.write_end	= simple_write_end,
+	.set_page_dirty	= __set_page_dirty_no_writeback,
+};
+EXPORT_SYMBOL(ram_aops);
 
 /*
  * the inodes created here are not hashed. If you use iunique to generate
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 9ebd17d7befb..65e7e56005b8 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -53,13 +53,6 @@ struct ramfs_fs_info {
 static const struct super_operations ramfs_ops;
 static const struct inode_operations ramfs_dir_inode_operations;
 
-static const struct address_space_operations ramfs_aops = {
-	.readpage	= simple_readpage,
-	.write_begin	= simple_write_begin,
-	.write_end	= simple_write_end,
-	.set_page_dirty	= __set_page_dirty_no_writeback,
-};
-
 struct inode *ramfs_get_inode(struct super_block *sb,
 				const struct inode *dir, umode_t mode, dev_t dev)
 {
@@ -68,7 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
 	if (inode) {
 		inode->i_ino = get_next_ino();
 		inode_init_owner(&init_user_ns, inode, dir, mode);
-		inode->i_mapping->a_ops = &ramfs_aops;
+		inode->i_mapping->a_ops = &ram_aops;
 		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
 		mapping_set_unevictable(inode->i_mapping);
 		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..869909345420 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3422,13 +3422,10 @@ extern void noop_invalidatepage(struct page *page, unsigned int offset,
 		unsigned int length);
 extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
-extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata);
-extern int simple_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata);
+extern const struct address_space_operations ram_aops;
 extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
 extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);

From 0af573780b0b13fceb7fabd49dc1b073cee9a507 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Jun 2021 19:36:12 -0700
Subject: [PATCH 3126/3292] mm: require ->set_page_dirty to be explicitly wired
 up

Remove the CONFIG_BLOCK default to __set_page_dirty_buffers and just wire
that method up for the missing instances.

[hch@lst.de: ecryptfs: add a ->set_page_dirty cludge]
  Link: https://lkml.kernel.org/r/20210624125250.536369-1-hch@lst.de

Link: https://lkml.kernel.org/r/20210614061512.3966143-4-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Tyler Hicks <code@tyhicks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/adfs/inode.c     |  1 +
 fs/affs/file.c      |  2 ++
 fs/bfs/file.c       |  1 +
 fs/block_dev.c      |  1 +
 fs/ecryptfs/mmap.c  | 13 +++++++++++++
 fs/exfat/inode.c    |  1 +
 fs/ext2/inode.c     |  2 ++
 fs/fat/inode.c      |  1 +
 fs/gfs2/meta_io.c   |  2 ++
 fs/hfs/inode.c      |  2 ++
 fs/hfsplus/inode.c  |  2 ++
 fs/hpfs/file.c      |  1 +
 fs/jfs/inode.c      |  1 +
 fs/minix/inode.c    |  1 +
 fs/nilfs2/mdt.c     |  1 +
 fs/ocfs2/aops.c     |  1 +
 fs/omfs/file.c      |  1 +
 fs/sysv/itree.c     |  1 +
 fs/udf/file.c       |  1 +
 fs/udf/inode.c      |  1 +
 fs/ufs/inode.c      |  1 +
 mm/page-writeback.c | 18 ++++--------------
 22 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index fb7ee026d101..adbb3a1edcbf 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -73,6 +73,7 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
 }
 
 static const struct address_space_operations adfs_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= adfs_readpage,
 	.writepage	= adfs_writepage,
 	.write_begin	= adfs_write_begin,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d91b0133d95d..75ebd2b576ca 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -453,6 +453,7 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
 }
 
 const struct address_space_operations affs_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage = affs_readpage,
 	.writepage = affs_writepage,
 	.write_begin = affs_write_begin,
@@ -833,6 +834,7 @@ err_bh:
 }
 
 const struct address_space_operations affs_aops_ofs = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage = affs_readpage_ofs,
 	//.writepage = affs_writepage_ofs,
 	.write_begin = affs_write_begin_ofs,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 0dceefc54b48..7f8544abf636 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -188,6 +188,7 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
 }
 
 const struct address_space_operations bfs_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= bfs_readpage,
 	.writepage	= bfs_writepage,
 	.write_begin	= bfs_write_begin,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6cc4d4cfe0c2..eb34f5c357cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1754,6 +1754,7 @@ static int blkdev_writepages(struct address_space *mapping,
 }
 
 static const struct address_space_operations def_blk_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= blkdev_readpage,
 	.readahead	= blkdev_readahead,
 	.writepage	= blkdev_writepage,
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 392e721b50a3..7d85e64ea62f 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -533,7 +533,20 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
 	return block;
 }
 
+#include <linux/buffer_head.h>
+
 const struct address_space_operations ecryptfs_aops = {
+	/*
+	 * XXX: This is pretty broken for multiple reasons: ecryptfs does not
+	 * actually use buffer_heads, and ecryptfs will crash without
+	 * CONFIG_BLOCK.  But it matches the behavior before the default for
+	 * address_space_operations without the ->set_page_dirty method was
+	 * cleaned up, so this is the best we can do without maintainer
+	 * feedback.
+	 */
+#ifdef CONFIG_BLOCK
+	.set_page_dirty = __set_page_dirty_buffers,
+#endif
 	.writepage = ecryptfs_writepage,
 	.readpage = ecryptfs_readpage,
 	.write_begin = ecryptfs_write_begin,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1803ef3220fd..ca37d4344361 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -491,6 +491,7 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
 }
 
 static const struct address_space_operations exfat_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= exfat_readpage,
 	.readahead	= exfat_readahead,
 	.writepage	= exfat_writepage,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 68178b2234bd..bf41f579ed3e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -961,6 +961,7 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc
 }
 
 const struct address_space_operations ext2_aops = {
+	.set_page_dirty		= __set_page_dirty_buffers,
 	.readpage		= ext2_readpage,
 	.readahead		= ext2_readahead,
 	.writepage		= ext2_writepage,
@@ -975,6 +976,7 @@ const struct address_space_operations ext2_aops = {
 };
 
 const struct address_space_operations ext2_nobh_aops = {
+	.set_page_dirty		= __set_page_dirty_buffers,
 	.readpage		= ext2_readpage,
 	.readahead		= ext2_readahead,
 	.writepage		= ext2_nobh_writepage,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bab9b202b496..de0c9b013a85 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -342,6 +342,7 @@ int fat_block_truncate_page(struct inode *inode, loff_t from)
 }
 
 static const struct address_space_operations fat_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= fat_readpage,
 	.readahead	= fat_readahead,
 	.writepage	= fat_writepage,
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index d68184ebbfdd..7c9619997355 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -89,11 +89,13 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 }
 
 const struct address_space_operations gfs2_meta_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.writepage = gfs2_aspace_writepage,
 	.releasepage = gfs2_releasepage,
 };
 
 const struct address_space_operations gfs2_rgrp_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.writepage = gfs2_aspace_writepage,
 	.releasepage = gfs2_releasepage,
 };
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 3fc5cb346586..4a95a92546a0 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -159,6 +159,7 @@ static int hfs_writepages(struct address_space *mapping,
 }
 
 const struct address_space_operations hfs_btree_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
 	.write_begin	= hfs_write_begin,
@@ -168,6 +169,7 @@ const struct address_space_operations hfs_btree_aops = {
 };
 
 const struct address_space_operations hfs_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
 	.write_begin	= hfs_write_begin,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 8ea447e5c470..70e8374ddac4 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -156,6 +156,7 @@ static int hfsplus_writepages(struct address_space *mapping,
 }
 
 const struct address_space_operations hfsplus_btree_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
 	.write_begin	= hfsplus_write_begin,
@@ -165,6 +166,7 @@ const struct address_space_operations hfsplus_btree_aops = {
 };
 
 const struct address_space_operations hfsplus_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
 	.write_begin	= hfsplus_write_begin,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 077c25128eb7..c3a49aacf20a 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -196,6 +196,7 @@ static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 }
 
 const struct address_space_operations hpfs_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage = hpfs_readpage,
 	.writepage = hpfs_writepage,
 	.readahead = hpfs_readahead,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f65bfa9f18d..3663dd5a23bc 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -356,6 +356,7 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 }
 
 const struct address_space_operations jfs_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= jfs_readpage,
 	.readahead	= jfs_readahead,
 	.writepage	= jfs_writepage,
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index a532a99bbe81..a71f1cf894b9 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -442,6 +442,7 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
 }
 
 static const struct address_space_operations minix_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage = minix_readpage,
 	.writepage = minix_writepage,
 	.write_begin = minix_write_begin,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index c0361ce45f62..97769fe4d588 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -434,6 +434,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
 
 
 static const struct address_space_operations def_mdt_aops = {
+	.set_page_dirty		= __set_page_dirty_buffers,
 	.writepage		= nilfs_mdt_write_page,
 };
 
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e1c6fa5bd0e7..68d11c295dd3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2453,6 +2453,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 }
 
 const struct address_space_operations ocfs2_aops = {
+	.set_page_dirty		= __set_page_dirty_buffers,
 	.readpage		= ocfs2_readpage,
 	.readahead		= ocfs2_readahead,
 	.writepage		= ocfs2_writepage,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 11e733aab25d..89725b15a64b 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,6 +372,7 @@ const struct inode_operations omfs_file_inops = {
 };
 
 const struct address_space_operations omfs_aops = {
+	.set_page_dirty = __set_page_dirty_buffers,
 	.readpage = omfs_readpage,
 	.readahead = omfs_readahead,
 	.writepage = omfs_writepage,
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 8b2e99b7bc9f..749385015a8d 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -495,6 +495,7 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
 }
 
 const struct address_space_operations sysv_aops = {
+	.set_page_dirty = __set_page_dirty_buffers,
 	.readpage = sysv_readpage,
 	.writepage = sysv_writepage,
 	.write_begin = sysv_write_begin,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2846dcd92197..1baff8ddb754 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,6 +125,7 @@ static int udf_adinicb_write_end(struct file *file, struct address_space *mappin
 }
 
 const struct address_space_operations udf_adinicb_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
 	.write_begin	= udf_adinicb_write_begin,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0dd2f93ac048..4917670860a0 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -235,6 +235,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
 }
 
 const struct address_space_operations udf_aops = {
+	.set_page_dirty	= __set_page_dirty_buffers,
 	.readpage	= udf_readpage,
 	.readahead	= udf_readahead,
 	.writepage	= udf_writepage,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index debc282c1bb4..ac628de69601 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -526,6 +526,7 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
 }
 
 const struct address_space_operations ufs_aops = {
+	.set_page_dirty = __set_page_dirty_buffers,
 	.readpage = ufs_readpage,
 	.writepage = ufs_writepage,
 	.write_begin = ufs_write_begin,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c2a849d653a9..21f4b5972311 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -32,7 +32,6 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
-#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <linux/timer.h>
 #include <linux/sched/rt.h>
@@ -2555,13 +2554,9 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
 /*
  * Dirty a page.
  *
- * For pages with a mapping this should be done under the page lock
- * for the benefit of asynchronous memory errors who prefer a consistent
- * dirty state. This rule can be broken in some special cases,
- * but should be better not to.
- *
- * If the mapping doesn't provide a set_page_dirty a_op, then
- * just fall through and assume that it wants buffer_heads.
+ * For pages with a mapping this should be done under the page lock for the
+ * benefit of asynchronous memory errors who prefer a consistent dirty state.
+ * This rule can be broken in some special cases, but should be better not to.
  */
 int set_page_dirty(struct page *page)
 {
@@ -2569,7 +2564,6 @@ int set_page_dirty(struct page *page)
 
 	page = compound_head(page);
 	if (likely(mapping)) {
-		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
 		/*
 		 * readahead/lru_deactivate_page could remain
 		 * PG_readahead/PG_reclaim due to race with end_page_writeback
@@ -2582,11 +2576,7 @@ int set_page_dirty(struct page *page)
 		 */
 		if (PageReclaim(page))
 			ClearPageReclaim(page);
-#ifdef CONFIG_BLOCK
-		if (!spd)
-			spd = __set_page_dirty_buffers;
-#endif
-		return (*spd)(page);
+		return mapping->a_ops->set_page_dirty(page);
 	}
 	if (!PageDirty(page)) {
 		if (!TestSetPageDirty(page))

From 6e1cae881a0646f31fe2bda90297d820da1137eb Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:36:15 -0700
Subject: [PATCH 3127/3292] mm/writeback: move __set_page_dirty() to core mm

Patch series "Further set_page_dirty cleanups".

Prompted by Christoph's recent patches, here are some more patches to
improve the state of set_page_dirty().  They're all from the folio tree,
so they've been tested to a certain extent.

This patch (of 6):

Nothing in __set_page_dirty() is specific to buffer_head, so move it to
mm/page-writeback.c.  That removes the only caller of
account_page_dirtied() outside of page-writeback.c, so make it static.

Link: https://lkml.kernel.org/r/20210615162342.1669332-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20210615162342.1669332-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c         | 24 ------------------------
 include/linux/mm.h  |  1 -
 mm/page-writeback.c | 27 ++++++++++++++++++++++++++-
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 3d18831c7ad8..6290c3afdba4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -588,30 +588,6 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 }
 EXPORT_SYMBOL(mark_buffer_dirty_inode);
 
-/*
- * Mark the page dirty, and set it dirty in the page cache, and mark the inode
- * dirty.
- *
- * If warn is true, then emit a warning if the page is not uptodate and has
- * not been truncated.
- *
- * The caller must hold lock_page_memcg().
- */
-void __set_page_dirty(struct page *page, struct address_space *mapping,
-			     int warn)
-{
-	unsigned long flags;
-
-	xa_lock_irqsave(&mapping->i_pages, flags);
-	if (page->mapping) {	/* Race with truncate? */
-		WARN_ON_ONCE(warn && !PageUptodate(page));
-		account_page_dirtied(page, mapping);
-		__xa_set_mark(&mapping->i_pages, page_index(page),
-				PAGECACHE_TAG_DIRTY);
-	}
-	xa_unlock_irqrestore(&mapping->i_pages, flags);
-}
-
 /*
  * Add a page to the dirty page list.
  *
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9afb8998e7e5..12589b811555 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1855,7 +1855,6 @@ int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
-void account_page_dirtied(struct page *page, struct address_space *mapping);
 void account_page_cleaned(struct page *page, struct address_space *mapping,
 			  struct bdi_writeback *wb);
 int set_page_dirty(struct page *page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 21f4b5972311..1345882c428b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2425,7 +2425,8 @@ int __set_page_dirty_no_writeback(struct page *page)
  *
  * NOTE: This relies on being atomic wrt interrupts.
  */
-void account_page_dirtied(struct page *page, struct address_space *mapping)
+static void account_page_dirtied(struct page *page,
+		struct address_space *mapping)
 {
 	struct inode *inode = mapping->host;
 
@@ -2466,6 +2467,30 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 	}
 }
 
+/*
+ * Mark the page dirty, and set it dirty in the page cache, and mark the inode
+ * dirty.
+ *
+ * If warn is true, then emit a warning if the page is not uptodate and has
+ * not been truncated.
+ *
+ * The caller must hold lock_page_memcg().
+ */
+void __set_page_dirty(struct page *page, struct address_space *mapping,
+			     int warn)
+{
+	unsigned long flags;
+
+	xa_lock_irqsave(&mapping->i_pages, flags);
+	if (page->mapping) {	/* Race with truncate? */
+		WARN_ON_ONCE(warn && !PageUptodate(page));
+		account_page_dirtied(page, mapping);
+		__xa_set_mark(&mapping->i_pages, page_index(page),
+				PAGECACHE_TAG_DIRTY);
+	}
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
+}
+
 /*
  * For address_spaces which do not use buffers.  Just tag the page as dirty in
  * the xarray.

From 2f18be363c3332dedaabb9fc050a282a00f4f646 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:36:18 -0700
Subject: [PATCH 3128/3292] mm/writeback: use __set_page_dirty in
 __set_page_dirty_nobuffers

This is fundamentally the same code, so just call it instead of
duplicating it.

Link: https://lkml.kernel.org/r/20210615162342.1669332-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page-writeback.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1345882c428b..8bd69dc5379a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2508,20 +2508,12 @@ int __set_page_dirty_nobuffers(struct page *page)
 	lock_page_memcg(page);
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
-		unsigned long flags;
 
 		if (!mapping) {
 			unlock_page_memcg(page);
 			return 1;
 		}
-
-		xa_lock_irqsave(&mapping->i_pages, flags);
-		BUG_ON(page_mapping(page) != mapping);
-		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-		account_page_dirtied(page, mapping);
-		__xa_set_mark(&mapping->i_pages, page_index(page),
-				   PAGECACHE_TAG_DIRTY);
-		xa_unlock_irqrestore(&mapping->i_pages, flags);
+		__set_page_dirty(page, mapping, !PagePrivate(page));
 		unlock_page_memcg(page);
 
 		if (mapping->host) {

From fd7353f88bde80d557b6d74a5351979fc8b1b8db Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:36:21 -0700
Subject: [PATCH 3129/3292] iomap: use __set_page_dirty_nobuffers

The only difference between iomap_set_page_dirty() and
__set_page_dirty_nobuffers() is that the latter includes a debugging check
that a !Uptodate page has private data.

Link: https://lkml.kernel.org/r/20210615162342.1669332-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/gfs2/aops.c         |  2 +-
 fs/iomap/buffered-io.c | 27 +--------------------------
 fs/xfs/xfs_aops.c      |  2 +-
 fs/zonefs/super.c      |  2 +-
 include/linux/iomap.h  |  1 -
 5 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 23b5be3db044..81d8f064126e 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -784,7 +784,7 @@ static const struct address_space_operations gfs2_aops = {
 	.writepages = gfs2_writepages,
 	.readpage = gfs2_readpage,
 	.readahead = gfs2_readahead,
-	.set_page_dirty = iomap_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_nobuffers,
 	.releasepage = iomap_releasepage,
 	.invalidatepage = iomap_invalidatepage,
 	.bmap = gfs2_bmap,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 9023717c5188..0065781935c7 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -640,31 +640,6 @@ out_no_page:
 	return status;
 }
 
-int
-iomap_set_page_dirty(struct page *page)
-{
-	struct address_space *mapping = page_mapping(page);
-	int newly_dirty;
-
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
-
-	/*
-	 * Lock out page's memcg migration to keep PageDirty
-	 * synchronized with per-memcg dirty page counters.
-	 */
-	lock_page_memcg(page);
-	newly_dirty = !TestSetPageDirty(page);
-	if (newly_dirty)
-		__set_page_dirty(page, mapping, 0);
-	unlock_page_memcg(page);
-
-	if (newly_dirty)
-		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return newly_dirty;
-}
-EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
-
 static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 		size_t copied, struct page *page)
 {
@@ -684,7 +659,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	if (unlikely(copied < len && !PageUptodate(page)))
 		return 0;
 	iomap_set_range_uptodate(page, offset_in_page(pos), len);
-	iomap_set_page_dirty(page);
+	__set_page_dirty_nobuffers(page);
 	return copied;
 }
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 826caa6b4a5a..a335d79dcff8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -561,7 +561,7 @@ const struct address_space_operations xfs_address_space_operations = {
 	.readahead		= xfs_vm_readahead,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
-	.set_page_dirty		= iomap_set_page_dirty,
+	.set_page_dirty		= __set_page_dirty_nobuffers,
 	.releasepage		= iomap_releasepage,
 	.invalidatepage		= iomap_invalidatepage,
 	.bmap			= xfs_vm_bmap,
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index cd145d318b17..3aacf016c7c2 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -185,7 +185,7 @@ static const struct address_space_operations zonefs_file_aops = {
 	.readahead		= zonefs_readahead,
 	.writepage		= zonefs_writepage,
 	.writepages		= zonefs_writepages,
-	.set_page_dirty		= iomap_set_page_dirty,
+	.set_page_dirty		= __set_page_dirty_nobuffers,
 	.releasepage		= iomap_releasepage,
 	.invalidatepage		= iomap_invalidatepage,
 	.migratepage		= iomap_migrate_page,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index c87d0cb0de6d..479c1da3e221 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -159,7 +159,6 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
-int iomap_set_page_dirty(struct page *page);
 int iomap_is_partially_uptodate(struct page *page, unsigned long from,
 		unsigned long count);
 int iomap_releasepage(struct page *page, gfp_t gfp_mask);

From fc50eee3291556d623b64bb4b1dc345b971e184e Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:36:24 -0700
Subject: [PATCH 3130/3292] fs: remove anon_set_page_dirty()

Use __set_page_dirty_no_writeback() instead.  This will set the dirty bit
on the page, which will be used to avoid calling set_page_dirty() in the
future.  It will have no effect on actually writing the page back, as the
pages are not on any LRU lists.

Link: https://lkml.kernel.org/r/20210615162342.1669332-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/libfs.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 2d7f086b93d6..3fdd89b156d6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1217,19 +1217,10 @@ void kfree_link(void *p)
 }
 EXPORT_SYMBOL(kfree_link);
 
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
-	return 0;
-};
-
 struct inode *alloc_anon_inode(struct super_block *s)
 {
 	static const struct address_space_operations anon_aops = {
-		.set_page_dirty = anon_set_page_dirty,
+		.set_page_dirty = __set_page_dirty_no_writeback,
 	};
 	struct inode *inode = new_inode_pseudo(s);
 

From b82a96c9253333a8834b2df5f262a39cccf4f6c7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:36:27 -0700
Subject: [PATCH 3131/3292] fs: remove noop_set_page_dirty()

Use __set_page_dirty_no_writeback() instead.  This will set the dirty bit
on the page, which will be used to avoid calling set_page_dirty() in the
future.  It will have no effect on actually writing the page back, as the
pages are not on any LRU lists.

[akpm@linux-foundation.org: export __set_page_dirty_no_writeback() to modules]

Link: https://lkml.kernel.org/r/20210615162342.1669332-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/dax/device.c |  2 +-
 fs/ext2/inode.c      |  2 +-
 fs/ext4/inode.c      |  2 +-
 fs/fuse/dax.c        |  2 +-
 fs/libfs.c           | 16 ----------------
 fs/xfs/xfs_aops.c    |  2 +-
 include/linux/fs.h   |  1 -
 mm/page-writeback.c  |  1 +
 8 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index db92573c94e8..dd8222a42808 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -337,7 +337,7 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
 }
 
 static const struct address_space_operations dev_dax_aops = {
-	.set_page_dirty		= noop_set_page_dirty,
+	.set_page_dirty		= __set_page_dirty_no_writeback,
 	.invalidatepage		= noop_invalidatepage,
 };
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index bf41f579ed3e..dadb121beb22 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -992,7 +992,7 @@ const struct address_space_operations ext2_nobh_aops = {
 static const struct address_space_operations ext2_dax_aops = {
 	.writepages		= ext2_dax_writepages,
 	.direct_IO		= noop_direct_IO,
-	.set_page_dirty		= noop_set_page_dirty,
+	.set_page_dirty		= __set_page_dirty_no_writeback,
 	.invalidatepage		= noop_invalidatepage,
 };
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fe6045a46599..b8170a008590 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3701,7 +3701,7 @@ static const struct address_space_operations ext4_da_aops = {
 static const struct address_space_operations ext4_dax_aops = {
 	.writepages		= ext4_dax_writepages,
 	.direct_IO		= noop_direct_IO,
-	.set_page_dirty		= noop_set_page_dirty,
+	.set_page_dirty		= __set_page_dirty_no_writeback,
 	.bmap			= ext4_bmap,
 	.invalidatepage		= noop_invalidatepage,
 	.swap_activate		= ext4_iomap_swap_activate,
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index ff99ab2a3c43..515ad0895345 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1329,7 +1329,7 @@ bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
 static const struct address_space_operations fuse_dax_file_aops  = {
 	.writepages	= fuse_dax_writepages,
 	.direct_IO	= noop_direct_IO,
-	.set_page_dirty	= noop_set_page_dirty,
+	.set_page_dirty	= __set_page_dirty_no_writeback,
 	.invalidatepage	= noop_invalidatepage,
 };
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 3fdd89b156d6..51b4de3b3447 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1171,22 +1171,6 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(noop_fsync);
 
-int noop_set_page_dirty(struct page *page)
-{
-	/*
-	 * Unlike __set_page_dirty_no_writeback that handles dirty page
-	 * tracking in the page object, dax does all dirty tracking in
-	 * the inode address_space in response to mkwrite faults. In the
-	 * dax case we only need to worry about potentially dirty CPU
-	 * caches, not dirty page cache pages to write back.
-	 *
-	 * This callback is defined to prevent fallback to
-	 * __set_page_dirty_buffers() in set_page_dirty().
-	 */
-	return 0;
-}
-EXPORT_SYMBOL_GPL(noop_set_page_dirty);
-
 void noop_invalidatepage(struct page *page, unsigned int offset,
 		unsigned int length)
 {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a335d79dcff8..cb4e0fcf4c76 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -575,7 +575,7 @@ const struct address_space_operations xfs_address_space_operations = {
 const struct address_space_operations xfs_dax_aops = {
 	.writepages		= xfs_dax_writepages,
 	.direct_IO		= noop_direct_IO,
-	.set_page_dirty		= noop_set_page_dirty,
+	.set_page_dirty		= __set_page_dirty_no_writeback,
 	.invalidatepage		= noop_invalidatepage,
 	.swap_activate		= xfs_iomap_swapfile_activate,
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 869909345420..fad6663cd1b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3417,7 +3417,6 @@ extern int simple_rename(struct user_namespace *, struct inode *,
 extern void simple_recursive_removal(struct dentry *,
                               void (*callback)(struct dentry *));
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
-extern int noop_set_page_dirty(struct page *page);
 extern void noop_invalidatepage(struct page *page, unsigned int offset,
 		unsigned int length);
 extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8bd69dc5379a..e5b38ffe9fca 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2417,6 +2417,7 @@ int __set_page_dirty_no_writeback(struct page *page)
 		return !TestSetPageDirty(page);
 	return 0;
 }
+EXPORT_SYMBOL(__set_page_dirty_no_writeback);
 
 /*
  * Helper function for set_page_dirty family.

From 3a6b2162005f24c7caa10d7f10dba487629787f2 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:36:30 -0700
Subject: [PATCH 3132/3292] mm: move page dirtying prototypes from mm.h

These functions implement the address_space ->set_page_dirty operation and
should live in pagemap.h, not mm.h so that the rest of the kernel doesn't
get funny ideas about calling them directly.

Link: https://lkml.kernel.org/r/20210615162342.1669332-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dax.c           | 1 +
 fs/zonefs/super.c       | 2 +-
 include/linux/mm.h      | 3 ---
 include/linux/pagemap.h | 4 ++++
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 515ad0895345..fb733eb5aead 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/dax.h>
 #include <linux/uio.h>
+#include <linux/pagemap.h>
 #include <linux/pfn_t.h>
 #include <linux/iomap.h>
 #include <linux/interval_tree.h>
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 3aacf016c7c2..dbf03635869c 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -5,7 +5,7 @@
  * Copyright (C) 2019 Western Digital Corporation or its affiliates.
  */
 #include <linux/module.h>
-#include <linux/fs.h>
+#include <linux/pagemap.h>
 #include <linux/magic.h>
 #include <linux/iomap.h>
 #include <linux/init.h>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 12589b811555..e39ed497578b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1850,9 +1850,6 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 
-void __set_page_dirty(struct page *, struct address_space *, int warn);
-int __set_page_dirty_nobuffers(struct page *page);
-int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
 void account_page_cleaned(struct page *page, struct address_space *mapping,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0f1b34dbf3a2..ed02aa522263 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -702,6 +702,10 @@ int wait_on_page_writeback_killable(struct page *page);
 extern void end_page_writeback(struct page *page);
 void wait_for_stable_page(struct page *page);
 
+void __set_page_dirty(struct page *, struct address_space *, int warn);
+int __set_page_dirty_nobuffers(struct page *page);
+int __set_page_dirty_no_writeback(struct page *page);
+
 void page_endio(struct page *page, bool is_write, int err);
 
 /**

From f39bd8534594535f6fd968ee7e05d6a70b74d1a9 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 28 Jun 2021 19:36:33 -0700
Subject: [PATCH 3133/3292] mm/gup_benchmark: support threading

Patch series "mm/gup: Fix pin page write cache bouncing on has_pinned", v2.

This series contains 3 patches, the 1st one enables threading for
gup_benchmark in the kselftest.  The latter two patches are collected from
Andrea's local branch which can fix write cache bouncing issue with
pinning fast-gup.

To be explicit on the latter two patches:

  - the 2nd patch fixes the perf degrade when introducing has_pinned, then

  - the last patch tries to remove the has_pinned with a bit in mm->flags

For patch 3: originally I think we had a plan to reuse has_pinned into a
counter very soon, however that's not happening at least until today, so
maybe it proves that we can remove it until we really want such a counter
for whatever reason.  As the commit message stated, it saves 4 bytes for
each mm without observable regressions.

Regarding testing: we can reference to the commit message of patch 2 for
some detailed testing with will-is-scale.  Meanwhile I did patch 1 just
because then we can even easily verify the patchset using the existing
kselftest facilities or even regress test it in the future with the repo
if we want.

Below numbers are extra verification tests that I did besides commit
message of patch 2 using the new gup_benchmark and 256 cpus.  Below test
is done on 40 cpus host with Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz,
and I can get similar result (of course the write cache bouncing get
severe with even more cores).

After patch 1 applied (only test patch, so using old kernel):

  $ sudo chrt -f 1 ./gup_test -a  -m 512 -j 40
  PIN_FAST_BENCHMARK: Time: get:459632 put:5990 us
  PIN_FAST_BENCHMARK: Time: get:461967 put:5840 us
  PIN_FAST_BENCHMARK: Time: get:464521 put:6140 us
  PIN_FAST_BENCHMARK: Time: get:465176 put:7100 us
  PIN_FAST_BENCHMARK: Time: get:465960 put:6733 us
  PIN_FAST_BENCHMARK: Time: get:465324 put:6781 us
  PIN_FAST_BENCHMARK: Time: get:466018 put:7130 us
  PIN_FAST_BENCHMARK: Time: get:466362 put:7118 us
  PIN_FAST_BENCHMARK: Time: get:465118 put:6975 us
  PIN_FAST_BENCHMARK: Time: get:466422 put:6602 us
  PIN_FAST_BENCHMARK: Time: get:465791 put:6818 us
  PIN_FAST_BENCHMARK: Time: get:467091 put:6298 us
  PIN_FAST_BENCHMARK: Time: get:467694 put:5432 us
  PIN_FAST_BENCHMARK: Time: get:469575 put:5581 us
  PIN_FAST_BENCHMARK: Time: get:468124 put:6055 us
  PIN_FAST_BENCHMARK: Time: get:468877 put:6720 us
  PIN_FAST_BENCHMARK: Time: get:467212 put:4961 us
  PIN_FAST_BENCHMARK: Time: get:467834 put:6697 us
  PIN_FAST_BENCHMARK: Time: get:470778 put:6398 us
  PIN_FAST_BENCHMARK: Time: get:469788 put:6310 us
  PIN_FAST_BENCHMARK: Time: get:488277 put:7113 us
  PIN_FAST_BENCHMARK: Time: get:486613 put:7085 us
  PIN_FAST_BENCHMARK: Time: get:486940 put:7202 us
  PIN_FAST_BENCHMARK: Time: get:488728 put:7101 us
  PIN_FAST_BENCHMARK: Time: get:487570 put:7327 us
  PIN_FAST_BENCHMARK: Time: get:489260 put:7027 us
  PIN_FAST_BENCHMARK: Time: get:488846 put:6866 us
  PIN_FAST_BENCHMARK: Time: get:488521 put:6745 us
  PIN_FAST_BENCHMARK: Time: get:489950 put:6459 us
  PIN_FAST_BENCHMARK: Time: get:489777 put:6617 us
  PIN_FAST_BENCHMARK: Time: get:488224 put:6591 us
  PIN_FAST_BENCHMARK: Time: get:488644 put:6477 us
  PIN_FAST_BENCHMARK: Time: get:488754 put:6711 us
  PIN_FAST_BENCHMARK: Time: get:488875 put:6743 us
  PIN_FAST_BENCHMARK: Time: get:489290 put:6657 us
  PIN_FAST_BENCHMARK: Time: get:490264 put:6684 us
  PIN_FAST_BENCHMARK: Time: get:489631 put:6737 us
  PIN_FAST_BENCHMARK: Time: get:488434 put:6655 us
  PIN_FAST_BENCHMARK: Time: get:492213 put:6297 us
  PIN_FAST_BENCHMARK: Time: get:491124 put:6173 us

After the whole series applied (new fixed kernel):

  $ sudo chrt -f 1 ./gup_test -a  -m 512 -j 40
  PIN_FAST_BENCHMARK: Time: get:82038 put:7041 us
  PIN_FAST_BENCHMARK: Time: get:82144 put:6817 us
  PIN_FAST_BENCHMARK: Time: get:83417 put:6674 us
  PIN_FAST_BENCHMARK: Time: get:82540 put:6594 us
  PIN_FAST_BENCHMARK: Time: get:83214 put:6681 us
  PIN_FAST_BENCHMARK: Time: get:83444 put:6889 us
  PIN_FAST_BENCHMARK: Time: get:83194 put:7499 us
  PIN_FAST_BENCHMARK: Time: get:84876 put:7369 us
  PIN_FAST_BENCHMARK: Time: get:86092 put:10289 us
  PIN_FAST_BENCHMARK: Time: get:86153 put:10415 us
  PIN_FAST_BENCHMARK: Time: get:85026 put:7751 us
  PIN_FAST_BENCHMARK: Time: get:85458 put:7944 us
  PIN_FAST_BENCHMARK: Time: get:85735 put:8154 us
  PIN_FAST_BENCHMARK: Time: get:85851 put:8299 us
  PIN_FAST_BENCHMARK: Time: get:86323 put:9617 us
  PIN_FAST_BENCHMARK: Time: get:86288 put:10496 us
  PIN_FAST_BENCHMARK: Time: get:87697 put:9346 us
  PIN_FAST_BENCHMARK: Time: get:87980 put:8382 us
  PIN_FAST_BENCHMARK: Time: get:88719 put:8400 us
  PIN_FAST_BENCHMARK: Time: get:87616 put:8588 us
  PIN_FAST_BENCHMARK: Time: get:86730 put:9563 us
  PIN_FAST_BENCHMARK: Time: get:88167 put:8673 us
  PIN_FAST_BENCHMARK: Time: get:86844 put:9777 us
  PIN_FAST_BENCHMARK: Time: get:88068 put:11774 us
  PIN_FAST_BENCHMARK: Time: get:86170 put:15676 us
  PIN_FAST_BENCHMARK: Time: get:87967 put:12827 us
  PIN_FAST_BENCHMARK: Time: get:95773 put:7652 us
  PIN_FAST_BENCHMARK: Time: get:87734 put:13650 us
  PIN_FAST_BENCHMARK: Time: get:89833 put:14237 us
  PIN_FAST_BENCHMARK: Time: get:96186 put:8029 us
  PIN_FAST_BENCHMARK: Time: get:95532 put:8886 us
  PIN_FAST_BENCHMARK: Time: get:95351 put:5826 us
  PIN_FAST_BENCHMARK: Time: get:96401 put:8407 us
  PIN_FAST_BENCHMARK: Time: get:96473 put:8287 us
  PIN_FAST_BENCHMARK: Time: get:97177 put:8430 us
  PIN_FAST_BENCHMARK: Time: get:98120 put:5263 us
  PIN_FAST_BENCHMARK: Time: get:96271 put:7757 us
  PIN_FAST_BENCHMARK: Time: get:99628 put:10467 us
  PIN_FAST_BENCHMARK: Time: get:99344 put:10045 us
  PIN_FAST_BENCHMARK: Time: get:94212 put:15485 us

Summary:

  Old kernel: 477729.97 (+-3.79%)
  New kernel:  89144.65 (+-11.76%)

This patch (of 3):

Add a new parameter "-j N" to support concurrent gup test.

Link: https://lkml.kernel.org/r/20210507150553.208763-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20210507150553.208763-2-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Kirill Shutemov <kirill@shutemov.name>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/gup_test.c | 96 ++++++++++++++++++---------
 1 file changed, 65 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1e662d59c502..fe043f67798b 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -6,6 +6,8 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <pthread.h>
+#include <assert.h>
 #include "../../../../mm/gup_test.h"
 
 #define MB (1UL << 20)
@@ -15,6 +17,12 @@
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
 static char *cmd_to_str(unsigned long cmd)
 {
 	switch (cmd) {
@@ -34,17 +42,55 @@ static char *cmd_to_str(unsigned long cmd)
 	return "Unknown command";
 }
 
+void *gup_thread(void *data)
+{
+	struct gup_test gup = *(struct gup_test *)data;
+	int i;
+
+	/* Only report timing information on the *_BENCHMARK commands: */
+	if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+	     (cmd == PIN_LONGTERM_BENCHMARK)) {
+		for (i = 0; i < repeats; i++) {
+			gup.size = size;
+			if (ioctl(gup_fd, cmd, &gup))
+				perror("ioctl"), exit(1);
+
+			pthread_mutex_lock(&print_mutex);
+			printf("%s: Time: get:%lld put:%lld us",
+			       cmd_to_str(cmd), gup.get_delta_usec,
+			       gup.put_delta_usec);
+			if (gup.size != size)
+				printf(", truncated (size: %lld)", gup.size);
+			printf("\n");
+			pthread_mutex_unlock(&print_mutex);
+		}
+	} else {
+		gup.size = size;
+		if (ioctl(gup_fd, cmd, &gup)) {
+			perror("ioctl");
+			exit(1);
+		}
+
+		pthread_mutex_lock(&print_mutex);
+		printf("%s: done\n", cmd_to_str(cmd));
+		if (gup.size != size)
+			printf("Truncated (size: %lld)\n", gup.size);
+		pthread_mutex_unlock(&print_mutex);
+	}
+
+	return NULL;
+}
+
 int main(int argc, char **argv)
 {
 	struct gup_test gup = { 0 };
-	unsigned long size = 128 * MB;
-	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1;
-	unsigned long cmd = GUP_FAST_BENCHMARK;
+	int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
 	int flags = MAP_PRIVATE, touch = 0;
 	char *file = "/dev/zero";
+	pthread_t *tid;
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
 		switch (opt) {
 		case 'a':
 			cmd = PIN_FAST_BENCHMARK;
@@ -74,6 +120,9 @@ int main(int argc, char **argv)
 			/* strtol, so you can pass flags in hex form */
 			gup.gup_flags = strtol(optarg, 0, 0);
 			break;
+		case 'j':
+			nthreads = atoi(optarg);
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -154,8 +203,8 @@ int main(int argc, char **argv)
 	if (write)
 		gup.gup_flags |= FOLL_WRITE;
 
-	fd = open("/sys/kernel/debug/gup_test", O_RDWR);
-	if (fd == -1) {
+	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+	if (gup_fd == -1) {
 		perror("open");
 		exit(1);
 	}
@@ -185,32 +234,17 @@ int main(int argc, char **argv)
 			p[0] = 0;
 	}
 
-	/* Only report timing information on the *_BENCHMARK commands: */
-	if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
-	     (cmd == PIN_LONGTERM_BENCHMARK)) {
-		for (i = 0; i < repeats; i++) {
-			gup.size = size;
-			if (ioctl(fd, cmd, &gup))
-				perror("ioctl"), exit(1);
-
-			printf("%s: Time: get:%lld put:%lld us",
-			       cmd_to_str(cmd), gup.get_delta_usec,
-			       gup.put_delta_usec);
-			if (gup.size != size)
-				printf(", truncated (size: %lld)", gup.size);
-			printf("\n");
-		}
-	} else {
-		gup.size = size;
-		if (ioctl(fd, cmd, &gup)) {
-			perror("ioctl");
-			exit(1);
-		}
-
-		printf("%s: done\n", cmd_to_str(cmd));
-		if (gup.size != size)
-			printf("Truncated (size: %lld)\n", gup.size);
+	tid = malloc(sizeof(pthread_t) * nthreads);
+	assert(tid);
+	for (i = 0; i < nthreads; i++) {
+		ret = pthread_create(&tid[i], NULL, gup_thread, &gup);
+		assert(ret == 0);
 	}
+	for (i = 0; i < nthreads; i++) {
+		ret = pthread_join(tid[i], NULL);
+		assert(ret == 0);
+	}
+	free(tid);
 
 	return 0;
 }

From 292648ac5cf16ec1fce33e29e0f9e35da7de63f7 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Mon, 28 Jun 2021 19:36:36 -0700
Subject: [PATCH 3134/3292] mm: gup: allow FOLL_PIN to scale in SMP

has_pinned cannot be written by each pin-fast or it won't scale in SMP.
This isn't "false sharing" strictly speaking (it's more like "true
non-sharing"), but it creates the same SMP scalability bottleneck of
"false sharing".

To verify the improvement, below test is done on 40 cpus host with
Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz (must be with
CONFIG_GUP_TEST=y):

  $ sudo chrt -f 1 ./gup_test -a  -m 512 -j 40

Where we can get (average value for 40 threads):

  Old kernel: 477729.97 (+- 3.79%)
  New kernel:  89144.65 (+-11.76%)

On a similar condition with 256 cpus, this commits increases the SMP
scalability of pin_user_pages_fast() executed by different threads of the
same process by more than 4000%.

[peterx@redhat.com: rewrite commit message, add parentheses against "(A & B)"]

Link: https://lkml.kernel.org/r/20210507150553.208763-3-peterx@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Kirill Shutemov <kirill@shutemov.name>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 90262e448552..a6c20a7b3c49 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1320,7 +1320,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 		BUG_ON(*locked != 1);
 	}
 
-	if (flags & FOLL_PIN)
+	if ((flags & FOLL_PIN) && !atomic_read(&mm->has_pinned))
 		atomic_set(&mm->has_pinned, 1);
 
 	/*
@@ -2641,7 +2641,7 @@ static int internal_get_user_pages_fast(unsigned long start,
 				       FOLL_FAST_ONLY)))
 		return -EINVAL;
 
-	if (gup_flags & FOLL_PIN)
+	if ((gup_flags & FOLL_PIN) && !atomic_read(&current->mm->has_pinned))
 		atomic_set(&current->mm->has_pinned, 1);
 
 	if (!(gup_flags & FOLL_FAST_ONLY))

From a458b76a4171f893efa7657dc079924580a8746a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Mon, 28 Jun 2021 19:36:40 -0700
Subject: [PATCH 3135/3292] mm: gup: pack has_pinned in MMF_HAS_PINNED

has_pinned 32bit can be packed in the MMF_HAS_PINNED bit as a noop
cleanup.

Any atomic_inc/dec to the mm cacheline shared by all threads in pin-fast
would reintroduce a loss of SMP scalability to pin-fast, so there's no
future potential usefulness to keep an atomic in the mm for this.

set_bit(MMF_HAS_PINNED) will be theoretically a bit slower than WRITE_ONCE
(atomic_set is equivalent to WRITE_ONCE), but the set_bit (just like
atomic_set after this commit) has to be still issued only once per "mm",
so the difference between the two will be lost in the noise.

will-it-scale "mmap2" shows no change in performance with enterprise
config as expected.

will-it-scale "pin_fast" retains the > 4000% SMP scalability performance
improvement against upstream as expected.

This is a noop as far as overall performance and SMP scalability are
concerned.

[peterx@redhat.com: pack has_pinned in MMF_HAS_PINNED]
  Link: https://lkml.kernel.org/r/YJqWESqyxa8OZA+2@t490s
[akpm@linux-foundation.org: coding style fixes]
[peterx@redhat.com: fix build for task_mmu.c, introduce mm_set_has_pinned_flag, fix comments]

Link: https://lkml.kernel.org/r/20210507150553.208763-4-peterx@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Kirill Shutemov <kirill@shutemov.name>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c             |  2 +-
 include/linux/mm.h             |  2 +-
 include/linux/mm_types.h       | 10 ----------
 include/linux/sched/coredump.h |  8 ++++++++
 kernel/fork.c                  |  1 -
 mm/gup.c                       | 19 +++++++++++++++----
 6 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fc9784544b24..66965ad88d8b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1047,7 +1047,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
 		return false;
 	if (!is_cow_mapping(vma->vm_flags))
 		return false;
-	if (likely(!atomic_read(&vma->vm_mm->has_pinned)))
+	if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
 		return false;
 	page = vm_normal_page(vma, addr, pte);
 	if (!page)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e39ed497578b..79f32962d7ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1341,7 +1341,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
 	if (!is_cow_mapping(vma->vm_flags))
 		return false;
 
-	if (!atomic_read(&vma->vm_mm->has_pinned))
+	if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
 		return false;
 
 	return page_maybe_dma_pinned(page);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8f0fb62e8975..b66d0225414e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -435,16 +435,6 @@ struct mm_struct {
 		 */
 		atomic_t mm_count;
 
-		/**
-		 * @has_pinned: Whether this mm has pinned any pages.  This can
-		 * be either replaced in the future by @pinned_vm when it
-		 * becomes stable, or grow into a counter on its own. We're
-		 * aggresive on this bit now - even if the pinned pages were
-		 * unpinned later on, we'll still keep this bit set for the
-		 * lifecycle of this mm just for simplicity.
-		 */
-		atomic_t has_pinned;
-
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* PTE page table pages */
 #endif
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index dfd82eab2902..4d9e3a656875 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -73,6 +73,14 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_OOM_VICTIM		25	/* mm is the oom victim */
 #define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
 #define MMF_MULTIPROCESS	27	/* mm is shared between processes */
+/*
+ * MMF_HAS_PINNED: Whether this mm has pinned any pages.  This can be either
+ * replaced in the future by mm.pinned_vm when it becomes stable, or grow into
+ * a counter on its own. We're aggresive on this bit for now: even if the
+ * pinned pages were unpinned later on, we'll still keep this bit set for the
+ * lifecycle of this mm, just for simplicity.
+ */
+#define MMF_HAS_PINNED		28	/* FOLL_PIN has run, never cleared */
 #define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/kernel/fork.c b/kernel/fork.c
index a070caed5c8e..c6747d556ef9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1029,7 +1029,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
-	atomic_set(&mm->has_pinned, 0);
 	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
diff --git a/mm/gup.c b/mm/gup.c
index a6c20a7b3c49..8651309f8ec3 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -420,6 +420,17 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
 }
 EXPORT_SYMBOL(unpin_user_pages);
 
+/*
+ * Set the MMF_HAS_PINNED if not set yet; after set it'll be there for the mm's
+ * lifecycle.  Avoid setting the bit unless necessary, or it might cause write
+ * cache bouncing on large SMP machines for concurrent pinned gups.
+ */
+static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
+{
+	if (!test_bit(MMF_HAS_PINNED, mm_flags))
+		set_bit(MMF_HAS_PINNED, mm_flags);
+}
+
 #ifdef CONFIG_MMU
 static struct page *no_page_table(struct vm_area_struct *vma,
 		unsigned int flags)
@@ -1320,8 +1331,8 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 		BUG_ON(*locked != 1);
 	}
 
-	if ((flags & FOLL_PIN) && !atomic_read(&mm->has_pinned))
-		atomic_set(&mm->has_pinned, 1);
+	if (flags & FOLL_PIN)
+		mm_set_has_pinned_flag(&mm->flags);
 
 	/*
 	 * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
@@ -2641,8 +2652,8 @@ static int internal_get_user_pages_fast(unsigned long start,
 				       FOLL_FAST_ONLY)))
 		return -EINVAL;
 
-	if ((gup_flags & FOLL_PIN) && !atomic_read(&current->mm->has_pinned))
-		atomic_set(&current->mm->has_pinned, 1);
+	if (gup_flags & FOLL_PIN)
+		mm_set_has_pinned_flag(&current->mm->flags);
 
 	if (!(gup_flags & FOLL_FAST_ONLY))
 		might_lock_read(&current->mm->mmap_lock);

From e17eae2b839937817d771e2f5d2b30e5e2b81bb7 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 28 Jun 2021 19:36:43 -0700
Subject: [PATCH 3136/3292] mm: pagewalk: fix walk for hugepage tables

Pagewalk ignores hugepd entries and walk down the tables as if it was
traditionnal entries, leading to crazy result.

Add walk_hugepd_range() and use it to walk hugepage tables.

Link: https://lkml.kernel.org/r/38d04410700c8d02f28ba37e020b62c55d6f3d2c.1624597695.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Steven Price <steven.price@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Daniel Axtens <dja@axtens.net>
Cc: "Oliver O'Halloran" <oohall@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/pagewalk.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index e81640d9f177..9b3db11a4d1d 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -58,6 +58,45 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+#ifdef CONFIG_ARCH_HAS_HUGEPD
+static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
+			     unsigned long end, struct mm_walk *walk, int pdshift)
+{
+	int err = 0;
+	const struct mm_walk_ops *ops = walk->ops;
+	int shift = hugepd_shift(*phpd);
+	int page_size = 1 << shift;
+
+	if (!ops->pte_entry)
+		return 0;
+
+	if (addr & (page_size - 1))
+		return 0;
+
+	for (;;) {
+		pte_t *pte;
+
+		spin_lock(&walk->mm->page_table_lock);
+		pte = hugepte_offset(*phpd, addr, pdshift);
+		err = ops->pte_entry(pte, addr, addr + page_size, walk);
+		spin_unlock(&walk->mm->page_table_lock);
+
+		if (err)
+			break;
+		if (addr >= end - page_size)
+			break;
+		addr += page_size;
+	}
+	return err;
+}
+#else
+static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
+			     unsigned long end, struct mm_walk *walk, int pdshift)
+{
+	return 0;
+}
+#endif
+
 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
@@ -108,7 +147,10 @@ again:
 				goto again;
 		}
 
-		err = walk_pte_range(pmd, addr, next, walk);
+		if (is_hugepd(__hugepd(pmd_val(*pmd))))
+			err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT);
+		else
+			err = walk_pte_range(pmd, addr, next, walk);
 		if (err)
 			break;
 	} while (pmd++, addr = next, addr != end);
@@ -157,7 +199,10 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 		if (pud_none(*pud))
 			goto again;
 
-		err = walk_pmd_range(pud, addr, next, walk);
+		if (is_hugepd(__hugepd(pud_val(*pud))))
+			err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT);
+		else
+			err = walk_pmd_range(pud, addr, next, walk);
 		if (err)
 			break;
 	} while (pud++, addr = next, addr != end);
@@ -189,7 +234,9 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 			if (err)
 				break;
 		}
-		if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
+		if (is_hugepd(__hugepd(p4d_val(*p4d))))
+			err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT);
+		else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
 			err = walk_pud_range(p4d, addr, next, walk);
 		if (err)
 			break;
@@ -224,8 +271,9 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 			if (err)
 				break;
 		}
-		if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
-		    ops->pte_entry)
+		if (is_hugepd(__hugepd(pgd_val(*pgd))))
+			err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT);
+		else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry)
 			err = walk_p4d_range(pgd, addr, next, walk);
 		if (err)
 			break;

From 63d8620ecf93b5d8d0a254471184d08f8e8f538d Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:36:46 -0700
Subject: [PATCH 3137/3292] mm/swapfile: use percpu_ref to serialize against
 concurrent swapoff

Patch series "close various race windows for swap", v6.

When I was investigating the swap code, I found some possible race
windows.  This series aims to fix all these races.  But using current
get/put_swap_device() to guard against concurrent swapoff for
swap_readpage() looks terrible because swap_readpage() may take really
long time.  And to reduce the performance overhead on the hot-path as much
as possible, it appears we can use the percpu_ref to close this race
window(as suggested by Huang, Ying).  The patch 1 adds percpu_ref support
for swap and most of the remaining patches try to use this to close
various race windows.  More details can be found in the respective
changelogs.

This patch (of 4):

Using current get/put_swap_device() to guard against concurrent swapoff
for some swap ops, e.g.  swap_readpage(), looks terrible because they
might take really long time.  This patch adds the percpu_ref support to
serialize against concurrent swapoff(as suggested by Huang, Ying).  Also
we remove the SWP_VALID flag because it's used together with RCU solution.

Link: https://lkml.kernel.org/r/20210426123316.806267-1-linmiaohe@huawei.com
Link: https://lkml.kernel.org/r/20210426123316.806267-2-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  5 +--
 mm/swapfile.c        | 79 +++++++++++++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 144727041e78..c9e7fea10b83 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -177,7 +177,6 @@ enum {
 	SWP_PAGE_DISCARD = (1 << 10),	/* freed swap page-cluster discards */
 	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
 	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
-	SWP_VALID	= (1 << 13),	/* swap is valid to be operated on? */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 14),	/* refcount in scan_swap_map */
 };
@@ -240,6 +239,7 @@ struct swap_cluster_list {
  * The in-memory structure used to track swap areas.
  */
 struct swap_info_struct {
+	struct percpu_ref users;	/* indicate and keep swap device valid. */
 	unsigned long	flags;		/* SWP_USED etc: see above */
 	signed short	prio;		/* swap priority of this type */
 	struct plist_node list;		/* entry in swap_active_head */
@@ -260,6 +260,7 @@ struct swap_info_struct {
 	struct block_device *bdev;	/* swap device or bdev of swap file */
 	struct file *swap_file;		/* seldom referenced */
 	unsigned int old_block_size;	/* seldom referenced */
+	struct completion comp;		/* seldom referenced */
 #ifdef CONFIG_FRONTSWAP
 	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
 	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
@@ -511,7 +512,7 @@ sector_t swap_page_sector(struct page *page);
 
 static inline void put_swap_device(struct swap_info_struct *si)
 {
-	rcu_read_unlock();
+	percpu_ref_put(&si->users);
 }
 
 #else /* CONFIG_SWAP */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 996afa8131c8..a9a04a5360d9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -39,6 +39,7 @@
 #include <linux/export.h>
 #include <linux/swap_slots.h>
 #include <linux/sort.h>
+#include <linux/completion.h>
 
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
@@ -511,6 +512,14 @@ static void swap_discard_work(struct work_struct *work)
 	spin_unlock(&si->lock);
 }
 
+static void swap_users_ref_free(struct percpu_ref *ref)
+{
+	struct swap_info_struct *si;
+
+	si = container_of(ref, struct swap_info_struct, users);
+	complete(&si->comp);
+}
+
 static void alloc_cluster(struct swap_info_struct *si, unsigned long idx)
 {
 	struct swap_cluster_info *ci = si->cluster_info;
@@ -1270,18 +1279,12 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
  * via preventing the swap device from being swapoff, until
  * put_swap_device() is called.  Otherwise return NULL.
  *
- * The entirety of the RCU read critical section must come before the
- * return from or after the call to synchronize_rcu() in
- * enable_swap_info() or swapoff().  So if "si->flags & SWP_VALID" is
- * true, the si->map, si->cluster_info, etc. must be valid in the
- * critical section.
- *
  * Notice that swapoff or swapoff+swapon can still happen before the
- * rcu_read_lock() in get_swap_device() or after the rcu_read_unlock()
- * in put_swap_device() if there isn't any other way to prevent
- * swapoff, such as page lock, page table lock, etc.  The caller must
- * be prepared for that.  For example, the following situation is
- * possible.
+ * percpu_ref_tryget_live() in get_swap_device() or after the
+ * percpu_ref_put() in put_swap_device() if there isn't any other way
+ * to prevent swapoff, such as page lock, page table lock, etc.  The
+ * caller must be prepared for that.  For example, the following
+ * situation is possible.
  *
  *   CPU1				CPU2
  *   do_swap_page()
@@ -1309,21 +1312,27 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
 	si = swp_swap_info(entry);
 	if (!si)
 		goto bad_nofile;
-
-	rcu_read_lock();
-	if (data_race(!(si->flags & SWP_VALID)))
-		goto unlock_out;
+	if (!percpu_ref_tryget_live(&si->users))
+		goto out;
+	/*
+	 * Guarantee the si->users are checked before accessing other
+	 * fields of swap_info_struct.
+	 *
+	 * Paired with the spin_unlock() after setup_swap_info() in
+	 * enable_swap_info().
+	 */
+	smp_rmb();
 	offset = swp_offset(entry);
 	if (offset >= si->max)
-		goto unlock_out;
+		goto put_out;
 
 	return si;
 bad_nofile:
 	pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
 out:
 	return NULL;
-unlock_out:
-	rcu_read_unlock();
+put_out:
+	percpu_ref_put(&si->users);
 	return NULL;
 }
 
@@ -2466,7 +2475,7 @@ static void setup_swap_info(struct swap_info_struct *p, int prio,
 
 static void _enable_swap_info(struct swap_info_struct *p)
 {
-	p->flags |= SWP_WRITEOK | SWP_VALID;
+	p->flags |= SWP_WRITEOK;
 	atomic_long_add(p->pages, &nr_swap_pages);
 	total_swap_pages += p->pages;
 
@@ -2497,10 +2506,9 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
 	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
 	/*
-	 * Guarantee swap_map, cluster_info, etc. fields are valid
-	 * between get/put_swap_device() if SWP_VALID bit is set
+	 * Finished initializing swap device, now it's safe to reference it.
 	 */
-	synchronize_rcu();
+	percpu_ref_resurrect(&p->users);
 	spin_lock(&swap_lock);
 	spin_lock(&p->lock);
 	_enable_swap_info(p);
@@ -2616,16 +2624,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 
 	reenable_swap_slots_cache_unlock();
 
-	spin_lock(&swap_lock);
-	spin_lock(&p->lock);
-	p->flags &= ~SWP_VALID;		/* mark swap device as invalid */
-	spin_unlock(&p->lock);
-	spin_unlock(&swap_lock);
 	/*
-	 * wait for swap operations protected by get/put_swap_device()
-	 * to complete
+	 * Wait for swap operations protected by get/put_swap_device()
+	 * to complete.
+	 *
+	 * We need synchronize_rcu() here to protect the accessing to
+	 * the swap cache data structure.
 	 */
+	percpu_ref_kill(&p->users);
 	synchronize_rcu();
+	wait_for_completion(&p->comp);
 
 	flush_work(&p->discard_work);
 
@@ -2857,6 +2865,12 @@ static struct swap_info_struct *alloc_swap_info(void)
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
+	if (percpu_ref_init(&p->users, swap_users_ref_free,
+			    PERCPU_REF_INIT_DEAD, GFP_KERNEL)) {
+		kvfree(p);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	spin_lock(&swap_lock);
 	for (type = 0; type < nr_swapfiles; type++) {
 		if (!(swap_info[type]->flags & SWP_USED))
@@ -2864,6 +2878,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 	}
 	if (type >= MAX_SWAPFILES) {
 		spin_unlock(&swap_lock);
+		percpu_ref_exit(&p->users);
 		kvfree(p);
 		return ERR_PTR(-EPERM);
 	}
@@ -2891,9 +2906,13 @@ static struct swap_info_struct *alloc_swap_info(void)
 		plist_node_init(&p->avail_lists[i], 0);
 	p->flags = SWP_USED;
 	spin_unlock(&swap_lock);
-	kvfree(defer);
+	if (defer) {
+		percpu_ref_exit(&defer->users);
+		kvfree(defer);
+	}
 	spin_lock_init(&p->lock);
 	spin_lock_init(&p->cont_lock);
+	init_completion(&p->comp);
 
 	return p;
 }

From 2799e77529c2a25492a4395db93996e3dacd762d Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:36:50 -0700
Subject: [PATCH 3138/3292] swap: fix do_swap_page() race with swapoff

When I was investigating the swap code, I found the below possible race
window:

CPU 1                                   	CPU 2
-----                                   	-----
do_swap_page
  if (data_race(si->flags & SWP_SYNCHRONOUS_IO)
  swap_readpage
    if (data_race(sis->flags & SWP_FS_OPS)) {
                                        	swapoff
					  	  ..
					  	  p->swap_file = NULL;
					  	  ..
    struct file *swap_file = sis->swap_file;
    struct address_space *mapping = swap_file->f_mapping;[oops!]

Note that for the pages that are swapped in through swap cache, this isn't
an issue. Because the page is locked, and the swap entry will be marked
with SWAP_HAS_CACHE, so swapoff() can not proceed until the page has been
unlocked.

Fix this race by using get/put_swap_device() to guard against concurrent
swapoff.

Link: https://lkml.kernel.org/r/20210426123316.806267-3-linmiaohe@huawei.com
Fixes: 0bcac06f27d7 ("mm,swap: skip swapcache for swapin of synchronous device")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  9 +++++++++
 mm/memory.c          | 11 +++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index c9e7fea10b83..46d51d058d05 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -527,6 +527,15 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
 	return NULL;
 }
 
+static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
+{
+	return NULL;
+}
+
+static inline void put_swap_device(struct swap_info_struct *si)
+{
+}
+
 #define swap_address_space(entry)		(NULL)
 #define get_nr_swap_pages()			0L
 #define total_swap_pages			0L
diff --git a/mm/memory.c b/mm/memory.c
index 486f4a2874e7..b15367c285bd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3353,6 +3353,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *swapcache;
+	struct swap_info_struct *si = NULL;
 	swp_entry_t entry;
 	pte_t pte;
 	int locked;
@@ -3380,14 +3381,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		goto out;
 	}
 
+	/* Prevent swapoff from happening to us. */
+	si = get_swap_device(entry);
+	if (unlikely(!si))
+		goto out;
 
 	delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry, vma, vmf->address);
 	swapcache = page;
 
 	if (!page) {
-		struct swap_info_struct *si = swp_swap_info(entry);
-
 		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
 		    __swap_count(entry) == 1) {
 			/* skip swapcache */
@@ -3556,6 +3559,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 out:
+	if (si)
+		put_swap_device(si);
 	return ret;
 out_nomap:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3567,6 +3572,8 @@ out_release:
 		unlock_page(swapcache);
 		put_page(swapcache);
 	}
+	if (si)
+		put_swap_device(si);
 	return ret;
 }
 

From 5c046235a826370d528a29c44e0ce05f9685d8b4 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:36:53 -0700
Subject: [PATCH 3139/3292] mm/swap: remove confusing checking for
 non_swap_entry() in swap_ra_info()

The non_swap_entry() was used for working with VMA based swap readahead
via commit ec560175c0b6 ("mm, swap: VMA based swap readahead").  At that
time, the non_swap_entry() checking is necessary because the function is
called before checking that in do_swap_page().  Then it's moved to
swap_ra_info() since commit eaf649ebc3ac ("mm: swap: clean up swap
readahead").  After that, the non_swap_entry() checking is unnecessary,
because swap_ra_info() is called after non_swap_entry() has been checked
already.  The resulting code is confusing as the non_swap_entry() check
looks racy now because while we released the pte lock, somebody else might
have faulted in this pte.  So we should check whether it's swap pte first
to guard against such race or swap_type will be unexpected.  But the race
isn't important because it will not cause problem.  We would have enough
checking when we really operate the PTE entries later.  So we remove the
non_swap_entry() check here to avoid confusion.

Link: https://lkml.kernel.org/r/20210426123316.806267-4-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_state.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 272ea2108c9d..df5405384520 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -721,7 +721,6 @@ static void swap_ra_info(struct vm_fault *vmf,
 {
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long ra_val;
-	swp_entry_t entry;
 	unsigned long faddr, pfn, fpfn;
 	unsigned long start, end;
 	pte_t *pte, *orig_pte;
@@ -739,11 +738,6 @@ static void swap_ra_info(struct vm_fault *vmf,
 
 	faddr = vmf->address;
 	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
-	entry = pte_to_swp_entry(*pte);
-	if ((unlikely(non_swap_entry(entry)))) {
-		pte_unmap(orig_pte);
-		return;
-	}
 
 	fpfn = PFN_DOWN(faddr);
 	ra_val = GET_SWAP_RA_VAL(vma);

From 2efa33fc7f6ec94a3a538c1a264273c889be2b36 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:36:57 -0700
Subject: [PATCH 3140/3292] mm/shmem: fix shmem_swapin() race with swapoff

When I was investigating the swap code, I found the below possible race
window:

CPU 1                                         CPU 2
-----                                         -----
shmem_swapin
  swap_cluster_readahead
    if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
                                              swapoff
                                                ..
                                                si->swap_file = NULL;
                                                ..
    struct inode *inode = si->swap_file->f_mapping->host;[oops!]

Close this race window by using get/put_swap_device() to guard against
concurrent swapoff.

Link: https://lkml.kernel.org/r/20210426123316.806267-5-linmiaohe@huawei.com
Fixes: 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 5d46611cba8d..53f21016608e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1696,7 +1696,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
-	struct page *page;
+	struct swap_info_struct *si;
+	struct page *page = NULL;
 	swp_entry_t swap;
 	int error;
 
@@ -1704,6 +1705,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap = radix_to_swp_entry(*pagep);
 	*pagep = NULL;
 
+	/* Prevent swapoff from happening to us. */
+	si = get_swap_device(swap);
+	if (!si) {
+		error = EINVAL;
+		goto failed;
+	}
 	/* Look it up and read it in.. */
 	page = lookup_swap_cache(swap, NULL, 0);
 	if (!page) {
@@ -1765,6 +1772,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap_free(swap);
 
 	*pagep = page;
+	if (si)
+		put_swap_device(si);
 	return 0;
 failed:
 	if (!shmem_confirm_swap(mapping, index, swap))
@@ -1775,6 +1784,9 @@ unlock:
 		put_page(page);
 	}
 
+	if (si)
+		put_swap_device(si);
+
 	return error;
 }
 

From bb243f7dc62429343404b052e9c51d745e618346 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:37:00 -0700
Subject: [PATCH 3141/3292] mm/swapfile: move get_swap_page_of_type() under
 CONFIG_HIBERNATION

Patch series "Cleanups for swap", v2.

This series contains just cleanups to remove some unused variables, delete
meaningless forward declarations and so on.  More details can be found in
the respective changelogs.

This patch (of 4):

We should move get_swap_page_of_type() under CONFIG_HIBERNATION since the
only caller of this function is now suspend routine.

[linmiaohe@huawei.com: move scan_swap_map() under CONFIG_HIBERNATION]
  Link: https://lkml.kernel.org/r/20210521070855.2015094-1-linmiaohe@huawei.com
[linmiaohe@huawei.com: fold scan_swap_map() into the only caller get_swap_page_of_type()]
  Link: https://lkml.kernel.org/r/20210527120328.3935132-1-linmiaohe@huawei.com

Link: https://lkml.kernel.org/r/20210520134022.1370406-1-linmiaohe@huawei.com
Link: https://lkml.kernel.org/r/20210520134022.1370406-2-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swapfile.c | 83 +++++++++++++++++++--------------------------------
 1 file changed, 31 insertions(+), 52 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index a9a04a5360d9..115f0b0c0c10 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -453,10 +453,10 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
 		unsigned int idx)
 {
 	/*
-	 * If scan_swap_map() can't find a free cluster, it will check
+	 * If scan_swap_map_slots() can't find a free cluster, it will check
 	 * si->swap_map directly. To make sure the discarding cluster isn't
-	 * taken by scan_swap_map(), mark the swap entries bad (occupied). It
-	 * will be cleared after discard
+	 * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).
+	 * It will be cleared after discard
 	 */
 	memset(si->swap_map + idx * SWAPFILE_CLUSTER,
 			SWAP_MAP_BAD, SWAPFILE_CLUSTER);
@@ -589,7 +589,7 @@ static void dec_cluster_info_page(struct swap_info_struct *p,
 }
 
 /*
- * It's possible scan_swap_map() uses a free cluster in the middle of free
+ * It's possible scan_swap_map_slots() uses a free cluster in the middle of free
  * cluster list. Avoiding such abuse to avoid list corruption.
  */
 static bool
@@ -1037,21 +1037,6 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
 	swap_range_free(si, offset, SWAPFILE_CLUSTER);
 }
 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-				   unsigned char usage)
-{
-	swp_entry_t entry;
-	int n_ret;
-
-	n_ret = scan_swap_map_slots(si, usage, 1, &entry);
-
-	if (n_ret)
-		return swp_offset(entry);
-	else
-		return 0;
-
-}
-
 int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
 {
 	unsigned long size = swap_entry_size(entry_size);
@@ -1114,14 +1099,14 @@ start_over:
 nextsi:
 		/*
 		 * if we got here, it's likely that si was almost full before,
-		 * and since scan_swap_map() can drop the si->lock, multiple
-		 * callers probably all tried to get a page from the same si
-		 * and it filled up before we could get one; or, the si filled
-		 * up between us dropping swap_avail_lock and taking si->lock.
-		 * Since we dropped the swap_avail_lock, the swap_avail_head
-		 * list may have been modified; so if next is still in the
-		 * swap_avail_head list then try it, otherwise start over
-		 * if we have not gotten any slots.
+		 * and since scan_swap_map_slots() can drop the si->lock,
+		 * multiple callers probably all tried to get a page from the
+		 * same si and it filled up before we could get one; or, the si
+		 * filled up between us dropping swap_avail_lock and taking
+		 * si->lock. Since we dropped the swap_avail_lock, the
+		 * swap_avail_head list may have been modified; so if next is
+		 * still in the swap_avail_head list then try it, otherwise
+		 * start over if we have not gotten any slots.
 		 */
 		if (plist_node_empty(&next->avail_lists[node]))
 			goto start_over;
@@ -1137,30 +1122,6 @@ noswap:
 	return n_ret;
 }
 
-/* The only caller of this function is now suspend routine */
-swp_entry_t get_swap_page_of_type(int type)
-{
-	struct swap_info_struct *si = swap_type_to_swap_info(type);
-	pgoff_t offset;
-
-	if (!si)
-		goto fail;
-
-	spin_lock(&si->lock);
-	if (si->flags & SWP_WRITEOK) {
-		/* This is called for allocating swap entry, not cache */
-		offset = scan_swap_map(si, 1);
-		if (offset) {
-			atomic_long_dec(&nr_swap_pages);
-			spin_unlock(&si->lock);
-			return swp_entry(type, offset);
-		}
-	}
-	spin_unlock(&si->lock);
-fail:
-	return (swp_entry_t) {0};
-}
-
 static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
@@ -1812,6 +1773,24 @@ int free_swap_and_cache(swp_entry_t entry)
 }
 
 #ifdef CONFIG_HIBERNATION
+
+swp_entry_t get_swap_page_of_type(int type)
+{
+	struct swap_info_struct *si = swap_type_to_swap_info(type);
+	swp_entry_t entry = {0};
+
+	if (!si)
+		goto fail;
+
+	/* This is called for allocating swap entry, not cache */
+	spin_lock(&si->lock);
+	if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry))
+		atomic_long_dec(&nr_swap_pages);
+	spin_unlock(&si->lock);
+fail:
+	return entry;
+}
+
 /*
  * Find the swap type that corresponds to given device (if any).
  *
@@ -2649,7 +2628,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	spin_lock(&p->lock);
 	drain_mmlist();
 
-	/* wait for anyone still in scan_swap_map */
+	/* wait for anyone still in scan_swap_map_slots */
 	p->highest_bit = 0;		/* cuts scans short */
 	while (p->flags >= SWP_SCANNING) {
 		spin_unlock(&p->lock);

From eb7709c5f3e55e230b9c8d8e79aa261b316066c2 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:37:03 -0700
Subject: [PATCH 3142/3292] mm/swap: remove unused local variable nr_shadows

Since commit 55c653b71e8c ("mm: stop accounting shadow entries"),
nr_shadows is not used anymore.

Link: https://lkml.kernel.org/r/20210520134022.1370406-3-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_state.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index df5405384520..1a2ba4056f37 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -114,8 +114,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
 	SetPageSwapCache(page);
 
 	do {
-		unsigned long nr_shadows = 0;
-
 		xas_lock_irq(&xas);
 		xas_create_range(&xas);
 		if (xas_error(&xas))
@@ -124,7 +122,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
 			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
 			old = xas_load(&xas);
 			if (xa_is_value(old)) {
-				nr_shadows++;
 				if (shadowp)
 					*shadowp = old;
 			}
@@ -260,7 +257,6 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
 	void *old;
 
 	for (;;) {
-		unsigned long nr_shadows = 0;
 		swp_entry_t entry = swp_entry(type, curr);
 		struct address_space *address_space = swap_address_space(entry);
 		XA_STATE(xas, &address_space->i_pages, curr);
@@ -270,7 +266,6 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
 			if (!xa_is_value(old))
 				continue;
 			xas_store(&xas, NULL);
-			nr_shadows++;
 		}
 		xa_unlock_irq(&address_space->i_pages);
 

From 1cfcc8306a767bda9a8fe6fddb3e80ca9ab7656b Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 28 Jun 2021 19:37:06 -0700
Subject: [PATCH 3143/3292] mm/swap_slots.c: delete meaningless forward
 declarations

deactivate_swap_slots_cache() and reactivate_swap_slots_cache() are only
called below their implementations.  So these forward declarations are
meaningless and should be removed.

Link: https://lkml.kernel.org/r/20210520134022.1370406-4-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_slots.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 6248d1030a9b..a66f3e0ec973 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -43,8 +43,6 @@ static DEFINE_MUTEX(swap_slots_cache_mutex);
 static DEFINE_MUTEX(swap_slots_cache_enable_mutex);
 
 static void __drain_swap_slots_cache(unsigned int type);
-static void deactivate_swap_slots_cache(void);
-static void reactivate_swap_slots_cache(void);
 
 #define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
 #define SLOTS_CACHE 0x1

From a4b451143fa275a31f17a93adac3b8dbb3d20ca2 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 28 Jun 2021 19:37:09 -0700
Subject: [PATCH 3144/3292] mm, swap: remove unnecessary smp_rmb() in
 swap_type_to_swap_info()

Before commit c10d38cc8d3e ("mm, swap: bounds check swap_info array
accesses to avoid NULL derefs"), the typical code to reference the
swap_info[] is as follows,

  type = swp_type(swp_entry);
  if (type >= nr_swapfiles)
          /* handle invalid swp_entry */;
  p = swap_info[type];
  /* access fields of *p.  OOPS! p may be NULL! */

Because the ordering isn't guaranteed, it's possible that swap_info[type]
is read before "nr_swapfiles".  And that may result in NULL pointer
dereference.

So after commit c10d38cc8d3e, the code becomes,

  struct swap_info_struct *swap_type_to_swap_info(int type)
  {
	  if (type >= READ_ONCE(nr_swapfiles))
		  return NULL;
	  smp_rmb();
	  return READ_ONCE(swap_info[type]);
  }

  /* users */
  type = swp_type(swp_entry);
  p = swap_type_to_swap_info(type);
  if (!p)
	  /* handle invalid swp_entry */;
  /* dereference p */

Where the value of swap_info[type] (that is, "p") is checked to be
non-zero before being dereferenced.  So, the NULL deferencing becomes
impossible even if "nr_swapfiles" is read after swap_info[type].
Therefore, the "smp_rmb()" becomes unnecessary.

And, we don't even need to read "nr_swapfiles" here.  Because the non-zero
checking for "p" is sufficient.  We just need to make sure we will not
access out of the boundary of the array.  With the change, nr_swapfiles
will only be accessed with swap_lock held, except in
swapcache_free_entries().  Where the absolute correctness of the value
isn't needed, as described in the comments.

We still need to guarantee swap_info[type] is read before being
dereferenced.  That can be satisfied via the data dependency ordering
enforced by READ_ONCE(swap_info[type]).  This needs to be paired with
proper write barriers.  So smp_store_release() is used in
alloc_swap_info() to guarantee the fields of *swap_info[type] is
initialized before swap_info[type] itself being written.  Note that the
fields of *swap_info[type] is initialized to be 0 via kvzalloc() firstly.
The assignment and deferencing of swap_info[type] is like
rcu_assign_pointer() and rcu_dereference().

Link: https://lkml.kernel.org/r/20210520073301.1676294-1-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Paul McKenney <paulmck@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swapfile.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 115f0b0c0c10..e898c879a434 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -100,11 +100,10 @@ atomic_t nr_rotate_swap = ATOMIC_INIT(0);
 
 static struct swap_info_struct *swap_type_to_swap_info(int type)
 {
-	if (type >= READ_ONCE(nr_swapfiles))
+	if (type >= MAX_SWAPFILES)
 		return NULL;
 
-	smp_rmb();	/* Pairs with smp_wmb in alloc_swap_info. */
-	return READ_ONCE(swap_info[type]);
+	return READ_ONCE(swap_info[type]); /* rcu_dereference() */
 }
 
 static inline unsigned char swap_count(unsigned char ent)
@@ -2863,14 +2862,12 @@ static struct swap_info_struct *alloc_swap_info(void)
 	}
 	if (type >= nr_swapfiles) {
 		p->type = type;
-		WRITE_ONCE(swap_info[type], p);
 		/*
-		 * Write swap_info[type] before nr_swapfiles, in case a
-		 * racing procfs swap_start() or swap_next() is reading them.
-		 * (We never shrink nr_swapfiles, we never free this entry.)
+		 * Publish the swap_info_struct after initializing it.
+		 * Note that kvzalloc() above zeroes all its fields.
 		 */
-		smp_wmb();
-		WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
+		smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */
+		nr_swapfiles++;
 	} else {
 		defer = p;
 		p = swap_info[type];

From f4c4a3f48480730214c4f02ffa480f6bf5b0718f Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 28 Jun 2021 19:37:12 -0700
Subject: [PATCH 3145/3292] mm: free idle swap cache page after COW

With commit 09854ba94c6a ("mm: do_wp_page() simplification"), after COW,
the idle swap cache page (neither the page nor the corresponding swap
entry is mapped by any process) will be left in the LRU list, even if it's
in the active list or the head of the inactive list.  So, the page
reclaimer may take quite some overhead to reclaim these actually unused
pages.

To help the page reclaiming, in this patch, after COW, the idle swap cache
page will be tried to be freed.  To avoid to introduce much overhead to
the hot COW code path,

a) there's almost zero overhead for non-swap case via checking
   PageSwapCache() firstly.

b) the page lock is acquired via trylock only.

To test the patch, we used pmbench memory accessing benchmark with
working-set larger than available memory on a 2-socket Intel server with a
NVMe SSD as swap device.  Test results shows that the pmbench score
increases up to 23.8% with the decreased size of swap cache and swapin
throughput.

Link: https://lkml.kernel.org/r/20210601053143.1380078-1-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>	[use free_swap_cache()]
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 5 +++++
 mm/memory.c          | 2 ++
 mm/swap_state.c      | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 46d51d058d05..49b1dd2c100b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -446,6 +446,7 @@ extern void __delete_from_swap_cache(struct page *page,
 extern void delete_from_swap_cache(struct page *);
 extern void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				unsigned long end);
+extern void free_swap_cache(struct page *);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 extern struct page *lookup_swap_cache(swp_entry_t entry,
@@ -551,6 +552,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
 #define free_pages_and_swap_cache(pages, nr) \
 	release_pages((pages), (nr));
 
+static inline void free_swap_cache(struct page *page)
+{
+}
+
 static inline void show_swap_cache_info(void)
 {
 }
diff --git a/mm/memory.c b/mm/memory.c
index b15367c285bd..a4d82a6de000 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3023,6 +3023,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 				munlock_vma_page(old_page);
 			unlock_page(old_page);
 		}
+		if (page_copied)
+			free_swap_cache(old_page);
 		put_page(old_page);
 	}
 	return page_copied ? VM_FAULT_WRITE : 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 1a2ba4056f37..4f8a912ff692 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -286,7 +286,7 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
  * try_to_free_swap() _with_ the lock.
  * 					- Marcelo
  */
-static inline void free_swap_cache(struct page *page)
+void free_swap_cache(struct page *page)
 {
 	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
 		try_to_free_swap(page);

From eea4a5011ae520c98d0a14474ecde44f29659861 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 28 Jun 2021 19:37:16 -0700
Subject: [PATCH 3146/3292] swap: check mapping_empty() for swap cache before
 being freed

To check whether all pages and shadow entries in swap cache has been
removed before swap cache is freed.

Link: https://lkml.kernel.org/r/20210608005121.511140-1-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_state.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 4f8a912ff692..c56aa9ac050d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -693,7 +693,12 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 
 void exit_swap_address_space(unsigned int type)
 {
-	kvfree(swapper_spaces[type]);
+	int i;
+	struct address_space *spaces = swapper_spaces[type];
+
+	for (i = 0; i < nr_swapper_spaces[type]; i++)
+		VM_WARN_ON_ONCE(!mapping_empty(&spaces[i]));
+	kvfree(spaces);
 	nr_swapper_spaces[type] = 0;
 	swapper_spaces[type] = NULL;
 }

From fdbcb2a6d6778e0b91938529694e5f40b4a66130 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:19 -0700
Subject: [PATCH 3147/3292] mm/memcg: move mod_objcg_state() to memcontrol.c

Patch series "mm/memcg: Reduce kmemcache memory accounting overhead", v6.

With the recent introduction of the new slab memory controller, we
eliminate the need for having separate kmemcaches for each memory cgroup
and reduce overall kernel memory usage.  However, we also add additional
memory accounting overhead to each call of kmem_cache_alloc() and
kmem_cache_free().

For workloads that require a lot of kmemcache allocations and
de-allocations, they may experience performance regression as illustrated
in [1] and [2].

A simple kernel module that performs repeated loop of 100,000,000
kmem_cache_alloc() and kmem_cache_free() of either a small 32-byte object
or a big 4k object at module init time with a batch size of 4 (4 kmalloc's
followed by 4 kfree's) is used for benchmarking.  The benchmarking tool
was run on a kernel based on linux-next-20210419.  The test was run on a
CascadeLake server with turbo-boosting disable to reduce run-to-run
variation.

The small object test exercises mainly the object stock charging and
vmstat update code paths.  The large object test also exercises the
refill_obj_stock() and __memcg_kmem_charge()/__memcg_kmem_uncharge() code
paths.

With memory accounting disabled, the run time was 3.130s with both small
object big object tests.

With memory accounting enabled, both cgroup v1 and v2 showed similar
results in the small object test.  The performance results of the large
object test, however, differed between cgroup v1 and v2.

The execution times with the application of various patches in the
patchset were:

  Applied patches   Run time   Accounting overhead   %age 1   %age 2
  ---------------   --------   -------------------   ------   ------

  Small 32-byte object:
       None          11.634s         8.504s          100.0%   271.7%
        1-2           9.425s         6.295s           74.0%   201.1%
        1-3           9.708s         6.578s           77.4%   210.2%
        1-4           8.062s         4.932s           58.0%   157.6%

  Large 4k object (v2):
       None          22.107s        18.977s          100.0%   606.3%
        1-2          20.960s        17.830s           94.0%   569.6%
        1-3          14.238s        11.108s           58.5%   354.9%
        1-4          11.329s         8.199s           43.2%   261.9%

  Large 4k object (v1):
       None          36.807s        33.677s          100.0%  1075.9%
        1-2          36.648s        33.518s           99.5%  1070.9%
        1-3          22.345s        19.215s           57.1%   613.9%
        1-4          18.662s        15.532s           46.1%   496.2%

  N.B. %age 1 = overhead/unpatched overhead
       %age 2 = overhead/accounting disabled time

Patch 2 (vmstat data stock caching) helps in both the small object test
and the large v2 object test. It doesn't help much in v1 big object test.

Patch 3 (refill_obj_stock improvement) does help the small object test
but offer significant performance improvement for the large object test
(both v1 and v2).

Patch 4 (eliminating irq disable/enable) helps in all test cases.

To test for the extreme case, a multi-threaded kmalloc/kfree
microbenchmark was run on the 2-socket 48-core 96-thread system with
96 testing threads in the same memcg doing kmalloc+kfree of a 4k object
with accounting enabled for 10s. The total number of kmalloc+kfree done
in kilo operations per second (kops/s) were as follows:

  Applied patches   v1 kops/s   v1 change   v2 kops/s   v2 change
  ---------------   ---------   ---------   ---------   ---------
       None           3,520        1.00X      6,242        1.00X
        1-2           4,304        1.22X      8,478        1.36X
        1-3           4,731        1.34X    418,142       66.99X
        1-4           4,587        1.30X    438,838       70.30X

With memory accounting disabled, the kmalloc/kfree rate was 1,481,291
kop/s. This test shows how significant the memory accouting overhead
can be in some extreme situations.

For this multithreaded test, the improvement from patch 2 mainly
comes from the conditional atomic xchg of objcg->nr_charged_bytes in
mod_objcg_state(). By using an unconditional xchg, the operation rates
were similar to the unpatched kernel.

Patch 3 elminates the single highly contended cacheline of
objcg->nr_charged_bytes for cgroup v2 leading to a huge performance
improvement. Cgroup v1, however, still has another highly contended
cacheline in the shared page counter &memcg->kmem. So the improvement
is only modest.

Patch 4 helps in cgroup v2, but performs worse in cgroup v1 as
eliminating the irq_disable/irq_enable overhead seems to aggravate the
cacheline contention.

[1] https://lore.kernel.org/linux-mm/20210408193948.vfktg3azh2wrt56t@gabell/T/#u
[2] https://lore.kernel.org/lkml/20210114025151.GA22932@xsang-OptiPlex-9020/

This patch (of 4):

mod_objcg_state() is moved from mm/slab.h to mm/memcontrol.c so that
further optimization can be done to it in later patches without exposing
unnecessary details to other mm components.

Link: https://lkml.kernel.org/r/20210506150007.16288-1-longman@redhat.com
Link: https://lkml.kernel.org/r/20210506150007.16288-2-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Masayoshi Mizuma <msys.mizuma@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 13 +++++++++++++
 mm/slab.h       | 16 ++--------------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 64ada9e650a5..7cd7187a017c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -782,6 +782,19 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 	rcu_read_unlock();
 }
 
+void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+		     enum node_stat_item idx, int nr)
+{
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	mod_memcg_lruvec_state(lruvec, idx, nr);
+	rcu_read_unlock();
+}
+
 /**
  * __count_memcg_events - account VM events in a cgroup
  * @memcg: the memory cgroup
diff --git a/mm/slab.h b/mm/slab.h
index 7f9b4bd9fc65..f2c32f24da95 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -240,6 +240,8 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
 #ifdef CONFIG_MEMCG_KMEM
 int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
 				 gfp_t gfp, bool new_page);
+void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+		     enum node_stat_item idx, int nr);
 
 static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
@@ -284,20 +286,6 @@ static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
 	return true;
 }
 
-static inline void mod_objcg_state(struct obj_cgroup *objcg,
-				   struct pglist_data *pgdat,
-				   enum node_stat_item idx, int nr)
-{
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
-
-	rcu_read_lock();
-	memcg = obj_cgroup_memcg(objcg);
-	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	mod_memcg_lruvec_state(lruvec, idx, nr);
-	rcu_read_unlock();
-}
-
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 					      struct obj_cgroup *objcg,
 					      gfp_t flags, size_t size,

From 68ac5b3c8db2fda00af594eca4100aceaf927c0e Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:23 -0700
Subject: [PATCH 3148/3292] mm/memcg: cache vmstat data in percpu
 memcg_stock_pcp

Before the new slab memory controller with per object byte charging,
charging and vmstat data update happen only when new slab pages are
allocated or freed.  Now they are done with every kmem_cache_alloc() and
kmem_cache_free().  This causes additional overhead for workloads that
generate a lot of alloc and free calls.

The memcg_stock_pcp is used to cache byte charge for a specific obj_cgroup
to reduce that overhead.  To further reducing it, this patch makes the
vmstat data cached in the memcg_stock_pcp structure as well until it
accumulates a page size worth of update or when other cached data change.
Caching the vmstat data in the per-cpu stock eliminates two writes to
non-hot cachelines for memcg specific as well as memcg-lruvecs specific
vmstat data by a write to a hot local stock cacheline.

On a 2-socket Cascade Lake server with instrumentation enabled and this
patch applied, it was found that about 20% (634400 out of 3243830) of the
time when mod_objcg_state() is called leads to an actual call to
__mod_objcg_state() after initial boot.  When doing parallel kernel build,
the figure was about 17% (24329265 out of 142512465).  So caching the
vmstat data reduces the number of calls to __mod_objcg_state() by more
than 80%.

Link: https://lkml.kernel.org/r/20210506150007.16288-3-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Masayoshi Mizuma <msys.mizuma@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 3 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7cd7187a017c..b4624580d18a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -782,8 +782,9 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 	rcu_read_unlock();
 }
 
-void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
-		     enum node_stat_item idx, int nr)
+static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
+				     struct pglist_data *pgdat,
+				     enum node_stat_item idx, int nr)
 {
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
@@ -791,7 +792,7 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	mod_memcg_lruvec_state(lruvec, idx, nr);
+	__mod_memcg_lruvec_state(lruvec, idx, nr);
 	rcu_read_unlock();
 }
 
@@ -2059,7 +2060,10 @@ struct memcg_stock_pcp {
 
 #ifdef CONFIG_MEMCG_KMEM
 	struct obj_cgroup *cached_objcg;
+	struct pglist_data *cached_pgdat;
 	unsigned int nr_bytes;
+	int nr_slab_reclaimable_b;
+	int nr_slab_unreclaimable_b;
 #endif
 
 	struct work_struct work;
@@ -3008,6 +3012,67 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
 	obj_cgroup_put(objcg);
 }
 
+void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+		     enum node_stat_item idx, int nr)
+{
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
+	int *bytes;
+
+	local_irq_save(flags);
+	stock = this_cpu_ptr(&memcg_stock);
+
+	/*
+	 * Save vmstat data in stock and skip vmstat array update unless
+	 * accumulating over a page of vmstat data or when pgdat or idx
+	 * changes.
+	 */
+	if (stock->cached_objcg != objcg) {
+		drain_obj_stock(stock);
+		obj_cgroup_get(objcg);
+		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
+		stock->cached_objcg = objcg;
+		stock->cached_pgdat = pgdat;
+	} else if (stock->cached_pgdat != pgdat) {
+		/* Flush the existing cached vmstat data */
+		if (stock->nr_slab_reclaimable_b) {
+			mod_objcg_mlstate(objcg, pgdat, NR_SLAB_RECLAIMABLE_B,
+					  stock->nr_slab_reclaimable_b);
+			stock->nr_slab_reclaimable_b = 0;
+		}
+		if (stock->nr_slab_unreclaimable_b) {
+			mod_objcg_mlstate(objcg, pgdat, NR_SLAB_UNRECLAIMABLE_B,
+					  stock->nr_slab_unreclaimable_b);
+			stock->nr_slab_unreclaimable_b = 0;
+		}
+		stock->cached_pgdat = pgdat;
+	}
+
+	bytes = (idx == NR_SLAB_RECLAIMABLE_B) ? &stock->nr_slab_reclaimable_b
+					       : &stock->nr_slab_unreclaimable_b;
+	/*
+	 * Even for large object >= PAGE_SIZE, the vmstat data will still be
+	 * cached locally at least once before pushing it out.
+	 */
+	if (!*bytes) {
+		*bytes = nr;
+		nr = 0;
+	} else {
+		*bytes += nr;
+		if (abs(*bytes) > PAGE_SIZE) {
+			nr = *bytes;
+			*bytes = 0;
+		} else {
+			nr = 0;
+		}
+	}
+	if (nr)
+		mod_objcg_mlstate(objcg, pgdat, idx, nr);
+
+	local_irq_restore(flags);
+}
+
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
 	struct memcg_stock_pcp *stock;
@@ -3055,6 +3120,25 @@ static void drain_obj_stock(struct memcg_stock_pcp *stock)
 		stock->nr_bytes = 0;
 	}
 
+	/*
+	 * Flush the vmstat data in current stock
+	 */
+	if (stock->nr_slab_reclaimable_b || stock->nr_slab_unreclaimable_b) {
+		if (stock->nr_slab_reclaimable_b) {
+			mod_objcg_mlstate(old, stock->cached_pgdat,
+					  NR_SLAB_RECLAIMABLE_B,
+					  stock->nr_slab_reclaimable_b);
+			stock->nr_slab_reclaimable_b = 0;
+		}
+		if (stock->nr_slab_unreclaimable_b) {
+			mod_objcg_mlstate(old, stock->cached_pgdat,
+					  NR_SLAB_UNRECLAIMABLE_B,
+					  stock->nr_slab_unreclaimable_b);
+			stock->nr_slab_unreclaimable_b = 0;
+		}
+		stock->cached_pgdat = NULL;
+	}
+
 	obj_cgroup_put(old);
 	stock->cached_objcg = NULL;
 }

From 5387c90490f7f42df3209154ca955a453ee01b41 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:27 -0700
Subject: [PATCH 3149/3292] mm/memcg: improve refill_obj_stock() performance

There are two issues with the current refill_obj_stock() code.  First of
all, when nr_bytes reaches over PAGE_SIZE, it calls drain_obj_stock() to
atomically flush out remaining bytes to obj_cgroup, clear cached_objcg and
do a obj_cgroup_put().  It is likely that the same obj_cgroup will be used
again which leads to another call to drain_obj_stock() and
obj_cgroup_get() as well as atomically retrieve the available byte from
obj_cgroup.  That is costly.  Instead, we should just uncharge the excess
pages, reduce the stock bytes and be done with it.  The drain_obj_stock()
function should only be called when obj_cgroup changes.

Secondly, when charging an object of size not less than a page in
obj_cgroup_charge(), it is possible that the remaining bytes to be
refilled to the stock will overflow a page and cause refill_obj_stock() to
uncharge 1 page.  To avoid the additional uncharge in this case, a new
allow_uncharge flag is added to refill_obj_stock() which will be set to
false when called from obj_cgroup_charge() so that an uncharge_pages()
call won't be issued right after a charge_pages() call unless the objcg
changes.

A multithreaded kmalloc+kfree microbenchmark on a 2-socket 48-core
96-thread x86-64 system with 96 testing threads were run.  Before this
patch, the total number of kilo kmalloc+kfree operations done for a 4k
large object by all the testing threads per second were 4,304 kops/s
(cgroup v1) and 8,478 kops/s (cgroup v2).  After applying this patch, the
number were 4,731 (cgroup v1) and 418,142 (cgroup v2) respectively.  This
represents a performance improvement of 1.10X (cgroup v1) and 49.3X
(cgroup v2).

Link: https://lkml.kernel.org/r/20210506150007.16288-4-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Masayoshi Mizuma <msys.mizuma@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b4624580d18a..17d38c7f630f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3157,10 +3157,12 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 	return false;
 }
 
-static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
+			     bool allow_uncharge)
 {
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
+	unsigned int nr_pages = 0;
 
 	local_irq_save(flags);
 
@@ -3169,14 +3171,21 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 		drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
 		stock->cached_objcg = objcg;
-		stock->nr_bytes = atomic_xchg(&objcg->nr_charged_bytes, 0);
+		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
+		allow_uncharge = true;	/* Allow uncharge when objcg changes */
 	}
 	stock->nr_bytes += nr_bytes;
 
-	if (stock->nr_bytes > PAGE_SIZE)
-		drain_obj_stock(stock);
+	if (allow_uncharge && (stock->nr_bytes > PAGE_SIZE)) {
+		nr_pages = stock->nr_bytes >> PAGE_SHIFT;
+		stock->nr_bytes &= (PAGE_SIZE - 1);
+	}
 
 	local_irq_restore(flags);
+
+	if (nr_pages)
+		obj_cgroup_uncharge_pages(objcg, nr_pages);
 }
 
 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
@@ -3188,14 +3197,27 @@ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
 		return 0;
 
 	/*
-	 * In theory, memcg->nr_charged_bytes can have enough
+	 * In theory, objcg->nr_charged_bytes can have enough
 	 * pre-charged bytes to satisfy the allocation. However,
-	 * flushing memcg->nr_charged_bytes requires two atomic
-	 * operations, and memcg->nr_charged_bytes can't be big,
-	 * so it's better to ignore it and try grab some new pages.
-	 * memcg->nr_charged_bytes will be flushed in
-	 * refill_obj_stock(), called from this function or
-	 * independently later.
+	 * flushing objcg->nr_charged_bytes requires two atomic
+	 * operations, and objcg->nr_charged_bytes can't be big.
+	 * The shared objcg->nr_charged_bytes can also become a
+	 * performance bottleneck if all tasks of the same memcg are
+	 * trying to update it. So it's better to ignore it and try
+	 * grab some new pages. The stock's nr_bytes will be flushed to
+	 * objcg->nr_charged_bytes later on when objcg changes.
+	 *
+	 * The stock's nr_bytes may contain enough pre-charged bytes
+	 * to allow one less page from being charged, but we can't rely
+	 * on the pre-charged bytes not being changed outside of
+	 * consume_obj_stock() or refill_obj_stock(). So ignore those
+	 * pre-charged bytes as well when charging pages. To avoid a
+	 * page uncharge right after a page charge, we set the
+	 * allow_uncharge flag to false when calling refill_obj_stock()
+	 * to temporarily allow the pre-charged bytes to exceed the page
+	 * size limit. The maximum reachable value of the pre-charged
+	 * bytes is (sizeof(object) + PAGE_SIZE - 2) if there is no data
+	 * race.
 	 */
 	nr_pages = size >> PAGE_SHIFT;
 	nr_bytes = size & (PAGE_SIZE - 1);
@@ -3205,14 +3227,14 @@ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
 
 	ret = obj_cgroup_charge_pages(objcg, gfp, nr_pages);
 	if (!ret && nr_bytes)
-		refill_obj_stock(objcg, PAGE_SIZE - nr_bytes);
+		refill_obj_stock(objcg, PAGE_SIZE - nr_bytes, false);
 
 	return ret;
 }
 
 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 {
-	refill_obj_stock(objcg, size);
+	refill_obj_stock(objcg, size, true);
 }
 
 #endif /* CONFIG_MEMCG_KMEM */

From 559271146efc0bf125e6390191f683eab884e4a1 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:30 -0700
Subject: [PATCH 3150/3292] mm/memcg: optimize user context object stock access

Most kmem_cache_alloc() calls are from user context.  With instrumentation
enabled, the measured amount of kmem_cache_alloc() calls from non-task
context was about 0.01% of the total.

The irq disable/enable sequence used in this case to access content from
object stock is slow.  To optimize for user context access, there are now
two sets of object stocks (in the new obj_stock structure) for task
context and interrupt context access respectively.

The task context object stock can be accessed after disabling preemption
which is cheap in non-preempt kernel.  The interrupt context object stock
can only be accessed after disabling interrupt.  User context code can
access interrupt object stock, but not vice versa.

The downside of this change is that there are more data stored in local
object stocks and not reflected in the charge counter and the vmstat
arrays.  However, this is a small price to pay for better performance.

[longman@redhat.com: fix potential uninitialized variable warning]
  Link: https://lkml.kernel.org/r/20210526193602.8742-1-longman@redhat.com
[akpm@linux-foundation.org: coding style fixes]

Link: https://lkml.kernel.org/r/20210506150007.16288-5-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Roman Gushchin <guro@fb.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Yafang Shao <laoar.shao@gmail.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Masayoshi Mizuma <msys.mizuma@gmail.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 100 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 28 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 17d38c7f630f..97f76ce04eae 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -782,6 +782,10 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 	rcu_read_unlock();
 }
 
+/*
+ * mod_objcg_mlstate() may be called with irq enabled, so
+ * mod_memcg_lruvec_state() should be used.
+ */
 static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
 				     struct pglist_data *pgdat,
 				     enum node_stat_item idx, int nr)
@@ -792,7 +796,7 @@ static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	__mod_memcg_lruvec_state(lruvec, idx, nr);
+	mod_memcg_lruvec_state(lruvec, idx, nr);
 	rcu_read_unlock();
 }
 
@@ -2054,17 +2058,23 @@ void unlock_page_memcg(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page_memcg);
 
-struct memcg_stock_pcp {
-	struct mem_cgroup *cached; /* this never be root cgroup */
-	unsigned int nr_pages;
-
+struct obj_stock {
 #ifdef CONFIG_MEMCG_KMEM
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
 	unsigned int nr_bytes;
 	int nr_slab_reclaimable_b;
 	int nr_slab_unreclaimable_b;
+#else
+	int dummy[0];
 #endif
+};
+
+struct memcg_stock_pcp {
+	struct mem_cgroup *cached; /* this never be root cgroup */
+	unsigned int nr_pages;
+	struct obj_stock task_obj;
+	struct obj_stock irq_obj;
 
 	struct work_struct work;
 	unsigned long flags;
@@ -2074,12 +2084,12 @@ static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
 static DEFINE_MUTEX(percpu_charge_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
-static void drain_obj_stock(struct memcg_stock_pcp *stock);
+static void drain_obj_stock(struct obj_stock *stock);
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg);
 
 #else
-static inline void drain_obj_stock(struct memcg_stock_pcp *stock)
+static inline void drain_obj_stock(struct obj_stock *stock)
 {
 }
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
@@ -2089,6 +2099,41 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 }
 #endif
 
+/*
+ * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
+ * sequence used in this case to access content from object stock is slow.
+ * To optimize for user context access, there are now two object stocks for
+ * task context and interrupt context access respectively.
+ *
+ * The task context object stock can be accessed by disabling preemption only
+ * which is cheap in non-preempt kernel. The interrupt context object stock
+ * can only be accessed after disabling interrupt. User context code can
+ * access interrupt object stock, but not vice versa.
+ */
+static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
+{
+	struct memcg_stock_pcp *stock;
+
+	if (likely(in_task())) {
+		*pflags = 0UL;
+		preempt_disable();
+		stock = this_cpu_ptr(&memcg_stock);
+		return &stock->task_obj;
+	}
+
+	local_irq_save(*pflags);
+	stock = this_cpu_ptr(&memcg_stock);
+	return &stock->irq_obj;
+}
+
+static inline void put_obj_stock(unsigned long flags)
+{
+	if (likely(in_task()))
+		preempt_enable();
+	else
+		local_irq_restore(flags);
+}
+
 /**
  * consume_stock: Try to consume stocked charge on this cpu.
  * @memcg: memcg to consume from.
@@ -2155,7 +2200,9 @@ static void drain_local_stock(struct work_struct *dummy)
 	local_irq_save(flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	drain_obj_stock(stock);
+	drain_obj_stock(&stock->irq_obj);
+	if (in_task())
+		drain_obj_stock(&stock->task_obj);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
 
@@ -3015,13 +3062,10 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
 void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 		     enum node_stat_item idx, int nr)
 {
-	struct memcg_stock_pcp *stock;
 	unsigned long flags;
+	struct obj_stock *stock = get_obj_stock(&flags);
 	int *bytes;
 
-	local_irq_save(flags);
-	stock = this_cpu_ptr(&memcg_stock);
-
 	/*
 	 * Save vmstat data in stock and skip vmstat array update unless
 	 * accumulating over a page of vmstat data or when pgdat or idx
@@ -3070,29 +3114,26 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	if (nr)
 		mod_objcg_mlstate(objcg, pgdat, idx, nr);
 
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 }
 
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
-	struct memcg_stock_pcp *stock;
 	unsigned long flags;
+	struct obj_stock *stock = get_obj_stock(&flags);
 	bool ret = false;
 
-	local_irq_save(flags);
-
-	stock = this_cpu_ptr(&memcg_stock);
 	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
 		stock->nr_bytes -= nr_bytes;
 		ret = true;
 	}
 
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 
 	return ret;
 }
 
-static void drain_obj_stock(struct memcg_stock_pcp *stock)
+static void drain_obj_stock(struct obj_stock *stock)
 {
 	struct obj_cgroup *old = stock->cached_objcg;
 
@@ -3148,8 +3189,13 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 {
 	struct mem_cgroup *memcg;
 
-	if (stock->cached_objcg) {
-		memcg = obj_cgroup_memcg(stock->cached_objcg);
+	if (in_task() && stock->task_obj.cached_objcg) {
+		memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
+		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+			return true;
+	}
+	if (stock->irq_obj.cached_objcg) {
+		memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
 			return true;
 	}
@@ -3160,13 +3206,10 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			     bool allow_uncharge)
 {
-	struct memcg_stock_pcp *stock;
 	unsigned long flags;
+	struct obj_stock *stock = get_obj_stock(&flags);
 	unsigned int nr_pages = 0;
 
-	local_irq_save(flags);
-
-	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached_objcg != objcg) { /* reset if necessary */
 		drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
@@ -3182,7 +3225,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -6790,6 +6833,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 	unsigned long nr_pages;
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
+	bool use_objcg = PageMemcgKmem(page);
 
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
@@ -6798,7 +6842,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 	 * page memcg or objcg at this point, we have fully
 	 * exclusive access to the page.
 	 */
-	if (PageMemcgKmem(page)) {
+	if (use_objcg) {
 		objcg = __page_objcg(page);
 		/*
 		 * This get matches the put at the end of the function and
@@ -6826,7 +6870,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 
 	nr_pages = compound_nr(page);
 
-	if (PageMemcgKmem(page)) {
+	if (use_objcg) {
 		ug->nr_memory += nr_pages;
 		ug->nr_kmem += nr_pages;
 

From 41eb5df1cbc9b302fc263ad7c9f38cfc38b4df61 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:34 -0700
Subject: [PATCH 3151/3292] mm: memcg/slab: properly set up gfp flags for objcg
 pointer array

Patch series "mm: memcg/slab: Fix objcg pointer array handling problem", v4.

Since the merging of the new slab memory controller in v5.9, the page
structure stores a pointer to objcg pointer array for slab pages.  When
the slab has no used objects, it can be freed in free_slab() which will
call kfree() to free the objcg pointer array in
memcg_alloc_page_obj_cgroups().  If it happens that the objcg pointer
array is the last used object in its slab, that slab may then be freed
which may caused kfree() to be called again.

With the right workload, the slab cache may be set up in a way that allows
the recursive kfree() calling loop to nest deep enough to cause a kernel
stack overflow and panic the system.  In fact, we have a reproducer that
can cause kernel stack overflow on a s390 system involving kmalloc-rcl-256
and kmalloc-rcl-128 slabs with the following kfree() loop recursively
called 74 times:

  [ 285.520739] [<000000000ec432fc>] kfree+0x4bc/0x560 [ 285.520740]
[<000000000ec43466>] __free_slab+0xc6/0x228 [ 285.520741]
[<000000000ec41fc2>] __slab_free+0x3c2/0x3e0 [ 285.520742]
[<000000000ec432fc>] kfree+0x4bc/0x560 : While investigating this issue, I
also found an issue on the allocation side.  If the objcg pointer array
happen to come from the same slab or a circular dependency linkage is
formed with multiple slabs, those affected slabs can never be freed again.

This patch series addresses these two issues by introducing a new set of
kmalloc-cg-<n> caches split from kmalloc-<n> caches.  The new set will
only contain non-reclaimable and non-dma objects that are accounted in
memory cgroups whereas the old set are now for unaccounted objects only.
By making this split, all the objcg pointer arrays will come from the
kmalloc-<n> caches, but those caches will never hold any objcg pointer
array.  As a result, deeply nested kfree() call and the unfreeable slab
problems are now gone.

This patch (of 4):

Since the merging of the new slab memory controller in v5.9, the page
structure may store a pointer to obj_cgroup pointer array for slab pages.
Currently, only the __GFP_ACCOUNT bit is masked off.  However, the array
is not readily reclaimable and doesn't need to come from the DMA buffer.
So those GFP bits should be masked off as well.

Do the flag bit clearing at memcg_alloc_page_obj_cgroups() to make sure
that it is consistently applied no matter where it is called.

Link: https://lkml.kernel.org/r/20210505200610.13943-1-longman@redhat.com
Link: https://lkml.kernel.org/r/20210505200610.13943-2-longman@redhat.com
Fixes: 286e04b8ed7a ("mm: memcg/slab: allocate obj_cgroups for non-root slab pages")
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 8 ++++++++
 mm/slab.h       | 1 -
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 97f76ce04eae..2508bd97349c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2803,6 +2803,13 @@ retry:
 }
 
 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * The allocated objcg pointers array is not accounted directly.
+ * Moreover, it should not come from DMA buffer and is not readily
+ * reclaimable. So those GFP bits should be masked off.
+ */
+#define OBJCGS_CLEAR_MASK	(__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+
 int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
 				 gfp_t gfp, bool new_page)
 {
@@ -2810,6 +2817,7 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
 	unsigned long memcg_data;
 	void *vec;
 
+	gfp &= ~OBJCGS_CLEAR_MASK;
 	vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
 			   page_to_nid(page));
 	if (!vec)
diff --git a/mm/slab.h b/mm/slab.h
index f2c32f24da95..7b60ef2f32c3 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -298,7 +298,6 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 	if (!memcg_kmem_enabled() || !objcg)
 		return;
 
-	flags &= ~__GFP_ACCOUNT;
 	for (i = 0; i < size; i++) {
 		if (likely(p[i])) {
 			page = virt_to_head_page(p[i]);

From 494c1dfe855ec1f70f89552fce5eadf4a1717552 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:38 -0700
Subject: [PATCH 3152/3292] mm: memcg/slab: create a new set of kmalloc-cg-<n>
 caches

There are currently two problems in the way the objcg pointer array
(memcg_data) in the page structure is being allocated and freed.

On its allocation, it is possible that the allocated objcg pointer
array comes from the same slab that requires memory accounting. If this
happens, the slab will never become empty again as there is at least
one object left (the obj_cgroup array) in the slab.

When it is freed, the objcg pointer array object may be the last one
in its slab and hence causes kfree() to be called again. With the
right workload, the slab cache may be set up in a way that allows the
recursive kfree() calling loop to nest deep enough to cause a kernel
stack overflow and panic the system.

One way to solve this problem is to split the kmalloc-<n> caches
(KMALLOC_NORMAL) into two separate sets - a new set of kmalloc-<n>
(KMALLOC_NORMAL) caches for unaccounted objects only and a new set of
kmalloc-cg-<n> (KMALLOC_CGROUP) caches for accounted objects only. All
the other caches can still allow a mix of accounted and unaccounted
objects.

With this change, all the objcg pointer array objects will come from
KMALLOC_NORMAL caches which won't have their objcg pointer arrays. So
both the recursive kfree() problem and non-freeable slab problem are
gone.

Since both the KMALLOC_NORMAL and KMALLOC_CGROUP caches no longer have
mixed accounted and unaccounted objects, this will slightly reduce the
number of objcg pointer arrays that need to be allocated and save a bit
of memory. On the other hand, creating a new set of kmalloc caches does
have the effect of reducing cache utilization. So it is properly a wash.

The new KMALLOC_CGROUP is added between KMALLOC_NORMAL and
KMALLOC_RECLAIM so that the first for loop in create_kmalloc_caches()
will include the newly added caches without change.

[vbabka@suse.cz: don't create kmalloc-cg caches with cgroup.memory=nokmem]
  Link: https://lkml.kernel.org/r/20210512145107.6208-1-longman@redhat.com
[akpm@linux-foundation.org: un-fat-finger v5 delta creation]
[longman@redhat.com: disable cache merging for KMALLOC_NORMAL caches]
  Link: https://lkml.kernel.org/r/20210505200610.13943-4-longman@redhat.com

Link: https://lkml.kernel.org/r/20210512145107.6208-1-longman@redhat.com
Link: https://lkml.kernel.org/r/20210505200610.13943-3-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
[longman@redhat.com: fix for CONFIG_ZONE_DMA=n]
Suggested-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 42 +++++++++++++++++++++++++++++++++---------
 mm/internal.h        |  5 +++++
 mm/memcontrol.c      |  2 +-
 mm/slab_common.c     | 32 +++++++++++++++++++++++---------
 4 files changed, 62 insertions(+), 19 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index bc9ab3a5a017..083f3ce550bc 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -305,9 +305,21 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
 /*
  * Whenever changing this, take care of that kmalloc_type() and
  * create_kmalloc_caches() still work as intended.
+ *
+ * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP
+ * is for accounted but unreclaimable and non-dma objects. All the other
+ * kmem caches can have both accounted and unaccounted objects.
  */
 enum kmalloc_cache_type {
 	KMALLOC_NORMAL = 0,
+#ifndef CONFIG_ZONE_DMA
+	KMALLOC_DMA = KMALLOC_NORMAL,
+#endif
+#ifndef CONFIG_MEMCG_KMEM
+	KMALLOC_CGROUP = KMALLOC_NORMAL,
+#else
+	KMALLOC_CGROUP,
+#endif
 	KMALLOC_RECLAIM,
 #ifdef CONFIG_ZONE_DMA
 	KMALLOC_DMA,
@@ -319,24 +331,36 @@ enum kmalloc_cache_type {
 extern struct kmem_cache *
 kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
 
+/*
+ * Define gfp bits that should not be set for KMALLOC_NORMAL.
+ */
+#define KMALLOC_NOT_NORMAL_BITS					\
+	(__GFP_RECLAIMABLE |					\
+	(IS_ENABLED(CONFIG_ZONE_DMA)   ? __GFP_DMA : 0) |	\
+	(IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
+
 static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
 {
-#ifdef CONFIG_ZONE_DMA
 	/*
 	 * The most common case is KMALLOC_NORMAL, so test for it
-	 * with a single branch for both flags.
+	 * with a single branch for all the relevant flags.
 	 */
-	if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0))
+	if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
 		return KMALLOC_NORMAL;
 
 	/*
-	 * At least one of the flags has to be set. If both are, __GFP_DMA
-	 * is more important.
+	 * At least one of the flags has to be set. Their priorities in
+	 * decreasing order are:
+	 *  1) __GFP_DMA
+	 *  2) __GFP_RECLAIMABLE
+	 *  3) __GFP_ACCOUNT
 	 */
-	return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM;
-#else
-	return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL;
-#endif
+	if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
+		return KMALLOC_DMA;
+	if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE))
+		return KMALLOC_RECLAIM;
+	else
+		return KMALLOC_CGROUP;
 }
 
 /*
diff --git a/mm/internal.h b/mm/internal.h
index e8fdb531f887..2946dfa0f245 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -115,6 +115,11 @@ extern void putback_lru_page(struct page *page);
  */
 extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
 
+/*
+ * in mm/memcontrol.c:
+ */
+extern bool cgroup_memory_nokmem;
+
 /*
  * in mm/page_alloc.c
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2508bd97349c..b913950b9f64 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -83,7 +83,7 @@ DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 static bool cgroup_memory_nosocket;
 
 /* Kernel memory accounting disabled? */
-static bool cgroup_memory_nokmem;
+bool cgroup_memory_nokmem;
 
 /* Whether the swap controller is active */
 #ifdef CONFIG_MEMCG_SWAP
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6c0db9f9bd8a..db3f356bf725 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -738,21 +738,25 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 }
 
 #ifdef CONFIG_ZONE_DMA
-#define INIT_KMALLOC_INFO(__size, __short_size)			\
-{								\
-	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
-	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size,	\
-	.name[KMALLOC_DMA]     = "dma-kmalloc-" #__short_size,	\
-	.size = __size,						\
-}
+#define KMALLOC_DMA_NAME(sz)	.name[KMALLOC_DMA] = "dma-kmalloc-" #sz,
 #else
+#define KMALLOC_DMA_NAME(sz)
+#endif
+
+#ifdef CONFIG_MEMCG_KMEM
+#define KMALLOC_CGROUP_NAME(sz)	.name[KMALLOC_CGROUP] = "kmalloc-cg-" #sz,
+#else
+#define KMALLOC_CGROUP_NAME(sz)
+#endif
+
 #define INIT_KMALLOC_INFO(__size, __short_size)			\
 {								\
 	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
 	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size,	\
+	KMALLOC_CGROUP_NAME(__short_size)			\
+	KMALLOC_DMA_NAME(__short_size)				\
 	.size = __size,						\
 }
-#endif
 
 /*
  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
@@ -838,8 +842,15 @@ void __init setup_kmalloc_cache_index_table(void)
 static void __init
 new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
 {
-	if (type == KMALLOC_RECLAIM)
+	if (type == KMALLOC_RECLAIM) {
 		flags |= SLAB_RECLAIM_ACCOUNT;
+	} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
+		if (cgroup_memory_nokmem) {
+			kmalloc_caches[type][idx] = kmalloc_caches[KMALLOC_NORMAL][idx];
+			return;
+		}
+		flags |= SLAB_ACCOUNT;
+	}
 
 	kmalloc_caches[type][idx] = create_kmalloc_cache(
 					kmalloc_info[idx].name[type],
@@ -857,6 +868,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 	int i;
 	enum kmalloc_cache_type type;
 
+	/*
+	 * Including KMALLOC_CGROUP if CONFIG_MEMCG_KMEM defined
+	 */
 	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
 		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
 			if (!kmalloc_caches[type][i])

From 13e680fb6a1e7749ef4f4824ed883684ceb838df Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 28 Jun 2021 19:37:41 -0700
Subject: [PATCH 3153/3292] mm: memcg/slab: disable cache merging for
 KMALLOC_NORMAL caches

The KMALLOC_NORMAL (kmalloc-<n>) caches are for unaccounted objects only
when CONFIG_MEMCG_KMEM is enabled.  To make sure that this condition
remains true, we will have to prevent KMALOC_NORMAL caches to merge with
other kmem caches.  This is now done by setting its refcount to -1 right
after its creation.

Link: https://lkml.kernel.org/r/20210505200610.13943-4-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Suggested-by: Roman Gushchin <guro@fb.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab_common.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index db3f356bf725..c126e6f6b5a5 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -856,6 +856,13 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
 					kmalloc_info[idx].name[type],
 					kmalloc_info[idx].size, flags, 0,
 					kmalloc_info[idx].size);
+
+	/*
+	 * If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
+	 * KMALLOC_NORMAL caches.
+	 */
+	if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_NORMAL))
+		kmalloc_caches[type][idx]->refcount = -1;
 }
 
 /*

From c5c8b16b596e15471db22ed8ed10aafbf1a11878 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:37:44 -0700
Subject: [PATCH 3154/3292] mm: memcontrol: fix root_mem_cgroup charging

The below scenario can cause the page counters of the root_mem_cgroup to
be out of balance.

CPU0:                                   CPU1:

objcg = get_obj_cgroup_from_current()
obj_cgroup_charge_pages(objcg)
                                        memcg_reparent_objcgs()
                                            // reparent to root_mem_cgroup
                                            WRITE_ONCE(iter->memcg, parent)
    // memcg == root_mem_cgroup
    memcg = get_mem_cgroup_from_objcg(objcg)
    // do not charge to the root_mem_cgroup
    try_charge(memcg)

obj_cgroup_uncharge_pages(objcg)
    memcg = get_mem_cgroup_from_objcg(objcg)
    // uncharge from the root_mem_cgroup
    refill_stock(memcg)
        drain_stock(memcg)
            page_counter_uncharge(&memcg->memory)

get_obj_cgroup_from_current() never returns a root_mem_cgroup's objcg, so
we never explicitly charge the root_mem_cgroup.  And it's not going to
change.  It's all about a race when we got an obj_cgroup pointing at some
non-root memcg, but before we were able to charge it, the cgroup was gone,
objcg was reparented to the root and so we're skipping the charging.  Then
we store the objcg pointer and later use to uncharge the root_mem_cgroup.

This can cause the page counter to be less than the actual value.
Although we do not display the value (mem_cgroup_usage) so there shouldn't
be any actual problem, but there is a WARN_ON_ONCE in the
page_counter_cancel().  Who knows if it will trigger?  So it is better to
fix it.

Link: https://lkml.kernel.org/r/20210425075410.19255-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b913950b9f64..70690fdf53cc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2568,8 +2568,8 @@ out:
 	css_put(&memcg->css);
 }
 
-static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
-		      unsigned int nr_pages)
+static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
+			unsigned int nr_pages)
 {
 	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MAX_RECLAIM_RETRIES;
@@ -2581,8 +2581,6 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	bool drained = false;
 	unsigned long pflags;
 
-	if (mem_cgroup_is_root(memcg))
-		return 0;
 retry:
 	if (consume_stock(memcg, nr_pages))
 		return 0;
@@ -2762,6 +2760,15 @@ done_restock:
 	return 0;
 }
 
+static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+			     unsigned int nr_pages)
+{
+	if (mem_cgroup_is_root(memcg))
+		return 0;
+
+	return try_charge_memcg(memcg, gfp_mask, nr_pages);
+}
+
 #if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU)
 static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
@@ -2997,7 +3004,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
 
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
-	ret = try_charge(memcg, gfp, nr_pages);
+	ret = try_charge_memcg(memcg, gfp, nr_pages);
 	if (ret)
 		goto out;
 

From 8dc87c7d1fec8851925ca96ade0d65d3dcf89cce Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:37:47 -0700
Subject: [PATCH 3155/3292] mm: memcontrol: fix page charging in page
 replacement

Patch series "memcontrol code cleanup and simplification", v3.

This patch (of 8):

The pages aren't accounted at the root level, so do not charge the page to
the root memcg in page replacement.  Although we do not display the value
(mem_cgroup_usage) so there shouldn't be any actual problem, but there is
a WARN_ON_ONCE in the page_counter_cancel().  Who knows if it will
trigger?  So it is better to fix it.

Link: https://lkml.kernel.org/r/20210417043538.9793-1-songmuchun@bytedance.com
Link: https://lkml.kernel.org/r/20210417043538.9793-2-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 70690fdf53cc..239f69ed1ac1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6984,9 +6984,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 	/* Force-charge the new page. The old one will be freed soon */
 	nr_pages = thp_nr_pages(newpage);
 
-	page_counter_charge(&memcg->memory, nr_pages);
-	if (do_memsw_account())
-		page_counter_charge(&memcg->memsw, nr_pages);
+	if (!mem_cgroup_is_root(memcg)) {
+		page_counter_charge(&memcg->memory, nr_pages);
+		if (do_memsw_account())
+			page_counter_charge(&memcg->memsw, nr_pages);
+	}
 
 	css_get(&memcg->css);
 	commit_charge(newpage, memcg);

From 2884b6b7eed4fc14c0630fb16e56a4c66c786d33 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:37:50 -0700
Subject: [PATCH 3156/3292] mm: memcontrol: bail out early when !mm in
 get_mem_cgroup_from_mm

When mm is NULL, we do not need to hold rcu lock and call css_tryget for
the root memcg.  And we also do not need to check !mm in every loop of
while.  So bail out early when !mm.

Link: https://lkml.kernel.org/r/20210417043538.9793-3-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 239f69ed1ac1..babbaf49ee36 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -919,20 +919,23 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	if (mem_cgroup_disabled())
 		return NULL;
 
+	/*
+	 * Page cache insertions can happen without an
+	 * actual mm context, e.g. during disk probing
+	 * on boot, loopback IO, acct() writes etc.
+	 *
+	 * No need to css_get on root memcg as the reference
+	 * counting is disabled on the root level in the
+	 * cgroup core. See CSS_NO_REF.
+	 */
+	if (unlikely(!mm))
+		return root_mem_cgroup;
+
 	rcu_read_lock();
 	do {
-		/*
-		 * Page cache insertions can happen without an
-		 * actual mm context, e.g. during disk probing
-		 * on boot, loopback IO, acct() writes etc.
-		 */
-		if (unlikely(!mm))
+		memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		if (unlikely(!memcg))
 			memcg = root_mem_cgroup;
-		else {
-			memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-			if (unlikely(!memcg))
-				memcg = root_mem_cgroup;
-		}
 	} while (!css_tryget(&memcg->css));
 	rcu_read_unlock();
 	return memcg;

From a984226f457f849eb9c4ce727eeaa3b5080597d8 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:37:53 -0700
Subject: [PATCH 3157/3292] mm: memcontrol: remove the pgdata parameter of
 mem_cgroup_page_lruvec

All the callers of mem_cgroup_page_lruvec() just pass page_pgdat(page) as
the 2nd parameter to it (except isolate_migratepages_block()).  But for
isolate_migratepages_block(), the page_pgdat(page) is also equal to the
local variable of @pgdat.  So mem_cgroup_page_lruvec() do not need the
pgdat parameter.  Just remove it to simplify the code.

Link: https://lkml.kernel.org/r/20210417043538.9793-4-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 +++++-----
 mm/compaction.c            |  2 +-
 mm/memcontrol.c            |  9 +++------
 mm/swap.c                  |  2 +-
 mm/workingset.c            |  2 +-
 5 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c193be760709..f2a5aaba3577 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -743,13 +743,12 @@ out:
 /**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
- * @pgdat: pgdat of the page
  *
  * This function relies on page->mem_cgroup being stable.
  */
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
-						struct pglist_data *pgdat)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
 {
+	pg_data_t *pgdat = page_pgdat(page);
 	struct mem_cgroup *memcg = page_memcg(page);
 
 	VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
@@ -1221,9 +1220,10 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
 	return &pgdat->__lruvec;
 }
 
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
-						    struct pglist_data *pgdat)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
 {
+	pg_data_t *pgdat = page_pgdat(page);
+
 	return &pgdat->__lruvec;
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 84fde270ae74..7d41b58fb17c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1028,7 +1028,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (!TestClearPageLRU(page))
 			goto isolate_fail_put;
 
-		lruvec = mem_cgroup_page_lruvec(page, pgdat);
+		lruvec = mem_cgroup_page_lruvec(page);
 
 		/* If we already hold the lock, we can skip some rechecking */
 		if (lruvec != locked) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index babbaf49ee36..946a9a483e71 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1199,9 +1199,8 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
 struct lruvec *lock_page_lruvec(struct page *page)
 {
 	struct lruvec *lruvec;
-	struct pglist_data *pgdat = page_pgdat(page);
 
-	lruvec = mem_cgroup_page_lruvec(page, pgdat);
+	lruvec = mem_cgroup_page_lruvec(page);
 	spin_lock(&lruvec->lru_lock);
 
 	lruvec_memcg_debug(lruvec, page);
@@ -1212,9 +1211,8 @@ struct lruvec *lock_page_lruvec(struct page *page)
 struct lruvec *lock_page_lruvec_irq(struct page *page)
 {
 	struct lruvec *lruvec;
-	struct pglist_data *pgdat = page_pgdat(page);
 
-	lruvec = mem_cgroup_page_lruvec(page, pgdat);
+	lruvec = mem_cgroup_page_lruvec(page);
 	spin_lock_irq(&lruvec->lru_lock);
 
 	lruvec_memcg_debug(lruvec, page);
@@ -1225,9 +1223,8 @@ struct lruvec *lock_page_lruvec_irq(struct page *page)
 struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
 {
 	struct lruvec *lruvec;
-	struct pglist_data *pgdat = page_pgdat(page);
 
-	lruvec = mem_cgroup_page_lruvec(page, pgdat);
+	lruvec = mem_cgroup_page_lruvec(page);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
 
 	lruvec_memcg_debug(lruvec, page);
diff --git a/mm/swap.c b/mm/swap.c
index dfb48cf9c2c9..18cc9e63515b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -313,7 +313,7 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
 
 void lru_note_cost_page(struct page *page)
 {
-	lru_note_cost(mem_cgroup_page_lruvec(page, page_pgdat(page)),
+	lru_note_cost(mem_cgroup_page_lruvec(page),
 		      page_is_file_lru(page), thp_nr_pages(page));
 }
 
diff --git a/mm/workingset.c b/mm/workingset.c
index b7cdeca5a76d..4f7a306ce75a 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -408,7 +408,7 @@ void workingset_activation(struct page *page)
 	memcg = page_memcg_rcu(page);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+	lruvec = mem_cgroup_page_lruvec(page);
 	workingset_age_nonresident(lruvec, thp_nr_pages(page));
 out:
 	rcu_read_unlock();

From f2e4d28dd9f6478dd54d47b91edc3fe62c019968 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:37:56 -0700
Subject: [PATCH 3158/3292] mm: memcontrol: simplify lruvec_holds_page_lru_lock

We already have a helper lruvec_memcg() to get the memcg from lruvec, we
do not need to do it ourselves in the lruvec_holds_page_lru_lock().  So
use lruvec_memcg() instead.  And if mem_cgroup_disabled() returns false,
the page_memcg(page) (the LRU pages) cannot be NULL.  So remove the odd
logic of "memcg = page_memcg(page) ?  : root_mem_cgroup".  And use
lruvec_pgdat to simplify the code.  We can have a single definition for
this function that works for !CONFIG_MEMCG, CONFIG_MEMCG +
mem_cgroup_disabled() and CONFIG_MEMCG.

Link: https://lkml.kernel.org/r/20210417043538.9793-5-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 31 +++++++------------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f2a5aaba3577..2fc728492c9b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -755,22 +755,6 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
 	return mem_cgroup_lruvec(memcg, pgdat);
 }
 
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
-					      struct lruvec *lruvec)
-{
-	pg_data_t *pgdat = page_pgdat(page);
-	const struct mem_cgroup *memcg;
-	struct mem_cgroup_per_node *mz;
-
-	if (mem_cgroup_disabled())
-		return lruvec == &pgdat->__lruvec;
-
-	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	memcg = page_memcg(page) ? : root_mem_cgroup;
-
-	return lruvec->pgdat == pgdat && mz->memcg == memcg;
-}
-
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -1227,14 +1211,6 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
 	return &pgdat->__lruvec;
 }
 
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
-					      struct lruvec *lruvec)
-{
-	pg_data_t *pgdat = page_pgdat(page);
-
-	return lruvec == &pgdat->__lruvec;
-}
-
 static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
 {
 }
@@ -1516,6 +1492,13 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
 }
 
+static inline bool lruvec_holds_page_lru_lock(struct page *page,
+					      struct lruvec *lruvec)
+{
+	return lruvec_pgdat(lruvec) == page_pgdat(page) &&
+	       lruvec_memcg(lruvec) == page_memcg(page);
+}
+
 /* Don't lock again iff page's lruvec locked */
 static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
 		struct lruvec *locked_lruvec)

From 7467c39128bda1d58af08aaeb0c7ba54d0ec87ae Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:37:59 -0700
Subject: [PATCH 3159/3292] mm: memcontrol: rename lruvec_holds_page_lru_lock
 to page_matches_lruvec

lruvec_holds_page_lru_lock() doesn't check anything about locking and is
used to check whether the page belongs to the lruvec.  So rename it to
page_matches_lruvec().

Link: https://lkml.kernel.org/r/20210417043538.9793-6-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 8 ++++----
 mm/vmscan.c                | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2fc728492c9b..0ce97eff79e2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1492,8 +1492,8 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
 }
 
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
-					      struct lruvec *lruvec)
+/* Test requires a stable page->memcg binding, see page_memcg() */
+static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
 {
 	return lruvec_pgdat(lruvec) == page_pgdat(page) &&
 	       lruvec_memcg(lruvec) == page_memcg(page);
@@ -1504,7 +1504,7 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
 		struct lruvec *locked_lruvec)
 {
 	if (locked_lruvec) {
-		if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+		if (page_matches_lruvec(page, locked_lruvec))
 			return locked_lruvec;
 
 		unlock_page_lruvec_irq(locked_lruvec);
@@ -1518,7 +1518,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
 		struct lruvec *locked_lruvec, unsigned long *flags)
 {
 	if (locked_lruvec) {
-		if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+		if (page_matches_lruvec(page, locked_lruvec))
 			return locked_lruvec;
 
 		unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5199b9696bab..ec93d4fd5a6b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2063,7 +2063,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 		 * All pages were isolated from the same lruvec (and isolation
 		 * inhibits memcg migration).
 		 */
-		VM_BUG_ON_PAGE(!lruvec_holds_page_lru_lock(page, lruvec), page);
+		VM_BUG_ON_PAGE(!page_matches_lruvec(page, lruvec), page);
 		add_page_to_lru_list(page, lruvec);
 		nr_pages = thp_nr_pages(page);
 		nr_moved += nr_pages;

From 9838354e16a2a920d5a228559850d10fa588a18d Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:38:03 -0700
Subject: [PATCH 3160/3292] mm: memcontrol: simplify the logic of objcg pinning
 memcg

The obj_cgroup_release() and memcg_reparent_objcgs() are serialized by the
css_set_lock.  We do not need to care about objcg->memcg being released in
the process of obj_cgroup_release().  So there is no need to pin memcg
before releasing objcg.  Remove those pinning logic to simplfy the code.

There are only two places that modifies the objcg->memcg.  One is the
initialization to objcg->memcg in the memcg_online_kmem(), another is
objcgs reparenting in the memcg_reparent_objcgs().  It is also impossible
for the two to run in parallel.  So xchg() is unnecessary and it is enough
to use WRITE_ONCE().

Link: https://lkml.kernel.org/r/20210417043538.9793-7-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 946a9a483e71..c79b6926fe83 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -261,7 +261,6 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 static void obj_cgroup_release(struct percpu_ref *ref)
 {
 	struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt);
-	struct mem_cgroup *memcg;
 	unsigned int nr_bytes;
 	unsigned int nr_pages;
 	unsigned long flags;
@@ -291,11 +290,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
 	nr_pages = nr_bytes >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&css_set_lock, flags);
-	memcg = obj_cgroup_memcg(objcg);
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
 	list_del(&objcg->list);
-	mem_cgroup_put(memcg);
 	spin_unlock_irqrestore(&css_set_lock, flags);
 
 	percpu_ref_exit(ref);
@@ -330,17 +327,12 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
 
 	spin_lock_irq(&css_set_lock);
 
-	/* Move active objcg to the parent's list */
-	xchg(&objcg->memcg, parent);
-	css_get(&parent->css);
-	list_add(&objcg->list, &parent->objcg_list);
-
-	/* Move already reparented objcgs to the parent's list */
-	list_for_each_entry(iter, &memcg->objcg_list, list) {
-		css_get(&parent->css);
-		xchg(&iter->memcg, parent);
-		css_put(&memcg->css);
-	}
+	/* 1) Ready to reparent active objcg. */
+	list_add(&objcg->list, &memcg->objcg_list);
+	/* 2) Reparent active objcg and already reparented objcgs to parent. */
+	list_for_each_entry(iter, &memcg->objcg_list, list)
+		WRITE_ONCE(iter->memcg, parent);
+	/* 3) Move already reparented objcgs to the parent's list */
 	list_splice(&memcg->objcg_list, &parent->objcg_list);
 
 	spin_unlock_irq(&css_set_lock);

From 271dd6b1f636a99a3a77889935296c063f5a3cbe Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:38:06 -0700
Subject: [PATCH 3161/3292] mm: memcontrol: move obj_cgroup_uncharge_pages()
 out of css_set_lock

The css_set_lock is used to guard the list of inherited objcgs.  So there
is no need to uncharge kernel memory under css_set_lock.  Just move it out
of the lock.

Link: https://lkml.kernel.org/r/20210417043538.9793-8-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c79b6926fe83..f7a552eb3e9d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -289,9 +289,10 @@ static void obj_cgroup_release(struct percpu_ref *ref)
 	WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1));
 	nr_pages = nr_bytes >> PAGE_SHIFT;
 
-	spin_lock_irqsave(&css_set_lock, flags);
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
+
+	spin_lock_irqsave(&css_set_lock, flags);
 	list_del(&objcg->list);
 	spin_unlock_irqrestore(&css_set_lock, flags);
 

From 9ef56b78b888c2fa15b6140fbdb88853d7d4fecd Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 28 Jun 2021 19:38:09 -0700
Subject: [PATCH 3162/3292] mm: vmscan: remove noinline_for_stack

The noinline_for_stack is introduced by commit 666356297ec4 ("vmscan: set
up pagevec as late as possible in shrink_inactive_list()"), its purpose is
to delay the allocation of pagevec as late as possible to save stack
memory.  But the commit 2bcf88796381 ("mm: take pagevecs off reclaim
stack") replace pagevecs by lists of pages_to_free.  So we do not need
noinline_for_stack, just remove it (let the compiler decide whether to
inline).

Link: https://lkml.kernel.org/r/20210417043538.9793-9-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index ec93d4fd5a6b..f96d62159720 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2015,8 +2015,8 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
  *
  * Returns the number of pages moved to the given lruvec.
  */
-static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
-						     struct list_head *list)
+static unsigned int move_pages_to_lru(struct lruvec *lruvec,
+				      struct list_head *list)
 {
 	int nr_pages, nr_moved = 0;
 	LIST_HEAD(pages_to_free);
@@ -2096,7 +2096,7 @@ static int current_may_throttle(void)
  * shrink_inactive_list() is a helper for shrink_node().  It returns the number
  * of reclaimed pages
  */
-static noinline_for_stack unsigned long
+static unsigned long
 shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		     struct scan_control *sc, enum lru_list lru)
 {

From b51478a0b3c7040bfcadf6e2e04df5ddde59fd98 Mon Sep 17 00:00:00 2001
From: wenhuizhang <wenhui@gwmail.gwu.edu>
Date: Mon, 28 Jun 2021 19:38:12 -0700
Subject: [PATCH 3163/3292] memcontrol: use flexible-array member

Change deprecated zero-length-and-one-element-arrays into flexible array
member.Zero-length and one-element arrays detected by Lukas's CodeChecker.
Zero/one element arrays cause undefined behaviours if sizeof() used.

Link: https://lkml.kernel.org/r/20210518200910.29912-1-wenhui@gwmail.gwu.edu
Signed-off-by: wenhuizhang <wenhui@gwmail.gwu.edu>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0ce97eff79e2..3cc18c2176e7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -349,8 +349,7 @@ struct mem_cgroup {
 	struct deferred_split deferred_split_queue;
 #endif
 
-	struct mem_cgroup_per_node *nodeinfo[0];
-	/* WARNING: nodeinfo must be the last member here */
+	struct mem_cgroup_per_node *nodeinfo[];
 };
 
 /*

From 87579e9b7d8dc36e7cfc40c03f1ae5634e16e2c5 Mon Sep 17 00:00:00 2001
From: Dan Schatzberg <schatzberg.dan@gmail.com>
Date: Mon, 28 Jun 2021 19:38:15 -0700
Subject: [PATCH 3164/3292] loop: use worker per cgroup instead of kworker

Patch series "Charge loop device i/o to issuing cgroup", v14.

The loop device runs all i/o to the backing file on a separate kworker
thread which results in all i/o being charged to the root cgroup.  This
allows a loop device to be used to trivially bypass resource limits and
other policy.  This patch series fixes this gap in accounting.

A simple script to demonstrate this behavior on cgroupv2 machine:

'''
#!/bin/bash
set -e

CGROUP=/sys/fs/cgroup/test.slice
LOOP_DEV=/dev/loop0

if [[ ! -d $CGROUP ]]
then
    sudo mkdir $CGROUP
fi

grep oom_kill $CGROUP/memory.events

# Set a memory limit, write more than that limit to tmpfs -> OOM kill
sudo unshare -m bash -c "
echo \$\$ > $CGROUP/cgroup.procs;
echo 0 > $CGROUP/memory.swap.max;
echo 64M > $CGROUP/memory.max;
mount -t tmpfs -o size=512m tmpfs /tmp;
dd if=/dev/zero of=/tmp/file bs=1M count=256" || true

grep oom_kill $CGROUP/memory.events

# Set a memory limit, write more than that limit through loopback
# device -> no OOM kill
sudo unshare -m bash -c "
echo \$\$ > $CGROUP/cgroup.procs;
echo 0 > $CGROUP/memory.swap.max;
echo 64M > $CGROUP/memory.max;
mount -t tmpfs -o size=512m tmpfs /tmp;
truncate -s 512m /tmp/backing_file
losetup $LOOP_DEV /tmp/backing_file
dd if=/dev/zero of=$LOOP_DEV bs=1M count=256;
losetup -D $LOOP_DEV" || true

grep oom_kill $CGROUP/memory.events
'''

Naively charging cgroups could result in priority inversions through the
single kworker thread in the case where multiple cgroups are
reading/writing to the same loop device.  This patch series does some
minor modification to the loop driver so that each cgroup can make forward
progress independently to avoid this inversion.

With this patch series applied, the above script triggers OOM kills when
writing through the loop device as expected.

This patch (of 3):

Existing uses of loop device may have multiple cgroups reading/writing to
the same device.  Simply charging resources for I/O to the backing file
could result in priority inversion where one cgroup gets synchronously
blocked, holding up all other I/O to the loop device.

In order to avoid this priority inversion, we use a single workqueue where
each work item is a "struct loop_worker" which contains a queue of struct
loop_cmds to issue.  The loop device maintains a tree mapping blk css_id
-> loop_worker.  This allows each cgroup to independently make forward
progress issuing I/O to the backing file.

There is also a single queue for I/O associated with the rootcg which can
be used in cases of extreme memory shortage where we cannot allocate a
loop_worker.

The locking for the tree and queues is fairly heavy handed - we acquire a
per-loop-device spinlock any time either is accessed.  The existing
implementation serializes all I/O through a single thread anyways, so I
don't believe this is any worse.

[colin.king@canonical.com: fixes]

Link: https://lkml.kernel.org/r/20210610173944.1203706-1-schatzberg.dan@gmail.com
Link: https://lkml.kernel.org/r/20210610173944.1203706-2-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/loop.c | 217 ++++++++++++++++++++++++++++++++++++-------
 drivers/block/loop.h |  12 ++-
 2 files changed, 191 insertions(+), 38 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 76e12f3482a9..54ed3ebbbc37 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -71,7 +71,6 @@
 #include <linux/writeback.h>
 #include <linux/completion.h>
 #include <linux/highmem.h>
-#include <linux/kthread.h>
 #include <linux/splice.h>
 #include <linux/sysfs.h>
 #include <linux/miscdevice.h>
@@ -84,6 +83,8 @@
 
 #include <linux/uaccess.h>
 
+#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
+
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
 
@@ -921,27 +922,95 @@ static void loop_config_discard(struct loop_device *lo)
 	q->limits.discard_alignment = 0;
 }
 
-static void loop_unprepare_queue(struct loop_device *lo)
-{
-	kthread_flush_worker(&lo->worker);
-	kthread_stop(lo->worker_task);
-}
+struct loop_worker {
+	struct rb_node rb_node;
+	struct work_struct work;
+	struct list_head cmd_list;
+	struct list_head idle_list;
+	struct loop_device *lo;
+	struct cgroup_subsys_state *css;
+	unsigned long last_ran_at;
+};
 
-static int loop_kthread_worker_fn(void *worker_ptr)
-{
-	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
-	return kthread_worker_fn(worker_ptr);
-}
+static void loop_workfn(struct work_struct *work);
+static void loop_rootcg_workfn(struct work_struct *work);
+static void loop_free_idle_workers(struct timer_list *timer);
 
-static int loop_prepare_queue(struct loop_device *lo)
+#ifdef CONFIG_BLK_CGROUP
+static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
 {
-	kthread_init_worker(&lo->worker);
-	lo->worker_task = kthread_run(loop_kthread_worker_fn,
-			&lo->worker, "loop%d", lo->lo_number);
-	if (IS_ERR(lo->worker_task))
-		return -ENOMEM;
-	set_user_nice(lo->worker_task, MIN_NICE);
-	return 0;
+	return !css || css == blkcg_root_css;
+}
+#else
+static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
+{
+	return !css;
+}
+#endif
+
+static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
+{
+	struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+	struct loop_worker *cur_worker, *worker = NULL;
+	struct work_struct *work;
+	struct list_head *cmd_list;
+
+	spin_lock_irq(&lo->lo_work_lock);
+
+	if (queue_on_root_worker(cmd->css))
+		goto queue_work;
+
+	node = &lo->worker_tree.rb_node;
+
+	while (*node) {
+		parent = *node;
+		cur_worker = container_of(*node, struct loop_worker, rb_node);
+		if (cur_worker->css == cmd->css) {
+			worker = cur_worker;
+			break;
+		} else if ((long)cur_worker->css < (long)cmd->css) {
+			node = &(*node)->rb_left;
+		} else {
+			node = &(*node)->rb_right;
+		}
+	}
+	if (worker)
+		goto queue_work;
+
+	worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN);
+	/*
+	 * In the event we cannot allocate a worker, just queue on the
+	 * rootcg worker
+	 */
+	if (!worker)
+		goto queue_work;
+
+	worker->css = cmd->css;
+	css_get(worker->css);
+	INIT_WORK(&worker->work, loop_workfn);
+	INIT_LIST_HEAD(&worker->cmd_list);
+	INIT_LIST_HEAD(&worker->idle_list);
+	worker->lo = lo;
+	rb_link_node(&worker->rb_node, parent, node);
+	rb_insert_color(&worker->rb_node, &lo->worker_tree);
+queue_work:
+	if (worker) {
+		/*
+		 * We need to remove from the idle list here while
+		 * holding the lock so that the idle timer doesn't
+		 * free the worker
+		 */
+		if (!list_empty(&worker->idle_list))
+			list_del_init(&worker->idle_list);
+		work = &worker->work;
+		cmd_list = &worker->cmd_list;
+	} else {
+		work = &lo->rootcg_work;
+		cmd_list = &lo->rootcg_cmd_list;
+	}
+	list_add_tail(&cmd->list_entry, cmd_list);
+	queue_work(lo->workqueue, work);
+	spin_unlock_irq(&lo->lo_work_lock);
 }
 
 static void loop_update_rotational(struct loop_device *lo)
@@ -1127,12 +1196,23 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	    !file->f_op->write_iter)
 		lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
-	error = loop_prepare_queue(lo);
-	if (error)
+	lo->workqueue = alloc_workqueue("loop%d",
+					WQ_UNBOUND | WQ_FREEZABLE,
+					0,
+					lo->lo_number);
+	if (!lo->workqueue) {
+		error = -ENOMEM;
 		goto out_unlock;
+	}
 
 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
+	INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
+	INIT_LIST_HEAD(&lo->rootcg_cmd_list);
+	INIT_LIST_HEAD(&lo->idle_worker_list);
+	lo->worker_tree = RB_ROOT;
+	timer_setup(&lo->timer, loop_free_idle_workers,
+		TIMER_DEFERRABLE);
 	lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
 	lo->lo_device = bdev;
 	lo->lo_backing_file = file;
@@ -1200,6 +1280,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	int err = 0;
 	bool partscan = false;
 	int lo_number;
+	struct loop_worker *pos, *worker;
 
 	mutex_lock(&lo->lo_mutex);
 	if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
@@ -1219,6 +1300,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
+	destroy_workqueue(lo->workqueue);
+	spin_lock_irq(&lo->lo_work_lock);
+	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+				idle_list) {
+		list_del(&worker->idle_list);
+		rb_erase(&worker->rb_node, &lo->worker_tree);
+		css_put(worker->css);
+		kfree(worker);
+	}
+	spin_unlock_irq(&lo->lo_work_lock);
+	del_timer_sync(&lo->timer);
+
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_backing_file = NULL;
 	spin_unlock_irq(&lo->lo_lock);
@@ -1255,7 +1348,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
 	lo_number = lo->lo_number;
-	loop_unprepare_queue(lo);
 out_unlock:
 	mutex_unlock(&lo->lo_mutex);
 	if (partscan) {
@@ -2015,7 +2107,7 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	} else
 #endif
 		cmd->css = NULL;
-	kthread_queue_work(&lo->worker, &cmd->work);
+	loop_queue_work(lo, cmd);
 
 	return BLK_STS_OK;
 }
@@ -2045,26 +2137,82 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	}
 }
 
-static void loop_queue_work(struct kthread_work *work)
+static void loop_set_timer(struct loop_device *lo)
 {
-	struct loop_cmd *cmd =
-		container_of(work, struct loop_cmd, work);
-
-	loop_handle_cmd(cmd);
+	timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
 }
 
-static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq,
-		unsigned int hctx_idx, unsigned int numa_node)
+static void loop_process_work(struct loop_worker *worker,
+			struct list_head *cmd_list, struct loop_device *lo)
 {
-	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	int orig_flags = current->flags;
+	struct loop_cmd *cmd;
 
-	kthread_init_work(&cmd->work, loop_queue_work);
-	return 0;
+	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
+	spin_lock_irq(&lo->lo_work_lock);
+	while (!list_empty(cmd_list)) {
+		cmd = container_of(
+			cmd_list->next, struct loop_cmd, list_entry);
+		list_del(cmd_list->next);
+		spin_unlock_irq(&lo->lo_work_lock);
+
+		loop_handle_cmd(cmd);
+		cond_resched();
+
+		spin_lock_irq(&lo->lo_work_lock);
+	}
+
+	/*
+	 * We only add to the idle list if there are no pending cmds
+	 * *and* the worker will not run again which ensures that it
+	 * is safe to free any worker on the idle list
+	 */
+	if (worker && !work_pending(&worker->work)) {
+		worker->last_ran_at = jiffies;
+		list_add_tail(&worker->idle_list, &lo->idle_worker_list);
+		loop_set_timer(lo);
+	}
+	spin_unlock_irq(&lo->lo_work_lock);
+	current->flags = orig_flags;
+}
+
+static void loop_workfn(struct work_struct *work)
+{
+	struct loop_worker *worker =
+		container_of(work, struct loop_worker, work);
+	loop_process_work(worker, &worker->cmd_list, worker->lo);
+}
+
+static void loop_rootcg_workfn(struct work_struct *work)
+{
+	struct loop_device *lo =
+		container_of(work, struct loop_device, rootcg_work);
+	loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
+}
+
+static void loop_free_idle_workers(struct timer_list *timer)
+{
+	struct loop_device *lo = container_of(timer, struct loop_device, timer);
+	struct loop_worker *pos, *worker;
+
+	spin_lock_irq(&lo->lo_work_lock);
+	list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
+				idle_list) {
+		if (time_is_after_jiffies(worker->last_ran_at +
+						LOOP_IDLE_WORKER_TIMEOUT))
+			break;
+		list_del(&worker->idle_list);
+		rb_erase(&worker->rb_node, &lo->worker_tree);
+		css_put(worker->css);
+		kfree(worker);
+	}
+	if (!list_empty(&lo->idle_worker_list))
+		loop_set_timer(lo);
+	spin_unlock_irq(&lo->lo_work_lock);
 }
 
 static const struct blk_mq_ops loop_mq_ops = {
 	.queue_rq       = loop_queue_rq,
-	.init_request	= loop_init_request,
 	.complete	= lo_complete_rq,
 };
 
@@ -2153,6 +2301,7 @@ static int loop_add(struct loop_device **l, int i)
 	mutex_init(&lo->lo_mutex);
 	lo->lo_number		= i;
 	spin_lock_init(&lo->lo_lock);
+	spin_lock_init(&lo->lo_work_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
 	disk->fops		= &lo_fops;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 5beb959b94d3..f81c01bde5c0 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -14,7 +14,6 @@
 #include <linux/blk-mq.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
-#include <linux/kthread.h>
 #include <uapi/linux/loop.h>
 
 /* Possible states of device */
@@ -55,8 +54,13 @@ struct loop_device {
 
 	spinlock_t		lo_lock;
 	int			lo_state;
-	struct kthread_worker	worker;
-	struct task_struct	*worker_task;
+	spinlock_t              lo_work_lock;
+	struct workqueue_struct *workqueue;
+	struct work_struct      rootcg_work;
+	struct list_head        rootcg_cmd_list;
+	struct list_head        idle_worker_list;
+	struct rb_root          worker_tree;
+	struct timer_list       timer;
 	bool			use_dio;
 	bool			sysfs_inited;
 
@@ -67,7 +71,7 @@ struct loop_device {
 };
 
 struct loop_cmd {
-	struct kthread_work work;
+	struct list_head list_entry;
 	bool use_aio; /* use AIO interface to handle I/O */
 	atomic_t ref; /* only for aio */
 	long ret;

From 04f94e3fbe1afcb815d7c7ace78c6779772aa837 Mon Sep 17 00:00:00 2001
From: Dan Schatzberg <schatzberg.dan@gmail.com>
Date: Mon, 28 Jun 2021 19:38:18 -0700
Subject: [PATCH 3165/3292] mm: charge active memcg when no mm is set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

set_active_memcg() worked for kernel allocations but was silently ignored
for user pages.

This patch establishes a precedence order for who gets charged:

1. If there is a memcg associated with the page already, that memcg is
   charged. This happens during swapin.

2. If an explicit mm is passed, mm->memcg is charged. This happens
   during page faults, which can be triggered in remote VMs (eg gup).

3. Otherwise consult the current process context. If there is an
   active_memcg, use that. Otherwise, current->mm->memcg.

Previously, if a NULL mm was passed to mem_cgroup_charge (case 3) it would
always charge the root cgroup.  Now it looks up the active_memcg first
(falling back to charging the root cgroup if not set).

Link: https://lkml.kernel.org/r/20210610173944.1203706-3-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Chris Down <chris@chrisdown.name>
Acked-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c    |  2 +-
 mm/memcontrol.c | 41 +++++++++++++++++++++++++++--------------
 mm/shmem.c      |  4 ++--
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 66f7e9fdfbc4..ac82a93d4f38 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -872,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
 	page->index = offset;
 
 	if (!huge) {
-		error = mem_cgroup_charge(page, current->mm, gfp);
+		error = mem_cgroup_charge(page, NULL, gfp);
 		if (error)
 			goto error;
 		charged = true;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f7a552eb3e9d..8f3244e59b30 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -897,13 +897,24 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 }
 EXPORT_SYMBOL(mem_cgroup_from_task);
 
+static __always_inline struct mem_cgroup *active_memcg(void)
+{
+	if (in_interrupt())
+		return this_cpu_read(int_active_memcg);
+	else
+		return current->active_memcg;
+}
+
 /**
  * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
  * @mm: mm from which memcg should be extracted. It can be NULL.
  *
- * Obtain a reference on mm->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
- * returned.
+ * Obtain a reference on mm->memcg and returns it if successful. If mm
+ * is NULL, then the memcg is chosen as follows:
+ * 1) The active memcg, if set.
+ * 2) current->mm->memcg, if available
+ * 3) root memcg
+ * If mem_cgroup is disabled, NULL is returned.
  */
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
@@ -921,8 +932,17 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	 * counting is disabled on the root level in the
 	 * cgroup core. See CSS_NO_REF.
 	 */
-	if (unlikely(!mm))
-		return root_mem_cgroup;
+	if (unlikely(!mm)) {
+		memcg = active_memcg();
+		if (unlikely(memcg)) {
+			/* remote memcg must hold a ref */
+			css_get(&memcg->css);
+			return memcg;
+		}
+		mm = current->mm;
+		if (unlikely(!mm))
+			return root_mem_cgroup;
+	}
 
 	rcu_read_lock();
 	do {
@@ -935,14 +955,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_mm);
 
-static __always_inline struct mem_cgroup *active_memcg(void)
-{
-	if (in_interrupt())
-		return this_cpu_read(int_active_memcg);
-	else
-		return current->active_memcg;
-}
-
 static __always_inline bool memcg_kmem_bypass(void)
 {
 	/* Allow remote memcg charging from any context. */
@@ -6711,7 +6723,8 @@ out:
  * @gfp_mask: reclaim mode
  *
  * Try to charge @page to the memcg that @mm belongs to, reclaiming
- * pages according to @gfp_mask if necessary.
+ * pages according to @gfp_mask if necessary. if @mm is NULL, try to
+ * charge to the active memcg.
  *
  * Do not use this for pages allocated for swapin.
  *
diff --git a/mm/shmem.c b/mm/shmem.c
index 53f21016608e..e72931b9246c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1695,7 +1695,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
+	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
 	struct swap_info_struct *si;
 	struct page *page = NULL;
 	swp_entry_t swap;
@@ -1828,7 +1828,7 @@ repeat:
 	}
 
 	sbinfo = SHMEM_SB(inode->i_sb);
-	charge_mm = vma ? vma->vm_mm : current->mm;
+	charge_mm = vma ? vma->vm_mm : NULL;
 
 	page = pagecache_get_page(mapping, index,
 					FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);

From c74d40e8b5e2ac5eee1ca45b12d3e174915f1d88 Mon Sep 17 00:00:00 2001
From: Dan Schatzberg <schatzberg.dan@gmail.com>
Date: Mon, 28 Jun 2021 19:38:21 -0700
Subject: [PATCH 3166/3292] loop: charge i/o to mem and blk cg

The current code only associates with the existing blkcg when aio is used
to access the backing file.  This patch covers all types of i/o to the
backing file and also associates the memcg so if the backing file is on
tmpfs, memory is charged appropriately.

This patch also exports cgroup_get_e_css and int_active_memcg so it can be
used by the loop module.

Link: https://lkml.kernel.org/r/20210610173944.1203706-4-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: Chris Down <chris@chrisdown.name>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/loop.c       | 61 +++++++++++++++++++++++++-------------
 drivers/block/loop.h       |  3 +-
 include/linux/memcontrol.h |  6 ++++
 kernel/cgroup/cgroup.c     |  1 +
 mm/memcontrol.c            |  1 +
 5 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 54ed3ebbbc37..452c7437e1f0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -78,6 +78,7 @@
 #include <linux/uio.h>
 #include <linux/ioprio.h>
 #include <linux/blk-cgroup.h>
+#include <linux/sched/mm.h>
 
 #include "loop.h"
 
@@ -516,8 +517,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
 {
 	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
 
-	if (cmd->css)
-		css_put(cmd->css);
 	cmd->ret = ret;
 	lo_rw_aio_do_completion(cmd);
 }
@@ -578,8 +577,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	cmd->iocb.ki_complete = lo_rw_aio_complete;
 	cmd->iocb.ki_flags = IOCB_DIRECT;
 	cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
-	if (cmd->css)
-		kthread_associate_blkcg(cmd->css);
 
 	if (rw == WRITE)
 		ret = call_write_iter(file, &cmd->iocb, &iter);
@@ -587,7 +584,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 		ret = call_read_iter(file, &cmd->iocb, &iter);
 
 	lo_rw_aio_do_completion(cmd);
-	kthread_associate_blkcg(NULL);
 
 	if (ret != -EIOCBQUEUED)
 		cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
@@ -928,7 +924,7 @@ struct loop_worker {
 	struct list_head cmd_list;
 	struct list_head idle_list;
 	struct loop_device *lo;
-	struct cgroup_subsys_state *css;
+	struct cgroup_subsys_state *blkcg_css;
 	unsigned long last_ran_at;
 };
 
@@ -957,7 +953,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
 
 	spin_lock_irq(&lo->lo_work_lock);
 
-	if (queue_on_root_worker(cmd->css))
+	if (queue_on_root_worker(cmd->blkcg_css))
 		goto queue_work;
 
 	node = &lo->worker_tree.rb_node;
@@ -965,10 +961,10 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
 	while (*node) {
 		parent = *node;
 		cur_worker = container_of(*node, struct loop_worker, rb_node);
-		if (cur_worker->css == cmd->css) {
+		if (cur_worker->blkcg_css == cmd->blkcg_css) {
 			worker = cur_worker;
 			break;
-		} else if ((long)cur_worker->css < (long)cmd->css) {
+		} else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) {
 			node = &(*node)->rb_left;
 		} else {
 			node = &(*node)->rb_right;
@@ -980,13 +976,18 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
 	worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN);
 	/*
 	 * In the event we cannot allocate a worker, just queue on the
-	 * rootcg worker
+	 * rootcg worker and issue the I/O as the rootcg
 	 */
-	if (!worker)
+	if (!worker) {
+		cmd->blkcg_css = NULL;
+		if (cmd->memcg_css)
+			css_put(cmd->memcg_css);
+		cmd->memcg_css = NULL;
 		goto queue_work;
+	}
 
-	worker->css = cmd->css;
-	css_get(worker->css);
+	worker->blkcg_css = cmd->blkcg_css;
+	css_get(worker->blkcg_css);
 	INIT_WORK(&worker->work, loop_workfn);
 	INIT_LIST_HEAD(&worker->cmd_list);
 	INIT_LIST_HEAD(&worker->idle_list);
@@ -1306,7 +1307,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 				idle_list) {
 		list_del(&worker->idle_list);
 		rb_erase(&worker->rb_node, &lo->worker_tree);
-		css_put(worker->css);
+		css_put(worker->blkcg_css);
 		kfree(worker);
 	}
 	spin_unlock_irq(&lo->lo_work_lock);
@@ -2100,13 +2101,18 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	}
 
 	/* always use the first bio's css */
+	cmd->blkcg_css = NULL;
+	cmd->memcg_css = NULL;
 #ifdef CONFIG_BLK_CGROUP
-	if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
-		cmd->css = &bio_blkcg(rq->bio)->css;
-		css_get(cmd->css);
-	} else
+	if (rq->bio && rq->bio->bi_blkg) {
+		cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
+#ifdef CONFIG_MEMCG
+		cmd->memcg_css =
+			cgroup_get_e_css(cmd->blkcg_css->cgroup,
+					&memory_cgrp_subsys);
+#endif
+	}
 #endif
-		cmd->css = NULL;
 	loop_queue_work(lo, cmd);
 
 	return BLK_STS_OK;
@@ -2118,13 +2124,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	const bool write = op_is_write(req_op(rq));
 	struct loop_device *lo = rq->q->queuedata;
 	int ret = 0;
+	struct mem_cgroup *old_memcg = NULL;
 
 	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
 		ret = -EIO;
 		goto failed;
 	}
 
+	if (cmd->blkcg_css)
+		kthread_associate_blkcg(cmd->blkcg_css);
+	if (cmd->memcg_css)
+		old_memcg = set_active_memcg(
+			mem_cgroup_from_css(cmd->memcg_css));
+
 	ret = do_req_filebacked(lo, rq);
+
+	if (cmd->blkcg_css)
+		kthread_associate_blkcg(NULL);
+
+	if (cmd->memcg_css) {
+		set_active_memcg(old_memcg);
+		css_put(cmd->memcg_css);
+	}
  failed:
 	/* complete non-aio request */
 	if (!cmd->use_aio || ret) {
@@ -2203,7 +2224,7 @@ static void loop_free_idle_workers(struct timer_list *timer)
 			break;
 		list_del(&worker->idle_list);
 		rb_erase(&worker->rb_node, &lo->worker_tree);
-		css_put(worker->css);
+		css_put(worker->blkcg_css);
 		kfree(worker);
 	}
 	if (!list_empty(&lo->idle_worker_list))
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index f81c01bde5c0..1988899db63a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -77,7 +77,8 @@ struct loop_cmd {
 	long ret;
 	struct kiocb iocb;
 	struct bio_vec *bvec;
-	struct cgroup_subsys_state *css;
+	struct cgroup_subsys_state *blkcg_css;
+	struct cgroup_subsys_state *memcg_css;
 };
 
 /* Support for loadable transfer modules */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3cc18c2176e7..1de3859233a6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1230,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return NULL;
 }
 
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 21ecc6ee6a6d..9cc8c3a686b1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -577,6 +577,7 @@ out_unlock:
 	rcu_read_unlock();
 	return css;
 }
+EXPORT_SYMBOL_GPL(cgroup_get_e_css);
 
 static void cgroup_get_live(struct cgroup *cgrp)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8f3244e59b30..4ee243ce6135 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -78,6 +78,7 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
+EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
 
 /* Socket memory accounting disabled? */
 static bool cgroup_memory_nosocket;

From 6a1803bb582c50909a7f6cc4153360eaf5ae8fc8 Mon Sep 17 00:00:00 2001
From: Huilong Deng <denghuilong@cdjrlc.com>
Date: Mon, 28 Jun 2021 19:38:24 -0700
Subject: [PATCH 3167/3292] mm: memcontrol: remove trailing semicolon in macros

Macros should not use a trailing semicolon.

Link: https://lkml.kernel.org/r/20210614091530.22117-1-denghuilong@cdjrlc.com
Signed-off-by: Huilong Deng <denghuilong@cdjrlc.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1de3859233a6..6d66037be646 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -192,7 +192,7 @@ enum memcg_kmem_state {
 struct memcg_padding {
 	char x[0];
 } ____cacheline_internodealigned_in_smp;
-#define MEMCG_PADDING(name)      struct memcg_padding name;
+#define MEMCG_PADDING(name)      struct memcg_padding name
 #else
 #define MEMCG_PADDING(name)
 #endif

From 8fa207525f6ae241c19cbe4c470c5cb9bea4aab0 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 28 Jun 2021 19:38:28 -0700
Subject: [PATCH 3168/3292] perf: MAP_EXECUTABLE does not indicate VM_MAYEXEC

Patch series "perf/binfmt/mm: remove in-tree usage of MAP_EXECUTABLE".

Stumbling over the history of MAP_EXECUTABLE, I noticed that we still have
some in-tree users that we can get rid of.

This patch (of 3):

Before commit e9714acf8c43 ("mm: kill vma flag VM_EXECUTABLE and
mm->num_exe_file_vmas"), VM_EXECUTABLE indicated MAP_EXECUTABLE.
MAP_EXECUTABLE is nowadays essentially ignored by the kernel and does not
relate to VM_MAYEXEC.

Link: https://lkml.kernel.org/r/20210421093453.6904-1-david@redhat.com
Link: https://lkml.kernel.org/r/20210421093453.6904-2-david@redhat.com
Fixes: f972eb63b100 ("perf: Pass protection and flags bits through mmap2 interface")
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/events/core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index fe88d6eea3c2..1c5e3240cdbc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8301,8 +8301,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
 	if (vma->vm_flags & VM_DENYWRITE)
 		flags |= MAP_DENYWRITE;
-	if (vma->vm_flags & VM_MAYEXEC)
-		flags |= MAP_EXECUTABLE;
 	if (vma->vm_flags & VM_LOCKED)
 		flags |= MAP_LOCKED;
 	if (is_vm_hugetlb_page(vma))

From a4eec6a3dfb7a6257ddcacf15e9428fe5834ffd4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 28 Jun 2021 19:38:31 -0700
Subject: [PATCH 3169/3292] binfmt: remove in-tree usage of MAP_EXECUTABLE

Ever since commit e9714acf8c43 ("mm: kill vma flag VM_EXECUTABLE and
mm->num_exe_file_vmas"), VM_EXECUTABLE is gone and MAP_EXECUTABLE is
essentially completely ignored.  Let's remove all usage of MAP_EXECUTABLE.

[akpm@linux-foundation.org: fix blooper in fs/binfmt_aout.c. per David]

Link: https://lkml.kernel.org/r/20210421093453.6904-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/ia32/ia32_aout.c |  4 ++--
 fs/binfmt_aout.c          |  4 ++--
 fs/binfmt_elf.c           |  2 +-
 fs/binfmt_elf_fdpic.c     | 11 ++---------
 fs/binfmt_flat.c          |  2 +-
 5 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a09fc37ead9d..5e5b9fc2747f 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -203,7 +203,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
 				PROT_READ | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
-				MAP_EXECUTABLE | MAP_32BIT,
+				MAP_32BIT,
 				fd_offset);
 
 		if (error != N_TXTADDR(ex))
@@ -212,7 +212,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
-				MAP_EXECUTABLE | MAP_32BIT,
+				MAP_32BIT,
 				fd_offset + ex.a_text);
 		if (error != N_DATADDR(ex))
 			return error;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 3e84e9bb9084..145917f734fe 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -222,7 +222,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 
 		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
 			PROT_READ | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
+			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
 			fd_offset);
 
 		if (error != N_TXTADDR(ex))
@@ -230,7 +230,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 
 		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
+				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
 				fd_offset + ex.a_text);
 		if (error != N_DATADDR(ex))
 			return error;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 187b3f2b9202..baf8f91776f4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1070,7 +1070,7 @@ out_free_interp:
 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
 				     !!interpreter, false);
 
-		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
+		elf_flags = MAP_PRIVATE | MAP_DENYWRITE;
 
 		vaddr = elf_ppnt->p_vaddr;
 		/*
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c99b102c860..39fa1b0307e1 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -928,7 +928,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 {
 	struct elf32_fdpic_loadseg *seg;
 	struct elf32_phdr *phdr;
-	unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags;
+	unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0;
 	int loop, ret;
 
 	load_addr = params->load_addr;
@@ -948,12 +948,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 	}
 
 	/* allocate one big anon block for everything */
-	mflags = MAP_PRIVATE;
-	if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
-		mflags |= MAP_EXECUTABLE;
-
 	maddr = vm_mmap(NULL, load_addr, top - base,
-			PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
+			PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, 0);
 	if (IS_ERR_VALUE(maddr))
 		return (int) maddr;
 
@@ -1046,9 +1042,6 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
 
 		flags = MAP_PRIVATE | MAP_DENYWRITE;
-		if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
-			flags |= MAP_EXECUTABLE;
-
 		maddr = 0;
 
 		switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a1072c6a2341..5d776f80ee50 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -573,7 +573,7 @@ static int load_flat_file(struct linux_binprm *bprm,
 		pr_debug("ROM mapping of file (we hope)\n");
 
 		textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
-				  MAP_PRIVATE|MAP_EXECUTABLE, 0);
+				  MAP_PRIVATE, 0);
 		if (!textpos || IS_ERR_VALUE(textpos)) {
 			ret = textpos;
 			if (!textpos)

From 3b8db39fad98cbb1d36e079236a446fad710daea Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 28 Jun 2021 19:38:35 -0700
Subject: [PATCH 3170/3292] mm: ignore MAP_EXECUTABLE in ksys_mmap_pgoff()

Let's also remove masking off MAP_EXECUTABLE from ksys_mmap_pgoff(): the
last in-tree occurrence of MAP_EXECUTABLE is now in LEGACY_MAP_MASK, which
accepts the flag e.g., for MAP_SHARED_VALIDATE; however, the flag is
ignored throughout the kernel now.

Add a comment to LEGACY_MAP_MASK stating that MAP_EXECUTABLE is ignored.

Link: https://lkml.kernel.org/r/20210421093453.6904-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kevin Brodsky <Kevin.Brodsky@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mman.h | 2 ++
 mm/mmap.c            | 2 +-
 mm/nommu.c           | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/mman.h b/include/linux/mman.h
index 629cefc4ecba..ebb09a964272 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -31,6 +31,8 @@
 /*
  * The historical set of flags that all mmap implementations implicitly
  * support when a ->mmap_validate() op is not provided in file_operations.
+ *
+ * MAP_EXECUTABLE is completely ignored throughout the kernel.
  */
 #define LEGACY_MAP_MASK (MAP_SHARED \
 		| MAP_PRIVATE \
diff --git a/mm/mmap.c b/mm/mmap.c
index 0584e540246e..f9a61f7dc540 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1633,7 +1633,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			return PTR_ERR(file);
 	}
 
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	flags &= ~MAP_DENYWRITE;
 
 	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 out_fput:
diff --git a/mm/nommu.c b/mm/nommu.c
index 85a3a68dffb6..affda71641ca 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1296,7 +1296,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			goto out;
 	}
 
-	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	flags &= ~MAP_DENYWRITE;
 
 	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 

From 78d9cf6041b968cc881fd22e25e2662d1cec4dba Mon Sep 17 00:00:00 2001
From: Gonzalo Matias Juarez Tello <gmjuareztello@gmail.com>
Date: Mon, 28 Jun 2021 19:38:39 -0700
Subject: [PATCH 3171/3292] mm/mmap.c: logic of find_vma_intersection repeated
 in __do_munmap

Logic of find_vma_intersection() is repeated in __do_munmap().

Also, prev is assigned a value before checking vma->vm_start >= end which
might end up on a return statement making that assignment useless.

Calling find_vma_intersection() checks that condition and returns NULL if
no vma is found, hence only the !vma check is needed in __do_munmap().

Link: https://lkml.kernel.org/r/20210409162129.18313-1-gmjuareztello@gmail.com
Signed-off-by: Gonzalo Matias Juarez Tello <gmjuareztello@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index f9a61f7dc540..bb128a42557e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2828,16 +2828,11 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	 */
 	arch_unmap(mm, start, end);
 
-	/* Find the first overlapping VMA */
-	vma = find_vma(mm, start);
+	/* Find the first overlapping VMA where start < vma->vm_end */
+	vma = find_vma_intersection(mm, start, end);
 	if (!vma)
 		return 0;
 	prev = vma->vm_prev;
-	/* we have  start < vma->vm_end  */
-
-	/* if it doesn't overlap, we have nothing.. */
-	if (vma->vm_start >= end)
-		return 0;
 
 	/*
 	 * If we need to split any vma, do it now to save pain later.

From 96d990239e31d9623fdb96e829237b997c9d3d63 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:38:41 -0700
Subject: [PATCH 3172/3292] mm/mmap: introduce unlock_range() for code cleanup

Both __do_munmap() and exit_mmap() unlock a range of VMAs using almost
identical code blocks.  Replace both blocks by a static inline function.

[akpm@linux-foundation.org: tweak code layout]

Link: https://lkml.kernel.org/r/20210510211021.2797427-1-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index bb128a42557e..d72716f7a0b4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2802,6 +2802,22 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __split_vma(mm, vma, addr, new_below);
 }
 
+static inline void
+unlock_range(struct vm_area_struct *start, unsigned long limit)
+{
+	struct mm_struct *mm = start->vm_mm;
+	struct vm_area_struct *tmp = start;
+
+	while (tmp && tmp->vm_start < limit) {
+		if (tmp->vm_flags & VM_LOCKED) {
+			mm->locked_vm -= vma_pages(tmp);
+			munlock_vma_pages_all(tmp);
+		}
+
+		tmp = tmp->vm_next;
+	}
+}
+
 /* Munmap is split into 2 main parts -- this part which finds
  * what needs doing, and the areas themselves, which do the
  * work.  This now handles partial unmappings.
@@ -2885,17 +2901,8 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	/*
 	 * unlock any mlock()ed ranges before detaching vmas
 	 */
-	if (mm->locked_vm) {
-		struct vm_area_struct *tmp = vma;
-		while (tmp && tmp->vm_start < end) {
-			if (tmp->vm_flags & VM_LOCKED) {
-				mm->locked_vm -= vma_pages(tmp);
-				munlock_vma_pages_all(tmp);
-			}
-
-			tmp = tmp->vm_next;
-		}
-	}
+	if (mm->locked_vm)
+		unlock_range(vma, end);
 
 	/* Detach vmas from rbtree */
 	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
@@ -3180,14 +3187,8 @@ void exit_mmap(struct mm_struct *mm)
 		mmap_write_unlock(mm);
 	}
 
-	if (mm->locked_vm) {
-		vma = mm->mmap;
-		while (vma) {
-			if (vma->vm_flags & VM_LOCKED)
-				munlock_vma_pages_all(vma);
-			vma = vma->vm_next;
-		}
-	}
+	if (mm->locked_vm)
+		unlock_range(mm->mmap, ULONG_MAX);
 
 	arch_exit_mmap(mm);
 

From 35e43c5ff4d2da700e8ed2216acae81f62800eaa Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:38:44 -0700
Subject: [PATCH 3173/3292] mm/mmap: use find_vma_intersection() in do_mmap()
 for overlap

Using find_vma_intersection() avoids the need for a temporary variable and
makes the code cleaner.

Link: https://lkml.kernel.org/r/20210511014328.2902782-1-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mmap.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index d72716f7a0b4..d8c92ae50565 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1457,9 +1457,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		return addr;
 
 	if (flags & MAP_FIXED_NOREPLACE) {
-		struct vm_area_struct *vma = find_vma(mm, addr);
-
-		if (vma && vma->vm_start < addr + len)
+		if (find_vma_intersection(mm, addr, addr + len))
 			return -EEXIST;
 	}
 

From 2797e79f1a491fe4ffc4daf1104243ad07902d3f Mon Sep 17 00:00:00 2001
From: Liu Xiang <liu.xiang@zlingsmart.com>
Date: Mon, 28 Jun 2021 19:38:47 -0700
Subject: [PATCH 3174/3292] mm/memory.c: fix comment of finish_mkwrite_fault()

Fix the return value in comment of finish_mkwrite_fault().

Link: https://lkml.kernel.org/r/20210513093931.15234-1-liu.xiang@zlingsmart.com
Signed-off-by: Liu Xiang <liu.xiang@zlingsmart.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index a4d82a6de000..b195ece205ef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3049,7 +3049,7 @@ oom:
  * The function expects the page to be locked or other protection against
  * concurrent faults / writeback (such as DAX radix tree locks).
  *
- * Return: %VM_FAULT_WRITE on success, %0 when PTE got changed before
+ * Return: %0 on success, %VM_FAULT_NOPAGE when PTE got changed before
  * we acquired PTE lock.
  */
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)

From ce6d42f2e4a2d98898419743b037a95661e3ac9d Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:38:50 -0700
Subject: [PATCH 3175/3292] mm: add vma_lookup(), update
 find_vma_intersection() comments

Patch series "mm: Add vma_lookup()", v2.

Many places in the kernel use find_vma() to get a vma and then check the
start address of the vma to ensure the next vma was not returned.

Other places use the find_vma_intersection() call with add, addr + 1 as
the range; looking for just the vma at a specific address.

The third use of find_vma() is by developers who do not know that the
function starts searching at the provided address upwards for the next
vma.  This results in a bug that is often overlooked for a long time.

Adding the new vma_lookup() function will allow for cleaner code by
removing the find_vma() calls which check limits, making
find_vma_intersection() calls of a single address to be shorter, and
potentially reduce the incorrect uses of find_vma().

This patch (of 22):

Many places in the kernel use find_vma() to get a vma and then check the
start address of the vma to ensure the next vma was not returned.

Other places use the find_vma_intersection() call with add, addr + 1 as
the range; looking for just the vma at a specific address.

The third use of find_vma() is by developers who do not know that the
function starts searching at the provided address upwards for the next
vma.  This results in a bug that is often overlooked for a long time.

Adding the new vma_lookup() function will allow for cleaner code by
removing the find_vma() calls which check limits, making
find_vma_intersection() calls of a single address to be shorter, and
potentially reduce the incorrect uses of find_vma().

Also change find_vma_intersection() comments and declaration to be of the
correct length and add kernel documentation style comment.

Link: https://lkml.kernel.org/r/20210521174745.2219620-1-Liam.Howlett@Oracle.com
Link: https://lkml.kernel.org/r/20210521174745.2219620-2-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 79f32962d7ae..1a98b5447a3b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2676,17 +2676,45 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add
 extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
 					     struct vm_area_struct **pprev);
 
-/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
-   NULL if none.  Assume start_addr < end_addr. */
-static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+/**
+ * find_vma_intersection() - Look up the first VMA which intersects the interval
+ * @mm: The process address space.
+ * @start_addr: The inclusive start user address.
+ * @end_addr: The exclusive end user address.
+ *
+ * Returns: The first VMA within the provided range, %NULL otherwise.  Assumes
+ * start_addr < end_addr.
+ */
+static inline
+struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+					     unsigned long start_addr,
+					     unsigned long end_addr)
 {
-	struct vm_area_struct * vma = find_vma(mm,start_addr);
+	struct vm_area_struct *vma = find_vma(mm, start_addr);
 
 	if (vma && end_addr <= vma->vm_start)
 		vma = NULL;
 	return vma;
 }
 
+/**
+ * vma_lookup() - Find a VMA at a specific address
+ * @mm: The process address space.
+ * @addr: The user address.
+ *
+ * Return: The vm_area_struct at the given address, %NULL otherwise.
+ */
+static inline
+struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+
+	if (vma && addr < vma->vm_start)
+		vma = NULL;
+
+	return vma;
+}
+
 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
 {
 	unsigned long vm_start = vma->vm_start;

From 064b2663603c76e9ab6fe1bb2e92d1a7299fff9e Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:38:53 -0700
Subject: [PATCH 3176/3292] drm/i915/selftests: use vma_lookup() in
 __igt_mmap()

vma_lookup() will look up the vma at a specific address.  find_vma() will
start the search for a specific address and continue upwards.  This fixes
an issue with the selftest as the returned vma may not be the newly
created vma, but simply the vma at a higher address.

objects

Link: https://lkml.kernel.org/r/20210521174745.2219620-3-Liam.Howlett@Oracle.com
Fixes: 6fedafacae1b (drm/i915/selftests: Wrap vm_mmap() around GEM
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 5cf6df49c333..35c15ef1327d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -871,7 +871,7 @@ static int __igt_mmap(struct drm_i915_private *i915,
 
 	pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr);
 
-	area = find_vma(current->mm, addr);
+	area = vma_lookup(current->mm, addr);
 	if (!area) {
 		pr_err("%s: Did not create a vm_area_struct for the mmap\n",
 		       obj->mm.region->name);

From b55541414bd00dbf64cf2ff4c4f1c41cd5cd42dc Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:38:56 -0700
Subject: [PATCH 3177/3292] arch/arc/kernel/troubleshoot: use vma_lookup()
 instead of find_vma()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-4-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/kernel/troubleshoot.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index a331bb5d8319..7654c2e42dc0 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -83,12 +83,12 @@ static void show_faulting_vma(unsigned long address)
 	 * non-inclusive vma
 	 */
 	mmap_read_lock(active_mm);
-	vma = find_vma(active_mm, address);
+	vma = vma_lookup(active_mm, address);
 
-	/* check against the find_vma( ) behaviour which returns the next VMA
-	 * if the container VMA is not found
+	/* Lookup the vma at the address and report if the container VMA is not
+	 * found
 	 */
-	if (vma && (vma->vm_start <= address)) {
+	if (vma) {
 		char buf[ARC_PATH_MAX];
 		char *nm = "?";
 

From 09eef83a801512a71b0c95c25e7d8fd69141aa1b Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:38:59 -0700
Subject: [PATCH 3178/3292] arch/arm64/kvm: use vma_lookup() instead of
 find_vma_intersection()

vma_lookup() finds the vma of a specific address with a cleaner interface
and is more readable.

Link: https://lkml.kernel.org/r/20210521174745.2219620-5-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f3..74b3c1a3ff5a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -855,7 +855,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 	/* Let's check if we will get back a huge page backed by hugetlbfs */
 	mmap_read_lock(current->mm);
-	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	vma = vma_lookup(current->mm, hva);
 	if (unlikely(!vma)) {
 		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
 		mmap_read_unlock(current->mm);

From 27a14d287e16c308040508be9f0cb28bc935bd0e Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:02 -0700
Subject: [PATCH 3179/3292] arch/powerpc/kvm/book3s_hv_uvmem: use vma_lookup()
 instead of find_vma_intersection()

vma_lookup() finds the vma of a specific address with a cleaner interface
and is more readable.

Link: https://lkml.kernel.org/r/20210521174745.2219620-6-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be5..34720b79588f 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -614,7 +614,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
 
 		/* Fetch the VMA if addr is not in the latest fetched one */
 		if (!vma || addr >= vma->vm_end) {
-			vma = find_vma_intersection(kvm->mm, addr, addr+1);
+			vma = vma_lookup(kvm->mm, addr);
 			if (!vma) {
 				pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
 				break;

From 900c83f88af06bf0466c28bdde8a2c011b855e44 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:05 -0700
Subject: [PATCH 3180/3292] arch/powerpc/kvm/book3s: use vma_lookup() in
 kvmppc_hv_setup_htab_rma()

Using vma_lookup() removes the requirement to check if the address is
within the returned vma.  The code is easier to understand and more
compact.

Link: https://lkml.kernel.org/r/20210521174745.2219620-7-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bc0813644666..fb83c84d116a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4758,8 +4758,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	/* Look up the VMA for the start of this memory slot */
 	hva = memslot->userspace_addr;
 	mmap_read_lock(kvm->mm);
-	vma = find_vma(kvm->mm, hva);
-	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+	vma = vma_lookup(kvm->mm, hva);
+	if (!vma || (vma->vm_flags & VM_IO))
 		goto up_out;
 
 	psize = vma_kernel_pagesize(vma);

From 7f7020ac0dc9a7a7dfb2237c6e8ed13f40162a6b Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:08 -0700
Subject: [PATCH 3181/3292] arch/mips/kernel/traps: use vma_lookup() instead of
 find_vma()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-8-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/kernel/traps.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 0b4e06303c55..6f07362de5ce 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -784,7 +784,6 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
 int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
 	int si_code;
-	struct vm_area_struct *vma;
 
 	switch (sig) {
 	case 0:
@@ -800,8 +799,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 
 	case SIGSEGV:
 		mmap_read_lock(current->mm);
-		vma = find_vma(current->mm, (unsigned long)fault_addr);
-		if (vma && (vma->vm_start <= (unsigned long)fault_addr))
+		if (vma_lookup(current->mm, (unsigned long)fault_addr))
 			si_code = SEGV_ACCERR;
 		else
 			si_code = SEGV_MAPERR;

From 3b93e042a59dd8c5b252eb0934f195f169ce68f9 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:11 -0700
Subject: [PATCH 3182/3292] arch/m68k/kernel/sys_m68k: use vma_lookup() in
 sys_cacheflush()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-9-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/kernel/sys_m68k.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index f55bdcb8e4f1..bd0274c7592e 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -402,8 +402,8 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
 		 * to this process.
 		 */
 		mmap_read_lock(current->mm);
-		vma = find_vma(current->mm, addr);
-		if (!vma || addr < vma->vm_start || addr + len > vma->vm_end)
+		vma = vma_lookup(current->mm, addr);
+		if (!vma || addr + len > vma->vm_end)
 			goto out_unlock;
 	}
 

From 9ce2c3fc0be6e7d0bb2236a33bbb7a0f1943bd81 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:14 -0700
Subject: [PATCH 3183/3292] x86/sgx: use vma_lookup() in sgx_encl_find()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-10-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/sgx/encl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 6e74f85b6264..fec43ca65065 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -91,8 +91,8 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
 {
 	struct vm_area_struct *result;
 
-	result = find_vma(mm, addr);
-	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
+	result = vma_lookup(mm, addr);
+	if (!result || result->vm_ops != &sgx_vm_ops)
 		return -EINVAL;
 
 	*vma = result;

From fc98c03ba9ea970c6b346a6fe57f98c16a3971da Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:17 -0700
Subject: [PATCH 3184/3292] virt/kvm: use vma_lookup() instead of
 find_vma_intersection()

vma_lookup() finds the vma of a specific address with a cleaner interface
and is more readable.

Link: https://lkml.kernel.org/r/20210521174745.2219620-11-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 46fb042837d2..732bfaf252bd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2170,7 +2170,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	}
 
 retry:
-	vma = find_vma_intersection(current->mm, addr, addr + 1);
+	vma = vma_lookup(current->mm, addr);
 
 	if (vma == NULL)
 		pfn = KVM_PFN_ERR_FAULT;

From 85715d6809014870a8a4d498b292fc5711a969e7 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:20 -0700
Subject: [PATCH 3185/3292] vfio: use vma_lookup() instead of
 find_vma_intersection()

vma_lookup() finds the vma of a specific address with a cleaner interface
and is more readable.

Link: https://lkml.kernel.org/r/20210521174745.2219620-12-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/vfio/vfio_iommu_type1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index a3e925a41b0d..4fce73a8a650 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -567,7 +567,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
 	vaddr = untagged_addr(vaddr);
 
 retry:
-	vma = find_vma_intersection(mm, vaddr, vaddr + 1);
+	vma = vma_lookup(mm, vaddr);
 
 	if (vma && vma->vm_flags & VM_PFNMAP) {
 		ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);

From 47bdd1db16e67ebfde6f77eaf7625b2292ae6d58 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:23 -0700
Subject: [PATCH 3186/3292] net/ipv5/tcp: use vma_lookup() in
 tcp_zerocopy_receive()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-13-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/ipv4/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1c1f9e3de72..64bf179cc915 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2095,8 +2095,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
 
 	mmap_read_lock(current->mm);
 
-	vma = find_vma(current->mm, address);
-	if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
+	vma = vma_lookup(current->mm, address);
+	if (!vma || vma->vm_ops != &tcp_vm_ops) {
 		mmap_read_unlock(current->mm);
 		return -EINVAL;
 	}

From da68547d3692e89984f2c952c0931aa27b9095cd Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:26 -0700
Subject: [PATCH 3187/3292] drm/amdgpu: use vma_lookup() in
 amdgpu_ttm_tt_get_user_pages()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-14-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d5cbc51c5eaa..61c4fb1b87fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -709,8 +709,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	}
 
 	mmap_read_lock(mm);
-	vma = find_vma(mm, start);
-	if (unlikely(!vma || start < vma->vm_start)) {
+	vma = vma_lookup(mm, start);
+	if (unlikely(!vma)) {
 		r = -EFAULT;
 		goto out_unlock;
 	}

From 49be780f798446ea86aa6cd687f9e51cbe569149 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:29 -0700
Subject: [PATCH 3188/3292] media: videobuf2: use vma_lookup() in
 get_vaddr_frames()

vma_lookup() finds the vma of a specific address with a cleaner interface
and is more readable.

Link: https://lkml.kernel.org/r/20210521174745.2219620-15-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/common/videobuf2/frame_vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c
index 381158320a90..ce879f6f8f82 100644
--- a/drivers/media/common/videobuf2/frame_vector.c
+++ b/drivers/media/common/videobuf2/frame_vector.c
@@ -64,7 +64,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 	do {
 		unsigned long *nums = frame_vector_pfns(vec);
 
-		vma = find_vma_intersection(mm, start, start + 1);
+		vma = vma_lookup(mm, start);
 		if (!vma)
 			break;
 

From 2beaf153e1d041e0a61e3aae618294f4a037055d Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:32 -0700
Subject: [PATCH 3189/3292] misc/sgi-gru/grufault: use vma_lookup() in
 gru_find_vma()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-16-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/sgi-gru/grufault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 723825524ea0..d7ef61e602ed 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -49,8 +49,8 @@ struct vm_area_struct *gru_find_vma(unsigned long vaddr)
 {
 	struct vm_area_struct *vma;
 
-	vma = find_vma(current->mm, vaddr);
-	if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
+	vma = vma_lookup(current->mm, vaddr);
+	if (vma && vma->vm_ops == &gru_vm_ops)
 		return vma;
 	return NULL;
 }

From 9016ddeddf8510f79b4c5816855cdd244e84ad7f Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:35 -0700
Subject: [PATCH 3190/3292] kernel/events/uprobes: use vma_lookup() in
 find_active_uprobe()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-17-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/events/uprobes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6addc9780319..907d4ee00cb2 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2046,8 +2046,8 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	struct vm_area_struct *vma;
 
 	mmap_read_lock(mm);
-	vma = find_vma(mm, bp_vaddr);
-	if (vma && vma->vm_start <= bp_vaddr) {
+	vma = vma_lookup(mm, bp_vaddr);
+	if (vma) {
 		if (valid_vma(vma, false)) {
 			struct inode *inode = file_inode(vma->vm_file);
 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);

From 46e6b31d4617612e47daeb7b4b6350b116349f6d Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:38 -0700
Subject: [PATCH 3191/3292] lib/test_hmm: use vma_lookup() in dmirror_migrate()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-18-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_hmm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 80a78877bd93..15f2e2db77bc 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -686,9 +686,8 @@ static int dmirror_migrate(struct dmirror *dmirror,
 
 	mmap_read_lock(mm);
 	for (addr = start; addr < end; addr = next) {
-		vma = find_vma(mm, addr);
-		if (!vma || addr < vma->vm_start ||
-		    !(vma->vm_flags & VM_READ)) {
+		vma = vma_lookup(mm, addr);
+		if (!vma || !(vma->vm_flags & VM_READ)) {
 			ret = -EINVAL;
 			goto out;
 		}

From ff69fb8100f18151f838c1e07368bbc98b437e6a Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:41 -0700
Subject: [PATCH 3192/3292] mm/ksm: use vma_lookup() in find_mergeable_vma()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-19-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/ksm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 2f3aaeb34a42..3fa9bc8a67cf 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -521,10 +521,8 @@ static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
 	struct vm_area_struct *vma;
 	if (ksm_test_exit(mm))
 		return NULL;
-	vma = find_vma(mm, addr);
-	if (!vma || vma->vm_start > addr)
-		return NULL;
-	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+	vma = vma_lookup(mm, addr);
+	if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 		return NULL;
 	return vma;
 }

From 059b8b4875b3c046770e4f9fb553ece40b217b40 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:44 -0700
Subject: [PATCH 3193/3292] mm/migrate: use vma_lookup() in
 do_pages_stat_array()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-20-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 41ff2c9896c4..380ca57b9031 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1834,8 +1834,8 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 		struct page *page;
 		int err = -EFAULT;
 
-		vma = find_vma(mm, addr);
-		if (!vma || addr < vma->vm_start)
+		vma = vma_lookup(mm, addr);
+		if (!vma)
 			goto set_status;
 
 		/* FOLL_DUMP to ignore special (like zero) pages */

From 5aaf07f0812adef788f9f08a73914148b5fdd40e Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:47 -0700
Subject: [PATCH 3194/3292] mm/mremap: use vma_lookup() in vma_to_resize()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-21-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mremap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index 47c255b60150..a369a6100698 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -634,10 +634,11 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 	unsigned long *p)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct vm_area_struct *vma;
 	unsigned long pgoff;
 
-	if (!vma || vma->vm_start > addr)
+	vma = vma_lookup(mm, addr);
+	if (!vma)
 		return ERR_PTR(-EFAULT);
 
 	/*

From 3e418f9888463a80d559498a523e582b59e5ff2d Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:50 -0700
Subject: [PATCH 3195/3292] mm/memory.c: use vma_lookup() in
 __access_remote_vm()

Use vma_lookup() to find the VMA at a specific address.  As vma_lookup()
will return NULL if the address is not within any VMA, the start address
no longer needs to be validated.

Link: https://lkml.kernel.org/r/20210521174745.2219620-22-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index b195ece205ef..3dd6b2e73e1d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4994,8 +4994,8 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
 			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
 			 * we can access using slightly different code.
 			 */
-			vma = find_vma(mm, addr);
-			if (!vma || vma->vm_start > addr)
+			vma = vma_lookup(mm, addr);
+			if (!vma)
 				break;
 			if (vma->vm_ops && vma->vm_ops->access)
 				ret = vma->vm_ops->access(vma, addr, buf,

From 33e3575c5148c9874122d9a5062d58fc570f5ee6 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Mon, 28 Jun 2021 19:39:53 -0700
Subject: [PATCH 3196/3292] mm/mempolicy: use vma_lookup() in
 __access_remote_vm()

vma_lookup() finds the vma of a specific address with a cleaner interface
and is more readable.

Link: https://lkml.kernel.org/r/20210521174745.2219620-23-Liam.Howlett@Oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mempolicy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d79fa299b70c..325771bef5e2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -975,7 +975,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 		 * want to return MPOL_DEFAULT in this case.
 		 */
 		mmap_read_lock(mm);
-		vma = find_vma_intersection(mm, addr, addr+1);
+		vma = vma_lookup(mm, addr);
 		if (!vma) {
 			mmap_read_unlock(mm);
 			return -EFAULT;

From 5673a60b80e8d1eaaa1e800e8a85451fd037f63e Mon Sep 17 00:00:00 2001
From: Chen Li <chenli@uniontech.com>
Date: Mon, 28 Jun 2021 19:39:56 -0700
Subject: [PATCH 3197/3292] mm: update legacy flush_tlb_* to use vma

1. These tlb flush functions have been using vma instead mm long time
   ago, but there is still some comments use mm as parameter.

2. the actual struct we use is vm_area_struct instead of vma_struct.

3. remove unused flush_kern_tlb_page.

Link: https://lkml.kernel.org/r/87k0oaq311.wl-chenli@uniontech.com
Signed-off-by: Chen Li <chenli@uniontech.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Chris Zankel <chris@zankel.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlbflush.h      | 13 +++----------
 arch/arm/mm/tlb-v6.S                 |  2 +-
 arch/arm/mm/tlb-v7.S                 |  2 +-
 arch/ia64/kernel/efi_stub.S          |  2 +-
 arch/m68k/include/asm/tlbflush.h     |  2 +-
 arch/openrisc/include/asm/tlbflush.h |  2 +-
 arch/xtensa/include/asm/tlbflush.h   |  4 ++--
 7 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 24cbfc112dfa..0ccc985b90af 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -253,7 +253,7 @@ extern struct cpu_tlb_fns cpu_tlb;
  *		space.
  *		- mm	- mm_struct describing address space
  *
- *	flush_tlb_range(mm,start,end)
+ *	flush_tlb_range(vma,start,end)
  *
  *		Invalidate a range of TLB entries in the specified
  *		address space.
@@ -261,18 +261,11 @@ extern struct cpu_tlb_fns cpu_tlb;
  *		- start - start address (may not be aligned)
  *		- end	- end address (exclusive, may not be aligned)
  *
- *	flush_tlb_page(vaddr,vma)
+ *	flush_tlb_page(vma, uaddr)
  *
  *		Invalidate the specified page in the specified address range.
+ *		- vma	- vm_area_struct describing address range
  *		- vaddr - virtual address (may not be aligned)
- *		- vma	- vma_struct describing address range
- *
- *	flush_kern_tlb_page(kaddr)
- *
- *		Invalidate the TLB entry for the specified page.  The address
- *		will be in the kernels virtual memory space.  Current uses
- *		only require the D-TLB to be invalidated.
- *		- kaddr - Kernel virtual memory address
  */
 
 /*
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index 5335b9687297..74f4b383afe3 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -24,7 +24,7 @@
  *
  *	- start - start address (may not be aligned)
  *	- end   - end address (exclusive, may not be aligned)
- *	- vma   - vma_struct describing address range
+ *	- vma   - vm_area_struct describing address range
  *
  *	It is assumed that:
  *	- the "Invalidate single entry" instruction will invalidate
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index 1bb28d7db567..87bf4ab17721 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -23,7 +23,7 @@
  *
  *	- start - start address (may not be aligned)
  *	- end   - end address (exclusive, may not be aligned)
- *	- vma   - vma_struct describing address range
+ *	- vma   - vm_area_struct describing address range
  *
  *	It is assumed that:
  *	- the "Invalidate single entry" instruction will invalidate
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
index 58233bb7976d..1fd61b78fb29 100644
--- a/arch/ia64/kernel/efi_stub.S
+++ b/arch/ia64/kernel/efi_stub.S
@@ -7,7 +7,7 @@
  *
  * This stub allows us to make EFI calls in physical mode with interrupts
  * turned off.  We need this because we can't call SetVirtualMap() until
- * the kernel has booted far enough to allow allocation of struct vma_struct
+ * the kernel has booted far enough to allow allocation of struct vm_area_struct
  * entries (which we would need to map stuff with memory attributes other
  * than uncached or writeback...).  Since the GetTime() service gets called
  * earlier than that, we need to be able to make physical mode EFI calls from
diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h
index 5337bc2c262f..a6318ccd308f 100644
--- a/arch/m68k/include/asm/tlbflush.h
+++ b/arch/m68k/include/asm/tlbflush.h
@@ -263,7 +263,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr
 	BUG();
 }
 
-static inline void flush_tlb_range(struct mm_struct *mm,
+static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
 	BUG();
diff --git a/arch/openrisc/include/asm/tlbflush.h b/arch/openrisc/include/asm/tlbflush.h
index 185dcd3731ed..dbf030365ab4 100644
--- a/arch/openrisc/include/asm/tlbflush.h
+++ b/arch/openrisc/include/asm/tlbflush.h
@@ -25,7 +25,7 @@
  *  - flush_tlb_all() flushes all processes TLBs
  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
  *  - flush_tlb_page(vma, vmaddr) flushes one page
- *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
diff --git a/arch/xtensa/include/asm/tlbflush.h b/arch/xtensa/include/asm/tlbflush.h
index 856e2da2e397..573df8cea200 100644
--- a/arch/xtensa/include/asm/tlbflush.h
+++ b/arch/xtensa/include/asm/tlbflush.h
@@ -26,8 +26,8 @@
  *
  *  - flush_tlb_all() flushes all processes TLB entries
  *  - flush_tlb_mm(mm) flushes the specified mm context TLB entries
- *  - flush_tlb_page(mm, vmaddr) flushes a single page
- *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_page(vma, page) flushes a single page
+ *  - flush_tlb_range(vma, vmaddr, end) flushes a range of pages
  */
 
 void local_flush_tlb_all(void);

From f4c1ab0937c3a22c5e6e735b47fa3fa9c68dc26e Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Mon, 28 Jun 2021 19:40:02 -0700
Subject: [PATCH 3198/3292] h8300: remove unused variable

Kernel test robot throws below warning ->

>> arch/h8300/kernel/setup.c:72:26:
warning: Unused variable: region [unusedVariable]
    struct memblock_region *region;

Fixed it by removing unused variable.

Link: https://lkml.kernel.org/r/20210602185431.11416-1-jrdr.linux@gmail.com
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/h8300/kernel/setup.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index 0281f92eea3d..c3590b2e9592 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -69,8 +69,6 @@ void __init h8300_fdt_init(void *fdt, char *bootargs)
 
 static void __init bootmem_init(void)
 {
-	struct memblock_region *region;
-
 	memory_end = memory_start = 0;
 
 	/* Find main memory where is the kernel */

From e8df2c703d5d1a99cfc45124bfa6f5e1982e0166 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 28 Jun 2021 19:40:05 -0700
Subject: [PATCH 3199/3292] mm/dmapool: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR(), which makes
the code a bit shorter and easier to read.

Link: https://lkml.kernel.org/r/20210524112852.34716-1-yuehaibing@huawei.com
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/dmapool.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index 16483f86360e..64b537b3ccb0 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -62,8 +62,7 @@ struct dma_page {		/* cacheable header for 'allocation' bytes */
 static DEFINE_MUTEX(pools_lock);
 static DEFINE_MUTEX(pools_reg_lock);
 
-static ssize_t
-show_pools(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t pools_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	unsigned temp;
 	unsigned size;
@@ -103,7 +102,7 @@ show_pools(struct device *dev, struct device_attribute *attr, char *buf)
 	return PAGE_SIZE - size;
 }
 
-static DEVICE_ATTR(pools, 0444, show_pools, NULL);
+static DEVICE_ATTR_RO(pools);
 
 /**
  * dma_pool_create - Creates a pool of consistent memory blocks, for dma.

From 53d884a6675b0fd7bc8c7b4afd6ead6f17bc4c61 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Mon, 28 Jun 2021 19:40:08 -0700
Subject: [PATCH 3200/3292] mm, tracing: unify PFN format strings

Some trace event formats print PFNs as hex while others print them as
decimal.  This is rather annoying when attempting to grep through traces
to understand what's going on with a particular page.

 $ git grep -ho 'pfn=[0x%lu]\+' include/trace/events/ | sort | uniq -c
      11 pfn=0x%lx
      12 pfn=%lu
       2 pfn=%lx

Printing as hex is in the majority in the trace events, and all the normal
printks in mm/ also print PFNs as hex, so change all the PFN formats in
the trace events to use 0x%lx.

Link: https://lkml.kernel.org/r/20210602092608.1493-1-vincent.whitchurch@axis.com
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/cma.h       |  4 ++--
 include/trace/events/filemap.h   |  2 +-
 include/trace/events/kmem.h      | 12 ++++++------
 include/trace/events/page_pool.h |  4 ++--
 include/trace/events/pagemap.h   |  4 ++--
 include/trace/events/vmscan.h    |  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
index c3d354702cb0..3d708dae1542 100644
--- a/include/trace/events/cma.h
+++ b/include/trace/events/cma.h
@@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(cma_alloc_class,
 		__entry->align = align;
 	),
 
-	TP_printk("name=%s pfn=%lx page=%p count=%lu align=%u",
+	TP_printk("name=%s pfn=0x%lx page=%p count=%lu align=%u",
 		  __get_str(name),
 		  __entry->pfn,
 		  __entry->page,
@@ -60,7 +60,7 @@ TRACE_EVENT(cma_release,
 		__entry->count = count;
 	),
 
-	TP_printk("name=%s pfn=%lx page=%p count=%lu",
+	TP_printk("name=%s pfn=0x%lx page=%p count=%lu",
 		  __get_str(name),
 		  __entry->pfn,
 		  __entry->page,
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index 796053e162d2..c47b63db124e 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -36,7 +36,7 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
 			__entry->s_dev = page->mapping->host->i_rdev;
 	),
 
-	TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu",
+	TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu",
 		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
 		__entry->i_ino,
 		pfn_to_page(__entry->pfn),
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 829a75692cc0..ddc8c944f417 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -173,7 +173,7 @@ TRACE_EVENT(mm_page_free,
 		__entry->order		= order;
 	),
 
-	TP_printk("page=%p pfn=%lu order=%d",
+	TP_printk("page=%p pfn=0x%lx order=%d",
 			pfn_to_page(__entry->pfn),
 			__entry->pfn,
 			__entry->order)
@@ -193,7 +193,7 @@ TRACE_EVENT(mm_page_free_batched,
 		__entry->pfn		= page_to_pfn(page);
 	),
 
-	TP_printk("page=%p pfn=%lu order=0",
+	TP_printk("page=%p pfn=0x%lx order=0",
 			pfn_to_page(__entry->pfn),
 			__entry->pfn)
 );
@@ -219,7 +219,7 @@ TRACE_EVENT(mm_page_alloc,
 		__entry->migratetype	= migratetype;
 	),
 
-	TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
+	TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s",
 		__entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL,
 		__entry->pfn != -1UL ? __entry->pfn : 0,
 		__entry->order,
@@ -245,7 +245,7 @@ DECLARE_EVENT_CLASS(mm_page,
 		__entry->migratetype	= migratetype;
 	),
 
-	TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d",
+	TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d",
 		__entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL,
 		__entry->pfn != -1UL ? __entry->pfn : 0,
 		__entry->order,
@@ -278,7 +278,7 @@ TRACE_EVENT(mm_page_pcpu_drain,
 		__entry->migratetype	= migratetype;
 	),
 
-	TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
+	TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d",
 		pfn_to_page(__entry->pfn), __entry->pfn,
 		__entry->order, __entry->migratetype)
 );
@@ -312,7 +312,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 					get_pageblock_migratetype(page));
 	),
 
-	TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
+	TP_printk("page=%p pfn=0x%lx alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
 		pfn_to_page(__entry->pfn),
 		__entry->pfn,
 		__entry->alloc_order,
diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h
index ad0aa7f31675..ca534501158b 100644
--- a/include/trace/events/page_pool.h
+++ b/include/trace/events/page_pool.h
@@ -60,7 +60,7 @@ TRACE_EVENT(page_pool_state_release,
 		__entry->pfn		= page_to_pfn(page);
 	),
 
-	TP_printk("page_pool=%p page=%p pfn=%lu release=%u",
+	TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u",
 		  __entry->pool, __entry->page, __entry->pfn, __entry->release)
 );
 
@@ -85,7 +85,7 @@ TRACE_EVENT(page_pool_state_hold,
 		__entry->pfn	= page_to_pfn(page);
 	),
 
-	TP_printk("page_pool=%p page=%p pfn=%lu hold=%u",
+	TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u",
 		  __entry->pool, __entry->page, __entry->pfn, __entry->hold)
 );
 
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index e1735fe7c76a..1d28431e85bd 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -46,7 +46,7 @@ TRACE_EVENT(mm_lru_insertion,
 	),
 
 	/* Flag format is based on page-types.c formatting for pagemap */
-	TP_printk("page=%p pfn=%lu lru=%d flags=%s%s%s%s%s%s",
+	TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
 			__entry->page,
 			__entry->pfn,
 			__entry->lru,
@@ -75,7 +75,7 @@ TRACE_EVENT(mm_lru_activate,
 	),
 
 	/* Flag format is based on page-types.c formatting for pagemap */
-	TP_printk("page=%p pfn=%lu", __entry->page, __entry->pfn)
+	TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn)
 
 );
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 2070df64958e..00d1180527d8 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -330,7 +330,7 @@ TRACE_EVENT(mm_vmscan_writepage,
 						page_is_file_lru(page));
 	),
 
-	TP_printk("page=%p pfn=%lu flags=%s",
+	TP_printk("page=%p pfn=0x%lx flags=%s",
 		pfn_to_page(__entry->pfn),
 		__entry->pfn,
 		show_reclaim_flags(__entry->reclaim_flags))

From a2afc59fb25027749bd41c44f47382522232019e Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Mon, 28 Jun 2021 19:40:11 -0700
Subject: [PATCH 3201/3292] mm/page_alloc: add an alloc_pages_bulk_array_node()
 helper

Patch series "vmalloc() vs bulk allocator", v2.

This patch (of 3):

Add a "node" variant of the alloc_pages_bulk_array() function.  The helper
guarantees that a __alloc_pages_bulk() is invoked with a valid NUMA node
ID.

Link: https://lkml.kernel.org/r/20210516202056.2120-1-urezki@gmail.com
Link: https://lkml.kernel.org/r/20210516202056.2120-2-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 11da8af06704..94f0b8b1cb55 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -536,6 +536,15 @@ alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_arr
 	return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array);
 }
 
+static inline unsigned long
+alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
+{
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
+	return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
+}
+
 /*
  * Allocate pages, preferring the node given as nid. The node must be valid and
  * online. For more general interface, see alloc_pages_node().

From 5c1f4e690eecc795b2e4d4408e87302040fceca4 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Mon, 28 Jun 2021 19:40:14 -0700
Subject: [PATCH 3202/3292] mm/vmalloc: switch to bulk allocator in
 __vmalloc_area_node()

Recently there has been introduced a page bulk allocator for users which
need to get number of pages per one call request.

For order-0 pages switch to an alloc_pages_bulk_array_node() instead of
alloc_pages_node(), the reason is the former is not capable of allocating
set of pages, thus a one call is per one page.

Second, according to my tests the bulk allocator uses less cycles even for
scenarios when only one page is requested.  Running the "perf" on same
test case shows below difference:

<default>
  - 45.18% __vmalloc_node
     - __vmalloc_node_range
        - 35.60% __alloc_pages
           - get_page_from_freelist
                3.36% __list_del_entry_valid
                3.00% check_preemption_disabled
                1.42% prep_new_page
<default>

<patch>
  - 31.00% __vmalloc_node
     - __vmalloc_node_range
        - 14.48% __alloc_pages_bulk
             3.22% __list_del_entry_valid
           - 0.83% __alloc_pages
                get_page_from_freelist
<patch>

The "test_vmalloc.sh" also shows performance improvements:

fix_size_alloc_test_4MB   loops: 1000000 avg: 89105095 usec
fix_size_alloc_test       loops: 1000000 avg: 513672   usec
full_fit_alloc_test       loops: 1000000 avg: 748900   usec
long_busy_list_alloc_test loops: 1000000 avg: 8043038  usec
random_size_alloc_test    loops: 1000000 avg: 4028582  usec
fix_align_alloc_test      loops: 1000000 avg: 1457671  usec

fix_size_alloc_test_4MB   loops: 1000000 avg: 62083711 usec
fix_size_alloc_test       loops: 1000000 avg: 449207   usec
full_fit_alloc_test       loops: 1000000 avg: 735985   usec
long_busy_list_alloc_test loops: 1000000 avg: 5176052  usec
random_size_alloc_test    loops: 1000000 avg: 2589252  usec
fix_align_alloc_test      loops: 1000000 avg: 1365009  usec

For example 4MB allocations illustrates ~30% gain, all the
rest is also better.

Link: https://lkml.kernel.org/r/20210516202056.2120-3-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 76 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 34 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d0a7d89be091..e630f2cf7900 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2768,8 +2768,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	unsigned long array_size;
 	unsigned int nr_small_pages = size >> PAGE_SHIFT;
 	unsigned int page_order;
-	struct page **pages;
-	unsigned int i;
 
 	array_size = (unsigned long)nr_small_pages * sizeof(struct page *);
 	gfp_mask |= __GFP_NOWARN;
@@ -2778,13 +2776,13 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
-		pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+		area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
 					area->caller);
 	} else {
-		pages = kmalloc_node(array_size, nested_gfp, node);
+		area->pages = kmalloc_node(array_size, nested_gfp, node);
 	}
 
-	if (!pages) {
+	if (!area->pages) {
 		free_vm_area(area);
 		warn_alloc(gfp_mask, NULL,
 			   "vmalloc size %lu allocation failure: "
@@ -2793,43 +2791,53 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		return NULL;
 	}
 
-	area->pages = pages;
-	area->nr_pages = nr_small_pages;
+	area->nr_pages = 0;
 	set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
-
 	page_order = vm_area_page_order(area);
 
-	/*
-	 * Careful, we allocate and map page_order pages, but tracking is done
-	 * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
-	 * the physical/mapped size.
-	 */
-	for (i = 0; i < area->nr_pages; i += 1U << page_order) {
-		struct page *page;
-		int p;
+	if (!page_order) {
+		area->nr_pages = alloc_pages_bulk_array_node(
+			gfp_mask, node, nr_small_pages, area->pages);
+	} else {
+		/*
+		 * Careful, we allocate and map page_order pages, but tracking is done
+		 * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
+		 * the physical/mapped size.
+		 */
+		while (area->nr_pages < nr_small_pages) {
+			struct page *page;
+			int i;
 
-		/* Compound pages required for remap_vmalloc_page */
-		page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
-		if (unlikely(!page)) {
-			/* Successfully allocated i pages, free them in __vfree() */
-			area->nr_pages = i;
-			atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
-			warn_alloc(gfp_mask, NULL,
-				   "vmalloc size %lu allocation failure: "
-				   "page order %u allocation failed",
-				   area->nr_pages * PAGE_SIZE, page_order);
-			goto fail;
+			/* Compound pages required for remap_vmalloc_page */
+			page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
+			if (unlikely(!page))
+				break;
+
+			for (i = 0; i < (1U << page_order); i++)
+				area->pages[area->nr_pages + i] = page + i;
+
+			if (gfpflags_allow_blocking(gfp_mask))
+				cond_resched();
+
+			area->nr_pages += 1U << page_order;
 		}
-
-		for (p = 0; p < (1U << page_order); p++)
-			area->pages[i + p] = page + p;
-
-		if (gfpflags_allow_blocking(gfp_mask))
-			cond_resched();
 	}
+
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 
-	if (vmap_pages_range(addr, addr + size, prot, pages, page_shift) < 0) {
+	/*
+	 * If not enough pages were obtained to accomplish an
+	 * allocation request, free them via __vfree() if any.
+	 */
+	if (area->nr_pages != nr_small_pages) {
+		warn_alloc(gfp_mask, NULL,
+			"vmalloc size %lu allocation failure: "
+			"page order %u allocation failed",
+			area->nr_pages * PAGE_SIZE, page_order);
+		goto fail;
+	}
+
+	if (vmap_pages_range(addr, addr + size, prot, area->pages, page_shift) < 0) {
 		warn_alloc(gfp_mask, NULL,
 			   "vmalloc size %lu allocation failure: "
 			   "failed to map pages",

From cd61413baa1052fc13e75dd092a0e23ac29a0205 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Mon, 28 Jun 2021 19:40:17 -0700
Subject: [PATCH 3203/3292] mm/vmalloc: print a warning message first on
 failure

When a memory allocation for array of pages are not succeed emit a warning
message as a first step and then perform the further cleanup.

The reason it should be done in a right order is the clean up function
which is free_vm_area() can potentially also follow its error paths what
can lead to confusion what was broken first.

Link: https://lkml.kernel.org/r/20210516202056.2120-4-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e630f2cf7900..74a31abaa52c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2783,11 +2783,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	}
 
 	if (!area->pages) {
-		free_vm_area(area);
 		warn_alloc(gfp_mask, NULL,
 			   "vmalloc size %lu allocation failure: "
 			   "page array size %lu allocation failed",
 			   nr_small_pages * PAGE_SIZE, array_size);
+		free_vm_area(area);
 		return NULL;
 	}
 

From f4bdfeaf18a44b4d0bca945ace272cbf5e91a1b3 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Mon, 28 Jun 2021 19:40:20 -0700
Subject: [PATCH 3204/3292] mm/vmalloc: remove quoted strings split across
 lines

A checkpatch.pl script complains on splitting a text across lines.  It is
because if a user wants to find an entire string he or she will not
succeeded.

<snip>
WARNING: quoted string split across lines
+               "vmalloc size %lu allocation failure: "
+               "page order %u allocation failed",

total: 0 errors, 1 warnings, 10 lines checked
<snip>

Link: https://lkml.kernel.org/r/20210521204359.19943-1-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 74a31abaa52c..ed0a32ea09ee 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2784,9 +2784,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	if (!area->pages) {
 		warn_alloc(gfp_mask, NULL,
-			   "vmalloc size %lu allocation failure: "
-			   "page array size %lu allocation failed",
-			   nr_small_pages * PAGE_SIZE, array_size);
+			"vmalloc error: size %lu, failed to allocated page array size %lu",
+			nr_small_pages * PAGE_SIZE, array_size);
 		free_vm_area(area);
 		return NULL;
 	}
@@ -2831,17 +2830,15 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	 */
 	if (area->nr_pages != nr_small_pages) {
 		warn_alloc(gfp_mask, NULL,
-			"vmalloc size %lu allocation failure: "
-			"page order %u allocation failed",
+			"vmalloc error: size %lu, page order %u, failed to allocate pages",
 			area->nr_pages * PAGE_SIZE, page_order);
 		goto fail;
 	}
 
 	if (vmap_pages_range(addr, addr + size, prot, area->pages, page_shift) < 0) {
 		warn_alloc(gfp_mask, NULL,
-			   "vmalloc size %lu allocation failure: "
-			   "failed to map pages",
-			   area->nr_pages * PAGE_SIZE);
+			"vmalloc error: size %lu, failed to map pages",
+			area->nr_pages * PAGE_SIZE);
 		goto fail;
 	}
 
@@ -2886,8 +2883,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 
 	if ((size >> PAGE_SHIFT) > totalram_pages()) {
 		warn_alloc(gfp_mask, NULL,
-			   "vmalloc size %lu allocation failure: "
-			   "exceeds total pages", real_size);
+			"vmalloc error: size %lu, exceeds total pages",
+			real_size);
 		return NULL;
 	}
 
@@ -2918,8 +2915,8 @@ again:
 				  gfp_mask, caller);
 	if (!area) {
 		warn_alloc(gfp_mask, NULL,
-			   "vmalloc size %lu allocation failure: "
-			   "vm_struct allocation failed", real_size);
+			"vmalloc error: size %lu, vm_struct allocation failed",
+			real_size);
 		goto fail;
 	}
 

From 12b9f873a5d0e6b3846835ec973bbafa338d0b5a Mon Sep 17 00:00:00 2001
From: Uladzislau Rezki <urezki@gmail.com>
Date: Mon, 28 Jun 2021 19:40:23 -0700
Subject: [PATCH 3205/3292] mm/vmalloc: fallback to a single page allocator

Currently for order-0 pages we use a bulk-page allocator to get set of
pages.  From the other hand not allocating all pages is something that
might occur.  In that case we should fallbak to the single-page allocator
trying to get missing pages, because it is more permissive(direct reclaim,
etc).

Introduce a vm_area_alloc_pages() function where the described logic is
implemented.

Link: https://lkml.kernel.org/r/20210521130718.GA17882@pc638.lan
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 81 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 29 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ed0a32ea09ee..0c80caaf041a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2758,6 +2758,54 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
 EXPORT_SYMBOL_GPL(vmap_pfn);
 #endif /* CONFIG_VMAP_PFN */
 
+static inline unsigned int
+vm_area_alloc_pages(gfp_t gfp, int nid,
+		unsigned int order, unsigned long nr_pages, struct page **pages)
+{
+	unsigned int nr_allocated = 0;
+
+	/*
+	 * For order-0 pages we make use of bulk allocator, if
+	 * the page array is partly or not at all populated due
+	 * to fails, fallback to a single page allocator that is
+	 * more permissive.
+	 */
+	if (!order)
+		nr_allocated = alloc_pages_bulk_array_node(
+			gfp, nid, nr_pages, pages);
+	else
+		/*
+		 * Compound pages required for remap_vmalloc_page if
+		 * high-order pages.
+		 */
+		gfp |= __GFP_COMP;
+
+	/* High-order pages or fallback path if "bulk" fails. */
+	while (nr_allocated < nr_pages) {
+		struct page *page;
+		int i;
+
+		page = alloc_pages_node(nid, gfp, order);
+		if (unlikely(!page))
+			break;
+
+		/*
+		 * Careful, we allocate and map page-order pages, but
+		 * tracking is done per PAGE_SIZE page so as to keep the
+		 * vm_struct APIs independent of the physical/mapped size.
+		 */
+		for (i = 0; i < (1U << order); i++)
+			pages[nr_allocated + i] = page + i;
+
+		if (gfpflags_allow_blocking(gfp))
+			cond_resched();
+
+		nr_allocated += 1U << order;
+	}
+
+	return nr_allocated;
+}
+
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, unsigned int page_shift,
 				 int node)
@@ -2790,37 +2838,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		return NULL;
 	}
 
-	area->nr_pages = 0;
 	set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
 	page_order = vm_area_page_order(area);
 
-	if (!page_order) {
-		area->nr_pages = alloc_pages_bulk_array_node(
-			gfp_mask, node, nr_small_pages, area->pages);
-	} else {
-		/*
-		 * Careful, we allocate and map page_order pages, but tracking is done
-		 * per PAGE_SIZE page so as to keep the vm_struct APIs independent of
-		 * the physical/mapped size.
-		 */
-		while (area->nr_pages < nr_small_pages) {
-			struct page *page;
-			int i;
-
-			/* Compound pages required for remap_vmalloc_page */
-			page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
-			if (unlikely(!page))
-				break;
-
-			for (i = 0; i < (1U << page_order); i++)
-				area->pages[area->nr_pages + i] = page + i;
-
-			if (gfpflags_allow_blocking(gfp_mask))
-				cond_resched();
-
-			area->nr_pages += 1U << page_order;
-		}
-	}
+	area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
+		page_order, nr_small_pages, area->pages);
 
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 
@@ -2835,7 +2857,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		goto fail;
 	}
 
-	if (vmap_pages_range(addr, addr + size, prot, area->pages, page_shift) < 0) {
+	if (vmap_pages_range(addr, addr + size, prot, area->pages,
+			page_shift) < 0) {
 		warn_alloc(gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to map pages",
 			area->nr_pages * PAGE_SIZE);

From a850e932df657c11f2030920dbda5f5621cef091 Mon Sep 17 00:00:00 2001
From: Rafael Aquini <aquini@redhat.com>
Date: Mon, 28 Jun 2021 19:40:27 -0700
Subject: [PATCH 3206/3292] mm: vmalloc: add cond_resched() in __vunmap()

On non-preemptible kernel builds the watchdog can complain about soft
lockups when vfree() is called against large vmalloc areas:

[  210.851798] kvmalloc-test: vmalloc(2199023255552) succeeded
[  238.654842] watchdog: BUG: soft lockup - CPU#181 stuck for 26s! [rmmod:5203]
[  238.662716] Modules linked in: kvmalloc_test(OE-) ...
[  238.772671] CPU: 181 PID: 5203 Comm: rmmod Tainted: G S         OE     5.13.0-rc7+ #1
[  238.781413] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS PLYXCRB1.86B.0553.D01.1809190614 09/19/2018
[  238.792383] RIP: 0010:free_unref_page+0x52/0x60
[  238.797447] Code: 48 c1 fd 06 48 89 ee e8 9c d0 ff ff 84 c0 74 19 9c 41 5c fa 48 89 ee 48 89 df e8 b9 ea ff ff 41 f7 c4 00 02 00 00 74 01 fb 5b <5d> 41 5c c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f0 29 77
[  238.818406] RSP: 0018:ffffb4d87868fe98 EFLAGS: 00000206
[  238.824236] RAX: 0000000000000000 RBX: 000000001da0c945 RCX: ffffb4d87868fe40
[  238.832200] RDX: ffffd79d3beed108 RSI: ffffd7998501dc08 RDI: ffff9c6fbffd7010
[  238.840166] RBP: 000000000d518cbd R08: ffffd7998501dc08 R09: 0000000000000001
[  238.848131] R10: 0000000000000000 R11: ffffd79d3beee088 R12: 0000000000000202
[  238.856095] R13: ffff9e5be3eceec0 R14: 0000000000000000 R15: 0000000000000000
[  238.864059] FS:  00007fe082c2d740(0000) GS:ffff9f4c69b40000(0000) knlGS:0000000000000000
[  238.873089] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  238.879503] CR2: 000055a000611128 CR3: 000000f6094f6006 CR4: 00000000007706e0
[  238.887467] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  238.895433] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  238.903397] PKRU: 55555554
[  238.906417] Call Trace:
[  238.909149]  __vunmap+0x17c/0x220
[  238.912851]  __x64_sys_delete_module+0x13a/0x250
[  238.918008]  ? syscall_trace_enter.isra.20+0x13c/0x1b0
[  238.923746]  do_syscall_64+0x39/0x80
[  238.927740]  entry_SYSCALL_64_after_hwframe+0x44/0xae

Like in other range zapping routines that iterate over a large list, lets
just add cond_resched() within __vunmap()'s page-releasing loop in order
to avoid the watchdog splats.

Link: https://lkml.kernel.org/r/20210622225030.478384-1-aquini@redhat.com
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0c80caaf041a..b2ec7f751bd0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2567,6 +2567,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 
 			BUG_ON(!page);
 			__free_pages(page, page_order);
+			cond_resched();
 		}
 		atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
 

From 4469c0f17ec63dcc8c9ed512f4330b566c2c0d34 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 28 Jun 2021 19:40:30 -0700
Subject: [PATCH 3207/3292] printk: introduce dump_stack_lvl()

dump_stack() is used for many different cases, which may require a log
level consistent with other kernel messages surrounding the dump_stack()
call.  Without that, certain systems that are configured to ignore the
default level messages will miss stack traces in critical error reports.

This patch introduces dump_stack_lvl() that behaves similarly to
dump_stack(), but accepts a custom log level.  The old dump_stack()
becomes equal to dump_stack_lvl(KERN_DEFAULT).

A somewhat similar patch has been proposed in 2012:
https://lore.kernel.org/lkml/1332493269.2359.9.camel@hebo/ , but wasn't
merged.

[elver@google.com: add missing dump_stack_lvl() stub if CONFIG_PRINTK=n]
  Link: https://lkml.kernel.org/r/YJ0KAM0hQev1AmWe@elver.google.com

Link: https://lkml.kernel.org/r/20210506105405.3535023-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: he, bo <bo.he@intel.com>
Cc: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Cc: Prasad Sodagudi <psodagud@quicinc.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h |  5 +++++
 lib/dump_stack.c       | 20 +++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index fe7eb2351610..f589b8b60806 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -206,6 +206,7 @@ void __init setup_log_buf(int early);
 __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
 void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
+extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
 extern asmlinkage void dump_stack(void) __cold;
 extern void printk_safe_flush(void);
 extern void printk_safe_flush_on_panic(void);
@@ -269,6 +270,10 @@ static inline void show_regs_print_info(const char *log_lvl)
 {
 }
 
+static inline void dump_stack_lvl(const char *log_lvl)
+{
+}
+
 static inline void dump_stack(void)
 {
 }
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index f5a33b6f773f..586e3f2c6a15 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -73,10 +73,10 @@ void show_regs_print_info(const char *log_lvl)
 	dump_stack_print_info(log_lvl);
 }
 
-static void __dump_stack(void)
+static void __dump_stack(const char *log_lvl)
 {
-	dump_stack_print_info(KERN_DEFAULT);
-	show_stack(NULL, NULL, KERN_DEFAULT);
+	dump_stack_print_info(log_lvl);
+	show_stack(NULL, NULL, log_lvl);
 }
 
 /**
@@ -87,7 +87,7 @@ static void __dump_stack(void)
 #ifdef CONFIG_SMP
 static atomic_t dump_lock = ATOMIC_INIT(-1);
 
-asmlinkage __visible void dump_stack(void)
+asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
 {
 	unsigned long flags;
 	int was_locked;
@@ -117,7 +117,7 @@ retry:
 		goto retry;
 	}
 
-	__dump_stack();
+	__dump_stack(log_lvl);
 
 	if (!was_locked)
 		atomic_set(&dump_lock, -1);
@@ -125,9 +125,15 @@ retry:
 	local_irq_restore(flags);
 }
 #else
-asmlinkage __visible void dump_stack(void)
+asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
 {
-	__dump_stack();
+	__dump_stack(log_lvl);
 }
 #endif
+EXPORT_SYMBOL(dump_stack_lvl);
+
+asmlinkage __visible void dump_stack(void)
+{
+	dump_stack_lvl(KERN_DEFAULT);
+}
 EXPORT_SYMBOL(dump_stack);

From 336abff6e8723c6b98e141372956e6c0c55e8ea4 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 28 Jun 2021 19:40:33 -0700
Subject: [PATCH 3208/3292] kasan: use dump_stack_lvl(KERN_ERR) to print stacks

Most of the contents of KASAN reports are printed with pr_err(), so use a
consistent logging level to print the memory access stacks.

Link: https://lkml.kernel.org/r/20210506105405.3535023-2-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Prasad Sodagudi <psodagud@quicinc.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: he, bo <bo.he@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/report.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 14bd51ea2348..8fff1825b22c 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -230,7 +230,7 @@ static void print_address_description(void *addr, u8 tag)
 {
 	struct page *page = kasan_addr_to_page(addr);
 
-	dump_stack();
+	dump_stack_lvl(KERN_ERR);
 	pr_err("\n");
 
 	if (page && PageSlab(page)) {
@@ -375,7 +375,7 @@ void kasan_report_async(void)
 	pr_err("BUG: KASAN: invalid-access\n");
 	pr_err("Asynchronous mode enabled: no access details available\n");
 	pr_err("\n");
-	dump_stack();
+	dump_stack_lvl(KERN_ERR);
 	end_report(&flags, 0);
 }
 #endif /* CONFIG_KASAN_HW_TAGS */
@@ -420,7 +420,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 		pr_err("\n");
 		print_memory_metadata(info.first_bad_addr);
 	} else {
-		dump_stack();
+		dump_stack_lvl(KERN_ERR);
 	}
 
 	end_report(&flags, addr);

From 3ff16d30f593d80a958104ee06a94562a12c5879 Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Mon, 28 Jun 2021 19:40:36 -0700
Subject: [PATCH 3209/3292] kasan: test: improve failure message in
 KUNIT_EXPECT_KASAN_FAIL()

The KUNIT_EXPECT_KASAN_FAIL() macro currently uses KUNIT_EXPECT_EQ() to
compare fail_data.report_expected and fail_data.report_found.  This always
gave a somewhat useless error message on failure, but the addition of
extra compile-time checking with READ_ONCE() has caused it to get much
longer, and be truncated before anything useful is displayed.

Instead, just check fail_data.report_found by hand (we've just set
report_expected to 'true'), and print a better failure message with
KUNIT_FAIL().  Because of this, report_expected is no longer used
anywhere, and can be removed.

Beforehand, a failure in:
KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)area)[3100]);
would have looked like:
[22:00:34] [FAILED] vmalloc_oob
[22:00:34]     # vmalloc_oob: EXPECTATION FAILED at lib/test_kasan.c:991
[22:00:34]     Expected ({ do { extern void __compiletime_assert_705(void) __attribute__((__error__("Unsupported access size for {READ,WRITE}_ONCE()."))); if (!((sizeof(fail_data.report_expected) == sizeof(char) || sizeof(fail_data.repp
[22:00:34]     not ok 45 - vmalloc_oob

With this change, it instead looks like:
[22:04:04] [FAILED] vmalloc_oob
[22:04:04]     # vmalloc_oob: EXPECTATION FAILED at lib/test_kasan.c:993
[22:04:04]     KASAN failure expected in "((volatile char *)area)[3100]", but none occurred
[22:04:04]     not ok 45 - vmalloc_oob

Also update the example failure in the documentation to reflect this.

Link: https://lkml.kernel.org/r/20210606005531.165954-1-davidgow@google.com
Signed-off-by: David Gow <davidgow@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Acked-by: Brendan Higgins <brendanhiggins@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Daniel Axtens <dja@axtens.net>
Cc: David Gow <davidgow@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kasan.rst |  9 ++++-----
 include/linux/kasan.h             |  1 -
 lib/test_kasan.c                  | 11 +++++------
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index d3f335ffc751..83ec4a556c19 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -447,11 +447,10 @@ When a test fails due to a failed ``kmalloc``::
 
 When a test fails due to a missing KASAN report::
 
-        # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
-        Expected kasan_data->report_expected == kasan_data->report_found, but
-        kasan_data->report_expected == 1
-        kasan_data->report_found == 0
-        not ok 28 - kmalloc_double_kzfree
+        # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:974
+        KASAN failure expected in "kfree_sensitive(ptr)", but none occurred
+        not ok 44 - kmalloc_double_kzfree
+
 
 At the end the cumulative status of all KASAN tests is printed. On success::
 
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index b1678a61e6a7..18cd5ec2f469 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -17,7 +17,6 @@ struct task_struct;
 
 /* kasan_data struct is used in KUnit tests for KASAN expected failures */
 struct kunit_kasan_expectation {
-	bool report_expected;
 	bool report_found;
 };
 
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index cacbbbdef768..44e08f4d9c52 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -55,7 +55,6 @@ static int kasan_test_init(struct kunit *test)
 	multishot = kasan_save_enable_multi_shot();
 	kasan_set_tagging_report_once(false);
 	fail_data.report_found = false;
-	fail_data.report_expected = false;
 	kunit_add_named_resource(test, NULL, NULL, &resource,
 					"kasan_data", &fail_data);
 	return 0;
@@ -94,20 +93,20 @@ static void kasan_test_exit(struct kunit *test)
 	    !kasan_async_mode_enabled())				\
 		migrate_disable();					\
 	KUNIT_EXPECT_FALSE(test, READ_ONCE(fail_data.report_found));	\
-	WRITE_ONCE(fail_data.report_expected, true);			\
 	barrier();							\
 	expression;							\
 	barrier();							\
-	KUNIT_EXPECT_EQ(test,						\
-			READ_ONCE(fail_data.report_expected),		\
-			READ_ONCE(fail_data.report_found));		\
+	if (!READ_ONCE(fail_data.report_found)) {			\
+		KUNIT_FAIL(test, KUNIT_SUBTEST_INDENT "KASAN failure "	\
+				"expected in \"" #expression		\
+				 "\", but none occurred");		\
+	}								\
 	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) {				\
 		if (READ_ONCE(fail_data.report_found))			\
 			kasan_enable_tagging_sync();			\
 		migrate_enable();					\
 	}								\
 	WRITE_ONCE(fail_data.report_found, false);			\
-	WRITE_ONCE(fail_data.report_expected, false);			\
 } while (0)
 
 #define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do {			\

From 158f25522ca8cc87f512a03ed5e2a5923bd37eb3 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Mon, 28 Jun 2021 19:40:39 -0700
Subject: [PATCH 3210/3292] kasan: allow an architecture to disable inline
 instrumentation

Patch series "KASAN core changes for ppc64 radix KASAN", v16.

Building on the work of Christophe, Aneesh and Balbir, I've ported KASAN
to 64-bit Book3S kernels running on the Radix MMU.  I've been trying this
for a while, but we keep having collisions between the kasan code in the
mm tree and the code I want to put in to the ppc tree.

This series just contains the kasan core changes that we need.  There
should be no noticeable changes to other platforms.

This patch (of 4):

For annoying architectural reasons, it's very difficult to support inline
instrumentation on powerpc64.*

Add a Kconfig flag to allow an arch to disable inline.  (It's a bit
annoying to be 'backwards', but I'm not aware of any way to have an arch
force a symbol to be 'n', rather than 'y'.)

We also disable stack instrumentation in this case as it does things that
are functionally equivalent to inline instrumentation, namely adding code
that touches the shadow directly without going through a C helper.

* on ppc64 atm, the shadow lives in virtual memory and isn't accessible in
real mode.  However, before we turn on virtual memory, we parse the device
tree to determine which platform and MMU we're running under.  That calls
generic DT code, which is instrumented.  Inline instrumentation in DT
would unconditionally attempt to touch the shadow region, which we won't
have set up yet, and would crash.  We can make outline mode wait for the
arch to be ready, but we can't change what the compiler inserts for inline
mode.

Link: https://lkml.kernel.org/r/20210624034050.511391-1-dja@axtens.net
Link: https://lkml.kernel.org/r/20210624034050.511391-2-dja@axtens.net
Signed-off-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.kasan | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index cffc2ebbf185..c3b228828a80 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -12,6 +12,13 @@ config HAVE_ARCH_KASAN_HW_TAGS
 config HAVE_ARCH_KASAN_VMALLOC
 	bool
 
+config ARCH_DISABLE_KASAN_INLINE
+	bool
+	help
+	  An architecture might not support inline instrumentation.
+	  When this option is selected, inline and stack instrumentation are
+	  disabled.
+
 config CC_HAS_KASAN_GENERIC
 	def_bool $(cc-option, -fsanitize=kernel-address)
 
@@ -130,6 +137,7 @@ config KASAN_OUTLINE
 
 config KASAN_INLINE
 	bool "Inline instrumentation"
+	depends on !ARCH_DISABLE_KASAN_INLINE
 	help
 	  Compiler directly inserts code checking shadow memory before
 	  memory accesses. This is faster than outline (in some workloads
@@ -141,6 +149,7 @@ endchoice
 config KASAN_STACK
 	bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST
 	depends on KASAN_GENERIC || KASAN_SW_TAGS
+	depends on !ARCH_DISABLE_KASAN_INLINE
 	default y if CC_IS_GCC
 	help
 	  The LLVM stack address sanitizer has a know problem that
@@ -154,6 +163,9 @@ config KASAN_STACK
 	  but clang users can still enable it for builds without
 	  CONFIG_COMPILE_TEST.	On gcc it is assumed to always be safe
 	  to use and enabled by default.
+	  If the architecture disables inline instrumentation, stack
+	  instrumentation is also disabled as it adds inline-style
+	  instrumentation that is run unconditionally.
 
 config KASAN_SW_TAGS_IDENTIFY
 	bool "Enable memory corruption identification"

From af3751f3c2b6282bebcb56c35bbe4c8b671f80aa Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Mon, 28 Jun 2021 19:40:42 -0700
Subject: [PATCH 3211/3292] kasan: allow architectures to provide an outline
 readiness check

Allow architectures to define a kasan_arch_is_ready() hook that bails out
of any function that's about to touch the shadow unless the arch says that
it is ready for the memory to be accessed.  This is fairly uninvasive and
should have a negligible performance penalty.

This will only work in outline mode, so an arch must specify
ARCH_DISABLE_KASAN_INLINE if it requires this.

Link: https://lkml.kernel.org/r/20210624034050.511391-3-dja@axtens.net
Signed-off-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Marco Elver <elver@google.com>
Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/common.c  | 3 +++
 mm/kasan/generic.c | 3 +++
 mm/kasan/kasan.h   | 6 ++++++
 mm/kasan/shadow.c  | 6 ++++++
 4 files changed, 18 insertions(+)

diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 2586d3718600..267500896b1e 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -331,6 +331,9 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
 	u8 tag;
 	void *tagged_object;
 
+	if (!kasan_arch_is_ready())
+		return false;
+
 	tag = get_tag(object);
 	tagged_object = object;
 	object = kasan_reset_tag(object);
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 53cbf28859b5..c3f5ba7a294a 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -163,6 +163,9 @@ static __always_inline bool check_region_inline(unsigned long addr,
 						size_t size, bool write,
 						unsigned long ret_ip)
 {
+	if (!kasan_arch_is_ready())
+		return true;
+
 	if (unlikely(size == 0))
 		return true;
 
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 8f450bc28045..4dbc8def64f4 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -449,6 +449,12 @@ static inline void kasan_poison_last_granule(const void *address, size_t size) {
 
 #endif /* CONFIG_KASAN_GENERIC */
 
+#ifndef kasan_arch_is_ready
+static inline bool kasan_arch_is_ready(void)	{ return true; }
+#elif !defined(CONFIG_KASAN_GENERIC) || !defined(CONFIG_KASAN_OUTLINE)
+#error kasan_arch_is_ready only works in KASAN generic outline mode!
+#endif
+
 /*
  * Exported functions for interfaces called from assembly or from generated
  * code. Declarations here to avoid warning about missing declarations.
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 082ee5b6d9a1..8d95ee52d019 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -73,6 +73,9 @@ void kasan_poison(const void *addr, size_t size, u8 value, bool init)
 {
 	void *shadow_start, *shadow_end;
 
+	if (!kasan_arch_is_ready())
+		return;
+
 	/*
 	 * Perform shadow offset calculation based on untagged address, as
 	 * some of the callers (e.g. kasan_poison_object_data) pass tagged
@@ -99,6 +102,9 @@ EXPORT_SYMBOL(kasan_poison);
 #ifdef CONFIG_KASAN_GENERIC
 void kasan_poison_last_granule(const void *addr, size_t size)
 {
+	if (!kasan_arch_is_ready())
+		return;
+
 	if (size & KASAN_GRANULE_MASK) {
 		u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size);
 		*shadow = size & KASAN_GRANULE_MASK;

From c0f8aa4fa815daacb6eca52cae04820d6aecb7c2 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Mon, 28 Jun 2021 19:40:46 -0700
Subject: [PATCH 3212/3292] mm: define default MAX_PTRS_PER_* in
 include/pgtable.h

Commit c65e774fb3f6 ("x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable")
made PTRS_PER_P4D variable on x86 and introduced MAX_PTRS_PER_P4D as a
constant for cases which need a compile-time constant (e.g.  fixed-size
arrays).

powerpc likewise has boot-time selectable MMU features which can cause
other mm "constants" to vary.  For KASAN, we have some static
PTE/PMD/PUD/P4D arrays so we need compile-time maximums for all these
constants.  Extend the MAX_PTRS_PER_ idiom, and place default definitions
in include/pgtable.h.  These define MAX_PTRS_PER_x to be PTRS_PER_x unless
an architecture has defined MAX_PTRS_PER_x in its arch headers.

Clean up pgtable-nop4d.h and s390's MAX_PTRS_PER_P4D definitions while
we're at it: both can just pick up the default now.

Link: https://lkml.kernel.org/r/20210624034050.511391-4-dja@axtens.net
Signed-off-by: Daniel Axtens <dja@axtens.net>
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h     |  2 --
 include/asm-generic/pgtable-nop4d.h |  1 -
 include/linux/pgtable.h             | 22 ++++++++++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 29c7ecd5ad1d..b38f7b781564 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -344,8 +344,6 @@ static inline int is_module_addr(void *addr)
 #define PTRS_PER_P4D	_CRST_ENTRIES
 #define PTRS_PER_PGD	_CRST_ENTRIES
 
-#define MAX_PTRS_PER_P4D	PTRS_PER_P4D
-
 /*
  * Segment table and region3 table entry encoding
  * (R = read-only, I = invalid, y = young bit):
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index ce2cbb3c380f..2f6b1befb129 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -9,7 +9,6 @@
 typedef struct { pgd_t pgd; } p4d_t;
 
 #define P4D_SHIFT		PGDIR_SHIFT
-#define MAX_PTRS_PER_P4D	1
 #define PTRS_PER_P4D		1
 #define P4D_SIZE		(1UL << P4D_SHIFT)
 #define P4D_MASK		(~(P4D_SIZE-1))
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a43047b1030d..c32600c9e1ad 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1592,4 +1592,26 @@ typedef unsigned int pgtbl_mod_mask;
 #define pte_leaf_size(x) PAGE_SIZE
 #endif
 
+/*
+ * Some architectures have MMUs that are configurable or selectable at boot
+ * time. These lead to variable PTRS_PER_x. For statically allocated arrays it
+ * helps to have a static maximum value.
+ */
+
+#ifndef MAX_PTRS_PER_PTE
+#define MAX_PTRS_PER_PTE PTRS_PER_PTE
+#endif
+
+#ifndef MAX_PTRS_PER_PMD
+#define MAX_PTRS_PER_PMD PTRS_PER_PMD
+#endif
+
+#ifndef MAX_PTRS_PER_PUD
+#define MAX_PTRS_PER_PUD PTRS_PER_PUD
+#endif
+
+#ifndef MAX_PTRS_PER_P4D
+#define MAX_PTRS_PER_P4D PTRS_PER_P4D
+#endif
+
 #endif /* _LINUX_PGTABLE_H */

From cb32c9c5d45662770160e0055cb672fd6e0813e8 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Mon, 28 Jun 2021 19:40:49 -0700
Subject: [PATCH 3213/3292] kasan: use MAX_PTRS_PER_* for early shadow tables

powerpc has a variable number of PTRS_PER_*, set at runtime based on the
MMU that the kernel is booted under.

This means the PTRS_PER_* are no longer constants, and therefore breaks
the build.  Switch to using MAX_PTRS_PER_*, which are constant.

Link: https://lkml.kernel.org/r/20210624034050.511391-5-dja@axtens.net
Signed-off-by: Daniel Axtens <dja@axtens.net>
Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Suggested-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h | 6 +++---
 mm/kasan/init.c       | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 18cd5ec2f469..8d83bbffcfbb 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -40,9 +40,9 @@ struct kunit_kasan_expectation {
 #endif
 
 extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
-extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS];
-extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD];
-extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD];
+extern pte_t kasan_early_shadow_pte[MAX_PTRS_PER_PTE + PTE_HWTABLE_PTRS];
+extern pmd_t kasan_early_shadow_pmd[MAX_PTRS_PER_PMD];
+extern pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD];
 extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
 
 int kasan_populate_early_shadow(const void *shadow_start,
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index 348f31d15a97..cc64ed6858c6 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -41,7 +41,7 @@ static inline bool kasan_p4d_table(pgd_t pgd)
 }
 #endif
 #if CONFIG_PGTABLE_LEVELS > 3
-pud_t kasan_early_shadow_pud[PTRS_PER_PUD] __page_aligned_bss;
+pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD] __page_aligned_bss;
 static inline bool kasan_pud_table(p4d_t p4d)
 {
 	return p4d_page(p4d) == virt_to_page(lm_alias(kasan_early_shadow_pud));
@@ -53,7 +53,7 @@ static inline bool kasan_pud_table(p4d_t p4d)
 }
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD] __page_aligned_bss;
+pmd_t kasan_early_shadow_pmd[MAX_PTRS_PER_PMD] __page_aligned_bss;
 static inline bool kasan_pmd_table(pud_t pud)
 {
 	return pud_page(pud) == virt_to_page(lm_alias(kasan_early_shadow_pmd));
@@ -64,7 +64,7 @@ static inline bool kasan_pmd_table(pud_t pud)
 	return false;
 }
 #endif
-pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]
+pte_t kasan_early_shadow_pte[MAX_PTRS_PER_PTE + PTE_HWTABLE_PTRS]
 	__page_aligned_bss;
 
 static inline bool kasan_pte_table(pmd_t pmd)

From f06f78ab48fb90cfbef5289e5556704b74c46b7a Mon Sep 17 00:00:00 2001
From: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Date: Mon, 28 Jun 2021 19:40:52 -0700
Subject: [PATCH 3214/3292] kasan: rename CONFIG_KASAN_SW_TAGS_IDENTIFY to
 CONFIG_KASAN_TAGS_IDENTIFY

Patch series "kasan: add memory corruption identification support for hw tag-based kasan", v4.

Add memory corruption identification for hardware tag-based KASAN mode.

This patch (of 3):

Rename CONFIG_KASAN_SW_TAGS_IDENTIFY to CONFIG_KASAN_TAGS_IDENTIFY in
order to be compatible with hardware tag-based mode.

Link: https://lkml.kernel.org/r/20210626100931.22794-1-Kuan-Ying.Lee@mediatek.com
Link: https://lkml.kernel.org/r/20210626100931.22794-2-Kuan-Ying.Lee@mediatek.com
Signed-off-by: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Suggested-by: Marco Elver <elver@google.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: Nicholas Tang <nicholas.tang@mediatek.com>
Cc: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.kasan         | 2 +-
 mm/kasan/kasan.h          | 4 ++--
 mm/kasan/report_sw_tags.c | 2 +-
 mm/kasan/sw_tags.c        | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index c3b228828a80..fdb4a08dba83 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -167,7 +167,7 @@ config KASAN_STACK
 	  instrumentation is also disabled as it adds inline-style
 	  instrumentation that is run unconditionally.
 
-config KASAN_SW_TAGS_IDENTIFY
+config KASAN_TAGS_IDENTIFY
 	bool "Enable memory corruption identification"
 	depends on KASAN_SW_TAGS
 	help
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 4dbc8def64f4..2317d0943a07 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -153,7 +153,7 @@ struct kasan_track {
 	depot_stack_handle_t stack;
 };
 
-#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
 #define KASAN_NR_FREE_STACKS 5
 #else
 #define KASAN_NR_FREE_STACKS 1
@@ -170,7 +170,7 @@ struct kasan_alloc_meta {
 #else
 	struct kasan_track free_track[KASAN_NR_FREE_STACKS];
 #endif
-#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
 	u8 free_pointer_tag[KASAN_NR_FREE_STACKS];
 	u8 free_track_idx;
 #endif
diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c
index 3d20d3451d9e..821a14a19a92 100644
--- a/mm/kasan/report_sw_tags.c
+++ b/mm/kasan/report_sw_tags.c
@@ -31,7 +31,7 @@
 
 const char *kasan_get_bug_type(struct kasan_access_info *info)
 {
-#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
 	struct kasan_alloc_meta *alloc_meta;
 	struct kmem_cache *cache;
 	struct page *page;
diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c
index 9df8e7f69e87..0d6e5e976231 100644
--- a/mm/kasan/sw_tags.c
+++ b/mm/kasan/sw_tags.c
@@ -177,7 +177,7 @@ void kasan_set_free_info(struct kmem_cache *cache,
 	if (!alloc_meta)
 		return;
 
-#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
 	idx = alloc_meta->free_track_idx;
 	alloc_meta->free_pointer_tag[idx] = tag;
 	alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
@@ -196,7 +196,7 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 	if (!alloc_meta)
 		return NULL;
 
-#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
 	for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
 		if (alloc_meta->free_pointer_tag[i] == tag)
 			break;

From a0503b8a0b3c8ef1be55744a248bffb8f533d227 Mon Sep 17 00:00:00 2001
From: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Date: Mon, 28 Jun 2021 19:40:55 -0700
Subject: [PATCH 3215/3292] kasan: integrate the common part of two KASAN
 tag-based modes

1. Move kasan_get_free_track() and kasan_set_free_info() into tags.c
   and combine these two functions for SW_TAGS and HW_TAGS kasan mode.

2. Move kasan_get_bug_type() to report_tags.c and make this function
   compatible for SW_TAGS and HW_TAGS kasan mode.

Link: https://lkml.kernel.org/r/20210626100931.22794-3-Kuan-Ying.Lee@mediatek.com
Signed-off-by: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Suggested-by: Marco Elver <elver@google.com>
Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Nicholas Tang <nicholas.tang@mediatek.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/Makefile         |  4 +--
 mm/kasan/hw_tags.c        | 22 ---------------
 mm/kasan/report_hw_tags.c |  5 ----
 mm/kasan/report_sw_tags.c | 43 ----------------------------
 mm/kasan/report_tags.c    | 51 +++++++++++++++++++++++++++++++++
 mm/kasan/sw_tags.c        | 41 ---------------------------
 mm/kasan/tags.c           | 59 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 112 insertions(+), 113 deletions(-)
 create mode 100644 mm/kasan/report_tags.c
 create mode 100644 mm/kasan/tags.c

diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 9fe39a66388a..adcd9acaef61 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -37,5 +37,5 @@ CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
 
 obj-$(CONFIG_KASAN) := common.o report.o
 obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o report_generic.o shadow.o quarantine.o
-obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o
-obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o
+obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o tags.o report_tags.o
+obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o tags.o report_tags.o
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 4004388b4e4b..d867b22ddbb7 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -216,28 +216,6 @@ void __init kasan_init_hw_tags(void)
 	pr_info("KernelAddressSanitizer initialized\n");
 }
 
-void kasan_set_free_info(struct kmem_cache *cache,
-				void *object, u8 tag)
-{
-	struct kasan_alloc_meta *alloc_meta;
-
-	alloc_meta = kasan_get_alloc_meta(cache, object);
-	if (alloc_meta)
-		kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT);
-}
-
-struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
-				void *object, u8 tag)
-{
-	struct kasan_alloc_meta *alloc_meta;
-
-	alloc_meta = kasan_get_alloc_meta(cache, object);
-	if (!alloc_meta)
-		return NULL;
-
-	return &alloc_meta->free_track[0];
-}
-
 #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
 
 void kasan_set_tagging_report_once(bool state)
diff --git a/mm/kasan/report_hw_tags.c b/mm/kasan/report_hw_tags.c
index 42b2168755d6..5dbbbb930e7a 100644
--- a/mm/kasan/report_hw_tags.c
+++ b/mm/kasan/report_hw_tags.c
@@ -15,11 +15,6 @@
 
 #include "kasan.h"
 
-const char *kasan_get_bug_type(struct kasan_access_info *info)
-{
-	return "invalid-access";
-}
-
 void *kasan_find_first_bad_addr(void *addr, size_t size)
 {
 	return kasan_reset_tag(addr);
diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c
index 821a14a19a92..d2298c357834 100644
--- a/mm/kasan/report_sw_tags.c
+++ b/mm/kasan/report_sw_tags.c
@@ -29,49 +29,6 @@
 #include "kasan.h"
 #include "../slab.h"
 
-const char *kasan_get_bug_type(struct kasan_access_info *info)
-{
-#ifdef CONFIG_KASAN_TAGS_IDENTIFY
-	struct kasan_alloc_meta *alloc_meta;
-	struct kmem_cache *cache;
-	struct page *page;
-	const void *addr;
-	void *object;
-	u8 tag;
-	int i;
-
-	tag = get_tag(info->access_addr);
-	addr = kasan_reset_tag(info->access_addr);
-	page = kasan_addr_to_page(addr);
-	if (page && PageSlab(page)) {
-		cache = page->slab_cache;
-		object = nearest_obj(cache, page, (void *)addr);
-		alloc_meta = kasan_get_alloc_meta(cache, object);
-
-		if (alloc_meta) {
-			for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
-				if (alloc_meta->free_pointer_tag[i] == tag)
-					return "use-after-free";
-			}
-		}
-		return "out-of-bounds";
-	}
-
-#endif
-	/*
-	 * If access_size is a negative number, then it has reason to be
-	 * defined as out-of-bounds bug type.
-	 *
-	 * Casting negative numbers to size_t would indeed turn up as
-	 * a large size_t and its value will be larger than ULONG_MAX/2,
-	 * so that this can qualify as out-of-bounds.
-	 */
-	if (info->access_addr + info->access_size < info->access_addr)
-		return "out-of-bounds";
-
-	return "invalid-access";
-}
-
 void *kasan_find_first_bad_addr(void *addr, size_t size)
 {
 	u8 tag = get_tag(addr);
diff --git a/mm/kasan/report_tags.c b/mm/kasan/report_tags.c
new file mode 100644
index 000000000000..8a319fc16dab
--- /dev/null
+++ b/mm/kasan/report_tags.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2020 Google, Inc.
+ */
+
+#include "kasan.h"
+#include "../slab.h"
+
+const char *kasan_get_bug_type(struct kasan_access_info *info)
+{
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
+	struct kasan_alloc_meta *alloc_meta;
+	struct kmem_cache *cache;
+	struct page *page;
+	const void *addr;
+	void *object;
+	u8 tag;
+	int i;
+
+	tag = get_tag(info->access_addr);
+	addr = kasan_reset_tag(info->access_addr);
+	page = kasan_addr_to_page(addr);
+	if (page && PageSlab(page)) {
+		cache = page->slab_cache;
+		object = nearest_obj(cache, page, (void *)addr);
+		alloc_meta = kasan_get_alloc_meta(cache, object);
+
+		if (alloc_meta) {
+			for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
+				if (alloc_meta->free_pointer_tag[i] == tag)
+					return "use-after-free";
+			}
+		}
+		return "out-of-bounds";
+	}
+#endif
+
+	/*
+	 * If access_size is a negative number, then it has reason to be
+	 * defined as out-of-bounds bug type.
+	 *
+	 * Casting negative numbers to size_t would indeed turn up as
+	 * a large size_t and its value will be larger than ULONG_MAX/2,
+	 * so that this can qualify as out-of-bounds.
+	 */
+	if (info->access_addr + info->access_size < info->access_addr)
+		return "out-of-bounds";
+
+	return "invalid-access";
+}
diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c
index 0d6e5e976231..675e67375fb5 100644
--- a/mm/kasan/sw_tags.c
+++ b/mm/kasan/sw_tags.c
@@ -166,44 +166,3 @@ void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size)
 	kasan_poison((void *)addr, size, tag, false);
 }
 EXPORT_SYMBOL(__hwasan_tag_memory);
-
-void kasan_set_free_info(struct kmem_cache *cache,
-				void *object, u8 tag)
-{
-	struct kasan_alloc_meta *alloc_meta;
-	u8 idx = 0;
-
-	alloc_meta = kasan_get_alloc_meta(cache, object);
-	if (!alloc_meta)
-		return;
-
-#ifdef CONFIG_KASAN_TAGS_IDENTIFY
-	idx = alloc_meta->free_track_idx;
-	alloc_meta->free_pointer_tag[idx] = tag;
-	alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
-#endif
-
-	kasan_set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
-}
-
-struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
-				void *object, u8 tag)
-{
-	struct kasan_alloc_meta *alloc_meta;
-	int i = 0;
-
-	alloc_meta = kasan_get_alloc_meta(cache, object);
-	if (!alloc_meta)
-		return NULL;
-
-#ifdef CONFIG_KASAN_TAGS_IDENTIFY
-	for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
-		if (alloc_meta->free_pointer_tag[i] == tag)
-			break;
-	}
-	if (i == KASAN_NR_FREE_STACKS)
-		i = alloc_meta->free_track_idx;
-#endif
-
-	return &alloc_meta->free_track[i];
-}
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
new file mode 100644
index 000000000000..8f48b9502a17
--- /dev/null
+++ b/mm/kasan/tags.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains common tag-based KASAN code.
+ *
+ * Copyright (c) 2018 Google, Inc.
+ * Copyright (c) 2020 Google, Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/mm.h>
+#include <linux/static_key.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "kasan.h"
+
+void kasan_set_free_info(struct kmem_cache *cache,
+				void *object, u8 tag)
+{
+	struct kasan_alloc_meta *alloc_meta;
+	u8 idx = 0;
+
+	alloc_meta = kasan_get_alloc_meta(cache, object);
+	if (!alloc_meta)
+		return;
+
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
+	idx = alloc_meta->free_track_idx;
+	alloc_meta->free_pointer_tag[idx] = tag;
+	alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
+#endif
+
+	kasan_set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
+}
+
+struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
+				void *object, u8 tag)
+{
+	struct kasan_alloc_meta *alloc_meta;
+	int i = 0;
+
+	alloc_meta = kasan_get_alloc_meta(cache, object);
+	if (!alloc_meta)
+		return NULL;
+
+#ifdef CONFIG_KASAN_TAGS_IDENTIFY
+	for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
+		if (alloc_meta->free_pointer_tag[i] == tag)
+			break;
+	}
+	if (i == KASAN_NR_FREE_STACKS)
+		i = alloc_meta->free_track_idx;
+#endif
+
+	return &alloc_meta->free_track[i];
+}

From 7a22bdc3c443d5abc420df1381e425b49e8901a3 Mon Sep 17 00:00:00 2001
From: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Date: Mon, 28 Jun 2021 19:40:58 -0700
Subject: [PATCH 3216/3292] kasan: add memory corruption identification support
 for hardware tag-based mode

Add memory corruption identification support for hardware tag-based mode.
We store one old free pointer tag and free backtrace instead of five
because hardware tag-based kasan only has 16 different tags.

If we store as many stacks as SW tag-based kasan does(5 stacks), there is
high probability to find the same tag in the stacks when out-of-bound
issues happened and we will mistake out-of-bound issue for use-after-free.

Link: https://lkml.kernel.org/r/20210626100931.22794-4-Kuan-Ying.Lee@mediatek.com
Signed-off-by: Kuan-Ying Lee <Kuan-Ying.Lee@mediatek.com>
Suggested-by: Marco Elver <elver@google.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Nicholas Tang <nicholas.tang@mediatek.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.kasan | 2 +-
 mm/kasan/kasan.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index fdb4a08dba83..1e2d10f86011 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -169,7 +169,7 @@ config KASAN_STACK
 
 config KASAN_TAGS_IDENTIFY
 	bool "Enable memory corruption identification"
-	depends on KASAN_SW_TAGS
+	depends on KASAN_SW_TAGS || KASAN_HW_TAGS
 	help
 	  This option enables best-effort identification of bug type
 	  (use-after-free or out-of-bounds) at the cost of increased
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 2317d0943a07..98e3059bfea4 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -153,7 +153,7 @@ struct kasan_track {
 	depot_stack_handle_t stack;
 };
 
-#ifdef CONFIG_KASAN_TAGS_IDENTIFY
+#if defined(CONFIG_KASAN_TAGS_IDENTIFY) && defined(CONFIG_KASAN_SW_TAGS)
 #define KASAN_NR_FREE_STACKS 5
 #else
 #define KASAN_NR_FREE_STACKS 1

From c5a54c706e04a4ba7c4e3428776ac9e44aec17ea Mon Sep 17 00:00:00 2001
From: Jungseung Lee <js07.lee@samsung.com>
Date: Mon, 28 Jun 2021 19:41:02 -0700
Subject: [PATCH 3217/3292] mm: report which part of mem is being freed on
 initmem case

Add the details for figuring out which parts of the kernel image is being
freed on initmem case.

Before:
   Freeing unused kernel memory: 1024K

After:
   Freeing unused kernel image (initmem) memory: 1024K

Link: https://lkml.kernel.org/r/1622706274-4533-1-git-send-email-js07.lee@samsung.com
Signed-off-by: Jungseung Lee <js07.lee@samsung.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1a98b5447a3b..f08e9de92fc5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2416,7 +2416,7 @@ static inline unsigned long free_initmem_default(int poison)
 	extern char __init_begin[], __init_end[];
 
 	return free_reserved_area(&__init_begin, &__init_end,
-				  poison, "unused kernel");
+				  poison, "unused kernel image (initmem)");
 }
 
 static inline unsigned long get_num_physpages(void)

From b19bd1c976afeefc2ebba3d4dae8a4c296dae67f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:41:04 -0700
Subject: [PATCH 3218/3292] mm/mmzone.h: simplify is_highmem_idx()

There is a lot of historical ifdefery in is_highmem_idx() and its helper
zone_movable_is_highmem() that was required because of two different paths
for nodes and zones initialization that were selected at compile time.

Until commit 3f08a302f533 ("mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP
option") the movable_zone variable was only available for configurations
that had CONFIG_HAVE_MEMBLOCK_NODE_MAP enabled so the test in
zone_movable_is_highmem() used that variable only for such configurations.
For other configurations the test checked if the index of ZONE_MOVABLE
was greater by 1 than the index of ZONE_HIGMEM and then movable zone was
considered a highmem zone.  Needless to say, ZONE_MOVABLE - 1 equals
ZONE_HIGHMEM by definition when CONFIG_HIGHMEM=y.

Commit 3f08a302f533 ("mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option")
made movable_zone variable always available.  Since this variable is set
to ZONE_HIGHMEM if CONFIG_HIGHMEM is enabled and highmem zone is
populated, it is enough to check whether

	zone_idx == ZONE_MOVABLE && movable_zone == ZONE_HIGMEM

to test if zone index points to a highmem zone.

Remove zone_movable_is_highmem() that is not used anywhere except
is_highmem_idx() and use the test above in is_highmem_idx() instead.

Link: https://lkml.kernel.org/r/20210426141927.1314326-3-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d53eba1c383..c2bfefd34b59 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -982,22 +982,11 @@ static inline void zone_set_nid(struct zone *zone, int nid) {}
 
 extern int movable_zone;
 
-#ifdef CONFIG_HIGHMEM
-static inline int zone_movable_is_highmem(void)
-{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	return movable_zone == ZONE_HIGHMEM;
-#else
-	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
-#endif
-}
-#endif
-
 static inline int is_highmem_idx(enum zone_type idx)
 {
 #ifdef CONFIG_HIGHMEM
 	return (idx == ZONE_HIGHMEM ||
-		(idx == ZONE_MOVABLE && zone_movable_is_highmem()));
+		(idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
 #else
 	return 0;
 #endif

From d2f07ec052ac1a720d6f1919e3dee7d73f04d495 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:07 -0700
Subject: [PATCH 3219/3292] mm: make __dump_page static

Patch series "Constify struct page arguments".

While working on various solutions to the 32-bit struct page size
regression, one of the problems I found was the networking stack expects
to be able to pass const struct page pointers around, and the mm doesn't
provide a lot of const-friendly functions to call.  The root tangle of
problems is that a lot of functions call VM_BUG_ON_PAGE(), which calls
dump_page(), which calls a lot of functions which don't take a const
struct page (but could be const).

This patch (of 6):

The only caller of __dump_page() now opencodes dump_page(), so remove it
as an externally visible symbol.

Link: https://lkml.kernel.org/r/20210416231531.2521383-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20210416231531.2521383-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 3 +--
 mm/debug.c              | 2 +-
 mm/page_alloc.c         | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 5d0767cb424a..1935d4c72d10 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -9,8 +9,7 @@ struct page;
 struct vm_area_struct;
 struct mm_struct;
 
-extern void dump_page(struct page *page, const char *reason);
-extern void __dump_page(struct page *page, const char *reason);
+void dump_page(struct page *page, const char *reason);
 void dump_vma(const struct vm_area_struct *vma);
 void dump_mm(const struct mm_struct *mm);
 
diff --git a/mm/debug.c b/mm/debug.c
index 0bdda8407f71..84cdcd0f7bd3 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,7 +42,7 @@ const struct trace_print_flags vmaflag_names[] = {
 	{0, NULL}
 };
 
-void __dump_page(struct page *page, const char *reason)
+static void __dump_page(struct page *page, const char *reason)
 {
 	struct page *head = compound_head(page);
 	struct address_space *mapping;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bf03c76504b..4087340fca32 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -658,8 +658,7 @@ static void bad_page(struct page *page, const char *reason)
 
 	pr_alert("BUG: Bad page state in process %s  pfn:%05lx\n",
 		current->comm, page_to_pfn(page));
-	__dump_page(page, reason);
-	dump_page_owner(page);
+	dump_page(page, reason);
 
 	print_modules();
 	dump_stack();

From 691d9497285a90346a67bfee5cac2007e5e18405 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Mon, 28 Jun 2021 19:41:10 -0700
Subject: [PATCH 3220/3292] mm/page_alloc: bail out on fatal signal during
 reclaim/compaction retry attempt

A customer experienced a low-memory situation and decided to issue a
SIGKILL (i.e.  a fatal signal).  Instead of promptly terminating as one
would expect, the aforementioned task remained unresponsive.

Further investigation indicated that the task was "stuck" in the
reclaim/compaction retry loop.  Now, it does not make sense to retry
compaction when a fatal signal is pending.

In the context of try_to_compact_pages(), indeed COMPACT_SKIPPED can be
returned; albeit, not every zone, on the zone list, would be considered in
the case a fatal signal is found to be pending.  Yet, in
should_compact_retry(), given the last known compaction result, each zone,
on the zone list, can be considered/or checked (see
compaction_zonelist_suitable()).  For example, if a zone was found to
succeed, then reclaim/compaction would be tried again (notwithstanding the
above).

This patch ensures that compaction is not needlessly retried irrespective
of the last known compaction result e.g.  if it was skipped, in the
unlikely case a fatal signal is found pending.  So, OOM is at least
attempted.

Link: https://lkml.kernel.org/r/20210520142901.3371299-1-atomlin@redhat.com
Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4087340fca32..ea1efbb06e40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4251,6 +4251,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 	if (!order)
 		return false;
 
+	if (fatal_signal_pending(current))
+		return false;
+
 	if (compaction_made_progress(compact_result))
 		(*compaction_retries)++;
 

From be7c701fd42c2dd124ec5ce3493ec72e217738a8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:13 -0700
Subject: [PATCH 3221/3292] mm/debug: factor PagePoisoned out of __dump_page

Move the PagePoisoned test into dump_page().  Skip the hex print for
poisoned pages -- we know they're full of ffffffff.  Move the reason
printing from __dump_page() to dump_page().

Link: https://lkml.kernel.org/r/20210416231531.2521383-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/debug.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/mm/debug.c b/mm/debug.c
index 84cdcd0f7bd3..e73fe0a8ec3d 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,11 +42,10 @@ const struct trace_print_flags vmaflag_names[] = {
 	{0, NULL}
 };
 
-static void __dump_page(struct page *page, const char *reason)
+static void __dump_page(struct page *page)
 {
 	struct page *head = compound_head(page);
 	struct address_space *mapping;
-	bool page_poisoned = PagePoisoned(page);
 	bool compound = PageCompound(page);
 	/*
 	 * Accessing the pageblock without the zone lock. It could change to
@@ -58,16 +57,6 @@ static void __dump_page(struct page *page, const char *reason)
 	int mapcount;
 	char *type = "";
 
-	/*
-	 * If struct page is poisoned don't access Page*() functions as that
-	 * leads to recursive loop. Page*() check for poisoned pages, and calls
-	 * dump_page() when detected.
-	 */
-	if (page_poisoned) {
-		pr_warn("page:%px is uninitialized and poisoned", page);
-		goto hex_only;
-	}
-
 	if (page < head || (page >= head + MAX_ORDER_NR_PAGES)) {
 		/*
 		 * Corrupt page, so we cannot call page_mapping. Instead, do a
@@ -173,8 +162,6 @@ out_mapping:
 
 	pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags,
 		page_cma ? " CMA" : "");
-
-hex_only:
 	print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
 			sizeof(unsigned long), page,
 			sizeof(struct page), false);
@@ -182,14 +169,16 @@ hex_only:
 		print_hex_dump(KERN_WARNING, "head: ", DUMP_PREFIX_NONE, 32,
 			sizeof(unsigned long), head,
 			sizeof(struct page), false);
-
-	if (reason)
-		pr_warn("page dumped because: %s\n", reason);
 }
 
 void dump_page(struct page *page, const char *reason)
 {
-	__dump_page(page, reason);
+	if (PagePoisoned(page))
+		pr_warn("page:%p is uninitialized and poisoned", page);
+	else
+		__dump_page(page);
+	if (reason)
+		pr_warn("page dumped because: %s\n", reason);
 	dump_page_owner(page);
 }
 EXPORT_SYMBOL(dump_page);

From 8bf6f451bded5db7840b3b2932ef48be5dce6b38 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:16 -0700
Subject: [PATCH 3222/3292] mm/page_owner: constify dump_page_owner

dump_page_owner() only uses struct page to find the page_ext, and
lookup_page_ext() already takes a const argument.

Link: https://lkml.kernel.org/r/20210416231531.2521383-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_owner.h | 6 +++---
 mm/page_owner.c            | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 3468794f83d2..719bfe5108c5 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page,
 extern void __split_page_owner(struct page *page, unsigned int nr);
 extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
-extern void __dump_page_owner(struct page *page);
+extern void __dump_page_owner(const struct page *page);
 extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 					pg_data_t *pgdat, struct zone *zone);
 
@@ -46,7 +46,7 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason)
 	if (static_branch_unlikely(&page_owner_inited))
 		__set_page_owner_migrate_reason(page, reason);
 }
-static inline void dump_page_owner(struct page *page)
+static inline void dump_page_owner(const struct page *page)
 {
 	if (static_branch_unlikely(&page_owner_inited))
 		__dump_page_owner(page);
@@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
 static inline void set_page_owner_migrate_reason(struct page *page, int reason)
 {
 }
-static inline void dump_page_owner(struct page *page)
+static inline void dump_page_owner(const struct page *page)
 {
 }
 #endif /* CONFIG_PAGE_OWNER */
diff --git a/mm/page_owner.c b/mm/page_owner.c
index adfabb560eb9..f51a57e92aa3 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -392,7 +392,7 @@ err:
 	return -ENOMEM;
 }
 
-void __dump_page_owner(struct page *page)
+void __dump_page_owner(const struct page *page)
 {
 	struct page_ext *page_ext = lookup_page_ext(page);
 	struct page_owner *page_owner;

From 0f2317e34e2c7b97efd4600122115410795ebeea Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:19 -0700
Subject: [PATCH 3223/3292] mm: make compound_head const-preserving

If you pass a const pointer to compound_head(), you get a const pointer
back; if you pass a mutable pointer, you get a mutable pointer back.  Also
remove an unnecessary forward definition of struct page; we're about to
dereference page->compound_head, so it must already have been defined.

Link: https://lkml.kernel.org/r/20210416231531.2521383-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 04a34c08e0a6..d8e26243db25 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -177,17 +177,17 @@ enum pageflags {
 
 #ifndef __GENERATING_BOUNDS_H
 
-struct page;	/* forward declaration */
-
-static inline struct page *compound_head(struct page *page)
+static inline unsigned long _compound_head(const struct page *page)
 {
 	unsigned long head = READ_ONCE(page->compound_head);
 
 	if (unlikely(head & 1))
-		return (struct page *) (head - 1);
-	return page;
+		return head - 1;
+	return (unsigned long)page;
 }
 
+#define compound_head(page)	((typeof(page))_compound_head(page))
+
 static __always_inline int PageTail(struct page *page)
 {
 	return READ_ONCE(page->compound_head) & 1;

From ca891f41c4c7921a03dfd0fa1faf324393724480 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:22 -0700
Subject: [PATCH 3224/3292] mm: constify get_pfnblock_flags_mask and
 get_pfnblock_migratetype

The struct page is not modified by these routines, so it can be marked
const.

Link: https://lkml.kernel.org/r/20210416231531.2521383-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pageblock-flags.h |  2 +-
 mm/page_alloc.c                 | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..973fd731a520 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ extern unsigned int pageblock_order;
 /* Forward declaration */
 struct page;
 
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned long get_pfnblock_flags_mask(const struct page *page,
 				unsigned long pfn,
 				unsigned long mask);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ea1efbb06e40..4f5eedb6593a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -474,7 +474,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned long *get_pageblock_bitmap(const struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -484,7 +484,7 @@ static inline unsigned long *get_pageblock_bitmap(struct page *page,
 #endif /* CONFIG_SPARSEMEM */
 }
 
-static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
+static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
 	pfn &= (PAGES_PER_SECTION-1);
@@ -495,7 +495,7 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
 }
 
 static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned long __get_pfnblock_flags_mask(const struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
@@ -520,13 +520,14 @@ unsigned long __get_pfnblock_flags_mask(struct page *page,
  *
  * Return: pageblock_bits flags
  */
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
-					unsigned long mask)
+unsigned long get_pfnblock_flags_mask(const struct page *page,
+					unsigned long pfn, unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
 }
 
-static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
+static __always_inline int get_pfnblock_migratetype(const struct page *page,
+					unsigned long pfn)
 {
 	return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
 }

From 5f7dadf3958f882b393d3c4c60da232dbac66424 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:25 -0700
Subject: [PATCH 3225/3292] mm: constify page_count and page_ref_count

Now that compound_head() accepts a const struct page pointer, these two
functions can be marked as not modifying the page pointer they are passed.

Link: https://lkml.kernel.org/r/20210416231531.2521383-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ref.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index f3318f34fc54..7ad46f45df39 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -62,12 +62,12 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
 
 #endif
 
-static inline int page_ref_count(struct page *page)
+static inline int page_ref_count(const struct page *page)
 {
 	return atomic_read(&page->_refcount);
 }
 
-static inline int page_count(struct page *page)
+static inline int page_count(const struct page *page)
 {
 	return atomic_read(&compound_head(page)->_refcount);
 }

From 1cfcee728391ece94a75e4b17fa87253d40c2185 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 19:41:28 -0700
Subject: [PATCH 3226/3292] mm: optimise nth_page for contiguous memmap

If the memmap is virtually contiguous (either because we're using a
virtually mapped memmap or because we don't support a discontig memmap at
all), then we can implement nth_page() by simple addition.  Contrary to
popular belief, the compiler is not able to optimise this itself for a
vmemmap configuration.  This reduces one example user (sg.c) by four
instructions:

        struct page *page = nth_page(rsv_schp->pages[k], offset >> PAGE_SHIFT);

before:
   49 8b 45 70             mov    0x70(%r13),%rax
   48 63 c9                movslq %ecx,%rcx
   48 c1 eb 0c             shr    $0xc,%rbx
   48 8b 04 c8             mov    (%rax,%rcx,8),%rax
   48 2b 05 00 00 00 00    sub    0x0(%rip),%rax
           R_X86_64_PC32      vmemmap_base-0x4
   48 c1 f8 06             sar    $0x6,%rax
   48 01 d8                add    %rbx,%rax
   48 c1 e0 06             shl    $0x6,%rax
   48 03 05 00 00 00 00    add    0x0(%rip),%rax
           R_X86_64_PC32      vmemmap_base-0x4

after:
   49 8b 45 70             mov    0x70(%r13),%rax
   48 63 c9                movslq %ecx,%rcx
   48 c1 eb 0c             shr    $0xc,%rbx
   48 c1 e3 06             shl    $0x6,%rbx
   48 03 1c c8             add    (%rax,%rcx,8),%rbx

Link: https://lkml.kernel.org/r/20210413194625.1472345-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Douglas Gilbert <dougg@torque.net>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f08e9de92fc5..9bd21e6fad6a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -234,7 +234,11 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t index, gfp_t gfp, void **shadowp);
 
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+#else
+#define nth_page(page,n) ((page) + (n))
+#endif
 
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)

From 9660ecaa79ce5c068aa3138ca7e29a9402f284ed Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 28 Jun 2021 19:41:31 -0700
Subject: [PATCH 3227/3292] mm/page_alloc: switch to pr_debug

Having such debug messages in the dmesg log may confuse users.  Therefore
restrict debug output to cases where DEBUG is defined or dynamic debugging
is enabled for the respective code piece.

Link: https://lkml.kernel.org/r/976adb93-3041-ce63-48fc-55a6096a51c1@gmail.com
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4f5eedb6593a..902f889a324d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6770,9 +6770,8 @@ static __meminit void zone_pcp_init(struct zone *zone)
 	zone->pageset_batch = BOOT_PAGESET_BATCH;
 
 	if (populated_zone(zone))
-		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%u\n",
-			zone->name, zone->present_pages,
-					 zone_batchsize(zone));
+		pr_debug("  %s zone: %lu pages, LIFO batch:%u\n", zone->name,
+			 zone->present_pages, zone_batchsize(zone));
 }
 
 void __meminit init_currently_empty_zone(struct zone *zone,
@@ -7042,8 +7041,7 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
 
 	pgdat->node_spanned_pages = totalpages;
 	pgdat->node_present_pages = realtotalpages;
-	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
-							realtotalpages);
+	pr_debug("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
 }
 
 #ifndef CONFIG_SPARSEMEM
@@ -7243,9 +7241,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 			if (freesize >= memmap_pages) {
 				freesize -= memmap_pages;
 				if (memmap_pages)
-					printk(KERN_DEBUG
-					       "  %s zone: %lu pages used for memmap\n",
-					       zone_names[j], memmap_pages);
+					pr_debug("  %s zone: %lu pages used for memmap\n",
+						 zone_names[j], memmap_pages);
 			} else
 				pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
 					zone_names[j], memmap_pages, freesize);
@@ -7254,8 +7251,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 		/* Account for reserved pages */
 		if (j == 0 && freesize > dma_reserve) {
 			freesize -= dma_reserve;
-			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
-					zone_names[0], dma_reserve);
+			pr_debug("  %s zone: %lu pages reserved\n", zone_names[0], dma_reserve);
 		}
 
 		if (!is_highmem_idx(j))

From a0b8200d06ad6450c179407baa5f0f52f8cfcc97 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 28 Jun 2021 19:41:34 -0700
Subject: [PATCH 3228/3292] kbuild: skip per-CPU BTF generation for pahole
 v1.18-v1.21

Commit "mm/page_alloc: convert per-cpu list protection to local_lock" will
introduce a zero-sized per-CPU variable, which causes pahole to generate
invalid BTF.  Only pahole versions 1.18 through 1.21 are impacted, as
before 1.18 pahole doesn't know anything about per-CPU variables, and 1.22
contains the proper fix for the issue.

Luckily, pahole 1.18 got --skip_encoding_btf_vars option disabling BTF
generation for per-CPU variables in anticipation of some unanticipated
problems.  So use this escape hatch to disable per-CPU var BTF info on
those problematic pahole versions.  Users relying on availability of
per-CPU var BTFs would need to upgrade to pahole 1.22+, but everyone won't
notice any regressions.

Link: https://lkml.kernel.org/r/20210530002536.3193829-1-andrii@kernel.org
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Michal Suchanek <msuchanek@suse.de>
Cc: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/link-vmlinux.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 0e0f6466b18d..475faa15854e 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -235,6 +235,10 @@ gen_btf()
 
 	vmlinux_link ${1}
 
+	if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then
+		# pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars
+		extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars"
+	fi
 	if [ "${pahole_ver}" -ge "121" ]; then
 		extra_paholeopt="${extra_paholeopt} --btf_gen_floats"
 	fi

From 28f836b6777b6f42dce068a40d83a891deaaca37 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:38 -0700
Subject: [PATCH 3229/3292] mm/page_alloc: split per cpu page lists and zone
 stats

The PCP (per-cpu page allocator in page_alloc.c) shares locking
requirements with vmstat and the zone lock which is inconvenient and
causes some issues.  For example, the PCP list and vmstat share the same
per-cpu space meaning that it's possible that vmstat updates dirty cache
lines holding per-cpu lists across CPUs unless padding is used.  Second,
PREEMPT_RT does not want to disable IRQs for too long in the page
allocator.

This series splits the locking requirements and uses locks types more
suitable for PREEMPT_RT, reduces the time when special locking is required
for stats and reduces the time when IRQs need to be disabled on
!PREEMPT_RT kernels.

Why local_lock?  PREEMPT_RT considers the following sequence to be unsafe
as documented in Documentation/locking/locktypes.rst

   local_irq_disable();
   spin_lock(&lock);

The pcp allocator has this sequence for rmqueue_pcplist (local_irq_save)
-> __rmqueue_pcplist -> rmqueue_bulk (spin_lock).  While it's possible to
separate this out, it generally means there are points where we enable
IRQs and reenable them again immediately.  To prevent a migration and the
per-cpu pointer going stale, migrate_disable is also needed.  That is a
custom lock that is similar, but worse, than local_lock.  Furthermore, on
PREEMPT_RT, it's undesirable to leave IRQs disabled for too long.  By
converting to local_lock which disables migration on PREEMPT_RT, the
locking requirements can be separated and start moving the protections for
PCP, stats and the zone lock to PREEMPT_RT-safe equivalent locking.  As a
bonus, local_lock also means that PROVE_LOCKING does something useful.

After that, it's obvious that zone_statistics incurs too much overhead and
leaves IRQs disabled for longer than necessary on !PREEMPT_RT kernels.
zone_statistics uses perfectly accurate counters requiring IRQs be
disabled for parallel RMW sequences when inaccurate ones like vm_events
would do.  The series makes the NUMA statistics (NUMA_HIT and friends)
inaccurate counters that then require no special protection on
!PREEMPT_RT.

The bulk page allocator can then do stat updates in bulk with IRQs enabled
which should improve the efficiency.  Technically, this could have been
done without the local_lock and vmstat conversion work and the order
simply reflects the timing of when different series were implemented.

Finally, there are places where we conflate IRQs being disabled for the
PCP with the IRQ-safe zone spinlock.  The remainder of the series reduces
the scope of what is protected by disabled IRQs on !PREEMPT_RT kernels.
By the end of the series, page_alloc.c does not call local_irq_save so the
locking scope is a bit clearer.  The one exception is that modifying
NR_FREE_PAGES still happens in places where it's known the IRQs are
disabled as it's harmless for PREEMPT_RT and would be expensive to split
the locking there.

No performance data is included because despite the overhead of the stats,
it's within the noise for most workloads on !PREEMPT_RT.  However, Jesper
Dangaard Brouer ran a page allocation microbenchmark on a E5-1650 v4 @
3.60GHz CPU on the first version of this series.  Focusing on the array
variant of the bulk page allocator reveals the following.

(CPU: Intel(R) Xeon(R) CPU E5-1650 v4 @ 3.60GHz)
ARRAY variant: time_bulk_page_alloc_free_array: step=bulk size

         Baseline        Patched
 1       56.383          54.225 (+3.83%)
 2       40.047          35.492 (+11.38%)
 3       37.339          32.643 (+12.58%)
 4       35.578          30.992 (+12.89%)
 8       33.592          29.606 (+11.87%)
 16      32.362          28.532 (+11.85%)
 32      31.476          27.728 (+11.91%)
 64      30.633          27.252 (+11.04%)
 128     30.596          27.090 (+11.46%)

While this is a positive outcome, the series is more likely to be
interesting to the RT people in terms of getting parts of the PREEMPT_RT
tree into mainline.

This patch (of 9):

The per-cpu page allocator lists and the per-cpu vmstat deltas are stored
in the same struct per_cpu_pages even though vmstats have no direct impact
on the per-cpu page lists.  This is inconsistent because the vmstats for a
node are stored on a dedicated structure.  The bigger issue is that the
per_cpu_pages structure is not cache-aligned and stat updates either cache
conflict with adjacent per-cpu lists incurring a runtime cost or padding
is required incurring a memory cost.

This patch splits the per-cpu pagelists and the vmstat deltas into
separate structures.  It's mostly a mechanical conversion but some
variable renaming is done to clearly distinguish the per-cpu pages
structure (pcp) from the vmstats (pzstats).

Superficially, this appears to increase the size of the per_cpu_pages
structure but the movement of expire fills a structure hole so there is no
impact overall.

[mgorman@techsingularity.net: make it W=1 cleaner]
  Link: https://lkml.kernel.org/r/20210514144622.GA3735@techsingularity.net
[mgorman@techsingularity.net: make it W=1 even cleaner]
  Link: https://lkml.kernel.org/r/20210516140705.GB3735@techsingularity.net
[lkp@intel.com: check struct per_cpu_zonestat has a non-zero size]
[vbabka@suse.cz: Init zone->per_cpu_zonestats properly]

Link: https://lkml.kernel.org/r/20210512095458.30632-1-mgorman@techsingularity.net
Link: https://lkml.kernel.org/r/20210512095458.30632-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 18 ++++----
 include/linux/vmstat.h |  8 ++--
 mm/page_alloc.c        | 85 ++++++++++++++++++++----------------
 mm/vmstat.c            | 98 ++++++++++++++++++++++--------------------
 4 files changed, 113 insertions(+), 96 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c2bfefd34b59..a50b123ab7ae 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -341,20 +341,21 @@ struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
+#ifdef CONFIG_NUMA
+	int expire;		/* When 0, remote pagesets are drained */
+#endif
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[MIGRATE_PCPTYPES];
 };
 
-struct per_cpu_pageset {
-	struct per_cpu_pages pcp;
-#ifdef CONFIG_NUMA
-	s8 expire;
-	u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
-#endif
+struct per_cpu_zonestat {
 #ifdef CONFIG_SMP
-	s8 stat_threshold;
 	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
+	s8 stat_threshold;
+#endif
+#ifdef CONFIG_NUMA
+	u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
 #endif
 };
 
@@ -484,7 +485,8 @@ struct zone {
 	int node;
 #endif
 	struct pglist_data	*zone_pgdat;
-	struct per_cpu_pageset __percpu *pageset;
+	struct per_cpu_pages	__percpu *per_cpu_pageset;
+	struct per_cpu_zonestat	__percpu *per_cpu_zonestats;
 	/*
 	 * the high and batch values are copied to individual pagesets for
 	 * faster access
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3299cd69e4ca..0c5f36504613 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -163,7 +163,7 @@ static inline unsigned long zone_numa_state_snapshot(struct zone *zone,
 	int cpu;
 
 	for_each_online_cpu(cpu)
-		x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item];
+		x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item];
 
 	return x;
 }
@@ -236,7 +236,7 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 #ifdef CONFIG_SMP
 	int cpu;
 	for_each_online_cpu(cpu)
-		x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item];
+		x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item];
 
 	if (x < 0)
 		x = 0;
@@ -291,7 +291,7 @@ struct ctl_table;
 int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp,
 		loff_t *ppos);
 
-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
+void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *);
 
 int calculate_pressure_threshold(struct zone *zone);
 int calculate_normal_threshold(struct zone *zone);
@@ -399,7 +399,7 @@ static inline void cpu_vm_stats_fold(int cpu) { }
 static inline void quiet_vmstat(void) { }
 
 static inline void drain_zonestat(struct zone *zone,
-			struct per_cpu_pageset *pset) { }
+			struct per_cpu_zonestat *pzstats) { }
 #endif		/* CONFIG_SMP */
 
 static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 902f889a324d..330c7307a92b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3026,15 +3026,14 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
 	unsigned long flags;
-	struct per_cpu_pageset *pset;
 	struct per_cpu_pages *pcp;
 
 	local_irq_save(flags);
-	pset = per_cpu_ptr(zone->pageset, cpu);
 
-	pcp = &pset->pcp;
+	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
 	if (pcp->count)
 		free_pcppages_bulk(zone, pcp->count, pcp);
+
 	local_irq_restore(flags);
 }
 
@@ -3133,7 +3132,7 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
 	 * disables preemption as part of its processing
 	 */
 	for_each_online_cpu(cpu) {
-		struct per_cpu_pageset *pcp;
+		struct per_cpu_pages *pcp;
 		struct zone *z;
 		bool has_pcps = false;
 
@@ -3144,13 +3143,13 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
 			 */
 			has_pcps = true;
 		} else if (zone) {
-			pcp = per_cpu_ptr(zone->pageset, cpu);
-			if (pcp->pcp.count)
+			pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+			if (pcp->count)
 				has_pcps = true;
 		} else {
 			for_each_populated_zone(z) {
-				pcp = per_cpu_ptr(z->pageset, cpu);
-				if (pcp->pcp.count) {
+				pcp = per_cpu_ptr(z->per_cpu_pageset, cpu);
+				if (pcp->count) {
 					has_pcps = true;
 					break;
 				}
@@ -3280,7 +3279,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
-	pcp = &this_cpu_ptr(zone->pageset)->pcp;
+	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= READ_ONCE(pcp->high))
@@ -3496,7 +3495,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	unsigned long flags;
 
 	local_irq_save(flags);
-	pcp = &this_cpu_ptr(zone->pageset)->pcp;
+	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
 	if (page) {
@@ -5105,7 +5104,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 
 	/* Attempt the batch allocation */
 	local_irq_save(flags);
-	pcp = &this_cpu_ptr(zone->pageset)->pcp;
+	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pcp_list = &pcp->lists[ac.migratetype];
 
 	while (nr_populated < nr_pages) {
@@ -5720,7 +5719,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			continue;
 
 		for_each_online_cpu(cpu)
-			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
+			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
 	}
 
 	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
@@ -5812,7 +5811,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 
 		free_pcp = 0;
 		for_each_online_cpu(cpu)
-			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
+			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
 
 		show_node(zone);
 		printk(KERN_CONT
@@ -5853,7 +5852,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(free_pcp),
-			K(this_cpu_read(zone->pageset->pcp.count)),
+			K(this_cpu_read(zone->per_cpu_pageset->count)),
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
@@ -6180,11 +6179,12 @@ static void build_zonelists(pg_data_t *pgdat)
  * not check if the processor is online before following the pageset pointer.
  * Other parts of the kernel may not check if the zone is available.
  */
-static void pageset_init(struct per_cpu_pageset *p);
+static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats);
 /* These effectively disable the pcplists in the boot pageset completely */
 #define BOOT_PAGESET_HIGH	0
 #define BOOT_PAGESET_BATCH	1
-static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
+static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
 static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
 
 static void __build_all_zonelists(void *data)
@@ -6251,7 +6251,7 @@ build_all_zonelists_init(void)
 	 * (a chicken-egg dilemma).
 	 */
 	for_each_possible_cpu(cpu)
-		pageset_init(&per_cpu(boot_pageset, cpu));
+		per_cpu_pages_init(&per_cpu(boot_pageset, cpu), &per_cpu(boot_zonestats, cpu));
 
 	mminit_verify_zonelist();
 	cpuset_init_current_mems_allowed();
@@ -6650,14 +6650,13 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 	WRITE_ONCE(pcp->high, high);
 }
 
-static void pageset_init(struct per_cpu_pageset *p)
+static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats)
 {
-	struct per_cpu_pages *pcp;
 	int migratetype;
 
-	memset(p, 0, sizeof(*p));
+	memset(pcp, 0, sizeof(*pcp));
+	memset(pzstats, 0, sizeof(*pzstats));
 
-	pcp = &p->pcp;
 	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
 		INIT_LIST_HEAD(&pcp->lists[migratetype]);
 
@@ -6674,12 +6673,12 @@ static void pageset_init(struct per_cpu_pageset *p)
 static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high,
 		unsigned long batch)
 {
-	struct per_cpu_pageset *p;
+	struct per_cpu_pages *pcp;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		p = per_cpu_ptr(zone->pageset, cpu);
-		pageset_update(&p->pcp, high, batch);
+		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+		pageset_update(pcp, high, batch);
 	}
 }
 
@@ -6714,13 +6713,20 @@ static void zone_set_pageset_high_and_batch(struct zone *zone)
 
 void __meminit setup_zone_pageset(struct zone *zone)
 {
-	struct per_cpu_pageset *p;
 	int cpu;
 
-	zone->pageset = alloc_percpu(struct per_cpu_pageset);
+	/* Size may be 0 on !SMP && !NUMA */
+	if (sizeof(struct per_cpu_zonestat) > 0)
+		zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat);
+
+	zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages);
 	for_each_possible_cpu(cpu) {
-		p = per_cpu_ptr(zone->pageset, cpu);
-		pageset_init(p);
+		struct per_cpu_pages *pcp;
+		struct per_cpu_zonestat *pzstats;
+
+		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+		per_cpu_pages_init(pcp, pzstats);
 	}
 
 	zone_set_pageset_high_and_batch(zone);
@@ -6747,9 +6753,9 @@ void __init setup_per_cpu_pageset(void)
 	 * the nodes these zones are associated with.
 	 */
 	for_each_possible_cpu(cpu) {
-		struct per_cpu_pageset *pcp = &per_cpu(boot_pageset, cpu);
-		memset(pcp->vm_numa_stat_diff, 0,
-		       sizeof(pcp->vm_numa_stat_diff));
+		struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu);
+		memset(pzstats->vm_numa_stat_diff, 0,
+		       sizeof(pzstats->vm_numa_stat_diff));
 	}
 #endif
 
@@ -6765,7 +6771,8 @@ static __meminit void zone_pcp_init(struct zone *zone)
 	 * relies on the ability of the linker to provide the
 	 * offset of a (static) per cpu variable into the per cpu area.
 	 */
-	zone->pageset = &boot_pageset;
+	zone->per_cpu_pageset = &boot_pageset;
+	zone->per_cpu_zonestats = &boot_zonestats;
 	zone->pageset_high = BOOT_PAGESET_HIGH;
 	zone->pageset_batch = BOOT_PAGESET_BATCH;
 
@@ -9046,15 +9053,17 @@ void zone_pcp_enable(struct zone *zone)
 void zone_pcp_reset(struct zone *zone)
 {
 	int cpu;
-	struct per_cpu_pageset *pset;
+	struct per_cpu_zonestat *pzstats;
 
-	if (zone->pageset != &boot_pageset) {
+	if (zone->per_cpu_pageset != &boot_pageset) {
 		for_each_online_cpu(cpu) {
-			pset = per_cpu_ptr(zone->pageset, cpu);
-			drain_zonestat(zone, pset);
+			pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+			drain_zonestat(zone, pzstats);
 		}
-		free_percpu(zone->pageset);
-		zone->pageset = &boot_pageset;
+		free_percpu(zone->per_cpu_pageset);
+		free_percpu(zone->per_cpu_zonestats);
+		zone->per_cpu_pageset = &boot_pageset;
+		zone->per_cpu_zonestats = &boot_zonestats;
 	}
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index cccee36b289c..f1400ba46beb 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -44,7 +44,7 @@ static void zero_zone_numa_counters(struct zone *zone)
 	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
 		atomic_long_set(&zone->vm_numa_stat[item], 0);
 		for_each_online_cpu(cpu)
-			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
+			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item]
 						= 0;
 	}
 }
@@ -266,7 +266,7 @@ void refresh_zone_stat_thresholds(void)
 		for_each_online_cpu(cpu) {
 			int pgdat_threshold;
 
-			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
 							= threshold;
 
 			/* Base nodestat threshold on the largest populated zone. */
@@ -303,7 +303,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
 
 		threshold = (*calculate_pressure)(zone);
 		for_each_online_cpu(cpu)
-			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
 							= threshold;
 	}
 }
@@ -316,7 +316,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 			   long delta)
 {
-	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	long x;
 	long t;
@@ -389,7 +389,7 @@ EXPORT_SYMBOL(__mod_node_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
@@ -435,7 +435,7 @@ EXPORT_SYMBOL(__inc_node_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
@@ -495,7 +495,7 @@ EXPORT_SYMBOL(__dec_node_page_state);
 static inline void mod_zone_state(struct zone *zone,
        enum zone_stat_item item, long delta, int overstep_mode)
 {
-	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	long o, n, t, z;
 
@@ -781,19 +781,22 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 	int changes = 0;
 
 	for_each_populated_zone(zone) {
-		struct per_cpu_pageset __percpu *p = zone->pageset;
+		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+#ifdef CONFIG_NUMA
+		struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
+#endif
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
 			int v;
 
-			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
+			v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
 			if (v) {
 
 				atomic_long_add(v, &zone->vm_stat[i]);
 				global_zone_diff[i] += v;
 #ifdef CONFIG_NUMA
 				/* 3 seconds idle till flush */
-				__this_cpu_write(p->expire, 3);
+				__this_cpu_write(pcp->expire, 3);
 #endif
 			}
 		}
@@ -801,12 +804,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
 			int v;
 
-			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
+			v = this_cpu_xchg(pzstats->vm_numa_stat_diff[i], 0);
 			if (v) {
 
 				atomic_long_add(v, &zone->vm_numa_stat[i]);
 				global_numa_diff[i] += v;
-				__this_cpu_write(p->expire, 3);
+				__this_cpu_write(pcp->expire, 3);
 			}
 		}
 
@@ -819,23 +822,23 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 			 * Check if there are pages remaining in this pageset
 			 * if not then there is nothing to expire.
 			 */
-			if (!__this_cpu_read(p->expire) ||
-			       !__this_cpu_read(p->pcp.count))
+			if (!__this_cpu_read(pcp->expire) ||
+			       !__this_cpu_read(pcp->count))
 				continue;
 
 			/*
 			 * We never drain zones local to this processor.
 			 */
 			if (zone_to_nid(zone) == numa_node_id()) {
-				__this_cpu_write(p->expire, 0);
+				__this_cpu_write(pcp->expire, 0);
 				continue;
 			}
 
-			if (__this_cpu_dec_return(p->expire))
+			if (__this_cpu_dec_return(pcp->expire))
 				continue;
 
-			if (__this_cpu_read(p->pcp.count)) {
-				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+			if (__this_cpu_read(pcp->count)) {
+				drain_zone_pages(zone, this_cpu_ptr(pcp));
 				changes++;
 			}
 		}
@@ -882,27 +885,27 @@ void cpu_vm_stats_fold(int cpu)
 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
 	for_each_populated_zone(zone) {
-		struct per_cpu_pageset *p;
+		struct per_cpu_zonestat *pzstats;
 
-		p = per_cpu_ptr(zone->pageset, cpu);
+		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-			if (p->vm_stat_diff[i]) {
+			if (pzstats->vm_stat_diff[i]) {
 				int v;
 
-				v = p->vm_stat_diff[i];
-				p->vm_stat_diff[i] = 0;
+				v = pzstats->vm_stat_diff[i];
+				pzstats->vm_stat_diff[i] = 0;
 				atomic_long_add(v, &zone->vm_stat[i]);
 				global_zone_diff[i] += v;
 			}
 
 #ifdef CONFIG_NUMA
 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-			if (p->vm_numa_stat_diff[i]) {
+			if (pzstats->vm_numa_stat_diff[i]) {
 				int v;
 
-				v = p->vm_numa_stat_diff[i];
-				p->vm_numa_stat_diff[i] = 0;
+				v = pzstats->vm_numa_stat_diff[i];
+				pzstats->vm_numa_stat_diff[i] = 0;
 				atomic_long_add(v, &zone->vm_numa_stat[i]);
 				global_numa_diff[i] += v;
 			}
@@ -936,24 +939,24 @@ void cpu_vm_stats_fold(int cpu)
  * this is only called if !populated_zone(zone), which implies no other users of
  * pset->vm_stat_diff[] exist.
  */
-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
+void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
 {
 	int i;
 
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		if (pset->vm_stat_diff[i]) {
-			int v = pset->vm_stat_diff[i];
-			pset->vm_stat_diff[i] = 0;
+		if (pzstats->vm_stat_diff[i]) {
+			int v = pzstats->vm_stat_diff[i];
+			pzstats->vm_stat_diff[i] = 0;
 			atomic_long_add(v, &zone->vm_stat[i]);
 			atomic_long_add(v, &vm_zone_stat[i]);
 		}
 
 #ifdef CONFIG_NUMA
 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		if (pset->vm_numa_stat_diff[i]) {
-			int v = pset->vm_numa_stat_diff[i];
+		if (pzstats->vm_numa_stat_diff[i]) {
+			int v = pzstats->vm_numa_stat_diff[i];
 
-			pset->vm_numa_stat_diff[i] = 0;
+			pzstats->vm_numa_stat_diff[i] = 0;
 			atomic_long_add(v, &zone->vm_numa_stat[i]);
 			atomic_long_add(v, &vm_numa_stat[i]);
 		}
@@ -965,8 +968,8 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
 void __inc_numa_state(struct zone *zone,
 				 enum numa_stat_item item)
 {
-	struct per_cpu_pageset __percpu *pcp = zone->pageset;
-	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
+	struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+	u16 __percpu *p = pzstats->vm_numa_stat_diff + item;
 	u16 v;
 
 	v = __this_cpu_inc_return(*p);
@@ -1693,21 +1696,23 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 
 	seq_printf(m, "\n  pagesets");
 	for_each_online_cpu(i) {
-		struct per_cpu_pageset *pageset;
+		struct per_cpu_pages *pcp;
+		struct per_cpu_zonestat __maybe_unused *pzstats;
 
-		pageset = per_cpu_ptr(zone->pageset, i);
+		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
 		seq_printf(m,
 			   "\n    cpu: %i"
 			   "\n              count: %i"
 			   "\n              high:  %i"
 			   "\n              batch: %i",
 			   i,
-			   pageset->pcp.count,
-			   pageset->pcp.high,
-			   pageset->pcp.batch);
+			   pcp->count,
+			   pcp->high,
+			   pcp->batch);
 #ifdef CONFIG_SMP
+		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
 		seq_printf(m, "\n  vm stats threshold: %d",
-				pageset->stat_threshold);
+				pzstats->stat_threshold);
 #endif
 	}
 	seq_printf(m,
@@ -1927,17 +1932,18 @@ static bool need_update(int cpu)
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
-		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
 		struct per_cpu_nodestat *n;
+
 		/*
 		 * The fast way of checking if there are any vmstat diffs.
 		 */
-		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
-			       sizeof(p->vm_stat_diff[0])))
+		if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
+			       sizeof(pzstats->vm_stat_diff[0])))
 			return true;
 #ifdef CONFIG_NUMA
-		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
-			       sizeof(p->vm_numa_stat_diff[0])))
+		if (memchr_inv(pzstats->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
+			       sizeof(pzstats->vm_numa_stat_diff[0])))
 			return true;
 #endif
 		if (last_pgdat == zone->zone_pgdat)

From dbbee9d5cd83f9d0a29639e260516907ceb2ac3d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:41 -0700
Subject: [PATCH 3230/3292] mm/page_alloc: convert per-cpu list protection to
 local_lock

There is a lack of clarity of what exactly
local_irq_save/local_irq_restore protects in page_alloc.c .  It conflates
the protection of per-cpu page allocation structures with per-cpu vmstat
deltas.

This patch protects the PCP structure using local_lock which for most
configurations is identical to IRQ enabling/disabling.  The scope of the
lock is still wider than it should be but this is decreased later.

It is possible for the local_lock to be embedded safely within struct
per_cpu_pages but it adds complexity to free_unref_page_list.

[akpm@linux-foundation.org: coding style fixes]
[mgorman@techsingularity.net: work around a pahole limitation with zero-sized struct pagesets]
  Link: https://lkml.kernel.org/r/20210526080741.GW30378@techsingularity.net
[lkp@intel.com: Make pagesets static]

Link: https://lkml.kernel.org/r/20210512095458.30632-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  2 ++
 lib/Kconfig.debug      |  3 +++
 mm/page_alloc.c        | 61 +++++++++++++++++++++++++++++++-----------
 3 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a50b123ab7ae..0d6bb737e5a2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -20,6 +20,7 @@
 #include <linux/atomic.h>
 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
+#include <linux/local_lock.h>
 #include <asm/page.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -337,6 +338,7 @@ enum zone_watermarks {
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
 
+/* Fields and list protected by pagesets local_lock in page_alloc.c */
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7723f58a9394..deca67d28abb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -313,6 +313,9 @@ config DEBUG_INFO_BTF
 config PAHOLE_HAS_SPLIT_BTF
 	def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
 
+config PAHOLE_HAS_ZEROSIZE_PERCPU_SUPPORT
+	def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "122")
+
 config DEBUG_INFO_BTF_MODULES
 	def_bool y
 	depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 330c7307a92b..89872ad5e872 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -122,6 +122,24 @@ typedef int __bitwise fpi_t;
 static DEFINE_MUTEX(pcp_batch_high_lock);
 #define MIN_PERCPU_PAGELIST_FRACTION	(8)
 
+struct pagesets {
+	local_lock_t lock;
+#if defined(CONFIG_DEBUG_INFO_BTF) &&				\
+	!defined(CONFIG_DEBUG_LOCK_ALLOC) &&			\
+	!defined(CONFIG_PAHOLE_HAS_ZEROSIZE_PERCPU_SUPPORT)
+	/*
+	 * pahole 1.21 and earlier gets confused by zero-sized per-CPU
+	 * variables and produces invalid BTF. Ensure that
+	 * sizeof(struct pagesets) != 0 for older versions of pahole.
+	 */
+	char __pahole_hack;
+	#warning "pahole too old to support zero-sized struct pagesets"
+#endif
+};
+static DEFINE_PER_CPU(struct pagesets, pagesets) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -1453,6 +1471,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		} while (--count && --batch_free && !list_empty(list));
 	}
 
+	/*
+	 * local_lock_irq held so equivalent to spin_lock_irqsave for
+	 * both PREEMPT_RT and non-PREEMPT_RT configurations.
+	 */
 	spin_lock(&zone->lock);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
@@ -1573,6 +1595,11 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
+
+	/*
+	 * TODO FIX: Disable IRQs before acquiring IRQ-safe zone->lock
+	 * and protect vmstat updates.
+	 */
 	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype,
@@ -2955,6 +2982,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 {
 	int i, allocated = 0;
 
+	/*
+	 * local_lock_irq held so equivalent to spin_lock_irqsave for
+	 * both PREEMPT_RT and non-PREEMPT_RT configurations.
+	 */
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype,
@@ -3007,12 +3038,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	unsigned long flags;
 	int to_drain, batch;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pagesets.lock, flags);
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0)
 		free_pcppages_bulk(zone, to_drain, pcp);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 }
 #endif
 
@@ -3028,13 +3059,13 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 	unsigned long flags;
 	struct per_cpu_pages *pcp;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pagesets.lock, flags);
 
 	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
 	if (pcp->count)
 		free_pcppages_bulk(zone, pcp->count, pcp);
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 }
 
 /*
@@ -3297,9 +3328,9 @@ void free_unref_page(struct page *page)
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pagesets.lock, flags);
 	free_unref_page_commit(page, pfn);
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 }
 
 /*
@@ -3319,7 +3350,7 @@ void free_unref_page_list(struct list_head *list)
 		set_page_private(page, pfn);
 	}
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pagesets.lock, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
 
@@ -3332,12 +3363,12 @@ void free_unref_page_list(struct list_head *list)
 		 * a large list of pages to free.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			local_irq_restore(flags);
+			local_unlock_irqrestore(&pagesets.lock, flags);
 			batch_count = 0;
-			local_irq_save(flags);
+			local_lock_irqsave(&pagesets.lock, flags);
 		}
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 }
 
 /*
@@ -3494,7 +3525,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct page *page;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_lock_irqsave(&pagesets.lock, flags);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
@@ -3502,7 +3533,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone);
 	}
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 	return page;
 }
 
@@ -5103,7 +5134,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		goto failed;
 
 	/* Attempt the batch allocation */
-	local_irq_save(flags);
+	local_lock_irqsave(&pagesets.lock, flags);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pcp_list = &pcp->lists[ac.migratetype];
 
@@ -5141,12 +5172,12 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 
 	return nr_populated;
 
 failed_irq:
-	local_irq_restore(flags);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 
 failed:
 	page = __alloc_pages(gfp, 0, preferred_nid, nodemask);

From f19298b9516c1a031b34b4147773457e3efe743b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:44 -0700
Subject: [PATCH 3231/3292] mm/vmstat: convert NUMA statistics to basic NUMA
 counters

NUMA statistics are maintained on the zone level for hits, misses, foreign
etc but nothing relies on them being perfectly accurate for functional
correctness.  The counters are used by userspace to get a general overview
of a workloads NUMA behaviour but the page allocator incurs a high cost to
maintain perfect accuracy similar to what is required for a vmstat like
NR_FREE_PAGES.  There even is a sysctl vm.numa_stat to allow userspace to
turn off the collection of NUMA statistics like NUMA_HIT.

This patch converts NUMA_HIT and friends to be NUMA events with similar
accuracy to VM events.  There is a possibility that slight errors will be
introduced but the overall trend as seen by userspace will be similar.
The counters are no longer updated from vmstat_refresh context as it is
unnecessary overhead for counters that may never be read by userspace.
Note that counters could be maintained at the node level to save space but
it would have a user-visible impact due to /proc/zoneinfo.

[lkp@intel.com: Fix misplaced closing brace for !CONFIG_NUMA]

Link: https://lkml.kernel.org/r/20210512095458.30632-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c    |  18 ++--
 include/linux/mmzone.h |  13 ++-
 include/linux/vmstat.h |  45 +++++-----
 mm/mempolicy.c         |   2 +-
 mm/page_alloc.c        |  12 +--
 mm/vmstat.c            | 181 ++++++++++++++++-------------------------
 6 files changed, 118 insertions(+), 153 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 2c36f61d30bc..9db297431b97 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -482,6 +482,7 @@ static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL);
 static ssize_t node_read_numastat(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
+	fold_vm_numa_events();
 	return sysfs_emit(buf,
 			  "numa_hit %lu\n"
 			  "numa_miss %lu\n"
@@ -489,12 +490,12 @@ static ssize_t node_read_numastat(struct device *dev,
 			  "interleave_hit %lu\n"
 			  "local_node %lu\n"
 			  "other_node %lu\n",
-			  sum_zone_numa_state(dev->id, NUMA_HIT),
-			  sum_zone_numa_state(dev->id, NUMA_MISS),
-			  sum_zone_numa_state(dev->id, NUMA_FOREIGN),
-			  sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
-			  sum_zone_numa_state(dev->id, NUMA_LOCAL),
-			  sum_zone_numa_state(dev->id, NUMA_OTHER));
+			  sum_zone_numa_event_state(dev->id, NUMA_HIT),
+			  sum_zone_numa_event_state(dev->id, NUMA_MISS),
+			  sum_zone_numa_event_state(dev->id, NUMA_FOREIGN),
+			  sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT),
+			  sum_zone_numa_event_state(dev->id, NUMA_LOCAL),
+			  sum_zone_numa_event_state(dev->id, NUMA_OTHER));
 }
 static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL);
 
@@ -512,10 +513,11 @@ static ssize_t node_read_vmstat(struct device *dev,
 				     sum_zone_node_page_state(nid, i));
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+	fold_vm_numa_events();
+	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
 		len += sysfs_emit_at(buf, len, "%s %lu\n",
 				     numa_stat_name(i),
-				     sum_zone_numa_state(nid, i));
+				     sum_zone_numa_event_state(nid, i));
 
 #endif
 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d6bb737e5a2..f86018d5e362 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -135,10 +135,10 @@ enum numa_stat_item {
 	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
-	NR_VM_NUMA_STAT_ITEMS
+	NR_VM_NUMA_EVENT_ITEMS
 };
 #else
-#define NR_VM_NUMA_STAT_ITEMS 0
+#define NR_VM_NUMA_EVENT_ITEMS 0
 #endif
 
 enum zone_stat_item {
@@ -357,7 +357,12 @@ struct per_cpu_zonestat {
 	s8 stat_threshold;
 #endif
 #ifdef CONFIG_NUMA
-	u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
+	/*
+	 * Low priority inaccurate counters that are only folded
+	 * on demand. Use a large type to avoid the overhead of
+	 * folding during refresh_cpu_vm_stats.
+	 */
+	unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 #endif
 };
 
@@ -623,7 +628,7 @@ struct zone {
 	ZONE_PADDING(_pad3_)
 	/* Zone statistics */
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
-	atomic_long_t		vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
+	atomic_long_t		vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 0c5f36504613..59748bbbba4c 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu)
  * Zone and node-based page accounting with per cpu differentials.
  */
 extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
-extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
 extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
+extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 
 #ifdef CONFIG_NUMA
-static inline void zone_numa_state_add(long x, struct zone *zone,
-				 enum numa_stat_item item)
+static inline void zone_numa_event_add(long x, struct zone *zone,
+				enum numa_stat_item item)
 {
-	atomic_long_add(x, &zone->vm_numa_stat[item]);
-	atomic_long_add(x, &vm_numa_stat[item]);
+	atomic_long_add(x, &zone->vm_numa_event[item]);
+	atomic_long_add(x, &vm_numa_event[item]);
 }
 
-static inline unsigned long global_numa_state(enum numa_stat_item item)
-{
-	long x = atomic_long_read(&vm_numa_stat[item]);
-
-	return x;
-}
-
-static inline unsigned long zone_numa_state_snapshot(struct zone *zone,
+static inline unsigned long zone_numa_event_state(struct zone *zone,
 					enum numa_stat_item item)
 {
-	long x = atomic_long_read(&zone->vm_numa_stat[item]);
-	int cpu;
+	return atomic_long_read(&zone->vm_numa_event[item]);
+}
 
-	for_each_online_cpu(cpu)
-		x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item];
-
-	return x;
+static inline unsigned long
+global_numa_event_state(enum numa_stat_item item)
+{
+	return atomic_long_read(&vm_numa_event[item]);
 }
 #endif /* CONFIG_NUMA */
 
@@ -245,18 +238,22 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 }
 
 #ifdef CONFIG_NUMA
-extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
+extern void __count_numa_event(struct zone *zone, enum numa_stat_item item);
 extern unsigned long sum_zone_node_page_state(int node,
 					      enum zone_stat_item item);
-extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item);
+extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
 extern unsigned long node_page_state(struct pglist_data *pgdat,
 						enum node_stat_item item);
 extern unsigned long node_page_state_pages(struct pglist_data *pgdat,
 					   enum node_stat_item item);
+extern void fold_vm_numa_events(void);
 #else
 #define sum_zone_node_page_state(node, item) global_zone_page_state(item)
 #define node_page_state(node, item) global_node_page_state(item)
 #define node_page_state_pages(node, item) global_node_page_state_pages(item)
+static inline void fold_vm_numa_events(void)
+{
+}
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_SMP
@@ -428,7 +425,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item)
 static inline const char *node_stat_name(enum node_stat_item item)
 {
 	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-			   NR_VM_NUMA_STAT_ITEMS +
+			   NR_VM_NUMA_EVENT_ITEMS +
 			   item];
 }
 
@@ -440,7 +437,7 @@ static inline const char *lru_list_name(enum lru_list lru)
 static inline const char *writeback_stat_name(enum writeback_stat_item item)
 {
 	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-			   NR_VM_NUMA_STAT_ITEMS +
+			   NR_VM_NUMA_EVENT_ITEMS +
 			   NR_VM_NODE_STAT_ITEMS +
 			   item];
 }
@@ -449,7 +446,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item)
 static inline const char *vm_event_name(enum vm_event_item item)
 {
 	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-			   NR_VM_NUMA_STAT_ITEMS +
+			   NR_VM_NUMA_EVENT_ITEMS +
 			   NR_VM_NODE_STAT_ITEMS +
 			   NR_VM_WRITEBACK_STAT_ITEMS +
 			   item];
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 325771bef5e2..b5d95bf1025d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2150,7 +2150,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 		return page;
 	if (page && page_to_nid(page) == nid) {
 		preempt_disable();
-		__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
+		__count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
 		preempt_enable();
 	}
 	return page;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 89872ad5e872..4e03109bdae5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3480,12 +3480,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 		local_stat = NUMA_OTHER;
 
 	if (zone_to_nid(z) == zone_to_nid(preferred_zone))
-		__inc_numa_state(z, NUMA_HIT);
+		__count_numa_event(z, NUMA_HIT);
 	else {
-		__inc_numa_state(z, NUMA_MISS);
-		__inc_numa_state(preferred_zone, NUMA_FOREIGN);
+		__count_numa_event(z, NUMA_MISS);
+		__count_numa_event(preferred_zone, NUMA_FOREIGN);
 	}
-	__inc_numa_state(z, local_stat);
+	__count_numa_event(z, local_stat);
 #endif
 }
 
@@ -6785,8 +6785,8 @@ void __init setup_per_cpu_pageset(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu);
-		memset(pzstats->vm_numa_stat_diff, 0,
-		       sizeof(pzstats->vm_numa_stat_diff));
+		memset(pzstats->vm_numa_event, 0,
+		       sizeof(pzstats->vm_numa_event));
 	}
 #endif
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f1400ba46beb..0e27b62e487d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -31,8 +31,6 @@
 
 #include "internal.h"
 
-#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
-
 #ifdef CONFIG_NUMA
 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
 
@@ -41,11 +39,12 @@ static void zero_zone_numa_counters(struct zone *zone)
 {
 	int item, cpu;
 
-	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
-		atomic_long_set(&zone->vm_numa_stat[item], 0);
-		for_each_online_cpu(cpu)
-			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item]
+	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
+		atomic_long_set(&zone->vm_numa_event[item], 0);
+		for_each_online_cpu(cpu) {
+			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
 						= 0;
+		}
 	}
 }
 
@@ -63,8 +62,8 @@ static void zero_global_numa_counters(void)
 {
 	int item;
 
-	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
-		atomic_long_set(&vm_numa_stat[item], 0);
+	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+		atomic_long_set(&vm_numa_event[item], 0);
 }
 
 static void invalid_numa_statistics(void)
@@ -161,10 +160,9 @@ void vm_events_fold_cpu(int cpu)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
-atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
-EXPORT_SYMBOL(vm_numa_stat);
 EXPORT_SYMBOL(vm_node_stat);
 
 #ifdef CONFIG_SMP
@@ -706,32 +704,6 @@ EXPORT_SYMBOL(dec_node_page_state);
  * Fold a differential into the global counters.
  * Returns the number of counters updated.
  */
-#ifdef CONFIG_NUMA
-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
-{
-	int i;
-	int changes = 0;
-
-	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		if (zone_diff[i]) {
-			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
-			changes++;
-	}
-
-	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		if (numa_diff[i]) {
-			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
-			changes++;
-	}
-
-	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-		if (node_diff[i]) {
-			atomic_long_add(node_diff[i], &vm_node_stat[i]);
-			changes++;
-	}
-	return changes;
-}
-#else
 static int fold_diff(int *zone_diff, int *node_diff)
 {
 	int i;
@@ -750,7 +722,34 @@ static int fold_diff(int *zone_diff, int *node_diff)
 	}
 	return changes;
 }
-#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_NUMA
+static void fold_vm_zone_numa_events(struct zone *zone)
+{
+	unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
+	int cpu;
+	enum numa_stat_item item;
+
+	for_each_online_cpu(cpu) {
+		struct per_cpu_zonestat *pzstats;
+
+		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+		for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+			zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
+	}
+
+	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+		zone_numa_event_add(zone_numa_events[item], zone, item);
+}
+
+void fold_vm_numa_events(void)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone)
+		fold_vm_zone_numa_events(zone);
+}
+#endif
 
 /*
  * Update the zone counters for the current cpu.
@@ -774,9 +773,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 	struct zone *zone;
 	int i;
 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
-	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 	int changes = 0;
 
@@ -801,17 +797,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 			}
 		}
 #ifdef CONFIG_NUMA
-		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
-			int v;
-
-			v = this_cpu_xchg(pzstats->vm_numa_stat_diff[i], 0);
-			if (v) {
-
-				atomic_long_add(v, &zone->vm_numa_stat[i]);
-				global_numa_diff[i] += v;
-				__this_cpu_write(pcp->expire, 3);
-			}
-		}
 
 		if (do_pagesets) {
 			cond_resched();
@@ -859,12 +844,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
 		}
 	}
 
-#ifdef CONFIG_NUMA
-	changes += fold_diff(global_zone_diff, global_numa_diff,
-			     global_node_diff);
-#else
 	changes += fold_diff(global_zone_diff, global_node_diff);
-#endif
 	return changes;
 }
 
@@ -879,9 +859,6 @@ void cpu_vm_stats_fold(int cpu)
 	struct zone *zone;
 	int i;
 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
-	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
 	for_each_populated_zone(zone) {
@@ -889,7 +866,7 @@ void cpu_vm_stats_fold(int cpu)
 
 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
 
-		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
 			if (pzstats->vm_stat_diff[i]) {
 				int v;
 
@@ -898,17 +875,17 @@ void cpu_vm_stats_fold(int cpu)
 				atomic_long_add(v, &zone->vm_stat[i]);
 				global_zone_diff[i] += v;
 			}
-
+		}
 #ifdef CONFIG_NUMA
-		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-			if (pzstats->vm_numa_stat_diff[i]) {
-				int v;
+		for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+			if (pzstats->vm_numa_event[i]) {
+				unsigned long v;
 
-				v = pzstats->vm_numa_stat_diff[i];
-				pzstats->vm_numa_stat_diff[i] = 0;
-				atomic_long_add(v, &zone->vm_numa_stat[i]);
-				global_numa_diff[i] += v;
+				v = pzstats->vm_numa_event[i];
+				pzstats->vm_numa_event[i] = 0;
+				zone_numa_event_add(v, zone, i);
 			}
+		}
 #endif
 	}
 
@@ -928,11 +905,7 @@ void cpu_vm_stats_fold(int cpu)
 			}
 	}
 
-#ifdef CONFIG_NUMA
-	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
-#else
 	fold_diff(global_zone_diff, global_node_diff);
-#endif
 }
 
 /*
@@ -941,43 +914,37 @@ void cpu_vm_stats_fold(int cpu)
  */
 void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
 {
+	unsigned long v;
 	int i;
 
-	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
 		if (pzstats->vm_stat_diff[i]) {
-			int v = pzstats->vm_stat_diff[i];
+			v = pzstats->vm_stat_diff[i];
 			pzstats->vm_stat_diff[i] = 0;
-			atomic_long_add(v, &zone->vm_stat[i]);
-			atomic_long_add(v, &vm_zone_stat[i]);
+			zone_page_state_add(v, zone, i);
 		}
+	}
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		if (pzstats->vm_numa_stat_diff[i]) {
-			int v = pzstats->vm_numa_stat_diff[i];
-
-			pzstats->vm_numa_stat_diff[i] = 0;
-			atomic_long_add(v, &zone->vm_numa_stat[i]);
-			atomic_long_add(v, &vm_numa_stat[i]);
+	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+		if (pzstats->vm_numa_event[i]) {
+			v = pzstats->vm_numa_event[i];
+			pzstats->vm_numa_event[i] = 0;
+			zone_numa_event_add(v, zone, i);
 		}
+	}
 #endif
 }
 #endif
 
 #ifdef CONFIG_NUMA
-void __inc_numa_state(struct zone *zone,
+/* See __count_vm_event comment on why raw_cpu_inc is used. */
+void __count_numa_event(struct zone *zone,
 				 enum numa_stat_item item)
 {
 	struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
-	u16 __percpu *p = pzstats->vm_numa_stat_diff + item;
-	u16 v;
 
-	v = __this_cpu_inc_return(*p);
-
-	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
-		zone_numa_state_add(v, zone, item);
-		__this_cpu_write(*p, 0);
-	}
+	raw_cpu_inc(pzstats->vm_numa_event[item]);
 }
 
 /*
@@ -998,19 +965,16 @@ unsigned long sum_zone_node_page_state(int node,
 	return count;
 }
 
-/*
- * Determine the per node value of a numa stat item. To avoid deviation,
- * the per cpu stat number in vm_numa_stat_diff[] is also included.
- */
-unsigned long sum_zone_numa_state(int node,
+/* Determine the per node value of a numa stat item. */
+unsigned long sum_zone_numa_event_state(int node,
 				 enum numa_stat_item item)
 {
 	struct zone *zones = NODE_DATA(node)->node_zones;
-	int i;
 	unsigned long count = 0;
+	int i;
 
 	for (i = 0; i < MAX_NR_ZONES; i++)
-		count += zone_numa_state_snapshot(zones + i, item);
+		count += zone_numa_event_state(zones + i, item);
 
 	return count;
 }
@@ -1689,9 +1653,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 			   zone_page_state(zone, i));
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
 		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
-			   zone_numa_state_snapshot(zone, i));
+			   zone_numa_event_state(zone, i));
 #endif
 
 	seq_printf(m, "\n  pagesets");
@@ -1745,7 +1709,7 @@ static const struct seq_operations zoneinfo_op = {
 };
 
 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
-			 NR_VM_NUMA_STAT_ITEMS + \
+			 NR_VM_NUMA_EVENT_ITEMS + \
 			 NR_VM_NODE_STAT_ITEMS + \
 			 NR_VM_WRITEBACK_STAT_ITEMS + \
 			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
@@ -1760,6 +1724,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
 		return NULL;
 
 	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+	fold_vm_numa_events();
 	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
 	m->private = v;
 	if (!v)
@@ -1769,9 +1734,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
 	v += NR_VM_ZONE_STAT_ITEMS;
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-		v[i] = global_numa_state(i);
-	v += NR_VM_NUMA_STAT_ITEMS;
+	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
+		v[i] = global_numa_event_state(i);
+	v += NR_VM_NUMA_EVENT_ITEMS;
 #endif
 
 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
@@ -1941,11 +1906,7 @@ static bool need_update(int cpu)
 		if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
 			       sizeof(pzstats->vm_stat_diff[0])))
 			return true;
-#ifdef CONFIG_NUMA
-		if (memchr_inv(pzstats->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
-			       sizeof(pzstats->vm_numa_stat_diff[0])))
-			return true;
-#endif
+
 		if (last_pgdat == zone->zone_pgdat)
 			continue;
 		last_pgdat = zone->zone_pgdat;

From 3ac44a346a50988131db124a7e4bb99d3ec71706 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:47 -0700
Subject: [PATCH 3232/3292] mm/vmstat: inline NUMA event counter updates

__count_numa_event is small enough to be treated similarly to
__count_vm_event so inline it.

Link: https://lkml.kernel.org/r/20210512095458.30632-5-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 10 +++++++++-
 mm/vmstat.c            |  9 ---------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 59748bbbba4c..fe32a2210e73 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -238,7 +238,15 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 }
 
 #ifdef CONFIG_NUMA
-extern void __count_numa_event(struct zone *zone, enum numa_stat_item item);
+/* See __count_vm_event comment on why raw_cpu_inc is used. */
+static inline void
+__count_numa_event(struct zone *zone, enum numa_stat_item item)
+{
+	struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+
+	raw_cpu_inc(pzstats->vm_numa_event[item]);
+}
+
 extern unsigned long sum_zone_node_page_state(int node,
 					      enum zone_stat_item item);
 extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 0e27b62e487d..b0534e068166 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -938,15 +938,6 @@ void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
 #endif
 
 #ifdef CONFIG_NUMA
-/* See __count_vm_event comment on why raw_cpu_inc is used. */
-void __count_numa_event(struct zone *zone,
-				 enum numa_stat_item item)
-{
-	struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
-
-	raw_cpu_inc(pzstats->vm_numa_event[item]);
-}
-
 /*
  * Determine the per node value of a stat item. This function
  * is called frequently in a NUMA machine, so try to be as

From 3e23060b2d0b7eebf37b3b6043ea68da0ebc0646 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:50 -0700
Subject: [PATCH 3233/3292] mm/page_alloc: batch the accounting updates in the
 bulk allocator

Now that the zone_statistics are simple counters that do not require
special protection, the bulk allocator accounting updates can be batch
updated without adding too much complexity with protected RMW updates or
using xchg.

Link: https://lkml.kernel.org/r/20210512095458.30632-6-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h |  8 ++++++++
 mm/page_alloc.c        | 30 +++++++++++++-----------------
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index fe32a2210e73..d6a6cf53b127 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -247,6 +247,14 @@ __count_numa_event(struct zone *zone, enum numa_stat_item item)
 	raw_cpu_inc(pzstats->vm_numa_event[item]);
 }
 
+static inline void
+__count_numa_events(struct zone *zone, enum numa_stat_item item, long delta)
+{
+	struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+
+	raw_cpu_add(pzstats->vm_numa_event[item], delta);
+}
+
 extern unsigned long sum_zone_node_page_state(int node,
 					      enum zone_stat_item item);
 extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e03109bdae5..6bb9b87cf7d5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3467,7 +3467,8 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt)
  *
  * Must be called with interrupts disabled.
  */
-static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
+static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
+				   long nr_account)
 {
 #ifdef CONFIG_NUMA
 	enum numa_stat_item local_stat = NUMA_LOCAL;
@@ -3480,12 +3481,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 		local_stat = NUMA_OTHER;
 
 	if (zone_to_nid(z) == zone_to_nid(preferred_zone))
-		__count_numa_event(z, NUMA_HIT);
+		__count_numa_events(z, NUMA_HIT, nr_account);
 	else {
-		__count_numa_event(z, NUMA_MISS);
-		__count_numa_event(preferred_zone, NUMA_FOREIGN);
+		__count_numa_events(z, NUMA_MISS, nr_account);
+		__count_numa_events(preferred_zone, NUMA_FOREIGN, nr_account);
 	}
-	__count_numa_event(z, local_stat);
+	__count_numa_events(z, local_stat, nr_account);
 #endif
 }
 
@@ -3531,7 +3532,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
 	if (page) {
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
-		zone_statistics(preferred_zone, zone);
+		zone_statistics(preferred_zone, zone, 1);
 	}
 	local_unlock_irqrestore(&pagesets.lock, flags);
 	return page;
@@ -3592,7 +3593,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 				  get_pcppage_migratetype(page));
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
-	zone_statistics(preferred_zone, zone);
+	zone_statistics(preferred_zone, zone, 1);
 	local_irq_restore(flags);
 
 out:
@@ -5077,7 +5078,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	struct alloc_context ac;
 	gfp_t alloc_gfp;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
-	int nr_populated = 0;
+	int nr_populated = 0, nr_account = 0;
 
 	if (unlikely(nr_pages <= 0))
 		return 0;
@@ -5154,15 +5155,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 				goto failed_irq;
 			break;
 		}
-
-		/*
-		 * Ideally this would be batched but the best way to do
-		 * that cheaply is to first convert zone_statistics to
-		 * be inaccurate per-cpu counter like vm_events to avoid
-		 * a RMW cycle then do the accounting with IRQs enabled.
-		 */
-		__count_zid_vm_events(PGALLOC, zone_idx(zone), 1);
-		zone_statistics(ac.preferred_zoneref->zone, zone);
+		nr_account++;
 
 		prep_new_page(page, 0, gfp, 0);
 		if (page_list)
@@ -5172,6 +5165,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
+	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
+	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
+
 	local_unlock_irqrestore(&pagesets.lock, flags);
 
 	return nr_populated;

From 43c95bcc51e4e7f3e3cbce01515fe429a4cf12a7 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:54 -0700
Subject: [PATCH 3234/3292] mm/page_alloc: reduce duration that IRQs are
 disabled for VM counters

IRQs are left disabled for the zone and node VM event counters.  This is
unnecessary as the affected counters are allowed to race for preemmption
and IRQs.

This patch reduces the scope of IRQs being disabled via
local_[lock|unlock]_irq on !PREEMPT_RT kernels.  One
__mod_zone_freepage_state is still called with IRQs disabled.  While this
could be moved out, it's not free on all architectures as some require
IRQs to be disabled for mod_zone_page_state on !PREEMPT_RT kernels.

Link: https://lkml.kernel.org/r/20210512095458.30632-7-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6bb9b87cf7d5..161bcda61520 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3530,11 +3530,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
+	local_unlock_irqrestore(&pagesets.lock, flags);
 	if (page) {
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
 		zone_statistics(preferred_zone, zone, 1);
 	}
-	local_unlock_irqrestore(&pagesets.lock, flags);
 	return page;
 }
 
@@ -3586,15 +3586,15 @@ struct page *rmqueue(struct zone *preferred_zone,
 		if (!page)
 			page = __rmqueue(zone, order, migratetype, alloc_flags);
 	} while (page && check_new_pages(page, order));
-	spin_unlock(&zone->lock);
 	if (!page)
 		goto failed;
+
 	__mod_zone_freepage_state(zone, -(1 << order),
 				  get_pcppage_migratetype(page));
+	spin_unlock_irqrestore(&zone->lock, flags);
 
 	__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 	zone_statistics(preferred_zone, zone, 1);
-	local_irq_restore(flags);
 
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
@@ -3607,7 +3607,7 @@ out:
 	return page;
 
 failed:
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&zone->lock, flags);
 	return NULL;
 }
 
@@ -5165,11 +5165,11 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
+	local_unlock_irqrestore(&pagesets.lock, flags);
+
 	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
 	zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
 
-	local_unlock_irqrestore(&pagesets.lock, flags);
-
 	return nr_populated;
 
 failed_irq:

From 56f0e661ea8c0178e80048df7166653a51ef2c3d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:41:57 -0700
Subject: [PATCH 3235/3292] mm/page_alloc: explicitly acquire the zone lock in
 __free_pages_ok

__free_pages_ok() disables IRQs before calling a common helper
free_one_page() that acquires the zone lock.  This is not safe according
to Documentation/locking/locktypes.rst and in this context, IRQ disabling
is not protecting a per_cpu_pages structure either or a local_lock would
be used.

This patch explicitly acquires the lock with spin_lock_irqsave instead of
relying on a helper.  This removes the last instance of local_irq_save()
in page_alloc.c.

Link: https://lkml.kernel.org/r/20210512095458.30632-8-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 161bcda61520..f1a51c163e75 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1590,21 +1590,21 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 	unsigned long flags;
 	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
 
 	if (!free_pages_prepare(page, order, true, fpi_flags))
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
 
-	/*
-	 * TODO FIX: Disable IRQs before acquiring IRQ-safe zone->lock
-	 * and protect vmstat updates.
-	 */
-	local_irq_save(flags);
+	spin_lock_irqsave(&zone->lock, flags);
 	__count_vm_events(PGFREE, 1 << order);
-	free_one_page(page_zone(page), page, pfn, order, migratetype,
-		      fpi_flags);
-	local_irq_restore(flags);
+	if (unlikely(has_isolate_pageblock(zone) ||
+		is_migrate_isolate(migratetype))) {
+		migratetype = get_pfnblock_migratetype(page, pfn);
+	}
+	__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
+	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 void __free_pages_core(struct page *page, unsigned int order)

From df1acc856923c0a65c28b588585449106c316b71 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:00 -0700
Subject: [PATCH 3236/3292] mm/page_alloc: avoid conflating IRQs disabled with
 zone->lock

Historically when freeing pages, free_one_page() assumed that callers had
IRQs disabled and the zone->lock could be acquired with spin_lock().  This
confuses the scope of what local_lock_irq is protecting and what
zone->lock is protecting in free_unref_page_list in particular.

This patch uses spin_lock_irqsave() for the zone->lock in free_one_page()
instead of relying on callers to have disabled IRQs.
free_unref_page_commit() is changed to only deal with PCP pages protected
by the local lock.  free_unref_page_list() then first frees isolated pages
to the buddy lists with free_one_page() and frees the rest of the pages to
the PCP via free_unref_page_commit().  The end result is that
free_one_page() is no longer depending on side-effects of local_lock to be
correct.

Note that this may incur a performance penalty while memory hot-remove is
running but that is not a common operation.

[lkp@intel.com: Ensure CMA pages get addded to correct pcp list]

Link: https://lkml.kernel.org/r/20210512095458.30632-9-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 75 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f1a51c163e75..dd367e5df8cb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1501,13 +1501,15 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype, fpi_t fpi_flags)
 {
-	spin_lock(&zone->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&zone->lock, flags);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -3285,31 +3287,13 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   int migratetype)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
-	int migratetype;
 
-	migratetype = get_pcppage_migratetype(page);
 	__count_vm_event(PGFREE);
-
-	/*
-	 * We only track unmovable, reclaimable and movable on pcp lists.
-	 * Free ISOLATE pages back to the allocator because they are being
-	 * offlined but treat HIGHATOMIC as movable pages so we can get those
-	 * areas back if necessary. Otherwise, we may have to free
-	 * excessively into the page allocator
-	 */
-	if (migratetype >= MIGRATE_PCPTYPES) {
-		if (unlikely(is_migrate_isolate(migratetype))) {
-			free_one_page(zone, page, pfn, 0, migratetype,
-				      FPI_NONE);
-			return;
-		}
-		migratetype = MIGRATE_MOVABLE;
-	}
-
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
@@ -3324,12 +3308,29 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	int migratetype;
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
+	/*
+	 * We only track unmovable, reclaimable and movable on pcp lists.
+	 * Place ISOLATE pages on the isolated list because they are being
+	 * offlined but treat HIGHATOMIC as movable pages so we can get those
+	 * areas back if necessary. Otherwise, we may have to free
+	 * excessively into the page allocator
+	 */
+	migratetype = get_pcppage_migratetype(page);
+	if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+		if (unlikely(is_migrate_isolate(migratetype))) {
+			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+			return;
+		}
+		migratetype = MIGRATE_MOVABLE;
+	}
+
 	local_lock_irqsave(&pagesets.lock, flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, migratetype);
 	local_unlock_irqrestore(&pagesets.lock, flags);
 }
 
@@ -3341,22 +3342,44 @@ void free_unref_page_list(struct list_head *list)
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
+	int migratetype;
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_to_pfn(page);
 		if (!free_unref_page_prepare(page, pfn))
 			list_del(&page->lru);
+
+		/*
+		 * Free isolated pages directly to the allocator, see
+		 * comment in free_unref_page.
+		 */
+		migratetype = get_pcppage_migratetype(page);
+		if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
+			if (unlikely(is_migrate_isolate(migratetype))) {
+				list_del(&page->lru);
+				free_one_page(page_zone(page), page, pfn, 0,
+							migratetype, FPI_NONE);
+				continue;
+			}
+
+			/*
+			 * Non-isolated types over MIGRATE_PCPTYPES get added
+			 * to the MIGRATE_MOVABLE pcp list.
+			 */
+			set_pcppage_migratetype(page, MIGRATE_MOVABLE);
+		}
+
 		set_page_private(page, pfn);
 	}
 
 	local_lock_irqsave(&pagesets.lock, flags);
 	list_for_each_entry_safe(page, next, list, lru) {
-		unsigned long pfn = page_private(page);
-
+		pfn = page_private(page);
 		set_page_private(page, 0);
+		migratetype = get_pcppage_migratetype(page);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		free_unref_page_commit(page, pfn, migratetype);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get

From 902499937e3a82156dcb5069b6df27640480e204 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:03 -0700
Subject: [PATCH 3237/3292] mm/page_alloc: update PGFREE outside the zone lock
 in __free_pages_ok

VM events do not need explicit protection by disabling IRQs so update the
counter with IRQs enabled in __free_pages_ok.

Link: https://lkml.kernel.org/r/20210512095458.30632-10-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dd367e5df8cb..37ce0c2f3bae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1600,13 +1600,14 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 	migratetype = get_pfnblock_migratetype(page, pfn);
 
 	spin_lock_irqsave(&zone->lock, flags);
-	__count_vm_events(PGFREE, 1 << order);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
 	spin_unlock_irqrestore(&zone->lock, flags);
+
+	__count_vm_events(PGFREE, 1 << order);
 }
 
 void __free_pages_core(struct page *page, unsigned int order)

From 151e084af4946344fe0d021f4110b69edaac1e8d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Mon, 28 Jun 2021 19:42:06 -0700
Subject: [PATCH 3238/3292] mm: page_alloc: dump migrate-failed pages only at
 -EBUSY

alloc_contig_dump_pages() aims for helping debugging page migration
failure by elevated page refcount compared to expected_count.  (for the
detail, please look at migrate_page_move_mapping)

However, -ENOMEM is just the case that system is under memory pressure
state, not relevant with page refcount at all.  Thus, the dumping page
list is not helpful for the debugging point of view.

Link: https://lkml.kernel.org/r/YKa2Wyo9xqIErpfa@google.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: John Dias <joaodias@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 37ce0c2f3bae..941a75b9fb5a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8800,7 +8800,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 
 	lru_cache_enable();
 	if (ret < 0) {
-		alloc_contig_dump_pages(&cc->migratepages);
+		if (ret == -EBUSY)
+			alloc_contig_dump_pages(&cc->migratepages);
 		putback_movable_pages(&cc->migratepages);
 		return ret;
 	}

From bbbecb35a41cb5c63ef78e14cc8b95fa9130bc1a Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:09 -0700
Subject: [PATCH 3239/3292] mm/page_alloc: delete vm.percpu_pagelist_fraction

Patch series "Calculate pcp->high based on zone sizes and active CPUs", v2.

The per-cpu page allocator (PCP) is meant to reduce contention on the zone
lock but the sizing of batch and high is archaic and neither takes the
zone size into account or the number of CPUs local to a zone.  With larger
zones and more CPUs per node, the contention is getting worse.
Furthermore, the fact that vm.percpu_pagelist_fraction adjusts both batch
and high values means that the sysctl can reduce zone lock contention but
also increase allocation latencies.

This series disassociates pcp->high from pcp->batch and then scales
pcp->high based on the size of the local zone with limited impact to
reclaim and accounting for active CPUs but leaves pcp->batch static.  It
also adapts the number of pages that can be on the pcp list based on
recent freeing patterns.

The motivation is partially to adjust to larger memory sizes but is also
driven by the fact that large batches of page freeing via release_pages()
often shows zone contention as a major part of the problem.  Another is a
bug report based on an older kernel where a multi-terabyte process can
takes several minutes to exit.  A workaround was to use
vm.percpu_pagelist_fraction to increase the pcp->high value but testing
indicated that a production workload could not use the same values because
of an increase in allocation latencies.  Unfortunately, I cannot reproduce
this test case myself as the multi-terabyte machines are in active use but
it should alleviate the problem.

The series aims to address both and partially acts as a pre-requisite.
pcp only works with order-0 which is useless for SLUB (when using high
orders) and THP (unconditionally).  To store high-order pages on PCP, the
pcp->high values need to be increased first.

This patch (of 6):

The vm.percpu_pagelist_fraction is used to increase the batch and high
limits for the per-cpu page allocator (PCP).  The intent behind the sysctl
is to reduce zone lock acquisition when allocating/freeing pages but it
has a problem.  While it can decrease contention, it can also increase
latency on the allocation side due to unreasonably large batch sizes.
This leads to games where an administrator adjusts
percpu_pagelist_fraction on the fly to work around contention and
allocation latency problems.

This series aims to alleviate the problems with zone lock contention while
avoiding the allocation-side latency problems.  For the purposes of
review, it's easier to remove this sysctl now and reintroduce a similar
sysctl later in the series that deals only with pcp->high.

Link: https://lkml.kernel.org/r/20210525080119.5455-1-mgorman@techsingularity.net
Link: https://lkml.kernel.org/r/20210525080119.5455-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/sysctl/vm.rst | 19 ---------
 include/linux/mmzone.h                  |  3 --
 kernel/sysctl.c                         |  8 ----
 mm/page_alloc.c                         | 55 ++-----------------------
 4 files changed, 4 insertions(+), 81 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 586cd4b86428..2fcafccb53a8 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -64,7 +64,6 @@ Currently, these files are in /proc/sys/vm:
 - overcommit_ratio
 - page-cluster
 - panic_on_oom
-- percpu_pagelist_fraction
 - stat_interval
 - stat_refresh
 - numa_stat
@@ -790,24 +789,6 @@ panic_on_oom=2+kdump gives you very strong tool to investigate
 why oom happens. You can get snapshot.
 
 
-percpu_pagelist_fraction
-========================
-
-This is the fraction of pages at most (high mark pcp->high) in each zone that
-are allocated for each per cpu page list.  The min value for this is 8.  It
-means that we don't allow more than 1/8th of pages in each zone to be
-allocated in any single per_cpu_pagelist.  This entry only changes the value
-of hot per cpu pagelists.  User can specify a number like 100 to allocate
-1/100th of each zone to each per cpu page list.
-
-The batch value of each per cpu pagelist is also updated as a result.  It is
-set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8)
-
-The initial value is zero.  Kernel does not use this value at boot time to set
-the high water marks for each per cpu page list.  If the user writes '0' to this
-sysctl, it will revert to this default behavior.
-
-
 stat_interval
 =============
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f86018d5e362..7937a1d1d166 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1027,15 +1027,12 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
 		size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
-		void *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 		void *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 		void *, size_t *, loff_t *);
 int numa_zonelist_order_handler(struct ctl_table *, int,
 		void *, size_t *, loff_t *);
-extern int percpu_pagelist_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d4a78e08f6d8..51213c33171e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2908,14 +2908,6 @@ static struct ctl_table vm_table[] = {
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &one_thousand,
 	},
-	{
-		.procname	= "percpu_pagelist_fraction",
-		.data		= &percpu_pagelist_fraction,
-		.maxlen		= sizeof(percpu_pagelist_fraction),
-		.mode		= 0644,
-		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
 	{
 		.procname	= "page_lock_unfairness",
 		.data		= &sysctl_page_lock_unfairness,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 941a75b9fb5a..5abf2c1d4c58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -120,7 +120,6 @@ typedef int __bitwise fpi_t;
 
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
-#define MIN_PERCPU_PAGELIST_FRACTION	(8)
 
 struct pagesets {
 	local_lock_t lock;
@@ -193,7 +192,6 @@ EXPORT_SYMBOL(_totalram_pages);
 unsigned long totalreserve_pages __read_mostly;
 unsigned long totalcma_pages __read_mostly;
 
-int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
 EXPORT_SYMBOL(init_on_alloc);
@@ -6735,22 +6733,15 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h
 
 /*
  * Calculate and set new high and batch values for all per-cpu pagesets of a
- * zone, based on the zone's size and the percpu_pagelist_fraction sysctl.
+ * zone based on the zone's size.
  */
 static void zone_set_pageset_high_and_batch(struct zone *zone)
 {
 	unsigned long new_high, new_batch;
 
-	if (percpu_pagelist_fraction) {
-		new_high = zone_managed_pages(zone) / percpu_pagelist_fraction;
-		new_batch = max(1UL, new_high / 4);
-		if ((new_high / 4) > (PAGE_SHIFT * 8))
-			new_batch = PAGE_SHIFT * 8;
-	} else {
-		new_batch = zone_batchsize(zone);
-		new_high = 6 * new_batch;
-		new_batch = max(1UL, 1 * new_batch);
-	}
+	new_batch = zone_batchsize(zone);
+	new_high = 6 * new_batch;
+	new_batch = max(1UL, 1 * new_batch);
 
 	if (zone->pageset_high == new_high &&
 	    zone->pageset_batch == new_batch)
@@ -8413,44 +8404,6 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
-/*
- * percpu_pagelist_fraction - changes the pcp->high for each zone on each
- * cpu.  It is the fraction of total pages in each zone that a hot per cpu
- * pagelist can have before it gets flushed back to buddy allocator.
- */
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
-		void *buffer, size_t *length, loff_t *ppos)
-{
-	struct zone *zone;
-	int old_percpu_pagelist_fraction;
-	int ret;
-
-	mutex_lock(&pcp_batch_high_lock);
-	old_percpu_pagelist_fraction = percpu_pagelist_fraction;
-
-	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (!write || ret < 0)
-		goto out;
-
-	/* Sanity checking to avoid pcp imbalance */
-	if (percpu_pagelist_fraction &&
-	    percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) {
-		percpu_pagelist_fraction = old_percpu_pagelist_fraction;
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* No change? */
-	if (percpu_pagelist_fraction == old_percpu_pagelist_fraction)
-		goto out;
-
-	for_each_populated_zone(zone)
-		zone_set_pageset_high_and_batch(zone);
-out:
-	mutex_unlock(&pcp_batch_high_lock);
-	return ret;
-}
-
 #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
 /*
  * Returns the number of pages that arch has reserved but

From b92ca18e8ca596f4f3d80c1fe833bc57a1b2458c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:12 -0700
Subject: [PATCH 3240/3292] mm/page_alloc: disassociate the pcp->high from
 pcp->batch

The pcp high watermark is based on the batch size but there is no
relationship between them other than it is convenient to use early in
boot.

This patch takes the first step and bases pcp->high on the zone low
watermark split across the number of CPUs local to a zone while the batch
size remains the same to avoid increasing allocation latencies.  The
intent behind the default pcp->high is "set the number of PCP pages such
that if they are all full that background reclaim is not started
prematurely".

Note that in this patch the pcp->high values are adjusted after memory
hotplug events, min_free_kbytes adjustments and watermark scale factor
adjustments but not CPU hotplug events which is handled later in the
series.

On a test KVM instance;

Before grep -E "high:|batch" /proc/zoneinfo | tail -2
              high:  378
              batch: 63

After grep -E "high:|batch" /proc/zoneinfo | tail -2
              high:  649
              batch: 63

[mgorman@techsingularity.net:  fix __setup_per_zone_wmarks for parallel memory
hotplug]
  Link: https://lkml.kernel.org/r/20210528105925.GN30378@techsingularity.net

Link: https://lkml.kernel.org/r/20210525080119.5455-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c |  6 ++---
 mm/page_alloc.c     | 62 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 70620d0dd923..974a565797d8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -961,7 +961,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
 	node_states_set_node(nid, &arg);
 	if (need_zonelists_rebuild)
 		build_all_zonelists(NULL);
-	zone_pcp_update(zone);
 
 	/* Basic onlining is complete, allow allocation of onlined pages. */
 	undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
@@ -974,6 +973,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
 	 */
 	shuffle_zone(zone);
 
+	/* reinitialise watermarks and update pcp limits */
 	init_per_zone_wmark_min();
 
 	kswapd_run(nid);
@@ -1829,13 +1829,13 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 	adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
 	adjust_present_page_count(zone, -nr_pages);
 
+	/* reinitialise watermarks and update pcp limits */
 	init_per_zone_wmark_min();
 
 	if (!populated_zone(zone)) {
 		zone_pcp_reset(zone);
 		build_all_zonelists(NULL);
-	} else
-		zone_pcp_update(zone);
+	}
 
 	node_states_clear_node(node, &arg);
 	if (arg.status_change_nid >= 0) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5abf2c1d4c58..19ec81d403a0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2174,14 +2174,6 @@ void __init page_alloc_init_late(void)
 	/* Block until all are initialised */
 	wait_for_completion(&pgdat_init_all_done_comp);
 
-	/*
-	 * The number of managed pages has changed due to the initialisation
-	 * so the pcpu batch and high limits needs to be updated or the limits
-	 * will be artificially small.
-	 */
-	for_each_populated_zone(zone)
-		zone_pcp_update(zone);
-
 	/*
 	 * We initialized the rest of the deferred pages.  Permanently disable
 	 * on-demand struct page initialization.
@@ -6633,13 +6625,12 @@ static int zone_batchsize(struct zone *zone)
 	int batch;
 
 	/*
-	 * The per-cpu-pages pools are set to around 1000th of the
-	 * size of the zone.
+	 * The number of pages to batch allocate is either ~0.1%
+	 * of the zone or 1MB, whichever is smaller. The batch
+	 * size is striking a balance between allocation latency
+	 * and zone lock contention.
 	 */
-	batch = zone_managed_pages(zone) / 1024;
-	/* But no more than a meg. */
-	if (batch * PAGE_SIZE > 1024 * 1024)
-		batch = (1024 * 1024) / PAGE_SIZE;
+	batch = min(zone_managed_pages(zone) >> 10, (1024 * 1024) / PAGE_SIZE);
 	batch /= 4;		/* We effectively *= 4 below */
 	if (batch < 1)
 		batch = 1;
@@ -6676,6 +6667,34 @@ static int zone_batchsize(struct zone *zone)
 #endif
 }
 
+static int zone_highsize(struct zone *zone, int batch)
+{
+#ifdef CONFIG_MMU
+	int high;
+	int nr_local_cpus;
+
+	/*
+	 * The high value of the pcp is based on the zone low watermark
+	 * so that if they are full then background reclaim will not be
+	 * started prematurely. The value is split across all online CPUs
+	 * local to the zone. Note that early in boot that CPUs may not be
+	 * online yet.
+	 */
+	nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
+	high = low_wmark_pages(zone) / nr_local_cpus;
+
+	/*
+	 * Ensure high is at least batch*4. The multiple is based on the
+	 * historical relationship between high and batch.
+	 */
+	high = max(high, batch << 2);
+
+	return high;
+#else
+	return 0;
+#endif
+}
+
 /*
  * pcp->high and pcp->batch values are related and generally batch is lower
  * than high. They are also related to pcp->count such that count is lower
@@ -6737,11 +6756,10 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h
  */
 static void zone_set_pageset_high_and_batch(struct zone *zone)
 {
-	unsigned long new_high, new_batch;
+	int new_high, new_batch;
 
-	new_batch = zone_batchsize(zone);
-	new_high = 6 * new_batch;
-	new_batch = max(1UL, 1 * new_batch);
+	new_batch = max(1, zone_batchsize(zone));
+	new_high = zone_highsize(zone, new_batch);
 
 	if (zone->pageset_high == new_high &&
 	    zone->pageset_batch == new_batch)
@@ -8222,11 +8240,19 @@ static void __setup_per_zone_wmarks(void)
  */
 void setup_per_zone_wmarks(void)
 {
+	struct zone *zone;
 	static DEFINE_SPINLOCK(lock);
 
 	spin_lock(&lock);
 	__setup_per_zone_wmarks();
 	spin_unlock(&lock);
+
+	/*
+	 * The watermark size have changed so update the pcpu batch
+	 * and high limits or the limits may be inappropriate.
+	 */
+	for_each_zone(zone)
+		zone_pcp_update(zone);
 }
 
 /*

From 04f8cfeaed0849e702278378bce3867577ca45fb Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:15 -0700
Subject: [PATCH 3241/3292] mm/page_alloc: adjust pcp->high after CPU hotplug
 events

The PCP high watermark is based on the number of online CPUs so the
watermarks must be adjusted during CPU hotplug.  At the time of
hot-remove, the number of online CPUs is already adjusted but during
hot-add, a delta needs to be applied to update PCP to the correct value.
After this patch is applied, the high watermarks are adjusted correctly.

  # grep high: /proc/zoneinfo  | tail -1
              high:  649
  # echo 0 > /sys/devices/system/cpu/cpu4/online
  # grep high: /proc/zoneinfo  | tail -1
              high:  664
  # echo 1 > /sys/devices/system/cpu/cpu4/online
  # grep high: /proc/zoneinfo  | tail -1
              high:  649

Link: https://lkml.kernel.org/r/20210525080119.5455-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuhotplug.h |  2 +-
 mm/internal.h              |  2 +-
 mm/page_alloc.c            | 38 +++++++++++++++++++++++++++-----------
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4a62b3980642..47e13582d9fc 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -54,7 +54,7 @@ enum cpuhp_state {
 	CPUHP_MM_MEMCQ_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
-	CPUHP_PAGE_ALLOC_DEAD,
+	CPUHP_PAGE_ALLOC,
 	CPUHP_NET_DEV_DEAD,
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_IOVA_DEAD,
diff --git a/mm/internal.h b/mm/internal.h
index 2946dfa0f245..18e5fb4d225f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -206,7 +206,7 @@ extern int user_min_free_kbytes;
 extern void free_unref_page(struct page *page);
 extern void free_unref_page_list(struct list_head *list);
 
-extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_update(struct zone *zone, int cpu_online);
 extern void zone_pcp_reset(struct zone *zone);
 extern void zone_pcp_disable(struct zone *zone);
 extern void zone_pcp_enable(struct zone *zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 19ec81d403a0..8d196a803820 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6667,7 +6667,7 @@ static int zone_batchsize(struct zone *zone)
 #endif
 }
 
-static int zone_highsize(struct zone *zone, int batch)
+static int zone_highsize(struct zone *zone, int batch, int cpu_online)
 {
 #ifdef CONFIG_MMU
 	int high;
@@ -6678,9 +6678,10 @@ static int zone_highsize(struct zone *zone, int batch)
 	 * so that if they are full then background reclaim will not be
 	 * started prematurely. The value is split across all online CPUs
 	 * local to the zone. Note that early in boot that CPUs may not be
-	 * online yet.
+	 * online yet and that during CPU hotplug that the cpumask is not
+	 * yet updated when a CPU is being onlined.
 	 */
-	nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
+	nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))) + cpu_online;
 	high = low_wmark_pages(zone) / nr_local_cpus;
 
 	/*
@@ -6754,12 +6755,12 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h
  * Calculate and set new high and batch values for all per-cpu pagesets of a
  * zone based on the zone's size.
  */
-static void zone_set_pageset_high_and_batch(struct zone *zone)
+static void zone_set_pageset_high_and_batch(struct zone *zone, int cpu_online)
 {
 	int new_high, new_batch;
 
 	new_batch = max(1, zone_batchsize(zone));
-	new_high = zone_highsize(zone, new_batch);
+	new_high = zone_highsize(zone, new_batch, cpu_online);
 
 	if (zone->pageset_high == new_high &&
 	    zone->pageset_batch == new_batch)
@@ -6789,7 +6790,7 @@ void __meminit setup_zone_pageset(struct zone *zone)
 		per_cpu_pages_init(pcp, pzstats);
 	}
 
-	zone_set_pageset_high_and_batch(zone);
+	zone_set_pageset_high_and_batch(zone, 0);
 }
 
 /*
@@ -8044,6 +8045,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 
 static int page_alloc_cpu_dead(unsigned int cpu)
 {
+	struct zone *zone;
 
 	lru_add_drain_cpu(cpu);
 	drain_pages(cpu);
@@ -8064,6 +8066,19 @@ static int page_alloc_cpu_dead(unsigned int cpu)
 	 * race with what we are doing.
 	 */
 	cpu_vm_stats_fold(cpu);
+
+	for_each_populated_zone(zone)
+		zone_pcp_update(zone, 0);
+
+	return 0;
+}
+
+static int page_alloc_cpu_online(unsigned int cpu)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone)
+		zone_pcp_update(zone, 1);
 	return 0;
 }
 
@@ -8089,8 +8104,9 @@ void __init page_alloc_init(void)
 		hashdist = 0;
 #endif
 
-	ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
-					"mm/page_alloc:dead", NULL,
+	ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC,
+					"mm/page_alloc:pcp",
+					page_alloc_cpu_online,
 					page_alloc_cpu_dead);
 	WARN_ON(ret < 0);
 }
@@ -8252,7 +8268,7 @@ void setup_per_zone_wmarks(void)
 	 * and high limits or the limits may be inappropriate.
 	 */
 	for_each_zone(zone)
-		zone_pcp_update(zone);
+		zone_pcp_update(zone, 0);
 }
 
 /*
@@ -9053,10 +9069,10 @@ EXPORT_SYMBOL(free_contig_range);
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
  * page high values need to be recalculated.
  */
-void __meminit zone_pcp_update(struct zone *zone)
+void zone_pcp_update(struct zone *zone, int cpu_online)
 {
 	mutex_lock(&pcp_batch_high_lock);
-	zone_set_pageset_high_and_batch(zone);
+	zone_set_pageset_high_and_batch(zone, cpu_online);
 	mutex_unlock(&pcp_batch_high_lock);
 }
 

From 3b12e7e97938424de2bb1b95ba0bd6a49bad39f9 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:18 -0700
Subject: [PATCH 3242/3292] mm/page_alloc: scale the number of pages that are
 batch freed

When a task is freeing a large number of order-0 pages, it may acquire the
zone->lock multiple times freeing pages in batches.  This may
unnecessarily contend on the zone lock when freeing very large number of
pages.  This patch adapts the size of the batch based on the recent
pattern to scale the batch size for subsequent frees.

As the machines I used were not large enough to test this are not large
enough to illustrate a problem, a debugging patch shows patterns like the
following (slightly editted for clarity)

Baseline vanilla kernel
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378

With patches
  time-unmap-7724    [...] free_pcppages_bulk: free  126 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  252 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  504 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  751 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  751 count  814 high  814

Link: https://lkml.kernel.org/r/20210525080119.5455-5-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  3 ++-
 mm/page_alloc.c        | 41 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7937a1d1d166..0a86b2890a16 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -343,8 +343,9 @@ struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
+	short free_factor;	/* batch scaling factor during free */
 #ifdef CONFIG_NUMA
-	int expire;		/* When 0, remote pagesets are drained */
+	short expire;		/* When 0, remote pagesets are drained */
 #endif
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8d196a803820..e1d1825a2611 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3278,18 +3278,47 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
 	return true;
 }
 
+static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
+{
+	int min_nr_free, max_nr_free;
+
+	/* Check for PCP disabled or boot pageset */
+	if (unlikely(high < batch))
+		return 1;
+
+	/* Leave at least pcp->batch pages on the list */
+	min_nr_free = batch;
+	max_nr_free = high - batch;
+
+	/*
+	 * Double the number of pages freed each time there is subsequent
+	 * freeing of pages without any allocation.
+	 */
+	batch <<= pcp->free_factor;
+	if (batch < max_nr_free)
+		pcp->free_factor++;
+	batch = clamp(batch, min_nr_free, max_nr_free);
+
+	return batch;
+}
+
 static void free_unref_page_commit(struct page *page, unsigned long pfn,
 				   int migratetype)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
+	int high;
 
 	__count_vm_event(PGFREE);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
-	if (pcp->count >= READ_ONCE(pcp->high))
-		free_pcppages_bulk(zone, READ_ONCE(pcp->batch), pcp);
+	high = READ_ONCE(pcp->high);
+	if (pcp->count >= high) {
+		int batch = READ_ONCE(pcp->batch);
+
+		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp);
+	}
 }
 
 /*
@@ -3541,7 +3570,14 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	unsigned long flags;
 
 	local_lock_irqsave(&pagesets.lock, flags);
+
+	/*
+	 * On allocation, reduce the number of pages that are batch freed.
+	 * See nr_pcp_free() where free_factor is increased for subsequent
+	 * frees.
+	 */
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
+	pcp->free_factor >>= 1;
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
 	local_unlock_irqrestore(&pagesets.lock, flags);
@@ -6737,6 +6773,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
 	 */
 	pcp->high = BOOT_PAGESET_HIGH;
 	pcp->batch = BOOT_PAGESET_BATCH;
+	pcp->free_factor = 0;
 }
 
 static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high,

From c49c2c47dab6b8d45022b3fabf0642a0e62e3109 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:21 -0700
Subject: [PATCH 3243/3292] mm/page_alloc: limit the number of pages on PCP
 lists when reclaim is active

When kswapd is active then direct reclaim is potentially active.  In
either case, it is possible that a zone would be balanced if pages were
not trapped on PCP lists.  Instead of draining remote pages, simply limit
the size of the PCP lists while kswapd is active.

Link: https://lkml.kernel.org/r/20210525080119.5455-6-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  1 +
 mm/page_alloc.c        | 19 ++++++++++++++++++-
 mm/vmscan.c            | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0a86b2890a16..b2f40d64bc4b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -647,6 +647,7 @@ enum zone_flags {
 	ZONE_BOOSTED_WATERMARK,		/* zone recently boosted watermarks.
 					 * Cleared when kswapd is woken.
 					 */
+	ZONE_RECLAIM_ACTIVE,		/* kswapd may be scanning the zone. */
 };
 
 static inline unsigned long zone_managed_pages(struct zone *zone)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e1d1825a2611..adf35ccfd8e5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3302,6 +3302,23 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
 	return batch;
 }
 
+static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
+{
+	int high = READ_ONCE(pcp->high);
+
+	if (unlikely(!high))
+		return 0;
+
+	if (!test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags))
+		return high;
+
+	/*
+	 * If reclaim is active, limit the number of pages that can be
+	 * stored on pcp lists
+	 */
+	return min(READ_ONCE(pcp->batch) << 2, high);
+}
+
 static void free_unref_page_commit(struct page *page, unsigned long pfn,
 				   int migratetype)
 {
@@ -3313,7 +3330,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	list_add(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
-	high = READ_ONCE(pcp->high);
+	high = nr_pcp_high(pcp, zone);
 	if (pcp->count >= high) {
 		int batch = READ_ONCE(pcp->batch);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f96d62159720..d7c3cb8688dd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3722,6 +3722,38 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
 	return sc->nr_scanned >= sc->nr_to_reclaim;
 }
 
+/* Page allocator PCP high watermark is lowered if reclaim is active. */
+static inline void
+update_reclaim_active(pg_data_t *pgdat, int highest_zoneidx, bool active)
+{
+	int i;
+	struct zone *zone;
+
+	for (i = 0; i <= highest_zoneidx; i++) {
+		zone = pgdat->node_zones + i;
+
+		if (!managed_zone(zone))
+			continue;
+
+		if (active)
+			set_bit(ZONE_RECLAIM_ACTIVE, &zone->flags);
+		else
+			clear_bit(ZONE_RECLAIM_ACTIVE, &zone->flags);
+	}
+}
+
+static inline void
+set_reclaim_active(pg_data_t *pgdat, int highest_zoneidx)
+{
+	update_reclaim_active(pgdat, highest_zoneidx, true);
+}
+
+static inline void
+clear_reclaim_active(pg_data_t *pgdat, int highest_zoneidx)
+{
+	update_reclaim_active(pgdat, highest_zoneidx, false);
+}
+
 /*
  * For kswapd, balance_pgdat() will reclaim pages across a node from zones
  * that are eligible for use by the caller until at least one zone is
@@ -3774,6 +3806,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 	boosted = nr_boost_reclaim;
 
 restart:
+	set_reclaim_active(pgdat, highest_zoneidx);
 	sc.priority = DEF_PRIORITY;
 	do {
 		unsigned long nr_reclaimed = sc.nr_reclaimed;
@@ -3907,6 +3940,8 @@ restart:
 		pgdat->kswapd_failures++;
 
 out:
+	clear_reclaim_active(pgdat, highest_zoneidx);
+
 	/* If reclaim was boosted, account for the reclaim done in this pass */
 	if (boosted) {
 		unsigned long flags;

From 74f44822097c665041010994502b5971d6cd9f04 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:24 -0700
Subject: [PATCH 3244/3292] mm/page_alloc: introduce
 vm.percpu_pagelist_high_fraction

This introduces a new sysctl vm.percpu_pagelist_high_fraction.  It is
similar to the old vm.percpu_pagelist_fraction.  The old sysctl increased
both pcp->batch and pcp->high with the higher pcp->high potentially
reducing zone->lock contention.  However, the higher pcp->batch value also
potentially increased allocation latency while the PCP was refilled.  This
sysctl only adjusts pcp->high so that zone->lock contention is potentially
reduced but allocation latency during a PCP refill remains the same.

  # grep -E "high:|batch" /proc/zoneinfo | tail -2
              high:  649
              batch: 63

  # sysctl vm.percpu_pagelist_high_fraction=8
  # grep -E "high:|batch" /proc/zoneinfo | tail -2
              high:  35071
              batch: 63

  # sysctl vm.percpu_pagelist_high_fraction=64
              high:  4383
              batch: 63

  # sysctl vm.percpu_pagelist_high_fraction=0
              high:  649
              batch: 63

[mgorman@techsingularity.net: fix documentation]
  Link: https://lkml.kernel.org/r/20210528151010.GQ30378@techsingularity.net

Link: https://lkml.kernel.org/r/20210525080119.5455-7-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/sysctl/vm.rst | 21 ++++++++
 include/linux/mmzone.h                  |  3 ++
 kernel/sysctl.c                         |  8 +++
 mm/page_alloc.c                         | 69 ++++++++++++++++++++++---
 4 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 2fcafccb53a8..2da25735a629 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -64,6 +64,7 @@ Currently, these files are in /proc/sys/vm:
 - overcommit_ratio
 - page-cluster
 - panic_on_oom
+- percpu_pagelist_high_fraction
 - stat_interval
 - stat_refresh
 - numa_stat
@@ -789,6 +790,26 @@ panic_on_oom=2+kdump gives you very strong tool to investigate
 why oom happens. You can get snapshot.
 
 
+percpu_pagelist_high_fraction
+=============================
+
+This is the fraction of pages in each zone that are can be stored to
+per-cpu page lists. It is an upper boundary that is divided depending
+on the number of online CPUs. The min value for this is 8 which means
+that we do not allow more than 1/8th of pages in each zone to be stored
+on per-cpu page lists. This entry only changes the value of hot per-cpu
+page lists. A user can specify a number like 100 to allocate 1/100th of
+each zone between per-cpu lists.
+
+The batch value of each per-cpu page list remains the same regardless of
+the value of the high fraction so allocation latencies are unaffected.
+
+The initial value is zero. Kernel uses this value to set the high pcp->high
+mark based on the low watermark for the zone and the number of local
+online CPUs.  If the user writes '0' to this sysctl, it will revert to
+this default behavior.
+
+
 stat_interval
 =============
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b2f40d64bc4b..7d206ca850c7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1029,12 +1029,15 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
 		size_t *, loff_t *);
+int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
+		void *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 		void *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 		void *, size_t *, loff_t *);
 int numa_zonelist_order_handler(struct ctl_table *, int,
 		void *, size_t *, loff_t *);
+extern int percpu_pagelist_high_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 51213c33171e..69d925f1e5da 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2908,6 +2908,14 @@ static struct ctl_table vm_table[] = {
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &one_thousand,
 	},
+	{
+		.procname	= "percpu_pagelist_high_fraction",
+		.data		= &percpu_pagelist_high_fraction,
+		.maxlen		= sizeof(percpu_pagelist_high_fraction),
+		.mode		= 0644,
+		.proc_handler	= percpu_pagelist_high_fraction_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
 	{
 		.procname	= "page_lock_unfairness",
 		.data		= &sysctl_page_lock_unfairness,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index adf35ccfd8e5..cfc4071310fb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -120,6 +120,7 @@ typedef int __bitwise fpi_t;
 
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
+#define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
 
 struct pagesets {
 	local_lock_t lock;
@@ -192,6 +193,7 @@ EXPORT_SYMBOL(_totalram_pages);
 unsigned long totalreserve_pages __read_mostly;
 unsigned long totalcma_pages __read_mostly;
 
+int percpu_pagelist_high_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
 EXPORT_SYMBOL(init_on_alloc);
@@ -6725,17 +6727,32 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online)
 #ifdef CONFIG_MMU
 	int high;
 	int nr_local_cpus;
+	unsigned long total_pages;
+
+	if (!percpu_pagelist_high_fraction) {
+		/*
+		 * By default, the high value of the pcp is based on the zone
+		 * low watermark so that if they are full then background
+		 * reclaim will not be started prematurely.
+		 */
+		total_pages = low_wmark_pages(zone);
+	} else {
+		/*
+		 * If percpu_pagelist_high_fraction is configured, the high
+		 * value is based on a fraction of the managed pages in the
+		 * zone.
+		 */
+		total_pages = zone_managed_pages(zone) / percpu_pagelist_high_fraction;
+	}
 
 	/*
-	 * The high value of the pcp is based on the zone low watermark
-	 * so that if they are full then background reclaim will not be
-	 * started prematurely. The value is split across all online CPUs
-	 * local to the zone. Note that early in boot that CPUs may not be
-	 * online yet and that during CPU hotplug that the cpumask is not
-	 * yet updated when a CPU is being onlined.
+	 * Split the high value across all online CPUs local to the zone. Note
+	 * that early in boot that CPUs may not be online yet and that during
+	 * CPU hotplug that the cpumask is not yet updated when a CPU is being
+	 * onlined.
 	 */
 	nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))) + cpu_online;
-	high = low_wmark_pages(zone) / nr_local_cpus;
+	high = total_pages / nr_local_cpus;
 
 	/*
 	 * Ensure high is at least batch*4. The multiple is based on the
@@ -8500,6 +8517,44 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
+/*
+ * percpu_pagelist_high_fraction - changes the pcp->high for each zone on each
+ * cpu. It is the fraction of total pages in each zone that a hot per cpu
+ * pagelist can have before it gets flushed back to buddy allocator.
+ */
+int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
+		int write, void *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	int old_percpu_pagelist_high_fraction;
+	int ret;
+
+	mutex_lock(&pcp_batch_high_lock);
+	old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (!write || ret < 0)
+		goto out;
+
+	/* Sanity checking to avoid pcp imbalance */
+	if (percpu_pagelist_high_fraction &&
+	    percpu_pagelist_high_fraction < MIN_PERCPU_PAGELIST_HIGH_FRACTION) {
+		percpu_pagelist_high_fraction = old_percpu_pagelist_high_fraction;
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* No change? */
+	if (percpu_pagelist_high_fraction == old_percpu_pagelist_high_fraction)
+		goto out;
+
+	for_each_populated_zone(zone)
+		zone_set_pageset_high_and_batch(zone, 0);
+out:
+	mutex_unlock(&pcp_batch_high_lock);
+	return ret;
+}
+
 #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
 /*
  * Returns the number of pages that arch has reserved but

From 777c00f5ede4fcb9ae49a2a957bec26d4d8f4c29 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Mon, 28 Jun 2021 19:42:27 -0700
Subject: [PATCH 3245/3292] mm: drop SECTION_SHIFT in code comments

Actually SECTIONS_SHIFT is used in the kernel code, so the code comments
is strictly incorrect.  And since commit bbeae5b05ef6 ("mm: move page
flags layout to separate header"), SECTIONS_SHIFT definition has been
moved to include/linux/page-flags-layout.h, since code itself looks quite
straighforward, instead of moving the code comment into the new place as
well, we just simply remove it.

This also fixed a checkpatch complain derived from the original code:
WARNING: please, no space before tabs
+ * SECTIONS_SHIFT    ^I^I#bits space required to store a section #$

Link: https://lkml.kernel.org/r/20210531091908.1738465-2-aisheng.dong@nxp.com
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Suggested-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Yu Zhao <yuzhao@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7d206ca850c7..3e62e8ef68b5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1200,8 +1200,6 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 #ifdef CONFIG_SPARSEMEM
 
 /*
- * SECTION_SHIFT    		#bits space required to store a section #
- *
  * PA_SECTION_SHIFT		physical address to/from section number
  * PFN_SECTION_SHIFT		pfn to/from section number
  */

From e47aa90568de326625b19d7bc872f8d70b0820b0 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Mon, 28 Jun 2021 19:42:30 -0700
Subject: [PATCH 3246/3292] mm/page_alloc: improve memmap_pages dbg msg

Make debug message more accurate.

Link: https://lkml.kernel.org/r/20210531091908.1738465-6-aisheng.dong@nxp.com
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cfc4071310fb..2a306c34fda7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7383,7 +7383,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 					pr_debug("  %s zone: %lu pages used for memmap\n",
 						 zone_names[j], memmap_pages);
 			} else
-				pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
+				pr_warn("  %s zone: %lu memmap pages exceeds freesize %lu\n",
 					zone_names[j], memmap_pages, freesize);
 		}
 

From f7ec104458e00d27a190348ac3a513f3df3699a4 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Mon, 28 Jun 2021 19:42:33 -0700
Subject: [PATCH 3247/3292] mm/page_alloc: fix counting of managed_pages

commit f63661566fad ("mm/page_alloc.c: clear out zone->lowmem_reserve[] if
the zone is empty") clears out zone->lowmem_reserve[] if zone is empty.
But when zone is not empty and sysctl_lowmem_reserve_ratio[i] is set to
zero, zone_managed_pages(zone) is not counted in the managed_pages either.
This is inconsistent with the description of lowmem_reserve, so fix it.

Link: https://lkml.kernel.org/r/20210527125707.3760259-1-liushixin2@huawei.com
Fixes: f63661566fad ("mm/page_alloc.c: clear out zone->lowmem_reserve[] if the zone is empty")
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reported-by: yangerkun <yangerkun@huawei.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2a306c34fda7..fc151f6a7dbd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8240,14 +8240,14 @@ static void setup_per_zone_lowmem_reserve(void)
 			unsigned long managed_pages = 0;
 
 			for (j = i + 1; j < MAX_NR_ZONES; j++) {
-				if (clear) {
-					zone->lowmem_reserve[j] = 0;
-				} else {
-					struct zone *upper_zone = &pgdat->node_zones[j];
+				struct zone *upper_zone = &pgdat->node_zones[j];
 
-					managed_pages += zone_managed_pages(upper_zone);
+				managed_pages += zone_managed_pages(upper_zone);
+
+				if (clear)
+					zone->lowmem_reserve[j] = 0;
+				else
 					zone->lowmem_reserve[j] = managed_pages / ratio;
-				}
 			}
 		}
 	}

From 21d02f8f8464e27434f477c73431075197a9f72f Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:42:36 -0700
Subject: [PATCH 3248/3292] mm/page_alloc: move free_the_page

Patch series "Allow high order pages to be stored on PCP", v2.

The per-cpu page allocator (PCP) only handles order-0 pages.  With the
series "Use local_lock for pcp protection and reduce stat overhead" and
"Calculate pcp->high based on zone sizes and active CPUs", it's now
feasible to store high-order pages on PCP lists.

This small series allows PCP to store "cheap" orders where cheap is
determined by PAGE_ALLOC_COSTLY_ORDER and THP-sized allocations.

This patch (of 2):

In the next page, free_compount_page is going to use the common helper
free_the_page.  This patch moves the definition to ease review.  No
functional change.

Link: https://lkml.kernel.org/r/20210603142220.10851-1-mgorman@techsingularity.net
Link: https://lkml.kernel.org/r/20210603142220.10851-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc151f6a7dbd..58f7a321598f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -687,6 +687,14 @@ out:
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
+static inline void free_the_page(struct page *page, unsigned int order)
+{
+	if (order == 0)		/* Via pcp? */
+		free_unref_page(page);
+	else
+		__free_pages_ok(page, order, FPI_NONE);
+}
+
 /*
  * Higher-order pages are called "compound pages".  They are structured thusly:
  *
@@ -5349,14 +5357,6 @@ unsigned long get_zeroed_page(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(get_zeroed_page);
 
-static inline void free_the_page(struct page *page, unsigned int order)
-{
-	if (order == 0)		/* Via pcp? */
-		free_unref_page(page);
-	else
-		__free_pages_ok(page, order, FPI_NONE);
-}
-
 /**
  * __free_pages - Free pages allocated with alloc_pages().
  * @page: The page pointer returned from alloc_pages().

From fdb7d9b7acd02f573ae4fc0c7772f6b5c6b1bad0 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:39 -0700
Subject: [PATCH 3249/3292] alpha: remove DISCONTIGMEM and NUMA

Patch series "Remove DISCONTIGMEM memory model", v3.

SPARSEMEM memory model was supposed to entirely replace DISCONTIGMEM a
(long) while ago.  The last architectures that used DISCONTIGMEM were
updated to use other memory models in v5.11 and it is about the time to
entirely remove DISCONTIGMEM from the kernel.

This set removes DISCONTIGMEM from alpha, arc and m68k, simplifies memory
model selection in mm/Kconfig and replaces usage of redundant
CONFIG_NEED_MULTIPLE_NODES and CONFIG_FLAT_NODE_MEM_MAP with CONFIG_NUMA
and CONFIG_FLATMEM respectively.

I've also removed NUMA support on alpha that was BROKEN for more than 15
years.

There were also minor updates all over arch/ to remove mentions of
DISCONTIGMEM in comments and #ifdefs.

This patch (of 9):

NUMA is marked broken on alpha for more than 15 years and DISCONTIGMEM was
replaced with SPARSEMEM in v5.11.

Remove both NUMA and DISCONTIGMEM support from alpha.

Link: https://lkml.kernel.org/r/20210608091316.3622-1-rppt@kernel.org
Link: https://lkml.kernel.org/r/20210608091316.3622-2-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/Kconfig                |  22 ---
 arch/alpha/include/asm/machvec.h  |   6 -
 arch/alpha/include/asm/mmzone.h   | 100 --------------
 arch/alpha/include/asm/pgtable.h  |   4 -
 arch/alpha/include/asm/topology.h |  39 ------
 arch/alpha/kernel/core_marvel.c   |  53 +------
 arch/alpha/kernel/core_wildfire.c |  29 +---
 arch/alpha/kernel/pci_iommu.c     |  29 ----
 arch/alpha/kernel/proto.h         |   8 --
 arch/alpha/kernel/setup.c         |  16 ---
 arch/alpha/kernel/sys_marvel.c    |   5 -
 arch/alpha/kernel/sys_wildfire.c  |   5 -
 arch/alpha/mm/Makefile            |   2 -
 arch/alpha/mm/init.c              |   3 -
 arch/alpha/mm/numa.c              | 223 ------------------------------
 15 files changed, 4 insertions(+), 540 deletions(-)
 delete mode 100644 arch/alpha/include/asm/mmzone.h
 delete mode 100644 arch/alpha/mm/numa.c

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5998106faa60..8954216b9956 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -549,29 +549,12 @@ config NR_CPUS
 	  MARVEL support can handle a maximum of 32 CPUs, all the others
 	  with working support have a maximum of 4 CPUs.
 
-config ARCH_DISCONTIGMEM_ENABLE
-	bool "Discontiguous Memory Support"
-	depends on BROKEN
-	help
-	  Say Y to support efficient handling of discontiguous physical memory,
-	  for architectures which are either NUMA (Non-Uniform Memory Access)
-	  or have huge holes in the physical address space for other reasons.
-	  See <file:Documentation/vm/numa.rst> for more.
-
 config ARCH_SPARSEMEM_ENABLE
 	bool "Sparse Memory Support"
 	help
 	  Say Y to support efficient handling of discontiguous physical memory,
 	  for systems that have huge holes in the physical address space.
 
-config NUMA
-	bool "NUMA Support (EXPERIMENTAL)"
-	depends on DISCONTIGMEM && BROKEN
-	help
-	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
-	  Access).  This option is for configuring high-end multiprocessor
-	  server machines.  If in doubt, say N.
-
 config ALPHA_WTINT
 	bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC
 	default y if ALPHA_QEMU
@@ -596,11 +579,6 @@ config ALPHA_WTINT
 
 	  If unsure, say N.
 
-config NODES_SHIFT
-	int
-	default "7"
-	depends on NEED_MULTIPLE_NODES
-
 # LARGE_VMALLOC is racy, if you *really* need it then fix it first
 config ALPHA_LARGE_VMALLOC
 	bool
diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h
index a4e96e2bec74..e49fabce7b33 100644
--- a/arch/alpha/include/asm/machvec.h
+++ b/arch/alpha/include/asm/machvec.h
@@ -99,12 +99,6 @@ struct alpha_machine_vector
 
 	const char *vector_name;
 
-	/* NUMA information */
-	int (*pa_to_nid)(unsigned long);
-	int (*cpuid_to_nid)(int);
-	unsigned long (*node_mem_start)(int);
-	unsigned long (*node_mem_size)(int);
-
 	/* System specific parameters.  */
 	union {
 	    struct {
diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
deleted file mode 100644
index 86644604d977..000000000000
--- a/arch/alpha/include/asm/mmzone.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99
- * Adapted for the alpha wildfire architecture Jan 2001.
- */
-#ifndef _ASM_MMZONE_H_
-#define _ASM_MMZONE_H_
-
-#ifdef CONFIG_DISCONTIGMEM
-
-#include <asm/smp.h>
-
-/*
- * Following are macros that are specific to this numa platform.
- */
-
-extern pg_data_t node_data[];
-
-#define alpha_pa_to_nid(pa)		\
-        (alpha_mv.pa_to_nid 		\
-	 ? alpha_mv.pa_to_nid(pa)	\
-	 : (0))
-#define node_mem_start(nid)		\
-        (alpha_mv.node_mem_start 	\
-	 ? alpha_mv.node_mem_start(nid) \
-	 : (0UL))
-#define node_mem_size(nid)		\
-        (alpha_mv.node_mem_size 	\
-	 ? alpha_mv.node_mem_size(nid) 	\
-	 : ((nid) ? (0UL) : (~0UL)))
-
-#define pa_to_nid(pa)		alpha_pa_to_nid(pa)
-#define NODE_DATA(nid)		(&node_data[(nid)])
-
-#define node_localnr(pfn, nid)	((pfn) - NODE_DATA(nid)->node_start_pfn)
-
-#if 1
-#define PLAT_NODE_DATA_LOCALNR(p, n)	\
-	(((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn)
-#else
-static inline unsigned long
-PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
-{
-	unsigned long temp;
-	temp = p >> PAGE_SHIFT;
-	return temp - PLAT_NODE_DATA(n)->gendata.node_start_pfn;
-}
-#endif
-
-/*
- * Following are macros that each numa implementation must define.
- */
-
-/*
- * Given a kernel address, find the home node of the underlying memory.
- */
-#define kvaddr_to_nid(kaddr)	pa_to_nid(__pa(kaddr))
-
-/*
- * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory
- * and returns the kaddr corresponding to first physical page in the
- * node's mem_map.
- */
-#define LOCAL_BASE_ADDR(kaddr)						  \
-    ((unsigned long)__va(NODE_DATA(kvaddr_to_nid(kaddr))->node_start_pfn  \
-			 << PAGE_SHIFT))
-
-/* XXX: FIXME -- nyc */
-#define kern_addr_valid(kaddr)	(0)
-
-#define mk_pte(page, pgprot)						     \
-({								 	     \
-	pte_t pte;                                                           \
-	unsigned long pfn;                                                   \
-									     \
-	pfn = page_to_pfn(page) << 32; \
-	pte_val(pte) = pfn | pgprot_val(pgprot);			     \
-									     \
-	pte;								     \
-})
-
-#define pte_page(x)							\
-({									\
-       	unsigned long kvirt;						\
-	struct page * __xx;						\
-									\
-	kvirt = (unsigned long)__va(pte_val(x) >> (32-PAGE_SHIFT));	\
-	__xx = virt_to_page(kvirt);					\
-									\
-	__xx;                                                           \
-})
-
-#define pfn_to_nid(pfn)		pa_to_nid(((u64)(pfn) << PAGE_SHIFT))
-#define pfn_valid(pfn)							\
-	(((pfn) - node_start_pfn(pfn_to_nid(pfn))) <			\
-	 node_spanned_pages(pfn_to_nid(pfn)))					\
-
-#endif /* CONFIG_DISCONTIGMEM */
-
-#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 8d856c62e22a..e1757b7cfe3d 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -206,7 +206,6 @@ extern unsigned long __zero_page(void);
 #define page_to_pa(page)	(page_to_pfn(page) << PAGE_SHIFT)
 #define pte_pfn(pte)	(pte_val(pte) >> 32)
 
-#ifndef CONFIG_DISCONTIGMEM
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 #define mk_pte(page, pgprot)						\
 ({									\
@@ -215,7 +214,6 @@ extern unsigned long __zero_page(void);
 	pte_val(pte) = (page_to_pfn(page) << 32) | pgprot_val(pgprot);	\
 	pte;								\
 })
-#endif
 
 extern inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot)
 { pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpfn) << 32) | pgprot_val(pgprot); return pte; }
@@ -330,9 +328,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#ifndef CONFIG_DISCONTIGMEM
 #define kern_addr_valid(addr)	(1)
-#endif
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h
index 5a77a40567fa..7d393036aa8f 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -7,45 +7,6 @@
 #include <linux/numa.h>
 #include <asm/machvec.h>
 
-#ifdef CONFIG_NUMA
-static inline int cpu_to_node(int cpu)
-{
-	int node;
-	
-	if (!alpha_mv.cpuid_to_nid)
-		return 0;
-
-	node = alpha_mv.cpuid_to_nid(cpu);
-
-#ifdef DEBUG_NUMA
-	BUG_ON(node < 0);
-#endif
-
-	return node;
-}
-
-extern struct cpumask node_to_cpumask_map[];
-/* FIXME: This is dumb, recalculating every time.  But simple. */
-static const struct cpumask *cpumask_of_node(int node)
-{
-	int cpu;
-
-	if (node == NUMA_NO_NODE)
-		return cpu_all_mask;
-
-	cpumask_clear(&node_to_cpumask_map[node]);
-
-	for_each_online_cpu(cpu) {
-		if (cpu_to_node(cpu) == node)
-			cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
-	}
-
-	return &node_to_cpumask_map[node];
-}
-
-#define cpumask_of_pcibus(bus)	(cpu_online_mask)
-
-#endif /* !CONFIG_NUMA */
 # include <asm-generic/topology.h>
 
 #endif /* _ASM_ALPHA_TOPOLOGY_H */
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 4485b77f8658..1efca79ac83c 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -287,8 +287,7 @@ io7_init_hose(struct io7 *io7, int port)
 	/*
 	 * Set up window 0 for scatter-gather 8MB at 8MB.
 	 */
-	hose->sg_isa = iommu_arena_new_node(marvel_cpuid_to_nid(io7->pe),
-					    hose, 0x00800000, 0x00800000, 0);
+	hose->sg_isa = iommu_arena_new_node(0, hose, 0x00800000, 0x00800000, 0);
 	hose->sg_isa->align_entry = 8;	/* cache line boundary */
 	csrs->POx_WBASE[0].csr = 
 		hose->sg_isa->dma_base | wbase_m_ena | wbase_m_sg;
@@ -305,8 +304,7 @@ io7_init_hose(struct io7 *io7, int port)
 	/*
 	 * Set up window 2 for scatter-gather (up-to) 1GB at 3GB.
 	 */
-	hose->sg_pci = iommu_arena_new_node(marvel_cpuid_to_nid(io7->pe),
-					    hose, 0xc0000000, 0x40000000, 0);
+	hose->sg_pci = iommu_arena_new_node(0, hose, 0xc0000000, 0x40000000, 0);
 	hose->sg_pci->align_entry = 8;	/* cache line boundary */
 	csrs->POx_WBASE[2].csr = 
 		hose->sg_pci->dma_base | wbase_m_ena | wbase_m_sg;
@@ -843,53 +841,8 @@ EXPORT_SYMBOL(marvel_ioportmap);
 EXPORT_SYMBOL(marvel_ioread8);
 EXPORT_SYMBOL(marvel_iowrite8);
 #endif
-
+
 /*
- * NUMA Support
- */
-/**********
- * FIXME - for now each cpu is a node by itself 
- *              -- no real support for striped mode 
- **********
- */
-int
-marvel_pa_to_nid(unsigned long pa)
-{
-	int cpuid;
-
-	if ((pa >> 43) & 1) 	/* I/O */ 
-		cpuid = (~(pa >> 35) & 0xff);
-	else			/* mem */
-		cpuid = ((pa >> 34) & 0x3) | ((pa >> (37 - 2)) & (0x1f << 2));
-
-	return marvel_cpuid_to_nid(cpuid);
-}
-
-int
-marvel_cpuid_to_nid(int cpuid)
-{
-	return cpuid;
-}
-
-unsigned long
-marvel_node_mem_start(int nid)
-{
-	unsigned long pa;
-
-	pa = (nid & 0x3) | ((nid & (0x1f << 2)) << 1);
-	pa <<= 34;
-
-	return pa;
-}
-
-unsigned long
-marvel_node_mem_size(int nid)
-{
-	return 16UL * 1024 * 1024 * 1024; /* 16GB */
-}
-
-
-/* 
  * AGP GART Support.
  */
 #include <linux/agp_backend.h>
diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c
index e8d3b033018d..3a804b67f9da 100644
--- a/arch/alpha/kernel/core_wildfire.c
+++ b/arch/alpha/kernel/core_wildfire.c
@@ -434,39 +434,12 @@ wildfire_write_config(struct pci_bus *bus, unsigned int devfn, int where,
 	return PCIBIOS_SUCCESSFUL;
 }
 
-struct pci_ops wildfire_pci_ops = 
+struct pci_ops wildfire_pci_ops =
 {
 	.read =		wildfire_read_config,
 	.write =	wildfire_write_config,
 };
 
-
-/*
- * NUMA Support
- */
-int wildfire_pa_to_nid(unsigned long pa)
-{
-	return pa >> 36;
-}
-
-int wildfire_cpuid_to_nid(int cpuid)
-{
-	/* assume 4 CPUs per node */
-	return cpuid >> 2;
-}
-
-unsigned long wildfire_node_mem_start(int nid)
-{
-	/* 64GB per node */
-	return (unsigned long)nid * (64UL * 1024 * 1024 * 1024);
-}
-
-unsigned long wildfire_node_mem_size(int nid)
-{
-	/* 64GB per node */
-	return 64UL * 1024 * 1024 * 1024;
-}
-
 #if DEBUG_DUMP_REGS
 
 static void __init
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index d84b19aa8e9d..35d7b3096d6e 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -71,33 +71,6 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
 	if (align < mem_size)
 		align = mem_size;
 
-
-#ifdef CONFIG_DISCONTIGMEM
-
-	arena = memblock_alloc_node(sizeof(*arena), align, nid);
-	if (!NODE_DATA(nid) || !arena) {
-		printk("%s: couldn't allocate arena from node %d\n"
-		       "    falling back to system-wide allocation\n",
-		       __func__, nid);
-		arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
-		if (!arena)
-			panic("%s: Failed to allocate %zu bytes\n", __func__,
-			      sizeof(*arena));
-	}
-
-	arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid);
-	if (!NODE_DATA(nid) || !arena->ptes) {
-		printk("%s: couldn't allocate arena ptes from node %d\n"
-		       "    falling back to system-wide allocation\n",
-		       __func__, nid);
-		arena->ptes = memblock_alloc(mem_size, align);
-		if (!arena->ptes)
-			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-			      __func__, mem_size, align);
-	}
-
-#else /* CONFIG_DISCONTIGMEM */
-
 	arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
 	if (!arena)
 		panic("%s: Failed to allocate %zu bytes\n", __func__,
@@ -107,8 +80,6 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
 		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 		      __func__, mem_size, align);
 
-#endif /* CONFIG_DISCONTIGMEM */
-
 	spin_lock_init(&arena->lock);
 	arena->hose = hose;
 	arena->dma_base = base;
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
index 701a05090141..5816a31c1b38 100644
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -49,10 +49,6 @@ extern void marvel_init_arch(void);
 extern void marvel_kill_arch(int);
 extern void marvel_machine_check(unsigned long, unsigned long);
 extern void marvel_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
-extern int marvel_pa_to_nid(unsigned long);
-extern int marvel_cpuid_to_nid(int);
-extern unsigned long marvel_node_mem_start(int);
-extern unsigned long marvel_node_mem_size(int);
 extern struct _alpha_agp_info *marvel_agp_info(void);
 struct io7 *marvel_find_io7(int pe);
 struct io7 *marvel_next_io7(struct io7 *prev);
@@ -101,10 +97,6 @@ extern void wildfire_init_arch(void);
 extern void wildfire_kill_arch(int);
 extern void wildfire_machine_check(unsigned long vector, unsigned long la_ptr);
 extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
-extern int wildfire_pa_to_nid(unsigned long);
-extern int wildfire_cpuid_to_nid(int);
-extern unsigned long wildfire_node_mem_start(int);
-extern unsigned long wildfire_node_mem_size(int);
 
 /* console.c */
 #ifdef CONFIG_VGA_HOSE
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 03dda3beb3bd..5f6858e9dc28 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -79,11 +79,6 @@ int alpha_l3_cacheshape;
 unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON;
 #endif
 
-#ifdef CONFIG_NUMA
-struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_to_cpumask_map);
-#endif
-
 /* Which processor we booted from.  */
 int boot_cpuid;
 
@@ -305,7 +300,6 @@ move_initrd(unsigned long mem_limit)
 }
 #endif
 
-#ifndef CONFIG_DISCONTIGMEM
 static void __init
 setup_memory(void *kernel_end)
 {
@@ -389,9 +383,6 @@ setup_memory(void *kernel_end)
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 }
-#else
-extern void setup_memory(void *);
-#endif /* !CONFIG_DISCONTIGMEM */
 
 int __init
 page_is_ram(unsigned long pfn)
@@ -618,13 +609,6 @@ setup_arch(char **cmdline_p)
 	       "VERBOSE_MCHECK "
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-	       "DISCONTIGMEM "
-#ifdef CONFIG_NUMA
-	       "NUMA "
-#endif
-#endif
-
 #ifdef CONFIG_DEBUG_SPINLOCK
 	       "DEBUG_SPINLOCK "
 #endif
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 83d6c53d6d4d..1f99b03effc2 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -461,10 +461,5 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = {
 	.kill_arch		= marvel_kill_arch,
 	.pci_map_irq		= marvel_map_irq,
 	.pci_swizzle		= common_swizzle,
-
-	.pa_to_nid		= marvel_pa_to_nid,
-	.cpuid_to_nid		= marvel_cpuid_to_nid,
-	.node_mem_start		= marvel_node_mem_start,
-	.node_mem_size		= marvel_node_mem_size,
 };
 ALIAS_MV(marvel_ev7)
diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c
index 2c54d707142a..3cee05443f07 100644
--- a/arch/alpha/kernel/sys_wildfire.c
+++ b/arch/alpha/kernel/sys_wildfire.c
@@ -337,10 +337,5 @@ struct alpha_machine_vector wildfire_mv __initmv = {
 	.kill_arch		= wildfire_kill_arch,
 	.pci_map_irq		= wildfire_map_irq,
 	.pci_swizzle		= common_swizzle,
-
-	.pa_to_nid		= wildfire_pa_to_nid,
-	.cpuid_to_nid		= wildfire_cpuid_to_nid,
-	.node_mem_start		= wildfire_node_mem_start,
-	.node_mem_size		= wildfire_node_mem_size,
 };
 ALIAS_MV(wildfire)
diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
index 08ac6612edad..bd770302eb82 100644
--- a/arch/alpha/mm/Makefile
+++ b/arch/alpha/mm/Makefile
@@ -6,5 +6,3 @@
 ccflags-y := -Werror
 
 obj-y	:= init.o fault.o
-
-obj-$(CONFIG_DISCONTIGMEM) += numa.o
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index a97650a618f1..f6114d03357c 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -235,8 +235,6 @@ callback_init(void * kernel_end)
 	return kernel_end;
 }
 
-
-#ifndef CONFIG_DISCONTIGMEM
 /*
  * paging_init() sets up the memory map.
  */
@@ -257,7 +255,6 @@ void __init paging_init(void)
 	/* Initialize the kernel's ZERO_PGE. */
 	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
 }
-#endif /* CONFIG_DISCONTIGMEM */
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
 void
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
deleted file mode 100644
index 0636e254a22f..000000000000
--- a/arch/alpha/mm/numa.c
+++ /dev/null
@@ -1,223 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  linux/arch/alpha/mm/numa.c
- *
- *  DISCONTIGMEM NUMA alpha support.
- *
- *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/swap.h>
-#include <linux/initrd.h>
-#include <linux/pfn.h>
-#include <linux/module.h>
-
-#include <asm/hwrpb.h>
-#include <asm/sections.h>
-
-pg_data_t node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
-#undef DEBUG_DISCONTIG
-#ifdef DEBUG_DISCONTIG
-#define DBGDCONT(args...) printk(args)
-#else
-#define DBGDCONT(args...)
-#endif
-
-#define for_each_mem_cluster(memdesc, _cluster, i)		\
-	for ((_cluster) = (memdesc)->cluster, (i) = 0;		\
-	     (i) < (memdesc)->numclusters; (i)++, (_cluster)++)
-
-static void __init show_mem_layout(void)
-{
-	struct memclust_struct * cluster;
-	struct memdesc_struct * memdesc;
-	int i;
-
-	/* Find free clusters, and init and free the bootmem accordingly.  */
-	memdesc = (struct memdesc_struct *)
-	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
-
-	printk("Raw memory layout:\n");
-	for_each_mem_cluster(memdesc, cluster, i) {
-		printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
-		       i, cluster->usage, cluster->start_pfn,
-		       cluster->start_pfn + cluster->numpages);
-	}
-}
-
-static void __init
-setup_memory_node(int nid, void *kernel_end)
-{
-	extern unsigned long mem_size_limit;
-	struct memclust_struct * cluster;
-	struct memdesc_struct * memdesc;
-	unsigned long start_kernel_pfn, end_kernel_pfn;
-	unsigned long start, end;
-	unsigned long node_pfn_start, node_pfn_end;
-	unsigned long node_min_pfn, node_max_pfn;
-	int i;
-	int show_init = 0;
-
-	/* Find the bounds of current node */
-	node_pfn_start = (node_mem_start(nid)) >> PAGE_SHIFT;
-	node_pfn_end = node_pfn_start + (node_mem_size(nid) >> PAGE_SHIFT);
-	
-	/* Find free clusters, and init and free the bootmem accordingly.  */
-	memdesc = (struct memdesc_struct *)
-	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
-
-	/* find the bounds of this node (node_min_pfn/node_max_pfn) */
-	node_min_pfn = ~0UL;
-	node_max_pfn = 0UL;
-	for_each_mem_cluster(memdesc, cluster, i) {
-		/* Bit 0 is console/PALcode reserved.  Bit 1 is
-		   non-volatile memory -- we might want to mark
-		   this for later.  */
-		if (cluster->usage & 3)
-			continue;
-
-		start = cluster->start_pfn;
-		end = start + cluster->numpages;
-
-		if (start >= node_pfn_end || end <= node_pfn_start)
-			continue;
-
-		if (!show_init) {
-			show_init = 1;
-			printk("Initializing bootmem allocator on Node ID %d\n", nid);
-		}
-		printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
-		       i, cluster->usage, cluster->start_pfn,
-		       cluster->start_pfn + cluster->numpages);
-
-		if (start < node_pfn_start)
-			start = node_pfn_start;
-		if (end > node_pfn_end)
-			end = node_pfn_end;
-
-		if (start < node_min_pfn)
-			node_min_pfn = start;
-		if (end > node_max_pfn)
-			node_max_pfn = end;
-	}
-
-	if (mem_size_limit && node_max_pfn > mem_size_limit) {
-		static int msg_shown = 0;
-		if (!msg_shown) {
-			msg_shown = 1;
-			printk("setup: forcing memory size to %ldK (from %ldK).\n",
-			       mem_size_limit << (PAGE_SHIFT - 10),
-			       node_max_pfn    << (PAGE_SHIFT - 10));
-		}
-		node_max_pfn = mem_size_limit;
-	}
-
-	if (node_min_pfn >= node_max_pfn)
-		return;
-
-	/* Update global {min,max}_low_pfn from node information. */
-	if (node_min_pfn < min_low_pfn)
-		min_low_pfn = node_min_pfn;
-	if (node_max_pfn > max_low_pfn)
-		max_pfn = max_low_pfn = node_max_pfn;
-
-#if 0 /* we'll try this one again in a little while */
-	/* Cute trick to make sure our local node data is on local memory */
-	node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
-#endif
-	printk(" Detected node memory:   start %8lu, end %8lu\n",
-	       node_min_pfn, node_max_pfn);
-
-	DBGDCONT(" DISCONTIG: node_data[%d]   is at 0x%p\n", nid, NODE_DATA(nid));
-
-	/* Find the bounds of kernel memory.  */
-	start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
-	end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
-
-	if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
-		panic("kernel loaded out of ram");
-
-	memblock_add_node(PFN_PHYS(node_min_pfn),
-			  (node_max_pfn - node_min_pfn) << PAGE_SHIFT, nid);
-
-	/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
-	   Note that we round this down, not up - node memory
-	   has much larger alignment than 8Mb, so it's safe. */
-	node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
-
-	NODE_DATA(nid)->node_start_pfn = node_min_pfn;
-	NODE_DATA(nid)->node_present_pages = node_max_pfn - node_min_pfn;
-
-	node_set_online(nid);
-}
-
-void __init
-setup_memory(void *kernel_end)
-{
-	unsigned long kernel_size;
-	int nid;
-
-	show_mem_layout();
-
-	nodes_clear(node_online_map);
-
-	min_low_pfn = ~0UL;
-	max_low_pfn = 0UL;
-	for (nid = 0; nid < MAX_NUMNODES; nid++)
-		setup_memory_node(nid, kernel_end);
-
-	kernel_size = virt_to_phys(kernel_end) - KERNEL_START_PHYS;
-	memblock_reserve(KERNEL_START_PHYS, kernel_size);
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	initrd_start = INITRD_START;
-	if (initrd_start) {
-		extern void *move_initrd(unsigned long);
-
-		initrd_end = initrd_start+INITRD_SIZE;
-		printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
-		       (void *) initrd_start, INITRD_SIZE);
-
-		if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) {
-			if (!move_initrd(PFN_PHYS(max_low_pfn)))
-				printk("initrd extends beyond end of memory "
-				       "(0x%08lx > 0x%p)\ndisabling initrd\n",
-				       initrd_end,
-				       phys_to_virt(PFN_PHYS(max_low_pfn)));
-		} else {
-			nid = kvaddr_to_nid(initrd_start);
-			memblock_reserve(virt_to_phys((void *)initrd_start),
-					 INITRD_SIZE);
-		}
-	}
-#endif /* CONFIG_BLK_DEV_INITRD */
-}
-
-void __init paging_init(void)
-{
-	unsigned long   max_zone_pfn[MAX_NR_ZONES] = {0, };
-	unsigned long	dma_local_pfn;
-
-	/*
-	 * The old global MAX_DMA_ADDRESS per-arch API doesn't fit
-	 * in the NUMA model, for now we convert it to a pfn and
-	 * we interpret this pfn as a local per-node information.
-	 * This issue isn't very important since none of these machines
-	 * have legacy ISA slots anyways.
-	 */
-	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-	max_zone_pfn[ZONE_DMA] = dma_local_pfn;
-	max_zone_pfn[ZONE_NORMAL] = max_pfn;
-
-	free_area_init(max_zone_pfn);
-
-	/* Initialize the kernel's ZERO_PGE. */
-	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
-}

From e7793e53901b31a06db534679e77c0cdeab260a2 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:43 -0700
Subject: [PATCH 3250/3292] arc: update comment about HIGHMEM implementation

Arc does not use DISCONTIGMEM to implement high memory, update the comment
describing how high memory works to reflect this.

Link: https://lkml.kernel.org/r/20210608091316.3622-3-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/mm/init.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index e2ed355438c9..397a201adfe3 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -139,16 +139,13 @@ void __init setup_arch_memory(void)
 
 #ifdef CONFIG_HIGHMEM
 	/*
-	 * Populate a new node with highmem
-	 *
 	 * On ARC (w/o PAE) HIGHMEM addresses are actually smaller (0 based)
-	 * than addresses in normal ala low memory (0x8000_0000 based).
+	 * than addresses in normal aka low memory (0x8000_0000 based).
 	 * Even with PAE, the huge peripheral space hole would waste a lot of
-	 * mem with single mem_map[]. This warrants a mem_map per region design.
-	 * Thus HIGHMEM on ARC is imlemented with DISCONTIGMEM.
-	 *
-	 * DISCONTIGMEM in turns requires multiple nodes. node 0 above is
-	 * populated with normal memory zone while node 1 only has highmem
+	 * mem with single contiguous mem_map[].
+	 * Thus when HIGHMEM on ARC is enabled the memory map corresponding
+	 * to the hole is freed and ARC specific version of pfn_valid()
+	 * handles the hole in the memory map.
 	 */
 #ifdef CONFIG_DISCONTIGMEM
 	node_set_online(1);

From 8b793b442051550b6cc694213e276587e01bddcb Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:46 -0700
Subject: [PATCH 3251/3292] arc: remove support for DISCONTIGMEM

DISCONTIGMEM was replaced by FLATMEM with freeing of the unused memory map
in v5.11.

Remove the support for DISCONTIGMEM entirely.

Link: https://lkml.kernel.org/r/20210608091316.3622-4-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/Kconfig              | 13 ------------
 arch/arc/include/asm/mmzone.h | 40 -----------------------------------
 arch/arc/mm/init.c            |  8 -------
 3 files changed, 61 deletions(-)
 delete mode 100644 arch/arc/include/asm/mmzone.h

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 2d98501c0897..d8f51eb8963b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -62,10 +62,6 @@ config SCHED_OMIT_FRAME_POINTER
 config GENERIC_CSUM
 	def_bool y
 
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool n
-	depends on BROKEN
-
 config ARCH_FLATMEM_ENABLE
 	def_bool y
 
@@ -344,15 +340,6 @@ config ARC_HUGEPAGE_16M
 
 endchoice
 
-config NODES_SHIFT
-	int "Maximum NUMA Nodes (as a power of 2)"
-	default "0" if !DISCONTIGMEM
-	default "1" if DISCONTIGMEM
-	depends on NEED_MULTIPLE_NODES
-	help
-	  Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory
-	  zones.
-
 config ARC_COMPACT_IRQ_LEVELS
 	depends on ISA_ARCOMPACT
 	bool "Setup Timer IRQ as high Priority"
diff --git a/arch/arc/include/asm/mmzone.h b/arch/arc/include/asm/mmzone.h
deleted file mode 100644
index b86b9d1e54dc..000000000000
--- a/arch/arc/include/asm/mmzone.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2016 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef _ASM_ARC_MMZONE_H
-#define _ASM_ARC_MMZONE_H
-
-#ifdef CONFIG_DISCONTIGMEM
-
-extern struct pglist_data node_data[];
-#define NODE_DATA(nid) (&node_data[nid])
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
-	int is_end_low = 1;
-
-	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40))
-		is_end_low = pfn <= virt_to_pfn(0xFFFFFFFFUL);
-
-	/*
-	 * node 0: lowmem:             0x8000_0000   to 0xFFFF_FFFF
-	 * node 1: HIGHMEM w/o  PAE40: 0x0           to 0x7FFF_FFFF
-	 *         HIGHMEM with PAE40: 0x1_0000_0000 to ...
-	 */
-	if (pfn >= ARCH_PFN_OFFSET && is_end_low)
-		return 0;
-
-	return 1;
-}
-
-static inline int pfn_valid(unsigned long pfn)
-{
-	int nid = pfn_to_nid(pfn);
-
-	return (pfn <= node_end_pfn(nid));
-}
-#endif /* CONFIG_DISCONTIGMEM  */
-
-#endif
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 397a201adfe3..abfeef7bf6f8 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -32,11 +32,6 @@ unsigned long arch_pfn_offset;
 EXPORT_SYMBOL(arch_pfn_offset);
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-#endif
-
 long __init arc_get_mem_sz(void)
 {
 	return low_mem_sz;
@@ -147,9 +142,6 @@ void __init setup_arch_memory(void)
 	 * to the hole is freed and ARC specific version of pfn_valid()
 	 * handles the hole in the memory map.
 	 */
-#ifdef CONFIG_DISCONTIGMEM
-	node_set_online(1);
-#endif
 
 	min_high_pfn = PFN_DOWN(high_mem_start);
 	max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);

From 5ab06e10990c3a04e00318c5ca93048c0f53a0a7 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:49 -0700
Subject: [PATCH 3252/3292] m68k: remove support for DISCONTIGMEM

DISCONTIGMEM was replaced by FLATMEM with freeing of the unused memory map
in v5.11.

Remove the support for DISCONTIGMEM entirely.

Link: https://lkml.kernel.org/r/20210608091316.3622-5-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/Kconfig.cpu           | 10 ----------
 arch/m68k/include/asm/mmzone.h  | 10 ----------
 arch/m68k/include/asm/page.h    |  2 +-
 arch/m68k/include/asm/page_mm.h | 35 ---------------------------------
 arch/m68k/mm/init.c             | 20 -------------------
 5 files changed, 1 insertion(+), 76 deletions(-)
 delete mode 100644 arch/m68k/include/asm/mmzone.h

diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index f4d23977d2a5..29e946394fdb 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -408,10 +408,6 @@ config SINGLE_MEMORY_CHUNK
 	  order" to save memory that could be wasted for unused memory map.
 	  Say N if not sure.
 
-config ARCH_DISCONTIGMEM_ENABLE
-	depends on BROKEN
-	def_bool MMU && !SINGLE_MEMORY_CHUNK
-
 config FORCE_MAX_ZONEORDER
 	int "Maximum zone order" if ADVANCED
 	depends on !SINGLE_MEMORY_CHUNK
@@ -451,11 +447,6 @@ config M68K_L2_CACHE
 	depends on MAC
 	default y
 
-config NODES_SHIFT
-	int
-	default "3"
-	depends on DISCONTIGMEM
-
 config CPU_HAS_NO_BITFIELDS
 	bool
 
@@ -553,4 +544,3 @@ config CACHE_COPYBACK
 	  The ColdFire CPU cache is set into Copy-back mode.
 endchoice
 endif
-
diff --git a/arch/m68k/include/asm/mmzone.h b/arch/m68k/include/asm/mmzone.h
deleted file mode 100644
index 64573fe8e60d..000000000000
--- a/arch/m68k/include/asm/mmzone.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_M68K_MMZONE_H_
-#define _ASM_M68K_MMZONE_H_
-
-extern pg_data_t pg_data_map[];
-
-#define NODE_DATA(nid)		(&pg_data_map[nid])
-#define NODE_MEM_MAP(nid)	(NODE_DATA(nid)->node_mem_map)
-
-#endif /* _ASM_M68K_MMZONE_H_ */
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 97087dd3ca6d..2f1c54e4725d 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -62,7 +62,7 @@ extern unsigned long _ramend;
 #include <asm/page_no.h>
 #endif
 
-#if !defined(CONFIG_MMU) || defined(CONFIG_DISCONTIGMEM)
+#ifndef CONFIG_MMU
 #define __phys_to_pfn(paddr)	((unsigned long)((paddr) >> PAGE_SHIFT))
 #define __pfn_to_phys(pfn)	PFN_PHYS(pfn)
 #endif
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 2411ea9ef578..a5b459bcb7d8 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -126,26 +126,6 @@ static inline void *__va(unsigned long x)
 
 extern int m68k_virt_to_node_shift;
 
-#ifndef CONFIG_DISCONTIGMEM
-#define __virt_to_node(addr)	(&pg_data_map[0])
-#else
-extern struct pglist_data *pg_data_table[];
-
-static inline __attribute_const__ int __virt_to_node_shift(void)
-{
-	int shift;
-
-	asm (
-		"1:	moveq	#0,%0\n"
-		m68k_fixup(%c1, 1b)
-		: "=d" (shift)
-		: "i" (m68k_fixup_vnode_shift));
-	return shift;
-}
-
-#define __virt_to_node(addr)	(pg_data_table[(unsigned long)(addr) >> __virt_to_node_shift()])
-#endif
-
 #define virt_to_page(addr) ({						\
 	pfn_to_page(virt_to_pfn(addr));					\
 })
@@ -153,23 +133,8 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
 	pfn_to_virt(page_to_pfn(page));					\
 })
 
-#ifdef CONFIG_DISCONTIGMEM
-#define pfn_to_page(pfn) ({						\
-	unsigned long __pfn = (pfn);					\
-	struct pglist_data *pgdat;					\
-	pgdat = __virt_to_node((unsigned long)pfn_to_virt(__pfn));	\
-	pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn);		\
-})
-#define page_to_pfn(_page) ({						\
-	const struct page *__p = (_page);				\
-	struct pglist_data *pgdat;					\
-	pgdat = &pg_data_map[page_to_nid(__p)];				\
-	((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn;		\
-})
-#else
 #define ARCH_PFN_OFFSET (m68k_memory[0].addr >> PAGE_SHIFT)
 #include <asm-generic/memory_model.h>
-#endif
 
 #define virt_addr_valid(kaddr)	((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
 #define pfn_valid(pfn)		virt_addr_valid(pfn_to_virt(pfn))
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 1759ab875d47..5d749e188246 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -44,28 +44,8 @@ EXPORT_SYMBOL(empty_zero_page);
 
 int m68k_virt_to_node_shift;
 
-#ifdef CONFIG_DISCONTIGMEM
-pg_data_t pg_data_map[MAX_NUMNODES];
-EXPORT_SYMBOL(pg_data_map);
-
-pg_data_t *pg_data_table[65];
-EXPORT_SYMBOL(pg_data_table);
-#endif
-
 void __init m68k_setup_node(int node)
 {
-#ifdef CONFIG_DISCONTIGMEM
-	struct m68k_mem_info *info = m68k_memory + node;
-	int i, end;
-
-	i = (unsigned long)phys_to_virt(info->addr) >> __virt_to_node_shift();
-	end = (unsigned long)phys_to_virt(info->addr + info->size - 1) >> __virt_to_node_shift();
-	for (; i <= end; i++) {
-		if (pg_data_table[i])
-			pr_warn("overlap at %u for chunk %u\n", i, node);
-		pg_data_table[i] = pg_data_map + node;
-	}
-#endif
 	node_set_online(node);
 }
 

From bb1c50d3967f69f413b333713c2718d48d1ab7ea Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:52 -0700
Subject: [PATCH 3253/3292] mm: remove CONFIG_DISCONTIGMEM

There are no architectures that support DISCONTIGMEM left.

Remove the configuration option and the dead code it was guarding in the
generic memory management code.

Link: https://lkml.kernel.org/r/20210608091316.3622-6-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/memory_model.h | 37 ++++--------------------------
 include/linux/mmzone.h             |  8 ++++---
 mm/Kconfig                         | 25 +++-----------------
 mm/page_alloc.c                    | 13 -----------
 4 files changed, 12 insertions(+), 71 deletions(-)

diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 7637fb46ba4f..a2c8ed60233a 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -6,47 +6,18 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * supports 3 memory models.
+ */
 #if defined(CONFIG_FLATMEM)
 
 #ifndef ARCH_PFN_OFFSET
 #define ARCH_PFN_OFFSET		(0UL)
 #endif
 
-#elif defined(CONFIG_DISCONTIGMEM)
-
-#ifndef arch_pfn_to_nid
-#define arch_pfn_to_nid(pfn)	pfn_to_nid(pfn)
-#endif
-
-#ifndef arch_local_page_offset
-#define arch_local_page_offset(pfn, nid)	\
-	((pfn) - NODE_DATA(nid)->node_start_pfn)
-#endif
-
-#endif /* CONFIG_DISCONTIGMEM */
-
-/*
- * supports 3 memory models.
- */
-#if defined(CONFIG_FLATMEM)
-
 #define __pfn_to_page(pfn)	(mem_map + ((pfn) - ARCH_PFN_OFFSET))
 #define __page_to_pfn(page)	((unsigned long)((page) - mem_map) + \
 				 ARCH_PFN_OFFSET)
-#elif defined(CONFIG_DISCONTIGMEM)
-
-#define __pfn_to_page(pfn)			\
-({	unsigned long __pfn = (pfn);		\
-	unsigned long __nid = arch_pfn_to_nid(__pfn);  \
-	NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
-})
-
-#define __page_to_pfn(pg)						\
-({	const struct page *__pg = (pg);					\
-	struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg));	\
-	(unsigned long)(__pg - __pgdat->node_mem_map) +			\
-	 __pgdat->node_start_pfn;					\
-})
 
 #elif defined(CONFIG_SPARSEMEM_VMEMMAP)
 
@@ -70,7 +41,7 @@
 	struct mem_section *__sec = __pfn_to_section(__pfn);	\
 	__section_mem_map_addr(__sec) + __pfn;		\
 })
-#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+#endif /* CONFIG_FLATMEM/SPARSEMEM */
 
 /*
  * Convert a physical address to a Page Frame Number and back
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e62e8ef68b5..6f9829562af2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -749,10 +749,12 @@ struct zonelist {
 	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
 };
 
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+/*
+ * The array of struct pages for flatmem.
+ * It must be declared for SPARSEMEM as well because there are configurations
+ * that rely on that.
+ */
 extern struct page *mem_map;
-#endif
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 struct deferred_split {
diff --git a/mm/Kconfig b/mm/Kconfig
index 02d44e3420f5..218b96ccc84a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -19,7 +19,7 @@ choice
 
 config FLATMEM_MANUAL
 	bool "Flat Memory"
-	depends on !(ARCH_DISCONTIGMEM_ENABLE || ARCH_SPARSEMEM_ENABLE) || ARCH_FLATMEM_ENABLE
+	depends on !ARCH_SPARSEMEM_ENABLE || ARCH_FLATMEM_ENABLE
 	help
 	  This option is best suited for non-NUMA systems with
 	  flat address space. The FLATMEM is the most efficient
@@ -32,21 +32,6 @@ config FLATMEM_MANUAL
 
 	  If unsure, choose this option (Flat Memory) over any other.
 
-config DISCONTIGMEM_MANUAL
-	bool "Discontiguous Memory"
-	depends on ARCH_DISCONTIGMEM_ENABLE
-	help
-	  This option provides enhanced support for discontiguous
-	  memory systems, over FLATMEM.  These systems have holes
-	  in their physical address spaces, and this option provides
-	  more efficient handling of these holes.
-
-	  Although "Discontiguous Memory" is still used by several
-	  architectures, it is considered deprecated in favor of
-	  "Sparse Memory".
-
-	  If unsure, choose "Sparse Memory" over this option.
-
 config SPARSEMEM_MANUAL
 	bool "Sparse Memory"
 	depends on ARCH_SPARSEMEM_ENABLE
@@ -62,17 +47,13 @@ config SPARSEMEM_MANUAL
 
 endchoice
 
-config DISCONTIGMEM
-	def_bool y
-	depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
-
 config SPARSEMEM
 	def_bool y
 	depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL
 
 config FLATMEM
 	def_bool y
-	depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
+	depends on !SPARSEMEM || FLATMEM_MANUAL
 
 config FLAT_NODE_MEM_MAP
 	def_bool y
@@ -85,7 +66,7 @@ config FLAT_NODE_MEM_MAP
 #
 config NEED_MULTIPLE_NODES
 	def_bool y
-	depends on DISCONTIGMEM || NUMA
+	depends on NUMA
 
 #
 # SPARSEMEM_EXTREME (which is the default) does some bootmem
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 58f7a321598f..8926f3fd3bcf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -349,20 +349,7 @@ compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
 
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
-#ifdef CONFIG_DISCONTIGMEM
-/*
- * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
- * are not on separate NUMA nodes. Functionally this works but with
- * watermark_boost_factor, it can reclaim prematurely as the ranges can be
- * quite small. By default, do not boost watermarks on discontigmem as in
- * many cases very high-order allocations like THP are likely to be
- * unsupported and the premature reclaim offsets the advantage of long-term
- * fragmentation avoidance.
- */
-int watermark_boost_factor __read_mostly;
-#else
 int watermark_boost_factor __read_mostly = 15000;
-#endif
 int watermark_scale_factor = 10;
 
 static unsigned long nr_kernel_pages __initdata;

From d3c251ab95b69f3dc189c4657baeac1b4c050789 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:55 -0700
Subject: [PATCH 3254/3292] arch, mm: remove stale mentions of DISCONIGMEM

There are several places that mention DISCONIGMEM in comments or have
stale code guarded by CONFIG_DISCONTIGMEM.

Remove the dead code and update the comments.

Link: https://lkml.kernel.org/r/20210608091316.3622-7-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/topology.c     | 5 ++---
 arch/ia64/mm/numa.c             | 5 ++---
 arch/mips/include/asm/mmzone.h  | 6 ------
 arch/mips/mm/init.c             | 3 ---
 arch/nds32/include/asm/memory.h | 6 ------
 arch/xtensa/include/asm/page.h  | 4 ----
 include/linux/gfp.h             | 4 ++--
 7 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 09fc385c2acd..3639e0a7cb3b 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -3,9 +3,8 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * This file contains NUMA specific variables and functions which can
- * be split away from DISCONTIGMEM and are used on NUMA machines with
- * contiguous memory.
+ * This file contains NUMA specific variables and functions which are used on
+ * NUMA machines with contiguous memory.
  * 		2002/08/07 Erich Focht <efocht@ess.nec.de>
  * Populate cpu entries in sysfs for non-numa systems as well
  *  	Intel Corporation - Ashok Raj
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 46b6e5f3a40f..d6579ec3ea32 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -3,9 +3,8 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * This file contains NUMA specific variables and functions which can
- * be split away from DISCONTIGMEM and are used on NUMA machines with
- * contiguous memory.
+ * This file contains NUMA specific variables and functions which are used on
+ * NUMA machines with contiguous memory.
  * 
  *                         2002/08/07 Erich Focht <efocht@ess.nec.de>
  */
diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h
index b826b8473e95..7649ab45e80c 100644
--- a/arch/mips/include/asm/mmzone.h
+++ b/arch/mips/include/asm/mmzone.h
@@ -20,10 +20,4 @@
 #define nid_to_addrbase(nid) 0
 #endif
 
-#ifdef CONFIG_DISCONTIGMEM
-
-#define pfn_to_nid(pfn)		pa_to_nid((pfn) << PAGE_SHIFT)
-
-#endif /* CONFIG_DISCONTIGMEM */
-
 #endif /* _ASM_MMZONE_H_ */
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c36358758969..97f6ca341448 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -454,9 +454,6 @@ void __init mem_init(void)
 	BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT));
 
 #ifdef CONFIG_HIGHMEM
-#ifdef CONFIG_DISCONTIGMEM
-#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet"
-#endif
 	max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;
 #else
 	max_mapnr = max_low_pfn;
diff --git a/arch/nds32/include/asm/memory.h b/arch/nds32/include/asm/memory.h
index 940d32842793..62faafbc28e4 100644
--- a/arch/nds32/include/asm/memory.h
+++ b/arch/nds32/include/asm/memory.h
@@ -76,18 +76,12 @@
  *  virt_to_page(k)	convert a _valid_ virtual address to struct page *
  *  virt_addr_valid(k)	indicates whether a virtual address is valid
  */
-#ifndef CONFIG_DISCONTIGMEM
-
 #define ARCH_PFN_OFFSET		PHYS_PFN_OFFSET
 #define pfn_valid(pfn)		((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
 
 #define virt_to_page(kaddr)	(pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
 #define virt_addr_valid(kaddr)	((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
 
-#else /* CONFIG_DISCONTIGMEM */
-#error CONFIG_DISCONTIGMEM is not supported yet.
-#endif /* !CONFIG_DISCONTIGMEM */
-
 #define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
 
 #endif
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 37ce25ef92d6..493eb7083b1a 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -192,10 +192,6 @@ static inline unsigned long ___pa(unsigned long va)
 #define pfn_valid(pfn) \
 	((pfn) >= ARCH_PFN_OFFSET && ((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
 
-#ifdef CONFIG_DISCONTIGMEM
-# error CONFIG_DISCONTIGMEM not supported
-#endif
-
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
 #define page_to_virt(page)	__va(page_to_pfn(page) << PAGE_SHIFT)
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 94f0b8b1cb55..0bec15b0691f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -494,8 +494,8 @@ static inline int gfp_zonelist(gfp_t flags)
  * There are two zonelists per node, one for all zones with memory and
  * one containing just zones from the node the zonelist belongs to.
  *
- * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
- * optimized to &contig_page_data at compile-time.
+ * For the case of non-NUMA systems the NODE_DATA() gets optimized to
+ * &contig_page_data at compile-time.
  */
 static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
 {

From 48d9f3355a8eaa79b00472929b517df497fc6d5f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:42:58 -0700
Subject: [PATCH 3255/3292] docs: remove description of DISCONTIGMEM

Remove description of DISCONTIGMEM from the "Memory Models" document and
update VM sysctl description so that it won't mention DISCONIGMEM.

Link: https://lkml.kernel.org/r/20210608091316.3622-8-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/sysctl/vm.rst | 12 +++----
 Documentation/vm/memory-model.rst       | 45 ++-----------------------
 2 files changed, 8 insertions(+), 49 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 2da25735a629..8387ad0b0b83 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -938,12 +938,12 @@ allocations, THP and hugetlbfs pages.
 
 To make it sensible with respect to the watermark_scale_factor
 parameter, the unit is in fractions of 10,000. The default value of
-15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
-watermark will be reclaimed in the event of a pageblock being mixed due
-to fragmentation. The level of reclaim is determined by the number of
-fragmentation events that occurred in the recent past. If this value is
-smaller than a pageblock then a pageblocks worth of pages will be reclaimed
-(e.g.  2MB on 64-bit x86). A boost factor of 0 will disable the feature.
+15,000 means that up to 150% of the high watermark will be reclaimed in the
+event of a pageblock being mixed due to fragmentation. The level of reclaim
+is determined by the number of fragmentation events that occurred in the
+recent past. If this value is smaller than a pageblock then a pageblocks
+worth of pages will be reclaimed (e.g.  2MB on 64-bit x86). A boost factor
+of 0 will disable the feature.
 
 
 watermark_scale_factor
diff --git a/Documentation/vm/memory-model.rst b/Documentation/vm/memory-model.rst
index ce398a7dc6cd..30e8fbed6914 100644
--- a/Documentation/vm/memory-model.rst
+++ b/Documentation/vm/memory-model.rst
@@ -14,15 +14,11 @@ for the CPU. Then there could be several contiguous ranges at
 completely distinct addresses. And, don't forget about NUMA, where
 different memory banks are attached to different CPUs.
 
-Linux abstracts this diversity using one of the three memory models:
-FLATMEM, DISCONTIGMEM and SPARSEMEM. Each architecture defines what
+Linux abstracts this diversity using one of the two memory models:
+FLATMEM and SPARSEMEM. Each architecture defines what
 memory models it supports, what the default memory model is and
 whether it is possible to manually override that default.
 
-.. note::
-   At time of this writing, DISCONTIGMEM is considered deprecated,
-   although it is still in use by several architectures.
-
 All the memory models track the status of physical page frames using
 struct page arranged in one or more arrays.
 
@@ -63,43 +59,6 @@ straightforward: `PFN - ARCH_PFN_OFFSET` is an index to the
 The `ARCH_PFN_OFFSET` defines the first page frame number for
 systems with physical memory starting at address different from 0.
 
-DISCONTIGMEM
-============
-
-The DISCONTIGMEM model treats the physical memory as a collection of
-`nodes` similarly to how Linux NUMA support does. For each node Linux
-constructs an independent memory management subsystem represented by
-`struct pglist_data` (or `pg_data_t` for short). Among other
-things, `pg_data_t` holds the `node_mem_map` array that maps
-physical pages belonging to that node. The `node_start_pfn` field of
-`pg_data_t` is the number of the first page frame belonging to that
-node.
-
-The architecture setup code should call :c:func:`free_area_init_node` for
-each node in the system to initialize the `pg_data_t` object and its
-`node_mem_map`.
-
-Every `node_mem_map` behaves exactly as FLATMEM's `mem_map` -
-every physical page frame in a node has a `struct page` entry in the
-`node_mem_map` array. When DISCONTIGMEM is enabled, a portion of the
-`flags` field of the `struct page` encodes the node number of the
-node hosting that page.
-
-The conversion between a PFN and the `struct page` in the
-DISCONTIGMEM model became slightly more complex as it has to determine
-which node hosts the physical page and which `pg_data_t` object
-holds the `struct page`.
-
-Architectures that support DISCONTIGMEM provide :c:func:`pfn_to_nid`
-to convert PFN to the node number. The opposite conversion helper
-:c:func:`page_to_nid` is generic as it uses the node number encoded in
-page->flags.
-
-Once the node number is known, the PFN can be used to index
-appropriate `node_mem_map` array to access the `struct page` and
-the offset of the `struct page` from the `node_mem_map` plus
-`node_start_pfn` is the PFN of that page.
-
 SPARSEMEM
 =========
 

From a9ee6cf5c60ed1070e786e53665f9b2f23f2bd11 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:43:01 -0700
Subject: [PATCH 3256/3292] mm: replace CONFIG_NEED_MULTIPLE_NODES with
 CONFIG_NUMA

After removal of DISCINTIGMEM the NEED_MULTIPLE_NODES and NUMA
configuration options are equivalent.

Drop CONFIG_NEED_MULTIPLE_NODES and use CONFIG_NUMA instead.

Done with

	$ sed -i 's/CONFIG_NEED_MULTIPLE_NODES/CONFIG_NUMA/' \
		$(git grep -wl CONFIG_NEED_MULTIPLE_NODES)
	$ sed -i 's/NEED_MULTIPLE_NODES/NUMA/' \
		$(git grep -wl NEED_MULTIPLE_NODES)

with manual tweaks afterwards.

[rppt@linux.ibm.com: fix arm boot crash]
  Link: https://lkml.kernel.org/r/YMj9vHhHOiCVN4BF@linux.ibm.com

Link: https://lkml.kernel.org/r/20210608091316.3622-9-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/Kconfig                |  2 +-
 arch/ia64/Kconfig                 |  2 +-
 arch/mips/Kconfig                 |  2 +-
 arch/mips/include/asm/mmzone.h    |  2 +-
 arch/mips/include/asm/page.h      |  2 +-
 arch/mips/mm/init.c               |  4 ++--
 arch/powerpc/Kconfig              |  2 +-
 arch/powerpc/include/asm/mmzone.h |  4 ++--
 arch/powerpc/kernel/setup_64.c    |  2 +-
 arch/powerpc/kernel/smp.c         |  2 +-
 arch/powerpc/kexec/core.c         |  4 ++--
 arch/powerpc/mm/Makefile          |  2 +-
 arch/powerpc/mm/mem.c             |  4 ++--
 arch/riscv/Kconfig                |  2 +-
 arch/s390/Kconfig                 |  2 +-
 arch/sh/include/asm/mmzone.h      |  4 ++--
 arch/sh/kernel/topology.c         |  2 +-
 arch/sh/mm/Kconfig                |  2 +-
 arch/sh/mm/init.c                 |  2 +-
 arch/sparc/Kconfig                |  2 +-
 arch/sparc/include/asm/mmzone.h   |  4 ++--
 arch/sparc/kernel/smp_64.c        |  2 +-
 arch/sparc/mm/init_64.c           | 12 ++++++------
 arch/x86/Kconfig                  |  2 +-
 arch/x86/kernel/setup_percpu.c    |  6 +++---
 arch/x86/mm/init_32.c             |  4 ++--
 include/asm-generic/topology.h    |  2 +-
 include/linux/memblock.h          |  6 +++---
 include/linux/mm.h                |  4 ++--
 include/linux/mmzone.h            |  6 +++---
 kernel/crash_core.c               |  2 +-
 mm/Kconfig                        |  9 ---------
 mm/memblock.c                     |  8 ++++----
 mm/memory.c                       |  3 +--
 mm/page_alloc.c                   |  6 +++---
 mm/sparse.c                       |  2 +-
 36 files changed, 59 insertions(+), 69 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f1d8566bbf9..d01a1545ab8f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1035,7 +1035,7 @@ config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)"
 	range 1 10
 	default "4"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 	help
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 279252e3e0f7..da22a35e6f03 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -302,7 +302,7 @@ config NODES_SHIFT
 	int "Max num nodes shift(3-10)"
 	range 3 10
 	default "10"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 	help
 	  This option specifies the maximum number of nodes in your SSI system.
 	  MAX_NUMNODES will be 2^(This value).
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ed51970c08e7..4704a16c2e44 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2867,7 +2867,7 @@ config RANDOMIZE_BASE_MAX_OFFSET
 config NODES_SHIFT
 	int
 	default "6"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 
 config HW_PERF_EVENTS
 	bool "Enable hardware performance counter support for perf events"
diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h
index 7649ab45e80c..602a21aee9d4 100644
--- a/arch/mips/include/asm/mmzone.h
+++ b/arch/mips/include/asm/mmzone.h
@@ -8,7 +8,7 @@
 
 #include <asm/page.h>
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 # include <mmzone.h>
 #endif
 
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 195ff4e9771f..96bc798c1ec1 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -239,7 +239,7 @@ static inline int pfn_valid(unsigned long pfn)
 
 /* pfn_valid is defined in linux/mmzone.h */
 
-#elif defined(CONFIG_NEED_MULTIPLE_NODES)
+#elif defined(CONFIG_NUMA)
 
 #define pfn_valid(pfn)							\
 ({									\
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 97f6ca341448..19347dc6bbf8 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -394,7 +394,7 @@ void maar_init(void)
 	}
 }
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -473,7 +473,7 @@ void __init mem_init(void)
 				0x80000000 - 4, KCORE_TEXT);
 #endif
 }
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+#endif /* !CONFIG_NUMA */
 
 void free_init_pages(const char *what, unsigned long begin, unsigned long end)
 {
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe4..14b132cf95e2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -671,7 +671,7 @@ config NODES_SHIFT
 	int
 	default "8" if PPC64
 	default "4"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 
 config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 6cda76b57c5d..4c6c6dbd182f 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -18,7 +18,7 @@
  *    flags field of the struct page
  */
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 
 extern struct pglist_data *node_data[];
 /*
@@ -41,7 +41,7 @@ u64 memory_hotplug_max(void);
 
 #else
 #define memory_hotplug_max() memblock_end_of_DRAM()
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
 #ifdef CONFIG_FA_DUMP
 #define __HAVE_ARCH_RESERVED_KERNEL_PAGES
 #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e42b85e4f1aa..a35fbf4d0bce 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -788,7 +788,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 					size_t align)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int node = early_cpu_to_node(cpu);
 	void *ptr;
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 2e05c783440a..a5209ea3859e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1047,7 +1047,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 			zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
 						GFP_KERNEL, cpu_to_node(cpu));
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 		/*
 		 * numa_node_id() works after this.
 		 */
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 56da5eb2b923..48525e8b5730 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -68,11 +68,11 @@ void machine_kexec_cleanup(struct kimage *image)
 void arch_crash_save_vmcoreinfo(void)
 {
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	VMCOREINFO_SYMBOL(node_data);
 	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
 #endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 	VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index c3df3a8501d4..2ffcf540f08b 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,7 +13,7 @@ obj-y				:= fault.o mem.o pgtable.o mmap.o maccess.o \
 obj-$(CONFIG_PPC_MMU_NOHASH)	+= nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)	+= book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64/
-obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_NUMA) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index a6b36a40897a..c5e520c6f13b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,7 +127,7 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 }
 #endif
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 void __init mem_topology_setup(void)
 {
 	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -162,7 +162,7 @@ static int __init mark_nonram_nosave(void)
 
 	return 0;
 }
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
 static int __init mark_nonram_nosave(void)
 {
 	return 0;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 18ec0f9bb8d5..15f9490a7aad 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -332,7 +332,7 @@ config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)"
 	range 1 10
 	default "2"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 	help
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b4c7c34069f8..707afbcd81c2 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -475,7 +475,7 @@ config NUMA
 
 config NODES_SHIFT
 	int
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 	default "1"
 
 config SCHED_SMT
diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index 6552a088dc97..7b8dead2723d 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -2,7 +2,7 @@
 #ifndef __ASM_SH_MMZONE_H
 #define __ASM_SH_MMZONE_H
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 #include <linux/numa.h>
 
 extern struct pglist_data *node_data[];
@@ -31,7 +31,7 @@ static inline void
 setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 {
 }
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
 
 /* Platform specific mem init */
 void __init plat_mem_setup(void);
diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c
index 7a989eed3b18..76af6db9daa2 100644
--- a/arch/sh/kernel/topology.c
+++ b/arch/sh/kernel/topology.c
@@ -46,7 +46,7 @@ static int __init topology_init(void)
 {
 	int i, ret;
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	for_each_online_node(i)
 		register_one_node(i);
 #endif
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index d551a9cac41e..ba569cfb4368 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -120,7 +120,7 @@ config NODES_SHIFT
 	int
 	default "3" if CPU_SUBTYPE_SHX3
 	default "1"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 168d7d4dd735..ce26c7f8950a 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -211,7 +211,7 @@ void __init allocate_pgdat(unsigned int nid)
 
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	NODE_DATA(nid) = memblock_alloc_try_nid(
 				sizeof(struct pglist_data),
 				SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 164a5254c91c..c72f52c704cd 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -265,7 +265,7 @@ config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)"
 	range 4 5 if SPARC64
 	default "5"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 	help
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
diff --git a/arch/sparc/include/asm/mmzone.h b/arch/sparc/include/asm/mmzone.h
index 6543fb97a849..a236d8aa893a 100644
--- a/arch/sparc/include/asm/mmzone.h
+++ b/arch/sparc/include/asm/mmzone.h
@@ -2,7 +2,7 @@
 #ifndef _SPARC64_MMZONE_H
 #define _SPARC64_MMZONE_H
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 
 #include <linux/cpumask.h>
 
@@ -13,6 +13,6 @@ extern struct pglist_data *node_data[];
 extern int numa_cpu_lookup_table[];
 extern cpumask_t numa_cpumask_lookup_table[];
 
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
 
 #endif /* _SPARC64_MMZONE_H */
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index e38d8bf454e8..c89a5971fb0d 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1546,7 +1546,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 					size_t align)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int node = cpu_to_node(cpu);
 	void *ptr;
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index e454f179cf5d..06e938d03f3b 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -903,7 +903,7 @@ struct node_mem_mask {
 static struct node_mem_mask node_masks[MAX_NUMNODES];
 static int num_node_masks;
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 
 struct mdesc_mlgroup {
 	u64	node;
@@ -1059,7 +1059,7 @@ static void __init allocate_node_data(int nid)
 {
 	struct pglist_data *p;
 	unsigned long start_pfn, end_pfn;
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 
 	NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data),
 					     SMP_CACHE_BYTES, nid);
@@ -1080,7 +1080,7 @@ static void __init allocate_node_data(int nid)
 
 static void init_node_masks_nonnuma(void)
 {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int i;
 #endif
 
@@ -1090,7 +1090,7 @@ static void init_node_masks_nonnuma(void)
 	node_masks[0].match = 0;
 	num_node_masks = 1;
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	for (i = 0; i < NR_CPUS; i++)
 		numa_cpu_lookup_table[i] = 0;
 
@@ -1098,7 +1098,7 @@ static void init_node_masks_nonnuma(void)
 #endif
 }
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 struct pglist_data *node_data[MAX_NUMNODES];
 
 EXPORT_SYMBOL(numa_cpu_lookup_table);
@@ -2487,7 +2487,7 @@ int page_in_phys_avail(unsigned long paddr)
 
 static void __init register_page_bootmem_info(void)
 {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int i;
 
 	for_each_online_node(i)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0045e1b44190..5d523ff70fe7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1597,7 +1597,7 @@ config NODES_SHIFT
 	default "10" if MAXSMP
 	default "6" if X86_64
 	default "3"
-	depends on NEED_MULTIPLE_NODES
+	depends on NUMA
 	help
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0941d2f44f2a..78a32b956e81 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -66,7 +66,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
  */
 static bool __init pcpu_need_numa(void)
 {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	pg_data_t *last = NULL;
 	unsigned int cpu;
 
@@ -101,7 +101,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 					unsigned long align)
 {
 	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int node = early_cpu_to_node(cpu);
 	void *ptr;
 
@@ -140,7 +140,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	if (early_cpu_to_node(from) == early_cpu_to_node(to))
 		return LOCAL_DISTANCE;
 	else
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 21ffb03f6c72..74b78840182d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -651,7 +651,7 @@ void __init find_low_pfn_range(void)
 		highmem_pfn_init();
 }
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
 #ifdef CONFIG_HIGHMEM
@@ -677,7 +677,7 @@ void __init initmem_init(void)
 
 	setup_bootmem_allocator();
 }
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+#endif /* !CONFIG_NUMA */
 
 void __init setup_bootmem_allocator(void)
 {
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 5aa8705df87e..4dbe715be65b 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -45,7 +45,7 @@
 #endif
 
 #ifndef cpumask_of_node
-  #ifdef CONFIG_NEED_MULTIPLE_NODES
+  #ifdef CONFIG_NUMA
     #define cpumask_of_node(node)	((node) == 0 ? cpu_online_mask : cpu_none_mask)
   #else
     #define cpumask_of_node(node)	((void)(node), cpu_online_mask)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5984fff3f175..552309342c38 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -50,7 +50,7 @@ struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
 	enum memblock_flags flags;
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int nid;
 #endif
 };
@@ -347,7 +347,7 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask);
 int memblock_set_node(phys_addr_t base, phys_addr_t size,
 		      struct memblock_type *type, int nid);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 static inline void memblock_set_region_node(struct memblock_region *r, int nid)
 {
 	r->nid = nid;
@@ -366,7 +366,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 {
 	return 0;
 }
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
 
 /* Flags for memblock allocation APIs */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9bd21e6fad6a..07922ee1477e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -46,7 +46,7 @@ extern int sysctl_page_lock_unfairness;
 
 void init_mm_internals(void);
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES	/* Don't use mapnrs, do it properly */
+#ifndef CONFIG_NUMA		/* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
 
 static inline void set_max_mapnr(unsigned long limit)
@@ -2460,7 +2460,7 @@ extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 static inline int early_pfn_to_nid(unsigned long pfn)
 {
 	return 0;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6f9829562af2..4bd420ed3961 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1043,17 +1043,17 @@ extern int percpu_pagelist_high_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 
 extern struct pglist_data contig_page_data;
 #define NODE_DATA(nid)		(&contig_page_data)
 #define NODE_MEM_MAP(nid)	mem_map
 
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
 
 #include <asm/mmzone.h>
 
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+#endif /* !CONFIG_NUMA */
 
 extern struct pglist_data *first_online_pgdat(void);
 extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 684a6061a13a..0a4780c047c9 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -455,7 +455,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_SYMBOL(_stext);
 	VMCOREINFO_SYMBOL(vmap_area_list);
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 	VMCOREINFO_SYMBOL(mem_map);
 	VMCOREINFO_SYMBOL(contig_page_data);
 #endif
diff --git a/mm/Kconfig b/mm/Kconfig
index 218b96ccc84a..bffe4bd859f3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -59,15 +59,6 @@ config FLAT_NODE_MEM_MAP
 	def_bool y
 	depends on !SPARSEMEM
 
-#
-# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
-# to represent different areas of memory.  This variable allows
-# those dependencies to exist individually.
-#
-config NEED_MULTIPLE_NODES
-	def_bool y
-	depends on NUMA
-
 #
 # SPARSEMEM_EXTREME (which is the default) does some bootmem
 # allocations when sparse_init() is called.  If this cannot
diff --git a/mm/memblock.c b/mm/memblock.c
index afaefa8fc6ab..123feef5259d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -92,7 +92,7 @@
  * system initialization completes.
  */
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 struct pglist_data __refdata contig_page_data;
 EXPORT_SYMBOL(contig_page_data);
 #endif
@@ -607,7 +607,7 @@ repeat:
 		 * area, insert that portion.
 		 */
 		if (rbase > base) {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 			WARN_ON(nid != memblock_get_region_node(rgn));
 #endif
 			WARN_ON(flags != rgn->flags);
@@ -1205,7 +1205,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 				      struct memblock_type *type, int nid)
 {
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 	int start_rgn, end_rgn;
 	int i, ret;
 
@@ -1849,7 +1849,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
 		size = rgn->size;
 		end = base + size - 1;
 		flags = rgn->flags;
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 		if (memblock_get_region_node(rgn) != MAX_NUMNODES)
 			snprintf(nid_buf, sizeof(nid_buf), " on node %d",
 				 memblock_get_region_node(rgn));
diff --git a/mm/memory.c b/mm/memory.c
index 3dd6b2e73e1d..48c4576df898 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -90,8 +90,7 @@
 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-/* use the per-pgdat data instead for discontigmem - mbligh */
+#ifndef CONFIG_NUMA
 unsigned long max_mapnr;
 EXPORT_SYMBOL(max_mapnr);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8926f3fd3bcf..c4069f9e3968 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1634,7 +1634,7 @@ void __free_pages_core(struct page *page, unsigned int order)
 	__free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON);
 }
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
 
 /*
  * During memory init memblocks map pfns to nids. The search is expensive and
@@ -1684,7 +1684,7 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
 
 	return nid;
 }
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
 
 void __init memblock_free_pages(struct page *page, unsigned long pfn,
 							unsigned int order)
@@ -7438,7 +7438,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 	pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
 				__func__, pgdat->node_id, (unsigned long)pgdat,
 				(unsigned long)pgdat->node_mem_map);
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 	/*
 	 * With no DISCONTIG, the global mem_map is just set as node 0's
 	 */
diff --git a/mm/sparse.c b/mm/sparse.c
index 55c18aff3e42..7272f7a1449d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -346,7 +346,7 @@ size_t mem_section_usage_size(void)
 
 static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
 {
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
 	return __pa_symbol(pgdat);
 #else
 	return __pa(pgdat);

From 43b02ba93b25b1caff7a3457fc5d005485e78da5 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 28 Jun 2021 19:43:05 -0700
Subject: [PATCH 3257/3292] mm: replace CONFIG_FLAT_NODE_MEM_MAP with
 CONFIG_FLATMEM

After removal of the DISCONTIGMEM memory model the FLAT_NODE_MEM_MAP
configuration option is equivalent to FLATMEM.

Drop CONFIG_FLAT_NODE_MEM_MAP and use CONFIG_FLATMEM instead.

Link: https://lkml.kernel.org/r/20210608091316.3622-10-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 4 ++--
 kernel/crash_core.c    | 2 +-
 mm/Kconfig             | 4 ----
 mm/page_alloc.c        | 6 +++---
 mm/page_ext.c          | 2 +-
 5 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4bd420ed3961..578588d4afc9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -788,7 +788,7 @@ typedef struct pglist_data {
 	struct zonelist node_zonelists[MAX_ZONELISTS];
 
 	int nr_zones; /* number of populated zones in this node */
-#ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
+#ifdef CONFIG_FLATMEM	/* means !SPARSEMEM */
 	struct page *node_mem_map;
 #ifdef CONFIG_PAGE_EXTENSION
 	struct page_ext *node_page_ext;
@@ -878,7 +878,7 @@ typedef struct pglist_data {
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
 #define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLATMEM
 #define pgdat_page_nr(pgdat, pagenr)	((pgdat)->node_mem_map + (pagenr))
 #else
 #define pgdat_page_nr(pgdat, pagenr)	pfn_to_page((pgdat)->node_start_pfn + (pagenr))
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 0a4780c047c9..da449c1cdca7 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -484,7 +484,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(page, compound_head);
 	VMCOREINFO_OFFSET(pglist_data, node_zones);
 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLATMEM
 	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
 #endif
 	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
diff --git a/mm/Kconfig b/mm/Kconfig
index bffe4bd859f3..ded98fb859ab 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -55,10 +55,6 @@ config FLATMEM
 	def_bool y
 	depends on !SPARSEMEM || FLATMEM_MANUAL
 
-config FLAT_NODE_MEM_MAP
-	def_bool y
-	depends on !SPARSEMEM
-
 #
 # SPARSEMEM_EXTREME (which is the default) does some bootmem
 # allocations when sparse_init() is called.  If this cannot
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c4069f9e3968..0e441f1677f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6547,7 +6547,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 	}
 }
 
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+#if !defined(CONFIG_FLATMEM)
 /*
  * Only struct pages that correspond to ranges defined by memblock.memory
  * are zeroed and initialized by going through __init_single_page() during
@@ -7403,7 +7403,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 	}
 }
 
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLATMEM
 static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 {
 	unsigned long __maybe_unused start = 0;
@@ -7451,7 +7451,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 }
 #else
 static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
-#endif /* CONFIG_FLAT_NODE_MEM_MAP */
+#endif /* CONFIG_FLATMEM */
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
diff --git a/mm/page_ext.c b/mm/page_ext.c
index df6f74aac8e1..293b2685fc48 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -191,7 +191,7 @@ fail:
 	panic("Out of memory");
 }
 
-#else /* CONFIG_FLAT_NODE_MEM_MAP */
+#else /* CONFIG_FLATMEM */
 
 struct page_ext *lookup_page_ext(const struct page *page)
 {

From 44042b4498728f4376e84bae1ac8016d146d850b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:43:08 -0700
Subject: [PATCH 3258/3292] mm/page_alloc: allow high-order pages to be stored
 on the per-cpu lists

The per-cpu page allocator (PCP) only stores order-0 pages.  This means
that all THP and "cheap" high-order allocations including SLUB contends on
the zone->lock.  This patch extends the PCP allocator to store THP and
"cheap" high-order pages.  Note that struct per_cpu_pages increases in
size to 256 bytes (4 cache lines) on x86-64.

Note that this is not necessarily a universal performance win because of
how it is implemented.  High-order pages can cause pcp->high to be
exceeded prematurely for lower-orders so for example, a large number of
THP pages being freed could release order-0 pages from the PCP lists.
Hence, much depends on the allocation/free pattern as observed by a single
CPU to determine if caching helps or hurts a particular workload.

That said, basic performance testing passed.  The following is a netperf
UDP_STREAM test which hits the relevant patches as some of the network
allocations are high-order.

netperf-udp
                                 5.13.0-rc2             5.13.0-rc2
                           mm-pcpburst-v3r4   mm-pcphighorder-v1r7
Hmean     send-64         261.46 (   0.00%)      266.30 *   1.85%*
Hmean     send-128        516.35 (   0.00%)      536.78 *   3.96%*
Hmean     send-256       1014.13 (   0.00%)     1034.63 *   2.02%*
Hmean     send-1024      3907.65 (   0.00%)     4046.11 *   3.54%*
Hmean     send-2048      7492.93 (   0.00%)     7754.85 *   3.50%*
Hmean     send-3312     11410.04 (   0.00%)    11772.32 *   3.18%*
Hmean     send-4096     13521.95 (   0.00%)    13912.34 *   2.89%*
Hmean     send-8192     21660.50 (   0.00%)    22730.72 *   4.94%*
Hmean     send-16384    31902.32 (   0.00%)    32637.50 *   2.30%*

Functionally, a patch like this is necessary to make bulk allocation of
high-order pages work with similar performance to order-0 bulk
allocations.  The bulk allocator is not updated in this series as it would
have to be determined by bulk allocation users how they want to track the
order of pages allocated with the bulk allocator.

Link: https://lkml.kernel.org/r/20210611135753.GC30378@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  20 ++++-
 mm/internal.h          |   2 +-
 mm/page_alloc.c        | 169 ++++++++++++++++++++++++++++++-----------
 mm/swap.c              |   2 +-
 4 files changed, 144 insertions(+), 49 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 578588d4afc9..265a32e1ff74 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -333,6 +333,24 @@ enum zone_watermarks {
 	NR_WMARK
 };
 
+/*
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
+ * for pageblock size for THP if configured.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define NR_PCP_THP 1
+#else
+#define NR_PCP_THP 0
+#endif
+#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
+
+/*
+ * Shift to encode migratetype and order in the same integer, with order
+ * in the least significant bits.
+ */
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
+
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
@@ -349,7 +367,7 @@ struct per_cpu_pages {
 #endif
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
-	struct list_head lists[MIGRATE_PCPTYPES];
+	struct list_head lists[NR_PCP_LISTS];
 };
 
 struct per_cpu_zonestat {
diff --git a/mm/internal.h b/mm/internal.h
index 18e5fb4d225f..6ec2cea9926b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -203,7 +203,7 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
 
-extern void free_unref_page(struct page *page);
+extern void free_unref_page(struct page *page, unsigned int order);
 extern void free_unref_page_list(struct list_head *list);
 
 extern void zone_pcp_update(struct zone *zone, int cpu_online);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0e441f1677f3..34f097ecfe08 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -674,10 +674,53 @@ out:
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
+static inline unsigned int order_to_pindex(int migratetype, int order)
+{
+	int base = order;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (order > PAGE_ALLOC_COSTLY_ORDER) {
+		VM_BUG_ON(order != pageblock_order);
+		base = PAGE_ALLOC_COSTLY_ORDER + 1;
+	}
+#else
+	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
+#endif
+
+	return (MIGRATE_PCPTYPES * base) + migratetype;
+}
+
+static inline int pindex_to_order(unsigned int pindex)
+{
+	int order = pindex / MIGRATE_PCPTYPES;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (order > PAGE_ALLOC_COSTLY_ORDER) {
+		order = pageblock_order;
+		VM_BUG_ON(order != pageblock_order);
+	}
+#else
+	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
+#endif
+
+	return order;
+}
+
+static inline bool pcp_allowed_order(unsigned int order)
+{
+	if (order <= PAGE_ALLOC_COSTLY_ORDER)
+		return true;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (order == pageblock_order)
+		return true;
+#endif
+	return false;
+}
+
 static inline void free_the_page(struct page *page, unsigned int order)
 {
-	if (order == 0)		/* Via pcp? */
-		free_unref_page(page);
+	if (pcp_allowed_order(order))		/* Via pcp? */
+		free_unref_page(page, order);
 	else
 		__free_pages_ok(page, order, FPI_NONE);
 }
@@ -700,7 +743,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
 void free_compound_page(struct page *page)
 {
 	mem_cgroup_uncharge(page);
-	__free_pages_ok(page, compound_order(page), FPI_NONE);
+	free_the_page(page, compound_order(page));
 }
 
 void prep_compound_page(struct page *page, unsigned int order)
@@ -1350,9 +1393,9 @@ static __always_inline bool free_pages_prepare(struct page *page,
  * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
  * moved from pcp lists to free lists.
  */
-static bool free_pcp_prepare(struct page *page)
+static bool free_pcp_prepare(struct page *page, unsigned int order)
 {
-	return free_pages_prepare(page, 0, true, FPI_NONE);
+	return free_pages_prepare(page, order, true, FPI_NONE);
 }
 
 static bool bulkfree_pcp_prepare(struct page *page)
@@ -1369,12 +1412,12 @@ static bool bulkfree_pcp_prepare(struct page *page)
  * debug_pagealloc enabled, they are checked also immediately when being freed
  * to the pcp lists.
  */
-static bool free_pcp_prepare(struct page *page)
+static bool free_pcp_prepare(struct page *page, unsigned int order)
 {
 	if (debug_pagealloc_enabled_static())
-		return free_pages_prepare(page, 0, true, FPI_NONE);
+		return free_pages_prepare(page, order, true, FPI_NONE);
 	else
-		return free_pages_prepare(page, 0, false, FPI_NONE);
+		return free_pages_prepare(page, order, false, FPI_NONE);
 }
 
 static bool bulkfree_pcp_prepare(struct page *page)
@@ -1406,8 +1449,10 @@ static inline void prefetch_buddy(struct page *page)
 static void free_pcppages_bulk(struct zone *zone, int count,
 					struct per_cpu_pages *pcp)
 {
-	int migratetype = 0;
+	int pindex = 0;
 	int batch_free = 0;
+	int nr_freed = 0;
+	unsigned int order;
 	int prefetch_nr = READ_ONCE(pcp->batch);
 	bool isolated_pageblocks;
 	struct page *page, *tmp;
@@ -1418,7 +1463,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	 * below while (list_empty(list)) loop.
 	 */
 	count = min(pcp->count, count);
-	while (count) {
+	while (count > 0) {
 		struct list_head *list;
 
 		/*
@@ -1430,24 +1475,31 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		 */
 		do {
 			batch_free++;
-			if (++migratetype == MIGRATE_PCPTYPES)
-				migratetype = 0;
-			list = &pcp->lists[migratetype];
+			if (++pindex == NR_PCP_LISTS)
+				pindex = 0;
+			list = &pcp->lists[pindex];
 		} while (list_empty(list));
 
 		/* This is the only non-empty list. Free them all. */
-		if (batch_free == MIGRATE_PCPTYPES)
+		if (batch_free == NR_PCP_LISTS)
 			batch_free = count;
 
+		order = pindex_to_order(pindex);
+		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
 		do {
 			page = list_last_entry(list, struct page, lru);
 			/* must delete to avoid corrupting pcp list */
 			list_del(&page->lru);
-			pcp->count--;
+			nr_freed += 1 << order;
+			count -= 1 << order;
 
 			if (bulkfree_pcp_prepare(page))
 				continue;
 
+			/* Encode order with the migratetype */
+			page->index <<= NR_PCP_ORDER_WIDTH;
+			page->index |= order;
+
 			list_add_tail(&page->lru, &head);
 
 			/*
@@ -1463,8 +1515,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 				prefetch_buddy(page);
 				prefetch_nr--;
 			}
-		} while (--count && --batch_free && !list_empty(list));
+		} while (count > 0 && --batch_free && !list_empty(list));
 	}
+	pcp->count -= nr_freed;
 
 	/*
 	 * local_lock_irq held so equivalent to spin_lock_irqsave for
@@ -1479,14 +1532,19 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	 */
 	list_for_each_entry_safe(page, tmp, &head, lru) {
 		int mt = get_pcppage_migratetype(page);
+
+		/* mt has been encoded with the order (see above) */
+		order = mt & NR_PCP_ORDER_MASK;
+		mt >>= NR_PCP_ORDER_WIDTH;
+
 		/* MIGRATE_ISOLATE page should not go to pcplists */
 		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
 		/* Pageblock could have been isolated meanwhile */
 		if (unlikely(isolated_pageblocks))
 			mt = get_pageblock_migratetype(page);
 
-		__free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
-		trace_mm_page_pcpu_drain(page, 0, mt);
+		__free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
+		trace_mm_page_pcpu_drain(page, order, mt);
 	}
 	spin_unlock(&zone->lock);
 }
@@ -3263,11 +3321,12 @@ void mark_free_pages(struct zone *zone)
 }
 #endif /* CONFIG_PM */
 
-static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
+static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
+							unsigned int order)
 {
 	int migratetype;
 
-	if (!free_pcp_prepare(page))
+	if (!free_pcp_prepare(page, order))
 		return false;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
@@ -3317,16 +3376,18 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
 }
 
 static void free_unref_page_commit(struct page *page, unsigned long pfn,
-				   int migratetype)
+				   int migratetype, unsigned int order)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	int high;
+	int pindex;
 
 	__count_vm_event(PGFREE);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
-	list_add(&page->lru, &pcp->lists[migratetype]);
-	pcp->count++;
+	pindex = order_to_pindex(migratetype, order);
+	list_add(&page->lru, &pcp->lists[pindex]);
+	pcp->count += 1 << order;
 	high = nr_pcp_high(pcp, zone);
 	if (pcp->count >= high) {
 		int batch = READ_ONCE(pcp->batch);
@@ -3336,15 +3397,15 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
 }
 
 /*
- * Free a 0-order page
+ * Free a pcp page
  */
-void free_unref_page(struct page *page)
+void free_unref_page(struct page *page, unsigned int order)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
 	int migratetype;
 
-	if (!free_unref_page_prepare(page, pfn))
+	if (!free_unref_page_prepare(page, pfn, order))
 		return;
 
 	/*
@@ -3357,14 +3418,14 @@ void free_unref_page(struct page *page)
 	migratetype = get_pcppage_migratetype(page);
 	if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
 		if (unlikely(is_migrate_isolate(migratetype))) {
-			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+			free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
 			return;
 		}
 		migratetype = MIGRATE_MOVABLE;
 	}
 
 	local_lock_irqsave(&pagesets.lock, flags);
-	free_unref_page_commit(page, pfn, migratetype);
+	free_unref_page_commit(page, pfn, migratetype, order);
 	local_unlock_irqrestore(&pagesets.lock, flags);
 }
 
@@ -3381,7 +3442,7 @@ void free_unref_page_list(struct list_head *list)
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_to_pfn(page);
-		if (!free_unref_page_prepare(page, pfn))
+		if (!free_unref_page_prepare(page, pfn, 0))
 			list_del(&page->lru);
 
 		/*
@@ -3413,7 +3474,7 @@ void free_unref_page_list(struct list_head *list)
 		set_page_private(page, 0);
 		migratetype = get_pcppage_migratetype(page);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn, migratetype);
+		free_unref_page_commit(page, pfn, migratetype, 0);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
@@ -3549,7 +3610,8 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
 
 /* Remove page from the per-cpu list, caller must protect the list */
 static inline
-struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
+			int migratetype,
 			unsigned int alloc_flags,
 			struct per_cpu_pages *pcp,
 			struct list_head *list)
@@ -3558,16 +3620,30 @@ struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 
 	do {
 		if (list_empty(list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
-					READ_ONCE(pcp->batch), list,
+			int batch = READ_ONCE(pcp->batch);
+			int alloced;
+
+			/*
+			 * Scale batch relative to order if batch implies
+			 * free pages can be stored on the PCP. Batch can
+			 * be 1 for small zones or for boot pagesets which
+			 * should never store free pages as the pages may
+			 * belong to arbitrary zones.
+			 */
+			if (batch > 1)
+				batch = max(batch >> order, 2);
+			alloced = rmqueue_bulk(zone, order,
+					batch, list,
 					migratetype, alloc_flags);
+
+			pcp->count += alloced << order;
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
 
 		page = list_first_entry(list, struct page, lru);
 		list_del(&page->lru);
-		pcp->count--;
+		pcp->count -= 1 << order;
 	} while (check_new_pcp(page));
 
 	return page;
@@ -3575,8 +3651,9 @@ struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 
 /* Lock and remove page from the per-cpu list */
 static struct page *rmqueue_pcplist(struct zone *preferred_zone,
-			struct zone *zone, gfp_t gfp_flags,
-			int migratetype, unsigned int alloc_flags)
+			struct zone *zone, unsigned int order,
+			gfp_t gfp_flags, int migratetype,
+			unsigned int alloc_flags)
 {
 	struct per_cpu_pages *pcp;
 	struct list_head *list;
@@ -3592,8 +3669,8 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	 */
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
 	pcp->free_factor >>= 1;
-	list = &pcp->lists[migratetype];
-	page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
+	list = &pcp->lists[order_to_pindex(migratetype, order)];
+	page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
 	local_unlock_irqrestore(&pagesets.lock, flags);
 	if (page) {
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
@@ -3614,15 +3691,15 @@ struct page *rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 
-	if (likely(order == 0)) {
+	if (likely(pcp_allowed_order(order))) {
 		/*
 		 * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
 		 * we need to skip it when CMA area isn't allowed.
 		 */
 		if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
 				migratetype != MIGRATE_MOVABLE) {
-			page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
-					migratetype, alloc_flags);
+			page = rmqueue_pcplist(preferred_zone, zone, order,
+					gfp_flags, migratetype, alloc_flags);
 			goto out;
 		}
 	}
@@ -5201,7 +5278,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	/* Attempt the batch allocation */
 	local_lock_irqsave(&pagesets.lock, flags);
 	pcp = this_cpu_ptr(zone->per_cpu_pageset);
-	pcp_list = &pcp->lists[ac.migratetype];
+	pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
 
 	while (nr_populated < nr_pages) {
 
@@ -5211,7 +5288,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			continue;
 		}
 
-		page = __rmqueue_pcplist(zone, ac.migratetype, alloc_flags,
+		page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
 								pcp, pcp_list);
 		if (unlikely(!page)) {
 			/* Try and get at least one page */
@@ -6778,13 +6855,13 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
 
 static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats)
 {
-	int migratetype;
+	int pindex;
 
 	memset(pcp, 0, sizeof(*pcp));
 	memset(pzstats, 0, sizeof(*pzstats));
 
-	for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
-		INIT_LIST_HEAD(&pcp->lists[migratetype]);
+	for (pindex = 0; pindex < NR_PCP_LISTS; pindex++)
+		INIT_LIST_HEAD(&pcp->lists[pindex]);
 
 	/*
 	 * Set batch and high values safe for a boot pageset. A true percpu
diff --git a/mm/swap.c b/mm/swap.c
index 18cc9e63515b..6c11db780467 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -95,7 +95,7 @@ static void __put_single_page(struct page *page)
 {
 	__page_cache_release(page);
 	mem_cgroup_uncharge(page);
-	free_unref_page(page);
+	free_unref_page(page, 0);
 }
 
 static void __put_compound_page(struct page *page)

From 203c06eef579c670b8eb3a24108b9837bf9b7737 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 19:43:11 -0700
Subject: [PATCH 3259/3292] mm/page_alloc: split pcp->high across all online
 CPUs for cpuless nodes

Dave Hansen reported the following about Feng Tang's tests on a machine
with persistent memory onlined as a DRAM-like device.

  Feng Tang tossed these on a "Cascade Lake" system with 96 threads and
  ~512G of persistent memory and 128G of DRAM.  The PMEM is in "volatile
  use" mode and being managed via the buddy just like the normal RAM.

  The PMEM zones are big ones:

        present  65011712 = 248 G
        high       134595 = 525 M

  The PMEM nodes, of course, don't have any CPUs in them.

  With your series, the pcp->high value per-cpu is 69584 pages or about
  270MB per CPU.  Scaled up by the 96 CPU threads, that's ~26GB of
  worst-case memory in the pcps per zone, or roughly 10% of the size of
  the zone.

This should not cause a problem as such although it could trigger reclaim
due to pages being stored on per-cpu lists for CPUs remote to a node.  It
is not possible to treat cpuless nodes exactly the same as normal nodes
but the worst-case scenario can be mitigated by splitting pcp->high across
all online CPUs for cpuless memory nodes.

Link: https://lkml.kernel.org/r/20210616110743.GK30378@techsingularity.net
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Tang, Feng" <feng.tang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 34f097ecfe08..db00ee8d79d2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6790,7 +6790,7 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online)
 {
 #ifdef CONFIG_MMU
 	int high;
-	int nr_local_cpus;
+	int nr_split_cpus;
 	unsigned long total_pages;
 
 	if (!percpu_pagelist_high_fraction) {
@@ -6813,10 +6813,14 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online)
 	 * Split the high value across all online CPUs local to the zone. Note
 	 * that early in boot that CPUs may not be online yet and that during
 	 * CPU hotplug that the cpumask is not yet updated when a CPU is being
-	 * onlined.
+	 * onlined. For memory nodes that have no CPUs, split pcp->high across
+	 * all online CPUs to mitigate the risk that reclaim is triggered
+	 * prematurely due to pages stored on pcp lists.
 	 */
-	nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))) + cpu_online;
-	high = total_pages / nr_local_cpus;
+	nr_split_cpus = cpumask_weight(cpumask_of_node(zone_to_nid(zone))) + cpu_online;
+	if (!nr_split_cpus)
+		nr_split_cpus = num_online_cpus();
+	high = total_pages / nr_split_cpus;
 
 	/*
 	 * Ensure high is at least batch*4. The multiple is based on the

From a3f5d80ea401ac857f2910e28b15f35b2cf902f4 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Mon, 28 Jun 2021 19:43:14 -0700
Subject: [PATCH 3260/3292] mm,hwpoison: send SIGBUS with error virutal address

Now an action required MCE in already hwpoisoned address surely sends a
SIGBUS to current process, but the SIGBUS doesn't convey error virtual
address.  That's not optimal for hwpoison-aware applications.

To fix the issue, make memory_failure() call kill_accessing_process(),
that does pagetable walk to find the error virtual address.  It could find
multiple virtual addresses for the same error page, and it seems hard to
tell which virtual address is correct one.  But that's rare and sending
incorrect virtual address could be better than no address.  So let's
report the first found virtual address for now.

[naoya.horiguchi@nec.com: fix walk_page_range() return]
  Link: https://lkml.kernel.org/r/20210603051055.GA244241@hori.linux.bs1.fc.nec.co.jp

Link: https://lkml.kernel.org/r/20210521030156.2612074-4-nao.horiguchi@gmail.com
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Aili Yao <yaoaili@kingsoft.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Jue Wang <juew@google.com>
Cc: Borislav Petkov <bp@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/mce/core.c |  13 ++-
 include/linux/swapops.h        |   5 ++
 mm/memory-failure.c            | 150 ++++++++++++++++++++++++++++++++-
 3 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index bf7fe87a7e88..22791aadc085 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1257,19 +1257,28 @@ static void kill_me_maybe(struct callback_head *cb)
 {
 	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
 	int flags = MF_ACTION_REQUIRED;
+	int ret;
 
 	pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
 
 	if (!p->mce_ripv)
 		flags |= MF_MUST_KILL;
 
-	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
-	    !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
+	ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
+	if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 		sync_core();
 		return;
 	}
 
+	/*
+	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
+	 * to the current process with the proper error info, so no need to
+	 * send SIGBUS here again.
+	 */
+	if (ret == -EHWPOISON)
+		return;
+
 	if (p->mce_vaddr != (void __user *)-1l) {
 		force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
 	} else {
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 6430a94c6981..5907205c712c 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
+static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
+{
+	return swp_offset(entry);
+}
+
 static inline void num_poisoned_pages_inc(void)
 {
 	atomic_long_inc(&num_poisoned_pages);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6f5f78885ab4..4d151ce3e50d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -56,6 +56,7 @@
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
 #include <linux/page-isolation.h>
+#include <linux/pagewalk.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill,
 		collect_procs_file(page, tokill, force_early);
 }
 
+struct hwp_walk {
+	struct to_kill tk;
+	unsigned long pfn;
+	int flags;
+};
+
+static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift)
+{
+	tk->addr = addr;
+	tk->size_shift = shift;
+}
+
+static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
+				unsigned long poisoned_pfn, struct to_kill *tk)
+{
+	unsigned long pfn = 0;
+
+	if (pte_present(pte)) {
+		pfn = pte_pfn(pte);
+	} else {
+		swp_entry_t swp = pte_to_swp_entry(pte);
+
+		if (is_hwpoison_entry(swp))
+			pfn = hwpoison_entry_to_pfn(swp);
+	}
+
+	if (!pfn || pfn != poisoned_pfn)
+		return 0;
+
+	set_to_kill(tk, addr, shift);
+	return 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
+				      struct hwp_walk *hwp)
+{
+	pmd_t pmd = *pmdp;
+	unsigned long pfn;
+	unsigned long hwpoison_vaddr;
+
+	if (!pmd_present(pmd))
+		return 0;
+	pfn = pmd_pfn(pmd);
+	if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) {
+		hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT);
+		set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT);
+		return 1;
+	}
+	return 0;
+}
+#else
+static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
+				      struct hwp_walk *hwp)
+{
+	return 0;
+}
+#endif
+
+static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
+			      unsigned long end, struct mm_walk *walk)
+{
+	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+	int ret = 0;
+	pte_t *ptep;
+	spinlock_t *ptl;
+
+	ptl = pmd_trans_huge_lock(pmdp, walk->vma);
+	if (ptl) {
+		ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp);
+		spin_unlock(ptl);
+		goto out;
+	}
+
+	if (pmd_trans_unstable(pmdp))
+		goto out;
+
+	ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
+	for (; addr != end; ptep++, addr += PAGE_SIZE) {
+		ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
+					     hwp->pfn, &hwp->tk);
+		if (ret == 1)
+			break;
+	}
+	pte_unmap_unlock(ptep - 1, ptl);
+out:
+	cond_resched();
+	return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
+			    unsigned long addr, unsigned long end,
+			    struct mm_walk *walk)
+{
+	struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+	pte_t pte = huge_ptep_get(ptep);
+	struct hstate *h = hstate_vma(walk->vma);
+
+	return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
+				      hwp->pfn, &hwp->tk);
+}
+#else
+#define hwpoison_hugetlb_range	NULL
+#endif
+
+static struct mm_walk_ops hwp_walk_ops = {
+	.pmd_entry = hwpoison_pte_range,
+	.hugetlb_entry = hwpoison_hugetlb_range,
+};
+
+/*
+ * Sends SIGBUS to the current process with error info.
+ *
+ * This function is intended to handle "Action Required" MCEs on already
+ * hardware poisoned pages. They could happen, for example, when
+ * memory_failure() failed to unmap the error page at the first call, or
+ * when multiple local machine checks happened on different CPUs.
+ *
+ * MCE handler currently has no easy access to the error virtual address,
+ * so this function walks page table to find it. The returned virtual address
+ * is proper in most cases, but it could be wrong when the application
+ * process has multiple entries mapping the error page.
+ */
+static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
+				  int flags)
+{
+	int ret;
+	struct hwp_walk priv = {
+		.pfn = pfn,
+	};
+	priv.tk.tsk = p;
+
+	mmap_read_lock(p->mm);
+	ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+			      (void *)&priv);
+	if (ret == 1 && priv.tk.addr)
+		kill_proc(&priv.tk, pfn, flags);
+	mmap_read_unlock(p->mm);
+	return ret ? -EFAULT : -EHWPOISON;
+}
+
 static const char *action_name[] = {
 	[MF_IGNORED] = "Ignored",
 	[MF_FAILED] = "Failed",
@@ -1267,7 +1410,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	if (TestSetPageHWPoison(head)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return -EHWPOISON;
+		res = -EHWPOISON;
+		if (flags & MF_ACTION_REQUIRED)
+			res = kill_accessing_process(current, page_to_pfn(head), flags);
+		return res;
 	}
 
 	num_poisoned_pages_inc();
@@ -1476,6 +1622,8 @@ try_again:
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
 		res = -EHWPOISON;
+		if (flags & MF_ACTION_REQUIRED)
+			res = kill_accessing_process(current, pfn, flags);
 		goto unlock_mutex;
 	}
 

From 0ed950d1f28142ccd9a9453c60df87853530d778 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Mon, 28 Jun 2021 19:43:17 -0700
Subject: [PATCH 3261/3292] mm,hwpoison: make get_hwpoison_page() call
 get_any_page()

__get_hwpoison_page() could fail to grab refcount by some race condition,
so it's helpful if we can handle it by retrying.  We already have retry
logic, so make get_hwpoison_page() call get_any_page() when called from
memory_failure().

As a result, get_hwpoison_page() can return negative values (i.e.  error
code), so some callers are also changed to handle error cases.
soft_offline_page() does nothing for -EBUSY because that's enough and
users in userspace can easily handle it.  unpoison_memory() is also
unchanged because it's broken and need thorough fixes (will be done
later).

Link: https://lkml.kernel.org/r/20210603233632.2964832-3-nao.horiguchi@gmail.com
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c        |   2 +
 mm/memory-failure.c | 200 +++++++++++++++++++++++++-------------------
 2 files changed, 114 insertions(+), 88 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5ba5a0da6d57..103f1187043f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5938,6 +5938,8 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
 		*hugetlb = true;
 		if (HPageFreed(page) || HPageMigratable(page))
 			ret = get_page_unless_zero(page);
+		else
+			ret = -EBUSY;
 	}
 	spin_unlock_irq(&hugetlb_lock);
 	return ret;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4d151ce3e50d..e5a1531f7f4e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1117,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page)
 	return PageLRU(page) || __PageMovable(page);
 }
 
-/**
- * __get_hwpoison_page() - Get refcount for memory error handling:
- * @page:	raw error page (hit by memory error)
- *
- * Return: return 0 if failed to grab the refcount, otherwise true (some
- * non-zero value.)
- */
 static int __get_hwpoison_page(struct page *page)
 {
 	struct page *head = compound_head(page);
@@ -1168,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page)
 	return 0;
 }
 
-/*
- * Safely get reference count of an arbitrary page.
- *
- * Returns 0 for a free page, 1 for an in-use page,
- * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
- * allocation.
- * We only incremented refcount in case the page was already in-use and it
- * is a known type we can handle.
- */
 static int get_any_page(struct page *p, unsigned long flags)
 {
 	int ret = 0, pass = 0;
@@ -1186,50 +1170,77 @@ static int get_any_page(struct page *p, unsigned long flags)
 		count_increased = true;
 
 try_again:
-	if (!count_increased && !__get_hwpoison_page(p)) {
-		if (page_count(p)) {
-			/* We raced with an allocation, retry. */
-			if (pass++ < 3)
-				goto try_again;
-			ret = -EBUSY;
-		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
-			/* We raced with put_page, retry. */
-			if (pass++ < 3)
-				goto try_again;
-			ret = -EIO;
-		}
-	} else {
-		if (PageHuge(p) || HWPoisonHandlable(p)) {
-			ret = 1;
-		} else {
-			/*
-			 * A page we cannot handle. Check whether we can turn
-			 * it into something we can handle.
-			 */
-			if (pass++ < 3) {
-				put_page(p);
-				shake_page(p, 1);
-				count_increased = false;
-				goto try_again;
+	if (!count_increased) {
+		ret = __get_hwpoison_page(p);
+		if (!ret) {
+			if (page_count(p)) {
+				/* We raced with an allocation, retry. */
+				if (pass++ < 3)
+					goto try_again;
+				ret = -EBUSY;
+			} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+				/* We raced with put_page, retry. */
+				if (pass++ < 3)
+					goto try_again;
+				ret = -EIO;
 			}
-			put_page(p);
-			ret = -EIO;
+			goto out;
+		} else if (ret == -EBUSY) {
+			/* We raced with freeing huge page to buddy, retry. */
+			if (pass++ < 3)
+				goto try_again;
+			goto out;
 		}
 	}
 
+	if (PageHuge(p) || HWPoisonHandlable(p)) {
+		ret = 1;
+	} else {
+		/*
+		 * A page we cannot handle. Check whether we can turn
+		 * it into something we can handle.
+		 */
+		if (pass++ < 3) {
+			put_page(p);
+			shake_page(p, 1);
+			count_increased = false;
+			goto try_again;
+		}
+		put_page(p);
+		ret = -EIO;
+	}
+out:
 	return ret;
 }
 
-static int get_hwpoison_page(struct page *p, unsigned long flags,
-			     enum mf_flags ctxt)
+/**
+ * get_hwpoison_page() - Get refcount for memory error handling
+ * @p:		Raw error page (hit by memory error)
+ * @flags:	Flags controlling behavior of error handling
+ *
+ * get_hwpoison_page() takes a page refcount of an error page to handle memory
+ * error on it, after checking that the error page is in a well-defined state
+ * (defined as a page-type we can successfully handle the memor error on it,
+ * such as LRU page and hugetlb page).
+ *
+ * Memory error handling could be triggered at any time on any type of page,
+ * so it's prone to race with typical memory management lifecycle (like
+ * allocation and free).  So to avoid such races, get_hwpoison_page() takes
+ * extra care for the error page's state (as done in __get_hwpoison_page()),
+ * and has some retry logic in get_any_page().
+ *
+ * Return: 0 on failure,
+ *         1 on success for in-use pages in a well-defined state,
+ *         -EIO for pages on which we can not handle memory errors,
+ *         -EBUSY when get_hwpoison_page() has raced with page lifecycle
+ *         operations like allocation and free.
+ */
+static int get_hwpoison_page(struct page *p, unsigned long flags)
 {
 	int ret;
 
 	zone_pcp_disable(page_zone(p));
-	if (ctxt == MF_SOFT_OFFLINE)
-		ret = get_any_page(p, flags);
-	else
-		ret = __get_hwpoison_page(p);
+	ret = get_any_page(p, flags);
 	zone_pcp_enable(page_zone(p));
 
 	return ret;
@@ -1418,27 +1429,33 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 
 	num_poisoned_pages_inc();
 
-	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
-		/*
-		 * Check "filter hit" and "race with other subpage."
-		 */
-		lock_page(head);
-		if (PageHWPoison(head)) {
-			if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
-			    || (p != head && TestSetPageHWPoison(head))) {
-				num_poisoned_pages_dec();
-				unlock_page(head);
-				return 0;
+	if (!(flags & MF_COUNT_INCREASED)) {
+		res = get_hwpoison_page(p, flags);
+		if (!res) {
+			/*
+			 * Check "filter hit" and "race with other subpage."
+			 */
+			lock_page(head);
+			if (PageHWPoison(head)) {
+				if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+				    || (p != head && TestSetPageHWPoison(head))) {
+					num_poisoned_pages_dec();
+					unlock_page(head);
+					return 0;
+				}
 			}
+			unlock_page(head);
+			res = MF_FAILED;
+			if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
+				page_ref_inc(p);
+				res = MF_RECOVERED;
+			}
+			action_result(pfn, MF_MSG_FREE_HUGE, res);
+			return res == MF_RECOVERED ? 0 : -EBUSY;
+		} else if (res < 0) {
+			action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
+			return -EBUSY;
 		}
-		unlock_page(head);
-		res = MF_FAILED;
-		if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
-			page_ref_inc(p);
-			res = MF_RECOVERED;
-		}
-		action_result(pfn, MF_MSG_FREE_HUGE, res);
-		return res == MF_RECOVERED ? 0 : -EBUSY;
 	}
 
 	lock_page(head);
@@ -1641,28 +1658,35 @@ try_again:
 	 * In fact it's dangerous to directly bump up page count from 0,
 	 * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
 	 */
-	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
-		if (is_free_buddy_page(p)) {
-			if (take_page_off_buddy(p)) {
-				page_ref_inc(p);
-				res = MF_RECOVERED;
-			} else {
-				/* We lost the race, try again */
-				if (retry) {
-					ClearPageHWPoison(p);
-					num_poisoned_pages_dec();
-					retry = false;
-					goto try_again;
+	if (!(flags & MF_COUNT_INCREASED)) {
+		res = get_hwpoison_page(p, flags);
+		if (!res) {
+			if (is_free_buddy_page(p)) {
+				if (take_page_off_buddy(p)) {
+					page_ref_inc(p);
+					res = MF_RECOVERED;
+				} else {
+					/* We lost the race, try again */
+					if (retry) {
+						ClearPageHWPoison(p);
+						num_poisoned_pages_dec();
+						retry = false;
+						goto try_again;
+					}
+					res = MF_FAILED;
 				}
-				res = MF_FAILED;
+				action_result(pfn, MF_MSG_BUDDY, res);
+				res = res == MF_RECOVERED ? 0 : -EBUSY;
+			} else {
+				action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
+				res = -EBUSY;
 			}
-			action_result(pfn, MF_MSG_BUDDY, res);
-			res = res == MF_RECOVERED ? 0 : -EBUSY;
-		} else {
-			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
+			goto unlock_mutex;
+		} else if (res < 0) {
+			action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
 			res = -EBUSY;
+			goto unlock_mutex;
 		}
-		goto unlock_mutex;
 	}
 
 	if (PageTransHuge(hpage)) {
@@ -1940,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	if (!get_hwpoison_page(p, flags, 0)) {
+	if (!get_hwpoison_page(p, flags)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -2156,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 
 retry:
 	get_online_mems();
-	ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
+	ret = get_hwpoison_page(page, flags);
 	put_online_mems();
 
 	if (ret > 0) {

From e3ae2365efc14269170a6326477e669332271ab3 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Sun, 27 Jun 2021 18:48:21 -0400
Subject: [PATCH 3262/3292] net: sock: introduce sk_error_report

This patch introduces a function wrapper to call the sk_error_report
callback. That will prepare to add additional handling whenever
sk_error_report is called, for example to trace socket errors.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../chelsio/inline_crypto/chtls/chtls_cm.c    |  2 +-
 drivers/vhost/vsock.c                         |  2 +-
 include/linux/skmsg.h                         |  2 +-
 include/net/sock.h                            |  2 ++
 include/net/tls.h                             |  2 +-
 net/caif/caif_socket.c                        |  2 +-
 net/can/bcm.c                                 |  4 ++--
 net/can/isotp.c                               | 20 +++++++++----------
 net/can/j1939/socket.c                        |  4 ++--
 net/can/raw.c                                 |  6 +++---
 net/core/skbuff.c                             |  6 +++---
 net/core/sock.c                               |  6 ++++++
 net/dccp/ipv4.c                               |  4 ++--
 net/dccp/ipv6.c                               |  4 ++--
 net/dccp/proto.c                              |  2 +-
 net/dccp/timer.c                              |  2 +-
 net/ipv4/ping.c                               |  2 +-
 net/ipv4/raw.c                                |  4 ++--
 net/ipv4/tcp.c                                |  4 ++--
 net/ipv4/tcp_input.c                          |  2 +-
 net/ipv4/tcp_ipv4.c                           |  4 ++--
 net/ipv4/tcp_timer.c                          |  2 +-
 net/ipv4/udp.c                                |  4 ++--
 net/ipv6/raw.c                                |  2 +-
 net/ipv6/tcp_ipv6.c                           |  4 ++--
 net/ipv6/udp.c                                |  2 +-
 net/kcm/kcmsock.c                             |  2 +-
 net/mptcp/subflow.c                           |  2 +-
 net/netlink/af_netlink.c                      |  8 ++++----
 net/nfc/rawsock.c                             |  2 +-
 net/packet/af_packet.c                        |  4 ++--
 net/qrtr/qrtr.c                               |  2 +-
 net/sctp/input.c                              |  2 +-
 net/sctp/ipv6.c                               |  2 +-
 net/smc/af_smc.c                              |  2 +-
 net/strparser/strparser.c                     |  2 +-
 net/unix/af_unix.c                            |  2 +-
 net/vmw_vsock/af_vsock.c                      |  2 +-
 net/vmw_vsock/virtio_transport.c              |  2 +-
 net/vmw_vsock/virtio_transport_common.c       |  2 +-
 net/vmw_vsock/vmci_transport.c                |  4 ++--
 net/xdp/xsk.c                                 |  2 +-
 42 files changed, 75 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index 19dc7dc054a2..bcad69c48074 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -2134,7 +2134,7 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
 		sk->sk_err = ETIMEDOUT;
 
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 		if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
 			return;
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 119f08491d3c..d38c996b4f46 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -734,7 +734,7 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
 	vsk->peer_shutdown = SHUTDOWN_MASK;
 	sk->sk_state = SS_UNCONNECTED;
 	sk->sk_err = ECONNRESET;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 }
 
 static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index fcaa9a7996c8..31866031e370 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -347,7 +347,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 	struct sock *sk = psock->sk;
 
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 }
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node);
diff --git a/include/net/sock.h b/include/net/sock.h
index ced2fc965ec7..8bdd80027ffb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2281,6 +2281,8 @@ static inline int sock_error(struct sock *sk)
 	return -err;
 }
 
+void sk_error_report(struct sock *sk);
+
 static inline unsigned long sock_wspace(struct sock *sk)
 {
 	int amt = 0;
diff --git a/include/net/tls.h b/include/net/tls.h
index 8d398a5de3ee..be4b3e1cac46 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -469,7 +469,7 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 static inline void tls_err_abort(struct sock *sk, int err)
 {
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 }
 
 static inline bool tls_bigint_increment(unsigned char *seq, int len)
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 3ad0a1df6712..647554c9813b 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -243,7 +243,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
 		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
 		cf_sk->sk.sk_err = ECONNRESET;
 		set_rx_flow_on(cf_sk);
-		cf_sk->sk.sk_error_report(&cf_sk->sk);
+		sk_error_report(&cf_sk->sk);
 		break;
 
 	default:
diff --git a/net/can/bcm.c b/net/can/bcm.c
index f3e4d9528fa3..e15a7dbe5f6c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1417,7 +1417,7 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
 		if (notify_enodev) {
 			sk->sk_err = ENODEV;
 			if (!sock_flag(sk, SOCK_DEAD))
-				sk->sk_error_report(sk);
+				sk_error_report(sk);
 		}
 		break;
 
@@ -1425,7 +1425,7 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
 		if (bo->bound && bo->ifindex == dev->ifindex) {
 			sk->sk_err = ENETDOWN;
 			if (!sock_flag(sk, SOCK_DEAD))
-				sk->sk_error_report(sk);
+				sk_error_report(sk);
 		}
 	}
 }
diff --git a/net/can/isotp.c b/net/can/isotp.c
index bd49299319a1..9fd274cf166b 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -168,7 +168,7 @@ static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
 		/* report 'connection timed out' */
 		sk->sk_err = ETIMEDOUT;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 		/* reset rx state */
 		so->rx.state = ISOTP_IDLE;
@@ -339,7 +339,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
 		/* malformed PDU - report 'not a data message' */
 		sk->sk_err = EBADMSG;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 		so->tx.state = ISOTP_IDLE;
 		wake_up_interruptible(&so->wait);
@@ -392,7 +392,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
 		/* overflow on receiver side - report 'message too long' */
 		sk->sk_err = EMSGSIZE;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		fallthrough;
 
 	default:
@@ -420,7 +420,7 @@ static int isotp_rcv_sf(struct sock *sk, struct canfd_frame *cf, int pcilen,
 		/* malformed PDU - report 'not a data message' */
 		sk->sk_err = EBADMSG;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		return 1;
 	}
 
@@ -535,7 +535,7 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
 		/* wrong sn detected - report 'illegal byte sequence' */
 		sk->sk_err = EILSEQ;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 		/* reset rx state */
 		so->rx.state = ISOTP_IDLE;
@@ -559,7 +559,7 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
 			/* malformed PDU - report 'not a data message' */
 			sk->sk_err = EBADMSG;
 			if (!sock_flag(sk, SOCK_DEAD))
-				sk->sk_error_report(sk);
+				sk_error_report(sk);
 			return 1;
 		}
 
@@ -758,7 +758,7 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
 		/* report 'communication error on send' */
 		sk->sk_err = ECOMM;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 		/* reset tx state */
 		so->tx.state = ISOTP_IDLE;
@@ -1157,7 +1157,7 @@ out:
 	if (notify_enetdown) {
 		sk->sk_err = ENETDOWN;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 	}
 
 	return err;
@@ -1356,13 +1356,13 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
 
 		sk->sk_err = ENODEV;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		break;
 
 	case NETDEV_DOWN:
 		sk->sk_err = ENETDOWN;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		break;
 	}
 }
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 56aa66147d5a..bf18a32dc6ae 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -1009,7 +1009,7 @@ void j1939_sk_send_loop_abort(struct sock *sk, int err)
 {
 	sk->sk_err = err;
 
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 }
 
 static int j1939_sk_send_loop(struct j1939_priv *priv,  struct sock *sk,
@@ -1189,7 +1189,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
 	list_for_each_entry(jsk, &priv->j1939_socks, list) {
 		jsk->sk.sk_err = error_code;
 		if (!sock_flag(&jsk->sk, SOCK_DEAD))
-			jsk->sk.sk_error_report(&jsk->sk);
+			sk_error_report(&jsk->sk);
 
 		j1939_sk_queue_drop_all(priv, jsk, error_code);
 	}
diff --git a/net/can/raw.c b/net/can/raw.c
index ac96fc210025..ed4fcb7ab0c3 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -295,13 +295,13 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
 
 		sk->sk_err = ENODEV;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		break;
 
 	case NETDEV_DOWN:
 		sk->sk_err = ENETDOWN;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		break;
 	}
 }
@@ -488,7 +488,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	if (notify_enetdown) {
 		sk->sk_err = ENETDOWN;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 	}
 
 	return err;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2531ac4ffa69..12aabcda6db2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1294,7 +1294,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
 	}
 	spin_unlock_irqrestore(&q->lock, flags);
 
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 
 release:
 	consume_skb(skb);
@@ -4685,7 +4685,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 
 	skb_queue_tail(&sk->sk_error_queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	return 0;
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
@@ -4716,7 +4716,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 		sk->sk_err = 0;
 
 	if (skb_next)
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 
 	return skb;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index a2337b37eba6..c30f8f4cbb22 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -331,6 +331,12 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
+void sk_error_report(struct sock *sk)
+{
+	sk->sk_error_report(sk);
+}
+EXPORT_SYMBOL(sk_error_report);
+
 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
 	struct __kernel_sock_timeval tv;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f81c1df761d3..0ea29270d7e5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -329,7 +329,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
 			__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
 			sk->sk_err = err;
 
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 			dccp_done(sk);
 		} else
@@ -356,7 +356,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
 	inet = inet_sk(sk);
 	if (!sock_owned_by_user(sk) && inet->recverr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	} else /* Only an error on timeout */
 		sk->sk_err_soft = err;
 out:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6f5304db5a67..fa663518fa0e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -172,7 +172,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 * Wake people up to see the error
 			 * (see connect in sock.c)
 			 */
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 			dccp_done(sk);
 		} else
 			sk->sk_err_soft = err;
@@ -181,7 +181,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (!sock_owned_by_user(sk) && np->recverr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	} else
 		sk->sk_err_soft = err;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6d705d90c614..7eb0fb231940 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -302,7 +302,7 @@ int dccp_disconnect(struct sock *sk, int flags)
 
 	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	return 0;
 }
 
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index db768f223ef7..27a3b37acd2e 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -20,7 +20,7 @@ int  sysctl_dccp_retries2		__read_mostly = TCP_RETR2;
 static void dccp_write_err(struct sock *sk)
 {
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 
 	dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
 	dccp_done(sk);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 95a718397fd1..1e44a43acfe2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -573,7 +573,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
 		}
 	}
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 out:
 	sock_put(sk);
 }
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 50a73178d63a..bb446e60cf58 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -280,7 +280,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 
 	if (inet->recverr || harderr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	}
 }
 
@@ -929,7 +929,7 @@ int raw_abort(struct sock *sk, int err)
 	lock_sock(sk);
 
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	__udp_disconnect(sk, 0);
 
 	release_sock(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0e3f0e0e5b51..a0a96eb826c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3059,7 +3059,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 		sk->sk_frag.offset = 0;
 	}
 
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	return 0;
 }
 EXPORT_SYMBOL(tcp_disconnect);
@@ -4448,7 +4448,7 @@ int tcp_abort(struct sock *sk, int err)
 		sk->sk_err = err;
 		/* This barrier is coupled with smp_rmb() in tcp_poll() */
 		smp_wmb();
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 		if (tcp_need_reset(sk->sk_state))
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 		tcp_done(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7d5e59f688de..e6ca5a1f3b59 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4270,7 +4270,7 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
 	tcp_done(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 }
 
 /*
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6cb8e269f1ab..e66ad6bfe808 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -585,7 +585,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 
 			tcp_done(sk);
 		} else {
@@ -613,7 +613,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
 	inet = inet_sk(sk);
 	if (!sock_owned_by_user(sk) && inet->recverr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	} else	{ /* Only an error on timeout */
 		sk->sk_err_soft = err;
 	}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 56b9d648f054..20cf4a98c69d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -68,7 +68,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
 static void tcp_write_err(struct sock *sk)
 {
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 
 	tcp_write_queue_purge(sk);
 	tcp_done(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1307ad0d3b9e..f86ccbf7c135 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -776,7 +776,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
 
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 out:
 	return 0;
 }
@@ -2867,7 +2867,7 @@ int udp_abort(struct sock *sk, int err)
 		goto out;
 
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	__udp_disconnect(sk, 0);
 
 out:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index bf3646b57c68..60f1e4f5be5a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -354,7 +354,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 
 	if (np->recverr || harderr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	}
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4d71464094b3..578ab6305c3f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -467,7 +467,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
-			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
+			sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
 
 			tcp_done(sk);
 		} else
@@ -486,7 +486,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (!sock_owned_by_user(sk) && np->recverr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	} else
 		sk->sk_err_soft = err;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3fcd86f4dfdc..368972dbd919 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -610,7 +610,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}
 
 	sk->sk_err = err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 out:
 	return 0;
 }
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 6201965bd822..11a715d76a4f 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -47,7 +47,7 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
 static void report_csk_error(struct sock *csk, int err)
 {
 	csk->sk_err = EPIPE;
-	csk->sk_error_report(csk);
+	sk_error_report(csk);
 }
 
 static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d55f4ef736a5..706a26a1b0fe 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1240,7 +1240,7 @@ void __mptcp_error_report(struct sock *sk)
 
 		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
 		smp_wmb();
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 		break;
 	}
 }
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6133e412b948..d233ac4a91b6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -351,7 +351,7 @@ static void netlink_overrun(struct sock *sk)
 		if (!test_and_set_bit(NETLINK_S_CONGESTED,
 				      &nlk_sk(sk)->state)) {
 			sk->sk_err = ENOBUFS;
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		}
 	}
 	atomic_inc(&sk->sk_drops);
@@ -1576,7 +1576,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
 	}
 
 	sk->sk_err = p->code;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 out:
 	return ret;
 }
@@ -2012,7 +2012,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		ret = netlink_dump(sk);
 		if (ret) {
 			sk->sk_err = -ret;
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 		}
 	}
 
@@ -2439,7 +2439,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 	skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
 	if (!skb) {
 		NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
-		NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
+		sk_error_report(NETLINK_CB(in_skb).sk);
 		return;
 	}
 
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 5f1d438a0a23..5e39640becdb 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -49,7 +49,7 @@ static void rawsock_report_error(struct sock *sk, int err)
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	sk->sk_err = -err;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 
 	rawsock_write_queue_purge(sk);
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 77b0cdab3810..77476184741d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3206,7 +3206,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 	} else {
 		sk->sk_err = ENETDOWN;
 		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_error_report(sk);
+			sk_error_report(sk);
 	}
 
 out_unlock:
@@ -4103,7 +4103,7 @@ static int packet_notifier(struct notifier_block *this,
 					__unregister_prot_hook(sk, false);
 					sk->sk_err = ENETDOWN;
 					if (!sock_flag(sk, SOCK_DEAD))
-						sk->sk_error_report(sk);
+						sk_error_report(sk);
 				}
 				if (msg == NETDEV_UNREGISTER) {
 					packet_cached_dev_reset(po);
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index f2efaa4225f9..e6f4a6202f82 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -751,7 +751,7 @@ static void qrtr_reset_ports(void)
 	xa_for_each_start(&qrtr_ports, index, ipc, 1) {
 		sock_hold(&ipc->sk);
 		ipc->sk.sk_err = ENETRESET;
-		ipc->sk.sk_error_report(&ipc->sk);
+		sk_error_report(&ipc->sk);
 		sock_put(&ipc->sk);
 	}
 	rcu_read_unlock();
diff --git a/net/sctp/input.c b/net/sctp/input.c
index fe6429cc012f..76dcc137f761 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -593,7 +593,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
 	}
 	if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	} else {  /* Only an error on timeout */
 		sk->sk_err_soft = err;
 	}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 05f81a4d0ee7..d041bed86322 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -152,7 +152,7 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
 	icmpv6_err_convert(type, code, &err);
 	if (!sock_owned_by_user(sk) && np->recverr) {
 		sk->sk_err = err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	} else {
 		sk->sk_err_soft = err;
 	}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e41fdac606d4..898389611ae8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2218,7 +2218,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 						   optval, optlen);
 	if (smc->clcsock->sk->sk_err) {
 		sk->sk_err = smc->clcsock->sk->sk_err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	}
 
 	if (optlen < sizeof(int))
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b3815c1e8f2e..9c0343568d2a 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -58,7 +58,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
 
 		/* Report an error on the lower socket */
 		sk->sk_err = -err;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 	}
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 58c2f318b0a8..23c92ad15c61 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -491,7 +491,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 		 */
 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 			other->sk_err = ECONNRESET;
-			other->sk_error_report(other);
+			sk_error_report(other);
 		}
 	}
 }
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 21ccf450e249..9f12da1ff406 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1281,7 +1281,7 @@ static void vsock_connect_timeout(struct work_struct *work)
 	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
 		sk->sk_state = TCP_CLOSE;
 		sk->sk_err = ETIMEDOUT;
-		sk->sk_error_report(sk);
+		sk_error_report(sk);
 		vsock_transport_cancel_pkt(vsk);
 	}
 	release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ed1664e7bd88..e0c2c992ad9c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -360,7 +360,7 @@ static void virtio_vsock_reset_sock(struct sock *sk)
 	lock_sock(sk);
 	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = ECONNRESET;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	release_sock(sk);
 }
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index f014ccfdd9c2..169ba8b72a63 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1007,7 +1007,7 @@ destroy:
 	virtio_transport_reset(vsk, pkt);
 	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = skerr;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	return err;
 }
 
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index e617ed93f06b..7aef34e32bdf 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -831,7 +831,7 @@ static void vmci_transport_handle_detach(struct sock *sk)
 
 				sk->sk_state = TCP_CLOSE;
 				sk->sk_err = ECONNRESET;
-				sk->sk_error_report(sk);
+				sk_error_report(sk);
 				return;
 			}
 			sk->sk_state = TCP_CLOSE;
@@ -1365,7 +1365,7 @@ destroy:
 
 	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = skerr;
-	sk->sk_error_report(sk);
+	sk_error_report(sk);
 	return err;
 }
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 996da915f520..d6b500dc4208 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1313,7 +1313,7 @@ static int xsk_notifier(struct notifier_block *this,
 			if (xs->dev == dev) {
 				sk->sk_err = ENETDOWN;
 				if (!sock_flag(sk, SOCK_DEAD))
-					sk->sk_error_report(sk);
+					sk_error_report(sk);
 
 				xsk_unbind_dev(xs);
 

From e6a3e4434000de5c36d606e5b5da5f7ba49444bd Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Sun, 27 Jun 2021 18:48:22 -0400
Subject: [PATCH 3263/3292] net: sock: add trace for socket errors

This patch will add tracers to trace inet socket errors only. A user
space monitor application can track connection errors indepedent from
socket lifetime and do additional handling. For example a cluster
manager can fence a node if errors occurs in a specific heuristic.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/sock.h | 60 +++++++++++++++++++++++++++++++++++++
 net/core/sock.c             | 10 +++++++
 2 files changed, 70 insertions(+)

diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index a966d4b5ab37..12c315782766 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -201,6 +201,66 @@ TRACE_EVENT(inet_sock_set_state,
 			show_tcp_state_name(__entry->newstate))
 );
 
+TRACE_EVENT(inet_sk_error_report,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk),
+
+	TP_STRUCT__entry(
+		__field(int, error)
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, family)
+		__field(__u16, protocol)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->error = sk->sk_err;
+		__entry->family = sk->sk_family;
+		__entry->protocol = sk->sk_protocol;
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 =  inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (sk->sk_family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = sk->sk_v6_rcv_saddr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = sk->sk_v6_daddr;
+		} else
+#endif
+		{
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+		}
+	),
+
+	TP_printk("family=%s protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c error=%d",
+		  show_family_name(__entry->family),
+		  show_inet_protocol_name(__entry->protocol),
+		  __entry->sport, __entry->dport,
+		  __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  __entry->error)
+);
+
 #endif /* _TRACE_SOCK_H */
 
 /* This part must be outside protection */
diff --git a/net/core/sock.c b/net/core/sock.c
index c30f8f4cbb22..ba1c0f75cd45 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -334,6 +334,16 @@ EXPORT_SYMBOL(__sk_backlog_rcv);
 void sk_error_report(struct sock *sk)
 {
 	sk->sk_error_report(sk);
+
+	switch (sk->sk_family) {
+	case AF_INET:
+		fallthrough;
+	case AF_INET6:
+		trace_inet_sk_error_report(sk);
+		break;
+	default:
+		break;
+	}
 }
 EXPORT_SYMBOL(sk_error_report);
 

From 8602e40fc8132383298f304ae060d80f210be23c Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Mon, 28 Jun 2021 11:25:33 -0700
Subject: [PATCH 3264/3292] ptp: Set lookup cookie when creating a PTP PPS
 source.

When creating a PTP device, the configuration block allows
creation of an associated PPS device.  However, there isn't
any way to associate the two devices after creation.

Set the PPS cookie, so pps_lookup_dev(ptp) performs correctly.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 841d8900504d..a23a37a4d5dc 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -218,6 +218,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 			pr_err("failed to create ptp aux_worker %d\n", err);
 			goto kworker_err;
 		}
+		ptp->pps_source->lookup_cookie = ptp;
 	}
 
 	err = ptp_populate_pin_groups(ptp);

From 23ac0b421674fba943dd131e66b81ed7f3fb3d1d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 03:29:25 +0300
Subject: [PATCH 3265/3292] net: use netdev_info in ndo_dflt_fdb_{add,del}

Use the more modern printk helper for network interfaces, which also
contains information about the associated struct device, and results in
overall shorter line lengths compared to printing an open-coded
dev->name.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 745965e49f78..ab11c9d5002b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3947,12 +3947,12 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
 	 * implement its own handler for this.
 	 */
 	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
-		pr_info("%s: FDB only supports static addresses\n", dev->name);
+		netdev_info(dev, "FDB only supports static addresses\n");
 		return err;
 	}
 
 	if (vid) {
-		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+		netdev_info(dev, "vlans aren't supported yet for dev_uc|mc_add()\n");
 		return err;
 	}
 
@@ -4086,7 +4086,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
 	 * implement its own handler for this.
 	 */
 	if (!(ndm->ndm_state & NUD_PERMANENT)) {
-		pr_info("%s: FDB only supports static addresses\n", dev->name);
+		netdev_info(dev, "FDB only supports static addresses\n");
 		return err;
 	}
 

From 78ecc8903de2adf0387cbf06e5befe29c23f2739 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Jun 2021 03:29:26 +0300
Subject: [PATCH 3266/3292] net: say "local" instead of "static" addresses in
 ndo_dflt_fdb_{add,del}

"Static" is a loaded word, and probably not what the author meant when
the code was written.

In particular, this looks weird:
$ bridge fdb add dev swp0 00:01:02:03:04:05 local        # totally fine, but
$ bridge fdb add dev swp0 00:01:02:03:04:05 static
[ 2020.708298] swp0: FDB only supports static addresses  # hmm what?

By looking at the implementation which uses dev_uc_add/dev_uc_del it is
absolutely clear that only local addresses are supported, and the proper
Network Unreachability Detection state is being used for this purpose
(user space indeed sets NUD_PERMANENT when local addresses are meant).
So it is just the message that is wrong, fix it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ab11c9d5002b..f6af3e74fc44 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3947,7 +3947,7 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
 	 * implement its own handler for this.
 	 */
 	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
-		netdev_info(dev, "FDB only supports static addresses\n");
+		netdev_info(dev, "default FDB implementation only supports local addresses\n");
 		return err;
 	}
 
@@ -4086,7 +4086,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
 	 * implement its own handler for this.
 	 */
 	if (!(ndm->ndm_state & NUD_PERMANENT)) {
-		netdev_info(dev, "FDB only supports static addresses\n");
+		netdev_info(dev, "default FDB implementation only supports local addresses\n");
 		return err;
 	}
 

From 5a9b876e9d76810536bac70c78d961198612919c Mon Sep 17 00:00:00 2001
From: Ling Pei Lee <pei.lee.ling@intel.com>
Date: Tue, 29 Jun 2021 11:08:57 +0800
Subject: [PATCH 3267/3292] net: stmmac: option to enable PHY WOL with PMT
 enabled

The current stmmac driver WOL implementation will enable MAC WOL
if MAC HW PMT feature is on. Else, the driver will check for
PHY WOL support. There is another case where MAC HW PMT is
enabled but the platform still goes for the PHY WOL option.
E.g, Intel platform are designed for PHY WOL but not MAC WOL
although HW MAC PMT features are enabled.

Introduce use_phy_wol platform data to select PHY WOL
instead of depending on HW PMT features. Set use_phy_wol
will disable the plat->pmt which currently used to
determine the system to wake up by MAC WOL or PHY WOL.

Signed-off-by: Ling Pei Lee <pei.lee.ling@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++-
 include/linux/stmmac.h                            | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 219535ab2c0c..8d9d6ecf8c63 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6529,7 +6529,8 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 		 * register (if supported).
 		 */
 		priv->plat->enh_desc = priv->dma_cap.enh_desc;
-		priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up;
+		priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up &&
+				!priv->plat->use_phy_wol;
 		priv->hw->pmt = priv->plat->pmt;
 		if (priv->dma_cap.hash_tb_sz) {
 			priv->hw->multicast_filter_bins =
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 3867980d1447..d5ae621d66ba 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -265,5 +265,6 @@ struct plat_stmmacenet_data {
 	int msi_sfty_ue_vec;
 	int msi_rx_base_vec;
 	int msi_tx_base_vec;
+	bool use_phy_wol;
 };
 #endif

From 945beb7556334166900508fab7e4f50fcd233593 Mon Sep 17 00:00:00 2001
From: Ling Pei Lee <pei.lee.ling@intel.com>
Date: Tue, 29 Jun 2021 11:08:58 +0800
Subject: [PATCH 3268/3292] stmmac: intel: Enable PHY WOL option in EHL

Enable PHY Wake On LAN in Intel EHL Intel platform.
PHY Wake on LAN option is enabled due to
Intel EHL Intel platform is designed for
PHY Wake On LAN but not MAC Wake On LAN.

Signed-off-by: Ling Pei Lee <pei.lee.ling@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index e0a7d2b17921..689bcd947f7a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -593,6 +593,7 @@ static int ehl_common_data(struct pci_dev *pdev,
 	plat->rx_queues_to_use = 8;
 	plat->tx_queues_to_use = 8;
 	plat->clk_ptp_rate = 200000000;
+	plat->use_phy_wol = 1;
 
 	plat->safety_feat_cfg->tsoee = 1;
 	plat->safety_feat_cfg->mrxpee = 1;

From 1dd53a61488d3fd916967fa334e95866637b0b2a Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Tue, 29 Jun 2021 11:08:59 +0800
Subject: [PATCH 3269/3292] stmmac: intel: set PCI_D3hot in suspend

During suspend, set the Intel mgbe to D3hot state
to save power consumption.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Ling Pei Lee <pei.lee.ling@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 689bcd947f7a..8e8778cfbbad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -1118,6 +1118,7 @@ static int __maybe_unused intel_eth_pci_suspend(struct device *dev)
 		return ret;
 
 	pci_wake_from_d3(pdev, true);
+	pci_set_power_state(pdev, PCI_D3hot);
 	return 0;
 }
 

From ecd89c02da85f724a2d24bc5a7e28043cc24b5d7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 29 Jun 2021 11:25:13 +0300
Subject: [PATCH 3270/3292] gve: DQO: Fix off by one in gve_rx_dqo()

The rx->dqo.buf_states[] array is allocated in gve_rx_alloc_ring_dqo()
and it has rx->dqo.num_buf_states so this > needs to >= to prevent an
out of bounds access.

Fixes: 9b8dd5e5ea48 ("gve: DQO: Add RX path")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_rx_dqo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 8738db020061..77bb8227f89b 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -525,7 +525,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
 	struct gve_priv *priv = rx->gve;
 	u16 buf_len;
 
-	if (unlikely(buffer_id > rx->dqo.num_buf_states)) {
+	if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
 		net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
 				    priv->dev->name, buffer_id);
 		return -EINVAL;

From 6706721d82f86e9360c3ad5339fe3da5e0988a51 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 29 Jun 2021 06:52:13 -0700
Subject: [PATCH 3271/3292] tcp_yeah: check struct yeah size at compile time

Compiler can perform the sanity check instead of waiting
to load the module and crash the host.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_yeah.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 3bb448761ca3..07c4c93b9fdb 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -221,7 +221,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 
 static int __init tcp_yeah_register(void)
 {
-	BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
 	tcp_register_congestion_control(&tcp_yeah);
 	return 0;
 }

From 3f8ad50a9e43b6a59070e6c9c5eec79626f81095 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 29 Jun 2021 06:53:14 -0700
Subject: [PATCH 3272/3292] tcp: change ICSK_CA_PRIV_SIZE definition

Instead of a magic number (13 currently) and having
to change it every other year, use sizeof_field() macro.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 3c8c59471bc1..b06c2d02ec84 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -135,7 +135,7 @@ struct inet_connection_sock {
 	u32			  icsk_user_timeout;
 
 	u64			  icsk_ca_priv[104 / sizeof(u64)];
-#define ICSK_CA_PRIV_SIZE      (13 * sizeof(u64))
+#define ICSK_CA_PRIV_SIZE	  sizeof_field(struct inet_connection_sock, icsk_ca_priv)
 };
 
 #define ICSK_TIME_RETRANS	1	/* Retransmit timer */

From fca41af18e10318e4de090db47d9fa7169e1bf2f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 11 Feb 2021 12:42:58 -0700
Subject: [PATCH 3273/3292] qemu_fw_cfg: Make fw_cfg_rev_attr a proper
 kobj_attribute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fw_cfg_showrev() is called by an indirect call in kobj_attr_show(),
which violates clang's CFI checking because fw_cfg_showrev()'s second
parameter is 'struct attribute', whereas the ->show() member of 'struct
kobj_structure' expects the second parameter to be of type 'struct
kobj_attribute'.

$ cat /sys/firmware/qemu_fw_cfg/rev
3

$ dmesg | grep "CFI failure"
[   26.016832] CFI failure (target: fw_cfg_showrev+0x0/0x8):

Fix this by converting fw_cfg_rev_attr to 'struct kobj_attribute' where
this would have been caught automatically by the incompatible pointer
types compiler warning. Update fw_cfg_showrev() accordingly.

Fixes: 75f3e8e47f38 ("firmware: introduce sysfs driver for QEMU's fw_cfg device")
Link: https://github.com/ClangBuiltLinux/linux/issues/1299
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210211194258.4137998-1-nathan@kernel.org
---
 drivers/firmware/qemu_fw_cfg.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0078260fbabe..172c751a4f6c 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -299,15 +299,13 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static ssize_t fw_cfg_showrev(struct kobject *k, struct attribute *a, char *buf)
+static ssize_t fw_cfg_showrev(struct kobject *k, struct kobj_attribute *a,
+			      char *buf)
 {
 	return sprintf(buf, "%u\n", fw_cfg_rev);
 }
 
-static const struct {
-	struct attribute attr;
-	ssize_t (*show)(struct kobject *k, struct attribute *a, char *buf);
-} fw_cfg_rev_attr = {
+static const struct kobj_attribute fw_cfg_rev_attr = {
 	.attr = { .name = "rev", .mode = S_IRUSR },
 	.show = fw_cfg_showrev,
 };

From d578b99443fde0968246cc7cbf3bc3016123c2f4 Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@nutanix.com>
Date: Fri, 11 Jun 2021 14:02:08 +0000
Subject: [PATCH 3274/3292] ext4: notify sysfs on errors_count value change

After s_error_count is incremented, signal the change in the
corresponding sysfs attribute via sysfs_notify. This allows userspace to
poll() on changes to /sys/fs/ext4/*/errors_count.

[ Moved call of ext4_notify_error_sysfs() to flush_stashed_error_work()
  to avoid BUG's caused by calling sysfs_notify trying to sleep after
  being called from an invalid context. -- TYT ]

Signed-off-by: Jonathan Davies <jonathan.davies@nutanix.com>
Link: https://lore.kernel.org/r/20210611140209.28903-1-jonathan.davies@nutanix.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  | 1 +
 fs/ext4/super.c | 2 ++
 fs/ext4/sysfs.c | 5 +++++
 3 files changed, 8 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8ff4ae3b5715..3c51e243450d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3624,6 +3624,7 @@ extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
 
 /* sysfs.c */
+extern void ext4_notify_error_sysfs(struct ext4_sb_info *sbi);
 extern int ext4_register_sysfs(struct super_block *sb);
 extern void ext4_unregister_sysfs(struct super_block *sb);
 extern int __init ext4_init_sysfs(void);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9e0fb798b807..20344633bdd9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -718,6 +718,7 @@ static void flush_stashed_error_work(struct work_struct *work)
 			goto write_directly;
 		}
 		jbd2_journal_stop(handle);
+		ext4_notify_error_sysfs(sbi);
 		return;
 	}
 write_directly:
@@ -726,6 +727,7 @@ write_directly:
 	 * out and hope for the best.
 	 */
 	ext4_commit_super(sbi->s_sb);
+	ext4_notify_error_sysfs(sbi);
 }
 
 #define ext4_error_ratelimit(sb)					\
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 55fcab60a59a..2314f7446592 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -506,6 +506,11 @@ static struct kobj_type ext4_feat_ktype = {
 	.release	= (void (*)(struct kobject *))kfree,
 };
 
+void ext4_notify_error_sysfs(struct ext4_sb_info *sbi)
+{
+	sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+}
+
 static struct kobject *ext4_root;
 
 static struct kobject *ext4_feat;

From 16aa4c9a1fbe763c147a964cdc1f5be8ed98ed13 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Wed, 30 Jun 2021 16:36:38 +0800
Subject: [PATCH 3275/3292] jbd2: export jbd2_journal_[un]register_shrinker()

Export jbd2_journal_[un]register_shrinker() to fix this error when
ext4 is built as a module:

  ERROR: modpost: "jbd2_journal_unregister_shrinker" undefined!
  ERROR: modpost: "jbd2_journal_register_shrinker" undefined!

Fixes: 4ba3fcdde7e3 ("jbd2,ext4: add a shrinker to release checkpointed buffers")
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210630083638.140218-1-yi.zhang@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/journal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 7c52feb6f753..152880c298ca 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2122,6 +2122,7 @@ int jbd2_journal_register_shrinker(journal_t *journal)
 
 	return 0;
 }
+EXPORT_SYMBOL(jbd2_journal_register_shrinker);
 
 /**
  * jbd2_journal_unregister_shrinker()
@@ -2134,6 +2135,7 @@ void jbd2_journal_unregister_shrinker(journal_t *journal)
 	percpu_counter_destroy(&journal->j_jh_shrink_count);
 	unregister_shrinker(&journal->j_shrinker);
 }
+EXPORT_SYMBOL(jbd2_journal_unregister_shrinker);
 
 /**
  * jbd2_journal_destroy() - Release a journal_t structure.

From 948e19479cb649587165243c6cc12d113c9cbbe0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Jun 2021 15:09:55 +0100
Subject: [PATCH 3276/3292] io_uring: don't change sqpoll creds if not needed

SQPOLL doesn't need to change creds if it's not submitting requests.
Move creds overriding into __io_sq_thread() after checking if there are
SQEs pending.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/c54368da2357ac539e0a333f7cfff70d5fb045b2.1624543113.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e7997f9bf879..0cfbdee5da1a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6803,6 +6803,10 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 
 	if (!list_empty(&ctx->iopoll_list) || to_submit) {
 		unsigned nr_events = 0;
+		const struct cred *creds = NULL;
+
+		if (ctx->sq_creds != current_cred())
+			creds = override_creds(ctx->sq_creds);
 
 		mutex_lock(&ctx->uring_lock);
 		if (!list_empty(&ctx->iopoll_list))
@@ -6819,6 +6823,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 
 		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
 			wake_up(&ctx->sqo_sq_wait);
+		if (creds)
+			revert_creds(creds);
 	}
 
 	return ret;
@@ -6870,7 +6876,6 @@ static int io_sq_thread(void *data)
 
 	mutex_lock(&sqd->lock);
 	while (1) {
-		int ret;
 		bool cap_entries, sqt_spin, needs_sched;
 
 		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
@@ -6883,13 +6888,8 @@ static int io_sq_thread(void *data)
 		sqt_spin = false;
 		cap_entries = !list_is_singular(&sqd->ctx_list);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-			const struct cred *creds = NULL;
+			int ret = __io_sq_thread(ctx, cap_entries);
 
-			if (ctx->sq_creds != current_cred())
-				creds = override_creds(ctx->sq_creds);
-			ret = __io_sq_thread(ctx, cap_entries);
-			if (creds)
-				revert_creds(creds);
 			if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}

From 1a924a808208c1880ef9f36b6bf98d27af045f06 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Jun 2021 15:09:56 +0100
Subject: [PATCH 3277/3292] io_uring: refactor io_sq_thread()

Move needs_sched declaration into the block where it's used, so it's
harder to misuse/wrongfully reuse.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e4a07db1353ee38b924dd1b45394cf8e746130b4.1624543113.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0cfbdee5da1a..8b69982aa9e2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6876,7 +6876,7 @@ static int io_sq_thread(void *data)
 
 	mutex_lock(&sqd->lock);
 	while (1) {
-		bool cap_entries, sqt_spin, needs_sched;
+		bool cap_entries, sqt_spin = false;
 
 		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
 			if (io_sqd_handle_event(sqd))
@@ -6885,7 +6885,6 @@ static int io_sq_thread(void *data)
 			continue;
 		}
 
-		sqt_spin = false;
 		cap_entries = !list_is_singular(&sqd->ctx_list);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 			int ret = __io_sq_thread(ctx, cap_entries);
@@ -6904,7 +6903,8 @@ static int io_sq_thread(void *data)
 
 		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
 		if (!io_sqd_events_pending(sqd) && !io_run_task_work()) {
-			needs_sched = true;
+			bool needs_sched = true;
+
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 				io_ring_set_wakeup_flag(ctx);
 

From fe7e325750299126b9cc86d3071af594b46c4518 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Jun 2021 15:09:57 +0100
Subject: [PATCH 3278/3292] io_uring: fix code style problems

Fix a bunch of problems mostly found by checkpatch.pl

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/cfaf9a2f27b43934144fe9422a916bd327099f44.1624543113.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8b69982aa9e2..7639bf3627c8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -173,7 +173,7 @@ struct io_rings {
 	 * Written by the application, shouldn't be modified by the
 	 * kernel.
 	 */
-	u32                     cq_flags;
+	u32			cq_flags;
 	/*
 	 * Number of completion events lost because the queue was full;
 	 * this should be avoided by the application by making sure
@@ -857,7 +857,7 @@ struct io_kiocb {
 	struct hlist_node		hash_node;
 	struct async_poll		*apoll;
 	struct io_wq_work		work;
-	const struct cred 		*creds;
+	const struct cred		*creds;
 
 	/* store used ubuf, so we can prevent reloading */
 	struct io_mapped_ubuf		*imu;
@@ -1707,7 +1707,7 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
-	BUILD_BUG_ON(IO_REQ_ALLOC_BATCH > ARRAY_SIZE(state->reqs));
+	BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH);
 
 	if (!state->free_reqs) {
 		gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
@@ -2769,7 +2769,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 	else
 		io_rw_done(kiocb, ret);
 
-	if (check_reissue && req->flags & REQ_F_REISSUE) {
+	if (check_reissue && (req->flags & REQ_F_REISSUE)) {
 		req->flags &= ~REQ_F_REISSUE;
 		if (io_resubmit_prep(req)) {
 			req_ref_get(req);
@@ -3591,7 +3591,7 @@ static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
 static int __io_splice_prep(struct io_kiocb *req,
 			    const struct io_uring_sqe *sqe)
 {
-	struct io_splice* sp = &req->splice;
+	struct io_splice *sp = &req->splice;
 	unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -3645,7 +3645,7 @@ static int io_tee(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_splice* sp = &req->splice;
+	struct io_splice *sp = &req->splice;
 
 	sp->off_in = READ_ONCE(sqe->splice_off_in);
 	sp->off_out = READ_ONCE(sqe->off);
@@ -8567,6 +8567,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
 	ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
 	if (IS_ERR(ctx->cq_ev_fd)) {
 		int ret = PTR_ERR(ctx->cq_ev_fd);
+
 		ctx->cq_ev_fd = NULL;
 		return ret;
 	}
@@ -9347,9 +9348,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		io_cqring_overflow_flush(ctx, false);
 
 		ret = -EOWNERDEAD;
-		if (unlikely(ctx->sq_data->thread == NULL)) {
+		if (unlikely(ctx->sq_data->thread == NULL))
 			goto out;
-		}
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
 		if (flags & IORING_ENTER_SQ_WAIT) {

From 16340eab61a3ed1b5c983c19cfa9f51929b2beeb Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Jun 2021 15:09:58 +0100
Subject: [PATCH 3279/3292] io_uring: update sqe layout build checks

Add missing BUILD_BUG_SQE_ELEM() for ->buf_group verifying that SQE
layout doesn't change.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1f9d21bd74599b856b3a632be4c23ffa184a3ef0.1624543113.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7639bf3627c8..fe543d639ab0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -10267,6 +10267,7 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(28, __u32,  splice_flags);
 	BUILD_BUG_SQE_ELEM(32, __u64,  user_data);
 	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
+	BUILD_BUG_SQE_ELEM(40, __u16,  buf_group);
 	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
 	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
 
@@ -10279,6 +10280,7 @@ static int __init io_uring_init(void)
 
 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
+
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
 				SLAB_ACCOUNT);
 	return 0;

From 9ba6a1c06279ce499fcf755d8134d679a1f3b4ed Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Jun 2021 15:09:59 +0100
Subject: [PATCH 3280/3292] io_uring: simplify struct io_uring_sqe layout

Flatten struct io_uring_sqe, the last union is exactly 64B, so move them
out of union { struct { ... }}, and decrease __pad2 size.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/2e21ef7aed136293d654450bc3088973a8adc730.1624543113.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index f1f9ac114b51..79126d5cd289 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -46,21 +46,17 @@ struct io_uring_sqe {
 		__u32		unlink_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
+	/* pack this to avoid bogus arm OABI complaints */
 	union {
-		struct {
-			/* pack this to avoid bogus arm OABI complaints */
-			union {
-				/* index into fixed buffers, if used */
-				__u16	buf_index;
-				/* for grouped buffer selection */
-				__u16	buf_group;
-			} __attribute__((packed));
-			/* personality to use, if used */
-			__u16	personality;
-			__s32	splice_fd_in;
-		};
-		__u64	__pad2[3];
-	};
+		/* index into fixed buffers, if used */
+		__u16	buf_index;
+		/* for grouped buffer selection */
+		__u16	buf_group;
+	} __attribute__((packed));
+	/* personality to use, if used */
+	__u16	personality;
+	__s32	splice_fd_in;
+	__u64	__pad2[2];
 };
 
 enum {

From 12dcb58ac785ee678f577e1502d966b538375aae Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Jun 2021 15:10:00 +0100
Subject: [PATCH 3281/3292] io_uring: refactor io_openat2()

Put do_filp_open() fail path of io_openat2() under a single if,
deduplicating put_unused_fd(), making it look better and helping
the hot path.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f4c84d25c049d0af2adc19c703bbfef607200209.1624543113.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fe543d639ab0..d7a68a2f2ec6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3851,27 +3851,26 @@ static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
 		goto err;
 
 	file = do_filp_open(req->open.dfd, req->open.filename, &op);
-	/* only retry if RESOLVE_CACHED wasn't already set by application */
-	if ((!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK)) &&
-	    file == ERR_PTR(-EAGAIN)) {
+	if (IS_ERR(file)) {
 		/*
-		 * We could hang on to this 'fd', but seems like marginal
-		 * gain for something that is now known to be a slower path.
-		 * So just put it, and we'll get a new one when we retry.
+		 * We could hang on to this 'fd' on retrying, but seems like
+		 * marginal gain for something that is now known to be a slower
+		 * path. So just put it, and we'll get a new one when we retry.
 		 */
 		put_unused_fd(ret);
-		return -EAGAIN;
+
+		ret = PTR_ERR(file);
+		/* only retry if RESOLVE_CACHED wasn't already set by application */
+		if (ret == -EAGAIN &&
+		    (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK)))
+			return -EAGAIN;
+		goto err;
 	}
 
-	if (IS_ERR(file)) {
-		put_unused_fd(ret);
-		ret = PTR_ERR(file);
-	} else {
-		if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
-			file->f_flags &= ~O_NONBLOCK;
-		fsnotify_open(file);
-		fd_install(ret, file);
-	}
+	if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
+		file->f_flags &= ~O_NONBLOCK;
+	fsnotify_open(file);
+	fd_install(ret, file);
 err:
 	putname(req->open.filename);
 	req->flags &= ~REQ_F_NEED_CLEANUP;

From ed7eb2592286ead7d3bfdf8adf65e65392167cc4 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 23 Jun 2021 09:04:13 -0600
Subject: [PATCH 3282/3292] io_uring: add IOPOLL and reserved field checks to
 IORING_OP_RENAMEAT

We can't support IOPOLL with non-pollable request types, and we should
check for unused/reserved fields like we do for other request types.

Fixes: 80a261fd0032 ("io_uring: add support for IORING_OP_RENAMEAT")
Cc: stable@vger.kernel.org
Reported-by: Dmitry Kadashev <dkadashev@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d7a68a2f2ec6..02eda0575c56 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3463,6 +3463,10 @@ static int io_renameat_prep(struct io_kiocb *req,
 	struct io_rename *ren = &req->rename;
 	const char __user *oldf, *newf;
 
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->buf_index)
+		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
 

From 22634bc5620d29765e5199c7b230a372c7ddcda2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 23 Jun 2021 09:07:45 -0600
Subject: [PATCH 3283/3292] io_uring: add IOPOLL and reserved field checks to
 IORING_OP_UNLINKAT

We can't support IOPOLL with non-pollable request types, and we should
check for unused/reserved fields like we do for other request types.

Fixes: 14a1143b68ee ("io_uring: add support for IORING_OP_UNLINKAT")
Cc: stable@vger.kernel.org
Reported-by: Dmitry Kadashev <dkadashev@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 02eda0575c56..a0bb3cc7d3bb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3514,6 +3514,10 @@ static int io_unlinkat_prep(struct io_kiocb *req,
 	struct io_unlink *un = &req->unlink;
 	const char __user *fname;
 
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
+		return -EINVAL;
 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 		return -EBADF;
 

From 59b735aeeb0f23a760bc21f1c5a1ab6c79e9fe0e Mon Sep 17 00:00:00 2001
From: Olivier Langlois <olivier@trillion01.com>
Date: Tue, 22 Jun 2021 05:17:39 -0700
Subject: [PATCH 3284/3292] io_uring: reduce latency by reissueing the
 operation

It is quite frequent that when an operation fails and returns EAGAIN,
the data becomes available between that failure and the call to
vfs_poll() done by io_arm_poll_handler().

Detecting the situation and reissuing the operation is much faster
than going ahead and push the operation to the io-wq.

Performance improvement testing has been performed with:
Single thread, 1 TCP connection receiving a 5 Mbps stream, no sqpoll.

4 measurements have been taken:
1. The time it takes to process a read request when data is already available
2. The time it takes to process by calling twice io_issue_sqe() after vfs_poll() indicated that data was available
3. The time it takes to execute io_queue_async_work()
4. The time it takes to complete a read request asynchronously

2.25% of all the read operations did use the new path.

ready data (baseline)
avg	3657.94182918628
min	580
max	20098
stddev	1213.15975908162

reissue	completion
average	7882.67567567568
min	2316
max	28811
stddev	1982.79172973284

insert io-wq time
average	8983.82276995305
min	3324
max	87816
stddev	2551.60056552038

async time completion
average	24670.4758861127
min	10758
max	102612
stddev	3483.92416873804

Conclusion:
On average reissuing the sqe with the patch code is 1.1uSec faster and
in the worse case scenario 59uSec faster than placing the request on
io-wq

On average completion time by reissuing the sqe with the patch code is
16.79uSec faster and in the worse case scenario 73.8uSec faster than
async completion.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9e8441419bb1b8f3c3fcc607b2713efecdef2136.1624364038.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a0bb3cc7d3bb..ab30c0a0c09b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5160,7 +5160,13 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
 	return mask;
 }
 
-static bool io_arm_poll_handler(struct io_kiocb *req)
+enum {
+	IO_APOLL_OK,
+	IO_APOLL_ABORTED,
+	IO_APOLL_READY
+};
+
+static int io_arm_poll_handler(struct io_kiocb *req)
 {
 	const struct io_op_def *def = &io_op_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
@@ -5170,22 +5176,22 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	int rw;
 
 	if (!req->file || !file_can_poll(req->file))
-		return false;
+		return IO_APOLL_ABORTED;
 	if (req->flags & REQ_F_POLLED)
-		return false;
+		return IO_APOLL_ABORTED;
 	if (def->pollin)
 		rw = READ;
 	else if (def->pollout)
 		rw = WRITE;
 	else
-		return false;
+		return IO_APOLL_ABORTED;
 	/* if we can't nonblock try, then no point in arming a poll handler */
 	if (!io_file_supports_async(req, rw))
-		return false;
+		return IO_APOLL_ABORTED;
 
 	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
 	if (unlikely(!apoll))
-		return false;
+		return IO_APOLL_ABORTED;
 	apoll->double_poll = NULL;
 
 	req->flags |= REQ_F_POLLED;
@@ -5211,12 +5217,14 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	if (ret || ipt.error) {
 		io_poll_remove_double(req);
 		spin_unlock_irq(&ctx->completion_lock);
-		return false;
+		if (ret)
+			return IO_APOLL_READY;
+		return IO_APOLL_ABORTED;
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 	trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data,
 				mask, apoll->poll.events);
-	return true;
+	return IO_APOLL_OK;
 }
 
 static bool __io_poll_remove_one(struct io_kiocb *req,
@@ -6445,6 +6453,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
 	struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
 	int ret;
 
+issue_sqe:
 	ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
 
 	/*
@@ -6464,12 +6473,16 @@ static void __io_queue_sqe(struct io_kiocb *req)
 			io_put_req(req);
 		}
 	} else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
-		if (!io_arm_poll_handler(req)) {
+		switch (io_arm_poll_handler(req)) {
+		case IO_APOLL_READY:
+			goto issue_sqe;
+		case IO_APOLL_ABORTED:
 			/*
 			 * Queued up for async execution, worker will release
 			 * submit reference when the iocb is actually submitted.
 			 */
 			io_queue_async_work(req);
+			break;
 		}
 	} else {
 		io_req_complete_failed(req, ret);

From b2d9c3da77115b5172749dec20312651e67e0adf Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 26 Jun 2021 21:40:44 +0100
Subject: [PATCH 3285/3292] io_uring: refactor io_arm_poll_handler()

gcc 11 goes a weird path and duplicates most of io_arm_poll_handler()
for READ and WRITE cases. Help it and move all pollin vs pollout
specific bits under a single if-else, so there is no temptation for this
kind of unfolding.

before vs after:
   text    data     bss     dec     hex filename
  85362   12650       8   98020   17ee4 ./fs/io_uring.o
  85186   12650       8   97844   17e34 ./fs/io_uring.o

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1deea0037293a922a0358e2958384b2e42437885.1624739600.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ab30c0a0c09b..bfba7558ea86 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5172,19 +5172,29 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 	struct io_ring_ctx *ctx = req->ctx;
 	struct async_poll *apoll;
 	struct io_poll_table ipt;
-	__poll_t mask, ret;
+	__poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI;
 	int rw;
 
 	if (!req->file || !file_can_poll(req->file))
 		return IO_APOLL_ABORTED;
 	if (req->flags & REQ_F_POLLED)
 		return IO_APOLL_ABORTED;
-	if (def->pollin)
-		rw = READ;
-	else if (def->pollout)
-		rw = WRITE;
-	else
+	if (!def->pollin && !def->pollout)
 		return IO_APOLL_ABORTED;
+
+	if (def->pollin) {
+		rw = READ;
+		mask |= POLLIN | POLLRDNORM;
+
+		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
+		if ((req->opcode == IORING_OP_RECVMSG) &&
+		    (req->sr_msg.msg_flags & MSG_ERRQUEUE))
+			mask &= ~POLLIN;
+	} else {
+		rw = WRITE;
+		mask |= POLLOUT | POLLWRNORM;
+	}
+
 	/* if we can't nonblock try, then no point in arming a poll handler */
 	if (!io_file_supports_async(req, rw))
 		return IO_APOLL_ABORTED;
@@ -5193,23 +5203,8 @@ static int io_arm_poll_handler(struct io_kiocb *req)
 	if (unlikely(!apoll))
 		return IO_APOLL_ABORTED;
 	apoll->double_poll = NULL;
-
-	req->flags |= REQ_F_POLLED;
 	req->apoll = apoll;
-
-	mask = EPOLLONESHOT;
-	if (def->pollin)
-		mask |= POLLIN | POLLRDNORM;
-	if (def->pollout)
-		mask |= POLLOUT | POLLWRNORM;
-
-	/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
-	if ((req->opcode == IORING_OP_RECVMSG) &&
-	    (req->sr_msg.msg_flags & MSG_ERRQUEUE))
-		mask &= ~POLLIN;
-
-	mask |= POLLERR | POLLPRI;
-
+	req->flags |= REQ_F_POLLED;
 	ipt.pt._qproc = io_async_queue_proc;
 
 	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,

From dd432ea5204eeb92a2abf246ce518e68679da772 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 26 Jun 2021 21:40:45 +0100
Subject: [PATCH 3286/3292] io_uring: mainstream sqpoll task_work running

task_works are widely used, so place io_run_task_work() directly into
the main path of io_sq_thread(), and remove it from other places where
it's not needed anymore.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/24eb5e35d519c590d3dffbd694b4c61a5fe49029.1624739600.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bfba7558ea86..80b7a6f04841 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6868,7 +6868,6 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
 		cond_resched();
 		mutex_lock(&sqd->lock);
 	}
-	io_run_task_work();
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
 
@@ -6897,7 +6896,6 @@ static int io_sq_thread(void *data)
 			if (io_sqd_handle_event(sqd))
 				break;
 			timeout = jiffies + sqd->sq_thread_idle;
-			continue;
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
@@ -6907,9 +6905,10 @@ static int io_sq_thread(void *data)
 			if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}
+		if (io_run_task_work())
+			sqt_spin = true;
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
-			io_run_task_work();
 			cond_resched();
 			if (sqt_spin)
 				timeout = jiffies + sqd->sq_thread_idle;
@@ -6917,7 +6916,7 @@ static int io_sq_thread(void *data)
 		}
 
 		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
-		if (!io_sqd_events_pending(sqd) && !io_run_task_work()) {
+		if (!io_sqd_events_pending(sqd) && !current->task_works) {
 			bool needs_sched = true;
 
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {

From e5dc480d4ed9884274e95c757fa2d2e9cc1047ee Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 26 Jun 2021 21:40:46 +0100
Subject: [PATCH 3287/3292] io_uring: remove not needed PF_EXITING check

Since cancellation got moved before exit_signals(), there is no one left
who can call io_run_task_work() with PF_EXIING set, so remove the check.
Note that __io_req_task_submit() still needs a similar check.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/f7f305ececb1e6044ea649fb983ca754805bb884.1624739600.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 80b7a6f04841..ce88ad58955a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2235,12 +2235,6 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
 
 static inline bool io_run_task_work(void)
 {
-	/*
-	 * Not safe to run on exiting task, and the task_work handling will
-	 * not add work to such a task.
-	 */
-	if (unlikely(current->flags & PF_EXITING))
-		return false;
 	if (current->task_works) {
 		__set_current_state(TASK_RUNNING);
 		task_work_run();
@@ -9020,7 +9014,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 		ret |= io_cancel_defer_files(ctx, task, cancel_all);
 		ret |= io_poll_remove_all(ctx, task, cancel_all);
 		ret |= io_kill_timeouts(ctx, task, cancel_all);
-		ret |= io_run_task_work();
+		if (task)
+			ret |= io_run_task_work();
 		ret |= io_run_ctx_fallback(ctx);
 		if (!ret)
 			break;

From 4cfb25bf8877c947e5ae4875e387babe87e12afa Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 26 Jun 2021 21:40:47 +0100
Subject: [PATCH 3288/3292] io_uring: optimise hot path restricted checks

Move likely/unlikely from io_check_restriction() to specifically
ctx->restricted check, because doesn't do what it supposed to and make
the common path take an extra jump.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/22bf70d0a543dfc935d7276bdc73081784e30698.1624739600.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ce88ad58955a..a5df65f6f9ab 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6506,7 +6506,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
 					struct io_kiocb *req,
 					unsigned int sqe_flags)
 {
-	if (!ctx->restricted)
+	if (likely(!ctx->restricted))
 		return true;
 
 	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
@@ -6549,7 +6549,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return -EINVAL;
 	if (unlikely(req->opcode >= IORING_OP_LAST))
 		return -EINVAL;
-	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
+	if (!io_check_restriction(ctx, req, sqe_flags))
 		return -EACCES;
 
 	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&

From 5182ed2e332e8e11fa3c1649ef6d6546ccca64d0 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 26 Jun 2021 21:40:48 +0100
Subject: [PATCH 3289/3292] io_uring: refactor io_submit_flush_completions

Don't init req_batch before we actually need it. Also, add a small clean
up for req declaration.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ad85512e12bd3a20d521e9782750300970e5afc8.1624739600.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a5df65f6f9ab..b1620fbd69eb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2132,22 +2132,22 @@ static void io_submit_flush_completions(struct io_ring_ctx *ctx)
 {
 	struct io_comp_state *cs = &ctx->submit_state.comp;
 	int i, nr = cs->nr;
-	struct io_kiocb *req;
 	struct req_batch rb;
 
-	io_init_req_batch(&rb);
 	spin_lock_irq(&ctx->completion_lock);
 	for (i = 0; i < nr; i++) {
-		req = cs->reqs[i];
+		struct io_kiocb *req = cs->reqs[i];
+
 		__io_cqring_fill_event(ctx, req->user_data, req->result,
 					req->compl.cflags);
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
-
 	io_cqring_ev_posted(ctx);
+
+	io_init_req_batch(&rb);
 	for (i = 0; i < nr; i++) {
-		req = cs->reqs[i];
+		struct io_kiocb *req = cs->reqs[i];
 
 		/* submission and completion refs */
 		if (req_ref_sub_and_test(req, 2))

From 99ebe4efbd3882422db1fd6a1b477291ea8bdab7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 26 Jun 2021 21:40:49 +0100
Subject: [PATCH 3290/3292] io_uring: pre-initialise some of req fields

Most of requests are allocated from an internal cache, so it's waste of
time fully initialising them every time. Instead, let's pre-init some of
the fields we can during initial allocation (e.g. kmalloc(), see
io_alloc_req()) and keep them valid on request recycling. There are four
of them in this patch:

->ctx is always stays the same
->link is NULL on free, it's an invariant
->result is not even needed to init, just a precaution
->async_data we now clean in io_dismantle_req() as it's likely to
   never be allocated.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/892ba0e71309bba9fe9e0142472330bbf9d8f05d.1624739600.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b1620fbd69eb..b14de92832e1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1711,7 +1711,7 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
 
 	if (!state->free_reqs) {
 		gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
-		int ret;
+		int ret, i;
 
 		if (io_flush_cached_reqs(ctx))
 			goto got_req;
@@ -1729,6 +1729,20 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
 				return NULL;
 			ret = 1;
 		}
+
+		/*
+		 * Don't initialise the fields below on every allocation, but
+		 * do that in advance and keep valid on free.
+		 */
+		for (i = 0; i < ret; i++) {
+			struct io_kiocb *req = state->reqs[i];
+
+			req->ctx = ctx;
+			req->link = NULL;
+			req->async_data = NULL;
+			/* not necessary, but safer to zero */
+			req->result = 0;
+		}
 		state->free_reqs = ret;
 	}
 got_req:
@@ -1752,8 +1766,10 @@ static void io_dismantle_req(struct io_kiocb *req)
 		io_put_file(req->file);
 	if (req->fixed_rsrc_refs)
 		percpu_ref_put(req->fixed_rsrc_refs);
-	if (req->async_data)
+	if (req->async_data) {
 		kfree(req->async_data);
+		req->async_data = NULL;
+	}
 }
 
 /* must to be called somewhat shortly after putting a request */
@@ -6534,15 +6550,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	/* same numerical values with corresponding REQ_F_*, safe to copy */
 	req->flags = sqe_flags = READ_ONCE(sqe->flags);
 	req->user_data = READ_ONCE(sqe->user_data);
-	req->async_data = NULL;
 	req->file = NULL;
-	req->ctx = ctx;
-	req->link = NULL;
 	req->fixed_rsrc_refs = NULL;
 	/* one is dropped after submission, the other at completion */
 	atomic_set(&req->refs, 2);
 	req->task = current;
-	req->result = 0;
 
 	/* enforce forwards compatibility on users */
 	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))

From 915b3dde9b72cb4f531b04208daafcd0a257b847 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Mon, 28 Jun 2021 05:37:30 +0800
Subject: [PATCH 3291/3292] io_uring: spin in iopoll() only when reqs are in a
 single queue

We currently spin in iopoll() when requests to be iopolled are for
same file(device), while one device may have multiple hardware queues.
given an example:

hw_queue_0     |    hw_queue_1
req(30us)           req(10us)

If we first spin on iopolling for the hw_queue_0. the avg latency would
be (30us + 30us) / 2 = 30us. While if we do round robin, the avg
latency would be (30us + 10us) / 2 = 20us since we reap the request in
hw_queue_1 in time. So it's better to do spinning only when requests
are in same hardware queue.

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b14de92832e1..67099bb99a02 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -434,7 +434,7 @@ struct io_ring_ctx {
 		struct list_head	iopoll_list;
 		struct hlist_head	*cancel_hash;
 		unsigned		cancel_hash_bits;
-		bool			poll_multi_file;
+		bool			poll_multi_queue;
 	} ____cacheline_aligned_in_smp;
 
 	struct io_restriction		restrictions;
@@ -2314,7 +2314,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	 * Only spin for completions if we don't have multiple devices hanging
 	 * off our complete list, and we're under the requested amount.
 	 */
-	spin = !ctx->poll_multi_file && *nr_events < min;
+	spin = !ctx->poll_multi_queue && *nr_events < min;
 
 	ret = 0;
 	list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
@@ -2553,14 +2553,22 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	 * different devices.
 	 */
 	if (list_empty(&ctx->iopoll_list)) {
-		ctx->poll_multi_file = false;
-	} else if (!ctx->poll_multi_file) {
+		ctx->poll_multi_queue = false;
+	} else if (!ctx->poll_multi_queue) {
 		struct io_kiocb *list_req;
+		unsigned int queue_num0, queue_num1;
 
 		list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
 						inflight_entry);
-		if (list_req->file != req->file)
-			ctx->poll_multi_file = true;
+
+		if (list_req->file != req->file) {
+			ctx->poll_multi_queue = true;
+		} else {
+			queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie);
+			queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie);
+			if (queue_num0 != queue_num1)
+				ctx->poll_multi_queue = true;
+		}
 	}
 
 	/*

From e149bd742b2db6a63fc078b1ea6843dc9b22678d Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Mon, 28 Jun 2021 05:48:05 +0800
Subject: [PATCH 3292/3292] io_uring: code clean for kiocb_done()

A simple code clean for kiocb_done()

Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 67099bb99a02..e55b21fc0ab2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2782,7 +2782,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
-	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
+	if (ret >= 0 && check_reissue)
 		__io_complete_rw(req, ret, 0, issue_flags);
 	else
 		io_rw_done(kiocb, ret);